diff --git a/.CodeQL.yml b/.CodeQL.yml
new file mode 100644
index 000000000000..3c93eef49798
--- /dev/null
+++ b/.CodeQL.yml
@@ -0,0 +1,10 @@
+# This file configures CodeQL runs and TSA bug autofiling. For more information, see:
+# https://eng.ms/docs/cloud-ai-platform/devdiv/one-engineering-system-1es/1es-docs/codeql/troubleshooting/bugs/generated-library-code
+# (Access restricted to Microsoft employees only.)
+
+path_classifiers:
+  refs:
+    # The ref/ directories don't contain shipping implementations of code, so they should
+    # be excluded from analysis. If there is a problem at the API layer, the analysis
+    # engine will detect the problem in the src/ implementations anyway.
+    - src/libraries/**/ref/*
diff --git a/.config/dotnet-tools.json b/.config/dotnet-tools.json
index ebdcdc51aaef..9abee6864b8e 100644
--- a/.config/dotnet-tools.json
+++ b/.config/dotnet-tools.json
@@ -15,7 +15,7 @@
       ]
     },
     "microsoft.dotnet.xharness.cli": {
-      "version": "9.0.0-prerelease.24077.1",
+      "version": "9.0.0-prerelease.24203.1",
       "commands": [
         "xharness"
       ]
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 5a697ac08819..d76e325e8b6c 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -25,4 +25,5 @@ RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
         libssl-dev \
         libkrb5-dev \
         zlib1g-dev \
-        ninja-build
+        ninja-build \
+        tzdata
diff --git a/.devcontainer/scripts/onCreateCommand.sh b/.devcontainer/scripts/onCreateCommand.sh
index 066d0eccda87..6c2527c7d1ef 100755
--- a/.devcontainer/scripts/onCreateCommand.sh
+++ b/.devcontainer/scripts/onCreateCommand.sh
@@ -2,6 +2,26 @@
 
 set -e
 
+function wasm_common() {
+    # prebuild for WASM, so it is ready for wasm development
+    make -C src/mono/browser provision-wasm
+    export EMSDK_PATH=$PWD/src/mono/browser/emsdk
+    case "$1" in
+    wasm)
+        # Put your common commands for wasm here
+        ./build.sh mono+libs -os browser -c Release
+        ;;
+    wasm-multithreaded)
+        # Put your common commands for wasm-multithread here
+        ./build.sh mono+libs -os browser -c Release /p:WasmEnableThreads=true
+        ;;
+    *)
+        # install dotnet-serve for running wasm samples
+    ./dotnet.sh tool install dotnet-serve --version 1.10.172 --tool-path ./.dotnet-tools-global
+    ;;
+    esac
+}
+
 opt=$1
 case "$opt" in
 
@@ -20,15 +40,13 @@ case "$opt" in
     ;;
 
     wasm)
-        # prebuild for WASM, so it is ready for wasm development
-        make -C src/mono/browser provision-wasm
-        export EMSDK_PATH=$PWD/src/mono/browser/emsdk
-        ./build.sh mono+libs -os browser -c Release
+        wasm_common $opt
+    ;;
 
-        # install dotnet-serve for running wasm samples
-        ./dotnet.sh tool install dotnet-serve --version 1.10.172 --tool-path ./.dotnet-tools-global
+    wasm-multithreaded)
+        wasm_common $opt
     ;;
 esac
 
 # save the commit hash of the currently built assemblies, so developers know which version was built
-git rev-parse HEAD > ./artifacts/prebuild.sha
+git rev-parse HEAD > ./artifacts/prebuild.sha
\ No newline at end of file
diff --git a/.devcontainer/scripts/postCreateCommand.sh b/.devcontainer/scripts/postCreateCommand.sh
index b50fb9a7009f..4ca45cc03fbb 100755
--- a/.devcontainer/scripts/postCreateCommand.sh
+++ b/.devcontainer/scripts/postCreateCommand.sh
@@ -11,4 +11,4 @@ case "$opt" in
 esac
 
 # reset the repo to the commit hash that was used to build the prebuilt Codespace
-git reset --hard $(cat ./artifacts/prebuild.sha)
+git reset --hard $(cat ./artifacts/prebuild.sha)
\ No newline at end of file
diff --git a/.devcontainer/wasm-multiThreaded/Dockerfile b/.devcontainer/wasm-multiThreaded/Dockerfile
new file mode 100644
index 000000000000..75f2465b391b
--- /dev/null
+++ b/.devcontainer/wasm-multiThreaded/Dockerfile
@@ -0,0 +1,60 @@
+# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.192.0/containers/dotnet/.devcontainer/base.Dockerfile
+# For details on dotnet specific container, see: https://github.com/microsoft/vscode-dev-containers/tree/main/containers/dotnet
+
+# [Choice] .NET version: 6.0, 7.0
+ARG VARIANT="6.0-jammy"
+FROM mcr.microsoft.com/devcontainers/dotnet:0-${VARIANT}
+
+# Set up machine requirements to build the repo and the gh CLI
+RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
+    && apt-get -y install --no-install-recommends \
+        cmake \
+        llvm \
+        clang \
+        build-essential \
+        python3 \
+        curl \
+        git \
+        lldb \
+        liblldb-dev \
+        libunwind8 \
+        libunwind8-dev \
+        gettext \
+        libicu-dev \
+        liblttng-ust-dev \
+        libssl-dev \
+        libkrb5-dev \
+        zlib1g-dev \
+        ninja-build
+
+SHELL ["/bin/bash", "-c"]
+
+# Install LTS npm and node
+RUN source /usr/local/share/nvm/nvm.sh && nvm install --lts
+
+# Install V8 Engine
+RUN curl -sSL "https://netcorenativeassets.blob.core.windows.net/resource-packages/external/linux/chromium-v8/v8-linux64-rel-10.8.168.zip" -o ./v8.zip \
+    && unzip ./v8.zip -d /usr/local/v8 \
+    && echo $'#!/usr/bin/env bash\n\
+"/usr/local/v8/d8" --snapshot_blob="/usr/local/v8/snapshot_blob.bin" "$@"\n' > /usr/local/bin/v8 \
+    && chmod +x /usr/local/bin/v8
+
+# install chromium dependencies to run debugger tests:
+RUN sudo apt-get install libnss3 -y \
+    && apt-get install libatk1.0-0 -y \
+    && apt-get install libatk-bridge2.0-0 -y \
+    && apt-get install libcups2 -y \
+    && apt-get install libdrm2 -y \
+    && apt-get install libxkbcommon-x11-0 -y \
+    && apt-get install libxcomposite-dev -y \
+    && apt-get install libxdamage1 -y \
+    && apt-get install libxrandr2 -y \
+    && apt-get install libgbm-dev -y \
+    && apt-get install libpango-1.0-0 -y \
+    && apt-get install libcairo2 -y \
+    && apt-get install libasound2 -y
+
+# install firefox dependencies to run debugger tests:
+RUN sudo apt-get install libdbus-glib-1-2 -y \
+    && apt-get install libgtk-3-0 -y \
+    && apt-get install libx11-xcb-dev  -y
diff --git a/.devcontainer/wasm-multiThreaded/devcontainer.json b/.devcontainer/wasm-multiThreaded/devcontainer.json
new file mode 100644
index 000000000000..b885a0f3620f
--- /dev/null
+++ b/.devcontainer/wasm-multiThreaded/devcontainer.json
@@ -0,0 +1,66 @@
+// For format details, see https://aka.ms/devcontainer.json.
+{
+	"name": "WASM multithreaded development (prebuilt)",
+	"build": {
+		"dockerfile": "Dockerfile",
+		"args": {
+			// Update 'VARIANT' to pick a .NET Core version: 6.0, 7.0
+			"VARIANT": "6.0-jammy"
+		}
+	},
+	"hostRequirements": {
+		"cpus": 4,
+		"memory": "8gb",
+		"storage": "40gb"
+	},
+
+	"features": {
+		"ghcr.io/devcontainers/features/github-cli:1": {}
+	},
+
+	// Configure tool-specific properties.
+	"customizations": {
+		// Configure properties specific to VS Code.
+		"vscode": {
+			// Add the IDs of extensions you want installed when the container is created.
+			"extensions": [
+				"ms-dotnettools.csharp"
+			],
+			"settings": {
+				// Loading projects on demand is better for larger codebases
+				"omnisharp.enableMsBuildLoadProjectsOnDemand": true,
+				"omnisharp.enableRoslynAnalyzers": true,
+				"omnisharp.enableEditorConfigSupport": true,
+				"omnisharp.enableAsyncCompletion": true,
+				"omnisharp.testRunSettings": "${containerWorkspaceFolder}/artifacts/obj/vscode/.runsettings"
+			}
+		}
+	},
+
+	// Use 'onCreateCommand' to run pre-build commands inside the codespace
+	"onCreateCommand": "${containerWorkspaceFolder}/.devcontainer/scripts/onCreateCommand.sh wasm-multithreaded",
+
+	// Use 'postCreateCommand' to run commands after the container is created.
+	"postCreateCommand": "${containerWorkspaceFolder}/.devcontainer/scripts/postCreateCommand.sh wasm-multithreaded",
+
+	// Add the locally installed dotnet to the path to ensure that it is activated
+	// This allows developers to just use 'dotnet build' on the command-line, and the local dotnet version will be used.
+	// Add the global tools dir to the PATH so that globally installed tools will work
+	"remoteEnv": {
+		"PATH": "${containerWorkspaceFolder}/.dotnet:${containerWorkspaceFolder}/.dotnet-tools-global:${containerEnv:PATH}",
+		"DOTNET_MULTILEVEL_LOOKUP": "0",
+		// Path to provisioned Emscripten SDK, for rebuilding the wasm runtime
+		"EMSDK_PATH": "${containerWorkspaceFolder}/src/mono/browser/emsdk",
+	},
+
+	// Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
+	"remoteUser": "vscode",
+
+	// Forward mono samples port
+	"forwardPorts": [8000],
+	"portsAttributes": {
+		"8000": {
+			"label": "mono wasm samples (8000)",
+		}
+	}
+}
diff --git a/.devcontainer/wasm/devcontainer.json b/.devcontainer/wasm/devcontainer.json
index ab598dcb9a32..c9becdc18994 100644
--- a/.devcontainer/wasm/devcontainer.json
+++ b/.devcontainer/wasm/devcontainer.json
@@ -1,6 +1,6 @@
 // For format details, see https://aka.ms/devcontainer.json.
 {
-	"name": "WASM development (prebuilt)",
+	"name": "WASM singlethreaded development (prebuilt)",
 	"build": {
 		"dockerfile": "Dockerfile",
 		"args": {
@@ -10,7 +10,8 @@
 	},
 	"hostRequirements": {
 		"cpus": 4,
-		"memory": "8gb"
+		"memory": "8gb",
+		"storage": "40gb"
 	},
 
 	"features": {
@@ -40,7 +41,7 @@
 	"onCreateCommand": "${containerWorkspaceFolder}/.devcontainer/scripts/onCreateCommand.sh wasm",
 
 	// Use 'postCreateCommand' to run commands after the container is created.
-	"postCreateCommand": "${containerWorkspaceFolder}/.devcontainer/scripts/postCreateCommand.sh",
+	"postCreateCommand": "${containerWorkspaceFolder}/.devcontainer/scripts/postCreateCommand.sh wasm",
 
 	// Add the locally installed dotnet to the path to ensure that it is activated
 	// This allows developers to just use 'dotnet build' on the command-line, and the local dotnet version will be used.
diff --git a/.editorconfig b/.editorconfig
index 2d2860549c8f..7bbf4ec5b35e 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -92,6 +92,7 @@ dotnet_style_readonly_field = true:suggestion
 # Expression-level preferences
 dotnet_style_object_initializer = true:suggestion
 dotnet_style_collection_initializer = true:suggestion
+dotnet_style_prefer_collection_expression = when_types_exactly_match
 dotnet_style_explicit_tuple_names = true:suggestion
 dotnet_style_coalesce_expression = true:suggestion
 dotnet_style_null_propagation = true:suggestion
diff --git a/.github/CODEOWNERS-stop-notifications b/.github/CODEOWNERS-stop-notifications
index b042cebfc6b9..2d544524862e 100644
--- a/.github/CODEOWNERS-stop-notifications
+++ b/.github/CODEOWNERS-stop-notifications
@@ -21,43 +21,43 @@
 
 /src/mono @marek-safar
 
-/src/mono/llvm @vargaz @SamMonoRT
+/src/mono/llvm @lambdageek @steveisok
 
-/src/mono/mono/arch @vargaz
-/src/mono/mono/eglib @vargaz @lambdageek
+/src/mono/mono/arch @lambdageek @steveisok
+/src/mono/mono/eglib @lambdageek @steveisok
 
-/src/mono/mono/metadata @vargaz @lambdageek @thaystg
+/src/mono/mono/metadata @lambdageek @thaystg
 /src/mono/mono/metadata/*-win* @lateralusX @lambdageek
-/src/mono/mono/metadata/handle* @lambdageek @vargaz
-/src/mono/mono/metadata/monitor* @brzvlad @vargaz
-/src/mono/mono/metadata/sgen* @brzvlad @vargaz @lambdageek
+/src/mono/mono/metadata/handle* @lambdageek @steveisok
+/src/mono/mono/metadata/monitor* @brzvlad @steveisok
+/src/mono/mono/metadata/sgen* @brzvlad @lambdageek
 /src/mono/mono/metadata/thread* @lateralusX @lambdageek
 /src/mono/mono/metadata/w32* @lateralusX @lambdageek
 
 /src/mono/mono/eventpipe @lateralusX @lambdageek
 
-/src/mono/mono/mini @vargaz @lambdageek @SamMonoRT
-/src/mono/mono/mini/*cfgdump* @vargaz
-/src/mono/mono/mini/*exceptions* @vargaz @BrzVlad
-/src/mono/mono/mini/*llvm* @vargaz @fanyang-mono
-/src/mono/mono/mini/*ppc* @vargaz
+/src/mono/mono/mini @lambdageek @steveisok
+/src/mono/mono/mini/*cfgdump* @lambdageek
+/src/mono/mono/mini/*exceptions* @BrzVlad
+/src/mono/mono/mini/*llvm* @fanyang-mono @steveisok
+/src/mono/mono/mini/*ppc* @lambdageek
 /src/mono/mono/mini/*profiler* @BrzVlad @lambdageek
-/src/mono/mono/mini/*riscv* @vargaz @lambdageek
-/src/mono/mono/mini/*type-check* @lambdageek
-/src/mono/mono/mini/debugger-agent.c @vargaz @thaystg @lambdageek
-/src/mono/mono/mini/interp/* @BrzVlad @vargaz @kotlarmilos
+/src/mono/mono/mini/*riscv* @lambdageek @steveisok
+/src/mono/mono/mini/*type-check* @lambdageek @steveisok
+/src/mono/mono/mini/debugger-agent.c @thaystg @lambdageek
+/src/mono/mono/mini/interp/* @BrzVlad @kotlarmilos
 /src/mono/mono/mini/interp/*jiterp* @kg
 /src/mono/mono/mini/*simd* @fanyang-mono
 
 /src/mono/mono/profiler @BrzVlad @lambdageek
-/src/mono/mono/sgen @BrzVlad @lambdageek @SamMonoRT
+/src/mono/mono/sgen @BrzVlad @lambdageek
 
-/src/mono/mono/utils @vargaz @lambdageek
+/src/mono/mono/utils @lambdageek @steveisok
 /src/mono/mono/utils/*-win* @lateralusX @lambdageek
-/src/mono/mono/utils/atomic* @vargaz
-/src/mono/mono/utils/mono-hwcap* @vargaz
-/src/mono/mono/utils/mono-mem* @vargaz
-/src/mono/mono/utils/mono-threads* @lambdageek @vargaz
+/src/mono/mono/utils/atomic* @lambdageek @steveisok
+/src/mono/mono/utils/mono-hwcap* @lambdageek
+/src/mono/mono/utils/mono-mem* @lambdageek @steveisok
+/src/mono/mono/utils/mono-threads* @lambdageek
 
 /src/mono/dlls @thaystg @lambdageek
 
@@ -112,4 +112,4 @@
 # Area ownership and repo automation
 /docs/area-owners.*                                 @jeffhandley
 /docs/issue*.md                                     @jeffhandley
-/.github/fabricbot.json                             @jeffhandley
+/.github/policies/                                  @jeffhandley @mkArtakMSFT
diff --git a/.github/ISSUE_TEMPLATE/05_blank_issue.md b/.github/ISSUE_TEMPLATE/04_blank_issue.md
similarity index 100%
rename from .github/ISSUE_TEMPLATE/05_blank_issue.md
rename to .github/ISSUE_TEMPLATE/04_blank_issue.md
diff --git a/.github/ISSUE_TEMPLATE/04_ci_known_issue.yml b/.github/ISSUE_TEMPLATE/04_ci_known_issue.yml
deleted file mode 100644
index 17ec4e5e5ec9..000000000000
--- a/.github/ISSUE_TEMPLATE/04_ci_known_issue.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-name: CI Known Issue Report
-description: Create a known issue directly
-labels: ["blocking-clean-ci","Known Build Error"]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Use this template to report issues currently affecting PR stability, be it build or test failures.
-  - type: textarea
-    id: background
-    attributes:
-      label: Error Blob
-      description: Please identify a clear error string that can help identify future instances of this issue. For more information on how to fill this check our issue triage guidelines at [Failure Analysis](/dotnet/runtime/blob/main/docs/workflow/ci/failure-analysis.md#what-to-do-if-you-determine-the-failure-is-unrelated)
-      value: |
-        ```json
-        {
-          "ErrorMessage": "",
-          "BuildRetry": false,
-          "ErrorPattern": "",
-          "ExcludeConsoleLog": true
-        }
-        ```
-    validations:
-      required: true
-  - type: textarea
-    id: repro-steps
-    attributes:
-      label: Reproduction Steps
-      description: |
-        If possible describe where you observe the issue with links and any other relevant details.
-    validations:
-      required: false
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index 54d8c5740bad..b14edd954ede 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -18,3 +18,6 @@ contact_links:
   - name: Issue with WPF
     url:  https://github.com/dotnet/wpf/issues/new/choose
     about: Please open issues relating to WPF in dotnet/wpf.
+  - name: CI Known Issue Report
+    url: https://helix.dot.net/BuildAnalysis/CreateKnownIssues
+    about: Use the helper to create a Known Issue in CI if failures in your runs are unrelated to your change. See [Failure Analysis](https://github.com/dotnet/runtime/blob/main/docs/workflow/ci/failure-analysis.md#what-to-do-if-you-determine-the-failure-is-unrelated) for triage instructions.
diff --git a/.github/fabricbot.json b/.github/fabricbot.json
deleted file mode 100644
index 175dcf4b416a..000000000000
--- a/.github/fabricbot.json
+++ /dev/null
@@ -1,2949 +0,0 @@
-[
-  {
-    "taskType": "scheduledAndTrigger",
-    "capabilityId": "IssueRouting",
-    "subCapability": "@Mention",
-    "version": "1.0",
-    "config": {
-      "taskName": "Area-owners",
-      "labelsAndMentions": [
-        {
-          "labels": [
-            "area-AssemblyLoader-coreclr"
-          ],
-          "mentionees": [
-            "vitek-karas",
-            "agocke",
-            "vsadov"
-          ]
-        },
-        {
-          "labels": [
-            "area-AssemblyLoader-mono"
-          ],
-          "mentionees": []
-        },
-        {
-          "labels": [
-            "area-CodeGen-coreclr"
-          ],
-          "mentionees": [
-            "JulieLeeMSFT",
-            "jakobbotsch"
-          ]
-        },
-        {
-          "labels": [
-            "area-Codegen-Interpreter-mono"
-          ],
-          "mentionees": [
-            "brzvlad",
-            "kotlarmilos"
-          ]
-        },
-        {
-          "labels": [
-            "area-Codegen-JIT-Mono"
-          ],
-          "mentionees": [
-            "SamMonoRT",
-            "vargaz"
-          ]
-        },
-        {
-          "labels": [
-            "area-CodeGen-LLVM-Mono"
-          ],
-          "mentionees": [
-            "SamMonoRT",
-            "vargaz"
-          ]
-        },
-        {
-          "labels": [
-            "area-Codegen-Intrinsics-mono"
-          ],
-          "mentionees": [
-            "SamMonoRT",
-            "fanyang-mono"
-          ]
-        },
-        {
-          "labels": [
-            "area-CodeGen-meta-Mono"
-          ],
-          "mentionees": [
-            "SamMonoRT",
-            "vargaz",
-            "lambdageek"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.DateTime"
-          ],
-          "mentionees": [
-            "dotnet/area-system-datetime"
-          ]
-        },
-        {
-          "labels": [
-            "area-Debugger-mono"
-          ],
-          "mentionees": [
-            "thaystg"
-          ]
-        },
-        {
-          "labels": [
-            "area-DependencyModel"
-          ],
-          "mentionees": [
-            "dotnet/area-dependencymodel"
-          ]
-        },
-        {
-          "labels": [
-            "area-Diagnostics-coreclr"
-          ],
-          "mentionees": [
-            "tommcdon"
-          ]
-        },
-        {
-          "labels": [
-            "area-Extensions-Caching"
-          ],
-          "mentionees": [
-            "dotnet/area-extensions-caching"
-          ]
-        },
-        {
-          "labels": [
-            "area-Extensions-Configuration"
-          ],
-          "mentionees": [
-            "dotnet/area-extensions-configuration"
-          ]
-        },
-        {
-          "labels": [
-            "area-Extensions-DependencyInjection"
-          ],
-          "mentionees": [
-            "dotnet/area-extensions-dependencyinjection"
-          ]
-        },
-        {
-          "labels": [
-            "area-Extensions-FileSystem"
-          ],
-          "mentionees": [
-            "dotnet/area-extensions-filesystem"
-          ]
-        },
-        {
-          "labels": [
-            "area-Extensions-Hosting"
-          ],
-          "mentionees": [
-            "dotnet/area-extensions-hosting"
-          ]
-        },
-        {
-          "labels": [
-            "area-Extensions-HttpClientFactory"
-          ],
-          "mentionees": [
-            "dotnet/ncl"
-          ]
-        },
-        {
-          "labels": [
-            "area-Extensions-Logging"
-          ],
-          "mentionees": [
-            "dotnet/area-extensions-logging"
-          ]
-        },
-        {
-          "labels": [
-            "area-Extensions-Options"
-          ],
-          "mentionees": [
-            "dotnet/area-extensions-options"
-          ]
-        },
-        {
-          "labels": [
-            "area-Extensions-Primitives"
-          ],
-          "mentionees": [
-            "dotnet/area-extensions-primitives"
-          ]
-        },
-        {
-          "labels": [
-            "area-GC-coreclr"
-          ],
-          "mentionees": [
-            "dotnet/gc"
-          ]
-        },
-        {
-          "labels": [
-            "area-GC-mono"
-          ],
-          "mentionees": [
-            "brzvlad"
-          ]
-        },
-        {
-          "labels": [
-            "area-Host"
-          ],
-          "mentionees": [
-            "vitek-karas",
-            "agocke",
-            "vsadov"
-          ]
-        },
-        {
-          "labels": [
-            "area-HostModel"
-          ],
-          "mentionees": [
-            "vitek-karas",
-            "agocke"
-          ]
-        },
-        {
-          "labels": [
-            "area-ILTools-coreclr"
-          ],
-          "mentionees": [
-            "JulieLeeMSFT"
-          ]
-        },
-        {
-          "labels": [
-            "area-Tools-ILVerification"
-          ],
-          "mentionees": [
-            "JulieLeeMSFT"
-          ]
-        },
-        {
-          "labels": [
-            "area-Infrastructure"
-          ],
-          "mentionees": [
-            "dotnet/runtime-infrastructure"
-          ]
-        },
-        {
-          "labels": [
-            "area-Infrastructure-coreclr"
-          ],
-          "mentionees": [
-            "hoyosjs"
-          ]
-        },
-        {
-          "labels": [
-            "area-Infrastructure-libraries"
-          ],
-          "mentionees": [
-            "dotnet/area-infrastructure-libraries"
-          ]
-        },
-        {
-          "labels": [
-            "area-Infrastructure-mono"
-          ],
-          "mentionees": [
-            "directhex"
-          ]
-        },
-        {
-          "labels": [
-            "area-Meta"
-          ],
-          "mentionees": [
-            "dotnet/area-meta"
-          ]
-        },
-        {
-          "labels": [
-            "area-Microsoft.CSharp"
-          ],
-          "mentionees": [
-            "cston"
-          ]
-        },
-        {
-          "labels": [
-            "area-Microsoft.Extensions"
-          ],
-          "mentionees": [
-            "dotnet/area-microsoft-extensions"
-          ]
-        },
-        {
-          "labels": [
-            "area-Microsoft.VisualBasic"
-          ],
-          "mentionees": [
-            "cston"
-          ]
-        },
-        {
-          "labels": [
-            "area-Microsoft.Win32"
-          ],
-          "mentionees": [
-            "dotnet/area-microsoft-win32"
-          ]
-        },
-        {
-          "labels": [
-            "area-NativeAOT-coreclr"
-          ],
-          "mentionees": [
-            "agocke",
-            "MichalStrehovsky",
-            "jkotas"
-          ]
-        },
-        {
-          "labels": [
-            "area-Single-File"
-          ],
-          "mentionees": [
-            "agocke",
-            "vitek-karas",
-            "vsadov"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Buffers"
-          ],
-          "mentionees": [
-            "dotnet/area-system-buffers"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.CodeDom"
-          ],
-          "mentionees": [
-            "dotnet/area-system-codedom"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Collections"
-          ],
-          "mentionees": [
-            "dotnet/area-system-collections"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.ComponentModel"
-          ],
-          "mentionees": [
-            "dotnet/area-system-componentmodel"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.ComponentModel.Composition"
-          ],
-          "mentionees": [
-            "dotnet/area-system-componentmodel-composition"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.ComponentModel.DataAnnotations"
-          ],
-          "mentionees": [
-            "dotnet/area-system-componentmodel-dataannotations"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Composition"
-          ],
-          "mentionees": [
-            "dotnet/area-system-composition"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Configuration"
-          ],
-          "mentionees": [
-            "dotnet/area-system-configuration"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Console"
-          ],
-          "mentionees": [
-            "dotnet/area-system-console"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Data"
-          ],
-          "mentionees": [
-            "roji",
-            "ajcvickers"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Data.Odbc"
-          ],
-          "mentionees": [
-            "roji",
-            "ajcvickers"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Data.OleDB"
-          ],
-          "mentionees": [
-            "roji",
-            "ajcvickers"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Data.SqlClient"
-          ],
-          "mentionees": [
-            "davoudeshtehari",
-            "david-engel",
-            "jrahnama"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Diagnostics"
-          ],
-          "mentionees": [
-            "tommcdon"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Diagnostics.Activity"
-          ],
-          "mentionees": [
-            "dotnet/area-system-diagnostics-activity"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Diagnostics.EventLog"
-          ],
-          "mentionees": [
-            "dotnet/area-system-diagnostics-eventlog"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Diagnostics.PerformanceCounter"
-          ],
-          "mentionees": [
-            "dotnet/area-system-diagnostics-performancecounter"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Diagnostics.Process"
-          ],
-          "mentionees": [
-            "dotnet/area-system-diagnostics-process"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Diagnostics.TraceSource"
-          ],
-          "mentionees": [
-            "dotnet/area-system-diagnostics-tracesource"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Diagnostics.Tracing"
-          ],
-          "mentionees": [
-            "tarekgh",
-            "tommcdon",
-            "pjanotti"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.DirectoryServices"
-          ],
-          "mentionees": [
-            "dotnet/area-system-directoryservices",
-            "jay98014"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Drawing"
-          ],
-          "mentionees": [
-            "dotnet/area-system-drawing"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Dynamic.Runtime"
-          ],
-          "mentionees": [
-            "cston"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Formats.Asn1"
-          ],
-          "mentionees": [
-            "dotnet/area-system-formats-asn1",
-            "bartonjs",
-            "vcsjones"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Formats.Cbor"
-          ],
-          "mentionees": [
-            "dotnet/area-system-formats-cbor",
-            "bartonjs",
-            "vcsjones"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Formats.Tar"
-          ],
-          "mentionees": [
-            "dotnet/area-system-formats-tar"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Globalization"
-          ],
-          "mentionees": [
-            "dotnet/area-system-globalization"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.IO"
-          ],
-          "mentionees": [
-            "dotnet/area-system-io"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.IO.Compression"
-          ],
-          "mentionees": [
-            "dotnet/area-system-io-compression"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.IO.Hashing"
-          ],
-          "mentionees": [
-            "dotnet/area-system-io-hashing",
-            "bartonjs",
-            "vcsjones"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.IO.Ports"
-          ],
-          "mentionees": [
-            "dotnet/area-system-io-ports"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Linq"
-          ],
-          "mentionees": [
-            "dotnet/area-system-linq"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Linq.Expressions"
-          ],
-          "mentionees": [
-            "cston"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Linq.Parallel"
-          ],
-          "mentionees": [
-            "dotnet/area-system-linq-parallel"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Management"
-          ],
-          "mentionees": [
-            "dotnet/area-system-management"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Memory"
-          ],
-          "mentionees": [
-            "dotnet/area-system-memory"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Net"
-          ],
-          "mentionees": [
-            "dotnet/ncl"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Net.Http"
-          ],
-          "mentionees": [
-            "dotnet/ncl"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Net.Quic"
-          ],
-          "mentionees": [
-            "dotnet/ncl"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Net.Security"
-          ],
-          "mentionees": [
-            "dotnet/ncl",
-            "bartonjs",
-            "vcsjones"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Net.Sockets"
-          ],
-          "mentionees": [
-            "dotnet/ncl"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Numerics"
-          ],
-          "mentionees": [
-            "dotnet/area-system-numerics"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Numerics.Tensors"
-          ],
-          "mentionees": [
-            "dotnet/area-system-numerics-tensors"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Reflection"
-          ],
-          "mentionees": [
-            "dotnet/area-system-reflection"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Reflection.Emit"
-          ],
-          "mentionees": [
-            "dotnet/area-system-reflection-emit"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Reflection.Metadata"
-          ],
-          "mentionees": [
-            "dotnet/area-system-reflection-metadata"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Resources"
-          ],
-          "mentionees": [
-            "dotnet/area-system-resources"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Runtime"
-          ],
-          "mentionees": [
-            "dotnet/area-system-runtime"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Runtime.CompilerServices"
-          ],
-          "mentionees": [
-            "dotnet/area-system-runtime-compilerservices"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Runtime.InteropServices"
-          ],
-          "mentionees": [
-            "dotnet/interop-contrib"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Runtime.Intrinsics"
-          ],
-          "mentionees": [
-            "dotnet/area-system-runtime-intrinsics"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Security"
-          ],
-          "mentionees": [
-            "dotnet/area-system-security",
-            "bartonjs",
-            "vcsjones"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.ServiceProcess"
-          ],
-          "mentionees": [
-            "dotnet/area-system-serviceprocess"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Speech"
-          ],
-          "mentionees": [
-            "dotnet/area-system-speech"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Text.Encoding"
-          ],
-          "mentionees": [
-            "dotnet/area-system-text-encoding"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Text.Encodings.Web"
-          ],
-          "mentionees": [
-            "dotnet/area-system-text-encodings-web"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Text.Json"
-          ],
-          "mentionees": [
-            "dotnet/area-system-text-json",
-            "gregsdennis"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Text.RegularExpressions"
-          ],
-          "mentionees": [
-            "dotnet/area-system-text-regularexpressions"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Threading"
-          ],
-          "mentionees": [
-            "mangod9"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Threading.Channels"
-          ],
-          "mentionees": [
-            "dotnet/area-system-threading-channels"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Threading.Tasks"
-          ],
-          "mentionees": [
-            "dotnet/area-system-threading-tasks"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Transactions"
-          ],
-          "mentionees": [
-            "roji",
-            "ajcvickers"
-          ]
-        },
-        {
-          "labels": [
-            "area-System.Xml"
-          ],
-          "mentionees": [
-            "dotnet/area-system-xml"
-          ]
-        },
-        {
-          "labels": [
-            "area-Tools-ILLink"
-          ],
-          "mentionees": [
-            "agocke",
-            "sbomer",
-            "vitek-karas"
-          ]
-        },
-        {
-          "labels": [
-            "area-vm-coreclr"
-          ],
-          "mentionees": [
-            "mangod9"
-          ]
-        }
-      ],
-      "replyTemplate": "Tagging subscribers to this area: ${mentionees}\nSee info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.",
-      "enableForPullRequests": true
-    },
-    "disabled": false
-  },
-  {
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "IssuesOnlyResponder",
-    "version": "1.0",
-    "config": {
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "labelAdded",
-            "parameters": {
-              "label": "breaking-change"
-            }
-          }
-        ]
-      },
-      "eventType": "issue",
-      "eventNames": [
-        "issues",
-        "project_card"
-      ],
-      "actions": [
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "needs-breaking-change-doc-created"
-          }
-        },
-        {
-          "name": "addReply",
-          "parameters": {
-            "comment": "Added `needs-breaking-change-doc-created` label because this issue has the `breaking-change` label. \n\n1. [ ] Create and link to this issue a matching issue in the dotnet/docs repo using the [breaking change documentation template](https://aka.ms/dotnet/docs/new-breaking-change-issue), then remove this `needs-breaking-change-doc-created` label.\n\nTagging @dotnet/compat for awareness of the breaking change."
-          }
-        }
-      ],
-      "taskName": "Add breaking change doc label to issue"
-    },
-    "disabled": false
-  },
-  {
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestResponder",
-    "version": "1.0",
-    "config": {
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "labelAdded",
-            "parameters": {
-              "label": "breaking-change"
-            }
-          }
-        ]
-      },
-      "eventType": "pull_request",
-      "eventNames": [
-        "pull_request",
-        "issues",
-        "project_card"
-      ],
-      "actions": [
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "needs-breaking-change-doc-created"
-          }
-        },
-        {
-          "name": "addReply",
-          "parameters": {
-            "comment": "Added `needs-breaking-change-doc-created` label because this PR has the `breaking-change` label. \n\nWhen you commit this breaking change:\n\n1. [ ] Create and link to this PR and the issue a matching issue in the dotnet/docs repo using the [breaking change documentation template](https://aka.ms/dotnet/docs/new-breaking-change-issue), then remove this `needs-breaking-change-doc-created` label.\n2. [ ] Ask a committer to mail the `.NET Breaking Change Notification` DL.\n\nTagging @dotnet/compat for awareness of the breaking change."
-          }
-        }
-      ],
-      "taskName": "Add breaking change doc label to PR"
-    },
-    "disabled": false
-  },
-  {
-    "taskType": "scheduledAndTrigger",
-    "capabilityId": "IssueRouting",
-    "subCapability": "@Mention",
-    "version": "1.0",
-    "config": {
-      "taskName": "@Mention for linkable-framework",
-      "labelsAndMentions": [
-        {
-          "labels": [
-            "linkable-framework"
-          ],
-          "mentionees": [
-            "eerhardt",
-            "vitek-karas",
-            "LakshanF",
-            "sbomer",
-            "joperezr",
-            "marek-safar"
-          ]
-        }
-      ],
-      "replyTemplate": "Tagging subscribers to 'linkable-framework': ${mentionees}\nSee info in area-owners.md if you want to be subscribed.",
-      "enableForPullRequests": true
-    }
-  },
-  {
-    "taskType": "scheduledAndTrigger",
-    "capabilityId": "IssueRouting",
-    "subCapability": "@Mention",
-    "version": "1.0",
-    "config": {
-      "taskName": "@Mention for size-reduction",
-      "replyTemplate": "Tagging subscribers to 'size-reduction': ${mentionees}\nSee info in area-owners.md if you want to be subscribed.",
-      "labelsAndMentions": [
-        {
-          "labels": [
-            "size-reduction"
-          ],
-          "mentionees": [
-            "eerhardt",
-            "SamMonoRT",
-            "marek-safar"
-          ]
-        }
-      ],
-      "enableForPullRequests": true
-    }
-  },
-  {
-    "taskType": "scheduledAndTrigger",
-    "capabilityId": "IssueRouting",
-    "subCapability": "@Mention",
-    "version": "1.0",
-    "config": {
-      "taskName": "@Mention for wasm",
-      "labelsAndMentions": [
-        {
-          "labels": [
-            "arch-wasm"
-          ],
-          "mentionees": [
-            "lewing"
-          ]
-        }
-      ],
-      "replyTemplate": "Tagging subscribers to 'arch-wasm': ${mentionees}\nSee info in area-owners.md if you want to be subscribed.",
-      "enableForPullRequests": true
-    }
-  },
-  {
-    "taskType": "scheduledAndTrigger",
-    "capabilityId": "IssueRouting",
-    "subCapability": "@Mention",
-    "version": "1.0",
-    "config": {
-      "taskName": "@Mention for ios",
-      "labelsAndMentions": [
-        {
-          "labels": [
-            "os-ios"
-          ],
-          "mentionees": [
-            "steveisok",
-            "akoeplinger",
-            "kotlarmilos"
-          ]
-        }
-      ],
-      "enableForPullRequests": true,
-      "replyTemplate": "Tagging subscribers to 'os-ios': ${mentionees}\nSee info in area-owners.md if you want to be subscribed."
-    }
-  },
-  {
-    "taskType": "scheduledAndTrigger",
-    "capabilityId": "IssueRouting",
-    "subCapability": "@Mention",
-    "version": "1.0",
-    "config": {
-      "taskName": "@Mention for android",
-      "labelsAndMentions": [
-        {
-          "labels": [
-            "os-android"
-          ],
-          "mentionees": [
-            "steveisok",
-            "akoeplinger"
-          ]
-        }
-      ],
-      "enableForPullRequests": true,
-      "replyTemplate": "Tagging subscribers to 'arch-android': ${mentionees}\nSee info in area-owners.md if you want to be subscribed."
-    }
-  },
-  {
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestResponder",
-    "version": "1.0",
-    "config": {
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "operator": "or",
-            "operands": [
-              {
-                "name": "prMatchesPattern",
-                "parameters": {
-                  "matchRegex": ".*ILLink.*"
-                }
-              },
-              {
-                "name": "prMatchesPattern",
-                "parameters": {
-                  "matchRegex": ".*illink.*"
-                }
-              }
-            ]
-          },
-          {
-            "operator": "not",
-            "operands": [
-              {
-                "name": "hasLabel",
-                "parameters": {
-                  "label": "linkable-framework"
-                }
-              }
-            ]
-          },
-          {
-            "name": "isOpen",
-            "parameters": {}
-          }
-        ]
-      },
-      "eventType": "pull_request",
-      "eventNames": [
-        "pull_request",
-        "issues",
-        "project_card"
-      ],
-      "taskName": "[Linkable-framework workgroup] Add linkable-framework label to new Prs that touch files with *ILLink* that not have it already",
-      "actions": [
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "linkable-framework"
-          }
-        }
-      ]
-    }
-  },
-  {
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestResponder",
-    "version": "1.0",
-    "config": {
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "operator": "or",
-            "operands": [
-              {
-                "name": "prMatchesPattern",
-                "parameters": {
-                  "matchRegex": ".*ILLink.*"
-                }
-              },
-              {
-                "name": "prMatchesPattern",
-                "parameters": {
-                  "matchRegex": ".*illink.*"
-                }
-              }
-            ]
-          },
-          {
-            "operator": "not",
-            "operands": [
-              {
-                "name": "hasLabel",
-                "parameters": {
-                  "label": "linkable-framework"
-                }
-              }
-            ]
-          },
-          {
-            "name": "isOpen",
-            "parameters": {}
-          },
-          {
-            "name": "isAction",
-            "parameters": {
-              "action": "synchronize"
-            }
-          }
-        ]
-      },
-      "eventType": "pull_request",
-      "eventNames": [
-        "pull_request",
-        "issues",
-        "project_card"
-      ],
-      "taskName": "[Linkable-framework workgroup] Add linkable-framework label to Prs that get changes pushed where they touch *ILLInk* files",
-      "actions": [
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "linkable-framework"
-          }
-        }
-      ]
-    }
-  },
-  {
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestResponder",
-    "version": "1.0",
-    "config": {
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "isActivitySender",
-            "parameters": {
-              "user": "dotnet-maestro[bot]"
-            }
-          },
-          {
-            "name": "isAction",
-            "parameters": {
-              "action": "opened"
-            }
-          },
-          {
-            "name": "titleContains",
-            "parameters": {
-              "titlePattern": "dotnet-optimization"
-            }
-          }
-        ]
-      },
-      "eventType": "pull_request",
-      "eventNames": [
-        "pull_request",
-        "issues",
-        "project_card"
-      ],
-      "taskName": "Auto-approve maestro PRs",
-      "actions": [
-        {
-          "name": "approvePullRequest",
-          "parameters": {
-            "comment": "Auto-approve dotnet-optimization PR"
-          }
-        }
-      ]
-    },
-    "disabled": true
-  },
-  {
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "IssuesOnlyResponder",
-    "version": "1.0",
-    "config": {
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "labelAdded",
-            "parameters": {
-              "label": "backlog-cleanup-candidate"
-            }
-          }
-        ]
-      },
-      "eventType": "issue",
-      "eventNames": [
-        "issues",
-        "project_card"
-      ],
-      "taskName": "Manual Issue Cleanup",
-      "actions": [
-        {
-          "name": "addReply",
-          "parameters": {
-            "comment": "Due to lack of recent activity, this issue has been marked as a candidate for backlog cleanup.  It will be closed if no further activity occurs within 14 more days. Any new comment (by anyone, not necessarily the author) will undo this process.\n\nThis process is part of our [issue cleanup automation](https://github.com/dotnet/runtime/blob/main/docs/issue-cleanup.md)."
-          }
-        },
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        }
-      ]
-    }
-  },
-  {
-    "taskType": "scheduled",
-    "capabilityId": "ScheduledSearch",
-    "subCapability": "ScheduledSearch",
-    "version": "1.1",
-    "config": {
-      "frequency": [
-        {
-          "weekDay": 0,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 1,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 2,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 3,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 4,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 5,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 6,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        }
-      ],
-      "searchTerms": [
-        {
-          "name": "noActivitySince",
-          "parameters": {
-            "days": 1644
-          }
-        },
-        {
-          "name": "isIssue",
-          "parameters": {}
-        },
-        {
-          "name": "isOpen",
-          "parameters": {}
-        },
-        {
-          "name": "noLabel",
-          "parameters": {
-            "label": "backlog-cleanup-candidate"
-          }
-        }
-      ],
-      "taskName": "Automated Issue cleanup",
-      "actions": [
-        {
-          "name": "addReply",
-          "parameters": {
-            "comment": "Due to lack of recent activity, this issue has been marked as a candidate for backlog cleanup.  It will be closed if no further activity occurs within 14 more days. Any new comment (by anyone, not necessarily the author) will undo this process.\n\nThis process is part of our [issue cleanup automation](https://github.com/dotnet/runtime/blob/main/docs/issue-cleanup.md)."
-          }
-        },
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "backlog-cleanup-candidate"
-          }
-        },
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        }
-      ]
-    }
-  },
-  {
-    "taskType": "scheduledAndTrigger",
-    "capabilityId": "IssueRouting",
-    "subCapability": "@Mention",
-    "version": "1.0",
-    "config": {
-      "taskName": "@Mention for tvos",
-      "labelsAndMentions": [
-        {
-          "labels": [
-            "os-tvos"
-          ],
-          "mentionees": [
-            "steveisok",
-            "akoeplinger"
-          ]
-        }
-      ],
-      "enableForPullRequests": true,
-      "replyTemplate": "Tagging subscribers to 'os-tvos': ${mentionees}\nSee info in area-owners.md if you want to be subscribed."
-    }
-  },
-  {
-    "taskType": "scheduledAndTrigger",
-    "capabilityId": "IssueRouting",
-    "subCapability": "@Mention",
-    "version": "1.0",
-    "config": {
-      "labelsAndMentions": [
-        {
-          "labels": [
-            "os-maccatalyst"
-          ],
-          "mentionees": [
-            "steveisok",
-            "akoeplinger"
-          ]
-        }
-      ],
-      "replyTemplate": "Tagging subscribers to 'os-maccatalyst': ${mentionees}\nSee info in area-owners.md if you want to be subscribed.",
-      "enableForPullRequests": true,
-      "taskName": "@Mention for maccatalyst"
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "IssuesOnlyResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Add untriaged label to new/reopened issues without a milestone",
-      "actions": [
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "untriaged"
-          }
-        }
-      ],
-      "eventType": "issue",
-      "eventNames": [
-        "issues"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "operator": "or",
-            "operands": [
-              {
-                "name": "isAction",
-                "parameters": {
-                  "action": "opened"
-                }
-              },
-              {
-                "name": "isAction",
-                "parameters": {
-                  "action": "reopened"
-                }
-              },
-              {
-                "name": "removedFromMilestone",
-                "parameters": {}
-              }
-            ]
-          },
-          {
-            "name": "isOpen",
-            "parameters": {}
-          },
-          {
-            "operator": "not",
-            "operands": [
-              {
-                "name": "isInMilestone",
-                "parameters": {}
-              }
-            ]
-          },
-          {
-            "operator": "not",
-            "operands": [
-              {
-                "name": "hasLabel",
-                "parameters": {
-                  "label": "untriaged"
-                }
-              }
-            ]
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "IssuesOnlyResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Remove untriaged label from issues when closed or added to a milestone",
-      "actions": [
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "untriaged"
-          }
-        }
-      ],
-      "eventType": "issue",
-      "eventNames": [
-        "issues"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "operator": "or",
-            "operands": [
-              {
-                "name": "isAction",
-                "parameters": {
-                  "action": "closed"
-                }
-              },
-              {
-                "name": "addedToMilestone",
-                "parameters": {}
-              }
-            ]
-          },
-          {
-            "name": "hasLabel",
-            "parameters": {
-              "label": "untriaged"
-            }
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "InPrLabel",
-    "subCapability": "InPrLabel",
-    "version": "1.0",
-    "config": {
-      "taskName": "Add `in-pr` label on issue when an open pull request is targeting it",
-      "inPrLabelText": "There is an active PR which will close this issue when it is merged",
-      "fixedLabelEnabled": false,
-      "label_inPr": "in-pr"
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Assign Team PRs to author",
-      "actions": [
-        {
-          "name": "assignToUser",
-          "parameters": {
-            "user": {
-              "type": "prAuthor"
-            }
-          }
-        }
-      ],
-      "eventType": "pull_request",
-      "eventNames": [
-        "pull_request"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "isAction",
-            "parameters": {
-              "action": "opened"
-            }
-          },
-          {
-            "operator": "not",
-            "operands": [
-              {
-                "name": "activitySenderHasPermissions",
-                "parameters": {
-                  "permissions": "read"
-                }
-              }
-            ]
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Label community PRs",
-      "actions": [
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "community-contribution"
-          }
-        }
-      ],
-      "eventType": "pull_request",
-      "eventNames": [
-        "pull_request"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "isAction",
-            "parameters": {
-              "action": "opened"
-            }
-          },
-          {
-            "operator": "and",
-            "operands": [
-              {
-                "operator": "not",
-                "operands": [
-                  {
-                    "name": "activitySenderHasPermissions",
-                    "parameters": {
-                      "permissions": "admin"
-                    }
-                  }
-                ]
-              },
-              {
-                "operator": "not",
-                "operands": [
-                  {
-                    "name": "activitySenderHasPermissions",
-                    "parameters": {
-                      "permissions": "maintain"
-                    }
-                  }
-                ]
-              },
-              {
-                "operator": "not",
-                "operands": [
-                  {
-                    "name": "activitySenderHasPermissions",
-                    "parameters": {
-                      "permissions": "write"
-                    }
-                  }
-                ]
-              },
-              {
-                "operator": "not",
-                "operands": [
-                  {
-                    "name": "isActivitySender",
-                    "parameters": {
-                      "user": "github-actions[bot]"
-                    }
-                  }
-                ]
-              },
-              {
-                "operator": "not",
-                "operands": [
-                  {
-                    "name": "isActivitySender",
-                    "parameters": {
-                      "user": "dotnet-maestro[bot]"
-                    }
-                  }
-                ]
-              },
-              {
-                "operator": "not",
-                "operands": [
-                  {
-                    "name": "isActivitySender",
-                    "parameters": {
-                      "user": "dotnet-maestro-bot[bot]"
-                    }
-                  }
-                ]
-              },
-              {
-                "operator": "not",
-                "operands": [
-                  {
-                    "name": "isActivitySender",
-                    "parameters": {
-                      "user": "dotnet-maestro-bot"
-                    }
-                  }
-                ]
-              },
-              {
-                "operator": "not",
-                "operands": [
-                  {
-                    "name": "isActivitySender",
-                    "parameters": {
-                      "user": "dotnet-maestro"
-                    }
-                  }
-                ]
-              },
-              {
-                "operator": "not",
-                "operands": [
-                  {
-                    "name": "isActivitySender",
-                    "parameters": {
-                      "user": "github-actions"
-                    }
-                  }
-                ]
-              }
-            ]
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "IssuesOnlyResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Needs-author-action notification",
-      "actions": [
-        {
-          "name": "addReply",
-          "parameters": {
-            "comment": "This issue has been marked `needs-author-action` and may be missing some important information."
-          }
-        }
-      ],
-      "eventType": "issue",
-      "eventNames": [
-        "issues"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "labelAdded",
-            "parameters": {
-              "label": "needs-author-action"
-            }
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestReviewResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "PR reviews with \"changes requested\" applies the needs-author-action label",
-      "actions": [
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "needs-author-action"
-          }
-        }
-      ],
-      "eventType": "pull_request",
-      "eventNames": [
-        "pull_request_review"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "operator": "not",
-            "operands": [
-              {
-                "name": "activitySenderHasPermissions",
-                "parameters": {
-                  "state": "changes_requested",
-                  "permissions": "read"
-                }
-              }
-            ]
-          },
-          {
-            "name": "isAction",
-            "parameters": {
-              "action": "submitted"
-            }
-          },
-          {
-            "name": "isReviewState",
-            "parameters": {
-              "state": "changes_requested"
-            }
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "IssueCommentResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Replace `needs-author-action` label with `needs-further-triage` label when the author comments on an issue that is not still untriaged",
-      "actions": [
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "needs-further-triage"
-          }
-        },
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "needs-author-action"
-          }
-        }
-      ],
-      "eventType": "issue",
-      "eventNames": [
-        "issue_comment"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "isAction",
-            "parameters": {
-              "action": "created"
-            }
-          },
-          {
-            "name": "isActivitySender",
-            "parameters": {
-              "user": {
-                "type": "author"
-              }
-            }
-          },
-          {
-            "name": "hasLabel",
-            "parameters": {
-              "label": "needs-author-action"
-            }
-          },
-          {
-            "operator": "not",
-            "operands": [
-              {
-                "name": "hasLabel",
-                "parameters": {
-                  "label": "untriaged"
-                }
-              }
-            ]
-          },
-          {
-            "name": "isOpen",
-            "parameters": {}
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "IssueCommentResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Remove `needs-author-action` label when the author comments on an `untriaged` issue",
-      "actions": [
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "needs-author-action"
-          }
-        }
-      ],
-      "eventType": "issue",
-      "eventNames": [
-        "issue_comment"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "isAction",
-            "parameters": {
-              "action": "created"
-            }
-          },
-          {
-            "name": "isActivitySender",
-            "parameters": {
-              "user": {
-                "type": "author"
-              }
-            }
-          },
-          {
-            "name": "hasLabel",
-            "parameters": {
-              "label": "needs-author-action"
-            }
-          },
-          {
-            "name": "hasLabel",
-            "parameters": {
-              "label": "untriaged"
-            }
-          },
-          {
-            "name": "isOpen",
-            "parameters": {}
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Pushing changes to PR branch removes the needs-author-action label",
-      "actions": [
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "needs-author-action"
-          }
-        }
-      ],
-      "eventType": "pull_request",
-      "eventNames": [
-        "pull_request"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "isAction",
-            "parameters": {
-              "action": "synchronize"
-            }
-          },
-          {
-            "name": "hasLabel",
-            "parameters": {
-              "label": "needs-author-action"
-            }
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestCommentResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Author commenting in PR removes the needs-author-action label",
-      "actions": [
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "needs-author-action"
-          }
-        }
-      ],
-      "eventType": "pull_request",
-      "eventNames": [
-        "issue_comment"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "isActivitySender",
-            "parameters": {
-              "user": {
-                "type": "author"
-              }
-            }
-          },
-          {
-            "name": "isAction",
-            "parameters": {
-              "action": "created"
-            }
-          },
-          {
-            "name": "hasLabel",
-            "parameters": {
-              "label": "needs-author-action"
-            }
-          },
-          {
-            "name": "isOpen",
-            "parameters": {}
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestReviewResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Author responding to a pull request review comment removes the needs-author-action label",
-      "actions": [
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "needs-author-action"
-          }
-        }
-      ],
-      "eventType": "pull_request",
-      "eventNames": [
-        "pull_request_review"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "isActivitySender",
-            "parameters": {
-              "user": {
-                "type": "author"
-              }
-            }
-          },
-          {
-            "name": "hasLabel",
-            "parameters": {
-              "label": "needs-author-action"
-            }
-          },
-          {
-            "name": "isAction",
-            "parameters": {
-              "action": "submitted"
-            }
-          },
-          {
-            "name": "isOpen",
-            "parameters": {}
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "scheduled",
-    "capabilityId": "ScheduledSearch",
-    "subCapability": "ScheduledSearch",
-    "version": "1.1",
-    "config": {
-      "taskName": "Add no-recent-activity label to issues",
-      "actions": [
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        },
-        {
-          "name": "addReply",
-          "parameters": {
-            "comment": "This issue has been automatically marked `no-recent-activity` because it has not had any activity for 14 days. It will be closed if no further activity occurs within 14 more days. Any new comment (by anyone, not necessarily the author) will remove `no-recent-activity`."
-          }
-        }
-      ],
-      "frequency": [
-        {
-          "weekDay": 0,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        },
-        {
-          "weekDay": 1,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        },
-        {
-          "weekDay": 2,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        },
-        {
-          "weekDay": 3,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        },
-        {
-          "weekDay": 4,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        },
-        {
-          "weekDay": 5,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        },
-        {
-          "weekDay": 6,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        }
-      ],
-      "searchTerms": [
-        {
-          "name": "isIssue",
-          "parameters": {}
-        },
-        {
-          "name": "isOpen",
-          "parameters": {}
-        },
-        {
-          "name": "hasLabel",
-          "parameters": {
-            "label": "needs-author-action"
-          }
-        },
-        {
-          "name": "noActivitySince",
-          "parameters": {
-            "days": 14
-          }
-        },
-        {
-          "name": "noLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        }
-      ]
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "scheduled",
-    "capabilityId": "ScheduledSearch",
-    "subCapability": "ScheduledSearch",
-    "version": "1.1",
-    "config": {
-      "taskName": "Add no-recent-activity label to PRs",
-      "actions": [
-        {
-          "name": "addLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        },
-        {
-          "name": "addReply",
-          "parameters": {
-            "comment": "This pull request has been automatically marked `no-recent-activity` because it has not had any activity for 14 days. It will be closed if no further activity occurs within 14 more days. Any new comment (by anyone, not necessarily the author) will remove `no-recent-activity`."
-          }
-        }
-      ],
-      "frequency": [
-        {
-          "weekDay": 0,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        },
-        {
-          "weekDay": 1,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        },
-        {
-          "weekDay": 2,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        },
-        {
-          "weekDay": 3,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        },
-        {
-          "weekDay": 4,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        },
-        {
-          "weekDay": 5,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        },
-        {
-          "weekDay": 6,
-          "hours": [
-            4,
-            10,
-            16,
-            22
-          ],
-          "timezoneOffset": 1
-        }
-      ],
-      "searchTerms": [
-        {
-          "name": "isPr",
-          "parameters": {}
-        },
-        {
-          "name": "isOpen",
-          "parameters": {}
-        },
-        {
-          "name": "hasLabel",
-          "parameters": {
-            "label": "needs-author-action"
-          }
-        },
-        {
-          "name": "noActivitySince",
-          "parameters": {
-            "days": 14
-          }
-        },
-        {
-          "name": "noLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        }
-      ]
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "IssuesOnlyResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Remove `no-recent-activity` label from issues when issue is modified",
-      "actions": [
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        },
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "backlog-cleanup-candidate"
-          }
-        }
-      ],
-      "eventType": "issue",
-      "eventNames": [
-        "issues"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "operator": "not",
-            "operands": [
-              {
-                "name": "isAction",
-                "parameters": {
-                  "action": "closed"
-                }
-              }
-            ]
-          },
-          {
-            "name": "hasLabel",
-            "parameters": {
-              "label": "no-recent-activity"
-            }
-          },
-          {
-            "operator": "not",
-            "operands": [
-              {
-                "name": "labelAdded",
-                "parameters": {
-                  "label": "no-recent-activity"
-                }
-              }
-            ]
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "IssueCommentResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Remove `no-recent-activity` label when an issue is commented on",
-      "actions": [
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        },
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "backlog-cleanup-candidate"
-          }
-        }
-      ],
-      "eventType": "issue",
-      "eventNames": [
-        "issue_comment"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "hasLabel",
-            "parameters": {
-              "label": "no-recent-activity"
-            }
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Remove `no-recent-activity` label from PRs when modified",
-      "actions": [
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        },
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "backlog-cleanup-candidate"
-          }
-        }
-      ],
-      "eventType": "pull_request",
-      "eventNames": [
-        "pull_request"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "isOpen",
-            "parameters": {}
-          },
-          {
-            "name": "hasLabel",
-            "parameters": {
-              "label": "no-recent-activity"
-            }
-          },
-          {
-            "operator": "not",
-            "operands": [
-              {
-                "name": "labelAdded",
-                "parameters": {
-                  "label": "no-recent-activity"
-                }
-              }
-            ]
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestCommentResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Remove `no-recent-activity` label from PRs when commented on",
-      "actions": [
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        },
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "backlog-cleanup-candidate"
-          }
-        }
-      ],
-      "eventType": "pull_request",
-      "eventNames": [
-        "issue_comment"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "hasLabel",
-            "parameters": {
-              "label": "no-recent-activity"
-            }
-          },
-          {
-            "name": "isOpen",
-            "parameters": {}
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "trigger",
-    "capabilityId": "IssueResponder",
-    "subCapability": "PullRequestReviewResponder",
-    "version": "1.0",
-    "config": {
-      "taskName": "Remove `no-recent-activity` label from PRs when new review is added",
-      "actions": [
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        },
-        {
-          "name": "removeLabel",
-          "parameters": {
-            "label": "backlog-cleanup-candidate"
-          }
-        }
-      ],
-      "eventType": "pull_request",
-      "eventNames": [
-        "pull_request_review"
-      ],
-      "conditions": {
-        "operator": "and",
-        "operands": [
-          {
-            "name": "hasLabel",
-            "parameters": {
-              "label": "no-recent-activity"
-            }
-          },
-          {
-            "name": "isOpen",
-            "parameters": {}
-          }
-        ]
-      }
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "scheduled",
-    "capabilityId": "ScheduledSearch",
-    "subCapability": "ScheduledSearch",
-    "version": "1.1",
-    "config": {
-      "taskName": "Close issues with no recent activity",
-      "actions": [
-        {
-          "name": "addReply",
-          "parameters": {
-            "comment": "This issue will now be closed since it had been marked `no-recent-activity` but received no further activity in the past 14 days. It is still possible to reopen or comment on the issue, but please note that the issue will be locked if it remains inactive for another 30 days."
-          }
-        },
-        {
-          "name": "closeIssue",
-          "parameters": {}
-        }
-      ],
-      "frequency": [
-        {
-          "weekDay": 0,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 1,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 2,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 3,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 4,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 5,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 6,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        }
-      ],
-      "searchTerms": [
-        {
-          "name": "isIssue",
-          "parameters": {}
-        },
-        {
-          "name": "isOpen",
-          "parameters": {}
-        },
-        {
-          "name": "hasLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        },
-        {
-          "name": "noActivitySince",
-          "parameters": {
-            "days": 14
-          }
-        }
-      ]
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "scheduled",
-    "capabilityId": "ScheduledSearch",
-    "subCapability": "ScheduledSearch",
-    "version": "1.1",
-    "config": {
-      "taskName": "Close PRs with no-recent-activity",
-      "actions": [
-        {
-          "name": "addReply",
-          "parameters": {
-            "comment": "This pull request will now be closed since it had been marked `no-recent-activity` but received no further activity in the past 14 days. It is still possible to reopen or comment on the pull request, but please note that it will be locked if it remains inactive for another 30 days."
-          }
-        },
-        {
-          "name": "closeIssue",
-          "parameters": {}
-        }
-      ],
-      "frequency": [
-        {
-          "weekDay": 0,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 1,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 2,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 3,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 4,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 5,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 6,
-          "hours": [
-            0,
-            6,
-            12,
-            18
-          ],
-          "timezoneOffset": 0
-        }
-      ],
-      "searchTerms": [
-        {
-          "name": "isPr",
-          "parameters": {}
-        },
-        {
-          "name": "isOpen",
-          "parameters": {}
-        },
-        {
-          "name": "hasLabel",
-          "parameters": {
-            "label": "no-recent-activity"
-          }
-        },
-        {
-          "name": "noActivitySince",
-          "parameters": {
-            "days": 14
-          }
-        }
-      ]
-    }
-  },
-  {
-    "taskSource": "fabricbot-config",
-    "taskType": "scheduled",
-    "capabilityId": "ScheduledSearch",
-    "subCapability": "ScheduledSearch",
-    "version": "1.1",
-    "config": {
-      "taskName": "Close inactive Draft PRs",
-      "actions": [
-        {
-          "name": "closeIssue",
-          "parameters": {}
-        },
-        {
-          "name": "addReply",
-          "parameters": {
-            "comment": "Draft Pull Request was automatically closed for 30 days of inactivity. Please [let us know](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you'd like to reopen it."
-          }
-        }
-      ],
-      "frequency": [
-        {
-          "weekDay": 0,
-          "hours": [
-            5,
-            11,
-            17,
-            23
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 1,
-          "hours": [
-            5,
-            11,
-            17,
-            23
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 2,
-          "hours": [
-            5,
-            11,
-            17,
-            23
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 3,
-          "hours": [
-            5,
-            11,
-            17,
-            23
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 4,
-          "hours": [
-            5,
-            11,
-            17,
-            23
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 5,
-          "hours": [
-            5,
-            11,
-            17,
-            23
-          ],
-          "timezoneOffset": 0
-        },
-        {
-          "weekDay": 6,
-          "hours": [
-            5,
-            11,
-            17,
-            23
-          ],
-          "timezoneOffset": 0
-        }
-      ],
-      "searchTerms": [
-        {
-          "name": "isDraftPr",
-          "parameters": {
-            "value": "true"
-          }
-        },
-        {
-          "name": "isOpen",
-          "parameters": {}
-        },
-        {
-          "name": "noActivitySince",
-          "parameters": {
-            "days": 30
-          }
-        }
-      ]
-    }
-  }
-]
\ No newline at end of file
diff --git a/.github/policies/resourceManagement.yml b/.github/policies/resourceManagement.yml
new file mode 100644
index 000000000000..bbfc4c3acb82
--- /dev/null
+++ b/.github/policies/resourceManagement.yml
@@ -0,0 +1,1881 @@
+id: 
+name: GitOps.PullRequestIssueManagement
+description: GitOps.PullRequestIssueManagement primitive
+owner: 
+resource: repository
+disabled: false
+where: 
+configuration:
+  resourceManagementConfiguration:
+    scheduledSearches:
+    - description: Automated Issue cleanup
+      frequencies:
+      - hourly:
+          hour: 6
+      filters:
+      - noActivitySince:
+          days: 1644
+      - isIssue
+      - isOpen
+      - isNotLabeledWith:
+          label: backlog-cleanup-candidate
+      actions:
+      - addReply:
+          reply: >-
+            Due to lack of recent activity, this issue has been marked as a candidate for backlog cleanup.  It will be closed if no further activity occurs within 14 more days. Any new comment (by anyone, not necessarily the author) will undo this process.
+
+
+            This process is part of our [issue cleanup automation](https://github.com/dotnet/runtime/blob/main/docs/issue-cleanup.md).
+      - addLabel:
+          label: backlog-cleanup-candidate
+      - addLabel:
+          label: no-recent-activity
+    - description: Add no-recent-activity label to issues
+      frequencies:
+      - hourly:
+          hour: 6
+      filters:
+      - isIssue
+      - isOpen
+      - hasLabel:
+          label: needs-author-action
+      - noActivitySince:
+          days: 14
+      - isNotLabeledWith:
+          label: no-recent-activity
+      actions:
+      - addLabel:
+          label: no-recent-activity
+      - addReply:
+          reply: This issue has been automatically marked `no-recent-activity` because it has not had any activity for 14 days. It will be closed if no further activity occurs within 14 more days. Any new comment (by anyone, not necessarily the author) will remove `no-recent-activity`.
+    - description: Add no-recent-activity label to PRs
+      frequencies:
+      - hourly:
+          hour: 6
+      filters:
+      - isPullRequest
+      - isOpen
+      - hasLabel:
+          label: needs-author-action
+      - noActivitySince:
+          days: 14
+      - isNotLabeledWith:
+          label: no-recent-activity
+      actions:
+      - addLabel:
+          label: no-recent-activity
+      - addReply:
+          reply: This pull request has been automatically marked `no-recent-activity` because it has not had any activity for 14 days. It will be closed if no further activity occurs within 14 more days. Any new comment (by anyone, not necessarily the author) will remove `no-recent-activity`.
+    - description: Close issues with no recent activity
+      frequencies:
+      - hourly:
+          hour: 6
+      filters:
+      - isIssue
+      - isOpen
+      - hasLabel:
+          label: no-recent-activity
+      - noActivitySince:
+          days: 14
+      actions:
+      - addReply:
+          reply: This issue will now be closed since it had been marked `no-recent-activity` but received no further activity in the past 14 days. It is still possible to reopen or comment on the issue, but please note that the issue will be locked if it remains inactive for another 30 days.
+      - closeIssue
+    - description: Close PRs with no-recent-activity
+      frequencies:
+      - hourly:
+          hour: 6
+      filters:
+      - isPullRequest
+      - isOpen
+      - hasLabel:
+          label: no-recent-activity
+      - noActivitySince:
+          days: 14
+      actions:
+      - addReply:
+          reply: This pull request will now be closed since it had been marked `no-recent-activity` but received no further activity in the past 14 days. It is still possible to reopen or comment on the pull request, but please note that it will be locked if it remains inactive for another 30 days.
+      - closeIssue
+    - description: Close inactive Draft PRs
+      frequencies:
+      - hourly:
+          hour: 6
+      filters:
+      - isDraftPullRequest
+      - isOpen
+      - noActivitySince:
+          days: 30
+      actions:
+      - closeIssue
+      - addReply:
+          reply: Draft Pull Request was automatically closed for 30 days of inactivity. Please [let us know](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you'd like to reopen it.
+    eventResponderTasks:
+    - if:
+      - or:
+        - payloadType: Issues
+        - payloadType: Pull_Request
+      - isAction:
+          action: Opened
+      then:
+      - if:
+        - hasLabel:
+            label: area-AssemblyLoader-coreclr
+        then:
+        - mentionUsers:
+            mentionees:
+            - vitek-karas
+            - agocke
+            - vsadov
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-AssemblyLoader-mono
+        then:
+        - mentionUsers:
+            mentionees: []
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-CodeGen-coreclr
+        then:
+        - mentionUsers:
+            mentionees:
+            - JulieLeeMSFT
+            - jakobbotsch
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Codegen-Interpreter-mono
+        then:
+        - mentionUsers:
+            mentionees:
+            - brzvlad
+            - kotlarmilos
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Codegen-JIT-Mono
+        then:
+        - mentionUsers:
+            mentionees:
+            - lambdageek
+            - steveisok
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-CodeGen-LLVM-Mono
+        then:
+        - mentionUsers:
+            mentionees:
+            - lambdageek
+            - steveisok
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Codegen-Intrinsics-mono
+        then:
+        - mentionUsers:
+            mentionees:
+            - fanyang-mono
+            - steveisok
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-CodeGen-meta-Mono
+        then:
+        - mentionUsers:
+            mentionees:
+            - steveisok
+            - lambdageek
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.DateTime
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-datetime
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Debugger-mono
+        then:
+        - mentionUsers:
+            mentionees:
+            - thaystg
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-DependencyModel
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-dependencymodel
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Diagnostics-coreclr
+        then:
+        - mentionUsers:
+            mentionees:
+            - tommcdon
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Extensions-Caching
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-extensions-caching
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Extensions-Configuration
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-extensions-configuration
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Extensions-DependencyInjection
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-extensions-dependencyinjection
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Extensions-FileSystem
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-extensions-filesystem
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Extensions-Hosting
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-extensions-hosting
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Extensions-HttpClientFactory
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/ncl
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Extensions-Logging
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-extensions-logging
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Extensions-Options
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-extensions-options
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Extensions-Primitives
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-extensions-primitives
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-GC-coreclr
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/gc
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-GC-mono
+        then:
+        - mentionUsers:
+            mentionees:
+            - brzvlad
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Host
+        then:
+        - mentionUsers:
+            mentionees:
+            - vitek-karas
+            - agocke
+            - vsadov
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-HostModel
+        then:
+        - mentionUsers:
+            mentionees:
+            - vitek-karas
+            - agocke
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-ILTools-coreclr
+        then:
+        - mentionUsers:
+            mentionees:
+            - JulieLeeMSFT
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Tools-ILVerification
+        then:
+        - mentionUsers:
+            mentionees:
+            - JulieLeeMSFT
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Infrastructure
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/runtime-infrastructure
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Infrastructure-coreclr
+        then:
+        - mentionUsers:
+            mentionees:
+            - hoyosjs
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Infrastructure-libraries
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-infrastructure-libraries
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Infrastructure-mono
+        then:
+        - mentionUsers:
+            mentionees:
+            - directhex
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Meta
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-meta
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Microsoft.CSharp
+        then:
+        - mentionUsers:
+            mentionees:
+            - cston
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Microsoft.Extensions
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-microsoft-extensions
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Microsoft.VisualBasic
+        then:
+        - mentionUsers:
+            mentionees:
+            - cston
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Microsoft.Win32
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-microsoft-win32
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-NativeAOT-coreclr
+        then:
+        - mentionUsers:
+            mentionees:
+            - agocke
+            - MichalStrehovsky
+            - jkotas
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Single-File
+        then:
+        - mentionUsers:
+            mentionees:
+            - agocke
+            - vitek-karas
+            - vsadov
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Buffers
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-buffers
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.CodeDom
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-codedom
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Collections
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-collections
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.ComponentModel
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-componentmodel
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.ComponentModel.Composition
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-componentmodel-composition
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.ComponentModel.DataAnnotations
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-componentmodel-dataannotations
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Composition
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-composition
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Configuration
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-configuration
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Console
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-console
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Data
+        then:
+        - mentionUsers:
+            mentionees:
+            - roji
+            - ajcvickers
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Data.Odbc
+        then:
+        - mentionUsers:
+            mentionees:
+            - roji
+            - ajcvickers
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Data.OleDB
+        then:
+        - mentionUsers:
+            mentionees:
+            - roji
+            - ajcvickers
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Data.SqlClient
+        then:
+        - mentionUsers:
+            mentionees:
+            - davoudeshtehari
+            - david-engel
+            - jrahnama
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Diagnostics
+        then:
+        - mentionUsers:
+            mentionees:
+            - tommcdon
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Diagnostics.Activity
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-diagnostics-activity
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Diagnostics.EventLog
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-diagnostics-eventlog
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Diagnostics.PerformanceCounter
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-diagnostics-performancecounter
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Diagnostics.Process
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-diagnostics-process
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Diagnostics.TraceSource
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-diagnostics-tracesource
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Diagnostics.Tracing
+        then:
+        - mentionUsers:
+            mentionees:
+            - tarekgh
+            - tommcdon
+            - pjanotti
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.DirectoryServices
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-directoryservices
+            - jay98014
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Drawing
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-drawing
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Dynamic.Runtime
+        then:
+        - mentionUsers:
+            mentionees:
+            - cston
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Formats.Asn1
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-formats-asn1
+            - bartonjs
+            - vcsjones
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Formats.Cbor
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-formats-cbor
+            - bartonjs
+            - vcsjones
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Formats.Tar
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-formats-tar
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Globalization
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-globalization
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.IO
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-io
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.IO.Compression
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-io-compression
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.IO.Hashing
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-io-hashing
+            - bartonjs
+            - vcsjones
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.IO.Ports
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-io-ports
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Linq
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-linq
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Linq.Expressions
+        then:
+        - mentionUsers:
+            mentionees:
+            - cston
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Linq.Parallel
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-linq-parallel
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Management
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-management
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Memory
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-memory
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Net
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/ncl
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Net.Http
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/ncl
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Net.Quic
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/ncl
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Net.Security
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/ncl
+            - bartonjs
+            - vcsjones
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Net.Sockets
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/ncl
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Numerics
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-numerics
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Numerics.Tensors
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-numerics-tensors
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Reflection
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-reflection
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Reflection.Emit
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-reflection-emit
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Reflection.Metadata
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-reflection-metadata
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Resources
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-resources
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Runtime
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-runtime
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Runtime.CompilerServices
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-runtime-compilerservices
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Runtime.InteropServices
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/interop-contrib
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Runtime.Intrinsics
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-runtime-intrinsics
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Security
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-security
+            - bartonjs
+            - vcsjones
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.ServiceProcess
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-serviceprocess
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Speech
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-speech
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Text.Encoding
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-text-encoding
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Text.Encodings.Web
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-text-encodings-web
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Text.Json
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-text-json
+            - gregsdennis
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Text.RegularExpressions
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-text-regularexpressions
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Threading
+        then:
+        - mentionUsers:
+            mentionees:
+            - mangod9
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Threading.Channels
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-threading-channels
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Threading.Tasks
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-threading-tasks
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Transactions
+        then:
+        - mentionUsers:
+            mentionees:
+            - roji
+            - ajcvickers
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-System.Xml
+        then:
+        - mentionUsers:
+            mentionees:
+            - dotnet/area-system-xml
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-Tools-ILLink
+        then:
+        - mentionUsers:
+            mentionees:
+            - agocke
+            - sbomer
+            - vitek-karas
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      - if:
+        - hasLabel:
+            label: area-vm-coreclr
+        then:
+        - mentionUsers:
+            mentionees:
+            - mangod9
+            replyTemplate: >-
+              Tagging subscribers to this area: ${mentionees}
+
+              See info in [area-owners.md](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) if you want to be subscribed.
+            assignMentionees: False
+      description: Area-owners
+    - if:
+      - payloadType: Issues
+      - labelAdded:
+          label: breaking-change
+      then:
+      - addLabel:
+          label: needs-breaking-change-doc-created
+      - addReply:
+          reply: >-
+            Added `needs-breaking-change-doc-created` label because this issue has the `breaking-change` label. 
+
+
+            1. [ ] Create and link to this issue a matching issue in the dotnet/docs repo using the [breaking change documentation template](https://aka.ms/dotnet/docs/new-breaking-change-issue), then remove this `needs-breaking-change-doc-created` label.
+
+
+            Tagging @dotnet/compat for awareness of the breaking change.
+      description: Add breaking change doc label to issue
+    - if:
+      - payloadType: Pull_Request
+      - labelAdded:
+          label: breaking-change
+      - isPullRequest
+      then:
+      - addLabel:
+          label: needs-breaking-change-doc-created
+      - addReply:
+          reply: >-
+            Added `needs-breaking-change-doc-created` label because this PR has the `breaking-change` label. 
+
+
+            When you commit this breaking change:
+
+
+            1. [ ] Create and link to this PR and the issue a matching issue in the dotnet/docs repo using the [breaking change documentation template](https://aka.ms/dotnet/docs/new-breaking-change-issue), then remove this `needs-breaking-change-doc-created` label.
+
+            2. [ ] Ask a committer to mail the `.NET Breaking Change Notification` DL.
+
+
+            Tagging @dotnet/compat for awareness of the breaking change.
+      description: Add breaking change doc label to PR
+    - if:
+      - or:
+        - payloadType: Issues
+        - payloadType: Pull_Request
+      - isAction:
+          action: Opened
+      then:
+      - if:
+        - hasLabel:
+            label: linkable-framework
+        then:
+        - mentionUsers:
+            mentionees:
+            - eerhardt
+            - vitek-karas
+            - LakshanF
+            - sbomer
+            - joperezr
+            - marek-safar
+            replyTemplate: >-
+              Tagging subscribers to 'linkable-framework': ${mentionees}
+
+              See info in area-owners.md if you want to be subscribed.
+            assignMentionees: False
+      description: '@Mention for linkable-framework'
+    - if:
+      - or:
+        - payloadType: Issues
+        - payloadType: Pull_Request
+      - isAction:
+          action: Opened
+      then:
+      - if:
+        - hasLabel:
+            label: size-reduction
+        then:
+        - mentionUsers:
+            mentionees:
+            - eerhardt
+            - SamMonoRT
+            - marek-safar
+            replyTemplate: >-
+              Tagging subscribers to 'size-reduction': ${mentionees}
+
+              See info in area-owners.md if you want to be subscribed.
+            assignMentionees: False
+      description: '@Mention for size-reduction'
+    - if:
+      - or:
+        - payloadType: Issues
+        - payloadType: Pull_Request
+      - isAction:
+          action: Opened
+      then:
+      - if:
+        - hasLabel:
+            label: arch-wasm
+        then:
+        - mentionUsers:
+            mentionees:
+            - lewing
+            replyTemplate: >-
+              Tagging subscribers to 'arch-wasm': ${mentionees}
+
+              See info in area-owners.md if you want to be subscribed.
+            assignMentionees: False
+      description: '@Mention for wasm'
+    - if:
+      - or:
+        - payloadType: Issues
+        - payloadType: Pull_Request
+      - isAction:
+          action: Opened
+      then:
+      - if:
+        - hasLabel:
+            label: os-ios
+        then:
+        - mentionUsers:
+            mentionees:
+            - steveisok
+            - akoeplinger
+            - kotlarmilos
+            replyTemplate: >-
+              Tagging subscribers to 'os-ios': ${mentionees}
+
+              See info in area-owners.md if you want to be subscribed.
+            assignMentionees: False
+      description: '@Mention for ios'
+    - if:
+      - or:
+        - payloadType: Issues
+        - payloadType: Pull_Request
+      - isAction:
+          action: Opened
+      then:
+      - if:
+        - hasLabel:
+            label: os-android
+        then:
+        - mentionUsers:
+            mentionees:
+            - steveisok
+            - akoeplinger
+            replyTemplate: >-
+              Tagging subscribers to 'arch-android': ${mentionees}
+
+              See info in area-owners.md if you want to be subscribed.
+            assignMentionees: False
+      description: '@Mention for android'
+    - if:
+      - payloadType: Pull_Request
+      - or:
+        - filesMatchPattern:
+            pattern: .*ILLink.*
+        - filesMatchPattern:
+            pattern: .*illink.*
+      - not:
+          hasLabel:
+            label: linkable-framework
+      - isPullRequest
+      - isOpen
+      then:
+      - addLabel:
+          label: linkable-framework
+      description: '[Linkable-framework workgroup] Add linkable-framework label to new Prs that touch files with *ILLink* that not have it already'
+    - if:
+      - payloadType: Pull_Request
+      - or:
+        - filesMatchPattern:
+            pattern: .*ILLink.*
+        - filesMatchPattern:
+            pattern: .*illink.*
+      - not:
+          hasLabel:
+            label: linkable-framework
+      - isPullRequest
+      - isOpen
+      - isAction:
+          action: Synchronize
+      then:
+      - addLabel:
+          label: linkable-framework
+      description: '[Linkable-framework workgroup] Add linkable-framework label to Prs that get changes pushed where they touch *ILLInk* files'
+    - if:
+      - payloadType: Issues
+      - labelAdded:
+          label: backlog-cleanup-candidate
+      then:
+      - addReply:
+          reply: >-
+            Due to lack of recent activity, this issue has been marked as a candidate for backlog cleanup.  It will be closed if no further activity occurs within 14 more days. Any new comment (by anyone, not necessarily the author) will undo this process.
+
+
+            This process is part of our [issue cleanup automation](https://github.com/dotnet/runtime/blob/main/docs/issue-cleanup.md).
+      - addLabel:
+          label: no-recent-activity
+      description: Manual Issue Cleanup
+    - if:
+      - or:
+        - payloadType: Issues
+        - payloadType: Pull_Request
+      - isAction:
+          action: Opened
+      then:
+      - if:
+        - hasLabel:
+            label: os-tvos
+        then:
+        - mentionUsers:
+            mentionees:
+            - steveisok
+            - akoeplinger
+            replyTemplate: >-
+              Tagging subscribers to 'os-tvos': ${mentionees}
+
+              See info in area-owners.md if you want to be subscribed.
+            assignMentionees: False
+      description: '@Mention for tvos'
+    - if:
+      - or:
+        - payloadType: Issues
+        - payloadType: Pull_Request
+      - isAction:
+          action: Opened
+      then:
+      - if:
+        - hasLabel:
+            label: os-maccatalyst
+        then:
+        - mentionUsers:
+            mentionees:
+            - steveisok
+            - akoeplinger
+            replyTemplate: >-
+              Tagging subscribers to 'os-maccatalyst': ${mentionees}
+
+              See info in area-owners.md if you want to be subscribed.
+            assignMentionees: False
+      description: '@Mention for maccatalyst'
+    - if:
+      - payloadType: Issues
+      - or:
+        - isAction:
+            action: Opened
+        - isAction:
+            action: Reopened
+      - isOpen
+      - not: isPartOfAnyMilestone
+      - not:
+          hasLabel:
+            label: untriaged
+      then:
+      - addLabel:
+          label: untriaged
+      description: Add untriaged label to new/reopened issues without a milestone
+    - if:
+      - payloadType: Issues
+      - or:
+        - isAction:
+            action: Closed
+        - isPartOfAnyMilestone
+      - hasLabel:
+          label: untriaged
+      then:
+      - removeLabel:
+          label: untriaged
+      description: Remove untriaged label from issues when closed or added to a milestone
+    - if:
+      - payloadType: Pull_Request
+      then:
+      - inPrLabel:
+          label: in-pr
+      description: Add `in-pr` label on issue when an open pull request is targeting it
+    - if:
+      - payloadType: Pull_Request
+      - isAction:
+          action: Opened
+      - not:
+          activitySenderHasPermission:
+            permission: Read
+      then:
+      - assignTo:
+          author: True
+      description: Assign Team PRs to author
+    - if:
+      - payloadType: Pull_Request
+      - isAction:
+          action: Opened
+      - isPullRequest
+      - and:
+        - not:
+            activitySenderHasPermission:
+              permission: Admin
+        - not:
+            activitySenderHasPermission:
+              permission: Write
+        - not:
+            isActivitySender:
+              user: github-actions[bot]
+              issueAuthor: False
+        - not:
+            isActivitySender:
+              user: dotnet-maestro[bot]
+              issueAuthor: False
+        - not:
+            isActivitySender:
+              user: dotnet-maestro-bot[bot]
+              issueAuthor: False
+        - not:
+            isActivitySender:
+              user: dotnet-maestro-bot
+              issueAuthor: False
+        - not:
+            isActivitySender:
+              user: dotnet-maestro
+              issueAuthor: False
+        - not:
+            isActivitySender:
+              user: github-actions
+              issueAuthor: False
+      then:
+      - addLabel:
+          label: community-contribution
+      description: Label community PRs
+    - if:
+      - payloadType: Issues
+      - labelAdded:
+          label: needs-author-action
+      then:
+      - addReply:
+          reply: This issue has been marked `needs-author-action` and may be missing some important information.
+      description: Needs-author-action notification
+    - if:
+      - payloadType: Pull_Request_Review
+      - not:
+          activitySenderHasPermission:
+            permission: Read
+      - isPullRequest
+      - isAction:
+          action: Submitted
+      - isReviewState:
+          reviewState: Changes_requested
+      then:
+      - addLabel:
+          label: needs-author-action
+      description: PR reviews with "changes requested" applies the needs-author-action label
+    - if:
+      - payloadType: Issue_Comment
+      - isAction:
+          action: Created
+      - isActivitySender:
+          issueAuthor: True
+      - hasLabel:
+          label: needs-author-action
+      - not:
+          hasLabel:
+            label: untriaged
+      - isIssue
+      - isOpen
+      then:
+      - addLabel:
+          label: needs-further-triage
+      - removeLabel:
+          label: needs-author-action
+      description: Replace `needs-author-action` label with `needs-further-triage` label when the author comments on an issue that is not still untriaged
+    - if:
+      - payloadType: Issue_Comment
+      - isAction:
+          action: Created
+      - isActivitySender:
+          issueAuthor: True
+      - hasLabel:
+          label: needs-author-action
+      - hasLabel:
+          label: untriaged
+      - isIssue
+      - isOpen
+      then:
+      - removeLabel:
+          label: needs-author-action
+      description: Remove `needs-author-action` label when the author comments on an `untriaged` issue
+    - if:
+      - payloadType: Pull_Request
+      - isPullRequest
+      - isAction:
+          action: Synchronize
+      - hasLabel:
+          label: needs-author-action
+      then:
+      - removeLabel:
+          label: needs-author-action
+      description: Pushing changes to PR branch removes the needs-author-action label
+    - if:
+      - payloadType: Issue_Comment
+      - isActivitySender:
+          issueAuthor: True
+      - isAction:
+          action: Created
+      - hasLabel:
+          label: needs-author-action
+      - isPullRequest
+      - isOpen
+      then:
+      - removeLabel:
+          label: needs-author-action
+      description: Author commenting in PR removes the needs-author-action label
+    - if:
+      - payloadType: Pull_Request_Review
+      - isActivitySender:
+          issueAuthor: True
+      - hasLabel:
+          label: needs-author-action
+      - isAction:
+          action: Submitted
+      - isPullRequest
+      - isOpen
+      then:
+      - removeLabel:
+          label: needs-author-action
+      description: Author responding to a pull request review comment removes the needs-author-action label
+    - if:
+      - payloadType: Issues
+      - not:
+          isAction:
+            action: Closed
+      - hasLabel:
+          label: no-recent-activity
+      - not:
+          labelAdded:
+            label: no-recent-activity
+      then:
+      - removeLabel:
+          label: no-recent-activity
+      - removeLabel:
+          label: backlog-cleanup-candidate
+      description: Remove `no-recent-activity` label from issues when issue is modified
+    - if:
+      - payloadType: Issue_Comment
+      - hasLabel:
+          label: no-recent-activity
+      - isIssue
+      then:
+      - removeLabel:
+          label: no-recent-activity
+      - removeLabel:
+          label: backlog-cleanup-candidate
+      description: Remove `no-recent-activity` label when an issue is commented on
+    - if:
+      - payloadType: Pull_Request
+      - isPullRequest
+      - isOpen
+      - hasLabel:
+          label: no-recent-activity
+      - not:
+          labelAdded:
+            label: no-recent-activity
+      then:
+      - removeLabel:
+          label: no-recent-activity
+      - removeLabel:
+          label: backlog-cleanup-candidate
+      description: Remove `no-recent-activity` label from PRs when modified
+    - if:
+      - payloadType: Issue_Comment
+      - hasLabel:
+          label: no-recent-activity
+      - isPullRequest
+      - isOpen
+      then:
+      - removeLabel:
+          label: no-recent-activity
+      - removeLabel:
+          label: backlog-cleanup-candidate
+      description: Remove `no-recent-activity` label from PRs when commented on
+    - if:
+      - payloadType: Pull_Request_Review
+      - hasLabel:
+          label: no-recent-activity
+      - isPullRequest
+      - isOpen
+      then:
+      - removeLabel:
+          label: no-recent-activity
+      - removeLabel:
+          label: backlog-cleanup-candidate
+      description: Remove `no-recent-activity` label from PRs when new review is added
+onFailure: 
+onSuccess: 
diff --git a/.vsconfig b/.vsconfig
index f949b82c3744..a8ffd2d93547 100644
--- a/.vsconfig
+++ b/.vsconfig
@@ -43,7 +43,7 @@
     "Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
     "Microsoft.VisualStudio.Component.VC.Tools.ARM64",
     "Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
-    "Microsoft.VisualStudio.Component.Windows10SDK.19041",
+    "Microsoft.VisualStudio.Component.Windows10SDK.20348",
     "Microsoft.VisualStudio.ComponentGroup.MSIX.Packaging",
     "Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
     "Microsoft.VisualStudio.Workload.CoreEditor",
diff --git a/Build.proj b/Build.proj
index baa240685ae5..0957ae6846e5 100644
--- a/Build.proj
+++ b/Build.proj
@@ -8,7 +8,7 @@
   </ItemGroup>
 
   <Import Project="$(RepositoryEngineeringDir)SubsetValidation.targets" />
-  <Import Project="$(RepositoryEngineeringDir)restore\optimizationData.targets" Condition="'$(DotNetBuildFromSource)' != 'true'" />
+  <Import Project="$(RepositoryEngineeringDir)restore\optimizationData.targets" Condition="'$(DotNetBuildSourceOnly)' != 'true'" />
 
   <Target Name="BuildLocalTasks"
           BeforeTargets="Build">
diff --git a/Directory.Build.props b/Directory.Build.props
index e4a81109078b..46c00ca53a38 100644
--- a/Directory.Build.props
+++ b/Directory.Build.props
@@ -5,12 +5,15 @@
     <ImportDirectoryBuildProps>false</ImportDirectoryBuildProps>
     <!-- Used to determine if we should build some packages only once across multiple official build legs.
          For offline builds we still set OfficialBuildId but we need to build all the packages for a single
-         leg only, so we also take DotNetBuildFromSource  into account. -->
-    <BuildingAnOfficialBuildLeg Condition="'$(BuildingAnOfficialBuildLeg)' == '' and '$(OfficialBuildId)' != '' and '$(DotNetBuildFromSource)' != 'true'">true</BuildingAnOfficialBuildLeg>
+         leg only, so we also take DotNetBuildSourceOnly into account. -->
+    <BuildingAnOfficialBuildLeg Condition="'$(BuildingAnOfficialBuildLeg)' == '' and '$(OfficialBuildId)' != '' and '$(DotNetBuildSourceOnly)' != 'true'">true</BuildingAnOfficialBuildLeg>
     <!-- When doing a source build, we want to build the various text-only manifests in
          all cases, rather than ordinarily where we build them during mobile or wasm
          build legs. This makes the manifests available on source-only builds. -->
-    <ForceBuildMobileManifests Condition="'$(DotNetBuildFromSource)' == 'true'">true</ForceBuildMobileManifests>
+    <ForceBuildMobileManifests Condition="'$(DotNetBuildSourceOnly)' == 'true'">true</ForceBuildMobileManifests>
+    <!-- Propagate the configuration and platform to project references. When building in VS, this defaults to true.
+         Explicitly setting it to false ensures projects are built in the corresponding configuration both in VS and on the command line. -->
+    <ShouldUnsetParentConfigurationAndPlatform>false</ShouldUnsetParentConfigurationAndPlatform>
   </PropertyGroup>
 
   <!-- Seeding these two properties from Platform allows one to use VS's configuration manager to switch targets -->
@@ -100,7 +103,7 @@
 
     <!-- The minimum supported .NET version. -->
     <NetCoreAppMinimum>net8.0</NetCoreAppMinimum>
-    <NetCoreAppMinimum Condition="'$(DotNetBuildFromSource)' == 'true'">$(NetCoreAppCurrent)</NetCoreAppMinimum>
+    <NetCoreAppMinimum Condition="'$(DotNetBuildSourceOnly)' == 'true'">$(NetCoreAppCurrent)</NetCoreAppMinimum>
 
     <!-- when this is updated, make sure to keep $(_NetCoreAppToolCurrent)
          in src/mono/wasm/build/WasmApp.LocalBuild.props
@@ -116,9 +119,9 @@
     <NetFrameworkCurrent>net48</NetFrameworkCurrent>
     <NetFrameworkToolCurrent>net472</NetFrameworkToolCurrent>
     <!-- Don't build for NETFramework during source-build. -->
-    <NetFrameworkMinimum Condition="'$(DotNetBuildFromSource)' == 'true'" />
-    <NetFrameworkToolCurrent Condition="'$(DotNetBuildFromSource)' == 'true'" />
-    <NetFrameworkCurrent Condition="'$(DotNetBuildFromSource)' == 'true'" />
+    <NetFrameworkMinimum Condition="'$(DotNetBuildSourceOnly)' == 'true'" />
+    <NetFrameworkToolCurrent Condition="'$(DotNetBuildSourceOnly)' == 'true'" />
+    <NetFrameworkCurrent Condition="'$(DotNetBuildSourceOnly)' == 'true'" />
 
     <!-- Important: Set this to the GA version (or a close approximation) during servicing and adjust the TFM property below. -->
     <ApiCompatNetCoreAppBaselineVersion>8.0.0</ApiCompatNetCoreAppBaselineVersion>
@@ -182,7 +185,7 @@
     <MonoTargetsTasksAssemblyPath>$([MSBuild]::NormalizePath('$(MonoTargetsTasksDir)', 'MonoTargetsTasks.dll'))</MonoTargetsTasksAssemblyPath>
     <TestExclusionListTasksAssemblyPath>$([MSBuild]::NormalizePath('$(TestExclusionListTasksDir)', 'TestExclusionListTasks.dll'))</TestExclusionListTasksAssemblyPath>
     <CoreCLRToolPath>$([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'coreclr', '$(TargetOS).$(TargetArchitecture).$(RuntimeConfiguration)'))</CoreCLRToolPath>
-    <ILAsmToolPath Condition="'$(DotNetBuildFromSource)' == 'true' or '$(BuildArchitecture)' == 's390x' or '$(BuildArchitecture)' == 'ppc64le'">$(CoreCLRToolPath)</ILAsmToolPath>
+    <ILAsmToolPath Condition="'$(DotNetBuildSourceOnly)' == 'true' or '$(BuildArchitecture)' == 's390x' or '$(BuildArchitecture)' == 'ppc64le'">$(CoreCLRToolPath)</ILAsmToolPath>
     <WasmtimeDir Condition="'$(WasmtimeDir)' == '' and '$(WASMTIME_PATH)' != '' and Exists($(WASMTIME_PATH))">$(WASMTIME_PATH)</WasmtimeDir>
     <WasmtimeDir Condition="'$(WasmtimeDir)' == ''">$([MSBuild]::NormalizeDirectory($(ArtifactsObjDir), 'wasmtime'))</WasmtimeDir>
     <InstallWasmtimeForTests Condition="'$(InstallWasmtimeForTests)' == '' and !Exists($(WasmtimeDir))">true</InstallWasmtimeForTests>
@@ -190,7 +193,7 @@
   </PropertyGroup>
 
   <PropertyGroup Label="CalculatePortableBuild">
-    <PortableBuild Condition="'$(PortableBuild)' == '' and '$(DotNetBuildFromSource)' == 'true'">false</PortableBuild>
+    <PortableBuild Condition="'$(PortableBuild)' == '' and '$(DotNetBuildSourceOnly)' == 'true'">false</PortableBuild>
     <PortableBuild Condition="'$(PortableBuild)' == ''">true</PortableBuild>
   </PropertyGroup>
 
@@ -310,8 +313,8 @@
 
   <!--Feature switches -->
   <PropertyGroup>
-    <NoPgoOptimize Condition="'$(NoPgoOptimize)' == '' and '$(DotNetBuildFromSource)' == 'true'">true</NoPgoOptimize>
-    <EnableNgenOptimization Condition="'$(EnableNgenOptimization)' == '' and '$(DotNetBuildFromSource)' == 'true'">false</EnableNgenOptimization>
+    <NoPgoOptimize Condition="'$(NoPgoOptimize)' == '' and '$(DotNetBuildSourceOnly)' == 'true'">true</NoPgoOptimize>
+    <EnableNgenOptimization Condition="'$(EnableNgenOptimization)' == '' and '$(DotNetBuildSourceOnly)' == 'true'">false</EnableNgenOptimization>
     <EnableNgenOptimization Condition="'$(EnableNgenOptimization)' == '' and ('$(Configuration)' == 'Release' or '$(Configuration)' == 'Checked')">true</EnableNgenOptimization>
     <!-- Enable NuGet static graph evaluation to optimize incremental restore -->
     <RestoreUseStaticGraphEvaluation>true</RestoreUseStaticGraphEvaluation>
@@ -325,6 +328,8 @@
                                   '$(OfficialBuildId)' == ''">true</DisableSourceLink>
     <!-- Runtime doesn't support Arcade-driven target framework filtering. -->
     <NoTargetFrameworkFiltering>true</NoTargetFrameworkFiltering>
+
+    <NativeBuildPartitionPropertiesToRemove>ClrFullNativeBuild;ClrRuntimeSubset;ClrJitSubset;ClrPalTestsSubset;ClrAllJitsSubset;ClrILToolsSubset;ClrNativeAotSubset;ClrSpmiSubset;ClrCrossComponentsSubset;ClrDebugSubset;HostArchitecture;PgoInstrument;NativeOptimizationDataSupported;CMakeArgs</NativeBuildPartitionPropertiesToRemove>
   </PropertyGroup>
 
   <!-- RepositoryEngineeringDir isn't set when Installer tests import this file. -->
@@ -392,7 +397,7 @@
     <!-- Always pass portable to override arcade sdk which uses embedded for local builds -->
     <DebugType>portable</DebugType>
     <DebugSymbols>true</DebugSymbols>
-    <KeepNativeSymbols Condition="'$(KeepNativeSymbols)' == '' and '$(DotNetBuildFromSource)' == 'true'">true</KeepNativeSymbols>
+    <KeepNativeSymbols Condition="'$(KeepNativeSymbols)' == '' and '$(DotNetBuildSourceOnly)' == 'true'">true</KeepNativeSymbols>
     <KeepNativeSymbols Condition="'$(KeepNativeSymbols)' == ''">false</KeepNativeSymbols>
     <!-- Used for launchSettings.json and runtime config files. -->
     <AppDesignerFolder>Properties</AppDesignerFolder>
diff --git a/Directory.Build.targets b/Directory.Build.targets
index 336051f191a4..f731eedc390c 100644
--- a/Directory.Build.targets
+++ b/Directory.Build.targets
@@ -21,7 +21,7 @@
   When .NET gets built from source, make the SDK aware there are bootstrap packages
   for Microsoft.NETCore.App.Runtime.<rid> and Microsoft.NETCore.App.Crossgen2.<rid>.
   -->
-  <ItemGroup Condition="'$(DotNetBuildFromSource)' == 'true'">
+  <ItemGroup Condition="'$(DotNetBuildSourceOnly)' == 'true'">
     <KnownFrameworkReference Update="Microsoft.NETCore.App">
       <RuntimePackRuntimeIdentifiers
         Condition="'%(TargetFramework)' == '$(NetCoreAppCurrent)'">%(RuntimePackRuntimeIdentifiers);$(PackageRID)</RuntimePackRuntimeIdentifiers>
@@ -85,6 +85,12 @@
     <GenerateRequiresPreviewFeaturesAttribute Condition="'$(TargetFrameworkMoniker)' == '$(NetCoreAppCurrentTargetFrameworkMoniker)'">true</GenerateRequiresPreviewFeaturesAttribute>
   </PropertyGroup>
 
+  <PropertyGroup>
+    <!-- when building from source we need to use the current version of MetadataLoadContext as the toolset version, but source-build imports
+         another props file which overrides the SystemReflectionMetadataLoadContextVersion from Version.props so we can't set it there -->
+    <SystemReflectionMetadataLoadContextToolsetVersion Condition="'$(DotNetBuildSourceOnly)' == 'true'">$(SystemReflectionMetadataLoadContextVersion)</SystemReflectionMetadataLoadContextToolsetVersion>
+  </PropertyGroup>
+
   <Target Name="ValidateTargetOSLowercase"
           Condition="!$(TargetOS.Equals($(TargetOS.ToLower()), StringComparison.InvariantCulture))">
     <Error Text="The passed-in TargetOS property value '$(TargetOS)' must be lowercase." />
diff --git a/THIRD-PARTY-NOTICES.TXT b/THIRD-PARTY-NOTICES.TXT
index ff5aaacd21b7..065abb6033e9 100644
--- a/THIRD-PARTY-NOTICES.TXT
+++ b/THIRD-PARTY-NOTICES.TXT
@@ -73,7 +73,7 @@ https://github.com/madler/zlib
 https://zlib.net/zlib_license.html
 
 /* zlib.h -- interface of the 'zlib' general purpose compression library
-  version 1.2.13, October 13th, 2022
+  version 1.3.1, January 22nd, 2024
 
   Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler
 
diff --git a/docs/area-owners.md b/docs/area-owners.md
index 52cb16d8d8d7..68b58e9a5e9b 100644
--- a/docs/area-owners.md
+++ b/docs/area-owners.md
@@ -16,13 +16,13 @@ Note: Editing this file doesn't update the mapping used by `@msftbot` for area-s
 | area-AssemblyLoader-mono                       | @agocke              | @agocke @elinor-fung                                |                                                                                                                                                                                                                                                                                           |
 | area-Build-mono                                | @lewing              | @akoeplinger                                        |                                                                                                                                                                                                                                                                                           |
 | area-Codeflow                                  | @dotnet/dnr-codeflow | @dotnet/dnr-codeflow                                | Used for automated PRs that ingest code from other repos                                                                                                                                                                                                                                  |
-| area-Codegen-AOT-mono                          | @steveisok           | @vargaz @kotlarmilos                                |                                                                                                                                                                                                                                                                                           |
+| area-Codegen-AOT-mono                          | @steveisok           | @kotlarmilos                                        |                                                                                                                                                                                                                                                                                           |
 | area-CodeGen-coreclr                           | @JulieLeeMSFT        | @BruceForstall @dotnet/jit-contrib                  |                                                                                                                                                                                                                                                                                           |
 | area-Codegen-Interpreter-mono                  | @vitek-karas         | @BrzVlad @kotlarmilos                               |                                                                                                                                                                                                                                                                                           |
 | area-Codegen-Intrinsics-mono                   | @steveisok           | @fanyang-mono                                       |                                                                                                                                                                                                                                                                                           |
-| area-Codegen-JIT-mono                          | @steveisok           | @vargaz                                             |                                                                                                                                                                                                                                                                                           |
-| area-Codegen-LLVM-mono                         | @steveisok           | @vargaz                                             |                                                                                                                                                                                                                                                                                           |
-| area-Codegen-meta-mono                         | @steveisok           | @vargaz                                             |                                                                                                                                                                                                                                                                                           |
+| area-Codegen-JIT-mono                          | @steveisok           |                                                     |                                                                                                                                                                                                                                                                                           |
+| area-Codegen-LLVM-mono                         | @steveisok           |                                                     |                                                                                                                                                                                                                                                                                           |
+| area-Codegen-meta-mono                         | @steveisok           |                                                     |                                                                                                                                                                                                                                                                                           |
 | area-CrossGen/NGEN-coreclr                     | @steveisok           | @dotnet/crossgen-contrib                            |                                                                                                                                                                                                                                                                                           |
 | area-crossgen2-coreclr                         | @steveisok           | @dotnet/crossgen-contrib                            |                                                                                                                                                                                                                                                                                           |
 | area-Debugger-mono                             | @tommcdon            | @thaystg                                            |                                                                                                                                                                                                                                                                                           |
@@ -73,9 +73,9 @@ Note: Editing this file doesn't update the mapping used by `@msftbot` for area-s
 | area-System.Composition                        | @ericstj             | @dotnet/area-system-composition                     |                                                                                                                                                                                                                                                                                           |
 | area-System.Configuration                      | @ericstj             | @dotnet/area-system-configuration                   |                                                                                                                                                                                                                                                                                           |
 | area-System.Console                            | @jeffhandley         | @dotnet/area-system-console                         |                                                                                                                                                                                                                                                                                           |
-| area-System.Data                               | @ajcvickers          | @ajcvickers @davoudeshtehari @david-engel @roji     | <ul><li>Odbc, OleDb - @saurabh500</li></ul>                                                                                                                                                                                                                                               |
-| area-System.Data.Odbc                          | @ajcvickers          | @ajcvickers @roji                                   |                                                                                                                                                                                                                                                                                           |
-| area-System.Data.OleDB                         | @ajcvickers          | @ajcvickers @roji                                   |                                                                                                                                                                                                                                                                                           |
+| area-System.Data                               | @sammonort          | @ajcvickers @davoudeshtehari @david-engel @roji     | <ul><li>Odbc, OleDb - @saurabh500</li></ul>                                                                                                                                                                                                                                               |
+| area-System.Data.Odbc                          | @sammonort          | @ajcvickers @roji                                   |                                                                                                                                                                                                                                                                                           |
+| area-System.Data.OleDB                         | @sammonort          | @ajcvickers @roji                                   |                                                                                                                                                                                                                                                                                           |
 | area-System.Data.SqlClient                     | @David-Engel         | @davoudeshtehari @david-engel @jrahnama             | Archived component - limited churn/contributions (see https://devblogs.microsoft.com/dotnet/introducing-the-new-microsoftdatasqlclient/)                                                                                                                                                  |
 | area-System.DateTime                           | @ericstj             | @dotnet/area-system-datetime                        | System namespace APIs related to dates and times, including DateOnly, DateTime, DateTimeKind, DateTimeOffset, DayOfWeek, TimeOnly, TimeSpan, TimeZone, and TimeZoneInfo                                                                                                                   |
 | area-System.Diagnostics                        | @tommcdon            | @dotnet/area-system-diagnostics                     |                                                                                                                                                                                                                                                                                           |
@@ -135,7 +135,7 @@ Note: Editing this file doesn't update the mapping used by `@msftbot` for area-s
 | area-System.Threading.Channels                 | @ericstj             | @dotnet/area-system-threading-channels              | Consultants: @stephentoub                                                                                                                                                                                                                                                                 |
 | area-System.Threading.RateLimiting             | @rafikiassumani-msft | @BrennanConroy @halter73                            |                                                                                                                                                                                                                                                                                           |
 | area-System.Threading.Tasks                    | @ericstj             | @dotnet/area-system-threading-tasks                 | Consultants: @stephentoub                                                                                                                                                                                                                                                                 |
-| area-System.Transactions                       | @ajcvickers          | @roji                                               |                                                                                                                                                                                                                                                                                           |
+| area-System.Transactions                       | @sammonort           | @roji                                               |                                                                                                                                                                                                                                                                                           |
 | area-System.Xml                                | @jeffhandley         | @dotnet/area-system-xml                             |                                                                                                                                                                                                                                                                                           |
 | area-TieredCompilation-coreclr                 | @mangod9             | @kouvel                                             |                                                                                                                                                                                                                                                                                           |
 | area-Tools-ILLink                              | @agocke              | @dotnet/illink                                      |                                                                                                                                                                                                                                                                                           |
@@ -162,16 +162,16 @@ Note: Editing this file doesn't update the mapping used by `@msftbot` for area-s
 > dedicated OS lead/owner, rather ownership falls back to the `area-*` label. However,
 > Windows is a supported operating system of course.
 
-| Operating System | Lead          | Owners (area experts to tag in PRs and issues)                      | Description     |
-|------------------|---------------|---------------------------------------------------------------------|-----------------|
-| os-android       | @vitek-karas  | @akoeplinger                                                        |                 |
-| os-freebsd       |               | @wfurt @Thefrank @sec                                               |                 |
-| os-maccatalyst   | @vitek-karas  | @kotlarmilos                                                        |                 |
-| os-ios           | @vitek-karas  | @vargaz, @kotlarmilos                                               |                 |
-| os-tizen         | @gbalykov     | @hjleee, @wscho77, @clamp03, @JongHeonChoi, @t-mustafin, @viewizard |                 |
-| os-tvos          | @vitek-karas  | @vargaz, @kotlarmilos                                               |                 |
-| os-wasi          | @lewing       | @pavelsavara                                                        |                 |
-| os-browser       | @lewing       | @pavelsavara                                                        |                 |
+| Operating System | Lead          | Owners (area experts to tag in PRs and issues)     | Description     |
+|------------------|---------------|----------------------------------------------------|-----------------|
+| os-android       | @vitek-karas  | @akoeplinger                                       |                 |
+| os-freebsd       |               | @wfurt @Thefrank @sec                              |                 |
+| os-maccatalyst   | @vitek-karas  | @kotlarmilos                                       |                 |
+| os-ios           | @vitek-karas  | @kotlarmilos                                       |                 |
+| os-tizen         | @gbalykov     | @dotnet/samsung                                    |                 |
+| os-tvos          | @vitek-karas  | @kotlarmilos                                       |                 |
+| os-wasi          | @lewing       | @pavelsavara                                       |                 |
+| os-browser       | @lewing       | @pavelsavara                                       |                 |
 
 ## Architectures
 
@@ -180,12 +180,12 @@ Note: Editing this file doesn't update the mapping used by `@msftbot` for area-s
 > [!NOTE]
 > Ownership isn't the same as supported. See [operating systems](#operating-systems) for details.
 
-| Architecture     | Lead          | Owners (area experts to tag in PRs and issues)                      | Description  |
-|------------------|---------------|---------------------------------------------------------------------|--------------|
-| arch-loongarch64 | @shushanhf    | @LuckyXu-HF                                                         |              |
-| arch-riscv       | @gbalykov     | @hjleee, @wscho77, @clamp03, @JongHeonChoi, @t-mustafin, @viewizard |              |
-| arch-s390x       | @uweigand     | @uweigand                                                           |              |
-| arch-wasm        | @lewing       | @lewing, @pavelsavara                                               |              |
+| Architecture     | Lead          | Owners (area experts to tag in PRs and issues)     | Description     |
+|------------------|---------------|----------------------------------------------------|-----------------|
+| arch-loongarch64 | @shushanhf    | @LuckyXu-HF                                        |                 |
+| arch-riscv       | @gbalykov     | @dotnet/samsung                                    |                 |
+| arch-s390x       | @uweigand     | @uweigand                                          |                 |
+| arch-wasm        | @lewing       | @lewing, @pavelsavara                              |                 |
 
 ## Community Triagers
 
diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md
index b16806514657..417f6fdec534 100644
--- a/docs/design/coreclr/botr/clr-abi.md
+++ b/docs/design/coreclr/botr/clr-abi.md
@@ -177,11 +177,13 @@ This section describes the conventions the JIT needs to follow when generating c
 
 ## Funclets
 
-For all platforms except Windows/x86, all managed EH handlers (finally, fault, filter, filter-handler, and catch) are extracted into their own 'funclets'. To the OS they are treated just like first class functions (separate PDATA and XDATA (`RUNTIME_FUNCTION` entry), etc.). The CLR currently treats them just like part of the parent function in many ways. The main function and all funclets must be allocated in a single code allocation (see hot cold splitting). They 'share' GC info. Only the main function prolog can be hot patched.
+For all platforms except Windows/x86 on CoreCLR, all managed EH handlers (finally, fault, filter, filter-handler, and catch) are extracted into their own 'funclets'. To the OS they are treated just like first class functions (separate PDATA and XDATA (`RUNTIME_FUNCTION` entry), etc.). The CLR currently treats them just like part of the parent function in many ways. The main function and all funclets must be allocated in a single code allocation (see hot cold splitting). They 'share' GC info. Only the main function prolog can be hot patched.
 
 The only way to enter a handler funclet is via a call. In the case of an exception, the call is from the VM's EH subsystem as part of exception dispatch/unwind. In the non-exceptional case, this is called local unwind or a non-local exit. In C# this is accomplished by simply falling-through/out of a try body or an explicit goto. In IL this is always accomplished via a LEAVE opcode, within a try body, targeting an IL offset outside the try body. In such cases the call is from the JITed code of the parent function.
 
-For Windows/x86, all handlers are generated within the method body, typically in lexical order. A nested try/catch is generated completely within the EH region in which it is nested. These handlers are essentially "in-line funclets", but they do not look like normal functions: they do not have a normal prolog or epilog, although they do have special entry/exit and register conventions. Also, nested handlers are not un-nested as for funclets: the code for a nested handler is generated within the handler in which it is nested.
+For Windows/x86 on CoreCLR, all handlers are generated within the method body, typically in lexical order. A nested try/catch is generated completely within the EH region in which it is nested. These handlers are essentially "in-line funclets", but they do not look like normal functions: they do not have a normal prolog or epilog, although they do have special entry/exit and register conventions. Also, nested handlers are not un-nested as for funclets: the code for a nested handler is generated within the handler in which it is nested.
+
+For Windows/x86 on NativeAOT and Linux/x86, funclets are used just like on other platforms.
 
 ## Cloned finallys
 
diff --git a/docs/design/coreclr/botr/guide-for-porting.md b/docs/design/coreclr/botr/guide-for-porting.md
index 5d2c01aa52d0..f7ca105bf165 100644
--- a/docs/design/coreclr/botr/guide-for-porting.md
+++ b/docs/design/coreclr/botr/guide-for-porting.md
@@ -413,12 +413,6 @@ Here is an annotated list of the stubs implemented for Unix on Arm64.
         Today use of this feature on Unix requires hand-written IL. On Windows
         this feature is commonly used by C++/CLI
 
-3.  EH Correctness. Some helpers are written in assembly to provide well known
-    locations for NullReferenceExceptions to be generated out of a SIGSEGV
-    signal.
-
-    1.  `JIT_MemSet`, and `JIT_MemCpy` have this requirement
-
 #### cgencpu.h
 
 This header is included by various code in the VM directory. It provides a large
diff --git a/docs/design/coreclr/jit/first-class-structs.md b/docs/design/coreclr/jit/first-class-structs.md
index dc017aee75f2..ce05fa6d8d84 100644
--- a/docs/design/coreclr/jit/first-class-structs.md
+++ b/docs/design/coreclr/jit/first-class-structs.md
@@ -74,13 +74,6 @@ encountered by most phases of the JIT:
   if it's a promoted struct field, or to a `GT_LCL_FLD` or GT_IND` by `fgMorphField()`.
   * Proposed: A non-promoted struct typed field should be transformed into a `GT_OBJ`, so that consistently all struct
     nodes, even r-values, have `ClassLayout`.
-* `GT_MKREFANY`: This produces a "known" struct type, which is currently obtained by
-  calling `impGetRefAnyClass()` which is a call over the JIT/EE interface. This node is always
-  eliminated, and its source address used to create a copy. If it is on the rhs
-  of an assignment, it will be eliminated during the importer. If it is a call argument it will
-  be eliminated during morph.
-  * The presence of any of these in a method disables struct promotion. See `case CEE_MKREFANY` in the
-    `Importer`, where it is asserted that these are rare, and therefore not worth the trouble to handle.
 
 ### Struct “objects” as lvalues
 
@@ -94,10 +87,6 @@ encountered by most phases of the JIT:
       [#21705](https://github.com/dotnet/coreclr/pull/21705) they are no longer large nodes.
   * `GT_STORE_OBJ` and `GT_STORE_BLK` have the same structure as `GT_OBJ` and `GT_BLK`, respectively
     * `Data()` is op2
-  * `GT_STORE_DYN_BLK` (GenTreeStoreDynBlk extends GenTreeBlk)
-    * Additional child `gtDynamicSize`
-    * Note that these aren't really struct stores; they represent dynamically sized blocks
-      of arbitrary data.
   * For `GT_LCL_FLD` nodes, we store a pointer to `ClassLayout` in the node.
   * For `GT_LCL_VAR` nodes, the `ClassLayout` is obtained from the `LclVarDsc`.
 
diff --git a/docs/design/coreclr/jit/jit-call-morphing.md b/docs/design/coreclr/jit/jit-call-morphing.md
index 2e83a9b0643b..eb6036551789 100644
--- a/docs/design/coreclr/jit/jit-call-morphing.md
+++ b/docs/design/coreclr/jit/jit-call-morphing.md
@@ -95,10 +95,10 @@ we force that argument and any previous argument that is marked with any of the
     we haven't marked as needing a temp but still need to store in the outgoing args
     area is marked as needing a placeholder temp using `needPlace`.
 3. We force any arguments that use `localloc` to be evaluated into temps.
-4. We mark any address taken locals with the `GTF_GLOB_REF` flag. For two special
-cases we call `SetNeedsTemp()` and set up the temp in `fgMorphArgs`. `SetNeedsTemp`
-records the tmpNum used and sets `isTmp` so that we handle it like the other temps.
-The special cases are for `GT_MKREFANY` and for a `TYP_STRUCT` argument passed by
+4. We mark any address taken locals with the `GTF_GLOB_REF` flag. For a special
+case we call `SetNeedsTemp()` and set up the temp in `fgMorphArgs`.
+`SetNeedsTemp` records the tmpNum used and sets `isTmp` so that we handle it
+like the other temps.  The special case is for a `TYP_STRUCT` argument passed by
 value when we can't optimize away the extra copy.
 
 
diff --git a/docs/design/coreclr/jit/profile-count-reconstruction.md b/docs/design/coreclr/jit/profile-count-reconstruction.md
new file mode 100644
index 000000000000..f5f4c8006eb5
--- /dev/null
+++ b/docs/design/coreclr/jit/profile-count-reconstruction.md
@@ -0,0 +1,288 @@
+## Numeric Solvers for Profile Count Reconstruction
+
+It may not be readily apparent how count reconstruction works. Perhaps these notes will shed some light on things.
+
+In our flowgraph model we assume that the edge likelihoods are trustworthy and well formed (meaning each edge's likelihood is in [0,1] and the sum of all likelihoods for a block's successor edges is 1).
+
+The appeal of edge well-formedness is easy to check and relatively easy to maintain during various optimizations. It is a *local* property.
+
+We will use $p_{i,j}$ to denote the likelihood that block $i$ transfers control to block $j$. Thus local consistency means:
+
+$$ 0 \le p_{i,j} \le 1 $$
+
+and, for blocks with successors:
+
+$$ \sum_i p_{i,j} = 1 $$
+
+By contrast, block weight consistency requires that the flow into a block be balanced by the flow out of a block. It is a *global* property and harder to maintain during optimizations. It may also not be true initially.
+
+We will use $w_j$ for the weight of block $j$. We will also assume there is an external source and sink of weight for some blocks (method entry and exit points), $e_j$. Then block consistency means:
+
+$$ e_j + \sum_i w_i p_{i,j}  = \sum_k w_j p_{j,k} $$
+
+where the LHS is flow in and the RHS is flow out of block $j$. But
+
+$$ \sum_k w_j p_{j,k} = w_j \sum_k p_{j,k} = w_j $$
+
+so we can restate this as saying the external flow plus the flow into the block must equal the block weight:
+
+$$ e_j + \sum_i w_i p_{i,j} = w_j$$
+
+The goal of this work is to explore methods for reconstructing a set of consistent block weights $w_j$ from the external weight sources and sinks $e_j$ and edge likelihoods $p_{i,j}$.
+
+### General Solution
+
+The above can be summarized in matrix-vector form as
+
+$$ \boldsymbol w = \boldsymbol e + \boldsymbol P \boldsymbol w $$
+
+where to be able to express the sum of incoming flow as a standard matrix-vector product we have:
+
+$$ \boldsymbol P_{i,j} = { p_{j,i} } $$
+
+(that is, in $\boldsymbol P$, the flow from block $i$ is described by the entries in the $i\text{th}$ column, and the flow into block $i$ by the $i\text{th}$ row). A bit of rearranging puts this into the standard linear equation form
+
+$$ (\boldsymbol I - \boldsymbol P) \boldsymbol w = \boldsymbol e$$
+
+and this can be solved (in principle) for $\boldsymbol w$ by computing the inverse of $\boldsymbol I - \boldsymbol P$ (assuming this exists), giving
+
+$$ \boldsymbol w = {(\boldsymbol I - \boldsymbol P)}^{-1} \boldsymbol e $$
+
+For example, given the following graph with edge likelihoods a shown:
+
+<p align="center">
+<img src="https://github.com/dotnet/runtime/assets/10121823/6b96e29e-14f6-4875-90a8-aab4a4988146" height=400 />
+</p>
+
+we have
+
+```math
+\boldsymbol P =
+\begin{bmatrix}
+ 0 &    0 &    0 & 0 \cr
+ 1 &    0 &  0.8 & 0 \cr
+ 0 &  0.5 &    0 & 0 \cr
+ 0 &  0.5 &  0.2 & 0
+\end{bmatrix}
+```
+
+Note each column save the last sums to 1.0, representing the fact that the outgoing likelihoods from each block must sum to 1.0, unless there are no successors.
+
+Thus
+```math
+(\boldsymbol I - \boldsymbol P) =
+\begin{bmatrix}
+ 1 &    0 &    0 & 0 \\\
+-1 &    1 & -0.8 & 0 \\\
+ 0 & -0.5 &    1 & 0 \\\
+ 0 & -0.5 & -0.2 & 1
+\end{bmatrix}
+```
+and so (details of computing the inverse left as exercise for the reader)
+```math
+{(\boldsymbol I - \boldsymbol P)}^{-1} =
+\begin{bmatrix}
+1 & 0 & 0 & 0 \\\
+1.67 & 1.67 & 1.33 & 0 \\\
+0.83 & 0.83 & 1.67 & 0 \\\
+1 & 1 & 1 & 1
+\end{bmatrix}
+```
+Note the elements of ${(\boldsymbol I - \boldsymbol P)}^{-1}$ are all non-negative; intuitively, if we increase flow anywhere in the graph, it can only cause weights to increase or stay the same.
+
+If we feed 6 units of flow into A, we have
+```math
+\boldsymbol w = \begin{bmatrix} 6 \\\ 10 \\\ 5 \\\ 6 \end{bmatrix}
+```
+
+or graphically
+
+<p align="center">
+<img src="https://github.com/dotnet/runtime/assets/10121823/6f0d7c59-ed97-4a7e-bcba-3abd7b32c352" height=400 />
+</p>
+
+However, explicit computation of the inverse of a matrix is computationally expensive.
+
+Also note (though it's not fully obvious from such a small example) that the matrix $(\boldsymbol I - \boldsymbol P)$ is *sparse*: a typical block has only 1 or 2 successors, so the number of nonzero entries in each column will generally be either 2 or 3, no matter how many nodes we have. The inverse of a sparse matrix is typically not sparse, so computing it is not only costly in time but also in space.
+
+So solution techniques that can leverage sparseness are of particular interest.
+
+### A More Practical Solution
+
+Note the matrix $\boldsymbol I - \boldsymbol P$ has non-negative diagonal elements and negative non-diagonal elements, since all entries of $\boldsymbol P$ are in the range [0,1].
+
+If we further restrict ourselves to the case where $p_{i,i} \lt 1$ (meaning there are are no infinite self-loops) then all the diagonal entries are positive and the matrix has an inverse with no negative elements.
+
+Such matrices are known as M-matrices.
+
+It is well known that for an M-matrix $(\boldsymbol I - \boldsymbol P)$ the inverse can be computed as the limit of an infinite series
+
+$$ {(\boldsymbol I - \boldsymbol P)}^{-1} = \boldsymbol I + \boldsymbol P + \boldsymbol P^2 + \dots $$
+
+This gives rise to a simple *iterative* procedure for computing an approximate value of $\boldsymbol w$ (here superscripts on $\boldsymbol w$ are successive iterates, not powers)
+
+$$ \boldsymbol w^{(0)} = \boldsymbol e $$
+
+$$ \boldsymbol w^{(1)} = (\boldsymbol I + \boldsymbol P) \boldsymbol e = \boldsymbol e + \boldsymbol P \boldsymbol w^{(0)} $$
+
+$$ \boldsymbol w^{(2)} = (\boldsymbol I + \boldsymbol P + \boldsymbol P^2) \boldsymbol e = \boldsymbol e + \boldsymbol P \boldsymbol w^{(1)}$$
+
+$$ \dots$$
+
+$$ \boldsymbol w^{(k + 1)} = \boldsymbol e + \boldsymbol P \boldsymbol w^{(k)} $$
+
+where we can achieve any desired precision for $\boldsymbol w$ by iterating until the successive $\boldsymbol w$ differ by a small amount.
+
+Intuitively this should make sense, we are effectively pouring weight into the entry block(s) and letting the weights flow around in the graph until they reach a fixed point. If we do this for the example above, we get the following sequence of values for $\boldsymbol w^n$:
+
+```math
+\boldsymbol w^{(0)} = \begin{bmatrix} 6 \\\ 0 \\\ 0 \\\ 0 \end{bmatrix},
+\boldsymbol w^{(1)} = \begin{bmatrix} 6 \\\ 6 \\\ 0 \\\ 0 \end{bmatrix},
+\boldsymbol w^{(2)} = \begin{bmatrix} 6 \\\ 6 \\\ 3 \\\ 3 \end{bmatrix},
+\boldsymbol w^{(3)} = \begin{bmatrix} 6 \\\ 8.4 \\\ 3 \\\ 3.6 \end{bmatrix},
+\boldsymbol w^{(4)} = \begin{bmatrix} 6 \\\ 8.4 \\\ 4.2 \\\ 3.6 \end{bmatrix},
+\boldsymbol w^{(5)} = \begin{bmatrix} 6 \\\ 9.36 \\\ 4.2 \\\ 3.6 \end{bmatrix},
+\dots,
+\boldsymbol w^{(20)} = \begin{bmatrix} 6 \\\ 9.9990 \\\ 4.9995 \\\ 5.9992 \end{bmatrix},
+\dots
+```
+
+and the process converges to the weights found using the inverse. However convergence is fairly slow.
+
+Classically this approach is known as *Jacobi's method*. At each iterative step, the new values are based only on the old values.
+
+### Jacobi's Method
+
+If you read the math literature on iterative solvers, Jacobi's method is often described as follows. Given a linear system $\boldsymbol A \boldsymbol x = \boldsymbol b$, a *splitting* of $\boldsymbol A$ is $\boldsymbol A = \boldsymbol M - \boldsymbol N$, where $\boldsymbol M^{-1}$ exists. Then the *iteration matrix* $\boldsymbol H$ is given by $\boldsymbol H = \boldsymbol M^{-1} \boldsymbol N$. Given some initial guess at an answer $\boldsymbol x^{(0)}$ the iteration scheme is:
+
+$$ \boldsymbol x^{(k+1)} = \boldsymbol H \boldsymbol x^{(k)} + \boldsymbol M^{-1}\boldsymbol b$$
+
+And provided that $\rho(\boldsymbol H) \lt 1$,
+
+$$\lim_{k \to \infty} \boldsymbol x^{(k)}=\boldsymbol A^{-1} \boldsymbol b$$
+
+In our case $\boldsymbol A = \boldsymbol I - \boldsymbol P$ and so the splitting is simply $\boldsymbol M = \boldsymbol I$ and $\boldsymbol N = \boldsymbol P$. Since $\boldsymbol M = \boldsymbol I$, $\boldsymbol M^{-1} = \boldsymbol I$ (the identity matrix is its own inverse), $\boldsymbol H = \boldsymbol P$, $\boldsymbol x = \boldsymbol w$ and $\boldsymbol b = \boldsymbol e$, we end up with
+
+$$ \boldsymbol w^{(k+1)} = \boldsymbol P \boldsymbol w^{(k)} + \boldsymbol e$$
+
+as we derived above.
+
+As an alternative we could split $\boldsymbol A = (\boldsymbol I - \boldsymbol P)$ into diagonal part $\boldsymbol M = \boldsymbol D$ and remainder part $\boldsymbol N$. This only leads to differences from the splitting above when there are self loops, otherwise the diagonal of $\boldsymbol P$ is all zeros.
+
+With that splitting,
+
+
+```math
+ \boldsymbol D^{-1}_{i,i} = 1/a_{i,i} =  1/(1 - p_{i,i})
+```
+
+so as $p_{i,i}$ gets close to 1.0 the value can be quite large: these are the count amplifications caused by self-loops. If we write things out component-wise we get the classic formulation for Jacobi iteration:
+
+```math
+ x^{(k+1)}_i = \frac{1}{a_{i,i}} \left (b_i - \sum_{j \ne i} a_{i,j} x^{(k)}_j  \right)
+```
+
+or in our block weight and edge likelihood notation
+
+```math
+ w^{(k+1)}_i = \frac{1}{(1 - p_{i,i})} \left (e_i + \sum_{j \ne i} p_{j,i} w^{(k)}_j  \right)
+```
+
+Intuitively this reads: the new value of node $i$ is the sum of the external input (if any) plus the weights flowing in from (non-self) predecessors, with the sum scaled up by the self-loop factor.
+
+### On Convergence and Stability
+
+While the iterative method above is guaranteed to converge when $\boldsymbol A$ is an M-matrix, its rate of convergence is potentially problematic. For an iterative scheme, the asymptotic rate of convergence can be shown to be $R \approx -log_{10} \rho(\boldsymbol H)$ digits / iteration.
+
+Here the spectral radius $\rho(\boldsymbol H)$ is the magnitude of the largest eigenvalue of $\boldsymbol H$. For the example above $\boldsymbol H = \boldsymbol P$ and $\rho(\boldsymbol P) \approx 0.63$, giving $R = 0.2$. So to converge to $4$ decimal places takes about $20$ iterations, as the table of data above indicates.
+
+it is also worth noting that for synthesis the matrix $(\boldsymbol I - \boldsymbol P)$ is often *ill-conditioned*, meaning that small changes in the input vector $\boldsymbol e$ (or small inaccuracies in the likelihoods $p_{i,j}$) can lead to large changes in the solution vector $\boldsymbol w$. In some sense this is a feature; we know that blocks in flow graphs can have widely varying weights, with some blocks rarely executed and others executed millions of times per call to the method. So it must be possible for $(\boldsymbol I - \boldsymbol P)$ to amplify the magnitude of a "small" input (say 1 call to the method) into large block counts.
+
+### Accelerating Convergence I: Gauss-Seidel and Reverse Postorder
+
+It's also well-known that Gauss-Seidel iteration often converges faster than Jacobi iteration. Here instead of always using the old iteration values, we try and use the new iteration values that are available, where we presume each update happens in order of increasing $i$:
+
+```math
+ x^{(k+1)}_i = \frac{1}{a_{i,i}} \left(b_i - \sum_{j \lt i} a_{i,j} x^{(k+1)}_j  - \sum_{j \gt i} a_{i,j} x^{(k)}_j \right) $$
+```
+
+or again in our notation
+
+```math
+ w^{(k+1)}_i = \frac{1}{(1 - p_{i,i})} \left(e_i + \sum_{j \lt i} p_{j,i} w^{(k + 1)}_j + \sum_{j \gt i} p_{j,i} w^{(k)}_j \right) $$
+```
+
+In the above scheme the order of visiting successive blocks is fixed unspecified, and (in principle) any order can be used. But by using a reverse postorder to index the blocks, we can ensure a maximal amount of forward propagation per iteration. Note that if a block has an incoming edge from a node that appears later in the reverse postorder, that block is a loop header.
+
+If we do, that the code above nicely corresponds to our notion of forward and backward edges in the RPO:
+
+```math
+ w^{(k+1)}_i = \frac{1}{\underbrace{(1 - p_{i,i}}_\text{self edge})} \left(e_i + \underbrace{\sum_{j \lt i} p_{j,i} w^{(k + 1)}_j}_\text{forward edges in RPO} + \underbrace{\sum_{j \gt i} p_{j,i} w^{(k)}_j}_\text{backward edges in RPO} \right)
+```
+
+Note because of the order of reads and writes, $\boldsymbol w^{(k+1)}$ can share storage with $\boldsymbol w^{(k)}$.
+
+On the example above this results in:
+
+$$
+\boldsymbol w^{(0)} = \begin{bmatrix} 6 \\\ 6 \\\ 3 \\\ 3 \end{bmatrix},
+\boldsymbol w^{(1)} = \begin{bmatrix} 6 \\\ 8.4 \\\ 4.2 \\\ 5.04 \end{bmatrix},
+\boldsymbol w^{(2)} = \begin{bmatrix} 6 \\\ 9.36 \\\ 4.68 \\\ 5.62 \end{bmatrix},
+\boldsymbol w^{(3)} = \begin{bmatrix} 6 \\\ 9.74 \\\ 4.87 \\\ 5.85 \end{bmatrix},
+\boldsymbol w^{(4)} = \begin{bmatrix} 6 \\\ 9.90 \\\ 4.95 \\\ 5.94 \end{bmatrix},
+\boldsymbol w^{(5)} = \begin{bmatrix} 6 \\\ 9.96 \\\ 4.98 \\\ 5.98 \end{bmatrix},
+\dots,
+\boldsymbol w^{(9)} = \begin{bmatrix} 6 \\\ 9.9990 \\\ 4.9995 \\\ 5.9994 \end{bmatrix},
+\dots
+$$
+
+So it is converging about twice as fast. As with the Jacobi method one can re-express this as a splitting and determine an iteration matrix $\boldsymbol H$ and determine the dominant eigenvalue, and from this the rate of convergence, but we will not do so here.
+
+### Accelerating Convergence II: Cyclic Probabilities
+
+A flow graph is reducible (or is said to have reducible loops) if every cycle in the graph has a block in the cycle that dominates the other blocks in the cycle. We will call such cycles natural loops, distinguished by their entry blocks.
+
+For reducible loops we can compute the amount by which they amplify flow using a technique described by Wu and Larus: given a loop head $h$ we classify the predecessor into two sets: input edges that do not come from a block within the loop, and back edges that come from a block within the loop. We then inject one unit of flow into the block and propagate it through the loop, and compute the sum of the weights on the back edges. This value will be some $p$ where $0 \le p \le 1$. Then the *cyclic probability* $C_p$ for $h$ is $C_p(h) = 1 / (1 - p)$. To avoid dividing by zero we artificially cap $C_p$ at some value less than $1$.
+
+Note also that the technique above won't compute an accurate $C_p$ for loops that contain improper (irreducible) loops, as solving for $C_p$ in such cases would require iteration (the single-pass $C_p$ will be an underestimate). So we must also track which loops contain improper loops.
+
+If we add this refinement to our algorithm we end up with:
+
+```math
+ w^{(k+1)}_i =
+\begin{cases}
+ C_p(i) \left(e_i + \sum_{j \lt i} p_{j,i} w^{(k + 1)}_j  \right), \text{ block } i \text{ is a natural loop head, and does not contain an improper loop} \\\
+ \frac{1}{(1 - p_{i,i})} \left(e_i + \sum_{j \lt i} p_{j,i} w^{(k + 1)}_j + \sum_{j \gt i} p_{j,i} w^{(k)}_j \right)
+\end{cases}
+```
+
+the second clause includes both blocks without any back edges, blocks with back edges that are not headers of natural loops, and blocks that are headers of natural loops where the loop contains an improper loop.
+
+On an example like the one above this converges in one pass. If any $C_p$ was capped then the solution will be approximate and we will have failed to achieve a global balance. But we will also (generally) have avoided creating infinite or very large counts.
+
+One can imagine that if we cap some $C_p$ we could also try to alter some of the $p_{j,i}$ to bring things back into balance, but this seems tricky if there are multiple paths through the loop. And we're basically deciding at that point that consistency is more important than accuracy.
+
+Since the remainder of the JIT is going to have to cope with lack of global balance anyways (recall it is hard to preserve) for now we are going to ty and tolerate reconstruction inconsistencies.
+
+The algorithm described above is implemented in the code as the `GaussSeidel` solver.
+
+### Cycles That Are Not Natural Loops, More Sophisticated Solvers, and Deep Nests
+
+If the flow graph has cycles that are not natural loops (irreducible loops) the above computations will converge but again may converge very slowly. On a sample of about 500 graphs with irreducible loops the modified Gauss-Seidel approach above required more than 20 iterations in 120 cases and more than 50 iterations in 70 cases, with worst-case around 500 iterations.
+
+SOR is a classic convergence altering technique, but unfortunately, for M-Matrices SOR can only safely be used to slow down convergence.
+
+There does not seem to be a good analog of $C_p$ for such cases, though it's possible that "block diagonal" solvers may be tackling exactly that problem.
+
+It's possible that more sophisticated solution techniques like BiCGstab or CGS might be worth consideration. Or perhaps a least-squares solution, if we're forced to be approximate, to try and minimize the overall approximation error.
+
+In very deep loop nests even $C_p$ is not enough to prevent creation of large counts. We could try and adjust the cap level downwards as the loops get deeper, or distribute the $C_p$ "tax" across all the loops. This tends to only be a problem for stress test cases.
+
+### References
+
+Carl D. Meyer. *Matrix Analysis and Applied Linear Algebra*, in particular section 7.10.
+
+Nick Higham. [What is an M-Matrix?](https://nhigham.com/2021/03/16/what-is-an-m-matrix/)
+
+Youfeng Wu and James R. Larus. Static branch frequency and program profile analysis, Micro-27 (1994).
+
diff --git a/docs/design/coreclr/jit/ryujit-overview.md b/docs/design/coreclr/jit/ryujit-overview.md
index cdb17002ee19..5e63d38e98f6 100644
--- a/docs/design/coreclr/jit/ryujit-overview.md
+++ b/docs/design/coreclr/jit/ryujit-overview.md
@@ -222,6 +222,7 @@ The top-level function of interest is `Compiler::compCompile`. It invokes the fo
 | [Common Subexpression Elimination (CSE)](#cse) | Elimination of redundant subexressions based on value numbers. |
 | [Assertion Propagation](#assertion-propagation) | Utilizes value numbers to propagate and transform based on properties such as non-nullness. |
 | [Range analysis](#range-analysis) | Eliminate array index range checks based on value numbers and assertions |
+| [Induction variable optimization](#iv-opts) | Optimize induction variables used inside natural loops based on scalar evolution analysis |
 | [VN-based dead store elimination](#vn-based-dead-store-elimination) | Eliminate stores that do not change the value of a local. |
 | [If conversion](#if-conversion) | Transform conditional definitions into `GT_SELECT` operators. |
 | [Rationalization](#rationalization) | Flowgraph order changes from `FGOrderTree` to `FGOrderLinear`. All `GT_COMMA` nodes are transformed. |
@@ -347,6 +348,11 @@ reused.
 
 Utilizes value numbers to propagate and transform based on properties such as non-nullness.
 
+### <a name="iv-opts"></a>Induction variable optimization
+
+Performs scalar evolution analysis and utilized it to optimize induction variables inside loops.
+Currently this entails IV widening which is done on x64 only.
+
 ### <a name="range-analysis"></a>Range analysis
 
 Optimize array index range checks based on value numbers and assertions.
diff --git a/docs/design/coreclr/jit/ryujit-tutorial.md b/docs/design/coreclr/jit/ryujit-tutorial.md
index 34466e45afbc..ec900ccc8cd9 100644
--- a/docs/design/coreclr/jit/ryujit-tutorial.md
+++ b/docs/design/coreclr/jit/ryujit-tutorial.md
@@ -447,6 +447,10 @@ This is the same diagram as before, but with additional links to indicate execut
   - Determine initial value for dependent phis
 - Eliminate checks where the range of the index is within the check range
 
+### Induction Variable Optimization
+- Perform scalar evolution analysis to describe values of IR nodes inside loops
+- Perform IV widening on x64 to avoid unnecessary zero extensions for array/span indexing
+
 ## RyuJIT Back-End
 
 ### Rationalization
diff --git a/docs/design/coreclr/jit/struct-abi.md b/docs/design/coreclr/jit/struct-abi.md
index efd299c5111e..1888dd571ae8 100644
--- a/docs/design/coreclr/jit/struct-abi.md
+++ b/docs/design/coreclr/jit/struct-abi.md
@@ -85,8 +85,7 @@ This method is responsible for the first part of what is currently `fgMorphArgs(
     - Note that the `isSplit` property would evaluate to false on targets where
       it is not supported, reducing the need for `ifdef`s (we can rely on the compiler
       to eliminate those dead paths).
-- Validate that each struct argument is either a `GT_LCL_VAR`, a `GT_OBJ`,
-  or a `GT_MKREFANY`.
+- Validate that each struct argument is either a `GT_LCL_VAR` or a `GT_OBJ`
 
 During the initial `fgMorph` phase, `fgMorphArgs()` does the following:
 
diff --git a/docs/design/datacontracts/GCHandle.md b/docs/design/datacontracts/GCHandle.md
new file mode 100644
index 000000000000..250b87f1e37a
--- /dev/null
+++ b/docs/design/datacontracts/GCHandle.md
@@ -0,0 +1,28 @@
+# Contract GCHandle
+
+This contract allows decoding and reading of GCHandles. This will also include handle enumeration in the future
+
+## Data structures defined by contract
+``` csharp
+struct DacGCHandle
+{
+    DacGCHandle(TargetPointer value) { Value = value; }
+    TargetPointer Value;
+}
+```
+
+## Apis of contract
+``` csharp
+TargetPointer GetObject(DacGCHandle gcHandle);
+```
+
+## Version 1
+
+``` csharp
+TargetPointer GetObject(DacGCHandle gcHandle)
+{
+    if (gcHandle.Value == TargetPointer.Null)
+        return TargetPointer.Null;
+    return Target.ReadTargetPointer(gcHandle.Value);
+}
+```
diff --git a/docs/design/datacontracts/SList.md b/docs/design/datacontracts/SList.md
new file mode 100644
index 000000000000..ee1e9c66e06b
--- /dev/null
+++ b/docs/design/datacontracts/SList.md
@@ -0,0 +1,78 @@
+# Contract SList
+
+This contract allows reading and iterating over an SList data structure.
+
+## Data structures defined by contract
+``` csharp
+class SListReader
+{
+    public abstract TargetPointer GetHead(TargetPointer slistPointer);
+    public abstract TargetPointer GetNext(TargetPointer entryInSList);
+    public IEnumerator<TargetPointer> EnumerateList(TargetPointer slistPointer)
+    {
+        TargetPointer current = GetHead(slistPointer);
+        
+        while (current != TargetPointer.Null)
+        {
+            yield return current;
+            current = GetNext(current);
+        }
+    }
+    public IEnumerator<TargetPointer> EnumerateListFromEntry(TargetPointer entryInSList)
+    {
+        TargetPointer current = entryInSList;
+        
+        while (current != TargetPointer.Null)
+        {
+            yield return current;
+            current = GetNext(current);
+        }
+    }
+}
+```
+
+## Apis of contract
+``` csharp
+SListReader GetReader(string typeOfDataStructure);
+```
+
+## Version 1
+
+``` csharp
+private class SListReaderV1 : SListReader
+{
+    uint _offsetToSLinkField;
+    Target Target;
+
+    SListReaderV1(Target target, string typeToEnumerate)
+    {
+        Target = target;
+        _offsetToSLinkField = Target.Contracts.GetFieldLayout(typeToEnumerate, "m_Link").Offset;
+    }
+    public override TargetPointer GetHead(TargetPointer slistPointer)
+    {
+        TargetPointer headPointer = new SListBase(Target, slistPointer).m_pHead;
+        TargetPointer slinkInHeadObject = new SLink(Target, headPointer).m_pNext;
+        if (slinkInHeadObject == TargetPointer.Null)
+            return TargetPointer.Null;
+        return slinkInHeadObject - _offsetToSLinkField;
+    }
+
+    public override TargetPointer GetNext(TargetPointer entryInSList)
+    {
+        if (entryInSList == TargetPointer.Null)
+            throw new ArgumentException();
+        
+        TargetPointer slinkPointer = entryInSList + _offsetToSLinkField;
+        TargetPointer slinkInObject = new SLink(Target, slinkPointer).m_pNext;
+        if (slinkInObject == TargetPointer.Null)
+            return TargetPointer.Null;
+        return slinkInHeadObject - _offsetToSLinkField;
+    }
+}
+
+SListReader GetReader(string typeOfDataStructure)
+{
+    return new SListReaderV1(typeOfDataStructure);
+}
+```
diff --git a/docs/design/datacontracts/Thread.md b/docs/design/datacontracts/Thread.md
new file mode 100644
index 000000000000..7bee0fe79fdc
--- /dev/null
+++ b/docs/design/datacontracts/Thread.md
@@ -0,0 +1,195 @@
+# Contract Thread
+
+This contract is for reading and iterating the threads of the process.
+
+## Data structures defined by contract
+``` csharp
+record struct DacThreadStoreData (
+    int ThreadCount,
+    TargetPointer FirstThread,
+    TargetPointer FinalizerThread,
+    TargetPointer GcThread);
+
+record struct DacThreadStoreCounts (
+    int UnstartedThreadCount,
+    int BackgroundThreadCount,
+    int PendingThreadCount,
+    int DeadThreadCount);
+
+enum ThreadState
+{
+    TS_Unknown                = 0x00000000,    // threads are initialized this way
+
+    TS_AbortRequested         = 0x00000001,    // Abort the thread
+
+    TS_GCSuspendPending       = 0x00000002,    // ThreadSuspend::SuspendRuntime watches this thread to leave coop mode.
+    TS_GCSuspendRedirected    = 0x00000004,    // ThreadSuspend::SuspendRuntime has redirected the thread to suspention routine.
+    TS_GCSuspendFlags         = TS_GCSuspendPending | TS_GCSuspendRedirected, // used to track suspension progress. Only SuspendRuntime writes/resets these.
+
+    TS_DebugSuspendPending    = 0x00000008,    // Is the debugger suspending threads?
+    TS_GCOnTransitions        = 0x00000010,    // Force a GC on stub transitions (GCStress only)
+
+    TS_LegalToJoin            = 0x00000020,    // Is it now legal to attempt a Join()
+
+    TS_ExecutingOnAltStack    = 0x00000040,    // Runtime is executing on an alternate stack located anywhere in the memory
+
+    TS_Hijacked               = 0x00000080,    // Return address has been hijacked
+
+    // unused                 = 0x00000100,
+    TS_Background             = 0x00000200,    // Thread is a background thread
+    TS_Unstarted              = 0x00000400,    // Thread has never been started
+    TS_Dead                   = 0x00000800,    // Thread is dead
+
+    TS_WeOwn                  = 0x00001000,    // Exposed object initiated this thread
+    TS_CoInitialized          = 0x00002000,    // CoInitialize has been called for this thread
+
+    TS_InSTA                  = 0x00004000,    // Thread hosts an STA
+    TS_InMTA                  = 0x00008000,    // Thread is part of the MTA
+
+    // Some bits that only have meaning for reporting the state to clients.
+    TS_ReportDead             = 0x00010000,    // in WaitForOtherThreads()
+    TS_FullyInitialized       = 0x00020000,    // Thread is fully initialized and we are ready to broadcast its existence to external clients
+
+    TS_TaskReset              = 0x00040000,    // The task is reset
+
+    TS_SyncSuspended          = 0x00080000,    // Suspended via WaitSuspendEvent
+    TS_DebugWillSync          = 0x00100000,    // Debugger will wait for this thread to sync
+
+    TS_StackCrawlNeeded       = 0x00200000,    // A stackcrawl is needed on this thread, such as for thread abort
+                                                // See comment for s_pWaitForStackCrawlEvent for reason.
+
+    // unused                 = 0x00400000,
+
+    // unused                 = 0x00800000,
+    TS_TPWorkerThread         = 0x01000000,    // is this a threadpool worker thread?
+
+    TS_Interruptible          = 0x02000000,    // sitting in a Sleep(), Wait(), Join()
+    TS_Interrupted            = 0x04000000,    // was awakened by an interrupt APC. !!! This can be moved to TSNC
+
+    TS_CompletionPortThread   = 0x08000000,    // Completion port thread
+
+    TS_AbortInitiated         = 0x10000000,    // set when abort is begun
+
+    TS_Finalized              = 0x20000000,    // The associated managed Thread object has been finalized.
+                                                // We can clean up the unmanaged part now.
+
+    TS_FailStarted            = 0x40000000,    // The thread fails during startup.
+    TS_Detached               = 0x80000000,    // Thread was detached by DllMain
+}
+
+record struct DacThreadData (
+    uint ThreadId;
+    TargetNUint OsThreadId;
+    ThreadState State;
+    bool PreemptiveGCDisabled
+    TargetPointer AllocContextPointer;
+    TargetPointer AllocContextLimit;
+    TargetPointer Frame;
+    TargetPointer FirstNestedException;
+    TargetPointer TEB;
+    DacGCHandle LastThrownObjectHandle;
+    TargetPointer NextThread;
+);
+```
+
+## Apis of contract
+``` csharp
+DacThreadStoreData GetThreadStoreData();
+DacThreadStoreCounts GetThreadCounts();
+DacThreadData GetThreadData(TargetPointer threadPointer);
+TargetPointer GetNestedExceptionInfo(TargetPointer nestedExceptionPointer, out TargetPointer nextNestedException);
+TargetPointer GetManagedThreadObject(TargetPointer threadPointer);
+```
+
+## Version 1
+
+
+
+``` csharp
+SListReader ThreadListReader = Contracts.SList.GetReader("Thread");
+
+DacThreadStoreData GetThreadStoreData()
+{
+    TargetPointer threadStore = Target.ReadGlobalTargetPointer("s_pThreadStore");
+    var runtimeThreadStore = new ThreadStore(Target, threadStore);
+
+    TargetPointer firstThread = ThreadListReader.GetHead(runtimeThreadStore.SList.Pointer);
+
+    return new DacThreadStoreData(
+        ThreadCount : runtimeThreadStore.m_ThreadCount,
+        FirstThread: firstThread,
+        FinalizerThread: Target.ReadGlobalTargetPointer("g_pFinalizerThread"),
+        GcThread: Target.ReadGlobalTargetPointer("g_pSuspensionThread"));
+}
+
+DacThreadStoreCounts GetThreadCounts()
+{
+    TargetPointer threadStore = Target.ReadGlobalTargetPointer("s_pThreadStore");
+    var runtimeThreadStore = new ThreadStore(Target, threadStore);
+
+    return new DacThreadStoreCounts(
+        ThreadCount : runtimeThreadStore.m_ThreadCount,
+        UnstartedThreadCount : runtimeThreadStore.m_UnstartedThreadCount,
+        BackgroundThreadCount : runtimeThreadStore.m_BackgroundThreadCount,
+        PendingThreadCount : runtimeThreadStore.m_PendingThreadCount,
+        DeadThreadCount: runtimeThreadStore.m_DeadThreadCount,
+}
+
+DacThreadData GetThreadData(TargetPointer threadPointer)
+{
+    var runtimeThread = new Thread(Target, threadPointer);
+
+    TargetPointer firstNestedException = TargetPointer.Null;
+    if (Target.ReadGlobalInt32("FEATURE_EH_FUNCLETS"))
+    {
+        if (runtimeThread.m_ExceptionState.m_pCurrentTracker != TargetPointer.Null)
+        {
+            firstNestedException = new ExceptionTrackerBase(Target, runtimeThread.m_ExceptionState.m_pCurrentTracker).m_pPrevNestedInfo;
+        }
+    }
+    else
+    {
+        firstNestedException = runtimeThread.m_ExceptionState.m_currentExInfo.m_pPrevNestedInfo;
+    }
+
+    return new DacThread(
+        ThreadId : runtimeThread.m_ThreadId,
+        OsThreadId : (OsThreadId)runtimeThread.m_OSThreadId,
+        State : (ThreadState)runtimeThread.m_State,
+        PreemptiveGCDisabled : thread.m_fPreemptiveGCDisabled != 0,
+        AllocContextPointer : thread.m_alloc_context.alloc_ptr,
+        AllocContextLimit : thread.m_alloc_context.alloc_limit,
+        Frame : thread.m_pFrame,
+        TEB : thread.Has_m_pTEB ? thread.m_pTEB : TargetPointer.Null,
+        LastThreadObjectHandle : new DacGCHandle(thread.m_LastThrownObjectHandle),
+        FirstNestedException : firstNestedException,
+        NextThread : ThreadListReader.GetHead.GetNext(threadPointer)
+    );
+}
+
+TargetPointer GetNestedExceptionInfo(TargetPointer nestedExceptionPointer, out TargetPointer nextNestedException)
+{
+    if (nestedExceptionPointer == TargetPointer.Null)
+    {
+        throw new InvalidArgumentException();
+    }
+    if (Target.ReadGlobalInt32("FEATURE_EH_FUNCLETS"))
+    {
+        var exData = new ExceptionTrackerBase(Target, nestedExceptionPointer);
+        nextNestedException = exData.m_pPrevNestedInfo;
+        return Contracts.GCHandle.GetObject(exData.m_hThrowable);
+    }
+    else
+    {
+        var exData = new ExInfo(Target, nestedExceptionPointer);
+        nextNestedException = exData.m_pPrevNestedInfo;
+        return Contracts.GCHandle.GetObject(exData.m_hThrowable);
+    }
+}
+
+TargetPointer GetManagedThreadObject(TargetPointer threadPointer)
+{
+    var runtimeThread = new Thread(Target, threadPointer);
+    return Contracts.GCHandle.GetObject(new DacGCHandle(runtimeThread.m_ExposedObject));
+}
+```
diff --git a/docs/design/datacontracts/contract-descriptor.md b/docs/design/datacontracts/contract-descriptor.md
new file mode 100644
index 000000000000..1e3ddabd6dd7
--- /dev/null
+++ b/docs/design/datacontracts/contract-descriptor.md
@@ -0,0 +1,100 @@
+# Contract Descriptor
+
+## Summary
+
+The [data contracts design](./datacontracts_design.md) is a mechanism that allows diagnostic tooling
+to understand the behavior of certain .NET runtime subsystems and data structures.  In a typical
+scenario, a diagnostic tool such as a debugger may have access to a target .NET process (or a memory
+dump of such a process) from which it may request to read and write certain regions of memory.
+
+This document describes a mechanism by which a diagnostic tool may acquire the following information:
+* some details about the target process' architecture
+* a collection of types and their sizes and/or the offsets of certain fields within each type
+* a collection of global values
+* a collection of /algorithmic contracts/ that are satisfied by the target process
+
+## Contract descriptor
+
+The contract descriptor consists of the follow structure.  All multi-byte values are in target architecture endianness.
+
+```c
+struct DotNetRuntimeContractDescriptor
+{
+    uint64_t magic;
+    uint32_t flags;
+    uint32_t descriptor_size;
+    const char *descriptor;
+    uint32_t aux_data_count;
+    uint32_t pad0;
+    uintptr_t *aux_data;
+};
+```
+
+The `magic` is `0x44_4e_43_43_44_41_43_00` ("DNCCDAC\0") stored using the target architecture
+endianness. This is sufficient to discover the target architecture endianness by comparing the
+value in memory to `0x44_4e_43_43_44_41_43_00` and to `0x00_43_41_44_43_43_4e_44`.
+
+The following `flags` bits are defined:
+
+| Bits 31-2 | Bit 1   | Bit 0 |
+| --------- | ------- | ----- |
+| Reserved  | ptrSize |   1   |
+
+If `ptrSize` is 0, the architecture is 64-bit.  If it is 1, the architecture is 32-bit.  The
+reserved bits should be written as zero.  Diagnostic tooling may ignore non-zero reserved bits.
+
+The `descriptor` is a pointer to a UTF-8 JSON string described in [data descriptor physical layout](./data_descriptor.md#Physical_JSON_descriptor).  The total number of bytes is given by `descriptor_size`.
+
+The auxiliary data for the JSON descriptor is stored at the location `aux_data` in `aux_data_count` pointer-sized slots.
+
+### Architecture properties
+
+Although `DotNetRuntimeContractDescriptor` contains enough information to discover the target
+architecture endianness pointer size, it is expected that in all scenarios diagnostic tooling will
+already have this information available through other channels.  Diagnostic tools may use the
+information derived from `DotNetRuntimeContractDescriptor` for validation.
+
+### Compatible contracts
+
+The `descriptor` is a JSON dictionary that is used for storing the [in-memory data descriptor](./data_descriptor.md#Physical_JSON_Descriptor)
+and the [compatible contracts](./datacontracts_design.md#Compatible_Contract).
+
+The compatible contracts are stored in the top-level key `"contracts"`.  The value will be a
+dictionary that contains each contract name as a key.  Each value is the version of the contract as
+a JSON integer constant.
+
+**Contract example**:
+
+``` jsonc
+{"Thread":1,"GCHandle":1,...}
+```
+
+**Complete in-memory data descriptor example**:
+
+``` jsonc
+{
+  "version": "0",
+  "baseline": "example-64",
+  "types":
+  {
+    "Thread": { "ThreadId": 32, "ThreadState": 0, "Next": 128 },
+    "ThreadStore": { "ThreadCount": 32, "ThreadList": 8 }
+  },
+  "globals":
+  {
+    "FEATURE_COMINTEROP": 0,
+    "s_pThreadStore": [ 0 ] // indirect from aux data offset 0
+  },
+  "contracts": {"Thread": 1, "GCHandle": 1, "ThreadStore": 1}
+}
+```
+
+## Contract symbol
+
+To aid in discovery, the contract descriptor should be exported by the module hosting the .NET
+runtime with the name `DotNetRuntimeContractDescriptor` using the C symbol naming conventions of the
+target platform.
+
+In scenarios where multiple .NET runtimes may be present in a single process, diagnostic tooling
+should look for the symbol in each loaded module to discover all the runtimes.
+
diff --git a/docs/design/datacontracts/contract_csharp_api_design.cs b/docs/design/datacontracts/contract_csharp_api_design.cs
new file mode 100644
index 000000000000..062c04806003
--- /dev/null
+++ b/docs/design/datacontracts/contract_csharp_api_design.cs
@@ -0,0 +1,386 @@
+namespace DataContracts
+{
+
+    // Indicate that this type is a DataContractType which should have the DataContractTypeSourceGenerator applied to it
+    // Also that any types nested in this type with the DataContractLayout define particular versioned layouts for data structures
+    class DataContractTypeAttribute : System.Attribute {}
+
+
+    // Defined on each specific data layout, the fields of the type are defined by the fields of the class
+    class DataContractLayoutAttribute : System.Attribute
+    {
+        public DataContractLayoutAttribute(uint version, uint typeSize) { Version = version; TypeSize = typeSize; }
+        public uint Version;
+        public uint TypeSize;
+    }
+
+    // Defined on the class that contains global fields for a contract. The name and version are used to identify the contract
+    class DataContractGlobalsAttribute : System.Attribute
+    {
+        public DataContractGlobalsAttribute(string name, uint version) { Name = name; Version = version; }
+        public string Name;
+        public uint Version;
+    }
+
+    // Defined on the class that contains an algorithmic contract. The version, and base type of the associated type are used to identify the contract,
+    // there must exist a constructor of the type with the following signature (DataContracts.Target target, uint contractVersion)
+    class DataContractAlgorithmAttribute : System.Attribute
+    {
+        public DataContractAlgorithmAttribute(params uint []version) { Name = name; Version = version; }
+        public uint[] Version;
+    }
+
+    struct TargetPointer
+    {
+        public ulong Value;
+        public static TargetPointer Null = new TargetPointer(0);
+        // Add a full set of operators to support pointer arithmetic
+    }
+
+    struct TargetNInt
+    {
+        public long Value;
+        // Add a full set of operators to support arithmetic as well as casting to/from TargetPointer 
+    }
+
+    struct TargetNUInt
+    {
+        public ulong Value;
+        // Add a full set of operators to support arithmetic as well as casting to/from TargetPointer
+    }
+
+    enum FieldType
+    {
+        Int8Type,
+        UInt8Type,
+        Int16Type,
+        UInt16Type,
+        Int32Type,
+        UInt32Type,
+        Int64Type,
+        UInt64Type,
+        NIntType,
+        NUIntType,
+        PointerType,
+
+        // Other values are dynamically assigned by the type definition rules
+    }
+
+    struct FieldLayout
+    {
+        public int Offset;
+        public FieldType Type;
+    }
+
+    interface IAlgorithmContract
+    {
+        void Init();
+    }
+
+    interface IContract
+    {
+        string Name { get; }
+        uint Version { get; }
+    }
+        class Target
+    {
+        // Users of the data contract may adjust this number to force re-reading of all data
+        public int CurrentEpoch = 0;
+
+        sbyte ReadInt8(TargetPointer pointer);
+        byte ReadUInt8(TargetPointer pointer);
+        short ReadInt16(TargetPointer pointer);
+        ushort ReadUInt16(TargetPointer pointer);
+        int ReadInt32(TargetPointer pointer);
+        uint ReadUInt32(TargetPointer pointer);
+        long ReadInt64(TargetPointer pointer);
+        ulong ReadUInt64(TargetPointer pointer);
+        TargetPointer ReadTargetPointer(TargetPointer pointer);
+        TargetNInt ReadNInt(TargetPointer pointer);
+        TargetNUInt ReadNUint(TargetPointer pointer);
+        byte[] ReadByteArray(TargetPointer pointer, ulong size);
+        void FillByteArray(TargetPointer pointer, byte[] array, ulong size);
+
+        bool TryReadInt8(TargetPointer pointer, out sbyte value);
+        bool TryReadUInt8(TargetPointer pointer, out byte value);
+        bool TryReadInt16(TargetPointer pointer, out short value);
+        bool TryReadUInt16(TargetPointer pointer, out ushort value);
+        bool TryReadInt32(TargetPointer pointer, out int value);
+        bool TryReadUInt32(TargetPointer pointer, out uint value);
+        bool TryReadInt64(TargetPointer pointer, out long value);
+        bool TryReadUInt64(TargetPointer pointer, out ulong value);
+        bool TryReadTargetPointer(TargetPointer pointer, out TargetPointer value);
+        bool TryReadNInt(TargetPointer pointer, out TargetNInt value);
+        bool TryReadNUInt(TargetPointer pointer, out TargetNUInt value);
+        bool TryReadByteArray(TargetPointer pointer, ulong size, out byte[] value);
+        bool TryFillByteArray(TargetPointer pointer, byte[] array, ulong size);
+
+        // If pointer is 0, then the return value will be 0
+        TargetPointer GetTargetPointerForField(TargetPointer pointer, FieldLayout fieldLayout);
+
+        sbyte ReadGlobalInt8(string globalName);
+        byte ReadGlobalUInt8(string globalName);
+        short ReadGlobalInt16(string globalName);
+        ushort ReadGlobalUInt16(string globalName);
+        int ReadGlobalInt32(string globalName);
+        uint ReadGlobalUInt32(string globalName);
+        long ReadGlobalInt64(string globalName);
+        ulong ReadGlobalUInt64(string globalName);
+        TargetPointer ReadGlobalTargetPointer(string globalName);
+
+        bool TryReadGlobalInt8(string globalName, out sbyte value);
+        bool TryReadGlobalUInt8(string globalName, out byte value);
+        bool TryReadGlobalInt16(string globalName, out short value);
+        bool TryReadGlobalUInt16(string globalName, out ushort value);
+        bool TryReadGlobalInt32(string globalName, out int value);
+        bool TryReadGlobalUInt32(string globalName, out uint value);
+        bool TryReadGlobalInt64(string globalName, out long value);
+        bool TryReadGlobalUInt64(string globalName, out ulong value);
+        bool TryReadGlobalTargetPointer(string globalName, out TargetPointer value);
+
+        Contracts Contract { get; }
+
+        partial class Contracts
+        {
+            FieldLayout GetFieldLayout(string typeName, string fieldName);
+            bool TryGetFieldLayout(string typeName, string fieldName, out FieldLayout layout);
+            int GetTypeSize(string typeName);
+            bool TryGetTypeSize(string typeName, out int size);
+
+            object GetContract(string contractName);
+            bool TryGetContract(string contractName, out object contract);
+
+            // Every contract that is defined has a field here. As an example this document defines a MethodTableContract
+            // If the contract is not supported by the runtime in use, then the implementation of the contract will be the base type which
+            // is defined to throw if it is ever used.
+
+            // List of contracts will be inserted here by source generator
+            MethodTableContract MethodTableContract;
+        }
+    }
+
+    // Types defined by contracts live here
+    namespace ContractDefinitions
+    {
+        class CompositeContract
+        {
+            List<Tuple<string, uint>> Subcontracts;
+        }
+
+        class DataStructureContract
+        {
+            string MethodTableName {get;}
+            List<Tuple<string, FieldLayout>> FieldData;
+        }
+
+        // Insert Algorithmic Contract definitions here
+        class MethodTableContract
+        {
+            public virtual int DynamicTypeID(TargetPointer methodTablePointer) { throw new NotImplementedException(); }
+            public virtual int BaseSize(TargetPointer methodTablePointer) { throw new NotImplementedException(); }
+        }
+    }
+
+    namespace ContractImplementation
+    {
+        // Get contract from the predefined contract database
+        static class PredefinedContracts
+        {
+            public static IContract GetContract(string name, uint version, Target target)
+            {
+                // Do some lookup and allocate an instance of the contract requested
+                //
+                // This lookup can either be reflection based, or we can do it based on a source generator.
+            }
+        }
+
+        [DataContractGlobals("FeatureFlags", 1)]
+        public class FeatureFlags_1
+        {
+            public const int FeatureComInterop = 0;
+        }
+
+        [DataContractGlobals("FeatureFlags", 2)]
+        public class FeatureFlags_2
+        {
+            public const int FeatureComInterop = 1;
+        }
+
+        [DataContractAlgorithm(1)]
+        class MethodTableContract_1 : ContractDefinitions.MethodTableContract, IAlgorithmContract
+        {
+            DataContracts.Target Target;
+            readonly uint ContractVersion;
+            public MethodTableContract_1(DataContracts.Target target, uint contractVersion) { Target = target; ContractVersion = contractVersion; }
+
+            public virtual int DynamicTypeID(TargetPointer methodTablePointer) { return new MethodTable(_target, methodTablePointer).dynamicTypeId; }
+            public virtual int BaseSize(TargetPointer methodTablePointer) { return new MethodTable(_target, methodTablePointer).baseSizeAndFlags & 0x3FFFFFFF; }
+        }
+
+        // This is used for version 2 and 3 of the contract, where the dynamic type id is no longer present, and baseSize has a new limitation in that it can only be a value up to 0x1FFFFFFF in v3
+        [DataContractAlgorithm(2, 3)]
+        class MethodTableContract_2 : ContractDefinitions.MethodTableContract, IAlgorithmContract
+        {
+            DataContracts.Target Target;
+            readonly uint ContractVersion;
+            public MethodTableContract_2(DataContracts.Target target, uint contractVersion) { Target = target; }
+
+            public virtual int DynamicTypeID(TargetPointer methodTablePointer)
+            {
+                throw new NotImplementedException();
+            }
+            public virtual int BaseSize(TargetPointer methodTablePointer)
+            {
+                return new MethodTable(_target, methodTablePointer).baseSizeAndFlags & ((ContractVersion == 3) ? 0x1FFFFFFF : 0x3FFFFFFF);
+            }
+        }
+
+        // We use a source generator to generate the actual runtime properties, and api for working with the fields on this type.
+        //
+        // The source generator would fill in most of the apis, and provide a bunch of properties that give a granular failure model where if a particular field isn't defined, it fails at the access point
+        // This example shows access to a type.
+        [DataContractType]
+        partial struct MethodTable
+        {
+            partial void Get_dynamicTypeId_optional(ref int value);
+            partial void Get_baseSizeAndFlags(ref int value);
+
+            [DataContractLayout(1, 8)]
+            public class DataLayout1
+            {
+                [FieldOffset(0)]
+                public int dynamicTypeId;
+                [FieldOffset(4)]
+                public int baseSize;
+            }
+            [DataContractLayout(2, 4)]
+            public class DataLayout2
+            {
+                [FieldOffset(0)]
+                public int baseSize;
+            }
+
+            // The rest of this is generated by a source generator
+            public uint TypeSize => _layout.TypeSize;
+            void Get_dynamicTypeId_optional(ref int value)
+            {
+                value = dynamicTypeId;
+            }
+            void Get_baseSizeAndFlags(ref int value)
+            {
+                value = baseSizeAndFlags;
+            }
+
+            private static int LayoutIndex = DataContracts.Target.RegisterLayout(MethodTableLayout.GetLayoutByTarget);
+
+            public readonly TargetPointer Pointer;
+            private int _epoch;
+            private readonly MethodTableLayout _layout;
+
+            public MethodTable(DataContracts.Target target, TargetPointer pointer)
+            {
+                Pointer = pointer;
+                _epoch = Int32.MinInt;
+                _layout = target.GetLayoutByIndex(LayoutIndex);
+            }
+            class MethodTableLayout
+            {
+                public static object GetLayoutByTarget(DataContracts.Target target)
+                {
+                    return new MethodTableLayout(target);
+                }
+
+                public readonly uint TypeSize;
+
+                private MethodTableLayout(DataContracts.Target target)
+                {
+                    Target = target;
+                    TypeSize = target.Contract.GetTypeSize("MethodTable");
+                    if (!_target.Contract.TryGetFieldLayout("MethodTable", "dynamicTypeId", out var dynamicTypeIdField))
+                    {
+                        dynamicTypeId_Offset = -1;
+                    }
+                    else
+                    {
+                        if (dynamicTypeIdField.Type != FieldType.Int32Type)
+                            dynamicTypeId_Offset = -2;
+                        else
+                            dynamicTypeId_Offset = dynamicTypeIdField.Offset;
+                    }
+                    if (!_target.Contract.TryGetFieldLayout("MethodTable", "baseSizeAndFlags", out var baseSizeAndFlagsField))
+                    {
+                        baseSizeAndFlags_Offset = -1;
+                    }
+                    else
+                    {
+                        if (baseSizeAndFlagsField.Type != FieldType.Int32Type)
+                            baseSizeAndFlags_Offset = -2;
+                        else
+                            baseSizeAndFlags_Offset = baseSizeAndFlagsField.Offset;
+                    }
+                }
+                public readonly DataContracts.Target Target;
+
+                int dynamicTypeId_Offset;
+                public int dynamicTypeId(TargetPointer pointer)
+                {
+                    if (dynamicTypeId_Offset == -1)
+                    {
+                        throw new Exception("MethodTable has no field dynamicTypeId");
+                    }
+                    if (dynamicTypeId_Offset == -2)
+                    {
+                        throw new Exception("MethodTable field dynamicTypeId does not have type int32");
+                    }
+                    return _target.ReadInt32(pointer + dynamicTypeId_Offset);
+                }
+                public bool Has_dynamicTypeId => dynamicTypeId_Offset >= 0;
+
+                int baseSizeAndFlags_Offset;
+                public int baseSizeAndFlags(TargetPointer pointer)
+                {
+                    if (baseSizeAndFlags_Offset == -1)
+                    {
+                        throw new Exception("MethodTable has no field baseSizeAndFlags");
+                    }
+                    if (baseSizeAndFlags_Offset == -2)
+                    {
+                        throw new Exception("MethodTable field baseSizeAndFlags does not have type int32");
+                    }
+                    return _target.ReadInt32(pointer + baseSizeAndFlags_Offset);
+                }
+            }
+
+            private int _dynamicTypeId;
+            public int dynamicTypeId
+            {
+                get
+                {
+                    int currentEpoch = _layout.Target.CurrentEpoch;
+                    if (_epoch != currentEpoch)
+                    {
+                        _dynamicTypeId = _layout.dynamicTypeId(Pointer);
+                        _epoch = currentEpoch;
+                    }
+                    return _dynamicTypeId;
+                }
+            }
+            public bool Has_dynamicTypeId => layout.Has_dynamicTypeId;
+
+            private int _baseSizeAndFlags;
+            public int baseSizeAndFlags
+            {
+                get
+                {
+                    int currentEpoch = _layout.Target.CurrentEpoch;
+                    if (_epoch != currentEpoch)
+                    {
+                        _baseSizeAndFlags = _layout.baseSizeAndFlags(Pointer);
+                        _epoch = currentEpoch;
+                    }
+                    return _baseSizeAndFlags;
+                }
+            }
+        }
+    }
+}
diff --git a/docs/design/datacontracts/data_descriptor.md b/docs/design/datacontracts/data_descriptor.md
new file mode 100644
index 000000000000..1338e1ae87aa
--- /dev/null
+++ b/docs/design/datacontracts/data_descriptor.md
@@ -0,0 +1,340 @@
+# Data Descriptors
+
+The [data contract](datacontracts_design.md) specification for .NET depends on each target .NET
+runtime describing a subset of its platform- and build-specific data structures to diagnostic
+tooling.  The information is given meaning by algorithmic contracts that describe how the low-level
+layout of the memory of a .NET process corresponds to high-level abstract data structures that
+represent the conceptual state of a .NET process.
+
+In this document we give a logical description of a data descriptor together with a physical
+manifestation.
+
+The physical format is used for two purposes:
+
+1. To publish well-known data descriptors in the `dotnet/runtime` repository in a machine- and
+human-readable form.  This data may be used for visualization, diagnostics, etc.  These data
+descriptors may be written by hand or with the aid of tooling.
+
+2. To embed a data descriptor blob within a particular instance of a target runtime.  The data
+descriptor blob will be discovered by diagnostic tooling from the memory of a target process.
+
+## Logical descriptor
+
+Each logical descriptor exists within an implied *target architecture* consisting of:
+* target architecture endianness (little endian or big endian)
+* target architecture pointer size (4 bytes or 8 bytes)
+
+The following *primitive types* are assumed: int8, uint8, int16, uint16, int32, uint32, int64,
+uint64, nint, nuint, pointer.  The multi-byte types are in the target architecture
+endianness.  The types `nint`, `nuint` and `pointer` have target architecture pointer size.
+
+The data descriptor consists of:
+* a collection of type structure descriptors
+* a collection of global value descriptors
+
+## Types
+
+The types (both primitive types and structures described by structure descriptors) are classified as
+having either determinate or indeterminate size.  Types with a determinate size may be used for
+pointer arithmetic, whereas types with an indeterminate size may not be.  Note that some sizes may
+be determinate, but *target specific*.  For example pointer types have a fixed size that varies by
+architecture.
+
+## Structure descriptors
+
+Each structure descriptor consists of:
+* a name
+* an optional size in bytes
+* a collection of field descriptors
+
+If the size is not given, the type has indeterminate size.  The size may also be given explicitly as
+"indeterminate" to emphasize that the type has indeterminate size.
+
+The collection of field descriptors may be empty.  In that case the type is opaque.  The primitive
+types may be thought of as opaque (for example: on ARM64 `nuint` is an opaque 8 byte type, `int64`
+is another opaque 8 byte type. `string` is an opaque type of indeterminate size).
+
+Type names must be globally unique within a single logical descriptor.
+
+### Field descriptors
+
+Each field descriptor consists of:
+* a name
+* a type
+* an offset in bytes from the beginning of the struct
+
+The name of a field descriptor must be unique within the definition of a structure.
+
+Two or more fields may have the same offsets or imply that the underlying fields overlap.  The field
+offsets need not be aligned using any sort of target-specific alignment rules.
+
+Each field's type may refer to one of the primitive types or to any other type defined in the logical descriptor.
+
+If a structure descriptor contains at least one field of indeterminate size, the whole structure
+must have indeterminate size.  Tooling is not required to, but may, signal a warning if a descriptor
+has a determinate size and contains indeterminate size fields.
+
+It is expected that tooling will signal a warning if a field specifies a type that does not appear
+in the logical descriptor.
+
+## Global value descriptors
+
+Each global value descriptor consists of:
+* a name
+* a type
+* a value
+
+The name of each global value must be unique within the logical descriptor.
+
+The type must be one of the determinate-size primitive types.
+
+The value must be an integral constant within the range of its type.  Signed values use the target's
+natural encoding.  Pointer values need not be aligned and need not point to addressable target
+memory.
+
+
+## Physical descriptors
+
+The physical descriptors are meant to describe *subsets* of a logical descriptor and to compose.
+
+In the .NET runtime there are two physical descriptors:
+* a "baseline" physical data descriptor with a well-known name,
+* an in-memory physical data descriptor that resides in the target process' memory
+
+When constructing the logical descriptor, first the baseline physical descriptor is consumed: the
+types and values from the baseline are added to the logical descriptor.  Then the types of the
+in-memory data descriptor are used to augment the baseline: fields are added or modified, sizes and
+offsets are overwritten.  The global values of the in-memory data descriptor are used to augment the
+baseline: new globals are added, existing globals are modified by overwriting their types or values.
+
+Rationale: If a type appears in multiple physical descriptors, the later appearances may add more
+fields or change the offsets or definite/indefinite sizes of prior definitions.  If a value appears
+multiple times, later definitions take precedence.
+
+## Physical JSON descriptor
+
+### Version
+
+This is version 0 of the physical descriptor.
+
+### Summary
+
+A data descriptor may be stored in the "JSON with comments" format.  There are two formats: a
+"regular" format and a "compact" format.  The baseline data descriptor may be either regular or
+compact.  The in-memory descriptor will typically be compact.
+
+The toplevel dictionary will contain:
+
+* `"version": 0`
+* optional `"baseline": "BASELINE_ID"` see below
+* `"types": TYPES_DESCRIPTOR` see below
+* `"globals": GLOBALS_DESCRIPTOR` see below
+
+Additional toplevel keys may be present. For example, the in-memory data descriptor will contain a
+`"contracts"` key (see [contract descriptor](./contract_descriptor.md#Compatible_contracts)) for the
+set of compatible contracts.
+
+### Baseline data descriptor identifier
+
+The in-memory descriptor may contain an optional string identifying a well-known baseline
+descriptor.  The identifier is an arbitrary string, that could be used, for example to tag a
+collection of globals and data structure layouts present in a particular release of a .NET runtime
+for a certain architecture (for example `net9.0/coreclr/linux-arm64`).  Global values and data structure
+layouts present in the data contract descriptor take precedence over the baseline contract.  This
+way variant builds can be specified as a delta over a baseline.  For example, debug builds of
+CoreCLR that include additional fields in a `MethodTable` data structure could be based on the same
+baseline as Release builds, but with the in-memory data descriptor augmented with new `MethodTable`
+fields and additional structure descriptors.
+
+It is not a requirement that the baseline is chosen so that additional "delta" is the smallest
+possible size, although for practical purposes that may be desired.
+
+Data descriptors are registered as "well known" by checking them into the main branch of
+`dotnet/runtime` in the `docs/design/datacontracts/data/` directory in the JSON format specified
+in the [data descriptor spec](./data_descriptor.md#Physical_JSON_Descriptor).  The relative path name (with `/` as the path separator, if any) of the descriptor without
+any extension is the identifier.  (for example:
+`/docs/design/datacontracts/data/net9.0/coreclr/linux-arm64.json` is the filename for the data
+descriptor with identifier `net9.0/coreclr/linux-arm64`)
+
+The baseline descriptors themselves must not have a baseline.
+
+### Types descriptor
+
+**Regular format**:
+
+The types will be in an array, with each type described by a dictionary containing keys:
+
+* `"name": "type name"` the name of each type
+* optional `"size": int | "indeterminate"` if omitted the size is indeterminate
+* optional `"fields": FIELD_ARRAY` if omitted same as a field array of length zero
+
+Each `FIELD_ARRAY` is an array of dictionaries each containing keys:
+
+* `"name": "field name"` the name of each field
+* `"type": "type name"` the name of a primitive type or another type defined in the logical descriptor
+* optional `"offset": int | "unknown"` the offset of the field or "unknown". If omitted, same as "unknown".
+
+**Compact format**:
+
+The types will be in a dictionary, with each type name being the key and a `FIELD_DICT` dictionary as a value.
+
+The `FIELD_DICT` will have a field name as a key, or the special name `"!"` as a key.
+
+If a key is `!` the value is an `int` giving the total size of the struct. The key must be omitted
+if the size is indeterminate.
+
+If the key is any other string, the value may be one of:
+
+* `[int, "type name"]` giving the type and offset of the field
+* `int` giving just the offset of the field with the type left unspecified
+
+Unknown offsets are not supported in the compact format.
+
+Rationale: the compact format is expected to be used for the in-memory data descriptor. In the
+common case the field type is known from the baseline descriptor. As a result, a field descriptor
+like `"field_name": 36` is the minimum necessary information to be conveyed.  If the field is not
+present in the baseline, then `"field_name": [12, "uint16"]` must be used.
+
+**Both formats**:
+
+Note that the logical descriptor does not contain "unknown" offsets: it is expected that the
+in-memory data descriptor will augment the baseline with a known offset for all fields in the
+baseline.
+
+Rationale: "unknown" offsets may be used to document in the physical JSON descriptor that the
+in-memory descriptor is expected to provide the offset of the field.
+
+### Global values
+
+**Regular format**:
+
+The global values will be in an array, with each value described by a dictionary containing keys:
+
+* `"name": "global value name"` the name of the global value
+* `"type": "type name"` the type of the global value
+* optional `"value": VALUE | [ int ] | "unknown"` the value of the global value, or an offset in an auxiliary array containing the value or "unknown".
+
+The `VALUE` may be a JSON numeric constant integer or a string containing a signed or unsigned
+decimal or hex (with prefix `0x` or `0X`) integer constant.  The constant must be within the range
+of the type of the global value.
+
+**Compact format**:
+
+The global values will be in a dictionary, with each key being the name of a global and the values being one of:
+
+* `[VALUE | [int], "type name"]` the type and value of a global
+* `VALUE | [int]` just the value of a global
+
+As in the regular format, `VALUE` is a numeric constant or a string containing an integer constant.
+
+Note that a two element array is unambiguously "type and value", whereas a one-element array is
+unambiguously "indirect value".
+
+**Both formats**
+
+For pointer and nuint globals, the value may be assumed to fit in a 64-bit unsigned integer.  For
+nint globals, the value may be assumed to fit in a 64-bit signed integer.
+
+Note that the logical descriptor does not contain "unknown" values: it is expected that the
+in-memory data descriptor will augment the baseline with a known offset for all fields in the
+baseline.
+
+If the value is given as a single-element array `[ int ]` then the value is stored in an auxiliary
+array that is part of the data contract descriptor.  Only in-memory data descriptors may have
+indirect values; baseline data descriptors may not have indirect values.
+
+Rationale: This allows tooling to generate the in-memory data descriptor as a single constant
+string.  For pointers, the address can be stored at a known offset in an in-proc
+array of pointers and the offset written into the constant JSON string.
+
+The indirection array is not part of the data descriptor spec.  It is part of the [contract
+descriptor](./contract_descriptor.md#Contract_descriptor).
+
+
+
+## Example
+
+This is an example of a baseline descriptor for a 64-bit architecture. Suppose it has the name `"example-64"`
+
+The baseline is given in the "regular" format.
+
+```jsonc
+{
+  "version": 0,
+  "types": [
+    {
+      "name": "GCHandle",
+      "size": 8,
+      "fields": [
+        { "name": "Value", "type": "pointer", "offset": 0 }
+      ]
+    },
+    {
+      "name": "Thread",
+      "size": "indeterminate",
+      "fields": [
+        { "name": "ThreadId", "type": "uint32", "offset": "unknown" },
+        { "name": "Next", "type": "pointer" }, // offset "unknown" is implied
+        { "name": "ThreadState", "type": "uint32" }
+      ]
+    },
+    {
+      "name": "ThreadStore",
+      "fields": [
+        { "name": "ThreadCount", "type": "int32" },
+        { "name": "ThreadList", "type": "pointer" }
+      ]
+    }
+  ],
+  "globals": [
+    { "name": "FEATURE_EH_FUNCLETS", "type": "uint8", "value": "0" }, // baseline defaults value to 0
+    { "name": "FEATURE_COMINTEROP", "type", "uint8", "value": "1"},
+    { "name": "s_pThreadStore", "type": "pointer" } // no baseline value
+  ]
+}
+```
+
+The following is an example of an in-memory descriptor that references the above baseline. The in-memory descriptor is in the "compact" format:
+
+```jsonc
+{
+  "version": "0",
+  "baseline": "example-64",
+  "types":
+  {
+    "Thread": { "ThreadId": 32, "ThreadState": 0, "Next": 128 },
+    "ThreadStore": { "ThreadCount": 32, "ThreadList": 8 }
+  },
+  "globals":
+  {
+    "FEATURE_COMINTEROP": 0,
+    "s_pThreadStore": [ 0 ] // indirect from aux data offset 0
+  }
+}
+```
+
+If the indirect values table has the values `0x0100ffe0` in offset 0, then a possible logical descriptor with the above physical descriptors will have the following types:
+
+| Type        | Size          | Field Name  | Field Type | Field Offset |
+| ----------- | ------------- | ----------- | ---------- | ------------ |
+| GCHandle    | 8             | Value       | pointer    | 0            |
+| Thread      | indeterminate | ThreadState | uint32     | 0            |
+|             |               | ThreadId    | uint32     | 32           |
+|             |               | Next        | pointer    | 128          |
+| ThreadStore | indeterminate | ThreadList  | pointer    | 8            |
+|             |               | ThreadCount | int32      | 32           |
+
+
+And the globals will be:
+
+| Name                | Type    | Value      |
+| ------------------- | ------- | ---------- |
+| FEATURE_COMINTEROP  | uint8   | 0          |
+| FEATURE_EH_FUNCLETS | uint8   | 0          |
+| s_pThreadStore      | pointer | 0x0100ffe0 |
+
+The `FEATURE_EH_FUNCLETS` global's value comes from the baseline - not the in-memory data
+descriptor.  By contrast, `FEATURE_COMINTEROP` comes from the in-memory data descriptor - with the
+value embedded directly in the json since it is known at build time and does not vary.  Finally the
+value of the pointer `s_pThreadStore` comes from the auxiliary vector's offset 0 since it is an
+execution-time value that is only known to the running process.
diff --git a/docs/design/datacontracts/datacontracts_design.md b/docs/design/datacontracts/datacontracts_design.md
new file mode 100644
index 000000000000..630dc9fc5639
--- /dev/null
+++ b/docs/design/datacontracts/datacontracts_design.md
@@ -0,0 +1,220 @@
+# Data Contracts
+
+The diagnostic data contract documents a subset of internal .NET runtime in-memory data structures. It enables diagnostic tools to inspect state of .NET runtime process by directly reading and interpreting process memory. It is meant to be used debuggers - for both live and post-mortem debugging, profilers, and other diagnostic tools. We expect it to enable innovative solutions like [unwinding through JITed code using eBPF filters](https://github.com/dotnet/runtime/issues/93550).
+
+The diagnostic data contract addresses multiple problems of the established .NET runtime debugger architecture. The established CoreCLR debugger architecture requires debugger to acquire and load DAC and DBI libraries that exactly match the version of .NET runtime being debugged. It comes with multiple challenges:
+- *Security*: The DBI and DAC libraries that match the exact .NET runtime may be untrusted (e.g. custom or 3rd party build of .NET runtime). https://github.com/dotnet/runtime/blob/main/docs/workflow/debugging/coreclr/debugging-runtime.md#resolving-signature-validation-errors-in-visual-studio has some additional context.
+- *Servicing*: It is difficult to ship a debugger-only fix in DBI and DAC libraries without shipping a new runtime build. Instead, we create a new runtime build and debugger behavior only improves once the new runtime build is targeted.
+- *Acquisition*: Where to acquire the DBI and DAC libraries that match the exact .NET runtime version from.
+- *Cross-architecture*: The host/target of DBI and DAC libraries may not be available. https://github.com/dotnet/runtime/blob/main/docs/design/features/cross-dac.md has some additional context.
+
+Diagnostic data contract addressed these challenges by eliminating the need for exactly matching DAC and DBI libraries.
+Data contracts represent the manner in which a tool which is not the runtime can reliably understand and observe the behavior of the runtime. Contracts are defined by their documentation, and the runtime describes what contracts are applicable to understanding that runtime.
+
+## Data Contract Descriptor
+The physical layout of this data is defined in [the contract descriptor](./contract_descriptor.md) doc. Its practical effects are discussed here.
+
+The Data Contract Descriptor has a set of records of the following forms.
+
+### Data descriptor
+
+The data descriptor is a logical entity that defines the layout of certain types relevant to one or
+more algorithmic contracts, as well as global values known to the target runtime that may be
+relevant to one or more algorithmic contracts.
+
+More details are provided in the [data descriptor spec](./data_descriptor.md).  We highlight some important aspects below:
+
+#### Global Values
+
+Global values which can be either primitive integer constants or pointers.
+All global values have a string describing their name, a type, and a value of one of the above types.
+
+#### Data Structure Layout
+
+Each data structure layout has a name for the type, followed by a list of fields. These fields can
+be primitive integer types or pointers or another named data structure type. Each field descriptor
+provides the offset of the field, the name of the field, and the type of the field.
+
+Data structures may have a determinate size, specified in the descriptor, or an indeterminate size.
+Determinate sizes are used by contracts for pointer arithmetic such as for iterating over arrays.
+The determinate size of a structure may be larger than the sum of the sizes of the fields specified
+in the data descriptor (that is, the data descriptor does not include every field and may not
+include padding bytes).
+
+### Compatible Contract
+
+Each compatible contract is described by a string naming the contract, and a uint32 version. It is an ERROR if multiple versions of a contract are specified in the contract descriptor.
+
+
+## Versioning of contracts
+Contracts are described an integer version number. A higher version number is not more recent, it just means different. In order to avoid conflicts, all contracts should be documented in the main branch of the dotnet repository with a version number which does not conflict with any other. It is expected that every version of every contract describes the same functionality/data layout/set of global values.
+
+## Contract data model
+Logically a contract may refer to another contract. If it does so, it will typically refer to other contracts by names which do not include the contract version. This is to allow for version flexibility. Logically once the Data Contract Descriptor is fully processed, there is a single list of contracts that represents the set of contracts useable with whatever runtime instance is being processed.
+
+## Algorithmic contracts
+
+Algorithmic contracts define how to process a given set of data structures to produce useful results. These are effectively code snippets which utilize the abstracted data structures and global values provided by data descriptor to produce useful output about a given program. Descriptions of these contracts may refer to functionality provided by other contracts to do their work. The algorithms provided in these contracts are designed to operate given the ability to read various primitive types and defined data structures from the process memory space, as well as perform general purpose computation.
+
+It is entirely reasonable for an algorithmic contract to have multiple entrypoints which take different inputs. For example imagine a contract which provides information about a `MethodTable`. It may provide the an api to get the `BaseSize` of a `MethodTable`, and an api to get the `DynamicTypeID` of a `MethodTable`. However, while the set of contracts which describe an older version of .NET may provide a means by which the `DynamicTypeID` may be acquired for a `MethodTable`, a newer runtime may not have that concept. In such a case, it is very reasonable to define that the `GetDynamicTypeID` api portion of that contract is defined to simply `throw new NotSupportedException();`
+
+For simplicity, as it can be expected that all developers who work on the .NET runtime understand C# to a fair degree, it is preferred that the algorithms be defined in C#, or at least psuedocode that looks like C#. It is also considered entirely permissible to refer to other specifications if the algorithm is a general purpose one which is well defined by the OS or some other body. (For example, it is expected that the unwinding algorithms will be defined by references into either the DWARF spec, or various Windows Unwind specifications.)
+
+For working with data from the target process/other contracts, the following C# interface is intended to be used within the algorithmic descriptions:
+
+Best practice is to either write the algorithm in C# like psuedocode working on top of the [C# style api](contract_csharp_api_design.cs) or by reference to specifications which are not co-developed with the runtime, such as OS/architecture specifications. Within the contract algorithm specification, the intention is that all interesting api work is done by using an instance of the `Target` class.
+
+Algorithmic contracts may include specifications for numbers which can be referred to in the contract or by other contracts. The intention is that these global values represent magic numbers and values which are useful for the operation of algorithmic contracts.
+
+While not all versions of a data structure are required to have the same fields/type of fields,
+algorithms may be built targeting the union of the set of field types defined in the data structure
+descriptors of possible target runtimes. Access to a field which isn't defined on the current
+runtime will produce an error.
+
+
+## Arrangement of contract specifications in the repo
+
+Specs shall be stored in the repo in a set of directories. `docs/design/datacontracts` Each one of them shall be a separate markdown file named with the name of contract. `docs/design/datacontracts/<contract_name>.md` Every version of each contract shall be located in the same file to facilitate understanding how variations between different contracts work.
+
+### Algorithmic Contract
+
+Algorithmic contracts describe how an algorithm that processes over data layouts work. Every version of an algorithmic contract presents a consistent api to consumers of the contract.
+
+There are several sections:
+1. The header, where a description of what the contract can do is placed.
+2. The exposed data structures of the contract.
+3. The api surface of the contract
+4. The set of versions of the contract.
+
+For each version of the contract, there shall be the set of versions that are associated with a particular implementation as well as some form of description of how the algorithm works for that version. Best practice is to either write the algorithm in C# like psuedocode working on top of the [C# style api](contract_csharp_api_design.cs) or by reference to specifications which are not co-developed with the runtime, such as OS/architecture specifications.
+
+``````
+# Contract `<contract_name>`
+
+Insert description of contract, and what it can do here.
+
+## Data structures defined by contract
+``` csharp
+record struct SomeStructUsedAsPartOfContractApi (int Value, int Value2);
+```
+
+## Apis of contract
+``` csharp
+SomeStructUsedAsPartOfContractApi GetStruct(TargetPointer pointerName);
+int ComputeInterestingValue(TargetPointer pointerName);
+int ComputeInterestingValue2(SomeStructUsedAsPartOfContractApi struct);
+```
+
+## Version 1
+
+Version 1 is what we started with
+
+``` csharp
+SomeStructUsedAsPartOfContractApi GetStruct(TargetPointer pointerName)
+{
+    var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName);
+    return new SomeStructUSedAsPartOfContractApi(runtimeDataStruct.Field1, runtimeDataStruct.Field2);
+}
+int ComputeInterestingValue(TargetPointer pointerName)
+{
+    var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName);
+    return runtimeDataStruct.Field1 + runtimeDataStruct.Field2;
+}
+int ComputeInterestingValue2(SomeStructUsedAsPartOfContractApi struct)
+{
+    return struct.Value2;
+}
+```
+
+## Version 2-5
+
+Versions 2 to 5 are similar in most ways, but differ based on their ContractVersion in others.
+
+``` csharp
+SomeStructUsedAsPartOfContractApi GetStruct(TargetPointer pointerName)
+{
+    var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName);
+    return new SomeStructUSedAsPartOfContractApi(runtimeDataStruct.Field1, runtimeDataStruct.Field2);
+}
+int ComputeInterestingValue(TargetPointer pointerName)
+{
+    var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName);
+    if (ContractVersion > 3)
+        return runtimeDataStruct.Field3 + runtimeDataStruct.Field2;
+    else
+        return runtimeDataStruct.Field3 ^ runtimeDataStruct.Field2;
+}
+int ComputeInterestingValue2(SomeStructUsedAsPartOfContractApi struct)
+{
+    if (ContractVersion > 4)
+        return struct.Value2;
+    else
+        return struct.Value1;
+}
+```
+``````
+
+Which should format like:
+# Contract `<contract_name>`
+
+Insert description of contract, and what it can do here.
+
+## Data structures defined by contract
+``` csharp
+record struct SomeStructUsedAsPartOfContractApi (int Value, int Value2);
+```
+
+## Apis of contract
+``` csharp
+SomeStructUsedAsPartOfContractApi GetStruct(TargetPointer pointerName);
+int ComputeInterestingValue(TargetPointer pointerName);
+int ComputeInterestingValue2(SomeStructUsedAsPartOfContractApi struct);
+```
+
+## Version 1
+
+Version 1 is what we started with
+
+``` csharp
+SomeStructUsedAsPartOfContractApi GetStruct(TargetPointer pointerName)
+{
+    var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName);
+    return new SomeStructUSedAsPartOfContractApi(runtimeDataStruct.Field1, runtimeDataStruct.Field2);
+}
+int ComputeInterestingValue(TargetPointer pointerName)
+{
+    var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName);
+    return runtimeDataStruct.Field1 + runtimeDataStruct.Field2;
+}
+int ComputeInterestingValue2(SomeStructUsedAsPartOfContractApi struct)
+{
+    return struct.Value2;
+}
+```
+
+## Version 2-5
+
+Versions 2 to 5 are similar in most ways, but differ based on their ContractVersion in others.
+
+``` csharp
+SomeStructUsedAsPartOfContractApi GetStruct(TargetPointer pointerName)
+{
+    var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName);
+    return new SomeStructUSedAsPartOfContractApi(runtimeDataStruct.Field1, runtimeDataStruct.Field2);
+}
+int ComputeInterestingValue(TargetPointer pointerName)
+{
+    var runtimeDataStruct = new SomeRuntimeDataStructure(Target, pointerName);
+    if (ContractVersion > 3)
+        return runtimeDataStruct.Field3 + runtimeDataStruct.Field2;
+    else
+        return runtimeDataStruct.Field3 ^ runtimeDataStruct.Field2;
+}
+int ComputeInterestingValue2(SomeStructUsedAsPartOfContractApi struct)
+{
+    if (ContractVersion > 4)
+        return struct.Value2;
+    else
+        return struct.Value1;
+}
+```
diff --git a/docs/design/features/byreflike-generics.md b/docs/design/features/byreflike-generics.md
index d644a25e7f3f..dec8c64a42db 100644
--- a/docs/design/features/byreflike-generics.md
+++ b/docs/design/features/byreflike-generics.md
@@ -28,37 +28,61 @@ The expansion of ByRefLike types as Generic parameters does not relax restrictio
 
 ## API Proposal
 
-Support for the following will be indicated by a new property. For .NET 7, the feature will be marked with `RequiresPreviewFeaturesAttribute` to indicate it is in [preview](https://github.com/dotnet/designs/blob/main/accepted/2021/preview-features/preview-features.md).
+A new `GenericParameterAttributes` value will be defined which also represents metadata defined in the `CorGenericParamAttr` enumeration.
 
 ```diff
-namespace System.Runtime.CompilerServices
+namespace System.Reflection
 {
-    public static partial class RuntimeFeature
+    [Flags]
+    public enum GenericParameterAttributes
     {
-+        /// <summary>
-+        /// Represents a runtime feature where byref-like types can be used in Generic parameters.
-+        /// </summary>
-+        public const string GenericsAcceptByRefLike = nameof(GenericsAcceptByRefLike);
++        AcceptByRefLike = 0x0020
     }
 }
 ```
 
-The compiler will need an indication for existing troublesome APIs where ByRefLike types will be permissable, but where the failure will be handled at runtime. An attribute will be created and added to these APIs.
+```diff
+typedef enum CorGenericParamAttr
+{
++   gpAcceptByRefLike = 0x0020 // type argument can be ByRefLike
+} CorGenericParamAttr;
+```
+
+The expansion of metadata will impact at least the following:
+
+- ILDasm/ILAsm/`System.Reflection.Metadata`/`System.Reflection.Emit` &ndash; https://github.com/dotnet/runtime
+- Cecil &ndash; https://github.com/jbevain/cecil
+- IL Trimmer &ndash; https://github.com/dotnet/runtime/tree/main/src/tools/illink
+- F# &ndash; https://github.com/fsharp/fsharp
+- C++/CLI &ndash; The MSVC team
+
+### Troublesome API mitigation
+
+If existing types are expected to add ByRefLike support, it is possible they contain previously valid APIs that will become invalid when ByRefLike types are permitted. A potential mitigation for this would be create an attribute to indicate to compilers that specific APIs are validated at run-time not compile-time. What follows is a potential solution.
+
+The compiler will be imbued with knowledge of an API that tells it where ByRefLike types will be permissable and where the failure will be handled by the runtime. The compiler will only respect the attribute that is defined in the same assembly containing `System.Object`.
 
 ```csharp
 namespace System.Runtime.CompilerServices
 {
     /// <summary>
-    /// Indicates to the compiler that constraint checks should be suppressed
-    /// and will instead be enforced at run-time.
+    /// Indicates to the compiler the ByRefLike constraint check should be suppressed.
     /// </summary>
-    [AttributeUsage(AttributeTargets.Constructor | AttributeTargets.Method | AttributeTargets.Property)]
-    internal sealed class SuppressConstraintChecksAttribute : Attribute
-    { }
+    /// <remarks>
+    /// The checking will be suppressed for both the signature and method body. These
+    /// checks are deferred and will be enforced at run-time.
+    /// </remarks>
+    /// <seealso href="https://github.com/dotnet/runtime/issues/99788">Design discussion</seealso>
+    [AttributeUsage(AttributeTargets.Constructor | AttributeTargets.Method | AttributeTargets.Property, Inherited = false, AllowMultiple = false)]
+    internal sealed class SuppressByRefLikeConstraintChecksAttribute : Attribute
+    {
+        /// <summary>Initializes the attribute.</summary>
+        public SuppressByRefLikeConstraintChecksAttribute() { }
+    }
 }
 ```
 
-Troublesome APIs:
+Current examples of APIs that would need the attribute applied:
 
 - [`Span<T>`](https://docs.microsoft.com/dotnet/api/system.span-1)
     - `public Span(T[]? array);`
@@ -73,34 +97,6 @@ Troublesome APIs:
     - `public static implicit operator ReadOnlySpan<T>(ArraySegment<T> segment);`
     - `public static implicit operator ReadOnlySpan<T>(T[]? array);`
 
-A new `GenericParameterAttributes` value will be defined which also represents metadata defined in the `CorGenericParamAttr` enumeration.
-
-```diff
-namespace System.Reflection
-{
-    [Flags]
-    public enum GenericParameterAttributes
-    {
-+        AcceptByRefLike = 0x0020
-    }
-}
-```
-
-```diff
-typedef enum CorGenericParamAttr
-{
-+   gpAcceptByRefLike = 0x0020 // type argument can be ByRefLike
-} CorGenericParamAttr;
-```
-
-The expansion of metadata will impact at least the following:
-
-- ILDasm/ILAsm/`System.Reflection.Metadata`/`System.Reflection.Emit` &ndash; https://github.com/dotnet/runtime
-- Cecil &ndash; https://github.com/jbevain/cecil
-- IL Trimmer &ndash; https://github.com/dotnet/runtime/tree/main/src/tools/illink
-- F# &ndash; https://github.com/fsharp/fsharp
-- C++/CLI &ndash; The MSVC team
-
 ## Semantic Proposal
 
 An API that is a JIT-time intrinsic will be needed to determine if a parameter is ByRefLike. This API would represent a check to occur at JIT time to avoid taking paths that would be invalid for some values of `T`. The existing `Type.IsByRefLike` property will be made an intrinsic (e.g., `typeof(T).IsByRefLike`).
@@ -127,3 +123,129 @@ The following are IL sequences involving the `box` instruction. They are used fo
 `box` ; `isinst` ; `unbox.any` &ndash; The box, `isint`, and unbox target types are all equal.
 
 `box` ; `isinst` ; `br_true/false` &ndash; The box target type is equal to the unboxed target type or the box target type is `Nullable<T>` and target type equalities can be computed.
+
+## Examples
+
+Below are valid and invalid examples of ByRefLike as Generic parameters. All examples use the **not official** syntax, `allows ref struct`, for indicating the Generic permits ByRefLike types.
+
+**1) Valid**
+```csharp
+class A<T1> where T1: allows ref struct
+{
+    public void M();
+}
+
+// The derived class is okay to lack the 'allows'
+// because the base permits non-ByRefLike (default)
+// _and_ ByRefLike types.
+class B<T2> : A<T2>
+{
+    public void N()
+        => M(); // Any T2 satisfies the constraints from A<>
+}
+```
+
+**2) Invalid**
+```csharp
+class A<T1>
+{
+    public void M();
+}
+
+// The derived class cannot push up the allows
+// constraint for ByRefLike types.
+class B<T2> : A<T2> where T2: allows ref struct
+{
+    public void N()
+        => M(); // A<> may not permit a T2
+}
+```
+
+**3) Valid**
+```csharp
+interface IA
+{
+    void M();
+}
+
+ref struct A : IA
+{
+    public void M() { }
+}
+
+class B
+{
+    // This call is permitted because no boxing is needed
+    // to dispatch to the method - it is implemented on A.
+    public static void C<T>(T t) where T: IA, allows ref struct
+        => t.M();
+}
+```
+
+**4) Invalid**
+```csharp
+interface IA
+{
+    public void M() { }
+}
+
+ref struct A : IA
+{
+    // Relies on IA::M() implementation.
+}
+
+class B
+{
+    // Reliance on a DIM forces the generic parameter
+    // to be boxed, which is invalid for ByRefLike types.
+    public static void C<T>(T t) where T: IA, allows ref struct
+        => t.M();
+}
+```
+
+**5) Valid**
+```csharp
+class A<T1> where T1: allows ref struct
+{
+}
+
+class B<T2>
+{
+    // The type parameter is okay to lack the 'allows'
+    // because the field permits non-ByRefLike (default)
+    // _and_ ByRefLike types.
+    A<T2> Field;
+}
+```
+
+**6) Invalid**
+```csharp
+class A<T1>
+{
+}
+
+class B<T2> where T2: allows ref struct
+{
+    // The type parameter can be passed to
+    // the field type, but will fail if
+    // T2 is a ByRefLike type.
+    A<T2> Field;
+}
+```
+
+**7) Invalid**
+```csharp
+class A
+{
+    virtual void M<T1>() where T1: allows ref struct;
+}
+
+class B : A
+{
+    // Override methods need to match be at least
+    // as restrictive with respect to constraints.
+    // If a user has an instance of A, they are
+    // not aware they could be calling B.
+    override void M<T2>();
+}
+```
\ No newline at end of file
diff --git a/docs/design/features/globalization-icu-wasm.md b/docs/design/features/globalization-icu-wasm.md
index 956807b30c5c..ed5c03e88aa2 100644
--- a/docs/design/features/globalization-icu-wasm.md
+++ b/docs/design/features/globalization-icu-wasm.md
@@ -28,7 +28,7 @@ Removing specific feature data might result in an exception that starts with `[C
 * For prerequisites run `.devcontainer/postCreateCommand.sh` (it is run automatically on creation if using Codespaces)
 * Building:
     ```
-    ./build.sh /p:TargetOS=Browser /p:TargetArchitecture=wasm /p:IcuTracing=true
+    ./build.sh /p:TargetOS=Browser /p:TargetArchitecture=wasm
     ```
   Output is located in `artifacts/bin/icu-browser-wasm`.
 
@@ -45,7 +45,7 @@ Removing specific feature data might result in an exception that starts with `[C
     ```
 * Building:
  ```bash
-  ./build.sh /p:TargetOS=Android /p:TargetArchitecture=x64 /p:IcuTracing=true
+  ./build.sh /p:TargetOS=Android /p:TargetArchitecture=x64
   ```
 
 Output from both builds will be located in subdirectories of `artifacts/bin`. Copy the generated `.dat` files to your project location and provide the path to it in the `.csproj`, e.g.:
diff --git a/docs/design/features/unsafeaccessors.md b/docs/design/features/unsafeaccessors.md
new file mode 100644
index 000000000000..8ce4d2a22ed2
--- /dev/null
+++ b/docs/design/features/unsafeaccessors.md
@@ -0,0 +1,137 @@
+# `UnsafeAccessorAttribute`
+
+## Background and motivation
+
+Number of existing .NET serializers depend on skipping member visibility checks for data serialization. Examples include System.Text.Json or EF Core. In order to skip the visibility checks, the serializers typically use dynamically emitted code (Reflection.Emit or Linq.Expressions) and classic reflection APIs as slow fallback. Neither of these two options are great for source generated serializers and native AOT compilation. This API proposal introduces a first class zero-overhead mechanism for skipping visibility checks.
+
+## Semantics
+
+This attribute will be applied to an `extern static` method. The implementation of the `extern static` method annotated with this attribute will be provided by the runtime based on the information in the attribute and the signature of the method that the attribute is applied to. The runtime will try to find the matching method or field and forward the call to it. If the matching method or field is not found, the body of the `extern static` method will throw `MissingFieldException` or `MissingMethodException`.
+
+For `Method`, `StaticMethod`, `Field`, and `StaticField`, the type of the first argument of the annotated `extern static` method identifies the owning type. Only the specific type defined will be examined for inaccessible members. The type hierarchy is not walked looking for a match.
+
+The value of the first argument is treated as `this` pointer for instance fields and methods.
+
+The first argument must be passed as `ref` for instance fields and methods on structs.
+
+The value of the first argument is not used by the implementation for static fields and methods.
+
+The return value for an accessor to a field can be `ref` if setting of the field is desired.
+
+Constructors can be accessed using Constructor or Method.
+
+The return type is considered for the signature match. Modreqs and modopts are initially not considered for the signature match. However, if an ambiguity exists ignoring modreqs and modopts, a precise match is attempted. If an ambiguity still exists, `AmbiguousMatchException` is thrown.
+
+By default, the attributed method's name dictates the name of the method/field. This can cause confusion in some cases since language abstractions, like C# local functions, generate mangled IL names. The solution to this is to use the `nameof` mechanism and define the `Name` property.
+
+Scenarios involving generics may require creating new generic types to contain the `extern static` method definition. The decision was made to require all `ELEMENT_TYPE_VAR` and `ELEMENT_TYPE_MVAR` instances to match identically type and generic parameter index. This means if the target method for access uses an `ELEMENT_TYPE_VAR`, the `extern static` method must also use an `ELEMENT_TYPE_VAR`. For example:
+
+```csharp
+class C<T>
+{
+    T M<U>(U u) => default;
+}
+
+class Accessor<V>
+{
+    // Correct - V is an ELEMENT_TYPE_VAR and W is ELEMENT_TYPE_VAR,
+    //           respectively the same as T and U in the definition of C<T>::M<U>().
+    [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "M")]
+    extern static void CallM<W>(C<V> c, W w);
+
+    // Incorrect - Since Y must be an ELEMENT_TYPE_VAR, but is ELEMENT_TYPE_MVAR below.
+    // [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "M")]
+    // extern static void CallM<Y, Z>(C<Y> c, Z z);
+}
+```
+
+Methods with the `UnsafeAccessorAttribute` that access members with generic parameters are expected to have the same declared constraints with the target member. Failure to do so results in unspecified behavior. For example:
+
+```csharp
+class C<T>
+{
+    T M<U>(U u) where U: Base => default;
+}
+
+class Accessor<V>
+{
+    // Correct - Constraints match the target member.
+    [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "M")]
+    extern static void CallM<W>(C<V> c, W w) where W: Base;
+
+    // Incorrect - Constraints do not match target member.
+    // [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "M")]
+    // extern static void CallM<W>(C<V> c, W w);
+}
+```
+
+## API
+
+```csharp
+namespace System.Runtime.CompilerServices;
+
+[AttributeUsage(AttributeTargets.Method, AllowMultiple = false, Inherited = false)]
+public class UnsafeAccessorAttribute : Attribute
+{
+    public UnsafeAccessorAttribute(UnsafeAccessorKind kind);
+
+    public UnsafeAccessorKind Kind { get; }
+
+    // The name defaults to the annotated method name if not specified.
+    // The name must be null for constructors
+    public string? Name { get; set; }
+}
+
+public enum UnsafeAccessorKind
+{
+    Constructor, // call instance constructor (`newobj` in IL)
+    Method, // call instance method (`callvirt` in IL)
+    StaticMethod, // call static method (`call` in IL)
+    Field, // address of instance field (`ldflda` in IL)
+    StaticField // address of static field (`ldsflda` in IL)
+};
+```
+
+## API Usage
+
+```csharp
+class UserData
+{
+    private UserData() { }
+    public string Name { get; set; }
+}
+
+[UnsafeAccessor(UnsafeAccessorKind.Constructor)]
+extern static UserData CallPrivateConstructor();
+
+// This API allows accessing backing fields for auto-implemented properties with unspeakable names.
+[UnsafeAccessor(UnsafeAccessorKind.Field, Name = "<Name>k__BackingField")]
+extern static ref string GetName(UserData userData);
+
+UserData ud = CallPrivateConstructor();
+GetName(ud) = "Joe";
+```
+
+Using generics
+
+```csharp
+class UserData<T>
+{
+    private T _field;
+    private UserData(T t) { _field = t; }
+    private U ConvertFieldToT<U>() => (U)_field;
+}
+
+// The Accessors class provides the generic Type parameter for the method definitions.
+class Accessors<V>
+{
+    [UnsafeAccessor(UnsafeAccessorKind.Constructor)]
+    extern static UserData<V> CallPrivateConstructor(V v);
+
+    [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "ConvertFieldToT")]
+    extern static U CallConvertFieldToT<U>(UserData<V> userData);
+}
+
+UserData<string> ud = Accessors<string>.CallPrivateConstructor("Joe");
+Accessors<string>.CallPrivateConstructor<object>(ud);
+```
\ No newline at end of file
diff --git a/docs/design/mono/aot.md b/docs/design/mono/aot.md
new file mode 100644
index 000000000000..07c5d416da70
--- /dev/null
+++ b/docs/design/mono/aot.md
@@ -0,0 +1,144 @@
+# Ahead of Time Compilation
+
+## Introduction
+
+The mono Ahead of Time (AOT) compiler enables the compilation of the IL code in a .NET assembly to
+a native object file. This file is called an AOT image. This AOT image can be used by the runtime to avoid
+having to JIT the IL code.
+
+## Usage
+
+The AOT compiler is integrated into the mono runtime executable, and can be run using the `--aot` command
+line argument, i.e.
+`<mono-executable> --aot HelloWorld.dll`
+
+## Source code structure
+
+- `aot-compiler.c`: The AOT compiler
+- `aot-runtime.c`: Code used at runtime to load AOT images
+- `image-writer.c`: Support code for emitting textual assembly
+- `dwarfwriter.c`: Support code for emitting DWARF debug info
+
+## Configurations
+
+### Desktop AOT
+
+In this mode, the AOT compiler creates a platform shared object file (.so/.dylib), i.e. `HelloWorld.dll.so`. During execution, when
+an assembly is loaded, the runtime loads the corresponding shared object and uses it to avoid having to AOT the methods in the
+assembly.
+
+Emission of the native code is done by first emitting an assembly (.s) file, then compiling and linking it with the system tools
+(`as`/`ld`, or `clang`).
+
+### Static AOT
+
+In this mode, the AOT compiler creates a platform object file (.o). This file needs to be linked into the application and registered
+with the runtime.
+
+Static compilation is enabled by using the `static` aot option, i.e. `--aot=static,...`. The resulting object file contains a linking
+symbol named `mono_aot_module_<assembly name>_info`. This symbol needs to be passed to the a runtime function before the
+runtime is initialized, i.e.:
+`mono_aot_register_module (mono_aot_module_HelloWorld_info);`
+
+### Full AOT
+
+In this mode, which can be combined with the other modes, the compiler generates additional code which enables the runtime to
+function without any code being generated at runtime. This includes 2 types of code:
+- code for 'extra' methods, i.e. generic instances, runtime generated wrappers methods, etc.
+- trampolines
+
+This is enabled by using `full` aot option, i.e. `--aot=full,...`. At runtime, all assemblies need to have a full-aot-ed AOT image
+present in order for the app to work. This is used on platforms which don't allow runtime code generation like IOS.
+
+### LLVM support
+
+LLVM support can be enabled using the `llvm` aot option, i.e. `--aot=llvm`. In this mode, instead of generating native code,
+the AOT compiler generates an LLVM bitcode (.bc), file, then compiles it to native code using the `opt`/`llc` LLVM tools. The
+various AOT data structures are also emitted into the .bc file instead of as assembly.
+Since the LLVM backend currently doesn't support all .net methods, a smaller assembly file is still emitted, and linked together
+with the `opt`/`llc` compiled object file into the final shared object file.
+
+## Versioning
+
+The generated AOT images have a dependency on the exact version input assembly used to generate them and the versions of all the
+referenced assemblies. This means the GUIDs of the assemblies have to match. If there is a mismatch, the AOT image will fail to load.
+
+## File structure
+
+The AOT images exports one symbol named `mono_aot_module_<assembly name>_info` which points to a `MonoAotFileInfo` structure,
+which contains pointers to the tables/structures. The AOT image contains:
+- the native code
+- data structures required to load the code
+- cached data intended to speed up runtime operation
+
+The AOT image contains serialized versions of many .NET objects like methods/types etc. This uses ad-hoc binary encodings.
+
+## Runtime support
+
+The `aot-runtime.c` file contains the runtime support for loading AOT images.
+
+### Loading AOT images
+
+When an assembly is loaded, the corresponding AOT images is either loaded using the system dynamic linker (`dlopen`), or
+found among the statically linked AOT images.
+
+### Loading methods
+
+Every method in the AOT image is assigned an index. The AOT methods corresponding to 'normal' .NET methods are assigned
+an index corresponding to their metadata token index, while the 'extra' methods are assigned subsequent indexes. There is
+a hash table inside the AOT image mapping extra methods to their AOT indexes. Loading a method consists of
+- finding its method index
+- finding the method code/data corresponding to the method index
+
+The mapping from method index to the code is done in an architecture specific way, designed to minimize the amount of
+runtime relocations in the AOT image. In some cases, this involves generating an extra table with assembly call instructions to
+all the methods, then disassembling this table at runtime.
+
+
+
+### Runtime constants
+
+The generated code needs to access data which is only available at runtime. For example, for an `ldstr "Hello"` instruction, the
+`"Hello"` string is a runtime constant.
+
+These constants are stored in a global table called the GOT which is modelled after the Global Offset Table in ELF images. The GOT
+table contains pointers to runtime objects. The AOT image contains descriptions of these runtime objects so the AOT runtime can
+compute them. The entries in the GOT are initialized either when the AOT image is loaded (for frequently used entries), or before
+the method which uses them is first executed.
+
+### Initializing methods
+
+Before an AOTed method can be executed, it might need some initialization. This involves:
+- executing its class cctor
+- initializing the GOT slots used by the method
+
+For methods compiled by the mono JIT, initialization is done when the method is loaded. This means that its not possible to
+have direct calls between methods. Instead, calls between methods go through small pieces of generated code called PLT
+(Program Linkage Table) entries, which transfer control to the runtime which loads the called method before executing it.
+For methods compiled by LLVM, the method entry contains a call to the runtime which initializes the method.
+
+## Trampolines
+
+In full-aot mode, the AOT compiler needs to emit all the trampolines which will be used at runtime. This is done in
+the following way:
+- For most trampolines, the AOT compiler calls the normal trampoline creation function with the `aot` argument set
+to TRUE, then saves the returned native code into the AOT image, along with some relocation information like the
+GOT slots used by the trampolines.
+- For some small trampolines, the AOT compiler directly emits platform specific assembly.
+
+The runtime might require an unbounded number of certain trampolines, but the AOT image can only contain a fixed
+number of them. To solve this problem, on some platforms (IOS), its possible to have infinite trampolines. This is
+implemented by emitting a different version of these trampolines which reference their corresponding data using
+relative addressing. At runtime, a page of these trampolines is mapped using `mmap` next to a writable page
+which contains their corresponding data. The same page of trampolines is mapped multiple times at multiple
+addresses.
+
+## Cross compilation
+
+Its possible to use the AOT compiler to target a platform different than the host. This requires a separate cross compiler
+build of the runtime.
+The generated code depends on offsets inside runtime structures like `MonoClass`/`MonoVTable` etc. which could
+differ between the host and the target. This is handled by having a tool called the offsets-tool, which is a python
+script which uses the clang python interface to compute and emit a C header file containing these offsets. The header
+file is passed as a cmake argument during the runtime build. Inside the runtime code, the `MONO_STRUCT_OFFSET`
+C macro reads the data from the offsets file to produce the offset corresponding to the target platform.
diff --git a/docs/design/mono/runtime-ilgen.md b/docs/design/mono/runtime-ilgen.md
new file mode 100644
index 000000000000..8c17bb697a2a
--- /dev/null
+++ b/docs/design/mono/runtime-ilgen.md
@@ -0,0 +1,110 @@
+# IL generation at runtime
+
+## Introduction
+
+The mono runtime makes extensive use of generating IL methods at runtime. These
+methods are called 'wrappers' in the runtime code, because some of them 'wrap' other
+methods, like a managed-to-native wrapper would wrap the native function being called.
+Wrappers have the `MonoMethod.wrapper_type` field set to the type of the wrapper.
+
+## Source code structure
+
+- `wrapper-types.h`: Enumeration of wrapper types
+- `marshal*`: Functions for generating wrappers
+- `method-builder*`: Low level functions for creating new IL methods/code at runtime
+
+## WrapperInfo
+
+Every wrapper has an associated `WrapperInfo` structure which describes the wrapper.
+This can be retrieved using the `mono_marshal_get_wrapper_info ()` function.
+Some wrappers have subtypes, these are stored in `WrapperInfo.subtype`.
+
+## Caching wrappers
+
+Wrappers should be unique, i.e. there should be only one instance of every wrapper. This is
+achieved by caching wrappers in wrapper type specific hash tables, which are stored in
+`MonoMemoryManager.wrapper_caches`.
+
+## Generics and wrappers
+
+Wrappers for generic instances should be created by doing:
+instance method -> generic method definition -> generic wrapper -> inflated wrapper
+
+## AOT support
+
+In full-aot mode, the AOT compiler will collect and emit the wrappers needed by the
+application at runtime. This involves serializing/deserializing the `WrapperInfo` structure.
+
+## Wrapper types
+
+### Managed-to-native
+
+These wrappers are used to make calls to native code. They are responsible for marshalling
+arguments and result values, setting up EH structures etc.
+
+### Native-to-managed
+
+These wrappers are used to call managed methods from native code. When a delegate is passed to
+native code, the native code receives a native-to-managed wrapper.
+
+### Delegate-invoke
+
+Used to handle more complicated cases of delegate invocation that the fastpaths in the JIT can't handle.
+
+### Synchronized
+
+Used to wrap synchronized methods. The wrapper does the locking.
+
+### Runtime-invoke
+
+Used to implement `mono_runtime_invoke ()`.
+
+### Dynamic-method
+
+These are not really wrappers, but methods created by user code using the `DynamicMethod` class.
+
+Note that these have no associated `WrapperInfo` structure.
+
+### Alloc
+
+SGEN allocator methods.
+
+### Write-barrier
+
+SGEN write barrier methods.
+
+### Castclass
+
+Used to implement complex casts.
+
+### Stelemref
+
+Used to implement stelem.ref.
+
+### Unbox
+
+Used to unbox the receiver before calling a method.
+
+### Managed-to-managed/other
+
+The rest of the wrappers, distinguished by their subtype.
+
+#### String-ctor
+
+Used to implement string ctors, the first argument is ignored, and a new string is allocated.
+
+#### Element-addr
+
+Used to implement ldelema in multi-dimensional arrays.
+
+#### Generic-array-helper
+
+Used to implement the implicit interfaces on arrays like IList<T> etc. Delegate to helper methods on the Array class.
+
+#### Structure-to-ptr
+
+Used to implement Marshal.StructureToPtr.
+
+#### Ptr-to-structure
+
+Used to implement Marshal.PtrToStructure.
diff --git a/docs/design/mono/wasm-aot.md b/docs/design/mono/wasm-aot.md
index ef907bfe0abe..20f900e35f47 100644
--- a/docs/design/mono/wasm-aot.md
+++ b/docs/design/mono/wasm-aot.md
@@ -6,15 +6,29 @@ The LLVM backend of the Mono JIT is used to generate an llvm .bc file for each a
 compiled to webassembly using emscripten, then the resulting wasm files are linked into the final app. The 'bitcode'/'llvmonly'
 variant of the LLVM backend is used since webassembly doesn't support inline assembly etc.
 
+## Source code structure
+
+`mini-llvm.c`: The LLVM backend.
+`mini-wasm.h/c`: The wasm backend. This is a minimal version of a normal mono JIT backend which only supports llvm.
+`llvm-runtime.cpp`: Code to throw/catch C++ exceptions.
+`aot-runtime-wasm.c`: Code related to interpreter/native transitions on wasm.
+`llvmonly-runtime.c`:  Runtime support for the generated AOT code.
+
+WASM specific code is inside `HOST_WASM/TARGET_WASM` defines.
+
 ## GC Support
 
 On wasm, the execution stack is not stored in linear memory, so its not possible to scan it for GC references. However, there
-is an additional C stack which stores variables whose addresses are taken. Variables which hold GC references are marked as
-'volatile' in the llvm backend, forcing llvm to spill those to the C stack so they can be scanned.
+is an additional C stack in linear memory which is managed explicitly by the generated wasm code. This stack is already
+scanned by the mono GC as on other platforms.
+To make GC references in AOTed methods visible to the GC, every method allocates a gc_pin area in its prolog, and
+stores arguments/locals with a reference type into it. This will cause the GC to pin those references so the rest of
+the generated code can treat them normally as LLVM values.
 
 ## Interpreter support
 
-Its possible for AOTed and interpreted code to interop, this is called mixed mode.
+On wasm, the two supported execution modes are interpreter, or aot+interpreter. This means its always
+possible to fall back to the interpreter if needed.
 For the AOT -> interpreter case, every call from AOTed code which might end up in the interpreter is
 emitted as an indirect call. When the callee is not found, a wrapper function is used which
 packages up the arguments into an array and passes control to the interpreter.
@@ -24,6 +38,22 @@ AOTed code. There is usually one aot->interp and interp->aot wrapper for each si
 some sharing. These wrappers are generated by the AOT compiler when the 'interp' aot option
 is used.
 
+## Exception handling
+
+On wasm, its not possible to walk the stack so the normal mono exception handling/unwind code
+cannot be used as is. Its also hard to map the .NET exception handling concepts like filter clauses
+to the llvm concepts. Instead, c++/wasm exceptions are used to implement unwinding, and the
+interpreter is used to execute EH code.
+When an exception needs to be thrown, we store the exception info in TLS, and throw a dummy C++ exception instead.
+Internally, this is implemented by emscripten either by calling into JS, or by using the wasm exception handling
+spec.
+The c++ exception is caught in the generated AOT code using the relevant llvm catch instructions. Then execution is
+transferred to the interpreter. This is done by creating a data structure on the stack containing all the IL level state like
+the IL offset and the values of all the IL level variables. The generated code continuously updates this state during
+execution. When an exception is caught, this IL state is passed to the interpreter which continues execution from
+that point.  This process is called `deopt` in the runtime code.
+Exceptions are also caught in various other places like the interpreter-aot boundary.
+
 ## Null checks
 
 Since wasm has no signal support, we generate explicit null checks.
@@ -59,8 +89,7 @@ if (vt_entry == null)
 	vt_entry = init_vt_entry ();
 ```
 
-### GC overhead
+### Exception handling
 
-Since GC variables are marked as volatile and stored on the C stack, they are loaded/stored on every access,
-even if there is no GC safe point between the accesses. Instead, they should only be loaded/stored around
-GC safe points.
+It might be possible to implement EH in the generated code without involving the interpreter. The
+current design adds a lot of overhead to methods which contain IL clauses.
diff --git a/docs/design/mono/web/README.md b/docs/design/mono/web/README.md
new file mode 100644
index 000000000000..50992e0a68b3
--- /dev/null
+++ b/docs/design/mono/web/README.md
@@ -0,0 +1 @@
+This directory contains the original mono runtime documentation from the [mono website](https://github.com/mono/website/tree/gh-pages/docs/advanced/runtime/docs).
diff --git a/docs/design/mono/web/aot.md b/docs/design/mono/web/aot.md
new file mode 100644
index 000000000000..ffa14737f3ee
--- /dev/null
+++ b/docs/design/mono/web/aot.md
@@ -0,0 +1,179 @@
+# Ahead of Time Compilation (AOT)
+
+Mono Ahead Of Time Compiler
+---------------------------
+
+The Ahead of Time compilation feature in Mono allows Mono to precompile assemblies to minimize JIT time, reduce memory usage at runtime and increase the code sharing across multiple running Mono application.
+
+To precompile an assembly use the following command:
+
+      mono --aot -O=all assembly.exe
+
+The \`--aot' flag instructs Mono to ahead-of-time compile your assembly, while the -O=all flag instructs Mono to use all the available optimizations.
+
+Besides code, the AOT file also contains cached metadata information which allows the runtime to avoid certain computations at runtime, like the computation of generic vtables. This reduces both startup time, and memory usage. It is possible to create an AOT image which contains only this cached information and no code by using the 'metadata-only' option during compilation:
+
+       mono --aot=metadata-only assembly.exe
+
+This works even on platforms where AOT is not normally supported.
+
+The code generated by Ahead-of-Time compiled images is position-independent code. This allows the same precompiled image to be reused across multiple applications without having different copies: this is the same way in which ELF shared libraries work: the code produced can be relocated to any address.
+
+The implementation of Position Independent Code has a performance impact on Ahead-of-Time compiled images but compiler bootstraps are still faster than JIT-compiled images, specially with all the new optimizations provided by the Mono engine.
+
+### The AOT File Format
+
+We use the native object format of the platform. That way it is possible to reuse existing tools like as/ld and the dynamic loader. On ELF platforms, the AOT compiler can generate an ELF .so file directly, on other platforms, it generates an assembly (.s) file which is then assembled and linked by as/ld into a shared library.
+
+The precompiled image is stored in a file next to the original assembly that is precompiled with the native extension for a shared library (on Linux its ".so" to the generated file).
+
+For example: basic.exe -\> basic.exe.so; corlib.dll -\> corlib.dll.so
+
+There is one global symbol in each AOT image named 'mono_aot_file_info'. This points to a MonoAotFileInfo structure which contains pointers to all the AOT data structures. In the latter parts of this document, fields of this structure are referenced using info-\>\<FIELD NAME\>.
+
+Binary data other than code is stored in one giant blob. Data items inside the blob can be found using several tables called 'XXX_offsets', like 'method_info_offsets'. These tables contain offsets into the blob, stored in a compact format using differential encoding plus an index.
+
+### Source file structure
+
+The AOT infrastructure is split into two files, aot-compiler.c and aot-runtime.c. aot-compiler.c contains the AOT compiler which is invoked by --aot, while aot-runtime.c contains the runtime support needed for loading code and other things from the aot files. The file image-writer.c contains the ELF writer/ASM writer code.
+
+### Compilation process
+
+AOT compilation consists of the following stages:
+
+-   collecting the methods to be compiled.
+-   compiling them using the JIT.
+-   emitting the JITted code and other information
+-   emitting the output file either directly, or by executing the system assembler/linker.
+
+### Handling methods
+
+There are two kinds of methods handled by AOT:
+
+-   Normal methods are methods from the METHODDEF table.
+-   'Extra' methods are either runtime generated methods (wrappers) or methods of inflated generic classes/inflated generic methods.
+
+Each method is identified by a method index. For normal methods, this is equivalent to its index in the METHOD metadata table. For extra methods, it is an arbitrary number. Compiled code is created by invoking the JIT, requesting it to created AOT code instead of normal code. This is done by the compile_method () function. The output of the JIT is compiled code and a set of patches (relocations). Each relocation specifies an offset inside the compiled code, and a runtime object whose address is accessed at that offset. Patches are described by a MonoJumpInfo structure. From the perspective of the AOT compiler, there are two kinds of patches:
+
+-   calls, which require an entry in the PLT table.
+-   everything else, which require an entry in the GOT table.
+
+How patches is handled is described in the next section. After all the method are compiled, they are emitted into the output file into a byte array called 'methods'. Each piece of compiled code is identified by the local symbol .Lm_\<method index\>. While compiled code is emitted, all the locations which have an associated patch are rewritten using a platform specific process so the final generated code will refer to the plt and got entries belonging to the patches. This is done by the emit_and_reloc_code () function. The compiled code array can be accessed using the 'methods' global symbol.
+
+### Handling patches
+
+Before a piece of AOTed code can be used, the GOT entries used by it must be filled out with the addresses of runtime objects. Those objects are identified by MonoJumpInfo structures. These stuctures are saved in a serialized form in the AOT file, so the AOT loader can deconstruct them. The serialization is done by the encode_patch () function, while the deserialization is done by the decode_patch_info () function. Every method has an associated method info blob stored inside the global blob. This contains all the information required to load the method at runtime:
+
+-   the first got entry used by the method.
+-   the number of got entries used by the method.
+-   the indexes of the got entries used by the method
+
+Each GOT entry is described by a serialized description stored in the global blob. The 'got_info_offsets' table maps got offsets to the offsets of their description.
+
+### The Procedure Linkage Table (PLT)
+
+Our PLT is similar to the elf PLT, it is used to handle calls between methods. If method A needs to call method B, then an entry is allocated in the PLT for method B, and A calls that entry instead of B directly. This is useful because in some cases the runtime needs to do some processing the first time B is called. The processing includes:
+
+-   if B is in another assembly, then it needs to be looked up, then JITted or the corresponding AOT code needs to be found.
+-   if B is in the same assembly, but has got slots, then the got slots need to be initialized.
+
+If none of these cases is true, then the PLT is not used, and the call is made directly to the native code of the target method. A PLT entry is usually implemented by a jump through a GOT entry, these entries are initially filled up with the address of a trampoline so the runtime can get control, and after the native code of the called method is created/found, the jump table entry is changed to point to the native code. All PLT entries also embed a integer offset after the jump which indexes into the 'plt_info' table, which stores the information required to find the called method. The PLT is emitted by the emit_plt () function.
+
+### Exception/Debug info
+
+Each compiled method has some additional info generated by the JIT, usable for debugging (IL offset-native offset maps) and exception handling (saved registers, native offsets of try/catch clauses). These are stored in the blob, and the 'ex_info_offsets' table can be used to find them.
+
+### Cached metadata
+
+When the runtime loads a class, it needs to compute a variety of information which is not readily available in the metadata, like the instance size, vtable, whenever the class has a finalizer/type initializer etc. Computing this information requires a lot of time, causes the loading of lots of metadata, and it usually involves the creation of many runtime data structures (MonoMethod/MonoMethodSignature etc), which are long living, and usually persist for the lifetime of the app. To avoid this, we compute the required information at aot compilation time, and save it into the aot image, into an array called 'class_info'. The runtime can query this information using the mono_aot_get_cached_class_info () function, and if the information is available, it can avoid computing it. To speed up mono_class_from_name (), a hash table mapping class names to class indexes is constructed and saved in the AOT file pointed to by the symbol 'class_name_table'.
+
+### Other data
+
+Things saved into the AOT file which are not covered elsewhere:
+
+-   info-\>assembly_guid A copy of the assembly GUID. When loading an AOT image, this GUID must match with the GUID of the assembly for the AOT image to be usable.
+
+-   info-\>version The version of the AOT file format. This is checked against the MONO_AOT_FILE_VERSION constant in mini.h before an AOT image is loaded. The version number must be incremented when an incompatible change is made to the AOT file format.
+
+-   info-\>image_table A list of assemblies referenced by this AOT module.
+
+-   info-\>plt The Program Linkage Table
+
+### LLVM Support
+
+It is possible to use LLVM in AOT mode. This is implemented by compiling methods using LLVM instead of the JIT, saving the resulting LLVM bytecode into an LLVM .bc file, compiling it using LLVM tools into a .s file, then appending our own AOT data structures to that file.
+
+### Full AOT mode
+
+Some platforms like the iphone prohibit JITted code, using technical and/or legal means. This is a significant problem for the mono runtime, since it generates a lot of code dynamically, using either the JIT or more low-level code generation macros. To solve this, the AOT compiler is able to function in full-aot or aot-only mode, where it generates and saves all the neccesary code in the aot image, so at runtime, no code needs to be generated. There are two kinds of code which needs to be considered:
+
+-   wrapper methods, that is methods whose IL is generated dynamically by the runtime. They are handled by generating them in the add_wrappers () function, then emitting them as 'extra' methods.
+-   trampolines and other small hand generated pieces of code. They are handled in an ad-hoc way in the emit_trampolines () function.
+
+### Emitting assembly/object code
+
+The output emission functionality is in the file image-writer.c. It can either emit assembly code (.s), or it can produce a shared image directly. The latter is only supported on x86/amd64 ELF. The emission of debug information is in the file dwarfwriter.c.
+
+### Performance considerations
+
+Using AOT code is a trade-off which might lead to higher or slower performance, depending on a lot of circumstances. Some of these are:
+
+-   AOT code needs to be loaded from disk before being used, so cold startup of an application using AOT code MIGHT be slower than using JITed code. Warm startup (when the code is already in the machines cache) should be faster. Also, JITing code takes time, and the JIT compiler also need to load additional metadata for the method from the disk, so startup can be faster even in the cold startup case.
+-   AOT code is usually compiled with all optimizations turned on, while JITted code is usually compiled with default optimizations, so the generated code in the AOT case could be faster.
+-   JITted code can directly access runtime data structures and helper functions, while AOT code needs to go through an indirection (the GOT) to access them, so it will be slower and somewhat bigger as well.
+-   When JITting code, the JIT compiler needs to load a lot of metadata about methods and types into memory.
+-   JITted code has better locality, meaning that if A method calls B, then the native code for A and B is usually quite close in memory, leading to better cache behavior thus improved performance. In contrast, the native code of methods inside the AOT file is in a somewhat random order.
+
+### Porting
+
+Generated native code needs to reference various runtime structures/functions whose address is only known at run time. JITted code can simple embed the address into the native code, but AOT code needs to do an indirection. This indirection is done through a table called the Global Offset Table (GOT), which is similar to the GOT table in the Elf spec. When the runtime saves the AOT image, it saves some information for each method describing the GOT table entries used by that method. When loading a method from an AOT image, the runtime will fill out the GOT entries needed by the method.
+
+#### Computing the address of the GOT
+
+Methods which need to access the GOT first need to compute its address. On the x86 it is done by code like this:
+
+            call <IP + 5>
+            pop ebx
+            add <OFFSET TO GOT>, ebx
+            <save got addr to a register>
+
+The variable representing the got is stored in cfg-\>got_var. It is allways allocated to a global register to prevent some problems with branches + basic blocks.
+
+#### Referencing GOT entries
+
+Any time the native code needs to access some other runtime structure/function (i.e. any time the backend calls mono_add_patch_info ()), the code pointed by the patch needs to load the value from the got. For example, instead of:
+
+        call <ABSOLUTE ADDR>
+
+it needs to do:
+
+        call *<OFFSET>(<GOT REG>)
+
+Here, the \<OFFSET\> can be 0, it will be fixed up by the AOT compiler.
+
+For more examples on the changes required, see
+
+svn diff -r 37739:38213 mini-x86.c
+
+### Back end functionality
+
+#### OP_AOTCONST
+
+Loading informarion from the GOT tables is done by the OP_AOTCONST opcode. Since the opcode implementation needs to reference the GOT symbol, which is not available during JITting, the backend should emit some placeholder code in mono_arch_output_basic_block (), and emit the real implementation in arch_emit_got_access () in aot-compiler.c.
+
+#### Constants
+
+AOTed code cannot contain literal constants like addresses etc. All occurences of those should be replaced by an OP_AOTCONST.
+
+#### PLT Entries
+
+PLT entries are emitted by arch_emit_plt_entry () in aot-compiler.c. Each PLT entry has a corresponding slot in the GOT. The PLT entry should load this GOT slot, and branch to it, without clobbering any argument registers or the return value. Since the return address is not updated, the AOT code obtains the address of the PLT entry by disassembling the call site which branched to the PLT entry. This is done by the mono_arch_get_call_target () function in tramp-\<ARCH\>.c. The information needed to resolve the target of the PLT entry is in the AOT tables, and an offset into these tables should be emitted as a word after the PLT entry. The mono_arch_get_plt_info_offset () function in tramp-\<ARCH\>.c is responsible for retrieving this offset. After the call is resolved, the GOT slot used by the PLT entry needs to be updated with the new address. This is done by the mono_arch_patch_plt_entry () function in tramp-\<ARCH\>.c.
+
+### Future Work
+
+-   Currently, when an AOT module is loaded, all of its dependent assemblies are also loaded eagerly, and these assemblies need to be exactly the same as the ones loaded when the AOT module was created ('hard binding'). Non-hard binding should be allowed.
+-   On x86, the generated code uses call 0, pop REG, add GOTOFFSET, REG to materialize the GOT address. Newer versions of gcc use a separate function to do this, maybe we need to do the same.
+-   Currently, we get vtable addresses from the GOT. Another solution would be to store the data from the vtables in the .bss section, so accessing them would involve less indirection.
+-   When saving information used to identify classes/methods, we use an add-hoc encoding. An encoding similar to the metadata encoding should be used instead.
+
+[Original version of this document in git](https://github.com/mono/mono/blob/e6d522976e24e572f0e7bc344ae4b8f79f955c6f/docs/aot-compiler.txt)
diff --git a/docs/design/mono/web/bitcode.md b/docs/design/mono/web/bitcode.md
new file mode 100644
index 000000000000..1d32c67cc7b6
--- /dev/null
+++ b/docs/design/mono/web/bitcode.md
@@ -0,0 +1,145 @@
+# Bitcode
+
+## Introduction
+
+Bitcode imposes the following major restrictions:
+
+-   No inline assembly/machine code
+-   Compilation using stock clang
+
+To enable the runtime to operate in this environment, a new execution mode 'llvmonly' was implemented. In this mode:
+
+-   everything is compiled to llvm bitcode, then compiled to native code using clang.
+-   no trampolines, etc. are used.
+
+In the rest of this document, 'normal mode' is used to refer to the JIT/full aot mode previously supported by the runtime.
+
+## Concepts
+
+### Passing extra arguments
+
+The runtime used trampolines to pass extra arguments to some generic shared methods. This is not possible in llvmonly mode. Instead, these arguments are passed normally as an additional argument, and the caller is responsible for passing them. The method address and the possible additional argument are encapsulated together into a function descriptor represented by a MonoFtnDesc structure. These function descriptors are used instead of method addresses anywhere where a callee might require an extra argument. A call using an ftndesc looks like this:
+
+``` c
+ftndesc->addr (<normal args>, ftndesc->arg);
+```
+
+The 'arg' field might be null, in which case the caller will pass one more argument than the callee requires, but that is not a problem with most calling conventions.
+
+### Lazy initialization
+
+Trampolines were used in many places in the runtime to initialize/load methods/code on demand. Instead, either the caller or the callee needs to check whenever initialization is required, and call into runtime code to do it.
+
+## Details
+
+### Method initialization
+
+AOT methods require the initialization of GOT slots they are using. In normal execution mode, this was accomplished by calling them through PLT entries. The PLT entry would look up the method code, initialize its GOT slots, then transfer control to it. In llvmonly mode, methods initialize themselves. Every AOT module has an 'inited' bit array with one bit for every method. The method code checks this bit in its prolog, and if its 0, calls a runtime function to initialize the method.
+
+In llvmonly mode, no trampolines are created for methods. Instead, the method's code is looked up immediately. This doesn't create lazy initialization problems because the method is initialized lazily, so looking up its code doesn't change managed state, i.e. it doesn't run type cctors etc.
+
+### Looking up methods
+
+In normal mode, AOT images contained a table mapping method indexes to method addresses. This table was emitted using inline assembly. In llvmonly mode, there is a generated llvm function which does this mapping using a switch statement.
+
+### Unbox trampolines
+
+In normal mode, these were emitted using inline assembly. In llvmonly mode, these are emitted as llvm code. With optimizations enabled, llvm can emit the same or very similar code.
+
+### Null checks
+
+Since the target plaform for bitcode doesn't support sigsegv signal handlers, explicit null checks are emitted.
+
+### Normal calls
+
+Calls are made through a GOT slot, or directly, if the callee is in the same assembly, and its corresponding llvm method can be looked up at compile time.
+
+### Virtual calls
+
+Vtable slots contain ftn descriptors. They are initialized to null when the vtable is created, so the calling code has to initialize them on demand. So a virtual calls looks like this:
+
+``` c
+if (vtable [slot] == null)
+   init_vtable_slot (vtable, slot);
+ftndesc = vtable [slot];
+<call using ftndesc>
+```
+
+### Interface calls
+
+Interface calls are implemented using IMT. The imt entries in the vtable contain an ftndesc. The ftndesc points to a imt thunk. IMT thunks are C functions implemented in the runtime. They receive the imt method, and a table of `<method, ftndesc>` pairs, and return the ftndesc corresponding to the imt method.
+
+The generated code looks like this:
+
+``` c
+imt_ftndesc = vtable [imt_slot];
+ftndesc = imt_ftndesc->addr (imt_method, imt_ftndesc->arg);
+<call using ftndesc>
+```
+
+The imt entries are initialized to point to an 'initial imt thunk', which computes the real imt thunk when first called, and replaces the imt entry to point to the real imt thunk. This means that the generated code doesn't need to check whenever the imt entry is initialized.
+
+### Generic virtual calls
+
+These are handled similarly to interface calls.
+
+### Gsharedvt
+
+There are two kinds of gsharedvt methods: ones with a variable signature, and those without one. A variable signature is a signature which includes parameters/return values whose size is not known at compile time. Gsharedvt methods without variable signatures are handler similarly as in normal mode. Methods with variable signatures are handles as follows: all parameters and returned by ref, even the fixed size ones. I.e., for `T foo<T> (int i, T t)`, both 'i' and 't' are passed by ref, and the result is returned by ref using a hidden argument. So the real signature of the gsharedvt version of foo looks like this:
+
+``` c
+void foo (ref T_GSHAREDVT vret, ref int i, ref T_GSHAREDVT t, <rgctx arg>);
+```
+
+Calls between normal and gsharedvt methods with a variable signature go though gsharedvt in/out wrappers. These are normal runtime wrappers generated by the runtime as IL code. The AOT compiler collects every possible concrete signature from the program, and generates in/out wrappers for them. Wrappers for similar signatures are shared to decrease the number of required wrappers.
+
+A gsharedvt in wrapper for the method above looks like this (T==int):
+
+``` c
+int gsharedvt_in_int_int (int i, int t, ftndesc callee)
+{
+    int res;
+
+    callee->addr (&res, &i, &t, callee->arg);
+    return res;
+}
+```
+
+While a gsharedvt out wrapper for the same instantiation looks like:
+
+``` c
+void gsharedvt_out_int_int (ref int vret, ref int i, ref int t, ftndesc callee)
+{
+    *vret = callee->addr (*i, *t, callee->arg);
+}
+```
+
+The last argument to the wrappers is an ftndesc for the method which needs to be called.
+
+### Delegates
+
+In normal mode, delegate trampolines and various small invoke trampolines are used to implement delegate creation/invocation efficiently. In llvmonly mode, we fall back to the normal delegate-invoke wrappers. The delegates need to invoke an ftndesc since the target method can require an extra argument. The 'addr' part of the ftndesc is stored in `MonoDelegate.method_ptr`, and the 'arg' part is stored in `MonoDelegate.extra_arg`. The delegate invoke wrapper uses a special IL opcode called `CEE_MONO_CALLI_EXTRA_ARG` to make the call which takes this into account.
+
+If the target method is gsharedvt, we cannot add an gsharedvt in wrapper around it, since the concrete signature required might not exist at compile time if the delegate is only invoked through a gsharedvt delegate-invoke wrapper. To work around this, we set the lowest bit of `MonoDelegate.extra_arg` to indicate this, and the `CALLI_EXTRA_ARG` opcode generates code which checks at runtime to see which calling conv needs to be used.
+
+### Runtime invoke
+
+Runtime invoke is used to dynamically invoke managed methods. It is implemented using runtime-invoke wrappers, which receive an C array of parameter values, and pass it to a method which is called.
+
+For example, the runtime-invoke wrapper for the `foo<int>` method above looks like:
+
+``` c
+void runtime_invoke_int_int (gpointer[] params, gpointer addr, gpointer *exc)
+{
+    try {
+         int ret = addr (params [0], params [1]);
+         return box(ret, typeof<int>);
+    } catch (Exception ex) {
+         *exc = ex;
+   }
+}
+```
+
+There is one runtime invoke wrapper for each possible signature, with some sharing. To cut down on the number of wrappers generated, in normal mode, we use a 'dyn-call' opcode which can support a large number of signatures.
+
+In llvmonly mode, we use the gsharedvt out wrappers which are already generated to support gsharedvt to implement runtime invokes. This is useful because the possible set of signatures for gsharedvt out wrappers is limited since all their arguments are pointers. Instead of invoking the method directly from the runtime-invoke wrapper, we invoke the gsharedvt out wrapper. So the call looks like this: runtime-invoke wrapper -> gsharedvt out wrapper -> target method.
diff --git a/docs/design/mono/web/coop-suspend.md b/docs/design/mono/web/coop-suspend.md
new file mode 100644
index 000000000000..78bffd3fca07
--- /dev/null
+++ b/docs/design/mono/web/coop-suspend.md
@@ -0,0 +1,243 @@
+# Runtime Cooperative Suspend
+
+## Intro: Preemptive, Cooperative and Hybrid Suspend
+
+The runtime needs to be able to suspend threads to perform all sorts of tasks, the main one being garbage collection.
+Those threads need to be suspended from another and historically Mono used signals (or similar APIs) to do it.
+
+The basic problem is that when the runtime needs to stop threads (for example at some steps during GC) there are two general approaches:
+* Preemptive - the runtime sends a signal to the thread and the signal handler for the thread puts it to sleep until it gets a resume signal. (or on Windows or Apple OSes, it uses a kernel calls to stop the thread).
+   The problem of using signals is that threads are suspended at arbitrary points in time, which requires the suspender
+thread to run in the equivalent of signal context - a very very restrictive setup. Not only that, but the fact that
+threads could be suspended while holding runtime and libc locks meant that not even basic things like printf were available.
+   Also on some platforms (watchOS, WebAssembly) we don't have enough OS facilities to examine the context when a thread is suspended - we can't see the contents of their registers, or their stack, and thus preemptive suspend on those systems wouldn't be useful for GC and other runtime operations that need to examine the state of suspended threads.
+* Cooperative - The alternative is to use cooperative suspend, where threads suspend themselves when the runtime requests it. To make
+this possible, frequent polling and checkpointing are required. This is a well understood model that goes along what
+the industry does.
+   With this, as long as the thread is running managed code, it will eventually reach a safepoint and suspend itself.  The advantage is that it will always be in a "nice" place.
+   There is more to keep track of in cooperative mode when a thread calls native code - while it's in native it won't have safepoints and it might block for arbitrary amounts of time.  So the runtime marks all the places where a thread goes from managed code ("GC Unsafe" - because it can manipulate managed memory) to native code ("GC Safe" - because it's not supposed to access managed memory).  When the thread is in GC Safe mode instead of trying to suspend it, we just let it run until it tries to come back to GC Unsafe mode.
+   The problem with cooperative suspend is that it relies on nice (cooperating) behavior from embedders and from native code - if the native code calls back into Mono suddenly it might be running managed code again when the GC thinks that it is not.  And that can cause problems.   So to use preemptive mode, the native code has to be explicitly annotated with GC transitions - telling the runtime when the thread is switching between GC Safe and GC Unsafe modes.
+* Hybrid suspend - a combination of the previous two approaches.  While the thread is in managed code or in the Mono runtime itself, it is in GC Unsafe mode.  In GC Unsafe mode we will try to suspend it cooperatively by expecting the thread to reach a safepoint and suspend itself.   But when the thread calls out to native code we switch it to GC Safe mode and start preemptively suspending it.  That way no matter what kind of native code it is running, we will stop it and it won't be able to invalidate our assumptions by touching managed memory or calling runtime functions.
+   Hybrid suspend requires even more bookkeeping (every embedding API function needs to switch from GC Safe mode to GC Unsafe on entry and back on exit), but all the bookkeeping is done by the runtime, not by the user code.
+  So hybrid suspend is a good approach because the embedder code doesn't need to be aware of it - it behaves just like preemptive.  But at the same time it is less likely to suspend the thread in a state that is inconvenient for the runtime, unlike preemptive suspend.
+
+## How cooperative and hybrid suspend works
+
+Cooperative suspend limits what a suspender thread can do to simply request that the target thread suspends itself.
+The target thread can serve a suspend in two manners, by frequently polling its state or checkpointing its state
+at points the runtime loses control of the thread (pinvoke, blocking syscall).
+
+We can split code in 3 categories: managed, runtime native code and foreign native code. This tells how coop suspend happens.
+
+### Managed code
+
+Managed code will check for suspend requests on function prologue, catch handlers and the back-edge of loops. This ensures that
+a suspend will be served in a bounded amount of time. Those suspend checks are done at what's referred as safepoints.
+
+This is implemented in mini.c:mono_insert_safepoints. It will add OP_GC_SAFE_POINT ops around the method.
+Then each backend will emit machine code for those new ops. [1]
+
+### Foreign native code
+
+This includes pinvokes and arbitrary native code when the runtime is embedded. Foreig code doesn't touch managed objects
+so it's safe for the GC to ignore both the stack and the code being executed by those.
+
+Before executing a pinvoke, we save the current thread registers and transition it to the equivalent of the suspended state.
+It means the GC can take the saved state as is and ignore that the thread keeps running.
+
+### Runtime native code
+
+This encompasses all runtime code, metatada, utils and mini. Special care must be taken to icalls.
+Runtime code is different as it operates on raw object pointers, meaning that the GC must be aware of them.
+To do so we handle runtime code just as managed code, except we don't get safepoints automatically inserted for us.
+
+Manual insertion of polling code and checkpointing must be done in the runtime. In addition to that, we must be careful
+of how we access managed memory once we save the thread state.
+
+## Current Implementation
+
+The current implementation is a state machine that tells what's the current status of a thread. These are the
+states:
+
+* Starting: Initial state of a thread, nothing interesting should happen while in this state.
+* Detached: Thread is shuting down, it won't touch managed memory or do any runtime work.
+* Running: The thread is running managed or runtime code.  There are no pending suspend requests.
+* AsyncSuspendRequested: The thread is running managed or runtime code and another thread requested that the current thread be suspended.
+* SelfSuspended: Thread suspended by itself.  This happens if a thread tried to switch to blocking, but there was a pending suspend requested and the thread suspended itself instead.  It will go back to running and the switch to blocking will be retried.
+* AsyncSuspended: Thread was async suspended, so it's on a signal handler or thread_suspend was called on it. (This state never happens when running threads are cooperatively suspended)
+* Blocking: The current thread is executing code that won't touch managed memory.  There are no pending suspend requests.
+* BlockingSuspendRequested: The current thread is executing code that won't touch managed memory, and someone requested it to suspend.  In full cooperative mode, the thread is assumed to still be suspended.
+* BlockingSelfSuspended: The current thread finished executing blocking code but there was a pending suspend against it, it's waiting to be resumed.
+* BlockingAsyncSuspended: The current thread was executing in blocking code, but it was preemptively suspended.  This is done in "hybrid" suspend mode.  When the thread resumes, it will go back to executing blocking code.
+
+![Coop state machine transition diagram](images/coop-state-machine.png)
+
+In addition to those states, there are a number of transitions, that are used to move a thread from one state to another.
+
+## mono-threads.c, mono-threads-coop.c, mono-threads-state-machine.c
+
+Thread suspension is modeled with a state machine, which means there are a bunch of transitions. Those
+are implemented in mono-threads-state-machine.c. One function per transition. All manipulation of the thread_state variable happens
+here. New functions must follow the same template of the existing ones and must include every state either on the switch or on the comments.
+
+mono-threads.c is the portable implementation of the threading infrastructure. Which there are multiple backends that implement target
+specific functionality. The amount of ifdefs here should be kept at a minimum.
+
+mono-threads-coop.c is the cooperative backend. It doesn't use any async APIs provided by the OS.
+
+## Adding coop to the runtime
+
+The runtime code must satisfy two properties to work with cooperative suspend, It must suspend in bounded time, by polling and
+check pointing before blocking, and it must coordinate with the GC when accessing the managed heap.
+
+We combine those two properties together are they are completementary. Every region of code in the runtime is then classified
+in one of 3 kinds, which tells what can and can't be done.
+
+### GC unsafe mode
+
+Under this mode, the GC won't be able to proceed until explicit polling or a transition to GC Safe mode happens.
+
+* Can touch managed memory (read/write).
+* Can call GC Unsafe or GC Neutral functions.
+* Can pass managed pointers to GC Safe regions/functions through pinning
+* Can return managed pointers
+* Cannot call foreign native code (embedder callbacks, pinvokes, etc)
+* Cannot call into blocking functions/syscalls
+* Cannot be detached
+
+## GC safe mode
+
+Under this mode, the GC will assume the thread is suspended and will scan the last saved state.
+
+* Can call into foreign functions.
+* Can call into blocking functions/syscalls
+* Can call GC Safe or GC Neutral functions
+* Can read from pinned managed memory
+* Cannot touch managed memory (read/write)
+* Cannot be detached
+
+## GC Neutral mode
+
+This mode only signals that the function works under Safe and Unsafe modes. The actual effect on the GC will depend
+on the dynamic mode the thread is when the function is executed.
+
+* Can call GC Neutral functions
+* Cannot call into foreign functions.
+* Cannot call into blocking functions/syscalls
+* Cannot read from pinned managed memory
+* Cannot touch managed memory (read/write)
+* Cannot be detached
+
+There's a special group of functions that are allowed to run detached. All they are allowed to do is
+attach, pick a GC mode and call into regular GC functions.
+
+All functions can transition from one mode to the other and then back. The runtime provides macros that
+make a region of a function run in a different mode. Those macros are defined in mono-threads-coop.h.
+
+Those macros define a possible transitions between GC safe/unsafe. They are:
+
+### MONO_SUSPEND_CHECK
+
+This polls the current GC state and possibly suspend the thread.
+Ok only under GC unsafe mode.
+
+Use it when a huge computation is happening with no explicit blocking happening.
+
+### MONO_PREPARE_BLOCKING / MONO_FINISH_BLOCKING
+
+Creates a C lexical scope. It causes a transition from Unsafe to Safe mode.
+Ok only under Unsafe mode.
+
+Great around a syscall that can block for a while (sockets, io).
+Managed pointers *cannot* leak into the GC Safe region, as the GC might run while the thread is in this section, and move the referenced object around in the managed heap, leading to an invalid naked object pointer. For example, the following code is broken:
+
+```c
+MonoArray *x;
+int res;
+MONO_PREPARE_BLOCKING
+res = read (1, mono_array_addr (x, char, 0), mono_array_length (x), 0); // if a GC run while read is blocked in the OS, the object x might be moved, and x would then point to garbage, or worst, in the middle of another object. And whenever the OS would write into the buffer passed to read, it would override managed memory.
+MONO_FINISH_BLOCKING
+```
+
+To safely use an object reference in a GC safe section, the object needs to be pinned in the managed heap with a GC handle, and you cannot access any ref field on this object.
+
+### MONO_PREPARE_RESET_BLOCKING / MONO_FINISH_RESET_BLOCKING
+
+Creates a C lexical scope. It causes a transition to Unsafe mode. Resets to the previous mode on exit.
+Ok under any mode.
+
+This covers the case where code was expected to be in GC Safe mode but it now needs to be under GC Unsafe.
+
+For example, the first call to a pinvoke will hit a trampoline that needs to move the runtime back into GC Unsafe
+mode before going around resolving it. Once the pinvoke is resolved, the previous mode must be restored.
+
+## Managed object handles
+
+Mono coop handles (`MonoObjectHandle`) allow native code to hold a
+handle to a managed object.  While currently raw pointers to managed
+objects in native code work without problems, they do so only because
+we use a conservative technique when the garbage collector is scanning
+the native stack: every object that looks like it may be referenced
+from the native stack is pinned.
+
+In the future, we want to move away from conservative scanning, and
+coop handles give native code a way to coordinate with the GC.
+
+TODO: Document this more
+
+### MONO_PREPARE_GC_CRITICAL_REGION / MONO_FINISH_GC_CRITICAL_REGION
+
+When a thread is in Unsafe mode and uses the coop handles, it may need
+to enter a *GC critical region* where it is manipulating the managed
+objects in a non-atomic manner and must not be interrupted by the GC.
+
+In a GC critical region:
+
+* The thread *must not* transition from Unsafe to Safe mode.
+* The thread *may* use `gc_handle_obj` to get a raw pointer to a managed object from a coop handle.
+
+GC critical regions may be nested (for example, you may enter a GC
+critical region and then call a function that again enters a GC
+critical region).
+
+#### MONO_REQ_GC_CRITICAL and MONO_REC_GC_NOT_CRITICAL
+
+In checked Mono builds, this pair of macros can be used to assert that
+the thread is (respectively, isn't) in a GC critical region.
+
+## Debugging
+
+There are two debug helpers in place. The first is the thread state dump when we fail to suspend in time.
+It dumps the thread state of each thread plus a cue card on the beginning to help us parse it.
+
+The second one are the toggles in mono-threads.h for specific logging of threading events. Those are VERY verbose
+but do help figure out what's going on.
+
+## Known issues
+
+### Can't handle the embedding API
+
+The current system doesn't take into account the runtime being used embedded. This boils down to a couple of issues.
+First, if a native thread calls into managed then keep doing its thing. We might not be leaving the thread in the
+appropriate state.
+
+Second, the embedding API allows for raw object access, which is incompatible with coop. We need to figure out how to expose
+coop to embedders.
+
+### Thread start/finish still bad
+
+There are a lot of hacks around how we handle threads starting and finishing. If a suspend hits a thread while it's
+starting/finishing we fail every now and then.
+
+### Non nested blocking state
+
+An early decision that I made was to disallow nested blocking states. It was forbidden because it's more complicated and
+could hide bugs in between the nesting. The downside is that it's hard to cover large blocks of code under a single blocking region.
+
+### Thread attach/detach
+
+This aspect of the runtime is due to some revision. I don't think it goes well with what we need now.
+
+## References
+
+[1] <https://github.com/mono/mono/commit/0e12ff3017d470676e94e561cd0de4ca22230532>
diff --git a/docs/design/mono/web/exception-handling.md b/docs/design/mono/web/exception-handling.md
new file mode 100644
index 000000000000..2561c9245e89
--- /dev/null
+++ b/docs/design/mono/web/exception-handling.md
@@ -0,0 +1,188 @@
+# Exception Handling
+
+Exception Handling In the Mono Runtime
+--------------------------------------
+
+### Introduction
+
+There are many types of exceptions which the runtime needs to handle. These are:
+
+-   exceptions thrown from managed code using the 'throw' or 'rethrow' CIL instructions.
+
+-   exceptions thrown by some IL instructions like InvalidCastException thrown by the 'castclass' CIL instruction.
+
+-   exceptions thrown by runtime code
+
+-   synchronous signals received while in managed code
+
+-   synchronous signals received while in native code
+
+-   asynchronous signals
+
+Since exception handling is very arch dependent, parts of the exception handling code reside in the arch specific exceptions-\<ARCH\>.c files. The architecture independent parts are in mini-exceptions.c. The different exception types listed above are generated in different parts of the runtime, but ultimately, they all end up in the mono_handle_exception () function in mini-exceptions.c.
+
+### Exceptions throw programmatically from managed code
+
+These exceptions are thrown from managed code using 'throw' or 'rethrow' CIL instructions. The JIT compiler will translate them to a call to a helper function called 'mono_arch_throw/rethrow_exception'.
+
+These helper functions do not exist at compile time, they are created dynamically at run time by the code in the exceptions-\<ARCH\>.c files.
+
+They perform various stack manipulation magic, then call a helper function usually named throw_exception (), which does further processing in C code, then calls mono_handle_exception() to do the rest.
+
+### Exceptions thrown implicitly from managed code
+
+These exceptions are thrown by some IL instructions when something goes wrong. When the JIT needs to throw such an exception, it emits a forward conditional branch and remembers its position, along with the exception which needs to be emitted. This is usually done in macros named EMIT_COND_SYSTEM_EXCEPTION in the mini-\<ARCH\>.c files.
+
+After the machine code for the method is emitted, the JIT calls the arch dependent mono_arch_emit_exceptions () function which will add the exception throwing code to the end of the method, and patches up the previous forward branches so they will point to this code.
+
+This has the advantage that the rarely-executed exception throwing code is kept separate from the method body, leading to better icache performance.
+
+The exception throwing code braches to the dynamically generated mono_arch_throw_corlib_exception helper function, which will create the proper exception object, does some stack manipulation, then calls throw_exception ().
+
+### Exceptions thrown by runtime code
+
+These exceptions are usually thrown by the implementations of InternalCalls (icalls). First an appropriate exception object is created with the help of various helper functions in metadata/exception.c, which has a separate helper function for allocating each kind of exception object used by the runtime code. Then the mono_raise_exception () function is called to actually throw the exception. That function never returns.
+
+An example:
+
+       if (something_is_wrong)
+          mono_raise_exception (mono_get_exception_index_out_of_range ());
+
+mono_raise_exception () simply passes the exception to the JIT side through an API, where it will be received by the helper created by mono_arch_throw_exception (). From now on, it is treated as an exception thrown from managed code.
+
+### Synchronous signals
+
+For performance reasons, the runtime does not do same checks required by the CLI spec. Instead, it relies on the CPU to do them. The two main checks which are omitted are null-pointer checks, and arithmetic checks. When a null pointer is dereferenced by JITted code, the CPU will notify the kernel through an interrupt, and the kernel will send a SIGSEGV signal to the process. The runtime installs a signal handler for SIGSEGV, which is sigsegv_signal_handler () in mini.c. The signal handler creates the appropriate exception object and calls mono_handle_exception () with it. Arithmetic exceptions like division by zero are handled similarly.
+
+### Synchronous signals in native code
+
+Receiving a signal such as SIGSEGV while in native code means something very bad has happened. Because of this, the runtime will abort after trying to print a managed plus a native stack trace. The logic is in the mono_handle_native_sigsegv () function.
+
+Note that there are two kinds of native code which can be the source of the signal:
+
+-   code inside the runtime
+-   code inside a native library loaded by an application, ie. libgtk+
+
+### Stack overflow checking
+
+Stack overflow exceptions need special handling. When a thread overflows its stack, the kernel sends it a normal SIGSEGV signal, but the signal handler tries to execute on the same stack as the thread leading to a further SIGSEGV which will terminate the thread. A solution is to use an alternative signal stack supported by UNIX operating systems through the sigaltstack (2) system call. When a thread starts up, the runtime will install an altstack using the mono_setup_altstack () function in mini-exceptions.c. When a SIGSEGV is received, the signal handler checks whenever the fault address is near the bottom of the threads normal stack. If it is, a StackOverflowException is created instead of a NullPointerException. This exception is handled like any other exception, with some minor differences.
+
+There are two reasons why sigaltstack is disabled by default:
+
+-   The main problem with sigaltstack() is that the stack employed by it is not visible to the GC and it is possible that the GC will miss it.
+
+-   Working sigaltstack support is very much os/kernel/libc dependent, so it is disabled by default.
+
+### Asynchronous signals
+
+Async signals are used by the runtime to notify a thread that it needs to change its state somehow. Currently, it is used for implementing thread abort/suspend/resume.
+
+Handling async signals correctly is a very hard problem, since the receiving thread can be in basically any state upon receipt of the signal. It can execute managed code, native code, it can hold various managed/native locks, or it can be in a process of acquiring them, it can be starting up, shutting down etc. Most of the C APIs used by the runtime are not asynch-signal safe, meaning it is not safe to call them from an async signal handler. In particular, the pthread locking functions are not async-safe, so if a signal handler interrupted code which was in the process of acquiring a lock, and the signal handler tries to acquire a lock, the thread will deadlock.
+
+When receiving an async signal, the signal handler first tries to determine whenever the thread was executing managed code when it was interrupted. If it did, then it is safe to interrupt it, so a ThreadAbortException is constructed and thrown. If the thread was executing native code, then it is generally not safe to interrupt it. In this case, the runtime sets a flag then returns from the signal handler. That flag is checked every time the runtime returns from native code to managed code, and the exception is thrown then. Also, a platform specific mechanism is used to cause the thread to interrupt any blocking operation it might be doing.
+
+The async signal handler is in sigusr1_signal_handler () in mini.c, while the logic which determines whenever an exception is safe to be thrown is in mono_thread_request_interruption ().
+
+### Stack unwinding during exception handling
+
+The execution state of a thread during exception handling is stored in an arch-specific structure called MonoContext. This structure contains the values of all the CPU registers relevant during exception handling, which usually means:
+
+-   IP (instruction pointer)
+-   SP (stack pointer)
+-   FP (frame pointer)
+-   callee saved registers
+
+Callee saved registers are the registers which are required by any procedure to be saved/restored before/after using them. They are usually defined by each platforms ABI (Application Binary Interface). For example, on x86, they are EBX, ESI and EDI.
+
+The code which calls mono_handle_exception () is required to construct the initial MonoContext. How this is done depends on the caller. For exceptions thrown from managed code, the mono_arch_throw_exception helper function saves the values of the required registers and passes them to throw_exception (), which will save them in the MonoContext structure. For exceptions thrown from signal handlers, the MonoContext stucture is initialized from the signal info received from the kernel.
+
+During exception handling, the runtime needs to 'unwind' the stack, i.e. given the state of the thread at a stack frame, construct the state at its callers. Since this is platform specific, it is done by a platform specific function called mono_arch_find_jit_info ().
+
+Two kinds of stack frames need handling:
+
+-   Managed frames are easier. The JIT will store some information about each managed method, like which callee-saved registers it uses. Based on this information, mono_arch_find_jit_info () can find the values of the registers on the thread stack, and restore them. On some platforms, the runtime now uses a generic unwinder based on the [DWARF unwinding interface](http://dwarfstd.org/Dwarf3.pdf). The generic unwinder is in the files unwind.h/unwind.c.
+
+-   Native frames are problematic, since we have no information about how to unwind through them. Some compilers generate unwind information for code, some don't. Also, there is no general purpose library to obtain and decode this unwind information. So the runtime uses a different solution. When managed code needs to call into native code, it does through a managed-\>native wrapper function, which is generated by the JIT. This function is responsible for saving the machine state into a per-thread structure called MonoLMF (Last Managed Frame). These LMF structures are stored on the threads stack, and are linked together using one of their fields. When the unwinder encounters a native frame, it simply pops one entry of the LMF 'stack', and uses it to restore the frame state to the moment before control passed to native code. In effect, all successive native frames are skipped together.
+
+### Problems/future work
+
+#### Raising exceptions from native code
+
+Currently, exceptions are raised by calling mono_raise_exception () in the middle of runtime code. This has two problems:
+
+-   No cleanup is done, ie. if the caller of the function which throws an exception has taken locks, or allocated memory, that is not cleaned up. For this reason, it is only safe to call mono_raise_exception () 'very close' to managed code, ie. in the icall functions themselves.
+
+-   To allow mono_raise_exception () to unwind through native code, we need to save the LMF structures which can add a lot of overhead even in the common case when no exception is thrown. So this is not zero-cost exception handling.
+
+An alternative might be to use a JNI style set-pending-exception API. Runtime code could call mono_set_pending_exception (), then return to its caller with an error indication allowing the caller to clean up. When execution returns to managed code, then managed-\>native wrapper could check whenever there is a pending exception and throw it if neccesary. Since we already check for pending thread interruption, this would have no overhead, allowing us to drop the LMF saving/restoring code, or significant parts of it.
+
+### libunwind
+
+There is an OSS project called libunwind which is a standalone stack unwinding library. It is currently in development, but it is used by default by gcc on ia64 for its stack unwinding. The mono runtime also uses it on ia64. It has several advantages in relation to our current unwinding code:
+
+-   it has a platform independent API, i.e. the same unwinding code can be used on multiple platforms.
+
+-   it can generate unwind tables which are correct at every instruction, i.e. can be used for unwinding from async signals.
+
+-   given sufficient unwind info generated by a C compiler, it can unwind through C code.
+
+-   most of its API is async-safe
+
+-   it implements the gcc C++ exception handling API, so in theory it can be used to implement mixed-language exception handling (i.e. C++ exception caught in mono, mono exception caught in C++).
+
+-   it is MIT licensed
+
+The biggest problem with libuwind is its platform support. ia64 support is complete/well tested, while support for other platforms is missing/incomplete.
+
+[http://www.hpl.hp.com/research/linux/libunwind/](http://www.hpl.hp.com/research/linux/libunwind/)
+
+### Architecture specific functions for EH
+
+This section contains documentation for the architecture specific functions which are needed to be implemented by each backend. These functions usually reside in the exceptions-\<ARCH\>.c file.
+
+#### mono_arch_handle_exception ()
+
+Prototype:
+
+``` bash
+gboolean
+mono_arch_handle_exception (void *ctx, gpointer obj);
+```
+
+This function is called by signal handlers. It receives the machine state as passed to the signal handlers in he CTX argument. On unix, this is an uncontext_t structure, It also receives the exception object in OBJ, which might be null. Handling exceptions in signal handlers is problematic for many reasons, so this function should set up CTX so when the signal handler returns, execution continues in another runtime function which does the real work. CTX/OBJ needs to be passed to that function. The former can be passed in TLS, while the later has to be passed in registers/on the stack (by modifying CTX), since TLS storage might not be GC tracked.
+
+[Original version of this document in git.](https://github.com/mono/mono/blob/2279f440996923ac66a6ea85cf101d89615aad69/docs/exception-handling.txt)
+
+#### mono_arch_get_restore_context ()
+
+Prototype:
+
+``` bash
+gpointer
+mono_arch_get_restore_context (MonoTrampInfo **info, gboolean aot);
+```
+
+This function should return a trampoline with the following signature:
+
+``` bash
+void restore_context (MonoContext *ctx);
+```
+
+The trampoline should set the machine state to the state in CTX, then jump to the PC in CTX. Only a subset of the state needs to be restored, i.e. the callee saved registers/sp/fp.
+
+#### mono_arch_get_call_filter ()
+
+Prototype:
+
+``` bash
+gpointer
+mono_arch_get_call_filter (MonoTrampInfo **info, gboolean aot)
+```
+
+This function should return a trampoline with the following signature:
+
+``` bash
+int call_filter (MonoContext *ctx, gpointer addr);
+```
+
+This trampoline is used to call finally and filter clauses during exception handling. It should setup a new stack frame, save callee saved registers there, restore the same registers from CTX, then make a call to ADDR, restore the saved registers, and return the result returned by the call as its result. Finally clauses need access to the method state, but they need to make calls etc too, so they execute in a nonstandard stack frame, where FP points to the original FP of the method frame, while SP is normal, i.e. it is below the frame created by call_filter (). This means that call_filter () needs to load FP from CTX, but it shouldn't load SP.
diff --git a/docs/design/mono/web/generic-sharing.md b/docs/design/mono/web/generic-sharing.md
new file mode 100644
index 000000000000..a671ac5c39ad
--- /dev/null
+++ b/docs/design/mono/web/generic-sharing.md
@@ -0,0 +1,139 @@
+# Generic Sharing
+
+Source code
+----------
+
+The code which implements generic sharing is in `mini-generic-sharing.c`. The architecture specific parts are in `mini-<arch>.c` and `tramp-<arch>.c`.
+
+RGCTX register
+--------------
+
+Generic shared code needs access to type information. This information is contained in a RGCTX for non-generic methods and in an MRGCTX for generic methods. It is passed in one of several ways, depending on the type of the called method:
+
+1.  Non-generic non-static methods of reference types have access to the RGCTX via the "this" argument (this-\>vtable-\>rgctx).
+
+2.  Non-generic static methods of reference types and non-generic methods of value types need to be passed a pointer to the caller's class's VTable in the MONO_ARCH_RGCTX_REG register.
+
+3.  Generic methods need to be passed a pointer to the MRGCTX in the `MONO_ARCH_RGCTX_REG` register.
+
+The `MONO_ARCH_RGCTX_REG` must not be clobbered by trampolines.
+
+`MONO_ARCH_RGCTX_REG` is the same as the IMT register on all platforms. The reason for this is that the RGCTX register is used to pass information to a concrete method, while the IMT register is used for indirect calls where
+the called method is not known, so the the same call doesn't use both an RGCTX and an IMT register.
+
+This register lifetime starts at the call site that loads it and ends in the callee prologue when it is either discarded or stored into a local variable.
+
+It's better to avoid registers used for argument passing for the RGCTX as it would make the code dealing with calling conventions code a lot harder.
+
+For indirect calls, the caller doesn't know the RGCTX value which needs to be passed to the callee. In this case, an 'rgctx trampoline' is used. These are small trampolines created by `mono_create_static_rgctx_trampoline()`. The caller calls the trampoline, which sets the RGCTX to the required value and jumps to the callee. These trampolines are inserted into the call chain when indirect calls are used (virtual calls, delegates, runtime invoke etc.).
+
+An alternative design would pass the rgctx as a normal parameter, which would avoid the need for an RGCTX register. The problem with this approach is that the caller might not know whenever the callee needs an RGCTX argument
+or not. I.e. the callee might be a non-shared method, or even a non-generic method (i.e. `Action<int>` can end up calling a `foo(int)` or a `foo<T> (T)` instantiated with `int`.).
+
+Method prologue
+---------------
+
+Generic shared code that have a `RGCTX` receive it in `RGCTX_REG`. There must be a check in mono_arch_emit_prolog for MonoCompile::rgctx_var and if set store it. See mini-x86.c for reference.
+
+Dealing with types
+------------------
+
+During JITting and at runtime, the generic parameters used in shared methods are represented by a `MonoGenericParam` with the `gshared_constraint` field pointing to a `MonoType` which identifies the set of types this
+generic param is constrained to. If the constraint is `object`, it means the parameter can match all reference types. If its `int`, it can match `int` and all enums whose basetype is `int` etc.
+
+Calling `mini_get_underlying_type()` on the type will return the constraint type. This is used through the JIT to handle generic parameters without needing to special case them, since for example, a generic parameter constrained to be a reference type can be handled the same way as `MONO_TYPE_OBJECT`.
+
+(M)RGCTX lazy fetch trampoline
+------------------------------
+
+The purpose of the lazy fetch trampoline is to fetch a slot from an (M)RGCTX which might not be inited, yet. In the latter case, it needs to go make a transition to unmanaged code to fill the slot. This is the layout of a RGCTX:
+
+         +---------------------------------+
+         | next | slot 0 | slot 1 | slot 2 |
+         +--|------------------------------+
+            |
+      +-----+
+      |  +---------------------------------
+      +->| next | slot 3 | slot 4 | slot 5 ....
+         +--|------------------------------
+            |
+      +-----+
+      |  +------------------------------------
+      +->| next | slot 10 | slot 11 | slot 12 ....
+         +--|---------------------------------
+            .
+            .
+            .
+
+For fetching a slot from a RGCTX the trampoline is passed a pointer (as a normal integer argument) to the VTable. From there it has to fetch the pointer to the RGCTX, which might be null. Then it has to traverse the correct number of "next" links, each of which might be NULL. Arriving at the right array it needs to fetch the slot, which might also be NULL. If any of the NULL cases, the trampoline must transition to unmanaged code to potentially setup the RGCTX and fill the slot. Here is pseudo-code for fetching slot 11:
+
+        ; vtable ptr in r1
+        ; fetch RGCTX array 0
+        r2 = *(r1 + offsetof(MonoVTable, runtime_generic_context))
+        if r2 == NULL goto unmanaged
+        ; fetch RGCTX array 1
+        r2 = *r2
+        if r2 == NULL goto unmanaged
+        ; fetch RGCTX array 2
+        r2 = *r2
+        if r2 == NULL goto unmanaged
+        ; fetch slot 11
+        r2 = *(r2 + 2 * sizeof (gpointer))
+        if r2 == NULL goto unmanaged
+        return r2
+      unmanaged:
+        jump unmanaged_fetch_code
+
+The number of slots in the arrays must be obtained from the function `mono_class_rgctx_get_array_size()`.
+
+The MRGCTX case is different in two aspects. First, the trampoline is not passed a pointer to a VTable, but a pointer directly to the MRGCTX, which is guaranteed not to be NULL (any of the next pointers and any of the slots can be NULL, though). Second, the layout of the first array is slightly different, in that the first two slots are occupied by a pointers to the class's VTable and to the method's method_inst. The next pointer is in the third slot and the first actual slot, "slot 0", in the fourth:
+
+         +--------------------------------------------------------+
+         | vtable | method_inst | next | slot 0 | slot 1 | slot 2 |
+         +-------------------------|------------------------------+
+                                   .
+                                   .
+
+All other arrays have the same layout as the RGCTX ones, except possibly for their length.
+
+The function to create the trampoline, mono_arch_create_rgctx_lazy_fetch_trampoline(), gets passed an encoded slot number. Use the macro `MONO_RGCTX_SLOT_IS_MRGCTX` to query whether a trampoline for an MRGCTX is needed, as opposed to one for a RGCTX. Use `MONO_RGCTX_SLOT_INDEX` to get the index of the slot (like 2 for "slot 2" as above). The unmanaged fetch code is yet another trampoline created via `mono_arch_create_specific_trampoline()`, of type `MONO_TRAMPOLINE_RGCTX_LAZY_FETCH`. It's given the slot number as the trampoline argument. In addition, the pointer to the VTable/MRGCTX is passed in `MONO_ARCH_VTABLE_REG` (like the VTable to the generic class init trampoline - see above).
+
+The RGCTX fetch trampoline code doesn't return code that must be jumped to, so, like for those trampolines (see above), the generic trampoline code must do a normal return instead.
+
+Getting generics information about a stack frame
+------------------------------------------------
+
+If a method is compiled with generic sharing, its `MonoJitInfo` has the `has_generic_jit_info` bit set. In that case, the `mono_jit_info_get_generic_jit_info()` function will return
+a `MonoGenericJitInfo` structure.
+
+The `MonoGenericJitInfo` contains information about the location of the this/vtable/MRGCTX variable, if the `has_this` flag is set. If that is the case, there are two possibilities:
+
+1.  `this_in_reg` is set. `this_reg` is the number of the register where the variable is stored.
+
+2.  `this_in_reg` is not set. The variable is stored at offset `this_offset` from the address in the register with number `this_reg`.
+
+The variable can either point to the "this" object, to a vtable or to an MRGCTX:
+
+1.  If the method is a non-generic non-static method of a reference type, the variable points to the "this" object.
+
+2.  If the method is a non-generic static method or a non-generic method of a value type, the variable points to the vtable of the class.
+
+3.  If the method is a generic method, the variable points to the MRGCTX of the method.
+
+Layout of the MRGCTX
+--------------------
+
+The MRGCTX is a structure that starts with `MonoMethodRuntimeGenericContext`, which contains a pointer to the vtable of the class and a pointer to the `MonoGenericInst` with the type arguments for the method.
+
+Blog posts about generic code sharing
+-------------------------------------
+
+-   [September 2007: Generics Sharing in Mono](http://schani.wordpress.com/2007/09/22/generics-sharing-in-mono/)
+-   [October 2007: The Trouble with Shared Generics](http://schani.wordpress.com/2007/10/12/the-trouble-with-shared-generics/)
+-   [October 2007: A Quick Generics Sharing Update](http://schani.wordpress.com/2007/10/15/a-quick-generics-sharing-update/)
+-   [January 2008: Other Types](http://schani.wordpress.com/2008/01/29/other-types/)
+-   [February 2008: Generic Types Are Lazy](http://schani.wordpress.com/2008/02/25/generic-types-are-lazy/)
+-   [March 2008: Sharing Static Methods](http://schani.wordpress.com/2008/03/10/sharing-static-methods/)
+-   [April 2008: Sharing Everything And Saving Memory](http://schani.wordpress.com/2008/04/22/sharing-everything-and-saving-memory/)
+-   [June 2008: Sharing Generic Methods](http://schani.wordpress.com/2008/06/02/sharing-generic-methods/)
+-   [June 2008: Another Generic Sharing Update](http://schani.wordpress.com/2008/06/27/another-generic-sharing-update/)
diff --git a/docs/design/mono/web/generics.md b/docs/design/mono/web/generics.md
new file mode 100644
index 000000000000..f2032da1f448
--- /dev/null
+++ b/docs/design/mono/web/generics.md
@@ -0,0 +1,58 @@
+# Generics
+
+Terminology
+-----------
+
+Type/Method instantiation == Type/Method instance == Inflated Type/Method.
+
+Generic Type Definitions
+------------------------
+
+These are represented by a normal `MonoClass` structure with the `generic_container` field set. This field points to a `MonoGenericContainer` structure, which stores information about the generic parameters of the generic type.
+
+Generic Type Instantiations
+---------------------------
+
+These are represented by a pair of `MonoGenericClass` and `MonoClass` structures. The `generic_class` field in MonoClass is used to link the two together. The reason for the split is to avoid allocating a large MonoClass if not needed.
+
+It would have been better to name `MonoGenericClass` `MonoInflatedClass` or something similar.
+
+Generic Method Definitions
+--------------------------
+
+These are represented by a `MonoMethod` structure with the `is_generic` field set to 1.
+
+Generic Method Instantiations
+-----------------------------
+
+These are represented by a `MonoMethodInflated` structure, which is an extension of the `MonoMethod` structure. Its `is_inflated` field is set to 1.
+
+One consequence of this design is that a method cannot be a pinvoke method/wrapper/dynamic method and an inflated method at the same time.
+
+MonoGenericContext
+------------------
+
+This structure holds information of an instantiation of a set of generic parameters with generic arguments. It is used by both type and method instatiations.
+
+Canonical generic instances
+---------------------------
+
+The runtime canonizes generic type/method instances, so for every set of generic arguments, there is only one type/method instance with those arguments. This is using caches in `metadata.c`.
+
+Lifetime of inflated types/methods
+----------------------------------
+
+Inflated types and methods depend on the assembly of the generic type/method definition they are inflated from, along with the assemblies of their generic arguments. This is handled using the concept of 'image sets' in metadata.c. Every inflated type/method belongs to an image set, which is a set of MonoImages. When one of the assemblies in an image set is unloaded, all the inflated types/methods belonging to the image set are freed. Memory for inflated types/methods cannot be allocated from mempools, it is allocated from the heap. The `mono_class_alloc/alloc0` functions can be used to allocate memory from the appropriate place.
+
+System.Reflection.Emit
+----------------------
+
+Generics support in System.Reflection.Emit (SRE) is very problematic because it is possible to create generic instances of not yet created dynamic types, i.e. if T is a generic TypeBuilder, it is possible to create T\<int\>. The latter is not a TypeBuilder any more, but a normal Type, which presents several problems:
+
+-   this type needs to be kept in sync with the original TypeBuilder, i.e. if methods/fields are added to the TypeBuilder, this should be reflected in the instantiation.
+-   this type cannot be used normally until its TypeBuilder is finished, ie. its not possible to create instances of it etc.
+
+These problems are currently handled by a hierarchy of C# classes which inherit from the normal reflection classes:
+
+-   `MonoGenericClass` represents an instantiation of a generic TypeBuilder. MS.NET calls this `TypeBuilderInstantiation`, a much better name.
+-   `Method/Field/Event/PropertyOnTypeBuilderInst` represents a method/field etc. of a `MonoGenericClass`.
diff --git a/docs/design/mono/web/glossary.md b/docs/design/mono/web/glossary.md
new file mode 100644
index 000000000000..46b95a919051
--- /dev/null
+++ b/docs/design/mono/web/glossary.md
@@ -0,0 +1,15 @@
+# Glossary
+
+This is a glossary of terms/abbreviations used in the runtime source code
+-------------------------------------------------------------------------
+
+-   AOT - Ahead of Time Compiler
+-   EH - Exception Handling
+-   GC - Garbage Collector
+-   JIT - Just In Time Compiler
+-   Boehm - The Boehm Conservative Garbage Collector
+-   trampoline - A function implemented using hand written assembly code. It is usually called from JITted code.
+-   SGEN - Mono's own generational garbage collector.
+-   SRE - System.Reflection.Emit
+-   vt - Valuetype
+-   vtype - Valuetype
diff --git a/docs/design/mono/web/gsharedvt.md b/docs/design/mono/web/gsharedvt.md
new file mode 100644
index 000000000000..f91d4dfd7de3
--- /dev/null
+++ b/docs/design/mono/web/gsharedvt.md
@@ -0,0 +1,195 @@
+# Generic sharing for valuetypes
+
+## The problem
+
+In some environments like ios, its not possible to generate native code at runtime. This means that we have to compile all possible methods used by the application at compilation time. For generic methods, this is not always possible, i.e.:
+
+``` c
+interface IFace {
+    void foo<T> (T t);
+}
+
+class Class1 : IFace {
+    public virtual void foo<T> (T t) {
+      ...
+    }
+}
+
+IFace o = new Class1 ();
+o.foo<int> ();
+```
+
+In this particular case, it is very hard to determine at compile time that `Class1:foo<int>` will be needed at runtime. For generic methods instantiated with reference types, the mono runtime supports 'generic sharing'.
+
+This means that we only compile one version of the method, and use it for all instantiations made with reference types, i.e. `Array.Sort<string>` and `Array.Sort<object>` is actually the same native method at runtime. Generating native code for generic shared methods is not very complex since all reference types have the same size: 1 word.
+
+In order to extend generic sharing to valuetypes, we need to solve many problems. Take the following method:
+
+``` c
+void swap<T> (T[] a, int i, int j)
+{
+   var t = a [i];
+   a [i] = a [j];
+   a [j] = t;
+}
+```
+
+Here, the size of 'T' is only known at runtime, so we don't know how much stack space to allocate for 't', or how much memory to copy from a \[i\] to t in the first assignment.
+
+For methods which contain their type parameters in their signatures, the situation is even more complex:
+
+``` c
+public T return_t<T> (T t) {
+    return t;
+}
+```
+
+Here, the native signature of the method depends on its type parameter. One caller might call this as `return_t<int> (1)`, passing in an int in one register, and expecting the result to be in the return register, while another might call this with a struct, passing it in registers and/or the stack, and expecting the result to be in a memory area whose address was passed in as an extra hidden parameter.
+
+## Basic implementation
+
+### Inside methods
+
+We refer to types which are type variables, or generic instances instantiated with type variables as 'gsharedvt types'. Types whose size depends on type variables are referred as 'variable types'. Since the size of variable types is only known at runtime, we cannot allocate static stack slots for them. Instead, we allocate a stack area for them at runtime using localloc, and dynamically compute their address when needed. The information required for this is stored in a `MonoGSharedVtMethodRuntimeInfo` structure. This structure is stored in an rgctx slot. At the start of the method, the following pseudo code is used to initialize the locals area:
+
+``` c
+info_var = rgctx_fetch(<METHOD GSHAREDVT INFO>)
+locals_var = localloc (info_var->locals_size)
+```
+
+Whenever an address of a variable sized locals is required, its computed using:
+
+``` c
+locals_var + info_var->locals_offsets [<local idx>]
+```
+
+Local variables are initialized using memset, and copied using memcpy. The size of the locals is fetched from the rgctx. So
+
+``` c
+T a = b;
+```
+
+is compiled to:
+
+``` c
+a_addr = locals_var + info_var->locals_offsets [<a idx>]
+b_addr = locals_var + info_var->locals_offsets [<b idx>]
+size = rgctx_fetch(<T size>)
+memcpy(a_addr, b_addr, size)
+```
+
+Methods complied with this type of sharing are called 'gsharedvt' methods.
+
+### Calling gsharedvt methods
+
+GSharedvt methods whose signature includes variable types use a different calling convention where gsharedvt arguments are passed by ref.
+
+``` c
+foo(int,int,int,T)
+```
+
+is called using:
+
+``` c
+foo(inti,int,int,T&)
+```
+
+The return value is returned using the same calling convention used to return large structures, i.e. by passing a hidden parameter pointing to a memory area where the method is expected to store the return value.
+
+When a call is made to a generic method from a normal method, the caller uses a signature with concrete types, i.e.: `return_t<int> (1)`. If the callee is also a normal method, then there is no further work needed. However, if the callee is a gsharedvt method, then we have to transition between the signature used by the caller (int (int) in this case), and the signature used by the callee . This process is very low level and architecture specific.
+
+It typically involves reordering values in registers, stack slots etc. It is done by a trampoline called the gsharedvt trampoline. The trampoline receives a pointer to an info structure which describes the calling convention used by the caller and the callee, and the steps needed to transition between the two. The info structure is not passed by the caller, so we use another trampoline to pass the info structure to the trampoline:
+
+So a call goes:
+
+``` c
+<caller> -> <gsharedvt arg trampoline> -> <gsharedvt trampoline> -> <callee>
+```
+
+The same is true in the reverse case, i.e. when the caller is a gsharedvt method, and the callee is a normal method.
+
+The info structure contains everything need to transfer arguments and make the call, this includes:
+
+-   the callee address.
+-   an rgctx to pass to the callee.
+-   a mapping for registers and stack slots.
+-   whenever this in an 'in' or 'out' case.
+-   etc.
+
+As an example, here is what happens for the `return_t<int>` case on ARM:
+
+-   The caller passes in the argument in r0, and expects the return value to be in r0.
+
+-   The callee receives the address of the int value in r1, and it receives the valuetype return address in r0.
+
+Here is the calling sequence:
+
+-   The caller puts the value 1 in r0, then makes the call, which goes to the trampoline code.
+
+-   The trampoline infrastructure detects that the call needs a gsharedvt trampoline. It computes the info structure holding the calling convention information, then creates a gsharedvt arg trampoline for it.
+
+-   The gsharedvt arg trampoline is called, which calls the gsharedvt trampoline, passing the info structure as an argument.
+
+-   The trampoline allocates a new stack frame, along with a 1 word area to hold the return value.
+
+-   It receives the parameter value in r0, saves it into one of its stack slots, and passes the address of the stack slot in r1.
+
+-   It puts the address of the return value into r0.
+
+-   It calls the gsharedvt method.
+
+-   The method copies the memory pointed to by r1 to the memory pointed to by r0, and returns to the trampoline.
+
+-   The trampoline loads the return value from the return value area into r0 and returns to the caller.
+
+-   The caller receives the return value in r0.
+
+For exception handling purposes, we create a wrapper method for the gsharedvt trampoline, so it shows up in stack traces, and the unwind code can unwind through it. There are two kinds of wrappers, 'in' and 'out'. 'in' wrappers handle calls made to gsharedvt methods from callers which use a variable signature, while 'out' wrappers handle calls made to normal methods from callers which use a variable signature. In later parts of this document, we use the term 'wrapper' to mean a gsharedvt arg trampoline.
+
+### Making calls out of gsharedvt methods
+
+#### Normal calls using a non-variable signature
+
+These are handed normally.
+
+#### Direct calls made using a variable signature
+
+These have several problems:
+
+-   The callee might end up being a gsharedvt or a non-gsharedvt method. The former doesn't need a wrapper, the latter does.
+
+-   The wrapper needs to do different things for different instantiations. This means that the call cannot be patched to go to a wrapper, since the wrapper is specific to one instantiation.
+
+To solve these problems, we make an indirect call through an rgctx entry. The rgctx entry resolver code determines what wrapper is needed, and patches the rgctx entry with the address of the wrapper, so later calls made from the gsharedvt method with the same instantiation will go straight to the wrapper.
+
+#### Virtual calls made using a variable signature
+
+Virtual methods have an extra complexity: there is only one vtable entry for a method, and it can be called by both normal and gsharedvt code. To solve this, when a virtual method is compiled as gsharedvt, we put an 'in' wrapper around it, and put the address of this wrapper into the vtable slot, instead of the method code. The virtual call will add an 'out' wrapper, so the call sequence will be:
+
+``` c
+<caller> -> <out wrapper> -> <in wrapper> -> <callee>
+```
+
+## AOT support
+
+We AOT a gsharedvt version of every generic method, and use it at runtime if the specific instantiation of a method is not found. We also save the gsharedvt trampoline to the mscorlib AOT image, along with a bunch of gsharedvt arg trampolines.
+
+## Implementation details
+
+The gsharedvt version of a method is represented by inflating the method with type parameters, just like in the normal gshared case. To distinguish between the two, we use anon generic parameters whose `gshared_constraint` field is set to point to a valuetype.
+
+Relevant files/functions include:
+
+-   `method-to-ir.c`:
+-   `mini-generic-sharing.c`: `instantiate_info ()`: This contains the code which handles calls made from gsharedvt methods through an rgctx entry.
+-   `mini-trampolines.c` `mini_add_method_trampolines ()`: This contains the code which handles calls made from normal methods to gsharedvt methods.
+-   `mini-<ARCH>-gsharedvt.c`: `mono_arch_get_gsharedvt_call_info ()`: This returns the arch specific info structure passed to the gsharedvt trampoline.
+-   `tramp-<ARCH>-gsharedvt.c`: `mono_arch_get_gsharedvt_trampoline ()`: This creates the gsharedvt trampoline. `mono_aot_get_gsharedvt_arg_trampoline ()`: This returns a gsharedvt arg trampoline which calls the gsharedvt trampoline passing in the info structure in an arch specific way.
+
+## Possible future work
+
+-   Optimizations:
+    -   Allocate the `info_var` and `locals_var` into registers.
+    -   Put more information into the info structure, to avoid rgctx fetch calls.
+    -   For calls made between gsharedvt methods, we add both an out and an in wrapper. This needs to be optimized so we only uses one wrapper in more cases, or create a more generalized wrapper, which can function as both an out and an in wrapper at the same time.
+-   The AOT complier tries to compile every instantiation which can be used at runtime. This leads to a lot of instantiations which are never used, and take up a lot of space. We might want to avoid generating some of these instantiations and use their gsharedvt versions instead. This is particularly true for methods where using the gsharedvt version might mean very little or no overhead.
diff --git a/docs/design/mono/web/images/0911030528Mp6F5SHL.png b/docs/design/mono/web/images/0911030528Mp6F5SHL.png
new file mode 100644
index 000000000000..3f9d60715c2b
Binary files /dev/null and b/docs/design/mono/web/images/0911030528Mp6F5SHL.png differ
diff --git a/docs/design/mono/web/images/coop-state-machine.png b/docs/design/mono/web/images/coop-state-machine.png
new file mode 100644
index 000000000000..9d9596e96735
Binary files /dev/null and b/docs/design/mono/web/images/coop-state-machine.png differ
diff --git a/docs/design/mono/web/images/igv-diff.png b/docs/design/mono/web/images/igv-diff.png
new file mode 100644
index 000000000000..61cba0025e6d
Binary files /dev/null and b/docs/design/mono/web/images/igv-diff.png differ
diff --git a/docs/design/mono/web/images/igv-screenshot.png b/docs/design/mono/web/images/igv-screenshot.png
new file mode 100644
index 000000000000..a14d97161bc9
Binary files /dev/null and b/docs/design/mono/web/images/igv-screenshot.png differ
diff --git a/docs/design/mono/web/linear-ir.md b/docs/design/mono/web/linear-ir.md
new file mode 100644
index 000000000000..af650f57a9c7
--- /dev/null
+++ b/docs/design/mono/web/linear-ir.md
@@ -0,0 +1,318 @@
+# Linear IR
+
+This document describes Mono's new JIT engine based on a rewrite to use a linear Intermediate Representation instead of the tree-based intermediate representation that was used up to Mono 2.0.
+
+You might also want to check [Mono's Runtime Documentation](/docs/advanced/runtime/docs/).
+
+Intermediate Representation (IR)
+--------------------------------
+
+The IR used by the JIT is standard three address code:
+
+OP dreg \<- sreg1 sreg2
+
+Here dreg, sreg1, sreg2 are virtual registers (vregs). OP is an opcode. For example:
+
+    int_add R5 <- R6 R7
+
+### Opcodes
+
+The opcodes used by the JIT are defined in the [mini-ops.h](https://github.com/mono/mono/blob/main/mono/mini/mini-ops.h) file. Each opcode has a value which is a C constant, a name, and some metadata containing information about the opcode like the type of its arguments and its return value. An example:
+
+    MINI_OP(OP_IADD, "int_add", IREG, IREG, IREG)
+
+The opcodes conform to the following naming conventions:
+
+-   CEE\_... opcodes are the original opcodes defined in the IL stream. The first pass of the JIT transforms these opcodes to the corresponding OP\_ opcodes so CEE\_ opcodes do not occur in the intermediate code. Correspondingly, they have no opcode metadata, and are not listed in mini-ops.h.
+-   OP_\<XX\> opcodes are either size agnostic, like OP_THROW, or operate on the natural pointer size of the machine, ie. OP_ADD adds two pointer size integers.
+-   OP_I\<XXX\> opcodes work on 32 bit integers, ie. vregs of type STACK_I4.
+-   OP_L\<XXX\> opcodes work on 64 bit integers, ie. vregs of type STACK_I8.
+-   OP_F\<XXX\> opcodes work on 64 bit floats, i.e. vregs of type STACK_R8.
+-   OP_V\<XXX\> opcodes work on valuetypes.
+-   OP_P\<XXX\> opcodes are macros which map to either OP_I\<XXX\> or OP_L\<XXX\> opcodes depending on whenever the architecture is 32 or 64 bits.
+
+### High/low level IR
+
+\<......\>
+
+### Representation of IR instructions
+
+Each IR instruction is represented by a MonoInst structure. The fields of the structure are used as follows:
+
+-   ins-\>opcode contains the opcode of the instruction. It is always set.
+
+-   ins-\>dreg, ins-\>sreg1, ins-\>sreg2 contain the the destination and source vregs of the instruction. If the instruction doesn't have a destination/and our source, the corresponding field is set to -1.
+
+-   ins-\>backend is used for various purposes:
+    -   for MonoInst's representing vtype variables, it indicates that the variable is in unmanaged format (used during marshalling)
+    -   instructions which operate on a register pair use it for storing the third input register of the instruction.
+    -   some opcodes, like X86_LEA use it for storing auxiliary information
+
+-   ins-\>next and ins-\>prev are used for linking the instructions.
+
+-   ins-\>ssa_op -\> not used anymore
+
+-   ins-\>cil_code -\> Points to the IL instructions this ins belongs to. Used for generating native offset-\> IL offset maps for debugging support.
+
+-   ins-\>flags is used for storing various flags
+
+-   ins-\>type and ins-\>klass contain type information for the result of the instruction. These fields are only used during the method_to_ir () pass.
+
+In addition to the fields above, each MonoInst structure contains two pointer sized fields which can be used by the instruction for storing arbitrary data. They can be accessed using a set of inst_\<XXX\> macros.
+
+Some guidelines for their usage are as follows:
+
+-   OP_\<XXX\>_IMM macros store their immediate argument in inst_imm.
+-   OP_\<XXX\>_MEMBASE macros store the basereg in inst_basereg (sreg1), and the displacement in inst_offset.
+-   OP_STORE\<XXX\>_MEMBASE macros store the basereg in inst_destbasereg (dreg), and the displacement in inst_offset. This has historical reasons since the dreg is not modified by the instruction.
+
+Virtual Registers (Vregs)
+-------------------------
+
+All IR instructions work on vregs. A vreg is identified by an index. A vreg also has a type, which is one of the MonoStackType enumeration values. This type is implicit, i.e. it is not stored anywhere. Rather, the type can be deduced from the opcodes which work on the vreg, i.e. the arguments of the OP_IADD opcode are of type STACK_I4.
+
+There are two types of vregs used inside the JIT: Local and Global. They have the following differences:
+
+### Local Vregs (lvreg)
+
+-   are local to a basic block
+-   are lightweight: allocating an lvreg is equivalent to increasing a counter, and they don't consume any memory.
+-   some optimization passes like local_deadce operate only on local vregs
+-   local vregs are assigned to hard registers (hregs) by the local register allocator. They do not participate in liveness analysis, and in global register allocation.
+-   they have no address, i.e. it is not possible to take their address
+-   they cannot be volatile
+
+### Global Vregs
+
+-   are heavyweight: allocating them is slower, and they consume memory. Each global vreg has an entry in the cfg-\>varinfo and cfg-\>vars arrays.
+-   global vregs are either allocated to hard registers during global register allocation, or are allocated to stack slots.
+-   they have an address, so it is possible to apply the LDADDR operator to them.
+-   The mapping between global vregs and their associated entry in the cfg-\>varinfo array is done by the cfg-\>vreg_to_inst array. There is a macro called get_vreg_to_inst () which indexes into this array. A vreg vr is global if get_vreg_to_inst (cfg, vr) returns non NULL.
+
+### Motivation
+
+The JIT allocates a large number of vregs. Most of these are created during the MSIL-\>IR phase, and represent the contents of the evaluation stack. By treating these vregs specially, we don't need to allocate memory for them, and don't need to include them in expensive optimization passes like liveness analysis. Also, lvregs enable the use of local versions of classic optimization passes, like copy/constant propagation and dead code elimination, which are much faster than their global counterparts, and thus can be included in the default optimization set of a JIT compiler.
+
+### Transitioning between the two states
+
+-   Most vregs start out being local. Others, like the ones representing the arguments and locals of a method, start out being global.
+-   Some transformations done by the JIT can break the invariant that an lvreg is local to a basic block. There is a separate pass, mono_handle_global_vregs (), which verifies this invariant and transforms lvregs into global vregs if neccesary. This pass also does the opposite transformation, by transforming global vregs used only in one bblock into an lvreg.
+-   If an address of a vreg needs to be taken, the vreg is transformed into a global vreg.
+
+JIT Passes
+----------
+
+### Method-to-IR
+
+This is the first pass of the JIT, and also the largest. Its job is to convert the IL code of the method to our intermediate representation. Complex opcodes like isinst are decomposed immediately. It also performs verification in parallel. The code is in the function method_to_ir () in method-to-ir.c.
+
+### Decompose-Long-Opts
+
+This pass is responsible for decomposing instructions operating on longs on 32 bit platforms as described in the section 'Handling longs on 32 bit machines'. This pass changes the CFG of the method by introducing new bblocks. It resides in the mono_decompose_long_opts () function in decompose.c.
+
+### Local Copy/Constant Propagation
+
+This pass performs copy and constant propagation on single bblocks. It works by making a linear pass over the instructions inside a basic block, remembering the instruction where each vreg was defined, and using this knowledge to replace references to vregs by their definition if possible. It resides in the mono_local_cprop2 () function in local-propagation.c. This pass can run anytime. Currently, it is executed twice:
+
+-   Just after the method-to-ir pass to clean up the many redundant copies generated during the initial conversion to IR.
+-   After the spill-global-vars pass to optimize the loads/stores created by that pass.
+
+### Branch Optimizations
+
+This pass performs a variety of simple branch optimizations. It resides in the optimize_branches () function in mini.c.
+
+This pass runs after local-cprop since it can use the transformations generated in that pass to eliminate conditional branches.
+
+### Handle-Global-Vregs
+
+This pass is responsible for promoting vregs used in more than one basic block into global vregs. It can also do the opposite transformation, i.e. it can denote global vregs used in only one basic block into local ones. It resides in the mono_handle_global_vregs () function in method-to-ir.c.
+
+This pass must be run before passes that need to distinguish between global and local vregs, i.e. local-deadce.
+
+### Local Dead Code Elimination
+
+This pass performs dead code elimination on single basic blocks. The instructions inside a basic block are processed in reverse order, and instructions whose target is a local vreg which is not used later in the bblock are eliminated.
+
+This pass mostly exists to get rid of the instructions made unnecessary by the local-cprop pass.
+
+This pass must be run after the handle-global-vregs pass since it needs to distinguish between global and local vregs.
+
+### Decompose VType Opts
+
+This pass is responsible for decomposing valuetype operations into simpler operations, as described in the section 'Handling valuetypes'. It resides in the mono_decompose_vtype_opts () function in decompose.c.
+
+This pass can be run anytime, but it should be run as late as possible to enable vtype opcodes to be optimized by the local and SSA optimizations.
+
+### SSA Optimizations
+
+These optimizations consists of:
+
+-   transformation of the IR to SSA form
+-   optimizations: deadce, copy/constant propagation
+-   transformation out of SSA form
+
+### Liveness Analysis
+
+This pass is responsible for calculating the liveness intervals for all global vregs using a classical backward dataflow analysis. It is usually the most expensive pass of the JIT especially for large methods with lots of variables and basic blocks. It resides in the liveness.c file.
+
+### Global Register Allocation
+
+This pass is responsible for allocating some vregs to one of the hard registers available for global register allocation. It uses a linear scan algorithm. It resides in the linear-scan.c file.
+
+### Allocate Vars
+
+This arch-specific function is responsible for allocating all variables (or global vregs) to either a hard reg (as determined during global register allocation) or to a stack location. It depends on the mono_allocate_stack_slots () function to allocate stack slots using a linear scan algorithm.
+
+### Spill Global Vars
+
+This pass is responsible for processing global vregs in the IR. Vregs which are assigned to hard registers are replaced with the given registers. Vregs which are assigned to stack slots are replaced by local vregs and loads/stores are generated between the local vreg and the stack location. In addition, this pass also performs some optimizations to minimalize the number of loads/stores added, and to fold them into the instructions themselves on x86/amd64. It resides in the mono_spill_global_vars () function in method-to-ir.c.
+
+This pass must be run after the allocate_vars () pass.
+
+Handling longs on 32 bit machines
+---------------------------------
+
+On 32 bit platforms like x86, the JIT needs to decompose opcodes operating on longs into opcodes operating on ints. This is done as follows:
+
+-   When a vreg of type 'long' is allocated, two consecutive vregs of type 'int' are allocated. These two vregs represent the most significant and less-significant word of the long value.
+-   In the decompose-long-opts pass, all opcodes operating on longs are replaced with opcodes operating on the component vregs of the original long vregs. I.e.
+
+<!-- -->
+
+      R11 <- LOR R21 R31
+     is replaced with:
+      R12 <- IOR R22 R32
+      R13 <- IOR R23 R33
+
+-   Some opcodes, like OP_LCALL can't be decomposed so they are retained in the IR. This leads to some complexity since other parts of the JIT has to be prepared to deal with long vregs.
+
+Handling valuetypes
+-------------------
+
+Valuetypes are first class citizens in the IR, i.e. there are opcodes operating on valuetypes, there are vtype vregs etc. This is done to allow the local and SSA optimizations to be able to work on valuetypes too, and to simplify other parts of the JIT. The decompose-vtype-opts pass is responsible for decomposing vtype opcodes into simpler ones. One of the most common operations on valuetypes is taking their address. Taking the address of a variable causes it to be ignored by most optimizations, so the JIT tries to avoid it if possible, for example using a VZERO opcode for initialization instead of LDADDR+INITOBJ etc. LDADDR opcodes are generated during the decompose-vtype-opts pass, but that pass is executed after all the other optimizations, so it is no longer a problem. Another complication is the fact the vregs have no type, which means that vtype opcodes have to have their ins-\>klass fields filled in to indicate the type which they operate on.
+
+Porting an existing backend to the new IR
+-----------------------------------------
+
+-   Add the following new functions:
+    -   mono_arch_emit_call (). Same as mono_arch_call_opcode (), but emits IR for pushing arguments to the stack. All the stuff in mono_arch_emit_this_vret_args () should be done in emit_call () too.
+    -   mono_arch_emit_outarg_vt (). Emits IR to push a vtype to the stack
+    -   mono_arch_emit_setret (). Emits IR to move its argument to the proper return register
+    -   mono_arch_emit_inst_for_method (). Same as mono_arch_get_inst_for_method, but also emits the instructions.
+
+-   Add new opcodes to cpu-\<ARCH\>.md and mono_arch_output_basic_block ():
+    -   dummy_use, dummy_store, not_reached
+    -   long_bCC and long_cCC opcodes
+    -   cond_exc_iCC opcodes
+    -   lcompare_imm == op_compare_imm
+    -   int_neg == neg
+    -   int_not == not
+    -   int_convXX == conv.iXX
+    -   op_jump_table
+    -   long_add == cee_add (on 64 bit platforms)
+    -   op_start_handler, op_endfinally, op_endfilter
+-   In mono_arch_create_vars, when the result is a valuetype, it needs to create a new variable to represent the hidden argument holding the vtype return address and store this variable into cfg-\>vret_addr.
+-   Also, in mono_arch_allocate_vars, when the result is a valuetype, it needs to setup cfg-\>vret_addr instead of cfg-\>ret.
+
+For more info, compare the already converted backends like x86/amd64/ia64 with their original versions in HEAD. For example: [[1]](https://lists.dot.net/pipermail/mono-patches/2006-April/073170.html)
+
+Benchmark results
+-----------------
+
+All the benchmarks were run on an amd64 machine in 64 bit mode.
+
+-   pnetmark score:
+
+<!-- -->
+
+        current JIT: 19725
+        linear IR: 24970 (25% faster)
+
+-   mini/bench.exe:
+
+<!-- -->
+
+        current JIT: 2.183 secs
+        linear IR: 1.972 secs (10% faster)
+
+-   corlib 2.0 compile:
+
+<!-- -->
+
+        current JIT: 9.421 secs
+        linear IR: 9.223 secs (2% faster)
+
+-   ziptest.exe from [https://bugzilla.novell.com/show_bug.cgi?id=342190](https://bugzilla.novell.com/show_bug.cgi?id=342190) on the zerofile.bin input file:
+
+<!-- -->
+
+        current JIT: 18.648 secs
+        linear IR: 9.934 secs (50% faster)
+
+-   decimal arithmetic benchmark from [https://lists.dot.net/pipermail/mono-devel-list/2008-May/028061.html](https://lists.dot.net/pipermail/mono-devel-list/2008-May/028061.html):
+
+<!-- -->
+
+       current JIT:
+         addition 3774.094 ms
+         substraction 3644.657 ms
+         multiplication 2959.355 ms
+         division 61897.441 ms
+       linear IR:
+         addition 3096.526 ms
+         substraction 3065.364 ms
+         multiplication 2270.676 ms
+         division 60514.169 ms
+
+-   IronPython pystone.py 5000000 iterations:
+
+<!-- -->
+
+       current JIT: 69255.7 pystones/second
+       linear IR: 83187.8 pystones/second (20% faster)
+
+ All the code size tests were measured using `mono --stats --compile-all <ASSEMBLY NAME>`
+
+-   corlib 1.0 native code size:
+
+<!-- -->
+
+        current JIT: 2100173 bytes
+        linear IR: 1851966 bytes (12% smaller)
+
+-   mcs.exe native code size:
+
+<!-- -->
+
+        current JIT: 1382372 bytes
+        linear IR: 1233707 bytes (11% smaller)
+
+-   all 1.0 assemblies combined:
+
+<!-- -->
+
+        current JIT: 15816800 bytes
+        linear IR: 12774991 bytes (20% smaller)
+
+Improvements compared to the Mono 1.x and Mono 2.0 JITs
+-------------------------------------------------------
+
+-   The old JIT used trees as its internal representation, and the only thing which was easy with trees was code generation, everything else is hard. With the linear IR, most things are easy, and only a few things are hard, like optimizations which transform multiple operations into one, like transforming a load+operation+store into an operation taking a memory operand on x86.
+
+-   Since there is only one IR instead of two, the new JIT is (hopefully) easier to understand and modify.
+
+-   There is an if-conversion pass which can convert simple if-then-else statements to predicated instructions on x86/64, eliminating branches.
+
+-   Due to various changes, the ABCREM pass can eliminate about twice as many array bound checks in corlib as the current JIT. It was also extended to eliminate redundant null checks.
+
+-   Handling of valuetypes is drastically improved, including:
+    -   allowing most optimization passes like constant and copy propagation to work on valuetypes.
+    -   elimination of redundant initialization code inserted because of the initlocals flag.
+    -   elimination of many redundant copies when the result of a call is passed as an argument to another call.
+    -   passing and returning small valuetypes in registers on x86/amd64.
+
+-   Due to the elimination of the tree format, it is much easier to generate IR code for complex IL instructions. Some things, like branches, which are almost impossible to generate in the current JIT in the method_to_ir () pass, can be generated easily.
+
+-   The handling of soft-float on ARM is done in a separate pass instead of in a miriad places, hopefully getting rid of bugs in this area.
+
+-   In the old representation the tree to code transformations were easy only if the "expression" to transform was represented as a tree. If, for some reason, the operation was "linearized", using local variables as intermediate results instead of the tree nodes, then the optimization simply did not take place. Or the jit developer had to code twice: once for the tree case and once for the "linear" case.
diff --git a/docs/design/mono/web/llvm-backend.md b/docs/design/mono/web/llvm-backend.md
new file mode 100644
index 000000000000..4fcb4810e2bb
--- /dev/null
+++ b/docs/design/mono/web/llvm-backend.md
@@ -0,0 +1,220 @@
+# LLVM Backend
+
+Mono includes a backend which compiles methods to native code using LLVM instead of the built in JIT.
+
+Usage
+-----
+
+The back end requires the usage of our LLVM fork/branches, see 'The LLVM Mono Branch' section below.
+
+The llvm back end can be enabled by passing `--enable-llvm=yes` or `--with-llvm=<llvm prefix>` to configure.
+
+Platform support
+---------------
+
+LLVM is currently supported on x86, amd64, arm and arm64.
+
+Architecture
+------------
+
+The backend works as follows:
+
+-   first, normal mono JIT IR is generated from the IL code
+-   the IR is transformed to SSA form
+-   the IR is converted to the LLVM IR
+-   the LLVM IR is compiled by LLVM into native code
+
+LLVM is accessed through the LLVM C binding.
+
+The backend doesn't currently support all IL features, like vararg calls. Methods using such features are compiled using the normal mono JIT. Thus LLVM compiled and JITted code can coexist in the same process.
+
+Sources
+-------
+
+The backend is in the files mini-llvm.c and mini-llvm-cpp.cpp. The former contains the bulk of the backend, while the latter contains c++ code which is needed because of deficiencies in the LLVM C binding which the backend uses.
+
+The LLVM Mono Branch
+--------------------
+
+We maintain a fork/branch of LLVM with various changes to enable better integration with mono. The repo is at:
+
+[https://github.com/dotnet/llvm-project](https://github.com/dotnet/llvm-project)
+
+The LLVM backend is currently only supported when using this version of LLVM. When using this version, it can compile about 99% of mscorlib methods.
+
+### Changes relative to stock LLVM
+
+The branch currently contains the following changes:
+
+-   additional mono specific calling conventions.
+-   support for loads/stores which can fault using LLVM intrinsics.
+-   support for saving the stack locations of some variables into the exception handling info emitted by LLVM.
+-   support for stores into TLS on x86.
+-   the LLVM version string is changed to signal that this is a branch, i.e. it looks like "2.8svn-mono".
+-   workarounds to force LLVM to generate direct calls on amd64.
+-   support for passing a blockaddress value as a parameter.
+-   emission of EH/unwind info in a mono-specific compact format.
+
+The changes consist of about 1.5k lines of code. The majority of this is the EH table emission.
+
+### Branches
+
+-   `release/6.x` and `release/9.x` contain our changes
+
+### Maintaining the repository
+
+The `release/*` branches are maintained by regularly rebasing them on top of upstream. This makes examining our changes easier. To merge changes from upstream to this repo, do:
+
+``` bash
+git remote add upstream https://github.com/llvm/llvm-project.git
+git fetch upstream
+git rebase upstream/<target branch>
+<fix conflicts/commit>
+git push origin
+```
+
+Due to the rapid pace of development, and the frequent reorganization/refactoring of LLVM code, merge conflicts are pretty common, so maintaining our fork is time consuming. A subset of our changes can probably be submitted to upstream LLVM, but it would require some effort to clean them up, document them, etc.
+
+Restrictions
+------------
+
+There are a number of constructs that are not supported by the LLVM backend. In those cases the Mono code generation engine will fall back to Mono's default compilation engine.
+
+### Exception Handlers
+
+Nested exception handlers are not supported because of the differences in sematics between mono's exception handling the c++ abi based exception handling used by LLVM.
+
+### Varargs
+
+These are implemented using a special calling convention in mono, i.e. passing a hidden 'signature cookie' argument, and passing all vararg arguments on the stack. LLVM doesn't support this calling convention.
+
+It might be possible to support this using the [LLVM vararg intrinsics](http://llvm.org/docs/LangRef.html#int_varargs).
+
+### save_lmf
+
+Wrapper methods which have method->save_lmf set are not yet supported.
+
+### Calling conventions
+
+Some complicated parameter passing conventions might not be supported on some platforms.
+
+Implementation details
+----------------------
+
+### Virtual calls
+
+The problem here is that the trampoline handing virtual calls needs to be able to obtain the vtable address and the offset. This is currently done by an arch specific function named mono_arch_get_vcall_slot_addr (), which works by disassembling the calling code to find out which register contains the vtable address. This doesn't work for LLVM since we can't control the format of the generated code, so disassembly would be very hard. Also, sometimes the code generated by LLVM is such that the vtable address cannot be obtained at all, i.e.:
+
+     mov %rax, <offset>(%rax)
+     call %rax
+
+To work around these problems, we use a separate vtable trampoline for each vtable slot index. The trampoline obtains the 'this' argument from the registers/stack, whose location is dicated by the calling convention. The 'this' argument plus the slot index can be used to compute the vtable slot and the called method.
+
+### Interface calls
+
+The problem here is that these calls receive a hidden argument called the IMT argument which is passed in a non-ABI register by the JIT, which cannot be done with LLVM. So we call a trampoline instead, which sets the IMT argument, then makes the virtual call.
+
+### Unwind info
+
+The JIT needs unwind info to unwind through LLVM generated methods. This is solved by obtaining the exception handling info generated by LLVM, then extracting the unwind info from it.
+
+### Exception Handling
+
+Methods with exception clauses are supported, altough there are some corner cases in the class library tests which still fail when ran with LLVM.
+
+LLVM uses the platform specific exception handling abi, which is the c++ ehabi on linux, while we use our home grown exception handling system. To make these two work together, we only use one LLVM EH intrinsic, the llvm.eh.selector intrinsic. This will force LLVM to generate exception handling tables. We decode those tables in mono_unwind_decode_fde () to obtain the addresses of the try-catch clauses, and save those to MonoJitInfo, just as with JIT compiled code. Finally clauses are handled differently than with JITted code. Instead of calling them from mono_handle_exception (), we save the exception handling state in TLS, then branch to them the same way we would branch to a catch handler. the code generated from ENDFINALLY will call mono_resume_unwind (), which will resume exception handling from the information saved in TLS.
+
+LLVM doesn't support implicit exceptions thrown by the execution of instructions. An implicit exception is for example a NullReferenceException that would be raised when you access an invalid memory location, typically in Mono and .NET, an uninitialized pointer.
+
+Implicit exceptions are implemented by adding a bunch of LLVM intrinsics to do loads/stores, and calling them using the LLVM 'invoke' instruction.
+
+Instead of generating DWARF/c++ EHABI exception handling tables, we generate our own tables using a mono specific format, which the mono runtime reads during execution. This has the following advantages:
+
+-   the tables are compact and take up less space.
+-   we can generate a lookup table similar to .eh_frame_hdr which is normally generated by the linker, allowing us to support macOS/iOS, since the apple linker doesn't support .eh_frame_hdr.
+-   the tables are pointed to by a normal global symbol, instead of residing in a separate segment, whose address cannot be looked up under macOS.
+
+### Generic Sharing
+
+There are two problems here: passing/receiving the hidden rgctx argument passed to some shared methods, and obtaining its value/the value of 'this' during exception handling.
+
+The former is implemented by adding a new mono specific calling convention which passes the 'rgctx' argument in the non-ABI register where mono expects it, i.e. R10 on amd64. The latter is implemented by marking the variables where these are stored with a mono specific LLVM custom metadata, and modifying LLVM to emit the final stack location of these variables into the exception handling info, where the runtime can retrieve it.
+
+AOT Support
+-----------
+
+This is implemented by emitting the LLVM IR into a LLVM bytecode file, then using the LLVM llc compiler to compile it, producing a .s file, then we append our normal AOT data structures, plus the code for methods not supported by LLVM to this file.
+
+A runtime which is not configured by --enable-llvm=yes can be made to use LLVM compiled AOT modules by using the --llvm command line argument: mono --llvm hello.exe
+
+Porting the backend to new architectures
+----------------------------------------
+
+The following changes has to be made to port the LLVM backend to a new architecture:
+
+-   Define MONO_ARCH_LLVM_SUPPORTED in mini-\<ARCH\>.h.
+-   Implement mono_arch_get_llvm_call_info () in mini-\<ARCH\>.h. This function is a variant of the arch specific get_call_info () function, it should return calling convention information for a signature.
+-   Define MONO_CONTEXT_SET_LLVM_EXC_REG() in mini-\<ARCH\>.h to the register used to pass the exception object to LLVM compiled landing pads. This is usually defined by the platform ABI.
+-   Implement the LLVM exception throwing trampolines in exceptions-\<ARCH\>.c. These trampolines differ from the normal ones because they receive the PC address of the throw site, instead of a displacement from the start of the method. See exceptions-amd64.c for an example.
+-   Implement the resume_unwind () trampoline, which is similar to the throw trampolines, but instead of throwing an exception, it should call mono_resume_unwind () with the constructed MonoContext.
+
+LLVM problems
+-------------
+
+Here is a list of problems whose solution would probably require changes to LLVM itself. Some of these problems are solved in various ways by changes on the LLVM Mono branch.
+
+-   the llvm.sqrt intrinsic doesn't work with NaNs, even through the underlying C function/machine instruction probably works with them. Worse, an optimization pass transforms sqrt(NaN) to 0.0, changing program behaviour, and masking the problem.
+-   there is no fabs intrinsic, instead llc seems to replace calls to functions named 'fabs' with the corresponding assembly, even if they are not the fabs from libm ?
+-   There is no way to tell LLVM that a result of a load is constant, i.e. in a loop like this:
+
+<!-- -->
+
+      for (int i = 0; i < arr.Length; ++i)
+         arr [i] = 0
+
+The arr.Length load cannot be moved outside the loop, since the store inside the loop can alias it. There is a llvm.invariant.start/end intrinsic, but that seems to be only useful for marking a memory area as invariant inside a basic block, so it cannot be used to mark a load globally invariant.
+
+[http://hlvm.llvm.org/bugs/show_bug.cgi?id=5441](http://hlvm.llvm.org/bugs/show_bug.cgi?id=5441)
+
+-   LLVM has no support for implicit exceptions:
+
+[http://llvm.org/bugs/show_bug.cgi?id=1269](http://llvm.org/bugs/show_bug.cgi?id=1269)
+
+-   LLVM thinks that loads from a NULL address lead to undefined behaviour, while it is quite well defined on most unices (SIGSEGV signal being sent). If an optimization pass determines that the source address of a load is NULL, it changes it to undef/unreachable, changing program behaviour. The only way to work around this seems to be marking all loads as volatile, which probably doesn't help optimizations.
+-   There seems to be no way to disable specific optimizations when running 'opt', i.e. do -std-compile-opts except tailcallelim.
+-   The x86 JIT seems to generate normal calls as
+
+<!-- -->
+
+      mov reg, imm
+      call *reg
+
+This makes it hard/impossible to patch the calling address after the called method has been compiled. \<p\> [http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-December/027999.html](http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-December/027999.html)
+
+-   LLVM Bugs: [[1]](http://llvm.org/bugs/show_bug.cgi?id=6102)
+
+Future Work
+-----------
+
+### Array Bounds Check (ABC) elimination
+
+Mono already contains a ABC elimination pass, which is fairly effective at eliminating simple bounds check, i.e. the one in:
+
+for (int i = 0; i \< arr.Length; ++i)
+
+      sum += arr [i];
+
+However, it has problems with "partially redundant" check, i.e. checks which cannot be proven to be reduntant, but they are unlikely to be hit at runtime. With LLVM's extensive analysis and program transformation passes, it might be possible to eliminate these from loops, by changing them to loop-invariant checks and hoisting them out of loops, i.e. changing:
+
+      for (int i = 0; i < len; ++i)
+        sum += arr [i];
+
+to:
+
+      if (len < arr.Length) {
+          <loop without checks>
+      } else {
+          <loop with checks>
+      }
+
+LLVM has a LoopUnswitch pass which can do something like this for constant expressions, it needs to be extended to handle the ABC checks too. Unfortunately, this cannot be done currently because the arr.Length instruction is converted to a volatile load by mono's LLVM backend, since it can fault if arr is null. This means that the load is not loop invariant, so it cannot be hoisted out of the loop.
diff --git a/docs/design/mono/web/memory-management.md b/docs/design/mono/web/memory-management.md
new file mode 100644
index 000000000000..75755969163a
--- /dev/null
+++ b/docs/design/mono/web/memory-management.md
@@ -0,0 +1,48 @@
+# Memory Management
+
+Metadata memory management
+--------------------------
+
+Most metadata structures have a lifetime which is equal to the MonoImage where they are loaded from. These structures should be allocated from the memory pool of the corresponding MonoImage. The memory pool is protected by the loader lock. Examples of metadata structures in this category:
+
+-   MonoClass
+-   MonoMethod
+-   MonoType
+
+Memory owned by these structures should be allocated from the image mempool as well. Examples include: klass-\>methods, klass-\>fields, method-\>signature etc.
+
+Generics complicates things. A generic class could have many instantinations where the generic arguments are from different assemblies. Where should we allocate memory for instantinations ? We can allocate from the mempool of the image which contains the generic type definition, but that would mean that the instantinations would remain in memory even after the assemblies containing their type arguments are unloaded, leading to a memory leak. Therefore, we do the following:
+
+-   data structures representing the generic definitions are allocated from the image mempool as usual. These include:
+
+<!-- -->
+
+     * generic class definition (MonoGenericClass->container_class)
+     * generic method definitions
+     * type parameters (MonoGenericParam)
+
+-   data structures representing inflated classes/images are allocated from the heap. They are owned by an 'image-set' which is the set of all images they depend on. When an image is unloaded, all image-sets it belongs to are freed, causing the data structures owned by the image-sets to be freed too. The structures handled this way include:
+
+<!-- -->
+
+     * MonoGenericClass
+     * MonoGenericInst
+     * inflated MonoMethods
+
+[Original version of this document in git.](https://github.com/mono/mono/blob/425844619cbce18eaa64205b9007f0c833e4a5c4/docs/memory-management.txt)
+
+Memory management for executable code
+-------------------------------------
+
+Executable code is managed using 'code-managers', whose implementation is in utils/mono-codeman.{h,c}. These allow the allocation of memory which is suitable for storing executable code, i.e.:
+
+-   It has the required executable (x) permission.
+-   The alignment of the memory blocks allocated from the code manager matches the preferred function alignment of the platform.
+
+Code managers also allow a certain percent of the memory they manage to be reserved for storing things like function thunks.
+
+The runtime contains the following code managers:
+
+-   There is a global code manager declared in mini.c which is used to manage code memory whose lifetime is equal to the lifetime of the runtime. Memory for trampolines is allocated from the global code manager.
+-   Every domain has a code manager which is used for allocating memory used by JITted code belonging to that domain.
+-   Every 'dynamic' method, i.e. a method whose lifetime is not equal to the runtime or a domain, has its own code manager.
diff --git a/docs/design/mono/web/mini-porting.md b/docs/design/mono/web/mini-porting.md
new file mode 100644
index 000000000000..8d3977982ce5
--- /dev/null
+++ b/docs/design/mono/web/mini-porting.md
@@ -0,0 +1,373 @@
+# Porting the Engine
+
+## Introduction
+
+This documents describes the process of porting the mono JIT to a new CPU architecture. The new mono JIT has been designed to make porting easier though at the same time enable the port to take full advantage from the new architecture features and instructions. Knowledge of the mini architecture (described in the mini-doc.txt file) is a requirement for understanding this guide, as well as an earlier document about porting the mono interpreter (available on the web site).
+
+There are six main areas that a port needs to implement to have a fully-functional JIT for a given architecture:
+
+-   instruction selection
+-   native code emission
+-   call conventions and register allocation
+-   method trampolines
+-   exception handling
+-   minor helper methods
+
+To take advantage of some not-so-common processor features (for example conditional execution of instructions as may be found on ARM or ia64), it may be needed to develop an high-level optimization, but doing so is not a requirement for getting the JIT to work.
+
+We'll see in more details each of the steps required, note, though, that a new port may just as well start from a cut and paste of an existing port to a similar architecture (for example from x86 to amd64, or from powerpc to sparc).
+
+The architecture specific code is split from the rest of the JIT, for example the x86 specific code and data is all included in the following files in the distribution:
+
+mini-x86.h mini-x86.c inssel-x86.brg cpu-pentium.md tramp-x86.c exceptions-x86.c
+
+I suggest a similar split for other architectures as well.
+
+Note that this document is still incomplete: some sections are only sketched and some are missing, but the important info to get a port going is already described.
+
+## Architecture-specific instructions and instruction selection
+
+The JIT already provides a set of instructions that can be easily mapped to a great variety of different processor instructions. Sometimes it may be necessary or advisable to add a new instruction that represent more closely an instruction in the architecture. Note that a mini instruction can be used to represent also a short sequence of CPU low-level instructions, but note that each instruction represents the minimum amount of code the instruction scheduler will handle (i.e., the scheduler won't schedule the instructions that compose the low-level sequence as individual instructions, but just the whole sequence, as an indivisible block).
+
+New instructions are created by adding a line in the mini-ops.h file, assigning an opcode and a name. To specify the input and output for the instruction, there are two different places, depending on the context in which the instruction gets used.
+
+If an instruction is used as a low-level CPU instruction, the info is specified in a machine description file. The description file is processed by the genmdesc program to provide a data structure that can be easily used from C code to query the needed info about the instruction.
+
+As an example, let's consider the add instruction for both x86 and ppc:
+
+    x86 version:
+        add: dest:i src1:i src2:i len:2 clob:1
+    ppc version:
+        add: dest:i src1:i src2:i len:4
+
+Note that the instruction takes two input integer registers on both CPU, but on x86 the first source register is clobbered (clob:1) and the length in bytes of the instruction differs.
+
+Note that integer adds and floating point adds use different opcodes, unlike the IL language (64 bit add is done with two instructions on 32 bit architectures, using a add that sets the carry and an add with carry).
+
+A specific CPU port may assign any meaning to the clob field for an instruction since the value will be processed in an arch-specific file anyway.
+
+See the top of the existing cpu-pentium.md file for more info on other fields: the info may or may not be applicable to a different CPU, in this latter case the info can be ignored.
+
+So, one of the first things needed in a port is to write a cpu-$(arch).md machine description file and fill it with the needed info. As a start, only a few instructions can be specified, like the ones required to do simple integer operations. The default rules of the instruction selector will emit the common instructions and so we're ready to go for the next step in porting the JIT.
+
+## Native code emission
+
+Since the first step in porting mono to a new CPU is to port the interpreter, there should be already a file that allows the emission of binary native code in a buffer for the architecture. This file should be placed in the
+
+``` bash
+   mono/arch/$(arch)/
+```
+
+directory.
+
+The bulk of the code emission happens in the mini-$(arch).c file, in a function called `mono_arch_output_basic_block ()`. This function takes a basic block, walks the list of instructions in the block and emits the binary code for each. Optionally a peephole optimization pass is done on the basic block, but this can be left for later, when the port actually works.
+
+This function is very simple, there is just a big switch on the instruction opcode and in the corresponding case the functions or macros to emit the binary native code are used. Note that in this function the lengths of the instructions are used to determine if the buffer for the code needs enlarging.
+
+To complete the code emission for a method, a few other functions need implementing as well:
+
+``` c
+  mono_arch_emit_prolog ()
+    mono_arch_emit_epilog ()
+    mono_arch_patch_code ()
+```
+
+`mono_arch_emit_prolog ()` will emit the code to setup the stack frame for a method, optionally call the callbacks used in profiling and tracing, and move the arguments to their home location (in a caller-save register if the variable was allocated to one, or in a stack location if the argument was passed in a volatile register and wasn't allocated a non-volatile one). caller-save registers used by the function are saved in the prolog as well.
+
+`mono_arch_emit_epilog ()` will emit the code needed to return from the function, optionally calling the profiling or tracing callbacks. At this point the basic blocks or the code that was moved out of the normal flow for the function can be emitted as well (this is usually done to provide better info for the static branch predictor). In the epilog, caller-save registers are restored if they were used.
+
+Note that, to help exception handling and stack unwinding, when there is a transition from managed to unmanaged code, some special processing needs to be done (basically, saving all the registers and setting up the links in the Last Managed Frame structure).
+
+When the epilog has been emitted, the upper level code arranges for the buffer of memory that contains the native code to be copied in an area of executable memory and at this point, instructions that use relative addressing need to be patched to have the right offsets: this work is done by `mono_arch_patch_code ()`.
+
+## Call conventions and register allocation
+
+To account for the differences in the call conventions, a few functions need to be implemented.
+
+`mono_arch_allocate_vars ()` assigns to both arguments and local variables the offset relative to the frame register where they are stored, dead variables are simply discarded. The total amount of stack needed is calculated.
+
+`mono_arch_call_opcode ()` is the function that more closely deals with the call convention on a given system. For each argument to a function call, an instruction is created that actually puts the argument where needed, be it the stack or a specific register. This function can also re-arrange th order of evaluation when multiple arguments are involved if needed (like, on x86 arguments are pushed on the stack in reverse order). The function needs to carefully take into accounts platform specific issues, like how structures are returned as well as the differences in size and/or alignment of managed and corresponding unmanaged structures.
+
+The other chunk of code that needs to deal with the call convention and other specifics of a CPU, is the local register allocator, implemented in a function named `mono_arch_local_regalloc ()`. The local allocator deals with a basic block at a time and basically just allocates registers for temporary values during expression evaluation, spilling and unspilling as necessary.
+
+The local allocator needs to take into account clobbering information, both during simple instructions and during function calls and it needs to deal with other architecture-specific weirdnesses, like instructions that take inputs only in specific registers or output only is some.
+
+Some effort will be put later in moving most of the local register allocator to a common file so that the code can be shared more for similar, risc-like CPUs. The register allocator does a first pass on the instructions in a block, collecting liveness information and in a backward pass on the same list performs the actual register allocation, inserting the instructions needed to spill values, if necessary.
+
+The cross-platform local register allocator is now implemented and it is documented in the jit-regalloc file.
+
+When this part of code is implemented, some testing can be done with the generated code for the new architecture. Most helpful is the use of the --regression command line switch to run the regression tests (basic.cs, for example).
+
+Note that the JIT will try to initialize the runtime, but it may not be able yet to compile and execute complex code: commenting most of the code in the `mini_init()` function in mini.c is needed to let the JIT just compile the regression tests. Also, using multiple -v switches on the command line makes the JIT dump an increasing amount of information during compilation.
+
+Values loaded into registers need to be extended as needed by the ECMA specs:
+
+-   integers smaller than 4 bytes are extended to int32 values
+-   32 bit floats are extended to double precision (in particular this means that currently all the floating point operations operate on doubles)
+
+## Method trampolines
+
+To get better startup performance, the JIT actually compiles a method only when needed. To achieve this, when a call to a method is compiled, we actually emit a call to a magic trampoline. The magic trampoline is a function written in assembly that invokes the compiler to compile the given method and jumps to the newly compiled code, ensuring the arguments it received are passed correctly to the actual method.
+
+Before jumping to the new code, though, the magic trampoline takes care of patching the call site so that next time the call will go directly to the method instead of the trampoline. How does this all work?
+
+`mono_arch_create_jit_trampoline ()` creates a small function that just preserves the arguments passed to it and adds an additional argument (the method to compile) before calling the generic trampoline. This small function is called the specific trampoline, because it is method-specific (the method to compile is hard-code in the instruction stream).
+
+The generic trampoline saves all the arguments that could get clobbered and calls a C function that will do two things:
+
+-   actually call the JIT to compile the method
+-   identify the calling code so that it can be patched to call directly the actual method
+
+If the 'this' argument to a method is a boxed valuetype that is passed to a method that expects just a pointer to the data, an additional unboxing trampoline will need to be inserted as well.
+
+## Exception handling
+
+Exception handling is likely the most difficult part of the port, as it needs to deal with unwinding (both managed and unmanaged code) and calling catch and filter blocks. It also needs to deal with signals, because mono takes advantage of the MMU in the CPU and of the operation system to handle dereferences of the NULL pointer. Some of the function needed to implement the mechanisms are:
+
+`mono_arch_get_throw_exception ()` returns a function that takes an exception object and invokes an arch-specific function that will enter the exception processing. To do so, all the relevant registers need to be saved and passed on.
+
+`mono_arch_handle_exception ()` this function takes the exception thrown and a context that describes the state of the CPU at the time the exception was thrown. The function needs to implement the exception handling mechanism, so it makes a search for an handler for the exception and if none is found, it follows the unhandled exception path (that can print a trace and exit or just abort the current thread). The difficulty here is to unwind the stack correctly, by restoring the register state at each call site in the call chain, calling finally, filters and handler blocks while doing so.
+
+As part of exception handling a couple of internal calls need to be implemented as well.
+
+`ves_icall_get_frame_info ()` returns info about a specific frame.
+
+`mono_jit_walk_stack ()` walks the stack and calls a callback with info for each frame found.
+
+`ves_icall_get_trace ()` return an array of StackFrame objects.
+
+### Code generation for filter/finally handlers
+
+Filter and finally handlers are called from 2 different locations:
+
+-   from within the method containing the exception clauses
+-   from the stack unwinding code
+
+To make this possible we implement them like subroutines, ending with a "return" statement. The subroutine does not save the base pointer, because we need access to the local variables of the enclosing method. Its is possible that instructions inside those handlers modify the stack pointer, thus we save the stack pointer at the start of the handler, and restore it at the end. We have to use a "call" instruction to execute such finally handlers.
+
+The MIR code for filter and finally handlers looks like:
+
+       OP_START_HANDLER
+       ...
+       OP_END_FINALLY | OP_ENDFILTER(reg)
+
+OP_START_HANDLER: should save the stack pointer somewhere OP_END_FINALLY: restores the stack pointers and returns. OP_ENDFILTER (reg): restores the stack pointers and returns the value in "reg".
+
+### Calling finally/filter handlers
+
+There is a special opcode to call those handler, its called OP_CALL_HANDLER. It simple emits a call instruction.
+
+Its a bit more complex to call handler from outside (in the stack unwinding code), because we have to restore the whole context of the method first. After that we simply emit a call instruction to invoke the handler. Its usually possible to use the same code to call filter and finally handlers (see arch_get_call_filter).
+
+### Calling catch handlers
+
+Catch handlers are always called from the stack unwinding code. Unlike finally clauses or filters, catch handler never return. Instead we simply restore the whole context, and restart execution at the catch handler.
+
+### Passing Exception objects to catch handlers and filters
+
+We use a local variable to store exception objects. The stack unwinding code must store the exception object into this variable before calling catch handler or filter.
+
+## Minor helper methods
+
+A few minor helper methods are referenced from the arch-independent code. Some of them are:
+
+`mono_arch_cpu_optimizations ()` This function returns a mask of optimizations that should be enabled for the current CPU and a mask of optimizations that should be excluded, instead.
+
+`mono_arch_regname ()` Returns the name for a numeric register.
+
+`mono_arch_get_allocatable_int_vars ()` Returns a list of variables that can be allocated to the integer registers in the current architecture.
+
+`mono_arch_get_global_int_regs ()` Returns a list of caller-save registers that can be used to allocate variables in the current method.
+
+`mono_arch_instrument_mem_needs ()`
+
+`mono_arch_instrument_prolog ()`
+
+`mono_arch_instrument_epilog ()` Functions needed to implement the profiling interface.
+
+## Testing the port
+
+The JIT has a set of regression tests in \*.cs files inside the mini directory.
+
+The usual method of testing a port is by compiling these tests on another machine with a working runtime by typing 'make rcheck', then copying TestDriver.dll and \*.exe to the mini directory. The tests can be run by typing:
+
+``` bash
+   ./mono --regression <exe file name>
+```
+
+The suggested order for working through these tests is the following:
+
+-   basic.exe
+-   basic-long.exe
+-   basic-float.exe
+-   basic-calls.exe
+-   objects.exe
+-   arrays.exe
+-   exceptions.exe
+-   iltests.exe
+-   generics.exe
+
+## Writing regression tests
+
+Regression tests for the JIT should be written for any bug found in the JIT in one of the \*.cs files in the mini directory. Eventually all the operations of the JIT should be tested (including the ones that get selected only when some specific optimization is enabled).
+
+## Platform specific optimizations
+
+An example of a platform-specific optimization is the peephole optimization: we look at a small window of code at a time and we replace one or more instructions with others that perform better for the given architecture or CPU.
+
+## Function descriptors
+
+Some ABIs, like those for IA64 and PPC64, don't use direct function pointers, but so called function descriptors. A function descriptor is a short data structure which contains at least a pointer to the code of the function and a pointer to a GOT/TOC, which needs to be loaded into a specific register prior to jumping to the function. Global variables and large constants are accessed through that register.
+
+Mono does not need function descriptors for the JITted code, but we need to handle them when calling unmanaged code and we need to create them when passing managed code to unmanaged code.
+
+`mono_create_ftnptr()` creates a function descriptor for a piece of generated code within a specific domain.
+
+`mono_get_addr_from_ftnptr()` returns the pointer to the native code in a function descriptor. Never use this function to generate a jump to a function without loading the GOT/TOC register unless the function descriptor was created by `mono_create_ftnptr()`.
+
+See the sources for IA64 and PPC64 on when to create and when to dereference function descriptors. On PPC64 function descriptors for various generated helper functions (in exceptions-ppc.c and tramp-ppc.c) are generated in front of the code they refer to (see `ppc_create_pre_code_ftnptr()`). On IA64 they are created separately.
+
+## Emulated opcodes
+
+Mini has code for emulating quite a few opcodes, most notably operations on longs, int/float conversions and atomic operations. If an architecture wishes such an opcode to be emulated, mini produces icalls instead of those opcodes. This should only be considered when the operation cannot be implemented efficiently and thus the overhead occured by the icall is not relatively large. Emulation of operations is controlled by #defines in the arch header, but the naming is not consistent. They usually start with `MONO_ARCH_EMULATE_`, `MONO_ARCH_NO_EMULATE_` and `MONO_ARCH_HAVE_`.
+
+## Prolog/Epilog
+
+The method prolog is emitted by the mono_arch_emit_prolog () function. It usually consists of the following parts:
+
+-   Allocate frame: set fp to sp, decrement sp.
+-   Save callee saved registers to the frame
+-   Initialize the LMF structure
+-   Link the LMF structure: This implements the following pseudo code:
+
+<!-- -->
+
+     lmf->lmf_addr = mono_get_lmf_addr ()
+     lmf->previous_lmf = *(lmf->lmf_addr)
+     *(lmf->lmf_addr)->lmf
+
+-   Compute bb->max_offset for each basic block: This enables mono_jit_output_basic_block () to emit short branches where possible.
+-   Store the runtime generic context, see the Generic Sharing section.
+-   Store the signature cookie used by vararg methods.
+-   Transfer arguments to the location they are allocated to, i.e. load arguments received on the stack to registers if needed, and store arguments received in registers to the stack/callee saved registers if needed.
+-   Initialize the various variables used by the soft debugger code.
+-   Implement tracing support.
+
+The epilog is emitted by the mono_arch_emit_epilog () function. It usually consists of the following parts:
+
+-   Restore the LMF by doing:
+
+<!-- -->
+
+     *(lmf->lmf_addr) = lmf->previous_lmf.
+
+-   Load returned valuetypes into registers if needed.
+-   Implement tracing support.
+-   Restore callee saved registers.
+-   Pop frame.
+-   Return to the caller.
+
+Care must be taken during these steps to avoid clobbering the registers holding the return value of the method.
+
+Callee saved registers are either saved to dedicated stack slots, or they are saved into the LMF. The stack slots where various things are saved are allocated by mono_arch_allocate_vars ().
+
+## Delegate Invocation
+
+A delegate is invoked like this by JITted code:
+
+delegate->invoke_impl (delegate, arg1, arg2, arg3, ...)
+
+Here, 'invoke_impl' originally points to a trampoline which ends up calling the 'mono_delegate_trampoline' C function. This function tries to find an architecture specific optimized implementation by calling 'mono_arch_get_delegate_invoke_impl'.
+
+mono_arch_get_delegate_invoke_impl () should return a small trampoline for invoking the delegate which matches the following pseudo code:
+
+-for instance delegates:
+
+delegate->method_ptr (delegate->target, arg1, arg2, arg3, ...)
+
+-   for static delegates:
+
+delegate->method_ptr (arg1, arg2, arg3, ...)
+
+## Varargs
+
+The vararg calling convention is implemented as follows:
+
+### Caller side
+
+-   The caller passes in a 'signature cookie', which is a hidden argument containing a MonoSignature\*.
+
+<!-- -->
+
+     This argument is passed just before the implicit arguments, i.e. if the callee signature is this:
+     foo (string format, ...)
+
+and the callee signature is this:
+
+     foo ("%d %d", 1, 2)
+
+then the real callee signature would look like:
+
+     foo ("%d %d", <signature cookie>, 1, 2)
+
+To simplify things, both the sig cookie and the implicit arguments are always passed on the stack and not in registers. mono_arch_emit_call () is responsible for emitting this argument.
+
+### Callee side
+
+-   mono_arch_allocate_vars () is responsible for allocating a local variable slot where the sig cookie will be saved. cfg->sig_cookie should contain the stack offset of the local variable slot.
+-   mono_arch_emit_prolog () is responsible for saving the sig cookie argument into the local variable.
+-   The implementation of OP_ARGLIST should load the sig cookie from the local variable, and save it into its dreg, which will point to a local variable of type RuntimeArgumentHandle.
+-   The fetching of vararg arguments is implemented by icalls in icalls.c.
+
+tests/vararg.exe contains test cases to exercise this functionality.
+
+## Unwind info
+
+On most platforms, the JIT uses DWARF unwind info to unwind the stack during exception handling. The API and some documentation is in the mini-unwind.h file. The mono_arch_emit_prolog () function is required to emit this information using the macros in mini-unwind.h, and the mono_arch_find_jit_info () function needs to pass it to mono_unwind_frame (). In addition to this, the various trampolines might also have unwind info, which makes stack walks possible when using the gdb integration (XDEBUG).
+
+The task of a stack unwinder is to construct the machine state at the caller of the current stack frame, i.e: - find the return address of the caller - find the values of the various callee saved registers in the caller at the point of the call
+
+The DWARF unwinder is based on the concept of a CFA, or Canonical Frame Address. This is an address of the stack frame which does not change during the execution of the method. By convention, the CFA is equal to the value of the stack pointer prior to the instruction which transferred execution to the current method. So for example, on x86, the value of the CFA on enter to the method is esp+4 because of the pushing of the return address. There are two kinds of unwind directives:
+
+-   those that specify how to compute the CFA at any point in the method using a \<reg>+\<offset>
+-   those that specify where a given register is saved in relation to the CFA.
+
+For a typical x86 method prolog, the unwind info might look like this:
+
+``` bash
+- <cfa=esp+8>
+- <return addr at cfa+0>
+push ebp
+- <ebp saved at cfa-4>
+mov ebp, esp
+- <cfa=ebp+8>
+```
+
+## Generic Sharing
+
+Generic code sharing is optional. See the document on [generic-sharing](/docs/advanced/runtime/docs/generic-sharing/) for information on how to support it on an architecture.
+
+### MONO_ARCH_RGCTX_REG
+
+The MONO_ARCH_RGCTX_REG define should be set to a hardware register which will be used to pass the 'mrgctx' hidden argument to generic shared methods. It should be a caller saved register which is not used in local register allocation. Also, any code which gets executed between the caller and the callee (i.e. trampolines) needs to avoid clobbering this registers. The easiest solution is to set it to the be the same as MONO_ARCH_IMT_REG, since IMT/generic sharing are never used together during a call. The method prolog must save this register to cfg->rgctx_var.
+
+### Static RGCTX trampolines
+
+These trampolines are created by mono_arch_get_static_rgctx_trampoline (). They are used to call generic shared methods indirectly from code which cannot pass an MRGCTX. They should implement the following pseudo code:
+
+    <mrgctx reg> = mrgctx
+    jump <method addr>
+
+### Generic Class Init Trampoline
+
+This one of a kind trampoline is created by mono_arch_create_generic_class_init_trampoline (). They are used to run the .cctor of the vtable passed in as an argument in MONO_ARCH_VTABLE_REG. They should implement the following pseudo code:
+
+    vtable = <vtable reg>
+    if (!vtable->initialized)
+      <call jit icall "specific_trampoline_generic_class_init">
+
+The generic trampoline code needs to be modified to pass the argument received in MONO_ARCH_VTABLE_REG to the C trampoline function, which is mono_generic_class_init_trampoline ().
+
+### RGCTX Lazy Fetch Trampoline
+
+These trampolines are created by mono_arch_create_rgctx_lazy_fetch_trampoline (). They are used for fetching values out of an MonoRuntimeGenericContext, lazily initializing them as needed.
diff --git a/docs/design/mono/web/mono-error.md b/docs/design/mono/web/mono-error.md
new file mode 100644
index 000000000000..dd4c9f057546
--- /dev/null
+++ b/docs/design/mono/web/mono-error.md
@@ -0,0 +1,144 @@
+# Error handling and MonoError
+
+## MonoError
+
+MonoError is the latest attempt at cleaning up and sanitizing error handling in the runtime. This document highlights some of the design goals and decisions, the implementation and the migration strategy.
+
+### Design goals
+
+-   Replace the majority of the adhoc error handling subsystems present today in the runtime. Each one is broken in a subtle way, has slightly different semantics and error conversion between them is spot, at best.
+
+-   Map well to the final destination of all runtime errors: managed exceptions. This includes being compatible with .net when it comes to the kind of exception produced by a given error condition.
+
+-   Be explicit, lack any magic. The loader-error setup does control flow happens in the background through a TLS variable, which made it very brittle and error prone.
+
+-   Explicit and multiple error scopes. Make it possible to have multiple error scopes and make them explicit. We need to support nested scopes during type loading, even if reporting is flat.
+
+-   Be as simple as possible. Error handling is the hardest part of the runtime to test so it must be simple. Which means complex error reporting, such as chaining, is out of question.
+
+## Current implementation
+
+The current implementation exists in mono-error.h and mono-error-internals.h. The split is so API users can consume errors, but they are not supported to be able to produce them - such use case has yet to arise.
+
+#### Writing a function that produces errors
+
+``` c
+/**
+ *
+ * @returns NULL on error
+ */
+void*
+my_function (int a, MonoError *error)
+{
+    if (a <= 0) {//
+        mono_error_set_argument (error, "a", "argument a must be bigger than zero, it was %d", a);
+        return NULL;
+    }
+    return malloc (a);
+}
+```
+
+Important points from the above:
+
+-   Add a "MonoError \*error" argument as the last to your function
+-   Call one of the mono_error_set functions based on what managed exception this should produce and the available information
+-   Document that a NULL returns means an error
+
+## Writing a function that consumes errors
+
+``` c
+void
+other_function (void)
+{
+    ERROR_DECL (error);
+    void *res;
+
+    res = my_function (10, error);
+    //handling the error:
+    //1st option: set the pending exception.  Only safe to do in icalls
+    if (mono_error_set_pending_exception (error)) //returns TRUE if an exception was set
+        return;
+
+    //2nd option: legacy code that can't handle failures:
+    mono_error_assert_ok (error);
+
+    //3rd option (deprecated): raise an exception and write a FIXME note
+    //  (implicit cleanup, no-op if there was no error)
+    mono_error_raise_exception (error); /* FIXME don't raise here */
+
+    //4th option: ignore
+    mono_error_cleanup (error);
+}
+```
+
+Important points from the above:
+
+-   Use `ERROR_DECL (error)` to declare and initialize a `MonoError *error` variable. (Under the hood, it declares a local `MonoError error_value` using `ERROR_DECL_VALUE (error_value)`. You may use `ERROR_DECL_VALUE (e)` to declare a variable local variable yourself. It's pretty unusual to need to do that, however.)
+-   Pass it to the required function and always do something with the result
+-   Given we're still transitioning, not all code can handle in the same ways
+
+## Handling the transition
+
+The transition work is not complete and we're doing it piece-by-piece to ensure we don't introduce massive regressions in the runtime. The idea is to move the least amount of code a time to use the new error machinery.
+
+Here are the rules for code conversion:
+
+-   Mono API functions that need to call functions which take a MonoError should assert on failure or cleanup the error as there's no adequate alternative at this point. They **must not** use `mono_error_raise_exception` or `mono_error_set_pending_exception`
+
+-   When possible, change the function signature. If not, add a \_checked variant and add the `MONO_RT_EXTERNAL_ONLY` to the non-checked version if it's in the Mono API. That symbol will prevent the rest of the Mono runtime from calling the non-checked version.
+
+## Advanced technique: using a local error to raise a different exception
+
+Suppose you want to call a function `foo_checked()` but you want to raise a different exception if it fails. In this case, it makes sense to create a local error variable to handle the call to `foo_checked`:
+
+``` c
+int
+my_function (MonoObject *arg, MonoError *error)
+{
+    ERROR_DECL (local_error);
+    int result = foo_checked (arg, local_error);
+    if (!is_ok (local_error)) {
+        mono_error_set_execution_engine (error, "Could not successfully call foo_checked, due to: %s", mono_error_get_message (local_error));
+        mono_error_cleanup (local_error);
+    }
+    return result;
+```
+
+-   Pass `local_error` to `foo_checked`
+-   Check the result and if it wasn't okay, set a different error code on `error` It is common to use `mono_error_get_message` to include the message from the local failure as part of the new exception
+-   Cleanup `local_error` to release its resources
+
+## Advanced technique: MonoErrorBoxed and mono_class_set_failure
+
+Normally we store a `MonoError` on the stack. The usual scenario is that managed code calls into the runtime, we perform some operations, and then we either return a result or convert a `MonoError` into a pending exception. So a stack lifetime for a `MonoError` makes sense.
+
+There is one scenario where we need a heap-allocated `MonoError` whose lifetime is tied to a `MonoImage`: the initialization of a managed class. `MonoErrorBoxed` is a thin wrapper around a `MonoError` that identifies a `MonoError` that is allocated in the mempool of a `MonoImage`. It is created using `mono_error_box()` and converted back to an ordinary `MonoError` using `mono_error_unbox()`.
+
+``` c
+static int
+some_class_init_helper (MonoClass *k)
+{
+    if (mono_class_has_failure (k))
+        return -1; /* Already a failure, don't bother trying to init it */
+    ERROR_DECL (local_error);
+    int result = foo_checked (k, local_error);
+    if (!is_ok (error)) {
+      mono_class_set_failure (k, mono_error_box (local_error, k->image));
+      mono_error_cleanup (local_error);
+    }
+    return result;
+}
+```
+
+-   Check whether the class is already marked as a failure
+-   Pass a `local_error` to `foo_checked`
+-   Check the result and if it wasn't okay, allocate a boxed `MonoError` in the mempool of the class's image
+-   Mark the class that failed with the boxed error
+-   Cleanup the `local_error` to release its resources
+
+### Design issues
+
+-   Memory management of the error setting functions is not consistent or clear
+-   Use a static initializer in the declaration site instead of mono_error_init?
+-   Force an error to always be set or only when there's an exception situation? I.E. mono_class_from_name failing to find the class X finding the class but it failed to load.
+-   g_assert (mono_errork_ok (&error)) could be replaced by a macro that uses g_error so we can see the error contents on crashes.
diff --git a/docs/design/mono/web/other.md b/docs/design/mono/web/other.md
new file mode 100644
index 000000000000..f3bfe69601b8
--- /dev/null
+++ b/docs/design/mono/web/other.md
@@ -0,0 +1,105 @@
+# Other notes
+
+## Faster runtime builds
+
+To speed up runtime builds, use one or more of the following:
+
+-   Turn off optimization by passing CFLAGS=-O0 to configure.
+-   Turn off generation of libmono by passing --disable-libraries to configure.
+-   Turn off boeh support by passing --disable-boehm to configure.
+-   Build in parallel, i.e. using make -j4.
+-   Use ccache by passing CC="ccache gcc" CXX="ccache g++" to configure.
+
+## Runtime debugging methods
+
+### Debugging crashes which don't happen inside gdb, or only happen when a test program is ran in a loop
+
+Set the MONO_DEBUG env variable to 'suspend-on-sigsegv'. This causes the runtime native SIGSEGV handler to spin in a loop, so gdb can be attached to the running process.
+
+### Setting native breakpoints in managed methods
+
+Use the --break \<METHOD> command line argument. The JIT will generate a native breakpoint (INT on x86) into the prolog of the given method. Use --break-at-bb \<METHOD> \<bb num> to set a breakpoint at the start of a given basic block.
+
+### Displaying JIT debug output
+
+Use the -v -v -v -v command line argument. Set the MONO_VERBOSE_METHOD env variable to display output for only one method.
+
+### Dumping JIT IR to IGV
+
+Set `MONO_JIT_DUMP_METHOD` to specify a method to dump over network to a running instance of the [IdealGraphVisualizer (IGV)](http://ssw.jku.at/General/Staff/TW/igv.html). An IGV build that is compatible with the implementation in Mono is available for [Mac/Linux/Windows](https://github.com/lewurm/GraalJVMCI8/releases/tag/v0.1) and requires at least JRE 1.7 to run.
+
+On Mac:
+
+``` bash
+$ # unpack zip file
+$ open idealgraphvisualizer
+.
+```
+
+For Linux there's `bin/idealgraphvisualizer` and for Windows there's `bin/idealgraphvisualizer.exe`. After starting IGV, it will listen on port 4445 and is ready to receive graphs.
+
+Here an example for dumping the IR of a method:
+
+``` bash
+$ cat fib.cs
+using System;
+
+public class Fib {
+
+        public static int fib (int n) {
+                if (n < 2)
+                        return 1;
+                return fib(n-2)+fib(n-1);
+        }
+        public static int Main (string[] args) {
+                int repeat = 1;
+
+                if (args.Length == 1)
+                        repeat = Convert.ToInt32 (args [0]);
+
+                // Console.WriteLine ("Repeat = " + repeat);
+
+                if (repeat > 32) {
+                        Console.WriteLine ("{0}", fib (repeat));
+                        return 0;
+                }
+
+                for (int i = 0; i < repeat; i++)
+                        if (fib (32) != 3524578)
+                                return 1;
+
+                return 0;
+        }
+}
+$ csc fib.cs
+$ MONO_JIT_DUMP_METHOD=Fib::fib mono fib.exe
+cfg_dump: create context for "Fib::fib"
+```
+
+now switch to IGV, you should see something like that: [![igv-screenshot.png](images/igv-screenshot.png)](images/igv-screenshot.png)
+
+you can explore the different compiler passes in the navigation bar on the left side. IGV also has a graph diff feature:
+
+[![igv-diff.png](/images/igv-diff.png)](/images/igv-diff.png)
+
+### Displaying runtime debug output
+
+Set the MONO_LOG_LEVEL env variable to 'debug'. The log output is useful for diagnosing assembly loading/AOT/pinvoke problems.
+
+### mono_debug_count ()
+
+This is useful for debugging problems where a piece of code is executed many times, and we need to find out which run causes the runtime to misbehave, i.e. which method is miscompiled by the JIT etc. It works by changing
+
+``` bash
+do_something ()
+```
+
+To:
+
+``` bash
+if (mono_debug_count ()) {
+  <do something>
+}
+```
+
+mono_debug_count () is controlled by the COUNT env variable, the first COUNT times it is called, it will return TRUE, after that, it will return FALSE. This allows us to find out exactly which execution of \<do something> causes the problem by running the application while varying the value of COUNT using a binary search.
diff --git a/docs/design/mono/web/register-allocation.md b/docs/design/mono/web/register-allocation.md
new file mode 100644
index 000000000000..e6247d8eb958
--- /dev/null
+++ b/docs/design/mono/web/register-allocation.md
@@ -0,0 +1,153 @@
+# Register allocation in the Mono JIT
+
+### Global Register Allocation
+
+\<TODO\>
+
+### Local Register Allocation
+
+This section describes the cross-platform local register allocator which is in the file mini-codegen.c.
+
+The input to the allocator is a basic block which contains linear IL, ie. instructions of the form:
+
+      DEST <- SRC1 OP SRC2
+
+where DEST, SRC1, and SRC2 are virtual registers (vregs). The job of the allocator is to assign hard or physical registers (hregs) to each virtual registers so the vreg references in the instructions can be replaced with their assigned hreg, allowing machine code to be generated later.
+
+The allocator needs information about the number and types of arguments of instructions. It takes this information from the machine description files. It also needs arch specific information, like the number and type of the hard registers. It gets this information from arch-specific macros.
+
+Currently, the vregs and hregs are partitioned into two classes: integer and floating point.
+
+The allocator consists of two phases: In the first phase, a forward pass is made over the instructions, collecting liveness information for vregs. In the second phase, a backward pass is made over the instructions, assigning registers. This backward mode of operation makes the allocator somewhat difficult to understand, but leads to better code in most cases.
+
+#### Allocator state
+
+The state of the allocator is stored in two arrays: iassign and isymbolic. iassign maps vregs to hregs, while isymbolic is the opposite. For a vreg, iassign [vreg] can contain the following values:
+
+       -1                      vreg has no assigned hreg
+
+       hreg index (>= 0)            vreg is assigned to the given hreg. This means later instructions (which we have already processed due to the backward direction) expect the value of vreg to be found in hreg.
+
+       spill slot index (< -1)  vreg is spilled to the given spill slot. This means later instructions expect the value of vreg to be found on the stack in the given spill slot. When this vreg is used as a dreg of an instruction, a spill store needs to be generated after the instruction saving its value to the given spill slot.
+
+Also, the allocator keeps track of which hregs are free and which are used. This information is stored in a bitmask called ifree_mask.
+
+There is a similar set of data structures for floating point registers.
+
+#### Spilling
+
+When an allocator needs a free hreg, but all of them are assigned, it needs to free up one of them. It does this by spilling the contents of the vreg which is currently assigned to the selected hreg. Since later instructions expect the vreg to be found in the selected hreg, the allocator emits a spill-load instruction to load the value from the spill slot into the hreg after the currently processed instruction. When the vreg which is spilled is a destination in an instruction, the allocator will emit a spill-store to store the value into the spill slot.
+
+#### Fixed registers
+
+Some architectures, notably x86/amd64 require that the arguments/results of some instructions be assigned to specific hregs. An example is the shift opcodes on x86, where the second argument must be in ECX. The allocator has support for this. It tries to allocate the vreg to the required hreg. If thats not possible, then it will emit compensation code which moves values to the correct registers before/after the instruction.
+
+Fixed registers are mainly used on x86, but they are useful on more regular architectures on well, for example to model that after a call instruction, the return of the call is in a specific register.
+
+A special case of fixed registers is two address architectures, like the x86, where the instructions place their results into their first argument. This is modelled in the allocator by allocating SRC1 and DEST to the same hreg.
+
+#### Global registers
+
+Some variables might already be allocated to hardware registers during the global allocation phase. In this case, SRC1, SRC2 and DEST might already be a hardware register. The allocator needs to do nothing in this case, except when the architecture uses fixed registers, in which case it needs to emit compensation code.
+
+#### Register pairs
+
+64 bit arithmetic on 32 bit machines requires instructions whose arguments are not registers, but register pairs. The allocator has support for this, both for freely allocatable register pairs, and for register pairs which are constrained to specific hregs (EDX:EAX on x86).
+
+#### Floating point stack
+
+The x86 architecture uses a floating point register stack instead of a set of fp registers. The allocator supports this by a post-processing pass which keeps track of the height of the fp stack, and spills/loads values from the stack as neccesary.
+
+#### Calls
+
+Calls need special handling for two reasons: first, they will clobber all caller-save registers, meaning their contents will need to be spilled. Also, some architectures pass arguments in registers. The registers used for passing arguments are usually the same as the ones used for local allocation, so the allocator needs to handle them specially. This is done as follows: the MonoInst for the call instruction contains a map mapping vregs which contain the argument values to hregs where the argument needs to be placed,like this (on amd64):
+
+    R33 -> RDI
+    R34 -> RSI
+    ...
+
+When the allocator processes the call instruction, it allocates the vregs in the map to their associated hregs. So the call instruction is processed as if having a variable number of arguments which fixed register assignments.
+
+An example:
+
+      R33 <- 1
+      R34 <- 2
+      call
+
+When the call instruction is processed, R33 is assigned to RDI, and R34 is assigned to RSI. Later, when the two assignment instructions are processed, R33 and R34 are already assigned to a hreg, so they are replaced with the associated hreg leading to the following final code:
+
+      RDI <- 1
+      RSI <- 1
+      call
+
+#### Machine description files
+
+A typical entry in the machine description files looks like this:
+
+shl: dest:i src1:i src2:s clob:1 len:2
+
+The allocator is only interested in the dest,src1,src2 and clob fields. It understands the following values for the dest, src1, src2 fields:
+
+-   i - integer register
+-   f - fp register
+-   b - base register (same as i, but the instruction does not modify the reg)
+-   m - fp register, even if an fp stack is used (no fp stack tracking)
+
+It understands the following values for the clob field:
+
+-   1 - sreg1 needs to be the same as dreg
+-   c - instruction clobbers the caller-save registers
+
+Beside these values, an architecture can define additional values (like the 's' in the example). The allocator depends on a set of arch-specific macros to convert these values to information it needs during allocation.
+
+#### Arch specific macros
+
+These macros usually receive a value from the machine description file (like the 's' in the example). The examples below are for x86.
+
+    /*
+     * A bitmask selecting the caller-save registers (these are used for local
+     * allocation).
+     */
+    #define MONO_ARCH_CALLEE_REGS X86_CALLEE_REGS
+
+    /*
+     * A bitmask selecting the callee-saved registers (these are usually used for
+     * global allocation).
+     */
+    #define MONO_ARCH_CALLEE_SAVED_REGS X86_CALLER_REGS
+
+    /* Same for the floating point registers */
+    #define MONO_ARCH_CALLEE_FREGS 0
+    #define MONO_ARCH_CALLEE_SAVED_FREGS 0
+
+    /* Whenever the target uses a floating point stack */
+    #define MONO_ARCH_USE_FPSTACK TRUE
+
+    /* The size of the floating point stack */
+    #define MONO_ARCH_FPSTACK_SIZE 6
+
+    /*
+     * Given a descriptor value from the machine description file, return the fixed
+     * hard reg corresponding to that value.
+     */
+    #define MONO_ARCH_INST_FIXED_REG(desc) ((desc == 's') ? X86_ECX : ((desc == 'a') ? X86_EAX : ((desc == 'd') ? X86_EDX : ((desc == 'y') ? X86_EAX : ((desc == 'l') ? X86_EAX : -1)))))
+
+    /*
+     * A bitmask selecting the hregs which can be used for allocating sreg2 for
+     * a given instruction.
+     */
+    #define MONO_ARCH_INST_SREG2_MASK(ins) (((ins [MONO_INST_CLOB] == 'a') || (ins [MONO_INST_CLOB] == 'd')) ? (1 << X86_EDX) : 0)
+
+    /*
+     * Given a descriptor value, return whenever it denotes a register pair.
+     */
+    #define MONO_ARCH_INST_IS_REGPAIR(desc) (desc == 'l' || desc == 'L')
+
+    /*
+     * Given a descriptor value, and the first register of a regpair, return a
+     * bitmask selecting the hregs which can be used for allocating the second
+     * register of the regpair.
+     */
+    #define MONO_ARCH_INST_REGPAIR_REG2(desc,hreg1) (desc == 'l' ? X86_EDX : -1)
+
+[Original version of this document in git.](https://github.com/mono/mono/blob/4b2982c3096e3b17156bf00a062777ed364e3674/docs/jit-regalloc)
diff --git a/docs/design/mono/web/soft-debugger-wire-format.md b/docs/design/mono/web/soft-debugger-wire-format.md
new file mode 100644
index 000000000000..49facbc283df
--- /dev/null
+++ b/docs/design/mono/web/soft-debugger-wire-format.md
@@ -0,0 +1,469 @@
+# Soft Debugger Wire Format
+
+## Introduction
+
+The [Mono Soft Debugger](/docs/advanced/runtime/docs/soft-debugger/) (SDB) is a debugger implemented by the Mono runtime. The Mono runtime exposes an interface that debugger clients can use to debug a Mono application. Mono provides a convenience library in the form of the Mono.Debugger.Soft.dll that can be used to communicate with a running Mono process.
+
+The Mono.Debugger.Soft.dll library uses a protocol over sockets to debug applications. The wire protocol is inspired by the [JDWP (Java Debug Wire Protocol)](http://download.oracle.com/javase/1,5.0/docs/guide/jpda/jdwp-spec.html). Familiarity with that specification is a good read.
+
+This document describes the wire protocol used between debugging clients and the Mono runtime.
+
+Where possible, the corresponding protocol detail is linked to a function name and file location in Mono source code. These informations are based on Mono master version at revision *f42ba4a168e7cb9b9486b8a96c53752e4467be8a*.
+
+## Protocol details
+
+### Transport
+
+Mono SDB protocol, just like its Java counterpart, was designed with no specific transport in mind. However, presently the public Mono SDB only has a TCP/IP transport available (under the transport name of `dt_socket`). Other transports can be plugged by modifying this interface.
+
+#### Bootstraping a connection
+
+To boostrap a connection, the client send handshake to the server (see `debugger-agent.c:1034`) in the form of the 13 ASCII characters string "DWP-Handshake" and wait for the server reply which consist of the exact same ASCII character sequence.
+
+### Packets
+
+Just like JDWP, Mono SDB protocol is packet-based with two types of packet: command and reply. All fields in a packet is sent in big-endian format which is transparently handled in Mono source code with corresponding helper encode/decode functions.
+
+Command packet are used by either side (client or server) to request information, act on the execution of the debugged program or to inform of some event. Replies is only sent in response to a command with information on the success/failure of the operation and any extra data depending on the command that triggered it.
+
+Both type of packet contains a header. The header is always 11 bytes long. Their descriptions are given afterwards:
+
+<table border="1">
+  <thead>
+    <tr>
+      <th colspan="12" style="text-align: center">Command packet header</th>
+    </tr>
+    <tr>
+      <th>byte 1</th>
+      <th>byte 2</th>
+      <th>byte 3</th>
+      <th>byte 4</th>
+      <th>byte 5</th>
+      <th>byte 6</th>
+      <th>byte 7</th>
+      <th>byte 8</th>
+      <th>byte 9</th>
+      <th>byte 10</th>
+      <th>byte 11</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td colspan="4">length</td>
+      <td colspan="4">id</td>
+      <td>flags</td>
+      <td>command set</td>
+      <td>command</td>
+    </tr>
+  </tbody>
+</table>
+
+In Mono SDB source code, the command header is decoded in the server thread `debugger_thread` function at `debugger-agent.c:7583`.
+
+<table border="1">
+  <thead>
+    <tr>
+      <th colspan="12" style="text-align: center">Reply packet header</th>
+    </tr>
+    <tr>
+      <th>byte 1</th>
+      <th>byte 2</th>
+      <th>byte 3</th>
+      <th>byte 4</th>
+      <th>byte 5</th>
+      <th>byte 6</th>
+      <th>byte 7</th>
+      <th>byte 8</th>
+      <th>byte 9</th>
+      <th>byte 10</th>
+      <th>byte 11</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td colspan="4">length</td>
+      <td colspan="4">id</td>
+      <td>flags</td>
+      <td colspan="2">error code</td>
+    </tr>
+  </tbody>
+</table>
+In Mono SDB source code, a reply packet is constructed and sent by the `send_reply_packet` function in `debugger-agent:1514`.
+
+#### Packet field details
+
+##### Common fields
+
+length : The total length in byte of the packet including header i.e. this value will be 11 if the packet only consists of header with no other data
+
+id : Uniquely identify sent packet command/reply pair so that they can be asynchronously matched. This is in practise a simple monotonic integer counter. Note that client and server may use the same id value when sending their packets as the uniqueness property is only with respect to a specific source.
+
+flags : At the moment this value is only used with a reply packet in which case its value is set to `0x80`. A command packet should have this value set to 0.
+
+##### Command specific fields
+
+command set : This value allows grouping commands into similar blocks for quicker processing. The different command sets with their values are given below:
+
+| Command set      | Value |
+|:-----------------|:------|
+| Virtual Machine  | 1     |
+| Object reference | 9     |
+| String reference | 10    |
+| Threads          | 11    |
+| Array reference  | 13    |
+| Event request    | 15    |
+| Stack frame      | 16    |
+| AppDomain        | 20    |
+| Assembly         | 21    |
+| Method           | 22    |
+| Type             | 23    |
+| Module           | 24    |
+| Events           | 64    |
+
+command : Tell what command this packet corresponds to. This value is relative to the previously defined command set so the values are reused across different command sets. Definition of each command is given in a later chapter.
+
+##### Reply specific fields
+
+error code : Define which error occured or if the command was successful. Error code definition is given below:
+
+| Error name                | Value | Mono specific notes                                                        |
+|:--------------------------|:------|:---------------------------------------------------------------------------|
+| Success                   | 0     |                                                                            |
+| Invalid object            | 20    |                                                                            |
+| Invalid field ID          | 25    |                                                                            |
+| Invalid frame ID          | 30    |                                                                            |
+| Not Implemented           | 100   |                                                                            |
+| Not Suspended             | 101   |                                                                            |
+| Invalid argument          | 102   |                                                                            |
+| Unloaded                  | 103   | AppDomain has been unloaded                                                |
+| No Invocation             | 104   | Returned when trying to abort a thread which isn't in a runtime invocation |
+| Absent information        | 105   | Returned when a requested method debug information isn't available         |
+| No seq point at IL Offset | 106   | Returned when a breakpoint couldn't be set                                 |
+
+#### Data type marshalling
+
+| Name    | Size             | Description                                                                                                                                                             |
+|:--------|:-----------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| byte    | 1 byte           | A byte value                                                                                                                                                            |
+| short   | 2 byte           | A UInt16 value                                                                                                                                                          |
+| int     | 4 bytes          | A UInt32 value                                                                                                                                                          |
+| long    | 8 bytes          | A UInt64 value                                                                                                                                                          |
+| id      | 4 bytes          | The same size is used for all IDs (ObjectID, PointerID, TypeId, MethodID, AssemblyID, ModuleID, FieldID, PropertyID, DomainID)                                          |
+| string  | At least 4 bytes | A string consists of a leading int value giving the string size followed by *size* bytes of character data. Thus an empty string is simply a 4 bytes integer value of 0 |
+| variant | At least 1 byte  | A variant type is a special value which consists of a leading byte giving away the MonoType information of the variant followed directly by its raw value.              |
+| boolean | 4 bytes (an int) | Tough not strictly a type, a boolean is represented by an int value whose value is 1 for true and 0 for false.                                                          |
+
+Most of the encoding function for these types are defined as `buffer_add_*` functions starting from `debugger-agent.c:1429`. Their counterpart are of the form `decode_*` starting from `debugger-agent.c:1349`.
+
+A lot command returns or accepts fixed-length list of value. In these case, such a list is always prefixed with an int value giving its length followed by *length* element of the same type (which needs to be inferred from the context). When such a list is used the term "list" will be used. For clarification, an empty list is thus a single int value equals to 0.
+
+#### Various enumeration value definition
+
+For the record, the following C enumerations define the values used for flags, kind, ... parameters in some commands.
+
+``` c
+typedef enum {
+    EVENT_KIND_VM_START = 0,
+    EVENT_KIND_VM_DEATH = 1,
+    EVENT_KIND_THREAD_START = 2,
+    EVENT_KIND_THREAD_DEATH = 3,
+    EVENT_KIND_APPDOMAIN_CREATE = 4,
+    EVENT_KIND_APPDOMAIN_UNLOAD = 5,
+    EVENT_KIND_METHOD_ENTRY = 6,
+    EVENT_KIND_METHOD_EXIT = 7,
+    EVENT_KIND_ASSEMBLY_LOAD = 8,
+    EVENT_KIND_ASSEMBLY_UNLOAD = 9,
+    EVENT_KIND_BREAKPOINT = 10,
+    EVENT_KIND_STEP = 11,
+    EVENT_KIND_TYPE_LOAD = 12,
+    EVENT_KIND_EXCEPTION = 13,
+    EVENT_KIND_KEEPALIVE = 14,
+    EVENT_KIND_USER_BREAK = 15,
+    EVENT_KIND_USER_LOG = 16
+} EventKind;
+ 
+typedef enum {
+    SUSPEND_POLICY_NONE = 0,
+    SUSPEND_POLICY_EVENT_THREAD = 1,
+    SUSPEND_POLICY_ALL = 2
+} SuspendPolicy;
+ 
+typedef enum {
+    MOD_KIND_COUNT = 1,
+    MOD_KIND_THREAD_ONLY = 3,
+    MOD_KIND_LOCATION_ONLY = 7,
+    MOD_KIND_EXCEPTION_ONLY = 8,
+    MOD_KIND_STEP = 10,
+    MOD_KIND_ASSEMBLY_ONLY = 11,
+    MOD_KIND_SOURCE_FILE_ONLY = 12,
+    MOD_KIND_TYPE_NAME_ONLY = 13,
+    MOD_KIND_NONE = 14
+} ModifierKind;
+ 
+typedef enum {
+    STEP_DEPTH_INTO = 0,
+    STEP_DEPTH_OVER = 1,
+    STEP_DEPTH_OUT = 2
+} StepDepth;
+ 
+typedef enum {
+    STEP_SIZE_MIN = 0,
+    STEP_SIZE_LINE = 1
+} StepSize;
+ 
+typedef enum {
+    TOKEN_TYPE_STRING = 0,
+    TOKEN_TYPE_TYPE = 1,
+    TOKEN_TYPE_FIELD = 2,
+    TOKEN_TYPE_METHOD = 3,
+    TOKEN_TYPE_UNKNOWN = 4
+} DebuggerTokenType;
+ 
+typedef enum {
+    VALUE_TYPE_ID_NULL = 0xf0,
+    VALUE_TYPE_ID_TYPE = 0xf1,
+    VALUE_TYPE_ID_PARENT_VTYPE = 0xf2
+} ValueTypeId;
+ 
+typedef enum {
+    FRAME_FLAG_DEBUGGER_INVOKE = 1,
+
+    // Use to allow the debugger to display managed-to-native transitions in stack frames.
+    FRAME_FLAG_NATIVE_TRANSITION = 2
+} StackFrameFlags;
+ 
+typedef enum {
+    INVOKE_FLAG_DISABLE_BREAKPOINTS = 1,
+    INVOKE_FLAG_SINGLE_THREADED = 2,
+
+    // Allow for returning the changed value types after an invocation
+    INVOKE_FLAG_RETURN_OUT_THIS = 4,
+
+    // Allows the return of modified value types after invocation
+    INVOKE_FLAG_RETURN_OUT_ARGS = 8,
+
+    // Performs a virtual method invocation
+    INVOKE_FLAG_VIRTUAL = 16
+} InvokeFlags;
+```
+
+### Command list
+
+Types given in each command comments corresponds to the type described above. When there are additional arguments or multiple values in a command's reply, they are each time described in the order they appear or have to appear in the data part. Not also that there is no kind of separation sequence or added alignement padding between each value.
+
+In all cases, if you ask for a command that doesn't exist, a reply will be sent with an error code of NOT_IMPLEMENTED.
+
+#### Virtual machine commands
+
+| Name                      | Value | Action and type of reply                                                                                                                                                | Additional parameters                                                                                                                                                                                                                                                                                                                                                                          | Possible error code returned                                      |
+|:--------------------------|:------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------|
+| VERSION                   | 1     | Returns a mono virtual machine version information (string) followed by two int giving respectively the runtime major and minor version                                 | None                                                                                                                                                                                                                                                                                                                                                                                           | None                                                              |
+| ALL_THREADS               | 2     | Returns a list of ObjectID each mapping to a System.Threading.Thread instance.                                                                                          | None                                                                                                                                                                                                                                                                                                                                                                                           | None                                                              |
+| SUSPEND                   | 3     | Suspend the VM execution and returns an empty reply                                                                                                                     | None                                                                                                                                                                                                                                                                                                                                                                                           | None                                                              |
+| RESUME                    | 4     | Resume the VM execution and returns an empty reply                                                                                                                      | None                                                                                                                                                                                                                                                                                                                                                                                           | NOT_SUSPENDED                                                     |
+| EXIT                      | 5     | Stop VM and returns an empty reply                                                                                                                                      | Ask for a exit code (int) to be used by the VM when it exits                                                                                                                                                                                                                                                                                                                                   | None                                                              |
+| DISPOSE                   | 6     | Clear event requests, resume the VM and disconnect                                                                                                                      | None                                                                                                                                                                                                                                                                                                                                                                                           | None                                                              |
+| INVOKE_METHOD             | 7     | Returns a boolean telling if the call was successful followed by an exception object (as a variant) if it was not and by the actual returned value (variant) if it was. | Ask for an ObjectID (id) mapping to a System.Threading.Thread instance, a flags value (int) to pass to the invoke request, the MethodID (id) of the method to invoke, a variant value to be used as *this* (VALUE_TYPE_ID_NULL in case of a valuetype) and a list of variant value representing the parameters of the method.                                                                  | INVALID_OBJECT, NOT_SUSPENDED, INVALID_METHODID, INVALID_ARGUMENT |
+| SET_PROTOCOL_VERSION      | 8     | Returns an empty reply                                                                                                                                                  | Ask for two int giving respectively the major and minor version of the procotol to use.                                                                                                                                                                                                                                                                                                        | None                                                              |
+| ABORT_INVOKE              | 9     | Abort the invocation and returns an empty reply                                                                                                                         | Ask for an ObjectID (id) mapping to a System.Threading.Thread instance and the id (int) of the command packet that set up the invocation to cancel                                                                                                                                                                                                                                             | INVALID_OBJECT, NO_INVOCATION                                     |
+| SET_KEEPALIVE             | 10    | Set up the new keep alive value and returns an empty reply                                                                                                              | Ask for a timeout value (int)                                                                                                                                                                                                                                                                                                                                                                  | None                                                              |
+| GET_TYPES_FOR_SOURCE_FILE | 11    | Returns a list of TypeID (id) of class defined inside the supplied file name                                                                                            | Ask for a file name (string) and an ignore case flag (byte) although setting it to something different than 0 isn't currently supported.                                                                                                                                                                                                                                                       | None                                                              |
+| GET_TYPES                 | 12    | Returns a list of TypeID (id) of type which corresponds to the provided type name                                                                                       | Ask for type name (string) and a ignore case flag (byte) which acts like a boolean value                                                                                                                                                                                                                                                                                                       | INVALID_ARGUMENT                                                  |
+| INVOKE_METHODS            | 13    | Batch invocation of methods                                                                                                                                             | Ask for an ObjectID (id) mapping to a System.Threading.Thread instance, a flags value (int) to pass to the invoke request, the number of methods to invoke (int), and for each method the the MethodID (id) for each method to invoke, a variant value to be used as *this* (VALUE_TYPE_ID_NULL in case of a valuetype) and a list of variant value representing the parameters of the method. | INVALID_OBJECT, NOT_SUSPENDED, INVALID_METHODID, INVALID_ARGUMENT |
+| VM_START_BUFFERING        | 14    | Initiates the buffering of reply packets to improve latency. Must be paired with a VM_STOP_BUFFERING command                                                            | None                                                                                                                                                                                                                                                                                                                                                                                           | None                                                              |
+| VM_STOP_BUFFERING         | 15    | Ends the block of buffered commands, must come after a call to VM_START_BUFFERING                                                                                       | None                                                                                                                                                                                                                                                                                                                                                                                           | None                                                              |
+
+The main function handling these commands is `vm_commands` and is situated at `debugger-agent.c:5671`
+
+#### Events commands
+
+Events allows the debuggee to act on program execution (stepping) and also to set up things like breakpoints, watchpoints, exception catching, etc.
+
+| Name                          | Value | Type of reply                                        | Additional parameters                                                                                                                                                                             | Possible error code returned                                                                    |
+|:------------------------------|:------|:-----------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------|
+| REQUEST_SET                   | 1     | Returns the request id (int)                         | Ask for 3 bytes giving the event kind (EventKind enumeration), suspend policy (SuspendPolicy enumeration) and a list of modifiers which content is context dependent and given in the table below | INVALID_METHODID, INVALID_TYPEID, NO_SEQ_POINT_AT_IL_OFFSET, INVALID_OBJECT, INVALID_ASSEMBLYID |
+| REQUEST_CLEAR                 | 2     | Clear the requested event and returns an empty reply | Ask for an event type (byte) and a request id (int)                                                                                                                                               | None                                                                                            |
+| REQUEST_CLEAR_ALL_BREAKPOINTS | 3     | Returns an empty reply                               | None                                                                                                                                                                                              | None                                                                                            |
+
+The main function handling these commands is `event_commands` and is situated at `debugger-agent.c:5916`
+
+Each modifier has the first byte describing the modification it's carrying out and corresponding to the values found in the ModifierKind enumeration. The following table list the remaining body depending on the modification value.
+
+| Mod value        | Body                                                                                                                                           |
+|:-----------------|:-----------------------------------------------------------------------------------------------------------------------------------------------|
+| COUNT            | a MethodID (id)                                                                                                                                |
+| LOCATION_ONLY    | a MethodID (id) and a location information (long)                                                                                              |
+| STEP             | A thread id, size of the step (int) corresponding to the StepSize enumeration and depth of it (int) corresponding to the StepDepth enumeration |
+| THREAD_ONLY      | A thread id                                                                                                                                    |
+| EXCEPTION_ONLY   | A TypeID representing a exception type and two byte values setting respectively the caught and uncaught filter                                 |
+| ASSEMBLY_ONLY    | A list of AssemblyID (id)                                                                                                                      |
+| SOURCE_FILE_ONLY | A list of source file name (string)                                                                                                            |
+| TYPE_NAME_ONLY   | A list of type name (string)                                                                                                                   |
+| NONE             |                                                                                                                                                |
+
+#### Thread commands
+
+Each command requires at least one ObjectID (of type id) parameter mapping to a thread instance before any additional parameter the command may require.
+
+| Name           | Value | Type of reply                                                                                         | Additional parameters                                                                                 | Possible error code returned |
+|:---------------|:------|:------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------|:-----------------------------|
+| GET_FRAME_INFO | 1     | Returns a list of quadruplet of frame ID (int), MethodID (id), IL offset (int) and frame flags (byte) | Ask for a start frame (currently other value than 0 aren't supported) as an int and a length as a int | INVALID_OBJECT               |
+| GET_NAME       | 2     | Returns the name of the thread as a string                                                            | None                                                                                                  | INVALID_OBJECT               |
+| GET_STATE      | 3     | Return the thread state as an int                                                                     | None                                                                                                  | INVALID_OBJECT               |
+| GET_INFO       | 4     | Returns a byte value telling if the thread is a threadpool thread (1) or not (0)                      | None                                                                                                  | INVALID_OBJECT               |
+| GET_ID         | 5     | Returns the thread id (address of the object) as a long                                               | None                                                                                                  | INVALID_OBJECT               |
+| GET_TID        | 6     | Returns the proper thread id (or TID) as a long                                                       | None                                                                                                  | INVALID_OBJECT               |
+| SET_IP         | 7     | Set the location where execution will return when this thread is resumed                              | Thread ID (int), Method ID (long), IL offset (long)                                                   | INVALID_ARGUMENT             |
+
+The main function handling these commands is `thread_commands` and is situated at `debugger-agent.c:6991`
+
+#### AppDomains commands
+
+| Name               | Value | Type of reply                                                   | Additional parameters                                                                                                                   | Possible error code returned     |
+|:-------------------|:------|:----------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------|
+| GET_ROOT_DOMAIN    | 1     | Returns the DomainID of the root domain                         | None                                                                                                                                    | None                             |
+| GET_FRIENDLY_NAME  | 2     | Returns the friendly name as a string of the provided DomainID  | Ask for a DomainID (id)                                                                                                                 | INVALID_DOMAINID                 |
+| GET_ASSEMBLIES     | 3     | Returns a list of AssemblyID contained inside this AppDomain    | Ask for a DomainID (id)                                                                                                                 | INVALID_DOMAINID                 |
+| GET_ENTRY_ASSEMBLY | 4     | Returns the entry AssemblyID of this domain                     | Ask for a DomainID (id)                                                                                                                 | INVALID_DOMAINID                 |
+| CREATE_STRING      | 5     | Returns the ObjectID of the created string                      | Ask for a DomainID (id) where to create the new string and a string typed value to put inside the domain                                | INVALID_DOMAINID                 |
+| GET_CORLIB         | 6     | Returns the AssemblyID of the load corlib inside this AppDomain | Ask for a DomainID (id)                                                                                                                 | INVALID_DOMAINID                 |
+| CREATE_BOXED_VALUE | 7     | Returns the ObjectID of the boxed value                         | Ask for a DomainID (id), TypeID of the type that is going to be boxed and a variant value which is going to be put into the boxed value | INVALID_DOMAINID, INVALID_TYPEID |
+
+The main function handling these commands is `domain_commands` and is situated at `debugger-agent.c:6104`
+
+#### Assembly commands
+
+Each command requires at least one AssemblyID (of type id) parameter before any additional parameter the command may require.
+
+| Name                | Value | Type of reply                                                                                                                 | Additional parameters                                                                                            | Possible error code returned |
+|:--------------------|:------|:------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------|:-----------------------------|
+| GET_LOCATION        | 1     | Returns the filename (string) of image associated to the assembly                                                             | None                                                                                                             | INVALID_ASSEMBLYID           |
+| GET_ENTRY_POINT     | 2     | Returns the MethodID (id) of the entry point or a 0 id if there is none (in case of dynamic assembly or library for instance) | None                                                                                                             | INVALID_ASSEMBLYID           |
+| GET_MANIFEST_MODULE | 3     | Returns the ModuleID (id) of the assembly                                                                                     | None                                                                                                             | INVALID_ASSEMBLYID           |
+| GET_OBJECT          | 4     | Returns the ObjectID of the AssemblyID object instance                                                                        | None                                                                                                             | INVALID_ASSEMBLYID           |
+| GET_TYPE            | 5     | Returns the TypeID of the found type or a null id if it wasn't found                                                          | Ask for a type information in form of a string and a byte value to tell if case should be ignored (1) or not (0) | INVALID_ASSEMBLYID           |
+| GET_NAME            | 6     | Return the full name of the assembly as a string                                                                              | None                                                                                                             | INVALID_ASSEMBLYID           |
+
+The main function handling these commands is `assembly_commands` and is situated at `debugger-agent.c:6203`
+
+#### Module commands
+
+| Name                | Value | Type of reply                                                                                                   | Additional parameters   | Possible error code returned |
+|:--------------------|:------|:----------------------------------------------------------------------------------------------------------------|:------------------------|:-----------------------------|
+| CMD_MODULE_GET_INFO | 1     | Returns the following strings: basename of the image, scope name, full name, GUID and the image AssemblyID (id) | Ask for a ModuleID (id) | None                         |
+
+The main function handling these commands is `module_commands` and is situated at `debugger-agent.c:6295`
+
+#### Method commands
+
+Each command requires at least one MethodID (of type id) parameter before any additional parameter the command may require.
+
+| Name                | Value | Type of reply                                                                                                                                                                                                                                                                  | Additional parameters                                                          | Possible error code returned       |
+|:--------------------|:------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------|:-----------------------------------|
+| GET_NAME            | 1     | Returns a string of the method name                                                                                                                                                                                                                                            | None                                                                           | INVALID_METHODID                   |
+| GET_DECLARING_TYPE  | 2     | Returns a TypeID of the declaring type for this method                                                                                                                                                                                                                         | None                                                                           | INVALID_METHODID                   |
+| GET_DEBUG_INFO      | 3     | Returns the code size of the method (int), source file name (string) and a list of tuple of IL offset (int) and line numbers (int) for the method                                                                                                                              | None                                                                           | INVALID_METHODID                   |
+| GET_PARAM_INFO      | 4     | Returns the call convention (int), parameter count (int), generic parameter count (int), TypeID of the returned value (id), *parameter count* TypeID for each parameter type and finally *parameter count* parameter name (string) for each parameter.                         | None                                                                           | INVALID_METHODID                   |
+| GET_LOCALS_INFO     | 5     | Returns the number of locals (int) followed by the TypeID (id) for each locals, followed by the name (string) of each locals (empty string if there is none) and finally followed by the scope of each locals which is a tuple of int giving the start address and end offset. | None                                                                           | INVALID_METHODID                   |
+| GET_INFO            | 6     | Returns 3 int representing respectively the method flags, implementation flags and token                                                                                                                                                                                       | None                                                                           | INVALID_METHODID                   |
+| GET_BODY            | 7     | Returns a list of byte corresponding to the method IL code.                                                                                                                                                                                                                    | None                                                                           | INVALID_METHODID                   |
+| RESOLVE_TOKEN       | 8     | Returns a variant value corresponding to the provided token                                                                                                                                                                                                                    | Ask for a token value (int)                                                    | INVALID_METHODID                   |
+| GET_CATTRS          | 9     | Returns the custom attributes for the methods                                                                                                                                                                                                                                  | Method ID, attribute-type ID                                                   | INVALID_METHODID,LOADER_ERROR      |
+| MAKE_GENERIC_METHOD | 10    | Makes a generic version of the method                                                                                                                                                                                                                                          | Method ID, number of type arguments (int), TypeID for each type argument (int) | INVALID_ARGUMENT, INVALID_METHODID |
+
+The main functions handling these commands are `method_commands` and `method_commands_internal` and are situated at `debugger-agent.c:6968` and `debugger-agent.c:6968` respectively.
+
+#### Type commands
+
+Each command requires at least one TypeID (of type id) parameter before any additional parameter the command may require.
+
+| Name                | Value | Type of reply                                                                                                                                                                                                                                                                                                                                                                       | Additional parameters                                                                                                                                                      | Possible error code returned                    |
+|:--------------------|:------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------|
+| GET_INFO            | 1     | Returns the following informations about the type in that order: namespace (string), class name (string), full name (string), AssemblyID (id), ModuleID (id), TypeID (id), TypeID (id) of underlying type (or a 0 id if there is none), type token (int), type rank (byte), type flags (int), underlying byval type (byte) flags (see after table) and a list of nested type TypeID | None                                                                                                                                                                       | INVALID_TYPEID                                  |
+| GET_METHODS         | 2     | Returns a list of MethodID corresponding to each of the method of the type                                                                                                                                                                                                                                                                                                          | None                                                                                                                                                                       | INVALID_TYPEID                                  |
+| GET_FIELDS          | 3     | Returns list of quadruplet of FieldID (id), field name (string), field TypeID (id), field attributes (int)                                                                                                                                                                                                                                                                          | None                                                                                                                                                                       | INVALID_TYPEID                                  |
+| GET_VALUES          | 4     | Returns a number of variant value equals to the number of FieldID that was passed as parameter. If the field had a ThreadStatic attribute applied to it, value fetched are from the current thread point of view.                                                                                                                                                                   | Ask for a list of FieldID representing this type static fields to the the value of. Only static field are supported.                                                       | INVALID_TYPEID, INVALID_FIELDID                 |
+| GET_OBJECT          | 5     | Returns an ObjectID corresponding to the type instance                                                                                                                                                                                                                                                                                                                              | None                                                                                                                                                                       | INVALID_TYPEID                                  |
+| GET_SOURCE_FILES    | 6     | Returns the same output than GET_SOURCE_FILES_2 except only the basename of each path is returned                                                                                                                                                                                                                                                                                   | None                                                                                                                                                                       | INVALID_TYPEID                                  |
+| SET_VALUES          | 7     | Returns an empty response                                                                                                                                                                                                                                                                                                                                                           | Ask for a list of tuple of FieldID and variant value. Only pure static field can be set (i.e. with no extra attribute like ThreadStatic).                                  | INVALID_TYPEID, INVALID_FIELDID                 |
+| IS_ASSIGNABLE_FROM  | 8     | Returns a boolean equals to true if the type is assignable from the other provided type, false otherwise                                                                                                                                                                                                                                                                            | Ask for an extra TypeID                                                                                                                                                    | INVALID_TYPEID                                  |
+| GET_PROPERTIES      | 9     | Returns a list of quadruplet of FieldID (id), get accessor MethodID (string), set accessor MethodID (id), property attributes (int)                                                                                                                                                                                                                                                 | None                                                                                                                                                                       | INVALID_TYPEID                                  |
+| GET_CATTRS          | 10    | Returns a list of custom attribute applied on the type. Custom attribute definition is given below.                                                                                                                                                                                                                                                                                 | Ask for a TypeID of an custom attribute type                                                                                                                               | INVALID_TYPEID                                  |
+| GET_FIELD_CATTRS    | 11    | Returns a list of custom attributes of a type's field. Custom attribute definition is given below.                                                                                                                                                                                                                                                                                  | Ask for a FieldID of one the type field and a TypeID of an custom attribute type                                                                                           | INVALID_TYPEID, INVALID_FIELDID                 |
+| GET_PROPERTY_CATTRS | 12    | Returns a list of custom attributes of a type's property. Custom attribute definition is given below.                                                                                                                                                                                                                                                                               | Ask for a PropertyID of one the type field and a TypeID of an custom attribute type                                                                                        | INVALID_TYPEID, INVALID_PROPERTYID              |
+| GET_SOURCE_FILES_2  | 13    | Returns a list of source file full paths (string) where the type is defined                                                                                                                                                                                                                                                                                                         | None                                                                                                                                                                       | INVALID_TYPEID                                  |
+| GET_VALUES_2        | 14    | Returns a number of variant value equals to the number of FieldID that was passed as parameter. If the field had a ThreadStatic attribute applied to it, value fetched are from the thread parameter point of view.                                                                                                                                                                 | Ask for an ObjectID representing a System.Thread instance and a list of FieldID representing this type static fields to the the value of. Only static field are supported. | INVALID_OBJECT, INVALID_TYPEID, INVALID_FIELDID |
+
+The main functions handling these commands are `type_commands` and `type_commands_internal` and are situated at `debugger-agent.c:6726` and `debugger-agent.c:6403` respectively.
+
+Byval flags is an indication of the type attribute for a parameter when it's passed by value. A description of these flags follows:
+
+| byte 1            | byte 2              | byte 3          | byte 4            | byte 5 | byte 6 | byte 7 | byte 8 |
+|:------------------|:--------------------|:----------------|:------------------|:-------|:-------|:-------|:-------|
+| Is a pointer type | Is a primitive type | Is a value type | Is an enumeration | Unused | Unused | Unused | Unused |
+
+Custom attribute definition is as follows: MethodID of the attribute ctor, a list of variant objects representing the typed arguments of the attribute prepended by a length attribute (int) and another list representing named arguments of which elements are either tuple of the constant 0x53 followed by a variant value (in case the named argument is a field) or a triplet of the constant 0x54 followed by a PropertyID followed by a variant value (in case the named argument is a property). In both list case, an empty list is simply one int of value 0.
+
+#### Stackframe commands
+
+Each command requires at least one ObjectID (of type id) parameter mapping to a System.Threading.Thread instance and a FrameID (of type id) before any additional parameter the command may require.
+
+| Name       | Value | Type of reply                                                                                                                                                                        | Additional parameters                                                                             | Possible error code returned                                          |
+|:-----------|:------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------|
+| GET_VALUES | 1     | Returns a list of miscelleanous typed values. If the position information was negative, the value corresponds to a parameter and if it was positive to a local variable.             | Ask for a list of position (int) information.                                                     | INVALID_OBJECT, INVALID_FRAMEID, ABSENT_INFORMATION                   |
+| GET_THIS   | 2     | Returns the *this* value prepended by a single byte value describing its type, or the special TYPE_ID_NULL (byte) value which is equal to 0xf0 in case there is no *this* parameter. | None                                                                                              | INVALID_OBJECT, INVALID_FRAMEID, ABSENT_INFORMATION                   |
+| SET_VALUES | 3     | Returns an empty reply                                                                                                                                                               | Ask for a list of pair of position (int) information and variant whose value is going to be used. | INVALID_OBJECT, INVALID_FRAMEID, ABSENT_INFORMATION, INVALID_ARGUMENT |
+
+The main function handling these commands is `frame_commands` and is situated at `debugger-agent.c:7082`
+
+#### Array commands
+
+Each command requires at least one ObjectID (of type id) parameter mapping to a System.Array instance before any additional parameter the command may require.
+
+| Name       | Value | Type of reply                                                                                                                                                                                                                                                                                                                                    | Additional parameters                                                                                                                                                                                               | Possible error code returned |
+|:-----------|:------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------|
+| GET_LENGTH | 1     | Returns an int corresponding to the array rank followed by a set of int pair corresponding respectively to the length and lower bound of each of the array dimensions. In case of a single dimensional zero-based array, the returned data amount to 3 int values with the second being the total length of the array and the third one being 0. | None                                                                                                                                                                                                                | INVALID_OBJECT               |
+| GET_VALUES | 2     | Returns a list of *length* elements which individual size in bytes depends on the underlying type of the System.Array instance.                                                                                                                                                                                                                  | Ask for an index (int) and a length (int) to determine the range of value to return                                                                                                                                 | INVALID_OBJECT               |
+| SET_VALUES | 3     | Return an empty reply                                                                                                                                                                                                                                                                                                                            | Ask for an index (int) and a length (int) to determine the range of value to set and a *length* number of trailing values whose type and byte size match those of the underlying type of the System.Array instance. | INVALID_OBJECT               |
+
+The main function handling these commands is `vm_commands` and is situated at `debugger-agent.c:5671`
+
+#### String commands
+
+Each command requires at least one ObjectID (of type id) parameter mapping to a System.String instance before any additional parameter the command may require.
+
+| Name       | Value | Type of reply                                                                                                      | Additional parameters                                                                   | Possible error code returned     |
+|:-----------|:------|:-------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:---------------------------------|
+| GET_VALUE  | 1     | Returns a UTF8-encoded string corresponding to the System.String instance with its length prepended as a int value | None                                                                                    | INVALID_OBJECT                   |
+| GET_LENGTH | 2     | Returns the length of a UTF8-encoded string corresponding to the System.String instance as an int value            | None                                                                                    | INVALID_OBJECT                   |
+| GET_CHARS  | 3     | Returns *length* short values each encoding a character of the string slice                                        | Ask for a start index (long) and a length parameter (long) of the string slice to take. | INVALID_OBJECT, INVALID_ARGUMENT |
+
+The main function handling these commands is `string_commands` and is situated at `debugger-agent.c:7293`
+
+#### Object commands
+
+Each command requires at least one ObjectID (of type id) parameter before any additional parameter the command may require.
+
+| Name         | Value | Type of reply                                                                                                     | Additional parameters                                                             | Possible error code returned              |
+|:-------------|:------|:------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------|
+| GET_TYPE     | 1     | Returns the TypeID as an id                                                                                       | None                                                                              | INVALID_OBJECT                            |
+| GET_VALUES   | 2     | Returns *length* values of miscellaneous type and size corresponding to the underlying type of each queried field | Ask for a list of FieldID to fetch value of                                       | INVALID_OBJECT, UNLOADED, INVALID_FIELDID |
+| IS_COLLECTED | 3     | Returns an int equals to 1 if the object has been collected by GC, 0 otherwise                                    | None                                                                              | None                                      |
+| GET_ADDRESS  | 4     | Returns a long value corresponding to the address where the object is stored in memory                            | None                                                                              | INVALID_OBJECT                            |
+| GET_DOMAIN   | 5     | Returns an id corresponding to the DomainID the object is located in                                              | None                                                                              | INVALID_OBJECT                            |
+| SET_VALUES   | 6     | Returns an empty reply                                                                                            | Ask for a list of tuple of FieldID (id) and of the value that should be set to it | INVALID_OBJECT, UNLOADED, INVALID_FIELDID |
+
+The main function handling these commands is `object_commands` and is situated at `debugger-agent.c:7318`
+
+#### Composite commands
+
+| Name      | Value | Description                                                              |
+|:----------|:------|:-------------------------------------------------------------------------|
+| COMPOSITE | 100   | This command is actually part of the event command set and is used for ? |
+
+## Differences with JDWP
+
+-   Handshake ASCII sequence is DWP-Handshake instead of JDWP-Handshake
+-   Some new Mono specific command set such as AppDomain, Assembly or Module and removal/renaming of some Java specific set such as InterfaceType, ThreadGroupReference, ClassLoaderReference, etc.
+-   Mono SDB protocol has its own specific ID types related to the new command sets.
+-   SDB protocol has less error code although some are Mono-specific like "No Invocation", "Absent Informations" and "No seq point at IL offset" codes.
diff --git a/docs/design/mono/web/soft-debugger.md b/docs/design/mono/web/soft-debugger.md
new file mode 100644
index 000000000000..4a9dbb36f542
--- /dev/null
+++ b/docs/design/mono/web/soft-debugger.md
@@ -0,0 +1,91 @@
+# Soft-Mode Debugger
+
+The Mono Soft Debugger is a new debugging framework for Mono. Unlike regular debuggers which act as all-knowing and controlling programs that control a separate process, the Mono Soft Debugger is actually a cooperative debugger that is built into the Mono runtime.
+
+Applications communicate with the Mono runtime and request debugging operations to be performed on the target process.
+
+ The Mono Soft Debugger first became available with Mono 2.6 and is primarily used today with [Mono on the iPhone](http://monotouch.net) and is used from the [MonoDevelop IDE](http://monodevelop.com).
+
+Architecture
+------------
+
+The following diagram is useful in the discussion of the soft debugger:
+
+[![0911030528Mp6F5SHL.png](images/0911030528Mp6F5SHL.png)](images/0911030528Mp6F5SHL.png)
+
+The soft debugger lives inside the Mono runtime. Debuggers communicate with this component with a compact protocol over a socket connection. For ease of use the protocol has been encapsulated in the Mono.Debugger.Soft.dll API which different IDEs can use to communicate with the target.
+
+The soft debugger work both with Just-in-Time compiled code, and with [batch compiled code](/docs/advanced/aot/) allowing it to debug both regular Mono applications on a desktop, or applications on devices like the iPhone or the [PlayStation 3](/docs/about-mono/supported-platforms/playstation3/).
+
+### Debugger Agent
+
+The debugger agent is a module inside the mono runtime which offers debugging services to client programs.
+
+### Wire Protocol
+
+Clients communicate with the agent using a wire protocol over a socket transport. Read our [Soft Debugger Wire Protocol](/docs/advanced/runtime/docs/soft-debugger-wire-format/) document for details about the protocol.
+
+The wire protocol is inspired by the [Java Debug Wire Protocol](http://java.sun.com/j2se/1.5.0/docs/guide/jpda/jdwp-spec.html).
+
+### Client library
+
+The client library is a C# assembly which uses the wire protocol to communicate with the debugger agent running inside the mono runtime. It is based on the [Java Debug Interface](http://java.sun.com/j2se/1.5.0/docs/guide/jpda/jdi/). The assembly is named Mono.Debugger.Soft.dll, and its source is in mcs/class/Mono.Debugger.Soft.
+
+Implementation
+--------------
+
+### Agent
+
+The source code is in mini/debugger-agent.{h,c}. Unlike the JDWP agent in Java, the debugger agent is tightly integrated with the mono runtime because mono doesn't have a tool interface with similar capabilities as JVMTI in Java.
+
+#### Design
+
+The design for the agent was to choose solutions which were easy to implement, they can be improved later. This means that some things like step out/over can be very slow, the code generated by the JIT when debugging is enabled is larger/slower etc.
+
+#### The debugger thread
+
+The agent starts its own thread which it uses to communicate with clients using the wire protocol.
+
+#### Event handling
+
+On startup, the agent registers callbacks for events using the mono profiler interface. When a callback is called, it searches the list of event requests for a request matching the event type. If one is found, the event is sent to the client using the wire protocol.
+
+#### Suspend/Resume
+
+Suspending/Resuming the runtime is the most complex part of the debugger agent. There are many complications: - threads running managed code/native code/transitioning between the two. - threads starting up/terminating. - multiple suspend/resume operations happening in parallel.
+
+Threads running native code can't be suspended, because they can hold locks which are needed by the debugger and the rest of the runtime to function. So they are left running, and are only suspended when they enter managed code. We save enough state at managed-\>native transitions to be able to produce stack traces and examine the state of stack frames. However, debugger invocations are not supported on threads which are running managed code, so property evaluation is not possible on these threads.
+
+A suspend can be started by a normal runtime thread when it receives an event which asks for the runtime to suspend, or it can be started by the debugger thread in response to a VM.Suspend command. In contrast, a resume can only be started by the debugger thread in response to a VM.Resume command.
+
+Threads running managed code are suspended by turning on single stepping, and suspending the thread when it reaches the single step event handler. Threads running native code are treated as suspended.
+
+A suspend can be started by calling suspend_vm (), which is an async operation. This means that when the client receives an event, the runtime might not be entirely suspended yet, so code which needs the runtime to be suspended like the stack frame processing code needs to call wait_for_suspend (). After starting a suspend, the thread needs to suspend itself by calling suspend_current ().
+
+#### Sequence points
+
+A sequence point is an IL offset where the program can be stopped and its state can be examined. Currently the debugger determines sequence points automatically. A sequence point is placed at the places:
+
+-   IL offsets where the IL stack is empty. This generally corresponds to the end of C# statements.
+-   IL offsets which contain the NOP IL instructions. This can be used by a compiler to insert extra sequence points, like between nested calls.
+-   IL offsets which have a corresponding line number entry in the .mdb file.
+
+The mdbdump tool in mcs/tools/mdbdump can be used to examine the line number tables inside an .mdb file.
+
+A sequence point is represented by the JIT opcode OP_SEQ_POINT. The JIT backends generate code from this opcode which implements single stepping/breakpoints.
+
+#### Single Stepping
+
+The implementation of single stepping is target specific. On most platforms, it is implemented by allocating a memory page and having the implementation of OP_SEQ_POINT read from that page. Single stepping is then turned on by read-protecting that page, causing the memory read to turn into a SIGSEGV or similar signal. The signal handler needs to determine whenever the signal was caused by access to this page, and if it is, transfer control to the single step handler code in the debugger agent.
+
+Step over/out is implemented by single stepping repeatedly until the condition becomes true (i.e. we reach a different line/parent frame).
+
+#### Breakpoints
+
+Breakpoints are usually implemented similarly to single stepping, by reading from a memory page. OP_SEQ_POINT generates a few nops to act as a placeholder, then the code to read from the trigger page is written to the JITted code when the breakpoint is enabled, and changed back to nops when the breakpoint is disabled.
+
+#### AOT support
+
+AOTed code can be debugged by compiling it with the 'soft-debug' aot option, i.e: mono --debug --aot=soft-debug foo.dll
+
+In the AOT case, the code can'be be patched at runtime, so breakpoints are implemented by reading from per-method table with one entry per sequence point, which is either NULL or points to the breakpoint trigger page.
diff --git a/docs/design/mono/web/thread-safety.md b/docs/design/mono/web/thread-safety.md
new file mode 100644
index 000000000000..6449866acff1
--- /dev/null
+++ b/docs/design/mono/web/thread-safety.md
@@ -0,0 +1,129 @@
+# Thread Safety/Synchronization
+
+Thread safety of metadata structures
+------------------------------------
+
+### Synchronization of read-only data
+
+Read-only data is data which is not modified after creation, like the actual binary metadata in the metadata tables.
+
+There are three kinds of threads with regards to read-only data:
+
+-   readers
+-   the creator of the data
+-   the destroyer of the data
+
+Most threads are readers.
+
+-   synchronization between readers is not necessary
+-   synchronization between the writers is done using locks.
+-   synchronization between the readers and the creator is done by not exposing the data to readers before it is fully constructed.
+-   synchronization between the readers and the destroyer: TBD.
+
+### Deadlock prevention plan
+
+Hold locks for the shortest time possible. Avoid calling functions inside locks which might obtain global locks (i.e. locks known outside this module).
+
+### Locks
+
+#### Simple locks
+
+There are a lot of global data structures which can be protected by a 'simple' lock. Simple means:
+
+-   the lock protects only this data structure or it only protects the data structures in a given C module. An example would be the appdomains list in domain.c
+-   the lock can span many modules, but it still protects access to a single resource or set of resources. An example would be the image lock, which protects all data structures that belong to a given MonoImage.
+-   the lock is only held for a short amount of time, and no other lock is acquired inside this simple lock. Thus there is no possibility of deadlock.
+
+Simple locks include, at least, the following :
+
+-   the per-image lock acquired by using mono_image_(un)lock functions.
+-   the threads lock acquired by using mono_threads_(un)lock.
+
+#### The loader lock
+
+This locks is held by class loading routines and any global synchronization routines. This is effectively the runtime global lock. Other locks can call code that acquire the loader lock out of order if the current thread already owns it.
+
+#### The domain lock
+
+Each appdomain has a lock which protects the per-domain data structures.
+
+#### The domain jit code hash lock
+
+This per-domain lock protects the JIT'ed code of each domain. Originally we used the domain lock, but it was split to reduce contention.
+
+#### Allocation locks and foreign locks
+
+Mono features a few memory allocation subsystems such as: a lock-free allocator, the GC. Those subsystems are designed so they don't rely on any of the other subsystems in the runtime. This ensures that locking within them is transparent to the rest of the runtime and are not covered here. It's the same rule  when dealing with locking that happens within libc.
+
+### The locking hierarchy
+
+It is useful to model locks by a locking hierarchy, which is a relation between locks, which is reflexive, transitive, and antisymmetric, in other words, a lattice. If a thread wants to acquire a lock B, while already holding A, it can only do it if A \< B. If all threads work this way, then no deadlocks can occur.
+
+Our locking hierarchy so far looks like this (if lock A is above lock B, then A \< B):
+
+        <LOADER LOCK>
+            \
+           <DOMAIN LOCK>
+            \                 \                  \
+           <DOMAIN JIT LOCK> <SIMPLE LOCK 1>    <SIMPLE LOCK 2>
+
+For example: if a thread wants to hold a domain jit lock, a domain lock and the loader lock, it must acquire them in the order: loader lock, domain lock, domain jit lock.
+
+### Notes
+
+Some common scenarios:
+
+-   if a function needs to access a data structure, then it should lock it itself, and do not count on its caller locking it. So for example, the image-\>class_cache hash table would be locked by mono_class_get().
+
+-   there are lots of places where a runtime data structure is created and stored in a cache. In these places, care must be taken to avoid multiple threads creating the same runtime structure, for example, two threads might call mono_class_get () with the same class name. There are two choices here:
+
+<!-- -->
+
+        <enter mutex>
+        <check that item is created>
+        if (created) {
+            <leave mutex>
+            return item
+        }
+        <create item>
+        <store it in cache>
+        <leave mutex>
+
+This is the easiest solution, but it requires holding the lock for the whole time which might create a scalability problem, and could also lead to deadlock.
+
+        <enter mutex>
+        <check that item is created>
+        <leave mutex>
+        if (created) {
+            return item
+        }
+        <create item>
+        <enter mutex>
+        <check that item is created>
+        if (created) {
+            /* Another thread already created and stored the same item */
+            <free our item>
+            <leave mutex>
+            return orig item
+        }
+        else {
+            <store item in cache>
+            <leave mutex>
+            return item
+        }
+
+This solution does not present scalability problems, but the created item might be hard to destroy (like a MonoClass). If memory is allocated from a mempool, that memory is leaked, but the leak is very rare and it is bounded.
+
+-   lazy initialization of hashtables etc. is not thread safe
+
+[Original version of this document in git](https://github.com/mono/mono/blob/8f91e420d7fbbab7da758e57160d1d762129f38a/docs/thread-safety.txt)
+
+### The Lock Tracer
+
+Mono now have a lock tracer that allows to record the locking behavior of the runtime during execution and later verify it's correctness.
+
+To enable lock tracer support define LOCK_TRACER in mono/mono/metadata/lock-tracer.h and recompile mono. To enable it at runtime define the MONO_ENABLE_LOCK_TRACER environment variable.
+
+The lock tracer produces a file in the same directory of the application, it's named 'lock.ZZZ' where ZZZ is the pid of the mono process.
+
+After producing such lock file, run the trace decoder that can be found in mono/data/lock-decoder. It currently only works on linux and macOS, it requires binutils to be installed. The decoder will report locking errors specifying the functions that caused it.
diff --git a/docs/design/mono/web/trampolines.md b/docs/design/mono/web/trampolines.md
new file mode 100644
index 000000000000..a1ad2b70b5b3
--- /dev/null
+++ b/docs/design/mono/web/trampolines.md
@@ -0,0 +1,75 @@
+# Trampolines
+
+Trampolines are small, hand-written pieces of assembly code used to perform various tasks in the mono runtime. They are generated at runtime using the native code generation macros used by the JIT. They usually have a corresponding C function they can fall back to if they need to perform a more complicated task. They can be viewed as ways to pass control from JITted code back to the runtime.
+
+The common code for all architectures is in mini-trampolines.c, this file contains the trampoline creation functions plus the C functions called by the trampolines. The tramp-\<ARCH\>.c files contain the arch-dependent code which creates the trampolines themselves.
+
+Most, but not all trampolines consist of two parts:
+
+-   a generic part containing most of the code. This is created by the mono_arch_create_trampoline_code () function in tramp-\<ARCH\>.c. Generic trampolines can be large (1kb).
+-   a specific part whose job is to call the generic part, passing in a parameter. The parameter to pass and the method by it is passed depends on the type of the trampoline. Specific trampolines are created by the mono_arch_create_specific_trampoline () function in tramp-\<ARCH\>.c. Specific trampolines are small, since the runtime creates lots of them.
+
+The generic part saves the machine state to the stack, and calls one of the trampoline functions in mini-trampolines.c with the state, the call site, and the argument passed by the specific trampoline. After the C function returns, it either returns normally, or branches to the address returned by the C function, depending on the trampoline type.
+
+Trampoline types are given by the MonoTrampolineType enumeration in [mini.h](https://github.com/mono/mono/blob/main/mono/mini/mini.h).
+
+The platform specific code for trampolines is in the file tramp-\<ARCH\>.c for each architecture, while the cross platform code is in mini-trampolines.c. There are two types of functions in mini-trampolines.c:
+
+-   The actual C functions called by the trampolines.
+-   Functions to create the different trampolines types.
+
+Trampoline creation functions have the following signature:
+
+``` bash
+gpointer
+mono_arch_create_foo_trampoline (<args>, MonoTrampInfo **info, gboolean aot)
+```
+
+The function should return a pointer to the newly created trampoline, allocating memory from either the global code manager, or from a domain's code manager. If INFO is not NULL, it is set to a pointer to a MonoTrampInfo structure, which contains information about the trampoline, like its name, unwind info, etc. This is used for two purposes:
+
+-   Saving the trampoline info an AOT image in 'full-aot' mode.
+-   Saving debug info about the trampoline in XDEBUG mode.
+
+### JIT Trampolines
+
+These trampolines are used to JIT compile a method the first time it is called. When the JIT compiles a call instruction, it doesn't compile the called method right away. Instead, it creates a JIT trampoline, and emits a call instruction referencing the trampoline. When the trampoline is called, it calls mono_magic_trampoline () which compiles the target method, and returns the address of the compiled code to the trampoline which branches to it. This process is somewhat slow, so mono_magic_trampoline () tries to patch the calling JITted code so it calls the compiled code instead of the trampoline from now on. This is done by mono_arch_patch_callsite () in tramp-\<ARCH\>.c.
+
+### Virtual Call Trampolines
+
+There is one virtual call trampoline per vtable slot index. The trampoline uses this index plus the 'this' argument which is passed in a fixed register/stack slots by the managed calling convention to obtain the virtual method which needs to be compiled. It then patches the vtable slot with the address of the newly compiled method.
+
+\<TODO IMT\>
+
+### Jump Trampolines
+
+Jump trampolines are very similar to JIT trampolines, they even use the same mono_magic_trampoline () C function. They are used to implement the LDFTN and the JMP IL opcodes.
+
+### Class Init Trampolines
+
+These trampolines are used to implement the type initialization sematics of the CLI spec. They call the mono_class_init_trampoline () C function which executes the class initializer of the class passed as the trampoline argument, then replaces the code calling the class init trampoline with NOPs so it is not executed anymore.
+
+### Generic Class Init Trampoline
+
+This is similar to the class init trampolines, but is used for initalizing classes which are only known at run-time, in generic-shared code. It receives the class to be initialized in a register instead of from a specific trampoline. This means there is only one instance of this trampoline.
+
+### RGCTX Lazy Fetch Trampolines
+
+These are used for fetching values from a runtime generic context, lazily initializing the values if they do not exist yet. There is one instance of this trampoline for each offset value.
+
+### AOT Trampolines
+
+These are similar to the JIT trampolines but instead of receiving a MonoMethod to compile, they receive an image+token pair. If the method identified by this pair is also AOT compiled, the address of its compiled code can be obtained without loading the metadata for the method.
+
+### AOT PLT Trampolines
+
+These trampolines handle calls made from AOT code though the PLT.
+
+### Delegate Trampolines
+
+These trampolines are used to handle the first call made to the delegate though its Invoke method. They call mono_delegate_trampoline () which creates a specialized calling sequence optimized to the delegate instance before calling it. Further calls will go through to this optimized code sequence.
+
+### Monitor Enter/Exit Trampolines
+
+These trampolines implement the fastpath of Monitor.Enter/Exit on some platforms.
+
+\<TODO REMAINING TRAMPOLINE TYPES\>
diff --git a/docs/infra/automation.md b/docs/infra/automation.md
index 6b15e2a91716..a4ed601cf33e 100644
--- a/docs/infra/automation.md
+++ b/docs/infra/automation.md
@@ -1,13 +1,9 @@
 ## Automation
 
-### Fabric Bot
+### Policy Service Bot
 
-This repository uses Fabric Bot to automate issue and pull request management. All automation rules are defined in the [`.github/fabricbot.json`](../../.github/fabricbot.json) file.
+This repository uses the Policy Service bot to automate issue and pull request management. All automation rules are defined in the [`.github/policies`](../../.github/policies) folder.
 
 #### Notifications
 
-You are welcome to enable notifications for yourself for one or more areas. You will be tagged whenever there are new issues and PR's in the area. You do not need to have commit access for this. To add or remove notifications for yourself, please offer a PR that edits the "mentionees" value for that area. [Here is an example](https://github.com/dotnet/runtime/commit/c28b13f0cf4e2127a74285b65188413ca7e677d4).
-
-#### Other changes
-
-For any other changes, you will need access to the [`Fabric Bot portal`](https://portal.fabricbot.ms/bot/) which is only available to Microsoft employees at present. Ensure you are signed out from the portal, choose "Import Configuration" option and make changes using the editor. It's necessary to use the portal because there is at present no published JSON schema for the configuration format.
+You are welcome to enable notifications for yourself for one or more areas. You will be tagged whenever there are new issues and PR's in the area. You do not need to have commit access for this. To add or remove notifications for yourself, please offer a PR that edits the "mentionees" value for that area in the policy YAML file.
diff --git a/docs/project/glossary.md b/docs/project/glossary.md
index c1ff2d29db01..8e1de1bef964 100644
--- a/docs/project/glossary.md
+++ b/docs/project/glossary.md
@@ -25,6 +25,7 @@ terminology.
 | EE | [Execution Engine](https://docs.microsoft.com/dotnet/standard/managed-execution-process#running_code). |
 | GC | [Garbage Collector](https://github.com/dotnet/runtime/blob/main/docs/design/coreclr/botr/garbage-collection.md). |
 | IBC | Instrumented Block Counts - used as extension (`*.ibc`) for old PGO files. |
+| IJW | "It Just Works" - Codename for [C++/CLI](https://learn.microsoft.com/cpp/dotnet/dotnet-programming-with-cpp-cli-visual-cpp) managed/native interop |
 | IPC | Inter-Process Communication. |
 | IL | Intermediate Language. Equivalent to CIL, also equivalent to [MSIL](https://docs.microsoft.com/dotnet/standard/managed-execution-process#compiling-to-msil). |
 | JIT | [Just-in-Time](https://github.com/dotnet/runtime/blob/main/docs/design/coreclr/jit/ryujit-overview.md) compiler. RyuJIT is the code name for the next generation Just-in-Time(aka "JIT") for the .NET runtime. |
diff --git a/docs/project/list-of-diagnostics.md b/docs/project/list-of-diagnostics.md
index 4cae3a85a87d..9ad0f02f5f88 100644
--- a/docs/project/list-of-diagnostics.md
+++ b/docs/project/list-of-diagnostics.md
@@ -108,6 +108,7 @@ The PR that reveals the implementation of the `<IncludeInternalObsoleteAttribute
 |  __`SYSLIB0051`__ | This API supports obsolete formatter-based serialization. It should not be called or extended by application code. |
 |  __`SYSLIB0052`__ | This API supports obsolete mechanisms for Regex extensibility. It is not supported. |
 |  __`SYSLIB0053`__ | AesGcm should indicate the required tag size for encryption and decryption. Use a constructor that accepts the tag size. |
+|  __`SYSLIB0054`__ | Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead. |
 
 ## Analyzer Warnings
 
diff --git a/docs/project/public-signing.md b/docs/project/public-signing.md
index 5e40bc5d4020..c9482e79d532 100644
--- a/docs/project/public-signing.md
+++ b/docs/project/public-signing.md
@@ -17,6 +17,7 @@ Known issues when debugging and testing public signed assemblies on .NET Framewo
 - You will not be able to load the assembly in an AppDomain where shadow copying is turned on.
 - You will not be able to load the assembly in a partially trusted AppDomain
 - You will not be able to pre-compile ASP.NET applications
+- You may not be able to load the assembly if the `app.config` file specifies `<configuration>`/`<runtime>`/`<NetFx40_LegacySecurityPolicy enabled="true" />`
 
 The `corflags.exe` tool that ships with the .NET Framework SDK can show whether a binary is delay-signed or strong-named. For a delay-signed assembly it may show:
 
diff --git a/docs/workflow/building/coreclr/macos-instructions.md b/docs/workflow/building/coreclr/macos-instructions.md
index 8deaf4578bcc..7ac0d0c6e0f8 100644
--- a/docs/workflow/building/coreclr/macos-instructions.md
+++ b/docs/workflow/building/coreclr/macos-instructions.md
@@ -33,6 +33,16 @@ It is possible to get a macOS ARM64 build using an Intel x64 Mac and vice versa,
 
 The Core_Root provides one of the main ways to test your build. Full instructions on how to build it in the [CoreCLR testing doc](/docs/workflow/testing/coreclr/testing.md), and we also have a detailed guide on how to use it for your own testing in [its own dedicated doc](/docs/workflow/testing/using-corerun-and-coreroot.md).
 
+## Debugging information
+
+The build process puts native component symbol and debugging information into `.dwarf` files, one for each built binary. This is not the native format used by macOS, and debuggers like LLDB can't automatically find them. The native format used by macOS is `.dSYM` bundles. To build `.dSYM` bundles and get a better inner-loop developer experience on macOS (e.g., have the LLDB debugger automatically find program symbols and display source code lines, etc.), build as follows:
+
+```bash
+./build.sh --subset clr --cmakeargs "-DCLR_CMAKE_APPLE_DSYM=TRUE"
+```
+
+(Note: converting the entire build process to build and package `.dSYM` bundles on macOS by default is tracked by [this](https://github.com/dotnet/runtime/issues/92911) issue.)
+
 ## Native Sanitizers
 
 CoreCLR can be built with native sanitizers like AddressSanitizer to help catch memory safety issues. To build the project with native sanitizers, add the `-fsanitize address` argument to the build script like the following:
diff --git a/docs/workflow/building/coreclr/nativeaot.md b/docs/workflow/building/coreclr/nativeaot.md
index 2a712ca84448..ec70d809a14e 100644
--- a/docs/workflow/building/coreclr/nativeaot.md
+++ b/docs/workflow/building/coreclr/nativeaot.md
@@ -78,8 +78,6 @@ You should now be able to publish the project for Wasm: `dotnet publish --self-c
 
 The paths to major components can be overridden using `IlcToolsPath`, `IlcSdkPath`, `IlcFrameworkPath`, `IlcFrameworkNativePath` and `IlcMibcPath` properties for `dotnet publish`. For example, `/p:IlcToolsPath=<repo root>\artifacts\bin\coreclr\windows.x64.Debug\ilc` can be used to override the compiler with a local debug build for troubleshooting or quick iterations.
 
-The component that writes out object files (objwriter.dll/libobjwriter.so/libobjwriter.dylib) is based on LLVM and doesn't build in the runtime repo. It gets published as a NuGet package out of the [dotnet/llvm-project](https://github.com/dotnet/llvm-project) repo (branch [objwriter/12.x](https://github.com/dotnet/llvm-project/tree/objwriter/12.x)). If you're working on ObjWriter or bringing up a new platform that doesn't have ObjWriter packages yet, as additional pre-requisites you need to build objwriter out of that repo and replace the file in the output.
-
 ### Building packages
 
 Run `build[.cmd|.sh] -c Release` from the repo root to build the NativeAOT toolchain packages. The build will place the toolchain packages at `artifacts\packages\Release\Shipping`. To publish your project using these packages:
diff --git a/docs/workflow/ci/failure-analysis.md b/docs/workflow/ci/failure-analysis.md
index 57917c841316..4b3e96334277 100644
--- a/docs/workflow/ci/failure-analysis.md
+++ b/docs/workflow/ci/failure-analysis.md
@@ -12,6 +12,19 @@
 
 ## Triaging errors seen in CI
 
+## Summary
+
+**Passing Build Analysis is required to merge into the runtime repo**.
+
+To resolve failures, do the following, in order:
+
+1. Fix the problem if your PR is the cause.
+2. For all failures not in the "Known test errors" section, [try to file a Known Build Error issue](#what-to-do-if-you-determine-the-failure-is-unrelated).
+3. If all else fails, perform a [manual bypass](#bypassing-build-analysis).
+
+
+## Details
+
 In case of failure, any PR on the runtime will have a failed GitHub check - PR Build Analysis - which has a summary of all failures, including a list of matching  known issues as well as any regressions introduced to the build or the tests. This tab should be your first stop for analyzing the PR failures.
 
 ![Build analysis check](analysis-check.png)
@@ -78,6 +91,7 @@ If you have considered all the diagnostic artifacts and determined the failure i
     ````
     It already contains most of the essential information, but *it is very important that you fill out the json blob*.
 
+    - You can now use the [Build Analysis Known Issue Helper](https://helix.dot.net/BuildAnalysis/CreateKnownIssues) to create an issue. It assists in adding the right set of labels, fill the necessary paths in the json blob, and it will validate that it matches the text presented for the issue found in the logs.
     - You can add into the `ErrorMessage` field the string that you found uniquely identifies the issue. In case you need to use a regex, use the `ErrorPattern` field instead. This is a limited to a single-line, non-backtracking regex as described [here](https://github.com/dotnet/arcade/blob/main/Documentation/Projects/Build%20Analysis/KnownIssues.md#regex-matching). This regex also needs to be appropriately escaped. Check the [arcade known issues](https://github.com/dotnet/arcade/blob/main/Documentation/Projects/Build%20Analysis/KnownIssues.md#filling-out-known-issues-json-blob) documentation for a good guide on proper regex and JSON escaping.
     - The field `ExcludeConsoleLog` describes if the execution logs should be considered on top of the individual test results. **For most cases, this should be set to `true` as the failure will happen within a single test**. Setting it to `false` will mean all failures within an xUnit set of tests will also get attributed to this particular error, since there's one log describing all the problems. Due to limitations in Known Issues around rate limiting and xUnit resiliency, setting `ExcludeConsoleLog=false` is necessary in two scenarios:
       + Nested tests as reported to Azure DevOps. Essentially this means theory failures, which look like this when reported in Azure DevOps: ![xUnit theory seen in azure devops](theory-azdo.png).
@@ -85,7 +99,11 @@ If you have considered all the diagnostic artifacts and determined the failure i
       + Native crashes in libraries also require using the console log. This is needed as the crash corrupts the test results to be reported to Azure DevOps, so only the console logs are left.
     - Optionally you can add specifics as needed like leg, configuration parameters, available dump links.
 
-Once the issue is open, feel free to rerun the `Build Analysis` check and the issue should be recognized as known if all was filed correctly and you are ready to merge once all unrelated issues are marked as known. However, there are some known limitations to the system as previously described. Additionally, the system only looks at the error message the stacktrace fields of an Azure DevOps test result, and the console log in the helix queue. If rerunning the check doesn't pick up the known issue and you feel it should, feel free to tag  @dotnet/runtime-infrastructure to request infrastructure team for help.
+Once the issue is open, feel free to rerun the `Build Analysis` check and the issue should be recognized as known if all was filed correctly and you are ready to merge once all unrelated issues are marked as known. However, there are some known limitations to the system as previously described. Additionally, the system only looks at the error message the stacktrace fields of an Azure DevOps test result, and the console log in the helix queue.
+
+The `Build Analysis` requests are sent to a queue. In certain scenarios, this queue can have many items to process and it can take a while for the status to be updated. If you do not see the status getting updated, be patient and wait at least 10 minutes before investigating further.
+
+If rerunning the check doesn't pick up the known issue and you feel it should, feel free to tag  @dotnet/runtime-infrastructure to request infrastructure team for help.
 
 After you do this, if the failure is occurring frequently as per the data captured in the recently opened issue, please disable the failing test(s) with the corresponding tracking issue link in a follow-up Pull Request.
 
@@ -95,6 +113,18 @@ After you do this, if the failure is occurring frequently as per the data captur
 
 There are plenty of intermittent failures that won't manifest again on a retry. Therefore these steps should be followed for every iteration of the PR build, e.g. before retrying/rebuilding.
 
+### Bypassing build analysis
+
+To unconditionally bypass the build analysis check (turn it green), you can add a comment to your PR with the following text:
+
+```
+/ba-g <reason>
+```
+
+The `Build Analysis` requests are sent to a queue. In certain scenarios, this queue can have many items to process and it can take a while for the status to be updated. If you do not see the status getting updated, be patient and wait at least 10 minutes before investigating further.
+
+For more information, see https://github.com/dotnet/arcade/blob/main/Documentation/Projects/Build%20Analysis/EscapeMechanismforBuildAnalysis.md
+
 ### Examples of Build Analysis
 
 #### Good usage examples
diff --git a/docs/workflow/ci/pr-guide.md b/docs/workflow/ci/pr-guide.md
index b2698ba5e489..e48dfe0fb7f1 100644
--- a/docs/workflow/ci/pr-guide.md
+++ b/docs/workflow/ci/pr-guide.md
@@ -15,7 +15,7 @@ To merge pull requests, you must have write permissions in the repository. If yo
 
 ## Pull Request Ownership
 
-Every pull request will have automatically a single `area-*` label assigned. The label not only indicates the code segment which the change touches but also the owner. We maintain a list of [areas owners](area-owners.md) for all dotnet/runtime labels. They are responsible for landing pull requests in their area in a timely manner and for helping contributors with their submitted pull request. You can ask them for assistance if you need help with landing your changes.
+Every pull request will have automatically a single `area-*` label assigned. The label not only indicates the code segment which the change touches but also the owner. We maintain a list of [areas owners](../../area-owners.md) for all dotnet/runtime labels. They are responsible for landing pull requests in their area in a timely manner and for helping contributors with their submitted pull request. You can ask them for assistance if you need help with landing your changes.
 
 If during the code review process a merge conflict occurs the area owner is responsible for its resolution. Pull requests should not be on hold due to the author's unwillingness to resolve code conflicts. GitHub makes this easier by allowing simple conflict resolution using the [conflict-editor](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/resolving-a-merge-conflict-on-github).
 
diff --git a/docs/workflow/ci/triaging-failures.md b/docs/workflow/ci/triaging-failures.md
index 1baa56052774..bf5e80f7522e 100644
--- a/docs/workflow/ci/triaging-failures.md
+++ b/docs/workflow/ci/triaging-failures.md
@@ -8,7 +8,7 @@ stress mode test configuration failures, such as failures in a JIT stress test r
 
 One goal of failure investigation is to quickly route failures to the correct area owner. The ownership of various product areas
 is detailed [here](../../area-owners.md). The GitHub auto-tagging bot uses the ownership information
-in the file [fabricbot.json](../../../.github/fabricbot.json).
+in the file [Policy Service configuration](../../../.github/policies).
 
 ## Platform configuration
 
diff --git a/docs/workflow/debugging/coreclr/debugging-aot-compilers.md b/docs/workflow/debugging/coreclr/debugging-aot-compilers.md
index 341e5489548e..7896e1b8bb50 100644
--- a/docs/workflow/debugging/coreclr/debugging-aot-compilers.md
+++ b/docs/workflow/debugging/coreclr/debugging-aot-compilers.md
@@ -85,7 +85,7 @@ The object files generated by the ILC compiler contain debug information for met
 
 The ILC compiler typically compiles the whole program - it loosely corresponds to the composite mode of crossgen2. There is a multifile mode, where each managed assembly corresponds to a single object file, but this mode is not shipping.
 
-The object files generated by the ILC compiler are written out using an LLVM-based object writer (consumed as a NuGet package built out of the dotnet/llvm-project repo, branch objwriter/12.x). The object writer uses the LLVM assembler APIs (APIs meant to be used by tools that convert textual assembly into machine code) to emit object files in PE/ELF/Mach-O formats.
+The supported object files generated by the ILC compiler are PE/ELF/Mach-O formats.
 
 ## Example of debugging a test application in Crossgen2
 
diff --git a/docs/workflow/debugging/coreclr/debugging-runtime.md b/docs/workflow/debugging/coreclr/debugging-runtime.md
index dd92fe93cfaf..6edce3c7646e 100644
--- a/docs/workflow/debugging/coreclr/debugging-runtime.md
+++ b/docs/workflow/debugging/coreclr/debugging-runtime.md
@@ -150,7 +150,7 @@ It might also be the case that you would need the latest changes in SOS, or you'
 **NOTE**: Only `lldb` is supported to use with SOS. You can also use `gdb`, `cgdb`, or other debuggers, but you might not have access to SOS.
 
 1. Perform a build of the _clr_ subset of the runtime repo.
-2. Start lldb passing `corerun`, the app to run (e.g. `HelloWorld.dll`), and any arguments this app might need: `lldb /path/to/corerun /path/to/app.dll <app args go here>`
+2. Start lldb passing `corerun`, the app to run (e.g. `HelloWorld.dll`), and any arguments this app might need: `lldb -- /path/to/corerun /path/to/app.dll <app args go here>`
 3. If you're using the installed version of SOS, you can skip this step. If you built SOS manually, you have to load it before starting the debugging session: `plugin load /path/to/built/sos/libsosplugin.so`. Note that `.so` is for Linux, and `.dylib` is for macOS. You can find more information in the diagnostics repo [private sos build doc](https://github.com/dotnet/diagnostics/blob/main/documentation/using-sos-private-build.md).
 4. Launch program: `process launch -s`
 5. To stop breaks on _SIGUSR1_ signals used by the runtime run the following command: `process handle -s false SIGUSR1`
diff --git a/docs/workflow/debugging/mono/android-debugging.md b/docs/workflow/debugging/mono/android-debugging.md
index 918ac1503efa..7e86eb775324 100644
--- a/docs/workflow/debugging/mono/android-debugging.md
+++ b/docs/workflow/debugging/mono/android-debugging.md
@@ -57,25 +57,27 @@ Since you're debugging an optimized release build, it is likely the debugger wil
 
 ## Native debugging using a local debug build of Mono
 
-Build the runtime for your android architecture: `ANDROID_NDK_ROOT=<path_to_android_ndk> ./build.sh --os android --arch x86 -c Debug`. See the instructions for [Testing Android](../../testing/libraries/testing-android.md) for details.
+Ensure the prerequisites are met for [Testing Android](../../testing/libraries/testing-android.md#prerequisites).
 
+Build the runtime for your android architecture `<ANDROID_ARCH>` and keep debug symbols in the binary:
 
-In the source code for the C# project, add the following to the .csproj (replacing `<RUNTIME_GIT_ROOT>` by the appropriate location):
+`./build.sh -s mono+libs -os android -arch <ANDROID_ARCH> -c Debug /p:KeepNativeSymbols=true`
+
+In the source code for the C# project, add the following to the .csproj (replacing `<RUNTIME_GIT_ROOT>` by the appropriate location and `<ANDROID_ARCH>` with the built android architecture):
 
 ```
   <Target Name="UpdateRuntimePack"
             AfterTargets="ResolveFrameworkReferences">
       <ItemGroup>
-        <ResolvedRuntimePack PackageDirectory="<RUNTIME_GIT_ROOT>/artifacts/bin/microsoft.netcore.app.runtime.android-x86/Debug"
+        <ResolvedRuntimePack PackageDirectory="<RUNTIME_GIT_ROOT>/artifacts/bin/microsoft.netcore.app.runtime.android-<ANDROID_ARCH>/Debug"
                              Condition="'%(ResolvedRuntimePack.FrameworkName)' == 'Microsoft.NETCore.App'" />
       </ItemGroup>
   </Target>
 ```
 
-Then rebuild and reinstall the project, open the apk in Android Studio, and debug.  The
-runtime native libraries will be stripped, so to make use of debug symbols, you
-will need to follow the steps above (rename `*.so.dbg` in the artifacts to
-`*.so.so` and add them to the APK project in Android Studio)
+Then rebuild and reinstall the project, open the apk in Android Studio (File > Profile or Debug APK), and debug.
+
+Note: If debugging in Android Studio stops at signals `SIGPWR` and `SIGXCPU` during startup, configure LLDB to not stop the process for those signals via `process handle -p true -s false -n true SIGPWR` and `process handle -p true -s false -n true SIGXCPU` in Android Studio's LLDB tab.
 
 ## Native and managed debugging or debugging the managed debugger
 
diff --git a/docs/workflow/debugging/mono/wasm-debugging.md b/docs/workflow/debugging/mono/wasm-debugging.md
index 59014ef147e2..80b06319eb36 100644
--- a/docs/workflow/debugging/mono/wasm-debugging.md
+++ b/docs/workflow/debugging/mono/wasm-debugging.md
@@ -180,8 +180,8 @@ $func166 @ dotnet.wasm:0xba0a
 $func2810 @ dotnet.wasm:0xabacf
 $func1615 @ dotnet.wasm:0x6f8eb
 $func1619 @ dotnet.wasm:0x6ff58
-$mono_wasm_invoke_method @ dotnet.wasm:0x96c9
-Module._mono_wasm_invoke_method @ dotnet.6.0.1.hopd7ipo8x.js:1
+$mono_wasm_invoke_jsexport @ dotnet.wasm:0x96c9
+Module.mono_wasm_invoke_jsexport @ dotnet.6.0.1.hopd7ipo8x.js:1
 managed__Microsoft_AspNetCore_Components_WebAssembly__Microsoft_AspNetCore_Components_WebAssembly_Services_DefaultWebAssemblyJSRuntime_BeginInvokeDotNet @ managed__Microsoft_AspNetCore_Components_WebAssembly__Microsoft_AspNetCore_Components_WebAssembly_Services_DefaultWebAssemblyJSRuntime_BeginInvokeDotNet:19
 beginInvokeDotNetFromJS @ blazor.webassembly.js:1
 b @ blazor.webassembly.js:1
@@ -244,8 +244,8 @@ $mono_jit_runtime_invoke @ dotnet.wasm:0x1dec32
 $do_runtime_invoke @ dotnet.wasm:0x95fca
 $mono_runtime_try_invoke @ dotnet.wasm:0x966fe
 $mono_runtime_invoke @ dotnet.wasm:0x98982
-$mono_wasm_invoke_method @ dotnet.wasm:0x227de2
-Module._mono_wasm_invoke_method @ dotnet..y6ggkhlo8e.js:9927
+$mono_wasm_invoke_jsexport @ dotnet.wasm:0x227de2
+Module.mono_wasm_invoke_jsexport @ dotnet..y6ggkhlo8e.js:9927
 managed__Microsoft_AspNetCore_Components_WebAssembly__Microsoft_AspNetCore_Components_WebAssembly_Services_DefaultWebAssemblyJSRuntime_BeginInvokeDotNet @ managed__Microsoft_AspNetCore_Components_WebAssembly__Microsoft_AspNetCore_Components_WebAssembly_Services_DefaultWebAssemblyJSRuntime_BeginInvokeDotNet:19
 beginInvokeDotNetFromJS @ blazor.webassembly.js:1
 b @ blazor.webassembly.js:1
diff --git a/docs/workflow/testing/host/testing.md b/docs/workflow/testing/host/testing.md
index 35c7359c411a..a217d1dd0ab9 100644
--- a/docs/workflow/testing/host/testing.md
+++ b/docs/workflow/testing/host/testing.md
@@ -13,15 +13,15 @@ To build the host tests, first build the product:
     * [CoreCLR](../../building/coreclr/README.md) build instructions
     * [Libraries](../../building/libraries/README.md) build instructions
 
-2.  Build the host and packs:
+2.  Build the host:
     ```
-    build.cmd/sh -subset host+packs.product -runtimeConfiguration Release -librariesConfiguration Release
+    build.cmd/sh -subset host -runtimeConfiguration Release -librariesConfiguration Release
     ```
     If using a configuration other than Release for CoreCLR/libraries, specify the desired configuration in the `-runtimeConfiguration`/`-librariesConfiguration` arguments.
 
 ### Building all tests
 
-The host tests are part of the `host` subset by default, so building the `host` subset also builds the host test. To build just the host tests:
+The host tests are part of the `host` subset by default, so building the `host` subset also builds the host tests. To build just the host tests:
 ```
 build.cmd/sh -subset host.tests -runtimeConfiguration Release -librariesConfiguration Release
 ```
@@ -36,16 +36,18 @@ dotnet build src\installer\tests\HostActivation.Tests
 ## Test context
 
 The host tests depend on:
-  1. Product binaries in a directory layout matching that of a .NET install
-  2. Restored [test projects](/src/installer/tests/Assets/TestProjects) which will be built and run by the tests
-  3. TestContextVariables.txt file with property and value pairs which will be read by the tests
+  1. Pre-built [test project](/src/installer/tests/Assets/Projects) output which will be copied and run by the tests. The `host.pretest` subset builds these projects.
+  2. Product binaries in a directory layout matching that of a .NET install. The `host.pretest` subset creates this layout.
+  3. TestContextVariables.txt files with property and value pairs which will be read by the tests. The `host.tests` subset creates these files as part of building the tests.
 
 When [running all tests](#running-all-tests), the build is configured such that these are created/performed before the start of the test run.
 
-In order to create (or update) these dependencies without running all tests, the build targets that create them - RefreshProjectTestAssets and SetupTestContextVariables - can be directly run for the desired test project. For example:
-```
-dotnet build src\installer\tests\HostActivation.Tests -t:RefreshProjectTestAssets;SetupTestContextVariables -p:RuntimeConfiguration=Release -p:LibrariesConfiguration=Release
-```
+In order to create (or update) these dependencies without running all tests:
+  1. Build the `host.pretest` subset. By default, this is included in the `host` subset. This corresponds to (1) and (2) above.
+  2. Build the desired test project. This corresponds to (3) above. Building the test itself will run the `SetupTestContextVariables` target, but it can also be run independently - for example:
+  ```
+  dotnet build src\installer\tests\HostActivation.Tests -t:SetupTestContextVariables -p:RuntimeConfiguration=Release -p:LibrariesConfiguration=Release
+  ```
 
 ## Running tests
 
@@ -78,6 +80,21 @@ The `category!=failing` is to respect the [filtering traits](../libraries/filter
 
 The [Microsoft.DotNet.CoreSetup.sln](/src/installer/Microsoft.DotNet.CoreSetup.sln) can be used to run and debug host tests through Visual Studio. When using the solution, the product should have already been [built](#building-tests) and the [test context](#test-context) set up.
 
+If you built the runtime or libraries with a different configuration from the host, you have to specify this when starting visual studio:
+
+```console
+build.cmd -vs Microsoft.DotNet.CoreSetup -rc Release -lc Release
+```
+
+## Investigating failures
+
+When [running all tests](#running-all-tests), reports with results will be generated under `<repo_root>\artifacts\TestResults`. When [running individual tests](#running-specific-tests), results will be output to the console by default and can be configured via [`dotnet test` options](https://learn.microsoft.com/dotnet/core/tools/dotnet-test#options).
+
+In order to test the hosting components, the tests launch a separate process (e.g. `dotnet`, apphost, native host) and validate the expected output (standard output and error) of the launched process. This usually involves copying or creating test artifacts in the form of an application to run or a .NET install to run against.
+
+On failure, tests will report the file, arguments, and environment for the launched process that failed validation. With [preserved test artifacts](#preserving-test-artifacts), this information can be used to directly debug the specific scenario that the test was running.
+
 ### Preserving test artifacts
 
-In order to test the hosting components, the tests launch a separate process (e.g. `dotnet`, apphost, native host) and validate the expected output (standard output and error) of the launched process. This usually involves copying or creating test artifacts in the form of an application to run or a .NET install to run against. The tests will delete these artifacts after the test finishes. To allow inspection or usage after the test finishes, set the environment variable `PRESERVE_TEST_RUNS=1` to avoid deleting the test artifacts.
+The tests will delete any generated test artifacts after the test finishes. To allow inspection or usage after the test finishes, set the environment variable `PRESERVE_TEST_RUNS=1` to avoid deleting the test artifacts.
+
diff --git a/docs/workflow/testing/host/using-apphost.md b/docs/workflow/testing/host/using-apphost.md
index 67c129ce338f..764dc6ad68e5 100644
--- a/docs/workflow/testing/host/using-apphost.md
+++ b/docs/workflow/testing/host/using-apphost.md
@@ -23,8 +23,8 @@ Building and publishing your project should now use the `apphost`/`singlefilehos
 
 Alternatives to this method include copying the desired apphost to the appropriate `<dotnet_root>/packs` and NuGet cache directories or building the NuGet packages locally and configuring the application to use them via a NuGet.config and the `KnownAppHostPack` item.
 
-## Pointing at a local .NET root
+# Pointing at a local .NET root
 
 For a [framework-dependent application](https://docs.microsoft.com/dotnet/core/deploying/#publish-framework-dependent), you can set the `DOTNET_ROOT` environment variable to point at a local .NET layout.
 
-The [libraries tests](../libraries/testing.md) construct and use such a layout based on your local runtime and libraries build as part of the `libs.pretest` subset. To use that layout, set `DOTNET_ROOT=<repo_root>/artifacts/bin/testhost/net8.0-<os>-<configuration>-<arch>`. Note that the host components (`hostfxr`, `hostpolicy`) in that layout are not from the local build.
+The [libraries tests](../libraries/testing.md) construct and use such a layout based on your local runtime, host, and libraries build as part of the `libs.pretest` subset. To use that layout, set `DOTNET_ROOT=<repo_root>/artifacts/bin/testhost/net8.0-<os>-<configuration>-<arch>` and then run the .NET application.
diff --git a/eng/Analyzers.targets b/eng/Analyzers.targets
index 7cb7a76abac7..856ceb89ae9c 100644
--- a/eng/Analyzers.targets
+++ b/eng/Analyzers.targets
@@ -8,7 +8,7 @@
   </PropertyGroup>
   <PropertyGroup>
     <!-- Disable analyzers in sourcebuild -->
-    <RunAnalyzers Condition="'$(DotNetBuildFromSource)' == 'true'">false</RunAnalyzers>
+    <RunAnalyzers Condition="'$(DotNetBuildSourceOnly)' == 'true'">false</RunAnalyzers>
     <EnableNETAnalyzers Condition="'$(EnableNETAnalyzers)' == ''">$(RunAnalyzers)</EnableNETAnalyzers>
   </PropertyGroup>
   <PropertyGroup Condition="'$(RunAnalyzers)' != 'false'">
diff --git a/eng/CodeAnalysis.src.globalconfig b/eng/CodeAnalysis.src.globalconfig
index 2677ac469e66..21a53462cc5d 100644
--- a/eng/CodeAnalysis.src.globalconfig
+++ b/eng/CodeAnalysis.src.globalconfig
@@ -274,6 +274,12 @@ dotnet_diagnostic.CA1512.severity = warning
 # CA1513: Use ObjectDisposedException throw helper
 dotnet_diagnostic.CA1513.severity = warning
 
+# CA1514: Avoid redundant length argument
+dotnet_diagnostic.CA1514.severity = warning
+
+# CA1515: Consider making public types internal
+dotnet_diagnostic.CA1515.severity = none
+
 # CA1700: Do not name enum values 'Reserved'
 dotnet_diagnostic.CA1700.severity = none
 
@@ -483,6 +489,15 @@ dotnet_diagnostic.CA1863.severity = suggestion
 # CA1864: Prefer the 'IDictionary.TryAdd(TKey, TValue)' method
 dotnet_diagnostic.CA1864.severity = warning
 
+# CA1865: Use char overload
+dotnet_diagnostic.CA1865.severity = warning
+
+# CA1866: Use char overload
+dotnet_diagnostic.CA1866.severity = warning
+
+# CA1867: Use char overload
+dotnet_diagnostic.CA1867.severity = warning
+
 # CA1868: Unnecessary call to 'Contains' for sets
 dotnet_diagnostic.CA1868.severity = warning
 
@@ -492,6 +507,12 @@ dotnet_diagnostic.CA1869.severity = warning
 # CA1870: Use a cached 'SearchValues' instance
 dotnet_diagnostic.CA1870.severity = warning
 
+# CA1871: Do not pass a nullable struct to 'ArgumentNullException.ThrowIfNull'
+dotnet_diagnostic.CA1871.severity = warning
+
+# CA1872: Prefer 'Convert.ToHexString' and 'Convert.ToHexStringLower' over call chains based on 'BitConverter.ToString'
+dotnet_diagnostic.CA1872.severity = warning
+
 # CA2000: Dispose objects before losing scope
 dotnet_diagnostic.CA2000.severity = none
 
@@ -540,6 +561,9 @@ dotnet_diagnostic.CA2020.severity = warning
 # CA2021: Do not call Enumerable.Cast<T> or Enumerable.OfType<T> with incompatible types
 dotnet_diagnostic.CA2021.severity = warning
 
+# CA2022: Avoid inexact read with 'Stream.Read'
+dotnet_diagnostic.CA2022.severity = warning
+
 # CA2100: Review SQL queries for security vulnerabilities
 dotnet_diagnostic.CA2100.severity = none
 
@@ -601,9 +625,6 @@ dotnet_diagnostic.CA2226.severity = none
 # CA2227: Collection properties should be read only
 dotnet_diagnostic.CA2227.severity = none
 
-# CA2229: Implement serialization constructors
-dotnet_diagnostic.CA2229.severity = warning
-
 # CA2231: Overload operator equals on overriding value type Equals
 dotnet_diagnostic.CA2231.severity = none
 
@@ -679,6 +700,18 @@ dotnet_diagnostic.CA2260.severity = warning
 # CA2261: Do not use ConfigureAwaitOptions.SuppressThrowing with Task<TResult>
 dotnet_diagnostic.CA2261.severity = warning
 
+# CA2262: Set 'MaxResponseHeadersLength' properly
+dotnet_diagnostic.CA2262.severity = warning
+
+# CA2263: Prefer generic overload when type is known
+dotnet_diagnostic.CA2263.severity = suggestion
+
+# CA2264: Do not pass a non-nullable value to 'ArgumentNullException.ThrowIfNull'
+dotnet_diagnostic.CA2264.severity = warning
+
+# CA2265: Do not compare Span<T> to 'null' or 'default'
+dotnet_diagnostic.CA2265.severity = warning
+
 # CA2300: Do not use insecure deserializer BinaryFormatter
 dotnet_diagnostic.CA2300.severity = none
 
@@ -1806,7 +1839,7 @@ dotnet_diagnostic.IDE0200.severity = warning
 # IDE0210: Use top-level statements
 dotnet_diagnostic.IDE0210.severity = none
 
-# IDE0211: Use program main
+# IDE0211: Convert to 'Program.Main' style program
 dotnet_diagnostic.IDE0211.severity = none
 
 # IDE0220: foreach cast
@@ -1824,6 +1857,9 @@ dotnet_diagnostic.IDE0241.severity = suggestion
 # IDE0250: Make struct readonly
 dotnet_diagnostic.IDE0250.severity = suggestion
 
+# IDE0251: Make member readonly
+dotnet_diagnostic.IDE0251.severity = suggestion
+
 # IDE0260: Use pattern matching
 dotnet_diagnostic.IDE0260.severity = suggestion
 
@@ -1833,6 +1869,27 @@ dotnet_diagnostic.IDE0270.severity = suggestion
 # IDE0280: Use 'nameof'
 dotnet_diagnostic.IDE0280.severity = warning
 
+# IDE0290: Use primary constructor
+dotnet_diagnostic.IDE0290.severity = suggestion
+
+# IDE0300: Use collection expression for array
+dotnet_diagnostic.IDE0300.severity = suggestion
+
+# IDE0301: Use collection expression for empty
+dotnet_diagnostic.IDE0301.severity = suggestion
+
+# IDE0302: Use collection expression for stackalloc
+dotnet_diagnostic.IDE0302.severity = suggestion
+
+# IDE0303: Use collection expression for Create()
+dotnet_diagnostic.IDE0303.severity = suggestion
+
+# IDE0304: Use collection expression for builder
+dotnet_diagnostic.IDE0304.severity = suggestion
+
+# IDE0305: Use collection expression for fluent
+dotnet_diagnostic.IDE0305.severity = suggestion
+
 # IDE1005: Delegate invocation can be simplified.
 dotnet_diagnostic.IDE1005.severity = warning
 
@@ -1853,3 +1910,9 @@ dotnet_diagnostic.IDE2003.severity = silent
 
 # IDE2004: Blank line not allowed after constructor initializer colon
 dotnet_diagnostic.IDE2004.severity = silent
+
+# IDE2005: Blank line not allowed after conditional expression token
+dotnet_diagnostic.IDE2005.severity = silent
+
+# IDE2006: Blank line not allowed after arrow expression clause token
+dotnet_diagnostic.IDE2006.severity = silent
diff --git a/eng/CodeAnalysis.test.globalconfig b/eng/CodeAnalysis.test.globalconfig
index 79e35931782f..0d944fbd890f 100644
--- a/eng/CodeAnalysis.test.globalconfig
+++ b/eng/CodeAnalysis.test.globalconfig
@@ -273,6 +273,12 @@ dotnet_diagnostic.CA1512.severity = none
 # CA1513: Use ObjectDisposedException throw helper
 dotnet_diagnostic.CA1513.severity = none
 
+# CA1514: Avoid redundant length argument
+dotnet_diagnostic.CA1514.severity = none
+
+# CA1515: Consider making public types internal
+dotnet_diagnostic.CA1515.severity = none
+
 # CA1700: Do not name enum values 'Reserved'
 dotnet_diagnostic.CA1700.severity = none
 
@@ -480,6 +486,15 @@ dotnet_diagnostic.CA1863.severity = none
 # CA1864: Prefer the 'IDictionary.TryAdd(TKey, TValue)' method
 dotnet_diagnostic.CA1864.severity = none
 
+# CA1865: Use char overload
+dotnet_diagnostic.CA1865.severity = none
+
+# CA1866: Use char overload
+dotnet_diagnostic.CA1866.severity = none
+
+# CA1867: Use char overload
+dotnet_diagnostic.CA1867.severity = none
+
 # CA1868: Unnecessary call to 'Contains' for sets
 dotnet_diagnostic.CA1868.severity = none
 
@@ -489,6 +504,12 @@ dotnet_diagnostic.CA1869.severity = none
 # CA1870: Use a cached 'SearchValues' instance
 dotnet_diagnostic.CA1870.severity = none
 
+# CA1871: Do not pass a nullable struct to 'ArgumentNullException.ThrowIfNull'
+dotnet_diagnostic.CA1871.severity = none
+
+# CA1872: Prefer 'Convert.ToHexString' and 'Convert.ToHexStringLower' over call chains based on 'BitConverter.ToString'
+dotnet_diagnostic.CA1872.severity = none
+
 # CA2000: Dispose objects before losing scope
 dotnet_diagnostic.CA2000.severity = none
 
@@ -537,6 +558,9 @@ dotnet_diagnostic.CA2020.severity = none
 # CA2021: Do not call Enumerable.Cast<T> or Enumerable.OfType<T> with incompatible types
 dotnet_diagnostic.CA2021.severity = none
 
+# CA2022: Avoid inexact read with 'Stream.Read'
+dotnet_diagnostic.CA2022.severity = none
+
 # CA2100: Review SQL queries for security vulnerabilities
 dotnet_diagnostic.CA2100.severity = none
 
@@ -675,6 +699,18 @@ dotnet_diagnostic.CA2260.severity = none
 # CA2261: Do not use ConfigureAwaitOptions.SuppressThrowing with Task<TResult>
 dotnet_diagnostic.CA2261.severity = none
 
+# CA2262: Set 'MaxResponseHeadersLength' properly
+dotnet_diagnostic.CA2262.severity = none
+
+# CA2263: Prefer generic overload when type is known
+dotnet_diagnostic.CA2263.severity = none
+
+# CA2264: Do not pass a non-nullable value to 'ArgumentNullException.ThrowIfNull'
+dotnet_diagnostic.CA2264.severity = none
+
+# CA2265: Do not compare Span<T> to 'null' or 'default'
+dotnet_diagnostic.CA2265.severity = none
+
 # CA2300: Do not use insecure deserializer BinaryFormatter
 dotnet_diagnostic.CA2300.severity = none
 
@@ -1800,7 +1836,7 @@ dotnet_diagnostic.IDE0200.severity = silent
 # IDE0210: Use top-level statements
 dotnet_diagnostic.IDE0210.severity = silent
 
-# IDE0211: Use program main
+# IDE0211: Convert to 'Program.Main' style program
 dotnet_diagnostic.IDE0211.severity = silent
 
 # IDE0220: foreach cast
@@ -1818,6 +1854,9 @@ dotnet_diagnostic.IDE0241.severity = silent
 # IDE0250: Make struct readonly
 dotnet_diagnostic.IDE0250.severity = silent
 
+# IDE0251: Make member readonly
+dotnet_diagnostic.IDE0251.severity = silent
+
 # IDE0260: Use pattern matching
 dotnet_diagnostic.IDE0260.severity = silent
 
@@ -1827,6 +1866,27 @@ dotnet_diagnostic.IDE0270.severity = silent
 # IDE0280: Use 'nameof'
 dotnet_diagnostic.IDE0280.severity = silent
 
+# IDE0290: Use primary constructor
+dotnet_diagnostic.IDE0290.severity = silent
+
+# IDE0300: Use collection expression for array
+dotnet_diagnostic.IDE0300.severity = silent
+
+# IDE0301: Use collection expression for empty
+dotnet_diagnostic.IDE0301.severity = silent
+
+# IDE0302: Use collection expression for stackalloc
+dotnet_diagnostic.IDE0302.severity = silent
+
+# IDE0303: Use collection expression for Create()
+dotnet_diagnostic.IDE0303.severity = silent
+
+# IDE0304: Use collection expression for builder
+dotnet_diagnostic.IDE0304.severity = silent
+
+# IDE0305: Use collection expression for fluent
+dotnet_diagnostic.IDE0305.severity = silent
+
 # IDE1005: Delegate invocation can be simplified.
 dotnet_diagnostic.IDE1005.severity = silent
 
@@ -1848,6 +1908,12 @@ dotnet_diagnostic.IDE2003.severity = silent
 # IDE2004: Blank line not allowed after constructor initializer colon
 dotnet_diagnostic.IDE2004.severity = silent
 
+# IDE2005: Blank line not allowed after conditional expression token
+dotnet_diagnostic.IDE2005.severity = silent
+
+# IDE2006: Blank line not allowed after arrow expression clause token
+dotnet_diagnostic.IDE2006.severity = silent
+
 # xUnit1000: Test classes must be public
 dotnet_diagnostic.xUnit1000.severity = warning
 
diff --git a/eng/DiaSymReaderNative.targets b/eng/DiaSymReaderNative.targets
index caa482f4b6e8..ac8dc7e36a06 100644
--- a/eng/DiaSymReaderNative.targets
+++ b/eng/DiaSymReaderNative.targets
@@ -18,7 +18,7 @@
     package can't be referenced directly but rather has to have it's assets manually copied 
     out. This logic is responsible for doing that.
   -->
-  <ItemGroup Condition="'$(DotNetBuildFromSource)' != 'true'">
+  <ItemGroup Condition="'$(DotNetBuildSourceOnly)' != 'true'">
     <Content Include="$(NuGetPackageRoot)\microsoft.diasymreader.native\$(MicrosoftDiaSymReaderNativeVersion)\runtimes\win\native\Microsoft.DiaSymReader.Native.x86.dll">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
       <Visible>false</Visible>
diff --git a/eng/DotNetBuild.props b/eng/DotNetBuild.props
index c830b112c32c..06ea2ee04665 100644
--- a/eng/DotNetBuild.props
+++ b/eng/DotNetBuild.props
@@ -32,17 +32,22 @@
     <ShortStack Condition="'$(TargetOS)' == 'tvossimulator'">true</ShortStack>
     <ShortStack Condition="'$(TargetOS)' == 'maccatalyst'">true</ShortStack>
     <ShortStack Condition="'$(TargetOS)' == 'android'">true</ShortStack>
+    <ShortStack Condition="'$(TargetOS)' == 'linux-bionic'">true</ShortStack>
   </PropertyGroup>
 
   <Target Name="GetRuntimeSourceBuildCommandConfiguration"
           BeforeTargets="GetSourceBuildCommandConfiguration">
     <PropertyGroup>
-      <!-- Properties that control the source-build configuration should be added to the repository and guarded with the DotNetBuildFromSource Condition.
-           This allows to build the repository using './build.sh <args> /p:DotNetBuildFromSource=true'.
-           Properties that control flags from source-build, and the expected output for source-build should be added to this file. -->
+      <!-- Properties that control source-only build configurations should be added to the repository and guarded with DotNetBuildSourceOnly conditions.
+           This allows to build the repository using './build.sh <args> /p:DotNetBuildSourceOnly=true'.
+           Properties that control flags from the VMR build, and the expected output for the VMR build should be added to this file. -->
+
+      <!-- Enable regular Arcade publishing in VMR build -->
+      <InnerBuildArgs Condition="'$(DotNetBuildOrchestrator)' == 'true'">$(InnerBuildArgs) $(FlagParameterPrefix)restore $(FlagParameterPrefix)build $(FlagParameterPrefix)publish</InnerBuildArgs>
+
       <InnerBuildArgs>$(InnerBuildArgs) $(FlagParameterPrefix)arch $(TargetArch)</InnerBuildArgs>
-      <InnerBuildArgs>$(InnerBuildArgs) $(FlagParameterPrefix)os $(TargetOS)</InnerBuildArgs>
-      <InnerBuildArgs Condition="'$(TargetArch)' != '$(_hostArch)' and '$(ShortStack)' != 'true'">$(InnerBuildArgs) $(FlagParameterPrefix)cross</InnerBuildArgs>
+      <InnerBuildArgs Condition="'$(DotNetBuildSourceOnly)' != 'true'">$(InnerBuildArgs) $(FlagParameterPrefix)os $(TargetOS)</InnerBuildArgs>
+      <InnerBuildArgs Condition="'$(CrossBuild)' == 'true' or ('$(TargetArch)' != '$(_hostArch)' and '$(ShortStack)' != 'true')">$(InnerBuildArgs) $(FlagParameterPrefix)cross</InnerBuildArgs>
       <InnerBuildArgs>$(InnerBuildArgs) $(FlagParameterPrefix)configuration $(Configuration)</InnerBuildArgs>
       <InnerBuildArgs Condition="'$(ShortStack)' != 'true'">$(InnerBuildArgs) $(FlagParameterPrefix)allconfigurations</InnerBuildArgs>
       <InnerBuildArgs>$(InnerBuildArgs) $(FlagParameterPrefix)verbosity $(LogVerbosity)</InnerBuildArgs>
@@ -57,14 +62,25 @@
       <!-- BaseOS is an expected known rid in the graph that TargetRid is compatible with.
            It's used to add TargetRid in the graph if the parent can't be detected. -->
       <InnerBuildArgs>$(InnerBuildArgs) /p:AdditionalRuntimeIdentifierParent=$(BaseOS)</InnerBuildArgs>
+      <!-- Pass through special build modes controlled by properties -->
+      <InnerBuildArgs Condition="'$(DotNetBuildRuntimeWasmEnableThreads)' == 'true'">$(InnerBuildArgs) /p:WasmEnableThreads=true</InnerBuildArgs>
+      <InnerBuildArgs Condition="'$(DotNetBuildRuntimeNativeAOTRuntimePack)' == 'true'">$(InnerBuildArgs) $(FlagParameterPrefix)s clr.nativeaotlibs+clr.nativeaotruntime+libs+packs /p:BuildNativeAOTRuntimePack=true /p:SkipLibrariesNativeRuntimePackages=true</InnerBuildArgs>
+      <InnerBuildArgs Condition="'$(PgoInstrument)' == 'true'">$(InnerBuildArgs) $(FlagParameterPrefix)pgoinstrument</InnerBuildArgs>
 
       <!-- This prop needs to be passed to the inner build manually as the BaseInnerSourceBuildCommand gets overriden above -->
-      <InnerBuildArgs Condition="'$(ArcadeBuildFromSource)' == 'true'">$(InnerBuildArgs) /p:ArcadeBuildFromSource=true</InnerBuildArgs>
-      <InnerBuildArgs Condition="'$(ArcadeBuildVertical)' == 'true'">$(InnerBuildArgs) /p:ArcadeBuildVertical=true</InnerBuildArgs>
+      <InnerBuildArgs Condition="'$(DotNetBuildRepo)' == 'true'">$(InnerBuildArgs) /p:DotNetBuildRepo=true</InnerBuildArgs>
+      <InnerBuildArgs Condition="'$(DotNetBuildOrchestrator)' == 'true'">$(InnerBuildArgs) /p:DotNetBuildOrchestrator=true</InnerBuildArgs>
       <InnerBuildArgs Condition="'$(OfficialBuildId)' != ''">$(InnerBuildArgs) /p:OfficialBuildId=$(OfficialBuildId)</InnerBuildArgs>
       <InnerBuildArgs Condition="'$(ContinuousIntegrationBuild)' != ''">$(InnerBuildArgs) /p:ContinuousIntegrationBuild=$(ContinuousIntegrationBuild)</InnerBuildArgs>
       <InnerBuildArgs Condition="'$(PortableBuild)' != ''">$(InnerBuildArgs) /p:PortableBuild=$(PortableBuild)</InnerBuildArgs>
       <InnerBuildArgs Condition="'$(RestoreConfigFile)' != ''">$(InnerBuildArgs) /p:RestoreConfigFile=$(RestoreConfigFile)</InnerBuildArgs>
+
+      <!-- Pass locations for assets, packages and symbols -->
+      <InnerBuildArgs Condition="'$(SourceBuiltAssetsDir)' != ''">$(InnerBuildArgs) /p:SourceBuiltAssetsDir=$(SourceBuiltAssetsDir)</InnerBuildArgs>
+      <InnerBuildArgs Condition="'$(SourceBuiltShippingPackagesDir)' != ''">$(InnerBuildArgs) /p:SourceBuiltShippingPackagesDir=$(SourceBuiltShippingPackagesDir)</InnerBuildArgs>
+      <InnerBuildArgs Condition="'$(SourceBuiltNonShippingPackagesDir)' != ''">$(InnerBuildArgs) /p:SourceBuiltNonShippingPackagesDir=$(SourceBuiltNonShippingPackagesDir)</InnerBuildArgs>
+      <InnerBuildArgs Condition="'$(SourceBuiltAssetManifestsDir)' != ''">$(InnerBuildArgs) /p:SourceBuiltAssetManifestsDir=$(SourceBuiltAssetManifestsDir)</InnerBuildArgs>
+      <InnerBuildArgs Condition="'$(SourceBuiltSymbolsDir)' != ''">$(InnerBuildArgs) /p:SourceBuiltSymbolsDir=$(SourceBuiltSymbolsDir)</InnerBuildArgs>
     </PropertyGroup>
   </Target>
 
diff --git a/eng/Publishing.props b/eng/Publishing.props
index 920e79cbbd2f..1507fc850339 100644
--- a/eng/Publishing.props
+++ b/eng/Publishing.props
@@ -1,6 +1,39 @@
 <Project>
-  <!-- TODO: Consolidate the Publishing.props files into here. -->
+
   <PropertyGroup>
-    <PublishingVersion>3</PublishingVersion>
+    <ProducesDotNetReleaseShippingAssets>true</ProducesDotNetReleaseShippingAssets>
   </PropertyGroup>
-</Project>
\ No newline at end of file
+
+  <!-- Include installer archives and packages which aren't globbed by default.
+       Don't include Symbols archive as it is already included in Arcade's Publish.proj, with correct blob path. -->
+  <Target Name="PublishRuntimeInstallers"
+          BeforeTargets="BeforePublish"
+          Condition="'$(DotNetBuildRepo)' == 'true'">
+    <!-- Retrieve runtime's runtime pack product version.
+         Don't stabilize the package version in order to retrieve the VersionSuffix. -->
+    <MSBuild Projects="$(RepoRoot)src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.sfxproj"
+             Targets="ReturnProductVersion"
+             Properties="IsShipping=false;
+                         Crossgen2SdkOverridePropsPath=;
+                         Crossgen2SdkOverrideTargetsPath=">
+      <Output TaskParameter="TargetOutputs" PropertyName="RuntimeRuntimePackProductVersion" />
+    </MSBuild>
+
+    <ItemGroup>
+      <InstallerToPublish Include="$(ArtifactsPackagesDir)**\*.tar.gz;
+                                   $(ArtifactsPackagesDir)**\*.zip;
+                                   $(ArtifactsPackagesDir)**\*.deb;
+                                   $(ArtifactsPackagesDir)**\*.rpm;
+                                   $(ArtifactsPackagesDir)**\*.pkg;
+                                   $(ArtifactsPackagesDir)**\*.exe;
+                                   $(ArtifactsPackagesDir)**\*.msi"
+                          Exclude="$(ArtifactsPackagesDir)**\Symbols.runtime.tar.gz" />
+
+      <ItemsToPushToBlobFeed Include="@(InstallerToPublish)"
+                             IsShipping="$([System.String]::Copy('%(RecursiveDir)').StartsWith('Shipping'))"
+                             PublishFlatContainer="true"
+                             RelativeBlobPath="Runtime/$(RuntimeRuntimePackProductVersion)/%(Filename)%(Extension)" />
+    </ItemGroup>
+  </Target>
+
+</Project>
diff --git a/eng/Subsets.props b/eng/Subsets.props
index 4a0277852d17..8026bf97e046 100644
--- a/eng/Subsets.props
+++ b/eng/Subsets.props
@@ -40,7 +40,12 @@
     <DefaultSubsets Condition="'$(TargetsMobile)' == 'true'">mono+libs+packs</DefaultSubsets>
     <DefaultSubsets Condition="'$(TargetsLinuxBionic)' == 'true'">mono+libs+host+packs</DefaultSubsets>
     <!-- In source build, mono is only supported as primary runtime flavor. On Windows mono is supported for x86/x64 only. -->
-    <DefaultSubsets Condition="('$(DotNetBuildFromSource)' == 'true' and '$(PrimaryRuntimeFlavor)' != 'Mono') or ('$(TargetOS)' == 'windows' and '$(TargetArchitecture)' != 'x86' and '$(TargetArchitecture)' != 'x64')">clr+libs+tools+host+packs</DefaultSubsets>
+    <DefaultSubsets Condition="('$(DotNetBuildSourceOnly)' == 'true' and '$(PrimaryRuntimeFlavor)' != 'Mono') or ('$(TargetOS)' == 'windows' and '$(TargetArchitecture)' != 'x86' and '$(TargetArchitecture)' != 'x64')">clr+libs+tools+host+packs</DefaultSubsets>
+  </PropertyGroup>
+
+  <PropertyGroup>
+    <!-- If we're building in the VMR, then we want to build all of the assets even in a PGO-instrumented build as downstream repos will not be able to pull assets from a matching non-PGO-instrumented build. -->
+    <BuildOnlyPgoInstrumentedAssets Condition="'$(PgoInstrument)' == 'true' and '$(DotNetBuild)' != 'true'">true</BuildOnlyPgoInstrumentedAssets>
   </PropertyGroup>
 
   <!-- Init _subset here to allow RuntimeFlavor to be set as early as possible -->
@@ -81,19 +86,19 @@
                                         '$(BuildTargetFramework)' == '' or
                                         '$(BuildAllConfigurations)' == 'true'">libs.native+</DefaultLibrariesSubsets>
     <DefaultLibrariesSubsets>$(DefaultLibrariesSubsets)libs.sfx+libs.oob+libs.pretest</DefaultLibrariesSubsets>
-    <!-- Respect the DotNetBuildTests product flag when building the product. --> 
+    <!-- Respect the DotNetBuildTests product flag when building the product. -->
     <DefaultLibrariesSubsets Condition="'$(DotNetBuildTests)' == 'true'">$(DefaultLibrariesSubsets)+libs.tests</DefaultLibrariesSubsets>
 
     <DefaultToolsSubsets>tools.illink</DefaultToolsSubsets>
 
     <DefaultHostSubsets>host.native+host.tools+host.pkg</DefaultHostSubsets>
-    <DefaultHostSubsets Condition="'$(DotNetBuildFromSource)' != 'true'">$(DefaultHostSubsets)+host.pretest+host.tests</DefaultHostSubsets>
+    <DefaultHostSubsets Condition="'$(DotNetBuildSourceOnly)' != 'true'">$(DefaultHostSubsets)+host.pretest+host.tests</DefaultHostSubsets>
     <DefaultHostSubsets Condition="'$(RuntimeFlavor)' != '$(PrimaryRuntimeFlavor)' or '$(TargetsMobile)' == 'true'"></DefaultHostSubsets>
     <DefaultHostSubsets Condition="'$(RuntimeFlavor)' != '$(PrimaryRuntimeFlavor)' and '$(TargetsMobile)' != 'true'">host.native</DefaultHostSubsets>
 
     <DefaultPacksSubsets>packs.product</DefaultPacksSubsets>
-    <DefaultPacksSubsets Condition="'$(BuildMonoAOTCrossCompilerOnly)' != 'true' and '$(DotNetBuildFromSource)' != 'true'">$(DefaultPacksSubsets)+packs.tests</DefaultPacksSubsets>
-    <DefaultPacksSubsets Condition="'$(DotNetBuildFromSource)' == 'true'">$(DefaultPacksSubsets)+packs.installers</DefaultPacksSubsets>
+    <DefaultPacksSubsets Condition="'$(BuildMonoAOTCrossCompilerOnly)' != 'true' and '$(DotNetBuildSourceOnly)' != 'true'">$(DefaultPacksSubsets)+packs.tests</DefaultPacksSubsets>
+    <DefaultPacksSubsets Condition="'$(DotNetBuildSourceOnly)' == 'true'">$(DefaultPacksSubsets)+packs.installers</DefaultPacksSubsets>
     <DefaultPacksSubsets Condition="'$(RuntimeFlavor)' != 'Mono' and '$(ForceBuildMobileManifests)' == 'true'">$(DefaultPacksSubsets)+mono.manifests</DefaultPacksSubsets>
   </PropertyGroup>
 
@@ -116,10 +121,12 @@
 
   <PropertyGroup>
     <!-- CLR NativeAot only builds in a subset of the matrix -->
-    <NativeAotSupported Condition="('$(TargetOS)' == 'windows' or '$(TargetOS)' == 'linux' or '$(TargetOS)' == 'osx' or '$(TargetOS)' == 'maccatalyst' or '$(TargetOS)' == 'iossimulator' or '$(TargetOS)' == 'ios' or '$(TargetOS)' == 'tvossimulator' or '$(TargetOS)' == 'tvos' or '$(TargetOS)' == 'freebsd') and ('$(TargetArchitecture)' == 'x64' or '$(TargetArchitecture)' == 'arm64' or '$(TargetArchitecture)' == 'arm')">true</NativeAotSupported>
+    <_NativeAotSupportedOS Condition="'$(TargetOS)' == 'windows' or '$(TargetOS)' == 'linux' or '$(TargetOS)' == 'osx' or '$(TargetOS)' == 'maccatalyst' or '$(TargetOS)' == 'iossimulator' or '$(TargetOS)' == 'ios' or '$(TargetOS)' == 'tvossimulator' or '$(TargetOS)' == 'tvos' or '$(TargetOS)' == 'freebsd'">true</_NativeAotSupportedOS>
+    <_NativeAotSupportedArch Condition="'$(TargetArchitecture)' == 'x64' or '$(TargetArchitecture)' == 'arm64' or '$(TargetArchitecture)' == 'arm' or ('$(TargetOS)' == 'windows' and '$(TargetArchitecture)' == 'x86')">true</_NativeAotSupportedArch>
+    <NativeAotSupported Condition="'$(_NativeAotSupportedOS)' == 'true' and $(_NativeAotSupportedArch) == 'true'">true</NativeAotSupported>
 
     <!-- If we're building clr.nativeaotlibs and not building the CLR runtime, compile libraries against NativeAOT CoreLib -->
-    <UseNativeAotCoreLib Condition="'$(TestNativeAot)' == 'true' or ($(_subset.Contains('+clr.nativeaotlibs+')) and !$(_subset.Contains('+clr.native+')) and !$(_subset.Contains('+clr.runtime+')))">true</UseNativeAotCoreLib>
+    <UseNativeAotCoreLib Condition="'$(TestNativeAot)' == 'true' or ($(_subset.Contains('+clr.nativeaotlibs+')) and !$(_subset.Contains('+clr.native+')) and !$(_subset.Contains('+clr.runtime+')) and !$(_subset.Contains('+clr.corelib+')))">true</UseNativeAotCoreLib>
   </PropertyGroup>
 
   <ItemGroup>
@@ -261,7 +268,7 @@
   </PropertyGroup>
 
   <ItemGroup Condition="'$(ClrRuntimeBuildSubsets)' != '' or $(_subset.Contains('+clr.nativeprereqs+'))">
-    <ProjectToBuild Include="$(CoreClrProjectRoot)runtime-prereqs.proj" Category="clr" />
+    <ProjectToBuild Include="$(CoreClrProjectRoot)runtime-prereqs.proj" Category="clr" GlobalPropertiesToRemove="$(NativeBuildPartitionPropertiesToRemove)" />
   </ItemGroup>
 
   <ItemGroup Condition="'$(ClrRuntimeBuildSubsets)' != ''">
@@ -352,7 +359,7 @@
                              $(CoreClrProjectRoot)tools\aot\ILCompiler\repro\repro.csproj;
                              $(CoreClrProjectRoot)tools\r2rtest\R2RTest.csproj;
                              $(CoreClrProjectRoot)tools\PdbChecker\PdbChecker.csproj;
-                             $(CoreClrProjectRoot)tools\AssemblyChecker\AssemblyChecker.csproj" Category="clr" Condition="'$(DotNetBuildFromSource)' != 'true'"/>
+                             $(CoreClrProjectRoot)tools\AssemblyChecker\AssemblyChecker.csproj" Category="clr" Condition="'$(DotNetBuildSourceOnly)' != 'true'"/>
     <ProjectToBuild Include="$(CoreClrProjectRoot)tools\aot\crossgen2\crossgen2.csproj" Category="clr" />
     <ProjectToBuild Include="$(CoreClrProjectRoot)tools\aot\ILCompiler.Build.Tasks\ILCompiler.Build.Tasks.csproj" Category="clr" Condition="'$(NativeAotSupported)' == 'true'" />
     <ProjectToBuild Include="$(CoreClrProjectRoot)tools\aot\ILCompiler\ILCompiler.csproj" Category="clr" Condition="'$(NativeAotSupported)' == 'true'" />
@@ -366,11 +373,11 @@
 
   <ItemGroup Condition="$(_subset.Contains('+clr.toolstests+'))">
     <ProjectToBuild Include="$(CoreClrProjectRoot)tools\aot\ILCompiler.TypeSystem.Tests\ILCompiler.TypeSystem.Tests.csproj"
-      Test="true" Category="clr" Condition="'$(DotNetBuildFromSource)' != 'true'"/>
+      Test="true" Category="clr" Condition="'$(DotNetBuildSourceOnly)' != 'true'"/>
     <ProjectToBuild Include="$(CoreClrProjectRoot)tools\aot\ILCompiler.Compiler.Tests\ILCompiler.Compiler.Tests.csproj"
-      Test="true" Category="clr" Condition="'$(DotNetBuildFromSource)' != 'true' and '$(NativeAotSupported)' == 'true'"/>
+      Test="true" Category="clr" Condition="'$(DotNetBuildSourceOnly)' != 'true' and '$(NativeAotSupported)' == 'true'"/>
     <ProjectToBuild Include="$(CoreClrProjectRoot)tools\aot\ILCompiler.Trimming.Tests\ILCompiler.Trimming.Tests.csproj"
-      Test="true" Category="clr" Condition="'$(DotNetBuildFromSource)' != 'true' and '$(NativeAotSupported)' == 'true'"/>
+      Test="true" Category="clr" Condition="'$(DotNetBuildSourceOnly)' != 'true' and '$(NativeAotSupported)' == 'true'"/>
   </ItemGroup>
 
   <ItemGroup Condition="$(_subset.Contains('+tools.illink+'))">
@@ -402,9 +409,9 @@
     <ProjectToBuild Include="$(CoreClrProjectRoot)crossgen-corelib.proj" Category="clr" />
   </ItemGroup>
 
-  <ItemGroup Condition="$(_subset.Contains('+clr.packages+')) and '$(PgoInstrument)' != 'true'">
+  <ItemGroup Condition="$(_subset.Contains('+clr.packages+')) and '$(BuildOnlyPgoInstrumentedAssets)' != 'true'">
     <ProjectToBuild Include="$(CoreClrProjectRoot).nuget\coreclr-packages.proj" Pack="true" Category="clr" />
-    <ProjectToBuild Include="$(CoreClrProjectRoot)tools\dotnet-pgo\dotnet-pgo-pack.proj" Pack="true" Category="clr" Condition="'$(DotNetBuildFromSource)' != 'true' and '$(RuntimeFlavor)' != 'Mono'"/>
+    <ProjectToBuild Include="$(CoreClrProjectRoot)tools\dotnet-pgo\dotnet-pgo-pack.proj" Pack="true" Category="clr" Condition="'$(DotNetBuildSourceOnly)' != 'true' and '$(RuntimeFlavor)' != 'Mono'"/>
   </ItemGroup>
 
   <ItemGroup Condition="$(_subset.Contains('+clr.nativeaotlibs+')) and ('$(NativeAotSupported)' == 'true' or ('$(TargetArchitecture)' == 'wasm'))">
@@ -464,7 +471,7 @@
     <ProjectToBuild Include="@(ManagedProjectToBuild)" BuildInParallel="true" Pack="true" Category="host" />
   </ItemGroup>
 
-  <ItemGroup Condition="$(_subset.Contains('+host.pkg+')) and '$(PgoInstrument)' != 'true'">
+  <ItemGroup Condition="$(_subset.Contains('+host.pkg+')) and '$(BuildOnlyPgoInstrumentedAssets)' != 'true'">
     <PkgprojProjectToBuild Include="$(InstallerProjectRoot)pkg\projects\host-packages.proj" SignPhase="MsiFiles" />
     <ProjectToBuild Include="@(PkgprojProjectToBuild)" Pack="true" Category="host" />
   </ItemGroup>
@@ -509,7 +516,7 @@
 
   <!-- Host.pretest subset (consumes live built libraries assets so needs to come after libraries) -->
   <ItemGroup Condition="$(_subset.Contains('+host.pretest+'))">
-    <ProjectToBuild Include="$(InstallerProjectRoot)tests\Assets\Projects\**\*.csproj" Category="host" />
+    <ProjectToBuild Include="$(InstallerProjectRoot)tests\pretest.proj" Category="host" />
   </ItemGroup>
 
   <!-- Host.tests subset (consumes live built libraries assets so needs to come after libraries) -->
@@ -524,10 +531,10 @@
 
   <Choose>
     <When Condition="$(_subset.Contains('+packs.product+'))">
-      <ItemGroup Condition="'$(PgoInstrument)' != 'true'">
+      <ItemGroup Condition="'$(BuildOnlyPgoInstrumentedAssets)' != 'true'">
         <SharedFrameworkProjectToBuild Condition="'$(BuildMonoAOTCrossCompilerOnly)' != 'true'" Include="$(InstallerProjectRoot)pkg\sfx\Microsoft.NETCore.App\Microsoft.NETCore.App.Ref.sfxproj" />
       </ItemGroup>
-      <ItemGroup Condition="'$(BuildNativeAOTRuntimePack)' != 'true' and '$(PgoInstrument)' != 'true'">
+      <ItemGroup Condition="'$(BuildNativeAOTRuntimePack)' != 'true' and '$(BuildOnlyPgoInstrumentedAssets)' != 'true'">
         <SharedFrameworkProjectToBuild Condition="'$(RuntimeFlavor)' == '$(PrimaryRuntimeFlavor)' and '$(TargetsMobile)' != 'true'" Include="$(InstallerProjectRoot)pkg\sfx\Microsoft.NETCore.App\Microsoft.NETCore.App.Host.sfxproj" />
         <SharedFrameworkProjectToBuild Condition="'$(RuntimeFlavor)' != 'Mono'" Include="$(InstallerProjectRoot)pkg\sfx\Microsoft.NETCore.App\Microsoft.NETCore.App.Crossgen2.sfxproj" />
         <SharedFrameworkProjectToBuild Condition="'$(RuntimeFlavor)' == '$(PrimaryRuntimeFlavor)' and '$(TargetsMobile)' != 'true'" Include="$(InstallerProjectRoot)pkg\sfx\installers\dotnet-host.proj" />
@@ -537,7 +544,7 @@
         <SharedFrameworkProjectToBuild Condition="'$(MonoCrossAOTTargetOS)' != ''" Include="$(InstallerProjectRoot)pkg\sfx\Microsoft.NETCore.App\monocrossaot.sfxproj" Pack="true" />
       </ItemGroup>
       <ItemGroup>
-        <ProjectToBuild Condition="'$(NativeAotSupported)' == 'true' and '$(RuntimeFlavor)' != 'Mono' and '$(PgoInstrument)' != 'true'" Include="$(InstallerProjectRoot)\pkg\projects\nativeaot-packages.proj" Category="packs" />
+        <ProjectToBuild Condition="'$(NativeAotSupported)' == 'true' and '$(RuntimeFlavor)' != 'Mono' and '$(BuildOnlyPgoInstrumentedAssets)' != 'true'" Include="$(InstallerProjectRoot)\pkg\projects\nativeaot-packages.proj" Category="packs" />
       </ItemGroup>
       <ItemGroup>
         <SharedFrameworkProjectToBuild Condition="'$(BuildMonoAOTCrossCompilerOnly)' != 'true'" Include="$(InstallerProjectRoot)pkg\sfx\Microsoft.NETCore.App\Microsoft.NETCore.App.Runtime.sfxproj" />
@@ -547,12 +554,12 @@
     </When>
   </Choose>
 
-  <ItemGroup Condition="$(_subset.Contains('+packs.installers+')) AND '$(PgoInstrument)' != 'true'">
+  <ItemGroup Condition="$(_subset.Contains('+packs.installers+')) AND '$(BuildOnlyPgoInstrumentedAssets)' != 'true'">
     <InstallerProjectToBuild Include="$(InstallerProjectRoot)pkg\sfx\installers.proj" />
     <ProjectToBuild Include="@(InstallerProjectToBuild)" Category="packs" />
   </ItemGroup>
 
-  <ItemGroup Condition="$(_subset.Contains('+packs.tests+')) AND '$(PgoInstrument)' != 'true'">
+  <ItemGroup Condition="$(_subset.Contains('+packs.tests+')) AND '$(BuildOnlyPgoInstrumentedAssets)' != 'true'">
     <TestProjectToBuild Include="$(InstallerProjectRoot)tests\Microsoft.DotNet.CoreSetup.Packaging.Tests\Microsoft.DotNet.CoreSetup.Packaging.Tests.csproj" />
     <ProjectToBuild Include="@(TestProjectToBuild)" BuildInParallel="true" Test="true" Category="packs" />
   </ItemGroup>
diff --git a/eng/Tools.props b/eng/Tools.props
index 01cae1f2b230..3baa40f4f32e 100644
--- a/eng/Tools.props
+++ b/eng/Tools.props
@@ -11,7 +11,7 @@
   </ItemGroup>
 
   <!-- excluded from source build -->
-  <ItemGroup Condition="'$(DotNetBuildFromSource)' != 'true'">
+  <ItemGroup Condition="'$(DotNetBuildSourceOnly)' != 'true'">
     <PackageReference Include="Microsoft.DotNet.VersionTools.Tasks" Version="$(MicrosoftDotNetVersionToolsTasksVersion)" />
   </ItemGroup>
 
diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml
index 98c699e1821b..b5717f05a5f5 100644
--- a/eng/Version.Details.xml
+++ b/eng/Version.Details.xml
@@ -1,84 +1,52 @@
 <Dependencies>
   <ProductDependencies>
-    <Dependency Name="Microsoft.NETCore.Runtime.ICU.Transport" Version="9.0.0-preview.2.24072.3">
+    <Dependency Name="Microsoft.NETCore.Runtime.ICU.Transport" Version="9.0.0-preview.4.24201.1">
       <Uri>https://github.com/dotnet/icu</Uri>
-      <Sha>694cd153a9083da273595fabb73818d4e8a49f40</Sha>
+      <Sha>1441a3fcbfa87c94b98a27605b06db7dd862f3e4</Sha>
     </Dependency>
-    <Dependency Name="System.Net.MsQuic.Transport" Version="9.0.0-alpha.1.24067.1">
+    <Dependency Name="System.Net.MsQuic.Transport" Version="9.0.0-alpha.1.24167.3">
       <Uri>https://github.com/dotnet/msquic</Uri>
-      <Sha>3fb2583170384341dbbc444cd5bb3d2319433fb6</Sha>
+      <Sha>6281631a8328ffdbb1b63b231af1aaa803915b23</Sha>
     </Dependency>
     <Dependency Name="System.ServiceModel.Primitives" Version="4.9.0-rc2.21473.1">
       <Uri>https://github.com/dotnet/wcf</Uri>
       <Sha>7f504aabb1988e9a093c1e74d8040bd52feb2f01</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.NET.Runtime.Emscripten.3.1.34.Python.win-x64" Version="9.0.0-preview.2.24108.4">
+    <Dependency Name="Microsoft.NET.Runtime.Emscripten.3.1.34.Python.win-x64" Version="9.0.0-preview.4.24204.9">
       <Uri>https://github.com/dotnet/emsdk</Uri>
-      <Sha>d3abc57b72e22d012e3601feea54b8e3dd64ff21</Sha>
+      <Sha>9ad7c262f14dc5e40a64030ade7788b36e74adf0</Sha>
     </Dependency>
-    <Dependency Name="runtime.linux-arm64.Microsoft.NETCore.Runtime.ObjWriter" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.linux-arm64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.linux-x64.Microsoft.NETCore.Runtime.ObjWriter" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.linux-x64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.linux-musl-arm64.Microsoft.NETCore.Runtime.ObjWriter" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.linux-musl-arm64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.linux-musl-x64.Microsoft.NETCore.Runtime.ObjWriter" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.linux-musl-x64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.win-arm64.Microsoft.NETCore.Runtime.ObjWriter" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.win-arm64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.win-x64.Microsoft.NETCore.Runtime.ObjWriter" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.win-x64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.osx-arm64.Microsoft.NETCore.Runtime.ObjWriter" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.osx-arm64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.osx-x64.Microsoft.NETCore.Runtime.ObjWriter" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.osx-x64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
-    </Dependency>
-    <Dependency Name="runtime.linux-arm64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
-      <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
-    </Dependency>
-    <Dependency Name="runtime.linux-x64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
-      <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
-    </Dependency>
-    <Dependency Name="runtime.linux-musl-arm64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
-      <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
-    </Dependency>
-    <Dependency Name="runtime.linux-musl-x64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
-      <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
-    </Dependency>
-    <Dependency Name="runtime.win-arm64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
-      <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
-    </Dependency>
-    <Dependency Name="runtime.win-x64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
-      <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
-    </Dependency>
-    <Dependency Name="runtime.osx-arm64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
-      <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
-    </Dependency>
-    <Dependency Name="runtime.osx-x64.Microsoft.NETCore.Runtime.JIT.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
-      <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
     <Dependency Name="System.CommandLine" Version="2.0.0-beta4.23407.1">
       <Uri>https://github.com/dotnet/command-line-api</Uri>
@@ -90,352 +58,351 @@
       <Sha>a045dd54a4c44723c215d992288160eb1401bb7f</Sha>
       <SourceBuild RepoName="command-line-api" ManagedOnly="true" />
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.Cecil" Version="0.11.4-alpha.24065.1">
+    <Dependency Name="Microsoft.DotNet.Cecil" Version="0.11.4-alpha.24168.1">
       <Uri>https://github.com/dotnet/cecil</Uri>
-      <Sha>b8c2293cd1cbd9d0fe6f32d7b5befbd526b5a175</Sha>
+      <Sha>9c8ea966df62f764523b51772763e74e71040a92</Sha>
     </Dependency>
     <!-- Intermediate is necessary for source build. -->
-    <Dependency Name="Microsoft.SourceBuild.Intermediate.cecil" Version="0.11.4-alpha.24065.1">
+    <Dependency Name="Microsoft.SourceBuild.Intermediate.cecil" Version="0.11.4-alpha.24168.1">
       <Uri>https://github.com/dotnet/cecil</Uri>
-      <Sha>b8c2293cd1cbd9d0fe6f32d7b5befbd526b5a175</Sha>
+      <Sha>9c8ea966df62f764523b51772763e74e71040a92</Sha>
       <SourceBuild RepoName="cecil" ManagedOnly="true" />
     </Dependency>
-    <Dependency Name="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport" Version="9.0.0-preview.2.24108.4">
+    <Dependency Name="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport" Version="9.0.0-preview.4.24204.9">
       <Uri>https://github.com/dotnet/emsdk</Uri>
-      <Sha>d3abc57b72e22d012e3601feea54b8e3dd64ff21</Sha>
+      <Sha>9ad7c262f14dc5e40a64030ade7788b36e74adf0</Sha>
     </Dependency>
     <!-- Intermediate is necessary for source build. -->
-    <Dependency Name="Microsoft.SourceBuild.Intermediate.emsdk" Version="9.0.0-preview.2.24108.4">
+    <Dependency Name="Microsoft.SourceBuild.Intermediate.emsdk" Version="9.0.0-preview.4.24204.9">
       <Uri>https://github.com/dotnet/emsdk</Uri>
-      <Sha>d3abc57b72e22d012e3601feea54b8e3dd64ff21</Sha>
+      <Sha>9ad7c262f14dc5e40a64030ade7788b36e74adf0</Sha>
       <SourceBuild RepoName="emsdk" ManagedOnly="true" />
     </Dependency>
     <!-- Intermediate is necessary for source build. -->
-    <Dependency Name="Microsoft.SourceBuild.Intermediate.source-build-reference-packages" Version="9.0.0-alpha.1.24108.1">
+    <Dependency Name="Microsoft.SourceBuild.Intermediate.source-build-reference-packages" Version="9.0.0-alpha.1.24162.2">
       <Uri>https://github.com/dotnet/source-build-reference-packages</Uri>
-      <Sha>6b94d1513777c3aa0426f648649ce06d0d705bb2</Sha>
+      <Sha>c0b5d69a1a1513528c77fffff708c7502d57c35c</Sha>
       <SourceBuild RepoName="source-build-reference-packages" ManagedOnly="true" />
     </Dependency>
     <!-- Intermediate is necessary for source build. -->
-    <Dependency Name="Microsoft.SourceBuild.Intermediate.source-build-externals" Version="9.0.0-alpha.1.24106.1">
+    <Dependency Name="Microsoft.SourceBuild.Intermediate.source-build-externals" Version="9.0.0-alpha.1.24203.1">
       <Uri>https://github.com/dotnet/source-build-externals</Uri>
-      <Sha>f1ef074dfcf79d2f2da6e6ff9df8696a32aa063c</Sha>
+      <Sha>1e2e91d2544726b2cf68109f946178ef6bef3ad9</Sha>
       <SourceBuild RepoName="source-build-externals" ManagedOnly="true" />
     </Dependency>
   </ProductDependencies>
   <ToolsetDependencies>
-    <Dependency Name="Microsoft.DotNet.Arcade.Sdk" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.Arcade.Sdk" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
     <!-- Intermediate is necessary for source build. -->
-    <Dependency Name="Microsoft.SourceBuild.Intermediate.arcade" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.SourceBuild.Intermediate.arcade" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
       <SourceBuild RepoName="arcade" ManagedOnly="true" />
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.XliffTasks" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.XliffTasks" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.Helix.Sdk" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.Helix.Sdk" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.GenAPI" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.GenAPI" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.GenFacades" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.GenFacades" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.XUnitAssert" Version="2.6.7-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.XUnitAssert" Version="2.6.7-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.XUnitExtensions" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.XUnitExtensions" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.XUnitConsoleRunner" Version="2.6.7-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.XUnitConsoleRunner" Version="2.6.7-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.Build.Tasks.Archives" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.Build.Tasks.Archives" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.Build.Tasks.Packaging" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.Build.Tasks.Packaging" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.Build.Tasks.Installers" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.Build.Tasks.Installers" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.Build.Tasks.Templating" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.Build.Tasks.Templating" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.Build.Tasks.Workloads" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.Build.Tasks.Workloads" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.CodeAnalysis" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.CodeAnalysis" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.Build.Tasks.TargetFramework" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.Build.Tasks.TargetFramework" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.RemoteExecutor" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.RemoteExecutor" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.Build.Tasks.Feed" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.Build.Tasks.Feed" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.VersionTools.Tasks" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.VersionTools.Tasks" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.SharedFramework.Sdk" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.SharedFramework.Sdk" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="System.ComponentModel.TypeConverter.TestData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.ComponentModel.TypeConverter.TestData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="System.Data.Common.TestData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.Data.Common.TestData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="System.Drawing.Common.TestData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.Drawing.Common.TestData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="System.Formats.Tar.TestData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.Formats.Tar.TestData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="System.IO.Compression.TestData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.IO.Compression.TestData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="System.IO.Packaging.TestData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.IO.Packaging.TestData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="System.Net.TestData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.Net.TestData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="System.Private.Runtime.UnicodeData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.Private.Runtime.UnicodeData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="System.Runtime.TimeZoneData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.Runtime.TimeZoneData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="System.Security.Cryptography.X509Certificates.TestData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.Security.Cryptography.X509Certificates.TestData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="System.Text.RegularExpressions.TestData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.Text.RegularExpressions.TestData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="System.Windows.Extensions.TestData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.Windows.Extensions.TestData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.CilStrip.Sources" Version="9.0.0-beta.24072.1">
+    <Dependency Name="Microsoft.DotNet.CilStrip.Sources" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="runtime.linux-arm64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.linux-arm64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.linux-arm64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.linux-arm64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.linux-musl-arm64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.linux-musl-arm64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.linux-musl-arm64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.linux-musl-arm64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.linux-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.linux-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.linux-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.linux-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.linux-musl-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.linux-musl-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.linux-musl-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.linux-musl-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.win-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.win-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.win-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.win-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.osx-arm64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.osx-arm64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.osx-arm64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.osx-arm64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.osx-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.osx-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Sdk" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="runtime.osx-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24105.1" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
+    <Dependency Name="runtime.osx-x64.Microsoft.NETCore.Runtime.Mono.LLVM.Tools" Version="16.0.5-alpha.1.24203.4" CoherentParentDependency="Microsoft.NET.Workload.Emscripten.Current.Manifest-9.0.100.Transport">
       <Uri>https://github.com/dotnet/llvm-project</Uri>
-      <Sha>cb7d881de3674394a5f98d167bfb58f9aff9768b</Sha>
+      <Sha>8b4f10702e13ea221a33e91c2ef46c4b7910b56c</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.NETCore.App.Runtime.win-x64" Version="9.0.0-alpha.1.24072.1">
+    <Dependency Name="Microsoft.NETCore.App.Runtime.win-x64" Version="9.0.0-preview.4.24201.1">
       <Uri>https://github.com/dotnet/runtime</Uri>
-      <Sha>205ef031e0fe5152dede0bd9f99d0f6f9e7f1e45</Sha>
+      <Sha>ec4437be46d8b90bc9fa6740c556bd860d9fe5ab</Sha>
     </Dependency>
-    <Dependency Name="runtime.native.System.IO.Ports" Version="9.0.0-alpha.1.24072.1">
+    <Dependency Name="runtime.native.System.IO.Ports" Version="9.0.0-preview.4.24201.1">
       <Uri>https://github.com/dotnet/runtime</Uri>
-      <Sha>205ef031e0fe5152dede0bd9f99d0f6f9e7f1e45</Sha>
+      <Sha>ec4437be46d8b90bc9fa6740c556bd860d9fe5ab</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.NETCore.ILAsm" Version="9.0.0-alpha.1.24072.1">
+    <Dependency Name="Microsoft.NETCore.ILAsm" Version="9.0.0-preview.4.24201.1">
       <Uri>https://github.com/dotnet/runtime</Uri>
-      <Sha>205ef031e0fe5152dede0bd9f99d0f6f9e7f1e45</Sha>
+      <Sha>ec4437be46d8b90bc9fa6740c556bd860d9fe5ab</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.NET.Sdk.IL" Version="9.0.0-alpha.1.24072.1">
+    <Dependency Name="Microsoft.NET.Sdk.IL" Version="9.0.0-preview.4.24201.1">
       <Uri>https://github.com/dotnet/runtime</Uri>
-      <Sha>205ef031e0fe5152dede0bd9f99d0f6f9e7f1e45</Sha>
+      <Sha>ec4437be46d8b90bc9fa6740c556bd860d9fe5ab</Sha>
     </Dependency>
-    <Dependency Name="System.Text.Json" Version="9.0.0-alpha.1.24072.1">
+    <Dependency Name="System.Text.Json" Version="9.0.0-preview.4.24201.1">
       <Uri>https://github.com/dotnet/runtime</Uri>
-      <Sha>205ef031e0fe5152dede0bd9f99d0f6f9e7f1e45</Sha>
+      <Sha>ec4437be46d8b90bc9fa6740c556bd860d9fe5ab</Sha>
     </Dependency>
     <!-- Intermediate is necessary for source build. -->
-    <Dependency Name="Microsoft.SourceBuild.Intermediate.runtime.linux-x64" Version="9.0.0-alpha.1.24072.1">
+    <Dependency Name="Microsoft.SourceBuild.Intermediate.runtime.linux-x64" Version="9.0.0-preview.4.24201.1">
       <Uri>https://github.com/dotnet/runtime</Uri>
-      <Sha>205ef031e0fe5152dede0bd9f99d0f6f9e7f1e45</Sha>
+      <Sha>ec4437be46d8b90bc9fa6740c556bd860d9fe5ab</Sha>
       <SourceBuild RepoName="runtime" ManagedOnly="false" />
     </Dependency>
-    <Dependency Name="System.Reflection.MetadataLoadContext" Version="8.0.0">
+    <Dependency Name="Microsoft.DotNet.ILCompiler" Version="9.0.0-preview.4.24201.1">
       <Uri>https://github.com/dotnet/runtime</Uri>
-      <Sha>4dffd80c4d77c27e772a0be26e8036af77fbb26e</Sha>
-      <SourceBuild RepoName="runtime" ManagedOnly="false" />
+      <Sha>ec4437be46d8b90bc9fa6740c556bd860d9fe5ab</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.ILCompiler" Version="9.0.0-alpha.1.24072.1">
+    <Dependency Name="System.Reflection.Metadata" Version="9.0.0-preview.4.24201.1">
       <Uri>https://github.com/dotnet/runtime</Uri>
-      <Sha>205ef031e0fe5152dede0bd9f99d0f6f9e7f1e45</Sha>
+      <Sha>ec4437be46d8b90bc9fa6740c556bd860d9fe5ab</Sha>
     </Dependency>
-    <Dependency Name="System.Reflection.Metadata" Version="9.0.0-alpha.1.24072.1">
+    <Dependency Name="System.Reflection.MetadataLoadContext" Version="9.0.0-preview.4.24201.1">
       <Uri>https://github.com/dotnet/runtime</Uri>
-      <Sha>205ef031e0fe5152dede0bd9f99d0f6f9e7f1e45</Sha>
+      <Sha>ec4437be46d8b90bc9fa6740c556bd860d9fe5ab</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.XHarness.TestRunners.Common" Version="9.0.0-prerelease.24077.1">
+    <Dependency Name="Microsoft.DotNet.XHarness.TestRunners.Common" Version="9.0.0-prerelease.24203.1">
       <Uri>https://github.com/dotnet/xharness</Uri>
-      <Sha>f49b5c0db06528a9580686a5b63b0e5b4aba566b</Sha>
+      <Sha>28af9496b0e260f7e66ec549b39f1410ee9743d1</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.XHarness.TestRunners.Xunit" Version="9.0.0-prerelease.24077.1">
+    <Dependency Name="Microsoft.DotNet.XHarness.TestRunners.Xunit" Version="9.0.0-prerelease.24203.1">
       <Uri>https://github.com/dotnet/xharness</Uri>
-      <Sha>f49b5c0db06528a9580686a5b63b0e5b4aba566b</Sha>
+      <Sha>28af9496b0e260f7e66ec549b39f1410ee9743d1</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.XHarness.CLI" Version="9.0.0-prerelease.24077.1">
+    <Dependency Name="Microsoft.DotNet.XHarness.CLI" Version="9.0.0-prerelease.24203.1">
       <Uri>https://github.com/dotnet/xharness</Uri>
-      <Sha>f49b5c0db06528a9580686a5b63b0e5b4aba566b</Sha>
+      <Sha>28af9496b0e260f7e66ec549b39f1410ee9743d1</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.PackageTesting" Version="9.0.0-beta.24106.2">
+    <Dependency Name="Microsoft.DotNet.PackageTesting" Version="9.0.0-beta.24205.4">
       <Uri>https://github.com/dotnet/arcade</Uri>
-      <Sha>f7eb7794c703dc29a83b414b786e9a154f0ca042</Sha>
+      <Sha>541820fbd313f9bb82b756b66d258fe316d5e48b</Sha>
     </Dependency>
-    <Dependency Name="optimization.windows_nt-x64.MIBC.Runtime" Version="1.0.0-prerelease.24104.2">
+    <Dependency Name="optimization.windows_nt-x64.MIBC.Runtime" Version="1.0.0-prerelease.24106.4">
       <Uri>https://dev.azure.com/dnceng/internal/_git/dotnet-optimization</Uri>
-      <Sha>db9f1c2362565f3ef41c8e8feb5ed49ab11a6459</Sha>
+      <Sha>78a5b978e1965c1335edb4b9a22bc4d6ff5a77a6</Sha>
     </Dependency>
-    <Dependency Name="optimization.windows_nt-x86.MIBC.Runtime" Version="1.0.0-prerelease.24104.2">
+    <Dependency Name="optimization.windows_nt-x86.MIBC.Runtime" Version="1.0.0-prerelease.24106.4">
       <Uri>https://dev.azure.com/dnceng/internal/_git/dotnet-optimization</Uri>
-      <Sha>db9f1c2362565f3ef41c8e8feb5ed49ab11a6459</Sha>
+      <Sha>78a5b978e1965c1335edb4b9a22bc4d6ff5a77a6</Sha>
     </Dependency>
-    <Dependency Name="optimization.linux-x64.MIBC.Runtime" Version="1.0.0-prerelease.24104.2">
+    <Dependency Name="optimization.linux-x64.MIBC.Runtime" Version="1.0.0-prerelease.24106.4">
       <Uri>https://dev.azure.com/dnceng/internal/_git/dotnet-optimization</Uri>
-      <Sha>db9f1c2362565f3ef41c8e8feb5ed49ab11a6459</Sha>
+      <Sha>78a5b978e1965c1335edb4b9a22bc4d6ff5a77a6</Sha>
     </Dependency>
-    <Dependency Name="optimization.PGO.CoreCLR" Version="1.0.0-prerelease.24104.2">
+    <Dependency Name="optimization.PGO.CoreCLR" Version="1.0.0-prerelease.24106.4">
       <Uri>https://dev.azure.com/dnceng/internal/_git/dotnet-optimization</Uri>
-      <Sha>db9f1c2362565f3ef41c8e8feb5ed49ab11a6459</Sha>
+      <Sha>78a5b978e1965c1335edb4b9a22bc4d6ff5a77a6</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.HotReload.Utils.Generator.BuildTool" Version="9.0.0-alpha.0.24072.1">
+    <Dependency Name="Microsoft.DotNet.HotReload.Utils.Generator.BuildTool" Version="9.0.0-alpha.0.24201.1">
       <Uri>https://github.com/dotnet/hotreload-utils</Uri>
-      <Sha>fe2c1fb118054c09934acc75fde0e7165f5d385f</Sha>
+      <Sha>668ee30182fea845064853c46be5f54ac6efd110</Sha>
     </Dependency>
-    <Dependency Name="System.Runtime.Numerics.TestData" Version="9.0.0-beta.24072.1">
+    <Dependency Name="System.Runtime.Numerics.TestData" Version="9.0.0-beta.24203.1">
       <Uri>https://github.com/dotnet/runtime-assets</Uri>
-      <Sha>a321e366dc8783b4b84127eb50d7feeda6702c0f</Sha>
+      <Sha>ad97a45c2567fa7c3a067079f166c3f3c9fecd60</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.Net.Compilers.Toolset" Version="4.10.0-1.24069.13">
+    <Dependency Name="Microsoft.Net.Compilers.Toolset" Version="4.10.0-3.24202.15">
       <Uri>https://github.com/dotnet/roslyn</Uri>
-      <Sha>2fe96bca1092f880e91eea6eb17ea3487d89309a</Sha>
+      <Sha>cbca41cad4e21c29548e9e57d7135740b6f78df9</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.CodeAnalysis" Version="4.10.0-1.24069.13">
+    <Dependency Name="Microsoft.CodeAnalysis" Version="4.10.0-3.24202.15">
       <Uri>https://github.com/dotnet/roslyn</Uri>
-      <Sha>2fe96bca1092f880e91eea6eb17ea3487d89309a</Sha>
+      <Sha>cbca41cad4e21c29548e9e57d7135740b6f78df9</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.CodeAnalysis.CSharp" Version="4.10.0-1.24069.13">
+    <Dependency Name="Microsoft.CodeAnalysis.CSharp" Version="4.10.0-3.24202.15">
       <Uri>https://github.com/dotnet/roslyn</Uri>
-      <Sha>2fe96bca1092f880e91eea6eb17ea3487d89309a</Sha>
+      <Sha>cbca41cad4e21c29548e9e57d7135740b6f78df9</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.CodeAnalysis.Analyzers" Version="3.11.0-beta1.24072.1">
+    <Dependency Name="Microsoft.CodeAnalysis.Analyzers" Version="3.11.0-beta1.24177.1">
       <Uri>https://github.com/dotnet/roslyn-analyzers</Uri>
-      <Sha>e39798fc8357615ab319c81b20acfb036ef7b513</Sha>
+      <Sha>ad732e236e7ffcb66de4b45a1b736aad4ccdcd83</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.CodeAnalysis.NetAnalyzers" Version="9.0.0-preview.24072.1">
+    <Dependency Name="Microsoft.CodeAnalysis.NetAnalyzers" Version="9.0.0-preview.24177.1">
       <Uri>https://github.com/dotnet/roslyn-analyzers</Uri>
-      <Sha>e39798fc8357615ab319c81b20acfb036ef7b513</Sha>
+      <Sha>ad732e236e7ffcb66de4b45a1b736aad4ccdcd83</Sha>
     </Dependency>
     <!-- Intermediate is necessary for source build. -->
-    <Dependency Name="Microsoft.SourceBuild.Intermediate.roslyn" Version="4.10.0-1.24069.13">
+    <Dependency Name="Microsoft.SourceBuild.Intermediate.roslyn" Version="4.10.0-3.24202.15">
       <Uri>https://github.com/dotnet/roslyn</Uri>
-      <Sha>2fe96bca1092f880e91eea6eb17ea3487d89309a</Sha>
+      <Sha>cbca41cad4e21c29548e9e57d7135740b6f78df9</Sha>
       <SourceBuild RepoName="roslyn" ManagedOnly="true" />
     </Dependency>
-    <Dependency Name="Microsoft.DotNet.ApiCompat.Task" Version="9.0.100-alpha.1.24072.3">
+    <Dependency Name="Microsoft.DotNet.ApiCompat.Task" Version="9.0.100-preview.4.24175.4">
       <Uri>https://github.com/dotnet/sdk</Uri>
-      <Sha>de4f12b8ab6692b01776d362f4fa609fd3f1154a</Sha>
+      <Sha>219a6fc9954d632d7c119b31d59ff1516ff04d98</Sha>
     </Dependency>
     <!-- Intermediate is necessary for source build. -->
-    <Dependency Name="Microsoft.SourceBuild.Intermediate.sdk" Version="9.0.100-alpha.1.24072.3">
+    <Dependency Name="Microsoft.SourceBuild.Intermediate.sdk" Version="9.0.100-preview.4.24175.4">
       <Uri>https://github.com/dotnet/sdk</Uri>
-      <Sha>de4f12b8ab6692b01776d362f4fa609fd3f1154a</Sha>
+      <Sha>219a6fc9954d632d7c119b31d59ff1516ff04d98</Sha>
       <SourceBuild RepoName="sdk" ManagedOnly="true" />
     </Dependency>
-    <Dependency Name="optimization.windows_nt-arm64.MIBC.Runtime" Version="1.0.0-prerelease.24104.2">
+    <Dependency Name="optimization.windows_nt-arm64.MIBC.Runtime" Version="1.0.0-prerelease.24106.4">
       <Uri>https://dev.azure.com/dnceng/internal/_git/dotnet-optimization</Uri>
-      <Sha>db9f1c2362565f3ef41c8e8feb5ed49ab11a6459</Sha>
+      <Sha>78a5b978e1965c1335edb4b9a22bc4d6ff5a77a6</Sha>
     </Dependency>
-    <Dependency Name="optimization.linux-arm64.MIBC.Runtime" Version="1.0.0-prerelease.24104.2">
+    <Dependency Name="optimization.linux-arm64.MIBC.Runtime" Version="1.0.0-prerelease.24106.4">
       <Uri>https://dev.azure.com/dnceng/internal/_git/dotnet-optimization</Uri>
-      <Sha>db9f1c2362565f3ef41c8e8feb5ed49ab11a6459</Sha>
+      <Sha>78a5b978e1965c1335edb4b9a22bc4d6ff5a77a6</Sha>
     </Dependency>
     <!-- Necessary for source-build. This allows the package to be retrieved from previously-source-built artifacts
          and flow in as dependencies of the packages produced by runtime. -->
@@ -443,9 +410,9 @@
       <Uri>https://github.com/NuGet/NuGet.Client</Uri>
       <Sha>8fef55f5a55a3b4f2c96cd1a9b5ddc51d4b927f8</Sha>
     </Dependency>
-    <Dependency Name="Microsoft.Dotnet.Sdk.Internal" Version="9.0.100-alpha.1.24070.3">
+    <Dependency Name="Microsoft.Dotnet.Sdk.Internal" Version="9.0.100-preview.4.24175.5">
       <Uri>https://github.com/dotnet/installer</Uri>
-      <Sha>f91d4ca399ea4389d04b4700965d207f3a8d5c3b</Sha>
+      <Sha>dc43d363d284c26cb1b463683d6ebb2c88ab0b58</Sha>
     </Dependency>
   </ToolsetDependencies>
 </Dependencies>
diff --git a/eng/Versions.props b/eng/Versions.props
index 6dacf17ea232..cd1960205b4f 100644
--- a/eng/Versions.props
+++ b/eng/Versions.props
@@ -7,11 +7,11 @@
     <MinorVersion>0</MinorVersion>
     <PatchVersion>0</PatchVersion>
     <SdkBandVersion>9.0.100</SdkBandVersion>
-    <PackageVersionNet8>8.0.0</PackageVersionNet8>
+    <PackageVersionNet8>8.0.2</PackageVersionNet8>
     <PackageVersionNet7>7.0.$([MSBuild]::Add($([System.Version]::Parse('$(PackageVersionNet8)').Build),14))</PackageVersionNet7>
     <PackageVersionNet6>6.0.$([MSBuild]::Add($([System.Version]::Parse('$(PackageVersionNet7)').Build),11))</PackageVersionNet6>
     <PreReleaseVersionLabel>preview</PreReleaseVersionLabel>
-    <PreReleaseVersionIteration>2</PreReleaseVersionIteration>
+    <PreReleaseVersionIteration>4</PreReleaseVersionIteration>
     <!-- Enable to remove prerelease label. -->
     <StabilizePackageVersion Condition="'$(StabilizePackageVersion)' == ''">false</StabilizePackageVersion>
     <DotNetFinalVersionKind Condition="'$(StabilizePackageVersion)' == 'true'">release</DotNetFinalVersionKind>
@@ -26,25 +26,25 @@
     <FlagNetStandard1XDependencies Condition="'$(FlagNetStandard1XDependencies)' == ''">true</FlagNetStandard1XDependencies>
     <!-- Runtime controls its dependency graph via Traversal projects and doesn't want or need Arcade's ExcludeFrom infrastructure. -->
     <DisableArcadeExcludeFromBuildSupport>true</DisableArcadeExcludeFromBuildSupport>
-    <!-- Use SDK compilers in full source-build. -->
-    <UsingToolMicrosoftNetCompilers Condition="'$(DotNetBuildFromSourceFlavor)' != 'Product'">true</UsingToolMicrosoftNetCompilers>
+    <!-- Use SDK compilers in full VMR builds. -->
+    <UsingToolMicrosoftNetCompilers Condition="'$(DotNetBuildOrchestrator)' != 'true'">true</UsingToolMicrosoftNetCompilers>
   </PropertyGroup>
   <ItemGroup>
     <WorkloadSdkBandVersions Include="$(SdkBandVersion)" SupportsMachineArch="true" />
   </ItemGroup>
   <PropertyGroup>
     <!-- dotnet/roslyn-analyzers dependencies -->
-    <MicrosoftCodeAnalysisAnalyzersVersion>3.11.0-beta1.24072.1</MicrosoftCodeAnalysisAnalyzersVersion>
-    <MicrosoftCodeAnalysisNetAnalyzersVersion>9.0.0-preview.24072.1</MicrosoftCodeAnalysisNetAnalyzersVersion>
+    <MicrosoftCodeAnalysisAnalyzersVersion>3.11.0-beta1.24177.1</MicrosoftCodeAnalysisAnalyzersVersion>
+    <MicrosoftCodeAnalysisNetAnalyzersVersion>9.0.0-preview.24177.1</MicrosoftCodeAnalysisNetAnalyzersVersion>
     <!-- dotnet/roslyn dependencies -->
     <!--
       These versions should not be used by any project that contributes to the design-time experience in VS, such as an analyzer, code-fix, or generator assembly.
       Any tools that contribute to the design-time experience should use the MicrosoftCodeAnalysisVersion_LatestVS property above to ensure
       they do not break the local dev experience.
     -->
-    <MicrosoftCodeAnalysisCSharpVersion>4.10.0-1.24069.13</MicrosoftCodeAnalysisCSharpVersion>
-    <MicrosoftCodeAnalysisVersion>4.10.0-1.24069.13</MicrosoftCodeAnalysisVersion>
-    <MicrosoftNetCompilersToolsetVersion>4.10.0-1.24069.13</MicrosoftNetCompilersToolsetVersion>
+    <MicrosoftCodeAnalysisCSharpVersion>4.10.0-3.24202.15</MicrosoftCodeAnalysisCSharpVersion>
+    <MicrosoftCodeAnalysisVersion>4.10.0-3.24202.15</MicrosoftCodeAnalysisVersion>
+    <MicrosoftNetCompilersToolsetVersion>4.10.0-3.24202.15</MicrosoftNetCompilersToolsetVersion>
   </PropertyGroup>
   <!--
     For source generator support we need to target multiple versions of Roslyn in order to be able to run on older versions of Roslyn.
@@ -81,91 +81,83 @@
   <PropertyGroup>
     <StaticCsVersion>0.2.0</StaticCsVersion>
     <!-- SDK dependencies -->
-    <MicrosoftDotNetApiCompatTaskVersion>9.0.100-alpha.1.24072.3</MicrosoftDotNetApiCompatTaskVersion>
+    <MicrosoftDotNetApiCompatTaskVersion>9.0.100-preview.4.24175.4</MicrosoftDotNetApiCompatTaskVersion>
     <!-- Arcade dependencies -->
-    <MicrosoftDotNetBuildTasksFeedVersion>9.0.0-beta.24106.2</MicrosoftDotNetBuildTasksFeedVersion>
-    <MicrosoftDotNetCodeAnalysisVersion>9.0.0-beta.24106.2</MicrosoftDotNetCodeAnalysisVersion>
-    <MicrosoftDotNetGenAPIVersion>9.0.0-beta.24106.2</MicrosoftDotNetGenAPIVersion>
-    <MicrosoftDotNetGenFacadesVersion>9.0.0-beta.24106.2</MicrosoftDotNetGenFacadesVersion>
-    <MicrosoftDotNetXUnitAssertVersion>2.6.7-beta.24106.2</MicrosoftDotNetXUnitAssertVersion>
-    <MicrosoftDotNetXUnitExtensionsVersion>9.0.0-beta.24106.2</MicrosoftDotNetXUnitExtensionsVersion>
-    <MicrosoftDotNetXUnitConsoleRunnerVersion>2.6.7-beta.24106.2</MicrosoftDotNetXUnitConsoleRunnerVersion>
-    <MicrosoftDotNetBuildTasksArchivesVersion>9.0.0-beta.24106.2</MicrosoftDotNetBuildTasksArchivesVersion>
-    <MicrosoftDotNetBuildTasksInstallersVersion>9.0.0-beta.24106.2</MicrosoftDotNetBuildTasksInstallersVersion>
-    <MicrosoftDotNetBuildTasksPackagingVersion>9.0.0-beta.24106.2</MicrosoftDotNetBuildTasksPackagingVersion>
-    <MicrosoftDotNetBuildTasksTargetFrameworkVersion>9.0.0-beta.24106.2</MicrosoftDotNetBuildTasksTargetFrameworkVersion>
-    <MicrosoftDotNetBuildTasksTemplatingVersion>9.0.0-beta.24106.2</MicrosoftDotNetBuildTasksTemplatingVersion>
-    <MicrosoftDotNetBuildTasksWorkloadsPackageVersion>9.0.0-beta.24106.2</MicrosoftDotNetBuildTasksWorkloadsPackageVersion>
-    <MicrosoftDotNetRemoteExecutorVersion>9.0.0-beta.24106.2</MicrosoftDotNetRemoteExecutorVersion>
-    <MicrosoftDotNetVersionToolsTasksVersion>9.0.0-beta.24106.2</MicrosoftDotNetVersionToolsTasksVersion>
-    <MicrosoftDotNetPackageTestingVersion>9.0.0-beta.24106.2</MicrosoftDotNetPackageTestingVersion>
+    <MicrosoftDotNetBuildTasksFeedVersion>9.0.0-beta.24205.4</MicrosoftDotNetBuildTasksFeedVersion>
+    <MicrosoftDotNetCodeAnalysisVersion>9.0.0-beta.24205.4</MicrosoftDotNetCodeAnalysisVersion>
+    <MicrosoftDotNetGenAPIVersion>9.0.0-beta.24205.4</MicrosoftDotNetGenAPIVersion>
+    <MicrosoftDotNetGenFacadesVersion>9.0.0-beta.24205.4</MicrosoftDotNetGenFacadesVersion>
+    <MicrosoftDotNetXUnitAssertVersion>2.6.7-beta.24205.4</MicrosoftDotNetXUnitAssertVersion>
+    <MicrosoftDotNetXUnitExtensionsVersion>9.0.0-beta.24205.4</MicrosoftDotNetXUnitExtensionsVersion>
+    <MicrosoftDotNetXUnitConsoleRunnerVersion>2.6.7-beta.24205.4</MicrosoftDotNetXUnitConsoleRunnerVersion>
+    <MicrosoftDotNetBuildTasksArchivesVersion>9.0.0-beta.24205.4</MicrosoftDotNetBuildTasksArchivesVersion>
+    <MicrosoftDotNetBuildTasksInstallersVersion>9.0.0-beta.24205.4</MicrosoftDotNetBuildTasksInstallersVersion>
+    <MicrosoftDotNetBuildTasksPackagingVersion>9.0.0-beta.24205.4</MicrosoftDotNetBuildTasksPackagingVersion>
+    <MicrosoftDotNetBuildTasksTargetFrameworkVersion>9.0.0-beta.24205.4</MicrosoftDotNetBuildTasksTargetFrameworkVersion>
+    <MicrosoftDotNetBuildTasksTemplatingVersion>9.0.0-beta.24205.4</MicrosoftDotNetBuildTasksTemplatingVersion>
+    <MicrosoftDotNetBuildTasksWorkloadsPackageVersion>9.0.0-beta.24205.4</MicrosoftDotNetBuildTasksWorkloadsPackageVersion>
+    <MicrosoftDotNetRemoteExecutorVersion>9.0.0-beta.24205.4</MicrosoftDotNetRemoteExecutorVersion>
+    <MicrosoftDotNetVersionToolsTasksVersion>9.0.0-beta.24205.4</MicrosoftDotNetVersionToolsTasksVersion>
+    <MicrosoftDotNetPackageTestingVersion>9.0.0-beta.24205.4</MicrosoftDotNetPackageTestingVersion>
     <!-- TODO: Remove pinned xunit.analyzers version: https://github.com/dotnet/runtime/issues/97088 -->
     <XUnitAnalyzersVersion>1.4.0</XUnitAnalyzersVersion>
     <!-- NuGet dependencies -->
     <NuGetBuildTasksPackVersion>6.0.0-preview.1.102</NuGetBuildTasksPackVersion>
     <!-- Installer dependencies -->
-    <MicrosoftNETCoreAppRuntimewinx64Version>9.0.0-alpha.1.24072.1</MicrosoftNETCoreAppRuntimewinx64Version>
+    <MicrosoftNETCoreAppRuntimewinx64Version>9.0.0-preview.4.24201.1</MicrosoftNETCoreAppRuntimewinx64Version>
     <MicrosoftExtensionsDependencyModelVersion>6.0.0</MicrosoftExtensionsDependencyModelVersion>
     <!-- ILAsm dependencies -->
-    <MicrosoftNETCoreILAsmVersion>9.0.0-alpha.1.24072.1</MicrosoftNETCoreILAsmVersion>
-    <!-- ObjWriter dependencies -->
-    <runtimelinuxarm64MicrosoftNETCoreRuntimeObjWriterVersion>16.0.5-alpha.1.24105.1</runtimelinuxarm64MicrosoftNETCoreRuntimeObjWriterVersion>
-    <runtimelinuxx64MicrosoftNETCoreRuntimeObjWriterVersion>16.0.5-alpha.1.24105.1</runtimelinuxx64MicrosoftNETCoreRuntimeObjWriterVersion>
-    <runtimelinuxmuslarm64MicrosoftNETCoreRuntimeObjWriterVersion>16.0.5-alpha.1.24105.1</runtimelinuxmuslarm64MicrosoftNETCoreRuntimeObjWriterVersion>
-    <runtimelinuxmuslx64MicrosoftNETCoreRuntimeObjWriterVersion>16.0.5-alpha.1.24105.1</runtimelinuxmuslx64MicrosoftNETCoreRuntimeObjWriterVersion>
-    <runtimewinarm64MicrosoftNETCoreRuntimeObjWriterVersion>16.0.5-alpha.1.24105.1</runtimewinarm64MicrosoftNETCoreRuntimeObjWriterVersion>
-    <runtimewinx64MicrosoftNETCoreRuntimeObjWriterVersion>16.0.5-alpha.1.24105.1</runtimewinx64MicrosoftNETCoreRuntimeObjWriterVersion>
-    <runtimeosxarm64MicrosoftNETCoreRuntimeObjWriterVersion>16.0.5-alpha.1.24105.1</runtimeosxarm64MicrosoftNETCoreRuntimeObjWriterVersion>
-    <runtimeosxx64MicrosoftNETCoreRuntimeObjWriterVersion>16.0.5-alpha.1.24105.1</runtimeosxx64MicrosoftNETCoreRuntimeObjWriterVersion>
+    <MicrosoftNETCoreILAsmVersion>9.0.0-preview.4.24201.1</MicrosoftNETCoreILAsmVersion>
     <!-- Libraries dependencies -->
     <MicrosoftBclAsyncInterfacesVersion>6.0.0</MicrosoftBclAsyncInterfacesVersion>
     <MicrosoftBclHashCodeVersion>1.1.1</MicrosoftBclHashCodeVersion>
     <MicrosoftWin32RegistryVersion>5.0.0</MicrosoftWin32RegistryVersion>
     <StyleCopAnalyzersVersion>1.2.0-beta.507</StyleCopAnalyzersVersion>
     <SystemBuffersVersion>4.5.1</SystemBuffersVersion>
-    <SystemCollectionsImmutableVersion>8.0.0</SystemCollectionsImmutableVersion>
     <SystemComponentModelAnnotationsVersion>5.0.0</SystemComponentModelAnnotationsVersion>
     <SystemDataSqlClientVersion>4.8.6</SystemDataSqlClientVersion>
     <SystemDrawingCommonVersion>8.0.0</SystemDrawingCommonVersion>
     <SystemIOFileSystemAccessControlVersion>5.0.0</SystemIOFileSystemAccessControlVersion>
     <SystemMemoryVersion>4.5.5</SystemMemoryVersion>
-    <SystemReflectionMetadataVersion>9.0.0-alpha.1.24072.1</SystemReflectionMetadataVersion>
-    <!-- Keep toolset versions in sync with dotnet/msbuild and dotnet/sdk -->
-    <SystemReflectionMetadataToolsetVersion>8.0.0</SystemReflectionMetadataToolsetVersion>
-    <SystemReflectionMetadataLoadContextVersion>8.0.0</SystemReflectionMetadataLoadContextVersion>
+    <SystemReflectionMetadataVersion>9.0.0-preview.4.24201.1</SystemReflectionMetadataVersion>
+    <SystemReflectionMetadataLoadContextVersion>9.0.0-preview.4.24201.1</SystemReflectionMetadataLoadContextVersion>
     <SystemSecurityAccessControlVersion>6.0.0</SystemSecurityAccessControlVersion>
     <SystemSecurityCryptographyCngVersion>5.0.0</SystemSecurityCryptographyCngVersion>
     <SystemSecurityCryptographyOpenSslVersion>5.0.0</SystemSecurityCryptographyOpenSslVersion>
     <SystemSecurityPrincipalWindowsVersion>5.0.0</SystemSecurityPrincipalWindowsVersion>
     <SystemSecurityPermissionsVersion>7.0.0</SystemSecurityPermissionsVersion>
-    <SystemTextJsonVersion>9.0.0-alpha.1.24072.1</SystemTextJsonVersion>
+    <SystemTextJsonVersion>9.0.0-preview.4.24201.1</SystemTextJsonVersion>
     <SystemRuntimeCompilerServicesUnsafeVersion>6.0.0</SystemRuntimeCompilerServicesUnsafeVersion>
     <SystemThreadingAccessControlVersion>7.0.0</SystemThreadingAccessControlVersion>
     <SystemThreadingTasksExtensionsVersion>4.5.4</SystemThreadingTasksExtensionsVersion>
     <SystemValueTupleVersion>4.5.0</SystemValueTupleVersion>
-    <runtimenativeSystemIOPortsVersion>9.0.0-alpha.1.24072.1</runtimenativeSystemIOPortsVersion>
+    <runtimenativeSystemIOPortsVersion>9.0.0-preview.4.24201.1</runtimenativeSystemIOPortsVersion>
+    <!-- Keep toolset versions in sync with dotnet/msbuild and dotnet/sdk -->
+    <SystemCollectionsImmutableToolsetVersion>8.0.0</SystemCollectionsImmutableToolsetVersion>
+    <SystemReflectionMetadataToolsetVersion>8.0.0</SystemReflectionMetadataToolsetVersion>
+    <SystemReflectionMetadataLoadContextToolsetVersion>8.0.0</SystemReflectionMetadataLoadContextToolsetVersion>
     <!-- Runtime-Assets dependencies -->
-    <SystemRuntimeNumericsTestDataVersion>9.0.0-beta.24072.1</SystemRuntimeNumericsTestDataVersion>
-    <SystemComponentModelTypeConverterTestDataVersion>9.0.0-beta.24072.1</SystemComponentModelTypeConverterTestDataVersion>
-    <SystemDataCommonTestDataVersion>9.0.0-beta.24072.1</SystemDataCommonTestDataVersion>
-    <SystemDrawingCommonTestDataVersion>9.0.0-beta.24072.1</SystemDrawingCommonTestDataVersion>
-    <SystemFormatsTarTestDataVersion>9.0.0-beta.24072.1</SystemFormatsTarTestDataVersion>
-    <SystemIOCompressionTestDataVersion>9.0.0-beta.24072.1</SystemIOCompressionTestDataVersion>
-    <SystemIOPackagingTestDataVersion>9.0.0-beta.24072.1</SystemIOPackagingTestDataVersion>
-    <SystemNetTestDataVersion>9.0.0-beta.24072.1</SystemNetTestDataVersion>
-    <SystemPrivateRuntimeUnicodeDataVersion>9.0.0-beta.24072.1</SystemPrivateRuntimeUnicodeDataVersion>
-    <SystemRuntimeTimeZoneDataVersion>9.0.0-beta.24072.1</SystemRuntimeTimeZoneDataVersion>
-    <SystemSecurityCryptographyX509CertificatesTestDataVersion>9.0.0-beta.24072.1</SystemSecurityCryptographyX509CertificatesTestDataVersion>
-    <SystemTextRegularExpressionsTestDataVersion>9.0.0-beta.24072.1</SystemTextRegularExpressionsTestDataVersion>
-    <SystemWindowsExtensionsTestDataVersion>9.0.0-beta.24072.1</SystemWindowsExtensionsTestDataVersion>
-    <MicrosoftDotNetCilStripSourcesVersion>9.0.0-beta.24072.1</MicrosoftDotNetCilStripSourcesVersion>
+    <SystemRuntimeNumericsTestDataVersion>9.0.0-beta.24203.1</SystemRuntimeNumericsTestDataVersion>
+    <SystemComponentModelTypeConverterTestDataVersion>9.0.0-beta.24203.1</SystemComponentModelTypeConverterTestDataVersion>
+    <SystemDataCommonTestDataVersion>9.0.0-beta.24203.1</SystemDataCommonTestDataVersion>
+    <SystemDrawingCommonTestDataVersion>9.0.0-beta.24203.1</SystemDrawingCommonTestDataVersion>
+    <SystemFormatsTarTestDataVersion>9.0.0-beta.24203.1</SystemFormatsTarTestDataVersion>
+    <SystemIOCompressionTestDataVersion>9.0.0-beta.24203.1</SystemIOCompressionTestDataVersion>
+    <SystemIOPackagingTestDataVersion>9.0.0-beta.24203.1</SystemIOPackagingTestDataVersion>
+    <SystemNetTestDataVersion>9.0.0-beta.24203.1</SystemNetTestDataVersion>
+    <SystemPrivateRuntimeUnicodeDataVersion>9.0.0-beta.24203.1</SystemPrivateRuntimeUnicodeDataVersion>
+    <SystemRuntimeTimeZoneDataVersion>9.0.0-beta.24203.1</SystemRuntimeTimeZoneDataVersion>
+    <SystemSecurityCryptographyX509CertificatesTestDataVersion>9.0.0-beta.24203.1</SystemSecurityCryptographyX509CertificatesTestDataVersion>
+    <SystemTextRegularExpressionsTestDataVersion>9.0.0-beta.24203.1</SystemTextRegularExpressionsTestDataVersion>
+    <SystemWindowsExtensionsTestDataVersion>9.0.0-beta.24203.1</SystemWindowsExtensionsTestDataVersion>
+    <MicrosoftDotNetCilStripSourcesVersion>9.0.0-beta.24203.1</MicrosoftDotNetCilStripSourcesVersion>
     <!-- dotnet-optimization dependencies -->
-    <optimizationwindows_ntx64MIBCRuntimeVersion>1.0.0-prerelease.24104.2</optimizationwindows_ntx64MIBCRuntimeVersion>
-    <optimizationwindows_ntx86MIBCRuntimeVersion>1.0.0-prerelease.24104.2</optimizationwindows_ntx86MIBCRuntimeVersion>
-    <optimizationwindows_ntarm64MIBCRuntimeVersion>1.0.0-prerelease.24104.2</optimizationwindows_ntarm64MIBCRuntimeVersion>
-    <optimizationlinuxx64MIBCRuntimeVersion>1.0.0-prerelease.24104.2</optimizationlinuxx64MIBCRuntimeVersion>
-    <optimizationlinuxarm64MIBCRuntimeVersion>1.0.0-prerelease.24104.2</optimizationlinuxarm64MIBCRuntimeVersion>
-    <optimizationPGOCoreCLRVersion>1.0.0-prerelease.24104.2</optimizationPGOCoreCLRVersion>
+    <optimizationwindows_ntx64MIBCRuntimeVersion>1.0.0-prerelease.24106.4</optimizationwindows_ntx64MIBCRuntimeVersion>
+    <optimizationwindows_ntx86MIBCRuntimeVersion>1.0.0-prerelease.24106.4</optimizationwindows_ntx86MIBCRuntimeVersion>
+    <optimizationwindows_ntarm64MIBCRuntimeVersion>1.0.0-prerelease.24106.4</optimizationwindows_ntarm64MIBCRuntimeVersion>
+    <optimizationlinuxx64MIBCRuntimeVersion>1.0.0-prerelease.24106.4</optimizationlinuxx64MIBCRuntimeVersion>
+    <optimizationlinuxarm64MIBCRuntimeVersion>1.0.0-prerelease.24106.4</optimizationlinuxarm64MIBCRuntimeVersion>
+    <optimizationPGOCoreCLRVersion>1.0.0-prerelease.24106.4</optimizationPGOCoreCLRVersion>
     <!-- Not auto-updated. -->
     <MicrosoftDiaSymReaderVersion>2.0.0</MicrosoftDiaSymReaderVersion>
     <MicrosoftDiaSymReaderNativeVersion>17.8.0-beta1.23475.2</MicrosoftDiaSymReaderNativeVersion>
@@ -186,10 +178,10 @@
     <!-- Testing -->
     <MicrosoftNETCoreCoreDisToolsVersion>1.4.0</MicrosoftNETCoreCoreDisToolsVersion>
     <MicrosoftNETTestSdkVersion>17.4.0-preview-20220707-01</MicrosoftNETTestSdkVersion>
-    <MicrosoftDotNetXHarnessTestRunnersCommonVersion>9.0.0-prerelease.24077.1</MicrosoftDotNetXHarnessTestRunnersCommonVersion>
-    <MicrosoftDotNetXHarnessTestRunnersXunitVersion>9.0.0-prerelease.24077.1</MicrosoftDotNetXHarnessTestRunnersXunitVersion>
-    <MicrosoftDotNetXHarnessCLIVersion>9.0.0-prerelease.24077.1</MicrosoftDotNetXHarnessCLIVersion>
-    <MicrosoftDotNetHotReloadUtilsGeneratorBuildToolVersion>9.0.0-alpha.0.24072.1</MicrosoftDotNetHotReloadUtilsGeneratorBuildToolVersion>
+    <MicrosoftDotNetXHarnessTestRunnersCommonVersion>9.0.0-prerelease.24203.1</MicrosoftDotNetXHarnessTestRunnersCommonVersion>
+    <MicrosoftDotNetXHarnessTestRunnersXunitVersion>9.0.0-prerelease.24203.1</MicrosoftDotNetXHarnessTestRunnersXunitVersion>
+    <MicrosoftDotNetXHarnessCLIVersion>9.0.0-prerelease.24203.1</MicrosoftDotNetXHarnessCLIVersion>
+    <MicrosoftDotNetHotReloadUtilsGeneratorBuildToolVersion>9.0.0-alpha.0.24201.1</MicrosoftDotNetHotReloadUtilsGeneratorBuildToolVersion>
     <NUnitVersion>3.12.0</NUnitVersion>
     <NUnit3TestAdapterVersion>4.5.0</NUnit3TestAdapterVersion>
     <CoverletCollectorVersion>6.0.0</CoverletCollectorVersion>
@@ -215,53 +207,53 @@
     <!-- Docs -->
     <MicrosoftPrivateIntellisenseVersion>8.0.0-preview-20230918.1</MicrosoftPrivateIntellisenseVersion>
     <!-- Mono Cecil -->
-    <MicrosoftDotNetCecilVersion>0.11.4-alpha.24065.1</MicrosoftDotNetCecilVersion>
+    <MicrosoftDotNetCecilVersion>0.11.4-alpha.24168.1</MicrosoftDotNetCecilVersion>
     <!-- ILCompiler -->
-    <MicrosoftDotNetILCompilerVersion>9.0.0-alpha.1.24072.1</MicrosoftDotNetILCompilerVersion>
+    <MicrosoftDotNetILCompilerVersion>9.0.0-preview.4.24201.1</MicrosoftDotNetILCompilerVersion>
     <!-- ICU -->
-    <MicrosoftNETCoreRuntimeICUTransportVersion>9.0.0-preview.2.24072.3</MicrosoftNETCoreRuntimeICUTransportVersion>
+    <MicrosoftNETCoreRuntimeICUTransportVersion>9.0.0-preview.4.24201.1</MicrosoftNETCoreRuntimeICUTransportVersion>
     <!-- MsQuic -->
-    <MicrosoftNativeQuicMsQuicVersion>2.2.3</MicrosoftNativeQuicMsQuicVersion>
-    <SystemNetMsQuicTransportVersion>9.0.0-alpha.1.24067.1</SystemNetMsQuicTransportVersion>
+    <MicrosoftNativeQuicMsQuicSchannelVersion>2.3.5</MicrosoftNativeQuicMsQuicSchannelVersion>
+    <SystemNetMsQuicTransportVersion>9.0.0-alpha.1.24167.3</SystemNetMsQuicTransportVersion>
     <!-- Mono LLVM -->
-    <runtimelinuxarm64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24105.1</runtimelinuxarm64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
-    <runtimelinuxarm64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24105.1</runtimelinuxarm64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
-    <runtimelinuxmuslarm64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24105.1</runtimelinuxmuslarm64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
-    <runtimelinuxmuslarm64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24105.1</runtimelinuxmuslarm64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
-    <runtimelinuxx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24105.1</runtimelinuxx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
-    <runtimelinuxx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24105.1</runtimelinuxx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
-    <runtimelinuxmuslx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24105.1</runtimelinuxmuslx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
-    <runtimelinuxmuslx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24105.1</runtimelinuxmuslx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
-    <runtimewinx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24105.1</runtimewinx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
-    <runtimewinx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24105.1</runtimewinx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
-    <runtimeosxarm64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24105.1</runtimeosxarm64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
-    <runtimeosxarm64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24105.1</runtimeosxarm64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
-    <runtimeosxx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24105.1</runtimeosxx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
-    <runtimeosxx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24105.1</runtimeosxx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
+    <runtimelinuxarm64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24203.4</runtimelinuxarm64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
+    <runtimelinuxarm64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24203.4</runtimelinuxarm64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
+    <runtimelinuxmuslarm64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24203.4</runtimelinuxmuslarm64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
+    <runtimelinuxmuslarm64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24203.4</runtimelinuxmuslarm64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
+    <runtimelinuxx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24203.4</runtimelinuxx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
+    <runtimelinuxx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24203.4</runtimelinuxx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
+    <runtimelinuxmuslx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24203.4</runtimelinuxmuslx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
+    <runtimelinuxmuslx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24203.4</runtimelinuxmuslx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
+    <runtimewinx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24203.4</runtimewinx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
+    <runtimewinx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24203.4</runtimewinx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
+    <runtimeosxarm64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24203.4</runtimeosxarm64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
+    <runtimeosxarm64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24203.4</runtimeosxarm64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
+    <runtimeosxx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>16.0.5-alpha.1.24203.4</runtimeosxx64MicrosoftNETCoreRuntimeMonoLLVMSdkVersion>
+    <runtimeosxx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>16.0.5-alpha.1.24203.4</runtimeosxx64MicrosoftNETCoreRuntimeMonoLLVMToolsVersion>
     <!-- emscripten / Node
          Note: when the name is updated, make sure to update dependency name in eng/pipelines/common/xplat-setup.yml
                like - DarcDependenciesChanged.Microsoft_NET_Workload_Emscripten_Current_Manifest-9_0_100_Transport
     -->
-    <MicrosoftNETWorkloadEmscriptenCurrentManifest90100TransportVersion>9.0.0-preview.2.24108.4</MicrosoftNETWorkloadEmscriptenCurrentManifest90100TransportVersion>
+    <MicrosoftNETWorkloadEmscriptenCurrentManifest90100TransportVersion>9.0.0-preview.4.24204.9</MicrosoftNETWorkloadEmscriptenCurrentManifest90100TransportVersion>
     <MicrosoftNETRuntimeEmscriptenVersion>$(MicrosoftNETWorkloadEmscriptenCurrentManifest90100TransportVersion)</MicrosoftNETRuntimeEmscriptenVersion>
-    <MicrosoftNETRuntimeEmscripten3134Pythonwinx64Version>9.0.0-preview.2.24108.4</MicrosoftNETRuntimeEmscripten3134Pythonwinx64Version>
+    <MicrosoftNETRuntimeEmscripten3134Pythonwinx64Version>9.0.0-preview.4.24204.9</MicrosoftNETRuntimeEmscripten3134Pythonwinx64Version>
     <!-- workloads -->
     <SwixPackageVersion>1.1.87-gba258badda</SwixPackageVersion>
     <WixPackageVersion>1.0.0-v3.14.0.5722</WixPackageVersion>
     <!-- JIT Tools -->
-    <runtimelinuxarm64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24105.1</runtimelinuxarm64MicrosoftNETCoreRuntimeJITToolsVersion>
-    <runtimelinuxx64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24105.1</runtimelinuxx64MicrosoftNETCoreRuntimeJITToolsVersion>
-    <runtimelinuxmuslarm64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24105.1</runtimelinuxmuslarm64MicrosoftNETCoreRuntimeJITToolsVersion>
-    <runtimelinuxmuslx64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24105.1</runtimelinuxmuslx64MicrosoftNETCoreRuntimeJITToolsVersion>
-    <runtimewinarm64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24105.1</runtimewinarm64MicrosoftNETCoreRuntimeJITToolsVersion>
-    <runtimewinx64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24105.1</runtimewinx64MicrosoftNETCoreRuntimeJITToolsVersion>
-    <runtimeosxarm64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24105.1</runtimeosxarm64MicrosoftNETCoreRuntimeJITToolsVersion>
-    <runtimeosxx64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24105.1</runtimeosxx64MicrosoftNETCoreRuntimeJITToolsVersion>
+    <runtimelinuxarm64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24203.4</runtimelinuxarm64MicrosoftNETCoreRuntimeJITToolsVersion>
+    <runtimelinuxx64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24203.4</runtimelinuxx64MicrosoftNETCoreRuntimeJITToolsVersion>
+    <runtimelinuxmuslarm64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24203.4</runtimelinuxmuslarm64MicrosoftNETCoreRuntimeJITToolsVersion>
+    <runtimelinuxmuslx64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24203.4</runtimelinuxmuslx64MicrosoftNETCoreRuntimeJITToolsVersion>
+    <runtimewinarm64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24203.4</runtimewinarm64MicrosoftNETCoreRuntimeJITToolsVersion>
+    <runtimewinx64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24203.4</runtimewinx64MicrosoftNETCoreRuntimeJITToolsVersion>
+    <runtimeosxarm64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24203.4</runtimeosxarm64MicrosoftNETCoreRuntimeJITToolsVersion>
+    <runtimeosxx64MicrosoftNETCoreRuntimeJITToolsVersion>16.0.5-alpha.1.24203.4</runtimeosxx64MicrosoftNETCoreRuntimeJITToolsVersion>
     <!-- BrowserDebugProxy libs -->
     <MicrosoftExtensionsLoggingVersion>3.1.7</MicrosoftExtensionsLoggingVersion>
     <MicrosoftSymbolStoreVersion>1.0.406601</MicrosoftSymbolStoreVersion>
     <!-- installer version, for testing workloads -->
-    <MicrosoftDotnetSdkInternalVersion>9.0.100-alpha.1.24070.3</MicrosoftDotnetSdkInternalVersion>
+    <MicrosoftDotnetSdkInternalVersion>9.0.100-preview.4.24175.5</MicrosoftDotnetSdkInternalVersion>
     <SdkVersionForWorkloadTesting>$(MicrosoftDotnetSdkInternalVersion)</SdkVersionForWorkloadTesting>
   </PropertyGroup>
 </Project>
diff --git a/eng/build-analysis-configuration.json b/eng/build-analysis-configuration.json
new file mode 100644
index 000000000000..d647594ab402
--- /dev/null
+++ b/eng/build-analysis-configuration.json
@@ -0,0 +1,12 @@
+{
+    "PipelinesToAnalyze":[
+       {
+          "PipelineId": 129,
+          "PipelineName": "runtime"
+       },
+       {
+         "PipelineId": 133,
+         "PipelineName": "runtime-dev-innerloop"
+       }
+    ]
+}
diff --git a/eng/build.ps1 b/eng/build.ps1
index db18267f33e1..1f72dabed00e 100644
--- a/eng/build.ps1
+++ b/eng/build.ps1
@@ -12,6 +12,7 @@ Param(
   [string]$testscope,
   [switch]$testnobuild,
   [ValidateSet("x86","x64","arm","arm64","wasm")][string[]][Alias('a')]$arch = @([System.Runtime.InteropServices.RuntimeInformation]::ProcessArchitecture.ToString().ToLowerInvariant()),
+  [switch]$cross = $false,
   [string][Alias('s')]$subset,
   [ValidateSet("Debug","Release","Checked")][string][Alias('rc')]$runtimeConfiguration,
   [ValidateSet("Debug","Release")][string][Alias('lc')]$librariesConfiguration,
@@ -138,7 +139,7 @@ if (-not $PSBoundParameters.ContainsKey("subset") -and $properties.Length -gt 0
 }
 
 if ($subset -eq 'help') {
-  Invoke-Expression "& `"$PSScriptRoot/common/build.ps1`" -restore -build /p:subset=help /clp:nosummary"
+  Invoke-Expression "& `"$PSScriptRoot/common/build.ps1`" -restore -build /p:subset=help /clp:nosummary /tl:false"
   exit 0
 }
 
@@ -261,6 +262,12 @@ if ($vs) {
     $env:RUNTIMECONFIGURATION=$runtimeConfiguration
   }
 
+  if ($librariesConfiguration)
+  {
+    # Respect the LibrariesConfiguration variable for building inside VS with different libraries configurations
+    $env:LIBRARIESCONFIGURATION=$librariesConfiguration
+  }
+
   # Respect the RuntimeFlavor variable for building inside VS with a different CoreLib and runtime
   if ($runtimeFlavor)
   {
@@ -325,6 +332,9 @@ if ($env:TreatWarningsAsErrors -eq 'false') {
   $arguments += " -warnAsError 0"
 }
 
+# disable terminal logger for now: https://github.com/dotnet/runtime/issues/97211
+$arguments += " /tl:false"
+
 # Disable targeting pack caching as we reference a partially constructed targeting pack and update it later.
 # The later changes are ignored when using the cache.
 $env:DOTNETSDK_ALLOW_TARGETING_PACK_CACHING=0
diff --git a/eng/build.sh b/eng/build.sh
index 67f3cfeea472..df6326146672 100755
--- a/eng/build.sh
+++ b/eng/build.sh
@@ -149,7 +149,7 @@ initDistroRid()
 
 showSubsetHelp()
 {
-  "$scriptroot/common/build.sh" "-restore" "-build" "/p:Subset=help" "/clp:nosummary"
+  "$scriptroot/common/build.sh" "-restore" "-build" "/p:Subset=help" "/clp:nosummary /tl:false"
 }
 
 arguments=''
@@ -553,6 +553,9 @@ if [[ "${TreatWarningsAsErrors:-}" == "false" ]]; then
     arguments="$arguments -warnAsError 0"
 fi
 
+# disable terminal logger for now: https://github.com/dotnet/runtime/issues/97211
+arguments="$arguments -tl:false"
+
 initDistroRid "$os" "$arch" "$crossBuild"
 
 # Disable targeting pack caching as we reference a partially constructed targeting pack and update it later.
diff --git a/eng/codeOptimization.targets b/eng/codeOptimization.targets
index d42a39e3dfd2..5f990a983476 100644
--- a/eng/codeOptimization.targets
+++ b/eng/codeOptimization.targets
@@ -30,7 +30,7 @@
   <Target Name="AddReadyToRunPgoOptions" DependsOnTargets="ResolveRuntimeFilesFromLocalBuild" BeforeTargets="ResolveReadyToRunCompilers" Condition="'$(PublishReadyToRun)' == 'true'">
     <PropertyGroup>
       <!-- Only use mibc files if UsingToolIbcOptimization is false. Allows enabling/disabling using ibc instead of mibc data -->
-      <IncludeMibcFilesInReadyToRun Condition="'$(UsingToolIbcOptimization)' != 'true' and '$(EnableNgenOptimization)' == 'true' and '$(DotNetBuildFromSource)' != 'true'">true</IncludeMibcFilesInReadyToRun>
+      <IncludeMibcFilesInReadyToRun Condition="'$(UsingToolIbcOptimization)' != 'true' and '$(EnableNgenOptimization)' == 'true' and '$(DotNetBuildSourceOnly)' != 'true'">true</IncludeMibcFilesInReadyToRun>
     </PropertyGroup>
     <ItemGroup>
       <PublishReadyToRunPgoFiles Condition="'$(IncludeMibcFilesInReadyToRun)' == 'true'" Include="$(CoreCLRArtifactsPath)StandardOptimizationData.mibc"/>
diff --git a/eng/common/SetupNugetSources.ps1 b/eng/common/SetupNugetSources.ps1
index 6c65e81925f2..efa2fd72bfaa 100644
--- a/eng/common/SetupNugetSources.ps1
+++ b/eng/common/SetupNugetSources.ps1
@@ -35,7 +35,7 @@ Set-StrictMode -Version 2.0
 . $PSScriptRoot\tools.ps1
 
 # Add source entry to PackageSources
-function AddPackageSource($sources, $SourceName, $SourceEndPoint, $creds, $Username, $Password) {
+function AddPackageSource($sources, $SourceName, $SourceEndPoint, $creds, $Username, $pwd) {
     $packageSource = $sources.SelectSingleNode("add[@key='$SourceName']")
     
     if ($packageSource -eq $null)
@@ -48,12 +48,11 @@ function AddPackageSource($sources, $SourceName, $SourceEndPoint, $creds, $Usern
     else {
         Write-Host "Package source $SourceName already present."
     }
-    
-    AddCredential -Creds $creds -Source $SourceName -Username $Username -Password $Password
+    AddCredential -Creds $creds -Source $SourceName -Username $Username -pwd $pwd
 }
 
 # Add a credential node for the specified source
-function AddCredential($creds, $source, $username, $password) {
+function AddCredential($creds, $source, $username, $pwd) {
     # Looks for credential configuration for the given SourceName. Create it if none is found.
     $sourceElement = $creds.SelectSingleNode($Source)
     if ($sourceElement -eq $null)
@@ -82,17 +81,18 @@ function AddCredential($creds, $source, $username, $password) {
         $passwordElement.SetAttribute("key", "ClearTextPassword")
         $sourceElement.AppendChild($passwordElement) | Out-Null
     }
-    $passwordElement.SetAttribute("value", $Password)
+    
+    $passwordElement.SetAttribute("value", $pwd)
 }
 
-function InsertMaestroPrivateFeedCredentials($Sources, $Creds, $Username, $Password) {
+function InsertMaestroPrivateFeedCredentials($Sources, $Creds, $Username, $pwd) {
     $maestroPrivateSources = $Sources.SelectNodes("add[contains(@key,'darc-int')]")
 
     Write-Host "Inserting credentials for $($maestroPrivateSources.Count) Maestro's private feeds."
     
     ForEach ($PackageSource in $maestroPrivateSources) {
         Write-Host "`tInserting credential for Maestro's feed:" $PackageSource.Key
-        AddCredential -Creds $creds -Source $PackageSource.Key -Username $Username -Password $Password
+        AddCredential -Creds $creds -Source $PackageSource.Key -Username $Username -pwd $pwd
     }
 }
 
@@ -144,13 +144,13 @@ if ($disabledSources -ne $null) {
 $userName = "dn-bot"
 
 # Insert credential nodes for Maestro's private feeds
-InsertMaestroPrivateFeedCredentials -Sources $sources -Creds $creds -Username $userName -Password $Password
+InsertMaestroPrivateFeedCredentials -Sources $sources -Creds $creds -Username $userName -pwd $Password
 
 # 3.1 uses a different feed url format so it's handled differently here
 $dotnet31Source = $sources.SelectSingleNode("add[@key='dotnet3.1']")
 if ($dotnet31Source -ne $null) {
-    AddPackageSource -Sources $sources -SourceName "dotnet3.1-internal" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/_packaging/dotnet3.1-internal/nuget/v2" -Creds $creds -Username $userName -Password $Password
-    AddPackageSource -Sources $sources -SourceName "dotnet3.1-internal-transport" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/_packaging/dotnet3.1-internal-transport/nuget/v2" -Creds $creds -Username $userName -Password $Password
+    AddPackageSource -Sources $sources -SourceName "dotnet3.1-internal" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/_packaging/dotnet3.1-internal/nuget/v2" -Creds $creds -Username $userName -pwd $Password
+    AddPackageSource -Sources $sources -SourceName "dotnet3.1-internal-transport" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/_packaging/dotnet3.1-internal-transport/nuget/v2" -Creds $creds -Username $userName -pwd $Password
 }
 
 $dotnetVersions = @('5','6','7','8')
@@ -159,9 +159,9 @@ foreach ($dotnetVersion in $dotnetVersions) {
     $feedPrefix = "dotnet" + $dotnetVersion;
     $dotnetSource = $sources.SelectSingleNode("add[@key='$feedPrefix']")
     if ($dotnetSource -ne $null) {
-        AddPackageSource -Sources $sources -SourceName "$feedPrefix-internal" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/internal/_packaging/$feedPrefix-internal/nuget/v2" -Creds $creds -Username $userName -Password $Password
-        AddPackageSource -Sources $sources -SourceName "$feedPrefix-internal-transport" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/internal/_packaging/$feedPrefix-internal-transport/nuget/v2" -Creds $creds -Username $userName -Password $Password
+        AddPackageSource -Sources $sources -SourceName "$feedPrefix-internal" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/internal/_packaging/$feedPrefix-internal/nuget/v2" -Creds $creds -Username $userName -pwd $Password
+        AddPackageSource -Sources $sources -SourceName "$feedPrefix-internal-transport" -SourceEndPoint "https://pkgs.dev.azure.com/dnceng/internal/_packaging/$feedPrefix-internal-transport/nuget/v2" -Creds $creds -Username $userName -pwd $Password
     }
 }
 
-$doc.Save($filename)
+$doc.Save($filename)
\ No newline at end of file
diff --git a/eng/common/build.ps1 b/eng/common/build.ps1
index 7100bc2a7cb6..2a115068a927 100644
--- a/eng/common/build.ps1
+++ b/eng/common/build.ps1
@@ -125,7 +125,6 @@ function Build {
     /p:Test=$test `
     /p:Pack=$pack `
     /p:DotNetBuildRepo=$($productBuild -or $verticalBuild) `
-    /p:ArcadeBuildVertical=$verticalBuild `
     /p:IntegrationTest=$integrationTest `
     /p:PerformanceTest=$performanceTest `
     /p:Sign=$sign `
diff --git a/eng/common/build.sh b/eng/common/build.sh
index bec7d02594f6..d82ebf742808 100755
--- a/eng/common/build.sh
+++ b/eng/common/build.sh
@@ -241,7 +241,6 @@ function Build {
     /p:DotNetBuildRepo=$product_build \
     /p:ArcadeBuildFromSource=$source_build \
     /p:DotNetBuildSourceOnly=$source_build \
-    /p:ArcadeBuildVertical=$vertical_build \
     /p:Rebuild=$rebuild \
     /p:Test=$test \
     /p:Pack=$pack \
diff --git a/eng/common/native/init-compiler.sh b/eng/common/native/init-compiler.sh
index f5c1ec7eafeb..afdeb7a4d54a 100644
--- a/eng/common/native/init-compiler.sh
+++ b/eng/common/native/init-compiler.sh
@@ -63,7 +63,7 @@ if [ -z "$CLR_CC" ]; then
     # Set default versions
     if [ -z "$majorVersion" ]; then
         # note: gcc (all versions) and clang versions higher than 6 do not have minor version in file name, if it is zero.
-        if [ "$compiler" = "clang" ]; then versions="17 16 15 14 13 12 11 10 9 8 7 6.0 5.0 4.0 3.9 3.8 3.7 3.6 3.5"
+        if [ "$compiler" = "clang" ]; then versions="18 17 16 15 14 13 12 11 10 9 8 7 6.0 5.0 4.0 3.9 3.8 3.7 3.6 3.5"
         elif [ "$compiler" = "gcc" ]; then versions="13 12 11 10 9 8 7 6 5 4.9"; fi
 
         for version in $versions; do
@@ -125,8 +125,8 @@ if [ -z "$CC" ]; then
     exit 1
 fi
 
-# Only lld version >= 9 can be considered stable. lld doesn't support s390x.
-if [ "$compiler" = "clang" ] && [ -n "$majorVersion" ] && [ "$majorVersion" -ge 9 ] && [ "$build_arch" != "s390x" ]; then
+# Only lld version >= 9 can be considered stable. lld supports s390x starting from 18.0.
+if [ "$compiler" = "clang" ] && [ -n "$majorVersion" ] && [ "$majorVersion" -ge 9 ] && ([ "$build_arch" != "s390x" ] || [ "$majorVersion" -ge 18 ]); then
     if "$CC" -fuse-ld=lld -Wl,--version >/dev/null 2>&1; then
         LDFLAGS="-fuse-ld=lld"
     fi
diff --git a/eng/common/native/init-distro-rid.sh b/eng/common/native/init-distro-rid.sh
index de1687b2ccbe..228be0b15986 100644
--- a/eng/common/native/init-distro-rid.sh
+++ b/eng/common/native/init-distro-rid.sh
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/sh
 
 # getNonPortableDistroRid
 #
@@ -11,21 +11,20 @@
 #   non-portable rid
 getNonPortableDistroRid()
 {
-    local targetOs="$1"
-    local targetArch="$2"
-    local rootfsDir="$3"
-    local nonPortableRid=""
+    targetOs="$1"
+    targetArch="$2"
+    rootfsDir="$3"
+    nonPortableRid=""
 
     if [ "$targetOs" = "linux" ]; then
+        # shellcheck disable=SC1091
         if [ -e "${rootfsDir}/etc/os-release" ]; then
-            source "${rootfsDir}/etc/os-release"
-
-            if [[ "${ID}" == "rhel" || "${ID}" == "rocky" || "${ID}" == "alpine" ]]; then
-                # remove the last version digit
-                VERSION_ID="${VERSION_ID%.*}"
+            . "${rootfsDir}/etc/os-release"
+            if [ "${ID}" = "rhel" ] || [ "${ID}" = "rocky" ] || [ "${ID}" = "alpine" ] || [ "${ID}" = "ol" ]; then
+                VERSION_ID="${VERSION_ID%.*}" # Remove the last version digit for these distros
             fi
 
-            if [[ "${VERSION_ID:-}" =~ ^([[:digit:]]|\.)+$ ]]; then
+            if echo "${VERSION_ID:-}" | grep -qE '^([[:digit:]]|\.)+$'; then
                 nonPortableRid="${ID}.${VERSION_ID}-${targetArch}"
             else
                 # Rolling release distros either do not set VERSION_ID, set it as blank or
@@ -33,45 +32,45 @@ getNonPortableDistroRid()
                 # so omit it here to be consistent with everything else.
                 nonPortableRid="${ID}-${targetArch}"
             fi
-
         elif [ -e "${rootfsDir}/android_platform" ]; then
-            source "$rootfsDir"/android_platform
+            # shellcheck disable=SC1091
+            . "${rootfsDir}/android_platform"
             nonPortableRid="$RID"
         fi
     fi
 
     if [ "$targetOs" = "freebsd" ]; then
-        # $rootfsDir can be empty. freebsd-version is shell script and it should always work.
-        __freebsd_major_version=$($rootfsDir/bin/freebsd-version | { read v; echo "${v%%.*}"; })
+        # $rootfsDir can be empty. freebsd-version is a shell script and should always work.
+        __freebsd_major_version=$("$rootfsDir"/bin/freebsd-version | cut -d'.' -f1)
         nonPortableRid="freebsd.$__freebsd_major_version-${targetArch}"
-    elif command -v getprop && getprop ro.product.system.model 2>&1 | grep -qi android; then
+    elif command -v getprop >/dev/null && getprop ro.product.system.model | grep -qi android; then
         __android_sdk_version=$(getprop ro.build.version.sdk)
         nonPortableRid="android.$__android_sdk_version-${targetArch}"
     elif [ "$targetOs" = "illumos" ]; then
         __uname_version=$(uname -v)
         case "$__uname_version" in
             omnios-*)
-                __omnios_major_version=$(echo "${__uname_version:8:2}")
-                nonPortableRid=omnios."$__omnios_major_version"-"$targetArch"
-            ;;
+                __omnios_major_version=$(echo "$__uname_version" | cut -c9-10)
+                nonPortableRid="omnios.$__omnios_major_version-${targetArch}"
+                ;;
             joyent_*)
-                __smartos_major_version=$(echo "${__uname_version:7:4}")
-                nonPortableRid=smartos."$__smartos_major_version"-"$targetArch"
-            ;;
-            illumos_*)
-                nonPortableRid=openindiana-"$targetArch"
-            ;;
+                __smartos_major_version=$(echo "$__uname_version" | cut -c9-10)
+                nonPortableRid="smartos.$__smartos_major_version-${targetArch}"
+                ;;
+            *)
+                nonPortableRid="illumos-${targetArch}"
+                ;;
         esac
     elif [ "$targetOs" = "solaris" ]; then
         __uname_version=$(uname -v)
-        __solaris_major_version=$(echo "${__uname_version%.*}")
-        nonPortableRid=solaris."$__solaris_major_version"-"$targetArch"
+        __solaris_major_version=$(echo "$__uname_version" | cut -d'.' -f1)
+        nonPortableRid="solaris.$__solaris_major_version-${targetArch}"
     elif [ "$targetOs" = "haiku" ]; then
-        __uname_release=$(uname -r)
+        __uname_release="$(uname -r)"
         nonPortableRid=haiku.r"$__uname_release"-"$targetArch"
     fi
 
-    echo "$(echo $nonPortableRid | tr '[:upper:]' '[:lower:]')"
+    echo "$nonPortableRid" | tr '[:upper:]' '[:lower:]'
 }
 
 # initDistroRidGlobal
@@ -85,26 +84,23 @@ getNonPortableDistroRid()
 #   None
 #
 # Notes:
-#
-# It is important to note that the function does not return anything, but it
-# exports the following variables on success:
-#
-#   __DistroRid   : Non-portable rid of the target platform.
-#   __PortableTargetOS  : OS-part of the portable rid that corresponds to the target platform.
-#
+#   It is important to note that the function does not return anything, but it
+#   exports the following variables on success:
+#     __DistroRid   : Non-portable rid of the target platform.
+#     __PortableTargetOS  : OS-part of the portable rid that corresponds to the target platform.
 initDistroRidGlobal()
 {
-    local targetOs="$1"
-    local targetArch="$2"
-    local rootfsDir=""
-    if [ "$#" -ge 3 ]; then
+    targetOs="$1"
+    targetArch="$2"
+    rootfsDir=""
+    if [ $# -ge 3 ]; then
         rootfsDir="$3"
     fi
 
     if [ -n "${rootfsDir}" ]; then
         # We may have a cross build. Check for the existence of the rootfsDir
         if [ ! -e "${rootfsDir}" ]; then
-            echo "Error rootfsDir has been passed, but the location is not valid."
+            echo "Error: rootfsDir has been passed, but the location is not valid."
             exit 1
         fi
     fi
@@ -119,7 +115,7 @@ initDistroRidGlobal()
             STRINGS="$(command -v llvm-strings || true)"
         fi
 
-        # Check for musl-based distros (e.g Alpine Linux, Void Linux).
+        # Check for musl-based distros (e.g. Alpine Linux, Void Linux).
         if "${rootfsDir}/usr/bin/ldd" --version 2>&1 | grep -q musl ||
                 ( [ -n "$STRINGS" ] && "$STRINGS" "${rootfsDir}/usr/bin/ldd" 2>&1 | grep -q musl ); then
             __PortableTargetOS="linux-musl"
diff --git a/eng/common/native/init-os-and-arch.sh b/eng/common/native/init-os-and-arch.sh
index caa448ff0300..38921d4338f7 100644
--- a/eng/common/native/init-os-and-arch.sh
+++ b/eng/common/native/init-os-and-arch.sh
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/bin/sh
 
 # Use uname to determine what the OS is.
 OSName=$(uname -s | tr '[:upper:]' '[:lower:]')
@@ -54,6 +54,7 @@ case "$CPUName" in
         ;;
 
     armv7l|armv8l)
+        # shellcheck disable=SC1091
         if (NAME=""; . /etc/os-release; test "$NAME" = "Tizen"); then
             arch=armel
         else
diff --git a/eng/common/post-build/publish-using-darc.ps1 b/eng/common/post-build/publish-using-darc.ps1
index 1e779fec4dd1..5a3a32ea8d75 100644
--- a/eng/common/post-build/publish-using-darc.ps1
+++ b/eng/common/post-build/publish-using-darc.ps1
@@ -12,7 +12,7 @@ param(
 try {
   . $PSScriptRoot\post-build-utils.ps1
 
-  $darc = Get-Darc 
+  $darc = Get-Darc
 
   $optionalParams = [System.Collections.ArrayList]::new()
 
@@ -46,7 +46,7 @@ try {
   }
 
   Write-Host 'done.'
-} 
+}
 catch {
   Write-Host $_
   Write-PipelineTelemetryError -Category 'PromoteBuild' -Message "There was an error while trying to publish build '$BuildId' to default channels."
diff --git a/eng/common/templates-official/job/job.yml b/eng/common/templates-official/job/job.yml
new file mode 100644
index 000000000000..c63e17e863ed
--- /dev/null
+++ b/eng/common/templates-official/job/job.yml
@@ -0,0 +1,264 @@
+# Internal resources (telemetry, microbuild) can only be accessed from non-public projects,
+# and some (Microbuild) should only be applied to non-PR cases for internal builds.
+
+parameters:
+# Job schema parameters - https://docs.microsoft.com/en-us/azure/devops/pipelines/yaml-schema?view=vsts&tabs=schema#job
+  cancelTimeoutInMinutes: ''
+  condition: ''
+  container: ''
+  continueOnError: false
+  dependsOn: ''
+  displayName: ''
+  pool: ''
+  steps: []
+  strategy: ''
+  timeoutInMinutes: ''
+  variables: []
+  workspace: ''
+  templateContext: ''
+
+# Job base template specific parameters
+  # See schema documentation - https://github.com/dotnet/arcade/blob/master/Documentation/AzureDevOps/TemplateSchema.md
+  artifacts: ''
+  enableMicrobuild: false
+  enablePublishBuildArtifacts: false
+  enablePublishBuildAssets: false
+  enablePublishTestResults: false
+  enablePublishUsingPipelines: false
+  enableBuildRetry: false
+  disableComponentGovernance: ''
+  componentGovernanceIgnoreDirectories: ''
+  mergeTestResults: false
+  testRunTitle: ''
+  testResultsFormat: ''
+  name: ''
+  preSteps: []
+  runAsPublic: false
+# Sbom related params
+  enableSbom: true
+  PackageVersion: 7.0.0
+  BuildDropPath: '$(Build.SourcesDirectory)/artifacts'
+
+jobs:
+- job: ${{ parameters.name }}
+
+  ${{ if ne(parameters.cancelTimeoutInMinutes, '') }}:
+    cancelTimeoutInMinutes: ${{ parameters.cancelTimeoutInMinutes }}
+
+  ${{ if ne(parameters.condition, '') }}:
+    condition: ${{ parameters.condition }}
+
+  ${{ if ne(parameters.container, '') }}:
+    container: ${{ parameters.container }}
+
+  ${{ if ne(parameters.continueOnError, '') }}:
+    continueOnError: ${{ parameters.continueOnError }}
+
+  ${{ if ne(parameters.dependsOn, '') }}:
+    dependsOn: ${{ parameters.dependsOn }}
+
+  ${{ if ne(parameters.displayName, '') }}:
+    displayName: ${{ parameters.displayName }}
+
+  ${{ if ne(parameters.pool, '') }}:
+    pool: ${{ parameters.pool }}
+
+  ${{ if ne(parameters.strategy, '') }}:
+    strategy: ${{ parameters.strategy }}
+
+  ${{ if ne(parameters.timeoutInMinutes, '') }}:
+    timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
+
+  ${{ if ne(parameters.templateContext, '') }}:
+    templateContext: ${{ parameters.templateContext }}
+
+  variables:
+  - ${{ if ne(parameters.enableTelemetry, 'false') }}:
+    - name: DOTNET_CLI_TELEMETRY_PROFILE
+      value: '$(Build.Repository.Uri)'
+  - ${{ if eq(parameters.enableRichCodeNavigation, 'true') }}:
+    - name: EnableRichCodeNavigation
+      value: 'true'
+  # Retry signature validation up to three times, waiting 2 seconds between attempts.
+  # See https://learn.microsoft.com/en-us/nuget/reference/errors-and-warnings/nu3028#retry-untrusted-root-failures
+  - name: NUGET_EXPERIMENTAL_CHAIN_BUILD_RETRY_POLICY
+    value: 3,2000
+  - ${{ each variable in parameters.variables }}:
+    # handle name-value variable syntax
+    # example:
+    # - name: [key]
+    #   value: [value]
+    - ${{ if ne(variable.name, '') }}:
+      - name: ${{ variable.name }}
+        value: ${{ variable.value }}
+
+    # handle variable groups
+    - ${{ if ne(variable.group, '') }}:
+      - group: ${{ variable.group }}
+
+    # handle template variable syntax
+    # example:
+    # - template: path/to/template.yml
+    #   parameters:
+    #     [key]: [value]
+    - ${{ if ne(variable.template, '') }}:
+      - template: ${{ variable.template }}
+        ${{ if ne(variable.parameters, '') }}:
+          parameters: ${{ variable.parameters }}
+
+    # handle key-value variable syntax.
+    # example:
+    # - [key]: [value]
+    - ${{ if and(eq(variable.name, ''), eq(variable.group, ''), eq(variable.template, '')) }}:
+      - ${{ each pair in variable }}:
+        - name: ${{ pair.key }}
+          value: ${{ pair.value }}
+
+  # DotNet-HelixApi-Access provides 'HelixApiAccessToken' for internal builds
+  - ${{ if and(eq(parameters.enableTelemetry, 'true'), eq(parameters.runAsPublic, 'false'), ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+    - group: DotNet-HelixApi-Access
+
+  ${{ if ne(parameters.workspace, '') }}:
+    workspace: ${{ parameters.workspace }}
+
+  steps:
+  - ${{ if ne(parameters.preSteps, '') }}:
+    - ${{ each preStep in parameters.preSteps }}:
+      - ${{ preStep }}
+
+  - ${{ if and(eq(parameters.runAsPublic, 'false'), ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+    - ${{ if eq(parameters.enableMicrobuild, 'true') }}:
+      - task: MicroBuildSigningPlugin@4
+        displayName: Install MicroBuild plugin
+        inputs:
+          signType: $(_SignType)
+          zipSources: false
+          feedSource: https://dnceng.pkgs.visualstudio.com/_packaging/MicroBuildToolset/nuget/v3/index.json
+        env:
+          TeamName: $(_TeamName)
+          MicroBuildOutputFolderOverride: '$(Agent.TempDirectory)'
+        continueOnError: ${{ parameters.continueOnError }}
+        condition: and(succeeded(), in(variables['_SignType'], 'real', 'test'), eq(variables['Agent.Os'], 'Windows_NT'))
+
+  - ${{ if and(eq(parameters.runAsPublic, 'false'), eq(variables['System.TeamProject'], 'internal')) }}:
+    - task: NuGetAuthenticate@1
+
+  - ${{ if and(ne(parameters.artifacts.download, 'false'), ne(parameters.artifacts.download, '')) }}:
+    - task: DownloadPipelineArtifact@2
+      inputs:
+        buildType: current
+        artifactName: ${{ coalesce(parameters.artifacts.download.name, 'Artifacts_$(Agent.OS)_$(_BuildConfig)') }}
+        targetPath: ${{ coalesce(parameters.artifacts.download.path, 'artifacts') }}
+        itemPattern: ${{ coalesce(parameters.artifacts.download.pattern, '**') }}
+
+  - ${{ each step in parameters.steps }}:
+    - ${{ step }}
+
+  - ${{ if eq(parameters.enableRichCodeNavigation, true) }}:
+    - task: RichCodeNavIndexer@0
+      displayName: RichCodeNav Upload
+      inputs:
+        languages: ${{ coalesce(parameters.richCodeNavigationLanguage, 'csharp') }}
+        environment: ${{ coalesce(parameters.richCodeNavigationEnvironment, 'internal') }}
+        richNavLogOutputDirectory: $(Build.SourcesDirectory)/artifacts/bin
+        uploadRichNavArtifacts: ${{ coalesce(parameters.richCodeNavigationUploadArtifacts, false) }}
+      continueOnError: true
+
+  - template: /eng/common/templates-official/steps/component-governance.yml
+    parameters:
+      ${{ if eq(parameters.disableComponentGovernance, '') }}:
+        ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest'), eq(parameters.runAsPublic, 'false'), or(startsWith(variables['Build.SourceBranch'], 'refs/heads/release/'), startsWith(variables['Build.SourceBranch'], 'refs/heads/dotnet/'), startsWith(variables['Build.SourceBranch'], 'refs/heads/microsoft/'), eq(variables['Build.SourceBranch'], 'refs/heads/main'))) }}:
+          disableComponentGovernance: false
+        ${{ else }}:
+          disableComponentGovernance: true
+      ${{ else }}:
+        disableComponentGovernance: ${{ parameters.disableComponentGovernance }}
+      componentGovernanceIgnoreDirectories: ${{ parameters.componentGovernanceIgnoreDirectories }}
+
+  - ${{ if eq(parameters.enableMicrobuild, 'true') }}:
+    - ${{ if and(eq(parameters.runAsPublic, 'false'), ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+      - task: MicroBuildCleanup@1
+        displayName: Execute Microbuild cleanup tasks
+        condition: and(always(), in(variables['_SignType'], 'real', 'test'), eq(variables['Agent.Os'], 'Windows_NT'))
+        continueOnError: ${{ parameters.continueOnError }}
+        env:
+          TeamName: $(_TeamName)
+
+  - ${{ if ne(parameters.artifacts.publish, '') }}:
+    - ${{ if and(ne(parameters.artifacts.publish.artifacts, 'false'), ne(parameters.artifacts.publish.artifacts, '')) }}:
+      - task: CopyFiles@2
+        displayName: Gather binaries for publish to artifacts
+        inputs:
+          SourceFolder: 'artifacts/bin'
+          Contents: '**'
+          TargetFolder: '$(Build.ArtifactStagingDirectory)/artifacts/bin'
+      - task: CopyFiles@2
+        displayName: Gather packages for publish to artifacts
+        inputs:
+          SourceFolder: 'artifacts/packages'
+          Contents: '**'
+          TargetFolder: '$(Build.ArtifactStagingDirectory)/artifacts/packages'
+      - task: 1ES.PublishBuildArtifacts@1
+        displayName: Publish pipeline artifacts
+        inputs:
+          PathtoPublish: '$(Build.ArtifactStagingDirectory)/artifacts'
+          PublishLocation: Container
+          ArtifactName: ${{ coalesce(parameters.artifacts.publish.artifacts.name , 'Artifacts_$(Agent.Os)_$(_BuildConfig)') }}
+        continueOnError: true
+        condition: always()
+    - ${{ if and(ne(parameters.artifacts.publish.logs, 'false'), ne(parameters.artifacts.publish.logs, '')) }}:
+      - task: 1ES.PublishPipelineArtifact@1
+        inputs:
+          targetPath: 'artifacts/log'
+          artifactName: ${{ coalesce(parameters.artifacts.publish.logs.name, 'Logs_Build_$(Agent.Os)_$(_BuildConfig)') }}
+        displayName: 'Publish logs'
+        continueOnError: true
+        condition: always()
+
+  - ${{ if ne(parameters.enablePublishBuildArtifacts, 'false') }}:
+    - task: 1ES.PublishBuildArtifacts@1
+      displayName: Publish Logs
+      inputs:
+        PathtoPublish: '$(Build.SourcesDirectory)/artifacts/log/$(_BuildConfig)'
+        PublishLocation: Container
+        ArtifactName: ${{ coalesce(parameters.enablePublishBuildArtifacts.artifactName, '$(Agent.Os)_$(Agent.JobName)' ) }}
+      continueOnError: true
+      condition: always()
+
+  - ${{ if or(and(eq(parameters.enablePublishTestResults, 'true'), eq(parameters.testResultsFormat, '')), eq(parameters.testResultsFormat, 'xunit')) }}:
+    - task: PublishTestResults@2
+      displayName: Publish XUnit Test Results
+      inputs:
+        testResultsFormat: 'xUnit'
+        testResultsFiles: '*.xml'
+        searchFolder: '$(Build.SourcesDirectory)/artifacts/TestResults/$(_BuildConfig)'
+        testRunTitle: ${{ coalesce(parameters.testRunTitle, parameters.name, '$(System.JobName)') }}-xunit
+        mergeTestResults: ${{ parameters.mergeTestResults }}
+      continueOnError: true
+      condition: always()
+  - ${{ if or(and(eq(parameters.enablePublishTestResults, 'true'), eq(parameters.testResultsFormat, '')), eq(parameters.testResultsFormat, 'vstest')) }}:
+    - task: PublishTestResults@2
+      displayName: Publish TRX Test Results
+      inputs:
+        testResultsFormat: 'VSTest'
+        testResultsFiles: '*.trx'
+        searchFolder: '$(Build.SourcesDirectory)/artifacts/TestResults/$(_BuildConfig)'
+        testRunTitle: ${{ coalesce(parameters.testRunTitle, parameters.name, '$(System.JobName)') }}-trx
+        mergeTestResults: ${{ parameters.mergeTestResults }}
+      continueOnError: true
+      condition: always()
+
+  - ${{ if and(eq(parameters.runAsPublic, 'false'), ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest'), eq(parameters.enableSbom, 'true')) }}:
+    - template: /eng/common/templates-official/steps/generate-sbom.yml
+      parameters:
+        PackageVersion: ${{ parameters.packageVersion}}
+        BuildDropPath: ${{ parameters.buildDropPath }}
+        IgnoreDirectories: ${{ parameters.componentGovernanceIgnoreDirectories }}
+
+  - ${{ if eq(parameters.enableBuildRetry, 'true') }}:
+    - task: 1ES.PublishPipelineArtifact@1
+      inputs:
+        targetPath: '$(Build.SourcesDirectory)\eng\common\BuildConfiguration'
+        artifactName: 'BuildConfiguration'
+      displayName: 'Publish build retry configuration'
+      continueOnError: true
\ No newline at end of file
diff --git a/eng/common/templates-official/job/onelocbuild.yml b/eng/common/templates-official/job/onelocbuild.yml
new file mode 100644
index 000000000000..ba9ba4930329
--- /dev/null
+++ b/eng/common/templates-official/job/onelocbuild.yml
@@ -0,0 +1,112 @@
+parameters:
+  # Optional: dependencies of the job
+  dependsOn: ''
+
+  # Optional: A defined YAML pool - https://docs.microsoft.com/en-us/azure/devops/pipelines/yaml-schema?view=vsts&tabs=schema#pool
+  pool: ''
+    
+  CeapexPat: $(dn-bot-ceapex-package-r) # PAT for the loc AzDO instance https://dev.azure.com/ceapex
+  GithubPat: $(BotAccount-dotnet-bot-repo-PAT)
+
+  SourcesDirectory: $(Build.SourcesDirectory)
+  CreatePr: true
+  AutoCompletePr: false
+  ReusePr: true
+  UseLfLineEndings: true
+  UseCheckedInLocProjectJson: false
+  SkipLocProjectJsonGeneration: false
+  LanguageSet: VS_Main_Languages
+  LclSource: lclFilesInRepo
+  LclPackageId: ''
+  RepoType: gitHub
+  GitHubOrg: dotnet
+  MirrorRepo: ''
+  MirrorBranch: main
+  condition: ''
+  JobNameSuffix: ''
+
+jobs:
+- job: OneLocBuild${{ parameters.JobNameSuffix }}
+  
+  dependsOn: ${{ parameters.dependsOn }}
+
+  displayName: OneLocBuild${{ parameters.JobNameSuffix }}
+
+  variables:
+    - group: OneLocBuildVariables # Contains the CeapexPat and GithubPat
+    - name: _GenerateLocProjectArguments
+      value: -SourcesDirectory ${{ parameters.SourcesDirectory }}
+        -LanguageSet "${{ parameters.LanguageSet }}"
+        -CreateNeutralXlfs
+    - ${{ if eq(parameters.UseCheckedInLocProjectJson, 'true') }}:
+      - name: _GenerateLocProjectArguments
+        value: ${{ variables._GenerateLocProjectArguments }} -UseCheckedInLocProjectJson
+    - template: /eng/common/templates-official/variables/pool-providers.yml
+
+  ${{ if ne(parameters.pool, '') }}:
+    pool: ${{ parameters.pool }}
+  ${{ if eq(parameters.pool, '') }}:
+    pool:
+      # We don't use the collection uri here because it might vary (.visualstudio.com vs. dev.azure.com)
+      ${{ if eq(variables['System.TeamProject'], 'DevDiv') }}:
+        name: AzurePipelines-EO
+        image: 1ESPT-Windows2022
+        demands: Cmd
+        os: windows
+      # If it's not devdiv, it's dnceng
+      ${{ if ne(variables['System.TeamProject'], 'DevDiv') }}:
+        name: $(DncEngInternalBuildPool)
+        image: 1es-windows-2022-pt
+        os: windows
+
+  steps:
+    - ${{ if ne(parameters.SkipLocProjectJsonGeneration, 'true') }}:
+      - task: Powershell@2
+        inputs:
+          filePath: $(Build.SourcesDirectory)/eng/common/generate-locproject.ps1
+          arguments: $(_GenerateLocProjectArguments)
+        displayName: Generate LocProject.json
+        condition: ${{ parameters.condition }}
+
+    - task: OneLocBuild@2
+      displayName: OneLocBuild
+      env:
+        SYSTEM_ACCESSTOKEN: $(System.AccessToken)
+      inputs:
+        locProj: eng/Localize/LocProject.json
+        outDir: $(Build.ArtifactStagingDirectory)
+        lclSource: ${{ parameters.LclSource }}
+        lclPackageId: ${{ parameters.LclPackageId }}
+        isCreatePrSelected: ${{ parameters.CreatePr }}
+        isAutoCompletePrSelected: ${{ parameters.AutoCompletePr }}
+        ${{ if eq(parameters.CreatePr, true) }}:
+          isUseLfLineEndingsSelected: ${{ parameters.UseLfLineEndings }}
+          ${{ if eq(parameters.RepoType, 'gitHub') }}:
+            isShouldReusePrSelected: ${{ parameters.ReusePr }}
+        packageSourceAuth: patAuth
+        patVariable: ${{ parameters.CeapexPat }}
+        ${{ if eq(parameters.RepoType, 'gitHub') }}:
+          repoType: ${{ parameters.RepoType }}
+          gitHubPatVariable: "${{ parameters.GithubPat }}"
+        ${{ if ne(parameters.MirrorRepo, '') }}:
+          isMirrorRepoSelected: true
+          gitHubOrganization: ${{ parameters.GitHubOrg }}
+          mirrorRepo: ${{ parameters.MirrorRepo }}
+          mirrorBranch: ${{ parameters.MirrorBranch }}
+      condition: ${{ parameters.condition }}
+
+    - task: 1ES.PublishBuildArtifacts@1
+      displayName: Publish Localization Files
+      inputs:
+        PathtoPublish: '$(Build.ArtifactStagingDirectory)/loc'
+        PublishLocation: Container
+        ArtifactName: Loc
+      condition: ${{ parameters.condition }}
+
+    - task: 1ES.PublishBuildArtifacts@1
+      displayName: Publish LocProject.json
+      inputs:
+        PathtoPublish: '$(Build.SourcesDirectory)/eng/Localize/'
+        PublishLocation: Container
+        ArtifactName: Loc
+      condition: ${{ parameters.condition }}
\ No newline at end of file
diff --git a/eng/common/templates-official/job/publish-build-assets.yml b/eng/common/templates-official/job/publish-build-assets.yml
new file mode 100644
index 000000000000..38340d3e3861
--- /dev/null
+++ b/eng/common/templates-official/job/publish-build-assets.yml
@@ -0,0 +1,159 @@
+parameters:
+  configuration: 'Debug'
+
+  # Optional: condition for the job to run
+  condition: ''
+
+  # Optional: 'true' if future jobs should run even if this job fails
+  continueOnError: false
+
+  # Optional: dependencies of the job
+  dependsOn: ''
+
+  # Optional: Include PublishBuildArtifacts task
+  enablePublishBuildArtifacts: false
+
+  # Optional: A defined YAML pool - https://docs.microsoft.com/en-us/azure/devops/pipelines/yaml-schema?view=vsts&tabs=schema#pool
+  pool: {}
+
+  # Optional: should run as a public build even in the internal project
+  #           if 'true', the build won't run any of the internal only steps, even if it is running in non-public projects.
+  runAsPublic: false
+
+  # Optional: whether the build's artifacts will be published using release pipelines or direct feed publishing
+  publishUsingPipelines: false
+
+  # Optional: whether the build's artifacts will be published using release pipelines or direct feed publishing
+  publishAssetsImmediately: false
+
+  artifactsPublishingAdditionalParameters: ''
+
+  signingValidationAdditionalParameters: ''
+
+jobs:
+- job: Asset_Registry_Publish
+
+  dependsOn: ${{ parameters.dependsOn }}
+  timeoutInMinutes: 150
+
+  ${{ if eq(parameters.publishAssetsImmediately, 'true') }}:
+    displayName: Publish Assets
+  ${{ else }}:
+    displayName: Publish to Build Asset Registry
+
+  variables:
+  - template: /eng/common/templates-official/variables/pool-providers.yml
+  - ${{ if and(eq(parameters.runAsPublic, 'false'), ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+    - group: Publish-Build-Assets
+    - group: AzureDevOps-Artifact-Feeds-Pats
+    - name: runCodesignValidationInjection
+      value: false
+    # unconditional - needed for logs publishing (redactor tool version)
+    - template: /eng/common/templates-official/post-build/common-variables.yml
+
+  pool:
+    # We don't use the collection uri here because it might vary (.visualstudio.com vs. dev.azure.com)
+    ${{ if eq(variables['System.TeamProject'], 'DevDiv') }}:
+      name: AzurePipelines-EO
+      image: 1ESPT-Windows2022
+      demands: Cmd
+      os: windows
+    # If it's not devdiv, it's dnceng
+    ${{ if ne(variables['System.TeamProject'], 'DevDiv') }}:
+      name: NetCore1ESPool-Publishing-Internal
+      image: windows.vs2019.amd64
+      os: windows
+  steps:
+  - ${{ if and(eq(parameters.runAsPublic, 'false'), ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+    - checkout: self
+      fetchDepth: 3
+      clean: true
+      
+    - task: DownloadBuildArtifacts@0
+      displayName: Download artifact
+      inputs:
+        artifactName: AssetManifests
+        downloadPath: '$(Build.StagingDirectory)/Download'
+        checkDownloadedFiles: true
+      condition: ${{ parameters.condition }}
+      continueOnError: ${{ parameters.continueOnError }}
+    
+    - task: NuGetAuthenticate@1
+
+    - task: PowerShell@2
+      displayName: Publish Build Assets
+      inputs:
+        filePath: eng\common\sdk-task.ps1
+        arguments: -task PublishBuildAssets -restore -msbuildEngine dotnet
+          /p:ManifestsPath='$(Build.StagingDirectory)/Download/AssetManifests'
+          /p:BuildAssetRegistryToken=$(MaestroAccessToken)
+          /p:MaestroApiEndpoint=https://maestro.dot.net
+          /p:PublishUsingPipelines=${{ parameters.publishUsingPipelines }}
+          /p:OfficialBuildId=$(Build.BuildNumber)
+      condition: ${{ parameters.condition }}
+      continueOnError: ${{ parameters.continueOnError }}
+    
+    - task: powershell@2
+      displayName: Create ReleaseConfigs Artifact
+      inputs:
+        targetType: inline
+        script: |
+          New-Item -Path "$(Build.StagingDirectory)/ReleaseConfigs" -ItemType Directory -Force
+          $filePath = "$(Build.StagingDirectory)/ReleaseConfigs/ReleaseConfigs.txt"
+          Add-Content -Path $filePath -Value $(BARBuildId)
+          Add-Content -Path $filePath -Value "$(DefaultChannels)"
+          Add-Content -Path $filePath -Value $(IsStableBuild)
+    
+    - task: 1ES.PublishBuildArtifacts@1
+      displayName: Publish ReleaseConfigs Artifact
+      inputs:
+        PathtoPublish: '$(Build.StagingDirectory)/ReleaseConfigs'
+        PublishLocation: Container
+        ArtifactName: ReleaseConfigs
+
+    - task: powershell@2
+      displayName: Check if SymbolPublishingExclusionsFile.txt exists
+      inputs:
+        targetType: inline
+        script: |
+          $symbolExclusionfile = "$(Build.SourcesDirectory)/eng/SymbolPublishingExclusionsFile.txt"
+          if(Test-Path -Path $symbolExclusionfile)
+          {
+            Write-Host "SymbolExclusionFile exists"
+            Write-Host "##vso[task.setvariable variable=SymbolExclusionFile]true"
+          }
+          else{
+           Write-Host "Symbols Exclusion file does not exists"
+           Write-Host "##vso[task.setvariable variable=SymbolExclusionFile]false"
+          }
+
+    - task: 1ES.PublishBuildArtifacts@1
+      displayName: Publish SymbolPublishingExclusionsFile Artifact
+      condition: eq(variables['SymbolExclusionFile'], 'true') 
+      inputs:
+        PathtoPublish: '$(Build.SourcesDirectory)/eng/SymbolPublishingExclusionsFile.txt'
+        PublishLocation: Container
+        ArtifactName: ReleaseConfigs
+
+    - ${{ if eq(parameters.publishAssetsImmediately, 'true') }}:
+      - template: /eng/common/templates-official/post-build/setup-maestro-vars.yml
+        parameters:
+          BARBuildId: ${{ parameters.BARBuildId }}
+          PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }}
+
+      - task: PowerShell@2
+        displayName: Publish Using Darc
+        inputs:
+          filePath: $(Build.SourcesDirectory)/eng/common/post-build/publish-using-darc.ps1
+          arguments: -BuildId $(BARBuildId) 
+            -PublishingInfraVersion 3
+            -AzdoToken '$(publishing-dnceng-devdiv-code-r-build-re)'
+            -MaestroToken '$(MaestroApiAccessToken)'
+            -WaitPublishingFinish true
+            -ArtifactsPublishingAdditionalParameters '${{ parameters.artifactsPublishingAdditionalParameters }}'
+            -SymbolPublishingAdditionalParameters '${{ parameters.symbolPublishingAdditionalParameters }}'
+
+    - ${{ if eq(parameters.enablePublishBuildArtifacts, 'true') }}:
+      - template: /eng/common/templates-official/steps/publish-logs.yml
+        parameters:
+          JobLabel: 'Publish_Artifacts_Logs'     
diff --git a/eng/common/templates-official/job/source-build.yml b/eng/common/templates-official/job/source-build.yml
new file mode 100644
index 000000000000..50f04e642a35
--- /dev/null
+++ b/eng/common/templates-official/job/source-build.yml
@@ -0,0 +1,67 @@
+parameters:
+  # This template adds arcade-powered source-build to CI. The template produces a server job with a
+  # default ID 'Source_Build_Complete' to put in a dependency list if necessary.
+
+  # Specifies the prefix for source-build jobs added to pipeline. Use this if disambiguation needed.
+  jobNamePrefix: 'Source_Build'
+
+  # Defines the platform on which to run the job. By default, a linux-x64 machine, suitable for
+  # managed-only repositories. This is an object with these properties:
+  #
+  # name: ''
+  #   The name of the job. This is included in the job ID.
+  # targetRID: ''
+  #   The name of the target RID to use, instead of the one auto-detected by Arcade.
+  # nonPortable: false
+  #   Enables non-portable mode. This means a more specific RID (e.g. fedora.32-x64 rather than
+  #   linux-x64), and compiling against distro-provided packages rather than portable ones.
+  # skipPublishValidation: false
+  #   Disables publishing validation.  By default, a check is performed to ensure no packages are
+  #   published by source-build.
+  # container: ''
+  #   A container to use. Runs in docker.
+  # pool: {}
+  #   A pool to use. Runs directly on an agent.
+  # buildScript: ''
+  #   Specifies the build script to invoke to perform the build in the repo. The default
+  #   './build.sh' should work for typical Arcade repositories, but this is customizable for
+  #   difficult situations.
+  # jobProperties: {}
+  #   A list of job properties to inject at the top level, for potential extensibility beyond
+  #   container and pool.
+  platform: {}
+
+jobs:
+- job: ${{ parameters.jobNamePrefix }}_${{ parameters.platform.name }}
+  displayName: Source-Build (${{ parameters.platform.name }})
+
+  ${{ each property in parameters.platform.jobProperties }}:
+    ${{ property.key }}: ${{ property.value }}
+
+  ${{ if ne(parameters.platform.container, '') }}:
+    container: ${{ parameters.platform.container }}
+
+  ${{ if eq(parameters.platform.pool, '') }}:
+    # The default VM host AzDO pool. This should be capable of running Docker containers: almost all
+    # source-build builds run in Docker, including the default managed platform.
+    # /eng/common/templates-official/variables/pool-providers.yml can't be used here (some customers declare variables already), so duplicate its logic
+    pool:
+      ${{ if eq(variables['System.TeamProject'], 'public') }}:
+        name: $[replace(replace(eq(contains(coalesce(variables['System.PullRequest.TargetBranch'], variables['Build.SourceBranch'], 'refs/heads/main'), 'release'), 'true'), True, 'NetCore-Svc-Public' ), False, 'NetCore-Public')]
+        demands: ImageOverride -equals build.ubuntu.1804.amd64
+
+      ${{ if eq(variables['System.TeamProject'], 'internal') }}:
+        name: $[replace(replace(eq(contains(coalesce(variables['System.PullRequest.TargetBranch'], variables['Build.SourceBranch'], 'refs/heads/main'), 'release'), 'true'), True, 'NetCore1ESPool-Svc-Internal'), False, 'NetCore1ESPool-Internal')]
+        image: 1es-mariner-2-pt
+        os: linux
+
+  ${{ if ne(parameters.platform.pool, '') }}:
+    pool: ${{ parameters.platform.pool }}
+
+  workspace:
+    clean: all
+
+  steps:
+  - template: /eng/common/templates-official/steps/source-build.yml
+    parameters:
+      platform: ${{ parameters.platform }}
diff --git a/eng/common/templates-official/job/source-index-stage1.yml b/eng/common/templates-official/job/source-index-stage1.yml
new file mode 100644
index 000000000000..53a9ef51fd82
--- /dev/null
+++ b/eng/common/templates-official/job/source-index-stage1.yml
@@ -0,0 +1,67 @@
+parameters:
+  runAsPublic: false
+  sourceIndexPackageVersion: 1.0.1-20240129.2
+  sourceIndexPackageSource: https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json
+  sourceIndexBuildCommand: powershell -NoLogo -NoProfile -ExecutionPolicy Bypass -Command "eng/common/build.ps1 -restore -build -binarylog -ci"
+  preSteps: []
+  binlogPath: artifacts/log/Debug/Build.binlog
+  condition: ''
+  dependsOn: ''
+  pool: ''
+
+jobs:
+- job: SourceIndexStage1
+  dependsOn: ${{ parameters.dependsOn }}
+  condition: ${{ parameters.condition }}
+  variables:
+  - name: SourceIndexPackageVersion
+    value: ${{ parameters.sourceIndexPackageVersion }}
+  - name: SourceIndexPackageSource
+    value: ${{ parameters.sourceIndexPackageSource }}
+  - name: BinlogPath
+    value: ${{ parameters.binlogPath }}
+  - ${{ if and(eq(parameters.runAsPublic, 'false'), ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+    - group: source-dot-net stage1 variables
+  - template: /eng/common/templates-official/variables/pool-providers.yml
+
+  ${{ if ne(parameters.pool, '') }}:
+    pool: ${{ parameters.pool }}
+  ${{ if eq(parameters.pool, '') }}:
+    pool:
+      ${{ if eq(variables['System.TeamProject'], 'public') }}:
+        name: $(DncEngPublicBuildPool)
+        image: windows.vs2022.amd64.open
+      ${{ if eq(variables['System.TeamProject'], 'internal') }}:
+        name: $(DncEngInternalBuildPool)
+        image: windows.vs2022.amd64
+
+  steps:
+  - ${{ each preStep in parameters.preSteps }}:
+    - ${{ preStep }}
+
+  - task: UseDotNet@2
+    displayName: Use .NET 8 SDK
+    inputs:
+      packageType: sdk
+      version: 8.0.x
+      installationPath: $(Agent.TempDirectory)/dotnet
+      workingDirectory: $(Agent.TempDirectory)
+
+  - script: |
+      $(Agent.TempDirectory)/dotnet/dotnet tool install BinLogToSln --version $(SourceIndexPackageVersion) --add-source $(SourceIndexPackageSource) --tool-path $(Agent.TempDirectory)/.source-index/tools
+      $(Agent.TempDirectory)/dotnet/dotnet tool install UploadIndexStage1 --version $(SourceIndexPackageVersion) --add-source $(SourceIndexPackageSource) --tool-path $(Agent.TempDirectory)/.source-index/tools
+    displayName: Download Tools
+    # Set working directory to temp directory so 'dotnet' doesn't try to use global.json and use the repo's sdk.
+    workingDirectory: $(Agent.TempDirectory)
+
+  - script: ${{ parameters.sourceIndexBuildCommand }}
+    displayName: Build Repository
+
+  - script: $(Agent.TempDirectory)/.source-index/tools/BinLogToSln -i $(BinlogPath) -r $(Build.SourcesDirectory) -n $(Build.Repository.Name) -o .source-index/stage1output
+    displayName: Process Binlog into indexable sln
+
+  - ${{ if and(eq(parameters.runAsPublic, 'false'), ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+    - script: $(Agent.TempDirectory)/.source-index/tools/UploadIndexStage1 -i .source-index/stage1output -n $(Build.Repository.Name)
+      displayName: Upload stage1 artifacts to source index
+      env:
+        BLOB_CONTAINER_URL: $(source-dot-net-stage1-blob-container-url)
diff --git a/eng/common/templates-official/jobs/codeql-build.yml b/eng/common/templates-official/jobs/codeql-build.yml
new file mode 100644
index 000000000000..b68d3c2f3199
--- /dev/null
+++ b/eng/common/templates-official/jobs/codeql-build.yml
@@ -0,0 +1,31 @@
+parameters:
+  # See schema documentation in /Documentation/AzureDevOps/TemplateSchema.md
+  continueOnError: false
+  # Required: A collection of jobs to run - https://docs.microsoft.com/en-us/azure/devops/pipelines/yaml-schema?view=vsts&tabs=schema#job
+  jobs: []
+  # Optional: if specified, restore and use this version of Guardian instead of the default.
+  overrideGuardianVersion: ''
+
+jobs:
+- template: /eng/common/templates-official/jobs/jobs.yml
+  parameters:
+    enableMicrobuild: false
+    enablePublishBuildArtifacts: false
+    enablePublishTestResults: false
+    enablePublishBuildAssets: false
+    enablePublishUsingPipelines: false
+    enableTelemetry: true
+
+    variables:
+      - group: Publish-Build-Assets
+      # The Guardian version specified in 'eng/common/sdl/packages.config'. This value must be kept in
+      # sync with the packages.config file.
+      - name: DefaultGuardianVersion
+        value: 0.109.0
+      - name: GuardianPackagesConfigFile
+        value: $(Build.SourcesDirectory)\eng\common\sdl\packages.config
+      - name: GuardianVersion
+        value: ${{ coalesce(parameters.overrideGuardianVersion, '$(DefaultGuardianVersion)') }}
+  
+    jobs: ${{ parameters.jobs }}
+        
diff --git a/eng/common/templates-official/jobs/jobs.yml b/eng/common/templates-official/jobs/jobs.yml
new file mode 100644
index 000000000000..857a0f8ba43e
--- /dev/null
+++ b/eng/common/templates-official/jobs/jobs.yml
@@ -0,0 +1,97 @@
+parameters:
+  # See schema documentation in /Documentation/AzureDevOps/TemplateSchema.md
+  continueOnError: false
+
+  # Optional: Include PublishBuildArtifacts task
+  enablePublishBuildArtifacts: false
+
+  # Optional: Enable publishing using release pipelines
+  enablePublishUsingPipelines: false
+
+  # Optional: Enable running the source-build jobs to build repo from source
+  enableSourceBuild: false
+
+  # Optional: Parameters for source-build template.
+  #           See /eng/common/templates-official/jobs/source-build.yml for options
+  sourceBuildParameters: []
+
+  graphFileGeneration:
+    # Optional: Enable generating the graph files at the end of the build
+    enabled: false
+    # Optional: Include toolset dependencies in the generated graph files
+    includeToolset: false
+    
+  # Required: A collection of jobs to run - https://docs.microsoft.com/en-us/azure/devops/pipelines/yaml-schema?view=vsts&tabs=schema#job
+  jobs: []
+
+  # Optional: Override automatically derived dependsOn value for "publish build assets" job
+  publishBuildAssetsDependsOn: ''
+
+  # Optional: Publish the assets as soon as the publish to BAR stage is complete, rather doing so in a separate stage.
+  publishAssetsImmediately: false
+
+  # Optional: If using publishAssetsImmediately and additional parameters are needed, can be used to send along additional parameters (normally sent to post-build.yml)
+  artifactsPublishingAdditionalParameters: ''
+  signingValidationAdditionalParameters: ''
+
+  # Optional: should run as a public build even in the internal project
+  #           if 'true', the build won't run any of the internal only steps, even if it is running in non-public projects.
+  runAsPublic: false
+
+  enableSourceIndex: false
+  sourceIndexParams: {}
+
+# Internal resources (telemetry, microbuild) can only be accessed from non-public projects,
+# and some (Microbuild) should only be applied to non-PR cases for internal builds.
+
+jobs:
+- ${{ each job in parameters.jobs }}:
+  - template: ../job/job.yml
+    parameters: 
+      # pass along parameters
+      ${{ each parameter in parameters }}:
+        ${{ if ne(parameter.key, 'jobs') }}:
+          ${{ parameter.key }}: ${{ parameter.value }}
+
+      # pass along job properties
+      ${{ each property in job }}:
+        ${{ if ne(property.key, 'job') }}:
+          ${{ property.key }}: ${{ property.value }}
+
+      name: ${{ job.job }}
+
+- ${{ if eq(parameters.enableSourceBuild, true) }}:
+  - template: /eng/common/templates-official/jobs/source-build.yml
+    parameters:
+      allCompletedJobId: Source_Build_Complete
+      ${{ each parameter in parameters.sourceBuildParameters }}:
+        ${{ parameter.key }}: ${{ parameter.value }}
+
+- ${{ if eq(parameters.enableSourceIndex, 'true') }}:
+  - template: ../job/source-index-stage1.yml
+    parameters:
+      runAsPublic: ${{ parameters.runAsPublic }}
+      ${{ each parameter in parameters.sourceIndexParams }}:
+        ${{ parameter.key }}: ${{ parameter.value }}
+
+- ${{ if and(eq(parameters.runAsPublic, 'false'), ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+  - ${{ if or(eq(parameters.enablePublishBuildAssets, true), eq(parameters.artifacts.publish.manifests, 'true'), ne(parameters.artifacts.publish.manifests, '')) }}:
+    - template: ../job/publish-build-assets.yml
+      parameters:
+        continueOnError: ${{ parameters.continueOnError }}
+        dependsOn:
+        - ${{ if ne(parameters.publishBuildAssetsDependsOn, '') }}:
+          - ${{ each job in parameters.publishBuildAssetsDependsOn }}:
+            - ${{ job.job }}
+        - ${{ if eq(parameters.publishBuildAssetsDependsOn, '') }}:
+          - ${{ each job in parameters.jobs }}:
+            - ${{ job.job }}
+        - ${{ if eq(parameters.enableSourceBuild, true) }}:
+          - Source_Build_Complete
+
+        runAsPublic: ${{ parameters.runAsPublic }}
+        publishUsingPipelines: ${{ parameters.enablePublishUsingPipelines }}
+        publishAssetsImmediately: ${{ parameters.publishAssetsImmediately }}
+        enablePublishBuildArtifacts: ${{ parameters.enablePublishBuildArtifacts }}
+        artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }}
+        signingValidationAdditionalParameters: ${{ parameters.signingValidationAdditionalParameters }}
diff --git a/eng/common/templates-official/jobs/source-build.yml b/eng/common/templates-official/jobs/source-build.yml
new file mode 100644
index 000000000000..08e5db9bb116
--- /dev/null
+++ b/eng/common/templates-official/jobs/source-build.yml
@@ -0,0 +1,46 @@
+parameters:
+  # This template adds arcade-powered source-build to CI. A job is created for each platform, as
+  # well as an optional server job that completes when all platform jobs complete.
+
+  # The name of the "join" job for all source-build platforms. If set to empty string, the job is
+  # not included. Existing repo pipelines can use this job depend on all source-build jobs
+  # completing without maintaining a separate list of every single job ID: just depend on this one
+  # server job. By default, not included. Recommended name if used: 'Source_Build_Complete'.
+  allCompletedJobId: ''
+
+  # See /eng/common/templates-official/job/source-build.yml
+  jobNamePrefix: 'Source_Build'
+
+  # This is the default platform provided by Arcade, intended for use by a managed-only repo.
+  defaultManagedPlatform:
+    name: 'Managed'
+    container: 'mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream8'
+
+  # Defines the platforms on which to run build jobs. One job is created for each platform, and the
+  # object in this array is sent to the job template as 'platform'. If no platforms are specified,
+  # one job runs on 'defaultManagedPlatform'.
+  platforms: []
+
+jobs:
+
+- ${{ if ne(parameters.allCompletedJobId, '') }}:
+  - job: ${{ parameters.allCompletedJobId }}
+    displayName: Source-Build Complete
+    pool: server
+    dependsOn:
+    - ${{ each platform in parameters.platforms }}:
+      - ${{ parameters.jobNamePrefix }}_${{ platform.name }}
+    - ${{ if eq(length(parameters.platforms), 0) }}:
+      - ${{ parameters.jobNamePrefix }}_${{ parameters.defaultManagedPlatform.name }}
+
+- ${{ each platform in parameters.platforms }}:
+  - template: /eng/common/templates-official/job/source-build.yml
+    parameters:
+      jobNamePrefix: ${{ parameters.jobNamePrefix }}
+      platform: ${{ platform }}
+
+- ${{ if eq(length(parameters.platforms), 0) }}:
+  - template: /eng/common/templates-official/job/source-build.yml
+    parameters:
+      jobNamePrefix: ${{ parameters.jobNamePrefix }}
+      platform: ${{ parameters.defaultManagedPlatform }}
diff --git a/eng/common/templates-official/post-build/common-variables.yml b/eng/common/templates-official/post-build/common-variables.yml
new file mode 100644
index 000000000000..b9ede10bf099
--- /dev/null
+++ b/eng/common/templates-official/post-build/common-variables.yml
@@ -0,0 +1,24 @@
+variables:
+  - group: Publish-Build-Assets
+
+  # Whether the build is internal or not
+  - name: IsInternalBuild
+    value: ${{ and(ne(variables['System.TeamProject'], 'public'), contains(variables['Build.SourceBranch'], 'internal')) }}
+
+  # Default Maestro++ API Endpoint and API Version
+  - name: MaestroApiEndPoint
+    value: "https://maestro.dot.net"
+  - name: MaestroApiAccessToken
+    value: $(MaestroAccessToken)
+  - name: MaestroApiVersion
+    value: "2020-02-20"
+
+  - name: SourceLinkCLIVersion
+    value: 3.0.0
+  - name: SymbolToolVersion
+    value: 1.0.1
+  - name: BinlogToolVersion
+    value: 1.0.11
+
+  - name: runCodesignValidationInjection
+    value: false
diff --git a/eng/common/templates-official/post-build/post-build.yml b/eng/common/templates-official/post-build/post-build.yml
new file mode 100644
index 000000000000..d286e956bdfa
--- /dev/null
+++ b/eng/common/templates-official/post-build/post-build.yml
@@ -0,0 +1,285 @@
+parameters:
+  # Which publishing infra should be used. THIS SHOULD MATCH THE VERSION ON THE BUILD MANIFEST.
+  # Publishing V1 is no longer supported
+  # Publishing V2 is no longer supported
+  # Publishing V3 is the default
+  - name: publishingInfraVersion
+    displayName: Which version of publishing should be used to promote the build definition?
+    type: number
+    default: 3
+    values:
+    - 3
+
+  - name: BARBuildId
+    displayName: BAR Build Id
+    type: number
+    default: 0
+
+  - name: PromoteToChannelIds
+    displayName: Channel to promote BARBuildId to
+    type: string
+    default: ''
+
+  - name: enableSourceLinkValidation
+    displayName: Enable SourceLink validation
+    type: boolean
+    default: false
+
+  - name: enableSigningValidation
+    displayName: Enable signing validation
+    type: boolean
+    default: true
+
+  - name: enableSymbolValidation
+    displayName: Enable symbol validation
+    type: boolean
+    default: false
+
+  - name: enableNugetValidation
+    displayName: Enable NuGet validation
+    type: boolean
+    default: true
+    
+  - name: publishInstallersAndChecksums
+    displayName: Publish installers and checksums
+    type: boolean
+    default: true
+
+  - name: SDLValidationParameters
+    type: object
+    default:
+      enable: false
+      publishGdn: false
+      continueOnError: false
+      params: ''
+      artifactNames: ''
+      downloadArtifacts: true
+
+  # These parameters let the user customize the call to sdk-task.ps1 for publishing
+  # symbols & general artifacts as well as for signing validation
+  - name: symbolPublishingAdditionalParameters
+    displayName: Symbol publishing additional parameters
+    type: string
+    default: ''
+
+  - name: artifactsPublishingAdditionalParameters
+    displayName: Artifact publishing additional parameters
+    type: string
+    default: ''
+
+  - name: signingValidationAdditionalParameters
+    displayName: Signing validation additional parameters
+    type: string
+    default: ''
+
+  # Which stages should finish execution before post-build stages start
+  - name: validateDependsOn
+    type: object
+    default:
+    - build
+
+  - name: publishDependsOn
+    type: object
+    default:
+    - Validate
+
+  # Optional: Call asset publishing rather than running in a separate stage
+  - name: publishAssetsImmediately
+    type: boolean
+    default: false
+
+stages:
+- ${{ if or(eq( parameters.enableNugetValidation, 'true'), eq(parameters.enableSigningValidation, 'true'), eq(parameters.enableSourceLinkValidation, 'true'), eq(parameters.SDLValidationParameters.enable, 'true')) }}:
+  - stage: Validate
+    dependsOn: ${{ parameters.validateDependsOn }}
+    displayName: Validate Build Assets
+    variables:
+      - template: common-variables.yml
+      - template: /eng/common/templates-official/variables/pool-providers.yml
+    jobs:
+    - job:
+      displayName: NuGet Validation
+      condition: and(succeededOrFailed(), eq( ${{ parameters.enableNugetValidation }}, 'true'))
+      pool:
+        # We don't use the collection uri here because it might vary (.visualstudio.com vs. dev.azure.com)
+        ${{ if eq(variables['System.TeamProject'], 'DevDiv') }}:
+          name: AzurePipelines-EO
+          image: 1ESPT-Windows2022
+          demands: Cmd
+          os: windows
+        # If it's not devdiv, it's dnceng
+        ${{ else }}:
+          name: $(DncEngInternalBuildPool)
+          image: 1es-windows-2022-pt
+          os: windows
+
+      steps:
+        - template: setup-maestro-vars.yml
+          parameters:
+            BARBuildId: ${{ parameters.BARBuildId }}
+            PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }}
+
+        - task: DownloadBuildArtifacts@0
+          displayName: Download Package Artifacts
+          inputs:
+            buildType: specific
+            buildVersionToDownload: specific
+            project: $(AzDOProjectName)
+            pipeline: $(AzDOPipelineId)
+            buildId: $(AzDOBuildId)
+            artifactName: PackageArtifacts
+            checkDownloadedFiles: true
+
+        - task: PowerShell@2
+          displayName: Validate
+          inputs:
+            filePath: $(Build.SourcesDirectory)/eng/common/post-build/nuget-validation.ps1
+            arguments: -PackagesPath $(Build.ArtifactStagingDirectory)/PackageArtifacts/ 
+              -ToolDestinationPath $(Agent.BuildDirectory)/Extract/ 
+
+    - job:
+      displayName: Signing Validation
+      condition: and( eq( ${{ parameters.enableSigningValidation }}, 'true'), ne( variables['PostBuildSign'], 'true'))
+      pool:
+        # We don't use the collection uri here because it might vary (.visualstudio.com vs. dev.azure.com)
+        ${{ if eq(variables['System.TeamProject'], 'DevDiv') }}:
+          name: AzurePipelines-EO
+          image: 1ESPT-Windows2022
+          demands: Cmd
+          os: windows
+        # If it's not devdiv, it's dnceng
+        ${{ else }}:
+          name: $(DncEngInternalBuildPool)
+          image: 1es-windows-2022-pt
+          os: windows
+      steps:
+        - template: setup-maestro-vars.yml
+          parameters:
+            BARBuildId: ${{ parameters.BARBuildId }}
+            PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }}
+
+        - task: DownloadBuildArtifacts@0
+          displayName: Download Package Artifacts
+          inputs:
+            buildType: specific
+            buildVersionToDownload: specific
+            project: $(AzDOProjectName)
+            pipeline: $(AzDOPipelineId)
+            buildId: $(AzDOBuildId)
+            artifactName: PackageArtifacts
+            checkDownloadedFiles: true
+            itemPattern: |
+              **
+              !**/Microsoft.SourceBuild.Intermediate.*.nupkg
+
+        # This is necessary whenever we want to publish/restore to an AzDO private feed
+        # Since sdk-task.ps1 tries to restore packages we need to do this authentication here
+        # otherwise it'll complain about accessing a private feed.
+        - task: NuGetAuthenticate@1
+          displayName: 'Authenticate to AzDO Feeds'
+
+        # Signing validation will optionally work with the buildmanifest file which is downloaded from
+        # Azure DevOps above.
+        - task: PowerShell@2
+          displayName: Validate
+          inputs:
+            filePath: eng\common\sdk-task.ps1
+            arguments: -task SigningValidation -restore -msbuildEngine vs
+              /p:PackageBasePath='$(Build.ArtifactStagingDirectory)/PackageArtifacts'
+              /p:SignCheckExclusionsFile='$(Build.SourcesDirectory)/eng/SignCheckExclusionsFile.txt'
+              ${{ parameters.signingValidationAdditionalParameters }}
+
+        - template: ../steps/publish-logs.yml
+          parameters:
+            StageLabel: 'Validation'
+            JobLabel: 'Signing'
+            BinlogToolVersion: $(BinlogToolVersion)
+
+    - job:
+      displayName: SourceLink Validation
+      condition: eq( ${{ parameters.enableSourceLinkValidation }}, 'true')
+      pool:
+        # We don't use the collection uri here because it might vary (.visualstudio.com vs. dev.azure.com)
+        ${{ if eq(variables['System.TeamProject'], 'DevDiv') }}:
+          name: AzurePipelines-EO
+          image: 1ESPT-Windows2022
+          demands: Cmd
+          os: windows
+        # If it's not devdiv, it's dnceng
+        ${{ else }}:
+          name: $(DncEngInternalBuildPool)
+          image: 1es-windows-2022-pt
+          os: windows
+      steps:
+        - template: setup-maestro-vars.yml
+          parameters:
+            BARBuildId: ${{ parameters.BARBuildId }}
+            PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }}
+
+        - task: DownloadBuildArtifacts@0
+          displayName: Download Blob Artifacts
+          inputs:
+            buildType: specific
+            buildVersionToDownload: specific
+            project: $(AzDOProjectName)
+            pipeline: $(AzDOPipelineId)
+            buildId: $(AzDOBuildId)
+            artifactName: BlobArtifacts
+            checkDownloadedFiles: true
+
+        - task: PowerShell@2
+          displayName: Validate
+          inputs:
+            filePath: $(Build.SourcesDirectory)/eng/common/post-build/sourcelink-validation.ps1
+            arguments: -InputPath $(Build.ArtifactStagingDirectory)/BlobArtifacts/ 
+              -ExtractPath $(Agent.BuildDirectory)/Extract/ 
+              -GHRepoName $(Build.Repository.Name) 
+              -GHCommit $(Build.SourceVersion)
+              -SourcelinkCliVersion $(SourceLinkCLIVersion)
+          continueOnError: true
+
+- ${{ if ne(parameters.publishAssetsImmediately, 'true') }}:
+  - stage: publish_using_darc
+    ${{ if or(eq(parameters.enableNugetValidation, 'true'), eq(parameters.enableSigningValidation, 'true'), eq(parameters.enableSourceLinkValidation, 'true'), eq(parameters.SDLValidationParameters.enable, 'true')) }}:
+      dependsOn: ${{ parameters.publishDependsOn }}
+    ${{ else }}:
+      dependsOn: ${{ parameters.validateDependsOn }}
+    displayName: Publish using Darc
+    variables:
+      - template: common-variables.yml
+      - template: /eng/common/templates-official/variables/pool-providers.yml
+    jobs:
+    - job:
+      displayName: Publish Using Darc
+      timeoutInMinutes: 120
+      pool:
+        # We don't use the collection uri here because it might vary (.visualstudio.com vs. dev.azure.com)
+        ${{ if eq(variables['System.TeamProject'], 'DevDiv') }}:
+          name: AzurePipelines-EO
+          image: 1ESPT-Windows2022
+          demands: Cmd
+          os: windows
+        # If it's not devdiv, it's dnceng
+        ${{ else }}:
+          name: NetCore1ESPool-Publishing-Internal
+          image: windows.vs2019.amd64
+          os: windows
+      steps:
+        - template: setup-maestro-vars.yml
+          parameters:
+            BARBuildId: ${{ parameters.BARBuildId }}
+            PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }}
+
+        - task: NuGetAuthenticate@1
+
+        - task: PowerShell@2
+          displayName: Publish Using Darc
+          inputs:
+            filePath: $(Build.SourcesDirectory)/eng/common/post-build/publish-using-darc.ps1
+            arguments: -BuildId $(BARBuildId) 
+              -PublishingInfraVersion ${{ parameters.publishingInfraVersion }}
+              -AzdoToken '$(publishing-dnceng-devdiv-code-r-build-re)'
+              -MaestroToken '$(MaestroApiAccessToken)'
+              -WaitPublishingFinish true
+              -ArtifactsPublishingAdditionalParameters '${{ parameters.artifactsPublishingAdditionalParameters }}'
+              -SymbolPublishingAdditionalParameters '${{ parameters.symbolPublishingAdditionalParameters }}'
diff --git a/eng/common/templates-official/post-build/setup-maestro-vars.yml b/eng/common/templates-official/post-build/setup-maestro-vars.yml
new file mode 100644
index 000000000000..0c87f149a4ad
--- /dev/null
+++ b/eng/common/templates-official/post-build/setup-maestro-vars.yml
@@ -0,0 +1,70 @@
+parameters:
+  BARBuildId: ''
+  PromoteToChannelIds: ''
+
+steps:
+  - ${{ if eq(coalesce(parameters.PromoteToChannelIds, 0), 0) }}:
+    - task: DownloadBuildArtifacts@0
+      displayName: Download Release Configs
+      inputs:
+        buildType: current
+        artifactName: ReleaseConfigs
+        checkDownloadedFiles: true
+
+  - task: PowerShell@2
+    name: setReleaseVars
+    displayName: Set Release Configs Vars
+    inputs:
+      targetType: inline
+      pwsh: true
+      script: |
+        try {
+          if (!$Env:PromoteToMaestroChannels -or $Env:PromoteToMaestroChannels.Trim() -eq '') {
+            $Content = Get-Content $(Build.StagingDirectory)/ReleaseConfigs/ReleaseConfigs.txt
+
+            $BarId = $Content | Select -Index 0
+            $Channels = $Content | Select -Index 1             
+            $IsStableBuild = $Content | Select -Index 2
+
+            $AzureDevOpsProject = $Env:System_TeamProject
+            $AzureDevOpsBuildDefinitionId = $Env:System_DefinitionId
+            $AzureDevOpsBuildId = $Env:Build_BuildId
+          }
+          else {
+            $buildApiEndpoint = "${Env:MaestroApiEndPoint}/api/builds/${Env:BARBuildId}?api-version=${Env:MaestroApiVersion}"
+
+            $apiHeaders = New-Object 'System.Collections.Generic.Dictionary[[String],[String]]'
+            $apiHeaders.Add('Accept', 'application/json')
+            $apiHeaders.Add('Authorization',"Bearer ${Env:MAESTRO_API_TOKEN}")
+
+            $buildInfo = try { Invoke-WebRequest -Method Get -Uri $buildApiEndpoint -Headers $apiHeaders | ConvertFrom-Json } catch { Write-Host "Error: $_" }
+            
+            $BarId = $Env:BARBuildId
+            $Channels = $Env:PromoteToMaestroChannels -split ","
+            $Channels = $Channels -join "]["
+            $Channels = "[$Channels]"
+
+            $IsStableBuild = $buildInfo.stable
+            $AzureDevOpsProject = $buildInfo.azureDevOpsProject
+            $AzureDevOpsBuildDefinitionId = $buildInfo.azureDevOpsBuildDefinitionId
+            $AzureDevOpsBuildId = $buildInfo.azureDevOpsBuildId
+          }
+
+          Write-Host "##vso[task.setvariable variable=BARBuildId]$BarId"
+          Write-Host "##vso[task.setvariable variable=TargetChannels]$Channels"
+          Write-Host "##vso[task.setvariable variable=IsStableBuild]$IsStableBuild"
+
+          Write-Host "##vso[task.setvariable variable=AzDOProjectName]$AzureDevOpsProject"
+          Write-Host "##vso[task.setvariable variable=AzDOPipelineId]$AzureDevOpsBuildDefinitionId"
+          Write-Host "##vso[task.setvariable variable=AzDOBuildId]$AzureDevOpsBuildId"
+        }
+        catch {
+          Write-Host $_
+          Write-Host $_.Exception
+          Write-Host $_.ScriptStackTrace
+          exit 1
+        }
+    env:
+      MAESTRO_API_TOKEN: $(MaestroApiAccessToken)
+      BARBuildId: ${{ parameters.BARBuildId }}
+      PromoteToMaestroChannels: ${{ parameters.PromoteToChannelIds }}
diff --git a/eng/common/templates-official/post-build/trigger-subscription.yml b/eng/common/templates-official/post-build/trigger-subscription.yml
new file mode 100644
index 000000000000..da669030daf6
--- /dev/null
+++ b/eng/common/templates-official/post-build/trigger-subscription.yml
@@ -0,0 +1,13 @@
+parameters:
+  ChannelId: 0
+
+steps:
+- task: PowerShell@2
+  displayName: Triggering subscriptions
+  inputs:
+    filePath: $(Build.SourcesDirectory)/eng/common/post-build/trigger-subscriptions.ps1
+    arguments: -SourceRepo $(Build.Repository.Uri)
+      -ChannelId ${{ parameters.ChannelId }}
+      -MaestroApiAccessToken $(MaestroAccessToken)
+      -MaestroApiEndPoint $(MaestroApiEndPoint)
+      -MaestroApiVersion $(MaestroApiVersion)
diff --git a/eng/common/templates-official/steps/add-build-to-channel.yml b/eng/common/templates-official/steps/add-build-to-channel.yml
new file mode 100644
index 000000000000..f67a210d62f3
--- /dev/null
+++ b/eng/common/templates-official/steps/add-build-to-channel.yml
@@ -0,0 +1,13 @@
+parameters:
+  ChannelId: 0
+
+steps:
+- task: PowerShell@2
+  displayName: Add Build to Channel
+  inputs:
+    filePath: $(Build.SourcesDirectory)/eng/common/post-build/add-build-to-channel.ps1
+    arguments: -BuildId $(BARBuildId) 
+      -ChannelId ${{ parameters.ChannelId }}
+      -MaestroApiAccessToken $(MaestroApiAccessToken)
+      -MaestroApiEndPoint $(MaestroApiEndPoint)
+      -MaestroApiVersion $(MaestroApiVersion) 
diff --git a/eng/common/templates-official/steps/component-governance.yml b/eng/common/templates-official/steps/component-governance.yml
new file mode 100644
index 000000000000..0ecec47b0c91
--- /dev/null
+++ b/eng/common/templates-official/steps/component-governance.yml
@@ -0,0 +1,13 @@
+parameters:
+  disableComponentGovernance: false
+  componentGovernanceIgnoreDirectories: ''
+
+steps:
+- ${{ if eq(parameters.disableComponentGovernance, 'true') }}:
+  - script: "echo ##vso[task.setvariable variable=skipComponentGovernanceDetection]true"
+    displayName: Set skipComponentGovernanceDetection variable
+- ${{ if ne(parameters.disableComponentGovernance, 'true') }}:
+  - task: ComponentGovernanceComponentDetection@0
+    continueOnError: true
+    inputs:
+      ignoreDirectories: ${{ parameters.componentGovernanceIgnoreDirectories }}
\ No newline at end of file
diff --git a/eng/common/templates-official/steps/generate-sbom.yml b/eng/common/templates-official/steps/generate-sbom.yml
new file mode 100644
index 000000000000..488b560e8ba4
--- /dev/null
+++ b/eng/common/templates-official/steps/generate-sbom.yml
@@ -0,0 +1,48 @@
+# BuildDropPath - The root folder of the drop directory for which the manifest file will be generated.
+# PackageName - The name of the package this SBOM represents.
+# PackageVersion - The version of the package this SBOM represents. 
+# ManifestDirPath - The path of the directory where the generated manifest files will be placed
+# IgnoreDirectories - Directories to ignore for SBOM generation. This will be passed through to the CG component detector.
+
+parameters:
+  PackageVersion: 7.0.0
+  BuildDropPath: '$(Build.SourcesDirectory)/artifacts'
+  PackageName: '.NET'
+  ManifestDirPath: $(Build.ArtifactStagingDirectory)/sbom
+  IgnoreDirectories: ''
+  sbomContinueOnError: true
+
+steps:
+- task: PowerShell@2 
+  displayName: Prep for SBOM generation in (Non-linux)
+  condition: or(eq(variables['Agent.Os'], 'Windows_NT'), eq(variables['Agent.Os'], 'Darwin'))
+  inputs: 
+    filePath: ./eng/common/generate-sbom-prep.ps1
+    arguments: ${{parameters.manifestDirPath}}
+
+# Chmodding is a workaround for https://github.com/dotnet/arcade/issues/8461
+- script: |
+    chmod +x ./eng/common/generate-sbom-prep.sh
+    ./eng/common/generate-sbom-prep.sh ${{parameters.manifestDirPath}}
+  displayName: Prep for SBOM generation in (Linux)
+  condition: eq(variables['Agent.Os'], 'Linux')
+  continueOnError: ${{ parameters.sbomContinueOnError }}
+
+- task: AzureArtifacts.manifest-generator-task.manifest-generator-task.ManifestGeneratorTask@0
+  displayName: 'Generate SBOM manifest'
+  continueOnError: ${{ parameters.sbomContinueOnError }}
+  inputs:
+      PackageName: ${{ parameters.packageName }}
+      BuildDropPath: ${{ parameters.buildDropPath }}
+      PackageVersion: ${{ parameters.packageVersion }}
+      ManifestDirPath: ${{ parameters.manifestDirPath }}
+      ${{ if ne(parameters.IgnoreDirectories, '') }}:
+        AdditionalComponentDetectorArgs: '--IgnoreDirectories ${{ parameters.IgnoreDirectories }}'
+
+- task: 1ES.PublishPipelineArtifact@1
+  displayName: Publish SBOM manifest
+  continueOnError: ${{parameters.sbomContinueOnError}}
+  inputs:
+    targetPath: '${{parameters.manifestDirPath}}'
+    artifactName: $(ARTIFACT_NAME)
+
diff --git a/eng/common/templates-official/steps/publish-logs.yml b/eng/common/templates-official/steps/publish-logs.yml
new file mode 100644
index 000000000000..84b2f559c56e
--- /dev/null
+++ b/eng/common/templates-official/steps/publish-logs.yml
@@ -0,0 +1,49 @@
+parameters:
+  StageLabel: ''
+  JobLabel: ''
+  CustomSensitiveDataList: ''
+  # A default - in case value from eng/common/templates-official/post-build/common-variables.yml is not passed
+  BinlogToolVersion: '1.0.11'
+
+steps:
+- task: Powershell@2
+  displayName: Prepare Binlogs to Upload
+  inputs:
+    targetType: inline
+    script: |
+      New-Item -ItemType Directory $(Build.SourcesDirectory)/PostBuildLogs/${{parameters.StageLabel}}/${{parameters.JobLabel}}/
+      Move-Item -Path $(Build.SourcesDirectory)/artifacts/log/Debug/* $(Build.SourcesDirectory)/PostBuildLogs/${{parameters.StageLabel}}/${{parameters.JobLabel}}/
+  continueOnError: true
+  condition: always()
+    
+- task: PowerShell@2
+  displayName: Redact Logs
+  inputs:
+    filePath: $(Build.SourcesDirectory)/eng/common/post-build/redact-logs.ps1
+    # For now this needs to have explicit list of all sensitive data. Taken from eng/publishing/v3/publish.yml
+    # Sensitive data can as well be added to $(Build.SourcesDirectory)/eng/BinlogSecretsRedactionFile.txt'
+    #  If the file exists - sensitive data for redaction will be sourced from it
+    #  (single entry per line, lines starting with '# ' are considered comments and skipped)
+    arguments: -InputPath '$(Build.SourcesDirectory)/PostBuildLogs' 
+      -BinlogToolVersion ${{parameters.BinlogToolVersion}}
+      -TokensFilePath '$(Build.SourcesDirectory)/eng/BinlogSecretsRedactionFile.txt'
+      '$(publishing-dnceng-devdiv-code-r-build-re)'
+      '$(MaestroAccessToken)'
+      '$(dn-bot-all-orgs-artifact-feeds-rw)'
+      '$(akams-client-id)'
+      '$(akams-client-secret)'
+      '$(microsoft-symbol-server-pat)'
+      '$(symweb-symbol-server-pat)'
+      '$(dn-bot-all-orgs-build-rw-code-rw)'
+      ${{parameters.CustomSensitiveDataList}}
+  continueOnError: true
+  condition: always()
+      
+- task: 1ES.PublishBuildArtifacts@1
+  displayName: Publish Logs
+  inputs:
+    PathtoPublish: '$(Build.SourcesDirectory)/PostBuildLogs'
+    PublishLocation: Container
+    ArtifactName: PostBuildLogs
+  continueOnError: true
+  condition: always()
diff --git a/eng/common/templates-official/steps/retain-build.yml b/eng/common/templates-official/steps/retain-build.yml
new file mode 100644
index 000000000000..83d97a26a01f
--- /dev/null
+++ b/eng/common/templates-official/steps/retain-build.yml
@@ -0,0 +1,28 @@
+parameters:
+  # Optional azure devops PAT with build execute permissions for the build's organization,
+  # only needed if the build that should be retained ran on a different organization than 
+  # the pipeline where this template is executing from
+  Token: ''
+  # Optional BuildId to retain, defaults to the current running build
+  BuildId: ''
+  # Azure devops Organization URI for the build in the https://dev.azure.com/<organization> format.
+  # Defaults to the organization the current pipeline is running on
+  AzdoOrgUri: '$(System.CollectionUri)'
+  # Azure devops project for the build. Defaults to the project the current pipeline is running on
+  AzdoProject: '$(System.TeamProject)'
+
+steps:
+  - task: powershell@2
+    inputs:
+      targetType: 'filePath'
+      filePath: eng/common/retain-build.ps1
+      pwsh: true
+      arguments: >
+        -AzdoOrgUri: ${{parameters.AzdoOrgUri}}
+        -AzdoProject ${{parameters.AzdoProject}}
+        -Token ${{coalesce(parameters.Token, '$env:SYSTEM_ACCESSTOKEN') }}
+        -BuildId ${{coalesce(parameters.BuildId, '$env:BUILD_ID')}}
+    displayName: Enable permanent build retention
+    env:
+      SYSTEM_ACCESSTOKEN: $(System.AccessToken)
+      BUILD_ID: $(Build.BuildId)
\ No newline at end of file
diff --git a/eng/common/templates-official/steps/send-to-helix.yml b/eng/common/templates-official/steps/send-to-helix.yml
new file mode 100644
index 000000000000..68fa739c4ab2
--- /dev/null
+++ b/eng/common/templates-official/steps/send-to-helix.yml
@@ -0,0 +1,93 @@
+# Please remember to update the documentation if you make changes to these parameters!
+parameters:
+  HelixSource: 'pr/default'              # required -- sources must start with pr/, official/, prodcon/, or agent/
+  HelixType: 'tests/default/'            # required -- Helix telemetry which identifies what type of data this is; should include "test" for clarity and must end in '/'
+  HelixBuild: $(Build.BuildNumber)       # required -- the build number Helix will use to identify this -- automatically set to the AzDO build number
+  HelixTargetQueues: ''                  # required -- semicolon-delimited list of Helix queues to test on; see https://helix.dot.net/ for a list of queues
+  HelixAccessToken: ''                   # required -- access token to make Helix API requests; should be provided by the appropriate variable group
+  HelixProjectPath: 'eng/common/helixpublish.proj'  # optional -- path to the project file to build relative to BUILD_SOURCESDIRECTORY
+  HelixProjectArguments: ''              # optional -- arguments passed to the build command
+  HelixConfiguration: ''                 # optional -- additional property attached to a job
+  HelixPreCommands: ''                   # optional -- commands to run before Helix work item execution
+  HelixPostCommands: ''                  # optional -- commands to run after Helix work item execution
+  WorkItemDirectory: ''                  # optional -- a payload directory to zip up and send to Helix; requires WorkItemCommand; incompatible with XUnitProjects
+  WorkItemCommand: ''                    # optional -- a command to execute on the payload; requires WorkItemDirectory; incompatible with XUnitProjects
+  WorkItemTimeout: ''                    # optional -- a timeout in TimeSpan.Parse-ready value (e.g. 00:02:00) for the work item command; requires WorkItemDirectory; incompatible with XUnitProjects
+  CorrelationPayloadDirectory: ''        # optional -- a directory to zip up and send to Helix as a correlation payload
+  XUnitProjects: ''                      # optional -- semicolon-delimited list of XUnitProjects to parse and send to Helix; requires XUnitRuntimeTargetFramework, XUnitPublishTargetFramework, XUnitRunnerVersion, and IncludeDotNetCli=true
+  XUnitWorkItemTimeout: ''               # optional -- the workitem timeout in seconds for all workitems created from the xUnit projects specified by XUnitProjects
+  XUnitPublishTargetFramework: ''        # optional -- framework to use to publish your xUnit projects
+  XUnitRuntimeTargetFramework: ''        # optional -- framework to use for the xUnit console runner
+  XUnitRunnerVersion: ''                 # optional -- version of the xUnit nuget package you wish to use on Helix; required for XUnitProjects
+  IncludeDotNetCli: false                # optional -- true will download a version of the .NET CLI onto the Helix machine as a correlation payload; requires DotNetCliPackageType and DotNetCliVersion
+  DotNetCliPackageType: ''               # optional -- either 'sdk', 'runtime' or 'aspnetcore-runtime'; determines whether the sdk or runtime will be sent to Helix; see https://raw.githubusercontent.com/dotnet/core/main/release-notes/releases-index.json
+  DotNetCliVersion: ''                   # optional -- version of the CLI to send to Helix; based on this: https://raw.githubusercontent.com/dotnet/core/main/release-notes/releases-index.json
+  WaitForWorkItemCompletion: true        # optional -- true will make the task wait until work items have been completed and fail the build if work items fail. False is "fire and forget."
+  IsExternal: false                      # [DEPRECATED] -- doesn't do anything, jobs are external if HelixAccessToken is empty and Creator is set
+  HelixBaseUri: 'https://helix.dot.net/' # optional -- sets the Helix API base URI (allows targeting https://helix.int-dot.net )
+  Creator: ''                            # optional -- if the build is external, use this to specify who is sending the job
+  DisplayNamePrefix: 'Run Tests'         # optional -- rename the beginning of the displayName of the steps in AzDO 
+  condition: succeeded()                 # optional -- condition for step to execute; defaults to succeeded()
+  continueOnError: false                 # optional -- determines whether to continue the build if the step errors; defaults to false
+
+steps:
+  - powershell: 'powershell "$env:BUILD_SOURCESDIRECTORY\eng\common\msbuild.ps1 $env:BUILD_SOURCESDIRECTORY/${{ parameters.HelixProjectPath }} /restore /p:TreatWarningsAsErrors=false ${{ parameters.HelixProjectArguments }} /t:Test /bl:$env:BUILD_SOURCESDIRECTORY\artifacts\log\$env:BuildConfig\SendToHelix.binlog"'
+    displayName: ${{ parameters.DisplayNamePrefix }} (Windows)
+    env:
+      BuildConfig: $(_BuildConfig)
+      HelixSource: ${{ parameters.HelixSource }}
+      HelixType: ${{ parameters.HelixType }}
+      HelixBuild: ${{ parameters.HelixBuild }}
+      HelixConfiguration:  ${{ parameters.HelixConfiguration }}
+      HelixTargetQueues: ${{ parameters.HelixTargetQueues }}
+      HelixAccessToken: ${{ parameters.HelixAccessToken }}
+      HelixPreCommands: ${{ parameters.HelixPreCommands }}
+      HelixPostCommands: ${{ parameters.HelixPostCommands }}
+      WorkItemDirectory: ${{ parameters.WorkItemDirectory }}
+      WorkItemCommand: ${{ parameters.WorkItemCommand }}
+      WorkItemTimeout: ${{ parameters.WorkItemTimeout }}
+      CorrelationPayloadDirectory: ${{ parameters.CorrelationPayloadDirectory }}
+      XUnitProjects: ${{ parameters.XUnitProjects }}
+      XUnitWorkItemTimeout: ${{ parameters.XUnitWorkItemTimeout }}
+      XUnitPublishTargetFramework: ${{ parameters.XUnitPublishTargetFramework }}
+      XUnitRuntimeTargetFramework: ${{ parameters.XUnitRuntimeTargetFramework }}
+      XUnitRunnerVersion: ${{ parameters.XUnitRunnerVersion }}
+      IncludeDotNetCli: ${{ parameters.IncludeDotNetCli }}
+      DotNetCliPackageType: ${{ parameters.DotNetCliPackageType }}
+      DotNetCliVersion: ${{ parameters.DotNetCliVersion }}
+      WaitForWorkItemCompletion: ${{ parameters.WaitForWorkItemCompletion }}
+      HelixBaseUri: ${{ parameters.HelixBaseUri }}
+      Creator: ${{ parameters.Creator }}
+      SYSTEM_ACCESSTOKEN: $(System.AccessToken)
+    condition: and(${{ parameters.condition }}, eq(variables['Agent.Os'], 'Windows_NT'))
+    continueOnError: ${{ parameters.continueOnError }}
+  - script: $BUILD_SOURCESDIRECTORY/eng/common/msbuild.sh $BUILD_SOURCESDIRECTORY/${{ parameters.HelixProjectPath }} /restore /p:TreatWarningsAsErrors=false ${{ parameters.HelixProjectArguments }} /t:Test /bl:$BUILD_SOURCESDIRECTORY/artifacts/log/$BuildConfig/SendToHelix.binlog
+    displayName: ${{ parameters.DisplayNamePrefix }} (Unix)
+    env:
+      BuildConfig: $(_BuildConfig)
+      HelixSource: ${{ parameters.HelixSource }}
+      HelixType: ${{ parameters.HelixType }}
+      HelixBuild: ${{ parameters.HelixBuild }}
+      HelixConfiguration:  ${{ parameters.HelixConfiguration }}
+      HelixTargetQueues: ${{ parameters.HelixTargetQueues }}
+      HelixAccessToken: ${{ parameters.HelixAccessToken }}
+      HelixPreCommands: ${{ parameters.HelixPreCommands }}
+      HelixPostCommands: ${{ parameters.HelixPostCommands }}
+      WorkItemDirectory: ${{ parameters.WorkItemDirectory }}
+      WorkItemCommand: ${{ parameters.WorkItemCommand }}
+      WorkItemTimeout: ${{ parameters.WorkItemTimeout }}
+      CorrelationPayloadDirectory: ${{ parameters.CorrelationPayloadDirectory }}
+      XUnitProjects: ${{ parameters.XUnitProjects }}
+      XUnitWorkItemTimeout: ${{ parameters.XUnitWorkItemTimeout }}
+      XUnitPublishTargetFramework: ${{ parameters.XUnitPublishTargetFramework }}
+      XUnitRuntimeTargetFramework: ${{ parameters.XUnitRuntimeTargetFramework }}
+      XUnitRunnerVersion: ${{ parameters.XUnitRunnerVersion }}
+      IncludeDotNetCli: ${{ parameters.IncludeDotNetCli }}
+      DotNetCliPackageType: ${{ parameters.DotNetCliPackageType }}
+      DotNetCliVersion: ${{ parameters.DotNetCliVersion }}
+      WaitForWorkItemCompletion: ${{ parameters.WaitForWorkItemCompletion }}
+      HelixBaseUri: ${{ parameters.HelixBaseUri }}
+      Creator: ${{ parameters.Creator }}
+      SYSTEM_ACCESSTOKEN: $(System.AccessToken)
+    condition: and(${{ parameters.condition }}, ne(variables['Agent.Os'], 'Windows_NT'))
+    continueOnError: ${{ parameters.continueOnError }}
diff --git a/eng/common/templates-official/steps/source-build.yml b/eng/common/templates-official/steps/source-build.yml
new file mode 100644
index 000000000000..53ed57b6d48a
--- /dev/null
+++ b/eng/common/templates-official/steps/source-build.yml
@@ -0,0 +1,131 @@
+parameters:
+  # This template adds arcade-powered source-build to CI.
+
+  # This is a 'steps' template, and is intended for advanced scenarios where the existing build
+  # infra has a careful build methodology that must be followed. For example, a repo
+  # (dotnet/runtime) might choose to clone the GitHub repo only once and store it as a pipeline
+  # artifact for all subsequent jobs to use, to reduce dependence on a strong network connection to
+  # GitHub. Using this steps template leaves room for that infra to be included.
+
+  # Defines the platform on which to run the steps. See 'eng/common/templates-official/job/source-build.yml'
+  # for details. The entire object is described in the 'job' template for simplicity, even though
+  # the usage of the properties on this object is split between the 'job' and 'steps' templates.
+  platform: {}
+
+steps:
+# Build. Keep it self-contained for simple reusability. (No source-build-specific job variables.)
+- script: |
+    set -x
+    df -h
+
+    # If building on the internal project, the artifact feeds variable may be available (usually only if needed)
+    # In that case, call the feed setup script to add internal feeds corresponding to public ones.
+    # In addition, add an msbuild argument to copy the WIP from the repo to the target build location.
+    # This is because SetupNuGetSources.sh will alter the current NuGet.config file, and we need to preserve those
+    # changes.
+    internalRestoreArgs=
+    if [ '$(dn-bot-dnceng-artifact-feeds-rw)' != '$''(dn-bot-dnceng-artifact-feeds-rw)' ]; then
+      # Temporarily work around https://github.com/dotnet/arcade/issues/7709
+      chmod +x $(Build.SourcesDirectory)/eng/common/SetupNugetSources.sh
+      $(Build.SourcesDirectory)/eng/common/SetupNugetSources.sh $(Build.SourcesDirectory)/NuGet.config $(dn-bot-dnceng-artifact-feeds-rw)
+      internalRestoreArgs='/p:CopyWipIntoInnerSourceBuildRepo=true'
+
+      # The 'Copy WIP' feature of source build uses git stash to apply changes from the original repo.
+      # This only works if there is a username/email configured, which won't be the case in most CI runs.
+      git config --get user.email
+      if [ $? -ne 0 ]; then
+        git config user.email dn-bot@microsoft.com
+        git config user.name dn-bot
+      fi
+    fi
+
+    # If building on the internal project, the internal storage variable may be available (usually only if needed)
+    # In that case, add variables to allow the download of internal runtimes if the specified versions are not found
+    # in the default public locations.
+    internalRuntimeDownloadArgs=
+    if [ '$(dotnetbuilds-internal-container-read-token-base64)' != '$''(dotnetbuilds-internal-container-read-token-base64)' ]; then
+      internalRuntimeDownloadArgs='/p:DotNetRuntimeSourceFeed=https://dotnetbuilds.blob.core.windows.net/internal /p:DotNetRuntimeSourceFeedKey=$(dotnetbuilds-internal-container-read-token-base64) --runtimesourcefeed https://dotnetbuilds.blob.core.windows.net/internal --runtimesourcefeedkey $(dotnetbuilds-internal-container-read-token-base64)'
+    fi
+
+    buildConfig=Release
+    # Check if AzDO substitutes in a build config from a variable, and use it if so.
+    if [ '$(_BuildConfig)' != '$''(_BuildConfig)' ]; then
+      buildConfig='$(_BuildConfig)'
+    fi
+
+    officialBuildArgs=
+    if [ '${{ and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}' = 'True' ]; then
+      officialBuildArgs='/p:DotNetPublishUsingPipelines=true /p:OfficialBuildId=$(BUILD.BUILDNUMBER)'
+    fi
+
+    targetRidArgs=
+    if [ '${{ parameters.platform.targetRID }}' != '' ]; then
+      targetRidArgs='/p:TargetRid=${{ parameters.platform.targetRID }}'
+    fi
+
+    runtimeOsArgs=
+    if [ '${{ parameters.platform.runtimeOS }}' != '' ]; then
+      runtimeOsArgs='/p:RuntimeOS=${{ parameters.platform.runtimeOS }}'
+    fi
+
+    baseOsArgs=
+    if [ '${{ parameters.platform.baseOS }}' != '' ]; then
+      baseOsArgs='/p:BaseOS=${{ parameters.platform.baseOS }}'
+    fi
+
+    publishArgs=
+    if [ '${{ parameters.platform.skipPublishValidation }}' != 'true' ]; then
+      publishArgs='--publish'
+    fi
+
+    assetManifestFileName=SourceBuild_RidSpecific.xml
+    if [ '${{ parameters.platform.name }}' != '' ]; then
+      assetManifestFileName=SourceBuild_${{ parameters.platform.name }}.xml
+    fi
+
+    ${{ coalesce(parameters.platform.buildScript, './build.sh') }} --ci \
+      --configuration $buildConfig \
+      --restore --build --pack $publishArgs -bl \
+      $officialBuildArgs \
+      $internalRuntimeDownloadArgs \
+      $internalRestoreArgs \
+      $targetRidArgs \
+      $runtimeOsArgs \
+      $baseOsArgs \
+      /p:SourceBuildNonPortable=${{ parameters.platform.nonPortable }} \
+      /p:ArcadeBuildFromSource=true \
+      /p:DotNetBuildSourceOnly=true \
+      /p:DotNetBuildRepo=true \
+      /p:AssetManifestFileName=$assetManifestFileName
+  displayName: Build
+
+# Upload build logs for diagnosis.
+- task: CopyFiles@2
+  displayName: Prepare BuildLogs staging directory
+  inputs:
+    SourceFolder: '$(Build.SourcesDirectory)'
+    Contents: |
+      **/*.log
+      **/*.binlog
+      artifacts/sb/prebuilt-report/**
+    TargetFolder: '$(Build.StagingDirectory)/BuildLogs'
+    CleanTargetFolder: true
+  continueOnError: true
+  condition: succeededOrFailed()
+
+- task: 1ES.PublishPipelineArtifact@1
+  displayName: Publish BuildLogs
+  inputs:
+    targetPath: '$(Build.StagingDirectory)/BuildLogs'
+    artifactName: BuildLogs_SourceBuild_${{ parameters.platform.name }}_Attempt$(System.JobAttempt)
+  continueOnError: true
+  condition: succeededOrFailed()
+
+# Manually inject component detection so that we can ignore the source build upstream cache, which contains
+# a nupkg cache of input packages (a local feed).
+# This path must match the upstream cache path in property 'CurrentRepoSourceBuiltNupkgCacheDir'
+# in src\Microsoft.DotNet.Arcade.Sdk\tools\SourceBuild\SourceBuildArcade.targets
+- task: ComponentGovernanceComponentDetection@0
+  displayName: Component Detection (Exclude upstream cache)
+  inputs:
+    ignoreDirectories: '$(Build.SourcesDirectory)/artifacts/sb/src/artifacts/obj/source-built-upstream-cache'
diff --git a/eng/common/templates-official/variables/pool-providers.yml b/eng/common/templates-official/variables/pool-providers.yml
new file mode 100644
index 000000000000..beab7d1bfba0
--- /dev/null
+++ b/eng/common/templates-official/variables/pool-providers.yml
@@ -0,0 +1,45 @@
+# Select a pool provider based off branch name. Anything with branch name containing 'release' must go into an -Svc pool, 
+# otherwise it should go into the "normal" pools. This separates out the queueing and billing of released branches.
+
+# Motivation: 
+#   Once a given branch of a repository's output has been officially "shipped" once, it is then considered to be COGS
+#   (Cost of goods sold) and should be moved to a servicing pool provider. This allows both separation of queueing
+#   (allowing release builds and main PR builds to not intefere with each other) and billing (required for COGS.
+#   Additionally, the pool provider name itself may be subject to change when the .NET Core Engineering Services 
+#   team needs to move resources around and create new and potentially differently-named pools. Using this template 
+#   file from an Arcade-ified repo helps guard against both having to update one's release/* branches and renaming.
+
+# How to use: 
+#  This yaml assumes your shipped product branches use the naming convention "release/..." (which many do).
+#  If we find alternate naming conventions in broad usage it can be added to the condition below.
+#
+#  First, import the template in an arcade-ified repo to pick up the variables, e.g.:
+#
+#  variables:
+#  - template: /eng/common/templates-official/variables/pool-providers.yml
+#
+#  ... then anywhere specifying the pool provider use the runtime variables,
+#      $(DncEngInternalBuildPool)
+#
+#        pool:
+#           name: $(DncEngInternalBuildPool)
+#           image: 1es-windows-2022-pt
+
+variables:
+  # Coalesce the target and source branches so we know when a PR targets a release branch
+  # If these variables are somehow missing, fall back to main (tends to have more capacity)
+
+  # Any new -Svc alternative pools should have variables added here to allow for splitting work
+
+  - name: DncEngInternalBuildPool
+    value: $[
+        replace(
+          replace(
+            eq(contains(coalesce(variables['System.PullRequest.TargetBranch'], variables['Build.SourceBranch'], 'refs/heads/main'), 'release'), 'true'),
+            True,
+            'NetCore1ESPool-Svc-Internal'
+          ),
+          False,
+          'NetCore1ESPool-Internal'
+        )
+      ]
\ No newline at end of file
diff --git a/eng/common/templates-official/variables/sdl-variables.yml b/eng/common/templates-official/variables/sdl-variables.yml
new file mode 100644
index 000000000000..dbdd66d4a4b3
--- /dev/null
+++ b/eng/common/templates-official/variables/sdl-variables.yml
@@ -0,0 +1,7 @@
+variables:
+# The Guardian version specified in 'eng/common/sdl/packages.config'. This value must be kept in
+# sync with the packages.config file.
+- name: DefaultGuardianVersion
+  value: 0.109.0
+- name: GuardianPackagesConfigFile
+  value: $(Build.SourcesDirectory)\eng\common\sdl\packages.config
\ No newline at end of file
diff --git a/eng/common/templates/job/job.yml b/eng/common/templates/job/job.yml
index 01c0dd995e4b..a3277bf15c51 100644
--- a/eng/common/templates/job/job.yml
+++ b/eng/common/templates/job/job.yml
@@ -15,6 +15,7 @@ parameters:
   timeoutInMinutes: ''
   variables: []
   workspace: ''
+  templateContext: ''
 
 # Job base template specific parameters
   # See schema documentation - https://github.com/dotnet/arcade/blob/master/Documentation/AzureDevOps/TemplateSchema.md
@@ -68,6 +69,9 @@ jobs:
   ${{ if ne(parameters.timeoutInMinutes, '') }}:
     timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
 
+  ${{ if ne(parameters.templateContext, '') }}:
+    templateContext: ${{ parameters.templateContext }}
+
   variables:
   - ${{ if ne(parameters.enableTelemetry, 'false') }}:
     - name: DOTNET_CLI_TELEMETRY_PROFILE
diff --git a/eng/common/templates/job/publish-build-assets.yml b/eng/common/templates/job/publish-build-assets.yml
index 3115990d5114..bb42240f865b 100644
--- a/eng/common/templates/job/publish-build-assets.yml
+++ b/eng/common/templates/job/publish-build-assets.yml
@@ -58,7 +58,7 @@ jobs:
       demands: Cmd
     # If it's not devdiv, it's dnceng
     ${{ if ne(variables['System.TeamProject'], 'DevDiv') }}:
-      name: $(DncEngInternalBuildPool)
+      name: NetCore1ESPool-Publishing-Internal
       demands: ImageOverride -equals windows.vs2019.amd64
 
   steps:
@@ -66,7 +66,7 @@ jobs:
     - checkout: self
       fetchDepth: 3
       clean: true
-      
+
     - task: DownloadBuildArtifacts@0
       displayName: Download artifact
       inputs:
@@ -75,7 +75,7 @@ jobs:
         checkDownloadedFiles: true
       condition: ${{ parameters.condition }}
       continueOnError: ${{ parameters.continueOnError }}
-    
+
     - task: NuGetAuthenticate@1
 
     - task: PowerShell@2
@@ -90,7 +90,7 @@ jobs:
           /p:OfficialBuildId=$(Build.BuildNumber)
       condition: ${{ parameters.condition }}
       continueOnError: ${{ parameters.continueOnError }}
-    
+
     - task: powershell@2
       displayName: Create ReleaseConfigs Artifact
       inputs:
@@ -99,7 +99,7 @@ jobs:
           Add-Content -Path "$(Build.StagingDirectory)/ReleaseConfigs.txt" -Value $(BARBuildId)
           Add-Content -Path "$(Build.StagingDirectory)/ReleaseConfigs.txt" -Value "$(DefaultChannels)"
           Add-Content -Path "$(Build.StagingDirectory)/ReleaseConfigs.txt" -Value $(IsStableBuild)
-    
+
     - task: PublishBuildArtifacts@1
       displayName: Publish ReleaseConfigs Artifact
       inputs:
@@ -125,7 +125,7 @@ jobs:
 
     - task: PublishBuildArtifacts@1
       displayName: Publish SymbolPublishingExclusionsFile Artifact
-      condition: eq(variables['SymbolExclusionFile'], 'true') 
+      condition: eq(variables['SymbolExclusionFile'], 'true')
       inputs:
         PathtoPublish: '$(Build.SourcesDirectory)/eng/SymbolPublishingExclusionsFile.txt'
         PublishLocation: Container
@@ -141,7 +141,7 @@ jobs:
         displayName: Publish Using Darc
         inputs:
           filePath: $(Build.SourcesDirectory)/eng/common/post-build/publish-using-darc.ps1
-          arguments: -BuildId $(BARBuildId) 
+          arguments: -BuildId $(BARBuildId)
             -PublishingInfraVersion 3
             -AzdoToken '$(publishing-dnceng-devdiv-code-r-build-re)'
             -MaestroToken '$(MaestroApiAccessToken)'
@@ -152,4 +152,4 @@ jobs:
     - ${{ if eq(parameters.enablePublishBuildArtifacts, 'true') }}:
       - template: /eng/common/templates/steps/publish-logs.yml
         parameters:
-          JobLabel: 'Publish_Artifacts_Logs'     
+          JobLabel: 'Publish_Artifacts_Logs'
diff --git a/eng/common/templates/post-build/post-build.yml b/eng/common/templates/post-build/post-build.yml
index bbc010fe7326..ee70e2b399c5 100644
--- a/eng/common/templates/post-build/post-build.yml
+++ b/eng/common/templates/post-build/post-build.yml
@@ -39,7 +39,7 @@ parameters:
     displayName: Enable NuGet validation
     type: boolean
     default: true
-    
+
   - name: publishInstallersAndChecksums
     displayName: Publish installers and checksums
     type: boolean
@@ -131,8 +131,8 @@ stages:
           displayName: Validate
           inputs:
             filePath: $(Build.SourcesDirectory)/eng/common/post-build/nuget-validation.ps1
-            arguments: -PackagesPath $(Build.ArtifactStagingDirectory)/PackageArtifacts/ 
-              -ToolDestinationPath $(Agent.BuildDirectory)/Extract/ 
+            arguments: -PackagesPath $(Build.ArtifactStagingDirectory)/PackageArtifacts/
+              -ToolDestinationPath $(Agent.BuildDirectory)/Extract/
 
     - job:
       displayName: Signing Validation
@@ -222,9 +222,9 @@ stages:
           displayName: Validate
           inputs:
             filePath: $(Build.SourcesDirectory)/eng/common/post-build/sourcelink-validation.ps1
-            arguments: -InputPath $(Build.ArtifactStagingDirectory)/BlobArtifacts/ 
-              -ExtractPath $(Agent.BuildDirectory)/Extract/ 
-              -GHRepoName $(Build.Repository.Name) 
+            arguments: -InputPath $(Build.ArtifactStagingDirectory)/BlobArtifacts/
+              -ExtractPath $(Agent.BuildDirectory)/Extract/
+              -GHRepoName $(Build.Repository.Name)
               -GHCommit $(Build.SourceVersion)
               -SourcelinkCliVersion $(SourceLinkCLIVersion)
           continueOnError: true
@@ -259,7 +259,7 @@ stages:
           demands: Cmd
         # If it's not devdiv, it's dnceng
         ${{ else }}:
-          name: $(DncEngInternalBuildPool)
+          name: NetCore1ESPool-Publishing-Internal
           demands: ImageOverride -equals windows.vs2019.amd64
       steps:
         - template: setup-maestro-vars.yml
@@ -273,7 +273,7 @@ stages:
           displayName: Publish Using Darc
           inputs:
             filePath: $(Build.SourcesDirectory)/eng/common/post-build/publish-using-darc.ps1
-            arguments: -BuildId $(BARBuildId) 
+            arguments: -BuildId $(BARBuildId)
               -PublishingInfraVersion ${{ parameters.publishingInfraVersion }}
               -AzdoToken '$(publishing-dnceng-devdiv-code-r-build-re)'
               -MaestroToken '$(MaestroApiAccessToken)'
diff --git a/eng/common/templates/steps/send-to-helix.yml b/eng/common/templates/steps/send-to-helix.yml
index 3eb7e2d5f840..68fa739c4ab2 100644
--- a/eng/common/templates/steps/send-to-helix.yml
+++ b/eng/common/templates/steps/send-to-helix.yml
@@ -5,6 +5,8 @@ parameters:
   HelixBuild: $(Build.BuildNumber)       # required -- the build number Helix will use to identify this -- automatically set to the AzDO build number
   HelixTargetQueues: ''                  # required -- semicolon-delimited list of Helix queues to test on; see https://helix.dot.net/ for a list of queues
   HelixAccessToken: ''                   # required -- access token to make Helix API requests; should be provided by the appropriate variable group
+  HelixProjectPath: 'eng/common/helixpublish.proj'  # optional -- path to the project file to build relative to BUILD_SOURCESDIRECTORY
+  HelixProjectArguments: ''              # optional -- arguments passed to the build command
   HelixConfiguration: ''                 # optional -- additional property attached to a job
   HelixPreCommands: ''                   # optional -- commands to run before Helix work item execution
   HelixPostCommands: ''                  # optional -- commands to run after Helix work item execution
@@ -29,7 +31,7 @@ parameters:
   continueOnError: false                 # optional -- determines whether to continue the build if the step errors; defaults to false
 
 steps:
-  - powershell: 'powershell "$env:BUILD_SOURCESDIRECTORY\eng\common\msbuild.ps1 $env:BUILD_SOURCESDIRECTORY\eng\common\helixpublish.proj /restore /p:TreatWarningsAsErrors=false /t:Test /bl:$env:BUILD_SOURCESDIRECTORY\artifacts\log\$env:BuildConfig\SendToHelix.binlog"'
+  - powershell: 'powershell "$env:BUILD_SOURCESDIRECTORY\eng\common\msbuild.ps1 $env:BUILD_SOURCESDIRECTORY/${{ parameters.HelixProjectPath }} /restore /p:TreatWarningsAsErrors=false ${{ parameters.HelixProjectArguments }} /t:Test /bl:$env:BUILD_SOURCESDIRECTORY\artifacts\log\$env:BuildConfig\SendToHelix.binlog"'
     displayName: ${{ parameters.DisplayNamePrefix }} (Windows)
     env:
       BuildConfig: $(_BuildConfig)
@@ -59,7 +61,7 @@ steps:
       SYSTEM_ACCESSTOKEN: $(System.AccessToken)
     condition: and(${{ parameters.condition }}, eq(variables['Agent.Os'], 'Windows_NT'))
     continueOnError: ${{ parameters.continueOnError }}
-  - script: $BUILD_SOURCESDIRECTORY/eng/common/msbuild.sh $BUILD_SOURCESDIRECTORY/eng/common/helixpublish.proj /restore /p:TreatWarningsAsErrors=false /t:Test /bl:$BUILD_SOURCESDIRECTORY/artifacts/log/$BuildConfig/SendToHelix.binlog
+  - script: $BUILD_SOURCESDIRECTORY/eng/common/msbuild.sh $BUILD_SOURCESDIRECTORY/${{ parameters.HelixProjectPath }} /restore /p:TreatWarningsAsErrors=false ${{ parameters.HelixProjectArguments }} /t:Test /bl:$BUILD_SOURCESDIRECTORY/artifacts/log/$BuildConfig/SendToHelix.binlog
     displayName: ${{ parameters.DisplayNamePrefix }} (Unix)
     env:
       BuildConfig: $(_BuildConfig)
diff --git a/eng/common/templates/variables/pool-providers.yml b/eng/common/templates/variables/pool-providers.yml
index 9cc5c550d3b3..d236f9fdbb15 100644
--- a/eng/common/templates/variables/pool-providers.yml
+++ b/eng/common/templates/variables/pool-providers.yml
@@ -1,15 +1,15 @@
-# Select a pool provider based off branch name. Anything with branch name containing 'release' must go into an -Svc pool, 
+# Select a pool provider based off branch name. Anything with branch name containing 'release' must go into an -Svc pool,
 # otherwise it should go into the "normal" pools. This separates out the queueing and billing of released branches.
 
-# Motivation: 
+# Motivation:
 #   Once a given branch of a repository's output has been officially "shipped" once, it is then considered to be COGS
 #   (Cost of goods sold) and should be moved to a servicing pool provider. This allows both separation of queueing
 #   (allowing release builds and main PR builds to not intefere with each other) and billing (required for COGS.
-#   Additionally, the pool provider name itself may be subject to change when the .NET Core Engineering Services 
-#   team needs to move resources around and create new and potentially differently-named pools. Using this template 
+#   Additionally, the pool provider name itself may be subject to change when the .NET Core Engineering Services
+#   team needs to move resources around and create new and potentially differently-named pools. Using this template
 #   file from an Arcade-ified repo helps guard against both having to update one's release/* branches and renaming.
 
-# How to use: 
+# How to use:
 #  This yaml assumes your shipped product branches use the naming convention "release/..." (which many do).
 #  If we find alternate naming conventions in broad usage it can be added to the condition below.
 #
@@ -54,4 +54,4 @@ variables:
           False,
           'NetCore1ESPool-Internal'
         )
-      ]
\ No newline at end of file
+      ]
diff --git a/eng/common/tools.ps1 b/eng/common/tools.ps1
index 7d8dc89b919b..a58aef2847e1 100644
--- a/eng/common/tools.ps1
+++ b/eng/common/tools.ps1
@@ -65,6 +65,11 @@ $ErrorActionPreference = 'Stop'
 # Base-64 encoded SAS token that has permission to storage container described by $runtimeSourceFeed
 [string]$runtimeSourceFeedKey = if (Test-Path variable:runtimeSourceFeedKey) { $runtimeSourceFeedKey } else { $null }
 
+# True if the build is a product build
+[bool]$productBuild = if (Test-Path variable:productBuild) { $productBuild } else { $false }
+
+[String[]]$properties = if (Test-Path variable:properties) { $properties } else { @() }
+
 function Create-Directory ([string[]] $path) {
     New-Item -Path $path -Force -ItemType 'Directory' | Out-Null
 }
@@ -850,7 +855,8 @@ function MSBuild-Core() {
     }
 
     # When running on Azure Pipelines, override the returned exit code to avoid double logging.
-    if ($ci -and $env:SYSTEM_TEAMPROJECT -ne $null) {
+    # Skip this when the build is a child of the VMR orchestrator build.
+    if ($ci -and $env:SYSTEM_TEAMPROJECT -ne $null -and !$productBuild -and -not($properties -like "*DotNetBuildRepo=true*")) {
       Write-PipelineSetResult -Result "Failed" -Message "msbuild execution failed."
       # Exiting with an exit code causes the azure pipelines task to log yet another "noise" error
       # The above Write-PipelineSetResult will cause the task to be marked as failure without adding yet another error
diff --git a/eng/common/tools.sh b/eng/common/tools.sh
index ece4b7307953..db64e298ff63 100755
--- a/eng/common/tools.sh
+++ b/eng/common/tools.sh
@@ -68,6 +68,9 @@ fi
 runtime_source_feed=${runtime_source_feed:-''}
 runtime_source_feed_key=${runtime_source_feed_key:-''}
 
+# True if the build is a product build
+product_build=${product_build:-false}
+
 # Resolve any symlinks in the given path.
 function ResolvePath {
   local path=$1
@@ -141,7 +144,7 @@ function InitializeDotNetCli {
   if [[ $global_json_has_runtimes == false && -n "${DOTNET_INSTALL_DIR:-}" && -d "$DOTNET_INSTALL_DIR/sdk/$dotnet_sdk_version" ]]; then
     dotnet_root="$DOTNET_INSTALL_DIR"
   else
-    dotnet_root="$repo_root/.dotnet"
+    dotnet_root="${repo_root}.dotnet"
 
     export DOTNET_INSTALL_DIR="$dotnet_root"
 
@@ -503,7 +506,8 @@ function MSBuild-Core {
       echo "Build failed with exit code $exit_code. Check errors above."
 
       # When running on Azure Pipelines, override the returned exit code to avoid double logging.
-      if [[ "$ci" == "true" && -n ${SYSTEM_TEAMPROJECT:-} ]]; then
+      # Skip this when the build is a child of the VMR orchestrator build.
+      if [[ "$ci" == true && -n ${SYSTEM_TEAMPROJECT:-} && "$product_build" != true && "$properties" != *"DotNetBuildRepo=true"* ]]; then
         Write-PipelineSetResult -result "Failed" -message "msbuild execution failed."
         # Exiting with an exit code causes the azure pipelines task to log yet another "noise" error
         # The above Write-PipelineSetResult will cause the task to be marked as failure without adding yet another error
diff --git a/eng/formatting/download-tools.ps1 b/eng/formatting/download-tools.ps1
index 603a015c5e5f..62d518bb11a8 100644
--- a/eng/formatting/download-tools.ps1
+++ b/eng/formatting/download-tools.ps1
@@ -8,17 +8,25 @@ function DownloadClangTool {
         $downloadOutputPath
     )
 
-    $baseUri = "https://clrjit.blob.core.windows.net/clang-tools/windows"
+    $clangVersion = "17.0.6"
+    $clangToolsRootUrl = "https://clrjit2.blob.core.windows.net/clang-tools"
+    $clangPlatform = "windows-x64"
+
+    $toolUrl = "$clangToolsRootUrl/$clangVersion/$clangPlatform/$toolName.exe"
+    $targetPath = "$downloadOutputPath\$toolName.exe" 
 
     if (-not $(ls $downloadOutputPath | Where-Object { $_.Name -eq "$toolName.exe" })) {
 
         Retry({
-            Write-Output "Downloading '$baseUri/$toolName.exe'"
+            Write-Output "Downloading '$toolUrl' to '$targetPath'"
             # Pass -PassThru as otherwise Invoke-WebRequest leaves a corrupted file if the download fails. With -PassThru the download is buffered first.
             # -UseBasicParsing is necessary for older PowerShells when Internet Explorer might not be installed/configured
-            $null = Invoke-WebRequest -Uri "$baseUri/$toolName.exe" -OutFile $(Join-Path $downloadOutputPath -ChildPath "$toolName.exe") -PassThru -UseBasicParsing
+            $null = Invoke-WebRequest -Uri "$toolUrl" -OutFile $(Join-Path $downloadOutputPath -ChildPath "$toolName.exe") -PassThru -UseBasicParsing
         })
     }
+    else {
+        Write-Output "Found '$targetPath'"
+    }
 }
 
 $downloadPathFolder = Split-Path $PSScriptRoot -Parent | Split-Path -Parent | Join-Path -ChildPath "artifacts" | Join-Path -ChildPath "tools"
diff --git a/eng/formatting/download-tools.sh b/eng/formatting/download-tools.sh
index 44459dbc885b..023ed55ed6e0 100755
--- a/eng/formatting/download-tools.sh
+++ b/eng/formatting/download-tools.sh
@@ -15,21 +15,22 @@ done
 scriptroot="$( cd -P "$( dirname "$source" )" && pwd )"
 
 function DownloadClangTool {
-    targetPlatform=$(dotnet --info |grep RID:)
-    targetPlatform=${targetPlatform##*RID:* }
-    echo "dotnet RID: ${targetPlatform}"
+
+    clangVersion="17.0.6"
+    clangToolsRootUrl="https://clrjit2.blob.core.windows.net/clang-tools"
+
+    clangPlatform="$(dotnet --info | grep 'RID:')"
+    clangPlatform="${clangPlatform##*RID:* }"
+    echo "dotnet RID: ${clangPlatform}"
 
     # override common RIDs with compatible version so we don't need to upload binaries for each RID
-    case $targetPlatform in
-        osx.*-x64)
-            targetPlatform=osx.10.15-x64
-            ;;
+    case $clangPlatform in
         ubuntu.*-x64)
-            targetPlatform=ubuntu.18.04-x64
-            ;;
+        clangPlatform=linux-x64
+        ;;
     esac
 
-    toolUrl=https://clrjit.blob.core.windows.net/clang-tools/${targetPlatform}/$1
+    toolUrl="${clangToolsRootUrl}/${clangVersion}/${clangPlatform}/$1"
     toolOutput=$2/$1
 
     echo "Downloading $1 from ${toolUrl} to ${toolOutput}"
diff --git a/eng/install-native-dependencies.sh b/eng/install-native-dependencies.sh
index 3fa4d87a9e4a..42a3727b3188 100755
--- a/eng/install-native-dependencies.sh
+++ b/eng/install-native-dependencies.sh
@@ -20,17 +20,19 @@ case "$os" in
             . /etc/os-release
         fi
 
-        if [ "$ID" != "debian" ] && [ "$ID_LIKE" != "debian" ]; then
+        if [ "$ID" = "debian" ] || [ "$ID_LIKE" = "debian" ]; then
+            apt update
+
+            apt install -y build-essential gettext locales cmake llvm clang lldb liblldb-dev libunwind8-dev libicu-dev liblttng-ust-dev \
+                libssl-dev libkrb5-dev zlib1g-dev
+
+            localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
+        elif [ "$ID" = "alpine" ]; then
+            apk add build-base cmake bash curl clang llvm-dev krb5-dev lttng-ust-dev icu-dev zlib-dev openssl-dev
+        else
             echo "Unsupported distro. distro: $ID"
             exit 1
         fi
-
-        apt update
-
-        apt install -y build-essential gettext locales cmake llvm clang lldb liblldb-dev libunwind8-dev libicu-dev liblttng-ust-dev \
-            libssl-dev libkrb5-dev zlib1g-dev
-
-        localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
         ;;
 
     osx|maccatalyst|ios|iossimulator|tvos|tvossimulator)
diff --git a/eng/liveBuilds.targets b/eng/liveBuilds.targets
index db0010023759..c4c03648e2e3 100644
--- a/eng/liveBuilds.targets
+++ b/eng/liveBuilds.targets
@@ -188,6 +188,9 @@
         IsNative="" />
       <ExcludeNativeLibrariesRuntimeFiles Condition="'$(IncludeOOBLibraries)' != 'true'"
                                           Include="$(LibrariesNativeArtifactsPath)libSystem.IO.Ports.Native.*" />
+
+      <ExcludeNativeLibrariesRuntimeFiles Condition="'$(RuntimeFlavor)' != 'Mono'"
+                                          Include="$(LibrariesNativeArtifactsPath)System.Globalization.Native.dll;$(LibrariesNativeArtifactsPath)System.Globalization.Native.so;$(LibrariesNativeArtifactsPath)System.Globalization.Native.dylib" />
       <LibrariesRuntimeFiles Include="
         $(LibrariesNativeArtifactsPath)*.dat;
         $(LibrariesNativeArtifactsPath)*.dll;
diff --git a/eng/native/configurecompiler.cmake b/eng/native/configurecompiler.cmake
index e81ffc10e06f..3c36d74faa0f 100644
--- a/eng/native/configurecompiler.cmake
+++ b/eng/native/configurecompiler.cmake
@@ -28,7 +28,6 @@ if (CLR_CMAKE_HOST_UNIX)
         add_compile_options(-Wno-null-conversion)
         add_compile_options(-glldb)
     else()
-        add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-Werror=conversion-null>)
         add_compile_options(-g)
     endif()
 endif()
@@ -444,6 +443,8 @@ endif(CLR_CMAKE_HOST_WIN32)
 
 # Unconditionally define _FILE_OFFSET_BITS as 64 on all platforms.
 add_definitions(-D_FILE_OFFSET_BITS=64)
+# Unconditionally define _TIME_BITS as 64 on all platforms.
+add_definitions(-D_TIME_BITS=64)
 
 # Architecture specific files folder name
 if (CLR_CMAKE_TARGET_ARCH_AMD64)
@@ -618,6 +619,9 @@ if (CLR_CMAKE_HOST_UNIX)
     # other clang 16.0 suppressions
     add_compile_options(-Wno-single-bit-bitfield-constant-conversion)
     add_compile_options(-Wno-cast-function-type-strict)
+
+    # clang 18.1 supressions
+    add_compile_options(-Wno-switch-default)
   else()
     add_compile_options(-Wno-uninitialized)
     add_compile_options(-Wno-strict-aliasing)
@@ -854,10 +858,32 @@ if (MSVC)
   add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4013>) # 'function' undefined - assuming extern returning int.
   add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4102>) # "'%$S' : unreferenced label".
   add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4551>) # Function call missing argument list.
-  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4700>) # Local used w/o being initialized.
   add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4640>) # 'instance' : construction of local static object is not thread-safe
   add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4806>) # Unsafe operation involving type 'bool'.
 
+  # SDL requires the below warnings to be treated as errors:
+  # More info: https://liquid.microsoft.com/Web/Object/Read/ms.security/Requirements/Microsoft.Security.SystemsADM.10086
+  # (Access to that URL restricted to Microsoft employees.)
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4055>) # 'conversion' : from data pointer 'type1' to function pointer 'type2'
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4146>) # unary minus operator applied to unsigned type, result still unsigned
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4242>) # 'identifier' : conversion from 'type1' to 'type2', possible loss of data
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4244>) # 'conversion' conversion from 'type1' to 'type2', possible loss of data
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4267>) # 'var' : conversion from 'size_t' to 'type', possible loss of data
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4302>) # 'conversion' : truncation from 'type 1' to 'type 2'
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4308>) # negative integral constant converted to unsigned type
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4509>) # nonstandard extension used: 'function' uses SEH and 'object' has destructor
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4510>) # 'class' : default constructor could not be generated
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4532>) # 'continue' : jump out of __finally/finally block has undefined behavior during termination handling
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4533>) # initialization of 'variable' is skipped by 'instruction'
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4610>) # object 'class' can never be instantiated - user-defined constructor required
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4611>) # interaction between 'function' and C++ object destruction is non-portable
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4700>) # uninitialized local variable 'name' used
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4701>) # Potentially uninitialized local variable 'name' used
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4703>) # Potentially uninitialized local pointer variable 'name' used
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4789>) # destination of memory copy is too small
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4995>) # 'function': name was marked as #pragma deprecated
+  add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/we4996>) # 'function': was declared deprecated
+
   # Set Warning Level 3:
   add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/w34092>) # Sizeof returns 'unsigned long'.
   add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/w34121>) # Structure is sensitive to alignment.
diff --git a/eng/native/configureplatform.cmake b/eng/native/configureplatform.cmake
index 91f8edc6b0b9..08f98049ebc0 100644
--- a/eng/native/configureplatform.cmake
+++ b/eng/native/configureplatform.cmake
@@ -27,6 +27,8 @@ if(CLR_CMAKE_HOST_OS STREQUAL linux)
             endif()
         elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL i686)
             set(CLR_CMAKE_HOST_UNIX_X86 1)
+        elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL aarch64)
+            set(CLR_CMAKE_HOST_UNIX_ARM64 1)
         else()
             clr_unknown_arch()
         endif()
diff --git a/eng/native/functions.cmake b/eng/native/functions.cmake
index 543722a9c0a5..6629e926afac 100644
--- a/eng/native/functions.cmake
+++ b/eng/native/functions.cmake
@@ -1,8 +1,8 @@
 function(clr_unknown_arch)
     if (WIN32)
-        message(FATAL_ERROR "Only AMD64, ARM64, ARM and I386 are supported. Found: ${CMAKE_SYSTEM_PROCESSOR}")
+        message(FATAL_ERROR "Only AMD64, ARM64, ARM and I386 hosts are supported. Found: ${CMAKE_SYSTEM_PROCESSOR}")
     elseif(CLR_CROSS_COMPONENTS_BUILD)
-        message(FATAL_ERROR "Only AMD64, I386 host are supported for linux cross-architecture component. Found: ${CMAKE_SYSTEM_PROCESSOR}")
+        message(FATAL_ERROR "Only AMD64, ARM64 and I386 hosts are supported for linux cross-architecture component. Found: ${CMAKE_SYSTEM_PROCESSOR}")
     else()
         message(FATAL_ERROR "'${CMAKE_SYSTEM_PROCESSOR}' is an unsupported architecture.")
     endif()
@@ -220,6 +220,12 @@ endfunction(convert_to_absolute_path)
 function(preprocess_file inputFilename outputFilename)
   get_compile_definitions(PREPROCESS_DEFINITIONS)
   get_include_directories(PREPROCESS_INCLUDE_DIRECTORIES)
+  get_source_file_property(SOURCE_FILE_DEFINITIONS ${inputFilename} COMPILE_DEFINITIONS)
+  
+  foreach(DEFINITION IN LISTS SOURCE_FILE_DEFINITIONS)
+    list(APPEND PREPROCESS_DEFINITIONS -D${DEFINITION})
+  endforeach()
+
   if (MSVC)
     add_custom_command(
         OUTPUT ${outputFilename}
@@ -228,9 +234,12 @@ function(preprocess_file inputFilename outputFilename)
         COMMENT "Preprocessing ${inputFilename}. Outputting to ${outputFilename}"
     )
   else()
+    if (CMAKE_CXX_COMPILER_TARGET AND CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+      set(_LOCAL_CROSS_TARGET "--target=${CMAKE_CXX_COMPILER_TARGET}")
+    endif()
     add_custom_command(
         OUTPUT ${outputFilename}
-        COMMAND ${CMAKE_CXX_COMPILER} -E -P ${PREPROCESS_DEFINITIONS} ${PREPROCESS_INCLUDE_DIRECTORIES} -o ${outputFilename} -x c ${inputFilename}
+        COMMAND ${CMAKE_CXX_COMPILER} ${_LOCAL_CROSS_TARGET} -E -P ${PREPROCESS_DEFINITIONS} ${PREPROCESS_INCLUDE_DIRECTORIES} -o ${outputFilename} -x c ${inputFilename}
         DEPENDS ${inputFilename}
         COMMENT "Preprocessing ${inputFilename}. Outputting to ${outputFilename}"
     )
@@ -369,7 +378,11 @@ endfunction()
 function (get_symbol_file_name targetName outputSymbolFilename)
   if (CLR_CMAKE_HOST_UNIX)
     if (CLR_CMAKE_TARGET_APPLE)
-      set(strip_destination_file $<TARGET_FILE:${targetName}>.dwarf)
+      if (CLR_CMAKE_APPLE_DSYM)
+        set(strip_destination_file $<TARGET_FILE:${targetName}>.dSYM)
+      else ()
+        set(strip_destination_file $<TARGET_FILE:${targetName}>.dwarf)
+      endif ()
     else ()
       set(strip_destination_file $<TARGET_FILE:${targetName}>.dbg)
     endif ()
@@ -416,7 +429,9 @@ function(strip_symbols targetName outputFilename)
         OUTPUT_VARIABLE DSYMUTIL_HELP_OUTPUT
       )
 
-      set(DSYMUTIL_OPTS "--flat")
+      if (NOT CLR_CMAKE_APPLE_DSYM)
+        set(DSYMUTIL_OPTS "--flat")
+      endif ()
       if ("${DSYMUTIL_HELP_OUTPUT}" MATCHES "--minimize")
         list(APPEND DSYMUTIL_OPTS "--minimize")
       endif ()
diff --git a/eng/native/tryrun.cmake b/eng/native/tryrun.cmake
index a239af9fc937..a4cc7eaf9310 100644
--- a/eng/native/tryrun.cmake
+++ b/eng/native/tryrun.cmake
@@ -58,15 +58,6 @@ if(DARWIN)
     set_cache_value(HAVE_CLOCK_REALTIME_EXITCODE 0)
     set_cache_value(HAVE_CLOCK_THREAD_CPUTIME_EXITCODE 0)
     set_cache_value(HAVE_CLOCK_GETTIME_NSEC_NP_EXITCODE 0)
-    set_cache_value(HAVE_COMPATIBLE_ACOS_EXITCODE 0)
-    set_cache_value(HAVE_COMPATIBLE_ASIN_EXITCODE 0)
-    set_cache_value(HAVE_COMPATIBLE_ATAN2_EXITCODE 0)
-    set_cache_value(HAVE_COMPATIBLE_EXP_EXITCODE 1)
-    set_cache_value(HAVE_COMPATIBLE_ILOGB0_EXITCODE 0)
-    set_cache_value(HAVE_COMPATIBLE_ILOGBNAN_EXITCODE 1)
-    set_cache_value(HAVE_COMPATIBLE_LOG10_EXITCODE 0)
-    set_cache_value(HAVE_COMPATIBLE_LOG_EXITCODE 0)
-    set_cache_value(HAVE_COMPATIBLE_POW_EXITCODE 0)
     set_cache_value(HAVE_FUNCTIONAL_PTHREAD_ROBUST_MUTEXES_EXITCODE 1)
     set_cache_value(HAVE_LARGE_SNPRINTF_SUPPORT_EXITCODE 0)
     set_cache_value(HAVE_MMAP_DEV_ZERO_EXITCODE 1)
@@ -75,8 +66,6 @@ if(DARWIN)
     set_cache_value(HAVE_PROCFS_STATM_EXITCODE 1)
     set_cache_value(HAVE_SCHED_GETCPU_EXITCODE 1)
     set_cache_value(HAVE_SCHED_GET_PRIORITY_EXITCODE 0)
-    set_cache_value(HAVE_VALID_NEGATIVE_INF_POW_EXITCODE 0)
-    set_cache_value(HAVE_VALID_POSITIVE_INF_POW_EXITCODE 0)
     set_cache_value(HAVE_WORKING_CLOCK_GETTIME_EXITCODE 0)
     set_cache_value(HAVE_WORKING_GETTIMEOFDAY_EXITCODE 0)
     set_cache_value(MMAP_ANON_IGNORES_PROTECTION_EXITCODE 1)
@@ -97,14 +86,6 @@ elseif(TARGET_ARCH_NAME MATCHES "^(armel|arm|armv6|arm64|loongarch64|riscv64|s39
   set_cache_value(HAVE_CLOCK_MONOTONIC_EXITCODE 0)
   set_cache_value(HAVE_CLOCK_REALTIME_EXITCODE 0)
   set_cache_value(HAVE_CLOCK_THREAD_CPUTIME_EXITCODE 0)
-  set_cache_value(HAVE_COMPATIBLE_ACOS_EXITCODE 0)
-  set_cache_value(HAVE_COMPATIBLE_ASIN_EXITCODE 0)
-  set_cache_value(HAVE_COMPATIBLE_ATAN2_EXITCODE 0)
-  set_cache_value(HAVE_COMPATIBLE_ILOGB0_EXITCODE 1)
-  set_cache_value(HAVE_COMPATIBLE_ILOGBNAN_EXITCODE 1)
-  set_cache_value(HAVE_COMPATIBLE_LOG10_EXITCODE 0)
-  set_cache_value(HAVE_COMPATIBLE_LOG_EXITCODE 0)
-  set_cache_value(HAVE_COMPATIBLE_POW_EXITCODE 0)
   set_cache_value(HAVE_LARGE_SNPRINTF_SUPPORT_EXITCODE 0)
   set_cache_value(HAVE_MMAP_DEV_ZERO_EXITCODE 0)
   set_cache_value(HAVE_PROCFS_CTL_EXITCODE 1)
@@ -112,8 +93,6 @@ elseif(TARGET_ARCH_NAME MATCHES "^(armel|arm|armv6|arm64|loongarch64|riscv64|s39
   set_cache_value(HAVE_PROCFS_STATM_EXITCODE 0)
   set_cache_value(HAVE_SCHED_GETCPU_EXITCODE 0)
   set_cache_value(HAVE_SCHED_GET_PRIORITY_EXITCODE 0)
-  set_cache_value(HAVE_VALID_NEGATIVE_INF_POW_EXITCODE 0)
-  set_cache_value(HAVE_VALID_POSITIVE_INF_POW_EXITCODE 0)
   set_cache_value(HAVE_WORKING_CLOCK_GETTIME_EXITCODE 0)
   set_cache_value(HAVE_WORKING_GETTIMEOFDAY_EXITCODE 0)
   set_cache_value(ONE_SHARED_MAPPING_PER_FILEREGION_PER_PROCESS_EXITCODE 1)
@@ -140,16 +119,8 @@ elseif(TARGET_ARCH_NAME MATCHES "^(armel|arm|armv6|arm64|loongarch64|riscv64|s39
     set_cache_value(HAVE_PROCFS_STAT 0)
     set_cache_value(HAVE_PROCFS_STATM 0)
     set_cache_value(UNGETC_NOT_RETURN_EOF 0)
-    set_cache_value(HAVE_COMPATIBLE_ILOGBNAN 1)
     set_cache_value(HAVE_FUNCTIONAL_PTHREAD_ROBUST_MUTEXES_EXITCODE 0)
   elseif(ILLUMOS)
-    set_cache_value(HAVE_COMPATIBLE_ACOS_EXITCODE 1)
-    set_cache_value(HAVE_COMPATIBLE_ASIN_EXITCODE 1)
-    set_cache_value(HAVE_COMPATIBLE_ATAN2_EXITCODE 1)
-    set_cache_value(HAVE_COMPATIBLE_POW_EXITCODE 1)
-    set_cache_value(HAVE_COMPATIBLE_ILOGBNAN_EXITCODE 0)
-    set_cache_value(HAVE_COMPATIBLE_LOG10_EXITCODE 1)
-    set_cache_value(HAVE_COMPATIBLE_LOG_EXITCODE 1)
     set_cache_value(HAVE_LARGE_SNPRINTF_SUPPORT_EXITCODE 1)
     set_cache_value(HAVE_PROCFS_CTL_EXITCODE 0)
     set_cache_value(SSCANF_SUPPORT_ll_EXITCODE 1)
@@ -162,8 +133,6 @@ elseif(TARGET_ARCH_NAME MATCHES "^(armel|arm|armv6|arm64|loongarch64|riscv64|s39
     set_cache_value(HAVE_FUNCTIONAL_PTHREAD_ROBUST_MUTEXES_EXITCODE 0)
   elseif(HAIKU)
     set_cache_value(HAVE_CLOCK_MONOTONIC_COARSE_EXITCODE 1)
-    set_cache_value(HAVE_COMPATIBLE_EXP_EXITCODE 0)
-    set_cache_value(HAVE_COMPATIBLE_ILOGBNAN_EXITCODE 0)
     set_cache_value(HAVE_PROCFS_STAT_EXITCODE 1)
     set_cache_value(HAVE_PROCFS_STATM_EXITCODE 1)
   endif()
diff --git a/eng/native/tryrun_ios_tvos.cmake b/eng/native/tryrun_ios_tvos.cmake
index 790c802c30ac..e3dfeb62780b 100644
--- a/eng/native/tryrun_ios_tvos.cmake
+++ b/eng/native/tryrun_ios_tvos.cmake
@@ -17,15 +17,6 @@ set_cache_value(HAVE_BROKEN_FIFO_SELECT_EXITCODE 1)
 set_cache_value(HAVE_CLOCK_REALTIME_EXITCODE 0)
 set_cache_value(HAVE_CLOCK_THREAD_CPUTIME_EXITCODE 0)
 set_cache_value(HAVE_CLOCK_GETTIME_NSEC_NP_EXITCODE 0)
-set_cache_value(HAVE_COMPATIBLE_ACOS_EXITCODE 0)
-set_cache_value(HAVE_COMPATIBLE_ASIN_EXITCODE 0)
-set_cache_value(HAVE_COMPATIBLE_ATAN2_EXITCODE 0)
-set_cache_value(HAVE_COMPATIBLE_EXP_EXITCODE 1)
-set_cache_value(HAVE_COMPATIBLE_ILOGB0_EXITCODE 0)
-set_cache_value(HAVE_COMPATIBLE_ILOGBNAN_EXITCODE 1)
-set_cache_value(HAVE_COMPATIBLE_LOG10_EXITCODE 0)
-set_cache_value(HAVE_COMPATIBLE_LOG_EXITCODE 0)
-set_cache_value(HAVE_COMPATIBLE_POW_EXITCODE 0)
 set_cache_value(HAVE_FUNCTIONAL_PTHREAD_ROBUST_MUTEXES_EXITCODE 1)
 set_cache_value(HAVE_LARGE_SNPRINTF_SUPPORT_EXITCODE 0)
 set_cache_value(HAVE_MMAP_DEV_ZERO_EXITCODE 1)
@@ -33,8 +24,6 @@ set_cache_value(HAVE_PROCFS_CTL_EXITCODE 1)
 set_cache_value(HAVE_PROCFS_STAT_EXITCODE 1)
 set_cache_value(HAVE_PROCFS_STATM_EXITCODE 1)
 set_cache_value(HAVE_SCHED_GET_PRIORITY_EXITCODE 0)
-set_cache_value(HAVE_VALID_NEGATIVE_INF_POW_EXITCODE 0)
-set_cache_value(HAVE_VALID_POSITIVE_INF_POW_EXITCODE 0)
 set_cache_value(HAVE_WORKING_CLOCK_GETTIME_EXITCODE 0)
 set_cache_value(HAVE_WORKING_GETTIMEOFDAY_EXITCODE 0)
 set_cache_value(MMAP_ANON_IGNORES_PROTECTION_EXITCODE 1)
diff --git a/eng/nativepgo.targets b/eng/nativepgo.targets
index 9f5984efdb63..99344e20b8e2 100644
--- a/eng/nativepgo.targets
+++ b/eng/nativepgo.targets
@@ -9,7 +9,7 @@
     <_NativeOptimizationDataPackageTarget Condition="'$(TargetOS)' == 'windows'">windows_nt-$(TargetArchitecture.ToLower())</_NativeOptimizationDataPackageTarget>
 
   </PropertyGroup>
-  <ItemGroup Condition="'$(optimizationPGOCoreCLRVersion)'!='' and '$(DotNetBuildFromSource)' != 'true'">
+  <ItemGroup Condition="'$(optimizationPGOCoreCLRVersion)'!='' and '$(DotNetBuildSourceOnly)' != 'true'">
     <PackageReference Include="optimization.windows_nt-x64.PGO.CoreCLR"
       Version="$(optimizationPGOCoreCLRVersion)"
       GeneratePathProperty="true" />
diff --git a/eng/packaging.targets b/eng/packaging.targets
index fde0ccb624b8..554e705e1887 100644
--- a/eng/packaging.targets
+++ b/eng/packaging.targets
@@ -4,12 +4,12 @@
     <!-- Package Validation isn't helpful when authoring shared framework packages. -->
     <EnablePackageValidation Condition="'$(EnablePackageValidation)' == '' and
                                         '$(UsingMicrosoftDotNetSharedFrameworkSdk)' != 'true' and
-                                        '$(DotNetBuildFromSource)' != 'true'">true</EnablePackageValidation>
+                                        '$(DotNetBuildSourceOnly)' != 'true'">true</EnablePackageValidation>
     <!-- Don't perform baseline validation if we don't have a stable prebuilt version.
          Don't attempt to restore prebuilts during source-build. -->
     <DisablePackageBaselineValidation Condition="'$(IsShipping)' == 'false' or
                                                  '$(SuppressFinalPackageVersion)' == 'true' or
-                                                 '$(DotNetBuildFromSource)' == 'true'">true</DisablePackageBaselineValidation>
+                                                 '$(DotNetBuildSourceOnly)' == 'true'">true</DisablePackageBaselineValidation>
     <PackageValidationBaselineVersion Condition="'$(PackageValidationBaselineVersion)' == ''">$(ApiCompatNetCoreAppBaselineVersion)</PackageValidationBaselineVersion>
 
     <BeforePack>$(BeforePack);IncludeAnalyzersInPackage;AddNETStandardCompatErrorFileForPackaging</BeforePack>
@@ -39,7 +39,7 @@
     <!-- When in source-build we need to generate all packages when building for all configurations even in servicing. -->
     <GeneratePackageOnBuild Condition="'$(GeneratePackageOnBuild)' != 'true' and
                                        '$(BuildAllConfigurations)' == 'true' and
-                                       '$(DotNetBuildFromSource)' == 'true'">true</GeneratePackageOnBuild>
+                                       '$(DotNetBuildSourceOnly)' == 'true'">true</GeneratePackageOnBuild>
 
     <!-- During NoBuild pack invocations, skip project reference build. Necessary for the IncludeProjectReferencesWithPackAttributeInPackage target. -->
     <BuildProjectReferences Condition="'$(NoBuild)' == 'true'">false</BuildProjectReferences>
@@ -307,7 +307,7 @@
   <Target Name="ValidateServicingVersionIsProperlySet"
           Condition="'$(PreReleaseVersionLabel)' == 'servicing' and
                      '$(PackageUseIncrementalServicingVersion)' == 'true' and
-                     '$(DotNetBuildFromSource)' != 'true'"
+                     '$(DotNetBuildSourceOnly)' != 'true'"
           AfterTargets="GenerateNuspec">
     <Error Condition="'$(ServicingVersion)' == '0'" Text="ServicingVersion is set to 0 and it should be an increment of the patch version from the last released package." />
   </Target>
diff --git a/eng/pipelines/common/build-coreclr-and-libraries-job.yml b/eng/pipelines/common/build-coreclr-and-libraries-job.yml
deleted file mode 100644
index 71cefc5dbbd0..000000000000
--- a/eng/pipelines/common/build-coreclr-and-libraries-job.yml
+++ /dev/null
@@ -1,53 +0,0 @@
-parameters:
-  buildConfig: ''
-  archType: ''
-  osGroup: ''
-  osSubgroup: ''
-  targetRid: ''
-  container: ''
-  testGroup: ''
-  crossBuild: false
-  timeoutInMinutes: ''
-  signBinaries: false
-  variables: {}
-  pool: ''
-  platform: ''
-  testBuildPlatforms: []
-  disableComponentGovernance: false
-
-jobs:
-- template: /eng/pipelines/coreclr/templates/build-job.yml
-  parameters:
-    buildConfig: ${{ parameters.buildConfig }}
-    archType: ${{ parameters.archType }}
-    osGroup: ${{ parameters.osGroup }}
-    osSubgroup: ${{ parameters.osSubgroup }}
-    targetRid: ${{ parameters.targetRid }}
-    container: ${{ parameters.container }}
-    testGroup: ${{ parameters.testGroup }}
-    crossBuild: ${{ parameters.crossBuild }}
-    timeoutInminutes: ${{ parameters.timeoutInMinutes }}
-    signBinaries: ${{ parameters.signBinaries }}
-    variables: ${{ parameters.variables }}
-    pool: ${{ parameters.pool }}
-    disableComponentGovernance: ${{ parameters.disableComponentGovernance }}
-
-- template: /eng/pipelines/libraries/build-job.yml
-  parameters:
-    ${{ if eq(parameters.buildConfig, 'debug') }}:
-      buildConfig: Debug
-    ${{ if ne(parameters.buildConfig, 'debug') }}:
-      buildConfig: Release
-    archType: ${{ parameters.archType }}
-    osGroup: ${{ parameters.osGroup }}
-    osSubgroup: ${{ parameters.osSubgroup }}
-    container: ${{ parameters.container }}
-    testGroup: ${{ parameters.testGroup }}
-    crossBuild: ${{ parameters.crossBuild }}
-    timeoutInminutes: ${{ parameters.timeoutInMinutes }}
-    variables: ${{ parameters.variables }}
-    pool: ${{ parameters.pool }}
-    liveRuntimeBuildConfig: ${{ parameters.buildConfig }}
-    platform: ${{ parameters.platform }}
-    testBuildPlatforms: ${{ parameters.testBuildPlatforms }}
-    disableComponentGovernance: ${{ parameters.disableComponentGovernance }}
diff --git a/eng/pipelines/common/evaluate-default-paths.yml b/eng/pipelines/common/evaluate-default-paths.yml
index a5b40862c30c..edbc1c618f60 100644
--- a/eng/pipelines/common/evaluate-default-paths.yml
+++ b/eng/pipelines/common/evaluate-default-paths.yml
@@ -28,7 +28,7 @@ parameters:
         src/mono/nuget/Microsoft.NET.Runtime.wasm.Sample.Mono/*
         src/mono/nuget/Microsoft.NET.Sdk.WebAssembly.Pack/*
         src/mono/nuget/Microsoft.NETCore.BrowserDebugHost.Transport/*
-        src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/*
+        src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/**/*
         src/mono/nuget/Microsoft.NET.Workload*
         src/mono/sample/wasm/*
         src/mono/browser/*
@@ -213,7 +213,7 @@ jobs:
       - eng/testing/scenarios/BuildWasmAppsJobsList.txt
       - eng/testing/tests.browser.targets
       - eng/testing/tests.was*.targets
-      - src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/WorkloadTesting.Core.targets
+      - src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/Sdk/WorkloadTesting.Core.targets
       - eng/testing/workloads-browser.targets
       - eng/testing/workloads-testing.targets
       - eng/testing/workloads-wasi.targets
@@ -303,7 +303,7 @@ jobs:
       exclude:
       - eng/testing/scenarios/BuildWasiAppsJobsList.txt
       - eng/testing/scenarios/BuildWasmAppsJobsList.txt
-        src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/WorkloadTesting.Core.targets
+        src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/Sdk/WorkloadTesting.Core.targets
       - eng/testing/workloads-browser.targets
       - eng/testing/workloads-testing.targets
       - eng/testing/workloads-wasi.targets
diff --git a/eng/pipelines/common/evaluate-paths-job.yml b/eng/pipelines/common/evaluate-paths-job.yml
index 64c3a12584c2..696e9ac909f8 100644
--- a/eng/pipelines/common/evaluate-paths-job.yml
+++ b/eng/pipelines/common/evaluate-paths-job.yml
@@ -11,7 +11,7 @@ parameters:
   #     1st we evaluate changes for all paths except ones in excluded list. If we can't find
   #     any applicable changes like that, then we evaluate changes for included paths
   #     if any of these two finds changes, then a variable will be set to true.
-  #  In order to consume this variable you need to reference it via: $[ dependencies.evaluate_paths.outputs['SetPathVars_<subset>.containschange'] ]
+  #  In order to consume this variable you need to reference it via: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_<subset>.containschange'] ]
   #
   #  Array form example
   #  paths:
diff --git a/eng/pipelines/common/global-build-job.yml b/eng/pipelines/common/global-build-job.yml
index 5862568033f5..b9cc92597319 100644
--- a/eng/pipelines/common/global-build-job.yml
+++ b/eng/pipelines/common/global-build-job.yml
@@ -12,12 +12,18 @@ parameters:
   targetRid: ''
   timeoutInMinutes: ''
   dependsOn: []
+  # The following parameter is used to specify dependencies on other global build for the same platform.
+  # We provide this mechanism to allow for global builds to depend on other global builds and use the multiplexing
+  # that platform-matrix.yml enables.
+  # Each item can have the following properties:
+  # - nameSuffix: The suffix of the job name to depend on.
+  # - buildConfig: The configuration of the job to depend on.
+  dependsOnGlobalBuilds: []
   pool: ''
   platform: ''
   condition: true
   useContinueOnErrorDuringBuild: false
   shouldContinueOnError: false
-  dependOnEvaluatePaths: false
   isOfficialBuild: false
   isSourceBuild: false
   isNonPortableSourceBuild: false
@@ -32,9 +38,11 @@ parameters:
   preBuildSteps: []
   enableRichCodeNavigation: false
   richCodeNavigationLanguage: 'csharp'
+  disableComponentGovernance: ''
+  templatePath: 'templates'
 
 jobs:
-- template: /eng/common/templates/job/job.yml
+- template: /eng/common/${{ parameters.templatePath }}/job/job.yml
   parameters:
     ${{ if eq(parameters.hostedOs, '') }}:
       name: ${{ format('build_{0}{1}_{2}_{3}_{4}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig, parameters.nameSuffix) }}
@@ -52,21 +60,29 @@ jobs:
     enableRichCodeNavigation: ${{ parameters.enableRichCodeNavigation }}
     richCodeNavigationLanguage: ${{ parameters.richCodeNavigationLanguage }}
 
+    artifacts:
+      publish:
+        logs:
+          ${{ if notin(parameters.osGroup, 'browser', 'wasi') }}:
+            name: Logs_Build_Attempt$(System.JobAttempt)_${{ parameters.osGroup }}_${{ parameters.osSubGroup }}_${{ parameters.archType }}_${{ parameters.buildConfig }}_${{ parameters.nameSuffix }}
+          ${{ if in(parameters.osGroup, 'browser', 'wasi') }}:
+            name: Logs_Build_Attempt$(System.JobAttempt)_${{ parameters.osGroup }}_${{ parameters.archType }}_${{ parameters.hostedOs }}_${{ parameters.buildConfig }}_${{ parameters.nameSuffix }}
+
     # Component governance does not work on musl machines
     ${{ if eq(parameters.osSubGroup, '_musl') }}:
       disableComponentGovernance: true
+    ${{ else }}:
+      disableComponentGovernance: ${{ parameters.disableComponentGovernance }}
 
     workspace:
       clean: all
 
-    ${{ if and(ne(parameters.dependOnEvaluatePaths, true),ne(parameters.dependsOn,'')) }}:
-      dependsOn: ${{ parameters.dependsOn }}
-
-    ${{ if eq(parameters.dependOnEvaluatePaths, true) }}:
+    ${{ if or(ne(parameters.dependsOn,''), ne(parameters.dependsOnGlobalBuilds,'')) }}:
       dependsOn:
-      - evaluate_paths
-      - ${{ if ne(parameters.dependsOn,'') }}:
-        - ${{ parameters.dependsOn }}
+      - ${{ each build in parameters.dependsOn }}:
+        - ${{ build }}
+      - ${{ each globalBuild in parameters.dependsOnGlobalBuilds }}:
+        - ${{ format('build_{0}{1}_{2}_{3}_{4}', parameters.osGroup, parameters.osSubgroup, parameters.archType, coalesce(globalBuild.buildConfig, parameters.buildConfig), globalBuild.nameSuffix) }}
 
     variables:
       - ${{ if eq(variables['System.TeamProject'], 'internal') }}:
@@ -131,6 +147,7 @@ jobs:
 
       - ${{ each variable in parameters.variables }}:
         - ${{ variable }}
+
     steps:
     - ${{ if eq(parameters.osGroup, 'windows') }}:
       - template: /eng/pipelines/common/templates/disable-vsupdate-or-failfast.yml
@@ -171,7 +188,7 @@ jobs:
           path: '$(Build.SourcesDirectory)/artifacts/obj/mono/offsetfiles'
 
     - ${{ if eq(parameters.isSourceBuild, true) }}:
-      - template: /eng/common/templates/steps/source-build.yml
+      - template: /eng/common/${{ parameters.templatePath }}/steps/source-build.yml
         parameters:
           platform:
             baseOS: ${{ parameters.baseOS }}
@@ -295,15 +312,3 @@ jobs:
         - powershell: ./eng/collect_vsinfo.ps1 -ArchiveRunName postbuild_log
           displayName: Collect vslogs on exit
           condition: always()
-
-    - task: PublishBuildArtifacts@1
-      displayName: Publish Logs
-      inputs:
-        PathtoPublish: '$(Build.SourcesDirectory)/artifacts/log/'
-        PublishLocation: Container
-        ${{ if notin(parameters.osGroup, 'browser', 'wasi') }}:
-          ArtifactName: Logs_Build_Attempt$(System.JobAttempt)_${{ parameters.osGroup }}_${{ parameters.osSubGroup }}_${{ parameters.archType }}_${{ parameters.buildConfig }}_${{ parameters.nameSuffix }}
-        ${{ if in(parameters.osGroup, 'browser', 'wasi') }}:
-          ArtifactName: Logs_Build_Attempt$(System.JobAttempt)_${{ parameters.osGroup }}_${{ parameters.archType }}_${{ parameters.hostedOs }}_${{ parameters.buildConfig }}_${{ parameters.nameSuffix }}
-      continueOnError: true
-      condition: always()
diff --git a/eng/pipelines/common/platform-matrix.yml b/eng/pipelines/common/platform-matrix.yml
index 776cdae314c4..7245e5216804 100644
--- a/eng/pipelines/common/platform-matrix.yml
+++ b/eng/pipelines/common/platform-matrix.yml
@@ -158,6 +158,30 @@ jobs:
         crossBuild: true
         ${{ insert }}: ${{ parameters.jobParameters }}
 
+# Linux Bionic arm
+
+- ${{ if containsValue(parameters.platforms, 'linux_bionic_arm') }}:
+  - template: xplat-setup.yml
+    parameters:
+      jobTemplate: ${{ parameters.jobTemplate }}
+      helixQueuesTemplate: ${{ parameters.helixQueuesTemplate }}
+      variables: ${{ parameters.variables }}
+      osGroup: linux
+      osSubgroup: _bionic
+      archType: arm
+      targetRid: linux-bionic-arm
+      platform: linux_bionic_arm
+      shouldContinueOnError: ${{ parameters.shouldContinueOnError }}
+      container: linux_bionic
+      jobParameters:
+        runtimeFlavor: mono
+        # We build on Linux, but the test queue runs Windows, so
+        # we need to override the test script generation
+        runScriptWindowsCmd: true
+        buildConfig: ${{ parameters.buildConfig }}
+        helixQueueGroup: ${{ parameters.helixQueueGroup }}
+        ${{ insert }}: ${{ parameters.jobParameters }}
+
 # Linux Bionic arm64
 
 - ${{ if containsValue(parameters.platforms, 'linux_bionic_arm64') }}:
@@ -248,7 +272,6 @@ jobs:
         buildConfig: ${{ parameters.buildConfig }}
         helixQueueGroup: ${{ parameters.helixQueueGroup }}
         crossBuild: true
-        disableClrTest: true
         ${{ insert }}: ${{ parameters.jobParameters }}
 
 # Runtime-dev-innerloop build
@@ -439,7 +462,6 @@ jobs:
         buildConfig: ${{ parameters.buildConfig }}
         helixQueueGroup: ${{ parameters.helixQueueGroup }}
         crossBuild: true
-        disableClrTest: true
         ${{ insert }}: ${{ parameters.jobParameters }}
 
 # WASI WebAssembly
@@ -896,7 +918,6 @@ jobs:
         buildConfig: ${{ parameters.buildConfig }}
         helixQueueGroup: ${{ parameters.helixQueueGroup }}
         crossBuild: true
-        disableClrTest: true
         ${{ insert }}: ${{ parameters.jobParameters }}
 
 # Windows x64
diff --git a/eng/pipelines/common/templates/global-build-step.yml b/eng/pipelines/common/templates/global-build-step.yml
index 7f38a9fd1184..b88207c516c1 100644
--- a/eng/pipelines/common/templates/global-build-step.yml
+++ b/eng/pipelines/common/templates/global-build-step.yml
@@ -3,10 +3,16 @@ parameters:
   useContinueOnErrorDuringBuild: false
   shouldContinueOnError: false
   archParameter: $(_archParameter)
+  crossArg: $(crossArg)
   displayName: Build product
+  container: ''
+  condition: succeeded()
 
 steps:
-  - script: $(Build.SourcesDirectory)$(dir)build$(scriptExt) -ci ${{ parameters.archParameter }} $(_osParameter) $(crossArg) ${{ parameters.buildArgs }} $(_officialBuildParameter) $(_buildDarwinFrameworksParameter) $(_overrideTestScriptWindowsCmdParameter)
+  - script: $(Build.SourcesDirectory)$(dir)build$(scriptExt) -ci ${{ parameters.archParameter }} $(_osParameter) ${{ parameters.crossArg }} ${{ parameters.buildArgs }} $(_officialBuildParameter) $(_buildDarwinFrameworksParameter) $(_overrideTestScriptWindowsCmdParameter)
     displayName: ${{ parameters.displayName }}
     ${{ if eq(parameters.useContinueOnErrorDuringBuild, true) }}:
       continueOnError: ${{ parameters.shouldContinueOnError }}
+    ${{ if ne(parameters.container, '') }}:
+      target: ${{ parameters.container }}
+    condition: ${{ parameters.condition }}
diff --git a/eng/pipelines/common/templates/pipeline-with-resources.yml b/eng/pipelines/common/templates/pipeline-with-resources.yml
index 7b41d3c2abc6..5ae3a3f7a38f 100644
--- a/eng/pipelines/common/templates/pipeline-with-resources.yml
+++ b/eng/pipelines/common/templates/pipeline-with-resources.yml
@@ -1,114 +1,124 @@
 parameters:
   - name: stages
     type: stageList
-
-resources:
-  containers:
-    - container: linux_arm
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-arm
-      env:
-        ROOTFS_DIR: /crossrootfs/arm
-
-    - container: linux_armv6
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-20.04-cross-armv6-raspbian-10
-      env:
-        ROOTFS_DIR: /crossrootfs/armv6
-
-    - container: linux_arm64
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-biarch-amd64-arm64
-      env:
-        ROOTFS_HOST_DIR: /crossrootfs/x64
-        ROOTFS_DIR: /crossrootfs/arm64
-
-    - container: linux_musl_x64
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-amd64-alpine
-      env:
-        ROOTFS_DIR: /crossrootfs/x64
-
-    - container: linux_musl_arm
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-arm-alpine
-      env:
-        ROOTFS_DIR: /crossrootfs/arm
-
-    - container: linux_musl_arm64
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-arm64-alpine
-      env:
-        ROOTFS_DIR: /crossrootfs/arm64
-
-    # This container contains all required toolsets to build for Android and for Linux with bionic libc.
-    - container: linux_bionic
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-android-amd64
-
-    # This container contains all required toolsets to build for Android as well as tooling to build docker images.
-    - container: android_docker
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-android-docker
-
-    - container: linux_x64
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-amd64
-      env:
-        ROOTFS_DIR: /crossrootfs/x64
-
-    - container: linux_x86
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-x86
-      env:
-        ROOTFS_DIR: /crossrootfs/x86
-
-    - container: linux_x64_dev_innerloop
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04
-
-    # We use a CentOS Stream 8 image here to test building from source on CentOS Stream 8.
-    - container: SourceBuild_centos_x64
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream8
-
-    # AlmaLinux 8 is a RHEL 8 rebuild, so we use it to test building from source on RHEL 8.
-    - container: SourceBuild_linux_x64
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:almalinux-8-source-build
-
-    - container: linux_s390x
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-18.04-cross-s390x
-      env:
-        ROOTFS_DIR: /crossrootfs/s390x
-
-    - container: linux_ppc64le
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-18.04-cross-ppc64le
-      env:
-        ROOTFS_DIR: /crossrootfs/ppc64le
-
-    - container: linux_riscv64
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04-cross-riscv64
-      env:
-        ROOTFS_DIR: /crossrootfs/riscv64
-
-    - container: debian-12-gcc13-amd64
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-gcc13-amd64
-
-    - container: linux_x64_llvmaot
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream8
-
-    - container: browser_wasm
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-webassembly-20230913040940-1edc1c6
-      env:
-        ROOTFS_DIR: /crossrootfs/x64
-
-    - container: wasi_wasm
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-webassembly-20230917141449-2aaa02c
-      env:
-        ROOTFS_DIR: /crossrootfs/x64
-
-    - container: freebsd_x64
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-amd64-freebsd-13
-      env:
-        ROOTFS_DIR: /crossrootfs/x64
-
-    - container: tizen_armel
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-18.04-cross-armel-tizen
-      env:
-        ROOTFS_DIR: /crossrootfs/armel
-
-    - container: debpkg
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04-debpkg
-
-    - container: rpmpkg
-      image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-fpm
-
-stages: ${{ parameters.stages }}
+  - name: isOfficialBuild
+    type: boolean
+    default: false
+
+extends:
+  template: templateDispatch.yml
+  parameters:
+    ${{ if parameters.isOfficialBuild }}:
+      templatePath: template1es.yml
+    ${{ else }}:
+      templatePath: templatePublic.yml
+
+    stages: ${{ parameters.stages }}
+
+    containers:
+      linux_arm:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-arm
+        env:
+          ROOTFS_DIR: /crossrootfs/arm
+
+      linux_armv6:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-20.04-cross-armv6-raspbian-10
+        env:
+          ROOTFS_DIR: /crossrootfs/armv6
+
+      linux_arm64:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-biarch-amd64-arm64
+        env:
+          ROOTFS_HOST_DIR: /crossrootfs/x64
+          ROOTFS_DIR: /crossrootfs/arm64
+
+      linux_musl_x64:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-amd64-alpine
+        env:
+          ROOTFS_DIR: /crossrootfs/x64
+
+      linux_musl_arm:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-arm-alpine
+        env:
+          ROOTFS_DIR: /crossrootfs/arm
+
+      linux_musl_arm64:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-arm64-alpine
+        env:
+          ROOTFS_DIR: /crossrootfs/arm64
+
+      # This container contains all required toolsets to build for Android and for Linux with bionic libc.
+      linux_bionic:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-android-amd64
+
+      # This container contains all required toolsets to build for Android as well as tooling to build docker images.
+      android_docker:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-android-docker
+
+      linux_x64:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-amd64
+        env:
+          ROOTFS_DIR: /crossrootfs/x64
+
+      linux_x86:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-x86
+        env:
+          ROOTFS_DIR: /crossrootfs/x86
+
+      linux_x64_dev_innerloop:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04
+
+      # We use a CentOS Stream 8 image here to test building from source on CentOS Stream 8.
+      SourceBuild_centos_x64:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream8
+
+      # AlmaLinux 8 is a RHEL 8 rebuild, so we use it to test building from source on RHEL 8.
+      SourceBuild_linux_x64:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:almalinux-8-source-build
+
+      linux_s390x:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-18.04-cross-s390x
+        env:
+          ROOTFS_DIR: /crossrootfs/s390x
+
+      linux_ppc64le:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-18.04-cross-ppc64le
+        env:
+          ROOTFS_DIR: /crossrootfs/ppc64le
+
+      linux_riscv64:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04-cross-riscv64
+        env:
+          ROOTFS_DIR: /crossrootfs/riscv64
+
+      debian-12-gcc13-amd64:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:debian-12-gcc13-amd64
+
+      linux_x64_llvmaot:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream8
+
+      browser_wasm:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-webassembly-20230913040940-1edc1c6
+        env:
+          ROOTFS_DIR: /crossrootfs/x64
+
+      wasi_wasm:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-webassembly
+        env:
+          ROOTFS_DIR: /crossrootfs/x64
+
+      freebsd_x64:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-cross-amd64-freebsd-13
+        env:
+          ROOTFS_DIR: /crossrootfs/x64
+
+      tizen_armel:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04-cross-armel-tizen
+        env:
+          ROOTFS_DIR: /crossrootfs/armel
+
+      debpkg:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04-debpkg
+
+      rpmpkg:
+        image: mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-fpm
\ No newline at end of file
diff --git a/eng/pipelines/common/templates/publish-build-artifacts.yml b/eng/pipelines/common/templates/publish-build-artifacts.yml
new file mode 100644
index 000000000000..b9b263c361f8
--- /dev/null
+++ b/eng/pipelines/common/templates/publish-build-artifacts.yml
@@ -0,0 +1,22 @@
+parameters:
+  - name: isOfficialBuild
+    type: boolean
+  - name: displayName
+    type: string
+  - name: inputs
+    type: object
+  - name: condition
+    type: string
+    default: ''
+
+steps:
+  - ${{ if parameters.isOfficialBuild }}:
+    - task: 1ES.PublishBuildArtifacts@1
+      displayName: ${{ parameters.displayName }}
+      inputs: ${{ parameters.inputs }}
+      condition: ${{ parameters.condition }}
+  - ${{ else }}:
+    - task: PublishBuildArtifacts@1
+      displayName: ${{ parameters.displayName }}
+      inputs: ${{ parameters.inputs }}
+      condition: ${{ parameters.condition }}
\ No newline at end of file
diff --git a/eng/pipelines/common/templates/publish-pipeline-artifacts.yml b/eng/pipelines/common/templates/publish-pipeline-artifacts.yml
new file mode 100644
index 000000000000..81f292ec5528
--- /dev/null
+++ b/eng/pipelines/common/templates/publish-pipeline-artifacts.yml
@@ -0,0 +1,17 @@
+parameters:
+- name: displayName
+  type: string
+- name: inputs
+  type: object
+- name: isOfficialBuild
+  type: boolean
+
+steps:
+  - ${{ if parameters.isOfficialBuild }}:
+    - task: 1ES.PublishPipelineArtifact@1
+      displayName: ${{ parameters.displayName }}
+      inputs: ${{ parameters.inputs }}
+  - ${{ else }}:
+    - task: PublishPipelineArtifact@1
+      displayName: ${{ parameters.displayName }}
+      inputs: ${{ parameters.inputs }}
\ No newline at end of file
diff --git a/eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml b/eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml
index 4769f35a9c82..b0a2043bbd57 100644
--- a/eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml
+++ b/eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml
@@ -38,39 +38,48 @@ steps:
       archType: ${{ parameters.archType }}
       buildConfig: ${{ parameters.buildConfig }}
       testBuildArgs: ${{ parameters.testBuildArgs }}
-
-  # Build a Mono LLVM AOT cross-compiler for non-amd64 targets (in this case, just arm64)
-  - ${{ if and(eq(parameters.runtimeFlavor, 'mono'), or(eq(parameters.runtimeVariant, 'llvmaot'), eq(parameters.runtimeVariant, 'llvmfullaot'))) }}:
+  # Build a Mono AOT cross-compiler for non-amd64 targets (in this case, just arm64)
+  - ${{ if and(eq(parameters.runtimeFlavor, 'mono'), in(parameters.runtimeVariant, 'llvmaot', 'llvmfullaot', 'minifullaot')) }}:
     - ${{ if eq(parameters.archType, 'arm64') }}:
-      - script: ./build.sh
-                -subset mono
-                -c ${{ parameters.buildConfig }}
-                -arch ${{ parameters.archType }}
-                /p:BuildMonoAotCrossCompiler=true
-                /p:BuildMonoAotCrossCompilerOnly=true
-                /p:MonoLibClang="/usr/local/lib/libclang.so.16"
-                /p:MonoAOTEnableLLVM=true
-                /p:CrossBuild=true
-        displayName: "Build Mono LLVM AOT cross compiler"
+      - ${{ if eq(parameters.runtimeVariant, 'minifullaot') }}:
+        - script: ./build.sh
+                  -subset mono
+                  -c ${{ parameters.buildConfig }}
+                  -arch ${{ parameters.archType }}
+                  /p:BuildMonoAotCrossCompiler=true
+                  /p:BuildMonoAotCrossCompilerOnly=true
+                  /p:CrossBuild=true
+          displayName: "Build Mono Mini AOT cross compiler"
+      - ${{ else }}:
+        - script: ./build.sh
+                  -subset mono
+                  -c ${{ parameters.buildConfig }}
+                  -arch ${{ parameters.archType }}
+                  /p:BuildMonoAotCrossCompiler=true
+                  /p:BuildMonoAotCrossCompilerOnly=true
+                  /p:MonoLibClang="/usr/local/lib/libclang.so.16"
+                  /p:MonoAOTEnableLLVM=true
+                  /p:CrossBuild=true
+          displayName: "Build Mono LLVM AOT cross compiler"
 
     - ${{ if eq(parameters.archType, 'x64') }}:
       - ${{ if eq(parameters.runtimeVariant, 'llvmaot') }}:
-        - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(logRootNameArg)MonoAot mono_aot ${{ parameters.buildConfig }} ${{ parameters.archType }}
-          displayName: "LLVM AOT compile CoreCLR tests"
+        - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(logRootNameArg)MonoAot mono_aot ${{ parameters.buildConfig }} ${{ parameters.archType }} /p:RuntimeVariant=${{ parameters.runtimeVariant }}
+          displayName: "AOT compile CoreCLR tests"
           target: ${{ coalesce(parameters.llvmAotStepContainer, parameters.container) }}
-      - ${{ if eq(parameters.runtimeVariant, 'llvmfullaot') }}:
-        - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(logRootNameArg)MonoAot mono_fullaot ${{ parameters.buildConfig }} ${{ parameters.archType }}
-          displayName: "LLVM AOT compile CoreCLR tests"
+      - ${{ if in(parameters.runtimeVariant, 'llvmfullaot', 'minifullaot') }}:
+        - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(logRootNameArg)MonoAot mono_fullaot ${{ parameters.buildConfig }} ${{ parameters.archType }} /p:RuntimeVariant=${{ parameters.runtimeVariant }} -maxcpucount:1
+          displayName: "AOT compile CoreCLR tests"
           target: ${{ coalesce(parameters.llvmAotStepContainer, parameters.container) }}
     - ${{ if eq(parameters.archType, 'arm64') }}:
       - ${{ if eq(parameters.runtimeVariant, 'llvmaot') }}:
-        - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(logRootNameArg)MonoAot mono_aot ${{ parameters.buildConfig }} ${{ parameters.archType }} cross /p:RuntimeVariant=llvmfullaot -maxcpucount:2
-          displayName: "LLVM AOT cross-compile CoreCLR tests"
+        - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(logRootNameArg)MonoAot mono_aot ${{ parameters.buildConfig }} ${{ parameters.archType }} cross /p:RuntimeVariant=${{ parameters.runtimeVariant }} -maxcpucount:2
+          displayName: "AOT cross-compile CoreCLR tests"
           env:
             __MonoToolPrefix: aarch64-linux-gnu-
-      - ${{ if eq(parameters.runtimeVariant, 'llvmfullaot') }}:
-        - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(logRootNameArg)MonoAot mono_fullaot ${{ parameters.buildConfig }} ${{ parameters.archType }} cross /p:RuntimeVariant=llvmfullaot -maxcpucount:2
-          displayName: "LLVM AOT cross-compile CoreCLR tests"
+      - ${{ if in(parameters.runtimeVariant, 'llvmfullaot', 'minifullaot') }}:
+        - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(logRootNameArg)MonoAot mono_fullaot ${{ parameters.buildConfig }} ${{ parameters.archType }} cross /p:RuntimeVariant=${{ parameters.runtimeVariant }} -maxcpucount:2
+          displayName: "AOT cross-compile CoreCLR tests"
           env:
             __MonoToolPrefix: aarch64-linux-gnu-
 
diff --git a/eng/pipelines/common/templates/runtimes/build-test-job.yml b/eng/pipelines/common/templates/runtimes/build-test-job.yml
index e249e8ac922a..c86e4e04f111 100644
--- a/eng/pipelines/common/templates/runtimes/build-test-job.yml
+++ b/eng/pipelines/common/templates/runtimes/build-test-job.yml
@@ -12,20 +12,14 @@ parameters:
   runtimeFlavor: 'coreclr'
   runtimeVariant: ''
   dependsOn: []
-  dependOnEvaluatePaths: false
   crossBuild: false
+  isOfficialBuild: false
 
 ### Build managed test components (native components are getting built as part
 ### of the product build job).
 
-### TODO: As of today, build of managed test components requires the product build
-### as a prerequisite due to dependency on System.Private.Corelib. After switching
-### over to its reference assembly we should be able to remove this dependency and
-### run managed test builds in parallel with the product build job.
-
-
 jobs:
-- template: /eng/pipelines/${{ parameters.runtimeFlavor }}/templates/xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
     buildConfig: ${{ parameters.buildConfig }}
     archType: ${{ parameters.archType }}
@@ -36,8 +30,6 @@ jobs:
     runtimeVariant: ${{ parameters.runtimeVariant }}
     testGroup: ${{ parameters.testGroup }}
     pool: ${{ parameters.pool }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
-    compilerArg: ''
 
     # Test jobs should continue on error for internal builds
     ${{ if eq(variables['System.TeamProject'], 'internal') }}:
@@ -64,13 +56,16 @@ jobs:
     ${{ if notIn(parameters.testGroup, 'innerloop', 'clrinterpreter') }}:
       timeoutInMinutes: 160
 
+    artifacts:
+      publish:
+        logs:
+          name: '${{ parameters.runtimeFlavor }}_Common_Runtime_TestBuildLogs_Attempt$(System.JobAttempt)_AnyOS_AnyCPU_$(buildConfig)_${{ parameters.testGroup }}'
+
     variables:
       - ${{ each variable in parameters.variables }}:
         - ${{ variable }}
       - name: liveRuntimeBuildParams
         value: 'libs+clr.iltools -c Release -ci'
-      - name: compilerArg
-        value: ''
 
       - name: runtimeFlavorArgs
         value: ''
@@ -91,6 +86,12 @@ jobs:
           - name: testTreeFilterArg
             value: 'tree GC/Scenarios/GCSimulator'
 
+      - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+        parameters:
+          runtimeFlavor: coreclr
+          testGroup: ${{ parameters.testGroup }}
+          liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }}
+
     steps:
 
     # Install test build dependencies
@@ -99,7 +100,7 @@ jobs:
         displayName: Install native dependencies
 
     # Build core/libraries dependencies of test build
-    - script: $(Build.SourcesDirectory)/build$(scriptExt) $(liveRuntimeBuildParams) $(crossArg) $(compilerArg)
+    - script: $(Build.SourcesDirectory)/build$(scriptExt) $(liveRuntimeBuildParams) $(crossArg)
       displayName: Build coreclr/libs components needed by test build
 
     - ${{ if in(parameters.osGroup, 'osx', 'ios', 'tvos') }}:
@@ -141,13 +142,3 @@ jobs:
         archiveExtension: '.tar.gz'
         artifactName: $(microsoftNetSdkIlArtifactName)
         displayName: 'Microsoft.NET.Sdk.IL package'
-
-
-    # Publish Logs
-    - task: PublishPipelineArtifact@1
-      displayName: Publish Logs
-      inputs:
-        targetPath: $(Build.SourcesDirectory)/artifacts/log
-        artifactName: '${{ parameters.runtimeFlavor }}_Common_Runtime_TestBuildLogs_Attempt$(System.JobAttempt)_AnyOS_AnyCPU_$(buildConfig)_${{ parameters.testGroup }}'
-      continueOnError: true
-      condition: always()
diff --git a/eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml b/eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
new file mode 100644
index 000000000000..6351952b1695
--- /dev/null
+++ b/eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
@@ -0,0 +1,65 @@
+# Variables to resolve the paths and artifact names for the test assets used and produced to run the tests
+# in the src/tests tree.
+parameters:
+  runtimeFlavor: 'coreclr'
+  runtimeVariant: ''
+  liveLibrariesBuildConfig: ''
+  testGroup: 'innerloop'
+  configOverride: ''
+
+variables:
+  - name: binTestsPath
+    value: '$(Build.SourcesDirectory)/artifacts/tests/coreclr'
+
+  # Build product defines what we are trying to build, either coreclr or mono
+  - name: buildProductRootFolderPath
+    value: '$(Build.SourcesDirectory)/artifacts/bin/${{ parameters.runtimeFlavor }}/$(osGroup).$(archType).$(buildConfigUpper)'
+
+  - ${{ if ne(parameters.runtimeFlavor, 'mono') }}:
+    - name: managedGenericTestArtifactName
+      value: 'CoreCLRManagedTestArtifacts_AnyOS_AnyCPU_$(buildConfig)'
+  - ${{ else }}:
+    - name: managedGenericTestArtifactName
+      value: 'MonoManagedTestArtifacts_AnyOS_AnyCPU_$(buildConfig)'
+
+  - name: managedTestArtifactRootFolderPath
+    value: '$(binTestsPath)/$(osGroup).$(archType).$(buildConfigUpper)'
+
+  - name: nativeTestArtifactConfig
+    value: $(_BuildConfig)
+
+  - ${{ if ne(parameters.configOverride, '') }}:
+    - name: nativeTestArtifactConfig
+      value: ${{ parameters.configOverride }}
+
+  - name: nativeTestArtifactName
+    value: 'CoreCLRNativeTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(nativeTestArtifactConfig)'
+
+  - name: nativeTestArtifactRootFolderPath
+    value: '$(binTestsPath)/obj/$(osGroup).$(archType).$(nativeTestArtifactConfig)'
+
+  - name: microsoftNetSdkIlFolderPath
+    value: '$(Build.SourcesDirectory)/.packages/microsoft.net.sdk.il'
+
+  - name: microsoftNetSdkIlArtifactName
+    value: 'MicrosoftNetSdkIlPackage_AnyOS_AnyCPU_$(buildConfig)'
+
+  - name: librariesOverrideArg
+    value : ''
+
+  - ${{ if ne(parameters.liveLibrariesBuildConfig, '') }}:
+    - name: librariesOverrideArg
+      value : ' /p:LibrariesConfiguration=${{ parameters.liveLibrariesBuildConfig }}'
+
+  - name: priorityArg
+    value: ''
+
+  # 'innerloop' and 'clrinterpreter' jobs run the Priority 0 tests; everything else runs the Priority 1 tests.
+  # 'gc-standalone' is forced to run pri0 as well to start with.
+  - ${{ if and(ne(parameters.testGroup, 'innerloop'), ne(parameters.testGroup, 'clrinterpreter'), ne(parameters.testGroup, 'gc-standalone'), ne(parameters.testGroup, 'gc-standalone-server') ) }}:
+    - ${{ if ne(parameters.osGroup, 'windows') }}:
+      - name: priorityArg
+        value: 'priority1'
+    - ${{ if eq(parameters.osGroup, 'windows') }}:
+      - name: priorityArg
+        value: '-priority 1'
diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml
index a2e13dca489d..d6404617a3e1 100644
--- a/eng/pipelines/common/templates/runtimes/run-test-job.yml
+++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml
@@ -22,8 +22,10 @@ parameters:
   runtimeFlavor: 'coreclr'
   shouldContinueOnError: false
   dependsOn: []
-  dependOnEvaluatePaths: false
   SuperPmiCollect: false
+  unifiedArtifactsName: ''
+  unifiedBuildNameSuffix: ''
+  unifiedBuildConfigOverride: ''
 
 ### Test run job
 
@@ -31,7 +33,7 @@ parameters:
 ### buildConfig and archType.
 
 jobs:
-- template: /eng/pipelines/${{ parameters.runtimeFlavor }}/templates/xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
     buildConfig: ${{ parameters.buildConfig }}
     archType: ${{ parameters.archType }}
@@ -44,8 +46,8 @@ jobs:
     helixType: 'build/tests/'
     runtimeVariant: ${{ parameters.runtimeVariant }}
     pool: ${{ parameters.pool }}
-    condition: ${{ parameters.condition }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
+    condition: and(succeeded(), ${{ parameters.condition }})
+    logsName: '${{ parameters.runtimeFlavor }}_${{ parameters.runtimeVariant }}_$(LogNamePrefix)_Attempt$(System.JobAttempt)_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)_${{ parameters.testGroup }}'
 
     # Test jobs should continue on error for internal builds
     ${{ if eq(variables['System.TeamProject'], 'internal') }}:
@@ -60,22 +62,8 @@ jobs:
         - '${{ parameters.runtimeFlavor }}_common_test_build_p0_AnyOS_AnyCPU_${{parameters.buildConfig }}'
       - ${{ if notIn(parameters.testGroup, 'innerloop', 'clrinterpreter') }}:
         - '${{ parameters.runtimeFlavor }}_common_test_build_p1_AnyOS_AnyCPU_${{parameters.buildConfig }}'
-      - ${{ if or( eq(parameters.runtimeVariant, 'minijit'), eq(parameters.runtimeVariant, 'monointerpreter'), eq(parameters.runtimeVariant, 'llvmaot'), eq(parameters.runtimeVariant, 'llvmfullaot'))  }}:
-        # This is needed for creating a CORE_ROOT in the current design.
-        - ${{ format('coreclr_{0}_product_build_{1}{2}_{3}_{4}', '', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-      - ${{ if or( eq(parameters.runtimeVariant, 'minijit'), eq(parameters.runtimeVariant, 'monointerpreter')) }} :
-        # minijit and mono interpreter runtimevariants do not require any special build of the runtime
-        - ${{ format('{0}_{1}_product_build_{2}{3}_{4}_{5}', parameters.runtimeFlavor, '', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-      - ${{ if not(or(eq(parameters.runtimeVariant, 'minijit'), eq(parameters.runtimeVariant, 'monointerpreter')))  }}:
-        - ${{ if eq(parameters.runtimeVariant, 'llvmfullaot') }}:
-          - ${{ format('{0}_llvmaot_product_build_{1}{2}_{3}_{4}', parameters.runtimeFlavor, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-        - ${{ if ne(parameters.runtimeVariant, 'llvmfullaot') }}:
-          - ${{ format('{0}_{1}_product_build_{2}{3}_{4}_{5}', parameters.runtimeFlavor, parameters.runtimeVariant, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-      - ${{ if ne(parameters.liveLibrariesBuildConfig, '') }}:
-        - ${{ format('libraries_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.liveLibrariesBuildConfig) }}
-      # SuperPMI collection needs to run mcs.exe on the AzDO machine. Assume that's an x64 machine, and download an x64 product build if needed.
-      - ${{ if and(eq(parameters.SuperPmiCollect, true), ne(parameters.archType, 'x64')) }}:
-          - ${{ format('coreclr_{0}_product_build_{1}{2}_{3}_{4}', '', parameters.osGroup, parameters.osSubgroup, 'x64', parameters.buildConfig) }}
+      - ${{ if ne(parameters.unifiedArtifactsName, '')}}:
+        - 'build_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_${{ coalesce(parameters.unifiedBuildConfigOverride, parameters.buildConfig) }}_${{ parameters.unifiedBuildNameSuffix }}'
 
     # Compute job name from template parameters
     ${{ if in(parameters.testGroup, 'innerloop', 'clrinterpreter') }}:
@@ -87,6 +75,13 @@ jobs:
 
     variables:
 
+    - name: osGroup
+      value: ${{ parameters.osGroup }}
+    - name: osSubgroup
+      value: ${{ parameters.osSubgroup }}
+    - name: archType
+      value: ${{ parameters.archType }}
+
     - name: monoAotBuildshCommand
       value: ''
 
@@ -132,6 +127,12 @@ jobs:
       - name: testTreeFilterArg
         value: 'tree GC/Scenarios/GCSimulator'
 
+    - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+      parameters:
+        runtimeFlavor: coreclr
+        testGroup: ${{ parameters.testGroup }}
+        liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }}
+
     # Variables used for SuperPMI collection
     - ${{ if eq(parameters.SuperPmiCollect, true) }}:
       - MchFileTag: '${{ parameters.osGroup }}.${{ parameters.archType }}.${{ parameters.buildConfig }}'
@@ -139,11 +140,10 @@ jobs:
         value: 'run'
       - name: CollectionName
         value: 'coreclr_tests'
+      - template: /eng/pipelines/coreclr/templates/jit-python-variables.yml
+        parameters:
+          osGroup: ${{ parameters.osGroup }}
       - ${{ if eq(parameters.osGroup, 'windows') }}:
-        - name: PythonScript
-          value: 'py -3'
-        - name: PipScript
-          value: 'py -3 -m pip'
         - name: MchFilesLocation
           value: '$(Build.SourcesDirectory)\artifacts\helixresults\'
         - name: MergedMchFileLocation
@@ -151,10 +151,6 @@ jobs:
         - name: SpmiLogsLocation
           value: '$(Build.SourcesDirectory)\artifacts\spmi_logs\'
       - ${{ if ne(parameters.osGroup, 'windows') }}:
-        - name: PythonScript
-          value: 'python3'
-        - name: PipScript
-          value: 'pip3'
         - name: MchFilesLocation
           value: '$(Build.SourcesDirectory)/artifacts/helixresults/'
         - name: MergedMchFileLocation
@@ -199,43 +195,12 @@ jobs:
 
     steps:
 
-    # Optionally download live-built libraries
-    - ${{ if ne(parameters.liveLibrariesBuildConfig, '') }}:
-      - template: /eng/pipelines/common/download-artifact-step.yml
-        parameters:
-          unpackFolder: $(librariesDownloadDir)
-          cleanUnpackFolder: false
-          artifactFileName: '$(librariesBuildArtifactName)$(archiveExtension)'
-          artifactName: '$(librariesBuildArtifactName)'
-          displayName: 'live-built libraries'
-
-
-    # Download and unzip managed test artifacts
-    - template: /eng/pipelines/common/download-artifact-step.yml
-      parameters:
-        unpackFolder: '$(managedTestArtifactRootFolderPath)'
-        artifactFileName: '$(managedGenericTestArtifactName).tar.gz'
-        artifactName: '$(managedGenericTestArtifactName)'
-        displayName: 'generic managed test artifacts'
-
-
-    # Download product binaries directory
     - template: /eng/pipelines/common/download-artifact-step.yml
       parameters:
-        unpackFolder: $(buildProductRootFolderPath)
-        artifactFileName: '$(buildProductArtifactName)$(archiveExtension)'
-        artifactName: '$(buildProductArtifactName)'
-        displayName: 'product build'
-
-
-    - ${{ if eq(parameters.runtimeFlavor, 'mono') }}:
-      # We need to explicitly download CoreCLR for Mono
-      - template: /eng/pipelines/common/download-artifact-step.yml
-        parameters:
-          unpackFolder: $(coreClrProductRootFolderPath)
-          artifactFileName: '$(coreClrProductArtifactName)$(archiveExtension)'
-          artifactName: '$(coreClrProductArtifactName)'
-          displayName: 'CoreCLR product download for Mono'
+        unpackFolder: $(Build.SourcesDirectory)/artifacts/bin
+        artifactFileName: '${{ parameters.unifiedArtifactsName }}$(archiveExtension)'
+        artifactName: '${{ parameters.unifiedArtifactsName }}'
+        displayName: 'unified artifacts'
 
     # Download and unzip the Microsoft.NET.Sdk.IL package needed for traversing
     # ilproj test projects during copynativeonly.
@@ -246,6 +211,13 @@ jobs:
         artifactName: '$(microsoftNetSdkIlArtifactName)'
         displayName: 'Microsoft.NET.Sdk.IL package'
 
+    # Download and unzip managed test artifacts
+    - template: /eng/pipelines/common/download-artifact-step.yml
+      parameters:
+        unpackFolder: '$(managedTestArtifactRootFolderPath)'
+        artifactFileName: '$(managedGenericTestArtifactName).tar.gz'
+        artifactName: '$(managedGenericTestArtifactName)'
+        displayName: 'generic managed test artifacts'
 
     # Download and unzip native test artifacts
     - template: /eng/pipelines/common/download-artifact-step.yml
@@ -255,17 +227,6 @@ jobs:
         artifactName: '$(nativeTestArtifactName)'
         displayName: 'native test artifacts'
 
-
-    # SuperPMI collection: Download x64 coreclr if running on non-x64 configuration (needed for mcs.exe)
-    - ${{ if and(eq(parameters.SuperPmiCollect, true), ne(parameters.archType, 'x64')) }}:
-      - template: /eng/pipelines/common/download-artifact-step.yml
-        parameters:
-          unpackFolder: '$(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).x64.$(buildConfigUpper)'
-          artifactFileName: 'CoreCLRProduct__$(osGroup)$(osSubgroup)_x64_$(buildConfig)$(archiveExtension)'
-          artifactName: 'CoreCLRProduct__$(osGroup)$(osSubgroup)_x64_$(buildConfig)'
-          displayName: 'CoreCLR product build (x64)'
-
-
     # Publish native test components to test output folder. Sadly we cannot do this
     # during product build (so that we could zip up the files in their final test location
     # and directly unzip them there after download). Unfortunately the logic to copy
@@ -389,7 +350,7 @@ jobs:
           - jitstress1_tiered
           - jitstress2
           - jitstress2_tiered
-          - zapdisable
+          - disabler2r
           - tailcallstress
         ${{ if in(parameters.testGroup, 'jitstress-random') }}:
           scenarios:
@@ -507,9 +468,9 @@ jobs:
         ${{ if in(parameters.testGroup, 'gcstress-extra') }}:
           scenarios:
           - heapverify1
-          - gcstress0xc_zapdisable
-          - gcstress0xc_zapdisable_jitstress2
-          - gcstress0xc_zapdisable_heapverify1
+          - gcstress0xc_disabler2r
+          - gcstress0xc_disabler2r_jitstress2
+          - gcstress0xc_disabler2r_heapverify1
           - gcstress0xc_jitstress1
           - gcstress0xc_jitstress2
           - gcstress0xc_tailcallstress
@@ -575,6 +536,7 @@ jobs:
             scenarios:
             - jitosr_stress
             - jitpartialcompilation_pgo
+            - jitoptrepeat
           ${{ else }}:
             scenarios:
             - jitosr_stress
@@ -585,7 +547,8 @@ jobs:
             - jitobjectstackallocation
             - jitphysicalpromotion_only
             - jitphysicalpromotion_full
-            - jitcrossblocklocalassertionprop
+            - jitrlcse
+            - jitoptrepeat
         ${{ if in(parameters.testGroup, 'jit-cfg') }}:
           scenarios:
           - jitcfg
@@ -599,15 +562,6 @@ jobs:
           scenarios:
           - clrinterpreter
 
-    # Publish Logs
-    - task: PublishPipelineArtifact@1
-      displayName: Publish Logs
-      inputs:
-        targetPath: $(Build.SourcesDirectory)/artifacts/log
-        artifactName: '${{ parameters.runtimeFlavor }}_${{ parameters.runtimeVariant }}_$(LogNamePrefix)_Attempt$(System.JobAttempt)_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)_${{ parameters.testGroup }}'
-      continueOnError: true
-      condition: always()
-
     ########################################################################################################
     #
     # Finalize SuperPMI collection: (1) merge all MCH files generated by all Helix jobs, (2) upload MCH file to Azure Storage, (3) upload log files
@@ -632,6 +586,9 @@ jobs:
           displayName: Create SuperPMI directories
           condition: always()
 
+      - script: $(PythonSetupScript)
+        displayName: Enable python venv
+
       - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi.py merge-mch -log_level DEBUG -pattern $(MchFilesLocation)$(CollectionName).$(CollectionType)*.mch -output_mch_path $(MergedMchFileLocation)$(CollectionName).$(CollectionType).$(MchFileTag).mch
         displayName: 'Merge $(CollectionName)-$(CollectionType) SuperPMI collections'
         condition: always()
@@ -656,7 +613,7 @@ jobs:
         condition: always()
 
       # Ensure the Python azure-storage-blob package is installed before doing the upload.
-      - script: $(PipScript) install --user --upgrade pip && $(PipScript) install --user azure.storage.blob==12.5.0 --force-reinstall
+      - script: $(PipScript) install --upgrade pip && $(PipScript) install azure.storage.blob==12.5.0 --force-reinstall
         displayName: Upgrade Pip to latest and install azure-storage-blob Python package
         condition: always()
 
diff --git a/eng/pipelines/common/templates/runtimes/xplat-job.yml b/eng/pipelines/common/templates/runtimes/xplat-job.yml
index 23e74c70e57a..e22f8f968c47 100644
--- a/eng/pipelines/common/templates/runtimes/xplat-job.yml
+++ b/eng/pipelines/common/templates/runtimes/xplat-job.yml
@@ -9,22 +9,23 @@ parameters:
   crossBuild: false
   strategy: ''
   pool: ''
+  logsName: ''
 
   # arcade-specific parameters
   condition: ''
   continueOnError: false
   dependsOn: ''
-  dependOnEvaluatePaths: false
   displayName: ''
   timeoutInMinutes: ''
   enableMicrobuild: ''
   gatherAssetManifests: false
-  disableComponentGovernance: false
+  disableComponentGovernance: ''
+  templatePath: 'templates'
 
   variables: {} ## any extra variables to add to the defaults defined below
 
 jobs:
-- template: /eng/common/templates/job/job.yml
+- template: /eng/common/${{ parameters.templatePath }}/job/job.yml
   parameters:
 
     name: ${{ parameters.name }}
@@ -32,8 +33,6 @@ jobs:
     container: ${{ parameters.container }}
     condition: ${{ parameters.condition }}
     dependsOn:
-      - ${{ if eq(parameters.dependOnEvaluatePaths, true) }}:
-        - evaluate_paths
       - ${{ if ne(parameters.dependsOn, '') }}:
         - ${{ parameters.dependsOn }}
 
@@ -64,19 +63,30 @@ jobs:
     ${{ if eq(parameters.osGroup, 'windows') }}:
       agentOs: windows
 
-    # Disable component governance if requested or on musl machines where it does not work well
-    ${{ if or(eq(parameters.disableComponentGovernance, true), eq(parameters.osSubGroup, '_musl')) }}:
+    # Component governance does not work on musl machines
+    ${{ if eq(parameters.osSubGroup, '_musl') }}:
       disableComponentGovernance: true
+    ${{ else }}:
+      disableComponentGovernance: ${{ parameters.disableComponentGovernance }}
 
     # Setting this results in the arcade job template including a step
     # that gathers asset manifests and publishes them to pipeline
     # storage. Only relevant for build jobs.
     enablePublishBuildAssets: ${{ parameters.gatherAssetManifests }}
 
+    artifacts:
+      publish:
+        ${{ if ne(parameters.logsName, '') }}:
+          logs:
+            name: '${{ parameters.logsName }}'
+
     variables:
     - name: buildConfig
       value: ${{ parameters.buildConfig }}
 
+    - name: _BuildConfig
+      value: ${{ parameters.buildConfig }}
+
     - ${{ if and(eq(variables['System.TeamProject'], 'internal'), ne(variables['Build.Reason'], 'PullRequest')) }}:
       - name: _HelixSource
         value: official/dotnet/runtime/$(Build.SourceBranch)
diff --git a/eng/pipelines/common/templates/template1es.yml b/eng/pipelines/common/templates/template1es.yml
new file mode 100644
index 000000000000..0770e37d6bd0
--- /dev/null
+++ b/eng/pipelines/common/templates/template1es.yml
@@ -0,0 +1,31 @@
+
+
+parameters:
+  - name: templatePath
+    type: string
+    default: 'templates-official'
+  - name: stages
+    type: stageList
+  - name: containers
+    type: object
+
+
+resources:
+  repositories:
+  - repository: 1ESPipelineTemplates
+    type: git
+    name: 1ESPipelineTemplates/1ESPipelineTemplates
+    ref: refs/tags/release
+
+extends:
+  template: v1/1ES.Official.PipelineTemplate.yml@1ESPipelineTemplates
+  parameters:
+    pool:
+      name: $(DncEngInternalBuildPool)
+      image: 1es-windows-2022
+      os: windows
+
+    containers:
+      ${{ parameters.containers }}
+
+    stages: ${{ parameters.stages }}
\ No newline at end of file
diff --git a/eng/pipelines/common/templates/templateDispatch.yml b/eng/pipelines/common/templates/templateDispatch.yml
new file mode 100644
index 000000000000..1860af47aeef
--- /dev/null
+++ b/eng/pipelines/common/templates/templateDispatch.yml
@@ -0,0 +1,13 @@
+parameters:
+  - name: templatePath
+    type: string
+  - name: stages
+    type: stageList
+  - name: containers
+    type: object
+
+extends:
+  template: ${{ parameters.templatePath }}
+  parameters:
+    stages: ${{ parameters.stages }}
+    containers: ${{ parameters.containers }}
\ No newline at end of file
diff --git a/eng/pipelines/common/templates/templatePublic.yml b/eng/pipelines/common/templates/templatePublic.yml
new file mode 100644
index 000000000000..cd7c02720167
--- /dev/null
+++ b/eng/pipelines/common/templates/templatePublic.yml
@@ -0,0 +1,21 @@
+
+parameters:
+  - name: templatePath
+    type: string
+    default: 'templates'
+  - name: stages
+    type: stageList
+  - name: containers
+    type: object
+
+resources:
+  containers:
+    - ${{ each container_pair in parameters.containers }}:
+      - ${{ if container_pair.value.image }}:
+        - container: ${{ container_pair.key }}
+          ${{ each pair in container_pair.value }}:
+            ${{ if notIn(pair.key, 'tenantId', 'identityType', 'registry') }}:
+              ${{ pair.key }}: ${{ pair.value }}
+
+
+stages: ${{ parameters.stages }}
\ No newline at end of file
diff --git a/eng/pipelines/common/templates/wasm-debugger-tests.yml b/eng/pipelines/common/templates/wasm-debugger-tests.yml
index 17c0f415cf1b..a61321809bc5 100644
--- a/eng/pipelines/common/templates/wasm-debugger-tests.yml
+++ b/eng/pipelines/common/templates/wasm-debugger-tests.yml
@@ -28,9 +28,9 @@ jobs:
         value: $[
           or(
             eq(variables['wasmDarcDependenciesChanged'], true),
-            eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
-            eq(dependencies.evaluate_paths_outputs['DarcDependenciesChanged.Microsoft_DotNet_HotReload_Utils_Generator_BuildTool'], true),
-            eq(dependencies.evaluate_paths.outputs['SetPathVars_wasmdebuggertests.containsChange'], true))
+            eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+            eq(stageDependencies.EvaluatePaths.evaluate_paths_outputs['DarcDependenciesChanged.Microsoft_DotNet_HotReload_Utils_Generator_BuildTool'], true),
+            eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_wasmdebuggertests.containsChange'], true))
           ]
     jobParameters:
       testGroup: innerloop
diff --git a/eng/pipelines/common/templates/wasm-library-tests.yml b/eng/pipelines/common/templates/wasm-library-tests.yml
index c2ab64d273bb..b1b041604439 100644
--- a/eng/pipelines/common/templates/wasm-library-tests.yml
+++ b/eng/pipelines/common/templates/wasm-library-tests.yml
@@ -34,10 +34,10 @@ jobs:
         value: $[
           or(
             eq(variables['wasmDarcDependenciesChanged'], true),
-            eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
-            eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-            eq(dependencies.evaluate_paths.outputs['SetPathVars_wasm_chrome.containsChange'], true),
-            eq(dependencies.evaluate_paths.outputs['SetPathVars_wasm_specific_except_wbt_dbg.containsChange'], true))
+            eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+            eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+            eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_wasm_chrome.containsChange'], true),
+            eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_wasm_specific_except_wbt_dbg.containsChange'], true))
          ]
       # run smoke tests only if:
       # - explicitly requested
@@ -50,11 +50,11 @@ jobs:
               eq('${{ parameters.shouldRunSmokeOnly }}', 'onLibrariesAndIllinkChanges'),
               ne(variables['wasmDarcDependenciesChanged'], true),
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true)
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true)
               ),
-              ne(dependencies.evaluate_paths.outputs['SetPathVars_wasm_chrome.containsChange'], true),
-              ne(dependencies.evaluate_paths.outputs['SetPathVars_wasm_specific_except_wbt_dbg.containsChange'], true)
+              ne(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_wasm_chrome.containsChange'], true),
+              ne(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_wasm_specific_except_wbt_dbg.containsChange'], true)
             )
           )
           ]
diff --git a/eng/pipelines/common/templates/wasm-runtime-tests.yml b/eng/pipelines/common/templates/wasm-runtime-tests.yml
index d8a4b76b54b5..3a8affe48a21 100644
--- a/eng/pipelines/common/templates/wasm-runtime-tests.yml
+++ b/eng/pipelines/common/templates/wasm-runtime-tests.yml
@@ -28,8 +28,8 @@ jobs:
         value: $[
           or(
             eq(variables['wasmDarcDependenciesChanged'], true),
-            eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
-            eq(dependencies.evaluate_paths.outputs['SetPathVars_wasm_runtimetests.containsChange'], true))
+            eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+            eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_wasm_runtimetests.containsChange'], true))
           ]
     jobParameters:
       testGroup: innerloop
diff --git a/eng/pipelines/common/upload-artifact-step.yml b/eng/pipelines/common/upload-artifact-step.yml
index 249da066c7aa..d4091a7cc192 100644
--- a/eng/pipelines/common/upload-artifact-step.yml
+++ b/eng/pipelines/common/upload-artifact-step.yml
@@ -7,6 +7,7 @@ parameters:
   artifactName: ''
   displayName: ''
   condition: succeeded()
+  isOfficialBuild: false
 
 steps:
   # Zip Artifact
@@ -20,9 +21,11 @@ steps:
       includeRootFolder: ${{ parameters.includeRootFolder }}
     condition: ${{ parameters.condition }}
 
-  - task: PublishBuildArtifacts@1
-    displayName: 'Publish ${{ parameters.displayName }}'
-    inputs:
-      pathtoPublish: $(Build.StagingDirectory)/${{ parameters.artifactName }}${{ parameters.archiveExtension }}
-      artifactName:  ${{ parameters.artifactName }}
-    condition: ${{ parameters.condition }}
+  - template: /eng/pipelines/common/templates/publish-build-artifacts.yml
+    parameters:
+      isOfficialBuild: ${{ parameters.isOfficialBuild }}
+      displayName: 'Publish ${{ parameters.displayName }}'
+      inputs:
+        PathtoPublish: $(Build.StagingDirectory)/${{ parameters.artifactName }}${{ parameters.archiveExtension }}
+        artifactName:  ${{ parameters.artifactName }}
+      condition: ${{ parameters.condition }}
\ No newline at end of file
diff --git a/eng/pipelines/common/upload-intermediate-artifacts-step.yml b/eng/pipelines/common/upload-intermediate-artifacts-step.yml
index bde6c61a0a04..b22c60be9e36 100644
--- a/eng/pipelines/common/upload-intermediate-artifacts-step.yml
+++ b/eng/pipelines/common/upload-intermediate-artifacts-step.yml
@@ -2,6 +2,7 @@ parameters:
   name: ''
   publishPackagesCondition: always()
   publishVSSetupCondition: false
+  isOfficialBuild: true
 
 steps:
 - task: CopyFiles@2
@@ -25,9 +26,11 @@ steps:
     TargetFolder: '$(Build.StagingDirectory)/IntermediateArtifacts/${{ parameters.name }}'
     CleanTargetFolder: true
 
-- task: PublishBuildArtifacts@1
-  displayName: Publish intermediate artifacts
-  inputs:
-    pathToPublish: '$(Build.StagingDirectory)/IntermediateArtifacts'
-    artifactName: IntermediateArtifacts
-    artifactType: container
+- template: /eng/pipelines/common/templates/publish-build-artifacts.yml
+  parameters:
+    isOfficialBuild: ${{ parameters.isOfficialBuild }}
+    displayName: Publish intermediate artifacts
+    inputs:
+      PathtoPublish: '$(Build.StagingDirectory)/IntermediateArtifacts'
+      ArtifactName: IntermediateArtifacts
+      ArtifactType: container
diff --git a/eng/pipelines/common/variables.yml b/eng/pipelines/common/variables.yml
index 3aeca9ca759b..b06920ec2d36 100644
--- a/eng/pipelines/common/variables.yml
+++ b/eng/pipelines/common/variables.yml
@@ -1,6 +1,6 @@
 variables:
 
-# These values enable longer delays, configurable number of retries, and special understanding of TCP hang-up 
+# These values enable longer delays, configurable number of retries, and special understanding of TCP hang-up
 # See https://github.com/NuGet/Home/issues/11027 for details
 - name: NUGET_ENABLE_EXPERIMENTAL_HTTP_RETRY
   value: true
@@ -40,10 +40,6 @@ variables:
 - name: isNotSpecificPlatformOnlyBuild
   value: ${{ notin(variables['Build.DefinitionName'], 'runtime-wasm', 'runtime-wasm-libtests', 'runtime-wasm-non-libtests', 'runtime-ioslike', 'runtime-ioslikesimulator', 'runtime-android', 'runtime-androidemulator', 'runtime-maccatalyst', 'runtime-linuxbionic') }}
 
-# We only run evaluate paths on runtime and runtime-community pipelines on PRs
-# keep in sync with /eng/pipelines/common/xplat-setup.yml
-- name: dependOnEvaluatePaths
-  value: ${{ and(eq(variables['Build.Reason'], 'PullRequest'), in(variables['Build.DefinitionName'], 'runtime', 'runtime-community', 'runtime-extra-platforms', 'runtime-wasm', 'runtime-wasm-libtests', 'runtime-wasm-non-libtests', 'runtime-ioslike', 'runtime-ioslikesimulator', 'runtime-android', 'runtime-androidemulator', 'runtime-maccatalyst', 'runtime-linuxbionic', 'dotnet-linker-tests', 'runtime-dev-innerloop', 'runtime-coreclr superpmi-replay', 'runtime-coreclr superpmi-diffs')) }}
 - name: debugOnPrReleaseOnRolling
   ${{ if ne(variables['Build.Reason'], 'PullRequest') }}:
     value: Release
diff --git a/eng/pipelines/common/xplat-setup.yml b/eng/pipelines/common/xplat-setup.yml
index 32a7ac671d2e..f50a2db9e81e 100644
--- a/eng/pipelines/common/xplat-setup.yml
+++ b/eng/pipelines/common/xplat-setup.yml
@@ -18,11 +18,8 @@ jobs:
   parameters:
     shouldContinueOnError: ${{ or(eq(parameters.shouldContinueOnError, true), and(ne(parameters.shouldContinueOnError, 'forceFalse'), endsWith(variables['Build.DefinitionName'], 'staging'), eq(variables['Build.Reason'], 'PullRequest'))) }}
 
-    # keep in sync with /eng/pipelines/common/variables.yml
-    dependOnEvaluatePaths: ${{ and(eq(variables['Build.Reason'], 'PullRequest'), in(variables['Build.DefinitionName'], 'runtime', 'runtime-community', 'runtime-extra-platforms', 'runtime-wasm', 'runtime-wasm-libtests', 'runtime-wasm-non-libtests', 'dotnet-linker-tests', 'runtime-dev-innerloop', 'runtime-coreclr superpmi-replay', 'runtime-coreclr superpmi-diffs')) }}
-
     variables:
-      - template: /eng/common/templates/variables/pool-providers.yml
+      - template: /eng/common/${{ coalesce(parameters.jobParameters.templatePath, 'templates') }}/variables/pool-providers.yml
       # Disable component governance in our CI builds. These builds are not shipping nor
       # are they a service. Also the component governance jobs issue lots of inconsequential
       # warnings and errors into our build timelines that make it hard to track down
@@ -45,7 +42,7 @@ jobs:
 
       - name: _BuildConfig
         value: $(buildConfigUpper)
-        
+
       - name: archType
         value: ${{ parameters.archType }}
 
@@ -117,22 +114,22 @@ jobs:
       - ${{ if eq(parameters.archType, 'wasm') }}:
         - name: wasmDarcDependenciesChanged
           value: $[ or(
-                      eq(dependencies.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_NET_Workload_Emscripten_Current_Manifest-9_0_100_Transport'], true),
-                      eq(dependencies.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_DotNet_Build_Tasks_Workloads'], true),
-                      eq(dependencies.evaluate_paths.outputs['DarcDependenciesChanged.System_Runtime_TimeZoneData'], true),
-                      eq(dependencies.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_Net_Compilers_Toolset'], true),
-                      eq(dependencies.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_CodeAnalysis'], true),
-                      eq(dependencies.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_CodeAnalysis_CSharp'], true),
-                      eq(dependencies.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_CodeAnalysis_Analyzers'], true),
-                      eq(dependencies.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_CodeAnalysis_NetAnalyzers'], true),
-                      eq(dependencies.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_NET_ILLink_Tasks'], true)) ]
+                      eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_NET_Workload_Emscripten_Current_Manifest-9_0_100_Transport'], true),
+                      eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_DotNet_Build_Tasks_Workloads'], true),
+                      eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['DarcDependenciesChanged.System_Runtime_TimeZoneData'], true),
+                      eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_Net_Compilers_Toolset'], true),
+                      eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_CodeAnalysis'], true),
+                      eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_CodeAnalysis_CSharp'], true),
+                      eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_CodeAnalysis_Analyzers'], true),
+                      eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_CodeAnalysis_NetAnalyzers'], true),
+                      eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_NET_ILLink_Tasks'], true)) ]
 
         - name: shouldRunWasmBuildTestsOnDefaultPipeline
           value: $[
             or(
               eq(variables['wasmDarcDependenciesChanged'], true),
-              eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
-              eq(dependencies.evaluate_paths.outputs['SetPathVars_wasmbuildtests.containsChange'], true))
+              eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+              eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_wasmbuildtests.containsChange'], true))
             ]
 
         # needed for Wasm.Build.Tests
@@ -168,21 +165,28 @@ jobs:
         # Official Build Linux Pool
         ${{ if and(or(in(parameters.osGroup, 'linux', 'freebsd', 'android', 'tizen'), eq(parameters.jobParameters.hostedOs, 'linux')), ne(variables['System.TeamProject'], 'public')) }}:
           name: $(DncEngInternalBuildPool)
-          demands: ImageOverride -equals Build.Ubuntu.2204.Amd64
+          demands: ImageOverride -equals 1es-ubuntu-2204
+          os: linux
 
-        # OSX Build Pool (we don't have on-prem OSX BuildPool).
-        ${{ if in(parameters.osGroup, 'osx', 'maccatalyst', 'ios', 'iossimulator', 'tvos', 'tvossimulator') }}:
+        # OSX Public Build Pool (we don't have on-prem OSX BuildPool).
+        ${{ if and(in(parameters.osGroup, 'osx', 'maccatalyst', 'ios', 'iossimulator', 'tvos', 'tvossimulator'), eq(variables['System.TeamProject'], 'public')) }}:
           vmImage: 'macos-12'
 
+        # OSX Internal Pool
+        ${{ if and(in(parameters.osGroup, 'osx', 'maccatalyst', 'ios', 'iossimulator', 'tvos', 'tvossimulator'), ne(variables['System.TeamProject'], 'public')) }}:
+          name: "Azure Pipelines"
+          vmImage: 'macOS-12'
+          os: macOS
+
         # Official Build Windows Pool
         ${{ if and(or(eq(parameters.osGroup, 'windows'), eq(parameters.jobParameters.hostedOs, 'windows')), ne(variables['System.TeamProject'], 'public')) }}:
           name: $(DncEngInternalBuildPool)
-          demands: ImageOverride -equals windows.vs2022preview.amd64
+          demands: ImageOverride -equals windows.vs2022.amd64
 
         # Public Windows Build Pool
         ${{ if and(or(eq(parameters.osGroup, 'windows'), eq(parameters.jobParameters.hostedOs, 'windows')), eq(variables['System.TeamProject'], 'public')) }}:
           name: $(DncEngPublicBuildPool)
-          demands: ImageOverride -equals windows.vs2022preview.amd64.open
+          demands: ImageOverride -equals windows.vs2022.amd64.open
 
     ${{ if eq(parameters.helixQueuesTemplate, '') }}:
       # macOS hosted pool machines are slower so we need to give a greater timeout than the 60 mins default.
diff --git a/eng/pipelines/coreclr/ci.yml b/eng/pipelines/coreclr/ci.yml
index 1b7b180a85f1..c07ebacbf30f 100644
--- a/eng/pipelines/coreclr/ci.yml
+++ b/eng/pipelines/coreclr/ci.yml
@@ -44,7 +44,7 @@ extends:
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: debug
           platforms:
           - linux_arm
@@ -56,29 +56,20 @@ extends:
           - osx_x64
           - windows_arm64
           jobParameters:
-            testGroup: outerloop
-
-      #
-      # Checked builds
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-job.yml
-          buildConfig: checked
-          platformGroup: all
-          platforms:
-          # It is too early to include osx_arm64 in platform group all
-          # Adding it here will enable it also
-          - osx_arm64
-          jobParameters:
-            testGroup: outerloop
+            buildArgs: -s clr -c $(_BuildConfig)
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: outerloop
 
       #
       # Release builds
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: release
           platforms:
           - linux_arm
@@ -88,23 +79,43 @@ extends:
           - osx_x64
           - windows_x86
           jobParameters:
-            testGroup: outerloop
+            buildArgs: -s clr -c $(_BuildConfig)
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: outerloop
 
       #
-      # Release library builds
+      # Checked builds
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/libraries/build-job.yml
-          buildConfig: Release
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: checked
           platformGroup: all
           platforms:
           # It is too early to include osx_arm64 in platform group all
           # Adding it here will enable it also
           - osx_arm64
           jobParameters:
-            isOfficialBuild: false
-            liveRuntimeBuildConfig: checked
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: outerloop
 
       #
       # Checked test builds
@@ -134,6 +145,7 @@ extends:
           jobParameters:
             testGroup: outerloop
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
 
       #
       # Checked R2R test runs
@@ -158,6 +170,7 @@ extends:
             readyToRun: true
             displayNameArgs: R2R_CG2
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
 
       #
       # Formatting
diff --git a/eng/pipelines/coreclr/clrinterpreter.yml b/eng/pipelines/coreclr/clrinterpreter.yml
index eac505a9f707..288e297c6f90 100644
--- a/eng/pipelines/coreclr/clrinterpreter.yml
+++ b/eng/pipelines/coreclr/clrinterpreter.yml
@@ -21,7 +21,7 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - linux_arm
@@ -32,7 +32,22 @@ extends:
           - windows_arm64
           - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: clrinterpreter
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release /p:CMakeArgs="-DFEATURE_INTERPRETER=1"
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: CoreCLRInterpreterBuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: clrinterpreter
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -59,3 +74,7 @@ extends:
           jobParameters:
             testGroup: clrinterpreter
             liveLibrariesBuildConfig: Release
+            dependsOn:
+              - build_$(osGroup)$(osSubgroup)_$(archType)_checked_
+              - coreclr_common_test_build_pri0_AnyOS_AnyCPU_checked
+            unifiedArtifactsName: CoreCLRInterpreterBuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/crossgen2-composite.yml b/eng/pipelines/coreclr/crossgen2-composite.yml
index 2830cdc47ded..0dbe78f5907f 100644
--- a/eng/pipelines/coreclr/crossgen2-composite.yml
+++ b/eng/pipelines/coreclr/crossgen2-composite.yml
@@ -20,7 +20,7 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - linux_arm
@@ -31,9 +31,23 @@ extends:
           - windows_x86
           - windows_x64
           - windows_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: innerloop
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: innerloop
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -64,3 +78,4 @@ extends:
             compositeBuildMode: true
             displayNameArgs: Composite
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/crossgen2-gcstress.yml b/eng/pipelines/coreclr/crossgen2-gcstress.yml
index a004e7904a82..a509018edc0b 100644
--- a/eng/pipelines/coreclr/crossgen2-gcstress.yml
+++ b/eng/pipelines/coreclr/crossgen2-gcstress.yml
@@ -20,7 +20,7 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - linux_x64
@@ -30,9 +30,23 @@ extends:
           # - osx_x64
           - windows_x64
           - windows_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: gcstress-extra
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: gcstress-extra
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -62,3 +76,4 @@ extends:
             compositeBuildMode: true
             displayNameArgs: Composite
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/crossgen2-outerloop.yml b/eng/pipelines/coreclr/crossgen2-outerloop.yml
index 978143090b42..833513ea532e 100644
--- a/eng/pipelines/coreclr/crossgen2-outerloop.yml
+++ b/eng/pipelines/coreclr/crossgen2-outerloop.yml
@@ -20,41 +20,10 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - linux_arm
-          - linux_x64
-          - linux_arm64
-          - osx_arm64
-          - osx_x64
-          - windows_x86
-          - windows_x64
-          - windows_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: outerloop
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-job.yml
-          buildConfig: Release
-          platforms:
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - osx_arm64
-          - windows_x86
-          - windows_x64
-          jobParameters:
-            testGroup: outerloop
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/libraries/build-job.yml
-          buildConfig: Release
-          platforms:
-          - linux_arm
           - linux_arm64
           - linux_x64
           - osx_arm64
@@ -62,10 +31,23 @@ extends:
           - windows_x86
           - windows_x64
           - windows_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            isOfficialBuild: false
-            liveRuntimeBuildConfig: Release
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: Checked_CoreCLR_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: outerloop
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -95,6 +77,7 @@ extends:
             compositeBuildMode: true
             displayNameArgs: R2R_Composite
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: Checked_CoreCLR_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
 
       # Outerloop testing in non-composite mode
       - template: /eng/pipelines/common/platform-matrix.yml
@@ -116,6 +99,34 @@ extends:
             readyToRun: true
             displayNameArgs: R2R
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: Checked_CoreCLR_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+
+      # Build release CoreCLR for Crossgen2 baseline generation
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: release
+          platforms:
+          - linux_arm
+          - linux_arm64
+          - linux_x64
+          - osx_arm64
+          - osx_x64
+          - windows_x86
+          - windows_x64
+          - windows_arm64
+          jobParameters:
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: Release_CoreCLR_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
 
       # Build Crossgen2 baselines
       # These are the various crossgen2 targets that are supported, and cover all major
diff --git a/eng/pipelines/coreclr/crossgen2.yml b/eng/pipelines/coreclr/crossgen2.yml
index 53ea5878d32c..a52fbae3a091 100644
--- a/eng/pipelines/coreclr/crossgen2.yml
+++ b/eng/pipelines/coreclr/crossgen2.yml
@@ -20,7 +20,7 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - linux_x64
@@ -29,9 +29,23 @@ extends:
           - osx_x64
           - windows_x64
           - windows_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: innerloop
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: innerloop
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -59,6 +73,7 @@ extends:
             readyToRun: true
             displayNameArgs: R2R_CG2
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -100,3 +115,4 @@ extends:
             hotColdSplitting: true
             displayNameArgs: R2R_CG2_HotColdSplitting
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/exploratory.yml b/eng/pipelines/coreclr/exploratory.yml
index af76dec9da24..1c0b6a7680bd 100644
--- a/eng/pipelines/coreclr/exploratory.yml
+++ b/eng/pipelines/coreclr/exploratory.yml
@@ -21,27 +21,11 @@ extends:
     stages:
     - stage: Build
       jobs:
-
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          # Linux tests are built on the OSX machines.
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - windows_arm64
-          - osx_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: outerloop
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/jit-exploratory-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          helixQueuesGroup: ci
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           buildConfig: checked
           platforms:
           # Linux tests are built on the OSX machines.
@@ -52,9 +36,17 @@ extends:
           - windows_x86
           - windows_arm64
           - osx_arm64
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           jobParameters:
-            testGroup: outerloop
-            liveLibrariesBuildConfig: Release
-            toolName: ${{ variables.toolName }}
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            timeoutInMinutes: 360
+            postBuildSteps:
+              - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(_BuildConfig) $(archType) generatelayoutonly
+                displayName: Create Core_Root
+                condition: succeeded()
+              - template: /eng/pipelines/coreclr/templates/jit-exploratory-steps.yml
+                parameters:
+                  toolName: ${{ variables.toolName }}
+            extraVariablesTemplates:
+              - template: /eng/pipelines/coreclr/templates/jit-exploratory-variables.yml
+                parameters:
+                  toolName: ${{ variables.toolName }}
diff --git a/eng/pipelines/coreclr/gc-longrunning.yml b/eng/pipelines/coreclr/gc-longrunning.yml
index c58e6bd0ab5b..0dbd66cd9ea6 100644
--- a/eng/pipelines/coreclr/gc-longrunning.yml
+++ b/eng/pipelines/coreclr/gc-longrunning.yml
@@ -20,7 +20,7 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: release
           platforms:
           - linux_x64
@@ -28,9 +28,23 @@ extends:
           - windows_x64
           - windows_arm64
           - osx_x64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: gc-longrunning
+            buildArgs: -s clr+libs -c $(_BuildConfig)
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: gc-longrunning
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -55,3 +69,4 @@ extends:
           jobParameters:
             testGroup: gc-longrunning
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/gc-simulator.yml b/eng/pipelines/coreclr/gc-simulator.yml
index fd4b700a7053..a0bda3d46210 100644
--- a/eng/pipelines/coreclr/gc-simulator.yml
+++ b/eng/pipelines/coreclr/gc-simulator.yml
@@ -20,7 +20,7 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: release
           platforms:
           # disable Linux x64 for now until OOMs are resolved.
@@ -29,9 +29,23 @@ extends:
           - windows_x64
           - windows_arm64
           - osx_x64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: gc-simulator
+            buildArgs: -s clr+libs -c $(_BuildConfig)
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: gc-simulator
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -57,3 +71,4 @@ extends:
           jobParameters:
             testGroup: gc-simulator
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/gc-standalone.yml b/eng/pipelines/coreclr/gc-standalone.yml
index 7ebf86566f3b..6089ac89178e 100644
--- a/eng/pipelines/coreclr/gc-standalone.yml
+++ b/eng/pipelines/coreclr/gc-standalone.yml
@@ -20,15 +20,30 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - linux_arm64
+          - linux_x64
           - windows_arm64
           - windows_x64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: gc-standalone
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: gc-standalone
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -54,6 +69,7 @@ extends:
             testGroup: gc-standalone
             displayNameArgs: GCStandAlone
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -70,3 +86,4 @@ extends:
             testGroup: gc-standalone-server
             displayNameArgs: GCStandAloneServer
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/gcstress-extra.yml b/eng/pipelines/coreclr/gcstress-extra.yml
index 61e5f3651f08..d981efa133d1 100644
--- a/eng/pipelines/coreclr/gcstress-extra.yml
+++ b/eng/pipelines/coreclr/gcstress-extra.yml
@@ -20,16 +20,30 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platformGroup: gcstress
           platforms:
           # It is too early to include osx_arm64 in platform group gcstress
           # Adding it here will enable it also
           - osx_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: gcstress-extra
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: gcstress-extra
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -54,3 +68,4 @@ extends:
           jobParameters:
             testGroup: gcstress-extra
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/gcstress0x3-gcstress0xc.yml b/eng/pipelines/coreclr/gcstress0x3-gcstress0xc.yml
index 2e501b2e18ab..5376898b7bf3 100644
--- a/eng/pipelines/coreclr/gcstress0x3-gcstress0xc.yml
+++ b/eng/pipelines/coreclr/gcstress0x3-gcstress0xc.yml
@@ -20,16 +20,30 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platformGroup: gcstress
           platforms:
           # It is too early to include osx_arm64 in platform group gcstress
           # Adding it here will enable it also
           - osx_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: gcstress0x3-gcstress0xc
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: gcstress0x3-gcstress0xc
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -54,3 +68,4 @@ extends:
           jobParameters:
             testGroup: gcstress0x3-gcstress0xc
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/ilasm.yml b/eng/pipelines/coreclr/ilasm.yml
index c57709a37d1b..474772728439 100644
--- a/eng/pipelines/coreclr/ilasm.yml
+++ b/eng/pipelines/coreclr/ilasm.yml
@@ -19,55 +19,16 @@ schedules:
 
 variables:
   - template: /eng/pipelines/common/variables.yml
-
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          - osx_x64
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - windows_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: ilasm
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: ilasm
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          - osx_x64
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - windows_arm64
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: ilasm
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - osx_arm64
+    - osx_x64
+    - linux_arm
+    - linux_arm64
+    - linux_x64
+    - windows_x64
+    - windows_x86
+    - windows_arm64
+    testGroup: ilasm
diff --git a/eng/pipelines/coreclr/jit-cfg.yml b/eng/pipelines/coreclr/jit-cfg.yml
index 87ae66fbe432..80fe44da119d 100644
--- a/eng/pipelines/coreclr/jit-cfg.yml
+++ b/eng/pipelines/coreclr/jit-cfg.yml
@@ -10,47 +10,12 @@ schedules:
 
 variables:
   - template: /eng/pipelines/common/variables.yml
-
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_arm64
-          - linux_x64
-          - windows_arm64
-          - windows_x64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jit-cfg
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jit-cfg
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_arm64
-          - linux_x64
-          - windows_arm64
-          - windows_x64
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: jit-cfg
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - linux_arm64
+    - linux_x64
+    - windows_x64
+    - windows_arm64
+    testGroup: jit-cfg
\ No newline at end of file
diff --git a/eng/pipelines/coreclr/jit-experimental.yml b/eng/pipelines/coreclr/jit-experimental.yml
index a599f343e310..56453d894796 100644
--- a/eng/pipelines/coreclr/jit-experimental.yml
+++ b/eng/pipelines/coreclr/jit-experimental.yml
@@ -12,49 +12,13 @@ variables:
   - template: /eng/pipelines/common/variables.yml
 
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          - osx_x64
-          - linux_arm64
-          - linux_x64
-          - windows_arm64
-          - windows_x64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jit-experimental
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jit-experimental
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          - osx_x64
-          - linux_arm64
-          - linux_x64
-          - windows_arm64
-          - windows_x64
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: jit-experimental
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - osx_arm64
+    - osx_x64
+    - linux_arm64
+    - linux_x64
+    - windows_x64
+    - windows_arm64
+    testGroup: jit-experimental
\ No newline at end of file
diff --git a/eng/pipelines/coreclr/jitrollingbuild.yml b/eng/pipelines/coreclr/jitrollingbuild.yml
index b2d2b86f2a1c..c99557f4c3cc 100644
--- a/eng/pipelines/coreclr/jitrollingbuild.yml
+++ b/eng/pipelines/coreclr/jitrollingbuild.yml
@@ -22,10 +22,9 @@ extends:
     stages:
     - stage: Build
       jobs:
-
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-jit-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - osx_arm64
@@ -38,10 +37,15 @@ extends:
           - windows_arm64
           jobParameters:
             disableComponentGovernance: true # Not a shipping artifact
+            buildArgs: -s clr.alljits+clr.spmi -c $(_BuildConfig)
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/upload-jits-steps.yml
+            extraVariablesTemplates:
+              - template: /eng/pipelines/coreclr/templates/jit-python-variables.yml
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-jit-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: release
           platforms:
           - osx_arm64
@@ -54,3 +58,8 @@ extends:
           - windows_arm64
           jobParameters:
             disableComponentGovernance: true # Not a shipping artifact
+            buildArgs: -s clr.alljits+clr.spmi -c $(_BuildConfig) /p:NoPgoOptimize=true
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/upload-jits-steps.yml
+            extraVariablesTemplates:
+              - template: /eng/pipelines/coreclr/templates/jit-python-variables.yml
diff --git a/eng/pipelines/coreclr/jitstress-isas-arm.yml b/eng/pipelines/coreclr/jitstress-isas-arm.yml
index 7e178b4f4259..0733c08ab8b4 100644
--- a/eng/pipelines/coreclr/jitstress-isas-arm.yml
+++ b/eng/pipelines/coreclr/jitstress-isas-arm.yml
@@ -12,43 +12,10 @@ variables:
   - template: /eng/pipelines/common/variables.yml
 
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_arm64
-          - osx_arm64
-          - windows_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstress-isas-arm
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstress-isas-arm
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_arm64
-          - osx_arm64
-          - windows_arm64
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: jitstress-isas-arm
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - osx_arm64
+    - linux_arm64
+    - windows_arm64
+    testGroup: jitstress-isas-arm
diff --git a/eng/pipelines/coreclr/jitstress-isas-avx512.yml b/eng/pipelines/coreclr/jitstress-isas-avx512.yml
index 4074c2b91358..87b33c847952 100644
--- a/eng/pipelines/coreclr/jitstress-isas-avx512.yml
+++ b/eng/pipelines/coreclr/jitstress-isas-avx512.yml
@@ -27,43 +27,10 @@ variables:
   - template: /eng/pipelines/common/variables.yml
 
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstress-isas-avx512
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstress-isas-avx512
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: jitstress-isas-avx512
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - linux_x64
+    - windows_x64
+    - windows_x86
+    testGroup: jitstress-isas-avx512
diff --git a/eng/pipelines/coreclr/jitstress-isas-x86.yml b/eng/pipelines/coreclr/jitstress-isas-x86.yml
index b5a1b9764fc5..0951fd572772 100644
--- a/eng/pipelines/coreclr/jitstress-isas-x86.yml
+++ b/eng/pipelines/coreclr/jitstress-isas-x86.yml
@@ -12,45 +12,11 @@ variables:
   - template: /eng/pipelines/common/variables.yml
 
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_x64
-          - osx_x64
-          - windows_x64
-          - windows_x86
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstress-isas-x86
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstress-isas-x86
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_x64
-          - osx_x64
-          - windows_x64
-          - windows_x86
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: jitstress-isas-x86
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - osx_x64
+    - linux_x64
+    - windows_x64
+    - windows_x86
+    testGroup: jitstress-isas-x86
diff --git a/eng/pipelines/coreclr/jitstress-random.yml b/eng/pipelines/coreclr/jitstress-random.yml
index f9437db5ea5c..ffbda2acb480 100644
--- a/eng/pipelines/coreclr/jitstress-random.yml
+++ b/eng/pipelines/coreclr/jitstress-random.yml
@@ -12,53 +12,15 @@ variables:
   - template: /eng/pipelines/common/variables.yml
 
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          - osx_x64
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - windows_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstress-random
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstress-random
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          - osx_x64
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - windows_arm64
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: jitstress-random
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - osx_arm64
+    - osx_x64
+    - linux_arm
+    - linux_arm64
+    - linux_x64
+    - windows_x64
+    - windows_x86
+    - windows_arm64
+    testGroup: jitstress-random
diff --git a/eng/pipelines/coreclr/jitstress.yml b/eng/pipelines/coreclr/jitstress.yml
index 71263872c092..0e9e0d052dfe 100644
--- a/eng/pipelines/coreclr/jitstress.yml
+++ b/eng/pipelines/coreclr/jitstress.yml
@@ -12,53 +12,15 @@ variables:
   - template: /eng/pipelines/common/variables.yml
 
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          - osx_x64
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - windows_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstress
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstress
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          - osx_x64
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - windows_arm64
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: jitstress
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - osx_arm64
+    - osx_x64
+    - linux_arm
+    - linux_arm64
+    - linux_x64
+    - windows_x64
+    - windows_x86
+    - windows_arm64
+    testGroup: jitstress
diff --git a/eng/pipelines/coreclr/jitstress2-jitstressregs.yml b/eng/pipelines/coreclr/jitstress2-jitstressregs.yml
index ee672273e564..0e2c961e4def 100644
--- a/eng/pipelines/coreclr/jitstress2-jitstressregs.yml
+++ b/eng/pipelines/coreclr/jitstress2-jitstressregs.yml
@@ -12,53 +12,15 @@ variables:
   - template: /eng/pipelines/common/variables.yml
 
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          - osx_x64
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - windows_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstress2-jitstressregs
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: checked
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          - osx_x64
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - windows_arm64
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: jitstress2-jitstressregs
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - osx_arm64
+    - osx_x64
+    - linux_arm
+    - linux_arm64
+    - linux_x64
+    - windows_x64
+    - windows_x86
+    - windows_arm64
+    testGroup: jitstress2-jitstressregs
diff --git a/eng/pipelines/coreclr/jitstressregs-x86.yml b/eng/pipelines/coreclr/jitstressregs-x86.yml
index 8b8009e63cf5..63ba958217f4 100644
--- a/eng/pipelines/coreclr/jitstressregs-x86.yml
+++ b/eng/pipelines/coreclr/jitstressregs-x86.yml
@@ -12,43 +12,10 @@ variables:
   - template: /eng/pipelines/common/variables.yml
 
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstressregs-x86
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstressregs-x86
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: jitstressregs-x86
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - linux_x64
+    - windows_x64
+    - windows_x86
+    testGroup: jitstressregs-x86
diff --git a/eng/pipelines/coreclr/jitstressregs.yml b/eng/pipelines/coreclr/jitstressregs.yml
index d947e48efa2e..25ab3ead6ba4 100644
--- a/eng/pipelines/coreclr/jitstressregs.yml
+++ b/eng/pipelines/coreclr/jitstressregs.yml
@@ -12,53 +12,15 @@ variables:
   - template: /eng/pipelines/common/variables.yml
 
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          - osx_x64
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - windows_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstressregs
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: jitstressregs
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          - osx_x64
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_x64
-          - windows_x86
-          - windows_arm64
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: jitstressregs
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - osx_arm64
+    - osx_x64
+    - linux_arm
+    - linux_arm64
+    - linux_x64
+    - windows_x64
+    - windows_x86
+    - windows_arm64
+    testGroup: jitstressregs
\ No newline at end of file
diff --git a/eng/pipelines/coreclr/libraries-gcstress-extra.yml b/eng/pipelines/coreclr/libraries-gcstress-extra.yml
index c87a99b56c1d..433f00e859e6 100644
--- a/eng/pipelines/coreclr/libraries-gcstress-extra.yml
+++ b/eng/pipelines/coreclr/libraries-gcstress-extra.yml
@@ -19,35 +19,27 @@ extends:
     - stage: Build
       jobs:
 
-      #
-      # Build CoreCLR checked and libraries Release
-      #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platformGroup: gcstress
-          jobParameters:
-            # libraries test build platforms
-            testBuildPlatforms:
-            - linux_x64
-            - windows_x64
-
-      #
-      # Libraries Test Run using Release libraries, Checked CoreCLR, and stress modes
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/libraries/run-test-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: Release
-          platformGroup: gcstress
           helixQueueGroup: libraries
           helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+          platformGroup: gcstress
           jobParameters:
             # Default timeout is 150 minutes (2.5 hours), which is not enough for stress.
-            timeoutInMinutes: 600
-            testScope: innerloop
-            liveRuntimeBuildConfig: checked
-            dependsOnTestBuildConfiguration: Release
-            dependsOnTestArchitecture: x64
-            coreclrTestGroup: gcstress-extra
+            timeoutInMinutes: 660
+            buildArgs: -s clr+libs+libs.tests -rc Checked -c $(_BuildConfig) /p:ArchiveTests=true
+            postBuildSteps:
+              - template: /eng/pipelines/libraries/helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  extraHelixArguments: /maxcpucount:10
+                  scenarios:
+                    - heapverify1
+                    - gcstress0xc_disabler2r
+                    - gcstress0xc_disabler2r_jitstress2
+                    - gcstress0xc_disabler2r_heapverify1
+                    - gcstress0xc_jitstress1
+                    - gcstress0xc_jitstress2
+                    - gcstress0xc_jitminopts_heapverify1
diff --git a/eng/pipelines/coreclr/libraries-gcstress0x3-gcstress0xc.yml b/eng/pipelines/coreclr/libraries-gcstress0x3-gcstress0xc.yml
index 1acc98df2392..e98ce3fa581f 100644
--- a/eng/pipelines/coreclr/libraries-gcstress0x3-gcstress0xc.yml
+++ b/eng/pipelines/coreclr/libraries-gcstress0x3-gcstress0xc.yml
@@ -12,6 +12,12 @@ trigger: none
 variables:
   - template: /eng/pipelines/common/variables.yml
 
+extends:
+  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  parameters:
+    stages:
+    - stage: Build
+      jobs:
 extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
@@ -19,35 +25,24 @@ extends:
     - stage: Build
       jobs:
 
-      #
-      # Build CoreCLR checked and libraries Release
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platformGroup: gcstress
-          jobParameters:
-            # libraries test build platforms
-            testBuildPlatforms:
-            - linux_x64
-            - windows_x64
-
-      #
-      # Libraries Test Run using Release libraries, Checked CoreCLR, and stress modes
-      #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/libraries/run-test-job.yml
-          buildConfig: Release
-          platformGroup: gcstress
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: release
           helixQueueGroup: libraries
           helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+          platformGroup: gcstress
           jobParameters:
             # Default timeout is 150 minutes (2.5 hours), which is not enough for stress.
-            timeoutInMinutes: 600
-            testScope: innerloop
-            liveRuntimeBuildConfig: checked
-            dependsOnTestBuildConfiguration: Release
-            dependsOnTestArchitecture: x64
-            coreclrTestGroup: gcstress0x3-gcstress0xc
+            timeoutInMinutes: 660
+            buildArgs: -s clr+libs+libs.tests -rc Checked -c $(_BuildConfig) /p:ArchiveTests=true
+            postBuildSteps:
+              - template: /eng/pipelines/libraries/helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  extraHelixArguments: /maxcpucount:10
+                  scenarios:
+                    # Disable gcstress0x3 for now; it causes lots of test timeouts. Investigate this after
+                    # gcstress0xc runs are clean. Tracking issue: https://github.com/dotnet/runtime/issues/38903.
+                    # - gcstress0x3
+                    - gcstress0xc
diff --git a/eng/pipelines/coreclr/libraries-jitstress-random.yml b/eng/pipelines/coreclr/libraries-jitstress-random.yml
index eba6fb360f94..3514f4d624a9 100644
--- a/eng/pipelines/coreclr/libraries-jitstress-random.yml
+++ b/eng/pipelines/coreclr/libraries-jitstress-random.yml
@@ -18,13 +18,12 @@ extends:
     - stage: Build
       jobs:
 
-      #
-      # Build CoreCLR checked and libraries Release
-      #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: Release
+          helixQueueGroup: libraries
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           platforms:
           - linux_x64
           - linux_arm
@@ -32,33 +31,15 @@ extends:
           - windows_x86
           - windows_x64
           - windows_arm64
-          jobParameters:
-            # libraries test build platforms
-            testBuildPlatforms:
-            - linux_x64
-            - windows_x64
-
-      #
-      # Libraries Test Run using Release libraries, Checked CoreCLR, and stress modes
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/libraries/run-test-job.yml
-          buildConfig: Release
-          platforms:
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_arm64
-          - windows_x64
-          - windows_x86
-          helixQueueGroup: libraries
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           jobParameters:
             # Default timeout is 150 minutes (2.5 hours), which is not enough for stress.
-            timeoutInMinutes: 300
-            testScope: innerloop
-            liveRuntimeBuildConfig: checked
-            dependsOnTestBuildConfiguration: Release
-            dependsOnTestArchitecture: x64
-            coreclrTestGroup: jitstress-random
+            timeoutInMinutes: 360
+            buildArgs: -s clr+libs+libs.tests -rc Checked -c $(_BuildConfig) /p:ArchiveTests=true
+            postBuildSteps:
+              - template: /eng/pipelines/libraries/helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  extraHelixArguments: /maxcpucount:10
+                  scenarios:
+                    - jitstress_random_1
+                    - jitstress_random_2
diff --git a/eng/pipelines/coreclr/libraries-jitstress.yml b/eng/pipelines/coreclr/libraries-jitstress.yml
index 4efcd5937723..1ac2c5b34dc0 100644
--- a/eng/pipelines/coreclr/libraries-jitstress.yml
+++ b/eng/pipelines/coreclr/libraries-jitstress.yml
@@ -18,13 +18,12 @@ extends:
     - stage: Build
       jobs:
 
-      #
-      # Build CoreCLR checked and libraries Release
-      #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: Release
+          helixQueueGroup: libraries
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           platforms:
           - linux_x64
           - linux_arm
@@ -32,33 +31,21 @@ extends:
           - windows_x86
           - windows_x64
           - windows_arm64
-          jobParameters:
-            # libraries test build platforms
-            testBuildPlatforms:
-            - linux_x64
-            - windows_x64
-
-      #
-      # Libraries Test Run using Release libraries, Checked CoreCLR, and stress modes
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/libraries/run-test-job.yml
-          buildConfig: Release
-          platforms:
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_arm64
-          - windows_x64
-          - windows_x86
-          helixQueueGroup: libraries
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           jobParameters:
             # Default timeout is 150 minutes (2.5 hours), which is not enough for stress.
-            timeoutInMinutes: 300
-            testScope: innerloop
-            liveRuntimeBuildConfig: checked
-            dependsOnTestBuildConfiguration: Release
-            dependsOnTestArchitecture: x64
-            coreclrTestGroup: jitstress
+            timeoutInMinutes: 360
+            buildArgs: -s clr+libs+libs.tests -rc Checked -c $(_BuildConfig) /p:ArchiveTests=true
+            postBuildSteps:
+              - template: /eng/pipelines/libraries/helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  extraHelixArguments: /maxcpucount:10
+                  scenarios:
+                    - no_tiered_compilation
+                    - jitminopts
+                    - jitstress1
+                    - jitstress1_tiered
+                    - jitstress2
+                    - jitstress2_tiered
+                    - disabler2r
+                    - tailcallstress
diff --git a/eng/pipelines/coreclr/libraries-jitstress2-jitstressregs.yml b/eng/pipelines/coreclr/libraries-jitstress2-jitstressregs.yml
index 2a80b2757be9..83d8e131c6d7 100644
--- a/eng/pipelines/coreclr/libraries-jitstress2-jitstressregs.yml
+++ b/eng/pipelines/coreclr/libraries-jitstress2-jitstressregs.yml
@@ -18,13 +18,12 @@ extends:
     - stage: Build
       jobs:
 
-      #
-      # Build CoreCLR checked and libraries Release
-      #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: Release
+          helixQueueGroup: libraries
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           platforms:
           - linux_x64
           - linux_arm
@@ -32,33 +31,22 @@ extends:
           - windows_x86
           - windows_x64
           - windows_arm64
-          jobParameters:
-            # libraries test build platforms
-            testBuildPlatforms:
-            - linux_x64
-            - windows_x64
-
-      #
-      # Libraries Test Run using Release libraries, Checked CoreCLR, and stress modes
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/libraries/run-test-job.yml
-          buildConfig: Release
-          platforms:
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_arm64
-          - windows_x64
-          - windows_x86
-          helixQueueGroup: libraries
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           jobParameters:
             # Default timeout is 150 minutes (2.5 hours), which is not enough for stress.
-            timeoutInMinutes: 300
-            testScope: innerloop
-            liveRuntimeBuildConfig: checked
-            dependsOnTestBuildConfiguration: Release
-            dependsOnTestArchitecture: x64
-            coreclrTestGroup: jitstress2-jitstressregs
+            timeoutInMinutes: 360
+            buildArgs: -s clr+libs+libs.tests -rc Checked -c $(_BuildConfig) /p:ArchiveTests=true
+            postBuildSteps:
+              - template: /eng/pipelines/libraries/helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  extraHelixArguments: /maxcpucount:10
+                  scenarios:
+                    - jitstress2_jitstressregs1
+                    - jitstress2_jitstressregs2
+                    - jitstress2_jitstressregs3
+                    - jitstress2_jitstressregs4
+                    - jitstress2_jitstressregs8
+                    - jitstress2_jitstressregs0x10
+                    - jitstress2_jitstressregs0x80
+                    - jitstress2_jitstressregs0x1000
+                    - jitstress2_jitstressregs0x2000
\ No newline at end of file
diff --git a/eng/pipelines/coreclr/libraries-jitstressregs.yml b/eng/pipelines/coreclr/libraries-jitstressregs.yml
index 57ab5bae5326..48c1aa93d08c 100644
--- a/eng/pipelines/coreclr/libraries-jitstressregs.yml
+++ b/eng/pipelines/coreclr/libraries-jitstressregs.yml
@@ -18,13 +18,12 @@ extends:
     - stage: Build
       jobs:
 
-      #
-      # Build CoreCLR checked and libraries Release
-      #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: Release
+          helixQueueGroup: libraries
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           platforms:
           - linux_x64
           - linux_arm
@@ -32,33 +31,22 @@ extends:
           - windows_x86
           - windows_x64
           - windows_arm64
-          jobParameters:
-            # libraries test build platforms
-            testBuildPlatforms:
-            - linux_x64
-            - windows_x64
-
-      #
-      # Libraries Test Run using Release libraries, Checked CoreCLR, and stress modes
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/libraries/run-test-job.yml
-          buildConfig: Release
-          platforms:
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - windows_arm64
-          - windows_x64
-          - windows_x86
-          helixQueueGroup: libraries
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           jobParameters:
             # Default timeout is 150 minutes (2.5 hours), which is not enough for stress.
-            timeoutInMinutes: 300
-            testScope: innerloop
-            liveRuntimeBuildConfig: checked
-            dependsOnTestBuildConfiguration: Release
-            dependsOnTestArchitecture: x64
-            coreclrTestGroup: jitstressregs
+            timeoutInMinutes: 360
+            buildArgs: -s clr+libs+libs.tests -rc Checked -c $(_BuildConfig) /p:ArchiveTests=true
+            postBuildSteps:
+              - template: /eng/pipelines/libraries/helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  extraHelixArguments: /maxcpucount:10
+                  scenarios:
+                    - jitstressregs1
+                    - jitstressregs2
+                    - jitstressregs3
+                    - jitstressregs4
+                    - jitstressregs8
+                    - jitstressregs0x10
+                    - jitstressregs0x80
+                    - jitstressregs0x1000
+                    - jitstressregs0x2000
diff --git a/eng/pipelines/coreclr/libraries-pgo.yml b/eng/pipelines/coreclr/libraries-pgo.yml
index 46643af71d2a..a8f0e16b01f3 100644
--- a/eng/pipelines/coreclr/libraries-pgo.yml
+++ b/eng/pipelines/coreclr/libraries-pgo.yml
@@ -18,46 +18,56 @@ extends:
     - stage: Build
       jobs:
 
-      #
-      # Build CoreCLR checked and libraries Release
-      #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: Release
+          helixQueueGroup: libraries
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           platforms:
-          - linux_x64
-          - linux_arm
-          - linux_arm64
-          - windows_x86
-          - windows_x64
           - windows_arm64
           jobParameters:
-            # libraries test build platforms
-            testBuildPlatforms:
-            - linux_x64
-            - windows_x64
+            # Default timeout is 150 minutes (2.5 hours), which is not enough for stress.
+            timeoutInMinutes: 660
+            buildArgs: -s clr+libs+libs.tests -rc Checked -c $(_BuildConfig) /p:ArchiveTests=true
+            postBuildSteps:
+              - template: /eng/pipelines/libraries/helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  extraHelixArguments: /maxcpucount:10
+                  scenarios:
+                    - defaultpgo
 
-      #
-      # Libraries Test Run using Release libraries, Checked CoreCLR, and stress modes
-      #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/libraries/run-test-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: Release
+          helixQueueGroup: libraries
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           platforms:
+          - linux_x64
           - linux_arm
           - linux_arm64
-          - linux_x64
-          - windows_arm64
-          - windows_x64
           - windows_x86
-          helixQueueGroup: libraries
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+          - windows_x64
           jobParameters:
-            timeoutInMinutes: 600
-            testScope: innerloop
-            liveRuntimeBuildConfig: checked
-            dependsOnTestBuildConfiguration: Release
-            dependsOnTestArchitecture: x64
-            coreclrTestGroup: pgo
+            # Default timeout is 150 minutes (2.5 hours), which is not enough for stress.
+            timeoutInMinutes: 660
+            buildArgs: -s clr+libs+libs.tests -rc Checked -c $(_BuildConfig) /p:ArchiveTests=true
+            postBuildSteps:
+              - template: /eng/pipelines/libraries/helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  extraHelixArguments: /maxcpucount:10
+                  scenarios:
+                    - defaultpgo
+                    - fullpgo
+                    - fullpgo_methodprofiling
+                    - fullpgo_random_gdv
+                    - fullpgo_random_gdv_methodprofiling_only
+                    - fullpgo_random_gdv_edge
+                    - jitosr_stress
+                    - jitosr_stress_random
+                    - syntheticpgo
+                    - syntheticpgo_blend
+                    - jitrlcse
diff --git a/eng/pipelines/coreclr/perf-non-wasm-jobs.yml b/eng/pipelines/coreclr/perf-non-wasm-jobs.yml
index 79dce1867bf6..c48103af929c 100644
--- a/eng/pipelines/coreclr/perf-non-wasm-jobs.yml
+++ b/eng/pipelines/coreclr/perf-non-wasm-jobs.yml
@@ -5,7 +5,7 @@ jobs:
   # build coreclr and libraries
   - template: /eng/pipelines/common/platform-matrix.yml
     parameters:
-      jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+      jobTemplate: /eng/pipelines/common/global-build-job.yml
       buildConfig: release
       platforms:
       - linux_x64
@@ -13,7 +13,19 @@ jobs:
       - windows_x86
       - linux_musl_x64
       jobParameters:
-        testGroup: perf
+        nameSuffix: coreclr
+        buildArgs: -s clr+libs+host+packs -c $(_BuildConfig)
+        isOfficialBuild: false
+        postBuildSteps:
+        - template: /eng/pipelines/common/upload-artifact-step.yml
+          parameters:
+            rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+            includeRootFolder: false
+            archiveType: $(archiveType)
+            archiveExtension: $(archiveExtension)
+            tarCompression: $(tarCompression)
+            artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)_coreclr
+            displayName: Build Assets
 
   # build mono for AOT
   - template: /eng/pipelines/common/platform-matrix.yml
@@ -64,11 +76,25 @@ jobs:
   # build mono
   - template: /eng/pipelines/common/platform-matrix.yml
     parameters:
-      jobTemplate: /eng/pipelines/mono/templates/build-job.yml
-      runtimeFlavor: mono
+      jobTemplate: /eng/pipelines/common/global-build-job.yml
       buildConfig: release
+      runtimeFlavor: mono
       platforms:
       - linux_x64
+      jobParameters:
+        nameSuffix: mono
+        buildArgs: -s mono+clr.iltools+clr.hosts+libs+host+packs -c $(_BuildConfig)
+        isOfficialBuild: false
+        postBuildSteps:
+        - template: /eng/pipelines/common/upload-artifact-step.yml
+          parameters:
+            rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+            includeRootFolder: false
+            archiveType: $(archiveType)
+            archiveExtension: $(archiveExtension)
+            tarCompression: $(tarCompression)
+            artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)_mono
+            displayName: Build Assets
 
   # Build and run iOS Mono and NativeAOT scenarios
   - template: /eng/pipelines/coreclr/templates/build-and-run-perf-ios-scenarios.yml
@@ -263,6 +289,23 @@ jobs:
         runJobTemplate: /eng/pipelines/coreclr/templates/run-performance-job.yml
         logicalmachine: 'perfowl'
 
+  # run coreclr perfviper microbenchmarks perf job
+  - template: /eng/pipelines/common/platform-matrix.yml
+    parameters:
+      jobTemplate: /eng/pipelines/coreclr/templates/perf-job.yml
+      buildConfig: release
+      runtimeFlavor: coreclr
+      platforms:
+      - linux_x64
+      - windows_x64
+      jobParameters:
+        testGroup: perf
+        liveLibrariesBuildConfig: Release
+        projectFile: microbenchmarks.proj
+        runKind: micro
+        runJobTemplate: /eng/pipelines/coreclr/templates/run-performance-job.yml
+        logicalmachine: 'perfviper'
+
   # run coreclr perfowl microbenchmarks perf gdv3 jobs
   - template: /eng/pipelines/common/platform-matrix.yml
     parameters:
@@ -281,6 +324,42 @@ jobs:
         logicalmachine: 'perfowl'
         experimentName: 'gdv3'
 
+  # run coreclr perfowl microbenchmarks perf rlcse jobs
+  - template: /eng/pipelines/common/platform-matrix.yml
+    parameters:
+      jobTemplate: /eng/pipelines/coreclr/templates/perf-job.yml
+      buildConfig: release
+      runtimeFlavor: coreclr
+      platforms:
+      - linux_x64
+      - windows_x64
+      jobParameters:
+        testGroup: perf
+        liveLibrariesBuildConfig: Release
+        projectFile: microbenchmarks.proj
+        runKind: micro
+        runJobTemplate: /eng/pipelines/coreclr/templates/run-performance-job.yml
+        logicalmachine: 'perfowl'
+        experimentName: 'rlcse'
+
+  # run coreclr perfowl microbenchmarks perf jitoptrepeat jobs
+  - template: /eng/pipelines/common/platform-matrix.yml
+    parameters:
+      jobTemplate: /eng/pipelines/coreclr/templates/perf-job.yml
+      buildConfig: release
+      runtimeFlavor: coreclr
+      platforms:
+      - linux_x64
+      - windows_x64
+      jobParameters:
+        testGroup: perf
+        liveLibrariesBuildConfig: Release
+        projectFile: microbenchmarks.proj
+        runKind: micro
+        runJobTemplate: /eng/pipelines/coreclr/templates/run-performance-job.yml
+        logicalmachine: 'perfowl'
+        experimentName: 'jitoptrepeat'
+
   # run coreclr crossgen perf job
   - template: /eng/pipelines/common/platform-matrix.yml
     parameters:
@@ -315,31 +394,31 @@ jobs:
           - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml
             parameters:
               name: MonoRuntimePacks
+              isOfficialBuild: false
 
-  # Disabled due to: https://github.com/dotnet/performance/issues/3655
-  # # build PerfBDN app
-  # - template: /eng/pipelines/common/platform-matrix.yml
-  #   parameters:
-  #     jobTemplate: /eng/pipelines/common/global-build-job.yml
-  #     buildConfig: release
-  #     runtimeFlavor: mono
-  #     platforms:
-  #     - ios_arm64
-  #     jobParameters:
-  #       dependsOn:
-  #        - Build_android_arm64_release_Mono_Packs
-  #       buildArgs: -s mono -c $(_BuildConfig)
-  #       nameSuffix: PerfBDNApp
-  #       isOfficialBuild: false
-  #       pool:
-  #         vmImage: 'macos-12'
-  #       postBuildSteps:
-  #         - template: /eng/pipelines/coreclr/templates/build-perf-bdn-app.yml
-  #           parameters:
-  #             rootFolder: '$(Build.SourcesDirectory)/artifacts/'
-  #             includeRootFolder: true
-  #             displayName: Android BDN App Artifacts
-  #             artifactName: PerfBDNAppArm
-  #             archiveExtension: '.tar.gz'
-  #             archiveType: tar
-  #             tarCompression: gz
+  # build PerfBDN app
+  - template: /eng/pipelines/common/platform-matrix.yml
+    parameters:
+      jobTemplate: /eng/pipelines/common/global-build-job.yml
+      buildConfig: release
+      runtimeFlavor: mono
+      platforms:
+      - ios_arm64
+      jobParameters:
+        dependsOn:
+         - Build_android_arm64_release_Mono_Packs
+        buildArgs: -s mono -c $(_BuildConfig)
+        nameSuffix: PerfBDNApp
+        isOfficialBuild: false
+        pool:
+          vmImage: 'macos-12'
+        postBuildSteps:
+          - template: /eng/pipelines/coreclr/templates/build-perf-bdn-app.yml
+            parameters:
+              rootFolder: '$(Build.SourcesDirectory)/artifacts/'
+              includeRootFolder: true
+              displayName: Android BDN App Artifacts
+              artifactName: PerfBDNAppArm
+              archiveExtension: '.tar.gz'
+              archiveType: tar
+              tarCompression: gz
diff --git a/eng/pipelines/coreclr/perf-wasm-jobs.yml b/eng/pipelines/coreclr/perf-wasm-jobs.yml
index 9b6fa299752b..3f33e90cbc8f 100644
--- a/eng/pipelines/coreclr/perf-wasm-jobs.yml
+++ b/eng/pipelines/coreclr/perf-wasm-jobs.yml
@@ -67,7 +67,6 @@ jobs:
       jobparameters:
         testgroup: perf
         livelibrariesbuildconfig: Release
-        skipLiveLibrariesDownload: true
         runtimetype: wasm
         codegentype: 'aot'
         projectfile: microbenchmarks.proj
@@ -109,7 +108,6 @@ jobs:
       jobParameters:
         testGroup: perf
         liveLibrariesBuildConfig: Release
-        skipLiveLibrariesDownload: true
         runtimeType: wasm
         codeGenType: 'wasm'
         projectFile: microbenchmarks.proj
@@ -138,7 +136,6 @@ jobs:
         jobparameters:
           testgroup: perf
           livelibrariesbuildconfig: Release
-          skipLiveLibrariesDownload: true
           runtimetype: wasm
           codegentype: 'aot'
           projectfile: microbenchmarks.proj
@@ -165,7 +162,6 @@ jobs:
       jobParameters:
         testGroup: perf
         liveLibrariesBuildConfig: Release
-        skipLiveLibrariesDownload: true
         runtimeType: wasm
         projectFile: blazor_perf.proj
         runKind: blazor_scenarios
@@ -188,7 +184,6 @@ jobs:
       jobParameters:
         testGroup: perf
         liveLibrariesBuildConfig: Release
-        skipLiveLibrariesDownload: true
         runtimeType: wasm
         projectFile: blazor_perf.proj
         runKind: blazor_scenarios
diff --git a/eng/pipelines/coreclr/perf_slow.yml b/eng/pipelines/coreclr/perf_slow.yml
index 49afdad10f57..4a6fa32a81f1 100644
--- a/eng/pipelines/coreclr/perf_slow.yml
+++ b/eng/pipelines/coreclr/perf_slow.yml
@@ -1,3 +1,11 @@
+parameters:
+- name: runPrivateJobs
+  type: boolean
+  default: false
+- name: runScheduledJobs
+  type: boolean
+  default: false
+
 trigger:
   batch: true
   branches:
@@ -34,26 +42,29 @@ extends:
     - stage: Build
       jobs:
 
-      - ${{ if and(ne(variables['System.TeamProject'], 'public'), in(variables['Build.Reason'], 'Schedule')) }}:
-
+      - ${{ if and(ne(variables['System.TeamProject'], 'public'), or(in(variables['Build.Reason'], 'Schedule'), parameters.runScheduledJobs)) }}:
         # build mono
         - template: /eng/pipelines/common/platform-matrix.yml
           parameters:
-            jobTemplate: /eng/pipelines/mono/templates/build-job.yml
-            runtimeFlavor: mono
-            buildConfig: release
-            platforms:
-            - linux_arm64
-
-        # build coreclr and libraries
-        - template: /eng/pipelines/common/platform-matrix.yml
-          parameters:
-            jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+            jobTemplate: /eng/pipelines/common/global-build-job.yml
             buildConfig: release
+            runtimeFlavor: mono
             platforms:
             - linux_arm64
             jobParameters:
-              testGroup: perf
+              nameSuffix: mono
+              buildArgs: -s mono+clr.iltools+clr.hosts+libs+host+packs -c $(_BuildConfig)
+              isOfficialBuild: false
+              postBuildSteps:
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)_mono
+                  displayName: Build Assets
 
         # run arm64 interpreter jobs for mono
         - template: /eng/pipelines/common/platform-matrix.yml
@@ -74,12 +85,12 @@ extends:
               logicalmachine: 'perfampere'
               timeoutInMinutes: 720
 
-      - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'Schedule')) }}:
+      - ${{ if and(ne(variables['System.TeamProject'], 'public'), or(notin(variables['Build.Reason'], 'Schedule', 'Manual'), parameters.runPrivateJobs)) }}:
 
         # build coreclr and libraries
         - template: /eng/pipelines/common/platform-matrix.yml
           parameters:
-            jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+            jobTemplate: /eng/pipelines/common/global-build-job.yml
             buildConfig: release
             platforms:
             - linux_x64
@@ -87,7 +98,19 @@ extends:
             - linux_arm64
             - windows_arm64
             jobParameters:
-              testGroup: perf
+              nameSuffix: coreclr
+              buildArgs: -s clr+libs+host+packs -c $(_BuildConfig)
+              isOfficialBuild: false
+              postBuildSteps:
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)_coreclr
+                  displayName: Build Assets
 
         - template: /eng/pipelines/common/platform-matrix.yml
           parameters:
diff --git a/eng/pipelines/coreclr/pgo.yml b/eng/pipelines/coreclr/pgo.yml
index f6d00e6fea50..26b2846d1a19 100644
--- a/eng/pipelines/coreclr/pgo.yml
+++ b/eng/pipelines/coreclr/pgo.yml
@@ -12,51 +12,14 @@ variables:
   - template: /eng/pipelines/common/variables.yml
 
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - osx_arm64
-          - windows_arm64
-          - windows_x64
-          - windows_x86
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: pgo
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: pgo
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - osx_arm64
-          - windows_arm64
-          - windows_x64
-          - windows_x86
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: pgo
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - osx_arm64
+    - linux_arm
+    - linux_arm64
+    - linux_x64
+    - windows_x64
+    - windows_x86
+    - windows_arm64
+    testGroup: pgo
diff --git a/eng/pipelines/coreclr/pgostress.yml b/eng/pipelines/coreclr/pgostress.yml
index 228b3b582e5d..fdd8e964082d 100644
--- a/eng/pipelines/coreclr/pgostress.yml
+++ b/eng/pipelines/coreclr/pgostress.yml
@@ -12,51 +12,14 @@ variables:
   - template: /eng/pipelines/common/variables.yml
 
 extends:
-  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  template:  /eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
   parameters:
-    stages:
-    - stage: Build
-      jobs:
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - osx_arm64
-          - windows_arm64
-          - windows_x64
-          - windows_x86
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: pgostress
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: pgostress
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_arm
-          - linux_arm64
-          - linux_x64
-          - osx_arm64
-          - windows_arm64
-          - windows_x64
-          - windows_x86
-          helixQueueGroup: ci
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: pgostress
-            liveLibrariesBuildConfig: Release
+    platforms:
+    - osx_arm64
+    - linux_arm
+    - linux_arm64
+    - linux_x64
+    - windows_x64
+    - windows_x86
+    - windows_arm64
+    testGroup: pgostress
diff --git a/eng/pipelines/coreclr/r2r-extra.yml b/eng/pipelines/coreclr/r2r-extra.yml
index acc1c6a96207..71324e3224f3 100644
--- a/eng/pipelines/coreclr/r2r-extra.yml
+++ b/eng/pipelines/coreclr/r2r-extra.yml
@@ -20,16 +20,30 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platformGroup: gcstress
           platforms:
           # It is too early to include osx_arm64 in platform group gcstress
           # Adding it here will enable it also
           - osx_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: r2r-extra
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: r2r-extra
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -56,3 +70,4 @@ extends:
             readyToRun: true
             displayNameArgs: R2R
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/r2r.yml b/eng/pipelines/coreclr/r2r.yml
index 4f94eb6a1570..977580703f71 100644
--- a/eng/pipelines/coreclr/r2r.yml
+++ b/eng/pipelines/coreclr/r2r.yml
@@ -20,7 +20,7 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - linux_arm
@@ -30,9 +30,23 @@ extends:
           - windows_arm64
           - windows_x64
           - windows_x86
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: outerloop
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: outerloop
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -62,3 +76,4 @@ extends:
             readyToRun: true
             displayNameArgs: R2R
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/release-tests.yml b/eng/pipelines/coreclr/release-tests.yml
index 9ccf72546391..d25c3069b7cf 100644
--- a/eng/pipelines/coreclr/release-tests.yml
+++ b/eng/pipelines/coreclr/release-tests.yml
@@ -23,7 +23,7 @@ extends:
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: release
           platformGroup: all
           platforms:
@@ -31,8 +31,22 @@ extends:
           # Adding it here will enable it also
           - osx_arm64
           jobParameters:
-            testGroup: outerloop
-            isOfficialBuild: false
+            buildArgs: -s clr+libs -c $(_BuildConfig)
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: outerloop
 
       #
       # Release test builds
@@ -63,6 +77,7 @@ extends:
           jobParameters:
             testGroup: outerloop
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
 
       #
       # Release R2R test runs
@@ -79,4 +94,5 @@ extends:
             liveLibrariesBuildConfig: Release
             readyToRun: true
             displayNameArgs: R2R
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
 
diff --git a/eng/pipelines/coreclr/runincontext.yml b/eng/pipelines/coreclr/runincontext.yml
index 9135532fd5f9..6ade58de8fe5 100644
--- a/eng/pipelines/coreclr/runincontext.yml
+++ b/eng/pipelines/coreclr/runincontext.yml
@@ -20,15 +20,29 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - linux_x64
           - windows_x64
           - windows_x86
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: outerloop
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: outerloop
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -54,3 +68,4 @@ extends:
             runInUnloadableContext: true
             displayNameArgs: RunInContext
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/runtime-nativeaot-outerloop.yml b/eng/pipelines/coreclr/runtime-nativeaot-outerloop.yml
index 164485e7d007..e8bfd86cd81d 100644
--- a/eng/pipelines/coreclr/runtime-nativeaot-outerloop.yml
+++ b/eng/pipelines/coreclr/runtime-nativeaot-outerloop.yml
@@ -57,17 +57,21 @@ extends:
           platforms:
           - windows_x64
           - windows_arm64
+          - windows_x86
           - osx_x64
           - osx_arm64
           - linux_x64
+          - linux_arm
           - linux_arm64
           - linux_musl_x64
+          - linux_musl_arm64
           jobParameters:
             testGroup: innerloop
             isSingleFile: true
             nameSuffix: NativeAOT_Libs
-            buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) /p:TestNativeAot=true /p:ArchiveTests=true /p:IlcUseServerGc=false
+            buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) /p:TestNativeAot=true /p:ArchiveTests=true /p:IlcUseServerGc=false /p:RunAnalyzers=false
             timeoutInMinutes: 300 # doesn't normally take this long, but I've seen Helix queues backed up for 160 minutes
+            includeAllPlatforms: true
             # extra steps, run tests
             postBuildSteps:
               - template: /eng/pipelines/libraries/helix.yml
@@ -91,7 +95,7 @@ extends:
             testGroup: innerloop
             isSingleFile: true
             nameSuffix: NativeAOT_Checked_Libs
-            buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:TestNativeAot=true /p:ArchiveTests=true /p:IlcUseServerGc=false
+            buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:TestNativeAot=true /p:ArchiveTests=true /p:IlcUseServerGc=false /p:RunAnalyzers=false
             timeoutInMinutes: 360
             # extra steps, run tests
             postBuildSteps:
@@ -116,7 +120,7 @@ extends:
             testGroup: innerloop
             isSingleFile: true
             nameSuffix: NativeAOT_Checked_Libs_SizeOpt
-            buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:TestNativeAot=true /p:ArchiveTests=true /p:OptimizationPreference=Size /p:IlcUseServerGc=false
+            buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:TestNativeAot=true /p:ArchiveTests=true /p:OptimizationPreference=Size /p:IlcUseServerGc=false /p:RunAnalyzers=false
             timeoutInMinutes: 240
             # extra steps, run tests
             postBuildSteps:
@@ -141,7 +145,7 @@ extends:
             testGroup: innerloop
             isSingleFile: true
             nameSuffix: NativeAOT_Checked_Libs_SpeedOpt
-            buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:TestNativeAot=true /p:ArchiveTests=true /p:OptimizationPreference=Speed /p:IlcUseServerGc=false
+            buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:TestNativeAot=true /p:ArchiveTests=true /p:OptimizationPreference=Speed /p:IlcUseServerGc=false /p:RunAnalyzers=false
             timeoutInMinutes: 240
             # extra steps, run tests
             postBuildSteps:
@@ -161,16 +165,18 @@ extends:
           buildConfig: Checked
           platforms:
           - windows_x64
+          - windows_x86
           - linux_x64
+          - linux_arm
           variables:
           - name: timeoutPerTestInMinutes
             value: 60
           - name: timeoutPerTestCollectionInMinutes
             value: 180
           jobParameters:
-            timeoutInMinutes: 240
+            timeoutInMinutes: 300 # doesn't normally take this long, but we have had Helix queues backed up for over an hour
             nameSuffix: NativeAOT_Pri0
-            buildArgs: -s clr.aot+host.native+libs -rc $(_BuildConfig) -lc Release -hc Release
+            buildArgs: -s clr.aot+host.native+libs -rc $(_BuildConfig) -lc Release -hc Release /p:RunAnalyzers=false
             postBuildSteps:
               - template: /eng/pipelines/coreclr/nativeaot-post-build-steps.yml
                 parameters:
diff --git a/eng/pipelines/coreclr/superpmi-asmdiffs-checked-release.yml b/eng/pipelines/coreclr/superpmi-asmdiffs-checked-release.yml
index a356acb4fd9c..4d25196bdc8b 100644
--- a/eng/pipelines/coreclr/superpmi-asmdiffs-checked-release.yml
+++ b/eng/pipelines/coreclr/superpmi-asmdiffs-checked-release.yml
@@ -17,26 +17,46 @@ extends:
     stages:
     - stage: Build
       jobs:
-
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-jit-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - windows_x64
           - windows_x86
           jobParameters:
-            uploadAs: 'pipelineArtifacts'
+            buildArgs: -s clr.alljits+clr.spmi -c $(_BuildConfig)
+            postBuildSteps:
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  tarCompression: $(tarCompression)
+                  archiveExtension: $(archiveExtension)
+                  artifactName: CheckedJIT_$(osGroup)$(osSubgroup)_$(archType)
+                  displayName: JIT and SuperPMI Assets
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-jit-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: release
           platforms:
           - windows_x64
           - windows_x86
+          - linux_x64
           jobParameters:
-            uploadAs: 'pipelineArtifacts'
+            buildArgs: -s clr.alljits+clr.spmi -c $(_BuildConfig) /p:NoPgoOptimize=true
+            postBuildSteps:
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  tarCompression: $(tarCompression)
+                  archiveExtension: $(archiveExtension)
+                  artifactName: ReleaseJIT_$(osGroup)$(osSubgroup)_$(archType)
+                  displayName: JIT and SuperPMI Assets
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
diff --git a/eng/pipelines/coreclr/superpmi-collect.yml b/eng/pipelines/coreclr/superpmi-collect.yml
index 336125d43bc1..e784fe45be71 100644
--- a/eng/pipelines/coreclr/superpmi-collect.yml
+++ b/eng/pipelines/coreclr/superpmi-collect.yml
@@ -35,36 +35,116 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
-          - osx_arm64
-          - linux_arm
-          - linux_arm64
-          - linux_x64
           - windows_x64
+          - linux_x64
+          jobParameters:
+            testGroup: outerloop
+            buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) -c Release /p:ArchiveTests=true
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/helix
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: outerloop
+            disableComponentGovernance: true # No shipping artifacts produced by this pipeline
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: checked
+          platforms:
           - windows_x86
           - windows_arm64
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
+          - osx_arm64
           jobParameters:
             testGroup: outerloop
-            # libraries test build platforms
-            testBuildPlatforms:
-            - linux_x64
-            - windows_x64
+            buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) -c Release /p:ArchiveTests=true
+            postBuildSteps:
+              # Build CLR assets for x64 as well as the target as we need an x64 mcs
+              - template: /eng/pipelines/common/templates/global-build-step.yml
+                parameters:
+                  buildArgs: -s clr.spmi -c $(_BuildConfig)
+                  archParameter: -arch x64
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/helix
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: outerloop
             disableComponentGovernance: true # No shipping artifacts produced by this pipeline
 
-      # superpmi-collect-job that targets macOS/arm64 depends on coreclr binaries produced by the macOS/x64 job
-      # We don't collect osx-x64 (it's essentially the same as linux-x64). If we did, we'd add osx_x64 in the
-      # build-coreclr-and-libraries-job.yml above, and remove this.
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
-          - osx_x64
+          - linux_arm
+          - linux_arm64
           jobParameters:
             testGroup: outerloop
+            buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) -c Release /p:ArchiveTests=true
+            postBuildSteps:
+              # Build CLR assets for x64 as well as the target as we need an x64 mcs
+              - template: /eng/pipelines/common/templates/global-build-step.yml
+                parameters:
+                  buildArgs: -s clr.spmi -c $(_BuildConfig)
+                  archParameter: -arch x64
+                  container: linux_x64
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/helix
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: outerloop
             disableComponentGovernance: true # No shipping artifacts produced by this pipeline
 
       - template: /eng/pipelines/common/platform-matrix.yml
@@ -217,6 +297,7 @@ extends:
             testGroup: outerloop
             liveLibrariesBuildConfig: Release
             SuperPmiCollect: true
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -255,12 +336,16 @@ extends:
           helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           jobParameters:
             testScope: innerloop
-            liveRuntimeBuildConfig: checked
+            liveRuntimeBuildConfig: Checked
             dependsOnTestBuildConfiguration: Release
             dependsOnTestArchitecture: x64
-            coreclrTestGroup: superpmi_collection
+            scenarios:
+            - normal
             SuperPmiCollect: true
             SuperPmiCollectionName: libraries_tests
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked
+            helixArtifactsName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked
+            unifiedBuildConfigOverride: checked
 
       #
       # Collection of libraries test run: no_tiered_compilation
@@ -282,9 +367,13 @@ extends:
           helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           jobParameters:
             testScope: innerloop
-            liveRuntimeBuildConfig: checked
+            liveRuntimeBuildConfig: Checked
             dependsOnTestBuildConfiguration: Release
             dependsOnTestArchitecture: x64
-            coreclrTestGroup: superpmi_collection_no_tiered_compilation
+            scenarios:
+            - no_tiered_compilation
             SuperPmiCollect: true
             SuperPmiCollectionName: libraries_tests_no_tiered_compilation
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked
+            helixArtifactsName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked
+            unifiedBuildConfigOverride: checked
diff --git a/eng/pipelines/coreclr/superpmi-diffs.yml b/eng/pipelines/coreclr/superpmi-diffs.yml
index 423f1e8b1dd1..c9cae0c63ac3 100644
--- a/eng/pipelines/coreclr/superpmi-diffs.yml
+++ b/eng/pipelines/coreclr/superpmi-diffs.yml
@@ -11,7 +11,7 @@ parameters:
 # This pipeline only runs on GitHub PRs, not on merges.
 trigger: none
 
-# Only run on changes to the JIT directory. 
+# Only run on changes to the JIT directory.
 pr:
   branches:
     include:
@@ -29,13 +29,13 @@ extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
     stages:
-    - stage: Build
-      jobs:
-
-      # Don't run if the JIT-EE GUID has changed,
-      # since there won't be any SuperPMI collections with the new GUID until the collection
-      # pipeline completes after this PR is merged.
-      - ${{ if eq(variables.dependOnEvaluatePaths, true) }}:
+    # Don't run if the JIT-EE GUID has changed,
+    # since there won't be any SuperPMI collections with the new GUID until the collection
+    # pipeline completes after this PR is merged.
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
         - template: /eng/pipelines/common/evaluate-paths-job.yml
           parameters:
             paths:
@@ -43,28 +43,51 @@ extends:
               include:
               - src/coreclr/inc/jiteeversionguid.h
 
+    - stage: Build
+      jobs:
+
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-jit-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - windows_x64
           - windows_x86
           jobParameters:
-            uploadAs: 'pipelineArtifacts'
-            condition: not(eq(dependencies.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true))
+            buildArgs: -s clr.alljits+clr.spmi -c $(_BuildConfig)
+            postBuildSteps:
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  tarCompression: $(tarCompression)
+                  archiveExtension: $(archiveExtension)
+                  artifactName: CheckedJIT_$(osGroup)$(osSubgroup)_$(archType)
+                  displayName: JIT and SuperPMI Assets
+            condition: not(eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true))
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-jit-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: release
           platforms:
           - windows_x64
           - windows_x86
           - linux_x64
           jobParameters:
-            uploadAs: 'pipelineArtifacts'
-            condition: not(eq(dependencies.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true))
+            buildArgs: -s clr.alljits+clr.spmi -c $(_BuildConfig) /p:NoPgoOptimize=true
+            postBuildSteps:
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  tarCompression: $(tarCompression)
+                  archiveExtension: $(archiveExtension)
+                  artifactName: ReleaseJIT_$(osGroup)$(osSubgroup)_$(archType)
+                  displayName: JIT and SuperPMI Assets
+            condition: not(eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true))
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -76,7 +99,7 @@ extends:
           helixQueueGroup: ci
           helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           jobParameters:
-            condition: not(eq(dependencies.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true))
+            condition: not(eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true))
             diffType: asmdiffs
             baseJitOptions: ${{ parameters.spmi_jitoptions_base }}
             diffJitOptions: ${{ parameters.spmi_jitoptions_diff }}
@@ -92,7 +115,7 @@ extends:
           helixQueueGroup: ci
           helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           jobParameters:
-            condition: not(eq(dependencies.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true))
+            condition: not(eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true))
             diffType: tpdiff
             baseJitOptions: ${{ parameters.spmi_jitoptions_base }}
             diffJitOptions: ${{ parameters.spmi_jitoptions_diff }}
diff --git a/eng/pipelines/coreclr/superpmi-replay.yml b/eng/pipelines/coreclr/superpmi-replay.yml
index 88e575b37d72..d562dbee4555 100644
--- a/eng/pipelines/coreclr/superpmi-replay.yml
+++ b/eng/pipelines/coreclr/superpmi-replay.yml
@@ -22,10 +22,13 @@ extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
     stages:
-    - stage: Build
-      jobs:
-
-      - ${{ if eq(variables.dependOnEvaluatePaths, true) }}:
+    # Don't run if the JIT-EE GUID has changed,
+    # since there won't be any SuperPMI collections with the new GUID until the collection
+    # pipeline completes after this PR is merged.
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
         - template: /eng/pipelines/common/evaluate-paths-job.yml
           parameters:
             paths:
@@ -33,16 +36,29 @@ extends:
               include:
               - src/coreclr/inc/jiteeversionguid.h
 
+    - stage: Build
+      jobs:
+
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-jit-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - windows_x64
           - windows_x86
           jobParameters:
-            uploadAs: 'pipelineArtifacts'
-            condition: not(eq(dependencies.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true))
+            buildArgs: -s clr.alljits+clr.spmi -c $(_BuildConfig)
+            postBuildSteps:
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  tarCompression: $(tarCompression)
+                  archiveExtension: $(archiveExtension)
+                  artifactName: CheckedJIT_$(osGroup)$(osSubgroup)_$(archType)
+                  displayName: JIT and SuperPMI Assets
+            condition: not(eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true))
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -54,4 +70,4 @@ extends:
           helixQueueGroup: ci
           helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           jobParameters:
-            condition: not(eq(dependencies.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true))
+            condition: not(eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_jiteeversionguid.containsChange'], true))
diff --git a/eng/pipelines/coreclr/templates/build-jit-job.yml b/eng/pipelines/coreclr/templates/build-jit-job.yml
deleted file mode 100644
index 1c131f3fc8f1..000000000000
--- a/eng/pipelines/coreclr/templates/build-jit-job.yml
+++ /dev/null
@@ -1,144 +0,0 @@
-parameters:
-  archType: ''
-  buildConfig: ''
-  container: ''
-  crossBuild: false
-  osGroup: ''
-  osSubgroup: ''
-  condition: true
-  pool: ''
-  timeoutInMinutes: ''
-  variables: {}
-  dependOnEvaluatePaths: false
-  disableComponentGovernance: false
-  uploadAs: 'azureBlob'
-
-### Product build
-jobs:
-- template: xplat-pipeline-job.yml
-  parameters:
-    buildConfig: ${{ parameters.buildConfig }}
-    archType: ${{ parameters.archType }}
-    osGroup: ${{ parameters.osGroup }}
-    osSubgroup: ${{ parameters.osSubgroup }}
-    condition: ${{ parameters.condition }}
-    helixType: 'build/product/'
-    enableMicrobuild: true
-    pool: ${{ parameters.pool }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
-    disableComponentGovernance: ${{ parameters.disableComponentGovernance }}
-
-    # Compute job name from template parameters
-    name: ${{ format('coreclr_jit_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-    displayName: ${{ format('CoreCLR JIT Build {0}{1} {2} {3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-
-    # Run all steps in the container.
-    # Note that the containers are defined in platform-matrix.yml
-    container: ${{ parameters.container }}
-
-    timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
-
-    crossBuild: ${{ parameters.crossBuild }}
-
-    gatherAssetManifests: true
-
-    variables:
-    - name: osGroup
-      value: ${{ parameters.osGroup }}
-    - name: osSubgroup
-      value: ${{ parameters.osSubgroup }}
-
-    - name: publishLogsArtifactPrefix
-      value: 'BuildLogs_CoreCLR_JIT'
-    - name: uploadAs
-      value: ${{ parameters.uploadAs }}
-
-    - name: compilerArg
-      value: ''
-
-    - ${{ if eq(parameters.osGroup, 'windows') }}:
-      - name: PythonSetupScript
-        value: 'py -3 -m venv $(Build.SourcesDirectory)\venv'
-      - name: PythonScript
-        value: '$(Build.SourcesDirectory)\venv\Scripts\python.exe'
-      - name: PipScript
-        value: '$(Build.SourcesDirectory)\venv\Scripts\python.exe -m pip'
-    - ${{ if ne(parameters.osGroup, 'windows') }}:
-      - name: PythonSetupScript
-        value: 'python3 -m venv $(Build.SourcesDirectory)/venv'
-      - name: PythonScript
-        value: '$(Build.SourcesDirectory)/venv/bin/python3'
-      - name: PipScript
-        value: '$(Build.SourcesDirectory)/venv/bin/pip3'
-
-    - ${{ parameters.variables }}
-
-    steps:
-
-    # Install native dependencies
-    # Linux builds use docker images with dependencies preinstalled,
-    # and FreeBSD builds use a build agent with dependencies
-    # preinstalled, so we only need this step for OSX and Windows.
-    - ${{ if in(parameters.osGroup, 'osx', 'maccatalyst', 'ios', 'iossimulator', 'tvos', 'tvossimulator') }}:
-      - script: $(Build.SourcesDirectory)/eng/install-native-dependencies.sh $(osGroup)
-        displayName: Install native dependencies (OSX)
-
-    # Install internal tools on official builds
-    # Since our internal tools are behind an authenticated feed,
-    # we need to use the DotNetCli AzDO task to restore from the feed using a service connection.
-    # We can't do this from within the build, so we need to do this as a separate step.
-    - ${{ if and(eq(variables['System.TeamProject'], 'internal'), ne(variables['Build.Reason'], 'PullRequest')) }}:
-      - template: /eng/pipelines/common/restore-internal-tools.yml
-
-    # Build/Generate native prerequisites
-    - script: $(Build.SourcesDirectory)$(dir)build$(scriptExt) -subset clr.nativeprereqs $(crossArg) -arch $(archType) -c $(buildConfig) -ci /bl:$(Build.SourcesDirectory)artifacts/log/$(buildConfig)/CoreCLRNativePrereqs.binlog
-      displayName: Build and generate native prerequisites
-
-    # Build CoreCLR JIT
-    - ${{ if ne(parameters.osGroup, 'windows') }}:
-      - script: $(Build.SourcesDirectory)/src/coreclr/build-runtime$(scriptExt) $(buildConfig) $(archType) $(crossArg) -ci $(compilerArg) -component alljits -component spmi
-        displayName: Build CoreCLR JIT
-    - ${{ if eq(parameters.osGroup, 'windows') }}:
-      - script: $(Build.SourcesDirectory)/src/coreclr/build-runtime$(scriptExt) $(buildConfig) $(archType) -ci -component alljits -component spmi
-        displayName: Build CoreCLR JIT
-
-    - ${{ if eq(parameters.uploadAs, 'azureBlob') }}:
-      # Add authenticated pip feed
-      - task: PipAuthenticate@1
-        displayName: 'Pip Authenticate'
-        inputs:
-          artifactFeeds: public/dotnet-public-pypi
-          onlyAddExtraIndex: false
-
-      - script: $(PythonSetupScript)
-        displayName: Enable python venv
-
-      # Ensure the Python azure-storage-blob package is installed before doing the upload.
-      - script: $(PipScript) install --upgrade pip && $(PipScript) install azure.storage.blob==12.5.0 --force-reinstall
-        displayName: Upgrade Pip to latest and install azure-storage-blob Python package
-
-      - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/jitrollingbuild.py upload -build_type $(buildConfig) -arch $(archType) -host_os $(osGroup) -git_hash $(Build.SourceVersion) --use_latest_jit_change
-        displayName: Upload JIT to Azure Storage
-        env:
-          CLRJIT_AZ_KEY: $(clrjit_key1) # secret key stored as variable in pipeline
-
-    - ${{ if eq(parameters.uploadAs, 'pipelineArtifacts') }}:
-      # Publish product output directory for consumption by tests.
-      - template: /eng/pipelines/common/upload-artifact-step.yml
-        parameters:
-          rootFolder: $(buildProductRootFolderPath)
-          includeRootFolder: false
-          archiveType: $(archiveType)
-          tarCompression: $(tarCompression)
-          archiveExtension: $(archiveExtension)
-          artifactName: $(buildProductArtifactName)
-          displayName: 'product build'
-
-    # Publish Logs
-    - task: PublishPipelineArtifact@1
-      displayName: Publish Logs
-      inputs:
-        targetPath: $(Build.SourcesDirectory)/artifacts/log
-        artifactName: '$(publishLogsArtifactPrefix)_Attempt$(System.JobAttempt)_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-      continueOnError: true
-      condition: always()
diff --git a/eng/pipelines/coreclr/templates/build-job.yml b/eng/pipelines/coreclr/templates/build-job.yml
deleted file mode 100644
index 9ed21ff433cd..000000000000
--- a/eng/pipelines/coreclr/templates/build-job.yml
+++ /dev/null
@@ -1,262 +0,0 @@
-parameters:
-  archType: ''
-  buildConfig: ''
-  condition: true
-  container: ''
-  crossBuild: false
-  dependOnEvaluatePaths: false
-  disableComponentGovernance: false
-  disableClrTest: false
-  isOfficialBuild: false
-  osGroup: ''
-  osSubgroup: ''
-  platform: ''
-  pool: ''
-  runtimeVariant: ''
-  signBinaries: false
-  testGroup: ''
-  timeoutInMinutes: ''
-  variables: {}
-
-### Product build
-jobs:
-- template: xplat-pipeline-job.yml
-  parameters:
-    buildConfig: ${{ parameters.buildConfig }}
-    archType: ${{ parameters.archType }}
-    osGroup: ${{ parameters.osGroup }}
-    osSubgroup: ${{ parameters.osSubgroup }}
-    runtimeVariant: ${{ parameters.runtimeVariant }}
-    testGroup: ${{ parameters.testGroup }}
-    helixType: 'build/product/'
-    enableMicrobuild: true
-    pool: ${{ parameters.pool }}
-    condition: ${{ parameters.condition }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
-    disableComponentGovernance: ${{ parameters.disableComponentGovernance }}
-    disableClrTest: ${{ parameters.disableClrTest }}
-
-    # Compute job name from template parameters
-    name: ${{ format('coreclr_{0}_product_build_{1}{2}_{3}_{4}',
-      parameters.runtimeVariant,
-      parameters.osGroup,
-      parameters.osSubgroup,
-      parameters.archType,
-      parameters.buildConfig) }}
-    displayName: ${{ format('CoreCLR {0} Product Build {1}{2} {3} {4}',
-      parameters.runtimeVariant, parameters.osGroup, parameters.osSubgroup,
-      parameters.archType,
-      parameters.buildConfig) }}
-
-    # Run all steps in the container.
-    # Note that the containers are defined in platform-matrix.yml
-    container: ${{ parameters.container }}
-
-    timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
-
-    crossBuild: ${{ parameters.crossBuild }}
-
-    gatherAssetManifests: true
-    variables:
-    - name: osGroup
-      value: ${{ parameters.osGroup }}
-    - name: osSubgroup
-      value: ${{ parameters.osSubgroup }}
-    - name: compilerArg
-      value: ''
-    - name: publishLogsArtifactPrefix
-      value: 'BuildLogs_CoreCLR'
-    - name: officialBuildIdArg
-      value: ''
-    - ${{ if eq(parameters.isOfficialBuild, true) }}:
-      - name: officialBuildIdArg
-        value: '/p:OfficialBuildId=$(Build.BuildNumber)'
-    - name: enforcePgoArg
-      value: ''
-    # The EnforcePGO script is only supported on Windows and is not supported on arm64.
-    - ${{ if and(eq(parameters.buildConfig, 'Release'), and(eq(parameters.osGroup, 'windows'), ne(parameters.archType, 'arm64'))) }}:
-      - name: enforcePgoArg
-        value: '-enforcepgo'
-
-    - name: clrInterpreterBuildArg
-      value: ''
-    - ${{ if eq(parameters.testGroup, 'clrinterpreter') }}:
-      - name: clrInterpreterBuildArg
-        value: '-cmakeargs "-DFEATURE_INTERPRETER=1"'
-
-    - name: clrRuntimeComponentsBuildArg
-      value: ''
-    - ${{ if ne(parameters.testGroup, 'innerloop') }}:
-      - name: clrRuntimeComponentsBuildArg
-        value: '-component runtime -component alljits -component nativeaot -component spmi '
-
-    - name: SignType
-      value: $[ coalesce(variables.OfficialSignType, 'real') ]
-
-    # Set a default empty argument for the pgo path.
-    # This will be set during the 'native prerequisites' step if PGO optimization is enabled.
-    - name: CoreClrPgoDataArg
-      value: ''
-
-    - name: nativeSymbols
-      value: ''
-    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
-      - name: nativeSymbols
-        value: '--keepnativesymbols'
-
-    - ${{ parameters.variables }}
-
-    steps:
-    # Install native dependencies
-    # Linux builds use docker images with dependencies preinstalled,
-    # and FreeBSD builds use a build agent with dependencies
-    # preinstalled, so we only need this step for OSX and Windows.
-    - ${{ if in(parameters.osGroup, 'osx', 'maccatalyst', 'ios', 'iossimulator', 'tvos', 'tvossimulator') }}:
-      - script: $(Build.SourcesDirectory)/eng/install-native-dependencies.sh $(osGroup)
-        displayName: Install native dependencies
-
-    # Install internal tools on official builds
-    # Since our internal tools are behind an authenticated feed,
-    # we need to use the DotNetCli AzDO task to restore from the feed using a service connection.
-    # We can't do this from within the build, so we need to do this as a separate step.
-    - ${{ if and(eq(variables['System.TeamProject'], 'internal'), ne(variables['Build.Reason'], 'PullRequest')) }}:
-      - template: /eng/pipelines/common/restore-internal-tools.yml
-
-    # Install MicroBuild for signing the DAC and DBI
-    - ${{ if and(eq(variables['System.TeamProject'], 'internal'), eq(parameters.signBinaries, true), eq(parameters.osGroup, 'windows')) }}:
-      - task: MicroBuildSigningPlugin@2
-        displayName: Install MicroBuild plugin for Signing
-        inputs:
-          signType: $(SignType)
-          zipSources: false
-          feedSource: https://dnceng.pkgs.visualstudio.com/_packaging/MicroBuildToolset/nuget/v3/index.json
-        continueOnError: false
-        condition: and(succeeded(), in(variables['SignType'], 'real', 'test'))
-
-    - ${{ if ne(variables['System.TeamProject'], 'public') }}:
-      - ${{ if ne(parameters.osGroup, 'windows') }}:
-        - task: Bash@3
-          displayName: Setup Private Feeds Credentials
-          inputs:
-            filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.sh
-            arguments: $(Build.SourcesDirectory)/NuGet.config $Token
-          env:
-            Token: $(dn-bot-dnceng-artifact-feeds-rw)
-      - ${{ if eq(parameters.osGroup, 'windows') }}:
-        - task: PowerShell@2
-          displayName: Setup Private Feeds Credentials
-          inputs:
-            filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.ps1
-            arguments: -ConfigFile $(Build.SourcesDirectory)/NuGet.config -Password $Env:Token
-          env:
-            Token: $(dn-bot-dnceng-artifact-feeds-rw)
-
-    - ${{ if in(parameters.osGroup, 'osx', 'ios', 'tvos') }}:
-      - script: |
-          du -sh $(Build.SourcesDirectory)/*
-          df -h
-        displayName: Disk Usage before Build
-
-    # Build/Generate native prerequisites
-    - script: $(Build.SourcesDirectory)$(dir)build$(scriptExt) -subset clr.nativeprereqs $(crossArg) -arch $(archType) $(osArg) -c $(buildConfig) $(officialBuildIdArg) -ci /bl:$(Build.SourcesDirectory)artifacts/log/$(buildConfig)/CoreCLRNativePrereqs.binlog
-      displayName: Build and generate native prerequisites
-
-    # Build CoreCLR Runtime
-    - ${{ if ne(parameters.osGroup, 'windows') }}:
-      - script: $(Build.SourcesDirectory)/src/coreclr/build-runtime$(scriptExt) $(buildConfig) $(archType) $(crossArg) $(osArg) -ci $(compilerArg) $(clrRuntimeComponentsBuildArg) $(officialBuildIdArg) $(clrInterpreterBuildArg) $(CoreClrPgoDataArg) $(nativeSymbols)
-        displayName: Build CoreCLR Runtime
-    - ${{ if eq(parameters.osGroup, 'windows') }}:
-      - script: $(Build.SourcesDirectory)/src/coreclr/build-runtime$(scriptExt) $(buildConfig) $(archType) -ci $(enforcePgoArg) $(officialBuildIdArg) $(clrInterpreterBuildArg) $(CoreClrPgoDataArg)
-        displayName: Build CoreCLR Runtime
-
-    - ${{ if or(eq(parameters.crossBuild, 'true'), ne(parameters.archType, 'x64')) }}:
-      - script: $(Build.SourcesDirectory)/src/coreclr/build-runtime$(scriptExt) $(buildConfig) $(archType) -hostarch x64 $(osArg) -ci $(compilerArg) -component crosscomponents -cmakeargs "-DCLR_CROSS_COMPONENTS_BUILD=1" $(officialBuildIdArg)
-        displayName: Build CoreCLR Cross-Arch Tools (Tools that run on x64 targeting x86)
-
-    - ${{ if in(parameters.osGroup, 'osx', 'ios', 'tvos') }}:
-      - script: |
-          du -sh $(Build.SourcesDirectory)/*
-          df -h
-        displayName: Disk Usage after Build
-
-    # Build CoreCLR Managed Components
-    - script: $(Build.SourcesDirectory)$(dir)build$(scriptExt) -subset clr.corelib+clr.nativecorelib+clr.nativeaotlibs+clr.tools+clr.packages $(crossArg) $(compilerArg) -arch $(archType) $(osArg) -c $(buildConfig) $(officialBuildIdArg) -ci
-      displayName: Build managed product components and packages
-
-    # Build native test components
-    - ${{ if and(ne(parameters.isOfficialBuild, true), ne(parameters.disableClrTest, true)) }}:
-      - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) skipmanaged skipgeneratelayout $(buildConfig) $(archType) $(crossArg) $(osArg) $(priorityArg) $(compilerArg)
-        displayName: Build native test components
-
-    # Sign and add entitlements to these MacOS binaries
-    - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
-      - ${{ if eq(parameters.osGroup, 'osx') }}:
-
-        - template: /eng/pipelines/common/macos-sign-with-entitlements.yml
-          parameters:
-            filesToSign:
-            - name: createdump
-              path: $(buildProductRootFolderPath)
-            - name: corerun
-              path: $(buildProductRootFolderPath)
-
-        - task: CopyFiles@2
-          displayName: 'Copy signed createdump to sharedFramework'
-          inputs:
-            contents: createdump
-            sourceFolder: $(buildProductRootFolderPath)
-            targetFolder: $(buildProductRootFolderPath)/sharedFramework
-            overWrite: true
-
-    - ${{ if and(eq(parameters.osGroup, 'windows'), eq(parameters.signBinaries, true)) }}:
-      - template: /eng/pipelines/coreclr/templates/sign-diagnostic-files.yml
-        parameters:
-          basePath: $(buildProductRootFolderPath)
-          isOfficialBuild: ${{ parameters.signBinaries }}
-          timeoutInMinutes: 30
-
-    - ${{ if ne(parameters.disableClrTest, true) }}:
-      # Publish product output directory for consumption by tests.
-      - template: /eng/pipelines/common/upload-artifact-step.yml
-        parameters:
-          rootFolder: $(buildProductRootFolderPath)
-          includeRootFolder: false
-          archiveType: $(archiveType)
-          tarCompression: $(tarCompression)
-          archiveExtension: $(archiveExtension)
-          artifactName: $(buildProductArtifactName)
-          displayName: 'product build'
-
-    - ${{ if and(ne(parameters.testGroup, ''), ne(parameters.disableClrTest, true)) }}:
-      # Publish test native components for consumption by test execution.
-      - ${{ if ne(parameters.isOfficialBuild, true) }}:
-        - template: /eng/pipelines/common/upload-artifact-step.yml
-          parameters:
-            rootFolder: $(nativeTestArtifactRootFolderPath)
-            includeRootFolder: false
-            archiveType: $(archiveType)
-            tarCompression: $(tarCompression)
-            archiveExtension: $(archiveExtension)
-            artifactName: $(nativeTestArtifactName)
-            displayName: 'native test components'
-
-    # Save packages using the prepare-signed-artifacts format.
-    - ${{ if eq(parameters.isOfficialBuild, true) }}:
-      - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml
-        parameters:
-          name: ${{ parameters.platform }}
-
-    - ${{ if and(eq(parameters.isOfficialBuild, true), eq(parameters.osGroup, 'windows')) }}:
-      - powershell: ./eng/collect_vsinfo.ps1 -ArchiveRunName postbuild_log
-        displayName: Collect vslogs on exit
-        condition: always()
-
-
-    # Publish Logs
-    - task: PublishPipelineArtifact@1
-      displayName: Publish Logs
-      inputs:
-        targetPath: $(Build.SourcesDirectory)/artifacts/log
-        artifactName: '$(publishLogsArtifactPrefix)_Attempt$(System.JobAttempt)_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-      continueOnError: true
-      condition: always()
diff --git a/eng/pipelines/coreclr/templates/build-native-test-assets-step.yml b/eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
new file mode 100644
index 000000000000..e8d1fcfe69cc
--- /dev/null
+++ b/eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
@@ -0,0 +1,16 @@
+# Build the native assets for the tests in the src/tests
+parameters:
+  compiler: ''
+
+steps:
+  - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) skipmanaged skipgeneratelayout $(nativeTestArtifactConfig) $(archType) $(crossArg) $(priorityArg) ${{ parameters.compiler }}
+    displayName: Build native test components
+  - template: /eng/pipelines/common/upload-artifact-step.yml
+    parameters:
+      rootFolder: $(nativeTestArtifactRootFolderPath)
+      includeRootFolder: false
+      archiveType: $(archiveType)
+      tarCompression: $(tarCompression)
+      archiveExtension: $(archiveExtension)
+      artifactName: $(nativeTestArtifactName)
+      displayName: 'native test components'
diff --git a/eng/pipelines/coreclr/templates/build-perf-bdn-app.yml b/eng/pipelines/coreclr/templates/build-perf-bdn-app.yml
index ddcea7b91471..02c963728857 100644
--- a/eng/pipelines/coreclr/templates/build-perf-bdn-app.yml
+++ b/eng/pipelines/coreclr/templates/build-perf-bdn-app.yml
@@ -16,7 +16,7 @@ parameters:
   archiveExtension: ''
   archiveType: ''
   tarCompression: ''
-  framework: 'net8.0' # Framework version to get versions for and build for
+  framework: 'net9.0' # Framework version to get versions for and build for
   perfRepo: 'main' # Perf repo to pull for the PerfLabExporter
 
 
@@ -61,13 +61,13 @@ steps:
       echo '{ }' > ./global.json
       curl -o NuGet.config 'https://raw.githubusercontent.com/dotnet/maui/${{parameters.framework}}/NuGet.config'
       curl -o dotnet-install.sh 'https://dotnet.microsoft.com/download/dotnet/scripts/v1/dotnet-install.sh'
-      curl -Lo maui-supported-sdk-version.json 'https://aka.ms/dotnet/sdk/maui/${{parameters.framework}}.json'
+      curl -Lo maui-supported-sdk-version.json 'https://maui.blob.core.windows.net/metadata/sdks/${{parameters.framework}}.json'
       version=$(sed -nr 's/\s*"version": "(.*)"/\1/p' ./maui-supported-sdk-version.json)
       chmod -R a+rx .
       ./dotnet-install.sh --version $version --install-dir .
       ./dotnet --info
-      ./dotnet workload install maui --from-rollback-file https://aka.ms/dotnet/maui/${{parameters.framework}}.json --configfile NuGet.config
-      ./dotnet workload install android --from-rollback-file https://aka.ms/dotnet/maui/${{parameters.framework}}.json --configfile NuGet.config
+      ./dotnet workload install maui --from-rollback-file https://maui.blob.core.windows.net/metadata/rollbacks/${{parameters.framework}}.json --configfile NuGet.config
+      ./dotnet workload install android --from-rollback-file https://maui.blob.core.windows.net/metadata/rollbacks/${{parameters.framework}}.json --configfile NuGet.config
     displayName: Install MAUI workload
     workingDirectory: $(Build.SourcesDirectory)
 
@@ -147,7 +147,7 @@ steps:
 
 # Remove the embed assemblies from source
   - script: |
-      ../dotnet build ./src/Core/tests/Benchmarks.Droid/Benchmarks.Droid.csproj --configuration Release -bl:BenchmarksDroid.binlog /p:TF_Build=False
+      ../dotnet build ./src/Core/tests/Benchmarks.Droid/Benchmarks.Droid.csproj --configuration Release -bl:BenchmarksDroid.binlog /p:TF_Build=False /p:ForceNet8Current=true
       mv ./src/Core/tests/Benchmarks.Droid/bin/Release/${{parameters.framework}}-android/android-arm64/com.microsoft.maui.benchmarks-Signed.apk ./MonoBenchmarksDroid.apk
     displayName:  Build BDN Android App
     workingDirectory: $(Build.SourcesDirectory)/maui
diff --git a/eng/pipelines/coreclr/templates/crossgen2-comparison-build-job.yml b/eng/pipelines/coreclr/templates/crossgen2-comparison-build-job.yml
index 3ca668998b13..b576e9ceb458 100644
--- a/eng/pipelines/coreclr/templates/crossgen2-comparison-build-job.yml
+++ b/eng/pipelines/coreclr/templates/crossgen2-comparison-build-job.yml
@@ -7,7 +7,6 @@ parameters:
   helixQueues: ''
   runtimeVariant: ''
   crossBuild: false
-  dependOnEvaluatePaths: false
   variables: {}
   pool: ''
 
@@ -24,7 +23,7 @@ parameters:
 ### crossgen matches that of native, e.g. arm-hosted-arm-targeting, crossgen.
 
 jobs:
-- template: xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
     buildConfig: ${{ parameters.buildConfig }}
     archType: ${{ parameters.archType }}
@@ -34,7 +33,6 @@ jobs:
     liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }}
     helixType: 'test/crossgen-comparison/'
     pool: ${{ parameters.pool }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
 
     # Compute job name from template parameters
     name: ${{ format('test_crossgen2_comparison_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
@@ -82,9 +80,7 @@ jobs:
 
     # Test job depends on the corresponding build job
     dependsOn:
-    - ${{ format('coreclr_{0}_product_build_{1}{2}_{3}_{4}', parameters.runtimeVariant, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-    - ${{ if ne(parameters.liveLibrariesBuildConfig, '') }}:
-      - ${{ format('libraries_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.liveLibrariesBuildConfig) }}
+    - build_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_${{ parameters.buildConfig }}_
 
     # Run all steps in the container.
     # Note that the containers are defined in platform-matrix.yml
@@ -96,21 +92,11 @@ jobs:
     # Download product build
     - template: /eng/pipelines/common/download-artifact-step.yml
       parameters:
-        unpackFolder: $(buildProductRootFolderPath)
-        artifactFileName: '$(buildProductArtifactName)$(archiveExtension)'
-        artifactName: '$(buildProductArtifactName)'
+        unpackFolder: $(Build.SourcesDirectory)/artifacts/bin
+        artifactFileName: 'Release_CoreCLR_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)$(archiveExtension)'
+        artifactName: 'Release_CoreCLR_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)'
         displayName: 'product build'
 
-    # Optionally download live-built libraries
-    - ${{ if ne(parameters.liveLibrariesBuildConfig, '') }}:
-      - template: /eng/pipelines/common/download-artifact-step.yml
-        parameters:
-          unpackFolder: $(librariesDownloadDir)
-          cleanUnpackFolder: false
-          artifactFileName: '$(librariesBuildArtifactName)$(archiveExtension)'
-          artifactName: '$(librariesBuildArtifactName)'
-          displayName: 'live-built libraries'
-
     # Populate Core_Root
     - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(buildConfig) $(archType) $(crossArg) generatelayoutonly
       displayName: Populate Core_Root
diff --git a/eng/pipelines/coreclr/templates/crossgen2-comparison-job.yml b/eng/pipelines/coreclr/templates/crossgen2-comparison-job.yml
index 4faa6c501cfe..1183d2598a48 100644
--- a/eng/pipelines/coreclr/templates/crossgen2-comparison-job.yml
+++ b/eng/pipelines/coreclr/templates/crossgen2-comparison-job.yml
@@ -7,17 +7,10 @@ parameters:
   helixQueues: ''
   runtimeVariant: ''
   crossBuild: false
-  dependOnEvaluatePaths: false
   variables: {}
   pool: ''
   targetarch: ''
   targetos: ''
-
-  # When set to a non-empty value (Debug / Release), it determines libraries
-  # build configuration to use for the tests. Setting this property implies
-  # a dependency of this job on the appropriate libraries build and is used
-  # to construct the name of the Azure artifact representing libraries build
-  # to use for building the tests.
   liveLibrariesBuildConfig: ''
 
 ### Crossgen-comparison job
@@ -26,7 +19,7 @@ parameters:
 ### crossgen matches that of native, e.g. arm-hosted-arm-targeting, crossgen.
 
 jobs:
-- template: xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
     buildConfig: ${{ parameters.buildConfig }}
     archType: ${{ parameters.archType }}
@@ -38,7 +31,6 @@ jobs:
     pool: ${{ parameters.pool }}
     targetos: ${{ parameters.targetos }}
     targetarch: ${{ parameters.targetarch }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
 
     # Compute job name from template parameters
     name: ${{ format('test_crossgen2_comparison_{0}{1}_{2}_{3}_{4}_{5}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig, parameters.targetarch, parameters.targetos) }}
@@ -84,9 +76,7 @@ jobs:
     # Test job depends on the corresponding build job
     dependsOn:
     - ${{ format('test_crossgen2_comparison_build_{0}_{1}_Release', parameters.targetos, parameters.targetarch)}}
-    - ${{ format('coreclr_{0}_product_build_{1}{2}_{3}_{4}', parameters.runtimeVariant, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-    - ${{ if ne(parameters.liveLibrariesBuildConfig, '') }}:
-      - ${{ format('libraries_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.liveLibrariesBuildConfig) }}
+    - build_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_${{ parameters.buildConfig }}_
 
     # Run all steps in the container.
     # Note that the containers are defined in platform-matrix.yml
@@ -98,21 +88,11 @@ jobs:
     # Download product build
     - template: /eng/pipelines/common/download-artifact-step.yml
       parameters:
-        unpackFolder: $(buildProductRootFolderPath)
-        artifactFileName: '$(buildProductArtifactName)$(archiveExtension)'
-        artifactName: '$(buildProductArtifactName)'
+        unpackFolder: $(Build.SourcesDirectory)/artifacts/bin
+        artifactFileName: 'Release_CoreCLR_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)$(archiveExtension)'
+        artifactName: 'Release_CoreCLR_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)'
         displayName: 'product build'
 
-    # Optionally download live-built libraries
-    - ${{ if ne(parameters.liveLibrariesBuildConfig, '') }}:
-      - template: /eng/pipelines/common/download-artifact-step.yml
-        parameters:
-          unpackFolder: $(librariesDownloadDir)
-          cleanUnpackFolder: false
-          artifactFileName: '$(librariesBuildArtifactName)$(archiveExtension)'
-          artifactName: '$(librariesBuildArtifactName)'
-          displayName: 'live-built libraries'
-
     # Populate Core_Root
     - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(buildConfig) $(archType) $(crossArg) generatelayoutonly
       displayName: Populate Core_Root
diff --git a/eng/pipelines/coreclr/templates/format-job.yml b/eng/pipelines/coreclr/templates/format-job.yml
index 88e5184db032..a4d5181fd6b5 100644
--- a/eng/pipelines/coreclr/templates/format-job.yml
+++ b/eng/pipelines/coreclr/templates/format-job.yml
@@ -5,7 +5,6 @@ parameters:
   osSubgroup: ''
   container: ''
   crossBuild: false
-  dependOnEvaluatePaths: false
   timeoutInMinutes: ''
   variables: {}
   pool: ''
@@ -13,7 +12,7 @@ parameters:
 
 ### Format job
 jobs:
-- template: xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
     buildConfig: ${{ parameters.buildConfig }}
     archType: ${{ parameters.archType }}
@@ -21,24 +20,18 @@ jobs:
     osSubgroup: ${{ parameters.osSubgroup }}
     container: ${{ parameters.container }}
     crossBuild: ${{ parameters.crossBuild }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
     timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
     name: ${{ format('format_{0}{1}_{2}', parameters.osGroup, parameters.osSubgroup, parameters.archType) }}
     displayName: ${{ format('Formatting {0}{1} {2}', parameters.osGroup, parameters.osSubgroup, parameters.archType) }}
     helixType: 'format'
     pool: ${{ parameters.pool }}
     variables:
-
+    - template: /eng/pipelines/coreclr/templates/jit-python-variables.yml
+      parameters:
+        osGroup: ${{ parameters.osGroup }}
     - ${{ each variable in parameters.variables }}:
       - ${{insert}}: ${{ variable }}
 
-    - ${{ if eq(parameters.osGroup, 'windows') }}:
-      - name: PythonScript
-        value: 'py -3'
-    - ${{ if ne(parameters.osGroup, 'windows') }}:
-      - name: PythonScript
-        value: 'python3'
-
     - ${{ if eq(parameters.osGroup, 'linux') }}:
       - name: LinuxCrossArg
         value: '--cross'
@@ -46,7 +39,7 @@ jobs:
       - name: LinuxCrossArg
         value: ''
 
-    condition: ${{ parameters.condition }}
+    condition: and(succeeded(), ${{ parameters.condition }})
 
     steps:
 
@@ -55,10 +48,12 @@ jobs:
       displayName: 'Install .NET SDK'
       inputs:
         packageType: 'sdk'
-        version: '6.x'
-        includePreviewVersions: true
+        version: '8.x'
         installationPath: $(Agent.ToolsDirectory)/dotnet
 
+    - script: $(PythonSetupScript)
+      displayName: Enable python venv
+
     - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/jitformat.py -r $(Build.SourcesDirectory) -o $(osGroup) -a $(archType) $(LinuxCrossArg)
       displayName: Run jitformat.py
 
diff --git a/eng/pipelines/coreclr/templates/helix-queues-setup.yml b/eng/pipelines/coreclr/templates/helix-queues-setup.yml
index 7b4ce6c6c7f4..c8558ac117ec 100644
--- a/eng/pipelines/coreclr/templates/helix-queues-setup.yml
+++ b/eng/pipelines/coreclr/templates/helix-queues-setup.yml
@@ -8,7 +8,6 @@ parameters:
   pool: ''
   platform: ''
   shouldContinueOnError: false
-  dependOnEvaluatePaths: false
   jobParameters: {}
 
 # parameters.jobParameters.helixQueueGroup values:
@@ -31,7 +30,6 @@ jobs:
     pool: ${{ parameters.pool }}
     platform: ${{ parameters.platform }}
     shouldContinueOnError: ${{ parameters.shouldContinueOnError }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
     helixQueues:
 
     # iOS Simulator/Mac Catalyst arm64
@@ -86,9 +84,9 @@ jobs:
     # Linux musl arm32
     - ${{ if eq(parameters.platform, 'linux_musl_arm') }}:
       - ${{ if eq(variables['System.TeamProject'], 'public') }}:
-        - (Alpine.316.Arm32.Open)Ubuntu.2004.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.16-helix-arm32v7
+        - (Alpine.316.Arm32.Open)Ubuntu.2004.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.17-helix-arm32v7
       - ${{ if eq(variables['System.TeamProject'], 'internal') }}:
-        - (Alpine.316.Arm32)Ubuntu.2004.ArmArch@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.16-helix-arm32v7
+        - (Alpine.316.Arm32)Ubuntu.2004.ArmArch@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.17-helix-arm32v7
 
     # Linux musl arm64
     - ${{ if eq(parameters.platform, 'linux_musl_arm64') }}:
diff --git a/eng/pipelines/coreclr/templates/jit-exploratory-job.yml b/eng/pipelines/coreclr/templates/jit-exploratory-job.yml
deleted file mode 100644
index 94771613c666..000000000000
--- a/eng/pipelines/coreclr/templates/jit-exploratory-job.yml
+++ /dev/null
@@ -1,75 +0,0 @@
-parameters:
-  buildConfig: ''
-  archType: ''
-  osGroup: ''
-  osSubgroup: ''
-  runtimeVariant: ''
-  testGroup: ''
-  framework: net6.0 # Specify the appropriate framework when running release branches (ie netcoreapp3.0 for release/3.0)
-  liveLibrariesBuildConfig: ''
-  variables: {}
-  helixQueues: ''
-  runtimeType: 'coreclr'
-  pool: ''
-  codeGenType: 'JIT'
-  runJobTemplate: '/eng/pipelines/coreclr/templates/jit-run-exploratory-job.yml'
-  additionalSetupParameters: ''
-  toolName: '' # Antigen or Fuzzlyn
-
-### Exploratory job
-
-### Each exploratory job depends on a corresponding build job with the same
-### buildConfig and archType.
-
-jobs:
-- template: ${{ parameters.runJobTemplate }}
-  parameters:
-    # Compute job name from template parameters
-    jobName: ${{ format('exploratory_{0}{1}_{2}_{3}_{4}_{5}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig, parameters.runtimeType, parameters.codeGenType) }}
-    displayName: ${{ format('Exploratory {0}{1} {2} {3} {4} {5}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig, parameters.runtimeType, parameters.codeGenType) }}
-    pool: ${{ parameters.pool }}
-    buildConfig: ${{ parameters.buildConfig }}
-    archType: ${{ parameters.archType }}
-    osGroup: ${{ parameters.osGroup }}
-    osSubgroup: ${{ parameters.osSubgroup }}
-    runtimeVariant: ${{ parameters.runtimeVariant }}
-    liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }}
-    runtimeType: ${{ parameters.runtimeType }}
-    codeGenType: ${{ parameters.codeGenType }}
-    testGroup: ${{ parameters.testGroup }}
-    helixQueues: ${{ parameters.helixQueues }}
-    additionalSetupParameters: ${{ parameters.additionalSetupParameters }}
-    toolName: ${{ parameters.toolName }}
-    # Test job depends on the corresponding build job
-    dependsOn:
-    - ${{ format('coreclr_{0}_product_build_{1}{2}_{3}_{4}', parameters.runtimeVariant, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-    - ${{ format('libraries_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.liveLibrariesBuildConfig) }}
-
-    variables: ${{ parameters.variables }}
-
-    frameworks:
-      - ${{ parameters.framework }}
-    steps:
-    # Extra steps that will be passed to the exploratory template and run before sending the job to helix (all of which is done in the template)
-
-    # Optionally download live-built libraries
-    - template: /eng/pipelines/common/download-artifact-step.yml
-      parameters:
-        unpackFolder: $(librariesDownloadDir)
-        cleanUnpackFolder: false
-        artifactFileName: '$(librariesBuildArtifactName)$(archiveExtension)'
-        artifactName: '$(librariesBuildArtifactName)'
-        displayName: 'live-built libraries'
-
-    # Download coreclr
-    - template: /eng/pipelines/common/download-artifact-step.yml
-      parameters:
-        unpackFolder: $(buildProductRootFolderPath)
-        artifactFileName: '$(buildProductArtifactName)$(archiveExtension)'
-        artifactName: '$(buildProductArtifactName)'
-        displayName: 'Coreclr product build'
-
-    # Create Core_Root
-    - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(buildConfig) $(archType) generatelayoutonly $(librariesOverrideArg)
-      displayName: Create Core_Root
-      condition: succeeded()
diff --git a/eng/pipelines/coreclr/templates/jit-exploratory-steps.yml b/eng/pipelines/coreclr/templates/jit-exploratory-steps.yml
new file mode 100644
index 000000000000..b26906ded6ad
--- /dev/null
+++ b/eng/pipelines/coreclr/templates/jit-exploratory-steps.yml
@@ -0,0 +1,90 @@
+parameters:
+  osGroup: ''                     # required -- operating system for the job
+  osSubgroup: ''                  # optional -- operating system subgroup
+  archType: ''                    # required -- targeting CPU architecture
+  buildConfig: ''                 # required -- build configuration
+  helixQueues: ''                 # required -- Helix queues
+  toolName: ''                    # required -- which tool to use: Antigen or Fuzzlyn?
+
+steps:
+- script: $(PythonSetupScript)
+  displayName: Enable python venv
+
+- script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/fuzzer_setup.py -tool_name $(toolName) -source_directory $(Build.SourcesDirectory) -core_root_directory $(Core_Root_Dir) -arch $(archType) -platform $(osGroup)
+  displayName: ${{ format('{0} setup ({1}-{2})', parameters.toolName, parameters.osGroup, parameters.archType) }}
+
+  # Run exploratory tool in helix
+- template: /eng/pipelines/common/templates/runtimes/send-to-helix-step.yml
+  parameters:
+    displayName: 'Send job to Helix'
+    helixBuild: $(Build.BuildNumber)
+    helixSource: $(_HelixSource)
+    helixType: 'build/tests/'
+    helixQueues: ${{ join(',', parameters.helixQueues) }}
+    creator: dotnet-bot
+    WorkItemTimeout: 2:30 # 2.5 hours
+    WorkItemDirectory: '$(WorkItemDirectory)'
+    CorrelationPayloadDirectory: '$(CorrelationPayloadDirectory)'
+    helixProjectArguments: '$(Build.SourcesDirectory)/src/coreclr/scripts/exploratory.proj'
+    BuildConfig: ${{ parameters.buildConfig }}
+    osGroup: ${{ parameters.osGroup }}
+    RunConfiguration: '$(RunConfiguration)'
+    ToolName: ${{ parameters.toolName }}
+    RunReason: '$(RunReason)'
+
+# Always upload the available issues-summary.txt files
+- task: CopyFiles@2
+  displayName: Copying issues-summary.txt of all partitions
+  inputs:
+    sourceFolder: '$(HelixResults)'
+    contents: '**/issues-summary-*.txt'
+    targetFolder: '$(IssuesLocation)'
+  continueOnError: true
+  condition: always()
+
+# Always upload the available AllIssues-*.zip files to be used for summarization
+- task: CopyFiles@2
+  displayName: Copying AllIssues.zip of all partitions
+  inputs:
+    sourceFolder: '$(HelixResults)'
+    contents: '**/AllIssues-*.zip'
+    targetFolder: '$(IssuesLocation)'
+  continueOnError: true
+  condition: always()
+
+- task: PublishPipelineArtifact@1
+  displayName: Publish issue files
+  inputs:
+    targetPath: $(IssuesLocation)
+    artifactName: 'Issues_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)'
+  continueOnError: true
+  condition: always()
+
+# Always upload the available log files
+- task: CopyFiles@2
+  displayName: Copying fuzzer logs of all partitions
+  inputs:
+    sourceFolder: '$(HelixResults)'
+    contents: '**/*.log'
+    targetFolder: '$(FuzzerLogsLocation)'
+  continueOnError: true
+  condition: always()
+
+- task: PublishPipelineArtifact@1
+  displayName: Publish fuzzer log files
+  inputs:
+    targetPath: '$(FuzzerLogsLocation)'
+    artifactName: '$(toolName)_Logs_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)'
+  continueOnError: true
+  condition: always()
+
+- script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/$(SummarizeScript) -issues_directory $(IssuesLocation) -arch $(archType) -platform $(osGroup)$(osSubgroup) -build_config $(_BuildConfig)
+  displayName: ${{ format('Summarize ({0}{1} {2})', parameters.osGroup, parameters.osSubgroup, parameters.archType) }}
+  condition: always()
+
+- task: PublishPipelineArtifact@1
+  displayName: ${{ format('Publish {0} build logs', parameters.toolName) }}
+  inputs:
+    targetPath: $(Build.SourcesDirectory)/artifacts/log
+    artifactName: '$(toolName)_BuildLogs_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)'
+  condition: always()
diff --git a/eng/pipelines/coreclr/templates/jit-exploratory-variables.yml b/eng/pipelines/coreclr/templates/jit-exploratory-variables.yml
new file mode 100644
index 000000000000..9af139026c5d
--- /dev/null
+++ b/eng/pipelines/coreclr/templates/jit-exploratory-variables.yml
@@ -0,0 +1,53 @@
+parameters:
+  osGroup: ''                     # required -- operating system for the job
+  osSubgroup: ''                  # optional -- operating system subgroup
+  archType: ''                    # required -- targeting CPU architecture
+  buildConfig: ''                 # required -- build configuration
+  toolName: ''                    # required -- which tool to use: Antigen or Fuzzlyn?
+
+variables:
+- template: /eng/pipelines/coreclr/templates/jit-python-variables.yml
+  parameters:
+    osGroup: ${{ parameters.osGroup }}
+
+- ${{ if in(variables['Build.Reason'], 'Schedule') }}:
+  - name: RunReason
+    value: 'Scheduled'
+- ${{ if notin(variables['Build.Reason'], 'Schedule') }}:
+  - name: RunReason
+    value: 'PR'
+- ${{ if eq(parameters.osGroup, 'windows') }}:
+  - name: Core_Root_Dir
+    value: '$(Build.SourcesDirectory)\artifacts\tests\coreclr\${{ parameters.osGroup }}.${{ parameters.archType }}.${{ parameters.buildConfig }}\Tests\Core_Root'
+  - name: HelixResults
+    value: '$(Build.SourcesDirectory)\artifacts\helixresults\'
+  - name: IssuesLocation
+    value: '$(Build.SourcesDirectory)\artifacts\issues\'
+  - name: AntigenLogsLocation
+    value: '$(Build.SourcesDirectory)\artifacts\antigen_logs\'
+  - name: FuzzlynLogsLocation
+    value: '$(Build.SourcesDirectory)\artifacts\fuzzlyn_logs\'
+
+- ${{ if ne(parameters.osGroup, 'windows') }}:
+  - name: Core_Root_Dir
+    value: '$(Build.SourcesDirectory)/artifacts/tests/coreclr/${{ parameters.osGroup }}.${{ parameters.archType }}.$(buildConfigUpper)/Tests/Core_Root'
+  - name: HelixResults
+    value: '$(Build.SourcesDirectory)/artifacts/helixresults/'
+  - name: IssuesLocation
+    value: '$(Build.SourcesDirectory)/artifacts/issues/'
+  - name: AntigenLogsLocation
+    value: '$(Build.SourcesDirectory)/artifacts/antigen_logs/'
+  - name: FuzzlynLogsLocation
+    value: '$(Build.SourcesDirectory)/artifacts/fuzzlyn_logs/'
+
+- ${{ if eq(parameters.toolName, 'Antigen') }}:
+  - name: SummarizeScript
+    value: 'antigen_summarize.py'
+  - name: FuzzerLogsLocation
+    value: '$(AntigenLogsLocation)'
+
+- ${{ if eq(parameters.toolName, 'Fuzzlyn') }}:
+  - name: SummarizeScript
+    value: 'fuzzlyn_summarize.py'
+  - name: FuzzerLogsLocation
+    value: '$(FuzzlynLogsLocation)'
diff --git a/eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml b/eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
new file mode 100644
index 000000000000..1e9c70114c56
--- /dev/null
+++ b/eng/pipelines/coreclr/templates/jit-outerloop-pipeline.yml
@@ -0,0 +1,57 @@
+parameters:
+  - name: platforms
+    type: object
+  - name: testGroup
+    type: string
+    default: outerloop
+
+extends:
+  template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
+  parameters:
+    stages:
+    - stage: Build
+      jobs:
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: checked
+          platforms: ${{ parameters.platforms }}
+          jobParameters:
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: ${{ parameters.testGroup }}
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
+          buildConfig: checked
+          platforms:
+          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
+          jobParameters:
+            testGroup: ${{ parameters.testGroup }}
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
+          buildConfig: checked
+          platforms: ${{ parameters.platforms }}
+          helixQueueGroup: ci
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+          jobParameters:
+            testGroup: ${{ parameters.testGroup }}
+            liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/coreclr/templates/jit-python-variables.yml b/eng/pipelines/coreclr/templates/jit-python-variables.yml
new file mode 100644
index 000000000000..3ca90600cfb0
--- /dev/null
+++ b/eng/pipelines/coreclr/templates/jit-python-variables.yml
@@ -0,0 +1,18 @@
+parameters:
+  osGroup: ''
+
+variables:
+- ${{ if eq(parameters.osGroup, 'windows') }}:
+  - name: PythonSetupScript
+    value: 'py -3 -m venv $(Build.SourcesDirectory)\venv'
+  - name: PythonScript
+    value: '$(Build.SourcesDirectory)\venv\Scripts\python.exe'
+  - name: PipScript
+    value: '$(Build.SourcesDirectory)\venv\Scripts\python.exe -m pip'
+- ${{ if ne(parameters.osGroup, 'windows') }}:
+  - name: PythonSetupScript
+    value: 'python3 -m venv $(Build.SourcesDirectory)/venv'
+  - name: PythonScript
+    value: '$(Build.SourcesDirectory)/venv/bin/python3'
+  - name: PipScript
+    value: '$(Build.SourcesDirectory)/venv/bin/pip3'
diff --git a/eng/pipelines/coreclr/templates/jit-run-exploratory-job.yml b/eng/pipelines/coreclr/templates/jit-run-exploratory-job.yml
deleted file mode 100644
index 91472d75b57d..000000000000
--- a/eng/pipelines/coreclr/templates/jit-run-exploratory-job.yml
+++ /dev/null
@@ -1,185 +0,0 @@
-parameters:
-  steps: []                       # optional -- any additional steps that need to happen before pulling down the jitutils repo and sending the jitutils to helix (ie building your repo)
-  variables: []                   # optional -- list of additional variables to send to the template
-  jobName: ''                     # required -- job name
-  displayName: ''                 # optional -- display name for the job. Will use jobName if not passed
-  pool: ''                        # required -- name of the Build pool
-  container: ''                   # required -- name of the container
-  buildConfig: ''                 # required -- build configuration
-  archType: ''                    # required -- targeting CPU architecture
-  osGroup: ''                     # required -- operating system for the job
-  osSubgroup: ''                  # optional -- operating system subgroup
-  continueOnError: 'false'        # optional -- determines whether to continue the build if the step errors
-  dependsOn: ''                   # optional -- dependencies of the job
-  timeoutInMinutes: 320           # optional -- timeout for the job
-  enableTelemetry: false          # optional -- enable for telemetry
-  liveLibrariesBuildConfig: ''    # optional -- live-live libraries configuration to use for the run
-  runtimeType: 'coreclr'          # optional -- Sets the runtime as coreclr or mono
-  codeGenType: 'JIT'              # optional -- Decides on the codegen technology if running on mono
-  helixQueues: ''                 # required -- Helix queues
-  dependOnEvaluatePaths: false
-  toolName: ''                    # required -- which tool to use: Antigen or Fuzzlyn?
-
-jobs:
-- template: xplat-pipeline-job.yml
-  parameters:
-    dependsOn: ${{ parameters.dependsOn }}
-    buildConfig: ${{ parameters.buildConfig }}
-    archType: ${{ parameters.archType }}
-    osGroup: ${{ parameters.osGroup }}
-    osSubgroup: ${{ parameters.osSubgroup }}
-    liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }}
-    enableTelemetry: ${{ parameters.enableTelemetry }}
-    enablePublishBuildArtifacts: true
-    continueOnError: ${{ parameters.continueOnError }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
-    timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
-
-    ${{ if ne(parameters.displayName, '') }}:
-      displayName: '${{ parameters.displayName }}'
-    ${{ if eq(parameters.displayName, '') }}:
-      displayName: '${{ parameters.jobName }}'
-
-    variables:
-    - ${{ each variable in parameters.variables }}:
-      - ${{insert}}: ${{ variable }}
-
-    - HelixApiAccessToken: ''
-    - HelixPreCommand: ''
-
-    - ${{ if in(variables['Build.Reason'], 'Schedule') }}:
-      - name: RunReason
-        value: 'Scheduled'
-    - ${{ if notin(variables['Build.Reason'], 'Schedule') }}:
-      - name: RunReason
-        value: 'PR'
-    - ${{ if eq(parameters.osGroup, 'windows') }}:
-      - name: PythonScript
-        value: 'py -3'
-      - name: PipScript
-        value: 'py -3 -m pip'
-      - name: Core_Root_Dir
-        value: '$(Build.SourcesDirectory)\artifacts\tests\coreclr\${{ parameters.osGroup }}.${{ parameters.archType }}.${{ parameters.buildConfig }}\Tests\Core_Root'
-      - name: HelixResults
-        value: '$(Build.SourcesDirectory)\artifacts\helixresults\'
-      - name: IssuesLocation
-        value: '$(Build.SourcesDirectory)\artifacts\issues\'
-      - name: AntigenLogsLocation
-        value: '$(Build.SourcesDirectory)\artifacts\antigen_logs\'
-      - name: FuzzlynLogsLocation
-        value: '$(Build.SourcesDirectory)\artifacts\fuzzlyn_logs\'
-
-    - ${{ if ne(parameters.osGroup, 'windows') }}:
-      - name: PythonScript
-        value: 'python3'
-      - name: PipScript
-        value: 'pip3'
-      - name: Core_Root_Dir
-        value: '$(Build.SourcesDirectory)/artifacts/tests/coreclr/${{ parameters.osGroup }}.${{ parameters.archType }}.$(buildConfigUpper)/Tests/Core_Root'
-      - name: HelixResults
-        value: '$(Build.SourcesDirectory)/artifacts/helixresults/'
-      - name: IssuesLocation
-        value: '$(Build.SourcesDirectory)/artifacts/issues/'
-      - name: AntigenLogsLocation
-        value: '$(Build.SourcesDirectory)/artifacts/antigen_logs/'
-      - name: FuzzlynLogsLocation
-        value: '$(Build.SourcesDirectory)/artifacts/fuzzlyn_logs/'
-
-    - ${{ if eq(parameters.toolName, 'Antigen') }}:
-      - name: SummarizeScript
-        value: 'antigen_summarize.py'
-      - name: FuzzerLogsLocation
-        value: '$(AntigenLogsLocation)'
-
-    - ${{ if eq(parameters.toolName, 'Fuzzlyn') }}:
-      - name: SummarizeScript
-        value: 'fuzzlyn_summarize.py'
-      - name: FuzzerLogsLocation
-        value: '$(FuzzlynLogsLocation)'
-
-    workspace:
-      clean: all
-    pool:
-      ${{ parameters.pool }}
-    container: ${{ parameters.container }}
-    steps:
-    - ${{ parameters.steps }}
-
-    - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/fuzzer_setup.py -tool_name $(toolName) -source_directory $(Build.SourcesDirectory) -core_root_directory $(Core_Root_Dir) -arch $(archType) -platform $(osGroup)
-      displayName: ${{ format('{0} setup ({1}-{2})', parameters.toolName, parameters.osGroup, parameters.archType) }}
-
-      # Run exploratory tool in helix
-    - template: /eng/pipelines/common/templates/runtimes/send-to-helix-step.yml
-      parameters:
-        displayName: 'Send job to Helix'
-        helixBuild: $(Build.BuildNumber)
-        helixSource: $(_HelixSource)
-        helixType: 'build/tests/'
-        helixQueues: ${{ join(',', parameters.helixQueues) }}
-        creator: dotnet-bot
-        WorkItemTimeout: 2:30 # 2.5 hours
-        WorkItemDirectory: '$(WorkItemDirectory)'
-        CorrelationPayloadDirectory: '$(CorrelationPayloadDirectory)'
-        helixProjectArguments: '$(Build.SourcesDirectory)/src/coreclr/scripts/exploratory.proj'
-        BuildConfig: ${{ parameters.buildConfig }}
-        osGroup: ${{ parameters.osGroup }}
-        RunConfiguration: '$(RunConfiguration)'
-        ToolName: ${{ parameters.toolName }}
-        RunReason: '$(RunReason)'
-
-    # Always upload the available issues-summary.txt files
-    - task: CopyFiles@2
-      displayName: Copying issues-summary.txt of all partitions
-      inputs:
-        sourceFolder: '$(HelixResults)'
-        contents: '**/issues-summary-*.txt'
-        targetFolder: '$(IssuesLocation)'
-      continueOnError: true
-      condition: always()
-
-    # Always upload the available AllIssues-*.zip files to be used for summarization
-    - task: CopyFiles@2
-      displayName: Copying AllIssues.zip of all partitions
-      inputs:
-        sourceFolder: '$(HelixResults)'
-        contents: '**/AllIssues-*.zip'
-        targetFolder: '$(IssuesLocation)'
-      continueOnError: true
-      condition: always()
-
-    - task: PublishPipelineArtifact@1
-      displayName: Publish issue files
-      inputs:
-        targetPath: $(IssuesLocation)
-        artifactName: 'Issues_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-      continueOnError: true
-      condition: always()
-
-    # Always upload the available log files
-    - task: CopyFiles@2
-      displayName: Copying fuzzer logs of all partitions
-      inputs:
-        sourceFolder: '$(HelixResults)'
-        contents: '**/*.log'
-        targetFolder: '$(FuzzerLogsLocation)'
-      continueOnError: true
-      condition: always()
-
-    - task: PublishPipelineArtifact@1
-      displayName: Publish fuzzer log files
-      inputs:
-        targetPath: '$(FuzzerLogsLocation)'
-        artifactName: '$(toolName)_Logs_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-      continueOnError: true
-      condition: always()
-
-    - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/$(SummarizeScript) -issues_directory $(IssuesLocation) -arch $(archType) -platform $(osGroup)$(osSubgroup) -build_config $(buildConfig)
-      displayName: ${{ format('Summarize ({0}{1} {2})', parameters.osGroup, parameters.osSubgroup, parameters.archType) }}
-      condition: always()
-
-    - task: PublishPipelineArtifact@1
-      displayName: ${{ format('Publish {0} build logs', parameters.toolName) }}
-      inputs:
-        targetPath: $(Build.SourcesDirectory)/artifacts/log
-        artifactName: '$(toolName)_BuildLogs_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-      condition: always()
diff --git a/eng/pipelines/coreclr/templates/perf-job.yml b/eng/pipelines/coreclr/templates/perf-job.yml
index 7adba086e45c..0d3f5b46a750 100644
--- a/eng/pipelines/coreclr/templates/perf-job.yml
+++ b/eng/pipelines/coreclr/templates/perf-job.yml
@@ -24,7 +24,6 @@ parameters:
   iOSLlvmBuild: 'False'
   iOSStripSymbols: 'False'
   hybridGlobalization: 'False'
-  skipLiveLibrariesDownload: false
   collectHelixLogsScript: ''
   timeoutInMinutes: 320
   compare: false
@@ -78,19 +77,17 @@ jobs:
     # Test job depends on the corresponding build job
     ${{ if eq(parameters.downloadSpecificBuild.buildId, '') }}:
       dependsOn:
-      - ${{ if not(in(parameters.runtimeType, 'AndroidMono', 'iOSMono', 'iOSNativeAOT', 'wasm')) }}:
-        - ${{ format('coreclr_{0}_product_build_{1}{2}_{3}_{4}', parameters.runtimeVariant, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-      - ${{ if and(ne(parameters.liveLibrariesBuildConfig, ''), eq(parameters.skipLiveLibrariesDownload, 'false')) }}:
-        - ${{ format('libraries_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.liveLibrariesBuildConfig) }}
+      - ${{ if not(or(in(parameters.runtimeType, 'AndroidMono', 'iOSMono', 'iOSNativeAOT', 'wasm'), and(eq(parameters.runtimeType, 'mono'), ne(parameters.codeGenType, 'AOT')))) }}:
+        - ${{ format('build_{0}{1}_{2}_{3}_{4}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig, 'coreclr') }}
       - ${{ if and(eq(parameters.runtimeType, 'mono'), ne(parameters.codeGenType, 'AOT')) }}:
-        - ${{ format('mono_{0}_product_build_{1}{2}_{3}_{4}', parameters.runtimeVariant, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
+        - ${{ format('build_{0}{1}_{2}_{3}_{4}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig, 'mono') }}
       - ${{ if eq(parameters.runtimeType, 'wasm')}}:
         - ${{ format('build_{0}{1}_{2}_{3}_{4}_{5}', 'browser', '', 'wasm', 'linux', parameters.buildConfig, parameters.runtimeType) }}
       - ${{ if and(eq(parameters.codeGenType, 'AOT'), ne(parameters.runtimeType, 'wasm'))}}:
         - ${{ format('build_{0}{1}_{2}_{3}_{4}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig, parameters.codeGenType) }}
       - ${{ if eq(parameters.runtimeType, 'AndroidMono')}}:
         - ${{ 'build_android_arm64_release_AndroidMono' }}
-        # - ${{ 'Build_ios_arm64_release_PerfBDNApp' }} Disabled per: https://github.com/dotnet/performance/issues/3655
+        - ${{ 'Build_ios_arm64_release_PerfBDNApp' }}
       - ${{ if eq(parameters.runtimeType, 'iOSMono')}}:
         - ${{ 'build_ios_arm64_release_iOSMono' }}
       - ${{ if eq(parameters.runtimeType, 'iOSNativeAOT')}}:
@@ -142,6 +139,15 @@ jobs:
     - ${{ if eq(parameters.crossBuild, true) }}:
       - _crossBuildPropertyArg: '-cross'
 
+    - name: librariesDownloadDir
+      value: '$(Build.SourcesDirectory)/artifacts'
+
+    - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+      parameters:
+        runtimeFlavor: coreclr
+        testGroup: ${{ parameters.testGroup }}
+        liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }}
+
     frameworks:
       - ${{ parameters.framework }}
     steps:
@@ -150,35 +156,6 @@ jobs:
         $(Build.SourcesDirectory)/eng/common/msbuild.sh $(Build.SourcesDirectory)/eng/testing/performance/add_properties_to_pipeline.proj /t:SetVariables
       displayName: Add Properties To Pipeline Env
 
-    # Optionally download live-built libraries
-    - ${{ if and(ne(parameters.liveLibrariesBuildConfig, ''), eq(parameters.skipLiveLibrariesDownload, 'false')) }}:
-      - template: /eng/pipelines/common/download-artifact-step.yml
-        parameters:
-          unpackFolder: $(librariesDownloadDir)
-          cleanUnpackFolder: false
-          artifactFileName: '$(librariesBuildArtifactName)$(archiveExtension)'
-          artifactName: '$(librariesBuildArtifactName)'
-          displayName: 'live-built libraries'
-
-    # Download coreclr
-    - ${{ if not(in(parameters.runtimeType, 'AndroidMono', 'iOSMono', 'iOSNativeAOT', 'wasm')) }}:
-      - template: /eng/pipelines/common/download-artifact-step.yml
-        parameters:
-          unpackFolder: $(buildProductRootFolderPath)
-          artifactFileName: '$(buildProductArtifactName)$(archiveExtension)'
-          artifactName: '$(buildProductArtifactName)'
-          displayName: 'Coreclr product build'
-
-    # Download mono
-    - ${{ if and(eq(parameters.runtimeType, 'mono'), ne(parameters.codeGenType, 'AOT')) }}:
-      - template: /eng/pipelines/common/download-artifact-step.yml
-        parameters:
-          unpackFolder: $(librariesDownloadDir)/bin/mono/$(osGroup).$(archType).$(buildConfigUpper)
-          cleanUnpackFolder: false
-          artifactFileName: 'MonoProduct_${{ parameters.runtimeVariant }}_$(osGroup)_$(archType)_$(buildConfig)$(archiveExtension)'
-          artifactName: 'MonoProduct_${{ parameters.runtimeVariant }}_$(osGroup)_$(archType)_$(buildConfig)'
-          displayName: 'Mono runtime'
-
     # Download wasm
     - ${{ if eq(parameters.runtimeType, 'wasm') }}:
       - ${{ if eq(parameters.downloadSpecificBuild.buildId, '') }}:
@@ -206,9 +183,15 @@ jobs:
           find $(librariesDownloadDir)/bin/wasm -type d &&
           find $(librariesDownloadDir)/bin/wasm -type f -exec chmod 664 {} \;
         displayName: "Create wasm directory (Linux)"
+    - ${{ elseif eq(parameters.codeGenType, 'AOT') }}:
+      # Download mono AOT
+      - template: /eng/pipelines/common/download-artifact-step.yml
+        parameters:
+          unpackFolder: $(Build.SourcesDirectory)/artifacts/bin
+          artifactFileName: 'BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)_coreclr$(archiveExtension)'
+          artifactName: 'BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)_coreclr'
+          displayName: 'Runtime artifacts'
 
-    # Download mono AOT
-    - ${{ if and(eq(parameters.codeGenType, 'AOT'), ne(parameters.runtimeType, 'wasm')) }}:
       - template: /eng/pipelines/common/download-artifact-step.yml
         parameters:
           unpackFolder: $(librariesDownloadDir)/LinuxMonoAOT
@@ -218,9 +201,16 @@ jobs:
 
       - script: "mkdir -p $(librariesDownloadDir)/bin/aot;mkdir -p $(librariesDownloadDir)/bin/aot/pack;cp -r $(librariesDownloadDir)/LinuxMonoAOT/artifacts/bin/mono/linux.${{ parameters.archType }}.Release/cross/linux-${{ parameters.archType }}/* $(librariesDownloadDir)/bin/aot;cp -r $(librariesDownloadDir)/LinuxMonoAOT/artifacts/bin/microsoft.netcore.app.runtime.linux-${{ parameters.archType }}/Release/* $(librariesDownloadDir)/bin/aot/pack"
         displayName: "Create aot directory (Linux)"
+    - ${{ elseif in(parameters.runtimeType, 'coreclr', 'mono') }}:
+      - template: /eng/pipelines/common/download-artifact-step.yml
+        parameters:
+          unpackFolder: $(Build.SourcesDirectory)/artifacts/bin
+          artifactFileName: 'BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)_${{ parameters.runtimeType }}$(archiveExtension)'
+          artifactName: 'BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)_${{ parameters.runtimeType }}'
+          displayName: 'Runtime artifacts'
 
-    # Download artifacts for Android Testing
-    - ${{ if eq(parameters.runtimeType, 'AndroidMono')}}:
+    - ${{ elseif eq(parameters.runtimeType, 'AndroidMono')}}:
+      # Download artifacts for Android Testing
       - template: /eng/pipelines/common/download-artifact-step.yml
         parameters:
           unpackFolder: $(Build.SourcesDirectory)/androidHelloWorld
@@ -228,17 +218,16 @@ jobs:
           artifactFileName: 'AndroidMonoarm64.tar.gz'
           artifactName: 'AndroidMonoarm64'
           displayName: 'Mono Android HelloWorld'
-      # Disabled per: https://github.com/dotnet/performance/issues/3655
-      # - template: /eng/pipelines/common/download-artifact-step.yml
-      #   parameters:
-      #     unpackFolder: $(Build.SourcesDirectory)
-      #     cleanUnpackFolder: false
-      #     artifactFileName: 'AndroidBDNApk.tar.gz'
-      #     artifactName: 'AndroidBDNApk'
-      #     displayName: 'Mono Android BDN Apk'
+      - template: /eng/pipelines/common/download-artifact-step.yml
+        parameters:
+          unpackFolder: $(Build.SourcesDirectory)
+          cleanUnpackFolder: false
+          artifactFileName: 'AndroidBDNApk.tar.gz'
+          artifactName: 'AndroidBDNApk'
+          displayName: 'Mono Android BDN Apk'
 
     # Download iOSMono and Native AOT tests
-    - ${{ if or(eq(parameters.runtimeType, 'iOSMono'), eq(parameters.runtimeType, 'iOSNativeAOT')) }}:
+    - ${{ elseif or(eq(parameters.runtimeType, 'iOSMono'), eq(parameters.runtimeType, 'iOSNativeAOT')) }}:
       - template: /eng/pipelines/common/download-artifact-step.yml
         parameters:
           unpackFolder: $(Build.SourcesDirectory)/iosHelloWorld
@@ -286,13 +275,19 @@ jobs:
     # Create Core_Root
     - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(buildConfig) $(archType) generatelayoutonly $(librariesOverrideArg) $(_crossBuildPropertyArg)
       displayName: Create Core_Root
-      condition: and(succeeded(), ne(variables.runtimeFlavorName, 'Mono'), ne('${{ parameters.runtimeType }}', 'wasm'), not(in('${{ parameters.runtimeType }}', 'AndroidMono', 'iOSMono', 'iOSNativeAOT')))
+      condition: and(succeeded(), ne(variables.runtimeFlavorName, 'Mono'), not(in('${{ parameters.runtimeType }}', 'wasm', 'AndroidMono', 'iOSMono', 'iOSNativeAOT')))
 
-    # Copy the runtime directory into the testhost folder to include OOBs.
-    - script: "build.cmd -subset libs.pretest -configuration release -ci -arch $(archType) -testscope innerloop /p:RuntimeArtifactsPath=$(librariesDownloadDir)\\bin\\mono\\$(osGroup).$(archType).$(buildConfigUpper) /p:RuntimeFlavor=mono;xcopy $(Build.SourcesDirectory)\\artifacts\\bin\\runtime\\${{parameters.framework}}-$(osGroup)-$(buildConfigUpper)-$(archType)\\* $(Build.SourcesDirectory)\\artifacts\\bin\\testhost\\${{parameters.framework}}-$(osGroup)-$(buildConfigUpper)-$(archType)\\shared\\Microsoft.NETCore.App\\$(productVersion) /E /I /Y;xcopy $(Build.SourcesDirectory)\\artifacts\\bin\\testhost\\${{parameters.framework}}-$(osGroup)-$(buildConfigUpper)-$(archType)\\* $(Build.SourcesDirectory)\\.dotnet-mono /E /I /Y;copy $(Build.SourcesDirectory)\\artifacts\\bin\\coreclr\\$(osGroup).$(archType).$(buildConfigUpper)\\corerun.exe $(Build.SourcesDirectory)\\.dotnet-mono\\shared\\Microsoft.NETCore.App\\$(productVersion)\\corerun.exe"
-      displayName: "Create mono dotnet (Windows)"
-      condition: and(and(succeeded(), eq(variables.runtimeFlavorName, 'Mono')), eq(variables.osGroup, 'windows'), not(in('${{ parameters.runtimeType }}', 'AndroidMono', 'iOSMono', 'iOSNativeAOT')))
+    - task: CopyFiles@2
+      displayName: Create .dotnet-mono folder
+      inputs:
+        SourceFolder: $(Build.SourcesDirectory)/artifacts/bin/testhost/${{ parameters.framework }}-$(osGroup)-$(buildConfigUpper)-$(archType)
+        TargetFolder: $(Build.SourcesDirectory)/.dotnet-mono
+      condition: and(succeeded(), eq(variables.runtimeFlavorName, 'Mono'), ne(variables.osGroup, 'windows'), notIn('${{ parameters.runtimeType }}', 'wasm', 'AndroidMono', 'iOSMono', 'iOSNativeAOT'))
 
-    - script: "mkdir $(Build.SourcesDirectory)/.dotnet-mono;./build.sh -subset libs.pretest -configuration release -ci -arch $(archType) -testscope innerloop /p:RuntimeArtifactsPath=$(librariesDownloadDir)/bin/mono/$(osGroup).$(archType).$(buildConfigUpper) /p:RuntimeFlavor=mono;cp $(Build.SourcesDirectory)/artifacts/bin/runtime/${{parameters.framework}}-$(osGroup)-$(buildConfigUpper)-$(archType)/* $(Build.SourcesDirectory)/artifacts/bin/testhost/${{parameters.framework}}-$(osGroup)-$(buildConfigUpper)-$(archType)/shared/Microsoft.NETCore.App/$(productVersion) -rf;cp $(Build.SourcesDirectory)/artifacts/bin/testhost/${{parameters.framework}}-$(osGroup)-$(buildConfigUpper)-$(archType)/* $(Build.SourcesDirectory)/.dotnet-mono -r;cp $(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).$(buildConfigUpper)/corerun $(Build.SourcesDirectory)/.dotnet-mono/shared/Microsoft.NETCore.App/$(productVersion)/corerun"
-      displayName: "Create mono dotnet (Linux)"
-      condition: and(and(succeeded(), eq(variables.runtimeFlavorName, 'Mono')), ne(variables.osGroup, 'windows'), not(in('${{ parameters.runtimeType }}', 'AndroidMono', 'iOSMono', 'iOSNativeAOT')))
+    - task: CopyFiles@2
+      displayName: Copy corerun to .dotnet-mono
+      inputs:
+        SourceFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).$(buildConfigUpper)
+        Contents: 'corerun*'
+        TargetFolder: $(Build.SourcesDirectory)/.dotnet-mono/shared/Microsoft.NETCore.App/$(productVersion)
+      condition: and(succeeded(), eq(variables.runtimeFlavorName, 'Mono'), ne(variables.osGroup, 'windows'), notIn('${{ parameters.runtimeType }}', 'wasm', 'AndroidMono', 'iOSMono', 'iOSNativeAOT'))
diff --git a/eng/pipelines/coreclr/templates/run-paltests-step.yml b/eng/pipelines/coreclr/templates/run-paltests-step.yml
index 3b3881986f7d..d64feb72206d 100644
--- a/eng/pipelines/coreclr/templates/run-paltests-step.yml
+++ b/eng/pipelines/coreclr/templates/run-paltests-step.yml
@@ -5,7 +5,6 @@ parameters:
   archType: ''                    # required -- targeting CPU architecture
   osGroup: ''                     # required -- operating system for the job
   osSubgroup: ''                  # optional -- operating system subgroup
-  dependOnEvaluatePaths: false
 
 steps:
   - template: /eng/pipelines/common/templates/runtimes/send-to-helix-step.yml
diff --git a/eng/pipelines/coreclr/templates/run-performance-job.yml b/eng/pipelines/coreclr/templates/run-performance-job.yml
index 51eb0ce0e967..12d630a5b3f3 100644
--- a/eng/pipelines/coreclr/templates/run-performance-job.yml
+++ b/eng/pipelines/coreclr/templates/run-performance-job.yml
@@ -31,7 +31,7 @@ parameters:
   additionalSetupParameters: ''   # optional -- additional setup parameters that are job-specific
 
 jobs:
-- template: xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
     dependsOn: ${{ parameters.dependsOn }}
     buildConfig: ${{ parameters.buildConfig }}
@@ -51,6 +51,8 @@ jobs:
 
     timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
 
+    logsName: 'Performance_Run_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)_${{ parameters.runtimeType }}_${{ parameters.codeGenType }}_${{ parameters.runKind }}_${{ parameters.logicalMachine }}_${{ parameters.javascriptEngine }}_${{ parameters.pgoRunType }}_${{ parameters.physicalPromotionRunType }}_${{ parameters.r2rRunType }}_${{ parameters.experimentName }}'
+
     variables:
     - ${{ each variable in parameters.variables }}:
       - ${{insert}}: ${{ variable }}
@@ -153,7 +155,6 @@ jobs:
           - HelixPreCommand: 'export MONO_ENV_OPTIONS="--interpreter";$(ExtraMSBuildLogsLinux)'
           - Interpreter: ' --monointerpreter'
 
-
     workspace:
       clean: all
     pool:
@@ -191,11 +192,4 @@ jobs:
         WorkItemDirectory: '$(WorkItemDirectory)' # WorkItemDirectory can not be empty, so we send it some docs to keep it happy
         CorrelationPayloadDirectory: '$(PayloadDirectory)' # it gets checked out to a folder with shorter path than WorkItemDirectory so we can avoid file name too long exceptions
         ProjectFile: ${{ parameters.projectFile }}
-        osGroup: ${{ parameters.osGroup }}
-    - task: PublishPipelineArtifact@1
-      displayName: Publish Logs
-      inputs:
-        targetPath: $(Build.SourcesDirectory)/artifacts/log
-        artifactName: 'Performance_Run_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)_${{ parameters.runtimeType }}_${{ parameters.codeGenType }}_${{ parameters.runKind }}_${{ parameters.logicalMachine }}_${{ parameters.javascriptEngine }}_${{ parameters.pgoRunType }}_${{ parameters.physicalPromotionRunType }}_${{ parameters.r2rRunType }}_${{ parameters.experimentName }}'
-      continueOnError: true
-      condition: always()
+        osGroup: ${{ parameters.osGroup }}
\ No newline at end of file
diff --git a/eng/pipelines/coreclr/templates/run-scenarios-job.yml b/eng/pipelines/coreclr/templates/run-scenarios-job.yml
index 44910148e652..78db1ae8cbd0 100644
--- a/eng/pipelines/coreclr/templates/run-scenarios-job.yml
+++ b/eng/pipelines/coreclr/templates/run-scenarios-job.yml
@@ -23,7 +23,7 @@ parameters:
   additionalSetupParameters: ''   # optional -- additional setup parameters that are job-specific
 
 jobs:
-- template: xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
     dependsOn: ${{ parameters.dependsOn }}
     buildConfig: ${{ parameters.buildConfig }}
@@ -34,6 +34,7 @@ jobs:
     enableTelemetry: ${{ parameters.enableTelemetry }}
     enablePublishBuildArtifacts: true
     continueOnError: ${{ parameters.continueOnError }}
+    logsName: 'Performance_Run_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)_${{ parameters.runtimeType }}_${{ parameters.codeGenType }}_${{ parameters.runKind }}_$(iOSLlvmBuild)_$(iOSStripSymbols)_$(hybridGlobalization)'
 
     ${{ if ne(parameters.displayName, '') }}:
       displayName: '${{ parameters.displayName }}'
@@ -108,6 +109,9 @@ jobs:
       ${{ if ne(parameters.runtimeType, 'wasm') }}:
         value: --install-dir $(PayloadDirectory)/dotnet
 
+    - name: librariesDownloadDir
+      value: '$(Build.SourcesDirectory)/artifacts'
+
     workspace:
       clean: all
     pool:
@@ -210,12 +214,3 @@ jobs:
         CorrelationPayloadDirectory: '$(PayloadDirectory)' # contains performance repo and built product
         ProjectFile: ${{ parameters.projectFile }}
         osGroup: ${{ parameters.osGroup }}
-
-    # publish logs
-    - task: PublishPipelineArtifact@1
-      displayName: Publish Logs
-      inputs:
-        targetPath: $(Build.SourcesDirectory)/artifacts/log
-        artifactName: 'Performance_Run_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)_${{ parameters.runtimeType }}_${{ parameters.codeGenType }}_${{ parameters.runKind }}_$(iOSLlvmBuild)_$(iOSStripSymbols)_$(hybridGlobalization)'
-      continueOnError: true
-      condition: always()
diff --git a/eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-checked-release-job.yml b/eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-checked-release-job.yml
index 155d82205938..6284c6fd6893 100644
--- a/eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-checked-release-job.yml
+++ b/eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-checked-release-job.yml
@@ -15,10 +15,9 @@ parameters:
   enableTelemetry: false          # optional -- enable for telemetry
   liveLibrariesBuildConfig: ''    # optional -- live-live libraries configuration to use for the run
   helixQueues: ''                 # required -- Helix queues
-  dependOnEvaluatePaths: false
 
 jobs:
-- template: xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
     dependsOn: ${{ parameters.dependsOn }}
     buildConfig: ${{ parameters.buildConfig }}
@@ -29,7 +28,6 @@ jobs:
     enableTelemetry: ${{ parameters.enableTelemetry }}
     enablePublishBuildArtifacts: true
     continueOnError: ${{ parameters.continueOnError }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
     timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
 
     ${{ if ne(parameters.displayName, '') }}:
@@ -38,11 +36,10 @@ jobs:
       displayName: '${{ parameters.jobName }}'
 
     variables:
+    - template: /eng/pipelines/coreclr/templates/jit-python-variables.yml
+      parameters:
+        osGroup: ${{ parameters.osGroup }}
 
-    - name: PythonScript
-      value: 'py -3'
-    - name: PipScript
-      value: 'py -3 -m pip'
     - name: SpmiCollectionLocation
       value: '$(Build.SourcesDirectory)\artifacts\spmi\'
     - name: SpmiLogsLocation
@@ -50,6 +47,12 @@ jobs:
     - name: HelixResultLocation
       value: '$(Build.SourcesDirectory)\artifacts\helixresults\'
 
+    - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+      parameters:
+        runtimeFlavor: coreclr
+        testGroup: ${{ parameters.testGroup }}
+        liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }}
+
     - ${{ each variable in parameters.variables }}:
       - ${{insert}}: ${{ variable }}
 
@@ -65,6 +68,9 @@ jobs:
         mkdir -p $(SpmiCollectionLocation)
       displayName: Create directory for SPMI collection
 
+    - script: $(PythonSetupScript)
+      displayName: Enable python venv
+
     - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi_asmdiffs_checked_release_setup.py -source_directory $(Build.SourcesDirectory) -checked_directory $(buildProductRootFolderPath) -release_directory $(releaseProductRootFolderPath) -arch $(archType)
       displayName: ${{ format('SuperPMI asmdiffs checked release setup ({0} {1})', parameters.osGroup, parameters.archType) }}
 
diff --git a/eng/pipelines/coreclr/templates/run-superpmi-collect-job.yml b/eng/pipelines/coreclr/templates/run-superpmi-collect-job.yml
index d83f6e7e089d..9903c61d4d41 100644
--- a/eng/pipelines/coreclr/templates/run-superpmi-collect-job.yml
+++ b/eng/pipelines/coreclr/templates/run-superpmi-collect-job.yml
@@ -16,10 +16,9 @@ parameters:
   liveLibrariesBuildConfig: ''    # optional -- live-live libraries configuration to use for the run
   collectionType: ''
   collectionName: ''
-  dependOnEvaluatePaths: false
 
 jobs:
-- template: xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
     dependsOn: ${{ parameters.dependsOn }}
     buildConfig: ${{ parameters.buildConfig }}
@@ -32,7 +31,6 @@ jobs:
     continueOnError: ${{ parameters.continueOnError }}
     collectionType: $ {{ parameters.collectionType }}
     collectionName: ${{ parameters.collectionName }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
 
     ${{ if ne(parameters.displayName, '') }}:
       displayName: '${{ parameters.displayName }}'
@@ -42,10 +40,16 @@ jobs:
     # tests collection takes longer so increase timeout to 8 hours
     ${{ if or(eq(parameters.collectionName, 'coreclr_tests'), eq(parameters.collectionName, 'libraries_tests')) }}:
       timeoutInMinutes: 480
-    ${{ if and(ne(parameters.collectionName, 'coreclr_tests'), ne(parameters.collectionName, 'libraries_tests')) }}:
+    ${{ else }}:
       timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
 
     variables:
+    - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+      parameters:
+        runtimeFlavor: coreclr
+        testGroup: ${{ parameters.testGroup }}
+        liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }}
+
     - ${{ each variable in parameters.variables }}:
       - ${{insert}}: ${{ variable }}
     - HelixApiAccessToken: ''
@@ -54,13 +58,11 @@ jobs:
     - CollectionType: ${{ parameters.collectionType }}
     - CollectionName: ${{ parameters.collectionName }}
 
+    - template: /eng/pipelines/coreclr/templates/jit-python-variables.yml
+      parameters:
+        osGroup: ${{ parameters.osGroup }}
+
     - ${{ if eq(parameters.osGroup, 'windows') }}:
-      - name: PythonSetupScript
-        value: 'py -3 -m venv $(Build.SourcesDirectory)\venv'
-      - name: PythonScript
-        value: '$(Build.SourcesDirectory)\venv\Scripts\python.exe'
-      - name: PipScript
-        value: '$(Build.SourcesDirectory)\venv\Scripts\python.exe -m pip'
       - name: Core_Root_Dir
         value: '$(Build.SourcesDirectory)\artifacts\tests\coreclr\${{ parameters.osGroup }}.${{ parameters.archType }}.${{ parameters.buildConfig }}\Tests\Core_Root'
       - name: MchFilesLocation
@@ -72,12 +74,6 @@ jobs:
       - name: PayloadLocation
         value: '$(Build.SourcesDirectory)\payload'
     - ${{ if ne(parameters.osGroup, 'windows') }}:
-      - name: PythonSetupScript
-        value: 'python3 -m venv $(Build.SourcesDirectory)/venv'
-      - name: PythonScript
-        value: '$(Build.SourcesDirectory)/venv/bin/python3'
-      - name: PipScript
-        value: '$(Build.SourcesDirectory)/venv/bin/pip3'
       - name: Core_Root_Dir
         value: '$(Build.SourcesDirectory)/artifacts/tests/coreclr/${{ parameters.osGroup }}.${{ parameters.archType }}.$(buildConfigUpper)/Tests/Core_Root'
       - name: MchFilesLocation
@@ -97,7 +93,7 @@ jobs:
         value: '$(Core_Root_Dir)'
     - ${{ if eq(parameters.collectionName, 'realworld') }}:
       - name: InputDirectory
-        value: '$(Core_Root_Dir)'    
+        value: '$(Core_Root_Dir)'
     - ${{ if eq(parameters.collectionName, 'coreclr_tests') }}:
       - name: InputDirectory
         value: '$(managedTestArtifactRootFolderPath)'
@@ -118,6 +114,7 @@ jobs:
 
     - script: $(PythonSetupScript)
       displayName: Enable python venv
+      condition: always()
 
     - script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi_collect_setup.py -payload_directory $(PayloadLocation) -source_directory $(Build.SourcesDirectory) -core_root_directory $(Core_Root_Dir) -arch $(archType) -platform $(osGroup) -mch_file_tag $(MchFileTag) -input_directory $(InputDirectory) -collection_name $(CollectionName) -collection_type $(CollectionType) -max_size 25 # size in MB
       displayName: ${{ format('SuperPMI setup ({0})', parameters.osGroup) }}
diff --git a/eng/pipelines/coreclr/templates/run-superpmi-diffs-job.yml b/eng/pipelines/coreclr/templates/run-superpmi-diffs-job.yml
index da61351842a5..31f98a90baf9 100644
--- a/eng/pipelines/coreclr/templates/run-superpmi-diffs-job.yml
+++ b/eng/pipelines/coreclr/templates/run-superpmi-diffs-job.yml
@@ -15,13 +15,12 @@ parameters:
   enableTelemetry: false          # optional -- enable for telemetry
   liveLibrariesBuildConfig: ''    # optional -- live-live libraries configuration to use for the run
   helixQueues: ''                 # required -- Helix queues
-  dependOnEvaluatePaths: false
   diffType: 'asmdiffs'            # required -- 'asmdiffs', 'tpdiff', or 'all'
   baseJitOptions: ''              # e.g. JitStressModeNames=STRESS_PHYSICAL_PROMOTION;JitFullyInt=1
   diffJitOptions: ''
 
 jobs:
-- template: xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
     dependsOn: ${{ parameters.dependsOn }}
     buildConfig: ${{ parameters.buildConfig }}
@@ -32,7 +31,6 @@ jobs:
     enableTelemetry: ${{ parameters.enableTelemetry }}
     enablePublishBuildArtifacts: true
     continueOnError: ${{ parameters.continueOnError }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
     timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
 
     ${{ if ne(parameters.displayName, '') }}:
@@ -47,11 +45,17 @@ jobs:
     - name: diffType
       value: ${{ parameters.diffType }}
 
+    - template: /eng/pipelines/coreclr/templates/jit-python-variables.yml
+      parameters:
+        osGroup: ${{ parameters.osGroup }}
+
+    - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+      parameters:
+        runtimeFlavor: coreclr
+        testGroup: ${{ parameters.testGroup }}
+        liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }}
+
     - ${{ if eq(parameters.osGroup, 'windows') }}:
-      - name: PythonScript
-        value: 'py -3'
-      - name: PipScript
-        value: 'py -3 -m pip'
       - name: SpmiCollectionLocation
         value: '$(Build.SourcesDirectory)\artifacts\spmi\'
       - name: SpmiLogsLocation
@@ -61,10 +65,6 @@ jobs:
       - name: HelixResultLocation
         value: '$(Build.SourcesDirectory)\artifacts\helixresults\'
     - ${{ if ne(parameters.osGroup, 'windows') }}:
-      - name: PythonScript
-        value: 'python3'
-      - name: PipScript
-        value: 'pip3'
       - name: SpmiCollectionLocation
         value: '$(Build.SourcesDirectory)/artifacts/spmi/'
       - name: SpmiLogsLocation
@@ -94,6 +94,9 @@ jobs:
     steps:
     - ${{ parameters.steps }}
 
+    - script: $(PythonSetupScript)
+      displayName: Enable python venv
+
     - ${{ if ne(parameters.osGroup, 'windows') }}:
       - script: |
           mkdir -p $(SpmiCollectionLocation)
diff --git a/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml b/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml
index 3083a080c230..b8907cfc5607 100644
--- a/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml
+++ b/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml
@@ -15,10 +15,9 @@ parameters:
   enableTelemetry: false          # optional -- enable for telemetry
   liveLibrariesBuildConfig: ''    # optional -- live-live libraries configuration to use for the run
   helixQueues: ''                 # required -- Helix queues
-  dependOnEvaluatePaths: false
 
 jobs:
-- template: xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
     dependsOn: ${{ parameters.dependsOn }}
     buildConfig: ${{ parameters.buildConfig }}
@@ -29,7 +28,6 @@ jobs:
     enableTelemetry: ${{ parameters.enableTelemetry }}
     enablePublishBuildArtifacts: true
     continueOnError: ${{ parameters.continueOnError }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
     timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
 
     ${{ if ne(parameters.displayName, '') }}:
@@ -38,13 +36,19 @@ jobs:
       displayName: '${{ parameters.jobName }}'
 
     variables:
+    - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+      parameters:
+        runtimeFlavor: coreclr
+        testGroup: ${{ parameters.testGroup }}
+        liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }}
+
     - ${{ each variable in parameters.variables }}:
       - ${{insert}}: ${{ variable }}
 
-    - name: PythonScript
-      value: 'py -3'
-    - name: PipScript
-      value: 'py -3 -m pip'
+    - template: /eng/pipelines/coreclr/templates/jit-python-variables.yml
+      parameters:
+        osGroup: ${{ parameters.osGroup }}
+
     - name: SpmiCollectionLocation
       value: '$(Build.SourcesDirectory)\artifacts\spmi\'
     - name: SpmiLogsLocation
@@ -60,6 +64,9 @@ jobs:
     steps:
     - ${{ parameters.steps }}
 
+    - script: $(PythonSetupScript)
+      displayName: Enable python venv
+
     - script: |
         mkdir $(SpmiCollectionLocation)
         mkdir $(SpmiLogsLocation)
diff --git a/eng/pipelines/coreclr/templates/superpmi-asmdiffs-checked-release-job.yml b/eng/pipelines/coreclr/templates/superpmi-asmdiffs-checked-release-job.yml
index 659483c9bc30..0376cf567cbe 100644
--- a/eng/pipelines/coreclr/templates/superpmi-asmdiffs-checked-release-job.yml
+++ b/eng/pipelines/coreclr/templates/superpmi-asmdiffs-checked-release-job.yml
@@ -7,7 +7,6 @@ parameters:
   timeoutInMinutes: 320           # build timeout
   variables: {}
   helixQueues: ''
-  dependOnEvaluatePaths: false
   runJobTemplate: '/eng/pipelines/coreclr/templates/run-superpmi-asmdiffs-checked-release-job.yml'
 
 jobs:
@@ -20,12 +19,11 @@ jobs:
     archType: ${{ parameters.archType }}
     osGroup: ${{ parameters.osGroup }}
     osSubgroup: ${{ parameters.osSubgroup }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
     timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
     helixQueues: ${{ parameters.helixQueues }}
     dependsOn:
-     - ${{ format('coreclr_jit_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, 'checked') }}
-     - ${{ format('coreclr_jit_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, 'release') }}
+      - 'build_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_checked_'
+      - 'build_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_release_'
 
     variables:
     - ${{ each variable in parameters.variables }}:
@@ -33,23 +31,22 @@ jobs:
 
     - name: releaseProductRootFolderPath
       value: '$(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).Release'
-    - name: releaseProductArtifactName
-      value: 'CoreCLRProduct_${{ parameters.runtimeVariant }}_$(osGroup)$(osSubgroup)_$(archType)_release'
 
     steps:
-
     # Download jit checked builds
     - template: /eng/pipelines/common/download-artifact-step.yml
       parameters:
-        unpackFolder: $(buildProductRootFolderPath)
-        artifactFileName: '$(buildProductArtifactName)$(archiveExtension)'
-        artifactName: '$(buildProductArtifactName)'
+        unpackFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr
+        artifactFileName: 'CheckedJIT_$(osGroup)$(osSubgroup)_$(archType)$(archiveExtension)'
+        artifactName: 'CheckedJIT_$(osGroup)$(osSubgroup)_$(archType)'
         displayName: 'JIT checked build'
+        cleanUnpackFolder: false
 
-    #Download jit release builds
+    # Download jit release builds
     - template: /eng/pipelines/common/download-artifact-step.yml
       parameters:
-        unpackFolder: $(releaseProductRootFolderPath)
-        artifactFileName: '$(releaseProductArtifactName)$(archiveExtension)'
-        artifactName: '$(releaseProductArtifactName)'
+        unpackFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr
+        artifactFileName: 'ReleaseJIT_$(osGroup)$(osSubgroup)_$(archType)$(archiveExtension)'
+        artifactName: 'ReleaseJIT_$(osGroup)$(osSubgroup)_$(archType)'
         displayName: 'JIT release build'
+        cleanUnpackFolder: false
diff --git a/eng/pipelines/coreclr/templates/superpmi-collect-job.yml b/eng/pipelines/coreclr/templates/superpmi-collect-job.yml
index b77ec27e6e5c..6af16c3db876 100644
--- a/eng/pipelines/coreclr/templates/superpmi-collect-job.yml
+++ b/eng/pipelines/coreclr/templates/superpmi-collect-job.yml
@@ -29,12 +29,7 @@ jobs:
     collectionName: ${{ parameters.collectionName }}
     # Test job depends on the corresponding build job
     dependsOn:
-     - ${{ format('coreclr__product_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-     # Depend on coreclr x64 so we can download it and use mcs.exe from it while publishing non-x64 arch SPMI collection
-     - ${{ if ne(parameters.archType, 'x64') }}:
-       - ${{ format('coreclr__product_build_{0}{1}_x64_{2}', parameters.osGroup, parameters.osSubgroup, parameters.buildConfig) }}
-     - ${{ if ne(parameters.liveLibrariesBuildConfig, '') }}:
-       - ${{ format('libraries_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.liveLibrariesBuildConfig) }}
+     - 'build_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_${{ parameters.buildConfig }}_'
      - ${{ if eq(parameters.collectionName, 'coreclr_tests') }}:
         - 'coreclr_common_test_build_p1_AnyOS_AnyCPU_${{parameters.buildConfig }}'
 
@@ -43,32 +38,29 @@ jobs:
     steps:
     # Extra steps that will be passed to the superpmi template and run before sending the job to helix (all of which is done in the template)
 
-    # Optionally download live-built libraries
-    - ${{ if ne(parameters.liveLibrariesBuildConfig, '') }}:
-      - template: /eng/pipelines/common/download-artifact-step.yml
-        parameters:
-          unpackFolder: $(librariesDownloadDir)
-          cleanUnpackFolder: false
-          artifactFileName: '$(librariesBuildArtifactName)$(archiveExtension)'
-          artifactName: '$(librariesBuildArtifactName)'
-          displayName: 'live-built libraries'
-
-    # Download coreclr
+    # Download runtime artifacts
     - template: /eng/pipelines/common/download-artifact-step.yml
       parameters:
-        unpackFolder: $(buildProductRootFolderPath)
-        artifactFileName: '$(buildProductArtifactName)$(archiveExtension)'
-        artifactName: '$(buildProductArtifactName)'
-        displayName: 'Coreclr product build'
+        unpackFolder: $(Build.SourcesDirectory)/artifacts/bin
+        artifactFileName: 'BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)$(archiveExtension)'
+        artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+        displayName: 'Runtime build artifacts'
 
-    # Download x64 coreclr if running on non-x64 configuration
-    - ${{ if ne(parameters.archType, 'x64') }}:
+    # Unzip individual test projects
+    - ${{ if eq(parameters.collectionName, 'libraries_tests') }}:
       - template: /eng/pipelines/common/download-artifact-step.yml
         parameters:
-          unpackFolder: '$(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).x64.$(buildConfigUpper)'
-          artifactFileName: 'CoreCLRProduct__$(osGroup)$(osSubgroup)_x64_$(buildConfig)$(archiveExtension)'
-          artifactName: 'CoreCLRProduct__$(osGroup)$(osSubgroup)_x64_$(buildConfig)'
-          displayName: 'Coreclr product build (x64)'
+          unpackFolder: '$(Build.SourcesDirectory)/artifacts/helix/'
+          artifactFileName: 'LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)$(archiveExtension)'
+          artifactName: LibrariesTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+          displayName: 'Libraries test artifacts'
+      - task: ExtractFiles@1
+        displayName: 'Unzip libraries tests'
+        inputs:
+          archiveFilePatterns: '$(Build.SourcesDirectory)/artifacts/helix/**/*.zip'
+          destinationFolder: '$(Build.SourcesDirectory)/artifacts/tests/libraries/$(osGroup).$(archType).$(buildConfigUpper)'
+          cleanDestinationFolder: true
+          overwriteExistingFiles: true
 
     # Download and unzip managed test artifacts
     - ${{ if eq(parameters.collectionName, 'coreclr_tests') }}:
@@ -79,25 +71,6 @@ jobs:
           artifactName: '$(managedGenericTestArtifactName)'
           displayName: 'generic managed test artifacts'
 
-    # Download and unzip libraries test artifacts
-    - ${{ if eq(parameters.collectionName, 'libraries_tests') }}:
-      - template: /eng/pipelines/common/download-artifact-step.yml
-        parameters:
-          unpackFolder: '$(Build.SourcesDirectory)/artifacts/tests/libraries_zipped/$(osGroup).$(archType).$(buildConfigUpper)'
-          artifactFileName: 'libraries_test_assets_${{ parameters.osGroup }}_$(archType)_Release$(archiveExtension)'
-          artifactName: ${{ format('libraries_test_assets_{0}_$(archType)_Release', parameters.osGroup) }}
-          displayName: 'generic libraries test artifacts'
-
-    # Unzip individual test projects
-    - ${{ if eq(parameters.collectionName, 'libraries_tests') }}:
-      - task: ExtractFiles@1
-        displayName: 'Unzip Tests.zip'
-        inputs:
-          archiveFilePatterns: '$(Build.SourcesDirectory)/artifacts/tests/libraries_zipped/$(osGroup).$(archType).$(buildConfigUpper)/**/*.zip'
-          destinationFolder: '$(Build.SourcesDirectory)/artifacts/tests/libraries/$(osGroup).$(archType).$(buildConfigUpper)'
-          cleanDestinationFolder: true
-          overwriteExistingFiles: true
-
     # Create Core_Root
     - script: $(Build.SourcesDirectory)/src/tests/build$(scriptExt) $(buildConfig) $(archType) generatelayoutonly $(librariesOverrideArg)
       displayName: Create Core_Root
diff --git a/eng/pipelines/coreclr/templates/superpmi-diffs-job.yml b/eng/pipelines/coreclr/templates/superpmi-diffs-job.yml
index 2375089e250b..f132c5ca7ea2 100644
--- a/eng/pipelines/coreclr/templates/superpmi-diffs-job.yml
+++ b/eng/pipelines/coreclr/templates/superpmi-diffs-job.yml
@@ -8,7 +8,6 @@ parameters:
   timeoutInMinutes: 240           # build timeout
   variables: {}
   helixQueues: ''
-  dependOnEvaluatePaths: false
   runJobTemplate: '/eng/pipelines/coreclr/templates/run-superpmi-diffs-job.yml'
   diffType: 'asmdiffs'            # required -- 'asmdiffs', 'tpdiff', or 'all'
   baseJitOptions: ''              # e.g. JitStressModeNames=STRESS_PHYSICAL_PROMOTION;JitFullyInt=1
@@ -25,7 +24,6 @@ jobs:
     osGroup: ${{ parameters.osGroup }}
     osSubgroup: ${{ parameters.osSubgroup }}
     condition: ${{ parameters.condition }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
     timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
     helixQueues: ${{ parameters.helixQueues }}
     diffType: ${{ parameters.diffType }}
@@ -33,9 +31,9 @@ jobs:
     diffJitOptions: ${{ parameters.diffJitOptions }}
     dependsOn:
       - ${{ if in(parameters.diffType, 'asmdiffs', 'all') }}:
-        - ${{ format('coreclr_jit_build_{0}{1}_{2}_checked', parameters.osGroup, parameters.osSubgroup, parameters.archType) }}
+        - 'build_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_checked_'
       - ${{ if in(parameters.diffType, 'tpdiff', 'all') }}:
-        - ${{ format('coreclr_jit_build_{0}{1}_{2}_release', parameters.osGroup, parameters.osSubgroup, parameters.archType) }}
+        - 'build_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_release_'
 
     variables:
     - ${{ each variable in parameters.variables }}:
@@ -48,8 +46,6 @@ jobs:
       - ${{ if ne(parameters.osGroup, 'windows') }}:
         - name: releaseProductRootFolderPath
           value: '$(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).Release'
-      - name: releaseProductArtifactName
-        value: 'CoreCLRProduct_${{ parameters.runtimeVariant }}_$(osGroup)$(osSubgroup)_$(archType)_release'
 
     steps:
 
@@ -57,16 +53,18 @@ jobs:
       # Download jit checked builds
       - template: /eng/pipelines/common/download-artifact-step.yml
         parameters:
-          unpackFolder: $(buildProductRootFolderPath)
-          artifactFileName: '$(buildProductArtifactName)$(archiveExtension)'
-          artifactName: '$(buildProductArtifactName)'
+          unpackFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr
+          artifactFileName: 'CheckedJIT_$(osGroup)$(osSubgroup)_$(archType)$(archiveExtension)'
+          artifactName: 'CheckedJIT_$(osGroup)$(osSubgroup)_$(archType)'
           displayName: 'JIT checked build'
+          cleanUnpackFolder: false
 
     - ${{ if in(parameters.diffType, 'tpdiff', 'all') }}:
       # Download jit release builds
       - template: /eng/pipelines/common/download-artifact-step.yml
         parameters:
-          unpackFolder: $(releaseProductRootFolderPath)
-          artifactFileName: '$(releaseProductArtifactName)$(archiveExtension)'
-          artifactName: '$(releaseProductArtifactName)'
+          unpackFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr
+          artifactFileName: 'ReleaseJIT_$(osGroup)$(osSubgroup)_$(archType)$(archiveExtension)'
+          artifactName: 'ReleaseJIT_$(osGroup)$(osSubgroup)_$(archType)'
           displayName: 'JIT release build'
+          cleanUnpackFolder: false
diff --git a/eng/pipelines/coreclr/templates/superpmi-replay-job.yml b/eng/pipelines/coreclr/templates/superpmi-replay-job.yml
index 979f95ce685d..ea7854339a21 100644
--- a/eng/pipelines/coreclr/templates/superpmi-replay-job.yml
+++ b/eng/pipelines/coreclr/templates/superpmi-replay-job.yml
@@ -8,7 +8,6 @@ parameters:
   timeoutInMinutes: 320           # build timeout
   variables: {}
   helixQueues: ''
-  dependOnEvaluatePaths: false
   runJobTemplate: '/eng/pipelines/coreclr/templates/run-superpmi-replay-job.yml'
 
 jobs:
@@ -22,11 +21,10 @@ jobs:
     osGroup: ${{ parameters.osGroup }}
     osSubgroup: ${{ parameters.osSubgroup }}
     condition: ${{ parameters.condition }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
     timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
     helixQueues: ${{ parameters.helixQueues }}
     dependsOn:
-     - ${{ format('coreclr_jit_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
+      - 'build_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_${{ parameters.buildConfig }}_'
 
     variables: ${{ parameters.variables }}
 
@@ -35,7 +33,8 @@ jobs:
     # Download jit builds
     - template: /eng/pipelines/common/download-artifact-step.yml
       parameters:
-        unpackFolder: $(buildProductRootFolderPath)
-        artifactFileName: '$(buildProductArtifactName)$(archiveExtension)'
-        artifactName: '$(buildProductArtifactName)'
-        displayName: 'JIT product build'
\ No newline at end of file
+        unpackFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr
+        artifactFileName: 'CheckedJIT_$(osGroup)$(osSubgroup)_$(archType)$(archiveExtension)'
+        artifactName: 'CheckedJIT_$(osGroup)$(osSubgroup)_$(archType)'
+        displayName: 'JIT checked build'
+        cleanUnpackFolder: false
diff --git a/eng/pipelines/coreclr/templates/upload-jits-steps.yml b/eng/pipelines/coreclr/templates/upload-jits-steps.yml
new file mode 100644
index 000000000000..912732b90b84
--- /dev/null
+++ b/eng/pipelines/coreclr/templates/upload-jits-steps.yml
@@ -0,0 +1,19 @@
+steps:
+# Add authenticated pip feed
+- task: PipAuthenticate@1
+  displayName: 'Pip Authenticate'
+  inputs:
+    artifactFeeds: public/dotnet-public-pypi
+    onlyAddExtraIndex: false
+
+- script: $(PythonSetupScript)
+  displayName: Enable python venv
+
+# Ensure the Python azure-storage-blob package is installed before doing the upload.
+- script: $(PipScript) install --upgrade pip && $(PipScript) install azure.storage.blob==12.5.0 --force-reinstall
+  displayName: Upgrade Pip to latest and install azure-storage-blob Python package
+
+- script: $(PythonScript) $(Build.SourcesDirectory)/src/coreclr/scripts/jitrollingbuild.py upload -build_type $(_BuildConfig) -arch $(archType) -host_os $(osGroup) -git_hash $(Build.SourceVersion) --use_latest_jit_change
+  displayName: Upload JIT to Azure Storage
+  env:
+    CLRJIT_AZ_KEY: $(clrjit_key1) # secret key stored as variable in pipeline
diff --git a/eng/pipelines/coreclr/templates/xplat-pipeline-job.yml b/eng/pipelines/coreclr/templates/xplat-pipeline-job.yml
deleted file mode 100644
index b6c57be0c7fb..000000000000
--- a/eng/pipelines/coreclr/templates/xplat-pipeline-job.yml
+++ /dev/null
@@ -1,132 +0,0 @@
-parameters:
-  buildConfig: ''
-  archType: ''
-  osGroup: ''
-  osSubgroup: ''
-  name: ''
-  helixType: '(unspecified)'
-  container: ''
-  testGroup: ''
-  crossBuild: false
-  liveLibrariesBuildConfig: ''
-  strategy: ''
-  pool: ''
-
-  # arcade-specific parameters
-  condition: true
-  continueOnError: false
-  dependsOn: ''
-  dependOnEvaluatePaths: false
-  displayName: ''
-  timeoutInMinutes: ''
-  enableMicrobuild: ''
-  gatherAssetManifests: false
-  disableComponentGovernance: false
-
-  variables: {} ## any extra variables to add to the defaults defined below
-
-jobs:
-- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
-  parameters:
-    buildConfig: ${{ parameters.buildConfig }}
-    archType: ${{ parameters.archType }}
-    osGroup: ${{ parameters.osGroup }}
-    osSubgroup: ${{ parameters.osSubgroup }}
-    name: ${{ parameters.name }}
-    helixType: ${{ parameters.helixType }}
-    container: ${{ parameters.container }}
-    crossBuild: ${{ parameters.crossBuild }}
-    strategy: ${{ parameters.strategy }}
-    pool: ${{ parameters.pool }}
-
-    # arcade-specific parameters
-    condition: and(succeeded(), ${{ parameters.condition }})
-    continueOnError: ${{ parameters.continueOnError }}
-    dependsOn: ${{ parameters.dependsOn }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
-    displayName: ${{ parameters.displayName }}
-    timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
-    enableMicrobuild: ${{ parameters.enableMicrobuild }}
-    gatherAssetManifests: ${{ parameters.gatherAssetManifests }}
-    disableComponentGovernance: ${{ parameters.disableComponentGovernance }}
-
-    variables:
-    - ${{ if ne(parameters.testGroup, '') }}:
-      - name: testArtifactRootName
-        value: ${{ parameters.Group }}${{ parameters.Subgroup }}_${{ parameters.archType }}_${{ parameters.buildConfig }}_${{ parameters.testGroup }}
-
-    - ${{ if eq(parameters.testGroup, '') }}:
-      - name: testArtifactRootName
-        value: ${{ parameters.Group }}${{ parameters.Subgroup }}_${{ parameters.archType }}_${{ parameters.buildConfig }}
-
-    - name: binTestsPath
-      value: '$(Build.SourcesDirectory)/artifacts/tests/coreclr'
-
-    # Build product defines what we are trying to build, either coreclr or mono
-    - name: buildProductArtifactName
-      value: 'CoreCLRProduct_${{ parameters.runtimeVariant }}_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-
-    - name: buildProductRootFolderPath
-      value: '$(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).$(buildConfigUpper)'
-
-    # We need this because both mono and coreclr build currently depends on CoreClr
-    - name: coreClrProductArtifactName
-      value: 'CoreCLRProduct_${{ parameters.runtimeVariant }}_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-
-    - name: coreClrProductRootFolderPath
-      value: '$(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).$(buildConfigUpper)'
-
-    - name: corelibProductArtifactName
-      value: 'CoreLib_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-
-    - name: managedGenericTestArtifactName
-      value: 'CoreCLRManagedTestArtifacts_AnyOS_AnyCPU_$(buildConfig)'
-
-    - name: managedTestArtifactRootFolderPath
-      value: '$(binTestsPath)/$(osGroup).$(archType).$(buildConfigUpper)'
-
-    - name: nativeTestArtifactName
-      value: 'CoreCLRNativeTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-
-    - name: nativeTestArtifactRootFolderPath
-      value: '$(binTestsPath)/obj/$(osGroup).$(archType).$(buildConfigUpper)'
-
-    - name: microsoftNetSdkIlFolderPath
-      value: '$(Build.SourcesDirectory)/.packages/microsoft.net.sdk.il'
-
-    - name: microsoftNetSdkIlArtifactName
-      value: 'MicrosoftNetSdkIlPackage_AnyOS_AnyCPU_$(buildConfig)'
-
-    - name: priorityArg
-      value: ''
-
-    # 'innerloop' and 'clrinterpreter' jobs run the Priority 0 tests; everything else runs the Priority 1 tests.
-    # 'gc-standalone' is forced to run pri0 as well to start with.
-    - ${{ if and(ne(parameters.testGroup, 'innerloop'), ne(parameters.testGroup, 'clrinterpreter'), ne(parameters.testGroup, 'gc-standalone'), ne(parameters.testGroup, 'gc-standalone-server') ) }}:
-      - ${{ if ne(parameters.osGroup, 'windows') }}:
-        - name: priorityArg
-          value: 'priority1'
-      - ${{ if eq(parameters.osGroup, 'windows') }}:
-        - name: priorityArg
-          value: '-priority 1'
-
-    - librariesBuildArtifactName: ''
-    - librariesOverrideArg: ''
-    - librariesDownloadDir: ''
-
-    - ${{ if ne(parameters.liveLibrariesBuildConfig, '') }}:
-      - librariesBuildArtifactName: ${{ format('libraries_bin_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.liveLibrariesBuildConfig) }}
-      - librariesDownloadDir: $(Build.SourcesDirectory)/artifacts
-      - librariesOverrideArg: ' /p:LibrariesConfiguration=${{ parameters.liveLibrariesBuildConfig }}'
-
-    - ${{ each variable in parameters.variables }}:
-      - ${{insert}}: ${{ variable }}
-
-    - name: osArg
-      value: ''
-
-    - ${{ if eq(parameters.osGroup, 'freebsd') }}:
-      - name: osArg
-        value: -os freebsd
-
-    steps: ${{ parameters.steps }}
diff --git a/eng/pipelines/coreclr/tieringtest.yml b/eng/pipelines/coreclr/tieringtest.yml
index 117fac1b5ded..2ce790dcc26f 100644
--- a/eng/pipelines/coreclr/tieringtest.yml
+++ b/eng/pipelines/coreclr/tieringtest.yml
@@ -20,15 +20,29 @@ extends:
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - linux_x64
           - windows_x64
           - windows_x86
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            testGroup: outerloop
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: outerloop
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -54,3 +68,4 @@ extends:
             tieringTest: true
             displayNameArgs: TieringTest
             liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-android.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-android.yml
index 9fd1769fe18f..71d304c556e9 100644
--- a/eng/pipelines/extra-platforms/runtime-extra-platforms-android.yml
+++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-android.yml
@@ -68,9 +68,9 @@ jobs:
     variables:
       # map dependencies variables to local variables
       - name: librariesContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
       - name: monoContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
     jobParameters:
       testGroup: innerloop
       nameSuffix: AllSubsets_Mono
diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-androidemulator.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-androidemulator.yml
index a114b1b744a1..4badaf93186c 100644
--- a/eng/pipelines/extra-platforms/runtime-extra-platforms-androidemulator.yml
+++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-androidemulator.yml
@@ -103,9 +103,9 @@ jobs:
     variables:
       # map dependencies variables to local variables
       - name: librariesContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
       - name: monoContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
     jobParameters:
       testGroup: innerloop
       nameSuffix: AllSubsets_Mono
diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslike.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslike.yml
index ef0425042b5e..051cb8b286bd 100644
--- a/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslike.yml
+++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslike.yml
@@ -28,9 +28,9 @@ jobs:
     variables:
       # map dependencies variables to local variables
       - name: librariesContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
       - name: monoContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
     jobParameters:
       testGroup: innerloop
       nameSuffix: AllSubsets_Mono
@@ -109,9 +109,9 @@ jobs:
     variables:
       # map dependencies variables to local variables
       - name: librariesContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
       - name: coreclrContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'] ]
     jobParameters:
       testGroup: innerloop
       nameSuffix: AllSubsets_NativeAOT
diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslikesimulator.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslikesimulator.yml
index b11b4be72ed6..7ce0a0c3568a 100644
--- a/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslikesimulator.yml
+++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-ioslikesimulator.yml
@@ -31,9 +31,9 @@ jobs:
     variables:
       # map dependencies variables to local variables
       - name: librariesContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
       - name: monoContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
     jobParameters:
       testGroup: innerloop
       nameSuffix: AllSubsets_Mono
diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-maccatalyst.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-maccatalyst.yml
index 936fe60bb483..d21dd56df264 100644
--- a/eng/pipelines/extra-platforms/runtime-extra-platforms-maccatalyst.yml
+++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-maccatalyst.yml
@@ -28,9 +28,9 @@ jobs:
     variables:
       # map dependencies variables to local variables
       - name: librariesContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
       - name: monoContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
     jobParameters:
       testGroup: innerloop
       nameSuffix: AllSubsets_Mono
@@ -62,9 +62,9 @@ jobs:
     variables:
       # map dependencies variables to local variables
       - name: librariesContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
       - name: monoContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
     jobParameters:
       testGroup: innerloop
       nameSuffix: AllSubsets_Mono_AppSandbox
diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-other.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-other.yml
index 10b3e9baca01..e47cb4996cc7 100644
--- a/eng/pipelines/extra-platforms/runtime-extra-platforms-other.yml
+++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-other.yml
@@ -7,50 +7,13 @@ parameters:
 
 jobs:
 
-#
-# Build CoreCLR release
-# Always as they are needed by Installer and we always build and test the Installer.
-#
-- template: /eng/pipelines/common/platform-matrix.yml
-  parameters:
-    jobTemplate: /eng/pipelines/coreclr/templates/build-job.yml
-    buildConfig: release
-    platforms:
-    - linux_x64
-    - linux_arm
-    - linux_arm64
-    - linux_musl_x64
-    - osx_x64
-    - windows_x64
-    - windows_x86
-    - windows_arm64
-    jobParameters:
-      testGroup: innerloop
-
-#
-# Build libraries using live CoreLib
-#
-- template: /eng/pipelines/common/platform-matrix.yml
-  parameters:
-    jobTemplate: /eng/pipelines/libraries/build-job.yml
-    buildConfig: Release
-    platforms:
-    - linux_x64
-    - linux_arm
-    - linux_arm64
-    - linux_musl_x64
-    - osx_x64
-    - windows_x64
-    - windows_x86
-    - windows_arm64
-
 #
 # Libraries Release Test Execution against a release coreclr runtime
 # Only when the PR contains a libraries change
 #
 - template: /eng/pipelines/common/platform-matrix.yml
   parameters:
-    jobTemplate: /eng/pipelines/libraries/run-test-job.yml
+    jobTemplate: /eng/pipelines/common/global-build-job.yml
     buildConfig: Release
     platforms:
     - linux_x64
@@ -63,13 +26,18 @@ jobs:
     - windows_arm64
     helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml
     jobParameters:
-      isOfficialBuild: false
-      isExtraPlatforms: ${{ parameters.isExtraPlatformsBuild }}
-      testScope: innerloop
-      liveRuntimeBuildConfig: release
+      buildArgs: -s tools+clr+libs+libs.tests -c $(_BuildConfig) -testscope innerloop /p:ArchiveTests=true
+      nameSuffix: Libraries_Release_CoreCLR
+      timeoutInMinutes: 150
+      postBuildSteps:
+        - template: /eng/pipelines/libraries/helix.yml
+          parameters:
+            creator: dotnet-bot
+            testRunNamePrefixSuffix: Libraries_Release_CoreCLR
+      isExtraPlatformsBuild: ${{ parameters.isExtraPlatformsBuild }}
       condition: >-
         or(
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+          eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
           eq(variables['isRollingBuild'], true))
 
 # Run net48 tests on win-x64
@@ -91,9 +59,10 @@ jobs:
             creator: dotnet-bot
             testRunNamePrefixSuffix: NET48_$(_BuildConfig)
             extraHelixArguments: /p:BuildTargetFramework=net48
+      isExtraPlatformsBuild: ${{ parameters.isExtraPlatformsBuild }}
       condition: >-
         or(
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+          eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
           eq(variables['isRollingBuild'], true))
 
 #### MONO LEGS
@@ -112,19 +81,20 @@ jobs:
     variables:
       # map dependencies variables to local variables
       - name: librariesContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
       - name: monoContainsChange
-        value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+        value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
     jobParameters:
       testScope: innerloop
       nameSuffix: AllSubsets_Mono
       buildArgs: -s mono+libs+host+packs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true
       timeoutInMinutes: 120
+      isExtraPlatformsBuild: ${{ parameters.isExtraPlatformsBuild }}
       condition: >-
         or(
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+          eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+          eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+          eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
           eq(variables['isRollingBuild'], true))
       # extra steps, run tests
       postBuildSteps:
@@ -139,68 +109,7 @@ jobs:
               eq(variables['isRollingBuild'], true))
 
 #
-# Build the whole product using Mono and run runtime tests
-# Build Mono release
-# Only when libraries, mono, or the runtime tests changed
-# Currently only these architectures are needed for the runtime tests.
-- template: /eng/pipelines/common/platform-matrix.yml
-  parameters:
-    jobTemplate: /eng/pipelines/mono/templates/build-job.yml
-    runtimeFlavor: mono
-    buildConfig: release
-    platforms:
-    - linux_arm64
-    jobParameters:
-      condition: >-
-        or(
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-          eq(variables['isRollingBuild'], true))
-
-#
-# Mono Test builds with CoreCLR runtime tests using live libraries debug build
-# Only when Mono is changed
-- template: /eng/pipelines/common/platform-matrix.yml
-  parameters:
-    jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-    buildConfig: release
-    runtimeFlavor: mono
-    platforms:
-    - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-    jobParameters:
-      testGroup: innerloop
-      condition: >-
-        or(
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-          eq(variables['isRollingBuild'], true))
-
-#
-# Mono CoreCLR runtime Test executions using live libraries in jit mode
-# Only when Mono is changed
-- template: /eng/pipelines/common/platform-matrix.yml
-  parameters:
-    jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-    buildConfig: release
-    runtimeFlavor: mono
-    platforms:
-    - linux_arm64
-    helixQueueGroup: pr
-    helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-    jobParameters:
-      testGroup: innerloop
-      liveLibrariesBuildConfig: Release
-      liveRuntimeBuildConfig: release
-      runtimeVariant: minijit
-      condition: >-
-        or(
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-          eq(variables['isRollingBuild'], true))
-
-#
-# Mono CoreCLR runtime Test executions using live libraries and LLVM Full AOT
+# Mono CoreCLR runtime test executions using live libraries and mini Full AOT
 # Only when Mono is changed
 #
 - template: /eng/pipelines/common/platform-matrix.yml
@@ -211,7 +120,8 @@ jobs:
     runtimeFlavor: mono
     platforms:
       - linux_x64
-      - linux_arm64
+      # Tracking issue: https://github.com/dotnet/runtime/issues/90427
+      # linux_arm64
     variables:
       - name: timeoutPerTestInMinutes
         value: 60
@@ -219,21 +129,21 @@ jobs:
         value: 180
     jobParameters:
       testGroup: innerloop
-      nameSuffix: AllSubsets_Mono_LLVMFullAot_RuntimeTests
-      runtimeVariant: llvmfullaot
-      buildArgs: -s mono+libs+clr.hosts+clr.iltools -c Release /p:MonoEnableLLVM=true /p:MonoBundleLLVMOptimizer=true
+      nameSuffix: AllSubsets_Mono_MiniFullAot_RuntimeTests
+      runtimeVariant: minifullaot
+      buildArgs: -s mono+libs+clr.hosts -c Release
       timeoutInMinutes: 300
-
+      isExtraPlatformsBuild: ${{ parameters.isExtraPlatformsBuild }}
       condition: >-
         or(
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+          eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+          eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
           eq(variables['isRollingBuild'], true))
       postBuildSteps:
         - template: /eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml
           parameters:
             creator: dotnet-bot
-            llvmAotStepContainer: linux_x64_llvmaot
+            llvmAotStepContainer: linux_x64
             testRunNamePrefixSuffix: Mono_Release
       extraVariablesTemplates:
         - template: /eng/pipelines/common/templates/runtimes/test-variables.yml
@@ -241,28 +151,43 @@ jobs:
 #
 # Mono CoreCLR runtime Test executions using live libraries in interpreter mode
 # Only when Mono is changed
+
 - template: /eng/pipelines/common/platform-matrix.yml
   parameters:
-    jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-    buildConfig: release
+    jobTemplate: /eng/pipelines/common/global-build-job.yml
+    helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+    buildConfig: Release
     runtimeFlavor: mono
     platforms:
-    - linux_arm64
-    helixQueueGroup: pr
-    helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+      - linux_arm64
+      - osx_arm64
+    variables:
+      - name: timeoutPerTestInMinutes
+        value: 60
+      - name: timeoutPerTestCollectionInMinutes
+        value: 180
     jobParameters:
       testGroup: innerloop
-      liveLibrariesBuildConfig: Release
-      liveRuntimeBuildConfig: release
+      nameSuffix: AllSubsets_Mono_Interpreter_RuntimeTests
       runtimeVariant: monointerpreter
+      buildArgs: -s mono+libs+clr.hosts+clr.iltools -c Release
+      timeoutInMinutes: 180
+      isExtraPlatformsBuild: ${{ parameters.isExtraPlatformsBuild }}
       condition: >-
         or(
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+          eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+          eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
           eq(variables['isRollingBuild'], true))
+      postBuildSteps:
+        - template: /eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml
+          parameters:
+            creator: dotnet-bot
+            testRunNamePrefixSuffix: Mono_Release
+      extraVariablesTemplates:
+        - template: /eng/pipelines/common/templates/runtimes/test-variables.yml
 
 #
-# Mono CoreCLR runtime Test executions using live libraries in interpreter mode
+# Mono CoreCLR runtime Test executions using live libraries in JIT mode
 # Only when Mono is changed
 
 - template: /eng/pipelines/common/platform-matrix.yml
@@ -272,7 +197,7 @@ jobs:
     buildConfig: Release
     runtimeFlavor: mono
     platforms:
-      - osx_arm64
+      - linux_arm64
     variables:
       - name: timeoutPerTestInMinutes
         value: 60
@@ -280,14 +205,15 @@ jobs:
         value: 180
     jobParameters:
       testGroup: innerloop
-      nameSuffix: AllSubsets_Mono_Interpreter_RuntimeTests
-      runtimeVariant: monointerpreter
+      nameSuffix: AllSubsets_Mono_Minijit_RuntimeTests
+      runtimeVariant: minijit
       buildArgs: -s mono+libs+clr.hosts+clr.iltools -c Release
       timeoutInMinutes: 180
+      isExtraPlatformsBuild: ${{ parameters.isExtraPlatformsBuild }}
       condition: >-
         or(
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-          eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+          eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+          eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
           eq(variables['isRollingBuild'], true))
       postBuildSteps:
         - template: /eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml
diff --git a/eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml b/eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml
index 31d15946c50d..fc8d757233cd 100644
--- a/eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml
+++ b/eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml
@@ -290,17 +290,18 @@ jobs:
       # ff tests are unstable currently
       shouldContinueOnError: true
 
-  - template: /eng/pipelines/common/templates/wasm-debugger-tests.yml
-    parameters:
-      platforms:
-        - Browser_wasm
-        - Browser_wasm_win
-      extraBuildArgs: /p:WasmEnableThreads=true /p:AotHostArchitecture=x64 /p:AotHostOS=$(_hostedOS)
-      nameSuffix: DebuggerTests_MultiThreaded
-      alwaysRun: ${{ parameters.isWasmOnlyBuild }}
-      isExtraPlatformsBuild: ${{ parameters.isExtraPlatformsBuild }}
-      isWasmOnlyBuild: ${{ parameters.isWasmOnlyBuild }}
-      runOnlyOnWasmOnlyPipelines: true
+  # Active Issue https://github.com/dotnet/runtime/issues/98771
+  # - template: /eng/pipelines/common/templates/wasm-debugger-tests.yml
+  #   parameters:
+  #     platforms:
+  #       - Browser_wasm
+  #       - Browser_wasm_win
+  #     extraBuildArgs: /p:WasmEnableThreads=true /p:AotHostArchitecture=x64 /p:AotHostOS=$(_hostedOS)
+  #     nameSuffix: DebuggerTests_MultiThreaded
+  #     alwaysRun: ${{ parameters.isWasmOnlyBuild }}
+  #     isExtraPlatformsBuild: ${{ parameters.isExtraPlatformsBuild }}
+  #     isWasmOnlyBuild: ${{ parameters.isWasmOnlyBuild }}
+  #     runOnlyOnWasmOnlyPipelines: true
 
   # Disable for now
   #- template: /eng/pipelines/coreclr/perf-wasm-jobs.yml
diff --git a/eng/pipelines/global-build.yml b/eng/pipelines/global-build.yml
index 0ce7a20fbc9f..32e4ec79c831 100644
--- a/eng/pipelines/global-build.yml
+++ b/eng/pipelines/global-build.yml
@@ -30,16 +30,17 @@ variables:
 extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
+    isOfficialBuild: false
     stages:
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
+          - template: /eng/pipelines/common/evaluate-default-paths.yml
+
     - stage: Build
       jobs:
 
-      #
-      # Evaluate paths
-      #
-      - ${{ if eq(variables.dependOnEvaluatePaths, true) }}:
-        - template: /eng/pipelines/common/evaluate-default-paths.yml
-
       #
       # Build with Release config and Debug runtimeConfiguration
       #
@@ -58,8 +59,8 @@ extends:
             timeoutInMinutes: 120
             condition:
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -78,8 +79,8 @@ extends:
             timeoutInMinutes: 120
             condition:
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -98,8 +99,8 @@ extends:
             timeoutInMinutes: 120
             condition:
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -119,7 +120,7 @@ extends:
             timeoutInMinutes: 120
             condition:
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_non_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -139,7 +140,7 @@ extends:
             timeoutInMinutes: 120
             condition:
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_non_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -158,7 +159,7 @@ extends:
             timeoutInMinutes: 120
             condition:
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -174,4 +175,4 @@ extends:
             nameSuffix: PortableSourceBuild
             timeoutInMinutes: 95
             condition:
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true)
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true)
diff --git a/eng/pipelines/installer/jobs/steps/build-linux-package.yml b/eng/pipelines/installer/jobs/steps/build-linux-package.yml
deleted file mode 100644
index 102eab770c27..000000000000
--- a/eng/pipelines/installer/jobs/steps/build-linux-package.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-parameters:
-  packageType: null
-  target: ''
-  packageStepDescription: null
-  packagingArgs: ''
-  subsetArg: ''
-  condition: succeeded()
-
-steps:
-## Run NuGet Authentication for each of the side containers
-- ${{ if ne(variables['System.TeamProject'], 'public') }}:
-  - task: NuGetAuthenticate@1
-    target: ${{ parameters.target }}
-- script: |
-    $(Build.SourcesDirectory)/build.sh \
-      --ci \
-      ${{ parameters.subsetArg }} \
-      ${{ parameters.packagingArgs }} \
-      $(CommonMSBuildArgs) \
-      $(LiveOverridePathArgs) \
-      /bl:artifacts/log/$(_BuildConfig)/msbuild.${{ parameters.packageType }}.installers.binlog
-  displayName: Package ${{ parameters.packageStepDescription }} - ${{ parameters.packageType }}
-  target: ${{ parameters.target }}
-  condition: ${{ parameters.condition }}
-# Broken symbolic links break the SBOM processing
-# We make some symlinks during the installer generation process,
-# but they aren't always valid on disk afterwards. Some of our tooling,
-# in particular the SBOM tooling, breaks on broken symlinks.
-- script: find . -xtype l -delete
-  displayName: Remove broken symbolic links
diff --git a/eng/pipelines/installer/jobs/steps/upload-job-artifacts.yml b/eng/pipelines/installer/jobs/steps/upload-job-artifacts.yml
deleted file mode 100644
index 4012b9a4fa34..000000000000
--- a/eng/pipelines/installer/jobs/steps/upload-job-artifacts.yml
+++ /dev/null
@@ -1,75 +0,0 @@
-parameters:
-  name: ''
-  runtimeFlavor: 'coreclr'
-  runtimeVariant: ''
-  isOfficialBuild: false
-
-steps:
-# Upload build artifacts (packages) to pipeline only if official, to save storage space.
-- ${{ if eq(parameters.isOfficialBuild, true) }}:
-  - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml
-    parameters:
-      name: ${{ parameters.name }}
-
-- task: PublishTestResults@2
-  displayName: Publish Test Results
-  inputs:
-    testResultsFormat: 'VSTest'
-    testResultsFiles: '*.trx'
-    searchFolder: '$(Build.SourcesDirectory)/artifacts/TestResults/$(_BuildConfig)'
-    mergeTestResults: true
-    testRunTitle: Installer-${{ parameters.runtimeFlavor }}-${{ parameters.name }}-$(_BuildConfig)
-  continueOnError: true
-  condition: eq(variables.SkipTests, false)
-
-# Upload binaries and symbols on failure to allow debugging issues
-- ${{ if eq(parameters.skipTests, false) }}:
-  - task: CopyFiles@2
-    displayName: Prepare binaries to publish
-    inputs:
-      SourceFolder: '$(Build.SourcesDirectory)/artifacts/bin'
-      Contents: |
-        */corehost/**
-        */corehost_test/**
-      TargetFolder: '$(Build.StagingDirectory)/Binaries'
-    continueOnError: true
-    condition: failed()
-
-  - task: ArchiveFiles@2
-    displayName: Zip binaries
-    inputs:
-      rootFolderOrFile: '$(Build.StagingDirectory)/Binaries'
-      archiveFile: '$(Build.StagingDirectory)/corehost-bin-${{ parameters.name }}-$(_BuildConfig)$(archiveExtension)'
-      archiveType: $(archiveType)
-      tarCompression: $(tarCompression)
-      includeRootFolder: false
-    continueOnError: true
-    condition: failed()
-
-  - task: PublishBuildArtifacts@1
-    displayName: Publish binaries
-    inputs:
-      pathtoPublish: '$(Build.StagingDirectory)/corehost-bin-${{ parameters.name }}-$(_BuildConfig)$(archiveExtension)'
-      artifactName: Installer-Binaries-${{ parameters.runtimeFlavor }}-${{ parameters.runtimeVariant }}-${{ parameters.name }}-$(_BuildConfig)
-    continueOnError: true
-    condition: failed()
-
-- task: CopyFiles@2
-  displayName: Prepare BuildLogs staging directory
-  inputs:
-    SourceFolder: '$(Build.SourcesDirectory)'
-    Contents: |
-      **/*.log
-      **/*.binlog
-    TargetFolder: '$(Build.StagingDirectory)/BuildLogs'
-    CleanTargetFolder: true
-  continueOnError: true
-  condition: always()
-
-- task: PublishPipelineArtifact@1
-  displayName: Publish BuildLogs
-  inputs:
-    targetPath: '$(Build.StagingDirectory)/BuildLogs'
-    artifactName: Installer-Logs_Attempt$(System.JobAttempt)-${{ parameters.runtimeFlavor }}-${{ parameters.runtimeVariant }}-${{ parameters.name }}-$(_BuildConfig)
-  continueOnError: true
-  condition: always()
diff --git a/eng/pipelines/installer/steps/build-linux-package.yml b/eng/pipelines/installer/steps/build-linux-package.yml
new file mode 100644
index 000000000000..ef905d6c1646
--- /dev/null
+++ b/eng/pipelines/installer/steps/build-linux-package.yml
@@ -0,0 +1,33 @@
+parameters:
+  osGroup: ''
+  osSubgroup: ''
+  packageType: null
+  target: ''
+  packageStepDescription: null
+  packagingArgs: ''
+  condition: succeeded()
+
+steps:
+- ${{ if and(eq(parameters.osGroup, 'linux'), eq(parameters.osSubgroup, '')) }}:
+  ## Run NuGet Authentication for each of the side containers
+  - ${{ if and(ne(variables['System.TeamProject'], 'public'), ne(parameters.target, '')) }}:
+    - task: NuGetAuthenticate@1
+      target: ${{ parameters.target }}
+      condition: ${{ parameters.condition }}
+
+  - template: /eng/pipelines/common/templates/global-build-step.yml
+    parameters:
+      buildArgs: -s packs.installers ${{ parameters.packagingArgs }} /bl:artifacts/log/$(_BuildConfig)/msbuild.${{ parameters.packageType }}.installers.binlog
+      container: ${{ parameters.target }}
+      displayName: Package Runtime Deps, Runtime, Framework Packs - ${{ parameters.packageType }} packages
+      # Even for cross-build targets, our installer build steps are not cross-builds
+      crossArg: ''
+      condition: ${{ parameters.condition }}
+
+  # Broken symbolic links break the SBOM processing
+  # We make some symlinks during the installer generation process,
+  # but they aren't always valid on disk afterwards. Some of our tooling,
+  # in particular the SBOM tooling, breaks on broken symlinks.
+  - script: find . -xtype l -delete
+    displayName: Remove broken symbolic links
+    condition: ${{ parameters.condition }}
diff --git a/eng/pipelines/installer/steps/upload-job-artifacts.yml b/eng/pipelines/installer/steps/upload-job-artifacts.yml
new file mode 100644
index 000000000000..076f989780bb
--- /dev/null
+++ b/eng/pipelines/installer/steps/upload-job-artifacts.yml
@@ -0,0 +1,26 @@
+parameters:
+  name: ''
+
+steps:
+# Upload binaries and symbols on failure to allow debugging issues
+- task: CopyFiles@2
+  displayName: Prepare binaries to publish
+  inputs:
+    SourceFolder: '$(Build.SourcesDirectory)/artifacts/bin'
+    Contents: |
+      */corehost/**
+      */corehost_test/**
+    TargetFolder: '$(Build.StagingDirectory)/Binaries'
+  continueOnError: true
+  condition: failed()
+
+- template: /eng/pipelines/common/upload-artifact-step.yml
+  parameters:
+    rootFolder: '$(Build.StagingDirectory)/Binaries'
+    includeRootFolder: false
+    archiveType: $(archiveType)
+    archiveExtension: $(archiveExtension)
+    tarCompression: $(tarCompression)
+    artifactName: 'Installer-Binaries-${{ parameters.name }}-$(_BuildConfig)'
+    displayName: 'Binaries'
+    condition: failed()
diff --git a/eng/pipelines/libraries/base-job.yml b/eng/pipelines/libraries/base-job.yml
deleted file mode 100644
index f41dd67d02a4..000000000000
--- a/eng/pipelines/libraries/base-job.yml
+++ /dev/null
@@ -1,146 +0,0 @@
-parameters:
-  buildConfig: ''
-  osGroup: ''
-  archType: ''
-  osSubgroup: ''
-  crossBuild: false
-  framework: 'net9.0'
-  isSourceBuild: false
-  liveRuntimeBuildConfig: ''
-  runtimeFlavor: 'coreclr'
-  timeoutInMinutes: 150
-  condition: true
-  container: ''
-  steps: []
-  dependsOn: []
-  dependOnEvaluatePaths: false
-  disableComponentGovernance: false
-  variables: {}
-  name: ''
-  displayName: ''
-  testDisplayName: ''
-  testScope: ''
-  pool: ''
-  runTests: false
-  SuperPmiCollect: false
-
-jobs:
-  - template: /eng/common/templates/job/job.yml
-    parameters:
-      displayName: ${{ format('Libraries {0} {1}{2} {3} {4}', parameters.displayName, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-      name: ${{ format('libraries_{0}_{1}{2}_{3}_{4}', parameters.name, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-
-      enableTelemetry: ${{ parameters.isOfficialBuild }} # TODO: figure out if it's needed
-      container: ${{ parameters.container }}
-      condition: and(succeeded(), ${{ parameters.condition }})
-      helixRepo: dotnet/runtime
-      pool: ${{ parameters.pool }}
-
-      # Disable component governance if requested or on musl machines where it does not work well
-      ${{ if or(eq(parameters.disableComponentGovernance, true), eq(parameters.osSubGroup, '_musl')) }}:
-        disableComponentGovernance: true
-
-      variables:
-        - ${{ if eq(variables['System.TeamProject'], 'internal') }}:
-          - group: DotNet-HelixApi-Access
-          - group: AzureDevOps-Artifact-Feeds-Pats
-
-        - _buildScriptFileName: build
-
-        - _msbuildCommonParameters: ''
-        # rename this variable, due to collision with build-native.proj
-        - _osArg: ''
-        - _finalFrameworkArg: ''
-        - _testModeArg: ''
-        - _buildScript: $(_buildScriptFileName)$(scriptExt)
-        - _testScopeArg: ''
-        - _extraHelixArguments: ''
-        - _crossBuildPropertyArg: ''
-        - _testRunNamePrefixSuffix: ''
-
-        - librariesBuildArtifactName: ${{ format('libraries_bin_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-
-        - ${{ if ne(parameters.testScope, '') }}:
-          - _testScopeArg: -testscope ${{ parameters.testScope }}
-
-        - ${{ if eq(parameters.crossBuild, true) }}:
-          - _crossBuildPropertyArg: /p:CrossBuild=true
-
-        # force a value for OS when cross-building
-        - ${{ if in(parameters.osGroup, 'browser', 'ios', 'tvos', 'android', 'freebsd') }}:
-          - _osArg: -os ${{ parameters.osGroup }}
-
-        - ${{ if ne(parameters.framework, '') }}:
-          - _finalFrameworkArg: -framework ${{ parameters.framework }}
-          - _extraHelixArguments: /p:BuildTargetFramework=${{ parameters.framework }}
-
-        - ${{ if eq(parameters.isOfficialBuild, true) }}:
-          - _msbuildCommonParameters: /p:OfficialBuildId=$(Build.BuildNumber)
-
-        - _runtimeArtifactName: ''
-        - _runtimeDownloadPath: ''
-        - _runtimeArtifactsPathArg: ''
-        - _runtimeConfigurationArg: ''
-
-        - ${{ if ne(parameters.liveRuntimeBuildConfig, '') }}:
-          - _runtimeDownloadPath: '$(Build.SourcesDirectory)/artifacts/transport/${{ parameters.runtimeFlavor }}'
-          - _runtimeConfigurationArg: -rc ${{ parameters.liveRuntimeBuildConfig }}
-          - ${{ if eq(parameters.runTests, true) }}:
-            - _runtimeArtifactName: '$(runtimeFlavorName)Product_${{ parameters.runtimeVariant}}_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_${{ parameters.liveRuntimeBuildConfig }}'
-            - _runtimeArtifactsPathArg: ' /p:RuntimeArtifactsPath=$(_runtimeDownloadPath)'
-          - ${{ if eq(parameters.testDisplayName, '') }}:
-            - _testRunNamePrefixSuffix: $(runtimeFlavorName)_${{ parameters.liveRuntimeBuildConfig }}
-          - ${{ if ne(parameters.testDisplayName, '') }}:
-            - _testRunNamePrefixSuffix: ${{ parameters.testDisplayName }}
-
-        - ${{ if ne(parameters.osGroup, 'windows') }}:
-          - _buildScript: ./$(_buildScriptFileName)$(scriptExt)
-
-        - _buildArguments: $(_runtimeConfigurationArg) -configuration ${{ parameters.buildConfig }} -ci -arch ${{ parameters.archType }} $(_finalFrameworkArg) $(_testModeArg) $(_testScopeArg) $(_osArg) $(_msbuildCommonParameters) $(_runtimeArtifactsPathArg) $(_crossBuildPropertyArg)
-        - ${{ parameters.variables }}
-
-        # we need to override this value to support build-coreclr-and-libraries-job.yml
-        - _BuildConfig: ${{ parameters.buildConfig }}
-
-      dependsOn:
-      - ${{ if eq(parameters.dependOnEvaluatePaths, true) }}:
-        - evaluate_paths
-      - ${{ parameters.dependsOn }}
-      workspace:
-        clean: all
-
-      enablePublishBuildArtifacts: true
-      timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
-
-      steps:
-      - checkout: self
-        clean: true
-        fetchDepth: $(checkoutFetchDepth)
-
-      - ${{ if and(ne(parameters.liveRuntimeBuildConfig, ''), eq(parameters.runTests, true)) }}:
-        - template: /eng/pipelines/common/download-artifact-step.yml
-          parameters:
-            unpackFolder: $(_runtimeDownloadPath)
-            artifactFileName: '$(_runtimeArtifactName)$(archiveExtension)'
-            artifactName: '$(_runtimeArtifactName)'
-            displayName: '$(runtimeFlavorName) build drop'
-
-      - ${{ if ne(variables['System.TeamProject'], 'public') }}:
-        - ${{ if ne(parameters.osGroup, 'windows') }}:
-          - task: Bash@3
-            displayName: Setup Private Feeds Credentials
-            inputs:
-              filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.sh
-              arguments: $(Build.SourcesDirectory)/NuGet.config $Token
-            env:
-              Token: $(dn-bot-dnceng-artifact-feeds-rw)
-        - ${{ if eq(parameters.osGroup, 'windows') }}:
-          - task: PowerShell@2
-            displayName: Setup Private Feeds Credentials
-            inputs:
-              filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.ps1
-              arguments: -ConfigFile $(Build.SourcesDirectory)/NuGet.config -Password $Env:Token
-            env:
-              Token: $(dn-bot-dnceng-artifact-feeds-rw)
-
-      - ${{ parameters.steps }}
diff --git a/eng/pipelines/libraries/build-job.yml b/eng/pipelines/libraries/build-job.yml
deleted file mode 100644
index b73c8d09f0a6..000000000000
--- a/eng/pipelines/libraries/build-job.yml
+++ /dev/null
@@ -1,98 +0,0 @@
-parameters:
-  buildConfig: ''
-  osGroup: ''
-  osSubgroup: ''
-  archType: ''
-  targetRid: ''
-  crossBuild: false
-  framework: 'net9.0'
-  isOfficialBuild: false
-  runtimeVariant: ''
-  platform: ''
-  testScope: ''
-
-  timeoutInMinutes: 150
-  container: ''
-  condition: true
-  dependOnEvaluatePaths: false
-  disableComponentGovernance: false
-  shouldContinueOnError: false
-  variables: {}
-  pool: ''
-
-jobs:
-  - template: /eng/pipelines/libraries/base-job.yml
-    parameters:
-      buildConfig: ${{ parameters.buildConfig }}
-      osGroup:  ${{ parameters.osGroup }}
-      osSubgroup:  ${{ parameters.osSubgroup }}
-      archType:  ${{ parameters.archType }}
-      crossBuild: ${{ parameters.crossBuild }}
-      framework:  ${{ parameters.framework }}
-      isOfficialBuild: ${{ parameters.isOfficialBuild }}
-      runtimeFlavor: ${{ parameters.runtimeFlavor }}
-      runTests: false
-      timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
-      container: ${{ parameters.container }}
-      condition: ${{ parameters.condition }}
-      dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
-      disableComponentGovernance: ${{ parameters.disableComponentGovernance }}
-      pool: ${{ parameters.pool }}
-      runtimeVariant: ${{ parameters.runtimeVariant }}
-      testScope: ${{ parameters.testScope }}
-      name: build
-      displayName: 'Build'
-
-      variables:
-        - librariesTestsArtifactName: ${{ format('libraries_test_assets_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-        - _subset: tools+libs+libs.tests
-        - _buildAction: ''
-        - _additionalBuildArguments: '/p:ArchiveTests=true'
-        - ${{ parameters.variables }}
-
-      steps:
-        - ${{ if in(parameters.osGroup, 'osx', 'maccatalyst', 'ios', 'iossimulator', 'tvos', 'tvossimulator') }}:
-          - script: $(Build.SourcesDirectory)/eng/install-native-dependencies.sh ${{ parameters.osGroup }}
-            displayName: Install Build Dependencies
-
-          - script: |
-              du -sh $(Build.SourcesDirectory)/*
-              df -h
-            displayName: Disk Usage before Build
-
-        - script: $(_buildScript)
-                -subset $(_subset)
-                $(_buildAction)
-                $(_buildArguments)
-                $(_additionalBuildArguments)
-          displayName: Restore and Build Product
-
-        - ${{ if in(parameters.osGroup, 'osx', 'ios', 'tvos') }}:
-          - script: |
-              du -sh $(Build.SourcesDirectory)/*
-              df -h
-            displayName: Disk Usage after Build
-
-        - template: /eng/pipelines/libraries/prepare-for-bin-publish.yml
-
-        - template: /eng/pipelines/common/upload-artifact-step.yml
-          parameters:
-            rootFolder: $(Build.ArtifactStagingDirectory)/artifacts
-            includeRootFolder: false
-            archiveType: $(archiveType)
-            archiveExtension: $(archiveExtension)
-            tarCompression: $(tarCompression)
-            artifactName: $(librariesBuildArtifactName)
-            displayName: Build Assets
-
-        # Upload test assets
-        # We'll pull them down in another job to send to Helix
-        - template: /eng/pipelines/common/upload-artifact-step.yml
-          parameters:
-            rootFolder: $(Build.SourcesDirectory)/artifacts/helix
-            includeRootFolder: true
-            archiveType: $(archiveType)
-            archiveExtension: $(archiveExtension)
-            tarCompression: $(tarCompression)
-            artifactName: $(librariesTestsArtifactName)
-            displayName: Test Assets
\ No newline at end of file
diff --git a/eng/pipelines/libraries/helix-queues-setup.yml b/eng/pipelines/libraries/helix-queues-setup.yml
index d6f52839e600..cb52e5acf042 100644
--- a/eng/pipelines/libraries/helix-queues-setup.yml
+++ b/eng/pipelines/libraries/helix-queues-setup.yml
@@ -8,7 +8,6 @@ parameters:
   pool: ''
   platform: ''
   shouldContinueOnError: false
-  dependOnEvaluatePaths: false
   jobParameters: {}
 
 jobs:
@@ -22,12 +21,11 @@ jobs:
     pool: ${{ parameters.pool }}
     platform: ${{ parameters.platform }}
     shouldContinueOnError: ${{ parameters.shouldContinueOnError }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths}}
     helixQueues:
 
     # Linux arm
     - ${{ if eq(parameters.platform, 'linux_arm') }}:
-      - ${{ if or(eq(parameters.jobParameters.isExtraPlatforms, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
+      - ${{ if or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
         - (Debian.11.Arm32.Open)Ubuntu.2004.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-11-helix-arm32v7
 
     # Linux armv6
@@ -37,18 +35,18 @@ jobs:
     # Linux arm64
     - ${{ if eq(parameters.platform, 'linux_arm64') }}:
       - (Ubuntu.2204.Arm64.Open)Ubuntu.2004.ArmArch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04-helix-arm64v8
-      - ${{ if or(ne(parameters.jobParameters.isExtraPlatforms, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
+      - ${{ if or(ne(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
         - (Debian.11.Arm64.Open)Ubuntu.2004.Armarch.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-11-helix-arm64v8
 
     # Linux musl x64
     - ${{ if eq(parameters.platform, 'linux_musl_x64') }}:
-      - ${{ if or(ne(parameters.jobParameters.isExtraPlatforms, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
+      - ${{ if or(ne(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
         - (Alpine.316.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.16-helix-amd64
-      - ${{ if or(eq(parameters.jobParameters.isExtraPlatforms, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
+      - ${{ if or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
         - (Alpine.318.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.18-helix-amd64
 
     # Linux musl arm64
-    - ${{ if and(eq(parameters.platform, 'linux_musl_arm64'), or(eq(parameters.jobParameters.isExtraPlatforms, true), eq(parameters.jobParameters.includeAllPlatforms, true))) }}:
+    - ${{ if and(eq(parameters.platform, 'linux_musl_arm64'), or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true))) }}:
       - (Alpine.318.Arm64.Open)ubuntu.2004.armarch.open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.18-helix-arm64v8
       - (Alpine.316.Arm64.Open)ubuntu.2004.armarch.open@mcr.microsoft.com/dotnet-buildtools/prereqs:alpine-3.16-helix-arm64v8
 
@@ -62,14 +60,14 @@ jobs:
           - (Ubuntu.2204.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-22.04-helix-amd64
           - (Debian.11.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-11-helix-amd64
         - ${{ if or(ne(parameters.jobParameters.testScope, 'outerloop'), ne(parameters.jobParameters.runtimeFlavor, 'mono')) }}:
-          - ${{ if or(eq(parameters.jobParameters.isExtraPlatforms, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
+          - ${{ if or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
             - SLES.15.Amd64.Open
             - (Fedora.38.Amd64.Open)ubuntu.2204.amd64.open@mcr.microsoft.com/dotnet-buildtools/prereqs:fedora-38-helix
             - Ubuntu.2204.Amd64.Open
             - (Debian.11.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-11-helix-amd64
             - (Mariner.2.0.Amd64.Open)Ubuntu.2204.Amd64.open@mcr.microsoft.com/dotnet-buildtools/prereqs:cbl-mariner-2.0-helix-amd64
             - (openSUSE.15.2.Amd64.Open)Ubuntu.2204.Amd64.open@mcr.microsoft.com/dotnet-buildtools/prereqs:opensuse-15.2-helix-amd64
-          - ${{ if or(ne(parameters.jobParameters.isExtraPlatforms, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
+          - ${{ if or(ne(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
             - (Centos.8.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:centos-stream8-helix
             - (Debian.11.Amd64.Open)Ubuntu.2204.Amd64.Open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-11-helix-amd64
             - Ubuntu.2204.Amd64.Open
@@ -96,7 +94,7 @@ jobs:
     # Android
     - ${{ if in(parameters.platform, 'android_x86', 'android_x64', 'linux_bionic_x64') }}:
       - Ubuntu.2204.Amd64.Android.29.Open
-    - ${{ if in(parameters.platform, 'android_arm', 'android_arm64', 'linux_bionic_arm64') }}:
+    - ${{ if in(parameters.platform, 'android_arm', 'android_arm64', 'linux_bionic_arm', 'linux_bionic_arm64') }}:
       - Windows.11.Amd64.Android.Open
 
     # iOS Simulator/Mac Catalyst arm64
@@ -124,11 +122,11 @@ jobs:
           - Windows.Amd64.Server2022.Open
         # libraries on coreclr (outerloop and innerloop), or libraries on mono innerloop
         - ${{ if or(ne(parameters.jobParameters.testScope, 'outerloop'), ne(parameters.jobParameters.runtimeFlavor, 'mono')) }}:
-          - ${{ if or(eq(parameters.jobParameters.isExtraPlatforms, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
+          - ${{ if or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
             - Windows.Amd64.Server2022.Open
             - ${{ if ne(parameters.jobParameters.testScope, 'outerloop') }}:
               - (Windows.10.Amd64.ServerRS5.Open)windows.10.amd64.serverrs5.open@mcr.microsoft.com/dotnet-buildtools/prereqs:windowsservercore-ltsc2019-helix-amd64
-          - ${{ if or(ne(parameters.jobParameters.isExtraPlatforms, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
+          - ${{ if or(ne(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
             - Windows.81.Amd64.Open
             - Windows.Amd64.Server2022.Open
             - Windows.11.Amd64.Client.Open
@@ -152,10 +150,10 @@ jobs:
           - Windows.11.Amd64.Client.Open
         # libraries on coreclr (outerloop and innerloop), or libraries on mono innerloop
         - ${{ if or(ne(parameters.jobParameters.testScope, 'outerloop'), ne(parameters.jobParameters.runtimeFlavor, 'mono')) }}:
-          - ${{ if or(eq(parameters.jobParameters.isExtraPlatforms, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
+          - ${{ if or(eq(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
             - Windows.11.Amd64.Client.Open
             - Windows.Amd64.Server2022.Open
-          - ${{ if or(ne(parameters.jobParameters.isExtraPlatforms, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
+          - ${{ if or(ne(parameters.jobParameters.isExtraPlatformsBuild, true), eq(parameters.jobParameters.includeAllPlatforms, true)) }}:
             - Windows.Amd64.Server2022.Open
             - Windows.7.Amd64.Open
 
diff --git a/eng/pipelines/libraries/run-test-job.yml b/eng/pipelines/libraries/run-test-job.yml
index 5c68b4377ee1..bc3697359f04 100644
--- a/eng/pipelines/libraries/run-test-job.yml
+++ b/eng/pipelines/libraries/run-test-job.yml
@@ -4,7 +4,6 @@ parameters:
   osSubgroup: ''
   archType: ''
   targetRid: ''
-  framework: 'net9.0'
   isOfficialBuild: false
   liveRuntimeBuildConfig: ''
   runtimeFlavor: 'coreclr'
@@ -15,72 +14,65 @@ parameters:
   runtimeVariant: ''
   testScope: ''
   helixQueues: []
-  dependOnEvaluatePaths: false
   condition: true
   shouldContinueOnError: false
   variables: {}
-  # coreclrTestGroup: if empty, then a normal, default test run is created. If set, it indicates a set of
-  # stress modes that each test will be run with. This is the same usage as 'testGroup' in
-  # eng/pipelines/common/templates/runtimes/run-test-job.yml.
-  coreclrTestGroup: ''
+  scenarios: []
   SuperPmiCollect: false
   SuperPmiCollectionType: 'run'
   SuperPmiCollectionName: 'libraries_tests'
   dependsOn: []
+  unifiedArtifactsName: ''
+  helixArtifactsName: ''
+  unifiedBuildNameSuffix: ''
+  unifiedBuildConfigOverride: ''
 
 jobs:
-  - template: /eng/pipelines/libraries/base-job.yml
+  - template: /eng/common/templates/job/job.yml
     parameters:
-      buildConfig: ${{ parameters.buildConfig }}
-      osGroup:  ${{ parameters.osGroup }}
-      osSubgroup:  ${{ parameters.osSubgroup }}
-      archType:  ${{ parameters.archType }}
-      crossBuild: ${{ parameters.crossBuild }}
-      framework:  ${{ parameters.framework }}
-      isOfficialBuild: ${{ parameters.isOfficialBuild }}
-      liveRuntimeBuildConfig: ${{ parameters.liveRuntimeBuildConfig }}
-      runtimeFlavor: ${{ parameters.runtimeFlavor }}
-      runtimeVariant: ${{ parameters.runtimeVariant }}
+      enablePublishBuildArtifacts: true
       timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
       container: ${{ parameters.container }}
-      condition: ${{ parameters.condition }}
-      testScope: ${{ parameters.testScope }}
-      SuperPmiCollect: ${{ parameters.SuperPmiCollect }}
-      runTests: true
+      condition: and(succeeded(), ${{ parameters.condition }})
+      helixRepo: dotnet/runtime
       ${{ if eq(parameters.SuperPmiCollect, true) }}:
-        displayName: ${{ format('SuperPMI collection {0} {1} {2}', parameters.SuperPmiCollectionName, parameters.liveRuntimeBuildConfig, parameters.runtimeDisplayName) }}
-        name: ${{ format('spmi_{0}_{1}_{2}', parameters.SuperPmiCollectionName, parameters.liveRuntimeBuildConfig, parameters.runtimeDisplayName) }}
+        displayName: ${{ format('Libraries SuperPMI collection {0} {1} {2} {3}{4} {5} {6}', parameters.SuperPmiCollectionName, parameters.liveRuntimeBuildConfig, parameters.runtimeDisplayName, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
+        name: ${{ format('spmi_{0}_{1}_{2}_{3}{4}_{5}_{6}', parameters.SuperPmiCollectionName, parameters.liveRuntimeBuildConfig, parameters.runtimeDisplayName, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
       ${{ else }}:
         ${{ if ne(parameters.liveRuntimeBuildConfig, '') }}:
-          displayName: ${{ format('Test Run {0} {1}', parameters.liveRuntimeBuildConfig, parameters.runtimeDisplayName) }}
-          name: ${{ format('test_run_{0}_{1}', parameters.liveRuntimeBuildConfig, parameters.runtimeDisplayName) }}
+          displayName: ${{ format('Libraries Test Run {0} {1} {2}{3} {4} {5}', parameters.liveRuntimeBuildConfig, parameters.runtimeDisplayName, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
+          name: ${{ format('libraries_test_run_{0}_{1}_{2}{3}_{4}_{5}', parameters.liveRuntimeBuildConfig, parameters.runtimeDisplayName, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
         ${{ if eq(parameters.liveRuntimeBuildConfig, '') }}:
           displayName: 'Test Run'
           name: test_run
-      ${{ if eq(parameters.interpreter, 'true') }}:
-        testDisplayName: ${{ parameters.runtimeFlavor }}_interpreter_${{ parameters.liveRuntimeBuildConfig }}
 
       # To run the tests we just send to helix and wait, use ubuntu hosted pools for faster providing and to not back up our build pools
       pool: ${{ parameters.pool }}
 
-      dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
+      # Component governance does not work on musl machines
+      ${{ if eq(parameters.osSubGroup, '_musl') }}:
+        disableComponentGovernance: true
+
       dependsOn:
       - ${{ if ne(parameters.dependsOn[0], '') }}:
         - ${{ parameters.dependsOn }}
-      - ${{ if eq(parameters.dependsOn[0], '') }}:
-        - ${{ format('libraries_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-        - ${{ if ne(parameters.liveRuntimeBuildConfig, '') }}:
-          - ${{ format('{0}_{1}_product_build_{2}{3}_{4}_{5}', parameters.runtimeFlavor, parameters.runtimeVariant, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.liveRuntimeBuildConfig) }}
-      # SuperPMI collection needs to run mcs.exe on the AzDO machine. Assume that's an x64 machine, and download an x64 product build if needed.
-      - ${{ if and(eq(parameters.SuperPmiCollect, true), ne(parameters.archType, 'x64')) }}:
-          - ${{ format('{0}_{1}_product_build_{2}{3}_{4}_{5}', 'coreclr', '', parameters.osGroup, parameters.osSubgroup, 'x64', parameters.liveRuntimeBuildConfig) }}
+      - ${{ else }}:
+        - 'build_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_${{ coalesce(parameters.unifiedBuildConfigOverride, parameters.buildConfig) }}_${{ parameters.unifiedBuildNameSuffix }}'
+
+      workspace:
+        clean: all
 
       variables:
+        - ${{ if eq(variables['System.TeamProject'], 'internal') }}:
+          - group: DotNet-HelixApi-Access
+          - group: AzureDevOps-Artifact-Feeds-Pats
 
-        - librariesTestsArtifactName: ${{ format('libraries_test_assets_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
         - _archiveTestsParameter: /p:ArchiveTests=true
 
         - ${{ if eq(parameters.SuperPmiCollect, true) }}:
+          - template: /eng/pipelines/coreclr/templates/jit-python-variables.yml
+            parameters:
+              osGroup: ${{ parameters.osGroup }}
           - template: /eng/pipelines/libraries/superpmi-collect-variables.yml
             parameters:
               buildConfig: ${{ parameters.buildConfig }}
@@ -91,172 +83,85 @@ jobs:
         - ${{ parameters.variables }}
 
       steps:
-
-        # SuperPMI collection: Download x64 coreclr if running on non-x64 configuration (needed for mcs.exe on AzDO machine; see `SuperPmiMcsPath`.
-        - ${{ if and(eq(parameters.SuperPmiCollect, true), ne(parameters.archType, 'x64')) }}:
-          - template: /eng/pipelines/common/download-artifact-step.yml
-            parameters:
-              unpackFolder: $(_runtimeX64DownloadPath)
-              artifactFileName: 'CoreCLRProduct__${{ parameters.osGroup }}${{ parameters.osSubgroup }}_x64_${{ parameters.liveRuntimeBuildConfig }}$(archiveExtension)'
-              artifactName: 'CoreCLRProduct__${{ parameters.osGroup }}${{ parameters.osSubgroup }}_x64_${{ parameters.liveRuntimeBuildConfig }}'
-              displayName: 'CoreCLR product build (x64)'
-
-        - template: /eng/pipelines/common/download-artifact-step.yml
-          parameters:
-            displayName: Build Assets
-            cleanUnpackFolder: false
-            artifactName: $(librariesBuildArtifactName)
-            artifactFileName: $(librariesBuildArtifactName)$(archiveExtension)
-            unpackFolder: $(Build.SourcesDirectory)/artifacts
-
-        - template: /eng/pipelines/common/download-artifact-step.yml
-          parameters:
-            displayName: Test Assets
-            cleanUnpackFolder: false
-            artifactName: $(librariesTestsArtifactName)
-            artifactFileName: $(librariesTestsArtifactName)$(archiveExtension)
-            unpackFolder: $(Build.SourcesDirectory)/artifacts
-
-        - ${{ if in(parameters.osGroup, 'osx', 'maccatalyst', 'ios', 'iossimulator', 'tvos', 'tvossimulator') }}:
-          - script: $(Build.SourcesDirectory)/eng/install-native-dependencies.sh ${{ parameters.osGroup }}
-            displayName: Install Build Dependencies
-
-        - ${{ if ne(parameters.liveRuntimeBuildConfig, '') }}:
-          - script: $(_buildScript)
-                    -subset host.native+libs.pretest
-                    $(_buildArguments)
-                    /p:RuntimeFlavor=${{ parameters.runtimeFlavor }}
-                    /bl:$(Build.SourcesDirectory)/artifacts/log/$(_BuildConfig)/overrideRuntimeFromLiveDrop.binlog
-            displayName: Prepare TestHost with runtime $(runtimeFlavorName)
-
-        - template: /eng/pipelines/libraries/helix.yml
+      - checkout: self
+        clean: true
+        fetchDepth: $(checkoutFetchDepth)
+
+      - ${{ if ne(variables['System.TeamProject'], 'public') }}:
+        - ${{ if ne(parameters.osGroup, 'windows') }}:
+          - task: Bash@3
+            displayName: Setup Private Feeds Credentials
+            inputs:
+              filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.sh
+              arguments: $(Build.SourcesDirectory)/NuGet.config $Token
+            env:
+              Token: $(dn-bot-dnceng-artifact-feeds-rw)
+        - ${{ if eq(parameters.osGroup, 'windows') }}:
+          - task: PowerShell@2
+            displayName: Setup Private Feeds Credentials
+            inputs:
+              filePath: $(Build.SourcesDirectory)/eng/common/SetupNugetSources.ps1
+              arguments: -ConfigFile $(Build.SourcesDirectory)/NuGet.config -Password $Env:Token
+            env:
+              Token: $(dn-bot-dnceng-artifact-feeds-rw)
+
+      - ${{ if in(parameters.osGroup, 'osx', 'maccatalyst', 'ios', 'iossimulator', 'tvos', 'tvossimulator') }}:
+        - script: $(Build.SourcesDirectory)/eng/install-native-dependencies.sh ${{ parameters.osGroup }}
+          displayName: Install Build Dependencies
+
+      - template: /eng/pipelines/common/download-artifact-step.yml
+        parameters:
+          unpackFolder: $(Build.SourcesDirectory)/artifacts/bin
+          artifactFileName: '${{ parameters.unifiedArtifactsName }}$(archiveExtension)'
+          artifactName: '${{ parameters.unifiedArtifactsName }}'
+          displayName: 'unified artifacts'
+
+      - template: /eng/pipelines/common/download-artifact-step.yml
+        parameters:
+          displayName: Test Assets
+          artifactName: ${{ parameters.helixArtifactsName }}
+          artifactFileName: '${{ parameters.helixArtifactsName }}$(archiveExtension)'
+          unpackFolder: $(Build.SourcesDirectory)/artifacts/helix
+
+      - template: /eng/pipelines/libraries/helix.yml
+        parameters:
+          runtimeFlavor: ${{ parameters.runtimeFlavor }}
+          osGroup: ${{ parameters.osGroup }}
+          targetRid: ${{ parameters.targetRid }}
+          archType: ${{ parameters.archType }}
+          buildConfig: ${{ parameters.buildConfig }}
+          helixQueues: ${{ parameters.helixQueues }}
+          testScope: ${{ parameters.testScope }}
+          interpreter: ${{ parameters.interpreter }}
+          shouldContinueOnError: ${{ parameters.shouldContinueOnError }}
+          creator: dotnet-bot
+          ${{ if eq(parameters.interpreter, 'true') }}:
+            testRunNamePrefixSuffix: ${{ parameters.runtimeFlavor }}_interpreter_${{ parameters.liveRuntimeBuildConfig }}
+          ${{ else }}:
+            testRunNamePrefixSuffix: ${{ parameters.runtimeDisplayName }}_${{ parameters.liveRuntimeBuildConfig }}
+          SuperPmiCollect: ${{ parameters.SuperPmiCollect }}
+          SuperPmiCollectionType: ${{ parameters.SuperPmiCollectionType }}
+          SuperPmiCollectionName: ${{ parameters.SuperPmiCollectionName }}
+          ${{ if eq(parameters.SuperPmiCollect, true) }}:
+            extraHelixArguments: /p:RuntimeConfiguration=${{ parameters.liveRuntimeBuildConfig }}
+
+          ${{ if ne(parameters.scenarios[0], '') }}:
+            scenarios: ${{ parameters.scenarios }}
+
+      - ${{ if eq(parameters.SuperPmiCollect, true) }}:
+        - template: /eng/pipelines/libraries/superpmi-postprocess-step.yml
           parameters:
-            runtimeFlavor: ${{ parameters.runtimeFlavor }}
+            buildConfig: ${{ parameters.buildConfig }}
+            buildConfigUpper: $(buildConfigUpper)
             osGroup: ${{ parameters.osGroup }}
-            targetRid: ${{ parameters.targetRid }}
+            osSubgroup: ${{ parameters.osSubgroup }}
             archType: ${{ parameters.archType }}
-            buildConfig: ${{ parameters.buildConfig }}
-            helixQueues: ${{ parameters.helixQueues }}
-            testScope: ${{ parameters.testScope }}
-            interpreter: ${{ parameters.interpreter }}
-            shouldContinueOnError: ${{ parameters.shouldContinueOnError }}
-            creator: dotnet-bot
-            testRunNamePrefixSuffix: $(_testRunNamePrefixSuffix)
-            SuperPmiCollect: ${{ parameters.SuperPmiCollect }}
             SuperPmiCollectionType: ${{ parameters.SuperPmiCollectionType }}
             SuperPmiCollectionName: ${{ parameters.SuperPmiCollectionName }}
-
-            # coreclrTestGroup: The following mappings of 'coreclrTestGroup' to 'scenarios' is copied from
-            # eng/pipelines/common/templates/runtimes/run-test-job.yml (with 'testGroup' replaced by 'coreclrTestGroup'
-            # for clarity), and should remain in sync. This is only a subset; only the testGroups that are
-            # used to test the libraries have been added here. More could be added if we decided to test the
-            # libraries with more stress modes. The scenario tags are interpreted by
-            # src\tests\Common\testenvironment.proj.
-            #
-            # The one difference here compared to eng/pipelines/common/templates/runtimes/run-test-job.yml is
-            # that 'jitstress' contains 'no_tiered_compilation'. The 'normal' (default) test mode
-            # is run in a regular CI job, so there is no need to duplicate it here. So, add 'no_tiered_compilation'
-            # to the 'jitstress' job instead of adding a new job just for 'no_tiered_compilation'.
-
-            # src/libraries/sendtohelix.proj processes one scenario per parallel MSBuild invocation. Each invocation only
-            # creates Helix work items and them waits for their completion on the remote Helix machines, so is not
-            # computationally intensive. We want Helix to be provided with all the possible work items in up front,
-            # so can do as much work in parallel as possible. Thus, increase the amount of allowed MSBuild parallelism
-            # to at least the maximum number of scenarios to be processed in a coreclrTestGroup. If there is no
-            # coreclrTestGroup then there is only one scenario (the default scenario), so don't change the MSBuild argument.
-            ${{ if ne(parameters.coreclrTestGroup, '') }}:
-              extraHelixArguments: $(_extraHelixArguments) /maxcpucount:10
-            ${{ else }}:
-              extraHelixArguments: $(_extraHelixArguments)
-
-            ${{ if in(parameters.coreclrTestGroup, 'superpmi_collection') }}:
-              scenarios:
-              - normal
-            ${{ if in(parameters.coreclrTestGroup, 'superpmi_collection_no_tiered_compilation') }}:
-              scenarios:
-              - no_tiered_compilation
-            ${{ if in(parameters.coreclrTestGroup, 'jitstress') }}:
-              scenarios:
-              - no_tiered_compilation
-              - jitminopts
-              - jitstress1
-              - jitstress1_tiered
-              - jitstress2
-              - jitstress2_tiered
-              - zapdisable
-              - tailcallstress
-            ${{ if in(parameters.coreclrTestGroup, 'jitstress-random') }}:
-              scenarios:
-              - jitstress_random_1
-              - jitstress_random_2
-            ${{ if in(parameters.coreclrTestGroup, 'jitstressregs' ) }}:
-              scenarios:
-              - jitstressregs1
-              - jitstressregs2
-              - jitstressregs3
-              - jitstressregs4
-              - jitstressregs8
-              - jitstressregs0x10
-              - jitstressregs0x80
-              - jitstressregs0x1000
-              - jitstressregs0x2000
-            ${{ if in(parameters.coreclrTestGroup, 'jitstress2-jitstressregs') }}:
-              scenarios:
-              - jitstress2_jitstressregs1
-              - jitstress2_jitstressregs2
-              - jitstress2_jitstressregs3
-              - jitstress2_jitstressregs4
-              - jitstress2_jitstressregs8
-              - jitstress2_jitstressregs0x10
-              - jitstress2_jitstressregs0x80
-              - jitstress2_jitstressregs0x1000
-              - jitstress2_jitstressregs0x2000
-            ${{ if in(parameters.coreclrTestGroup, 'gcstress0x3-gcstress0xc') }}:
-              scenarios:
-              # Disable gcstress0x3 for now; it causes lots of test timeouts. Investigate this after
-              # gcstress0xc runs are clean. Tracking issue: https://github.com/dotnet/runtime/issues/38903.
-              # - gcstress0x3
-              - gcstress0xc
-            ${{ if in(parameters.coreclrTestGroup, 'gcstress-extra') }}:
-              scenarios:
-              - heapverify1
-              - gcstress0xc_zapdisable
-              - gcstress0xc_zapdisable_jitstress2
-              - gcstress0xc_zapdisable_heapverify1
-              - gcstress0xc_jitstress1
-              - gcstress0xc_jitstress2
-              - gcstress0xc_jitminopts_heapverify1
-            ${{ if in(parameters.coreclrTestGroup, 'pgo') }}:
-              ${{ if and(eq(parameters.osGroup, 'windows'), eq(parameters.archType, 'arm64')) }}:
-                scenarios:
-                - defaultpgo
-              ${{ else }}:
-                scenarios:
-                - defaultpgo
-                - fullpgo
-                - fullpgo_methodprofiling
-                - fullpgo_random_gdv
-                - fullpgo_random_gdv_methodprofiling_only
-                - fullpgo_random_gdv_edge
-                - jitosr_stress
-                - jitosr_stress_random
-                - syntheticpgo
-                - syntheticpgo_blend
-                - jitcrossblocklocalassertionprop
-
-        - ${{ if eq(parameters.SuperPmiCollect, true) }}:
-          - template: /eng/pipelines/libraries/superpmi-postprocess-step.yml
-            parameters:
-              buildConfig: ${{ parameters.buildConfig }}
-              buildConfigUpper: $(buildConfigUpper)
-              osGroup: ${{ parameters.osGroup }}
-              osSubgroup: ${{ parameters.osSubgroup }}
-              archType: ${{ parameters.archType }}
-              SuperPmiCollectionType: ${{ parameters.SuperPmiCollectionType }}
-              SuperPmiCollectionName: ${{ parameters.SuperPmiCollectionName }}
-              MergedMchFileLocation: $(MergedMchFileLocation)
-              MchFilesLocation: $(MchFilesLocation)
-              SpmiLogsLocation: $(SpmiLogsLocation)
-              SuperPmiMcsPath: $(SuperPmiMcsPath)
-              PythonScript: $(PythonScript)
-              PipScript: $(PipScript)
+            MergedMchFileLocation: $(MergedMchFileLocation)
+            MchFilesLocation: $(MchFilesLocation)
+            SpmiLogsLocation: $(SpmiLogsLocation)
+            SuperPmiMcsPath: $(SuperPmiMcsPath)
+            PythonSetupScript: $(PythonSetupScript)
+            PythonScript: $(PythonScript)
+            PipScript: $(PipScript)
diff --git a/eng/pipelines/libraries/stress/http.yml b/eng/pipelines/libraries/stress/http.yml
index f4f9c45de36e..68bfcef6c508 100644
--- a/eng/pipelines/libraries/stress/http.yml
+++ b/eng/pipelines/libraries/stress/http.yml
@@ -119,6 +119,9 @@ extends:
           lfs: false
 
         - powershell: |
+            # Workaround for https://github.com/microsoft/azure-pipelines-agent/issues/4554. Undo when the image bug is fixed.
+            Remove-Item -Path "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\Microsoft.VCToolsVersion.v143.default.txt"
+
             $(dockerfilesFolder)/build-docker-sdk.ps1 -w -t $(sdkBaseImage) -c $(BUILD_CONFIGURATION)
             echo "##vso[task.setvariable variable=succeeded;isOutput=true]true"
           name: buildRuntime
diff --git a/eng/pipelines/libraries/stress/ssl.yml b/eng/pipelines/libraries/stress/ssl.yml
index 1052d3b04285..1e8cddf228bc 100644
--- a/eng/pipelines/libraries/stress/ssl.yml
+++ b/eng/pipelines/libraries/stress/ssl.yml
@@ -76,6 +76,9 @@ extends:
           lfs: false
 
         - powershell: |
+            # Workaround for https://github.com/microsoft/azure-pipelines-agent/issues/4554. Undo when the image bug is fixed.
+            Remove-Item -Path "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\Microsoft.VCToolsVersion.v143.default.txt"
+
             $(dockerfilesFolder)/build-docker-sdk.ps1 -w -t $(sdkBaseImage) -c $(BUILD_CONFIGURATION)
           displayName: Build CLR and Libraries
 
diff --git a/eng/pipelines/libraries/superpmi-collect-variables.yml b/eng/pipelines/libraries/superpmi-collect-variables.yml
index 509bd6094245..82ead1c26ca7 100644
--- a/eng/pipelines/libraries/superpmi-collect-variables.yml
+++ b/eng/pipelines/libraries/superpmi-collect-variables.yml
@@ -13,20 +13,14 @@ variables:
       value: 'Release'
   - name: _runtimeX64DownloadPath
     value: ''
-  # superpmi.py 'merge-mch' needs to be able to find the mcs tool. Point SuperPmiMcsPath at the downloaded CoreCLR binaries. For non-x64 targets, download an x64
-  # build and point at that. Pass this to superpmi.py as the '-core_root' argument. It's not actually a "Core_Root" directory, but all it needs is to find mcs.
+  # superpmi.py 'merge-mch' needs to be able to find the mcs tool. Point SuperPmiMcsPath at the downloaded CoreCLR binaries. For non-x64 targets, we'll also have an x64 mcs tool available.
+  # so point the non-x64 builds at the x64 artifacts.
   - name: SuperPmiMcsPath
-    value: $(_runtimeDownloadPath)
+    value: $(Build.SourcesDirectory)/artifacts/bin/coreclr/${{ parameters.osGroup }}.${{ parameters.archType }}.$(buildConfigUpper)
   - ${{ if ne(parameters.archType, 'x64') }}:
-    - name: _runtimeX64DownloadPath
-      value: '$(Build.SourcesDirectory)/artifacts/transport/${{ parameters.runtimeFlavor }}.x64'
     - name: SuperPmiMcsPath
-      value: $(_runtimeX64DownloadPath)
+      value: '$(Build.SourcesDirectory)/artifacts/bin/coreclr/${{ parameters.osGroup }}.x64.$(buildConfigUpper)/'
   - ${{ if eq(parameters.osGroup, 'windows') }}:
-    - name: PythonScript
-      value: 'py -3'
-    - name: PipScript
-      value: 'py -3 -m pip'
     - name: MchFilesLocation
       value: '$(Build.SourcesDirectory)\artifacts\helixresults\'
     - name: MergedMchFileLocation
@@ -34,13 +28,9 @@ variables:
     - name: SpmiLogsLocation
       value: '$(Build.SourcesDirectory)\artifacts\spmi_logs\'
   - ${{ if ne(parameters.osGroup, 'windows') }}:
-    - name: PythonScript
-      value: 'python3'
-    - name: PipScript
-      value: 'pip3'
     - name: MchFilesLocation
       value: '$(Build.SourcesDirectory)/artifacts/helixresults/'
     - name: MergedMchFileLocation
       value: '$(Build.SourcesDirectory)/artifacts/spmi_collection/'
     - name: SpmiLogsLocation
-      value: '$(Build.SourcesDirectory)/artifacts/spmi_logs/'
\ No newline at end of file
+      value: '$(Build.SourcesDirectory)/artifacts/spmi_logs/'
diff --git a/eng/pipelines/libraries/superpmi-postprocess-step.yml b/eng/pipelines/libraries/superpmi-postprocess-step.yml
index ef29b332c751..e9ae5d13f32f 100644
--- a/eng/pipelines/libraries/superpmi-postprocess-step.yml
+++ b/eng/pipelines/libraries/superpmi-postprocess-step.yml
@@ -18,6 +18,7 @@ parameters:
   MchFilesLocation: ''
   SpmiLogsLocation: ''
   SuperPmiMcsPath: ''
+  PythonSetupScript: ''
   PythonScript: ''
   PipScript: ''
 
@@ -37,6 +38,10 @@ steps:
       displayName: 'Create SuperPMI directories'
       condition: always()
 
+  - script: ${{ parameters.PythonSetupScript }}
+    displayName: Enable python venv
+    condition: always()
+
   - script: ${{ parameters.PythonScript }} $(Build.SourcesDirectory)/src/coreclr/scripts/superpmi.py merge-mch -log_level DEBUG -pattern ${{ parameters.MchFilesLocation }}${{ parameters.SuperPmiCollectionName }}.${{ parameters.SuperPmiCollectionType }}*.mch -output_mch_path ${{ parameters.MergedMchFileLocation }}${{ parameters.SuperPmiCollectionName }}.${{ parameters.SuperPmiCollectionType }}.${{ parameters.osGroup }}.${{ parameters.archType }}.${{ parameters.buildConfig }}.mch -core_root ${{ parameters.SuperPmiMcsPath }}
     displayName: 'Merge ${{ parameters.SuperPmiCollectionName }}-${{ parameters.SuperPmiCollectionType }} SuperPMI collections'
     condition: always()
@@ -61,7 +66,7 @@ steps:
     condition: always()
 
   # Ensure the Python azure-storage-blob package is installed before doing the upload.
-  - script: ${{ parameters.PipScript }} install --user --upgrade pip && ${{ parameters.PipScript }} install --user azure.storage.blob==12.5.0 --force-reinstall
+  - script: ${{ parameters.PipScript }} install --upgrade pip && ${{ parameters.PipScript }} install azure.storage.blob==12.5.0 --force-reinstall
     displayName: Upgrade Pip to latest and install azure-storage-blob Python package
     condition: always()
 
@@ -84,4 +89,4 @@ steps:
     inputs:
       targetPath: ${{ parameters.SpmiLogsLocation }}
       artifactName: 'SuperPMI_Logs_${{ parameters.SuperPmiCollectionName }}_${{ parameters.SuperPmiCollectionType }}_${{ parameters.osGroup }}${{ parameters.osSubgroup }}_${{ parameters.archType }}_${{ parameters.buildConfig }}'
-    condition: always()
\ No newline at end of file
+    condition: always()
diff --git a/eng/pipelines/mono/templates/build-job.yml b/eng/pipelines/mono/templates/build-job.yml
deleted file mode 100644
index 86e0813c7c7e..000000000000
--- a/eng/pipelines/mono/templates/build-job.yml
+++ /dev/null
@@ -1,184 +0,0 @@
-parameters:
-  buildConfig: ''
-  archType: ''
-  osGroup: ''
-  osSubgroup: ''
-  platform: ''
-  container: ''
-  timeoutInMinutes: ''
-  variables: {}
-  pool: ''
-  condition: true
-  runtimeVariant: ''
-  isOfficialBuild: false
-  crossBuild: false
-  dependsOn: []
-  monoCrossAOTTargetOS: []
-  dependOnEvaluatePaths: false
-
-### Product build
-jobs:
-- template: xplat-pipeline-job.yml
-  parameters:
-    buildConfig: ${{ parameters.buildConfig }}
-    archType: ${{ parameters.archType }}
-    osGroup: ${{ parameters.osGroup }}
-    osSubgroup: ${{ parameters.osSubgroup }}
-    helixType: 'build/product/'
-    enableMicrobuild: true
-    pool: ${{ parameters.pool }}
-    runtimeVariant: ${{ parameters.runtimeVariant }}
-    crossBuild: ${{ parameters.crossBuild }}
-    monoCrossAOTTargetOS: ${{ parameters.monoCrossAOTTargetOS }}
-    condition: ${{ parameters.condition }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
-
-    # Compute job name from template parameters
-    name: ${{ format('mono_{0}_product_build_{1}{2}_{3}_{4}', parameters.runtimeVariant, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-    displayName: ${{ format('Mono {0} Product Build {1}{2} {3} {4}', parameters.runtimeVariant, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }}
-
-    # Run all steps in the container.
-    # Note that the containers are defined in platform-matrix.yml
-    container: ${{ parameters.container }}
-
-    dependsOn: ${{ parameters.dependsOn }}
-    timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
-
-    gatherAssetManifests: true
-    variables:
-    - name: osGroup
-      value: ${{ parameters.osGroup }}
-    - name: osSubgroup
-      value: ${{ parameters.osSubgroup }}
-    - name: officialBuildIdArg
-      value: ''
-    - name: osOverride
-      value: ''
-    - name: aotCrossParameter
-      value: ''
-    - name: llvmParameter
-      value: ''
-    - name: darwinFrameworks
-      value: ''
-    - ${{ if eq(parameters.isOfficialBuild, true) }}:
-      - name: officialBuildIdArg
-        value: '/p:OfficialBuildId=$(Build.BuildNumber)'
-    - ${{ if and(eq(parameters.osSubgroup, '_musl'), eq(parameters.osGroup, 'linux')) }}:
-      # Set output RID manually: musl isn't properly detected. Make sure to also convert linux to
-      # lowercase for RID format. (Detection normally converts, but we're preventing it.)
-      - name: OutputRidArg
-        value: /p:OutputRID=linux-musl-${{ parameters.archType }}
-      - name: _PortableBuild
-        value: true
-    - ${{ if eq(parameters.osGroup, 'tvos') }}:
-      - name: osOverride
-        value: -os tvos
-      - name: darwinFrameworks
-        value: /p:BuildDarwinFrameworks=true
-    - ${{ if eq(parameters.osGroup, 'tvossimulator') }}:
-      - name: osOverride
-        value: -os tvossimulator
-      - name: darwinFrameworks
-        value: /p:BuildDarwinFrameworks=true
-    - ${{ if eq(parameters.osGroup, 'ios') }}:
-      - name: osOverride
-        value: -os ios
-      - name: darwinFrameworks
-        value: /p:BuildDarwinFrameworks=true
-    - ${{ if eq(parameters.osGroup, 'iossimulator') }}:
-      - name: osOverride
-        value: -os iossimulator
-      - name: darwinFrameworks
-        value: /p:BuildDarwinFrameworks=true
-    - ${{ if eq(parameters.osGroup, 'android') }}:
-      - name: osOverride
-        value: -os android
-    - ${{ if eq(parameters.osGroup, 'browser') }}:
-      - name: archType
-        value: wasm
-      - name: osOverride
-        value: '-os browser'
-    - ${{ if eq(parameters.osGroup, 'wasi') }}:
-      - name: archType
-        value: wasm
-      - name: osOverride
-        value: '-os wasi'
-    - ${{ if eq(parameters.runtimeVariant, 'llvmjit') }}:
-      - name: llvmParameter
-        value: /p:MonoEnableLLVM=true /p:MonoBundleLLVMOptimizer=false
-    - ${{ if eq(parameters.runtimeVariant, 'llvmaot') }}:
-      - name: llvmParameter
-        value: /p:MonoEnableLLVM=true /p:MonoBundleLLVMOptimizer=true
-    - ${{ if gt(length(parameters.monoCrossAOTTargetOS),0) }}:
-      - name: aotCrossParameter
-        value: /p:MonoCrossAOTTargetOS=${{join('+',parameters.monoCrossAOTTargetOS)}} /p:SkipMonoCrossJitConfigure=true /p:BuildMonoAOTCrossCompilerOnly=true
-    - ${{ parameters.variables }}
-
-    steps:
-
-    # Install native dependencies
-    # Linux builds use docker images with dependencies preinstalled,
-    # and FreeBSD builds use a build agent with dependencies
-    # preinstalled, so we only need this step for OSX and Windows.
-    - ${{ if in(parameters.osGroup, 'osx', 'maccatalyst', 'ios', 'iossimulator', 'tvos', 'tvossimulator') }}:
-      - script: $(Build.SourcesDirectory)/eng/install-native-dependencies.sh $(osGroup)
-        displayName: Install native dependencies
-
-    - ${{ each monoCrossAOTTargetOS in parameters.monoCrossAOTTargetOS }}:
-      - task: DownloadPipelineArtifact@2
-        displayName: Download ${{monoCrossAOTTargetOS}} AOT offset files
-        inputs:
-          artifact: Mono_Offsets_${{monoCrossAOTTargetOS}}
-          path: '$(Build.SourcesDirectory)/artifacts/obj/mono/offsetfiles'
-
-    - ${{ if in(parameters.osGroup, 'osx', 'ios', 'tvos') }}:
-      - script: |
-          du -sh $(Build.SourcesDirectory)/*
-          df -h
-        displayName: Disk Usage before Build
-
-    # Build
-    - ${{ if ne(parameters.osGroup, 'windows') }}:
-      - script: ./build$(scriptExt) -subset mono+clr.hosts $(crossArg) -c $(buildConfig) -arch $(archType) $(osOverride) -ci $(officialBuildIdArg) $(aotCrossParameter) $(llvmParameter) $(darwinFrameworks)
-        displayName: Build product
-    - ${{ if eq(parameters.osGroup, 'windows') }}:
-      - script: build$(scriptExt) -subset mono+clr.hosts -c $(buildConfig) -arch $(archType) $(osOverride) -ci $(officialBuildIdArg) $(aotCrossParameter) $(llvmParameter)
-        displayName: Build product
-
-    - ${{ if in(parameters.osGroup, 'osx', 'ios', 'tvos') }}:
-      - script: |
-          du -sh $(Build.SourcesDirectory)/*
-          df -h
-        displayName: Disk Usage after Build
-
-    # Publish product output directory for consumption by tests.
-    - template: /eng/pipelines/common/upload-artifact-step.yml
-      parameters:
-        rootFolder: $(buildProductRootFolderPath)
-        includeRootFolder: false
-        archiveType: $(archiveType)
-        tarCompression: $(tarCompression)
-        archiveExtension: $(archiveExtension)
-        artifactName: $(buildProductArtifactName)
-        displayName: 'product build'
-
-    # Build packages
-    - ${{ if ne(parameters.osGroup, 'windows') }}:
-      - script: ./build$(scriptExt) -subset mono+clr.hosts $(crossArg) -c $(buildConfig) -arch $(archType) $(osOverride) -ci $(officialBuildIdArg) $(aotCrossParameter) $(llvmParameter) -pack $(OutputRidArg)
-        displayName: Build nupkg
-    - ${{ if eq(parameters.osGroup, 'windows') }}:
-      - script: build$(scriptExt) -subset mono+clr.hosts -c $(buildConfig) -arch $(archType) $(osOverride) -ci $(officialBuildIdArg) $(aotCrossParameter) $(llvmParameter) -pack $(OutputRidArg)
-        displayName: Build nupkg
-
-    - ${{ if and(eq(parameters.isOfficialBuild, true), eq(parameters.osGroup, 'windows')) }}:
-      - powershell: ./eng/collect_vsinfo.ps1 -ArchiveRunName postbuild_log
-        displayName: Collect vslogs on exit
-        condition: always()
-    # Publish Logs
-    - task: PublishPipelineArtifact@1
-      displayName: Publish Logs
-      inputs:
-        targetPath: $(Build.SourcesDirectory)/artifacts/log
-        artifactName: 'BuildLogs_Attempt$(System.JobAttempt)_Mono_${{ parameters.runtimeVariant }}_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-      continueOnError: true
-      condition: always()
diff --git a/eng/pipelines/mono/templates/generate-offsets.yml b/eng/pipelines/mono/templates/generate-offsets.yml
index c68adfe67a9e..8d8d781dd326 100644
--- a/eng/pipelines/mono/templates/generate-offsets.yml
+++ b/eng/pipelines/mono/templates/generate-offsets.yml
@@ -9,11 +9,13 @@ parameters:
   pool: ''
   condition: true
   isOfficialBuild: false
+  templatePath: 'templates'
 
 ### Product build
 jobs:
-- template: xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
+    templatePath: ${{ parameters.templatePath }}
     buildConfig: ${{ parameters.buildConfig }}
     osGroup: ${{ parameters.osGroup }}
     osSubGroup: ${{ parameters.osSubGroup }}
@@ -22,6 +24,7 @@ jobs:
     pool: ${{ parameters.pool }}
     condition: ${{ parameters.condition }}
     dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
+    logsName: 'BuildLogs_Attempt$(System.JobAttempt)_Mono_Offsets_$(osGroup)$(osSubGroup)'
 
     # Compute job name from template parameters
     name: ${{ format('mono_{0}{1}_offsets', parameters.osGroup, parameters.osSubGroup) }}
@@ -76,17 +79,10 @@ jobs:
         contents: '**/offsets-*.h'
         targetFolder: '$(Build.SourcesDirectory)/artifacts/obj/mono/offsetfiles/'
 
-    - task: PublishPipelineArtifact@1
-      displayName: Upload offset files
-      inputs:
-        targetPath: '$(Build.SourcesDirectory)/artifacts/obj/mono/offsetfiles'
-        artifactName: 'Mono_Offsets_$(osGroup)$(osSubGroup)'
-
-    # Publish Logs
-    - task: PublishPipelineArtifact@1
-      displayName: Publish Logs
-      inputs:
-        targetPath: $(Build.SourcesDirectory)/artifacts/log
-        artifactName: 'BuildLogs_Attempt$(System.JobAttempt)_Mono_Offsets_$(osGroup)$(osSubGroup)'
-      continueOnError: true
-      condition: always()
+    - template: /eng/pipelines/common/templates/publish-pipeline-artifacts.yml
+      parameters:
+        displayName: Upload offset files
+        isOfficialBuild: ${{ parameters.isOfficialBuild }}
+        inputs:
+          targetPath: '$(Build.SourcesDirectory)/artifacts/obj/mono/offsetfiles'
+          artifactName: 'Mono_Offsets_$(osGroup)$(osSubGroup)'
diff --git a/eng/pipelines/mono/templates/workloads-build.yml b/eng/pipelines/mono/templates/workloads-build.yml
index 20ff5c29d5d8..72e4c3adc8fb 100644
--- a/eng/pipelines/mono/templates/workloads-build.yml
+++ b/eng/pipelines/mono/templates/workloads-build.yml
@@ -2,7 +2,6 @@ parameters:
   archType: ''
   buildConfig: ''
   container: ''
-  dependOnEvaluatePaths: false
   dependsOn: []
   isOfficialBuild: false
   osGroup: ''
@@ -12,22 +11,24 @@ parameters:
   runtimeVariant: ''
   testGroup: ''
   timeoutInMinutes: ''
+  templatePath: 'templates'
   variables: {}
 
 jobs:
-- template: xplat-pipeline-job.yml
+- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
   parameters:
+    templatePath: ${{ parameters.templatePath }}
     archType: ${{ parameters.archType }}
     buildConfig: ${{ parameters.buildConfig }}
     container: ${{ parameters.container }}
-    condition: ${{ parameters.isOfficialBuild }}
+    condition: and(succeeded(), ${{ parameters.isOfficialBuild }})
     helixType: 'build/product/'
     osGroup: ${{ parameters.osGroup }}
     osSubgroup: ${{ parameters.osSubgroup }}
     pool: ${{ parameters.pool }}
     runtimeVariant: ${{ parameters.runtimeVariant }}
     timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
+    logsName: WorkloadLogs_Attempt$(System.JobAttempt)
 
     dependsOn: ${{ parameters.dependsOn }}
 
@@ -42,6 +43,10 @@ jobs:
         value: '/p:OfficialBuildId=$(Build.BuildNumber)'
     - name: SignType
       value: $[ coalesce(variables.OfficialSignType, 'real') ]
+    - name: workloadPackagesPath
+      value: $(Build.SourcesDirectory)/artifacts/workloadPackages
+    - name: workloadArtifactsPath
+      value: $(Build.SourcesDirectory)/artifacts/workloads
     - ${{ parameters.variables }}
 
     steps:
@@ -96,15 +101,6 @@ jobs:
       parameters:
         name: workloads
 
-    # Publish Logs
-    - task: PublishPipelineArtifact@1
-      displayName: Publish Logs
-      inputs:
-        targetPath: $(Build.SourcesDirectory)/artifacts/log
-        artifactName: 'WorkloadLogs_Attempt$(System.JobAttempt)'
-      continueOnError: true
-      condition: always()
-
     # Delete wixpdb files before they are uploaded to artifacts
     - task: DeleteFiles@1
       displayName: Delete wixpdb's
diff --git a/eng/pipelines/mono/templates/xplat-pipeline-job.yml b/eng/pipelines/mono/templates/xplat-pipeline-job.yml
deleted file mode 100644
index 1ca84d9caac1..000000000000
--- a/eng/pipelines/mono/templates/xplat-pipeline-job.yml
+++ /dev/null
@@ -1,128 +0,0 @@
-parameters:
-  buildConfig: ''
-  archType: ''
-  osGroup: ''
-  osSubgroup: ''
-  name: ''
-  helixType: '(unspecified)'
-  container: ''
-  crossBuild: false
-  liveLibrariesBuildConfig: ''
-  strategy: ''
-  pool: ''
-  runtimeVariant: ''
-  liveRuntimeBuildConfig: 'release'
-
-  # arcade-specific parameters
-  condition: true
-  continueOnError: false
-  dependsOn: ''
-  dependOnEvaluatePaths: false
-  displayName: ''
-  timeoutInMinutes: ''
-  enableMicrobuild: ''
-  gatherAssetManifests: false
-
-  variables: {} ## any extra variables to add to the defaults defined below
-
-jobs:
-- template: /eng/pipelines/common/templates/runtimes/xplat-job.yml
-  parameters:
-    buildConfig: ${{ parameters.buildConfig }}
-    archType: ${{ parameters.archType }}
-    osGroup: ${{ parameters.osGroup }}
-    osSubgroup: ${{ parameters.osSubgroup }}
-    name: ${{ parameters.name }}
-    helixType: ${{ parameters.helixType }}
-    container: ${{ parameters.container }}
-    crossBuild: ${{ parameters.crossBuild }}
-    strategy: ${{ parameters.strategy }}
-    pool: ${{ parameters.pool }}
-    runtimeVariant: ${{ parameters.runtimeVariant }}
-
-    # arcade-specific parameters
-    condition: and(succeeded(), ${{ parameters.condition }})
-    continueOnError: ${{ parameters.continueOnError }}
-    dependsOn: ${{ parameters.dependsOn }}
-    dependOnEvaluatePaths: ${{ parameters.dependOnEvaluatePaths }}
-    displayName: ${{ parameters.displayName }}
-    timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
-    enableMicrobuild: ${{ parameters.enableMicrobuild }}
-    gatherAssetManifests: ${{ parameters.gatherAssetManifests }}
-
-    variables:
-    - name: coreClrProductArtifactName
-      value: 'CoreCLRProduct__$(osGroup)$(osSubgroup)_$(archType)_${{ parameters.liveRuntimeBuildConfig }}'
-
-    - name: coreClrProductRootFolderPath
-      value: '$(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).$(liveRuntimeBuildConfigUpper)'
-
-    - name: buildProductArtifactName
-      value: 'MonoProduct_${{ parameters.runtimeVariant }}_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-
-    # minijit and monointerpreter do not use separate product builds.
-    - ${{ if or(eq(parameters.runtimeVariant, 'minijit'), eq(parameters.runtimeVariant, 'monointerpreter')) }}:
-      - name : buildProductArtifactName
-        value : 'MonoProduct__$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-
-    - ${{ if eq(parameters.runtimeVariant, 'llvmfullaot') }}:
-      - name : buildProductArtifactName
-        value : 'MonoProduct_llvmaot_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-
-    - name: binTestsPath
-      value: '$(Build.SourcesDirectory)/artifacts/tests/coreclr'
-
-    - name: buildProductRootFolderPath
-      value: '$(Build.SourcesDirectory)/artifacts/bin/mono/$(osGroup).$(archType).$(buildConfigUpper)'
-
-    - name: managedTestArtifactRootFolderPath
-      value: '$(binTestsPath)/$(osGroup).$(archType).$(buildConfigUpper)'
-
-    - name: managedGenericTestArtifactName
-      value: 'MonoManagedTestArtifacts_AnyOS_AnyCPU_$(buildConfig)'
-
-    - name: microsoftNetSdkIlFolderPath
-      value: '$(Build.SourcesDirectory)/.packages/microsoft.net.sdk.il'
-
-    - name: microsoftNetSdkIlArtifactName
-      value: 'MicrosoftNetSdkIlPackage_AnyOS_AnyCPU_$(buildConfig)'
-
-    - name: monoRepoRoot
-      value: '$(Build.SourcesDirectory)/src/mono'
-
-    - name: nativeTestArtifactName
-      value: 'CoreCLRNativeTestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)'
-
-    - name: nativeTestArtifactRootFolderPath
-      value: '$(binTestsPath)/obj/$(osGroup).$(archType).$(buildConfigUpper)'
-
-    - name: workloadPackagesPath
-      value: $(Build.SourcesDirectory)/artifacts/workloadPackages
-
-    - name: workloadArtifactsPath
-      value: $(Build.SourcesDirectory)/artifacts/workloads
-
-    - name: liveRuntimeBuildConfigUpper
-      ${{ if eq(parameters.liveRuntimeBuildConfig, 'release') }}:
-        value: 'Release'
-      ${{ if eq(parameters.liveRuntimeBuildConfig, 'checked') }}:
-        value: 'Checked'
-      ${{ if eq(parameters.liveRuntimeBuildConfig, 'debug') }}:
-        value: 'Debug'
-
-    - name: priorityArg
-      value: ''
-
-    - librariesBuildArtifactName: ''
-    - librariesOverrideArg: ''
-    - librariesDownloadDir: ''
-
-    - ${{ if ne(parameters.liveLibrariesBuildConfig, '') }}:
-      - librariesBuildArtifactName: ${{ format('libraries_bin_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.liveLibrariesBuildConfig) }}
-      - librariesDownloadDir: $(Build.SourcesDirectory)/artifacts
-      - librariesOverrideArg: ' /p:LibrariesConfiguration=${{ parameters.liveLibrariesBuildConfig }}'
-
-    - ${{ each variable in parameters.variables }}:
-      - ${{insert}}: ${{ variable }}
-
-    steps: ${{ parameters.steps }}
diff --git a/eng/pipelines/official/jobs/prepare-signed-artifacts.yml b/eng/pipelines/official/jobs/prepare-signed-artifacts.yml
index 908f2b64c71c..24fd2df48d74 100644
--- a/eng/pipelines/official/jobs/prepare-signed-artifacts.yml
+++ b/eng/pipelines/official/jobs/prepare-signed-artifacts.yml
@@ -20,6 +20,14 @@ jobs:
   - name: SignType
     value: $[ coalesce(variables.OfficialSignType, 'real') ]
 
+  templateContext:
+    outputs:
+    - output: pipelineArtifact
+      displayName: 'Publish BuildLogs'
+      condition: succeededOrFailed()
+      targetPath: '$(Build.StagingDirectory)\BuildLogs'
+      artifactName: ${{ parameters.logArtifactName }}
+
   steps:
   - checkout: self
     clean: true
@@ -65,11 +73,4 @@ jobs:
         **/*.binlog
       TargetFolder: '$(Build.StagingDirectory)\BuildLogs'
     continueOnError: true
-    condition: succeededOrFailed()
-
-  - task: PublishPipelineArtifact@1
-    displayName: Publish BuildLogs
-    inputs:
-      targetPath: '$(Build.StagingDirectory)\BuildLogs'
-      artifactName: ${{ parameters.logArtifactName }}
-    condition: succeededOrFailed()
+    condition: succeededOrFailed()
\ No newline at end of file
diff --git a/eng/pipelines/official/stages/publish.yml b/eng/pipelines/official/stages/publish.yml
index 0a25cdd54a2c..7e636fb42ab7 100644
--- a/eng/pipelines/official/stages/publish.yml
+++ b/eng/pipelines/official/stages/publish.yml
@@ -7,7 +7,7 @@ stages:
 - stage: PrepareForPublish
   displayName: Prepare for Publish
   variables:
-  - template: /eng/common/templates/variables/pool-providers.yml
+  - template: /eng/common/templates-official/variables/pool-providers.yml
   jobs:
   # Prep artifacts: sign them and upload pipeline artifacts expected by stages-based publishing.
   - template: /eng/pipelines/official/jobs/prepare-signed-artifacts.yml
@@ -15,7 +15,7 @@ stages:
       PublishRidAgnosticPackagesFromPlatform: ${{ parameters.PublishRidAgnosticPackagesFromPlatform }}
 
   # Publish to Build Asset Registry in order to generate the ReleaseConfigs artifact.
-  - template: /eng/common/templates/job/publish-build-assets.yml
+  - template: /eng/common/templates-official/job/publish-build-assets.yml
     parameters:
       publishUsingPipelines: true
       publishAssetsImmediately: true
@@ -26,7 +26,7 @@ stages:
       symbolPublishingAdditionalParameters: '/p:PublishSpecialClrFiles=true'
 
 # Stages-based publishing entry point
-- template: /eng/common/templates/post-build/post-build.yml
+- template: /eng/common/templates-official/post-build/post-build.yml
   parameters:
     publishingInfraVersion: ${{ parameters.publishingInfraVersion }}
     validateDependsOn:
diff --git a/eng/pipelines/runtime-cet.yml b/eng/pipelines/runtime-cet.yml
index c756a3c83d05..459905bd89ee 100644
--- a/eng/pipelines/runtime-cet.yml
+++ b/eng/pipelines/runtime-cet.yml
@@ -38,35 +38,34 @@ extends:
       jobs:
 
       #
-      # Build CoreCLR checked
-      # Only when CoreCLR is changed
+      # Build CoreCLR and Libraries
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: checked
           platforms:
           - windows_x64
           jobParameters:
-            testGroup: innerloop
+            buildArgs: -s clr+libs -c $(_BuildConfig) -lc Release
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: innerloop
 
       #
-      # Build CoreCLR release
-      # Always as they are needed by Installer and we always build and test the Installer.
-      #
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/libraries/build-job.yml
-          buildConfig: release
-          platforms:
-          - windows_x64
-          jobParameters:
-            testScope: innerloop
-
-      #
-      # CoreCLR Test builds using live libraries release build
-      # Only when CoreCLR is changed
+      # CoreCLR Test build
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
@@ -77,10 +76,8 @@ extends:
           jobParameters:
             testGroup: innerloop
 
-
       #
       # CoreCLR Test executions using live libraries
-      # Only when CoreCLR is changed
       #
 
       - template: /eng/pipelines/common/platform-matrix.yml
@@ -95,3 +92,4 @@ extends:
             testGroup: innerloop
             liveLibrariesBuildConfig: release
             useCodeFlowEnforcement: true
+            unifiedArtifactsName: BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
diff --git a/eng/pipelines/runtime-community.yml b/eng/pipelines/runtime-community.yml
index a91388e244b0..0ec649c6216c 100644
--- a/eng/pipelines/runtime-community.yml
+++ b/eng/pipelines/runtime-community.yml
@@ -33,13 +33,14 @@ extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
     stages:
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
+          - template: /eng/pipelines/common/evaluate-default-paths.yml
+
     - stage: Build
       jobs:
-      #
-      # Evaluate paths
-      #
-      - ${{ if eq(variables.dependOnEvaluatePaths, true) }}:
-        - template: /eng/pipelines/common/evaluate-default-paths.yml
 
       #
       # s390x & PPC64 little endian
@@ -57,9 +58,9 @@ extends:
           variables:
             # map dependencies variables to local variables
             - name: librariesContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
             - name: monoContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
           jobParameters:
             testGroup: innerloop
             nameSuffix: AllSubsets_Mono
@@ -67,8 +68,8 @@ extends:
             timeoutInMinutes: 180
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
             # extra steps, run tests
             postBuildSteps:
@@ -96,9 +97,9 @@ extends:
           variables:
             # map dependencies variables to local variables
             - name: librariesContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
             - name: monoContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
           jobParameters:
             testScope: innerloop
             nameSuffix: AllSubsets_Mono
@@ -106,8 +107,8 @@ extends:
             timeoutInMinutes: 120
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -124,9 +125,9 @@ extends:
           variables:
             # map dependencies variables to local variables
             - name: librariesContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
             - name: monoContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
           jobParameters:
             testScope: innerloop
             nameSuffix: AllSubsets_Mono
@@ -134,8 +135,8 @@ extends:
             timeoutInMinutes: 120
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
             ${{ if eq(variables['isRollingBuild'], true) }}:
               # extra steps, run tests
diff --git a/eng/pipelines/runtime-extra-platforms.yml b/eng/pipelines/runtime-extra-platforms.yml
index 958eabb9d33d..d1997faf9662 100644
--- a/eng/pipelines/runtime-extra-platforms.yml
+++ b/eng/pipelines/runtime-extra-platforms.yml
@@ -40,15 +40,15 @@ extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
     stages:
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
+          - template: /eng/pipelines/common/evaluate-default-paths.yml
+
     - stage: Build
       jobs:
 
-      #
-      # Evaluate paths
-      #
-      - ${{ if eq(variables.dependOnEvaluatePaths, true) }}:
-        - template: /eng/pipelines/common/evaluate-default-paths.yml
-
       # Add wasm jobs only for rolling builds
       - ${{ if eq(variables.isRollingBuild, true) }}:
         - template: /eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml
diff --git a/eng/pipelines/runtime-linker-tests.yml b/eng/pipelines/runtime-linker-tests.yml
index 3ae027a5f3e7..aec5e1057ac5 100644
--- a/eng/pipelines/runtime-linker-tests.yml
+++ b/eng/pipelines/runtime-linker-tests.yml
@@ -57,15 +57,14 @@ extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
     stages:
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
+          - template: /eng/pipelines/common/evaluate-default-paths.yml
+
     - stage: Build
       jobs:
-
-      #
-      # Evaluate paths
-      #
-      - ${{ if eq(variables.dependOnEvaluatePaths, true) }}:
-        - template: /eng/pipelines/common/evaluate-default-paths.yml
-
       #
       # Build and Test ILLink in Release config vertical for Windows, Linux and OSX
       #
@@ -84,7 +83,7 @@ extends:
             nameSuffix: ILLink_Runtime_Testing
             condition:
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
             buildArgs: -s tools.illinktests -test -c $(_BuildConfig)
 
@@ -105,7 +104,7 @@ extends:
             nameSuffix: Runtime_Release
             condition:
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
             buildArgs: -s clr+libs+tools.illink -c $(_BuildConfig)
             postBuildSteps:
@@ -128,10 +127,10 @@ extends:
             condition:
               or(
                 eq(variables['isRollingBuild'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_wasm_specific_except_wbt_dbg.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_NET_ILLink_Tasks'], true))
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_wasm_specific_except_wbt_dbg.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['DarcDependenciesChanged.Microsoft_NET_ILLink_Tasks'], true))
             postBuildSteps:
               - template: /eng/pipelines/libraries/execute-trimming-tests-steps.yml
                 parameters:
diff --git a/eng/pipelines/runtime-llvm.yml b/eng/pipelines/runtime-llvm.yml
index 9d358e5f7930..ee96cc2a04f5 100644
--- a/eng/pipelines/runtime-llvm.yml
+++ b/eng/pipelines/runtime-llvm.yml
@@ -28,24 +28,6 @@ schedules:
       - main
     always: false # run only if there were changes since the last successful scheduled run.
 
-pr:
-  branches:
-    include:
-    - main
-    - release/*.*
-  paths:
-    include:
-    - '*'
-    exclude:
-    - '**.md'
-    - eng/Version.Details.xml
-    - .devcontainer/*
-    - .github/*
-    - docs/*
-    - LICENSE.TXT
-    - PATENTS.TXT
-    - THIRD-PARTY-NOTICES.TXT
-
 variables:
   - template: /eng/pipelines/common/variables.yml
 
@@ -53,15 +35,14 @@ extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
     stages:
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
+          - template: /eng/pipelines/common/evaluate-default-paths.yml
+
     - stage: Build
       jobs:
-
-      #
-      # Evaluate paths
-      #
-      - ${{ if eq(variables.dependOnEvaluatePaths, true) }}:
-        - template: /eng/pipelines/common/evaluate-default-paths.yml
-
       #
       # Build Mono and Installer on LLVMJIT mode
       #
@@ -79,9 +60,9 @@ extends:
                       /p:MonoEnableLLVM=true /p:MonoBundleLLVMOptimizer=false
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       - template: /eng/pipelines/common/platform-matrix.yml
@@ -99,9 +80,9 @@ extends:
                       /p:MonoEnableLLVM=true /p:MonoBundleLLVMOptimizer=false
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -118,13 +99,12 @@ extends:
           jobParameters:
             testGroup: innerloop
             nameSuffix: AllSubsets_Mono_LLVMAOT
-            buildArgs: -s mono+libs+host+packs -c $(_BuildConfig)
-                      /p:MonoEnableLLVM=true /p:MonoAOTEnableLLVM=true /p:MonoBundleLLVMOptimizer=true
+            buildArgs: -s mono+libs+host+packs -c $(_BuildConfig) /p:MonoEnableLLVM=true /p:MonoAOTEnableLLVM=true
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       - template: /eng/pipelines/common/platform-matrix.yml
@@ -137,82 +117,135 @@ extends:
           jobParameters:
             testGroup: innerloop
             nameSuffix: AllSubsets_Mono_LLVMAOT
-            buildArgs: -s mono+libs+host+packs -c $(_BuildConfig)
-                      /p:MonoEnableLLVM=true /p:MonoAOTEnableLLVM=true /p:MonoBundleLLVMOptimizer=true
+            buildArgs: -s mono+libs+host+packs -c $(_BuildConfig) /p:MonoEnableLLVM=true /p:MonoAOTEnableLLVM=true
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
-      #
-      # Build Mono release with LLVM AOT
-      # Only when mono, or the runtime tests changed
-      #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/mono/templates/build-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+          buildConfig: Release
           runtimeFlavor: mono
-          buildConfig: release
           platforms:
-          - linux_x64
-          - linux_arm64
+            - linux_x64
+            # Disabled pending outcome of https://github.com/dotnet/runtime/issues/60234 investigation
+            #- linux_arm64
+          variables:
+            - name: timeoutPerTestInMinutes
+              value: 60
+            - name: timeoutPerTestCollectionInMinutes
+              value: 180
           jobParameters:
+            testGroup: innerloop
+            nameSuffix: AllSubsets_Mono_LLVMAOT_RuntimeTests
             runtimeVariant: llvmaot
+            buildArgs: -s mono+libs+clr.hosts+clr.iltools -c $(_BuildConfig) -lc ${{ variables.debugOnPrReleaseOnRolling }} /p:MonoEnableLLVM=true
+            timeoutInMinutes: 360
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
+            postBuildSteps:
+              - template: /eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  llvmAotStepContainer: linux_x64_llvmaot
+                  testRunNamePrefixSuffix: Mono_Release
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/test-variables.yml
 
       #
-      # Mono CoreCLR runtime Test executions using live libraries and LLVM AOT
+      # Mono CoreCLR runtime Test executions using live libraries and LLVM Full AOT
       # Only when Mono is changed
+      # This job runs non-intrinsics runtime tests due to OOM issues
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: release
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+          buildConfig: Release
           runtimeFlavor: mono
           platforms:
-          - linux_x64_llvmaot
-          # Disabled pending outcome of https://github.com/dotnet/runtime/issues/60234 investigation
-          #- linux_arm64
-          helixQueueGroup: pr
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+            - linux_x64
+            # Tracking issue: https://github.com/dotnet/runtime/issues/90427
+            # - linux_arm64
+          variables:
+            - name: timeoutPerTestInMinutes
+              value: 60
+            - name: timeoutPerTestCollectionInMinutes
+              value: 180
           jobParameters:
             testGroup: innerloop
-            liveLibrariesBuildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
-            liveRuntimeBuildConfig: release
-            runtimeVariant: llvmaot
+            nameSuffix: AllSubsets_Mono_LLVMFULLAOT_RuntimeTests
+            runtimeVariant: llvmfullaot
+            buildArgs: -s mono+libs+clr.hosts+clr.iltools -c $(_BuildConfig) -lc ${{ variables.debugOnPrReleaseOnRolling }} /p:MonoEnableLLVM=true
+            timeoutInMinutes: 360
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
+            postBuildSteps:
+              - template: /eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  llvmAotStepContainer: linux_x64_llvmaot
+                  testRunNamePrefixSuffix: Mono_Release
+                  testBuildArgs: >-
+                    -tree:CoreMangLib -tree:Exceptions -tree:GC -tree:Interop -tree:Loader -tree:Regressions -tree:baseservices
+                    -tree:ilasm -tree:ilverify -tree:managed -tree:profiler -tree:readytorun -tree:reflection -tree:tracing
+                    -tree:JIT/BBT -tree:JIT/CodeGenBringUpTests -tree:JIT/Directed -tree:JIT/Generics -tree:JIT/IL_Conformance
+                    -tree:JIT/Math -tree:JIT/Methodical -tree:JIT/PGO -tree:JIT/Performance -tree:JIT/Regression -tree:JIT/RyuJIT
+                    -tree:JIT/Stress -tree:JIT/common -tree:JIT/jit64 -tree:JIT/opt -tree:JIT/superpmi
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/test-variables.yml
 
       #
       # Mono CoreCLR runtime Test executions using live libraries and LLVM Full AOT
       # Only when Mono is changed
+      # This job runs the runtime intrinsics tests due to OOM issues
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: release
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+          buildConfig: Release
           runtimeFlavor: mono
           platforms:
-          - linux_x64_llvmaot
-          - linux_arm64
-          helixQueueGroup: pr
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+            - linux_x64
+            # Tracking issue: https://github.com/dotnet/runtime/issues/90427
+            # - linux_arm64
+          variables:
+            - name: timeoutPerTestInMinutes
+              value: 60
+            - name: timeoutPerTestCollectionInMinutes
+              value: 180
           jobParameters:
             testGroup: innerloop
-            liveLibrariesBuildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
-            liveRuntimeBuildConfig: release
+            nameSuffix: AllSubsets_Mono_LLVMFULLAOT_RuntimeIntrinsicsTests
             runtimeVariant: llvmfullaot
+            buildArgs: -s mono+libs+clr.hosts+clr.iltools -c $(_BuildConfig) -lc ${{ variables.debugOnPrReleaseOnRolling }} /p:MonoEnableLLVM=true
+            timeoutInMinutes: 360
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
+            postBuildSteps:
+              - template: /eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  llvmAotStepContainer: linux_x64_llvmaot
+                  testRunNamePrefixSuffix: Mono_Release
+                  testBuildArgs: -tree:JIT/Intrinsics -tree:JIT/HardwareIntrinsics -tree:JIT/SIMD
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/test-variables.yml
diff --git a/eng/pipelines/runtime-official.yml b/eng/pipelines/runtime-official.yml
index d8ad9c571806..ba09a957605e 100644
--- a/eng/pipelines/runtime-official.yml
+++ b/eng/pipelines/runtime-official.yml
@@ -30,6 +30,7 @@ variables:
 extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
+    isOfficialBuild: true
     stages:
     - stage: Build
       jobs:
@@ -38,7 +39,7 @@ extends:
       # Localization build
       #
 
-      - template: /eng/common/templates/job/onelocbuild.yml
+      - template: /eng/common/templates-official/job/onelocbuild.yml
         parameters:
           MirrorRepo: runtime
           MirrorBranch: main
@@ -49,7 +50,7 @@ extends:
       # Source Index Build
       #
       - ${{ if eq(variables['Build.SourceBranch'], 'refs/heads/main') }}:
-        - template: /eng/common/templates/job/source-index-stage1.yml
+        - template: /eng/common/templates-official/job/source-index-stage1.yml
           parameters:
             sourceIndexBuildCommand: build.cmd -subset libs.sfx+libs.oob -binarylog -os linux -ci /p:SkipLibrariesNativeRuntimePackages=true
 
@@ -64,8 +65,10 @@ extends:
           buildConfig: release
           platforms:
           - windows_x64
+          - windows_x86
           - windows_arm64
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s clr.runtime+clr.alljits+clr.nativeaotruntime -c $(_BuildConfig) /bl:$(Build.SourcesDirectory)/artifacts/logs/$(_BuildConfig)/CoreClrNativeBuild.binlog
             nameSuffix: CoreCLR
             isOfficialBuild: ${{ variables.isOfficialBuild }}
@@ -87,40 +90,6 @@ extends:
               parameters:
                 name: $(osGroup)$(osSubgroup)_$(archType)
 
-
-      #
-      # Build CoreCLR runtime packs
-      # Windows x86
-      # No NativeAOT as NativeAOT is not supported on x86
-      # Sign diagnostic files after native build
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/global-build-job.yml
-          buildConfig: release
-          platforms:
-          - windows_x86
-          jobParameters:
-            buildArgs: -s clr.runtime+clr.alljits -c $(_BuildConfig) /bl:$(Build.SourcesDirectory)/artifacts/logs/$(_BuildConfig)/CoreClrNativeBuild.binlog
-            nameSuffix: CoreCLR
-            isOfficialBuild: ${{ variables.isOfficialBuild }}
-            timeoutInMinutes: 120
-            postBuildSteps:
-            - template: /eng/pipelines/coreclr/templates/sign-diagnostic-files.yml
-              parameters:
-                basePath: $(Build.SourcesDirectory)/artifacts/bin/coreclr
-                isOfficialBuild: ${{ variables.isOfficialBuild }}
-                timeoutInMinutes: 30
-            # Now that we've signed the diagnostic files, do the rest of the build.
-            - template: /eng/pipelines/common/templates/global-build-step.yml
-              parameters:
-                buildArgs: -s clr.corelib+clr.nativecorelib+clr.tools+clr.packages+libs+host+packs -c $(_BuildConfig)
-                displayName: Build managed CoreCLR components, all libraries, hosts, and packs
-
-            # Upload the results.
-            - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml
-              parameters:
-                name: $(osGroup)$(osSubgroup)_$(archType)
       #
       # Build CoreCLR runtime packs
       # Mac x64/arm64
@@ -134,6 +103,7 @@ extends:
           - osx_arm64
           - osx_x64
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s clr.runtime+clr.alljits+clr.nativeaotruntime+host.native -c $(_BuildConfig) /bl:$(Build.SourcesDirectory)/artifacts/logs/$(_BuildConfig)/CoreClrNativeBuild.binlog
             nameSuffix: CoreCLR
             isOfficialBuild: ${{ variables.isOfficialBuild }}
@@ -189,6 +159,7 @@ extends:
           - linux_musl_arm
           - linux_musl_arm64
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s clr.runtime+clr.alljits+clr.corelib+clr.nativecorelib+clr.tools+clr.aot+clr.packages+libs+host+packs -c $(_BuildConfig)
             nameSuffix: CoreCLR
             isOfficialBuild: ${{ variables.isOfficialBuild }}
@@ -201,28 +172,24 @@ extends:
                   SourceFolder: $(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).$(_BuildConfig)
                   Contents: libcoreclr.so
                   TargetFolder: $(Build.SourcesDirectory)/artifacts/CoreCLRCrossDacArtifacts/$(osGroup)$(osSubgroup).$(archType).$(_BuildConfig)/$(crossDacHostArch)
-              - task: PublishBuildArtifacts@1
+              - task: 1ES.PublishBuildArtifacts@1
                 displayName: Publish runtime for CrossDac
                 inputs:
-                  pathToPublish: $(Build.SourcesDirectory)/artifacts/CoreCLRCrossDacArtifacts
+                  PathtoPublish: $(Build.SourcesDirectory)/artifacts/CoreCLRCrossDacArtifacts
                   PublishLocation: Container
-                  artifactName: CoreCLRCrossDacArtifacts
+                  ArtifactName: CoreCLRCrossDacArtifacts
               # Create RPMs and DEBs
-              - template: /eng/pipelines/installer/jobs/steps/build-linux-package.yml
+              - template: /eng/pipelines/installer/steps/build-linux-package.yml
                 parameters:
                   packageType: deb
                   target: debpkg
-                  packageStepDescription: Runtime Deps, Runtime, Framework Packs Deb installers
-                  subsetArg: -s packs.installers
-                  packagingArgs: -c $(_BuildConfig) --arch $(archType) --os $(osGroup) --ci /p:OfficialBuildId=$(Build.BuildNumber) /p:BuildDebPackage=true
+                  packagingArgs: -c $(_BuildConfig) /p:BuildDebPackage=true
                   condition: and(succeeded(), eq(variables.osSubgroup, ''), eq(variables.archType, 'x64'))
-              - template: /eng/pipelines/installer/jobs/steps/build-linux-package.yml
+              - template: /eng/pipelines/installer/steps/build-linux-package.yml
                 parameters:
                   packageType: rpm
                   target: rpmpkg
-                  packageStepDescription: Runtime Deps, Runtime, Framework Packs RPM installers
-                  subsetArg: -s packs.installers
-                  packagingArgs: -c $(_BuildConfig) --arch $(archType) --os $(osGroup) --ci /p:OfficialBuildId=$(Build.BuildNumber) /p:BuildRpmPackage=true
+                  packagingArgs: -c $(_BuildConfig) /p:BuildRpmPackage=true
                   condition: and(succeeded(), eq(variables.osSubgroup, ''), in(variables.archType, 'x64', 'arm64'))
 
               # Upload the results.
@@ -242,6 +209,7 @@ extends:
           platforms:
           - windows_x64
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s crossdacpack -c $(_BuildConfig) /p:CrossDacArtifactsDir=$(crossDacArtifactsPath)
             nameSuffix: CrossDac
             isOfficialBuild: ${{ variables.isOfficialBuild }}
@@ -310,14 +278,19 @@ extends:
           - iossimulator_arm64
           - ios_arm64
           - linux_x64
+          - linux_arm
           - linux_arm64
           - linux_musl_x64
+          - linux_musl_arm
           - linux_musl_arm64
           - linux_bionic_x64
+          - linux_bionic_arm
           - linux_bionic_arm64
+          - windows_x86
           - windows_x64
           - windows_arm64
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s clr.nativeaotlibs+clr.nativeaotruntime+libs+packs -c $(_BuildConfig) /p:BuildNativeAOTRuntimePack=true /p:SkipLibrariesNativeRuntimePackages=true
             nameSuffix: NativeAOT
             isOfficialBuild: ${{ variables.isOfficialBuild }}
@@ -361,6 +334,7 @@ extends:
           - windows_x86
           # - windows_arm64
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s mono+libs+host+packs -c $(_BuildConfig) /p:BuildMonoAOTCrossCompiler=false
             nameSuffix: Mono
             isOfficialBuild: ${{ variables.isOfficialBuild }}
@@ -378,6 +352,7 @@ extends:
           - browser_wasm
           - wasi_wasm
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s mono+libs+host+packs -c $(_BuildConfig) /p:AotHostArchitecture=x64 /p:AotHostOS=$(_hostedOS)
             nameSuffix: Mono
             isOfficialBuild: ${{ variables.isOfficialBuild }}
@@ -394,6 +369,7 @@ extends:
           platforms:
           - browser_wasm
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s mono+libs+host+packs -c $(_BuildConfig) /p:WasmEnableThreads=true /p:AotHostArchitecture=x64 /p:AotHostOS=$(_hostedOS)
             nameSuffix: Mono_multithread
             isOfficialBuild: ${{ variables.isOfficialBuild }}
@@ -417,6 +393,7 @@ extends:
           - ios_arm64
           - maccatalyst_x64
           jobParameters:
+            templatePath: 'templates-official'
             isOfficialBuild: ${{ variables.isOfficialBuild }}
 
       #
@@ -433,6 +410,7 @@ extends:
           - linux_arm64
           - linux_musl_arm64
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s mono+packs -c $(_BuildConfig)
                       /p:MonoCrossAOTTargetOS=android+browser+wasi /p:SkipMonoCrossJitConfigure=true /p:BuildMonoAOTCrossCompilerOnly=true
             nameSuffix: CrossAOT_Mono
@@ -460,6 +438,7 @@ extends:
           - windows_arm64
           - windows_x64
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s mono+packs -c $(_BuildConfig)
                       /p:MonoCrossAOTTargetOS=android+browser+wasi /p:SkipMonoCrossJitConfigure=true /p:BuildMonoAOTCrossCompilerOnly=true
             nameSuffix: CrossAOT_Mono
@@ -487,6 +466,7 @@ extends:
           - osx_x64
           - osx_arm64
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s mono+packs -c $(_BuildConfig)
                       /p:MonoCrossAOTTargetOS=android+browser+wasi+tvos+ios+maccatalyst /p:SkipMonoCrossJitConfigure=true /p:BuildMonoAOTCrossCompilerOnly=true
             nameSuffix: CrossAOT_Mono
@@ -532,6 +512,7 @@ extends:
             buildConfig: release
             runtimeFlavor: mono
             jobParameters:
+              templatePath: 'templates-official'
               buildArgs: -s mono+libs+host+packs -c $(_BuildConfig)
                         /p:MonoEnableLLVM=true /p:MonoBundleLLVMOptimizer=false
               nameSuffix: Mono_LLVMJIT
@@ -546,6 +527,7 @@ extends:
             buildConfig: release
             runtimeFlavor: mono
             jobParameters:
+              templatePath: 'templates-official'
               buildArgs: -s mono+libs+host+packs -c $(_BuildConfig)
                           /p:MonoEnableLLVM=true /p:MonoAOTEnableLLVM=true /p:MonoBundleLLVMOptimizer=true
               nameSuffix: Mono_LLVMAOT
@@ -566,6 +548,7 @@ extends:
           platforms:
           - windows_x64
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s tools+libs -allConfigurations -c $(_BuildConfig) /p:TestAssemblies=false /p:TestPackages=true
             nameSuffix: Libraries_AllConfigurations
             isOfficialBuild: ${{ variables.isOfficialBuild }}
@@ -585,7 +568,9 @@ extends:
           platforms:
           - SourceBuild_linux_x64
           jobParameters:
+            templatePath: 'templates-official'
             nameSuffix: PortableSourceBuild
+            isOfficialBuild: ${{ variables.isOfficialBuild }}
             postBuildSteps:
               - template: /eng/pipelines/common/upload-intermediate-artifacts-step.yml
                 parameters:
@@ -607,6 +592,7 @@ extends:
           - windows_arm64
           - linux_arm64
           jobParameters:
+            templatePath: 'templates-official'
             buildArgs: -s clr.native+clr.corelib+clr.tools+clr.nativecorelib+libs+host+packs -c $(_BuildConfig) -pgoinstrument /p:SkipLibrariesNativeRuntimePackages=true
             isOfficialBuild: ${{ variables.isOfficialBuild }}
             nameSuffix: PGO
@@ -626,6 +612,7 @@ extends:
           platforms:
           - windows_x64
           jobParameters:
+            templatePath: 'templates-official'
             isOfficialBuild: ${{ variables.isOfficialBuild }}
             timeoutInMinutes: 120
             dependsOn:
diff --git a/eng/pipelines/runtime-wasm-dbgtests.yml b/eng/pipelines/runtime-wasm-dbgtests.yml
index fb5ee6123fdb..f367c9065f3d 100644
--- a/eng/pipelines/runtime-wasm-dbgtests.yml
+++ b/eng/pipelines/runtime-wasm-dbgtests.yml
@@ -8,14 +8,14 @@ extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
     stages:
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
+          - template: /eng/pipelines/common/evaluate-default-paths.yml
+
     - stage: Build
       jobs:
-
-      #
-      # Evaluate paths
-      #
-      - template: /eng/pipelines/common/evaluate-default-paths.yml
-
       - template: /eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml
         parameters:
           isExtraPlatformsBuild: ${{ variables.isExtraPlatformsBuild }}
diff --git a/eng/pipelines/runtime-wasm-libtests.yml b/eng/pipelines/runtime-wasm-libtests.yml
index 60912d2e4426..7ee8225777f4 100644
--- a/eng/pipelines/runtime-wasm-libtests.yml
+++ b/eng/pipelines/runtime-wasm-libtests.yml
@@ -7,14 +7,14 @@ extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
     stages:
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
+          - template: /eng/pipelines/common/evaluate-default-paths.yml
+
     - stage: Build
       jobs:
-
-      #
-      # Evaluate paths
-      #
-      - template: /eng/pipelines/common/evaluate-default-paths.yml
-
       - template: /eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml
         parameters:
           isExtraPlatformsBuild: ${{ variables.isExtraPlatformsBuild }}
diff --git a/eng/pipelines/runtime-wasm-non-libtests.yml b/eng/pipelines/runtime-wasm-non-libtests.yml
index 906c56fa607e..2260d4327778 100644
--- a/eng/pipelines/runtime-wasm-non-libtests.yml
+++ b/eng/pipelines/runtime-wasm-non-libtests.yml
@@ -7,14 +7,14 @@ extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
     stages:
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
+          - template: /eng/pipelines/common/evaluate-default-paths.yml
+
     - stage: Build
       jobs:
-
-      #
-      # Evaluate paths
-      #
-      - template: /eng/pipelines/common/evaluate-default-paths.yml
-
       - template: /eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml
         parameters:
           isExtraPlatformsBuild: ${{ variables.isExtraPlatformsBuild }}
diff --git a/eng/pipelines/runtime-wasm-optional.yml b/eng/pipelines/runtime-wasm-optional.yml
index a1392e624561..a4c00241d7cf 100644
--- a/eng/pipelines/runtime-wasm-optional.yml
+++ b/eng/pipelines/runtime-wasm-optional.yml
@@ -8,14 +8,14 @@ extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
     stages:
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
+          - template: /eng/pipelines/common/evaluate-default-paths.yml
+
     - stage: Build
       jobs:
-
-      #
-      # Evaluate paths
-      #
-      - template: /eng/pipelines/common/evaluate-default-paths.yml
-
       - template: /eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml
         parameters:
           isExtraPlatformsBuild: ${{ variables.isExtraPlatformsBuild }}
diff --git a/eng/pipelines/runtime-wasm-perf.yml b/eng/pipelines/runtime-wasm-perf.yml
index 86097cbbbff5..91e508e2c966 100644
--- a/eng/pipelines/runtime-wasm-perf.yml
+++ b/eng/pipelines/runtime-wasm-perf.yml
@@ -27,13 +27,6 @@ extends:
     stages:
     - stage: Build
       jobs:
-
-      #
-      # Evaluate paths
-      #
-      #- ${{ if eq(variables.dependOnEvaluatePaths, true) }}:
-        #- template: /eng/pipelines/common/evaluate-default-paths.yml
-
       - template: /eng/pipelines/coreclr/perf-wasm-jobs.yml
         parameters:
           runProfile: 'v8'
diff --git a/eng/pipelines/runtime-wasm.yml b/eng/pipelines/runtime-wasm.yml
index 1aa23ee7ee03..5727ff30b86a 100644
--- a/eng/pipelines/runtime-wasm.yml
+++ b/eng/pipelines/runtime-wasm.yml
@@ -11,14 +11,14 @@ extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
     stages:
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
+          - template: /eng/pipelines/common/evaluate-default-paths.yml
+
     - stage: Build
       jobs:
-
-      #
-      # Evaluate paths
-      #
-      - template: /eng/pipelines/common/evaluate-default-paths.yml
-
       - template: /eng/pipelines/extra-platforms/runtime-extra-platforms-wasm.yml
         parameters:
           isExtraPlatformsBuild: ${{ variables.isExtraPlatformsBuild }}
diff --git a/eng/pipelines/runtime.yml b/eng/pipelines/runtime.yml
index be2870611e61..98dc5285250f 100644
--- a/eng/pipelines/runtime.yml
+++ b/eng/pipelines/runtime.yml
@@ -57,118 +57,389 @@ variables:
 extends:
   template:  /eng/pipelines/common/templates/pipeline-with-resources.yml
   parameters:
+    isOfficialBuild: false
     stages:
+    - ${{ if eq(variables['Build.Reason'], 'PullRequest') }}:
+      - stage: EvaluatePaths
+        displayName: Evaluate Paths
+        jobs:
+          - template: /eng/pipelines/common/evaluate-default-paths.yml
+
     - stage: Build
       jobs:
-      #
-      # Evaluate paths
-      #
-      - ${{ if eq(variables.dependOnEvaluatePaths, true) }}:
-        - template: /eng/pipelines/common/evaluate-default-paths.yml
 
       #
-      # Build CoreCLR checked
-      # Only when CoreCLR is changed
+      # Build CoreCLR verticals where we don't run host tests
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-job.yml
-          buildConfig: checked
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
           platforms:
-          - linux_x64
-          - linux_arm
-          - linux_arm64
-          - linux_riscv64
           - linux_musl_arm
           - linux_musl_arm64
-          - linux_musl_x64
-          - osx_arm64
-          - windows_x86
-          - windows_x64
           - windows_arm64
+          - linux_arm
           jobParameters:
-            testGroup: innerloop
+            nameSuffix: AllSubsets_CoreCLR_ReleaseRuntimeLibs
+            buildArgs: -s clr+libs+host+packs -rc Release -lc Release -c $(_BuildConfig)
+            timeoutInMinutes: 120
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
-      #
-      # Build the whole product using GNU compiler toolchain
-      # When CoreCLR, Mono, Libraries, Installer and src/tests are changed
-      #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
           jobTemplate: /eng/pipelines/common/global-build-job.yml
-          buildConfig: checked
+          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
           platforms:
-          - gcc_linux_x64
+          - osx_arm64
           jobParameters:
-            testGroup: innerloop
-            nameSuffix: Native_GCC
-            buildArgs: -s clr.native+libs.native+mono+host.native -c $(_BuildConfig) -gcc
+            nameSuffix: AllSubsets_CoreCLR
+            buildArgs: -s clr+libs+host+packs -rc Release -c Release -lc $(_BuildConfig)
+            timeoutInMinutes: 120
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
+          platforms:
+          - linux_arm64
+          - linux_musl_x64
+          jobParameters:
+            nameSuffix: AllSubsets_CoreCLR
+            buildArgs: -s clr+libs+host+packs -rc Release -c Release -lc $(_BuildConfig)
+            timeoutInMinutes: 120
             postBuildSteps:
-              - template: /eng/pipelines/common/templates/runtimes/build-runtime-tests.yml
+              - template: /eng/pipelines/installer/steps/build-linux-package.yml
                 parameters:
-                  testBuildArgs: skipmanaged skipgeneratelayout skiprestorepackages -gcc
+                  packageType: rpm
+                  target: rpmpkg
+                  packagingArgs: -c Release -lc $(_BuildConfig) /p:BuildRpmPackage=true
+                  condition: and(succeeded(), eq(variables.osSubgroup, ''))
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
-      # Build CoreCLR osx_x64 checked
-      # Only when CoreCLR or Libraries is changed
+      # Build CoreCLR and Libraries with Libraries tests
+      # For running libraries tests and installer tests
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-job.yml
-          buildConfig: checked
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
           platforms:
+          - linux_x64
+          - linux_musl_x64
           - osx_x64
+          - windows_x64
           jobParameters:
-            testGroup: innerloop
+            nameSuffix: CoreCLR_Libraries
+            buildArgs: -s clr+libs+libs.tests -rc Release -c $(_BuildConfig) /p:ArchiveTests=true
+            timeoutInMinutes: 120
+            postBuildSteps:
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: CoreCLR_Libraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/helix
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: CoreCLR_Libraries_TestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: release
+          platforms:
+          - windows_x86
+          jobParameters:
+            nameSuffix: CoreCLR_Libraries
+            buildArgs: -s clr+libs+libs.tests -c $(_BuildConfig) /p:ArchiveTests=true
+            timeoutInMinutes: 120
+            postBuildSteps:
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: CoreCLR_Libraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/helix
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: CoreCLR_Libraries_TestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
-      # Build CoreCLR release
-      # Always as they are needed by Installer and we always build and test the Installer.
+      # Build CoreCLR and Libraries with the respective tests
+      # for the test configurations we run.
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/coreclr/templates/build-job.yml
-          buildConfig: release
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
           platforms:
-          - osx_arm64
+          - linux_arm64
           - osx_x64
+          jobParameters:
+            nameSuffix: Libraries_CheckedCoreCLR
+            buildArgs: -s clr+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:ArchiveTests=true
+            timeoutInMinutes: 120
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: Libraries_CheckedCoreCLR_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/helix
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: Libraries_CheckedCoreCLR_TestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: innerloop
+                  configOverride: Checked
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
+          platforms:
           - linux_x64
-          - linux_arm
-          - linux_arm64
+          - windows_x64
+          jobParameters:
+            nameSuffix: Libraries_CheckedCoreCLR
+            buildArgs: -s clr+libs -c $(_BuildConfig) -rc Checked
+            timeoutInMinutes: 120
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: Libraries_CheckedCoreCLR_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: innerloop
+                  configOverride: Checked
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
+          platforms:
           - linux_musl_x64
+          - windows_x86
+          jobParameters:
+            nameSuffix: Libraries_CheckedCoreCLR
+            buildArgs: -s clr+libs+libs.tests -c $(_BuildConfig) -rc Checked /p:ArchiveTests=true
+            timeoutInMinutes: 120
+            postBuildSteps:
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: Libraries_CheckedCoreCLR_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/helix
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: Libraries_CheckedCoreCLR_TestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
+          platforms:
+          - osx_arm64
+          jobParameters:
+            nameSuffix: Libraries_CheckedCoreCLR
+            buildArgs: -s clr+libs -c $(_BuildConfig) -rc Checked
+            timeoutInMinutes: 120
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: Libraries_CheckedCoreCLR_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: innerloop
+                  configOverride: Checked
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr_AppleSilicon.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: checked
+          platforms:
+          - linux_arm
+          - windows_arm64
+          - windows_x86
+          jobParameters:
+            nameSuffix: CoreCLR_ReleaseLibraries
+            buildArgs: -s clr+libs -rc $(_BuildConfig) -c Release
+            timeoutInMinutes: 120
+            postBuildSteps:
+              - template: /eng/pipelines/coreclr/templates/build-native-test-assets-step.yml
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: CoreCLR_ReleaseLibraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/native-test-assets-variables.yml
+                parameters:
+                  testGroup: innerloop
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: checked
+          platforms:
+          - linux_x64
           - linux_musl_arm
           - linux_musl_arm64
           - windows_x64
-          - windows_x86
-          - windows_arm64
-          - freebsd_x64
+          jobParameters:
+            nameSuffix: CoreCLR_ReleaseLibraries
+            buildArgs: -s clr+libs+libs.tests -rc $(_BuildConfig) -c Release /p:ArchiveTests=true
+            timeoutInMinutes: 120
+            postBuildSteps:
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: CoreCLR_ReleaseLibraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+                  displayName: Build Assets
+              - template: /eng/pipelines/common/upload-artifact-step.yml
+                parameters:
+                  rootFolder: $(Build.SourcesDirectory)/artifacts/helix
+                  includeRootFolder: false
+                  archiveType: $(archiveType)
+                  archiveExtension: $(archiveExtension)
+                  tarCompression: $(tarCompression)
+                  artifactName: CoreCLR_ReleaseLibraries_TestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      #
+      # Build the whole product using GNU compiler toolchain
+      # When CoreCLR, Mono, Libraries, Installer and src/tests are changed
+      #
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: checked
+          platforms:
+          - gcc_linux_x64
           jobParameters:
             testGroup: innerloop
-            # Mono/runtimetests also need this, but skip for wasm
-            condition:
+            nameSuffix: Native_GCC
+            buildArgs: -s clr.native+libs.native+mono+host.native -c $(_BuildConfig) -gcc
+            postBuildSteps:
+              - template: /eng/pipelines/common/templates/runtimes/build-runtime-tests.yml
+                parameters:
+                  testBuildArgs: skipmanaged skipgeneratelayout skiprestorepackages -gcc
+            condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -189,7 +460,7 @@ extends:
                   eq(variables['Build.SourceBranchName'], 'main'),
                   eq(variables['System.PullRequest.TargetBranch'], 'main')),
                 or(
-                  eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr_jit.containsChange'], true),
+                  eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr_jit.containsChange'], true),
                   eq(variables['isRollingBuild'], true)))
 
       #
@@ -210,7 +481,7 @@ extends:
             timeoutInMinutes: 120
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -231,7 +502,49 @@ extends:
             timeoutInMinutes: 120
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      #
+      # Build CoreCLR without building test assets
+      #
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml
+          buildConfig: checked
+          runtimeFlavor: coreclr
+          platforms:
+          - linux_riscv64
+          jobParameters:
+            testScope: innerloop
+            nameSuffix: CoreCLR
+            buildArgs: -s clr.native+clr.tools+clr.corelib+clr.nativecorelib+clr.aot+clr.packages -c $(_BuildConfig)
+            timeoutInMinutes: 120
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      #
+      # Build CoreCLR + Libs + Host + Packs
+      #
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml
+          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
+          runtimeFlavor: coreclr
+          platforms:
+          - freebsd_x64
+          jobParameters:
+            testScope: innerloop
+            nameSuffix: CoreCLR
+            buildArgs: -s clr+libs+host+packs -c $(_BuildConfig) -rc Checked
+            timeoutInMinutes: 120
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -254,7 +567,7 @@ extends:
           jobParameters:
             timeoutInMinutes: 120
             nameSuffix: NativeAOT
-            buildArgs: -s clr.aot+host.native+libs -rc $(_BuildConfig) -lc Release -hc Release
+            buildArgs: -s clr.aot+host.native+libs -rc $(_BuildConfig) -lc Release -hc Release /p:RunAnalyzers=false
             postBuildSteps:
               - template: /eng/pipelines/coreclr/nativeaot-post-build-steps.yml
                 parameters:
@@ -269,9 +582,9 @@ extends:
                   liveLibrariesBuildConfig: Release
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -293,7 +606,7 @@ extends:
           jobParameters:
             timeoutInMinutes: 180
             nameSuffix: NativeAOT
-            buildArgs: -s clr.aot+host.native+libs.native+libs.sfx -rc $(_BuildConfig) -lc Release -hc Release
+            buildArgs: -s clr.aot+host.native+libs.native+libs.sfx -rc $(_BuildConfig) -lc Release -hc Release /p:RunAnalyzers=false
             postBuildSteps:
               - template: /eng/pipelines/coreclr/nativeaot-post-build-steps.yml
                 parameters:
@@ -308,9 +621,9 @@ extends:
                   liveLibrariesBuildConfig: Release
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -338,7 +651,7 @@ extends:
             testGroup: innerloop
             timeoutInMinutes: 120
             nameSuffix: NativeAOT
-            buildArgs: -s clr.aot+host.native+libs+tools.illink -c $(_BuildConfig) -rc $(_BuildConfig) -lc Release -hc Release
+            buildArgs: -s clr.aot+host.native+libs+tools.illink -c $(_BuildConfig) -rc $(_BuildConfig) -lc Release -hc Release /p:RunAnalyzers=false
             postBuildSteps:
               - template: /eng/pipelines/coreclr/nativeaot-post-build-steps.yml
                 parameters:
@@ -353,9 +666,9 @@ extends:
                   liveLibrariesBuildConfig: Release
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -375,7 +688,7 @@ extends:
             testGroup: innerloop
             isSingleFile: true
             nameSuffix: NativeAOT_Libraries
-            buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) /p:TestNativeAot=true /p:RunSmokeTestsOnly=true /p:ArchiveTests=true
+            buildArgs: -s clr.aot+host.native+libs+libs.tests -c $(_BuildConfig) /p:TestNativeAot=true /p:RunSmokeTestsOnly=true /p:ArchiveTests=true /p:RunAnalyzers=false
             timeoutInMinutes: 240 # Doesn't actually take long, but we've seen the ARM64 Helix queue often get backlogged for 2+ hours
             # extra steps, run tests
             postBuildSteps:
@@ -385,9 +698,9 @@ extends:
                   testRunNamePrefixSuffix: NativeAOT_$(_BuildConfig)
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       # Build and test clr tools
@@ -406,8 +719,8 @@ extends:
             # We want to run AOT tests when illink changes because there's share code and tests from illink which are used by AOT
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
       #
       # Build CrossDacs
@@ -432,7 +745,7 @@ extends:
               artifact: CoreCLRCrossDacArtifacts
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       # Build Mono AOT offset headers once, for consumption elsewhere
@@ -441,6 +754,7 @@ extends:
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
           jobTemplate: /eng/pipelines/mono/templates/generate-offsets.yml
+          templatePath: 'templates'
           buildConfig: release
           platforms:
           - android_x64
@@ -454,8 +768,8 @@ extends:
             # needed by crossaot
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       # Build the whole product using Mono runtime
@@ -475,9 +789,9 @@ extends:
             buildArgs: -s mono+libs+host+packs -c $(_BuildConfig)
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       - template: /eng/pipelines/common/platform-matrix.yml
@@ -494,9 +808,9 @@ extends:
             buildArgs: -s mono+libs+host+packs -c $(_BuildConfig)
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -647,9 +961,9 @@ extends:
           variables:
             # map dependencies variables to local variables
             - name: librariesContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
             - name: monoContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
           jobParameters:
             testGroup: innerloop
             nameSuffix: AllSubsets_Mono
@@ -657,9 +971,9 @@ extends:
             timeoutInMinutes: 480
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
             # extra steps, run tests
             postBuildSteps:
@@ -689,9 +1003,9 @@ extends:
           variables:
             # map dependencies variables to local variables
             - name: librariesContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
             - name: monoContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
           jobParameters:
             testGroup: innerloop
             nameSuffix: AllSubsets_Mono
@@ -699,9 +1013,9 @@ extends:
             timeoutInMinutes: 480
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
             # extra steps, run tests
             postBuildSteps:
@@ -732,9 +1046,9 @@ extends:
           variables:
             # map dependencies variables to local variables
             - name: librariesContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
             - name: coreclrContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'] ]
           jobParameters:
             testGroup: innerloop
             nameSuffix: AllSubsets_NativeAOT
@@ -742,9 +1056,9 @@ extends:
             timeoutInMinutes: 180
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
             # extra steps, run tests
             postBuildSteps:
@@ -776,9 +1090,9 @@ extends:
           variables:
             # map dependencies variables to local variables
             - name: librariesContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'] ]
             - name: monoContainsChange
-              value: $[ dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
+              value: $[ stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'] ]
           jobParameters:
             testGroup: innerloop
             nameSuffix: AllSubsets_Mono
@@ -786,9 +1100,9 @@ extends:
             timeoutInMinutes: 180
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
             # extra steps, run tests
             postBuildSteps:
@@ -819,9 +1133,9 @@ extends:
                       /p:MonoEnableLLVM=true /p:MonoBundleLLVMOptimizer=false
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       - template: /eng/pipelines/common/platform-matrix.yml
@@ -839,9 +1153,9 @@ extends:
                       /p:MonoEnableLLVM=true /p:MonoBundleLLVMOptimizer=false
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -862,9 +1176,9 @@ extends:
                       /p:MonoEnableLLVM=true /p:MonoAOTEnableLLVM=true /p:MonoBundleLLVMOptimizer=true
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       - template: /eng/pipelines/common/platform-matrix.yml
@@ -881,9 +1195,9 @@ extends:
                       /p:MonoEnableLLVM=true /p:MonoAOTEnableLLVM=true /p:MonoBundleLLVMOptimizer=true
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -892,7 +1206,7 @@ extends:
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/mono/templates/build-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           runtimeFlavor: mono
           buildConfig: debug
           platforms:
@@ -905,18 +1219,19 @@ extends:
           - windows_x86
           # - windows_arm64
           jobParameters:
+            nameSuffix: Mono_Runtime
+            buildArgs: -s mono -c $(_BuildConfig)
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
       # Build Mono release AOT cross-compilers
-      # Only when mono changed
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/mono/templates/build-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           runtimeFlavor: mono
           buildConfig: release
           platforms:
@@ -924,9 +1239,37 @@ extends:
           - linux_musl_x64
           - linux_arm64
           - linux_musl_arm64
+          jobParameters:
+            buildArgs: -s mono+packs -c $(_BuildConfig)
+                      /p:MonoCrossAOTTargetOS=android+browser+wasi /p:SkipMonoCrossJitConfigure=true /p:BuildMonoAOTCrossCompilerOnly=true
+            nameSuffix: CrossAOT_Mono
+            runtimeVariant: crossaot
+            dependsOn:
+            - mono_android_offsets
+            - mono_browser_offsets
+            - mono_wasi_offsets
+            monoCrossAOTTargetOS:
+            - android
+            - browser
+            - wasi
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          runtimeFlavor: mono
+          buildConfig: release
+          platforms:
           - windows_arm64
           - windows_x64
           jobParameters:
+            buildArgs: -s mono+packs -c $(_BuildConfig)
+                      /p:MonoCrossAOTTargetOS=android+browser+wasi /p:SkipMonoCrossJitConfigure=true /p:BuildMonoAOTCrossCompilerOnly=true
+            nameSuffix: CrossAOT_Mono
             runtimeVariant: crossaot
             dependsOn:
             - mono_android_offsets
@@ -938,19 +1281,26 @@ extends:
             - wasi
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
+      #
+      # Build Mono release AOT cross-compilers
+      # Only when mono changed
+      #
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/mono/templates/build-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           runtimeFlavor: mono
           buildConfig: release
           platforms:
           - osx_x64
           - osx_arm64
           jobParameters:
+            buildArgs: -s mono+packs -c $(_BuildConfig)
+                      /p:MonoCrossAOTTargetOS=android+browser+wasi+tvos+ios+maccatalyst /p:SkipMonoCrossJitConfigure=true /p:BuildMonoAOTCrossCompilerOnly=true
+            nameSuffix: CrossAOT_Mono
             runtimeVariant: crossaot
             dependsOn:
             - mono_android_offsets
@@ -968,153 +1318,10 @@ extends:
             - maccatalyst
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
-      #
-      # Build Mono release
-      # Only when libraries or mono changed
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/mono/templates/build-job.yml
-          runtimeFlavor: mono
-          buildConfig: release
-          platforms:
-          - linux_x64
-          # - linux_musl_arm64
-          - windows_x64
-          - windows_x86
-          # - windows_arm64
-          jobParameters:
-            condition: >-
-              or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(variables['isRollingBuild'], true))
-
-      #
-      # Build Mono release
-      # Only when libraries, mono, or the runtime tests changed
-      # Currently only these architectures are needed for the runtime tests.
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/mono/templates/build-job.yml
-          runtimeFlavor: mono
-          buildConfig: release
-          platforms:
-          - osx_x64
-          - linux_arm64
-          jobParameters:
-            condition: >-
-              or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(variables['isRollingBuild'], true))
-
-      #
-      # Build Mono release with LLVM AOT
-      # Only when mono, or the runtime tests changed
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/mono/templates/build-job.yml
-          runtimeFlavor: mono
-          buildConfig: release
-          platforms:
-          - linux_x64
-          - linux_arm64
-          jobParameters:
-            runtimeVariant: llvmaot
-            condition: >-
-              or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(variables['isRollingBuild'], true))
-
-      #
-      # Build libraries using live CoreLib
-      # These set of libraries are built always no matter what changed
-      # The reason for that is because Corelib and Installer needs it and
-      # These are part of the test matrix for Libraries changes.
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/libraries/build-job.yml
-          buildConfig: Release
-          platforms:
-          - linux_arm
-          - linux_musl_arm
-          - linux_musl_arm64
-          - windows_arm64
-          - windows_x86
-          jobParameters:
-            condition:
-              or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(variables['isRollingBuild'], true))
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/libraries/build-job.yml
-          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
-          platforms:
-          - linux_arm64
-          - linux_musl_x64
-          - linux_x64
-          - osx_arm64
-          - osx_x64
-          - windows_x64
-          - freebsd_x64
-          jobParameters:
-            testScope: innerloop
-            condition:
-              or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_installer.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(variables['isRollingBuild'], true))
-
-      #
-      # Libraries debug build that only runs when coreclr is changed
-      # Only do this on PR builds since we use the Release builds for these test runs in CI
-      # and those are already built above
-      #
-      - ${{ if eq(variables['isRollingBuild'], false) }}:
-        - template: /eng/pipelines/common/platform-matrix.yml
-          parameters:
-            jobTemplate: /eng/pipelines/libraries/build-job.yml
-            buildConfig: Debug
-            platforms:
-            - windows_x86
-            jobParameters:
-              condition: >-
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true)
-
-      #
-      # Libraries release build that only runs when coreclr is changed in PRs
-      # We need these for checked coreclr + release libraries tests runs.
-      #
-      - ${{ if eq(variables['isRollingBuild'], false) }}:
-        - template: /eng/pipelines/common/platform-matrix.yml
-          parameters:
-            jobTemplate: /eng/pipelines/libraries/build-job.yml
-            buildConfig: Release
-            platforms:
-            - linux_x64
-            - windows_x64
-            jobParameters:
-              condition: >-
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true)
-
       #
       # Build and test libraries for .NET Framework
       #
@@ -1138,7 +1345,7 @@ extends:
                   extraHelixArguments: /p:BuildTargetFramework=net48
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -1156,8 +1363,8 @@ extends:
             timeoutInMinutes: 150
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_tools_illink.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -1167,158 +1374,112 @@ extends:
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/installer/jobs/build-job.yml
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
           platforms:
-            - linux_musl_arm
-            - linux_musl_arm64
             - windows_x86
-            - windows_arm64
-            - linux_arm
           jobParameters:
-            liveRuntimeBuildConfig: release
-            liveLibrariesBuildConfig: Release
-            runOnlyIfDependenciesSucceeded: true
+            nameSuffix: Installer_Build_And_Test
+            buildArgs: -s host+packs -c $(_BuildConfig) -lc Release -rc Release -test
+            dependsOnGlobalBuilds:
+              - nameSuffix: CoreCLR_Libraries
+                buildConfig: release
+            preBuildSteps:
+              - template: /eng/pipelines/common/download-artifact-step.yml
+                parameters:
+                  artifactName: CoreCLR_Libraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Release
+                  artifactFileName: CoreCLR_Libraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Release$(archiveExtension)
+                  unpackFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  displayName: 'unified artifacts'
+            enablePublishTestResults: true
+            testRunTitle: Installer-$(osGroup)$(osSubgroup)_$(archType)
+            postBuildSteps:
+              - template: /eng/pipelines/installer/steps/upload-job-artifacts.yml
+                parameters:
+                  name: $(osGroup)$(osSubgroup)_$(archType)
             condition:
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/installer/jobs/build-job.yml
-          buildConfig: Release
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          buildConfig: release
           platforms:
-            - osx_arm64
+            - windows_x64
             - osx_x64
             - linux_x64
-            - linux_arm64
-            - linux_musl_x64
-            - windows_x64
-            - freebsd_x64
           jobParameters:
-            liveRuntimeBuildConfig: release
-            liveLibrariesBuildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
-            runOnlyIfDependenciesSucceeded: true
+            nameSuffix: Installer_Build_And_Test
+            buildArgs: -s host+packs -c $(_BuildConfig) -lc ${{ variables.debugOnPrReleaseOnRolling }} -rc Release -test
+            dependsOnGlobalBuilds:
+              - nameSuffix: CoreCLR_Libraries
+                buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
+            preBuildSteps:
+              - template: /eng/pipelines/common/download-artifact-step.yml
+                parameters:
+                  artifactName: CoreCLR_Libraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(debugOnPrReleaseOnRolling)
+                  artifactFileName: CoreCLR_Libraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(debugOnPrReleaseOnRolling)$(archiveExtension)
+                  unpackFolder: $(Build.SourcesDirectory)/artifacts/bin
+                  displayName: 'unified artifacts'
+            enablePublishTestResults: true
+            testRunTitle: Installer-$(osGroup)$(osSubgroup)_$(archType)
+            postBuildSteps:
+              - template: /eng/pipelines/installer/steps/upload-job-artifacts.yml
+                parameters:
+                  name: $(osGroup)$(osSubgroup)_$(archType)
+              - template: /eng/pipelines/installer/steps/build-linux-package.yml
+                parameters:
+                  packageType: deb
+                  target: debpkg
+                  packagingArgs: -c $(_BuildConfig) -lc ${{ variables.debugOnPrReleaseOnRolling }} /p:BuildDebPackage=true
+              - template: /eng/pipelines/installer/steps/build-linux-package.yml
+                parameters:
+                  packageType: rpm
+                  target: rpmpkg
+                  packagingArgs: -c $(_BuildConfig) -lc ${{ variables.debugOnPrReleaseOnRolling }} /p:BuildRpmPackage=true
             condition:
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_non_mono_and_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
-      # CoreCLR Test builds using live libraries release build
-      # Only when CoreCLR is changed
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: checked
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: innerloop
-            condition: >-
-              or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(variables['isRollingBuild'], true))
-
-      #
-      # CoreCLR Test executions using live libraries
-      # Only when CoreCLR is changed
+      # Build the whole product using Mono and run runtime tests
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - linux_arm
-          - windows_x86
-          - windows_arm64
-          helixQueueGroup: pr
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: innerloop
-            liveLibrariesBuildConfig: Release
-            condition: >-
-              or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(variables['isRollingBuild'], true))
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_x64
-          - linux_x64
-          - linux_arm64
-          - windows_x64
-          helixQueueGroup: pr
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: innerloop
-            liveLibrariesBuildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
-            condition: >-
-              or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(variables['isRollingBuild'], true))
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: checked
-          platforms:
-          - osx_arm64
-          helixQueueGroup: pr
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
           helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
-          jobParameters:
-            testGroup: innerloop
-            liveLibrariesBuildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
-            condition: >-
-              or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr_AppleSilicon.containsChange'], true),
-                eq(variables['isRollingBuild'], true))
-
-      #
-      # Mono Test builds with CoreCLR runtime tests using live libraries debug build
-      # Only when Mono is changed
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
-          buildConfig: release
-          runtimeFlavor: mono
-          platforms:
-          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
-          jobParameters:
-            testGroup: innerloop
-            condition: >-
-              or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(variables['isRollingBuild'], true))
-
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
-          buildConfig: release
+          buildConfig: Release
           runtimeFlavor: mono
           platforms:
-          - windows_x64
-          helixQueueGroup: pr
-          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+            - osx_x64
+            - linux_arm64
+          variables:
+            - name: timeoutPerTestInMinutes
+              value: 60
+            - name: timeoutPerTestCollectionInMinutes
+              value: 180
           jobParameters:
             testGroup: innerloop
-            liveLibrariesBuildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
-            liveRuntimeBuildConfig: release
+            nameSuffix: AllSubsets_Mono_Minijit_RuntimeTests
             runtimeVariant: minijit
+            buildArgs: -s mono+libs+clr.hosts+clr.iltools -c Release
+            timeoutInMinutes: 180
             condition: >-
-              or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
-                eq(variables['isRollingBuild'], true))
+                  or(
+                    eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                    eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                    eq(variables['isRollingBuild'], true))
+
+            postBuildSteps:
+              - template: /eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  testRunNamePrefixSuffix: Mono_Release
+            extraVariablesTemplates:
+              - template: /eng/pipelines/common/templates/runtimes/test-variables.yml
 
       #
       # Build the whole product using Mono and run runtime tests
@@ -1330,8 +1491,7 @@ extends:
           buildConfig: Release
           runtimeFlavor: mono
           platforms:
-            - osx_x64
-            - linux_arm64
+            - windows_x64
           variables:
             - name: timeoutPerTestInMinutes
               value: 60
@@ -1341,12 +1501,12 @@ extends:
             testGroup: innerloop
             nameSuffix: AllSubsets_Mono_Minijit_RuntimeTests
             runtimeVariant: minijit
-            buildArgs: -s mono+libs+clr.hosts+clr.iltools -c Release
+            buildArgs: -s mono+libs+clr.hosts+clr.iltools -c Release -lc ${{ variables.debugOnPrReleaseOnRolling }}
             timeoutInMinutes: 180
             condition: >-
                   or(
-                    eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                    eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                    eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                    eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
                     eq(variables['isRollingBuild'], true))
 
             postBuildSteps:
@@ -1356,6 +1516,8 @@ extends:
                   testRunNamePrefixSuffix: Mono_Release
             extraVariablesTemplates:
               - template: /eng/pipelines/common/templates/runtimes/test-variables.yml
+                parameters:
+                  liveLibrariesBuildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
 
       #
       # Mono CoreCLR runtime Test executions using live libraries in interpreter mode
@@ -1382,8 +1544,8 @@ extends:
             timeoutInMinutes: 180
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
             postBuildSteps:
               - template: /eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml
@@ -1420,8 +1582,8 @@ extends:
 
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
             postBuildSteps:
               - template: /eng/pipelines/common/templates/runtimes/build-runtime-tests-and-send-to-helix.yml
@@ -1433,55 +1595,88 @@ extends:
               - template: /eng/pipelines/common/templates/runtimes/test-variables.yml
 
       #
-      # Libraries Release Test Execution against a release mono runtime.
-      # Only when libraries or mono changed
+      # CoreCLR Test builds using live libraries release build
+      # Only when CoreCLR is changed
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/libraries/run-test-job.yml
-          runtimeFlavor: mono
-          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
+          jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml
+          buildConfig: checked
           platforms:
-          # - windows_x64
-          - osx_x64
-          - linux_arm64
-          - linux_x64
-          helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml
+          - CoreClrTestBuildHost # Either osx_x64 or linux_x64
           jobParameters:
-            isOfficialBuild: false
-            runtimeDisplayName: mono
-            testScope: innerloop
-            liveRuntimeBuildConfig: release
+            testGroup: innerloop
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
-      # Libraries Release Test Execution against a release mono interpreter runtime.
-      # Only when libraries or mono changed
+      # CoreCLR Test executions using live libraries
+      # Only when CoreCLR is changed
       #
       - template: /eng/pipelines/common/platform-matrix.yml
         parameters:
-          jobTemplate: /eng/pipelines/libraries/run-test-job.yml
-          runtimeFlavor: mono
-          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
+          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
+          buildConfig: checked
+          platforms:
+          - linux_arm
+          - windows_x86
+          - windows_arm64
+          helixQueueGroup: pr
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+          jobParameters:
+            testGroup: innerloop
+            liveLibrariesBuildConfig: Release
+            unifiedArtifactsName: CoreCLR_ReleaseLibraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            unifiedBuildNameSuffix: CoreCLR_ReleaseLibraries
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
+          buildConfig: checked
           platforms:
-          # - windows_x64
-          #- osx_x64
+          - osx_x64
           - linux_x64
-          helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml
+          - linux_arm64
+          - windows_x64
+          helixQueueGroup: pr
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
           jobParameters:
-            isOfficialBuild: false
-            interpreter: true
-            runtimeDisplayName: mono_interpreter
-            testScope: innerloop
-            liveRuntimeBuildConfig: release
+            testGroup: innerloop
+            liveLibrariesBuildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
+            unifiedArtifactsName: Libraries_CheckedCoreCLR_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(debugOnPrReleaseOnRolling)
+            unifiedBuildNameSuffix: Libraries_CheckedCoreCLR
+            unifiedBuildConfigOverride: ${{ variables.debugOnPrReleaseOnRolling }}
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_runtimetests.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
+          buildConfig: checked
+          platforms:
+          - osx_arm64
+          helixQueueGroup: pr
+          helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+          jobParameters:
+            testGroup: innerloop
+            liveLibrariesBuildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
+            unifiedArtifactsName: Libraries_CheckedCoreCLR_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(debugOnPrReleaseOnRolling)
+            unifiedBuildNameSuffix: Libraries_CheckedCoreCLR
+            unifiedBuildConfigOverride: ${{ variables.debugOnPrReleaseOnRolling }}
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr_AppleSilicon.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -1499,9 +1694,12 @@ extends:
             isOfficialBuild: false
             testScope: innerloop
             liveRuntimeBuildConfig: release
+            unifiedArtifactsName: CoreCLR_Libraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            helixArtifactsName: CoreCLR_Libraries_TestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            unifiedBuildNameSuffix: CoreCLR_Libraries
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -1522,11 +1720,14 @@ extends:
             isOfficialBuild: false
             testScope: innerloop
             liveRuntimeBuildConfig: release
+            unifiedArtifactsName: CoreCLR_Libraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(debugOnPrReleaseOnRolling)
+            helixArtifactsName: CoreCLR_Libraries_TestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(debugOnPrReleaseOnRolling)
+            unifiedBuildNameSuffix: CoreCLR_Libraries
+            unifiedBuildConfigOverride: ${{ variables.debugOnPrReleaseOnRolling }}
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
-
       # The next three jobs run checked coreclr + <either debug or release in PR> libraries tests.
       # The matrix looks like the following, where the right columns specify which configurations
       # the libraries tests are built in.
@@ -1559,9 +1760,12 @@ extends:
           jobParameters:
             testScope: innerloop
             liveRuntimeBuildConfig: checked
+            unifiedArtifactsName: Libraries_CheckedCoreCLR_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            helixArtifactsName: Libraries_CheckedCoreCLR_TestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            unifiedBuildNameSuffix: Libraries_CheckedCoreCLR
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
@@ -1582,9 +1786,13 @@ extends:
           jobParameters:
             testScope: innerloop
             liveRuntimeBuildConfig: checked
+            unifiedArtifactsName: CoreCLR_ReleaseLibraries_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked
+            helixArtifactsName: CoreCLR_ReleaseLibraries_TestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_Checked
+            unifiedBuildNameSuffix: CoreCLR_ReleaseLibraries
+            unifiedBuildConfigOverride: checked
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       - template: /eng/pipelines/common/platform-matrix.yml
@@ -1598,10 +1806,76 @@ extends:
           jobParameters:
             testScope: innerloop
             liveRuntimeBuildConfig: checked
+            unifiedArtifactsName: Libraries_CheckedCoreCLR_BuildArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            helixArtifactsName: Libraries_CheckedCoreCLR_TestArtifacts_$(osGroup)$(osSubgroup)_$(archType)_$(_BuildConfig)
+            unifiedBuildNameSuffix: Libraries_CheckedCoreCLR
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      #
+      # Build and test Mono Interpreter with the libraries testss
+      #
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml
+          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
+          runtimeFlavor: mono
+          runtimeVariant: monointerpreter
+          platforms:
+            - linux_x64
+            #- osx_x64
+            #- windows_x64
+          jobParameters:
+            testGroup: innerloop
+            nameSuffix: Mono_Interpreter_LibrariesTests
+            buildArgs: -s mono+libs+libs.tests -rc Release -c $(_BuildConfig) /p:ArchiveTests=true
+            timeoutInMinutes: 480
+            # extra steps, run tests
+            postBuildSteps:
+              - template: /eng/pipelines/libraries/helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  testRunNamePrefixSuffix: Mono_Interpreter_$(_BuildConfig)
+                  interpreter: true
+            condition: >-
+              or(
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
+                eq(variables['isRollingBuild'], true))
+
+      #
+      # Build and test Mono Minijit with the libraries testss
+      #
+      - template: /eng/pipelines/common/platform-matrix.yml
+        parameters:
+          jobTemplate: /eng/pipelines/common/global-build-job.yml
+          helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml
+          buildConfig: ${{ variables.debugOnPrReleaseOnRolling }}
+          runtimeFlavor: mono
+          platforms:
+            - linux_arm64
+            - linux_x64
+            - osx_x64
+            #- windows_x64
+          jobParameters:
+            testGroup: innerloop
+            nameSuffix: Mono_MiniJIT_LibrariesTests
+            buildArgs: -s mono+libs+libs.tests -rc Release -c $(_BuildConfig) /p:ArchiveTests=true
+            timeoutInMinutes: 480
+            # extra steps, run tests
+            postBuildSteps:
+              - template: /eng/pipelines/libraries/helix.yml
+                parameters:
+                  creator: dotnet-bot
+                  testRunNamePrefixSuffix: Mono_Minijit_$(_BuildConfig)
             condition: >-
               or(
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_coreclr.containsChange'], true),
-                eq(dependencies.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_libraries.containsChange'], true),
+                eq(stageDependencies.EvaluatePaths.evaluate_paths.outputs['SetPathVars_mono_excluding_wasm.containsChange'], true),
                 eq(variables['isRollingBuild'], true))
 
       #
diff --git a/eng/pipelines/runtimelab.yml b/eng/pipelines/runtimelab.yml
index 351a1ec04671..4fa6320f178a 100644
--- a/eng/pipelines/runtimelab.yml
+++ b/eng/pipelines/runtimelab.yml
@@ -49,85 +49,96 @@ extends:
     stages:
     - stage: Build
       jobs:
-      #
-      # Build with Release libraries and Debug runtime
-      #
-      - ${{ if ne(variables.isOfficialBuild, true) }}:
-        - template: /eng/pipelines/common/platform-matrix.yml
-          parameters:
-            jobTemplate: /eng/pipelines/common/global-build-job.yml
-            buildConfig: Debug
-            platforms:
-          # - Linux_x64
-            - windows_x64
-            - OSX_x64
-            - Browser_wasm_win
-            - wasi_wasm_win
-            jobParameters:
-              timeoutInMinutes: 300
-              testGroup: innerloop
-              buildArgs: -s clr.aot+libs+nativeaot.packages -lc Release -rc Debug
-              postBuildSteps:
-                - template: /eng/pipelines/runtimelab/runtimelab-post-build-steps.yml
-                  parameters:
-                    librariesConfiguration: Release
 
-      #
-      # Build with Debug libraries and Checked runtime
-      #
-      - ${{ if ne(variables.isOfficialBuild, true) }}:
-        - template: /eng/pipelines/common/platform-matrix.yml
-          parameters:
-            jobTemplate: /eng/pipelines/common/global-build-job.yml
-            buildConfig: Checked
-            platforms:
-          # - linux_x64
-            - windows_x64
-            jobParameters:
-              timeoutInMinutes: 100
-              testGroup: innerloop
-              buildArgs: -s clr+libs+host+packs -c debug -runtimeConfiguration Checked
-              postBuildSteps:
-                - template: /eng/pipelines/runtimelab/runtimelab-post-build-steps.yml
-                  parameters:
-                    uploadRuntimeTests: true
-                    librariesConfiguration: Debug
+        - ${{ if ne(variables.isOfficialBuild, true) }}:
+          #
+          # Build and test with Debug libraries and Debug runtime
+          #
+          - template: /eng/pipelines/common/platform-matrix.yml
+            parameters:
+              jobTemplate: /eng/pipelines/common/global-build-job.yml
+              helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+              buildConfig: debug
+              platforms:
+#              - linux_x64
+              - osx_x64
+              - windows_x64
+              - Browser_wasm_win
+              - wasi_wasm_win
+              jobParameters:
+                timeoutInMinutes: 300
+                buildArgs: -s clr.aot+libs+nativeaot.packages -c debug -rc $(_BuildConfig)
+                postBuildSteps:
+                  - template: /eng/pipelines/runtimelab/runtimelab-post-build-steps.yml
+                    parameters:
+                      librariesConfiguration: Debug
 
-      #
-      # Build with Release libraries and Release runtime (used for official builds) - Wasm
-      #
-      - template: /eng/pipelines/common/platform-matrix.yml
-        parameters:
-          jobTemplate: /eng/pipelines/common/global-build-job.yml
-          buildConfig: Release
-          platforms:
-    #     - linux_x64        # Tests fail with a few errors, e.g. System.BadImageFormatException: Read out of bounds.  TODO-LLVM try to reinstate when more merged
-    #     - linux_musl_x64
-    #     - linux_arm64      # ILCompiler for LLVM depends on libLLVM.runtime.linux-arm64 with version (>= 11.0.0) which is missing https://github.com/microsoft/LLVMSharp/issues/177. TODO: reinstate when we remove LLVMSharp dependency
-    #     - linux_arm        # ILCompiler for LLVM depends on libLLVM.runtime.linux-arm64 with version (>= 11.0.0) which is missing https://github.com/microsoft/LLVMSharp/issues/177. TODO: reinstate when we remove LLVMSharp dependency
-    #     - linux_musl_arm64 # ILCompiler for LLVM depends on libLLVM.runtime.linux-arm64 with version (>= 11.0.0) which is missing https://github.com/microsoft/LLVMSharp/issues/177. TODO: reinstate when we remove LLVMSharp dependency
-    #     - windows_x64      # Part of the combined (target + host) WASM build below
-          - windows_arm64
-          - OSX_x64
-          - Browser_wasm_win
-          - wasi_wasm_win
-          jobParameters:
-            timeoutInMinutes: 300
-            isOfficialBuild: ${{ variables.isOfficialBuild }}
-            testGroup: innerloop
-            postBuildSteps:
-              - template: /eng/pipelines/runtimelab/runtimelab-post-build-steps.yml
-                parameters:
-                  isOfficialBuild: ${{ variables.isOfficialBuild }}
-                  uploadLibrariesTests: ${{ eq(variables.isOfficialBuild, false) }}
-                  uploadIntermediateArtifacts: ${{ variables.isOfficialBuild }}
-                  librariesConfiguration: Release
-            ${{ if eq(variables.isOfficialBuild, false) }}:
-              buildArgs: -s clr.aot+libs+nativeaot.packages -c $(_BuildConfig) /p:ArchiveTests=true
-            ${{ if eq(variables.isOfficialBuild, true) }}:
-              buildArgs: -s clr.aot+libs+nativeaot.packages -c $(_BuildConfig)
+          #
+          # Build and test with Debug libraries and Checked runtime
+          #
+          - template: /eng/pipelines/common/platform-matrix.yml
+            parameters:
+              jobTemplate: /eng/pipelines/common/global-build-job.yml
+              helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
+              buildConfig: checked
+              platforms:
+#              - linux_x64
+              - windows_x64
+              jobParameters:
+                timeoutInMinutes: 300
+                buildArgs: -s clr.aot+libs+nativeaot.packages -c debug -rc $(_BuildConfig)
+                postBuildSteps:
+                  - template: /eng/pipelines/runtimelab/runtimelab-post-build-steps.yml
+                    parameters:
+                      uploadRuntimeTests: true
+                      librariesConfiguration: Debug
+
+          #
+          # Build and test with Release libraries and Release runtime
+          #
+          - template: /eng/pipelines/common/platform-matrix.yml
+            parameters:
+              jobTemplate: /eng/pipelines/common/global-build-job.yml
+              helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml
+              buildConfig: release
+              platforms:
+#              - linux_x64
+              - osx_x64
+              - windows_x64
+              - Browser_wasm_win
+              - wasi_wasm_win
+              jobParameters:
+                timeoutInMinutes: 300
+                buildArgs: -s  clr.aot+libs+nativeaot.packages -c $(_BuildConfig) /p:ArchiveTests=true
+                postBuildSteps:
+                  - template: /eng/pipelines/runtimelab/runtimelab-post-build-steps.yml
+                    parameters:
+                      librariesConfiguration: Release
+
+        - ${{ else }}:
+          #
+          # Build the whole product with Release CoreCLR
+          #
+          - template: /eng/pipelines/common/platform-matrix.yml
+            parameters:
+              jobTemplate: /eng/pipelines/common/global-build-job.yml
+              helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml
+              buildConfig: release
+              platforms:
+#              - linux_x64
+              - windows_x64
+              jobParameters:
+                isOfficialBuild: true
+                timeoutInMinutes: 380
+                buildArgs: -s clr+libs+hosts+packs -c $(_BuildConfig)
+                postBuildSteps:
+                  - template: /eng/pipelines/runtimelab/runtimelab-post-build-steps.yml
+                    parameters:
+                      isOfficialBuild: true
+                      uploadIntermediateArtifacts: true
+                      librariesConfiguration: Release
 
     - ${{ if eq(variables.isOfficialBuild, true) }}:
       - template: /eng/pipelines/official/stages/publish.yml
         parameters:
-          isOfficialBuild: ${{ variables.isOfficialBuild }}
+          isOfficialBuild: true
diff --git a/eng/pipelines/runtimelab/install-llvm.ps1 b/eng/pipelines/runtimelab/install-llvm.ps1
index f09201118b96..bccc9d980e5c 100644
--- a/eng/pipelines/runtimelab/install-llvm.ps1
+++ b/eng/pipelines/runtimelab/install-llvm.ps1
@@ -22,7 +22,7 @@ if (!(gcm cmake -ErrorAction SilentlyContinue))
 if (!$NoClone)
 {
     $LlvmProjectTag = "llvmorg-17.0.4"
-    $DepthOption = if ($CI) {"--depth","1"} {}
+    $DepthOption = if ($CI) {"--depth","1"} else {}
     git clone https://github.com/llvm/llvm-project --branch $LlvmProjectTag $DepthOption
 }
 elseif (!(Test-Path llvm-project))
@@ -72,7 +72,7 @@ foreach ($Config in $Configs)
     }
     else
     {
-        $LlvmCmakeConfigEnvVarName = if ($Config -eq "Release") {"LLVM_CMAKE_CONFIG_RELEASE"} {"LLVM_CMAKE_CONFIG_DEBUG"}
+        $LlvmCmakeConfigEnvVarName = if ($Config -eq "Release") {"LLVM_CMAKE_CONFIG_RELEASE"} else {"LLVM_CMAKE_CONFIG_DEBUG"}
     }
 
     Write-Host "Setting $LlvmCmakeConfigEnvVarName to '$LlvmCmakeConfigPath'"
diff --git a/eng/pipelines/runtimelab/runtimelab-post-build-steps.yml b/eng/pipelines/runtimelab/runtimelab-post-build-steps.yml
index b71811c40781..0498bcd4d5e9 100644
--- a/eng/pipelines/runtimelab/runtimelab-post-build-steps.yml
+++ b/eng/pipelines/runtimelab/runtimelab-post-build-steps.yml
@@ -29,12 +29,12 @@ steps:
   - ${{ if eq(parameters.platform, 'browser_wasm_win') }}:
     - script: |
         call $(Build.SourcesDirectory)\wasm-tools\emsdk\emsdk_env
-        $(Build.SourcesDirectory)/src/tests/build$(scriptExt) nativeaot $(buildConfigUpper) ${{ parameters.archType }} tree nativeaot
+        $(Build.SourcesDirectory)/src/tests/build$(scriptExt) nativeaot $(buildConfigUpper) ${{ parameters.archType }} tree nativeaot /p:LibrariesConfiguration=${{ parameters.librariesConfiguration }}
       displayName: Build WebAssembly tests
   - ${{ elseif eq(parameters.platform, 'wasi_wasm_win') }}:
     - script: |
         call $(Build.SourcesDirectory)\wasm-tools\emsdk\emsdk_env
-        $(Build.SourcesDirectory)/src/tests/build$(scriptExt) nativeaot $(buildConfigUpper) ${{ parameters.archType }} wasi tree nativeaot
+        $(Build.SourcesDirectory)/src/tests/build$(scriptExt) nativeaot $(buildConfigUpper) ${{ parameters.archType }} wasi tree nativeaot /p:LibrariesConfiguration=${{ parameters.librariesConfiguration }}
       displayName: Build WebAssembly tests
 
   - ${{ elseif eq(parameters.osGroup, 'windows') }}:
diff --git a/eng/resolveContract.targets b/eng/resolveContract.targets
index b90fa5de28a7..fb93fcd09e9a 100644
--- a/eng/resolveContract.targets
+++ b/eng/resolveContract.targets
@@ -51,6 +51,7 @@
   <PropertyGroup>
     <ApiCompatContractItemName>ResolvedMatchingContract</ApiCompatContractItemName>
     <ApiCompatStrictMode Condition="'$(ApiCompatStrictMode)' == ''">true</ApiCompatStrictMode>
+    <ApiCompatUseRoslynToolsetPackagePath Condition="'$(DotNetBuildSourceOnly)' == 'true'">true</ApiCompatUseRoslynToolsetPackagePath>
 
     <!-- Optional rules -->
     <ApiCompatEnableRuleAttributesMustMatch>true</ApiCompatEnableRuleAttributesMustMatch>
@@ -75,7 +76,7 @@
        all the inputs available, some suppressions might only apply to one or the other and hence unnecessary
        suppressions can't be determined.
        Disable the validation under source build as that might use an out-of-date SDK and not the ApiCompat.Task package. -->
-  <PropertyGroup Condition="('$(IsPackable)' == 'true' and '$(IsRuntimeAndReferenceAssembly)' != 'true') or '$(DotNetBuildFromSource)' == 'true'">
+  <PropertyGroup Condition="('$(IsPackable)' == 'true' and '$(IsRuntimeAndReferenceAssembly)' != 'true') or '$(DotNetBuildSourceOnly)' == 'true'">
     <ApiCompatPreserveUnnecessarySuppressions>true</ApiCompatPreserveUnnecessarySuppressions>
     <ApiCompatPermitUnnecessarySuppressions>true</ApiCompatPermitUnnecessarySuppressions>
   </PropertyGroup>
@@ -136,7 +137,7 @@
                       Version="$(MicrosoftDotNetGenApiVersion)"
                       PrivateAssets="all"
                       IsImplicitlyDefined="true"
-                      Condition="'$(DotNetBuildFromSource)' != 'true'" />
+                      Condition="'$(DotNetBuildSourceOnly)' != 'true'" />
   </ItemGroup>
 
   <!-- Generate a .txt file with all public types of the project referenced by ProjectForGenAPIDocIdGeneration (e.g. System.Private.CoreLib or System.Private.Uri implementation assembly).
diff --git a/eng/targetingpacks.targets b/eng/targetingpacks.targets
index befd249f231d..8a6a8b96a2f5 100644
--- a/eng/targetingpacks.targets
+++ b/eng/targetingpacks.targets
@@ -43,7 +43,7 @@
                              LatestRuntimeFrameworkVersion="$(ProductVersion)"
                              RuntimeFrameworkName="$(LocalFrameworkOverrideName)"
                              RuntimePackNamePatterns="$(LocalFrameworkOverrideName).Runtime.**RID**"
-                             RuntimePackRuntimeIdentifiers="linux-arm;linux-arm64;linux-musl-arm64;linux-musl-x64;linux-x64;osx-x64;rhel.6-x64;tizen.4.0.0-armel;tizen.5.0.0-armel;win-arm64;win-x64;win-x86;linux-musl-arm;osx-arm64;maccatalyst-x64;maccatalyst-arm64;linux-s390x;linux-bionic-arm;linux-bionic-arm64;linux-bionic-x64;linux-bionic-x86;freebsd-x64;freebsd-arm64;linux-ppc64le"
+                             RuntimePackRuntimeIdentifiers="linux-arm;linux-arm64;linux-musl-arm64;linux-musl-x64;linux-x64;osx-x64;rhel.6-x64;tizen.4.0.0-armel;tizen.5.0.0-armel;win-arm64;win-x64;win-x86;linux-musl-arm;osx-arm64;maccatalyst-x64;maccatalyst-arm64;linux-s390x;linux-bionic-arm;linux-bionic-arm64;linux-bionic-x64;linux-bionic-x86;freebsd-x64;freebsd-arm64;linux-ppc64le;linux-riscv64;linux-musl-riscv64"
                              TargetFramework="$(NetCoreAppCurrent)"
                              TargetingPackName="$(LocalFrameworkOverrideName).Ref"
                              TargetingPackVersion="$(ProductVersion)"
@@ -53,7 +53,7 @@
                       RuntimeFrameworkName="$(LocalFrameworkOverrideName)"
                       LatestRuntimeFrameworkVersion="$(ProductVersion)"
                       RuntimePackNamePatterns="$(LocalFrameworkOverrideName).Runtime.Mono.**RID**"
-                      RuntimePackRuntimeIdentifiers="linux-arm;linux-arm64;linux-musl-arm64;linux-musl-x64;linux-x64;osx-x64;rhel.6-x64;win-arm64;win-x64;win-x86;linux-musl-arm;osx-arm64;linux-s390x;linux-bionic-arm;linux-bionic-arm64;linux-bionic-x64;linux-bionic-x86;browser-wasm;ios-arm64;ios-arm;iossimulator-arm64;iossimulator-x64;iossimulator-x86;tvos-arm64;tvossimulator-arm64;tvossimulator-x64;maccatalyst-x64;maccatalyst-arm64;android-arm64;android-arm;android-x64;android-x86"
+                      RuntimePackRuntimeIdentifiers="linux-arm;linux-arm64;linux-musl-arm64;linux-musl-x64;linux-x64;osx-x64;linux-riscv64;linux-musl-riscv64;rhel.6-x64;win-arm64;win-x64;win-x86;linux-musl-arm;osx-arm64;linux-s390x;linux-bionic-arm;linux-bionic-arm64;linux-bionic-x64;linux-bionic-x86;browser-wasm;ios-arm64;ios-arm;iossimulator-arm64;iossimulator-x64;iossimulator-x86;tvos-arm64;tvossimulator-arm64;tvossimulator-x64;maccatalyst-x64;maccatalyst-arm64;android-arm64;android-arm;android-x64;android-x86"
                       RuntimePackLabels="Mono"
                       Condition="'$(UseLocalTargetingRuntimePack)' == 'true' and ('@(KnownRuntimePack)' == '' or @(KnownRuntimePack->WithMetadataValue('Identity', 'Microsoft.NETCore.App')->WithMetadataValue('RuntimePackLabels', 'Mono')->WithMetadataValue('TargetFramework', '$(NetCoreAppCurrent)')) == '')" />
     <!-- always add wasi-wasm as it is never added by the sdk -->
@@ -78,7 +78,7 @@
                         TargetFramework="$(NetCoreAppCurrent)"
                         Crossgen2PackNamePattern="$(LocalFrameworkOverrideName).Crossgen2.**RID**"
                         Crossgen2PackVersion="$(ProductVersion)"
-                        Crossgen2RuntimeIdentifiers="linux-musl-x64;linux-x64;win-x64;linux-arm;linux-arm64;linux-musl-arm;linux-musl-arm64;osx-arm64;osx-x64;win-arm64;win-x86"
+                        Crossgen2RuntimeIdentifiers="linux-musl-x64;linux-x64;win-x64;linux-arm;linux-arm64;linux-musl-arm;linux-musl-arm64;osx-arm64;osx-x64;win-arm64;win-x86;linux-riscv64;linux-musl-riscv64"
                         Condition="'$(UseLocalCrossgen2Pack)' == 'true' and '@(KnownCrossgen2Pack->AnyHaveMetadataValue('TargetFramework', '$(NetCoreAppCurrent)'))' != 'true'" />
     <KnownAppHostPack Include="$(LocalFrameworkOverrideName)"
                       ExcludedRuntimeIdentifiers="android"
diff --git a/eng/testing/ChromeVersions.props b/eng/testing/ChromeVersions.props
index 9cea0303f97b..c919397b3a06 100644
--- a/eng/testing/ChromeVersions.props
+++ b/eng/testing/ChromeVersions.props
@@ -1,13 +1,12 @@
 <Project>
   <PropertyGroup>
-      <linux_ChromeVersion>120.0.6099.129</linux_ChromeVersion>
-      <linux_ChromeRevision>1217362</linux_ChromeRevision>
-      <linux_ChromeBaseSnapshotUrl>https://storage.googleapis.com/chromium-browser-snapshots/Linux_x64/1217362</linux_ChromeBaseSnapshotUrl>
-      <linux_V8Version>12.0.267</linux_V8Version>
-
-      <win_ChromeVersion>120.0.6099.130</win_ChromeVersion>
-      <win_ChromeRevision>1217362</win_ChromeRevision>
-      <win_ChromeBaseSnapshotUrl>https://storage.googleapis.com/chromium-browser-snapshots/Win_x64/1217378</win_ChromeBaseSnapshotUrl>
-      <win_V8Version>12.0.267</win_V8Version>
+    <linux_ChromeVersion>123.0.6312.58</linux_ChromeVersion>
+    <linux_ChromeRevision>1262506</linux_ChromeRevision>
+    <linux_ChromeBaseSnapshotUrl>https://storage.googleapis.com/chromium-browser-snapshots/Linux_x64/1262506</linux_ChromeBaseSnapshotUrl>
+    <linux_V8Version>12.3.219</linux_V8Version>
+    <win_ChromeVersion>123.0.6312.58</win_ChromeVersion>
+    <win_ChromeRevision>1262506</win_ChromeRevision>
+    <win_ChromeBaseSnapshotUrl>https://storage.googleapis.com/chromium-browser-snapshots/Win_x64/1262514</win_ChromeBaseSnapshotUrl>
+    <win_V8Version>12.3.219</win_V8Version>
   </PropertyGroup>
 </Project>
diff --git a/eng/testing/WasmRunnerTemplate.cmd b/eng/testing/WasmRunnerTemplate.cmd
index 83aeb53cad03..f92cee17cc9d 100644
--- a/eng/testing/WasmRunnerTemplate.cmd
+++ b/eng/testing/WasmRunnerTemplate.cmd
@@ -59,6 +59,9 @@ if /I [%XHARNESS_COMMAND%] == [test] (
     if [%BROWSER_PATH%] == [] if not [%HELIX_CORRELATION_PAYLOAD%] == [] (
         set "BROWSER_PATH=--browser-path^=%HELIX_CORRELATION_PAYLOAD%\chrome-win\chrome.exe"
     )
+    if [%JS_ENGINE_ARGS%] == [] (
+        set "JS_ENGINE_ARGS=--browser-arg^=--js-flags^=--stack-trace-limit^=1000"
+    )
 )
 
 if [%XHARNESS_ARGS%] == [] (
diff --git a/eng/testing/WasmRunnerTemplate.sh b/eng/testing/WasmRunnerTemplate.sh
index 71347666cde8..4f5856546fc5 100644
--- a/eng/testing/WasmRunnerTemplate.sh
+++ b/eng/testing/WasmRunnerTemplate.sh
@@ -58,6 +58,10 @@ if [[ "$XHARNESS_COMMAND" == "test" ]]; then
 			fi
 		fi
 	fi
+else
+	if [[ -z "$JS_ENGINE_ARGS" ]]; then
+		JS_ENGINE_ARGS="--browser-arg=--js-flags=--stack-trace-limit=1000"
+	fi
 fi
 
 if [[ -z "$XHARNESS_ARGS" ]]; then
diff --git a/eng/testing/bump-chrome-version.proj b/eng/testing/bump-chrome-version.proj
index 7bb437919b08..334e37bfd32b 100644
--- a/eng/testing/bump-chrome-version.proj
+++ b/eng/testing/bump-chrome-version.proj
@@ -4,6 +4,7 @@
   <UsingTask AssemblyFile="$(WasmBuildTasksAssemblyPath)" TaskName="Microsoft.WebAssembly.Build.Tasks.UpdateChromeVersions" />
   <PropertyGroup>
     <ChromeVersionsPath>$(RepositoryEngineeringDir)testing\ChromeVersions.props</ChromeVersionsPath>
+    <EnvVarsForPRPath>$(RepositoryEngineeringDir)testing\bump-chrome-pr.env</EnvVarsForPRPath>
   </PropertyGroup>
 
   <Target Name="UpdateChromeVersion">
@@ -13,19 +14,13 @@
                 Channel="$(ChromeChannel)"
                 MaxMajorVersionsToCheck="1"
                 IntermediateOutputPath="$(ArtifactsObjDir)"
-                ChromeVersionsPath="$(ChromeVersionsPath)">                
+                ChromeVersionsPath="$(ChromeVersionsPath)"
+                EnvVarsForPRPath="$(EnvVarsForPRPath)">                
       <Output TaskParameter="VersionsChanged" PropertyName="VersionsChanged" />
     </UpdateChromeVersions>
 
-    <ItemGroup>
-      <!-- ensure newline at the end -->
-      <EnvVarForPR Include="CHROME_LINUX_VER=$(linux_ChromeVersion)" />
-      <EnvVarForPR Include="CHROME_WIN_VER=$(win_ChromeVersion)" />
-    </ItemGroup>
-
     <Message Text="No major changes: skipping version props update." Importance="High" Condition="'$(VersionsChanged)' != 'true'"/>
     <Message Text="Version props got updated" Importance="High" Condition="'$(VersionsChanged)' == 'true'"/>
-
   </Target>
 
   <Import Project="..\..\Directory.Build.targets" />
diff --git a/eng/testing/linker/project.csproj.template b/eng/testing/linker/project.csproj.template
index 41466e8d4492..d31c8df9c927 100644
--- a/eng/testing/linker/project.csproj.template
+++ b/eng/testing/linker/project.csproj.template
@@ -75,6 +75,12 @@
     {AdditionalProjectReferences}
   </ItemGroup>
 
+  <Target Name="RemoveInvariantGlobalization" BeforeTargets="_SetWasmBuildNativeDefaults" Condition="'$(TargetArchitecture)' == 'wasm'">
+    <ItemGroup>
+      <_BoolPropertiesThatTriggerRelinking Remove="InvariantGlobalization" />
+    </ItemGroup>
+  </Target>
+
   <Target Name="LocateNativeCompiler"
           Condition="'$(PublishAot)' == 'true' and '$(_hostOS)' != 'win'"
           BeforeTargets="SetupOSSpecificProps">
diff --git a/eng/testing/outerBuild.targets b/eng/testing/outerBuild.targets
index c071944c21d9..6465a272bca8 100644
--- a/eng/testing/outerBuild.targets
+++ b/eng/testing/outerBuild.targets
@@ -1,12 +1,19 @@
 <Project>
-  <Target Name="Test" DependsOnTargets="GetProjectWithBestTargetFrameworks">    
+  <Target Name="Test" DependsOnTargets="RemoveNetFrameworkProjectsOnNonWindows;GetProjectWithBestTargetFrameworks">
     <MSBuild Projects="@(InnerBuildProjectsWithBestTargetFramework)"
              Targets="Test">
     </MSBuild>
   </Target>
-  <Target Name="VSTest" DependsOnTargets="GetProjectWithBestTargetFrameworks">    
+  <!-- Manually remove .NET Framework projects on non-Windows: https://github.com/microsoft/vstest/issues/4908 -->
+  <Target Name="VSTest" DependsOnTargets="RemoveNetFrameworkProjectsOnNonWindows;GetProjectWithBestTargetFrameworks">
     <MSBuild Projects="@(InnerBuildProjectsWithBestTargetFramework)"
              Targets="VSTest">
     </MSBuild>
   </Target>
+  <Target Name="RemoveNetFrameworkProjectsOnNonWindows" Condition="'$(TargetOS)' != 'windows'">
+    <PropertyGroup>
+      <!-- we'd try to run tests with Mono for netfx projects which doesn't work so filter them out -->
+      <IncludeNetFrameworkTfms>false</IncludeNetFrameworkTfms>
+    </PropertyGroup>
+  </Target>
 </Project>
\ No newline at end of file
diff --git a/eng/testing/performance/android_scenarios.proj b/eng/testing/performance/android_scenarios.proj
index c2f3e7b1955e..4d0aad300cd9 100644
--- a/eng/testing/performance/android_scenarios.proj
+++ b/eng/testing/performance/android_scenarios.proj
@@ -35,24 +35,24 @@
         <Command>$(Python) test.py sod --scenario-name &quot;%(Identity)&quot;</Command>
         <PostCommands>$(Python) post.py</PostCommands>
     </HelixWorkItem>
-    <!-- <HelixWorkItem Include="SOD - Android Benchmarks.Droid APK Size">
+    <HelixWorkItem Include="SOD - Android Benchmarks.Droid APK Size">
         <PayloadDirectory>$(WorkItemDirectory)</PayloadDirectory>
-        <PreCommands>cd $(ScenarioDirectory)bdnandroid;copy %HELIX_CORRELATION_PAYLOAD%\MonoBenchmarksDroid.apk .;$(Python) pre.py -1-apk-name MonoBenchmarksDroid.apk</PreCommands>
-        <Command>$(Python) test.py sod -1-scenario-name &quot;%(Identity)&quot;</Command>
+        <PreCommands>cd $(ScenarioDirectory)bdnandroid;copy %HELIX_CORRELATION_PAYLOAD%\MonoBenchmarksDroid.apk .;$(Python) pre.py --apk-name MonoBenchmarksDroid.apk</PreCommands>
+        <Command>$(Python) test.py sod --scenario-name &quot;%(Identity)&quot;</Command>
         <PostCommands>$(Python) post.py</PostCommands>
     </HelixWorkItem>
     <HelixWorkItem Include="SOD - Android Benchmarks.Droid Extracted Size">
         <PayloadDirectory>$(WorkItemDirectory)</PayloadDirectory>
-        <PreCommands>cd $(ScenarioDirectory)bdnandroid;copy %HELIX_CORRELATION_PAYLOAD%\MonoBenchmarksDroid.apk .;$(Python) pre.py -1-unzip -1-apk-name MonoBenchmarksDroid.apk</PreCommands>
-        <Command>$(Python) test.py sod -1-scenario-name &quot;%(Identity)&quot;</Command>
+        <PreCommands>cd $(ScenarioDirectory)bdnandroid;copy %HELIX_CORRELATION_PAYLOAD%\MonoBenchmarksDroid.apk .;$(Python) pre.py --unzip --apk-name MonoBenchmarksDroid.apk</PreCommands>
+        <Command>$(Python) test.py sod --scenario-name &quot;%(Identity)&quot;</Command>
         <PostCommands>$(Python) post.py</PostCommands>
-    </HelixWorkItem> -->
-    <!-- <HelixWorkItem Include="Mobile Benchmark - Android Benchmarks.Droid Benchmark Run"> Disabled per https://github.com/dotnet/performance/issues/3655
+    </HelixWorkItem>
+    <HelixWorkItem Include="Mobile Benchmark - Android Benchmarks.Droid Benchmark Run">
         <PayloadDirectory>$(WorkItemDirectory)</PayloadDirectory>
-        <PreCommands>echo on;set XHARNESSPATH=$(XharnessPath);cd $(ScenarioDirectory)bdnandroid;copy %HELIX_CORRELATION_PAYLOAD%\MonoBenchmarksDroid.apk .;$(Python) pre.py -1-apk-name MonoBenchmarksDroid.apk</PreCommands>
-        <Command>$(Python) test.py androidinstrumentation -1-package-path .\pub\MonoBenchmarksDroid.apk -1-package-name com.microsoft.maui.benchmarks -1-instrumentation-name com.microsoft.maui.MainInstrumentation -1-scenario-name &quot;%(Identity)&quot;</Command>
+        <PreCommands>echo on;set XHARNESSPATH=$(XharnessPath);cd $(ScenarioDirectory)bdnandroid;copy %HELIX_CORRELATION_PAYLOAD%\MonoBenchmarksDroid.apk .;$(Python) pre.py --restart-device --apk-name MonoBenchmarksDroid.apk</PreCommands>
+        <Command>$(Python) test.py androidinstrumentation --package-path .\pub\MonoBenchmarksDroid.apk --package-name com.microsoft.maui.benchmarks --instrumentation-name com.microsoft.maui.MainInstrumentation --scenario-name &quot;%(Identity)&quot;</Command>
         <PostCommands>$(Python) post.py</PostCommands>
         <Timeout>00:30:00</Timeout>
-    </HelixWorkItem> -->
+    </HelixWorkItem>
   </ItemGroup>
 </Project>
diff --git a/eng/testing/performance/performance-setup.ps1 b/eng/testing/performance/performance-setup.ps1
index f7d321930627..8e9ff8736469 100644
--- a/eng/testing/performance/performance-setup.ps1
+++ b/eng/testing/performance/performance-setup.ps1
@@ -50,14 +50,15 @@ $Queue = ""
 
 if ($Internal) {
     switch ($LogicalMachine) {
-        "perftiger" { $Queue = "Windows.10.Amd64.19H1.Tiger.Perf" }
-        "perftiger_crossgen" { $Queue = "Windows.10.Amd64.19H1.Tiger.Perf" }
+        "perftiger" { $Queue = "Windows.11.Amd64.Tiger.Perf" }
+        "perftiger_crossgen" { $Queue = "Windows.11.Amd64.Tiger.Perf" }
         "perfowl" { $Queue = "Windows.11.Amd64.Owl.Perf" }
-        "perfsurf" { $Queue = "Windows.10.Arm64.Perf.Surf" }
+        "perfsurf" { $Queue = "Windows.11.Arm64.Surf.Perf" }
         "perfpixel4a" { $Queue = "Windows.11.Amd64.Pixel.Perf" }
         "perfampere" { $Queue = "Windows.Server.Arm64.Perf" }
+        "perfviper" { $Queue = "Windows.11.Amd64.Viper.Perf" }
         "cloudvm" { $Queue = "Windows.10.Amd64" }
-        Default { $Queue = "Windows.10.Amd64.19H1.Tiger.Perf" }
+        Default { $Queue = "Windows.11.Amd64.Tiger.Perf" }
     }
     $PerfLabArguments = "--upload-to-perflab-container"
     $ExtraBenchmarkDotNetArguments = ""
@@ -140,7 +141,7 @@ if ($NoR2R) {
 }
 
 if ($ExperimentName) {
-    $SetupArguments = "$SetupArguments --experiment-name '$ExperimentName'"
+    $SetupArguments = "$SetupArguments --experiment-name $ExperimentName"
 }
 
 if ($UseLocalCommitTime) {
diff --git a/eng/testing/performance/performance-setup.sh b/eng/testing/performance/performance-setup.sh
index 6eeb7223ffb4..ff04015375a3 100755
--- a/eng/testing/performance/performance-setup.sh
+++ b/eng/testing/performance/performance-setup.sh
@@ -301,6 +301,8 @@ if [[ "$internal" == true ]]; then
         queue=OSX.13.Amd64.Iphone.Perf
     elif [[ "$logical_machine" == "perfampere" ]]; then
         queue=Ubuntu.2204.Arm64.Perf
+    elif [[ "$logical_machine" == "perfviper" ]]; then
+        queue=Ubuntu.2204.Amd64.Viper.Perf
     elif [[ "$logical_machine" == "cloudvm" ]]; then
         queue=Ubuntu.2204.Amd64
     elif [[ "$architecture" == "arm64" ]]; then
@@ -492,7 +494,7 @@ if [[ "$nor2r" == "true" ]]; then
 fi
 
 if [[ ! -z "$experimentname" ]]; then
-    setup_arguments="$setup_arguments --experiment-name '$experimentname'"
+    setup_arguments="$setup_arguments --experiment-name $experimentname"
 fi
 
 if [[ "$monoaot" == "true" ]]; then
diff --git a/eng/testing/scenarios/BuildWasiAppsJobsList.txt b/eng/testing/scenarios/BuildWasiAppsJobsList.txt
index b68f7fe3d30e..bdb9ecf6e5f0 100644
--- a/eng/testing/scenarios/BuildWasiAppsJobsList.txt
+++ b/eng/testing/scenarios/BuildWasiAppsJobsList.txt
@@ -3,3 +3,4 @@ Wasi.Build.Tests.ILStripTests
 Wasi.Build.Tests.SdkMissingTests
 Wasi.Build.Tests.RuntimeConfigTests
 Wasi.Build.Tests.WasiTemplateTests
+Wasi.Build.Tests.PInvokeTableGeneratorTests
diff --git a/eng/testing/scenarios/BuildWasmAppsJobsList.txt b/eng/testing/scenarios/BuildWasmAppsJobsList.txt
index 3315a5b7fe35..5ccb34b25e18 100644
--- a/eng/testing/scenarios/BuildWasmAppsJobsList.txt
+++ b/eng/testing/scenarios/BuildWasmAppsJobsList.txt
@@ -36,11 +36,13 @@ Wasm.Build.Tests.TestAppScenarios.AppSettingsTests
 Wasm.Build.Tests.TestAppScenarios.LazyLoadingTests
 Wasm.Build.Tests.TestAppScenarios.LibraryInitializerTests
 Wasm.Build.Tests.TestAppScenarios.SatelliteLoadingTests
+Wasm.Build.Tests.TestAppScenarios.DownloadResourceProgressTests
+Wasm.Build.Tests.TestAppScenarios.SignalRClientTests
 Wasm.Build.Tests.WasmBuildAppTest
 Wasm.Build.Tests.WasmNativeDefaultsTests
 Wasm.Build.Tests.WasmRunOutOfAppBundleTests
 Wasm.Build.Tests.WasmSIMDTests
 Wasm.Build.Tests.WasmTemplateTests
 Wasm.Build.Tests.WorkloadTests
-Wasm.Build.Tests.TestAppScenarios.DownloadResourceProgressTests
 Wasm.Build.Tests.MT.Blazor.SimpleMultiThreadedTests
+Wasm.Build.Tests.TestAppScenarios.DebugLevelTests
diff --git a/eng/testing/tests.browser.targets b/eng/testing/tests.browser.targets
index df305affa4b4..982b8589e76c 100644
--- a/eng/testing/tests.browser.targets
+++ b/eng/testing/tests.browser.targets
@@ -87,13 +87,14 @@
     <_AppArgs Condition="'$(IsFunctionalTest)' != 'true' and '$(WasmMainAssemblyFileName)' != ''">--run $(WasmMainAssemblyFileName)</_AppArgs>
     <_AppArgs Condition="'$(IsFunctionalTest)' == 'true'">--run $(AssemblyName).dll</_AppArgs>
 
-    <_XUnitBackgroundExec Condition="'$(_XUnitBackgroundExec)' == '' and '$(WasmEnableThreads)' == 'true'">true</_XUnitBackgroundExec>
     <WasmTestAppArgs Condition="'$(_XUnitBackgroundExec)' == 'true'">$(WasmTestAppArgs) -backgroundExec</WasmTestAppArgs>
+    <WasmXHarnessMonoArgs Condition="'$(_XUnitBackgroundExec)' == 'true'">$(WasmXHarnessMonoArgs) --setenv=IsWasmBackgroundExec=true</WasmXHarnessMonoArgs>
     <_AppArgs Condition="'$(WasmTestAppArgs)' != ''">$(_AppArgs) $(WasmTestAppArgs)</_AppArgs>
 
-    <WasmXHarnessMonoArgs Condition="'$(XunitShowProgress)' == 'true'">$(WasmXHarnessMonoArgs) --setenv=XHARNESS_LOG_TEST_START=1</WasmXHarnessMonoArgs>
+    <WasmXHarnessMonoArgs Condition="'$(XunitShowProgress)' == 'true'">$(WasmXHarnessMonoArgs) --setenv=XHARNESS_LOG_TEST_START=true</WasmXHarnessMonoArgs>
     <!-- help unit test with PlatformDetection.IsThreadingSupported via IsBrowserThreadingSupported env variable -->
     <WasmXHarnessMonoArgs Condition="'$(WasmEnableThreads)' == 'true'">$(WasmXHarnessMonoArgs) --setenv=IsBrowserThreadingSupported=true</WasmXHarnessMonoArgs>
+    <WasmXHarnessMaxParallelThreads Condition="'$(WasmEnableThreads)' == 'true' and '$(WasmXHarnessMaxParallelThreads)' == ''">8</WasmXHarnessMaxParallelThreads>
   </PropertyGroup>
 
   <PropertyGroup Condition="'$(RunScriptCommand)' == ''">
@@ -108,9 +109,12 @@
     <_XHarnessArgs Condition="'$(_UseWasmSymbolicator)' == 'true'" >$(_XHarnessArgs) --symbolicator WasmSymbolicator.dll,Microsoft.WebAssembly.Internal.SymbolicatorWrapperForXHarness</_XHarnessArgs>
     <_XHarnessArgs Condition="'$(_WasmBrowserPathForTests)' != ''" >$(_XHarnessArgs) &quot;--browser-path=$(_WasmBrowserPathForTests)&quot;</_XHarnessArgs>
     <_XHarnessArgs Condition="'$(WasmXHarnessTestsTimeout)' != ''" >$(_XHarnessArgs) &quot;--timeout=$(WasmXHarnessTestsTimeout)&quot;</_XHarnessArgs>
+    <_XHarnessArgs Condition="'$(WasmXHarnessVerbosity)' != ''"    >$(_XHarnessArgs) --verbosity=$(WasmXHarnessVerbosity)</_XHarnessArgs><!-- Trace, Debug, Information (default), Warning, Error, Critical -->
     <_XHarnessArgs Condition="'$(WasmXHarnessArgsCli)' != ''"      >$(_XHarnessArgs) $(WasmXHarnessArgsCli)</_XHarnessArgs>
 
-    <!-- There two flavors of WasmXHarnessArgs and WasmXHarnessMonoArgs, one is MSBuild property and the other is environment variable -->
+    <_AppArgs Condition="'$(WasmEnableThreads)' == 'true'">$(_AppArgs) -threads</_AppArgs>
+    <_AppArgs Condition="'$(WasmXHarnessMaxParallelThreads)' != ''">$(_AppArgs) -parallelThreads $(WasmXHarnessMaxParallelThreads)</_AppArgs>
+    <!-- There are two flavors of WasmXHarnessArgs and WasmXHarnessMonoArgs, one is MSBuild property and the other is environment variable -->
     <RunScriptCommand Condition="'$(OS)' != 'Windows_NT'">$HARNESS_RUNNER $(_XHarnessArgs) %24XHARNESS_ARGS %24WasmXHarnessArgs -- $(WasmXHarnessMonoArgs) %24WasmXHarnessMonoArgs $(_AppArgs) %24WasmTestAppArgs</RunScriptCommand>
     <RunScriptCommand Condition="'$(OS)' == 'Windows_NT'">%HARNESS_RUNNER% $(_XHarnessArgs) %XHARNESS_ARGS% %WasmXHarnessArgs%  -- $(WasmXHarnessMonoArgs) %WasmXHarnessMonoArgs% $(_AppArgs) %WasmTestAppArgs%</RunScriptCommand>
   </PropertyGroup>
@@ -147,6 +151,12 @@
     <WasmNestedPublishAppDependsOn>PrepareForWasmBuildApp;$(WasmNestedPublishAppDependsOn)</WasmNestedPublishAppDependsOn>
   </PropertyGroup>
 
+  <Target Name="IncludeTestAssemblyInVFS" BeforeTargets="PrepareForWasmBuildApp">
+    <ItemGroup>
+      <WasmFilesToIncludeFromPublishDir Include="$(AssemblyName).dll" Exclude="@(WasmFilesToIncludeFromPublishDir)" />
+    </ItemGroup>
+  </Target>
+
   <Target Name="PrepareForWasmBuildApp">
     <PropertyGroup>
       <WasmAppDir>$(BundleDir)</WasmAppDir>
@@ -197,7 +207,7 @@
       <!-- Include files specified by test projects from publish dir -->
       <WasmFilesToIncludeInFileSystem
               Include="$(PublishDir)%(WasmFilesToIncludeFromPublishDir.Identity)"
-              TargetPath="%(WasmFilesToIncludeFromPublishDir.Identity)"
+              TargetPath="%(WasmFilesToIncludeFromPublishDir.TargetDir)/%(WasmFilesToIncludeFromPublishDir.Identity)"
               Condition="'%(WasmFilesToIncludeFromPublishDir.Identity)' != ''" />
     </ItemGroup>
 
diff --git a/eng/testing/tests.wasi.targets b/eng/testing/tests.wasi.targets
index 8516230d7e6d..d147fea218fe 100644
--- a/eng/testing/tests.wasi.targets
+++ b/eng/testing/tests.wasi.targets
@@ -31,7 +31,7 @@
     <_AppArgs Condition="'$(WasmTestAppArgs)' != ''">$(_AppArgs) -- $(WasmTestAppArgs)</_AppArgs>
 
     <!-- FIXME: wasttime specific param name -->
-    <WasmXHarnessMonoArgs Condition="'$(XunitShowProgress)' == 'true'">$(WasmXHarnessMonoArgs) --env=XHARNESS_LOG_TEST_START=1</WasmXHarnessMonoArgs>
+    <WasmXHarnessMonoArgs Condition="'$(XunitShowProgress)' == 'true'">$(WasmXHarnessMonoArgs) --env=XHARNESS_LOG_TEST_START=true</WasmXHarnessMonoArgs>
   </PropertyGroup>
 
   <PropertyGroup Condition="'$(RunScriptCommand)' == ''">
@@ -51,7 +51,7 @@
 
     <_InvariantGlobalization Condition="'$(InvariantGlobalization)' == 'true'">--env=DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=true</_InvariantGlobalization>
 
-    <!-- There two flavors of WasmXHarnessArgs and WasmXHarnessMonoArgs, one is MSBuild property and the other is environment variable -->
+    <!-- There are two flavors of WasmXHarnessArgs and WasmXHarnessMonoArgs, one is MSBuild property and the other is environment variable -->
     <RunScriptCommand Condition="'$(OS)' != 'Windows_NT'">$HARNESS_RUNNER $(_XHarnessArgs) %24XHARNESS_ARGS %24WasmXHarnessArgs -- $(WasmXHarnessMonoArgs) %24WasmXHarnessMonoArgs $(_InvariantGlobalization) %24_InvariantGlobalization  $(_AppArgs) %24WasmTestAppArgs</RunScriptCommand>
     <RunScriptCommand Condition="'$(OS)' == 'Windows_NT'">%HARNESS_RUNNER% $(_XHarnessArgs) %XHARNESS_ARGS% %WasmXHarnessArgs%  -- $(WasmXHarnessMonoArgs) %WasmXHarnessMonoArgs% $(_InvariantGlobalization) %_InvariantGlobalization% $(_AppArgs) %WasmTestAppArgs%</RunScriptCommand>
   </PropertyGroup>
diff --git a/global.json b/global.json
index f7ee8d365eb8..b3d764e6c152 100644
--- a/global.json
+++ b/global.json
@@ -1,18 +1,18 @@
 {
   "sdk": {
-    "version": "9.0.100-alpha.1.23615.4",
+    "version": "9.0.100-preview.1.24101.2",
     "allowPrerelease": true,
     "rollForward": "major"
   },
   "tools": {
-    "dotnet": "9.0.100-alpha.1.23615.4"
+    "dotnet": "9.0.100-preview.1.24101.2"
   },
   "msbuild-sdks": {
-    "Microsoft.DotNet.Arcade.Sdk": "9.0.0-beta.24106.2",
-    "Microsoft.DotNet.Helix.Sdk": "9.0.0-beta.24106.2",
-    "Microsoft.DotNet.SharedFramework.Sdk": "9.0.0-beta.24106.2",
+    "Microsoft.DotNet.Arcade.Sdk": "9.0.0-beta.24205.4",
+    "Microsoft.DotNet.Helix.Sdk": "9.0.0-beta.24205.4",
+    "Microsoft.DotNet.SharedFramework.Sdk": "9.0.0-beta.24205.4",
     "Microsoft.Build.NoTargets": "3.7.0",
     "Microsoft.Build.Traversal": "3.4.0",
-    "Microsoft.NET.Sdk.IL": "9.0.0-alpha.1.24072.1"
+    "Microsoft.NET.Sdk.IL": "9.0.0-preview.4.24201.1"
   }
 }
diff --git a/src/coreclr/.nuget/coreclr-packages.proj b/src/coreclr/.nuget/coreclr-packages.proj
index 80e3f60a9192..f7c43af0e783 100644
--- a/src/coreclr/.nuget/coreclr-packages.proj
+++ b/src/coreclr/.nuget/coreclr-packages.proj
@@ -1,11 +1,11 @@
 <Project Sdk="Microsoft.Build.Traversal" DefaultTargets="Pack">
-  <ItemGroup Condition="'$(TargetOS)' == 'windows' or '$(DotNetBuildFromSource)' == 'true'">
+  <ItemGroup Condition="'$(TargetOS)' == 'windows' or '$(DotNetBuildSourceOnly)' == 'true'">
     <ProjectReference Include="Microsoft.NET.Sdk.IL\Microsoft.NET.Sdk.IL.pkgproj" />
-    <ProjectReference Include="Microsoft.ILVerification\Microsoft.ILVerification.pkgproj" Condition="'$(DotNetBuildFromSource)' != 'true'" />
-    <ProjectReference Include="dotnet-ilverify\dotnet-ilverify.pkgproj" Condition="'$(DotNetBuildFromSource)' != 'true'" />
+    <ProjectReference Include="Microsoft.ILVerification\Microsoft.ILVerification.pkgproj" Condition="'$(DotNetBuildSourceOnly)' != 'true'" />
+    <ProjectReference Include="dotnet-ilverify\dotnet-ilverify.pkgproj" Condition="'$(DotNetBuildSourceOnly)' != 'true'" />
   </ItemGroup>
 
-  <ItemGroup Condition="'$(RuntimeFlavor)' == 'CoreCLR' and '$(DotNetBuildFromSource)' != 'true'">
+  <ItemGroup Condition="'$(RuntimeFlavor)' == 'CoreCLR' and '$(DotNetBuildSourceOnly)' != 'true'">
     <ProjectReference Include="ILCompiler.Reflection.ReadyToRun.Experimental\ILCompiler.Reflection.ReadyToRun.Experimental.pkgproj" />
   </ItemGroup>
 
diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt
index 029de0220070..68af87ce57e8 100644
--- a/src/coreclr/CMakeLists.txt
+++ b/src/coreclr/CMakeLists.txt
@@ -153,7 +153,7 @@ endif(NOT CLR_CMAKE_TARGET_ARCH_WASM)
 
 if(NOT CLR_CROSS_COMPONENTS_BUILD)
   # NativeAOT only buildable for a subset of CoreCLR-supported configurations
-  if(CLR_CMAKE_HOST_ARCH_ARM64 OR CLR_CMAKE_HOST_ARCH_AMD64 OR CLR_CMAKE_HOST_ARCH_ARM OR CLR_CMAKE_TARGET_ARCH_WASM)
+  if(CLR_CMAKE_HOST_ARCH_ARM64 OR CLR_CMAKE_HOST_ARCH_AMD64 OR CLR_CMAKE_HOST_ARCH_ARM OR CLR_CMAKE_TARGET_ARCH_WASM OR (CLR_CMAKE_HOST_ARCH_I386 AND CLR_CMAKE_HOST_WIN32))
     add_subdirectory(nativeaot)
   endif()
 endif(NOT CLR_CROSS_COMPONENTS_BUILD)
@@ -210,11 +210,12 @@ if(CLR_CMAKE_HOST_UNIX)
     add_subdirectory(debug/createdump)
   endif(CLR_CMAKE_HOST_OSX OR (CLR_CMAKE_HOST_LINUX AND NOT CLR_CMAKE_HOST_UNIX_X86 AND NOT CLR_CMAKE_HOST_ANDROID))
 
-  # Include the dummy c++ include files
-  include_directories("pal/inc/rt/cpp")
-
-  # This prevents inclusion of standard C compiler headers
-  add_compile_options(-nostdinc)
+  # The CoreCLR PAL used to redefine NULL, which caused a number of null conversion and arithmetic
+  # warnings and errors to be suppressed.
+  # Suppress these warnings here to avoid breaking the build.
+  add_compile_options($<$<COMPILE_LANG_AND_ID:CXX,Clang,AppleClang>:-Wno-null-arithmetic>)
+  add_compile_options($<$<COMPILE_LANG_AND_ID:CXX,GNU>:-Wno-conversion-null>)
+  add_compile_options($<$<COMPILE_LANG_AND_ID:CXX,GNU>:-Wno-pointer-arith>)
 
   set (NATIVE_RESOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/nativeresources)
   include_directories(${NATIVE_RESOURCE_DIR})
@@ -226,7 +227,7 @@ if(CLR_CMAKE_HOST_UNIX)
   # given Windows .rc file. The target C++ file path is returned in the
   # variable specified by the TARGET_FILE parameter.
   function(build_resources SOURCE TARGET_NAME TARGET_FILE)
-
+    set_property(SOURCE ${SOURCE} APPEND PROPERTY COMPILE_DEFINITIONS "RC_INVOKED")
     set(PREPROCESSED_SOURCE ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.rc.i)
 
     preprocess_file(${SOURCE} ${PREPROCESSED_SOURCE})
diff --git a/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.csproj
index 6b3ddff0cc86..9ef1024c449d 100644
--- a/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.csproj
+++ b/src/coreclr/System.Private.CoreLib/System.Private.CoreLib.csproj
@@ -1,4 +1,4 @@
-﻿<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
     <EnableDefaultItems>false</EnableDefaultItems>
@@ -46,8 +46,7 @@
     <!-- Override InformationalVersion during servicing as it's returned via public api. -->
     <InformationalVersion Condition="'$(PreReleaseVersionLabel)' == 'servicing'">$(ProductVersion)</InformationalVersion>
     <InformationalVersion Condition="'$(StabilizePackageVersion)' == 'true'">$(ProductVersion)</InformationalVersion>
-    <!-- AD0001 : https://github.com/dotnet/runtime/issues/90356 -->
-    <NoWarn>$(NoWarn),0419,0649;AD0001</NoWarn>
+    <NoWarn>$(NoWarn),0419,0649</NoWarn>
     <Nullable>enable</Nullable>
 
     <!-- Ignore all previous constants since SPCL is sensitive to what is defined and the Sdk adds some by default -->
@@ -137,8 +136,6 @@
     <Compile Include="$(BclSourcesRoot)\System\Delegate.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Diagnostics\Debugger.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Diagnostics\EditAndContinueHelper.cs" />
-    <Compile Include="$(BclSourcesRoot)\System\Diagnostics\Eventing\EventPipe.CoreCLR.cs" />
-    <Compile Include="$(BclSourcesRoot)\System\Diagnostics\Eventing\NativeRuntimeEventSource.Threading.NativeSinks.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Diagnostics\ICustomDebuggerNotification.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Diagnostics\StackFrame.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Diagnostics\StackFrameHelper.cs" />
@@ -203,18 +200,17 @@
     <Compile Include="$(BclSourcesRoot)\System\Reflection\RuntimePropertyInfo.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Reflection\TypeNameParser.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Reflection\Metadata\RuntimeTypeMetadataUpdateHandler.cs" />
-    <Compile Include="$(BclSourcesRoot)\System\Resources\ManifestBasedResourceGroveler.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Runtime\CompilerServices\CastHelpers.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Runtime\CompilerServices\ICastableHelpers.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Runtime\CompilerServices\RuntimeHelpers.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Runtime\ControlledExecution.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Runtime\DependentHandle.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)..\nativeaot\Common\src\System\Runtime\RhFailFastReason.cs" Condition="('$(Platform)' != 'x86' or '$(TargetsWindows)' != 'true')" />
-    <Compile Include="$(MSBuildThisFileDirectory)..\nativeaot\Runtime.Base\src\System\Runtime\ExceptionHandling.cs" Condition="('$(Platform)' != 'x86' or '$(TargetsWindows)' != 'true')" />
-    <Compile Include="$(MSBuildThisFileDirectory)..\nativeaot\Runtime.Base\src\System\Runtime\ExceptionIDs.cs" Condition="('$(Platform)' != 'x86' or '$(TargetsWindows)' != 'true')" />
-    <Compile Include="$(MSBuildThisFileDirectory)..\nativeaot\Runtime.Base\src\System\Runtime\StackFrameIterator.cs" Condition="('$(Platform)' != 'x86' or '$(TargetsWindows)' != 'true')" />
-    <Compile Include="$(BclSourcesRoot)\System\Runtime\ExceptionServices\AsmOffsets.cs" Condition="('$(Platform)' != 'x86' or '$(TargetsWindows)' != 'true')" />
-    <Compile Include="$(BclSourcesRoot)\System\Runtime\ExceptionServices\InternalCalls.cs" Condition="('$(Platform)' != 'x86' or '$(TargetsWindows)' != 'true')" />
+    <Compile Include="$(MSBuildThisFileDirectory)..\nativeaot\Common\src\System\Runtime\RhFailFastReason.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)..\nativeaot\Runtime.Base\src\System\Runtime\ExceptionHandling.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)..\nativeaot\Runtime.Base\src\System\Runtime\ExceptionIDs.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)..\nativeaot\Runtime.Base\src\System\Runtime\StackFrameIterator.cs" />
+    <Compile Include="$(BclSourcesRoot)\System\Runtime\ExceptionServices\AsmOffsets.cs" />
+    <Compile Include="$(BclSourcesRoot)\System\Runtime\ExceptionServices\InternalCalls.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Runtime\GCSettings.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Runtime\JitInfo.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Runtime\InteropServices\ComTypes\IEnumerable.cs" />
@@ -226,12 +222,10 @@
     <Compile Include="$(BclSourcesRoot)\System\Runtime\InteropServices\NativeLibrary.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Runtime\Intrinsics\X86\X86Base.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\Runtime\Loader\AssemblyLoadContext.CoreCLR.cs" />
-    <Compile Include="$(BclSourcesRoot)\System\Runtime\Versioning\CompatibilitySwitch.cs" />
     <Compile Include="$(BclSourcesRoot)\System\RuntimeArgumentHandle.cs" />
     <Compile Include="$(BclSourcesRoot)\System\RuntimeHandles.cs" />
     <Compile Include="$(BclSourcesRoot)\System\RuntimeType.ActivatorCache.cs" />
     <Compile Include="$(BclSourcesRoot)\System\RuntimeType.CoreCLR.cs" />
-    <Compile Include="$(BclSourcesRoot)\System\Security\DynamicSecurityMethodAttribute.cs" />
     <Compile Include="$(BclSourcesRoot)\System\StartupHookProvider.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\String.CoreCLR.cs" />
     <Compile Include="$(BclSourcesRoot)\System\StubHelpers.cs" />
diff --git a/src/coreclr/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.xml b/src/coreclr/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.xml
index fb07d67f37c6..d7fd368adac7 100644
--- a/src/coreclr/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.xml
+++ b/src/coreclr/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.xml
@@ -2,11 +2,6 @@
   <assembly fullname="System.Private.CoreLib">
     <type fullname="System.Runtime.CompilerServices.RuntimeFeature" feature="System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeSupported" featurevalue="true">
       <method signature="System.Boolean get_IsDynamicCodeCompiled()" body="stub" value="true" />
-      <method signature="System.Boolean get_IsDynamicCodeSupported()" body="stub" value="true" />
-    </type>
-    <type fullname="System.Runtime.CompilerServices.RuntimeFeature" feature="System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeSupported" featurevalue="false">
-      <method signature="System.Boolean get_IsDynamicCodeCompiled()" body="stub" value="false" />
-      <method signature="System.Boolean get_IsDynamicCodeSupported()" body="stub" value="false" />
     </type>
   </assembly>
 </linker>
diff --git a/src/coreclr/System.Private.CoreLib/src/Internal/Runtime/InteropServices/ComActivator.cs b/src/coreclr/System.Private.CoreLib/src/Internal/Runtime/InteropServices/ComActivator.cs
index db8d4ead4659..77f64abd1b42 100644
--- a/src/coreclr/System.Private.CoreLib/src/Internal/Runtime/InteropServices/ComActivator.cs
+++ b/src/coreclr/System.Private.CoreLib/src/Internal/Runtime/InteropServices/ComActivator.cs
@@ -203,7 +203,6 @@ private static void ClassRegistrationScenarioForType(ComActivationContext cxt, b
                     // Finally validate signature
                     ReadOnlySpan<ParameterInfo> methParams = method.GetParametersAsSpan();
                     if (method.ReturnType != typeof(void)
-                        || methParams == null
                         || methParams.Length != 1
                         || (methParams[0].ParameterType != typeof(string) && methParams[0].ParameterType != typeof(Type)))
                     {
diff --git a/src/coreclr/System.Private.CoreLib/src/System/ArgIterator.cs b/src/coreclr/System.Private.CoreLib/src/System/ArgIterator.cs
index e7c2a99eeefa..d49d1dcb270b 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/ArgIterator.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/ArgIterator.cs
@@ -24,7 +24,7 @@ private struct SigPointer
         private int _remainingArgs;             // # of remaining args.
 
 #if TARGET_WINDOWS // Native Varargs are not supported on Unix
-        // ArgIterator is a ref struct. It does not require pinning.
+        // ArgIterator is a ref struct. It does not require pinning, therefore Unsafe.AsPointer is safe.
         // This method null checks the this pointer as a side-effect.
         private ArgIterator* ThisPtr => (ArgIterator*)Unsafe.AsPointer(ref _argCookie);
 
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Array.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Array.CoreCLR.cs
index 16d9067567ee..de7b3021c458 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Array.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Array.CoreCLR.cs
@@ -74,7 +74,7 @@ private static unsafe void CopyImpl(Array sourceArray, int sourceIndex, Array de
                 if (pMT->ContainsGCPointers)
                     Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount);
                 else
-                    Buffer.Memmove(ref dst, ref src, byteCount);
+                    SpanHelpers.Memmove(ref dst, ref src, byteCount);
 
                 // GC.KeepAlive(sourceArray) not required. pMT kept alive via sourceArray
                 return;
@@ -184,7 +184,7 @@ private static unsafe void CopyImplUnBoxEachElement(Array sourceArray, int sourc
                 }
                 else
                 {
-                    Buffer.Memmove(ref dest, ref obj.GetRawData(), destSize);
+                    SpanHelpers.Memmove(ref dest, ref obj.GetRawData(), destSize);
                 }
             }
         }
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Environment.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Environment.CoreCLR.cs
index 8bbd6e98ddaa..e0a24a42ef32 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Environment.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Environment.CoreCLR.cs
@@ -5,6 +5,7 @@
 using System.Diagnostics.CodeAnalysis;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+using System.Security;
 using System.Threading;
 
 namespace System
@@ -33,12 +34,15 @@ public static extern int ExitCode
             set;
         }
 
-        // Note: The CLR's Watson bucketization code looks at the caller of the FCALL method
-        // to assign blame for crashes.  Don't mess with this, such as by making it call
-        // another managed helper method, unless you consult with some CLR Watson experts.
         [DoesNotReturn]
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        public static extern void FailFast(string? message);
+        [DynamicSecurityMethod] // Methods containing StackCrawlMark local var has to be marked DynamicSecurityMethod
+        public static void FailFast(string? message)
+        {
+            // Note: The CLR's Watson bucketization code looks at the our caller
+            // to assign blame for crashes.
+            StackCrawlMark mark = StackCrawlMark.LookForMyCaller;
+            FailFast(ref mark, message, exception: null, errorMessage: null);
+        }
 
         // This overload of FailFast will allow you to specify the exception object
         // whose bucket details *could* be used when undergoing the failfast process.
@@ -54,12 +58,34 @@ public static extern int ExitCode
         //    IP for bucketing. If the exception object is not preallocated, it will use the bucket
         //    details contained in the object (if any).
         [DoesNotReturn]
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        public static extern void FailFast(string? message, Exception? exception);
+        [DynamicSecurityMethod] // Methods containing StackCrawlMark local var has to be marked DynamicSecurityMethod
+        public static void FailFast(string? message, Exception? exception)
+        {
+            // Note: The CLR's Watson bucketization code looks at the our caller
+            // to assign blame for crashes.
+            StackCrawlMark mark = StackCrawlMark.LookForMyCaller;
+            FailFast(ref mark, message, exception, errorMessage: null);
+        }
+
+        [DoesNotReturn]
+        [DynamicSecurityMethod] // Methods containing StackCrawlMark local var has to be marked DynamicSecurityMethod
+        internal static void FailFast(string? message, Exception? exception, string? errorMessage)
+        {
+            // Note: The CLR's Watson bucketization code looks at the our caller
+            // to assign blame for crashes.
+            StackCrawlMark mark = StackCrawlMark.LookForMyCaller;
+            FailFast(ref mark, message, exception, errorMessage);
+        }
+
+        [DoesNotReturn]
+        private static void FailFast(ref StackCrawlMark mark, string? message, Exception? exception, string? errorMessage)
+        {
+            FailFast(new StackCrawlMarkHandle(ref mark), message, ObjectHandleOnStack.Create(ref exception), errorMessage);
+        }
 
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "Environment_FailFast", StringMarshalling = StringMarshalling.Utf16)]
         [DoesNotReturn]
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        internal static extern void FailFast(string? message, Exception? exception, string? errorMessage);
+        private static partial void FailFast(StackCrawlMarkHandle mark, string? message, ObjectHandleOnStack exception, string? errorMessage);
 
         private static unsafe string[] InitializeCommandLineArgs(char* exePath, int argc, char** argv) // invoked from VM
         {
diff --git a/src/coreclr/System.Private.CoreLib/src/System/GC.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/GC.CoreCLR.cs
index 590fd5b18cee..697788316ba0 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/GC.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/GC.CoreCLR.cs
@@ -865,7 +865,9 @@ public static unsafe IReadOnlyDictionary<string, object> GetConfigurationVariabl
                 Configurations = new Dictionary<string, object>()
             };
 
-            _EnumerateConfigurationValues(Unsafe.AsPointer(ref context), &ConfigCallback);
+#pragma warning disable CS8500 // takes address of managed type
+            _EnumerateConfigurationValues(&context, &ConfigCallback);
+#pragma warning restore CS8500
             return context.Configurations!;
         }
 
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Math.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Math.CoreCLR.cs
index a619dc4b1ca7..5dd46b02d423 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Math.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Math.CoreCLR.cs
@@ -115,10 +115,6 @@ public static unsafe (double Sin, double Cos) SinCos(double x)
         [MethodImpl(MethodImplOptions.InternalCall)]
         public static extern double Tanh(double value);
 
-        [Intrinsic]
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern double FMod(double x, double y);
-
         [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern unsafe double ModF(double x, double* intptr);
 
diff --git a/src/coreclr/System.Private.CoreLib/src/System/MathF.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/MathF.CoreCLR.cs
index 855a1b1e7ef1..e9caae4c18a3 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/MathF.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/MathF.CoreCLR.cs
@@ -112,10 +112,6 @@ public static unsafe (float Sin, float Cos) SinCos(float x)
         [MethodImpl(MethodImplOptions.InternalCall)]
         public static extern float Tanh(float x);
 
-        [Intrinsic]
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern float FMod(float x, float y);
-
         [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern unsafe float ModF(float x, float* intptr);
 
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Object.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Object.CoreCLR.cs
index 70cff629fc28..940d1622bad1 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Object.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Object.CoreCLR.cs
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Diagnostics;
 using System.Runtime.CompilerServices;
 
 namespace System
@@ -19,7 +20,9 @@ public partial class Object
         [Intrinsic]
         protected internal unsafe object MemberwiseClone()
         {
-            object clone = RuntimeHelpers.AllocateUninitializedClone(this);
+            object clone = this;
+            RuntimeHelpers.AllocateUninitializedClone(ObjectHandleOnStack.Create(ref clone));
+            Debug.Assert(clone != this);
 
             // copy contents of "this" to the clone
 
@@ -30,7 +33,7 @@ protected internal unsafe object MemberwiseClone()
             if (RuntimeHelpers.GetMethodTable(clone)->ContainsGCPointers)
                 Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount);
             else
-                Buffer.Memmove(ref dst, ref src, byteCount);
+                SpanHelpers.Memmove(ref dst, ref src, byteCount);
 
             return clone;
         }
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/DynamicILGenerator.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/DynamicILGenerator.cs
index 2b695f1baf5b..327113c63f9a 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/DynamicILGenerator.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/Emit/DynamicILGenerator.cs
@@ -417,7 +417,7 @@ private int GetMemberRefToken(MethodInfo methodInfo, Type[]? optionalParameterTy
                 throw new ArgumentException(SR.Argument_MustBeRuntimeMethodInfo, nameof(methodInfo));
 
             ReadOnlySpan<ParameterInfo> paramInfo = methodInfo.GetParametersAsSpan();
-            if (paramInfo != null && paramInfo.Length != 0)
+            if (paramInfo.Length != 0)
             {
                 parameterTypes = new Type[paramInfo.Length];
                 requiredCustomModifiers = new Type[parameterTypes.Length][];
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/MethodBase.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/MethodBase.CoreCLR.cs
index 26383704e392..8e8d7148b397 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/MethodBase.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/MethodBase.CoreCLR.cs
@@ -2,6 +2,8 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 using System.Security;
 using System.Threading;
 
@@ -34,12 +36,16 @@ public abstract partial class MethodBase : MemberInfo
             return RuntimeType.GetMethodBase(declaringType.GetRuntimeType(), handle.GetMethodInfo());
         }
 
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "MethodBase_GetCurrentMethod")]
+        private static partial RuntimeMethodHandleInternal GetCurrentMethod(StackCrawlMarkHandle stackMark);
+
         [RequiresUnreferencedCode("Metadata for the method might be incomplete or removed")]
         [DynamicSecurityMethod] // Methods containing StackCrawlMark local var has to be marked DynamicSecurityMethod
         public static MethodBase? GetCurrentMethod()
         {
             StackCrawlMark stackMark = StackCrawlMark.LookForMyCaller;
-            return RuntimeMethodInfo.InternalGetCurrentMethod(ref stackMark);
+            RuntimeMethodHandleInternal methodHandle = GetCurrentMethod(new StackCrawlMarkHandle(ref stackMark));
+            return methodHandle.IsNullHandle() ? null : RuntimeType.GetMethodBase(null, methodHandle);
         }
         #endregion
 
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RtFieldInfo.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RtFieldInfo.cs
index 6a8aaf6898b8..9bd2298eb935 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RtFieldInfo.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RtFieldInfo.cs
@@ -18,46 +18,18 @@ internal sealed unsafe class RtFieldInfo : RuntimeFieldInfo, IRuntimeFieldInfo
         // lazy caching
         private string? m_name;
         private RuntimeType? m_fieldType;
-        private InvocationFlags m_invocationFlags;
-        internal InvocationFlags InvocationFlags
-        {
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            get => (m_invocationFlags & InvocationFlags.Initialized) != 0 ?
-                    m_invocationFlags : InitializeInvocationFlags();
-        }
+        private FieldAccessor? m_fieldAccessor;
 
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        private InvocationFlags InitializeInvocationFlags()
+        internal FieldAccessor FieldAccessor
         {
-            Type? declaringType = DeclaringType;
-
-            InvocationFlags invocationFlags = 0;
-
-            // first take care of all the NO_INVOKE cases
-            if (declaringType != null && declaringType.ContainsGenericParameters)
-            {
-                invocationFlags |= InvocationFlags.NoInvoke;
-            }
-
-            // If the invocationFlags are still 0, then
-            // this should be an usable field, determine the other flags
-            if (invocationFlags == 0)
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get
             {
-                if ((m_fieldAttributes & FieldAttributes.InitOnly) != 0)
-                    invocationFlags |= InvocationFlags.SpecialField;
-
-                if ((m_fieldAttributes & FieldAttributes.HasFieldRVA) != 0)
-                    invocationFlags |= InvocationFlags.SpecialField;
-
-                // find out if the field type is one of the following: Primitive, Enum or Pointer
-                Type fieldType = FieldType;
-                if (fieldType.IsPointer || fieldType.IsEnum || fieldType.IsPrimitive)
-                    invocationFlags |= InvocationFlags.FieldSpecialCast;
+                m_fieldAccessor ??= new FieldAccessor(this);
+                return m_fieldAccessor;
             }
-
-            // must be last to avoid threading problems
-            return m_invocationFlags = invocationFlags | InvocationFlags.Initialized;
         }
+
         #endregion
 
         #region Constructor
@@ -75,28 +47,6 @@ internal RtFieldInfo(
         #endregion
 
         #region Internal Members
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal void CheckConsistency(object? target)
-        {
-            // only test instance fields
-            if ((m_fieldAttributes & FieldAttributes.Static) != FieldAttributes.Static)
-            {
-                if (!m_declaringType.IsInstanceOfType(target))
-                {
-                    if (target == null)
-                    {
-                        throw new TargetException(SR.RFLCT_Targ_StatFldReqTarg);
-                    }
-                    else
-                    {
-                        throw new ArgumentException(
-                            SR.Format(SR.Arg_FieldDeclTarget,
-                                Name, m_declaringType, target.GetType()));
-                    }
-                }
-            }
-        }
-
         internal override bool CacheEquals(object? o)
         {
             return o is RtFieldInfo m && m.m_fieldHandle == m_fieldHandle;
@@ -131,36 +81,7 @@ public override int GetHashCode() =>
         #region FieldInfo Overrides
         [DebuggerStepThrough]
         [DebuggerHidden]
-        public override object? GetValue(object? obj)
-        {
-            InvocationFlags invocationFlags = InvocationFlags;
-            RuntimeType? declaringType = DeclaringType as RuntimeType;
-
-            if ((invocationFlags & InvocationFlags.NoInvoke) != 0)
-            {
-                if (declaringType != null && DeclaringType!.ContainsGenericParameters)
-                    throw new InvalidOperationException(SR.Arg_UnboundGenField);
-
-                throw new FieldAccessException();
-            }
-
-            CheckConsistency(obj);
-
-            RuntimeType fieldType = (RuntimeType)FieldType;
-
-            bool domainInitialized = false;
-            if (declaringType == null)
-            {
-                return RuntimeFieldHandle.GetValue(this, obj, fieldType, null, ref domainInitialized);
-            }
-            else
-            {
-                domainInitialized = declaringType.DomainInitialized;
-                object? retVal = RuntimeFieldHandle.GetValue(this, obj, fieldType, declaringType, ref domainInitialized);
-                declaringType.DomainInitialized = domainInitialized;
-                return retVal;
-            }
-        }
+        public override object? GetValue(object? obj) => FieldAccessor.GetValue(obj);
 
         public override object GetRawConstantValue() { throw new InvalidOperationException(); }
 
@@ -180,45 +101,7 @@ public override int GetHashCode() =>
         [DebuggerStepThrough]
         [DebuggerHidden]
         public override void SetValue(object? obj, object? value, BindingFlags invokeAttr, Binder? binder, CultureInfo? culture)
-        {
-            InvocationFlags invocationFlags = InvocationFlags;
-            RuntimeType? declaringType = DeclaringType as RuntimeType;
-
-            if ((invocationFlags & InvocationFlags.NoInvoke) != 0)
-            {
-                if (declaringType != null && declaringType.ContainsGenericParameters)
-                    throw new InvalidOperationException(SR.Arg_UnboundGenField);
-
-                throw new FieldAccessException();
-            }
-
-            CheckConsistency(obj);
-
-            RuntimeType fieldType = (RuntimeType)FieldType;
-            if (value is null)
-            {
-                if (fieldType.IsActualValueType)
-                {
-                    fieldType.CheckValue(ref value, binder, culture, invokeAttr);
-                }
-            }
-            else if (!ReferenceEquals(value.GetType(), fieldType))
-            {
-                fieldType.CheckValue(ref value, binder, culture, invokeAttr);
-            }
-
-            bool domainInitialized = false;
-            if (declaringType is null)
-            {
-                RuntimeFieldHandle.SetValue(this, obj, value, fieldType, m_fieldAttributes, null, ref domainInitialized);
-            }
-            else
-            {
-                domainInitialized = declaringType.DomainInitialized;
-                RuntimeFieldHandle.SetValue(this, obj, value, fieldType, m_fieldAttributes, declaringType, ref domainInitialized);
-                declaringType.DomainInitialized = domainInitialized;
-            }
-        }
+            => FieldAccessor.SetValue(obj, value, invokeAttr, binder, culture);
 
         [DebuggerStepThrough]
         [DebuggerHidden]
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeAssembly.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeAssembly.cs
index b5cff2f1e42e..53f2690948df 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeAssembly.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeAssembly.cs
@@ -645,27 +645,29 @@ public override Assembly GetSatelliteAssembly(CultureInfo culture, Version? vers
         {
             ArgumentNullException.ThrowIfNull(culture);
 
-            return InternalGetSatelliteAssembly(culture, version, throwOnFileNotFound: true)!;
+            return InternalGetSatelliteAssembly(this, culture, version, throwOnFileNotFound: true)!;
         }
 
         [DynamicSecurityMethod] // Methods containing StackCrawlMark local var has to be marked DynamicSecurityMethod
-        internal Assembly? InternalGetSatelliteAssembly(CultureInfo culture,
+        internal static Assembly? InternalGetSatelliteAssembly(Assembly assembly,
+                                                       CultureInfo culture,
                                                        Version? version,
                                                        bool throwOnFileNotFound)
         {
             var an = new AssemblyName();
-            an.SetPublicKey(GetPublicKey());
-            an.Flags = GetFlags() | AssemblyNameFlags.PublicKey;
-            an.Version = version ?? GetVersion();
+            RuntimeAssembly runtimeAssembly = (RuntimeAssembly)assembly;
+            an.SetPublicKey(runtimeAssembly.GetPublicKey());
+            an.Flags = runtimeAssembly.GetFlags() | AssemblyNameFlags.PublicKey;
+            an.Version = version ?? runtimeAssembly.GetVersion();
             an.CultureInfo = culture;
-            an.Name = GetSimpleName() + ".resources";
+            an.Name = runtimeAssembly.GetSimpleName() + ".resources";
 
             // This stack crawl mark is never used because the requesting assembly is explicitly specified,
             // so the value could be anything.
             StackCrawlMark unused = default;
-            RuntimeAssembly? retAssembly = InternalLoad(an, ref unused, requestingAssembly: this, throwOnFileNotFound: throwOnFileNotFound);
+            RuntimeAssembly? retAssembly = InternalLoad(an, ref unused, requestingAssembly: runtimeAssembly, throwOnFileNotFound: throwOnFileNotFound);
 
-            if (retAssembly == this)
+            if (retAssembly == runtimeAssembly)
             {
                 retAssembly = null;
             }
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeCustomAttributeData.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeCustomAttributeData.cs
index 1b3032a24607..dd0b5cf897f4 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeCustomAttributeData.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeCustomAttributeData.cs
@@ -472,6 +472,7 @@ public override IList<CustomAttributeNamedArgument> NamedArguments
                             if (p.EncodedArgument is not null
                                 && p.EncodedArgument.CustomAttributeType.EncodedType != CustomAttributeEncoding.Undefined)
                             {
+                                Debug.Assert(p.MemberInfo is not null);
                                 namedArgs[j++] = new CustomAttributeNamedArgument(
                                     p.MemberInfo,
                                     new CustomAttributeTypedArgument(m_scope, p.EncodedArgument));
@@ -1114,7 +1115,7 @@ public CustomAttributeType(RuntimeType parameterType)
         public Type? EnumType { get; }
     }
 
-    internal static unsafe class CustomAttribute
+    internal static unsafe partial class CustomAttribute
     {
         #region Internal Static Members
         internal static bool IsDefined(RuntimeType type, RuntimeType? caType, bool inherit)
@@ -1526,7 +1527,7 @@ private static void AddCustomAttributes(
                 object attribute;
                 if (ctorWithParameters is not null)
                 {
-                    attribute = CreateCaObject(decoratedModule, attributeType, ctorWithParameters, ref blobStart, blobEnd, out cNamedArgs);
+                    attribute = CreateCustomAttributeInstance(decoratedModule, attributeType, ctorWithParameters, ref blobStart, blobEnd, out cNamedArgs);
                 }
                 else
                 {
@@ -1794,8 +1795,16 @@ internal static AttributeUsageAttribute GetAttributeUsage(RuntimeType decoratedA
                 if (attributeUsageAttribute is not null)
                     throw new FormatException(SR.Format(SR.Format_AttributeUsage, attributeType));
 
-                ParseAttributeUsageAttribute(caRecord.blob, out AttributeTargets targets, out bool inherited, out bool allowMultiple);
-                attributeUsageAttribute = new AttributeUsageAttribute(targets, allowMultiple, inherited);
+                if (!ParseAttributeUsageAttribute(
+                    caRecord.blob,
+                    out AttributeTargets attrTargets,
+                    out bool allowMultiple,
+                    out bool inherited))
+                {
+                    throw new CustomAttributeFormatException();
+                }
+
+                attributeUsageAttribute = new AttributeUsageAttribute(attrTargets, allowMultiple: allowMultiple, inherited: inherited);
             }
 
             return attributeUsageAttribute ?? AttributeUsageAttribute.Default;
@@ -1838,42 +1847,93 @@ internal static object[] CreateAttributeArrayHelper(RuntimeType caType, int elem
         }
         #endregion
 
-        #region Private Static FCalls
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern void _ParseAttributeUsageAttribute(
-            IntPtr pCa, int cCa, out int targets, out bool inherited, out bool allowMultiple);
-        private static void ParseAttributeUsageAttribute(
-            ConstArray ca, out AttributeTargets targets, out bool inherited, out bool allowMultiple)
-        {
-            _ParseAttributeUsageAttribute(ca.Signature, ca.Length, out int _targets, out inherited, out allowMultiple);
-            targets = (AttributeTargets)_targets;
-        }
-
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern object _CreateCaObject(RuntimeModule pModule, RuntimeType type, IRuntimeMethodInfo pCtor, byte** ppBlob, byte* pEndBlob, int* pcNamedArgs);
-        private static object CreateCaObject(RuntimeModule module, RuntimeType type, IRuntimeMethodInfo ctor, ref IntPtr blob, IntPtr blobEnd, out int namedArgs)
-        {
-            byte* pBlob = (byte*)blob;
-            byte* pBlobEnd = (byte*)blobEnd;
-            int cNamedArgs;
-            object ca = _CreateCaObject(module, type, ctor, &pBlob, pBlobEnd, &cNamedArgs);
-            blob = (IntPtr)pBlob;
-            namedArgs = cNamedArgs;
-            return ca;
-        }
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "CustomAttribute_ParseAttributeUsageAttribute")]
+        [SuppressGCTransition]
+        private static partial int ParseAttributeUsageAttribute(
+            IntPtr pData,
+            int cData,
+            int* pTargets,
+            int* pAllowMultiple,
+            int* pInherited);
+
+        private static bool ParseAttributeUsageAttribute(
+            ConstArray blob,
+            out AttributeTargets attrTargets,
+            out bool allowMultiple,
+            out bool inherited)
+        {
+            int attrTargetsLocal = 0;
+            int allowMultipleLocal = 0;
+            int inheritedLocal = 0;
+            int result = ParseAttributeUsageAttribute(blob.Signature, blob.Length, &attrTargetsLocal, &allowMultipleLocal, &inheritedLocal);
+            attrTargets = (AttributeTargets)attrTargetsLocal;
+            allowMultiple = allowMultipleLocal != 0;
+            inherited = inheritedLocal != 0;
+            return result != 0;
+        }
+
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "CustomAttribute_CreateCustomAttributeInstance")]
+        private static partial void CreateCustomAttributeInstance(
+            QCallModule pModule,
+            ObjectHandleOnStack type,
+            ObjectHandleOnStack pCtor,
+            ref IntPtr ppBlob,
+            IntPtr pEndBlob,
+            out int pcNamedArgs,
+            ObjectHandleOnStack instance);
+
+        private static object CreateCustomAttributeInstance(RuntimeModule module, RuntimeType type, IRuntimeMethodInfo ctor, ref IntPtr blob, IntPtr blobEnd, out int namedArgs)
+        {
+            if (module is null)
+            {
+                throw new ArgumentNullException(SR.Arg_InvalidHandle);
+            }
+
+            object? result = null;
+            CreateCustomAttributeInstance(
+                new QCallModule(ref module),
+                ObjectHandleOnStack.Create(ref type),
+                ObjectHandleOnStack.Create(ref ctor),
+                ref blob,
+                blobEnd,
+                out namedArgs,
+                ObjectHandleOnStack.Create(ref result));
+            return result!;
+        }
+
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "CustomAttribute_CreatePropertyOrFieldData", StringMarshalling = StringMarshalling.Utf16)]
+        private static partial void CreatePropertyOrFieldData(
+            QCallModule pModule,
+            ref IntPtr ppBlobStart,
+            IntPtr pBlobEnd,
+            StringHandleOnStack name,
+            [MarshalAs(UnmanagedType.Bool)] out bool bIsProperty,
+            ObjectHandleOnStack type,
+            ObjectHandleOnStack value);
 
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern void _GetPropertyOrFieldData(
-            RuntimeModule pModule, byte** ppBlobStart, byte* pBlobEnd, out string name, out bool bIsProperty, out RuntimeType type, out object value);
         private static void GetPropertyOrFieldData(
             RuntimeModule module, ref IntPtr blobStart, IntPtr blobEnd, out string name, out bool isProperty, out RuntimeType? type, out object? value)
         {
-            byte* pBlobStart = (byte*)blobStart;
-            _GetPropertyOrFieldData(
-                module, &pBlobStart, (byte*)blobEnd, out name, out isProperty, out type, out value);
-            blobStart = (IntPtr)pBlobStart;
+            if (module is null)
+            {
+                throw new ArgumentNullException(SR.Arg_InvalidHandle);
+            }
+
+            string? nameLocal = null;
+            RuntimeType? typeLocal = null;
+            object? valueLocal = null;
+            CreatePropertyOrFieldData(
+                new QCallModule(ref module),
+                ref blobStart,
+                blobEnd,
+                new StringHandleOnStack(ref nameLocal),
+                out isProperty,
+                ObjectHandleOnStack.Create(ref typeLocal),
+                ObjectHandleOnStack.Create(ref valueLocal));
+            name = nameLocal!;
+            type = typeLocal;
+            value = valueLocal;
         }
-        #endregion
     }
 
     internal static class PseudoCustomAttribute
@@ -1918,12 +1978,18 @@ private static HashSet<RuntimeType> CreatePseudoCustomAttributeHashSet()
         private static void VerifyPseudoCustomAttribute(RuntimeType pca)
         {
             // If any of these are invariants are no longer true will have to
-            // re-architect the PCA product logic and test cases -- you've been warned!
-            Debug.Assert(pca.BaseType == typeof(Attribute), "Pseudo CA Error");
+            // re-architect the PCA product logic and test cases.
+            Debug.Assert(pca.BaseType == typeof(Attribute), "Pseudo CA Error - Incorrect base type");
             AttributeUsageAttribute usage = CustomAttribute.GetAttributeUsage(pca);
-            Debug.Assert(!usage.Inherited, "Pseudo CA Error");
-            // AllowMultiple is true for TypeForwardedToAttribute
-            // Debug.Assert(usage.AllowMultiple == false, "Pseudo CA Error");
+            Debug.Assert(!usage.Inherited, "Pseudo CA Error - Unexpected Inherited value");
+            if (pca == typeof(TypeForwardedToAttribute))
+            {
+                Debug.Assert(usage.AllowMultiple, "Pseudo CA Error - Unexpected AllowMultiple value");
+            }
+            else
+            {
+                Debug.Assert(!usage.AllowMultiple, "Pseudo CA Error - Unexpected AllowMultiple value");
+            }
         }
         #endregion
 
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeFieldInfo.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeFieldInfo.cs
index dab1d07145db..a314edaab2a1 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeFieldInfo.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeFieldInfo.cs
@@ -11,7 +11,7 @@ internal abstract class RuntimeFieldInfo : FieldInfo
         #region Private Data Members
         private readonly BindingFlags m_bindingFlags;
         protected readonly RuntimeTypeCache m_reflectedTypeCache;
-        protected readonly RuntimeType m_declaringType;
+        protected internal readonly RuntimeType m_declaringType;
         #endregion
 
         #region Constructor
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeMethodInfo.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeMethodInfo.CoreCLR.cs
index de60e189ac91..9c999912382a 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeMethodInfo.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Reflection/RuntimeMethodInfo.CoreCLR.cs
@@ -488,17 +488,5 @@ public override bool ContainsGenericParameters
             }
         }
         #endregion
-
-        #region Legacy Internal
-        internal static MethodBase? InternalGetCurrentMethod(ref StackCrawlMark stackMark)
-        {
-            IRuntimeMethodInfo? method = RuntimeMethodHandle.GetCurrentMethod(ref stackMark);
-
-            if (method == null)
-                return null;
-
-            return RuntimeType.GetMethodBase(method);
-        }
-        #endregion
     }
 }
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.CoreCLR.cs
deleted file mode 100644
index 05805072cd7c..000000000000
--- a/src/coreclr/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.CoreCLR.cs
+++ /dev/null
@@ -1,19 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Globalization;
-using System.Reflection;
-
-namespace System.Resources
-{
-    internal sealed partial class ManifestBasedResourceGroveler
-    {
-        // Internal version of GetSatelliteAssembly that avoids throwing FileNotFoundException
-        private static Assembly? InternalGetSatelliteAssembly(Assembly mainAssembly,
-                                                             CultureInfo culture,
-                                                             Version? version)
-        {
-            return ((RuntimeAssembly)mainAssembly).InternalGetSatelliteAssembly(culture, version, throwOnFileNotFound: false);
-        }
-    }
-}
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs
index 5f6c7070958a..0c640dd1c2f0 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs
@@ -2,13 +2,12 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Diagnostics;
-using System.Numerics;
-using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
-using System.Threading;
 
 namespace System.Runtime.CompilerServices
 {
+    [StackTraceHidden]
+    [DebuggerStepThrough]
     internal static unsafe class CastHelpers
     {
         // In coreclr the table is allocated and written to on the native side.
@@ -24,14 +23,12 @@ internal static unsafe class CastHelpers
         private static extern ref byte Unbox_Helper(void* toTypeHnd, object obj);
 
         [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern void WriteBarrier(ref object? dst, object obj);
+        private static extern void WriteBarrier(ref object? dst, object? obj);
 
         // IsInstanceOf test used for unusual cases (naked type parameters, variant generic types)
         // Unlike the IsInstanceOfInterface and IsInstanceOfClass functions,
         // this test must deal with all kinds of type tests
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
         private static object? IsInstanceOfAny(void* toTypeHnd, object? obj)
         {
             if (obj != null)
@@ -63,8 +60,6 @@ internal static unsafe class CastHelpers
         }
 
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
         private static object? IsInstanceOfInterface(void* toTypeHnd, object? obj)
         {
             const int unrollSize = 4;
@@ -134,8 +129,6 @@ internal static unsafe class CastHelpers
         }
 
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
         private static object? IsInstanceOfClass(void* toTypeHnd, object? obj)
         {
             if (obj == null || RuntimeHelpers.GetMethodTable(obj) == toTypeHnd)
@@ -184,8 +177,6 @@ internal static unsafe class CastHelpers
         }
 
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
         [MethodImpl(MethodImplOptions.NoInlining)]
         private static object? IsInstance_Helper(void* toTypeHnd, object obj)
         {
@@ -207,8 +198,6 @@ internal static unsafe class CastHelpers
         // Unlike the ChkCastInterface and ChkCastClass functions,
         // this test must deal with all kinds of type tests
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
         internal static object? ChkCastAny(void* toTypeHnd, object? obj)
         {
             CastResult result;
@@ -237,8 +226,6 @@ internal static unsafe class CastHelpers
         }
 
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
         [MethodImpl(MethodImplOptions.NoInlining)]
         private static object? ChkCast_Helper(void* toTypeHnd, object obj)
         {
@@ -253,8 +240,6 @@ internal static unsafe class CastHelpers
         }
 
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
         private static object? ChkCastInterface(void* toTypeHnd, object? obj)
         {
             const int unrollSize = 4;
@@ -321,8 +306,6 @@ internal static unsafe class CastHelpers
         }
 
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
         private static object? ChkCastClass(void* toTypeHnd, object? obj)
         {
             if (obj == null || RuntimeHelpers.GetMethodTable(obj) == toTypeHnd)
@@ -336,8 +319,6 @@ internal static unsafe class CastHelpers
         // Optimized helper for classes. Assumes that the trivial cases
         // has been taken care of by the inlined check
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
         private static object? ChkCastClassSpecial(void* toTypeHnd, object obj)
         {
             MethodTable* mt = RuntimeHelpers.GetMethodTable(obj);
@@ -384,52 +365,53 @@ internal static unsafe class CastHelpers
         }
 
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
         private static ref byte Unbox(void* toTypeHnd, object obj)
         {
-            // this will throw NullReferenceException if obj is null, attributed to the user code, as expected.
+            // This will throw NullReferenceException if obj is null.
             if (RuntimeHelpers.GetMethodTable(obj) == toTypeHnd)
                 return ref obj.GetRawData();
 
             return ref Unbox_Helper(toTypeHnd, obj);
         }
 
-        internal struct ArrayElement
+        [DebuggerHidden]
+        private static void ThrowIndexOutOfRangeException()
         {
-            public object? Value;
+            throw new IndexOutOfRangeException();
         }
 
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
-        private static ref object? ThrowArrayMismatchException()
+        private static void ThrowArrayMismatchException()
         {
             throw new ArrayTypeMismatchException();
         }
 
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
-        private static ref object? LdelemaRef(Array array, nint index, void* type)
+        private static ref object? LdelemaRef(object?[] array, nint index, void* type)
         {
-            // this will throw appropriate exceptions if array is null or access is out of range.
-            ref object? element = ref Unsafe.As<ArrayElement[]>(array)[index].Value;
+            // This will throw NullReferenceException if array is null.
+            if ((nuint)index >= (uint)array.Length)
+                ThrowIndexOutOfRangeException();
+
+            Debug.Assert(index >= 0);
+            ref object? element = ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index);
             void* elementType = RuntimeHelpers.GetMethodTable(array)->ElementType;
 
-            if (elementType == type)
-                return ref element;
+            if (elementType != type)
+                ThrowArrayMismatchException();
 
-            return ref ThrowArrayMismatchException();
+            return ref element;
         }
 
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
-        private static void StelemRef(Array array, nint index, object? obj)
+        private static void StelemRef(object?[] array, nint index, object? obj)
         {
-            // this will throw appropriate exceptions if array is null or access is out of range.
-            ref object? element = ref Unsafe.As<ArrayElement[]>(array)[index].Value;
+            // This will throw NullReferenceException if array is null.
+            if ((nuint)index >= (uint)array.Length)
+                ThrowIndexOutOfRangeException();
+
+            Debug.Assert(index >= 0);
+            ref object? element = ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index);
             void* elementType = RuntimeHelpers.GetMethodTable(array)->ElementType;
 
             if (obj == null)
@@ -454,8 +436,6 @@ private static void StelemRef(Array array, nint index, object? obj)
         }
 
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
         [MethodImpl(MethodImplOptions.NoInlining)]
         private static void StelemRef_Helper(ref object? element, void* elementType, object obj)
         {
@@ -470,20 +450,17 @@ private static void StelemRef_Helper(ref object? element, void* elementType, obj
         }
 
         [DebuggerHidden]
-        [StackTraceHidden]
-        [DebuggerStepThrough]
         private static void StelemRef_Helper_NoCacheLookup(ref object? element, void* elementType, object obj)
         {
             Debug.Assert(obj != null);
 
             obj = IsInstanceOfAny_NoCacheLookup(elementType, obj);
-            if (obj != null)
+            if (obj == null)
             {
-                WriteBarrier(ref element, obj);
-                return;
+                ThrowArrayMismatchException();
             }
 
-            throw new ArrayTypeMismatchException();
+            WriteBarrier(ref element, obj);
         }
     }
 }
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs
index 4e75d7db895c..69506c2feda1 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs
@@ -139,8 +139,32 @@ public static unsafe void PrepareMethod(RuntimeMethodHandle method, RuntimeTypeH
         [MethodImpl(MethodImplOptions.InternalCall)]
         internal static extern int TryGetHashCode(object o);
 
+        public static new unsafe bool Equals(object? o1, object? o2)
+        {
+            // Compare by ref for normal classes, by value for value types.
+
+            if (ReferenceEquals(o1, o2))
+                return true;
+
+            if (o1 is null || o2 is null)
+                return false;
+
+            MethodTable* pMT = GetMethodTable(o1);
+
+            // If it's not a value class, don't compare by value
+            if (!pMT->IsValueType)
+                return false;
+
+            // Make sure they are the same type.
+            if (pMT != GetMethodTable(o2))
+                return false;
+
+            // Compare the contents
+            return ContentEquals(o1, o2);
+        }
+
         [MethodImpl(MethodImplOptions.InternalCall)]
-        public static extern new bool Equals(object? o1, object? o2);
+        private static extern unsafe bool ContentEquals(object o1, object o2);
 
         [Obsolete("OffsetToStringData has been deprecated. Use string.GetPinnableReference() instead.")]
         public static int OffsetToStringData
@@ -194,8 +218,8 @@ public static object GetUninitializedObject(
             return rt.GetUninitializedObject();
         }
 
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        internal static extern object AllocateUninitializedClone(object obj);
+        [LibraryImport(QCall, EntryPoint = "ObjectNative_AllocateUninitializedClone")]
+        internal static partial void AllocateUninitializedClone(ObjectHandleOnStack objHandle);
 
         /// <returns>true if given type is reference type or value type that contains references</returns>
         [Intrinsic]
@@ -392,6 +416,48 @@ private static unsafe void DispatchTailCalls(
                 }
             }
         }
+
+        /// <summary>
+        /// Create a boxed object of the specified type from the data located at the target reference.
+        /// </summary>
+        /// <param name="target">The target data</param>
+        /// <param name="type">The type of box to create.</param>
+        /// <returns>A boxed object containing the specified data.</returns>
+        /// <exception cref="ArgumentNullException">The specified type handle is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException">The specified type cannot have a boxed instance of itself created.</exception>
+        /// <exception cref="NotSupportedException">The passed in type is a by-ref-like type.</exception>
+        public static unsafe object? Box(ref byte target, RuntimeTypeHandle type)
+        {
+            if (type.IsNullHandle())
+                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.type);
+
+            TypeHandle handle = type.GetNativeTypeHandle();
+
+            if (handle.IsTypeDesc)
+                throw new ArgumentException(SR.Arg_TypeNotSupported);
+
+            MethodTable* pMT = handle.AsMethodTable();
+
+            if (pMT->ContainsGenericVariables)
+                throw new ArgumentException(SR.Arg_TypeNotSupported);
+
+            if (pMT->IsValueType)
+            {
+                if (pMT->IsByRefLike)
+                    throw new NotSupportedException(SR.NotSupported_ByRefLike);
+
+                if (MethodTable.AreSameType(pMT, (MethodTable*)RuntimeTypeHandle.ToIntPtr(typeof(void).TypeHandle)))
+                    throw new ArgumentException(SR.Arg_TypeNotSupported);
+
+                object? result = Box(pMT, ref target);
+                GC.KeepAlive(type);
+                return result;
+            }
+            else
+            {
+                return Unsafe.As<byte, object?>(ref target);
+            }
+        }
     }
     // Helper class to assist with unsafe pinning of arbitrary objects.
     // It's used by VM code.
@@ -462,7 +528,13 @@ internal unsafe struct MethodTable
 
         // Additional conditional fields (see methodtable.h).
         // m_pModule
-        // m_pAuxiliaryData
+
+        /// <summary>
+        /// A pointer to auxiliary data that is cold for method table.
+        /// </summary>
+        [FieldOffset(AuxiliaryDataOffset)]
+        public MethodTableAuxiliaryData* AuxiliaryData;
+
         // union {
         //   m_pEEClass (pointer to the EE class)
         //   m_pCanonMT (pointer to the canonical method table)
@@ -493,6 +565,7 @@ internal unsafe struct MethodTable
 
         // WFLAGS_HIGH_ENUM
         private const uint enum_flag_ContainsPointers = 0x01000000;
+        private const uint enum_flag_ContainsGenericVariables = 0x20000000;
         private const uint enum_flag_HasComponentSize = 0x80000000;
         private const uint enum_flag_HasTypeEquivalence = 0x02000000;
         private const uint enum_flag_Category_Mask = 0x000F0000;
@@ -523,6 +596,12 @@ internal unsafe struct MethodTable
 
         private const int ParentMethodTableOffset = 0x10 + DebugClassNamePtr;
 
+#if TARGET_64BIT
+        private const int AuxiliaryDataOffset = 0x20 + DebugClassNamePtr;
+#else
+        private const int AuxiliaryDataOffset = 0x18 + DebugClassNamePtr;
+#endif
+
 #if TARGET_64BIT
         private const int ElementTypeOffset = 0x30 + DebugClassNamePtr;
 #else
@@ -594,6 +673,8 @@ public bool IsConstructedGenericType
             }
         }
 
+        public bool ContainsGenericVariables => (Flags & enum_flag_ContainsGenericVariables) != 0;
+
         /// <summary>
         /// Gets a <see cref="TypeHandle"/> for the element type of the current type.
         /// </summary>
@@ -610,6 +691,28 @@ public TypeHandle GetArrayElementTypeHandle()
         public extern uint GetNumInstanceFieldBytes();
     }
 
+    // Subset of src\vm\methodtable.h
+    [StructLayout(LayoutKind.Explicit)]
+    internal unsafe struct MethodTableAuxiliaryData
+    {
+        [FieldOffset(0)]
+        private uint Flags;
+
+        private const uint enum_flag_CanCompareBitsOrUseFastGetHashCode = 0x0001;     // Is any field type or sub field type overrode Equals or GetHashCode
+        private const uint enum_flag_HasCheckedCanCompareBitsOrUseFastGetHashCode = 0x0002;  // Whether we have checked the overridden Equals or GetHashCode
+
+        public bool HasCheckedCanCompareBitsOrUseFastGetHashCode => (Flags & enum_flag_HasCheckedCanCompareBitsOrUseFastGetHashCode) != 0;
+
+        public bool CanCompareBitsOrUseFastGetHashCode
+        {
+            get
+            {
+                Debug.Assert(HasCheckedCanCompareBitsOrUseFastGetHashCode);
+                return (Flags & enum_flag_CanCompareBitsOrUseFastGetHashCode) != 0;
+            }
+        }
+    }
+
     /// <summary>
     /// A type handle, which can wrap either a pointer to a <c>TypeDesc</c> or to a <see cref="MethodTable"/>.
     /// </summary>
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs
index fb70ddcc703f..7db188808e26 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs
@@ -48,14 +48,14 @@ class AsmOffsets
 
 #if TARGET_64BIT
     public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x8;
-    public const int SIZEOF__StackFrameIterator = 0x370;
-    public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x352;
-    public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x368;
+    public const int SIZEOF__StackFrameIterator = 0x358;
+    public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x33A;
+    public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x350;
 #else // TARGET_64BIT
     public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x4;
-    public const int SIZEOF__StackFrameIterator = 0x2d8;
-    public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x2c2;
-    public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x2d0;
+    public const int SIZEOF__StackFrameIterator = 0x2c8;
+    public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x2b6;
+    public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x2c4;
 #endif // TARGET_64BIT
 
 #else // DEBUG
@@ -94,14 +94,14 @@ class AsmOffsets
 
 #if TARGET_64BIT
     public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x8;
-    public const int SIZEOF__StackFrameIterator = 0x370;
-    public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x34a;
-    public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x360;
+    public const int SIZEOF__StackFrameIterator = 0x350;
+    public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x332;
+    public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x348;
 #else // TARGET_64BIT
     public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x4;
-    public const int SIZEOF__StackFrameIterator = 0x2d0;
-    public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x2ba;
-    public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x2c8;
+    public const int SIZEOF__StackFrameIterator = 0x2c0;
+    public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x2ae;
+    public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x2bc;
 #endif // TARGET_64BIT
 
 #endif // DEBUG
@@ -155,7 +155,7 @@ class AsmOffsets
     public const int OFFSETOF__ExInfo__m_kind = 0xd0;
     public const int OFFSETOF__ExInfo__m_passNumber = 0xd1;
     public const int OFFSETOF__ExInfo__m_idxCurClause = 0xd4;
-    public const int OFFSETOF__ExInfo__m_frameIter = 0xe0;
+    public const int OFFSETOF__ExInfo__m_frameIter = 0xd8;
     public const int OFFSETOF__ExInfo__m_notifyDebuggerSP = OFFSETOF__ExInfo__m_frameIter + SIZEOF__StackFrameIterator;
 #else // TARGET_64BIT
     public const int SIZEOF__EHEnum = 0x10;
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/InternalCalls.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/InternalCalls.cs
index 4ae608fc17d2..228f58c0ea4d 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/InternalCalls.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/InternalCalls.cs
@@ -42,7 +42,7 @@ internal static unsafe partial bool RhpCallFilterFunclet(
 
         [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "EHEnumInitFromStackFrameIterator")]
         [return: MarshalAs(UnmanagedType.Bool)]
-        internal static unsafe partial bool RhpEHEnumInitFromStackFrameIterator(ref StackFrameIterator pFrameIter, byte** pMethodStartAddress, void* pEHEnum);
+        internal static unsafe partial bool RhpEHEnumInitFromStackFrameIterator(ref StackFrameIterator pFrameIter, out EH.MethodRegionInfo pMethodRegionInfo, void* pEHEnum);
 
         [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "EHEnumNext")]
         [return: MarshalAs(UnmanagedType.Bool)]
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs
index c04665aa6c22..bbdccc6cd2ee 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs
@@ -266,7 +266,7 @@ public static unsafe void StructureToPtr(object structure, IntPtr ptr, bool fDel
             }
             else
             {
-                Buffer.Memmove(ref *(byte*)ptr, ref structure.GetRawData(), size);
+                SpanHelpers.Memmove(ref *(byte*)ptr, ref structure.GetRawData(), size);
             }
         }
 
@@ -291,7 +291,7 @@ private static unsafe void PtrToStructureHelper(IntPtr ptr, object structure, bo
             }
             else
             {
-                Buffer.Memmove(ref structure.GetRawData(), ref *(byte*)ptr, size);
+                SpanHelpers.Memmove(ref structure.GetRawData(), ref *(byte*)ptr, size);
             }
         }
 
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/Versioning/CompatibilitySwitch.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/Versioning/CompatibilitySwitch.cs
deleted file mode 100644
index d90f81d48e98..000000000000
--- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/Versioning/CompatibilitySwitch.cs
+++ /dev/null
@@ -1,13 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Runtime.CompilerServices;
-
-namespace System.Runtime.Versioning
-{
-    internal static class CompatibilitySwitch
-    {
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        internal static extern string? GetValueInternal(string compatibilitySwitchName);
-    }
-}
diff --git a/src/coreclr/System.Private.CoreLib/src/System/RuntimeHandles.cs b/src/coreclr/System.Private.CoreLib/src/System/RuntimeHandles.cs
index 73b9bb167f7a..001a9fcdfee6 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/RuntimeHandles.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/RuntimeHandles.cs
@@ -87,6 +87,11 @@ internal bool IsNullHandle()
             return m_type == null;
         }
 
+        internal TypeHandle GetNativeTypeHandle()
+        {
+            return m_type.GetNativeTypeHandle();
+        }
+
         internal static bool IsTypeDefinition(RuntimeType type)
         {
             CorElementType corElemType = GetCorElementType(type);
@@ -844,13 +849,6 @@ internal static partial Interop.BOOL IsCAVisibleFromDecoratedType(
             QCallTypeHandle sourceTypeHandle,
             QCallModule sourceModule);
 
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern IRuntimeMethodInfo? _GetCurrentMethod(ref StackCrawlMark stackMark);
-        internal static IRuntimeMethodInfo? GetCurrentMethod(ref StackCrawlMark stackMark)
-        {
-            return _GetCurrentMethod(ref stackMark);
-        }
-
         [MethodImpl(MethodImplOptions.InternalCall)]
         internal static extern MethodAttributes GetAttributes(RuntimeMethodHandleInternal method);
 
@@ -1087,6 +1085,7 @@ public RuntimeFieldInfoStub(RuntimeFieldHandleInternal fieldHandle, object keepa
         private object? m_d;
         private int m_b;
         private object? m_e;
+        private object? m_f;
         private RuntimeFieldHandleInternal m_fieldHandle;
 #pragma warning restore 414, 169, IDE0044
 
@@ -1189,17 +1188,26 @@ internal static RuntimeType GetApproxDeclaringType(IRuntimeFieldInfo field)
             return type;
         }
 
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        internal static extern bool IsFastPathSupported(RtFieldInfo field);
+
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        internal static extern int GetInstanceFieldOffset(RtFieldInfo field);
+
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        internal static extern IntPtr GetStaticFieldAddress(RtFieldInfo field);
+
         [MethodImpl(MethodImplOptions.InternalCall)]
         internal static extern int GetToken(RtFieldInfo field);
 
         [MethodImpl(MethodImplOptions.InternalCall)]
-        internal static extern object? GetValue(RtFieldInfo field, object? instance, RuntimeType fieldType, RuntimeType? declaringType, ref bool domainInitialized);
+        internal static extern object? GetValue(RtFieldInfo field, object? instance, RuntimeType fieldType, RuntimeType? declaringType, ref bool isClassInitialized);
 
         [MethodImpl(MethodImplOptions.InternalCall)]
         internal static extern object? GetValueDirect(RtFieldInfo field, RuntimeType fieldType, void* pTypedRef, RuntimeType? contextType);
 
         [MethodImpl(MethodImplOptions.InternalCall)]
-        internal static extern void SetValue(RtFieldInfo field, object? obj, object? value, RuntimeType fieldType, FieldAttributes fieldAttr, RuntimeType? declaringType, ref bool domainInitialized);
+        internal static extern void SetValue(RtFieldInfo field, object? obj, object? value, RuntimeType fieldType, RuntimeType? declaringType, ref bool isClassInitialized);
 
         [MethodImpl(MethodImplOptions.InternalCall)]
         internal static extern void SetValueDirect(RtFieldInfo field, RuntimeType fieldType, void* pTypedRef, object? value, RuntimeType? contextType);
diff --git a/src/coreclr/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs
index 1329f1f7140b..6a1fae9e05c7 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs
@@ -1438,7 +1438,6 @@ internal T[] GetMemberList(MemberListType listType, string? name, CacheType cach
             private string? m_toString;
             private string? m_namespace;
             private readonly bool m_isGlobal;
-            private bool m_bIsDomainInitialized;
             private MemberInfoCache<RuntimeMethodInfo>? m_methodInfoCache;
             private MemberInfoCache<RuntimeConstructorInfo>? m_constructorInfoCache;
             private MemberInfoCache<RuntimeFieldInfo>? m_fieldInfoCache;
@@ -1523,12 +1522,6 @@ internal Type[] FunctionPointerReturnAndParameterTypes
                 }
             }
 
-            internal bool DomainInitialized
-            {
-                get => m_bIsDomainInitialized;
-                set => m_bIsDomainInitialized = value;
-            }
-
             internal string? GetName(TypeNameKind kind)
             {
                 switch (kind)
@@ -1935,12 +1928,6 @@ internal object? GenericCache
             set => Cache.GenericCache = value;
         }
 
-        internal bool DomainInitialized
-        {
-            get => Cache.DomainInitialized;
-            set => Cache.DomainInitialized = value;
-        }
-
         internal static FieldInfo GetFieldInfo(IRuntimeFieldInfo fieldHandle)
         {
             return GetFieldInfo(RuntimeFieldHandle.GetApproxDeclaringType(fieldHandle), fieldHandle);
@@ -2386,7 +2373,9 @@ private static bool FilterApplyMethodBase(
         #region Private Data Members
 
 #pragma warning disable CA1823
+#pragma warning disable CS0169
         private readonly object m_keepalive; // This will be filled with a LoaderAllocator reference when this RuntimeType represents a collectible type
+#pragma warning restore CS0169
 #pragma warning restore CA1823
         private IntPtr m_cache;
         internal IntPtr m_handle;
@@ -2770,7 +2759,12 @@ public override InterfaceMapping GetInterfaceMap([DynamicallyAccessedMembers(Dyn
                 MethodBase? rtTypeMethodBase = GetMethodBase(reflectedType, classRtMethodHandle);
                 // a class may not implement all the methods of an interface (abstract class) so null is a valid value
                 Debug.Assert(rtTypeMethodBase is null || rtTypeMethodBase is RuntimeMethodInfo);
-                im.TargetMethods[i] = (MethodInfo)rtTypeMethodBase!;
+                RuntimeMethodInfo? targetMethod = (RuntimeMethodInfo?)rtTypeMethodBase;
+                // the TargetMethod provided to us by runtime internals may be a generic method instance,
+                //  potentially with invalid arguments. TargetMethods in the InterfaceMap should never be
+                //  instances, only definitions.
+                im.TargetMethods[i] = (targetMethod is { IsGenericMethod: true, IsGenericMethodDefinition: false })
+                    ? targetMethod.GetGenericMethodDefinition() : targetMethod!;
             }
 
             return im;
diff --git a/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs
index f15ad03d8218..d19cb01034a7 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs
@@ -23,13 +23,12 @@ public static string Intern(string str)
         }
 
         [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "String_IsInterned")]
-        [return: MarshalAs(UnmanagedType.Bool)]
         private static partial void IsInterned(StringHandleOnStack src);
 
         public static string? IsInterned(string str)
         {
             ArgumentNullException.ThrowIfNull(str);
-            Intern(new StringHandleOnStack(ref str!));
+            IsInterned(new StringHandleOnStack(ref str!));
             return str;
         }
 
@@ -39,7 +38,7 @@ internal static unsafe void InternalCopy(string src, IntPtr dest, int len)
         {
             if (len != 0)
             {
-                Buffer.Memmove(ref *(byte*)dest, ref Unsafe.As<char, byte>(ref src.GetRawStringData()), (nuint)len);
+                SpanHelpers.Memmove(ref *(byte*)dest, ref Unsafe.As<char, byte>(ref src.GetRawStringData()), (nuint)len);
             }
         }
 
diff --git a/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs b/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs
index 9874eef6dc22..e772613855c7 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs
@@ -103,7 +103,7 @@ internal static unsafe IntPtr ConvertToNative(int flags, string strManaged, IntP
                     // + 1 for the null character from the user.  + 1 for the null character we put in.
                     pbNativeBuffer = (byte*)Marshal.AllocCoTaskMem(nb + 2);
 
-                    Buffer.Memmove(ref *pbNativeBuffer, ref MemoryMarshal.GetArrayDataReference(bytes), (nuint)nb);
+                    SpanHelpers.Memmove(ref *pbNativeBuffer, ref MemoryMarshal.GetArrayDataReference(bytes), (nuint)nb);
                 }
             }
 
@@ -360,7 +360,7 @@ internal static unsafe IntPtr ConvertToNative(string strManaged, bool fBestFit,
 
                 Debug.Assert(nbytesused >= 0 && nbytesused < nbytes, "Insufficient buffer allocated in VBByValStrMarshaler.ConvertToNative");
 
-                Buffer.Memmove(ref *pNative, ref MemoryMarshal.GetArrayDataReference(bytes), (nuint)nbytesused);
+                SpanHelpers.Memmove(ref *pNative, ref MemoryMarshal.GetArrayDataReference(bytes), (nuint)nbytesused);
 
                 pNative[nbytesused] = 0;
                 *pLength = nbytesused;
@@ -409,7 +409,7 @@ internal static unsafe IntPtr ConvertToNative(int flags, string strManaged)
             IntPtr bstr = Marshal.AllocBSTRByteLen(length);
             if (bytes != null)
             {
-                Buffer.Memmove(ref *(byte*)bstr, ref MemoryMarshal.GetArrayDataReference(bytes), length);
+                SpanHelpers.Memmove(ref *(byte*)bstr, ref MemoryMarshal.GetArrayDataReference(bytes), length);
             }
 
             return bstr;
@@ -1315,6 +1315,75 @@ public IntPtr AddRef()
         }
     }  // class CleanupWorkListElement
 
+    internal unsafe struct CopyConstructorCookie
+    {
+        private void* m_source;
+
+        private nuint m_destinationOffset;
+
+        public delegate*<void*, void*, void> m_copyConstructor;
+
+        public delegate*<void*, void> m_destructor;
+
+        public CopyConstructorCookie* m_next;
+
+        [StackTraceHidden]
+        public void ExecuteCopy(void* destinationBase)
+        {
+            if (m_copyConstructor != null)
+            {
+                m_copyConstructor((byte*)destinationBase + m_destinationOffset, m_source);
+            }
+
+            if (m_destructor != null)
+            {
+                m_destructor(m_source);
+            }
+        }
+    }
+
+    internal unsafe struct CopyConstructorChain
+    {
+        public void* m_realTarget;
+        public CopyConstructorCookie* m_head;
+
+        public void Add(CopyConstructorCookie* cookie)
+        {
+            cookie->m_next = m_head;
+            m_head = cookie;
+        }
+
+        [ThreadStatic]
+        private static CopyConstructorChain s_copyConstructorChain;
+
+        public void Install(void* realTarget)
+        {
+            m_realTarget = realTarget;
+            s_copyConstructorChain = this;
+        }
+
+        [StackTraceHidden]
+        private void ExecuteCopies(void* destinationBase)
+        {
+            for (CopyConstructorCookie* current = m_head; current != null; current = current->m_next)
+            {
+                current->ExecuteCopy(destinationBase);
+            }
+        }
+
+        [UnmanagedCallersOnly]
+        [StackTraceHidden]
+        public static void* ExecuteCurrentCopiesAndGetTarget(void* destinationBase)
+        {
+            void* target = s_copyConstructorChain.m_realTarget;
+            s_copyConstructorChain.ExecuteCopies(destinationBase);
+            // Reset this instance to ensure we don't accidentally execute the copies again.
+            // All of the pointers point to the stack, so we don't need to free any memory.
+            s_copyConstructorChain = default;
+            return target;
+        }
+    }
+
     internal static partial class StubHelpers
     {
         [MethodImpl(MethodImplOptions.InternalCall)]
@@ -1484,7 +1553,7 @@ internal static unsafe void FmtClassUpdateNativeInternal(object obj, byte* pNati
             }
             else
             {
-                Buffer.Memmove(ref *pNative, ref obj.GetRawData(), size);
+                SpanHelpers.Memmove(ref *pNative, ref obj.GetRawData(), size);
             }
         }
 
@@ -1503,7 +1572,7 @@ internal static unsafe void FmtClassUpdateCLRInternal(object obj, byte* pNative)
             }
             else
             {
-                Buffer.Memmove(ref obj.GetRawData(), ref *pNative, size);
+                SpanHelpers.Memmove(ref obj.GetRawData(), ref *pNative, size);
             }
         }
 
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs
index e2d0a033b5d0..93df3bdfed9f 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs
@@ -42,48 +42,6 @@ public static long Decrement(ref long location) =>
         #endregion
 
         #region Exchange
-        /// <summary>Sets a 8-bit unsigned integer to a specified value and returns the original value, as an atomic operation.</summary>
-        /// <param name="location1">The variable to set to the specified value.</param>
-        /// <param name="value">The value to which the <paramref name="location1"/> parameter is set.</param>
-        /// <returns>The original value of <paramref name="location1"/>.</returns>
-        /// <exception cref="NullReferenceException">The address of location1 is a null pointer.</exception>
-        [Intrinsic]
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static byte Exchange(ref byte location1, byte value)
-        {
-#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
-            return Exchange(ref location1, value); // Must expand intrinsic
-#else
-            if (Unsafe.IsNullRef(ref location1))
-                ThrowHelper.ThrowNullReferenceException();
-            return Exchange8(ref location1, value);
-#endif
-        }
-
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern byte Exchange8(ref byte location1, byte value);
-
-        /// <summary>Sets a 16-bit signed integer to a specified value and returns the original value, as an atomic operation.</summary>
-        /// <param name="location1">The variable to set to the specified value.</param>
-        /// <param name="value">The value to which the <paramref name="location1"/> parameter is set.</param>
-        /// <returns>The original value of <paramref name="location1"/>.</returns>
-        /// <exception cref="NullReferenceException">The address of location1 is a null pointer.</exception>
-        [Intrinsic]
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static short Exchange(ref short location1, short value)
-        {
-#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
-            return Exchange(ref location1, value); // Must expand intrinsic
-#else
-            if (Unsafe.IsNullRef(ref location1))
-                ThrowHelper.ThrowNullReferenceException();
-            return Exchange16(ref location1, value);
-#endif
-        }
-
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern short Exchange16(ref short location1, short value);
-
         /// <summary>Sets a 32-bit signed integer to a specified value and returns the original value, as an atomic operation.</summary>
         /// <param name="location1">The variable to set to the specified value.</param>
         /// <param name="value">The value to which the <paramref name="location1"/> parameter is set.</param>
@@ -162,50 +120,6 @@ public static T Exchange<T>([NotNullIfNotNull(nameof(value))] ref T location1, T
 #endregion
 
         #region CompareExchange
-        /// <summary>Compares two 8-bit unsigned integers for equality and, if they are equal, replaces the first value.</summary>
-        /// <param name="location1">The destination, whose value is compared with <paramref name="comparand"/> and possibly replaced.</param>
-        /// <param name="value">The value that replaces the destination value if the comparison results in equality.</param>
-        /// <param name="comparand">The value that is compared to the value at <paramref name="location1"/>.</param>
-        /// <returns>The original value in <paramref name="location1"/>.</returns>
-        /// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
-        [Intrinsic]
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static byte CompareExchange(ref byte location1, byte value, byte comparand)
-        {
-#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
-            return CompareExchange(ref location1, value, comparand); // Must expand intrinsic
-#else
-            if (Unsafe.IsNullRef(ref location1))
-                ThrowHelper.ThrowNullReferenceException();
-            return CompareExchange8(ref location1, value, comparand);
-#endif
-        }
-
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern byte CompareExchange8(ref byte location1, byte value, byte comparand);
-
-        /// <summary>Compares two 16-bit signed integers for equality and, if they are equal, replaces the first value.</summary>
-        /// <param name="location1">The destination, whose value is compared with <paramref name="comparand"/> and possibly replaced.</param>
-        /// <param name="value">The value that replaces the destination value if the comparison results in equality.</param>
-        /// <param name="comparand">The value that is compared to the value at <paramref name="location1"/>.</param>
-        /// <returns>The original value in <paramref name="location1"/>.</returns>
-        /// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
-        [Intrinsic]
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static short CompareExchange(ref short location1, short value, short comparand)
-        {
-#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
-            return CompareExchange(ref location1, value, comparand); // Must expand intrinsic
-#else
-            if (Unsafe.IsNullRef(ref location1))
-                ThrowHelper.ThrowNullReferenceException();
-            return CompareExchange16(ref location1, value, comparand);
-#endif
-        }
-
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern short CompareExchange16(ref short location1, short value, short comparand);
-
         /// <summary>Compares two 32-bit signed integers for equality and, if they are equal, replaces the first value.</summary>
         /// <param name="location1">The destination, whose value is compared with <paramref name="comparand"/> and possibly replaced.</param>
         /// <param name="value">The value that replaces the destination value if the comparison results in equality.</param>
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Unix.cs b/src/coreclr/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Unix.cs
index 677cfb171832..8fe80e728685 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Unix.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Unix.cs
@@ -11,7 +11,7 @@ namespace System.Threading
     /// <summary>
     /// A LIFO semaphore implemented using the PAL's semaphore with uninterruptible waits.
     /// </summary>
-    internal sealed partial class LowLevelLifoSemaphore : LowLevelLifoSemaphoreBase, IDisposable
+    internal sealed partial class LowLevelLifoSemaphore : IDisposable
     {
         private Semaphore? _semaphore;
 
@@ -34,7 +34,7 @@ public bool WaitCore(int timeoutMs)
         [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "WaitHandle_CorWaitOnePrioritizedNative")]
         private static partial int WaitNative(SafeWaitHandle handle, int timeoutMs);
 
-        protected override void ReleaseCore(int count)
+        private void ReleaseCore(int count)
         {
             Debug.Assert(_semaphore != null);
             Debug.Assert(count > 0);
diff --git a/src/coreclr/System.Private.CoreLib/src/System/ValueType.cs b/src/coreclr/System.Private.CoreLib/src/System/ValueType.cs
index cc13e37e083f..f4c3acb31adf 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/ValueType.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/ValueType.cs
@@ -10,15 +10,17 @@
 **
 ===========================================================*/
 
+using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Reflection;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 
 namespace System
 {
     [Serializable]
     [TypeForwardedFrom("mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089")]
-    public abstract class ValueType
+    public abstract partial class ValueType
     {
         [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2075:UnrecognizedReflectionPattern",
             Justification = "Trimmed fields don't make a difference for equality")]
@@ -36,7 +38,7 @@ public override unsafe bool Equals([NotNullWhen(true)] object? obj)
 
             // if there are no GC references in this object we can avoid reflection
             // and do a fast memcmp
-            if (CanCompareBits(this))
+            if (CanCompareBitsOrUseFastGetHashCode(RuntimeHelpers.GetMethodTable(obj))) // MethodTable kept alive by access to object below
             {
                 return SpanHelpers.SequenceEqual(
                     ref RuntimeHelpers.GetRawData(this),
@@ -66,8 +68,23 @@ ref RuntimeHelpers.GetRawData(obj),
             return true;
         }
 
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern bool CanCompareBits(object obj);
+        // Return true if the valuetype does not contain pointer, is tightly packed,
+        // does not have floating point number field and does not override Equals method.
+        private static unsafe bool CanCompareBitsOrUseFastGetHashCode(MethodTable* pMT)
+        {
+            MethodTableAuxiliaryData* pAuxData = pMT->AuxiliaryData;
+
+            if (pAuxData->HasCheckedCanCompareBitsOrUseFastGetHashCode)
+            {
+                return pAuxData->CanCompareBitsOrUseFastGetHashCode;
+            }
+
+            return CanCompareBitsOrUseFastGetHashCodeHelper(pMT);
+        }
+
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "MethodTable_CanCompareBitsOrUseFastGetHashCode")]
+        [return: MarshalAs(UnmanagedType.Bool)]
+        private static unsafe partial bool CanCompareBitsOrUseFastGetHashCodeHelper(MethodTable* pMT);
 
         /*=================================GetHashCode==================================
         **Action: Our algorithm for returning the hashcode is a little bit complex.  We look
@@ -79,8 +96,74 @@ ref RuntimeHelpers.GetRawData(obj),
         **Arguments: None.
         **Exceptions: None.
         ==============================================================================*/
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        public extern override int GetHashCode();
+        public override unsafe int GetHashCode()
+        {
+            // The default implementation of GetHashCode() for all value types.
+            // Note that this implementation reveals the value of the fields.
+            // So if the value type contains any sensitive information it should
+            // implement its own GetHashCode().
+
+            MethodTable* pMT = RuntimeHelpers.GetMethodTable(this);
+            ref byte rawData = ref RuntimeHelpers.GetRawData(this);
+            HashCode hashCode = default;
+
+            // To get less colliding and more evenly distributed hash codes,
+            // we munge the class index into the hashcode
+            hashCode.Add((IntPtr)pMT);
+
+            if (CanCompareBitsOrUseFastGetHashCode(pMT))
+            {
+                // this is a struct with no refs and no "strange" offsets
+                uint size = pMT->GetNumInstanceFieldBytes();
+                hashCode.AddBytes(MemoryMarshal.CreateReadOnlySpan(ref rawData, (int)size));
+            }
+            else
+            {
+                object thisRef = this;
+                switch (GetHashCodeStrategy(pMT, ObjectHandleOnStack.Create(ref thisRef), out uint fieldOffset, out uint fieldSize, out MethodTable* fieldMT))
+                {
+                    case ValueTypeHashCodeStrategy.ReferenceField:
+                        hashCode.Add(Unsafe.As<byte, object>(ref Unsafe.AddByteOffset(ref rawData, fieldOffset)).GetHashCode());
+                        break;
+
+                    case ValueTypeHashCodeStrategy.DoubleField:
+                        hashCode.Add(Unsafe.As<byte, double>(ref Unsafe.AddByteOffset(ref rawData, fieldOffset)).GetHashCode());
+                        break;
+
+                    case ValueTypeHashCodeStrategy.SingleField:
+                        hashCode.Add(Unsafe.As<byte, float>(ref Unsafe.AddByteOffset(ref rawData, fieldOffset)).GetHashCode());
+                        break;
+
+                    case ValueTypeHashCodeStrategy.FastGetHashCode:
+                        Debug.Assert(fieldSize != 0);
+                        hashCode.AddBytes(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AddByteOffset(ref rawData, fieldOffset), (int)fieldSize));
+                        break;
+
+                    case ValueTypeHashCodeStrategy.ValueTypeOverride:
+                        Debug.Assert(fieldMT != null);
+                        // Box the field to handle complicated cases like mutable method and shared generic
+                        hashCode.Add(RuntimeHelpers.Box(fieldMT, ref Unsafe.AddByteOffset(ref rawData, fieldOffset))?.GetHashCode() ?? 0);
+                        break;
+                }
+            }
+
+            return hashCode.ToHashCode();
+        }
+
+        // Must match the definition in src\vm\comutilnative.cpp
+        private enum ValueTypeHashCodeStrategy
+        {
+            None,
+            ReferenceField,
+            DoubleField,
+            SingleField,
+            FastGetHashCode,
+            ValueTypeOverride,
+        }
+
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "ValueType_GetHashCodeStrategy")]
+        private static unsafe partial ValueTypeHashCodeStrategy GetHashCodeStrategy(
+            MethodTable* pMT, ObjectHandleOnStack objHandle, out uint fieldOffset, out uint fieldSize, out MethodTable* fieldMT);
 
         public override string? ToString()
         {
diff --git a/src/coreclr/binder/assemblyname.cpp b/src/coreclr/binder/assemblyname.cpp
index 9eea2ee8ba73..0c96f6be47ec 100644
--- a/src/coreclr/binder/assemblyname.cpp
+++ b/src/coreclr/binder/assemblyname.cpp
@@ -11,10 +11,10 @@
 //
 // ============================================================
 
+#include "common.h"
 #include "assemblyname.hpp"
 #include "assemblybindercommon.hpp"
 
-#include "common.h"
 #include "utils.hpp"
 
 #include "textualidentityparser.hpp"
diff --git a/src/coreclr/classlibnative/bcltype/objectnative.cpp b/src/coreclr/classlibnative/bcltype/objectnative.cpp
index 4622955b44ad..afbda5fad991 100644
--- a/src/coreclr/classlibnative/bcltype/objectnative.cpp
+++ b/src/coreclr/classlibnative/bcltype/objectnative.cpp
@@ -123,48 +123,22 @@ FCIMPL1(INT32, ObjectNative::TryGetHashCode, Object* obj) {
 }
 FCIMPLEND
 
-//
-// Compare by ref for normal classes, by value for value types.
-//
-// <TODO>@todo: it would be nice to customize this method based on the
-// defining class rather than doing a runtime check whether it is
-// a value type.</TODO>
-//
-
-FCIMPL2(FC_BOOL_RET, ObjectNative::Equals, Object *pThisRef, Object *pCompareRef)
+FCIMPL2(FC_BOOL_RET, ObjectNative::ContentEquals, Object *pThisRef, Object *pCompareRef)
 {
-    CONTRACTL
-    {
-        FCALL_CHECK;
-        INJECT_FAULT(FCThrow(kOutOfMemoryException););
-    }
-    CONTRACTL_END;
-
-    if (pThisRef == pCompareRef)
-        FC_RETURN_BOOL(TRUE);
+    FCALL_CONTRACT;
 
-    // Since we are in FCALL, we must handle NULL specially.
-    if (pThisRef == NULL || pCompareRef == NULL)
-        FC_RETURN_BOOL(FALSE);
+    // Should be ensured by caller
+    _ASSERTE(pThisRef != NULL);
+    _ASSERTE(pCompareRef != NULL);
+    _ASSERTE(pThisRef->GetMethodTable() == pCompareRef->GetMethodTable());
 
     MethodTable *pThisMT = pThisRef->GetMethodTable();
 
-    // If it's not a value class, don't compare by value
-    if (!pThisMT->IsValueType())
-        FC_RETURN_BOOL(FALSE);
-
-    // Make sure they are the same type.
-    if (pThisMT != pCompareRef->GetMethodTable())
-        FC_RETURN_BOOL(FALSE);
-
-    // Compare the contents (size - vtable - sync block index).
-    DWORD dwBaseSize = pThisMT->GetBaseSize();
-    if(pThisMT == g_pStringClass)
-        dwBaseSize -= sizeof(WCHAR);
+    // Compare the contents
     BOOL ret = memcmp(
-        (void *) (pThisRef+1),
-        (void *) (pCompareRef+1),
-        dwBaseSize - sizeof(Object) - sizeof(int)) == 0;
+        pThisRef->GetData(),
+        pCompareRef->GetData(),
+        pThisMT->GetNumInstanceFieldBytes()) == 0;
 
     FC_GC_POLL_RET();
 
@@ -215,36 +189,34 @@ FCIMPL1(Object*, ObjectNative::GetClass, Object* pThis)
 }
 FCIMPLEND
 
-FCIMPL1(Object*, ObjectNative::AllocateUninitializedClone, Object* pObjUNSAFE)
+extern "C" void QCALLTYPE ObjectNative_AllocateUninitializedClone(QCall::ObjectHandleOnStack objHandle)
 {
-    FCALL_CONTRACT;
-
-    // Delegate error handling to managed side (it will throw NullReferenceException)
-    if (pObjUNSAFE == NULL)
-        return NULL;
+    QCALL_CONTRACT;
 
-    OBJECTREF refClone  = ObjectToOBJECTREF(pObjUNSAFE);
+    BEGIN_QCALL;
 
-    HELPER_METHOD_FRAME_BEGIN_RET_1(refClone);
+    GCX_COOP();
 
+    OBJECTREF refClone = objHandle.Get();
+    _ASSERTE(refClone != NULL); // Should be handled at managed side
     MethodTable* pMT = refClone->GetMethodTable();
-
+    
     // assert that String has overloaded the Clone() method
     _ASSERTE(pMT != g_pStringClass);
-
-    if (pMT->IsArray()) {
-        refClone = DupArrayForCloning((BASEARRAYREF)refClone);
-    } else {
+    
+    if (pMT->IsArray())
+    {
+        objHandle.Set(DupArrayForCloning((BASEARRAYREF)refClone));
+    }
+    else
+    {
         // We don't need to call the <cinit> because we know
         //  that it has been called....(It was called before this was created)
-        refClone = AllocateObject(pMT);
+        objHandle.Set(AllocateObject(pMT));
     }
 
-    HELPER_METHOD_FRAME_END();
-
-    return OBJECTREFToObject(refClone);
+    END_QCALL;
 }
-FCIMPLEND
 
 extern "C" BOOL QCALLTYPE Monitor_Wait(QCall::ObjectHandleOnStack pThis, INT32 Timeout)
 {
diff --git a/src/coreclr/classlibnative/bcltype/objectnative.h b/src/coreclr/classlibnative/bcltype/objectnative.h
index d8948922dd0b..418fd2561d7c 100644
--- a/src/coreclr/classlibnative/bcltype/objectnative.h
+++ b/src/coreclr/classlibnative/bcltype/objectnative.h
@@ -27,12 +27,12 @@ class ObjectNative
 
     static FCDECL1(INT32, GetHashCode, Object* vThisRef);
     static FCDECL1(INT32, TryGetHashCode, Object* vThisRef);
-    static FCDECL2(FC_BOOL_RET, Equals, Object *pThisRef, Object *pCompareRef);
-    static FCDECL1(Object*, AllocateUninitializedClone, Object* pObjUNSAFE);
+    static FCDECL2(FC_BOOL_RET, ContentEquals, Object *pThisRef, Object *pCompareRef);
     static FCDECL1(Object*, GetClass, Object* pThis);
     static FCDECL1(FC_BOOL_RET, IsLockHeld, Object* pThisUNSAFE);
 };
 
+extern "C" void QCALLTYPE ObjectNative_AllocateUninitializedClone(QCall::ObjectHandleOnStack objHandle);
 extern "C" BOOL QCALLTYPE Monitor_Wait(QCall::ObjectHandleOnStack pThis, INT32 Timeout);
 extern "C" void QCALLTYPE Monitor_Pulse(QCall::ObjectHandleOnStack pThis);
 extern "C" void QCALLTYPE Monitor_PulseAll(QCall::ObjectHandleOnStack pThis);
diff --git a/src/coreclr/classlibnative/bcltype/system.cpp b/src/coreclr/classlibnative/bcltype/system.cpp
index ef02743b3669..5d2f00cd849d 100644
--- a/src/coreclr/classlibnative/bcltype/system.cpp
+++ b/src/coreclr/classlibnative/bcltype/system.cpp
@@ -133,124 +133,67 @@ extern "C" INT32 QCALLTYPE Environment_GetProcessorCount()
     return processorCount;
 }
 
-// FailFast is supported in BCL.small as internal to support failing fast in places where EEE used to be thrown.
-//
-// Static message buffer used by SystemNative::FailFast to avoid reliance on a
-// managed string object buffer. This buffer is not always used, see comments in
-// the method below.
-WCHAR g_szFailFastBuffer[256];
-WCHAR *g_pFailFastBuffer = g_szFailFastBuffer;
-
-#define FAIL_FAST_STATIC_BUFFER_LENGTH (sizeof(g_szFailFastBuffer) / sizeof(WCHAR))
-
-// This is the common code for FailFast processing that is wrapped by the two
-// FailFast FCalls below.
-void SystemNative::GenericFailFast(STRINGREF refMesgString, EXCEPTIONREF refExceptionForWatsonBucketing, UINT_PTR retAddress, UINT exitCode, STRINGREF refErrorSourceString)
-{
+struct FindFailFastCallerStruct {
+    StackCrawlMark* pStackMark;
+    UINT_PTR        retAddress;
+};
+
+// This method is called by the GetMethod function and will crawl backward
+//  up the stack for integer methods.
+static StackWalkAction FindFailFastCallerCallback(CrawlFrame* frame, VOID* data) {
     CONTRACTL
     {
-        THROWS;
-        GC_TRIGGERS;
-        MODE_COOPERATIVE;
-    }CONTRACTL_END;
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+    }
+    CONTRACTL_END;
+
+    FindFailFastCallerStruct* pFindCaller = (FindFailFastCallerStruct*) data;
+
+    // The check here is between the address of a local variable
+    // (the stack mark) and a pointer to the EIP for a frame
+    // (which is actually the pointer to the return address to the
+    // function from the previous frame). So we'll actually notice
+    // which frame the stack mark was in one frame later. This is
+    // fine since we only implement LookForMyCaller.
+    _ASSERTE(*pFindCaller->pStackMark == LookForMyCaller);
+    if (!frame->IsInCalleesFrames(pFindCaller->pStackMark))
+        return SWA_CONTINUE;
+
+    pFindCaller->retAddress = GetControlPC(frame->GetRegisterSet());
+    return SWA_ABORT;
+}
 
-    struct
-    {
-        STRINGREF refMesgString;
-        EXCEPTIONREF refExceptionForWatsonBucketing;
-        STRINGREF refErrorSourceString;
-    } gc;
-    gc.refMesgString = refMesgString;
-    gc.refExceptionForWatsonBucketing = refExceptionForWatsonBucketing;
-    gc.refErrorSourceString = refErrorSourceString;
-
-    GCPROTECT_BEGIN(gc);
-
-    // Managed code injected FailFast maps onto the unmanaged version
-    // (EEPolicy::HandleFatalError) in the following manner: the exit code is
-    // always set to COR_E_FAILFAST and the address passed (usually a failing
-    // EIP) is in fact the address of a unicode message buffer (explaining the
-    // reason for the fault).
-    // The message string comes from a managed string object so we can't rely on
-    // the buffer remaining in place below our feet. But equally we don't want
-    // to inject failure points (by, for example, allocating a heap buffer or a
-    // pinning handle) when we have a much higher chance than usual of actually
-    // tripping those failure points and eradicating useful debugging info.
-    // We employ various strategies to deal with this:
-    //   o  If the message is small enough we copy it into a static buffer
-    //      (g_szFailFastBuffer).
-    //   o  Otherwise we try to allocate a buffer of the required size on the
-    //      heap. This buffer will be leaked.
-    //   o  If the allocation above fails we return to the static buffer and
-    //      truncate the message.
-    //
-    // Another option would seem to be to implement a new frame type that
-    // protects object references as pinned, but that seems like overkill for
-    // just this problem.
-    WCHAR  *pszMessageBuffer = NULL;
-    DWORD   cchMessage = (gc.refMesgString == NULL) ? 0 : gc.refMesgString->GetStringLength();
-
-    WCHAR * errorSourceString = NULL;
-
-    if (gc.refErrorSourceString != NULL)
-    {
-        DWORD cchErrorSource = gc.refErrorSourceString->GetStringLength();
-        errorSourceString = new (nothrow) WCHAR[cchErrorSource + 1];
+extern "C" void QCALLTYPE Environment_FailFast(QCall::StackCrawlMarkHandle mark, PCWSTR message, QCall::ObjectHandleOnStack exception, PCWSTR errorSource)
+{
+    QCALL_CONTRACT;
 
-        if (errorSourceString != NULL)
-        {
-            memcpyNoGCRefs(errorSourceString, gc.refErrorSourceString->GetBuffer(), cchErrorSource * sizeof(WCHAR));
-            errorSourceString[cchErrorSource] = W('\0');
-        }
-    }
+    BEGIN_QCALL;
 
-    if (cchMessage < FAIL_FAST_STATIC_BUFFER_LENGTH)
-    {
-        // The static buffer can be used only once to avoid race condition with other threads
-        pszMessageBuffer = InterlockedExchangeT(&g_pFailFastBuffer, NULL);
-    }
+    GCX_COOP();
 
-    if (pszMessageBuffer == NULL)
-    {
-        // We can fail here, but we can handle the fault.
-        CONTRACT_VIOLATION(FaultViolation);
-        pszMessageBuffer = new (nothrow) WCHAR[cchMessage + 1];
-        if (pszMessageBuffer == NULL)
-        {
-            // Truncate the message to what will fit in the static buffer.
-            cchMessage = FAIL_FAST_STATIC_BUFFER_LENGTH - 1;
-            pszMessageBuffer = InterlockedExchangeT(&g_pFailFastBuffer, NULL);
-        }
-    }
+    FindFailFastCallerStruct findCallerData;
+    findCallerData.pStackMark = mark;
+    findCallerData.retAddress = 0;
+    GetThread()->StackWalkFrames(FindFailFastCallerCallback, &findCallerData, FUNCTIONSONLY | QUICKUNWIND);
 
-    const WCHAR *pszMessage;
-    if (pszMessageBuffer != NULL)
+    if (message == NULL || message[0] == W('\0'))
     {
-        if (cchMessage > 0)
-            memcpyNoGCRefs(pszMessageBuffer, gc.refMesgString->GetBuffer(), cchMessage * sizeof(WCHAR));
-        pszMessageBuffer[cchMessage] = W('\0');
-        pszMessage = pszMessageBuffer;
+        WszOutputDebugString(W("CLR: Managed code called FailFast without specifying a reason.\r\n"));
     }
     else
     {
-        pszMessage = W("There is not enough memory to print the supplied FailFast message.");
-        cchMessage = (DWORD)u16_strlen(pszMessage);
-    }
-
-    if (cchMessage == 0) {
-        WszOutputDebugString(W("CLR: Managed code called FailFast without specifying a reason.\r\n"));
-    }
-    else {
         WszOutputDebugString(W("CLR: Managed code called FailFast.\r\n"));
-        WszOutputDebugString(pszMessage);
+        WszOutputDebugString(message);
         WszOutputDebugString(W("\r\n"));
     }
 
     LPCWSTR argExceptionString = NULL;
     StackSString msg;
-    if (gc.refExceptionForWatsonBucketing != NULL)
+    if (exception.Get() != NULL)
     {
-        GetExceptionMessage(gc.refExceptionForWatsonBucketing, msg);
+        GetExceptionMessage(exception.Get(), msg);
         argExceptionString = msg.GetUnicode();
     }
 
@@ -263,11 +206,11 @@ void SystemNative::GenericFailFast(STRINGREF refMesgString, EXCEPTIONREF refExce
     // skip this, if required.
     if (IsWatsonEnabled())
     {
-        if ((gc.refExceptionForWatsonBucketing == NULL) || !SetupWatsonBucketsForFailFast(gc.refExceptionForWatsonBucketing))
+        if ((exception.Get() == NULL) || !SetupWatsonBucketsForFailFast((EXCEPTIONREF)exception.Get()))
         {
             PTR_EHWatsonBucketTracker pUEWatsonBucketTracker = pThread->GetExceptionState()->GetUEWatsonBucketTracker();
             _ASSERTE(pUEWatsonBucketTracker != NULL);
-            pUEWatsonBucketTracker->SaveIpForWatsonBucket(retAddress);
+            pUEWatsonBucketTracker->SaveIpForWatsonBucket(findCallerData.retAddress);
             pUEWatsonBucketTracker->CaptureUnhandledInfoForWatson(TypeOfReportedError::FatalError, pThread, NULL);
             if (pUEWatsonBucketTracker->RetrieveWatsonBuckets() == NULL)
             {
@@ -279,90 +222,13 @@ void SystemNative::GenericFailFast(STRINGREF refMesgString, EXCEPTIONREF refExce
 
     // stash the user-provided exception object. this will be used as
     // the inner exception object to the FatalExecutionEngineException.
-    if (gc.refExceptionForWatsonBucketing != NULL)
-        pThread->SetLastThrownObject(gc.refExceptionForWatsonBucketing);
-
-    EEPolicy::HandleFatalError(exitCode, retAddress, pszMessage, NULL, errorSourceString, argExceptionString);
-
-    GCPROTECT_END();
-}
-
-// Note: Do not merge this FCALL method with any other FailFast overloads.
-// Watson uses the managed FailFast method with one String for crash dump bucketization.
-FCIMPL1(VOID, SystemNative::FailFast, StringObject* refMessageUNSAFE)
-{
-    FCALL_CONTRACT;
-
-    STRINGREF refMessage = (STRINGREF)refMessageUNSAFE;
-
-    HELPER_METHOD_FRAME_BEGIN_1(refMessage);
-
-    // The HelperMethodFrame knows how to get the return address.
-    UINT_PTR retaddr = HELPER_METHOD_FRAME_GET_RETURN_ADDRESS();
-
-    // Call the actual worker to perform failfast
-    GenericFailFast(refMessage, NULL, retaddr, COR_E_FAILFAST, NULL);
+    if (exception.Get() != NULL)
+        pThread->SetLastThrownObject(exception.Get());
 
-    HELPER_METHOD_FRAME_END();
-}
-FCIMPLEND
-
-FCIMPL2(VOID, SystemNative::FailFastWithExitCode, StringObject* refMessageUNSAFE, UINT exitCode)
-{
-    FCALL_CONTRACT;
-
-    STRINGREF refMessage = (STRINGREF)refMessageUNSAFE;
-
-    HELPER_METHOD_FRAME_BEGIN_1(refMessage);
-
-    // The HelperMethodFrame knows how to get the return address.
-    UINT_PTR retaddr = HELPER_METHOD_FRAME_GET_RETURN_ADDRESS();
-
-    // Call the actual worker to perform failfast
-    GenericFailFast(refMessage, NULL, retaddr, exitCode, NULL);
-
-    HELPER_METHOD_FRAME_END();
-}
-FCIMPLEND
-
-FCIMPL2(VOID, SystemNative::FailFastWithException, StringObject* refMessageUNSAFE, ExceptionObject* refExceptionUNSAFE)
-{
-    FCALL_CONTRACT;
-
-    STRINGREF refMessage = (STRINGREF)refMessageUNSAFE;
-    EXCEPTIONREF refException = (EXCEPTIONREF)refExceptionUNSAFE;
+    EEPolicy::HandleFatalError(COR_E_FAILFAST, findCallerData.retAddress, message, NULL, errorSource, argExceptionString);
 
-    HELPER_METHOD_FRAME_BEGIN_2(refMessage, refException);
-
-    // The HelperMethodFrame knows how to get the return address.
-    UINT_PTR retaddr = HELPER_METHOD_FRAME_GET_RETURN_ADDRESS();
-
-    // Call the actual worker to perform failfast
-    GenericFailFast(refMessage, refException, retaddr, COR_E_FAILFAST, NULL);
-
-    HELPER_METHOD_FRAME_END();
-}
-FCIMPLEND
-
-FCIMPL3(VOID, SystemNative::FailFastWithExceptionAndSource, StringObject* refMessageUNSAFE, ExceptionObject* refExceptionUNSAFE, StringObject* errorSourceUNSAFE)
-{
-    FCALL_CONTRACT;
-
-    STRINGREF refMessage = (STRINGREF)refMessageUNSAFE;
-    EXCEPTIONREF refException = (EXCEPTIONREF)refExceptionUNSAFE;
-    STRINGREF errorSource = (STRINGREF)errorSourceUNSAFE;
-
-    HELPER_METHOD_FRAME_BEGIN_3(refMessage, refException, errorSource);
-
-    // The HelperMethodFrame knows how to get the return address.
-    UINT_PTR retaddr = HELPER_METHOD_FRAME_GET_RETURN_ADDRESS();
-
-    // Call the actual worker to perform failfast
-    GenericFailFast(refMessage, refException, retaddr, COR_E_FAILFAST, errorSource);
-
-    HELPER_METHOD_FRAME_END();
+    END_QCALL;
 }
-FCIMPLEND
 
 FCIMPL0(FC_BOOL_RET, SystemNative::IsServerGC)
 {
diff --git a/src/coreclr/classlibnative/bcltype/system.h b/src/coreclr/classlibnative/bcltype/system.h
index b4a773a847c3..9c5ab7ada84a 100644
--- a/src/coreclr/classlibnative/bcltype/system.h
+++ b/src/coreclr/classlibnative/bcltype/system.h
@@ -43,23 +43,16 @@ class SystemNative
     static FCDECL1(VOID,SetExitCode,INT32 exitcode);
     static FCDECL0(INT32, GetExitCode);
 
-    static FCDECL1(VOID, FailFast, StringObject* refMessageUNSAFE);
-    static FCDECL2(VOID, FailFastWithExitCode, StringObject* refMessageUNSAFE, UINT exitCode);
-    static FCDECL2(VOID, FailFastWithException, StringObject* refMessageUNSAFE, ExceptionObject* refExceptionUNSAFE);
-    static FCDECL3(VOID, FailFastWithExceptionAndSource, StringObject* refMessageUNSAFE, ExceptionObject* refExceptionUNSAFE, StringObject* errorSourceUNSAFE);
-
     static FCDECL0(FC_BOOL_RET, IsServerGC);
 
     // Return a method info for the method were the exception was thrown
     static FCDECL1(ReflectMethodObject*, GetMethodFromStackTrace, ArrayBase* pStackTraceUNSAFE);
-
-private:
-    // Common processing code for FailFast
-    static void GenericFailFast(STRINGREF refMesgString, EXCEPTIONREF refExceptionForWatsonBucketing, UINT_PTR retAddress, UINT exitCode, STRINGREF errorSource);
 };
 
 extern "C" void QCALLTYPE Environment_Exit(INT32 exitcode);
 
+extern "C" void QCALLTYPE Environment_FailFast(QCall::StackCrawlMarkHandle mark, PCWSTR message, QCall::ObjectHandleOnStack exception, PCWSTR errorSource);
+
 // Returns the number of logical processors that can be used by managed code
 extern "C" INT32 QCALLTYPE Environment_GetProcessorCount();
 
diff --git a/src/coreclr/classlibnative/float/CMakeLists.txt b/src/coreclr/classlibnative/float/CMakeLists.txt
index a3032c4887c4..1dbe160248f2 100644
--- a/src/coreclr/classlibnative/float/CMakeLists.txt
+++ b/src/coreclr/classlibnative/float/CMakeLists.txt
@@ -1,5 +1,3 @@
-set(CMAKE_INCLUDE_CURRENT_DIR ON)
-
 include_directories("../inc")
 
 set(FLOAT_SOURCES
diff --git a/src/coreclr/classlibnative/float/floatdouble.cpp b/src/coreclr/classlibnative/float/floatdouble.cpp
index d20b772eb220..7ecd6e375d63 100644
--- a/src/coreclr/classlibnative/float/floatdouble.cpp
+++ b/src/coreclr/classlibnative/float/floatdouble.cpp
@@ -181,15 +181,6 @@ FCIMPLEND
 #pragma float_control(pop)
 #endif
 
-/*=====================================FMod=====================================
-**
-==============================================================================*/
-FCIMPL2_VV(double, COMDouble::FMod, double x, double y)
-    FCALL_CONTRACT;
-
-    return fmod(x, y);
-FCIMPLEND
-
 /*=====================================FusedMultiplyAdd==========================
 **
 ==============================================================================*/
@@ -253,6 +244,13 @@ FCIMPL1_V(double, COMDouble::Sin, double x)
     return sin(x);
 FCIMPLEND
 
+#if defined(_MSC_VER)
+// The /fp:fast form of `sincos` for xarch returns sin twice, rather than sincos
+// https://developercommunity.visualstudio.com/t/MSVCs-sincos-implementation-is-incorrec/10582378
+#pragma float_control(push)
+#pragma float_control(precise, on)
+#endif
+
 /*====================================SinCos====================================
 **
 ==============================================================================*/
@@ -262,12 +260,18 @@ FCIMPL3_VII(void, COMDouble::SinCos, double x, double* pSin, double* pCos)
 #ifdef _MSC_VER
     *pSin = sin(x);
     *pCos = cos(x);
+#elif defined(__APPLE__)
+    __sincos(x, pSin, pCos);
 #else
     sincos(x, pSin, pCos);
 #endif
 
 FCIMPLEND
 
+#if defined(_MSC_VER)
+#pragma float_control(pop)
+#endif
+
 /*=====================================Sinh=====================================
 **
 ==============================================================================*/
diff --git a/src/coreclr/classlibnative/float/floatsingle.cpp b/src/coreclr/classlibnative/float/floatsingle.cpp
index 1694fd78cb84..5b7bdaa7c613 100644
--- a/src/coreclr/classlibnative/float/floatsingle.cpp
+++ b/src/coreclr/classlibnative/float/floatsingle.cpp
@@ -156,15 +156,6 @@ FCIMPL1_V(float, COMSingle::Floor, float x)
     return floorf(x);
 FCIMPLEND
 
-/*=====================================FMod=====================================
-**
-==============================================================================*/
-FCIMPL2_VV(float, COMSingle::FMod, float x, float y)
-    FCALL_CONTRACT;
-
-    return fmodf(x, y);
-FCIMPLEND
-
 /*=====================================FusedMultiplyAdd==========================
 **
 ==============================================================================*/
@@ -228,6 +219,13 @@ FCIMPL1_V(float, COMSingle::Sin, float x)
     return sinf(x);
 FCIMPLEND
 
+#if defined(_MSC_VER)
+// The /fp:fast form of `sincos` for xarch returns sin twice, rather than sincos
+// https://developercommunity.visualstudio.com/t/MSVCs-sincos-implementation-is-incorrec/10582378
+#pragma float_control(push)
+#pragma float_control(precise, on)
+#endif
+
 /*====================================SinCos====================================
 **
 ==============================================================================*/
@@ -237,12 +235,18 @@ FCIMPL3_VII(void, COMSingle::SinCos, float x, float* pSin, float* pCos)
 #ifdef _MSC_VER
     *pSin = sinf(x);
     *pCos = cosf(x);
+#elif defined(__APPLE__)
+    __sincosf(x, pSin, pCos);
 #else
     sincosf(x, pSin, pCos);
 #endif
 
 FCIMPLEND
 
+#if defined(_MSC_VER)
+#pragma float_control(pop)
+#endif
+
 /*=====================================Sinh=====================================
 **
 ==============================================================================*/
diff --git a/src/coreclr/classlibnative/inc/floatdouble.h b/src/coreclr/classlibnative/inc/floatdouble.h
index 43fecbd47431..ee257bf0d39d 100644
--- a/src/coreclr/classlibnative/inc/floatdouble.h
+++ b/src/coreclr/classlibnative/inc/floatdouble.h
@@ -22,7 +22,6 @@ class COMDouble {
     FCDECL1_V(static double, Cosh, double x);
     FCDECL1_V(static double, Exp, double x);
     FCDECL1_V(static double, Floor, double x);
-    FCDECL2_VV(static double, FMod, double x, double y);
     FCDECL3_VVV(static double, FusedMultiplyAdd, double x, double y, double z);
     FCDECL1_V(static double, Log, double x);
     FCDECL1_V(static double, Log2, double x);
diff --git a/src/coreclr/classlibnative/inc/floatsingle.h b/src/coreclr/classlibnative/inc/floatsingle.h
index 765032ce6371..76e236704d11 100644
--- a/src/coreclr/classlibnative/inc/floatsingle.h
+++ b/src/coreclr/classlibnative/inc/floatsingle.h
@@ -22,7 +22,6 @@ class COMSingle {
     FCDECL1_V(static float, Cosh, float x);
     FCDECL1_V(static float, Exp, float x);
     FCDECL1_V(static float, Floor, float x);
-    FCDECL2_VV(static float, FMod, float x, float y);
     FCDECL3_VVV(static float, FusedMultiplyAdd, float x, float y, float z);
     FCDECL1_V(static float, Log, float x);
     FCDECL1_V(static float, Log2, float x);
diff --git a/src/coreclr/clr.featuredefines.props b/src/coreclr/clr.featuredefines.props
index dccd5d0f150c..7905f8a573d7 100644
--- a/src/coreclr/clr.featuredefines.props
+++ b/src/coreclr/clr.featuredefines.props
@@ -1,37 +1,26 @@
 <Project>
     <PropertyGroup>
         <FeatureCoreCLR>true</FeatureCoreCLR>
-        <FeatureCollectibleALC>true</FeatureCollectibleALC>
         <FeatureEventTrace>true</FeatureEventTrace>
         <FeatureICastable>true</FeatureICastable>
         <FeaturePerfTracing>true</FeaturePerfTracing>
         <FeatureTypeEquivalence>true</FeatureTypeEquivalence>
-        <FeatureBasicFreeze>true</FeatureBasicFreeze>
         <ProfilingSupportedBuild>true</ProfilingSupportedBuild>
     </PropertyGroup>
 
     <PropertyGroup Condition="'$(TargetsUnix)' == 'true'">
         <FeatureXplatEventSource Condition="'$(TargetOS)' == 'linux'">true</FeatureXplatEventSource>
-
         <FeatureArrayStubAsIL>true</FeatureArrayStubAsIL>
         <FeatureMulticastStubAsIL>true</FeatureMulticastStubAsIL>
-        <FeaturePortableShuffleThunks Condition="'$(Platform)' != 'x86'">true</FeaturePortableShuffleThunks>
-        <FeatureInstantiatingStubAsIL>true</FeatureInstantiatingStubAsIL>
-        <FeatureStubsAsIL>true</FeatureStubsAsIL>
         <FeatureComWrappers>true</FeatureComWrappers>
     </PropertyGroup>
 
     <PropertyGroup Condition="'$(TargetsWindows)' == 'true'">
         <FeatureArrayStubAsIL Condition="'$(Platform)' != 'x86'">true</FeatureArrayStubAsIL>
         <FeatureMulticastStubAsIL Condition="'$(Platform)' != 'x86'">true</FeatureMulticastStubAsIL>
-        <FeatureStubsAsIL Condition="'$(Platform)' == 'arm64'">true</FeatureStubsAsIL>
         <FeatureComWrappers>true</FeatureComWrappers>
         <FeatureCominterop>true</FeatureCominterop>
         <FeatureCominteropApartmentSupport>true</FeatureCominteropApartmentSupport>
-        <FeatureWin32Registry>true</FeatureWin32Registry>
-        <FeatureProfAttach>true</FeatureProfAttach>
-        <FeaturePortableShuffleThunks Condition="'$(Platform)' != 'x86'">true</FeaturePortableShuffleThunks>
-        <FeatureInstantiatingStubAsIL Condition="'$(Platform)' != 'x86'">true</FeatureInstantiatingStubAsIL>
     </PropertyGroup>
 
     <PropertyGroup Condition="'$(TargetsOSX)' == 'true' OR '$(TargetsMacCatalyst)' == 'true' OR '$(TargetsiOS)' == 'true' OR '$(TargetstvOS)' == 'true'">
@@ -45,23 +34,16 @@
     <PropertyGroup>
         <DefineConstants Condition="'$(FeatureArrayStubAsIL)' == 'true'">$(DefineConstants);FEATURE_ARRAYSTUB_AS_IL</DefineConstants>
         <DefineConstants Condition="'$(FeatureMulticastStubAsIL)' == 'true'">$(DefineConstants);FEATURE_MULTICASTSTUB_AS_IL</DefineConstants>
-        <DefineConstants Condition="'$(FeatureInstantiatingStubAsIL)' == 'true'">$(DefineConstants);FEATURE_INSTANTIATINGSTUB_AS_IL</DefineConstants>
-        <DefineConstants Condition="'$(FeatureStubsAsIL)' == 'true'">$(DefineConstants);FEATURE_STUBS_AS_IL</DefineConstants>
-        <DefineConstants Condition="'$(FeatureCollectibleALC)' == 'true'">$(DefineConstants);FEATURE_COLLECTIBLE_ALC</DefineConstants>
         <DefineConstants Condition="'$(FeatureComWrappers)' == 'true'">$(DefineConstants);FEATURE_COMWRAPPERS</DefineConstants>
         <DefineConstants Condition="'$(FeatureCominterop)' == 'true'">$(DefineConstants);FEATURE_COMINTEROP</DefineConstants>
         <DefineConstants Condition="'$(FeatureCominteropApartmentSupport)' == 'true'">$(DefineConstants);FEATURE_COMINTEROP_APARTMENT_SUPPORT</DefineConstants>
         <DefineConstants Condition="'$(FeatureObjCMarshal)' == 'true'">$(DefineConstants);FEATURE_OBJCMARSHAL</DefineConstants>
         <DefineConstants Condition="'$(FeaturePerfTracing)' == 'true'">$(DefineConstants);FEATURE_PERFTRACING</DefineConstants>
         <DefineConstants Condition="'$(FeatureXplatEventSource)' == 'true'">$(DefineConstants);FEATURE_EVENTSOURCE_XPLAT</DefineConstants>
-        <DefineConstants Condition="'$(FeatureWin32Registry)' == 'true'">$(DefineConstants);FEATURE_WIN32_REGISTRY</DefineConstants>
         <DefineConstants Condition="'$(FeatureTypeEquivalence)' == 'true'">$(DefineConstants);FEATURE_TYPEEQUIVALENCE</DefineConstants>
-        <DefineConstants Condition="'$(FeatureBasicFreeze)' == 'true'">$(DefineConstants);FEATURE_BASICFREEZE</DefineConstants>
-        <DefineConstants Condition="'$(FeaturePortableShuffleThunks)' == 'true'">$(DefineConstants);FEATURE_PORTABLE_SHUFFLE_THUNKS</DefineConstants>
         <DefineConstants Condition="'$(FeatureICastable)' == 'true'">$(DefineConstants);FEATURE_ICASTABLE</DefineConstants>
         <DefineConstants Condition="'$(FeatureEHFunclets)' == 'true'">$(DefineConstants);FEATURE_EH_FUNCLETS</DefineConstants>
 
         <DefineConstants Condition="'$(ProfilingSupportedBuild)' == 'true'">$(DefineConstants);PROFILING_SUPPORTED</DefineConstants>
-        <DefineConstants Condition="'$(FeatureProfAttach)' == 'true'">$(DefineConstants);FEATURE_PROFAPI_ATTACH_DETACH</DefineConstants>
     </PropertyGroup>
 </Project>
diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake
index 30391aa30c0b..a739bc6660f6 100644
--- a/src/coreclr/clrdefinitions.cmake
+++ b/src/coreclr/clrdefinitions.cmake
@@ -53,6 +53,7 @@ if(CLR_CMAKE_HOST_WIN32)
   add_definitions(-D_WIN32_WINNT=0x0602)
   add_definitions(-DWIN32_LEAN_AND_MEAN)
   add_definitions(-D_CRT_SECURE_NO_WARNINGS)
+  add_compile_definitions(NOMINMAX)
 endif(CLR_CMAKE_HOST_WIN32)
 
 if (NOT (CLR_CMAKE_TARGET_ARCH_I386 AND CLR_CMAKE_TARGET_UNIX))
@@ -92,6 +93,7 @@ if(CLR_CMAKE_TARGET_WIN32)
     add_definitions(-DFEATURE_COMINTEROP)
     add_definitions(-DFEATURE_COMINTEROP_APARTMENT_SUPPORT)
     add_definitions(-DFEATURE_COMINTEROP_UNMANAGED_ACTIVATION)
+    add_definitions(-DFEATURE_IJW) # C++/CLI managed/native interop support
 endif(CLR_CMAKE_TARGET_WIN32)
 
 add_definitions(-DFEATURE_BASICFREEZE)
@@ -154,6 +156,9 @@ endif(CLR_CMAKE_TARGET_LINUX AND CLR_CMAKE_HOST_LINUX)
 if(CLR_CMAKE_TARGET_FREEBSD)
     add_compile_definitions(FEATURE_PERFMAP)
 endif(CLR_CMAKE_TARGET_FREEBSD)
+if(CLR_CMAKE_TARGET_APPLE)
+    add_compile_definitions(FEATURE_PERFMAP)
+endif(CLR_CMAKE_TARGET_APPLE)
 
 if(FEATURE_COMWRAPPERS)
     add_compile_definitions(FEATURE_COMWRAPPERS)
@@ -203,9 +208,6 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGE
   add_definitions(-DFEATURE_MANUALLY_MANAGED_CARD_BUNDLES)
 endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
 
-if(NOT CLR_CMAKE_TARGET_UNIX)
-    add_definitions(-DFEATURE_WIN32_REGISTRY)
-endif(NOT CLR_CMAKE_TARGET_UNIX)
 add_definitions(-D_SECURE_SCL=0)
 add_definitions(-DUNICODE)
 add_definitions(-D_UNICODE)
@@ -258,7 +260,7 @@ function(set_target_definitions_to_custom_os_and_arch)
     if (TARGETDETAILS_OS STREQUAL "unix_anyos")
       target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE TARGET_UNIX_ANYOS)
     endif()
-  elseif (TARGETDETAILS_OS STREQUAL "win")
+  elseif (TARGETDETAILS_OS MATCHES "^win")
     target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE TARGET_WINDOWS)
   endif((TARGETDETAILS_OS MATCHES "^unix"))
 
@@ -293,8 +295,4 @@ function(set_target_definitions_to_custom_os_and_arch)
   if (TARGETDETAILS_ARCH STREQUAL "armel")
     target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE ARM_SOFTFP)
   endif()
-
-  if (NOT (TARGETDETAILS_ARCH STREQUAL "x86") OR (TARGETDETAILS_OS MATCHES "^unix"))
-    target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_EH_FUNCLETS)
-  endif (NOT (TARGETDETAILS_ARCH STREQUAL "x86") OR (TARGETDETAILS_OS MATCHES "^unix"))
 endfunction()
diff --git a/src/coreclr/crossgen-corelib.proj b/src/coreclr/crossgen-corelib.proj
index 1c63c777c7eb..1d0a6e2262ef 100644
--- a/src/coreclr/crossgen-corelib.proj
+++ b/src/coreclr/crossgen-corelib.proj
@@ -6,7 +6,7 @@
 
   <ItemGroup>
     <ProjectReference Include="$(CoreClrProjectRoot)/tools/aot/crossgen2/crossgen2_inbuild.csproj" OutputItemType="Crossgen2" />
-    <ProjectReference Include="$(CoreClrProjectRoot)/tools/dotnet-pgo/dotnet-pgo.csproj" OutputItemType="DotNetPgo" Condition="'$(DotNetBuildFromSource)' != 'true'" />
+    <ProjectReference Include="$(CoreClrProjectRoot)/tools/dotnet-pgo/dotnet-pgo.csproj" OutputItemType="DotNetPgo" Condition="'$(DotNetBuildSourceOnly)' != 'true'" />
     <ProjectReference Include="$([MSBuild]::NormalizePath('$(CoreClrProjectRoot)', 'System.Private.CoreLib', 'System.Private.CoreLib.csproj'))" OutputItemType="CoreLib" />
   </ItemGroup>
 
@@ -23,7 +23,6 @@
   <PropertyGroup>
     <BuildDll>true</BuildDll>
     <BuildDll Condition="'$(TargetOS)' == 'netbsd' or '$(TargetOS)' == 'illumos' or '$(TargetOS)' == 'solaris' or '$(TargetOS)' == 'haiku'">false</BuildDll>
-    <BuildDll Condition="'$(TargetArchitecture)' == 'riscv64'">false</BuildDll>
 
     <BuildPdb>false</BuildPdb>
     <BuildPdb Condition="$(BuildDll) and '$(OS)' == 'Windows_NT' and '$(TargetOS)' == 'windows'">true</BuildPdb>
diff --git a/src/coreclr/debug/createdump/CMakeLists.txt b/src/coreclr/debug/createdump/CMakeLists.txt
index 71e5b78b08e5..3c72b8a0fa42 100644
--- a/src/coreclr/debug/createdump/CMakeLists.txt
+++ b/src/coreclr/debug/createdump/CMakeLists.txt
@@ -56,8 +56,6 @@ else(CLR_CMAKE_HOST_WIN32)
       endif(CLR_CMAKE_HOST_OSX)
     endif (CORECLR_SET_RPATH)
 
-    add_definitions(-DPAL_STDCPP_COMPAT)
-
     # This is so we can include "version.c"
     include_directories(${CMAKE_BINARY_DIR})
 
diff --git a/src/coreclr/debug/createdump/crashinfounix.cpp b/src/coreclr/debug/createdump/crashinfounix.cpp
index 9f72707263b0..263818e7ad79 100644
--- a/src/coreclr/debug/createdump/crashinfounix.cpp
+++ b/src/coreclr/debug/createdump/crashinfounix.cpp
@@ -221,7 +221,7 @@ CrashInfo::EnumerateMemoryRegions()
         printf_error("snprintf failed building /proc/<pid>/maps\n");
         return false;
     }
-    FILE* mapsFile = fopen(mapPath, "r");
+    FILE* mapsFile = fopen(mapPath, "rb");
     if (mapsFile == nullptr)
     {
         printf_error("Problem reading maps file: fopen(%s) FAILED %s (%d)\n", mapPath, strerror(errno), errno);
@@ -554,7 +554,7 @@ GetStatus(pid_t pid, pid_t* ppid, pid_t* tgid, std::string* name)
         return false;
     }
 
-    FILE *statusFile = fopen(statusPath, "r");
+    FILE *statusFile = fopen(statusPath, "rb");
     if (statusFile == nullptr)
     {
         printf_error("GetStatus fopen(%s) FAILED %s (%d)\n", statusPath, strerror(errno), errno);
diff --git a/src/coreclr/debug/createdump/createdumppal.cpp b/src/coreclr/debug/createdump/createdumppal.cpp
index 4dd7204ce91f..03b06a84a461 100644
--- a/src/coreclr/debug/createdump/createdumppal.cpp
+++ b/src/coreclr/debug/createdump/createdumppal.cpp
@@ -230,26 +230,6 @@ size_t u16_strlen(const WCHAR* str)
 //
 
 #ifdef _DEBUG
-
-PAL_FILE *
-__cdecl
-PAL_get_stderr(int caller)
-{
-    return (PAL_FILE*)stderr;
-}
-
-int
-__cdecl
-PAL_fprintf(PAL_FILE* stream, const char* format, ...)
-{
-    va_list args;
-    va_start(args, format);
-    int result = vfprintf((FILE*)stream, format, args);
-    fflush((FILE*)stream);
-    va_end(args);
-    return result;
-}
-
 DWORD
 PALAPI
 GetCurrentProcessId()
diff --git a/src/coreclr/debug/daccess/daccess.cpp b/src/coreclr/debug/daccess/daccess.cpp
index e79dab808def..d6b8d99d7c37 100644
--- a/src/coreclr/debug/daccess/daccess.cpp
+++ b/src/coreclr/debug/daccess/daccess.cpp
@@ -3231,6 +3231,10 @@ ClrDataAccess::QueryInterface(THIS_
     {
         ifaceRet = static_cast<ISOSDacInterface13*>(this);
     }
+    else if (IsEqualIID(interfaceId, __uuidof(ISOSDacInterface14)))
+    {
+        ifaceRet = static_cast<ISOSDacInterface14*>(this);
+    }
     else
     {
         *iface = NULL;
@@ -5789,7 +5793,7 @@ ClrDataAccess::RawGetMethodName(
             SIZE_T maxPrecodeSize = sizeof(StubPrecode);
 
 #ifdef HAS_THISPTR_RETBUF_PRECODE
-            maxPrecodeSize = max(maxPrecodeSize, sizeof(ThisPtrRetBufPrecode));
+            maxPrecodeSize = max((size_t)maxPrecodeSize, sizeof(ThisPtrRetBufPrecode));
 #endif
 
             for (SIZE_T i = 0; i < maxPrecodeSize / PRECODE_ALIGNMENT; i++)
diff --git a/src/coreclr/debug/daccess/dacdbiimpl.cpp b/src/coreclr/debug/daccess/dacdbiimpl.cpp
index bc1e6ad84754..d1a0a1ada64d 100644
--- a/src/coreclr/debug/daccess/dacdbiimpl.cpp
+++ b/src/coreclr/debug/daccess/dacdbiimpl.cpp
@@ -1202,7 +1202,7 @@ mdSignature DacDbiInterfaceImpl::GetILCodeAndSigHelper(Module *       pModule,
     TADDR pTargetIL; // target address of start of IL blob
 
     // This works for methods in dynamic modules, and methods overridden by a profiler.
-    pTargetIL = pModule->GetDynamicIL(mdMethodToken, TRUE);
+    pTargetIL = pModule->GetDynamicIL(mdMethodToken);
 
     // Method not overridden - get the original copy of the IL by going to the PE file/RVA
     // If this is in a dynamic module then don't even attempt this since ReflectionModule::GetIL isn't
@@ -1577,16 +1577,8 @@ void DacDbiInterfaceImpl::GetStaticsBases(TypeHandle thExact,
                                          PTR_BYTE *  ppNonGCStaticsBase)
  {
     MethodTable * pMT = thExact.GetMethodTable();
-    Module * pModuleForStatics = pMT->GetModuleForStatics();
-    if (pModuleForStatics != NULL)
-    {
-        PTR_DomainLocalModule pLocalModule = pModuleForStatics->GetDomainLocalModule();
-        if (pLocalModule != NULL)
-        {
-            *ppGCStaticsBase = pLocalModule->GetGCStaticsBasePointer(pMT);
-            *ppNonGCStaticsBase = pLocalModule->GetNonGCStaticsBasePointer(pMT);
-        }
-    }
+    *ppGCStaticsBase = pMT->GetGCStaticsBasePointer();
+    *ppNonGCStaticsBase = pMT->GetNonGCStaticsBasePointer();
 } // DacDbiInterfaceImpl::GetStaticsBases
 
 //-----------------------------------------------------------------------------
@@ -5627,10 +5619,13 @@ void DacDbiInterfaceImpl::LookupEnCVersions(Module*          pModule,
     DebuggerJitInfo * pDJI = NULL;
     EX_TRY_ALLOW_DATATARGET_MISSING_MEMORY
     {
-        pDMI = g_pDebugger->GetOrCreateMethodInfo(pModule, mdMethod);
-        if (pDMI != NULL)
+        if (g_pDebugger != NULL)
         {
-            pDJI = pDMI->FindJitInfo(pMD, CORDB_ADDRESS_TO_TADDR(pNativeStartAddress));
+            pDMI = g_pDebugger->GetOrCreateMethodInfo(pModule, mdMethod);
+            if (pDMI != NULL)
+            {
+                pDJI = pDMI->FindJitInfo(pMD, CORDB_ADDRESS_TO_TADDR(pNativeStartAddress));
+            }
         }
     }
     EX_END_CATCH_ALLOW_DATATARGET_MISSING_MEMORY;
@@ -7455,13 +7450,13 @@ HRESULT DacDbiInterfaceImpl::GetILCodeVersionNodeData(VMPTR_ILCodeVersionNode vm
 #ifdef FEATURE_REJIT
     ILCodeVersion ilCode(vmILCodeVersionNode.GetDacPtr());
     pData->m_state = ilCode.GetRejitState();
-    pData->m_pbIL = PTR_TO_CORDB_ADDRESS(dac_cast<ULONG_PTR>(ilCode.GetIL()));
+    pData->m_pbIL = PTR_TO_CORDB_ADDRESS(dac_cast<TADDR>(ilCode.GetIL()));
     pData->m_dwCodegenFlags = ilCode.GetJitFlags();
     const InstrumentedILOffsetMapping* pMapping = ilCode.GetInstrumentedILMap();
     if (pMapping)
     {
         pData->m_cInstrumentedMapEntries = (ULONG)pMapping->GetCount();
-        pData->m_rgInstrumentedMapEntries = PTR_TO_CORDB_ADDRESS(dac_cast<ULONG_PTR>(pMapping->GetOffsets()));
+        pData->m_rgInstrumentedMapEntries = PTR_TO_CORDB_ADDRESS(dac_cast<TADDR>(pMapping->GetOffsets()));
     }
     else
     {
@@ -7479,6 +7474,10 @@ HRESULT DacDbiInterfaceImpl::GetDefinesBitField(ULONG32 *pDefines)
     DD_ENTER_MAY_THROW;
     if (pDefines == NULL)
         return E_INVALIDARG;
+
+    if (g_pDebugger == NULL)
+        return CORDBG_E_NOTREADY;
+        
     *pDefines = g_pDebugger->m_defines;
     return S_OK;
 }
@@ -7488,6 +7487,10 @@ HRESULT DacDbiInterfaceImpl::GetMDStructuresVersion(ULONG32* pMDStructuresVersio
     DD_ENTER_MAY_THROW;
     if (pMDStructuresVersion == NULL)
         return E_INVALIDARG;
+
+    if (g_pDebugger == NULL)
+        return CORDBG_E_NOTREADY;
+
     *pMDStructuresVersion = g_pDebugger->m_mdDataStructureVersion;
     return S_OK;
 }
diff --git a/src/coreclr/debug/daccess/dacimpl.h b/src/coreclr/debug/daccess/dacimpl.h
index 03756c671657..e698eed4c180 100644
--- a/src/coreclr/debug/daccess/dacimpl.h
+++ b/src/coreclr/debug/daccess/dacimpl.h
@@ -816,7 +816,8 @@ class ClrDataAccess
       public ISOSDacInterface10,
       public ISOSDacInterface11,
       public ISOSDacInterface12,
-      public ISOSDacInterface13
+      public ISOSDacInterface13,
+      public ISOSDacInterface14
 {
 public:
     ClrDataAccess(ICorDebugDataTarget * pTarget, ICLRDataTarget * pLegacyTarget=0);
@@ -1216,6 +1217,11 @@ class ClrDataAccess
     virtual HRESULT STDMETHODCALLTYPE GetGCFreeRegions(ISOSMemoryEnum **ppEnum);
     virtual HRESULT STDMETHODCALLTYPE LockedFlush();
 
+    // ISOSDacInterface14
+    virtual HRESULT STDMETHODCALLTYPE GetStaticBaseAddress(CLRDATA_ADDRESS methodTable, CLRDATA_ADDRESS *nonGCStaticsAddress, CLRDATA_ADDRESS *GCStaticsAddress);
+    virtual HRESULT STDMETHODCALLTYPE GetThreadStaticBaseAddress(CLRDATA_ADDRESS methodTable, CLRDATA_ADDRESS thread, CLRDATA_ADDRESS *nonGCStaticsAddress, CLRDATA_ADDRESS *GCStaticsAddress);
+    virtual HRESULT STDMETHODCALLTYPE GetMethodTableInitializationFlags(CLRDATA_ADDRESS methodTable, MethodTableInitializationFlags *initializationStatus);
+    
     //
     // ClrDataAccess.
     //
diff --git a/src/coreclr/debug/daccess/fntableaccess.h b/src/coreclr/debug/daccess/fntableaccess.h
index cfe9a2eea2ae..c4a72a6d93de 100644
--- a/src/coreclr/debug/daccess/fntableaccess.h
+++ b/src/coreclr/debug/daccess/fntableaccess.h
@@ -16,13 +16,8 @@
 #ifndef TARGET_UNIX
 #define DEBUGSUPPORT_STUBS_HAVE_UNWIND_INFO
 #endif // !TARGET_UNIX
-
-#ifndef USE_INDIRECT_CODEHEADER
-#define USE_INDIRECT_CODEHEADER
-#endif  // USE_INDIRECT_CODEHEADER
 #endif
 
-
 struct FakeEEJitManager
 {
     LPVOID      __VFN_table;
diff --git a/src/coreclr/debug/daccess/request.cpp b/src/coreclr/debug/daccess/request.cpp
index 1e0912ea05cd..dea3b8544ab9 100644
--- a/src/coreclr/debug/daccess/request.cpp
+++ b/src/coreclr/debug/daccess/request.cpp
@@ -2577,7 +2577,7 @@ ClrDataAccess::GetAssemblyData(CLRDATA_ADDRESS cdBaseDomainPtr, CLRDATA_ADDRESS
     }
 
     assemblyData->AssemblyPtr = HOST_CDADDR(pAssembly);
-    assemblyData->ClassLoader = HOST_CDADDR(pAssembly->GetLoader());
+    assemblyData->ClassLoader = 0;
     assemblyData->ParentDomain = HOST_CDADDR(AppDomain::GetCurrentDomain());
     assemblyData->isDynamic = pAssembly->IsDynamic();
     assemblyData->ModuleCount = 0;
@@ -3810,9 +3810,13 @@ ClrDataAccess::GetJumpThunkTarget(T_CONTEXT *ctx, CLRDATA_ADDRESS *targetIP, CLR
 #ifdef TARGET_AMD64
     SOSDacEnter();
 
-    if (!GetAnyThunkTarget(ctx, targetIP, targetMD))
+    TADDR tempTargetIP, tempTargetMD;
+    if (!GetAnyThunkTarget(ctx, &tempTargetIP, &tempTargetMD))
         hr = E_FAIL;
 
+    *targetIP = TO_CDADDR(tempTargetIP);
+    *targetMD = TO_CDADDR(tempTargetMD);
+
     SOSDacLeave();
     return hr;
 #else
@@ -4653,7 +4657,7 @@ HRESULT ClrDataAccess::GetProfilerModifiedILInformation(CLRDATA_ADDRESS methodDe
         pILData->rejitID = static_cast<ULONG>(pCodeVersionManager->GetActiveILCodeVersion(pMD).GetVersionId());
     }
 
-    TADDR pDynamicIL = pMD->GetModule()->GetDynamicIL(pMD->GetMemberDef(), TRUE);
+    TADDR pDynamicIL = pMD->GetModule()->GetDynamicIL(pMD->GetMemberDef());
     if (pDynamicIL != NULL)
     {
         pILData->type = DacpProfilerILData::ILModified;
@@ -4695,7 +4699,7 @@ HRESULT ClrDataAccess::GetMethodsWithProfilerModifiedIL(CLRDATA_ADDRESS mod, CLR
             {
                 PTR_MethodDesc pMD = dac_cast<PTR_MethodDesc>(itMethods.GetMethodDesc());
 
-                TADDR pDynamicIL = pModule->GetDynamicIL(pMD->GetMemberDef(), TRUE);
+                TADDR pDynamicIL = pModule->GetDynamicIL(pMD->GetMemberDef());
                 ILCodeVersion ilVersion = pCodeVersionManager->GetActiveILCodeVersion(pMD);
                 if (ilVersion.GetRejitState() != ILCodeVersion::kStateActive || !ilVersion.HasDefaultIL() || pDynamicIL != NULL)
                 {
@@ -5397,3 +5401,119 @@ HRESULT ClrDataAccess::LockedFlush()
     SOSDacLeave();
     return hr;
 }
+
+HRESULT STDMETHODCALLTYPE ClrDataAccess::GetStaticBaseAddress(CLRDATA_ADDRESS methodTable, CLRDATA_ADDRESS *nonGCStaticsAddress, CLRDATA_ADDRESS *GCStaticsAddress)
+{
+    if (!nonGCStaticsAddress && !GCStaticsAddress)
+        return E_POINTER;
+    
+    if (!methodTable)
+        return E_INVALIDARG;
+    
+    SOSDacEnter();
+
+    PTR_MethodTable mTable = PTR_MethodTable(TO_TADDR(methodTable));
+
+    BOOL bIsFree = FALSE;
+    if (!DacValidateMethodTable(mTable, bIsFree))
+    {
+        hr = E_INVALIDARG;
+    }
+    else
+    {
+        if (GCStaticsAddress != NULL)
+        {
+            *GCStaticsAddress = PTR_CDADDR(mTable->GetGCStaticsBasePointer());
+        }
+        if (nonGCStaticsAddress != NULL)
+        {
+            *nonGCStaticsAddress = PTR_CDADDR(mTable->GetNonGCStaticsBasePointer());
+        }
+    }
+
+    SOSDacLeave();
+    return hr;
+}
+
+
+HRESULT STDMETHODCALLTYPE ClrDataAccess::GetThreadStaticBaseAddress(CLRDATA_ADDRESS methodTable, CLRDATA_ADDRESS threadPtr, CLRDATA_ADDRESS *nonGCStaticsAddress, CLRDATA_ADDRESS *GCStaticsAddress)
+{
+    if (!nonGCStaticsAddress && !GCStaticsAddress)
+        return E_POINTER;
+    
+    if (!methodTable)
+        return E_INVALIDARG;
+
+    if (!threadPtr)
+        return E_INVALIDARG;
+    
+    SOSDacEnter();
+
+    PTR_MethodTable mTable = PTR_MethodTable(TO_TADDR(methodTable));
+    PTR_Thread thread = PTR_Thread(TO_TADDR(threadPtr));
+
+
+    BOOL bIsFree = FALSE;
+    if (!DacValidateMethodTable(mTable, bIsFree))
+    {
+        hr = E_INVALIDARG;
+    }
+    else
+    {
+        if (mTable->GetClass()->GetNumThreadStaticFields() == 0)
+        {
+            if (GCStaticsAddress != NULL)
+            {
+                *GCStaticsAddress = 0;
+            }
+            if (nonGCStaticsAddress != NULL)
+            {
+                *nonGCStaticsAddress = 0;
+            }
+        }
+        else
+        {
+            if (GCStaticsAddress != NULL)
+            {
+                *GCStaticsAddress = PTR_CDADDR(mTable->GetGCThreadStaticsBasePointer(thread));
+            }
+            if (nonGCStaticsAddress != NULL)
+            {
+                *nonGCStaticsAddress = PTR_CDADDR(mTable->GetNonGCThreadStaticsBasePointer(thread));
+            }
+        }
+    }
+
+    SOSDacLeave();
+    return hr;
+}
+
+HRESULT STDMETHODCALLTYPE ClrDataAccess::GetMethodTableInitializationFlags(CLRDATA_ADDRESS methodTable, MethodTableInitializationFlags *initializationStatus)
+{
+    if (!methodTable)
+        return E_INVALIDARG;
+
+    if (!initializationStatus)
+        return E_POINTER;
+
+    SOSDacEnter();
+
+    *initializationStatus = (MethodTableInitializationFlags)0;
+    PTR_MethodTable mTable = PTR_MethodTable(TO_TADDR(methodTable));
+    BOOL bIsFree = FALSE;
+    if (!DacValidateMethodTable(mTable, bIsFree))
+    {
+        hr = E_INVALIDARG;
+    }
+    else
+    {
+        *initializationStatus = mTable->IsClassInited() ? MethodTableInitialized : (MethodTableInitializationFlags)0;
+        if (mTable->IsInitError())
+        {
+            *initializationStatus = (MethodTableInitializationFlags)(*initializationStatus | MethodTableInitializationFailed);
+        }
+    }
+
+    SOSDacLeave();
+    return hr;
+}
diff --git a/src/coreclr/debug/dbgutil/CMakeLists.txt b/src/coreclr/debug/dbgutil/CMakeLists.txt
index 2d8e02b07fc7..0ad223630a58 100644
--- a/src/coreclr/debug/dbgutil/CMakeLists.txt
+++ b/src/coreclr/debug/dbgutil/CMakeLists.txt
@@ -9,8 +9,6 @@ if(CLR_CMAKE_HOST_WIN32 OR CLR_CMAKE_HOST_OSX)
   include_directories(${CLR_DIR}/inc/llvm)
 endif(CLR_CMAKE_HOST_WIN32 OR CLR_CMAKE_HOST_OSX)
 
-add_definitions(-DPAL_STDCPP_COMPAT)
-
 if(CLR_CMAKE_TARGET_LINUX_MUSL)
     add_definitions(-DTARGET_LINUX_MUSL)
 endif(CLR_CMAKE_TARGET_LINUX_MUSL)
diff --git a/src/coreclr/debug/debug-pal/CMakeLists.txt b/src/coreclr/debug/debug-pal/CMakeLists.txt
index baa11c163dff..adc8efacab4a 100644
--- a/src/coreclr/debug/debug-pal/CMakeLists.txt
+++ b/src/coreclr/debug/debug-pal/CMakeLists.txt
@@ -2,8 +2,6 @@ include_directories(../inc)
 include_directories(../../pal/inc)
 include_directories(${EP_GENERATED_HEADER_PATH})
 
-add_definitions(-DPAL_STDCPP_COMPAT)
-
 set(SHARED_EVENTPIPE_SOURCE_PATH ${CLR_SRC_NATIVE_DIR}/eventpipe)
 add_definitions(-DFEATURE_CORECLR)
 add_definitions(-DFEATURE_PERFTRACING)
diff --git a/src/coreclr/debug/di/module.cpp b/src/coreclr/debug/di/module.cpp
index ca8314db5339..1f4216908637 100644
--- a/src/coreclr/debug/di/module.cpp
+++ b/src/coreclr/debug/di/module.cpp
@@ -4250,12 +4250,12 @@ HRESULT CordbNativeCode::GetILToNativeMapping(ULONG32                    cMap,
         LoadNativeInfo();
 
         SequencePoints * pSeqPts = GetSequencePoints();
-        DebuggerILToNativeMap * rgMapInt = pSeqPts->GetMapAddr();
         ULONG32 cMapIntCount = pSeqPts->GetEntryCount();
 
         // If they gave us space to copy into...
-        if (map != NULL)
+        if (map != NULL && cMapIntCount != 0)
         {
+            DebuggerILToNativeMap * rgMapInt = pSeqPts->GetMapAddr();
             // Only copy as much as either they gave us or we have to copy.
             ULONG32 cMapToCopy = min(cMap, cMapIntCount);
 
diff --git a/src/coreclr/debug/di/rspriv.h b/src/coreclr/debug/di/rspriv.h
index ceadc7eedafe..63886b56bfa5 100644
--- a/src/coreclr/debug/di/rspriv.h
+++ b/src/coreclr/debug/di/rspriv.h
@@ -3975,9 +3975,9 @@ class CordbProcess :
 
     // CORDB_ADDRESS's are UINT_PTR's (64 bit under HOST_64BIT, 32 bit otherwise)
 #if defined(TARGET_64BIT)
-#define MAX_ADDRESS     (_UI64_MAX)
+#define MAX_ADDRESS     (UINT64_MAX)
 #else
-#define MAX_ADDRESS     (_UI32_MAX)
+#define MAX_ADDRESS     (UINT32_MAX)
 #endif
 #define MIN_ADDRESS     (0x0)
     CORDB_ADDRESS       m_minPatchAddr; //smallest patch in table
@@ -7325,7 +7325,8 @@ class CordbJITILFrame : public CordbBase, public ICorDebugILFrame, public ICorDe
                     GENERICS_TYPE_TOKEN   exactGenericArgsToken,
                     DWORD                 dwExactGenericArgsTokenIndex,
                     bool                  fVarArgFnx,
-                    CordbReJitILCode *    pReJitCode);
+                    CordbReJitILCode *    pReJitCode,
+                    bool                  fAdjustedIP);
     HRESULT Init();
     virtual ~CordbJITILFrame();
     virtual void Neuter();
@@ -7436,6 +7437,7 @@ class CordbJITILFrame : public CordbBase, public ICorDebugILFrame, public ICorDe
 
     CordbILCode* GetOriginalILCode();
     CordbReJitILCode* GetReJitILCode();
+    void AdjustIPAfterException();
 
 private:
     void    RefreshCachedVarArgSigParserIfNeeded();
@@ -7503,6 +7505,7 @@ class CordbJITILFrame : public CordbBase, public ICorDebugILFrame, public ICorDe
 
     // if this frame is instrumented with rejit, this will point to the instrumented IL code
     RSSmartPtr<CordbReJitILCode> m_pReJitCode;
+    BOOL m_adjustedIP;
 };
 
 /* ------------------------------------------------------------------------- *
diff --git a/src/coreclr/debug/di/rsstackwalk.cpp b/src/coreclr/debug/di/rsstackwalk.cpp
index 751d18dcc179..f2bf3777bb6b 100644
--- a/src/coreclr/debug/di/rsstackwalk.cpp
+++ b/src/coreclr/debug/di/rsstackwalk.cpp
@@ -776,7 +776,8 @@ HRESULT CordbStackWalk::GetFrameWorker(ICorDebugFrame ** ppFrame)
                                                                 frameData.v.exactGenericArgsToken,
                                                                 frameData.v.dwExactGenericArgsTokenIndex,
                                                                 !!frameData.v.fVarArgs,
-                                                                pReJitCode));
+                                                                pReJitCode,
+                                                                pJITFuncData->justAfterILThrow));
 
             // Initialize the frame.  This is a nop if the method is not a vararg method.
             hr = pJITILFrame->Init();
diff --git a/src/coreclr/debug/di/rsthread.cpp b/src/coreclr/debug/di/rsthread.cpp
index 7b969ee65d3d..1f455dad376d 100644
--- a/src/coreclr/debug/di/rsthread.cpp
+++ b/src/coreclr/debug/di/rsthread.cpp
@@ -5122,7 +5122,7 @@ HRESULT CordbValueEnum::Next(ULONG celt, ICorDebugValue *values[], ULONG *pceltF
 
     HRESULT hr = S_OK;
 
-    int iMax = min( m_iMax, m_iCurrent+celt);
+    int iMax = (int)min( (ULONG)m_iMax, m_iCurrent+celt);
     int i;
     for (i = m_iCurrent; i< iMax;i++)
     {
@@ -7396,7 +7396,8 @@ CordbJITILFrame::CordbJITILFrame(CordbNativeFrame *    pNativeFrame,
                                  GENERICS_TYPE_TOKEN   exactGenericArgsToken,
                                  DWORD                 dwExactGenericArgsTokenIndex,
                                  bool                  fVarArgFnx,
-                                 CordbReJitILCode *    pRejitCode)
+                                 CordbReJitILCode *    pRejitCode,
+                                 bool                  fAdjustedIP)
   : CordbBase(pNativeFrame->GetProcess(), 0, enumCordbJITILFrame),
     m_nativeFrame(pNativeFrame),
     m_ilCode(pCode),
@@ -7411,7 +7412,8 @@ CordbJITILFrame::CordbJITILFrame(CordbNativeFrame *    pNativeFrame,
     m_genericArgsLoaded(false),
     m_frameParamsToken(exactGenericArgsToken),
     m_dwFrameParamsTokenIndex(dwExactGenericArgsTokenIndex),
-    m_pReJitCode(pRejitCode)
+    m_pReJitCode(pRejitCode),
+    m_adjustedIP(fAdjustedIP)
 {
     // We'll initialize the SigParser in CordbJITILFrame::Init().
     m_sigParserCached = SigParser(NULL, 0);
@@ -8184,7 +8186,7 @@ HRESULT CordbJITILFrame::FabricateNativeInfo(DWORD dwIndex,
             // first argument, but thereafter we have to decrement it
             // before getting the variable's location from it.  So increment
             // it here to be consistent later.
-            rpCur += max(cbType, cbArchitectureMin);
+            rpCur += max((ULONG)cbType, cbArchitectureMin);
 #endif
 
             // Grab the IL code's function's method signature so we can see if it's static.
@@ -8217,7 +8219,7 @@ HRESULT CordbJITILFrame::FabricateNativeInfo(DWORD dwIndex,
                 IfFailThrow(pArgType->GetUnboxedObjectSize(&cbType));
 
 #if defined(TARGET_X86) // STACK_GROWS_DOWN_ON_ARGS_WALK
-                rpCur -= max(cbType, cbArchitectureMin);
+                rpCur -= max((ULONG)cbType, cbArchitectureMin);
                 m_rgNVI[i].loc.vlFixedVarArg.vlfvOffset =
                     (unsigned)(m_FirstArgAddr - rpCur);
 
@@ -8227,7 +8229,7 @@ HRESULT CordbJITILFrame::FabricateNativeInfo(DWORD dwIndex,
 #else // STACK_GROWS_UP_ON_ARGS_WALK
                 m_rgNVI[i].loc.vlFixedVarArg.vlfvOffset =
                     (unsigned)(rpCur - m_FirstArgAddr);
-                rpCur += max(cbType, cbArchitectureMin);
+                rpCur += max((ULONG)cbType, cbArchitectureMin);
                 AlignAddressForType(pArgType, rpCur);
 #endif
 
@@ -9027,6 +9029,21 @@ CordbReJitILCode* CordbJITILFrame::GetReJitILCode()
     return m_pReJitCode;
 }
 
+void CordbJITILFrame::AdjustIPAfterException()
+{
+    CordbNativeFrame* nativeFrameToAdjustIP = m_nativeFrame;
+    if (!m_adjustedIP)
+    {
+        DWORD nativeOffsetToMap = (DWORD)nativeFrameToAdjustIP->m_ip - STACKWALK_CONTROLPC_ADJUST_OFFSET;
+        CorDebugMappingResult mappingType;
+        ULONG uILOffset = nativeFrameToAdjustIP->m_nativeCode->GetSequencePoints()->MapNativeOffsetToIL(
+                nativeOffsetToMap,
+                &mappingType);
+        m_ip= uILOffset;
+        m_adjustedIP = true;
+    }
+}
+
 /* ------------------------------------------------------------------------- *
  * Eval class
  * ------------------------------------------------------------------------- */
@@ -10860,7 +10877,7 @@ HRESULT CordbCodeEnum::Next(ULONG celt, ICorDebugCode *values[], ULONG *pceltFet
 
     HRESULT hr = S_OK;
 
-    int iMax = min( m_iMax, m_iCurrent+celt);
+    int iMax = (int)min( (ULONG)m_iMax, m_iCurrent+celt);
     int i;
 
     for (i = m_iCurrent; i < iMax; i++)
diff --git a/src/coreclr/debug/di/rstype.cpp b/src/coreclr/debug/di/rstype.cpp
index 45ccd44be656..ae686064e96c 100644
--- a/src/coreclr/debug/di/rstype.cpp
+++ b/src/coreclr/debug/di/rstype.cpp
@@ -2898,7 +2898,7 @@ HRESULT CordbTypeEnum::Next(ULONG celt, ICorDebugType *values[], ULONG *pceltFet
 
     HRESULT hr = S_OK;
 
-    int iMax = min( m_iMax, m_iCurrent+celt);
+    int iMax = (int)min( (ULONG)m_iMax, m_iCurrent+celt);
     int i;
 
     for (i = m_iCurrent; i < iMax; i++)
diff --git a/src/coreclr/debug/di/shimcallback.cpp b/src/coreclr/debug/di/shimcallback.cpp
index 4e8f029209de..bf6c817fc880 100644
--- a/src/coreclr/debug/di/shimcallback.cpp
+++ b/src/coreclr/debug/di/shimcallback.cpp
@@ -1408,7 +1408,7 @@ HRESULT ShimProxyCallback::DataBreakpoint(ICorDebugProcess* pProcess, ICorDebugT
             this->m_pThread.Assign(pThread);
 
             _ASSERTE(contextSize == sizeof(CONTEXT));
-            this->m_contextSize = min(contextSize, sizeof(CONTEXT));
+            this->m_contextSize = min(contextSize, (ULONG32)sizeof(CONTEXT));
             memcpy(&(this->m_context), pContext, this->m_contextSize);
         }
 
diff --git a/src/coreclr/debug/di/shimpriv.h b/src/coreclr/debug/di/shimpriv.h
index 1ce2f6857d48..ff0f16436a1f 100644
--- a/src/coreclr/debug/di/shimpriv.h
+++ b/src/coreclr/debug/di/shimpriv.h
@@ -780,6 +780,8 @@ class ShimStackWalk
 
         // Indicate whether we are processing a converted frame.
         bool m_fHasConvertedFrame;
+
+        bool m_fHasException;
     };
 
     // A ShimStackWalk is deleted when a process is continued, or when the stack is changed in any way
diff --git a/src/coreclr/debug/di/shimstackwalk.cpp b/src/coreclr/debug/di/shimstackwalk.cpp
index c47620c7bc09..46213d4ca364 100644
--- a/src/coreclr/debug/di/shimstackwalk.cpp
+++ b/src/coreclr/debug/di/shimstackwalk.cpp
@@ -312,6 +312,7 @@ void ShimStackWalk::Populate()
                             // because of the leaf STUBFRAME_EXCEPTION.
                             chainInfo.CancelUMChain();
                             swInfo.m_fSkipChain = true;
+                            swInfo.m_fHasException = true;
                         }
                     }
 
@@ -988,6 +989,20 @@ CorDebugInternalFrameType ShimStackWalk::GetInternalFrameType(ICorDebugInternalF
 
 void ShimStackWalk::AppendFrame(ICorDebugFrame * pFrame, StackWalkInfo * pStackWalkInfo)
 {
+    // We've detected we're in a stackwalk where we have an exception and no further managed frames 
+    // are on top of this frame. To ensure our IP points to the user line that threw the exception, 
+    // we ask the frame to adjust the IP to the call instruction as currently it points to the instruction after it.
+    if (pStackWalkInfo->m_fHasException && pStackWalkInfo->m_cFrame == 0)
+    {
+        RSExtSmartPtr<ICorDebugILFrame> pNFrame3;
+        HRESULT hr = pFrame->QueryInterface(IID_ICorDebugILFrame, reinterpret_cast<void **>(&pNFrame3));
+        if (pNFrame3 != NULL)
+        {
+            CordbJITILFrame* JITILFrameToAdjustIP = (static_cast<CordbJITILFrame*>(pNFrame3.GetValue()));
+            JITILFrameToAdjustIP->AdjustIPAfterException();
+            pStackWalkInfo->m_fHasException = false;                                    
+        }
+    }
     // grow the
     ICorDebugFrame ** ppFrame = m_stackFrames.AppendThrowing();
 
@@ -1469,7 +1484,8 @@ ShimStackWalk::StackWalkInfo::StackWalkInfo()
     m_fProcessingInternalFrame(false),
     m_fSkipChain(false),
     m_fLeafFrame(true),
-    m_fHasConvertedFrame(false)
+    m_fHasConvertedFrame(false),
+    m_fHasException(false)
 {
     m_pChildFrame.Assign(NULL);
     m_pConvertedInternalFrame2.Assign(NULL);
diff --git a/src/coreclr/debug/di/stdafx.h b/src/coreclr/debug/di/stdafx.h
index 061c576c4725..8ee806f88f27 100644
--- a/src/coreclr/debug/di/stdafx.h
+++ b/src/coreclr/debug/di/stdafx.h
@@ -10,6 +10,9 @@
 #include <stdio.h>
 #include <windows.h>
 #include <winnt.h>
+#include <algorithm>
+using std::min;
+using std::max;
 
 #include <dbgtargetcontext.h>
 
diff --git a/src/coreclr/debug/ee/controller.h b/src/coreclr/debug/ee/controller.h
index a2d8dc2e2602..b838e11c0f85 100644
--- a/src/coreclr/debug/ee/controller.h
+++ b/src/coreclr/debug/ee/controller.h
@@ -827,7 +827,7 @@ class DebuggerPatchTable : private CHashTableAndData<CNewZeroData>
     DebuggerControllerPatch * GetPatch(PTR_CORDB_ADDRESS_TYPE address)
     {
         SUPPORTS_DAC;
-        ARM_ONLY(_ASSERTE(dac_cast<DWORD>(address) & THUMB_CODE));
+        ARM_ONLY(_ASSERTE(dac_cast<TADDR>(address) & THUMB_CODE));
 
         DebuggerControllerPatch * pPatch =
             dac_cast<PTR_DebuggerControllerPatch>(Find(HashAddress(address), (SIZE_T)(dac_cast<TADDR>(address))));
diff --git a/src/coreclr/debug/ee/debugger.cpp b/src/coreclr/debug/ee/debugger.cpp
index b97f76c4a03c..79aa2d5f13fc 100644
--- a/src/coreclr/debug/ee/debugger.cpp
+++ b/src/coreclr/debug/ee/debugger.cpp
@@ -3029,7 +3029,7 @@ HRESULT Debugger::GetILToNativeMappingIntoArrays(
     if (pDJI == NULL)
         return E_FAIL;
 
-    ULONG32 cMap = min(cMapMax, pDJI->GetSequenceMapCount());
+    ULONG32 cMap = min((ULONG32)cMapMax, pDJI->GetSequenceMapCount());
     DebuggerILToNativeMap * rgMapInt = pDJI->GetSequenceMap();
 
     NewArrayHolder<UINT> rguiILOffsetTemp = new (nothrow) UINT[cMap];
@@ -11566,17 +11566,26 @@ HRESULT Debugger::GetAndSendInterceptCommand(DebuggerIPCEvent *event)
                         //
                         // Set up the VM side of intercepting.
                         //
+                        StackFrame sfInterceptFramePointer;
+                        if (g_isNewExceptionHandlingEnabled)
+                        {
+                            sfInterceptFramePointer = StackFrame::FromRegDisplay(&(csi.m_activeFrame.registers));
+                        }
+                        else
+                        {
+#if defined (TARGET_ARM )|| defined (TARGET_ARM64 )
+                            // ARM requires the caller stack pointer, not the current stack pointer
+                            sfInterceptFramePointer = CallerStackFrame::FromRegDisplay(&(csi.m_activeFrame.registers));
+#else
+                            sfInterceptFramePointer = StackFrame::FromRegDisplay(&(csi.m_activeFrame.registers));
+#endif
+                        }
                         if (pExState->GetDebuggerState()->SetDebuggerInterceptInfo(csi.m_activeFrame.pIJM,
                                                               pThread,
                                                               csi.m_activeFrame.MethodToken,
                                                               csi.m_activeFrame.md,
                                                               foundOffset,
-#if defined (TARGET_ARM )|| defined (TARGET_ARM64 )
-                                                              // ARM requires the caller stack pointer, not the current stack pointer
-                                                              CallerStackFrame::FromRegDisplay(&(csi.m_activeFrame.registers)),
-#else
-                                                              StackFrame::FromRegDisplay(&(csi.m_activeFrame.registers)),
-#endif
+                                                              sfInterceptFramePointer,
                                                               pExState->GetFlags()
                                                              ))
                         {
diff --git a/src/coreclr/debug/ee/debugger.h b/src/coreclr/debug/ee/debugger.h
index ac2a3218f735..2b8573e31b36 100644
--- a/src/coreclr/debug/ee/debugger.h
+++ b/src/coreclr/debug/ee/debugger.h
@@ -3892,8 +3892,6 @@ HANDLE OpenWin32EventOrThrow(
 // Returns true if the specified IL offset has a special meaning (eg. prolog, etc.)
 bool DbgIsSpecialILOffset(DWORD offset);
 
-#if !defined(TARGET_X86)
 void FixupDispatcherContext(T_DISPATCHER_CONTEXT* pDispatcherContext, T_CONTEXT* pContext, PEXCEPTION_ROUTINE pUnwindPersonalityRoutine = NULL);
-#endif
 
 #endif /* DEBUGGER_H_ */
diff --git a/src/coreclr/debug/ee/debugger.inl b/src/coreclr/debug/ee/debugger.inl
index 61b44c9466e5..8b7a973f48ef 100644
--- a/src/coreclr/debug/ee/debugger.inl
+++ b/src/coreclr/debug/ee/debugger.inl
@@ -213,7 +213,7 @@ inline TADDR FuncEvalFrame::GetReturnAddressPtr()
 //
 // This updates the register display for a FuncEvalFrame.
 //
-inline void FuncEvalFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+inline void FuncEvalFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     SUPPORTS_DAC;
     DebuggerEval * pDE = GetDebuggerEval();
diff --git a/src/coreclr/debug/ee/funceval.cpp b/src/coreclr/debug/ee/funceval.cpp
index 7844edbe8b30..a7e888452c78 100644
--- a/src/coreclr/debug/ee/funceval.cpp
+++ b/src/coreclr/debug/ee/funceval.cpp
@@ -2806,7 +2806,7 @@ void PackArgumentArray(DebuggerEval *pDE,
 
 #ifdef FEATURE_HFA
         // The buffer for HFAs has to be always ENREGISTERED_RETURNTYPE_MAXSIZE
-        size = max(size, ENREGISTERED_RETURNTYPE_MAXSIZE);
+        size = max(size, (unsigned)ENREGISTERED_RETURNTYPE_MAXSIZE);
 #endif
 
         BYTE * pTemp = new (interopsafe) BYTE[ALIGN_UP(sizeof(ValueClassInfo), 8) + size];
diff --git a/src/coreclr/debug/ee/stdafx.h b/src/coreclr/debug/ee/stdafx.h
index f21a670e210b..21ef5f0efa32 100644
--- a/src/coreclr/debug/ee/stdafx.h
+++ b/src/coreclr/debug/ee/stdafx.h
@@ -12,6 +12,8 @@
 #include <stdint.h>
 #include <wchar.h>
 #include <stdio.h>
+#include <algorithm>
+#include <cmath>
 
 #include <windows.h>
 
diff --git a/src/coreclr/debug/inc/arm64/primitives.h b/src/coreclr/debug/inc/arm64/primitives.h
index 05c03c7b3094..5f8b5262d993 100644
--- a/src/coreclr/debug/inc/arm64/primitives.h
+++ b/src/coreclr/debug/inc/arm64/primitives.h
@@ -153,9 +153,9 @@ inline void CORDbgSetInstruction(CORDB_ADDRESS_TYPE* address,
 #if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) && defined(HOST_OSX)
     ExecutableWriterHolder<void> instructionWriterHolder((LPVOID)address, sizeof(PRD_TYPE));
 
-    ULONGLONG ptraddr = dac_cast<ULONGLONG>(instructionWriterHolder.GetRW());
+    TADDR ptraddr = dac_cast<TADDR>(instructionWriterHolder.GetRW());
 #else // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX
-    ULONGLONG ptraddr = dac_cast<ULONGLONG>(address);
+    TADDR ptraddr = dac_cast<TADDR>(address);
 #endif // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX
     *(PRD_TYPE *)ptraddr = instruction;
     FlushInstructionCache(GetCurrentProcess(),
@@ -167,7 +167,7 @@ inline PRD_TYPE CORDbgGetInstruction(UNALIGNED CORDB_ADDRESS_TYPE* address)
 {
     LIMITED_METHOD_CONTRACT;
 
-    ULONGLONG ptraddr = dac_cast<ULONGLONG>(address);
+    TADDR ptraddr = dac_cast<TADDR>(address);
     return *(PRD_TYPE *)ptraddr;
 }
 
diff --git a/src/coreclr/debug/inc/dbgipcevents.h b/src/coreclr/debug/inc/dbgipcevents.h
index 0eb393c37fce..1545aa280837 100644
--- a/src/coreclr/debug/inc/dbgipcevents.h
+++ b/src/coreclr/debug/inc/dbgipcevents.h
@@ -768,7 +768,7 @@ class MSLAYOUT VMPTR_Base
     //
     // Operators to emulate Pointer semantics.
     //
-    bool IsNull() { SUPPORTS_DAC; return m_addr == NULL; }
+    bool IsNull() { SUPPORTS_DAC; return m_addr == (TADDR)0; }
 
     static VMPTR_This NullPtr()
     {
diff --git a/src/coreclr/debug/inc/loongarch64/primitives.h b/src/coreclr/debug/inc/loongarch64/primitives.h
index 97e4fb9541a2..750f8a617c17 100644
--- a/src/coreclr/debug/inc/loongarch64/primitives.h
+++ b/src/coreclr/debug/inc/loongarch64/primitives.h
@@ -20,7 +20,7 @@ typedef DPTR(CORDB_ADDRESS_TYPE)    PTR_CORDB_ADDRESS_TYPE;
 
 // Given a return address retrieved during stackwalk,
 // this is the offset by which it should be decremented to land at the call instruction.
-#define STACKWALK_CONTROLPC_ADJUST_OFFSET 8
+#define STACKWALK_CONTROLPC_ADJUST_OFFSET 4
 
 #define PRD_TYPE                               LONG
 #define CORDbg_BREAK_INSTRUCTION_SIZE 4
@@ -135,7 +135,7 @@ inline void CORDbgSetInstruction(CORDB_ADDRESS_TYPE* address,
     // In a DAC build, this function assumes the input is an host address.
     LIMITED_METHOD_DAC_CONTRACT;
 
-    ULONGLONG ptraddr = dac_cast<ULONGLONG>(address);
+    TADDR ptraddr = dac_cast<TADDR>(address);
     *(PRD_TYPE *)ptraddr = instruction;
     FlushInstructionCache(GetCurrentProcess(),
                           address,
@@ -146,7 +146,7 @@ inline PRD_TYPE CORDbgGetInstruction(UNALIGNED CORDB_ADDRESS_TYPE* address)
 {
     LIMITED_METHOD_CONTRACT;
 
-    ULONGLONG ptraddr = dac_cast<ULONGLONG>(address);
+    TADDR ptraddr = dac_cast<TADDR>(address);
     return *(PRD_TYPE *)ptraddr;
 }
 
diff --git a/src/coreclr/debug/inc/riscv64/primitives.h b/src/coreclr/debug/inc/riscv64/primitives.h
index 066397fcda71..17ace22981c7 100644
--- a/src/coreclr/debug/inc/riscv64/primitives.h
+++ b/src/coreclr/debug/inc/riscv64/primitives.h
@@ -137,7 +137,7 @@ inline void CORDbgSetInstruction(CORDB_ADDRESS_TYPE* address,
     // In a DAC build, this function assumes the input is an host address.
     LIMITED_METHOD_DAC_CONTRACT;
 
-    ULONGLONG ptraddr = dac_cast<ULONGLONG>(address);
+    TADDR ptraddr = dac_cast<TADDR>(address);
     *(PRD_TYPE *)ptraddr = instruction;
     FlushInstructionCache(GetCurrentProcess(),
                           address,
@@ -148,7 +148,7 @@ inline PRD_TYPE CORDbgGetInstruction(UNALIGNED CORDB_ADDRESS_TYPE* address)
 {
     LIMITED_METHOD_CONTRACT;
 
-    ULONGLONG ptraddr = dac_cast<ULONGLONG>(address);
+    TADDR ptraddr = dac_cast<TADDR>(address);
     return *(PRD_TYPE *)ptraddr;
 }
 
diff --git a/src/coreclr/debug/shared/dbgtransportsession.cpp b/src/coreclr/debug/shared/dbgtransportsession.cpp
index 8b8ca6203c95..3bebb8282aed 100644
--- a/src/coreclr/debug/shared/dbgtransportsession.cpp
+++ b/src/coreclr/debug/shared/dbgtransportsession.cpp
@@ -1949,7 +1949,7 @@ void DbgTransportSession::TransportWorker()
                     DWORD   cbBytesToRead = sReceiveHeader.TypeSpecificData.MemoryAccess.m_cbLeftSideBuffer;
                     while (cbBytesToRead)
                     {
-                        DWORD cbTransfer = min(cbBytesToRead, sizeof(rgDummy));
+                        DWORD cbTransfer = min(cbBytesToRead, (DWORD)sizeof(rgDummy));
                         if (!ReceiveBlock(rgDummy, cbTransfer))
                             HANDLE_TRANSIENT_ERROR();
                         cbBytesToRead -= cbTransfer;
diff --git a/src/coreclr/dlls/mscordac/mscordac_unixexports.src b/src/coreclr/dlls/mscordac/mscordac_unixexports.src
index 4e65be98fee1..ad056eb1104e 100644
--- a/src/coreclr/dlls/mscordac/mscordac_unixexports.src
+++ b/src/coreclr/dlls/mscordac/mscordac_unixexports.src
@@ -22,17 +22,10 @@ nativeStringResourceTable_mscorrc
 ; All the # exports are prefixed with DAC_
 #PAL_CatchHardwareExceptionHolderEnter
 #PAL_CatchHardwareExceptionHolderExit
-#PAL_bsearch
 #PAL_CopyModuleData
-#PAL_errno
-#PAL_fflush
-#PAL__flushall
-#PAL_free
 #PAL_GetLogicalCpuCountFromOS
 #PAL_GetTotalCpuCount
 #PAL_GetUnwindInfoSize
-#PAL_get_stdout
-#PAL_get_stderr
 #PAL_GetApplicationGroupId
 #PAL_GetTransportName
 #PAL_GetCurrentThread
@@ -49,10 +42,6 @@ nativeStringResourceTable_mscorrc
 #PAL_ReadProcessMemory
 #PAL_ProbeMemory
 #PAL_Random
-#PAL_malloc
-#PAL_realloc
-#PAL_qsort
-#PAL_fprintf
 #PAL__wcstoui64
 #PAL_wcstoul
 #PAL_wcstod
@@ -65,9 +54,6 @@ nativeStringResourceTable_mscorrc
 #PAL_wcschr
 #PAL_wcscat
 #PAL_wcsstr
-#PAL__open
-#PAL__pread
-#PAL__close
 
 #_wcsicmp
 #sprintf_s
diff --git a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt
index 2e2a8bf87ecc..c600af1fb6aa 100644
--- a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt
+++ b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt
@@ -111,6 +111,12 @@ set(CORECLR_LIBRARIES
     gc_pal
 )
 
+if(CLR_CMAKE_TARGET_ARCH_AMD64)
+    list(APPEND CORECLR_LIBRARIES
+        gc_vxsort
+    )
+endif(CLR_CMAKE_TARGET_ARCH_AMD64)
+
 if(CLR_CMAKE_TARGET_WIN32)
     list(APPEND CORECLR_LIBRARIES
         ${STATIC_MT_CRT_LIB}
diff --git a/src/coreclr/dlls/mscorpe/stdafx.h b/src/coreclr/dlls/mscorpe/stdafx.h
index 996113b50015..bd78a49013c9 100644
--- a/src/coreclr/dlls/mscorpe/stdafx.h
+++ b/src/coreclr/dlls/mscorpe/stdafx.h
@@ -11,6 +11,7 @@
 #include <assert.h>
 #include <stdio.h>
 #include <stddef.h>
+#include <algorithm>
 
 #define FEATURE_NO_HOST     // Do not use host interface
 #include <utilcode.h>
@@ -21,3 +22,6 @@
 #include "ceegen.h"
 #include "ceefilegenwriter.h"
 #include "ceesectionstring.h"
+
+using std::min;
+using std::max;
diff --git a/src/coreclr/dlls/mscorrc/mscorrc.rc b/src/coreclr/dlls/mscorrc/mscorrc.rc
index eb48bb390d09..b95946881f10 100644
--- a/src/coreclr/dlls/mscorrc/mscorrc.rc
+++ b/src/coreclr/dlls/mscorrc/mscorrc.rc
@@ -309,9 +309,12 @@ BEGIN
 	IDS_CLASSLOAD_GENERICTYPE_RECURSIVE     "Could not load type '%1' from assembly '%2' because it has recursive generic definition."
     IDS_CLASSLOAD_TOOMANYGENERICARGS        "Could not load type '%1' from assembly '%2'. Internal limitation: Too many generic arguments."
 
-    IDS_CLASSLOAD_INLINE_ARRAY_FIELD_COUNT  "InlineArrayAttribute requires that the target type has a single instance field. Type: '%1'.  Assembly: '%2'."
-    IDS_CLASSLOAD_INLINE_ARRAY_LENGTH       "InlineArrayAttribute requires that the length argument is greater than 0. Type: '%1'.  Assembly: '%2'."
-    IDS_CLASSLOAD_INLINE_ARRAY_EXPLICIT     "InlineArrayAttribute cannot be applied to a type with explicit layout. Type: '%1'.  Assembly: '%2'."
+    IDS_CLASSLOAD_INLINE_ARRAY_FIELD_COUNT  "InlineArrayAttribute requires that the target type has a single instance field. Type: '%1'. Assembly: '%2'."
+    IDS_CLASSLOAD_INLINE_ARRAY_LENGTH       "InlineArrayAttribute requires that the length argument is greater than 0. Type: '%1'. Assembly: '%2'."
+    IDS_CLASSLOAD_INLINE_ARRAY_EXPLICIT     "InlineArrayAttribute cannot be applied to a type with explicit layout. Type: '%1'. Assembly: '%2'."
+
+    IDS_CLASSLOAD_BYREF_OF_BYREF            "Could not create a ByRef of a ByRef. Type: '%1'. Assembly: '%2'."
+    IDS_CLASSLOAD_POINTER_OF_BYREF          "Could not create a pointer to a ByRef. Type: '%1'. Assembly: '%2'."
 
     IDS_INVALID_RECURSIVE_GENERIC_FIELD_LOAD "Could not load type '%1' from assembly '%2' because of an invalid self-referential generic field."
 
@@ -488,9 +491,6 @@ BEGIN
     IDS_EE_CLASS_TO_VARIANT_TLB_NOT_REG     "Type '%1' cannot be marshalled to a Variant. Type library is not registered."
     IDS_EE_CANNOT_MAP_TO_MANAGED_VC         "The specified record cannot be mapped to a managed value class."
 
-    IDS_EE_SAFEHANDLECLOSED                 "Safe handle has been closed"
-    IDS_EE_SAFEHANDLECANNOTSETHANDLE        "Safe handle's handle field can only be set if the safe handle is not closed and has a ref count of 1."
-
     IDS_EE_SH_IN_VARIANT_NOT_SUPPORTED      "SafeHandle derived types cannot be stored in Variants."
 
     IDS_EE_CH_IN_VARIANT_NOT_SUPPORTED      "CriticalHandle derived types cannot be stored in Variants."
diff --git a/src/coreclr/dlls/mscorrc/resource.h b/src/coreclr/dlls/mscorrc/resource.h
index 88473e27d102..f1e7f1fba9a3 100644
--- a/src/coreclr/dlls/mscorrc/resource.h
+++ b/src/coreclr/dlls/mscorrc/resource.h
@@ -174,6 +174,9 @@
 #define IDS_CLASSLOAD_INLINE_ARRAY_LENGTH       0x17ad
 #define IDS_CLASSLOAD_INLINE_ARRAY_EXPLICIT     0x17ae
 
+#define IDS_CLASSLOAD_BYREF_OF_BYREF            0x17af
+#define IDS_CLASSLOAD_POINTER_OF_BYREF          0x17b0
+
 #define IDS_DEBUG_USERBREAKPOINT                0x17b6
 
 #define IDS_PERFORMANCEMON_FUNCNOTFOUND         0x17bb
@@ -240,9 +243,6 @@
 #define IDS_EE_METHOD_NOT_FOUND_ON_EV_PROV      0x1a24
 #define IDS_EE_BAD_COMEVENTITF_CLASS            0x1a25
 
-#define IDS_EE_COREXEMAIN2_FAILED_TITLE         0x1a2b
-#define IDS_EE_COREXEMAIN2_FAILED_TEXT          0x1a2c
-
 #define IDS_EE_ICUSTOMMARSHALERNOTIMPL          0x1a2e
 #define IDS_EE_GETINSTANCENOTIMPL               0x1a2f
 
@@ -262,9 +262,6 @@
 #define IDS_EE_BADMARSHAL_RETURNSHCOMTONATIVE   0x1a3c
 #define IDS_EE_BADMARSHAL_SAFEHANDLE            0x1a3d
 
-#define IDS_EE_SAFEHANDLECLOSED                 0x1a3f
-#define IDS_EE_SAFEHANDLECANNOTSETHANDLE        0x1a40
-
 #define IDS_EE_BADMARSHAL_ABSTRACTRETSAFEHANDLE 0x1a44
 #define IDS_EE_SH_IN_VARIANT_NOT_SUPPORTED      0x1a47
 
diff --git a/src/coreclr/gc/CMakeLists.txt b/src/coreclr/gc/CMakeLists.txt
index a1509b9898b6..89937554c041 100644
--- a/src/coreclr/gc/CMakeLists.txt
+++ b/src/coreclr/gc/CMakeLists.txt
@@ -36,20 +36,9 @@ else()
     windows/Native.rc)
 endif(CLR_CMAKE_HOST_UNIX)
 
-if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
-  set (GC_SOURCES
-    ${GC_SOURCES}
-    vxsort/isa_detection.cpp
-    vxsort/do_vxsort_avx2.cpp
-    vxsort/do_vxsort_avx512.cpp
-    vxsort/machine_traits.avx2.cpp
-    vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.cpp
-    vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp
-    vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp
-    vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp
-    vxsort/smallsort/avx2_load_mask_tables.cpp
-)
-endif (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
+if (CLR_CMAKE_TARGET_ARCH_AMD64)
+  add_subdirectory(vxsort)
+endif (CLR_CMAKE_TARGET_ARCH_AMD64)
 
 if (CLR_CMAKE_TARGET_WIN32)
   set(GC_HEADERS
@@ -87,7 +76,7 @@ if (CLR_CMAKE_TARGET_WIN32)
     handletablepriv.h
     objecthandle.h
     softwarewritewatch.h
-    vxsort/do_vxsort.h)
+  )
 endif(CLR_CMAKE_TARGET_WIN32)
 
 if(CLR_CMAKE_HOST_WIN32)
@@ -100,6 +89,13 @@ endif(CLR_CMAKE_HOST_WIN32)
 
 set (GC_LINK_LIBRARIES ${GC_LINK_LIBRARIES} gc_pal)
 
+if(CLR_CMAKE_TARGET_ARCH_AMD64)
+    list(APPEND GC_LINK_LIBRARIES
+        gc_vxsort
+    )
+endif(CLR_CMAKE_TARGET_ARCH_AMD64)
+
+
 list(APPEND GC_SOURCES ${GC_HEADERS})
 
 convert_to_absolute_path(GC_SOURCES ${GC_SOURCES})
diff --git a/src/coreclr/gc/env/common.h b/src/coreclr/gc/env/common.h
index 78562ef0438b..5d8cff7f7790 100644
--- a/src/coreclr/gc/env/common.h
+++ b/src/coreclr/gc/env/common.h
@@ -22,8 +22,12 @@
 #include <stdarg.h>
 #include <memory.h>
 #include <limits.h>
+#include <math.h>
 
 #include <new>
+#include <type_traits>
+#include <limits>
+#include <algorithm>
 
 #ifdef TARGET_UNIX
 #include <pthread.h>
diff --git a/src/coreclr/gc/env/gcenv.base.h b/src/coreclr/gc/env/gcenv.base.h
index a059f5d33b92..3e0122f0ea50 100644
--- a/src/coreclr/gc/env/gcenv.base.h
+++ b/src/coreclr/gc/env/gcenv.base.h
@@ -100,14 +100,6 @@ inline HRESULT HRESULT_FROM_WIN32(unsigned long x)
 
 #define ZeroMemory(Destination,Length) memset((Destination),0,(Length))
 
-#ifndef min
-#define min(a,b) (((a) < (b)) ? (a) : (b))
-#endif
-
-#ifndef max
-#define max(a,b) (((a) > (b)) ? (a) : (b))
-#endif
-
 #define C_ASSERT(cond) static_assert( cond, #cond )
 
 #define UNREFERENCED_PARAMETER(P)          (void)(P)
@@ -393,17 +385,11 @@ inline void* ALIGN_DOWN(void* ptr, size_t alignment)
     return reinterpret_cast<void*>(ALIGN_DOWN(as_size_t, alignment));
 }
 
-inline int GetRandomInt(int max)
-{
-    return rand() % max;
-}
-
 typedef struct _PROCESSOR_NUMBER {
     uint16_t Group;
     uint8_t Number;
     uint8_t Reserved;
 } PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
-
 #endif // _INC_WINDOWS
 
 // -----------------------------------------------------------------------------------------------------------
diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp
index 0471326c0af5..e43047cf6e11 100644
--- a/src/coreclr/gc/gc.cpp
+++ b/src/coreclr/gc/gc.cpp
@@ -18,7 +18,7 @@
 
 #include "gcpriv.h"
 
-#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
+#ifdef TARGET_AMD64
 #define USE_VXSORT
 #else
 #define USE_INTROSORT
@@ -2367,6 +2367,7 @@ int         gc_heap::conserve_mem_setting = 0;
 bool        gc_heap::spin_count_unit_config_p = false;
 
 uint64_t    gc_heap::suspended_start_time = 0;
+uint64_t    gc_heap::change_heap_count_time = 0;
 uint64_t    gc_heap::end_gc_time = 0;
 uint64_t    gc_heap::total_suspended_time = 0;
 uint64_t    gc_heap::process_start_time = 0;
@@ -3112,7 +3113,7 @@ void gc_history_global::print()
 
 uint32_t limit_time_to_uint32 (uint64_t time)
 {
-    time = min (time, UINT32_MAX);
+    time = min (time, (uint64_t)UINT32_MAX);
     return (uint32_t)time;
 }
 
@@ -6249,12 +6250,14 @@ class heap_select
     static uint16_t proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
     static uint16_t heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
     static uint16_t heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
-    static uint16_t proc_no_to_numa_node[MAX_SUPPORTED_CPUS];
     static uint16_t numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];
+
+#ifdef HEAP_BALANCE_INSTRUMENTATION
     // Note this is the total numa nodes GC heaps are on. There might be
     // more on the machine if GC threads aren't using all of them.
     static uint16_t total_numa_nodes;
     static node_heap_count heaps_on_node[MAX_SUPPORTED_NODES];
+#endif
 
     static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers)
     {
@@ -6323,7 +6326,6 @@ class heap_select
                 // we found a heap on cur_node_no
                 heap_no_to_proc_no[cur_heap_no] = proc_no[i];
                 heap_no_to_numa_node[cur_heap_no] = cur_node_no;
-                proc_no_to_numa_node[proc_no[i]] = cur_node_no;
 
                 cur_heap_no++;
             }
@@ -6411,37 +6413,16 @@ class heap_select
         return GCToOSInterface::CanGetCurrentProcessorNumber();
     }
 
-    static uint16_t find_heap_no_from_proc_no(uint16_t proc_no)
-    {
-        return proc_no_to_heap_no[proc_no];
-    }
-
     static uint16_t find_proc_no_from_heap_no(int heap_number)
     {
         return heap_no_to_proc_no[heap_number];
     }
 
-    static void set_proc_no_for_heap(int heap_number, uint16_t proc_no)
-    {
-        heap_no_to_proc_no[heap_number] = proc_no;
-    }
-
     static uint16_t find_numa_node_from_heap_no(int heap_number)
     {
         return heap_no_to_numa_node[heap_number];
     }
 
-    static uint16_t find_numa_node_from_proc_no (uint16_t proc_no)
-    {
-        return proc_no_to_numa_node[proc_no];
-    }
-
-    static void set_numa_node_for_heap_and_proc(int heap_number, uint16_t proc_no, uint16_t numa_node)
-    {
-        heap_no_to_numa_node[heap_number] = numa_node;
-        proc_no_to_numa_node[proc_no] = numa_node;
-    }
-
     static void init_numa_node_to_heap_map(int nheaps)
     {
         // Called right after GCHeap::Init() for each heap
@@ -6450,84 +6431,126 @@ class heap_select
         // numa_node_to_heap_map[numa_node + 1] is set to the first heap number not on that node
         // Set the start of the heap number range for the first NUMA node
         numa_node_to_heap_map[heap_no_to_numa_node[0]] = 0;
+#ifdef HEAP_BALANCE_INSTRUMENTATION
         total_numa_nodes = 0;
         memset (heaps_on_node, 0, sizeof (heaps_on_node));
         heaps_on_node[0].node_no = heap_no_to_numa_node[0];
         heaps_on_node[0].heap_count = 1;
+#endif //HEAP_BALANCE_INSTRUMENTATION
 
         for (int i=1; i < nheaps; i++)
         {
             if (heap_no_to_numa_node[i] != heap_no_to_numa_node[i-1])
             {
+#ifdef HEAP_BALANCE_INSTRUMENTATION
                 total_numa_nodes++;
                 heaps_on_node[total_numa_nodes].node_no = heap_no_to_numa_node[i];
+#endif
 
                 // Set the end of the heap number range for the previous NUMA node
                 numa_node_to_heap_map[heap_no_to_numa_node[i-1] + 1] =
                 // Set the start of the heap number range for the current NUMA node
                 numa_node_to_heap_map[heap_no_to_numa_node[i]] = (uint16_t)i;
             }
+#ifdef HEAP_BALANCE_INSTRUMENTATION
             (heaps_on_node[total_numa_nodes].heap_count)++;
+#endif
         }
 
         // Set the end of the heap range for the last NUMA node
         numa_node_to_heap_map[heap_no_to_numa_node[nheaps-1] + 1] = (uint16_t)nheaps; //mark the end with nheaps
+
+#ifdef HEAP_BALANCE_INSTRUMENTATION
         total_numa_nodes++;
+#endif
     }
 
-    // TODO: curently this doesn't work with GCHeapAffinitizeMask/GCHeapAffinitizeRanges
-    // because the heaps may not be on contiguous active procs.
-    //
-    // This is for scenarios where GCHeapCount is specified as something like
-    // (g_num_active_processors - 2) to allow less randomization to the Server GC threads.
-    // In this case we want to assign the right heaps to those procs, ie if they share
-    // the same numa node we want to assign local heaps to those procs. Otherwise we
-    // let the heap balancing mechanism take over for now.
-    static void distribute_other_procs()
+    static bool get_info_proc (int index, uint16_t* proc_no, uint16_t* node_no, int* start_heap, int* end_heap)
+    {
+        if (!GCToOSInterface::GetProcessorForHeap ((uint16_t)index, proc_no, node_no))
+            return false;
+
+        if (*node_no == NUMA_NODE_UNDEFINED)
+            *node_no = 0;
+
+        *start_heap = (int)numa_node_to_heap_map[*node_no];
+        *end_heap = (int)(numa_node_to_heap_map[*node_no + 1]);
+
+        return true;
+    }
+
+    static void distribute_other_procs (bool distribute_all_p)
     {
         if (affinity_config_specified_p)
             return;
 
-        uint16_t proc_no = 0;
-        uint16_t node_no = 0;
-        bool res = false;
-        int start_heap = -1;
-        int end_heap = -1;
-        int current_node_no = -1;
-        int current_heap_on_node = -1;
-
-        for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++)
+        if (distribute_all_p)
         {
-            if (!GCToOSInterface::GetProcessorForHeap ((uint16_t)i, &proc_no, &node_no))
-                break;
+            uint16_t current_heap_no_on_node[MAX_SUPPORTED_CPUS];
+            memset (current_heap_no_on_node, 0, sizeof (current_heap_no_on_node));
+            uint16_t current_heap_no = 0;
 
-            if (node_no == NUMA_NODE_UNDEFINED)
-                node_no = 0;
+            uint16_t proc_no = 0;
+            uint16_t node_no = 0;
 
-            int start_heap = (int)numa_node_to_heap_map[node_no];
-            int end_heap = (int)(numa_node_to_heap_map[node_no + 1]);
-
-            if ((end_heap - start_heap) > 0)
+            for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++)
             {
-                if (node_no == current_node_no)
+                int start_heap, end_heap;
+                if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap))
+                    break;
+
+                // This indicates there are heaps on this node
+                if ((end_heap - start_heap) > 0)
                 {
-                    // We already iterated through all heaps on this node, don't add more procs to these
-                    // heaps.
-                    if (current_heap_on_node >= end_heap)
-                    {
-                        continue;
-                    }
+                    proc_no_to_heap_no[proc_no] = (current_heap_no_on_node[node_no] % (uint16_t)(end_heap - start_heap)) + (uint16_t)start_heap;
+                    (current_heap_no_on_node[node_no])++;
                 }
                 else
                 {
-                    current_node_no = node_no;
-                    current_heap_on_node = start_heap;
+                    proc_no_to_heap_no[proc_no] = current_heap_no % gc_heap::n_heaps;
+                    (current_heap_no)++;
                 }
+            }
+        }
+        else
+        {
+            // This is for scenarios where GCHeapCount is specified as something like
+            // (g_num_active_processors - 2) to allow less randomization to the Server GC threads.
+            // In this case we want to assign the right heaps to those procs, ie if they share
+            // the same numa node we want to assign local heaps to those procs. Otherwise we
+            // let the heap balancing mechanism take over for now.
+            uint16_t proc_no = 0;
+            uint16_t node_no = 0;
+            int current_node_no = -1;
+            int current_heap_on_node = -1;
 
-                proc_no_to_heap_no[proc_no] = (uint16_t)current_heap_on_node;
-                proc_no_to_numa_node[proc_no] = (uint16_t)node_no;
+            for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++)
+            {
+                int start_heap, end_heap;
+                if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap))
+                    break;
 
-                current_heap_on_node++;
+                if ((end_heap - start_heap) > 0)
+                {
+                    if (node_no == current_node_no)
+                    {
+                        // We already iterated through all heaps on this node, don't add more procs to these
+                        // heaps.
+                        if (current_heap_on_node >= end_heap)
+                        {
+                            continue;
+                        }
+                    }
+                    else
+                    {
+                        current_node_no = node_no;
+                        current_heap_on_node = start_heap;
+                    }
+
+                    proc_no_to_heap_no[proc_no] = (uint16_t)current_heap_on_node;
+
+                    current_heap_on_node++;
+                }
             }
         }
     }
@@ -6541,43 +6564,6 @@ class heap_select
         dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPget_heap_range: %d is in numa node %d, start = %d, end = %d", hn, numa_node, *start, *end));
 #endif //HEAP_BALANCE_INSTRUMENTATION
     }
-
-    // This gets the next valid numa node index starting at current_index+1.
-    // It assumes that current_index is a valid node index.
-    // If current_index+1 is at the end this will start at the beginning. So this will
-    // always return a valid node index, along with that node's start/end heaps.
-    static uint16_t get_next_numa_node (uint16_t current_index, int* start, int* end)
-    {
-        int start_index = current_index + 1;
-        int nheaps = gc_heap::n_heaps;
-
-        bool found_node_with_heaps_p = false;
-        do
-        {
-            int start_heap = (int)numa_node_to_heap_map[start_index];
-            int end_heap = (int)numa_node_to_heap_map[start_index + 1];
-            if (start_heap == nheaps)
-            {
-                // This is the last node.
-                start_index = 0;
-                continue;
-            }
-
-            if ((end_heap - start_heap) == 0)
-            {
-                // This node has no heaps.
-                start_index++;
-            }
-            else
-            {
-                found_node_with_heaps_p = true;
-                *start = start_heap;
-                *end = end_heap;
-            }
-        } while (!found_node_with_heaps_p);
-
-        return (uint16_t)start_index;
-    }
 };
 uint8_t* heap_select::sniff_buffer;
 unsigned heap_select::n_sniff_buffers;
@@ -6585,10 +6571,11 @@ unsigned heap_select::cur_sniff_index;
 uint16_t heap_select::proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
 uint16_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
 uint16_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
-uint16_t heap_select::proc_no_to_numa_node[MAX_SUPPORTED_CPUS];
 uint16_t heap_select::numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];
+#ifdef HEAP_BALANCE_INSTRUMENTATION
 uint16_t  heap_select::total_numa_nodes;
 node_heap_count heap_select::heaps_on_node[MAX_SUPPORTED_NODES];
+#endif
 
 #ifdef HEAP_BALANCE_INSTRUMENTATION
 // This records info we use to look at effect of different strategies
@@ -6985,7 +6972,7 @@ void gc_heap::gc_thread_function ()
 
                 dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index];
                 wait_time = min (wait_time, (uint32_t)(sample.elapsed_between_gcs / 1000 / 3));
-                wait_time = max (wait_time, 1);
+                wait_time = max (wait_time, 1u);
 
                 dprintf (6666, ("gc#0 thread waiting for %d ms (betwen GCs %I64d)", wait_time, sample.elapsed_between_gcs));
             }
@@ -7035,7 +7022,7 @@ void gc_heap::gc_thread_function ()
             }
 
             // wait till the threads that should have gone idle at least reached the place where they are about to wait on the idle event.
-            if ((gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && 
+            if ((gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) &&
                 (n_heaps != dynamic_heap_count_data.last_n_heaps))
             {
                 int spin_count = 1024;
@@ -10318,11 +10305,11 @@ static void do_vxsort (uint8_t** item_array, ptrdiff_t item_count, uint8_t* rang
 {
     // above this threshold, using AVX2 for sorting will likely pay off
     // despite possible downclocking on some devices
-    const size_t AVX2_THRESHOLD_SIZE = 8 * 1024;
+    const ptrdiff_t AVX2_THRESHOLD_SIZE = 8 * 1024;
 
     // above this threshold, using AVX512F for sorting will likely pay off
     // despite possible downclocking on current devices
-    const size_t AVX512F_THRESHOLD_SIZE = 128 * 1024;
+    const ptrdiff_t AVX512F_THRESHOLD_SIZE = 128 * 1024;
 
     if (item_count <= 1)
         return;
@@ -12154,7 +12141,7 @@ void gc_heap::clear_region_demoted (heap_segment* region)
 
 int gc_heap::get_plan_gen_num (int gen_number)
 {
-    return ((settings.promotion) ? min ((gen_number + 1), max_generation) : gen_number);
+    return ((settings.promotion) ? min ((gen_number + 1), (int)max_generation) : gen_number);
 }
 
 uint8_t* gc_heap::get_uoh_start_object (heap_segment* region, generation* gen)
@@ -12293,7 +12280,7 @@ void gc_heap::init_heap_segment (heap_segment* seg, gc_heap* hp
 #endif //MULTIPLE_HEAPS
 
 #ifdef USE_REGIONS
-    int gen_num_for_region = min (gen_num, max_generation);
+    int gen_num_for_region = min (gen_num, (int)max_generation);
     set_region_gen_num (seg, gen_num_for_region);
     heap_segment_plan_gen_num (seg) = gen_num_for_region;
     heap_segment_swept_in_plan (seg) = false;
@@ -13313,7 +13300,7 @@ void gc_heap::distribute_free_regions()
             const int i = 0;
             const int n_heaps = 1;
 #endif //MULTIPLE_HEAPS
-            ptrdiff_t budget_gen = max (hp->estimate_gen_growth (gen), 0);
+            ptrdiff_t budget_gen = max (hp->estimate_gen_growth (gen), (ptrdiff_t)0);
             int kind = gen >= loh_generation;
             size_t budget_gen_in_region_units = (budget_gen + (region_size[kind] - 1)) / region_size[kind];
             dprintf (REGIONS_LOG, ("h%2d gen %d has an estimated growth of %zd bytes (%zd regions)", i, gen, budget_gen, budget_gen_in_region_units));
@@ -13533,7 +13520,7 @@ void gc_heap::distribute_free_regions()
     if (ephemeral_elapsed >= DECOMMIT_TIME_STEP_MILLISECONDS)
     {
         gc_last_ephemeral_decommit_time = dd_time_clock (dd0);
-        size_t decommit_step_milliseconds = min (ephemeral_elapsed, (10*1000));
+        size_t decommit_step_milliseconds = min (ephemeral_elapsed, (size_t)(10*1000));
 
         decommit_step (decommit_step_milliseconds);
     }
@@ -13909,7 +13896,7 @@ uint32_t adjust_heaps_hard_limit_worker (uint32_t nhp, size_t limit)
     size_t aligned_limit =  align_on_segment_hard_limit (limit);
     uint32_t nhp_oh = (uint32_t)(aligned_limit / min_segment_size_hard_limit);
     nhp = min (nhp_oh, nhp);
-    return (max (nhp, 1));
+    return (max (nhp, 1u));
 }
 
 uint32_t gc_heap::adjust_heaps_hard_limit (uint32_t nhp)
@@ -14313,7 +14300,7 @@ gc_heap::init_semi_shared()
 #endif //!USE_REGIONS
 
 #ifdef MULTIPLE_HEAPS
-    mark_list_size = min (100*1024, max (8192, soh_segment_size/(2*10*32)));
+    mark_list_size = min ((size_t)100*1024, max ((size_t)8192, soh_segment_size/(2*10*32)));
 #ifdef DYNAMIC_HEAP_COUNT
     if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
     {
@@ -14335,7 +14322,7 @@ gc_heap::init_semi_shared()
     }
 #else //MULTIPLE_HEAPS
 
-    mark_list_size = min(100*1024, max (8192, soh_segment_size/(64*32)));
+    mark_list_size = min((size_t)100*1024, max ((size_t)8192, soh_segment_size/(64*32)));
     g_mark_list_total_size = mark_list_size;
     g_mark_list = make_mark_list (mark_list_size);
 
@@ -14457,7 +14444,7 @@ gc_heap::init_semi_shared()
     if (bgc_tuning::enable_fl_tuning && (current_memory_load < bgc_tuning::memory_load_goal))
     {
         uint32_t distance_to_goal = bgc_tuning::memory_load_goal - current_memory_load;
-        bgc_tuning::stepping_interval = max (distance_to_goal / 10, 1);
+        bgc_tuning::stepping_interval = max (distance_to_goal / 10, 1u);
         bgc_tuning::last_stepping_mem_load = current_memory_load;
         bgc_tuning::last_stepping_bgc_count = 0;
         dprintf (BGC_TUNING_LOG, ("current ml: %d, %d to goal, interval: %d",
@@ -19204,7 +19191,7 @@ void gc_heap::balance_heaps (alloc_context* acontext)
 
 #ifdef HEAP_BALANCE_INSTRUMENTATION
                         int current_proc_no_before_set_ideal = GCToOSInterface::GetCurrentProcessorNumber ();
-                        if (current_proc_no_before_set_ideal != last_proc_no)
+                        if ((uint16_t)current_proc_no_before_set_ideal != last_proc_no)
                         {
                             dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPSPa: %d->%d", last_proc_no, current_proc_no_before_set_ideal));
                             multiple_procs_p = true;
@@ -20838,6 +20825,12 @@ int gc_heap::joined_generation_to_condemn (BOOL should_evaluate_elevation,
         }
     }
 
+    if (settings.reason == reason_induced_aggressive)
+    {
+        gc_data_global.gen_to_condemn_reasons.set_condition (gen_joined_aggressive);
+        settings.loh_compaction = TRUE;
+    }
+
 #ifdef BGC_SERVO_TUNING
     if (bgc_tuning::should_trigger_ngc2())
     {
@@ -20961,14 +20954,14 @@ size_t gc_heap::get_total_allocated_since_last_gc()
     {
         gc_heap* hp = gc_heap::g_heaps[i];
 #else //MULTIPLE_HEAPS
-    {
-        gc_heap* hp = pGenGCHeap;
+        {
+            gc_heap* hp = pGenGCHeap;
 #endif //MULTIPLE_HEAPS
-        total_allocated_size += hp->allocated_since_last_gc[0] + hp->allocated_since_last_gc[1];
-        hp->allocated_since_last_gc[0] = 0;
-        hp->allocated_since_last_gc[1] = 0;
-    }
-    return total_allocated_size;
+            total_allocated_size += hp->allocated_since_last_gc[0] + hp->allocated_since_last_gc[1];
+            hp->allocated_since_last_gc[0] = 0;
+            hp->allocated_since_last_gc[1] = 0;
+        }
+        return total_allocated_size;
 }
 
 // Gets what's allocated on both SOH, LOH, etc that hasn't been collected.
@@ -21822,13 +21815,13 @@ size_t gc_heap::min_reclaim_fragmentation_threshold (uint32_t num_heaps)
     dprintf (GTC_LOG, ("min av: %zd, 10%% gen2: %zd, 3%% mem: %zd",
         min_mem_based_on_available, ten_percent_size, three_percent_mem));
 #endif //SIMPLE_DPRINTF
-    return (size_t)(min (min_mem_based_on_available, min (ten_percent_size, three_percent_mem)));
+    return (size_t)(min ((uint64_t)min_mem_based_on_available, min ((uint64_t)ten_percent_size, three_percent_mem)));
 }
 
 inline
 uint64_t gc_heap::min_high_fragmentation_threshold(uint64_t available_mem, uint32_t num_heaps)
 {
-    return min (available_mem, (256*1024*1024)) / num_heaps;
+    return min (available_mem, (uint64_t)(256*1024*1024)) / num_heaps;
 }
 
 enum {
@@ -22015,7 +22008,7 @@ void gc_heap::update_end_gc_time_per_heap()
 
         if (heap_number == 0)
         {
-            dprintf (6666, ("prev gen%d GC end time: prev start %I64d + prev gc elapsed %Id = %I64d",
+            dprintf (3, ("prev gen%d GC end time: prev start %I64d + prev gc elapsed %Id = %I64d",
                 gen_number, dd_previous_time_clock (dd), dd_gc_elapsed_time (dd), (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd))));
         }
 
@@ -22023,45 +22016,53 @@ void gc_heap::update_end_gc_time_per_heap()
 
         if (heap_number == 0)
         {
-            dprintf (6666, ("updated NGC%d %Id elapsed time to %I64d - %I64d = %I64d", gen_number, dd_gc_clock (dd), end_gc_time, dd_time_clock (dd), dd_gc_elapsed_time (dd)));
+            dprintf (3, ("updated NGC%d %Id elapsed time to %I64d - %I64d = %I64d", gen_number, dd_gc_clock (dd), end_gc_time, dd_time_clock (dd), dd_gc_elapsed_time (dd)));
         }
     }
 
 #ifdef DYNAMIC_HEAP_COUNT
     if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes))
     {
-        dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index];
-        sample.elapsed_between_gcs = end_gc_time - last_suspended_end_time;
-        sample.gc_pause_time = dd_gc_elapsed_time (dynamic_data_of (0));
-        sample.msl_wait_time = get_msl_wait_time();
+        if (settings.gc_index > 1)
+        {
+            dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index];
+            sample.elapsed_between_gcs = end_gc_time - last_suspended_end_time;
+            sample.gc_pause_time = dd_gc_elapsed_time (dynamic_data_of (0));
+            sample.msl_wait_time = get_msl_wait_time ();
+            // could cache this - we will get it again soon in do_post_gc
+            sample.gc_survived_size = get_total_promoted ();
 
-        dprintf (6666, ("sample#%d: this GC end %I64d - last sus end %I64d = %I64d, this GC pause %I64d, msl wait %I64d",
-            dynamic_heap_count_data.sample_index, end_gc_time, last_suspended_end_time, sample.elapsed_between_gcs, sample.gc_pause_time, sample.msl_wait_time));
+            dprintf (6666, ("sample#%d: this GC end %I64d - last sus end %I64d = %I64d, this GC pause %I64d, msl wait %I64d",
+                dynamic_heap_count_data.sample_index, end_gc_time, last_suspended_end_time, sample.elapsed_between_gcs, sample.gc_pause_time, sample.msl_wait_time));
 
-        last_suspended_end_time = end_gc_time;
+            GCEventFireHeapCountSample_V1 (
+                (uint64_t)VolatileLoadWithoutBarrier (&settings.gc_index),
+                sample.elapsed_between_gcs,
+                sample.gc_pause_time,
+                sample.msl_wait_time);
 
-        GCEventFireHeapCountSample_V1 (
-            (uint64_t)VolatileLoadWithoutBarrier (&settings.gc_index),
-            sample.elapsed_between_gcs,
-            sample.gc_pause_time,
-            sample.msl_wait_time);
+            dynamic_heap_count_data.sample_index = (dynamic_heap_count_data.sample_index + 1) % dynamic_heap_count_data_t::sample_size;
+            (dynamic_heap_count_data.current_samples_count)++;
 
-        dynamic_heap_count_data.sample_index = (dynamic_heap_count_data.sample_index + 1) % dynamic_heap_count_data_t::sample_size;
+            if (settings.condemned_generation == max_generation)
+            {
+                gc_index_full_gc_end = dd_gc_clock (dynamic_data_of (0));
+                size_t elapsed_between_gen2_gcs = end_gc_time - prev_gen2_end_time;
+                size_t gen2_elapsed_time = sample.gc_pause_time;
+                dynamic_heap_count_data_t::gen2_sample& g2_sample = dynamic_heap_count_data.gen2_samples[dynamic_heap_count_data.gen2_sample_index];
+                g2_sample.gc_index = VolatileLoadWithoutBarrier (&(settings.gc_index));
+                g2_sample.gc_percent = (float)gen2_elapsed_time * 100.0f / elapsed_between_gen2_gcs;
+                (dynamic_heap_count_data.current_gen2_samples_count)++;
 
-        if (settings.condemned_generation == max_generation)
-        {
-            gc_index_full_gc_end = dd_gc_clock (dynamic_data_of (0));
-            size_t elapsed_between_gen2_gcs = end_gc_time - prev_gen2_end_time;
-            size_t gen2_elapsed_time = sample.gc_pause_time;
-            dynamic_heap_count_data.gen2_gc_percents[dynamic_heap_count_data.gen2_sample_index] = (float)gen2_elapsed_time * 100.0f / elapsed_between_gen2_gcs;
+                dprintf (6666, ("gen2 sample#%d: this GC end %I64d - last gen2 end %I64d = %I64d, GC elapsed %I64d, percent %.3f",
+                    dynamic_heap_count_data.gen2_sample_index, end_gc_time, prev_gen2_end_time, elapsed_between_gen2_gcs, gen2_elapsed_time, g2_sample.gc_percent));
+                dynamic_heap_count_data.gen2_sample_index = (dynamic_heap_count_data.gen2_sample_index + 1) % dynamic_heap_count_data_t::sample_size;
+            }
 
-            dprintf (6666, ("gen2 sample#%d: this GC end %I64d - last gen2 end %I64d = %I64d, GC elapsed %I64d, percent %.3f",
-                dynamic_heap_count_data.gen2_sample_index, end_gc_time, prev_gen2_end_time, elapsed_between_gen2_gcs,
-                gen2_elapsed_time, dynamic_heap_count_data.gen2_gc_percents[dynamic_heap_count_data.gen2_sample_index]));
-            dynamic_heap_count_data.gen2_sample_index = (dynamic_heap_count_data.gen2_sample_index + 1) % dynamic_heap_count_data_t::sample_size;
+            calculate_new_heap_count ();
         }
 
-        calculate_new_heap_count ();
+        last_suspended_end_time = end_gc_time;
     }
 #endif //DYNAMIC_HEAP_COUNT
 }
@@ -22081,7 +22082,7 @@ size_t gc_heap::exponential_smoothing (int gen, size_t collection_count, size_t
 {
     // to avoid spikes in mem usage due to short terms fluctuations in survivorship,
     // apply some smoothing.
-    size_t smoothing = min(3, collection_count);
+    size_t smoothing = min((size_t)3, collection_count);
 
     size_t desired_total = desired_per_heap * n_heaps;
     size_t new_smoothed_desired_total = desired_total / smoothing + ((smoothed_desired_total[gen] / smoothing) * (smoothing - 1));
@@ -22190,7 +22191,7 @@ void gc_heap::gc1()
     }
 
     //adjust the allocation size from the pinned quantities.
-    for (int gen_number = 0; gen_number <= min (max_generation,n+1); gen_number++)
+    for (int gen_number = 0; gen_number <= min ((int)max_generation,n+1); gen_number++)
     {
         generation* gn = generation_of (gen_number);
         if (settings.compaction)
@@ -22228,11 +22229,16 @@ void gc_heap::gc1()
             dprintf (6666, ("updating BGC %Id elapsed time to %I64d - %I64d = %I64d", dd_gc_clock (dd), end_gc_time, dd_time_clock (dd), dd_gc_elapsed_time (dd)));
 
             float bgc_percent = (float)dd_gc_elapsed_time (dd) * 100.0f / (float)time_since_last_gen2;
-            dynamic_heap_count_data.gen2_gc_percents[dynamic_heap_count_data.gen2_sample_index] = bgc_percent;
+            dynamic_heap_count_data_t::gen2_sample& g2_sample = dynamic_heap_count_data.gen2_samples[dynamic_heap_count_data.gen2_sample_index];
+            g2_sample.gc_index = VolatileLoadWithoutBarrier (&(settings.gc_index));
+            g2_sample.gc_percent = bgc_percent;
             dprintf (6666, ("gen2 sample %d elapsed %Id * 100 / time inbetween gen2 %Id = %.3f",
                 dynamic_heap_count_data.gen2_sample_index, dd_gc_elapsed_time (dd), time_since_last_gen2, bgc_percent));
             dynamic_heap_count_data.gen2_sample_index = (dynamic_heap_count_data.gen2_sample_index + 1) % dynamic_heap_count_data_t::sample_size;
+            (dynamic_heap_count_data.current_gen2_samples_count)++;
             gc_index_full_gc_end = dd_gc_clock (dynamic_data_of (0));
+
+            calculate_new_heap_count ();
         }
 #endif //DYNAMIC_HEAP_COUNT
 
@@ -22365,7 +22371,7 @@ void gc_heap::gc1()
         if (alloc_contexts_used >= 1)
         {
             allocation_quantum = Align (min ((size_t)CLR_SIZE,
-                                            (size_t)max (1024, get_new_allocation (0) / (2 * alloc_contexts_used))),
+                                            (size_t)max ((size_t)1024, get_new_allocation (0) / (2 * alloc_contexts_used))),
                                             get_alignment_constant(FALSE));
             dprintf (3, ("New allocation quantum: %zd(0x%zx)", allocation_quantum, allocation_quantum));
         }
@@ -22520,6 +22526,15 @@ void gc_heap::gc1()
             {
                 limit = total_generation_count-1;
             }
+
+            size_t total_max_gen_size = 0;
+            for (int i = 0; i < gc_heap::n_heaps; i++)
+            {
+                gc_heap* hp = gc_heap::g_heaps[i];
+                dynamic_data* dd = hp->dynamic_data_of (max_generation);
+                total_max_gen_size += dd_current_size (dd) + dd_desired_allocation (dd);
+            }
+
             for (int gen = 0; gen <= limit; gen++)
             {
                 size_t total_desired = 0;
@@ -22548,20 +22563,35 @@ void gc_heap::gc1()
                     total_already_consumed = temp_total_already_consumed;
                 }
 
-                size_t desired_per_heap = Align (total_desired/gc_heap::n_heaps,
-                                                    get_alignment_constant (gen <= max_generation));
+                size_t desired_per_heap = Align (total_desired/gc_heap::n_heaps, get_alignment_constant (gen <= max_generation));
 
                 size_t already_consumed_per_heap = total_already_consumed / gc_heap::n_heaps;
 
                 if (gen == 0)
                 {
-#if 1 //subsumed by the linear allocation model
+#ifdef DYNAMIC_HEAP_COUNT
+                    if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
+                    {
+                        size_t new_allocation_datas = dynamic_heap_count_data.compute_gen0_new_allocation (total_max_gen_size);
+                        new_allocation_datas = Align (new_allocation_datas, get_alignment_constant (gen <= max_generation));
+                        dprintf (6666, ("gen0 new_alloc %Id (%.3fmb), from datas: %Id (%.3fmb)",
+                            desired_per_heap, ((double)desired_per_heap / 1000.0 / 1000.0),
+                            new_allocation_datas, ((double)new_allocation_datas / 1000.0 / 1000.0)));
+                        desired_per_heap = min (desired_per_heap, new_allocation_datas);
+                    }
+#endif //DYNAMIC_HEAP_COUNT
+
                     // to avoid spikes in mem usage due to short terms fluctuations in survivorship,
                     // apply some smoothing.
+                    size_t desired_per_heap_before_smoothing = desired_per_heap;
                     desired_per_heap = exponential_smoothing (gen, dd_collection_count (dynamic_data_of(gen)), desired_per_heap);
-#endif //0
+                    size_t desired_per_heap_after_smoothing = desired_per_heap;
 
-                    if (!heap_hard_limit)
+                    if (!heap_hard_limit
+#ifdef DYNAMIC_HEAP_COUNT
+                        && (dynamic_adaptation_mode != dynamic_adaptation_to_application_sizes)
+#endif //DYNAMIC_HEAP_COUNT
+                        )
                     {
                         // if desired_per_heap is close to min_gc_size, trim it
                         // down to min_gc_size to stay in the cache
@@ -22578,7 +22608,10 @@ void gc_heap::gc1()
                     }
 #ifdef HOST_64BIT
                     desired_per_heap = joined_youngest_desired (desired_per_heap);
-                    dprintf (2, ("final gen0 new_alloc: %zd", desired_per_heap));
+
+                    dprintf (6666, ("final gen0 new_alloc: total desired: %Id (%.3fmb/heap), before smooth %zd -> after smooth %zd -> after joined %zd",
+                        total_desired, ((double)(total_desired / n_heaps)/ 1000.0 / 1000.0),
+                        desired_per_heap_before_smoothing, desired_per_heap_after_smoothing, desired_per_heap));
 #endif // HOST_64BIT
                     gc_data_global.final_youngest_desired = desired_per_heap;
                 }
@@ -24211,6 +24244,9 @@ size_t gc_heap::get_promoted_bytes()
 
     dprintf (3, ("h%d getting surv", heap_number));
     size_t promoted = 0;
+#ifdef _MSC_VER
+#pragma loop(no_vector)
+#endif
     for (size_t i = 0; i < region_count; i++)
     {
         if (survived_per_region[i] > 0)
@@ -25075,7 +25111,6 @@ void gc_heap::recommission_heap()
 
         // copy some fields from heap0
 
-
         // this is copied to dd_previous_time_clock at the start of GC
         dd_time_clock     (dd) = dd_time_clock (heap0_dd);
 
@@ -25152,37 +25187,90 @@ float median_of_3 (float a, float b, float c)
     return b;
 }
 
-size_t gc_heap::get_num_completed_gcs ()
+float log_with_base (float x, float base)
 {
-    size_t num_completed_gcs = settings.gc_index;
-#ifdef BACKGROUND_GC
-    if (g_heaps[0]->is_bgc_in_progress ())
+    assert (x > base);
+
+    return (float)(log(x) / log(base));
+}
+
+float mean (float* arr, int size)
+{
+    float sum = 0.0;
+
+    for (int i = 0; i < size; i++)
     {
-        num_completed_gcs--;
-        dprintf (6666, ("BGC in prog, completed GCs -> %Id", num_completed_gcs));
+        sum += arr[i];
+    }
+    return (sum / size);
+}
+
+// Change it to a desired number if you want to print.
+int max_times_to_print_tcp = 0;
+
+// Return the slope, and the average values in the avg arg.
+float slope (float* y, int n, float* avg)
+{
+    assert (n > 0);
+
+    if (n == 1)
+    {
+        dprintf (6666, ("only 1 tcp: %.3f, no slope", y[0]));
+        *avg = y[0];
+        return 0.0;
+    }
+
+    int sum_x = 0;
+
+    for (int i = 0; i < n; i++)
+    {
+        sum_x += i;
+
+        if (max_times_to_print_tcp >= 0)
+        {
+            dprintf (6666, ("%.3f, ", y[i]));
+        }
+    }
+
+    float avg_x = (float)sum_x / n;
+    float avg_y = mean (y, n);
+    *avg = avg_y;
+
+    float numerator = 0.0;
+    float denominator = 0.0;
+
+    for (int i = 0; i < n; ++i)
+    {
+        numerator += ((float)i - avg_x) * (y[i] - avg_y);
+        denominator += ((float)i - avg_x) * (i - avg_x);
     }
-#endif //BACKGROUND_GC
 
-    return num_completed_gcs;
+    max_times_to_print_tcp--;
+
+    return (numerator / denominator);
 }
 
 int gc_heap::calculate_new_heap_count ()
 {
     assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes);
 
-    size_t num_completed_gcs = get_num_completed_gcs ();
-
-    dprintf (6666, ("current GC %Id(completed: %Id), prev completed GCs %Id, last full GC happened at index %Id",
-        VolatileLoadWithoutBarrier (&settings.gc_index), num_completed_gcs, dynamic_heap_count_data.prev_num_completed_gcs, gc_index_full_gc_end));
+    dprintf (6666, ("current num of samples %Id (g2: %Id) prev processed %Id (g2: %Id), last full GC happened at index %Id",
+        dynamic_heap_count_data.current_samples_count, dynamic_heap_count_data.current_gen2_samples_count,
+        dynamic_heap_count_data.processed_samples_count, dynamic_heap_count_data.processed_gen2_samples_count, gc_index_full_gc_end));
 
-    if (num_completed_gcs < (dynamic_heap_count_data.prev_num_completed_gcs + dynamic_heap_count_data_t::sample_size))
+    if ((dynamic_heap_count_data.current_samples_count < (dynamic_heap_count_data.processed_samples_count + dynamic_heap_count_data_t::sample_size)) &&
+        (dynamic_heap_count_data.current_gen2_samples_count < (dynamic_heap_count_data.processed_gen2_samples_count + dynamic_heap_count_data_t::sample_size)))
     {
         dprintf (6666, ("not enough GCs, skipping"));
         return n_heaps;
     }
 
+    bool process_eph_samples_p = (dynamic_heap_count_data.current_samples_count >= (dynamic_heap_count_data.processed_samples_count + dynamic_heap_count_data_t::sample_size));
+    bool process_gen2_samples_p = (dynamic_heap_count_data.current_gen2_samples_count >= (dynamic_heap_count_data.processed_gen2_samples_count + dynamic_heap_count_data_t::sample_size));
+
+    size_t current_gc_index = VolatileLoadWithoutBarrier (&settings.gc_index);
     float median_gen2_tcp_percent = 0.0f;
-    if (gc_index_full_gc_end >= (settings.gc_index - dynamic_heap_count_data_t::sample_size))
+    if (dynamic_heap_count_data.current_gen2_samples_count >= (dynamic_heap_count_data.processed_gen2_samples_count + dynamic_heap_count_data_t::sample_size))
     {
         median_gen2_tcp_percent = dynamic_heap_count_data.get_median_gen2_gc_percent ();
     }
@@ -25202,6 +25290,43 @@ int gc_heap::calculate_new_heap_count ()
     }
 
     float median_throughput_cost_percent = median_of_3 (throughput_cost_percents[0], throughput_cost_percents[1], throughput_cost_percents[2]);
+    float avg_throughput_cost_percent = (float)((throughput_cost_percents[0] + throughput_cost_percents[1] + throughput_cost_percents[2]) / 3.0);
+
+    // One of the reasons for outliers is something temporarily affected GC work. We pick the min tcp if the survival is very stable to avoid counting these outliers.
+    float min_tcp = throughput_cost_percents[0];
+    size_t min_survived = dynamic_heap_count_data.samples[0].gc_survived_size;
+    uint64_t min_pause = dynamic_heap_count_data.samples[0].gc_pause_time;
+    for (int i = 1; i < dynamic_heap_count_data_t::sample_size; i++)
+    {
+        min_tcp = min (throughput_cost_percents[i], min_tcp);
+        min_survived = min (dynamic_heap_count_data.samples[i].gc_survived_size, min_survived);
+        min_pause = min (dynamic_heap_count_data.samples[i].gc_pause_time, min_pause);
+    }
+
+    dprintf (6666, ("checking if samples are stable %Id %Id %Id, min tcp %.3f, min pause %I64d",
+        dynamic_heap_count_data.samples[0].gc_survived_size, dynamic_heap_count_data.samples[1].gc_survived_size, dynamic_heap_count_data.samples[2].gc_survived_size,
+        min_tcp, min_pause));
+
+    bool survived_stable_p = true;
+    if (min_survived > 0)
+    {
+        for (int i = 0; i < dynamic_heap_count_data_t::sample_size; i++)
+        {
+            dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[i];
+            float diff = (float)(sample.gc_survived_size - min_survived) / (float)min_survived;
+            dprintf (6666, ("sample %d diff from min is %Id -> %.3f", i, (sample.gc_survived_size - min_survived), diff));
+            if (diff >= 0.15)
+            {
+                survived_stable_p = false;
+            }
+        }
+    }
+
+    if (survived_stable_p)
+    {
+        dprintf (6666, ("survived is stable, so we pick min tcp %.3f", min_tcp));
+        median_throughput_cost_percent = min_tcp;
+    }
 
     // apply exponential smoothing and use 1/3 for the smoothing factor
     const float smoothing = 3;
@@ -25216,10 +25341,13 @@ int gc_heap::calculate_new_heap_count ()
         smoothed_median_throughput_cost_percent = median_throughput_cost_percent;
     }
 
-    dprintf (6666, ("median tcp: %.3f, smoothed tcp: %.3f, gen2 tcp %.3f(%.3f, %.3f, %.3f)",
-        median_throughput_cost_percent, smoothed_median_throughput_cost_percent, median_gen2_tcp_percent,
-        dynamic_heap_count_data.gen2_gc_percents[0], dynamic_heap_count_data.gen2_gc_percents[1], dynamic_heap_count_data.gen2_gc_percents[2]));
+    dprintf (6666, ("median tcp: %.3f, smoothed tcp: %.3f, avg tcp: %.3f, gen2 tcp %.3f(%.3f, %.3f, %.3f)",
+        median_throughput_cost_percent, smoothed_median_throughput_cost_percent, avg_throughput_cost_percent, median_gen2_tcp_percent,
+        dynamic_heap_count_data.gen2_samples[0].gc_percent, dynamic_heap_count_data.gen2_samples[1].gc_percent, dynamic_heap_count_data.gen2_samples[2].gc_percent));
 
+    //
+    // I'm keeping the old logic for now just to handle gen2.
+    //
     size_t heap_size = 0;
     for (int i = 0; i < n_heaps; i++)
     {
@@ -25246,8 +25374,11 @@ int gc_heap::calculate_new_heap_count ()
     // on the way up, we essentially multiply the heap count by 1.5, so we go 1, 2, 3, 5, 8 ...
     // we don't go all the way to the number of CPUs, but stay 1 or 2 short
     int step_up = (n_heaps + 1) / 2;
-    int extra_heaps = 1 + (n_max_heaps >= 32);
-    step_up = min (step_up, n_max_heaps - extra_heaps - n_heaps);
+    int extra_heaps = (n_max_heaps >= 16) + (n_max_heaps >= 64);
+    int actual_n_max_heaps = n_max_heaps - extra_heaps;
+    int max_growth = max ((n_max_heaps / 4), (1 + (actual_n_max_heaps > 3)));
+
+    step_up = min (step_up, (actual_n_max_heaps - n_heaps));
 
     // on the way down, we essentially divide the heap count by 1.5
     int step_down = (n_heaps + 1) / 3;
@@ -25285,49 +25416,337 @@ int gc_heap::calculate_new_heap_count ()
     dprintf (6666, ("stress %d -> %d", n_heaps, new_n_heaps));
 #else //STRESS_DYNAMIC_HEAP_COUNT
     int new_n_heaps = n_heaps;
-    if (median_throughput_cost_percent > 10.0f)
-    {
-        // ramp up more agressively - use as many heaps as it would take to bring
-        // the tcp down to 5%
-        new_n_heaps = (int)(n_heaps * (median_throughput_cost_percent / 5.0));
-        dprintf (6666, ("[CHP0] tcp %.3f -> %d * %.3f = %d", median_throughput_cost_percent, n_heaps, (median_throughput_cost_percent / 5.0), new_n_heaps));
-        new_n_heaps = min (new_n_heaps, n_max_heaps - extra_heaps);
-    }
-    // if the median tcp is 10% or less, react slower
-    else if ((smoothed_median_throughput_cost_percent > 5.0f) || (median_gen2_tcp_percent > 10.0f))
+
+    // target_tcp should be configurable.
+    float target_tcp = 5.0;
+    float target_gen2_tcp = 10.0;
+    float log_base = (float)1.11;
+
+    dynamic_heap_count_data.add_to_recorded_tcp (median_throughput_cost_percent);
+
+    // This is the average of whatever is in the recorded tcp buffer.
+    float avg_recorded_tcp = 0.0;
+
+    size_t num_gcs_since_last_change = current_gc_index - dynamic_heap_count_data.last_changed_gc_index;
+
+    if (process_eph_samples_p)
     {
-        if (smoothed_median_throughput_cost_percent > 5.0f)
+        dynamic_heap_count_data.last_processed_stcp = smoothed_median_throughput_cost_percent;
+
+        if ((median_throughput_cost_percent > 10.0f) || (smoothed_median_throughput_cost_percent > target_tcp))
         {
-            dprintf (6666, ("[CHP1] stcp %.3f > 5, %d + %d = %d", smoothed_median_throughput_cost_percent, n_heaps, step_up, (n_heaps + step_up)));
+            // If median is high but stcp is lower than target, and if this situation continues, stcp will quickly be above target anyway; otherwise
+            // we treat it as an outlier.
+            if (smoothed_median_throughput_cost_percent >= (target_tcp + 1.0))
+            {
+                float step_up_float = (float)(1 + actual_n_max_heaps * log_with_base ((smoothed_median_throughput_cost_percent - target_tcp), log_base) / 100.0);
+                int step_up_int = (int)step_up_float;
+
+                dprintf (6666, ("[CHP0] inc %d(%.3f), last inc %d, %Id GCs elapsed, last stcp %.3f",
+                    step_up_int, step_up_float, (int)dynamic_heap_count_data.last_changed_count,
+                    num_gcs_since_last_change, dynamic_heap_count_data.last_changed_stcp));
+
+                // Don't adjust if we just adjusted last time we checked, unless we are in an extreme situation.
+                if ((smoothed_median_throughput_cost_percent < 20.0f) &&
+                    (avg_throughput_cost_percent < 20.0f) &&
+                    (num_gcs_since_last_change < (2 * dynamic_heap_count_data_t::sample_size)))
+                {
+                    dprintf (6666, ("[CHP0] we just adjusted %Id GCs ago, skipping", num_gcs_since_last_change));
+                }
+                else
+                {
+                    if (step_up_int)
+                    {
+                        if (dynamic_heap_count_data.dec_failure_count)
+                        {
+                            dprintf (6666, ("[CHP0] intending to grow, reset dec failure count (was %d)", dynamic_heap_count_data.dec_failure_count));
+                            dynamic_heap_count_data.dec_failure_count = 0;
+                        }
+
+                        if (((int)dynamic_heap_count_data.last_changed_count > 0) && (dynamic_heap_count_data.last_changed_gc_index > 0.0) &&
+                            (num_gcs_since_last_change <= (3 * dynamic_heap_count_data_t::sample_size)))
+                        {
+                            dprintf (6666, ("[CHP0-0] just grew %d GCs ago, no change", num_gcs_since_last_change));
+                            step_up_int = 0;
+                        }
+                        else
+                        {
+                            // If the calculation tells us to grow, we should check to see if the slope has been coming down rapidly, if so there's no reason to grow.
+                            int above_target_tcp_count = dynamic_heap_count_data.rearrange_recorded_tcp ();
+                            float above_target_tcp_slope = slope (dynamic_heap_count_data.recorded_tcp_rearranged, above_target_tcp_count, &avg_recorded_tcp);
+                            float diff_pct = (target_tcp - avg_recorded_tcp) / target_tcp;
+                            float adjusted_target_tcp = dynamic_heap_count_data.get_range_upper (target_tcp);
+
+                            dprintf (6666, ("[CHP0] slope of last %d samples is %.3f. avg %.3f (%.3f%%), current tcp %.3f, adjusted target is %.3f, failure count is %d",
+                                above_target_tcp_count, above_target_tcp_slope, avg_recorded_tcp, (diff_pct * 100.0),
+                                median_throughput_cost_percent, adjusted_target_tcp, dynamic_heap_count_data.inc_failure_count));
+
+                            if (dynamic_heap_count_data.is_tcp_in_range (diff_pct, above_target_tcp_slope))
+                            {
+                                step_up_int = 0;
+                                dprintf (6666, ("[CHP0-1] slope %.3f and already close to target %.3f (%.3f%%), no change", above_target_tcp_slope, avg_recorded_tcp, (diff_pct * 100.0)));
+                            }
+                            else
+                            {
+                                if (above_target_tcp_slope < 0.0)
+                                {
+                                    // If we are already trending down and the tcp is small enough, just wait.
+                                    if ((median_throughput_cost_percent < adjusted_target_tcp) || (avg_recorded_tcp < adjusted_target_tcp))
+                                    {
+                                        step_up_int = 0;
+                                        dprintf (6666, ("[CHP0-2] trending down, slope is %.3f, tcp is %.3f, avg is %.3f, already below adjusted target %.3f, no change",
+                                            above_target_tcp_slope, median_throughput_cost_percent, avg_recorded_tcp, adjusted_target_tcp));
+                                    }
+                                }
+                                else
+                                {
+                                    // We are trending up, but we have too few samples and the avg is already small enough.
+                                    if ((above_target_tcp_count <= dynamic_heap_count_data.inc_recheck_threshold) && (avg_recorded_tcp < adjusted_target_tcp))
+                                    {
+                                        step_up_int = 0;
+                                        dprintf (6666, ("[CHP0-3] trending up, only %d samples, slope is %.3f, avg is %.3f already below adjusted target %.3f, no change",
+                                            above_target_tcp_count, above_target_tcp_slope, avg_recorded_tcp, adjusted_target_tcp));
+                                    }
+                                }
+                            }
+                        }
+
+                        // If we still decided to grow, check if we need to grow aggressively.
+                        if (step_up_int)
+                        {
+                            if (((int)dynamic_heap_count_data.last_changed_count > 0) && (dynamic_heap_count_data.last_changed_gc_index > 0.0))
+                            {
+                                if (num_gcs_since_last_change > (16 * dynamic_heap_count_data_t::sample_size))
+                                {
+                                    dynamic_heap_count_data.inc_failure_count = 0;
+                                    dprintf (6666, ("[CHP0-4] grew %d GCs ago, too far in the past, set aggressive factor to 0, grow from %d -> %d more heaps",
+                                        num_gcs_since_last_change, dynamic_heap_count_data.inc_failure_count, step_up_int, (step_up_int * (dynamic_heap_count_data.inc_failure_count + 1))));
+                                }
+                                else
+                                {
+                                    (dynamic_heap_count_data.inc_failure_count)++;
+                                    dprintf (6666, ("[CHP0-4] grew %d GCs ago, aggressive factor is %d, grow more aggressively from %d -> %d more heaps",
+                                        num_gcs_since_last_change, dynamic_heap_count_data.inc_failure_count, step_up_int, (step_up_int * (dynamic_heap_count_data.inc_failure_count + 1))));
+                                }
+                                step_up_int *= dynamic_heap_count_data.inc_failure_count + 1;
+                            }
+                        }
+                    }
+
+                    step_up_int = min (step_up_int, max_growth);
+
+                    new_n_heaps = n_heaps + step_up_int;
+                    new_n_heaps = min (new_n_heaps, actual_n_max_heaps);
+
+                    // If we are going to grow to be very close to max heap, it's better to just grow to it.
+                    if ((new_n_heaps < actual_n_max_heaps) && dynamic_heap_count_data.is_close_to_max (new_n_heaps, actual_n_max_heaps))
+                    {
+                        dprintf (6666, ("[CHP0-5] %d is close to max heaps %d, grow to max", new_n_heaps, actual_n_max_heaps));
+                        new_n_heaps = actual_n_max_heaps;
+                    }
+
+                    if (new_n_heaps > n_heaps)
+                    {
+                        dynamic_heap_count_data.last_changed_gc_index = current_gc_index;
+                        dynamic_heap_count_data.last_changed_count = step_up_float;
+                        dynamic_heap_count_data.last_changed_stcp = smoothed_median_throughput_cost_percent;
+                    }
+
+                    dprintf (6666, ("[CHP0] tcp %.3f, stcp %.3f -> (%d -> %.3f) -> %d + %d = %d -> %d",
+                        median_throughput_cost_percent, smoothed_median_throughput_cost_percent,
+                        actual_n_max_heaps, step_up_float, step_up_int, n_heaps, (n_heaps + step_up_int), new_n_heaps));
+                }
+            }
         }
         else
         {
-            dprintf (6666, ("[CHP2] tcp %.3f > 10, %d + %d = %d", median_gen2_tcp_percent, n_heaps, step_up, (n_heaps + step_up)));
+            // When we are below target, we accumulate the distance to target and only adjust when we've accumulated enough in this state. Note that
+            // this can include tcp's that are slightly above target, as long as it's not high enough for us to adjust the heap count. If we are just
+            // oscillating around target, this makes those tcp's cancel each other out.
+            if (dynamic_heap_count_data.below_target_accumulation == 0)
+            {
+                dynamic_heap_count_data.first_below_target_gc_index = current_gc_index;
+                dynamic_heap_count_data.init_recorded_tcp ();
+                dynamic_heap_count_data.add_to_recorded_tcp (median_throughput_cost_percent);
+            }
+            dprintf (6666, ("[CHP1] last time adjusted %s by %d at GC#%Id (%Id GCs since), stcp was %.3f, now stcp is %.3f",
+                ((dynamic_heap_count_data.last_changed_count > 0.0) ? "up" : "down"), (int)dynamic_heap_count_data.last_changed_count,
+                dynamic_heap_count_data.last_changed_gc_index, num_gcs_since_last_change,
+                dynamic_heap_count_data.last_changed_stcp, smoothed_median_throughput_cost_percent));
+
+            float below_target_diff = target_tcp - median_throughput_cost_percent;
+            dynamic_heap_count_data.below_target_accumulation += below_target_diff;
+
+            dprintf (6666, ("[CHP1] below target for the past %Id GCs, accumulated %.3f, min (10%% of max is %.2f, 20%% of hc is %.2f)",
+                (current_gc_index - dynamic_heap_count_data.first_below_target_gc_index), dynamic_heap_count_data.below_target_accumulation,
+                (actual_n_max_heaps * 0.1), (n_heaps * 0.2)));
+
+            if (dynamic_heap_count_data.below_target_accumulation >= dynamic_heap_count_data.below_target_threshold)
+            {
+                int below_target_tcp_count = dynamic_heap_count_data.rearrange_recorded_tcp ();
+                float below_target_tcp_slope = slope (dynamic_heap_count_data.recorded_tcp_rearranged, below_target_tcp_count, &avg_recorded_tcp);
+                float diff_pct = (target_tcp - smoothed_median_throughput_cost_percent) / target_tcp;
+                int step_down_int = (int)(diff_pct / 2.0 * n_heaps);
+                if ((step_down_int == 0) && dynamic_heap_count_data.is_tcp_far_below (diff_pct))
+                {
+                    dprintf (6666, ("[CHP1] we are far below target, reduce by 1 heap"));
+                    step_down_int = 1;
+                }
+
+                dprintf (6666, ("[CHP1] observed %d tcp's <= or ~ target, avg %.3f, slope %.3f, stcp %.3f%% below target, shrink by %.3f%% * %d = %d heaps",
+                    below_target_tcp_count, avg_recorded_tcp, below_target_tcp_slope, (diff_pct * 100.0), (diff_pct * 50.0), n_heaps, step_down_int));
+
+                bool shrink_p = false;
+                if (dynamic_heap_count_data.is_tcp_in_range (diff_pct, below_target_tcp_slope))
+                {
+                    step_down_int = 0;
+                    dprintf (6666, ("[CHP1-0] slope %.3f is flat and stcp is already close to target %.3f (%.3f%%), no change",
+                        below_target_tcp_slope, smoothed_median_throughput_cost_percent, (diff_pct * 100.0)));
+                }
+                else
+                {
+                    // If we adjusted last time and it was unsuccessful, we need to inc our failure count.
+                    // If we have a non zero failure count, we don't want to adjust for a while if we continue to be in that same situation.
+                    bool last_dec_p = (dynamic_heap_count_data.last_changed_gc_index > 0) && (dynamic_heap_count_data.last_changed_count < 0.0);
+                    float last_dec_tcp_diff_pct = (last_dec_p ?
+                        ((smoothed_median_throughput_cost_percent - dynamic_heap_count_data.last_changed_stcp) / dynamic_heap_count_data.last_changed_stcp) : 0.0f);
+                    bool stable_p = last_dec_p && ((last_dec_tcp_diff_pct <= 0.2) && (last_dec_tcp_diff_pct >= -0.2));
+                    dprintf (6666, ("[CHP1] since last adjustment stcp changed %.3f->%.3f = %.3f%%, %s, dec_failure_count is %d",
+                        dynamic_heap_count_data.last_changed_stcp, smoothed_median_throughput_cost_percent, (last_dec_tcp_diff_pct * 100.0),
+                        (stable_p ? "stable" : "not stable"), dynamic_heap_count_data.dec_failure_count));
+
+                    bool check_dec_p = true;
+
+                    if (stable_p)
+                    {
+                        if (dynamic_heap_count_data.dec_failure_count)
+                        {
+                            (dynamic_heap_count_data.dec_failure_count)++;
+                        }
+                        else
+                        {
+                            dynamic_heap_count_data.dec_failure_count = 1;
+                        }
+
+                        if (dynamic_heap_count_data.dec_failure_count <= dynamic_heap_count_data.dec_failure_recheck_threshold)
+                        {
+                            check_dec_p = false;
+                            dprintf (6666, ("[CHP1-1] dec was still unsuccessful, <= %d, no change", dynamic_heap_count_data.dec_failure_recheck_threshold));
+                        }
+                    }
+
+                    if (check_dec_p)
+                    {
+                        dynamic_heap_count_data.dec_failure_count = 0;
+
+                        if (below_target_tcp_slope <= 0.0)
+                        {
+                            shrink_p = true;
+                        }
+                        else
+                        {
+                            // It's trending upwards, but if takes too many samples to get to target, we do want to shrink.
+                            int num_samples_to_goal = (int)((target_tcp + below_target_tcp_slope - median_throughput_cost_percent) / below_target_tcp_slope);
+                            bool far_below_goal_p = (num_samples_to_goal > (3 * dynamic_heap_count_data_t::sample_size));
+                            dprintf (6666, ("[CHP1] it'll take ((%.3f + %.3f - %.3f) / %.3f = %d) samples to get to target, %s",
+                                target_tcp, below_target_tcp_slope, median_throughput_cost_percent, below_target_tcp_slope,
+                                num_samples_to_goal, (far_below_goal_p ? "shrink" : "no change")));
+
+                            if (far_below_goal_p)
+                            {
+                                // We could be in a situation where the slope changes directions but since we only compute one number, we take another look at
+                                // the samples to make a better assessment by looking at the highest tcps and if their average is close to target, we don't shrink.
+                                //
+                                // TODO - we only check this when the slope is going up but since this includes the situation where the slope changes directions
+                                // we should really be checking this regardless of the slope to handle that.
+                                float highest_avg_tcp = 0.0;
+                                int highest_count = dynamic_heap_count_data.highest_avg_recorded_tcp (below_target_tcp_count, avg_recorded_tcp, &highest_avg_tcp);
+                                float highest_count_pct = (float)highest_count / (float)below_target_tcp_count;
+
+                                shrink_p = (highest_count_pct < 0.3) || (highest_avg_tcp < (target_tcp * 0.8));
+                                dprintf (6666, ("[CHP1-2] %d samples were above avg (%.3f%%), their avg is %.3f (%s)",
+                                    highest_count, (highest_count_pct * 100.0), highest_avg_tcp, (shrink_p ? "shrink" : "no change")));
+                            }
+                        }
+                    }
+                }
+
+                if (shrink_p && step_down_int && (new_n_heaps > step_down_int))
+                {
+                    if (step_down_int == 1)
+                    {
+                        if (dynamic_heap_count_data.should_dec_by_one())
+                        {
+                            dprintf (6666, ("[CHP1-3] shrink by one heap"));
+                        }
+                        else
+                        {
+                            step_down_int = 0;
+                            dprintf (6666, ("[CHP1-3] don't shrink just yet if it's just one heap"));
+                        }
+                    }
+                    else
+                    {
+                        dynamic_heap_count_data.reset_dec_by_one();
+                        dprintf (6666, ("[CHP1-3] shrink by %d heap(s), reset dec by one", step_down_int));
+                    }
+
+                    new_n_heaps -= step_down_int;
+                    dprintf (6666, ("[CHP1] shrink by %d heaps -> %d", step_down_int, new_n_heaps));
+                }
+
+                // Always reinit the buffer as we want to look at the more recent history.
+                dynamic_heap_count_data.init_recorded_tcp ();
+                dynamic_heap_count_data.below_target_accumulation = 0;
+            }
+
+            if (new_n_heaps < n_heaps)
+            {
+                dynamic_heap_count_data.last_changed_gc_index = current_gc_index;
+                dynamic_heap_count_data.last_changed_count = (float)(new_n_heaps - n_heaps);
+                dynamic_heap_count_data.last_changed_stcp = smoothed_median_throughput_cost_percent;
+                dprintf (6666, ("[CHP1] setting last changed gc index to %Id, count to %.3f, stcp to %.3f",
+                    dynamic_heap_count_data.last_changed_gc_index, dynamic_heap_count_data.last_changed_count, dynamic_heap_count_data.last_changed_stcp));
+
+                if (dynamic_heap_count_data.inc_failure_count)
+                {
+                    dprintf (6666, ("[CHP1] shrink, reset inc failure count (was %d)", dynamic_heap_count_data.inc_failure_count));
+                    dynamic_heap_count_data.inc_failure_count = 0;
+                }
+            }
         }
-        new_n_heaps += step_up;
-    }
-    // if we can save at least 1% more in time than we spend in space, increase number of heaps
-    else if ((tcp_reduction_per_step_up - scp_increase_per_step_up) >= 1.0f)
-    {
-        dprintf (6666, ("[CHP3] % .3f - % .3f = % .3f, % d + % d = % d",
-            tcp_reduction_per_step_up, scp_increase_per_step_up, (tcp_reduction_per_step_up - scp_increase_per_step_up),
-            n_heaps, step_up, (n_heaps + step_up)));
-        new_n_heaps += step_up;
     }
-    // if we can save at least 1% more in space than we spend in time, decrease number of heaps
-    else if ((smoothed_median_throughput_cost_percent < 1.0f) &&
-        (median_gen2_tcp_percent < 5.0f) &&
-        ((scp_decrease_per_step_down - tcp_increase_per_step_down) >= 1.0f))
+
+    if ((new_n_heaps == n_heaps) && !process_eph_samples_p && process_gen2_samples_p)
     {
-        dprintf (6666, ("[CHP4] stcp %.3f tcp %.3f, %.3f - %.3f = %.3f, %d + %d = %d",
-            smoothed_median_throughput_cost_percent, median_gen2_tcp_percent,
-            scp_decrease_per_step_down, tcp_increase_per_step_down, (scp_decrease_per_step_down - tcp_increase_per_step_down),
-            n_heaps, step_up, (n_heaps + step_up)));
-        new_n_heaps -= step_down;
+        // The gen2 samples only serve as a backstop so this is quite crude.
+        if (median_gen2_tcp_percent > target_gen2_tcp)
+        {
+            float step_up_percent = log_with_base ((median_gen2_tcp_percent - target_gen2_tcp + log_base), log_base);
+            float step_up_float = (float)(step_up_percent / 100.0 * actual_n_max_heaps);
+            new_n_heaps += (int)step_up_float;
+            new_n_heaps = min (new_n_heaps, actual_n_max_heaps);
+            dprintf (6666, ("[CHP2-0] gen2 tcp: %.3f, inc by %.3f%% = %d, %d -> %d", median_gen2_tcp_percent, step_up_percent, (int)step_up_float, n_heaps, new_n_heaps));
+
+            if ((new_n_heaps < actual_n_max_heaps) && dynamic_heap_count_data.is_close_to_max (new_n_heaps, actual_n_max_heaps))
+            {
+                dprintf (6666, ("[CHP2-1] %d is close to max heaps %d, grow to max", new_n_heaps, actual_n_max_heaps));
+                new_n_heaps = actual_n_max_heaps;
+            }
+        }
+        else if ((dynamic_heap_count_data.last_processed_stcp < 1.0) &&
+                    (median_gen2_tcp_percent < (target_gen2_tcp / 2)) &&
+                    (scp_decrease_per_step_down - tcp_increase_per_step_down >= 1.0f))
+        {
+            new_n_heaps -= step_down;
+            dprintf (6666, ("[CHP3-0] last eph stcp: %.3f, gen2 tcp: %.3f, dec by %d, %d -> %d",
+                dynamic_heap_count_data.last_processed_stcp, median_gen2_tcp_percent, step_down, n_heaps, new_n_heaps));
+        }
     }
 
     assert (new_n_heaps >= 1);
-    assert (new_n_heaps <= n_max_heaps);
+    assert (new_n_heaps <= actual_n_max_heaps);
+
 #endif //STRESS_DYNAMIC_HEAP_COUNT
 
     // store data used for decision to emit in ETW event
@@ -25350,13 +25769,28 @@ int gc_heap::calculate_new_heap_count ()
         dynamic_heap_count_data.scp_decrease_per_step_down
     );
 
-    dynamic_heap_count_data.prev_num_completed_gcs = num_completed_gcs;
+    if (process_eph_samples_p)
+    {
+        dprintf (6666, ("processed eph samples, updating processed %Id -> %Id", dynamic_heap_count_data.processed_samples_count, dynamic_heap_count_data.current_samples_count));
+        dynamic_heap_count_data.processed_samples_count = dynamic_heap_count_data.current_samples_count;
+    }
+
+    if (process_gen2_samples_p)
+    {
+        dprintf (6666, ("processed gen2 samples, updating processed %Id -> %Id", dynamic_heap_count_data.processed_gen2_samples_count, dynamic_heap_count_data.current_gen2_samples_count));
+        dynamic_heap_count_data.processed_gen2_samples_count = dynamic_heap_count_data.current_gen2_samples_count;
+    }
 
     if (new_n_heaps != n_heaps)
     {
-        dprintf (6666, ("should change! %d->%d", n_heaps, new_n_heaps));
+        dprintf (6666, ("GC#%Id should change! %d->%d (%s)",
+            VolatileLoadWithoutBarrier (&settings.gc_index), n_heaps, new_n_heaps, ((n_heaps < new_n_heaps) ? "INC" : "DEC")));
         dynamic_heap_count_data.heap_count_to_change_to = new_n_heaps;
         dynamic_heap_count_data.should_change_heap_count = true;
+        dynamic_heap_count_data.init_recorded_tcp ();
+        dynamic_heap_count_data.below_target_accumulation = 0;
+        dynamic_heap_count_data.first_below_target_gc_index = current_gc_index;
+        dprintf (6666, ("CHANGING HC, resetting tcp index, below target"));
     }
 
     return new_n_heaps;
@@ -25389,7 +25823,7 @@ void gc_heap::check_heap_count ()
 
     if (dynamic_heap_count_data.new_n_heaps != n_heaps)
     {
-        dprintf (6666, ("prep to change from %d to %d", n_heaps, dynamic_heap_count_data.new_n_heaps));
+        dprintf (6666, ("prep to change from %d to %d at GC#%Id", n_heaps, dynamic_heap_count_data.new_n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index)));
         if (!prepare_to_change_heap_count (dynamic_heap_count_data.new_n_heaps))
         {
             // we don't have sufficient resources - reset the new heap count
@@ -25399,11 +25833,15 @@ void gc_heap::check_heap_count ()
 
     if (dynamic_heap_count_data.new_n_heaps == n_heaps)
     {
-        // heap count stays the same, no work to do
-        dynamic_heap_count_data.prev_num_completed_gcs = get_num_completed_gcs ();
+        dynamic_heap_count_data.last_changed_gc_index = 0;
+        dynamic_heap_count_data.last_changed_count = 0.0;
+
+        dynamic_heap_count_data.processed_samples_count = dynamic_heap_count_data.current_samples_count;
+        dynamic_heap_count_data.processed_gen2_samples_count = dynamic_heap_count_data.current_gen2_samples_count;
         dynamic_heap_count_data.should_change_heap_count = false;
 
-        dprintf (6666, ("heap count stays the same %d, no work to do, set prev completed to %Id", dynamic_heap_count_data.new_n_heaps, dynamic_heap_count_data.prev_num_completed_gcs));
+        dprintf (6666, ("heap count stays the same %d, no work to do, set processed sample count to %Id",
+            dynamic_heap_count_data.new_n_heaps, dynamic_heap_count_data.current_samples_count));
 
         return;
     }
@@ -25443,17 +25881,14 @@ void gc_heap::check_heap_count ()
 
     int old_n_heaps = n_heaps;
 
-    (dynamic_heap_count_data.heap_count_change_count)++;
     change_heap_count (dynamic_heap_count_data.new_n_heaps);
 
     GCToEEInterface::RestartEE(TRUE);
     dprintf (9999, ("h0 restarted EE"));
 
-    // we made changes to the heap count that will change the overhead,
-    // so change the smoothed overhead to reflect that
-    dynamic_heap_count_data.smoothed_median_throughput_cost_percent = dynamic_heap_count_data.smoothed_median_throughput_cost_percent / n_heaps * old_n_heaps;
+    dynamic_heap_count_data.smoothed_median_throughput_cost_percent = 0.0;
 
-    dprintf (6666, ("h0 finished changing, set should change to false!"));
+    dprintf (6666, ("h0 finished changing, set should change to false!\n"));
     dynamic_heap_count_data.should_change_heap_count = false;
 }
 
@@ -25593,6 +26028,8 @@ bool gc_heap::prepare_to_change_heap_count (int new_n_heaps)
 
 bool gc_heap::change_heap_count (int new_n_heaps)
 {
+    uint64_t start_time = 0;
+
     dprintf (9999, ("BEG heap%d changing %d->%d", heap_number, n_heaps, new_n_heaps));
 
     // use this variable for clarity - n_heaps will change during the transition
@@ -25617,11 +26054,9 @@ bool gc_heap::change_heap_count (int new_n_heaps)
 
     assert (dynamic_heap_count_data.new_n_heaps != old_n_heaps);
 
-    dprintf (9999, ("Waiting h0 heap%d changing %d->%d", heap_number, n_heaps, new_n_heaps));
-
     if (heap_number == 0)
     {
-        dprintf (3, ("switching heap count from %d to %d heaps", old_n_heaps, new_n_heaps));
+        start_time = GetHighPrecisionTimeStamp ();
 
         // spread finalization data out to heaps coming into service
         // if this step fails, we can still continue
@@ -25827,6 +26262,7 @@ bool gc_heap::change_heap_count (int new_n_heaps)
                 gc_t_join.restart ();
             }
         }
+
 #ifdef BACKGROUND_GC
         // there should be no items in the bgc_alloc_lock
         bgc_alloc_lock->check();
@@ -25837,23 +26273,31 @@ bool gc_heap::change_heap_count (int new_n_heaps)
     {
         // compute the total budget per generation over the old heaps
         // and figure out what the new budget per heap is
-        ptrdiff_t budget_per_heap[total_generation_count];
+        ptrdiff_t new_alloc_per_heap[total_generation_count];
+        size_t desired_alloc_per_heap[total_generation_count];
         for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
         {
-            ptrdiff_t total_budget = 0;
+            ptrdiff_t total_new_alloc = 0;
+            size_t total_desired_alloc = 0;
             for (int i = 0; i < old_n_heaps; i++)
             {
                 gc_heap* hp = g_heaps[i];
 
                 dynamic_data* dd = hp->dynamic_data_of (gen_idx);
-                total_budget += dd_new_allocation (dd);
+                total_new_alloc += dd_new_allocation (dd);
+                total_desired_alloc += dd_desired_allocation (dd);
             }
             // distribute the total budget for this generation over all new heaps if we are increasing heap count,
             // but keep the budget per heap if we are decreasing heap count
             int max_n_heaps = max (old_n_heaps, new_n_heaps);
-            budget_per_heap[gen_idx] = Align (total_budget/max_n_heaps, get_alignment_constant (gen_idx <= max_generation));
-
-            dprintf (6666, ("g%d: total budget: %zd budget per heap: %zd", gen_idx, total_budget, budget_per_heap[gen_idx]));
+            new_alloc_per_heap[gen_idx] = Align (total_new_alloc / max_n_heaps, get_alignment_constant (gen_idx <= max_generation));
+            desired_alloc_per_heap[gen_idx] = Align (total_desired_alloc / max_n_heaps, get_alignment_constant (gen_idx <= max_generation));
+            size_t allocated_in_budget = total_desired_alloc - total_new_alloc;
+            dprintf (6666, ("g%d: total budget %zd (%zd / heap), left in budget: %zd (%zd / heap), (allocated %Id, %.3f%%), min %zd",
+                gen_idx, total_desired_alloc, desired_alloc_per_heap[gen_idx],
+                total_new_alloc, new_alloc_per_heap[gen_idx],
+                allocated_in_budget, ((double)allocated_in_budget * 100.0 / (double)total_desired_alloc),
+                dd_min_size (g_heaps[0]->dynamic_data_of (gen_idx))));
         }
 
         // distribute the new budget per heap over the new heaps
@@ -25864,10 +26308,10 @@ bool gc_heap::change_heap_count (int new_n_heaps)
 
             for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
             {
-                // distribute the total budget over all heaps, but don't go below the min budget
+                // distribute the total leftover budget over all heaps.
                 dynamic_data* dd = hp->dynamic_data_of (gen_idx);
-                dd_new_allocation (dd) = max (budget_per_heap[gen_idx], (ptrdiff_t)dd_min_size (dd));
-                dd_desired_allocation (dd) = dd_new_allocation (dd);
+                dd_new_allocation (dd) = new_alloc_per_heap[gen_idx];
+                dd_desired_allocation (dd) = max (desired_alloc_per_heap[gen_idx], dd_min_size (dd));
 
                 // recompute dd_fragmentation and dd_current_size
                 generation* gen = hp->generation_of (gen_idx);
@@ -25876,10 +26320,11 @@ bool gc_heap::change_heap_count (int new_n_heaps)
                 assert (gen_size >= dd_fragmentation (dd));
                 dd_current_size (dd) = gen_size - dd_fragmentation (dd);
 
-                dprintf (6666, ("h%d g%d: new allocation: %zd generation_size: %zd fragmentation: %zd current_size: %zd",
+                dprintf (3, ("h%d g%d: budget: %zd, left in budget: %zd, generation_size: %zd fragmentation: %zd current_size: %zd",
                     i,
                     gen_idx,
-                    dd_new_allocation (dd),
+                    desired_alloc_per_heap[gen_idx],
+                    new_alloc_per_heap[gen_idx],
                     gen_size,
                     dd_fragmentation (dd),
                     dd_current_size (dd)));
@@ -25916,6 +26361,11 @@ bool gc_heap::change_heap_count (int new_n_heaps)
         }
     }
 
+    if (heap_number == 0)
+    {
+        change_heap_count_time = GetHighPrecisionTimeStamp() - start_time;
+    }
+
     return true;
 }
 
@@ -28144,7 +28594,7 @@ BOOL gc_heap::background_process_mark_overflow (BOOL concurrent_p)
         if (grow_mark_array_p)
         {
             // Try to grow the array.
-            size_t new_size = max (MARK_STACK_INITIAL_LENGTH, 2*background_mark_stack_array_length);
+            size_t new_size = max ((size_t)MARK_STACK_INITIAL_LENGTH, 2*background_mark_stack_array_length);
 
             if ((new_size * sizeof(mark)) > 100*1024)
             {
@@ -28484,7 +28934,7 @@ BOOL gc_heap::process_mark_overflow(int condemned_gen_number)
         overflow_p = TRUE;
         // Try to grow the array.
         size_t new_size =
-            max (MARK_STACK_INITIAL_LENGTH, 2*mark_stack_array_length);
+            max ((size_t)MARK_STACK_INITIAL_LENGTH, 2*mark_stack_array_length);
 
         if ((new_size * sizeof(mark)) > 100*1024)
         {
@@ -28787,7 +29237,7 @@ BOOL gc_heap::decide_on_promotion_surv (size_t threshold)
     {
         gc_heap* hp = pGenGCHeap;
 #endif //MULTIPLE_HEAPS
-        dynamic_data* dd = hp->dynamic_data_of (min ((settings.condemned_generation + 1), max_generation));
+        dynamic_data* dd = hp->dynamic_data_of (min ((int)(settings.condemned_generation + 1), (int)max_generation));
         size_t older_gen_size = dd_current_size (dd) + (dd_desired_allocation (dd) - dd_new_allocation (dd));
 
         size_t promoted = hp->total_promoted_bytes;
@@ -28863,7 +29313,7 @@ void gc_heap::verify_region_to_generation_map()
                 }
                 size_t region_index_start = get_basic_region_index_for_address (get_region_start (region));
                 size_t region_index_end = get_basic_region_index_for_address (heap_segment_reserved (region));
-                int gen_num = min (gen_number, soh_gen2);
+                int gen_num = min (gen_number, (int)soh_gen2);
                 assert (gen_num == heap_segment_gen_num (region));
                 int plan_gen_num = heap_segment_plan_gen_num (region);
                 bool is_demoted = (region->flags & heap_segment_flags_demoted) != 0;
@@ -30759,7 +31209,7 @@ BOOL gc_heap::plan_loh()
 
 void gc_heap::compact_loh()
 {
-    assert (loh_compaction_requested() || heap_hard_limit || conserve_mem_setting);
+    assert (loh_compaction_requested() || heap_hard_limit || conserve_mem_setting || (settings.reason == reason_induced_aggressive));
 
 #ifdef FEATURE_EVENT_TRACE
     uint64_t start_time = 0, end_time;
@@ -32096,7 +32546,7 @@ void gc_heap::plan_phase (int condemned_gen_number)
 
     if ((condemned_gen_number < max_generation))
     {
-        older_gen = generation_of (min (max_generation, 1 + condemned_gen_number));
+        older_gen = generation_of (min ((int)max_generation, 1 + condemned_gen_number));
         generation_allocator (older_gen)->copy_to_alloc_list (r_free_list);
 
         r_free_list_space = generation_free_list_space (older_gen);
@@ -33667,7 +34117,7 @@ void gc_heap::plan_phase (int condemned_gen_number)
         {
             reset_pinned_queue_bos();
 #ifndef USE_REGIONS
-            unsigned int  gen_number = min (max_generation, 1 + condemned_gen_number);
+            unsigned int  gen_number = (unsigned int)min ((int)max_generation, 1 + condemned_gen_number);
             generation*  gen = generation_of (gen_number);
             uint8_t*  low = generation_allocation_start (generation_of (gen_number-1));
             uint8_t*  high =  heap_segment_allocated (ephemeral_heap_segment);
@@ -42004,8 +42454,8 @@ BOOL gc_heap::best_fit (size_t free_space,
 #endif // SEG_REUSE_STATS
         if (free_space_items)
         {
-            max_free_space_items = min (MAX_NUM_FREE_SPACES, free_space_items * 2);
-            max_free_space_items = max (max_free_space_items, MIN_NUM_FREE_SPACES);
+            max_free_space_items = min ((size_t)MAX_NUM_FREE_SPACES, free_space_items * 2);
+            max_free_space_items = max (max_free_space_items, (size_t)MIN_NUM_FREE_SPACES);
         }
         else
         {
@@ -42236,8 +42686,8 @@ BOOL gc_heap::can_expand_into_p (heap_segment* seg, size_t min_free_size, size_t
                 memcpy (ordered_free_space_indices,
                         saved_ordered_free_space_indices,
                         sizeof(ordered_free_space_indices));
-                max_free_space_items = max (MIN_NUM_FREE_SPACES, free_space_items * 3 / 2);
-                max_free_space_items = min (MAX_NUM_FREE_SPACES, max_free_space_items);
+                max_free_space_items = max ((size_t)MIN_NUM_FREE_SPACES, free_space_items * 3 / 2);
+                max_free_space_items = min ((size_t)MAX_NUM_FREE_SPACES, max_free_space_items);
                 dprintf (SEG_REUSE_LOG_0, ("could fit! %zd free spaces, %zd max", free_space_items, max_free_space_items));
             }
 
@@ -42921,14 +43371,14 @@ void gc_heap::init_static_data()
 
     size_t gen0_max_size =
 #ifdef MULTIPLE_HEAPS
-        max (6*1024*1024, min ( Align(soh_segment_size/2), 200*1024*1024));
+        max ((size_t)6*1024*1024, min ( Align(soh_segment_size/2), (size_t)200*1024*1024));
 #else //MULTIPLE_HEAPS
         (
 #ifdef BACKGROUND_GC
             gc_can_use_concurrent ?
             6*1024*1024 :
 #endif //BACKGROUND_GC
-            max (6*1024*1024,  min ( Align(soh_segment_size/2), 200*1024*1024))
+            max ((size_t)6*1024*1024,  min ( Align(soh_segment_size/2), (size_t)200*1024*1024))
         );
 #endif //MULTIPLE_HEAPS
 
@@ -42958,14 +43408,14 @@ void gc_heap::init_static_data()
     // TODO: gen0_max_size has a 200mb cap; gen1_max_size should also have a cap.
     size_t gen1_max_size = (size_t)
 #ifdef MULTIPLE_HEAPS
-        max (6*1024*1024, Align(soh_segment_size/2));
+        max ((size_t)6*1024*1024, Align(soh_segment_size/2));
 #else //MULTIPLE_HEAPS
         (
 #ifdef BACKGROUND_GC
             gc_can_use_concurrent ?
             6*1024*1024 :
 #endif //BACKGROUND_GC
-            max (6*1024*1024, Align(soh_segment_size/2))
+            max ((size_t)6*1024*1024, Align(soh_segment_size/2))
         );
 #endif //MULTIPLE_HEAPS
 
@@ -43112,7 +43562,7 @@ size_t gc_heap::desired_new_allocation (dynamic_data* dd,
             }
             else
             {
-                new_size = (size_t) min (max ( (f * current_size), min_gc_size), max_size);
+                new_size = (size_t) min (max ( (size_t)(f * current_size), min_gc_size), max_size);
             }
 
             assert ((new_size >= current_size) || (new_size == max_size));
@@ -43184,7 +43634,7 @@ size_t gc_heap::desired_new_allocation (dynamic_data* dd,
             size_t survivors = out;
             cst = float (survivors) / float (dd_begin_data_size (dd));
             f = surv_to_growth (cst, limit, max_limit);
-            new_allocation = (size_t) min (max ((f * (survivors)), min_gc_size), max_size);
+            new_allocation = (size_t) min (max ((size_t)(f * (survivors)), min_gc_size), max_size);
 
             new_allocation = linear_allocation_model (allocation_fraction, new_allocation,
                                                       dd_desired_allocation (dd), time_since_previous_collection_secs);
@@ -43213,35 +43663,6 @@ size_t gc_heap::desired_new_allocation (dynamic_data* dd,
                     new_allocation = min (new_allocation,
                                           max (min_gc_size, (max_size/3)));
                 }
-
-#ifdef DYNAMIC_HEAP_COUNT
-                if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
-                {
-                    // if this is set, limit gen 0 size to a small multiple of the older generations
-                    float f_older_gen = ((10.0f / conserve_mem_setting) - 1) * 0.5f;
-
-                    // compute the total size of the older generations
-                    size_t older_size = 0;
-                    for (int gen_index_older = 1; gen_index_older < total_generation_count; gen_index_older++)
-                    {
-                        dynamic_data* dd_older = dynamic_data_of (gen_index_older);
-                        older_size += dd_current_size (dd_older);
-                    }
-                    // derive a new allocation size from it
-                    size_t new_allocation_from_older = (size_t)(older_size*f_older_gen);
-
-                    // limit the new allocation to this value
-                    new_allocation = min (new_allocation, new_allocation_from_older);
-
-                    // but make sure it doesn't drop below the minimum size
-                    new_allocation = max (new_allocation, min_gc_size);
-
-                    dprintf (2, ("f_older_gen: %d%% older_size: %zd new_allocation: %zd",
-                        (int)(f_older_gen*100),
-                        older_size,
-                        new_allocation));
-                }
-#endif //DYNAMIC_HEAP_COUNT
             }
         }
 
@@ -43279,9 +43700,9 @@ size_t gc_heap::generation_plan_size (int gen_number)
     return result;
 #else //USE_REGIONS
     if (0 == gen_number)
-        return max((heap_segment_plan_allocated (ephemeral_heap_segment) -
+        return (size_t)max((heap_segment_plan_allocated (ephemeral_heap_segment) -
                     generation_plan_allocation_start (generation_of (gen_number))),
-                   (int)Align (min_obj_size));
+                   (ptrdiff_t)Align (min_obj_size));
     else
     {
         generation* gen = generation_of (gen_number);
@@ -43330,9 +43751,9 @@ size_t gc_heap::generation_size (int gen_number)
     return result;
 #else //USE_REGIONS
     if (0 == gen_number)
-        return max((heap_segment_allocated (ephemeral_heap_segment) -
+        return (size_t)max((heap_segment_allocated (ephemeral_heap_segment) -
                     generation_allocation_start (generation_of (gen_number))),
-                   (int)Align (min_obj_size));
+                   (ptrdiff_t)Align (min_obj_size));
     else
     {
         generation* gen = generation_of (gen_number);
@@ -43414,7 +43835,7 @@ size_t gc_heap::trim_youngest_desired (uint32_t memory_load,
     }
     else
     {
-        size_t total_max_allocation = max (mem_one_percent, total_min_allocation);
+        size_t total_max_allocation = max ((size_t)mem_one_percent, total_min_allocation);
         return min (total_new_allocation, total_max_allocation);
     }
 }
@@ -43749,7 +44170,7 @@ void gc_heap::decommit_ephemeral_segment_pages()
     dynamic_data* dd0 = dynamic_data_of (0);
 
     ptrdiff_t desired_allocation = dd_new_allocation (dd0) +
-                                   max (estimate_gen_growth (soh_gen1), 0) +
+                                   max (estimate_gen_growth (soh_gen1), (ptrdiff_t)0) +
                                    loh_size_threshold;
 
     size_t slack_space =
@@ -43798,7 +44219,7 @@ void gc_heap::decommit_ephemeral_segment_pages()
 
     // we do a max of DECOMMIT_SIZE_PER_MILLISECOND per millisecond of elapsed time since the last GC
     // we limit the elapsed time to 10 seconds to avoid spending too much time decommitting
-    ptrdiff_t max_decommit_size = min (ephemeral_elapsed, (10*1000)) * DECOMMIT_SIZE_PER_MILLISECOND;
+    ptrdiff_t max_decommit_size = min (ephemeral_elapsed, (size_t)(10*1000)) * DECOMMIT_SIZE_PER_MILLISECOND;
     decommit_size = min (decommit_size, max_decommit_size);
 
     slack_space = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment) - decommit_size;
@@ -46778,7 +47199,7 @@ enable_no_gc_region_callback_status gc_heap::enable_no_gc_callback(NoGCRegionCal
                 soh_withheld_budget = soh_withheld_budget / gc_heap::n_heaps;
                 loh_withheld_budget = loh_withheld_budget / gc_heap::n_heaps;
 #endif
-                soh_withheld_budget = max(soh_withheld_budget, 1);
+                soh_withheld_budget = max(soh_withheld_budget, (size_t)1);
                 soh_withheld_budget = Align(soh_withheld_budget, get_alignment_constant (TRUE));
                 loh_withheld_budget = Align(loh_withheld_budget, get_alignment_constant (FALSE));
 #ifdef MULTIPLE_HEAPS
@@ -47193,7 +47614,7 @@ void gc_heap::verify_regions (int gen_number, bool can_verify_gen_num, bool can_
         }
         if (can_verify_gen_num)
         {
-            if (heap_segment_gen_num (seg_in_gen) != min (gen_number, max_generation))
+            if (heap_segment_gen_num (seg_in_gen) != min (gen_number, (int)max_generation))
             {
                 dprintf (REGIONS_LOG, ("h%d gen%d region %p(%p) gen is %d!",
                     heap_number, gen_number, seg_in_gen, heap_segment_mem (seg_in_gen),
@@ -48043,7 +48464,7 @@ HRESULT GCHeap::Initialize()
 
     nhp = ((nhp_from_config == 0) ? g_num_active_processors : nhp_from_config);
 
-    nhp = min (nhp, MAX_SUPPORTED_CPUS);
+    nhp = min (nhp, (uint32_t)MAX_SUPPORTED_CPUS);
 
     gc_heap::gc_thread_no_affinitize_p = (gc_heap::heap_hard_limit ?
         !affinity_config_specified_p : (GCConfig::GetNoAffinitize() != 0));
@@ -48192,7 +48613,7 @@ HRESULT GCHeap::Initialize()
     /*
      * Allocation requests less than loh_size_threshold will be allocated on the small object heap.
      *
-     * An object cannot span more than one region and regions in small object heap are of the same size - gc_region_size. 
+     * An object cannot span more than one region and regions in small object heap are of the same size - gc_region_size.
      * However, the space available for actual allocations is reduced by the following implementation details -
      *
      * 1.) heap_segment_mem is set to the new pages + sizeof(aligned_plug_and_gap) in make_heap_segment.
@@ -48208,7 +48629,7 @@ HRESULT GCHeap::Initialize()
 #ifdef FEATURE_STRUCTALIGN
     /*
      * The above assumed FEATURE_STRUCTALIGN is not turned on for platforms where USE_REGIONS is supported, otherwise it is possible
-     * that the allocation size is inflated by ComputeMaxStructAlignPad in GCHeap::Alloc and we have to compute an upper bound of that 
+     * that the allocation size is inflated by ComputeMaxStructAlignPad in GCHeap::Alloc and we have to compute an upper bound of that
      * function.
      *
      * Note that ComputeMaxStructAlignPad is defined to be 0 if FEATURE_STRUCTALIGN is turned off.
@@ -48306,7 +48727,13 @@ HRESULT GCHeap::Initialize()
     // them which means it's important to know their numa nodes and map them to a reasonable
     // heap, ie, we wouldn't want to have all such procs go to heap 0.
     if (g_num_active_processors > nhp)
-        heap_select::distribute_other_procs();
+    {
+        bool distribute_all_p = false;
+#ifdef DYNAMIC_HEAP_COUNT
+        distribute_all_p = (gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes);
+#endif //DYNAMIC_HEAP_COUNT
+        heap_select::distribute_other_procs (distribute_all_p);
+    }
 
     gc_heap* hp = gc_heap::g_heaps[0];
 
@@ -48340,13 +48767,13 @@ HRESULT GCHeap::Initialize()
     for (int numa_node_index = 0; numa_node_index < total_numa_nodes_on_machine; numa_node_index++)
     {
         int hb_info_size_per_node = hb_info_size_per_proc * procs_per_numa_node;
-        uint8_t* numa_mem = (uint8_t*)GCToOSInterface::VirtualReserve (hb_info_size_per_node, 0, 0, numa_node_index);
+        uint8_t* numa_mem = (uint8_t*)GCToOSInterface::VirtualReserve (hb_info_size_per_node, 0, 0, (uint16_t)numa_node_index);
         if (!numa_mem)
         {
             GCToEEInterface::LogErrorToHost("Reservation of numa_mem failed");
             return E_FAIL;
         }
-        if (!GCToOSInterface::VirtualCommit (numa_mem, hb_info_size_per_node, numa_node_index))
+        if (!GCToOSInterface::VirtualCommit (numa_mem, hb_info_size_per_node, (uint16_t)numa_node_index))
         {
             GCToEEInterface::LogErrorToHost("Commit of numa_mem failed");
             return E_FAIL;
@@ -48381,7 +48808,8 @@ HRESULT GCHeap::Initialize()
             // start with only 1 heap
             gc_heap::smoothed_desired_total[0] /= gc_heap::n_heaps;
             int initial_n_heaps = 1;
-            dprintf (9999, ("gc_heap::n_heaps is %d, initial %d", gc_heap::n_heaps, initial_n_heaps));
+
+            dprintf (6666, ("n_heaps is %d, initial n_heaps is %d, %d cores", gc_heap::n_heaps, initial_n_heaps, g_num_processors));
 
             {
                 if (!gc_heap::prepare_to_change_heap_count (initial_n_heaps))
@@ -48405,6 +48833,16 @@ HRESULT GCHeap::Initialize()
             // This needs to be different from our initial heap count so we can make sure we wait for
             // the idle threads correctly in gc_thread_function.
             gc_heap::dynamic_heap_count_data.last_n_heaps = 0;
+            // This should be adjusted based on the target tcp. See comments in gcpriv.h
+            gc_heap::dynamic_heap_count_data.below_target_threshold = 10.0;
+            gc_heap::dynamic_heap_count_data.inc_recheck_threshold = 5;
+            gc_heap::dynamic_heap_count_data.dec_failure_recheck_threshold = 5;
+            // This should really be set as part of computing static data and should take conserve_mem_setting into consideration.
+            gc_heap::dynamic_heap_count_data.max_gen0_new_allocation = min (dd_max_size (gc_heap::g_heaps[0]->dynamic_data_of (0)), (size_t)(64 * 1024 * 1024));
+            gc_heap::dynamic_heap_count_data.min_gen0_new_allocation = dd_min_size (gc_heap::g_heaps[0]->dynamic_data_of (0));
+
+            dprintf (6666, ("datas max gen0 budget %Id, min %Id",
+                gc_heap::dynamic_heap_count_data.max_gen0_new_allocation, gc_heap::dynamic_heap_count_data.min_gen0_new_allocation));
         }
 #endif //DYNAMIC_HEAP_COUNT
         GCScan::GcRuntimeStructuresValid (TRUE);
@@ -49371,7 +49809,7 @@ GCHeap::GarbageCollect (int generation, bool low_memory_p, int mode)
     gc_heap* hpt = 0;
 #endif //MULTIPLE_HEAPS
 
-    generation = (generation < 0) ? max_generation : min (generation, max_generation);
+    generation = (generation < 0) ? max_generation : min (generation, (int)max_generation);
     dynamic_data* dd = hpt->dynamic_data_of (generation);
 
 #ifdef BACKGROUND_GC
@@ -49469,7 +49907,7 @@ size_t
 GCHeap::GarbageCollectTry (int generation, BOOL low_memory_p, int mode)
 {
     int gen = (generation < 0) ?
-               max_generation : min (generation, max_generation);
+               max_generation : min (generation, (int)max_generation);
 
     gc_reason reason = reason_empty;
 
@@ -49574,7 +50012,8 @@ void gc_heap::do_pre_gc()
         settings.condemned_generation,
         total_allocated_since_last_gc,
         (settings.concurrent ? "BGC" : (gc_heap::background_running_p() ? "FGC" : "NGC")),
-        settings.b_state));
+        settings.b_state,
+        n_heaps));
 #else
     dprintf (1, ("*GC* %d(gen0:%d)(%d)(alloc: %zd)",
         VolatileLoad(&settings.gc_index),
@@ -50823,11 +51262,11 @@ size_t gc_heap::get_gen0_min_size()
 #ifdef SERVER_GC
         // performance data seems to indicate halving the size results
         // in optimal perf.  Ask for adjusted gen0 size.
-        gen0size = max(GCToOSInterface::GetCacheSizePerLogicalCpu(FALSE),(256*1024));
+        gen0size = max(GCToOSInterface::GetCacheSizePerLogicalCpu(FALSE), (size_t)(256*1024));
 
         // if gen0 size is too large given the available memory, reduce it.
         // Get true cache size, as we don't want to reduce below this.
-        size_t trueSize = max(GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE),(256*1024));
+        size_t trueSize = max(GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE), (size_t)(256*1024));
         dprintf (1, ("cache: %zd-%zd",
             GCToOSInterface::GetCacheSizePerLogicalCpu(FALSE),
             GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE)));
@@ -50835,8 +51274,8 @@ size_t gc_heap::get_gen0_min_size()
         int n_heaps = gc_heap::n_heaps;
 #else //SERVER_GC
         size_t trueSize = GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE);
-        gen0size = max((4*trueSize/5),(256*1024));
-        trueSize = max(trueSize, (256*1024));
+        gen0size = max((4*trueSize/5),(size_t)(256*1024));
+        trueSize = max(trueSize, (size_t)(256*1024));
         int n_heaps = 1;
 #endif //SERVER_GC
 
@@ -50844,7 +51283,7 @@ size_t gc_heap::get_gen0_min_size()
         if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
         {
             // if we are asked to be stingy with memory, limit gen 0 size
-            gen0size = min (gen0size, (4*1024*1024));
+            gen0size = min (gen0size, (size_t)(4*1024*1024));
         }
 #endif //DYNAMIC_HEAP_COUNT
 
@@ -51465,7 +51904,7 @@ CFinalize::UpdatePromotedGenerations (int gen, BOOL gen_0_empty_p)
     // it was promoted or not
     if (gen_0_empty_p)
     {
-        for (int i = min (gen+1, max_generation); i > 0; i--)
+        for (int i = min (gen+1, (int)max_generation); i > 0; i--)
         {
             m_FillPointers [gen_segment(i)] = m_FillPointers [gen_segment(i-1)];
         }
@@ -52368,7 +52807,7 @@ bool gc_heap::compute_memory_settings(bool is_initialization, uint32_t& nhp, uin
             if (is_initialization)
 #endif //USE_REGIONS
             {
-                heap_hard_limit = (size_t)max ((20 * 1024 * 1024), physical_mem_for_gc);
+                heap_hard_limit = (size_t)max ((uint64_t)(20 * 1024 * 1024), physical_mem_for_gc);
             }
         }
     }
@@ -52416,8 +52855,8 @@ bool gc_heap::compute_memory_settings(bool is_initialization, uint32_t& nhp, uin
     uint32_t highmem_th_from_config = (uint32_t)GCConfig::GetGCHighMemPercent();
     if (highmem_th_from_config)
     {
-        high_memory_load_th = min (99, highmem_th_from_config);
-        v_high_memory_load_th = min (99, (highmem_th_from_config + 7));
+        high_memory_load_th = min (99u, highmem_th_from_config);
+        v_high_memory_load_th = min (99u, (highmem_th_from_config + 7));
 #ifdef FEATURE_EVENT_TRACE
         high_mem_percent_from_config = highmem_th_from_config;
 #endif //FEATURE_EVENT_TRACE
diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h
index d342cf697b42..6bd88798a493 100644
--- a/src/coreclr/gc/gcpriv.h
+++ b/src/coreclr/gc/gcpriv.h
@@ -2558,8 +2558,6 @@ class gc_heap
     // re-initialize a heap in preparation to putting it back into service
     PER_HEAP_METHOD void recommission_heap();
 
-    PER_HEAP_ISOLATED_METHOD size_t get_num_completed_gcs();
-
     PER_HEAP_ISOLATED_METHOD int calculate_new_heap_count();
 
     // check if we should change the heap count
@@ -3349,8 +3347,8 @@ class gc_heap
         size_t new_current_total_committed);
 
 #ifdef USE_REGIONS
-    PER_HEAP_ISOLATED_METHOD void compute_committed_bytes(size_t& total_committed, size_t& committed_decommit, size_t& committed_free, 
-                                  size_t& committed_bookkeeping, size_t& new_current_total_committed, size_t& new_current_total_committed_bookkeeping, 
+    PER_HEAP_ISOLATED_METHOD void compute_committed_bytes(size_t& total_committed, size_t& committed_decommit, size_t& committed_free,
+                                  size_t& committed_bookkeeping, size_t& new_current_total_committed, size_t& new_current_total_committed_bookkeeping,
                                   size_t* new_committed_by_oh);
 #endif
 
@@ -4230,7 +4228,7 @@ class gc_heap
 
 #ifdef DYNAMIC_HEAP_COUNT
     // Sample collection -
-    // 
+    //
     // For every GC, we collect the msl wait time + GC pause duration info and use both to calculate the
     // throughput cost percentage. We will also be using the wait time and the GC pause duration separately
     // for other purposes in the future.
@@ -4240,21 +4238,224 @@ class gc_heap
     struct dynamic_heap_count_data_t
     {
         static const int sample_size = 3;
+        static const int recorded_tcp_array_size = 64;
 
         struct sample
         {
             uint64_t    elapsed_between_gcs;    // time between gcs in microseconds (this should really be between_pauses)
             uint64_t    gc_pause_time;          // pause time for this GC
             uint64_t    msl_wait_time;
+            size_t      gc_survived_size;
         };
 
         uint32_t        sample_index;
         sample          samples[sample_size];
-        size_t          prev_num_completed_gcs;
+
+        size_t          current_samples_count;
+        size_t          processed_samples_count;
+
+        //
+        // We need to observe the history of tcp's so record them in a small buffer.
+        //
+        float           recorded_tcp_rearranged[recorded_tcp_array_size];
+        float           recorded_tcp[recorded_tcp_array_size];
+        int             recorded_tcp_index;
+        int             total_recorded_tcp;
+
+        int add_to_recorded_tcp (float tcp)
+        {
+            total_recorded_tcp++;
+
+            recorded_tcp[recorded_tcp_index] = tcp;
+            recorded_tcp_index++;
+            if (recorded_tcp_index == recorded_tcp_array_size)
+            {
+                recorded_tcp_index = 0;
+            }
+
+            return recorded_tcp_index;
+        }
+
+        int rearrange_recorded_tcp ()
+        {
+            int count = recorded_tcp_array_size;
+            int copied_count = 0;
+
+            if (total_recorded_tcp >= recorded_tcp_array_size)
+            {
+                int earlier_entry_size = recorded_tcp_array_size - recorded_tcp_index;
+                memcpy (recorded_tcp_rearranged, (recorded_tcp + recorded_tcp_index), (earlier_entry_size * sizeof (float)));
+
+                copied_count = earlier_entry_size;
+            }
+
+            if (recorded_tcp_index)
+            {
+                memcpy ((recorded_tcp_rearranged + copied_count), recorded_tcp, (recorded_tcp_index * sizeof (float)));
+                copied_count += recorded_tcp_index;
+            }
+
+            return copied_count;
+        }
+
+        int highest_avg_recorded_tcp (int count, float avg, float* highest_avg)
+        {
+            float highest_sum = 0.0;
+            int highest_count = 0;
+
+            for (int i = 0; i < count; i++)
+            {
+                if (recorded_tcp_rearranged[i] > avg)
+                {
+                    highest_count++;
+                    highest_sum += recorded_tcp_rearranged[i];
+                }
+            }
+
+            if (highest_count)
+            {
+                *highest_avg = highest_sum / highest_count;
+            }
+
+            return highest_count;
+        }
+
+        void init_recorded_tcp ()
+        {
+            total_recorded_tcp = 0;
+            recorded_tcp_index = 0;
+            dprintf (6666, ("INIT tcp buffer"));
+        }
+
+        int get_recorded_tcp_count () { return total_recorded_tcp; }
+
+        //
+        // Maintain some info about last time we did change heap count.
+        //
+        size_t          last_changed_gc_index;
+        // This is intentionally kept as a float for precision.
+        float           last_changed_count;
+        float           last_changed_stcp;
+
+        //
+        // For tuning above/below target tcp.
+        //
+        // If we just increased the heap count and immediately need to grow again, that counts as a failure.
+        // The higher the failure count, the more aggressive we should grow.
+        int             inc_failure_count;
+
+        // If we are trending up and the tcp is already close enough to target, we need this many samples
+        // before we adjust.
+        int             inc_recheck_threshold;
+
+        // If we shrink and the stcp doesn't change much, that counts as a failure. For the below target case
+        // it's fine to stay here for a while. Either it'll naturally change and break out of this situation
+        // or we wait for a while before we re-evaluate. How long we wait is defined by dec_recheck_threshold
+        // each time our calculation tells us to shrink.
+        int             dec_failure_count;
+        int             dec_failure_recheck_threshold;
+
+        // If we continue to be below target for an extended period of time, ie, we've accumulated more than
+        // below_target_threshold, we want to reduce the heap count.
+        float           below_target_accumulation;
+        float           below_target_threshold;
+
+        // TODO: we should refactor this and the inc checks into a utility class.
+        bool            dec_by_one_scheduled;
+        int             dec_by_one_count;
+
+        // Currently only used for dprintf.
+        size_t          first_below_target_gc_index;
+
+        float get_range_upper (float t)
+        {
+            return (t * 1.2f);
+        }
+
+        bool is_tcp_in_range (float diff_pct, float slope)
+        {
+            return ((diff_pct <= 0.2) && (diff_pct >= -0.2) && (slope <= 0.1) && (slope >= -0.1));
+        }
+
+        bool is_tcp_far_below (float diff_pct)
+        {
+            return (diff_pct >= 0.4);
+        }
+
+        bool is_close_to_max (int new_n, int max)
+        {
+            return ((max - new_n) <= (max / 10));
+        }
+
+        bool should_dec_by_one()
+        {
+            if (!dec_by_one_scheduled)
+            {
+                dec_by_one_scheduled = true;
+            }
+
+            if (dec_by_one_scheduled)
+            {
+                dec_by_one_count++;
+                dprintf (6666, ("scheduled to dec by 1 heap %d times", dec_by_one_count));
+            }
+
+            return (dec_by_one_count >= 5);
+        }
+
+        void reset_dec_by_one()
+        {
+            dec_by_one_scheduled = false;
+            dec_by_one_count = 0;
+        }
+
+        size_t          max_gen0_new_allocation;
+        size_t          min_gen0_new_allocation;
+
+        size_t compute_gen0_new_allocation (size_t total_old_gen_size)
+        {
+            assert (total_old_gen_size > 0);
+
+            // TODO: adjust these based on conserve_mem_setting.
+            double old_gen_growth_factor = 16.0 / sqrt ((double)total_old_gen_size / 1000.0 / 1000.0);
+            double saved_old_gen_growth_factor = old_gen_growth_factor;
+            old_gen_growth_factor = min (10.0, old_gen_growth_factor);
+            old_gen_growth_factor = max (0.1, old_gen_growth_factor);
+
+            size_t total_new_allocation_old_gen = (size_t)(old_gen_growth_factor * (double)total_old_gen_size);
+            size_t new_allocation_old_gen = total_new_allocation_old_gen / n_heaps;
+
+            dprintf (6666, ("total gen2 %Id (%.3fmb), factor %.3f=>%.3f -> total gen0 new_alloc %Id (%Id/heap, %.3fmb)",
+                total_old_gen_size, ((double)total_old_gen_size / 1000.0 / 1000.0),
+                saved_old_gen_growth_factor, old_gen_growth_factor, total_new_allocation_old_gen,
+                new_allocation_old_gen, ((double)new_allocation_old_gen / 1000.0 / 1000.0)));
+
+            new_allocation_old_gen = min (max_gen0_new_allocation, new_allocation_old_gen);
+            new_allocation_old_gen = max (min_gen0_new_allocation, new_allocation_old_gen);
+
+            return new_allocation_old_gen;
+        }
+
+        //
+        // gen2 GCs are handled separately only as a backstop.
+        //
+        struct gen2_sample
+        {
+            // Recording the gen2 GC indices so we know how far apart they are. Currently unused
+            // but we should consider how much value there is if they are very far apart.
+            size_t gc_index;
+            // This is (gc_elapsed_time / time inbetween this and the last gen2 GC)
+            float gc_percent;
+        };
 
         uint32_t        gen2_sample_index;
-        // This is (gc_elapsed_time / time inbetween this and the last gen2 GC)
-        float           gen2_gc_percents[sample_size];
+        gen2_sample     gen2_samples[sample_size];
+
+        size_t          current_gen2_samples_count;
+        size_t          processed_gen2_samples_count;
+
+        // This records the stcp last time we processed ephemeral samples. We use it
+        float           last_processed_stcp;
 
         float median_throughput_cost_percent;          // estimated overhead of allocator + gc
         float smoothed_median_throughput_cost_percent; // exponentially smoothed version
@@ -4273,14 +4474,13 @@ class gc_heap
 
         bool            should_change_heap_count;
         int             heap_count_to_change_to;
-        int             heap_count_change_count;
 #ifdef STRESS_DYNAMIC_HEAP_COUNT
         int             lowest_heap_with_msl_uoh;
 #endif //STRESS_DYNAMIC_HEAP_COUNT
 
         float get_median_gen2_gc_percent()
         {
-            return median_of_3 (gen2_gc_percents[0], gen2_gc_percents[1], gen2_gc_percents[2]);
+            return median_of_3 (gen2_samples[0].gc_percent, gen2_samples[1].gc_percent, gen2_samples[2].gc_percent);
         }
     };
     PER_HEAP_ISOLATED_FIELD_MAINTAINED dynamic_heap_count_data_t dynamic_heap_count_data;
@@ -4477,6 +4677,9 @@ class gc_heap
     // at the beginning of a BGC and the PM triggered full GCs
     // fall into this case.
     PER_HEAP_ISOLATED_FIELD_DIAG_ONLY uint64_t suspended_start_time;
+    // Right now this is diag only but may be used functionally later.
+    PER_HEAP_ISOLATED_FIELD_DIAG_ONLY uint64_t change_heap_count_time;
+    // TEMP END
     PER_HEAP_ISOLATED_FIELD_DIAG_ONLY uint64_t end_gc_time;
     PER_HEAP_ISOLATED_FIELD_DIAG_ONLY uint64_t total_suspended_time;
     PER_HEAP_ISOLATED_FIELD_DIAG_ONLY uint64_t process_start_time;
@@ -5818,3 +6021,6 @@ class card_marking_enumerator
 #else
 #define THIS_ARG
 #endif // FEATURE_CARD_MARKING_STEALING
+
+using std::min;
+using std::max;
diff --git a/src/coreclr/gc/gcrecord.h b/src/coreclr/gc/gcrecord.h
index 44641157e9f1..b611e7c0c1c4 100644
--- a/src/coreclr/gc/gcrecord.h
+++ b/src/coreclr/gc/gcrecord.h
@@ -73,7 +73,8 @@ enum gc_condemn_reason_condition
     gen_joined_servo_postpone = 27,
     gen_joined_stress_mix = 28,
     gen_joined_stress = 29,
-    gcrc_max = 30
+    gen_joined_aggressive = 30,
+    gcrc_max = 31
 };
 
 #ifdef DT_LOG
diff --git a/src/coreclr/gc/gcsvr.cpp b/src/coreclr/gc/gcsvr.cpp
index 9e4a78473530..5dc848f40c3f 100644
--- a/src/coreclr/gc/gcsvr.cpp
+++ b/src/coreclr/gc/gcsvr.cpp
@@ -20,7 +20,7 @@
 
 #define SERVER_GC 1
 
-#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
+#ifdef TARGET_AMD64
 #include "vxsort/do_vxsort.h"
 #endif
 
diff --git a/src/coreclr/gc/gcwks.cpp b/src/coreclr/gc/gcwks.cpp
index 7d599e8d8e51..6b4cfe168146 100644
--- a/src/coreclr/gc/gcwks.cpp
+++ b/src/coreclr/gc/gcwks.cpp
@@ -20,7 +20,7 @@
 #undef SERVER_GC
 #endif
 
-#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
+#ifdef TARGET_AMD64
 #include "vxsort/do_vxsort.h"
 #endif
 
diff --git a/src/coreclr/gc/sample/CMakeLists.txt b/src/coreclr/gc/sample/CMakeLists.txt
index 94a736e8c812..1f297fd23133 100644
--- a/src/coreclr/gc/sample/CMakeLists.txt
+++ b/src/coreclr/gc/sample/CMakeLists.txt
@@ -53,6 +53,7 @@ if(CLR_CMAKE_TARGET_WIN32)
     list(APPEND SOURCES
         ../windows/gcenv.windows.cpp)
     add_definitions(-DUNICODE)
+    add_compile_definitions(NOMINMAX)
 else()
     list(APPEND SOURCES
         ../gcenv.unix.cpp)
diff --git a/src/coreclr/gc/sample/GCSample.cpp b/src/coreclr/gc/sample/GCSample.cpp
index 41e275035b91..0f2afc7c20a7 100644
--- a/src/coreclr/gc/sample/GCSample.cpp
+++ b/src/coreclr/gc/sample/GCSample.cpp
@@ -176,7 +176,7 @@ int __cdecl main(int argc, char* argv[])
     // GC expects the size of ObjHeader (extra void*) to be included in the size.
     baseSize = baseSize + sizeof(ObjHeader);
     // Add padding as necessary. GC requires the object size to be at least MIN_OBJECT_SIZE.
-    My_MethodTable.m_MT.m_baseSize = max(baseSize, MIN_OBJECT_SIZE);
+    My_MethodTable.m_MT.m_baseSize = max(baseSize, (uint32_t)MIN_OBJECT_SIZE);
 
     My_MethodTable.m_MT.m_componentSize = 0;    // Array component size
     My_MethodTable.m_MT.m_flags = MTFlag_ContainsPointers;
diff --git a/src/coreclr/gc/sample/GCSample.vcxproj b/src/coreclr/gc/sample/GCSample.vcxproj
index 6e33738d18d0..0b7e657b35f8 100644
--- a/src/coreclr/gc/sample/GCSample.vcxproj
+++ b/src/coreclr/gc/sample/GCSample.vcxproj
@@ -51,7 +51,7 @@
       <PrecompiledHeader>Use</PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>WIN32;HOST_X86;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>WIN32;HOST_X86;NOMINMAX;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <SDLCheck>true</SDLCheck>
       <PrecompiledHeaderFile>common.h</PrecompiledHeaderFile>
       <AdditionalIncludeDirectories>.;..;..\env</AdditionalIncludeDirectories>
@@ -68,7 +68,7 @@
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>WIN32;HOST_X86;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>WIN32;HOST_X86;NOMINMAX;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <SDLCheck>true</SDLCheck>
       <AdditionalIncludeDirectories>.;..;..\env</AdditionalIncludeDirectories>
     </ClCompile>
@@ -109,4 +109,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/src/coreclr/gc/unix/cgroup.cpp b/src/coreclr/gc/unix/cgroup.cpp
index dece84578be9..af9a8042cb86 100644
--- a/src/coreclr/gc/unix/cgroup.cpp
+++ b/src/coreclr/gc/unix/cgroup.cpp
@@ -41,7 +41,6 @@ Module Name:
 #endif
 
 #define CGROUP2_SUPER_MAGIC 0x63677270
-#define TMPFS_MAGIC 0x01021994
 
 #define PROC_MOUNTINFO_FILENAME "/proc/self/mountinfo"
 #define PROC_CGROUP_FILENAME "/proc/self/cgroup"
@@ -131,12 +130,16 @@ class CGroup
         if (result != 0)
             return 0;
 
-        switch (stats.f_type)
+        if (stats.f_type == CGROUP2_SUPER_MAGIC)
         {
-            case TMPFS_MAGIC: return 1;
-            case CGROUP2_SUPER_MAGIC: return 2;
-            default:
-                return 0;
+            return 2;
+        }
+        else
+        {
+            // Assume that if /sys/fs/cgroup exists and the file system type is not cgroup2fs,
+            // it is cgroup v1. Typically the file system type is tmpfs, but other values have
+            // been seen in the wild.
+            return 1;
         }
 #endif
     }
diff --git a/src/coreclr/gc/unix/gcenv.unix.cpp b/src/coreclr/gc/unix/gcenv.unix.cpp
index 6b0a0b06dfbf..192a4c1216df 100644
--- a/src/coreclr/gc/unix/gcenv.unix.cpp
+++ b/src/coreclr/gc/unix/gcenv.unix.cpp
@@ -35,12 +35,6 @@
 #define __has_cpp_attribute(x) (0)
 #endif
 
-#if __has_cpp_attribute(fallthrough)
-#define FALLTHROUGH [[fallthrough]]
-#else
-#define FALLTHROUGH
-#endif
-
 #include <algorithm>
 
 #if HAVE_SYS_TIME_H
@@ -880,29 +874,30 @@ bool ReadMemoryValueFromFile(const char* filename, uint64_t* val)
     return result;
 }
 
-#define UPDATE_CACHE_SIZE_AND_LEVEL(NEW_CACHE_SIZE, NEW_CACHE_LEVEL) if (NEW_CACHE_SIZE > cacheSize) { cacheSize = NEW_CACHE_SIZE; cacheLevel = NEW_CACHE_LEVEL; }
-
 static size_t GetLogicalProcessorCacheSizeFromOS()
 {
     size_t cacheLevel = 0;
     size_t cacheSize = 0;
-    size_t size;
 
-#ifdef _SC_LEVEL1_DCACHE_SIZE
-    size = ( size_t) sysconf(_SC_LEVEL1_DCACHE_SIZE);
-    UPDATE_CACHE_SIZE_AND_LEVEL(size, 1)
-#endif
-#ifdef _SC_LEVEL2_CACHE_SIZE
-    size = ( size_t) sysconf(_SC_LEVEL2_CACHE_SIZE);
-    UPDATE_CACHE_SIZE_AND_LEVEL(size, 2)
-#endif
-#ifdef _SC_LEVEL3_CACHE_SIZE
-    size = ( size_t) sysconf(_SC_LEVEL3_CACHE_SIZE);
-    UPDATE_CACHE_SIZE_AND_LEVEL(size, 3)
-#endif
-#ifdef _SC_LEVEL4_CACHE_SIZE
-    size = ( size_t) sysconf(_SC_LEVEL4_CACHE_SIZE);
-    UPDATE_CACHE_SIZE_AND_LEVEL(size, 4)
+#if defined(_SC_LEVEL1_DCACHE_SIZE) || defined(_SC_LEVEL2_CACHE_SIZE) || defined(_SC_LEVEL3_CACHE_SIZE) || defined(_SC_LEVEL4_CACHE_SIZE)
+    const int cacheLevelNames[] =
+    {
+        _SC_LEVEL1_DCACHE_SIZE,
+        _SC_LEVEL2_CACHE_SIZE,
+        _SC_LEVEL3_CACHE_SIZE,
+        _SC_LEVEL4_CACHE_SIZE,
+    };
+
+    for (int i = ARRAY_SIZE(cacheLevelNames) - 1; i >= 0; i--)
+    {
+        long size = sysconf(cacheLevelNames[i]);
+        if (size > 0)
+        {
+            cacheSize = (size_t)size;
+            cacheLevel = i + 1;
+            break;
+        }
+    }
 #endif
 
 #if defined(TARGET_LINUX) && !defined(HOST_ARM) && !defined(HOST_X86)
@@ -924,17 +919,16 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
         {
             path_to_size_file[index] = (char)(48 + i);
 
-            if (ReadMemoryValueFromFile(path_to_size_file, &size))
+            uint64_t cache_size_from_sys_file = 0;
+
+            if (ReadMemoryValueFromFile(path_to_size_file, &cache_size_from_sys_file))
             {
-                path_to_level_file[index] = (char)(48 + i);
+                cacheSize = std::max(cacheSize, (size_t)cache_size_from_sys_file);
 
+                path_to_level_file[index] = (char)(48 + i);
                 if (ReadMemoryValueFromFile(path_to_level_file, &level))
                 {
-                    UPDATE_CACHE_SIZE_AND_LEVEL(size, level)
-                }
-                else
-                {
-                    cacheSize = std::max(cacheSize, size);
+                    cacheLevel = level;
                 }
             }
         }
@@ -986,7 +980,7 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
         if (success)
         {
             assert(cacheSizeFromSysctl > 0);
-            cacheSize = ( size_t) cacheSizeFromSysctl;
+            cacheSize = (size_t) cacheSizeFromSysctl;
         }
     }
 #endif
diff --git a/src/coreclr/gc/vxsort/CMakeLists.txt b/src/coreclr/gc/vxsort/CMakeLists.txt
new file mode 100644
index 000000000000..fc55956832e3
--- /dev/null
+++ b/src/coreclr/gc/vxsort/CMakeLists.txt
@@ -0,0 +1,29 @@
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+include_directories("../env")
+
+if(CLR_CMAKE_HOST_UNIX)
+  set_source_files_properties(isa_detection.cpp PROPERTIES COMPILE_FLAGS -mavx2)
+  set_source_files_properties(do_vxsort_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
+  set_source_files_properties(do_vxsort_avx512.cpp PROPERTIES COMPILE_FLAGS -mavx2)
+  set_source_files_properties(machine_traits.avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
+  set_source_files_properties(smallsort/bitonic_sort.AVX2.int64_t.generated.cpp PROPERTIES COMPILE_FLAGS -mavx2)
+  set_source_files_properties(smallsort/bitonic_sort.AVX2.int32_t.generated.cpp PROPERTIES COMPILE_FLAGS -mavx2)
+  set_source_files_properties(smallsort/bitonic_sort.AVX512.int64_t.generated.cpp PROPERTIES COMPILE_FLAGS -mavx2)
+  set_source_files_properties(smallsort/bitonic_sort.AVX512.int32_t.generated.cpp PROPERTIES COMPILE_FLAGS -mavx2)
+  set_source_files_properties(smallsort/avx2_load_mask_tables.cpp PROPERTIES COMPILE_FLAGS -mavx2)
+endif(CLR_CMAKE_HOST_UNIX)
+
+set (VXSORT_SOURCES
+  isa_detection.cpp
+  do_vxsort_avx2.cpp
+  do_vxsort_avx512.cpp
+  machine_traits.avx2.cpp
+  smallsort/bitonic_sort.AVX2.int64_t.generated.cpp
+  smallsort/bitonic_sort.AVX2.int32_t.generated.cpp
+  smallsort/bitonic_sort.AVX512.int64_t.generated.cpp
+  smallsort/bitonic_sort.AVX512.int32_t.generated.cpp
+  smallsort/avx2_load_mask_tables.cpp
+  do_vxsort.h
+)
+
+add_library(gc_vxsort STATIC ${VXSORT_SOURCES})
diff --git a/src/coreclr/gc/vxsort/defs.h b/src/coreclr/gc/vxsort/defs.h
index 0cc72b23fa24..d6373a21ad26 100644
--- a/src/coreclr/gc/vxsort/defs.h
+++ b/src/coreclr/gc/vxsort/defs.h
@@ -45,45 +45,6 @@
 #define NOINLINE __attribute__((noinline))
 #endif
 
-namespace std {
-template <class _Ty>
-class numeric_limits {
-   public:
-    static constexpr _Ty Max() { static_assert(sizeof(_Ty) != sizeof(_Ty), "func must be specialized!"); return _Ty(); }
-    static constexpr _Ty Min() { static_assert(sizeof(_Ty) != sizeof(_Ty), "func must be specialized!"); return _Ty(); }
-};
-
-template <>
-class numeric_limits<int32_t> {
-public:
-    static constexpr int32_t Max() { return 0x7fffffff; }
-    static constexpr int32_t Min() { return -0x7fffffff - 1; }
-};
-
-template <>
-class numeric_limits<uint32_t> {
-public:
-    static constexpr uint32_t Max() { return 0xffffffff; }
-    static constexpr uint32_t Min() { return 0; }
-};
-
-template <>
-class numeric_limits<int64_t> {
-   public:
-    static constexpr int64_t Max() { return 0x7fffffffffffffffi64; }
-
-    static constexpr int64_t Min() { return -0x7fffffffffffffffi64 - 1; }
-};
-}  // namespace std
-
-#ifndef max
-template <typename T>
-T max(T a, T b) {
-    if (a > b)
-        return a;
-    else
-        return b;
-}
-#endif
-
+using std::max;
+using std::min;
 #endif  // VXSORT_DEFS_H
diff --git a/src/coreclr/gc/vxsort/machine_traits.avx2.h b/src/coreclr/gc/vxsort/machine_traits.avx2.h
index ccadc2a9a27a..7aca281e288e 100644
--- a/src/coreclr/gc/vxsort/machine_traits.avx2.h
+++ b/src/coreclr/gc/vxsort/machine_traits.avx2.h
@@ -13,6 +13,7 @@
 #include <immintrin.h>
 #include <assert.h>
 #include <inttypes.h>
+#include <type_traits>
 #include "defs.h"
 #include "machine_traits.h"
 
@@ -123,8 +124,7 @@ class vxsort_machine_traits<int64_t, AVX2> {
 
     template <int Shift>
     static constexpr bool can_pack(T span) {
-        const auto PACK_LIMIT = (((TU) std::numeric_limits<uint32_t>::Max() + 1)) << Shift;
-        return ((TU) span) < PACK_LIMIT;
+        return ((TU) span) < ((((TU) std::numeric_limits<uint32_t>::max() + 1)) << Shift);
     }
 
     static INLINE TV load_vec(TV* p) { return _mm256_lddqu_si256(p); }
diff --git a/src/coreclr/gc/vxsort/machine_traits.avx512.h b/src/coreclr/gc/vxsort/machine_traits.avx512.h
index 8df8660aa13a..78f59dee99a3 100644
--- a/src/coreclr/gc/vxsort/machine_traits.avx512.h
+++ b/src/coreclr/gc/vxsort/machine_traits.avx512.h
@@ -11,6 +11,7 @@
 #include "vxsort_targets_enable_avx512.h"
 
 #include <immintrin.h>
+#include <type_traits>
 #include "defs.h"
 #include "machine_traits.h"
 
@@ -92,8 +93,7 @@ class vxsort_machine_traits<int64_t, AVX512> {
 
     template <int Shift>
     static constexpr bool can_pack(T span) {
-        const auto PACK_LIMIT = (((TU) std::numeric_limits<uint32_t>::Max() + 1)) << Shift;
-        return ((TU) span) < PACK_LIMIT;
+        return ((TU) span) < ((((TU) std::numeric_limits<uint32_t>::max() + 1)) << Shift);
     }
 
     static INLINE TV load_vec(TV* p) { return _mm512_loadu_si512(p); }
diff --git a/src/coreclr/gc/vxsort/packer.h b/src/coreclr/gc/vxsort/packer.h
index be50b7d5fb41..94f293dac71f 100644
--- a/src/coreclr/gc/vxsort/packer.h
+++ b/src/coreclr/gc/vxsort/packer.h
@@ -56,7 +56,7 @@ class packer {
    public:
 
     static void pack(TFrom *mem, size_t len, TFrom base) {
-        TFrom offset = MT::template shift_n_sub<Shift>(base, (TFrom) std::numeric_limits<TTo>::Min());
+        TFrom offset = MT::template shift_n_sub<Shift>(base, (TFrom) std::numeric_limits<TTo>::min());
         auto baseVec = MT::broadcast(offset);
 
         auto pre_aligned_mem = reinterpret_cast<TFrom *>(reinterpret_cast<size_t>(mem) & ~ALIGN_MASK);
@@ -87,8 +87,8 @@ class packer {
 
         assert(AH::is_aligned(mem_read));
 
-        auto memv_read = (TV *) mem_read;
-        auto memv_write = (TV *) mem_write;
+        TV * memv_read = (TV *) mem_read;
+        TV * memv_write = (TV *) mem_write;
 
         auto lenv = len / N;
         len -= (lenv * N);
@@ -156,7 +156,7 @@ class packer {
 
 
     static void unpack(TTo *mem, size_t len, TFrom base) {
-        TFrom offset = MT::template shift_n_sub<Shift>(base, (TFrom) std::numeric_limits<TTo>::Min());
+        TFrom offset = MT::template shift_n_sub<Shift>(base, (TFrom) std::numeric_limits<TTo>::min());
         auto baseVec = MT::broadcast(offset);
 
         auto mem_read = mem + len;
@@ -184,8 +184,8 @@ class packer {
         assert(AH::is_aligned(mem_read));
 
         auto lenv = len / (N * 2);
-        auto memv_read = ((TV *) mem_read) - 1;
-        auto memv_write = ((TV *) mem_write) - 2;
+        TV * memv_read = ((TV *) mem_read) - 1;
+        TV * memv_write = ((TV *) mem_write) - 2;
         len -= lenv * N * 2;
 
         while (lenv >= Unroll) {
diff --git a/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.h b/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.h
index c3f141c1046b..c805a425fbea 100644
--- a/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.h
+++ b/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.h
@@ -39,7 +39,7 @@ extern "C" const uint8_t mask_table_8[M8_SIZE];
 
 template<> struct bitonic<int32_t, AVX2> {
     static const int N = 8;
-    static constexpr int32_t MAX = std::numeric_limits<int32_t>::Max();
+    static constexpr int32_t MAX = std::numeric_limits<int32_t>::max();
 public:
 
     static INLINE void sort_01v_ascending(__m256i& d01) {
diff --git a/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.h b/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.h
index a012161c99dd..c3403bbe31aa 100644
--- a/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.h
+++ b/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.h
@@ -39,7 +39,7 @@ extern "C" const uint8_t mask_table_8[M8_SIZE];
 
 template<> struct bitonic<int64_t, AVX2> {
     static const int N = 4;
-    static constexpr int64_t MAX = std::numeric_limits<int64_t>::Max();
+    static constexpr int64_t MAX = std::numeric_limits<int64_t>::max();
 public:
 
     static INLINE void sort_01v_ascending(__m256i& d01) {
diff --git a/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.h b/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.h
index 1326c8fee5e5..eb9ee4d27592 100644
--- a/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.h
+++ b/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.h
@@ -36,7 +36,7 @@ namespace vxsort {
 namespace smallsort {
 template<> struct bitonic<int32_t, AVX512> {
     static const int N = 16;
-    static constexpr int32_t MAX = std::numeric_limits<int32_t>::Max();
+    static constexpr int32_t MAX = std::numeric_limits<int32_t>::max();
 public:
 
     static INLINE void sort_01v_ascending(__m512i& d01) {
diff --git a/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.h b/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.h
index ac44992fe239..98fe507b7343 100644
--- a/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.h
+++ b/src/coreclr/gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.h
@@ -36,7 +36,7 @@ namespace vxsort {
 namespace smallsort {
 template<> struct bitonic<int64_t, AVX512> {
     static const int N = 8;
-    static constexpr int64_t MAX = std::numeric_limits<int64_t>::Max();
+    static constexpr int64_t MAX = std::numeric_limits<int64_t>::max();
 public:
 
     static INLINE void sort_01v_ascending(__m512i& d01) {
diff --git a/src/coreclr/gc/vxsort/smallsort/codegen/avx2.py b/src/coreclr/gc/vxsort/smallsort/codegen/avx2.py
index 9944cbbc8968..b9c39770d549 100644
--- a/src/coreclr/gc/vxsort/smallsort/codegen/avx2.py
+++ b/src/coreclr/gc/vxsort/smallsort/codegen/avx2.py
@@ -303,7 +303,7 @@ def generate_prologue(self, f):
 
 template<> struct bitonic<{t}, AVX2> {{
     static const int N = {self.vector_size()};
-    static constexpr {t} MAX = std::numeric_limits<{t}>::Max();
+    static constexpr {t} MAX = std::numeric_limits<{t}>::max();
 public:
 """
         print(s, file=f)
diff --git a/src/coreclr/gc/vxsort/smallsort/codegen/avx512.py b/src/coreclr/gc/vxsort/smallsort/codegen/avx512.py
index e259027c5636..9b417723c6e3 100644
--- a/src/coreclr/gc/vxsort/smallsort/codegen/avx512.py
+++ b/src/coreclr/gc/vxsort/smallsort/codegen/avx512.py
@@ -299,7 +299,7 @@ def generate_prologue(self, f):
 namespace smallsort {{
 template<> struct bitonic<{t}, AVX512> {{
     static const int N = {self.vector_size()};
-    static constexpr {t} MAX = std::numeric_limits<{t}>::Max();
+    static constexpr {t} MAX = std::numeric_limits<{t}>::max();
 public:
 """
         print(s, file=f)
diff --git a/src/coreclr/gc/vxsort/vxsort.h b/src/coreclr/gc/vxsort/vxsort.h
index b8eaac51f421..ace20c10734f 100644
--- a/src/coreclr/gc/vxsort/vxsort.h
+++ b/src/coreclr/gc/vxsort/vxsort.h
@@ -13,10 +13,11 @@
 #endif
 #endif
 
-
 #include <assert.h>
 #include <immintrin.h>
 
+#include <minipal/utils.h>
+
 #include "defs.h"
 #include "alignment.h"
 #include "machine_traits.h"
@@ -374,7 +375,7 @@ class vxsort {
         auto pivot = *right;
         // We do this here just in case we need to pre-align to the right
         // We end up
-        *right = std::numeric_limits<T>::Max();
+        *right = std::numeric_limits<T>::max();
 
         // Broadcast the selected pivot
         const TV P = MT::broadcast(pivot);
@@ -421,16 +422,16 @@ class vxsort {
 
         // From now on, we are fully aligned
         // and all reading is done in full vector units
-        auto readLeftV = (TV*) readLeft;
-        auto readRightV = (TV*) readRight;
+        TV* readLeftV = (TV*) readLeft;
+        TV* readRightV = (TV*) readRight;
         #ifndef NDEBUG
         readLeft = nullptr;
         readRight = nullptr;
         #endif
 
         for (auto u = 0; u < InnerUnroll; u++) {
-            auto dl = MT::load_vec(readLeftV + u);
-            auto dr = MT::load_vec(readRightV - (u + 1));
+            TV dl = MT::load_vec(readLeftV + u);
+            TV dr = MT::load_vec(readRightV - (u + 1));
             partition_block(dl, P, tmpLeft, tmpRight);
             partition_block(dr, P, tmpLeft, tmpRight);
         }
@@ -458,31 +459,53 @@ class vxsort {
 
             switch (InnerUnroll) {
                 case 12: d12 = MT::load_vec(nextPtr + InnerUnroll - 12);
+                FALLTHROUGH;
                 case 11: d11 = MT::load_vec(nextPtr + InnerUnroll - 11);
+                FALLTHROUGH;
                 case 10: d10 = MT::load_vec(nextPtr + InnerUnroll - 10);
+                FALLTHROUGH;
                 case  9: d09 = MT::load_vec(nextPtr + InnerUnroll -  9);
+                FALLTHROUGH;
                 case  8: d08 = MT::load_vec(nextPtr + InnerUnroll -  8);
+                FALLTHROUGH;
                 case  7: d07 = MT::load_vec(nextPtr + InnerUnroll -  7);
+                FALLTHROUGH;
                 case  6: d06 = MT::load_vec(nextPtr + InnerUnroll -  6);
+                FALLTHROUGH;
                 case  5: d05 = MT::load_vec(nextPtr + InnerUnroll -  5);
+                FALLTHROUGH;
                 case  4: d04 = MT::load_vec(nextPtr + InnerUnroll -  4);
+                FALLTHROUGH;
                 case  3: d03 = MT::load_vec(nextPtr + InnerUnroll -  3);
+                FALLTHROUGH;
                 case  2: d02 = MT::load_vec(nextPtr + InnerUnroll -  2);
+                FALLTHROUGH;
                 case  1: d01 = MT::load_vec(nextPtr + InnerUnroll -  1);
             }
 
             switch (InnerUnroll) {
                 case 12: partition_block(d12, P, writeLeft, writeRight);
+                FALLTHROUGH;
                 case 11: partition_block(d11, P, writeLeft, writeRight);
+                FALLTHROUGH;
                 case 10: partition_block(d10, P, writeLeft, writeRight);
+                FALLTHROUGH;
                 case  9: partition_block(d09, P, writeLeft, writeRight);
+                FALLTHROUGH;
                 case  8: partition_block(d08, P, writeLeft, writeRight);
+                FALLTHROUGH;
                 case  7: partition_block(d07, P, writeLeft, writeRight);
+                FALLTHROUGH;
                 case  6: partition_block(d06, P, writeLeft, writeRight);
+                FALLTHROUGH;
                 case  5: partition_block(d05, P, writeLeft, writeRight);
+                FALLTHROUGH;
                 case  4: partition_block(d04, P, writeLeft, writeRight);
+                FALLTHROUGH;
                 case  3: partition_block(d03, P, writeLeft, writeRight);
+                FALLTHROUGH;
                 case  2: partition_block(d02, P, writeLeft, writeRight);
+                FALLTHROUGH;
                 case  1: partition_block(d01, P, writeLeft, writeRight);
               }
           }
@@ -499,7 +522,7 @@ class vxsort {
                 readLeftV += 1;
             }
 
-            auto d = MT::load_vec(nextPtr);
+            TV d = MT::load_vec(nextPtr);
             partition_block(d, P, writeLeft, writeRight);
             //partition_block_without_compress(d, P, writeLeft, writeRight);
         }
@@ -534,8 +557,8 @@ class vxsort {
         const auto rightAlign = hint.right_align;
         const auto rai = ~((rightAlign - 1) >> 31);
         const auto lai = leftAlign >> 31;
-        const auto preAlignedLeft  = (TV*) (left + leftAlign);
-        const auto preAlignedRight = (TV*) (right + rightAlign - N);
+        TV* const  preAlignedLeft  = (TV*) (left + leftAlign);
+        TV* const  preAlignedRight = (TV*) (right + rightAlign - N);
 
 #ifdef VXSORT_STATS
         vxsort_stats<T>::bump_vec_loads(2);
@@ -554,8 +577,8 @@ class vxsort {
         //       were actually needed to be written to the right hand side
         //    e) We write the right portion of the left vector to the right side
         //       now that its write position has been updated
-        auto RT0 = MT::load_vec(preAlignedRight);
-        auto LT0 = MT::load_vec(preAlignedLeft);
+        TV RT0 = MT::load_vec(preAlignedRight);
+        TV LT0 = MT::load_vec(preAlignedLeft);
         auto rtMask = MT::get_cmpgt_mask(RT0, P);
         auto ltMask = MT::get_cmpgt_mask(LT0, P);
         const auto rtPopCountRightPart = max(_mm_popcnt_u32(rtMask), rightAlign);
@@ -617,8 +640,8 @@ class vxsort {
      *        larger-than than all values contained within the provided array.
      */
     NOINLINE void sort(T* left, T* right,
-                       T left_hint = std::numeric_limits<T>::Min(),
-                       T right_hint = std::numeric_limits<T>::Max())
+                       T left_hint = std::numeric_limits<T>::min(),
+                       T right_hint = std::numeric_limits<T>::max())
     {
 //        init_isa_detection();
 
diff --git a/src/coreclr/gc/windows/gcenv.windows.cpp b/src/coreclr/gc/windows/gcenv.windows.cpp
index 0aae8e035bbb..608751dd169a 100644
--- a/src/coreclr/gc/windows/gcenv.windows.cpp
+++ b/src/coreclr/gc/windows/gcenv.windows.cpp
@@ -290,8 +290,8 @@ static size_t GetRestrictedPhysicalMemoryLimit()
                 (job_process_memory_limit != (size_t)UINTPTR_MAX) ||
                 (job_workingset_limit != (size_t)UINTPTR_MAX))
             {
-                job_physical_memory_limit = min (job_memory_limit, job_process_memory_limit);
-                job_physical_memory_limit = min (job_physical_memory_limit, job_workingset_limit);
+                job_physical_memory_limit = std::min (job_memory_limit, job_process_memory_limit);
+                job_physical_memory_limit = std::min (job_physical_memory_limit, job_workingset_limit);
 
                 MEMORYSTATUSEX ms;
                 ::GetProcessMemoryLoad(&ms);
@@ -299,7 +299,7 @@ static size_t GetRestrictedPhysicalMemoryLimit()
                 total_physical = ms.ullAvailPhys;
 
                 // A sanity check in case someone set a larger limit than there is actual physical memory.
-                job_physical_memory_limit = (size_t) min (job_physical_memory_limit, ms.ullTotalPhys);
+                job_physical_memory_limit = (size_t) std::min (job_physical_memory_limit, (size_t)ms.ullTotalPhys);
             }
         }
     }
@@ -1139,7 +1139,7 @@ bool GCToOSInterface::GetNumaInfo(uint16_t* total_nodes, uint32_t* max_procs_per
                     mask &= mask - 1;
                 }
 
-                currentProcsOnNode = max(currentProcsOnNode, procsOnNode);
+                currentProcsOnNode = std::max(currentProcsOnNode, procsOnNode);
             }
             *max_procs_per_node = currentProcsOnNode;
             *total_nodes = (uint16_t)g_nNodes;
@@ -1163,7 +1163,7 @@ bool GCToOSInterface::GetCPUGroupInfo(uint16_t* total_groups, uint32_t* max_proc
         DWORD currentProcsInGroup = 0;
         for (WORD i = 0; i < g_nGroups; i++)
         {
-            currentProcsInGroup = max(currentProcsInGroup, g_CPUGroupInfoArray[i].nr_active);
+            currentProcsInGroup = std::max(currentProcsInGroup, (DWORD)g_CPUGroupInfoArray[i].nr_active);
         }
         *max_procs_per_group = currentProcsInGroup;
         return true;
diff --git a/src/coreclr/gcinfo/CMakeLists.txt b/src/coreclr/gcinfo/CMakeLists.txt
index 3885cc14a0a4..cdc4ae794c8e 100644
--- a/src/coreclr/gcinfo/CMakeLists.txt
+++ b/src/coreclr/gcinfo/CMakeLists.txt
@@ -77,8 +77,10 @@ if (CLR_CMAKE_TARGET_ARCH_RISCV64)
   create_gcinfo_lib(TARGET gcinfo_unix_riscv64 OS unix ARCH riscv64)
 endif (CLR_CMAKE_TARGET_ARCH_RISCV64)
 
-create_gcinfo_lib(TARGET gcinfo_universal_arm OS universal ARCH arm)
-create_gcinfo_lib(TARGET gcinfo_win_x86 OS win ARCH x86)
+if (NOT CLR_CMAKE_TARGET_ARCH_RISCV64)
+  create_gcinfo_lib(TARGET gcinfo_universal_arm OS universal ARCH arm)
+  create_gcinfo_lib(TARGET gcinfo_win_x86 OS win ARCH x86)
+endif (NOT CLR_CMAKE_TARGET_ARCH_RISCV64)
 
 if (CLR_CMAKE_TARGET_ARCH_I386 AND CLR_CMAKE_TARGET_UNIX)
   create_gcinfo_lib(TARGET gcinfo_unix_x86 OS unix ARCH x86)
diff --git a/src/coreclr/hosts/coreshim/CoreShim.h b/src/coreclr/hosts/coreshim/CoreShim.h
index 97b630bdb9e1..9be052926ec5 100644
--- a/src/coreclr/hosts/coreshim/CoreShim.h
+++ b/src/coreclr/hosts/coreshim/CoreShim.h
@@ -5,7 +5,6 @@
 #define _CORESHIM_H_
 
 // Platform
-#define NOMINMAX
 #include <Windows.h>
 #include <combaseapi.h>
 
diff --git a/src/coreclr/ilasm/asmparse.y b/src/coreclr/ilasm/asmparse.y
index 73ef9a892b5e..c9861d58d797 100644
--- a/src/coreclr/ilasm/asmparse.y
+++ b/src/coreclr/ilasm/asmparse.y
@@ -486,7 +486,7 @@ typarAttrib             : '+'                               { $$ = gpCovariant;
                         | '-'                               { $$ = gpContravariant; }
                         | CLASS_                            { $$ = gpReferenceTypeConstraint; }
                         | VALUETYPE_                        { $$ = gpNotNullableValueTypeConstraint; }
-                        | BYREFLIKE_                        { $$ = gpAcceptByRefLike; }
+                        | BYREFLIKE_                        { $$ = gpAllowByRefLike; }
                         | _CTOR                             { $$ = gpDefaultConstructorConstraint; }
                         | FLAGS_ '(' int32 ')'              { $$ = (CorGenericParamAttr)$3; }
                         ;
diff --git a/src/coreclr/ilasm/main.cpp b/src/coreclr/ilasm/main.cpp
index 838f05aa996c..0fe683838d89 100644
--- a/src/coreclr/ilasm/main.cpp
+++ b/src/coreclr/ilasm/main.cpp
@@ -531,7 +531,8 @@ extern "C" int _cdecl wmain(int argc, _In_ WCHAR **argv)
                     else
                     {
                     InvalidOption:
-                        fprintf(stderr, "Error : Invalid Option: %LS\n", argv[i]);
+                        MAKE_UTF8PTR_FROMWIDE_NOTHROW(invalidOpt, argv[i]);
+                        fprintf(stderr, "Error : Invalid Option: %s\n", invalidOpt);
                         goto ErrorExit;
                     }
                 }
diff --git a/src/coreclr/ilasm/prebuilt/asmparse.cpp b/src/coreclr/ilasm/prebuilt/asmparse.cpp
index 6bf91f56c57f..08f686f29018 100644
--- a/src/coreclr/ilasm/prebuilt/asmparse.cpp
+++ b/src/coreclr/ilasm/prebuilt/asmparse.cpp
@@ -2523,7 +2523,7 @@ case 152:
 { yyval.int32 = gpNotNullableValueTypeConstraint; } break;
 case 153:
 #line 489 "asmparse.y"
-{ yyval.int32 = gpAcceptByRefLike; } break;
+{ yyval.int32 = gpAllowByRefLike; } break;
 case 154:
 #line 490 "asmparse.y"
 { yyval.int32 = gpDefaultConstructorConstraint; } break;
diff --git a/src/coreclr/ildasm/dasm.cpp b/src/coreclr/ildasm/dasm.cpp
index 860b36f2e936..da3aa514c0dc 100644
--- a/src/coreclr/ildasm/dasm.cpp
+++ b/src/coreclr/ildasm/dasm.cpp
@@ -1914,7 +1914,7 @@ BYTE* PrettyPrintCABlobValue(PCCOR_SIGNATURE &typePtr,
                 for(n=0; n < numElements; n++)
                 {
                     if(n) appendStr(out," ");
-                    _gcvt_s(str,64,*((float*)dataPtr), 8);
+                    sprintf_s(str, 64, "%.*g", 8, (double)(*((float*)dataPtr)));
                     float df = (float)atof(str);
                     // Must compare as underlying bytes, not floating point otherwise optimizer will
                     // try to enregister and compare 80-bit precision number with 32-bit precision number!!!!
@@ -1933,7 +1933,7 @@ BYTE* PrettyPrintCABlobValue(PCCOR_SIGNATURE &typePtr,
                 {
                     if(n) appendStr(out," ");
                     char *pch;
-                    _gcvt_s(str,64,*((double*)dataPtr), 17);
+                    sprintf_s(str, 64, "%.*g", 17, *((double*)dataPtr));
                     double df = strtod(str, &pch);
                     // Must compare as underlying bytes, not floating point otherwise optimizer will
                     // try to enregister and compare 80-bit precision number with 64-bit precision number!!!!
@@ -2605,7 +2605,7 @@ void DumpDefaultValue(mdToken tok, __inout __nullterminated char* szString, void
         case ELEMENT_TYPE_R4:
             {
                 char szf[32];
-                _gcvt_s(szf,32,MDDV.m_fltValue, 8);
+                sprintf_s(szf, 32, "%.*g", 8, (double)MDDV.m_fltValue);
                 float df = (float)atof(szf);
                 // Must compare as underlying bytes, not floating point otherwise optimizer will
                 // try to enregister and compare 80-bit precision number with 32-bit precision number!!!!
@@ -2619,7 +2619,7 @@ void DumpDefaultValue(mdToken tok, __inout __nullterminated char* szString, void
         case ELEMENT_TYPE_R8:
             {
                 char szf[32], *pch;
-                _gcvt_s(szf,32,MDDV.m_dblValue, 17);
+                sprintf_s(szf, 32, "%.*g", 17, MDDV.m_dblValue);
                 double df = strtod(szf, &pch); //atof(szf);
                 szf[31]=0;
                 // Must compare as underlying bytes, not floating point otherwise optimizer will
@@ -3081,7 +3081,7 @@ char *DumpGenericPars(_Inout_updates_(SZSTRING_SIZE) char* szString, mdToken tok
         if ((attr & gpNotNullableValueTypeConstraint) != 0)
             szptr += sprintf_s(szptr,SZSTRING_REMAINING_SIZE(szptr), "valuetype ");
         CHECK_REMAINING_SIZE;
-        if ((attr & gpAcceptByRefLike) != 0)
+        if ((attr & gpAllowByRefLike) != 0)
             szptr += sprintf_s(szptr,SZSTRING_REMAINING_SIZE(szptr), "byreflike ");
         CHECK_REMAINING_SIZE;
         if ((attr & gpDefaultConstructorConstraint) != 0)
@@ -7352,9 +7352,14 @@ void CloseNamespace(__inout __nullterminated char* szString)
 
 FILE* OpenOutput(_In_ __nullterminated const WCHAR* wzFileName)
 {
+#ifdef HOST_WINDOWS
     FILE*   pfile = NULL;
         if(g_uCodePage == 0xFFFFFFFF) _wfopen_s(&pfile,wzFileName,W("wb"));
         else _wfopen_s(&pfile,wzFileName,W("wt"));
+#else
+    FILE*   pfile = NULL;
+    _wfopen_s(&pfile,wzFileName,W("w"));
+#endif
 
     if(pfile)
     {
diff --git a/src/coreclr/ildasm/dis.cpp b/src/coreclr/ildasm/dis.cpp
index 58c86e0e9ae7..2ad1ecd2d200 100644
--- a/src/coreclr/ildasm/dis.cpp
+++ b/src/coreclr/ildasm/dis.cpp
@@ -1113,14 +1113,19 @@ BOOL Disassemble(IMDInternalImport *pImport, BYTE *ILHeader, void *GUICookie, md
                         {
                             if(pFile) fclose(pFile);
                             pFile = NULL;
-                            if(fopen_s(&pFile,szFileName,"rt") != 0)
+#ifdef HOST_WINDOWS
+                            const char* const mode = "rt";
+#else
+                            const char* const mode = "r";
+#endif
+                            if(fopen_s(&pFile,szFileName, mode) != 0)
                             {
                                 char* pch = strrchr(szFileName, DIRECTORY_SEPARATOR_CHAR_A);
 #ifdef HOST_WINDOWS
                                 if(pch == NULL) pch = strrchr(szFileName,':');
 #endif
                                 pFile = NULL;
-                                if(pch) fopen_s(&pFile,pch+1,"rt");
+                                if(pch) fopen_s(&pFile,pch+1, mode);
                             }
                             if(bIsNewFile)
                             {
@@ -1568,7 +1573,7 @@ BOOL Disassemble(IMDInternalImport *pImport, BYTE *ILHeader, void *GUICookie, md
                 if(f==0.0)
                     strcpy_s(szf,32,((v>>24)==0)? "0.0" : "-0.0");
                 else
-                    _gcvt_s(szf,32,(double)f, 8);
+                    sprintf_s(szf, 32, "%.*g", 8, (double)f);
                 float fd = (float)atof(szf);
                 // Must compare as underlying bytes, not floating point otherwise optimizer will
                 // try to enregister and compare 80-bit precision number with 32-bit precision number!!!!
@@ -1607,7 +1612,7 @@ BOOL Disassemble(IMDInternalImport *pImport, BYTE *ILHeader, void *GUICookie, md
                 if(d==0.0)
                     strcpy_s(szf,32,((v>>56)==0)? "0.0" : "-0.0");
                 else
-                    _gcvt_s(szf,32,d, 17);
+                    sprintf_s(szf, 32, "%.*g", 17, d);
                 double df = strtod(szf, &pch); //atof(szf);
                 // Must compare as underlying bytes, not floating point otherwise optimizer will
                 // try to enregister and compare 80-bit precision number with 64-bit precision number!!!!
diff --git a/src/coreclr/ildasm/ildasmpch.h b/src/coreclr/ildasm/ildasmpch.h
index 9d89ba46db52..5bb192dd14e1 100644
--- a/src/coreclr/ildasm/ildasmpch.h
+++ b/src/coreclr/ildasm/ildasmpch.h
@@ -12,6 +12,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
+#include <algorithm>
+
+using std::min;
+using std::max;
 
 #ifndef Debug_ReportError
 #define Debug_ReportError(strMessage)
diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def
index 3bccb73e03a5..7f94e9e0996a 100644
--- a/src/coreclr/inc/CrstTypes.def
+++ b/src/coreclr/inc/CrstTypes.def
@@ -367,7 +367,7 @@ Crst PendingTypeLoadEntry
                    DomainLocalBlock Exception ExecuteManRangeLock FuncPtrStubs
                    FusionAppCtx GlobalStrLiteralMap HandleTable IbcProfile
                    IJWFixupData IJWHash ISymUnmanagedReader Jit JumpStubCache LoaderHeap
-                   Module ModuleLookupTable PEImage SecurityStackwalkCache
+                   Module ModuleLookupTable PEImage
                    SigConvert SingleUseLock StubDispatchCache StubUnwindInfoHeapSegments
                    SyncBlockCache SystemDomain ThreadIdDispenser ThreadStore TypeIDMap UnresolvedClassLock
     SameLevelAs PendingTypeLoadEntry
@@ -426,9 +426,6 @@ End
 Crst SaveModuleProfileData
 End
 
-Crst SecurityStackwalkCache
-End
-
 Crst SigConvert
     AcquiredBefore LoaderHeap
 End
diff --git a/src/coreclr/inc/allocacheck.h b/src/coreclr/inc/allocacheck.h
index ea7e6df316f0..1c4f0a584971 100644
--- a/src/coreclr/inc/allocacheck.h
+++ b/src/coreclr/inc/allocacheck.h
@@ -23,7 +23,16 @@
 
 #ifndef AllocaCheck_h
 #define AllocaCheck_h
-#include <malloc.h>			// for alloca itself
+
+#if defined(HOST_WINDOWS)
+#include <malloc.h> // for alloca itself
+#else
+#if defined(__has_include)
+#if __has_include(<alloca.h>)
+#include <alloca.h>
+#endif // __has_include(alloca.h)
+#endif // defined(__has_include)
+#endif // defined(HOST_WINDOWS)
 
 #if defined(assert) && !defined(_ASSERTE)
 #define _ASSERTE assert
diff --git a/src/coreclr/inc/bitvector.h b/src/coreclr/inc/bitvector.h
index df06b4c75c66..0f17697dddce 100644
--- a/src/coreclr/inc/bitvector.h
+++ b/src/coreclr/inc/bitvector.h
@@ -32,7 +32,9 @@
 #define UNDEF_ASSERTE
 #endif
 
+#ifndef FEATURE_NATIVEAOT
 #define USE_BITVECTOR 1
+#endif
 #if USE_BITVECTOR
 
 /* The bitvector class is meant to be a drop in replacement for an integer
diff --git a/src/coreclr/inc/check.h b/src/coreclr/inc/check.h
index c1ac08016d83..30ea0fdaf4d8 100644
--- a/src/coreclr/inc/check.h
+++ b/src/coreclr/inc/check.h
@@ -111,7 +111,7 @@ class CHECK
 #ifdef _DEBUG
               , m_condition (NULL)
               , m_file(NULL)
-              , m_line(NULL)
+              , m_line(0)
               , m_pCount(NULL)
 #endif
     {}
@@ -684,6 +684,9 @@ CHECK CheckAligned(UINT value, UINT alignment);
 CHECK CheckAligned(ULONG value, UINT alignment);
 #endif
 CHECK CheckAligned(UINT64 value, UINT alignment);
+#ifdef __APPLE__
+CHECK CheckAligned(SIZE_T value, UINT alignment);
+#endif
 CHECK CheckAligned(const void *address, UINT alignment);
 
 CHECK CheckOverflow(UINT value1, UINT value2);
@@ -691,6 +694,9 @@ CHECK CheckOverflow(UINT value1, UINT value2);
 CHECK CheckOverflow(ULONG value1, ULONG value2);
 #endif
 CHECK CheckOverflow(UINT64 value1, UINT64 value2);
+#ifdef __APPLE__
+CHECK CheckOverflow(SIZE_T value1, SIZE_T value2);
+#endif
 CHECK CheckOverflow(PTR_CVOID address, UINT offset);
 #if defined(_MSC_VER)
 CHECK CheckOverflow(const void *address, ULONG offset);
@@ -702,11 +708,17 @@ CHECK CheckUnderflow(UINT value1, UINT value2);
 CHECK CheckUnderflow(ULONG value1, ULONG value2);
 #endif
 CHECK CheckUnderflow(UINT64 value1, UINT64 value2);
+#ifdef __APPLE__
+CHECK CheckUnderflow(SIZE_T value1, SIZE_T value2);
+#endif
 CHECK CheckUnderflow(const void *address, UINT offset);
 #if defined(_MSC_VER)
 CHECK CheckUnderflow(const void *address, ULONG offset);
 #endif
 CHECK CheckUnderflow(const void *address, UINT64 offset);
+#ifdef __APPLE__
+CHECK CheckUnderflow(const void *address, SIZE_T offset);
+#endif
 CHECK CheckUnderflow(const void *address, void *address2);
 
 CHECK CheckZeroedMemory(const void *memory, SIZE_T size);
diff --git a/src/coreclr/inc/check.inl b/src/coreclr/inc/check.inl
index 9296c48f7a7a..34a2956d1be6 100644
--- a/src/coreclr/inc/check.inl
+++ b/src/coreclr/inc/check.inl
@@ -156,6 +156,15 @@ inline CHECK CheckAligned(UINT64 value, UINT alignment)
     CHECK_OK;
 }
 
+#ifdef __APPLE__
+inline CHECK CheckAligned(SIZE_T value, UINT alignment)
+{
+    STATIC_CONTRACT_WRAPPER;
+    CHECK(AlignmentTrim(value, alignment) == 0);
+    CHECK_OK;
+}
+#endif
+
 inline CHECK CheckAligned(const void *address, UINT alignment)
 {
     STATIC_CONTRACT_WRAPPER;
@@ -183,6 +192,14 @@ inline CHECK CheckOverflow(UINT64 value1, UINT64 value2)
     CHECK_OK;
 }
 
+#ifdef __APPLE__
+inline CHECK CheckOverflow(SIZE_T value1, SIZE_T value2)
+{
+    CHECK(value1 + value2 >= value1);
+    CHECK_OK;
+}
+#endif
+
 inline CHECK CheckOverflow(PTR_CVOID address, UINT offset)
 {
     TADDR targetAddr = dac_cast<TADDR>(address);
@@ -254,6 +271,15 @@ inline CHECK CheckUnderflow(UINT64 value1, UINT64 value2)
     CHECK_OK;
 }
 
+#ifdef __APPLE__
+inline CHECK CheckUnderflow(SIZE_T value1, SIZE_T value2)
+{
+    CHECK(value1 - value2 <= value1);
+
+    CHECK_OK;
+}
+#endif
+
 inline CHECK CheckUnderflow(const void *address, UINT offset)
 {
 #if POINTER_BITS == 32
@@ -290,6 +316,20 @@ inline CHECK CheckUnderflow(const void *address, UINT64 offset)
     CHECK_OK;
 }
 
+#ifdef __APPLE__
+inline CHECK CheckUnderflow(const void *address, SIZE_T offset)
+{
+#if POINTER_BITS == 32
+    CHECK(offset >> 32 == 0);
+    CHECK((UINT) (SIZE_T) address - (UINT) offset <= (UINT) (SIZE_T) address);
+#else
+    CHECK((UINT64) address - offset <= (UINT64) address);
+#endif
+
+    CHECK_OK;
+}
+#endif
+
 inline CHECK CheckUnderflow(const void *address, void *address2)
 {
 #if POINTER_BITS == 32
diff --git a/src/coreclr/inc/clr_std/algorithm b/src/coreclr/inc/clr_std/algorithm
deleted file mode 100644
index ebd21b09c5e5..000000000000
--- a/src/coreclr/inc/clr_std/algorithm
+++ /dev/null
@@ -1,118 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-//
-// clr_std/algorithm
-//
-// Copy of some key Standard Template Library functionality
-
-#ifdef _MSC_VER
-#pragma once
-#endif
-
-#ifdef USE_STL
-#include <algorithm>
-#else
-#ifndef __clr_std_algorithm_h__
-#define __clr_std_algorithm_h__
-
-namespace std
-{
-    template<class iter, class CompareFunc>
-    iter find_if ( iter first, iter last, CompareFunc comp )
-    {
-        for ( ; first!=last ; first++ )
-            if ( comp(*first) )
-                break;
-        return first;
-    }
-
-    template<class iter, class T>
-    iter find(iter first, iter last, const T& val)
-    {
-        for (;first != last; first++)
-        {
-            if (*first == val)
-                break;
-        }
-        return first;
-    }
-
-    template <class iter, class comp>
-    iter qsort_partition( iter first, iter last, iter pivot, comp compare )
-    {
-        iter lastMinusOne = last - 1;
-        swap(pivot, lastMinusOne);
-
-        // Pivot is at end
-        pivot = last - 1;
-
-        iter partitionLoc = first;
-
-        for (iter partitionWalk = first; partitionWalk != pivot; ++partitionWalk)
-        {
-            if (compare(*partitionWalk, *pivot))
-            {
-                swap(*partitionWalk, *partitionLoc);
-                partitionLoc++;
-            }
-        }
-        swap(*pivot, *partitionLoc);
-
-        return partitionLoc;
-    }
-
-    template <class iter, class comp>
-    void sort_worker ( iter first, iter last, comp compare )
-    {
-        typename iter::difference_type RangeSize = last - first;
-
-        // When down to a list of size 1, be done
-        if (RangeSize < 2)
-            return;
-
-        // Pick pivot
-
-        // Use simple pick middle algorithm
-        iter pivotLoc = first + (RangeSize / 2);
-
-        // Partition
-        pivotLoc = qsort_partition(first, last, pivotLoc, compare);
-
-        // Sort first array
-        sort_worker(first, pivotLoc, compare);
-
-        // Sort second array
-        sort_worker(pivotLoc + 1, last, compare);
-    }
-
-    template <class iter, class comp>
-    void sort ( iter first, iter last, comp compare )
-    {
-        sort_worker(first, last, compare);
-        if (first != last)
-        {
-            for (iter i = first; i < (last - 1); i++)
-            {
-                // Assert that the sort function works.
-                assert(!compare(*(i+1), *i));
-            }
-        }
-    }
-
-    template<class InIter, class OutIter, class Fn1>
-    OutIter transform( InIter first, InIter last, OutIter dest, Fn1 func )
-    {
-        for ( ; first!=last ; ++first, ++dest )
-            *dest = func(*first);
-        return dest;
-    }
-
-} // namespace std
-
-#endif /* __clr_std_algorithm_h__ */
-
-#endif // !USE_STL
-
-// Help the VIM editor figure out what kind of file this no-extension file is.
-// vim: filetype=cpp
diff --git a/src/coreclr/inc/clr_std/string b/src/coreclr/inc/clr_std/string
deleted file mode 100644
index 59ac67b98653..000000000000
--- a/src/coreclr/inc/clr_std/string
+++ /dev/null
@@ -1,425 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-//
-// clr_std/string
-//
-// Copy of some key Standard Template Library functionality
-//
-// This was created for use with SuperPMI. It has the minimal functionality needed by SuperPMI. It hasn't
-// been tested elsewhere.
-
-#ifdef _MSC_VER
-#pragma once
-#endif
-
-#ifdef USE_STL
-#include <string>
-#else
-#ifndef __clr_std_string_h__
-#define __clr_std_string_h__
-
-#include "clr_std/vector"
-
-namespace std
-{
-
-template<class T>
-class basic_string
-{
-public:
-    typedef T value_type;
-    typedef size_t size_type;
-    typedef typename vector<T>::iterator iterator;
-    typedef typename vector<T>::const_iterator const_iterator;
-
-    basic_string()
-        : m_string(1) // start with a string of length 1 for null terminator
-    {
-        m_string[0] = T();
-    }
-
-    basic_string(const basic_string<T>& _Right)
-    {
-        assign(_Right);
-    }
-
-    // Initialize a string with _Count characters from the string pointed at by _Ptr.
-    // If you want to include the trailing null character, _Count needs to include that.
-    basic_string(const value_type* _Ptr, size_type _Count)
-        : m_string(_Count + 1) // add 1 for a null terminator
-    {
-        copy(_Ptr, _Count);
-    }
-
-    basic_string(const value_type* _Ptr) : basic_string(_Ptr, c_len(_Ptr))
-    {
-    }
-
-    void reserve(size_t newcapacity)
-    {
-        m_string.reserve(newcapacity + 1); // add 1 for the null terminator
-    }
-
-    //
-    // Assignment
-    //
-
-    basic_string<T>& operator=(const basic_string<T>& _Right)
-    {
-        if (this != &_Right)
-        {
-            assign(_Right);
-        }
-        return (*this);
-    }
-
-    basic_string<T>& assign(const basic_string<T>& _Right)
-    {
-        m_string.resize(_Right.size() + 1); // +1 for null terminator
-        copy(_Right);
-        return (*this);
-    }
-
-    //
-    // Basic data copying
-    //
-
-    void copy(const basic_string<T>& _Right)
-    {
-        assert(size() >= _Right.size());
-        size_type i;
-        for (i = 0; i < _Right.size(); i++)
-        {
-            m_string[i] = _Right.m_string[i];
-        }
-        m_string[i] = T();
-    }
-
-    void copy(const value_type* _Ptr, size_type _Count)
-    {
-        assert(size() >= _Count);
-        size_type i;
-        for (i = 0; i < _Count; i++)
-        {
-            m_string[i] = _Ptr[i];
-        }
-        m_string[i] = T();
-    }
-
-    //
-    // Appending
-    //
-
-    // Append a C-style string to the string.
-    basic_string<T>& operator+=(const value_type* _Ptr)
-    {
-        size_type oldsize = size();         // doesn't include null terminator
-        size_type addsize = c_len(_Ptr);    // doesn't include null terminator
-        size_type newsize = oldsize + addsize + 1;
-        m_string.resize(newsize);
-        size_type i;
-        for (i = oldsize; i < newsize - 1; i++)
-        {
-            m_string[i] = *_Ptr++;
-        }
-        m_string[i] = T();
-        return (*this);
-    }
-
-    basic_string<T>& operator+=(const basic_string<T>& _Right)
-    {
-        size_type oldsize = size();         // doesn't include null terminator
-        size_type addsize = _Right.size();  // doesn't include null terminator
-        size_type newsize = oldsize + addsize + 1;
-        m_string.resize(newsize);
-        size_type new_index = oldsize, right_index = 0;
-        while (right_index < addsize)
-        {
-            m_string[new_index] = _Right.m_string[right_index];
-            ++new_index;
-            ++right_index;
-        }
-        m_string[new_index] = T();
-        return (*this);
-    }
-
-    basic_string<T>& operator+=(value_type _Ch)
-    {
-        size_type oldsize = size();         // doesn't include null terminator
-        m_string[oldsize] = _Ch; // Replace the null terminator with the new symbol.
-        m_string.push_back(T()); // Return the replaced terminator again.
-        return (*this);
-    }
-
-    ~basic_string()
-    {
-        // vector destructor does all the work
-    }
-
-    size_t size() const
-    {
-        assert(m_string.size() > 0);
-        return m_string.size() - 1;      // Don't report the null terminator.
-    }
-
-    size_t length() const
-    {
-        return size();
-    }
-
-    T& operator[](size_t iIndex)
-    {
-        assert(iIndex < size() + 1);    // allow looking at the null terminator
-        return m_string[iIndex];
-    }
-
-    const T* c_str() const
-    {
-        return m_string.data();
-    }
-
-    iterator begin()
-    {
-        return m_string.begin();
-    }
-
-    iterator end()
-    {
-        return m_string.end();
-    }
-
-    const_iterator cbegin() const
-    {
-        return m_string.cbegin();
-    }
-
-    const_iterator cend() const
-    {
-        return m_string.cend();
-    }
-
-    basic_string<T> substr(size_type _Off = 0, size_type _Count = npos) const
-    {
-        size_type cursize = size();
-        if (_Off >= cursize)
-        {
-            // result will be empty
-            return basic_string<T>();
-        }
-        else
-        {
-            if ((_Count == npos) ||     // No count specified; take the whole string suffix
-                (_Off + _Count > cursize)) // Count specified is too many characters; just take the whole suffix
-            {
-                _Count = cursize - _Off;
-            }
-            return basic_string<T>(m_string.data() + _Off, _Count);
-        }
-    }
-
-    size_type find_last_of(value_type _Ch) const
-    {
-        for (size_type _Off = size(); _Off != 0; _Off--)
-        {
-            if (m_string[_Off - 1] == _Ch)
-            {
-                return _Off - 1;
-            }
-        }
-        return npos;
-    }
-
-    bool empty() const
-    {
-        return size() == 0;
-    }
-
-    int compare(const basic_string<T>& _Str) const
-    {
-        size_type i;
-        size_type compareSize = size();
-        if (_Str.size() < compareSize)
-        {
-            // This string is longer; compare character-by-character only as many characters as we have.
-            compareSize = _Str.size();
-        }
-        for (i = 0; i < compareSize; i++)
-        {
-            if (m_string[i] != _Str.m_string[i])
-            {
-                if (m_string[i] < _Str.m_string[i])
-                {
-                    return -1;
-                }
-                else
-                {
-                    return 1;
-                }
-            }
-        }
-
-        // All the characters we compared were identical, but one string might be longer than the other.
-        if (size() == _Str.size())
-        {
-            // We compared everything.
-            return 0;
-        }
-        else if (size() < _Str.size())
-        {
-            // _Str has more characters than this.
-            return -1;
-        }
-        else
-        {
-            // this has more characters than _Str
-            return 1;
-        }
-    }
-
-    static const size_type npos = size_type(-1);
-
-private:
-
-    // Compute the length in characters of a null-terminated C-style string, not including the trailing null character.
-    // _Ptr must not be nullptr.
-    size_type c_len(const value_type* _Ptr)
-    {
-        size_type count;
-        for (count = 0; *_Ptr != T(); _Ptr++)
-        {
-            count++;
-        }
-        return count;
-    }
-
-    vector<T>   m_string;   // use a vector<> to represent the string, to avoid reimplementing similar functionality
-
-}; // class basic_string
-
-//
-// String class instantiations
-//
-
-typedef basic_string<char> string;
-
-//
-// Numeric conversions
-//
-
-// convert integer T to string
-template<class T> inline
-string _IntToString(const char *_Fmt, T _Val)
-{
-    const size_t MaxIntBufSize = 21; /* can hold -2^63 and 2^64 - 1, plus NUL */
-	char buf[MaxIntBufSize];
-	int len = sprintf_s(buf, MaxIntBufSize, _Fmt, _Val);
-	return (string(buf, len));
-}
-
-inline string to_string(int _Val)
-{
-	return (_IntToString("%d", _Val));
-}
-
-inline string to_string(unsigned int _Val)
-{
-	return (_IntToString("%u", _Val));
-}
-
-inline string to_string(long _Val)
-{
-	return (_IntToString("%ld", _Val));
-}
-
-inline string to_string(unsigned long _Val)
-{
-	return (_IntToString("%lu", _Val));
-}
-
-inline string to_string(long long _Val)
-{
-	return (_IntToString("%lld", _Val));
-}
-
-inline string to_string(unsigned long long _Val)
-{
-	return (_IntToString("%llu", _Val));
-}
-
-//
-// Comparisons
-//
-
-template<class T> inline
-bool operator==(
-    const basic_string<T>& _Left,
-    const basic_string<T>& _Right)
-{
-    return (_Left.compare(_Right) == 0);
-}
-
-template<class T> inline
-bool operator!=(
-    const basic_string<T>& _Left,
-    const basic_string<T>& _Right)
-{
-    return (!(_Left == _Right));
-}
-
-template<class T> inline
-bool operator<(
-    const basic_string<T>& _Left,
-    const basic_string<T>& _Right)
-{
-    return (_Left.compare(_Right) < 0);
-}
-
-template<class T> inline
-bool operator>(
-    const basic_string<T>& _Left,
-    const basic_string<T>& _Right)
-{
-    return (_Right < _Left);
-}
-
-template<class T> inline
-bool operator<=(
-    const basic_string<T>& _Left,
-    const basic_string<T>& _Right)
-{
-    return (!(_Right < _Left));
-}
-
-template<class T> inline
-bool operator>=(
-    const basic_string<T>& _Left,
-    const basic_string<T>& _Right)
-{
-    return (!(_Left < _Right));
-}
-
-//
-// String concatenation and other string operations
-//
-
-template<class T> inline
-basic_string<T> operator+(
-    const basic_string<T>& _Left,
-    const basic_string<T>& _Right)
-{
-    basic_string<T> ret;
-    ret.reserve(_Left.size() + _Right.size());
-    ret += _Left;
-    ret += _Right;
-    return ret;
-}
-
-}; // namespace std
-
-#endif /* __clr_std_string_h__ */
-
-#endif // !USE_STL
-
-// Help the VIM editor figure out what kind of file this no-extension file is.
-// vim: filetype=cpp
diff --git a/src/coreclr/inc/clr_std/type_traits b/src/coreclr/inc/clr_std/type_traits
deleted file mode 100644
index 12af99d5c4fe..000000000000
--- a/src/coreclr/inc/clr_std/type_traits
+++ /dev/null
@@ -1,627 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-//
-// clr_std/utility
-//
-// Copy of some key Standard Template Library functionality.
-// See http://msdn.microsoft.com/en-us/library/bb982077.aspx for documentation.
-//
-
-#ifdef _MSC_VER
-#pragma once
-#endif
-
-#ifndef __clr_std_type_traits_h__
-#define __clr_std_type_traits_h__
-
-#ifdef USE_STL
-
-#include <type_traits>
-
-#else
-
-namespace std
-{
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS remove_const
-    template<class _Ty>
-    struct remove_const
-    {   // remove top level const qualifier
-        typedef _Ty type;
-    };
-
-    template<class _Ty>
-    struct remove_const<const _Ty>
-    {   // remove top level const qualifier
-        typedef _Ty type;
-    };
-
-    template<class _Ty>
-    struct remove_const<const _Ty[]>
-    {   // remove top level const qualifier
-        typedef _Ty type[];
-    };
-
-    template<class _Ty, unsigned int _Nx>
-    struct remove_const<const _Ty[_Nx]>
-    {   // remove top level const qualifier
-        typedef _Ty type[_Nx];
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS remove_volatile
-    template<class _Ty>
-    struct remove_volatile
-    {   // remove top level volatile qualifier
-        typedef _Ty type;
-    };
-
-    template<class _Ty>
-    struct remove_volatile<volatile _Ty>
-    {   // remove top level volatile qualifier
-        typedef _Ty type;
-    };
-
-    template<class _Ty>
-    struct remove_volatile<volatile _Ty[]>
-    {   // remove top level volatile qualifier
-        typedef _Ty type[];
-    };
-
-    template<class _Ty, unsigned int _Nx>
-    struct remove_volatile<volatile _Ty[_Nx]>
-    {   // remove top level volatile qualifier
-        typedef _Ty type[_Nx];
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS remove_cv
-    template<class _Ty>
-    struct remove_cv
-    {   // remove top level const and volatile qualifiers
-        typedef typename remove_const<typename remove_volatile<_Ty>::type>::type type;
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE remove_reference
-    template<class T>
-    struct remove_reference
-    {   // remove reference
-        typedef T type;
-    };
-
-    template<class T>
-    struct remove_reference<T&>
-    {   // remove reference
-        typedef T type;
-    };
-
-    template<class T>
-    struct remove_reference<T&&>
-    {   // remove rvalue reference
-        typedef T type;
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE remove_pointer
-    template<class T>
-    struct remove_pointer
-    {   // remove pointer
-        typedef T type;
-    };
-
-    template<class T>
-    struct remove_pointer<T*>
-    {   // remove pointer
-        typedef T type;
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE FUNCTION identity
-    template<class T>
-    struct identity
-    {   // map T to type unchanged
-        typedef T type;
-
-        inline
-        const T& operator()(const T& left) const
-        {   // apply identity operator to operand
-            return (left);
-        }
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS integral_constant
-    template<class _Ty, _Ty _Val>
-    struct integral_constant
-    {   // convenient template for integral constant types
-        static const _Ty value = _Val;
-
-        typedef _Ty value_type;
-        typedef integral_constant<_Ty, _Val> type;
-    };
-
-    typedef integral_constant<bool, true> true_type;
-    typedef integral_constant<bool, false> false_type;
-
-    // TEMPLATE CLASS _Cat_base
-    template<bool>
-    struct _Cat_base
-        : false_type
-    {    // base class for type predicates
-    };
-
-    template<>
-    struct _Cat_base<true>
-        : true_type
-    {    // base class for type predicates
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS enable_if
-    template<bool _Test, class _Type = void>
-    struct enable_if
-    {   // type is undefined for assumed !_Test
-    };
-
-    template<class _Type>
-    struct enable_if<true, _Type>
-    {   // type is _Type for _Test
-        typedef _Type type;
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS conditional
-    template<bool _Test, class _Ty1, class _Ty2>
-    struct conditional
-    {   // type is _Ty2 for assumed !_Test
-        typedef _Ty2 type;
-    };
-
-    template<class _Ty1, class _Ty2>
-    struct conditional<true, _Ty1, _Ty2>
-    {   // type is _Ty1 for _Test
-        typedef _Ty1 type;
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS make_unsigned
-    template<typename Type1>
-    struct make_unsigned
-    {
-    };
-
-    template<>
-    struct make_unsigned<int>
-    {
-        typedef unsigned int type;
-    };
-
-#ifndef HOST_UNIX
-
-    template<>
-    struct make_unsigned<long>
-    {
-        typedef unsigned long type;
-    };
-
-#endif // !HOST_UNIX
-
-    template<>
-    struct make_unsigned<__int64>
-    {
-        typedef unsigned __int64 type;
-    };
-
-    template<>
-    struct make_unsigned<size_t>
-    {
-        typedef size_t type;
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS make_signed
-    template<typename Type1>
-    struct make_signed
-    {
-    };
-
-    template<>
-    struct make_signed<unsigned int>
-    {
-        typedef signed int type;
-    };
-
-#ifndef HOST_UNIX
-
-    template<>
-    struct make_signed<unsigned long>
-    {
-        typedef signed long type;
-    };
-
-#endif // !HOST_UNIX
-
-    template<>
-    struct make_signed<unsigned __int64>
-    {
-        typedef signed __int64 type;
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS is_lvalue_reference
-    template<class _Ty>
-    struct is_lvalue_reference
-        : false_type
-    {   // determine whether _Ty is an lvalue reference
-    };
-
-    template<class _Ty>
-    struct is_lvalue_reference<_Ty&>
-        : true_type
-    {   // determine whether _Ty is an lvalue reference
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS is_rvalue_reference
-    template<class _Ty>
-    struct is_rvalue_reference
-        : false_type
-    {   // determine whether _Ty is an rvalue reference
-    };
-
-    template<class _Ty>
-    struct is_rvalue_reference<_Ty&&>
-        : true_type
-    {   // determine whether _Ty is an rvalue reference
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS is_reference
-    template<class _Ty>
-    struct is_reference
-        : conditional<
-            is_lvalue_reference<_Ty>::value || is_rvalue_reference<_Ty>::value,
-            true_type,
-            false_type>::type
-    {   // determine whether _Ty is a reference
-    };
-
-    // TEMPLATE CLASS is_pointer
-    template<class _Ty>
-    struct is_pointer
-        : false_type
-    {   // determine whether _Ty is a pointer
-    };
-
-    template<class _Ty>
-    struct is_pointer<_Ty *>
-        : true_type
-    {   // determine whether _Ty is a pointer
-    };
-
-    // TEMPLATE CLASS _Is_integral
-    template<class _Ty>
-    struct _Is_integral
-        : false_type
-    {    // determine whether _Ty is integral
-    };
-
-    template<>
-    struct _Is_integral<bool>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-
-    template<>
-    struct _Is_integral<char>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-
-    template<>
-    struct _Is_integral<unsigned char>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-
-    template<>
-    struct _Is_integral<signed char>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-
-    template<>
-    struct _Is_integral<unsigned short>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-
-    template<>
-    struct _Is_integral<signed short>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-
-    template<>
-    struct _Is_integral<unsigned int>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-
-    template<>
-    struct _Is_integral<signed int>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-
-// On Unix 'long' is a 64-bit type (same as __int64) and the following two definitions
-// conflict with _Is_integral<unsigned __int64> and _Is_integral<signed __int64>.
-#ifndef HOST_UNIX
-    template<>
-    struct _Is_integral<unsigned long>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-
-    template<>
-    struct _Is_integral<signed long>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-#endif /* HOST_UNIX */
-
- #if _HAS_CHAR16_T_LANGUAGE_SUPPORT
-    template<>
-    struct _Is_integral<char16_t>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-
-    template<>
-    struct _Is_integral<char32_t>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
- #endif /* _HAS_CHAR16_T_LANGUAGE_SUPPORT */
-
-    template<>
-    struct _Is_integral<unsigned __int64>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-
-    template<>
-    struct _Is_integral<signed __int64>
-        : true_type
-    {    // determine whether _Ty is integral
-    };
-
-    // TEMPLATE CLASS is_integral
-    template<class _Ty>
-    struct is_integral
-        : _Is_integral<typename remove_cv<_Ty>::type>
-    {    // determine whether _Ty is integral
-    };
-
-    // TEMPLATE CLASS _Is_floating_point
-    template<class _Ty>
-    struct _Is_floating_point
-        : false_type
-    {    // determine whether _Ty is floating point
-    };
-
-    template<>
-    struct _Is_floating_point<float>
-        : true_type
-    {    // determine whether _Ty is floating point
-    };
-
-    template<>
-    struct _Is_floating_point<double>
-        : true_type
-    {    // determine whether _Ty is floating point
-    };
-
-// In PAL, we define long as int and so this becomes int double,
-// which is a nonsense
-#ifndef HOST_UNIX
-    template<>
-    struct _Is_floating_point<long double>
-        : true_type
-    {    // determine whether _Ty is floating point
-    };
-#endif
-
-    // TEMPLATE CLASS is_floating_point
-    template<class _Ty>
-    struct is_floating_point
-        : _Is_floating_point<typename remove_cv<_Ty>::type>
-    {    // determine whether _Ty is floating point
-    };
-
-    // TEMPLATE CLASS is_arithmetic
-    template<class _Ty>
-    struct is_arithmetic
-    : _Cat_base<is_integral<_Ty>::value
-        || is_floating_point<_Ty>::value>
-    {    // determine whether _Ty is an arithmetic type
-    };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS is_signed
-    template <typename T>
-    struct is_signed : conditional<
-        static_cast<typename remove_const<T>::type>(-1) < 0, true_type, false_type>::type {};
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS is_same
-    template<class T1, class T2>
-    struct is_same : false_type { };
-
-    //-----------------------------------------------------------------------------------------
-    template<class T1>
-    struct is_same<T1, T1> : true_type { };
-
-    //-----------------------------------------------------------------------------------------
-    // TEMPLATE CLASS is_base_of
-#ifdef _MSC_VER
-
-    template <typename TBase, typename TDerived>
-    struct is_base_of :
-        conditional<__is_base_of( TBase, TDerived), true_type, false_type>::type {};
-
-#else
-    namespace detail
-    {
-        //-------------------------------------------------------------------------------------
-        // Helper types Small and Big - guarantee that sizeof(Small) < sizeof(Big)
-        //
-
-        template <class T, class U>
-        struct conversion_helper
-        {
-            typedef char Small;
-            struct Big { char dummy[2]; };
-            static Big   Test(...);
-            static Small Test(U);
-            static T MakeT();
-        };
-
-        //-------------------------------------------------------------------------------------
-        // class template conversion
-        // Figures out the conversion relationships between two types
-        // Invocations (T and U are types):
-        // a) conversion<T, U>::exists
-        // returns (at compile time) true if there is an implicit conversion from T
-        // to U (example: Derived to Base)
-        // b) conversion<T, U>::exists2Way
-        // returns (at compile time) true if there are both conversions from T
-        // to U and from U to T (example: int to char and back)
-        // c) conversion<T, U>::sameType
-        // returns (at compile time) true if T and U represent the same type
-        //
-        // NOTE: might not work if T and U are in a private inheritance hierarchy.
-        //
-
-        template <class T, class U>
-        struct conversion
-        {
-            typedef detail::conversion_helper<T, U> H;
-            static const bool exists = sizeof(typename H::Small) == sizeof((H::Test(H::MakeT())));
-            static const bool exists2Way = exists && conversion<U, T>::exists;
-            static const bool sameType = false;
-        };
-
-        template <class T>
-        struct conversion<T, T>
-        {
-            static const bool exists = true;
-            static const bool exists2Way = true;
-            static const bool sameType = true;
-        };
-
-        template <class T>
-        struct conversion<void, T>
-        {
-            static const bool exists = false;
-            static const bool exists2Way = false;
-            static const bool sameType = false;
-        };
-
-        template <class T>
-        struct conversion<T, void>
-        {
-            static const bool exists = false;
-            static const bool exists2Way = false;
-            static const bool sameType = false;
-        };
-
-        template <>
-        struct conversion<void, void>
-        {
-            static const bool exists = true;
-            static const bool exists2Way = true;
-            static const bool sameType = true;
-        };
-    } // detail
-
-    // Note that we need to compare pointer types here, since conversion of types by-value
-    // just tells us whether or not an implicit conversion constructor exists. We handle
-    // type parameters that are already pointers specially; see below.
-    template <typename TBase, typename TDerived>
-    struct is_base_of :
-        conditional<detail::conversion<TDerived *, TBase *>::exists, true_type, false_type>::type {};
-
-    // Specialization to handle type parameters that are already pointers.
-    template <typename TBase, typename TDerived>
-    struct is_base_of<TBase *, TDerived *> :
-        conditional<detail::conversion<TDerived *, TBase *>::exists, true_type, false_type>::type {};
-
-    // Specialization to handle invalid mixing of pointer types.
-    template <typename TBase, typename TDerived>
-    struct is_base_of<TBase *, TDerived> :
-        false_type {};
-
-    // Specialization to handle invalid mixing of pointer types.
-    template <typename TBase, typename TDerived>
-    struct is_base_of<TBase, TDerived *> :
-        false_type {};
-
-#endif
-
-    namespace detail
-    {
-        template <typename...>
-        using void_t = void;
-    }
-    // Always false dependent-value for static_asserts.
-    template <typename...>
-    struct _Always_false
-    {
-        const bool value = false;
-    };
-
-    template <class _Ty, class = void>
-    struct _Add_reference { // add reference (non-referenceable type)
-        using _Lvalue = _Ty;
-        using _Rvalue = _Ty;
-    };
-
-    template <class _Ty>
-    struct _Add_reference<_Ty, detail::void_t<_Ty&>> { // (referenceable type)
-        using _Lvalue = _Ty&;
-        using _Rvalue = _Ty&&;
-    };
-
-    template <class _Ty>
-    struct add_lvalue_reference {
-        using type = typename _Add_reference<_Ty>::_Lvalue;
-    };
-
-    template <class _Ty>
-    struct add_rvalue_reference {
-        using type = typename _Add_reference<_Ty>::_Rvalue;
-    };
-
-    template<typename _Ty>
-    typename add_rvalue_reference<_Ty>::type declval() noexcept
-    {
-        static_assert(_Always_false<_Ty>::value, "Calling declval is ill-formed, see N4892 [declval]/2.");
-    }
-} // namespace std
-
-#endif // !USE_STL
-
-#define REM_CONST(T)    typename std::remove_const< T >::type
-#define REM_CV(T)       typename std::remove_cv< T >::type
-#define REM_REF(T)      typename std::remove_reference< T >::type
-
-#define REF_T(T)        REM_REF(T) &
-#define REF_CT(T)       REM_REF(REM_CONST(T)) const &
-
-#endif // __clr_std_type_traits_h__
-
-// Help the VIM editor figure out what kind of file this no-extension file is.
-// vim: filetype=cpp
diff --git a/src/coreclr/inc/clr_std/vector b/src/coreclr/inc/clr_std/vector
deleted file mode 100644
index c2d1caba890a..000000000000
--- a/src/coreclr/inc/clr_std/vector
+++ /dev/null
@@ -1,462 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-//
-// clr_std/vector
-//
-// Copy of some key Standard Template Library functionality
-//
-
-#ifdef _MSC_VER
-#pragma once
-#endif
-
-#ifdef USE_STL
-#include <vector>
-#else
-#ifndef __clr_std_vector_h__
-#define __clr_std_vector_h__
-
-// This is defined in the debugmacrosext.h header, but don't take a dependency on that.
-#ifndef INDEBUG
-#ifdef _DEBUG
-#define INDEBUG(x)          x
-#else
-#define INDEBUG(x)
-#endif
-#endif // !def INDEBUG
-
-namespace std
-{
-    template <class T>
-    class vector
-    {
-        public:
-            class const_iterator;
-
-            class iterator
-            {
-                friend class std::vector<T>::const_iterator;
-            public:
-                typedef T         value_type;
-                typedef ptrdiff_t difference_type;
-                typedef T*        pointer;
-                typedef T&        reference;
-
-                typedef class vector<T>::iterator _MyIter;
-
-                _MyIter &operator++()
-                {
-                    m_ptr++;
-                    return *this;
-                }
-
-                _MyIter operator++(int)
-                {
-                    // post-increment ++
-                    _MyIter myiter(m_ptr);
-                    m_ptr++;
-                    return myiter;
-                }
-
-                _MyIter &operator--()
-                {
-                    m_ptr--;
-                    return *this;
-                }
-
-                _MyIter operator--(int)
-                {
-                    // post-decrement --
-                    _MyIter myiter(m_ptr);
-                    m_ptr--;
-                    return myiter;
-                }
-
-                _MyIter operator- (ptrdiff_t n)
-                {
-                    _MyIter myiter(m_ptr);
-                    myiter.m_ptr -= n;
-                    return myiter;
-                }
-
-                ptrdiff_t operator- (_MyIter right)
-                {
-                    _MyIter myiter(m_ptr);
-                    return myiter.m_ptr - right.m_ptr;
-                }
-
-                _MyIter operator+ (ptrdiff_t n)
-                {
-                    _MyIter myiter(m_ptr);
-                    myiter.m_ptr += n;
-                    return myiter;
-                }
-
-                T* operator->() const
-                {
-                    return m_ptr;
-                }
-
-                T & operator*() const
-                {
-                    return *m_ptr;
-                }
-
-                bool operator==(const _MyIter& _Right) const
-                {
-                    bool equals = this->m_ptr == _Right.m_ptr;
-                    return equals;
-                }
-
-                bool operator!=(const _MyIter& _Right) const
-                {
-                    bool equals = this->m_ptr == _Right.m_ptr;
-                    return !equals;
-                }
-
-                bool operator<(const _MyIter& _Right) const
-                {
-                    return this->m_ptr < _Right.m_ptr;
-                }
-
-                bool operator>(const _MyIter& _Right) const
-                {
-                    return this->m_ptr > _Right.m_ptr;
-                }
-            public:
-                explicit iterator(T* ptr)
-                {
-                    m_ptr = ptr;
-                }
-
-                private:
-                    T* m_ptr;
-            }; // class iterator
-
-            class const_iterator
-            {
-            public:
-                typedef class vector<T>::const_iterator _MyIter;
-                typedef class vector<T>::iterator _MyNonConstIter;
-
-                _MyIter &operator++()
-                {
-                    m_ptr++;
-                    return *this;
-                }
-
-                _MyIter operator++(int)
-                {
-                    // post-increment ++
-                    _MyIter myiter(m_ptr);
-                    m_ptr++;
-                    return myiter;
-                }
-
-                const T* operator->() const
-                {
-                    return m_ptr;
-                }
-
-                const T & operator*() const
-                {
-                    return *m_ptr;
-                }
-
-                bool operator==(const _MyIter& _Right) const
-                {
-                    bool equals = this->m_ptr == _Right.m_ptr;
-                    return equals;
-                }
-
-                bool operator!=(const _MyIter& _Right) const
-                {
-                    bool equals = this->m_ptr == _Right.m_ptr;
-                    return !equals;
-                }
-
-            public:
-                explicit const_iterator(T* ptr)
-                {
-                    m_ptr = ptr;
-                }
-                const_iterator(const _MyNonConstIter &nonConstIterator)
-                {
-                    m_ptr = nonConstIterator.m_ptr;
-                }
-
-            private:
-                T* m_ptr;
-            }; // class const iterator
-
-
-        public:
-            explicit vector(size_t n = 0)
-            {
-                m_size = 0;
-                m_capacity = 0;
-                m_pelements = NULL;
-                m_isBufferOwner = true;
-                resize(n);
-            }
-
-            ~vector()
-            {
-                if (m_isBufferOwner)
-                {
-                    erase(m_pelements, 0, m_size);
-                    delete [] (BYTE*)m_pelements; // cast to BYTE* as we don't want this delete to invoke T's dtor
-                }
-                else
-                {
-                    m_size = 0;
-                    m_capacity = 0;
-                }
-            }
-
-            vector(const vector<T>&) = delete;
-            vector<T>& operator=(const vector<T>&) = delete;
-
-            vector(vector<T>&& v) noexcept
-                : m_size(v.m_size)
-                , m_capacity(v.m_capacity)
-                , m_pelements(v.m_pelements)
-                , m_isBufferOwner(v.m_isBufferOwner)
-            {
-                v.m_isBufferOwner = false;
-            }
-
-            vector<T>& operator=(vector<T>&& v) noexcept
-            {
-                if (m_isBufferOwner)
-                {
-                    erase(m_pelements, 0, m_size);
-                    delete [] (BYTE*)m_pelements;
-                }
-
-                m_size = v.m_size;
-                m_capacity = v.m_capacity;
-                m_pelements = v.m_pelements;
-                m_isBufferOwner = v.m_isBufferOwner;
-                v.m_isBufferOwner = false;
-                return *this;
-            }
-
-            size_t size() const
-            {
-                return m_size;
-            }
-
-            T & operator[](size_t iIndex)
-            {
-                assert(iIndex < m_size);
-                return m_pelements[iIndex];
-            }
-
-            T & operator[](size_t iIndex) const
-            {
-                assert(iIndex < m_size);
-                return m_pelements[iIndex];
-            }
-
-            void resize(size_t newsize)
-            {
-                assert(m_isBufferOwner);
-                size_t oldsize = this->size();
-                resize_noinit(newsize);
-                if (newsize > oldsize)
-                {
-                    fill_uninitialized_with_default_value(m_pelements, oldsize, newsize);
-                }
-            }
-
-            void clear()
-            {
-                assert(m_isBufferOwner);
-                resize(0);
-            }
-
-            void resize(size_t newsize, T c)
-            {
-                assert(m_isBufferOwner);
-                size_t oldsize = this->size();
-                resize_noinit(newsize);
-                if (newsize > oldsize)
-                {
-                    for (size_t i = oldsize; i < newsize; i++)
-                    {
-                        m_pelements[i] = c;
-                    }
-                }
-            }
-
-            void wrap(size_t numElements, T* pElements)
-            {
-                m_size = numElements;
-                m_pelements = pElements;
-                m_isBufferOwner = false;
-            }
-
-            void resize_noinit(size_t newsize)
-            {
-                assert(m_isBufferOwner);
-                size_t oldsize = this->size();
-                if (newsize < oldsize)
-                {
-                    // Shrink
-                    erase(m_pelements, newsize, oldsize);
-                }
-                else if (newsize > oldsize)
-                {
-                    // Grow
-                    reserve(newsize);
-                }
-                m_size = newsize;
-            }
-
-            void push_back(const T & val)
-            {
-                assert(m_isBufferOwner);
-                if (m_size + 1 < m_size)
-                {
-                    assert("push_back: overflow");
-                    // @todo: how to throw.
-                }
-                resize(m_size + 1, val);
-            }
-
-            void reserve(size_t newcapacity)
-            {
-                assert(m_isBufferOwner);
-                if (newcapacity > m_capacity)
-                {
-                    // To avoid resizing for every element that gets added to a vector, we
-                    // allocate at least twice the old capacity, or 16 elements, whichever is greater.
-                    newcapacity = max(newcapacity, max(m_capacity * 2, 16));
-
-                    size_t bytesNeeded = newcapacity * sizeof(T);
-                    if (bytesNeeded / sizeof(T) != newcapacity)
-                    {
-                        assert("resize: overflow");
-                        // @todo: how to throw something here?
-                    }
-
-
-                    T *pelements = (T*)(new BYTE[bytesNeeded]);  // Allocate as BYTE array to avoid automatic construction
-                    INDEBUG(memset(pelements, 0xcc, bytesNeeded));
-                    for (size_t i = 0; i < m_size; i++)
-                    {
-                        pelements[i] = m_pelements[i];
-                    }
-
-                    erase(m_pelements, 0, m_size);
-                    delete [] (BYTE*)m_pelements; // cast to BYTE* as we don't want this delete to invoke T's dtor
-
-                    m_pelements = pelements;
-                    m_capacity = newcapacity;
-                }
-            }
-
-            iterator begin()
-            {
-                return iterator(m_pelements);
-            }
-
-            iterator end()
-            {
-                return iterator(m_pelements + m_size);
-            }
-
-            const_iterator cbegin() const
-            {
-                return const_iterator(m_pelements);
-            }
-
-            const_iterator cend() const
-            {
-                return const_iterator(m_pelements + m_size);
-            }
-
-            iterator erase(iterator position)
-            {
-                assert(m_isBufferOwner);
-                assert((position > begin() || position == begin()) && position < end());
-                ptrdiff_t index = position - begin();
-                erase(m_pelements, index, index + 1);
-                memcpy(&m_pelements[index], &m_pelements[index + 1], sizeof(T) * (m_size - index - 1));
-                --m_size;
-                return iterator(m_pelements + (position - begin()));
-            }
-
-            iterator erase(iterator position, iterator positionEnd)
-            {
-                assert(m_isBufferOwner);
-                assert((position > begin() || position == begin()) && position < end());
-                ptrdiff_t index = position - begin();
-                ptrdiff_t elements = positionEnd - position;
-                erase(m_pelements, index, index + elements);
-                memcpy(&m_pelements[index], &m_pelements[index + elements], sizeof(T) * (m_size - index - elements));
-                m_size -= elements;
-                return iterator(m_pelements + (position - begin()));
-            }
-            
-            T* data()
-            {
-                return m_pelements;
-            }
-
-            const T* data() const
-            {
-                return m_pelements;
-            }
-
-         private:
-            // Transition a subset of the array from uninitialized to initialized with default value for T.
-            static void fill_uninitialized_with_default_value(T* pelements, size_t startIdx, size_t endIdx)
-            {
-                assert(startIdx <= endIdx);
-                assert(pelements != NULL || startIdx == endIdx);
-                for (size_t i = startIdx; i < endIdx; i++)
-                {
-                    INDEBUG(assert(0xcc == *((BYTE*)&pelements[i])));
-                    pelements[i] = T();
-                }
-            }
-
-            // Transition a subset of the array from a valid value of T to uninitialized.
-            static void erase(T* pelements, size_t startIdx, size_t endIdx)
-            {
-                assert(startIdx <= endIdx);
-                assert(pelements != NULL || startIdx == endIdx);
-                for (size_t i = startIdx; i < endIdx; i++)
-                {
-                    pelements[i].~T();
-                }
-
-                INDEBUG(memset(&pelements[startIdx], 0xcc, (endIdx - startIdx) * sizeof(T)));
-            }
-
-         private:
-            size_t    m_size;       //# of elements
-            size_t    m_capacity;   //# of elements allocated
-            T        *m_pelements;  //actual array
-                                    //  invariants:
-                                    //    dimensions == m_capacity
-                                    //    elements 0 thru m_size-1 always contain constructed T values.
-                                    //    elements from m_size thru m_capacity - 1 contain memory garbage (0xcc in DEBUG).
-            bool    m_isBufferOwner; // indicate if this vector creates its own buffer, or wraps an existing buffer.
-
-
-
-
-    };  // class vector
-
-}; // namespace std
-
-#endif /* __clr_std_vector_h__ */
-
-#endif // !USE_STL
-
-// Help the VIM editor figure out what kind of file this no-extension file is.
-// vim: filetype=cpp
diff --git a/src/coreclr/inc/clrconfignocache.h b/src/coreclr/inc/clrconfignocache.h
index f75504a2289a..01675a24201d 100644
--- a/src/coreclr/inc/clrconfignocache.h
+++ b/src/coreclr/inc/clrconfignocache.h
@@ -46,6 +46,8 @@ class CLRConfigNoCache
         {
             return false;
         }
+
+        result = (DWORD)rawResult;
         bool fSuccess = endPtr != _value;
         return fSuccess;
     }
diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h
index 12563f8f9705..ddc7c79506ad 100644
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -259,7 +259,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_legacyCorruptedStateExceptionsPolicy, W("le
 CONFIG_DWORD_INFO(INTERNAL_SuppressLostExceptionTypeAssert, W("SuppressLostExceptionTypeAssert"), 0, "")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_UseEntryPointFilter, W("UseEntryPointFilter"), 0, "")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_Corhost_Swallow_Uncaught_Exceptions, W("Corhost_Swallow_Uncaught_Exceptions"), 0, "")
-RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableNewExceptionHandling, W("EnableNewExceptionHandling"), 0, "Enable new exception handling.");
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_LegacyExceptionHandling, W("LegacyExceptionHandling"), 0, "Enable legacy exception handling.");
 
 
 ///
@@ -304,12 +304,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitDebuggable, W("JitDebuggable"), 0, "If set,
 #endif
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitEnableNoWayAssert, W("JitEnableNoWayAssert"), INTERNAL_JitEnableNoWayAssert_Default, "")
 
-#if defined(TARGET_RISCV64)
-// TODO-RISCV64-CQ: In RISCV64, currently jitc always generates JitFramed codes.
-RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_JitFramed, W("JitFramed"), 1, "Forces EBP frames")
-#else
 RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_JitFramed, W("JitFramed"), 0, "Forces EBP frames")
-#endif // TARGET_RISCV64
 CONFIG_DWORD_INFO(INTERNAL_JitThrowOnAssertionFailure, W("JitThrowOnAssertionFailure"), 0, "Throw managed exception on assertion failures during JIT instead of failfast")
 CONFIG_DWORD_INFO(INTERNAL_JitGCStress, W("JitGCStress"), 0, "GC stress mode for jit")
 CONFIG_DWORD_INFO(INTERNAL_JitHeartbeat, W("JitHeartbeat"), 0, "")
@@ -485,7 +480,7 @@ RETAIL_CONFIG_STRING_INFO(UNSUPPORTED_ETW_ObjectAllocationEventsPerTypePerSec, W
 RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_ProfAPI_ValidateNGENInstrumentation, W("ProfAPI_ValidateNGENInstrumentation"), 0, "This flag enables additional validations when using the IMetaDataEmit APIs for NGEN'ed images to ensure only supported edits are made.")
 
 #ifdef FEATURE_PERFMAP
-RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapEnabled, W("PerfMapEnabled"), 0, "This flag is used on Linux to enable writing /tmp/perf-$pid.map. It is disabled by default")
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapEnabled, W("PerfMapEnabled"), 0, "This flag is used on Linux and macOS to enable writing /tmp/perf-$pid.map. It is disabled by default")
 RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_PerfMapJitDumpPath, W("PerfMapJitDumpPath"), "Specifies a path to write the perf jitdump file. Defaults to /tmp", CLRConfig::LookupOptions::TrimWhiteSpaceFromStringValue)
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapIgnoreSignal, W("PerfMapIgnoreSignal"), 0, "When perf map is enabled, this option will configure the specified signal to be accepted and ignored as a marker in the perf logs.  It is disabled by default")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapShowOptimizationTiers, W("PerfMapShowOptimizationTiers"), 1, "Shows optimization tiers in the perf map for methods, as part of the symbol name. Useful for seeing separate stack frames for different optimization tiers of each method.")
@@ -765,6 +760,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F,                W("EnableAVX512F
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F_VL,             W("EnableAVX512F_VL"),          1, "Allows AVX512F_VL+ hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI,             W("EnableAVX512VBMI"),          1, "Allows AVX512VBMI+ hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI_VL,          W("EnableAVX512VBMI_VL"),       1, "Allows AVX512VBMI_VL+ hardware intrinsics to be disabled")
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v1,                W("EnableAVX10v1"),             1, "Allows AVX10v1+ hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI,                W("EnableAVXVNNI"),             1, "Allows AVXVNNI+ hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1,                   W("EnableBMI1"),                1, "Allows BMI1+ hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2,                   W("EnableBMI2"),                1, "Allows BMI2+ hardware intrinsics to be disabled")
diff --git a/src/coreclr/inc/clrnt.h b/src/coreclr/inc/clrnt.h
index 56245ea46f25..cacc865b715f 100644
--- a/src/coreclr/inc/clrnt.h
+++ b/src/coreclr/inc/clrnt.h
@@ -184,19 +184,23 @@ RtlVirtualUnwind_Unsafe(
 #ifdef HOST_X86
 typedef struct _RUNTIME_FUNCTION {
     DWORD BeginAddress;
+    // NOTE: R2R doesn't include EndAddress (see docs/design/coreclr/botr/readytorun-format.md).
+    // NativeAOT does include the EndAddress because the Microsoft linker expects it. In NativeAOT
+    // the info is generated in the managed ObjectWriter, so the structures don't have to match.
+    // DWORD EndAddress;
     DWORD UnwindData;
 } RUNTIME_FUNCTION, *PRUNTIME_FUNCTION;
 
 typedef struct _DISPATCHER_CONTEXT {
     _EXCEPTION_REGISTRATION_RECORD* RegistrationPointer;
 } DISPATCHER_CONTEXT, *PDISPATCHER_CONTEXT;
+
 #endif // HOST_X86
 #endif // !HOST_UNIX
 
 #define RUNTIME_FUNCTION__BeginAddress(prf)             (prf)->BeginAddress
 #define RUNTIME_FUNCTION__SetBeginAddress(prf,addr)     ((prf)->BeginAddress = (addr))
 
-#ifdef FEATURE_EH_FUNCLETS
 #include "win64unwind.h"
 #include "daccess.h"
 
@@ -207,7 +211,7 @@ RtlpGetFunctionEndAddress (
     _In_ TADDR ImageBase
     )
 {
-    PTR_UNWIND_INFO pUnwindInfo = (PTR_UNWIND_INFO)(ImageBase + FunctionEntry->UnwindData);
+    PUNWIND_INFO pUnwindInfo = (PUNWIND_INFO)(ImageBase + FunctionEntry->UnwindData);
 
     return FunctionEntry->BeginAddress + pUnwindInfo->FunctionLength;
 }
@@ -218,10 +222,7 @@ RtlpGetFunctionEndAddress (
 #define RUNTIME_FUNCTION__SetUnwindInfoAddress(prf, addr) do { (prf)->UnwindData = (addr); } while(0)
 
 #ifdef HOST_X86
-EXTERN_C
-NTSYSAPI
 PEXCEPTION_ROUTINE
-NTAPI
 RtlVirtualUnwind (
     _In_ DWORD HandlerType,
     _In_ DWORD ImageBase,
@@ -233,7 +234,6 @@ RtlVirtualUnwind (
     __inout_opt PT_KNONVOLATILE_CONTEXT_POINTERS ContextPointers
     );
 #endif // HOST_X86
-#endif // FEATURE_EH_FUNCLETS
 
 #endif // TARGET_X86
 
diff --git a/src/coreclr/inc/clrtypes.h b/src/coreclr/inc/clrtypes.h
index 19e9720b34d9..9094e4932a25 100644
--- a/src/coreclr/inc/clrtypes.h
+++ b/src/coreclr/inc/clrtypes.h
@@ -370,6 +370,15 @@ inline UINT64 AlignDown(UINT64 value, UINT alignment)
     return (value&~(UINT64)(alignment-1));
 }
 
+#ifdef __APPLE__
+inline SIZE_T AlignDown(SIZE_T value, UINT alignment)
+{
+    STATIC_CONTRACT_LEAF;
+    STATIC_CONTRACT_SUPPORTS_DAC;
+    return (value&~(SIZE_T)(alignment-1));
+}
+#endif // __APPLE__
+
 inline UINT AlignmentPad(UINT value, UINT alignment)
 {
     STATIC_CONTRACT_WRAPPER;
diff --git a/src/coreclr/inc/contract.h b/src/coreclr/inc/contract.h
index d4376d61da85..6658d4a999cd 100644
--- a/src/coreclr/inc/contract.h
+++ b/src/coreclr/inc/contract.h
@@ -140,7 +140,6 @@
 //              ModeViolation
 //              FaultViolation
 //              FaultNotFatal
-//              HostViolation
 //              LoadsTypeViolation
 //              TakesLockViolation
 //
@@ -233,7 +232,6 @@
 
 #include "specstrings.h"
 #include "clrtypes.h"
-#include "malloc.h"
 #include "check.h"
 #include "debugreturn.h"
 #include "staticcontract.h"
@@ -378,7 +376,7 @@ struct DbgStateLockState
 
 #define CONTRACT_BITMASK_OK_TO_THROW          0x1 << 0
 #define CONTRACT_BITMASK_FAULT_FORBID         0x1 << 1
-#define CONTRACT_BITMASK_HOSTCALLS            0x1 << 2
+// Unused                                     0x1 << 2
 #define CONTRACT_BITMASK_SOTOLERANT           0x1 << 3
 #define CONTRACT_BITMASK_DEBUGONLY            0x1 << 4
 #define CONTRACT_BITMASK_SONOTMAINLINE        0x1 << 5
@@ -422,7 +420,6 @@ struct ClrDebugState
         // By default, GetThread() is perfectly fine to call
         // By default, it's ok to take a lock (or call someone who does)
         m_flags             = CONTRACT_BITMASK_OK_TO_THROW|
-                              CONTRACT_BITMASK_HOSTCALLS|
                               CONTRACT_BITMASK_SOTOLERANT|
                               CONTRACT_BITMASK_OK_TO_LOCK|
                               CONTRACT_BITMASK_OK_TO_RETAKE_LOCK;
@@ -512,30 +509,6 @@ struct ClrDebugState
         CONTRACT_BITMASK_RESET(CONTRACT_BITMASK_FAULT_FORBID);
     }
 
-    //--//
-    BOOL IsHostCaller()
-    {
-        return CONTRACT_BITMASK_IS_SET(CONTRACT_BITMASK_HOSTCALLS);
-    }
-
-    void SetHostCaller()
-    {
-        CONTRACT_BITMASK_SET(CONTRACT_BITMASK_HOSTCALLS);
-    }
-
-
-    BOOL SetHostCaller(BOOL value)
-    {
-        BOOL prevState = CONTRACT_BITMASK_IS_SET(CONTRACT_BITMASK_HOSTCALLS);
-        CONTRACT_BITMASK_UPDATE(CONTRACT_BITMASK_HOSTCALLS,value);
-        return prevState;
-    }
-
-    void ResetHostCaller()
-    {
-        CONTRACT_BITMASK_RESET(CONTRACT_BITMASK_HOSTCALLS);
-    }
-
     //--//
     BOOL IsDebugOnly()
     {
@@ -896,11 +869,8 @@ class BaseContract
 
         SO_MAINLINE_No          = 0x00000800,  // code is not part of our mainline SO scenario
 
-        // Any place where we can't safely call into the host should have a HOST_NoCalls contract
-        HOST_Mask               = 0x00003000,
-        HOST_Calls              = 0x00002000,
-        HOST_NoCalls            = 0x00001000,
-        HOST_Disabled           = 0x00000000,   // the default
+        // Unused               = 0x00002000,
+        // Unused               = 0x00001000,
 
         // These enforce the CAN_TAKE_LOCK / CANNOT_TAKE_LOCK contracts
         CAN_TAKE_LOCK_Mask      = 0x00060000,
@@ -920,7 +890,7 @@ class BaseContract
         LOADS_TYPE_Disabled     = 0x00000000,   // the default
 
         ALL_Disabled            = THROWS_Disabled|GC_Disabled|FAULT_Disabled|MODE_Disabled|LOADS_TYPE_Disabled|
-                                  HOST_Disabled|CAN_TAKE_LOCK_Disabled|CAN_RETAKE_LOCK_No_Disabled
+                                  CAN_TAKE_LOCK_Disabled|CAN_RETAKE_LOCK_No_Disabled
 
     };
 
@@ -1124,7 +1094,6 @@ enum ContractViolationBits
     FaultNotFatal   = 0x00000010,  // suppress INJECT_FAULT but not fault injection by harness
     LoadsTypeViolation      = 0x00000040,  // suppress LOADS_TYPE tags in this scope
     TakesLockViolation      = 0x00000080,  // suppress CAN_TAKE_LOCK tags in this scope
-    HostViolation           = 0x00000100,  // suppress HOST_CALLS tags in this scope
 
     //These are not violation bits. We steal some bits out of the violation mask to serve as
     // general flag bits.
@@ -1667,7 +1636,7 @@ class ContractViolationHolder
     FORCEINLINE void EnterInternal(UINT_PTR violationMask)
     {
         _ASSERTE(0 == (violationMask & ~(ThrowsViolation | GCViolation | ModeViolation | FaultViolation |
-            FaultNotFatal | HostViolation |
+            FaultNotFatal |
             TakesLockViolation | LoadsTypeViolation)) ||
             violationMask == AllViolation);
 
@@ -1738,9 +1707,6 @@ enum PermanentContractViolationReason
     ReasonIBC,                           // Code runs in IBC scenarios only and the violation is safe.
     ReasonNGEN,                          // Code runs in NGEN scenarios only and the violation is safe.
     ReasonProfilerCallout,               // Profiler implementers are guaranteed not to throw.
-    ReasonUnsupportedForSQLF1Profiling,  // This code path violates HOST_NOCALLS, but that's ok b/c SQL will never
-                                         // invoke it, and thus SQL/F1 profiling (the primary reason to enforce
-                                         // HOST_NOCALLS) is not in danger.
     ReasonRuntimeReentrancy,             // e.g. SafeQueryInterface
     ReasonShutdownOnly,                  // Code path only runs as part of Shutdown and the violation is safe.
     ReasonSOTolerance,                   // We would like to redesign SO contracts anyways
@@ -2006,54 +1972,6 @@ inline ClrDebugState *GetClrDebugState(BOOL fAlloc)
 
     return NULL;
 }
-#endif // ENABLE_CONTRACTS_IMPL
-
-#ifdef ENABLE_CONTRACTS_IMPL
-
-class HostNoCallHolder
-{
-    public:
-    DEBUG_NOINLINE HostNoCallHolder()
-        {
-        SCAN_SCOPE_BEGIN;
-        STATIC_CONTRACT_HOST_NOCALLS;
-
-            m_clrDebugState = GetClrDebugState();
-            m_previousState = m_clrDebugState->SetHostCaller(FALSE);
-        }
-
-    DEBUG_NOINLINE ~HostNoCallHolder()
-        {
-        SCAN_SCOPE_END;
-
-            m_clrDebugState->SetHostCaller(m_previousState);
-        }
-
-     private:
-        BOOL m_previousState;
-        ClrDebugState* m_clrDebugState;
-
-};
-
-#define BEGIN_HOST_NOCALL_CODE \
-    {                             \
-        HostNoCallHolder __hostNoCallHolder;        \
-        CantAllocHolder __cantAlloc;
-
-#define END_HOST_NOCALL_CODE   \
-    }
-
-#else // ENABLE_CONTRACTS_IMPL
-#define BEGIN_HOST_NOCALL_CODE                      \
-    {                                               \
-        CantAllocHolder __cantAlloc;                \
-
-#define END_HOST_NOCALL_CODE                        \
-    }
-#endif
-
-
-#if defined(ENABLE_CONTRACTS_IMPL)
 
 // Macros to indicate we're taking or releasing locks
 
diff --git a/src/coreclr/inc/contract.inl b/src/coreclr/inc/contract.inl
index d614f84e74f2..211b6b5a1d70 100644
--- a/src/coreclr/inc/contract.inl
+++ b/src/coreclr/inc/contract.inl
@@ -352,7 +352,7 @@ inline void DbgStateLockData::LockTaken(DbgStateLockType dbgStateLockType,
 
     // Remember as many of these new entrances in m_rgTakenLockInfos as we can
     for (UINT i = cCombinedLocks;
-         i < min (ARRAY_SIZE(m_rgTakenLockInfos), cCombinedLocks + cTakes);
+         i < std::min (ARRAY_SIZE(m_rgTakenLockInfos), (size_t)(cCombinedLocks + cTakes));
          i++)
     {
         m_rgTakenLockInfos[i].m_pvLock = pvLock;
@@ -377,7 +377,7 @@ inline void DbgStateLockData::LockReleased(DbgStateLockType dbgStateLockType, UI
     // If lock count is within range of our m_rgTakenLockInfos buffer size, then
     // make sure we're releasing locks in reverse order of how we took them
     for (UINT i = cCombinedLocks - cReleases;
-         i < min (ARRAY_SIZE(m_rgTakenLockInfos), cCombinedLocks);
+         i < std::min (ARRAY_SIZE(m_rgTakenLockInfos), (size_t)cCombinedLocks);
          i++)
     {
         if (m_rgTakenLockInfos[i].m_pvLock != pvLock)
@@ -443,7 +443,7 @@ inline BOOL DbgStateLockState::IsLockRetaken(void * pvLock)
     // m_cLocksEnteringCannotRetakeLock records the number of locks that were taken
     // when CANNOT_RETAKE_LOCK contract was constructed.
     for (UINT i = 0;
-        i < min(ARRAY_SIZE(m_pLockData->m_rgTakenLockInfos), m_cLocksEnteringCannotRetakeLock);
+        i < std::min(ARRAY_SIZE(m_pLockData->m_rgTakenLockInfos), (size_t)m_cLocksEnteringCannotRetakeLock);
         ++i)
     {
         if (m_pLockData->m_rgTakenLockInfos[i].m_pvLock == pvLock)
diff --git a/src/coreclr/inc/corcompile.h b/src/coreclr/inc/corcompile.h
index e4baf3423fca..014509221a0d 100644
--- a/src/coreclr/inc/corcompile.h
+++ b/src/coreclr/inc/corcompile.h
@@ -186,7 +186,6 @@ enum EncodeMethodSigFlags
 
 enum EncodeFieldSigFlags
 {
-    ENCODE_FIELD_SIG_IndexInsteadOfToken        = 0x08,
     ENCODE_FIELD_SIG_MemberRefToken             = 0x10,
     ENCODE_FIELD_SIG_OwnerType                  = 0x40,
 };
diff --git a/src/coreclr/inc/cordebuginfo.h b/src/coreclr/inc/cordebuginfo.h
index 63c8cd5fe9b9..1818c4fc1f81 100644
--- a/src/coreclr/inc/cordebuginfo.h
+++ b/src/coreclr/inc/cordebuginfo.h
@@ -2,7 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 //
-// Keep in sync with llvm/tools/objwriter/cordebuginfo.h in current objwriter branch in https://github.com/dotnet/llvm-project repo
+// Keep in sync with src\coreclr\tools\Common\JitInterface\CorInfoTypes.VarInfo.cs
 //
 
 /**********************************************************************************/
diff --git a/src/coreclr/inc/corhdr.h b/src/coreclr/inc/corhdr.h
index 3f67b33da916..c12c1cfdd4f7 100644
--- a/src/coreclr/inc/corhdr.h
+++ b/src/coreclr/inc/corhdr.h
@@ -847,7 +847,7 @@ typedef enum CorGenericParamAttr
     gpReferenceTypeConstraint = 0x0004,      // type argument must be a reference type
     gpNotNullableValueTypeConstraint   =   0x0008,      // type argument must be a value type but not Nullable
     gpDefaultConstructorConstraint = 0x0010, // type argument must have a public default constructor
-    gpAcceptByRefLike = 0x0020, // type argument can be ByRefLike
+    gpAllowByRefLike = 0x0020, // type argument can be ByRefLike
 } CorGenericParamAttr;
 
 // structures and enums moved from COR.H
diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h
index b02f8c6491b6..a946110869fc 100644
--- a/src/coreclr/inc/corinfo.h
+++ b/src/coreclr/inc/corinfo.h
@@ -397,8 +397,8 @@ enum CorInfoHelpFunc
     CORINFO_HELP_DBL2ULNG_OVF,
     CORINFO_HELP_FLTREM,
     CORINFO_HELP_DBLREM,
-    CORINFO_HELP_FLTROUND,
-    CORINFO_HELP_DBLROUND,
+    CORINFO_HELP_FLTROUND,              // unused, remove once MINIMUM_READYTORUN_MAJOR_VERSION > 9
+    CORINFO_HELP_DBLROUND,              // unused, remove once MINIMUM_READYTORUN_MAJOR_VERSION > 9
 
     /* Allocating a new object. Always use ICorClassInfo::getNewHelper() to decide
        which is the right helper to use to allocate an object of a given type. */
@@ -573,7 +573,10 @@ enum CorInfoHelpFunc
     CORINFO_HELP_INIT_PINVOKE_FRAME,   // initialize an inlined PInvoke Frame for the JIT-compiler
 
     CORINFO_HELP_MEMSET,                // Init block of memory
+    CORINFO_HELP_MEMZERO,               // Init block of memory with zeroes
     CORINFO_HELP_MEMCPY,                // Copy block of memory
+    CORINFO_HELP_NATIVE_MEMSET,         // Init block of memory using native memset (not safe for pDst being null,
+                                        // not safe for unbounded size, does not trigger GC)
 
     CORINFO_HELP_RUNTIMEHANDLE_METHOD,          // determine a type/field/method handle at run-time
     CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG,      // determine a type/field/method handle at run-time, with IBC logging
@@ -1016,36 +1019,6 @@ enum CorInfoInitClassResult
                                             // requirement around class initialization such as shared generics.
 };
 
-// Reason codes for making indirect calls
-#define INDIRECT_CALL_REASONS() \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_UNKNOWN) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_EXOTIC) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_PINVOKE) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_GENERIC) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_NO_CODE) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_FIXUPS) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_STUB) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_REMOTING) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_CER) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_RESTORE_METHOD) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_RESTORE_FIRST_CALL) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_RESTORE_VALUE_TYPE) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_RESTORE) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_CANT_PATCH) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_PROFILING) \
-    INDIRECT_CALL_REASON_FUNC(CORINFO_INDIRECT_CALL_OTHER_LOADER_MODULE) \
-
-enum CorInfoIndirectCallReason
-{
-    #undef INDIRECT_CALL_REASON_FUNC
-    #define INDIRECT_CALL_REASON_FUNC(x) x,
-    INDIRECT_CALL_REASONS()
-
-    #undef INDIRECT_CALL_REASON_FUNC
-
-    CORINFO_INDIRECT_CALL_COUNT
-};
-
 inline bool dontInline(CorInfoInline val) {
     return(val < 0);
 }
@@ -1990,6 +1963,16 @@ enum class GetTypeLayoutResult
     Failure,
 };
 
+#define MAX_SWIFT_LOWERED_ELEMENTS 4
+
+struct CORINFO_SWIFT_LOWERING
+{
+    bool byReference;
+    CorInfoType loweredElements[MAX_SWIFT_LOWERED_ELEMENTS];
+    uint32_t offsets[MAX_SWIFT_LOWERED_ELEMENTS];
+    size_t numLoweredElements;
+};
+
 #define SIZEOF__CORINFO_Object                            TARGET_POINTER_SIZE /* methTable */
 
 #define CORINFO_Array_MaxLength                           0x7FFFFFC7
@@ -2069,7 +2052,7 @@ class ICorStaticInfo
     // Example of a scenario addressed by notifyMethodInfoUsage:
     //  1) Crossgen (with --opt-cross-module=MyLib) attempts to inline a call from MyLib.dll into MyApp.dll
     //     and realizes that the call always throws.
-    //  2) JIT aborts the inlining attempt and marks the call as no-return instead. The code that follows the call is 
+    //  2) JIT aborts the inlining attempt and marks the call as no-return instead. The code that follows the call is
     //     replaced with a breakpoint instruction that is expected to be unreachable.
     //  3) MyLib is updated to a new version so it's no longer within the same version bubble with MyApp.dll
     //     and the new version of the call no longer throws and does some work.
@@ -2229,6 +2212,7 @@ class ICorStaticInfo
     // should be looked up at runtime.
     virtual void expandRawHandleIntrinsic(
         CORINFO_RESOLVED_TOKEN *        pResolvedToken,
+        CORINFO_METHOD_HANDLE           callerHandle,
         CORINFO_GENERICHANDLE_RESULT *  pResult) = 0;
 
     // Is the given type in System.Private.Corelib and marked with IntrinsicAttribute?
@@ -2645,6 +2629,7 @@ class ICorStaticInfo
             CORINFO_RESOLVED_TOKEN *        pResolvedToken,
             CORINFO_LOOKUP_KIND *           pGenericLookupKind,
             CorInfoHelpFunc                 id,
+            CORINFO_METHOD_HANDLE           callerHandle,
             CORINFO_CONST_LOOKUP *          pLookup
             ) = 0;
 
@@ -2652,6 +2637,7 @@ class ICorStaticInfo
             CORINFO_RESOLVED_TOKEN *    pTargetMethod,
             mdToken                     targetConstraint,
             CORINFO_CLASS_HANDLE        delegateType,
+            CORINFO_METHOD_HANDLE       callerHandle,
             CORINFO_LOOKUP *            pLookup
             ) = 0;
 
@@ -2927,6 +2913,13 @@ class ICorStaticInfo
             uint32_t                          numMappings         // [IN] Number of rich mappings
             ) = 0;
 
+    // Report back some metadata about the compilation to the EE -- for
+    // example, metrics about the compilation.
+    virtual void reportMetadata(
+        const char* key,
+        const void* value,
+        size_t length) = 0;
+
     /*-------------------------- Misc ---------------------------------------*/
 
     // Used to allocate memory that needs to handed to the EE.
@@ -2971,7 +2964,7 @@ class ICorStaticInfo
             CORINFO_CLASS_HANDLE*       vcTypeRet       /* OUT */
             ) = 0;
 
-    // Obtains a list of exact classes for a given base type. Returns 0 if the number of
+    // Obtains a list of exact classes for a given base type. Returns -1 if the number of
     // the exact classes is greater than maxExactClasses or if more types might be loaded
     // in future.
     virtual int getExactClasses(
@@ -3071,6 +3064,9 @@ class ICorStaticInfo
             SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR*    structPassInRegDescPtr  /* OUT */
             ) = 0;
 
+    // Classifies a swift structure into primitives or an implicit byref for ABI purposes.
+    virtual void getSwiftLowering(CORINFO_CLASS_HANDLE structHnd, CORINFO_SWIFT_LOWERING* pLowering) = 0;
+
     virtual uint32_t getLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls) = 0;
     virtual uint32_t getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls) = 0;
 };
@@ -3181,6 +3177,7 @@ class ICorDynamicInfo : public ICorStaticInfo
     virtual void embedGenericHandle(
             CORINFO_RESOLVED_TOKEN *        pResolvedToken,
             bool                            fEmbedParent, // `true` - embeds parent type handle of the field/method handle
+            CORINFO_METHOD_HANDLE           callerHandle,
             CORINFO_GENERICHANDLE_RESULT *  pResult
             ) = 0;
 
@@ -3383,8 +3380,18 @@ class ICorDynamicInfo : public ICorStaticInfo
 #define IMAGE_REL_BASED_REL32           0x10
 #define IMAGE_REL_BASED_THUMB_BRANCH24  0x13
 #define IMAGE_REL_SECREL                0x104
+
+// Linux x64
+// GD model
 #define IMAGE_REL_TLSGD                 0x105
 
+// Linux arm64
+//    TLSDESC  (dynamic)
+#define IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21   0x107
+#define IMAGE_REL_AARCH64_TLSDESC_LD64_LO12    0x108
+#define IMAGE_REL_AARCH64_TLSDESC_ADD_LO12     0x109
+#define IMAGE_REL_AARCH64_TLSDESC_CALL         0x10A
+
 // The identifier for ARM32-specific PC-relative address
 // computation corresponds to the following instruction
 // sequence:
diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h
index e24bfab038ce..e3b57b6a1e04 100644
--- a/src/coreclr/inc/corinfoinstructionset.h
+++ b/src/coreclr/inc/corinfoinstructionset.h
@@ -75,38 +75,44 @@ enum CORINFO_InstructionSet
     InstructionSet_AVX512DQ_VL=30,
     InstructionSet_AVX512VBMI=31,
     InstructionSet_AVX512VBMI_VL=32,
-    InstructionSet_VectorT128=33,
-    InstructionSet_VectorT256=34,
-    InstructionSet_VectorT512=35,
-    InstructionSet_X86Base_X64=36,
-    InstructionSet_SSE_X64=37,
-    InstructionSet_SSE2_X64=38,
-    InstructionSet_SSE3_X64=39,
-    InstructionSet_SSSE3_X64=40,
-    InstructionSet_SSE41_X64=41,
-    InstructionSet_SSE42_X64=42,
-    InstructionSet_AVX_X64=43,
-    InstructionSet_AVX2_X64=44,
-    InstructionSet_AES_X64=45,
-    InstructionSet_BMI1_X64=46,
-    InstructionSet_BMI2_X64=47,
-    InstructionSet_FMA_X64=48,
-    InstructionSet_LZCNT_X64=49,
-    InstructionSet_PCLMULQDQ_X64=50,
-    InstructionSet_POPCNT_X64=51,
-    InstructionSet_AVXVNNI_X64=52,
-    InstructionSet_MOVBE_X64=53,
-    InstructionSet_X86Serialize_X64=54,
-    InstructionSet_AVX512F_X64=55,
-    InstructionSet_AVX512F_VL_X64=56,
-    InstructionSet_AVX512BW_X64=57,
-    InstructionSet_AVX512BW_VL_X64=58,
-    InstructionSet_AVX512CD_X64=59,
-    InstructionSet_AVX512CD_VL_X64=60,
-    InstructionSet_AVX512DQ_X64=61,
-    InstructionSet_AVX512DQ_VL_X64=62,
-    InstructionSet_AVX512VBMI_X64=63,
-    InstructionSet_AVX512VBMI_VL_X64=64,
+    InstructionSet_AVX10v1=33,
+    InstructionSet_AVX10v1_V256=34,
+    InstructionSet_AVX10v1_V512=35,
+    InstructionSet_VectorT128=36,
+    InstructionSet_VectorT256=37,
+    InstructionSet_VectorT512=38,
+    InstructionSet_X86Base_X64=39,
+    InstructionSet_SSE_X64=40,
+    InstructionSet_SSE2_X64=41,
+    InstructionSet_SSE3_X64=42,
+    InstructionSet_SSSE3_X64=43,
+    InstructionSet_SSE41_X64=44,
+    InstructionSet_SSE42_X64=45,
+    InstructionSet_AVX_X64=46,
+    InstructionSet_AVX2_X64=47,
+    InstructionSet_AES_X64=48,
+    InstructionSet_BMI1_X64=49,
+    InstructionSet_BMI2_X64=50,
+    InstructionSet_FMA_X64=51,
+    InstructionSet_LZCNT_X64=52,
+    InstructionSet_PCLMULQDQ_X64=53,
+    InstructionSet_POPCNT_X64=54,
+    InstructionSet_AVXVNNI_X64=55,
+    InstructionSet_MOVBE_X64=56,
+    InstructionSet_X86Serialize_X64=57,
+    InstructionSet_AVX512F_X64=58,
+    InstructionSet_AVX512F_VL_X64=59,
+    InstructionSet_AVX512BW_X64=60,
+    InstructionSet_AVX512BW_VL_X64=61,
+    InstructionSet_AVX512CD_X64=62,
+    InstructionSet_AVX512CD_VL_X64=63,
+    InstructionSet_AVX512DQ_X64=64,
+    InstructionSet_AVX512DQ_VL_X64=65,
+    InstructionSet_AVX512VBMI_X64=66,
+    InstructionSet_AVX512VBMI_VL_X64=67,
+    InstructionSet_AVX10v1_X64=68,
+    InstructionSet_AVX10v1_V256_X64=69,
+    InstructionSet_AVX10v1_V512_X64=70,
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
     InstructionSet_X86Base=1,
@@ -141,38 +147,44 @@ enum CORINFO_InstructionSet
     InstructionSet_AVX512DQ_VL=30,
     InstructionSet_AVX512VBMI=31,
     InstructionSet_AVX512VBMI_VL=32,
-    InstructionSet_VectorT128=33,
-    InstructionSet_VectorT256=34,
-    InstructionSet_VectorT512=35,
-    InstructionSet_X86Base_X64=36,
-    InstructionSet_SSE_X64=37,
-    InstructionSet_SSE2_X64=38,
-    InstructionSet_SSE3_X64=39,
-    InstructionSet_SSSE3_X64=40,
-    InstructionSet_SSE41_X64=41,
-    InstructionSet_SSE42_X64=42,
-    InstructionSet_AVX_X64=43,
-    InstructionSet_AVX2_X64=44,
-    InstructionSet_AES_X64=45,
-    InstructionSet_BMI1_X64=46,
-    InstructionSet_BMI2_X64=47,
-    InstructionSet_FMA_X64=48,
-    InstructionSet_LZCNT_X64=49,
-    InstructionSet_PCLMULQDQ_X64=50,
-    InstructionSet_POPCNT_X64=51,
-    InstructionSet_AVXVNNI_X64=52,
-    InstructionSet_MOVBE_X64=53,
-    InstructionSet_X86Serialize_X64=54,
-    InstructionSet_AVX512F_X64=55,
-    InstructionSet_AVX512F_VL_X64=56,
-    InstructionSet_AVX512BW_X64=57,
-    InstructionSet_AVX512BW_VL_X64=58,
-    InstructionSet_AVX512CD_X64=59,
-    InstructionSet_AVX512CD_VL_X64=60,
-    InstructionSet_AVX512DQ_X64=61,
-    InstructionSet_AVX512DQ_VL_X64=62,
-    InstructionSet_AVX512VBMI_X64=63,
-    InstructionSet_AVX512VBMI_VL_X64=64,
+    InstructionSet_AVX10v1=33,
+    InstructionSet_AVX10v1_V256=34,
+    InstructionSet_AVX10v1_V512=35,
+    InstructionSet_VectorT128=36,
+    InstructionSet_VectorT256=37,
+    InstructionSet_VectorT512=38,
+    InstructionSet_X86Base_X64=39,
+    InstructionSet_SSE_X64=40,
+    InstructionSet_SSE2_X64=41,
+    InstructionSet_SSE3_X64=42,
+    InstructionSet_SSSE3_X64=43,
+    InstructionSet_SSE41_X64=44,
+    InstructionSet_SSE42_X64=45,
+    InstructionSet_AVX_X64=46,
+    InstructionSet_AVX2_X64=47,
+    InstructionSet_AES_X64=48,
+    InstructionSet_BMI1_X64=49,
+    InstructionSet_BMI2_X64=50,
+    InstructionSet_FMA_X64=51,
+    InstructionSet_LZCNT_X64=52,
+    InstructionSet_PCLMULQDQ_X64=53,
+    InstructionSet_POPCNT_X64=54,
+    InstructionSet_AVXVNNI_X64=55,
+    InstructionSet_MOVBE_X64=56,
+    InstructionSet_X86Serialize_X64=57,
+    InstructionSet_AVX512F_X64=58,
+    InstructionSet_AVX512F_VL_X64=59,
+    InstructionSet_AVX512BW_X64=60,
+    InstructionSet_AVX512BW_VL_X64=61,
+    InstructionSet_AVX512CD_X64=62,
+    InstructionSet_AVX512CD_VL_X64=63,
+    InstructionSet_AVX512DQ_X64=64,
+    InstructionSet_AVX512DQ_VL_X64=65,
+    InstructionSet_AVX512VBMI_X64=66,
+    InstructionSet_AVX512VBMI_VL_X64=67,
+    InstructionSet_AVX10v1_X64=68,
+    InstructionSet_AVX10v1_V256_X64=69,
+    InstructionSet_AVX10v1_V512_X64=70,
 #endif // TARGET_X86
 
 };
@@ -346,6 +358,12 @@ struct CORINFO_InstructionSetFlags
             AddInstructionSet(InstructionSet_AVX512VBMI_X64);
         if (HasInstructionSet(InstructionSet_AVX512VBMI_VL))
             AddInstructionSet(InstructionSet_AVX512VBMI_VL_X64);
+        if (HasInstructionSet(InstructionSet_AVX10v1))
+            AddInstructionSet(InstructionSet_AVX10v1_X64);
+        if (HasInstructionSet(InstructionSet_AVX10v1_V256))
+            AddInstructionSet(InstructionSet_AVX10v1_V256_X64);
+        if (HasInstructionSet(InstructionSet_AVX10v1_V512))
+            AddInstructionSet(InstructionSet_AVX10v1_V512_X64);
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
 #endif // TARGET_X86
@@ -542,6 +560,18 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
             resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
         if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL))
             resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL_X64);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_X64))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_X64);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V256_X64))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V256);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V256_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V256))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V256_X64);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512_X64))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512_X64);
         if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
             resultflags.RemoveInstructionSet(InstructionSet_SSE);
         if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE))
@@ -614,6 +644,34 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
             resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
         if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
             resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_FMA))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V256);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V256))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
         if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
             resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
         if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
@@ -700,6 +758,34 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
             resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
         if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
             resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_FMA))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V256);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V256))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+        if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL))
+            resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
         if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
             resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
         if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
@@ -902,6 +988,18 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
             return "AVX512VBMI_VL";
         case InstructionSet_AVX512VBMI_VL_X64 :
             return "AVX512VBMI_VL_X64";
+        case InstructionSet_AVX10v1 :
+            return "AVX10v1";
+        case InstructionSet_AVX10v1_X64 :
+            return "AVX10v1_X64";
+        case InstructionSet_AVX10v1_V256 :
+            return "AVX10v1_V256";
+        case InstructionSet_AVX10v1_V256_X64 :
+            return "AVX10v1_V256_X64";
+        case InstructionSet_AVX10v1_V512 :
+            return "AVX10v1_V512";
+        case InstructionSet_AVX10v1_V512_X64 :
+            return "AVX10v1_V512_X64";
         case InstructionSet_VectorT128 :
             return "VectorT128";
         case InstructionSet_VectorT256 :
@@ -974,6 +1072,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
             return "AVX512VBMI";
         case InstructionSet_AVX512VBMI_VL :
             return "AVX512VBMI_VL";
+        case InstructionSet_AVX10v1 :
+            return "AVX10v1";
+        case InstructionSet_AVX10v1_V256 :
+            return "AVX10v1_V256";
+        case InstructionSet_AVX10v1_V512 :
+            return "AVX10v1_V512";
         case InstructionSet_VectorT128 :
             return "VectorT128";
         case InstructionSet_VectorT256 :
@@ -1044,6 +1148,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
         case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
         case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
         case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
+        case READYTORUN_INSTRUCTION_Avx10v1: return InstructionSet_AVX10v1;
+        case READYTORUN_INSTRUCTION_Avx10v1_V256: return InstructionSet_AVX10v1_V256;
+        case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1_V512;
         case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
         case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
         case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
@@ -1078,6 +1185,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
         case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
         case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
         case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
+        case READYTORUN_INSTRUCTION_Avx10v1: return InstructionSet_AVX10v1;
+        case READYTORUN_INSTRUCTION_Avx10v1_V256: return InstructionSet_AVX10v1_V256;
+        case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1_V512;
         case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
         case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
         case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
diff --git a/src/coreclr/inc/crosscomp.h b/src/coreclr/inc/crosscomp.h
index e688bfaaebb3..aeb061ca5ba4 100644
--- a/src/coreclr/inc/crosscomp.h
+++ b/src/coreclr/inc/crosscomp.h
@@ -26,10 +26,6 @@
 #endif
 #endif // TARGET_WINDOWS
 
-#ifdef TARGET_WASM
-#include "wasm.h"
-#endif
-
 #ifdef UNICODE
 #define MAKE_TARGET_DLLNAME(name) MAKE_TARGET_DLLNAME_W(name)
 #else
diff --git a/src/coreclr/inc/crsttypes_generated.h b/src/coreclr/inc/crsttypes_generated.h
index 70847a5b367f..79864b97db01 100644
--- a/src/coreclr/inc/crsttypes_generated.h
+++ b/src/coreclr/inc/crsttypes_generated.h
@@ -107,34 +107,33 @@ enum CrstType
     CrstRetThunkCache = 89,
     CrstSavedExceptionInfo = 90,
     CrstSaveModuleProfileData = 91,
-    CrstSecurityStackwalkCache = 92,
-    CrstSigConvert = 93,
-    CrstSingleUseLock = 94,
-    CrstSpecialStatics = 95,
-    CrstStackSampler = 96,
-    CrstStaticBoxInit = 97,
-    CrstStressLog = 98,
-    CrstStubCache = 99,
-    CrstStubDispatchCache = 100,
-    CrstStubUnwindInfoHeapSegments = 101,
-    CrstSyncBlockCache = 102,
-    CrstSyncHashLock = 103,
-    CrstSystemBaseDomain = 104,
-    CrstSystemDomain = 105,
-    CrstSystemDomainDelayedUnloadList = 106,
-    CrstThreadIdDispenser = 107,
-    CrstThreadStore = 108,
-    CrstTieredCompilation = 109,
-    CrstTypeEquivalenceMap = 110,
-    CrstTypeIDMap = 111,
-    CrstUMEntryThunkCache = 112,
-    CrstUMEntryThunkFreeListLock = 113,
-    CrstUniqueStack = 114,
-    CrstUnresolvedClassLock = 115,
-    CrstUnwindInfoTableLock = 116,
-    CrstVSDIndirectionCellLock = 117,
-    CrstWrapperTemplate = 118,
-    kNumberOfCrstTypes = 119
+    CrstSigConvert = 92,
+    CrstSingleUseLock = 93,
+    CrstSpecialStatics = 94,
+    CrstStackSampler = 95,
+    CrstStaticBoxInit = 96,
+    CrstStressLog = 97,
+    CrstStubCache = 98,
+    CrstStubDispatchCache = 99,
+    CrstStubUnwindInfoHeapSegments = 100,
+    CrstSyncBlockCache = 101,
+    CrstSyncHashLock = 102,
+    CrstSystemBaseDomain = 103,
+    CrstSystemDomain = 104,
+    CrstSystemDomainDelayedUnloadList = 105,
+    CrstThreadIdDispenser = 106,
+    CrstThreadStore = 107,
+    CrstTieredCompilation = 108,
+    CrstTypeEquivalenceMap = 109,
+    CrstTypeIDMap = 110,
+    CrstUMEntryThunkCache = 111,
+    CrstUMEntryThunkFreeListLock = 112,
+    CrstUniqueStack = 113,
+    CrstUnresolvedClassLock = 114,
+    CrstUnwindInfoTableLock = 115,
+    CrstVSDIndirectionCellLock = 116,
+    CrstWrapperTemplate = 117,
+    kNumberOfCrstTypes = 118
 };
 
 #endif // __CRST_TYPES_INCLUDED
@@ -237,7 +236,6 @@ int g_rgCrstLevelMap[] =
     4,          // CrstRetThunkCache
     3,          // CrstSavedExceptionInfo
     0,          // CrstSaveModuleProfileData
-    0,          // CrstSecurityStackwalkCache
     4,          // CrstSigConvert
     5,          // CrstSingleUseLock
     0,          // CrstSpecialStatics
@@ -361,7 +359,6 @@ LPCSTR g_rgCrstNameMap[] =
     "CrstRetThunkCache",
     "CrstSavedExceptionInfo",
     "CrstSaveModuleProfileData",
-    "CrstSecurityStackwalkCache",
     "CrstSigConvert",
     "CrstSingleUseLock",
     "CrstSpecialStatics",
diff --git a/src/coreclr/inc/crtwrap.h b/src/coreclr/inc/crtwrap.h
index d3ab3a28be7c..59b68d7d4669 100644
--- a/src/coreclr/inc/crtwrap.h
+++ b/src/coreclr/inc/crtwrap.h
@@ -11,11 +11,11 @@
 #define __CrtWrap_h__
 
 #include <stdint.h>
+#include <stddef.h>
 #include <windows.h>
 #include <objbase.h>
 #include "debugmacros.h"
 #include <stdlib.h>
-#include <malloc.h>
 #include <wchar.h>
 #include <stdio.h>
 
diff --git a/src/coreclr/inc/daccess.h b/src/coreclr/inc/daccess.h
index 83d0664aff8c..2d7cd5f34625 100644
--- a/src/coreclr/inc/daccess.h
+++ b/src/coreclr/inc/daccess.h
@@ -561,6 +561,10 @@
 #ifndef NATIVEAOT
 #include <stdint.h>
 
+#if !defined(HOST_WINDOWS)
+#include <pal_mstypes.h>
+#endif
+
 #include "switches.h"
 #include "safemath.h"
 #include "corerror.h"
@@ -568,12 +572,8 @@
 // Keep in sync with the definitions in dbgutil.cpp and createdump.h
 #define DACCESS_TABLE_SYMBOL "g_dacTable"
 
-#ifdef PAL_STDCPP_COMPAT
 #include <type_traits>
-#else
-#include "clr_std/type_traits"
 #include "crosscomp.h"
-#endif
 
 #include <dn-u16.h>
 
@@ -614,8 +614,7 @@ struct DacTableHeader
 // Define TADDR as a non-pointer value so use of it as a pointer
 // will not work properly.  Define it as unsigned so
 // pointer comparisons aren't affected by sign.
-// This requires special casting to ULONG64 to sign-extend if necessary.
-typedef ULONG_PTR TADDR;
+typedef uintptr_t TADDR;
 
 // TSIZE_T used for counts or ranges that need to span the size of a
 // target pointer.  For cross-plat, this may be different than SIZE_T
@@ -807,7 +806,6 @@ struct COR_ILMETHOD* DacGetIlMethod(TADDR methAddr);
 struct _UNWIND_INFO * DacGetUnwindInfo(TADDR taUnwindInfo);
 
 // virtually unwind a CONTEXT out-of-process
-struct _KNONVOLATILE_CONTEXT_POINTERS;
 BOOL DacUnwindStackFrame(T_CONTEXT * pContext, T_KNONVOLATILE_CONTEXT_POINTERS* pContextPointers);
 #endif // FEATURE_EH_FUNCLETS
 
@@ -2128,7 +2126,7 @@ inline void DACCOP_IGNORE(DacCopWarningCode code, const char * szReasonString)
 // Declare TADDR as a non-pointer type so that arithmetic
 // can be done on it directly, as with the DACCESS_COMPILE definition.
 // This also helps expose pointer usage that may need to be changed.
-typedef ULONG_PTR TADDR;
+typedef uintptr_t TADDR;
 
 typedef void* PTR_VOID;
 typedef LPVOID* PTR_PTR_VOID;
@@ -2375,6 +2373,7 @@ typedef DPTR(int32_t)      PTR_int32_t;
 typedef DPTR(uint32_t)     PTR_uint32_t;
 typedef DPTR(uint64_t)     PTR_uint64_t;
 typedef DPTR(uintptr_t)    PTR_uintptr_t;
+typedef DPTR(TADDR)        PTR_TADDR;
 
 #ifndef NATIVEAOT
 typedef ArrayDPTR(BYTE)    PTR_BYTE;
@@ -2396,7 +2395,6 @@ typedef DPTR(ULONG64) PTR_ULONG64;
 typedef DPTR(INT64)   PTR_INT64;
 typedef DPTR(UINT64)  PTR_UINT64;
 typedef DPTR(SIZE_T)  PTR_SIZE_T;
-typedef DPTR(TADDR)   PTR_TADDR;
 typedef DPTR(int)     PTR_int;
 typedef DPTR(BOOL)    PTR_BOOL;
 typedef DPTR(unsigned) PTR_unsigned;
@@ -2439,7 +2437,7 @@ typedef DPTR(IMAGE_TLS_DIRECTORY)   PTR_IMAGE_TLS_DIRECTORY;
 #endif
 
 #ifndef NATIVEAOT
-#if defined(TARGET_X86) && defined(TARGET_UNIX)
+#if defined(TARGET_X86) && defined(FEATURE_EH_FUNCLETS)
 typedef DPTR(struct _UNWIND_INFO)      PTR_UNWIND_INFO;
 #endif
 
diff --git a/src/coreclr/inc/dacprivate.h b/src/coreclr/inc/dacprivate.h
index e8d0be5aba07..ae91e940ce22 100644
--- a/src/coreclr/inc/dacprivate.h
+++ b/src/coreclr/inc/dacprivate.h
@@ -467,7 +467,7 @@ struct MSLAYOUT DacpAssemblyData
 
     HRESULT Request(ISOSDacInterface *sos, CLRDATA_ADDRESS addr)
     {
-        return Request(sos, addr, NULL);
+        return Request(sos, addr, 0);
     }
 };
 
@@ -577,7 +577,7 @@ struct MSLAYOUT DacpMethodDescData
     {
         return sos->GetMethodDescData(
             addr,
-            NULL,   // IP address
+            0,      // IP address
             this,
             0,      // cRejitData
             NULL,   // rejitData[]
diff --git a/src/coreclr/inc/dacvars.h b/src/coreclr/inc/dacvars.h
index b632887e86d0..8f710c8fde12 100644
--- a/src/coreclr/inc/dacvars.h
+++ b/src/coreclr/inc/dacvars.h
@@ -124,8 +124,6 @@ DEFINE_DACVAR(PTR_SString, SString__s_Empty, SString::s_Empty)
 
 DEFINE_DACVAR(INT32, ArrayBase__s_arrayBoundsZero, ArrayBase::s_arrayBoundsZero)
 
-DEFINE_DACVAR(BOOL, StackwalkCache__s_Enabled, StackwalkCache::s_Enabled)
-
 DEFINE_DACVAR(PTR_JITNotification, dac__g_pNotificationTable, ::g_pNotificationTable)
 DEFINE_DACVAR(ULONG32, dac__g_dacNotificationFlags, ::g_dacNotificationFlags)
 DEFINE_DACVAR(PTR_GcNotification, dac__g_pGcNotificationTable, ::g_pGcNotificationTable)
diff --git a/src/coreclr/inc/eetwain.h b/src/coreclr/inc/eetwain.h
index 9beca3f37290..bee2f658ee7c 100644
--- a/src/coreclr/inc/eetwain.h
+++ b/src/coreclr/inc/eetwain.h
@@ -35,8 +35,8 @@
 #define USE_GC_INFO_DECODER
 #endif
 
-#if (defined(TARGET_X86) && !defined(TARGET_UNIX)) || defined(TARGET_AMD64)
-#define HAS_QUICKUNWIND
+#ifdef TARGET_AMD64
+#define HAS_LIGHTUNWIND
 #endif
 
 #define CHECK_APP_DOMAIN    0
@@ -89,7 +89,6 @@ enum ICodeManagerFlags
     ExecutionAborted =  0x0002, // execution of this function has been aborted
                                     // (i.e. it will not continue execution at the
                                     // current location)
-    AbortingCall    =   0x0004, // The current call will never return
     UpdateAllRegs   =   0x0008, // update full register set
     CodeAltered     =   0x0010, // code of that function might be altered
                                     // (e.g. by debugger), need to call EE
@@ -103,6 +102,11 @@ enum ICodeManagerFlags
     NoReportUntracked
                     =   0x0080, // EnumGCRefs/EnumerateLiveSlots should *not* include
                                 // any untracked slots
+
+    LightUnwind     =   0x0100, // Unwind just enough to get return addresses
+    ReportFPBasedSlotsOnly
+                    =   0x0200, // EnumGCRefs/EnumerateLiveSlots should only include
+                                // slots that are based on the frame pointer
 };
 
 //*****************************************************************************
@@ -201,8 +205,7 @@ virtual ULONG32 GetStackParameterSize(EECodeInfo* pCodeInfo) = 0;
 virtual bool UnwindStackFrame(PREGDISPLAY     pContext,
                               EECodeInfo     *pCodeInfo,
                               unsigned        flags,
-                              CodeManState   *pState,
-                              StackwalkCacheUnwindInfo  *pUnwindInfo) = 0;
+                              CodeManState   *pState) = 0;
 
 /*
     Is the function currently at a "GC safe point" ?
@@ -425,11 +428,10 @@ bool UnwindStackFrame(
                 PREGDISPLAY     pContext,
                 EECodeInfo     *pCodeInfo,
                 unsigned        flags,
-                CodeManState   *pState,
-                StackwalkCacheUnwindInfo  *pUnwindInfo);
+                CodeManState   *pState);
 
-#ifdef HAS_QUICKUNWIND
-enum QuickUnwindFlag
+#ifdef HAS_LIGHTUNWIND
+enum LightUnwindFlag
 {
     UnwindCurrentStackFrame,
     EnsureCallerStackFrameIsValid
@@ -441,11 +443,11 @@ enum QuickUnwindFlag
   */
 
 static
-void QuickUnwindStackFrame(
+void LightUnwindStackFrame(
              PREGDISPLAY pRD,
-             StackwalkCacheEntry *pCacheEntry,
-             QuickUnwindFlag flag);
-#endif // HAS_QUICKUNWIND
+             EECodeInfo     *pCodeInfo,
+             LightUnwindFlag flag);
+#endif // HAS_LIGHTUNWIND
 
 /*
     Is the function currently at a "GC safe point" ?
@@ -615,7 +617,7 @@ HRESULT FixContextForEnC(PCONTEXT        pCtx,
 #endif // #ifndef DACCESS_COMPILE
 
 #ifdef FEATURE_EH_FUNCLETS
-    static void EnsureCallerContextIsValid( PREGDISPLAY pRD, StackwalkCacheEntry* pCacheEntry, EECodeInfo * pCodeInfo = NULL );
+    static void EnsureCallerContextIsValid( PREGDISPLAY pRD, EECodeInfo * pCodeInfo = NULL, unsigned flags = 0);
     static size_t GetCallerSp( PREGDISPLAY  pRD );
 #ifdef TARGET_X86
     static size_t GetResumeSp( PCONTEXT  pContext );
@@ -629,124 +631,7 @@ HRESULT FixContextForEnC(PCONTEXT        pCtx,
 };
 
 #ifdef TARGET_X86
-bool UnwindStackFrame(PREGDISPLAY     pContext,
-                      EECodeInfo     *pCodeInfo,
-                      unsigned        flags,
-                      CodeManState   *pState,
-                      StackwalkCacheUnwindInfo  *pUnwindInfo);
-
-size_t DecodeGCHdrInfo(GCInfoToken gcInfoToken,
-                       unsigned    curOffset,
-                       hdrInfo   * infoPtr);
-#endif
-
-/*****************************************************************************
- <TODO>ToDo: Do we want to include JIT/IL/target.h? </TODO>
- */
-
-enum regNum
-{
-        REGI_EAX, REGI_ECX, REGI_EDX, REGI_EBX,
-        REGI_ESP, REGI_EBP, REGI_ESI, REGI_EDI,
-        REGI_COUNT,
-        REGI_NA = REGI_COUNT
-};
-
-/*****************************************************************************
- Register masks
- */
-
-enum RegMask
-{
-    RM_EAX = 0x01,
-    RM_ECX = 0x02,
-    RM_EDX = 0x04,
-    RM_EBX = 0x08,
-    RM_ESP = 0x10,
-    RM_EBP = 0x20,
-    RM_ESI = 0x40,
-    RM_EDI = 0x80,
-
-    RM_NONE = 0x00,
-    RM_ALL = (RM_EAX|RM_ECX|RM_EDX|RM_EBX|RM_ESP|RM_EBP|RM_ESI|RM_EDI),
-    RM_CALLEE_SAVED = (RM_EBP|RM_EBX|RM_ESI|RM_EDI),
-    RM_CALLEE_TRASHED = (RM_ALL & ~RM_CALLEE_SAVED),
-};
-
-/*****************************************************************************
- *
- *  Helper to extract basic info from a method info block.
- */
-
-struct hdrInfo
-{
-    unsigned int        methodSize;     // native code bytes
-    unsigned int        argSize;        // in bytes
-    unsigned int        stackSize;      // including callee saved registers
-    unsigned int        rawStkSize;     // excluding callee saved registers
-    ReturnKind          returnKind;     // The ReturnKind for this method.
-
-    unsigned int        prologSize;
-
-    // Size of the epilogs in the method.
-    // For methods which use CEE_JMP, some epilogs may end with a "ret" instruction
-    // and some may end with a "jmp". The epilogSize reported should be for the
-    // epilog with the smallest size.
-    unsigned int        epilogSize;
-
-    unsigned char       epilogCnt;
-    bool                epilogEnd;      // is the epilog at the end of the method
-
-    bool                ebpFrame;       // locals and arguments addressed relative to EBP
-    bool                doubleAlign;    // is the stack double-aligned? locals addressed relative to ESP, and arguments relative to EBP
-    bool                interruptible;  // intr. at all times (excluding prolog/epilog), not just call sites
-
-    bool                handlers;       // has callable handlers
-    bool                localloc;       // uses localloc
-    bool                editNcontinue;  // has been compiled in EnC mode
-    bool                varargs;        // is this a varargs routine
-    bool                profCallbacks;  // does the method have Enter-Leave callbacks
-    bool                genericsContext;// has a reported generic context parameter
-    bool                genericsContextIsMethodDesc;// reported generic context parameter is methoddesc
-    bool                isSpeculativeStackWalk; // is the stackwalk seeded by an untrusted source (e.g., sampling profiler)?
-
-    // These always includes EBP for EBP-frames and double-aligned-frames
-    RegMask             savedRegMask:8; // which callee-saved regs are saved on stack
-
-    // Count of the callee-saved registers, excluding the frame pointer.
-    // This does not include EBP for EBP-frames and double-aligned-frames.
-    unsigned int        savedRegsCountExclFP;
-
-    unsigned int        untrackedCnt;
-    unsigned int        varPtrTableSize;
-    unsigned int        argTabOffset;   // INVALID_ARGTAB_OFFSET if argtab must be reached by stepping through ptr tables
-    unsigned int        gsCookieOffset; // INVALID_GS_COOKIE_OFFSET if there is no GuardStack cookie
-
-    unsigned int        syncStartOffset; // start/end code offset of the protected region in synchronized methods.
-    unsigned int        syncEndOffset;   // INVALID_SYNC_OFFSET if there not synchronized method
-    unsigned int        syncEpilogStart; // The start of the epilog. Synchronized methods are guaranteed to have no more than one epilog.
-    unsigned int        revPInvokeOffset; // INVALID_REV_PINVOKE_OFFSET if there is no Reverse PInvoke frame
-
-    enum { NOT_IN_PROLOG = -1, NOT_IN_EPILOG = -1 };
-
-    int                 prologOffs;     // NOT_IN_PROLOG if not in prolog
-    int                 epilogOffs;     // NOT_IN_EPILOG if not in epilog. It is never 0
-
-    //
-    // Results passed back from scanArgRegTable
-    //
-    regNum              thisPtrResult;  // register holding "this"
-    RegMask             regMaskResult;  // registers currently holding GC ptrs
-    RegMask            iregMaskResult;  // iptr qualifier for regMaskResult
-    unsigned            argHnumResult;
-    PTR_CBYTE            argTabResult;  // Table of encoded offsets of pending ptr args
-    unsigned              argTabBytes;  // Number of bytes in argTabResult[]
-
-    // These next two are now large structs (i.e 132 bytes each)
-
-    ptrArgTP            argMaskResult;  // pending arguments mask
-    ptrArgTP           iargMaskResult;  // iptr qualifier for argMaskResult
-};
+#include "gc_unwind_x86.h"
 
 /*****************************************************************************
   How the stackwalkers buffer will be interpreted
@@ -757,6 +642,9 @@ struct CodeManStateBuf
     DWORD       hdrInfoSize;
     hdrInfo     hdrInfoBody;
 };
+
+#endif
+
 //*****************************************************************************
 #endif // _EETWAIN_H
 //*****************************************************************************
diff --git a/src/coreclr/inc/gc_unwind_x86.h b/src/coreclr/inc/gc_unwind_x86.h
new file mode 100644
index 000000000000..e5be6b2e4aa4
--- /dev/null
+++ b/src/coreclr/inc/gc_unwind_x86.h
@@ -0,0 +1,138 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#ifndef _UNWIND_X86_H
+#define _UNWIND_X86_H
+
+// This file is shared between CoreCLR and NativeAOT. Some of the differences are handled
+// with the FEATURE_NATIVEAOT and FEATURE_EH_FUNCLETS defines. There are three main methods
+// that are used by both runtimes - DecodeGCHdrInfo, UnwindStackFrameX86, and EnumGcRefsX86.
+//
+// The IN_EH_FUNCLETS and IN_EH_FUNCLETS_COMMA macros are used to specify some parameters
+// for the above methods that are specific for a certain runtime or configuration.
+#ifdef FEATURE_EH_FUNCLETS
+#define IN_EH_FUNCLETS(a) a
+#define IN_EH_FUNCLETS_COMMA(a) a,
+#else
+#define IN_EH_FUNCLETS(a)
+#define IN_EH_FUNCLETS_COMMA(a)
+#endif
+
+enum regNum
+{
+        REGI_EAX, REGI_ECX, REGI_EDX, REGI_EBX,
+        REGI_ESP, REGI_EBP, REGI_ESI, REGI_EDI,
+        REGI_COUNT,
+        REGI_NA = REGI_COUNT
+};
+
+/*****************************************************************************
+ Register masks
+ */
+
+enum RegMask
+{
+    RM_EAX = 0x01,
+    RM_ECX = 0x02,
+    RM_EDX = 0x04,
+    RM_EBX = 0x08,
+    RM_ESP = 0x10,
+    RM_EBP = 0x20,
+    RM_ESI = 0x40,
+    RM_EDI = 0x80,
+
+    RM_NONE = 0x00,
+    RM_ALL = (RM_EAX|RM_ECX|RM_EDX|RM_EBX|RM_ESP|RM_EBP|RM_ESI|RM_EDI),
+    RM_CALLEE_SAVED = (RM_EBP|RM_EBX|RM_ESI|RM_EDI),
+    RM_CALLEE_TRASHED = (RM_ALL & ~RM_CALLEE_SAVED),
+};
+
+/*****************************************************************************
+ *
+ *  Helper to extract basic info from a method info block.
+ */
+
+struct hdrInfo
+{
+    unsigned int        methodSize;     // native code bytes
+    unsigned int        argSize;        // in bytes
+    unsigned int        stackSize;      // including callee saved registers
+    unsigned int        rawStkSize;     // excluding callee saved registers
+    ReturnKind          returnKind;     // The ReturnKind for this method.
+
+    unsigned int        prologSize;
+
+    // Size of the epilogs in the method.
+    // For methods which use CEE_JMP, some epilogs may end with a "ret" instruction
+    // and some may end with a "jmp". The epilogSize reported should be for the
+    // epilog with the smallest size.
+    unsigned int        epilogSize;
+
+    unsigned char       epilogCnt;
+    bool                epilogEnd;      // is the epilog at the end of the method
+
+    bool                ebpFrame;       // locals and arguments addressed relative to EBP
+    bool                doubleAlign;    // is the stack double-aligned? locals addressed relative to ESP, and arguments relative to EBP
+    bool                interruptible;  // intr. at all times (excluding prolog/epilog), not just call sites
+
+    bool                handlers;       // has callable handlers
+    bool                localloc;       // uses localloc
+    bool                editNcontinue;  // has been compiled in EnC mode
+    bool                varargs;        // is this a varargs routine
+    bool                profCallbacks;  // does the method have Enter-Leave callbacks
+    bool                genericsContext;// has a reported generic context parameter
+    bool                genericsContextIsMethodDesc;// reported generic context parameter is methoddesc
+    bool                isSpeculativeStackWalk; // is the stackwalk seeded by an untrusted source (e.g., sampling profiler)?
+
+    // These always includes EBP for EBP-frames and double-aligned-frames
+    RegMask             savedRegMask:8; // which callee-saved regs are saved on stack
+
+    // Count of the callee-saved registers, excluding the frame pointer.
+    // This does not include EBP for EBP-frames and double-aligned-frames.
+    unsigned int        savedRegsCountExclFP;
+
+    unsigned int        untrackedCnt;
+    unsigned int        varPtrTableSize;
+    unsigned int        argTabOffset;   // INVALID_ARGTAB_OFFSET if argtab must be reached by stepping through ptr tables
+    unsigned int        gsCookieOffset; // INVALID_GS_COOKIE_OFFSET if there is no GuardStack cookie
+
+    unsigned int        syncStartOffset; // start/end code offset of the protected region in synchronized methods.
+    unsigned int        syncEndOffset;   // INVALID_SYNC_OFFSET if there not synchronized method
+    unsigned int        syncEpilogStart; // The start of the epilog. Synchronized methods are guaranteed to have no more than one epilog.
+    unsigned int        revPInvokeOffset; // INVALID_REV_PINVOKE_OFFSET if there is no Reverse PInvoke frame
+
+    enum { NOT_IN_PROLOG = -1, NOT_IN_EPILOG = -1 };
+
+    int                 prologOffs;     // NOT_IN_PROLOG if not in prolog
+    int                 epilogOffs;     // NOT_IN_EPILOG if not in epilog. It is never 0
+
+    //
+    // Results passed back from scanArgRegTable
+    //
+    regNum              thisPtrResult;  // register holding "this"
+    RegMask             regMaskResult;  // registers currently holding GC ptrs
+    RegMask            iregMaskResult;  // iptr qualifier for regMaskResult
+    unsigned            argHnumResult;
+    PTR_CBYTE            argTabResult;  // Table of encoded offsets of pending ptr args
+    unsigned              argTabBytes;  // Number of bytes in argTabResult[]
+
+    // These next two are now large structs (i.e 132 bytes each)
+
+    ptrArgTP            argMaskResult;  // pending arguments mask
+    ptrArgTP           iargMaskResult;  // iptr qualifier for argMaskResult
+};
+
+bool UnwindStackFrameX86(PREGDISPLAY     pContext,
+                         PTR_CBYTE       methodStart,
+                         DWORD           curOffs,
+                         hdrInfo *       info,
+                         PTR_CBYTE       table,
+                         IN_EH_FUNCLETS_COMMA(PTR_CBYTE       funcletStart)
+                         IN_EH_FUNCLETS_COMMA(bool            isFunclet)
+                         bool            updateAllRegs);
+
+size_t DecodeGCHdrInfo(GCInfoToken gcInfoToken,
+                       unsigned    curOffset,
+                       hdrInfo   * infoPtr);
+
+#endif // _UNWIND_X86_H
diff --git a/src/coreclr/inc/gcinfo.h b/src/coreclr/inc/gcinfo.h
index 66933b10f044..16bff25525a9 100644
--- a/src/coreclr/inc/gcinfo.h
+++ b/src/coreclr/inc/gcinfo.h
@@ -13,9 +13,6 @@
 /*****************************************************************************/
 
 #include "daccess.h"
-#include "windef.h"     // For BYTE
-
-// Some declarations in this file are used on non-x86 platforms, but most are x86-specific.
 
 // Use the lower 2 bits of the offsets stored in the tables
 // to encode properties
@@ -23,14 +20,15 @@
 const unsigned        OFFSET_MASK  = 0x3;  // mask to access the low 2 bits
 
 //
-//  Note for untracked locals the flags allowed are "pinned" and "byref"
-//   and for tracked locals the flags allowed are "this" and "byref"
 //  Note that these definitions should also match the definitions of
 //   GC_CALL_INTERIOR and GC_CALL_PINNED in VM/gc.h
 //
 const unsigned  byref_OFFSET_FLAG  = 0x1;  // the offset is an interior ptr
 const unsigned pinned_OFFSET_FLAG  = 0x2;  // the offset is a pinned ptr
-#if !defined(TARGET_X86) || !defined(FEATURE_EH_FUNCLETS)
+#if defined(TARGET_X86)
+// JIT32_ENCODER has additional restriction on x86 without funclets: 
+// - for untracked locals the flags allowed are "pinned" and "byref"
+// - for tracked locals the flags allowed are "this" and "byref"
 const unsigned   this_OFFSET_FLAG  = 0x2;  // the offset is "this"
 #endif
 
@@ -57,9 +55,17 @@ const unsigned   this_OFFSET_FLAG  = 0x2;  // the offset is "this"
 struct GCInfoToken
 {
     PTR_VOID Info;
-    UINT32 Version;
+    uint32_t Version;
+
+#ifdef FEATURE_NATIVEAOT
+    GCInfoToken(PTR_VOID info)
+    {
+        Info = info;
+        Version = GCINFO_VERSION;
+    }
+#endif
 
-    static UINT32 ReadyToRunVersionToGcInfoVersion(UINT32 readyToRunMajorVersion)
+    static uint32_t ReadyToRunVersionToGcInfoVersion(uint32_t readyToRunMajorVersion)
     {
         // GcInfo version is current from  ReadyToRun version 2.0
         return GCINFO_VERSION;
diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h
index 34af8c530556..b42f5aae8f60 100644
--- a/src/coreclr/inc/gcinfodecoder.h
+++ b/src/coreclr/inc/gcinfodecoder.h
@@ -31,7 +31,17 @@
 
 #ifdef FEATURE_NATIVEAOT
 
+#include "gcinfo.h"
+
 typedef ArrayDPTR(const uint8_t) PTR_CBYTE;
+#ifdef TARGET_X86
+// Bridge few additional pointer types used in x86 unwinding code
+typedef DPTR(DWORD) PTR_DWORD;
+typedef DPTR(WORD) PTR_WORD;
+typedef DPTR(BYTE) PTR_BYTE;
+typedef DPTR(signed char) PTR_SBYTE;
+typedef DPTR(INT32) PTR_INT32;
+#endif
 
 #define LIMITED_METHOD_CONTRACT
 #define SUPPORTS_DAC
@@ -50,22 +60,12 @@ typedef ArrayDPTR(const uint8_t) PTR_CBYTE;
 #define SSIZE_T intptr_t
 #define LPVOID void*
 
+#define CHECK_APP_DOMAIN    0
+
 typedef void * OBJECTREF;
 
 #define GET_CALLER_SP(pREGDISPLAY) ((TADDR)0)
 
-struct GCInfoToken
-{
-    PTR_VOID Info;
-    UINT32 Version;
-
-    GCInfoToken(PTR_VOID info)
-    {
-        Info = info;
-        Version = 2;
-    }
-};
-
 #else // FEATURE_NATIVEAOT
 
 // Stuff from cgencpu.h:
@@ -185,6 +185,9 @@ enum ICodeManagerFlags
     NoReportUntracked
                     =   0x0080, // EnumGCRefs/EnumerateLiveSlots should *not* include
                                 // any untracked slots
+    ReportFPBasedSlotsOnly
+                    =   0x0200, // EnumGCRefs/EnumerateLiveSlots should only include
+                                // slots that are based on the frame pointer
 };
 
 #endif // !_strike_h
@@ -674,11 +677,12 @@ class GcInfoDecoder
     {
         _ASSERTE(slotIndex < slotDecoder.GetNumSlots());
         const GcSlotDesc* pSlot = slotDecoder.GetSlotDesc(slotIndex);
+        bool reportFpBasedSlotsOnly = (inputFlags & ReportFPBasedSlotsOnly);
 
         if(slotIndex < slotDecoder.GetNumRegisters())
         {
             UINT32 regNum = pSlot->Slot.RegisterNumber;
-            if( reportScratchSlots || !IsScratchRegister( regNum, pRD ) )
+            if( ( reportScratchSlots || !IsScratchRegister( regNum, pRD ) ) && !reportFpBasedSlotsOnly )
             {
                 ReportRegisterToGC(
                             regNum,
@@ -698,7 +702,9 @@ class GcInfoDecoder
         {
             INT32 spOffset = pSlot->Slot.Stack.SpOffset;
             GcStackSlotBase spBase = pSlot->Slot.Stack.Base;
-            if( reportScratchSlots || !IsScratchStackSlot(spOffset, spBase, pRD) )
+
+            if( ( reportScratchSlots || !IsScratchStackSlot(spOffset, spBase, pRD) ) &&
+                ( !reportFpBasedSlotsOnly || (GC_FRAMEREG_REL == spBase ) ) )
             {
                 ReportStackSlotToGC(
                             spOffset,
diff --git a/src/coreclr/inc/gcmsg.inl b/src/coreclr/inc/gcmsg.inl
index 14e81ad5f25d..4171b7e8fa91 100644
--- a/src/coreclr/inc/gcmsg.inl
+++ b/src/coreclr/inc/gcmsg.inl
@@ -44,13 +44,13 @@
     static const char* gcDetailedStartMsg()
     {
         STATIC_CONTRACT_LEAF;
-        return "*GC* %d(gen0:%d)(%d)(alloc: %zd)(%s)(%d)";
+        return "*GC* %d(gen0:%d)(%d)(alloc: %zd)(%s)(%d)(%d)";
     }
 
     static const char* gcDetailedEndMsg()
     {
         STATIC_CONTRACT_LEAF;
-        return "*EGC* %zd(gen0:%zd)(%zd)(%d)(%s)(%s)(%s)(ml: %d->%d)";
+        return "*EGC* %zd(gen0:%zd)(%zd)(%d)(%s)(%s)(%s)(ml: %d->%d)\n";
     }
 
     static const char* gcStartMarkMsg()
diff --git a/src/coreclr/inc/holder.h b/src/coreclr/inc/holder.h
index 16551b141ca1..984260308d04 100644
--- a/src/coreclr/inc/holder.h
+++ b/src/coreclr/inc/holder.h
@@ -11,13 +11,8 @@
 #include "volatile.h"
 #include "palclr.h"
 
-#ifdef PAL_STDCPP_COMPAT
 #include <utility>
 #include <type_traits>
-#else
-#include "clr_std/utility"
-#include "clr_std/type_traits"
-#endif
 
 #if defined(FEATURE_COMINTEROP) && !defined(STRIKE)
 #include <Activation.h>
diff --git a/src/coreclr/inc/icorjitinfoimpl_generated.h b/src/coreclr/inc/icorjitinfoimpl_generated.h
index 8dd993f5b478..2348162d9485 100644
--- a/src/coreclr/inc/icorjitinfoimpl_generated.h
+++ b/src/coreclr/inc/icorjitinfoimpl_generated.h
@@ -104,6 +104,7 @@ CORINFO_CLASS_HANDLE getDefaultEqualityComparerClass(
 
 void expandRawHandleIntrinsic(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_GENERICHANDLE_RESULT* pResult) override;
 
 bool isIntrinsicType(
@@ -297,12 +298,14 @@ bool getReadyToRunHelper(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
           CORINFO_LOOKUP_KIND* pGenericLookupKind,
           CorInfoHelpFunc id,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_CONST_LOOKUP* pLookup) override;
 
 void getReadyToRunDelegateCtorHelper(
           CORINFO_RESOLVED_TOKEN* pTargetMethod,
           mdToken targetConstraint,
           CORINFO_CLASS_HANDLE delegateType,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_LOOKUP* pLookup) override;
 
 CorInfoInitClassResult initClass(
@@ -438,6 +441,11 @@ void reportRichMappings(
           ICorDebugInfo::RichOffsetMapping* mappings,
           uint32_t numMappings) override;
 
+void reportMetadata(
+          const char* key,
+          const void* value,
+          size_t length) override;
+
 void* allocateArray(
           size_t cBytes) override;
 
@@ -499,6 +507,10 @@ bool getSystemVAmd64PassStructInRegisterDescriptor(
           CORINFO_CLASS_HANDLE structHnd,
           SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr) override;
 
+void getSwiftLowering(
+          CORINFO_CLASS_HANDLE structHnd,
+          CORINFO_SWIFT_LOWERING* pLowering) override;
+
 uint32_t getLoongArch64PassStructInRegisterFlags(
           CORINFO_CLASS_HANDLE structHnd) override;
 
@@ -551,6 +563,7 @@ CORINFO_FIELD_HANDLE embedFieldHandle(
 void embedGenericHandle(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
           bool fEmbedParent,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_GENERICHANDLE_RESULT* pResult) override;
 
 void getLocationOfThisType(
diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h
index 5d63f9df9a80..0e9f6e2940bc 100644
--- a/src/coreclr/inc/jiteeversionguid.h
+++ b/src/coreclr/inc/jiteeversionguid.h
@@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
 #define GUID_DEFINED
 #endif // !GUID_DEFINED
 
-constexpr GUID JITEEVersionIdentifier = { /* b8a05f18-503e-47e4-9193-931c50b151d1 */
-    0xb8a05f18,
-    0x503e,
-    0x47e4,
-    {0x91, 0x93, 0x93, 0x1c, 0x50, 0xb1, 0x51, 0xd1}
+constexpr GUID JITEEVersionIdentifier = { /* 3c216494-65f8-49e2-b69a-7f272193bcc6 */
+    0x3c216494,
+    0x65f8,
+    0x49e2,
+    {0xb6, 0x9a, 0x7f, 0x27, 0x21, 0x93, 0xbc, 0xc6}
   };
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h
index b417edc2a946..8a7798c166b7 100644
--- a/src/coreclr/inc/jithelpers.h
+++ b/src/coreclr/inc/jithelpers.h
@@ -55,18 +55,18 @@
     JITHELPER(CORINFO_HELP_ULMOD,               JIT_ULMod,          CORINFO_HELP_SIG_16_STACK)
     JITHELPER(CORINFO_HELP_LNG2DBL,             JIT_Lng2Dbl,        CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_ULNG2DBL,            JIT_ULng2Dbl,       CORINFO_HELP_SIG_8_STACK)
-    DYNAMICJITHELPER(CORINFO_HELP_DBL2INT,      JIT_Dbl2Lng,        CORINFO_HELP_SIG_8_STACK)
+    JITHELPER(CORINFO_HELP_DBL2INT,             JIT_Dbl2Int,        CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_DBL2INT_OVF,         JIT_Dbl2IntOvf,     CORINFO_HELP_SIG_8_STACK)
-    DYNAMICJITHELPER(CORINFO_HELP_DBL2LNG,      JIT_Dbl2Lng,        CORINFO_HELP_SIG_8_STACK)
+    JITHELPER(CORINFO_HELP_DBL2LNG,             JIT_Dbl2Lng,        CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_DBL2LNG_OVF,         JIT_Dbl2LngOvf,     CORINFO_HELP_SIG_8_STACK)
-    DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT,     JIT_Dbl2Lng,        CORINFO_HELP_SIG_8_STACK)
+    JITHELPER(CORINFO_HELP_DBL2UINT,            JIT_Dbl2UInt,       CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_DBL2UINT_OVF,        JIT_Dbl2UIntOvf,    CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_DBL2ULNG,            JIT_Dbl2ULng,       CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_DBL2ULNG_OVF,        JIT_Dbl2ULngOvf,    CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_FLTREM,              JIT_FltRem,         CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_DBLREM,              JIT_DblRem,         CORINFO_HELP_SIG_16_STACK)
-    JITHELPER(CORINFO_HELP_FLTROUND,            JIT_FloatRound,     CORINFO_HELP_SIG_8_STACK)
-    JITHELPER(CORINFO_HELP_DBLROUND,            JIT_DoubleRound,    CORINFO_HELP_SIG_16_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_FLTROUND,     NULL,               CORINFO_HELP_SIG_8_STACK)
+    DYNAMICJITHELPER(CORINFO_HELP_DBLROUND,     NULL,               CORINFO_HELP_SIG_16_STACK)
 
     // Allocating a new object
     JITHELPER(CORINFO_HELP_NEWFAST,                     JIT_New,    CORINFO_HELP_SIG_REG_ONLY)
@@ -204,7 +204,7 @@
     JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE,              JIT_GetSharedNonGCThreadStaticBase, CORINFO_HELP_SIG_REG_ONLY)
     JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR,          JIT_GetSharedGCThreadStaticBase, CORINFO_HELP_SIG_REG_ONLY)
     JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR,       JIT_GetSharedNonGCThreadStaticBase, CORINFO_HELP_SIG_REG_ONLY)
-    JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS,    JIT_GetSharedGCThreadStaticBaseDynamicClass, CORINFO_HELP_SIG_REG_ONLY)    
+    JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS,    JIT_GetSharedGCThreadStaticBaseDynamicClass, CORINFO_HELP_SIG_REG_ONLY)
     JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS, JIT_GetSharedNonGCThreadStaticBaseDynamicClass, CORINFO_HELP_SIG_REG_ONLY)
     JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED,    JIT_GetSharedGCThreadStaticBaseOptimized, CORINFO_HELP_SIG_REG_ONLY)
     JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED,       JIT_GetSharedNonGCThreadStaticBaseOptimized, CORINFO_HELP_SIG_REG_ONLY)
@@ -236,13 +236,10 @@
     DYNAMICJITHELPER(CORINFO_HELP_INIT_PINVOKE_FRAME,  NULL,        CORINFO_HELP_SIG_REG_ONLY)
 #endif
 
-#ifdef TARGET_X86
-    JITHELPER(CORINFO_HELP_MEMSET,              NULL,               CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB)
-    JITHELPER(CORINFO_HELP_MEMCPY,              NULL,               CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB)
-#else
-    JITHELPER(CORINFO_HELP_MEMSET,              JIT_MemSet,         CORINFO_HELP_SIG_REG_ONLY)
-    JITHELPER(CORINFO_HELP_MEMCPY,              JIT_MemCpy,         CORINFO_HELP_SIG_REG_ONLY)
-#endif
+    DYNAMICJITHELPER(CORINFO_HELP_MEMSET, NULL, CORINFO_HELP_SIG_REG_ONLY)
+    DYNAMICJITHELPER(CORINFO_HELP_MEMZERO, NULL, CORINFO_HELP_SIG_REG_ONLY)
+    DYNAMICJITHELPER(CORINFO_HELP_MEMCPY, NULL, CORINFO_HELP_SIG_REG_ONLY)
+    JITHELPER(CORINFO_HELP_NATIVE_MEMSET, Jit_NativeMemSet, CORINFO_HELP_SIG_REG_ONLY)
 
     // Generics
     JITHELPER(CORINFO_HELP_RUNTIMEHANDLE_METHOD,    JIT_GenericHandleMethod,        CORINFO_HELP_SIG_REG_ONLY)
diff --git a/src/coreclr/inc/loaderheap.h b/src/coreclr/inc/loaderheap.h
index 216668315cbf..b155d0188b84 100644
--- a/src/coreclr/inc/loaderheap.h
+++ b/src/coreclr/inc/loaderheap.h
@@ -158,7 +158,7 @@ struct LoaderHeapEvent;
 inline UINT32 GetStubCodePageSize()
 {
 #if defined(TARGET_ARM64) && defined(TARGET_UNIX)
-    return max(16*1024, GetOsPageSize());
+    return max(16*1024u, GetOsPageSize());
 #elif defined(TARGET_ARM)
     return 4096; // ARM is special as the 32bit instruction set does not easily permit a 16KB offset
 #else
diff --git a/src/coreclr/inc/nibblemapmacros.h b/src/coreclr/inc/nibblemapmacros.h
index 9554b5d1dd9c..b9da5b39f234 100644
--- a/src/coreclr/inc/nibblemapmacros.h
+++ b/src/coreclr/inc/nibblemapmacros.h
@@ -26,7 +26,6 @@
 
 #if defined(HOST_64BIT)
 // TODO: bump up the windows CODE_ALIGN to 16 and iron out any nibble map bugs that exist.
-// TODO: there is something wrong with USE_INDIRECT_CODEHEADER with CODE_ALIGN=16
 # define CODE_ALIGN             4
 # define LOG2_CODE_ALIGN        2
 #else
diff --git a/src/coreclr/inc/palclr_win.h b/src/coreclr/inc/palclr_win.h
index a9ee78e32f42..be0b725e1a68 100644
--- a/src/coreclr/inc/palclr_win.h
+++ b/src/coreclr/inc/palclr_win.h
@@ -140,8 +140,4 @@
 typedef HMODULE NATIVE_LIBRARY_HANDLE;
 #endif // HOST_WINDOWS
 
-#ifndef FALLTHROUGH
-#define FALLTHROUGH __fallthrough
-#endif // FALLTHROUGH
-
 #endif	// __PALCLR_WIN_H__
diff --git a/src/coreclr/inc/random.h b/src/coreclr/inc/random.h
index 0bd2164cbb10..6a8d7001b204 100644
--- a/src/coreclr/inc/random.h
+++ b/src/coreclr/inc/random.h
@@ -13,10 +13,7 @@
 // 2) It can have multiple instantiations with different seeds
 // 3) It behaves the same regardless of whether we build with VC++ or GCC
 //
-// If you are working in the VM, we have a convenience method: code:GetRandomInt.  This usess a thread-local
-// Random instance if a Thread object is available, and otherwise falls back to a global instance
-// with a spin-lock.
-//
+// If you are working in the VM, we have a convenience method: code:GetRandomInt.
 
 #ifndef _CLRRANDOM_H_
 #define _CLRRANDOM_H_
@@ -27,7 +24,7 @@
 // Forbid the use of srand()/rand(), as these are globally shared facilities and our use of them would
 // interfere with native user code in the same process. This override is not compatible with stl headers.
 //
-#if !defined(DO_NOT_DISABLE_RAND) && !defined(USE_STL)
+#if !defined(DO_NOT_DISABLE_RAND)
 
 #ifdef srand
 #undef srand
@@ -39,7 +36,7 @@
 #endif
 #define rand Do_not_use_rand
 
-#endif //!DO_NOT_DISABLE_RAND && !USE_STL
+#endif //!DO_NOT_DISABLE_RAND
 
 
 class CLRRandom
diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h
index b3128cb00e4b..88219146a123 100644
--- a/src/coreclr/inc/readytorun.h
+++ b/src/coreclr/inc/readytorun.h
@@ -20,7 +20,7 @@
 // If you update this, ensure you run `git grep MINIMUM_READYTORUN_MAJOR_VERSION`
 // and handle pending work.
 #define READYTORUN_MAJOR_VERSION 0x0009
-#define READYTORUN_MINOR_VERSION 0x0001
+#define READYTORUN_MINOR_VERSION 0x0002
 
 #define MINIMUM_READYTORUN_MAJOR_VERSION 0x009
 
@@ -33,6 +33,8 @@
 // R2R Version 8.0 Changes the alignment of the Int128 type
 // R2R Version 9.0 adds support for the Vector512 type
 // R2R Version 9.1 adds new helpers to allocate objects on frozen segments
+// R2R Version 9.2 adds MemZero and NativeMemSet helpers
+
 
 struct READYTORUN_CORE_HEADER
 {
@@ -182,7 +184,6 @@ enum ReadyToRunMethodSigFlags
 
 enum ReadyToRunFieldSigFlags
 {
-    READYTORUN_FIELD_SIG_IndexInsteadOfToken    = 0x08,
     READYTORUN_FIELD_SIG_MemberRefToken         = 0x10,
     READYTORUN_FIELD_SIG_OwnerType              = 0x40,
 };
@@ -325,7 +326,9 @@ enum ReadyToRunHelper
     READYTORUN_HELPER_Stelem_Ref                = 0x38,
     READYTORUN_HELPER_Ldelema_Ref               = 0x39,
 
-    READYTORUN_HELPER_MemSet                    = 0x40,
+    READYTORUN_HELPER_MemZero                   = 0x3E,
+    READYTORUN_HELPER_MemSet                    = 0x3F,
+    READYTORUN_HELPER_NativeMemSet              = 0x40,
     READYTORUN_HELPER_MemCpy                    = 0x41,
 
     // PInvoke helpers
@@ -441,10 +444,6 @@ enum ReadyToRunHelper
     READYTORUN_HELPER_StackProbe                = 0x111,
 
     READYTORUN_HELPER_GetCurrentManagedThreadId = 0x112,
-
-    // Array helpers for use with native ints
-    READYTORUN_HELPER_Stelem_Ref_I                = 0x113,
-    READYTORUN_HELPER_Ldelema_Ref_I               = 0x114,
 };
 
 #include "readytoruninstructionset.h"
diff --git a/src/coreclr/inc/readytorunhelpers.h b/src/coreclr/inc/readytorunhelpers.h
index 8691f9b9cb8c..bbb586e8eb4a 100644
--- a/src/coreclr/inc/readytorunhelpers.h
+++ b/src/coreclr/inc/readytorunhelpers.h
@@ -29,6 +29,8 @@ HELPER(READYTORUN_HELPER_Stelem_Ref,                CORINFO_HELP_ARRADDR_ST,
 HELPER(READYTORUN_HELPER_Ldelema_Ref,               CORINFO_HELP_LDELEMA_REF,                       )
 
 HELPER(READYTORUN_HELPER_MemSet,                    CORINFO_HELP_MEMSET,                            )
+HELPER(READYTORUN_HELPER_MemZero,                   CORINFO_HELP_MEMZERO,                           )
+HELPER(READYTORUN_HELPER_NativeMemSet,              CORINFO_HELP_NATIVE_MEMSET,                     )
 HELPER(READYTORUN_HELPER_MemCpy,                    CORINFO_HELP_MEMCPY,                            )
 
 HELPER(READYTORUN_HELPER_LogMethodEnter,            CORINFO_HELP_BBT_FCN_ENTER,                     )
diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h
index 4ec3b030ae5b..fe388c04a60f 100644
--- a/src/coreclr/inc/readytoruninstructionset.h
+++ b/src/coreclr/inc/readytoruninstructionset.h
@@ -52,6 +52,9 @@ enum ReadyToRunInstructionSet
     READYTORUN_INSTRUCTION_VectorT512=41,
     READYTORUN_INSTRUCTION_Rcpc2=42,
     READYTORUN_INSTRUCTION_Sve=43,
+    READYTORUN_INSTRUCTION_Avx10v1=44,
+    READYTORUN_INSTRUCTION_Avx10v1_V256=45,
+    READYTORUN_INSTRUCTION_Avx10v1_V512=46,
 
 };
 
diff --git a/src/coreclr/inc/regdisp.h b/src/coreclr/inc/regdisp.h
index 4832791ebfa5..ec47b9019dbc 100644
--- a/src/coreclr/inc/regdisp.h
+++ b/src/coreclr/inc/regdisp.h
@@ -131,6 +131,12 @@ inline LPVOID GetRegdisplayFPAddress(REGDISPLAY *display) {
     return (LPVOID)display->GetEbpLocation();
 }
 
+inline void SetRegdisplayPCTAddr(REGDISPLAY *display, TADDR addr)
+{
+    display->PCTAddr = addr;
+    display->ControlPC = *PTR_PCODE(addr);
+}
+
 
 // This function tells us if the given stack pointer is in one of the frames of the functions called by the given frame
 inline BOOL IsInCalleesFrames(REGDISPLAY *display, LPVOID stackPointer) {
@@ -318,7 +324,7 @@ struct REGDISPLAY : public REGDISPLAY_BASE {
         memset(this, 0, sizeof(REGDISPLAY));
 
         // Setup the pointer to ControlPC field
-        pPC = &ControlPC;
+        pPC = (DWORD *)&ControlPC;
     }
 };
 
@@ -447,7 +453,7 @@ inline void FillContextPointers(PT_KNONVOLATILE_CONTEXT_POINTERS pCtxPtrs, PT_CO
 }
 #endif // FEATURE_EH_FUNCLETS
 
-inline void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, PT_CONTEXT pCallerCtx = NULL)
+inline void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, PT_CONTEXT pCallerCtx = NULL, bool fLightUnwind = false)
 {
     WRAPPER_NO_CONTRACT;
 
@@ -497,6 +503,16 @@ inline void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, PT_CONTEXT pC
         pRD->IsCallerSPValid      = TRUE;        // Don't add usage of this field.  This is only temporary.
     }
 
+#ifdef DEBUG_REGDISPLAY
+    pRD->_pThread = NULL;
+#endif // DEBUG_REGDISPLAY
+
+    // This will setup the PC and SP
+    SyncRegDisplayToCurrentContext(pRD);
+
+    if (fLightUnwind)
+        return;
+
     FillContextPointers(&pRD->ctxPtrsOne, pctx);
 
 #if defined(TARGET_ARM)
@@ -550,12 +566,6 @@ inline void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, PT_CONTEXT pC
     pRD->volatileCurrContextPointers.T6 = &pctx->T6;
 #endif // TARGET_RISCV64
 
-#ifdef DEBUG_REGDISPLAY
-    pRD->_pThread = NULL;
-#endif // DEBUG_REGDISPLAY
-
-    // This will setup the PC and SP
-    SyncRegDisplayToCurrentContext(pRD);
 #endif // !FEATURE_EH_FUNCLETS
 }
 
diff --git a/src/coreclr/inc/safemath.h b/src/coreclr/inc/safemath.h
index fcd51af3de8c..ff1fcbee7811 100644
--- a/src/coreclr/inc/safemath.h
+++ b/src/coreclr/inc/safemath.h
@@ -31,11 +31,11 @@
 
 #include "static_assert.h"
 
-#ifdef PAL_STDCPP_COMPAT
 #include <type_traits>
-#else
-#include "clr_std/type_traits"
-#endif
+
+#ifdef FEATURE_PAL
+#include "pal_mstypes.h"
+#endif // FEATURE_PAL
 
 //==================================================================
 // Semantics: if val can be represented as the exact same value
diff --git a/src/coreclr/inc/sospriv.idl b/src/coreclr/inc/sospriv.idl
index a13760f7732a..c377df57a153 100644
--- a/src/coreclr/inc/sospriv.idl
+++ b/src/coreclr/inc/sospriv.idl
@@ -43,12 +43,14 @@ typedef unsigned int size_t;
 typedef int ModuleMapType;
 typedef int VCSHeapType;
 typedef int LoaderHeapKind;
+typedef int MethodTableInitializationFlags;
 cpp_quote("#endif")
 
 
 cpp_quote("typedef enum { TYPEDEFTOMETHODTABLE, TYPEREFTOMETHODTABLE } ModuleMapType;")
 cpp_quote("typedef enum {IndcellHeap, LookupHeap, ResolveHeap, DispatchHeap, CacheEntryHeap, VtableHeap} VCSHeapType;")
 cpp_quote("typedef enum {LoaderHeapKindNormal = 0, LoaderHeapKindExplicitControl = 1} LoaderHeapKind;")
+cpp_quote("typedef enum {MethodTableInitialized = 1, MethodTableInitializationFailed = 2} MethodTableInitializationFlags;")
 cpp_quote("typedef enum {FreeUnknownRegion = 0, FreeGlobalHugeRegion = 1, FreeGlobalRegion = 2, FreeRegion = 3, FreeSohSegment = 4, FreeUohSegment = 5 } FreeRegionKind;")
 
 typedef void (*MODULEMAPTRAVERSE)(UINT index, CLRDATA_ADDRESS methodTable,LPVOID token);
@@ -505,3 +507,15 @@ interface ISOSDacInterface13 : IUnknown
     HRESULT GetGCFreeRegions(ISOSMemoryEnum **ppEnum);
     HRESULT LockedFlush();
 }
+
+[
+    object,
+    local,
+    uuid(9aa22aca-6dc6-4a0c-b4e0-70d2416b9837)
+]
+interface ISOSDacInterface14 : IUnknown
+{
+    HRESULT GetStaticBaseAddress(CLRDATA_ADDRESS methodTable, CLRDATA_ADDRESS *nonGCStaticsAddress, CLRDATA_ADDRESS *GCStaticsAddress);
+    HRESULT GetThreadStaticBaseAddress(CLRDATA_ADDRESS methodTable, CLRDATA_ADDRESS thread, CLRDATA_ADDRESS *nonGCStaticsAddress, CLRDATA_ADDRESS *GCStaticsAddress);
+    HRESULT GetMethodTableInitializationFlags(CLRDATA_ADDRESS methodTable, MethodTableInitializationFlags *initializationStatus);
+}
diff --git a/src/coreclr/inc/sstring.h b/src/coreclr/inc/sstring.h
index 14244d779d82..1b58f299bef7 100644
--- a/src/coreclr/inc/sstring.h
+++ b/src/coreclr/inc/sstring.h
@@ -574,7 +574,6 @@ class EMPTY_BASES_DECL SString : private SBuffer
 public:
     BOOL LoadResource(CCompRC::ResourceCategory eCategory, int resourceID);
     HRESULT LoadResourceAndReturnHR(CCompRC::ResourceCategory eCategory, int resourceID);
-    HRESULT LoadResourceAndReturnHR(CCompRC* pResourceDLL, CCompRC::ResourceCategory eCategory, int resourceID);
     BOOL FormatMessage(DWORD dwFlags, LPCVOID lpSource, DWORD dwMessageId, DWORD dwLanguageId,
                        const SString &arg1 = Empty(), const SString &arg2 = Empty(),
                        const SString &arg3 = Empty(), const SString &arg4 = Empty(),
diff --git a/src/coreclr/inc/staticcontract.h b/src/coreclr/inc/staticcontract.h
index ecb528829b70..df26383593e7 100644
--- a/src/coreclr/inc/staticcontract.h
+++ b/src/coreclr/inc/staticcontract.h
@@ -93,8 +93,6 @@
 #define ANNOTATION_FN_MODE_COOPERATIVE      __annotation(W("MODE_COOPERATIVE ") SCAN_WIDEN(__FUNCTION__))
 #define ANNOTATION_FN_MODE_PREEMPTIVE       __annotation(W("MODE_PREEMPTIVE ") SCAN_WIDEN(__FUNCTION__))
 #define ANNOTATION_FN_MODE_ANY              __annotation(W("MODE_ANY ") SCAN_WIDEN(__FUNCTION__))
-#define ANNOTATION_FN_HOST_NOCALLS          __annotation(W("HOST_NOCALLS ") SCAN_WIDEN(__FUNCTION__))
-#define ANNOTATION_FN_HOST_CALLS            __annotation(W("HOST_CALLS ") SCAN_WIDEN(__FUNCTION__))
 
 #define ANNOTATION_ENTRY_POINT              __annotation(W("SO_EP ") SCAN_WIDEN(__FUNCTION__))
 
@@ -135,9 +133,6 @@
 #define ANNOTATION_TRY_MARKER               { }
 #define ANNOTATION_CATCH_MARKER             { }
 
-#define ANNOTATION_FN_HOST_NOCALLS          { }
-#define ANNOTATION_FN_HOST_CALLS            { }
-
 #define ANNOTATION_FN_SPECIAL_HOLDER_BEGIN  { }
 #define ANNOTATION_SPECIAL_HOLDER_END       { }
 #define ANNOTATION_SPECIAL_HOLDER_CALLER_NEEDS_DYNAMIC_CONTRACT { }
@@ -155,8 +150,6 @@
 #define ANNOTATION_FN_MODE_COOPERATIVE      { }
 #define ANNOTATION_FN_MODE_PREEMPTIVE       { }
 #define ANNOTATION_FN_MODE_ANY              { }
-#define ANNOTATION_FN_HOST_NOCALLS          { }
-#define ANNOTATION_FN_HOST_CALLS            { }
 
 #define ANNOTATION_SUPPORTS_DAC             { }
 #define ANNOTATION_SUPPORTS_DAC_HOST_ONLY   { }
@@ -179,8 +172,6 @@
 #define STATIC_CONTRACT_FORBID_FAULT        ANNOTATION_FN_FORBID_FAULT
 #define STATIC_CONTRACT_GC_TRIGGERS         ANNOTATION_FN_GC_TRIGGERS
 #define STATIC_CONTRACT_GC_NOTRIGGER        ANNOTATION_FN_GC_NOTRIGGER
-#define STATIC_CONTRACT_HOST_NOCALLS        ANNOTATION_FN_HOST_NOCALLS
-#define STATIC_CONTRACT_HOST_CALLS          ANNOTATION_FN_HOST_CALLS
 
 #define STATIC_CONTRACT_SUPPORTS_DAC        ANNOTATION_SUPPORTS_DAC
 #define STATIC_CONTRACT_SUPPORTS_DAC_HOST_ONLY ANNOTATION_SUPPORTS_DAC_HOST_ONLY
diff --git a/src/coreclr/inc/stdmacros.h b/src/coreclr/inc/stdmacros.h
index 7e4ae79c535c..79f9225321fe 100644
--- a/src/coreclr/inc/stdmacros.h
+++ b/src/coreclr/inc/stdmacros.h
@@ -159,9 +159,9 @@
     #define DBG_ADDR(ptr)      (DWORD)((UINT_PTR)(ptr))
 #endif // HOST_64BIT
 
-#ifdef TARGET_ARM
+#if defined(HOST_ARM) || defined(HOST_RISCV64)
     #define ALIGN_ACCESS        ((1<<LOG2_PTRSIZE)-1)
-#endif
+#endif // HOST_ARM || HOST_RISCV64
 
 
 #ifndef ALLOC_ALIGN_CONSTANT
diff --git a/src/coreclr/inc/utilcode.h b/src/coreclr/inc/utilcode.h
index fe5db13f6b97..55713550aedc 100644
--- a/src/coreclr/inc/utilcode.h
+++ b/src/coreclr/inc/utilcode.h
@@ -10,14 +10,16 @@
 #ifndef __UtilCode_h__
 #define __UtilCode_h__
 
+#include <type_traits>
+#include <algorithm>
+#include <stdio.h>
+#include <limits.h>
+
 #include "crtwrap.h"
 #include "winwrap.h"
 #include <wchar.h>
-#include <stdio.h>
-#include <malloc.h>
 #include <ole2.h>
 #include <oleauto.h>
-#include <limits.h>
 #include "clrtypes.h"
 #include "safewrap.h"
 #include "volatile.h"
@@ -29,12 +31,6 @@
 #include "safemath.h"
 #include "new.hpp"
 
-#ifdef PAL_STDCPP_COMPAT
-#include <type_traits>
-#else
-#include "clr_std/type_traits"
-#endif
-
 #include "contract.h"
 
 #include <minipal/utils.h>
@@ -224,7 +220,7 @@ typedef LPSTR   LPUTF8;
 #define MAKE_UTF8PTR_FROMWIDE_NOTHROW(ptrname, widestr) \
     CQuickBytes __qb##ptrname; \
     int __l##ptrname = (int)u16_strlen(widestr); \
-    LPUTF8 ptrname = 0; \
+    LPUTF8 ptrname = NULL; \
     if (__l##ptrname <= MAKE_MAX_LENGTH) { \
         __l##ptrname = (int)((__l##ptrname + 1) * 2 * sizeof(char)); \
         ptrname = (LPUTF8) __qb##ptrname.AllocNoThrow(__l##ptrname); \
@@ -240,12 +236,12 @@ typedef LPSTR   LPUTF8;
                     if (WszWideCharToMultiByte(CP_UTF8, 0, widestr, -1, ptrname, __lsize##ptrname, NULL, NULL) != 0) { \
                         ptrname[__l##ptrname] = 0; \
                     } else { \
-                        ptrname = 0; \
+                        ptrname = NULL; \
                     } \
                 } \
             } \
             else { \
-                ptrname = 0; \
+                ptrname = NULL; \
             } \
         } \
     } \
@@ -255,7 +251,7 @@ typedef LPSTR   LPUTF8;
 #define MAKE_WIDEPTR_FROMUTF8N_NOTHROW(ptrname, utf8str, n8chrs) \
     CQuickBytes __qb##ptrname; \
     int __l##ptrname; \
-    LPWSTR ptrname = 0; \
+    LPWSTR ptrname = NULL; \
     __l##ptrname = WszMultiByteToWideChar(CP_UTF8, 0, utf8str, n8chrs, 0, 0); \
     if (__l##ptrname <= MAKE_MAX_LENGTH) { \
         ptrname = (LPWSTR) __qb##ptrname.AllocNoThrow((__l##ptrname+1)*sizeof(WCHAR));  \
@@ -263,7 +259,7 @@ typedef LPSTR   LPUTF8;
             if (WszMultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, utf8str, n8chrs, ptrname, __l##ptrname) != 0) { \
                 ptrname[__l##ptrname] = 0; \
             } else { \
-                ptrname = 0; \
+                ptrname = NULL; \
             } \
         } \
     }
@@ -302,28 +298,6 @@ inline WCHAR* FormatInteger(WCHAR* str, size_t strCount, const char* fmt, I v)
     return str;
 }
 
-//*****************************************************************************
-// Placement new is used to new and object at an exact location.  The pointer
-// is simply returned to the caller without actually using the heap.  The
-// advantage here is that you cause the ctor() code for the object to be run.
-// This is ideal for heaps of C++ objects that need to get init'd multiple times.
-// Example:
-//      void        *pMem = GetMemFromSomePlace();
-//      Foo *p = new (pMem) Foo;
-//      DoSomething(p);
-//      p->~Foo();
-//*****************************************************************************
-#ifndef __PLACEMENT_NEW_INLINE
-#define __PLACEMENT_NEW_INLINE
-inline void *__cdecl operator new(size_t, void *_P)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-    return (_P);
-}
-#endif // __PLACEMENT_NEW_INLINE
-
-
 /********************************************************************************/
 /* portability helpers */
 
@@ -1920,7 +1894,7 @@ class CHashTableAndData : public CHashTable
     ~CHashTableAndData()
     {
         WRAPPER_NO_CONTRACT;
-        if (m_pcEntries != NULL)
+        if (m_pcEntries != (TADDR)NULL)
             MemMgr::Free((BYTE*)m_pcEntries, MemMgr::RoundSize(m_iEntries * m_iEntrySize));
     }
 
@@ -2100,7 +2074,7 @@ int CHashTableAndData<MemMgr>::Grow()   // 1 if successful, 0 if not.
     int         iCurSize;               // Current size in bytes.
     int         iEntries;               // New # of entries.
 
-    _ASSERTE(m_pcEntries != NULL);
+    _ASSERTE(m_pcEntries != (TADDR)NULL);
     _ASSERTE(m_iFree == UINT32_MAX);
 
     // Compute the current size and new # of entries.
@@ -3934,37 +3908,6 @@ inline T* InterlockedCompareExchangeT(
     return InterlockedCompareExchangeT(destination, exchange, static_cast<T*>(comparand));
 }
 
-// NULL pointer variants of the above to avoid having to cast NULL
-// to the appropriate pointer type.
-template <typename T>
-inline T* InterlockedExchangeT(
-    T* volatile *   target,
-    int             value) // When NULL is provided as argument.
-{
-    //STATIC_ASSERT(value == 0);
-    return InterlockedExchangeT(target, nullptr);
-}
-
-template <typename T>
-inline T* InterlockedCompareExchangeT(
-    T* volatile *   destination,
-    int             exchange,  // When NULL is provided as argument.
-    T*              comparand)
-{
-    //STATIC_ASSERT(exchange == 0);
-    return InterlockedCompareExchangeT(destination, nullptr, comparand);
-}
-
-template <typename T>
-inline T* InterlockedCompareExchangeT(
-    T* volatile *   destination,
-    T*              exchange,
-    int             comparand) // When NULL is provided as argument.
-{
-    //STATIC_ASSERT(comparand == 0);
-    return InterlockedCompareExchangeT(destination, exchange, nullptr);
-}
-
 #undef InterlockedExchangePointer
 #define InterlockedExchangePointer Use_InterlockedExchangeT
 #undef InterlockedCompareExchangePointer
diff --git a/src/coreclr/inc/winwrap.h b/src/coreclr/inc/winwrap.h
index 4cf0b9655ad1..6235e4b5a181 100644
--- a/src/coreclr/inc/winwrap.h
+++ b/src/coreclr/inc/winwrap.h
@@ -254,13 +254,4 @@ WszCreateProcess(
     LPPROCESS_INFORMATION lpProcessInformation
     );
 
-#ifdef HOST_WINDOWS
-
-//
-// Workaround for https://github.com/microsoft/WindowsAppSDK/issues/4074
-// Windows SDK is missing InterlockedCompareExchange8 definition.
-//
-#define InterlockedCompareExchange8 _InterlockedCompareExchange8
-
-#endif // HOST_WINDOWS
 #endif  // __WIN_WRAP_H__
diff --git a/src/coreclr/jit/.clang-format b/src/coreclr/jit/.clang-format
index 1e3930f7379d..307b1d7128bd 100644
--- a/src/coreclr/jit/.clang-format
+++ b/src/coreclr/jit/.clang-format
@@ -1,80 +1,131 @@
 ---
-Language:     Cpp
+Language: Cpp
 AccessModifierOffset: -4
 AlignAfterOpenBracket: Align
-AlignConsecutiveAssignments: true
-AlignConsecutiveDeclarations: true
-AlignEscapedNewlinesLeft: false
-AlignOperands:   true
-AlignTrailingComments: true
+
+AlignConsecutiveAssignments:
+  Enabled: true
+  AcrossEmptyLines: false
+  AcrossComments: false
+  AlignCompound: false
+  PadOperators: true
+
+AlignConsecutiveBitFields:
+  Enabled: true
+  AcrossEmptyLines: false
+  AcrossComments: false
+
+AlignConsecutiveDeclarations:
+  Enabled: true
+  AcrossEmptyLines: false
+  AcrossComments: false
+
+AlignConsecutiveMacros:
+  Enabled: true
+  AcrossEmptyLines: false
+  AcrossComments: false
+
+AlignEscapedNewlines: Right
+AlignOperands: true
+
+AlignTrailingComments:
+  Kind: Always
+  OverEmptyLines: 0
+
+AllowAllArgumentsOnNextLine: true
 AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: false
+AllowShortBlocksOnASingleLine: Never
 AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: Empty
-AllowShortIfStatementsOnASingleLine: false
+AllowShortEnumsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLambdasOnASingleLine: Empty
 AllowShortLoopsOnASingleLine: false
 AlwaysBreakAfterDefinitionReturnType: None
 AlwaysBreakBeforeMultilineStrings: false
 AlwaysBreakTemplateDeclarations: true
 BinPackArguments: true
 BinPackParameters: false
+
+BreakBeforeBraces: Custom
 BraceWrapping:
-  AfterClass:      true
-  AfterControlStatement: true
-  AfterEnum:       false
-  AfterFunction:   true
-  AfterNamespace:  false
-  AfterObjCDeclaration: false
-  AfterStruct:     true
-  AfterUnion:      true
-  BeforeCatch:     true
-  BeforeElse:      true
-  IndentBraces:    false
+  AfterCaseLabel: true
+  AfterClass: true
+  AfterControlStatement: Always
+  AfterEnum: true
+  AfterFunction: true
+  AfterNamespace: true
+  AfterStruct: true
+  AfterUnion: true
+  AfterExternBlock: true
+  BeforeCatch: true
+  BeforeElse: true
+  BeforeLambdaBody: false
+  BeforeWhile: false
+  IndentBraces: false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+
 BreakBeforeBinaryOperators: None
-BreakBeforeBraces: Allman
 BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: true
-ColumnLimit:   120
-CommentPragmas:  '^ IWYU pragma:'
-ConstructorInitializerAllOnOneLineOrOnePerLine: true
+BreakConstructorInitializers: BeforeComma
+BreakInheritanceList: BeforeComma
+BreakStringLiterals: false
+
+ColumnLimit: 120
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
 ConstructorInitializerIndentWidth: 4
 ContinuationIndentWidth: 4
 Cpp11BracedListStyle: true
 DerivePointerAlignment: false
-DisableFormat:   false
+DisableFormat: false
+
+EmptyLineAfterAccessModifier: Leave
+EmptyLineBeforeAccessModifier: Leave
 ExperimentalAutoDetectBinPacking: false
-ForEachMacros:   [  ]
+ForEachMacros: [  ]
+IndentAccessModifiers: false
+IndentCaseBlocks: false
 IndentCaseLabels: true
-IndentWidth:     4
+IndentExternBlock: false
+IndentGotoLabels: true
+IndentPPDirectives: None
+IndentWidth: 4
 IndentWrappedFunctionNames: false
+
+InsertNewlineAtEOF: true
 KeepEmptyLinesAtTheStartOfBlocks: true
+LambdaBodyIndentation: OuterScope
 MacroBlockBegin: ''
 MacroBlockEnd:   ''
 MaxEmptyLinesToKeep: 1
 NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: true
+
 PenaltyBreakBeforeFirstCallParameter: 400
 PenaltyBreakComment: 50
 PenaltyBreakFirstLessLess: 500
 PenaltyBreakString: 1000
 PenaltyExcessCharacter: 1000000
 PenaltyReturnTypeOnItsOwnLine: 100000
+
 PointerAlignment: Left
 ReflowComments:  true
-SortIncludes:    false
+SortIncludes:    Never
+
 SpaceAfterCStyleCast: false
 SpaceBeforeAssignmentOperators: true
 SpaceBeforeParens: ControlStatements
 SpaceInEmptyParentheses: false
 SpacesBeforeTrailingComments: 1
-SpacesInAngles:  false
+SpacesInAngles: false
 SpacesInContainerLiterals: true
 SpacesInCStyleCastParentheses: false
 SpacesInParentheses: false
 SpacesInSquareBrackets: false
-Standard:        Cpp11
+
+Standard:        Latest
 TabWidth:        4
 UseTab:          Never
 ...
diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt
index 0732d71382da..efb3e4dd46b7 100644
--- a/src/coreclr/jit/CMakeLists.txt
+++ b/src/coreclr/jit/CMakeLists.txt
@@ -81,6 +81,7 @@ function(create_standalone_jit)
   if ((TARGETDETAILS_ARCH STREQUAL "x64") OR (TARGETDETAILS_ARCH STREQUAL "arm64") OR ((TARGETDETAILS_ARCH STREQUAL "x86") AND NOT (TARGETDETAILS_OS STREQUAL "unix")))
     target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_SIMD)
     target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_HW_INTRINSICS)
+    target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MASKED_HW_INTRINSICS)
   endif ()
 
   if ((TARGETDETAILS_ARCH STREQUAL "wasm64") OR (TARGETDETAILS_ARCH STREQUAL "wasm32"))
@@ -103,6 +104,7 @@ endfunction()
 if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND NOT CLR_CMAKE_HOST_UNIX))
   add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_SIMD>)
   add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_HW_INTRINSICS>)
+  add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_MASKED_HW_INTRINSICS>)
 endif ()
 
 # JIT_BUILD disables certain PAL_TRY debugging features
@@ -113,12 +115,12 @@ if(CLR_CMAKE_TARGET_WIN32)
 endif(CLR_CMAKE_TARGET_WIN32)
 
 set( JIT_SOURCES
+  abi.cpp
   alloc.cpp
   assertionprop.cpp
   bitset.cpp
   block.cpp
   buildstring.cpp
-  layout.cpp
   codegencommon.cpp
   codegenlinear.cpp
   compiler.cpp
@@ -147,20 +149,23 @@ set( JIT_SOURCES
   gentree.cpp
   gschecks.cpp
   hashbv.cpp
-  hwintrinsic.cpp
+  helperexpansion.cpp
   hostallocator.cpp
+  hwintrinsic.cpp
   ifconversion.cpp
-  helperexpansion.cpp
-  indirectcalltransformer.cpp
-  importercalls.cpp
   importer.cpp
+  importercalls.cpp
   importervectorization.cpp
+  indirectcalltransformer.cpp
+  inductionvariableopts.cpp
   inline.cpp
   inlinepolicy.cpp
   instr.cpp
   jitconfig.cpp
   jiteh.cpp
   jithashtable.cpp
+  jitmetadata.cpp
+  layout.cpp
   lclmorph.cpp
   lclvars.cpp
   likelyclass.cpp
@@ -175,7 +180,6 @@ set( JIT_SOURCES
   objectalloc.cpp
   optcse.cpp
   optimizebools.cpp
-  switchrecognition.cpp
   optimizer.cpp
   patchpoint.cpp
   phase.cpp
@@ -188,6 +192,7 @@ set( JIT_SOURCES
   regalloc.cpp
   registerargconvention.cpp
   regset.cpp
+  scev.cpp
   scopeinfo.cpp
   sideeffects.cpp
   sm.cpp
@@ -196,6 +201,7 @@ set( JIT_SOURCES
   ssabuilder.cpp
   ssarenamestate.cpp
   stacklevelsetter.cpp
+  switchrecognition.cpp
   treelifeupdater.cpp
   unwind.cpp
   utils.cpp
@@ -275,6 +281,7 @@ set( JIT_ARM64_SOURCES
   codegenarm64.cpp
   codegenarm64test.cpp
   emitarm64.cpp
+  emitarm64sve.cpp
   lowerarmarch.cpp
   lsraarmarch.cpp
   lsraarm64.cpp
@@ -324,6 +331,7 @@ set( JIT_HEADERS
   ../inc/corjitflags.h
   ../inc/corjithost.h
   _typeinfo.h
+  abi.h
   alloc.h
   arraystack.h
   bitset.h
@@ -377,6 +385,8 @@ set( JIT_HEADERS
   jitexpandarray.h
   jitgcinfo.h
   jithashtable.h
+  jitmetadata.h
+  jitmetadatalist.h
   jitpch.h
   jitstd.h
   lir.h
@@ -401,6 +411,7 @@ set( JIT_HEADERS
   registerargconvention.h
   register.h
   regset.h
+  scev.h
   sideeffects.h
   simd.h
   simdashwintrinsic.h
@@ -417,7 +428,6 @@ set( JIT_HEADERS
   targetamd64.h
   targetarm.h
   targetarm64.h
-  tinyarray.h
   treelifeupdater.h
   typelist.h
   unwind.h
diff --git a/src/coreclr/jit/ICorJitInfo_names_generated.h b/src/coreclr/jit/ICorJitInfo_names_generated.h
index 5fe1f716d474..30c499518e00 100644
--- a/src/coreclr/jit/ICorJitInfo_names_generated.h
+++ b/src/coreclr/jit/ICorJitInfo_names_generated.h
@@ -108,6 +108,7 @@ DEF_CLR_API(setBoundaries)
 DEF_CLR_API(getVars)
 DEF_CLR_API(setVars)
 DEF_CLR_API(reportRichMappings)
+DEF_CLR_API(reportMetadata)
 DEF_CLR_API(allocateArray)
 DEF_CLR_API(freeArray)
 DEF_CLR_API(getArgNext)
@@ -124,6 +125,7 @@ DEF_CLR_API(printMethodName)
 DEF_CLR_API(getMethodNameFromMetadata)
 DEF_CLR_API(getMethodHash)
 DEF_CLR_API(getSystemVAmd64PassStructInRegisterDescriptor)
+DEF_CLR_API(getSwiftLowering)
 DEF_CLR_API(getLoongArch64PassStructInRegisterFlags)
 DEF_CLR_API(getRISCV64PassStructInRegisterFlags)
 DEF_CLR_API(getThreadTLSIndex)
diff --git a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp
index cae9b5d7b39e..77af720739ec 100644
--- a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp
+++ b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp
@@ -202,10 +202,11 @@ CORINFO_CLASS_HANDLE WrapICorJitInfo::getDefaultEqualityComparerClass(
 
 void WrapICorJitInfo::expandRawHandleIntrinsic(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     API_ENTER(expandRawHandleIntrinsic);
-    wrapHnd->expandRawHandleIntrinsic(pResolvedToken, pResult);
+    wrapHnd->expandRawHandleIntrinsic(pResolvedToken, callerHandle, pResult);
     API_LEAVE(expandRawHandleIntrinsic);
 }
 
@@ -687,10 +688,11 @@ bool WrapICorJitInfo::getReadyToRunHelper(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
           CORINFO_LOOKUP_KIND* pGenericLookupKind,
           CorInfoHelpFunc id,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_CONST_LOOKUP* pLookup)
 {
     API_ENTER(getReadyToRunHelper);
-    bool temp = wrapHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, pLookup);
+    bool temp = wrapHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, callerHandle, pLookup);
     API_LEAVE(getReadyToRunHelper);
     return temp;
 }
@@ -699,10 +701,11 @@ void WrapICorJitInfo::getReadyToRunDelegateCtorHelper(
           CORINFO_RESOLVED_TOKEN* pTargetMethod,
           mdToken targetConstraint,
           CORINFO_CLASS_HANDLE delegateType,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_LOOKUP* pLookup)
 {
     API_ENTER(getReadyToRunDelegateCtorHelper);
-    wrapHnd->getReadyToRunDelegateCtorHelper(pTargetMethod, targetConstraint, delegateType, pLookup);
+    wrapHnd->getReadyToRunDelegateCtorHelper(pTargetMethod, targetConstraint, delegateType, callerHandle, pLookup);
     API_LEAVE(getReadyToRunDelegateCtorHelper);
 }
 
@@ -1028,6 +1031,16 @@ void WrapICorJitInfo::reportRichMappings(
     API_LEAVE(reportRichMappings);
 }
 
+void WrapICorJitInfo::reportMetadata(
+          const char* key,
+          const void* value,
+          size_t length)
+{
+    API_ENTER(reportMetadata);
+    wrapHnd->reportMetadata(key, value, length);
+    API_LEAVE(reportMetadata);
+}
+
 void* WrapICorJitInfo::allocateArray(
           size_t cBytes)
 {
@@ -1183,6 +1196,15 @@ bool WrapICorJitInfo::getSystemVAmd64PassStructInRegisterDescriptor(
     return temp;
 }
 
+void WrapICorJitInfo::getSwiftLowering(
+          CORINFO_CLASS_HANDLE structHnd,
+          CORINFO_SWIFT_LOWERING* pLowering)
+{
+    API_ENTER(getSwiftLowering);
+    wrapHnd->getSwiftLowering(structHnd, pLowering);
+    API_LEAVE(getSwiftLowering);
+}
+
 uint32_t WrapICorJitInfo::getLoongArch64PassStructInRegisterFlags(
           CORINFO_CLASS_HANDLE structHnd)
 {
@@ -1311,10 +1333,11 @@ CORINFO_FIELD_HANDLE WrapICorJitInfo::embedFieldHandle(
 void WrapICorJitInfo::embedGenericHandle(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
           bool fEmbedParent,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     API_ENTER(embedGenericHandle);
-    wrapHnd->embedGenericHandle(pResolvedToken, fEmbedParent, pResult);
+    wrapHnd->embedGenericHandle(pResolvedToken, fEmbedParent, callerHandle, pResult);
     API_LEAVE(embedGenericHandle);
 }
 
diff --git a/src/coreclr/jit/_typeinfo.h b/src/coreclr/jit/_typeinfo.h
index 42526eeb8de4..9285535b5531 100644
--- a/src/coreclr/jit/_typeinfo.h
+++ b/src/coreclr/jit/_typeinfo.h
@@ -41,25 +41,34 @@ class typeInfo
 private:
     var_types m_type;
 
-    union {
+    union
+    {
         CORINFO_CLASS_HANDLE m_cls;               // Valid, but not always available, for TYP_REFs.
         methodPointerInfo*   m_methodPointerInfo; // Valid only for function pointers.
     };
 
 public:
-    typeInfo() : m_type(TYP_UNDEF), m_cls(NO_CLASS_HANDLE)
+    typeInfo()
+        : m_type(TYP_UNDEF)
+        , m_cls(NO_CLASS_HANDLE)
     {
     }
 
-    typeInfo(var_types type) : m_type(type), m_cls(NO_CLASS_HANDLE)
+    typeInfo(var_types type)
+        : m_type(type)
+        , m_cls(NO_CLASS_HANDLE)
     {
     }
 
-    typeInfo(CORINFO_CLASS_HANDLE cls) : m_type(TYP_REF), m_cls(cls)
+    typeInfo(CORINFO_CLASS_HANDLE cls)
+        : m_type(TYP_REF)
+        , m_cls(cls)
     {
     }
 
-    typeInfo(methodPointerInfo* methodPointerInfo) : m_type(TYP_I_IMPL), m_methodPointerInfo(methodPointerInfo)
+    typeInfo(methodPointerInfo* methodPointerInfo)
+        : m_type(TYP_I_IMPL)
+        , m_methodPointerInfo(methodPointerInfo)
     {
         assert(methodPointerInfo != nullptr);
         assert(methodPointerInfo->m_token.hMethod != nullptr);
diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp
new file mode 100644
index 000000000000..91f74fca03c8
--- /dev/null
+++ b/src/coreclr/jit/abi.cpp
@@ -0,0 +1,410 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "jitpch.h"
+#include "abi.h"
+
+//-----------------------------------------------------------------------------
+// IsPassedInRegister:
+//   Check if this segment is passed in a register.
+//
+// Return Value:
+//   True if this is passed in a register.
+//
+bool ABIPassingSegment::IsPassedInRegister() const
+{
+    return m_register != REG_NA;
+}
+
+//-----------------------------------------------------------------------------
+// IsPassedOnStack:
+//   Check if this segment is passed on the stack.
+//
+// Return Value:
+//   True if this is passed on the stack.
+//
+bool ABIPassingSegment::IsPassedOnStack() const
+{
+    return m_register == REG_NA;
+}
+
+//-----------------------------------------------------------------------------
+// GetRegister:
+//   Get the register that this segment is passed in.
+//
+// Return Value:
+//   The register.
+//
+regNumber ABIPassingSegment::GetRegister() const
+{
+    assert(IsPassedInRegister());
+    return m_register;
+}
+
+//-----------------------------------------------------------------------------
+// GetRegisterMask:
+//   Get the mask of registers that this segment is passed in.
+//
+// Return Value:
+//   The register mask.
+//
+regMaskTP ABIPassingSegment::GetRegisterMask() const
+{
+    assert(IsPassedInRegister());
+    regMaskTP reg = genRegMask(m_register);
+
+#ifdef TARGET_ARM
+    if (genIsValidFloatReg(m_register) && (Size == 8))
+    {
+        reg |= genRegMask(REG_NEXT(m_register));
+    }
+#endif
+
+    return reg;
+}
+
+//-----------------------------------------------------------------------------
+// GetStackOffset:
+//   Get the stack offset where this segment is passed.
+//
+// Return Value:
+//   Offset relative to the first stack argument.
+//
+unsigned ABIPassingSegment::GetStackOffset() const
+{
+    assert(IsPassedOnStack());
+    return m_stackOffset;
+}
+
+//-----------------------------------------------------------------------------
+// GetRegisterStoreType:
+//   Return a type that can be used to store from the register this segment is
+//   in, taking the segment's size into account.
+//
+// Return Value:
+//   A type that matches ABIPassingSegment::Size and the register type.
+//
+var_types ABIPassingSegment::GetRegisterStoreType() const
+{
+    assert(IsPassedInRegister());
+    if (genIsValidFloatReg(m_register))
+    {
+        switch (Size)
+        {
+            case 4:
+                return TYP_FLOAT;
+            case 8:
+                return TYP_DOUBLE;
+#ifdef FEATURE_SIMD
+            case 16:
+                return TYP_SIMD16;
+#endif
+            default:
+                return TYP_UNDEF;
+        }
+    }
+    else
+    {
+        switch (Size)
+        {
+            case 1:
+                return TYP_UBYTE;
+            case 2:
+                return TYP_USHORT;
+            case 4:
+                return TYP_INT;
+#ifdef TARGET_64BIT
+            case 8:
+                return TYP_LONG;
+#endif
+            default:
+                return TYP_UNDEF;
+        }
+    }
+}
+
+//-----------------------------------------------------------------------------
+// InRegister:
+//   Create an ABIPassingSegment representing that a segment is passed in a
+//   register.
+//
+// Parameters:
+//   reg    - The register the segment is passed in
+//   offset - The offset of the segment that is passed in the register
+//   size   - The size of the segment passed in the register
+//
+// Return Value:
+//   New instance of ABIPassingSegment.
+//
+ABIPassingSegment ABIPassingSegment::InRegister(regNumber reg, unsigned offset, unsigned size)
+{
+    assert(reg != REG_NA);
+    ABIPassingSegment segment;
+    segment.m_register    = reg;
+    segment.m_stackOffset = 0;
+    segment.Offset        = offset;
+    segment.Size          = size;
+    return segment;
+}
+
+//-----------------------------------------------------------------------------
+// OnStack:
+//   Create an ABIPassingSegment representing that a segment is passed on the
+//   stack.
+//
+// Parameters:
+//   stackOffset - Offset relative to the first stack parameter/argument
+//   offset      - The offset of the segment that is passed in the register
+//   size        - The size of the segment passed in the register
+//
+// Return Value:
+//   New instance of ABIPassingSegment.
+//
+ABIPassingSegment ABIPassingSegment::OnStack(unsigned stackOffset, unsigned offset, unsigned size)
+{
+    ABIPassingSegment segment;
+    segment.m_register    = REG_NA;
+    segment.m_stackOffset = stackOffset;
+    segment.Offset        = offset;
+    segment.Size          = size;
+    return segment;
+}
+
+//-----------------------------------------------------------------------------
+// HasAnyRegisterSegment:
+//   Check if any part of this value is passed in a register.
+//
+// Return Value:
+//   True if so.
+//
+bool ABIPassingInformation::HasAnyRegisterSegment() const
+{
+    for (unsigned i = 0; i < NumSegments; i++)
+    {
+        if (Segments[i].IsPassedInRegister())
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+//-----------------------------------------------------------------------------
+// HasAnyStackSegment:
+//   Check if any part of this value is passed on the stack.
+//
+// Return Value:
+//   True if so.
+//
+bool ABIPassingInformation::HasAnyStackSegment() const
+{
+    for (unsigned i = 0; i < NumSegments; i++)
+    {
+        if (Segments[i].IsPassedOnStack())
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+//-----------------------------------------------------------------------------
+// HasExactlyOneStackSegment:
+//   Check if this value is passed as a single stack segment.
+//
+// Return Value:
+//   True if so.
+//
+bool ABIPassingInformation::HasExactlyOneStackSegment() const
+{
+    return (NumSegments == 1) && Segments[0].IsPassedOnStack();
+}
+
+//-----------------------------------------------------------------------------
+// IsSplitAcrossRegistersAndStack:
+//   Check if this ABIPassingInformation represents passing a value in both
+//   registers and on stack.
+//
+// Return Value:
+//   True if the value is passed in both registers and on stack.
+//
+bool ABIPassingInformation::IsSplitAcrossRegistersAndStack() const
+{
+    bool anyReg   = false;
+    bool anyStack = false;
+    for (unsigned i = 0; i < NumSegments; i++)
+    {
+        anyReg |= Segments[i].IsPassedInRegister();
+        anyStack |= Segments[i].IsPassedOnStack();
+    }
+    return anyReg && anyStack;
+}
+
+//-----------------------------------------------------------------------------
+// FromSegment:
+//   Create ABIPassingInformation from a single segment.
+//
+// Parameters:
+//   comp    - Compiler instance
+//   segment - The single segment that represents the passing information
+//
+// Return Value:
+//   An instance of ABIPassingInformation.
+//
+ABIPassingInformation ABIPassingInformation::FromSegment(Compiler* comp, const ABIPassingSegment& segment)
+{
+    ABIPassingInformation info;
+    info.NumSegments = 1;
+    info.Segments    = new (comp, CMK_ABI) ABIPassingSegment(segment);
+    return info;
+}
+
+#ifdef DEBUG
+//-----------------------------------------------------------------------------
+// Dump:
+//   Dump the ABIPassingInformation to stdout.
+//
+void ABIPassingInformation::Dump() const
+{
+    if (NumSegments != 1)
+    {
+        printf("%u segments\n", NumSegments);
+    }
+
+    for (unsigned i = 0; i < NumSegments; i++)
+    {
+        if (NumSegments > 1)
+        {
+            printf("  [%u] ", i);
+        }
+
+        const ABIPassingSegment& seg = Segments[i];
+
+        if (Segments[i].IsPassedInRegister())
+        {
+            printf("[%02u..%02u) reg %s\n", seg.Offset, seg.Offset + seg.Size, getRegName(seg.GetRegister()));
+        }
+        else
+        {
+            printf("[%02u..%02u) stack @ +%02u\n", seg.Offset, seg.Offset + seg.Size, seg.GetStackOffset());
+        }
+    }
+}
+#endif
+
+//-----------------------------------------------------------------------------
+// RegisterQueue::Dequeue:
+//   Dequeue a register from the queue.
+//
+// Return Value:
+//   The dequeued register.
+//
+regNumber RegisterQueue::Dequeue()
+{
+    assert(Count() > 0);
+    return static_cast<regNumber>(m_regs[m_index++]);
+}
+
+//-----------------------------------------------------------------------------
+// RegisterQueue::Peek:
+//   Peek at the head of the queue.
+//
+// Return Value:
+//   The head register in the queue.
+//
+regNumber RegisterQueue::Peek()
+{
+    assert(Count() > 0);
+    return static_cast<regNumber>(m_regs[m_index]);
+}
+
+//-----------------------------------------------------------------------------
+// RegisterQueue::Clear:
+//   Clear the register queue.
+//
+void RegisterQueue::Clear()
+{
+    m_index = m_numRegs;
+}
+
+#ifdef SWIFT_SUPPORT
+//-----------------------------------------------------------------------------
+// Classify:
+//   Classify a parameter for the Swift ABI.
+//
+// Parameters:
+//   comp           - Compiler instance
+//   type           - The type of the parameter
+//   structLayout   - The layout of the struct. Expected to be non-null if
+//                    varTypeIsStruct(type) is true.
+//   wellKnownParam - Well known type of the parameter (if it may affect its ABI classification)
+//
+// Returns:
+//   Classification information for the parameter.
+//
+ABIPassingInformation SwiftABIClassifier::Classify(Compiler*    comp,
+                                                   var_types    type,
+                                                   ClassLayout* structLayout,
+                                                   WellKnownArg wellKnownParam)
+{
+#ifdef TARGET_AMD64
+    if (wellKnownParam == WellKnownArg::RetBuffer)
+    {
+        return ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(REG_SWIFT_ARG_RET_BUFF, 0,
+                                                                                      TARGET_POINTER_SIZE));
+    }
+#endif
+
+    if (wellKnownParam == WellKnownArg::SwiftSelf)
+    {
+        return ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(REG_SWIFT_SELF, 0,
+                                                                                      TARGET_POINTER_SIZE));
+    }
+
+    if (wellKnownParam == WellKnownArg::SwiftError)
+    {
+        // We aren't actually going to pass the SwiftError* parameter in REG_SWIFT_ERROR.
+        // We won't be using this parameter at all, and shouldn't allocate registers/stack space for it,
+        // as that will mess with other args.
+        // Quirk: To work around the JIT for now, "pass" it in REG_SWIFT_ERROR,
+        // and let CodeGen::genFnProlog handle the rest.
+        return ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(REG_SWIFT_ERROR, 0,
+                                                                                      TARGET_POINTER_SIZE));
+    }
+
+    if (type == TYP_STRUCT)
+    {
+        const CORINFO_SWIFT_LOWERING* lowering = comp->GetSwiftLowering(structLayout->GetClassHandle());
+        if (lowering->byReference)
+        {
+            return m_classifier.Classify(comp, TYP_I_IMPL, nullptr, WellKnownArg::None);
+        }
+
+        ArrayStack<ABIPassingSegment> segments(comp->getAllocator(CMK_ABI));
+        for (unsigned i = 0; i < lowering->numLoweredElements; i++)
+        {
+            var_types             elemType = JITtype2varType(lowering->loweredElements[i]);
+            ABIPassingInformation elemInfo = m_classifier.Classify(comp, elemType, nullptr, WellKnownArg::None);
+
+            for (unsigned j = 0; j < elemInfo.NumSegments; j++)
+            {
+                ABIPassingSegment newSegment = elemInfo.Segments[j];
+                newSegment.Offset += lowering->offsets[i];
+                segments.Push(newSegment);
+            }
+        }
+
+        ABIPassingInformation result;
+        result.NumSegments = static_cast<unsigned>(segments.Height());
+        result.Segments    = new (comp, CMK_ABI) ABIPassingSegment[result.NumSegments];
+        for (int i = 0; i < segments.Height(); i++)
+        {
+            result.Segments[i] = segments.Bottom(i);
+        }
+
+        return result;
+    }
+
+    return m_classifier.Classify(comp, type, structLayout, wellKnownParam);
+}
+#endif
diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h
new file mode 100644
index 000000000000..7236627d375d
--- /dev/null
+++ b/src/coreclr/jit/abi.h
@@ -0,0 +1,209 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#pragma once
+
+class ABIPassingSegment
+{
+    regNumber m_register    = REG_NA;
+    unsigned  m_stackOffset = 0;
+
+public:
+    bool IsPassedInRegister() const;
+    bool IsPassedOnStack() const;
+
+    // Start offset of the segment within the parameter/argument. For example, a struct like { int32_t x; uint64_t y }
+    // may have two segments
+    // 1. Register(Offset=0, Type=TYP_INT, Size=4, Register=REG_ESI)
+    // 2. Register(Offset=8, Type=TYP_LONG, Size=8, Register=REG_EDI)
+    // on some ABIs, where the size of the first segment is not sufficient to
+    // compute the offset of the second.
+    unsigned Offset = 0;
+    // Size of the segment being passed.
+    unsigned Size = 0;
+
+    // If this segment is passed in a register, return the particular register.
+    regNumber GetRegister() const;
+
+    regMaskTP GetRegisterMask() const;
+
+    // If this segment is passed on the stack then return the particular stack
+    // offset, relative to the first stack argument's offset.
+    unsigned GetStackOffset() const;
+
+    var_types GetRegisterStoreType() const;
+
+    static ABIPassingSegment InRegister(regNumber reg, unsigned offset, unsigned size);
+    static ABIPassingSegment OnStack(unsigned stackOffset, unsigned offset, unsigned size);
+};
+
+struct ABIPassingInformation
+{
+    // The number of segments used to pass the value. Examples:
+    // - On SysV x64, structs can be passed in two registers, resulting in two
+    // register segments
+    // - On arm64/arm32, HFAs can be passed in up to four registers, giving
+    // four register segments
+    // - On arm32, structs can be split out over register and stack, giving
+    // multiple register segments and a struct segment.
+    // - On Windows x64, all parameters always fit into one stack slot or
+    // register, and thus always have NumSegments == 1
+    unsigned           NumSegments = 0;
+    ABIPassingSegment* Segments    = nullptr;
+
+    bool HasAnyRegisterSegment() const;
+    bool HasAnyStackSegment() const;
+    bool HasExactlyOneStackSegment() const;
+    bool IsSplitAcrossRegistersAndStack() const;
+
+    static ABIPassingInformation FromSegment(Compiler* comp, const ABIPassingSegment& segment);
+
+#ifdef DEBUG
+    void Dump() const;
+#endif
+};
+
+class RegisterQueue
+{
+    const regNumber* m_regs;
+    unsigned int     m_numRegs;
+    unsigned int     m_index = 0;
+
+public:
+    RegisterQueue(const regNumber* regs, unsigned int numRegs)
+        : m_regs(regs)
+        , m_numRegs(numRegs)
+    {
+    }
+
+    unsigned Count()
+    {
+        return m_numRegs - m_index;
+    }
+
+    regNumber Dequeue();
+    regNumber Peek();
+    void      Clear();
+};
+
+struct ClassifierInfo
+{
+    CorInfoCallConvExtension CallConv   = CorInfoCallConvExtension::Managed;
+    bool                     IsVarArgs  = false;
+    bool                     HasThis    = false;
+    bool                     HasRetBuff = false;
+};
+
+class X86Classifier
+{
+    RegisterQueue m_regs;
+    unsigned      m_stackArgSize = 0;
+
+public:
+    X86Classifier(const ClassifierInfo& info);
+
+    ABIPassingInformation Classify(Compiler*    comp,
+                                   var_types    type,
+                                   ClassLayout* structLayout,
+                                   WellKnownArg wellKnownParam);
+};
+
+class WinX64Classifier
+{
+    RegisterQueue m_intRegs;
+    RegisterQueue m_floatRegs;
+    unsigned      m_stackArgSize = 0;
+
+public:
+    WinX64Classifier(const ClassifierInfo& info);
+
+    ABIPassingInformation Classify(Compiler*    comp,
+                                   var_types    type,
+                                   ClassLayout* structLayout,
+                                   WellKnownArg wellKnownParam);
+};
+
+class SysVX64Classifier
+{
+    RegisterQueue m_intRegs;
+    RegisterQueue m_floatRegs;
+    unsigned      m_stackArgSize = 0;
+
+public:
+    SysVX64Classifier(const ClassifierInfo& info);
+
+    ABIPassingInformation Classify(Compiler*    comp,
+                                   var_types    type,
+                                   ClassLayout* structLayout,
+                                   WellKnownArg wellKnownParam);
+};
+
+class Arm64Classifier
+{
+    const ClassifierInfo& m_info;
+    RegisterQueue         m_intRegs;
+    RegisterQueue         m_floatRegs;
+    unsigned              m_stackArgSize = 0;
+
+public:
+    Arm64Classifier(const ClassifierInfo& info);
+
+    ABIPassingInformation Classify(Compiler*    comp,
+                                   var_types    type,
+                                   ClassLayout* structLayout,
+                                   WellKnownArg wellKnownParam);
+};
+
+class Arm32Classifier
+{
+    const ClassifierInfo& m_info;
+    // 4 int regs are available for parameters. This gives the index of the
+    // next one.
+    // A.k.a. "NCRN": Next Core Register Number
+    unsigned m_nextIntReg = 0;
+    // 16 float regs are available for parameters. We keep them as a mask as
+    // they can be backfilled.
+    unsigned m_floatRegs = 0xFFFF;
+    // A.k.a. "NSAA": Next Stack Argument Address
+    unsigned m_stackArgSize = 0;
+
+    ABIPassingInformation ClassifyFloat(Compiler* comp, var_types type, unsigned elems);
+
+public:
+    Arm32Classifier(const ClassifierInfo& info);
+
+    ABIPassingInformation Classify(Compiler*    comp,
+                                   var_types    type,
+                                   ClassLayout* structLayout,
+                                   WellKnownArg wellKnownParam);
+};
+
+#if defined(TARGET_X86)
+typedef X86Classifier PlatformClassifier;
+#elif defined(WINDOWS_AMD64_ABI)
+typedef WinX64Classifier PlatformClassifier;
+#elif defined(UNIX_AMD64_ABI)
+typedef SysVX64Classifier PlatformClassifier;
+#elif defined(TARGET_ARM64)
+typedef Arm64Classifier PlatformClassifier;
+#elif defined(TARGET_ARM)
+typedef Arm32Classifier PlatformClassifier;
+#endif
+
+#ifdef SWIFT_SUPPORT
+class SwiftABIClassifier
+{
+    PlatformClassifier m_classifier;
+
+public:
+    SwiftABIClassifier(const ClassifierInfo& info)
+        : m_classifier(info)
+    {
+    }
+
+    ABIPassingInformation Classify(Compiler*    comp,
+                                   var_types    type,
+                                   ClassLayout* structLayout,
+                                   WellKnownArg wellKnownParam);
+};
+#endif
diff --git a/src/coreclr/jit/alloc.cpp b/src/coreclr/jit/alloc.cpp
index 6300376beeb6..2fcb3f877418 100644
--- a/src/coreclr/jit/alloc.cpp
+++ b/src/coreclr/jit/alloc.cpp
@@ -42,7 +42,10 @@ size_t ArenaAllocator::getDefaultPageSize()
 // ArenaAllocator::ArenaAllocator:
 //    Default-constructs an arena allocator.
 ArenaAllocator::ArenaAllocator()
-    : m_firstPage(nullptr), m_lastPage(nullptr), m_nextFreeByte(nullptr), m_lastFreeByte(nullptr)
+    : m_firstPage(nullptr)
+    , m_lastPage(nullptr)
+    , m_nextFreeByte(nullptr)
+    , m_lastFreeByte(nullptr)
 {
 #if MEASURE_MEM_ALLOC
     memset(&m_stats, 0, sizeof(m_stats));
@@ -153,7 +156,11 @@ void* ArenaAllocator::allocateHostMemory(size_t size, size_t* pActualSize)
     if (bypassHostAllocator())
     {
         *pActualSize = size;
-        void* p      = malloc(size);
+        if (size == 0)
+        {
+            size = 1;
+        }
+        void* p = malloc(size);
         if (p == nullptr)
         {
             NOMEM();
diff --git a/src/coreclr/jit/alloc.h b/src/coreclr/jit/alloc.h
index cb3da79232f8..8899b87ad355 100644
--- a/src/coreclr/jit/alloc.h
+++ b/src/coreclr/jit/alloc.h
@@ -22,9 +22,9 @@ enum CompMemKind
 class ArenaAllocator
 {
 private:
-    ArenaAllocator(const ArenaAllocator& other) = delete;
+    ArenaAllocator(const ArenaAllocator& other)            = delete;
     ArenaAllocator& operator=(const ArenaAllocator& other) = delete;
-    ArenaAllocator& operator=(ArenaAllocator&& other) = delete;
+    ArenaAllocator& operator=(ArenaAllocator&& other)      = delete;
 
     struct PageDescriptor
     {
@@ -52,7 +52,7 @@ class ArenaAllocator
     void* allocateNewPage(size_t size);
 
     static void* allocateHostMemory(size_t size, size_t* pActualSize);
-    static void freeHostMemory(void* block, size_t size);
+    static void  freeHostMemory(void* block, size_t size);
 
 #if MEASURE_MEM_ALLOC
     struct MemStats
@@ -125,8 +125,8 @@ class ArenaAllocator
 
 public:
     MemStatsAllocator* getMemStatsAllocator(CompMemKind kind);
-    void finishMemStats();
-    void dumpMemStats(FILE* file);
+    void               finishMemStats();
+    void               dumpMemStats(FILE* file);
 
     static void dumpMaxMemStats(FILE* file);
     static void dumpAggregateMemStats(FILE* file);
@@ -276,7 +276,8 @@ class CompIAllocator : public IAllocator
     char          m_zeroLenAllocTarg;
 
 public:
-    CompIAllocator(CompAllocator alloc) : m_alloc(alloc)
+    CompIAllocator(CompAllocator alloc)
+        : m_alloc(alloc)
     {
     }
 
diff --git a/src/coreclr/jit/arraystack.h b/src/coreclr/jit/arraystack.h
index 83a43c9432ba..5d8a697a3820 100644
--- a/src/coreclr/jit/arraystack.h
+++ b/src/coreclr/jit/arraystack.h
@@ -10,7 +10,8 @@ class ArrayStack
     static const int builtinSize = 8;
 
 public:
-    explicit ArrayStack(CompAllocator alloc, int initialCapacity = builtinSize) : m_alloc(alloc)
+    explicit ArrayStack(CompAllocator alloc, int initialCapacity = builtinSize)
+        : m_alloc(alloc)
     {
         if (initialCapacity > builtinSize)
         {
diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp
index 692b470b4755..ebcb101663a3 100644
--- a/src/coreclr/jit/assertionprop.cpp
+++ b/src/coreclr/jit/assertionprop.cpp
@@ -673,7 +673,7 @@ void Compiler::optAssertionInit(bool isLocalProp)
         // Local assertion prop keeps mappings from each local var to the assertions about that var.
         //
         optAssertionDep =
-            new (this, CMK_AssertionProp) JitExpandArray<ASSERT_TP>(getAllocator(CMK_AssertionProp), max(1, lvaCount));
+            new (this, CMK_AssertionProp) JitExpandArray<ASSERT_TP>(getAllocator(CMK_AssertionProp), max(1u, lvaCount));
 
         if (optCrossBlockLocalAssertionProp)
         {
@@ -856,12 +856,35 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex asse
             case O2K_IND_CNS_INT:
                 if (curAssertion->op1.kind == O1K_EXACT_TYPE)
                 {
-                    printf("Exact Type MT(%08X)", dspPtr(curAssertion->op2.u1.iconVal));
-                    assert(curAssertion->op2.HasIconFlag());
+                    ssize_t iconVal = curAssertion->op2.u1.iconVal;
+                    if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) || opts.IsReadyToRun())
+                    {
+                        printf("Exact Type MT(0x%p)", dspPtr(iconVal));
+                    }
+                    else
+                    {
+                        printf("Exact Type MT(0x%p %s)", dspPtr(iconVal),
+                               eeGetClassName((CORINFO_CLASS_HANDLE)iconVal));
+                    }
+
+                    // We might want to assert:
+                    //      assert(curAssertion->op2.HasIconFlag());
+                    // However, if we run CSE with shared constant mode, we may end up with an expression instead
+                    // of the original handle value. If we then use JitOptRepeat to re-build value numbers, we lose
+                    // knowledge that the constant was ever a handle, as the expression creating the original value
+                    // was not (and can't be) assigned a handle flag.
                 }
                 else if (curAssertion->op1.kind == O1K_SUBTYPE)
                 {
-                    printf("MT(%08X)", dspPtr(curAssertion->op2.u1.iconVal));
+                    ssize_t iconVal = curAssertion->op2.u1.iconVal;
+                    if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) || opts.IsReadyToRun())
+                    {
+                        printf("MT(0x%p)", dspPtr(iconVal));
+                    }
+                    else
+                    {
+                        printf("MT(0x%p %s)", dspPtr(iconVal), eeGetClassName((CORINFO_CLASS_HANDLE)iconVal));
+                    }
                     assert(curAssertion->op2.HasIconFlag());
                 }
                 else if ((curAssertion->op1.kind == O1K_BOUND_OPER_BND) ||
@@ -1368,7 +1391,7 @@ AssertionIndex Compiler::optCreateAssertion(GenTree*         op1,
                     {
                         noway_assert(op2->gtOper == GT_CNS_DBL);
                         /* If we have an NaN value then don't record it */
-                        if (_isnan(op2->AsDblCon()->DconValue()))
+                        if (FloatingPointUtils::isNaN(op2->AsDblCon()->DconValue()))
                         {
                             goto DONE_ASSERTION; // Don't make an assertion
                         }
@@ -1501,9 +1524,8 @@ AssertionIndex Compiler::optCreateAssertion(GenTree*         op1,
             assertion.op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
 
             assert((assertion.op1.lcl.ssaNum == SsaConfig::RESERVED_SSA_NUM) ||
-                   (assertion.op1.vn ==
-                    vnStore->VNConservativeNormalValue(
-                        lvaGetDesc(lclNum)->GetPerSsaData(assertion.op1.lcl.ssaNum)->m_vnPair)));
+                   (assertion.op1.vn == vnStore->VNConservativeNormalValue(
+                                            lvaGetDesc(lclNum)->GetPerSsaData(assertion.op1.lcl.ssaNum)->m_vnPair)));
 
             ssize_t      cnsValue  = 0;
             GenTreeFlags iconFlags = GTF_EMPTY;
@@ -1691,8 +1713,8 @@ bool Compiler::optAssertionVnInvolvesNan(AssertionDsc* assertion)
         if (vnStore->IsVNConstant(vns[i]))
         {
             var_types type = vnStore->TypeOfVN(vns[i]);
-            if ((type == TYP_FLOAT && _isnan(vnStore->ConstantValue<float>(vns[i])) != 0) ||
-                (type == TYP_DOUBLE && _isnan(vnStore->ConstantValue<double>(vns[i])) != 0))
+            if ((type == TYP_FLOAT && FloatingPointUtils::isNaN(vnStore->ConstantValue<float>(vns[i])) != 0) ||
+                (type == TYP_DOUBLE && FloatingPointUtils::isNaN(vnStore->ConstantValue<double>(vns[i])) != 0))
             {
                 return true;
             }
@@ -1860,7 +1882,6 @@ void Compiler::optDebugCheckAssertion(AssertionDsc* assertion)
             {
                 case O1K_EXACT_TYPE:
                 case O1K_SUBTYPE:
-                    assert(assertion->op2.HasIconFlag());
                     break;
                 case O1K_LCLVAR:
                     assert((lvaGetDesc(assertion->op1.lcl.lclNum)->lvType != TYP_REF) ||
@@ -2621,28 +2642,20 @@ AssertionIndex Compiler::optAssertionIsSubtype(GenTree* tree, GenTree* methodTab
     {
         AssertionIndex const index        = GetAssertionIndex(bvIndex);
         AssertionDsc*        curAssertion = optGetAssertion(index);
-        if (curAssertion->assertionKind != OAK_EQUAL ||
-            (curAssertion->op1.kind != O1K_SUBTYPE && curAssertion->op1.kind != O1K_EXACT_TYPE))
-        {
-            continue;
-        }
-
-        // If local assertion prop use "lcl" based comparison, if global assertion prop use vn based comparison.
-        if ((optLocalAssertionProp) ? (curAssertion->op1.lcl.lclNum != tree->AsLclVarCommon()->GetLclNum())
-                                    : (curAssertion->op1.vn != vnStore->VNConservativeNormalValue(tree->gtVNPair)))
-        {
+        if ((curAssertion->assertionKind != OAK_EQUAL) ||
+            ((curAssertion->op1.kind != O1K_SUBTYPE) && (curAssertion->op1.kind != O1K_EXACT_TYPE)))
+        {
+            // TODO-CQ: We might benefit from OAK_NOT_EQUAL assertion as well, e.g.:
+            // if (obj is not MyClass) // obj is known to be never of MyClass class
+            // {
+            //     if (obj is MyClass) // can be folded to false
+            //     {
+            //
             continue;
         }
 
-        if (curAssertion->op2.kind == O2K_IND_CNS_INT)
-        {
-            if (methodTableArg->gtOper != GT_IND)
-            {
-                continue;
-            }
-            methodTableArg = methodTableArg->AsOp()->gtOp1;
-        }
-        else if (curAssertion->op2.kind != O2K_CONST_INT)
+        if ((curAssertion->op1.vn != vnStore->VNConservativeNormalValue(tree->gtVNPair) ||
+             (curAssertion->op2.kind != O2K_CONST_INT)))
         {
             continue;
         }
@@ -2656,6 +2669,8 @@ AssertionIndex Compiler::optAssertionIsSubtype(GenTree* tree, GenTree* methodTab
 
         if (curAssertion->op2.u1.iconVal == methodTableVal)
         {
+            // TODO-CQ: if they don't match, we might still be able to prove that the result is foldable via
+            // compareTypesForCast.
             return index;
         }
     }
@@ -2663,7 +2678,107 @@ AssertionIndex Compiler::optAssertionIsSubtype(GenTree* tree, GenTree* methodTab
 }
 
 //------------------------------------------------------------------------------
-// optVNConstantPropOnTree: Substitutes tree with an evaluated constant while
+// optVNBasedFoldExpr_Call: Folds given call using VN to a simpler tree.
+//
+// Arguments:
+//    block  -  The block containing the tree.
+//    parent -  The parent node of the tree.
+//    call   -  The call to fold
+//
+// Return Value:
+//    Returns a new tree or nullptr if nothing is changed.
+//
+GenTree* Compiler::optVNBasedFoldExpr_Call(BasicBlock* block, GenTree* parent, GenTreeCall* call)
+{
+    switch (call->GetHelperNum())
+    {
+        case CORINFO_HELP_CHKCASTARRAY:
+        case CORINFO_HELP_CHKCASTANY:
+        case CORINFO_HELP_CHKCASTINTERFACE:
+        case CORINFO_HELP_CHKCASTCLASS:
+        case CORINFO_HELP_ISINSTANCEOFARRAY:
+        case CORINFO_HELP_ISINSTANCEOFCLASS:
+        case CORINFO_HELP_ISINSTANCEOFANY:
+        case CORINFO_HELP_ISINSTANCEOFINTERFACE:
+        {
+            GenTree* castClsArg = call->gtArgs.GetUserArgByIndex(0)->GetNode();
+            GenTree* castObjArg = call->gtArgs.GetUserArgByIndex(1)->GetNode();
+
+            if ((castObjArg->gtFlags & GTF_ALL_EFFECT) != 0)
+            {
+                // It won't be trivial to properly extract side-effects from the call node.
+                // Ideally, we only need side effects from the castClsArg argument as the call itself
+                // won't throw any exceptions. But we should not forget about the EarlyNode (setup args)
+                return nullptr;
+            }
+
+            // If object has the same VN as the cast, then the cast is effectively a no-op.
+            //
+            if (castObjArg->gtVNPair == call->gtVNPair)
+            {
+                return gtWrapWithSideEffects(castObjArg, call, GTF_ALL_EFFECT, true);
+            }
+
+            // Let's see if gtGetClassHandle may help us to fold the cast (since VNForCast did not).
+            if (castClsArg->IsIconHandle(GTF_ICON_CLASS_HDL))
+            {
+                bool                 isExact;
+                bool                 isNonNull;
+                CORINFO_CLASS_HANDLE castFrom = gtGetClassHandle(castObjArg, &isExact, &isNonNull);
+                if (castFrom != NO_CLASS_HANDLE)
+                {
+                    CORINFO_CLASS_HANDLE castTo = gtGetHelperArgClassHandle(castClsArg);
+                    if (info.compCompHnd->compareTypesForCast(castFrom, castTo) == TypeCompareState::Must)
+                    {
+                        return gtWrapWithSideEffects(castObjArg, call, GTF_ALL_EFFECT, true);
+                    }
+                }
+            }
+        }
+        break;
+
+        default:
+            break;
+    }
+
+    return nullptr;
+}
+
+//------------------------------------------------------------------------------
+// optVNBasedFoldExpr: Folds given tree using VN to a constant or a simpler tree.
+//
+// Arguments:
+//    block  -  The block containing the tree.
+//    parent -  The parent node of the tree.
+//    tree   -  The tree to fold.
+//
+// Return Value:
+//    Returns a new tree or nullptr if nothing is changed.
+//
+GenTree* Compiler::optVNBasedFoldExpr(BasicBlock* block, GenTree* parent, GenTree* tree)
+{
+    // First, attempt to fold it to a constant if possible.
+    GenTree* foldedToCns = optVNBasedFoldConstExpr(block, parent, tree);
+    if (foldedToCns != nullptr)
+    {
+        return foldedToCns;
+    }
+
+    switch (tree->OperGet())
+    {
+        case GT_CALL:
+            return optVNBasedFoldExpr_Call(block, parent, tree->AsCall());
+
+            // We can add more VN-based foldings here.
+
+        default:
+            break;
+    }
+    return nullptr;
+}
+
+//------------------------------------------------------------------------------
+// optVNBasedFoldConstExpr: Substitutes tree with an evaluated constant while
 //                          managing side-effects.
 //
 // Arguments:
@@ -2690,7 +2805,7 @@ AssertionIndex Compiler::optAssertionIsSubtype(GenTree* tree, GenTree* methodTab
 //    the relop will evaluate to "true" or "false" statically, then the side-effects
 //    will be put into new statements, presuming the JTrue will be folded away.
 //
-GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* parent, GenTree* tree)
+GenTree* Compiler::optVNBasedFoldConstExpr(BasicBlock* block, GenTree* parent, GenTree* tree)
 {
     if (tree->OperGet() == GT_JTRUE)
     {
@@ -2932,6 +3047,18 @@ GenTree* Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTree* parent, G
             break;
         }
         break;
+
+        case TYP_MASK:
+        {
+            simdmask_t value = vnStore->ConstantValue<simdmask_t>(vnCns);
+
+            GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet());
+            memcpy(&vecCon->gtSimdVal, &value, sizeof(simdmask_t));
+
+            conValTree = vecCon;
+            break;
+        }
+        break;
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -3197,7 +3324,7 @@ bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock,
 //
 GenTree* Compiler::optConstantAssertionProp(AssertionDsc*        curAssertion,
                                             GenTreeLclVarCommon* tree,
-                                            Statement* stmt DEBUGARG(AssertionIndex index))
+                                            Statement* stmt      DEBUGARG(AssertionIndex index))
 {
     const unsigned lclNum = tree->GetLclNum();
 
@@ -3452,7 +3579,7 @@ bool Compiler::optAssertionProp_LclVarTypeCheck(GenTree* tree, LclVarDsc* lclVar
 //
 GenTree* Compiler::optCopyAssertionProp(AssertionDsc*        curAssertion,
                                         GenTreeLclVarCommon* tree,
-                                        Statement* stmt DEBUGARG(AssertionIndex index))
+                                        Statement* stmt      DEBUGARG(AssertionIndex index))
 {
     const AssertionDsc::AssertionDscOp1& op1 = curAssertion->op1;
     const AssertionDsc::AssertionDscOp2& op2 = curAssertion->op2;
@@ -4100,20 +4227,20 @@ AssertionIndex Compiler::optGlobalAssertionIsEqualOrNotEqual(ASSERT_VALARG_TP as
             return assertionIndex;
         }
 
-        // Look for matching exact type assertions based on vtable accesses
+        // Look for matching exact type assertions based on vtable accesses. E.g.:
+        //
+        //   op1:       VNF_InvariantLoad(myObj) or in other words: a vtable access
+        //   op2:       'MyType' class handle
+        //   Assertion: 'myObj's type is exactly MyType
+        //
         if ((curAssertion->assertionKind == OAK_EQUAL) && (curAssertion->op1.kind == O1K_EXACT_TYPE) &&
-            op1->OperIs(GT_IND))
+            (curAssertion->op2.vn == vnStore->VNConservativeNormalValue(op2->gtVNPair)) && op1->TypeIs(TYP_I_IMPL))
         {
-            GenTree* indirAddr = op1->AsIndir()->Addr();
-
-            if (indirAddr->OperIs(GT_LCL_VAR) && (indirAddr->TypeGet() == TYP_REF))
+            VNFuncApp funcApp;
+            if (vnStore->GetVNFunc(vnStore->VNConservativeNormalValue(op1->gtVNPair), &funcApp) &&
+                (funcApp.m_func == VNF_InvariantLoad) && (curAssertion->op1.vn == funcApp.m_args[0]))
             {
-                // op1 is accessing vtable of a ref type local var
-                if ((curAssertion->op1.vn == vnStore->VNConservativeNormalValue(indirAddr->gtVNPair)) &&
-                    (curAssertion->op2.vn == vnStore->VNConservativeNormalValue(op2->gtVNPair)))
-                {
-                    return assertionIndex;
-                }
+                return assertionIndex;
             }
         }
     }
@@ -4351,7 +4478,7 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen
             // which will yield a false correctly. Instead if IL had "op1 != NaN", then we already
             // made op1 NaN which will yield a true correctly. Note that this is irrespective of the
             // assertion we have made.
-            allowReverse = (_isnan(constant) == 0);
+            allowReverse = !FloatingPointUtils::isNaN(constant);
         }
         else if (op1->TypeGet() == TYP_FLOAT)
         {
@@ -4359,7 +4486,7 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen
             op1->BashToConst(constant);
 
             // See comments for TYP_DOUBLE.
-            allowReverse = (_isnan(constant) == 0);
+            allowReverse = !FloatingPointUtils::isNaN(constant);
         }
         else if (op1->TypeGet() == TYP_REF)
         {
@@ -4401,8 +4528,9 @@ GenTree* Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, Gen
         {
             printf("\nVN relop based copy assertion prop in " FMT_BB ":\n", compCurBB->bbNum);
             printf("Assertion index=#%02u: V%02d.%02d %s V%02d.%02d\n", index, op1->AsLclVar()->GetLclNum(),
-                   op1->AsLclVar()->GetSsaNum(), (curAssertion->assertionKind == OAK_EQUAL) ? "==" : "!=",
-                   op2->AsLclVar()->GetLclNum(), op2->AsLclVar()->GetSsaNum());
+                   op1->AsLclVar()->GetSsaNum(),
+                   (curAssertion->assertionKind == OAK_EQUAL) ? "==" : "!=", op2->AsLclVar()->GetLclNum(),
+                   op2->AsLclVar()->GetSsaNum());
             gtDispTree(tree, nullptr, nullptr, true);
         }
 #endif
@@ -4696,7 +4824,7 @@ GenTree* Compiler::optAssertionProp_Ind(ASSERT_VALARG_TP assertions, GenTree* tr
 //   If both VN and assertion table yield a matching assertion, "pVnBased"
 //   is only set and the return value is "NO_ASSERTION_INDEX."
 //
-bool Compiler::optAssertionIsNonNull(GenTree*         op,
+bool Compiler::optAssertionIsNonNull(GenTree*                    op,
                                      ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased)
                                          DEBUGARG(AssertionIndex* pIndex))
 {
@@ -4743,7 +4871,7 @@ bool Compiler::optAssertionIsNonNull(GenTree*         op,
 // Return Value:
 //   index of assertion, or NO_ASSERTION_INDEX
 //
-AssertionIndex Compiler::optAssertionIsNonNullInternal(GenTree*         op,
+AssertionIndex Compiler::optAssertionIsNonNullInternal(GenTree*                    op,
                                                        ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased))
 {
 
@@ -5088,7 +5216,8 @@ GenTree* Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCal
     {
         return optAssertionProp_Update(call, call, stmt);
     }
-    else if (!optLocalAssertionProp && call->IsHelperCall())
+
+    if (!optLocalAssertionProp && call->IsHelperCall())
     {
         const CorInfoHelpFunc helper = eeGetHelperNum(call->gtCallMethHnd);
         if ((helper == CORINFO_HELP_ISINSTANCEOFINTERFACE) || (helper == CORINFO_HELP_ISINSTANCEOFARRAY) ||
@@ -5097,33 +5226,21 @@ GenTree* Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCal
             (helper == CORINFO_HELP_CHKCASTCLASS) || (helper == CORINFO_HELP_CHKCASTANY) ||
             (helper == CORINFO_HELP_CHKCASTCLASS_SPECIAL))
         {
-            GenTree* arg1 = call->gtArgs.GetArgByIndex(1)->GetNode();
-            if (arg1->gtOper != GT_LCL_VAR)
-            {
-                return nullptr;
-            }
+            GenTree* castToArg = call->gtArgs.GetArgByIndex(0)->GetNode();
+            GenTree* objArg    = call->gtArgs.GetArgByIndex(1)->GetNode();
 
-            GenTree* arg2 = call->gtArgs.GetArgByIndex(0)->GetNode();
-
-            unsigned index = optAssertionIsSubtype(arg1, arg2, assertions);
-            if (index != NO_ASSERTION_INDEX)
+            // We require objArg to be side effect free due to limitations in gtWrapWithSideEffects
+            if ((objArg->gtFlags & GTF_ALL_EFFECT) == 0)
             {
-#ifdef DEBUG
-                if (verbose)
+                const unsigned index = optAssertionIsSubtype(objArg, castToArg, assertions);
+                if (index != NO_ASSERTION_INDEX)
                 {
-                    printf("\nDid VN based subtype prop for index #%02u in " FMT_BB ":\n", index, compCurBB->bbNum);
-                    gtDispTree(call, nullptr, nullptr, true);
-                }
-#endif
-                GenTree* list = nullptr;
-                gtExtractSideEffList(call, &list, GTF_SIDE_EFFECT, true);
-                if (list != nullptr)
-                {
-                    arg1 = gtNewOperNode(GT_COMMA, call->TypeGet(), list, arg1);
-                    fgSetTreeSeq(arg1);
-                }
+                    JITDUMP("\nDid VN based subtype prop for index #%02u in " FMT_BB ":\n", index, compCurBB->bbNum);
+                    DISPTREE(call);
 
-                return optAssertionProp_Update(arg1, call, stmt);
+                    objArg = gtWrapWithSideEffects(objArg, call, GTF_SIDE_EFFECT, true);
+                    return optAssertionProp_Update(objArg, call, stmt);
+                }
             }
 
             // Leave a hint for fgLateCastExpansion that obj is never null.
@@ -5131,7 +5248,7 @@ GenTree* Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCal
             INDEBUG(bool vnBased = false);
             // GTF_CALL_M_CAST_CAN_BE_EXPANDED check is to improve TP
             if (((call->gtCallMoreFlags & GTF_CALL_M_CAST_CAN_BE_EXPANDED) != 0) &&
-                optAssertionIsNonNull(arg1, assertions DEBUGARG(&vnBased) DEBUGARG(&nonNullIdx)))
+                optAssertionIsNonNull(objArg, assertions DEBUGARG(&vnBased) DEBUGARG(&nonNullIdx)))
             {
                 call->gtCallMoreFlags |= GTF_CALL_M_CAST_OBJ_NONNULL;
                 return optAssertionProp_Update(call, call, stmt);
@@ -5156,12 +5273,12 @@ GenTree* Compiler::optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, GenTree
     assert(tree->OperIs(GT_BOUNDS_CHECK));
 
 #ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
-    if (JitConfig.JitNoRangeChks())
+    if (JitConfig.JitNoRngChks())
     {
 #ifdef DEBUG
         if (verbose)
         {
-            printf("\nFlagging check redundant due to JitNoRangeChks in " FMT_BB ":\n", compCurBB->bbNum);
+            printf("\nFlagging check redundant due to JitNoRngChks in " FMT_BB ":\n", compCurBB->bbNum);
             gtDispTree(tree, nullptr, nullptr, true);
         }
 #endif // DEBUG
@@ -5317,6 +5434,25 @@ GenTree* Compiler::optAssertionProp_Update(GenTree* newTree, GenTree* tree, Stat
             if (parent != nullptr)
             {
                 parent->ReplaceOperand(useEdge, newTree);
+
+                // If the parent is a GT_IND and we replaced the child with a handle constant, we might need
+                // to mark the GT_IND as invariant. This is the same as what gtNewIndOfIconHandleNode() does.
+                // Review: should some kind of more general morphing take care of this?
+                // Should this share code with gtNewIndOfIconHandleNode()?
+
+                if (parent->OperIs(GT_IND) && newTree->IsIconHandle())
+                {
+                    GenTreeFlags iconFlags = newTree->GetIconHandleFlag();
+                    if (GenTree::HandleKindDataIsInvariant(iconFlags))
+                    {
+                        parent->gtFlags |= GTF_IND_INVARIANT;
+                        if (iconFlags == GTF_ICON_STR_HDL)
+                        {
+                            // String literals are never null
+                            parent->gtFlags |= GTF_IND_NONNULL;
+                        }
+                    }
+                }
             }
             else
             {
@@ -5388,7 +5524,6 @@ GenTree* Compiler::optAssertionProp(ASSERT_VALARG_TP assertions, GenTree* tree,
         case GT_IND:
         case GT_STOREIND:
         case GT_NULLCHECK:
-        case GT_STORE_DYN_BLK:
             return optAssertionProp_Ind(assertions, tree, stmt);
 
         case GT_BOUNDS_CHECK:
@@ -6042,7 +6177,7 @@ ASSERT_TP* Compiler::optComputeAssertionGen()
                 AssertionIndex valueAssertionIndex;
                 AssertionIndex jumpDestAssertionIndex;
 
-                if (info.IsNextEdgeAssertion())
+                if (info.AssertionHoldsOnFalseEdge())
                 {
                     valueAssertionIndex    = info.GetAssertionIndex();
                     jumpDestAssertionIndex = optFindComplementary(info.GetAssertionIndex());
@@ -6148,7 +6283,9 @@ struct VNAssertionPropVisitorInfo
     Statement*  stmt;
     BasicBlock* block;
     VNAssertionPropVisitorInfo(Compiler* pThis, BasicBlock* block, Statement* stmt)
-        : pThis(pThis), stmt(stmt), block(block)
+        : pThis(pThis)
+        , stmt(stmt)
+        , block(block)
     {
     }
 };
@@ -6300,8 +6437,8 @@ GenTree* Compiler::optVNConstantPropOnJTrue(BasicBlock* block, GenTree* test)
 }
 
 //------------------------------------------------------------------------------
-// optVNConstantPropCurStmt
-//    Performs constant prop on the current statement's tree nodes.
+// optVNBasedFoldCurStmt: Performs VN-based folding
+//    on the current statement's tree nodes using VN.
 //
 // Assumption:
 //    This function is called as part of a post-order tree walk.
@@ -6315,17 +6452,12 @@ GenTree* Compiler::optVNConstantPropOnJTrue(BasicBlock* block, GenTree* test)
 // Return Value:
 //    Returns the standard visitor walk result.
 //
-// Description:
-//    Checks if a node is an R-value and evaluates to a constant. If the node
-//    evaluates to constant, then the tree is replaced by its side effects and
-//    the constant node.
-//
-Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block,
-                                                          Statement*  stmt,
-                                                          GenTree*    parent,
-                                                          GenTree*    tree)
+Compiler::fgWalkResult Compiler::optVNBasedFoldCurStmt(BasicBlock* block,
+                                                       Statement*  stmt,
+                                                       GenTree*    parent,
+                                                       GenTree*    tree)
 {
-    // Don't perform const prop on expressions marked with GTF_DONT_CSE
+    // Don't try and fold expressions marked with GTF_DONT_CSE
     // TODO-ASG: delete.
     if (!tree->CanCSE())
     {
@@ -6413,8 +6545,8 @@ Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block,
             return WALK_CONTINUE;
     }
 
-    // Perform the constant propagation
-    GenTree* newTree = optVNConstantPropOnTree(block, parent, tree);
+    // Perform the VN-based folding:
+    GenTree* newTree = optVNBasedFoldExpr(block, parent, tree);
 
     if (newTree == nullptr)
     {
@@ -6427,7 +6559,7 @@ Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block,
 
     optAssertionProp_Update(newTree, tree, stmt);
 
-    JITDUMP("After constant propagation on [%06u]:\n", tree->gtTreeID);
+    JITDUMP("After VN-based fold of [%06u]:\n", tree->gtTreeID);
     DBEXEC(VERBOSE, gtDispStmt(stmt));
 
     return WALK_CONTINUE;
@@ -6495,7 +6627,7 @@ Compiler::fgWalkResult Compiler::optVNAssertionPropCurStmtVisitor(GenTree** ppTr
 
     pThis->optVnNonNullPropCurStmt(pData->block, pData->stmt, *ppTree);
 
-    return pThis->optVNConstantPropCurStmt(pData->block, pData->stmt, data->parent, *ppTree);
+    return pThis->optVNBasedFoldCurStmt(pData->block, pData->stmt, data->parent, *ppTree);
 }
 
 /*****************************************************************************
diff --git a/src/coreclr/jit/bitset.h b/src/coreclr/jit/bitset.h
index b34d1f04b85f..6f1e3d8dcd0d 100644
--- a/src/coreclr/jit/bitset.h
+++ b/src/coreclr/jit/bitset.h
@@ -59,7 +59,10 @@ class BitSetSupport
         FILE*       OpOutputFile;
 
     public:
-        BitSetOpCounter(const char* fileName) : TotalOps(0), m_fileName(fileName), OpOutputFile(nullptr)
+        BitSetOpCounter(const char* fileName)
+            : TotalOps(0)
+            , m_fileName(fileName)
+            , OpOutputFile(nullptr)
         {
             for (unsigned i = 0; i < BSOP_NUMOPS; i++)
             {
@@ -435,7 +438,9 @@ class BitSetOpsWithCounter
         Env      m_env;
 
     public:
-        Iter(Env env, BitSetValueArgType bs) : m_iter(env, bs), m_env(env)
+        Iter(Env env, BitSetValueArgType bs)
+            : m_iter(env, bs)
+            , m_env(env)
         {
         }
 
@@ -449,8 +454,8 @@ class BitSetOpsWithCounter
 
 // We define symbolic names for the various bitset implementations available, to allow choices between them.
 
-#define BSUInt64 0
-#define BSShortLong 1
+#define BSUInt64      0
+#define BSShortLong   1
 #define BSUInt64Class 2
 
 /*****************************************************************************/
diff --git a/src/coreclr/jit/bitsetasshortlong.h b/src/coreclr/jit/bitsetasshortlong.h
index 2ef293820fd2..006f66fc178d 100644
--- a/src/coreclr/jit/bitsetasshortlong.h
+++ b/src/coreclr/jit/bitsetasshortlong.h
@@ -32,36 +32,36 @@ class BitSetOps</*BitSetType*/ BitSetShortLongRep,
     }
 
     // The operations on the "long" (pointer-to-array-of-size_t) versions of the representation.
-    static void AssignLong(Env env, BitSetShortLongRep& lhs, BitSetShortLongRep rhs);
+    static void               AssignLong(Env env, BitSetShortLongRep& lhs, BitSetShortLongRep rhs);
     static BitSetShortLongRep MakeSingletonLong(Env env, unsigned bitNum);
     static BitSetShortLongRep MakeCopyLong(Env env, BitSetShortLongRep bs);
-    static bool IsEmptyLong(Env env, BitSetShortLongRep bs);
-    static unsigned CountLong(Env env, BitSetShortLongRep bs);
-    static bool IsEmptyUnionLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
-    static void UnionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
-    static bool UnionDLongChanged(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
-    static void DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
-    static void AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
-    static bool TryAddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
-    static void RemoveElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
-    static void ClearDLong(Env env, BitSetShortLongRep& bs);
+    static bool               IsEmptyLong(Env env, BitSetShortLongRep bs);
+    static unsigned           CountLong(Env env, BitSetShortLongRep bs);
+    static bool               IsEmptyUnionLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
+    static void               UnionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
+    static bool               UnionDLongChanged(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
+    static void               DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
+    static void               AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
+    static bool               TryAddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
+    static void               RemoveElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
+    static void               ClearDLong(Env env, BitSetShortLongRep& bs);
     static BitSetShortLongRep MakeUninitArrayBits(Env env);
     static BitSetShortLongRep MakeEmptyArrayBits(Env env);
     static BitSetShortLongRep MakeFullArrayBits(Env env);
-    static bool IsMemberLong(Env env, BitSetShortLongRep bs, unsigned i);
-    static bool EqualLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
-    static bool IsSubsetLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
-    static bool IsEmptyIntersectionLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
-    static void IntersectionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
-    static void DataFlowDLong(Env                      env,
-                              BitSetShortLongRep&      out,
-                              const BitSetShortLongRep gen,
-                              const BitSetShortLongRep in);
-    static void LivenessDLong(Env                      env,
-                              BitSetShortLongRep&      in,
-                              const BitSetShortLongRep def,
-                              const BitSetShortLongRep use,
-                              const BitSetShortLongRep out);
+    static bool               IsMemberLong(Env env, BitSetShortLongRep bs, unsigned i);
+    static bool               EqualLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
+    static bool               IsSubsetLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
+    static bool               IsEmptyIntersectionLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
+    static void               IntersectionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
+    static void               DataFlowDLong(Env                      env,
+                                            BitSetShortLongRep&      out,
+                                            const BitSetShortLongRep gen,
+                                            const BitSetShortLongRep in);
+    static void               LivenessDLong(Env                      env,
+                                            BitSetShortLongRep&      in,
+                                            const BitSetShortLongRep def,
+                                            const BitSetShortLongRep use,
+                                            const BitSetShortLongRep out);
 #ifdef DEBUG
     static const char* ToStringLong(Env env, BitSetShortLongRep bs);
 #endif
@@ -500,7 +500,9 @@ class BitSetOps</*BitSetType*/ BitSetShortLongRep,
         unsigned m_bitNum;
 
     public:
-        Iter(Env env, const BitSetShortLongRep& bs) : m_bs(bs), m_bitNum(0)
+        Iter(Env env, const BitSetShortLongRep& bs)
+            : m_bs(bs)
+            , m_bitNum(0)
         {
             if (BitSetOps::IsShort(env))
             {
diff --git a/src/coreclr/jit/bitsetasuint64.h b/src/coreclr/jit/bitsetasuint64.h
index 27ebad72094c..7b757833ce0a 100644
--- a/src/coreclr/jit/bitsetasuint64.h
+++ b/src/coreclr/jit/bitsetasuint64.h
@@ -215,7 +215,9 @@ class BitSetOps</*BitSetType*/ UINT64,
         unsigned m_bitNum;
 
     public:
-        Iter(Env env, const UINT64& bits) : m_bits(bits), m_bitNum(0)
+        Iter(Env env, const UINT64& bits)
+            : m_bits(bits)
+            , m_bitNum(0)
         {
         }
 
diff --git a/src/coreclr/jit/bitsetasuint64inclass.h b/src/coreclr/jit/bitsetasuint64inclass.h
index 0424a87ad8c4..eee51638de5e 100644
--- a/src/coreclr/jit/bitsetasuint64inclass.h
+++ b/src/coreclr/jit/bitsetasuint64inclass.h
@@ -256,7 +256,8 @@ class BitSetUint64ValueRetType
     BitSetUint64<Env, BitSetTraits> m_bs;
 
 public:
-    BitSetUint64ValueRetType(const BitSetUint64<Env, BitSetTraits>& bs) : m_bs(bs)
+    BitSetUint64ValueRetType(const BitSetUint64<Env, BitSetTraits>& bs)
+        : m_bs(bs)
     {
     }
 };
@@ -451,7 +452,9 @@ class BitSetOps</*BitSetType*/ BitSetUint64<Env, BitSetTraits>,
         unsigned m_bitNum;
 
     public:
-        Iter(Env env, const BitSetUint64<Env, BitSetTraits>& bs) : m_bits(bs.m_bits), m_bitNum(0)
+        Iter(Env env, const BitSetUint64<Env, BitSetTraits>& bs)
+            : m_bits(bs.m_bits)
+            , m_bitNum(0)
         {
         }
 
diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp
index e71566da1f76..60dbce6aaf00 100644
--- a/src/coreclr/jit/block.cpp
+++ b/src/coreclr/jit/block.cpp
@@ -34,7 +34,7 @@ unsigned BasicBlock::s_nMaxTrees;
 FlowEdge* ShuffleHelper(unsigned hash, FlowEdge* res)
 {
     FlowEdge* head = res;
-    for (FlowEdge *prev = nullptr; res != nullptr; prev = res, res = res->getNextPredEdge())
+    for (FlowEdge* prev = nullptr; res != nullptr; prev = res, res = res->getNextPredEdge())
     {
         unsigned blkHash = (hash ^ (res->getSourceBlock()->bbNum << 16) ^ res->getSourceBlock()->bbNum);
         if (((blkHash % 1879) & 1) && prev != nullptr)
@@ -68,6 +68,71 @@ unsigned SsaStressHashHelper()
 }
 #endif
 
+//------------------------------------------------------------------------
+// setLikelihood: set the likelihood of a flow edge
+//
+// Arguments:
+//   likelihood -- value in range [0.0, 1.0] indicating how likely
+//     the source block is to transfer control along this edge.
+//
+void FlowEdge::setLikelihood(weight_t likelihood)
+{
+    assert(likelihood >= 0.0);
+    assert(likelihood <= 1.0);
+
+#ifdef DEBUG
+    if (m_likelihoodSet)
+    {
+        JITDUMP("setting likelihood of " FMT_BB " -> " FMT_BB " from " FMT_WT " to " FMT_WT "\n", m_sourceBlock->bbNum,
+                m_destBlock->bbNum, m_likelihood, likelihood);
+    }
+    else
+    {
+        JITDUMP("setting likelihood of " FMT_BB " -> " FMT_BB " to " FMT_WT "\n", m_sourceBlock->bbNum,
+                m_destBlock->bbNum, likelihood);
+    }
+
+    m_likelihoodSet = true;
+#endif // DEBUG
+
+    m_likelihood = likelihood;
+}
+
+//------------------------------------------------------------------------
+// addLikelihood: adjust the likelihood of a flow edge
+//
+// Arguments:
+//   addedLikelihood -- value in range [-likelihood, 1.0 - likelihood]
+//     to add to current likelihood.
+//
+void FlowEdge::addLikelihood(weight_t addedLikelihood)
+{
+    assert(m_likelihoodSet);
+
+    weight_t newLikelihood = m_likelihood + addedLikelihood;
+
+    // Tolerate slight overflow or underflow
+    //
+    const weight_t eps = 0.0001;
+
+    if ((newLikelihood < 0) && (newLikelihood > -eps))
+    {
+        newLikelihood = 0.0;
+    }
+    else if ((newLikelihood > 1) && (newLikelihood < 1 + eps))
+    {
+        newLikelihood = 1.0;
+    }
+
+    assert(newLikelihood >= 0.0);
+    assert(newLikelihood <= 1.0);
+
+    JITDUMP("updating likelihood of " FMT_BB " -> " FMT_BB " from " FMT_WT " to " FMT_WT "\n", m_sourceBlock->bbNum,
+            m_destBlock->bbNum, m_likelihood, newLikelihood);
+
+    m_likelihood = newLikelihood;
+}
+
 //------------------------------------------------------------------------
 //  AllSuccessorEnumerator: Construct an instance of the enumerator.
 //
@@ -75,7 +140,8 @@ unsigned SsaStressHashHelper()
 //     comp  - Compiler instance
 //     block - The block whose successors are to be iterated
 //
-AllSuccessorEnumerator::AllSuccessorEnumerator(Compiler* comp, BasicBlock* block) : m_block(block)
+AllSuccessorEnumerator::AllSuccessorEnumerator(Compiler* comp, BasicBlock* block)
+    : m_block(block)
 {
     m_numSuccs = 0;
     block->VisitAllSuccs(comp, [this](BasicBlock* succ) {
@@ -521,7 +587,6 @@ void BasicBlock::dspFlags() const
         {BBF_HAS_ALIGN, "has-align"},
         {BBF_HAS_MDARRAYREF, "mdarr"},
         {BBF_NEEDS_GCPOLL, "gcpoll"},
-        {BBF_NONE_QUIRK, "q"},
     };
 
     bool first = true;
@@ -626,20 +691,33 @@ void BasicBlock::dspSuccs(Compiler* compiler)
 // things strictly.
 void BasicBlock::dspKind() const
 {
-    auto dspBlockNum = [](const BasicBlock* b) -> const char* {
+    auto dspBlockNum = [](const FlowEdge* e) -> const char* {
         static char buffers[3][64]; // static array of 3 to allow 3 concurrent calls in one printf()
         static int  nextBufferIndex = 0;
 
-        auto& buffer    = buffers[nextBufferIndex];
-        nextBufferIndex = (nextBufferIndex + 1) % ArrLen(buffers);
+        auto& buffer              = buffers[nextBufferIndex];
+        nextBufferIndex           = (nextBufferIndex + 1) % ArrLen(buffers);
+        const size_t sizeOfBuffer = ArrLen(buffer);
+        int          written;
 
+        const BasicBlock* b = e->getDestinationBlock();
         if (b == nullptr)
         {
-            _snprintf_s(buffer, ArrLen(buffer), ArrLen(buffer), "NULL");
+            written = _snprintf_s(buffer, sizeOfBuffer, sizeOfBuffer, "NULL");
         }
         else
         {
-            _snprintf_s(buffer, ArrLen(buffer), ArrLen(buffer), FMT_BB, b->bbNum);
+            written = _snprintf_s(buffer, sizeOfBuffer, sizeOfBuffer, FMT_BB, b->bbNum);
+        }
+
+        const bool printEdgeLikelihoods = true; // TODO: parameterize this?
+        if (printEdgeLikelihoods)
+        {
+            if (e->hasLikelihood())
+            {
+                written = _snprintf_s(buffer + written, sizeOfBuffer - written, sizeOfBuffer - written, "(" FMT_WT ")",
+                                      e->getLikelihood());
+            }
         }
 
         return buffer;
@@ -658,8 +736,8 @@ void BasicBlock::dspKind() const
             }
             else
             {
-                const unsigned     jumpCnt = bbEhfTargets->bbeCount;
-                BasicBlock** const jumpTab = bbEhfTargets->bbeSuccs;
+                const unsigned   jumpCnt = bbEhfTargets->bbeCount;
+                FlowEdge** const jumpTab = bbEhfTargets->bbeSuccs;
 
                 for (unsigned i = 0; i < jumpCnt; i++)
                 {
@@ -676,11 +754,11 @@ void BasicBlock::dspKind() const
             break;
 
         case BBJ_EHFILTERRET:
-            printf(" -> %s (fltret)", dspBlockNum(bbTarget));
+            printf(" -> %s (fltret)", dspBlockNum(GetTargetEdge()));
             break;
 
         case BBJ_EHCATCHRET:
-            printf(" -> %s (cret)", dspBlockNum(bbTarget));
+            printf(" -> %s (cret)", dspBlockNum(GetTargetEdge()));
             break;
 
         case BBJ_THROW:
@@ -694,36 +772,36 @@ void BasicBlock::dspKind() const
         case BBJ_ALWAYS:
             if (HasFlag(BBF_KEEP_BBJ_ALWAYS))
             {
-                printf(" -> %s (ALWAYS)", dspBlockNum(bbTarget));
+                printf(" -> %s (ALWAYS)", dspBlockNum(GetTargetEdge()));
             }
             else
             {
-                printf(" -> %s (always)", dspBlockNum(bbTarget));
+                printf(" -> %s (always)", dspBlockNum(GetTargetEdge()));
             }
             break;
 
         case BBJ_LEAVE:
-            printf(" -> %s (leave)", dspBlockNum(bbTarget));
+            printf(" -> %s (leave)", dspBlockNum(GetTargetEdge()));
             break;
 
         case BBJ_CALLFINALLY:
-            printf(" -> %s (callf)", dspBlockNum(bbTarget));
+            printf(" -> %s (callf)", dspBlockNum(GetTargetEdge()));
             break;
 
         case BBJ_CALLFINALLYRET:
-            printf(" -> %s (callfr)", dspBlockNum(bbTarget));
+            printf(" -> %s (callfr)", dspBlockNum(GetTargetEdge()));
             break;
 
         case BBJ_COND:
-            printf(" -> %s,%s (cond)", dspBlockNum(bbTrueTarget), dspBlockNum(bbFalseTarget));
+            printf(" -> %s,%s (cond)", dspBlockNum(GetTrueEdge()), dspBlockNum(GetFalseEdge()));
             break;
 
         case BBJ_SWITCH:
         {
             printf(" ->");
 
-            const unsigned     jumpCnt = bbSwtTargets->bbsCount;
-            BasicBlock** const jumpTab = bbSwtTargets->bbsDstTab;
+            const unsigned   jumpCnt = bbSwtTargets->bbsCount;
+            FlowEdge** const jumpTab = bbSwtTargets->bbsDstTab;
 
             for (unsigned i = 0; i < jumpCnt; i++)
             {
@@ -837,46 +915,6 @@ void BasicBlock::CloneBlockState(Compiler* compiler, BasicBlock* to, const Basic
     }
 }
 
-//------------------------------------------------------------------------
-// CopyTarget: Copy the block kind and targets. The targets in the `from` block remain valid.
-// Use `TransferTarget` to copy the pointer to the target descriptor (e.g., for BBJ_SWITCH/BBJ_EHFINALLYRET)
-// after which the `from` block target is invalid.
-//
-// Arguments:
-//    compiler - Jit compiler instance
-//    from - Block to copy from
-//
-void BasicBlock::CopyTarget(Compiler* compiler, const BasicBlock* from)
-{
-    switch (from->GetKind())
-    {
-        case BBJ_SWITCH:
-            SetSwitch(new (compiler, CMK_BasicBlock) BBswtDesc(compiler, from->GetSwitchTargets()));
-            break;
-        case BBJ_EHFINALLYRET:
-            SetEhf(new (compiler, CMK_BasicBlock) BBehfDesc(compiler, from->GetEhfTargets()));
-            break;
-        case BBJ_COND:
-            SetCond(from->GetTrueTarget(), from->GetFalseTarget());
-            break;
-        case BBJ_ALWAYS:
-            SetKindAndTarget(from->GetKind(), from->GetTarget());
-            CopyFlags(from, BBF_NONE_QUIRK);
-            break;
-        case BBJ_CALLFINALLY:
-        case BBJ_CALLFINALLYRET:
-        case BBJ_EHCATCHRET:
-        case BBJ_EHFILTERRET:
-        case BBJ_LEAVE:
-            SetKindAndTarget(from->GetKind(), from->GetTarget());
-            break;
-        default:
-            SetKindAndTarget(from->GetKind()); // Clear the target
-            break;
-    }
-    assert(KindIs(from->GetKind()));
-}
-
 //------------------------------------------------------------------------
 // TransferTarget: Like CopyTarget, but copies the target descriptors for block types which have
 // them (BBJ_SWITCH/BBJ_EHFINALLYRET), that is, take their memory, after which the `from` block
@@ -897,22 +935,24 @@ void BasicBlock::TransferTarget(BasicBlock* from)
             SetEhf(from->GetEhfTargets());
             from->bbEhfTargets = nullptr; // Make sure nobody uses the descriptor after this.
             break;
+
+        // TransferTarget may be called after setting the source block of `from`'s
+        // successor edges to this block.
+        // This means calling GetTarget/GetTrueTarget/GetFalseTarget would trigger asserts.
+        // Avoid this by accessing the edges directly.
         case BBJ_COND:
-            SetCond(from->GetTrueTarget(), from->GetFalseTarget());
+            SetCond(from->bbTrueEdge, from->bbFalseEdge);
             break;
         case BBJ_ALWAYS:
-            SetKindAndTarget(from->GetKind(), from->GetTarget());
-            CopyFlags(from, BBF_NONE_QUIRK);
-            break;
         case BBJ_CALLFINALLY:
         case BBJ_CALLFINALLYRET:
         case BBJ_EHCATCHRET:
         case BBJ_EHFILTERRET:
         case BBJ_LEAVE:
-            SetKindAndTarget(from->GetKind(), from->GetTarget());
+            SetKindAndTargetEdge(from->GetKind(), from->bbTargetEdge);
             break;
         default:
-            SetKindAndTarget(from->GetKind()); // Clear the target
+            SetKindAndTargetEdge(from->GetKind()); // Clear the target
             break;
     }
     assert(KindIs(from->GetKind()));
@@ -1025,7 +1065,7 @@ BasicBlock* BasicBlock::GetUniquePred(Compiler* compiler) const
 //
 BasicBlock* BasicBlock::GetUniqueSucc() const
 {
-    return KindIs(BBJ_ALWAYS) ? bbTarget : nullptr;
+    return KindIs(BBJ_ALWAYS) ? GetTarget() : nullptr;
 }
 
 // Static vars.
@@ -1185,7 +1225,7 @@ unsigned BasicBlock::NumSucc() const
             return 1;
 
         case BBJ_COND:
-            if (bbTrueTarget == bbFalseTarget)
+            if (bbTrueEdge == bbFalseEdge)
             {
                 return 1;
             }
@@ -1220,15 +1260,15 @@ unsigned BasicBlock::NumSucc() const
 }
 
 //------------------------------------------------------------------------
-// GetSucc: Returns the requested block successor. See the declaration comment for details.
+// GetSucc: Returns the requested successor edge. See the declaration comment for details.
 //
 // Arguments:
 //    i - index of successor to return. 0 <= i <= NumSucc().
 //
 // Return Value:
-//    Requested successor block
+//    Requested successor edge
 //
-BasicBlock* BasicBlock::GetSucc(unsigned i) const
+FlowEdge* BasicBlock::GetSuccEdge(unsigned i) const
 {
     assert(i < NumSucc()); // Index bounds check.
     switch (bbKind)
@@ -1239,18 +1279,18 @@ BasicBlock* BasicBlock::GetSucc(unsigned i) const
         case BBJ_EHCATCHRET:
         case BBJ_EHFILTERRET:
         case BBJ_LEAVE:
-            return bbTarget;
+            return GetTargetEdge();
 
         case BBJ_COND:
             if (i == 0)
             {
-                return bbFalseTarget;
+                return GetFalseEdge();
             }
             else
             {
                 assert(i == 1);
-                assert(bbFalseTarget != bbTrueTarget);
-                return bbTrueTarget;
+                assert(bbTrueEdge != bbFalseEdge);
+                return GetTrueEdge();
             }
 
         case BBJ_EHFINALLYRET:
@@ -1264,6 +1304,20 @@ BasicBlock* BasicBlock::GetSucc(unsigned i) const
     }
 }
 
+//------------------------------------------------------------------------
+// GetSucc: Returns the requested block successor. See the declaration comment for details.
+//
+// Arguments:
+//    i - index of successor to return. 0 <= i <= NumSucc().
+//
+// Return Value:
+//    Requested successor block
+//
+BasicBlock* BasicBlock::GetSucc(unsigned i) const
+{
+    return GetSuccEdge(i)->getDestinationBlock();
+}
+
 //------------------------------------------------------------------------
 // NumSucc: Returns the count of block successors. See the declaration comment for details.
 //
@@ -1310,7 +1364,7 @@ unsigned BasicBlock::NumSucc(Compiler* comp)
             return 1;
 
         case BBJ_COND:
-            if (bbTrueTarget == bbFalseTarget)
+            if (bbTrueEdge == bbFalseEdge)
             {
                 return 1;
             }
@@ -1331,16 +1385,16 @@ unsigned BasicBlock::NumSucc(Compiler* comp)
 }
 
 //------------------------------------------------------------------------
-// GetSucc: Returns the requested block successor. See the declaration comment for details.
+// GetSucc: Returns the requested successor edge. See the declaration comment for details.
 //
 // Arguments:
 //    i - index of successor to return. 0 <= i <= NumSucc(comp).
 //    comp - Compiler instance
 //
 // Return Value:
-//    Requested successor block
+//    Requested successor edge
 //
-BasicBlock* BasicBlock::GetSucc(unsigned i, Compiler* comp)
+FlowEdge* BasicBlock::GetSuccEdge(unsigned i, Compiler* comp)
 {
     assert(comp != nullptr);
 
@@ -1349,8 +1403,8 @@ BasicBlock* BasicBlock::GetSucc(unsigned i, Compiler* comp)
     {
         case BBJ_EHFILTERRET:
             // Handler is the (sole) normal successor of the filter.
-            assert(comp->fgFirstBlockOfHandler(this) == bbTarget);
-            return bbTarget;
+            assert(comp->fgFirstBlockOfHandler(this) == GetTarget());
+            return GetTargetEdge();
 
         case BBJ_EHFINALLYRET:
             assert(bbEhfTargets != nullptr);
@@ -1362,18 +1416,18 @@ BasicBlock* BasicBlock::GetSucc(unsigned i, Compiler* comp)
         case BBJ_ALWAYS:
         case BBJ_EHCATCHRET:
         case BBJ_LEAVE:
-            return bbTarget;
+            return GetTargetEdge();
 
         case BBJ_COND:
             if (i == 0)
             {
-                return bbFalseTarget;
+                return GetFalseEdge();
             }
             else
             {
                 assert(i == 1);
-                assert(bbFalseTarget != bbTrueTarget);
-                return bbTrueTarget;
+                assert(bbTrueEdge != bbFalseEdge);
+                return GetTrueEdge();
             }
 
         case BBJ_SWITCH:
@@ -1388,6 +1442,21 @@ BasicBlock* BasicBlock::GetSucc(unsigned i, Compiler* comp)
     }
 }
 
+//------------------------------------------------------------------------
+// GetSucc: Returns the requested block successor. See the declaration comment for details.
+//
+// Arguments:
+//    i - index of successor to return. 0 <= i <= NumSucc(comp).
+//    comp - Compiler instance
+//
+// Return Value:
+//    Requested successor block
+//
+BasicBlock* BasicBlock::GetSucc(unsigned i, Compiler* comp)
+{
+    return GetSuccEdge(i, comp)->getDestinationBlock();
+}
+
 void BasicBlock::InitVarSets(Compiler* comp)
 {
     VarSetOps::AssignNoCopy(comp, bbVarUse, VarSetOps::MakeEmpty(comp));
@@ -1625,15 +1694,10 @@ BasicBlock* BasicBlock::New(Compiler* compiler)
     return block;
 }
 
-BasicBlock* BasicBlock::New(Compiler* compiler, BBKinds kind, BasicBlock* target /* = nullptr */)
+BasicBlock* BasicBlock::New(Compiler* compiler, BBKinds kind)
 {
     BasicBlock* block = BasicBlock::New(compiler);
-
-    // In some cases, we don't know a block's jump target during initialization, so don't check the jump kind/target
-    // yet.
-    // The checks will be done any time the jump kind/target is read or written to after initialization.
-    block->bbKind   = kind;
-    block->bbTarget = target;
+    block->bbKind     = kind;
 
     if (block->KindIs(BBJ_THROW))
     {
@@ -1748,9 +1812,7 @@ bool BasicBlock::hasEHBoundaryIn() const
     bool returnVal = (bbCatchTyp != BBCT_NONE);
     if (!returnVal)
     {
-#if FEATURE_EH_FUNCLETS
         assert(!HasFlag(BBF_FUNCLET_BEG));
-#endif // FEATURE_EH_FUNCLETS
     }
     return returnVal;
 }
@@ -1769,16 +1831,23 @@ bool BasicBlock::hasEHBoundaryIn() const
 //
 bool BasicBlock::hasEHBoundaryOut() const
 {
-    bool returnVal = KindIs(BBJ_EHFILTERRET, BBJ_EHFINALLYRET, BBJ_EHFAULTRET);
-
-#if FEATURE_EH_FUNCLETS
-    if (bbKind == BBJ_EHCATCHRET)
-    {
-        returnVal = true;
-    }
-#endif // FEATURE_EH_FUNCLETS
+    return KindIs(BBJ_EHFILTERRET, BBJ_EHFINALLYRET, BBJ_EHFAULTRET, BBJ_EHCATCHRET);
+}
 
-    return returnVal;
+//------------------------------------------------------------------------
+// BBswtDesc copy ctor: copy a switch descriptor, but don't set up the jump table
+//
+// Arguments:
+//    other - existing switch descriptor to copy (except for its jump table)
+//
+BBswtDesc::BBswtDesc(const BBswtDesc* other)
+    : bbsDstTab(nullptr)
+    , bbsCount(other->bbsCount)
+    , bbsDominantCase(other->bbsDominantCase)
+    , bbsDominantFraction(other->bbsDominantFraction)
+    , bbsHasDefault(other->bbsHasDefault)
+    , bbsHasDominantCase(other->bbsHasDominantCase)
+{
 }
 
 //------------------------------------------------------------------------
@@ -1798,7 +1867,7 @@ BBswtDesc::BBswtDesc(Compiler* comp, const BBswtDesc* other)
 {
     // Allocate and fill in a new dst tab
     //
-    bbsDstTab = new (comp, CMK_BasicBlock) BasicBlock*[bbsCount];
+    bbsDstTab = new (comp, CMK_FlowEdge) FlowEdge*[bbsCount];
     for (unsigned i = 0; i < bbsCount; i++)
     {
         bbsDstTab[i] = other->bbsDstTab[i];
@@ -1812,11 +1881,12 @@ BBswtDesc::BBswtDesc(Compiler* comp, const BBswtDesc* other)
 //    comp - compiler instance
 //    other - existing descriptor to copy
 //
-BBehfDesc::BBehfDesc(Compiler* comp, const BBehfDesc* other) : bbeCount(other->bbeCount)
+BBehfDesc::BBehfDesc(Compiler* comp, const BBehfDesc* other)
+    : bbeCount(other->bbeCount)
 {
     // Allocate and fill in a new dst tab
     //
-    bbeSuccs = new (comp, CMK_BasicBlock) BasicBlock*[bbeCount];
+    bbeSuccs = new (comp, CMK_FlowEdge) FlowEdge*[bbeCount];
     for (unsigned i = 0; i < bbeCount; i++)
     {
         bbeSuccs[i] = other->bbeSuccs[i];
diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h
index d434ae8a92b0..500b5274b6f4 100644
--- a/src/coreclr/jit/block.h
+++ b/src/coreclr/jit/block.h
@@ -46,9 +46,13 @@ typedef BitVec_ValRet_T ASSERT_VALRET_TP;
 // Use this format for loop indices
 #define FMT_LP "L%02u"
 
-// And this format for profile weights
+// Use this format for profile weights
 #define FMT_WT "%.7g"
 
+// Use this format for profile weights where we want to conserve horizontal space, at the expense of displaying
+// less precision.
+#define FMT_WT_NARROW "%.3g"
+
 /*****************************************************************************
  *
  *  Each basic block ends with a jump which is described as a value
@@ -62,7 +66,7 @@ enum BBKinds : BYTE
     BBJ_EHFINALLYRET,// block ends with 'endfinally' (for finally)
     BBJ_EHFAULTRET,  // block ends with 'endfinally' (IL alias for 'endfault') (for fault)
     BBJ_EHFILTERRET, // block ends with 'endfilter'
-    BBJ_EHCATCHRET,  // block ends with a leave out of a catch (only #if defined(FEATURE_EH_FUNCLETS))
+    BBJ_EHCATCHRET,  // block ends with a leave out of a catch
     BBJ_THROW,       // block ends with 'throw'
     BBJ_RETURN,      // block ends with 'ret'
     BBJ_ALWAYS,      // block always jumps to the target
@@ -158,7 +162,8 @@ class MemoryKindIterator
     int value;
 
 public:
-    explicit inline MemoryKindIterator(int val) : value(val)
+    explicit inline MemoryKindIterator(int val)
+        : value(val)
     {
     }
     inline MemoryKindIterator& operator++()
@@ -240,7 +245,8 @@ class PredEdgeList
     };
 
 public:
-    PredEdgeList(FlowEdge* pred) : m_begin(pred)
+    PredEdgeList(FlowEdge* pred)
+        : m_begin(pred)
     {
     }
 
@@ -258,7 +264,9 @@ class PredEdgeList
 // PredBlockList: adapter class for forward iteration of the predecessor edge linked list yielding
 // predecessor blocks, using range-based `for`, normally used via BasicBlock::PredBlocks(), e.g.:
 //    for (BasicBlock* const predBlock : block->PredBlocks()) ...
+// allowEdits controls whether the iterator should be resilient to changes to the predecessor list.
 //
+template <bool allowEdits>
 class PredBlockList
 {
     FlowEdge* m_begin;
@@ -270,13 +278,12 @@ class PredBlockList
     {
         FlowEdge* m_pred;
 
-#ifdef DEBUG
-        // Try to guard against the user of the iterator from making changes to the IR that would invalidate
-        // the iterator: cache the edge we think should be next, then check it when we actually do the `++`
+        // When allowEdits=false, try to guard against the user of the iterator from modifying the predecessor list
+        // being traversed: cache the edge we think should be next, then check it when we actually do the `++`
         // operation. This is a bit conservative, but attempts to protect against callers assuming too much about
         // this iterator implementation.
+        // When allowEdits=true, m_next is always used to update m_pred, so changes to m_pred don't break the iterator.
         FlowEdge* m_next;
-#endif
 
     public:
         iterator(FlowEdge* pred);
@@ -292,7 +299,8 @@ class PredBlockList
     };
 
 public:
-    PredBlockList(FlowEdge* pred) : m_begin(pred)
+    PredBlockList(FlowEdge* pred)
+        : m_begin(pred)
     {
     }
 
@@ -307,38 +315,69 @@ class PredBlockList
     }
 };
 
-// BBArrayIterator: forward iterator for an array of BasicBlock*, such as the BBswtDesc->bbsDstTab.
+// BBArrayIterator: forward iterator for an array of BasicBlock*.
 // It is an error (with assert) to yield a nullptr BasicBlock* in this array.
-// `m_bbEntry` can be nullptr, but it only makes sense if both the begin and end of an iteration range are nullptr
+// `m_edgeEntry` can be nullptr, but it only makes sense if both the begin and end of an iteration range are nullptr
 // (meaning, no actual iteration will happen).
 //
 class BBArrayIterator
 {
-    BasicBlock* const* m_bbEntry;
+    FlowEdge* const* m_edgeEntry;
 
 public:
-    BBArrayIterator(BasicBlock* const* bbEntry) : m_bbEntry(bbEntry)
+    BBArrayIterator(FlowEdge* const* edgeEntry)
+        : m_edgeEntry(edgeEntry)
     {
     }
 
-    BasicBlock* operator*() const
-    {
-        assert(m_bbEntry != nullptr);
-        BasicBlock* bTarget = *m_bbEntry;
-        assert(bTarget != nullptr);
-        return bTarget;
-    }
+    BasicBlock* operator*() const;
 
     BBArrayIterator& operator++()
     {
-        assert(m_bbEntry != nullptr);
-        ++m_bbEntry;
+        assert(m_edgeEntry != nullptr);
+        ++m_edgeEntry;
         return *this;
     }
 
     bool operator!=(const BBArrayIterator& i) const
     {
-        return m_bbEntry != i.m_bbEntry;
+        return m_edgeEntry != i.m_edgeEntry;
+    }
+};
+
+// FlowEdgeArrayIterator: forward iterator for an array of FlowEdge*, such as the BBswtDesc->bbsDstTab.
+// It is an error (with assert) to yield a nullptr FlowEdge* in this array.
+// `m_edgeEntry` can be nullptr, but it only makes sense if both the begin and end of an iteration range are nullptr
+// (meaning, no actual iteration will happen).
+//
+class FlowEdgeArrayIterator
+{
+    FlowEdge* const* m_edgeEntry;
+
+public:
+    FlowEdgeArrayIterator(FlowEdge* const* edgeEntry)
+        : m_edgeEntry(edgeEntry)
+    {
+    }
+
+    FlowEdge* operator*() const
+    {
+        assert(m_edgeEntry != nullptr);
+        FlowEdge* const edge = *m_edgeEntry;
+        assert(edge != nullptr);
+        return edge;
+    }
+
+    FlowEdgeArrayIterator& operator++()
+    {
+        assert(m_edgeEntry != nullptr);
+        ++m_edgeEntry;
+        return *this;
+    }
+
+    bool operator!=(const FlowEdgeArrayIterator& i) const
+    {
+        return m_edgeEntry != i.m_edgeEntry;
     }
 };
 
@@ -422,11 +461,7 @@ enum BasicBlockFlags : unsigned __int64
     BBF_RECURSIVE_TAILCALL             = MAKE_BBFLAG(37), // Block has recursive tailcall that may turn into a loop
     BBF_NO_CSE_IN                      = MAKE_BBFLAG(38), // Block should kill off any incoming CSE
     BBF_CAN_ADD_PRED                   = MAKE_BBFLAG(39), // Ok to add pred edge to this block, even when "safe" edge creation disabled
-    BBF_NONE_QUIRK                     = MAKE_BBFLAG(40), // Block was created as a BBJ_ALWAYS to the next block,
-                                                          // and should be treated as if it falls through.
-                                                          // This is just to reduce diffs from removing BBJ_NONE.
-                                                          // (TODO: Remove this quirk after refactoring Compiler::fgFindInsertPoint)
-    BBF_HAS_VALUE_PROFILE              = MAKE_BBFLAG(41), // Block has a node that needs a value probing
+    BBF_HAS_VALUE_PROFILE              = MAKE_BBFLAG(40), // Block has a node that needs a value probing
 
     // The following are sets of flags.
 
@@ -452,7 +487,7 @@ enum BasicBlockFlags : unsigned __int64
     // TODO: Should BBF_RUN_RARELY be added to BBF_SPLIT_GAINED ?
 
     BBF_SPLIT_GAINED = BBF_DONT_REMOVE | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_PROF_WEIGHT | \
-                       BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE | BBF_HAS_VALUE_PROFILE | BBF_HAS_MDARRAYREF | BBF_NEEDS_GCPOLL | BBF_NONE_QUIRK,
+                       BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE | BBF_HAS_VALUE_PROFILE | BBF_HAS_MDARRAYREF | BBF_NEEDS_GCPOLL,
 
     // Flags that must be propagated to a new block if code is copied from a block to a new block. These are flags that
     // limit processing of a block if the code in question doesn't exist. This is conservative; we might not
@@ -500,6 +535,185 @@ enum class BasicBlockVisit
 
 // clang-format on
 
+//-------------------------------------------------------------------------
+// FlowEdge -- control flow edge
+//
+// In compiler terminology the control flow between two BasicBlocks
+// is typically referred to as an "edge".  Most well known are the
+// backward branches for loops, which are often called "back-edges".
+//
+// "struct FlowEdge" is the type that represents our control flow edges.
+// This type is a linked list of zero or more "edges".
+// (The list of zero edges is represented by NULL.)
+// Every BasicBlock has a field called bbPreds of this type.  This field
+// represents the list of "edges" that flow into this BasicBlock.
+// The FlowEdge type only stores the BasicBlock* of the source for the
+// control flow edge.  The destination block for the control flow edge
+// is implied to be the block which contained the bbPreds field.
+//
+// For a switch branch target there may be multiple "edges" that have
+// the same source block (and destination block).  We need to count the
+// number of these edges so that during optimization we will know when
+// we have zero of them.  Rather than have extra FlowEdge entries we
+// track this via the DupCount property.
+//
+// When we have Profile weight for the BasicBlocks we can usually compute
+// the number of times each edge was executed by examining the adjacent
+// BasicBlock weights.  As we are doing for BasicBlocks, we call the number
+// of times that a control flow edge was executed the "edge weight".
+// In order to compute the edge weights we need to use a bounded range
+// for every edge weight. These two fields, 'flEdgeWeightMin' and 'flEdgeWeightMax'
+// are used to hold a bounded range.  Most often these will converge such
+// that both values are the same and that value is the exact edge weight.
+// Sometimes we are left with a rage of possible values between [Min..Max]
+// which represents an inexact edge weight.
+//
+// The bbPreds list is initially created by Compiler::fgLinkBasicBlocks()
+// and is incrementally kept up to date.
+//
+// The edge weight are computed by Compiler::fgComputeEdgeWeights()
+// the edge weights are used to straighten conditional branches
+// by Compiler::fgReorderBlocks()
+//
+struct FlowEdge
+{
+private:
+    // The next predecessor edge in the list, nullptr for end of list.
+    FlowEdge* m_nextPredEdge;
+
+    // The source of the control flow
+    BasicBlock* m_sourceBlock;
+
+    // The destination of the control flow
+    BasicBlock* m_destBlock;
+
+    // Edge weights
+    weight_t m_edgeWeightMin;
+    weight_t m_edgeWeightMax;
+
+    // Likelihood that m_sourceBlock transfers control along this edge.
+    // Values in range [0..1]
+    weight_t m_likelihood;
+
+    // The count of duplicate "edges" (used for switch stmts or degenerate branches)
+    unsigned m_dupCount;
+
+    // True if likelihood has been set
+    INDEBUG(bool m_likelihoodSet);
+
+public:
+    FlowEdge(BasicBlock* sourceBlock, BasicBlock* destBlock, FlowEdge* rest)
+        : m_nextPredEdge(rest)
+        , m_sourceBlock(sourceBlock)
+        , m_destBlock(destBlock)
+        , m_edgeWeightMin(0)
+        , m_edgeWeightMax(0)
+        , m_likelihood(0)
+        , m_dupCount(0)
+#ifdef DEBUG
+        , m_likelihoodSet(false)
+#endif // DEBUG
+    {
+    }
+
+    FlowEdge* getNextPredEdge() const
+    {
+        return m_nextPredEdge;
+    }
+
+    FlowEdge** getNextPredEdgeRef()
+    {
+        return &m_nextPredEdge;
+    }
+
+    void setNextPredEdge(FlowEdge* newEdge)
+    {
+        m_nextPredEdge = newEdge;
+    }
+
+    BasicBlock* getSourceBlock() const
+    {
+        assert(m_sourceBlock != nullptr);
+        return m_sourceBlock;
+    }
+
+    void setSourceBlock(BasicBlock* newBlock)
+    {
+        assert(newBlock != nullptr);
+        m_sourceBlock = newBlock;
+    }
+
+    BasicBlock* getDestinationBlock() const
+    {
+        assert(m_destBlock != nullptr);
+        return m_destBlock;
+    }
+
+    void setDestinationBlock(BasicBlock* newBlock)
+    {
+        assert(newBlock != nullptr);
+        m_destBlock = newBlock;
+    }
+
+    weight_t edgeWeightMin() const
+    {
+        return m_edgeWeightMin;
+    }
+
+    weight_t edgeWeightMax() const
+    {
+        return m_edgeWeightMax;
+    }
+
+    // These two methods are used to set new values for edge weights.
+    // They return false if the newWeight is not between the current [min..max]
+    // when slop is non-zero we allow for the case where our weights might be off by 'slop'
+    //
+    bool setEdgeWeightMinChecked(weight_t newWeight, BasicBlock* bDst, weight_t slop, bool* wbUsedSlop);
+    bool setEdgeWeightMaxChecked(weight_t newWeight, BasicBlock* bDst, weight_t slop, bool* wbUsedSlop);
+    void setEdgeWeights(weight_t newMinWeight, weight_t newMaxWeight, BasicBlock* bDst);
+
+    weight_t getLikelihood() const
+    {
+        assert(m_likelihoodSet);
+        return m_likelihood;
+    }
+
+    void setLikelihood(weight_t likelihood);
+    void addLikelihood(weight_t addedLikelihod);
+
+    void clearLikelihood()
+    {
+        m_likelihood = 0.0;
+        INDEBUG(m_likelihoodSet = false);
+    }
+
+#ifdef DEBUG
+    bool hasLikelihood() const
+    {
+        return m_likelihoodSet;
+    }
+#endif // DEBUG
+
+    weight_t getLikelyWeight() const;
+
+    unsigned getDupCount() const
+    {
+        return m_dupCount;
+    }
+
+    void incrementDupCount()
+    {
+        m_dupCount++;
+    }
+
+    void decrementDupCount()
+    {
+        assert(m_dupCount >= 1);
+        m_dupCount--;
+    }
+};
+
 //------------------------------------------------------------------------
 // BasicBlock: describes a basic block in the flowgraph.
 //
@@ -518,20 +732,21 @@ struct BasicBlock : private LIR::Range
     BBKinds bbKind; // jump (if any) at the end of this block
 
     /* The following union describes the jump target(s) of this block */
-    union {
-        unsigned    bbTargetOffs; // PC offset (temporary only)
-        BasicBlock* bbTarget;     // basic block
-        BasicBlock* bbTrueTarget; // BBJ_COND jump target when its condition is true (alias for bbTarget)
-        BBswtDesc*  bbSwtTargets; // switch descriptor
-        BBehfDesc*  bbEhfTargets; // BBJ_EHFINALLYRET descriptor
+    union
+    {
+        unsigned   bbTargetOffs; // PC offset (temporary only)
+        FlowEdge*  bbTargetEdge; // successor edge for block kinds with only one successor (BBJ_ALWAYS, etc)
+        FlowEdge*  bbTrueEdge;   // BBJ_COND successor edge when its condition is true (alias for bbTargetEdge)
+        BBswtDesc* bbSwtTargets; // switch descriptor
+        BBehfDesc* bbEhfTargets; // BBJ_EHFINALLYRET descriptor
     };
 
-    // Points to the successor of a BBJ_COND block if bbTrueTarget is not taken
-    BasicBlock* bbFalseTarget;
+    // Successor edge of a BBJ_COND block if bbTrueEdge is not taken
+    FlowEdge* bbFalseEdge;
 
 public:
     static BasicBlock* New(Compiler* compiler);
-    static BasicBlock* New(Compiler* compiler, BBKinds kind, BasicBlock* target = nullptr);
+    static BasicBlock* New(Compiler* compiler, BBKinds kind);
     static BasicBlock* New(Compiler* compiler, BBehfDesc* ehfTargets);
     static BasicBlock* New(Compiler* compiler, BBswtDesc* swtTargets);
     static BasicBlock* New(Compiler* compiler, BBKinds kind, unsigned targetOffs);
@@ -617,100 +832,145 @@ struct BasicBlock : private LIR::Range
         return bbTargetOffs;
     }
 
-    void SetKindAndTarget(BBKinds kind, unsigned targetOffs)
-    {
-        bbKind       = kind;
-        bbTargetOffs = targetOffs;
-        assert(KindIs(BBJ_ALWAYS, BBJ_COND, BBJ_LEAVE));
-    }
-
     bool HasTarget() const
     {
-        // These block types should always have bbTarget set
+        // These block types should always have bbTargetEdge set
         return KindIs(BBJ_ALWAYS, BBJ_CALLFINALLY, BBJ_CALLFINALLYRET, BBJ_EHCATCHRET, BBJ_EHFILTERRET, BBJ_LEAVE);
     }
 
     BasicBlock* GetTarget() const
     {
-        // Only block kinds that use `bbTarget` can access it, and it must be non-null.
+        return GetTargetEdge()->getDestinationBlock();
+    }
+
+    FlowEdge* GetTargetEdge() const
+    {
+        // Only block kinds that use `bbTargetEdge` can access it, and it must be non-null.
         assert(HasInitializedTarget());
-        return bbTarget;
+        assert(bbTargetEdge->getSourceBlock() == this);
+        assert(bbTargetEdge->getDestinationBlock() != nullptr);
+        return bbTargetEdge;
     }
 
-    void SetTarget(BasicBlock* target)
+    void SetTargetEdge(FlowEdge* targetEdge)
     {
         // SetKindAndTarget() nulls target for non-jump kinds,
-        // so don't use SetTarget() to null bbTarget without updating bbKind.
-        bbTarget = target;
+        // so don't use SetTargetEdge() to null bbTargetEdge without updating bbKind.
+        bbTargetEdge = targetEdge;
         assert(HasInitializedTarget());
+        assert(bbTargetEdge->getSourceBlock() == this);
+        assert(bbTargetEdge->getDestinationBlock() != nullptr);
+
+        // This is the only successor edge for this block, so likelihood should be 1.0
+        bbTargetEdge->setLikelihood(1.0);
     }
 
     BasicBlock* GetTrueTarget() const
+    {
+        return GetTrueEdge()->getDestinationBlock();
+    }
+
+    FlowEdge* GetTrueEdge() const
     {
         assert(KindIs(BBJ_COND));
-        assert(bbTrueTarget != nullptr);
-        return bbTrueTarget;
+        assert(bbTrueEdge != nullptr);
+        assert(bbTrueEdge->getSourceBlock() == this);
+        assert(bbTrueEdge->getDestinationBlock() != nullptr);
+        return bbTrueEdge;
     }
 
-    void SetTrueTarget(BasicBlock* target)
+    void SetTrueEdge(FlowEdge* trueEdge)
     {
         assert(KindIs(BBJ_COND));
-        assert(target != nullptr);
-        bbTrueTarget = target;
+        bbTrueEdge = trueEdge;
+        assert(bbTrueEdge != nullptr);
+        assert(bbTrueEdge->getSourceBlock() == this);
+        assert(bbTrueEdge->getDestinationBlock() != nullptr);
     }
 
     bool TrueTargetIs(const BasicBlock* target) const
     {
-        assert(KindIs(BBJ_COND));
-        assert(bbTrueTarget != nullptr);
-        return (bbTrueTarget == target);
+        return (GetTrueTarget() == target);
+    }
+
+    bool TrueEdgeIs(const FlowEdge* targetEdge) const
+    {
+        return (GetTrueEdge() == targetEdge);
     }
 
     BasicBlock* GetFalseTarget() const
+    {
+        return GetFalseEdge()->getDestinationBlock();
+    }
+
+    FlowEdge* GetFalseEdge() const
     {
         assert(KindIs(BBJ_COND));
-        assert(bbFalseTarget != nullptr);
-        return bbFalseTarget;
+        assert(bbFalseEdge != nullptr);
+        assert(bbFalseEdge->getSourceBlock() == this);
+        assert(bbFalseEdge->getDestinationBlock() != nullptr);
+        return bbFalseEdge;
     }
 
-    void SetFalseTarget(BasicBlock* target)
+    void SetFalseEdge(FlowEdge* falseEdge)
     {
         assert(KindIs(BBJ_COND));
-        assert(target != nullptr);
-        bbFalseTarget = target;
+        bbFalseEdge = falseEdge;
+        assert(bbFalseEdge != nullptr);
+        assert(bbFalseEdge->getSourceBlock() == this);
+        assert(bbFalseEdge->getDestinationBlock() != nullptr);
     }
 
     bool FalseTargetIs(const BasicBlock* target) const
     {
-        assert(KindIs(BBJ_COND));
-        assert(bbFalseTarget != nullptr);
-        return (bbFalseTarget == target);
+        return (GetFalseTarget() == target);
+    }
+
+    bool FalseEdgeIs(const FlowEdge* targetEdge) const
+    {
+        return (GetFalseEdge() == targetEdge);
+    }
+
+    void SetCond(FlowEdge* trueEdge, FlowEdge* falseEdge)
+    {
+        bbKind = BBJ_COND;
+        SetTrueEdge(trueEdge);
+        SetFalseEdge(falseEdge);
     }
 
-    void SetCond(BasicBlock* trueTarget, BasicBlock* falseTarget)
+    // In most cases, a block's true and false targets are known by the time SetCond is called.
+    // To simplify the few cases where the false target isn't available until later,
+    // overload SetCond to initialize only the true target.
+    // This simplifies, for example, lowering switch blocks into jump sequences.
+    void SetCond(FlowEdge* trueEdge)
     {
-        assert(trueTarget != nullptr);
-        bbKind        = BBJ_COND;
-        bbTrueTarget  = trueTarget;
-        bbFalseTarget = falseTarget;
+        bbKind = BBJ_COND;
+        SetTrueEdge(trueEdge);
     }
 
-    // Set both the block kind and target. This can clear `bbTarget` when setting
-    // block kinds that don't use `bbTarget`.
-    void SetKindAndTarget(BBKinds kind, BasicBlock* target = nullptr)
+    // Set both the block kind and target edge.
+    void SetKindAndTargetEdge(BBKinds kind, FlowEdge* targetEdge)
     {
-        bbKind   = kind;
-        bbTarget = target;
+        bbKind       = kind;
+        bbTargetEdge = targetEdge;
+        assert(HasInitializedTarget());
 
-        // If bbKind indicates this block has a jump, bbTarget cannot be null.
-        // You shouldn't use this to set a BBJ_COND, BBJ_SWITCH, or BBJ_EHFINALLYRET.
-        assert(HasTarget() ? HasInitializedTarget() : (bbTarget == nullptr));
+        // This is the only successor edge for this block, so likelihood should be 1.0
+        bbTargetEdge->setLikelihood(1.0);
+    }
+
+    // Set the block kind, and clear bbTargetEdge.
+    void SetKindAndTargetEdge(BBKinds kind)
+    {
+        bbKind       = kind;
+        bbTargetEdge = nullptr;
+        assert(!HasTarget());
     }
 
     bool HasInitializedTarget() const
     {
         assert(HasTarget());
-        return (bbTarget != nullptr);
+        return (bbTargetEdge != nullptr);
     }
 
     bool TargetIs(const BasicBlock* target) const
@@ -756,19 +1016,13 @@ struct BasicBlock : private LIR::Range
         bbEhfTargets = ehfTarget;
     }
 
-    // BBJ_CALLFINALLYRET uses the `bbTarget` field. However, also treat it specially:
+    // BBJ_CALLFINALLYRET uses the `bbTargetEdge` field. However, also treat it specially:
     // for callers that know they want a continuation, use this function instead of the
     // general `GetTarget()` to allow asserting on the block kind.
     BasicBlock* GetFinallyContinuation() const
     {
         assert(KindIs(BBJ_CALLFINALLYRET));
-        return bbTarget;
-    }
-
-    void SetFinallyContinuation(BasicBlock* finallyContinuation)
-    {
-        assert(KindIs(BBJ_CALLFINALLYRET));
-        bbTarget = finallyContinuation;
+        return GetTarget();
     }
 
 #ifdef DEBUG
@@ -777,21 +1031,42 @@ struct BasicBlock : private LIR::Range
     BasicBlock* GetTargetRaw() const
     {
         assert(HasTarget());
-        return bbTarget;
+        return (bbTargetEdge == nullptr) ? nullptr : bbTargetEdge->getDestinationBlock();
     }
 
     // Return the BBJ_COND true target; it might be null. Only used during dumping.
     BasicBlock* GetTrueTargetRaw() const
     {
         assert(KindIs(BBJ_COND));
-        return bbTrueTarget;
+        return (bbTrueEdge == nullptr) ? nullptr : bbTrueEdge->getDestinationBlock();
     }
 
     // Return the BBJ_COND false target; it might be null. Only used during dumping.
     BasicBlock* GetFalseTargetRaw() const
     {
         assert(KindIs(BBJ_COND));
-        return bbFalseTarget;
+        return (bbFalseEdge == nullptr) ? nullptr : bbFalseEdge->getDestinationBlock();
+    }
+
+    // Return the target edge; it might be null. Only used during dumping.
+    FlowEdge* GetTargetEdgeRaw() const
+    {
+        assert(HasTarget());
+        return bbTargetEdge;
+    }
+
+    // Return the BBJ_COND true target edge; it might be null. Only used during dumping.
+    FlowEdge* GetTrueEdgeRaw() const
+    {
+        assert(KindIs(BBJ_COND));
+        return bbTrueEdge;
+    }
+
+    // Return the BBJ_COND false target edge; it might be null. Only used during dumping.
+    FlowEdge* GetFalseEdgeRaw() const
+    {
+        assert(KindIs(BBJ_COND));
+        return bbFalseEdge;
     }
 
 #endif // DEBUG
@@ -896,11 +1171,11 @@ struct BasicBlock : private LIR::Range
     }
 
 #ifdef DEBUG
-    void     dspFlags() const;         // Print the flags
-    unsigned dspPreds() const;         // Print the predecessors (bbPreds)
-    void dspSuccs(Compiler* compiler); // Print the successors. The 'compiler' argument determines whether EH
-                                       // regions are printed: see NumSucc() for details.
-    void dspKind() const;              // Print the block jump kind (e.g., BBJ_ALWAYS, BBJ_COND, etc.).
+    void     dspFlags() const;             // Print the flags
+    unsigned dspPreds() const;             // Print the predecessors (bbPreds)
+    void     dspSuccs(Compiler* compiler); // Print the successors. The 'compiler' argument determines whether EH
+                                           // regions are printed: see NumSucc() for details.
+    void dspKind() const;                  // Print the block jump kind (e.g., BBJ_ALWAYS, BBJ_COND, etc.).
 
     // Print a simple basic block header for various output, including a list of predecessors and successors.
     void dspBlockHeader(Compiler* compiler, bool showKind = true, bool showFlags = false, bool showPreds = true);
@@ -908,11 +1183,11 @@ struct BasicBlock : private LIR::Range
     const char* dspToString(int blockNumPadding = 0) const;
 #endif // DEBUG
 
-#define BB_UNITY_WEIGHT 100.0        // how much a normal execute once block weighs
-#define BB_UNITY_WEIGHT_UNSIGNED 100 // how much a normal execute once block weighs
-#define BB_LOOP_WEIGHT_SCALE 8.0     // synthetic profile scale factor for loops
-#define BB_ZERO_WEIGHT 0.0
-#define BB_MAX_WEIGHT FLT_MAX // maximum finite weight  -- needs rethinking.
+#define BB_UNITY_WEIGHT          100.0 // how much a normal execute once block weighs
+#define BB_UNITY_WEIGHT_UNSIGNED 100   // how much a normal execute once block weighs
+#define BB_LOOP_WEIGHT_SCALE     8.0   // synthetic profile scale factor for loops
+#define BB_ZERO_WEIGHT           0.0
+#define BB_MAX_WEIGHT            FLT_MAX // maximum finite weight  -- needs rethinking.
 
     weight_t bbWeight; // The dynamic execution weight of this block
 
@@ -1081,7 +1356,11 @@ struct BasicBlock : private LIR::Range
     unsigned NumSucc() const;
     unsigned NumSucc(Compiler* comp);
 
-    // GetSucc: Returns the "i"th successor. Requires (0 <= i < NumSucc()).
+    // GetSuccEdge: Returns the "i"th successor edge. Requires (0 <= i < NumSucc()).
+    FlowEdge* GetSuccEdge(unsigned i) const;
+    FlowEdge* GetSuccEdge(unsigned i, Compiler* comp);
+
+    // GetSucc: Returns the "i"th successor block. Requires (0 <= i < NumSucc()).
     BasicBlock* GetSucc(unsigned i) const;
     BasicBlock* GetSucc(unsigned i, Compiler* comp);
 
@@ -1129,12 +1408,14 @@ struct BasicBlock : private LIR::Range
 
 #define NO_BASE_TMP UINT_MAX // base# to use when we have none
 
-    union {
+    union
+    {
         unsigned bbStkTempsIn;       // base# for input stack temps
         int      bbCountSchemaIndex; // schema index for count instrumentation
     };
 
-    union {
+    union
+    {
         unsigned bbStkTempsOut;          // base# for output stack temps
         int      bbHistogramSchemaIndex; // schema index for histogram instrumentation
     };
@@ -1254,18 +1535,15 @@ struct BasicBlock : private LIR::Range
     bool hasEHBoundaryOut() const;
 
 // Some non-zero value that will not collide with real tokens for bbCatchTyp
-#define BBCT_NONE 0x00000000
-#define BBCT_FAULT 0xFFFFFFFC
-#define BBCT_FINALLY 0xFFFFFFFD
-#define BBCT_FILTER 0xFFFFFFFE
-#define BBCT_FILTER_HANDLER 0xFFFFFFFF
+#define BBCT_NONE                   0x00000000
+#define BBCT_FAULT                  0xFFFFFFFC
+#define BBCT_FINALLY                0xFFFFFFFD
+#define BBCT_FILTER                 0xFFFFFFFE
+#define BBCT_FILTER_HANDLER         0xFFFFFFFF
 #define handlerGetsXcptnObj(hndTyp) ((hndTyp) != BBCT_NONE && (hndTyp) != BBCT_FAULT && (hndTyp) != BBCT_FINALLY)
 
     // TODO-Cleanup: Get rid of bbStkDepth and use bbStackDepthOnEntry() instead
-    union {
-        unsigned short bbStkDepth; // stack depth on entry
-        unsigned short bbFPinVars; // number of inner enregistered FP vars
-    };
+    unsigned short bbStkDepth; // stack depth on entry
 
     // Basic block predecessor lists. Predecessor lists are created by fgLinkBasicBlocks(), stored
     // in 'bbPreds', and then maintained throughout compilation. 'fgPredsComputed' will be 'true' after the
@@ -1284,9 +1562,18 @@ struct BasicBlock : private LIR::Range
     // PredBlocks: convenience method for enabling range-based `for` iteration over predecessor blocks, e.g.:
     //    for (BasicBlock* const predBlock : block->PredBlocks()) ...
     //
-    PredBlockList PredBlocks() const
+    PredBlockList<false> PredBlocks() const
+    {
+        return PredBlockList<false>(bbPreds);
+    }
+
+    // PredBlocksEditing: convenience method for enabling range-based `for` iteration over predecessor blocks, e.g.:
+    //    for (BasicBlock* const predBlock : block->PredBlocksEditing()) ...
+    // This iterator tolerates modifications to bbPreds.
+    //
+    PredBlockList<true> PredBlocksEditing() const
     {
-        return PredBlockList(bbPreds);
+        return PredBlockList<true>(bbPreds);
     }
 
     // Pred list maintenance
@@ -1295,7 +1582,8 @@ struct BasicBlock : private LIR::Range
     void ensurePredListOrder(Compiler* compiler);
     void reorderPredList(Compiler* compiler);
 
-    union {
+    union
+    {
         BasicBlock* bbIDom;          // Represent the closest dominator to this block (called the Immediate
                                      // Dominator) used to compute the dominance tree.
         FlowEdge* bbLastPred;        // Used early on by fgLinkBasicBlock/fgAddRefPred
@@ -1344,7 +1632,9 @@ struct BasicBlock : private LIR::Range
             return m_ssaNum;
         }
 
-        MemoryPhiArg(unsigned ssaNum, MemoryPhiArg* nextArg = nullptr) : m_ssaNum(ssaNum), m_nextArg(nextArg)
+        MemoryPhiArg(unsigned ssaNum, MemoryPhiArg* nextArg = nullptr)
+            : m_ssaNum(ssaNum)
+            , m_nextArg(nextArg)
         {
         }
 
@@ -1370,18 +1660,21 @@ struct BasicBlock : private LIR::Range
      *  thus we can union them since the two operations are completely disjunct.
      */
 
-    union {
+    union
+    {
         EXPSET_TP bbCseGen;             // CSEs computed by block
         ASSERT_TP bbAssertionGen;       // assertions created by block (global prop)
         ASSERT_TP bbAssertionOutIfTrue; // assertions available on exit along true/jump edge (BBJ_COND, local prop)
     };
 
-    union {
+    union
+    {
         EXPSET_TP bbCseIn;       // CSEs available on entry
         ASSERT_TP bbAssertionIn; // assertions available on entry (global prop)
     };
 
-    union {
+    union
+    {
         EXPSET_TP bbCseOut;              // CSEs available on exit
         ASSERT_TP bbAssertionOut;        // assertions available on exit (global prop, local prop & !BBJ_COND)
         ASSERT_TP bbAssertionOutIfFalse; // assertions available on exit along false/next edge (BBJ_COND, local prop)
@@ -1389,15 +1682,7 @@ struct BasicBlock : private LIR::Range
 
     void* bbEmitCookie;
 
-#ifdef VERIFIER
-    stackDesc bbStackIn;  // stack descriptor for  input
-    stackDesc bbStackOut; // stack descriptor for output
-
-    verTypeVal* bbTypesIn;  // list of variable types on  input
-    verTypeVal* bbTypesOut; // list of variable types on output
-#endif                      // VERIFIER
-
-//-------------------------------------------------------------------------
+    //-------------------------------------------------------------------------
 
 #if MEASURE_BLOCK_SIZE
     static size_t s_Size;
@@ -1432,8 +1717,8 @@ struct BasicBlock : private LIR::Range
     unsigned bbID;
 #endif // DEBUG
 
-    unsigned bbStackDepthOnEntry() const;
-    void bbSetStack(StackEntry* stack);
+    unsigned    bbStackDepthOnEntry() const;
+    void        bbSetStack(StackEntry* stack);
     StackEntry* bbStackOnEntry() const;
 
     // "bbNum" is one-based (for unknown reasons); it is sometimes useful to have the corresponding
@@ -1483,7 +1768,10 @@ struct BasicBlock : private LIR::Range
     Statement* FirstNonPhiDef() const;
     Statement* FirstNonPhiDefOrCatchArgStore() const;
 
-    BasicBlock() : bbStmtList(nullptr), bbLiveIn(VarSetOps::UninitVal()), bbLiveOut(VarSetOps::UninitVal())
+    BasicBlock()
+        : bbStmtList(nullptr)
+        , bbLiveIn(VarSetOps::UninitVal())
+        , bbLiveOut(VarSetOps::UninitVal())
     {
     }
 
@@ -1495,7 +1783,9 @@ struct BasicBlock : private LIR::Range
         BasicBlock* m_block;
 
     public:
-        Successors(Compiler* comp, BasicBlock* block) : m_comp(comp), m_block(block)
+        Successors(Compiler* comp, BasicBlock* block)
+            : m_comp(comp)
+            , m_block(block)
         {
         }
 
@@ -1506,11 +1796,15 @@ struct BasicBlock : private LIR::Range
             TPosition   m_pos;
 
         public:
-            iterator(Compiler* comp, BasicBlock* block) : m_comp(comp), m_block(block), m_pos(comp, block)
+            iterator(Compiler* comp, BasicBlock* block)
+                : m_comp(comp)
+                , m_block(block)
+                , m_pos(comp, block)
             {
             }
 
-            iterator() : m_pos()
+            iterator()
+                : m_pos()
             {
             }
 
@@ -1560,24 +1854,66 @@ struct BasicBlock : private LIR::Range
 
     bool HasPotentialEHSuccs(Compiler* comp);
 
-    // BBSuccList: adapter class for forward iteration of block successors, using range-based `for`,
-    // normally used via BasicBlock::Succs(), e.g.:
-    //    for (BasicBlock* const target : block->Succs()) ...
+    // Base class for Successor block/edge iterators.
     //
-    class BBSuccList
+    class SuccList
     {
+    protected:
         // For one or two successors, pre-compute and stash the successors inline, in m_succs[], so we don't
         // need to call a function or execute another `switch` to get them. Also, pre-compute the begin and end
         // points of the iteration, for use by BBArrayIterator. `m_begin` and `m_end` will either point at
         // `m_succs` or at the switch table successor array.
-        BasicBlock*        m_succs[2];
-        BasicBlock* const* m_begin;
-        BasicBlock* const* m_end;
+        FlowEdge*        m_succs[2];
+        FlowEdge* const* m_begin;
+        FlowEdge* const* m_end;
 
+        SuccList(const BasicBlock* block);
+    };
+
+    // BBSuccList: adapter class for forward iteration of block successors, using range-based `for`,
+    // normally used via BasicBlock::Succs(), e.g.:
+    //    for (BasicBlock* const target : block->Succs()) ...
+    //
+    class BBSuccList : private SuccList
+    {
     public:
-        BBSuccList(const BasicBlock* block);
-        BBArrayIterator begin() const;
-        BBArrayIterator end() const;
+        BBSuccList(const BasicBlock* block)
+            : SuccList(block)
+        {
+        }
+
+        BBArrayIterator begin() const
+        {
+            return BBArrayIterator(m_begin);
+        }
+
+        BBArrayIterator end() const
+        {
+            return BBArrayIterator(m_end);
+        }
+    };
+
+    // BBSuccEdgeList: adapter class for forward iteration of block successors edges, using range-based `for`,
+    // normally used via BasicBlock::SuccEdges(), e.g.:
+    //    for (FlowEdge* const succEdge : block->SuccEdges()) ...
+    //
+    class BBSuccEdgeList : private SuccList
+    {
+    public:
+        BBSuccEdgeList(const BasicBlock* block)
+            : SuccList(block)
+        {
+        }
+
+        FlowEdgeArrayIterator begin() const
+        {
+            return FlowEdgeArrayIterator(m_begin);
+        }
+
+        FlowEdgeArrayIterator end() const
+        {
+            return FlowEdgeArrayIterator(m_end);
+        }
     };
 
     // BBCompilerSuccList: adapter class for forward iteration of block successors, using range-based `for`,
@@ -1591,7 +1927,7 @@ struct BasicBlock : private LIR::Range
         Compiler*   m_comp;
         BasicBlock* m_block;
 
-        // iterator: forward iterator for an array of BasicBlock*, such as the BBswtDesc->bbsDstTab.
+        // iterator: forward iterator for an array of BasicBlock*
         //
         class iterator
         {
@@ -1601,7 +1937,9 @@ struct BasicBlock : private LIR::Range
 
         public:
             iterator(Compiler* comp, BasicBlock* block, unsigned succNum)
-                : m_comp(comp), m_block(block), m_succNum(succNum)
+                : m_comp(comp)
+                , m_block(block)
+                , m_succNum(succNum)
             {
             }
 
@@ -1626,7 +1964,74 @@ struct BasicBlock : private LIR::Range
         };
 
     public:
-        BBCompilerSuccList(Compiler* comp, BasicBlock* block) : m_comp(comp), m_block(block)
+        BBCompilerSuccList(Compiler* comp, BasicBlock* block)
+            : m_comp(comp)
+            , m_block(block)
+        {
+        }
+
+        iterator begin() const
+        {
+            return iterator(m_comp, m_block, 0);
+        }
+
+        iterator end() const
+        {
+            return iterator(m_comp, m_block, m_block->NumSucc(m_comp));
+        }
+    };
+
+    // BBCompilerSuccEdgeList: adapter class for forward iteration of block successors edges, using range-based `for`,
+    // normally used via BasicBlock::SuccEdges(), e.g.:
+    //    for (FlowEdge* const succEdge : block->SuccEdges(compiler)) ...
+    //
+    // This version uses NumSucc(Compiler*)/GetSucc(Compiler*). See the documentation there for the explanation
+    // of the implications of this versus the version that does not take `Compiler*`.
+    class BBCompilerSuccEdgeList
+    {
+        Compiler*   m_comp;
+        BasicBlock* m_block;
+
+        // iterator: forward iterator for an array of BasicBlock*
+        //
+        class iterator
+        {
+            Compiler*   m_comp;
+            BasicBlock* m_block;
+            unsigned    m_succNum;
+
+        public:
+            iterator(Compiler* comp, BasicBlock* block, unsigned succNum)
+                : m_comp(comp)
+                , m_block(block)
+                , m_succNum(succNum)
+            {
+            }
+
+            FlowEdge* operator*() const
+            {
+                assert(m_block != nullptr);
+                FlowEdge* succEdge = m_block->GetSuccEdge(m_succNum, m_comp);
+                assert(succEdge != nullptr);
+                return succEdge;
+            }
+
+            iterator& operator++()
+            {
+                ++m_succNum;
+                return *this;
+            }
+
+            bool operator!=(const iterator& i) const
+            {
+                return m_succNum != i.m_succNum;
+            }
+        };
+
+    public:
+        BBCompilerSuccEdgeList(Compiler* comp, BasicBlock* block)
+            : m_comp(comp)
+            , m_block(block)
         {
         }
 
@@ -1657,12 +2062,19 @@ struct BasicBlock : private LIR::Range
         return BBCompilerSuccList(comp, this);
     }
 
+    BBSuccEdgeList SuccEdges()
+    {
+        return BBSuccEdgeList(this);
+    }
+
+    BBCompilerSuccEdgeList SuccEdges(Compiler* comp)
+    {
+        return BBCompilerSuccEdgeList(comp, this);
+    }
+
     // Clone block state and statements from `from` block to `to` block (which must be new/empty)
     static void CloneBlockState(Compiler* compiler, BasicBlock* to, const BasicBlock* from);
 
-    // Copy the block kind and targets. The `from` block is untouched.
-    void CopyTarget(Compiler* compiler, const BasicBlock* from);
-
     // Copy the block kind and take memory ownership of the targets.
     void TransferTarget(BasicBlock* from);
 
@@ -1729,7 +2141,8 @@ class BasicBlockIterator
     BasicBlock* m_block;
 
 public:
-    BasicBlockIterator(BasicBlock* block) : m_block(block)
+    BasicBlockIterator(BasicBlock* block)
+        : m_block(block)
     {
     }
 
@@ -1765,7 +2178,8 @@ class BasicBlockSimpleList
     BasicBlock* m_begin;
 
 public:
-    BasicBlockSimpleList(BasicBlock* begin) : m_begin(begin)
+    BasicBlockSimpleList(BasicBlock* begin)
+        : m_begin(begin)
     {
     }
 
@@ -1795,7 +2209,9 @@ class BasicBlockRangeList
     BasicBlock* m_end;
 
 public:
-    BasicBlockRangeList(BasicBlock* begin, BasicBlock* end) : m_begin(begin), m_end(end)
+    BasicBlockRangeList(BasicBlock* begin, BasicBlock* end)
+        : m_begin(begin)
+        , m_end(end)
     {
         assert(begin != nullptr);
         assert(end != nullptr);
@@ -1824,8 +2240,8 @@ class BasicBlockRangeList
 //
 struct BBswtDesc
 {
-    BasicBlock** bbsDstTab; // case label table address
-    unsigned     bbsCount;  // count of cases (includes 'default' if bbsHasDefault)
+    FlowEdge** bbsDstTab; // case label table address
+    unsigned   bbsCount;  // count of cases (includes 'default' if bbsHasDefault)
 
     // Case number and likelihood of most likely case
     // (only known with PGO, only valid if bbsHasDominantCase is true)
@@ -1835,10 +2251,14 @@ struct BBswtDesc
     bool bbsHasDefault;      // true if last switch case is a default case
     bool bbsHasDominantCase; // true if switch has a dominant case
 
-    BBswtDesc() : bbsHasDefault(true), bbsHasDominantCase(false)
+    BBswtDesc()
+        : bbsHasDefault(true)
+        , bbsHasDominantCase(false)
     {
     }
 
+    BBswtDesc(const BBswtDesc* other);
+
     BBswtDesc(Compiler* comp, const BBswtDesc* other);
 
     void removeDefault()
@@ -1849,7 +2269,7 @@ struct BBswtDesc
         bbsCount--;
     }
 
-    BasicBlock* getDefault()
+    FlowEdge* getDefault()
     {
         assert(bbsHasDefault);
         assert(bbsCount > 0);
@@ -1860,7 +2280,8 @@ struct BBswtDesc
 // BBSwitchTargetList out-of-class-declaration implementations (here due to C++ ordering requirements).
 //
 
-inline BBSwitchTargetList::BBSwitchTargetList(BBswtDesc* bbsDesc) : m_bbsDesc(bbsDesc)
+inline BBSwitchTargetList::BBSwitchTargetList(BBswtDesc* bbsDesc)
+    : m_bbsDesc(bbsDesc)
 {
     assert(m_bbsDesc != nullptr);
     assert(m_bbsDesc->bbsDstTab != nullptr);
@@ -1880,10 +2301,12 @@ inline BBArrayIterator BBSwitchTargetList::end() const
 //
 struct BBehfDesc
 {
-    BasicBlock** bbeSuccs; // array of `BasicBlock*` pointing to BBJ_EHFINALLYRET block successors
-    unsigned     bbeCount; // size of `bbeSuccs` array
+    FlowEdge** bbeSuccs; // array of `FlowEdge*` pointing to BBJ_EHFINALLYRET block successors
+    unsigned   bbeCount; // size of `bbeSuccs` array
 
-    BBehfDesc() : bbeSuccs(nullptr), bbeCount(0)
+    BBehfDesc()
+        : bbeSuccs(nullptr)
+        , bbeCount(0)
     {
     }
 
@@ -1893,7 +2316,8 @@ struct BBehfDesc
 // BBEhfSuccList out-of-class-declaration implementations (here due to C++ ordering requirements).
 //
 
-inline BBEhfSuccList::BBEhfSuccList(BBehfDesc* bbeDesc) : m_bbeDesc(bbeDesc)
+inline BBEhfSuccList::BBEhfSuccList(BBehfDesc* bbeDesc)
+    : m_bbeDesc(bbeDesc)
 {
     assert(m_bbeDesc != nullptr);
     assert((m_bbeDesc->bbeSuccs != nullptr) || (m_bbeDesc->bbeCount == 0));
@@ -1909,11 +2333,12 @@ inline BBArrayIterator BBEhfSuccList::end() const
     return BBArrayIterator(m_bbeDesc->bbeSuccs + m_bbeDesc->bbeCount);
 }
 
-// BBSuccList out-of-class-declaration implementations
+// SuccList out-of-class-declaration implementations
 //
-inline BasicBlock::BBSuccList::BBSuccList(const BasicBlock* block)
+inline BasicBlock::SuccList::SuccList(const BasicBlock* block)
 {
     assert(block != nullptr);
+
     switch (block->bbKind)
     {
         case BBJ_THROW:
@@ -1930,24 +2355,24 @@ inline BasicBlock::BBSuccList::BBSuccList(const BasicBlock* block)
         case BBJ_EHCATCHRET:
         case BBJ_EHFILTERRET:
         case BBJ_LEAVE:
-            m_succs[0] = block->bbTarget;
+            m_succs[0] = block->GetTargetEdge();
             m_begin    = &m_succs[0];
             m_end      = &m_succs[1];
             break;
 
         case BBJ_COND:
-            m_succs[0] = block->bbFalseTarget;
+            m_succs[0] = block->GetFalseEdge();
             m_begin    = &m_succs[0];
 
             // If both fall-through and branch successors are identical, then only include
             // them once in the iteration (this is the same behavior as NumSucc()/GetSucc()).
-            if (block->TrueTargetIs(block->GetFalseTarget()))
+            if (block->TrueEdgeIs(block->GetFalseEdge()))
             {
                 m_end = &m_succs[1];
             }
             else
             {
-                m_succs[1] = block->bbTrueTarget;
+                m_succs[1] = block->GetTrueEdge();
                 m_end      = &m_succs[2];
             }
             break;
@@ -1983,16 +2408,6 @@ inline BasicBlock::BBSuccList::BBSuccList(const BasicBlock* block)
     assert(m_end >= m_begin);
 }
 
-inline BBArrayIterator BasicBlock::BBSuccList::begin() const
-{
-    return BBArrayIterator(m_begin);
-}
-
-inline BBArrayIterator BasicBlock::BBSuccList::end() const
-{
-    return BBArrayIterator(m_end);
-}
-
 // We have a simpler struct, BasicBlockList, which is simply a singly-linked
 // list of blocks.
 
@@ -2001,201 +2416,42 @@ struct BasicBlockList
     BasicBlockList* next;  // The next BasicBlock in the list, nullptr for end of list.
     BasicBlock*     block; // The BasicBlock of interest.
 
-    BasicBlockList() : next(nullptr), block(nullptr)
+    BasicBlockList()
+        : next(nullptr)
+        , block(nullptr)
     {
     }
 
-    BasicBlockList(BasicBlock* blk, BasicBlockList* rest) : next(rest), block(blk)
+    BasicBlockList(BasicBlock* blk, BasicBlockList* rest)
+        : next(rest)
+        , block(blk)
     {
     }
 };
 
-//-------------------------------------------------------------------------
-// FlowEdge -- control flow edge
-//
-// In compiler terminology the control flow between two BasicBlocks
-// is typically referred to as an "edge".  Most well known are the
-// backward branches for loops, which are often called "back-edges".
-//
-// "struct FlowEdge" is the type that represents our control flow edges.
-// This type is a linked list of zero or more "edges".
-// (The list of zero edges is represented by NULL.)
-// Every BasicBlock has a field called bbPreds of this type.  This field
-// represents the list of "edges" that flow into this BasicBlock.
-// The FlowEdge type only stores the BasicBlock* of the source for the
-// control flow edge.  The destination block for the control flow edge
-// is implied to be the block which contained the bbPreds field.
-//
-// For a switch branch target there may be multiple "edges" that have
-// the same source block (and destination block).  We need to count the
-// number of these edges so that during optimization we will know when
-// we have zero of them.  Rather than have extra FlowEdge entries we
-// track this via the DupCount property.
-//
-// When we have Profile weight for the BasicBlocks we can usually compute
-// the number of times each edge was executed by examining the adjacent
-// BasicBlock weights.  As we are doing for BasicBlocks, we call the number
-// of times that a control flow edge was executed the "edge weight".
-// In order to compute the edge weights we need to use a bounded range
-// for every edge weight. These two fields, 'flEdgeWeightMin' and 'flEdgeWeightMax'
-// are used to hold a bounded range.  Most often these will converge such
-// that both values are the same and that value is the exact edge weight.
-// Sometimes we are left with a rage of possible values between [Min..Max]
-// which represents an inexact edge weight.
-//
-// The bbPreds list is initially created by Compiler::fgLinkBasicBlocks()
-// and is incrementally kept up to date.
-//
-// The edge weight are computed by Compiler::fgComputeEdgeWeights()
-// the edge weights are used to straighten conditional branches
-// by Compiler::fgReorderBlocks()
-//
-struct FlowEdge
-{
-private:
-    // The next predecessor edge in the list, nullptr for end of list.
-    FlowEdge* m_nextPredEdge;
-
-    // The source of the control flow
-    BasicBlock* m_sourceBlock;
-
-    // The destination of the control flow
-    BasicBlock* m_destBlock;
-
-    // Edge weights
-    weight_t m_edgeWeightMin;
-    weight_t m_edgeWeightMax;
-
-    // Likelihood that m_sourceBlock transfers control along this edge.
-    // Values in range [0..1]
-    weight_t m_likelihood;
-
-    // The count of duplicate "edges" (used for switch stmts or degenerate branches)
-    unsigned m_dupCount;
-
-    // True if likelihood has been set
-    bool m_likelihoodSet;
-
-public:
-    FlowEdge(BasicBlock* sourceBlock, BasicBlock* destBlock, FlowEdge* rest)
-        : m_nextPredEdge(rest)
-        , m_sourceBlock(sourceBlock)
-        , m_destBlock(destBlock)
-        , m_edgeWeightMin(0)
-        , m_edgeWeightMax(0)
-        , m_likelihood(0)
-        , m_dupCount(0)
-        , m_likelihoodSet(false)
-    {
-    }
-
-    FlowEdge* getNextPredEdge() const
-    {
-        return m_nextPredEdge;
-    }
-
-    FlowEdge** getNextPredEdgeRef()
-    {
-        return &m_nextPredEdge;
-    }
-
-    void setNextPredEdge(FlowEdge* newEdge)
-    {
-        m_nextPredEdge = newEdge;
-    }
-
-    BasicBlock* getSourceBlock() const
-    {
-        assert(m_sourceBlock != nullptr);
-        return m_sourceBlock;
-    }
-
-    void setSourceBlock(BasicBlock* newBlock)
-    {
-        assert(newBlock != nullptr);
-        m_sourceBlock = newBlock;
-    }
-
-    BasicBlock* getDestinationBlock() const
-    {
-        assert(m_destBlock != nullptr);
-        return m_destBlock;
-    }
-
-    void setDestinationBlock(BasicBlock* newBlock)
-    {
-        assert(newBlock != nullptr);
-        m_destBlock = newBlock;
-    }
-
-    weight_t edgeWeightMin() const
-    {
-        return m_edgeWeightMin;
-    }
-
-    weight_t edgeWeightMax() const
-    {
-        return m_edgeWeightMax;
-    }
+// FlowEdge implementations (that are required to be defined after the declaration of BasicBlock)
 
-    // These two methods are used to set new values for edge weights.
-    // They return false if the newWeight is not between the current [min..max]
-    // when slop is non-zero we allow for the case where our weights might be off by 'slop'
-    //
-    bool setEdgeWeightMinChecked(weight_t newWeight, BasicBlock* bDst, weight_t slop, bool* wbUsedSlop);
-    bool setEdgeWeightMaxChecked(weight_t newWeight, BasicBlock* bDst, weight_t slop, bool* wbUsedSlop);
-    void setEdgeWeights(weight_t newMinWeight, weight_t newMaxWeight, BasicBlock* bDst);
-
-    weight_t getLikelihood() const
-    {
-        return m_likelihood;
-    }
-
-    void setLikelihood(weight_t likelihood)
-    {
-        assert(likelihood >= 0.0);
-        assert(likelihood <= 1.0);
-        m_likelihoodSet = true;
-        m_likelihood    = likelihood;
-    }
-
-    void clearLikelihood()
-    {
-        m_likelihood    = 0.0;
-        m_likelihoodSet = false;
-    }
-
-    bool hasLikelihood() const
-    {
-        return m_likelihoodSet;
-    }
-
-    weight_t getLikelyWeight() const
-    {
-        assert(m_likelihoodSet);
-        return m_likelihood * m_sourceBlock->bbWeight;
-    }
-
-    unsigned getDupCount() const
-    {
-        return m_dupCount;
-    }
+inline weight_t FlowEdge::getLikelyWeight() const
+{
+    assert(m_likelihoodSet);
+    return m_likelihood * m_sourceBlock->bbWeight;
+}
 
-    void incrementDupCount()
-    {
-        m_dupCount++;
-    }
+// BasicBlock iterator implementations (that are required to be defined after the declaration of FlowEdge)
 
-    void decrementDupCount()
-    {
-        assert(m_dupCount >= 1);
-        m_dupCount--;
-    }
-};
+inline BasicBlock* BBArrayIterator::operator*() const
+{
+    assert(m_edgeEntry != nullptr);
+    FlowEdge* edgeTarget = *m_edgeEntry;
+    assert(edgeTarget != nullptr);
+    assert(edgeTarget->getDestinationBlock() != nullptr);
+    return edgeTarget->getDestinationBlock();
+}
 
 // Pred list iterator implementations (that are required to be defined after the declaration of BasicBlock and FlowEdge)
 
-inline PredEdgeList::iterator::iterator(FlowEdge* pred) : m_pred(pred)
+inline PredEdgeList::iterator::iterator(FlowEdge* pred)
+    : m_pred(pred)
 {
 #ifdef DEBUG
     m_next = (m_pred == nullptr) ? nullptr : m_pred->getNextPredEdge();
@@ -2216,29 +2472,46 @@ inline PredEdgeList::iterator& PredEdgeList::iterator::operator++()
     return *this;
 }
 
-inline PredBlockList::iterator::iterator(FlowEdge* pred) : m_pred(pred)
+template <bool allowEdits>
+inline PredBlockList<allowEdits>::iterator::iterator(FlowEdge* pred)
+    : m_pred(pred)
 {
-#ifdef DEBUG
-    m_next = (m_pred == nullptr) ? nullptr : m_pred->getNextPredEdge();
-#endif
+    bool initNextPointer = allowEdits;
+    INDEBUG(initNextPointer = true);
+    if (initNextPointer)
+    {
+        m_next = (m_pred == nullptr) ? nullptr : m_pred->getNextPredEdge();
+    }
 }
 
-inline BasicBlock* PredBlockList::iterator::operator*() const
+template <bool allowEdits>
+inline BasicBlock* PredBlockList<allowEdits>::iterator::operator*() const
 {
     return m_pred->getSourceBlock();
 }
 
-inline PredBlockList::iterator& PredBlockList::iterator::operator++()
+template <bool allowEdits>
+inline typename PredBlockList<allowEdits>::iterator& PredBlockList<allowEdits>::iterator::operator++()
 {
-    FlowEdge* next = m_pred->getNextPredEdge();
+    if (allowEdits)
+    {
+        // For editing iterators, m_next is always used and maintained
+        m_pred = m_next;
+        m_next = (m_next == nullptr) ? nullptr : m_next->getNextPredEdge();
+    }
+    else
+    {
+        FlowEdge* next = m_pred->getNextPredEdge();
 
 #ifdef DEBUG
-    // Check that the next block is the one we expect to see.
-    assert(next == m_next);
-    m_next = (next == nullptr) ? nullptr : next->getNextPredEdge();
+        // If allowEdits=false, check that the next block is the one we expect to see.
+        assert(next == m_next);
+        m_next = (m_next == nullptr) ? nullptr : m_next->getNextPredEdge();
 #endif // DEBUG
 
-    m_pred = next;
+        m_pred = next;
+    }
+
     return *this;
 }
 
@@ -2256,7 +2529,8 @@ void* emitCodeGetCookie(const BasicBlock* block);
 class AllSuccessorEnumerator
 {
     BasicBlock* m_block;
-    union {
+    union
+    {
         // We store up to 4 successors inline in the enumerator. For ASP.NET
         // and libraries.pmi this is enough in 99.7% of cases.
         BasicBlock*  m_successors[4];
diff --git a/src/coreclr/jit/blockset.h b/src/coreclr/jit/blockset.h
index 83de7a5dad1e..f69e1e59ace3 100644
--- a/src/coreclr/jit/blockset.h
+++ b/src/coreclr/jit/blockset.h
@@ -24,10 +24,11 @@
 #include "compilerbitsettraits.h"
 #include "bitsetasshortlong.h"
 
-class BlockSetOps : public BitSetOps</*BitSetType*/ BitSetShortLongRep,
-                                     /*Brand*/ BSShortLong,
-                                     /*Env*/ Compiler*,
-                                     /*BitSetTraits*/ BasicBlockBitSetTraits>
+class BlockSetOps
+    : public BitSetOps</*BitSetType*/ BitSetShortLongRep,
+                       /*Brand*/ BSShortLong,
+                       /*Env*/ Compiler*,
+                       /*BitSetTraits*/ BasicBlockBitSetTraits>
 {
 public:
     // Specialize BlockSetOps::MakeFull(). Since we number basic blocks from one, we remove bit zero from
diff --git a/src/coreclr/jit/buildstring.cpp b/src/coreclr/jit/buildstring.cpp
index f432fec47475..3f0222ad2649 100644
--- a/src/coreclr/jit/buildstring.cpp
+++ b/src/coreclr/jit/buildstring.cpp
@@ -1,9 +1,9 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-#define STRINGIFY(L) #L
+#define STRINGIFY(L)     #L
 #define MAKESTRING(M, L) M(L)
-#define STRINGIZE(X) MAKESTRING(STRINGIFY, X)
+#define STRINGIZE(X)     MAKESTRING(STRINGIFY, X)
 
 #if defined(__clang__)
 #define BUILD_COMPILER                                                                                                 \
diff --git a/src/coreclr/jit/clrjit.natvis b/src/coreclr/jit/clrjit.natvis
index 95dd3dc30568..cfbc6a181e97 100644
--- a/src/coreclr/jit/clrjit.natvis
+++ b/src/coreclr/jit/clrjit.natvis
@@ -8,9 +8,8 @@ The .NET Foundation licenses this file to you under the MIT license.
 <!--
 Visual Studio debugger visualizers for RyuJIT.
 
-Documentation for VS natvis format: https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects?view=vs-2019
-
-Documentation for VS debugger format specifiers: https://docs.microsoft.com/en-us/visualstudio/debugger/format-specifiers-in-cpp?view=vs-2019
+Documentation for VS natvis format: https://learn.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects?view=vs-2022
+Documentation for VS debugger format specifiers: https://learn.microsoft.com/en-us/visualstudio/debugger/format-specifiers-in-cpp?view=vs-2022
 -->
 
 <AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
@@ -21,12 +20,17 @@ Documentation for VS debugger format specifiers: https://docs.microsoft.com/en-u
   </Type>
 
   <Type Name="BasicBlock">
-    <DisplayString Condition="bbKind==BBJ_COND || bbKind==BBJ_ALWAYS || bbKind==BBJ_LEAVE || bbKind==BBJ_EHCATCHRET || bbKind==BBJ_CALLFINALLY || bbKind==BBJ_CALLFINALLYRET || bbKind==BBJ_EHFILTERRET">BB{bbNum,d}->BB{bbTarget->bbNum,d}; {bbKind,en}</DisplayString>
+    <DisplayString Condition="bbKind==BBJ_COND || bbKind==BBJ_ALWAYS || bbKind==BBJ_LEAVE || bbKind==BBJ_EHCATCHRET || bbKind==BBJ_CALLFINALLY || bbKind==BBJ_CALLFINALLYRET || bbKind==BBJ_EHFILTERRET">BB{bbNum,d}->BB{bbTargetEdge->m_destBlock->bbNum,d}; {bbKind,en}</DisplayString>
     <DisplayString Condition="bbKind==BBJ_SWITCH">BB{bbNum,d}; {bbKind,en}; {bbSwtTargets->bbsCount} cases</DisplayString>
     <DisplayString Condition="bbKind==BBJ_EHFINALLYRET">BB{bbNum,d}; {bbKind,en}; {bbEhfTargets->bbeCount} succs</DisplayString>
     <DisplayString>BB{bbNum,d}; {bbKind,en}</DisplayString>
   </Type>
 
+  <Type Name="FlowEdge">
+    <DisplayString Condition="m_dupCount!=1">BB{m_sourceBlock->bbNum,d}->BB{m_destBlock->bbNum,d} ({m_likelihood,g}) (dup {m_dupCount,d})</DisplayString>
+    <DisplayString>BB{m_sourceBlock->bbNum,d}->BB{m_destBlock->bbNum,d} ({m_likelihood,g})</DisplayString>
+  </Type>
+
   <Type Name="Compiler::LoopDsc">
     <DisplayString Condition="lpFlags &amp; LPFLG_REMOVED">REMOVED</DisplayString>
     <DisplayString Condition="lpFlags &amp; LPFLG_HAS_PREHEAD">[BB{lpTop->bbNum,d}..BB{lpBottom->bbNum,d}] pre-h:BB{lpHead->bbNum,d} e:BB{lpEntry->bbNum,d} {lpFlags,en}</DisplayString>
@@ -86,6 +90,11 @@ Documentation for VS debugger format specifiers: https://docs.microsoft.com/en-u
     <DisplayString>{gtTreeID, d}: [{gtOper,en}, {gtType,en} V{((GenTreeLclFld*)this)-&gt;_gtLclNum,u}[+{((GenTreeLclFld*)this)-&gt;m_lclOffs,u}]]</DisplayString>
   </Type>
 
+  <!-- Scalar evolution -->
+  <Type Name="Scev">
+    <DisplayString>[{Oper,en}, {Type,en}]</DisplayString>
+  </Type>
+
   <!-- Register allocation -->
   <Type Name="LinearScan">
     <DisplayString>LinearScan</DisplayString>
@@ -169,6 +178,7 @@ Documentation for VS debugger format specifiers: https://docs.microsoft.com/en-u
   </Type>
 
   <Type Name="Interval">
+    <DisplayString Condition="this->isUpperVector">[U{this->relatedInterval->varNum,d}, #{this->intervalIndex, d}, reg={(regNumber)physReg, en}]</DisplayString>
     <DisplayString Condition="this->isLocalVar">[V{this->varNum,d}, #{this->intervalIndex, d}, reg={(regNumber)physReg, en}]</DisplayString>
     <DisplayString Condition="this->isConstant">[C{this->intervalIndex, d}, reg={(regNumber)physReg, en}]</DisplayString>
     <DisplayString>[I{this->intervalIndex, d}, reg={(regNumber)physReg, en}]</DisplayString>
diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h
index 7a43d08cd40f..161180a5fcf7 100644
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -101,9 +101,7 @@ class CodeGen final : public CodeGenInterface
         }
     }
 
-    static bool genShouldRoundFP();
-
-    static GenTreeIndir indirForm(var_types type, GenTree* base);
+    static GenTreeIndir    indirForm(var_types type, GenTree* base);
     static GenTreeStoreInd storeIndirForm(var_types type, GenTree* base, GenTree* data);
 
     GenTreeIntCon intForm(var_types type, ssize_t value);
@@ -175,12 +173,11 @@ class CodeGen final : public CodeGenInterface
     // the GC info.  Requires "codeSize" to be the size of the generated code, "prologSize" and "epilogSize"
     // to be the sizes of the prolog and epilog, respectively.  In DEBUG, makes a check involving the
     // "codePtr", assumed to be a pointer to the start of the generated code.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef JIT32_GCENCODER
     void* genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr));
-    void* genCreateAndStoreGCInfoJIT32(unsigned codeSize,
-                                       unsigned prologSize,
+    void* genCreateAndStoreGCInfoJIT32(unsigned            codeSize,
+                                       unsigned            prologSize,
                                        unsigned epilogSize DEBUGARG(void* codePtr));
 #else  // !JIT32_GCENCODER
     void genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr));
@@ -208,7 +205,7 @@ class CodeGen final : public CodeGenInterface
     unsigned genCurDispOffset;
 
     static const char* genInsName(instruction ins);
-    const char* genInsDisplayName(emitter::instrDesc* id);
+    const char*        genInsDisplayName(emitter::instrDesc* id);
 
     static const char* genSizeStr(emitAttr size);
 
@@ -276,6 +273,9 @@ class CodeGen final : public CodeGenInterface
 #else
     void genEnregisterOSRArgsAndLocals();
 #endif
+
+    void genHomeSwiftStructParameters(bool handleStack);
+
     void genCheckUseBlockInit();
 #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
     void genClearStackVec3ArgUpperBits();
@@ -319,11 +319,17 @@ class CodeGen final : public CodeGenInterface
         regNumber reg2;
         bool      useSaveNextPair;
 
-        RegPair(regNumber reg1) : reg1(reg1), reg2(REG_NA), useSaveNextPair(false)
+        RegPair(regNumber reg1)
+            : reg1(reg1)
+            , reg2(REG_NA)
+            , useSaveNextPair(false)
         {
         }
 
-        RegPair(regNumber reg1, regNumber reg2) : reg1(reg1), reg2(reg2), useSaveNextPair(false)
+        RegPair(regNumber reg1, regNumber reg2)
+            : reg1(reg1)
+            , reg2(reg2)
+            , useSaveNextPair(false)
         {
             assert(reg2 == REG_NEXT(reg1));
         }
@@ -366,8 +372,8 @@ class CodeGen final : public CodeGenInterface
 
     bool genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg);
 
-    void genPushFltRegs(regMaskTP regMask);
-    void genPopFltRegs(regMaskTP regMask);
+    void      genPushFltRegs(regMaskTP regMask);
+    void      genPopFltRegs(regMaskTP regMask);
     regMaskTP genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat);
 
     regMaskTP genJmpCallArgMask();
@@ -476,8 +482,6 @@ class CodeGen final : public CodeGenInterface
     // Save/Restore callee saved float regs to stack
     void genPreserveCalleeSavedFltRegs(unsigned lclFrameSize);
     void genRestoreCalleeSavedFltRegs(unsigned lclFrameSize);
-    // Generate VZeroupper instruction to avoid AVX/SSE transition penalty
-    void genVzeroupperIfNeeded(bool check256bitOnly = true);
 
 #endif // TARGET_XARCH
 
@@ -527,7 +531,6 @@ class CodeGen final : public CodeGenInterface
     //
     // Epilog functions
     //
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_ARM)
     bool genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog);
@@ -556,8 +559,6 @@ class CodeGen final : public CodeGenInterface
     void genFnProlog();
     void genFnEpilog(BasicBlock* block);
 
-#if defined(FEATURE_EH_FUNCLETS)
-
     void genReserveFuncletProlog(BasicBlock* block);
     void genReserveFuncletEpilog(BasicBlock* block);
     void genFuncletProlog(BasicBlock* block);
@@ -640,16 +641,6 @@ class CodeGen final : public CodeGenInterface
 
     void genUpdateCurrentFunclet(BasicBlock* block);
 
-#else // !FEATURE_EH_FUNCLETS
-
-    // This is a no-op when there are no funclets!
-    void genUpdateCurrentFunclet(BasicBlock* block)
-    {
-        return;
-    }
-
-#endif // !FEATURE_EH_FUNCLETS
-
     void genGeneratePrologsAndEpilogs();
 
 #if defined(DEBUG)
@@ -683,17 +674,17 @@ class CodeGen final : public CodeGenInterface
     void      genSinglePush();
     void      genSinglePop();
     regMaskTP genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs);
-    void genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs);
-
-/*
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XX                                                                           XX
-XX                           Debugging Support                               XX
-XX                                                                           XX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-*/
+    void      genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs);
+
+    /*
+    XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+    XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+    XX                                                                           XX
+    XX                           Debugging Support                               XX
+    XX                                                                           XX
+    XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+    XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+    */
 
 #ifdef DEBUG
     void genIPmappingDisp(unsigned mappingNum, const IPmappingDsc* ipMapping);
@@ -744,9 +735,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     void siOpenScopesForNonTrackedVars(const BasicBlock* block, unsigned int lastBlockILEndOffset);
 
 protected:
-#if defined(FEATURE_EH_FUNCLETS)
     bool siInFuncletRegion; // Have we seen the start of the funclet region?
-#endif                      // FEATURE_EH_FUNCLETS
 
     IL_OFFSET siLastEndOffs; // IL offset of the (exclusive) end of the last block processed
 
@@ -943,7 +932,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     void genCompareFloat(GenTree* treeNode);
     void genCompareInt(GenTree* treeNode);
 #ifdef TARGET_XARCH
-    bool genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opType);
+    bool     genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opType);
     GenTree* genTryFindFlagsConsumer(GenTree* flagsProducer, GenCondition** condition);
 #endif
 
@@ -978,13 +967,23 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #ifdef FEATURE_HW_INTRINSICS
     void genHWIntrinsic(GenTreeHWIntrinsic* node);
 #if defined(TARGET_XARCH)
-    void genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, regNumber reg, GenTree* rmOp);
+    void genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node,
+                             instruction         ins,
+                             emitAttr            attr,
+                             regNumber           reg,
+                             GenTree*            rmOp,
+                             insOpts             instOptions = INS_OPTS_NONE);
     void genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival);
     void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, insOpts instOptions);
     void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival);
     void genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr);
-    void genHWIntrinsic_R_R_R_RM(
-        instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTree* op3);
+    void genHWIntrinsic_R_R_R_RM(instruction ins,
+                                 emitAttr    attr,
+                                 regNumber   targetReg,
+                                 regNumber   op1Reg,
+                                 regNumber   op2Reg,
+                                 GenTree*    op3,
+                                 insOpts     instOptions = INS_OPTS_NONE);
     void genHWIntrinsic_R_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival);
 
     void genBaseIntrinsic(GenTreeHWIntrinsic* node);
@@ -996,7 +995,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     void genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
     void genAESIntrinsic(GenTreeHWIntrinsic* node);
     void genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
-    void genFMAIntrinsic(GenTreeHWIntrinsic* node);
+    void genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
     void genPermuteVar2x(GenTreeHWIntrinsic* node);
     void genLZCNTIntrinsic(GenTreeHWIntrinsic* node);
     void genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node);
@@ -1010,6 +1009,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
                                          regNumber                 baseReg,
                                          regNumber                 offsReg,
                                          HWIntrinsicSwitchCaseBody emitSwCase);
+
+    void genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* node, GenTree* lastOp);
 #endif // defined(TARGET_XARCH)
 
 #ifdef TARGET_ARM64
@@ -1104,12 +1105,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     void genSpillLocal(unsigned varNum, var_types type, GenTreeLclVar* lclNode, regNumber regNum);
     void genUnspillLocal(
         unsigned varNum, var_types type, GenTreeLclVar* lclNode, regNumber regNum, bool reSpill, bool isLastUse);
-    void genUnspillRegIfNeeded(GenTree* tree);
-    void genUnspillRegIfNeeded(GenTree* tree, unsigned multiRegIndex);
+    void      genUnspillRegIfNeeded(GenTree* tree);
+    void      genUnspillRegIfNeeded(GenTree* tree, unsigned multiRegIndex);
     regNumber genConsumeReg(GenTree* tree);
     regNumber genConsumeReg(GenTree* tree, unsigned multiRegIndex);
-    void genCopyRegIfNeeded(GenTree* tree, regNumber needReg);
-    void genConsumeRegAndCopy(GenTree* tree, regNumber needReg);
+    void      genCopyRegIfNeeded(GenTree* tree, regNumber needReg);
+    void      genConsumeRegAndCopy(GenTree* tree, regNumber needReg);
 
     void genConsumeIfReg(GenTree* tree)
     {
@@ -1119,15 +1120,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         }
     }
 
-    void genRegCopy(GenTree* tree);
+    void      genRegCopy(GenTree* tree);
     regNumber genRegCopy(GenTree* tree, unsigned multiRegIndex);
-    void genTransferRegGCState(regNumber dst, regNumber src);
-    void genConsumeAddress(GenTree* addr);
-    void genConsumeAddrMode(GenTreeAddrMode* mode);
-    void genSetBlockSize(GenTreeBlk* blkNode, regNumber sizeReg);
-    void genConsumeBlockSrc(GenTreeBlk* blkNode);
-    void genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg);
-    void genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
+    void      genTransferRegGCState(regNumber dst, regNumber src);
+    void      genConsumeAddress(GenTree* addr);
+    void      genConsumeAddrMode(GenTreeAddrMode* mode);
+    void      genSetBlockSize(GenTreeBlk* blkNode, regNumber sizeReg);
+    void      genConsumeBlockSrc(GenTreeBlk* blkNode);
+    void      genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg);
+    void      genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
 
 #ifdef FEATURE_PUT_STRUCT_ARG_STK
     void genConsumePutStructArgStk(GenTreePutArgStk* putArgStkNode,
@@ -1172,10 +1173,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     void genCodeForCpObj(GenTreeBlk* cpObjNode);
     void genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode);
     void genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode);
-#ifndef TARGET_X86
-    void genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode);
-#endif
     void genCodeForPhysReg(GenTreePhysReg* tree);
+#ifdef SWIFT_SUPPORT
+    void genCodeForSwiftErrorReg(GenTree* tree);
+#endif // SWIFT_SUPPORT
     void genCodeForNullCheck(GenTreeIndir* tree);
     void genCodeForCmpXchg(GenTreeCmpXchg* tree);
     void genCodeForReuseVal(GenTree* treeNode);
@@ -1235,10 +1236,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     unsigned genMove4IfNeeded(unsigned size, regNumber tmpReg, GenTree* src, unsigned offset);
     unsigned genMove2IfNeeded(unsigned size, regNumber tmpReg, GenTree* src, unsigned offset);
     unsigned genMove1IfNeeded(unsigned size, regNumber tmpReg, GenTree* src, unsigned offset);
-    void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
-    void genStoreRegToStackArg(var_types type, regNumber reg, int offset);
-    void genStructPutArgRepMovs(GenTreePutArgStk* putArgStkNode);
-    void genStructPutArgUnroll(GenTreePutArgStk* putArgStkNode);
+    void     genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
+    void     genStoreRegToStackArg(var_types type, regNumber reg, int offset);
+    void     genStructPutArgRepMovs(GenTreePutArgStk* putArgStkNode);
+    void     genStructPutArgUnroll(GenTreePutArgStk* putArgStkNode);
 #ifdef TARGET_X86
     void genStructPutArgPush(GenTreePutArgStk* putArgStkNode);
 #else
@@ -1246,15 +1247,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #endif
 #endif // FEATURE_PUT_STRUCT_ARG_STK
 
-    void genCodeForStoreBlk(GenTreeBlk* storeBlkNode);
-#ifndef TARGET_X86
-    void genCodeForInitBlkHelper(GenTreeBlk* initBlkNode);
-#endif
-    void genCodeForInitBlkLoop(GenTreeBlk* initBlkNode);
-    void genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode);
-    void genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode);
-    void genJumpTable(GenTree* tree);
-    void genTableBasedSwitch(GenTree* tree);
+    void     genCodeForStoreBlk(GenTreeBlk* storeBlkNode);
+    void     genCodeForInitBlkLoop(GenTreeBlk* initBlkNode);
+    void     genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode);
+    void     genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode);
+    unsigned genEmitJumpTable(GenTree* treeNode, bool relativeAddr);
+    void     genJumpTable(GenTree* tree);
+    void     genTableBasedSwitch(GenTree* tree);
 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
     instruction genGetInsForOper(GenTree* treeNode);
 #else
@@ -1264,13 +1263,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
                                       regNumber     targetReg,
                                       GenTreeIndir* indir,
                                       bool*         needsBarrier);
-    bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data);
-    GenTree* getCallTarget(const GenTreeCall* call, CORINFO_METHOD_HANDLE* methHnd);
-    regNumber getCallIndirectionCellReg(GenTreeCall* call);
-    void genCall(GenTreeCall* call);
-    void genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackArgBytes));
-    void genDefinePendingCallLabel(GenTreeCall* call);
-    void genJmpMethod(GenTree* jmp);
+    bool        genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data);
+    GenTree*    getCallTarget(const GenTreeCall* call, CORINFO_METHOD_HANDLE* methHnd);
+    regNumber   getCallIndirectionCellReg(GenTreeCall* call);
+    void        genCall(GenTreeCall* call);
+    void        genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackArgBytes));
+    void        genDefinePendingCallLabel(GenTreeCall* call);
+    void        genJmpMethod(GenTree* jmp);
     BasicBlock* genCallFinally(BasicBlock* block);
 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
     // TODO: refactor for LA.
@@ -1281,11 +1280,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     void genCodeForBfiz(GenTreeOp* tree);
 #endif // TARGET_ARM64
 
-#if defined(FEATURE_EH_FUNCLETS)
     void genEHCatchRet(BasicBlock* block);
-#else  // !FEATURE_EH_FUNCLETS
+#if defined(FEATURE_EH_WINDOWS_X86)
     void genEHFinallyOrFilterRet(BasicBlock* block);
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
     void genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode);
     void genMultiRegStoreToLocal(GenTreeLclVar* lclNode);
@@ -1312,13 +1310,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     void genReturn(GenTree* treeNode);
 
 #ifdef TARGET_XARCH
-    void genStackPointerConstantAdjustment(ssize_t spDelta, bool trackSpAdjustments);
-    void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool trackSpAdjustments);
+    void           genStackPointerConstantAdjustment(ssize_t spDelta, bool trackSpAdjustments);
+    void           genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool trackSpAdjustments);
     target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool trackSpAdjustments);
-    void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta);
+    void           genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta);
 #else  // !TARGET_XARCH
-    void genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp);
-    void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp);
+    void           genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp);
+    void           genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp);
     target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp);
 #endif // !TARGET_XARCH
 
@@ -1352,8 +1350,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 #ifdef DEBUG
     GenTree* lastConsumedNode;
-    void genNumberOperandUse(GenTree* const operand, int& useNum) const;
-    void genCheckConsumeNode(GenTree* const node);
+    void     genNumberOperandUse(GenTree* const operand, int& useNum) const;
+    void     genCheckConsumeNode(GenTree* const node);
 #else  // !DEBUG
     inline void genCheckConsumeNode(GenTree* treeNode)
     {
@@ -1431,7 +1429,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 #if defined(TARGET_XARCH)
 
-    enum class OperandKind{
+    enum class OperandKind
+    {
         ClsVar, // [CLS_VAR_ADDR]                 - "C" in the emitter.
         Local,  // [Local or spill temp + offset] - "S" in the emitter.
         Indir,  // [base+index*scale+disp]        - "A" in the emitter.
@@ -1442,7 +1441,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     class OperandDesc
     {
         OperandKind m_kind;
-        union {
+        union
+        {
             struct
             {
                 CORINFO_FIELD_HANDLE m_fieldHnd;
@@ -1470,30 +1470,45 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         };
 
     public:
-        OperandDesc(CORINFO_FIELD_HANDLE fieldHnd) : m_kind(OperandKind::ClsVar), m_fieldHnd(fieldHnd)
+        OperandDesc(CORINFO_FIELD_HANDLE fieldHnd)
+            : m_kind(OperandKind::ClsVar)
+            , m_fieldHnd(fieldHnd)
         {
         }
 
-        OperandDesc(int varNum, uint16_t offset) : m_kind(OperandKind::Local), m_varNum(varNum), m_offset(offset)
+        OperandDesc(int varNum, uint16_t offset)
+            : m_kind(OperandKind::Local)
+            , m_varNum(varNum)
+            , m_offset(offset)
         {
         }
 
         OperandDesc(GenTreeIndir* indir)
-            : m_kind(OperandKind::Indir), m_addr(indir->Addr()), m_indir(indir), m_indirType(indir->TypeGet())
+            : m_kind(OperandKind::Indir)
+            , m_addr(indir->Addr())
+            , m_indir(indir)
+            , m_indirType(indir->TypeGet())
         {
         }
 
         OperandDesc(var_types indirType, GenTree* addr)
-            : m_kind(OperandKind::Indir), m_addr(addr), m_indir(nullptr), m_indirType(indirType)
+            : m_kind(OperandKind::Indir)
+            , m_addr(addr)
+            , m_indir(nullptr)
+            , m_indirType(indirType)
         {
         }
 
         OperandDesc(ssize_t immediate, bool immediateNeedsReloc)
-            : m_kind(OperandKind::Imm), m_immediate(immediate), m_immediateNeedsReloc(immediateNeedsReloc)
+            : m_kind(OperandKind::Imm)
+            , m_immediate(immediate)
+            , m_immediateNeedsReloc(immediateNeedsReloc)
         {
         }
 
-        OperandDesc(regNumber reg) : m_kind(OperandKind::Reg), m_reg(reg)
+        OperandDesc(regNumber reg)
+            : m_kind(OperandKind::Reg)
+            , m_reg(reg)
         {
         }
 
@@ -1683,7 +1698,9 @@ class CodeGenPhase final : public Phase
 {
 public:
     CodeGenPhase(CodeGen* _codeGen, Phases _phase, void (CodeGen::*_action)())
-        : Phase(_codeGen->GetCompiler(), _phase), codeGen(_codeGen), action(_action)
+        : Phase(_codeGen->GetCompiler(), _phase)
+        , codeGen(_codeGen)
+        , action(_action)
     {
     }
 
diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp
index 4a8c08a89858..2c010f116a26 100644
--- a/src/coreclr/jit/codegenarm.cpp
+++ b/src/coreclr/jit/codegenarm.cpp
@@ -173,9 +173,9 @@ void CodeGen::genEHCatchRet(BasicBlock* block)
 //------------------------------------------------------------------------
 // instGen_Set_Reg_To_Imm: Move an immediate value into an integer register.
 //
-void CodeGen::instGen_Set_Reg_To_Imm(emitAttr  size,
-                                     regNumber reg,
-                                     ssize_t   imm,
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr       size,
+                                     regNumber      reg,
+                                     ssize_t        imm,
                                      insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
 {
     // reg cannot be a FP register
@@ -647,29 +647,7 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode)
 //
 void CodeGen::genJumpTable(GenTree* treeNode)
 {
-    noway_assert(compiler->compCurBB->KindIs(BBJ_SWITCH));
-    assert(treeNode->OperGet() == GT_JMPTABLE);
-
-    unsigned     jumpCount = compiler->compCurBB->GetSwitchTargets()->bbsCount;
-    BasicBlock** jumpTable = compiler->compCurBB->GetSwitchTargets()->bbsDstTab;
-    unsigned     jmpTabBase;
-
-    jmpTabBase = GetEmitter()->emitBBTableDataGenBeg(jumpCount, false);
-
-    JITDUMP("\n      J_M%03u_DS%02u LABEL   DWORD\n", compiler->compMethodID, jmpTabBase);
-
-    for (unsigned i = 0; i < jumpCount; i++)
-    {
-        BasicBlock* target = *jumpTable++;
-        noway_assert(target->HasFlag(BBF_HAS_LABEL));
-
-        JITDUMP("            DD      L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum);
-
-        GetEmitter()->emitDataGenData(i, target);
-    }
-
-    GetEmitter()->emitDataGenEnd();
-
+    unsigned jmpTabBase = genEmitJumpTable(treeNode, false);
     genMov32RelocatableDataLabel(jmpTabBase, treeNode->GetRegNum());
 
     genProduceReg(treeNode);
@@ -1673,7 +1651,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
                                    callTargetReg, // ireg
                                    REG_NA, 0, 0,  // xreg, xmul, disp
                                    false          // isJump
-                                   );
+        );
     }
     else
     {
@@ -1682,7 +1660,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
                                    gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), REG_NA, REG_NA, 0,
                                    0,    /* ilOffset, ireg, xreg, xmul, disp */
                                    false /* isJump */
-                                   );
+        );
     }
 
     regSet.verifyRegistersUsed(RBM_CALLEE_TRASH);
@@ -1732,7 +1710,10 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
                       0,           // argSize. Again, we have to lie about it
                       EA_UNKNOWN); // retSize
 
-    if (initReg == argReg)
+    // If initReg is trashed, either because it was an arg to the enter
+    // callback, or because the enter callback itself trashes it, then it needs
+    // to be zero'ed again before using.
+    if (((RBM_PROFILER_ENTER_TRASH | RBM_PROFILER_ENTER_ARG) & genRegMask(initReg)) != RBM_NONE)
     {
         *pInitRegZeroed = false;
     }
@@ -2171,7 +2152,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
 {
     assert(compiler->compGeneratingEpilog);
 
-    regMaskTP maskPopRegs      = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+    regMaskTP maskPopRegs      = regSet.rsGetModifiedCalleeSavedRegsMask();
     regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT;
     regMaskTP maskPopRegsInt   = maskPopRegs & ~maskPopRegsFloat;
 
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index 4efe4a235f6a..cd1b1558d93e 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -36,7 +36,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
 {
     assert(compiler->compGeneratingEpilog);
 
-    regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+    regMaskTP rsRestoreRegs = regSet.rsGetModifiedCalleeSavedRegsMask();
 
     if (isFramePointerUsed())
     {
@@ -432,7 +432,7 @@ void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool*
     {
         // spDelta is negative in the prolog, positive in the epilog, but we always tell the unwind codes the positive
         // value.
-        ssize_t  spDeltaAbs    = abs(spDelta);
+        ssize_t  spDeltaAbs    = std::abs(spDelta);
         unsigned unwindSpDelta = (unsigned)spDeltaAbs;
         assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in a unsigned
 
@@ -1884,8 +1884,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
 
         if (compiler->lvaPSPSym != BAD_VAR_NUM)
         {
-            if (CallerSP_to_PSP_slot_delta !=
-                compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
+            if (CallerSP_to_PSP_slot_delta != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for
+                                                                                                           // debugging
             {
                 printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
                        compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
@@ -2216,9 +2216,9 @@ void CodeGen::genEHCatchRet(BasicBlock* block)
 
 //  move an immediate value into an integer register
 
-void CodeGen::instGen_Set_Reg_To_Imm(emitAttr  size,
-                                     regNumber reg,
-                                     ssize_t   imm,
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr       size,
+                                     regNumber      reg,
+                                     ssize_t        imm,
                                      insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
 {
     // reg cannot be a FP register
@@ -2333,6 +2333,11 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
             if (con->ImmedValNeedsReloc(compiler))
             {
                 attr = EA_SET_FLG(attr, EA_CNS_RELOC_FLG);
+                if (tree->IsTlsIconHandle())
+                {
+                    // no need to generate because we generate it as part of GT_CALL
+                    break;
+                }
             }
 
             if (targetType == TYP_BYREF)
@@ -3621,7 +3626,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
     unsigned     slots  = layout->GetSlotCount();
 
     // Temp register(s) used to perform the sequence of loads and stores.
-    regNumber tmpReg  = cpObjNode->ExtractTempReg();
+    regNumber tmpReg  = cpObjNode->ExtractTempReg(RBM_ALLINT);
     regNumber tmpReg2 = REG_NA;
 
     assert(genIsValidIntReg(tmpReg));
@@ -3630,7 +3635,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
 
     if (slots > 1)
     {
-        tmpReg2 = cpObjNode->GetSingleTempReg();
+        tmpReg2 = cpObjNode->ExtractTempReg(RBM_ALLINT);
         assert(tmpReg2 != tmpReg);
         assert(genIsValidIntReg(tmpReg2));
         assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF);
@@ -3677,26 +3682,60 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
     {
         unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();
 
+        // We might also need SIMD regs if we have 4 or more continuous non-gc slots
+        // On ARM64, SIMD loads/stores provide 8-byte atomicity guarantees when aligned to 8 bytes.
+        regNumber tmpSimdReg1 = REG_NA;
+        regNumber tmpSimdReg2 = REG_NA;
+        if ((slots >= 4) && compiler->IsBaselineSimdIsaSupported())
+        {
+            tmpSimdReg1 = cpObjNode->ExtractTempReg(RBM_ALLFLOAT);
+            tmpSimdReg2 = cpObjNode->ExtractTempReg(RBM_ALLFLOAT);
+        }
+
         unsigned i = 0;
         while (i < slots)
         {
             if (!layout->IsGCPtr(i))
             {
-                // Check if the next slot's type is also TYP_GC_NONE and use ldp/stp
-                if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
+                // How many continuous non-gc slots do we have?
+                unsigned nonGcSlots = 0;
+                do
                 {
-                    emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF,
-                                          2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
-                    emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF,
-                                          2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
-                    ++i; // extra increment of i, since we are copying two items
-                }
-                else
+                    nonGcSlots++;
+                    i++;
+                } while ((i < slots) && !layout->IsGCPtr(i));
+
+                const regNumber srcReg = REG_WRITE_BARRIER_SRC_BYREF;
+                const regNumber dstReg = REG_WRITE_BARRIER_DST_BYREF;
+                while (nonGcSlots > 0)
                 {
-                    emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
-                                        INS_OPTS_POST_INDEX);
-                    emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
-                                        INS_OPTS_POST_INDEX);
+                    regNumber tmp1 = tmpReg;
+                    regNumber tmp2 = tmpReg2;
+                    emitAttr  size = EA_8BYTE;
+                    insOpts   opts = INS_OPTS_POST_INDEX;
+
+                    // Copy at least two slots at a time
+                    if (nonGcSlots >= 2)
+                    {
+                        // Do 4 slots at a time if SIMD is supported
+                        if ((nonGcSlots >= 4) && compiler->IsBaselineSimdIsaSupported())
+                        {
+                            // We need SIMD temp regs now
+                            tmp1 = tmpSimdReg1;
+                            tmp2 = tmpSimdReg2;
+                            size = EA_16BYTE;
+                            nonGcSlots -= 2;
+                        }
+                        nonGcSlots -= 2;
+                        emit->emitIns_R_R_R_I(INS_ldp, size, tmp1, tmp2, srcReg, EA_SIZE(size) * 2, opts);
+                        emit->emitIns_R_R_R_I(INS_stp, size, tmp1, tmp2, dstReg, EA_SIZE(size) * 2, opts);
+                    }
+                    else
+                    {
+                        nonGcSlots--;
+                        emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmp1, srcReg, EA_SIZE(size), opts);
+                        emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmp1, dstReg, EA_SIZE(size), opts);
+                    }
                 }
             }
             else
@@ -3704,8 +3743,8 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
                 // In the case of a GC-Pointer we'll call the ByRef write barrier helper
                 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
                 gcPtrCount--;
+                i++;
             }
-            ++i;
         }
         assert(gcPtrCount == 0);
     }
@@ -3745,32 +3784,7 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode)
 // emits the table and an instruction to get the address of the first element
 void CodeGen::genJumpTable(GenTree* treeNode)
 {
-    noway_assert(compiler->compCurBB->KindIs(BBJ_SWITCH));
-    assert(treeNode->OperGet() == GT_JMPTABLE);
-
-    unsigned     jumpCount = compiler->compCurBB->GetSwitchTargets()->bbsCount;
-    BasicBlock** jumpTable = compiler->compCurBB->GetSwitchTargets()->bbsDstTab;
-    unsigned     jmpTabOffs;
-    unsigned     jmpTabBase;
-
-    jmpTabBase = GetEmitter()->emitBBTableDataGenBeg(jumpCount, true);
-
-    jmpTabOffs = 0;
-
-    JITDUMP("\n      J_M%03u_DS%02u LABEL   DWORD\n", compiler->compMethodID, jmpTabBase);
-
-    for (unsigned i = 0; i < jumpCount; i++)
-    {
-        BasicBlock* target = *jumpTable++;
-        noway_assert(target->HasFlag(BBF_HAS_LABEL));
-
-        JITDUMP("            DD      L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum);
-
-        GetEmitter()->emitDataGenData(i, target);
-    };
-
-    GetEmitter()->emitDataGenEnd();
-
+    unsigned jmpTabBase = genEmitJumpTable(treeNode, true);
     // Access to inline data is 'abstracted' by a special type of static member
     // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
     // to constant data, not a real static field.
@@ -5116,7 +5130,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
                                gcInfo.gcRegByrefSetCur, DebugInfo(), callTarget, /* ireg */
                                REG_NA, 0, 0,                                     /* xreg, xmul, disp */
                                false                                             /* isJump */
-                               );
+    );
 
     regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
     regSet.verifyRegistersUsed(killMask);
@@ -5441,7 +5455,11 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
 
     genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
 
-    if ((genRegMask(initReg) & RBM_PROFILER_ENTER_TRASH) != RBM_NONE)
+    // If initReg is trashed, either because it was an arg to the enter
+    // callback, or because the enter callback itself trashes it, then it needs
+    // to be zero'ed again before using.
+    if (((RBM_PROFILER_ENTER_TRASH | RBM_PROFILER_ENTER_ARG_FUNC_ID | RBM_PROFILER_ENTER_ARG_CALLER_SP) &
+         genRegMask(initReg)) != RBM_NONE)
     {
         *pInitRegZeroed = false;
     }
@@ -5725,8 +5743,8 @@ void CodeGen::genCodeForBfiz(GenTreeOp* tree)
     GenTree*     castOp     = cast->CastOp();
 
     genConsumeRegs(castOp);
-    unsigned srcBits = varTypeIsSmall(cast->CastToType()) ? genTypeSize(cast->CastToType()) * BITS_PER_BYTE
-                                                          : genTypeSize(castOp) * BITS_PER_BYTE;
+    unsigned   srcBits    = varTypeIsSmall(cast->CastToType()) ? genTypeSize(cast->CastToType()) * BITS_PER_BYTE
+                                                               : genTypeSize(castOp) * BITS_PER_BYTE;
     const bool isUnsigned = cast->IsUnsigned() || varTypeIsUnsigned(cast->CastToType());
     GetEmitter()->emitIns_R_R_I_I(isUnsigned ? INS_ubfiz : INS_sbfiz, size, tree->GetRegNum(), castOp->GetRegNum(),
                                   (int)shiftByImm, (int)srcBits);
diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp
index 619047a20102..52633ed6733e 100644
--- a/src/coreclr/jit/codegenarm64test.cpp
+++ b/src/coreclr/jit/codegenarm64test.cpp
@@ -4581,6 +4581,14 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R(INS_sve_subr, EA_SCALABLE, REG_V2, REG_P0, REG_V13,
                               INS_OPTS_SCALABLE_S); // SUBR    <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
 
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+    // IF_SVE_AB_3B
+    theEmitter->emitIns_R_R_R(INS_sve_addpt, EA_SCALABLE, REG_V0, REG_P1, REG_V2,
+                              INS_OPTS_SCALABLE_D); // ADDPT <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D
+    theEmitter->emitIns_R_R_R(INS_sve_subpt, EA_SCALABLE, REG_V0, REG_P1, REG_V2,
+                              INS_OPTS_SCALABLE_D); // SUBPT <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D
+#endif                                              // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+
     // IF_SVE_AC_3A
     theEmitter->emitIns_R_R_R(INS_sve_sdiv, EA_SCALABLE, REG_V3, REG_P2, REG_V9,
                               INS_OPTS_SCALABLE_S); // SDIV     <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
@@ -4725,6 +4733,54 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_P0, REG_V0, INS_OPTS_SCALABLE_S,
                               INS_SCALABLE_OPTS_WIDE); // LSR     <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D
 
+    // IF_SVE_CE_2A
+    theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_P2, REG_V12, INS_OPTS_SCALABLE_B,
+                            INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV    <Pd>.B, <Zn>
+    theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_P7, REG_V2, INS_OPTS_SCALABLE_H,
+                            INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV    <Pd>.H, <Zn>[0]
+
+    // IF_SVE_CE_2B
+    theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P15, REG_V7, 7, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV    <Pd>.D, <Zn>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P7, REG_V16, 0, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV    <Pd>.D, <Zn>[<imm>]
+
+    // IF_SVE_CE_2C
+    theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P0, REG_V31, 1, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV    <Pd>.H, <Zn>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P1, REG_V1, 0, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV    <Pd>.H, <Zn>[<imm>]
+
+    // IF_SVE_CE_2D
+    theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P3, REG_V9, 3, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV    <Pd>.S, <Zn>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P10, REG_V4, 0, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV    <Pd>.S, <Zn>[<imm>]
+
+    // IF_SVE_CF_2A
+    theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_V11, REG_P12, INS_OPTS_SCALABLE_B,
+                            INS_SCALABLE_OPTS_TO_VECTOR); // PMOV    <Zd>, <Pn>.B
+    theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_V2, REG_P7, INS_OPTS_SCALABLE_S,
+                            INS_SCALABLE_OPTS_TO_VECTOR); // PMOV    <Zd>[0], <Pn>.S
+
+    // IF_SVE_CF_2B
+    theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V6, REG_P8, 7, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_TO_VECTOR); // PMOV    <Zd>[<imm>], <Pn>.D
+    theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V9, REG_P7, 0, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_TO_VECTOR); // PMOV    <Zd>[<imm>], <Pn>.D
+
+    // IF_SVE_CF_2C
+    theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V8, REG_P4, 1, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_TO_VECTOR); // PMOV    <Zd>[<imm>], <Pn>.H
+    theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V5, REG_P9, 0, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_TO_VECTOR); // PMOV    <Zd>[<imm>], <Pn>.H
+
+    // IF_SVE_CF_2D
+    theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V14, REG_P2, 3, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_TO_VECTOR); // PMOV    <Zd>[<imm>], <Pn>.S
+    theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V3, REG_P15, 0, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_TO_VECTOR); // PMOV    <Zd>[<imm>], <Pn>.S
+
     // IF_SVE_CJ_2A
     theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_P1, REG_P2,
                             INS_OPTS_SCALABLE_B); // REV     <Pd>.<T>, <Pn>.<T>
@@ -4876,16 +4932,16 @@ void CodeGen::genArm64EmitterUnitTestsSve()
                                 INS_OPTS_SCALABLE_B); /* SEL     <Pd>.B, <Pg>, <Pn>.B, <Pm>.B */
 
     // IF_SVE_CZ_4A_A
-    theEmitter->emitIns_R_R(INS_sve_movs, EA_SCALABLE, REG_P0, REG_P15,
-                            INS_OPTS_SCALABLE_B); /* MOVS    <Pd>.B, <Pn>.B */
+    theEmitter->emitIns_R_R(INS_sve_movs, EA_SCALABLE, REG_P0, REG_P15, INS_OPTS_SCALABLE_B); /* MOVS    <Pd>.B, <Pn>.B
+                                                                                               */
 
     // IF_SVE_CZ_4A_K
     theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_P0, REG_P8, REG_P15, INS_OPTS_SCALABLE_B,
                               INS_SCALABLE_OPTS_PREDICATE_MERGE); /* MOV     <Pd>.B, <Pg>/M, <Pn>.B */
 
     // IF_SVE_CZ_4A_L
-    theEmitter->emitIns_R_R(INS_sve_mov, EA_SCALABLE, REG_P0, REG_P15,
-                            INS_OPTS_SCALABLE_B); /* MOV     <Pd>.B, <Pn>.B */
+    theEmitter->emitIns_R_R(INS_sve_mov, EA_SCALABLE, REG_P0, REG_P15, INS_OPTS_SCALABLE_B); /* MOV     <Pd>.B, <Pn>.B
+                                                                                              */
 
     // IF_SVE_DA_4A
     theEmitter->emitIns_R_R_R_R(INS_sve_brkpa, EA_SCALABLE, REG_P0, REG_P1, REG_P10, REG_P15,
@@ -5035,6 +5091,18 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R_R(INS_sve_nmatch, EA_SCALABLE, REG_P0, REG_P7, REG_V11, REG_V31,
                                 INS_OPTS_SCALABLE_H); // NMATCH  <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
 
+    // IF_SVE_GQ_3A
+    theEmitter->emitIns_R_R_R(INS_sve_bfcvtnt, EA_SCALABLE, REG_V3, REG_P0, REG_V4); // BFCVTNT <Zd>.H, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtlt, EA_SCALABLE, REG_V0, REG_P7, REG_V1,
+                              INS_OPTS_S_TO_D); // FCVTLT  <Zd>.D, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtlt, EA_SCALABLE, REG_V14, REG_P7, REG_V20,
+                              INS_OPTS_H_TO_S); // FCVTLT  <Zd>.S, <Pg>/M, <Zn>.H
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtnt, EA_SCALABLE, REG_V18, REG_P3, REG_V9,
+                              INS_OPTS_S_TO_H); // FCVTNT  <Zd>.H, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtnt, EA_SCALABLE, REG_V12, REG_P3, REG_V5,
+                              INS_OPTS_D_TO_S);                                      // FCVTNT  <Zd>.S, <Pg>/M, <Zn>.D
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtxnt, EA_SCALABLE, REG_V1, REG_P2, REG_V3); // FCVTXNT <Zd>.S, <Pg>/M, <Zn>.D
+
     // IF_SVE_GR_3A
     theEmitter->emitIns_R_R_R(INS_sve_faddp, EA_SCALABLE, REG_V16, REG_P3, REG_V19,
                               INS_OPTS_SCALABLE_H); // FADDP   <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
@@ -5089,6 +5157,106 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R(INS_sve_fsubr, EA_SCALABLE, REG_V6, REG_P4, REG_V29,
                               INS_OPTS_SCALABLE_D); // FSUBR   <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
 
+    // IF_SVE_HL_3B
+    theEmitter->emitIns_R_R_R(INS_sve_bfadd, EA_SCALABLE, REG_V0, REG_P0, REG_V1,
+                              INS_OPTS_SCALABLE_H); // BFADD <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_bfmax, EA_SCALABLE, REG_V2, REG_P1, REG_V3,
+                              INS_OPTS_SCALABLE_H); // BFMAX <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_bfmaxnm, EA_SCALABLE, REG_V4, REG_P2, REG_V5,
+                              INS_OPTS_SCALABLE_H); // BFMAXNM <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_bfmin, EA_SCALABLE, REG_V6, REG_P3, REG_V7,
+                              INS_OPTS_SCALABLE_H); // BFMIN <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_bfminnm, EA_SCALABLE, REG_V8, REG_P4, REG_V9,
+                              INS_OPTS_SCALABLE_H); // BFMINNM <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_bfmul, EA_SCALABLE, REG_V10, REG_P5, REG_V11,
+                              INS_OPTS_SCALABLE_H); // BFMUL <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_bfsub, EA_SCALABLE, REG_V12, REG_P6, REG_V13,
+                              INS_OPTS_SCALABLE_H); // BFSUB <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H
+
+    // IF_SVE_HO_3A
+    theEmitter->emitIns_R_R_R(INS_sve_bfcvt, EA_SCALABLE, REG_V3, REG_P2, REG_V9,
+                              INS_OPTS_S_TO_H); // BFCVT   <Zd>.H, <Pg>/M, <Zn>.S
+
+    // IF_SVE_HO_3B
+    theEmitter->emitIns_R_R_R(INS_sve_fcvt, EA_SCALABLE, REG_V7, REG_P7, REG_V1,
+                              INS_OPTS_S_TO_D); // FCVT    <Zd>.D, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_fcvt, EA_SCALABLE, REG_V29, REG_P3, REG_V12,
+                              INS_OPTS_D_TO_S); // FCVT    <Zd>.S, <Pg>/M, <Zn>.D
+    theEmitter->emitIns_R_R_R(INS_sve_fcvt, EA_SCALABLE, REG_V0, REG_P4, REG_V13,
+                              INS_OPTS_D_TO_H); // FCVT    <Zd>.H, <Pg>/M, <Zn>.D
+    theEmitter->emitIns_R_R_R(INS_sve_fcvt, EA_SCALABLE, REG_V1, REG_P5, REG_V14,
+                              INS_OPTS_H_TO_D); // FCVT    <Zd>.D, <Pg>/M, <Zn>.H
+    theEmitter->emitIns_R_R_R(INS_sve_fcvt, EA_SCALABLE, REG_V2, REG_P6, REG_V15,
+                              INS_OPTS_S_TO_H); // FCVT    <Zd>.H, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_fcvt, EA_SCALABLE, REG_V3, REG_P7, REG_V16,
+                              INS_OPTS_H_TO_S); // FCVT    <Zd>.S, <Pg>/M, <Zn>.H
+
+    // IF_SVE_HO_3C
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtx, EA_SCALABLE, REG_V2, REG_P0, REG_V6,
+                              INS_OPTS_D_TO_S); // FCVTX   <Zd>.S, <Pg>/M, <Zn>.D
+
+    // IF_SVE_HP_3B
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzs, EA_SCALABLE, REG_V9, REG_P1, REG_V3,
+                              INS_OPTS_SCALABLE_S); // FCVTZS  <Zd>.S, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzs, EA_SCALABLE, REG_V5, REG_P0, REG_V24,
+                              INS_OPTS_S_TO_D); // FCVTZS  <Zd>.D, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzs, EA_SCALABLE, REG_V12, REG_P3, REG_V6,
+                              INS_OPTS_D_TO_S); // FCVTZS  <Zd>.S, <Pg>/M, <Zn>.D
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzs, EA_SCALABLE, REG_V2, REG_P1, REG_V17,
+                              INS_OPTS_SCALABLE_D); // FCVTZS  <Zd>.D, <Pg>/M, <Zn>.D
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzs, EA_SCALABLE, REG_V3, REG_P2, REG_V18,
+                              INS_OPTS_SCALABLE_H); // FCVTZS  <Zd>.H, <Pg>/M, <Zn>.H
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzs, EA_SCALABLE, REG_V4, REG_P3, REG_V19,
+                              INS_OPTS_H_TO_S); // FCVTZS  <Zd>.S, <Pg>/M, <Zn>.H
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzs, EA_SCALABLE, REG_V5, REG_P4, REG_V20,
+                              INS_OPTS_H_TO_D); // FCVTZS  <Zd>.D, <Pg>/M, <Zn>.H
+
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzu, EA_SCALABLE, REG_V3, REG_P2, REG_V10,
+                              INS_OPTS_SCALABLE_S); // FCVTZU  <Zd>.S, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzu, EA_SCALABLE, REG_V10, REG_P7, REG_V1,
+                              INS_OPTS_S_TO_D); // FCVTZU  <Zd>.D, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzu, EA_SCALABLE, REG_V4, REG_P3, REG_V13,
+                              INS_OPTS_D_TO_S); // FCVTZU  <Zd>.S, <Pg>/M, <Zn>.D
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzu, EA_SCALABLE, REG_V22, REG_P6, REG_V4,
+                              INS_OPTS_SCALABLE_D); // FCVTZU  <Zd>.D, <Pg>/M, <Zn>.D
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzu, EA_SCALABLE, REG_V23, REG_P7, REG_V5,
+                              INS_OPTS_SCALABLE_H); // FCVTZU  <Zd>.H, <Pg>/M, <Zn>.H
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzu, EA_SCALABLE, REG_V24, REG_P0, REG_V6,
+                              INS_OPTS_H_TO_S); // FCVTZU  <Zd>.S, <Pg>/M, <Zn>.H
+    theEmitter->emitIns_R_R_R(INS_sve_fcvtzu, EA_SCALABLE, REG_V25, REG_P1, REG_V7,
+                              INS_OPTS_H_TO_D); // FCVTZU  <Zd>.D, <Pg>/M, <Zn>.H
+
+    // IF_SVE_HS_3A
+    theEmitter->emitIns_R_R_R(INS_sve_scvtf, EA_SCALABLE, REG_V19, REG_P2, REG_V8,
+                              INS_OPTS_SCALABLE_S); // SCVTF   <Zd>.S, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_scvtf, EA_SCALABLE, REG_V1, REG_P5, REG_V19,
+                              INS_OPTS_S_TO_D); // SCVTF   <Zd>.D, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_scvtf, EA_SCALABLE, REG_V4, REG_P0, REG_V14,
+                              INS_OPTS_D_TO_S); // SCVTF   <Zd>.S, <Pg>/M, <Zn>.D
+    theEmitter->emitIns_R_R_R(INS_sve_scvtf, EA_SCALABLE, REG_V0, REG_P0, REG_V0,
+                              INS_OPTS_SCALABLE_D); // SCVTF   <Zd>.D, <Pg>/M, <Zn>.D
+    theEmitter->emitIns_R_R_R(INS_sve_scvtf, EA_SCALABLE, REG_V12, REG_P5, REG_V14,
+                              INS_OPTS_SCALABLE_H); // SCVTF  <Zd>.H, <Pg>/M, <Zn>.H
+    theEmitter->emitIns_R_R_R(INS_sve_scvtf, EA_SCALABLE, REG_V14, REG_P7, REG_V16,
+                              INS_OPTS_S_TO_H); // SCVTF  <Zd>.H, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_scvtf, EA_SCALABLE, REG_V16, REG_P1, REG_V18,
+                              INS_OPTS_D_TO_H); // SCVTF  <Zd>.H, <Pg>/M, <Zn>.D
+
+    theEmitter->emitIns_R_R_R(INS_sve_ucvtf, EA_SCALABLE, REG_V17, REG_P6, REG_V11,
+                              INS_OPTS_SCALABLE_S); // UCVTF   <Zd>.S, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_ucvtf, EA_SCALABLE, REG_V3, REG_P3, REG_V20,
+                              INS_OPTS_S_TO_D); // UCVTF   <Zd>.D, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_ucvtf, EA_SCALABLE, REG_V8, REG_P1, REG_V7,
+                              INS_OPTS_D_TO_S); // UCVTF   <Zd>.S, <Pg>/M, <Zn>.D
+    theEmitter->emitIns_R_R_R(INS_sve_ucvtf, EA_SCALABLE, REG_V8, REG_P4, REG_V9,
+                              INS_OPTS_SCALABLE_D); // UCVTF   <Zd>.D, <Pg>/M, <Zn>.D
+    theEmitter->emitIns_R_R_R(INS_sve_ucvtf, EA_SCALABLE, REG_V13, REG_P6, REG_V15,
+                              INS_OPTS_SCALABLE_H); // UCVTF  <Zd>.H, <Pg>/M, <Zn>.H
+    theEmitter->emitIns_R_R_R(INS_sve_ucvtf, EA_SCALABLE, REG_V15, REG_P0, REG_V17,
+                              INS_OPTS_S_TO_H); // UCVTF  <Zd>.H, <Pg>/M, <Zn>.S
+    theEmitter->emitIns_R_R_R(INS_sve_ucvtf, EA_SCALABLE, REG_V17, REG_P2, REG_V19,
+                              INS_OPTS_D_TO_H); // UCVTF  <Zd>.H, <Pg>/M, <Zn>.D
+
     // IF_SVE_HT_4A
     theEmitter->emitIns_R_R_R_R(INS_sve_facge, EA_SCALABLE, REG_P0, REG_P0, REG_V10, REG_V31,
                                 INS_OPTS_SCALABLE_H); // FACGE   <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
@@ -5113,6 +5281,16 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R_R(INS_sve_fcmuo, EA_SCALABLE, REG_P5, REG_P2, REG_V31, REG_V20,
                                 INS_OPTS_SCALABLE_S); // FCMUO   <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
 
+    // IF_SVE_HU_4A
+    theEmitter->emitIns_R_R_R_R(INS_sve_fmla, EA_SCALABLE, REG_V0, REG_P0, REG_V1, REG_V2,
+                                INS_OPTS_SCALABLE_H); // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R_R(INS_sve_fmls, EA_SCALABLE, REG_V3, REG_P2, REG_V4, REG_V5,
+                                INS_OPTS_SCALABLE_S); // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R_R(INS_sve_fnmla, EA_SCALABLE, REG_V6, REG_P4, REG_V7, REG_V8,
+                                INS_OPTS_SCALABLE_D); // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R_R(INS_sve_fnmls, EA_SCALABLE, REG_V9, REG_P6, REG_V10, REG_V11,
+                                INS_OPTS_SCALABLE_H); // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
+
     // IF_SVE_AF_3A
     theEmitter->emitIns_R_R_R(INS_sve_andv, EA_1BYTE, REG_V0, REG_P0, REG_V0,
                               INS_OPTS_SCALABLE_B); // ANDV    <V><d>, <Pg>, <Zn>.<T>
@@ -5218,7 +5396,7 @@ void CodeGen::genArm64EmitterUnitTestsSve()
                               INS_OPTS_SCALABLE_D); // UXTW    <Zd>.D, <Pg>/M, <Zn>.D
 
     // IF_SVE_AR_4A
-    theEmitter->emitIns_R_R_R_R(INS_sve_mla, EA_SCALABLE, REG_V0, REG_P0, REG_P0, REG_V19,
+    theEmitter->emitIns_R_R_R_R(INS_sve_mla, EA_SCALABLE, REG_V0, REG_P0, REG_V2, REG_V19,
                                 INS_OPTS_SCALABLE_B); // MLA     <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
     theEmitter->emitIns_R_R_R_R(INS_sve_mls, EA_SCALABLE, REG_V2, REG_P1, REG_V31, REG_V31,
                                 INS_OPTS_SCALABLE_H); // MLS     <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
@@ -5257,6 +5435,10 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R(INS_sve_umulh, EA_SCALABLE, REG_V31, REG_V5, REG_V0, INS_OPTS_SCALABLE_D,
                               INS_SCALABLE_OPTS_UNPREDICATED); // UMULH   <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
 
+    // IF_SVE_BD_3B
+    theEmitter->emitIns_R_R_R(INS_sve_pmul, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // PMUL <Zd>.B, <Zn>.B, <Zm>.B
+
     // IF_SVE_BE_3A
     theEmitter->emitIns_R_R_R(INS_sve_sqdmulh, EA_SCALABLE, REG_V7, REG_V28, REG_V0,
                               INS_OPTS_SCALABLE_B); // SQDMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
@@ -5271,10 +5453,590 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V29, REG_V10, REG_V22, INS_OPTS_SCALABLE_S,
                               INS_SCALABLE_OPTS_UNPREDICATED_WIDE); // LSR     <Zd>.<T>, <Zn>.<T>, <Zm>.D
 
+    // IF_SVE_BH_3A
+    theEmitter->emitInsSve_R_R_R_I(INS_sve_adr, EA_SCALABLE, REG_V4, REG_V2, REG_V0, 0, INS_OPTS_SCALABLE_D,
+                                   INS_SCALABLE_OPTS_LSL_N); // ADR     <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>{, <mod><amount>}]
+    theEmitter->emitInsSve_R_R_R_I(INS_sve_adr, EA_SCALABLE, REG_V29, REG_V1, REG_V10, 1, INS_OPTS_SCALABLE_S,
+                                   INS_SCALABLE_OPTS_LSL_N); // ADR     <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>{, <mod><amount>}]
+
+    // IF_SVE_BH_3B
+    theEmitter->emitInsSve_R_R_R_I(INS_sve_adr, EA_SCALABLE, REG_V9, REG_V7, REG_V9, 0,
+                                   INS_OPTS_SCALABLE_D_SXTW); // ADR     <Zd>.D, [<Zn>.D, <Zm>.D, SXTW{<amount>}]
+    theEmitter->emitInsSve_R_R_R_I(INS_sve_adr, EA_SCALABLE, REG_V12, REG_V3, REG_V5, 2,
+                                   INS_OPTS_SCALABLE_D_SXTW); // ADR     <Zd>.D, [<Zn>.D, <Zm>.D, SXTW{<amount>}]
+
+    // IF_SVE_BH_3B_A
+    theEmitter->emitInsSve_R_R_R_I(INS_sve_adr, EA_SCALABLE, REG_V9, REG_V10, REG_V14, 0,
+                                   INS_OPTS_SCALABLE_D_UXTW); // ADR     <Zd>.D, [<Zn>.D, <Zm>.D, UXTW{<amount>}]
+    theEmitter->emitInsSve_R_R_R_I(INS_sve_adr, EA_SCALABLE, REG_V3, REG_V15, REG_V11, 3,
+                                   INS_OPTS_SCALABLE_D_UXTW); // ADR     <Zd>.D, [<Zn>.D, <Zm>.D, UXTW{<amount>}]
+
     // IF_SVE_BK_3A
     theEmitter->emitIns_R_R_R(INS_sve_ftssel, EA_SCALABLE, REG_V17, REG_V16, REG_V15,
                               INS_OPTS_SCALABLE_D); // FTSSEL  <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
 
+    // IF_SVE_BR_3A
+    theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // TRN1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // TRN1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // TRN2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // TRN2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // UZP1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // UZP1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V18, REG_V19, REG_V20, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // UZP2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V21, REG_V22, REG_V23, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // UZP2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V24, REG_V25, REG_V26, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // ZIP1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V27, REG_V28, REG_V29, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // ZIP1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V30, REG_V31, REG_V0, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // ZIP2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V1, REG_V2, REG_V3, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // ZIP2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+
+    // IF_SVE_BR_3B
+    theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_Q,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // TRN1 <Zd>.Q, <Zn>.Q, <Zm>.Q
+    theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_Q,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // TRN2 <Zd>.Q, <Zn>.Q, <Zm>.Q
+    theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_Q,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // UZP1 <Zd>.Q, <Zn>.Q, <Zm>.Q
+    theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_Q,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // UZP2 <Zd>.Q, <Zn>.Q, <Zm>.Q
+    theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_Q,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // ZIP1 <Zd>.Q, <Zn>.Q, <Zm>.Q
+    theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_Q,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // ZIP2 <Zd>.Q, <Zn>.Q, <Zm>.Q
+
+    // IF_SVE_BS_1A
+    theEmitter->emitIns_R_I(INS_sve_and, EA_SCALABLE, REG_V0, 0x00000000000000AA,
+                            INS_OPTS_SCALABLE_B); // AND <Zdn>.<T>, <Zdn>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_bic, EA_SCALABLE, REG_V1, 0xFFFFFFFFFFFFFF55,
+                            INS_OPTS_SCALABLE_B); // BIC <Zdn>.<T>, <Zdn>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_and, EA_SCALABLE, REG_V2, 0x000000000000FF00,
+                            INS_OPTS_SCALABLE_H); // AND <Zdn>.<T>, <Zdn>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_bic, EA_SCALABLE, REG_V3, 0xFFFFFFFFFFFF00FF,
+                            INS_OPTS_SCALABLE_H); // BIC <Zdn>.<T>, <Zdn>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_eor, EA_SCALABLE, REG_V4, 0x0000000003FFC000,
+                            INS_OPTS_SCALABLE_S); // EOR <Zdn>.<T>, <Zdn>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_eon, EA_SCALABLE, REG_V5, 0xFFFFFFFFFC003FFF,
+                            INS_OPTS_SCALABLE_S); // EON <Zdn>.<T>, <Zdn>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_orr, EA_SCALABLE, REG_V6, 0x00FFFFF000000000,
+                            INS_OPTS_SCALABLE_D); // ORR <Zdn>.<T>, <Zdn>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_orn, EA_SCALABLE, REG_V7, 0xFF00000FFFFFFFFF,
+                            INS_OPTS_SCALABLE_D); // ORN <Zdn>.<T>, <Zdn>.<T>, #<const>
+
+    // IF_SVE_BT_1A
+    theEmitter->emitIns_R_I(INS_sve_dupm, EA_SCALABLE, REG_V0, 0x0000000000000070,
+                            INS_OPTS_SCALABLE_B); // DUPM <Zd>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_dupm, EA_SCALABLE, REG_V1, 0x0000000000003FFC,
+                            INS_OPTS_SCALABLE_H); // DUPM <Zd>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_dupm, EA_SCALABLE, REG_V2, 0x0000000000007000,
+                            INS_OPTS_SCALABLE_S); // DUPM <Zd>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_dupm, EA_SCALABLE, REG_V3, 0xFFFFFFFFFFFF0000,
+                            INS_OPTS_SCALABLE_D); // DUPM <Zd>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V4, 0x000000000000003F, INS_OPTS_SCALABLE_B,
+                            INS_SCALABLE_OPTS_IMM_BITMASK); // MOV <Zd>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V5, 0x0000000000000700, INS_OPTS_SCALABLE_H,
+                            INS_SCALABLE_OPTS_IMM_BITMASK); // MOV <Zd>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V6, 0x0000000000FFFFF0, INS_OPTS_SCALABLE_S,
+                            INS_SCALABLE_OPTS_IMM_BITMASK); // MOV <Zd>.<T>, #<const>
+    theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V7, 0xFFFFF00000FFFFFF, INS_OPTS_SCALABLE_D,
+                            INS_SCALABLE_OPTS_IMM_BITMASK); // MOV <Zd>.<T>, #<const>
+
+    // IF_SVE_BV_2A
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V15, REG_P5, 0,
+                              INS_OPTS_SCALABLE_B); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V5, REG_P15, 27,
+                              INS_OPTS_SCALABLE_B); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V31, REG_P0, -128,
+                              INS_OPTS_SCALABLE_B); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P5, 127,
+                              INS_OPTS_SCALABLE_B); // MOV <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V15, REG_P5, 0,
+                              INS_OPTS_SCALABLE_H); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V23, REG_P12, 10,
+                              INS_OPTS_SCALABLE_S); // MOV <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V4, REG_P0, -128,
+                              INS_OPTS_SCALABLE_D); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V19, REG_P15, 127,
+                              INS_OPTS_SCALABLE_H); // MOV <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, 256,
+                              INS_OPTS_SCALABLE_S); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, 3072,
+                              INS_OPTS_SCALABLE_D); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, -3072,
+                              INS_OPTS_SCALABLE_H); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, -32768,
+                              INS_OPTS_SCALABLE_S); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P0, 32512,
+                              INS_OPTS_SCALABLE_D); // MOV <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+
+    // IF_SVE_BV_2A_A
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P12, 5, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_PREDICATE_MERGE); // CPY <Zd>.<T>, <Pg>/M, #<imm>{, <shift>}
+
+    // IF_SVE_BV_2A_J
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V27, REG_P13, 5632, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV <Zd>.<T>, <Pg>/M, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V27, REG_P13, -5632, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV <Zd>.<T>, <Pg>/M, #<imm>{, <shift>}
+
+    // IF_SVE_BV_2B
+    theEmitter->emitIns_R_R(INS_sve_fmov, EA_SCALABLE, REG_V0, REG_P1,
+                            INS_OPTS_SCALABLE_H); // FMOV <Zd>.<T>, <Pg>/M, #0.0
+    theEmitter->emitIns_R_R(INS_sve_fmov, EA_SCALABLE, REG_V2, REG_P3,
+                            INS_OPTS_SCALABLE_S); // FMOV <Zd>.<T>, <Pg>/M, #0.0
+    theEmitter->emitIns_R_R(INS_sve_fmov, EA_SCALABLE, REG_V4, REG_P5,
+                            INS_OPTS_SCALABLE_D); // FMOV <Zd>.<T>, <Pg>/M, #0.0
+
+    // IF_SVE_BW_2A
+    // MOV should be preferred alias when emitting DUP
+    theEmitter->emitIns_R_R_I(INS_sve_dup, EA_SCALABLE, REG_V4, REG_V12, 63,
+                              INS_OPTS_SCALABLE_B); // MOV     <Zd>.<T>, <Zn>.<T>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_dup, EA_SCALABLE, REG_V8, REG_V9, 31,
+                              INS_OPTS_SCALABLE_H); // MOV     <Zd>.<T>, <Zn>.<T>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_dup, EA_SCALABLE, REG_V11, REG_V28, 15,
+                              INS_OPTS_SCALABLE_S); // MOV     <Zd>.<T>, <Zn>.<T>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_dup, EA_SCALABLE, REG_V21, REG_V12, 7,
+                              INS_OPTS_SCALABLE_D); // MOV     <Zd>.<T>, <Zn>.<T>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_dup, EA_SCALABLE, REG_V14, REG_V7, 3,
+                              INS_OPTS_SCALABLE_Q); // MOV     <Zd>.<T>, <Zn>.<T>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_dup, EA_SCALABLE, REG_V13, REG_V8, 0,
+                              INS_OPTS_SCALABLE_B); // MOV     <Zd>.<T>, <V><n>
+    theEmitter->emitIns_R_R_I(INS_sve_dup, EA_SCALABLE, REG_V2, REG_V0, 0,
+                              INS_OPTS_SCALABLE_H); // MOV     <Zd>.<T>, <V><n>
+    theEmitter->emitIns_R_R_I(INS_sve_dup, EA_SCALABLE, REG_V15, REG_V31, 0,
+                              INS_OPTS_SCALABLE_S); // MOV     <Zd>.<T>, <V><n>
+    theEmitter->emitIns_R_R_I(INS_sve_dup, EA_SCALABLE, REG_V23, REG_V27, 0,
+                              INS_OPTS_SCALABLE_D); // MOV     <Zd>.<T>, <V><n>
+    theEmitter->emitIns_R_R_I(INS_sve_dup, EA_SCALABLE, REG_V4, REG_V3, 0,
+                              INS_OPTS_SCALABLE_Q); // MOV     <Zd>.<T>, <V><n>
+
+    // MOV implementation should produce same output as DUP implementation with same parameters
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V1, REG_V16, 63, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_BROADCAST); // MOV     <Zd>.<T>, <Zn>.<T>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V17, REG_V18, 31, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_BROADCAST); // MOV     <Zd>.<T>, <Zn>.<T>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V9, REG_V11, 15, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_BROADCAST); // MOV     <Zd>.<T>, <Zn>.<T>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V2, REG_V3, 7, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_BROADCAST); // MOV     <Zd>.<T>, <Zn>.<T>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V3, REG_V8, 3, INS_OPTS_SCALABLE_Q,
+                              INS_SCALABLE_OPTS_BROADCAST); // MOV     <Zd>.<T>, <Zn>.<T>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V13, REG_V9, 0, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_BROADCAST); // MOV     <Zd>.<T>, <V><n>
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V12, REG_V6, 0, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_BROADCAST); // MOV     <Zd>.<T>, <V><n>
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V2, REG_V7, 0, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_BROADCAST); // MOV     <Zd>.<T>, <V><n>
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_BROADCAST); // MOV     <Zd>.<T>, <V><n>
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V10, REG_V20, 0, INS_OPTS_SCALABLE_Q,
+                              INS_SCALABLE_OPTS_BROADCAST); // MOV     <Zd>.<T>, <V><n>
+
+    // IF_SVE_BZ_3A
+    theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // TBL <Zd>.<T>, {<Zn>.<T>}, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_H); // TBL <Zd>.<T>, {<Zn>.<T>}, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_tbx, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_S); // TBX <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_tbx, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_D); // TBX <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+
+    // IF_SVE_BZ_3A_A
+    theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // TBL <Zd>.<T>, {<Zn1>.<T>, <Zn2>.<T>}, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // TBL <Zd>.<T>, {<Zn1>.<T>, <Zn2>.<T>}, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // TBL <Zd>.<T>, {<Zn1>.<T>, <Zn2>.<T>}, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // TBL <Zd>.<T>, {<Zn1>.<T>, <Zn2>.<T>}, <Zm>.<T>
+
+    // IF_SVE_CA_3A
+    theEmitter->emitIns_R_R_R(INS_sve_tbxq, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // TBXQ <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_tbxq, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_H); // TBXQ <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_tbxq, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_S); // TBXQ <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_tbxq, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_D); // TBXQ <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+
+    // IF_SVE_EH_3A
+    theEmitter->emitIns_R_R_R(INS_sve_sdot, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_S); // SDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_sdot, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_D); // SDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_udot, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_S); // UDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_udot, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_D); // UDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+
+    // IF_SVE_EL_3A
+    theEmitter->emitIns_R_R_R(INS_sve_smlalb, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // SMLALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_smlalt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_S); // SMLALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_smlslb, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_D); // SMLSLB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_smlslt, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_H); // SMLSLT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_umlalb, EA_SCALABLE, REG_V12, REG_V13, REG_V14,
+                              INS_OPTS_SCALABLE_S); // UMLALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_umlalt, EA_SCALABLE, REG_V15, REG_V16, REG_V17,
+                              INS_OPTS_SCALABLE_D); // UMLALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_umlslb, EA_SCALABLE, REG_V18, REG_V19, REG_V20,
+                              INS_OPTS_SCALABLE_H); // UMLSLB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_umlslt, EA_SCALABLE, REG_V21, REG_V22, REG_V23,
+                              INS_OPTS_SCALABLE_S); // UMLSLT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+
+    // IF_SVE_EM_3A
+    theEmitter->emitIns_R_R_R(INS_sve_sqrdmlah, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // SQRDMLAH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_sqrdmlah, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_H); // SQRDMLAH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_S); // SQRDMLSH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_D); // SQRDMLSH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+
+    // IF_SVE_EN_3A
+    theEmitter->emitIns_R_R_R(INS_sve_sqdmlalbt, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // SQDMLALBT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_sqdmlslbt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_S); // SQDMLSLBT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_sqdmlslbt, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_D); // SQDMLSLBT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+
+    // IF_SVE_EO_3A
+    theEmitter->emitIns_R_R_R(INS_sve_sqdmlalb, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // SQDMLALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_sqdmlalt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_S); // SQDMLALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_sqdmlslb, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_D); // SQDMLSLB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_sqdmlslt, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_H); // SQDMLSLT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+
+    // IF_SVE_EV_3A
+    theEmitter->emitIns_R_R_R(INS_sve_sclamp, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // SCLAMP <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_sclamp, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_H); // SCLAMP <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_uclamp, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_S); // UCLAMP <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_uclamp, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_D); // UCLAMP <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+
+    // IF_SVE_EX_3A
+    theEmitter->emitIns_R_R_R(INS_sve_tblq, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // TBLQ <Zd>.<T>, {<Zn>.<T>}, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_uzpq1, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_H); // UZPQ1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_uzpq2, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_S); // UZPQ2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_zipq1, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_D); // ZIPQ1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_zipq2, EA_SCALABLE, REG_V12, REG_V13, REG_V14,
+                              INS_OPTS_SCALABLE_B); // ZIPQ2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+
+    // IF_SVE_FL_3A
+    theEmitter->emitIns_R_R_R(INS_sve_sabdlb, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // SABDLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_sabdlt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_S); // SABDLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_saddlb, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_D); // SADDLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_saddlt, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_H); // SADDLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_ssublb, EA_SCALABLE, REG_V12, REG_V13, REG_V14,
+                              INS_OPTS_SCALABLE_S); // SSUBLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_ssublt, EA_SCALABLE, REG_V15, REG_V16, REG_V17,
+                              INS_OPTS_SCALABLE_D); // SSUBLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_uabdlb, EA_SCALABLE, REG_V18, REG_V19, REG_V20,
+                              INS_OPTS_SCALABLE_H); // UABDLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_uabdlt, EA_SCALABLE, REG_V21, REG_V22, REG_V24,
+                              INS_OPTS_SCALABLE_S); // UABDLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_uaddlb, EA_SCALABLE, REG_V24, REG_V25, REG_V26,
+                              INS_OPTS_SCALABLE_D); // UADDLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_uaddlt, EA_SCALABLE, REG_V27, REG_V28, REG_V29,
+                              INS_OPTS_SCALABLE_H); // UADDLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_usublb, EA_SCALABLE, REG_V30, REG_V31, REG_V0,
+                              INS_OPTS_SCALABLE_S); // USUBLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_usublt, EA_SCALABLE, REG_V1, REG_V2, REG_V3,
+                              INS_OPTS_SCALABLE_D); // USUBLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+
+    // IF_SVE_FM_3A
+    theEmitter->emitIns_R_R_R(INS_sve_saddwb, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // SADDWB <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_saddwt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_S); // SADDWT <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_ssubwb, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_D); // SSUBWB <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_ssubwt, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_H); // SSUBWT <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_uaddwb, EA_SCALABLE, REG_V12, REG_V13, REG_V14,
+                              INS_OPTS_SCALABLE_S); // UADDWB <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_uaddwt, EA_SCALABLE, REG_V15, REG_V16, REG_V17,
+                              INS_OPTS_SCALABLE_D); // UADDWT <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_usubwb, EA_SCALABLE, REG_V18, REG_V19, REG_V20,
+                              INS_OPTS_SCALABLE_H); // USUBWB <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_usubwt, EA_SCALABLE, REG_V21, REG_V22, REG_V23,
+                              INS_OPTS_SCALABLE_S); // USUBWT <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+
+    // IF_SVE_FN_3A
+    theEmitter->emitIns_R_R_R(INS_sve_pmullb, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // PMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_pmullt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_D); // PMULLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_smullb, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_H); // SMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_smullt, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_D); // SMULLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_sqdmullb, EA_SCALABLE, REG_V12, REG_V13, REG_V14,
+                              INS_OPTS_SCALABLE_H); // SQDMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_sqdmullt, EA_SCALABLE, REG_V15, REG_V16, REG_V17,
+                              INS_OPTS_SCALABLE_D); // SQDMULLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_umullb, EA_SCALABLE, REG_V18, REG_V19, REG_V20,
+                              INS_OPTS_SCALABLE_H); // UMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_umullt, EA_SCALABLE, REG_V21, REG_V22, REG_V23,
+                              INS_OPTS_SCALABLE_D); // UMULLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+
+    // IF_SVE_FN_3B
+    theEmitter->emitIns_R_R_R(INS_sve_pmullb, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_Q); // PMULLB <Zd>.Q, <Zn>.D, <Zm>.D
+    theEmitter->emitIns_R_R_R(INS_sve_pmullt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_Q); // PMULLT <Zd>.Q, <Zn>.D, <Zm>.D
+
+    // IF_SVE_FO_3A
+    theEmitter->emitIns_R_R_R(INS_sve_smmla, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_S); // SMMLA <Zda>.S, <Zn>.B, <Zm>.B
+    theEmitter->emitIns_R_R_R(INS_sve_ummla, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_S); // UMMLA <Zda>.S, <Zn>.B, <Zm>.B
+    theEmitter->emitIns_R_R_R(INS_sve_usmmla, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_S); // USMMLA <Zda>.S, <Zn>.B, <Zm>.B
+
+    // IF_SVE_FP_3A
+    theEmitter->emitIns_R_R_R(INS_sve_eorbt, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // EORBT <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_eorbt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_H); // EORBT <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_eortb, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_S); // EORTB <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_eortb, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_D); // EORTB <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+
+    // IF_SVE_FQ_3A
+    theEmitter->emitIns_R_R_R(INS_sve_bdep, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // BDEP <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_bext, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_H); // BEXT <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_bgrp, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_S); // BGRP <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_bgrp, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_D); // BGRP <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+
+    // IF_SVE_FS_3A
+    theEmitter->emitIns_R_R_R(INS_sve_saddlbt, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // SADDLBT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_ssublbt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_S); // SSUBLBT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_ssubltb, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_D); // SSUBLTB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+
+    // IF_SVE_FW_3A
+    theEmitter->emitIns_R_R_R(INS_sve_saba, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // SABA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_saba, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_H); // SABA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_uaba, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_S); // UABA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_uaba, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_D); // UABA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+
+    // IF_SVE_FX_3A
+    theEmitter->emitIns_R_R_R(INS_sve_sabalb, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // SABALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_sabalt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_S); // SABALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_uabalb, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_D); // UABALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_uabalt, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_H); // UABALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+    // IF_SVE_GC_3A
+    theEmitter->emitIns_R_R_R(INS_sve_addhnb, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // ADDHNB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_addhnt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_H); // ADDHNT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_raddhnb, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_S); // RADDHNB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_raddhnt, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_B); // RADDHNT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_rsubhnb, EA_SCALABLE, REG_V12, REG_V13, REG_V14,
+                              INS_OPTS_SCALABLE_H); // RSUBHNB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_rsubhnt, EA_SCALABLE, REG_V15, REG_V16, REG_V17,
+                              INS_OPTS_SCALABLE_S); // RSUBHNT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_subhnb, EA_SCALABLE, REG_V18, REG_V19, REG_V20,
+                              INS_OPTS_SCALABLE_B); // SUBHNB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+    theEmitter->emitIns_R_R_R(INS_sve_subhnt, EA_SCALABLE, REG_V21, REG_V22, REG_V23,
+                              INS_OPTS_SCALABLE_H); // SUBHNT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+#endif                                              // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+
+    // IF_SVE_GF_3A
+    theEmitter->emitIns_R_R_R(INS_sve_histseg, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // HISTSEG <Zd>.B, <Zn>.B, <Zm>.B
+
+    // IF_SVE_GW_3A
+    theEmitter->emitIns_R_R_R(INS_sve_fclamp, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // FCLAMP <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_fclamp, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_S); // FCLAMP <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_fclamp, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_D); // FCLAMP <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+
+    // IF_SVE_GW_3B
+    theEmitter->emitIns_R_R_R(INS_sve_bfclamp, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // BFCLAMP <Zd>.H, <Zn>.H, <Zm>.H
+
+    // IF_SVE_HK_3A
+    theEmitter->emitIns_R_R_R(INS_sve_fadd, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_fmul, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_frecps, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_frsqrts, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_fsub, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_ftsmul, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+
+    // IF_SVE_HK_3B
+    theEmitter->emitIns_R_R_R(INS_sve_bfadd, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // BFADD <Zd>.H, <Zn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_bfmul, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // BFMUL <Zd>.H, <Zn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_bfsub, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // BFSUB <Zd>.H, <Zn>.H, <Zm>.H
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+    // IF_SVE_AT_3B
+    theEmitter->emitIns_R_R_R(INS_sve_addpt, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // ADDPT <Zd>.D, <Zn>.D, <Zm>.D
+    theEmitter->emitIns_R_R_R(INS_sve_subpt, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // SUBPT <Zd>.D, <Zn>.D, <Zm>.D
+#endif                                                         // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+
+    // IF_SVE_AU_3A
+    theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // AND <Zd>.D, <Zn>.D, <Zm>.D
+    theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // BIC <Zd>.D, <Zn>.D, <Zm>.D
+    theEmitter->emitIns_R_R_R(INS_sve_eor, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // EOR <Zd>.D, <Zn>.D, <Zm>.D
+    theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // MOV <Zd>.D, <Zn>.D
+    theEmitter->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED); // ORR <Zd>.D, <Zn>.D, <Zm>.D
+
+    // IF_SVE_AV_3A
+    theEmitter->emitIns_R_R_R(INS_sve_bcax, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_D); // BCAX <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+    theEmitter->emitIns_R_R_R(INS_sve_bsl, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_D); // BSL <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+    theEmitter->emitIns_R_R_R(INS_sve_bsl1n, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_D); // BSL1N <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+    theEmitter->emitIns_R_R_R(INS_sve_bsl2n, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_D); // BSL2N <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+    theEmitter->emitIns_R_R_R(INS_sve_eor3, EA_SCALABLE, REG_V12, REG_V13, REG_V14,
+                              INS_OPTS_SCALABLE_D); // EOR3 <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+    theEmitter->emitIns_R_R_R(INS_sve_nbsl, EA_SCALABLE, REG_V15, REG_V16, REG_V17,
+                              INS_OPTS_SCALABLE_D); // NBSL <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+
+    // IF_SVE_AW_2A
+    theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V0, REG_V1, 1,
+                              INS_OPTS_SCALABLE_B); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V2, REG_V3, 8,
+                              INS_OPTS_SCALABLE_B); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V4, REG_V5, 2,
+                              INS_OPTS_SCALABLE_H); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V6, REG_V7, 16,
+                              INS_OPTS_SCALABLE_H); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V8, REG_V9, 3,
+                              INS_OPTS_SCALABLE_S); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V10, REG_V11, 32,
+                              INS_OPTS_SCALABLE_S); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V12, REG_V13, 4,
+                              INS_OPTS_SCALABLE_D); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V14, REG_V15, 64,
+                              INS_OPTS_SCALABLE_D); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const>
+
+    // IF_SVE_AX_1A
+    theEmitter->emitIns_R_I_I(INS_sve_index, EA_SCALABLE, REG_V0, -16, 15,
+                              INS_OPTS_SCALABLE_B); // INDEX <Zd>.<T>, #<imm1>, #<imm2>
+    theEmitter->emitIns_R_I_I(INS_sve_index, EA_SCALABLE, REG_V1, 15, -16,
+                              INS_OPTS_SCALABLE_H); // INDEX <Zd>.<T>, #<imm1>, #<imm2>
+    theEmitter->emitIns_R_I_I(INS_sve_index, EA_SCALABLE, REG_V2, 0, 0,
+                              INS_OPTS_SCALABLE_S); // INDEX <Zd>.<T>, #<imm1>, #<imm2>
+    theEmitter->emitIns_R_I_I(INS_sve_index, EA_SCALABLE, REG_V3, -5, 5,
+                              INS_OPTS_SCALABLE_D); // INDEX <Zd>.<T>, #<imm1>, #<imm2>
+
+    // IF_SVE_AY_2A
+    theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V0, REG_R0, -16, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_IMM_FIRST); // INDEX <Zd>.<T>, #<imm>, <R><m>
+    theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V1, REG_R1, 0, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_IMM_FIRST); // INDEX <Zd>.<T>, #<imm>, <R><m>
+    theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V2, REG_R2, 5, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_IMM_FIRST); // INDEX <Zd>.<T>, #<imm>, <R><m>
+    theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V3, REG_R3, 10, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_IMM_FIRST); // INDEX <Zd>.<T>, #<imm>, <R><m>
+    theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V4, REG_ZR, -16, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_IMM_FIRST); // INDEX <Zd>.<T>, #<imm>, <R><m>
+    theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V5, REG_ZR, 15, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_IMM_FIRST); // INDEX <Zd>.<T>, #<imm>, <R><m>
+
+    // IF_SVE_AZ_2A
+    theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V0, REG_R0, -16,
+                              INS_OPTS_SCALABLE_B); // INDEX <Zd>.<T>, <R><n>, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V1, REG_R1, 0,
+                              INS_OPTS_SCALABLE_H); // INDEX <Zd>.<T>, <R><n>, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V2, REG_R2, 5,
+                              INS_OPTS_SCALABLE_S); // INDEX <Zd>.<T>, <R><n>, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V3, REG_R3, 10,
+                              INS_OPTS_SCALABLE_D); // INDEX <Zd>.<T>, <R><n>, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V4, REG_ZR, -16,
+                              INS_OPTS_SCALABLE_B); // INDEX <Zd>.<T>, <R><n>, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V5, REG_ZR, 15,
+                              INS_OPTS_SCALABLE_D); // INDEX <Zd>.<T>, <R><n>, #<imm>
+
+    // IF_SVE_BB_2A
+    theEmitter->emitIns_R_R_I(INS_sve_addpl, EA_8BYTE, REG_R0, REG_R1, -32); // ADDPL <Xd|SP>, <Xn|SP>, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_addpl, EA_8BYTE, REG_R2, REG_SP, 0);   // ADDPL <Xd|SP>, <Xn|SP>, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_addvl, EA_8BYTE, REG_R3, REG_R4, 5);   // ADDVL <Xd|SP>, <Xn|SP>, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_addvl, EA_8BYTE, REG_SP, REG_R5, 31);  // ADDVL <Xd|SP>, <Xn|SP>, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_addvl, EA_8BYTE, REG_SP, REG_SP, 0);   // ADDVL <Xd|SP>, <Xn|SP>, #<imm>
+
+    // IF_SVE_BC_1A
+    theEmitter->emitIns_R_I(INS_sve_rdvl, EA_8BYTE, REG_R0, -32); // RDVL <Xd>, #<imm>
+    theEmitter->emitIns_R_I(INS_sve_rdvl, EA_8BYTE, REG_R5, 0);   // RDVL <Xd>, #<imm>
+    theEmitter->emitIns_R_I(INS_sve_rdvl, EA_8BYTE, REG_R10, 5);  // RDVL <Xd>, #<imm>
+    theEmitter->emitIns_R_I(INS_sve_rdvl, EA_8BYTE, REG_R15, 31); // RDVL <Xd>, #<imm>
+
     // IF_SVE_BL_1A
     theEmitter->emitIns_R_PATTERN_I(INS_sve_cntb, EA_8BYTE, REG_R0, SVE_PATTERN_POW2,
                                     1); // CNTB    <Xd>{, <pattern>{, MUL #<imm>}}
@@ -5291,6 +6053,154 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_PATTERN_I(INS_sve_cnth, EA_8BYTE, REG_R5, SVE_PATTERN_ALL,
                                     13); // CNTH    <Xd>{, <pattern>{, MUL #<imm>}}
 
+    // IF_SVE_BM_1A
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_decb, EA_8BYTE, REG_R0, SVE_PATTERN_POW2,
+                                    1); // DECB <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_decd, EA_8BYTE, REG_R1, SVE_PATTERN_VL16,
+                                    3); // DECD <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_dech, EA_8BYTE, REG_R2, SVE_PATTERN_VL32,
+                                    5); // DECH <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_decw, EA_8BYTE, REG_R3, SVE_PATTERN_VL64,
+                                    7); // DECW <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_incb, EA_8BYTE, REG_R4, SVE_PATTERN_VL128,
+                                    9); // INCB <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_incd, EA_8BYTE, REG_R5, SVE_PATTERN_MUL3,
+                                    10); // INCD <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_inch, EA_8BYTE, REG_R6, SVE_PATTERN_MUL4,
+                                    13); // INCH <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_incw, EA_8BYTE, REG_R7, SVE_PATTERN_ALL,
+                                    16); // INCW <Xdn>{, <pattern>{, MUL #<imm>}}
+
+    // IF_SVE_BN_1A
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_decd, EA_SCALABLE, REG_V0, SVE_PATTERN_POW2, 1,
+                                    INS_OPTS_SCALABLE_D); // DECD <Zdn>.D{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_dech, EA_SCALABLE, REG_V1, SVE_PATTERN_VL2, 2,
+                                    INS_OPTS_SCALABLE_H); // DECH <Zdn>.H{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_decw, EA_SCALABLE, REG_V2, SVE_PATTERN_VL3, 4,
+                                    INS_OPTS_SCALABLE_S); // DECW <Zdn>.S{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_incd, EA_SCALABLE, REG_V3, SVE_PATTERN_VL4, 8,
+                                    INS_OPTS_SCALABLE_D); // INCD <Zdn>.D{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_inch, EA_SCALABLE, REG_V4, SVE_PATTERN_VL5, 12,
+                                    INS_OPTS_SCALABLE_H); // INCH <Zdn>.H{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_incw, EA_SCALABLE, REG_V5, SVE_PATTERN_VL6, 16,
+                                    INS_OPTS_SCALABLE_S); // INCW <Zdn>.S{, <pattern>{, MUL #<imm>}}
+
+    // IF_SVE_BO_1A
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecb, EA_4BYTE, REG_R0, SVE_PATTERN_POW2,
+                                    1); // SQDECB <Xdn>, <Wdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecd, EA_8BYTE, REG_R1, SVE_PATTERN_VL1,
+                                    2); // SQDECD <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdech, EA_4BYTE, REG_R2, SVE_PATTERN_VL2,
+                                    3); // SQDECH <Xdn>, <Wdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecw, EA_8BYTE, REG_R3, SVE_PATTERN_VL3,
+                                    4); // SQDECW <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincb, EA_4BYTE, REG_R4, SVE_PATTERN_VL4,
+                                    5); // SQINCB <Xdn>, <Wdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincd, EA_8BYTE, REG_R5, SVE_PATTERN_VL5,
+                                    6); // SQINCD <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqinch, EA_4BYTE, REG_R6, SVE_PATTERN_VL6,
+                                    7); // SQINCH <Xdn>, <Wdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincw, EA_8BYTE, REG_R7, SVE_PATTERN_VL7,
+                                    8); // SQINCW <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecb, EA_4BYTE, REG_R8, SVE_PATTERN_VL8,
+                                    9); // UQDECB <Wdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecd, EA_8BYTE, REG_R9, SVE_PATTERN_VL16,
+                                    10); // UQDECD <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdech, EA_4BYTE, REG_R10, SVE_PATTERN_VL32,
+                                    11); // UQDECH <Wdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecw, EA_8BYTE, REG_R11, SVE_PATTERN_VL64,
+                                    12); // UQDECW <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincb, EA_4BYTE, REG_R12, SVE_PATTERN_VL128,
+                                    13); // UQINCB <Wdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincd, EA_8BYTE, REG_R13, SVE_PATTERN_VL256,
+                                    14); // UQINCD <Xdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqinch, EA_4BYTE, REG_R14, SVE_PATTERN_MUL4,
+                                    15); // UQINCH <Wdn>{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincw, EA_8BYTE, REG_R15, SVE_PATTERN_ALL,
+                                    16); // UQINCW <Xdn>{, <pattern>{, MUL #<imm>}}
+
+    // IF_SVE_BP_1A
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecd, EA_SCALABLE, REG_V0, SVE_PATTERN_VL1, 1,
+                                    INS_OPTS_SCALABLE_D); // SQDECD <Zdn>.D{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdech, EA_SCALABLE, REG_V1, SVE_PATTERN_VL2, 2,
+                                    INS_OPTS_SCALABLE_H); // SQDECH <Zdn>.H{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecw, EA_SCALABLE, REG_V2, SVE_PATTERN_VL3, 3,
+                                    INS_OPTS_SCALABLE_S); // SQDECW <Zdn>.S{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincd, EA_SCALABLE, REG_V3, SVE_PATTERN_VL4, 4,
+                                    INS_OPTS_SCALABLE_D); // SQINCD <Zdn>.D{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqinch, EA_SCALABLE, REG_V4, SVE_PATTERN_VL5, 5,
+                                    INS_OPTS_SCALABLE_H); // SQINCH <Zdn>.H{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincw, EA_SCALABLE, REG_V5, SVE_PATTERN_VL6, 6,
+                                    INS_OPTS_SCALABLE_S); // SQINCW <Zdn>.S{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecd, EA_SCALABLE, REG_V6, SVE_PATTERN_VL7, 7,
+                                    INS_OPTS_SCALABLE_D); // UQDECD <Zdn>.D{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdech, EA_SCALABLE, REG_V7, SVE_PATTERN_VL8, 8,
+                                    INS_OPTS_SCALABLE_H); // UQDECH <Zdn>.H{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecw, EA_SCALABLE, REG_V8, SVE_PATTERN_VL16, 9,
+                                    INS_OPTS_SCALABLE_S); // UQDECW <Zdn>.S{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincd, EA_SCALABLE, REG_V9, SVE_PATTERN_VL32, 10,
+                                    INS_OPTS_SCALABLE_D); // UQINCD <Zdn>.D{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqinch, EA_SCALABLE, REG_V10, SVE_PATTERN_POW2, 11,
+                                    INS_OPTS_SCALABLE_H); // UQINCH <Zdn>.H{, <pattern>{, MUL #<imm>}}
+    theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincw, EA_SCALABLE, REG_V11, SVE_PATTERN_ALL, 16,
+                                    INS_OPTS_SCALABLE_S); // UQINCW <Zdn>.S{, <pattern>{, MUL #<imm>}}
+
+    // IF_SVE_BQ_2A
+    theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V0, REG_V1, 0, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V2, REG_V3, 5, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V4, REG_V5, 128, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V6, REG_FP_LAST, 255, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm>
+
+    // IF_SVE_BQ_2B
+    theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V0, REG_V1, 0,
+                              INS_OPTS_SCALABLE_B); // EXT <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V2, REG_V3, 31,
+                              INS_OPTS_SCALABLE_B); // EXT <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V4, REG_V5, 64,
+                              INS_OPTS_SCALABLE_B); // EXT <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V6, REG_V7, 255,
+                              INS_OPTS_SCALABLE_B); // EXT <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm>
+
+    // IF_SVE_BU_2A
+    theEmitter->emitIns_R_R_F(INS_sve_fcpy, EA_SCALABLE, REG_V0, REG_P1, 2.0,
+                              INS_OPTS_SCALABLE_H); // FCPY <Zd>.<T>, <Pg>/M, #<const>
+    theEmitter->emitIns_R_R_F(INS_sve_fcpy, EA_SCALABLE, REG_V2, REG_P3, 1.0,
+                              INS_OPTS_SCALABLE_S); // FCPY <Zd>.<T>, <Pg>/M, #<const>
+    theEmitter->emitIns_R_R_F(INS_sve_fcpy, EA_SCALABLE, REG_V4, REG_P5, -10.0,
+                              INS_OPTS_SCALABLE_D); // FCPY <Zd>.<T>, <Pg>/M, #<const>
+    theEmitter->emitIns_R_R_F(INS_sve_fmov, EA_SCALABLE, REG_V6, REG_P7, -0.125,
+                              INS_OPTS_SCALABLE_H); // FMOV <Zd>.<T>, <Pg>/M, #<const>
+    theEmitter->emitIns_R_R_F(INS_sve_fmov, EA_SCALABLE, REG_V8, REG_P9, 31.0,
+                              INS_OPTS_SCALABLE_S); // FMOV <Zd>.<T>, <Pg>/M, #<const>
+    theEmitter->emitIns_R_R_F(INS_sve_fmov, EA_SCALABLE, REG_V10, REG_P11, 0.5,
+                              INS_OPTS_SCALABLE_D); // FMOV <Zd>.<T>, <Pg>/M, #<const>
+
+    // IF_SVE_CC_2A
+    theEmitter->emitIns_R_R(INS_sve_insr, EA_SCALABLE, REG_V0, REG_V13,
+                            INS_OPTS_SCALABLE_B); // INSR    <Zdn>.<T>, <V><m>
+    theEmitter->emitIns_R_R(INS_sve_insr, EA_SCALABLE, REG_V29, REG_V0,
+                            INS_OPTS_SCALABLE_H); // INSR    <Zdn>.<T>, <V><m>
+    theEmitter->emitIns_R_R(INS_sve_insr, EA_SCALABLE, REG_V4, REG_V15,
+                            INS_OPTS_SCALABLE_S); // INSR    <Zdn>.<T>, <V><m>
+    theEmitter->emitIns_R_R(INS_sve_insr, EA_SCALABLE, REG_V8, REG_V2,
+                            INS_OPTS_SCALABLE_D); // INSR    <Zdn>.<T>, <V><m>
+
+    // IF_SVE_CD_2A
+    theEmitter->emitIns_R_R(INS_sve_insr, EA_SCALABLE, REG_V4, REG_R23,
+                            INS_OPTS_SCALABLE_B); // INSR    <Zdn>.<T>, <R><m>
+    theEmitter->emitIns_R_R(INS_sve_insr, EA_SCALABLE, REG_V11, REG_R1,
+                            INS_OPTS_SCALABLE_H); // INSR    <Zdn>.<T>, <R><m>
+    theEmitter->emitIns_R_R(INS_sve_insr, EA_SCALABLE, REG_V14, REG_R9,
+                            INS_OPTS_SCALABLE_S); // INSR    <Zdn>.<T>, <R><m>
+    theEmitter->emitIns_R_R(INS_sve_insr, EA_SCALABLE, REG_V19, REG_R0,
+                            INS_OPTS_SCALABLE_D); // INSR    <Zdn>.<T>, <R><m>
+    theEmitter->emitIns_R_R(INS_sve_insr, EA_SCALABLE, REG_V29, REG_ZR,
+                            INS_OPTS_SCALABLE_D); // INSR    <Zdn>.<T>, <R><m>
+
     // IF_SVE_CI_3A
     theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_P1, REG_P3, REG_P4,
                               INS_OPTS_SCALABLE_B); // TRN1    <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
@@ -5387,6 +6297,14 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R(INS_sve_splice, EA_SCALABLE, REG_V2, REG_P6, REG_V28,
                               INS_OPTS_SCALABLE_S); // SPLICE  <Zdn>.<T>, <Pv>, <Zdn>.<T>, <Zm>.<T>
 
+    // IF_SVE_CW_4A
+    theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P0, REG_V30, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV     <Zd>.<T>, <Pv>/M, <Zn>.<T>
+    theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V29, REG_P15, REG_V28, REG_V4, INS_OPTS_SCALABLE_D,
+                                INS_SCALABLE_OPTS_UNPREDICATED); // SEL     <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V5, REG_P13, REG_V27, REG_V5, INS_OPTS_SCALABLE_S,
+                                INS_SCALABLE_OPTS_UNPREDICATED); // SEL     <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T>
+
     // IF_SVE_EQ_3A
     // Note: Scalable size is the size of the destination <T>, not the source <Tb>.
     theEmitter->emitIns_R_R_R(INS_sve_sadalp, EA_SCALABLE, REG_V26, REG_P3, REG_V8,
@@ -5410,6 +6328,19 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R(INS_sve_ursqrte, EA_SCALABLE, REG_V3, REG_P0, REG_V5,
                               INS_OPTS_SCALABLE_S); // URSQRTE <Zd>.S, <Pg>/M, <Zn>.S
 
+    // IF_SVE_FZ_2A
+    theEmitter->emitIns_R_R(INS_sve_sqcvtn, EA_SCALABLE, REG_V0, REG_V2);   // SQCVTN <Zd>.H, {<Zn1>.S-<Zn2>.S }
+    theEmitter->emitIns_R_R(INS_sve_sqcvtun, EA_SCALABLE, REG_V6, REG_V8);  // SQCVTUN <Zd>.H, {<Zn1>.S-<Zn2>.S }
+    theEmitter->emitIns_R_R(INS_sve_uqcvtn, EA_SCALABLE, REG_V14, REG_V16); // UQCVTN <Zd>.H, {<Zn1>.S-<Zn2>.S }
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+    // IF_SVE_HG_2A
+    theEmitter->emitIns_R_R(INS_sve_bfcvtn, EA_SCALABLE, REG_V0, REG_V2);   // BFCVTN <Zd>.B, {<Zn1>.H-<Zn2>.H }
+    theEmitter->emitIns_R_R(INS_sve_fcvtn, EA_SCALABLE, REG_V2, REG_V4);    // FCVTN <Zd>.B, {<Zn1>.H-<Zn2>.H }
+    theEmitter->emitIns_R_R(INS_sve_fcvtnb, EA_SCALABLE, REG_V6, REG_V8);   // FCVTNB <Zd>.B, {<Zn1>.S-<Zn2>.S }
+    theEmitter->emitIns_R_R(INS_sve_fcvtnt, EA_SCALABLE, REG_V14, REG_V16); // FCVTNT <Zd>.B, {<Zn1>.S-<Zn2>.S }
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+
     // IF_SVE_GA_2A
     theEmitter->emitIns_R_R_I(INS_sve_sqrshrn, EA_SCALABLE, REG_V0, REG_V0, 5,
                               INS_OPTS_SCALABLE_H); // SQRSHRN <Zd>.H, {<Zn1>.S-<Zn2>.S }, #<const>
@@ -5430,6 +6361,40 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_I(INS_sve_uqrshrn, EA_SCALABLE, REG_V15, REG_V12, 1,
                               INS_OPTS_SCALABLE_H); // UQRSHRN <Zd>.H, {<Zn1>.S-<Zn2>.S }, #<const>
 
+    // IF_SVE_GB_2A
+    theEmitter->emitIns_R_R_I(INS_sve_rshrnb, EA_SCALABLE, REG_V0, REG_V1, 1,
+                              INS_OPTS_SCALABLE_B); // RSHRNB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_rshrnt, EA_SCALABLE, REG_V2, REG_V3, 1,
+                              INS_OPTS_SCALABLE_H); // RSHRNT <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_shrnb, EA_SCALABLE, REG_V4, REG_V5, 1,
+                              INS_OPTS_SCALABLE_S); // SHRNB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_shrnt, EA_SCALABLE, REG_V6, REG_V7, 2,
+                              INS_OPTS_SCALABLE_B); // SHRNT <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sqrshrnb, EA_SCALABLE, REG_V8, REG_V9, 3,
+                              INS_OPTS_SCALABLE_H); // SQRSHRNB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sqrshrnt, EA_SCALABLE, REG_V10, REG_V11, 4,
+                              INS_OPTS_SCALABLE_S); // SQRSHRNT <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sqrshrunb, EA_SCALABLE, REG_V12, REG_V13, 5,
+                              INS_OPTS_SCALABLE_B); // SQRSHRUNB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sqrshrunt, EA_SCALABLE, REG_V14, REG_V15, 8,
+                              INS_OPTS_SCALABLE_H); // SQRSHRUNT <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sqshrnb, EA_SCALABLE, REG_V16, REG_V17, 8,
+                              INS_OPTS_SCALABLE_S); // SQSHRNB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sqshrnt, EA_SCALABLE, REG_V18, REG_V19, 6,
+                              INS_OPTS_SCALABLE_B); // SQSHRNT <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sqshrunb, EA_SCALABLE, REG_V20, REG_V21, 13,
+                              INS_OPTS_SCALABLE_H); // SQSHRUNB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sqshrunt, EA_SCALABLE, REG_V22, REG_V23, 16,
+                              INS_OPTS_SCALABLE_S); // SQSHRUNT <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_uqrshrnb, EA_SCALABLE, REG_V24, REG_V25, 7,
+                              INS_OPTS_SCALABLE_B); // UQRSHRNB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_uqrshrnt, EA_SCALABLE, REG_V26, REG_V27, 16,
+                              INS_OPTS_SCALABLE_H); // UQRSHRNT <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_uqshrnb, EA_SCALABLE, REG_V28, REG_V29, 32,
+                              INS_OPTS_SCALABLE_S); // UQSHRNB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_uqshrnt, EA_SCALABLE, REG_V30, REG_V31, 8,
+                              INS_OPTS_SCALABLE_B); // UQSHRNT <Zd>.<T>, <Zn>.<Tb>, #<const>
+
     // IF_SVE_DL_2A
     theEmitter->emitIns_R_R(INS_sve_cntp, EA_8BYTE, REG_R0, REG_P0, INS_OPTS_SCALABLE_B,
                             INS_SCALABLE_OPTS_VL_2X); // CNTP <Xd>, <PNn>.<T>, <vl>
@@ -5609,8 +6574,24 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R(INS_sve_aesimc, EA_SCALABLE, REG_V0); // AESIMC  <Zdn>.B, <Zdn>.B
     theEmitter->emitIns_R(INS_sve_aesmc, EA_SCALABLE, REG_V5);  // AESMC   <Zdn>.B, <Zdn>.B
 
-// IF_SVE_GS_3A
 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+    // IF_SVE_GN_3A
+    theEmitter->emitIns_R_R_R(INS_sve_fmlalb, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // FMLALB <Zda>.H, <Zn>.B, <Zm>.B
+    theEmitter->emitIns_R_R_R(INS_sve_fmlalt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_B); // FMLALT <Zda>.H, <Zn>.B, <Zm>.B
+
+    // IF_SVE_GO_3A
+    theEmitter->emitIns_R_R_R(INS_sve_fmlallbb, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // FMLALLBB <Zda>.S, <Zn>.B, <Zm>.B
+    theEmitter->emitIns_R_R_R(INS_sve_fmlallbt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_B); // FMLALLBT <Zda>.S, <Zn>.B, <Zm>.B
+    theEmitter->emitIns_R_R_R(INS_sve_fmlalltb, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_B); // FMLALLTB <Zda>.S, <Zn>.B, <Zm>.B
+    theEmitter->emitIns_R_R_R(INS_sve_fmlalltt, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_B); // FMLALLTT <Zda>.S, <Zn>.B, <Zm>.B
+
+    // IF_SVE_GS_3A
     theEmitter->emitIns_R_R_R(INS_sve_faddqv, EA_8BYTE, REG_V16, REG_P0, REG_V12,
                               INS_OPTS_SCALABLE_H); // FADDQV  <Vd>.<T>, <Pg>, <Zn>.<Tb>
     theEmitter->emitIns_R_R_R(INS_sve_fmaxnmqv, EA_8BYTE, REG_V17, REG_P1, REG_V11,
@@ -5653,6 +6634,16 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R_I(INS_sve_bfmls, EA_SCALABLE, REG_V7, REG_V8, REG_V7, 7,
                                 INS_OPTS_SCALABLE_H); // BFMLS <Zda>.H, <Zn>.H, <Zm>.H[<imm>]
 
+    // IF_SVE_GV_3A
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, 0,
+                                  INS_OPTS_SCALABLE_S); // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 90,
+                                  INS_OPTS_SCALABLE_S); // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 180,
+                                  INS_OPTS_SCALABLE_S); // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 270,
+                                  INS_OPTS_SCALABLE_S); // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
+
     // IF_SVE_GX_3A
     theEmitter->emitIns_R_R_R_I(INS_sve_fmul, EA_SCALABLE, REG_V0, REG_V2, REG_V1, 0,
                                 INS_OPTS_SCALABLE_S); // FMUL <Zda>.S, <Zn>.S, <Zm>.S[<imm>]
@@ -5693,6 +6684,89 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R_I(INS_sve_bfdot, EA_SCALABLE, REG_V12, REG_V14, REG_V7, 3,
                                 INS_OPTS_SCALABLE_H); // BFDOT <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
 
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+    // IF_SVE_GY_3A
+    theEmitter->emitIns_R_R_R_I(INS_sve_fdot, EA_SCALABLE, REG_V0, REG_V2, REG_V1,
+                                1); // FDOT <Zda>.H, <Zn>.B, <Zm>.B[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_fdot, EA_SCALABLE, REG_V4, REG_V6, REG_V3,
+                                3); // FDOT <Zda>.H, <Zn>.B, <Zm>.B[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_fdot, EA_SCALABLE, REG_V8, REG_V10, REG_V5,
+                                5); // FDOT <Zda>.H, <Zn>.B, <Zm>.B[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_fdot, EA_SCALABLE, REG_V12, REG_V14, REG_V7,
+                                7); // FDOT <Zda>.H, <Zn>.B, <Zm>.B[<imm>]
+
+    // IF_SVE_GY_3B_D
+    theEmitter->emitIns_R_R_R_I(INS_sve_fdot, EA_SCALABLE, REG_V0, REG_V2, REG_V1, 0,
+                                INS_OPTS_SCALABLE_B); // FDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_fdot, EA_SCALABLE, REG_V4, REG_V6, REG_V3, 1,
+                                INS_OPTS_SCALABLE_B); // FDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_fdot, EA_SCALABLE, REG_V8, REG_V10, REG_V5, 2,
+                                INS_OPTS_SCALABLE_B); // FDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_fdot, EA_SCALABLE, REG_V12, REG_V14, REG_V7, 3,
+                                INS_OPTS_SCALABLE_B); // FDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
+#endif                                                // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+
+    // IF_SVE_GZ_3A
+    theEmitter->emitIns_R_R_R_I(INS_sve_bfmlalb, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0,
+                                INS_OPTS_SCALABLE_H); // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_bfmlalt, EA_SCALABLE, REG_V2, REG_V3, REG_V1, 1,
+                                INS_OPTS_SCALABLE_H); // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_bfmlslb, EA_SCALABLE, REG_V4, REG_V5, REG_V2, 2,
+                                INS_OPTS_SCALABLE_H); // BFMLSLB <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_bfmlslt, EA_SCALABLE, REG_V6, REG_V7, REG_V3, 3,
+                                INS_OPTS_SCALABLE_H); // BFMLSLT <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_fmlalb, EA_SCALABLE, REG_V8, REG_V9, REG_V4, 4,
+                                INS_OPTS_SCALABLE_H); // FMLALB <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_fmlalt, EA_SCALABLE, REG_V10, REG_V11, REG_V5, 5,
+                                INS_OPTS_SCALABLE_H); // FMLALT <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_fmlslb, EA_SCALABLE, REG_V12, REG_V13, REG_V6, 6,
+                                INS_OPTS_SCALABLE_H); // FMLSLB <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_fmlslt, EA_SCALABLE, REG_V14, REG_V15, REG_V7, 7,
+                                INS_OPTS_SCALABLE_H); // FMLSLT <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
+
+    // IF_SVE_HA_3A
+    theEmitter->emitIns_R_R_R(INS_sve_bfdot, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // BFDOT <Zda>.S, <Zn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_fdot, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_H); // FDOT <Zda>.S, <Zn>.H, <Zm>.H
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+    // IF_SVE_HA_3A_E
+    theEmitter->emitIns_R_R_R(INS_sve_fdot, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_B); // FDOT <Zda>.H, <Zn>.B, <Zm>.B
+
+    // IF_SVE_HA_3A_F
+    theEmitter->emitIns_R_R_R(INS_sve_fdot, EA_SCALABLE, REG_V9, REG_V10, REG_V11); // FDOT <Zda>.S, <Zn>.B, <Zm>.B
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+
+    // IF_SVE_HB_3A
+    theEmitter->emitIns_R_R_R(INS_sve_bfmlalb, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // BFMLALB <Zda>.S, <Zn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_bfmlalt, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_H); // BFMLALT <Zda>.S, <Zn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_bfmlslb, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_H); // BFMLSLB <Zda>.S, <Zn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_bfmlslt, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_H); // BFMLSLT <Zda>.S, <Zn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_fmlalb, EA_SCALABLE, REG_V12, REG_V13, REG_V14,
+                              INS_OPTS_SCALABLE_H); // FMLALB <Zda>.S, <Zn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_fmlalt, EA_SCALABLE, REG_V15, REG_V16, REG_V17,
+                              INS_OPTS_SCALABLE_H); // FMLALT <Zda>.S, <Zn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_fmlslb, EA_SCALABLE, REG_V18, REG_V19, REG_V20,
+                              INS_OPTS_SCALABLE_H); // FMLSLB <Zda>.S, <Zn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_fmlslt, EA_SCALABLE, REG_V21, REG_V22, REG_V23,
+                              INS_OPTS_SCALABLE_H); // FMLSLT <Zda>.S, <Zn>.H, <Zm>.H
+
+    // IF_SVE_HD_3A
+    theEmitter->emitIns_R_R_R(INS_sve_bfmmla, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // BFMMLA <Zda>.S, <Zn>.H, <Zm>.H
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+    // IF_SVE_HD_3A_A
+    theEmitter->emitIns_R_R_R(INS_sve_fmmla, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_D); // FMMLA <Zda>.D, <Zn>.D, <Zm>.D
+#endif                                              // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+
     // IF_SVE_HE_3A
     theEmitter->emitIns_R_R_R(INS_sve_faddv, EA_2BYTE, REG_V21, REG_P7, REG_V7,
                               INS_OPTS_SCALABLE_H); // FADDV   <V><d>, <Pg>, <Zn>.<T>
@@ -5779,6 +6853,18 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R(INS_sve_whilewr, EA_8BYTE, REG_P7, REG_R14, REG_R15,
                               INS_OPTS_SCALABLE_D); // WHILEWR <Pd>.<T>, <Xn>, <Xm>
 
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+    // IF_SVE_DV_4A
+    theEmitter->emitIns_R_R_R_R_I(INS_sve_psel, EA_SCALABLE, REG_P0, REG_P1, REG_P2, REG_R12, 15,
+                                  INS_OPTS_SCALABLE_B); // PSEL <Pd>, <Pn>, <Pm>.<T>[<Wv>, <imm>]
+    theEmitter->emitIns_R_R_R_R_I(INS_sve_psel, EA_SCALABLE, REG_P3, REG_P4, REG_P5, REG_R13, 7,
+                                  INS_OPTS_SCALABLE_H); // PSEL <Pd>, <Pn>, <Pm>.<T>[<Wv>, <imm>]
+    theEmitter->emitIns_R_R_R_R_I(INS_sve_psel, EA_SCALABLE, REG_P6, REG_P7, REG_P8, REG_R14, 3,
+                                  INS_OPTS_SCALABLE_S); // PSEL <Pd>, <Pn>, <Pm>.<T>[<Wv>, <imm>]
+    theEmitter->emitIns_R_R_R_R_I(INS_sve_psel, EA_SCALABLE, REG_P9, REG_P10, REG_P11, REG_R15, 1,
+                                  INS_OPTS_SCALABLE_D); // PSEL <Pd>, <Pn>, <Pm>.<T>[<Wv>, <imm>]
+#endif                                                  // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+
     // IF_SVE_DW_2A
     theEmitter->emitIns_R_R_I(INS_sve_pext, EA_SCALABLE, REG_P0, REG_P8, 0,
                               INS_OPTS_SCALABLE_B); // PEXT <Pd>.<T>, <PNn>[<imm>]
@@ -5869,20 +6955,20 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     // IF_SVE_EB_1A
     theEmitter->emitIns_R_I(INS_sve_dup, EA_SCALABLE, REG_V0, -128,
                             INS_OPTS_SCALABLE_B); // DUP <Zd>.<T>, #<imm>{, <shift>}
-    theEmitter->emitIns_R_I(INS_sve_dup, EA_SCALABLE, REG_V1, 0, INS_OPTS_SCALABLE_H,
-                            INS_SCALABLE_OPTS_SHIFT); // DUP <Zd>.<T>, #<imm>{, <shift>}
+    theEmitter->emitIns_R_I(INS_sve_dup, EA_SCALABLE, REG_V1, 0,
+                            INS_OPTS_SCALABLE_H); // DUP <Zd>.<T>, #<imm>{, <shift>}
     theEmitter->emitIns_R_I(INS_sve_dup, EA_SCALABLE, REG_V2, 5,
                             INS_OPTS_SCALABLE_S); // DUP <Zd>.<T>, #<imm>{, <shift>}
     theEmitter->emitIns_R_I(INS_sve_dup, EA_SCALABLE, REG_V3, 127,
                             INS_OPTS_SCALABLE_D); // DUP <Zd>.<T>, #<imm>{, <shift>}
-    theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V4, 0,
-                            INS_OPTS_SCALABLE_B); // MOV <Zd>.<T>, #<imm>{, <shift>}
-    theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V5, -128, INS_OPTS_SCALABLE_H,
-                            INS_SCALABLE_OPTS_SHIFT); // MOV <Zd>.<T>, #<imm>{, <shift>}
-    theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V6, 5, INS_OPTS_SCALABLE_S,
-                            INS_SCALABLE_OPTS_SHIFT); // MOV <Zd>.<T>, #<imm>{, <shift>}
-    theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V7, 127, INS_OPTS_SCALABLE_D,
-                            INS_SCALABLE_OPTS_SHIFT); // MOV <Zd>.<T>, #<imm>{, <shift>}
+    theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V4, 256,
+                            INS_OPTS_SCALABLE_D); // MOV <Zd>.<T>, #<imm>{, <shift>}
+    theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V5, -32768,
+                            INS_OPTS_SCALABLE_H); // MOV <Zd>.<T>, #<imm>{, <shift>}
+    theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V6, 1280,
+                            INS_OPTS_SCALABLE_S); // MOV <Zd>.<T>, #<imm>{, <shift>}
+    theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V7, 32512,
+                            INS_OPTS_SCALABLE_D); // MOV <Zd>.<T>, #<imm>{, <shift>}
 
     // IF_SVE_EB_1B
     theEmitter->emitIns_R(INS_sve_fmov, EA_SCALABLE, REG_V0, INS_OPTS_SCALABLE_B); // FMOV <Zd>.<T>, #0.0
@@ -5893,18 +6979,18 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     // IF_SVE_EC_1A
     theEmitter->emitIns_R_I(INS_sve_add, EA_SCALABLE, REG_V0, 0,
                             INS_OPTS_SCALABLE_B); // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-    theEmitter->emitIns_R_I(INS_sve_sqadd, EA_SCALABLE, REG_V1, 0, INS_OPTS_SCALABLE_H,
-                            INS_SCALABLE_OPTS_SHIFT); // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-    theEmitter->emitIns_R_I(INS_sve_sqsub, EA_SCALABLE, REG_V2, 1,
+    theEmitter->emitIns_R_I(INS_sve_sqadd, EA_SCALABLE, REG_V1, 5,
+                            INS_OPTS_SCALABLE_H); // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+    theEmitter->emitIns_R_I(INS_sve_sqsub, EA_SCALABLE, REG_V2, 128,
                             INS_OPTS_SCALABLE_S); // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-    theEmitter->emitIns_R_I(INS_sve_sub, EA_SCALABLE, REG_V3, 128,
+    theEmitter->emitIns_R_I(INS_sve_sub, EA_SCALABLE, REG_V3, 255,
                             INS_OPTS_SCALABLE_D); // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-    theEmitter->emitIns_R_I(INS_sve_subr, EA_SCALABLE, REG_V4, 255,
-                            INS_OPTS_SCALABLE_B); // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-    theEmitter->emitIns_R_I(INS_sve_uqadd, EA_SCALABLE, REG_V5, 5, INS_OPTS_SCALABLE_H,
-                            INS_SCALABLE_OPTS_SHIFT); // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-    theEmitter->emitIns_R_I(INS_sve_uqsub, EA_SCALABLE, REG_V6, 255, INS_OPTS_SCALABLE_S,
-                            INS_SCALABLE_OPTS_SHIFT); // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+    theEmitter->emitIns_R_I(INS_sve_subr, EA_SCALABLE, REG_V4, 256,
+                            INS_OPTS_SCALABLE_D); // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+    theEmitter->emitIns_R_I(INS_sve_uqadd, EA_SCALABLE, REG_V5, 1280,
+                            INS_OPTS_SCALABLE_H); // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+    theEmitter->emitIns_R_I(INS_sve_uqsub, EA_SCALABLE, REG_V6, 65280,
+                            INS_OPTS_SCALABLE_S); // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
 
     // IF_SVE_EG_3A
     theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V1, REG_V2, REG_V0, 0,
@@ -5916,6 +7002,42 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V7, REG_V8, REG_V3, 3,
                                 INS_OPTS_SCALABLE_H); // UDOT <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
 
+    // IF_SVE_EJ_3A
+    theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V0, REG_V1, REG_V2, 0,
+                                INS_OPTS_SCALABLE_S); // CDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>, <const>
+    theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V3, REG_V4, REG_V5, 90,
+                                INS_OPTS_SCALABLE_S); // CDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>, <const>
+    theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V6, REG_V7, REG_V8, 180,
+                                INS_OPTS_SCALABLE_D); // CDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>, <const>
+    theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V9, REG_V10, REG_V11, 270,
+                                INS_OPTS_SCALABLE_D); // CDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>, <const>
+
+    // IF_SVE_EK_3A
+    theEmitter->emitIns_R_R_R_I(INS_sve_cmla, EA_SCALABLE, REG_V0, REG_V1, REG_V2, 0,
+                                INS_OPTS_SCALABLE_B); // CMLA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_R_I(INS_sve_cmla, EA_SCALABLE, REG_V3, REG_V4, REG_V5, 90,
+                                INS_OPTS_SCALABLE_H); // CMLA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_R_I(INS_sve_cmla, EA_SCALABLE, REG_V6, REG_V7, REG_V8, 180,
+                                INS_OPTS_SCALABLE_S); // CMLA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_R_I(INS_sve_cmla, EA_SCALABLE, REG_V9, REG_V10, REG_V11, 270,
+                                INS_OPTS_SCALABLE_D); // CMLA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V12, REG_V13, REG_V14, 0,
+                                INS_OPTS_SCALABLE_B); // SQRDCMLAH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V15, REG_V16, REG_V17, 90,
+                                INS_OPTS_SCALABLE_H); // SQRDCMLAH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V18, REG_V19, REG_V20, 180,
+                                INS_OPTS_SCALABLE_S); // SQRDCMLAH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V21, REG_V22, REG_V23, 270,
+                                INS_OPTS_SCALABLE_D); // SQRDCMLAH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+    // IF_SVE_EW_3A
+    theEmitter->emitIns_R_R_R(INS_sve_mlapt, EA_SCALABLE, REG_V0, REG_V1, REG_V2); // MLAPT <Zda>.D, <Zn>.D, <Zm>.D
+
+    // IF_SVE_EW_3B
+    theEmitter->emitIns_R_R_R(INS_sve_madpt, EA_SCALABLE, REG_V3, REG_V4, REG_V5); // MADPT <Zdn>.D, <Zm>.D, <Za>.D
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+
     // IF_SVE_EY_3A
     theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V9, REG_V10, REG_V4, 0,
                                 INS_OPTS_SCALABLE_B); // SDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
@@ -5926,6 +7048,16 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V15, REG_V16, REG_V7, 3,
                                 INS_OPTS_SCALABLE_B); // UDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
 
+    // IF_SVE_EY_3B
+    theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V0, REG_V1, REG_V0,
+                                0); // SDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_sdot, EA_SCALABLE, REG_V2, REG_V3, REG_V5,
+                                1); // SDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V4, REG_V5, REG_V10,
+                                0); // UDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_udot, EA_SCALABLE, REG_V6, REG_V7, REG_V15,
+                                1); // UDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>]
+
     // IF_SVE_EZ_3A
     theEmitter->emitIns_R_R_R_I(INS_sve_sudot, EA_SCALABLE, REG_V17, REG_V18, REG_V0, 0,
                                 INS_OPTS_SCALABLE_B); // SUDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
@@ -6138,6 +7270,98 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R_I(INS_sve_mls, EA_SCALABLE, REG_V22, REG_V23, REG_V15, 1,
                                 INS_OPTS_SCALABLE_D); // MLS <Zda>.D, <Zn>.D, <Zm>.D[<imm>]
 
+    // IF_SVE_FK_3A
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlah, EA_SCALABLE, REG_V0, REG_V1, REG_V1, 1,
+                                INS_OPTS_SCALABLE_H); // SQRDMLAH <Zda>.H, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlah, EA_SCALABLE, REG_V2, REG_V3, REG_V3, 3,
+                                INS_OPTS_SCALABLE_H); // SQRDMLAH <Zda>.H, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V4, REG_V5, REG_V5, 5,
+                                INS_OPTS_SCALABLE_H); // SQRDMLSH <Zda>.H, <Zn>.H, <Zm>.H[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V6, REG_V7, REG_V7, 7,
+                                INS_OPTS_SCALABLE_H); // SQRDMLSH <Zda>.H, <Zn>.H, <Zm>.H[<imm>]
+
+    // IF_SVE_FK_3B
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlah, EA_SCALABLE, REG_V8, REG_V9, REG_V0, 0,
+                                INS_OPTS_SCALABLE_S); // SQRDMLAH <Zda>.S, <Zn>.S, <Zm>.S[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlah, EA_SCALABLE, REG_V10, REG_V11, REG_V2, 1,
+                                INS_OPTS_SCALABLE_S); // SQRDMLAH <Zda>.S, <Zn>.S, <Zm>.S[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V12, REG_V13, REG_V4, 2,
+                                INS_OPTS_SCALABLE_S); // SQRDMLSH <Zda>.S, <Zn>.S, <Zm>.S[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V14, REG_V15, REG_V6, 3,
+                                INS_OPTS_SCALABLE_S); // SQRDMLSH <Zda>.S, <Zn>.S, <Zm>.S[<imm>]
+
+    // IF_SVE_FK_3C
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlah, EA_SCALABLE, REG_V16, REG_V17, REG_V0, 0,
+                                INS_OPTS_SCALABLE_D); // SQRDMLAH <Zda>.D, <Zn>.D, <Zm>.D[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlah, EA_SCALABLE, REG_V18, REG_V19, REG_V5, 1,
+                                INS_OPTS_SCALABLE_D); // SQRDMLAH <Zda>.D, <Zn>.D, <Zm>.D[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V20, REG_V21, REG_V10, 0,
+                                INS_OPTS_SCALABLE_D); // SQRDMLSH <Zda>.D, <Zn>.D, <Zm>.D[<imm>]
+    theEmitter->emitIns_R_R_R_I(INS_sve_sqrdmlsh, EA_SCALABLE, REG_V22, REG_V23, REG_V15, 1,
+                                INS_OPTS_SCALABLE_D); // SQRDMLSH <Zda>.D, <Zn>.D, <Zm>.D[<imm>]
+
+    // IF_SVE_FR_2A
+    theEmitter->emitIns_R_R_I(INS_sve_sshllb, EA_SCALABLE, REG_V0, REG_V1, 1,
+                              INS_OPTS_SCALABLE_B); // SSHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sshllt, EA_SCALABLE, REG_V2, REG_V3, 3,
+                              INS_OPTS_SCALABLE_B); // SSHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_ushllb, EA_SCALABLE, REG_V4, REG_V5, 5,
+                              INS_OPTS_SCALABLE_B); // USHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_ushllt, EA_SCALABLE, REG_V6, REG_V7, 7,
+                              INS_OPTS_SCALABLE_B); // USHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sshllb, EA_SCALABLE, REG_V8, REG_V9, 0,
+                              INS_OPTS_SCALABLE_H); // SSHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sshllt, EA_SCALABLE, REG_V10, REG_V11, 5,
+                              INS_OPTS_SCALABLE_H); // SSHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_ushllb, EA_SCALABLE, REG_V12, REG_V13, 10,
+                              INS_OPTS_SCALABLE_H); // USHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_ushllt, EA_SCALABLE, REG_V14, REG_V15, 15,
+                              INS_OPTS_SCALABLE_H); // USHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sshllb, EA_SCALABLE, REG_V16, REG_V17, 8,
+                              INS_OPTS_SCALABLE_S); // SSHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sshllt, EA_SCALABLE, REG_V18, REG_V19, 16,
+                              INS_OPTS_SCALABLE_S); // SSHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_ushllb, EA_SCALABLE, REG_V20, REG_V21, 24,
+                              INS_OPTS_SCALABLE_S); // USHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_ushllt, EA_SCALABLE, REG_V22, REG_V23, 31,
+                              INS_OPTS_SCALABLE_S); // USHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
+
+    // IF_SVE_FV_2A
+    theEmitter->emitIns_R_R_I(INS_sve_cadd, EA_SCALABLE, REG_V0, REG_V1, 90,
+                              INS_OPTS_SCALABLE_B); // CADD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_I(INS_sve_cadd, EA_SCALABLE, REG_V2, REG_V3, 90,
+                              INS_OPTS_SCALABLE_H); // CADD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_I(INS_sve_cadd, EA_SCALABLE, REG_V4, REG_V5, 270,
+                              INS_OPTS_SCALABLE_S); // CADD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_I(INS_sve_cadd, EA_SCALABLE, REG_V6, REG_V7, 270,
+                              INS_OPTS_SCALABLE_D); // CADD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_I(INS_sve_sqcadd, EA_SCALABLE, REG_V8, REG_V9, 270,
+                              INS_OPTS_SCALABLE_B); // SQCADD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_I(INS_sve_sqcadd, EA_SCALABLE, REG_V10, REG_V11, 270,
+                              INS_OPTS_SCALABLE_H); // SQCADD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_I(INS_sve_sqcadd, EA_SCALABLE, REG_V12, REG_V13, 90,
+                              INS_OPTS_SCALABLE_S); // SQCADD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const>
+    theEmitter->emitIns_R_R_I(INS_sve_sqcadd, EA_SCALABLE, REG_V14, REG_V15, 90,
+                              INS_OPTS_SCALABLE_D); // SQCADD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const>
+
+    // IF_SVE_FY_3A
+    theEmitter->emitIns_R_R_R(INS_sve_adclb, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_S); // ADCLB <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_adclb, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_D); // ADCLB <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_adclt, EA_SCALABLE, REG_V6, REG_V7, REG_V8,
+                              INS_OPTS_SCALABLE_S); // ADCLT <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_adclt, EA_SCALABLE, REG_V9, REG_V10, REG_V11,
+                              INS_OPTS_SCALABLE_D); // ADCLT <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_sbclb, EA_SCALABLE, REG_V12, REG_V13, REG_V14,
+                              INS_OPTS_SCALABLE_S); // SBCLB <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_sbclb, EA_SCALABLE, REG_V15, REG_V16, REG_V17,
+                              INS_OPTS_SCALABLE_D); // SBCLB <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_sbclt, EA_SCALABLE, REG_V18, REG_V19, REG_V20,
+                              INS_OPTS_SCALABLE_S); // SBCLT <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R(INS_sve_sbclt, EA_SCALABLE, REG_V21, REG_V22, REG_V23,
+                              INS_OPTS_SCALABLE_D); // SBCLT <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+
     // IF_SVE_ED_1A
     theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128,
                             INS_OPTS_SCALABLE_B); // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm>
@@ -6166,64 +7390,74 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_I(INS_sve_mul, EA_SCALABLE, REG_V3, 127,
                             INS_OPTS_SCALABLE_D); // MUL <Zdn>.<T>, <Zdn>.<T>, #<imm>
 
+    // IF_SVE_EF_3A
+    theEmitter->emitIns_R_R_R(INS_sve_sdot, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_H); // SDOT <Zda>.S, <Zn>.H, <Zm>.H
+    theEmitter->emitIns_R_R_R(INS_sve_udot, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_H); // UDOT <Zda>.S, <Zn>.H, <Zm>.H
+
+    // IF_SVE_EI_3A
+    theEmitter->emitIns_R_R_R(INS_sve_usdot, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_B); // USDOT <Zda>.S, <Zn>.B, <Zm>.B
+
     // IF_SVE_FA_3A
     theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V0, REG_V7, REG_V1, 3, 0,
                                   INS_OPTS_SCALABLE_B); // CDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V2, REG_V5, REG_V3, 2, 1,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V2, REG_V5, REG_V3, 2, 90,
                                   INS_OPTS_SCALABLE_B); // CDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V4, REG_V3, REG_V5, 1, 2,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V4, REG_V3, REG_V5, 1, 180,
                                   INS_OPTS_SCALABLE_B); // CDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V6, REG_V1, REG_V7, 0, 3,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V6, REG_V1, REG_V7, 0, 270,
                                   INS_OPTS_SCALABLE_B); // CDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>], <const>
 
     // IF_SVE_FA_3B
     theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, 0,
                                   INS_OPTS_SCALABLE_H); // CDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 1,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 90,
                                   INS_OPTS_SCALABLE_H); // CDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 2,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 180,
                                   INS_OPTS_SCALABLE_H); // CDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 3,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_cdot, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 270,
                                   INS_OPTS_SCALABLE_H); // CDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>], <const>
 
     // IF_SVE_FB_3A
     theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V0, REG_V7, REG_V1, 3, 0,
                                   INS_OPTS_SCALABLE_H); // CMLA <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V2, REG_V5, REG_V3, 2, 1,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V2, REG_V5, REG_V3, 2, 90,
                                   INS_OPTS_SCALABLE_H); // CMLA <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V4, REG_V3, REG_V5, 1, 2,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V4, REG_V3, REG_V5, 1, 180,
                                   INS_OPTS_SCALABLE_H); // CMLA <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V6, REG_V1, REG_V7, 0, 3,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V6, REG_V1, REG_V7, 0, 270,
                                   INS_OPTS_SCALABLE_H); // CMLA <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
 
     // IF_SVE_FB_3B
     theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, 0,
                                   INS_OPTS_SCALABLE_S); // CMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 1,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 90,
                                   INS_OPTS_SCALABLE_S); // CMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 2,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 180,
                                   INS_OPTS_SCALABLE_S); // CMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 3,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_cmla, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 270,
                                   INS_OPTS_SCALABLE_S); // CMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
 
     // IF_SVE_FC_3A
     theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V0, REG_V7, REG_V1, 3, 0,
                                   INS_OPTS_SCALABLE_H); // SQRDCMLAH <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V2, REG_V5, REG_V3, 2, 1,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V2, REG_V5, REG_V3, 2, 90,
                                   INS_OPTS_SCALABLE_H); // SQRDCMLAH <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V4, REG_V3, REG_V5, 1, 2,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V4, REG_V3, REG_V5, 1, 180,
                                   INS_OPTS_SCALABLE_H); // SQRDCMLAH <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V6, REG_V1, REG_V7, 0, 3,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V6, REG_V1, REG_V7, 0, 270,
                                   INS_OPTS_SCALABLE_H); // SQRDCMLAH <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
 
     // IF_SVE_FC_3B
     theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, 0,
                                   INS_OPTS_SCALABLE_S); // SQRDCMLAH <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 1,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 90,
                                   INS_OPTS_SCALABLE_S); // SQRDCMLAH <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 2,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 180,
                                   INS_OPTS_SCALABLE_S); // SQRDCMLAH <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
-    theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 3,
+    theEmitter->emitIns_R_R_R_I_I(INS_sve_sqrdcmlah, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 270,
                                   INS_OPTS_SCALABLE_S); // SQRDCMLAH <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
 
     // IF_SVE_IH_3A
@@ -7223,6 +8457,18 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P3, REG_V0, REG_V6, 270,
                                   INS_OPTS_SCALABLE_D); // FCMLA   <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const>
 
+    // IF_SVE_GI_4A
+    theEmitter->emitIns_R_R_R_R(INS_sve_histcnt, EA_SCALABLE, REG_V0, REG_P0, REG_V1, REG_V2,
+                                INS_OPTS_SCALABLE_S); // HISTCNT <Zd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+    theEmitter->emitIns_R_R_R_R(INS_sve_histcnt, EA_SCALABLE, REG_V3, REG_P7, REG_V4, REG_V5,
+                                INS_OPTS_SCALABLE_D); // HISTCNT <Zd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+
+    // IF_SVE_GJ_3A
+    theEmitter->emitIns_R_R_R(INS_sve_rax1, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
+                              INS_OPTS_SCALABLE_D); // RAX1 <Zd>.D, <Zn>.D, <Zm>.D
+    theEmitter->emitIns_R_R_R(INS_sve_sm4ekey, EA_SCALABLE, REG_V3, REG_V4, REG_V5,
+                              INS_OPTS_SCALABLE_S); // SM4EKEY <Zd>.S, <Zn>.S, <Zm>.S
+
     // IF_SVE_HI_3A
     theEmitter->emitIns_R_R_R(INS_sve_fcmeq, EA_SCALABLE, REG_P2, REG_P3, REG_V4,
                               INS_OPTS_SCALABLE_H); // FCMEQ   <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0
@@ -7339,6 +8585,628 @@ void CodeGen::genArm64EmitterUnitTestsSve()
                               INS_SCALABLE_OPTS_UNPREDICATED);
     theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, 255, INS_OPTS_NONE,
                               INS_SCALABLE_OPTS_UNPREDICATED);
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+    // IF_SVE_GG_3A
+    // LUTI2 <Zd>.B, {<Zn>.B }, <Zm>[<index>]
+    // luti2   z0.b, {z0.b}, z0[0]  // 01000101-00100000-10110000-00000000
+    // CHECK-INST: luti2   z0.b, { z0.b }, z0[0]
+    // CHECK-ENCODING: [0x00,0xb0,0x20,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti2, EA_SCALABLE, REG_V0, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_B);
+    // luti2   z21.b, {z10.b}, z21[1]  // 01000101-01110101-10110001-01010101
+    // CHECK-INST: luti2   z21.b, { z10.b }, z21[1]
+    // CHECK-ENCODING: [0x55,0xb1,0x75,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti2, EA_SCALABLE, REG_V21, REG_V10, REG_V21, 1, INS_OPTS_SCALABLE_B);
+
+    // IF_SVE_GH_3B
+    // LUTI4 <Zd>.H, {<Zn1>.H, <Zn2>.H }, <Zm>[<index>]
+    // luti4   z0.h, {z0.h, z1.h}, z0[0]  // 01000101-00100000-10110100-00000000
+    // CHECK-INST: luti4   z0.h, { z0.h, z1.h }, z0[0]
+    // CHECK-ENCODING: [0x00,0xb4,0x20,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti4, EA_SCALABLE, REG_V0, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_H, EA_UNKNOWN,
+                                INS_SCALABLE_OPTS_WITH_VECTOR_PAIR);
+    // luti4   z21.h, {z10.h, z11.h}, z21[1]  // 01000101-01110101-10110101-01010101
+    // CHECK-INST: luti4   z21.h, { z10.h, z11.h }, z21[1]
+    // CHECK-ENCODING: [0x55,0xb5,0x75,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti4, EA_SCALABLE, REG_V21, REG_V10, REG_V21, 1, INS_OPTS_SCALABLE_H,
+                                EA_UNKNOWN, INS_SCALABLE_OPTS_WITH_VECTOR_PAIR);
+    // luti4   z31.h, {z31.h, z0.h}, z31[3]  // 01000101-11111111-10110111-11111111
+    // CHECK-INST: luti4   z31.h, { z31.h, z0.h }, z31[3]
+    // CHECK-ENCODING: [0xff,0xb7,0xff,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti4, EA_SCALABLE, REG_V31, REG_V31, REG_V31, 3, INS_OPTS_SCALABLE_H,
+                                EA_UNKNOWN, INS_SCALABLE_OPTS_WITH_VECTOR_PAIR);
+
+    // IF_SVE_GH_3B_B
+    // LUTI4 <Zd>.H, {<Zn>.H }, <Zm>[<index>]
+    // luti4   z0.h, {z0.h}, z0[0]  // 01000101-00100000-10111100-00000000
+    // CHECK-INST: luti4   z0.h, { z0.h }, z0[0]
+    // CHECK-ENCODING: [0x00,0xbc,0x20,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti4, EA_SCALABLE, REG_V0, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_H);
+    // luti4   z21.h, {z10.h}, z21[1]  // 01000101-01110101-10111101-01010101
+    // CHECK-INST: luti4   z21.h, { z10.h }, z21[1]
+    // CHECK-ENCODING: [0x55,0xbd,0x75,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti4, EA_SCALABLE, REG_V21, REG_V10, REG_V21, 1, INS_OPTS_SCALABLE_H);
+    // luti4   z31.h, {z31.h}, z31[3]  // 01000101-11111111-10111111-11111111
+    // CHECK-INST: luti4   z31.h, { z31.h }, z31[3]
+    // CHECK-ENCODING: [0xff,0xbf,0xff,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti4, EA_SCALABLE, REG_V31, REG_V31, REG_V31, 3, INS_OPTS_SCALABLE_H);
+
+    // IF_SVE_GG_3B
+    // LUTI2 <Zd>.H, {<Zn>.H }, <Zm>[<index>]
+    // luti2   z0.h, {z0.h}, z0[0]  // 01000101-00100000-10101000-00000000
+    // CHECK-INST: luti2   z0.h, { z0.h }, z0[0]
+    // CHECK-ENCODING: [0x00,0xa8,0x20,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti2, EA_SCALABLE, REG_V0, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_H);
+    // luti2   z21.h, {z10.h}, z21[3]  // 01000101-01110101-10111001-01010101
+    // CHECK-INST: luti2   z21.h, { z10.h }, z21[3]
+    // CHECK-ENCODING: [0x55,0xb9,0x75,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti2, EA_SCALABLE, REG_V21, REG_V10, REG_V21, 3, INS_OPTS_SCALABLE_H);
+    // luti2 z31.h, {z31.h}, z31[7]                      // 01000101-11111111-10111011-11111111
+    // CHECK-INST: luti2   z31.h, { z31.h }, z31[7]
+    // CHECK-ENCODING: [0xff,0xbb,0xff,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti2, EA_SCALABLE, REG_V31, REG_V31, REG_V31, 7, INS_OPTS_SCALABLE_H);
+
+    // IF_SVE_GH_3A
+    // LUTI4 <Zd>.B, {<Zn>.B }, <Zm>[<index>]
+    // luti4   z0.b, {z0.b}, z0[0]  // 01000101-01100000-10100100-00000000
+    // CHECK-INST: luti4   z0.b, { z0.b }, z0[0]
+    // CHECK-ENCODING: [0x00,0xa4,0x60,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti4, EA_SCALABLE, REG_V0, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_B);
+    // luti4   z31.b, {z31.b}, z31[1]  // 01000101-11111111-10100111-11111111
+    // CHECK-INST: luti4   z31.b, { z31.b }, z31[1]
+    // CHECK-ENCODING: [0xff,0xa7,0xff,0x45]
+    theEmitter->emitIns_R_R_R_I(INS_sve_luti4, EA_SCALABLE, REG_V31, REG_V31, REG_V31, 1, INS_OPTS_SCALABLE_B);
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+
+    // IF_SVE_HY_3A
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfd, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P6, REG_R5, REG_V4,
+                                    INS_OPTS_SCALABLE_S_UXTW,
+                                    INS_SCALABLE_OPTS_MOD_N); // PRFD    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #3]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfh, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P7, REG_R8, REG_V9,
+                                    INS_OPTS_SCALABLE_S_SXTW,
+                                    INS_SCALABLE_OPTS_MOD_N); // PRFH    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfw, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P0, REG_R2, REG_V1,
+                                    INS_OPTS_SCALABLE_S_UXTW,
+                                    INS_SCALABLE_OPTS_MOD_N); // PRFW    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL1STRM, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL2KEEP, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL2STRM, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL3KEEP, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL3STRM, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PSTL1KEEP, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PSTL1STRM, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PSTL2KEEP, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PSTL2STRM, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PSTL3KEEP, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PSTL3STRM, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_CONST6, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_SXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_CONST7, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_SXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_CONST14, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_CONST15, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_S_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+
+    // IF_SVE_HY_3A_A
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P1, REG_R2, REG_V3,
+                                    INS_OPTS_SCALABLE_D_UXTW); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfd, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P6, REG_R5, REG_V4,
+                                    INS_OPTS_SCALABLE_D_UXTW,
+                                    INS_SCALABLE_OPTS_MOD_N); // PRFD    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfh, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P7, REG_R8, REG_V9,
+                                    INS_OPTS_SCALABLE_D_SXTW,
+                                    INS_SCALABLE_OPTS_MOD_N); // PRFH    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfw, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P0, REG_R2, REG_V1,
+                                    INS_OPTS_SCALABLE_D_UXTW,
+                                    INS_SCALABLE_OPTS_MOD_N); // PRFW    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2]
+
+    // IF_SVE_HY_3B
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P0, REG_R1, REG_V2,
+                                    INS_OPTS_SCALABLE_D); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfd, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P7, REG_R4, REG_V3,
+                                    INS_OPTS_SCALABLE_D,
+                                    INS_SCALABLE_OPTS_LSL_N); // PRFD    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfh, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P6, REG_R5, REG_V4,
+                                    INS_OPTS_SCALABLE_D,
+                                    INS_SCALABLE_OPTS_LSL_N); // PRFH    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfw, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P3, REG_R2, REG_V1,
+                                    INS_OPTS_SCALABLE_D,
+                                    INS_SCALABLE_OPTS_LSL_N); // PRFW    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2]
+
+    // IF_SVE_HZ_2A_B
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P1, REG_V2, 0,
+                                    INS_OPTS_SCALABLE_S); // PRFB    <prfop>, <Pg>, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfd, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P4, REG_V3, 248,
+                                    INS_OPTS_SCALABLE_S); // PRFD    <prfop>, <Pg>, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfh, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P0, REG_V4, 62,
+                                    INS_OPTS_SCALABLE_S); // PRFH    <prfop>, <Pg>, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfw, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P7, REG_V5, 124,
+                                    INS_OPTS_SCALABLE_S); // PRFW    <prfop>, <Pg>, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P1, REG_V2, 31,
+                                    INS_OPTS_SCALABLE_D); // PRFB    <prfop>, <Pg>, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfd, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P4, REG_V3, 248,
+                                    INS_OPTS_SCALABLE_D); // PRFD    <prfop>, <Pg>, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfh, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P0, REG_V4, 62,
+                                    INS_OPTS_SCALABLE_D); // PRFH    <prfop>, <Pg>, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfw, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P7, REG_V5, 124,
+                                    INS_OPTS_SCALABLE_D); // PRFW    <prfop>, <Pg>, [<Zn>.D{, #<imm>}]
+
+    // IF_SVE_IA_2A
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P2, REG_R3,
+                                    -32); // PRFB    <prfop>, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfd, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P7, REG_R4,
+                                    31); // PRFD    <prfop>, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfh, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P1, REG_R2,
+                                    0); // PRFH    <prfop>, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfw, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P0, REG_R5,
+                                    -32); // PRFW    <prfop>, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+    theEmitter->emitIns_PRFOP_R_R_I(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P2, REG_R3,
+                                    17); // PRFB    <prfop>, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+
+    // IF_SVE_IB_3A
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfb, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P0, REG_R1,
+                                    REG_R2); // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Xm>]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfd, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P5, REG_R4, REG_R3,
+                                    INS_OPTS_NONE,
+                                    INS_SCALABLE_OPTS_LSL_N); // PRFD    <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfh, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P6, REG_R7, REG_R8,
+                                    INS_OPTS_NONE,
+                                    INS_SCALABLE_OPTS_LSL_N); // PRFH    <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+    theEmitter->emitIns_PRFOP_R_R_R(INS_sve_prfw, EA_SCALABLE, SVE_PRFOP_PLDL1KEEP, REG_P7, REG_R1, REG_R9,
+                                    INS_OPTS_NONE,
+                                    INS_SCALABLE_OPTS_LSL_N); // PRFW    <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+    // IF_SVE_HX_3A_B
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V0, REG_P0, REG_V1, 0,
+                                INS_OPTS_SCALABLE_S); // LD1B    {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V2, REG_P7, REG_V3, 5,
+                                INS_OPTS_SCALABLE_S); // LD1SB   {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ldff1b, EA_SCALABLE, REG_V4, REG_P3, REG_V1, 5,
+                                INS_OPTS_SCALABLE_S); // LDFF1B  {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sb, EA_SCALABLE, REG_V2, REG_P6, REG_V0, 31,
+                                INS_OPTS_SCALABLE_S); // LDFF1SB {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V0, REG_P0, REG_V1, 0,
+                                INS_OPTS_SCALABLE_D); // LD1B    {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V2, REG_P7, REG_V3, 5,
+                                INS_OPTS_SCALABLE_D); // LD1SB   {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ldff1b, EA_SCALABLE, REG_V4, REG_P3, REG_V1, 5,
+                                INS_OPTS_SCALABLE_D); // LDFF1B  {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sb, EA_SCALABLE, REG_V2, REG_P6, REG_V0, 31,
+                                INS_OPTS_SCALABLE_D); // LDFF1SB {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+
+    // IF_SVE_HX_3A_E
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V1, REG_P0, REG_V2, 0,
+                                INS_OPTS_SCALABLE_S); // LD1H    {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V2, REG_P4, REG_V3, 2,
+                                INS_OPTS_SCALABLE_S); // LD1SH   {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V1, REG_P2, REG_V9, 124,
+                                INS_OPTS_SCALABLE_S); // LD1W    {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ldff1h, EA_SCALABLE, REG_V4, REG_P7, REG_V3, 6,
+                                INS_OPTS_SCALABLE_S); // LDFF1H  {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sh, EA_SCALABLE, REG_V3, REG_P5, REG_V4, 62,
+                                INS_OPTS_SCALABLE_S); // LDFF1SH {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ldff1w, EA_SCALABLE, REG_V2, REG_P1, REG_V3, 124,
+                                INS_OPTS_SCALABLE_S); // LDFF1W  {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V1, REG_P0, REG_V2, 0,
+                                INS_OPTS_SCALABLE_D); // LD1H    {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V2, REG_P4, REG_V3, 2,
+                                INS_OPTS_SCALABLE_D); // LD1SH   {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V1, REG_P2, REG_V9, 124,
+                                INS_OPTS_SCALABLE_D); // LD1W    {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ldff1h, EA_SCALABLE, REG_V4, REG_P7, REG_V3, 6,
+                                INS_OPTS_SCALABLE_D); // LDFF1H  {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sh, EA_SCALABLE, REG_V3, REG_P5, REG_V4, 62,
+                                INS_OPTS_SCALABLE_D); // LDFF1SH {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ldff1w, EA_SCALABLE, REG_V2, REG_P1, REG_V3, 124,
+                                INS_OPTS_SCALABLE_D); // LDFF1W  {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+
+    // IF_SVE_IV_3A
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 0,
+                                INS_OPTS_SCALABLE_D); // LD1D    {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1sw, EA_SCALABLE, REG_V6, REG_P5, REG_V4, 0,
+                                INS_OPTS_SCALABLE_D); // LD1SW   {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ldff1d, EA_SCALABLE, REG_V7, REG_P3, REG_V1, 248,
+                                INS_OPTS_SCALABLE_D); // LDFF1D  {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sw, EA_SCALABLE, REG_V2, REG_P0, REG_V4, 124,
+                                INS_OPTS_SCALABLE_D); // LDFF1SW {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+
+    // IF_SVE_JI_3A_A
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 0,
+                                INS_OPTS_SCALABLE_S); // ST1B    {<Zt>.S }, <Pg>, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 31,
+                                INS_OPTS_SCALABLE_S); // ST1B    {<Zt>.S }, <Pg>, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P3, REG_V2, 0,
+                                INS_OPTS_SCALABLE_S); // ST1H    {<Zt>.S }, <Pg>, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P3, REG_V2, 62,
+                                INS_OPTS_SCALABLE_S); // ST1H    {<Zt>.S }, <Pg>, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V5, REG_P4, REG_V1, 0,
+                                INS_OPTS_SCALABLE_S); // ST1W    {<Zt>.S }, <Pg>, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V5, REG_P4, REG_V1, 124,
+                                INS_OPTS_SCALABLE_S); // ST1W    {<Zt>.S }, <Pg>, [<Zn>.S{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 0,
+                                INS_OPTS_SCALABLE_D); // ST1B    {<Zt>.D }, <Pg>, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 31,
+                                INS_OPTS_SCALABLE_D); // ST1B    {<Zt>.D }, <Pg>, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P3, REG_V2, 0,
+                                INS_OPTS_SCALABLE_D); // ST1H    {<Zt>.D }, <Pg>, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P3, REG_V2, 62,
+                                INS_OPTS_SCALABLE_D); // ST1H    {<Zt>.D }, <Pg>, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V5, REG_P4, REG_V1, 0,
+                                INS_OPTS_SCALABLE_D); // ST1W    {<Zt>.D }, <Pg>, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V5, REG_P4, REG_V1, 124,
+                                INS_OPTS_SCALABLE_D); // ST1W    {<Zt>.D }, <Pg>, [<Zn>.D{, #<imm>}]
+
+    // IF_SVE_JL_3A
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P7, REG_V4, 0,
+                                INS_OPTS_SCALABLE_D); // ST1D    {<Zt>.D }, <Pg>, [<Zn>.D{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P7, REG_V4, 248,
+                                INS_OPTS_SCALABLE_D); // ST1D    {<Zt>.D }, <Pg>, [<Zn>.D{, #<imm>}]
+
+    // IF_SVE_IC_3A
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rd, EA_SCALABLE, REG_V1, REG_P2, REG_R3, 504,
+                                INS_OPTS_SCALABLE_D); // LD1RD   {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsw, EA_SCALABLE, REG_V4, REG_P5, REG_R6, 252,
+                                INS_OPTS_SCALABLE_D); // LD1RSW  {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+
+    // IF_SVE_IC_3A_A
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsh, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 0,
+                                INS_OPTS_SCALABLE_S); // LD1RSH  {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 0,
+                                INS_OPTS_SCALABLE_S); // LD1RW   {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsh, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 126,
+                                INS_OPTS_SCALABLE_D); // LD1RSH  {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 252,
+                                INS_OPTS_SCALABLE_D); // LD1RW   {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+
+    // IF_SVE_IC_3A_B
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 0,
+                                INS_OPTS_SCALABLE_H); // LD1RH   {<Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V6, REG_P5, REG_R4, 0,
+                                INS_OPTS_SCALABLE_H); // LD1RSB  {<Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 126,
+                                INS_OPTS_SCALABLE_S); // LD1RH   {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V2, REG_P1, REG_R0, 63,
+                                INS_OPTS_SCALABLE_S); // LD1RSB  {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 126,
+                                INS_OPTS_SCALABLE_D); // LD1RH   {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V4, REG_P5, REG_R6, 63,
+                                INS_OPTS_SCALABLE_D); // LD1RSB  {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+
+    // IF_SVE_IC_3A_C
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V1, REG_P2, REG_R3, 0,
+                                INS_OPTS_SCALABLE_B); // LD1RB   {<Zt>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 63,
+                                INS_OPTS_SCALABLE_H); // LD1RB   {<Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V6, REG_P7, REG_R8, 0,
+                                INS_OPTS_SCALABLE_S); // LD1RB   {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V1, REG_P0, REG_R9, 63,
+                                INS_OPTS_SCALABLE_B); // LD1RB   {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+
+    // IF_SVE_HF_2A
+    // FRECPE  <Zd>.<T>, <Zn>.<T>
+    theEmitter->emitIns_R_R(INS_sve_frecpe, EA_SCALABLE, REG_V0, REG_V2, INS_OPTS_SCALABLE_H);
+    // FRSQRTE <Zd>.<T>, <Zn>.<T>
+    theEmitter->emitIns_R_R(INS_sve_frsqrte, EA_SCALABLE, REG_V5, REG_V3, INS_OPTS_SCALABLE_S);
+    // FRSQRTE <Zd>.<T>, <Zn>.<T>
+    theEmitter->emitIns_R_R(INS_sve_frsqrte, EA_SCALABLE, REG_V9, REG_V5, INS_OPTS_SCALABLE_D);
+
+    // IF_SVE_CH_2A
+    // SUNPKHI <Zd>.<T>, <Zn>.<Tb>
+    theEmitter->emitIns_R_R(INS_sve_sunpkhi, EA_SCALABLE, REG_V2, REG_V4, INS_OPTS_SCALABLE_H);
+    // SUNPKLO <Zd>.<T>, <Zn>.<Tb>
+    theEmitter->emitIns_R_R(INS_sve_sunpklo, EA_SCALABLE, REG_V1, REG_V5, INS_OPTS_SCALABLE_S);
+    // UUNPKHI <Zd>.<T>, <Zn>.<Tb>
+    theEmitter->emitIns_R_R(INS_sve_uunpkhi, EA_SCALABLE, REG_V5, REG_V1, INS_OPTS_SCALABLE_D);
+    // UUNPKLO <Zd>.<T>, <Zn>.<Tb>
+    theEmitter->emitIns_R_R(INS_sve_uunpklo, EA_SCALABLE, REG_V8, REG_V6, INS_OPTS_SCALABLE_S);
+
+    // IF_SVE_CG_2A
+    // REV     <Zd>.<T>, <Zn>.<T>
+    theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_V2, REG_V3, INS_OPTS_SCALABLE_B,
+                            INS_SCALABLE_OPTS_UNPREDICATED);
+    // REV     <Zd>.<T>, <Zn>.<T>
+    theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_V2, REG_V4, INS_OPTS_SCALABLE_H,
+                            INS_SCALABLE_OPTS_UNPREDICATED);
+    // REV     <Zd>.<T>, <Zn>.<T>
+    theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_V7, REG_V1, INS_OPTS_SCALABLE_S,
+                            INS_SCALABLE_OPTS_UNPREDICATED);
+    // REV     <Zd>.<T>, <Zn>.<T>
+    theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_V2, REG_V5, INS_OPTS_SCALABLE_D,
+                            INS_SCALABLE_OPTS_UNPREDICATED);
+
+    // IF_SVE_CB_2A
+    // Note: EA_4BYTE used for B and H (source register is W)
+    // DUP     <Zd>.<T>, <R><n|SP>
+    theEmitter->emitIns_R_R(INS_sve_dup, EA_4BYTE, REG_V0, REG_R1, INS_OPTS_SCALABLE_B);
+    // DUP     <Zd>.<T>, <R><n|SP>
+    theEmitter->emitIns_R_R(INS_sve_dup, EA_4BYTE, REG_V2, REG_R3, INS_OPTS_SCALABLE_H);
+    // DUP     <Zd>.<T>, <R><n|SP>
+    theEmitter->emitIns_R_R(INS_sve_dup, EA_4BYTE, REG_V1, REG_R5, INS_OPTS_SCALABLE_S);
+    // DUP     <Zd>.<T>, <R><n|SP>
+    theEmitter->emitIns_R_R(INS_sve_dup, EA_8BYTE, REG_V4, REG_SP, INS_OPTS_SCALABLE_D);
+    // MOV     <Zd>.<T>, <R><n|SP>
+    theEmitter->emitIns_R_R(INS_sve_mov, EA_4BYTE, REG_V4, REG_R2, INS_OPTS_SCALABLE_B);
+    // MOV     <Zd>.<T>, <R><n|SP>
+    theEmitter->emitIns_R_R(INS_sve_mov, EA_4BYTE, REG_V4, REG_R2, INS_OPTS_SCALABLE_H);
+    // MOV     <Zd>.<T>, <R><n|SP>
+    theEmitter->emitIns_R_R(INS_sve_mov, EA_4BYTE, REG_V1, REG_R3, INS_OPTS_SCALABLE_S);
+    // MOV     <Zd>.<T>, <R><n|SP>
+    theEmitter->emitIns_R_R(INS_sve_mov, EA_8BYTE, REG_V5, REG_SP, INS_OPTS_SCALABLE_D);
+    // MOV     <Zd>.<T>, <R><n|SP>
+    theEmitter->emitIns_R_R(INS_sve_mov, EA_8BYTE, REG_V2, REG_R9, INS_OPTS_SCALABLE_D);
+
+    // IF_SVE_BJ_2A
+    // FEXPA   <Zd>.<T>, <Zn>.<T>
+    theEmitter->emitIns_R_R(INS_sve_fexpa, EA_SCALABLE, REG_V0, REG_V1, INS_OPTS_SCALABLE_H);
+    // FEXPA   <Zd>.<T>, <Zn>.<T>
+    theEmitter->emitIns_R_R(INS_sve_fexpa, EA_SCALABLE, REG_V3, REG_V0, INS_OPTS_SCALABLE_S);
+    // FEXPA   <Zd>.<T>, <Zn>.<T>
+    theEmitter->emitIns_R_R(INS_sve_fexpa, EA_SCALABLE, REG_V1, REG_V0, INS_OPTS_SCALABLE_D);
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+    // IF_SVE_HH_2A
+    // BF1CVT  <Zd>.H, <Zn>.B
+    theEmitter->emitIns_R_R(INS_sve_bf1cvt, EA_SCALABLE, REG_V2, REG_V3, INS_OPTS_SCALABLE_H);
+    // BF1CVTLT <Zd>.H, <Zn>.B
+    theEmitter->emitIns_R_R(INS_sve_bf1cvtlt, EA_SCALABLE, REG_V1, REG_V5, INS_OPTS_SCALABLE_H);
+    // BF2CVT  <Zd>.H, <Zn>.B
+    theEmitter->emitIns_R_R(INS_sve_bf2cvt, EA_SCALABLE, REG_V6, REG_V2, INS_OPTS_SCALABLE_H);
+    // BF2CVTLT <Zd>.H, <Zn>.B
+    theEmitter->emitIns_R_R(INS_sve_bf2cvtlt, EA_SCALABLE, REG_V3, REG_V1, INS_OPTS_SCALABLE_H);
+    // F1CVT   <Zd>.H, <Zn>.B
+    theEmitter->emitIns_R_R(INS_sve_f1cvt, EA_SCALABLE, REG_V6, REG_V7, INS_OPTS_SCALABLE_H);
+    // F1CVTLT <Zd>.H, <Zn>.B
+    theEmitter->emitIns_R_R(INS_sve_f1cvtlt, EA_SCALABLE, REG_V1, REG_V8, INS_OPTS_SCALABLE_H);
+    // F2CVT   <Zd>.H, <Zn>.B
+    theEmitter->emitIns_R_R(INS_sve_f2cvt, EA_SCALABLE, REG_V3, REG_V4, INS_OPTS_SCALABLE_H);
+    // F2CVTLT <Zd>.H, <Zn>.B
+    theEmitter->emitIns_R_R(INS_sve_f2cvtlt, EA_SCALABLE, REG_V1, REG_V2, INS_OPTS_SCALABLE_H);
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED
+
+    // IF_SVE_BI_2A
+    // MOVPRFX <Zd>, <Zn>
+    theEmitter->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, REG_V3, REG_V5);
+
+    // IF_SVE_BF_2A
+    // ASR <Zd>.<T>, <Zn>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 7, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 8, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 5, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 16, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 9, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 32, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 15, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 64, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 33, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    // LSL <Zd>.<T>, <Zn>.<T>, #<const
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V31, REG_V31, 7, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 5, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V31, REG_V31, 15, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 9, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V31, REG_V31, 31, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 15, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V31, REG_V31, 63, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 33, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    // LSR <Zd>.<T>, <Zn>.<T>, #<const
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V31, REG_V31, 8, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 5, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V31, REG_V31, 16, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 9, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V31, REG_V31, 32, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 15, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V31, REG_V31, 64, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 33, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+    theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D,
+                              INS_SCALABLE_OPTS_UNPREDICATED);
+
+    // IF_SVE_FT_2A
+    // SLI <Zd>.<T>, <Zn>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V31, REG_V31, 7, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V0, REG_V31, 3, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V31, REG_V31, 15, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V0, REG_V31, 7, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V31, REG_V31, 31, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V0, REG_V31, 17, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V31, REG_V31, 63, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V0, REG_V31, 31, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_sli, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D);
+    // SRI <Zd>.<T>, <Zn>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V31, REG_V31, 8, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V0, REG_V31, 3, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V31, REG_V31, 16, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V0, REG_V31, 7, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V31, REG_V31, 32, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V0, REG_V31, 17, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V31, REG_V31, 64, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V0, REG_V31, 31, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_sri, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D);
+
+    // IF_SVE_FU_2A
+    // SRSRA <Zda>.<T>, <Zn>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V31, REG_V31, 8, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V0, REG_V31, 3, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V31, REG_V31, 16, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V0, REG_V31, 7, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V31, REG_V31, 32, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V0, REG_V31, 17, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V31, REG_V31, 64, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V0, REG_V31, 31, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_srsra, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D);
+    // SSRA <Zda>.<T>, <Zn>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V31, REG_V31, 8, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V0, REG_V31, 3, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V31, REG_V31, 16, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V0, REG_V31, 7, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V31, REG_V31, 32, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V0, REG_V31, 17, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V31, REG_V31, 64, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V0, REG_V31, 31, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_ssra, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D);
+    // URSRA <Zda>.<T>, <Zn>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V31, REG_V31, 8, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V0, REG_V31, 3, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V31, REG_V31, 16, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V0, REG_V31, 7, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V31, REG_V31, 32, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V0, REG_V31, 17, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V31, REG_V31, 64, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V0, REG_V31, 31, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_ursra, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D);
+    // USRA <Zda>.<T>, <Zn>.<T>, #<const>
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V31, REG_V31, 8, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V31, 3, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V31, REG_V31, 16, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V31, 7, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V31, REG_V31, 32, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V31, 17, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V31, REG_V31, 64, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V31, 31, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D);
+
+    // IF_SVE_BX_2A
+    // DUPQ    <Zd>.<T>, <Zn>.<T>[<imm>]
+    theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V21, REG_V10, 10, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V31, REG_V31, 15, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V21, REG_V10, 5, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V31, REG_V31, 7, INS_OPTS_SCALABLE_H);
+    theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V21, REG_V10, 2, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V31, REG_V31, 3, INS_OPTS_SCALABLE_S);
+    theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_D);
+    theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V31, REG_V31, 1, INS_OPTS_SCALABLE_D);
+
+    // IF_SVE_BY_2A
+    // EXTQ    <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm>
+    theEmitter->emitIns_R_R_I(INS_sve_extq, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_B);
+    theEmitter->emitIns_R_R_I(INS_sve_extq, EA_SCALABLE, REG_V31, REG_V31, 15, INS_OPTS_SCALABLE_B);
 }
 
 #endif // defined(TARGET_ARM64) && defined(DEBUG)
diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp
index cb04cf702f2b..a9e2a41f73f9 100644
--- a/src/coreclr/jit/codegenarmarch.cpp
+++ b/src/coreclr/jit/codegenarmarch.cpp
@@ -441,6 +441,12 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             break;
 #endif // TARGET_ARM64
 
+#ifdef SWIFT_SUPPORT
+        case GT_SWIFT_ERROR:
+            genCodeForSwiftErrorReg(treeNode);
+            break;
+#endif // SWIFT_SUPPORT
+
         case GT_RELOAD:
             // do nothing - reload is just a marker.
             // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
@@ -490,7 +496,8 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             break;
 
         case GT_PINVOKE_PROLOG:
-            noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
+            noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) &
+                          ~fullIntArgRegMask(compiler->info.compCallConv)) == 0);
 
 #ifdef PSEUDORANDOM_NOP_INSERTION
             // the runtime side requires the codegen here to be consistent
@@ -507,7 +514,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
 #endif
             break;
 
-        case GT_STORE_DYN_BLK:
         case GT_STORE_BLK:
             genCodeForStoreBlk(treeNode->AsBlk());
             break;
@@ -721,8 +727,8 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode)
             break;
 
 #if defined(FEATURE_SIMD)
-        // The handling is a bit more complex so genSimdUpperSave/Restore
-        // handles genConsumeOperands and genProduceReg
+            // The handling is a bit more complex so genSimdUpperSave/Restore
+            // handles genConsumeOperands and genProduceReg
 
         case NI_SIMD_UpperRestore:
         {
@@ -855,7 +861,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
 
             emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNumOut, argOffsetOut);
 #else  // !TARGET_ARM64
-            // There is no zero register on ARM32
+       // There is no zero register on ARM32
             unreached();
 #endif // !TARGET_ARM64
         }
@@ -1012,9 +1018,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
                 nextIndex += 2;
             }
 #else  // TARGET_ARM
-            // For a >= 4 byte sizes we will generate a ldr and str instruction each loop
-            //             ldr     r2, [r0]
-            //             str     r2, [sp, #16]
+       // For a >= 4 byte sizes we will generate a ldr and str instruction each loop
+       //             ldr     r2, [r0]
+       //             str     r2, [sp, #16]
             while (remainingSize >= TARGET_POINTER_SIZE)
             {
                 var_types type = layout->GetGCPtrType(nextIndex);
@@ -1806,7 +1812,7 @@ instruction CodeGen::genGetVolatileLdStIns(instruction   currentIns,
         assert(!addrIsInReg);
         switch (currentIns)
         {
-            // Loads
+                // Loads
 
             case INS_ldrb:
                 return INS_ldapurb;
@@ -1817,7 +1823,7 @@ instruction CodeGen::genGetVolatileLdStIns(instruction   currentIns,
             case INS_ldr:
                 return INS_ldapur;
 
-            // Stores
+                // Stores
 
             case INS_strb:
                 return INS_stlurb;
@@ -1849,7 +1855,7 @@ instruction CodeGen::genGetVolatileLdStIns(instruction   currentIns,
     const bool hasRcpc1 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc);
     switch (currentIns)
     {
-        // Loads
+            // Loads
 
         case INS_ldrb:
             return hasRcpc1 ? INS_ldaprb : INS_ldarb;
@@ -1860,7 +1866,7 @@ instruction CodeGen::genGetVolatileLdStIns(instruction   currentIns,
         case INS_ldr:
             return hasRcpc1 ? INS_ldapr : INS_ldar;
 
-        // Stores
+            // Stores
 
         case INS_strb:
             return INS_stlrb;
@@ -1925,37 +1931,6 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
     genProduceReg(tree);
 }
 
-//----------------------------------------------------------------------------------
-// genCodeForCpBlkHelper - Generate code for a CpBlk node by the means of the VM memcpy helper call
-//
-// Arguments:
-//    cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK]
-//
-// Preconditions:
-//   The register assignments have been set appropriately.
-//   This is validated by genConsumeBlockOp().
-//
-void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode)
-{
-    // Destination address goes in arg0, source address goes in arg1, and size goes in arg2.
-    // genConsumeBlockOp takes care of this for us.
-    genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
-    if (cpBlkNode->IsVolatile())
-    {
-        // issue a full memory barrier before a volatile CpBlk operation
-        instGen_MemoryBarrier();
-    }
-
-    genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
-
-    if (cpBlkNode->IsVolatile())
-    {
-        // issue a load barrier after a volatile CpBlk operation
-        instGen_MemoryBarrier(BARRIER_LOAD_ONLY);
-    }
-}
-
 #ifdef TARGET_ARM64
 
 // The following classes
@@ -2085,7 +2060,10 @@ class ProducingStreamBaseInstrs
 {
 public:
     ProducingStreamBaseInstrs(regNumber intReg1, regNumber intReg2, regNumber addrReg, emitter* emitter)
-        : intReg1(intReg1), intReg2(intReg2), addrReg(addrReg), emitter(emitter)
+        : intReg1(intReg1)
+        , intReg2(intReg2)
+        , addrReg(addrReg)
+        , emitter(emitter)
     {
     }
 
@@ -2146,7 +2124,11 @@ class ProducingStream
 {
 public:
     ProducingStream(regNumber intReg1, regNumber simdReg1, regNumber simdReg2, regNumber addrReg, emitter* emitter)
-        : intReg1(intReg1), simdReg1(simdReg1), simdReg2(simdReg2), addrReg(addrReg), emitter(emitter)
+        : intReg1(intReg1)
+        , simdReg1(simdReg1)
+        , simdReg2(simdReg2)
+        , addrReg(addrReg)
+        , emitter(emitter)
     {
     }
 
@@ -2269,7 +2251,9 @@ class BlockUnrollHelper
 class InitBlockUnrollHelper
 {
 public:
-    InitBlockUnrollHelper(int dstOffset, unsigned byteCount) : dstStartOffset(dstOffset), byteCount(byteCount)
+    InitBlockUnrollHelper(int dstOffset, unsigned byteCount)
+        : dstStartOffset(dstOffset)
+        , byteCount(byteCount)
     {
     }
 
@@ -2398,7 +2382,9 @@ class CopyBlockUnrollHelper
 {
 public:
     CopyBlockUnrollHelper(int srcOffset, int dstOffset, unsigned byteCount)
-        : srcStartOffset(srcOffset), dstStartOffset(dstOffset), byteCount(byteCount)
+        : srcStartOffset(srcOffset)
+        , dstStartOffset(dstOffset)
+        , byteCount(byteCount)
     {
     }
 
@@ -3218,31 +3204,6 @@ void CodeGen::genCodeForMemmove(GenTreeBlk* tree)
 #endif
 }
 
-//------------------------------------------------------------------------
-// genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call
-//
-// Arguments:
-//    initBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK]
-//
-// Preconditions:
-//   The register assignments have been set appropriately.
-//   This is validated by genConsumeBlockOp().
-//
-void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode)
-{
-    // Size goes in arg2, source address goes in arg1, and size goes in arg2.
-    // genConsumeBlockOp takes care of this for us.
-    genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
-    if (initBlkNode->IsVolatile())
-    {
-        // issue a full memory barrier before a volatile initBlock Operation
-        instGen_MemoryBarrier();
-    }
-
-    genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
-}
-
 //------------------------------------------------------------------------
 // genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop.
 //    It's needed for cases when size is too big to unroll and we're not allowed
@@ -3461,7 +3422,7 @@ void CodeGen::genCall(GenTreeCall* call)
             for (unsigned i = 0; i < regCount; ++i)
             {
                 var_types regType      = pRetTypeDesc->GetReturnRegType(i);
-                returnReg              = pRetTypeDesc->GetABIReturnReg(i);
+                returnReg              = pRetTypeDesc->GetABIReturnReg(i, call->GetUnmanagedCallConv());
                 regNumber allocatedReg = call->GetRegNumByIdx(i);
                 inst_Mov(regType, allocatedReg, returnReg, /* canSkip */ true);
             }
@@ -3482,13 +3443,13 @@ void CodeGen::genCall(GenTreeCall* call)
             else
 #endif // TARGET_ARM
                 if (varTypeUsesFloatArgReg(returnType))
-            {
-                returnReg = REG_FLOATRET;
-            }
-            else
-            {
-                returnReg = REG_INTRET;
-            }
+                {
+                    returnReg = REG_FLOATRET;
+                }
+                else
+                {
+                    returnReg = REG_INTRET;
+                }
 
             if (call->GetRegNum() != returnReg)
             {
@@ -3623,6 +3584,44 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
         //
         assert(genIsValidIntReg(target->GetRegNum()));
 
+#ifdef TARGET_ARM64
+        bool isTlsHandleTarget =
+            compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && TargetOS::IsUnix && target->IsTlsIconHandle();
+
+        if (isTlsHandleTarget)
+        {
+            assert(call->gtFlags & GTF_TLS_GET_ADDR);
+            emitter*       emitter  = GetEmitter();
+            emitAttr       attr     = (emitAttr)(EA_CNS_TLSGD_RELOC | EA_CNS_RELOC_FLG | retSize);
+            GenTreeIntCon* iconNode = target->AsIntCon();
+            methHnd                 = (CORINFO_METHOD_HANDLE)iconNode->gtIconVal;
+            retSize                 = EA_SET_FLG(retSize, EA_CNS_TLSGD_RELOC);
+
+            // For NativeAOT, linux/arm64, linker wants the following pattern, so we will generate
+            // it as part of the call. Generating individual instructions is tricky to get it
+            // correct in the format the way linker needs. Also, we might end up spilling or
+            // reloading a register, which can break the pattern.
+            //
+            //      mrs  x1, tpidr_el0
+            //      adrp x0, :tlsdesc:tlsRoot   ; R_AARCH64_TLSDESC_ADR_PAGE21
+            //      ldr  x2, [x0]               ; R_AARCH64_TLSDESC_LD64_LO12
+            //      add  x0, x0, #0             ; R_AARCH64_TLSDESC_ADD_LO12
+            //      blr  x2                     ; R_AARCH64_TLSDESC_CALL
+            //      add  x0, x1, x0
+            // We guaranteed in LSRA that r0, r1 and r2 are assigned to this node.
+
+            // mrs
+            emitter->emitIns_R(INS_mrs_tpid0, attr, REG_R1);
+
+            // adrp
+            // ldr
+            // add
+            emitter->emitIns_Adrp_Ldr_Add(attr, REG_R0, target->GetRegNum(),
+                                          (ssize_t)methHnd DEBUGARG(iconNode->gtTargetHandle)
+                                              DEBUGARG(iconNode->gtFlags));
+        }
+#endif
+
         // clang-format off
         genEmitCall(emitter::EC_INDIR_R,
                     methHnd,
@@ -3633,6 +3632,14 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
                     di,
                     target->GetRegNum(),
                     call->IsFastTailCall());
+
+#ifdef TARGET_ARM64
+        if (isTlsHandleTarget)
+        {
+            // add x0, x1, x0
+            GetEmitter()->emitIns_R_R_R(INS_add, EA_8BYTE, REG_R0, REG_R1, REG_R0);
+        }
+#endif
         // clang-format on
     }
     else
@@ -3698,19 +3705,19 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
             else
 #endif // FEATURE_READYTORUN
                 if (call->gtCallType == CT_HELPER)
-            {
-                CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
-                noway_assert(helperNum != CORINFO_HELP_UNDEF);
+                {
+                    CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
+                    noway_assert(helperNum != CORINFO_HELP_UNDEF);
 
-                void* pAddr = nullptr;
-                addr        = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
-                assert(pAddr == nullptr);
-            }
-            else
-            {
-                // Direct call to a non-virtual user function.
-                addr = call->gtDirectCallAddress;
-            }
+                    void* pAddr = nullptr;
+                    addr        = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+                    assert(pAddr == nullptr);
+                }
+                else
+                {
+                    // Direct call to a non-virtual user function.
+                    addr = call->gtDirectCallAddress;
+                }
 
             assert(addr != nullptr);
 
@@ -4376,8 +4383,8 @@ void CodeGen::genFloatToFloatCast(GenTree* treeNode)
 //------------------------------------------------------------------------
 // genCreateAndStoreGCInfo: Create and record GC Info for the function.
 //
-void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
-                                      unsigned prologSize,
+void CodeGen::genCreateAndStoreGCInfo(unsigned            codeSize,
+                                      unsigned            prologSize,
                                       unsigned epilogSize DEBUGARG(void* codePtr))
 {
     IAllocator*    allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC());
@@ -4561,14 +4568,14 @@ void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock)
 }
 
 //------------------------------------------------------------------------
-// genCodeForStoreBlk: Produce code for a GT_STORE_DYN_BLK/GT_STORE_BLK node.
+// genCodeForStoreBlk: Produce code for a GT_STORE_BLK node.
 //
 // Arguments:
 //    tree - the node
 //
 void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
 {
-    assert(blkOp->OperIs(GT_STORE_DYN_BLK, GT_STORE_BLK));
+    assert(blkOp->OperIs(GT_STORE_BLK));
 
     bool isCopyBlk = blkOp->OperIsCopyBlkOp();
 
@@ -4584,18 +4591,6 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
             genCodeForInitBlkLoop(blkOp);
             break;
 
-        case GenTreeBlk::BlkOpKindHelper:
-            assert(!blkOp->gtBlkOpGcUnsafe);
-            if (isCopyBlk)
-            {
-                genCodeForCpBlkHelper(blkOp);
-            }
-            else
-            {
-                genCodeForInitBlkHelper(blkOp);
-            }
-            break;
-
         case GenTreeBlk::BlkOpKindUnroll:
         case GenTreeBlk::BlkOpKindUnrollMemmove:
             if (isCopyBlk)
@@ -4845,7 +4840,7 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc)
     for (unsigned i = 0; i < regCount; ++i)
     {
         var_types type = retTypeDesc->GetReturnRegType(i);
-        regNumber reg  = retTypeDesc->GetABIReturnReg(i);
+        regNumber reg  = retTypeDesc->GetABIReturnReg(i, compiler->info.compCallConv);
         if (varTypeIsFloating(type))
         {
             // If the register piece is to be passed in a floating point register
@@ -4898,7 +4893,7 @@ void CodeGen::genPushCalleeSavedRegisters()
                      intRegState.rsCalleeRegArgMaskLiveIn);
 #endif
 
-    regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+    regMaskTP rsPushRegs = regSet.rsGetModifiedCalleeSavedRegsMask();
 
 #if ETW_EBP_FRAMED
     if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
@@ -5546,8 +5541,7 @@ void CodeGen::genFnEpilog(BasicBlock* block)
     }
 
     if (jmpEpilog ||
-        genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED) ==
-            RBM_NONE)
+        genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedFltCalleeSavedRegsMask()) == RBM_NONE)
     {
         genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted);
     }
@@ -5619,9 +5613,9 @@ void CodeGen::genFnEpilog(BasicBlock* block)
 #if !FEATURE_FASTTAILCALL
         noway_assert(jmpNode->gtOper == GT_JMP);
 #else  // FEATURE_FASTTAILCALL
-        // armarch
-        // If jmpNode is GT_JMP then gtNext must be null.
-        // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
+       // armarch
+       // If jmpNode is GT_JMP then gtNext must be null.
+       // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
         noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
 
         // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
@@ -5714,7 +5708,6 @@ void CodeGen::genFnEpilog(BasicBlock* block)
                                        0,             // disp
                                        true);         // isJump
             // clang-format on
-            CLANG_FORMAT_COMMENT_ANCHOR;
 #endif // TARGET_ARMARCH
         }
 #if FEATURE_FASTTAILCALL
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp
index c468473067b6..021a5d9dc579 100644
--- a/src/coreclr/jit/codegencommon.cpp
+++ b/src/coreclr/jit/codegencommon.cpp
@@ -43,7 +43,6 @@ void CodeGenInterface::setFramePointerRequiredEH(bool value)
         // if they are fully-interruptible.  So if we have a catch
         // or finally that will keep frame-vars alive, we need to
         // force fully-interruptible.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
         if (verbose)
@@ -65,7 +64,10 @@ CodeGenInterface* getCodeGenerator(Compiler* comp)
 
 // CodeGen constructor
 CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
-    : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler), treeLifeUpdater(nullptr)
+    : gcInfo(theCompiler)
+    , regSet(theCompiler, gcInfo)
+    , compiler(theCompiler)
+    , treeLifeUpdater(nullptr)
 {
 }
 
@@ -84,7 +86,8 @@ void CodeGenInterface::CopyRegisterInfo()
 
 /*****************************************************************************/
 
-CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
+CodeGen::CodeGen(Compiler* theCompiler)
+    : CodeGenInterface(theCompiler)
 {
 #if defined(TARGET_XARCH)
     negBitmaskFlt  = nullptr;
@@ -120,7 +123,6 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
 #endif
 
 #ifdef DEBUG
-    genTempLiveChg        = true;
     genTrnslLocalVarCount = 0;
 
     // Shouldn't be used before it is set in genFnProlog()
@@ -262,29 +264,6 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta() const
 
 #endif // defined(TARGET_X86) || defined(TARGET_ARM)
 
-/*****************************************************************************
- * Should we round simple operations (assignments, arithmetic operations, etc.)
- */
-
-// inline
-// static
-bool CodeGen::genShouldRoundFP()
-{
-    RoundLevel roundLevel = getRoundFloatLevel();
-
-    switch (roundLevel)
-    {
-        case ROUND_NEVER:
-        case ROUND_CMP_CONST:
-        case ROUND_CMP:
-            return false;
-
-        default:
-            assert(roundLevel == ROUND_ALWAYS);
-            return true;
-    }
-}
-
 /*****************************************************************************
  *
  *  Initialize some global variables.
@@ -315,10 +294,8 @@ void CodeGen::genPrepForCompiler()
         }
     }
     VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
-    genLastLiveMask = RBM_NONE;
-#ifdef DEBUG
-    compiler->fgBBcountAtCodegen = compiler->fgBBcount;
-#endif
+    genLastLiveMask                        = RBM_NONE;
+    compiler->Metrics.BasicBlocksAtCodegen = compiler->fgBBcount;
 }
 
 //------------------------------------------------------------------------
@@ -414,9 +391,7 @@ void CodeGen::genMarkLabelsForCodegen()
             case BBJ_CALLFINALLY:
                 // The finally target itself will get marked by walking the EH table, below, and marking
                 // all handler begins.
-                CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if FEATURE_EH_CALLFINALLY_THUNKS
+                if (compiler->UsesCallFinallyThunks())
                 {
                     // For callfinally thunks, we need to mark the block following the callfinally/callfinallyret pair,
                     // as that's needed for identifying the range of the "duplicate finally" region in EH data.
@@ -431,8 +406,6 @@ void CodeGen::genMarkLabelsForCodegen()
                         bbToLabel->SetFlags(BBF_HAS_LABEL);
                     }
                 }
-#endif // FEATURE_EH_CALLFINALLY_THUNKS
-
                 break;
 
             case BBJ_CALLFINALLYRET:
@@ -597,28 +570,6 @@ void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bo
 // compHelperCallKillSet: Gets a register mask that represents the kill set for a helper call.
 // Not all JIT Helper calls follow the standard ABI on the target architecture.
 //
-// TODO-CQ: Currently this list is incomplete (not all helpers calls are
-//          enumerated) and not 100% accurate (some killsets are bigger than
-//          what they really are).
-//          There's some work to be done in several places in the JIT to
-//          accurately track the registers that are getting killed by
-//          helper calls:
-//              a) LSRA needs several changes to accommodate more precise killsets
-//                 for every helper call it sees (both explicitly [easy] and
-//                 implicitly [hard])
-//              b) Currently for AMD64, when we generate code for a helper call
-//                 we're independently over-pessimizing the killsets of the call
-//                 (independently from LSRA) and this needs changes
-//                 both in CodeGenAmd64.cpp and emitx86.cpp.
-//
-//                 The best solution for this problem would be to try to centralize
-//                 the killset information in a single place but then make the
-//                 corresponding changes so every code generation phase is in sync
-//                 about this.
-//
-//         The interim solution is to only add known helper calls that don't
-//         follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
-//
 // Arguments:
 //   helper - The helper being inquired about
 //
@@ -629,6 +580,12 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
 {
     switch (helper)
     {
+        // Most of the helpers are written in C++ and C# and we can't make
+        // any additional assumptions beyond the standard ABI. However, some are written in raw assembly,
+        // so we can narrow down the kill sets.
+        //
+        // TODO-CQ: Inspect all asm helpers and narrow down the kill sets for them.
+        //
         case CORINFO_HELP_ASSIGN_REF:
         case CORINFO_HELP_CHECKED_ASSIGN_REF:
             return RBM_CALLEE_TRASH_WRITEBARRIER;
@@ -970,7 +927,6 @@ void CodeGen::genAdjustStackLevel(BasicBlock* block)
 {
 #if !FEATURE_FIXED_OUT_ARGS
     // Check for inserted throw blocks and adjust genStackLevel.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(UNIX_X86_ABI)
     if (isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
@@ -1119,7 +1075,6 @@ bool CodeGen::genCreateAddrMode(GenTree*  addr,
        constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
        here if we find a scaled index.
     */
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
     assert(mul == 0);
 
@@ -1505,10 +1460,11 @@ void CodeGen::genExitCode(BasicBlock* block)
 void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, BasicBlock* failBlk)
 {
     bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks();
-#if defined(UNIX_X86_ABI) && defined(FEATURE_EH_FUNCLETS)
+#if defined(UNIX_X86_ABI)
+    // TODO: Is this really UNIX_X86_ABI specific? Should we guard with compiler->UsesFunclets() instead?
     // Inline exception-throwing code in funclet to make it possible to unwind funclet frames.
     useThrowHlpBlk = useThrowHlpBlk && (compiler->funCurrentFunc()->funKind == FUNC_ROOT);
-#endif // UNIX_X86_ABI && FEATURE_EH_FUNCLETS
+#endif // UNIX_X86_ABI
 
     if (useThrowHlpBlk)
     {
@@ -1628,8 +1584,6 @@ void CodeGen::genCheckOverflow(GenTree* tree)
 }
 #endif
 
-#if defined(FEATURE_EH_FUNCLETS)
-
 /*****************************************************************************
  *
  *  Update the current funclet as needed by calling genUpdateCurrentFunclet().
@@ -1640,6 +1594,11 @@ void CodeGen::genCheckOverflow(GenTree* tree)
 
 void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
 {
+    if (!compiler->UsesFunclets())
+    {
+        return;
+    }
+
     if (block->HasFlag(BBF_FUNCLET_BEG))
     {
         compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
@@ -1656,7 +1615,7 @@ void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
     }
     else
     {
-        assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
+        assert(compiler->funCurrentFuncIdx() <= compiler->compFuncInfoCount);
         if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
         {
             assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
@@ -1673,8 +1632,6 @@ void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
     }
 }
 
-#endif // FEATURE_EH_FUNCLETS
-
 //----------------------------------------------------------------------
 // genGenerateCode: Generate code for the function.
 //
@@ -1733,9 +1690,6 @@ void CodeGen::genGenerateMachineCode()
 
     /* Prepare the emitter */
     GetEmitter()->Init();
-#ifdef DEBUG
-    VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
-#endif
 
 #ifdef DEBUG
     if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
@@ -1918,7 +1872,7 @@ void CodeGen::genGenerateMachineCode()
                             (compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
                                 !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)
 #endif
-                                );
+    );
 
     /* Now generate code for the function */
     genCodeForBBlist();
@@ -2042,7 +1996,7 @@ void CodeGen::genEmitMachineCode()
 
         printf("; Total bytes of code %d, prolog size %d, PerfScore %.2f, instruction count %d, allocated bytes for "
                "code %d",
-               codeSize, prologSize, compiler->info.compPerfScore, instrCount,
+               codeSize, prologSize, compiler->Metrics.PerfScore, instrCount,
                GetEmitter()->emitTotalHotCodeSize + GetEmitter()->emitTotalColdCodeSize);
 
         if (dspMetrics)
@@ -2075,7 +2029,8 @@ void CodeGen::genEmitMachineCode()
         {
             printf("; ============================================================\n\n");
         }
-        printf(""); // in our logic this causes a flush
+
+        fflush(jitstdout());
     }
 
     if (verbose)
@@ -2237,14 +2192,13 @@ void CodeGen::genReportEH()
 
     unsigned EHCount = compiler->compHndBBtabCount;
 
-#if defined(FEATURE_EH_FUNCLETS)
     // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the
     // VM.
     unsigned duplicateClauseCount = 0;
     unsigned enclosingTryIndex;
 
     // Duplicate clauses are not used by NativeAOT ABI
-    if (!isNativeAOT)
+    if (compiler->UsesFunclets() && !isNativeAOT)
     {
         for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
         {
@@ -2259,11 +2213,10 @@ void CodeGen::genReportEH()
         EHCount += duplicateClauseCount;
     }
 
-#if FEATURE_EH_CALLFINALLY_THUNKS
     unsigned clonedFinallyCount = 0;
 
     // Duplicate clauses are not used by NativeAOT ABI
-    if (!isNativeAOT)
+    if (compiler->UsesFunclets() && compiler->UsesCallFinallyThunks() && !isNativeAOT)
     {
         // We don't keep track of how many cloned finally there are. So, go through and count.
         // We do a quick pass first through the EH table to see if there are any try/finally
@@ -2291,27 +2244,33 @@ void CodeGen::genReportEH()
             EHCount += clonedFinallyCount;
         }
     }
-#endif // FEATURE_EH_CALLFINALLY_THUNKS
-
-#endif // FEATURE_EH_FUNCLETS
 
 #ifdef DEBUG
     if (compiler->opts.dspEHTable)
     {
-#if defined(FEATURE_EH_FUNCLETS)
-#if FEATURE_EH_CALLFINALLY_THUNKS
-        printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
-               compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
-        assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
-#else  // !FEATURE_EH_CALLFINALLY_THUNKS
-        printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
-               compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
-        assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
-#endif // !FEATURE_EH_CALLFINALLY_THUNKS
-#else  // !FEATURE_EH_FUNCLETS
-        printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
-        assert(compiler->compHndBBtabCount == EHCount);
-#endif // !FEATURE_EH_FUNCLETS
+        if (compiler->UsesFunclets())
+        {
+            if (compiler->UsesCallFinallyThunks())
+            {
+                printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to "
+                       "VM\n",
+                       compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
+                assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
+            }
+            else
+            {
+                printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
+                       compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
+                assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
+            }
+        }
+#if defined(FEATURE_EH_WINDOWS_X86)
+        else
+        {
+            printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
+            assert(compiler->compHndBBtabCount == EHCount);
+        }
+#endif // FEATURE_EH_WINDOWS_X86
     }
 #endif // DEBUG
 
@@ -2379,7 +2338,6 @@ void CodeGen::genReportEH()
         ++XTnum;
     }
 
-#if defined(FEATURE_EH_FUNCLETS)
     // Now output duplicated clauses.
     //
     // If a funclet has been created by moving a handler out of a try region that it was originally nested
@@ -2602,7 +2560,6 @@ void CodeGen::genReportEH()
         assert(duplicateClauseCount == reportedDuplicateClauseCount);
     } // if (duplicateClauseCount > 0)
 
-#if FEATURE_EH_CALLFINALLY_THUNKS
     if (clonedFinallyCount > 0)
     {
         unsigned reportedClonedFinallyCount = 0;
@@ -2656,10 +2613,7 @@ void CodeGen::genReportEH()
         }     // for each block
 
         assert(clonedFinallyCount == reportedClonedFinallyCount);
-    }  // if (clonedFinallyCount > 0)
-#endif // FEATURE_EH_CALLFINALLY_THUNKS
-
-#endif // FEATURE_EH_FUNCLETS
+    } // if (clonedFinallyCount > 0)
 
     assert(XTnum == EHCount);
 }
@@ -2855,6 +2809,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  *  assigned location, in the function prolog.
  */
 
+// std::max isn't constexpr until C++14 and we're still on C++11
+constexpr size_t const_max(size_t a, size_t b)
+{
+    return a > b ? a : b;
+}
+
 #ifdef _PREFAST_
 #pragma warning(push)
 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
@@ -2912,9 +2872,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
     else // we are doing the integer registers
     {
         noway_assert(argMax <= MAX_REG_ARG);
-        if (hasFixedRetBuffReg())
+        if (hasFixedRetBuffReg(compiler->info.compCallConv))
         {
-            fixedRetBufIndex = theFixedRetBuffArgNum();
+            fixedRetBufIndex = theFixedRetBuffArgNum(compiler->info.compCallConv);
             // We have an additional integer register argument when hasFixedRetBuffReg() is true
             argMax = fixedRetBufIndex + 1;
             assert(argMax == (MAX_REG_ARG + 1));
@@ -2948,7 +2908,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
         bool circular;     // true if this register participates in a circular dependency loop.
         bool hfaConflict;  // arg is part of an HFA that will end up in the same register
                            // but in a different slot (eg arg in s3 = v3.s[0], needs to end up in v3.s[3])
-    } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
+    } regArgTab[const_max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
 
     unsigned   varNum;
     LclVarDsc* varDsc;
@@ -3004,6 +2964,29 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
             }
         }
 
+#ifdef SWIFT_SUPPORT
+        // The Swift self parameter is passed in a callee save register and is
+        // not part of the arg register order that this function relies on to
+        // handle conflicts. For this reason we always mark it as DNER and
+        // handle it outside the normal register arguments.
+        // TODO-CQ: Fix this.
+        if (varNum == compiler->lvaSwiftSelfArg)
+        {
+            continue;
+        }
+
+        // On a similar note, the SwiftError* parameter is not a real argument,
+        // and should not be allocated any registers/stack space.
+        // We mark it as being passed in REG_SWIFT_ERROR so it won't interfere with other args.
+        // In genFnProlog, we should have removed this callee-save register from intRegState.rsCalleeRegArgMaskLiveIn.
+        // TODO-CQ: Fix this.
+        if (varNum == compiler->lvaSwiftErrorArg)
+        {
+            assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_SWIFT_ERROR) == 0);
+            continue;
+        }
+#endif
+
         var_types regType = compiler->mangleVarArgsType(varDsc->TypeGet());
         // Change regType to the HFA type when we have a HFA argument
         if (varDsc->lvIsHfaRegArg())
@@ -3110,7 +3093,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                     slotRegType = compiler->GetEightByteType(structDesc, slotCounter);
                 }
 
-                regArgNum = genMapRegNumToRegArgNum(regNum, slotRegType);
+                regArgNum = genMapRegNumToRegArgNum(regNum, slotRegType, compiler->info.compCallConv);
 
                 if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
                     (doingFloat && (structDesc.IsSseSlot(slotCounter))))
@@ -3144,7 +3127,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
 #endif // defined(UNIX_AMD64_ABI)
         {
             // Bingo - add it to our table
-            regArgNum = genMapRegNumToRegArgNum(varDsc->GetArgReg(), regType);
+            regArgNum = genMapRegNumToRegArgNum(varDsc->GetArgReg(), regType, compiler->info.compCallConv);
             slots     = 1;
 
             if (TargetArchitecture::IsArm32 ||
@@ -3207,7 +3190,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
         for (int i = 0; i < slots; i++)
         {
             regType          = regArgTab[regArgNum + i].type;
-            regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
+            regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType, compiler->info.compCallConv);
 
 #if !defined(UNIX_AMD64_ABI)
             assert((i > 0) || (regNum == varDsc->GetArgReg()));
@@ -3231,9 +3214,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
 #ifdef TARGET_X86
                     noway_assert(varDsc->lvType == TYP_STRUCT);
 #else  // !TARGET_X86
-                    // For LSRA, it may not be in regArgMaskLive if it has a zero
-                    // refcnt.  This is in contrast with the non-LSRA case in which all
-                    // non-tracked args are assumed live on entry.
+       // For LSRA, it may not be in regArgMaskLive if it has a zero
+       // refcnt.  This is in contrast with the non-LSRA case in which all
+       // non-tracked args are assumed live on entry.
                     noway_assert((varDsc->lvRefCnt() == 0) || (varDsc->lvType == TYP_STRUCT) ||
                                  (varDsc->IsAddressExposed() && compiler->info.compIsVarArgs) ||
                                  (varDsc->IsAddressExposed() && compiler->opts.compUseSoftFP));
@@ -3348,7 +3331,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                 noway_assert(!regArgTab[argNum].stackArg);
 
                 var_types regType = regArgTab[argNum].type;
-                regNumber regNum  = genMapRegArgNumToRegNum(argNum, regType);
+                regNumber regNum  = genMapRegArgNumToRegNum(argNum, regType, compiler->info.compCallConv);
 
                 regNumber destRegNum = REG_NA;
                 if (varTypeIsPromotable(varDsc) &&
@@ -3428,7 +3411,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                 if (genRegMask(destRegNum) & regArgMaskLive)
                 {
                     /* we are trashing a live argument register - record it */
-                    unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
+                    unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType, compiler->info.compCallConv);
                     noway_assert(destRegArgNum < argMax);
                     regArgTab[destRegArgNum].trashBy = argNum;
                 }
@@ -3447,7 +3430,6 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
 
     /* At this point, everything that has the "circular" flag
      * set to "true" forms a circular dependency */
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     if (regArgMaskLive)
@@ -3575,7 +3557,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
         noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
 #endif // TARGET_X86
 
-        regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
+        regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType, compiler->info.compCallConv);
 
         // Stack argument - if the ref count is 0 don't care about it
 
@@ -3794,8 +3776,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
 
                 assert(xtraReg != REG_NA);
 
-                regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
+                regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType, compiler->info.compCallConv);
                 GetEmitter()->emitIns_Mov(insCopy, size, xtraReg, begRegNum, /* canSkip */ false);
+                assert(!genIsValidIntReg(xtraReg) || !genIsValidFloatReg(begRegNum));
 
                 regSet.verifyRegUsed(xtraReg);
 
@@ -3806,10 +3789,11 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                 {
                     /* mov dest, src */
 
-                    regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
-                    regNumber srcRegNum  = genMapRegArgNumToRegNum(srcReg, destMemType);
+                    regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType, compiler->info.compCallConv);
+                    regNumber srcRegNum  = genMapRegArgNumToRegNum(srcReg, destMemType, compiler->info.compCallConv);
 
                     GetEmitter()->emitIns_Mov(insCopy, size, destRegNum, srcRegNum, /* canSkip */ false);
+                    assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(srcRegNum));
 
                     regSet.verifyRegUsed(destRegNum);
 
@@ -3858,9 +3842,10 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
 
                 /* move the dest reg (begReg) in the extra reg */
 
-                regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
+                regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType, compiler->info.compCallConv);
 
                 GetEmitter()->emitIns_Mov(insCopy, size, destRegNum, xtraReg, /* canSkip */ false);
+                assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(xtraReg));
 
                 regSet.verifyRegUsed(destRegNum);
                 /* mark the beginning register as processed */
@@ -3915,7 +3900,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
 
             assert(varDsc->lvIsHfa());
             assert((argNum >= firstArgNum) && (argNum <= lastArgNum));
-            assert(destRegNum == genMapRegArgNumToRegNum(argNum, regType));
+            assert(destRegNum == genMapRegArgNumToRegNum(argNum, regType, compiler->info.compCallConv));
 
             // Pass 0: move the conflicting part; Pass1: insert everything else
             //
@@ -3923,8 +3908,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
             {
                 for (unsigned currentArgNum = firstArgNum; currentArgNum <= lastArgNum; currentArgNum++)
                 {
-                    const regNumber regNum = genMapRegArgNumToRegNum(currentArgNum, regType);
-                    bool            insertArg =
+                    const regNumber regNum =
+                        genMapRegArgNumToRegNum(currentArgNum, regType, compiler->info.compCallConv);
+                    bool insertArg =
                         ((pass == 0) && (currentArgNum == argNum)) || ((pass == 1) && (currentArgNum != argNum));
 
                     if (insertArg)
@@ -3935,6 +3921,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                         // todo -- suppress self move
                         GetEmitter()->emitIns_R_R_I_I(INS_mov, EA_4BYTE, destRegNum, regNum,
                                                       regArgTab[currentArgNum].slot - 1, 0);
+                        assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(regNum));
                         regArgTab[currentArgNum].processed = true;
                         regArgMaskLive &= ~genRegMask(regNum);
                     }
@@ -3965,7 +3952,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
             varNum                     = regArgTab[argNum].varNum;
             varDsc                     = compiler->lvaGetDesc(varNum);
             const var_types regType    = regArgTab[argNum].type;
-            const regNumber regNum     = genMapRegArgNumToRegNum(argNum, regType);
+            const regNumber regNum     = genMapRegArgNumToRegNum(argNum, regType, compiler->info.compCallConv);
             const var_types varRegType = varDsc->GetRegisterType();
 
 #if defined(UNIX_AMD64_ABI)
@@ -4108,6 +4095,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                 }
 #endif
                 inst_Mov(destMemType, destRegNum, regNum, /* canSkip */ false, size);
+                assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(regNum));
             }
 
             /* mark the argument as processed */
@@ -4128,11 +4116,13 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
             {
                 argRegCount          = 2;
                 int       nextArgNum = argNum + 1;
-                regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type);
+                regNumber nextRegNum =
+                    genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type, compiler->info.compCallConv);
                 noway_assert(regArgTab[nextArgNum].varNum == varNum);
                 // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
                 // and moves the 0th element of the src reg into the 1st element of the dest reg.
                 GetEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varRegType), destRegNum, nextRegNum, 0);
+                assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(nextRegNum));
                 // Set destRegNum to regNum so that we skip the setting of the register below,
                 // but mark argNum as processed and clear regNum from the live mask.
                 destRegNum = regNum;
@@ -4154,12 +4144,14 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                         {
                             int        nextArgNum  = argNum + i;
                             LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + i);
-                            regNumber  nextRegNum  = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type);
+                            regNumber  nextRegNum  = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type,
+                                                                             compiler->info.compCallConv);
                             destRegNum             = fieldVarDsc->GetRegNum();
                             noway_assert(regArgTab[nextArgNum].varNum == varNum);
                             noway_assert(genIsValidFloatReg(nextRegNum));
                             noway_assert(genIsValidFloatReg(destRegNum));
                             GetEmitter()->emitIns_Mov(INS_mov, EA_8BYTE, destRegNum, nextRegNum, /* canSkip */ false);
+                            assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(nextRegNum));
                         }
                     }
 #if defined(TARGET_ARM64) && defined(FEATURE_SIMD)
@@ -4175,11 +4167,13 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                             int         nextArgNum  = argNum + i;
                             regArgElem* nextArgElem = &regArgTab[nextArgNum];
                             var_types   nextArgType = nextArgElem->type;
-                            regNumber   nextRegNum  = genMapRegArgNumToRegNum(nextArgNum, nextArgType);
+                            regNumber   nextRegNum =
+                                genMapRegArgNumToRegNum(nextArgNum, nextArgType, compiler->info.compCallConv);
                             noway_assert(nextArgElem->varNum == varNum);
                             noway_assert(genIsValidFloatReg(nextRegNum));
                             noway_assert(genIsValidFloatReg(destRegNum));
                             GetEmitter()->emitIns_R_R_I_I(INS_mov, EA_4BYTE, destRegNum, nextRegNum, i, 0);
+                            assert(!genIsValidIntReg(destRegNum) || !genIsValidFloatReg(nextRegNum));
                         }
                     }
 #endif // defined(TARGET_ARM64) && defined(FEATURE_SIMD)
@@ -4194,7 +4188,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                 int nextArgNum = argNum + regSlot;
                 assert(!regArgTab[nextArgNum].processed);
                 regArgTab[nextArgNum].processed = true;
-                regNumber nextRegNum            = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type);
+                regNumber nextRegNum =
+                    genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type, compiler->info.compCallConv);
                 regArgMaskLive &= ~genRegMask(nextRegNum);
             }
 #endif // FEATURE_MULTIREG_ARGS
@@ -4234,7 +4229,7 @@ void CodeGen::genEnregisterIncomingStackArgs()
     regNumber tmp_reg    = REG_NA;
 #endif
 
-    for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+    for (LclVarDsc* varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
     {
         /* Is this variable a parameter? */
 
@@ -4308,7 +4303,7 @@ void CodeGen::genEnregisterIncomingStackArgs()
                 }
             }
         }
-#else // !TARGET_LOONGARCH64
+#else  // !TARGET_LOONGARCH64
         GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0);
 #endif // !TARGET_LOONGARCH64
 
@@ -4502,7 +4497,6 @@ void CodeGen::genCheckUseBlockInit()
     // find structs that are guaranteed to be block initialized.
     // If this logic changes, Compiler::fgVarNeedsExplicitZeroInit needs
     // to be modified.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef TARGET_64BIT
 #if defined(TARGET_AMD64)
@@ -4514,6 +4508,7 @@ void CodeGen::genCheckUseBlockInit()
 #else // !defined(TARGET_AMD64)
 
     genUseBlockInit = (genInitStkLclCnt > 8);
+
 #endif
 #else
 
@@ -4967,6 +4962,110 @@ void CodeGen::genEnregisterOSRArgsAndLocals()
     }
 }
 
+#ifdef SWIFT_SUPPORT
+
+//-----------------------------------------------------------------------------
+// genHomeSwiftStructParameters:
+//  Reassemble Swift struct parameters if necessary.
+//
+// Parameters:
+//   handleStack - If true, reassemble the segments that were passed on the stack.
+//                 If false, reassemble the segments that were passed in registers.
+//
+void CodeGen::genHomeSwiftStructParameters(bool handleStack)
+{
+    for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++)
+    {
+        if (lclNum == compiler->lvaSwiftSelfArg)
+        {
+            continue;
+        }
+
+        LclVarDsc* dsc = compiler->lvaGetDesc(lclNum);
+        if ((dsc->TypeGet() != TYP_STRUCT) || compiler->lvaIsImplicitByRefLocal(lclNum) || !dsc->lvOnFrame)
+        {
+            continue;
+        }
+
+        JITDUMP("Homing Swift parameter V%02u: ", lclNum);
+        const ABIPassingInformation& abiInfo = compiler->lvaParameterPassingInfo[lclNum];
+        DBEXEC(VERBOSE, abiInfo.Dump());
+
+        for (unsigned i = 0; i < abiInfo.NumSegments; i++)
+        {
+            const ABIPassingSegment& seg = abiInfo.Segments[i];
+            if (seg.IsPassedOnStack() != handleStack)
+            {
+                continue;
+            }
+
+            if (seg.IsPassedInRegister())
+            {
+                RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? &floatRegState : &intRegState;
+                regMaskTP regs     = seg.GetRegisterMask();
+
+                if ((regState->rsCalleeRegArgMaskLiveIn & regs) != RBM_NONE)
+                {
+                    var_types storeType = seg.GetRegisterStoreType();
+                    assert(storeType != TYP_UNDEF);
+                    GetEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), seg.GetRegister(), lclNum,
+                                              seg.Offset);
+
+                    regState->rsCalleeRegArgMaskLiveIn &= ~regs;
+                }
+            }
+            else
+            {
+                var_types loadType = TYP_UNDEF;
+                switch (seg.Size)
+                {
+                    case 1:
+                        loadType = TYP_UBYTE;
+                        break;
+                    case 2:
+                        loadType = TYP_USHORT;
+                        break;
+                    case 4:
+                        loadType = TYP_INT;
+                        break;
+                    case 8:
+                        loadType = TYP_LONG;
+                        break;
+                    default:
+                        assert(!"Unexpected segment size for struct parameter not passed implicitly by ref");
+                        continue;
+                }
+
+                int offset;
+                if (isFramePointerUsed())
+                {
+                    offset = -genCallerSPtoFPdelta();
+                }
+                else
+                {
+                    offset = -genCallerSPtoInitialSPdelta();
+                }
+
+                offset += (int)seg.GetStackOffset();
+
+                // Move the incoming segment to the local stack frame. We can
+                // use REG_SCRATCH as a temporary register here as we ensured
+                // that during LSRA build.
+#ifdef TARGET_XARCH
+                GetEmitter()->emitIns_R_AR(ins_Load(loadType), emitTypeSize(loadType), REG_SCRATCH,
+                                           genFramePointerReg(), offset);
+#else
+                genInstrWithConstant(ins_Load(loadType), emitTypeSize(loadType), REG_SCRATCH, genFramePointerReg(),
+                                     offset, REG_SCRATCH);
+#endif
+
+                GetEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), REG_SCRATCH, lclNum, seg.Offset);
+            }
+        }
+    }
+}
+#endif
+
 /*-----------------------------------------------------------------------------
  *
  *  Save the generic context argument.
@@ -5241,8 +5340,6 @@ void CodeGen::genReserveEpilog(BasicBlock* block)
                                           block->IsLast());
 }
 
-#if defined(FEATURE_EH_FUNCLETS)
-
 /*****************************************************************************
  *
  *  Reserve space for a funclet prolog.
@@ -5250,6 +5347,7 @@ void CodeGen::genReserveEpilog(BasicBlock* block)
 
 void CodeGen::genReserveFuncletProlog(BasicBlock* block)
 {
+    assert(compiler->UsesFunclets());
     assert(block != nullptr);
 
     /* Currently, no registers are live on entry to the prolog, except maybe
@@ -5280,6 +5378,7 @@ void CodeGen::genReserveFuncletProlog(BasicBlock* block)
 
 void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
 {
+    assert(compiler->UsesFunclets());
     assert(block != nullptr);
 
     JITDUMP("Reserving funclet epilog IG for block " FMT_BB "\n", block->bbNum);
@@ -5288,8 +5387,6 @@ void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
                                           gcInfo.gcRegByrefSetCur, block->IsLast());
 }
 
-#endif // FEATURE_EH_FUNCLETS
-
 /*****************************************************************************
  *  Finalize the frame size and offset assignments.
  *
@@ -5309,7 +5406,6 @@ void CodeGen::genFinalizeFrame()
     genCheckUseBlockInit();
 
     // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_X86)
 
@@ -5361,7 +5457,7 @@ void CodeGen::genFinalizeFrame()
         }
         noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
 #else  // !TARGET_AMD64 && !TARGET_ARM64
-        // On x86 we save all callee saved regs so the saved reg area size is consistent
+       // On x86 we save all callee saved regs so the saved reg area size is consistent
         regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
 #endif // !TARGET_AMD64 && !TARGET_ARM64
     }
@@ -5391,7 +5487,7 @@ void CodeGen::genFinalizeFrame()
     noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
 #endif
 
-    regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+    regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedCalleeSavedRegsMask();
 
 #ifdef TARGET_ARMARCH
     if (isFramePointerUsed())
@@ -5604,7 +5700,7 @@ void CodeGen::genFnProlog()
     }
 #endif // DEBUG
 
-#if defined(FEATURE_EH_FUNCLETS) && defined(DEBUG)
+#if defined(DEBUG)
 
     // We cannot force 0-initialization of the PSPSym
     // as it will overwrite the real value
@@ -5614,7 +5710,7 @@ void CodeGen::genFnProlog()
         assert(!varDsc->lvMustInit);
     }
 
-#endif // FEATURE_EH_FUNCLETS && DEBUG
+#endif // DEBUG
 
     /*-------------------------------------------------------------------------
      *
@@ -5776,7 +5872,6 @@ void CodeGen::genFnProlog()
 
         // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
         // previous frame pointer. Thus, stkOffs can't be zero.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if !defined(TARGET_AMD64)
         // However, on amd64 there is no requirement to chain frame pointers.
@@ -6064,14 +6159,13 @@ void CodeGen::genFnProlog()
     // Subtract the local frame size from SP.
     //
     //-------------------------------------------------------------------------
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
     regMaskTP maskStackAlloc = RBM_NONE;
 
 #ifdef TARGET_ARM
     maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize + extraFrameSize,
-                                               regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
+                                               regSet.rsGetModifiedFltCalleeSavedRegsMask());
 #endif // TARGET_ARM
 
     if (maskStackAlloc == RBM_NONE)
@@ -6090,7 +6184,7 @@ void CodeGen::genFnProlog()
     }
 #endif // TARGET_AMD64
 
-//-------------------------------------------------------------------------
+    //-------------------------------------------------------------------------
 
 #ifdef TARGET_ARM
     if (compiler->compLocallocUsed)
@@ -6116,11 +6210,11 @@ void CodeGen::genFnProlog()
 #endif // TARGET_AMD64
     compiler->unwindEndProlog();
 
-//-------------------------------------------------------------------------
-//
-// This is the end of the OS-reported prolog for purposes of unwinding
-//
-//-------------------------------------------------------------------------
+    //-------------------------------------------------------------------------
+    //
+    // This is the end of the OS-reported prolog for purposes of unwinding
+    //
+    //-------------------------------------------------------------------------
 
 #ifdef TARGET_ARM
     if (needToEstablishFP)
@@ -6146,33 +6240,35 @@ void CodeGen::genFnProlog()
 
     genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
 
-#if defined(FEATURE_EH_FUNCLETS)
-
-    genSetPSPSym(initReg, &initRegZeroed);
-
-#else // !FEATURE_EH_FUNCLETS
-
-    // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots
-    if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
+    if (compiler->UsesFunclets())
+    {
+        genSetPSPSym(initReg, &initRegZeroed);
+    }
+    else
     {
-        // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
-        unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE;
+#if defined(FEATURE_EH_WINDOWS_X86)
+        // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots
+        if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
+        {
+            // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+            unsigned filterEndOffsetSlotOffs =
+                compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE;
 
-        // Zero out the slot for nesting level 0
-        unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE;
+            // Zero out the slot for nesting level 0
+            unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE;
 
-        if (!initRegZeroed)
-        {
-            instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
-            initRegZeroed = true;
-        }
+            if (!initRegZeroed)
+            {
+                instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+                initRegZeroed = true;
+            }
 
-        GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
-                                  firstSlotOffs);
+            GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
+                                      firstSlotOffs);
+        }
+#endif // FEATURE_EH_WINDOWS_X86
     }
 
-#endif // !FEATURE_EH_FUNCLETS
-
     genReportGenericContextArg(initReg, &initRegZeroed);
 
 #ifdef JIT32_GCENCODER
@@ -6231,6 +6327,25 @@ void CodeGen::genFnProlog()
      * Take care of register arguments first
      */
 
+#ifdef SWIFT_SUPPORT
+    if (compiler->info.compCallConv == CorInfoCallConvExtension::Swift)
+    {
+        if ((compiler->lvaSwiftSelfArg != BAD_VAR_NUM) &&
+            ((intRegState.rsCalleeRegArgMaskLiveIn & RBM_SWIFT_SELF) != 0))
+        {
+            GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SWIFT_SELF, compiler->lvaSwiftSelfArg, 0);
+            intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SWIFT_SELF;
+        }
+
+        if (compiler->lvaSwiftErrorArg != BAD_VAR_NUM)
+        {
+            intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SWIFT_ERROR;
+        }
+
+        genHomeSwiftStructParameters(/* handleStack */ false);
+    }
+#endif
+
     // Home incoming arguments and generate any required inits.
     // OSR handles this by moving the values from the original frame.
     //
@@ -6242,8 +6357,7 @@ void CodeGen::genFnProlog()
         // we've set the live-in regs with values from the Tier0 frame.
         //
         // Otherwise we'll do some of these fetches twice.
-        //
-        CLANG_FORMAT_COMMENT_ANCHOR;
+
 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
         genEnregisterOSRArgsAndLocals(initReg, &initRegZeroed);
 #else
@@ -6453,6 +6567,47 @@ void CodeGen::genFnProlog()
 #pragma warning(pop)
 #endif
 
+//----------------------------------------------------------------------------------
+// genEmitJumpTable: emit jump table and return its base offset
+//
+// Arguments:
+//    treeNode     - the GT_JMPTABLE node
+//    relativeAddr - if true, references are treated as 4-byte relative addresses,
+//                   otherwise they are absolute pointers
+//
+// Return Value:
+//    base offset to jump table
+//
+// Assumption:
+//    The current basic block in process ends with a switch statement
+//
+unsigned CodeGen::genEmitJumpTable(GenTree* treeNode, bool relativeAddr)
+{
+    noway_assert(compiler->compCurBB->KindIs(BBJ_SWITCH));
+    assert(treeNode->OperGet() == GT_JMPTABLE);
+
+    emitter*       emit       = GetEmitter();
+    const unsigned jumpCount  = compiler->compCurBB->GetSwitchTargets()->bbsCount;
+    FlowEdge**     jumpTable  = compiler->compCurBB->GetSwitchTargets()->bbsDstTab;
+    const unsigned jmpTabBase = emit->emitBBTableDataGenBeg(jumpCount, relativeAddr);
+
+    JITDUMP("\n      J_M%03u_DS%02u LABEL   DWORD\n", compiler->compMethodID, jmpTabBase);
+
+    for (unsigned i = 0; i < jumpCount; i++)
+    {
+        BasicBlock* target = (*jumpTable)->getDestinationBlock();
+        jumpTable++;
+        noway_assert(target->HasFlag(BBF_HAS_LABEL));
+
+        JITDUMP("            DD      L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum);
+
+        emit->emitDataGenData(i, target);
+    };
+
+    emit->emitDataGenEnd();
+    return jmpTabBase;
+}
+
 //------------------------------------------------------------------------
 // getCallTarget - Get the node that evaluates to the call target
 //
@@ -6547,10 +6702,19 @@ void CodeGen::genDefinePendingCallLabel(GenTreeCall* call)
     // For certain indirect calls we may introduce helper calls before that we need to skip:
     // - CFG may introduce a call to the validator first
     // - Generic virtual methods may compute the target dynamically through a separate helper call
-    if (call->IsHelperCall(compiler, CORINFO_HELP_VALIDATE_INDIRECT_CALL) ||
-        call->IsHelperCall(compiler, CORINFO_HELP_VIRTUAL_FUNC_PTR))
+    // - memset/memcpy helper calls emitted for GT_STORE_BLK
+    if (call->IsHelperCall())
     {
-        return;
+        switch (compiler->eeGetHelperNum(call->gtCallMethHnd))
+        {
+            case CORINFO_HELP_VALIDATE_INDIRECT_CALL:
+            case CORINFO_HELP_VIRTUAL_FUNC_PTR:
+            case CORINFO_HELP_MEMSET:
+            case CORINFO_HELP_MEMCPY:
+                return;
+            default:
+                break;
+        }
     }
 
     genDefineInlineTempLabel(genPendingCallLabel);
@@ -6584,17 +6748,15 @@ void CodeGen::genGeneratePrologsAndEpilogs()
     genFnProlog();
 
     // Generate all the prologs and epilogs.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
-#if defined(FEATURE_EH_FUNCLETS)
-
-    // Capture the data we're going to use in the funclet prolog and epilog generation. This is
-    // information computed during codegen, or during function prolog generation, like
-    // frame offsets. It must run after main function prolog generation.
-
-    genCaptureFuncletPrologEpilogInfo();
+    if (compiler->UsesFunclets())
+    {
+        // Capture the data we're going to use in the funclet prolog and epilog generation. This is
+        // information computed during codegen, or during function prolog generation, like
+        // frame offsets. It must run after main function prolog generation.
 
-#endif // FEATURE_EH_FUNCLETS
+        genCaptureFuncletPrologEpilogInfo();
+    }
 
     // Walk the list of prologs and epilogs and generate them.
     // We maintain a list of prolog and epilog basic blocks in
@@ -6730,13 +6892,11 @@ unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
 unsigned CodeGen::getFirstArgWithStackSlot()
 {
 #if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-    unsigned baseVarNum = 0;
     // Iterate over all the lvParam variables in the Lcl var table until we find the first one
     // that's passed on the stack.
-    LclVarDsc* varDsc = nullptr;
     for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
     {
-        varDsc = compiler->lvaGetDesc(i);
+        LclVarDsc* varDsc = compiler->lvaGetDesc(i);
 
         // We should have found a stack parameter (and broken out of this loop) before
         // we find any non-parameters.
@@ -6744,13 +6904,12 @@ unsigned CodeGen::getFirstArgWithStackSlot()
 
         if (varDsc->GetArgReg() == REG_STK)
         {
-            baseVarNum = i;
-            break;
+            return i;
         }
     }
-    assert(varDsc != nullptr);
 
-    return baseVarNum;
+    assert(!"Expected to find a parameter passed on the stack");
+    return BAD_VAR_NUM;
 #elif defined(TARGET_AMD64)
     return 0;
 #else  // TARGET_X86
@@ -7355,6 +7514,26 @@ void CodeGen::genReportRichDebugInfoToFile()
 
 #endif
 
+//------------------------------------------------------------------------
+// SuccessfulSibling:
+//   Find the next sibling inline context that was successfully inlined.
+//
+// Parameters:
+//   context - the inline context. Can be nullptr in which case nullptr is returned.
+//
+// Returns:
+//   The sibling, or nullptr if there is no succesful sibling.
+//
+static InlineContext* SuccessfulSibling(InlineContext* context)
+{
+    while ((context != nullptr) && !context->IsSuccess())
+    {
+        context = context->GetSibling();
+    }
+
+    return context;
+}
+
 //------------------------------------------------------------------------
 // genRecordRichDebugInfoInlineTree:
 //   Recursively process a context in the inline tree and record information
@@ -7366,26 +7545,28 @@ void CodeGen::genReportRichDebugInfoToFile()
 //
 void CodeGen::genRecordRichDebugInfoInlineTree(InlineContext* context, ICorDebugInfo::InlineTreeNode* nodes)
 {
-    if (context->IsSuccess())
-    {
-        // We expect 1 + NumInlines unique ordinals
-        assert(context->GetOrdinal() <= compiler->m_inlineStrategy->GetInlineCount());
+    assert(context->IsSuccess());
 
-        ICorDebugInfo::InlineTreeNode* node = &nodes[context->GetOrdinal()];
-        node->Method                        = context->GetCallee();
-        node->ILOffset                      = context->GetActualCallOffset();
-        node->Child                         = context->GetChild() == nullptr ? 0 : context->GetChild()->GetOrdinal();
-        node->Sibling = context->GetSibling() == nullptr ? 0 : context->GetSibling()->GetOrdinal();
-    }
+    // We expect 1 + NumInlines unique ordinals
+    assert(context->GetOrdinal() <= compiler->m_inlineStrategy->GetInlineCount());
 
-    if (context->GetSibling() != nullptr)
+    InlineContext* successfulChild   = SuccessfulSibling(context->GetChild());
+    InlineContext* successfulSibling = SuccessfulSibling(context->GetSibling());
+
+    ICorDebugInfo::InlineTreeNode* node = &nodes[context->GetOrdinal()];
+    node->Method                        = context->GetCallee();
+    node->ILOffset                      = context->GetActualCallOffset();
+    node->Child                         = successfulChild == nullptr ? 0 : successfulChild->GetOrdinal();
+    node->Sibling                       = successfulSibling == nullptr ? 0 : successfulSibling->GetOrdinal();
+
+    if (successfulSibling != nullptr)
     {
-        genRecordRichDebugInfoInlineTree(context->GetSibling(), nodes);
+        genRecordRichDebugInfoInlineTree(successfulSibling, nodes);
     }
 
-    if (context->GetChild() != nullptr)
+    if (successfulChild != nullptr)
     {
-        genRecordRichDebugInfoInlineTree(context->GetChild(), nodes);
+        genRecordRichDebugInfoInlineTree(successfulChild, nodes);
     }
 }
 
@@ -7435,6 +7616,28 @@ void CodeGen::genReportRichDebugInfo()
         mappingIndex++;
     }
 
+#ifdef DEBUG
+    if (verbose)
+    {
+        printf("Reported inline tree:\n");
+        for (unsigned i = 0; i < numContexts; i++)
+        {
+            printf("  [#%d] %s @ %d, child = %d, sibling = %d\n", i,
+                   compiler->eeGetMethodFullName(inlineTree[i].Method), inlineTree[i].ILOffset, inlineTree[i].Child,
+                   inlineTree[i].Sibling);
+        }
+
+        printf("\nReported rich mappings:\n");
+        for (size_t i = 0; i < mappingIndex; i++)
+        {
+            printf("  [%zu] 0x%x <-> IL %d in #%d\n", i, mappings[i].NativeOffset, mappings[i].ILOffset,
+                   mappings[i].Inlinee);
+        }
+
+        printf("\n");
+    }
+#endif
+
     compiler->info.compCompHnd->reportRichMappings(inlineTree, numContexts, mappings, numRichMappings);
 }
 
@@ -7685,7 +7888,8 @@ void CodeGen::genReturn(GenTree* treeNode)
             {
                 if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
                 {
-                    gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
+                    gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv),
+                                           retTypeDesc.GetReturnRegType(i));
                 }
             }
         }
@@ -7702,7 +7906,7 @@ void CodeGen::genReturn(GenTree* treeNode)
             {
                 if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
                 {
-                    gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i)));
+                    gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv)));
                 }
             }
         }
@@ -7712,23 +7916,39 @@ void CodeGen::genReturn(GenTree* treeNode)
 #if defined(DEBUG) && defined(TARGET_XARCH)
     bool doStackPointerCheck = compiler->opts.compStackCheckOnRet;
 
-#if defined(FEATURE_EH_FUNCLETS)
-    // Don't do stack pointer check at the return from a funclet; only for the main function.
-    if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
+    if (compiler->UsesFunclets())
     {
-        doStackPointerCheck = false;
+        // Don't do stack pointer check at the return from a funclet; only for the main function.
+        if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
+        {
+            doStackPointerCheck = false;
+        }
     }
-#else  // !FEATURE_EH_FUNCLETS
-    // Don't generate stack checks for x86 finally/filter EH returns: these are not invoked
-    // with the same SP as the main function. See also CodeGen::genEHFinallyOrFilterRet().
-    if (compiler->compCurBB->KindIs(BBJ_EHFINALLYRET, BBJ_EHFAULTRET, BBJ_EHFILTERRET))
+    else
     {
-        doStackPointerCheck = false;
+#if defined(FEATURE_EH_WINDOWS_X86)
+        // Don't generate stack checks for x86 finally/filter EH returns: these are not invoked
+        // with the same SP as the main function. See also CodeGen::genEHFinallyOrFilterRet().
+        if (compiler->compCurBB->KindIs(BBJ_EHFINALLYRET, BBJ_EHFAULTRET, BBJ_EHFILTERRET))
+        {
+            doStackPointerCheck = false;
+        }
+#endif // FEATURE_EH_WINDOWS_X86
     }
-#endif // !FEATURE_EH_FUNCLETS
 
     genStackPointerCheck(doStackPointerCheck, compiler->lvaReturnSpCheck);
 #endif // defined(DEBUG) && defined(TARGET_XARCH)
+
+#ifdef SWIFT_SUPPORT
+    // If this method has a SwiftError* out parameter, load the SwiftError pseudolocal value into the error register.
+    // TODO-CQ: Introduce GenTree node that models returning a normal and Swift error value.
+    if (compiler->lvaSwiftErrorArg != BAD_VAR_NUM)
+    {
+        assert(compiler->info.compCallConv == CorInfoCallConvExtension::Swift);
+        assert(compiler->lvaSwiftErrorLocal != BAD_VAR_NUM);
+        GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SWIFT_ERROR, compiler->lvaSwiftErrorLocal, 0);
+    }
+#endif // SWIFT_SUPPORT
 }
 
 //------------------------------------------------------------------------
@@ -7809,7 +8029,7 @@ void CodeGen::genStructReturn(GenTree* treeNode)
         // On LoongArch64, for a struct like "{ int, double }", "retTypeDesc" will be "{ TYP_INT, TYP_DOUBLE }",
         // i. e. not include the padding for the first field, and so the general loop below won't work.
         var_types type  = retTypeDesc.GetReturnRegType(0);
-        regNumber toReg = retTypeDesc.GetABIReturnReg(0);
+        regNumber toReg = retTypeDesc.GetABIReturnReg(0, compiler->info.compCallConv);
         GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), 0);
         if (regCount > 1)
         {
@@ -7817,15 +8037,35 @@ void CodeGen::genStructReturn(GenTree* treeNode)
             int offset = genTypeSize(type);
             type       = retTypeDesc.GetReturnRegType(1);
             offset     = (int)((unsigned int)offset < genTypeSize(type) ? genTypeSize(type) : offset);
-            toReg      = retTypeDesc.GetABIReturnReg(1);
+            toReg      = retTypeDesc.GetABIReturnReg(1, compiler->info.compCallConv);
             GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset);
         }
-#else  // !TARGET_LOONGARCH64 && !TARGET_RISCV64
+#else // !TARGET_LOONGARCH64 && !TARGET_RISCV64
+
+#ifdef SWIFT_SUPPORT
+        const uint32_t* offsets = nullptr;
+        if (compiler->info.compCallConv == CorInfoCallConvExtension::Swift)
+        {
+            CORINFO_CLASS_HANDLE          retTypeHnd = compiler->info.compMethodInfo->args.retTypeClass;
+            const CORINFO_SWIFT_LOWERING* lowering   = compiler->GetSwiftLowering(retTypeHnd);
+            assert(!lowering->byReference && (regCount == lowering->numLoweredElements));
+            offsets = lowering->offsets;
+        }
+#endif
+
         int offset = 0;
         for (unsigned i = 0; i < regCount; ++i)
         {
             var_types type  = retTypeDesc.GetReturnRegType(i);
-            regNumber toReg = retTypeDesc.GetABIReturnReg(i);
+            regNumber toReg = retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv);
+
+#ifdef SWIFT_SUPPORT
+            if (offsets != nullptr)
+            {
+                offset = offsets[i];
+            }
+#endif
+
             GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset);
             offset += genTypeSize(type);
         }
@@ -7836,7 +8076,7 @@ void CodeGen::genStructReturn(GenTree* treeNode)
         for (unsigned i = 0; i < regCount; ++i)
         {
             var_types type    = retTypeDesc.GetReturnRegType(i);
-            regNumber toReg   = retTypeDesc.GetABIReturnReg(i);
+            regNumber toReg   = retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv);
             regNumber fromReg = op1->GetRegByIndex(i);
             if ((fromReg == REG_NA) && op1->OperIs(GT_COPY))
             {
@@ -7901,7 +8141,7 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode)
 
     unsigned   lclNum = lclNode->GetLclNum();
     LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum);
-    if (op1->OperIs(GT_CALL))
+    if (actualOp1->OperIs(GT_CALL))
     {
         assert(regCount <= MAX_RET_REG_COUNT);
         noway_assert(varDsc->lvIsMultiRegRet);
@@ -7960,6 +8200,16 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode)
         assert(regCount == varDsc->lvFieldCnt);
     }
 
+#ifdef SWIFT_SUPPORT
+    const uint32_t* offsets = nullptr;
+    if (actualOp1->IsCall() && (actualOp1->AsCall()->GetUnmanagedCallConv() == CorInfoCallConvExtension::Swift))
+    {
+        const CORINFO_SWIFT_LOWERING* lowering = compiler->GetSwiftLowering(actualOp1->AsCall()->gtRetClsHnd);
+        assert(!lowering->byReference && (regCount == lowering->numLoweredElements));
+        offsets = lowering->offsets;
+    }
+#endif
+
     for (unsigned i = 0; i < regCount; ++i)
     {
         regNumber reg     = genConsumeReg(op1, i);
@@ -8002,6 +8252,12 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode)
 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
             // should consider the padding field within a struct.
             offset = (offset % genTypeSize(srcType)) ? AlignUp(offset, genTypeSize(srcType)) : offset;
+#endif
+#ifdef SWIFT_SUPPORT
+            if (offsets != nullptr)
+            {
+                offset = offsets[i];
+            }
 #endif
             // Several fields could be passed in one register, copy using the register type.
             // It could rewrite memory outside of the fields but local on the stack are rounded to POINTER_SIZE so
@@ -8334,7 +8590,6 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn)
         if ((size / TARGET_POINTER_SIZE) > 16)
         {
             // This will require more than 16 instructions, switch to rep stosd/memset call.
-            CLANG_FORMAT_COMMENT_ANCHOR;
 #if defined(TARGET_XARCH)
             GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_EDI, (int)varNum, 0);
             assert(size % 4 == 0);
@@ -8351,7 +8606,9 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn)
             GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_0, (int)varNum, 0);
             instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_1, static_cast<char>(poisonVal));
             instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_ARG_2, size);
-            genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+
+            // Call non-managed memset
+            genEmitHelperCall(CORINFO_HELP_NATIVE_MEMSET, 0, EA_UNKNOWN);
             // May kill REG_SCRATCH, so we need to reload it.
             hasPoisonImm = false;
 #endif
@@ -8369,7 +8626,7 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn)
             bool fpBased;
             int  addr = compiler->lvaFrameAddress((int)varNum, &fpBased);
 #else
-            int addr     = 0;
+            int addr = 0;
 #endif
             int end = addr + (int)size;
             for (int offs = addr; offs < end;)
@@ -8515,3 +8772,31 @@ void CodeGen::genCodeForReuseVal(GenTree* treeNode)
         genDefineTempLabel(genCreateTempLabel());
     }
 }
+
+#ifdef SWIFT_SUPPORT
+//---------------------------------------------------------------------
+// genCodeForSwiftErrorReg - generate code for a GT_SWIFT_ERROR node
+//
+// Arguments
+//    tree - the GT_SWIFT_ERROR node
+//
+// Return value:
+//    None
+//
+void CodeGen::genCodeForSwiftErrorReg(GenTree* tree)
+{
+    assert(tree->OperIs(GT_SWIFT_ERROR));
+
+    var_types targetType = tree->TypeGet();
+    regNumber targetReg  = tree->GetRegNum();
+
+    // LSRA should have picked REG_SWIFT_ERROR as the destination register, too
+    // (see LinearScan::BuildNode for an explanation of why we want this)
+    assert(targetReg == REG_SWIFT_ERROR);
+
+    inst_Mov(targetType, targetReg, REG_SWIFT_ERROR, /* canSkip */ true);
+    genTransferRegGCState(targetReg, REG_SWIFT_ERROR);
+
+    genProduceReg(tree);
+}
+#endif // SWIFT_SUPPORT
diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h
index f45713a3f1d8..c47f505afc8f 100644
--- a/src/coreclr/jit/codegeninterface.h
+++ b/src/coreclr/jit/codegeninterface.h
@@ -154,11 +154,6 @@ class CodeGenInterface
     void genUpdateVarReg(LclVarDsc* varDsc, GenTree* tree);
 
 protected:
-#ifdef DEBUG
-    VARSET_TP genTempOldLife;
-    bool      genTempLiveChg;
-#endif
-
     VARSET_TP genLastLiveSet;  // A one element map (genLastLiveSet-> genLastLiveMask)
     regMaskTP genLastLiveMask; // these two are used in genLiveMask
 
@@ -171,8 +166,8 @@ class CodeGenInterface
     TreeLifeUpdater<true>* treeLifeUpdater;
 
 public:
-    bool genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf);
-    bool genUseOptimizedWriteBarriers(GenTreeStoreInd* store);
+    bool            genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf);
+    bool            genUseOptimizedWriteBarriers(GenTreeStoreInd* store);
     CorInfoHelpFunc genWriteBarrierHelperForWriteBarrierForm(GCInfo::WriteBarrierForm wbf);
 
 #ifdef DEBUG
@@ -448,7 +443,8 @@ class CodeGenInterface
     {
         siVarLocType vlType;
 
-        union {
+        union
+        {
             // VLT_REG/VLT_REG_FP -- Any pointer-sized enregistered value (TYP_INT, TYP_REF, etc)
             // eg. EAX
             // VLT_REG_BYREF -- the specified register contains the address of the variable
@@ -633,7 +629,9 @@ class CodeGenInterface
             VariableLiveRange(CodeGenInterface::siVarLoc varLocation,
                               emitLocation               startEmitLocation,
                               emitLocation               endEmitLocation)
-                : m_StartEmitLocation(startEmitLocation), m_EndEmitLocation(endEmitLocation), m_VarLocation(varLocation)
+                : m_StartEmitLocation(startEmitLocation)
+                , m_EndEmitLocation(endEmitLocation)
+                , m_VarLocation(varLocation)
             {
             }
 
@@ -681,7 +679,8 @@ class CodeGenInterface
 
         public:
             LiveRangeDumper(const LiveRangeList* liveRanges)
-                : m_startingLiveRange(liveRanges->end()), m_hasLiveRangesToDump(false){};
+                : m_startingLiveRange(liveRanges->end())
+                , m_hasLiveRangesToDump(false){};
 
             // Make the dumper point to the last "VariableLiveRange" opened or nullptr if all are closed
             void resetDumper(const LiveRangeList* list);
@@ -762,7 +761,7 @@ class CodeGenInterface
 
         LiveRangeList* getLiveRangesForVarForBody(unsigned int varNum) const;
         LiveRangeList* getLiveRangesForVarForProlog(unsigned int varNum) const;
-        size_t getLiveRangesCount() const;
+        size_t         getLiveRangesCount() const;
 
         // For parameters locations on prolog
         void psiStartVariableLiveRange(CodeGenInterface::siVarLoc varLocation, unsigned int varNum);
diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index 913f3a47002a..351ca1494283 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -157,7 +157,8 @@ void CodeGen::genCodeForBBlist()
     genMarkLabelsForCodegen();
 
     assert(!compiler->fgFirstBBScratch ||
-           compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+           compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch
+                                                               // has to be first.
 
     /* Initialize structures used in the block list iteration */
     genInitialize();
@@ -375,17 +376,26 @@ void CodeGen::genCodeForBBlist()
 
         bool firstMapping = true;
 
-#if defined(FEATURE_EH_FUNCLETS)
         if (block->HasFlag(BBF_FUNCLET_BEG))
         {
+            assert(compiler->UsesFunclets());
             genReserveFuncletProlog(block);
         }
-#endif // FEATURE_EH_FUNCLETS
 
         // Clear compCurStmt and compCurLifeTree.
         compiler->compCurStmt     = nullptr;
         compiler->compCurLifeTree = nullptr;
 
+#ifdef SWIFT_SUPPORT
+        // Reassemble Swift struct parameters on the local stack frame in the
+        // scratch BB right after the prolog. There can be arbitrary amounts of
+        // codegen related to doing this, so it cannot be done in the prolog.
+        if (compiler->fgBBisScratch(block) && compiler->lvaHasAnySwiftStackParamToReassemble())
+        {
+            genHomeSwiftStructParameters(/* handleStack */ true);
+        }
+#endif
+
         // Emit poisoning into scratch BB that comes right after prolog.
         // We cannot emit this code in the prolog as it might make the prolog too large.
         if (compiler->compShouldPoisonFrame() && compiler->fgBBisScratch(block))
@@ -395,7 +405,6 @@ void CodeGen::genCodeForBBlist()
 
         // Traverse the block in linear order, generating code for each node as we
         // as we encounter it.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
         // Set the use-order numbers for each node.
@@ -622,7 +631,7 @@ void CodeGen::genCodeForBBlist()
                     case BBJ_THROW:
                     case BBJ_CALLFINALLY:
                     case BBJ_EHCATCHRET:
-                    // We're going to generate more code below anyway, so no need for the NOP.
+                        // We're going to generate more code below anyway, so no need for the NOP.
 
                     case BBJ_RETURN:
                     case BBJ_EHFINALLYRET:
@@ -633,7 +642,7 @@ void CodeGen::genCodeForBBlist()
 
                     case BBJ_COND:
                     case BBJ_SWITCH:
-                    // These can't have a call as the last instruction!
+                        // These can't have a call as the last instruction!
 
                     default:
                         noway_assert(!"Unexpected bbKind");
@@ -712,7 +721,9 @@ void CodeGen::genCodeForBBlist()
 
                     if ((call != nullptr) && (call->gtOper == GT_CALL))
                     {
-                        if ((call->AsCall()->gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN) != 0)
+                        if ((call->AsCall()->gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN) != 0 ||
+                            ((call->AsCall()->gtCallType == CT_HELPER) &&
+                             Compiler::s_helperCallProperties.AlwaysThrow(call->AsCall()->GetHelperNum())))
                         {
                             instGen(INS_BREAKPOINT); // This should never get executed
                         }
@@ -725,37 +736,45 @@ void CodeGen::genCodeForBBlist()
                 block = genCallFinally(block);
                 break;
 
-#if defined(FEATURE_EH_FUNCLETS)
-
             case BBJ_EHCATCHRET:
+                assert(compiler->UsesFunclets());
                 genEHCatchRet(block);
                 FALLTHROUGH;
 
             case BBJ_EHFINALLYRET:
             case BBJ_EHFAULTRET:
             case BBJ_EHFILTERRET:
-                genReserveFuncletEpilog(block);
-                break;
-
-#else // !FEATURE_EH_FUNCLETS
-
-            case BBJ_EHCATCHRET:
-                noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86
-                break;
-
-            case BBJ_EHFINALLYRET:
-            case BBJ_EHFAULTRET:
-            case BBJ_EHFILTERRET:
-                genEHFinallyOrFilterRet(block);
+                if (compiler->UsesFunclets())
+                {
+                    genReserveFuncletEpilog(block);
+                }
+#if defined(FEATURE_EH_WINDOWS_X86)
+                else
+                {
+                    genEHFinallyOrFilterRet(block);
+                }
+#endif // FEATURE_EH_WINDOWS_X86
                 break;
 
-#endif // !FEATURE_EH_FUNCLETS
-
             case BBJ_SWITCH:
                 break;
 
             case BBJ_ALWAYS:
             {
+                GenTree* call = block->lastNode();
+                if ((call != nullptr) && (call->gtOper == GT_CALL))
+                {
+                    if ((call->AsCall()->gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN) != 0 ||
+                        ((call->AsCall()->gtCallType == CT_HELPER) &&
+                         Compiler::s_helperCallProperties.AlwaysThrow(call->AsCall()->GetHelperNum())))
+                    {
+                        // NOTE: We should probably never see a BBJ_ALWAYS block ending with a throw in a first place.
+                        //       If that is fixed, this condition can be just an assert.
+                        //       For the reasons why we insert a BP, see the similar code in "case BBJ_THROW:" above.
+                        instGen(INS_BREAKPOINT); // This should never get executed
+                    }
+                }
+
                 // If this block jumps to the next one, we might be able to skip emitting the jump
                 if (block->CanRemoveJumpToNext(compiler))
                 {
@@ -812,9 +831,7 @@ void CodeGen::genCodeForBBlist()
 
             assert(ShouldAlignLoops());
             assert(!block->isBBCallFinallyPairTail());
-#if FEATURE_EH_CALLFINALLY_THUNKS
             assert(!block->KindIs(BBJ_CALLFINALLY));
-#endif // FEATURE_EH_CALLFINALLY_THUNKS
 
             GetEmitter()->emitLoopAlignment(DEBUG_ARG1(block->KindIs(BBJ_ALWAYS) && !removedJmp));
         }
@@ -840,7 +857,7 @@ void CodeGen::genCodeForBBlist()
 #endif // DEBUG
     }  //------------------ END-FOR each block of the method -------------------
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
     // If this is a synchronized method on x86, and we generated all the code without
     // generating the "exit monitor" call, then we must have deleted the single return block
     // with that call because it was dead code. We still need to report the monitor range
@@ -850,14 +867,15 @@ void CodeGen::genCodeForBBlist()
     // Do this before cleaning the GC refs below; we don't want to create an IG that clears
     // the `this` pointer for lvaKeepAliveAndReportThis.
 
-    if ((compiler->info.compFlags & CORINFO_FLG_SYNCH) && (compiler->syncEndEmitCookie == nullptr))
+    if (!compiler->UsesFunclets() && (compiler->info.compFlags & CORINFO_FLG_SYNCH) &&
+        (compiler->syncEndEmitCookie == nullptr))
     {
         JITDUMP("Synchronized method with missing exit monitor call; adding final label\n");
         compiler->syncEndEmitCookie =
             GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
         noway_assert(compiler->syncEndEmitCookie != nullptr);
     }
-#endif // !FEATURE_EH_FUNCLETS
+#endif
 
     // There could be variables alive at this point. For example see lvaKeepAliveAndReportThis.
     // This call is for cleaning the GC refs
@@ -1012,45 +1030,6 @@ void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTree* tree)
     varDsc->SetRegNum(tree->GetRegNum());
 }
 
-//------------------------------------------------------------------------
-// sameRegAsDst: Return the child that has the same reg as the dst (if any)
-//
-// Arguments:
-//    tree  - the node of interest
-//    other - an out parameter to return the other child
-//
-// Notes:
-//    If 'tree' has a child with the same assigned register as its target reg,
-//    that child will be returned, and 'other' will contain the non-matching child.
-//    Otherwise, both other and the return value will be nullptr.
-//
-GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
-{
-    if (tree->GetRegNum() == REG_NA)
-    {
-        other = nullptr;
-        return nullptr;
-    }
-
-    GenTree* op1 = tree->AsOp()->gtOp1;
-    GenTree* op2 = tree->AsOp()->gtOp2;
-    if (op1->GetRegNum() == tree->GetRegNum())
-    {
-        other = op2;
-        return op1;
-    }
-    if (op2->GetRegNum() == tree->GetRegNum())
-    {
-        other = op1;
-        return op2;
-    }
-    else
-    {
-        other = nullptr;
-        return nullptr;
-    }
-}
-
 //------------------------------------------------------------------------
 // genUnspillLocal: Reload a register candidate local into a register, if needed.
 //
@@ -1802,7 +1781,6 @@ void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode,
 
     // If the op1 is already in the dstReg - nothing to do.
     // Otherwise load the op1 (the address) into the dstReg to copy the struct on the stack by value.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef TARGET_X86
     assert(dstReg != REG_SPBASE);
@@ -1891,8 +1869,8 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg
         var_types type            = use.GetType();
         unsigned  thisFieldOffset = argOffset + use.GetOffset();
 
-// Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
-// argument area.
+        // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
+        // argument area.
 
 #if defined(FEATURE_SIMD)
         if (type == TYP_SIMD12)
@@ -1935,18 +1913,9 @@ void CodeGen::genSetBlockSize(GenTreeBlk* blkNode, regNumber sizeReg)
 {
     if (sizeReg != REG_NA)
     {
-        unsigned blockSize = blkNode->Size();
-        if (!blkNode->OperIs(GT_STORE_DYN_BLK))
-        {
-            assert((blkNode->gtRsvdRegs & genRegMask(sizeReg)) != 0);
-            // This can go via helper which takes the size as a native uint.
-            instGen_Set_Reg_To_Imm(EA_PTRSIZE, sizeReg, blockSize);
-        }
-        else
-        {
-            GenTree* sizeNode = blkNode->AsStoreDynBlk()->gtDynamicSize;
-            inst_Mov(sizeNode->TypeGet(), sizeReg, sizeNode->GetRegNum(), /* canSkip */ true);
-        }
+        assert((blkNode->gtRsvdRegs & genRegMask(sizeReg)) != 0);
+        // This can go via helper which takes the size as a native uint.
+        instGen_Set_Reg_To_Imm(EA_PTRSIZE, sizeReg, blkNode->Size());
     }
 }
 
@@ -2052,12 +2021,6 @@ void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber
     genConsumeReg(dstAddr);
     // The source may be a local or in a register; 'genConsumeBlockSrc' will check that.
     genConsumeBlockSrc(blkNode);
-    // 'genSetBlockSize' (called below) will ensure that a register has been reserved as needed
-    // in the case where the size is a constant (i.e. it is not GT_STORE_DYN_BLK).
-    if (blkNode->OperGet() == GT_STORE_DYN_BLK)
-    {
-        genConsumeReg(blkNode->AsStoreDynBlk()->gtDynamicSize);
-    }
 
     // Next, perform any necessary moves.
     genCopyRegIfNeeded(dstAddr, dstReg);
diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp
index 41266917205a..94329e348610 100644
--- a/src/coreclr/jit/codegenloongarch64.cpp
+++ b/src/coreclr/jit/codegenloongarch64.cpp
@@ -1225,9 +1225,9 @@ void CodeGen::genFnEpilog(BasicBlock* block)
 #if !FEATURE_FASTTAILCALL
         noway_assert(jmpNode->gtOper == GT_JMP);
 #else  // FEATURE_FASTTAILCALL
-        // armarch
-        // If jmpNode is GT_JMP then gtNext must be null.
-        // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
+       // armarch
+       // If jmpNode is GT_JMP then gtNext must be null.
+       // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
         noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
 
         // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
@@ -1307,7 +1307,6 @@ void CodeGen::genFnEpilog(BasicBlock* block)
                                        0,             // disp
                                        true);         // isJump
             // clang-format on
-            CLANG_FORMAT_COMMENT_ANCHOR;
         }
 #if FEATURE_FASTTAILCALL
         else
@@ -1573,9 +1572,9 @@ void CodeGen::genEHCatchRet(BasicBlock* block)
 }
 
 //  move an immediate value into an integer register
-void CodeGen::instGen_Set_Reg_To_Imm(emitAttr  size,
-                                     regNumber reg,
-                                     ssize_t   imm,
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr       size,
+                                     regNumber      reg,
+                                     ssize_t        imm,
                                      insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
 {
     emitter* emit = GetEmitter();
@@ -2418,7 +2417,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree)
         // Floating point divide never raises an exception
         assert(varTypeIsFloating(tree->gtOp1));
         assert(varTypeIsFloating(tree->gtOp2));
-        assert(tree->gtOper == GT_DIV);
+        assert(tree->OperIs(GT_DIV));
 
         instruction ins = genGetInsForOper(tree);
         emit->emitIns_R_R_R(ins, emitActualTypeSize(targetType), tree->GetRegNum(), tree->gtOp1->GetRegNum(),
@@ -2480,7 +2479,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree)
         }
 
         // check (MinInt / -1) => ArithmeticException
-        if (tree->gtOper == GT_DIV || tree->gtOper == GT_MOD)
+        if (tree->OperIs(GT_DIV, GT_MOD))
         {
             if ((exSetFlags & ExceptionSetFlags::ArithmeticException) != ExceptionSetFlags::None)
             {
@@ -2514,55 +2513,20 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree)
             // Generate the sdiv instruction
             if (size == EA_4BYTE)
             {
-                if (tree->OperGet() == GT_DIV)
-                {
-                    ins = INS_div_w;
-                }
-                else
-                {
-                    ins = INS_mod_w;
-                }
+                ins = tree->OperIs(GT_DIV) ? INS_div_w : INS_mod_w;
             }
             else
             {
-                if (tree->OperGet() == GT_DIV)
-                {
-                    ins = INS_div_d;
-                }
-                else
-                {
-                    ins = INS_mod_d;
-                }
+                ins = tree->OperIs(GT_DIV) ? INS_div_d : INS_mod_d;
             }
 
             emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg);
         }
-        else // if (tree->gtOper == GT_UDIV) GT_UMOD
+        else // tree->OperIs(GT_UDIV, GT_UMOD)
         {
-            // Only one possible exception
-            //     (AnyVal /  0) => DivideByZeroException
-            //
-            // Note that division by the constant 0 was already checked for above by the
-            // op2->IsIntegralConst(0) check
-            //
-
-            if (!divisorOp->IsCnsIntOrI())
-            {
-                // divisorOp is not a constant, so it could be zero
-                //
-                genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg);
-            }
-
             if (size == EA_4BYTE)
             {
-                if (tree->OperGet() == GT_UDIV)
-                {
-                    ins = INS_div_wu;
-                }
-                else
-                {
-                    ins = INS_mod_wu;
-                }
+                ins = tree->OperIs(GT_UDIV) ? INS_div_wu : INS_mod_wu;
 
                 // TODO-LOONGARCH64: here is just for signed-extension ?
                 emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, Reg1, Reg1, 0);
@@ -2570,14 +2534,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree)
             }
             else
             {
-                if (tree->OperGet() == GT_UDIV)
-                {
-                    ins = INS_div_du;
-                }
-                else
-                {
-                    ins = INS_mod_du;
-                }
+                ins = tree->OperIs(GT_UDIV) ? INS_div_du : INS_mod_du;
             }
 
             emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg);
@@ -2927,32 +2884,7 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode)
 // emits the table and an instruction to get the address of the first element
 void CodeGen::genJumpTable(GenTree* treeNode)
 {
-    noway_assert(compiler->compCurBB->KindIs(BBJ_SWITCH));
-    assert(treeNode->OperGet() == GT_JMPTABLE);
-
-    unsigned     jumpCount = compiler->compCurBB->GetSwitchTargets()->bbsCount;
-    BasicBlock** jumpTable = compiler->compCurBB->GetSwitchTargets()->bbsDstTab;
-    unsigned     jmpTabOffs;
-    unsigned     jmpTabBase;
-
-    jmpTabBase = GetEmitter()->emitBBTableDataGenBeg(jumpCount, true);
-
-    jmpTabOffs = 0;
-
-    JITDUMP("\n      J_M%03u_DS%02u LABEL   DWORD\n", compiler->compMethodID, jmpTabBase);
-
-    for (unsigned i = 0; i < jumpCount; i++)
-    {
-        BasicBlock* target = *jumpTable++;
-        noway_assert(target->HasFlag(BBF_HAS_LABEL));
-
-        JITDUMP("            DD      L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum);
-
-        GetEmitter()->emitDataGenData(i, target);
-    };
-
-    GetEmitter()->emitDataGenEnd();
-
+    unsigned jmpTabBase = genEmitJumpTable(treeNode, true);
     // Access to inline data is 'abstracted' by a special type of static member
     // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
     // to constant data, not a real static field.
@@ -3401,7 +3333,7 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree)
                                callTarget,                                                    /* ireg */
                                REG_NA, 0, 0,                                                  /* xreg, xmul, disp */
                                false                                                          /* isJump */
-                               );
+    );
 
     regMaskTP killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
     regSet.verifyRegistersUsed(killMask);
@@ -4465,7 +4397,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
                                callTarget,                           /* ireg */
                                REG_NA, 0, 0,                         /* xreg, xmul, disp */
                                false                                 /* isJump */
-                               );
+    );
 
     regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
     regSet.verifyRegistersUsed(killMask);
@@ -5008,7 +4940,8 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             break;
 
         case GT_PINVOKE_PROLOG:
-            noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
+            noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) &
+                          ~fullIntArgRegMask(compiler->info.compCallConv)) == 0);
 
 // the runtime side requires the codegen here to be consistent
 #ifdef PSEUDORANDOM_NOP_INSERTION
@@ -5021,7 +4954,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             emit->emitIns_R_L(INS_ld_d, EA_PTRSIZE, genPendingCallLabel, targetReg);
             break;
 
-        case GT_STORE_DYN_BLK:
         case GT_STORE_BLK:
             genCodeForStoreBlk(treeNode->AsBlk());
             break;
@@ -5316,7 +5248,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
 
                 // addrNode can either be a GT_LCL_ADDR<0> or an address expression
                 //
-                if (addrNode->IsLclVarAddr())
+                if (addrNode->isContained() && addrNode->IsLclVarAddr())
                 {
                     // We have a GT_BLK(GT_LCL_ADDR<0>)
                     //
@@ -5589,7 +5521,7 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode)
 
         // addrNode can either be a GT_LCL_ADDR<0> or an address expression
         //
-        if (addrNode->IsLclVarAddr())
+        if (addrNode->isContained() && addrNode->IsLclVarAddr())
         {
             // We have a GT_BLK(GT_LCL_ADDR<0>)
             //
@@ -6130,37 +6062,6 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
     genProduceReg(tree);
 }
 
-//----------------------------------------------------------------------------------
-// genCodeForCpBlkHelper - Generate code for a CpBlk node by the means of the VM memcpy helper call
-//
-// Arguments:
-//    cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK]
-//
-// Preconditions:
-//   The register assignments have been set appropriately.
-//   This is validated by genConsumeBlockOp().
-//
-void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode)
-{
-    // Destination address goes in arg0, source address goes in arg1, and size goes in arg2.
-    // genConsumeBlockOp takes care of this for us.
-    genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
-    if (cpBlkNode->IsVolatile())
-    {
-        // issue a full memory barrier before a volatile CpBlk operation
-        instGen_MemoryBarrier();
-    }
-
-    genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
-
-    if (cpBlkNode->IsVolatile())
-    {
-        // issue a INS_BARRIER_RMB after a volatile CpBlk operation
-        instGen_MemoryBarrier(BARRIER_FULL);
-    }
-}
-
 //----------------------------------------------------------------------------------
 // genCodeForCpBlkUnroll: Generates CpBlk code by performing a loop unroll
 //
@@ -6343,31 +6244,6 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
     }
 }
 
-//------------------------------------------------------------------------
-// genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call
-//
-// Arguments:
-//    initBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK]
-//
-// Preconditions:
-//   The register assignments have been set appropriately.
-//   This is validated by genConsumeBlockOp().
-//
-void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode)
-{
-    // Size goes in arg2, source address goes in arg1, and size goes in arg2.
-    // genConsumeBlockOp takes care of this for us.
-    genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
-    if (initBlkNode->IsVolatile())
-    {
-        // issue a full memory barrier before a volatile initBlock Operation
-        instGen_MemoryBarrier();
-    }
-
-    genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
-}
-
 //------------------------------------------------------------------------
 // genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop.
 //    It's needed for cases when size is too big to unroll and we're not allowed
@@ -6416,27 +6292,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode)
     }
 }
 
-// Generate code for a load from some address + offset
-//   base: tree node which can be either a local address or arbitrary node
-//   offset: distance from the base from which to load
-void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
-{
-    emitter* emit = GetEmitter();
-
-    if (base->OperIs(GT_LCL_ADDR))
-    {
-        if (base->gtOper == GT_LCL_ADDR)
-        {
-            offset += base->AsLclFld()->GetLclOffs();
-        }
-        emit->emitIns_R_S(ins, size, dst, base->AsLclVarCommon()->GetLclNum(), offset);
-    }
-    else
-    {
-        emit->emitIns_R_R_I(ins, size, dst, base->GetRegNum(), offset);
-    }
-}
-
 //------------------------------------------------------------------------
 // genCall: Produce code for a GT_CALL node
 //
@@ -6581,7 +6436,7 @@ void CodeGen::genCall(GenTreeCall* call)
             for (unsigned i = 0; i < regCount; ++i)
             {
                 var_types regType      = pRetTypeDesc->GetReturnRegType(i);
-                returnReg              = pRetTypeDesc->GetABIReturnReg(i);
+                returnReg              = pRetTypeDesc->GetABIReturnReg(i, call->GetUnmanagedCallConv());
                 regNumber allocatedReg = call->GetRegNumByIdx(i);
                 inst_Mov(regType, allocatedReg, returnReg, /* canSkip */ true);
             }
@@ -6792,19 +6647,19 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
             else
 #endif // FEATURE_READYTORUN
                 if (call->gtCallType == CT_HELPER)
-            {
-                CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
-                noway_assert(helperNum != CORINFO_HELP_UNDEF);
+                {
+                    CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
+                    noway_assert(helperNum != CORINFO_HELP_UNDEF);
 
-                void* pAddr = nullptr;
-                addr        = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
-                assert(pAddr == nullptr);
-            }
-            else
-            {
-                // Direct call to a non-virtual user function.
-                addr = call->gtDirectCallAddress;
-            }
+                    void* pAddr = nullptr;
+                    addr        = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+                    assert(pAddr == nullptr);
+                }
+                else
+                {
+                    // Direct call to a non-virtual user function.
+                    addr = call->gtDirectCallAddress;
+                }
 
             assert(addr != nullptr);
 
@@ -7237,8 +7092,8 @@ void CodeGen::genFloatToFloatCast(GenTree* treeNode)
 //------------------------------------------------------------------------
 // genCreateAndStoreGCInfo: Create and record GC Info for the function.
 //
-void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
-                                      unsigned prologSize,
+void CodeGen::genCreateAndStoreGCInfo(unsigned            codeSize,
+                                      unsigned            prologSize,
                                       unsigned epilogSize DEBUGARG(void* codePtr))
 {
     IAllocator*    allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC());
@@ -7304,14 +7159,14 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
 }
 
 //------------------------------------------------------------------------
-// genCodeForStoreBlk: Produce code for a GT_STORE_DYN_BLK/GT_STORE_BLK node.
+// genCodeForStoreBlk: Produce code for a GT_STORE_BLK node.
 //
 // Arguments:
 //    tree - the node
 //
 void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
 {
-    assert(blkOp->OperIs(GT_STORE_DYN_BLK, GT_STORE_BLK));
+    assert(blkOp->OperIs(GT_STORE_BLK));
 
     if (blkOp->gtBlkOpGcUnsafe)
     {
@@ -7332,17 +7187,6 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
             genCodeForInitBlkLoop(blkOp);
             break;
 
-        case GenTreeBlk::BlkOpKindHelper:
-            if (isCopyBlk)
-            {
-                genCodeForCpBlkHelper(blkOp);
-            }
-            else
-            {
-                genCodeForInitBlkHelper(blkOp);
-            }
-            break;
-
         case GenTreeBlk::BlkOpKindUnroll:
             if (isCopyBlk)
             {
@@ -7770,7 +7614,7 @@ inline void CodeGen::genJumpToThrowHlpBlk_la(
                            callTarget,                                                    /* ireg */
                            REG_NA, 0, 0,                                                  /* xreg, xmul, disp */
                            false                                                          /* isJump */
-                           );
+        );
 
         regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)(compiler->acdHelper(codeKind)));
         regSet.verifyRegistersUsed(killMask);
@@ -7869,7 +7713,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe
 {
     assert(compiler->compGeneratingProlog);
 
-    regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+    regMaskTP rsPushRegs = regSet.rsGetModifiedCalleeSavedRegsMask();
 
 #if ETW_EBP_FRAMED
     if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
@@ -8034,7 +7878,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
 {
     assert(compiler->compGeneratingEpilog);
 
-    regMaskTP regsToRestoreMask = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+    regMaskTP regsToRestoreMask = regSet.rsGetModifiedCalleeSavedRegsMask();
 
     assert(isFramePointerUsed());
 
@@ -8707,7 +8551,11 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
 
     genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
 
-    if ((genRegMask(initReg) & RBM_PROFILER_ENTER_TRASH) != RBM_NONE)
+    // If initReg is trashed, either because it was an arg to the enter
+    // callback, or because the enter callback itself trashes it, then it needs
+    // to be zero'ed again before using.
+    if (((RBM_PROFILER_ENTER_TRASH | RBM_PROFILER_ENTER_ARG_FUNC_ID | RBM_PROFILER_ENTER_ARG_CALLER_SP) &
+         genRegMask(initReg)) != RBM_NONE)
     {
         *pInitRegZeroed = false;
     }
diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp
index a468c026c22c..0df6f56c5b76 100644
--- a/src/coreclr/jit/codegenriscv64.cpp
+++ b/src/coreclr/jit/codegenriscv64.cpp
@@ -80,6 +80,9 @@ bool CodeGen::genInstrWithConstant(instruction ins,
         case INS_flw:
         case INS_ld:
         case INS_fld:
+        case INS_lbu:
+        case INS_lhu:
+        case INS_lwu:
             break;
 
         default:
@@ -1209,9 +1212,9 @@ void CodeGen::genFnEpilog(BasicBlock* block)
 #if !FEATURE_FASTTAILCALL
         noway_assert(jmpNode->gtOper == GT_JMP);
 #else  // FEATURE_FASTTAILCALL
-        // armarch
-        // If jmpNode is GT_JMP then gtNext must be null.
-        // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
+       // armarch
+       // If jmpNode is GT_JMP then gtNext must be null.
+       // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
         noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
 
         // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
@@ -1291,7 +1294,6 @@ void CodeGen::genFnEpilog(BasicBlock* block)
                                        0,             // disp
                                        true);         // isJump
             // clang-format on
-            CLANG_FORMAT_COMMENT_ANCHOR;
         }
 #if FEATURE_FASTTAILCALL
         else
@@ -1531,9 +1533,9 @@ void CodeGen::genEHCatchRet(BasicBlock* block)
 }
 
 //  move an immediate value into an integer register
-void CodeGen::instGen_Set_Reg_To_Imm(emitAttr  size,
-                                     regNumber reg,
-                                     ssize_t   imm,
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr       size,
+                                     regNumber      reg,
+                                     ssize_t        imm,
                                      insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
 {
     emitter* emit = GetEmitter();
@@ -2482,18 +2484,6 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree)
         }
         else // if (tree->OperIs(GT_UDIV, GT_UMOD))
         {
-            // Only one possible exception
-            //     (AnyVal /  0) => DivideByZeroException
-            //
-            // Note that division by the constant 0 was already checked for above by the
-            // op2->IsIntegralConst(0) check
-
-            if (!divisorOp->IsCnsIntOrI())
-            {
-                // divisorOp is not a constant, so it could be zero
-                genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg);
-            }
-
             if (tree->OperIs(GT_UDIV))
             {
                 ins = is4 ? INS_divuw : INS_divu;
@@ -2511,7 +2501,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree)
 // Generate code for InitBlk by performing a loop unroll
 // Preconditions:
 //   a) Both the size and fill byte value are integer constants.
-//   b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
+//   b) The size of the struct to initialize is smaller than getUnrollThreshold() bytes.
 void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
 {
     assert(node->OperIs(GT_STORE_BLK));
@@ -2849,32 +2839,7 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode)
 // emits the table and an instruction to get the address of the first element
 void CodeGen::genJumpTable(GenTree* treeNode)
 {
-    noway_assert(compiler->compCurBB->KindIs(BBJ_SWITCH));
-    assert(treeNode->OperGet() == GT_JMPTABLE);
-
-    unsigned     jumpCount = compiler->compCurBB->GetSwitchTargets()->bbsCount;
-    BasicBlock** jumpTable = compiler->compCurBB->GetSwitchTargets()->bbsDstTab;
-    unsigned     jmpTabOffs;
-    unsigned     jmpTabBase;
-
-    jmpTabBase = GetEmitter()->emitBBTableDataGenBeg(jumpCount, true);
-
-    jmpTabOffs = 0;
-
-    JITDUMP("\n      J_M%03u_DS%02u LABEL   DWORD\n", compiler->compMethodID, jmpTabBase);
-
-    for (unsigned i = 0; i < jumpCount; i++)
-    {
-        BasicBlock* target = *jumpTable++;
-        noway_assert(target->HasFlag(BBF_HAS_LABEL));
-
-        JITDUMP("            DD      L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum);
-
-        GetEmitter()->emitDataGenData(i, target);
-    };
-
-    GetEmitter()->emitDataGenEnd();
-
+    unsigned jmpTabBase = genEmitJumpTable(treeNode, true);
     // Access to inline data is 'abstracted' by a special type of static member
     // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
     // to constant data, not a real static field.
@@ -3372,7 +3337,7 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree)
                                callTarget,                                                    /* ireg */
                                REG_NA, 0, 0,                                                  /* xreg, xmul, disp */
                                false                                                          /* isJump */
-                               );
+    );
 
     regMaskTP killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
     regSet.verifyRegistersUsed(killMask);
@@ -4393,7 +4358,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
                                callTarget,                           /* ireg */
                                REG_NA, 0, 0,                         /* xreg, xmul, disp */
                                false                                 /* isJump */
-                               );
+    );
 
     regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
     regSet.verifyRegistersUsed(killMask);
@@ -5088,7 +5053,8 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             break;
 
         case GT_PINVOKE_PROLOG:
-            noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
+            noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) &
+                          ~fullIntArgRegMask(compiler->info.compCallConv)) == 0);
 
 // the runtime side requires the codegen here to be consistent
 #ifdef PSEUDORANDOM_NOP_INSERTION
@@ -5101,7 +5067,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             emit->emitIns_R_L(INS_ld, EA_PTRSIZE, genPendingCallLabel, targetReg);
             break;
 
-        case GT_STORE_DYN_BLK:
         case GT_STORE_BLK:
             genCodeForStoreBlk(treeNode->AsBlk());
             break;
@@ -6166,37 +6131,6 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
     genProduceReg(tree);
 }
 
-//----------------------------------------------------------------------------------
-// genCodeForCpBlkHelper - Generate code for a CpBlk node by the means of the VM memcpy helper call
-//
-// Arguments:
-//    cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK]
-//
-// Preconditions:
-//   The register assignments have been set appropriately.
-//   This is validated by genConsumeBlockOp().
-//
-void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode)
-{
-    // Destination address goes in arg0, source address goes in arg1, and size goes in arg2.
-    // genConsumeBlockOp takes care of this for us.
-    genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
-    if (cpBlkNode->IsVolatile())
-    {
-        // issue a full memory barrier before a volatile CpBlk operation
-        instGen_MemoryBarrier();
-    }
-
-    genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
-
-    if (cpBlkNode->IsVolatile())
-    {
-        // issue a INS_BARRIER_RMB after a volatile CpBlk operation
-        instGen_MemoryBarrier(BARRIER_FULL);
-    }
-}
-
 //----------------------------------------------------------------------------------
 // genCodeForCpBlkUnroll: Generates CpBlk code by performing a loop unroll
 //
@@ -6207,7 +6141,7 @@ void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode)
 //    None
 //
 // Assumption:
-//  The size argument of the CpBlk node is a constant and <= CPBLK_UNROLL_LIMIT bytes.
+//  The size argument of the CpBlk node is a constant and <= getUnrollThreshold() bytes.
 //
 void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
 {
@@ -6434,31 +6368,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode)
     }
 }
 
-//------------------------------------------------------------------------
-// genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call
-//
-// Arguments:
-//    initBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK]
-//
-// Preconditions:
-//   The register assignments have been set appropriately.
-//   This is validated by genConsumeBlockOp().
-//
-void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode)
-{
-    // Size goes in arg2, source address goes in arg1, and size goes in arg2.
-    // genConsumeBlockOp takes care of this for us.
-    genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
-    if (initBlkNode->IsVolatile())
-    {
-        // issue a full memory barrier before a volatile initBlock Operation
-        instGen_MemoryBarrier();
-    }
-
-    genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
-}
-
 //------------------------------------------------------------------------
 // genCall: Produce code for a GT_CALL node
 //
@@ -6603,7 +6512,7 @@ void CodeGen::genCall(GenTreeCall* call)
             for (unsigned i = 0; i < regCount; ++i)
             {
                 var_types regType      = pRetTypeDesc->GetReturnRegType(i);
-                returnReg              = pRetTypeDesc->GetABIReturnReg(i);
+                returnReg              = pRetTypeDesc->GetABIReturnReg(i, call->GetUnmanagedCallConv());
                 regNumber allocatedReg = call->GetRegNumByIdx(i);
                 inst_Mov(regType, allocatedReg, returnReg, /* canSkip */ true);
             }
@@ -6814,19 +6723,19 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
             else
 #endif // FEATURE_READYTORUN
                 if (call->gtCallType == CT_HELPER)
-            {
-                CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
-                noway_assert(helperNum != CORINFO_HELP_UNDEF);
+                {
+                    CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
+                    noway_assert(helperNum != CORINFO_HELP_UNDEF);
 
-                void* pAddr = nullptr;
-                addr        = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
-                assert(pAddr == nullptr);
-            }
-            else
-            {
-                // Direct call to a non-virtual user function.
-                addr = call->gtDirectCallAddress;
-            }
+                    void* pAddr = nullptr;
+                    addr        = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+                    assert(pAddr == nullptr);
+                }
+                else
+                {
+                    // Direct call to a non-virtual user function.
+                    addr = call->gtDirectCallAddress;
+                }
 
             assert(addr != nullptr);
 
@@ -7253,8 +7162,8 @@ void CodeGen::genFloatToFloatCast(GenTree* treeNode)
 //------------------------------------------------------------------------
 // genCreateAndStoreGCInfo: Create and record GC Info for the function.
 //
-void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
-                                      unsigned prologSize,
+void CodeGen::genCreateAndStoreGCInfo(unsigned            codeSize,
+                                      unsigned            prologSize,
                                       unsigned epilogSize DEBUGARG(void* codePtr))
 {
     IAllocator*    allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC());
@@ -7301,14 +7210,14 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
 }
 
 //------------------------------------------------------------------------
-// genCodeForStoreBlk: Produce code for a GT_STORE_DYN_BLK/GT_STORE_BLK node.
+// genCodeForStoreBlk: Produce code for a GT_STORE_BLK node.
 //
 // Arguments:
 //    tree - the node
 //
 void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
 {
-    assert(blkOp->OperIs(GT_STORE_DYN_BLK, GT_STORE_BLK));
+    assert(blkOp->OperIs(GT_STORE_BLK));
 
     if (blkOp->gtBlkOpGcUnsafe)
     {
@@ -7328,17 +7237,6 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
             genCodeForInitBlkLoop(blkOp);
             break;
 
-        case GenTreeBlk::BlkOpKindHelper:
-            if (isCopyBlk)
-            {
-                genCodeForCpBlkHelper(blkOp);
-            }
-            else
-            {
-                genCodeForInitBlkHelper(blkOp);
-            }
-            break;
-
         case GenTreeBlk::BlkOpKindUnroll:
             if (isCopyBlk)
             {
@@ -7784,7 +7682,7 @@ void CodeGen::genJumpToThrowHlpBlk_la(
                            callTarget,                                                    /* ireg */
                            REG_NA, 0, 0,                                                  /* xreg, xmul, disp */
                            false                                                          /* isJump */
-                           );
+        );
 
         regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)(compiler->acdHelper(codeKind)));
         regSet.verifyRegistersUsed(killMask);
@@ -7887,8 +7785,13 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe
 {
     assert(compiler->compGeneratingProlog);
 
+    // The 'initReg' could have been calculated as one of the callee-saved registers (let's say T0, T1 and T2 are in
+    // use, so the next possible register is S1, which should be callee-save register). This is fine, as long as we
+    // save callee-saved registers before using 'initReg' for the first time. Instead, we can use REG_SCRATCH
+    // beforehand. We don't care if REG_SCRATCH will be overwritten, so we'll skip 'RegZeroed check'.
+    //
     // Unlike on x86/x64, we can also push float registers to stack
-    regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+    regMaskTP rsPushRegs = regSet.rsGetModifiedCalleeSavedRegsMask();
 
 #if ETW_EBP_FRAMED
     if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
@@ -7999,11 +7902,11 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe
             calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN);
             offset            = calleeSaveSPDelta - offset;
 
-            genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true);
+            genStackPointerAdjustment(-calleeSaveSPDelta, REG_SCRATCH, nullptr, /* reportUnwindData */ true);
         }
         else
         {
-            genStackPointerAdjustment(-totalFrameSize, initReg, pInitRegZeroed, /* reportUnwindData */ true);
+            genStackPointerAdjustment(-totalFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true);
         }
     }
 
@@ -8012,6 +7915,8 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe
     genSaveCalleeSavedRegistersHelp(rsPushRegs, offset, 0);
     offset += (int)(genCountBits(rsPushRegs) << 3); // each reg has 8 bytes
 
+    // From now on, we can safely use initReg.
+
     emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, offset);
     compiler->unwindSaveReg(REG_RA, offset);
 
@@ -8049,7 +7954,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
 {
     assert(compiler->compGeneratingEpilog);
 
-    regMaskTP regsToRestoreMask = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+    regMaskTP regsToRestoreMask = regSet.rsGetModifiedCalleeSavedRegsMask();
 
     // On RV64 we always use the FP (frame-pointer)
     assert(isFramePointerUsed());
@@ -8081,7 +7986,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
                 unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed,
                 dspBool(compiler->compLocallocUsed));
 
-        if ((compiler->lvaOutgoingArgSpaceSize + (compiler->compCalleeRegsPushed << 3)) >= 2040)
+        if ((compiler->lvaOutgoingArgSpaceSize + (compiler->compCalleeRegsPushed << 3)) > 2047)
         {
             calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize & 0xfffffff0;
 
@@ -8093,8 +7998,8 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
             {
                 genStackPointerAdjustment(calleeSaveSPOffset, REG_RA, nullptr, /* reportUnwindData */ true);
             }
+            remainingSPSize    = totalFrameSize - calleeSaveSPOffset;
             calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize - calleeSaveSPOffset;
-            remainingSPSize    = remainingSPSize - calleeSaveSPOffset;
         }
         else
         {
@@ -8249,7 +8154,7 @@ void CodeGen::genFnPrologCalleeRegArgs()
                     {
                         if (genIsValidIntReg(varDsc->GetArgReg()))
                         {
-                            assert(isValidIntArgReg(varDsc->GetArgReg()));
+                            assert(isValidIntArgReg(varDsc->GetArgReg(), compiler->info.compCallConv));
                             regArg[varDsc->GetArgReg() - REG_ARG_FIRST]     = varDsc->GetArgReg();
                             regArgInit[varDsc->GetArgReg() - REG_ARG_FIRST] = varDsc->GetArgInitReg();
                             regArgAttr[varDsc->GetArgReg() - REG_ARG_FIRST] =
@@ -8499,10 +8404,11 @@ void CodeGen::genFnPrologCalleeRegArgs()
     {
         for (int i = MAX_REG_ARG + MAX_FLOAT_REG_ARG - 1; i >= 0; i--)
         {
-            if (regArg[i] != REG_NA && !isValidIntArgReg(regArgInit[i]) && !isValidFloatArgReg(regArgInit[i]))
+            if (regArg[i] != REG_NA && !isValidIntArgReg(regArgInit[i], compiler->info.compCallConv) &&
+                !isValidFloatArgReg(regArgInit[i]))
             {
                 assert(regArg[i] != regArgInit[i]);
-                assert(isValidIntArgReg(regArg[i]) || isValidFloatArgReg(regArg[i]));
+                assert(isValidIntArgReg(regArg[i], compiler->info.compCallConv) || isValidFloatArgReg(regArg[i]));
 
                 GetEmitter()->emitIns_Mov(regArgAttr[i], regArgInit[i], regArg[i], false);
 
@@ -8542,9 +8448,10 @@ void CodeGen::genFnPrologCalleeRegArgs()
                         assert(cur != indexList[count2] && "Attempt to move several values on same register.");
                     }
                     assert(cur < MAX_REG_ARG + MAX_FLOAT_REG_ARG);
-                    assert(isValidIntArgReg(regArg[cur]) || isValidFloatArgReg(regArg[cur]));
+                    assert(isValidIntArgReg(regArg[cur], compiler->info.compCallConv) ||
+                           isValidFloatArgReg(regArg[cur]));
 
-                    if (isValidIntArgReg(regArgInit[cur]))
+                    if (isValidIntArgReg(regArgInit[cur], compiler->info.compCallConv))
                     {
                         cur = regArgInit[cur] - REG_ARG_FIRST;
                     }
@@ -8634,7 +8541,11 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
 
     genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
 
-    if ((genRegMask(initReg) & RBM_PROFILER_ENTER_TRASH))
+    // If initReg is trashed, either because it was an arg to the enter
+    // callback, or because the enter callback itself trashes it, then it needs
+    // to be zero'ed again before using.
+    if (((RBM_PROFILER_ENTER_TRASH | RBM_PROFILER_ENTER_ARG_FUNC_ID | RBM_PROFILER_ENTER_ARG_CALLER_SP) &
+         genRegMask(initReg)) != RBM_NONE)
     {
         *pInitRegZeroed = false;
     }
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 03a255d98078..3e5f1a4b38a6 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -121,7 +121,8 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
 
             for (unsigned i = 0; i < regCount; ++i)
             {
-                gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
+                gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv),
+                                       retTypeDesc.GetReturnRegType(i));
             }
         }
     }
@@ -155,9 +156,9 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
         regGSCheck     = REG_EAX;
         regMaskGSCheck = RBM_EAX;
 #else  // !TARGET_X86
-        // Jmp calls: specify method handle using which JIT queries VM for its entry point
-        // address and hence it can neither be a VSD call nor PInvoke calli with cookie
-        // parameter.  Therefore, in case of jmp calls it is safe to use R11.
+       // Jmp calls: specify method handle using which JIT queries VM for its entry point
+       // address and hence it can neither be a VSD call nor PInvoke calli with cookie
+       // parameter.  Therefore, in case of jmp calls it is safe to use R11.
         regGSCheck = REG_R11;
 #endif // !TARGET_X86
     }
@@ -209,130 +210,134 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
 
     BasicBlock* const nextBlock = block->Next();
 
-#if defined(FEATURE_EH_FUNCLETS)
-    // Generate a call to the finally, like this:
-    //      mov         rcx,qword ptr [rbp + 20H]       // Load rcx with PSPSym
-    //      call        finally-funclet
-    //      jmp         finally-return                  // Only for non-retless finally calls
-    // The jmp can be a NOP if we're going to the next block.
-    // If we're generating code for the main function (not a funclet), and there is no localloc,
-    // then RSP at this point is the same value as that stored in the PSPSym. So just copy RSP
-    // instead of loading the PSPSym in this case, or if PSPSym is not used (NativeAOT ABI).
-
-    if ((compiler->lvaPSPSym == BAD_VAR_NUM) ||
-        (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT)))
+    if (compiler->UsesFunclets())
     {
+        // Generate a call to the finally, like this:
+        //      mov         rcx,qword ptr [rbp + 20H]       // Load rcx with PSPSym
+        //      call        finally-funclet
+        //      jmp         finally-return                  // Only for non-retless finally calls
+        // The jmp can be a NOP if we're going to the next block.
+        // If we're generating code for the main function (not a funclet), and there is no localloc,
+        // then RSP at this point is the same value as that stored in the PSPSym. So just copy RSP
+        // instead of loading the PSPSym in this case, or if PSPSym is not used (NativeAOT ABI).
+
+        if ((compiler->lvaPSPSym == BAD_VAR_NUM) ||
+            (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT)))
+        {
 #ifndef UNIX_X86_ABI
-        inst_Mov(TYP_I_IMPL, REG_ARG_0, REG_SPBASE, /* canSkip */ false);
+            inst_Mov(TYP_I_IMPL, REG_ARG_0, REG_SPBASE, /* canSkip */ false);
 #endif // !UNIX_X86_ABI
-    }
-    else
-    {
-        GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
-    }
-    GetEmitter()->emitIns_J(INS_call, block->GetTarget());
-
-    if (block->HasFlag(BBF_RETLESS_CALL))
-    {
-        // We have a retless call, and the last instruction generated was a call.
-        // If the next block is in a different EH region (or is the end of the code
-        // block), then we need to generate a breakpoint here (since it will never
-        // get executed) to get proper unwind behavior.
+        }
+        else
+        {
+            GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
+        }
+        GetEmitter()->emitIns_J(INS_call, block->GetTarget());
 
-        if ((nextBlock == nullptr) || !BasicBlock::sameEHRegion(block, nextBlock))
+        if (block->HasFlag(BBF_RETLESS_CALL))
         {
-            instGen(INS_BREAKPOINT); // This should never get executed
+            // We have a retless call, and the last instruction generated was a call.
+            // If the next block is in a different EH region (or is the end of the code
+            // block), then we need to generate a breakpoint here (since it will never
+            // get executed) to get proper unwind behavior.
+
+            if ((nextBlock == nullptr) || !BasicBlock::sameEHRegion(block, nextBlock))
+            {
+                instGen(INS_BREAKPOINT); // This should never get executed
+            }
         }
-    }
-    else
-    {
+        else
+        {
 // TODO-Linux-x86: Do we need to handle the GC information for this NOP or JMP specially, as is done for other
 // architectures?
 #ifndef JIT32_GCENCODER
-        // Because of the way the flowgraph is connected, the liveness info for this one instruction
-        // after the call is not (can not be) correct in cases where a variable has a last use in the
-        // handler.  So turn off GC reporting for this single instruction.
-        GetEmitter()->emitDisableGC();
+            // Because of the way the flowgraph is connected, the liveness info for this one instruction
+            // after the call is not (can not be) correct in cases where a variable has a last use in the
+            // handler.  So turn off GC reporting for this single instruction.
+            GetEmitter()->emitDisableGC();
 #endif // JIT32_GCENCODER
 
-        BasicBlock* const finallyContinuation = nextBlock->GetFinallyContinuation();
+            BasicBlock* const finallyContinuation = nextBlock->GetFinallyContinuation();
 
-        // Now go to where the finally funclet needs to return to.
-        if (nextBlock->NextIs(finallyContinuation) && !compiler->fgInDifferentRegions(nextBlock, finallyContinuation))
-        {
-            // Fall-through.
-            // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
-            // to the next instruction? This would depend on stack walking from within the finally
-            // handler working without this instruction being in this special EH region.
-            instGen(INS_nop);
-        }
-        else
-        {
-            inst_JMP(EJ_jmp, finallyContinuation);
-        }
+            // Now go to where the finally funclet needs to return to.
+            if (nextBlock->NextIs(finallyContinuation) &&
+                !compiler->fgInDifferentRegions(nextBlock, finallyContinuation))
+            {
+                // Fall-through.
+                // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
+                // to the next instruction? This would depend on stack walking from within the finally
+                // handler working without this instruction being in this special EH region.
+                instGen(INS_nop);
+            }
+            else
+            {
+                inst_JMP(EJ_jmp, finallyContinuation);
+            }
 
 #ifndef JIT32_GCENCODER
-        GetEmitter()->emitEnableGC();
+            GetEmitter()->emitEnableGC();
 #endif // JIT32_GCENCODER
+        }
     }
-
-#else // !FEATURE_EH_FUNCLETS
-
-    // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
-    // corresponding to the finally's nesting level. When invoked in response to an exception, the
-    // EE does this.
-    //
-    // We have a BBJ_CALLFINALLY possibly paired with a following BBJ_CALLFINALLYRET.
-    //
-    // We will emit :
-    //      mov [ebp - (n + 1)], 0
-    //      mov [ebp -  n     ], 0xFC
-    //      push &step
-    //      jmp  finallyBlock
-    // ...
-    // step:
-    //      mov [ebp -  n     ], 0
-    //      jmp leaveTarget
-    // ...
-    // leaveTarget:
-
-    noway_assert(isFramePointerUsed());
-
-    // Get the nesting level which contains the finally
-    unsigned finallyNesting = 0;
-    compiler->fgGetNestingLevel(block, &finallyNesting);
-
-    // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
-    unsigned filterEndOffsetSlotOffs;
-    filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
-
-    unsigned curNestingSlotOffs;
-    curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
-
-    // Zero out the slot for the next nesting level
-    GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar,
-                              curNestingSlotOffs - TARGET_POINTER_SIZE, 0);
-    GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar, curNestingSlotOffs, LCL_FINALLY_MARK);
-
-    // Now push the address where the finally funclet should return to directly.
-    if (!block->HasFlag(BBF_RETLESS_CALL))
-    {
-        assert(block->isBBCallFinallyPair());
-        GetEmitter()->emitIns_J(INS_push_hide, nextBlock->GetFinallyContinuation());
-    }
+#if defined(FEATURE_EH_WINDOWS_X86)
     else
     {
-        // EE expects a DWORD, so we provide 0
-        inst_IV(INS_push_hide, 0);
-    }
-
-    // Jump to the finally BB
-    inst_JMP(EJ_jmp, block->GetTarget());
+        // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
+        // corresponding to the finally's nesting level. When invoked in response to an exception, the
+        // EE does this.
+        //
+        // We have a BBJ_CALLFINALLY possibly paired with a following BBJ_CALLFINALLYRET.
+        //
+        // We will emit :
+        //      mov [ebp - (n + 1)], 0
+        //      mov [ebp -  n     ], 0xFC
+        //      push &step
+        //      jmp  finallyBlock
+        // ...
+        // step:
+        //      mov [ebp -  n     ], 0
+        //      jmp leaveTarget
+        // ...
+        // leaveTarget:
+
+        noway_assert(isFramePointerUsed());
+
+        // Get the nesting level which contains the finally
+        unsigned finallyNesting = 0;
+        compiler->fgGetNestingLevel(block, &finallyNesting);
+
+        // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+        unsigned filterEndOffsetSlotOffs;
+        filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
+
+        unsigned curNestingSlotOffs;
+        curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
+
+        // Zero out the slot for the next nesting level
+        GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar,
+                                  curNestingSlotOffs - TARGET_POINTER_SIZE, 0);
+        GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar, curNestingSlotOffs,
+                                  LCL_FINALLY_MARK);
+
+        // Now push the address where the finally funclet should return to directly.
+        if (!block->HasFlag(BBF_RETLESS_CALL))
+        {
+            assert(block->isBBCallFinallyPair());
+            GetEmitter()->emitIns_J(INS_push_hide, nextBlock->GetFinallyContinuation());
+        }
+        else
+        {
+            // EE expects a DWORD, so we provide 0
+            inst_IV(INS_push_hide, 0);
+        }
 
-#endif // !FEATURE_EH_FUNCLETS
+        // Jump to the finally BB
+        inst_JMP(EJ_jmp, block->GetTarget());
+    }
+#endif // FEATURE_EH_WINDOWS_X86
 
     // The BBJ_CALLFINALLYRET is used because the BBJ_CALLFINALLY can't point to the
-    // jump target using bbTarget - that is already used to point
+    // jump target using bbTargetEdge - that is already used to point
     // to the finally block. So just skip past the BBJ_CALLFINALLYRET unless the
     // block is RETLESS.
     if (!block->HasFlag(BBF_RETLESS_CALL))
@@ -343,7 +348,6 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
     return block;
 }
 
-#if defined(FEATURE_EH_FUNCLETS)
 void CodeGen::genEHCatchRet(BasicBlock* block)
 {
     // Set RAX to the address the VM should return to after the catch.
@@ -353,10 +357,11 @@ void CodeGen::genEHCatchRet(BasicBlock* block)
     GetEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->GetTarget(), REG_INTRET);
 }
 
-#else // !FEATURE_EH_FUNCLETS
+#if defined(FEATURE_EH_WINDOWS_X86)
 
 void CodeGen::genEHFinallyOrFilterRet(BasicBlock* block)
 {
+    assert(!compiler->UsesFunclets());
     // The last statement of the block must be a GT_RETFILT, which has already been generated.
     assert(block->lastNode() != nullptr);
     assert(block->lastNode()->OperGet() == GT_RETFILT);
@@ -382,13 +387,13 @@ void CodeGen::genEHFinallyOrFilterRet(BasicBlock* block)
     }
 }
 
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
 //  Move an immediate value into an integer register
 
-void CodeGen::instGen_Set_Reg_To_Imm(emitAttr  size,
-                                     regNumber reg,
-                                     ssize_t   imm,
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr       size,
+                                     regNumber      reg,
+                                     ssize_t        imm,
                                      insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
 {
     // reg cannot be a FP register
@@ -1318,8 +1323,8 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc)
     // This is a case of operand is in a single reg and needs to be
     // returned in multiple ABI return registers.
     regNumber opReg = src->GetRegNum();
-    regNumber reg0  = retTypeDesc->GetABIReturnReg(0);
-    regNumber reg1  = retTypeDesc->GetABIReturnReg(1);
+    regNumber reg0  = retTypeDesc->GetABIReturnReg(0, compiler->info.compCallConv);
+    regNumber reg1  = retTypeDesc->GetABIReturnReg(1, compiler->info.compCallConv);
 
     assert((reg0 != REG_NA) && (reg1 != REG_NA) && (opReg != REG_NA));
 
@@ -1718,7 +1723,6 @@ void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock, bool isRemovableJ
     //
     // Thus only on x86 do we need to assert that the stack level at the target block matches the current stack level.
     //
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef UNIX_X86_ABI
     // bbTgtStkDepth is a (pure) argument count (stack alignment padding should be excluded).
@@ -2107,6 +2111,12 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
         case GT_NOP:
             break;
 
+#ifdef SWIFT_SUPPORT
+        case GT_SWIFT_ERROR:
+            genCodeForSwiftErrorReg(treeNode);
+            break;
+#endif // SWIFT_SUPPORT
+
         case GT_KEEPALIVE:
             genConsumeRegs(treeNode->AsOp()->gtOp1);
             break;
@@ -2138,7 +2148,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             genConsumeReg(treeNode);
             break;
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
         case GT_END_LFIN:
 
             // Have to clear the ShadowSP of the nesting level which encloses the finally. Generates:
@@ -2151,8 +2161,8 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
 
             // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
             unsigned filterEndOffsetSlotOffs;
-            PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
-                          TARGET_POINTER_SIZE); // below doesn't underflow.
+            PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) > TARGET_POINTER_SIZE); // below doesn't
+                                                                                                      // underflow.
             filterEndOffsetSlotOffs =
                 (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
 
@@ -2161,10 +2171,11 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar, (unsigned)curNestingSlotOffs,
                                       0);
             break;
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
         case GT_PINVOKE_PROLOG:
-            noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
+            noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) &
+                          ~fullIntArgRegMask(compiler->info.compCallConv)) == 0);
 
 #ifdef PSEUDORANDOM_NOP_INSERTION
             // the runtime side requires the codegen here to be consistent
@@ -2177,7 +2188,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->GetRegNum());
             break;
 
-        case GT_STORE_DYN_BLK:
         case GT_STORE_BLK:
             genCodeForStoreBlk(treeNode->AsBlk());
             break;
@@ -2242,7 +2252,6 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode)
     // This case is always a call (AsCall() will assert if it is not).
     GenTreeCall*          call        = actualOp1->AsCall();
     const ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
-    assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT);
 
     assert(regCount == 2);
     regNumber targetReg = lclNode->GetRegNum();
@@ -3051,7 +3060,7 @@ void CodeGen::genLclHeap(GenTree* tree)
 
 void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
 {
-    assert(storeBlkNode->OperIs(GT_STORE_DYN_BLK, GT_STORE_BLK));
+    assert(storeBlkNode->OperIs(GT_STORE_BLK));
 
     bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();
 
@@ -3070,19 +3079,6 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
             genCodeForInitBlkLoop(storeBlkNode);
             break;
 
-#ifdef TARGET_AMD64
-        case GenTreeBlk::BlkOpKindHelper:
-            assert(!storeBlkNode->gtBlkOpGcUnsafe);
-            if (isCopyBlk)
-            {
-                genCodeForCpBlkHelper(storeBlkNode);
-            }
-            else
-            {
-                genCodeForInitBlkHelper(storeBlkNode);
-            }
-            break;
-#endif // TARGET_AMD64
         case GenTreeBlk::BlkOpKindRepInstr:
 #ifndef JIT32_GCENCODER
             assert(!storeBlkNode->gtBlkOpGcUnsafe);
@@ -3402,27 +3398,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode)
     }
 }
 
-#ifdef TARGET_AMD64
-//------------------------------------------------------------------------
-// genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call
-//
-// Arguments:
-//    initBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK]
-//
-// Preconditions:
-//   The register assignments have been set appropriately.
-//   This is validated by genConsumeBlockOp().
-//
-void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode)
-{
-    // Destination address goes in arg0, source address goes in arg1, and size goes in arg2.
-    // genConsumeBlockOp takes care of this for us.
-    genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
-    genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
-}
-#endif // TARGET_AMD64
-
 #ifdef FEATURE_PUT_STRUCT_ARG_STK
 // Generate code for a load from some address + offset
 //   base: tree node which can be either a local or an indir
@@ -4310,27 +4285,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
     gcInfo.gcMarkRegSetNpt(RBM_RDI);
 }
 
-#ifdef TARGET_AMD64
-//----------------------------------------------------------------------------------
-// genCodeForCpBlkHelper - Generate code for a CpBlk node by the means of the VM memcpy helper call
-//
-// Arguments:
-//    cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK]
-//
-// Preconditions:
-//   The register assignments have been set appropriately.
-//   This is validated by genConsumeBlockOp().
-//
-void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode)
-{
-    // Destination address goes in arg0, source address goes in arg1, and size goes in arg2.
-    // genConsumeBlockOp takes care of this for us.
-    genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
-    genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
-}
-#endif // TARGET_AMD64
-
 // generate code do a switch statement based on a table of ip-relative offsets
 void CodeGen::genTableBasedSwitch(GenTree* treeNode)
 {
@@ -4353,32 +4307,7 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode)
 // emits the table and an instruction to get the address of the first element
 void CodeGen::genJumpTable(GenTree* treeNode)
 {
-    noway_assert(compiler->compCurBB->KindIs(BBJ_SWITCH));
-    assert(treeNode->OperGet() == GT_JMPTABLE);
-
-    unsigned     jumpCount = compiler->compCurBB->GetSwitchTargets()->bbsCount;
-    BasicBlock** jumpTable = compiler->compCurBB->GetSwitchTargets()->bbsDstTab;
-    unsigned     jmpTabOffs;
-    unsigned     jmpTabBase;
-
-    jmpTabBase = GetEmitter()->emitBBTableDataGenBeg(jumpCount, true);
-
-    jmpTabOffs = 0;
-
-    JITDUMP("\n      J_M%03u_DS%02u LABEL   DWORD\n", compiler->compMethodID, jmpTabBase);
-
-    for (unsigned i = 0; i < jumpCount; i++)
-    {
-        BasicBlock* target = *jumpTable++;
-        noway_assert(target->HasFlag(BBF_HAS_LABEL));
-
-        JITDUMP("            DD      L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum);
-
-        GetEmitter()->emitDataGenData(i, target);
-    };
-
-    GetEmitter()->emitDataGenEnd();
-
+    unsigned jmpTabBase = genEmitJumpTable(treeNode, true);
     // Access to inline data is 'abstracted' by a special type of static member
     // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
     // to constant data, not a real static field.
@@ -6074,16 +6003,18 @@ void CodeGen::genCall(GenTreeCall* call)
     }
 #endif // defined(DEBUG) && defined(TARGET_X86)
 
-    // When it's a PInvoke call and the call type is USER function, we issue VZEROUPPER here
-    // if the function contains 256bit AVX instructions, this is to avoid AVX-256 to Legacy SSE
-    // transition penalty, assuming the user function contains legacy SSE instruction.
-    // To limit code size increase impact: we only issue VZEROUPPER before PInvoke call, not issue
-    // VZEROUPPER after PInvoke call because transition penalty from legacy SSE to AVX only happens
-    // when there's preceding 256-bit AVX to legacy SSE transition penalty.
-    // This applies to 512bit AVX512 instructions as well.
-    if (call->IsPInvoke() && (call->gtCallType == CT_USER_FUNC) && (GetEmitter()->Contains256bitOrMoreAVX()))
+    if (GetEmitter()->Contains256bitOrMoreAVX() && call->NeedsVzeroupper(compiler))
     {
-        assert(compiler->canUseVexEncoding());
+        // The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states:
+        //   Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean
+        //   between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a
+        //   VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX
+        //   register) and before any call to an unknown function.
+
+        // This method contains a call that needs vzeroupper but also uses 256-bit or higher
+        // AVX itself. This means we couldn't optimize to only emitting a single vzeroupper in
+        // the method prologue and instead need to insert one before each call that needs it.
+
         instGen(INS_vzeroupper);
     }
 
@@ -6136,7 +6067,7 @@ void CodeGen::genCall(GenTreeCall* call)
                 for (unsigned i = 0; i < regCount; ++i)
                 {
                     var_types regType      = retTypeDesc->GetReturnRegType(i);
-                    returnReg              = retTypeDesc->GetABIReturnReg(i);
+                    returnReg              = retTypeDesc->GetABIReturnReg(i, call->GetUnmanagedCallConv());
                     regNumber allocatedReg = call->GetRegNumByIdx(i);
                     inst_Mov(regType, allocatedReg, returnReg, /* canSkip */ true);
                 }
@@ -6147,7 +6078,7 @@ void CodeGen::genCall(GenTreeCall* call)
                 // the native compiler doesn't guarantee it.
                 if (call->IsUnmanaged() && (returnType == TYP_SIMD12))
                 {
-                    returnReg = retTypeDesc->GetABIReturnReg(1);
+                    returnReg = retTypeDesc->GetABIReturnReg(1, call->GetUnmanagedCallConv());
                     genSimd12UpperClear(returnReg);
                 }
 #endif // FEATURE_SIMD
@@ -6165,13 +6096,13 @@ void CodeGen::genCall(GenTreeCall* call)
                 else
 #endif // TARGET_X86
                     if (varTypeIsFloating(returnType))
-                {
-                    returnReg = REG_FLOATRET;
-                }
-                else
-                {
-                    returnReg = REG_INTRET;
-                }
+                    {
+                        returnReg = REG_FLOATRET;
+                    }
+                    else
+                    {
+                        returnReg = REG_INTRET;
+                    }
 
                 inst_Mov(returnType, call->GetRegNum(), returnReg, /* canSkip */ true);
             }
@@ -6193,37 +6124,42 @@ void CodeGen::genCall(GenTreeCall* call)
                          compiler->lvaCallSpCheck, call->CallerPop() ? 0 : stackArgBytes, REG_ARG_0);
 #endif // defined(DEBUG) && defined(TARGET_X86)
 
-#if !defined(FEATURE_EH_FUNCLETS)
-    //-------------------------------------------------------------------------
-    // Create a label for tracking of region protected by the monitor in synchronized methods.
-    // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
-    // so the GC state vars have been updated before creating the label.
-
-    if ((call->gtCallType == CT_HELPER) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
+#if defined(FEATURE_EH_WINDOWS_X86)
+    if (!compiler->UsesFunclets())
     {
-        CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(call->gtCallMethHnd);
-        noway_assert(helperNum != CORINFO_HELP_UNDEF);
-        switch (helperNum)
+        //-------------------------------------------------------------------------
+        // Create a label for tracking of region protected by the monitor in synchronized methods.
+        // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
+        // so the GC state vars have been updated before creating the label.
+
+        if ((call->gtCallType == CT_HELPER) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
         {
-            case CORINFO_HELP_MON_ENTER:
-            case CORINFO_HELP_MON_ENTER_STATIC:
-                noway_assert(compiler->syncStartEmitCookie == nullptr);
-                compiler->syncStartEmitCookie =
-                    GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
-                noway_assert(compiler->syncStartEmitCookie != nullptr);
-                break;
-            case CORINFO_HELP_MON_EXIT:
-            case CORINFO_HELP_MON_EXIT_STATIC:
-                noway_assert(compiler->syncEndEmitCookie == nullptr);
-                compiler->syncEndEmitCookie =
-                    GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
-                noway_assert(compiler->syncEndEmitCookie != nullptr);
-                break;
-            default:
-                break;
+            CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(call->gtCallMethHnd);
+            noway_assert(helperNum != CORINFO_HELP_UNDEF);
+            switch (helperNum)
+            {
+                case CORINFO_HELP_MON_ENTER:
+                case CORINFO_HELP_MON_ENTER_STATIC:
+                    noway_assert(compiler->syncStartEmitCookie == nullptr);
+                    compiler->syncStartEmitCookie =
+                        GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+                                                   gcInfo.gcRegByrefSetCur);
+                    noway_assert(compiler->syncStartEmitCookie != nullptr);
+                    break;
+                case CORINFO_HELP_MON_EXIT:
+                case CORINFO_HELP_MON_EXIT_STATIC:
+                    noway_assert(compiler->syncEndEmitCookie == nullptr);
+                    compiler->syncEndEmitCookie =
+                        GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+                                                   gcInfo.gcRegByrefSetCur);
+                    noway_assert(compiler->syncEndEmitCookie != nullptr);
+                    break;
+                default:
+                    break;
+            }
         }
     }
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
     unsigned stackAdjustBias = 0;
 
@@ -6710,7 +6646,6 @@ void CodeGen::genJmpMethod(GenTree* jmp)
 #endif // !defined(UNIX_AMD64_ABI)
         {
             // Register argument
-            CLANG_FORMAT_COMMENT_ANCHOR;
 #ifdef TARGET_X86
             noway_assert(isRegParamType(genActualType(varDsc->TypeGet())) ||
                          ((varDsc->TypeGet() == TYP_STRUCT) &&
@@ -6908,19 +6843,6 @@ void CodeGen::genCompareFloat(GenTree* treeNode)
     ins     = (op1Type == TYP_FLOAT) ? INS_ucomiss : INS_ucomisd;
     cmpAttr = emitTypeSize(op1Type);
 
-    var_types targetType = treeNode->TypeGet();
-
-    // Clear target reg in advance via "xor reg,reg" to avoid movzx after SETCC
-    if ((targetReg != REG_NA) && (op1->GetRegNum() != targetReg) && (op2->GetRegNum() != targetReg) &&
-        !varTypeIsByte(targetType))
-    {
-        regMaskTP targetRegMask = genRegMask(targetReg);
-        if (((op1->gtGetContainedRegMask() | op2->gtGetContainedRegMask()) & targetRegMask) == 0)
-        {
-            instGen_Set_Reg_To_Zero(emitTypeSize(TYP_I_IMPL), targetReg);
-            targetType = TYP_UBYTE; // just a tip for inst_SETCC that movzx is not needed
-        }
-    }
     GetEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
 
     // Are we evaluating this into a register?
@@ -6937,7 +6859,7 @@ void CodeGen::genCompareFloat(GenTree* treeNode)
             condition = GenCondition(GenCondition::P);
         }
 
-        inst_SETCC(condition, targetType, targetReg);
+        inst_SETCC(condition, treeNode->TypeGet(), targetReg);
         genProduceReg(tree);
     }
 }
@@ -7077,22 +6999,8 @@ void CodeGen::genCompareInt(GenTree* treeNode)
     // TYP_UINT and TYP_ULONG should not appear here, only small types can be unsigned
     assert(!varTypeIsUnsigned(type) || varTypeIsSmall(type));
 
-    var_types targetType = tree->TypeGet();
-
     if (!canReuseFlags || !genCanAvoidEmittingCompareAgainstZero(tree, type))
     {
-        // Clear target reg in advance via "xor reg,reg" to avoid movzx after SETCC
-        if ((targetReg != REG_NA) && (op1->GetRegNum() != targetReg) && (op2->GetRegNum() != targetReg) &&
-            !varTypeIsByte(targetType))
-        {
-            regMaskTP targetRegMask = genRegMask(targetReg);
-            if (((op1->gtGetContainedRegMask() | op2->gtGetContainedRegMask()) & targetRegMask) == 0)
-            {
-                instGen_Set_Reg_To_Zero(emitTypeSize(TYP_I_IMPL), targetReg);
-                targetType = TYP_UBYTE; // just a tip for inst_SETCC that movzx is not needed
-            }
-        }
-
         emitAttr size    = emitTypeSize(type);
         bool     canSkip = compiler->opts.OptimizationEnabled() && (ins == INS_cmp) && !op1->isUsedFromMemory() &&
                        !op2->isUsedFromMemory() && emit->IsRedundantCmp(size, op1->GetRegNum(), op2->GetRegNum());
@@ -7106,7 +7014,7 @@ void CodeGen::genCompareInt(GenTree* treeNode)
     // Are we evaluating this into a register?
     if (targetReg != REG_NA)
     {
-        inst_SETCC(GenCondition::FromIntegralRelop(tree), targetType, targetReg);
+        inst_SETCC(GenCondition::FromIntegralRelop(tree), tree->TypeGet(), targetReg);
         genProduceReg(tree);
     }
 }
@@ -7703,13 +7611,16 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
 
     // We shouldn't be seeing uint64 here as it should have been converted
-    // into a helper call by either front-end or lowering phase.
-    assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
+    // into a helper call by either front-end or lowering phase, unless we have AVX512F
+    // accelerated conversions.
+    assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
+           compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
 
     // If the dstType is TYP_UINT, we have 32-bits to encode the
     // float number. Any of 33rd or above bits can be the sign bit.
     // To achieve it we pretend as if we are converting it to a long.
-    if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
+    if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))) &&
+        !compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
     {
         dstType = TYP_LONG;
     }
@@ -7717,7 +7628,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     // Note that we need to specify dstType here so that it will determine
     // the size of destination integer register and also the rex.w prefix.
     genConsumeOperands(treeNode->AsOp());
-    instruction ins = ins_FloatConv(TYP_INT, srcType, emitTypeSize(srcType));
+    instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
     GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
     genProduceReg(treeNode);
 }
@@ -8139,8 +8050,8 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode)
         }
 
 #if defined(FEATURE_SIMD)
-        // The handling is a bit more complex so genSimdUpperSave/Restore
-        // handles genConsumeOperands and genProduceReg
+            // The handling is a bit more complex so genSimdUpperSave/Restore
+            // handles genConsumeOperands and genProduceReg
 
         case NI_SIMD_UpperRestore:
         {
@@ -8210,7 +8121,7 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTree* treeNode)
 #ifdef UNIX_AMD64_ABI
         assert(!varDsc->lvIsRegArg && varDsc->GetArgReg() == REG_STK);
 #else  // !UNIX_AMD64_ABI
-        // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0.
+       // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0.
         assert(varDsc->lvIsRegArg && (varDsc->GetArgReg() == REG_ARG_0 || varDsc->GetArgReg() == REG_FLTARG_0));
 #endif // !UNIX_AMD64_ABI
 #endif // !DEBUG
@@ -8683,7 +8594,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
         unsigned argOffset = putArgStk->getArgOffset();
 
 #ifdef DEBUG
-        CallArg* callArg   = putArgStk->gtCall->gtArgs.FindByNode(putArgStk);
+        CallArg* callArg = putArgStk->gtCall->gtArgs.FindByNode(putArgStk);
         assert(callArg != nullptr);
         assert(argOffset == callArg->AbiInfo.ByteOffset);
 #endif
@@ -8936,8 +8847,8 @@ CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigne
 }
 
 #ifdef JIT32_GCENCODER
-void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
-                                            unsigned prologSize,
+void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned            codeSize,
+                                            unsigned            prologSize,
                                             unsigned epilogSize DEBUGARG(void* codePtr))
 {
     BYTE    headerBuf[64];
@@ -8945,13 +8856,12 @@ void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
 
     int s_cached;
 
-#ifdef FEATURE_EH_FUNCLETS
     // We should do this before gcInfoBlockHdrSave since varPtrTableSize must be finalized before it
     if (compiler->ehAnyFunclets())
     {
+        assert(compiler->UsesFunclets());
         gcInfo.gcMarkFilterVarsPinned();
     }
-#endif
 
 #ifdef DEBUG
     size_t headerSize =
@@ -9224,7 +9134,6 @@ void CodeGen::genAmd64EmitterUnitTestsSse2()
     //
     // Loads
     //
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
     genDefineTempLabel(genCreateTempLabel());
 
@@ -9339,8 +9248,8 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
 
     unsigned saveStackLvl2 = genStackLevel;
 
-// Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
-// for x86 stack unwinding
+    // Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
+    // for x86 stack unwinding
 
 #if defined(UNIX_X86_ABI)
     // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
@@ -9593,8 +9502,10 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
         }
     }
 
-    // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
-    if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
+    // If initReg is trashed, either because it was an arg to the enter
+    // callback, or because the enter callback itself trashes it, then it needs
+    // to be zero'ed again before using.
+    if (((RBM_PROFILER_ENTER_TRASH | RBM_ARG_0 | RBM_ARG_1) & genRegMask(initReg)) != 0)
     {
         *pInitRegZeroed = false;
     }
@@ -9630,8 +9541,10 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
     // "mov r11, helper addr; call r11"
     genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
 
-    // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
-    if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
+    // If initReg is trashed, either because it was an arg to the enter
+    // callback, or because the enter callback itself trashes it, then it needs
+    // to be zero'ed again before using.
+    if (((RBM_PROFILER_ENTER_TRASH | RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1) & genRegMask(initReg)) != 0)
     {
         *pInitRegZeroed = false;
     }
@@ -9855,7 +9768,7 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters()
     // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
     // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not
     // here.
-    regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_OSR_INT_CALLEE_SAVED;
+    regMaskTP rsPushRegs = regSet.rsGetModifiedOsrIntCalleeSavedRegsMask();
 
 #if ETW_EBP_FRAMED
     if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
@@ -9936,7 +9849,7 @@ void CodeGen::genPushCalleeSavedRegisters()
     // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
     // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not
     // here.
-    regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
+    regMaskTP rsPushRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask();
 
 #if ETW_EBP_FRAMED
     if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
@@ -9994,7 +9907,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
     //
     if (doesSupersetOfNormalPops)
     {
-        regMaskTP rsPopRegs = regSet.rsGetModifiedRegsMask() & RBM_OSR_INT_CALLEE_SAVED;
+        regMaskTP rsPopRegs = regSet.rsGetModifiedOsrIntCalleeSavedRegsMask();
         regMaskTP tier0CalleeSaves =
             ((regMaskTP)compiler->info.compPatchpointInfo->CalleeSaveRegisters()) & RBM_OSR_INT_CALLEE_SAVED;
         regMaskTP additionalCalleeSaves = rsPopRegs & ~tier0CalleeSaves;
@@ -10014,7 +9927,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
 
     // Registers saved by a normal prolog
     //
-    regMaskTP      rsPopRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
+    regMaskTP      rsPopRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask();
     const unsigned popCount  = genPopCalleeSavedRegistersFromMask(rsPopRegs);
     noway_assert(compiler->compCalleeRegsPushed == popCount);
 }
@@ -10201,7 +10114,7 @@ void CodeGen::genFnEpilog(BasicBlock* block)
 
             regMaskTP const tier0CalleeSaves           = (regMaskTP)patchpointInfo->CalleeSaveRegisters();
             regMaskTP const tier0IntCalleeSaves        = tier0CalleeSaves & RBM_OSR_INT_CALLEE_SAVED;
-            regMaskTP const osrIntCalleeSaves          = regSet.rsGetModifiedRegsMask() & RBM_OSR_INT_CALLEE_SAVED;
+            regMaskTP const osrIntCalleeSaves          = regSet.rsGetModifiedOsrIntCalleeSavedRegsMask();
             regMaskTP const allIntCalleeSaves          = osrIntCalleeSaves | tier0IntCalleeSaves;
             unsigned const  tier0FrameSize             = patchpointInfo->TotalFrameSize() + REGSIZE_BYTES;
             unsigned const  tier0IntCalleeSaveUsedSize = genCountBits(allIntCalleeSaves) * REGSIZE_BYTES;
@@ -10302,7 +10215,7 @@ void CodeGen::genFnEpilog(BasicBlock* block)
                     // do an LEA to "pop off" the frame allocation.
                     needLea = true;
 #else  // !TARGET_AMD64
-                    // We will just generate "mov esp, ebp" and be done with it.
+       // We will just generate "mov esp, ebp" and be done with it.
                     needMovEspEbp = true;
 #endif // !TARGET_AMD64
                 }
@@ -10525,8 +10438,6 @@ void CodeGen::genFnEpilog(BasicBlock* block)
     }
 }
 
-#if defined(FEATURE_EH_FUNCLETS)
-
 #if defined(TARGET_AMD64)
 
 /*****************************************************************************
@@ -10842,8 +10753,10 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
 
     // TODO We may need EBP restore sequence here if we introduce PSPSym
 
+#ifdef UNIX_X86_ABI
     // Add a padding for 16-byte alignment
     inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE);
+#endif
 }
 
 /*****************************************************************************
@@ -10862,8 +10775,10 @@ void CodeGen::genFuncletEpilog()
 
     ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
 
+#ifdef UNIX_X86_ABI
     // Revert a padding that was added for 16-byte alignment
     inst_RV_IV(INS_add, REG_SPBASE, 12, EA_PTRSIZE);
+#endif
 
     instGen_Return(0);
 }
@@ -10912,8 +10827,6 @@ void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
 #endif // TARGET*
 }
 
-#endif // FEATURE_EH_FUNCLETS
-
 //-----------------------------------------------------------------------------
 // genZeroInitFrameUsingBlockInit: architecture-specific helper for genZeroInitFrame in the case
 // `genUseBlockInit` is set.
@@ -10984,7 +10897,6 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
     else
     {
         // Grab a non-argument, non-callee saved XMM reg
-        CLANG_FORMAT_COMMENT_ANCHOR;
 #ifdef UNIX_AMD64_ABI
         // System V x64 first temp reg is xmm8
         regNumber zeroSIMDReg = genRegNumFromMask(RBM_XMM8);
@@ -11043,8 +10955,8 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
             assert(i == alignmentLoBlkSize);
         }
 #else  // !defined(TARGET_AMD64)
-        // While we aren't aligning the start, we still want to
-        // zero anything that is not in a 16 byte chunk at end
+       // While we aren't aligning the start, we still want to
+       // zero anything that is not in a 16 byte chunk at end
         int alignmentBlkSize   = blkSize & -XMM_REGSIZE_BYTES;
         int alignmentHiBlkSize = blkSize - alignmentBlkSize;
         int alignedLclHi       = untrLclLo + alignmentBlkSize;
@@ -11188,12 +11100,27 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
 //             funclet frames: this will be FuncletInfo.fiSpDelta.
 void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
 {
-    genVzeroupperIfNeeded(false);
     regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
 
     // Only callee saved floating point registers should be in regMask
     assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
 
+    if (GetEmitter()->ContainsCallNeedingVzeroupper() && !GetEmitter()->Contains256bitOrMoreAVX())
+    {
+        // The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states:
+        //   Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean
+        //   between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a
+        //   VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX
+        //   register) and before any call to an unknown function.
+
+        // This method contains a call that needs vzeroupper but also doesn't use 256-bit or higher
+        // AVX itself. Thus we can optimize to only emitting a single vzeroupper in the function prologue
+        // This reduces the overall amount of codegen, particularly for more common paths not using any
+        // SIMD or floating-point.
+
+        instGen(INS_vzeroupper);
+    }
+
     // fast path return
     if (regMask == RBM_NONE)
     {
@@ -11208,8 +11135,8 @@ void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
     assert((offset % 16) == 0);
     instruction copyIns = ins_Copy(TYP_FLOAT);
 #else  // !TARGET_AMD64
-    unsigned    offset            = lclFrameSize - XMM_REGSIZE_BYTES;
-    instruction copyIns           = INS_movupd;
+    unsigned    offset  = lclFrameSize - XMM_REGSIZE_BYTES;
+    instruction copyIns = INS_movupd;
 #endif // !TARGET_AMD64
 
     for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
@@ -11241,10 +11168,20 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
     // Only callee saved floating point registers should be in regMask
     assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
 
+    if (GetEmitter()->Contains256bitOrMoreAVX())
+    {
+        // The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states:
+        //   Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean
+        //   between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a
+        //   VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX
+        //   register) and before any call to an unknown function.
+
+        instGen(INS_vzeroupper);
+    }
+
     // fast path return
     if (regMask == RBM_NONE)
     {
-        genVzeroupperIfNeeded();
         return;
     }
 
@@ -11287,37 +11224,6 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
             offset -= XMM_REGSIZE_BYTES;
         }
     }
-    genVzeroupperIfNeeded();
-}
-
-// Generate Vzeroupper instruction as needed to zero out upper 128b-bit of all YMM registers so that the
-// AVX/Legacy SSE transition penalties can be avoided. This function is been used in genPreserveCalleeSavedFltRegs
-// (prolog) and genRestoreCalleeSavedFltRegs (epilog). Issue VZEROUPPER in Prolog if the method contains
-// 128-bit or 256-bit AVX code, to avoid legacy SSE to AVX transition penalty, which could happen when native
-// code contains legacy SSE code calling into JIT AVX code (e.g. reverse pinvoke). Issue VZEROUPPER in Epilog
-// if the method contains 256-bit AVX code, to avoid AVX to legacy SSE transition penalty.
-//
-// Params
-//   check256bitOnly  - true to check if the function contains 256-bit AVX instruction and generate Vzeroupper
-//      instruction, false to check if the function contains AVX instruction (either 128-bit or 256-bit).
-//
-void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)
-{
-    bool emitVzeroUpper = false;
-    if (check256bitOnly)
-    {
-        emitVzeroUpper = GetEmitter()->Contains256bitOrMoreAVX();
-    }
-    else
-    {
-        emitVzeroUpper = GetEmitter()->ContainsAVX();
-    }
-
-    if (emitVzeroUpper)
-    {
-        assert(compiler->canUseVexEncoding());
-        instGen(INS_vzeroupper);
-    }
 }
 
 //-----------------------------------------------------------------------------------
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 814c307b7b49..43f9a1cc58e7 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -657,11 +657,11 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
         // have a struct that is larger than that.
         //
         if (structSize <= MAX_PASS_SINGLEREG_BYTES)
-    {
-        // We set the "primitive" useType based upon the structSize
-        // and also examine the clsHnd to see if it is an HFA of count one
-        useType = getPrimitiveTypeForStruct(structSize, clsHnd, isVarArg);
-    }
+        {
+            // We set the "primitive" useType based upon the structSize
+            // and also examine the clsHnd to see if it is an HFA of count one
+            useType = getPrimitiveTypeForStruct(structSize, clsHnd, isVarArg);
+        }
 #else
     if (isTrivialPointerSizedStruct(clsHnd))
     {
@@ -770,7 +770,6 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
         {
             // We have a (large) struct that can't be replaced with a "primitive" type
             // and can't be passed in multiple registers
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_X86) || defined(TARGET_ARM) || defined(UNIX_AMD64_ABI)
 
@@ -870,6 +869,42 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE     clsHnd,
     }
     assert(structSize > 0);
 
+#ifdef SWIFT_SUPPORT
+    if (callConv == CorInfoCallConvExtension::Swift)
+    {
+        const CORINFO_SWIFT_LOWERING* lowering = GetSwiftLowering(clsHnd);
+        if (lowering->byReference)
+        {
+            howToReturnStruct = SPK_ByReference;
+            useType           = TYP_UNKNOWN;
+        }
+        else if (lowering->numLoweredElements == 1)
+        {
+            useType = JITtype2varType(lowering->loweredElements[0]);
+            if (genTypeSize(useType) == structSize)
+            {
+                howToReturnStruct = SPK_PrimitiveType;
+            }
+            else
+            {
+                howToReturnStruct = SPK_EnclosingType;
+            }
+        }
+        else
+        {
+            howToReturnStruct = SPK_ByValue;
+            useType           = TYP_STRUCT;
+        }
+
+        if (wbReturnStruct != nullptr)
+        {
+            *wbReturnStruct = howToReturnStruct;
+        }
+
+        return useType;
+    }
+#endif
+
 #ifdef UNIX_AMD64_ABI
     // An 8-byte struct may need to be returned in a floating point register
     // So we always consult the struct "Classifier" routine
@@ -1138,11 +1173,15 @@ struct FileLine
     unsigned m_line;
     char*    m_condStr;
 
-    FileLine() : m_file(nullptr), m_line(0), m_condStr(nullptr)
+    FileLine()
+        : m_file(nullptr)
+        , m_line(0)
+        , m_condStr(nullptr)
     {
     }
 
-    FileLine(const char* file, unsigned line, const char* condStr) : m_line(line)
+    FileLine(const char* file, unsigned line, const char* condStr)
+        : m_line(line)
     {
         size_t newSize = (strlen(file) + 1) * sizeof(char);
         m_file         = HostAllocator::getHostAllocator().allocate<char>(newSize);
@@ -1181,7 +1220,7 @@ struct FileLine
 };
 
 typedef JitHashTable<FileLine, FileLine, size_t, HostAllocator> FileLineToCountMap;
-FileLineToCountMap* NowayAssertMap;
+FileLineToCountMap*                                             NowayAssertMap;
 
 void Compiler::RecordNowayAssert(const char* filename, unsigned line, const char* condStr)
 {
@@ -1214,7 +1253,8 @@ struct NowayAssertCountMap
     size_t   count;
     FileLine fl;
 
-    NowayAssertCountMap() : count(0)
+    NowayAssertCountMap()
+        : count(0)
     {
     }
 
@@ -1817,9 +1857,13 @@ void Compiler::compInit(ArenaAllocator*       pAlloc,
     info.compMethodName = eeGetMethodName(methodHnd);
     info.compClassName  = eeGetClassName(info.compClassHnd);
     info.compFullName   = eeGetMethodFullName(methodHnd);
-    info.compPerfScore  = 0.0;
 
     info.compMethodSuperPMIIndex = g_jitHost->getIntConfigValue(W("SuperPMIMethodContextNumber"), -1);
+
+    if (!compIsForInlining())
+    {
+        JitMetadata::report(this, JitMetadata::MethodFullName, info.compFullName, strlen(info.compFullName));
+    }
 #endif // defined(DEBUG) || defined(LATE_DISASM) || DUMP_FLOWGRAPHS
 
 #if defined(DEBUG)
@@ -1843,6 +1887,11 @@ void Compiler::compInit(ArenaAllocator*       pAlloc,
 
     eeInfoInitialized = false;
 
+#if defined(FEATURE_EH_WINDOWS_X86)
+    // Cache Native AOT ABI check. This must happen *after* eeInfoInitialized is initialized, above.
+    eeIsNativeAotAbi = IsTargetAbi(CORINFO_NATIVEAOT_ABI);
+#endif
+
     compDoAggressiveInlining = false;
 
     if (compIsForInlining())
@@ -1895,9 +1944,6 @@ void Compiler::compInit(ArenaAllocator*       pAlloc,
         //
         // Initialize all the per-method statistics gathering data structures.
         //
-
-        optLoopsCloned = 0;
-
 #if LOOP_HOIST_STATS
         m_loopsConsidered             = 0;
         m_curLoopHasHoistedExpression = false;
@@ -1982,6 +2028,10 @@ void Compiler::compInit(ArenaAllocator*       pAlloc,
     fgSsaValid                 = false;
     fgVNPassesCompleted        = 0;
 
+#ifdef SWIFT_SUPPORT
+    m_swiftLoweringCache = nullptr;
+#endif
+
     // check that HelperCallProperties are initialized
 
     assert(s_helperCallProperties.IsPure(CORINFO_HELP_GETSHARED_GCSTATIC_BASE));
@@ -1999,6 +2049,8 @@ void Compiler::compInit(ArenaAllocator*       pAlloc,
     compUsesThrowHelper = false;
 
     m_preferredInitCctor = CORINFO_HELP_UNDEF;
+
+    new (&Metrics, jitstd::placement_t()) JitMetrics();
 }
 
 /*****************************************************************************
@@ -2016,8 +2068,8 @@ void Compiler::compDone()
 #endif // LATE_DISASM
 }
 
-void* Compiler::compGetHelperFtn(CorInfoHelpFunc ftnNum,        /* IN  */
-                                 void**          ppIndirection) /* OUT */
+void* Compiler::compGetHelperFtn(CorInfoHelpFunc ftnNum, /* IN  */
+                                 void**          ppIndirection)   /* OUT */
 {
     void* addr;
 
@@ -2072,9 +2124,9 @@ void Compiler::compDoComponentUnitTestsOnce()
 // compGetJitDefaultFill:
 //
 // Return Value:
-//    An unsigned char value used to initizalize memory allocated by the JIT.
-//    The default value is taken from DOTNET_JitDefaultFill,  if is not set
-//    the value will be 0xdd.  When JitStress is active a random value based
+//    An unsigned char value used to initialize memory allocated by the JIT.
+//    The default value is taken from DOTNET_JitDefaultFill. If it is not set
+//    the value will be 0xdd. When JitStress is active a random value based
 //    on the method hash is used.
 //
 // Notes:
@@ -2264,7 +2316,6 @@ void Compiler::compSetProcessor()
     //
     // Processor specific optimizations
     //
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
     CORINFO_InstructionSetFlags instructionSetFlags = jitFlags.GetInstructionSetFlags();
     opts.compSupportsISA.Reset();
@@ -2346,6 +2397,7 @@ void Compiler::compSetProcessor()
             // Assume each JITted method does not contain AVX instruction at first
             codeGen->GetEmitter()->SetContainsAVX(false);
             codeGen->GetEmitter()->SetContains256bitOrMoreAVX(false);
+            codeGen->GetEmitter()->SetContainsCallNeedingVzeroupper(false);
         }
         if (canUseEvexEncoding())
         {
@@ -2773,6 +2825,11 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
     fgPgoFailReason  = nullptr;
     fgPgoSource      = ICorJitInfo::PgoSource::Unknown;
     fgPgoHaveWeights = false;
+    fgPgoSynthesized = false;
+
+#ifdef DEBUG
+    fgPgoConsistent = false;
+#endif
 
     if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT))
     {
@@ -2861,7 +2918,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
     // The rest of the opts fields that we initialize here
     // should only be used when we generate code for the method
     // They should not be used when importing or inlining
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if FEATURE_TAILCALL_OPT
     opts.compTailCallLoopOpt = true;
@@ -2888,6 +2944,11 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
     opts.disAlignment = false;
     opts.disCodeBytes = false;
 
+    opts.optRepeat          = false;
+    opts.optRepeatIteration = 0;
+    opts.optRepeatCount     = 1;
+    opts.optRepeatActive    = false;
+
 #ifdef DEBUG
     opts.dspInstrs       = false;
     opts.dspLines        = false;
@@ -2902,7 +2963,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
     opts.disAsm2         = false;
     opts.dspUnwind       = false;
     opts.compLongAddress = false;
-    opts.optRepeat       = false;
 
 #ifdef LATE_DISASM
     opts.doLateDisasm = false;
@@ -2984,9 +3044,11 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
             opts.compLongAddress = true;
         }
 
-        if (JitConfig.JitOptRepeat().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args))
+        if ((JitConfig.JitEnableOptRepeat() != 0) &&
+            (JitConfig.JitOptRepeat().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args)))
         {
-            opts.optRepeat = true;
+            opts.optRepeat      = true;
+            opts.optRepeatCount = JitConfig.JitOptRepeatCount();
         }
 
         opts.dspMetrics = (JitConfig.JitMetrics() != 0);
@@ -3063,6 +3125,13 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
     {
         opts.disAsm = true;
     }
+
+    if ((JitConfig.JitEnableOptRepeat() != 0) &&
+        (JitConfig.JitOptRepeat().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args)))
+    {
+        opts.optRepeat      = true;
+        opts.optRepeatCount = JitConfig.JitOptRepeatCount();
+    }
 #endif // !DEBUG
 
 #ifndef DEBUG
@@ -3088,7 +3157,42 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
         }
     }
 
-//-------------------------------------------------------------------------
+    if (opts.optRepeat)
+    {
+        // Defer printing this until now, after the "START" line printed above.
+        JITDUMP("\n*************** JitOptRepeat enabled; repetition count: %d\n\n", opts.optRepeatCount);
+    }
+    else if (JitConfig.JitEnableOptRepeat() != 0)
+    {
+#ifdef DEBUG
+        // Opt-in to JitOptRepeat based on method hash ranges.
+        // The default is no JitOptRepeat.
+        static ConfigMethodRange fJitOptRepeatRange;
+        fJitOptRepeatRange.EnsureInit(JitConfig.JitOptRepeatRange());
+        assert(!fJitOptRepeatRange.Error());
+        if (!fJitOptRepeatRange.IsEmpty() && fJitOptRepeatRange.Contains(info.compMethodHash()))
+        {
+            opts.optRepeat      = true;
+            opts.optRepeatCount = JitConfig.JitOptRepeatCount();
+
+            JITDUMP("\n*************** JitOptRepeat enabled by JitOptRepeatRange; repetition count: %d\n\n",
+                    opts.optRepeatCount);
+        }
+
+        if (!opts.optRepeat && compStressCompile(STRESS_OPT_REPEAT, 10))
+        {
+            // Turn on optRepeat as part of JitStress. In this case, decide how many iterations to do, from 2 to 5,
+            // based on a random number seeded by the method hash.
+            opts.optRepeat = true;
+
+            CLRRandom rng;
+            rng.Init(info.compMethodHash());
+            opts.optRepeatCount = rng.Next(4) + 2; // generates [2..5]
+
+            JITDUMP("\n*************** JitOptRepeat for stress; repetition count: %d\n\n", opts.optRepeatCount);
+        }
+#endif // DEBUG
+    }
 
 #ifdef DEBUG
 #ifndef TARGET_WASM
@@ -3348,11 +3452,10 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
             printf("OPTIONS: OSR variant with entry point 0x%x\n", info.compILEntry);
         }
 
-        printf("OPTIONS: compCodeOpt = %s\n",
-               (opts.compCodeOpt == BLENDED_CODE)
-                   ? "BLENDED_CODE"
-                   : (opts.compCodeOpt == SMALL_CODE) ? "SMALL_CODE"
-                                                      : (opts.compCodeOpt == FAST_CODE) ? "FAST_CODE" : "UNKNOWN_CODE");
+        printf("OPTIONS: compCodeOpt = %s\n", (opts.compCodeOpt == BLENDED_CODE) ? "BLENDED_CODE"
+                                              : (opts.compCodeOpt == SMALL_CODE) ? "SMALL_CODE"
+                                              : (opts.compCodeOpt == FAST_CODE)  ? "FAST_CODE"
+                                                                                 : "UNKNOWN_CODE");
 
         printf("OPTIONS: compDbgCode = %s\n", dspBool(opts.compDbgCode));
         printf("OPTIONS: compDbgInfo = %s\n", dspBool(opts.compDbgInfo));
@@ -3587,6 +3690,14 @@ bool Compiler::compStressCompileHelper(compStressArea stressArea, unsigned weigh
         return false;
     }
 
+    // Does user allow using this STRESS_MODE through the command line?
+    const WCHAR* strStressModeNamesAllow = JitConfig.JitStressModeNamesAllow();
+    if ((strStressModeNamesAllow != nullptr) &&
+        (u16_strstr(strStressModeNamesAllow, s_compStressModeNamesW[stressArea]) == nullptr))
+    {
+        return false;
+    }
+
     // Does user explicitly set this STRESS_MODE through the command line?
     const WCHAR* strStressModeNames = JitConfig.JitStressModeNames();
     if (strStressModeNames != nullptr)
@@ -3945,8 +4056,9 @@ void Compiler::compSetOptimizationLevel()
         }
         if (theMinOptsValue == true)
         {
-            JITLOG((LL_INFO10000, "IL Code Size,Instr %4d,%4d, Basic Block count %3d, Local Variable Num,Ref count "
-                                  "%3d,%3d for method %s\n",
+            JITLOG((LL_INFO10000,
+                    "IL Code Size,Instr %4d,%4d, Basic Block count %3d, Local Variable Num,Ref count "
+                    "%3d,%3d for method %s\n",
                     info.compILCodeSize, opts.instrCount, fgBBcount, lvaCount, opts.lvRefCount, info.compFullName));
             if (JitConfig.JitBreakOnMinOpts() != 0)
             {
@@ -4647,11 +4759,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     //
     DoPhase(this, PHASE_MORPH_ADD_INTERNAL, &Compiler::fgAddInternal);
 
-    // Disable profile checks now.
-    // Over time we will move this further and further back in the phase list, as we fix issues.
-    //
-    activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
-
     // Remove empty try regions
     //
     DoPhase(this, PHASE_EMPTY_TRY, &Compiler::fgRemoveEmptyTry);
@@ -4734,7 +4841,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     {
         // Tail merge
         //
-        DoPhase(this, PHASE_HEAD_TAIL_MERGE, [this]() { return fgHeadTailMerge(true); });
+        DoPhase(this, PHASE_HEAD_TAIL_MERGE, [this]() {
+            return fgHeadTailMerge(true);
+        });
 
         // Merge common throw blocks
         //
@@ -4805,7 +4914,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     DoPhase(this, PHASE_MORPH_GLOBAL, &Compiler::fgMorphBlocks);
 
     auto postMorphPhase = [this]() {
-
         // Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args
         fgMarkDemotedImplicitByRefArgs();
         lvaRefCountState       = RCS_INVALID;
@@ -4842,13 +4950,12 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     //
     DoPhase(this, PHASE_COMPUTE_EDGE_WEIGHTS, &Compiler::fgComputeBlockAndEdgeWeights);
 
-#if defined(FEATURE_EH_FUNCLETS)
-
-    // Create funclets from the EH handlers.
-    //
-    DoPhase(this, PHASE_CREATE_FUNCLETS, &Compiler::fgCreateFunclets);
-
-#endif // FEATURE_EH_FUNCLETS
+    if (UsesFunclets())
+    {
+        // Create funclets from the EH handlers.
+        //
+        DoPhase(this, PHASE_CREATE_FUNCLETS, &Compiler::fgCreateFunclets);
+    }
 
     if (opts.OptimizationEnabled())
     {
@@ -4862,25 +4969,27 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
 
         // Second pass of tail merge
         //
-        DoPhase(this, PHASE_HEAD_TAIL_MERGE2, [this]() { return fgHeadTailMerge(false); });
+        DoPhase(this, PHASE_HEAD_TAIL_MERGE2, [this]() {
+            return fgHeadTailMerge(false);
+        });
 
         // Canonicalize entry to give a unique dominator tree root
         //
         DoPhase(this, PHASE_CANONICALIZE_ENTRY, &Compiler::fgCanonicalizeFirstBB);
 
-        // Compute reachability sets and dominators.
+        // Compute DFS tree and remove all unreachable blocks.
         //
-        DoPhase(this, PHASE_COMPUTE_REACHABILITY, &Compiler::fgComputeReachability);
-
-        // Scale block weights and mark run rarely blocks.
-        //
-        DoPhase(this, PHASE_SET_BLOCK_WEIGHTS, &Compiler::optSetBlockWeights);
+        DoPhase(this, PHASE_DFS_BLOCKS2, &Compiler::fgDfsBlocksAndRemove);
 
         // Discover and classify natural loops (e.g. mark iterative loops as such). Also marks loop blocks
         // and sets bbWeight to the loop nesting levels.
         //
         DoPhase(this, PHASE_FIND_LOOPS, &Compiler::optFindLoopsPhase);
 
+        // Scale block weights and mark run rarely blocks.
+        //
+        DoPhase(this, PHASE_SET_BLOCK_WEIGHTS, &Compiler::optSetBlockWeights);
+
         // Clone loops with optimization opportunities, and choose one based on dynamic condition evaluation.
         //
         DoPhase(this, PHASE_CLONE_LOOPS, &Compiler::optCloneLoops);
@@ -4932,18 +5041,19 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
         bool doValueNum                = true;
         bool doLoopHoisting            = true;
         bool doCopyProp                = true;
+        bool doOptimizeIVs             = true;
         bool doBranchOpt               = true;
         bool doCse                     = true;
         bool doAssertionProp           = true;
         bool doVNBasedIntrinExpansion  = true;
         bool doRangeAnalysis           = true;
         bool doVNBasedDeadStoreRemoval = true;
-        int  iterations                = 1;
 
 #if defined(OPT_CONFIG)
         doSsa                     = (JitConfig.JitDoSsa() != 0);
         doEarlyProp               = doSsa && (JitConfig.JitDoEarlyProp() != 0);
         doValueNum                = doSsa && (JitConfig.JitDoValueNumber() != 0);
+        doOptimizeIVs             = doSsa && (JitConfig.JitDoOptimizeIVs() != 0);
         doLoopHoisting            = doValueNum && (JitConfig.JitDoLoopHoisting() != 0);
         doCopyProp                = doValueNum && (JitConfig.JitDoCopyProp() != 0);
         doBranchOpt               = doValueNum && (JitConfig.JitDoRedundantBranchOpts() != 0);
@@ -4952,15 +5062,23 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
         doVNBasedIntrinExpansion  = doValueNum;
         doRangeAnalysis           = doAssertionProp && (JitConfig.JitDoRangeAnalysis() != 0);
         doVNBasedDeadStoreRemoval = doValueNum && (JitConfig.JitDoVNBasedDeadStoreRemoval() != 0);
+#endif // defined(OPT_CONFIG)
 
         if (opts.optRepeat)
         {
-            iterations = JitConfig.JitOptRepeatCount();
+            opts.optRepeatActive = true;
         }
-#endif // defined(OPT_CONFIG)
 
-        while (iterations > 0)
+        while (++opts.optRepeatIteration <= opts.optRepeatCount)
         {
+#ifdef DEBUG
+            if (verbose && opts.optRepeat)
+            {
+                printf("\n*************** JitOptRepeat: iteration %d of %d\n\n", opts.optRepeatIteration,
+                       opts.optRepeatCount);
+            }
+#endif // DEBUG
+
             fgModified = false;
 
             if (doSsa)
@@ -5044,6 +5162,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
                 DoPhase(this, PHASE_OPTIMIZE_INDEX_CHECKS, &Compiler::rangeCheckPhase);
             }
 
+            if (doOptimizeIVs)
+            {
+                // Simplify and optimize induction variables used in natural loops
+                //
+                DoPhase(this, PHASE_OPTIMIZE_INDUCTION_VARIABLES, &Compiler::optInductionVariables);
+            }
+
             if (doVNBasedDeadStoreRemoval)
             {
                 // Note: this invalidates SSA and value numbers on tree nodes.
@@ -5051,6 +5176,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
                 DoPhase(this, PHASE_VN_BASED_DEAD_STORE_REMOVAL, &Compiler::optVNBasedDeadStoreRemoval);
             }
 
+            // Conservatively mark all VNs as stale
+            vnStore = nullptr;
+
             if (fgModified)
             {
                 // update the flowgraph if we modified it during the optimization phase
@@ -5063,11 +5191,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
             }
 
             // Iterate if requested, resetting annotations first.
-            if (--iterations == 0)
+            if (opts.optRepeatIteration == opts.optRepeatCount)
             {
                 break;
             }
 
+            assert(opts.optRepeat);
+
             // We may have optimized away the canonical entry BB that SSA
             // depends on above, so if we are going for another iteration then
             // make sure we still have a canonical entry.
@@ -5076,15 +5206,31 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
 
             ResetOptAnnotations();
             RecomputeFlowGraphAnnotations();
+
+#ifdef DEBUG
+            if (verbose)
+            {
+                printf("Trees before next JitOptRepeat iteration:\n");
+                fgDispBasicBlocks(true);
+            }
+#endif // DEBUG
+        }
+
+        if (opts.optRepeat)
+        {
+            opts.optRepeatActive = false;
         }
     }
 
-    optLoopsRequirePreHeaders = false;
+    optLoopsCanonical = false;
 
 #ifdef DEBUG
     DoPhase(this, PHASE_STRESS_SPLIT_TREE, &Compiler::StressSplitTree);
 #endif
 
+    // Expand casts
+    DoPhase(this, PHASE_EXPAND_CASTS, &Compiler::fgLateCastExpansion);
+
     // Expand runtime lookups (an optimization but we'd better run it in tier0 too)
     DoPhase(this, PHASE_EXPAND_RTLOOKUPS, &Compiler::fgExpandRuntimeLookups);
 
@@ -5094,9 +5240,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     // Expand thread local access
     DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess);
 
-    // Expand casts
-    DoPhase(this, PHASE_EXPAND_CASTS, &Compiler::fgLateCastExpansion);
-
     // Insert GC Polls
     DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls);
 
@@ -5220,12 +5363,21 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
 
     // Now that lowering is completed we can proceed to perform register allocation
     //
-    auto linearScanPhase = [this]() { m_pLinearScan->doLinearScan(); };
+    auto linearScanPhase = [this]() {
+        m_pLinearScan->doLinearScan();
+    };
     DoPhase(this, PHASE_LINEAR_SCAN, linearScanPhase);
 
     // Copied from rpPredictRegUse()
     SetFullPtrRegMapRequired(codeGen->GetInterruptible() || !codeGen->isFramePointerUsed());
 
+    if (opts.OptimizationEnabled())
+    {
+        // LSRA and stack level setting can modify the flowgraph.
+        // Now that it won't change, run post-layout optimizations.
+        DoPhase(this, PHASE_OPTIMIZE_POST_LAYOUT, &Compiler::optOptimizePostLayout);
+    }
+
 #if FEATURE_LOOP_ALIGN
     // Place loop alignment instructions
     DoPhase(this, PHASE_ALIGN_LOOPS, &Compiler::placeLoopAlignInstructions);
@@ -5293,7 +5445,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
 #ifdef DEBUG
         if (JitConfig.JitMetrics() > 0)
         {
-            sprintf_s(metricPart, 128, ", perfScore=%.2f, numCse=%u", info.compPerfScore, optCSEcount);
+            sprintf_s(metricPart, 128, ", perfScore=%.2f, numCse=%u", Metrics.PerfScore, optCSEcount);
         }
 #endif
 
@@ -5315,7 +5467,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
 #elif FEATURE_SIMD
         fprintf(compJitFuncInfoFile, " %s\n", eeGetMethodFullName(info.compMethodHnd));
 #endif
-        fprintf(compJitFuncInfoFile, ""); // in our logic this causes a flush
+        fflush(compJitFuncInfoFile);
     }
 #endif // FUNC_INFO_LOGGING
 }
@@ -5382,29 +5534,26 @@ bool Compiler::shouldAlignLoop(FlowGraphNaturalLoop* loop, BasicBlock* top)
 
     assert(!top->IsFirst());
 
-#if FEATURE_EH_CALLFINALLY_THUNKS
-    if (top->Prev()->KindIs(BBJ_CALLFINALLY))
+    if (UsesCallFinallyThunks() && top->Prev()->KindIs(BBJ_CALLFINALLY))
     {
         // It must be a retless BBJ_CALLFINALLY if we get here.
         assert(!top->Prev()->isBBCallFinallyPair());
 
         // If the block before the loop start is a retless BBJ_CALLFINALLY
-        // with FEATURE_EH_CALLFINALLY_THUNKS, we can't add alignment
+        // with UsesCallFinallyThunks, we can't add alignment
         // because it will affect reported EH region range. For x86 (where
-        // !FEATURE_EH_CALLFINALLY_THUNKS), we can allow this.
+        // !UsesCallFinallyThunks), we can allow this.
 
         JITDUMP("Skipping alignment for " FMT_LP "; its top block follows a CALLFINALLY block\n", loop->GetIndex());
         return false;
     }
-#endif // FEATURE_EH_CALLFINALLY_THUNKS
 
     if (top->Prev()->isBBCallFinallyPairTail())
     {
         // If the previous block is the BBJ_CALLFINALLYRET of a
         // BBJ_CALLFINALLY/BBJ_CALLFINALLYRET pair, then we can't add alignment
         // because we can't add instructions in that block. In the
-        // FEATURE_EH_CALLFINALLY_THUNKS case, it would affect the
-        // reported EH, as above.
+        // UsesCallFinallyThunks case, it would affect the reported EH, as above.
         JITDUMP("Skipping alignment for " FMT_LP "; its top block follows a CALLFINALLY/ALWAYS pair\n",
                 loop->GetIndex());
         return false;
@@ -5497,7 +5646,7 @@ PhaseStatus Compiler::placeLoopAlignInstructions()
         {
             block->SetFlags(BBF_LOOP_ALIGN);
             BitVecOps::AddElemD(&loopTraits, alignedLoops, loop->GetIndex());
-            INDEBUG(loopAlignCandidates++);
+            Metrics.LoopAlignmentCandidates++;
 
             BasicBlock* prev = block->Prev();
             // shouldAlignLoop should have guaranteed these properties.
@@ -5546,7 +5695,7 @@ PhaseStatus Compiler::placeLoopAlignInstructions()
         }
     }
 
-    JITDUMP("Found %u candidates for loop alignment\n", loopAlignCandidates);
+    JITDUMP("Found %d candidates for loop alignment\n", Metrics.LoopAlignmentCandidates);
 
     return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
 }
@@ -5589,7 +5738,7 @@ void Compiler::SplitTreesRandomly()
     rng.Init(info.compMethodHash() ^ 0x077cc4d4);
 
     // Splitting creates a lot of new locals. Set a limit on how many we end up creating here.
-    unsigned maxLvaCount = max(lvaCount * 2, 50000);
+    unsigned maxLvaCount = max(lvaCount * 2, 50000u);
 
     for (BasicBlock* block : Blocks())
     {
@@ -5651,7 +5800,7 @@ void Compiler::SplitTreesRandomly()
 void Compiler::SplitTreesRemoveCommas()
 {
     // Splitting creates a lot of new locals. Set a limit on how many we end up creating here.
-    unsigned maxLvaCount = max(lvaCount * 2, 50000);
+    unsigned maxLvaCount = max(lvaCount * 2, 50000u);
 
     for (BasicBlock* block : Blocks())
     {
@@ -5757,7 +5906,6 @@ void Compiler::generatePatchpointInfo()
     //
     // For arm64, if the frame pointer is not at the top of the frame, we need to adjust the
     // offset.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_AMD64)
     // We add +TARGET_POINTER_SIZE here is to account for the slot that Jit_Patchpoint
@@ -5857,7 +6005,7 @@ void Compiler::generatePatchpointInfo()
     // Record callee save registers.
     // Currently only needed for x64.
     //
-    regMaskTP rsPushRegs = codeGen->regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+    regMaskTP rsPushRegs = codeGen->regSet.rsGetModifiedCalleeSavedRegsMask();
     rsPushRegs |= RBM_FPBASE;
     patchpointInfo->SetCalleeSaveRegisters((uint64_t)rsPushRegs);
     JITDUMP("--OSR-- Tier0 callee saves: ");
@@ -5876,18 +6024,19 @@ void Compiler::generatePatchpointInfo()
 //    The intent of this method is to clear any information typically assumed
 //    to be set only once; it is used between iterations when JitOptRepeat is
 //    in effect.
-
+//
 void Compiler::ResetOptAnnotations()
 {
     assert(opts.optRepeat);
     assert(JitConfig.JitOptRepeatCount() > 0);
     fgResetForSsa();
-    vnStore              = nullptr;
-    m_blockToEHPreds     = nullptr;
-    m_dominancePreds     = nullptr;
-    fgSsaPassesCompleted = 0;
-    fgVNPassesCompleted  = 0;
-    fgSsaValid           = false;
+    vnStore                    = nullptr;
+    m_blockToEHPreds           = nullptr;
+    m_dominancePreds           = nullptr;
+    fgSsaPassesCompleted       = 0;
+    fgVNPassesCompleted        = 0;
+    fgSsaValid                 = false;
+    m_nodeToLoopMemoryBlockMap = nullptr;
 
     for (BasicBlock* const block : Blocks())
     {
@@ -5918,19 +6067,20 @@ void Compiler::RecomputeFlowGraphAnnotations()
     // Recompute reachability sets, dominators, and loops.
     optResetLoopInfo();
 
-    fgComputeReachability();
-    optSetBlockWeights();
-
+    fgRenumberBlocks();
     fgInvalidateDfsTree();
-    m_dfsTree = fgComputeDfs();
+    fgDfsBlocksAndRemove();
     optFindLoops();
 
-    if (fgHasLoops)
+    // Should we call this using the phase method:
+    //    DoPhase(this, PHASE_SET_BLOCK_WEIGHTS, &Compiler::optSetBlockWeights);
+    // ? It could be called multiple times.
+    optSetBlockWeights();
+
+    if (m_domTree == nullptr)
     {
-        optFindAndScaleGeneralLoopBlocks();
+        m_domTree = FlowGraphDominatorTree::Build(m_dfsTree);
     }
-
-    m_domTree = FlowGraphDominatorTree::Build(m_dfsTree);
 }
 
 /*****************************************************************************/
@@ -6111,12 +6261,12 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
         // We need to assume, by default, that all flags coming from the VM are invalid.
         instructionSetFlags.Reset();
 
-// We then add each available instruction set for the target architecture provided
-// that the corresponding JitConfig switch hasn't explicitly asked for it to be
-// disabled. This allows us to default to "everything" supported for altjit scenarios
-// while also still allowing instruction set opt-out providing users with the ability
-// to, for example, see and debug ARM64 codegen for any desired CPU configuration without
-// needing to have the hardware in question.
+        // We then add each available instruction set for the target architecture provided
+        // that the corresponding JitConfig switch hasn't explicitly asked for it to be
+        // disabled. This allows us to default to "everything" supported for altjit scenarios
+        // while also still allowing instruction set opt-out providing users with the ability
+        // to, for example, see and debug ARM64 codegen for any desired CPU configuration without
+        // needing to have the hardware in question.
 
 #if defined(TARGET_ARM64)
         if (JitConfig.EnableHWIntrinsic() != 0)
@@ -6528,6 +6678,8 @@ void Compiler::compCompileFinish()
         compArenaAllocator->finishMemStats();
         memAllocHist.record((unsigned)((compArenaAllocator->getTotalBytesAllocated() + 1023) / 1024));
         memUsedHist.record((unsigned)((compArenaAllocator->getTotalBytesUsed() + 1023) / 1024));
+
+        Metrics.BytesAllocated = (int64_t)compArenaAllocator->getTotalBytesUsed();
     }
 
 #ifdef DEBUG
@@ -6711,7 +6863,7 @@ void Compiler::compCompileFinish()
 
         printf(" %3d |", optCallCount);
         printf(" %3d |", optIndirectCallCount);
-        printf(" %3d |", fgBBcountAtCodegen);
+        printf(" %3d |", Metrics.BasicBlocksAtCodegen);
         printf(" %3d |", lvaCount);
 
         if (opts.MinOpts())
@@ -6724,13 +6876,13 @@ void Compiler::compCompileFinish()
             printf(" %3d |", optCSEcount);
         }
 
-        if (info.compPerfScore < 9999.995)
+        if (Metrics.PerfScore < 9999.995)
         {
-            printf(" %7.2f |", info.compPerfScore);
+            printf(" %7.2f |", Metrics.PerfScore);
         }
         else
         {
-            printf(" %7.0f |", info.compPerfScore);
+            printf(" %7.0f |", Metrics.PerfScore);
         }
 
         printf(" %4d |", info.compMethodInfo->ILCodeSize);
@@ -6741,9 +6893,13 @@ void Compiler::compCompileFinish()
         printf(""); // in our logic this causes a flush
     }
 
+    JITDUMP("Final metrics:\n");
+    Metrics.report(this);
+    DBEXEC(verbose, Metrics.dump());
+
     if (verbose)
     {
-        printf("****** DONE compiling %s\n", info.compFullName);
+        printf("\n****** DONE compiling %s\n", info.compFullName);
         printf(""); // in our logic this causes a flush
     }
 
@@ -7238,6 +7394,13 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
         opts.disAsm = false;
     }
 
+#ifdef DEBUG
+    {
+        const char* tieringName = compGetTieringName(true);
+        JitMetadata::report(this, JitMetadata::TieringName, tieringName, strlen(tieringName));
+    }
+#endif
+
 #if COUNT_BASIC_BLOCKS
     bbCntTable.record(fgBBcount);
 
@@ -7458,7 +7621,7 @@ void Compiler::compInitVarScopeMap()
     compVarScopeMap = new (getAllocator()) VarNumToScopeDscMap(getAllocator());
 
     // 599 prime to limit huge allocations; for ex: duplicated scopes on single var.
-    compVarScopeMap->Reallocate(min(info.compVarScopesCount, 599U));
+    compVarScopeMap->Reallocate(min(info.compVarScopesCount, 599u));
 
     for (unsigned i = 0; i < info.compVarScopesCount; ++i)
     {
@@ -7885,112 +8048,105 @@ int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd,
 #endif
     param.result = result;
 
-    setErrorTrap(compHnd, Param*, pParamOuter, &param)
-    {
-        setErrorTrap(nullptr, Param*, pParam, pParamOuter)
-        {
-            if (pParam->inlineInfo)
-            {
-                // Lazily create the inlinee compiler object
-                if (pParam->inlineInfo->InlinerCompiler->InlineeCompiler == nullptr)
-                {
-                    pParam->inlineInfo->InlinerCompiler->InlineeCompiler =
-                        (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
-                }
+    setErrorTrap(compHnd, Param*, pParamOuter, &param){setErrorTrap(nullptr, Param*, pParam, pParamOuter){
+        if (pParam->inlineInfo){// Lazily create the inlinee compiler object
+                                if (pParam->inlineInfo->InlinerCompiler->InlineeCompiler == nullptr){
+                                    pParam->inlineInfo->InlinerCompiler->InlineeCompiler =
+                                        (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
+}
 
-                // Use the inlinee compiler object
-                pParam->pComp = pParam->inlineInfo->InlinerCompiler->InlineeCompiler;
+// Use the inlinee compiler object
+pParam->pComp = pParam->inlineInfo->InlinerCompiler->InlineeCompiler;
 #ifdef DEBUG
 // memset(pParam->pComp, 0xEE, sizeof(Compiler));
 #endif
-            }
-            else
-            {
-                // Allocate create the inliner compiler object
-                pParam->pComp = (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
-            }
+}
+else
+{
+    // Allocate create the inliner compiler object
+    pParam->pComp = (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
+}
 
 #if MEASURE_CLRAPI_CALLS
-            pParam->wrapCLR = WrapICorJitInfo::makeOne(pParam->pAlloc, pParam->pComp, pParam->compHnd);
+pParam->wrapCLR = WrapICorJitInfo::makeOne(pParam->pAlloc, pParam->pComp, pParam->compHnd);
 #endif
 
-            // push this compiler on the stack (TLS)
-            pParam->pComp->prevCompiler = JitTls::GetCompiler();
-            JitTls::SetCompiler(pParam->pComp);
+// push this compiler on the stack (TLS)
+pParam->pComp->prevCompiler = JitTls::GetCompiler();
+JitTls::SetCompiler(pParam->pComp);
 
 // PREFIX_ASSUME gets turned into ASSERT_CHECK and we cannot have it here
 #if defined(_PREFAST_) || defined(_PREFIX_)
-            PREFIX_ASSUME(pParam->pComp != NULL);
+PREFIX_ASSUME(pParam->pComp != NULL);
 #else
-            assert(pParam->pComp != nullptr);
+assert(pParam->pComp != nullptr);
 #endif
 
-            pParam->pComp->compInit(pParam->pAlloc, pParam->methodHnd, pParam->compHnd, pParam->methodInfo,
-                                    pParam->inlineInfo);
+pParam->pComp->compInit(pParam->pAlloc, pParam->methodHnd, pParam->compHnd, pParam->methodInfo, pParam->inlineInfo);
 
 #ifdef DEBUG
-            pParam->pComp->jitFallbackCompile = pParam->jitFallbackCompile;
+pParam->pComp->jitFallbackCompile = pParam->jitFallbackCompile;
 #endif
 
-            // Now generate the code
-            pParam->result = pParam->pComp->compCompile(pParam->classPtr, pParam->methodCodePtr, pParam->methodCodeSize,
-                                                        pParam->compileFlags);
-        }
-        finallyErrorTrap()
-        {
-            Compiler* pCompiler = pParamOuter->pComp;
+// Now generate the code
+pParam->result =
+    pParam->pComp->compCompile(pParam->classPtr, pParam->methodCodePtr, pParam->methodCodeSize, pParam->compileFlags);
+}
+finallyErrorTrap()
+{
+    Compiler* pCompiler = pParamOuter->pComp;
 
-            // If OOM is thrown when allocating memory for a pComp, we will end up here.
-            // For this case, pComp and also pCompiler will be a nullptr
-            //
-            if (pCompiler != nullptr)
-            {
-                pCompiler->info.compCode = nullptr;
+    // If OOM is thrown when allocating memory for a pComp, we will end up here.
+    // For this case, pComp and also pCompiler will be a nullptr
+    //
+    if (pCompiler != nullptr)
+    {
+        pCompiler->info.compCode = nullptr;
 
-                // pop the compiler off the TLS stack only if it was linked above
-                assert(JitTls::GetCompiler() == pCompiler);
-                JitTls::SetCompiler(pCompiler->prevCompiler);
-            }
+        // pop the compiler off the TLS stack only if it was linked above
+        assert(JitTls::GetCompiler() == pCompiler);
+        JitTls::SetCompiler(pCompiler->prevCompiler);
+    }
 
-            if (pParamOuter->inlineInfo == nullptr)
-            {
-                // Free up the allocator we were using
-                pParamOuter->pAlloc->destroy();
-            }
-        }
-        endErrorTrap()
+    if (pParamOuter->inlineInfo == nullptr)
+    {
+        // Free up the allocator we were using
+        pParamOuter->pAlloc->destroy();
     }
-    impJitErrorTrap()
+}
+endErrorTrap()
+}
+impJitErrorTrap()
+{
+    // If we were looking at an inlinee....
+    if (inlineInfo != nullptr)
     {
-        // If we were looking at an inlinee....
-        if (inlineInfo != nullptr)
-        {
-            // Note that we failed to compile the inlinee, and that
-            // there's no point trying to inline it again anywhere else.
-            inlineInfo->inlineResult->NoteFatal(InlineObservation::CALLEE_COMPILATION_ERROR);
-        }
-        param.result = __errc;
+        // Note that we failed to compile the inlinee, and that
+        // there's no point trying to inline it again anywhere else.
+        inlineInfo->inlineResult->NoteFatal(InlineObservation::CALLEE_COMPILATION_ERROR);
     }
-    endErrorTrap()
+    param.result = __errc;
+}
+endErrorTrap()
 
-        result = param.result;
+    result = param.result;
 
-    if (!inlineInfo &&
-        (result == CORJIT_INTERNALERROR || result == CORJIT_RECOVERABLEERROR || result == CORJIT_IMPLLIMITATION) &&
-        !jitFallbackCompile)
-    {
-        // If we failed the JIT, reattempt with debuggable code.
-        jitFallbackCompile = true;
+if (!inlineInfo &&
+    (result == CORJIT_INTERNALERROR || result == CORJIT_RECOVERABLEERROR || result == CORJIT_IMPLLIMITATION) &&
+    !jitFallbackCompile)
+{
+    // If we failed the JIT, reattempt with debuggable code.
+    jitFallbackCompile = true;
 
-        // Update the flags for 'safer' code generation.
-        compileFlags->Set(JitFlags::JIT_FLAG_MIN_OPT);
-        compileFlags->Clear(JitFlags::JIT_FLAG_SIZE_OPT);
-        compileFlags->Clear(JitFlags::JIT_FLAG_SPEED_OPT);
+    // Update the flags for 'safer' code generation.
+    compileFlags->Set(JitFlags::JIT_FLAG_MIN_OPT);
+    compileFlags->Clear(JitFlags::JIT_FLAG_SIZE_OPT);
+    compileFlags->Clear(JitFlags::JIT_FLAG_SPEED_OPT);
 
-        goto START;
-    }
+    goto START;
+}
 
-    return result;
+return result;
 }
 
 #if defined(UNIX_AMD64_ABI)
@@ -8741,8 +8897,9 @@ void CompTimeSummaryInfo::Print(FILE* f)
         double pslop_pct = 100.0 * m_total.m_parentPhaseEndSlop * 1000.0 / countsPerSec / totTime_ms;
         if (pslop_pct >= 1.0)
         {
-            fprintf(f, "\n  'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles = "
-                       "%3.1f%% of total.\n\n",
+            fprintf(f,
+                    "\n  'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles = "
+                    "%3.1f%% of total.\n\n",
                     m_total.m_parentPhaseEndSlop / 1000000.0, pslop_pct);
         }
     }
@@ -8782,8 +8939,9 @@ void CompTimeSummaryInfo::Print(FILE* f)
         double fslop_ms = m_filtered.m_parentPhaseEndSlop * 1000.0 / countsPerSec;
         if (fslop_ms > 1.0)
         {
-            fprintf(f, "\n  'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles = "
-                       "%3.1f%% of total.\n\n",
+            fprintf(f,
+                    "\n  'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles = "
+                    "%3.1f%% of total.\n\n",
                     m_filtered.m_parentPhaseEndSlop / 1000000.0, fslop_ms);
         }
     }
@@ -8881,7 +9039,8 @@ void CompTimeSummaryInfo::Print(FILE* f)
     fprintf(f, "\n");
 }
 
-JitTimer::JitTimer(unsigned byteCodeSize) : m_info(byteCodeSize)
+JitTimer::JitTimer(unsigned byteCodeSize)
+    : m_info(byteCodeSize)
 {
 #if MEASURE_CLRAPI_CALLS
     m_CLRcallInvokes = 0;
@@ -9133,7 +9292,7 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
     // for a DEBUG build (presumably not for the time info), just re-use it.
     const char* methName = comp->info.compFullName;
 #else
-    const char*          methName  = comp->eeGetMethodFullName(comp->info.compMethodHnd);
+    const char* methName = comp->eeGetMethodFullName(comp->info.compMethodHnd);
 #endif
 
     // Try and access the SPMI index to report in the data set.
@@ -9166,12 +9325,12 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
     fprintf(s_csvFile, "%u,", comp->info.compILCodeSize);
     fprintf(s_csvFile, "%u,", comp->fgBBcount);
     fprintf(s_csvFile, "%u,", comp->opts.MinOpts());
-    fprintf(s_csvFile, "%u,", comp->optNumNaturalLoopsFound);
-    fprintf(s_csvFile, "%u,", comp->optLoopsCloned);
+    fprintf(s_csvFile, "%d,", comp->Metrics.LoopsFoundDuringOpts);
+    fprintf(s_csvFile, "%d,", comp->Metrics.LoopsCloned);
 #if FEATURE_LOOP_ALIGN
 #ifdef DEBUG
-    fprintf(s_csvFile, "%u,", comp->loopAlignCandidates);
-    fprintf(s_csvFile, "%u,", comp->loopsAligned);
+    fprintf(s_csvFile, "%d,", comp->Metrics.LoopAlignmentCandidates);
+    fprintf(s_csvFile, "%d,", comp->Metrics.LoopsAligned);
 #endif // DEBUG
 #endif // FEATURE_LOOP_ALIGN
     unsigned __int64 totCycles = 0;
@@ -9181,7 +9340,7 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
         {
             totCycles += m_info.m_cyclesByPhase[i];
         }
-        fprintf(s_csvFile, "%llu,", m_info.m_cyclesByPhase[i]);
+        fprintf(s_csvFile, "%llu,", (unsigned long long)m_info.m_cyclesByPhase[i]);
 
         if ((JitConfig.JitMeasureIR() != 0) && PhaseReportsIRSize[i])
         {
@@ -9194,7 +9353,7 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
     fprintf(s_csvFile, "%u,", comp->info.compNativeCodeSize);
     fprintf(s_csvFile, "%zu,", comp->compInfoBlkSize);
     fprintf(s_csvFile, "%zu,", comp->compGetArenaAllocator()->getTotalBytesAllocated());
-    fprintf(s_csvFile, "%llu,", m_info.m_totalCycles);
+    fprintf(s_csvFile, "%llu,", (unsigned long long)m_info.m_totalCycles);
     fprintf(s_csvFile, "%f\n", CachedCyclesPerSecond());
 
     fflush(s_csvFile);
@@ -9481,6 +9640,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #pragma comment(linker, "/include:cLoops")
 #pragma comment(linker, "/include:cLoopsA")
 #pragma comment(linker, "/include:cLoop")
+#pragma comment(linker, "/include:cScev")
 #pragma comment(linker, "/include:cTreeFlags")
 #pragma comment(linker, "/include:cVN")
 
@@ -9506,6 +9666,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #pragma comment(linker, "/include:dCVarSet")
 #pragma comment(linker, "/include:dLoop")
 #pragma comment(linker, "/include:dLoops")
+#pragma comment(linker, "/include:dScev")
 #pragma comment(linker, "/include:dTreeFlags")
 #pragma comment(linker, "/include:dVN")
 
@@ -9749,24 +9910,39 @@ JITDBGAPI void __cdecl cCVarSet(Compiler* comp, VARSET_VALARG_TP vars)
 JITDBGAPI void __cdecl cLoops(Compiler* comp)
 {
     static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
-    printf("===================================================================== *NewLoops %u\n", sequenceNumber++);
+    printf("===================================================================== *Loops %u\n", sequenceNumber++);
     FlowGraphNaturalLoops::Dump(comp->m_loops);
 }
 
 JITDBGAPI void __cdecl cLoopsA(Compiler* comp, FlowGraphNaturalLoops* loops)
 {
     static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
-    printf("===================================================================== *NewLoopsA %u\n", sequenceNumber++);
+    printf("===================================================================== *LoopsA %u\n", sequenceNumber++);
     FlowGraphNaturalLoops::Dump(loops);
 }
 
 JITDBGAPI void __cdecl cLoop(Compiler* comp, FlowGraphNaturalLoop* loop)
 {
     static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
-    printf("===================================================================== *NewLoop %u\n", sequenceNumber++);
+    printf("===================================================================== *Loop %u\n", sequenceNumber++);
     FlowGraphNaturalLoop::Dump(loop);
 }
 
+JITDBGAPI void __cdecl cScev(Compiler* comp, Scev* scev)
+{
+    static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+    printf("===================================================================== *Scev %u\n", sequenceNumber++);
+    if (scev == nullptr)
+    {
+        printf("  NULL\n");
+    }
+    else
+    {
+        scev->Dump(comp);
+        printf("\n");
+    }
+}
+
 JITDBGAPI void __cdecl cTreeFlags(Compiler* comp, GenTree* tree)
 {
     static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
@@ -9779,7 +9955,6 @@ JITDBGAPI void __cdecl cTreeFlags(Compiler* comp, GenTree* tree)
         chars += printf("flags=");
 
         // Node flags
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(DEBUG)
         if (tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE)
@@ -9960,14 +10135,6 @@ JITDBGAPI void __cdecl cTreeFlags(Compiler* comp, GenTree* tree)
                 }
                 break;
 
-            case GT_QMARK:
-
-                if (tree->gtFlags & GTF_QMARK_CAST_INSTOF)
-                {
-                    chars += printf("[QMARK_CAST_INSTOF]");
-                }
-                break;
-
             case GT_BOX:
 
                 if (tree->gtFlags & GTF_BOX_VALUE)
@@ -10000,7 +10167,6 @@ JITDBGAPI void __cdecl cTreeFlags(Compiler* comp, GenTree* tree)
 
             case GT_BLK:
             case GT_STORE_BLK:
-            case GT_STORE_DYN_BLK:
 
                 if (tree->gtFlags & GTF_IND_VOLATILE)
                 {
@@ -10365,6 +10531,11 @@ JITDBGAPI void __cdecl dLoop(FlowGraphNaturalLoop* loop)
     cLoop(JitTls::GetCompiler(), loop);
 }
 
+JITDBGAPI void __cdecl dScev(Scev* scev)
+{
+    cScev(JitTls::GetCompiler(), scev);
+}
+
 JITDBGAPI void __cdecl dTreeFlags(GenTree* tree)
 {
     cTreeFlags(JitTls::GetCompiler(), tree);
@@ -10538,6 +10709,7 @@ HelperCallProperties Compiler::s_helperCallProperties;
 // Return Value:
 //    true       - tree kills GC refs on callee save registers
 //    false      - tree doesn't affect GC refs on callee save registers
+//
 bool Compiler::killGCRefs(GenTree* tree)
 {
     if (tree->IsCall())
@@ -10707,6 +10879,31 @@ const char* Compiler::devirtualizationDetailToString(CORINFO_DEVIRTUALIZATION_DE
             return "undefined";
     }
 }
+
+//------------------------------------------------------------------------------
+// printfAlloc: printf a string and allocate the result in CMK_DebugOnly
+// memory.
+//
+// Arguments:
+//    format - Format string
+//
+// Returns:
+//    Allocated string.
+//
+const char* Compiler::printfAlloc(const char* format, ...)
+{
+    char    str[512];
+    va_list args;
+    va_start(args, format);
+    int result = vsprintf_s(str, ArrLen(str), format, args);
+    va_end(args);
+    assert((result >= 0) && ((unsigned)result < ArrLen(str)));
+
+    char* resultStr = new (this, CMK_DebugOnly) char[result + 1];
+    memcpy(resultStr, str, (unsigned)result + 1);
+    return resultStr;
+}
+
 #endif // defined(DEBUG)
 
 #if TRACK_ENREG_STATS
@@ -10809,6 +11006,10 @@ void Compiler::EnregisterStats::RecordLocal(const LclVarDsc* varDsc)
                 m_simdUserForcesDep++;
                 break;
 
+            case DoNotEnregisterReason::NonStandardParameter:
+                m_nonStandardParameter++;
+                break;
+
             default:
                 unreached();
                 break;
@@ -10936,6 +11137,7 @@ void Compiler::EnregisterStats::Dump(FILE* fout) const
     PRINT_STATS(m_returnSpCheck, notEnreg);
     PRINT_STATS(m_callSpCheck, notEnreg);
     PRINT_STATS(m_simdUserForcesDep, notEnreg);
+    PRINT_STATS(m_nonStandardParameter, notEnreg);
 
     fprintf(fout, "\nAddr exposed details:\n");
     if (m_addrExposed == 0)
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index d70d1c6e3647..436ced83ae05 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -40,8 +40,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #include "arraystack.h"
 #include "hashbv.h"
 #include "jitexpandarray.h"
-#include "tinyarray.h"
 #include "valuenum.h"
+#include "scev.h"
 #include "namedintrinsiclist.h"
 #ifdef LATE_DISASM
 #include "disasm.h"
@@ -49,6 +49,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 #include "codegeninterface.h"
 #include "regset.h"
+#include "abi.h"
 #include "jitgcinfo.h"
 
 #if DUMP_GC_TABLES && defined(JIT32_GCENCODER)
@@ -65,6 +66,8 @@ inline var_types genActualType(T value);
 #include "simd.h"
 #include "simdashwintrinsic.h"
 
+#include "jitmetadata.h"
+
 /*****************************************************************************
  *                  Forward declarations
  */
@@ -235,11 +238,13 @@ class LclSsaVarDsc
     {
     }
 
-    LclSsaVarDsc(BasicBlock* block) : m_block(block)
+    LclSsaVarDsc(BasicBlock* block)
+        : m_block(block)
     {
     }
 
-    LclSsaVarDsc(BasicBlock* block, GenTreeLclVarCommon* defNode) : m_block(block)
+    LclSsaVarDsc(BasicBlock* block, GenTreeLclVarCommon* defNode)
+        : m_block(block)
     {
         SetDefNode(defNode);
     }
@@ -349,7 +354,7 @@ class SsaDefArray
     void GrowArray(CompAllocator alloc)
     {
         unsigned oldSize = m_arraySize;
-        unsigned newSize = max(2, oldSize * 2);
+        unsigned newSize = max(2u, oldSize * 2);
 
         T* newArray = alloc.allocate<T>(newSize);
 
@@ -364,7 +369,10 @@ class SsaDefArray
 
 public:
     // Construct an empty SsaDefArray.
-    SsaDefArray() : m_array(nullptr), m_arraySize(0), m_count(0)
+    SsaDefArray()
+        : m_array(nullptr)
+        , m_arraySize(0)
+        , m_count(0)
     {
     }
 
@@ -457,13 +465,14 @@ enum class DoNotEnregisterReason
 #endif
     LclAddrNode, // the local is accessed with LCL_ADDR_VAR/FLD.
     CastTakesAddr,
-    StoreBlkSrc,          // the local is used as STORE_BLK source.
-    SwizzleArg,           // the local is passed using LCL_FLD as another type.
-    BlockOpRet,           // the struct is returned and it promoted or there is a cast.
-    ReturnSpCheck,        // the local is used to do SP check on return from function
-    CallSpCheck,          // the local is used to do SP check on every call
-    SimdUserForcesDep,    // a promoted struct was used by a SIMD/HWI node; it must be dependently promoted
-    HiddenBufferStructArg // the argument is a hidden return buffer passed to a method.
+    StoreBlkSrc,           // the local is used as STORE_BLK source.
+    SwizzleArg,            // the local is passed using LCL_FLD as another type.
+    BlockOpRet,            // the struct is returned and it promoted or there is a cast.
+    ReturnSpCheck,         // the local is used to do SP check on return from function
+    CallSpCheck,           // the local is used to do SP check on every call
+    SimdUserForcesDep,     // a promoted struct was used by a SIMD/HWI node; it must be dependently promoted
+    HiddenBufferStructArg, // the argument is a hidden return buffer passed to a method.
+    NonStandardParameter,  // local is a parameter that is passed in a register unhandled by genFnPrologCalleeRegArgs
 };
 
 enum class AddressExposedReason
@@ -490,7 +499,6 @@ class LclVarDsc
     // The constructor. Most things can just be zero'ed.
     //
     // Initialize the ArgRegs to REG_STK.
-    // Morph will update if this local is passed in a register.
     LclVarDsc() :
 #if defined(TARGET_WASM)
         lvLlvmArgNum(BAD_LLVM_ARG_NUM),
@@ -509,11 +517,11 @@ class LclVarDsc
     // note this only packs because var_types is a typedef of unsigned char
     var_types lvType : 5; // TYP_INT/LONG/FLOAT/DOUBLE/REF
 
-    unsigned char lvIsParam : 1;           // is this a parameter?
-    unsigned char lvIsRegArg : 1;          // is this an argument that was passed by register?
+    unsigned char lvIsParam           : 1; // is this a parameter?
+    unsigned char lvIsRegArg          : 1; // is this an argument that was passed by register?
     unsigned char lvFramePointerBased : 1; // 0 = off of REG_SPBASE (e.g., ESP), 1 = off of REG_FPBASE (e.g., EBP)
 
-    unsigned char lvOnFrame : 1;  // (part of) the variable lives on the frame
+    unsigned char lvOnFrame  : 1; // (part of) the variable lives on the frame
     unsigned char lvRegister : 1; // assigned to live in a register? For RyuJIT backend, this is only set if the
                                   // variable is in the same register for the entire function.
     unsigned char lvTracked : 1;  // is this a tracked variable?
@@ -535,16 +543,16 @@ class LclVarDsc
                             // We cannot reason reliably about the value of the variable.
 public:
     unsigned char lvDoNotEnregister : 1; // Do not enregister this variable.
-    unsigned char lvFieldAccessed : 1;   // The var is a struct local, and a field of the variable is accessed.  Affects
+    unsigned char lvFieldAccessed   : 1; // The var is a struct local, and a field of the variable is accessed.  Affects
                                          // struct promotion.
     unsigned char lvLiveInOutOfHndlr : 1; // The variable is live in or out of an exception handler, and therefore must
                                           // be on the stack (at least at those boundaries.)
 
-    unsigned char lvInSsa : 1;       // The variable is in SSA form (set by SsaBuilder)
-    unsigned char lvIsCSE : 1;       // Indicates if this LclVar is a CSE variable.
+    unsigned char lvInSsa       : 1; // The variable is in SSA form (set by SsaBuilder)
+    unsigned char lvIsCSE       : 1; // Indicates if this LclVar is a CSE variable.
     unsigned char lvHasLdAddrOp : 1; // has ldloca or ldarga opcode on this local.
 
-    unsigned char lvHasILStoreOp : 1;         // there is at least one STLOC or STARG on this local
+    unsigned char lvHasILStoreOp         : 1; // there is at least one STLOC or STARG on this local
     unsigned char lvHasMultipleILStoreOp : 1; // there is more than one STLOC on this local
 
     unsigned char lvIsTemp : 1; // Short-lifetime compiler temp
@@ -559,13 +567,13 @@ class LclVarDsc
 #if defined(TARGET_LOONGARCH64)
     unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64.
     unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64.
-    unsigned char lvIsSplit : 1;   // Set if the argument is splited.
+    unsigned char lvIsSplit   : 1; // Set if the argument is splited.
 #endif                             // defined(TARGET_LOONGARCH64)
 
 #if defined(TARGET_RISCV64)
     unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for RISCV64.
     unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for RISCV64.
-    unsigned char lvIsSplit : 1;   // Set if the argument is splited.
+    unsigned char lvIsSplit   : 1; // Set if the argument is splited.
 #endif                             // defined(TARGET_RISCV64)
 
     unsigned char lvSingleDef : 1; // variable has a single def. Used to identify ref type locals that can get type
@@ -594,7 +602,7 @@ class LclVarDsc
     unsigned char lvQuirkToLong : 1; // Quirk to allocate this LclVar as a 64-bit long
 #endif
 #ifdef DEBUG
-    unsigned char lvKeepType : 1;       // Don't change the type of this variable
+    unsigned char lvKeepType       : 1; // Don't change the type of this variable
     unsigned char lvNoLclFldStress : 1; // Can't apply local field stress on this one
 #endif
     unsigned char lvIsPtr : 1; // Might this be used in an address computation? (used by buffer overflow security
@@ -649,8 +657,8 @@ class LclVarDsc
 
 #ifdef DEBUG
     unsigned char lvClassInfoUpdated : 1; // true if this var has updated class handle or exactness
-    unsigned char lvIsHoist : 1;          // CSE temp for a hoisted tree
-    unsigned char lvIsMultiDefCSE : 1;    // CSE temp for a multi-def CSE
+    unsigned char lvIsHoist          : 1; // CSE temp for a hoisted tree
+    unsigned char lvIsMultiDefCSE    : 1; // CSE temp for a multi-def CSE
 #endif
 
     unsigned char lvImplicitlyReferenced : 1; // true if there are non-IR references to this local (prolog, epilog, gc,
@@ -675,7 +683,8 @@ class LclVarDsc
     unsigned char lvIsSpan : 1; // The local is a Span<T>
 
 public:
-    union {
+    union
+    {
         unsigned lvFieldLclStart; // The index of the local var representing the first field in the promoted struct
                                   // local.  For implicit byref parameters, this gets hijacked between
                                   // fgRetypeImplicitByRefArgs and fgMarkDemotedImplicitByRefArgs to point to the
@@ -901,7 +910,7 @@ class LclVarDsc
         assert(_lvRegNum == reg);
     }
 
-/////////////////////
+    /////////////////////
 
 #if defined(TARGET_64BIT)
 
@@ -1087,13 +1096,13 @@ class LclVarDsc
 
 public:
     unsigned short lvRefCnt(RefCountState state = RCS_NORMAL) const;
-    void incLvRefCnt(unsigned short delta, RefCountState state = RCS_NORMAL);
-    void setLvRefCnt(unsigned short newValue, RefCountState state = RCS_NORMAL);
-    void incLvRefCntSaturating(unsigned short delta, RefCountState state = RCS_NORMAL);
+    void           incLvRefCnt(unsigned short delta, RefCountState state = RCS_NORMAL);
+    void           setLvRefCnt(unsigned short newValue, RefCountState state = RCS_NORMAL);
+    void           incLvRefCntSaturating(unsigned short delta, RefCountState state = RCS_NORMAL);
 
     weight_t lvRefCntWtd(RefCountState state = RCS_NORMAL) const;
-    void incLvRefCntWtd(weight_t delta, RefCountState state = RCS_NORMAL);
-    void setLvRefCntWtd(weight_t newValue, RefCountState state = RCS_NORMAL);
+    void     incLvRefCntWtd(weight_t delta, RefCountState state = RCS_NORMAL);
+    void     setLvRefCntWtd(weight_t newValue, RefCountState state = RCS_NORMAL);
 
 private:
     int lvStkOffs; // stack offset of home in bytes.
@@ -1127,11 +1136,6 @@ class LclVarDsc
     {
         return (var_types)lvType;
     }
-    bool lvStackAligned() const
-    {
-        assert(lvIsStructField);
-        return ((lvFldOffset % TARGET_POINTER_SIZE) == 0);
-    }
 
     // NormalizeOnLoad Rules:
     //     1. All small locals are actually TYP_INT locals.
@@ -1352,7 +1356,8 @@ class IntegralRange
     IntegralRange() = default;
 
     IntegralRange(SymbolicIntegerValue lowerBound, SymbolicIntegerValue upperBound)
-        : m_lowerBound(lowerBound), m_upperBound(upperBound)
+        : m_lowerBound(lowerBound)
+        , m_upperBound(upperBound)
     {
         assert(lowerBound <= upperBound);
     }
@@ -1384,7 +1389,7 @@ class IntegralRange
         return (m_lowerBound == other.m_lowerBound) && (m_upperBound == other.m_upperBound);
     }
 
-    static int64_t SymbolicToRealValue(SymbolicIntegerValue value);
+    static int64_t              SymbolicToRealValue(SymbolicIntegerValue value);
     static SymbolicIntegerValue LowerBoundForType(var_types type);
     static SymbolicIntegerValue UpperBoundForType(var_types type);
 
@@ -1440,7 +1445,10 @@ class TempDsc
     var_types tdType;
 
 public:
-    TempDsc(int _tdNum, unsigned _tdSize, var_types _tdType) : tdNum(_tdNum), tdSize((BYTE)_tdSize), tdType(_tdType)
+    TempDsc(int _tdNum, unsigned _tdSize, var_types _tdType)
+        : tdNum(_tdNum)
+        , tdSize((BYTE)_tdSize)
+        , tdType(_tdType)
     {
 #ifdef DEBUG
         // temps must have a negative number (so they have a different number from all local variables)
@@ -1504,9 +1512,9 @@ enum class PhaseStatus : unsigned
 class LinearScanInterface
 {
 public:
-    virtual PhaseStatus doLinearScan()                         = 0;
-    virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb) = 0;
-    virtual bool willEnregisterLocalVars() const               = 0;
+    virtual PhaseStatus doLinearScan()                                = 0;
+    virtual void        recordVarLocationsAtStartOfBB(BasicBlock* bb) = 0;
+    virtual bool        willEnregisterLocalVars() const               = 0;
 #if TRACK_LSRA_STATS
     virtual void dumpLsraStatsCsv(FILE* file)     = 0;
     virtual void dumpLsraStatsSummary(FILE* file) = 0;
@@ -1597,9 +1605,10 @@ enum class ProfileChecks : unsigned int
     CHECK_NONE          = 0,
     CHECK_CLASSIC       = 1 << 0, // check "classic" jit weights
     CHECK_HASLIKELIHOOD = 1 << 1, // check all FlowEdges for hasLikelihood
-    CHECK_LIKELY        = 1 << 2, // fully check likelihood based weights
-    RAISE_ASSERT        = 1 << 3, // assert on check failure
-    CHECK_ALL_BLOCKS    = 1 << 4, // check blocks even if bbHasProfileWeight is false
+    CHECK_LIKELIHOODSUM = 1 << 2, // check block successor likelihoods sum to 1                              
+    CHECK_LIKELY        = 1 << 3, // fully check likelihood based weights
+    RAISE_ASSERT        = 1 << 4, // assert on check failure
+    CHECK_ALL_BLOCKS    = 1 << 5, // check blocks even if bbHasProfileWeight is false
 };
 
 inline constexpr ProfileChecks operator ~(ProfileChecks a)
@@ -1963,6 +1972,10 @@ class FlowGraphDfsTree
         return m_hasCycle;
     }
 
+#ifdef DEBUG
+    void Dump() const;
+#endif // DEBUG
+
     bool Contains(BasicBlock* block) const;
     bool IsAncestor(BasicBlock* ancestor, BasicBlock* descendant) const;
 };
@@ -2098,6 +2111,9 @@ class FlowGraphNaturalLoop
     // Can be used to store additional annotations for this loop on the side.
     unsigned m_index = 0;
 
+    // True if this loop contains an improper loop header
+    bool m_containsImproperHeader = false;
+
     FlowGraphNaturalLoop(const FlowGraphDfsTree* dfsTree, BasicBlock* head);
 
     unsigned LoopBlockBitVecIndex(BasicBlock* block);
@@ -2186,6 +2202,11 @@ class FlowGraphNaturalLoop
     bool ContainsBlock(BasicBlock* block);
     bool ContainsLoop(FlowGraphNaturalLoop* childLoop);
 
+    bool ContainsImproperHeader() const
+    {
+        return m_containsImproperHeader;
+    }
+
     unsigned NumLoopBlocks();
 
     template<typename TFunc>
@@ -2200,6 +2221,9 @@ class FlowGraphNaturalLoop
     template<typename TFunc>
     BasicBlockVisit VisitLoopBlocksLexical(TFunc func);
 
+    template<typename TFunc>
+    BasicBlockVisit VisitRegularExitBlocks(TFunc func);
+
     BasicBlock* GetLexicallyTopMostBlock();
     BasicBlock* GetLexicallyBottomMostBlock();
 
@@ -2229,6 +2253,8 @@ class FlowGraphNaturalLoops
     // Collection of loops that were found.
     jitstd::vector<FlowGraphNaturalLoop*> m_loops;
 
+    unsigned m_improperLoopHeaders;
+
     FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs);
 
     static bool FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, ArrayStack<BasicBlock*>& worklist);
@@ -2239,7 +2265,7 @@ class FlowGraphNaturalLoops
         return m_dfsTree;
     }
 
-    size_t NumLoops()
+    size_t NumLoops() const
     {
         return m_loops.size();
     }
@@ -2306,6 +2332,13 @@ class FlowGraphNaturalLoops
 
     static FlowGraphNaturalLoops* Find(const FlowGraphDfsTree* dfs);
 
+    // Number of blocks with DFS backedges that are not natural loop headers
+    // (indicates presence of "irreducible" loops)
+    unsigned ImproperLoopHeaders() const
+    {
+        return m_improperLoopHeaders;
+    }
+
 #ifdef DEBUG
     static void Dump(FlowGraphNaturalLoops* loops);
 #endif // DEBUG
@@ -2331,7 +2364,13 @@ class FlowGraphDominatorTree
     }
 
     static BasicBlock* IntersectDom(BasicBlock* block1, BasicBlock* block2);
+
 public:
+    const FlowGraphDfsTree* GetDfsTree()
+    {
+        return m_dfsTree;
+    }
+
     BasicBlock* Intersect(BasicBlock* block, BasicBlock* block2);
     bool Dominates(BasicBlock* dominator, BasicBlock* dominated);
 
@@ -2359,6 +2398,10 @@ class BlockToNaturalLoopMap
     FlowGraphNaturalLoop* GetLoop(BasicBlock* block);
 
     static BlockToNaturalLoopMap* Build(FlowGraphNaturalLoops* loops);
+
+#ifdef DEBUG
+    void Dump() const;
+#endif // DEBUG
 };
 
 // Represents a data structure that can answer A -> B reachability queries in
@@ -2366,23 +2409,28 @@ class BlockToNaturalLoopMap
 // exceptional flow, then CanReach returns false.
 class BlockReachabilitySets
 {
-    FlowGraphDfsTree* m_dfsTree;
+    const FlowGraphDfsTree* m_dfsTree;
     BitVec* m_reachabilitySets;
 
-    BlockReachabilitySets(FlowGraphDfsTree* dfsTree, BitVec* reachabilitySets)
+    BlockReachabilitySets(const FlowGraphDfsTree* dfsTree, BitVec* reachabilitySets)
         : m_dfsTree(dfsTree)
         , m_reachabilitySets(reachabilitySets)
     {
     }
 
 public:
+    const FlowGraphDfsTree* GetDfsTree()
+    {
+        return m_dfsTree;
+    }
+
     bool CanReach(BasicBlock* from, BasicBlock* to);
 
 #ifdef DEBUG
     void Dump();
 #endif
 
-    static BlockReachabilitySets* Build(FlowGraphDfsTree* dfsTree);
+    static BlockReachabilitySets* Build(const FlowGraphDfsTree* dfsTree);
 };
 
 enum class FieldKindForVN
@@ -2504,6 +2552,7 @@ class Compiler
     friend class CSE_HeuristicRandom;
     friend class CSE_HeuristicReplay;
     friend class CSE_HeuristicRL;
+    friend class CSE_HeuristicParameterized;
     friend class CSE_Heuristic;
     friend class CodeGenInterface;
     friend class CodeGen;
@@ -2692,7 +2741,7 @@ class Compiler
 // Exception handling functions
 //
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
 
     bool ehNeedsShadowSPslots()
     {
@@ -2705,7 +2754,7 @@ class Compiler
     // etc.
     unsigned ehMaxHndNestingCount;
 
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
     static bool jitIsBetween(unsigned value, unsigned start, unsigned end);
     static bool jitIsBetweenInclusive(unsigned value, unsigned start, unsigned end);
@@ -2802,7 +2851,6 @@ class Compiler
     bool ehCallFinallyInCorrectRegion(BasicBlock* blockCallFinally, unsigned finallyIndex);
 #endif // DEBUG
 
-#if defined(FEATURE_EH_FUNCLETS)
     // Do we need a PSPSym in the main function? For codegen purposes, we only need one
     // if there is a filter that protects a region with a nested EH clause (such as a
     // try/catch nested in the 'try' body of a try/filter/filter-handler). See
@@ -2823,23 +2871,6 @@ class Compiler
 
     unsigned bbThrowIndex(BasicBlock* blk); // Get the index to use as the cache key for sharing throw blocks
 
-#else  // !FEATURE_EH_FUNCLETS
-
-    bool ehAnyFunclets()
-    {
-        return false;
-    }
-    unsigned ehFuncletCount()
-    {
-        return 0;
-    }
-
-    unsigned bbThrowIndex(BasicBlock* blk)
-    {
-        return blk->bbTryIndex;
-    } // Get the index to use as the cache key for sharing throw blocks
-#endif // !FEATURE_EH_FUNCLETS
-
     FlowEdge* BlockPredsWithEH(BasicBlock* blk);
     FlowEdge* BlockDominancePreds(BasicBlock* blk);
 
@@ -2890,12 +2921,8 @@ class Compiler
 
     void fgRemoveEHTableEntry(unsigned XTnum);
 
-#if defined(FEATURE_EH_FUNCLETS)
-
     EHblkDsc* fgAddEHTableEntry(unsigned XTnum);
 
-#endif // FEATURE_EH_FUNCLETS
-
     void fgSortEHTable();
 
     // Causes the EH table to obey some well-formedness conditions, by inserting
@@ -3014,14 +3041,15 @@ class Compiler
 
     GenTree* gtNewConWithPattern(var_types type, uint8_t pattern);
 
-    GenTreeLclVar* gtNewStoreLclVarNode(unsigned lclNum, GenTree* data);
+    GenTreeLclVar* gtNewStoreLclVarNode(unsigned lclNum, GenTree* value);
 
     GenTreeLclFld* gtNewStoreLclFldNode(
-        unsigned lclNum, var_types type, ClassLayout* layout, unsigned offset, GenTree* data);
+        unsigned lclNum, var_types type, ClassLayout* layout, unsigned offset, GenTree* value);
 
-    GenTreeLclFld* gtNewStoreLclFldNode(unsigned lclNum, var_types type, unsigned offset, GenTree* data)
+    GenTreeLclFld* gtNewStoreLclFldNode(unsigned lclNum, var_types type, unsigned offset, GenTree* value)
     {
-        return gtNewStoreLclFldNode(lclNum, type, (type == TYP_STRUCT) ? data->GetLayout(this) : nullptr, offset, data);
+        return gtNewStoreLclFldNode(
+            lclNum, type, (type == TYP_STRUCT) ? value->GetLayout(this) : nullptr, offset, value);
     }
 
     GenTree* gtNewPutArgReg(var_types type, GenTree* arg, regNumber argReg);
@@ -3176,6 +3204,14 @@ class Compiler
                                  CorInfoType simdBaseJitType,
                                  unsigned    simdSize);
 
+#if defined(TARGET_XARCH)
+    GenTree* gtNewSimdCvtNode(var_types              type,
+                              GenTree*               op1,
+                              CorInfoType            simdTargetBaseJitType,
+                              CorInfoType            simdSourceBaseJitType,
+                              unsigned               simdSize);
+#endif //TARGET_XARCH
+
     GenTree* gtNewSimdCreateBroadcastNode(
         var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
 
@@ -3329,6 +3365,8 @@ class Compiler
 #endif
 #endif // FEATURE_HW_INTRINSICS
 
+    GenTree* gtNewMemoryBarrier(bool loadOnly = false);
+
     GenTree* gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd);
 
     GenTreeLclFld* gtNewLclFldNode(unsigned lnum, var_types type, unsigned offset);
@@ -3366,7 +3404,7 @@ class Compiler
 
     GenTreeMDArr* gtNewMDArrLowerBound(GenTree* arrayOp, unsigned dim, unsigned rank, BasicBlock* block);
 
-    void gtInitializeStoreNode(GenTree* store, GenTree* data);
+    void gtInitializeStoreNode(GenTree* store, GenTree* value);
 
     void gtInitializeIndirNode(GenTreeIndir* indir, GenTreeFlags indirFlags);
 
@@ -3375,13 +3413,10 @@ class Compiler
     GenTreeIndir* gtNewIndir(var_types typ, GenTree* addr, GenTreeFlags indirFlags = GTF_EMPTY);
 
     GenTreeBlk* gtNewStoreBlkNode(
-        ClassLayout* layout, GenTree* addr, GenTree* data, GenTreeFlags indirFlags = GTF_EMPTY);
-
-    GenTreeStoreDynBlk* gtNewStoreDynBlkNode(
-        GenTree* addr, GenTree* data, GenTree* dynamicSize, GenTreeFlags indirFlags = GTF_EMPTY);
+        ClassLayout* layout, GenTree* addr, GenTree* value, GenTreeFlags indirFlags = GTF_EMPTY);
 
     GenTreeStoreInd* gtNewStoreIndNode(
-        var_types type, GenTree* addr, GenTree* data, GenTreeFlags indirFlags = GTF_EMPTY);
+        var_types type, GenTree* addr, GenTree* value, GenTreeFlags indirFlags = GTF_EMPTY);
 
     GenTree* gtNewLoadValueNode(
         var_types type, ClassLayout* layout, GenTree* addr, GenTreeFlags indirFlags = GTF_EMPTY);
@@ -3397,16 +3432,17 @@ class Compiler
     }
 
     GenTree* gtNewStoreValueNode(
-        var_types type, ClassLayout* layout, GenTree* addr, GenTree* data, GenTreeFlags indirFlags = GTF_EMPTY);
+        var_types type, ClassLayout* layout, GenTree* addr, GenTree* value, GenTreeFlags indirFlags = GTF_EMPTY);
 
-    GenTree* gtNewStoreValueNode(ClassLayout* layout, GenTree* addr, GenTree* data, GenTreeFlags indirFlags = GTF_EMPTY)
+    GenTree* gtNewStoreValueNode(
+        ClassLayout* layout, GenTree* addr, GenTree* value, GenTreeFlags indirFlags = GTF_EMPTY)
     {
-        return gtNewStoreValueNode(layout->GetType(), layout, addr, data, indirFlags);
+        return gtNewStoreValueNode(layout->GetType(), layout, addr, value, indirFlags);
     }
 
-    GenTree* gtNewStoreValueNode(var_types type, GenTree* addr, GenTree* data, GenTreeFlags indirFlags = GTF_EMPTY)
+    GenTree* gtNewStoreValueNode(var_types type, GenTree* addr, GenTree* value, GenTreeFlags indirFlags = GTF_EMPTY)
     {
-        return gtNewStoreValueNode(type, nullptr, addr, data, indirFlags);
+        return gtNewStoreValueNode(type, nullptr, addr, value, indirFlags);
     }
 
     GenTree* gtNewNullCheck(GenTree* addr, BasicBlock* basicBlock);
@@ -3429,7 +3465,7 @@ class Compiler
                               CORINFO_ACCESS_FLAGS    access,
                               CORINFO_FIELD_INFO*     pFieldInfo,
                               var_types               lclTyp,
-                              GenTree*                assg);
+                              GenTree*                value);
 
     GenTree* gtNewNothingNode();
 
@@ -3444,12 +3480,17 @@ class Compiler
     GenTreeAllocObj* gtNewAllocObjNode(
         unsigned int helper, bool helperHasSideEffects, CORINFO_CLASS_HANDLE clsHnd, var_types type, GenTree* op1);
 
-    GenTreeAllocObj* gtNewAllocObjNode(CORINFO_RESOLVED_TOKEN* pResolvedToken, bool useParent);
+    GenTreeAllocObj* gtNewAllocObjNode(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_METHOD_HANDLE callerHandle, bool useParent);
 
     GenTree* gtNewRuntimeLookup(CORINFO_GENERIC_HANDLE hnd, CorInfoGenericHandleType hndTyp, GenTree* lookupTree);
 
     GenTreeIndir* gtNewMethodTableLookup(GenTree* obj);
 
+#if defined(TARGET_ARM64)
+    GenTree* gtNewSimdConvertVectorToMaskNode(var_types type, GenTree* node, CorInfoType simdBaseJitType, unsigned simdSize);
+    GenTree* gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type);
+#endif
+
     //------------------------------------------------------------------------
     // Other GenTree functions
 
@@ -3536,15 +3577,14 @@ class Compiler
                               GenTreeFlags GenTreeFlags = GTF_SIDE_EFFECT,
                               bool         ignoreRoot   = false);
 
+    GenTree* gtWrapWithSideEffects(GenTree*     tree,
+                                   GenTree*     sideEffectsSource,
+                                   GenTreeFlags sideEffectsFlags = GTF_SIDE_EFFECT,
+                                   bool         ignoreRoot       = false);
+
     bool gtSplitTree(
         BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse);
 
-    // Static fields of struct types (and sometimes the types that those are reduced to) are represented by having the
-    // static field contain an object pointer to the boxed struct.  This simplifies the GC implementation...but
-    // complicates the JIT somewhat.  This predicate returns "true" iff a node with type "fieldNodeType", representing
-    // the given "fldHnd", is such an object pointer.
-    bool gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_FIELD_HANDLE fldHnd);
-
     bool gtStoreDefinesField(
         LclVarDsc* fieldVarDsc, ssize_t offset, unsigned size, ssize_t* pFieldStoreOffset, unsigned* pFieldStoreSize);
 
@@ -3759,6 +3799,8 @@ class Compiler
     LclVarDsc* lvaTable;    // variable descriptor table
     unsigned   lvaTableCnt; // lvaTable size (>= lvaCount)
 
+    ABIPassingInformation* lvaParameterPassingInfo;
+
     unsigned lvaTrackedCount;             // actual # of locals being tracked
     unsigned lvaTrackedCountInSizeTUnits; // min # of size_t's sufficient to hold a bit for all the locals being tracked
 
@@ -3841,6 +3883,12 @@ class Compiler
     // where it is used to detect tail-call chains.
     unsigned lvaRetAddrVar;
 
+#ifdef SWIFT_SUPPORT
+    unsigned lvaSwiftSelfArg;
+    unsigned lvaSwiftErrorArg;
+    unsigned lvaSwiftErrorLocal;
+#endif
+
 #if defined(DEBUG) && defined(TARGET_XARCH)
 
     unsigned lvaReturnSpCheck; // Stores SP to confirm it is not corrupted on return.
@@ -3862,10 +3910,10 @@ class Compiler
 //-------------------------------------------------------------------------
 // All these frame offsets are inter-related and must be kept in sync
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
     // This is used for the callable handlers
     unsigned lvaShadowSPslotsVar; // Block-layout TYP_STRUCT variable for all the shadow SP slots
-#endif                            // FEATURE_EH_FUNCLETS
+#endif                            // FEATURE_EH_WINDOWS_X86
 
     int lvaCachedGenericContextArgOffs;
     int lvaCachedGenericContextArgOffset(); // For CORINFO_CALLCONV_PARAMTYPE and if generic context is passed as
@@ -3905,6 +3953,7 @@ class Compiler
     int lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs);
 #endif // !UNIX_AMD64_ABI
     void lvaAssignVirtualFrameOffsetsToLocals();
+    bool lvaParamHasLocalStackSpace(unsigned lclNum);
     int lvaAllocLocalAndSetVirtualOffset(unsigned lclNum, unsigned size, int stkOffs);
 #ifdef TARGET_AMD64
     // Returns true if compCalleeRegsPushed (including RBP if used as frame pointer) is even.
@@ -3963,6 +4012,14 @@ class Compiler
                        CORINFO_ARG_LIST_HANDLE varList,
                        CORINFO_SIG_INFO*       varSig);
 
+    template <typename Classifier>
+    void lvaClassifyParameterABI(Classifier& classifier);
+
+    void lvaClassifyParameterABI();
+
+    bool lvaInitSpecialSwiftParam(CORINFO_ARG_LIST_HANDLE argHnd, InitVarDscInfo* varDscInfo, CorInfoType type, CORINFO_CLASS_HANDLE typeHnd);
+    bool lvaHasAnySwiftStackParamToReassemble();
+
     var_types lvaGetActualType(unsigned lclNum);
     var_types lvaGetRealType(unsigned lclNum);
 
@@ -4203,9 +4260,7 @@ class Compiler
 
     unsigned lvaStubArgumentVar; // variable representing the secret stub argument coming in EAX
 
-#if defined(FEATURE_EH_FUNCLETS)
     unsigned lvaPSPSym; // variable representing the PSPSym
-#endif
 
     InlineInfo*     impInlineInfo; // Only present for inlinees
     InlineStrategy* m_inlineStrategy;
@@ -4375,7 +4430,13 @@ class Compiler
     void impCheckForPInvokeCall(
         GenTreeCall* call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block);
     GenTreeCall* impImportIndirectCall(CORINFO_SIG_INFO* sig, const DebugInfo& di = DebugInfo());
-    void impPopArgsForUnmanagedCall(GenTreeCall* call, CORINFO_SIG_INFO* sig);
+    void impPopArgsForUnmanagedCall(GenTreeCall* call, CORINFO_SIG_INFO* sig, GenTree** swiftErrorNode);
+    void impPopArgsForSwiftCall(GenTreeCall* call, CORINFO_SIG_INFO* sig, GenTree** swiftErrorNode);
+    void impRetypeUnmanagedCallArgs(GenTreeCall* call);
+
+#ifdef SWIFT_SUPPORT
+    void impAppendSwiftErrorStore(GenTree* const swiftErrorNode);
+#endif // SWIFT_SUPPORT
 
     void impInsertHelperCall(CORINFO_HELPER_DESC* helperCall);
     void impHandleAccessAllowed(CorInfoIsAccessAllowedResult result, CORINFO_HELPER_DESC* helperCall);
@@ -4421,6 +4482,9 @@ class Compiler
     GenTree* impImplicitR4orR8Cast(GenTree* tree, var_types dstTyp);
 
     void impImportLeave(BasicBlock* block);
+#if defined(FEATURE_EH_WINDOWS_X86)
+    void impImportLeaveEHRegions(BasicBlock* block);
+#endif
     void impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr);
     GenTree* impTypeIsAssignable(GenTree* typeTo, GenTree* typeFrom);
 
@@ -4430,17 +4494,23 @@ class Compiler
         Ordinal           = 4,
         OrdinalIgnoreCase = 5
     };
-    enum StringComparisonJoint
+    enum class StringComparisonJoint
     {
         Eq,  // (d1 == cns1) && (s2 == cns2)
         Xor, // (d1 ^ cns1) | (s2 ^ cns2)
     };
-    GenTree* impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO* sig, unsigned methodFlags);
-    GenTree* impSpanEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO* sig, unsigned methodFlags);
+    enum class StringComparisonKind
+    {
+        Equals,
+        StartsWith,
+        EndsWith
+    };
+    GenTree* impUtf16StringComparison(StringComparisonKind kind, CORINFO_SIG_INFO* sig, unsigned methodFlags);
+    GenTree* impUtf16SpanComparison(StringComparisonKind kind, CORINFO_SIG_INFO* sig, unsigned methodFlags);
     GenTree* impExpandHalfConstEquals(GenTreeLclVarCommon*   data,
                                       GenTree*         lengthFld,
                                       bool             checkForNull,
-                                      bool             startsWith,
+                                      StringComparisonKind kind,
                                       WCHAR*           cnsData,
                                       int              len,
                                       int              dataOffset,
@@ -4450,7 +4520,7 @@ class Compiler
                                  ssize_t               offset,
                                  ssize_t               value,
                                  StringComparison      ignoreCase,
-                                 StringComparisonJoint joint = Eq);
+                                 StringComparisonJoint joint = StringComparisonJoint::Eq);
     GenTree* impExpandHalfConstEqualsSWAR(
         GenTreeLclVarCommon* data, WCHAR* cns, int len, int dataOffset, StringComparison cmpMode);
     GenTree* impExpandHalfConstEqualsSIMD(
@@ -4577,12 +4647,12 @@ class Compiler
     void impAppendStmt(Statement* stmt);
     void impInsertStmtBefore(Statement* stmt, Statement* stmtBefore);
     Statement* impAppendTree(GenTree* tree, unsigned chkLevel, const DebugInfo& di, bool checkConsumedDebugInfo = true);
-    void impStoreTemp(unsigned         lclNum,
-                      GenTree*         val,
-                      unsigned         curLevel,
-                      Statement**      pAfterStmt = nullptr,
-                      const DebugInfo& di         = DebugInfo(),
-                      BasicBlock*      block      = nullptr);
+    void impStoreToTemp(unsigned         lclNum,
+                        GenTree*         val,
+                        unsigned         curLevel,
+                        Statement**      pAfterStmt = nullptr,
+                        const DebugInfo& di         = DebugInfo(),
+                        BasicBlock*      block      = nullptr);
     Statement* impExtractLastStmt();
     GenTree* impCloneExpr(GenTree*             tree,
                           GenTree**            clone,
@@ -4904,7 +4974,7 @@ class Compiler
 
     unsigned impInlineFetchLocal(unsigned lclNum DEBUGARG(const char* reason));
 
-    GenTree* impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, InlLclVarInfo* lclTypeInfo);
+    GenTree* impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& lclInfo);
 
     bool impInlineIsThis(GenTree* tree, InlArgInfo* inlArgInfo);
 
@@ -4963,19 +5033,16 @@ class Compiler
     BasicBlock* fgFirstColdBlock; // First block to be placed in the cold section
     BasicBlock* fgEntryBB;        // For OSR, the original method's entry point
     BasicBlock* fgOSREntryBB;     // For OSR, the logical entry point (~ patchpoint)
-#if defined(FEATURE_EH_FUNCLETS)
     BasicBlock* fgFirstFuncletBB; // First block of outlined funclets (to allow block insertion before the funclets)
-#endif
     BasicBlock* fgFirstBBScratch;   // Block inserted for initialization stuff. Is nullptr if no such block has been
                                     // created.
     BasicBlockList* fgReturnBlocks; // list of BBJ_RETURN blocks
     unsigned        fgEdgeCount;    // # of control flow edges between the BBs
     unsigned        fgBBcount;      // # of BBs in the method (in the linked list that starts with fgFirstBB)
 #ifdef DEBUG
-    unsigned                     fgBBcountAtCodegen; // # of BBs in the method at the start of codegen
     jitstd::vector<BasicBlock*>* fgBBOrder;          // ordered vector of BBs
 #endif
-    // Used as a quick check for whether loop alignment should look for natural loops.
+    // Used as a quick check for whether phases downstream of loop finding should look for natural loops.
     // If true: there may or may not be any natural loops in the flow graph, so try to find them
     // If false: there's definitely not any natural loops in the flow graph
     bool         fgMightHaveNaturalLoops;
@@ -4996,8 +5063,11 @@ class Compiler
     FlowGraphDominatorTree* m_domTree;
     BlockReachabilitySets* m_reachabilitySets;
 
-    bool optLoopsRequirePreHeaders; // Do we require that all loops (in m_loops) have pre-headers?
-    unsigned optNumNaturalLoopsFound; // Number of natural loops found in the loop finding phase
+    // Do we require loops to be in canonical form? The canonical form ensures that:
+    // 1. All loops have preheaders (single entry blocks that always enter the loop)
+    // 2. All loop exits where bbIsHandlerBeg(exit) is false have only loop predecessors.
+    //
+    bool optLoopsCanonical;
 
     bool fgBBVarSetsInited;
 
@@ -5078,34 +5148,31 @@ class Compiler
     void fgExtendEHRegionBefore(BasicBlock* block);
     void fgExtendEHRegionAfter(BasicBlock* block);
 
-    BasicBlock* fgNewBBbefore(BBKinds jumpKind, BasicBlock* block, bool extendRegion, BasicBlock* jumpDest = nullptr);
+    BasicBlock* fgNewBBbefore(BBKinds jumpKind, BasicBlock* block, bool extendRegion);
 
-    BasicBlock* fgNewBBafter(BBKinds jumpKind, BasicBlock* block, bool extendRegion, BasicBlock* jumpDest = nullptr);
+    BasicBlock* fgNewBBafter(BBKinds jumpKind, BasicBlock* block, bool extendRegion);
 
-    BasicBlock* fgNewBBFromTreeAfter(BBKinds jumpKind, BasicBlock* block, GenTree* tree, DebugInfo& debugInfo, BasicBlock* jumpDest = nullptr, bool updateSideEffects = false);
+    BasicBlock* fgNewBBFromTreeAfter(BBKinds jumpKind, BasicBlock* block, GenTree* tree, DebugInfo& debugInfo, bool updateSideEffects = false);
 
     BasicBlock* fgNewBBinRegion(BBKinds jumpKind,
                                 unsigned    tryIndex,
                                 unsigned    hndIndex,
                                 BasicBlock* nearBlk,
-                                BasicBlock* jumpDest    = nullptr,
                                 bool        putInFilter = false,
                                 bool        runRarely   = false,
                                 bool        insertAtEnd = false);
 
     BasicBlock* fgNewBBinRegion(BBKinds jumpKind,
                                 BasicBlock* srcBlk,
-                                BasicBlock* jumpDest    = nullptr,
                                 bool        runRarely   = false,
                                 bool        insertAtEnd = false);
 
-    BasicBlock* fgNewBBinRegion(BBKinds jumpKind, BasicBlock* jumpDest = nullptr);
+    BasicBlock* fgNewBBinRegion(BBKinds jumpKind);
 
     BasicBlock* fgNewBBinRegionWorker(BBKinds jumpKind,
                                       BasicBlock* afterBlk,
                                       unsigned    xcptnIndex,
-                                      bool        putInTryRegion,
-                                      BasicBlock* jumpDest = nullptr);
+                                      bool        putInTryRegion);
 
     void fgInsertBBbefore(BasicBlock* insertBeforeBlk, BasicBlock* newBlk);
     void fgInsertBBafter(BasicBlock* insertAfterBlk, BasicBlock* newBlk);
@@ -5118,7 +5185,6 @@ class Compiler
 
     bool fgModified;             // True if the flow graph has been modified recently
     bool fgPredsComputed;        // Have we computed the bbPreds list
-    bool fgReturnBlocksComputed; // Have we computed the return blocks list?
     bool fgOptimizedFinally;     // Did we optimize any try-finallys?
     bool fgCanonicalizedFirstBB; // TODO-Quirk: did we end up canonicalizing first BB?
 
@@ -5169,9 +5235,7 @@ class Compiler
                                             // This is derived from the profile data
                                             // or is BB_UNITY_WEIGHT when we don't have profile data
 
-#if defined(FEATURE_EH_FUNCLETS)
     bool fgFuncletsCreated; // true if the funclet creation phase has been run
-#endif                      // FEATURE_EH_FUNCLETS
 
     bool fgGlobalMorph; // indicates if we are during the global morphing phase
                         // since fgMorphTree can be called from several places
@@ -5218,14 +5282,6 @@ class Compiler
     void fgCleanupContinuation(BasicBlock* continuation);
 
     PhaseStatus fgTailMergeThrows();
-    void fgTailMergeThrowsFallThroughHelper(BasicBlock* predBlock,
-                                            BasicBlock* nonCanonicalBlock,
-                                            BasicBlock* canonicalBlock,
-                                            FlowEdge*   predEdge);
-    void fgTailMergeThrowsJumpToHelper(BasicBlock* predBlock,
-                                       BasicBlock* nonCanonicalBlock,
-                                       BasicBlock* canonicalBlock,
-                                       FlowEdge*   predEdge);
 
     bool fgRetargetBranchesToCanonicalCallFinally(BasicBlock*      block,
                                                   BasicBlock*      handler,
@@ -5233,16 +5289,12 @@ class Compiler
 
     GenTree* fgGetCritSectOfStaticMethod();
 
-#if defined(FEATURE_EH_FUNCLETS)
-
     void fgAddSyncMethodEnterExit();
 
     GenTree* fgCreateMonitorTree(unsigned lvaMonitorBool, unsigned lvaThisVar, BasicBlock* block, bool enter);
 
     void fgConvertSyncReturnToLeave(BasicBlock* block);
 
-#endif // FEATURE_EH_FUNCLETS
-
     void fgAddReversePInvokeEnterExit();
 
     bool fgMoreThanOneReturnBlock();
@@ -5356,7 +5408,6 @@ class Compiler
     Statement* fgNewStmtFromTree(GenTree* tree, const DebugInfo& di);
 
     GenTreeQmark* fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst = nullptr);
-    bool fgExpandQmarkForCastInstOf(BasicBlock* block, Statement* stmt);
     bool fgExpandQmarkStmt(BasicBlock* block, Statement* stmt);
     void fgExpandQmarkNodes();
 
@@ -5496,6 +5547,12 @@ class Compiler
         return m_signatureToLookupInfoMap;
     }
 
+#ifdef SWIFT_SUPPORT
+    typedef JitHashTable<CORINFO_CLASS_HANDLE, JitPtrKeyFuncs<struct CORINFO_CLASS_STRUCT_>, CORINFO_SWIFT_LOWERING*> SwiftLoweringMap;
+    SwiftLoweringMap* m_swiftLoweringCache;
+    const CORINFO_SWIFT_LOWERING* GetSwiftLowering(CORINFO_CLASS_HANDLE clsHnd);
+#endif
+
     void optRecordLoopMemoryDependence(GenTree* tree, BasicBlock* block, ValueNum memoryVN);
     void optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree);
 
@@ -5550,6 +5607,8 @@ class Compiler
     void fgValueNumberFieldStore(
         GenTree* storeNode, GenTree* baseAddr, FieldSeq* fieldSeq, ssize_t offset, unsigned storeSize, ValueNum value);
 
+    static bool fgGetStaticFieldSeqAndAddress(ValueNumStore* vnStore, GenTree* tree, ssize_t* byteOffset, FieldSeq** pFseq);
+
     bool fgValueNumberConstLoad(GenTreeIndir* tree);
 
     // Compute the value number for a byref-exposed load of the given type via the given pointerVN.
@@ -5600,8 +5659,6 @@ class Compiler
     void fgUpdateConstTreeValueNumber(GenTree* tree);
 
     // Assumes that all inputs to "tree" have had value numbers assigned; assigns a VN to tree.
-    // (With some exceptions: the VN of the lhs of an assignment is assigned as part of the
-    // assignment.)
     void fgValueNumberTree(GenTree* tree);
 
     void fgValueNumberStore(GenTree* tree);
@@ -5777,8 +5834,6 @@ class Compiler
     template <typename CanRemoveBlockBody>
     bool fgRemoveUnreachableBlocks(CanRemoveBlockBody canRemoveBlock);
 
-    PhaseStatus fgComputeReachability(); // Perform flow graph node reachability analysis.
-
     PhaseStatus fgComputeDominators(); // Compute dominators
 
     bool fgRemoveDeadBlocks(); // Identify and remove dead blocks.
@@ -5827,21 +5882,15 @@ class Compiler
 public:
     // For many purposes, it is desirable to be able to enumerate the *distinct* targets of a switch statement,
     // skipping duplicate targets.  (E.g., in flow analyses that are only interested in the set of possible targets.)
-    // SwitchUniqueSuccSet contains the non-duplicated switch targets.
-    // (Code that modifies the jump table of a switch has an obligation to call Compiler::UpdateSwitchTableTarget,
-    // which in turn will call the "UpdateTarget" method of this type if a SwitchUniqueSuccSet has already
-    // been computed for the switch block.  If a switch block is deleted or is transformed into a non-switch,
-    // we leave the entry associated with the block, but it will no longer be accessed.)
+    // SwitchUniqueSuccSet contains the non-duplicated switch successor edges.
+    // Code that modifies the flowgraph (such as by renumbering blocks) must call Compiler::InvalidateUniqueSwitchSuccMap,
+    // and code that modifies the targets of a switch block must call Compiler::fgInvalidateSwitchDescMapEntry.
+    // If the unique targets of a switch block are needed later, they will be recomputed, ensuring they're up-to-date.
     struct SwitchUniqueSuccSet
     {
-        unsigned     numDistinctSuccs; // Number of distinct targets of the switch.
-        BasicBlock** nonDuplicates;    // Array of "numDistinctSuccs", containing all the distinct switch target
-                                       // successors.
-
-        // The switch block "switchBlk" just had an entry with value "from" modified to the value "to".
-        // Update "this" as necessary: if "from" is no longer an element of the jump table of "switchBlk",
-        // remove it from "this", and ensure that "to" is a member.  Use "alloc" to do any required allocation.
-        void UpdateTarget(CompAllocator alloc, BasicBlock* switchBlk, BasicBlock* from, BasicBlock* to);
+        unsigned   numDistinctSuccs; // Number of distinct targets of the switch.
+        FlowEdge** nonDuplicates;    // Array of "numDistinctSuccs", containing all the distinct switch target
+                                     // successor edges.
     };
 
     typedef JitHashTable<BasicBlock*, JitPtrKeyFuncs<BasicBlock>, SwitchUniqueSuccSet> BlockToSwitchDescMap;
@@ -5873,11 +5922,6 @@ class Compiler
     // the corresponding SwitchUniqueSuccSet.
     SwitchUniqueSuccSet GetDescriptorForSwitch(BasicBlock* switchBlk);
 
-    // The switch block "switchBlk" just had an entry with value "from" modified to the value "to".
-    // Update "this" as necessary: if "from" is no longer an element of the jump table of "switchBlk",
-    // remove it from "this", and ensure that "to" is a member.
-    void UpdateSwitchTableTarget(BasicBlock* switchBlk, BasicBlock* from, BasicBlock* to);
-
     // Remove the "SwitchUniqueSuccSet" of "switchBlk" in the BlockToSwitchDescMap.
     void fgInvalidateSwitchDescMapEntry(BasicBlock* switchBlk);
 
@@ -5889,7 +5933,7 @@ class Compiler
 
     FlowEdge* fgGetPredForBlock(BasicBlock* block, BasicBlock* blockPred, FlowEdge*** ptrToPred);
 
-    FlowEdge* fgRemoveRefPred(BasicBlock* block, BasicBlock* blockPred);
+    void fgRemoveRefPred(FlowEdge* edge);
 
     FlowEdge* fgRemoveAllRefPreds(BasicBlock* block, BasicBlock* blockPred);
 
@@ -5901,16 +5945,28 @@ class Compiler
 
     void fgReplaceEhfSuccessor(BasicBlock* block, BasicBlock* oldSucc, BasicBlock* newSucc);
 
-    void fgRemoveEhfSuccessor(BasicBlock* block, BasicBlock* succ);
+    void fgRemoveEhfSuccessor(BasicBlock* block, const unsigned succIndex);
+    
+    void fgRemoveEhfSuccessor(FlowEdge* succEdge);
 
     void fgReplaceJumpTarget(BasicBlock* block, BasicBlock* oldTarget, BasicBlock* newTarget);
 
-    void fgReplacePred(BasicBlock* block, BasicBlock* oldPred, BasicBlock* newPred);
+    void fgReplacePred(FlowEdge* edge, BasicBlock* const newPred);
 
     // initializingPreds is only 'true' when we are computing preds in fgLinkBasicBlocks()
     template <bool initializingPreds = false>
     FlowEdge* fgAddRefPred(BasicBlock* block, BasicBlock* blockPred, FlowEdge* oldEdge = nullptr);
 
+private:
+    FlowEdge** fgGetPredInsertPoint(BasicBlock* blockPred, BasicBlock* newTarget);
+
+public:
+    void fgRedirectTargetEdge(BasicBlock* block, BasicBlock* newTarget);
+
+    void fgRedirectTrueEdge(BasicBlock* block, BasicBlock* newTarget);
+
+    void fgRedirectFalseEdge(BasicBlock* block, BasicBlock* newTarget);
+
     void fgFindBasicBlocks();
 
     bool fgCheckEHCanInsertAfterBlock(BasicBlock* blk, unsigned regionIndex, bool putInTryRegion);
@@ -5934,7 +5990,7 @@ class Compiler
 
     PhaseStatus fgCanonicalizeFirstBB();
 
-    void fgSetEHRegionForNewPreheader(BasicBlock* preheader);
+    void fgSetEHRegionForNewPreheaderOrExit(BasicBlock* preheader);
 
     void fgUnreachableBlock(BasicBlock* block);
 
@@ -5979,15 +6035,14 @@ class Compiler
     };
     BasicBlock* fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE relocateType);
 
-#if defined(FEATURE_EH_FUNCLETS)
     bool fgIsIntraHandlerPred(BasicBlock* predBlock, BasicBlock* block);
     bool fgAnyIntraHandlerPreds(BasicBlock* block);
     void fgInsertFuncletPrologBlock(BasicBlock* block);
     void        fgCreateFuncletPrologBlocks();
     PhaseStatus fgCreateFunclets();
-#else  // !FEATURE_EH_FUNCLETS
+#if defined(FEATURE_EH_WINDOWS_X86)
     bool fgRelocateEHRegions();
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
     bool fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock* target);
 
@@ -6014,9 +6069,7 @@ class Compiler
 
     bool fgReorderBlocks(bool useProfile);
 
-#ifdef FEATURE_EH_FUNCLETS
     bool fgFuncletsAreCold();
-#endif // FEATURE_EH_FUNCLETS
 
     PhaseStatus fgDetermineFirstColdBlock();
 
@@ -6069,7 +6122,11 @@ class Compiler
 
     void fgDispBBLiveness(BasicBlock* block);
     void fgDispBBLiveness();
-    void fgTableDispBasicBlock(const BasicBlock* block, const BasicBlock* nextBlock = nullptr, int blockTargetFieldWidth = 21, int ibcColWidth = 0);
+    void fgTableDispBasicBlock(const BasicBlock* block,
+        const BasicBlock* nextBlock = nullptr,
+        bool printEdgeLikelihoods = true,
+        int blockTargetFieldWidth = 21,
+        int ibcColWidth = 0);
     void fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock, bool dumpTrees);
     void fgDispBasicBlocks(bool dumpTrees = false);
     void fgDumpStmtTree(const BasicBlock* block, Statement* stmt);
@@ -6100,11 +6157,11 @@ class Compiler
     void fgDebugCheckFlagsHelper(GenTree* tree, GenTreeFlags actualFlags, GenTreeFlags expectedFlags);
     void fgDebugCheckTryFinallyExits();
     void fgDebugCheckProfileWeights();
-    void fgDebugCheckProfileWeights(ProfileChecks checks);
+    bool fgDebugCheckProfileWeights(ProfileChecks checks);
     bool fgDebugCheckIncomingProfileData(BasicBlock* block, ProfileChecks checks);
     bool fgDebugCheckOutgoingProfileData(BasicBlock* block, ProfileChecks checks);
 
-    void fgDebugCheckDfsTree();
+    void fgDebugCheckFlowGraphAnnotations();
 
 #endif // DEBUG
 
@@ -6240,6 +6297,13 @@ class Compiler
     unsigned                               fgPgoInlineeNoPgo;
     unsigned                               fgPgoInlineeNoPgoSingleBlock;
     bool                                   fgPgoHaveWeights;
+    bool                                   fgPgoSynthesized;
+    bool                                   fgPgoConsistent;
+
+#ifdef DEBUG
+    bool                                   fgPgoConsistentCheck;
+#endif
+
 
     void WalkSpanningTree(SpanningTreeVisitor* visitor);
     void fgSetProfileWeight(BasicBlock* block, weight_t weight);
@@ -6288,7 +6352,7 @@ class Compiler
     //                  Create a new temporary variable to hold the result of *ppTree,
     //                  and transform the graph accordingly.
     GenTree* fgInsertCommaFormTemp(GenTree** ppTree);
-    TempInfo fgMakeTemp(GenTree* rhs);
+    TempInfo fgMakeTemp(GenTree* value);
     GenTree* fgMakeMultiUse(GenTree** ppTree);
 
     //                  Recognize a bitwise rotation pattern and convert into a GT_ROL or a GT_ROR node.
@@ -6384,7 +6448,7 @@ class Compiler
     bool fgMorphCombineSIMDFieldStores(BasicBlock* block, Statement* stmt);
     void impMarkContiguousSIMDFieldStores(Statement* stmt);
 
-    // fgPreviousCandidateSIMDFieldStoreStmt is only used for tracking previous simd field assignment
+    // fgPreviousCandidateSIMDFieldStoreStmt is only used for tracking previous simd field store
     // in function: Compiler::impMarkContiguousSIMDFieldStores.
     Statement* fgPreviousCandidateSIMDFieldStoreStmt;
 
@@ -6463,7 +6527,6 @@ class Compiler
 public:
     GenTree* fgMorphInitBlock(GenTree* tree);
     GenTree* fgMorphCopyBlock(GenTree* tree);
-    GenTree* fgMorphStoreDynBlock(GenTreeStoreDynBlk* tree);
 private:
     GenTree* fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optAssertionPropDone = nullptr);
     void fgTryReplaceStructLocalWithField(GenTree* tree);
@@ -6513,7 +6576,7 @@ class Compiler
 
     //----------------------- Liveness analysis -------------------------------
 
-    VARSET_TP fgCurUseSet; // vars used     by block (before an assignment)
+    VARSET_TP fgCurUseSet; // vars used     by block (before a def)
     VARSET_TP fgCurDefSet; // vars assigned by block (before a use)
 
     MemoryKindSet fgCurMemoryUse;   // True iff the current basic block uses memory.
@@ -6622,6 +6685,7 @@ class Compiler
 
     void fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* result, InlineContext** createdContext);
     void fgInsertInlineeBlocks(InlineInfo* pInlineInfo);
+    void fgInsertInlineeArgument(const InlArgInfo& argInfo, BasicBlock* block, Statement** afterStmt, Statement** newStmt, const DebugInfo& callDI);
     Statement* fgInlinePrependStatements(InlineInfo* inlineInfo);
     void fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, Statement* stmt);
 
@@ -6659,11 +6723,6 @@ class Compiler
     bool fgForwardSubHasStoreInterference(Statement* defStmt, Statement* nextStmt, GenTree* nextStmtUse);
     void fgForwardSubUpdateLiveness(GenTree* newSubListFirst, GenTree* newSubListLast);
 
-    // The given local variable, required to be a struct variable, is being assigned via
-    // a "lclField", to make it masquerade as an integral type in the ABI.  Make sure that
-    // the variable is not enregistered, and is therefore not promoted independently.
-    void fgLclFldAssign(unsigned lclNum);
-
     enum TypeProducerKind
     {
         TPK_Unknown = 0, // May not be a RuntimeType
@@ -6684,6 +6743,8 @@ class Compiler
 public:
     bool fgIsBigOffset(size_t offset);
 
+    bool IsValidLclAddr(unsigned lclNum, unsigned offset);
+
 private:
     bool fgNeedReturnSpillTemp();
 
@@ -6801,23 +6862,29 @@ class Compiler
 public:
     PhaseStatus optOptimizeBools();
     PhaseStatus optSwitchRecognition();
-    bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, GenTree* nodeToTest);
+    bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
     bool optSwitchDetectAndConvert(BasicBlock* firstBlock);
 
     PhaseStatus optInvertLoops();    // Invert loops so they're entered at top and tested at bottom.
     PhaseStatus optOptimizeFlow();   // Simplify flow graph and do tail duplication
     PhaseStatus optOptimizeLayout(); // Optimize the BasicBlock layout of the method
+    PhaseStatus optOptimizePostLayout(); // Run optimizations after block layout is finalized
     PhaseStatus optSetBlockWeights();
     PhaseStatus optFindLoopsPhase(); // Finds loops and records them in the loop table
 
     void optFindLoops();
     bool optCanonicalizeLoops();
+
     void optCompactLoops();
     void optCompactLoop(FlowGraphNaturalLoop* loop);
     BasicBlock* optFindLoopCompactionInsertionPoint(FlowGraphNaturalLoop* loop, BasicBlock* top);
     BasicBlock* optTryAdvanceLoopCompactionInsertionPoint(FlowGraphNaturalLoop* loop, BasicBlock* insertionPoint, BasicBlock* top, BasicBlock* bottom);
     bool optCreatePreheader(FlowGraphNaturalLoop* loop);
-    void optSetPreheaderWeight(FlowGraphNaturalLoop* loop, BasicBlock* preheader);
+    void optSetWeightForPreheaderOrExit(FlowGraphNaturalLoop* loop, BasicBlock* block);
+    weight_t optEstimateEdgeLikelihood(BasicBlock* from, BasicBlock* to, bool* fromProfile);
+
+    bool optCanonicalizeExits(FlowGraphNaturalLoop* loop);
+    bool optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit);
 
     PhaseStatus optCloneLoops();
     void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context);
@@ -6830,16 +6897,11 @@ class Compiler
 
 public:
     bool fgHasLoops;
-#ifdef DEBUG
-    unsigned loopAlignCandidates; // number of candidates identified by placeLoopAlignInstructions
-    unsigned loopsAligned;        // number of loops actually aligned
-#endif                          // DEBUG
 
 protected:
     unsigned optCallCount;         // number of calls made in the method
     unsigned optIndirectCallCount; // number of virtual, interface and indirect calls made in the method
     unsigned optNativeCallCount;   // number of Pinvoke/Native calls made in the method
-    unsigned optLoopsCloned;       // number of loops cloned in the current method.
 
 #ifdef DEBUG
     void optCheckPreds();
@@ -6858,16 +6920,9 @@ class Compiler
     bool optExtractInitTestIncr(
         BasicBlock** pInitBlock, BasicBlock* bottom, BasicBlock* top, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr);
 
-    enum class RedirectBlockOption
-    {
-        DoNotChangePredLists, // do not modify pred lists
-        UpdatePredLists,      // add/remove to pred lists
-        AddToPredLists,       // only add to pred lists
-    };
-
-    void optRedirectBlock(BasicBlock*      blk,
-                          BlockToBlockMap* redirectMap,
-                          const RedirectBlockOption = RedirectBlockOption::DoNotChangePredLists);
+    void optSetMappedBlockTargets(BasicBlock*      blk,
+                          BasicBlock*      newBlk,
+                          BlockToBlockMap* redirectMap);
 
     // Marks the containsCall information to "loop" and any parent loops.
     void AddContainsCallAllContainingLoops(FlowGraphNaturalLoop* loop);
@@ -7039,6 +7094,9 @@ class Compiler
         return (enckey & ~TARGET_SIGN_BIT) << CSE_CONST_SHARED_LOW_BITS;
     }
 
+    static bool optSharedConstantCSEEnabled();
+    static bool optConstantCSEEnabled();
+
 /**************************************************************************
  *                   Value Number based CSEs
  *************************************************************************/
@@ -7068,6 +7126,7 @@ class Compiler
     unsigned optCSEstart;          // The first local variable number that is a CSE
     unsigned optCSEattempt;        // The number of CSEs attempted so far.
     unsigned optCSEcount;          // The total count of CSEs introduced.
+    unsigned optCSEunmarks;        // Number of CSE trees unmarked
     weight_t optCSEweight;         // The weight of the current block when we are doing PerformCSE
     CSE_HeuristicCommon* optCSEheuristic; // CSE Heuristic to use for this method
 
@@ -7386,23 +7445,23 @@ class Compiler
 
     typedef JitHashTable<unsigned, JitSmallPrimitiveKeyFuncs<unsigned>, GenTree*> LocalNumberToNullCheckTreeMap;
 
-    GenTree* getArrayLengthFromAllocation(GenTree* tree DEBUGARG(BasicBlock* block));
-    GenTree* optPropGetValueRec(unsigned lclNum, unsigned ssaNum, optPropKind valueKind, int walkDepth);
-    GenTree* optPropGetValue(unsigned lclNum, unsigned ssaNum, optPropKind valueKind);
-    GenTree* optEarlyPropRewriteTree(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap);
-    bool optDoEarlyPropForBlock(BasicBlock* block);
+    GenTree*    getArrayLengthFromAllocation(GenTree* tree DEBUGARG(BasicBlock* block));
+    GenTree*    optPropGetValueRec(unsigned lclNum, unsigned ssaNum, optPropKind valueKind, int walkDepth);
+    GenTree*    optPropGetValue(unsigned lclNum, unsigned ssaNum, optPropKind valueKind);
+    GenTree*    optEarlyPropRewriteTree(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap);
+    bool        optDoEarlyPropForBlock(BasicBlock* block);
     bool        optDoEarlyPropForFunc();
     PhaseStatus optEarlyProp();
-    bool optFoldNullCheck(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap);
-    GenTree* optFindNullCheckToFold(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap);
-    bool optIsNullCheckFoldingLegal(GenTree*    tree,
-                                    GenTree*    nullCheckTree,
-                                    GenTree**   nullCheckParent,
-                                    Statement** nullCheckStmt);
-    bool optCanMoveNullCheckPastTree(GenTree* tree,
-                                     unsigned nullCheckLclNum,
-                                     bool     isInsideTry,
-                                     bool     checkSideEffectSummary);
+    bool        optFoldNullCheck(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap);
+    GenTree*    optFindNullCheckToFold(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap);
+    bool        optIsNullCheckFoldingLegal(GenTree*    tree,
+                                           GenTree*    nullCheckTree,
+                                           GenTree**   nullCheckParent,
+                                           Statement** nullCheckStmt);
+    bool        optCanMoveNullCheckPastTree(GenTree* tree,
+                                            unsigned nullCheckLclNum,
+                                            bool     isInsideTry,
+                                            bool     checkSideEffectSummary);
 #if DEBUG
     void optCheckFlagsAreSet(unsigned    methodFlag,
                              const char* methodFlagStr,
@@ -7412,19 +7471,31 @@ class Compiler
                              BasicBlock* basicBlock);
 #endif
 
+    PhaseStatus optInductionVariables();
+    bool        optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop);
+    bool        optIsIVWideningProfitable(unsigned                lclNum,
+                                          BasicBlock*             initBlock,
+                                          bool                    initedToConstant,
+                                          FlowGraphNaturalLoop*   loop,
+                                          ArrayStack<Statement*>& ivUses);
+    void        optBestEffortReplaceNarrowIVUses(
+               unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt);
+    void optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt);
+    void optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop);
+
     // Redundant branch opts
     //
-    PhaseStatus optRedundantBranches();
-    bool optRedundantRelop(BasicBlock* const block);
-    bool optRedundantBranch(BasicBlock* const block);
-    bool optJumpThreadDom(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop);
-    bool optJumpThreadPhi(BasicBlock* const block, GenTree* tree, ValueNum treeNormVN);
-    bool optJumpThreadCheck(BasicBlock* const block, BasicBlock* const domBlock);
-    bool optJumpThreadCore(JumpThreadInfo& jti);
-    bool optReachable(BasicBlock* const fromBlock, BasicBlock* const toBlock, BasicBlock* const excludedBlock);
+    PhaseStatus   optRedundantBranches();
+    bool          optRedundantRelop(BasicBlock* const block);
+    bool          optRedundantBranch(BasicBlock* const block);
+    bool          optJumpThreadDom(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop);
+    bool          optJumpThreadPhi(BasicBlock* const block, GenTree* tree, ValueNum treeNormVN);
+    bool          optJumpThreadCheck(BasicBlock* const block, BasicBlock* const domBlock);
+    bool          optJumpThreadCore(JumpThreadInfo& jti);
+    bool          optReachable(BasicBlock* const fromBlock, BasicBlock* const toBlock, BasicBlock* const excludedBlock);
     BitVecTraits* optReachableBitVecTraits;
     BitVec        optReachableBitVec;
-    void optRelopImpliesRelop(RelopImplicationInfo* rii);
+    void          optRelopImpliesRelop(RelopImplicationInfo* rii);
 
     /**************************************************************************
      *               Value/Assertion propagation
@@ -7491,14 +7562,15 @@ class Compiler
         {
             optOp1Kind kind; // a normal LclVar, or Exact-type or Subtype
             ValueNum   vn;
-            union {
+            union
+            {
                 SsaVar lcl;
                 ArrBnd bnd;
             };
         } op1;
         struct AssertionDscOp2
         {
-            optOp2Kind kind; // a const or copy assignment
+            optOp2Kind kind; // a const or copy assertion
         private:
             uint16_t m_encodedIconFlags; // encoded icon gtFlags, don't use directly
         public:
@@ -7511,7 +7583,8 @@ class Compiler
 #endif
                 FieldSeq* fieldSeq;
             };
-            union {
+            union
+            {
                 SsaVar        lcl;
                 IntVal        u1;
                 __int64       lconVal;
@@ -7716,7 +7789,7 @@ class Compiler
     AssertionIndex*            optComplementaryAssertionMap;
     JitExpandArray<ASSERT_TP>* optAssertionDep; // table that holds dependent assertions (assertions
                                                 // using the value of a local var) for each local var
-    AssertionDsc*  optAssertionTabPrivate;      // table that holds info about value assignments
+    AssertionDsc*  optAssertionTabPrivate;      // table that holds info about assertions
     AssertionIndex optAssertionCount;           // total number of assertions in the assertion table
     AssertionIndex optMaxAssertionCount;
     bool           optCrossBlockLocalAssertionProp;
@@ -7728,47 +7801,49 @@ class Compiler
     bool           optCanPropSubRange;
 
 public:
-    void optVnNonNullPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* tree);
-    fgWalkResult optVNConstantPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* parent, GenTree* tree);
-    GenTree* optVNConstantPropOnJTrue(BasicBlock* block, GenTree* test);
-    GenTree* optVNConstantPropOnTree(BasicBlock* block, GenTree* parent, GenTree* tree);
-    GenTree* optExtractSideEffListFromConst(GenTree* tree);
+    void         optVnNonNullPropCurStmt(BasicBlock* block, Statement* stmt, GenTree* tree);
+    fgWalkResult optVNBasedFoldCurStmt(BasicBlock* block, Statement* stmt, GenTree* parent, GenTree* tree);
+    GenTree*     optVNConstantPropOnJTrue(BasicBlock* block, GenTree* test);
+    GenTree*     optVNBasedFoldConstExpr(BasicBlock* block, GenTree* parent, GenTree* tree);
+    GenTree*     optVNBasedFoldExpr(BasicBlock* block, GenTree* parent, GenTree* tree);
+    GenTree*     optVNBasedFoldExpr_Call(BasicBlock* block, GenTree* parent, GenTreeCall* call);
+    GenTree*     optExtractSideEffListFromConst(GenTree* tree);
 
     AssertionIndex GetAssertionCount()
     {
         return optAssertionCount;
     }
-    ASSERT_TP* bbJtrueAssertionOut;
+    ASSERT_TP*                                                                     bbJtrueAssertionOut;
     typedef JitHashTable<ValueNum, JitSmallPrimitiveKeyFuncs<ValueNum>, ASSERT_TP> ValueNumToAssertsMap;
-    ValueNumToAssertsMap* optValueNumToAsserts;
+    ValueNumToAssertsMap*                                                          optValueNumToAsserts;
 
     // Assertion prop helpers.
-    ASSERT_TP& GetAssertionDep(unsigned lclNum);
+    ASSERT_TP&    GetAssertionDep(unsigned lclNum);
     AssertionDsc* optGetAssertion(AssertionIndex assertIndex);
-    void optAssertionInit(bool isLocalProp);
-    void optAssertionTraitsInit(AssertionIndex assertionCount);
-    void optAssertionReset(AssertionIndex limit);
-    void optAssertionRemove(AssertionIndex index);
+    void          optAssertionInit(bool isLocalProp);
+    void          optAssertionTraitsInit(AssertionIndex assertionCount);
+    void          optAssertionReset(AssertionIndex limit);
+    void          optAssertionRemove(AssertionIndex index);
 
     // Assertion prop data flow functions.
     PhaseStatus optAssertionPropMain();
-    Statement* optVNAssertionPropCurStmt(BasicBlock* block, Statement* stmt);
-    bool optIsTreeKnownIntValue(bool vnBased, GenTree* tree, ssize_t* pConstant, GenTreeFlags* pIconFlags);
-    ASSERT_TP* optInitAssertionDataflowFlags();
-    ASSERT_TP* optComputeAssertionGen();
+    Statement*  optVNAssertionPropCurStmt(BasicBlock* block, Statement* stmt);
+    bool        optIsTreeKnownIntValue(bool vnBased, GenTree* tree, ssize_t* pConstant, GenTreeFlags* pIconFlags);
+    ASSERT_TP*  optInitAssertionDataflowFlags();
+    ASSERT_TP*  optComputeAssertionGen();
 
     // Assertion Gen functions.
-    void optAssertionGen(GenTree* tree);
+    void           optAssertionGen(GenTree* tree);
     AssertionIndex optAssertionGenCast(GenTreeCast* cast);
     AssertionIndex optAssertionGenPhiDefn(GenTree* tree);
-    AssertionInfo optCreateJTrueBoundsAssertion(GenTree* tree);
-    AssertionInfo optAssertionGenJtrue(GenTree* tree);
+    AssertionInfo  optCreateJTrueBoundsAssertion(GenTree* tree);
+    AssertionInfo  optAssertionGenJtrue(GenTree* tree);
     AssertionIndex optCreateJtrueAssertions(GenTree*                   op1,
                                             GenTree*                   op2,
                                             Compiler::optAssertionKind assertionKind,
                                             bool                       helperCallArgs = false);
     AssertionIndex optFindComplementary(AssertionIndex assertionIndex);
-    void optMapComplementary(AssertionIndex assertionIndex, AssertionIndex index);
+    void           optMapComplementary(AssertionIndex assertionIndex, AssertionIndex index);
 
     ValueNum optConservativeNormalVN(GenTree* tree);
 
@@ -7789,9 +7864,9 @@ class Compiler
                                          GenTree*       op2,
                                          bool           helperCallArgs = false);
 
-    bool optAssertionVnInvolvesNan(AssertionDsc* assertion);
+    bool           optAssertionVnInvolvesNan(AssertionDsc* assertion);
     AssertionIndex optAddAssertion(AssertionDsc* assertion);
-    void optAddVnAssertionMapping(ValueNum vn, AssertionIndex index);
+    void           optAddVnAssertionMapping(ValueNum vn, AssertionIndex index);
 #ifdef DEBUG
     void optPrintVnAssertionMapping();
 #endif
@@ -7801,8 +7876,8 @@ class Compiler
     AssertionIndex optAssertionIsSubrange(GenTree* tree, IntegralRange range, ASSERT_VALARG_TP assertions);
     AssertionIndex optAssertionIsSubtype(GenTree* tree, GenTree* methodTableArg, ASSERT_VALARG_TP assertions);
     AssertionIndex optAssertionIsNonNullInternal(GenTree* op, ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased));
-    bool optAssertionIsNonNull(GenTree*         op,
-                               ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased) DEBUGARG(AssertionIndex* pIndex));
+    bool           optAssertionIsNonNull(GenTree*                    op,
+                                         ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased) DEBUGARG(AssertionIndex* pIndex));
 
     AssertionIndex optGlobalAssertionIsEqualOrNotEqual(ASSERT_VALARG_TP assertions, GenTree* op1, GenTree* op2);
     AssertionIndex optGlobalAssertionIsEqualOrNotEqualZero(ASSERT_VALARG_TP assertions, GenTree* op1);
@@ -7810,15 +7885,15 @@ class Compiler
         optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind, ssize_t cnsVal, ASSERT_VALARG_TP assertions);
 
     // Assertion prop for lcl var functions.
-    bool optAssertionProp_LclVarTypeCheck(GenTree* tree, LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc);
+    bool     optAssertionProp_LclVarTypeCheck(GenTree* tree, LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc);
     GenTree* optCopyAssertionProp(AssertionDsc*        curAssertion,
                                   GenTreeLclVarCommon* tree,
-                                  Statement* stmt DEBUGARG(AssertionIndex index));
+                                  Statement* stmt      DEBUGARG(AssertionIndex index));
     GenTree* optConstantAssertionProp(AssertionDsc*        curAssertion,
                                       GenTreeLclVarCommon* tree,
-                                      Statement* stmt DEBUGARG(AssertionIndex index));
-    bool optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, GenTree* destParent, GenTree* value);
-    bool optZeroObjAssertionProp(GenTree* tree, ASSERT_VALARG_TP assertions);
+                                      Statement* stmt      DEBUGARG(AssertionIndex index));
+    bool     optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock, GenTree* destParent, GenTree* value);
+    bool     optZeroObjAssertionProp(GenTree* tree, ASSERT_VALARG_TP assertions);
 
     // Assertion propagation functions.
     GenTree* optAssertionProp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt, BasicBlock* block);
@@ -7838,8 +7913,8 @@ class Compiler
     GenTree* optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions, GenTree* tree, Statement* stmt);
     GenTree* optAssertionProp_Update(GenTree* newTree, GenTree* tree, Statement* stmt);
     GenTree* optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCall* call);
-    bool optNonNullAssertionProp_Ind(ASSERT_VALARG_TP assertions, GenTree* indir);
-    bool optWriteBarrierAssertionProp_StoreInd(ASSERT_VALARG_TP assertions, GenTreeStoreInd* indir);
+    bool     optNonNullAssertionProp_Ind(ASSERT_VALARG_TP assertions, GenTree* indir);
+    bool     optWriteBarrierAssertionProp_StoreInd(ASSERT_VALARG_TP assertions, GenTreeStoreInd* indir);
 
     void optAssertionProp_RangeProperties(ASSERT_VALARG_TP assertions,
                                           GenTree*         tree,
@@ -7895,11 +7970,11 @@ class Compiler
     bool optReconstructArrIndex(GenTree* tree, ArrIndex* result);
     bool optIdentifyLoopOptInfo(FlowGraphNaturalLoop* loop, LoopCloneContext* context);
     static fgWalkPreFn optCanOptimizeByLoopCloningVisitor;
-    fgWalkResult optCanOptimizeByLoopCloning(GenTree* tree, LoopCloneVisitorInfo* info);
-    bool optObtainLoopCloningOpts(LoopCloneContext* context);
-    bool optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* context);
-    bool optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneVisitorInfo* info);
-    bool optIsHandleOrIndirOfHandle(GenTree* tree, GenTreeFlags handleType);
+    fgWalkResult       optCanOptimizeByLoopCloning(GenTree* tree, LoopCloneVisitorInfo* info);
+    bool               optObtainLoopCloningOpts(LoopCloneContext* context);
+    bool               optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* context);
+    bool               optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneVisitorInfo* info);
+    bool               optIsHandleOrIndirOfHandle(GenTree* tree, GenTreeFlags handleType);
 
     static bool optLoopCloningEnabled();
 
@@ -8062,7 +8137,7 @@ class Compiler
 
     const char* eeGetClassName(CORINFO_CLASS_HANDLE clsHnd, char* buffer = nullptr, size_t bufferSize = 0);
 
-    void eePrintObjectDescription(const char* prefix, CORINFO_OBJECT_HANDLE handle);
+    void        eePrintObjectDescription(const char* prefix, CORINFO_OBJECT_HANDLE handle);
     const char* eeGetShortClassName(CORINFO_CLASS_HANDLE clsHnd);
 
 #if defined(DEBUG)
@@ -8071,12 +8146,12 @@ class Compiler
 
     unsigned compMethodHash(CORINFO_METHOD_HANDLE methodHandle);
 
-    var_types eeGetArgType(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig);
-    var_types eeGetArgType(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig, bool* isPinned);
+    var_types            eeGetArgType(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig);
+    var_types            eeGetArgType(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig, bool* isPinned);
     CORINFO_CLASS_HANDLE eeGetArgClass(CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE list);
     CORINFO_CLASS_HANDLE eeGetClassFromContext(CORINFO_CONTEXT_HANDLE context);
-    unsigned eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig);
-    static unsigned eeGetArgSizeAlignment(var_types type, bool isFloatHfa);
+    unsigned             eeGetArgSize(CorInfoType corInfoType, CORINFO_CLASS_HANDLE typeHnd);
+    static unsigned      eeGetArgSizeAlignment(var_types type, bool isFloatHfa);
 
     // VOM info, method sigs
 
@@ -8191,6 +8266,30 @@ class Compiler
         return eeGetEEInfo()->targetAbi == abi;
     }
 
+#if defined(FEATURE_EH_WINDOWS_X86)
+    bool eeIsNativeAotAbi;
+    bool UsesFunclets() const
+    {
+        return eeIsNativeAotAbi;
+    }
+
+    bool UsesCallFinallyThunks() const
+    {
+        // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
+        return UsesFunclets();
+    }
+#else
+    bool UsesFunclets() const
+    {
+        return true;
+    }
+
+    bool UsesCallFinallyThunks() const
+    {
+        return true;
+    }
+#endif
+
     bool generateCFIUnwindCodes()
     {
 #if defined(FEATURE_CFI_SUPPORT)
@@ -8207,7 +8306,7 @@ class Compiler
     unsigned eeBoundariesCount;
 
     ICorDebugInfo::OffsetMapping* eeBoundaries; // Boundaries to report to the EE
-    void eeSetLIcount(unsigned count);
+    void                          eeSetLIcount(unsigned count);
     void eeSetLIinfo(unsigned which, UNATIVE_OFFSET offs, IPmappingDscKind kind, const ILLocation& loc);
     void eeSetLIdone();
 
@@ -8215,7 +8314,7 @@ class Compiler
     static void eeDispILOffs(IL_OFFSET offs);
     static void eeDispSourceMappingOffs(uint32_t offs);
     static void eeDispLineInfo(const ICorDebugInfo::OffsetMapping* line);
-    void eeDispLineInfos();
+    void        eeDispLineInfos();
 #endif // DEBUG
 
     // Debugging support - Local var info
@@ -8231,7 +8330,7 @@ class Compiler
         UNATIVE_OFFSET             endOffset;
         DWORD                      varNumber;
         CodeGenInterface::siVarLoc loc;
-    } * eeVars;
+    }*   eeVars;
     void eeSetLVcount(unsigned count);
     void eeSetLVinfo(unsigned                          which,
                      UNATIVE_OFFSET                    startOffs,
@@ -8266,7 +8365,7 @@ class Compiler
 
     WORD eeGetRelocTypeHint(void* target);
 
-// ICorStaticInfo wrapper functions
+    // ICorStaticInfo wrapper functions
 
 #if defined(UNIX_AMD64_ABI)
 #ifdef DEBUG
@@ -8295,7 +8394,11 @@ class Compiler
     template <typename Functor>
     bool eeRunFunctorWithSPMIErrorTrap(Functor f)
     {
-        return eeRunWithSPMIErrorTrap<Functor>([](Functor* pf) { (*pf)(); }, &f);
+        return eeRunWithSPMIErrorTrap<Functor>(
+            [](Functor* pf) {
+            (*pf)();
+        },
+            &f);
     }
 
     bool eeRunWithSPMIErrorTrapImp(void (*function)(void*), void* param);
@@ -8303,7 +8406,7 @@ class Compiler
     // Utility functions
 
     static CORINFO_METHOD_HANDLE eeFindHelper(unsigned helper);
-    static CorInfoHelpFunc eeGetHelperNum(CORINFO_METHOD_HANDLE method);
+    static CorInfoHelpFunc       eeGetHelperNum(CORINFO_METHOD_HANDLE method);
 
     enum StaticHelperReturnValue
     {
@@ -8360,7 +8463,7 @@ class Compiler
     // structure and IL offset is needed only when generating debuggable code. Therefore
     // it is desirable to avoid memory size penalty in retail scenarios.
     typedef JitHashTable<GenTree*, JitPtrKeyFuncs<GenTree>, DebugInfo> CallSiteDebugInfoTable;
-    CallSiteDebugInfoTable* genCallSite2DebugInfoMap;
+    CallSiteDebugInfoTable*                                            genCallSite2DebugInfoMap;
 
     unsigned    genReturnLocal; // Local number for the return value when applicable.
     BasicBlock* genReturnBB;    // jumped to when not optimizing for speed.
@@ -8402,11 +8505,11 @@ class Compiler
         return codeGen->doDoubleAlign();
     }
     DWORD getCanDoubleAlign();
-    bool shouldDoubleAlign(unsigned refCntStk,
-                           unsigned refCntReg,
-                           weight_t refCntWtdReg,
-                           unsigned refCntStkParam,
-                           weight_t refCntWtdStkDbl);
+    bool  shouldDoubleAlign(unsigned refCntStk,
+                            unsigned refCntReg,
+                            weight_t refCntWtdReg,
+                            unsigned refCntStkParam,
+                            weight_t refCntWtdStkDbl);
 #endif // DOUBLE_ALIGN
 
     bool IsFullPtrRegMapRequired()
@@ -8425,40 +8528,32 @@ class Compiler
 #endif // TARGET_WASM
     }
 
-// Things that MAY belong either in CodeGen or CodeGenContext
-
-#if defined(FEATURE_EH_FUNCLETS)
+    // Things that MAY belong either in CodeGen or CodeGenContext
     FuncInfoDsc*   compFuncInfos;
     unsigned short compCurrFuncIdx;
     unsigned short compFuncInfoCount;
+    FuncInfoDsc    compFuncInfoRoot;
 
     unsigned short compFuncCount()
     {
-        assert(fgFuncletsCreated);
-        return compFuncInfoCount;
-    }
-
-#else // !FEATURE_EH_FUNCLETS
-
-    // This is a no-op when there are no funclets!
-    void genUpdateCurrentFunclet(BasicBlock* block)
-    {
-        return;
+        if (UsesFunclets())
+        {
+            assert(fgFuncletsCreated);
+            return compFuncInfoCount;
+        }
+        else
+        {
+            return 1;
+        }
     }
 
-    FuncInfoDsc compFuncInfoRoot;
-
-    static const unsigned compCurrFuncIdx = 0;
-
-    unsigned short compFuncCount()
+    unsigned short funCurrentFuncIdx()
     {
-        return 1;
+        return UsesFunclets() ? compCurrFuncIdx : 0;
     }
 
-#endif // !FEATURE_EH_FUNCLETS
-
     FuncInfoDsc* funCurrentFunc();
-    void funSetCurrentFunc(unsigned funcIdx);
+    void         funSetCurrentFunc(unsigned funcIdx);
     FuncInfoDsc* funGetFunc(unsigned funcIdx);
     unsigned int funGetFuncIdx(BasicBlock* block);
 
@@ -8481,15 +8576,15 @@ class Compiler
     // not all JIT Helper calls follow the standard ABI on the target architecture.
     regMaskTP compHelperCallKillSet(CorInfoHelpFunc helper);
 
-/*
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XX                                                                           XX
-XX                           UnwindInfo                                      XX
-XX                                                                           XX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-*/
+    /*
+    XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+    XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+    XX                                                                           XX
+    XX                           UnwindInfo                                      XX
+    XX                                                                           XX
+    XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+    XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+    */
 
 #if !defined(__GNUC__)
 #pragma region Unwind information
@@ -8564,22 +8659,20 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     //
 
 private:
-#if defined(FEATURE_EH_FUNCLETS)
     void unwindGetFuncLocations(FuncInfoDsc*             func,
                                 bool                     getHotSectionData,
                                 /* OUT */ emitLocation** ppStartLoc,
                                 /* OUT */ emitLocation** ppEndLoc);
-#endif // FEATURE_EH_FUNCLETS
 
     void unwindReserveFunc(FuncInfoDsc* func);
     void unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode);
 
-#if defined(TARGET_AMD64) || (defined(TARGET_X86) && defined(FEATURE_EH_FUNCLETS))
+#if defined(TARGET_AMD64) || defined(TARGET_X86)
 
     void unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode);
     void unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pColdCode, bool isHotCode);
 
-#endif // TARGET_AMD64 || (TARGET_X86 && FEATURE_EH_FUNCLETS)
+#endif // TARGET_AMD64 || TARGET_X86
 
     UNATIVE_OFFSET unwindGetCurrentOffset(FuncInfoDsc* func);
 
@@ -8603,13 +8696,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 #if defined(FEATURE_CFI_SUPPORT)
     short mapRegNumToDwarfReg(regNumber reg);
-    void createCfiCode(FuncInfoDsc* func, UNATIVE_OFFSET codeOffset, UCHAR opcode, short dwarfReg, INT offset = 0);
-    void unwindPushPopCFI(regNumber reg);
-    void unwindBegPrologCFI();
-    void unwindPushPopMaskCFI(regMaskTP regMask, bool isFloat);
-    void unwindAllocStackCFI(unsigned size);
-    void unwindSetFrameRegCFI(regNumber reg, unsigned offset);
-    void unwindEmitFuncCFI(FuncInfoDsc* func, void* pHotCode, void* pColdCode);
+    void  createCfiCode(FuncInfoDsc* func, UNATIVE_OFFSET codeOffset, UCHAR opcode, short dwarfReg, INT offset = 0);
+    void  unwindPushPopCFI(regNumber reg);
+    void  unwindBegPrologCFI();
+    void  unwindPushPopMaskCFI(regMaskTP regMask, bool isFloat);
+    void  unwindAllocStackCFI(unsigned size);
+    void  unwindSetFrameRegCFI(regNumber reg, unsigned offset);
+    void  unwindEmitFuncCFI(FuncInfoDsc* func, void* pHotCode, void* pColdCode);
 #ifdef DEBUG
     void DumpCfiInfo(bool                  isHotCode,
                      UNATIVE_OFFSET        startOffset,
@@ -8858,11 +8951,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
     GenTree* impSIMDPopStack();
 
-    void setLclRelatedToSIMDIntrinsic(GenTree* tree);
-    bool areFieldsContiguous(GenTreeIndir* op1, GenTreeIndir* op2);
-    bool areLocalFieldsContiguous(GenTreeLclFld* first, GenTreeLclFld* second);
-    bool areArrayElementsContiguous(GenTree* op1, GenTree* op2);
-    bool areArgumentsContiguous(GenTree* op1, GenTree* op2);
+    void     setLclRelatedToSIMDIntrinsic(GenTree* tree);
+    bool     areFieldsContiguous(GenTreeIndir* op1, GenTreeIndir* op2);
+    bool     areLocalFieldsContiguous(GenTreeLclFld* first, GenTreeLclFld* second);
+    bool     areArrayElementsContiguous(GenTree* op1, GenTree* op2);
+    bool     areArgumentsContiguous(GenTree* op1, GenTree* op2);
     GenTree* CreateAddressNodeForSimdHWIntrinsicCreate(GenTree* tree, var_types simdBaseType, unsigned simdSize);
 
     // Get the size of the SIMD type in bytes
@@ -8890,7 +8983,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         // We need to report the ISA dependency to the VM so that scenarios
         // such as R2R work correctly for larger vector sizes, so we always
         // do `compExactlyDependsOn` for such cases.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_XARCH)
         if (compExactlyDependsOn(InstructionSet_VectorT512))
@@ -9247,6 +9339,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         // | arm64       |   256  |   128  | ldp/stp (2x128bit)
         // | arm         |    32  |    16  | no SIMD support
         // | loongarch64 |    64  |    32  | no SIMD support
+        // | riscv64     |    64  |    32  | no SIMD support
         //
         // We might want to use a different multiplier for truly hot/cold blocks based on PGO data
         //
@@ -9429,6 +9522,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     }
 
 #ifdef TARGET_XARCH
+public:
     bool canUseVexEncoding() const
     {
         return compOpportunisticallyDependsOn(InstructionSet_AVX);
@@ -9445,6 +9539,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         return compOpportunisticallyDependsOn(InstructionSet_AVX512F);
     }
 
+private:
     //------------------------------------------------------------------------
     // DoJitStressEvexEncoding- Answer the question: Do we force EVEX encoding.
     //
@@ -9513,8 +9608,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     bool compSwitchedToMinOpts;        // Codegen initially was Tier1/FullOpts but jit switched to MinOpts
     bool compSuppressedZeroInit;       // There are vars with lvSuppressedZeroInit set
 
-// NOTE: These values are only reliable after
-//       the importing is completely finished.
+    // NOTE: These values are only reliable after
+    //       the importing is completely finished.
 
 #ifdef DEBUG
     // State information - which phases have completed?
@@ -9606,11 +9701,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         uint32_t preferredVectorByteLength;
 #endif // TARGET_XARCH
 
-// optimize maximally and/or favor speed over size?
+        // optimize maximally and/or favor speed over size?
 
-#define DEFAULT_MIN_OPTS_CODE_SIZE 60000
-#define DEFAULT_MIN_OPTS_INSTR_COUNT 20000
-#define DEFAULT_MIN_OPTS_BB_COUNT 2000
+#define DEFAULT_MIN_OPTS_CODE_SIZE    60000
+#define DEFAULT_MIN_OPTS_INSTR_COUNT  20000
+#define DEFAULT_MIN_OPTS_BB_COUNT     2000
 #define DEFAULT_MIN_OPTS_LV_NUM_COUNT 2000
 #define DEFAULT_MIN_OPTS_LV_REF_COUNT 8000
 
@@ -9826,9 +9921,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         bool genFPopt;   // Can we do frame-pointer-omission optimization?
         bool altJit;     // True if we are an altjit and are compiling this method
 
-#ifdef OPT_CONFIG
-        bool optRepeat; // Repeat optimizer phases k times
-#endif
+        bool optRepeat;          // Repeat optimizer phases k times
+        int  optRepeatIteration; // The current optRepeat iteration: from 0 to optRepeatCount. optRepeatCount can be
+                                 // zero, in which case no optimizations in the set of repeated optimizations are
+                                 // performed. optRepeatIteration will only be zero if optRepeatCount is zero.
+        int  optRepeatCount;     // How many times to repeat. By default, comes from JitConfig.JitOptRepeatCount().
+        bool optRepeatActive;    // `true` if we are in the range of phases being repeated.
 
         bool disAsm;       // Display native code as it is generated
         bool disTesting;   // Display BEGIN METHOD/END METHOD anchors for disasm testing
@@ -10024,9 +10122,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
     const char* devirtualizationDetailToString(CORINFO_DEVIRTUALIZATION_DETAIL detail);
 
+    const char* printfAlloc(const char* format, ...);
+
 #endif // DEBUG
 
-// clang-format off
+    // clang-format off
 #define STRESS_MODES                                                                            \
                                                                                                 \
         STRESS_MODE(NONE)                                                                       \
@@ -10070,13 +10170,14 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         STRESS_MODE(PHYSICAL_PROMOTION) /* Use physical promotion */                            \
         STRESS_MODE(PHYSICAL_PROMOTION_COST)                                                    \
         STRESS_MODE(UNWIND) /* stress unwind info; e.g., create function fragments */           \
+        STRESS_MODE(OPT_REPEAT) /* stress JitOptRepeat */                                       \
                                                                                                 \
         /* After COUNT_VARN, stress level 2 does all of these all the time */                   \
                                                                                                 \
         STRESS_MODE(COUNT_VARN)                                                                 \
                                                                                                 \
         /* "Check" stress areas that can be exhaustively used if we */                          \
-        /*  dont care about performance at all */                                               \
+        /*  don't care about performance at all */                                              \
                                                                                                 \
         STRESS_MODE(FORCE_INLINE) /* Treat every method as AggressiveInlining */                \
         STRESS_MODE(CHK_FLOW_UPDATE)                                                            \
@@ -10095,7 +10196,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         STRESS_MODES
 #undef STRESS_MODE
     };
-// clang-format on
+    // clang-format on
 
 #ifdef DEBUG
     static const LPCWSTR s_compStressModeNamesW[STRESS_COUNT + 1];
@@ -10105,8 +10206,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 #define MAX_STRESS_WEIGHT 100
 
-    bool compStressCompile(compStressArea stressArea, unsigned weightPercentage);
-    bool compStressCompileHelper(compStressArea stressArea, unsigned weightPercentage);
+    bool            compStressCompile(compStressArea stressArea, unsigned weightPercentage);
+    bool            compStressCompileHelper(compStressArea stressArea, unsigned weightPercentage);
     static unsigned compStressAreaHash(compStressArea area);
 
 #ifdef DEBUG
@@ -10176,7 +10277,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         const char* compMethodName;
         const char* compClassName;
         const char* compFullName;
-        double      compPerfScore;
         int         compMethodSuperPMIIndex; // useful when debugging under SuperPMI
 
 #endif // defined(DEBUG) || defined(LATE_DISASM) || DUMP_FLOWGRAPHS
@@ -10211,11 +10311,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         // (2) the code is hot/cold split, and we issued less code than we expected
         // in the cold section (the hot section will always be padded out to compTotalHotCodeSize).
 
-        bool compIsStatic : 1;           // Is the method static (no 'this' pointer)?
-        bool compIsVarArgs : 1;          // Does the method have varargs parameters?
-        bool compInitMem : 1;            // Is the CORINFO_OPT_INIT_LOCALS bit set in the method info options?
-        bool compProfilerCallback : 1;   // JIT inserted a profiler Enter callback
-        bool compPublishStubParam : 1;   // EAX captured in prolog will be available through an intrinsic
+        bool compIsStatic           : 1; // Is the method static (no 'this' pointer)?
+        bool compIsVarArgs          : 1; // Does the method have varargs parameters?
+        bool compInitMem            : 1; // Is the CORINFO_OPT_INIT_LOCALS bit set in the method info options?
+        bool compProfilerCallback   : 1; // JIT inserted a profiler Enter callback
+        bool compPublishStubParam   : 1; // EAX captured in prolog will be available through an intrinsic
         bool compHasNextCallRetAddr : 1; // The NextCallReturnAddress intrinsic is used.
 
         var_types compRetType;       // Return type of the method as declared in IL (including SIMD normalization)
@@ -10227,8 +10327,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         unsigned compArgStackSize; // Incoming argument stack size in bytes
 #endif                             // FEATURE_FASTTAILCALL
 
-        unsigned compRetBuffArg; // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present);
-        int compTypeCtxtArg; // position of hidden param for type context for generic code (CORINFO_CALLCONV_PARAMTYPE)
+        unsigned compRetBuffArg;    // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present);
+        unsigned compTypeCtxtArg;   // position of hidden param for type context for generic code
+                                    // (CORINFO_CALLCONV_PARAMTYPE)
         unsigned       compThisArg; // position of implicit this pointer param (not to be confused with lvaArg0Var)
         unsigned       compILlocalsCount; // Number of vars : args + locals (incl. implicit but not hidden)
         unsigned       compLocalsCount;   // Number of vars : args + locals (incl. implicit and     hidden)
@@ -10304,7 +10405,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         // There are cases where implicit RetBuf argument should be explicitly returned in a register.
         // In such cases the return type is changed to TYP_BYREF and appropriate IR is generated.
         // These cases are:
-        CLANG_FORMAT_COMMENT_ANCHOR;
 #ifdef TARGET_AMD64
         // 1. on x64 Windows and Unix the address of RetBuf needs to be returned by
         //    methods with hidden RetBufArg in RAX. In such case GT_RETURN is of TYP_BYREF,
@@ -10323,7 +10423,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #endif
         // 3. Windows ARM64 native instance calling convention requires the address of RetBuff
         //    to be returned in x0.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 #if defined(TARGET_ARM64)
         if (TargetOS::IsWindows)
         {
@@ -10334,8 +10433,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
             }
         }
 #endif // TARGET_ARM64
-        // 4. x86 unmanaged calling conventions require the address of RetBuff to be returned in eax.
-        CLANG_FORMAT_COMMENT_ANCHOR;
+       // 4. x86 unmanaged calling conventions require the address of RetBuff to be returned in eax.
 #if defined(TARGET_X86)
         if (info.compCallConv != CorInfoCallConvExtension::Managed)
         {
@@ -10423,7 +10521,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     var_types TypeHandleToVarType(CORINFO_CLASS_HANDLE handle, ClassLayout** pLayout = nullptr);
     var_types TypeHandleToVarType(CorInfoType jitType, CORINFO_CLASS_HANDLE handle, ClassLayout** pLayout = nullptr);
 
-//-------------------------- Global Compiler Data ------------------------------------
+    //-------------------------- Global Compiler Data ------------------------------------
 
 #ifdef DEBUG
 private:
@@ -10450,14 +10548,14 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     unsigned  compHndBBtabCount;      // element count of used elements in EH data array
     unsigned  compHndBBtabAllocCount; // element count of allocated elements in EH data array
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
 
     //-------------------------------------------------------------------------
     //  Tracking of region covered by the monitor in synchronized methods
     void* syncStartEmitCookie; // the emitter cookie for first instruction after the call to MON_ENTER
     void* syncEndEmitCookie;   // the emitter cookie for first instruction after the call to MON_EXIT
 
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
     Phases      mostRecentlyActivePhase; // the most recently active phase
     PhaseChecks activePhaseChecks;       // the currently active phase checks
@@ -10531,8 +10629,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
     //------------ Some utility functions --------------
 
-    void* compGetHelperFtn(CorInfoHelpFunc ftnNum,         /* IN  */
-                           void**          ppIndirection); /* OUT */
+    void* compGetHelperFtn(CorInfoHelpFunc ftnNum, /* IN  */
+                           void**          ppIndirection);  /* OUT */
 
     // Several JIT/EE interface functions return a CorInfoType, and also return a
     // class handle as an out parameter if the type is a value class.  Returns the
@@ -10547,17 +10645,17 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     void compDoComponentUnitTestsOnce();
 #endif // DEBUG
 
-    int compCompile(CORINFO_MODULE_HANDLE classPtr,
-                    void**                methodCodePtr,
-                    uint32_t*             methodCodeSize,
-                    JitFlags*             compileFlags);
+    int  compCompile(CORINFO_MODULE_HANDLE classPtr,
+                     void**                methodCodePtr,
+                     uint32_t*             methodCodeSize,
+                     JitFlags*             compileFlags);
     void compCompileFinish();
-    int compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
-                          COMP_HANDLE           compHnd,
-                          CORINFO_METHOD_INFO*  methodInfo,
-                          void**                methodCodePtr,
-                          uint32_t*             methodCodeSize,
-                          JitFlags*             compileFlag);
+    int  compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
+                           COMP_HANDLE           compHnd,
+                           CORINFO_METHOD_INFO*  methodInfo,
+                           void**                methodCodePtr,
+                           uint32_t*             methodCodeSize,
+                           JitFlags*             compileFlag);
 
     ArenaAllocator* compGetArenaAllocator();
 
@@ -10609,6 +10707,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         unsigned m_returnSpCheck;
         unsigned m_callSpCheck;
         unsigned m_simdUserForcesDep;
+        unsigned m_nonStandardParameter;
         unsigned m_liveInOutHndlr;
         unsigned m_depField;
         unsigned m_noRegVars;
@@ -10637,6 +10736,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     static EnregisterStats s_enregisterStats;
 #endif // TRACK_ENREG_STATS
 
+    JitMetrics Metrics;
+
     bool compIsForInlining() const;
     bool compDonotInline();
 
@@ -10645,10 +10746,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     static unsigned char compGetJitDefaultFill(Compiler* comp);
 
     const char* compLocalVarName(unsigned varNum, unsigned offs);
-    VarName compVarName(regNumber reg, bool isFloatReg = false);
+    VarName     compVarName(regNumber reg, bool isFloatReg = false);
     const char* compFPregVarName(unsigned fpReg, bool displayVar = false);
-    void compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP);
-    void compDspSrcLinesByLineNum(unsigned line, bool seek = false);
+    void        compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP);
+    void        compDspSrcLinesByLineNum(unsigned line, bool seek = false);
 #endif // DEBUG
     const char* compRegNameForSize(regNumber reg, size_t size);
     const char* compRegVarName(regNumber reg, bool displayVar = false, bool isFloatReg = false);
@@ -10820,8 +10921,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     void verInitCurrentState();
     void verResetCurrentState(BasicBlock* block, EntryState* currentState);
 
-    void verConvertBBToThrowVerificationException(BasicBlock* block DEBUGARG(bool logMsg));
-    void verHandleVerificationFailure(BasicBlock* block DEBUGARG(bool logMsg));
+    void     verConvertBBToThrowVerificationException(BasicBlock* block DEBUGARG(bool logMsg));
+    void     verHandleVerificationFailure(BasicBlock* block DEBUGARG(bool logMsg));
     typeInfo verMakeTypeInfoForLocal(unsigned lclNum);
     typeInfo verMakeTypeInfo(CORINFO_CLASS_HANDLE clsHnd); // converts from jit type representation to typeInfo
     typeInfo verMakeTypeInfo(CorInfoType          ciType,
@@ -10918,8 +11019,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     static fgWalkPreFn gsMarkPtrsAndAssignGroups; // Shadow param analysis tree-walk
     static fgWalkPreFn gsReplaceShadowParams;     // Shadow param replacement tree-walk
 
-#define DEFAULT_MAX_INLINE_SIZE 100 // Methods with >  DEFAULT_MAX_INLINE_SIZE IL bytes will never be inlined.
-                                    // This can be overwritten by setting DOTNET_JITInlineSize env variable.
+#define DEFAULT_MAX_INLINE_SIZE                                                                                        \
+    100 // Methods with >  DEFAULT_MAX_INLINE_SIZE IL bytes will never be inlined.
+        // This can be overwritten by setting DOTNET_JITInlineSize env variable.
 
 #define DEFAULT_MAX_INLINE_DEPTH 20 // Methods at more than this level deep will not be inlined
 
@@ -11103,7 +11205,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 #endif // defined(UNIX_AMD64_ABI)
 
-    void fgMorphMultiregStructArgs(GenTreeCall* call);
+    void     fgMorphMultiregStructArgs(GenTreeCall* call);
     GenTree* fgMorphMultiregStructArg(CallArg* arg);
 
     bool killGCRefs(GenTree* tree);
@@ -11258,7 +11360,9 @@ class GenTreeVisitor
     Compiler*            m_compiler;
     ArrayStack<GenTree*> m_ancestors;
 
-    GenTreeVisitor(Compiler* compiler) : m_compiler(compiler), m_ancestors(compiler->getAllocator(CMK_ArrayStack))
+    GenTreeVisitor(Compiler* compiler)
+        : m_compiler(compiler)
+        , m_ancestors(compiler->getAllocator(CMK_ArrayStack))
     {
         assert(compiler != nullptr);
 
@@ -11338,9 +11442,9 @@ class GenTreeVisitor
             case GT_START_NONGC:
             case GT_START_PREEMPTGC:
             case GT_PROF_HOOK:
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
             case GT_END_LFIN:
-#endif // !FEATURE_EH_FUNCLETS
+#endif // !FEATURE_EH_WINDOWS_X86
             case GT_PHI_ARG:
             case GT_JMPTABLE:
             case GT_PHYSREG:
@@ -11349,6 +11453,7 @@ class GenTreeVisitor
             case GT_PINVOKE_EPILOG:
             case GT_IL_OFFSET:
             case GT_NOP:
+            case GT_SWIFT_ERROR:
                 break;
 
             // Lclvar unary operators
@@ -11476,28 +11581,6 @@ class GenTreeVisitor
                 break;
             }
 
-            case GT_STORE_DYN_BLK:
-            {
-                GenTreeStoreDynBlk* const dynBlock = node->AsStoreDynBlk();
-
-                result = WalkTree(&dynBlock->gtOp1, dynBlock);
-                if (result == fgWalkResult::WALK_ABORT)
-                {
-                    return result;
-                }
-                result = WalkTree(&dynBlock->gtOp2, dynBlock);
-                if (result == fgWalkResult::WALK_ABORT)
-                {
-                    return result;
-                }
-                result = WalkTree(&dynBlock->gtDynamicSize, dynBlock);
-                if (result == fgWalkResult::WALK_ABORT)
-                {
-                    return result;
-                }
-                break;
-            }
-
             case GT_CALL:
             {
                 GenTreeCall* const call = node->AsCall();
@@ -11702,7 +11785,8 @@ class DomTreeVisitor
 protected:
     Compiler* m_compiler;
 
-    DomTreeVisitor(Compiler* compiler) : m_compiler(compiler)
+    DomTreeVisitor(Compiler* compiler)
+        : m_compiler(compiler)
     {
     }
 
@@ -11791,7 +11875,8 @@ class EHClauses
         EHblkDsc* m_ehDsc;
 
     public:
-        iterator(EHblkDsc* ehDsc) : m_ehDsc(ehDsc)
+        iterator(EHblkDsc* ehDsc)
+            : m_ehDsc(ehDsc)
         {
         }
 
@@ -11813,7 +11898,9 @@ class EHClauses
     };
 
 public:
-    EHClauses(Compiler* comp) : m_begin(comp->compHndBBtab), m_end(comp->compHndBBtab + comp->compHndBBtabCount)
+    EHClauses(Compiler* comp)
+        : m_begin(comp->compHndBBtab)
+        , m_end(comp->compHndBBtab + comp->compHndBBtabCount)
     {
         assert((m_begin != nullptr) || (m_begin == m_end));
     }
@@ -11850,7 +11937,9 @@ class StringPrinter
 
 public:
     StringPrinter(CompAllocator alloc, char* buffer = nullptr, size_t bufferMax = 0)
-        : m_alloc(alloc), m_buffer(buffer), m_bufferMax(bufferMax)
+        : m_alloc(alloc)
+        , m_buffer(buffer)
+        , m_bufferMax(bufferMax)
     {
         if ((m_buffer == nullptr) || (m_bufferMax == 0))
         {
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index 3893c2efd0a2..0c61cfe83299 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -36,7 +36,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 inline bool getInlinePInvokeEnabled()
 {
 #ifdef DEBUG
-    return JitConfig.JitPInvokeEnabled() && !JitConfig.StressCOMCall();
+    return JitConfig.JitPInvokeEnabled();
 #else
     return true;
 #endif
@@ -65,26 +65,6 @@ inline UINT32 forceCastToUInt32(double d)
     return u;
 }
 
-enum RoundLevel
-{
-    ROUND_NEVER     = 0, // Never round
-    ROUND_CMP_CONST = 1, // Round values compared against constants
-    ROUND_CMP       = 2, // Round comparands and return values
-    ROUND_ALWAYS    = 3, // Round always
-
-    COUNT_ROUND_LEVEL,
-    DEFAULT_ROUND_LEVEL = ROUND_NEVER
-};
-
-inline RoundLevel getRoundFloatLevel()
-{
-#ifdef DEBUG
-    return (RoundLevel)JitConfig.JitRoundFloat();
-#else
-    return DEFAULT_ROUND_LEVEL;
-#endif
-}
-
 /*****************************************************************************/
 /*****************************************************************************
  *
@@ -98,9 +78,9 @@ inline T genFindLowestBit(T value)
 }
 
 /*****************************************************************************
-*
-*  Return true if the given value has exactly zero or one bits set.
-*/
+ *
+ *  Return true if the given value has exactly zero or one bits set.
+ */
 
 template <typename T>
 inline bool genMaxOneBit(T value)
@@ -109,9 +89,9 @@ inline bool genMaxOneBit(T value)
 }
 
 /*****************************************************************************
-*
-*  Return true if the given value has exactly one bit set.
-*/
+ *
+ *  Return true if the given value has exactly one bit set.
+ */
 
 template <typename T>
 inline bool genExactlyOneBit(T value)
@@ -294,6 +274,20 @@ class Dumpable
     virtual void dump(FILE* output) = 0;
 };
 
+// Helper class record and display a simple single value.
+class Counter : public Dumpable
+{
+public:
+    int64_t Value;
+
+    Counter(int64_t initialValue = 0)
+        : Value(initialValue)
+    {
+    }
+
+    void dump(FILE* output);
+};
+
 // Helper class to record and display a histogram of different values.
 // Usage like:
 // static unsigned s_buckets[] = { 1, 2, 5, 10, 0 }; // Must have terminating 0
@@ -339,7 +333,8 @@ class Histogram : public Dumpable
 class NodeCounts : public Dumpable
 {
 public:
-    NodeCounts() : m_counts()
+    NodeCounts()
+        : m_counts()
     {
     }
 
@@ -551,7 +546,7 @@ BasicBlockVisit BasicBlock::VisitEHEnclosedHandlerSecondPassSuccs(Compiler* comp
 //   3. As part of two pass EH, control may bypass filters and flow directly to
 //   filter-handlers
 //
-template <bool         skipJumpDest, typename TFunc>
+template <bool skipJumpDest, typename TFunc>
 static BasicBlockVisit VisitEHSuccs(Compiler* comp, BasicBlock* block, TFunc func)
 {
     if (!block->HasPotentialEHSuccs(comp))
@@ -644,34 +639,34 @@ BasicBlockVisit BasicBlock::VisitAllSuccs(Compiler* comp, TFunc func)
             {
                 for (unsigned i = 0; i < bbEhfTargets->bbeCount; i++)
                 {
-                    RETURN_ON_ABORT(func(bbEhfTargets->bbeSuccs[i]));
+                    RETURN_ON_ABORT(func(bbEhfTargets->bbeSuccs[i]->getDestinationBlock()));
                 }
             }
 
             return VisitEHSuccs(comp, func);
 
         case BBJ_CALLFINALLY:
-            RETURN_ON_ABORT(func(bbTarget));
+            RETURN_ON_ABORT(func(GetTarget()));
             return ::VisitEHSuccs</* skipJumpDest */ true, TFunc>(comp, this, func);
 
         case BBJ_CALLFINALLYRET:
             // These are "pseudo-blocks" and control never actually flows into them
             // (codegen directly jumps to its successor after finally calls).
-            return func(bbTarget);
+            return func(GetTarget());
 
         case BBJ_EHCATCHRET:
         case BBJ_EHFILTERRET:
         case BBJ_LEAVE:
         case BBJ_ALWAYS:
-            RETURN_ON_ABORT(func(bbTarget));
+            RETURN_ON_ABORT(func(GetTarget()));
             return VisitEHSuccs(comp, func);
 
         case BBJ_COND:
-            RETURN_ON_ABORT(func(bbFalseTarget));
+            RETURN_ON_ABORT(func(GetFalseTarget()));
 
-            if (bbTrueTarget != bbFalseTarget)
+            if (!TrueEdgeIs(GetFalseEdge()))
             {
-                RETURN_ON_ABORT(func(bbTrueTarget));
+                RETURN_ON_ABORT(func(GetTrueTarget()));
             }
 
             return VisitEHSuccs(comp, func);
@@ -681,7 +676,7 @@ BasicBlockVisit BasicBlock::VisitAllSuccs(Compiler* comp, TFunc func)
             Compiler::SwitchUniqueSuccSet sd = comp->GetDescriptorForSwitch(this);
             for (unsigned i = 0; i < sd.numDistinctSuccs; i++)
             {
-                RETURN_ON_ABORT(func(sd.nonDuplicates[i]));
+                RETURN_ON_ABORT(func(sd.nonDuplicates[i]->getDestinationBlock()));
             }
 
             return VisitEHSuccs(comp, func);
@@ -719,7 +714,7 @@ BasicBlockVisit BasicBlock::VisitRegularSuccs(Compiler* comp, TFunc func)
             {
                 for (unsigned i = 0; i < bbEhfTargets->bbeCount; i++)
                 {
-                    RETURN_ON_ABORT(func(bbEhfTargets->bbeSuccs[i]));
+                    RETURN_ON_ABORT(func(bbEhfTargets->bbeSuccs[i]->getDestinationBlock()));
                 }
             }
 
@@ -731,14 +726,14 @@ BasicBlockVisit BasicBlock::VisitRegularSuccs(Compiler* comp, TFunc func)
         case BBJ_EHFILTERRET:
         case BBJ_LEAVE:
         case BBJ_ALWAYS:
-            return func(bbTarget);
+            return func(GetTarget());
 
         case BBJ_COND:
-            RETURN_ON_ABORT(func(bbFalseTarget));
+            RETURN_ON_ABORT(func(GetFalseTarget()));
 
-            if (bbTrueTarget != bbFalseTarget)
+            if (!TrueEdgeIs(GetFalseEdge()))
             {
-                RETURN_ON_ABORT(func(bbTrueTarget));
+                RETURN_ON_ABORT(func(GetTrueTarget()));
             }
 
             return BasicBlockVisit::Continue;
@@ -748,7 +743,7 @@ BasicBlockVisit BasicBlock::VisitRegularSuccs(Compiler* comp, TFunc func)
             Compiler::SwitchUniqueSuccSet sd = comp->GetDescriptorForSwitch(this);
             for (unsigned i = 0; i < sd.numDistinctSuccs; i++)
             {
-                RETURN_ON_ABORT(func(sd.nonDuplicates[i]));
+                RETURN_ON_ABORT(func(sd.nonDuplicates[i]->getDestinationBlock()));
             }
 
             return BasicBlockVisit::Continue;
@@ -794,8 +789,6 @@ inline bool BasicBlock::HasPotentialEHSuccs(Compiler* comp)
     return hndDesc->InFilterRegionBBRange(this);
 }
 
-#if defined(FEATURE_EH_FUNCLETS)
-
 /*****************************************************************************
  *  Get the FuncInfoDsc for the funclet we are currently generating code for.
  *  This is only valid during codegen.
@@ -803,7 +796,14 @@ inline bool BasicBlock::HasPotentialEHSuccs(Compiler* comp)
  */
 inline FuncInfoDsc* Compiler::funCurrentFunc()
 {
-    return funGetFunc(compCurrFuncIdx);
+    if (UsesFunclets())
+    {
+        return funGetFunc(compCurrFuncIdx);
+    }
+    else
+    {
+        return &compFuncInfoRoot;
+    }
 }
 
 /*****************************************************************************
@@ -813,10 +813,17 @@ inline FuncInfoDsc* Compiler::funCurrentFunc()
  */
 inline void Compiler::funSetCurrentFunc(unsigned funcIdx)
 {
-    assert(fgFuncletsCreated);
-    assert(FitsIn<unsigned short>(funcIdx));
-    noway_assert(funcIdx < compFuncInfoCount);
-    compCurrFuncIdx = (unsigned short)funcIdx;
+    if (UsesFunclets())
+    {
+        assert(fgFuncletsCreated);
+        assert(FitsIn<unsigned short>(funcIdx));
+        noway_assert(funcIdx < compFuncInfoCount);
+        compCurrFuncIdx = (unsigned short)funcIdx;
+    }
+    else
+    {
+        assert(funcIdx == 0);
+    }
 }
 
 /*****************************************************************************
@@ -826,9 +833,17 @@ inline void Compiler::funSetCurrentFunc(unsigned funcIdx)
  */
 inline FuncInfoDsc* Compiler::funGetFunc(unsigned funcIdx)
 {
-    assert(fgFuncletsCreated);
-    assert(funcIdx < compFuncInfoCount);
-    return &compFuncInfos[funcIdx];
+    if (UsesFunclets())
+    {
+        assert(fgFuncletsCreated);
+        assert(funcIdx < compFuncInfoCount);
+        return &compFuncInfos[funcIdx];
+    }
+    else
+    {
+        assert(funcIdx == 0);
+        return &compFuncInfoRoot;
+    }
 }
 
 /*****************************************************************************
@@ -841,71 +856,33 @@ inline FuncInfoDsc* Compiler::funGetFunc(unsigned funcIdx)
  */
 inline unsigned Compiler::funGetFuncIdx(BasicBlock* block)
 {
-    assert(fgFuncletsCreated);
-    assert(block->HasFlag(BBF_FUNCLET_BEG));
-
-    EHblkDsc*    eh      = ehGetDsc(block->getHndIndex());
-    unsigned int funcIdx = eh->ebdFuncIndex;
-    if (eh->ebdHndBeg != block)
+    if (UsesFunclets())
     {
-        // If this is a filter EH clause, but we want the funclet
-        // for the filter (not the filter handler), it is the previous one
-        noway_assert(eh->HasFilter());
-        noway_assert(eh->ebdFilter == block);
-        assert(funGetFunc(funcIdx)->funKind == FUNC_HANDLER);
-        assert(funGetFunc(funcIdx)->funEHIndex == funGetFunc(funcIdx - 1)->funEHIndex);
-        assert(funGetFunc(funcIdx - 1)->funKind == FUNC_FILTER);
-        funcIdx--;
-    }
+        assert(fgFuncletsCreated);
+        assert(block->HasFlag(BBF_FUNCLET_BEG));
 
-    return funcIdx;
-}
-
-#else // !FEATURE_EH_FUNCLETS
-
-/*****************************************************************************
- *  Get the FuncInfoDsc for the funclet we are currently generating code for.
- *  This is only valid during codegen.  For non-funclet platforms, this is
- *  always the root function.
- *
- */
-inline FuncInfoDsc* Compiler::funCurrentFunc()
-{
-    return &compFuncInfoRoot;
-}
-
-/*****************************************************************************
- *  Change which funclet we are currently generating code for.
- *  This is only valid after funclets are created.
- *
- */
-inline void Compiler::funSetCurrentFunc(unsigned funcIdx)
-{
-    assert(funcIdx == 0);
-}
-
-/*****************************************************************************
- *  Get the FuncInfoDsc for the givven funclet.
- *  This is only valid after funclets are created.
- *
- */
-inline FuncInfoDsc* Compiler::funGetFunc(unsigned funcIdx)
-{
-    assert(funcIdx == 0);
-    return &compFuncInfoRoot;
-}
+        EHblkDsc*    eh      = ehGetDsc(block->getHndIndex());
+        unsigned int funcIdx = eh->ebdFuncIndex;
+        if (eh->ebdHndBeg != block)
+        {
+            // If this is a filter EH clause, but we want the funclet
+            // for the filter (not the filter handler), it is the previous one
+            noway_assert(eh->HasFilter());
+            noway_assert(eh->ebdFilter == block);
+            assert(funGetFunc(funcIdx)->funKind == FUNC_HANDLER);
+            assert(funGetFunc(funcIdx)->funEHIndex == funGetFunc(funcIdx - 1)->funEHIndex);
+            assert(funGetFunc(funcIdx - 1)->funKind == FUNC_FILTER);
+            funcIdx--;
+        }
 
-/*****************************************************************************
- *  No funclets, so always 0.
- *
- */
-inline unsigned Compiler::funGetFuncIdx(BasicBlock* block)
-{
-    return 0;
+        return funcIdx;
+    }
+    else
+    {
+        return 0;
+    }
 }
 
-#endif // !FEATURE_EH_FUNCLETS
-
 //------------------------------------------------------------------------------
 // genRegNumFromMask : Maps a single register mask to a register number.
 //
@@ -1282,8 +1259,8 @@ inline Statement* Compiler::gtNewStmt(GenTree* expr, const DebugInfo& di)
 inline GenTree* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1)
 {
     assert((GenTree::OperKind(oper) & (GTK_UNOP | GTK_BINOP)) != 0);
-    assert((GenTree::OperKind(oper) & GTK_EXOP) ==
-           0); // Can't use this to construct any types that extend unary/binary operator.
+    assert((GenTree::OperKind(oper) & GTK_EXOP) == 0); // Can't use this to construct any types that extend unary/binary
+                                                       // operator.
     assert(op1 != nullptr || oper == GT_RETFILT || (oper == GT_RETURN && type == TYP_VOID));
 
     GenTree* node = new (this, oper) GenTreeOp(oper, type, op1, nullptr);
@@ -1329,7 +1306,7 @@ inline GenTreeIntCon* Compiler::gtNewIconHandleNode(size_t value, GenTreeFlags f
     node = new (this, LargeOpOpcode())
         GenTreeIntCon(gtGetTypeForIconFlags(flags), value, fields DEBUGARG(/*largeNode*/ true));
 #else
-    node             = new (this, GT_CNS_INT) GenTreeIntCon(gtGetTypeForIconFlags(flags), value, fields);
+    node = new (this, GT_CNS_INT) GenTreeIntCon(gtGetTypeForIconFlags(flags), value, fields);
 #endif
     node->gtFlags |= flags;
     return node;
@@ -2432,7 +2409,7 @@ inline bool Compiler::lvaReportParamTypeArg()
 {
     if (info.compMethodInfo->options & (CORINFO_GENERICS_CTXT_FROM_METHODDESC | CORINFO_GENERICS_CTXT_FROM_METHODTABLE))
     {
-        assert(info.compTypeCtxtArg != -1);
+        assert(info.compTypeCtxtArg != BAD_VAR_NUM);
 
         // If the VM requires us to keep the generics context alive and report it (for example, if any catch
         // clause catches a type that uses a generic parameter of this method) this flag will be set.
@@ -2530,8 +2507,8 @@ inline
             assert(varDsc->lvIsParam);
 #endif // UNIX_AMD64_ABI
 #else  // !TARGET_AMD64
-            // For other targets, a stack parameter that is enregistered or prespilled
-            // for profiling on ARM will have a stack location.
+       // For other targets, a stack parameter that is enregistered or prespilled
+       // for profiling on ARM will have a stack location.
             assert((varDsc->lvIsParam && !varDsc->lvIsRegArg) || isPrespilledArg);
 #endif // !TARGET_AMD64
         }
@@ -2602,23 +2579,22 @@ inline
             if (!FPbased)
             {
                 // Worst case stack based offset.
-                CLANG_FORMAT_COMMENT_ANCHOR;
 #if FEATURE_FIXED_OUT_ARGS
                 int outGoingArgSpaceSize = lvaOutgoingArgSpaceSize;
 #else
                 int outGoingArgSpaceSize = 0;
 #endif
-                varOffset = outGoingArgSpaceSize + max(-varNum * TARGET_POINTER_SIZE, (int)lvaGetMaxSpillTempSize());
+                varOffset =
+                    outGoingArgSpaceSize + max(-varNum * (int)TARGET_POINTER_SIZE, (int)lvaGetMaxSpillTempSize());
             }
             else
             {
                 // Worst case FP based offset.
-                CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef TARGET_ARM
                 varOffset = codeGen->genCallerSPtoInitialSPdelta() - codeGen->genCallerSPtoFPdelta();
 #else
-                varOffset                = -(codeGen->genTotalFrameSize());
+                varOffset = -(codeGen->genTotalFrameSize());
 #endif
             }
         }
@@ -2672,7 +2648,7 @@ inline
         *pBaseReg = REG_SPBASE;
     }
 #else
-    *pFPbased                            = FPbased;
+    *pFPbased = FPbased;
 #endif
 
     return varOffset;
@@ -2702,7 +2678,6 @@ inline bool Compiler::lvaIsOriginalThisArg(unsigned varNum)
     {
         LclVarDsc* varDsc = lvaGetDesc(varNum);
         // Should never write to or take the address of the original 'this' arg
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifndef JIT32_GCENCODER
         // With the general encoder/decoder, when the original 'this' arg is needed as a generics context param, we
@@ -2764,13 +2739,13 @@ inline unsigned Compiler::compMapILargNum(unsigned ILargNum)
         assert(ILargNum < info.compLocalsCount); // compLocals count already adjusted.
     }
 
-    if (ILargNum >= (unsigned)info.compTypeCtxtArg)
+    if (ILargNum >= info.compTypeCtxtArg)
     {
         ILargNum++;
         assert(ILargNum < info.compLocalsCount); // compLocals count already adjusted.
     }
 
-    if (ILargNum >= (unsigned)lvaVarargsHandleArg)
+    if (ILargNum >= lvaVarargsHandleArg)
     {
         ILargNum++;
         assert(ILargNum < info.compLocalsCount); // compLocals count already adjusted.
@@ -2808,8 +2783,8 @@ inline var_types Compiler::mangleVarArgsType(var_types type)
 
         if (varTypeIsSIMD(type))
         {
-            // Vectors also get passed in int registers. Use TYP_INT.
-            return TYP_INT;
+            // Vectors should be considered like passing a struct
+            return TYP_STRUCT;
         }
     }
 #endif // defined(TARGET_ARMARCH)
@@ -3156,6 +3131,24 @@ inline bool Compiler::fgIsBigOffset(size_t offset)
     return (offset > compMaxUncheckedOffsetForNullObject);
 }
 
+//------------------------------------------------------------------------
+// IsValidLclAddr: Can the given local address be represented as "LCL_FLD_ADDR"?
+//
+// Local address nodes cannot point beyond the local and can only store
+// 16 bits worth of offset.
+//
+// Arguments:
+//    lclNum - The local's number
+//    offset - The address' offset
+//
+// Return Value:
+//    Whether "LCL_FLD_ADDR<lclNum> [+offset]" would be valid IR.
+//
+inline bool Compiler::IsValidLclAddr(unsigned lclNum, unsigned offset)
+{
+    return (offset < UINT16_MAX) && (offset < lvaLclExactSize(lclNum));
+}
+
 /*
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@@ -3265,11 +3258,11 @@ inline int getJitStressLevel()
  * we return the fixed return buffer register
  */
 
-inline regNumber genMapIntRegArgNumToRegNum(unsigned argNum)
+inline regNumber genMapIntRegArgNumToRegNum(unsigned argNum, CorInfoCallConvExtension callConv)
 {
-    if (hasFixedRetBuffReg() && (argNum == theFixedRetBuffArgNum()))
+    if (hasFixedRetBuffReg(callConv) && (argNum == theFixedRetBuffArgNum(callConv)))
     {
-        return theFixedRetBuffReg();
+        return theFixedRetBuffReg(callConv);
     }
 
     assert(argNum < ArrLen(intArgRegs));
@@ -3289,7 +3282,7 @@ inline regNumber genMapFloatRegArgNumToRegNum(unsigned argNum)
 #endif
 }
 
-__forceinline regNumber genMapRegArgNumToRegNum(unsigned argNum, var_types type)
+__forceinline regNumber genMapRegArgNumToRegNum(unsigned argNum, var_types type, CorInfoCallConvExtension callConv)
 {
     if (varTypeUsesFloatArgReg(type))
     {
@@ -3297,7 +3290,7 @@ __forceinline regNumber genMapRegArgNumToRegNum(unsigned argNum, var_types type)
     }
     else
     {
-        return genMapIntRegArgNumToRegNum(argNum);
+        return genMapIntRegArgNumToRegNum(argNum, callConv);
     }
 }
 
@@ -3352,9 +3345,9 @@ __forceinline regMaskTP genMapArgNumToRegMask(unsigned argNum, var_types type)
  * If we have a fixed return buffer register we return theFixedRetBuffArgNum
  */
 
-inline unsigned genMapIntRegNumToRegArgNum(regNumber regNum)
+inline unsigned genMapIntRegNumToRegArgNum(regNumber regNum, CorInfoCallConvExtension callConv)
 {
-    assert(genRegMask(regNum) & fullIntArgRegMask());
+    assert(genRegMask(regNum) & fullIntArgRegMask(callConv));
 
     switch (regNum)
     {
@@ -3390,9 +3383,9 @@ inline unsigned genMapIntRegNumToRegArgNum(regNumber regNum)
 #endif
         default:
             // Check for the Arm64 fixed return buffer argument register
-            if (hasFixedRetBuffReg() && (regNum == theFixedRetBuffReg()))
+            if (hasFixedRetBuffReg(callConv) && (regNum == theFixedRetBuffReg(callConv)))
             {
-                return theFixedRetBuffArgNum();
+                return theFixedRetBuffArgNum(callConv);
             }
             else
             {
@@ -3450,7 +3443,7 @@ inline unsigned genMapFloatRegNumToRegArgNum(regNumber regNum)
 #endif // !arm
 }
 
-inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type)
+inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type, CorInfoCallConvExtension callConv)
 {
     if (varTypeUsesFloatArgReg(type))
     {
@@ -3458,7 +3451,7 @@ inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type)
     }
     else
     {
-        return genMapIntRegNumToRegArgNum(regNum);
+        return genMapIntRegNumToRegArgNum(regNum, callConv);
     }
 }
 
@@ -3559,8 +3552,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 /*****************************************************************************
  *
- *  The following resets the value assignment table
- *  used only during local assertion prop
+ *  The following resets the assertions table used only during local assertion prop
  */
 
 inline void Compiler::optAssertionReset(AssertionIndex limit)
@@ -3613,7 +3605,7 @@ inline void Compiler::optAssertionReset(AssertionIndex limit)
 
 /*****************************************************************************
  *
- *  The following removes the i-th entry in the value assignment table
+ *  The following removes the i-th entry in the assertions table
  *  used only during local assertion prop
  */
 
@@ -4107,9 +4099,7 @@ bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum)
     result = result || (varNum == lvaOutgoingArgSpaceVar);
 #endif
 
-#if defined(FEATURE_EH_FUNCLETS)
     result = result || (varNum == lvaPSPSym);
-#endif
 
     return result;
 }
@@ -4226,9 +4216,9 @@ void GenTree::VisitOperands(TVisitor visitor)
         case GT_START_NONGC:
         case GT_START_PREEMPTGC:
         case GT_PROF_HOOK:
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
         case GT_END_LFIN:
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
         case GT_PHI_ARG:
         case GT_JMPTABLE:
         case GT_PHYSREG:
@@ -4237,6 +4227,7 @@ void GenTree::VisitOperands(TVisitor visitor)
         case GT_PINVOKE_EPILOG:
         case GT_IL_OFFSET:
         case GT_NOP:
+        case GT_SWIFT_ERROR:
             return;
 
         // Unary operators with an optional operand
@@ -4352,21 +4343,6 @@ void GenTree::VisitOperands(TVisitor visitor)
             return;
         }
 
-        case GT_STORE_DYN_BLK:
-        {
-            GenTreeStoreDynBlk* const dynBlock = this->AsStoreDynBlk();
-            if (visitor(dynBlock->gtOp1) == VisitResult::Abort)
-            {
-                return;
-            }
-            if (visitor(dynBlock->gtOp2) == VisitResult::Abort)
-            {
-                return;
-            }
-            visitor(dynBlock->gtDynamicSize);
-            return;
-        }
-
         case GT_CALL:
         {
             GenTreeCall* const call = this->AsCall();
@@ -4788,7 +4764,6 @@ unsigned Compiler::fgRunDfs(VisitPreorder visitPreorder, VisitPostorder visitPos
     ArrayStack<AllSuccessorEnumerator> blocks(getAllocator(CMK_DepthFirstSearch));
 
     auto dfsFrom = [&](BasicBlock* firstBB) {
-
         BitVecOps::AddElemD(&traits, visited, firstBB->bbNum);
         blocks.Emplace(this, firstBB);
         visitPreorder(firstBB, preOrderIndex++);
@@ -4814,7 +4789,6 @@ unsigned Compiler::fgRunDfs(VisitPreorder visitPreorder, VisitPostorder visitPos
                 visitPostorder(block, postOrderIndex++);
             }
         }
-
     };
 
     dfsFrom(fgFirstBB);
@@ -4859,7 +4833,7 @@ template <typename TFunc>
 BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksReversePostOrder(TFunc func)
 {
     BitVecTraits traits(m_blocksSize, m_dfsTree->GetCompiler());
-    bool result = BitVecOps::VisitBits(&traits, m_blocks, [=](unsigned index) {
+    bool         result = BitVecOps::VisitBits(&traits, m_blocks, [=](unsigned index) {
         // head block rpo index = PostOrderCount - 1 - headPreOrderIndex
         // loop block rpo index = head block rpoIndex + index
         // loop block po index = PostOrderCount - 1 - loop block rpo index
@@ -4891,7 +4865,7 @@ template <typename TFunc>
 BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksPostOrder(TFunc func)
 {
     BitVecTraits traits(m_blocksSize, m_dfsTree->GetCompiler());
-    bool result = BitVecOps::VisitBitsReverse(&traits, m_blocks, [=](unsigned index) {
+    bool         result = BitVecOps::VisitBitsReverse(&traits, m_blocks, [=](unsigned index) {
         unsigned poIndex = m_header->bbPostorderNum - index;
         assert(poIndex < m_dfsTree->GetPostOrderCount());
         return func(m_dfsTree->GetPostOrder(poIndex)) == BasicBlockVisit::Continue;
@@ -4974,6 +4948,51 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksLexical(TFunc func)
     return BasicBlockVisit::Continue;
 }
 
+//------------------------------------------------------------------------------
+// FlowGraphNaturalLoop::VisitRegularExitBlocks: Visit non-handler blocks that
+// are outside the loop but that may have regular predecessors inside the loop.
+//
+// Type parameters:
+//   TFunc - Callback functor type
+//
+// Arguments:
+//   func - Callback functor that takes a BasicBlock* and returns a
+//   BasicBlockVisit.
+//
+// Returns:
+//   BasicBlockVisit that indicated whether the visit was aborted by the
+//   callback or whether all blocks were visited.
+//
+// Remarks:
+//   Note that no handler begins are visited by this function, even if they
+//   have regular predecessors inside the loop (for example, finally handlers
+//   can have regular BBJ_CALLFINALLY predecessors inside the loop). This
+//   choice is motivated by the fact that such handlers will also show up as
+//   exceptional exit blocks that must always be handled specially by client
+//   code regardless.
+//
+template <typename TFunc>
+BasicBlockVisit FlowGraphNaturalLoop::VisitRegularExitBlocks(TFunc func)
+{
+    Compiler* comp = m_dfsTree->GetCompiler();
+
+    BitVecTraits traits = m_dfsTree->PostOrderTraits();
+    BitVec       visited(BitVecOps::MakeEmpty(&traits));
+
+    for (FlowEdge* edge : ExitEdges())
+    {
+        BasicBlock* exit = edge->getDestinationBlock();
+        assert(m_dfsTree->Contains(exit) && !ContainsBlock(exit));
+        if (!comp->bbIsHandlerBeg(exit) && BitVecOps::TryAddElemD(&traits, visited, exit->bbPostorderNum) &&
+            (func(exit) == BasicBlockVisit::Abort))
+        {
+            return BasicBlockVisit::Abort;
+        }
+    }
+
+    return BasicBlockVisit::Continue;
+}
+
 /*****************************************************************************/
 #endif //_COMPILER_HPP_
 /*****************************************************************************/
diff --git a/src/coreclr/jit/compilerbitsettraits.h b/src/coreclr/jit/compilerbitsettraits.h
index 02223b1ecedf..965ffac55465 100644
--- a/src/coreclr/jit/compilerbitsettraits.h
+++ b/src/coreclr/jit/compilerbitsettraits.h
@@ -107,7 +107,9 @@ struct BitVecTraits
     Compiler* comp;
 
 public:
-    BitVecTraits(unsigned size, Compiler* comp) : size(size), comp(comp)
+    BitVecTraits(unsigned size, Compiler* comp)
+        : size(size)
+        , comp(comp)
     {
         const unsigned elemBits = 8 * sizeof(size_t);
         arraySize               = roundUp(size, elemBits) / elemBits;
diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h
index 835d85f798d2..0221eadb0674 100644
--- a/src/coreclr/jit/compmemkind.h
+++ b/src/coreclr/jit/compmemkind.h
@@ -10,6 +10,7 @@
 // and the corresponding array of string names for these enum members.
 
 // clang-format off
+CompMemKindMacro(ABI)
 CompMemKindMacro(AssertionProp)
 CompMemKindMacro(ASTNode)
 CompMemKindMacro(InstDesc)
@@ -50,6 +51,7 @@ CompMemKindMacro(LoopOpt)
 CompMemKindMacro(LoopClone)
 CompMemKindMacro(LoopUnroll)
 CompMemKindMacro(LoopHoist)
+CompMemKindMacro(LoopIVOpts)
 CompMemKindMacro(Unknown)
 CompMemKindMacro(RangeCheck)
 CompMemKindMacro(CopyProp)
diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h
index c69c7309a8c2..0fb92c9f33c0 100644
--- a/src/coreclr/jit/compphases.h
+++ b/src/coreclr/jit/compphases.h
@@ -42,6 +42,7 @@ CompPhaseNameMacro(PHASE_CLONE_FINALLY,              "Clone finally",
 CompPhaseNameMacro(PHASE_UPDATE_FINALLY_FLAGS,       "Update finally target flags",    false, -1, false)
 CompPhaseNameMacro(PHASE_EARLY_UPDATE_FLOW_GRAPH,    "Update flow graph early pass",   false, -1, false)
 CompPhaseNameMacro(PHASE_DFS_BLOCKS,                 "DFS blocks and remove dead code",false, -1, false)
+CompPhaseNameMacro(PHASE_DFS_BLOCKS2,                "DFS blocks and remove dead code 2",false, -1, false)
 CompPhaseNameMacro(PHASE_STR_ADRLCL,                 "Morph - Structs/AddrExp",        false, -1, false)
 CompPhaseNameMacro(PHASE_EARLY_LIVENESS,             "Early liveness",                 false, -1, false)
 CompPhaseNameMacro(PHASE_PHYSICAL_PROMOTION,         "Physical promotion",             false, -1, false)
@@ -54,16 +55,14 @@ CompPhaseNameMacro(PHASE_POST_MORPH,                 "Post-Morph",
 CompPhaseNameMacro(PHASE_MORPH_END,                  "Morph - Finish",                 false, -1, true)
 CompPhaseNameMacro(PHASE_GS_COOKIE,                  "GS Cookie",                      false, -1, false)
 CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS,       "Compute edge weights (1, false)",false, -1, false)
-#if defined(FEATURE_EH_FUNCLETS)
 CompPhaseNameMacro(PHASE_CREATE_FUNCLETS,            "Create EH funclets",             false, -1, false)
-#endif // FEATURE_EH_FUNCLETS
 CompPhaseNameMacro(PHASE_HEAD_TAIL_MERGE,            "Head and tail merge",            false, -1, false)
 CompPhaseNameMacro(PHASE_MERGE_THROWS,               "Merge throw blocks",             false, -1, false)
 CompPhaseNameMacro(PHASE_INVERT_LOOPS,               "Invert loops",                   false, -1, false)
 CompPhaseNameMacro(PHASE_HEAD_TAIL_MERGE2,           "Post-morph head and tail merge", false, -1, false)
 CompPhaseNameMacro(PHASE_OPTIMIZE_FLOW,              "Optimize control flow",          false, -1, false)
 CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT,            "Optimize layout",                false, -1, false)
-CompPhaseNameMacro(PHASE_COMPUTE_REACHABILITY,       "Compute blocks reachability",    false, -1, false)
+CompPhaseNameMacro(PHASE_OPTIMIZE_POST_LAYOUT,       "Optimize post-layout",           false, -1, false)
 CompPhaseNameMacro(PHASE_COMPUTE_DOMINATORS,         "Compute dominators",             false, -1, false)
 CompPhaseNameMacro(PHASE_CANONICALIZE_ENTRY,         "Canonicalize entry",             false, -1, false)
 CompPhaseNameMacro(PHASE_SET_BLOCK_WEIGHTS,          "Set block weights",              false, -1, false)
@@ -84,6 +83,7 @@ CompPhaseNameMacro(PHASE_BUILD_SSA_DF,               "SSA: DF",
 CompPhaseNameMacro(PHASE_BUILD_SSA_INSERT_PHIS,      "SSA: insert phis",               false, PHASE_BUILD_SSA, false)
 CompPhaseNameMacro(PHASE_BUILD_SSA_RENAME,           "SSA: rename",                    false, PHASE_BUILD_SSA, false)
 CompPhaseNameMacro(PHASE_EARLY_PROP,                 "Early Value Propagation",        false, -1, false)
+CompPhaseNameMacro(PHASE_OPTIMIZE_INDUCTION_VARIABLES, "Optimize Induction Variables", false, -1, false)
 CompPhaseNameMacro(PHASE_VALUE_NUMBER,               "Do value numbering",             false, -1, false)
 CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS,      "Optimize index checks",          false, -1, false)
 CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES,       "Optimize Valnum CSEs",           false, -1, false)
diff --git a/src/coreclr/jit/copyprop.cpp b/src/coreclr/jit/copyprop.cpp
index 90a593ef65b2..bb645b26bb49 100644
--- a/src/coreclr/jit/copyprop.cpp
+++ b/src/coreclr/jit/copyprop.cpp
@@ -291,8 +291,8 @@ void Compiler::optCopyPropPushDef(GenTree* defNode, GenTreeLclVarCommon* lclNode
 {
     unsigned lclNum = lclNode->GetLclNum();
 
-    // Shadowed parameters are special: they will (at most) have one use, that is one on the RHS of an
-    // assignment to their shadow, and we must not substitute them anywhere. So we'll not push any defs.
+    // Shadowed parameters are special: they will (at most) have one use, as values in a store
+    // to their shadow, and we must not substitute them anywhere. So we'll not push any defs.
     if ((gsShadowVarInfo != nullptr) && lvaGetDesc(lclNum)->lvIsParam &&
         (gsShadowVarInfo[lclNum].shadowCopy != BAD_VAR_NUM))
     {
@@ -462,7 +462,9 @@ PhaseStatus Compiler::optVnCopyProp()
 
     public:
         CopyPropDomTreeVisitor(Compiler* compiler)
-            : DomTreeVisitor(compiler), m_curSsaName(compiler->getAllocator(CMK_CopyProp)), m_madeChanges(false)
+            : DomTreeVisitor(compiler)
+            , m_curSsaName(compiler->getAllocator(CMK_CopyProp))
+            , m_madeChanges(false)
         {
         }
 
diff --git a/src/coreclr/jit/debuginfo.h b/src/coreclr/jit/debuginfo.h
index 3f628840765d..72119b905c94 100644
--- a/src/coreclr/jit/debuginfo.h
+++ b/src/coreclr/jit/debuginfo.h
@@ -12,12 +12,17 @@ class InlineContext;
 class ILLocation
 {
 public:
-    ILLocation() : m_offset(BAD_IL_OFFSET), m_isStackEmpty(false), m_isCall(false)
+    ILLocation()
+        : m_offset(BAD_IL_OFFSET)
+        , m_isStackEmpty(false)
+        , m_isCall(false)
     {
     }
 
     ILLocation(IL_OFFSET offset, bool isStackEmpty, bool isCall)
-        : m_offset(offset), m_isStackEmpty(isStackEmpty), m_isCall(isCall)
+        : m_offset(offset)
+        , m_isStackEmpty(isStackEmpty)
+        , m_isCall(isCall)
     {
     }
 
@@ -65,18 +70,21 @@ class ILLocation
 private:
     IL_OFFSET m_offset;
     bool      m_isStackEmpty : 1;
-    bool      m_isCall : 1;
+    bool      m_isCall       : 1;
 };
 
 // Represents debug information about a statement.
 class DebugInfo
 {
 public:
-    DebugInfo() : m_inlineContext(nullptr)
+    DebugInfo()
+        : m_inlineContext(nullptr)
     {
     }
 
-    DebugInfo(InlineContext* inlineContext, ILLocation loc) : m_inlineContext(inlineContext), m_location(loc)
+    DebugInfo(InlineContext* inlineContext, ILLocation loc)
+        : m_inlineContext(inlineContext)
+        , m_location(loc)
     {
     }
 
diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp
index c7b28b15a5c6..ea87a996dbb1 100644
--- a/src/coreclr/jit/decomposelongs.cpp
+++ b/src/coreclr/jit/decomposelongs.cpp
@@ -2169,22 +2169,12 @@ void DecomposeLongs::TryPromoteLongVar(unsigned lclNum)
     for (unsigned index = 0; index < 2; ++index)
     {
         // Grab the temp for the field local.
-        CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUG
-        char buf[200];
-        sprintf_s(buf, sizeof(buf), "%s V%02u.%s (fldOffset=0x%x)", "field", lclNum, index == 0 ? "lo" : "hi",
-                  index * 4);
-
-        // We need to copy 'buf' as lvaGrabTemp() below caches a copy to its argument.
-        size_t len  = strlen(buf) + 1;
-        char*  bufp = m_compiler->getAllocator(CMK_DebugOnly).allocate<char>(len);
-        strcpy_s(bufp, len, buf);
-#endif
 
         // Lifetime of field locals might span multiple BBs, so they are long lifetime temps.
-        unsigned fieldLclNum = m_compiler->lvaGrabTemp(false DEBUGARG(bufp));
-        varDsc               = m_compiler->lvaGetDesc(lclNum);
+        unsigned fieldLclNum = m_compiler->lvaGrabTemp(
+            false DEBUGARG(m_compiler->printfAlloc("%s V%02u.%s (fldOffset=0x%x)", "field", lclNum,
+                                                   index == 0 ? "lo" : "hi", index * 4)));
+        varDsc = m_compiler->lvaGetDesc(lclNum);
 
         LclVarDsc* fieldVarDsc       = m_compiler->lvaGetDesc(fieldLclNum);
         fieldVarDsc->lvType          = TYP_INT;
diff --git a/src/coreclr/jit/decomposelongs.h b/src/coreclr/jit/decomposelongs.h
index b8ddc6210799..744061091e42 100644
--- a/src/coreclr/jit/decomposelongs.h
+++ b/src/coreclr/jit/decomposelongs.h
@@ -18,7 +18,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 class DecomposeLongs
 {
 public:
-    DecomposeLongs(Compiler* compiler) : m_compiler(compiler)
+    DecomposeLongs(Compiler* compiler)
+        : m_compiler(compiler)
     {
     }
 
@@ -72,7 +73,7 @@ class DecomposeLongs
     GenTree* RepresentOpAsLocalVar(GenTree* op, GenTree* user, GenTree** edge);
     GenTree* EnsureIntSized(GenTree* node, bool signExtend);
 
-    GenTree* StoreNodeToVar(LIR::Use& use);
+    GenTree*          StoreNodeToVar(LIR::Use& use);
     static genTreeOps GetHiOper(genTreeOps oper);
     static genTreeOps GetLoOper(genTreeOps oper);
 
diff --git a/src/coreclr/jit/disasm.cpp b/src/coreclr/jit/disasm.cpp
index 2a49f9d8cb55..bff93c85150a 100644
--- a/src/coreclr/jit/disasm.cpp
+++ b/src/coreclr/jit/disasm.cpp
@@ -1,12 +1,12 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 /***********************************************************************
-*
-* File: disasm.cpp
-*
-*  This file handles disassembly for the "late disassembler".
-*
-***********************************************************************/
+ *
+ * File: disasm.cpp
+ *
+ *  This file handles disassembly for the "late disassembler".
+ *
+ ***********************************************************************/
 
 #include "jitpch.h"
 #ifdef _MSC_VER
@@ -23,7 +23,7 @@ FILE* g_disAsmFileCorDisTools;
 #endif // USE_COREDISTOOLS
 
 // Define DISASM_DEBUG to get verbose output of late disassembler inner workings.
-//#define DISASM_DEBUG
+// #define DISASM_DEBUG
 #ifdef DISASM_DEBUG
 #ifdef DEBUG
 #define DISASM_DUMP(...)                                                                                               \
@@ -96,12 +96,12 @@ typedef struct codeFix
 {
     codeFix* cfNext;
     unsigned cfFixup;
-} * codeFixPtr;
+}* codeFixPtr;
 
 typedef struct codeBlk
 {
     codeFix* cbFixupLst;
-} * codeBlkPtr;
+}* codeBlkPtr;
 
 #ifdef USE_MSVCDIS
 
@@ -139,7 +139,7 @@ size_t DisAssembler::disCchAddrMember(
 
     switch (terminationType)
     {
-        // int disCallSize;
+            // int disCallSize;
 
         case DISX86::trmtaJmpShort:
         case DISX86::trmtaJmpCcShort:
@@ -228,7 +228,7 @@ size_t DisAssembler::disCchAddrMember(
 
     switch (terminationType)
     {
-        // int disCallSize;
+            // int disCallSize;
 
         case DISARM64::TRMTA::trmtaBra:
         case DISARM64::TRMTA::trmtaBraCase:
@@ -620,7 +620,7 @@ size_t DisAssembler::disCchRegRelMember(
 
         case DISX86::trmtaFallThrough:
 
-        /* some instructions like division have a TRAP termination type - ignore it */
+            /* some instructions like division have a TRAP termination type - ignore it */
 
         case DISX86::trmtaTrap:
         case DISX86::trmtaTrapCc:
@@ -715,7 +715,7 @@ size_t DisAssembler::disCchRegRelMember(
 
         case DISARM64::TRMTA::trmtaFallThrough:
 
-        /* some instructions like division have a TRAP termination type - ignore it */
+            /* some instructions like division have a TRAP termination type - ignore it */
 
         case DISARM64::TRMTA::trmtaTrap:
         case DISARM64::TRMTA::trmtaTrapCc:
@@ -1261,7 +1261,7 @@ void DisAssembler::DisasmBuffer(FILE* pfile, bool printit)
 #elif defined(TARGET_AMD64)
     pdis = DIS::PdisNew(DIS::distX8664);
 #elif defined(TARGET_ARM64)
-    pdis                      = DIS::PdisNew(DIS::distArm64);
+    pdis = DIS::PdisNew(DIS::distArm64);
 #else // TARGET*
 #error Unsupported or unset target architecture
 #endif
@@ -1340,7 +1340,7 @@ void DisAssembler::DisasmBuffer(FILE* pfile, bool printit)
 #else
                            false // Display code bytes?
 #endif
-                           );
+        );
 
         ibCur += (unsigned)cb;
     }
@@ -1680,7 +1680,7 @@ bool DisAssembler::InitCoredistoolsLibrary()
 
     s_disCoreDisToolsLibraryLoadSuccessful = true; // We made it!
 
-// done initializing
+    // done initializing
 
 FinishedInitializing:
     InterlockedExchange(&s_disCoreDisToolsLibraryInitializing, 0); // unlock initialization
@@ -1703,7 +1703,7 @@ bool DisAssembler::InitCoredistoolsDisasm()
 #if defined(TARGET_ARM64)
     coreDisTargetArchitecture = Target_Arm64;
 #elif defined(TARGET_ARM)
-    coreDisTargetArchitecture        = Target_Thumb;
+    coreDisTargetArchitecture = Target_Thumb;
 #elif defined(TARGET_X86)
     coreDisTargetArchitecture = Target_X86;
 #elif defined(TARGET_AMD64)
diff --git a/src/coreclr/jit/earlyprop.cpp b/src/coreclr/jit/earlyprop.cpp
index a63c34babce9..ef03524a1810 100644
--- a/src/coreclr/jit/earlyprop.cpp
+++ b/src/coreclr/jit/earlyprop.cpp
@@ -371,21 +371,22 @@ GenTree* Compiler::optPropGetValueRec(unsigned lclNum, unsigned ssaNum, optPropK
     {
         assert(ssaDefStore->OperIsLocalStore());
 
-        GenTree* data = ssaDefStore->Data();
+        GenTree* defValue = ssaDefStore->Data();
 
-        // Recursively track the Rhs for "entire" stores.
-        if (ssaDefStore->OperIs(GT_STORE_LCL_VAR) && (ssaDefStore->GetLclNum() == lclNum) && data->OperIs(GT_LCL_VAR))
+        // Recursively track the value for "entire" stores.
+        if (ssaDefStore->OperIs(GT_STORE_LCL_VAR) && (ssaDefStore->GetLclNum() == lclNum) &&
+            defValue->OperIs(GT_LCL_VAR))
         {
-            unsigned dataLclNum = data->AsLclVarCommon()->GetLclNum();
-            unsigned dataSsaNum = data->AsLclVarCommon()->GetSsaNum();
+            unsigned defValueLclNum = defValue->AsLclVar()->GetLclNum();
+            unsigned defValueSsaNum = defValue->AsLclVar()->GetSsaNum();
 
-            value = optPropGetValueRec(dataLclNum, dataSsaNum, valueKind, walkDepth + 1);
+            value = optPropGetValueRec(defValueLclNum, defValueSsaNum, valueKind, walkDepth + 1);
         }
         else
         {
             if (valueKind == optPropKind::OPK_ARRAYLEN)
             {
-                value = getArrayLengthFromAllocation(data DEBUGARG(ssaVarDsc->GetBlock()));
+                value = getArrayLengthFromAllocation(defValue DEBUGARG(ssaVarDsc->GetBlock()));
                 if (value != nullptr)
                 {
                     if (!value->IsCnsIntOrI())
diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp
index 3b89a8246a6f..5736b46daac0 100644
--- a/src/coreclr/jit/ee_il_dll.cpp
+++ b/src/coreclr/jit/ee_il_dll.cpp
@@ -211,7 +211,9 @@ void SetJitTls(void* value)
 
 #if defined(DEBUG)
 
-JitTls::JitTls(ICorJitInfo* jitInfo) : m_compiler(nullptr), m_logEnv(jitInfo)
+JitTls::JitTls(ICorJitInfo* jitInfo)
+    : m_compiler(nullptr)
+    , m_logEnv(jitInfo)
 {
     m_next = reinterpret_cast<JitTls*>(GetJitTls());
     SetJitTls(this);
@@ -345,11 +347,11 @@ void CILJit::setTargetOS(CORINFO_OS os)
 //   including padding after the actual value.
 //
 // Arguments:
-//   list - the arg list handle pointing to the argument
-//   sig  - the signature for the arg's method
+//   corInfoType - EE type of the argument
+//   typeHnd     - if the type is a value class, its class handle
 //
 // Return value:
-//   the number of stack slots in stack arguments for the call.
+//   the size in bytes when the type is passed on the stack for the call.
 //
 // Notes:
 //   - On most platforms arguments are passed with TARGET_POINTER_SIZE alignment,
@@ -357,21 +359,19 @@ void CILJit::setTargetOS(CORINFO_OS os)
 //   It is different for arm64 apple that packs some types without alignment and padding.
 //   If the argument is passed by reference then the method returns REF size.
 //
-unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig)
+unsigned Compiler::eeGetArgSize(CorInfoType corInfoType, CORINFO_CLASS_HANDLE typeHnd)
 {
+    var_types argType = JITtype2varType(corInfoType);
+
 #if defined(TARGET_AMD64)
 
     // Everything fits into a single 'slot' size
     // to accommodate irregular sized structs, they are passed byref
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef UNIX_AMD64_ABI
-    CORINFO_CLASS_HANDLE argClass;
-    CorInfoType          argTypeJit = strip(info.compCompHnd->getArgType(sig, list, &argClass));
-    var_types            argType    = JITtype2varType(argTypeJit);
     if (varTypeIsStruct(argType))
     {
-        unsigned structSize = info.compCompHnd->getClassSize(argClass);
+        unsigned structSize = info.compCompHnd->getClassSize(typeHnd);
         return roundUp(structSize, TARGET_POINTER_SIZE);
     }
 #endif // UNIX_AMD64_ABI
@@ -379,26 +379,22 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO*
 
 #else // !TARGET_AMD64
 
-    CORINFO_CLASS_HANDLE argClass;
-    CorInfoType          argTypeJit = strip(info.compCompHnd->getArgType(sig, list, &argClass));
-    var_types            argType    = JITtype2varType(argTypeJit);
-    unsigned             argSize;
+    unsigned argSize;
 
     var_types hfaType = TYP_UNDEF;
     bool      isHfa   = false;
 
     if (varTypeIsStruct(argType))
     {
-        hfaType             = GetHfaType(argClass);
+        hfaType             = GetHfaType(typeHnd);
         isHfa               = (hfaType != TYP_UNDEF);
-        unsigned structSize = info.compCompHnd->getClassSize(argClass);
+        unsigned structSize = info.compCompHnd->getClassSize(typeHnd);
 
         // make certain the EE passes us back the right thing for refanys
-        assert(argTypeJit != CORINFO_TYPE_REFANY || structSize == 2 * TARGET_POINTER_SIZE);
+        assert(corInfoType != CORINFO_TYPE_REFANY || structSize == 2 * TARGET_POINTER_SIZE);
 
         // For each target that supports passing struct args in multiple registers
         // apply the target specific rules for them here:
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if FEATURE_MULTIREG_ARGS
 #if defined(TARGET_ARM64)
@@ -1417,7 +1413,9 @@ bool Compiler::eeRunWithSPMIErrorTrapImp(void (*function)(void*), void* param)
 unsigned Compiler::eeTryGetClassSize(CORINFO_CLASS_HANDLE clsHnd)
 {
     unsigned classSize = UINT_MAX;
-    eeRunFunctorWithSPMIErrorTrap([&]() { classSize = info.compCompHnd->getClassSize(clsHnd); });
+    eeRunFunctorWithSPMIErrorTrap([&]() {
+        classSize = info.compCompHnd->getClassSize(clsHnd);
+    });
 
     return classSize;
 }
diff --git a/src/coreclr/jit/ee_il_dll.hpp b/src/coreclr/jit/ee_il_dll.hpp
index c3801d88292f..d676ba8caa47 100644
--- a/src/coreclr/jit/ee_il_dll.hpp
+++ b/src/coreclr/jit/ee_il_dll.hpp
@@ -10,12 +10,12 @@ class CILJit : public ICorJitCompiler
                                unsigned             flags,           /* IN */
                                uint8_t**            nativeEntry,     /* OUT */
                                uint32_t*            nativeSizeOfCode /* OUT */
-                               );
+    );
 
     void ProcessShutdownWork(ICorStaticInfo* statInfo);
 
     void getVersionIdentifier(GUID* versionIdentifier /* OUT */
-                              );
+    );
 
     void setTargetOS(CORINFO_OS os);
 };
diff --git a/src/coreclr/jit/eeinterface.cpp b/src/coreclr/jit/eeinterface.cpp
index d9852afb9e53..a6552c219429 100644
--- a/src/coreclr/jit/eeinterface.cpp
+++ b/src/coreclr/jit/eeinterface.cpp
@@ -210,7 +210,7 @@ void Compiler::eePrintTypeOrJitAlias(StringPrinter* printer, CORINFO_CLASS_HANDL
 }
 
 static const char* s_jitHelperNames[CORINFO_HELP_COUNT] = {
-#define JITHELPER(code, pfnHelper, sig) #code,
+#define JITHELPER(code, pfnHelper, sig)        #code,
 #define DYNAMICJITHELPER(code, pfnHelper, sig) #code,
 #include "jithelpers.h"
 };
@@ -403,10 +403,9 @@ const char* Compiler::eeGetMethodFullName(
         CORINFO_SIG_INFO sig;
         eeGetMethodSig(hnd, &sig);
         eePrintMethod(&p, clsHnd, hnd, &sig,
-                      /* includeClassInstantiation */ true,
-                      /* includeMethodInstantiation */ true,
-                      /* includeSignature */ true, includeReturnType, includeThisSpecifier);
-
+                                      /* includeClassInstantiation */ true,
+                                      /* includeMethodInstantiation */ true,
+                                      /* includeSignature */ true, includeReturnType, includeThisSpecifier);
     });
 
     if (success)
@@ -475,13 +474,12 @@ const char* Compiler::eeGetMethodName(CORINFO_METHOD_HANDLE methHnd, char* buffe
     StringPrinter p(getAllocator(CMK_DebugOnly), buffer, bufferSize);
     bool          success = eeRunFunctorWithSPMIErrorTrap([&]() {
         eePrintMethod(&p, NO_CLASS_HANDLE, methHnd,
-                      /* sig */ nullptr,
-                      /* includeClassInstantiation */ false,
-                      /* includeMethodInstantiation */ false,
-                      /* includeSignature */ false,
-                      /* includeReturnType */ false,
-                      /* includeThisSpecifier */ false);
-
+                               /* sig */ nullptr,
+                               /* includeClassInstantiation */ false,
+                               /* includeMethodInstantiation */ false,
+                               /* includeSignature */ false,
+                               /* includeReturnType */ false,
+                               /* includeThisSpecifier */ false);
     });
 
     if (!success)
@@ -512,7 +510,9 @@ const char* Compiler::eeGetMethodName(CORINFO_METHOD_HANDLE methHnd, char* buffe
 const char* Compiler::eeGetFieldName(CORINFO_FIELD_HANDLE fldHnd, bool includeType, char* buffer, size_t bufferSize)
 {
     StringPrinter p(getAllocator(CMK_DebugOnly), buffer, bufferSize);
-    bool          success = eeRunFunctorWithSPMIErrorTrap([&]() { eePrintField(&p, fldHnd, includeType); });
+    bool          success = eeRunFunctorWithSPMIErrorTrap([&]() {
+        eePrintField(&p, fldHnd, includeType);
+    });
 
     if (success)
     {
@@ -525,7 +525,9 @@ const char* Compiler::eeGetFieldName(CORINFO_FIELD_HANDLE fldHnd, bool includeTy
     {
         p.Append("<unknown class>:");
 
-        success = eeRunFunctorWithSPMIErrorTrap([&]() { eePrintField(&p, fldHnd, false); });
+        success = eeRunFunctorWithSPMIErrorTrap([&]() {
+            eePrintField(&p, fldHnd, false);
+        });
 
         if (success)
         {
@@ -560,7 +562,9 @@ const char* Compiler::eeGetFieldName(CORINFO_FIELD_HANDLE fldHnd, bool includeTy
 const char* Compiler::eeGetClassName(CORINFO_CLASS_HANDLE clsHnd, char* buffer, size_t bufferSize)
 {
     StringPrinter printer(getAllocator(CMK_DebugOnly), buffer, bufferSize);
-    if (!eeRunFunctorWithSPMIErrorTrap([&]() { eePrintType(&printer, clsHnd, true); }))
+    if (!eeRunFunctorWithSPMIErrorTrap([&]() {
+        eePrintType(&printer, clsHnd, true);
+    }))
     {
         printer.Truncate(0);
         printer.Append("<unknown class>");
@@ -581,7 +585,9 @@ const char* Compiler::eeGetClassName(CORINFO_CLASS_HANDLE clsHnd, char* buffer,
 const char* Compiler::eeGetShortClassName(CORINFO_CLASS_HANDLE clsHnd)
 {
     StringPrinter printer(getAllocator(CMK_DebugOnly));
-    if (!eeRunFunctorWithSPMIErrorTrap([&]() { eePrintType(&printer, clsHnd, false); }))
+    if (!eeRunFunctorWithSPMIErrorTrap([&]() {
+        eePrintType(&printer, clsHnd, false);
+    }))
     {
         printer.Truncate(0);
         printer.Append("<unknown class>");
@@ -597,8 +603,9 @@ void Compiler::eePrintObjectDescription(const char* prefix, CORINFO_OBJECT_HANDL
     size_t       actualLen = 0;
 
     // Ignore potential SPMI failures
-    bool success = eeRunFunctorWithSPMIErrorTrap(
-        [&]() { actualLen = this->info.compCompHnd->printObjectDescription(handle, str, maxStrSize); });
+    bool success = eeRunFunctorWithSPMIErrorTrap([&]() {
+        actualLen = this->info.compCompHnd->printObjectDescription(handle, str, maxStrSize);
+    });
 
     if (!success)
     {
@@ -614,5 +621,5 @@ void Compiler::eePrintObjectDescription(const char* prefix, CORINFO_OBJECT_HANDL
         }
     }
 
-    printf("%s '%s'\n", prefix, str);
+    printf("%s '%s'", prefix, str);
 }
diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp
index d3ac84e7919a..5259b936646f 100644
--- a/src/coreclr/jit/emit.cpp
+++ b/src/coreclr/jit/emit.cpp
@@ -787,7 +787,7 @@ void emitter::emitGenIG(insGroup* ig)
         IMPL_LIMITATION("Too many arguments pushed on stack");
     }
 
-//  printf("Start IG #%02u [stk=%02u]\n", ig->igNum, emitCurStackLvl);
+    //  printf("Start IG #%02u [stk=%02u]\n", ig->igNum, emitCurStackLvl);
 
 #endif
 
@@ -1205,7 +1205,7 @@ void emitter::emitBegFN(bool hasFramePtr
                         ,
                         bool chkAlign
 #endif
-                        )
+)
 {
     insGroup* ig;
 
@@ -1608,11 +1608,8 @@ void* emitter::emitAllocAnyInstr(size_t sz, emitAttr opsz)
     // the prolog/epilog placeholder groups ARE generated in order, and are
     // re-used. But generating additional groups would not work.
     if (emitComp->compStressCompile(Compiler::STRESS_EMITTER, 1) && emitCurIGinsCnt && !emitIGisInProlog(emitCurIG) &&
-        !emitIGisInEpilog(emitCurIG) && !emitCurIG->endsWithAlignInstr()
-#if defined(FEATURE_EH_FUNCLETS)
-        && !emitIGisInFuncletProlog(emitCurIG) && !emitIGisInFuncletEpilog(emitCurIG)
-#endif // FEATURE_EH_FUNCLETS
-            )
+        !emitIGisInEpilog(emitCurIG) && !emitCurIG->endsWithAlignInstr() && !emitIGisInFuncletProlog(emitCurIG) &&
+        !emitIGisInFuncletEpilog(emitCurIG))
     {
         emitNxtIG(true);
     }
@@ -1627,7 +1624,7 @@ void* emitter::emitAllocAnyInstr(size_t sz, emitAttr opsz)
         !emitIGisInProlog(emitCurIG) && // don't do this in prolog or epilog
         !emitIGisInEpilog(emitCurIG) &&
         emitRandomNops // sometimes we turn off where exact codegen is needed (pinvoke inline)
-        )
+    )
     {
         if (emitNextNop == 0)
         {
@@ -1761,7 +1758,7 @@ void* emitter::emitAllocAnyInstr(size_t sz, emitAttr opsz)
 #ifndef TARGET_AMD64
         && emitComp->opts.compReloc
 #endif // TARGET_AMD64
-        )
+    )
     {
         /* Mark idInfo()->idDspReloc to remember that the            */
         /* address mode has a displacement that is relocatable       */
@@ -2070,11 +2067,7 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
 
     bool extend = false;
 
-    if (igType == IGPT_EPILOG
-#if defined(FEATURE_EH_FUNCLETS)
-        || igType == IGPT_FUNCLET_EPILOG
-#endif // FEATURE_EH_FUNCLETS
-        )
+    if (igType == IGPT_EPILOG || igType == IGPT_FUNCLET_EPILOG)
     {
 #ifdef TARGET_AMD64
         emitOutputPreEpilogNOP();
@@ -2108,7 +2101,7 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
      * case, we need to make sure any re-used fields, such as igFuncIdx, are correct.
      */
 
-    igPh->igFuncIdx = emitComp->compCurrFuncIdx;
+    igPh->igFuncIdx = emitComp->funCurrentFuncIdx();
 
     /* Create a separate block of memory to store placeholder information.
      * We could use unions to put some of this into the insGroup itself, but we don't
@@ -2144,7 +2137,6 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
     {
         igPh->igFlags |= IGF_EPILOG;
     }
-#if defined(FEATURE_EH_FUNCLETS)
     else if (igType == IGPT_FUNCLET_PROLOG)
     {
         igPh->igFlags |= IGF_FUNCLET_PROLOG;
@@ -2153,7 +2145,6 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
     {
         igPh->igFlags |= IGF_FUNCLET_EPILOG;
     }
-#endif // FEATURE_EH_FUNCLETS
 
     /* Link it into the placeholder list */
 
@@ -2174,7 +2165,6 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
     emitCurIGsize += MAX_PLACEHOLDER_IG_SIZE;
     emitCurCodeOffset += emitCurIGsize;
 
-#if defined(FEATURE_EH_FUNCLETS)
     // Add the appropriate IP mapping debugging record for this placeholder
     // group. genExitCode() adds the mapping for main function epilogs.
     if (emitComp->opts.compDbgInfo)
@@ -2188,7 +2178,6 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
             codeGen->genIPmappingAdd(IPmappingDscKind::Epilog, DebugInfo(), true);
         }
     }
-#endif // FEATURE_EH_FUNCLETS
 
     /* Start a new IG if more code follows */
 
@@ -2198,11 +2187,7 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
     }
     else
     {
-        if (igType == IGPT_EPILOG
-#if defined(FEATURE_EH_FUNCLETS)
-            || igType == IGPT_FUNCLET_EPILOG
-#endif // FEATURE_EH_FUNCLETS
-            )
+        if (igType == IGPT_EPILOG || igType == IGPT_FUNCLET_EPILOG)
         {
             // If this was an epilog, then assume this is the end of any currently in progress
             // no-GC region. If a block after the epilog needs to be no-GC, it needs to call
@@ -2249,12 +2234,10 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
 void emitter::emitGeneratePrologEpilog()
 {
 #ifdef DEBUG
-    unsigned prologCnt = 0;
-    unsigned epilogCnt = 0;
-#if defined(FEATURE_EH_FUNCLETS)
+    unsigned prologCnt        = 0;
+    unsigned epilogCnt        = 0;
     unsigned funcletPrologCnt = 0;
     unsigned funcletEpilogCnt = 0;
-#endif // FEATURE_EH_FUNCLETS
 #endif // DEBUG
 
     insGroup* igPh;
@@ -2284,8 +2267,6 @@ void emitter::emitGeneratePrologEpilog()
                 emitEndFnEpilog();
                 break;
 
-#if defined(FEATURE_EH_FUNCLETS)
-
             case IGPT_FUNCLET_PROLOG:
                 INDEBUG(++funcletPrologCnt);
                 emitBegFuncletProlog(igPh);
@@ -2300,8 +2281,6 @@ void emitter::emitGeneratePrologEpilog()
                 emitEndFuncletEpilog();
                 break;
 
-#endif // FEATURE_EH_FUNCLETS
-
             default:
                 unreached();
         }
@@ -2311,17 +2290,16 @@ void emitter::emitGeneratePrologEpilog()
     if (emitComp->verbose)
     {
         printf("%d prologs, %d epilogs", prologCnt, epilogCnt);
-#if defined(FEATURE_EH_FUNCLETS)
-        printf(", %d funclet prologs, %d funclet epilogs", funcletPrologCnt, funcletEpilogCnt);
-#endif // FEATURE_EH_FUNCLETS
+        if (emitComp->UsesFunclets())
+        {
+            printf(", %d funclet prologs, %d funclet epilogs", funcletPrologCnt, funcletEpilogCnt);
+        }
         printf("\n");
 
-// prolog/epilog code doesn't use this yet
-// noway_assert(prologCnt == 1);
-// noway_assert(epilogCnt == emitEpilogCnt); // Is this correct?
-#if defined(FEATURE_EH_FUNCLETS)
+        // prolog/epilog code doesn't use this yet
+        // noway_assert(prologCnt == 1);
+        // noway_assert(epilogCnt == emitEpilogCnt); // Is this correct?
         assert(funcletPrologCnt == emitComp->ehFuncletCount());
-#endif // FEATURE_EH_FUNCLETS
     }
 #endif // DEBUG
 }
@@ -2509,18 +2487,16 @@ void emitter::emitEndFnEpilog()
         // because the only instruction is the last one and thus a slight
         // underestimation of the epilog size is harmless (since the EIP
         // can not be between instructions).
-        assert(emitEpilogCnt == 1 ||
-               (emitExitSeqSize - newSize) <= 5 // delta between size of various forms of jmp (size is either 6 or 5),
-                                                // and various forms of ret (size is either 1 or 3). The combination can
-                                                // be anything between 1 and 5.
-               );
+        assert(emitEpilogCnt == 1 || (emitExitSeqSize - newSize) <= 5 // delta between size of various forms of jmp
+                                                                      // (size is either 6 or 5), and various forms of
+                                                                      // ret (size is either 1 or 3). The combination
+                                                                      // can be anything between 1 and 5.
+        );
         emitExitSeqSize = newSize;
     }
 #endif // JIT32_GCENCODER
 }
 
-#if defined(FEATURE_EH_FUNCLETS)
-
 /*****************************************************************************
  *
  *  Begin generating a funclet prolog.
@@ -2528,6 +2504,7 @@ void emitter::emitEndFnEpilog()
 
 void emitter::emitBegFuncletProlog(insGroup* igPh)
 {
+    assert(emitComp->UsesFunclets());
     emitBegPrologEpilog(igPh);
 }
 
@@ -2538,6 +2515,7 @@ void emitter::emitBegFuncletProlog(insGroup* igPh)
 
 void emitter::emitEndFuncletProlog()
 {
+    assert(emitComp->UsesFunclets());
     emitEndPrologEpilog();
 }
 
@@ -2548,6 +2526,7 @@ void emitter::emitEndFuncletProlog()
 
 void emitter::emitBegFuncletEpilog(insGroup* igPh)
 {
+    assert(emitComp->UsesFunclets());
     emitBegPrologEpilog(igPh);
 }
 
@@ -2558,11 +2537,10 @@ void emitter::emitBegFuncletEpilog(insGroup* igPh)
 
 void emitter::emitEndFuncletEpilog()
 {
+    assert(emitComp->UsesFunclets());
     emitEndPrologEpilog();
 }
 
-#endif // FEATURE_EH_FUNCLETS
-
 #ifdef JIT32_GCENCODER
 
 //
@@ -2825,11 +2803,11 @@ bool emitter::emitNoGChelper(CorInfoHelpFunc helpFunc)
         case CORINFO_HELP_LRSH:
         case CORINFO_HELP_LRSZ:
 
-//  case CORINFO_HELP_LMUL:
-//  case CORINFO_HELP_LDIV:
-//  case CORINFO_HELP_LMOD:
-//  case CORINFO_HELP_ULDIV:
-//  case CORINFO_HELP_ULMOD:
+            //  case CORINFO_HELP_LMUL:
+            //  case CORINFO_HELP_LDIV:
+            //  case CORINFO_HELP_LMOD:
+            //  case CORINFO_HELP_ULDIV:
+            //  case CORINFO_HELP_ULMOD:
 
 #ifdef TARGET_X86
         case CORINFO_HELP_ASSIGN_REF_EAX:
@@ -2890,8 +2868,8 @@ bool emitter::emitNoGChelper(CORINFO_METHOD_HANDLE methHnd)
  *  Mark the current spot as having a label.
  */
 
-void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars,
-                            regMaskTP        gcrefRegs,
+void* emitter::emitAddLabel(VARSET_VALARG_TP    GCvars,
+                            regMaskTP           gcrefRegs,
                             regMaskTP byrefRegs DEBUG_ARG(BasicBlock* block))
 {
     /* Create a new IG if the current one is non-empty */
@@ -2999,16 +2977,12 @@ bool emitter::emitIsFuncEnd(emitLocation* emitLoc, emitLocation* emitLocNextFrag
     if (ig->igNext->igFlags & IGF_FUNCLET_PROLOG)
         return true;
 
-#if defined(FEATURE_EH_FUNCLETS)
-
     // Is the next IG a placeholder group for a funclet prolog?
     if ((ig->igNext->igFlags & IGF_PLACEHOLDER) && (ig->igNext->igPhData->igPhType == IGPT_FUNCLET_PROLOG))
     {
         return true;
     }
 
-#endif // FEATURE_EH_FUNCLETS
-
     return false;
 }
 
@@ -3088,7 +3062,7 @@ void emitter::emitSplit(emitLocation*         startLoc,
             return;
         }
 
-// Report it!
+        // Report it!
 
 #ifdef DEBUG
         if (EMITVERBOSE)
@@ -3605,7 +3579,7 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int              argCnt,
                                                  VARSET_VALARG_TP GCvars,
                                                  regMaskTP        gcrefRegs,
                                                  regMaskTP        byrefRegs,
-                                                 emitAttr         retSizeIn
+                                                 emitAttr retSizeIn
                                                      MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize))
 {
     emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE;
@@ -3688,7 +3662,7 @@ emitter::instrDesc* emitter::emitNewInstrCallDir(int              argCnt,
                                                  VARSET_VALARG_TP GCvars,
                                                  regMaskTP        gcrefRegs,
                                                  regMaskTP        byrefRegs,
-                                                 emitAttr         retSizeIn
+                                                 emitAttr retSizeIn
                                                      MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize))
 {
     emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE;
@@ -3912,8 +3886,8 @@ void emitter::emitDispRegPtrListDelta()
     // Dump any deltas in regPtrDsc's for outgoing args; these aren't captured in the other sets.
     if (debugPrevRegPtrDsc != codeGen->gcInfo.gcRegPtrLast)
     {
-        for (regPtrDsc* dsc = (debugPrevRegPtrDsc == nullptr) ? codeGen->gcInfo.gcRegPtrList
-                                                              : debugPrevRegPtrDsc->rpdNext;
+        for (regPtrDsc* dsc      = (debugPrevRegPtrDsc == nullptr) ? codeGen->gcInfo.gcRegPtrList
+                                                                   : debugPrevRegPtrDsc->rpdNext;
              dsc != nullptr; dsc = dsc->rpdNext)
         {
             // The non-arg regPtrDscs are reflected in the register sets debugPrevGCrefRegs/emitThisGCrefRegs
@@ -4042,14 +4016,12 @@ void emitter::emitDispIG(insGroup* ig, bool displayFunc, bool displayInstruction
             case IGPT_EPILOG:
                 pszType = "epilog";
                 break;
-#if defined(FEATURE_EH_FUNCLETS)
             case IGPT_FUNCLET_PROLOG:
                 pszType = "funclet prolog";
                 break;
             case IGPT_FUNCLET_EPILOG:
                 pszType = "funclet epilog";
                 break;
-#endif // FEATURE_EH_FUNCLETS
             default:
                 pszType = "UNKNOWN";
                 break;
@@ -4393,11 +4365,11 @@ size_t emitter::emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp)
     float insExeCost = insEvaluateExecutionCost(id);
     // All compPerfScore calculations must be performed using doubles
     double insPerfScore = (double)(ig->igWeight / (double)BB_UNITY_WEIGHT) * insExeCost;
-    emitComp->info.compPerfScore += insPerfScore;
+    emitComp->Metrics.PerfScore += insPerfScore;
     ig->igPerfScore += insPerfScore;
 #endif // defined(DEBUG) || defined(LATE_DISASM)
 
-// printf("[S=%02u]\n", emitCurStackLvl);
+    // printf("[S=%02u]\n", emitCurStackLvl);
 
 #if EMIT_TRACK_STACK_DEPTH
 
@@ -4559,7 +4531,7 @@ void emitter::emitDispCommentForHandle(size_t handle, size_t cookie, GenTreeFlag
 #ifdef DEBUG
         emitComp->eePrintObjectDescription(commentPrefix, (CORINFO_OBJECT_HANDLE)handle);
 #else
-        str                   = "frozen object handle";
+        str = "frozen object handle";
 #endif
     }
     else if (flag == GTF_ICON_CLASS_HDL)
@@ -4667,7 +4639,6 @@ void emitter::emitRemoveJumpToNextInst()
 
                 // the last instruction in the group is the jmp we're looking for
                 // and it jumps to the next instruction group so we don't need it
-                CLANG_FORMAT_COMMENT_ANCHOR
 
 #ifdef DEBUG
                 unsigned instructionCount = jmpGroup->igInsCnt;
@@ -4870,9 +4841,9 @@ void emitter::emitJumpDistBind()
 
     int jmp_iteration = 1;
 
-/*****************************************************************************/
-/* If we iterate to look for more jumps to shorten, we start again here.     */
-/*****************************************************************************/
+    /*****************************************************************************/
+    /* If we iterate to look for more jumps to shorten, we start again here.     */
+    /*****************************************************************************/
 
 AGAIN:
 
@@ -4880,10 +4851,10 @@ void emitter::emitJumpDistBind()
     emitCheckIGList();
 #endif
 
-/*
-    In the following loop we convert all jump targets from "BasicBlock *"
-    to "insGroup *" values. We also estimate which jumps will be short.
- */
+    /*
+        In the following loop we convert all jump targets from "BasicBlock *"
+        to "insGroup *" values. We also estimate which jumps will be short.
+     */
 
 #ifdef DEBUG
     insGroup*     lastIG = nullptr;
@@ -5023,7 +4994,7 @@ void emitter::emitJumpDistBind()
         }
 #endif // TARGET_ARM64
 
-/* Make sure the jumps are properly ordered */
+        /* Make sure the jumps are properly ordered */
 
 #ifdef DEBUG
         assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < jmp->idjOffs);
@@ -5079,7 +5050,6 @@ void emitter::emitJumpDistBind()
         jmp->idjOffs -= adjLJ;
 
         // If this is a jump via register, the instruction size does not change, so we are done.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_ARM64)
         // JIT code and data will be allocated together for arm64 so the relative offset to JIT data is known.
@@ -5145,7 +5115,6 @@ void emitter::emitJumpDistBind()
         else
         {
             /* First time we've seen this label, convert its target */
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
             if (EMITVERBOSE)
@@ -5427,9 +5396,9 @@ void emitter::emitJumpDistBind()
 
         continue;
 
-    /*****************************************************************************/
-    /* Handle conversion to short jump                                           */
-    /*****************************************************************************/
+        /*****************************************************************************/
+        /* Handle conversion to short jump                                           */
+        /*****************************************************************************/
 
     SHORT_JMP:
 
@@ -5469,9 +5438,9 @@ void emitter::emitJumpDistBind()
 
 #if defined(TARGET_ARM)
 
-    /*****************************************************************************/
-    /* Handle conversion to medium jump                                          */
-    /*****************************************************************************/
+        /*****************************************************************************/
+        /* Handle conversion to medium jump                                          */
+        /*****************************************************************************/
 
     MEDIUM_JMP:
 
@@ -5496,7 +5465,7 @@ void emitter::emitJumpDistBind()
 
 #endif // TARGET_ARM
 
-    /*****************************************************************************/
+        /*****************************************************************************/
 
     NEXT_JMP:
 
@@ -5554,7 +5523,6 @@ void emitter::emitJumpDistBind()
 #endif
 
         /* Is there a chance of other jumps becoming short? */
-        CLANG_FORMAT_COMMENT_ANCHOR;
 #ifdef DEBUG
 #if defined(TARGET_ARM)
         if (EMITVERBOSE)
@@ -5572,7 +5540,7 @@ void emitter::emitJumpDistBind()
 #if defined(TARGET_ARM)
             || (minMediumExtra <= adjIG)
 #endif // TARGET_ARM
-                )
+        )
         {
             jmp_iteration++;
 
@@ -5827,8 +5795,8 @@ bool emitter::emitEndsWithAlignInstr()
 //  Returns:  size of a loop in bytes.
 //
 unsigned emitter::getLoopSize(insGroup* igLoopHeader,
-                              unsigned  maxLoopSize                     //
-                              DEBUG_ARG(bool isAlignAdjusted)           //
+                              unsigned maxLoopSize                      //
+                                  DEBUG_ARG(bool isAlignAdjusted)       //
                               DEBUG_ARG(UNATIVE_OFFSET containingIGNum) //
                               DEBUG_ARG(UNATIVE_OFFSET loopHeadPredIGNum))
 {
@@ -5881,7 +5849,6 @@ unsigned emitter::getLoopSize(insGroup* igLoopHeader,
             //      jne IG06
             //
             //
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
             if ((igInLoop->igLoopBackEdge != nullptr) && (igInLoop->igLoopBackEdge != igLoopHeader))
@@ -6236,17 +6203,17 @@ void emitter::emitLoopAlignAdjustments()
                 }
 #endif // TARGET_XARCH & TARGET_ARM64
 #endif // DEBUG
-                // Adjust the padding amount in all align instructions in this IG
+       // Adjust the padding amount in all align instructions in this IG
                 instrDescAlign *alignInstrToAdj = alignInstr, *prevAlignInstr = nullptr;
                 for (; alignInstrToAdj != nullptr && alignInstrToAdj->idaIG == alignInstr->idaIG;
                      alignInstrToAdj = alignInstrToAdj->idaNext)
                 {
 
 #if defined(TARGET_XARCH)
-                    unsigned newPadding = min(paddingToAdj, MAX_ENCODED_SIZE);
+                    unsigned newPadding = min(paddingToAdj, (unsigned)MAX_ENCODED_SIZE);
                     alignInstrToAdj->idCodeSize(newPadding);
 #elif defined(TARGET_ARM64)
-                    unsigned newPadding = min(paddingToAdj, INSTR_ENCODED_SIZE);
+                    unsigned newPadding = min(paddingToAdj, (unsigned)INSTR_ENCODED_SIZE);
                     if (newPadding == 0)
                     {
                         alignInstrToAdj->idInsOpt(INS_OPTS_NONE);
@@ -6332,7 +6299,7 @@ void emitter::emitLoopAlignAdjustments()
 //     3b. If the loop already fits in minimum alignmentBoundary blocks, then return 0. // already best aligned
 //     3c. return paddingNeeded.
 //
-unsigned emitter::emitCalculatePaddingForLoopAlignment(insGroup* loopHeadIG,
+unsigned emitter::emitCalculatePaddingForLoopAlignment(insGroup*     loopHeadIG,
                                                        size_t offset DEBUG_ARG(bool isAlignAdjusted)
                                                            DEBUG_ARG(UNATIVE_OFFSET containingIGNum)
                                                                DEBUG_ARG(UNATIVE_OFFSET loopHeadPredIGNum))
@@ -6673,18 +6640,18 @@ void emitter::emitComputeCodeSizes()
 // Returns:
 //    size of the method code, in bytes
 //
-unsigned emitter::emitEndCodeGen(Compiler* comp,
-                                 bool      contTrkPtrLcls,
-                                 bool      fullyInt,
-                                 bool      fullPtrMap,
-                                 unsigned  xcptnsCount,
-                                 unsigned* prologSize,
-                                 unsigned* epilogSize,
-                                 void**    codeAddr,
-                                 void**    codeAddrRW,
-                                 void**    coldCodeAddr,
-                                 void**    coldCodeAddrRW,
-                                 void**    consAddr,
+unsigned emitter::emitEndCodeGen(Compiler*         comp,
+                                 bool              contTrkPtrLcls,
+                                 bool              fullyInt,
+                                 bool              fullPtrMap,
+                                 unsigned          xcptnsCount,
+                                 unsigned*         prologSize,
+                                 unsigned*         epilogSize,
+                                 void**            codeAddr,
+                                 void**            codeAddrRW,
+                                 void**            coldCodeAddr,
+                                 void**            coldCodeAddrRW,
+                                 void**            consAddr,
                                  void** consAddrRW DEBUGARG(unsigned* instrCount))
 {
 #ifdef DEBUG
@@ -6949,7 +6916,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
     *consAddrRW               = consBlockRW;
 
     /* Nothing has been pushed on the stack */
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if EMIT_TRACK_STACK_DEPTH
     emitCurStackLvl = 0;
@@ -7116,18 +7082,17 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
 
                 assert(indx < emitComp->lvaTrackedCount);
 
-// printf("Variable #%2u/%2u is at stack offset %d\n", num, indx, offs);
+                // printf("Variable #%2u/%2u is at stack offset %d\n", num, indx, offs);
 
-#ifdef JIT32_GCENCODER
-#ifndef FEATURE_EH_FUNCLETS
+#if defined(JIT32_GCENCODER) && defined(FEATURE_EH_WINDOWS_X86)
                 // Remember the frame offset of the "this" argument for synchronized methods.
-                if (emitComp->lvaIsOriginalThisArg(num) && emitComp->lvaKeepAliveAndReportThis())
+                if (!emitComp->UsesFunclets() && emitComp->lvaIsOriginalThisArg(num) &&
+                    emitComp->lvaKeepAliveAndReportThis())
                 {
                     emitSyncThisObjOffs = offs;
                     offs |= this_OFFSET_FLAG;
                 }
-#endif
-#endif // JIT32_GCENCODER
+#endif // JIT32_GCENCODER && FEATURE_EH_WINDOWS_X86
 
                 if (dsc->TypeGet() == TYP_BYREF)
                 {
@@ -7612,7 +7577,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
                 if (jmp->idjShort)
                 {
                     // Patch Forward Short Jump
-                    CLANG_FORMAT_COMMENT_ANCHOR;
 #if defined(TARGET_XARCH)
                     *(BYTE*)(adr + writeableOffset) -= (BYTE)adj;
 #elif defined(TARGET_ARM)
@@ -7632,7 +7596,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
                 else
                 {
                     // Patch Forward non-Short Jump
-                    CLANG_FORMAT_COMMENT_ANCHOR;
 #if defined(TARGET_XARCH)
                     *(int*)(adr + writeableOffset) -= adj;
 #elif defined(TARGET_ARMARCH)
@@ -8154,13 +8117,13 @@ CORINFO_FIELD_HANDLE emitter::emitFltOrDblConst(double constValue, emitAttr attr
 // Return Value:
 //    A field handle representing the data offset to access the constant.
 //
+// Note:
+// Access to inline data is 'abstracted' by a special type of static member
+// (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+// to constant data, not a real static field.
+//
 CORINFO_FIELD_HANDLE emitter::emitSimd8Const(simd8_t constValue)
 {
-    // Access to inline data is 'abstracted' by a special type of static member
-    // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
-    // to constant data, not a real static field.
-    CLANG_FORMAT_COMMENT_ANCHOR;
-
     unsigned cnsSize  = 8;
     unsigned cnsAlign = cnsSize;
 
@@ -8177,11 +8140,6 @@ CORINFO_FIELD_HANDLE emitter::emitSimd8Const(simd8_t constValue)
 
 CORINFO_FIELD_HANDLE emitter::emitSimd16Const(simd16_t constValue)
 {
-    // Access to inline data is 'abstracted' by a special type of static member
-    // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
-    // to constant data, not a real static field.
-    CLANG_FORMAT_COMMENT_ANCHOR;
-
     unsigned cnsSize  = 16;
     unsigned cnsAlign = cnsSize;
 
@@ -8199,11 +8157,6 @@ CORINFO_FIELD_HANDLE emitter::emitSimd16Const(simd16_t constValue)
 #if defined(TARGET_XARCH)
 CORINFO_FIELD_HANDLE emitter::emitSimd32Const(simd32_t constValue)
 {
-    // Access to inline data is 'abstracted' by a special type of static member
-    // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
-    // to constant data, not a real static field.
-    CLANG_FORMAT_COMMENT_ANCHOR;
-
     unsigned cnsSize  = 32;
     unsigned cnsAlign = cnsSize;
 
@@ -8218,11 +8171,6 @@ CORINFO_FIELD_HANDLE emitter::emitSimd32Const(simd32_t constValue)
 
 CORINFO_FIELD_HANDLE emitter::emitSimd64Const(simd64_t constValue)
 {
-    // Access to inline data is 'abstracted' by a special type of static member
-    // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
-    // to constant data, not a real static field.
-    CLANG_FORMAT_COMMENT_ANCHOR;
-
     unsigned cnsSize  = 64;
     unsigned cnsAlign = cnsSize;
 
@@ -8234,6 +8182,22 @@ CORINFO_FIELD_HANDLE emitter::emitSimd64Const(simd64_t constValue)
     UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD64);
     return emitComp->eeFindJitDataOffs(cnum);
 }
+
+CORINFO_FIELD_HANDLE emitter::emitSimdMaskConst(simdmask_t constValue)
+{
+    unsigned cnsSize  = 8;
+    unsigned cnsAlign = cnsSize;
+
+#ifdef TARGET_XARCH
+    if (emitComp->compCodeOpt() == Compiler::SMALL_CODE)
+    {
+        cnsAlign = dataSection::MIN_DATA_ALIGN;
+    }
+#endif // TARGET_XARCH
+
+    UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_MASK);
+    return emitComp->eeFindJitDataOffs(cnum);
+}
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -8444,7 +8408,7 @@ void emitter::emitDispDataSec(dataSecDsc* section, BYTE* dst)
                         printf("\tdd\t%08Xh", (uint32_t)(size_t)emitOffsetToPtr(ig->igOffs));
                     }
 #else  // TARGET_64BIT
-                    // We have a 64-BIT target
+       // We have a 64-BIT target
                     if (emitComp->opts.disDiffable)
                     {
                         printf("\tdq\t%s\n", blockLabel);
@@ -8609,10 +8573,10 @@ void emitter::emitGCvarLiveSet(int offs, GCtype gcType, BYTE* addr, ssize_t disp
 
     desc->vpdNext = nullptr;
 
-#if !defined(JIT32_GCENCODER) || !defined(FEATURE_EH_FUNCLETS)
     /* the lower 2 bits encode props about the stk ptr */
 
-    if (offs == emitSyncThisObjOffs)
+#if defined(JIT32_GCENCODER) && defined(FEATURE_EH_WINDOWS_X86)
+    if (!emitComp->UsesFunclets() && offs == emitSyncThisObjOffs)
     {
         desc->vpdVarNum |= this_OFFSET_FLAG;
     }
@@ -9040,7 +9004,7 @@ void emitter::emitGCregDeadSet(GCtype gcType, regMaskTP regMask, BYTE* addr)
 
 unsigned char emitter::emitOutputByte(BYTE* dst, ssize_t val)
 {
-    BYTE* dstRW = dst + writeableOffset;
+    BYTE* dstRW                    = dst + writeableOffset;
     *castto(dstRW, unsigned char*) = (unsigned char)val;
 
 #ifdef DEBUG
@@ -9579,7 +9543,7 @@ void emitter::emitInitIG(insGroup* ig)
 
     /* Set the current function index */
 
-    ig->igFuncIdx = emitComp->compCurrFuncIdx;
+    ig->igFuncIdx = emitComp->funCurrentFuncIdx();
 
     ig->igFlags = 0;
 
@@ -9806,13 +9770,13 @@ cnsval_ssize_t emitter::emitGetInsSC(const instrDesc* id) const
     else
 #endif // TARGET_ARM
         if (id->idIsLargeCns())
-    {
-        return ((instrDescCns*)id)->idcCnsVal;
-    }
-    else
-    {
-        return id->idSmallCns();
-    }
+        {
+            return ((instrDescCns*)id)->idcCnsVal;
+        }
+        else
+        {
+            return id->idSmallCns();
+        }
 }
 
 #ifdef TARGET_ARM
@@ -9923,7 +9887,7 @@ void emitter::emitStackPop(BYTE* addr, bool isCall, unsigned char callInstrSize,
 #ifndef JIT32_GCENCODER
             || (emitComp->IsFullPtrRegMapRequired() && !emitComp->GetInterruptible() && isCall)
 #endif // JIT32_GCENCODER
-                )
+        )
         {
             emitStackPopLargeStk(addr, isCall, callInstrSize, 0);
         }
@@ -10067,7 +10031,6 @@ void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callIn
             Or do we have a partially interruptible EBP-less frame, and any
             of EDI,ESI,EBX,EBP are live, or is there an outer/pending call?
          */
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if !FPO_INTERRUPTIBLE
         if (emitFullyInt || (gcrefRegs == 0 && byrefRegs == 0 && u2.emitGcArgTrackCnt == 0))
@@ -10200,17 +10163,17 @@ void emitter::emitStackKillArgs(BYTE* addr, unsigned count, unsigned char callIn
 
 #ifdef DEBUG
 
-void emitter::emitRecordRelocationHelp(void*       location,            /* IN */
-                                       void*       target,              /* IN */
-                                       uint16_t    fRelocType,          /* IN */
-                                       const char* relocTypeName,       /* IN */
+void emitter::emitRecordRelocationHelp(void*       location,        /* IN */
+                                       void*       target,          /* IN */
+                                       uint16_t    fRelocType,      /* IN */
+                                       const char* relocTypeName,   /* IN */
                                        int32_t     addlDelta /* = 0 */) /* IN */
 
 #else // !DEBUG
 
-void emitter::emitRecordRelocation(void*    location,            /* IN */
-                                   void*    target,              /* IN */
-                                   uint16_t fRelocType,          /* IN */
+void emitter::emitRecordRelocation(void*    location,           /* IN */
+                                   void*    target,             /* IN */
+                                   uint16_t fRelocType,         /* IN */
                                    int32_t  addlDelta /* = 0 */) /* IN */
 
 #endif // !DEBUG
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index ed57477d209e..b41ef5fa5c1c 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -118,7 +118,7 @@ inline const char* GCtypeStr(GCtype gcType)
 
 #if DEBUG_EMIT
 #define INTERESTING_JUMP_NUM -1 // set to 0 to see all jump info
-//#define INTERESTING_JUMP_NUM    0
+// #define INTERESTING_JUMP_NUM    0
 #endif
 
 /*****************************************************************************
@@ -129,11 +129,15 @@ inline const char* GCtypeStr(GCtype gcType)
 class emitLocation
 {
 public:
-    emitLocation() : ig(nullptr), codePos(0)
+    emitLocation()
+        : ig(nullptr)
+        , codePos(0)
     {
     }
 
-    emitLocation(insGroup* _ig) : ig(_ig), codePos(0)
+    emitLocation(insGroup* _ig)
+        : ig(_ig)
+        , codePos(0)
     {
     }
 
@@ -147,7 +151,9 @@ class emitLocation
         CaptureLocation(emit);
     }
 
-    emitLocation(void* emitCookie) : ig((insGroup*)emitCookie), codePos(0)
+    emitLocation(void* emitCookie)
+        : ig((insGroup*)emitCookie)
+        , codePos(0)
     {
     }
 
@@ -228,10 +234,8 @@ enum insGroupPlaceholderType : unsigned char
 {
     IGPT_PROLOG, // currently unused
     IGPT_EPILOG,
-#if defined(FEATURE_EH_FUNCLETS)
     IGPT_FUNCLET_PROLOG,
     IGPT_FUNCLET_EPILOG,
-#endif // FEATURE_EH_FUNCLETS
 };
 
 #if defined(_MSC_VER) && defined(TARGET_ARM)
@@ -286,20 +290,23 @@ struct insGroup
     insGroup* igLoopBackEdge; // "last" back-edge that branches back to an aligned loop head.
 #endif
 
-#define IGF_GC_VARS 0x0001        // new set of live GC ref variables
-#define IGF_BYREF_REGS 0x0002     // new set of live by-ref registers
+#define IGF_GC_VARS        0x0001 // new set of live GC ref variables
+#define IGF_BYREF_REGS     0x0002 // new set of live by-ref registers
 #define IGF_FUNCLET_PROLOG 0x0004 // this group belongs to a funclet prolog
 #define IGF_FUNCLET_EPILOG 0x0008 // this group belongs to a funclet epilog.
-#define IGF_EPILOG 0x0010         // this group belongs to a main function epilog
-#define IGF_NOGCINTERRUPT 0x0020  // this IG is in a no-interrupt region (prolog, epilog, etc.)
-#define IGF_UPD_ISZ 0x0040        // some instruction sizes updated
-#define IGF_PLACEHOLDER 0x0080    // this is a placeholder group, to be filled in later
-#define IGF_EXTEND 0x0100         // this block is conceptually an extension of the previous block
-                                  // and the emitter should continue to track GC info as if there was no new block.
-#define IGF_HAS_ALIGN 0x0200      // this group contains an alignment instruction(s) at the end to align either the next
-                                  // IG, or, if this IG contains with an unconditional branch, some subsequent IG.
-#define IGF_REMOVED_ALIGN 0x0400  // IG was marked as having an alignment instruction(s), but was later unmarked
-                                  // without updating the IG's size/offsets.
+#define IGF_EPILOG         0x0010 // this group belongs to a main function epilog
+#define IGF_NOGCINTERRUPT  0x0020 // this IG is in a no-interrupt region (prolog, epilog, etc.)
+#define IGF_UPD_ISZ        0x0040 // some instruction sizes updated
+#define IGF_PLACEHOLDER    0x0080 // this is a placeholder group, to be filled in later
+#define IGF_EXTEND                                                                                                     \
+    0x0100 // this block is conceptually an extension of the previous block
+           // and the emitter should continue to track GC info as if there was no new block.
+#define IGF_HAS_ALIGN                                                                                                  \
+    0x0200 // this group contains an alignment instruction(s) at the end to align either the next
+           // IG, or, if this IG contains with an unconditional branch, some subsequent IG.
+#define IGF_REMOVED_ALIGN                                                                                              \
+    0x0400                           // IG was marked as having an alignment instruction(s), but was later unmarked
+                                     // without updating the IG's size/offsets.
 #define IGF_HAS_REMOVABLE_JMP 0x0800 // this group ends with an unconditional jump which is a candidate for removal
 #ifdef TARGET_ARM64
 #define IGF_HAS_REMOVED_INSTR 0x1000 // this group has an instruction that was removed.
@@ -308,24 +315,20 @@ struct insGroup
 // Mask of IGF_* flags that should be propagated to new blocks when they are created.
 // This allows prologs and epilogs to be any number of IGs, but still be
 // automatically marked properly.
-#if defined(FEATURE_EH_FUNCLETS)
 #ifdef DEBUG
 #define IGF_PROPAGATE_MASK (IGF_EPILOG | IGF_FUNCLET_PROLOG | IGF_FUNCLET_EPILOG)
 #else // DEBUG
 #define IGF_PROPAGATE_MASK (IGF_EPILOG | IGF_FUNCLET_PROLOG)
 #endif // DEBUG
-#else  // !FEATURE_EH_FUNCLETS
-#define IGF_PROPAGATE_MASK (IGF_EPILOG)
-#endif // !FEATURE_EH_FUNCLETS
 
     // Try to do better packing based on how large regMaskSmall is (8, 16, or 64 bits).
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if !(REGMASK_BITS <= 32)
     regMaskSmall igGCregs; // set of registers with live GC refs
 #endif                     // !(REGMASK_BITS <= 32)
 
-    union {
+    union
+    {
         BYTE*                    igData;   // addr of instruction descriptors
         insPlaceholderGroupData* igPhData; // when igFlags & IGF_PLACEHOLDER
     };
@@ -428,8 +431,8 @@ struct emitLclVarAddr
     //
 protected:
     unsigned _lvaVarNum : 15; // Usually the lvaVarNum
-    unsigned _lvaExtra : 15;  // Usually the lvaOffset
-    unsigned _lvaTag : 2;     // tag field to support larger varnums
+    unsigned _lvaExtra  : 15; // Usually the lvaOffset
+    unsigned _lvaTag    : 2;  // tag field to support larger varnums
 };
 
 enum idAddrUnionTag
@@ -513,7 +516,7 @@ class emitter
 #ifdef TARGET_AMD64
         OPSZP = OPSZ8,
 #else
-        OPSZP         = OPSZ4,
+        OPSZP = OPSZ4,
 #endif
     };
 
@@ -522,7 +525,7 @@ class emitter
     static const emitAttr emitSizeDecode[];
 
     static emitter::opSize emitEncodeSize(emitAttr size);
-    static emitAttr emitDecodeSize(emitter::opSize ensz);
+    static emitAttr        emitDecodeSize(emitter::opSize ensz);
 
     // Currently, we only allow one IG for the prolog
     bool emitIGisInProlog(const insGroup* ig)
@@ -535,8 +538,6 @@ class emitter
         return (ig != nullptr) && ((ig->igFlags & IGF_EPILOG) != 0);
     }
 
-#if defined(FEATURE_EH_FUNCLETS)
-
     bool emitIGisInFuncletProlog(const insGroup* ig)
     {
         return (ig != nullptr) && ((ig->igFlags & IGF_FUNCLET_PROLOG) != 0);
@@ -547,8 +548,6 @@ class emitter
         return (ig != nullptr) && ((ig->igFlags & IGF_FUNCLET_EPILOG) != 0);
     }
 
-#endif // FEATURE_EH_FUNCLETS
-
     void emitRecomputeIGoffsets();
 
     void emitDispCommentForHandle(size_t handle, size_t cookie, GenTreeFlags flags);
@@ -570,10 +569,10 @@ class emitter
 
 #ifdef TARGET_XARCH
 
-#define AM_DISP_BITS ((sizeof(unsigned) * 8) - 2 * (REGNUM_BITS + 1) - 2)
+#define AM_DISP_BITS    ((sizeof(unsigned) * 8) - 2 * (REGNUM_BITS + 1) - 2)
 #define AM_DISP_BIG_VAL (-(1 << (AM_DISP_BITS - 1)))
-#define AM_DISP_MIN (-((1 << (AM_DISP_BITS - 1)) - 1))
-#define AM_DISP_MAX (+((1 << (AM_DISP_BITS - 1)) - 1))
+#define AM_DISP_MIN     (-((1 << (AM_DISP_BITS - 1)) - 1))
+#define AM_DISP_MAX     (+((1 << (AM_DISP_BITS - 1)) - 1))
 
     struct emitAddrMode
     {
@@ -643,9 +642,9 @@ class emitter
         static_assert_no_msg(IF_COUNT <= 128);
         insFormat _idInsFmt : 7;
 #elif defined(TARGET_LOONGARCH64)
-        unsigned    _idCodeSize : 5; // the instruction(s) size of this instrDesc described.
+        unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described.
 #elif defined(TARGET_RISCV64)
-        unsigned    _idCodeSize : 6; // the instruction(s) size of this instrDesc described.
+        unsigned _idCodeSize : 6; // the instruction(s) size of this instrDesc described.
 #elif defined(TARGET_ARM64)
         static_assert_no_msg(IF_COUNT <= 1024);
         insFormat _idInsFmt : 10;
@@ -685,7 +684,7 @@ class emitter
         {
         }
 #elif defined(TARGET_RISCV64)
-        insFormat   idInsFmt() const
+        insFormat idInsFmt() const
         {
             NYI_RISCV64("idInsFmt-----unimplemented on RISCV64 yet----");
             return (insFormat)0;
@@ -695,7 +694,7 @@ class emitter
             NYI_RISCV64("idInsFmt-----unimplemented on RISCV64 yet----");
         }
 #else
-        insFormat   idInsFmt() const
+        insFormat idInsFmt() const
         {
             return _idInsFmt;
         }
@@ -721,7 +720,7 @@ class emitter
     private:
 #if defined(TARGET_XARCH)
         unsigned _idCodeSize : 4; // size of instruction in bytes. Max size of an Intel instruction is 15 bytes.
-        opSize   _idOpSize : 3;   // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16, 5=32
+        opSize   _idOpSize   : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16, 5=32
                                   // At this point we have fully consumed first DWORD so that next field
                                   // doesn't cross a byte boundary.
 #elif defined(TARGET_ARM64)
@@ -730,7 +729,7 @@ class emitter
 #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
 /* _idOpSize defined below. */
 #else
-        opSize    _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8
+        opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8
 #endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
 
         // On Amd64, this is where the second DWORD begins
@@ -763,9 +762,9 @@ class emitter
         // loongarch64: 28 bits
         // risc-v:      28 bits
 
-        unsigned _idSmallDsc : 1;  // is this a "small" descriptor?
-        unsigned _idLargeCns : 1;  // does a large constant     follow?
-        unsigned _idLargeDsp : 1;  // does a large displacement follow?
+        unsigned _idSmallDsc  : 1; // is this a "small" descriptor?
+        unsigned _idLargeCns  : 1; // does a large constant     follow?
+        unsigned _idLargeDsp  : 1; // does a large displacement follow?
         unsigned _idLargeCall : 1; // large call descriptor used
 
         // We have several pieces of information we need to encode but which are only applicable
@@ -776,15 +775,15 @@ class emitter
         unsigned _idCustom2 : 1;
         unsigned _idCustom3 : 1;
 
-#define _idBound _idCustom1 /* jump target / frame offset bound */
-#define _idTlsGD _idCustom2 /* Used to store information related to TLS GD access on linux */
-#define _idNoGC _idCustom3  /* Some helpers don't get recorded in GC tables */
+#define _idBound          _idCustom1 /* jump target / frame offset bound */
+#define _idTlsGD          _idCustom2 /* Used to store information related to TLS GD access on linux */
+#define _idNoGC           _idCustom3 /* Some helpers don't get recorded in GC tables */
 #define _idEvexAaaContext (_idCustom3 << 2) | (_idCustom2 << 1) | _idCustom1 /* bits used for the EVEX.aaa context */
 
 #if !defined(TARGET_ARMARCH)
         unsigned _idCustom4 : 1;
 
-#define _idCallRegPtr _idCustom4   /* IL indirect calls : addr in reg */
+#define _idCallRegPtr   _idCustom4 /* IL indirect calls : addr in reg */
 #define _idEvexZContext _idCustom4 /* bits used for the EVEX.z context */
 #endif                             // !TARGET_ARMARCH
 
@@ -797,17 +796,16 @@ class emitter
 #endif //  TARGET_XARCH
 
 #ifdef TARGET_ARM64
-
-        unsigned _idLclVar : 1;    // access a local on stack
-        unsigned _idLclVarPair : 1 // carries information for 2 GC lcl vars.
+        unsigned _idLclVar     : 1; // access a local on stack
+        unsigned _idLclVarPair : 1; // carries information for 2 GC lcl vars.
 #endif
 
 #ifdef TARGET_LOONGARCH64
-            // TODO-LoongArch64: maybe delete on future.
-            opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16
-        insOpts    _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the
-                                  // accessing a local on stack.
-        unsigned _idLclVar : 1;   // access a local on stack.
+        // TODO-LoongArch64: maybe delete on future.
+        opSize  _idOpSize : 3;  // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16
+        insOpts _idInsOpt : 6;  // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the
+                                // accessing a local on stack.
+        unsigned _idLclVar : 1; // access a local on stack.
 #endif
 
 #ifdef TARGET_RISCV64
@@ -818,11 +816,11 @@ class emitter
 #endif
 
 #ifdef TARGET_ARM
-        insSize  _idInsSize : 2;   // size of instruction: 16, 32 or 48 bits
-        insFlags _idInsFlags : 1;  // will this instruction set the flags
-        unsigned _idLclVar : 1;    // access a local on stack
+        insSize  _idInsSize   : 2; // size of instruction: 16, 32 or 48 bits
+        insFlags _idInsFlags  : 1; // will this instruction set the flags
+        unsigned _idLclVar    : 1; // access a local on stack
         unsigned _idLclFPBase : 1; // access a local on stack - SP based offset
-        insOpts  _idInsOpt : 3;    // options for Load/Store instructions
+        insOpts  _idInsOpt    : 3; // options for Load/Store instructions
 #endif
 
         ////////////////////////////////////////////////////////////////////////
@@ -838,7 +836,6 @@ class emitter
         // How many bits have been used beyond the first 32?
         // Define ID_EXTRA_BITFIELD_BITS to that number.
         //
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_ARM)
 #define ID_EXTRA_BITFIELD_BITS (16)
@@ -866,7 +863,6 @@ class emitter
         // All instrDesc types are <= 56 bytes, but we also need m_debugInfoSize,
         // which is pointer sized, so 5 bits are required on 64-bit and 4 bits
         // on 32-bit.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef HOST_64BIT
         unsigned _idScaledPrevOffset : 5;
@@ -888,11 +884,10 @@ class emitter
         // arm64:       60/55 bits
         // loongarch64: 53/48 bits
         // risc-v:      53/48 bits
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #define ID_EXTRA_BITS (ID_EXTRA_RELOC_BITS + ID_EXTRA_BITFIELD_BITS + ID_EXTRA_PREV_OFFSET_BITS)
 
-/* Use whatever bits are left over for small constants */
+        /* Use whatever bits are left over for small constants */
 
 #define ID_BIT_SMALL_CNS (32 - ID_EXTRA_BITS)
         C_ASSERT(ID_BIT_SMALL_CNS > 0);
@@ -905,7 +900,6 @@ class emitter
         // arm64:        4/9 bits
         // loongarch64: 11/16 bits
         // risc-v:      11/16 bits
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #define ID_ADJ_SMALL_CNS (int)(1 << (ID_BIT_SMALL_CNS - 1))
 #define ID_CNT_SMALL_CNS (int)(1 << ID_BIT_SMALL_CNS)
@@ -930,7 +924,6 @@ class emitter
         //
         // SMALL_IDSC_SIZE is this size, in bytes.
         //
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #define SMALL_IDSC_SIZE 8
 
@@ -947,13 +940,13 @@ class emitter
         }
 
     private:
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
         void checkSizes();
 
-        union idAddrUnion {
-// TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts
-// about reading what we think is here, to avoid unexpected corruption issues.
+        union idAddrUnion
+        {
+            // TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts
+            // about reading what we think is here, to avoid unexpected corruption issues.
 
 #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64)
             emitLclVarAddr iiaLclVar;
@@ -1458,25 +1451,36 @@ class emitter
             assert(!idIsSmallDsc());
             idAddr()->_idRegBit = val ? 1 : 0;
         }
-        bool idOptionalShift() const
+        insSvePattern idSvePattern() const
         {
             assert(!idIsSmallDsc());
-            return (idAddr()->_idRegBit == 1);
+            return (idAddr()->_idSvePattern);
         }
-        void idOptionalShift(bool val)
+        void idSvePattern(insSvePattern idSvePattern)
         {
             assert(!idIsSmallDsc());
-            idAddr()->_idRegBit = val ? 1 : 0;
+            idAddr()->_idSvePattern = idSvePattern;
         }
-        insSvePattern idSvePattern() const
+        insSvePrfop idSvePrfop() const
         {
             assert(!idIsSmallDsc());
-            return (idAddr()->_idSvePattern);
+            return (insSvePrfop)(idAddr()->_idReg4);
         }
-        void idSvePattern(insSvePattern idSvePattern)
+        void idSvePrfop(insSvePrfop idSvePrfop)
         {
             assert(!idIsSmallDsc());
-            idAddr()->_idSvePattern = idSvePattern;
+            idAddr()->_idReg4 = (regNumber)idSvePrfop;
+        }
+        bool idHasShift() const
+        {
+            return !idIsSmallDsc() && (idAddr()->_idRegBit == 1);
+        }
+        void idHasShift(bool val)
+        {
+            if (!idIsSmallDsc())
+            {
+                idAddr()->_idRegBit = val ? 1 : 0;
+            }
         }
 #endif // TARGET_ARM64
 
@@ -1846,137 +1850,137 @@ class emitter
 
 #define PERFSCORE_THROUGHPUT_1C 1.0f // Single Issue
 
-#define PERFSCORE_THROUGHPUT_2C 2.0f     // slower - 2 cycles
-#define PERFSCORE_THROUGHPUT_3C 3.0f     // slower - 3 cycles
-#define PERFSCORE_THROUGHPUT_4C 4.0f     // slower - 4 cycles
-#define PERFSCORE_THROUGHPUT_5C 5.0f     // slower - 5 cycles
-#define PERFSCORE_THROUGHPUT_6C 6.0f     // slower - 6 cycles
-#define PERFSCORE_THROUGHPUT_7C 7.0f     // slower - 7 cycles
-#define PERFSCORE_THROUGHPUT_8C 8.0f     // slower - 8 cycles
-#define PERFSCORE_THROUGHPUT_9C 9.0f     // slower - 9 cycles
-#define PERFSCORE_THROUGHPUT_10C 10.0f   // slower - 10 cycles
-#define PERFSCORE_THROUGHPUT_11C 10.0f   // slower - 10 cycles
-#define PERFSCORE_THROUGHPUT_13C 13.0f   // slower - 13 cycles
-#define PERFSCORE_THROUGHPUT_14C 14.0f   // slower - 13 cycles
-#define PERFSCORE_THROUGHPUT_16C 16.0f   // slower - 13 cycles
-#define PERFSCORE_THROUGHPUT_19C 19.0f   // slower - 19 cycles
-#define PERFSCORE_THROUGHPUT_25C 25.0f   // slower - 25 cycles
-#define PERFSCORE_THROUGHPUT_33C 33.0f   // slower - 33 cycles
-#define PERFSCORE_THROUGHPUT_50C 50.0f   // slower - 50 cycles
-#define PERFSCORE_THROUGHPUT_52C 52.0f   // slower - 52 cycles
-#define PERFSCORE_THROUGHPUT_57C 57.0f   // slower - 57 cycles
+#define PERFSCORE_THROUGHPUT_2C   2.0f   // slower - 2 cycles
+#define PERFSCORE_THROUGHPUT_3C   3.0f   // slower - 3 cycles
+#define PERFSCORE_THROUGHPUT_4C   4.0f   // slower - 4 cycles
+#define PERFSCORE_THROUGHPUT_5C   5.0f   // slower - 5 cycles
+#define PERFSCORE_THROUGHPUT_6C   6.0f   // slower - 6 cycles
+#define PERFSCORE_THROUGHPUT_7C   7.0f   // slower - 7 cycles
+#define PERFSCORE_THROUGHPUT_8C   8.0f   // slower - 8 cycles
+#define PERFSCORE_THROUGHPUT_9C   9.0f   // slower - 9 cycles
+#define PERFSCORE_THROUGHPUT_10C  10.0f  // slower - 10 cycles
+#define PERFSCORE_THROUGHPUT_11C  10.0f  // slower - 10 cycles
+#define PERFSCORE_THROUGHPUT_13C  13.0f  // slower - 13 cycles
+#define PERFSCORE_THROUGHPUT_14C  14.0f  // slower - 13 cycles
+#define PERFSCORE_THROUGHPUT_16C  16.0f  // slower - 13 cycles
+#define PERFSCORE_THROUGHPUT_19C  19.0f  // slower - 19 cycles
+#define PERFSCORE_THROUGHPUT_25C  25.0f  // slower - 25 cycles
+#define PERFSCORE_THROUGHPUT_33C  33.0f  // slower - 33 cycles
+#define PERFSCORE_THROUGHPUT_50C  50.0f  // slower - 50 cycles
+#define PERFSCORE_THROUGHPUT_52C  52.0f  // slower - 52 cycles
+#define PERFSCORE_THROUGHPUT_57C  57.0f  // slower - 57 cycles
 #define PERFSCORE_THROUGHPUT_140C 140.0f // slower - 140 cycles
 
 #define PERFSCORE_LATENCY_ILLEGAL -1024.0f
 
 #define PERFSCORE_LATENCY_ZERO 0.0f
-#define PERFSCORE_LATENCY_1C 1.0f
-#define PERFSCORE_LATENCY_2C 2.0f
-#define PERFSCORE_LATENCY_3C 3.0f
-#define PERFSCORE_LATENCY_4C 4.0f
-#define PERFSCORE_LATENCY_5C 5.0f
-#define PERFSCORE_LATENCY_6C 6.0f
-#define PERFSCORE_LATENCY_7C 7.0f
-#define PERFSCORE_LATENCY_8C 8.0f
-#define PERFSCORE_LATENCY_9C 9.0f
-#define PERFSCORE_LATENCY_10C 10.0f
-#define PERFSCORE_LATENCY_11C 11.0f
-#define PERFSCORE_LATENCY_12C 12.0f
-#define PERFSCORE_LATENCY_13C 13.0f
-#define PERFSCORE_LATENCY_14C 14.0f
-#define PERFSCORE_LATENCY_15C 15.0f
-#define PERFSCORE_LATENCY_16C 16.0f
-#define PERFSCORE_LATENCY_18C 18.0f
-#define PERFSCORE_LATENCY_20C 20.0f
-#define PERFSCORE_LATENCY_22C 22.0f
-#define PERFSCORE_LATENCY_23C 23.0f
-#define PERFSCORE_LATENCY_26C 26.0f
-#define PERFSCORE_LATENCY_62C 62.0f
-#define PERFSCORE_LATENCY_69C 69.0f
+#define PERFSCORE_LATENCY_1C   1.0f
+#define PERFSCORE_LATENCY_2C   2.0f
+#define PERFSCORE_LATENCY_3C   3.0f
+#define PERFSCORE_LATENCY_4C   4.0f
+#define PERFSCORE_LATENCY_5C   5.0f
+#define PERFSCORE_LATENCY_6C   6.0f
+#define PERFSCORE_LATENCY_7C   7.0f
+#define PERFSCORE_LATENCY_8C   8.0f
+#define PERFSCORE_LATENCY_9C   9.0f
+#define PERFSCORE_LATENCY_10C  10.0f
+#define PERFSCORE_LATENCY_11C  11.0f
+#define PERFSCORE_LATENCY_12C  12.0f
+#define PERFSCORE_LATENCY_13C  13.0f
+#define PERFSCORE_LATENCY_14C  14.0f
+#define PERFSCORE_LATENCY_15C  15.0f
+#define PERFSCORE_LATENCY_16C  16.0f
+#define PERFSCORE_LATENCY_18C  18.0f
+#define PERFSCORE_LATENCY_20C  20.0f
+#define PERFSCORE_LATENCY_22C  22.0f
+#define PERFSCORE_LATENCY_23C  23.0f
+#define PERFSCORE_LATENCY_26C  26.0f
+#define PERFSCORE_LATENCY_62C  62.0f
+#define PERFSCORE_LATENCY_69C  69.0f
 #define PERFSCORE_LATENCY_140C 140.0f
 #define PERFSCORE_LATENCY_400C 400.0f // Intel microcode issue with these instructions
 
-#define PERFSCORE_LATENCY_BRANCH_DIRECT 1.0f   // cost of an unconditional branch
-#define PERFSCORE_LATENCY_BRANCH_COND 2.0f     // includes cost of a possible misprediction
+#define PERFSCORE_LATENCY_BRANCH_DIRECT   1.0f // cost of an unconditional branch
+#define PERFSCORE_LATENCY_BRANCH_COND     2.0f // includes cost of a possible misprediction
 #define PERFSCORE_LATENCY_BRANCH_INDIRECT 2.0f // includes cost of a possible misprediction
 
 #if defined(TARGET_XARCH)
 
 // a read,write or modify from stack location, possible def to use latency from L0 cache
-#define PERFSCORE_LATENCY_RD_STACK PERFSCORE_LATENCY_2C
-#define PERFSCORE_LATENCY_WR_STACK PERFSCORE_LATENCY_2C
+#define PERFSCORE_LATENCY_RD_STACK    PERFSCORE_LATENCY_2C
+#define PERFSCORE_LATENCY_WR_STACK    PERFSCORE_LATENCY_2C
 #define PERFSCORE_LATENCY_RD_WR_STACK PERFSCORE_LATENCY_5C
 
 // a read, write or modify from constant location, possible def to use latency from L0 cache
-#define PERFSCORE_LATENCY_RD_CONST_ADDR PERFSCORE_LATENCY_2C
-#define PERFSCORE_LATENCY_WR_CONST_ADDR PERFSCORE_LATENCY_2C
+#define PERFSCORE_LATENCY_RD_CONST_ADDR    PERFSCORE_LATENCY_2C
+#define PERFSCORE_LATENCY_WR_CONST_ADDR    PERFSCORE_LATENCY_2C
 #define PERFSCORE_LATENCY_RD_WR_CONST_ADDR PERFSCORE_LATENCY_5C
 
 // a read, write or modify from memory location, possible def to use latency from L0 or L1 cache
 // plus an extra cost  (of 1.0) for a increased chance  of a cache miss
-#define PERFSCORE_LATENCY_RD_GENERAL PERFSCORE_LATENCY_3C
-#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_3C
+#define PERFSCORE_LATENCY_RD_GENERAL    PERFSCORE_LATENCY_3C
+#define PERFSCORE_LATENCY_WR_GENERAL    PERFSCORE_LATENCY_3C
 #define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_6C
 
 #elif defined(TARGET_ARM64) || defined(TARGET_ARM)
 
 // a read,write or modify from stack location, possible def to use latency from L0 cache
-#define PERFSCORE_LATENCY_RD_STACK PERFSCORE_LATENCY_3C
-#define PERFSCORE_LATENCY_WR_STACK PERFSCORE_LATENCY_1C
-#define PERFSCORE_LATENCY_RD_WR_STACK PERFSCORE_LATENCY_3C
+#define PERFSCORE_LATENCY_RD_STACK         PERFSCORE_LATENCY_3C
+#define PERFSCORE_LATENCY_WR_STACK         PERFSCORE_LATENCY_1C
+#define PERFSCORE_LATENCY_RD_WR_STACK      PERFSCORE_LATENCY_3C
 
 // a read, write or modify from constant location, possible def to use latency from L0 cache
-#define PERFSCORE_LATENCY_RD_CONST_ADDR PERFSCORE_LATENCY_3C
-#define PERFSCORE_LATENCY_WR_CONST_ADDR PERFSCORE_LATENCY_1C
+#define PERFSCORE_LATENCY_RD_CONST_ADDR    PERFSCORE_LATENCY_3C
+#define PERFSCORE_LATENCY_WR_CONST_ADDR    PERFSCORE_LATENCY_1C
 #define PERFSCORE_LATENCY_RD_WR_CONST_ADDR PERFSCORE_LATENCY_3C
 
 // a read, write or modify from memory location, possible def to use latency from L0 or L1 cache
 // plus an extra cost  (of 1.0) for a increased chance  of a cache miss
-#define PERFSCORE_LATENCY_RD_GENERAL PERFSCORE_LATENCY_4C
-#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C
-#define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C
+#define PERFSCORE_LATENCY_RD_GENERAL       PERFSCORE_LATENCY_4C
+#define PERFSCORE_LATENCY_WR_GENERAL       PERFSCORE_LATENCY_1C
+#define PERFSCORE_LATENCY_RD_WR_GENERAL    PERFSCORE_LATENCY_4C
 
 #elif defined(TARGET_LOONGARCH64)
 // a read,write or modify from stack location, possible def to use latency from L0 cache
-#define PERFSCORE_LATENCY_RD_STACK PERFSCORE_LATENCY_3C
-#define PERFSCORE_LATENCY_WR_STACK PERFSCORE_LATENCY_1C
-#define PERFSCORE_LATENCY_RD_WR_STACK PERFSCORE_LATENCY_3C
+#define PERFSCORE_LATENCY_RD_STACK         PERFSCORE_LATENCY_3C
+#define PERFSCORE_LATENCY_WR_STACK         PERFSCORE_LATENCY_1C
+#define PERFSCORE_LATENCY_RD_WR_STACK      PERFSCORE_LATENCY_3C
 
 // a read, write or modify from constant location, possible def to use latency from L0 cache
-#define PERFSCORE_LATENCY_RD_CONST_ADDR PERFSCORE_LATENCY_3C
-#define PERFSCORE_LATENCY_WR_CONST_ADDR PERFSCORE_LATENCY_1C
+#define PERFSCORE_LATENCY_RD_CONST_ADDR    PERFSCORE_LATENCY_3C
+#define PERFSCORE_LATENCY_WR_CONST_ADDR    PERFSCORE_LATENCY_1C
 #define PERFSCORE_LATENCY_RD_WR_CONST_ADDR PERFSCORE_LATENCY_3C
 
 // a read, write or modify from memory location, possible def to use latency from L0 or L1 cache
 // plus an extra cost  (of 1.0) for a increased chance  of a cache miss
-#define PERFSCORE_LATENCY_RD_GENERAL PERFSCORE_LATENCY_4C
-#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C
-#define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C
+#define PERFSCORE_LATENCY_RD_GENERAL       PERFSCORE_LATENCY_4C
+#define PERFSCORE_LATENCY_WR_GENERAL       PERFSCORE_LATENCY_1C
+#define PERFSCORE_LATENCY_RD_WR_GENERAL    PERFSCORE_LATENCY_4C
 
 #elif defined(TARGET_RISCV64)
 // a read,write or modify from stack location, possible def to use latency from L0 cache
-#define PERFSCORE_LATENCY_RD_STACK PERFSCORE_LATENCY_3C
-#define PERFSCORE_LATENCY_WR_STACK PERFSCORE_LATENCY_1C
-#define PERFSCORE_LATENCY_RD_WR_STACK PERFSCORE_LATENCY_3C
+#define PERFSCORE_LATENCY_RD_STACK         PERFSCORE_LATENCY_3C
+#define PERFSCORE_LATENCY_WR_STACK         PERFSCORE_LATENCY_1C
+#define PERFSCORE_LATENCY_RD_WR_STACK      PERFSCORE_LATENCY_3C
 
 // a read, write or modify from constant location, possible def to use latency from L0 cache
-#define PERFSCORE_LATENCY_RD_CONST_ADDR PERFSCORE_LATENCY_3C
-#define PERFSCORE_LATENCY_WR_CONST_ADDR PERFSCORE_LATENCY_1C
+#define PERFSCORE_LATENCY_RD_CONST_ADDR    PERFSCORE_LATENCY_3C
+#define PERFSCORE_LATENCY_WR_CONST_ADDR    PERFSCORE_LATENCY_1C
 #define PERFSCORE_LATENCY_RD_WR_CONST_ADDR PERFSCORE_LATENCY_3C
 
 // a read, write or modify from memory location, possible def to use latency from L0 or L1 cache
 // plus an extra cost  (of 1.0) for a increased chance  of a cache miss
-#define PERFSCORE_LATENCY_RD_GENERAL PERFSCORE_LATENCY_4C
-#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C
-#define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C
+#define PERFSCORE_LATENCY_RD_GENERAL       PERFSCORE_LATENCY_4C
+#define PERFSCORE_LATENCY_WR_GENERAL       PERFSCORE_LATENCY_1C
+#define PERFSCORE_LATENCY_RD_WR_GENERAL    PERFSCORE_LATENCY_4C
 
 #endif // TARGET_XXX
 
 // Make this an enum:
 //
-#define PERFSCORE_MEMORY_NONE 0
-#define PERFSCORE_MEMORY_READ 1
-#define PERFSCORE_MEMORY_WRITE 2
+#define PERFSCORE_MEMORY_NONE       0
+#define PERFSCORE_MEMORY_READ       1
+#define PERFSCORE_MEMORY_WRITE      2
 #define PERFSCORE_MEMORY_READ_WRITE 3
 
     struct insExecutionCharacteristics
@@ -2009,7 +2013,8 @@ class emitter
         instrDescJmp* idjNext; // next jump in the group/method
         insGroup*     idjIG;   // containing group
 
-        union {
+        union
+        {
             BYTE* idjAddr; // address of jump ins (for patching)
         } idjTemp;
 
@@ -2032,7 +2037,7 @@ class emitter
 #else
             30;
 #endif
-        unsigned idjShort : 1;    // is the jump known to be a short one?
+        unsigned idjShort    : 1; // is the jump known to be a short one?
         unsigned idjKeepLong : 1; // should the jump be kept long? (used for hot to cold and cold to hot jumps)
     };
 
@@ -2173,7 +2178,9 @@ class emitter
         alignas(alignof(T)) char idStorage[sizeof(T)];
 
     public:
-        inlineInstrDesc() : idDebugInfo(nullptr), idStorage()
+        inlineInstrDesc()
+            : idDebugInfo(nullptr)
+            , idStorage()
         {
             static_assert_no_msg((offsetof(inlineInstrDesc<T>, idStorage) - sizeof(instrDescDebugInfo*)) ==
                                  offsetof(inlineInstrDesc<T>, idDebugInfo));
@@ -2199,7 +2206,7 @@ class emitter
 #endif // TARGET_ARM
 
     insUpdateModes emitInsUpdateMode(instruction ins);
-    insFormat emitInsModeFormat(instruction ins, insFormat base);
+    insFormat      emitInsModeFormat(instruction ins, insFormat base);
 
     static const BYTE emitInsModeFmtTab[];
 #ifdef DEBUG
@@ -2214,7 +2221,7 @@ class emitter
     ssize_t emitGetInsDsp(instrDesc* id);
     ssize_t emitGetInsAmd(instrDesc* id);
 
-    ssize_t emitGetInsCIdisp(instrDesc* id);
+    ssize_t  emitGetInsCIdisp(instrDesc* id);
     unsigned emitGetInsCIargs(instrDesc* id);
 
     inline emitAttr emitGetMemOpSize(instrDesc* id) const;
@@ -2227,7 +2234,7 @@ class emitter
 #endif // TARGET_XARCH
 
     cnsval_ssize_t emitGetInsSC(const instrDesc* id) const;
-    unsigned emitInsCount;
+    unsigned       emitInsCount;
 
     /************************************************************************/
     /*           A few routines used for debug display purposes             */
@@ -2253,11 +2260,11 @@ class emitter
     regMaskTP  debugPrevGCrefRegs;
     regMaskTP  debugPrevByrefRegs;
     void       emitDispInsIndent();
-    void emitDispGCDeltaTitle(const char* title);
-    void emitDispGCRegDelta(const char* title, regMaskTP prevRegs, regMaskTP curRegs);
-    void emitDispGCVarDelta();
-    void emitDispRegPtrListDelta();
-    void emitDispGCInfoDelta();
+    void       emitDispGCDeltaTitle(const char* title);
+    void       emitDispGCRegDelta(const char* title, regMaskTP prevRegs, regMaskTP curRegs);
+    void       emitDispGCVarDelta();
+    void       emitDispRegPtrListDelta();
+    void       emitDispGCInfoDelta();
 
     void emitDispIGflags(unsigned flags);
     void emitDispIG(insGroup* ig,
@@ -2314,7 +2321,9 @@ class emitter
         EpilogList*  elNext;
         emitLocation elLoc;
 
-        EpilogList() : elNext(nullptr), elLoc()
+        EpilogList()
+            : elNext(nullptr)
+            , elLoc()
         {
         }
     };
@@ -2337,26 +2346,22 @@ class emitter
     void emitBegFnEpilog(insGroup* igPh);
     void emitEndFnEpilog();
 
-#if defined(FEATURE_EH_FUNCLETS)
-
     void emitBegFuncletProlog(insGroup* igPh);
     void emitEndFuncletProlog();
 
     void emitBegFuncletEpilog(insGroup* igPh);
     void emitEndFuncletEpilog();
 
-#endif // FEATURE_EH_FUNCLETS
-
     /************************************************************************/
     /*    Methods to record a code position and later convert to offset     */
     /************************************************************************/
 
-    unsigned emitFindInsNum(const insGroup* ig, const instrDesc* id) const;
+    unsigned       emitFindInsNum(const insGroup* ig, const instrDesc* id) const;
     UNATIVE_OFFSET emitFindOffset(const insGroup* ig, unsigned insNum) const;
 
-/************************************************************************/
-/*        Members and methods used to issue (encode) instructions.      */
-/************************************************************************/
+    /************************************************************************/
+    /*        Members and methods used to issue (encode) instructions.      */
+    /************************************************************************/
 
 #ifdef DEBUG
     // If we have started issuing instructions from the list of instrDesc, this is set
@@ -2447,9 +2452,9 @@ class emitter
 #endif // TARGET_LOONGARCH64 || TARGET_RISCV64
 
     instrDesc* emitFirstInstrDesc(BYTE* idData) const;
-    void emitAdvanceInstrDesc(instrDesc** id, size_t idSize) const;
-    size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp);
-    size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp);
+    void       emitAdvanceInstrDesc(instrDesc** id, size_t idSize) const;
+    size_t     emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp);
+    size_t     emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp);
 
     bool emitHasFramePtr;
 
@@ -2495,17 +2500,18 @@ class emitter
 #if defined(TARGET_XARCH)
     CORINFO_FIELD_HANDLE emitSimd32Const(simd32_t constValue);
     CORINFO_FIELD_HANDLE emitSimd64Const(simd64_t constValue);
+    CORINFO_FIELD_HANDLE emitSimdMaskConst(simdmask_t constValue);
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
     regNumber emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src);
     regNumber emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2);
-    void emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem);
-    void emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem);
-    void emitInsStoreLcl(instruction ins, emitAttr attr, GenTreeLclVarCommon* varNode);
+    void      emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem);
+    void      emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem);
+    void      emitInsStoreLcl(instruction ins, emitAttr attr, GenTreeLclVarCommon* varNode);
     insFormat emitMapFmtForIns(insFormat fmt, instruction ins);
     insFormat emitMapFmtAtoM(insFormat fmt);
-    void emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins);
-    void spillIntArgRegsToShadowSlots();
+    void      emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins);
+    void      spillIntArgRegsToShadowSlots();
 
 #ifdef TARGET_XARCH
     bool emitIsInstrWritingToReg(instrDesc* id, regNumber reg);
@@ -2543,7 +2549,6 @@ class emitter
     // instruction group depends on the instruction mix as well as DEBUG/non-DEBUG build type. See the
     // EMITTER_STATS output for various statistics related to this.
     //
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
 // ARM32/64, LoongArch and RISC-V can require a bigger prolog instruction group. One scenario
@@ -2592,22 +2597,22 @@ class emitter
     // non-adaptive alignment on xarch, this points to the first align instruction of the series of align instructions.
     instrDescAlign* emitAlignLastGroup;
 
-    unsigned getLoopSize(insGroup* igLoopHeader,
+    unsigned getLoopSize(insGroup*            igLoopHeader,
                          unsigned maxLoopSize DEBUG_ARG(bool isAlignAdjusted) DEBUG_ARG(UNATIVE_OFFSET containingIGNum)
                              DEBUG_ARG(UNATIVE_OFFSET loopHeadPredIGNum)); // Get the smallest loop size
-    void emitLoopAlignment(DEBUG_ARG1(bool isPlacedBehindJmp));
-    bool emitEndsWithAlignInstr(); // Validate if newLabel is appropriate
-    bool emitSetLoopBackEdge(const BasicBlock* loopTopBlock);
+    void     emitLoopAlignment(DEBUG_ARG1(bool isPlacedBehindJmp));
+    bool     emitEndsWithAlignInstr(); // Validate if newLabel is appropriate
+    bool     emitSetLoopBackEdge(const BasicBlock* loopTopBlock);
     void     emitLoopAlignAdjustments(); // Predict if loop alignment is needed and make appropriate adjustments
-    unsigned emitCalculatePaddingForLoopAlignment(insGroup* ig,
+    unsigned emitCalculatePaddingForLoopAlignment(insGroup*     ig,
                                                   size_t offset DEBUG_ARG(bool isAlignAdjusted)
                                                       DEBUG_ARG(UNATIVE_OFFSET containingIGNum)
                                                           DEBUG_ARG(UNATIVE_OFFSET loopHeadPredIGNum));
 
-    void emitLoopAlign(unsigned paddingBytes, bool isFirstAlign DEBUG_ARG(bool isPlacedBehindJmp));
-    void emitLongLoopAlign(unsigned alignmentBoundary DEBUG_ARG(bool isPlacedBehindJmp));
+    void            emitLoopAlign(unsigned paddingBytes, bool isFirstAlign DEBUG_ARG(bool isPlacedBehindJmp));
+    void            emitLongLoopAlign(unsigned alignmentBoundary DEBUG_ARG(bool isPlacedBehindJmp));
     instrDescAlign* emitAlignInNextIG(instrDescAlign* alignInstr);
-    void emitConnectAlignInstrWithCurIG();
+    void            emitConnectAlignInstrWithCurIG();
 
 #endif
 
@@ -2680,7 +2685,7 @@ class emitter
     void emitSetSecondRetRegGCType(instrDescCGCA* id, emitAttr secondRetSize);
 #endif // MULTIREG_HAS_SECOND_GC_RET
 
-    static void emitEncodeCallGCregs(regMaskTP regs, instrDesc* id);
+    static void     emitEncodeCallGCregs(regMaskTP regs, instrDesc* id);
     static unsigned emitDecodeCallGCregs(instrDesc* id);
 
     unsigned emitNxtIGnum;
@@ -2704,8 +2709,8 @@ class emitter
 
     insGroup* emitAllocAndLinkIG();
     insGroup* emitAllocIG();
-    void emitInitIG(insGroup* ig);
-    void emitInsertIGAfter(insGroup* insertAfterIG, insGroup* ig);
+    void      emitInitIG(insGroup* ig);
+    void      emitInsertIGAfter(insGroup* insertAfterIG, insGroup* ig);
 
     void emitNewIG();
 
@@ -2720,9 +2725,9 @@ class emitter
     static bool emitJmpInstHasNoCode(instrDesc* id);
 #endif
 
-    void emitGenIG(insGroup* ig);
+    void      emitGenIG(insGroup* ig);
     insGroup* emitSavIG(bool emitAdd = false);
-    void emitNxtIG(bool extend = false);
+    void      emitNxtIG(bool extend = false);
 
 #ifdef TARGET_ARM64
     void emitRemoveLastInstruction();
@@ -2852,8 +2857,8 @@ class emitter
     // Mark this instruction group as having a label; return the new instruction group.
     // Sets the emitter's record of the currently live GC variables
     // and registers.
-    void* emitAddLabel(VARSET_VALARG_TP GCvars,
-                       regMaskTP        gcrefRegs,
+    void* emitAddLabel(VARSET_VALARG_TP    GCvars,
+                       regMaskTP           gcrefRegs,
                        regMaskTP byrefRegs DEBUG_ARG(BasicBlock* block = nullptr));
 
     // Same as above, except the label is added and is conceptually "inline" in
@@ -2861,7 +2866,7 @@ class emitter
     // continues to track GC info as if there was no label.
     void* emitAddInlineLabel();
 
-    void emitPrintLabel(const insGroup* ig) const;
+    void        emitPrintLabel(const insGroup* ig) const;
     const char* emitLabelString(const insGroup* ig) const;
 
 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
@@ -3084,15 +3089,15 @@ class emitter
         return (offs >= emitGCrFrameOffsMin) && (offs < emitGCrFrameOffsMax);
     }
 
-    static instruction emitJumpKindToIns(emitJumpKind jumpKind);
+    static instruction  emitJumpKindToIns(emitJumpKind jumpKind);
     static emitJumpKind emitInsToJumpKind(instruction ins);
     static emitJumpKind emitReverseJumpKind(emitJumpKind jumpKind);
 
 #ifdef DEBUG
 #ifndef TARGET_LOONGARCH64
     void emitInsSanityCheck(instrDesc* id);
-#endif
-#endif
+#endif // TARGET_LOONGARCH64
+#endif // DEBUG
 
 #ifdef TARGET_ARMARCH
     // Returns true if instruction "id->idIns()" writes to a register that might be used to contain a GC
@@ -3149,7 +3154,8 @@ class emitter
 
     bool emitSimpleStkUsed; // using the "simple" stack table?
 
-    union {
+    union
+    {
         struct // if emitSimpleStkUsed==true
         {
 
@@ -3197,8 +3203,8 @@ class emitter
 
 #ifdef DEBUG
     const char* emitGetFrameReg();
-    void emitDispRegSet(regMaskTP regs);
-    void emitDispVarSet();
+    void        emitDispRegSet(regMaskTP regs);
+    void        emitDispVarSet();
 #endif
 
     void emitGCregLiveUpd(GCtype gcType, regNumber reg, BYTE* addr);
@@ -3263,7 +3269,11 @@ class emitter
         UNATIVE_OFFSET dsdOffs;
         UNATIVE_OFFSET alignment; // in bytes, defaults to 4
 
-        dataSecDsc() : dsdList(nullptr), dsdLast(nullptr), dsdOffs(0), alignment(4)
+        dataSecDsc()
+            : dsdList(nullptr)
+            , dsdLast(nullptr)
+            , dsdOffs(0)
+            , alignment(4)
         {
         }
     };
@@ -3281,9 +3291,9 @@ class emitter
 
     COMP_HANDLE emitCmpHandle;
 
-/************************************************************************/
-/*               Helpers for interface to EE                            */
-/************************************************************************/
+    /************************************************************************/
+    /*               Helpers for interface to EE                            */
+    /************************************************************************/
 
 #ifdef DEBUG
 
@@ -3293,25 +3303,25 @@ class emitter
 #define emitRecordRelocationWithAddlDelta(location, target, fRelocType, addlDelta)                                     \
     emitRecordRelocationHelp(location, target, fRelocType, #fRelocType, addlDelta)
 
-    void emitRecordRelocationHelp(void*       location,       /* IN */
-                                  void*       target,         /* IN */
-                                  uint16_t    fRelocType,     /* IN */
-                                  const char* relocTypeName,  /* IN */
-                                  int32_t     addlDelta = 0); /* IN */
+    void emitRecordRelocationHelp(void*       location,      /* IN */
+                                  void*       target,        /* IN */
+                                  uint16_t    fRelocType,    /* IN */
+                                  const char* relocTypeName, /* IN */
+                                  int32_t     addlDelta = 0);    /* IN */
 
 #else // !DEBUG
 
     void emitRecordRelocationWithAddlDelta(void*    location,   /* IN */
                                            void*    target,     /* IN */
                                            uint16_t fRelocType, /* IN */
-                                           int32_t  addlDelta)  /* IN */
+                                           int32_t  addlDelta)   /* IN */
     {
         emitRecordRelocation(location, target, fRelocType, addlDelta);
     }
 
-    void emitRecordRelocation(void*    location,       /* IN */
-                              void*    target,         /* IN */
-                              uint16_t fRelocType,     /* IN */
+    void emitRecordRelocation(void*    location,      /* IN */
+                              void*    target,        /* IN */
+                              uint16_t fRelocType,    /* IN */
                               int32_t  addlDelta = 0); /* IN */
 
 #endif // !DEBUG
@@ -3331,9 +3341,9 @@ class emitter
     CORINFO_SIG_INFO* emitScratchSigInfo;
 #endif // DEBUG
 
-/************************************************************************/
-/*               Logic to collect and display statistics                */
-/************************************************************************/
+    /************************************************************************/
+    /*               Logic to collect and display statistics                */
+    /************************************************************************/
 
 #if EMITTER_STATS
 
@@ -3470,10 +3480,10 @@ class emitter
     }
 #endif // EMITTER_STATS
 
-/*************************************************************************
- *
- *  Define any target-dependent emitter members.
- */
+    /*************************************************************************
+     *
+     *  Define any target-dependent emitter members.
+     */
 
 #include "emitdef.h"
 
@@ -3988,7 +3998,8 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
         case INS_comiss:
         case INS_cvtss2sd:
         case INS_cvtss2si:
-        case INS_cvttss2si:
+        case INS_cvttss2si32:
+        case INS_cvttss2si64:
         case INS_divss:
         case INS_extractps:
         case INS_insertps:
@@ -4031,7 +4042,8 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
         case INS_comisd:
         case INS_cvtsd2si:
         case INS_cvtsd2ss:
-        case INS_cvttsd2si:
+        case INS_cvttsd2si32:
+        case INS_cvttsd2si64:
         case INS_divsd:
         case INS_maxsd:
         case INS_minsd:
diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp
index 3fa92b60d0e5..5a20f8a1f940 100644
--- a/src/coreclr/jit/emitarm.cpp
+++ b/src/coreclr/jit/emitarm.cpp
@@ -700,8 +700,8 @@ emitter::insFormat emitter::emitInsFormat(instruction ins)
 }
 
 // INST_FP is 1
-#define LD 2
-#define ST 4
+#define LD  2
+#define ST  4
 #define CMP 8
 
 // clang-format off
@@ -1708,10 +1708,10 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
  *  Add an instruction referencing a register and a constant.
  */
 
-void emitter::emitIns_R_I(instruction    ins,
-                          emitAttr       attr,
-                          regNumber      reg,
-                          target_ssize_t imm,
+void emitter::emitIns_R_I(instruction                                ins,
+                          emitAttr                                   attr,
+                          regNumber                                  reg,
+                          target_ssize_t                             imm,
                           insFlags flags /* = INS_FLAGS_DONT_CARE */ DEBUGARG(GenTreeFlags gtFlags))
 
 {
@@ -1738,7 +1738,7 @@ void emitter::emitIns_R_I(instruction    ins,
                         ins = INS_sub;
                     else // ins == INS_sub
                         ins = INS_add;
-                    imm     = -imm;
+                    imm = -imm;
                 }
                 fmt = IF_T1_J0;
                 sf  = INS_FLAGS_SET;
@@ -2607,7 +2607,7 @@ void emitter::emitIns_R_R_I(instruction ins,
                         ins = INS_sub;
                     else
                         ins = INS_add;
-                    imm     = -imm;
+                    imm = -imm;
                 }
                 fmt = IF_T1_G;
                 sf  = INS_FLAGS_SET;
@@ -2621,7 +2621,7 @@ void emitter::emitIns_R_R_I(instruction ins,
                         ins = INS_sub;
                     else
                         ins = INS_add;
-                    imm     = -imm;
+                    imm = -imm;
                 }
                 // Use Thumb-1 encoding
                 emitIns_R_I(ins, attr, reg1, imm, flags);
@@ -2982,9 +2982,9 @@ void emitter::emitIns_R_R_I(instruction ins,
                     }
                 }
             }
-        //
-        // If we did not find a thumb-1 encoding above
-        //
+            //
+            // If we did not find a thumb-1 encoding above
+            //
 
         COMMON_THUMB2_LDST:
             assert(fmt == IF_NONE);
@@ -3185,8 +3185,8 @@ void emitter::emitIns_R_R_R(instruction ins,
         case INS_mul:
             if (insMustSetFlags(flags))
             {
-                assert(reg1 !=
-                       REG_PC); // VM debugging single stepper doesn't support PC register with this instruction.
+                assert(reg1 != REG_PC); // VM debugging single stepper doesn't support PC register with this
+                                        // instruction.
                 assert(reg2 != REG_PC);
                 assert(reg3 != REG_PC);
 
@@ -4836,7 +4836,7 @@ void emitter::emitIns_Call(EmitCallType          callType,
     if (m_debugInfoSize > 0)
     {
         INDEBUG(id->idDebugOnlyInfo()->idCallSig = sigInfo);
-        id->idDebugOnlyInfo()->idMemCookie       = (size_t)methHnd; // method token
+        id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
     }
 
 #ifdef LATE_DISASM
@@ -5236,7 +5236,7 @@ unsigned emitter::emitOutput_Thumb1Instr(BYTE* dst, code_t code)
 unsigned emitter::emitOutput_Thumb2Instr(BYTE* dst, code_t code)
 {
     unsigned short word1 = (code >> 16) & 0xffff;
-    unsigned short word2 = (code)&0xffff;
+    unsigned short word2 = (code) & 0xffff;
     assert((code_t)((word1 << 16) | word2) == code);
 
 #ifdef DEBUG
@@ -5342,7 +5342,7 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
 
     if (dstOffs <= srcOffs)
     {
-/* This is a backward jump - distance is known at this point */
+        /* This is a backward jump - distance is known at this point */
 
 #if DEBUG_EMIT
         if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
@@ -5731,7 +5731,7 @@ BYTE* emitter::emitOutputIT(BYTE* dst, instruction ins, insFormat fmt, code_t co
 #endif // FEATURE_ITINSTRUCTION
 
 /*****************************************************************************
-*
+ *
  *  Append the machine code corresponding to the given instruction descriptor
  *  to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
  *  is the instruction group that contains the instruction. Updates '*dp' to
@@ -6504,7 +6504,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
                 code |= (immHi << 16);
                 code |= immLo;
 
-                disp = abs(disp);
+                disp = std::abs(disp);
                 assert((disp & 0x00fffffe) == disp);
 
                 callInstrSize = SafeCvtAssert<unsigned char>(emitOutput_Thumb2Instr(dst, code));
@@ -6561,9 +6561,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
 
             break;
 
-        /********************************************************************/
-        /*                            oops                                  */
-        /********************************************************************/
+            /********************************************************************/
+            /*                            oops                                  */
+            /********************************************************************/
 
         default:
 
diff --git a/src/coreclr/jit/emitarm.h b/src/coreclr/jit/emitarm.h
index 245196bfa183..6ae0c57dea6d 100644
--- a/src/coreclr/jit/emitarm.h
+++ b/src/coreclr/jit/emitarm.h
@@ -81,7 +81,7 @@ bool emitInsIsStore(instruction ins);
 bool emitInsIsLoadOrStore(instruction ins);
 
 emitter::insFormat emitInsFormat(instruction ins);
-emitter::code_t emitInsCode(instruction ins, insFormat fmt);
+emitter::code_t    emitInsCode(instruction ins, insFormat fmt);
 
 // Generate code for a load or store operation and handle the case
 // of contained GT_LEA op1 with [base + index<<scale + offset]
@@ -90,7 +90,7 @@ void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTr
 
 static bool IsMovInstruction(instruction ins);
 static bool isModImmConst(int imm);
-static int encodeModImmConst(int imm);
+static int  encodeModImmConst(int imm);
 
 static int insUnscaleImm(instruction ins, int imm);
 
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index 296623036d8d..181b9706e416 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -53,9 +53,9 @@ const emitJumpKind emitReverseJumpKinds[] = {
 }
 
 /*****************************************************************************
-* Look up the jump kind for an instruction. It better be a conditional
-* branch instruction with a jump kind!
-*/
+ * Look up the jump kind for an instruction. It better be a conditional
+ * branch instruction with a jump kind!
+ */
 
 /*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins)
 {
@@ -167,7 +167,7 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) const
 #ifdef DEBUG
 /*****************************************************************************
  *
- *  The following called for each recorded instruction -- use for debugging.
+ *  The following is called for each recorded instruction -- use for debugging.
  */
 void emitter::emitInsSanityCheck(instrDesc* id)
 {
@@ -215,7 +215,15 @@ void emitter::emitInsSanityCheck(instrDesc* id)
             break;
 
         case IF_BR_1B: // BR_1B   ................ ......nnnnn.....         Rn
-            assert(isGeneralRegister(id->idReg3()));
+            if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && id->idIsTlsGD())
+            {
+                assert(isGeneralRegister(id->idReg1()));
+                assert(id->idAddr()->iiaAddr != nullptr);
+            }
+            else
+            {
+                assert(isGeneralRegister(id->idReg3()));
+            }
             break;
 
         case IF_LS_1A: // LS_1A   .X......iiiiiiii iiiiiiiiiiittttt      Rt    PC imm(1MB)
@@ -227,7 +235,7 @@ void emitter::emitInsSanityCheck(instrDesc* id)
             assert(isIntegerRegister(id->idReg1()) || // ZR
                    isVectorRegister(id->idReg1()));
             assert(isIntegerRegister(id->idReg2())); // SP
-            assert(emitGetInsSC(id) == 0);
+            assert((emitGetInsSC(id) == 0) || (id->idIsTlsGD()));
             assert(insOptsNone(id->idInsOpt()));
             break;
 
@@ -235,7 +243,7 @@ void emitter::emitInsSanityCheck(instrDesc* id)
             assert(isIntegerRegister(id->idReg1()) || // ZR
                    isVectorRegister(id->idReg1()));
             assert(isIntegerRegister(id->idReg2())); // SP
-            assert(isValidUimm12(emitGetInsSC(id)));
+            assert(isValidUimm<12>(emitGetInsSC(id)));
             assert(insOptsNone(id->idInsOpt()));
             break;
 
@@ -350,7 +358,7 @@ void emitter::emitInsSanityCheck(instrDesc* id)
         case IF_DI_1A: // DI_1A   X.......shiiiiii iiiiiinnnnn.....         Rn    imm(i12,sh)
             assert(isValidGeneralDatasize(id->idOpSize()));
             assert(isGeneralRegister(id->idReg1()));
-            assert(isValidUimm12(emitGetInsSC(id)));
+            assert(isValidUimm<12>(emitGetInsSC(id)));
             assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
             break;
 
@@ -386,7 +394,7 @@ void emitter::emitInsSanityCheck(instrDesc* id)
             assert(isValidGeneralDatasize(id->idOpSize()));
             assert(isIntegerRegister(id->idReg1())); // SP
             assert(isIntegerRegister(id->idReg2())); // SP
-            assert(isValidUimm12(emitGetInsSC(id)));
+            assert(isValidUimm<12>(emitGetInsSC(id)));
             assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
             break;
 
@@ -531,8 +539,8 @@ void emitter::emitInsSanityCheck(instrDesc* id)
             assert(emitGetInsSC(id) <= 4);
             if (insOptsLSL(id->idInsOpt()))
             {
-                assert((emitGetInsSC(id) > 0) ||
-                       (id->idReg2() == REG_ZR)); // REG_ZR encodes SP and we allow a shift of zero
+                assert((emitGetInsSC(id) > 0) || (id->idReg2() == REG_ZR)); // REG_ZR encodes SP and we allow a shift of
+                                                                            // zero
             }
             break;
 
@@ -566,7 +574,7 @@ void emitter::emitInsSanityCheck(instrDesc* id)
             elemsize = id->idOpSize();
             assert(isValidVectorElemsizeFloat(elemsize));
             assert(isVectorRegister(id->idReg1()));
-            assert(isValidUimm8(emitGetInsSC(id)));
+            assert(isValidUimm<8>(emitGetInsSC(id)));
             break;
 
         case IF_DV_1B: // DV_1B   .QX..........iii cmod..iiiiiddddd      Vd imm8    (immediate vector)
@@ -605,7 +613,7 @@ void emitter::emitInsSanityCheck(instrDesc* id)
                 }
             }
             assert(isVectorRegister(id->idReg1()));
-            assert(isValidUimm8(imm));
+            assert(isValidUimm<8>(imm));
             break;
 
         case IF_DV_1C: // DV_1C   .........X...... ......nnnnn.....      Vn #0.0    (fcmp - with zero)
@@ -943,13930 +951,2711 @@ void emitter::emitInsSanityCheck(instrDesc* id)
             assert(datasize == EA_8BYTE);
             break;
 
-        case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements
-            assert(isPredicateRegister(id->idReg1())); // DDDD
-            assert(isPredicateRegister(id->idReg2())); // NNNN
-            break;
-
-        // Scalable.
-        case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated)
-        case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated)
-        case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated)
-        case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated)
-        case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated)
-        case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector
-        case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated)
-        case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic
-        case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract
-        case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left
-                           // (predicated)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt())); // xx
-            assert(isVectorRegister(id->idReg1()));          // ddddd
-            assert(isLowPredicateRegister(id->idReg2()));    // ggg
-            assert(isVectorRegister(id->idReg3()));          // mmmmm
-            assert(isScalableVectorSize(elemsize));
+        default:
+            // fallback to check SVE instructions.
+            emitInsSveSanityCheck(id);
             break;
+    }
+}
+#endif // DEBUG
 
-        // Scalable, .S or .D.
-        case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated)
-        case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements
-            elemsize = id->idOpSize();
-            assert(insOptsScalableWords(id->idInsOpt())); // xx
-            assert(isVectorRegister(id->idReg1()));       // ddddd
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
+bool emitter::emitInsMayWriteToGCReg(instrDesc* id)
+{
+    instruction ins = id->idIns();
+    insFormat   fmt = id->idInsFmt();
 
-        // Scalable, Merge or Zero predicate.
-        case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));       // nnnnn
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // ddddd
-            assert(isScalableVectorSize(elemsize));
-            break;
+    switch (fmt)
+    {
 
-        // Scalable, with shift immediate.
-        case IF_SVE_AM_2A: // ........xx...... ...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));       // ddddd
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isValidVectorShiftAmount(emitGetInsSC(id), optGetSveElemsize(id->idInsOpt()), true));
-            assert(isScalableVectorSize(elemsize));
-            break;
+            // These are the formats with "destination" registers:
 
-        // Scalable Wide.
-        case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableWide(id->idInsOpt()));  // xx
-            assert(isVectorRegister(id->idReg1()));       // ddddd
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        // Scalable to/from SIMD scalar.
-        case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated)
-        case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated)
-        case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar
-        case IF_SVE_CP_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector
-                           // (predicated)
-        case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt())); // xx
-            assert(isVectorRegister(id->idReg1()));          // ddddd
-            assert(isLowPredicateRegister(id->idReg2()));    // ggg
-            assert(isVectorRegister(id->idReg3()));          // mmmmm
-            assert(isValidVectorElemsize(elemsize));
-            break;
+        case IF_DI_1B: // DI_1B   X........hwiiiii iiiiiiiiiiiddddd      Rd       imm(i16,hw)
+        case IF_DI_1D: // DI_1D   X........Nrrrrrr ssssss.....ddddd      Rd       imm(N,r,s)
+        case IF_DI_1E: // DI_1E   .ii.....iiiiiiii iiiiiiiiiiiddddd      Rd       simm21
 
-        // Scalable to FP SIMD scalar.
-        case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction
-        case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableFloat(id->idInsOpt())); // xx
-            assert(isVectorRegister(id->idReg1()));       // ddddd
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // mmmmm
-            assert(isValidVectorElemsizeSveFloat(elemsize));
-            break;
+        case IF_DI_2A: // DI_2A   X.......shiiiiii iiiiiinnnnnddddd      Rd Rn    imm(i12,sh)
+        case IF_DI_2B: // DI_2B   X.........Xnnnnn ssssssnnnnnddddd      Rd Rn    imm(0-63)
+        case IF_DI_2C: // DI_2C   X........Nrrrrrr ssssssnnnnnddddd      Rd Rn    imm(N,r,s)
+        case IF_DI_2D: // DI_2D   X........Nrrrrrr ssssssnnnnnddddd      Rd Rn    imr, imms   (N,r,s)
 
-        // Scalable to general register.
-        case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register
-        case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt())); // xx
-            assert(isGeneralRegister(id->idReg1()));         // ddddd
-            assert(isLowPredicateRegister(id->idReg2()));    // ggg
-            assert(isVectorRegister(id->idReg3()));          // mmmmm
-            assert(isValidScalarDatasize(elemsize));
-            break;
-
-        // Scalable, 4 regs (location of reg3 and reg4 can switch)
-        case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend
-                           // (predicated)
-        case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand
-                           // (predicated)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt())); // xx
-            assert(isVectorRegister(id->idReg1()));          // ddddd
-            assert(isLowPredicateRegister(id->idReg2()));    // ggg
-            assert(isVectorRegister(id->idReg3()));
-            assert(isVectorRegister(id->idReg4()));
-            assert(isScalableVectorSize(elemsize));
-            break;
+        case IF_DR_1D: // DR_1D   X............... cccc.......ddddd      Rd       cond
 
-        // Scalable, unpredicated
-        case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated)
-        case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
-        case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high
-                           // (unpredicated)
-        case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated)
-        case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt())); // xx
-            assert(isVectorRegister(id->idReg1()));          // ddddd
-            assert(isVectorRegister(id->idReg2()));          // nnnnn
-            assert(isVectorRegister(id->idReg3()));          // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
+        case IF_DR_2D: // DR_2D   X..........nnnnn cccc..nnnnnddddd      Rd Rn    cond
+        case IF_DR_2E: // DR_2E   X..........mmmmm ...........ddddd      Rd    Rm
+        case IF_DR_2F: // DR_2F   X.......sh.mmmmm ssssss.....ddddd      Rd    Rm {LSL,LSR,ASR} imm(0-63)
+        case IF_DR_2G: // DR_2G   X............... ......nnnnnddddd      Rd Rn
+        case IF_DR_2H: // DR_2H   X........X...... ......nnnnnddddd      Rd Rn
 
-        // Scalable, no predicates. General purpose source registers
-        case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register
-                           // increment)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt())); // xx
-            assert(isVectorRegister(id->idReg1()));          // ddddd
-            assert(isGeneralRegisterOrZR(id->idReg2()));     // nnnnn
-            assert(isGeneralRegisterOrZR(id->idReg3()));     // mmmmm
-            assert(isValidScalarDatasize(elemsize));
-            break;
+        case IF_DR_3A: // DR_3A   X..........mmmmm ......nnnnnddddd      Rd Rn Rm
+        case IF_DR_3B: // DR_3B   X.......sh.mmmmm ssssssnnnnnddddd      Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
+        case IF_DR_3C: // DR_3C   X..........mmmmm xxxsssnnnnnddddd      Rd Rn Rm ext(Rm) LSL imm(0-4)
+        case IF_DR_3D: // DR_3D   X..........mmmmm cccc..nnnnnddddd      Rd Rn Rm cond
+        case IF_DR_3E: // DR_3E   X........X.mmmmm ssssssnnnnnddddd      Rd Rn Rm imm(0-63)
 
-        case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count
-            elemsize = id->idOpSize();
-            assert(id->idInsOpt() == INS_OPTS_NONE);
-            assert(isGeneralRegister(id->idReg1()));
-            assert(elemsize == EA_8BYTE);
-            assert(isValidUimm4From1(emitGetInsSC(id)));
-            break;
+        case IF_DR_4A: // DR_4A   X..........mmmmm .aaaaannnnnddddd      Rd Rn Rm Ra
 
-        case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isPredicateRegister(id->idReg1())); // DDDD
-            assert(isPredicateRegister(id->idReg2())); // NNNN
-            assert(isPredicateRegister(id->idReg3())); // MMMM
-            break;
+        case IF_DV_2B: // DV_2B   .Q.........iiiii ......nnnnnddddd      Rd Vn[]    (umov - to general)
+        case IF_DV_2H: // DV_2H   X........X...... ......nnnnnddddd      Rd Vn      (fmov - to general)
+            return true;
 
-        case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize));
-            assert(insOptsScalableStandard(id->idInsOpt())); // xx
-            assert(isPredicateRegister(id->idReg1()));       // DDDD
-            assert(isPredicateRegister(id->idReg2()));       // NNNN
-            break;
+        case IF_DV_2C: // DV_2C   .Q.........iiiii ......nnnnnddddd      Vd Rn      (dup/ins - vector from general)
+        case IF_DV_2D: // DV_2D   .Q.........iiiii ......nnnnnddddd      Vd Vn[]    (dup - vector)
+        case IF_DV_2E: // DV_2E   ...........iiiii ......nnnnnddddd      Vd Vn[]    (dup - scalar)
+        case IF_DV_2F: // DV_2F   ...........iiiii .jjjj.nnnnnddddd      Vd[] Vn[]  (ins - element)
+        case IF_DV_2G: // DV_2G   .........X...... ......nnnnnddddd      Vd Vn      (fmov, fcvtXX - register)
+        case IF_DV_2I: // DV_2I   X........X...... ......nnnnnddddd      Vd Rn      (fmov - from general)
+        case IF_DV_2J: // DV_2J   ........SS.....D D.....nnnnnddddd      Vd Vn      (fcvt)
+        case IF_DV_2K: // DV_2K   .........X.mmmmm ......nnnnn.....      Vn Vm      (fcmp)
+        case IF_DV_2L: // DV_2L   ........XX...... ......nnnnnddddd      Vd Vn      (abs, neg - scalar)
+        case IF_DV_2M: // DV_2M   .Q......XX...... ......nnnnnddddd      Vd Vn      (abs, neg - vector)
+        case IF_DV_2P: // DV_2P   ................ ......nnnnnddddd      Vd Vn      (aes*, sha1su1) - Vd both source and
+                       // destination
 
-        case IF_SVE_CT_3A:                          // ................ ...gggnnnnnddddd -- SVE reverse doublewords
-            assert(isVectorRegister(id->idReg1())); // ddddd
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // nnnnn
-            break;
+        case IF_DV_2Q: // DV_2Q   .........X...... ......nnnnnddddd      Sd Vn      (faddp, fmaxnmp, fmaxp, fminnmp,
+                       // fminp - scalar)
+        case IF_DV_2R: // DV_2R   .Q.......X...... ......nnnnnddddd      Sd Vn      (fmaxnmv, fmaxv, fminnmv, fminv)
+        case IF_DV_2S: // DV_2S   ........XX...... ......nnnnnddddd      Sd Vn      (addp - scalar)
 
-        // Scalable, 4 regs, to predicate register.
-        case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize));
-            assert(insOptsScalableStandard(id->idInsOpt())); // xx
-            assert(isPredicateRegister(id->idReg1()));       // DDDD
-            assert(isLowPredicateRegister(id->idReg2()));    // ggg
-            assert(isVectorRegister(id->idReg3()));          // nnnnn
-            assert(isVectorRegister(id->idReg4()));          // mmmmm
-            break;
+        case IF_DV_3A:  // DV_3A   .Q......XX.mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
+        case IF_DV_3AI: // DV_3AI  .Q......XXLMmmmm ....H.nnnnnddddd      Vd Vn Vm[] (vector)
+        case IF_DV_3B:  // DV_3B   .Q.......X.mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
+        case IF_DV_3BI: // DV_3BI  .Q.......XLmmmmm ....H.nnnnnddddd      Vd Vn Vm[] (vector by element)
+        case IF_DV_3C:  // DV_3C   .Q.........mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
+        case IF_DV_3D:  // DV_3D   .........X.mmmmm ......nnnnnddddd      Vd Vn Vm   (scalar)
+        case IF_DV_3DI: // DV_3DI  .........XLmmmmm ....H.nnnnnddddd      Vd Vn Vm[] (scalar by element)
+        case IF_DV_3E:  // DV_3E   ........XX.mmmmm ......nnnnnddddd      Vd Vn Vm   (scalar)
+        case IF_DV_3EI: // DV_3EI  ........XXLMmmmm ....H.nnnnnddddd      Vd Vn Vm[] (scalar by element)
+        case IF_DV_3F:  // DV_3F   .Q......XX.mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
+        case IF_DV_3G:  // DV_3G   .Q.........mmmmm .iiii.nnnnnddddd      Vd Vn Vm imm (vector)
+        case IF_DV_4A:  // DV_4A   .........X.mmmmm .aaaaannnnnddddd      Vd Va Vn Vm (scalar)
+            // Tracked GC pointers cannot be placed into the SIMD registers.
+            return false;
 
-        case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize));
-            assert(insOptsScalableWide(id->idInsOpt()));  // xx
-            assert(isPredicateRegister(id->idReg1()));    // DDDD
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // nnnnn
-            assert(isVectorRegister(id->idReg4()));       // mmmmm
-            break;
+            // These are the load/store formats with "target" registers:
 
-        case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize));
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isPredicateRegister(id->idReg1()));    // DDDD
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // nnnnn
-            assert(isValidSimm5(emitGetInsSC(id)));       // iiiii
-            break;
+        case IF_LS_1A: // LS_1A   XX...V..iiiiiiii iiiiiiiiiiittttt      Rt    PC imm(1MB)
+        case IF_LS_2A: // LS_2A   .X.......X...... ......nnnnnttttt      Rt Rn
+        case IF_LS_2B: // LS_2B   .X.......Xiiiiii iiiiiinnnnnttttt      Rt Rn    imm(0-4095)
+        case IF_LS_2C: // LS_2C   .X.......X.iiiii iiiiP.nnnnnttttt      Rt Rn    imm(-256..+255) pre/post inc
+        case IF_LS_2D: // LS_2D   .Q.............. ....ssnnnnnttttt      Vt Rn
+        case IF_LS_2E: // LS_2E   .Q.............. ....ssnnnnnttttt      Vt Rn
+        case IF_LS_2F: // LS_2F   .Q.............. xx.Sssnnnnnttttt      Vt[] Rn
+        case IF_LS_2G: // LS_2G   .Q.............. xx.Sssnnnnnttttt      Vt[] Rn
+        case IF_LS_3A: // LS_3A   .X.......X.mmmmm xxxS..nnnnnttttt      Rt Rn Rm ext(Rm) LSL {}
+        case IF_LS_3B: // LS_3B   X............... .aaaaannnnnttttt      Rt Ra Rn
+        case IF_LS_3C: // LS_3C   X.........iiiiii iaaaaannnnnttttt      Rt Ra Rn imm(im7,sh)
+        case IF_LS_3D: // LS_3D   .X.......X.mmmmm ......nnnnnttttt      Wm Rt Rn
+        case IF_LS_3F: // LS_3F   .Q.........mmmmm ....ssnnnnnttttt      Vt Rn Rm
+        case IF_LS_3G: // LS_3G   .Q.........mmmmm ...Sssnnnnnttttt      Vt[] Rn Rm
 
-        case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize));
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isPredicateRegister(id->idReg1()));    // DDDD
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // nnnnn
-            assert(isValidUimm7(emitGetInsSC(id)));       // iiiii
-            break;
-
-        case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed)
-        case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed)
-        case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed)
-        case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
-        case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
-        case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
-        case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
-        case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
-        case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1())); // ddddd
-            assert(isVectorRegister(id->idReg2())); // nnnnn
-            assert(isVectorRegister(id->idReg3())); // mmm
-            assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7));
-            assert(isValidUimm2(emitGetInsSC(id))); // ii
-            break;
-
-        case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
-        case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
-        case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
-        case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
-        case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
-        case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
-        case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
-        case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
-        case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1())); // ddddd
-            assert(isVectorRegister(id->idReg2())); // nnnnn
-            assert(isVectorRegister(id->idReg3())); // mmm
-            assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7));
-            assert(isValidUimm3(emitGetInsSC(id))); // iii
-            break;
-
-        case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
-        case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
-        case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
-        case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_S);
-            assert(isVectorRegister(id->idReg1())); // ddddd
-            assert(isVectorRegister(id->idReg2())); // nnnnn
-            assert(isVectorRegister(id->idReg3())); // mmmm
-            assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V15));
-            assert(isValidUimm2(emitGetInsSC(id))); // ii
-            break;
-
-        case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
-        case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
-        case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
-        case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
-        case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1())); // ddddd
-            assert(isVectorRegister(id->idReg2())); // nnnnn
-            assert(isVectorRegister(id->idReg3())); // mmmm
-            assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V15));
-            assert(isValidImm1(emitGetInsSC(id))); // i
-            break;
-
-        case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(id->idReg1())); // DDDD
-            assert(isPredicateRegister(id->idReg2())); // gggg
-            assert(isPredicateRegister(id->idReg3())); // NNNN
+            // For the Store instructions the "target" register is actually a "source" value
 
-            switch (id->idIns())
+            if (emitInsIsStore(ins))
             {
-                case INS_sve_and:
-                case INS_sve_ands:
-                case INS_sve_bic:
-                case INS_sve_bics:
-                case INS_sve_eor:
-                case INS_sve_eors:
-                case INS_sve_nand:
-                case INS_sve_nands:
-                case INS_sve_nor:
-                case INS_sve_nors:
-                case INS_sve_orn:
-                case INS_sve_orns:
-                case INS_sve_orr:
-                case INS_sve_orrs:
-                case INS_sve_sel:
-                    assert(isPredicateRegister(id->idReg4())); // MMMM
-                    break;
-
-                case INS_sve_mov:
-                case INS_sve_movs:
-                case INS_sve_not:
-                case INS_sve_nots:
-                    // no fourth register
-                    break;
-
-                default:
-                    unreached();
-                    break;
+                return false;
+            }
+            else
+            {
+                assert(emitInsIsLoad(ins));
+                return true;
             }
-            break;
 
-        case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-        case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(id->idReg1())); // DDDD
-            assert(isPredicateRegister(id->idReg2())); // NNNN
-            break;
+        case IF_LS_3E: // LS_3E   .X.........mmmmm ......nnnnnttttt      Rm Rt Rn ARMv8.1 LSE Atomics
+            // ARMv8.1 Atomics
+            assert(emitInsIsStore(ins));
+            assert(emitInsIsLoad(ins));
+            return true;
 
-        case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-        case IF_SVE_DB_3A:   // ................ ..gggg.NNNNMDDDD -- SVE partition break condition
-        case IF_SVE_DB_3B:   // ................ ..gggg.NNNN.DDDD -- SVE partition break condition
-        case IF_SVE_DC_3A:   // ................ ..gggg.NNNN.MMMM -- SVE propagate break to next partition
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize));
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(id->idReg1())); // DDDD
-            assert(isPredicateRegister(id->idReg2())); // gggg
-            assert(isPredicateRegister(id->idReg3())); // NNNN
-            break;
+        case IF_SR_1A: // SR_1A   ................ ...........ttttt      Rt       (dc zva, mrs)
+            return ins == INS_mrs_tpid0;
 
-        case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize));
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(id->idReg1())); // DDDD
-            assert(isPredicateRegister(id->idReg2())); // gggg
-            assert(isPredicateRegister(id->idReg3())); // NNNN
-            assert(isPredicateRegister(id->idReg4())); // MMMM
-            break;
+        default:
+            return false;
+    }
+}
 
-        case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active
-        case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated)
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(id->idReg1())); // DDDD
-            assert(isPredicateRegister(id->idReg2())); // gggg
-            break;
+bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id)
+{
+    if (!id->idIsLclVar())
+        return false;
 
-        case IF_SVE_DE_1A: // ........xx...... ......ppppp.DDDD -- SVE predicate initialize
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize));
-            assert(isPredicateRegister(id->idReg1()));       // DDDD
-            assert(insOptsScalableStandard(id->idInsOpt())); // xx
-            break;
+    instruction ins = id->idIns();
 
-        case IF_SVE_DF_2A: // ........xx...... .......VVVV.DDDD -- SVE predicate next active
-        case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize));
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isPredicateRegister(id->idReg1())); // DDDD
-            assert(isPredicateRegister(id->idReg2())); // gggg
-            break;
+    // This list is related to the list of instructions used to store local vars in emitIns_S_R().
+    // We don't accept writing to float local vars.
 
-        case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated)
-        case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize));
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(id->idReg1())); // DDDD
-            break;
+    switch (ins)
+    {
+        case INS_strb:
+        case INS_strh:
+        case INS_str:
+        case INS_stur:
+        case INS_sturb:
+        case INS_sturh:
+            return true;
+        default:
+            return false;
+    }
+}
 
-        case IF_SVE_DK_3A: // ........xx...... ..gggg.NNNNddddd -- SVE predicate count
-            assert(id->idOpSize() == EA_8BYTE);
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isGeneralRegister(id->idReg1()));   // ddddd
-            assert(isPredicateRegister(id->idReg2())); // gggg
-            assert(isPredicateRegister(id->idReg3())); // NNNN
-            break;
+bool emitter::emitInsWritesToLclVarStackLocPair(instrDesc* id)
+{
+    if (!id->idIsLclVar())
+        return false;
 
-        case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize));
-            assert(insOptsScalableAtMaxHalf(id->idInsOpt()));
-            assert(isPredicateRegister(id->idReg1()));    // DDDD
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // nnnnn
-            assert(isVectorRegister(id->idReg4()));       // mmmmm
-            break;
+    instruction ins = id->idIns();
 
-        case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize));
-            assert(insOptsScalableFloat(id->idInsOpt()));
-            assert(isPredicateRegister(id->idReg1()));    // DDDD
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // nnnnn
-            assert(isVectorRegister(id->idReg4()));       // mmmmm
-            break;
-
-        // Scalable FP.
-        case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations
-        case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated)
-        case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations
-            elemsize = id->idOpSize();
-            assert(insOptsScalableFloat(id->idInsOpt())); // xx
-            assert(isVectorRegister(id->idReg1()));       // ddddd
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
+    // This list is related to the list of instructions used to store local vars in emitIns_S_S_R_R().
+    // We don't accept writing to float local vars.
 
-        // Scalable to Simd Vector.
-        case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords)
-        case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords)
-        case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords)
-            datasize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt())); // xx
-            assert(isVectorRegister(id->idReg1()));          // ddddd
-            assert(isLowPredicateRegister(id->idReg2()));    // ggg
-            assert(isVectorRegister(id->idReg3()));          // mmmmm
-            assert(datasize == EA_8BYTE);
-            break;
+    switch (ins)
+    {
+        case INS_stnp:
+        case INS_stp:
+            return true;
+        default:
+            return false;
+    }
+}
 
-        // Scalable FP to Simd Vector.
-        case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords)
-            datasize = id->idOpSize();
-            assert(insOptsScalableFloat(id->idInsOpt())); // xx
-            assert(isVectorRegister(id->idReg1()));       // ddddd
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // mmmmm
-            assert(datasize == EA_8BYTE);
-            break;
+bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id)
+{
+    instruction ins = id->idIns();
 
-        // Scalable, widening to scalar SIMD.
-        case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableWide(id->idInsOpt()));  // xx
-            assert(isVectorRegister(id->idReg1()));       // ddddd
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // mmmmm
-            assert(isValidVectorElemsizeWidening(elemsize));
-            break;
+    switch (ins)
+    {
+        case INS_ldp:
+        case INS_ldpsw:
+        case INS_ldnp:
+            return true;
+        default:
+            return false;
+    }
+}
 
-        // Scalable, possibly FP.
-        case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated)
-            switch (id->idIns())
-            {
-                case INS_sve_fabs:
-                case INS_sve_fneg:
-                    assert(insOptsScalableFloat(id->idInsOpt())); // xx
-                    break;
+// Takes an instrDesc 'id' and uses the instruction 'ins' to determine the
+// size of the target register that is written or read by the instruction.
+// Note that even if EA_4BYTE is returned a load instruction will still
+// always zero the upper 4 bytes of the target register.
+// This method is required so that we can distinguish between loads that are
+// sign-extending as they can have two different sizes for their target register.
+// Additionally for instructions like 'ldr' and 'str' these can load/store
+// either 4 byte or 8 bytes to/from the target register.
+// By convention the small unsigned load instructions are considered to write
+// a 4 byte sized target register, though since these also zero the upper 4 bytes
+// they could equally be considered to write the unsigned value to full 8 byte register.
+//
+emitAttr emitter::emitInsTargetRegSize(instrDesc* id)
+{
+    instruction ins    = id->idIns();
+    emitAttr    result = EA_UNKNOWN;
 
-                default:
-                    assert(insOptsScalableStandard(id->idInsOpt())); // xx
-                    break;
-            }
-            elemsize = id->idOpSize();
-            assert(isVectorRegister(id->idReg1()));       // ddddd
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
+    // This is used to determine the size of the target registers for a load/store instruction
 
-        // Scalable, various sizes.
-        case IF_SVE_AQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer unary operations (predicated)
-        case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements
-            switch (id->idIns())
-            {
-                case INS_sve_abs:
-                case INS_sve_neg:
-                case INS_sve_rbit:
-                    assert(insOptsScalableStandard(id->idInsOpt()));
-                    break;
-
-                case INS_sve_sxtb:
-                case INS_sve_uxtb:
-                case INS_sve_revb:
-                    assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
-                    break;
-
-                case INS_sve_sxth:
-                case INS_sve_uxth:
-                case INS_sve_revh:
-                    assert(insOptsScalableWords(id->idInsOpt()));
-                    break;
-
-                default:
-                    assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
-                    break;
-            }
-            elemsize = id->idOpSize();
-            assert(isVectorRegister(id->idReg1()));       // ddddd
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // mmmmm
-            assert(isScalableVectorSize(elemsize));
+    switch (ins)
+    {
+        case INS_ldxrb:
+        case INS_ldarb:
+        case INS_ldaprb:
+        case INS_ldaxrb:
+        case INS_stxrb:
+        case INS_stlrb:
+        case INS_stlxrb:
+        case INS_ldrb:
+        case INS_strb:
+        case INS_ldurb:
+        case INS_ldapurb:
+        case INS_sturb:
+        case INS_stlurb:
+            result = EA_4BYTE;
             break;
 
-        case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
-        case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
-            elemsize = id->idOpSize();
-            assert(isScalableVectorSize(elemsize)); // xx
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));       // ddddd
-            assert(isLowPredicateRegister(id->idReg2())); // VVV
-            assert(isVectorRegister(id->idReg3()));       // nnnnn
+        case INS_ldxrh:
+        case INS_ldarh:
+        case INS_ldaprh:
+        case INS_ldaxrh:
+        case INS_stxrh:
+        case INS_stlrh:
+        case INS_stlxrh:
+        case INS_ldrh:
+        case INS_strh:
+        case INS_ldurh:
+        case INS_sturh:
+        case INS_ldapurh:
+        case INS_stlurh:
+            result = EA_4BYTE;
             break;
 
-        // Scalable from general scalar (possibly SP)
-        case IF_SVE_CQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy general register to vector (predicated)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt())); // xx
-            assert(isVectorRegister(id->idReg1()));          // ddddd
-            assert(isLowPredicateRegister(id->idReg2()));    // ggg
-            assert(isGeneralRegisterOrZR(id->idReg3()));     // mmmmm
-            assert(isValidScalarDatasize(elemsize));
+        case INS_ldrsb:
+        case INS_ldursb:
+        case INS_ldrsh:
+        case INS_ldursh:
+            if (id->idOpSize() == EA_8BYTE)
+                result = EA_8BYTE;
+            else
+                result = EA_4BYTE;
             break;
 
-        // Scalable, .H, .S or .D
-        case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long
-        case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value
-            elemsize = id->idOpSize();
-            assert(insOptsScalableAtLeastHalf(id->idInsOpt())); // xx
-            assert(isVectorRegister(id->idReg1()));             // ddddd
-            assert(isLowPredicateRegister(id->idReg2()));       // ggg
-            assert(isVectorRegister(id->idReg3()));             // mmmmm
-            assert(isScalableVectorSize(elemsize));
+        case INS_ldrsw:
+        case INS_ldursw:
+        case INS_ldpsw:
+            result = EA_8BYTE;
             break;
 
-        // Scalable, possibly fixed to .S
-        case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated)
-            elemsize = id->idOpSize();
-            switch (id->idIns())
-            {
-                case INS_sve_sqabs:
-                case INS_sve_sqneg:
-                    assert(insOptsScalableStandard(id->idInsOpt()));
-                    break;
-
-                default:
-                    assert(id->idInsOpt() == INS_OPTS_SCALABLE_S);
-                    break;
-            }
-            assert(isVectorRegister(id->idReg1()));       // ddddd
-            assert(isLowPredicateRegister(id->idReg2())); // ggg
-            assert(isVectorRegister(id->idReg3()));       // mmmmm
-            assert(isScalableVectorSize(elemsize));
+        case INS_ldp:
+        case INS_stp:
+        case INS_ldnp:
+        case INS_stnp:
+            result = id->idOpSize();
             break;
 
-        case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_H);
-            assert(isVectorRegister(id->idReg1())); // nnnn
-            assert(isVectorRegister(id->idReg2())); // ddddd
-            assert(isScalableVectorSize(id->idOpSize()));
+        case INS_ldxr:
+        case INS_ldar:
+        case INS_ldapr:
+        case INS_ldaxr:
+        case INS_stxr:
+        case INS_stlr:
+        case INS_stlxr:
+        case INS_ldr:
+        case INS_str:
+        case INS_ldur:
+        case INS_stur:
+        case INS_ldapur:
+        case INS_stlur:
+            result = id->idOpSize();
             break;
 
-        case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter)
-            assert(id->idOpSize() == EA_8BYTE);
-
-            FALLTHROUGH;
-        case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
-        case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
-            assert(isGeneralRegister(id->idReg1()));                          // ddddd
-            assert(isPredicateRegister(id->idReg2()));                        // MMMM
-            assert(isValidGeneralDatasize(id->idOpSize()));
+        default:
+            NO_WAY("unexpected instruction");
             break;
+    }
+    return result;
+}
 
-        case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
-        case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
-            assert(insOptsScalableAtLeastHalf(id->idInsOpt())); // xx
-            assert(isPredicateRegister(id->idReg1()));          // MMMM
-            assert(isVectorRegister(id->idReg2()));             // ddddd
-            assert(isScalableVectorSize(id->idOpSize()));
-            break;
+// Takes an instrDesc and uses the instruction to determine the 'size' of the
+// data that is loaded from memory.
+//
+emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
+{
+    instruction ins    = id->idIns();
+    emitAttr    result = EA_UNKNOWN;
 
-        case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise
-            break;
+    // The 'result' returned is the 'size' of the data that is loaded from memory.
 
-        case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(id->idReg1())); // NNNN
+    switch (ins)
+    {
+        case INS_ldarb:
+        case INS_ldaprb:
+        case INS_stlrb:
+        case INS_ldrb:
+        case INS_strb:
+        case INS_ldurb:
+        case INS_ldapurb:
+        case INS_sturb:
+        case INS_stlurb:
+        case INS_ldrsb:
+        case INS_ldursb:
+            result = EA_1BYTE;
             break;
 
-        case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars
-            assert(insOptsNone(id->idInsOpt()));
-            assert(isGeneralRegister(id->idReg1()));        // nnnnn
-            assert(isGeneralRegister(id->idReg2()));        // mmmmm
-            assert(isValidGeneralDatasize(id->idOpSize())); // x
+        case INS_ldarh:
+        case INS_ldaprh:
+        case INS_stlrh:
+        case INS_ldrh:
+        case INS_strh:
+        case INS_ldurh:
+        case INS_sturh:
+        case INS_ldrsh:
+        case INS_ldursh:
+        case INS_ldapurh:
+        case INS_stlurh:
+            result = EA_2BYTE;
             break;
 
-        case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1())); // nnnnn
-            assert(isVectorRegister(id->idReg2())); // ddddd
-            assert(optGetSveElemsize(id->idInsOpt()) != EA_8BYTE);
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
-                                                                              // x
+        case INS_ldrsw:
+        case INS_ldursw:
+        case INS_ldpsw:
+            result = EA_4BYTE;
             break;
 
-        case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1())); // ddddd
-            assert(isVectorRegister(id->idReg2())); // mmmmm
-            if (id->idInsOpt() == INS_OPTS_SCALABLE_S)
-            {
-                assert(id->idIns() == INS_sve_sm4e);
-            }
-            else
-            {
-                assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
-            }
-            assert(isScalableVectorSize(elemsize));
+        case INS_ldp:
+        case INS_stp:
+        case INS_ldnp:
+        case INS_stnp:
+            result = id->idOpSize();
             break;
 
-        case IF_SVE_GL_1A: // ................ ...........ddddd -- SVE2 crypto unary operations
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1())); // ddddd
-            assert(isScalableVectorSize(elemsize));
+        case INS_ldar:
+        case INS_ldapr:
+        case INS_stlr:
+        case INS_ldr:
+        case INS_str:
+        case INS_ldur:
+        case INS_stur:
+        case INS_ldapur:
+        case INS_stlur:
+            result = id->idOpSize();
             break;
 
-        case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare
-            assert(id->idOpSize() == EA_8BYTE);
-
-            FALLTHROUGH;
-        case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isPredicateRegister(id->idReg1()));                        // DDDD
-            assert(isGeneralRegister(id->idReg2()));                          // nnnnn
-            assert(isValidGeneralDatasize(id->idOpSize()));                   // X
-            assert(isGeneralRegister(id->idReg3()));                          // mmmmm
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+        default:
+            NO_WAY("unexpected instruction");
             break;
+    }
+    return result;
+}
 
-        case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
-            assert(isValidImm1(emitGetInsSC(id))); // i
-
-            FALLTHROUGH;
-        case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isPredicateRegister(id->idReg1()));                        // DDDD
-            assert(isHighPredicateRegister(id->idReg2()));                    // NNN
-            assert(isValidUimm2(emitGetInsSC(id)));                           // ii
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
-            break;
-
-        case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. -- SVE integer compare scalar count and limit (predicate
-                           // pair)
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isLowPredicateRegister(id->idReg1()));                     // DDD
-            assert(isGeneralRegister(id->idReg2()));                          // nnnnn
-            assert(isGeneralRegister(id->idReg3()));                          // mmmmm
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
-            break;
-
-        case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit
-                           // (predicate-as-counter)
-            assert(insOptsScalableStandard(id->idInsOpt()));                  // L
-            assert(isHighPredicateRegister(id->idReg1()));                    // DDD
-            assert(isGeneralRegister(id->idReg2()));                          // nnnnn
-            assert(isGeneralRegister(id->idReg3()));                          // mmmmm
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
-            break;
-
-        case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isHighPredicateRegister(id->idReg1()));                    // DDD
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
-            break;
-
-        case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated)
-            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));                           // ddddd
-            assert(isValidUimm8(emitGetInsSC(id)));                           // iiiiiiii
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
-            break;
-
-        case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated)
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            // Size specifier must be able to fit left-shifted immediate
-            assert(insOptsScalableAtLeastHalf(id->idInsOpt()) || !id->idOptionalShift());
-            assert(isVectorRegister(id->idReg1()));                           // ddddd
-            assert(isValidSimm8(emitGetInsSC(id)));                           // iiiiiiii
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
-            break;
-
-        case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated)
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            // Size specifier must be able to fit left-shifted immediate
-            assert(insOptsScalableAtLeastHalf(id->idInsOpt()) || !id->idOptionalShift());
-            assert(isVectorRegister(id->idReg1()));                           // ddddd
-            assert(isValidUimm8(emitGetInsSC(id)));                           // iiiiiiii
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
-            break;
-
-        case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated)
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));                           // ddddd
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
-            break;
-
-        case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated)
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));                                   // ddddd
-            assert(isValidSimm8(emitGetInsSC(id)) || isValidUimm8(emitGetInsSC(id))); // iiiiiiii
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt())));         // xx
-            break;
-
-        case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated)
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));                           // ddddd
-            assert(isValidSimm8(emitGetInsSC(id)));                           // iiiiiiii
-            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
-            break;
-
-        case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
-        case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
-        case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1())); // ddddd
-            assert(isVectorRegister(id->idReg2())); // nnnnn
-            assert(isVectorRegister(id->idReg3())); // mmm
-            assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7));
-            assert(isValidUimm4(emitGetInsSC(id))); // ii rr
-            break;
-
-        case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
-        case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
-        case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1())); // ddddd
-            assert(isVectorRegister(id->idReg2())); // nnnnn
-            assert(isVectorRegister(id->idReg3())); // mmm
-            assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V15));
-            assert(isValidUimm3(emitGetInsSC(id))); // i rr
-            break;
-
-        case IF_SVE_IH_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
-                             // immediate)
-        case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
-                             // immediate)
-        case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
-                             // immediate)
-        case IF_SVE_IJ_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate)
-        case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
-                             // immediate)
-        case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
-                             // immediate)
-        case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
-                             // immediate)
-        case IF_SVE_IM_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus
-                             // immediate)
-        case IF_SVE_IO_3A:   // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus
-                             // immediate)
-        case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
-                           // immediate)
-        case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate)
-        case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
-                           // immediate)
-        case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
-                           // immediate)
-        case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-        case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-        case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate)
-            elemsize = id->idOpSize();
-            assert(insOptsScalable(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isScalableVectorSize(elemsize));
-
-            switch (id->idIns())
-            {
-                case INS_sve_ld2b:
-                case INS_sve_ld2h:
-                case INS_sve_ld2w:
-                case INS_sve_ld2d:
-                case INS_sve_ld2q:
-                case INS_sve_st2b:
-                case INS_sve_st2h:
-                case INS_sve_st2w:
-                case INS_sve_st2d:
-                case INS_sve_st2q:
-                    assert(isValidSimm4_MultipleOf2(emitGetInsSC(id))); // iiii
-                    break;
+/*****************************************************************************/
 
-                case INS_sve_ld3b:
-                case INS_sve_ld3h:
-                case INS_sve_ld3w:
-                case INS_sve_ld3d:
-                case INS_sve_ld3q:
-                case INS_sve_st3b:
-                case INS_sve_st3h:
-                case INS_sve_st3w:
-                case INS_sve_st3d:
-                case INS_sve_st3q:
-                    assert(isValidSimm4_MultipleOf3(emitGetInsSC(id))); // iiii
-                    break;
+// clang-format off
+static const char * const  xRegNames[] =
+{
+    #define REGDEF(name, rnum, mask, xname, wname) xname,
+    #include "register.h"
+};
 
-                case INS_sve_ld4b:
-                case INS_sve_ld4h:
-                case INS_sve_ld4w:
-                case INS_sve_ld4d:
-                case INS_sve_ld4q:
-                case INS_sve_st4b:
-                case INS_sve_st4h:
-                case INS_sve_st4w:
-                case INS_sve_st4d:
-                case INS_sve_st4q:
-                    assert(isValidSimm4_MultipleOf4(emitGetInsSC(id))); // iiii
-                    break;
+static const char * const  wRegNames[] =
+{
+    #define REGDEF(name, rnum, mask, xname, wname) wname,
+    #include "register.h"
+};
 
-                case INS_sve_ld1rqb:
-                case INS_sve_ld1rqd:
-                case INS_sve_ld1rqh:
-                case INS_sve_ld1rqw:
-                    assert(isValidSimm4_MultipleOf16(emitGetInsSC(id))); // iiii
-                    break;
+static const char * const  vRegNames[] =
+{
+    "v0",  "v1",  "v2",  "v3",  "v4",
+    "v5",  "v6",  "v7",  "v8",  "v9",
+    "v10", "v11", "v12", "v13", "v14",
+    "v15", "v16", "v17", "v18", "v19",
+    "v20", "v21", "v22", "v23", "v24",
+    "v25", "v26", "v27", "v28", "v29",
+    "v30", "v31"
+};
 
-                case INS_sve_ld1rob:
-                case INS_sve_ld1rod:
-                case INS_sve_ld1roh:
-                case INS_sve_ld1row:
-                    assert(isValidSimm4_MultipleOf32(emitGetInsSC(id))); // iiii
-                    break;
+static const char * const  qRegNames[] =
+{
+    "q0",  "q1",  "q2",  "q3",  "q4",
+    "q5",  "q6",  "q7",  "q8",  "q9",
+    "q10", "q11", "q12", "q13", "q14",
+    "q15", "q16", "q17", "q18", "q19",
+    "q20", "q21", "q22", "q23", "q24",
+    "q25", "q26", "q27", "q28", "q29",
+    "q30", "q31"
+};
 
-                default:
-                    assert(isValidSimm4(emitGetInsSC(id))); // iiii
-                    break;
-            }
-            break;
+static const char * const  hRegNames[] =
+{
+    "h0",  "h1",  "h2",  "h3",  "h4",
+    "h5",  "h6",  "h7",  "h8",  "h9",
+    "h10", "h11", "h12", "h13", "h14",
+    "h15", "h16", "h17", "h18", "h19",
+    "h20", "h21", "h22", "h23", "h24",
+    "h25", "h26", "h27", "h28", "h29",
+    "h30", "h31"
+};
+static const char * const  bRegNames[] =
+{
+    "b0",  "b1",  "b2",  "b3",  "b4",
+    "b5",  "b6",  "b7",  "b8",  "b9",
+    "b10", "b11", "b12", "b13", "b14",
+    "b15", "b16", "b17", "b18", "b19",
+    "b20", "b21", "b22", "b23", "b24",
+    "b25", "b26", "b27", "b28", "b29",
+    "b30", "b31"
+};
 
-        case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-            elemsize = id->idOpSize();
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isGeneralRegister(id->idReg4()));   // mmmmm
-            assert(isScalableVectorSize(elemsize));    // xx
-            // st1h is reserved for scalable B
-            assert((id->idIns() == INS_sve_st1h) ? insOptsScalableAtLeastHalf(id->idInsOpt())
-                                                 : insOptsScalableStandard(id->idInsOpt()));
-            break;
+// clang-format on
 
-        case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableWords(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isGeneralRegister(id->idReg4()));   // mmmmm
-            assert(isScalableVectorSize(elemsize));    // x
-            break;
-
-        case IF_SVE_JJ_4A:   // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
-                           // offsets)
-        case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
-                             // unscaled offsets)
-            elemsize = id->idOpSize();
-            assert(insOptsScalable32bitExtends(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isScalableVectorSize(elemsize));
-            break;
+//------------------------------------------------------------------------
+// emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name.
+//
+// Arguments:
+//    reg - A general-purpose register or SIMD and floating-point register.
+//    size - A register size.
+//    varName - unused parameter.
+//
+// Return value:
+//    A string that represents a general-purpose register name or SIMD and floating-point scalar register name.
+//
+const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) const
+{
+    assert(reg < REG_COUNT);
 
-        case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-            imm      = emitGetInsSC(id);
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isScalableVectorSize(elemsize));    // xx
-            assert(isValidSimm4(imm));                 // iiii
-            break;
+    const char* rn = nullptr;
 
-        case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-            imm      = emitGetInsSC(id);
-            elemsize = id->idOpSize();
-            assert(insOptsScalableWords(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isScalableVectorSize(elemsize));    // x
-            assert(isValidSimm4(imm));                 // iiii
-            break;
-
-        case IF_SVE_HW_4A:   // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_IU_4A:   // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-            elemsize = id->idOpSize();
-            assert(insOptsScalable32bitExtends(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isVectorRegister(id->idReg4()));    // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_HW_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-            elemsize = id->idOpSize();
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isVectorRegister(id->idReg4()));    // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_IF_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
-                             // scalar)
-        case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
-                             // scalar)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableWords(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));      // ttttt
-            assert(isPredicateRegister(id->idReg2()));   // ggg
-            assert(isVectorRegister(id->idReg3()));      // nnnnn
-            assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar)
-        case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));      // ttttt
-            assert(isPredicateRegister(id->idReg2()));   // ggg
-            assert(isGeneralRegister(id->idReg3()));     // nnnnn
-            assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
-            elemsize = id->idOpSize();
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isGeneralRegister(id->idReg4()));   // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
-            elemsize = id->idOpSize();
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q);
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isGeneralRegister(id->idReg4()));   // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableWordsOrQuadwords(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isGeneralRegister(id->idReg4()));   // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_IK_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isGeneralRegister(id->idReg4()));   // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
-                           // scalar)
-            elemsize = id->idOpSize();
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q);
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isGeneralRegister(id->idReg4()));   // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_IU_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-            elemsize = id->idOpSize();
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isVectorRegister(id->idReg4()));    // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar)
-        case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar)
-            elemsize = id->idOpSize();
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q);
-            assert(isVectorRegister(id->idReg1()));      // ttttt
-            assert(isPredicateRegister(id->idReg2()));   // ggg
-            assert(isVectorRegister(id->idReg3()));      // nnnnn
-            assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
+    if (size == EA_8BYTE)
+    {
+        rn = xRegNames[reg];
+    }
+    else if (size == EA_4BYTE)
+    {
+        rn = wRegNames[reg];
+    }
+    else if (isVectorRegister(reg))
+    {
+        if (size == EA_16BYTE)
+        {
+            rn = qRegNames[reg - REG_V0];
+        }
+        else if (size == EA_2BYTE)
+        {
+            rn = hRegNames[reg - REG_V0];
+        }
+        else if (size == EA_1BYTE)
+        {
+            rn = bRegNames[reg - REG_V0];
+        }
+        else if (size == EA_SCALABLE)
+        {
+            rn = emitSveRegName(reg);
+        }
+    }
 
-        case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus
-                           // scalar)
-            elemsize = id->idOpSize();
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
-            assert(isVectorRegister(id->idReg1()));      // ttttt
-            assert(isPredicateRegister(id->idReg2()));   // ggg
-            assert(isVectorRegister(id->idReg3()));      // nnnnn
-            assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_IZ_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
-                             // scalar)
-        case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
-                             // scalar)
-        case IF_SVE_JA_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus
-                             // scalar)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableWords(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));      // ttttt
-            assert(isPredicateRegister(id->idReg2()));   // ggg
-            assert(isVectorRegister(id->idReg3()));      // nnnnn
-            assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
+    assert(rn != nullptr);
 
-        case IF_SVE_JD_4C:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-        case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableDoubleWordsOrQuadword(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isGeneralRegister(id->idReg4()));   // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
+    return rn;
+}
 
-        case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
-                           // scalar)
-            elemsize = id->idOpSize();
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q);
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isGeneralRegister(id->idReg4()));   // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar)
-        case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar)
-        case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar)
-        case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
-                           // scalar)
-        case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar)
-            elemsize = id->idOpSize();
-            assert(insOptsScalableStandard(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isGeneralRegister(id->idReg4()));   // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
-
-        case IF_SVE_JJ_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
-                           // offsets)
-            elemsize = id->idOpSize();
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
-            assert(isVectorRegister(id->idReg1()));    // ttttt
-            assert(isPredicateRegister(id->idReg2())); // ggg
-            assert(isGeneralRegister(id->idReg3()));   // nnnnn
-            assert(isVectorRegister(id->idReg4()));    // mmmmm
-            assert(isScalableVectorSize(elemsize));
-            break;
+//------------------------------------------------------------------------
+// emitVectorRegName: Returns a SIMD vector register name.
+//
+// Arguments:
+//    reg - A SIMD and floating-point register.
+//
+// Return value:
+//    A string  that represents a SIMD vector register name.
+//
+const char* emitter::emitVectorRegName(regNumber reg)
+{
+    assert((reg >= REG_V0) && (reg <= REG_V31));
 
-        case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated)
-            imm      = emitGetInsSC(id);
-            elemsize = id->idOpSize();
-            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));
-            assert(isLowPredicateRegister(id->idReg2()));
-            assert(isVectorRegister(id->idReg3()));
-            assert(emitIsValidEncodedRotationImm90_or_270(imm));
-            assert(isScalableVectorSize(elemsize));
-            break;
+    int index = (int)reg - (int)REG_V0;
 
-        case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated)
-            imm      = emitGetInsSC(id);
-            elemsize = id->idOpSize();
-            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));
-            assert(isLowPredicateRegister(id->idReg2()));
-            assert(isVectorRegister(id->idReg3()));
-            assert(isVectorRegister(id->idReg4()));
-            assert(emitIsValidEncodedRotationImm0_to_270(imm));
-            assert(isScalableVectorSize(elemsize));
-            break;
+    return vRegNames[index];
+}
 
-        case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero
-            elemsize = id->idOpSize();
-            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
-            assert(isPredicateRegister(id->idReg1()));
-            assert(isLowPredicateRegister(id->idReg2()));
-            assert(isVectorRegister(id->idReg3()));
-            assert(isScalableVectorSize(elemsize));
-            break;
+/*****************************************************************************
+ *
+ *  Returns the base encoding of the given CPU instruction.
+ */
 
-        case IF_SVE_HM_2A: // ........xx...... ...ggg....iddddd -- SVE floating-point arithmetic with immediate
-                           // (predicated)
-            imm      = emitGetInsSC(id);
-            elemsize = id->idOpSize();
-            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));
-            assert(isLowPredicateRegister(id->idReg2()));
-            assert(isScalableVectorSize(elemsize));
-            assert(emitIsValidEncodedSmallFloatImm(imm));
-            break;
+emitter::insFormat emitter::emitInsFormat(instruction ins)
+{
+    // clang-format off
+    const static insFormat insFormats[] =
+    {
+        #define INST1(id, nm, info, fmt, e1                                ) fmt,
+        #define INST2(id, nm, info, fmt, e1, e2                            ) fmt,
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        ) fmt,
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) fmt,
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) fmt,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) fmt,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt,
+        #include "instrs.h"
+        #define INST1(id, nm, info, fmt, e1                                                     ) fmt,
+        #define INST2(id, nm, info, fmt, e1, e2                                                 ) fmt,
+        #define INST3(id, nm, info, fmt, e1, e2, e3                                             ) fmt,
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                                         ) fmt,
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                     ) fmt,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                 ) fmt,
+        #define INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                             ) fmt,
+        #define INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                         ) fmt,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                     ) fmt,
+        #define INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10,e11           ) fmt,
+        #define INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13) fmt,
+        #include "instrsarm64sve.h"
+    };
+    // clang-format on
 
-        case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient
-            imm      = emitGetInsSC(id);
-            elemsize = id->idOpSize();
-            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));
-            assert(isVectorRegister(id->idReg2()));
-            assert(isValidUimm3(imm));
-            assert(isScalableVectorSize(elemsize));
-            break;
+    assert(ins < ArrLen(insFormats));
+    assert((insFormats[ins] != IF_NONE));
 
-        case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer
-            elemsize = id->idOpSize();
-            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));
-            assert(isLowPredicateRegister(id->idReg2()));
-            assert(isVectorRegister(id->idReg3()));
-            assert(isScalableVectorSize(elemsize));
-            break;
+    return insFormats[ins];
+}
 
-        case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend
-            elemsize = id->idOpSize();
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_H);
-            assert(isVectorRegister(id->idReg1()));
-            assert(isLowPredicateRegister(id->idReg2()));
-            assert(isVectorRegister(id->idReg3()));
-            assert(isVectorRegister(id->idReg4()));
-            assert(isScalableVectorSize(elemsize));
-            break;
+#define LD  1
+#define ST  2
+#define CMP 4
+#define RSH 8
+#define WID 16
+#define LNG 32
+#define NRW 64
+#define WR2 128 // writes operand 2 instead of 1
 
-        case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing
-                           // multiplicand
-            elemsize = id->idOpSize();
-            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
-            assert(isVectorRegister(id->idReg1()));
-            assert(isLowPredicateRegister(id->idReg2()));
-            assert(isVectorRegister(id->idReg3()));
-            assert(isVectorRegister(id->idReg4()));
-            assert(isScalableVectorSize(elemsize));
-            break;
+// clang-format off
+/*static*/ const BYTE CodeGenInterface::instInfo[] =
+{
+    #define INST1(id, nm, info, fmt, e1                                ) info,
+    #define INST2(id, nm, info, fmt, e1, e2                            ) info,
+    #define INST3(id, nm, info, fmt, e1, e2, e3                        ) info,
+    #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) info,
+    #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) info,
+    #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) info,
+    #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) info,
+    #include "instrs.h"
+    #define INST1(id, nm, info, fmt, e1                                                     ) info,
+    #define INST2(id, nm, info, fmt, e1, e2                                                 ) info,
+    #define INST3(id, nm, info, fmt, e1, e2, e3                                             ) info,
+    #define INST4(id, nm, info, fmt, e1, e2, e3, e4                                         ) info,
+    #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                     ) info,
+    #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                 ) info,
+    #define INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                             ) info,
+    #define INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                         ) info,
+    #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                     ) info,
+    #define INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10,e11           ) info,
+    #define INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13) info,
+    #include "instrsarm64sve.h"
+};
+// clang-format on
 
-        case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register
-        case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register
-            elemsize = id->idOpSize();
-            assert(insOptsNone(id->idInsOpt()));
-            assert(isScalableVectorSize(elemsize));
-            assert(isPredicateRegister(id->idReg1())); // TTTT
-            assert(isGeneralRegister(id->idReg2()));   // nnnnn
-            assert(isValidSimm9(emitGetInsSC(id)));    // iii
-                                                       // iiiiii
-            break;
+//------------------------------------------------------------------------
+// emitInsIsCompare: Returns true if the instruction is some kind of compare or test instruction.
+//
+bool emitter::emitInsIsCompare(instruction ins)
+{
+    // We have pseudo ins like lea which are not included in emitInsLdStTab.
+    if (ins < ArrLen(CodeGenInterface::instInfo))
+        return (CodeGenInterface::instInfo[ins] & CMP) != 0;
+    else
+        return false;
+}
 
-        case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register
-        case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register
-            elemsize = id->idOpSize();
-            assert(insOptsNone(id->idInsOpt()));
-            assert(isScalableVectorSize(elemsize));
-            assert(isVectorRegister(id->idReg1()));  // ttttt
-            assert(isGeneralRegister(id->idReg2())); // nnnnn
-            assert(isValidSimm9(emitGetInsSC(id)));  // iii
-                                                     // iiiiii
-            break;
+//------------------------------------------------------------------------
+// emitInsIsLoad: Returns true if the instruction is some kind of load instruction.
+//
+bool emitter::emitInsIsLoad(instruction ins)
+{
+    // We have pseudo ins like lea which are not included in emitInsLdStTab.
+    if (ins < ArrLen(CodeGenInterface::instInfo))
+        return (CodeGenInterface::instInfo[ins] & LD) != 0;
+    else
+        return false;
+}
 
-        default:
-            printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
-            assert(!"Unexpected format");
-            break;
-    }
+//------------------------------------------------------------------------
+// emitInsIsStore: Returns true if the instruction is some kind of store instruction.
+//
+bool emitter::emitInsIsStore(instruction ins)
+{
+    // We have pseudo ins like lea which are not included in emitInsLdStTab.
+    if (ins < ArrLen(CodeGenInterface::instInfo))
+        return (CodeGenInterface::instInfo[ins] & ST) != 0;
+    else
+        return false;
 }
-#endif // DEBUG
 
-bool emitter::emitInsMayWriteToGCReg(instrDesc* id)
+//------------------------------------------------------------------------
+// emitInsIsLoadOrStore: Returns true if the instruction is some kind of load or store instruction.
+//
+bool emitter::emitInsIsLoadOrStore(instruction ins)
 {
-    instruction ins = id->idIns();
-    insFormat   fmt = id->idInsFmt();
+    // We have pseudo ins like lea which are not included in emitInsLdStTab.
+    if (ins < ArrLen(CodeGenInterface::instInfo))
+        return (CodeGenInterface::instInfo[ins] & (LD | ST)) != 0;
+    else
+        return false;
+}
 
-    switch (fmt)
-    {
+//------------------------------------------------------------------------
+// emitInsIsVectorRightShift: Returns true if the instruction is ASIMD right shift.
+//
+bool emitter::emitInsIsVectorRightShift(instruction ins)
+{
+    // We have pseudo ins like lea which are not included in emitInsLdStTab.
+    if (ins < ArrLen(CodeGenInterface::instInfo))
+        return (CodeGenInterface::instInfo[ins] & RSH) != 0;
+    else
+        return false;
+}
 
-        // These are the formats with "destination" registers:
+//------------------------------------------------------------------------
+// emitInsIsVectorLong: Returns true if the instruction has the destination register that is double that of both source
+// operands. Indicated by the suffix L.
+//
+bool emitter::emitInsIsVectorLong(instruction ins)
+{
+    if (ins < ArrLen(CodeGenInterface::instInfo))
+        return (CodeGenInterface::instInfo[ins] & LNG) != 0;
+    else
+        return false;
+}
 
-        case IF_DI_1B: // DI_1B   X........hwiiiii iiiiiiiiiiiddddd      Rd       imm(i16,hw)
-        case IF_DI_1D: // DI_1D   X........Nrrrrrr ssssss.....ddddd      Rd       imm(N,r,s)
-        case IF_DI_1E: // DI_1E   .ii.....iiiiiiii iiiiiiiiiiiddddd      Rd       simm21
-
-        case IF_DI_2A: // DI_2A   X.......shiiiiii iiiiiinnnnnddddd      Rd Rn    imm(i12,sh)
-        case IF_DI_2B: // DI_2B   X.........Xnnnnn ssssssnnnnnddddd      Rd Rn    imm(0-63)
-        case IF_DI_2C: // DI_2C   X........Nrrrrrr ssssssnnnnnddddd      Rd Rn    imm(N,r,s)
-        case IF_DI_2D: // DI_2D   X........Nrrrrrr ssssssnnnnnddddd      Rd Rn    imr, imms   (N,r,s)
-
-        case IF_DR_1D: // DR_1D   X............... cccc.......ddddd      Rd       cond
-
-        case IF_DR_2D: // DR_2D   X..........nnnnn cccc..nnnnnddddd      Rd Rn    cond
-        case IF_DR_2E: // DR_2E   X..........mmmmm ...........ddddd      Rd    Rm
-        case IF_DR_2F: // DR_2F   X.......sh.mmmmm ssssss.....ddddd      Rd    Rm {LSL,LSR,ASR} imm(0-63)
-        case IF_DR_2G: // DR_2G   X............... ......nnnnnddddd      Rd Rn
-        case IF_DR_2H: // DR_2H   X........X...... ......nnnnnddddd      Rd Rn
-
-        case IF_DR_3A: // DR_3A   X..........mmmmm ......nnnnnddddd      Rd Rn Rm
-        case IF_DR_3B: // DR_3B   X.......sh.mmmmm ssssssnnnnnddddd      Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
-        case IF_DR_3C: // DR_3C   X..........mmmmm xxxsssnnnnnddddd      Rd Rn Rm ext(Rm) LSL imm(0-4)
-        case IF_DR_3D: // DR_3D   X..........mmmmm cccc..nnnnnddddd      Rd Rn Rm cond
-        case IF_DR_3E: // DR_3E   X........X.mmmmm ssssssnnnnnddddd      Rd Rn Rm imm(0-63)
-
-        case IF_DR_4A: // DR_4A   X..........mmmmm .aaaaannnnnddddd      Rd Rn Rm Ra
-
-        case IF_DV_2B: // DV_2B   .Q.........iiiii ......nnnnnddddd      Rd Vn[]    (umov - to general)
-        case IF_DV_2H: // DV_2H   X........X...... ......nnnnnddddd      Rd Vn      (fmov - to general)
-            return true;
-
-        case IF_DV_2C: // DV_2C   .Q.........iiiii ......nnnnnddddd      Vd Rn      (dup/ins - vector from general)
-        case IF_DV_2D: // DV_2D   .Q.........iiiii ......nnnnnddddd      Vd Vn[]    (dup - vector)
-        case IF_DV_2E: // DV_2E   ...........iiiii ......nnnnnddddd      Vd Vn[]    (dup - scalar)
-        case IF_DV_2F: // DV_2F   ...........iiiii .jjjj.nnnnnddddd      Vd[] Vn[]  (ins - element)
-        case IF_DV_2G: // DV_2G   .........X...... ......nnnnnddddd      Vd Vn      (fmov, fcvtXX - register)
-        case IF_DV_2I: // DV_2I   X........X...... ......nnnnnddddd      Vd Rn      (fmov - from general)
-        case IF_DV_2J: // DV_2J   ........SS.....D D.....nnnnnddddd      Vd Vn      (fcvt)
-        case IF_DV_2K: // DV_2K   .........X.mmmmm ......nnnnn.....      Vn Vm      (fcmp)
-        case IF_DV_2L: // DV_2L   ........XX...... ......nnnnnddddd      Vd Vn      (abs, neg - scalar)
-        case IF_DV_2M: // DV_2M   .Q......XX...... ......nnnnnddddd      Vd Vn      (abs, neg - vector)
-        case IF_DV_2P: // DV_2P   ................ ......nnnnnddddd      Vd Vn      (aes*, sha1su1) - Vd both source and
-                       // destination
-
-        case IF_DV_2Q: // DV_2Q   .........X...... ......nnnnnddddd      Sd Vn      (faddp, fmaxnmp, fmaxp, fminnmp,
-                       // fminp - scalar)
-        case IF_DV_2R: // DV_2R   .Q.......X...... ......nnnnnddddd      Sd Vn      (fmaxnmv, fmaxv, fminnmv, fminv)
-        case IF_DV_2S: // DV_2S   ........XX...... ......nnnnnddddd      Sd Vn      (addp - scalar)
-
-        case IF_DV_3A:  // DV_3A   .Q......XX.mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
-        case IF_DV_3AI: // DV_3AI  .Q......XXLMmmmm ....H.nnnnnddddd      Vd Vn Vm[] (vector)
-        case IF_DV_3B:  // DV_3B   .Q.......X.mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
-        case IF_DV_3BI: // DV_3BI  .Q.......XLmmmmm ....H.nnnnnddddd      Vd Vn Vm[] (vector by element)
-        case IF_DV_3C:  // DV_3C   .Q.........mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
-        case IF_DV_3D:  // DV_3D   .........X.mmmmm ......nnnnnddddd      Vd Vn Vm   (scalar)
-        case IF_DV_3DI: // DV_3DI  .........XLmmmmm ....H.nnnnnddddd      Vd Vn Vm[] (scalar by element)
-        case IF_DV_3E:  // DV_3E   ........XX.mmmmm ......nnnnnddddd      Vd Vn Vm   (scalar)
-        case IF_DV_3EI: // DV_3EI  ........XXLMmmmm ....H.nnnnnddddd      Vd Vn Vm[] (scalar by element)
-        case IF_DV_3F:  // DV_3F   .Q......XX.mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
-        case IF_DV_3G:  // DV_3G   .Q.........mmmmm .iiii.nnnnnddddd      Vd Vn Vm imm (vector)
-        case IF_DV_4A:  // DV_4A   .........X.mmmmm .aaaaannnnnddddd      Vd Va Vn Vm (scalar)
-            // Tracked GC pointers cannot be placed into the SIMD registers.
-            return false;
-
-        // These are the load/store formats with "target" registers:
-
-        case IF_LS_1A: // LS_1A   XX...V..iiiiiiii iiiiiiiiiiittttt      Rt    PC imm(1MB)
-        case IF_LS_2A: // LS_2A   .X.......X...... ......nnnnnttttt      Rt Rn
-        case IF_LS_2B: // LS_2B   .X.......Xiiiiii iiiiiinnnnnttttt      Rt Rn    imm(0-4095)
-        case IF_LS_2C: // LS_2C   .X.......X.iiiii iiiiP.nnnnnttttt      Rt Rn    imm(-256..+255) pre/post inc
-        case IF_LS_2D: // LS_2D   .Q.............. ....ssnnnnnttttt      Vt Rn
-        case IF_LS_2E: // LS_2E   .Q.............. ....ssnnnnnttttt      Vt Rn
-        case IF_LS_2F: // LS_2F   .Q.............. xx.Sssnnnnnttttt      Vt[] Rn
-        case IF_LS_2G: // LS_2G   .Q.............. xx.Sssnnnnnttttt      Vt[] Rn
-        case IF_LS_3A: // LS_3A   .X.......X.mmmmm xxxS..nnnnnttttt      Rt Rn Rm ext(Rm) LSL {}
-        case IF_LS_3B: // LS_3B   X............... .aaaaannnnnttttt      Rt Ra Rn
-        case IF_LS_3C: // LS_3C   X.........iiiiii iaaaaannnnnttttt      Rt Ra Rn imm(im7,sh)
-        case IF_LS_3D: // LS_3D   .X.......X.mmmmm ......nnnnnttttt      Wm Rt Rn
-        case IF_LS_3F: // LS_3F   .Q.........mmmmm ....ssnnnnnttttt      Vt Rn Rm
-        case IF_LS_3G: // LS_3G   .Q.........mmmmm ...Sssnnnnnttttt      Vt[] Rn Rm
-
-            // For the Store instructions the "target" register is actually a "source" value
-
-            if (emitInsIsStore(ins))
-            {
-                return false;
-            }
-            else
-            {
-                assert(emitInsIsLoad(ins));
-                return true;
-            }
-
-        case IF_LS_3E: // LS_3E   .X.........mmmmm ......nnnnnttttt      Rm Rt Rn ARMv8.1 LSE Atomics
-            // ARMv8.1 Atomics
-            assert(emitInsIsStore(ins));
-            assert(emitInsIsLoad(ins));
-            return true;
-
-        case IF_SR_1A: // SR_1A   ................ ...........ttttt      Rt       (dc zva, mrs)
-            return ins == INS_mrs_tpid0;
-
-        default:
-            return false;
-    }
-}
-
-bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id)
+//------------------------------------------------------------------------
+// emitInsIsVectorNarrow: Returns true if the element width of the destination register of the instruction is half that
+// of both source operands. Indicated by the suffix N.
+//
+bool emitter::emitInsIsVectorNarrow(instruction ins)
 {
-    if (!id->idIsLclVar())
+    if (ins < ArrLen(CodeGenInterface::instInfo))
+        return (CodeGenInterface::instInfo[ins] & NRW) != 0;
+    else
         return false;
-
-    instruction ins = id->idIns();
-
-    // This list is related to the list of instructions used to store local vars in emitIns_S_R().
-    // We don't accept writing to float local vars.
-
-    switch (ins)
-    {
-        case INS_strb:
-        case INS_strh:
-        case INS_str:
-        case INS_stur:
-        case INS_sturb:
-        case INS_sturh:
-            return true;
-        default:
-            return false;
-    }
 }
 
-bool emitter::emitInsWritesToLclVarStackLocPair(instrDesc* id)
+//------------------------------------------------------------------------
+// emitInsIsVectorWide: Returns true if the element width of the destination register and the first source operand of
+// the instruction is double that of the second source operand. Indicated by the suffix W.
+//
+bool emitter::emitInsIsVectorWide(instruction ins)
 {
-    if (!id->idIsLclVar())
+    if (ins < ArrLen(CodeGenInterface::instInfo))
+        return (CodeGenInterface::instInfo[ins] & WID) != 0;
+    else
         return false;
-
-    instruction ins = id->idIns();
-
-    // This list is related to the list of instructions used to store local vars in emitIns_S_S_R_R().
-    // We don't accept writing to float local vars.
-
-    switch (ins)
-    {
-        case INS_stnp:
-        case INS_stp:
-            return true;
-        default:
-            return false;
-    }
 }
 
-bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id)
+//------------------------------------------------------------------------
+// emitInsDestIsOp2: Returns true if the instruction is one of the special
+// cases that has its destination register as the second register operand
+// instead of the first.
+//
+bool emitter::emitInsDestIsOp2(instruction ins)
 {
-    instruction ins = id->idIns();
-
-    switch (ins)
-    {
-        case INS_ldp:
-        case INS_ldpsw:
-        case INS_ldnp:
-            return true;
-        default:
-            return false;
-    }
+    if (ins < ArrLen(CodeGenInterface::instInfo))
+        return (CodeGenInterface::instInfo[ins] & WR2) != 0;
+    else
+        return false;
 }
 
-// Takes an instrDesc 'id' and uses the instruction 'ins' to determine the
-// size of the target register that is written or read by the instruction.
-// Note that even if EA_4BYTE is returned a load instruction will still
-// always zero the upper 4 bytes of the target register.
-// This method is required so that we can distinguish between loads that are
-// sign-extending as they can have two different sizes for their target register.
-// Additionally for instructions like 'ldr' and 'str' these can load/store
-// either 4 byte or 8 bytes to/from the target register.
-// By convention the small unsigned load instructions are considered to write
-// a 4 byte sized target register, though since these also zero the upper 4 bytes
-// they could equally be considered to write the unsigned value to full 8 byte register.
-//
-emitAttr emitter::emitInsTargetRegSize(instrDesc* id)
-{
-    instruction ins    = id->idIns();
-    emitAttr    result = EA_UNKNOWN;
+#undef LD
+#undef ST
+#undef CMP
+#undef RHS
+#undef WID
+#undef LNG
+#undef NRW
+#undef WR2
 
-    // This is used to determine the size of the target registers for a load/store instruction
+/*****************************************************************************
+ *
+ *  Returns the specific encoding of the given CPU instruction and format
+ */
 
-    switch (ins)
+emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
+{
+    // clang-format off
+    const static code_t insCodes1[] =
     {
-        case INS_ldxrb:
-        case INS_ldarb:
-        case INS_ldaprb:
-        case INS_ldaxrb:
-        case INS_stxrb:
-        case INS_stlrb:
-        case INS_stlxrb:
-        case INS_ldrb:
-        case INS_strb:
-        case INS_ldurb:
-        case INS_ldapurb:
-        case INS_sturb:
-        case INS_stlurb:
-            result = EA_4BYTE;
-            break;
-
-        case INS_ldxrh:
-        case INS_ldarh:
-        case INS_ldaprh:
-        case INS_ldaxrh:
-        case INS_stxrh:
-        case INS_stlrh:
-        case INS_stlxrh:
-        case INS_ldrh:
-        case INS_strh:
-        case INS_ldurh:
-        case INS_sturh:
-        case INS_ldapurh:
-        case INS_stlurh:
-            result = EA_4BYTE;
-            break;
-
-        case INS_ldrsb:
-        case INS_ldursb:
-        case INS_ldrsh:
-        case INS_ldursh:
-            if (id->idOpSize() == EA_8BYTE)
-                result = EA_8BYTE;
-            else
-                result = EA_4BYTE;
-            break;
-
-        case INS_ldrsw:
-        case INS_ldursw:
-        case INS_ldpsw:
-            result = EA_8BYTE;
-            break;
-
-        case INS_ldp:
-        case INS_stp:
-        case INS_ldnp:
-        case INS_stnp:
-            result = id->idOpSize();
-            break;
-
-        case INS_ldxr:
-        case INS_ldar:
-        case INS_ldapr:
-        case INS_ldaxr:
-        case INS_stxr:
-        case INS_stlr:
-        case INS_stlxr:
-        case INS_ldr:
-        case INS_str:
-        case INS_ldur:
-        case INS_stur:
-        case INS_ldapur:
-        case INS_stlur:
-            result = id->idOpSize();
-            break;
-
-        default:
-            NO_WAY("unexpected instruction");
-            break;
-    }
-    return result;
-}
+        #define INST1(id, nm, info, fmt, e1                                ) e1,
+        #define INST2(id, nm, info, fmt, e1, e2                            ) e1,
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        ) e1,
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) e1,
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e1,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e1,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e1,
+        #include "instrs.h"
+    };
+    const static code_t insCodes2[] =
+    {
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            ) e2,
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        ) e2,
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) e2,
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e2,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e2,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e2,
+        #include "instrs.h"
+    };
+    const static code_t insCodes3[] =
+    {
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        ) e3,
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) e3,
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e3,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e3,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e3,
+        #include "instrs.h"
+    };
+    const static code_t insCodes4[] =
+    {
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) e4,
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e4,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e4,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e4,
+        #include "instrs.h"
+    };
+    const static code_t insCodes5[] =
+    {
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e5,
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e5,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e5,
+        #include "instrs.h"
+    };
+    const static code_t insCodes6[] =
+    {
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                )
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e6,
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e6,
+        #include "instrs.h"
+    };
+    const static code_t insCodes7[] =
+    {
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                )
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            )
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e7,
+        #include "instrs.h"
+    };
+    const static code_t insCodes8[] =
+    {
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                )
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            )
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e8,
+        #include "instrs.h"
+    };
+    const static code_t insCodes9[] =
+    {
+        #define INST1(id, nm, info, fmt, e1                                )
+        #define INST2(id, nm, info, fmt, e1, e2                            )
+        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
+        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
+        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                )
+        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            )
+        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e9,
+        #include "instrs.h"
+    };
+    // clang-format on
 
-// Takes an instrDesc and uses the instruction to determine the 'size' of the
-// data that is loaded from memory.
-//
-emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
-{
-    instruction ins    = id->idIns();
-    emitAttr    result = EA_UNKNOWN;
+    const static insFormat formatEncode9[9]  = {IF_DR_2E, IF_DR_2G, IF_DI_1B, IF_DI_1D, IF_DV_3C,
+                                                IF_DV_2B, IF_DV_2C, IF_DV_2E, IF_DV_2F};
+    const static insFormat formatEncode6A[6] = {IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A, IF_DV_3A, IF_DV_3E};
+    const static insFormat formatEncode6B[6] = {IF_LS_2D, IF_LS_3F, IF_LS_2E, IF_LS_2F, IF_LS_3G, IF_LS_2G};
+    const static insFormat formatEncode5A[5] = {IF_LS_2A, IF_LS_2B, IF_LS_2C, IF_LS_3A, IF_LS_1A};
+    const static insFormat formatEncode5B[5] = {IF_DV_2G, IF_DV_2H, IF_DV_2I, IF_DV_1A, IF_DV_1B};
+    const static insFormat formatEncode5C[5] = {IF_DR_3A, IF_DR_3B, IF_DI_2C, IF_DV_3C, IF_DV_1B};
+    const static insFormat formatEncode4A[4] = {IF_LS_2A, IF_LS_2B, IF_LS_2C, IF_LS_3A};
+    const static insFormat formatEncode4B[4] = {IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A};
+    const static insFormat formatEncode4C[4] = {IF_DR_2A, IF_DR_2B, IF_DR_2C, IF_DI_1A};
+    const static insFormat formatEncode4D[4] = {IF_DV_3B, IF_DV_3D, IF_DV_3BI, IF_DV_3DI};
+    const static insFormat formatEncode4E[4] = {IF_DR_3A, IF_DR_3B, IF_DI_2C, IF_DV_3C};
+    const static insFormat formatEncode4F[4] = {IF_DR_3A, IF_DR_3B, IF_DV_3C, IF_DV_1B};
+    const static insFormat formatEncode4G[4] = {IF_DR_2E, IF_DR_2F, IF_DV_2M, IF_DV_2L};
+    const static insFormat formatEncode4H[4] = {IF_DV_3E, IF_DV_3A, IF_DV_2L, IF_DV_2M};
+    const static insFormat formatEncode4I[4] = {IF_DV_3D, IF_DV_3B, IF_DV_2G, IF_DV_2A};
+    const static insFormat formatEncode4J[4] = {IF_DV_2N, IF_DV_2O, IF_DV_3E, IF_DV_3A};
+    const static insFormat formatEncode4K[4] = {IF_DV_3E, IF_DV_3A, IF_DV_3EI, IF_DV_3AI};
+    const static insFormat formatEncode3A[3] = {IF_DR_3A, IF_DR_3B, IF_DI_2C};
+    const static insFormat formatEncode3B[3] = {IF_DR_2A, IF_DR_2B, IF_DI_1C};
+    const static insFormat formatEncode3C[3] = {IF_DR_3A, IF_DR_3B, IF_DV_3C};
+    const static insFormat formatEncode3D[3] = {IF_DV_2C, IF_DV_2D, IF_DV_2E};
+    const static insFormat formatEncode3E[3] = {IF_DV_3B, IF_DV_3BI, IF_DV_3DI};
+    const static insFormat formatEncode3F[3] = {IF_DV_2A, IF_DV_2G, IF_DV_2H};
+    const static insFormat formatEncode3G[3] = {IF_DV_2A, IF_DV_2G, IF_DV_2I};
+    const static insFormat formatEncode3H[3] = {IF_DR_3A, IF_DV_3A, IF_DV_3AI};
+    const static insFormat formatEncode3I[3] = {IF_DR_2E, IF_DR_2F, IF_DV_2M};
+    const static insFormat formatEncode3J[3] = {IF_LS_2D, IF_LS_3F, IF_LS_2E};
+    const static insFormat formatEncode2A[2] = {IF_DR_2E, IF_DR_2F};
+    const static insFormat formatEncode2B[2] = {IF_DR_3A, IF_DR_3B};
+    const static insFormat formatEncode2C[2] = {IF_DR_3A, IF_DI_2D};
+    const static insFormat formatEncode2D[2] = {IF_DR_3A, IF_DI_2B};
+    const static insFormat formatEncode2E[2] = {IF_LS_3B, IF_LS_3C};
+    const static insFormat formatEncode2F[2] = {IF_DR_2I, IF_DI_1F};
+    const static insFormat formatEncode2G[2] = {IF_DV_3B, IF_DV_3D};
+    const static insFormat formatEncode2H[2] = {IF_DV_2C, IF_DV_2F};
+    const static insFormat formatEncode2I[2] = {IF_DV_2K, IF_DV_1C};
+    const static insFormat formatEncode2J[2] = {IF_DV_2A, IF_DV_2G};
+    const static insFormat formatEncode2K[2] = {IF_DV_2M, IF_DV_2L};
+    const static insFormat formatEncode2L[2] = {IF_DR_2G, IF_DV_2M};
+    const static insFormat formatEncode2M[2] = {IF_DV_3A, IF_DV_3AI};
+    const static insFormat formatEncode2N[2] = {IF_DV_2N, IF_DV_2O};
+    const static insFormat formatEncode2O[2] = {IF_DV_3E, IF_DV_3A};
+    const static insFormat formatEncode2P[2] = {IF_DV_2Q, IF_DV_3B};
+    const static insFormat formatEncode2Q[2] = {IF_DV_2S, IF_DV_3A};
 
-    // The 'result' returned is the 'size' of the data that is loaded from memory.
+    code_t    code           = BAD_CODE;
+    insFormat insFmt         = emitInsFormat(ins);
+    bool      encoding_found = false;
+    int       index          = -1;
 
-    switch (ins)
+    switch (insFmt)
     {
-        case INS_ldarb:
-        case INS_ldaprb:
-        case INS_stlrb:
-        case INS_ldrb:
-        case INS_strb:
-        case INS_ldurb:
-        case INS_ldapurb:
-        case INS_sturb:
-        case INS_stlurb:
-        case INS_ldrsb:
-        case INS_ldursb:
-            result = EA_1BYTE;
+        case IF_EN9:
+            for (index = 0; index < 9; index++)
+            {
+                if (fmt == formatEncode9[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
             break;
 
-        case INS_ldarh:
-        case INS_ldaprh:
-        case INS_stlrh:
-        case INS_ldrh:
-        case INS_strh:
-        case INS_ldurh:
-        case INS_sturh:
-        case INS_ldrsh:
-        case INS_ldursh:
-        case INS_ldapurh:
-        case INS_stlurh:
-            result = EA_2BYTE;
+        case IF_EN6A:
+            for (index = 0; index < 6; index++)
+            {
+                if (fmt == formatEncode6A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
             break;
 
-        case INS_ldrsw:
-        case INS_ldursw:
-        case INS_ldpsw:
-            result = EA_4BYTE;
+        case IF_EN6B:
+            for (index = 0; index < 6; index++)
+            {
+                if (fmt == formatEncode6B[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
             break;
 
-        case INS_ldp:
-        case INS_stp:
-        case INS_ldnp:
-        case INS_stnp:
-            result = id->idOpSize();
+        case IF_EN5A:
+            for (index = 0; index < 5; index++)
+            {
+                if (fmt == formatEncode5A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
             break;
 
-        case INS_ldar:
-        case INS_ldapr:
-        case INS_stlr:
-        case INS_ldr:
-        case INS_str:
-        case INS_ldur:
-        case INS_stur:
-        case INS_ldapur:
-        case INS_stlur:
-            result = id->idOpSize();
+        case IF_EN5B:
+            for (index = 0; index < 5; index++)
+            {
+                if (fmt == formatEncode5B[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
             break;
 
-        default:
-            NO_WAY("unexpected instruction");
+        case IF_EN5C:
+            for (index = 0; index < 5; index++)
+            {
+                if (fmt == formatEncode5C[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
             break;
-    }
-    return result;
-}
 
-/*****************************************************************************/
+        case IF_EN4A:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-// clang-format off
-static const char * const  xRegNames[] =
-{
-    #define REGDEF(name, rnum, mask, xname, wname) xname,
-    #include "register.h"
-};
+        case IF_EN4B:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4B[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-static const char * const  wRegNames[] =
-{
-    #define REGDEF(name, rnum, mask, xname, wname) wname,
-    #include "register.h"
-};
+        case IF_EN4C:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4C[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
+        case IF_EN4D:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4D[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-static const char * const  zRegNames[] =
-{
-    "z0",  "z1",  "z2",  "z3",  "z4",
-    "z5",  "z6",  "z7",  "z8",  "z9",
-    "z10", "z11", "z12", "z13", "z14",
-    "z15", "z16", "z17", "z18", "z19",
-    "z20", "z21", "z22", "z23", "z24",
-    "z25", "z26", "z27", "z28", "z29",
-    "z30", "z31"
-};
+        case IF_EN4E:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4E[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-static const char * const  vRegNames[] =
-{
-    "v0",  "v1",  "v2",  "v3",  "v4",
-    "v5",  "v6",  "v7",  "v8",  "v9",
-    "v10", "v11", "v12", "v13", "v14",
-    "v15", "v16", "v17", "v18", "v19",
-    "v20", "v21", "v22", "v23", "v24",
-    "v25", "v26", "v27", "v28", "v29",
-    "v30", "v31"
-};
+        case IF_EN4F:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4F[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-static const char * const  qRegNames[] =
-{
-    "q0",  "q1",  "q2",  "q3",  "q4",
-    "q5",  "q6",  "q7",  "q8",  "q9",
-    "q10", "q11", "q12", "q13", "q14",
-    "q15", "q16", "q17", "q18", "q19",
-    "q20", "q21", "q22", "q23", "q24",
-    "q25", "q26", "q27", "q28", "q29",
-    "q30", "q31"
-};
+        case IF_EN4G:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4G[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-static const char * const  hRegNames[] =
-{
-    "h0",  "h1",  "h2",  "h3",  "h4",
-    "h5",  "h6",  "h7",  "h8",  "h9",
-    "h10", "h11", "h12", "h13", "h14",
-    "h15", "h16", "h17", "h18", "h19",
-    "h20", "h21", "h22", "h23", "h24",
-    "h25", "h26", "h27", "h28", "h29",
-    "h30", "h31"
-};
-static const char * const  bRegNames[] =
-{
-    "b0",  "b1",  "b2",  "b3",  "b4",
-    "b5",  "b6",  "b7",  "b8",  "b9",
-    "b10", "b11", "b12", "b13", "b14",
-    "b15", "b16", "b17", "b18", "b19",
-    "b20", "b21", "b22", "b23", "b24",
-    "b25", "b26", "b27", "b28", "b29",
-    "b30", "b31"
-};
+        case IF_EN4H:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4H[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-static const char * const  pRegNames[] =
-{
-    "p0",  "p1",  "p2",  "p3",  "p4",
-    "p5",  "p6",  "p7",  "p8",  "p9",
-    "p10", "p11", "p12", "p13", "p14",
-    "p15"
-};
+        case IF_EN4I:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4I[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-static const char * const  pnRegNames[] =
-{
-    "pn0",  "pn1",  "pn2",  "pn3",  "pn4",
-    "pn5",  "pn6",  "pn7",  "pn8",  "pn9",
-    "pn10", "pn11", "pn12", "pn13", "pn14",
-    "pn15"
-};
+        case IF_EN4J:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4J[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-static const char * const  svePatternNames[] =
-{
-    "pow2", "vl1", "vl2", "vl3",
-    "vl4", "vl5", "vl6", "vl7",
-    "vl8", "vl16", "vl32", "vl64",
-    "vl128", "vl256", "invalid", "invalid",
-    "invalid", "invalid", "invalid", "invalid",
-    "invalid", "invalid", "invalid", "invalid",
-    "invalid", "invalid", "invalid", "invalid",
-    "invalid", "mul4", "mul3", "all"
-};
+        case IF_EN4K:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4K[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-// clang-format on
+        case IF_EN3A:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-//------------------------------------------------------------------------
-// emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name.
-//
-// Arguments:
-//    reg - A general-purpose register or SIMD and floating-point register.
-//    size - A register size.
-//    varName - unused parameter.
-//
-// Return value:
-//    A string that represents a general-purpose register name or SIMD and floating-point scalar register name.
-//
-const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) const
-{
-    assert(reg < REG_COUNT);
+        case IF_EN3B:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3B[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-    const char* rn = nullptr;
+        case IF_EN3C:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3C[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-    if (size == EA_8BYTE)
-    {
-        rn = xRegNames[reg];
-    }
-    else if (size == EA_4BYTE)
-    {
-        rn = wRegNames[reg];
-    }
-    else if (isVectorRegister(reg))
-    {
-        if (size == EA_16BYTE)
-        {
-            rn = qRegNames[reg - REG_V0];
-        }
-        else if (size == EA_2BYTE)
-        {
-            rn = hRegNames[reg - REG_V0];
-        }
-        else if (size == EA_1BYTE)
-        {
-            rn = bRegNames[reg - REG_V0];
-        }
-        else if (size == EA_SCALABLE)
-        {
-            rn = zRegNames[reg - REG_V0];
-        }
-    }
+        case IF_EN3D:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3D[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-    assert(rn != nullptr);
+        case IF_EN3E:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3E[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-    return rn;
-}
+        case IF_EN3F:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3F[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-//------------------------------------------------------------------------
-// emitSveRegName: Returns a scalable vector register name.
-//
-// Arguments:
-//    reg - A SIMD and floating-point register.
-//
-// Return value:
-//    A string that represents a scalable vector register name.
-//
-const char* emitter::emitSveRegName(regNumber reg)
-{
-    assert((reg >= REG_V0) && (reg <= REG_V31));
+        case IF_EN3G:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3G[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-    int index = (int)reg - (int)REG_V0;
+        case IF_EN3H:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3H[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-    return zRegNames[index];
-}
+        case IF_EN3I:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3I[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-//------------------------------------------------------------------------
-// emitVectorRegName: Returns a SIMD vector register name.
-//
-// Arguments:
-//    reg - A SIMD and floating-point register.
-//
-// Return value:
-//    A string  that represents a SIMD vector register name.
-//
-const char* emitter::emitVectorRegName(regNumber reg)
-{
-    assert((reg >= REG_V0) && (reg <= REG_V31));
+        case IF_EN3J:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3J[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-    int index = (int)reg - (int)REG_V0;
+        case IF_EN2A:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-    return vRegNames[index];
-}
+        case IF_EN2B:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2B[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-//------------------------------------------------------------------------
-// emitPredicateRegName: Returns a predicate register name.
-//
-// Arguments:
-//    reg - A predicate register.
-//
-// Return value:
-//    A string that represents a predicate register name.
-//
-const char* emitter::emitPredicateRegName(regNumber reg, PredicateType ptype)
-{
-    assert((reg >= REG_P0) && (reg <= REG_P15));
+        case IF_EN2C:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2C[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-    const int  index     = (int)reg - (int)REG_P0;
-    const bool usePnRegs = (ptype == PREDICATE_N) || (ptype == PREDICATE_N_SIZED);
+        case IF_EN2D:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2D[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-    return usePnRegs ? pnRegNames[index] : pRegNames[index];
-}
+        case IF_EN2E:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2E[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-/*****************************************************************************
- *
- *  Returns the base encoding of the given CPU instruction.
- */
+        case IF_EN2F:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2F[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-emitter::insFormat emitter::emitInsFormat(instruction ins)
-{
-    // clang-format off
-    const static insFormat insFormats[] =
-    {
-        #define INST1(id, nm, info, fmt, e1                                ) fmt,
-        #define INST2(id, nm, info, fmt, e1, e2                            ) fmt,
-        #define INST3(id, nm, info, fmt, e1, e2, e3                        ) fmt,
-        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) fmt,
-        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) fmt,
-        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) fmt,
-        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt,
-        #include "instrs.h"
-        #define INST1(id, nm, info, fmt, e1                                                     ) fmt,
-        #define INST2(id, nm, info, fmt, e1, e2                                                 ) fmt,
-        #define INST3(id, nm, info, fmt, e1, e2, e3                                             ) fmt,
-        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                                         ) fmt,
-        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                     ) fmt,
-        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                 ) fmt,
-        #define INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                             ) fmt,
-        #define INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                         ) fmt,
-        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                     ) fmt,
-        #define INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10,e11           ) fmt,
-        #define INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13) fmt,
-        #include "instrsarm64sve.h"
-    };
-    // clang-format on
+        case IF_EN2G:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2G[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-    assert(ins < ArrLen(insFormats));
-    assert((insFormats[ins] != IF_NONE));
+        case IF_EN2H:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2H[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-    return insFormats[ins];
-}
+        case IF_EN2I:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2I[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-#define LD 1
-#define ST 2
-#define CMP 4
-#define RSH 8
-#define WID 16
-#define LNG 32
-#define NRW 64
-#define WR2 128 // writes operand 2 instead of 1
+        case IF_EN2J:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2J[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-// clang-format off
-/*static*/ const BYTE CodeGenInterface::instInfo[] =
-{
-    #define INST1(id, nm, info, fmt, e1                                ) info,
-    #define INST2(id, nm, info, fmt, e1, e2                            ) info,
-    #define INST3(id, nm, info, fmt, e1, e2, e3                        ) info,
-    #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) info,
-    #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) info,
-    #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) info,
-    #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) info,
-    #include "instrs.h"
-    #define INST1(id, nm, info, fmt, e1                                                     ) info,
-    #define INST2(id, nm, info, fmt, e1, e2                                                 ) info,
-    #define INST3(id, nm, info, fmt, e1, e2, e3                                             ) info,
-    #define INST4(id, nm, info, fmt, e1, e2, e3, e4                                         ) info,
-    #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                     ) info,
-    #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                 ) info,
-    #define INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                             ) info,
-    #define INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                         ) info,
-    #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                     ) info,
-    #define INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10,e11           ) info,
-    #define INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13) info,
-    #include "instrsarm64sve.h"
-};
-// clang-format on
+        case IF_EN2K:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2K[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-//------------------------------------------------------------------------
-// emitInsIsCompare: Returns true if the instruction is some kind of compare or test instruction.
-//
-bool emitter::emitInsIsCompare(instruction ins)
-{
-    // We have pseudo ins like lea which are not included in emitInsLdStTab.
-    if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & CMP) != 0;
-    else
-        return false;
-}
+        case IF_EN2L:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2L[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-//------------------------------------------------------------------------
-// emitInsIsLoad: Returns true if the instruction is some kind of load instruction.
-//
-bool emitter::emitInsIsLoad(instruction ins)
-{
-    // We have pseudo ins like lea which are not included in emitInsLdStTab.
-    if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & LD) != 0;
-    else
-        return false;
-}
+        case IF_EN2M:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2M[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-//------------------------------------------------------------------------
-// emitInsIsStore: Returns true if the instruction is some kind of store instruction.
-//
-bool emitter::emitInsIsStore(instruction ins)
-{
-    // We have pseudo ins like lea which are not included in emitInsLdStTab.
-    if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & ST) != 0;
-    else
-        return false;
-}
+        case IF_EN2N:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2N[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-//------------------------------------------------------------------------
-// emitInsIsLoadOrStore: Returns true if the instruction is some kind of load or store instruction.
-//
-bool emitter::emitInsIsLoadOrStore(instruction ins)
-{
-    // We have pseudo ins like lea which are not included in emitInsLdStTab.
-    if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & (LD | ST)) != 0;
-    else
-        return false;
-}
+        case IF_EN2O:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2O[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-//------------------------------------------------------------------------
-// emitInsIsVectorRightShift: Returns true if the instruction is ASIMD right shift.
-//
-bool emitter::emitInsIsVectorRightShift(instruction ins)
-{
-    // We have pseudo ins like lea which are not included in emitInsLdStTab.
-    if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & RSH) != 0;
-    else
-        return false;
-}
+        case IF_EN2P:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2P[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-//------------------------------------------------------------------------
-// emitInsIsVectorLong: Returns true if the instruction has the destination register that is double that of both source
-// operands. Indicated by the suffix L.
-//
-bool emitter::emitInsIsVectorLong(instruction ins)
-{
-    if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & LNG) != 0;
-    else
-        return false;
-}
+        case IF_EN2Q:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2Q[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
 
-//------------------------------------------------------------------------
-// emitInsIsVectorNarrow: Returns true if the element width of the destination register of the instruction is half that
-// of both source operands. Indicated by the suffix N.
-//
-bool emitter::emitInsIsVectorNarrow(instruction ins)
-{
-    if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & NRW) != 0;
-    else
-        return false;
+        default:
+            if (fmt == insFmt)
+            {
+                encoding_found = true;
+                index          = 0;
+            }
+            else
+            {
+                encoding_found = false;
+            }
+            break;
+    }
+
+    assert(encoding_found);
+
+    switch (index)
+    {
+        case 0:
+            assert(ins < ArrLen(insCodes1));
+            code = insCodes1[ins];
+            break;
+        case 1:
+            assert(ins < ArrLen(insCodes2));
+            code = insCodes2[ins];
+            break;
+        case 2:
+            assert(ins < ArrLen(insCodes3));
+            code = insCodes3[ins];
+            break;
+        case 3:
+            assert(ins < ArrLen(insCodes4));
+            code = insCodes4[ins];
+            break;
+        case 4:
+            assert(ins < ArrLen(insCodes5));
+            code = insCodes5[ins];
+            break;
+        case 5:
+            assert(ins < ArrLen(insCodes6));
+            code = insCodes6[ins];
+            break;
+        case 6:
+            assert(ins < ArrLen(insCodes7));
+            code = insCodes7[ins];
+            break;
+        case 7:
+            assert(ins < ArrLen(insCodes8));
+            code = insCodes8[ins];
+            break;
+        case 8:
+            assert(ins < ArrLen(insCodes9));
+            code = insCodes9[ins];
+            break;
+    }
+
+    assert((code != BAD_CODE));
+
+    return code;
 }
 
-//------------------------------------------------------------------------
-// emitInsIsVectorWide: Returns true if the element width of the destination register and the first source operand of
-// the instruction is double that of the second source operand. Indicated by the suffix W.
-//
-bool emitter::emitInsIsVectorWide(instruction ins)
+// true if this 'imm' can be encoded as a input operand to a mov instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_mov(INT64 imm, emitAttr size)
 {
-    if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & WID) != 0;
-    else
-        return false;
-}
+    // Check for "MOV (wide immediate)".
+    if (canEncodeHalfwordImm(imm, size))
+        return true;
 
-//------------------------------------------------------------------------
-// emitInsDestIsOp2: Returns true if the instruction is one of the special
-// cases that has its destination register as the second register operand
-// instead of the first.
-//
-bool emitter::emitInsDestIsOp2(instruction ins)
-{
-    if (ins < ArrLen(CodeGenInterface::instInfo))
-        return (CodeGenInterface::instInfo[ins] & WR2) != 0;
-    else
-        return false;
-}
+    // Next try the ones-complement form of 'halfword immediate' imm(i16,hw),
+    // namely "MOV (inverted wide immediate)".
+    ssize_t notOfImm = NOT_helper(imm, getBitWidth(size));
+    if (canEncodeHalfwordImm(notOfImm, size))
+        return true;
 
-#undef LD
-#undef ST
-#undef CMP
-#undef RHS
-#undef WID
-#undef LNG
-#undef NRW
-#undef WR2
+    // Finally try "MOV (bitmask immediate)" imm(N,r,s)
+    if (canEncodeBitMaskImm(imm, size))
+        return true;
 
-/*****************************************************************************
- *
- *  Returns the specific encoding of the given CPU instruction and format
- */
+    return false;
+}
 
-emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
+// true if this 'imm' can be encoded as a input operand to a vector movi instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_movi(INT64 imm, emitAttr elemsize)
 {
-    // clang-format off
-    const static code_t insCodes1[] =
-    {
-        #define INST1(id, nm, info, fmt, e1                                ) e1,
-        #define INST2(id, nm, info, fmt, e1, e2                            ) e1,
-        #define INST3(id, nm, info, fmt, e1, e2, e3                        ) e1,
-        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) e1,
-        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e1,
-        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e1,
-        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e1,
-        #include "instrs.h"
-    };
-    const static code_t insCodes2[] =
-    {
-        #define INST1(id, nm, info, fmt, e1                                )
-        #define INST2(id, nm, info, fmt, e1, e2                            ) e2,
-        #define INST3(id, nm, info, fmt, e1, e2, e3                        ) e2,
-        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) e2,
-        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e2,
-        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e2,
-        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e2,
-        #include "instrs.h"
-    };
-    const static code_t insCodes3[] =
-    {
-        #define INST1(id, nm, info, fmt, e1                                )
-        #define INST2(id, nm, info, fmt, e1, e2                            )
-        #define INST3(id, nm, info, fmt, e1, e2, e3                        ) e3,
-        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) e3,
-        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e3,
-        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e3,
-        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e3,
-        #include "instrs.h"
-    };
-    const static code_t insCodes4[] =
-    {
-        #define INST1(id, nm, info, fmt, e1                                )
-        #define INST2(id, nm, info, fmt, e1, e2                            )
-        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
-        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    ) e4,
-        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e4,
-        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e4,
-        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e4,
-        #include "instrs.h"
-    };
-    const static code_t insCodes5[] =
-    {
-        #define INST1(id, nm, info, fmt, e1                                )
-        #define INST2(id, nm, info, fmt, e1, e2                            )
-        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
-        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
-        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                ) e5,
-        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e5,
-        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e5,
-        #include "instrs.h"
-    };
-    const static code_t insCodes6[] =
-    {
-        #define INST1(id, nm, info, fmt, e1                                )
-        #define INST2(id, nm, info, fmt, e1, e2                            )
-        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
-        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
-        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                )
-        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            ) e6,
-        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e6,
-        #include "instrs.h"
-    };
-    const static code_t insCodes7[] =
-    {
-        #define INST1(id, nm, info, fmt, e1                                )
-        #define INST2(id, nm, info, fmt, e1, e2                            )
-        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
-        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
-        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                )
-        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            )
-        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e7,
-        #include "instrs.h"
-    };
-    const static code_t insCodes8[] =
-    {
-        #define INST1(id, nm, info, fmt, e1                                )
-        #define INST2(id, nm, info, fmt, e1, e2                            )
-        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
-        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
-        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                )
-        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            )
-        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e8,
-        #include "instrs.h"
-    };
-    const static code_t insCodes9[] =
-    {
-        #define INST1(id, nm, info, fmt, e1                                )
-        #define INST2(id, nm, info, fmt, e1, e2                            )
-        #define INST3(id, nm, info, fmt, e1, e2, e3                        )
-        #define INST4(id, nm, info, fmt, e1, e2, e3, e4                    )
-        #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                )
-        #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6            )
-        #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e9,
-        #include "instrs.h"
-    };
-    // clang-format on
-
-    const static insFormat formatEncode9[9] = {IF_DR_2E, IF_DR_2G, IF_DI_1B, IF_DI_1D, IF_DV_3C,
-                                               IF_DV_2B, IF_DV_2C, IF_DV_2E, IF_DV_2F};
-    const static insFormat formatEncode6A[6] = {IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A, IF_DV_3A, IF_DV_3E};
-    const static insFormat formatEncode6B[6] = {IF_LS_2D, IF_LS_3F, IF_LS_2E, IF_LS_2F, IF_LS_3G, IF_LS_2G};
-    const static insFormat formatEncode5A[5] = {IF_LS_2A, IF_LS_2B, IF_LS_2C, IF_LS_3A, IF_LS_1A};
-    const static insFormat formatEncode5B[5] = {IF_DV_2G, IF_DV_2H, IF_DV_2I, IF_DV_1A, IF_DV_1B};
-    const static insFormat formatEncode5C[5] = {IF_DR_3A, IF_DR_3B, IF_DI_2C, IF_DV_3C, IF_DV_1B};
-    const static insFormat formatEncode4A[4] = {IF_LS_2A, IF_LS_2B, IF_LS_2C, IF_LS_3A};
-    const static insFormat formatEncode4B[4] = {IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A};
-    const static insFormat formatEncode4C[4] = {IF_DR_2A, IF_DR_2B, IF_DR_2C, IF_DI_1A};
-    const static insFormat formatEncode4D[4] = {IF_DV_3B, IF_DV_3D, IF_DV_3BI, IF_DV_3DI};
-    const static insFormat formatEncode4E[4] = {IF_DR_3A, IF_DR_3B, IF_DI_2C, IF_DV_3C};
-    const static insFormat formatEncode4F[4] = {IF_DR_3A, IF_DR_3B, IF_DV_3C, IF_DV_1B};
-    const static insFormat formatEncode4G[4] = {IF_DR_2E, IF_DR_2F, IF_DV_2M, IF_DV_2L};
-    const static insFormat formatEncode4H[4] = {IF_DV_3E, IF_DV_3A, IF_DV_2L, IF_DV_2M};
-    const static insFormat formatEncode4I[4] = {IF_DV_3D, IF_DV_3B, IF_DV_2G, IF_DV_2A};
-    const static insFormat formatEncode4J[4] = {IF_DV_2N, IF_DV_2O, IF_DV_3E, IF_DV_3A};
-    const static insFormat formatEncode4K[4] = {IF_DV_3E, IF_DV_3A, IF_DV_3EI, IF_DV_3AI};
-    const static insFormat formatEncode3A[3] = {IF_DR_3A, IF_DR_3B, IF_DI_2C};
-    const static insFormat formatEncode3B[3] = {IF_DR_2A, IF_DR_2B, IF_DI_1C};
-    const static insFormat formatEncode3C[3] = {IF_DR_3A, IF_DR_3B, IF_DV_3C};
-    const static insFormat formatEncode3D[3] = {IF_DV_2C, IF_DV_2D, IF_DV_2E};
-    const static insFormat formatEncode3E[3] = {IF_DV_3B, IF_DV_3BI, IF_DV_3DI};
-    const static insFormat formatEncode3F[3] = {IF_DV_2A, IF_DV_2G, IF_DV_2H};
-    const static insFormat formatEncode3G[3] = {IF_DV_2A, IF_DV_2G, IF_DV_2I};
-    const static insFormat formatEncode3H[3] = {IF_DR_3A, IF_DV_3A, IF_DV_3AI};
-    const static insFormat formatEncode3I[3] = {IF_DR_2E, IF_DR_2F, IF_DV_2M};
-    const static insFormat formatEncode3J[3] = {IF_LS_2D, IF_LS_3F, IF_LS_2E};
-    const static insFormat formatEncode2A[2] = {IF_DR_2E, IF_DR_2F};
-    const static insFormat formatEncode2B[2] = {IF_DR_3A, IF_DR_3B};
-    const static insFormat formatEncode2C[2] = {IF_DR_3A, IF_DI_2D};
-    const static insFormat formatEncode2D[2] = {IF_DR_3A, IF_DI_2B};
-    const static insFormat formatEncode2E[2] = {IF_LS_3B, IF_LS_3C};
-    const static insFormat formatEncode2F[2] = {IF_DR_2I, IF_DI_1F};
-    const static insFormat formatEncode2G[2] = {IF_DV_3B, IF_DV_3D};
-    const static insFormat formatEncode2H[2] = {IF_DV_2C, IF_DV_2F};
-    const static insFormat formatEncode2I[2] = {IF_DV_2K, IF_DV_1C};
-    const static insFormat formatEncode2J[2] = {IF_DV_2A, IF_DV_2G};
-    const static insFormat formatEncode2K[2] = {IF_DV_2M, IF_DV_2L};
-    const static insFormat formatEncode2L[2] = {IF_DR_2G, IF_DV_2M};
-    const static insFormat formatEncode2M[2] = {IF_DV_3A, IF_DV_3AI};
-    const static insFormat formatEncode2N[2] = {IF_DV_2N, IF_DV_2O};
-    const static insFormat formatEncode2O[2] = {IF_DV_3E, IF_DV_3A};
-    const static insFormat formatEncode2P[2] = {IF_DV_2Q, IF_DV_3B};
-    const static insFormat formatEncode2Q[2] = {IF_DV_2S, IF_DV_3A};
-
-    code_t    code           = BAD_CODE;
-    insFormat insFmt         = emitInsFormat(ins);
-    bool      encoding_found = false;
-    int       index          = -1;
-
-    switch (insFmt)
+    if (elemsize == EA_8BYTE)
     {
-        case IF_EN9:
-            for (index = 0; index < 9; index++)
+        UINT64 uimm = imm;
+        while (uimm != 0)
+        {
+            INT64 loByte = uimm & 0xFF;
+            if ((loByte == 0) || (loByte == 0xFF))
             {
-                if (fmt == formatEncode9[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
+                uimm >>= 8;
             }
-            break;
-
-        case IF_EN6A:
-            for (index = 0; index < 6; index++)
+            else
             {
-                if (fmt == formatEncode6A[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
+                return false;
             }
-            break;
+        }
+        assert(uimm == 0);
+        return true;
+    }
+    else
+    {
+        // First try the standard 'byteShifted immediate' imm(i8,bySh)
+        if (canEncodeByteShiftedImm(imm, elemsize, true))
+            return true;
 
-        case IF_EN6B:
-            for (index = 0; index < 6; index++)
-            {
-                if (fmt == formatEncode6B[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+        // Next try the ones-complement form of the 'immediate' imm(i8,bySh)
+        ssize_t notOfImm = NOT_helper(imm, getBitWidth(elemsize));
+        if (canEncodeByteShiftedImm(notOfImm, elemsize, true))
+            return true;
+    }
+    return false;
+}
 
-        case IF_EN5A:
-            for (index = 0; index < 5; index++)
-            {
-                if (fmt == formatEncode5A[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+// true if this 'imm' can be encoded as a input operand to a fmov instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_fmov(double immDbl)
+{
+    if (canEncodeFloatImm8(immDbl))
+        return true;
 
-        case IF_EN5B:
-            for (index = 0; index < 5; index++)
-            {
-                if (fmt == formatEncode5B[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    return false;
+}
 
-        case IF_EN5C:
-            for (index = 0; index < 5; index++)
-            {
-                if (fmt == formatEncode5C[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+// true if this 'imm' can be encoded as a input operand to an add instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_add(INT64 imm, emitAttr size)
+{
+    if (unsigned_abs(imm) <= 0x0fff)
+        return true;
+    else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding
+        return true;
 
-        case IF_EN4A:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4A[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    return false;
+}
 
-        case IF_EN4B:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4B[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+// true if this 'imm' can be encoded as a input operand to an non-add/sub alu instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_cmp(INT64 imm, emitAttr size)
+{
+    return emitIns_valid_imm_for_add(imm, size);
+}
 
-        case IF_EN4C:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4C[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+// true if this 'imm' can be encoded as a input operand to an non-add/sub alu instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_alu(INT64 imm, emitAttr size)
+{
+    if (canEncodeBitMaskImm(imm, size))
+        return true;
 
-        case IF_EN4D:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4D[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    return false;
+}
 
-        case IF_EN4E:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4E[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+// true if this 'imm' can be encoded as the offset in an unscaled ldr/str instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_unscaled_ldst_offset(INT64 imm)
+{
+    return (imm >= -256) && (imm <= 255);
+}
 
-        case IF_EN4F:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4F[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+// true if this 'imm' can be encoded as the offset in a ldr/str instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr attr)
+{
+    if (imm == 0)
+        return true; // Encodable using IF_LS_2A
 
-        case IF_EN4G:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4G[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    if (isValidSimm<9>(imm))
+        return true; // Encodable using IF_LS_2C (or possibly IF_LS_2B)
 
-        case IF_EN4H:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4H[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    if (imm < 0)
+        return false; // not encodable
 
-        case IF_EN4I:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4I[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    emitAttr size  = EA_SIZE(attr);
+    unsigned scale = NaturalScale_helper(size);
+    ssize_t  mask  = size - 1; // the mask of low bits that must be zero to encode the immediate
 
-        case IF_EN4J:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4J[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
+        return true; // Encodable using IF_LS_2B
 
-        case IF_EN4K:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4K[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    return false; // not encodable
+}
 
-        case IF_EN3A:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3A[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+// true if this 'imm' can be encoded as a input operand to a ccmp instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_ccmp(INT64 imm)
+{
+    return ((imm & 0x01f) == imm);
+}
 
-        case IF_EN3B:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3B[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+// true if 'imm' can be encoded as an offset in a ldp/stp instruction
+/*static*/ bool emitter::canEncodeLoadOrStorePairOffset(INT64 imm, emitAttr attr)
+{
+    assert((attr == EA_4BYTE) || (attr == EA_8BYTE) || (attr == EA_16BYTE));
+    const int size = EA_SIZE_IN_BYTES(attr);
+    return (imm % size == 0) && (imm >= -64 * size) && (imm < 64 * size);
+}
 
-        case IF_EN3C:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3C[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+/************************************************************************
+ *
+ *   A helper method to return the natural scale for an EA 'size'
+ */
 
-        case IF_EN3D:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3D[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+/*static*/ unsigned emitter::NaturalScale_helper(emitAttr size)
+{
+    assert(size == EA_1BYTE || size == EA_2BYTE || size == EA_4BYTE || size == EA_8BYTE || size == EA_16BYTE);
+    return BitOperations::Log2((unsigned)size);
+}
 
-        case IF_EN3E:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3E[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+/************************************************************************
+ *
+ *  A helper method to perform a Rotate-Right shift operation
+ *  the source is 'value' and it is rotated right by 'sh' bits
+ *  'value' is considered to be a fixed size 'width' set of bits.
+ *
+ *  Example
+ *      value is '00001111', sh is 2 and width is 8
+ *     result is '11000011'
+ */
 
-        case IF_EN3F:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3F[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+/*static*/ UINT64 emitter::ROR_helper(UINT64 value, unsigned sh, unsigned width)
+{
+    assert(width <= 64);
+    // Check that 'value' fits in 'width' bits
+    assert((width == 64) || (value < (1ULL << width)));
+    // We don't support shifts >= width
+    assert(sh < width);
 
-        case IF_EN3G:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3G[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    UINT64 result;
 
-        case IF_EN3H:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3H[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    unsigned rsh = sh;
+    unsigned lsh = width - rsh;
 
-        case IF_EN3I:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3I[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    result = (value >> rsh);
+    result |= (value << lsh);
 
-        case IF_EN3J:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3J[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    if (width < 64)
+    {
+        // mask off any extra bits that we got from the left shift
+        result &= ((1ULL << width) - 1);
+    }
+    return result;
+}
+/************************************************************************
+ *
+ *  A helper method to perform a 'NOT' bitwise complement operation.
+ *  'value' is considered to be a fixed size 'width' set of bits.
+ *
+ *  Example
+ *      value is '01001011', and width is 8
+ *     result is '10110100'
+ */
 
-        case IF_EN2A:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2A[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+/*static*/ UINT64 emitter::NOT_helper(UINT64 value, unsigned width)
+{
+    assert(width <= 64);
 
-        case IF_EN2B:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2B[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    UINT64 result = ~value;
 
-        case IF_EN2C:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2C[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    if (width < 64)
+    {
+        // Check that 'value' fits in 'width' bits. Don't consider "sign" bits above width.
+        UINT64 maxVal       = 1ULL << width;
+        UINT64 lowBitsMask  = maxVal - 1;
+        UINT64 signBitsMask = ~lowBitsMask | (1ULL << (width - 1)); // The high bits must be set, and the top bit
+                                                                    // (sign bit) must be set.
+        assert((value < maxVal) || ((value & signBitsMask) == signBitsMask));
 
-        case IF_EN2D:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2D[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+        // mask off any extra bits that we got from the complement operation
+        result &= lowBitsMask;
+    }
 
-        case IF_EN2E:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2E[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    return result;
+}
 
-        case IF_EN2F:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2F[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+/************************************************************************
+ *
+ *  A helper method to perform a bit Replicate operation
+ *  the source is 'value' with a fixed size 'width' set of bits.
+ *  value is replicated to fill out 8/16/32/64 bits as determined by 'size'.
+ *
+ *  Example
+ *      value is '11000011' (0xE3), width is 8 and size is EA_8BYTE
+ *     result is '11000011 11000011 11000011 11000011 11000011 11000011 11000011 11000011'
+ *               0xE3E3E3E3E3E3E3E3
+ */
 
-        case IF_EN2G:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2G[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+/*static*/ UINT64 emitter::Replicate_helper(UINT64 value, unsigned width, emitAttr size)
+{
+    unsigned immWidth = getBitWidth(size);
+    assert(width <= immWidth);
 
-        case IF_EN2H:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2H[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    UINT64   result     = value;
+    unsigned filledBits = width;
 
-        case IF_EN2I:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2I[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    while (filledBits < immWidth)
+    {
+        value <<= width;
+        result |= value;
+        filledBits += width;
+    }
+    return result;
+}
 
-        case IF_EN2J:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2J[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+/************************************************************************
+ *
+ *  Convert an imm(N,r,s) into a 64-bit immediate
+ *  inputs 'bmImm' a bitMaskImm struct
+ *         'size' specifies the size of the result (8/16/32/64 bits)
+ */
 
-        case IF_EN2K:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2K[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+/*static*/ INT64 emitter::emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size)
+{
+    unsigned N = bmImm.immN; // read the N,R and S values from the 'bitMaskImm' encoding
+    unsigned R = bmImm.immR;
+    unsigned S = bmImm.immS;
 
-        case IF_EN2L:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2L[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    unsigned elemWidth = 64; // used when N == 1
 
-        case IF_EN2M:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2M[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    if (N == 0) // find the smaller elemWidth when N == 0
+    {
+        // Scan S for the highest bit not set
+        elemWidth = 32;
+        for (unsigned bitNum = 5; bitNum > 0; bitNum--)
+        {
+            unsigned oneBit = elemWidth;
+            if ((S & oneBit) == 0)
+                break;
+            elemWidth /= 2;
+        }
+    }
+    else
+    {
+        assert(size == EA_8BYTE);
+    }
 
-        case IF_EN2N:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2N[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    unsigned maskSR = elemWidth - 1;
 
-        case IF_EN2O:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2O[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    S &= maskSR;
+    R &= maskSR;
 
-        case IF_EN2P:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2P[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    // encoding for S is one less than the number of consecutive one bits
+    S++; // Number of consecutive ones to generate in 'welem'
 
-        case IF_EN2Q:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2Q[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
+    // At this point:
+    //
+    //    'elemWidth' is the number of bits that we will use for the ROR and Replicate operations
+    //    'S'         is the number of consecutive 1 bits for the immediate
+    //    'R'         is the number of bits that we will Rotate Right the immediate
+    //    'size'      selects the final size of the immediate that we return (64 or 32 bits)
 
-        default:
-            if (fmt == insFmt)
-            {
-                encoding_found = true;
-                index          = 0;
-            }
-            else
-            {
-                encoding_found = false;
-            }
-            break;
-    }
+    assert(S < elemWidth); // 'elemWidth' consecutive one's is a reserved encoding
 
-    assert(encoding_found);
+    UINT64 welem;
+    UINT64 wmask;
 
-    switch (index)
-    {
-        case 0:
-            assert(ins < ArrLen(insCodes1));
-            code = insCodes1[ins];
-            break;
-        case 1:
-            assert(ins < ArrLen(insCodes2));
-            code = insCodes2[ins];
-            break;
-        case 2:
-            assert(ins < ArrLen(insCodes3));
-            code = insCodes3[ins];
-            break;
-        case 3:
-            assert(ins < ArrLen(insCodes4));
-            code = insCodes4[ins];
-            break;
-        case 4:
-            assert(ins < ArrLen(insCodes5));
-            code = insCodes5[ins];
-            break;
-        case 5:
-            assert(ins < ArrLen(insCodes6));
-            code = insCodes6[ins];
-            break;
-        case 6:
-            assert(ins < ArrLen(insCodes7));
-            code = insCodes7[ins];
-            break;
-        case 7:
-            assert(ins < ArrLen(insCodes8));
-            code = insCodes8[ins];
-            break;
-        case 8:
-            assert(ins < ArrLen(insCodes9));
-            code = insCodes9[ins];
-            break;
-    }
+    welem = (1ULL << S) - 1;
 
-    assert((code != BAD_CODE));
+    wmask = ROR_helper(welem, R, elemWidth);
+    wmask = Replicate_helper(wmask, elemWidth, size);
 
-    return code;
+    return wmask;
 }
 
 /*****************************************************************************
  *
- *  Returns the specific encoding of the given CPU instruction and format
+ *  Check if an immediate can use the left shifted by 12 bits encoding
  */
 
-emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt)
+/*static*/ bool emitter::canEncodeWithShiftImmBy12(INT64 imm)
 {
-    // clang-format off
-    const static code_t insCodes1[] =
-    {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) e1,
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) e1,
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) e1,
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) e1,
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) e1,
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) e1,
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e1,
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e1,
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e1,
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e1,
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e1,
-        #include "instrsarm64sve.h"
-    };
-
-    const static code_t insCodes2[] =
+    if (imm < 0)
     {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) 
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) e2,
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) e2,
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) e2,
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) e2,
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) e2,
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e2,
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e2,
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e2,
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e2,
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e2,
-        #include "instrsarm64sve.h"
-    };
+        imm = -imm; // convert to unsigned
+    }
 
-    const static code_t insCodes3[] =
+    if (imm < 0)
     {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) 
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) e3,
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) e3,
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) e3,
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) e3,
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e3,
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e3,
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e3,
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e3,
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e3,
-        #include "instrsarm64sve.h"
-    };
+        return false; // Must be MIN_INT64
+    }
 
-    const static code_t insCodes4[] =
+    if ((imm & 0xfff) != 0) // Now the low 12 bits all have to be zero
     {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) 
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) e4,
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) e4,
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) e4,
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e4,
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e4,
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e4,
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e4,
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e4,
-        #include "instrsarm64sve.h"
-    };
+        return false;
+    }
 
-    const static code_t insCodes5[] =
-    {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) 
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) e5,
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) e5,
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e5,
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e5,
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e5,
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e5,
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e5,
-        #include "instrsarm64sve.h"
-    };
+    imm >>= 12; // shift right by 12 bits
 
-    const static code_t insCodes6[] =
-    {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) 
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) e6,
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e6,
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e6,
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e6,
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e6,
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e6,
-        #include "instrsarm64sve.h"
-    };
+    return (imm <= 0x0fff); // Does it fit in 12 bits
+}
 
-    const static code_t insCodes7[] =
-    {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) 
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e7,
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e7,
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e7,
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e7,
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e7,
-        #include "instrsarm64sve.h"
-    };
+/*****************************************************************************
+ *
+ *  Normalize the 'imm' so that the upper bits, as defined by 'size' are zero
+ */
 
-    const static code_t insCodes8[] =
-    {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) 
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) 
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e8,
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e8,
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e8,
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e8,
-        #include "instrsarm64sve.h"
-    };
+/*static*/ INT64 emitter::normalizeImm64(INT64 imm, emitAttr size)
+{
+    unsigned immWidth = getBitWidth(size);
+    INT64    result   = imm;
 
-    const static code_t insCodes9[] =
+    if (immWidth < 64)
     {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) 
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) 
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) 
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e9,
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e9,
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e9,
-        #include "instrsarm64sve.h"
-    };
-
-    const static code_t insCodes10[] =
-    {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) 
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) 
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) 
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) 
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e10,
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e10,
-        #include "instrsarm64sve.h"
-    };
+        // Check that 'imm' fits in 'immWidth' bits. Don't consider "sign" bits above width.
+        INT64 maxVal      = 1LL << immWidth;
+        INT64 lowBitsMask = maxVal - 1;
+        INT64 hiBitsMask  = ~lowBitsMask;
+        INT64 signBitsMask =
+            hiBitsMask | (1LL << (immWidth - 1)); // The high bits must be set, and the top bit (sign bit) must be set.
+        assert((imm < maxVal) || ((imm & signBitsMask) == signBitsMask));
 
-    const static code_t insCodes11[] =
-    {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) 
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) 
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) 
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) 
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e11,
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e11,
-        #include "instrsarm64sve.h"
-    };
+        // mask off the hiBits
+        result &= lowBitsMask;
+    }
+    return result;
+}
 
-    const static code_t insCodes12[] =
-    {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) 
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) 
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) 
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) 
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) 
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e12,
-        #include "instrsarm64sve.h"
-    };
+/************************************************************************
+ *
+ *  returns true if 'imm' of 'size bits (8/16/32/64) can be encoded
+ *  using the ARM64 'bitmask immediate' form.
+ *  When a non-null value is passed for 'wbBMI' then this method
+ *  writes back the 'N','S' and 'R' values use to encode this immediate
+ *
+ */
 
-    const static code_t insCodes13[] =
-    {
-        #define   INST1(id, nm, info, fmt, e1                                                       ) 
-        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
-        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
-        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
-        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
-        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
-        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) 
-        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) 
-        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) 
-        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) 
-        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e13,
-        #include "instrsarm64sve.h"
-    };
+/*static*/ bool emitter::canEncodeBitMaskImm(INT64 imm, emitAttr size, emitter::bitMaskImm* wbBMI)
+{
+    unsigned immWidth = getBitWidth(size);
+    unsigned maxLen;
 
-    // clang-format on
-    const static insFormat formatEncode13A[13] = {IF_SVE_AU_3A, IF_SVE_BT_1A, IF_SVE_BV_2A,   IF_SVE_BV_2A_J,
-                                                  IF_SVE_BW_2A, IF_SVE_CB_2A, IF_SVE_CP_3A,   IF_SVE_CQ_3A,
-                                                  IF_SVE_CW_4A, IF_SVE_CZ_4A, IF_SVE_CZ_4A_K, IF_SVE_CZ_4A_L,
-                                                  IF_SVE_EB_1A};
-    const static insFormat formatEncode11A[11] = {IF_SVE_JD_4B,   IF_SVE_JD_4C,   IF_SVE_JI_3A_A, IF_SVE_JJ_4A,
-                                                  IF_SVE_JJ_4A_B, IF_SVE_JJ_4A_C, IF_SVE_JJ_4A_D, IF_SVE_JJ_4B,
-                                                  IF_SVE_JJ_4B_E, IF_SVE_JN_3B,   IF_SVE_JN_3C};
-    const static insFormat formatEncode9A[9] = {IF_SVE_HW_4A,   IF_SVE_HW_4A_A, IF_SVE_HW_4A_B,
-                                                IF_SVE_HW_4A_C, IF_SVE_HW_4B,   IF_SVE_HW_4B_D,
-                                                IF_SVE_HX_3A_E, IF_SVE_IJ_3A_F, IF_SVE_IK_4A_G};
-    const static insFormat formatEncode9B[9] = {IF_SVE_HW_4A,   IF_SVE_HW_4A_A, IF_SVE_HW_4A_B,
-                                                IF_SVE_HW_4A_C, IF_SVE_HW_4B,   IF_SVE_HW_4B_D,
-                                                IF_SVE_HX_3A_E, IF_SVE_IJ_3A_G, IF_SVE_IK_4A_I};
-    const static insFormat formatEncode9C[9] = {IF_SVE_HW_4A,   IF_SVE_HW_4A_A, IF_SVE_HW_4A_B,
-                                                IF_SVE_HW_4A_C, IF_SVE_HW_4B,   IF_SVE_HW_4B_D,
-                                                IF_SVE_HX_3A_E, IF_SVE_IH_3A_F, IF_SVE_II_4A_H};
-    const static insFormat formatEncode9D[9] = {IF_SVE_IH_3A,   IF_SVE_IH_3A_A, IF_SVE_II_4A,
-                                                IF_SVE_II_4A_B, IF_SVE_IU_4A,   IF_SVE_IU_4A_C,
-                                                IF_SVE_IU_4B,   IF_SVE_IU_4B_D, IF_SVE_IV_3A};
-    const static insFormat formatEncode9E[9] = {IF_SVE_JD_4A,   IF_SVE_JI_3A_A, IF_SVE_JJ_4A,
-                                                IF_SVE_JJ_4A_B, IF_SVE_JJ_4A_C, IF_SVE_JJ_4A_D,
-                                                IF_SVE_JJ_4B,   IF_SVE_JJ_4B_E, IF_SVE_JN_3A};
-    const static insFormat formatEncode9F[9] = {IF_SVE_JD_4C,   IF_SVE_JD_4C_A, IF_SVE_JJ_4A,
-                                                IF_SVE_JJ_4A_B, IF_SVE_JJ_4B,   IF_SVE_JJ_4B_C,
-                                                IF_SVE_JL_3A,   IF_SVE_JN_3C,   IF_SVE_JN_3C_D};
-    const static insFormat formatEncode8A[8] = {IF_SVE_CE_2A, IF_SVE_CE_2B, IF_SVE_CE_2C, IF_SVE_CE_2D,
-                                                IF_SVE_CF_2A, IF_SVE_CF_2B, IF_SVE_CF_2C, IF_SVE_CF_2D};
-    const static insFormat formatEncode8B[8] = {IF_SVE_HW_4A, IF_SVE_HW_4A_A, IF_SVE_HW_4A_B, IF_SVE_HW_4A_C,
-                                                IF_SVE_HW_4B, IF_SVE_HW_4B_D, IF_SVE_HX_3A_E, IF_SVE_IG_4A_F};
-    const static insFormat formatEncode8C[8] = {IF_SVE_HW_4A, IF_SVE_HW_4A_A, IF_SVE_HW_4A_B, IF_SVE_HW_4A_C,
-                                                IF_SVE_HW_4B, IF_SVE_HW_4B_D, IF_SVE_HX_3A_E, IF_SVE_IG_4A_G};
-    const static insFormat formatEncode7A[7] = {IF_SVE_IJ_3A, IF_SVE_IK_4A,   IF_SVE_IU_4A, IF_SVE_IU_4A_A,
-                                                IF_SVE_IU_4B, IF_SVE_IU_4B_B, IF_SVE_IV_3A};
-    const static insFormat formatEncode6A[6] = {IF_SVE_AE_3A, IF_SVE_BD_3A, IF_SVE_EE_1A,
-                                                IF_SVE_FD_3A, IF_SVE_FD_3B, IF_SVE_FD_3C};
-    const static insFormat formatEncode6B[6] = {IF_SVE_GY_3A, IF_SVE_GY_3B,   IF_SVE_GY_3B_D,
-                                                IF_SVE_HA_3A, IF_SVE_HA_3A_E, IF_SVE_HA_3A_F};
-    const static insFormat formatEncode6C[6] = {IF_SVE_HW_4A,   IF_SVE_HW_4A_A, IF_SVE_HW_4B,
-                                                IF_SVE_HX_3A_B, IF_SVE_IJ_3A_D, IF_SVE_IK_4A_F};
-    const static insFormat formatEncode6D[6] = {IF_SVE_HW_4A,   IF_SVE_HW_4A_A, IF_SVE_HW_4B,
-                                                IF_SVE_HX_3A_B, IF_SVE_IJ_3A_E, IF_SVE_IK_4A_H};
-    const static insFormat formatEncode6E[6] = {IF_SVE_HY_3A,   IF_SVE_HY_3A_A, IF_SVE_HY_3B,
-                                                IF_SVE_HZ_2A_B, IF_SVE_IA_2A,   IF_SVE_IB_3A};
-    const static insFormat formatEncode6F[6] = {IF_SVE_IG_4A, IF_SVE_IU_4A,   IF_SVE_IU_4A_A,
-                                                IF_SVE_IU_4B, IF_SVE_IU_4B_B, IF_SVE_IV_3A};
-    const static insFormat formatEncode6G[6] = {IF_SVE_JD_4A,   IF_SVE_JI_3A_A, IF_SVE_JK_4A,
-                                                IF_SVE_JK_4A_B, IF_SVE_JK_4B,   IF_SVE_JN_3A};
-    const static insFormat formatEncode5A[5] = {IF_SVE_AM_2A, IF_SVE_AN_3A, IF_SVE_AO_3A, IF_SVE_BF_2A, IF_SVE_BG_3A};
-    const static insFormat formatEncode5B[5] = {IF_SVE_GX_3A, IF_SVE_GX_3B, IF_SVE_HK_3A, IF_SVE_HL_3A, IF_SVE_HM_2A};
-    const static insFormat formatEncode5C[5] = {IF_SVE_EF_3A, IF_SVE_EG_3A, IF_SVE_EH_3A, IF_SVE_EY_3A, IF_SVE_EY_3B};
-    const static insFormat formatEncode5D[5] = {IF_SVE_HW_4A, IF_SVE_HW_4A_A, IF_SVE_HW_4B, IF_SVE_HX_3A_B,
-                                                IF_SVE_IG_4A_D};
-    const static insFormat formatEncode5E[5] = {IF_SVE_HW_4A, IF_SVE_HW_4A_A, IF_SVE_HW_4B, IF_SVE_HX_3A_B,
-                                                IF_SVE_IG_4A_E};
-    const static insFormat formatEncode4A[4]  = {IF_SVE_AA_3A, IF_SVE_AU_3A, IF_SVE_BS_1A, IF_SVE_CZ_4A};
-    const static insFormat formatEncode4B[4]  = {IF_SVE_BU_2A, IF_SVE_BV_2B, IF_SVE_EA_1A, IF_SVE_EB_1B};
-    const static insFormat formatEncode4C[4]  = {IF_SVE_HS_3A, IF_SVE_HS_3A_H, IF_SVE_HS_3A_I, IF_SVE_HS_3A_J};
-    const static insFormat formatEncode4D[4]  = {IF_SVE_HP_3B, IF_SVE_HP_3B_H, IF_SVE_HP_3B_I, IF_SVE_HP_3B_J};
-    const static insFormat formatEncode4E[4]  = {IF_SVE_BE_3A, IF_SVE_FI_3A, IF_SVE_FI_3B, IF_SVE_FI_3C};
-    const static insFormat formatEncode4F[4]  = {IF_SVE_EM_3A, IF_SVE_FK_3A, IF_SVE_FK_3B, IF_SVE_FK_3C};
-    const static insFormat formatEncode4G[4]  = {IF_SVE_AR_4A, IF_SVE_FF_3A, IF_SVE_FF_3B, IF_SVE_FF_3C};
-    const static insFormat formatEncode4H[4]  = {IF_SVE_GM_3A, IF_SVE_GN_3A, IF_SVE_GZ_3A, IF_SVE_HB_3A};
-    const static insFormat formatEncode4I[4]  = {IF_SVE_AX_1A, IF_SVE_AY_2A, IF_SVE_AZ_2A, IF_SVE_BA_3A};
-    const static insFormat formatEncode4J[4]  = {IF_SVE_BV_2A, IF_SVE_BV_2A_A, IF_SVE_CP_3A, IF_SVE_CQ_3A};
-    const static insFormat formatEncode4K[4]  = {IF_SVE_IF_4A, IF_SVE_IF_4A_A, IF_SVE_IM_3A, IF_SVE_IN_4A};
-    const static insFormat formatEncode4L[4]  = {IF_SVE_IZ_4A, IF_SVE_IZ_4A_A, IF_SVE_JB_4A, IF_SVE_JM_3A};
-    const static insFormat formatEncode3A[3]  = {IF_SVE_AB_3A, IF_SVE_AT_3A, IF_SVE_EC_1A};
-    const static insFormat formatEncode3B[3]  = {IF_SVE_BH_3A, IF_SVE_BH_3B, IF_SVE_BH_3B_A};
-    const static insFormat formatEncode3C[3]  = {IF_SVE_BW_2A, IF_SVE_CB_2A, IF_SVE_EB_1A};
-    const static insFormat formatEncode3D[3]  = {IF_SVE_BR_3A, IF_SVE_BR_3B, IF_SVE_CI_3A};
-    const static insFormat formatEncode3E[3]  = {IF_SVE_AT_3A, IF_SVE_EC_1A, IF_SVE_ET_3A};
-    const static insFormat formatEncode3F[3]  = {IF_SVE_GU_3A, IF_SVE_GU_3B, IF_SVE_HU_4A};
-    const static insFormat formatEncode3G[3]  = {IF_SVE_GH_3A, IF_SVE_GH_3B, IF_SVE_GH_3B_B};
-    const static insFormat formatEncode3H[3]  = {IF_SVE_HK_3A, IF_SVE_HL_3A, IF_SVE_HM_2A};
-    const static insFormat formatEncode3I[3]  = {IF_SVE_CM_3A, IF_SVE_CN_3A, IF_SVE_CO_3A};
-    const static insFormat formatEncode3J[3]  = {IF_SVE_CX_4A, IF_SVE_CX_4A_A, IF_SVE_CY_3A};
-    const static insFormat formatEncode3K[3]  = {IF_SVE_CX_4A, IF_SVE_CX_4A_A, IF_SVE_CY_3B};
-    const static insFormat formatEncode3L[3]  = {IF_SVE_DT_3A, IF_SVE_DX_3A, IF_SVE_DY_3A};
-    const static insFormat formatEncode3M[3]  = {IF_SVE_EJ_3A, IF_SVE_FA_3A, IF_SVE_FA_3B};
-    const static insFormat formatEncode3N[3]  = {IF_SVE_EK_3A, IF_SVE_FB_3A, IF_SVE_FB_3B};
-    const static insFormat formatEncode3O[3]  = {IF_SVE_EK_3A, IF_SVE_FC_3A, IF_SVE_FC_3B};
-    const static insFormat formatEncode3P[3]  = {IF_SVE_EL_3A, IF_SVE_FG_3A, IF_SVE_FG_3B};
-    const static insFormat formatEncode3Q[3]  = {IF_SVE_EO_3A, IF_SVE_FJ_3A, IF_SVE_FJ_3B};
-    const static insFormat formatEncode3R[3]  = {IF_SVE_FE_3A, IF_SVE_FE_3B, IF_SVE_FN_3A};
-    const static insFormat formatEncode3S[3]  = {IF_SVE_FH_3A, IF_SVE_FH_3B, IF_SVE_FN_3A};
-    const static insFormat formatEncode3T[3]  = {IF_SVE_GX_3C, IF_SVE_HK_3B, IF_SVE_HL_3B};
-    const static insFormat formatEncode3U[3]  = {IF_SVE_IM_3A, IF_SVE_IN_4A, IF_SVE_IX_4A};
-    const static insFormat formatEncode3V[3]  = {IF_SVE_JA_4A, IF_SVE_JB_4A, IF_SVE_JM_3A};
-    const static insFormat formatEncode2AA[2] = {IF_SVE_ID_2A, IF_SVE_IE_2A};
-    const static insFormat formatEncode2AB[2] = {IF_SVE_JG_2A, IF_SVE_JH_2A};
-    const static insFormat formatEncode2AC[2] = {IF_SVE_AD_3A, IF_SVE_ED_1A};
-    const static insFormat formatEncode2AD[2] = {IF_SVE_AB_3B, IF_SVE_AT_3B};
-    const static insFormat formatEncode2AE[2] = {IF_SVE_CG_2A, IF_SVE_CJ_2A};
-    const static insFormat formatEncode2AF[2] = {IF_SVE_AE_3A, IF_SVE_BD_3A};
-    const static insFormat formatEncode2AG[2] = {IF_SVE_BS_1A, IF_SVE_CZ_4A};
-    const static insFormat formatEncode2AH[2] = {IF_SVE_BQ_2A, IF_SVE_BQ_2B};
-    const static insFormat formatEncode2AI[2] = {IF_SVE_AM_2A, IF_SVE_EU_3A};
-    const static insFormat formatEncode2AJ[2] = {IF_SVE_HI_3A, IF_SVE_HT_4A};
-    const static insFormat formatEncode2AK[2] = {IF_SVE_BZ_3A, IF_SVE_BZ_3A_A};
-    const static insFormat formatEncode2AL[2] = {IF_SVE_GG_3A, IF_SVE_GG_3B};
-    const static insFormat formatEncode2AM[2] = {IF_SVE_HL_3A, IF_SVE_HM_2A};
-    const static insFormat formatEncode2AN[2] = {IF_SVE_EI_3A, IF_SVE_EZ_3A};
-    const static insFormat formatEncode2AO[2] = {IF_SVE_GT_4A, IF_SVE_GV_3A};
-    const static insFormat formatEncode2AP[2] = {IF_SVE_GY_3B, IF_SVE_HA_3A};
-    const static insFormat formatEncode2AQ[2] = {IF_SVE_GO_3A, IF_SVE_HC_3A};
-    const static insFormat formatEncode2AR[2] = {IF_SVE_AP_3A, IF_SVE_CZ_4A};
-    const static insFormat formatEncode2AS[2] = {IF_SVE_HO_3A, IF_SVE_HO_3A_B};
-    const static insFormat formatEncode2AT[2] = {IF_SVE_AB_3A, IF_SVE_EC_1A};
-    const static insFormat formatEncode2AU[2] = {IF_SVE_AH_3A, IF_SVE_BI_2A};
-    const static insFormat formatEncode2AV[2] = {IF_SVE_BM_1A, IF_SVE_BN_1A};
-    const static insFormat formatEncode2AW[2] = {IF_SVE_BO_1A, IF_SVE_BP_1A};
-    const static insFormat formatEncode2AX[2] = {IF_SVE_CC_2A, IF_SVE_CD_2A};
-    const static insFormat formatEncode2AY[2] = {IF_SVE_CR_3A, IF_SVE_CS_3A};
-    const static insFormat formatEncode2AZ[2] = {IF_SVE_CV_3A, IF_SVE_CV_3B};
-    const static insFormat formatEncode2BA[2] = {IF_SVE_CW_4A, IF_SVE_CZ_4A};
-    const static insFormat formatEncode2BB[2] = {IF_SVE_CZ_4A, IF_SVE_CZ_4A_A};
-    const static insFormat formatEncode2BC[2] = {IF_SVE_DE_1A, IF_SVE_DZ_1A};
-    const static insFormat formatEncode2BD[2] = {IF_SVE_DG_2A, IF_SVE_DH_1A};
-    const static insFormat formatEncode2BE[2] = {IF_SVE_DK_3A, IF_SVE_DL_2A};
-    const static insFormat formatEncode2BF[2] = {IF_SVE_DM_2A, IF_SVE_DN_2A};
-    const static insFormat formatEncode2BG[2] = {IF_SVE_DO_2A, IF_SVE_DP_2A};
-    const static insFormat formatEncode2BH[2] = {IF_SVE_DW_2A, IF_SVE_DW_2B};
-    const static insFormat formatEncode2BI[2] = {IF_SVE_FN_3A, IF_SVE_FN_3B};
-    const static insFormat formatEncode2BJ[2] = {IF_SVE_GQ_3A, IF_SVE_HG_2A};
-    const static insFormat formatEncode2BK[2] = {IF_SVE_GU_3C, IF_SVE_HU_4B};
-    const static insFormat formatEncode2BL[2] = {IF_SVE_GZ_3A, IF_SVE_HB_3A};
-    const static insFormat formatEncode2BM[2] = {IF_SVE_HK_3B, IF_SVE_HL_3B};
-    const static insFormat formatEncode2BN[2] = {IF_SVE_IF_4A, IF_SVE_IF_4A_A};
-    const static insFormat formatEncode2BO[2] = {IF_SVE_IO_3A, IF_SVE_IP_4A};
-    const static insFormat formatEncode2BP[2] = {IF_SVE_IQ_3A, IF_SVE_IR_4A};
-    const static insFormat formatEncode2BQ[2] = {IF_SVE_IS_3A, IF_SVE_IT_4A};
-    const static insFormat formatEncode2BR[2] = {IF_SVE_JC_4A, IF_SVE_JO_3A};
-    const static insFormat formatEncode2BS[2] = {IF_SVE_JE_3A, IF_SVE_JF_4A};
+    switch (size)
+    {
+        case EA_1BYTE:
+            maxLen = 3;
+            break;
 
-    code_t    code           = BAD_CODE;
-    insFormat insFmt         = emitInsFormat(ins);
-    bool      encoding_found = false;
-    int       index          = -1;
+        case EA_2BYTE:
+            maxLen = 4;
+            break;
 
-    switch (insFmt)
-    {
-        case IF_SVE_13A:
-            for (index = 0; index < 13; index++)
-            {
-                if (fmt == formatEncode13A[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
+        case EA_4BYTE:
+            maxLen = 5;
             break;
-        case IF_SVE_11A:
-            for (index = 0; index < 11; index++)
-            {
-                if (fmt == formatEncode11A[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
+
+        case EA_8BYTE:
+            maxLen = 6;
             break;
-        case IF_SVE_9A:
-            for (index = 0; index < 9; index++)
-            {
-                if (fmt == formatEncode9A[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
+
+        default:
+            assert(!"Invalid size");
+            maxLen = 0;
             break;
-        case IF_SVE_9B:
-            for (index = 0; index < 9; index++)
-            {
-                if (fmt == formatEncode9B[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_9C:
-            for (index = 0; index < 9; index++)
-            {
-                if (fmt == formatEncode9C[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_9D:
-            for (index = 0; index < 9; index++)
-            {
-                if (fmt == formatEncode9D[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_9E:
-            for (index = 0; index < 9; index++)
-            {
-                if (fmt == formatEncode9E[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_9F:
-            for (index = 0; index < 9; index++)
-            {
-                if (fmt == formatEncode9F[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_8A:
-            for (index = 0; index < 8; index++)
-            {
-                if (fmt == formatEncode8A[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_8B:
-            for (index = 0; index < 8; index++)
-            {
-                if (fmt == formatEncode8B[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_8C:
-            for (index = 0; index < 8; index++)
-            {
-                if (fmt == formatEncode8C[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_7A:
-            for (index = 0; index < 7; index++)
+    }
+
+    imm = normalizeImm64(imm, size);
+
+    // Starting with len=1, elemWidth is 2 bits
+    //               len=2, elemWidth is 4 bits
+    //               len=3, elemWidth is 8 bits
+    //               len=4, elemWidth is 16 bits
+    //               len=5, elemWidth is 32 bits
+    //               len=6, elemWidth is 64 bits
+    //
+    for (unsigned len = 1; (len <= maxLen); len++)
+    {
+        unsigned elemWidth = 1 << len;
+        UINT64   elemMask  = ((UINT64)-1) >> (64 - elemWidth);
+        UINT64   tempImm   = (UINT64)imm;        // A working copy of 'imm' that we can mutate
+        UINT64   elemVal   = tempImm & elemMask; // The low 'elemWidth' bits of 'imm'
+
+        // Check for all 1's or 0's as these can't be encoded
+        if ((elemVal == 0) || (elemVal == elemMask))
+            continue;
+
+        // 'checkedBits' is the count of bits that are known to match 'elemVal' when replicated
+        unsigned checkedBits = elemWidth; // by definition the first 'elemWidth' bits match
+
+        // Now check to see if each of the next bits match...
+        //
+        while (checkedBits < immWidth)
+        {
+            tempImm >>= elemWidth;
+
+            UINT64 nextElem = tempImm & elemMask;
+            if (nextElem != elemVal)
             {
-                if (fmt == formatEncode7A[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
+                // Not matching, exit this loop and checkedBits will not be equal to immWidth
+                break;
             }
-            break;
-        case IF_SVE_6A:
-            for (index = 0; index < 6; index++)
+
+            // The 'nextElem' is matching, so increment 'checkedBits'
+            checkedBits += elemWidth;
+        }
+
+        // Did the full immediate contain bits that can be formed by repeating 'elemVal'?
+        if (checkedBits == immWidth)
+        {
+            // We are not quite done, since the only values that we can encode as a
+            // 'bitmask immediate' are those that can be formed by starting with a
+            // bit string of 0*1* that is rotated by some number of bits.
+            //
+            // We check to see if 'elemVal' can be formed using these restrictions.
+            //
+            // Observation:
+            // Rotating by one bit any value that passes these restrictions
+            // can be xor-ed with the original value and will result it a string
+            // of bits that have exactly two 1 bits: 'elemRorXor'
+            // Further the distance between the two one bits tells us the value
+            // of S and the location of the 1 bits tells us the value of R
+            //
+            // Some examples:   (immWidth is 8)
+            //
+            // S=4,R=0   S=5,R=3   S=3,R=6
+            // elemVal:        00001111  11100011  00011100
+            // elemRor:        10000111  11110001  00001110
+            // elemRorXor:     10001000  00010010  00010010
+            //      compute S  45678---  ---5678-  ---3210-
+            //      compute R  01234567  ---34567  ------67
+
+            UINT64 elemRor    = ROR_helper(elemVal, 1, elemWidth); // Rotate 'elemVal' Right by one bit
+            UINT64 elemRorXor = elemVal ^ elemRor;                 // Xor elemVal and elemRor
+
+            // If we only have a two-bit change in elemROR then we can form a mask for this value
+            unsigned bitCount = 0;
+            UINT64   oneBit   = 0x1;
+            unsigned R        = elemWidth; // R is shift count for ROR (rotate right shift)
+            unsigned S        = 0;         // S is number of consecutive one bits
+            int      incr     = -1;
+
+            // Loop over the 'elemWidth' bits in 'elemRorXor'
+            //
+            for (unsigned bitNum = 0; bitNum < elemWidth; bitNum++)
             {
-                if (fmt == formatEncode6A[index])
+                if (incr == -1)
                 {
-                    encoding_found = true;
-                    break;
+                    R--; // We decrement R by one whenever incr is -1
                 }
-            }
-            break;
-        case IF_SVE_6B:
-            for (index = 0; index < 6; index++)
-            {
-                if (fmt == formatEncode6B[index])
+                if (bitCount == 1)
                 {
-                    encoding_found = true;
-                    break;
+                    S += incr; // We incr/decr S, after we find the first one bit in 'elemRorXor'
                 }
-            }
-            break;
-        case IF_SVE_6C:
-            for (index = 0; index < 6; index++)
-            {
-                if (fmt == formatEncode6C[index])
+
+                // Is this bit position a 1 bit in 'elemRorXor'?
+                //
+                if (oneBit & elemRorXor)
                 {
-                    encoding_found = true;
-                    break;
+                    bitCount++;
+                    // Is this the first 1 bit that we found in 'elemRorXor'?
+                    if (bitCount == 1)
+                    {
+                        // Does this 1 bit represent a transition to zero bits?
+                        bool toZeros = ((oneBit & elemVal) != 0);
+                        if (toZeros)
+                        {
+                            // S :: Count down from elemWidth
+                            S    = elemWidth;
+                            incr = -1;
+                        }
+                        else // this 1 bit represent a transition to one bits.
+                        {
+                            // S :: Count up from zero
+                            S    = 0;
+                            incr = +1;
+                        }
+                    }
+                    else // bitCount > 1
+                    {
+                        // We found the second (or third...) 1 bit in 'elemRorXor'
+                        incr = 0; // stop decrementing 'R'
+
+                        if (bitCount > 2)
+                        {
+                            // More than 2 transitions from 0/1 in 'elemVal'
+                            // This means that 'elemVal' can't be encoded
+                            // using a 'bitmask immediate'.
+                            //
+                            // Furthermore, it will continue to fail
+                            // with any larger 'len' that we try.
+                            // so just return false.
+                            //
+                            return false;
+                        }
+                    }
                 }
+
+                // shift oneBit left by one bit to test the next position
+                oneBit <<= 1;
             }
-            break;
-        case IF_SVE_6D:
-            for (index = 0; index < 6; index++)
+
+            // We expect that bitCount will always be two at this point
+            // but just in case return false for any bad cases.
+            //
+            assert(bitCount == 2);
+            if (bitCount != 2)
+                return false;
+
+            // Perform some sanity checks on the values of 'S' and 'R'
+            assert(S > 0);
+            assert(S < elemWidth);
+            assert(R < elemWidth);
+
+            // Does the caller want us to return the N,R,S encoding values?
+            //
+            if (wbBMI != nullptr)
             {
-                if (fmt == formatEncode6D[index])
+
+                // The encoding used for S is one less than the
+                //  number of consecutive one bits
+                S--;
+
+                if (len == 6)
                 {
-                    encoding_found = true;
-                    break;
+                    wbBMI->immN = 1;
                 }
-            }
-            break;
-        case IF_SVE_6E:
-            for (index = 0; index < 6; index++)
-            {
-                if (fmt == formatEncode6E[index])
+                else
                 {
-                    encoding_found = true;
-                    break;
+                    wbBMI->immN = 0;
+                    // The encoding used for 'S' here is a bit peculiar.
+                    //
+                    // The upper bits need to be complemented, followed by a zero bit
+                    // then the value of 'S-1'
+                    //
+                    unsigned upperBitsOfS = 64 - (1 << (len + 1));
+                    S |= upperBitsOfS;
                 }
+                wbBMI->immR = R;
+                wbBMI->immS = S;
+
+                // Verify that what we are returning is correct.
+                assert(imm == emitDecodeBitMaskImm(*wbBMI, size));
             }
-            break;
-        case IF_SVE_6F:
-            for (index = 0; index < 6; index++)
-            {
-                if (fmt == formatEncode6F[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_6G:
-            for (index = 0; index < 6; index++)
-            {
-                if (fmt == formatEncode6G[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_5A:
-            for (index = 0; index < 5; index++)
-            {
-                if (fmt == formatEncode5A[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_5B:
-            for (index = 0; index < 5; index++)
-            {
-                if (fmt == formatEncode5B[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_5C:
-            for (index = 0; index < 5; index++)
+            // Tell the caller that we can successfully encode this immediate
+            // using a 'bitmask immediate'.
+            //
+            return true;
+        }
+    }
+    return false;
+}
+
+/************************************************************************
+ *
+ *  Convert an imm(i16,hw) into a 32/64-bit immediate
+ *  inputs 'hwImm' a halfwordImm struct
+ *         'size' specifies the size of the result (64 or 32 bits)
+ */
+
+/*static*/ INT64 emitter::emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size)
+{
+    assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
+
+    unsigned hw  = hwImm.immHW;
+    INT64    val = (INT64)hwImm.immVal;
+
+    assert((hw <= 1) || (size == EA_8BYTE));
+
+    INT64 result = val << (16 * hw);
+    return result;
+}
+
+/************************************************************************
+ *
+ *  returns true if 'imm' of 'size' bits (32/64) can be encoded
+ *  using the ARM64 'halfword immediate' form.
+ *  When a non-null value is passed for 'wbHWI' then this method
+ *  writes back the 'immHW' and 'immVal' values use to encode this immediate
+ *
+ */
+
+/*static*/ bool emitter::canEncodeHalfwordImm(INT64 imm, emitAttr size, emitter::halfwordImm* wbHWI)
+{
+    assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
+
+    unsigned immWidth = (size == EA_8BYTE) ? 64 : 32;
+    unsigned maxHW    = (size == EA_8BYTE) ? 4 : 2;
+
+    // setup immMask to a (EA_4BYTE) 0x00000000_FFFFFFFF or (EA_8BYTE) 0xFFFFFFFF_FFFFFFFF
+    const UINT64 immMask = ((UINT64)-1) >> (64 - immWidth);
+    const INT64  mask16  = (INT64)0xFFFF;
+
+    imm = normalizeImm64(imm, size);
+
+    // Try each of the valid hw shift sizes
+    for (unsigned hw = 0; (hw < maxHW); hw++)
+    {
+        INT64 curMask   = mask16 << (hw * 16); // Represents the mask of the bits in the current halfword
+        INT64 checkBits = immMask & ~curMask;
+
+        // Excluding the current halfword (using ~curMask)
+        //  does the immediate have zero bits in every other bit that we care about?
+        //  note we care about all 64-bits for EA_8BYTE
+        //  and we care about the lowest 32 bits for EA_4BYTE
+        //
+        if ((imm & checkBits) == 0)
+        {
+            // Does the caller want us to return the imm(i16,hw) encoding values?
+            //
+            if (wbHWI != nullptr)
             {
-                if (fmt == formatEncode5C[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
+                INT64 val     = ((imm & curMask) >> (hw * 16)) & mask16;
+                wbHWI->immHW  = hw;
+                wbHWI->immVal = val;
+
+                // Verify that what we are returning is correct.
+                assert(imm == emitDecodeHalfwordImm(*wbHWI, size));
             }
-            break;
-        case IF_SVE_5D:
-            for (index = 0; index < 5; index++)
+            // Tell the caller that we can successfully encode this immediate
+            // using a 'halfword immediate'.
+            //
+            return true;
+        }
+    }
+    return false;
+}
+
+/************************************************************************
+ *
+ *  Convert an imm(i8,sh) into a 16/32-bit immediate
+ *  inputs 'bsImm' a byteShiftedImm struct
+ *         'size' specifies the size of the result (16 or 32 bits)
+ */
+
+/*static*/ UINT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size)
+{
+    bool     onesShift = (bsImm.immOnes == 1);
+    unsigned bySh      = bsImm.immBY;          // Num Bytes to shift 0,1,2,3
+    UINT32   result    = (UINT32)bsImm.immVal; // 8-bit immediate
+
+    if (bySh > 0)
+    {
+        assert((size == EA_2BYTE) || (size == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms
+        if (size == EA_2BYTE)
+        {
+            assert(bySh < 2);
+        }
+        else
+        {
+            assert(bySh < 4);
+        }
+
+        result <<= (8 * bySh);
+
+        if (onesShift)
+        {
+            result |= ((1 << (8 * bySh)) - 1);
+        }
+    }
+    return result;
+}
+
+/************************************************************************
+ *
+ *  returns true if 'imm' of 'size' bits (16/32) can be encoded
+ *  using the ARM64 'byteShifted immediate' form.
+ *  When a non-null value is passed for 'wbBSI' then this method
+ *  writes back the 'immBY' and 'immVal' values use to encode this immediate
+ *
+ */
+
+/*static*/ bool emitter::canEncodeByteShiftedImm(INT64                    imm,
+                                                 emitAttr                 size,
+                                                 bool                     allow_MSL,
+                                                 emitter::byteShiftedImm* wbBSI)
+{
+    bool     canEncode = false;
+    bool     onesShift = false; // true if we use the shifting ones variant
+    unsigned bySh      = 0;     // number of bytes to shift: 0, 1, 2, 3
+    unsigned imm8      = 0;     // immediate to use in the encoding
+
+    imm = normalizeImm64(imm, size);
+
+    if (size == EA_1BYTE)
+    {
+        imm8 = (unsigned)imm;
+        assert(imm8 < 0x100);
+        canEncode = true;
+    }
+    else if (size == EA_8BYTE)
+    {
+        imm8 = (unsigned)imm;
+        assert(imm8 < 0x100);
+        canEncode = true;
+    }
+    else
+    {
+        assert((size == EA_2BYTE) || (size == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms
+
+        unsigned immWidth = (size == EA_4BYTE) ? 32 : 16;
+        unsigned maxBY    = (size == EA_4BYTE) ? 4 : 2;
+
+        // setup immMask to a (EA_2BYTE) 0x0000FFFF or (EA_4BYTE) 0xFFFFFFFF
+        const UINT32 immMask = ((UINT32)-1) >> (32 - immWidth);
+        const INT32  mask8   = (INT32)0xFF;
+
+        // Try each of the valid by shift sizes
+        for (bySh = 0; (bySh < maxBY); bySh++)
+        {
+            INT32 curMask   = mask8 << (bySh * 8); // Represents the mask of the bits in the current byteShifted
+            INT32 checkBits = immMask & ~curMask;
+            INT32 immCheck  = (imm & checkBits);
+
+            // Excluding the current byte (using ~curMask)
+            //  does the immediate have zero bits in every other bit that we care about?
+            //  or can be use the shifted one variant?
+            //  note we care about all 32-bits for EA_4BYTE
+            //  and we care about the lowest 16 bits for EA_2BYTE
+            //
+            if (immCheck == 0)
             {
-                if (fmt == formatEncode5D[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
+                canEncode = true;
             }
-            break;
-        case IF_SVE_5E:
-            for (index = 0; index < 5; index++)
+
+            // MSL is only supported for 32-bit.
+            if (allow_MSL && (size == EA_4BYTE))
             {
-                if (fmt == formatEncode5E[index])
+                if ((bySh == 1) && (immCheck == 0xFF))
                 {
-                    encoding_found = true;
-                    break;
+                    canEncode = true;
+                    onesShift = true;
                 }
-            }
-            break;
-        case IF_SVE_4A:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4A[index])
+                else if ((bySh == 2) && (immCheck == 0xFFFF))
                 {
-                    encoding_found = true;
-                    break;
+                    canEncode = true;
+                    onesShift = true;
                 }
             }
-            break;
-        case IF_SVE_4B:
-            for (index = 0; index < 4; index++)
+            if (canEncode)
             {
-                if (fmt == formatEncode4B[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
+                imm8 = (unsigned)(((imm & curMask) >> (bySh * 8)) & mask8);
+                break;
             }
-            break;
-        case IF_SVE_4C:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4C[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_4D:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4D[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_4E:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4E[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_4F:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4F[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_4G:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4G[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_4H:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4H[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_4I:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4I[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_4J:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4J[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_4K:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4K[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_4L:
-            for (index = 0; index < 4; index++)
-            {
-                if (fmt == formatEncode4L[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3A:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3A[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3B:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3B[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3C:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3C[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3D:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3D[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3E:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3E[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3F:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3F[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3G:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3G[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3H:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3H[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3I:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3I[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3J:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3J[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3K:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3K[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3L:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3L[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3M:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3M[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3N:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3N[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3O:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3O[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3P:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3P[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3Q:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3Q[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3R:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3R[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3S:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3S[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3T:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3T[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3U:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3U[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_3V:
-            for (index = 0; index < 3; index++)
-            {
-                if (fmt == formatEncode3V[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AA:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AA[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AB:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AB[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AC:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AC[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AD:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AD[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AE:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AE[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AF:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AF[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AG:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AG[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AH:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AH[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AI:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AI[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AJ:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AJ[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AK:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AK[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AL:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AL[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AM:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AM[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AN:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AN[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AO:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AO[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AP:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AP[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AQ:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AQ[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AR:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AR[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AS:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AS[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AT:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AT[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AU:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AU[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AV:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AV[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AW:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AW[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AX:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AX[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AY:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AY[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2AZ:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2AZ[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BA:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BA[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BB:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BB[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BC:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BC[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BD:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BD[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BE:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BE[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BF:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BF[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BG:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BG[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BH:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BH[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BI:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BI[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BJ:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BJ[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BK:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BK[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BL:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BL[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BM:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BM[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BN:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BN[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BO:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BO[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BP:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BP[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BQ:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BQ[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BR:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BR[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        case IF_SVE_2BS:
-            for (index = 0; index < 2; index++)
-            {
-                if (fmt == formatEncode2BS[index])
-                {
-                    encoding_found = true;
-                    break;
-                }
-            }
-            break;
-        default:
-            if (fmt == insFmt)
-            {
-                encoding_found = true;
-                index          = 0;
-            }
-            else
-            {
-                encoding_found = false;
-            }
-            break;
-    }
-
-    assert(encoding_found);
-    const unsigned sve_ins_offset = ((unsigned)ins - INS_sve_invalid);
-
-    switch (index)
-    {
-        case 0:
-            assert(sve_ins_offset < ArrLen(insCodes1));
-            code = insCodes1[sve_ins_offset];
-            break;
-        case 1:
-            assert(sve_ins_offset < ArrLen(insCodes2));
-            code = insCodes2[sve_ins_offset];
-            break;
-        case 2:
-            assert(sve_ins_offset < ArrLen(insCodes3));
-            code = insCodes3[sve_ins_offset];
-            break;
-        case 3:
-            assert(sve_ins_offset < ArrLen(insCodes4));
-            code = insCodes4[sve_ins_offset];
-            break;
-        case 4:
-            assert(sve_ins_offset < ArrLen(insCodes5));
-            code = insCodes5[sve_ins_offset];
-            break;
-        case 5:
-            assert(sve_ins_offset < ArrLen(insCodes6));
-            code = insCodes6[sve_ins_offset];
-            break;
-        case 6:
-            assert(sve_ins_offset < ArrLen(insCodes7));
-            code = insCodes7[sve_ins_offset];
-            break;
-        case 7:
-            assert(sve_ins_offset < ArrLen(insCodes8));
-            code = insCodes8[sve_ins_offset];
-            break;
-        case 8:
-            assert(sve_ins_offset < ArrLen(insCodes9));
-            code = insCodes9[sve_ins_offset];
-            break;
-        case 9:
-            assert(sve_ins_offset < ArrLen(insCodes10));
-            code = insCodes10[sve_ins_offset];
-            break;
-        case 10:
-            assert(sve_ins_offset < ArrLen(insCodes11));
-            code = insCodes11[sve_ins_offset];
-            break;
-        case 11:
-            assert(sve_ins_offset < ArrLen(insCodes12));
-            code = insCodes12[sve_ins_offset];
-            break;
-        case 12:
-            assert(sve_ins_offset < ArrLen(insCodes13));
-            code = insCodes13[sve_ins_offset];
-            break;
-    }
-
-    assert((code != BAD_CODE));
-
-    return code;
-}
-
-// true if this 'imm' can be encoded as a input operand to a mov instruction
-/*static*/ bool emitter::emitIns_valid_imm_for_mov(INT64 imm, emitAttr size)
-{
-    // Check for "MOV (wide immediate)".
-    if (canEncodeHalfwordImm(imm, size))
-        return true;
-
-    // Next try the ones-complement form of 'halfword immediate' imm(i16,hw),
-    // namely "MOV (inverted wide immediate)".
-    ssize_t notOfImm = NOT_helper(imm, getBitWidth(size));
-    if (canEncodeHalfwordImm(notOfImm, size))
-        return true;
-
-    // Finally try "MOV (bitmask immediate)" imm(N,r,s)
-    if (canEncodeBitMaskImm(imm, size))
-        return true;
-
-    return false;
-}
-
-// true if this 'imm' can be encoded as a input operand to a vector movi instruction
-/*static*/ bool emitter::emitIns_valid_imm_for_movi(INT64 imm, emitAttr elemsize)
-{
-    if (elemsize == EA_8BYTE)
-    {
-        UINT64 uimm = imm;
-        while (uimm != 0)
-        {
-            INT64 loByte = uimm & 0xFF;
-            if ((loByte == 0) || (loByte == 0xFF))
-            {
-                uimm >>= 8;
-            }
-            else
-            {
-                return false;
-            }
-        }
-        assert(uimm == 0);
-        return true;
-    }
-    else
-    {
-        // First try the standard 'byteShifted immediate' imm(i8,bySh)
-        if (canEncodeByteShiftedImm(imm, elemsize, true))
-            return true;
-
-        // Next try the ones-complement form of the 'immediate' imm(i8,bySh)
-        ssize_t notOfImm = NOT_helper(imm, getBitWidth(elemsize));
-        if (canEncodeByteShiftedImm(notOfImm, elemsize, true))
-            return true;
-    }
-    return false;
-}
-
-// true if this 'imm' can be encoded as a input operand to a fmov instruction
-/*static*/ bool emitter::emitIns_valid_imm_for_fmov(double immDbl)
-{
-    if (canEncodeFloatImm8(immDbl))
-        return true;
-
-    return false;
-}
-
-// true if this 'imm' can be encoded as a input operand to an add instruction
-/*static*/ bool emitter::emitIns_valid_imm_for_add(INT64 imm, emitAttr size)
-{
-    if (unsigned_abs(imm) <= 0x0fff)
-        return true;
-    else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding
-        return true;
-
-    return false;
-}
-
-// true if this 'imm' can be encoded as a input operand to an non-add/sub alu instruction
-/*static*/ bool emitter::emitIns_valid_imm_for_cmp(INT64 imm, emitAttr size)
-{
-    return emitIns_valid_imm_for_add(imm, size);
-}
-
-// true if this 'imm' can be encoded as a input operand to an non-add/sub alu instruction
-/*static*/ bool emitter::emitIns_valid_imm_for_alu(INT64 imm, emitAttr size)
-{
-    if (canEncodeBitMaskImm(imm, size))
-        return true;
-
-    return false;
-}
-
-// true if this 'imm' can be encoded as the offset in an unscaled ldr/str instruction
-/*static*/ bool emitter::emitIns_valid_imm_for_unscaled_ldst_offset(INT64 imm)
-{
-    return (imm >= -256) && (imm <= 255);
-}
-
-// true if this 'imm' can be encoded as the offset in a ldr/str instruction
-/*static*/ bool emitter::emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr attr)
-{
-    if (imm == 0)
-        return true; // Encodable using IF_LS_2A
-
-    if ((imm >= -256) && (imm <= 255))
-        return true; // Encodable using IF_LS_2C (or possibly IF_LS_2B)
-
-    if (imm < 0)
-        return false; // not encodable
-
-    emitAttr size  = EA_SIZE(attr);
-    unsigned scale = NaturalScale_helper(size);
-    ssize_t  mask  = size - 1; // the mask of low bits that must be zero to encode the immediate
-
-    if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
-        return true; // Encodable using IF_LS_2B
-
-    return false; // not encodable
-}
-
-// true if this 'imm' can be encoded as a input operand to a ccmp instruction
-/*static*/ bool emitter::emitIns_valid_imm_for_ccmp(INT64 imm)
-{
-    return ((imm & 0x01f) == imm);
-}
-
-// true if 'imm' can be encoded as an offset in a ldp/stp instruction
-/*static*/ bool emitter::canEncodeLoadOrStorePairOffset(INT64 imm, emitAttr attr)
-{
-    assert((attr == EA_4BYTE) || (attr == EA_8BYTE) || (attr == EA_16BYTE));
-    const int size = EA_SIZE_IN_BYTES(attr);
-    return (imm % size == 0) && (imm >= -64 * size) && (imm < 64 * size);
-}
-
-/************************************************************************
- *
- *   A helper method to return the natural scale for an EA 'size'
- */
-
-/*static*/ unsigned emitter::NaturalScale_helper(emitAttr size)
-{
-    assert(size == EA_1BYTE || size == EA_2BYTE || size == EA_4BYTE || size == EA_8BYTE || size == EA_16BYTE);
-    return BitOperations::Log2((unsigned)size);
-}
-
-/************************************************************************
- *
- *  A helper method to perform a Rotate-Right shift operation
- *  the source is 'value' and it is rotated right by 'sh' bits
- *  'value' is considered to be a fixed size 'width' set of bits.
- *
- *  Example
- *      value is '00001111', sh is 2 and width is 8
- *     result is '11000011'
- */
-
-/*static*/ UINT64 emitter::ROR_helper(UINT64 value, unsigned sh, unsigned width)
-{
-    assert(width <= 64);
-    // Check that 'value' fits in 'width' bits
-    assert((width == 64) || (value < (1ULL << width)));
-    // We don't support shifts >= width
-    assert(sh < width);
-
-    UINT64 result;
-
-    unsigned rsh = sh;
-    unsigned lsh = width - rsh;
-
-    result = (value >> rsh);
-    result |= (value << lsh);
-
-    if (width < 64)
-    {
-        // mask off any extra bits that we got from the left shift
-        result &= ((1ULL << width) - 1);
-    }
-    return result;
-}
-/************************************************************************
- *
- *  A helper method to perform a 'NOT' bitwise complement operation.
- *  'value' is considered to be a fixed size 'width' set of bits.
- *
- *  Example
- *      value is '01001011', and width is 8
- *     result is '10110100'
- */
-
-/*static*/ UINT64 emitter::NOT_helper(UINT64 value, unsigned width)
-{
-    assert(width <= 64);
-
-    UINT64 result = ~value;
-
-    if (width < 64)
-    {
-        // Check that 'value' fits in 'width' bits. Don't consider "sign" bits above width.
-        UINT64 maxVal       = 1ULL << width;
-        UINT64 lowBitsMask  = maxVal - 1;
-        UINT64 signBitsMask = ~lowBitsMask | (1ULL << (width - 1)); // The high bits must be set, and the top bit
-                                                                    // (sign bit) must be set.
-        assert((value < maxVal) || ((value & signBitsMask) == signBitsMask));
-
-        // mask off any extra bits that we got from the complement operation
-        result &= lowBitsMask;
-    }
-
-    return result;
-}
-
-/************************************************************************
- *
- *  A helper method to perform a bit Replicate operation
- *  the source is 'value' with a fixed size 'width' set of bits.
- *  value is replicated to fill out 32 or 64 bits as determined by 'size'.
- *
- *  Example
- *      value is '11000011' (0xE3), width is 8 and size is EA_8BYTE
- *     result is '11000011 11000011 11000011 11000011 11000011 11000011 11000011 11000011'
- *               0xE3E3E3E3E3E3E3E3
- */
-
-/*static*/ UINT64 emitter::Replicate_helper(UINT64 value, unsigned width, emitAttr size)
-{
-    assert(emitter::isValidGeneralDatasize(size));
-
-    unsigned immWidth = (size == EA_8BYTE) ? 64 : 32;
-    assert(width <= immWidth);
-
-    UINT64   result     = value;
-    unsigned filledBits = width;
-
-    while (filledBits < immWidth)
-    {
-        value <<= width;
-        result |= value;
-        filledBits += width;
-    }
-    return result;
-}
-
-/************************************************************************
- *
- *  Convert an imm(N,r,s) into a 64-bit immediate
- *  inputs 'bmImm' a bitMaskImm struct
- *         'size' specifies the size of the result (64 or 32 bits)
- */
-
-/*static*/ INT64 emitter::emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size)
-{
-    assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
-
-    unsigned N = bmImm.immN; // read the N,R and S values from the 'bitMaskImm' encoding
-    unsigned R = bmImm.immR;
-    unsigned S = bmImm.immS;
-
-    unsigned elemWidth = 64; // used when N == 1
-
-    if (N == 0) // find the smaller elemWidth when N == 0
-    {
-        // Scan S for the highest bit not set
-        elemWidth = 32;
-        for (unsigned bitNum = 5; bitNum > 0; bitNum--)
-        {
-            unsigned oneBit = elemWidth;
-            if ((S & oneBit) == 0)
-                break;
-            elemWidth /= 2;
-        }
-    }
-    else
-    {
-        assert(size == EA_8BYTE);
-    }
-
-    unsigned maskSR = elemWidth - 1;
-
-    S &= maskSR;
-    R &= maskSR;
-
-    // encoding for S is one less than the number of consecutive one bits
-    S++; // Number of consecutive ones to generate in 'welem'
-
-    // At this point:
-    //
-    //    'elemWidth' is the number of bits that we will use for the ROR and Replicate operations
-    //    'S'         is the number of consecutive 1 bits for the immediate
-    //    'R'         is the number of bits that we will Rotate Right the immediate
-    //    'size'      selects the final size of the immediate that we return (64 or 32 bits)
-
-    assert(S < elemWidth); // 'elemWidth' consecutive one's is a reserved encoding
-
-    UINT64 welem;
-    UINT64 wmask;
-
-    welem = (1ULL << S) - 1;
-
-    wmask = ROR_helper(welem, R, elemWidth);
-    wmask = Replicate_helper(wmask, elemWidth, size);
-
-    return wmask;
-}
-
-/*****************************************************************************
- *
- *  Check if an immediate can use the left shifted by 12 bits encoding
- */
-
-/*static*/ bool emitter::canEncodeWithShiftImmBy12(INT64 imm)
-{
-    if (imm < 0)
-    {
-        imm = -imm; // convert to unsigned
-    }
-
-    if (imm < 0)
-    {
-        return false; // Must be MIN_INT64
-    }
-
-    if ((imm & 0xfff) != 0) // Now the low 12 bits all have to be zero
-    {
-        return false;
-    }
-
-    imm >>= 12; // shift right by 12 bits
-
-    return (imm <= 0x0fff); // Does it fit in 12 bits
-}
-
-/*****************************************************************************
- *
- *  Normalize the 'imm' so that the upper bits, as defined by 'size' are zero
- */
-
-/*static*/ INT64 emitter::normalizeImm64(INT64 imm, emitAttr size)
-{
-    unsigned immWidth = getBitWidth(size);
-    INT64    result   = imm;
-
-    if (immWidth < 64)
-    {
-        // Check that 'imm' fits in 'immWidth' bits. Don't consider "sign" bits above width.
-        INT64 maxVal      = 1LL << immWidth;
-        INT64 lowBitsMask = maxVal - 1;
-        INT64 hiBitsMask  = ~lowBitsMask;
-        INT64 signBitsMask =
-            hiBitsMask | (1LL << (immWidth - 1)); // The high bits must be set, and the top bit (sign bit) must be set.
-        assert((imm < maxVal) || ((imm & signBitsMask) == signBitsMask));
-
-        // mask off the hiBits
-        result &= lowBitsMask;
-    }
-    return result;
-}
-
-/*****************************************************************************
- *
- *  Normalize the 'imm' so that the upper bits, as defined by 'size' are zero
- */
-
-/*static*/ INT32 emitter::normalizeImm32(INT32 imm, emitAttr size)
-{
-    unsigned immWidth = getBitWidth(size);
-    INT32    result   = imm;
-
-    if (immWidth < 32)
-    {
-        // Check that 'imm' fits in 'immWidth' bits. Don't consider "sign" bits above width.
-        INT32 maxVal       = 1 << immWidth;
-        INT32 lowBitsMask  = maxVal - 1;
-        INT32 hiBitsMask   = ~lowBitsMask;
-        INT32 signBitsMask = hiBitsMask | (1 << (immWidth - 1)); // The high bits must be set, and the top bit
-                                                                 // (sign bit) must be set.
-        assert((imm < maxVal) || ((imm & signBitsMask) == signBitsMask));
-
-        // mask off the hiBits
-        result &= lowBitsMask;
-    }
-    return result;
-}
-
-/************************************************************************
- *
- *  returns true if 'imm' of 'size bits (32/64) can be encoded
- *  using the ARM64 'bitmask immediate' form.
- *  When a non-null value is passed for 'wbBMI' then this method
- *  writes back the 'N','S' and 'R' values use to encode this immediate
- *
- */
-
-/*static*/ bool emitter::canEncodeBitMaskImm(INT64 imm, emitAttr size, emitter::bitMaskImm* wbBMI)
-{
-    assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
-
-    unsigned immWidth = (size == EA_8BYTE) ? 64 : 32;
-    unsigned maxLen   = (size == EA_8BYTE) ? 6 : 5;
-
-    imm = normalizeImm64(imm, size);
-
-    // Starting with len=1, elemWidth is 2 bits
-    //               len=2, elemWidth is 4 bits
-    //               len=3, elemWidth is 8 bits
-    //               len=4, elemWidth is 16 bits
-    //               len=5, elemWidth is 32 bits
-    // (optionally)  len=6, elemWidth is 64 bits
-    //
-    for (unsigned len = 1; (len <= maxLen); len++)
-    {
-        unsigned elemWidth = 1 << len;
-        UINT64   elemMask  = ((UINT64)-1) >> (64 - elemWidth);
-        UINT64   tempImm   = (UINT64)imm;        // A working copy of 'imm' that we can mutate
-        UINT64   elemVal   = tempImm & elemMask; // The low 'elemWidth' bits of 'imm'
-
-        // Check for all 1's or 0's as these can't be encoded
-        if ((elemVal == 0) || (elemVal == elemMask))
-            continue;
-
-        // 'checkedBits' is the count of bits that are known to match 'elemVal' when replicated
-        unsigned checkedBits = elemWidth; // by definition the first 'elemWidth' bits match
-
-        // Now check to see if each of the next bits match...
-        //
-        while (checkedBits < immWidth)
-        {
-            tempImm >>= elemWidth;
-
-            UINT64 nextElem = tempImm & elemMask;
-            if (nextElem != elemVal)
-            {
-                // Not matching, exit this loop and checkedBits will not be equal to immWidth
-                break;
-            }
-
-            // The 'nextElem' is matching, so increment 'checkedBits'
-            checkedBits += elemWidth;
-        }
-
-        // Did the full immediate contain bits that can be formed by repeating 'elemVal'?
-        if (checkedBits == immWidth)
-        {
-            // We are not quite done, since the only values that we can encode as a
-            // 'bitmask immediate' are those that can be formed by starting with a
-            // bit string of 0*1* that is rotated by some number of bits.
-            //
-            // We check to see if 'elemVal' can be formed using these restrictions.
-            //
-            // Observation:
-            // Rotating by one bit any value that passes these restrictions
-            // can be xor-ed with the original value and will result it a string
-            // of bits that have exactly two 1 bits: 'elemRorXor'
-            // Further the distance between the two one bits tells us the value
-            // of S and the location of the 1 bits tells us the value of R
-            //
-            // Some examples:   (immWidth is 8)
-            //
-            // S=4,R=0   S=5,R=3   S=3,R=6
-            // elemVal:        00001111  11100011  00011100
-            // elemRor:        10000111  11110001  00001110
-            // elemRorXor:     10001000  00010010  00010010
-            //      compute S  45678---  ---5678-  ---3210-
-            //      compute R  01234567  ---34567  ------67
-
-            UINT64 elemRor    = ROR_helper(elemVal, 1, elemWidth); // Rotate 'elemVal' Right by one bit
-            UINT64 elemRorXor = elemVal ^ elemRor;                 // Xor elemVal and elemRor
-
-            // If we only have a two-bit change in elemROR then we can form a mask for this value
-            unsigned bitCount = 0;
-            UINT64   oneBit   = 0x1;
-            unsigned R        = elemWidth; // R is shift count for ROR (rotate right shift)
-            unsigned S        = 0;         // S is number of consecutive one bits
-            int      incr     = -1;
-
-            // Loop over the 'elemWidth' bits in 'elemRorXor'
-            //
-            for (unsigned bitNum = 0; bitNum < elemWidth; bitNum++)
-            {
-                if (incr == -1)
-                {
-                    R--; // We decrement R by one whenever incr is -1
-                }
-                if (bitCount == 1)
-                {
-                    S += incr; // We incr/decr S, after we find the first one bit in 'elemRorXor'
-                }
-
-                // Is this bit position a 1 bit in 'elemRorXor'?
-                //
-                if (oneBit & elemRorXor)
-                {
-                    bitCount++;
-                    // Is this the first 1 bit that we found in 'elemRorXor'?
-                    if (bitCount == 1)
-                    {
-                        // Does this 1 bit represent a transition to zero bits?
-                        bool toZeros = ((oneBit & elemVal) != 0);
-                        if (toZeros)
-                        {
-                            // S :: Count down from elemWidth
-                            S    = elemWidth;
-                            incr = -1;
-                        }
-                        else // this 1 bit represent a transition to one bits.
-                        {
-                            // S :: Count up from zero
-                            S    = 0;
-                            incr = +1;
-                        }
-                    }
-                    else // bitCount > 1
-                    {
-                        // We found the second (or third...) 1 bit in 'elemRorXor'
-                        incr = 0; // stop decrementing 'R'
-
-                        if (bitCount > 2)
-                        {
-                            // More than 2 transitions from 0/1 in 'elemVal'
-                            // This means that 'elemVal' can't be encoded
-                            // using a 'bitmask immediate'.
-                            //
-                            // Furthermore, it will continue to fail
-                            // with any larger 'len' that we try.
-                            // so just return false.
-                            //
-                            return false;
-                        }
-                    }
-                }
-
-                // shift oneBit left by one bit to test the next position
-                oneBit <<= 1;
-            }
-
-            // We expect that bitCount will always be two at this point
-            // but just in case return false for any bad cases.
-            //
-            assert(bitCount == 2);
-            if (bitCount != 2)
-                return false;
-
-            // Perform some sanity checks on the values of 'S' and 'R'
-            assert(S > 0);
-            assert(S < elemWidth);
-            assert(R < elemWidth);
-
-            // Does the caller want us to return the N,R,S encoding values?
-            //
-            if (wbBMI != nullptr)
-            {
-
-                // The encoding used for S is one less than the
-                //  number of consecutive one bits
-                S--;
-
-                if (len == 6)
-                {
-                    wbBMI->immN = 1;
-                }
-                else
-                {
-                    wbBMI->immN = 0;
-                    // The encoding used for 'S' here is a bit peculiar.
-                    //
-                    // The upper bits need to be complemented, followed by a zero bit
-                    // then the value of 'S-1'
-                    //
-                    unsigned upperBitsOfS = 64 - (1 << (len + 1));
-                    S |= upperBitsOfS;
-                }
-                wbBMI->immR = R;
-                wbBMI->immS = S;
-
-                // Verify that what we are returning is correct.
-                assert(imm == emitDecodeBitMaskImm(*wbBMI, size));
-            }
-            // Tell the caller that we can successfully encode this immediate
-            // using a 'bitmask immediate'.
-            //
-            return true;
-        }
-    }
-    return false;
-}
-
-/************************************************************************
- *
- *  Convert a 64-bit immediate into its 'bitmask immediate' representation imm(N,r,s)
- */
-
-/*static*/ emitter::bitMaskImm emitter::emitEncodeBitMaskImm(INT64 imm, emitAttr size)
-{
-    emitter::bitMaskImm result;
-    result.immNRS = 0;
-
-    bool canEncode = canEncodeBitMaskImm(imm, size, &result);
-    assert(canEncode);
-
-    return result;
-}
-
-/************************************************************************
- *
- *  Convert an imm(i16,hw) into a 32/64-bit immediate
- *  inputs 'hwImm' a halfwordImm struct
- *         'size' specifies the size of the result (64 or 32 bits)
- */
-
-/*static*/ INT64 emitter::emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size)
-{
-    assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
-
-    unsigned hw  = hwImm.immHW;
-    INT64    val = (INT64)hwImm.immVal;
-
-    assert((hw <= 1) || (size == EA_8BYTE));
-
-    INT64 result = val << (16 * hw);
-    return result;
-}
-
-/************************************************************************
- *
- *  returns true if 'imm' of 'size' bits (32/64) can be encoded
- *  using the ARM64 'halfword immediate' form.
- *  When a non-null value is passed for 'wbHWI' then this method
- *  writes back the 'immHW' and 'immVal' values use to encode this immediate
- *
- */
-
-/*static*/ bool emitter::canEncodeHalfwordImm(INT64 imm, emitAttr size, emitter::halfwordImm* wbHWI)
-{
-    assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
-
-    unsigned immWidth = (size == EA_8BYTE) ? 64 : 32;
-    unsigned maxHW    = (size == EA_8BYTE) ? 4 : 2;
-
-    // setup immMask to a (EA_4BYTE) 0x00000000_FFFFFFFF or (EA_8BYTE) 0xFFFFFFFF_FFFFFFFF
-    const UINT64 immMask = ((UINT64)-1) >> (64 - immWidth);
-    const INT64  mask16  = (INT64)0xFFFF;
-
-    imm = normalizeImm64(imm, size);
-
-    // Try each of the valid hw shift sizes
-    for (unsigned hw = 0; (hw < maxHW); hw++)
-    {
-        INT64 curMask   = mask16 << (hw * 16); // Represents the mask of the bits in the current halfword
-        INT64 checkBits = immMask & ~curMask;
-
-        // Excluding the current halfword (using ~curMask)
-        //  does the immediate have zero bits in every other bit that we care about?
-        //  note we care about all 64-bits for EA_8BYTE
-        //  and we care about the lowest 32 bits for EA_4BYTE
-        //
-        if ((imm & checkBits) == 0)
-        {
-            // Does the caller want us to return the imm(i16,hw) encoding values?
-            //
-            if (wbHWI != nullptr)
-            {
-                INT64 val     = ((imm & curMask) >> (hw * 16)) & mask16;
-                wbHWI->immHW  = hw;
-                wbHWI->immVal = val;
-
-                // Verify that what we are returning is correct.
-                assert(imm == emitDecodeHalfwordImm(*wbHWI, size));
-            }
-            // Tell the caller that we can successfully encode this immediate
-            // using a 'halfword immediate'.
-            //
-            return true;
-        }
-    }
-    return false;
-}
-
-/************************************************************************
- *
- *  Convert a 64-bit immediate into its 'halfword immediate' representation imm(i16,hw)
- */
-
-/*static*/ emitter::halfwordImm emitter::emitEncodeHalfwordImm(INT64 imm, emitAttr size)
-{
-    emitter::halfwordImm result;
-    result.immHWVal = 0;
-
-    bool canEncode = canEncodeHalfwordImm(imm, size, &result);
-    assert(canEncode);
-
-    return result;
-}
-
-/************************************************************************
- *
- *  Convert an imm(i8,sh) into a 16/32-bit immediate
- *  inputs 'bsImm' a byteShiftedImm struct
- *         'size' specifies the size of the result (16 or 32 bits)
- */
-
-/*static*/ UINT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size)
-{
-    bool     onesShift = (bsImm.immOnes == 1);
-    unsigned bySh      = bsImm.immBY;          // Num Bytes to shift 0,1,2,3
-    UINT32   result    = (UINT32)bsImm.immVal; // 8-bit immediate
-
-    if (bySh > 0)
-    {
-        assert((size == EA_2BYTE) || (size == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms
-        if (size == EA_2BYTE)
-        {
-            assert(bySh < 2);
-        }
-        else
-        {
-            assert(bySh < 4);
-        }
-
-        result <<= (8 * bySh);
-
-        if (onesShift)
-        {
-            result |= ((1 << (8 * bySh)) - 1);
-        }
-    }
-    return result;
-}
-
-/************************************************************************
- *
- *  returns true if 'imm' of 'size' bits (16/32) can be encoded
- *  using the ARM64 'byteShifted immediate' form.
- *  When a non-null value is passed for 'wbBSI' then this method
- *  writes back the 'immBY' and 'immVal' values use to encode this immediate
- *
- */
-
-/*static*/ bool emitter::canEncodeByteShiftedImm(INT64                    imm,
-                                                 emitAttr                 size,
-                                                 bool                     allow_MSL,
-                                                 emitter::byteShiftedImm* wbBSI)
-{
-    bool     canEncode = false;
-    bool     onesShift = false; // true if we use the shifting ones variant
-    unsigned bySh      = 0;     // number of bytes to shift: 0, 1, 2, 3
-    unsigned imm8      = 0;     // immediate to use in the encoding
-
-    imm = normalizeImm64(imm, size);
-
-    if (size == EA_1BYTE)
-    {
-        imm8 = (unsigned)imm;
-        assert(imm8 < 0x100);
-        canEncode = true;
-    }
-    else if (size == EA_8BYTE)
-    {
-        imm8 = (unsigned)imm;
-        assert(imm8 < 0x100);
-        canEncode = true;
-    }
-    else
-    {
-        assert((size == EA_2BYTE) || (size == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms
-
-        unsigned immWidth = (size == EA_4BYTE) ? 32 : 16;
-        unsigned maxBY    = (size == EA_4BYTE) ? 4 : 2;
-
-        // setup immMask to a (EA_2BYTE) 0x0000FFFF or (EA_4BYTE) 0xFFFFFFFF
-        const UINT32 immMask = ((UINT32)-1) >> (32 - immWidth);
-        const INT32  mask8   = (INT32)0xFF;
-
-        // Try each of the valid by shift sizes
-        for (bySh = 0; (bySh < maxBY); bySh++)
-        {
-            INT32 curMask   = mask8 << (bySh * 8); // Represents the mask of the bits in the current byteShifted
-            INT32 checkBits = immMask & ~curMask;
-            INT32 immCheck  = (imm & checkBits);
-
-            // Excluding the current byte (using ~curMask)
-            //  does the immediate have zero bits in every other bit that we care about?
-            //  or can be use the shifted one variant?
-            //  note we care about all 32-bits for EA_4BYTE
-            //  and we care about the lowest 16 bits for EA_2BYTE
-            //
-            if (immCheck == 0)
-            {
-                canEncode = true;
-            }
-
-            // MSL is only supported for 32-bit.
-            if (allow_MSL && (size == EA_4BYTE))
-            {
-                if ((bySh == 1) && (immCheck == 0xFF))
-                {
-                    canEncode = true;
-                    onesShift = true;
-                }
-                else if ((bySh == 2) && (immCheck == 0xFFFF))
-                {
-                    canEncode = true;
-                    onesShift = true;
-                }
-            }
-            if (canEncode)
-            {
-                imm8 = (unsigned)(((imm & curMask) >> (bySh * 8)) & mask8);
-                break;
-            }
-        }
-    }
-
-    if (canEncode)
-    {
-        // Does the caller want us to return the imm(i8,bySh) encoding values?
-        //
-        if (wbBSI != nullptr)
-        {
-            wbBSI->immOnes = onesShift;
-            wbBSI->immBY   = bySh;
-            wbBSI->immVal  = imm8;
-
-            // Verify that what we are returning is correct.
-            assert(imm == emitDecodeByteShiftedImm(*wbBSI, size));
-        }
-        // Tell the caller that we can successfully encode this immediate
-        // using a 'byteShifted immediate'.
-        //
-        return true;
-    }
-    return false;
-}
-
-/************************************************************************
- *
- *  Convert a 32-bit immediate into its 'byteShifted immediate' representation imm(i8,by)
- */
-
-/*static*/ emitter::byteShiftedImm emitter::emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL)
-{
-    emitter::byteShiftedImm result;
-    result.immBSVal = 0;
-
-    bool canEncode = canEncodeByteShiftedImm(imm, size, allow_MSL, &result);
-    assert(canEncode);
-
-    return result;
-}
-
-/************************************************************************
- *
- *  Convert a 'float 8-bit immediate' into a double.
- *  inputs 'fpImm' a floatImm8 struct
- */
-
-/*static*/ double emitter::emitDecodeFloatImm8(const emitter::floatImm8 fpImm)
-{
-    unsigned sign  = fpImm.immSign;
-    unsigned exp   = fpImm.immExp ^ 0x4;
-    unsigned mant  = fpImm.immMant + 16;
-    unsigned scale = 16 * 8;
-
-    while (exp > 0)
-    {
-        scale /= 2;
-        exp--;
-    }
-
-    double result = ((double)mant) / ((double)scale);
-    if (sign == 1)
-    {
-        result = -result;
-    }
-
-    return result;
-}
-
-/************************************************************************
- *
- *  returns true if the 'immDbl' can be encoded using the 'float 8-bit immediate' form.
- *  also returns the encoding if wbFPI is non-null
- *
- */
-
-/*static*/ bool emitter::canEncodeFloatImm8(double immDbl, emitter::floatImm8* wbFPI)
-{
-    bool   canEncode = false;
-    double val       = immDbl;
-
-    int sign = 0;
-    if (val < 0.0)
-    {
-        val  = -val;
-        sign = 1;
-    }
-
-    int exp = 0;
-    while ((val < 1.0) && (exp >= -4))
-    {
-        val *= 2.0;
-        exp--;
-    }
-    while ((val >= 2.0) && (exp <= 5))
-    {
-        val *= 0.5;
-        exp++;
-    }
-    exp += 3;
-    val *= 16.0;
-    int ival = (int)val;
-
-    if ((exp >= 0) && (exp <= 7))
-    {
-        if (val == (double)ival)
-        {
-            canEncode = true;
-
-            if (wbFPI != nullptr)
-            {
-                ival -= 16;
-                assert((ival >= 0) && (ival <= 15));
-
-                wbFPI->immSign = sign;
-                wbFPI->immExp  = exp ^ 0x4;
-                wbFPI->immMant = ival;
-                unsigned imm8  = wbFPI->immFPIVal;
-                assert((imm8 >= 0) && (imm8 <= 0xff));
-            }
-        }
-    }
-
-    return canEncode;
-}
-
-/************************************************************************
- *
- *  Convert a double into its 'float 8-bit immediate' representation
- */
-
-/*static*/ emitter::floatImm8 emitter::emitEncodeFloatImm8(double immDbl)
-{
-    emitter::floatImm8 result;
-    result.immFPIVal = 0;
-
-    bool canEncode = canEncodeFloatImm8(immDbl, &result);
-    assert(canEncode);
-
-    return result;
-}
-
-/************************************************************************
- *
- *  Convert a rotation value that is 90 or 270 into a smaller encoding that matches one-to-one with the 'rot' field.
- */
-
-/*static*/ ssize_t emitter::emitEncodeRotationImm90_or_270(ssize_t imm)
-{
-    switch (imm)
-    {
-        case 90:
-            return 0;
-
-        case 270:
-            return 1;
-
-        default:
-            break;
-    }
-
-    assert(!"Invalid rotation value");
-    return 0;
-}
-
-/************************************************************************
- *
- *  Convert an encoded rotation value to 90 or 270.
- */
-
-/*static*/ ssize_t emitter::emitDecodeRotationImm90_or_270(ssize_t imm)
-{
-    assert(emitIsValidEncodedRotationImm0_to_270(imm));
-    switch (imm)
-    {
-        case 0:
-            return 90;
-
-        case 1:
-            return 270;
-
-        default:
-            break;
-    }
-
-    return 0;
-}
-
-/************************************************************************
- *
- *  Check if the immediate value is a valid encoded rotation value for 90 or 270.
- */
-
-/*static*/ bool emitter::emitIsValidEncodedRotationImm90_or_270(ssize_t imm)
-{
-    return (imm == 0) || (imm == 1);
-}
-
-/************************************************************************
- *
- *  Convert a rotation value that is 0, 90, 180 or 270 into a smaller encoding that matches one-to-one with the 'rot'
- * field.
- */
-
-/*static*/ ssize_t emitter::emitEncodeRotationImm0_to_270(ssize_t imm)
-{
-    switch (imm)
-    {
-        case 0:
-            return 0;
-
-        case 90:
-            return 1;
-
-        case 180:
-            return 2;
-
-        case 270:
-            return 3;
-
-        default:
-            break;
-    }
-
-    assert(!"Invalid rotation value");
-    return 0;
-}
-
-/************************************************************************
- *
- *  Convert an encoded rotation value to 0, 90, 180 or 270.
- */
-
-/*static*/ ssize_t emitter::emitDecodeRotationImm0_to_270(ssize_t imm)
-{
-    assert(emitIsValidEncodedRotationImm0_to_270(imm));
-    switch (imm)
-    {
-        case 0:
-            return 0;
-
-        case 1:
-            return 90;
-
-        case 2:
-            return 180;
-
-        case 3:
-            return 270;
-
-        default:
-            break;
-    }
-
-    return 0;
-}
-
-/************************************************************************
- *
- *  Check if the immediate value is a valid encoded rotation value for 0, 90, 180 or 270.
- */
-
-/*static*/ bool emitter::emitIsValidEncodedRotationImm0_to_270(ssize_t imm)
-{
-    return (imm >= 0) && (imm <= 3);
-}
-
-/************************************************************************
- *
- *  Convert a small immediate float value to an encoded version that matches one-to-one with the instructions.
- *  The instruction determines the value.
- */
-
-/*static*/ ssize_t emitter::emitEncodeSmallFloatImm(double immDbl, instruction ins)
-{
-#ifdef DEBUG
-    switch (ins)
-    {
-        case INS_sve_fadd:
-        case INS_sve_fsub:
-        case INS_sve_fsubr:
-            assert((immDbl == 0.5) || (immDbl == 1.0));
-            break;
-
-        case INS_sve_fmax:
-        case INS_sve_fmaxnm:
-        case INS_sve_fmin:
-        case INS_sve_fminnm:
-            assert((immDbl == 0) || (immDbl == 1.0));
-            break;
-
-        case INS_sve_fmul:
-            assert((immDbl == 0.5) || (immDbl == 2.0));
-            break;
-
-        default:
-            assert(!"Invalid instruction");
-            break;
-    }
-#endif // DEBUG
-    if (immDbl < 1.0)
-    {
-        return 0;
-    }
-    return 1;
-}
-
-/************************************************************************
- *
- *  Convert an encoded small float immediate value. The instruction determines the value.
- */
-
-/*static*/ double emitter::emitDecodeSmallFloatImm(ssize_t imm, instruction ins)
-{
-    assert(emitIsValidEncodedSmallFloatImm(imm));
-    switch (ins)
-    {
-        case INS_sve_fadd:
-        case INS_sve_fsub:
-        case INS_sve_fsubr:
-            if (imm == 0)
-            {
-                return 0.5;
-            }
-            else
-            {
-                return 1.0;
-            }
-
-        case INS_sve_fmax:
-        case INS_sve_fmaxnm:
-        case INS_sve_fmin:
-        case INS_sve_fminnm:
-            if (imm == 0)
-            {
-                return 0.0;
-            }
-            else
-            {
-                return 1.0;
-            }
-            break;
-
-        case INS_sve_fmul:
-            if (imm == 0)
-            {
-                return 0.5;
-            }
-            else
-            {
-                return 2.0;
-            }
-            break;
-
-        default:
-            break;
-    }
-
-    assert(!"Invalid instruction");
-    return 0.0;
-}
-
-/************************************************************************
- *
- *  Check if the immediate value is a valid encoded small float.
- */
-
-/*static*/ bool emitter::emitIsValidEncodedSmallFloatImm(size_t imm)
-{
-    return (imm == 0) || (imm == 1);
-}
-
-/*****************************************************************************
- *
- *  For the given 'ins' returns the reverse instruction
- *  if one exists, otherwise returns INS_INVALID
- */
-
-/*static*/ instruction emitter::insReverse(instruction ins)
-{
-    switch (ins)
-    {
-        case INS_add:
-            return INS_sub;
-        case INS_adds:
-            return INS_subs;
-
-        case INS_sub:
-            return INS_add;
-        case INS_subs:
-            return INS_adds;
-
-        case INS_cmp:
-            return INS_cmn;
-        case INS_cmn:
-            return INS_cmp;
-
-        case INS_ccmp:
-            return INS_ccmn;
-        case INS_ccmn:
-            return INS_ccmp;
-
-        default:
-            return INS_invalid;
-    }
-}
-
-/*****************************************************************************
- *
- *  For the given 'datasize' and 'elemsize', make the proper arrangement option
- *  returns the insOpts that specifies the vector register arrangement
- *  if one does not exist returns INS_OPTS_NONE
- */
-
-/*static*/ insOpts emitter::optMakeArrangement(emitAttr datasize, emitAttr elemsize)
-{
-    insOpts result = INS_OPTS_NONE;
-
-    if (datasize == EA_8BYTE)
-    {
-        switch (elemsize)
-        {
-            case EA_1BYTE:
-                result = INS_OPTS_8B;
-                break;
-            case EA_2BYTE:
-                result = INS_OPTS_4H;
-                break;
-            case EA_4BYTE:
-                result = INS_OPTS_2S;
-                break;
-            case EA_8BYTE:
-                result = INS_OPTS_1D;
-                break;
-            default:
-                unreached();
-                break;
-        }
-    }
-    else if (datasize == EA_16BYTE)
-    {
-        switch (elemsize)
-        {
-            case EA_1BYTE:
-                result = INS_OPTS_16B;
-                break;
-            case EA_2BYTE:
-                result = INS_OPTS_8H;
-                break;
-            case EA_4BYTE:
-                result = INS_OPTS_4S;
-                break;
-            case EA_8BYTE:
-                result = INS_OPTS_2D;
-                break;
-            default:
-                unreached();
-                break;
-        }
-    }
-    return result;
-}
-
-/*****************************************************************************
- *
- *  For the given 'datasize' and arrangement 'opts'
- *  returns true is the pair specifies a valid arrangement
- */
-/*static*/ bool emitter::isValidArrangement(emitAttr datasize, insOpts opt)
-{
-    if (datasize == EA_8BYTE)
-    {
-        if ((opt == INS_OPTS_8B) || (opt == INS_OPTS_4H) || (opt == INS_OPTS_2S) || (opt == INS_OPTS_1D))
-        {
-            return true;
-        }
-    }
-    else if (datasize == EA_16BYTE)
-    {
-        if ((opt == INS_OPTS_16B) || (opt == INS_OPTS_8H) || (opt == INS_OPTS_4S) || (opt == INS_OPTS_2D))
-        {
-            return true;
-        }
-    }
-    return false;
-}
-
-//------------------------------------------------------------------------
-// insGetRegisterListSize: Returns a size of the register list a given instruction operates on.
-//
-// Arguments:
-//   ins - An instruction which uses a register list
-//         (e.g. ld1 (2 registers), ld1r, st1, tbl, tbx).
-//
-// Return value:
-//   A number of consecutive SIMD and floating-point registers the instruction loads to/store from.
-//
-/*static*/ unsigned emitter::insGetRegisterListSize(instruction ins)
-{
-    unsigned registerListSize = 0;
-
-    switch (ins)
-    {
-        case INS_ld1:
-        case INS_ld1r:
-        case INS_st1:
-        case INS_tbl:
-        case INS_tbx:
-            registerListSize = 1;
-            break;
-
-        case INS_ld1_2regs:
-        case INS_ld2:
-        case INS_ld2r:
-        case INS_st1_2regs:
-        case INS_st2:
-        case INS_tbl_2regs:
-        case INS_tbx_2regs:
-            registerListSize = 2;
-            break;
-
-        case INS_ld1_3regs:
-        case INS_ld3:
-        case INS_ld3r:
-        case INS_st1_3regs:
-        case INS_st3:
-        case INS_tbl_3regs:
-        case INS_tbx_3regs:
-            registerListSize = 3;
-            break;
-
-        case INS_ld1_4regs:
-        case INS_ld4:
-        case INS_ld4r:
-        case INS_st1_4regs:
-        case INS_st4:
-        case INS_tbl_4regs:
-        case INS_tbx_4regs:
-            registerListSize = 4;
-            break;
-
-        default:
-            assert(!"Unexpected instruction");
-            break;
-    }
-
-    return registerListSize;
-}
-
-//  For the given 'arrangement' returns the 'datasize' specified by the vector register arrangement
-//  asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
-//
-/*static*/ emitAttr emitter::optGetDatasize(insOpts arrangement)
-{
-    if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_2S) ||
-        (arrangement == INS_OPTS_1D))
-    {
-        return EA_8BYTE;
-    }
-    else if ((arrangement == INS_OPTS_16B) || (arrangement == INS_OPTS_8H) || (arrangement == INS_OPTS_4S) ||
-             (arrangement == INS_OPTS_2D))
-    {
-        return EA_16BYTE;
-    }
-    else
-    {
-        assert(!" invalid 'arrangement' value");
-        return EA_UNKNOWN;
-    }
-}
-
-//  For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement
-//  asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
-//
-/*static*/ emitAttr emitter::optGetElemsize(insOpts arrangement)
-{
-    if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_16B))
-    {
-        return EA_1BYTE;
-    }
-    else if ((arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_8H))
-    {
-        return EA_2BYTE;
-    }
-    else if ((arrangement == INS_OPTS_2S) || (arrangement == INS_OPTS_4S))
-    {
-        return EA_4BYTE;
-    }
-    else if ((arrangement == INS_OPTS_1D) || (arrangement == INS_OPTS_2D))
-    {
-        return EA_8BYTE;
-    }
-    else
-    {
-        assert(!" invalid 'arrangement' value");
-        return EA_UNKNOWN;
-    }
-}
-
-//  For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement
-//  asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
-//
-/*static*/ emitAttr emitter::optGetSveElemsize(insOpts arrangement)
-{
-    switch (arrangement)
-    {
-        case INS_OPTS_SCALABLE_B:
-            return EA_1BYTE;
-
-        case INS_OPTS_SCALABLE_H:
-            return EA_2BYTE;
-
-        case INS_OPTS_SCALABLE_S:
-        case INS_OPTS_SCALABLE_S_UXTW:
-        case INS_OPTS_SCALABLE_S_SXTW:
-            return EA_4BYTE;
-
-        case INS_OPTS_SCALABLE_D:
-        case INS_OPTS_SCALABLE_D_UXTW:
-        case INS_OPTS_SCALABLE_D_SXTW:
-            return EA_8BYTE;
-
-        case INS_OPTS_SCALABLE_Q:
-            return EA_16BYTE;
-
-        default:
-            assert(!"Invalid insOpt for vector register");
-            return EA_UNKNOWN;
-    }
-}
-
-/*static*/ insOpts emitter::optWidenElemsizeArrangement(insOpts arrangement)
-{
-    if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_16B))
-    {
-        return INS_OPTS_8H;
-    }
-    else if ((arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_8H))
-    {
-        return INS_OPTS_4S;
-    }
-    else if ((arrangement == INS_OPTS_2S) || (arrangement == INS_OPTS_4S))
-    {
-        return INS_OPTS_2D;
-    }
-    else
-    {
-        assert(!" invalid 'arrangement' value");
-        return INS_OPTS_NONE;
-    }
-}
-
-/*static*/ insOpts emitter::optWidenSveElemsizeArrangement(insOpts arrangement)
-{
-    switch (arrangement)
-    {
-        case INS_OPTS_SCALABLE_B:
-            return INS_OPTS_SCALABLE_H;
-
-        case INS_OPTS_SCALABLE_H:
-            return INS_OPTS_SCALABLE_S;
-
-        case INS_OPTS_SCALABLE_S:
-            return INS_OPTS_SCALABLE_D;
-
-        default:
-            assert(!" invalid 'arrangement' value");
-            return INS_OPTS_NONE;
-    }
-}
-
-/*static*/ insOpts emitter::optSveToQuadwordElemsizeArrangement(insOpts arrangement)
-{
-    switch (arrangement)
-    {
-        case INS_OPTS_SCALABLE_B:
-            return INS_OPTS_16B;
-
-        case INS_OPTS_SCALABLE_H:
-            return INS_OPTS_8H;
-
-        case INS_OPTS_SCALABLE_S:
-            return INS_OPTS_4S;
-
-        case INS_OPTS_SCALABLE_D:
-            return INS_OPTS_2D;
-
-        default:
-            assert(!" invalid 'arrangement' value");
-            return INS_OPTS_NONE;
-    }
-}
-
-/*static*/ emitAttr emitter::widenDatasize(emitAttr datasize)
-{
-    if (datasize == EA_1BYTE)
-    {
-        return EA_2BYTE;
-    }
-    else if (datasize == EA_2BYTE)
-    {
-        return EA_4BYTE;
-    }
-    else if (datasize == EA_4BYTE)
-    {
-        return EA_8BYTE;
-    }
-    else
-    {
-        assert(!" invalid 'datasize' value");
-        return EA_UNKNOWN;
-    }
-}
-
-//  For the given 'srcArrangement' returns the "widen" 'dstArrangement' specifying the destination vector register
-//  arrangement
-//  asserts and returns INS_OPTS_NONE if an invalid 'srcArrangement' value is passed
-//
-/*static*/ insOpts emitter::optWidenDstArrangement(insOpts srcArrangement)
-{
-    insOpts dstArrangement = INS_OPTS_NONE;
-
-    switch (srcArrangement)
-    {
-        case INS_OPTS_8B:
-            dstArrangement = INS_OPTS_4H;
-            break;
-
-        case INS_OPTS_16B:
-            dstArrangement = INS_OPTS_8H;
-            break;
-
-        case INS_OPTS_4H:
-            dstArrangement = INS_OPTS_2S;
-            break;
-
-        case INS_OPTS_8H:
-            dstArrangement = INS_OPTS_4S;
-            break;
-
-        case INS_OPTS_2S:
-            dstArrangement = INS_OPTS_1D;
-            break;
-
-        case INS_OPTS_4S:
-            dstArrangement = INS_OPTS_2D;
-            break;
-
-        default:
-            assert(!" invalid 'srcArrangement' value");
-            break;
-    }
-
-    return dstArrangement;
-}
-
-//  For the given 'conversion' returns the 'dstsize' specified by the conversion option
-/*static*/ emitAttr emitter::optGetDstsize(insOpts conversion)
-{
-    switch (conversion)
-    {
-        case INS_OPTS_S_TO_8BYTE:
-        case INS_OPTS_D_TO_8BYTE:
-        case INS_OPTS_4BYTE_TO_D:
-        case INS_OPTS_8BYTE_TO_D:
-        case INS_OPTS_S_TO_D:
-        case INS_OPTS_H_TO_D:
-
-            return EA_8BYTE;
-
-        case INS_OPTS_S_TO_4BYTE:
-        case INS_OPTS_D_TO_4BYTE:
-        case INS_OPTS_4BYTE_TO_S:
-        case INS_OPTS_8BYTE_TO_S:
-        case INS_OPTS_D_TO_S:
-        case INS_OPTS_H_TO_S:
-
-            return EA_4BYTE;
-
-        case INS_OPTS_S_TO_H:
-        case INS_OPTS_D_TO_H:
-
-            return EA_2BYTE;
-
-        default:
-            assert(!" invalid 'conversion' value");
-            return EA_UNKNOWN;
-    }
-}
-
-//  For the given 'conversion' returns the 'srcsize' specified by the conversion option
-/*static*/ emitAttr emitter::optGetSrcsize(insOpts conversion)
-{
-    switch (conversion)
-    {
-        case INS_OPTS_D_TO_8BYTE:
-        case INS_OPTS_D_TO_4BYTE:
-        case INS_OPTS_8BYTE_TO_D:
-        case INS_OPTS_8BYTE_TO_S:
-        case INS_OPTS_D_TO_S:
-        case INS_OPTS_D_TO_H:
-
-            return EA_8BYTE;
-
-        case INS_OPTS_S_TO_8BYTE:
-        case INS_OPTS_S_TO_4BYTE:
-        case INS_OPTS_4BYTE_TO_S:
-        case INS_OPTS_4BYTE_TO_D:
-        case INS_OPTS_S_TO_D:
-        case INS_OPTS_S_TO_H:
-
-            return EA_4BYTE;
-
-        case INS_OPTS_H_TO_S:
-        case INS_OPTS_H_TO_D:
-
-            return EA_2BYTE;
-
-        default:
-            assert(!" invalid 'conversion' value");
-            return EA_UNKNOWN;
-    }
-}
-
-//    For the given 'size' and 'index' returns true if it specifies a valid index for a vector register of 'size'
-/*static*/ bool emitter::isValidVectorIndex(emitAttr datasize, emitAttr elemsize, ssize_t index)
-{
-    assert(isValidVectorDatasize(datasize));
-    assert(isValidVectorElemsize(elemsize));
-
-    bool result = false;
-    if (index >= 0)
-    {
-        if (datasize == EA_8BYTE)
-        {
-            switch (elemsize)
-            {
-                case EA_1BYTE:
-                    result = (index < 8);
-                    break;
-                case EA_2BYTE:
-                    result = (index < 4);
-                    break;
-                case EA_4BYTE:
-                    result = (index < 2);
-                    break;
-                case EA_8BYTE:
-                    result = (index < 1);
-                    break;
-                default:
-                    unreached();
-                    break;
-            }
-        }
-        else if (datasize == EA_16BYTE)
-        {
-            switch (elemsize)
-            {
-                case EA_1BYTE:
-                    result = (index < 16);
-                    break;
-                case EA_2BYTE:
-                    result = (index < 8);
-                    break;
-                case EA_4BYTE:
-                    result = (index < 4);
-                    break;
-                case EA_8BYTE:
-                    result = (index < 2);
-                    break;
-                default:
-                    unreached();
-                    break;
-            }
-        }
-    }
-    return result;
-}
-
-/*****************************************************************************
- *
- *  Add an instruction with no operands.
- */
-
-void emitter::emitIns(instruction ins)
-{
-    instrDesc* id  = emitNewInstrSmall(EA_8BYTE);
-    insFormat  fmt = emitInsFormat(ins);
-
-    if (ins != INS_BREAKPOINT)
-    {
-        assert(fmt == IF_SN_0A);
-    }
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction with a single immediate value.
- */
-
-void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm)
-{
-    insFormat fmt = IF_NONE;
-
-    /* Figure out the encoding format of the instruction */
-    if (ins == INS_BREAKPOINT)
-    {
-        if ((imm & 0x0000ffff) == imm)
-        {
-            fmt = IF_SI_0A;
-        }
-        else
-        {
-            assert(!"Instruction cannot be encoded: IF_SI_0A");
-        }
-    }
-    else if (ins == INS_sve_setffr)
-    {
-        fmt  = IF_SVE_DQ_0A;
-        attr = EA_PTRSIZE;
-        imm  = 0;
-    }
-    else
-    {
-        unreached();
-    }
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstrSC(attr, imm);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing a single register.
- */
-
-void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts opt /* = INS_OPTS_NONE */)
-{
-    insFormat  fmt = IF_NONE;
-    instrDesc* id  = emitNewInstrSmall(attr);
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_br:
-        case INS_ret:
-            assert(isGeneralRegister(reg));
-            fmt = IF_BR_1A;
-            break;
-
-        case INS_dczva:
-            assert(isGeneralRegister(reg));
-            assert(attr == EA_8BYTE);
-            fmt = IF_SR_1A;
-            break;
-
-        case INS_mrs_tpid0:
-            fmt = IF_SR_1A;
-            break;
-
-        case INS_sve_aesmc:
-        case INS_sve_aesimc:
-            id->idInsOpt(INS_OPTS_SCALABLE_B);
-            assert(isVectorRegister(reg)); // ddddd
-            assert(isScalableVectorSize(attr));
-            fmt = IF_SVE_GL_1A;
-            break;
-
-        case INS_sve_rdffr:
-            id->idInsOpt(INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(reg)); // DDDD
-            fmt = IF_SVE_DH_1A;
-            break;
-
-        case INS_sve_pfalse:
-            id->idInsOpt(INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(reg)); // DDDD
-            fmt = IF_SVE_DJ_1A;
-            break;
-
-        case INS_sve_wrffr:
-            id->idInsOpt(INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(reg)); // NNNN
-            fmt = IF_SVE_DR_1A;
-            break;
-
-        case INS_sve_ptrue:
-            assert(insOptsScalableStandard(opt));
-            assert(isHighPredicateRegister(reg));                  // DDD
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-            id->idInsOpt(opt);
-            fmt = IF_SVE_DZ_1A;
-            break;
-
-        case INS_sve_fmov:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg));                         // ddddd
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-            id->idReg1(reg);
-            id->idInsOpt(opt);
-            fmt = IF_SVE_EB_1B;
-
-            // FMOV is a pseudo-instruction for DUP, which is aliased by MOV;
-            // MOV is the preferred disassembly
-            ins = INS_sve_mov;
-            break;
-
-        default:
-            unreached();
-    }
-
-    assert(fmt != IF_NONE);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idReg1(reg);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing a register and a constant.
- */
-
-void emitter::emitIns_R_I(instruction     ins,
-                          emitAttr        attr,
-                          regNumber       reg,
-                          ssize_t         imm,
-                          insOpts         opt /* = INS_OPTS_NONE */,
-                          insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */
-                          DEBUGARG(size_t targetHandle /* = 0 */) DEBUGARG(GenTreeFlags gtFlags /* = GTF_EMPTY */))
-{
-    emitAttr  size          = EA_SIZE(attr);
-    emitAttr  elemsize      = EA_UNKNOWN;
-    insFormat fmt           = IF_NONE;
-    bool      canEncode     = false;
-    bool      signedImm     = false;
-    bool      optionalShift = false;
-    bool      hasShift      = true;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        bitMaskImm     bmi;
-        halfwordImm    hwi;
-        byteShiftedImm bsi;
-        ssize_t        notOfImm;
-
-        case INS_tst:
-            assert(insOptsNone(opt));
-            assert(isGeneralRegister(reg));
-            bmi.immNRS = 0;
-            canEncode  = canEncodeBitMaskImm(imm, size, &bmi);
-            if (canEncode)
-            {
-                imm = bmi.immNRS;
-                assert(isValidImmNRS(imm, size));
-                fmt = IF_DI_1C;
-            }
-            break;
-
-        case INS_movk:
-        case INS_movn:
-        case INS_movz:
-            assert(isValidGeneralDatasize(size));
-            assert(insOptsNone(opt)); // No LSL here (you must use emitIns_R_I_I if a shift is needed)
-            assert(isGeneralRegister(reg));
-            assert(isValidUimm16(imm));
-
-            hwi.immHW  = 0;
-            hwi.immVal = imm;
-            assert(imm == emitDecodeHalfwordImm(hwi, size));
-
-            imm       = hwi.immHWVal;
-            canEncode = true;
-            fmt       = IF_DI_1B;
-            break;
-
-        case INS_mov:
-            assert(isValidGeneralDatasize(size));
-            assert(insOptsNone(opt)); // No explicit LSL here
-            // We will automatically determine the shift based upon the imm
-
-            // First try the standard 'halfword immediate' imm(i16,hw)
-            hwi.immHWVal = 0;
-            canEncode    = canEncodeHalfwordImm(imm, size, &hwi);
-            if (canEncode)
-            {
-                // uses a movz encoding
-                assert(isGeneralRegister(reg));
-                imm = hwi.immHWVal;
-                assert(isValidImmHWVal(imm, size));
-                fmt = IF_DI_1B;
-                break;
-            }
-
-            // Next try the ones-complement form of 'halfword immediate' imm(i16,hw)
-            notOfImm  = NOT_helper(imm, getBitWidth(size));
-            canEncode = canEncodeHalfwordImm(notOfImm, size, &hwi);
-            if (canEncode)
-            {
-                assert(isGeneralRegister(reg));
-                imm = hwi.immHWVal;
-                ins = INS_movn; // uses a movn encoding
-                assert(isValidImmHWVal(imm, size));
-                fmt = IF_DI_1B;
-                break;
-            }
-
-            // Finally try the 'bitmask immediate' imm(N,r,s)
-            bmi.immNRS = 0;
-            canEncode  = canEncodeBitMaskImm(imm, size, &bmi);
-            if (canEncode)
-            {
-                assert(isGeneralRegisterOrSP(reg));
-                reg = encodingSPtoZR(reg);
-                imm = bmi.immNRS;
-                assert(isValidImmNRS(imm, size));
-                fmt = IF_DI_1D;
-                break;
-            }
-            else
-            {
-                assert(!"Instruction cannot be encoded: mov imm");
-            }
-
-            break;
-
-        case INS_movi:
-            assert(isValidVectorDatasize(size));
-            assert(isVectorRegister(reg));
-            if (insOptsNone(opt) && (size == EA_8BYTE))
-            {
-                opt = INS_OPTS_1D;
-            }
-            assert(isValidArrangement(size, opt));
-            elemsize = optGetElemsize(opt);
-
-            if (elemsize == EA_8BYTE)
-            {
-                size_t   uimm = imm;
-                ssize_t  imm8 = 0;
-                unsigned pos  = 0;
-                canEncode     = true;
-                while (uimm != 0)
-                {
-                    INT64 loByte = uimm & 0xFF;
-                    if (((loByte == 0) || (loByte == 0xFF)) && (pos < 8))
-                    {
-                        if (loByte == 0xFF)
-                        {
-                            imm8 |= (ssize_t{1} << pos);
-                        }
-                        uimm >>= 8;
-                        pos++;
-                    }
-                    else
-                    {
-                        canEncode = false;
-                        break;
-                    }
-                }
-                imm = imm8;
-                assert(isValidUimm8(imm));
-                fmt = IF_DV_1B;
-                break;
-            }
-            else
-            {
-                // Vector operation
-
-                // No explicit LSL/MSL is used for the immediate
-                // We will automatically determine the shift based upon the value of imm
-
-                // First try the standard 'byteShifted immediate' imm(i8,bySh)
-                bsi.immBSVal = 0;
-                canEncode    = canEncodeByteShiftedImm(imm, elemsize, true, &bsi);
-                if (canEncode)
-                {
-                    imm = bsi.immBSVal;
-                    assert(isValidImmBSVal(imm, size));
-                    fmt = IF_DV_1B;
-                    break;
-                }
-
-                // Next try the ones-complement form of the 'immediate' imm(i8,bySh)
-                if ((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)) // Only EA_2BYTE or EA_4BYTE forms
-                {
-                    notOfImm  = NOT_helper(imm, getBitWidth(elemsize));
-                    canEncode = canEncodeByteShiftedImm(notOfImm, elemsize, true, &bsi);
-                    if (canEncode)
-                    {
-                        imm = bsi.immBSVal;
-                        ins = INS_mvni; // uses a mvni encoding
-                        assert(isValidImmBSVal(imm, size));
-                        fmt = IF_DV_1B;
-                        break;
-                    }
-                }
-            }
-            break;
-
-        case INS_orr:
-        case INS_bic:
-        case INS_mvni:
-            assert(isValidVectorDatasize(size));
-            assert(isVectorRegister(reg));
-            assert(isValidArrangement(size, opt));
-            elemsize = optGetElemsize(opt);
-            assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms
-
-            // Vector operation
-
-            // No explicit LSL/MSL is used for the immediate
-            // We will automatically determine the shift based upon the value of imm
-
-            // First try the standard 'byteShifted immediate' imm(i8,bySh)
-            bsi.immBSVal = 0;
-            canEncode    = canEncodeByteShiftedImm(imm, elemsize,
-                                                (ins == INS_mvni), // mvni supports the ones shifting variant (aka MSL)
-                                                &bsi);
-            if (canEncode)
-            {
-                imm = bsi.immBSVal;
-                assert(isValidImmBSVal(imm, size));
-                fmt = IF_DV_1B;
-                break;
-            }
-            break;
-
-        case INS_cmp:
-        case INS_cmn:
-            assert(insOptsNone(opt));
-            assert(isGeneralRegister(reg));
-
-            if (unsigned_abs(imm) <= 0x0fff)
-            {
-                if (imm < 0)
-                {
-                    ins = insReverse(ins);
-                    imm = -imm;
-                }
-                assert(isValidUimm12(imm));
-                canEncode = true;
-                fmt       = IF_DI_1A;
-            }
-            else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding
-            {
-                // Encoding will use a 12-bit left shift of the immediate
-                opt = INS_OPTS_LSL12;
-                if (imm < 0)
-                {
-                    ins = insReverse(ins);
-                    imm = -imm;
-                }
-                assert((imm & 0xfff) == 0);
-                imm >>= 12;
-                assert(isValidUimm12(imm));
-                canEncode = true;
-                fmt       = IF_DI_1A;
-            }
-            else
-            {
-                assert(!"Instruction cannot be encoded: IF_DI_1A");
-            }
-            break;
-
-        case INS_sve_smax:
-        case INS_sve_smin:
-            signedImm = true;
-
-            FALLTHROUGH;
-        case INS_sve_umax:
-        case INS_sve_umin:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg));                         // ddddd
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-
-            if (signedImm)
-            {
-                assert(isValidSimm8(imm)); // iiiiiiii
-            }
-            else
-            {
-                assert(isValidUimm8(imm)); // iiiiiiii
-            }
-
-            fmt       = IF_SVE_ED_1A;
-            canEncode = true;
-            break;
-
-        case INS_sve_mul:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg));                         // ddddd
-            assert(isValidSimm8(imm));                             // iiiiiiii
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-            fmt       = IF_SVE_EE_1A;
-            canEncode = true;
-            break;
-
-        case INS_sve_mov:
-        case INS_sve_dup:
-            optionalShift = true;
-            hasShift      = (sopt == INS_SCALABLE_OPTS_SHIFT);
-
-            assert(insOptsScalableStandard(opt));
-            // Size specifier must be able to fit left-shifted immediate
-            assert(!hasShift || insOptsScalableAtLeastHalf(opt));
-            assert(insScalableOptsNone(sopt) || hasShift);         // h
-            assert(isVectorRegister(reg));                         // ddddd
-            assert(isValidSimm8(imm));                             // iiiiiiii
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-            fmt       = IF_SVE_EB_1A;
-            canEncode = true;
-
-            // MOV is an alias for DUP, and is always the preferred disassembly.
-            ins = INS_sve_mov;
-            break;
-
-        case INS_sve_add:
-        case INS_sve_sub:
-        case INS_sve_sqadd:
-        case INS_sve_sqsub:
-        case INS_sve_uqadd:
-        case INS_sve_uqsub:
-        case INS_sve_subr:
-            optionalShift = true;
-            hasShift      = (sopt == INS_SCALABLE_OPTS_SHIFT);
-
-            assert(insOptsScalableStandard(opt));
-            // Size specifier must be able to fit left-shifted immediate
-            assert(!hasShift || insOptsScalableAtLeastHalf(opt));
-            assert(insScalableOptsNone(sopt) || hasShift);         // h
-            assert(isVectorRegister(reg));                         // ddddd
-            assert(isValidUimm8(imm));                             // iiiiiiii
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-            fmt       = IF_SVE_EC_1A;
-            canEncode = true;
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-
-    assert(canEncode);
-    assert(fmt != IF_NONE);
-
-    instrDesc* id;
-
-    if (!optionalShift)
-    {
-        id = emitNewInstrSC(attr, imm);
-    }
-    else
-    {
-        // Instructions with optional shifts (MOV, DUP, etc.) need larger instrDesc to store state
-        id = emitNewInstrCns(attr, imm);
-        id->idOptionalShift(hasShift);
-    }
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(reg);
-
-#ifdef DEBUG
-    id->idDebugOnlyInfo()->idMemCookie = targetHandle;
-    id->idDebugOnlyInfo()->idFlags     = gtFlags;
-#endif
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing a register and a floating point constant.
- */
-
-void emitter::emitIns_R_F(
-    instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt /* = INS_OPTS_NONE */)
-
-{
-    emitAttr  size      = EA_SIZE(attr);
-    emitAttr  elemsize  = EA_UNKNOWN;
-    insFormat fmt       = IF_NONE;
-    ssize_t   imm       = 0;
-    bool      canEncode = false;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        floatImm8 fpi;
-
-        case INS_fcmp:
-        case INS_fcmpe:
-            assert(insOptsNone(opt));
-            assert(isValidVectorElemsizeFloat(size));
-            assert(isVectorRegister(reg));
-            if (immDbl == 0.0)
-            {
-                canEncode = true;
-                fmt       = IF_DV_1C;
-            }
-            break;
-
-        case INS_fmov:
-            assert(isVectorRegister(reg));
-            fpi.immFPIVal = 0;
-            canEncode     = canEncodeFloatImm8(immDbl, &fpi);
-
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                elemsize = optGetElemsize(opt);
-                assert(isValidVectorElemsizeFloat(elemsize));
-                assert(opt != INS_OPTS_1D); // Reserved encoding
-
-                if (canEncode)
-                {
-                    imm = fpi.immFPIVal;
-                    assert((imm >= 0) && (imm <= 0xff));
-                    fmt = IF_DV_1B;
-                }
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert(isValidVectorElemsizeFloat(size));
-
-                if (canEncode)
-                {
-                    imm = fpi.immFPIVal;
-                    assert((imm >= 0) && (imm <= 0xff));
-                    fmt = IF_DV_1A;
-                }
-            }
-            break;
-
-        case INS_sve_fmov:
-        case INS_sve_fdup:
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(isVectorRegister(reg));                         // ddddd
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-
-            fpi.immFPIVal = 0;
-            canEncode     = canEncodeFloatImm8(immDbl, &fpi);
-            imm           = fpi.immFPIVal;
-            fmt           = IF_SVE_EA_1A;
-
-            // FMOV is an alias for FDUP, and is always the preferred disassembly.
-            ins = INS_sve_fmov;
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-
-    assert(canEncode);
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstrSC(attr, imm);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(reg);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-//------------------------------------------------------------------------
-// emitIns_Mov: Emits a move instruction
-//
-// Arguments:
-//    ins       -- The instruction being emitted
-//    attr      -- The emit attribute
-//    dstReg    -- The destination register
-//    srcReg    -- The source register
-//    canSkip   -- true if the move can be elided when dstReg == srcReg, otherwise false
-//    insOpts   -- The instruction options
-//
-void emitter::emitIns_Mov(
-    instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */)
-{
-    assert(IsMovInstruction(ins));
-
-    emitAttr  size     = EA_SIZE(attr);
-    emitAttr  elemsize = EA_UNKNOWN;
-    insFormat fmt      = IF_NONE;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_mov:
-        {
-            assert(insOptsNone(opt));
-
-            if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
-            {
-                // These instructions have no side effect and can be skipped
-                return;
-            }
-
-            // Check for the 'mov' aliases for the vector registers
-            if (isVectorRegister(dstReg))
-            {
-                if (isVectorRegister(srcReg) && isValidVectorDatasize(size))
-                {
-                    return emitIns_R_R_R(INS_mov, size, dstReg, srcReg, srcReg);
-                }
-                else
-                {
-                    return emitIns_R_R_I(INS_mov, size, dstReg, srcReg, 0);
-                }
-            }
-            else
-            {
-                if (isVectorRegister(srcReg))
-                {
-                    assert(isGeneralRegister(dstReg));
-                    return emitIns_R_R_I(INS_mov, size, dstReg, srcReg, 0);
-                }
-            }
-
-            // Is this a MOV to/from SP instruction?
-            if ((dstReg == REG_SP) || (srcReg == REG_SP))
-            {
-                assert(isGeneralRegisterOrSP(dstReg));
-                assert(isGeneralRegisterOrSP(srcReg));
-                dstReg = encodingSPtoZR(dstReg);
-                srcReg = encodingSPtoZR(srcReg);
-                fmt    = IF_DR_2G;
-            }
-            else
-            {
-                assert(insOptsNone(opt));
-                assert(isGeneralRegister(dstReg));
-                assert(isGeneralRegisterOrZR(srcReg));
-                fmt = IF_DR_2E;
-            }
-            break;
-        }
-
-        case INS_sxtw:
-        {
-            assert((size == EA_8BYTE) || (size == EA_4BYTE));
-            FALLTHROUGH;
-        }
-
-        case INS_sxtb:
-        case INS_sxth:
-        case INS_uxtb:
-        case INS_uxth:
-        {
-            if (canSkip && (dstReg == srcReg))
-            {
-                // There are scenarios such as in genCallInstruction where the sign/zero extension should be elided
-                return;
-            }
-
-            assert(insOptsNone(opt));
-            assert(isValidGeneralDatasize(size));
-            assert(isGeneralRegister(dstReg));
-            assert(isGeneralRegister(srcReg));
-            fmt = IF_DR_2H;
-            break;
-        }
-
-        case INS_fmov:
-        {
-            assert(isValidVectorElemsizeFloat(size));
-
-            if (canSkip && (dstReg == srcReg))
-            {
-                // These instructions have no side effect and can be skipped
-                return;
-            }
-
-            if (isVectorRegister(dstReg))
-            {
-                if (isVectorRegister(srcReg))
-                {
-                    assert(insOptsNone(opt));
-                    fmt = IF_DV_2G;
-                }
-                else
-                {
-                    assert(isGeneralRegister(srcReg));
-
-                    // if the optional conversion specifier is not present we calculate it
-                    if (opt == INS_OPTS_NONE)
-                    {
-                        opt = (size == EA_4BYTE) ? INS_OPTS_4BYTE_TO_S : INS_OPTS_8BYTE_TO_D;
-                    }
-                    assert(insOptsConvertIntToFloat(opt));
-
-                    fmt = IF_DV_2I;
-                }
-            }
-            else
-            {
-                assert(isGeneralRegister(dstReg));
-                assert(isVectorRegister(srcReg));
-
-                // if the optional conversion specifier is not present we calculate it
-                if (opt == INS_OPTS_NONE)
-                {
-                    opt = (size == EA_4BYTE) ? INS_OPTS_S_TO_4BYTE : INS_OPTS_D_TO_8BYTE;
-                }
-                assert(insOptsConvertFloatToInt(opt));
-
-                fmt = IF_DV_2H;
-            }
-            break;
-        }
-
-        default:
-        {
-            unreached();
-        }
-    }
-
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstrSmall(attr);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(dstReg);
-    id->idReg2(srcReg);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing two registers
- */
-
-void emitter::emitIns_R_R(instruction     ins,
-                          emitAttr        attr,
-                          regNumber       reg1,
-                          regNumber       reg2,
-                          insOpts         opt /* = INS_OPTS_NONE */,
-                          insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
-{
-    if (IsMovInstruction(ins))
-    {
-        assert(!"Please use emitIns_Mov() to correctly handle move elision");
-        emitIns_Mov(ins, attr, reg1, reg2, /* canSkip */ false, opt);
-    }
-
-    emitAttr  size     = EA_SIZE(attr);
-    emitAttr  elemsize = EA_UNKNOWN;
-    insFormat fmt      = IF_NONE;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_dup:
-            // Vector operation
-            assert(insOptsAnyArrangement(opt));
-            assert(isVectorRegister(reg1));
-            assert(isGeneralRegisterOrZR(reg2));
-            assert(isValidVectorDatasize(size));
-            assert(isValidArrangement(size, opt));
-            assert(opt != INS_OPTS_1D); // Reserved encoding
-            fmt = IF_DV_2C;
-            break;
-
-        case INS_abs:
-        case INS_not:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            if (ins == INS_not)
-            {
-                assert(isValidVectorDatasize(size));
-                // Bitwise behavior is independent of element size, but is always encoded as 1 Byte
-                opt = optMakeArrangement(size, EA_1BYTE);
-            }
-            if (insOptsNone(opt))
-            {
-                // Scalar operation
-                assert(size == EA_8BYTE); // Only type D is supported
-                fmt = IF_DV_2L;
-            }
-            else
-            {
-                // Vector operation
-                assert(insOptsAnyArrangement(opt));
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                elemsize = optGetElemsize(opt);
-                fmt      = IF_DV_2M;
-            }
-            break;
-
-        case INS_mvn:
-        case INS_neg:
-            if (isVectorRegister(reg1))
-            {
-                assert(isVectorRegister(reg2));
-                if (ins == INS_mvn)
-                {
-                    assert(isValidVectorDatasize(size));
-                    // Bitwise behavior is independent of element size, but is always encoded as 1 Byte
-                    opt = optMakeArrangement(size, EA_1BYTE);
-                }
-                if (insOptsNone(opt))
-                {
-                    // Scalar operation
-                    assert(size == EA_8BYTE); // Only type D is supported
-                    fmt = IF_DV_2L;
-                }
-                else
-                {
-                    // Vector operation
-                    assert(isValidVectorDatasize(size));
-                    assert(isValidArrangement(size, opt));
-                    elemsize = optGetElemsize(opt);
-                    fmt      = IF_DV_2M;
-                }
-                break;
-            }
-            FALLTHROUGH;
-
-        case INS_negs:
-            assert(insOptsNone(opt));
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegisterOrZR(reg2));
-            fmt = IF_DR_2E;
-            break;
-
-        case INS_sxtl:
-        case INS_sxtl2:
-        case INS_uxtl:
-        case INS_uxtl2:
-            return emitIns_R_R_I(ins, size, reg1, reg2, 0, opt);
-
-        case INS_cls:
-        case INS_clz:
-        case INS_rbit:
-        case INS_rev16:
-        case INS_rev32:
-        case INS_cnt:
-            if (isVectorRegister(reg1))
-            {
-                assert(isVectorRegister(reg2));
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                elemsize = optGetElemsize(opt);
-                if ((ins == INS_cls) || (ins == INS_clz))
-                {
-                    assert(elemsize != EA_8BYTE); // No encoding for type D
-                }
-                else if (ins == INS_rev32)
-                {
-                    assert((elemsize == EA_2BYTE) || (elemsize == EA_1BYTE));
-                }
-                else
-                {
-                    assert(elemsize == EA_1BYTE); // Only supports 8B or 16B
-                }
-                fmt = IF_DV_2M;
-                break;
-            }
-            if (ins == INS_cnt)
-            {
-                // Doesn't have general register version(s)
-                break;
-            }
-
-            FALLTHROUGH;
-
-        case INS_rev:
-            assert(insOptsNone(opt));
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegister(reg2));
-            if (ins == INS_rev32)
-            {
-                assert(size == EA_8BYTE);
-            }
-            else
-            {
-                assert(isValidGeneralDatasize(size));
-            }
-            fmt = IF_DR_2G;
-            break;
-
-        case INS_addv:
-        case INS_saddlv:
-        case INS_smaxv:
-        case INS_sminv:
-        case INS_uaddlv:
-        case INS_umaxv:
-        case INS_uminv:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isValidVectorDatasize(size));
-            assert(isValidArrangement(size, opt));
-            assert((opt != INS_OPTS_2S) && (opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // Reserved encodings
-            fmt = IF_DV_2T;
-            break;
-
-        case INS_rev64:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isValidVectorDatasize(size));
-            assert(isValidArrangement(size, opt));
-            elemsize = optGetElemsize(opt);
-            assert(elemsize != EA_8BYTE); // No encoding for type D
-            fmt = IF_DV_2M;
-            break;
-
-        case INS_sqxtn:
-        case INS_sqxtun:
-        case INS_uqxtn:
-            if (insOptsNone(opt))
-            {
-                // Scalar operation
-                assert(isVectorRegister(reg1));
-                assert(isVectorRegister(reg2));
-                assert(isValidVectorElemsize(size));
-                assert(size != EA_8BYTE); // The encoding size = 11 is reserved.
-                fmt = IF_DV_2L;
-                break;
-            }
-            FALLTHROUGH;
-
-        case INS_xtn:
-            // Vector operation
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(size == EA_8BYTE);
-            assert(isValidArrangement(size, opt));
-            assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = x is reserved
-            fmt = IF_DV_2M;
-            break;
-
-        case INS_sqxtn2:
-        case INS_sqxtun2:
-        case INS_uqxtn2:
-        case INS_xtn2:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(size == EA_16BYTE);
-            assert(isValidArrangement(size, opt));
-            assert(opt != INS_OPTS_2D); // The encoding size = 11, Q = x is reserved
-            fmt = IF_DV_2M;
-            break;
-
-        case INS_ldar:
-        case INS_ldapr:
-        case INS_ldaxr:
-        case INS_ldxr:
-        case INS_stlr:
-            assert(isValidGeneralDatasize(size));
-
-            FALLTHROUGH;
-
-        case INS_ldarb:
-        case INS_ldaprb:
-        case INS_ldaxrb:
-        case INS_ldxrb:
-        case INS_ldarh:
-        case INS_ldaprh:
-        case INS_ldaxrh:
-        case INS_ldxrh:
-        case INS_stlrb:
-        case INS_stlrh:
-            assert(isValidGeneralLSDatasize(size));
-            assert(isGeneralRegisterOrZR(reg1));
-            assert(isGeneralRegisterOrSP(reg2));
-            assert(insOptsNone(opt));
-
-            reg2 = encodingSPtoZR(reg2);
-
-            fmt = IF_LS_2A;
-            break;
-
-        case INS_ldr:
-        case INS_ldrb:
-        case INS_ldrh:
-        case INS_ldrsb:
-        case INS_ldrsh:
-        case INS_ldrsw:
-        case INS_str:
-        case INS_strb:
-        case INS_strh:
-        case INS_cmn:
-        case INS_tst:
-            assert(insOptsNone(opt));
-            emitIns_R_R_I(ins, attr, reg1, reg2, 0, INS_OPTS_NONE);
-            return;
-
-        case INS_cmp:
-            emitIns_R_R_I(ins, attr, reg1, reg2, 0, opt);
-            return;
-
-        case INS_staddb:
-            emitIns_R_R_R(INS_ldaddb, attr, reg1, REG_ZR, reg2);
-            return;
-        case INS_staddlb:
-            emitIns_R_R_R(INS_ldaddlb, attr, reg1, REG_ZR, reg2);
-            return;
-        case INS_staddh:
-            emitIns_R_R_R(INS_ldaddh, attr, reg1, REG_ZR, reg2);
-            return;
-        case INS_staddlh:
-            emitIns_R_R_R(INS_ldaddlh, attr, reg1, REG_ZR, reg2);
-            return;
-        case INS_stadd:
-            emitIns_R_R_R(INS_ldadd, attr, reg1, REG_ZR, reg2);
-            return;
-        case INS_staddl:
-            emitIns_R_R_R(INS_ldaddl, attr, reg1, REG_ZR, reg2);
-            return;
-
-        case INS_fcmp:
-        case INS_fcmpe:
-            assert(insOptsNone(opt));
-            assert(isValidVectorElemsizeFloat(size));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            fmt = IF_DV_2K;
-            break;
-
-        case INS_fcvtns:
-        case INS_fcvtnu:
-        case INS_fcvtas:
-        case INS_fcvtau:
-        case INS_fcvtps:
-        case INS_fcvtpu:
-        case INS_fcvtms:
-        case INS_fcvtmu:
-        case INS_fcvtzs:
-        case INS_fcvtzu:
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isVectorRegister(reg1));
-                assert(isVectorRegister(reg2));
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                elemsize = optGetElemsize(opt);
-                assert(isValidVectorElemsizeFloat(elemsize));
-                assert(opt != INS_OPTS_1D); // Reserved encoding
-                fmt = IF_DV_2A;
-            }
-            else
-            {
-                // Scalar operation
-                assert(isVectorRegister(reg2));
-                if (isVectorRegister(reg1))
-                {
-                    assert(insOptsNone(opt));
-                    assert(isValidVectorElemsizeFloat(size));
-                    fmt = IF_DV_2G;
-                }
-                else
-                {
-                    assert(isGeneralRegister(reg1));
-                    assert(insOptsConvertFloatToInt(opt));
-                    assert(isValidVectorElemsizeFloat(size));
-                    fmt = IF_DV_2H;
-                }
-            }
-            break;
-
-        case INS_fcvtl:
-        case INS_fcvtn:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(size == EA_8BYTE);
-            assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
-            fmt = IF_DV_2A;
-            break;
-
-        case INS_fcvtl2:
-        case INS_fcvtn2:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(size == EA_16BYTE);
-            assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S));
-            fmt = IF_DV_2A;
-            break;
-
-        case INS_fcvtxn:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(size == EA_8BYTE);
-                assert(opt == INS_OPTS_2S);
-                fmt = IF_DV_2A;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert(size == EA_4BYTE);
-                fmt = IF_DV_2G;
-            }
-            break;
-
-        case INS_fcvtxn2:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(size == EA_16BYTE);
-            assert(opt == INS_OPTS_4S);
-            fmt = IF_DV_2A;
-            break;
-
-        case INS_scvtf:
-        case INS_ucvtf:
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isVectorRegister(reg1));
-                assert(isVectorRegister(reg2));
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                elemsize = optGetElemsize(opt);
-                assert(isValidVectorElemsizeFloat(elemsize));
-                assert(opt != INS_OPTS_1D); // Reserved encoding
-                fmt = IF_DV_2A;
-            }
-            else
-            {
-                // Scalar operation
-                assert(isVectorRegister(reg1));
-                if (isVectorRegister(reg2))
-                {
-                    assert(insOptsNone(opt));
-                    assert(isValidVectorElemsizeFloat(size));
-                    fmt = IF_DV_2G;
-                }
-                else
-                {
-                    assert(isGeneralRegister(reg2));
-                    assert(insOptsConvertIntToFloat(opt));
-                    assert(isValidVectorElemsizeFloat(size));
-                    fmt = IF_DV_2I;
-                }
-            }
-            break;
-
-        case INS_fabs:
-        case INS_fneg:
-        case INS_fsqrt:
-        case INS_frinta:
-        case INS_frinti:
-        case INS_frintm:
-        case INS_frintn:
-        case INS_frintp:
-        case INS_frintx:
-        case INS_frintz:
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isVectorRegister(reg1));
-                assert(isVectorRegister(reg2));
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                elemsize = optGetElemsize(opt);
-                assert(isValidVectorElemsizeFloat(elemsize));
-                assert(opt != INS_OPTS_1D); // Reserved encoding
-                fmt = IF_DV_2A;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert(isValidVectorElemsizeFloat(size));
-                assert(isVectorRegister(reg1));
-                assert(isVectorRegister(reg2));
-                fmt = IF_DV_2G;
-            }
-            break;
-
-        case INS_faddp:
-        case INS_fmaxnmp:
-        case INS_fmaxp:
-        case INS_fminnmp:
-        case INS_fminp:
-            // Scalar operation
-            assert(((size == EA_8BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_16BYTE) && (opt == INS_OPTS_2D)));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            fmt = IF_DV_2Q;
-            break;
-
-        case INS_fmaxnmv:
-        case INS_fmaxv:
-        case INS_fminnmv:
-        case INS_fminv:
-            assert(size == EA_16BYTE);
-            assert(opt == INS_OPTS_4S);
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            fmt = IF_DV_2R;
-            break;
-
-        case INS_addp:
-            assert(size == EA_16BYTE);
-            assert(opt == INS_OPTS_2D);
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            fmt = IF_DV_2S;
-            break;
-
-        case INS_fcvt:
-            assert(insOptsConvertFloatToFloat(opt));
-            assert(isValidVectorFcvtsize(size));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            fmt = IF_DV_2J;
-            break;
-
-        case INS_cmeq:
-        case INS_cmge:
-        case INS_cmgt:
-        case INS_cmle:
-        case INS_cmlt:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                elemsize = optGetElemsize(opt);
-                fmt      = IF_DV_2M;
-            }
-            else
-            {
-                // Scalar operation
-                assert(size == EA_8BYTE);
-                assert(insOptsNone(opt));
-                fmt = IF_DV_2L;
-            }
-            break;
-
-        case INS_fcmeq:
-        case INS_fcmge:
-        case INS_fcmgt:
-        case INS_fcmle:
-        case INS_fcmlt:
-        case INS_frecpe:
-        case INS_frsqrte:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                elemsize = optGetElemsize(opt);
-                assert(isValidVectorElemsizeFloat(elemsize)); // Only Double/Float supported
-                assert(opt != INS_OPTS_1D);                   // Reserved encoding
-                fmt = IF_DV_2A;
-            }
-            else
-            {
-                // Scalar operation
-                assert(isValidScalarDatasize(size)); // Only Double/Float supported
-                assert(insOptsNone(opt));
-                fmt = IF_DV_2G;
-            }
-            break;
-
-        case INS_aesd:
-        case INS_aese:
-        case INS_aesmc:
-        case INS_aesimc:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isValidVectorDatasize(size));
-            elemsize = optGetElemsize(opt);
-            assert(elemsize == EA_1BYTE);
-            fmt = IF_DV_2P;
-            break;
-
-        case INS_sha1h:
-            assert(insOptsNone(opt));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            fmt = IF_DV_2U;
-            break;
-
-        case INS_sha256su0:
-        case INS_sha1su1:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isValidVectorDatasize(size));
-            elemsize = optGetElemsize(opt);
-            assert(elemsize == EA_4BYTE);
-            fmt = IF_DV_2P;
-            break;
-
-        case INS_ld2:
-        case INS_ld3:
-        case INS_ld4:
-        case INS_st2:
-        case INS_st3:
-        case INS_st4:
-            assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1
-            FALLTHROUGH;
-
-        case INS_ld1:
-        case INS_ld1_2regs:
-        case INS_ld1_3regs:
-        case INS_ld1_4regs:
-        case INS_st1:
-        case INS_st1_2regs:
-        case INS_st1_3regs:
-        case INS_st1_4regs:
-        case INS_ld1r:
-        case INS_ld2r:
-        case INS_ld3r:
-        case INS_ld4r:
-            assert(isVectorRegister(reg1));
-            assert(isGeneralRegisterOrSP(reg2));
-            assert(isValidVectorDatasize(size));
-            assert(isValidArrangement(size, opt));
-
-            // Load/Store multiple structures       base register
-            // Load single structure and replicate  base register
-            reg2 = encodingSPtoZR(reg2);
-            fmt  = IF_LS_2D;
-            break;
-
-        case INS_urecpe:
-        case INS_ursqrte:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isValidVectorDatasize(size));
-            assert(isValidArrangement(size, opt));
-            elemsize = optGetElemsize(opt);
-            assert(elemsize == EA_4BYTE);
-            fmt = IF_DV_2A;
-            break;
-
-        case INS_frecpx:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isValidScalarDatasize(size));
-            assert(insOptsNone(opt));
-            fmt = IF_DV_2G;
-            break;
-
-        case INS_sadalp:
-        case INS_saddlp:
-        case INS_uadalp:
-        case INS_uaddlp:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isValidArrangement(size, opt));
-            assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved
-            fmt = IF_DV_2T;
-            break;
-
-        case INS_sqabs:
-        case INS_sqneg:
-        case INS_suqadd:
-        case INS_usqadd:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidArrangement(size, opt));
-                assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved
-                fmt = IF_DV_2M;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert(isValidVectorElemsize(size));
-                fmt = IF_DV_2L;
-            }
-            break;
-
-        case INS_sve_movs:
-        {
-            assert(opt == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(reg1)); // dddd
-            assert(isPredicateRegister(reg2)); // nnnn
-            fmt = IF_SVE_CZ_4A_A;
-            break;
-        }
-
-        case INS_sve_mov:
-        {
-            assert(opt == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(reg1)); // dddd
-            assert(isPredicateRegister(reg2)); // nnnn
-            fmt = IF_SVE_CZ_4A_L;
-            break;
-        }
-
-        case INS_sve_pfirst:
-            assert(opt == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(reg1)); // DDDD
-            assert(isPredicateRegister(reg2)); // gggg
-            fmt = IF_SVE_DD_2A;
-            break;
-
-        case INS_sve_pnext:
-            assert(insOptsScalableStandard(opt));
-            assert(isPredicateRegister(reg1));                     // DDDD
-            assert(isPredicateRegister(reg2));                     // VVVV
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-            fmt = IF_SVE_DF_2A;
-            break;
-
-        case INS_sve_punpkhi:
-        case INS_sve_punpklo:
-            assert(isPredicateRegister(reg1)); // DDDD
-            assert(isPredicateRegister(reg2)); // NNNN
-            fmt = IF_SVE_CK_2A;
-            break;
-
-        case INS_sve_rdffr:
-        case INS_sve_rdffrs:
-            assert(opt == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(reg1)); // DDDD
-            assert(isPredicateRegister(reg2)); // gggg
-            fmt = IF_SVE_DG_2A;
-            break;
-
-        case INS_sve_rev:
-            assert(insOptsScalableStandard(opt));
-            assert(isPredicateRegister(reg1)); // DDDD
-            assert(isPredicateRegister(reg2)); // NNNN
-            fmt = IF_SVE_CJ_2A;
-            break;
-
-        case INS_sve_ptest:
-            assert(opt == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(reg1)); // gggg
-            assert(isPredicateRegister(reg2)); // NNNN
-            fmt = IF_SVE_DI_2A;
-            break;
-
-        case INS_sve_cntp:
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsWithVectorLength(sopt));         // l
-            assert(isGeneralRegister(reg1));                       // ddddd
-            assert(isPredicateRegister(reg2));                     // NNNN
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-            fmt = IF_SVE_DL_2A;
-            break;
-
-        case INS_sve_incp:
-        case INS_sve_decp:
-            assert(isPredicateRegister(reg2)); // MMMM
-
-            if (isGeneralRegister(reg1)) // ddddd
-            {
-                assert(insOptsScalableStandard(opt)); // xx
-                assert(size == EA_8BYTE);
-                fmt = IF_SVE_DM_2A;
-            }
-            else
-            {
-                assert(insOptsScalableAtLeastHalf(opt)); // xx
-                assert(isVectorRegister(reg1));          // ddddd
-                assert(isScalableVectorSize(size));
-                fmt = IF_SVE_DN_2A;
-            }
-            break;
-
-        case INS_sve_sqincp:
-        case INS_sve_uqincp:
-        case INS_sve_sqdecp:
-        case INS_sve_uqdecp:
-            assert(isPredicateRegister(reg2)); // MMMM
-
-            if (isGeneralRegister(reg1)) // ddddd
-            {
-                assert(insOptsScalableStandard(opt)); // xx
-                assert(isValidGeneralDatasize(size));
-                fmt = IF_SVE_DO_2A;
-            }
-            else
-            {
-                assert(insOptsScalableAtLeastHalf(opt)); // xx
-                assert(isVectorRegister(reg1));          // ddddd
-                assert(isScalableVectorSize(size));
-                fmt = IF_SVE_DP_2A;
-            }
-            break;
-
-        case INS_sve_ctermeq:
-        case INS_sve_ctermne:
-            assert(insOptsNone(opt));
-            assert(isGeneralRegister(reg1));      // nnnnn
-            assert(isGeneralRegister(reg2));      // mmmmm
-            assert(isValidGeneralDatasize(size)); // x
-            fmt = IF_SVE_DS_2A;
-            break;
-
-        case INS_sve_sqxtnb:
-        case INS_sve_sqxtnt:
-        case INS_sve_uqxtnb:
-        case INS_sve_uqxtnt:
-        case INS_sve_sqxtunb:
-        case INS_sve_sqxtunt:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(optGetSveElemsize(opt) != EA_8BYTE);
-            assert(isValidVectorElemsize(optGetSveElemsize(opt)));
-            assert(isScalableVectorSize(size));
-            fmt = IF_SVE_GD_2A;
-            break;
-
-        case INS_sve_aese:
-        case INS_sve_aesd:
-        case INS_sve_sm4e:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-#ifdef DEBUG
-            if (opt == INS_OPTS_SCALABLE_S)
-            {
-                assert(ins == INS_sve_sm4e);
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_B);
-            }
-#endif // DEBUG
-            fmt = IF_SVE_GK_2A;
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-
-    assert(fmt != IF_NONE);
-
-    instrDesc* id;
-
-    if (insScalableOptsWithVectorLength(sopt))
-    {
-        id = emitNewInstr(attr);
-        id->idVectorLength4x(sopt == INS_SCALABLE_OPTS_VL_4X);
-    }
-    else
-    {
-        id = emitNewInstrSmall(attr);
-    }
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing a register and two constants.
- */
-
-void emitter::emitIns_R_I_I(instruction ins,
-                            emitAttr    attr,
-                            regNumber   reg,
-                            ssize_t     imm1,
-                            ssize_t     imm2,
-                            insOpts     opt /* = INS_OPTS_NONE */
-                            DEBUGARG(size_t targetHandle /* = 0 */) DEBUGARG(GenTreeFlags gtFlags /* = 0 */))
-{
-    emitAttr  size   = EA_SIZE(attr);
-    insFormat fmt    = IF_NONE;
-    size_t    immOut = 0; // composed from imm1 and imm2 and stored in the instrDesc
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        bool        canEncode;
-        halfwordImm hwi;
-
-        case INS_mov:
-            ins = INS_movz; // INS_mov with LSL is an alias for INS_movz LSL
-            FALLTHROUGH;
-
-        case INS_movk:
-        case INS_movn:
-        case INS_movz:
-            assert(isValidGeneralDatasize(size));
-            assert(isGeneralRegister(reg));
-            assert(isValidUimm16(imm1));
-            assert(insOptsLSL(opt)); // Must be INS_OPTS_LSL
-
-            if (size == EA_8BYTE)
-            {
-                assert((imm2 == 0) || (imm2 == 16) || // shift amount: 0, 16, 32 or 48
-                       (imm2 == 32) || (imm2 == 48));
-            }
-            else // EA_4BYTE
-            {
-                assert((imm2 == 0) || (imm2 == 16)); // shift amount: 0 or 16
-            }
-
-            hwi.immHWVal = 0;
-
-            switch (imm2)
-            {
-                case 0:
-                    hwi.immHW = 0;
-                    canEncode = true;
-                    break;
-
-                case 16:
-                    hwi.immHW = 1;
-                    canEncode = true;
-                    break;
-
-                case 32:
-                    hwi.immHW = 2;
-                    canEncode = true;
-                    break;
-
-                case 48:
-                    hwi.immHW = 3;
-                    canEncode = true;
-                    break;
-
-                default:
-                    canEncode = false;
-            }
-
-            if (canEncode)
-            {
-                hwi.immVal = imm1;
-
-                immOut = hwi.immHWVal;
-                assert(isValidImmHWVal(immOut, size));
-                fmt = IF_DI_1B;
-            }
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstrSC(attr, immOut);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-
-    id->idReg1(reg);
-
-#ifdef DEBUG
-    id->idDebugOnlyInfo()->idFlags     = gtFlags;
-    id->idDebugOnlyInfo()->idMemCookie = targetHandle;
-#endif
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing two registers and a constant.
- */
-
-void emitter::emitIns_R_R_I(instruction     ins,
-                            emitAttr        attr,
-                            regNumber       reg1,
-                            regNumber       reg2,
-                            ssize_t         imm,
-                            insOpts         opt /* = INS_OPTS_NONE */,
-                            insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
-{
-    emitAttr  size       = EA_SIZE(attr);
-    emitAttr  elemsize   = EA_UNKNOWN;
-    insFormat fmt        = IF_NONE;
-    bool      isLdSt     = false;
-    bool      isLdrStr   = false;
-    bool      isSIMD     = false;
-    bool      isAddSub   = false;
-    bool      setFlags   = false;
-    unsigned  scale      = 0;
-    bool      unscaledOp = false;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        bool       canEncode;
-        bitMaskImm bmi;
-        unsigned   registerListSize;
-        bool       isRightShift;
-
-        case INS_mov:
-            // Check for the 'mov' aliases for the vector registers
-            assert(insOptsNone(opt));
-            assert(isValidVectorElemsize(size));
-            elemsize = size;
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-
-            if (isVectorRegister(reg1))
-            {
-                if (isGeneralRegisterOrZR(reg2))
-                {
-                    fmt = IF_DV_2C; // Alias for 'ins'
-                    break;
-                }
-                else if (isVectorRegister(reg2))
-                {
-                    fmt = IF_DV_2E; // Alias for 'dup'
-                    break;
-                }
-            }
-            else // isGeneralRegister(reg1)
-            {
-                assert(isGeneralRegister(reg1));
-                if (isVectorRegister(reg2))
-                {
-                    fmt = IF_DV_2B; // Alias for 'umov'
-                    break;
-                }
-            }
-            assert(!" invalid INS_mov operands");
-            break;
-
-        case INS_lsl:
-        case INS_lsr:
-        case INS_asr:
-            assert(insOptsNone(opt));
-            assert(isValidGeneralDatasize(size));
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegister(reg2));
-            assert(isValidImmShift(imm, size));
-            fmt = IF_DI_2D;
-            break;
-
-        case INS_ror:
-            assert(insOptsNone(opt));
-            assert(isValidGeneralDatasize(size));
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegister(reg2));
-            assert(isValidImmShift(imm, size));
-            fmt = IF_DI_2B;
-            break;
-
-        case INS_shl:
-        case INS_sli:
-        case INS_sri:
-        case INS_srshr:
-        case INS_srsra:
-        case INS_sshr:
-        case INS_ssra:
-        case INS_urshr:
-        case INS_ursra:
-        case INS_ushr:
-        case INS_usra:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            isRightShift = emitInsIsVectorRightShift(ins);
-
-            assert(!isRightShift ||
-                   (imm != 0 && "instructions for vector right-shift do not allow zero as an immediate value"));
-
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                elemsize = optGetElemsize(opt);
-                assert(isValidVectorElemsize(elemsize));
-                assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
-                assert(opt != INS_OPTS_1D); // Reserved encoding
-                fmt = IF_DV_2O;
-                break;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert(size == EA_8BYTE); // only supported size
-                assert(isValidVectorShiftAmount(imm, size, isRightShift));
-                fmt = IF_DV_2N;
-            }
-            break;
-
-        case INS_sqshl:
-        case INS_uqshl:
-        case INS_sqshlu:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            isRightShift = emitInsIsVectorRightShift(ins);
-
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidArrangement(size, opt));
-                assert(opt != INS_OPTS_1D); // The encoding immh = 1xxx, Q = 0 is reserved
-                elemsize = optGetElemsize(opt);
-                assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
-                fmt = IF_DV_2O;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert(isValidVectorElemsize(size));
-                assert(isValidVectorShiftAmount(imm, size, isRightShift));
-                fmt = IF_DV_2N;
-            }
-            break;
-
-        case INS_sqrshrn:
-        case INS_sqrshrun:
-        case INS_sqshrn:
-        case INS_sqshrun:
-        case INS_uqrshrn:
-        case INS_uqshrn:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            isRightShift = emitInsIsVectorRightShift(ins);
-
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidArrangement(size, opt));
-                assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding immh = 1xxx, Q = x is reserved
-                elemsize = optGetElemsize(opt);
-                assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
-                fmt = IF_DV_2O;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert(isValidVectorElemsize(size));
-                assert(size != EA_8BYTE); // The encoding immh = 1xxx is reserved
-                assert(isValidVectorShiftAmount(imm, size, isRightShift));
-                fmt = IF_DV_2N;
-            }
-            break;
-
-        case INS_sxtl:
-        case INS_uxtl:
-            assert(imm == 0);
-            FALLTHROUGH;
-
-        case INS_rshrn:
-        case INS_shrn:
-        case INS_sshll:
-        case INS_ushll:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            isRightShift = emitInsIsVectorRightShift(ins);
-            // Vector operation
-            assert(size == EA_8BYTE);
-            assert(isValidArrangement(size, opt));
-            elemsize = optGetElemsize(opt);
-            assert(elemsize != EA_8BYTE); // Reserved encodings
-            assert(isValidVectorElemsize(elemsize));
-            assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
-            fmt = IF_DV_2O;
-            break;
-
-        case INS_sxtl2:
-        case INS_uxtl2:
-            assert(imm == 0);
-            FALLTHROUGH;
-
-        case INS_rshrn2:
-        case INS_shrn2:
-        case INS_sqrshrn2:
-        case INS_sqrshrun2:
-        case INS_sqshrn2:
-        case INS_sqshrun2:
-        case INS_sshll2:
-        case INS_uqrshrn2:
-        case INS_uqshrn2:
-        case INS_ushll2:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            isRightShift = emitInsIsVectorRightShift(ins);
-
-            // Vector operation
-            assert(size == EA_16BYTE);
-            assert(isValidArrangement(size, opt));
-            elemsize = optGetElemsize(opt);
-            assert(elemsize != EA_8BYTE); // The encoding immh = 1xxx, Q = x is reserved
-            assert(isValidVectorElemsize(elemsize));
-            assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
-            fmt = IF_DV_2O;
-            break;
-
-        case INS_mvn:
-        case INS_neg:
-        case INS_negs:
-            assert(isValidGeneralDatasize(size));
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegisterOrZR(reg2));
-
-            if (imm == 0)
-            {
-                assert(insOptsNone(opt)); // a zero imm, means no alu shift kind
-
-                fmt = IF_DR_2E;
-            }
-            else
-            {
-                if (ins == INS_mvn)
-                {
-                    assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind
-                }
-                else // neg or negs
-                {
-                    assert(insOptsAluShift(opt)); // a non-zero imm, must select shift kind, can't use ROR
-                }
-                assert(isValidImmShift(imm, size));
-                fmt = IF_DR_2F;
-            }
-            break;
-
-        case INS_tst:
-            assert(isValidGeneralDatasize(size));
-            assert(isGeneralRegisterOrZR(reg1));
-            assert(isGeneralRegister(reg2));
-
-            if (insOptsAnyShift(opt))
-            {
-                assert(isValidImmShift(imm, size) && (imm != 0));
-                fmt = IF_DR_2B;
-            }
-            else
-            {
-                assert(insOptsNone(opt)); // a zero imm, means no alu shift kind
-                assert(imm == 0);
-                fmt = IF_DR_2A;
-            }
-            break;
-
-        case INS_cmp:
-        case INS_cmn:
-            assert(isValidGeneralDatasize(size));
-            assert(isGeneralRegisterOrSP(reg1));
-            assert(isGeneralRegister(reg2));
-
-            reg1 = encodingSPtoZR(reg1);
-            if (insOptsAnyExtend(opt))
-            {
-                assert((imm >= 0) && (imm <= 4));
-
-                fmt = IF_DR_2C;
-            }
-            else if (imm == 0)
-            {
-                assert(insOptsNone(opt)); // a zero imm, means no alu shift kind
-
-                fmt = IF_DR_2A;
-            }
-            else
-            {
-                assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind
-                assert(isValidImmShift(imm, size));
-                fmt = IF_DR_2B;
-            }
-            break;
-
-        case INS_ands:
-        case INS_and:
-        case INS_eor:
-        case INS_orr:
-            assert(insOptsNone(opt));
-            assert(isGeneralRegister(reg2));
-            if (ins == INS_ands)
-            {
-                assert(isGeneralRegister(reg1));
-            }
-            else
-            {
-                assert(isGeneralRegisterOrSP(reg1));
-                reg1 = encodingSPtoZR(reg1);
-            }
-
-            bmi.immNRS = 0;
-            canEncode  = canEncodeBitMaskImm(imm, size, &bmi);
-            if (canEncode)
-            {
-                imm = bmi.immNRS;
-                assert(isValidImmNRS(imm, size));
-                fmt = IF_DI_2C;
-            }
-            break;
-
-        case INS_dup: // by element, imm selects the element of reg2
-            assert(isVectorRegister(reg1));
-            if (isVectorRegister(reg2))
-            {
-                if (insOptsAnyArrangement(opt))
-                {
-                    // The size and opt were modified to be based on the
-                    // return type but the immediate is based on the operand
-                    // which can be of a larger size. As such, we don't
-                    // assert the index is valid here and instead do it in
-                    // codegen.
-
-                    // Vector operation
-                    assert(isValidVectorDatasize(size));
-                    assert(isValidArrangement(size, opt));
-                    elemsize = optGetElemsize(opt);
-                    assert(isValidVectorElemsize(elemsize));
-                    assert(opt != INS_OPTS_1D); // Reserved encoding
-                    fmt = IF_DV_2D;
-                    break;
-                }
-                else
-                {
-                    // Scalar operation
-                    assert(insOptsNone(opt));
-                    elemsize = size;
-                    assert(isValidVectorElemsize(elemsize));
-                    assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-                    fmt = IF_DV_2E;
-                    break;
-                }
-            }
-            FALLTHROUGH;
-
-        case INS_ins: // (MOV from general)
-            assert(insOptsNone(opt));
-            assert(isValidVectorElemsize(size));
-            assert(isVectorRegister(reg1));
-            assert(isGeneralRegisterOrZR(reg2));
-            elemsize = size;
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-            fmt = IF_DV_2C;
-            break;
-
-        case INS_umov: // (MOV to general)
-            assert(insOptsNone(opt));
-            assert(isValidVectorElemsize(size));
-            assert(isGeneralRegister(reg1));
-            assert(isVectorRegister(reg2));
-            elemsize = size;
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-            fmt = IF_DV_2B;
-            break;
-
-        case INS_smov:
-            assert(insOptsNone(opt));
-            assert(isValidVectorElemsize(size));
-            assert(size != EA_8BYTE); // no encoding, use INS_umov
-            assert(isGeneralRegister(reg1));
-            assert(isVectorRegister(reg2));
-            elemsize = size;
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-            fmt = IF_DV_2B;
-            break;
-
-        case INS_add:
-        case INS_sub:
-            setFlags = false;
-            isAddSub = true;
-            break;
-
-        case INS_adds:
-        case INS_subs:
-            setFlags = true;
-            isAddSub = true;
-            break;
-
-        case INS_ldrsb:
-        case INS_ldursb:
-            // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register
-            assert(isValidGeneralDatasize(size));
-            unscaledOp = (ins == INS_ldursb);
-            scale      = 0;
-            isLdSt     = true;
-            break;
-
-        case INS_ldrsh:
-        case INS_ldursh:
-            // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register
-            assert(isValidGeneralDatasize(size));
-            unscaledOp = (ins == INS_ldursh);
-            scale      = 1;
-            isLdSt     = true;
-            break;
-
-        case INS_ldrsw:
-        case INS_ldursw:
-            // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register
-            assert(size == EA_8BYTE);
-            unscaledOp = (ins == INS_ldursw);
-            scale      = 2;
-            isLdSt     = true;
-            break;
-
-        case INS_ldrb:
-        case INS_strb:
-            // size is ignored
-            unscaledOp = false;
-            scale      = 0;
-            isLdSt     = true;
-            break;
-
-        case INS_ldapurb:
-        case INS_stlurb:
-        case INS_ldurb:
-        case INS_sturb:
-            // size is ignored
-            unscaledOp = true;
-            scale      = 0;
-            isLdSt     = true;
-            break;
-
-        case INS_ldrh:
-        case INS_strh:
-            // size is ignored
-            unscaledOp = false;
-            scale      = 1;
-            isLdSt     = true;
-            break;
-
-        case INS_ldurh:
-        case INS_ldapurh:
-        case INS_sturh:
-        case INS_stlurh:
-            // size is ignored
-            unscaledOp = true;
-            scale      = 0;
-            isLdSt     = true;
-            break;
-
-        case INS_ldr:
-        case INS_str:
-            // Is the target a vector register?
-            if (isVectorRegister(reg1))
-            {
-                assert(isValidVectorLSDatasize(size));
-                assert(isGeneralRegisterOrSP(reg2));
-                isSIMD = true;
-            }
-            else
-            {
-                assert(isValidGeneralDatasize(size));
-            }
-            unscaledOp = false;
-            scale      = NaturalScale_helper(size);
-            isLdSt     = true;
-            isLdrStr   = true;
-            break;
-
-        case INS_ldur:
-        case INS_stur:
-        case INS_ldapur:
-        case INS_stlur:
-            // Is the target a vector register?
-            if (isVectorRegister(reg1))
-            {
-                assert(isValidVectorLSDatasize(size));
-                assert(isGeneralRegisterOrSP(reg2));
-                isSIMD = true;
-            }
-            else
-            {
-                assert(isValidGeneralDatasize(size));
-            }
-            unscaledOp = true;
-            scale      = 0;
-            isLdSt     = true;
-            break;
-
-        case INS_ld2:
-        case INS_ld3:
-        case INS_ld4:
-        case INS_st2:
-        case INS_st3:
-        case INS_st4:
-            assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1
-            FALLTHROUGH;
-
-        case INS_ld1:
-        case INS_ld1_2regs:
-        case INS_ld1_3regs:
-        case INS_ld1_4regs:
-        case INS_st1:
-        case INS_st1_2regs:
-        case INS_st1_3regs:
-        case INS_st1_4regs:
-            assert(isVectorRegister(reg1));
-            assert(isGeneralRegisterOrSP(reg2));
-
-            reg2 = encodingSPtoZR(reg2);
-
-            if (insOptsAnyArrangement(opt))
-            {
-                registerListSize = insGetRegisterListSize(ins);
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                assert((size * registerListSize) == imm);
-
-                // Load/Store multiple structures  post-indexed by an immediate
-                fmt = IF_LS_2E;
-            }
-            else
-            {
-                assert(insOptsNone(opt));
-                assert((ins != INS_ld1_2regs) && (ins != INS_ld1_3regs) && (ins != INS_ld1_4regs) &&
-                       (ins != INS_st1_2regs) && (ins != INS_st1_3regs) && (ins != INS_st1_4regs));
-
-                elemsize = size;
-                assert(isValidVectorElemsize(elemsize));
-                assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-
-                // Load/Store single structure  base register
-                fmt = IF_LS_2F;
-            }
-            break;
-
-        case INS_ld1r:
-        case INS_ld2r:
-        case INS_ld3r:
-        case INS_ld4r:
-            assert(isVectorRegister(reg1));
-            assert(isGeneralRegisterOrSP(reg2));
-
-            assert(isValidVectorDatasize(size));
-            assert(isValidArrangement(size, opt));
-
-            elemsize         = optGetElemsize(opt);
-            registerListSize = insGetRegisterListSize(ins);
-            assert((elemsize * registerListSize) == imm);
-
-            // Load single structure and replicate  post-indexed by an immediate
-            reg2 = encodingSPtoZR(reg2);
-            fmt  = IF_LS_2E;
-            break;
-
-        case INS_sve_asr:
-        case INS_sve_lsl:
-        case INS_sve_lsr:
-        case INS_sve_srshr:
-        case INS_sve_sqshl:
-        case INS_sve_urshr:
-        case INS_sve_sqshlu:
-        case INS_sve_uqshl:
-        case INS_sve_asrd:
-            isRightShift = emitInsIsVectorRightShift(ins);
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1));       // ddddd
-            assert(isLowPredicateRegister(reg2)); // ggg
-            assert(isValidVectorShiftAmount(imm, optGetSveElemsize(opt), isRightShift));
-            fmt = IF_SVE_AM_2A;
-            break;
-
-        case INS_sve_sqrshrn:
-        case INS_sve_sqrshrun:
-        case INS_sve_uqrshrn:
-            isRightShift = emitInsIsVectorRightShift(ins);
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(opt == INS_OPTS_SCALABLE_H);
-            assert(isRightShift); // These are always right-shift.
-            assert(isValidVectorShiftAmount(imm, EA_4BYTE, isRightShift));
-            fmt = IF_SVE_GA_2A;
-            break;
-
-        case INS_sve_pext:
-            assert(insOptsScalableStandard(opt));
-            assert(isPredicateRegister(reg1));                     // DDDD
-            assert(isHighPredicateRegister(reg2));                 // NNN
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-
-            if (sopt == INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR)
-            {
-                assert(isValidImm1(imm)); // i
-                fmt = IF_SVE_DW_2B;
-            }
-            else
-            {
-                assert(insScalableOptsNone(sopt));
-                assert(isValidUimm2(imm)); // ii
-                fmt = IF_SVE_DW_2A;
-            }
-            break;
-
-        case INS_sve_ftmad:
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(insScalableOptsNone(sopt));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isValidUimm3(imm));
-            assert(isScalableVectorSize(size));
-            fmt = IF_SVE_HN_2A;
-            break;
-
-        case INS_sve_ldr:
-            assert(insOptsNone(opt));
-            assert(isScalableVectorSize(size));
-            assert(isGeneralRegister(reg2)); // nnnnn
-            assert(isValidSimm9(imm));       // iii
-                                             // iiiiii
-
-            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
-            {
-                assert(isVectorRegister(reg1));
-                fmt = IF_SVE_IE_2A;
-            }
-            else
-            {
-                assert(insScalableOptsNone(sopt));
-                assert(isPredicateRegister(reg1));
-                fmt = IF_SVE_ID_2A;
-            }
-            break;
-
-        case INS_sve_str:
-            assert(insOptsNone(opt));
-            assert(isScalableVectorSize(size));
-            assert(isGeneralRegister(reg2)); // nnnnn
-            assert(isValidSimm9(imm));       // iii
-                                             // iiiiii
-
-            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
-            {
-                assert(isVectorRegister(reg1));
-                fmt = IF_SVE_JH_2A;
-            }
-            else
-            {
-                assert(insScalableOptsNone(sopt));
-                assert(isPredicateRegister(reg1));
-                fmt = IF_SVE_JG_2A;
-            }
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-
-    if (isLdSt)
-    {
-        assert(!isAddSub);
-
-        if (isSIMD)
-        {
-            assert(isValidVectorLSDatasize(size));
-            assert(isVectorRegister(reg1));
-            assert((scale >= 0) && (scale <= 4));
-        }
-        else
-        {
-            assert(isValidGeneralLSDatasize(size));
-            assert(isGeneralRegisterOrZR(reg1));
-            assert((scale >= 0) && (scale <= 3));
-        }
-
-        assert(isGeneralRegisterOrSP(reg2));
-
-        // Load/Store reserved encodings:
-        if (insOptsIndexed(opt))
-        {
-            assert(reg1 != reg2);
-        }
-
-        reg2 = encodingSPtoZR(reg2);
-
-        ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
-        if (imm == 0)
-        {
-            assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero
-
-            fmt = IF_LS_2A;
-        }
-        else if (insOptsIndexed(opt) || unscaledOp || (imm < 0) || ((imm & mask) != 0))
-        {
-            if ((imm >= -256) && (imm <= 255))
-            {
-                fmt = IF_LS_2C;
-            }
-            else
-            {
-                assert(!"Instruction cannot be encoded: IF_LS_2C");
-            }
-        }
-        else if (imm > 0)
-        {
-            assert(insOptsNone(opt));
-            assert(!unscaledOp);
-
-            if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
-            {
-                imm >>= scale; // The immediate is scaled by the size of the ld/st
-
-                fmt = IF_LS_2B;
-            }
-            else
-            {
-                assert(!"Instruction cannot be encoded: IF_LS_2B");
-            }
-        }
-
-        // Try to optimize a load/store with an alternative instruction.
-        if (isLdrStr && emitComp->opts.OptimizationEnabled() &&
-            OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, false, -1, -1 DEBUG_ARG(false)))
-        {
-            return;
-        }
-    }
-    else if (isAddSub)
-    {
-        assert(!isLdSt);
-        assert(insOptsNone(opt));
-
-        if (setFlags) // Can't encode SP with setFlags
-        {
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegister(reg2));
-        }
-        else
-        {
-            assert(isGeneralRegisterOrSP(reg1));
-            assert(isGeneralRegisterOrSP(reg2));
-
-            // Is it just a mov?
-            if (imm == 0)
-            {
-                emitIns_Mov(INS_mov, attr, reg1, reg2, /* canSkip */ true);
-                return;
-            }
-
-            reg1 = encodingSPtoZR(reg1);
-            reg2 = encodingSPtoZR(reg2);
-        }
-
-        if (unsigned_abs(imm) <= 0x0fff)
-        {
-            if (imm < 0)
-            {
-                ins = insReverse(ins);
-                imm = -imm;
-            }
-            assert(isValidUimm12(imm));
-            fmt = IF_DI_2A;
-        }
-        else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding
-        {
-            // Encoding will use a 12-bit left shift of the immediate
-            opt = INS_OPTS_LSL12;
-            if (imm < 0)
-            {
-                ins = insReverse(ins);
-                imm = -imm;
-            }
-            assert((imm & 0xfff) == 0);
-            imm >>= 12;
-            assert(isValidUimm12(imm));
-            fmt = IF_DI_2A;
-        }
-        else
-        {
-            assert(!"Instruction cannot be encoded: IF_DI_2A");
-        }
-    }
-
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstrSC(attr, imm);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing two registers and a floating point constant.
- */
-
-void emitter::emitIns_R_R_F(
-    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, double immDbl, insOpts opt /* = INS_OPTS_NONE */)
-{
-    ssize_t   imm  = 0;
-    emitAttr  size = EA_SIZE(attr);
-    insFormat fmt  = IF_NONE;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_sve_fmul:
-        case INS_sve_fmaxnm:
-        case INS_sve_fadd:
-        case INS_sve_fmax:
-        case INS_sve_fminnm:
-        case INS_sve_fsub:
-        case INS_sve_fmin:
-        case INS_sve_fsubr:
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isScalableVectorSize(size));
-            imm = emitEncodeSmallFloatImm(immDbl, ins);
-            fmt = IF_SVE_HM_2A;
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstrSC(attr, imm);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
-*
-*  Add an instruction referencing two registers and a constant.
-*  Also checks for a large immediate that needs a second instruction
-*  and will load it in reg1
-*
-*  - Supports instructions: add, adds, sub, subs, and, ands, eor and orr
-*  - Requires that reg1 is a general register and not SP or ZR
-*  - Requires that reg1 != reg2
-*/
-void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm)
-{
-    assert(isGeneralRegister(reg1));
-    assert(reg1 != reg2);
-
-    bool immFits = true;
-
-    switch (ins)
-    {
-        case INS_add:
-        case INS_adds:
-        case INS_sub:
-        case INS_subs:
-            immFits = emitter::emitIns_valid_imm_for_add(imm, attr);
-            break;
-
-        case INS_ands:
-        case INS_and:
-        case INS_eor:
-        case INS_orr:
-            immFits = emitter::emitIns_valid_imm_for_alu(imm, attr);
-            break;
-
-        default:
-            assert(!"Unsupported instruction in emitIns_R_R_Imm");
-    }
-
-    if (immFits)
-    {
-        emitIns_R_R_I(ins, attr, reg1, reg2, imm);
-    }
-    else
-    {
-        // Load 'imm' into the reg1 register
-        // then issue:   'ins'  reg1, reg2, reg1
-        //
-        codeGen->instGen_Set_Reg_To_Imm(attr, reg1, imm);
-        emitIns_R_R_R(ins, attr, reg1, reg2, reg1);
-    }
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing three registers.
- */
-
-void emitter::emitIns_R_R_R(instruction     ins,
-                            emitAttr        attr,
-                            regNumber       reg1,
-                            regNumber       reg2,
-                            regNumber       reg3,
-                            insOpts         opt /* = INS_OPTS_NONE */,
-                            insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
-{
-    emitAttr  size           = EA_SIZE(attr);
-    emitAttr  elemsize       = EA_UNKNOWN;
-    insFormat fmt            = IF_NONE;
-    bool      pmerge         = false;
-    bool      vectorLength4x = false;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_mul:
-        case INS_smull:
-        case INS_umull:
-            if (insOptsAnyArrangement(opt))
-            {
-                // ASIMD instruction
-                assert(isVectorRegister(reg1));
-                assert(isVectorRegister(reg2));
-                assert(isVectorRegister(reg3));
-                assert(isValidArrangement(size, opt));
-                assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved
-                fmt = IF_DV_3A;
-                break;
-            }
-            // Base instruction
-            FALLTHROUGH;
-
-        case INS_lsl:
-        case INS_lsr:
-        case INS_asr:
-        case INS_ror:
-        case INS_adc:
-        case INS_adcs:
-        case INS_sbc:
-        case INS_sbcs:
-        case INS_udiv:
-        case INS_sdiv:
-        case INS_mneg:
-        case INS_smnegl:
-        case INS_smulh:
-        case INS_umnegl:
-        case INS_umulh:
-        case INS_lslv:
-        case INS_lsrv:
-        case INS_asrv:
-        case INS_rorv:
-        case INS_crc32b:
-        case INS_crc32h:
-        case INS_crc32w:
-        case INS_crc32x:
-        case INS_crc32cb:
-        case INS_crc32ch:
-        case INS_crc32cw:
-        case INS_crc32cx:
-            assert(insOptsNone(opt));
-            assert(isValidGeneralDatasize(size));
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            fmt = IF_DR_3A;
-            break;
-
-        case INS_add:
-        case INS_sub:
-            if (isVectorRegister(reg1))
-            {
-                // ASIMD instruction
-                assert(isVectorRegister(reg2));
-                assert(isVectorRegister(reg3));
-
-                if (insOptsAnyArrangement(opt))
-                {
-                    // Vector operation
-                    assert(opt != INS_OPTS_1D); // Reserved encoding
-                    assert(isValidVectorDatasize(size));
-                    assert(isValidArrangement(size, opt));
-                    fmt = IF_DV_3A;
-                }
-                else
-                {
-                    // Scalar operation
-                    assert(insOptsNone(opt));
-                    assert(size == EA_8BYTE);
-                    fmt = IF_DV_3E;
-                }
-                break;
-            }
-            // Base instruction
-            FALLTHROUGH;
-
-        case INS_adds:
-        case INS_subs:
-            emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, opt);
-            return;
-
-        case INS_cmeq:
-        case INS_cmge:
-        case INS_cmgt:
-        case INS_cmhi:
-        case INS_cmhs:
-        case INS_cmtst:
-        case INS_srshl:
-        case INS_sshl:
-        case INS_urshl:
-        case INS_ushl:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidArrangement(size, opt));
-                assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved
-                fmt = IF_DV_3A;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert(size == EA_8BYTE); // Only Int64/UInt64 supported
-                fmt = IF_DV_3E;
-            }
-            break;
-
-        case INS_sqadd:
-        case INS_sqrshl:
-        case INS_sqshl:
-        case INS_sqsub:
-        case INS_uqadd:
-        case INS_uqrshl:
-        case INS_uqshl:
-        case INS_uqsub:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidArrangement(size, opt));
-                assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved
-                fmt = IF_DV_3A;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert(isValidVectorElemsize(size));
-                fmt = IF_DV_3E;
-            }
-            break;
-
-        case INS_fcmeq:
-        case INS_fcmge:
-        case INS_fcmgt:
-        case INS_frecps:
-        case INS_frsqrts:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                elemsize = optGetElemsize(opt);
-                assert((elemsize == EA_8BYTE) || (elemsize == EA_4BYTE)); // Only Double/Float supported
-                assert(opt != INS_OPTS_1D);                               // Reserved encoding
-                fmt = IF_DV_3B;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert((size == EA_8BYTE) || (size == EA_4BYTE)); // Only Double/Float supported
-                fmt = IF_DV_3D;
-            }
-            break;
-
-        case INS_mla:
-        case INS_mls:
-        case INS_saba:
-        case INS_sabd:
-        case INS_shadd:
-        case INS_shsub:
-        case INS_smax:
-        case INS_smaxp:
-        case INS_smin:
-        case INS_sminp:
-        case INS_srhadd:
-        case INS_uaba:
-        case INS_uabd:
-        case INS_uhadd:
-        case INS_uhsub:
-        case INS_umax:
-        case INS_umaxp:
-        case INS_umin:
-        case INS_uminp:
-        case INS_urhadd:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isValidArrangement(size, opt));
-            assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved
-            fmt = IF_DV_3A;
-            break;
-
-        case INS_addp:
-        case INS_uzp1:
-        case INS_uzp2:
-        case INS_zip1:
-        case INS_zip2:
-        case INS_trn1:
-        case INS_trn2:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isValidArrangement(size, opt));
-            assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved
-            fmt = IF_DV_3A;
-            break;
-
-        case INS_mov:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(reg2 == reg3);
-            assert(isValidVectorDatasize(size));
-            // INS_mov is an alias for INS_orr (vector register)
-            if (opt == INS_OPTS_NONE)
-            {
-                elemsize = EA_1BYTE;
-                opt      = optMakeArrangement(size, elemsize);
-            }
-            assert(isValidArrangement(size, opt));
-            fmt = IF_DV_3C;
-            break;
-
-        case INS_and:
-        case INS_bic:
-        case INS_eor:
-        case INS_orr:
-        case INS_orn:
-        case INS_tbl:
-        case INS_tbl_2regs:
-        case INS_tbl_3regs:
-        case INS_tbl_4regs:
-        case INS_tbx:
-        case INS_tbx_2regs:
-        case INS_tbx_3regs:
-        case INS_tbx_4regs:
-            if (isVectorRegister(reg1))
-            {
-                assert(isValidVectorDatasize(size));
-                assert(isVectorRegister(reg2));
-                assert(isVectorRegister(reg3));
-                if (opt == INS_OPTS_NONE)
-                {
-                    elemsize = EA_1BYTE;
-                    opt      = optMakeArrangement(size, elemsize);
-                }
-                assert(isValidArrangement(size, opt));
-                fmt = IF_DV_3C;
-                break;
-            }
-            FALLTHROUGH;
-
-        case INS_ands:
-        case INS_bics:
-        case INS_eon:
-            emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, INS_OPTS_NONE);
-            return;
-
-        case INS_bsl:
-        case INS_bit:
-        case INS_bif:
-            assert(isValidVectorDatasize(size));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            if (opt == INS_OPTS_NONE)
-            {
-                elemsize = EA_1BYTE;
-                opt      = optMakeArrangement(size, elemsize);
-            }
-            assert(isValidArrangement(size, opt));
-            fmt = IF_DV_3C;
-            break;
-
-        case INS_fadd:
-        case INS_fsub:
-        case INS_fdiv:
-        case INS_fmax:
-        case INS_fmaxnm:
-        case INS_fmin:
-        case INS_fminnm:
-        case INS_fabd:
-        case INS_fmul:
-        case INS_fmulx:
-        case INS_facge:
-        case INS_facgt:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                elemsize = optGetElemsize(opt);
-                assert(isValidVectorElemsizeFloat(elemsize));
-                assert(opt != INS_OPTS_1D); // Reserved encoding
-                fmt = IF_DV_3B;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert(isValidScalarDatasize(size));
-                fmt = IF_DV_3D;
-            }
-            break;
-
-        case INS_fnmul:
-            // Scalar operation
-            assert(insOptsNone(opt));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isValidScalarDatasize(size));
-            fmt = IF_DV_3D;
-            break;
-
-        case INS_faddp:
-        case INS_fmaxnmp:
-        case INS_fmaxp:
-        case INS_fminnmp:
-        case INS_fminp:
-
-        case INS_fmla:
-        case INS_fmls:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsAnyArrangement(opt)); // no scalar encoding, use 4-operand 'fmadd' or 'fmsub'
-
-            // Vector operation
-            assert(isValidVectorDatasize(size));
-            assert(isValidArrangement(size, opt));
-            elemsize = optGetElemsize(opt);
-            assert(isValidVectorElemsizeFloat(elemsize));
-            assert(opt != INS_OPTS_1D); // Reserved encoding
-            fmt = IF_DV_3B;
-            break;
-
-        case INS_ldr:
-        case INS_ldrb:
-        case INS_ldrh:
-        case INS_ldrsb:
-        case INS_ldrsh:
-        case INS_ldrsw:
-        case INS_str:
-        case INS_strb:
-        case INS_strh:
-            emitIns_R_R_R_Ext(ins, attr, reg1, reg2, reg3, opt);
-            return;
-
-        case INS_ldp:
-        case INS_ldpsw:
-        case INS_ldnp:
-        case INS_stp:
-        case INS_stnp:
-            emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0);
-            return;
-
-        case INS_stxr:
-        case INS_stxrb:
-        case INS_stxrh:
-        case INS_stlxr:
-        case INS_stlxrb:
-        case INS_stlxrh:
-            assert(isGeneralRegisterOrZR(reg1));
-            assert(isGeneralRegisterOrZR(reg2));
-            assert(isGeneralRegisterOrSP(reg3));
-            fmt = IF_LS_3D;
-            break;
-
-        case INS_casb:
-        case INS_casab:
-        case INS_casalb:
-        case INS_caslb:
-        case INS_cash:
-        case INS_casah:
-        case INS_casalh:
-        case INS_caslh:
-        case INS_cas:
-        case INS_casa:
-        case INS_casal:
-        case INS_casl:
-        case INS_ldaddb:
-        case INS_ldaddab:
-        case INS_ldaddalb:
-        case INS_ldaddlb:
-        case INS_ldaddh:
-        case INS_ldaddah:
-        case INS_ldaddalh:
-        case INS_ldaddlh:
-        case INS_ldadd:
-        case INS_ldadda:
-        case INS_ldaddal:
-        case INS_ldaddl:
-        case INS_ldclral:
-        case INS_ldsetal:
-        case INS_swpb:
-        case INS_swpab:
-        case INS_swpalb:
-        case INS_swplb:
-        case INS_swph:
-        case INS_swpah:
-        case INS_swpalh:
-        case INS_swplh:
-        case INS_swp:
-        case INS_swpa:
-        case INS_swpal:
-        case INS_swpl:
-            assert(isGeneralRegisterOrZR(reg1));
-            assert(isGeneralRegisterOrZR(reg2));
-            assert(isGeneralRegisterOrSP(reg3));
-            fmt = IF_LS_3E;
-            break;
-
-        case INS_sha256h:
-        case INS_sha256h2:
-        case INS_sha256su1:
-        case INS_sha1su0:
-        case INS_sha1c:
-        case INS_sha1p:
-        case INS_sha1m:
-            assert(isValidVectorDatasize(size));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            if (opt == INS_OPTS_NONE)
-            {
-                elemsize = EA_4BYTE;
-                opt      = optMakeArrangement(size, elemsize);
-            }
-            assert(isValidArrangement(size, opt));
-            fmt = IF_DV_3F;
-            break;
-
-        case INS_ld2:
-        case INS_ld3:
-        case INS_ld4:
-        case INS_st2:
-        case INS_st3:
-        case INS_st4:
-            assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1
-            FALLTHROUGH;
-
-        case INS_ld1:
-        case INS_ld1_2regs:
-        case INS_ld1_3regs:
-        case INS_ld1_4regs:
-        case INS_st1:
-        case INS_st1_2regs:
-        case INS_st1_3regs:
-        case INS_st1_4regs:
-        case INS_ld1r:
-        case INS_ld2r:
-        case INS_ld3r:
-        case INS_ld4r:
-            assert(isVectorRegister(reg1));
-            assert(isGeneralRegisterOrSP(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidArrangement(size, opt));
-
-            // Load/Store multiple structures       post-indexed by a register
-            // Load single structure and replicate  post-indexed by a register
-            reg2 = encodingSPtoZR(reg2);
-            fmt  = IF_LS_3F;
-            break;
-
-        case INS_addhn:
-        case INS_raddhn:
-        case INS_rsubhn:
-        case INS_subhn:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(size == EA_8BYTE);
-            assert(isValidArrangement(size, opt));
-            assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = x is reserved.
-            fmt = IF_DV_3A;
-            break;
-
-        case INS_addhn2:
-        case INS_raddhn2:
-        case INS_rsubhn2:
-        case INS_subhn2:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(size == EA_16BYTE);
-            assert(isValidArrangement(size, opt));
-            assert(opt != INS_OPTS_2D); // The encoding size = 11, Q = x is reserved.
-            fmt = IF_DV_3A;
-            break;
-
-        case INS_sabal:
-        case INS_sabdl:
-        case INS_saddl:
-        case INS_saddw:
-        case INS_smlal:
-        case INS_smlsl:
-        case INS_ssubl:
-        case INS_ssubw:
-        case INS_uabal:
-        case INS_uabdl:
-        case INS_uaddl:
-        case INS_uaddw:
-        case INS_umlal:
-        case INS_umlsl:
-        case INS_usubl:
-        case INS_usubw:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(size == EA_8BYTE);
-            assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
-            fmt = IF_DV_3A;
-            break;
-
-        case INS_sabal2:
-        case INS_sabdl2:
-        case INS_saddl2:
-        case INS_saddw2:
-        case INS_smlal2:
-        case INS_smlsl2:
-        case INS_ssubl2:
-        case INS_ssubw2:
-        case INS_umlal2:
-        case INS_umlsl2:
-        case INS_smull2:
-        case INS_uabal2:
-        case INS_uabdl2:
-        case INS_uaddl2:
-        case INS_uaddw2:
-        case INS_usubl2:
-        case INS_umull2:
-        case INS_usubw2:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(size == EA_16BYTE);
-            assert((opt == INS_OPTS_16B) || (opt == INS_OPTS_8H) || (opt == INS_OPTS_4S));
-            fmt = IF_DV_3A;
-            break;
-
-        case INS_sqdmlal:
-        case INS_sqdmlsl:
-        case INS_sqdmull:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(size == EA_8BYTE);
-                assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
-                fmt = IF_DV_3A;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert((size == EA_2BYTE) || (size == EA_4BYTE));
-                fmt = IF_DV_3E;
-            }
-            break;
-
-        case INS_sqdmulh:
-        case INS_sqrdmlah:
-        case INS_sqrdmlsh:
-        case INS_sqrdmulh:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidVectorDatasize(size));
-                elemsize = optGetElemsize(opt);
-                assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE));
-                fmt = IF_DV_3A;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert((size == EA_2BYTE) || (size == EA_4BYTE));
-                fmt = IF_DV_3E;
-            }
-            break;
-
-        case INS_sqdmlal2:
-        case INS_sqdmlsl2:
-        case INS_sqdmull2:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(size == EA_16BYTE);
-            assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S));
-            fmt = IF_DV_3A;
-            break;
-
-        case INS_pmul:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isValidArrangement(size, opt));
-            assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_16B));
-            fmt = IF_DV_3A;
-            break;
-
-        case INS_pmull:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(size == EA_8BYTE);
-            assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_1D));
-            fmt = IF_DV_3A;
-            break;
-
-        case INS_pmull2:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(size == EA_16BYTE);
-            assert((opt == INS_OPTS_16B) || (opt == INS_OPTS_2D));
-            fmt = IF_DV_3A;
-            break;
-
-        case INS_sdot:
-        case INS_udot:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(((size == EA_8BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_16BYTE) && (opt == INS_OPTS_4S)));
-            fmt = IF_DV_3A;
-            break;
-
-        case INS_sve_and:
-        case INS_sve_bic:
-        case INS_sve_eor:
-        case INS_sve_orr:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AA_3A;
-            break;
-
-        case INS_sve_add:
-        case INS_sve_sub:
-        case INS_sve_subr:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
-            {
-                assert(isVectorRegister(reg2));
-                assert(ins != INS_sve_subr);
-                fmt = IF_SVE_AT_3A;
-            }
-            else
-            {
-                assert(isLowPredicateRegister(reg2));
-                assert(insScalableOptsNone(sopt));
-                fmt = IF_SVE_AB_3A;
-            }
-            break;
-
-        case INS_sve_sdiv:
-        case INS_sve_sdivr:
-        case INS_sve_udiv:
-        case INS_sve_udivr:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableWords(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AC_3A;
-            break;
-
-        case INS_sve_sabd:
-        case INS_sve_smax:
-        case INS_sve_smin:
-        case INS_sve_uabd:
-        case INS_sve_umax:
-        case INS_sve_umin:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AD_3A;
-            break;
-
-        case INS_sve_mul:
-        case INS_sve_smulh:
-        case INS_sve_umulh:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
-            {
-                assert(isVectorRegister(reg2));
-                fmt = IF_SVE_BD_3A;
-            }
-            else
-            {
-                assert(insScalableOptsNone(sopt));
-                assert(isLowPredicateRegister(reg2));
-                fmt = IF_SVE_AE_3A;
-            }
-            break;
-
-        case INS_sve_andv:
-        case INS_sve_eorv:
-        case INS_sve_orv:
-            assert(isFloatReg(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AF_3A;
-            break;
-
-        case INS_sve_andqv:
-        case INS_sve_eorqv:
-        case INS_sve_orqv:
-            unreached(); // TODO-SVE: Not yet supported.
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AG_3A;
-            break;
-
-        case INS_sve_movprfx:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE)
-            {
-                pmerge = true;
-            }
-            fmt = IF_SVE_AH_3A;
-            break;
-
-        case INS_sve_saddv:
-        case INS_sve_uaddv:
-            assert(isFloatReg(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableWide(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AI_3A;
-            break;
-
-        case INS_sve_addqv:
-            unreached(); // TODO-SVE: Not yet supported.
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AJ_3A;
-            break;
-
-        case INS_sve_smaxv:
-        case INS_sve_sminv:
-        case INS_sve_umaxv:
-        case INS_sve_uminv:
-            assert(isFloatReg(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AK_3A;
-            break;
-
-        case INS_sve_smaxqv:
-        case INS_sve_sminqv:
-        case INS_sve_umaxqv:
-        case INS_sve_uminqv:
-            unreached(); // TODO-SVE: Not yet supported.
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AL_3A;
-            break;
-
-        case INS_sve_asrr:
-        case INS_sve_lslr:
-        case INS_sve_lsrr:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AN_3A;
-            break;
-
-        case INS_sve_asr:
-        case INS_sve_lsl:
-        case INS_sve_lsr:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg3));
-            if (sopt == INS_SCALABLE_OPTS_WIDE)
-            {
-                assert(isLowPredicateRegister(reg2));
-                assert(insOptsScalableWide(opt));
-                fmt = IF_SVE_AO_3A;
-            }
-            else if (sopt == INS_SCALABLE_OPTS_UNPREDICATED_WIDE)
-            {
-                assert(isVectorRegister(reg2));
-                assert(insOptsScalableWide(opt));
-                fmt = IF_SVE_BG_3A;
-            }
-            else
-            {
-                assert(isLowPredicateRegister(reg2));
-                assert(insScalableOptsNone(sopt));
-                assert(insOptsScalableStandard(opt));
-                fmt = IF_SVE_AN_3A;
-            }
-            break;
-
-        case INS_sve_uzp1:
-        case INS_sve_trn1:
-        case INS_sve_zip1:
-        case INS_sve_uzp2:
-        case INS_sve_trn2:
-        case INS_sve_zip2:
-            assert(insOptsScalableStandard(opt));
-            assert(isPredicateRegister(reg1)); // DDDD
-            assert(isPredicateRegister(reg2)); // NNNN
-            assert(isPredicateRegister(reg3)); // MMMM
-            fmt = IF_SVE_CI_3A;
-            break;
-
-        case INS_sve_clz:
-        case INS_sve_cls:
-        case INS_sve_cnt:
-        case INS_sve_cnot:
-        case INS_sve_not:
-        case INS_sve_nots:
-            if (isPredicateRegister(reg1) && sopt != INS_SCALABLE_OPTS_UNPREDICATED)
-            {
-                assert(opt == INS_OPTS_SCALABLE_B);
-                assert(isPredicateRegister(reg1)); // DDDD
-                assert(isPredicateRegister(reg2)); // gggg
-                assert(isPredicateRegister(reg3)); // NNNN
-                fmt = IF_SVE_CZ_4A;
-            }
-            else
-            {
-                assert(isVectorRegister(reg1));
-                assert(isLowPredicateRegister(reg2));
-                assert(isVectorRegister(reg3));
-                assert(insOptsScalableStandard(opt));
-                assert(insScalableOptsNone(sopt));
-                fmt = IF_SVE_AP_3A;
-            }
-            break;
-
-        case INS_sve_fabs:
-        case INS_sve_fneg:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableFloat(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AP_3A;
-            break;
-
-        case INS_sve_abs:
-        case INS_sve_neg:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AQ_3A;
-            break;
-
-        case INS_sve_sxtb:
-        case INS_sve_uxtb:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AQ_3A;
-            break;
-
-        case INS_sve_sxth:
-        case INS_sve_uxth:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableWords(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AQ_3A;
-            break;
-
-        case INS_sve_sxtw:
-        case INS_sve_uxtw:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(opt == INS_OPTS_SCALABLE_D);
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_AQ_3A;
-            break;
-
-        case INS_sve_index:
-            assert(isValidScalarDatasize(size));
-            assert(isVectorRegister(reg1));
-            assert(isGeneralRegisterOrZR(reg2));
-            assert(isGeneralRegisterOrZR(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_BA_3A;
-            break;
-
-        case INS_sve_sqdmulh:
-        case INS_sve_sqrdmulh:
-            assert(isScalableVectorSize(size));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_BE_3A;
-            break;
-
-        case INS_sve_ftssel:
-            assert(isScalableVectorSize(size));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableFloat(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_BK_3A;
-            break;
-
-        case INS_sve_compact:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableWords(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_CL_3A;
-            break;
-
-        case INS_sve_clasta:
-        case INS_sve_clastb:
-            assert(insOptsScalableStandard(opt));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            if (isGeneralRegister(reg1))
-            {
-                assert(insScalableOptsNone(sopt));
-                assert(isValidScalarDatasize(size));
-                fmt = IF_SVE_CO_3A;
-            }
-            else if (sopt == INS_SCALABLE_OPTS_WITH_SIMD_SCALAR)
-            {
-                assert(isFloatReg(reg1));
-                assert(isValidVectorElemsize(size));
-                fmt = IF_SVE_CN_3A;
-            }
-            else
-            {
-                assert(insScalableOptsNone(sopt));
-                assert(isVectorRegister(reg1));
-                fmt = IF_SVE_CM_3A;
-            }
-            break;
-
-        case INS_sve_cpy:
-        case INS_sve_mov:
-            assert(insOptsScalableStandard(opt));
-            // TODO-SVE: Following checks can be simplified to check reg1 as predicate register only after adding
-            // definitions for predicate registers. Currently, predicate registers P0 to P15 are aliased to simd
-            // registers V0 to V15.
-            if (isPredicateRegister(reg3) &&
-                (sopt == INS_SCALABLE_OPTS_NONE || sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE))
-            {
-                assert(opt == INS_OPTS_SCALABLE_B);
-                assert(isPredicateRegister(reg1)); // DDDD
-                assert(isPredicateRegister(reg2)); // gggg
-                assert(isPredicateRegister(reg3)); // NNNN
-                fmt = sopt == INS_SCALABLE_OPTS_NONE ? IF_SVE_CZ_4A : IF_SVE_CZ_4A_K;
-            }
-            else
-            {
-                assert(isVectorRegister(reg1));
-                assert(isLowPredicateRegister(reg2));
-                if (isGeneralRegisterOrSP(reg3))
-                {
-                    assert(insScalableOptsNone(sopt));
-                    fmt  = IF_SVE_CQ_3A;
-                    reg3 = encodingSPtoZR(reg3);
-                }
-                else
-                {
-                    assert(sopt == INS_SCALABLE_OPTS_WITH_SIMD_SCALAR);
-                    assert(isVectorRegister(reg3));
-                    fmt = IF_SVE_CP_3A;
-                }
-            }
-
-            // MOV is an alias for CPY, and is always the preferred disassembly.
-            ins = INS_sve_mov;
-            break;
-
-        case INS_sve_lasta:
-        case INS_sve_lastb:
-            assert(insOptsScalableStandard(opt));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            if (isGeneralRegister(reg1))
-            {
-                assert(insScalableOptsNone(sopt));
-                assert(isGeneralRegister(reg1));
-                fmt = IF_SVE_CS_3A;
-            }
-            else if (sopt == INS_SCALABLE_OPTS_WITH_SIMD_SCALAR)
-            {
-                assert(isVectorRegister(reg1));
-                fmt = IF_SVE_CR_3A;
-            }
-            break;
-
-        case INS_sve_revd:
-            assert(isVectorRegister(reg1));       // ddddd
-            assert(isLowPredicateRegister(reg2)); // ggg
-            assert(isVectorRegister(reg3));       // nnnnn
-            fmt = IF_SVE_CT_3A;
-            break;
-
-        case INS_sve_rbit:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_CU_3A;
-            break;
-
-        case INS_sve_revb:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_CU_3A;
-            break;
-
-        case INS_sve_revh:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableWords(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_CU_3A;
-            break;
-
-        case INS_sve_revw:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(opt == INS_OPTS_SCALABLE_D);
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_CU_3A;
-            break;
-
-        case INS_sve_splice:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            fmt = (sopt == INS_SCALABLE_OPTS_WITH_VECTOR_PAIR) ? IF_SVE_CV_3A : IF_SVE_CV_3B;
-            break;
-
-        case INS_sve_brka:
-        case INS_sve_brkb:
-            assert(isPredicateRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isPredicateRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE)
-            {
-                pmerge = true;
-            }
-            fmt = IF_SVE_DB_3A;
-            break;
-
-        case INS_sve_brkas:
-        case INS_sve_brkbs:
-            assert(opt == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isPredicateRegister(reg3));
-            fmt = IF_SVE_DB_3B;
-            break;
-
-        case INS_sve_brkn:
-        case INS_sve_brkns:
-            assert(insOptsScalable(opt));
-            assert(isPredicateRegister(reg1)); // MMMM
-            assert(isPredicateRegister(reg2)); // gggg
-            assert(isPredicateRegister(reg3)); // NNNN
-            fmt = IF_SVE_DC_3A;
-            break;
-
-        case INS_sve_cntp:
-            assert(size == EA_8BYTE);
-            assert(isGeneralRegister(reg1));                       // ddddd
-            assert(isPredicateRegister(reg2));                     // gggg
-            assert(isPredicateRegister(reg3));                     // NNNN
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-            fmt = IF_SVE_DK_3A;
-            break;
-
-        case INS_sve_shadd:
-        case INS_sve_shsub:
-        case INS_sve_shsubr:
-        case INS_sve_srhadd:
-        case INS_sve_uhadd:
-        case INS_sve_uhsub:
-        case INS_sve_uhsubr:
-        case INS_sve_urhadd:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_EP_3A;
-            break;
-
-        case INS_sve_sadalp:
-        case INS_sve_uadalp:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_EQ_3A;
-            break;
-
-        case INS_sve_addp:
-        case INS_sve_smaxp:
-        case INS_sve_sminp:
-        case INS_sve_umaxp:
-        case INS_sve_uminp:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_ER_3A;
-            break;
-
-        case INS_sve_sqabs:
-        case INS_sve_sqneg:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_ES_3A;
-            break;
-
-        case INS_sve_urecpe:
-        case INS_sve_ursqrte:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(opt == INS_OPTS_SCALABLE_S);
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_ES_3A;
-            break;
-
-        case INS_sve_sqadd:
-        case INS_sve_sqsub:
-        case INS_sve_uqadd:
-        case INS_sve_uqsub:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(isScalableVectorSize(size));
-            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
-            {
-                assert(isVectorRegister(reg2));
-                fmt = IF_SVE_AT_3A;
-            }
-            else
-            {
-                assert(insScalableOptsNone(sopt));
-                assert(isLowPredicateRegister(reg2));
-                fmt = IF_SVE_ET_3A;
-            }
-            break;
-
-        case INS_sve_sqsubr:
-        case INS_sve_suqadd:
-        case INS_sve_uqsubr:
-        case INS_sve_usqadd:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            assert(isScalableVectorSize(size));
-            fmt = IF_SVE_ET_3A;
-            break;
-
-        case INS_sve_sqrshl:
-        case INS_sve_sqrshlr:
-        case INS_sve_sqshl:
-        case INS_sve_sqshlr:
-        case INS_sve_srshl:
-        case INS_sve_srshlr:
-        case INS_sve_uqrshl:
-        case INS_sve_uqrshlr:
-        case INS_sve_uqshl:
-        case INS_sve_uqshlr:
-        case INS_sve_urshl:
-        case INS_sve_urshlr:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableStandard(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_EU_3A;
-            break;
-
-        case INS_sve_faddp:
-        case INS_sve_fmaxnmp:
-        case INS_sve_fmaxp:
-        case INS_sve_fminnmp:
-        case INS_sve_fminp:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableFloat(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_GR_3A;
-            break;
-
-        case INS_sve_faddqv:
-        case INS_sve_fmaxnmqv:
-        case INS_sve_fminnmqv:
-        case INS_sve_fmaxqv:
-        case INS_sve_fminqv:
-            unreached(); // TODO-SVE: Not yet supported.
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableFloat(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_GS_3A;
-            break;
-
-        case INS_sve_fmaxnmv:
-        case INS_sve_fmaxv:
-        case INS_sve_fminnmv:
-        case INS_sve_fminv:
-        case INS_sve_faddv:
-            assert(isFloatReg(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableFloat(opt));
-            assert(isValidVectorElemsizeSveFloat(size));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_HE_3A;
-            break;
-
-        case INS_sve_fadda:
-            assert(isFloatReg(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableFloat(opt));
-            assert(isValidVectorElemsizeSveFloat(size));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_HJ_3A;
-            break;
-
-        case INS_sve_fabd:
-        case INS_sve_fadd:
-        case INS_sve_fdiv:
-        case INS_sve_fdivr:
-        case INS_sve_fmax:
-        case INS_sve_fmaxnm:
-        case INS_sve_fmin:
-        case INS_sve_fminnm:
-        case INS_sve_fmul:
-        case INS_sve_fmulx:
-        case INS_sve_fscale:
-        case INS_sve_fsub:
-        case INS_sve_fsubr:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableFloat(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_HL_3A;
-            break;
-
-        case INS_sve_famax:
-        case INS_sve_famin:
-            unreached(); // TODO-SVE: Not yet supported.
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableFloat(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_HL_3A;
-            break;
-
-        case INS_sve_frintn:
-        case INS_sve_frintm:
-        case INS_sve_frintp:
-        case INS_sve_frintz:
-        case INS_sve_frinta:
-        case INS_sve_frintx:
-        case INS_sve_frinti:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableFloat(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_HQ_3A;
-            break;
-
-        case INS_sve_frecpx:
-        case INS_sve_fsqrt:
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(insOptsScalableFloat(opt));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_HR_3A;
-            break;
-
-        case INS_sve_whilege:
-        case INS_sve_whilegt:
-        case INS_sve_whilelt:
-        case INS_sve_whilele:
-        case INS_sve_whilehs:
-        case INS_sve_whilehi:
-        case INS_sve_whilelo:
-        case INS_sve_whilels:
-            assert(isGeneralRegister(reg2));                       // nnnnn
-            assert(isGeneralRegister(reg3));                       // mmmmm
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-            assert(insOptsScalableStandard(opt));
-
-            if (insScalableOptsNone(sopt))
-            {
-                assert(isPredicateRegister(reg1));    // DDDD
-                assert(isValidGeneralDatasize(size)); // X
-                fmt = IF_SVE_DT_3A;
-            }
-            else if (insScalableOptsWithPredicatePair(sopt))
-            {
-                assert(isLowPredicateRegister(reg1)); // DDD
-                assert(size == EA_8BYTE);
-                fmt = IF_SVE_DX_3A;
-            }
-            else
-            {
-                assert(insScalableOptsWithVectorLength(sopt)); // l
-                assert(isHighPredicateRegister(reg1));         // DDD
-                assert(size == EA_8BYTE);
-                vectorLength4x = (sopt == INS_SCALABLE_OPTS_VL_4X);
-                fmt            = IF_SVE_DY_3A;
-            }
-            break;
-
-        case INS_sve_whilewr:
-        case INS_sve_whilerw:
-            assert(insOptsScalableStandard(opt));
-            assert(isPredicateRegister(reg1)); // DDDD
-            assert(isGeneralRegister(reg2));   // nnnnn
-            assert(size == EA_8BYTE);
-            assert(isGeneralRegister(reg3));                       // mmmmm
-            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_DU_3A;
-            break;
-
-        case INS_sve_movs:
-            assert(insOptsScalable(opt));
-            assert(isPredicateRegister(reg1)); // DDDD
-            assert(isPredicateRegister(reg2)); // gggg
-            assert(isPredicateRegister(reg3)); // NNNN
-            fmt = IF_SVE_CZ_4A;
-            break;
-
-        case INS_sve_fcmeq:
-        case INS_sve_fcmge:
-        case INS_sve_fcmgt:
-        case INS_sve_fcmlt:
-        case INS_sve_fcmle:
-        case INS_sve_fcmne:
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(insScalableOptsNone(sopt));
-            assert(isPredicateRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isScalableVectorSize(size));
-            fmt = IF_SVE_HI_3A;
-            break;
-
-        case INS_sve_flogb:
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(insScalableOptsNone(sopt));
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isScalableVectorSize(size));
-            fmt = IF_SVE_HP_3A;
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstr(attr);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-    id->idReg3(reg3);
-
-    if (pmerge)
-    {
-        id->idPredicateReg2Merge(pmerge);
-    }
-    else if (vectorLength4x)
-    {
-        id->idVectorLength4x(vectorLength4x);
-    }
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-//-----------------------------------------------------------------------------------
-// emitIns_R_R_R_I_LdStPair: Add an instruction storing 2 registers into a memory
-//                     (pointed by reg3) and the offset (immediate).
-//
-// Arguments:
-//     ins      - The instruction code
-//     attr     - The emit attribute for register 1
-//     attr2    - The emit attribute for register 2
-//     reg1     - Register 1
-//     reg2     - Register 2
-//     reg3     - Register 3
-//     imm      - Immediate offset, prior to scaling by operand size
-//     varx1    - LclVar number 1
-//     varx2    - LclVar number 2
-//     offs1    - Memory offset of lclvar number 1
-//     offs2    - Memory offset of lclvar number 2
-//
-void emitter::emitIns_R_R_R_I_LdStPair(instruction ins,
-                                       emitAttr    attr,
-                                       emitAttr    attr2,
-                                       regNumber   reg1,
-                                       regNumber   reg2,
-                                       regNumber   reg3,
-                                       ssize_t     imm,
-                                       int         varx1,
-                                       int         varx2,
-                                       int         offs1,
-                                       int offs2 DEBUG_ARG(unsigned var1RefsOffs) DEBUG_ARG(unsigned var2RefsOffs))
-{
-    assert((ins == INS_stp) || (ins == INS_ldp));
-    emitAttr  size  = EA_SIZE(attr);
-    insFormat fmt   = IF_NONE;
-    unsigned  scale = 0;
-
-    // Is the target a vector register?
-    if (isVectorRegister(reg1))
-    {
-        assert(isValidVectorLSPDatasize(size));
-        assert(isVectorRegister(reg2));
-
-        scale = NaturalScale_helper(size);
-        assert((scale >= 2) && (scale <= 4));
-    }
-    else
-    {
-        assert(isValidGeneralDatasize(size));
-        assert(isGeneralRegisterOrZR(reg2));
-        scale = (size == EA_8BYTE) ? 3 : 2;
-    }
-
-    reg3 = encodingSPtoZR(reg3);
-
-    fmt          = IF_LS_3C;
-    ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
-    if (imm == 0)
-    {
-        fmt = IF_LS_3B;
-    }
-    else
-    {
-        if ((imm & mask) == 0)
-        {
-            imm >>= scale; // The immediate is scaled by the size of the ld/st
-        }
-        else
-        {
-            // Unlike emitIns_S_S_R_R(), we would never come here when
-            // (imm & mask) != 0.
-            unreached();
-        }
-    }
-
-    bool validVar1 = varx1 != -1;
-    bool validVar2 = varx2 != -1;
-
-    instrDesc* id;
-
-    if (validVar1 && validVar2)
-    {
-        id = emitNewInstrLclVarPair(attr, imm);
-        id->idAddr()->iiaLclVar.initLclVarAddr(varx1, offs1);
-        id->idSetIsLclVar();
-
-        emitGetLclVarPairLclVar2(id)->initLclVarAddr(varx2, offs2);
-    }
-    else
-    {
-        id = emitNewInstrCns(attr, imm);
-        if (validVar1)
-        {
-            id->idAddr()->iiaLclVar.initLclVarAddr(varx1, offs1);
-            id->idSetIsLclVar();
-        }
-        if (validVar2)
-        {
-            id->idAddr()->iiaLclVar.initLclVarAddr(varx2, offs2);
-            id->idSetIsLclVar();
-        }
-    }
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-    id->idReg3(reg3);
-
-    // Record the attribute for the second register in the pair
-    if (EA_IS_GCREF(attr2))
-    {
-        id->idGCrefReg2(GCT_GCREF);
-    }
-    else if (EA_IS_BYREF(attr2))
-    {
-        id->idGCrefReg2(GCT_BYREF);
-    }
-    else
-    {
-        id->idGCrefReg2(GCT_NONE);
-    }
-
-#ifdef DEBUG
-    id->idDebugOnlyInfo()->idVarRefOffs  = var1RefsOffs;
-    id->idDebugOnlyInfo()->idVarRefOffs2 = var2RefsOffs;
-#endif
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing three registers and a constant.
- */
-
-void emitter::emitIns_R_R_R_I(instruction ins,
-                              emitAttr    attr,
-                              regNumber   reg1,
-                              regNumber   reg2,
-                              regNumber   reg3,
-                              ssize_t     imm,
-                              insOpts     opt /* = INS_OPTS_NONE */,
-                              emitAttr    attrReg2 /* = EA_UNKNOWN */)
-{
-    emitAttr  size     = EA_SIZE(attr);
-    emitAttr  elemsize = EA_UNKNOWN;
-    insFormat fmt      = IF_NONE;
-    bool      isLdSt   = false;
-    bool      isSIMD   = false;
-    bool      isAddSub = false;
-    bool      setFlags = false;
-    unsigned  scale    = 0;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_extr:
-            assert(insOptsNone(opt));
-            assert(isValidGeneralDatasize(size));
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidImmShift(imm, size));
-            fmt = IF_DR_3E;
-            break;
-
-        case INS_and:
-        case INS_ands:
-        case INS_eor:
-        case INS_orr:
-        case INS_bic:
-        case INS_bics:
-        case INS_eon:
-        case INS_orn:
-            assert(isValidGeneralDatasize(size));
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidImmShift(imm, size));
-            if (imm == 0)
-            {
-                assert(insOptsNone(opt)); // a zero imm, means no shift kind
-                fmt = IF_DR_3A;
-            }
-            else
-            {
-                assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind
-                fmt = IF_DR_3B;
-            }
-            break;
-
-        case INS_sve_cmpeq:
-        case INS_sve_cmpgt:
-        case INS_sve_cmpge:
-        case INS_sve_cmpne:
-        case INS_sve_cmple:
-        case INS_sve_cmplt:
-            assert(insOptsScalableStandard(opt));
-            assert(isPredicateRegister(reg1));    // DDDD
-            assert(isLowPredicateRegister(reg2)); // ggg
-            assert(isVectorRegister(reg3));       // nnnnn
-            assert(isValidSimm5(imm));            // iiiii
-            fmt = IF_SVE_CY_3A;
-            break;
-
-        case INS_sve_cmphi:
-        case INS_sve_cmphs:
-        case INS_sve_cmplo:
-        case INS_sve_cmpls:
-            assert(insOptsScalableStandard(opt));
-            assert(isPredicateRegister(reg1));    // DDDD
-            assert(isLowPredicateRegister(reg2)); // ggg
-            assert(isVectorRegister(reg3));       // nnnnn
-            assert(isValidUimm7(imm));            // iiiii
-            fmt = IF_SVE_CY_3B;
-            break;
-
-        case INS_sve_sdot:
-        case INS_sve_udot:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3)); // mmm
-            assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
-            assert(isValidUimm2(imm)); // ii
-
-            if (opt == INS_OPTS_SCALABLE_B)
-            {
-                fmt = IF_SVE_EY_3A;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_H);
-                fmt = IF_SVE_EG_3A;
-            }
-            break;
-
-        case INS_sve_usdot:
-        case INS_sve_sudot:
-            assert(opt == INS_OPTS_SCALABLE_B);
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3)); // mmm
-            assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
-            assert(isValidUimm2(imm)); // ii
-            fmt = IF_SVE_EZ_3A;
-            break;
-
-        case INS_sve_mul:
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3));
-
-            switch (opt)
-            {
-                case INS_OPTS_SCALABLE_H:
-                    assert(isValidUimm3(imm));                    // iii
-                    assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
-                    fmt = IF_SVE_FD_3A;
-                    break;
-
-                case INS_OPTS_SCALABLE_S:
-                    assert(isValidUimm2(imm));                    // ii
-                    assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
-                    fmt = IF_SVE_FD_3B;
-                    break;
-
-                case INS_OPTS_SCALABLE_D:
-                    assert(isValidImm1(imm));                      // i
-                    assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm
-                    fmt = IF_SVE_FD_3C;
-                    break;
-
-                default:
-                    unreached();
-                    break;
-            }
-            break;
-
-        case INS_fmul: // by element, imm[0..3] selects the element of reg3
-        case INS_fmla:
-        case INS_fmls:
-        case INS_fmulx:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidVectorDatasize(size));
-                assert(isValidArrangement(size, opt));
-                elemsize = optGetElemsize(opt);
-                assert(isValidVectorElemsizeFloat(elemsize));
-                assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-                assert(opt != INS_OPTS_1D); // Reserved encoding
-                fmt = IF_DV_3BI;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert(isValidScalarDatasize(size));
-                elemsize = size;
-                assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-                fmt = IF_DV_3DI;
-            }
-            break;
-
-        case INS_mul: // by element, imm[0..7] selects the element of reg3
-        case INS_mla:
-        case INS_mls:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            // Vector operation
-            assert(insOptsAnyArrangement(opt));
-            assert(isValidVectorDatasize(size));
-            assert(isValidArrangement(size, opt));
-            elemsize = optGetElemsize(opt);
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-            // Only has encodings for H or S elemsize
-            assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE));
-            // Only has encodings for V0..V15
-            if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0))
-            {
-                noway_assert(!"Invalid reg3");
-            }
-            fmt = IF_DV_3AI;
-            break;
-
-        case INS_add:
-        case INS_sub:
-            setFlags = false;
-            isAddSub = true;
-            break;
-
-        case INS_adds:
-        case INS_subs:
-            setFlags = true;
-            isAddSub = true;
-            break;
-
-        case INS_ldpsw:
-            scale  = 2;
-            isLdSt = true;
-            break;
-
-        case INS_ldnp:
-        case INS_stnp:
-            assert(insOptsNone(opt)); // Can't use Pre/Post index on these two instructions
-            FALLTHROUGH;
-
-        case INS_ldp:
-        case INS_stp:
-            // Is the target a vector register?
-            if (isVectorRegister(reg1))
-            {
-                scale  = NaturalScale_helper(size);
-                isSIMD = true;
-            }
-            else
-            {
-                scale = (size == EA_8BYTE) ? 3 : 2;
-            }
-            isLdSt = true;
-            fmt    = IF_LS_3C;
-            break;
-
-        case INS_ld1:
-        case INS_ld2:
-        case INS_ld3:
-        case INS_ld4:
-        case INS_st1:
-        case INS_st2:
-        case INS_st3:
-        case INS_st4:
-            assert(isVectorRegister(reg1));
-            assert(isGeneralRegisterOrSP(reg2));
-            assert(isGeneralRegister(reg3));
-
-            assert(insOptsPostIndex(opt));
-
-            elemsize = size;
-            assert(isValidVectorElemsize(elemsize));
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-
-            // Load/Store single structure  post-indexed by a register
-            reg2 = encodingSPtoZR(reg2);
-            fmt  = IF_LS_3G;
-            break;
-
-        case INS_ext:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isValidVectorDatasize(size));
-            assert(isValidArrangement(size, opt));
-            assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_16B));
-            assert(isValidVectorIndex(size, EA_1BYTE, imm));
-            fmt = IF_DV_3G;
-            break;
-
-        case INS_smlal:
-        case INS_smlsl:
-        case INS_smull:
-        case INS_umlal:
-        case INS_umlsl:
-        case INS_umull:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(size == EA_8BYTE);
-            assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
-            elemsize = optGetElemsize(opt);
-            // Restricted to V0-V15 when element size is H.
-            if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0))
-            {
-                assert(!"Invalid reg3");
-            }
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-            fmt = IF_DV_3AI;
-            break;
-
-        case INS_sqdmlal:
-        case INS_sqdmlsl:
-        case INS_sqdmull:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(size == EA_8BYTE);
-                assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
-                elemsize = optGetElemsize(opt);
-                fmt      = IF_DV_3AI;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert((size == EA_2BYTE) || (size == EA_4BYTE));
-                elemsize = size;
-                fmt      = IF_DV_3EI;
-            }
-            // Restricted to V0-V15 when element size is H.
-            if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0))
-            {
-                assert(!"Invalid reg3");
-            }
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-            break;
-
-        case INS_sqdmulh:
-        case INS_sqrdmlah:
-        case INS_sqrdmlsh:
-        case INS_sqrdmulh:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            if (insOptsAnyArrangement(opt))
-            {
-                // Vector operation
-                assert(isValidVectorDatasize(size));
-                elemsize = optGetElemsize(opt);
-                assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE));
-                fmt = IF_DV_3AI;
-            }
-            else
-            {
-                // Scalar operation
-                assert(insOptsNone(opt));
-                assert((size == EA_2BYTE) || (size == EA_4BYTE));
-                elemsize = size;
-                fmt      = IF_DV_3EI;
-            }
-            // Restricted to V0-V15 when element size is H.
-            if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0))
-            {
-                assert(!"Invalid reg3");
-            }
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-            break;
-
-        case INS_smlal2:
-        case INS_smlsl2:
-        case INS_smull2:
-        case INS_sqdmlal2:
-        case INS_sqdmlsl2:
-        case INS_sqdmull2:
-        case INS_umlal2:
-        case INS_umlsl2:
-        case INS_umull2:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(size == EA_16BYTE);
-            assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S));
-            elemsize = optGetElemsize(opt);
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-            // Restricted to V0-V15 when element size is H
-            if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0))
-            {
-                assert(!"Invalid reg3");
-            }
-            fmt = IF_DV_3AI;
-            break;
-
-        case INS_sdot:
-        case INS_udot:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(((size == EA_8BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_16BYTE) && (opt == INS_OPTS_4S)));
-            assert(isValidVectorIndex(EA_16BYTE, EA_4BYTE, imm));
-            fmt = IF_DV_3AI;
-            break;
-
-        case INS_sve_cdot:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3)); // mmm
-            if (opt == INS_OPTS_SCALABLE_B)
-            {
-                assert(isValidUimm4(imm)); // ii rr
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
-                fmt = IF_SVE_FA_3A;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_H);
-                assert(isValidUimm3(imm)); // i rr
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V15));
-                fmt = IF_SVE_FA_3B;
-            }
-            break;
-
-        case INS_sve_cmla:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3)); // mmm
-            if (opt == INS_OPTS_SCALABLE_H)
-            {
-                assert(isValidUimm4(imm)); // ii rr
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
-                fmt = IF_SVE_FB_3A;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_S);
-                assert(isValidUimm3(imm)); // i rr
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V15));
-                fmt = IF_SVE_FB_3B;
-            }
-            break;
-
-        case INS_sve_sqrdcmlah:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3)); // mmm
-            if (opt == INS_OPTS_SCALABLE_H)
-            {
-                assert(isValidUimm4(imm)); // ii rr
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
-                fmt = IF_SVE_FC_3A;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_S);
-                assert(isValidUimm3(imm)); // i rr
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V15));
-                fmt = IF_SVE_FC_3B;
-            }
-            break;
-
-        case INS_sve_ld1d:
-            assert(insOptsScalable(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-            if (opt == INS_OPTS_SCALABLE_Q)
-            {
-                fmt = IF_SVE_IH_3A_A;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_D);
-                fmt = IF_SVE_IH_3A;
-            }
-            break;
-
-        case INS_sve_ld1w:
-            assert(insOptsScalableWordsOrQuadwords(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-            fmt = IF_SVE_IH_3A_F;
-            break;
-
-        case INS_sve_ld1sw:
-            assert(opt == INS_OPTS_SCALABLE_D);
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-            fmt = IF_SVE_IJ_3A;
-            break;
-
-        case INS_sve_ld1sb:
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-            fmt = IF_SVE_IJ_3A_D;
-            break;
-
-        case INS_sve_ld1b:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-            fmt = IF_SVE_IJ_3A_E;
-            break;
-
-        case INS_sve_ld1sh:
-            assert(insOptsScalableWords(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-            fmt = IF_SVE_IJ_3A_F;
-            break;
-
-        case INS_sve_ld1h:
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-            fmt = IF_SVE_IJ_3A_G;
-            break;
-
-        case INS_sve_ldnf1sw:
-        case INS_sve_ldnf1d:
-            assert(opt == INS_OPTS_SCALABLE_D);
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-            fmt = IF_SVE_IL_3A;
-            break;
-
-        case INS_sve_ldnf1sh:
-        case INS_sve_ldnf1w:
-            assert(insOptsScalableWords(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-            fmt = IF_SVE_IL_3A_A;
-            break;
-
-        case INS_sve_ldnf1h:
-        case INS_sve_ldnf1sb:
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-            fmt = IF_SVE_IL_3A_B;
-            break;
-
-        case INS_sve_ldnf1b:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-            fmt = IF_SVE_IL_3A_C;
-            break;
-
-        case INS_sve_ldnt1b:
-        case INS_sve_ldnt1h:
-        case INS_sve_ldnt1w:
-        case INS_sve_ldnt1d:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-
-#ifdef DEBUG
-            switch (ins)
-            {
-                case INS_sve_ldnt1b:
-                    assert(opt == INS_OPTS_SCALABLE_B);
-                    break;
-
-                case INS_sve_ldnt1h:
-                    assert(opt == INS_OPTS_SCALABLE_H);
-                    break;
-
-                case INS_sve_ldnt1w:
-                    assert(opt == INS_OPTS_SCALABLE_S);
-                    break;
-
-                case INS_sve_ldnt1d:
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-#endif // DEBUG
-
-            fmt = IF_SVE_IM_3A;
-            break;
-
-        case INS_sve_ld1rqb:
-        case INS_sve_ld1rob:
-        case INS_sve_ld1rqh:
-        case INS_sve_ld1roh:
-        case INS_sve_ld1rqw:
-        case INS_sve_ld1row:
-        case INS_sve_ld1rqd:
-        case INS_sve_ld1rod:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-
-#ifdef DEBUG
-            switch (ins)
-            {
-                case INS_sve_ld1rqb:
-                case INS_sve_ld1rqd:
-                case INS_sve_ld1rqh:
-                case INS_sve_ld1rqw:
-                    assert(isValidSimm4_MultipleOf16(imm));
-                    break;
-
-                case INS_sve_ld1rob:
-                case INS_sve_ld1rod:
-                case INS_sve_ld1roh:
-                case INS_sve_ld1row:
-                    assert(isValidSimm4_MultipleOf32(imm));
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-
-            switch (ins)
-            {
-                case INS_sve_ld1rqb:
-                case INS_sve_ld1rob:
-                    assert(opt == INS_OPTS_SCALABLE_B);
-                    break;
-
-                case INS_sve_ld1rqh:
-                case INS_sve_ld1roh:
-                    assert(opt == INS_OPTS_SCALABLE_H);
-                    break;
-
-                case INS_sve_ld1rqw:
-                case INS_sve_ld1row:
-                    assert(opt == INS_OPTS_SCALABLE_S);
-                    break;
-
-                case INS_sve_ld1rqd:
-                case INS_sve_ld1rod:
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-#endif // DEBUG
-
-            fmt = IF_SVE_IO_3A;
-            break;
-
-        case INS_sve_ld2q:
-        case INS_sve_ld3q:
-        case INS_sve_ld4q:
-            assert(opt == INS_OPTS_SCALABLE_Q);
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-
-#ifdef DEBUG
-            switch (ins)
-            {
-                case INS_sve_ld2q:
-                    assert(isValidSimm4_MultipleOf2(imm));
-                    break;
-
-                case INS_sve_ld3q:
-                    assert(isValidSimm4_MultipleOf3(imm));
-                    break;
-
-                case INS_sve_ld4q:
-                    assert(isValidSimm4_MultipleOf4(imm));
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-#endif // DEBUG
-
-            fmt = IF_SVE_IQ_3A;
-            break;
-
-        case INS_sve_ld2b:
-        case INS_sve_ld3b:
-        case INS_sve_ld4b:
-        case INS_sve_ld2h:
-        case INS_sve_ld3h:
-        case INS_sve_ld4h:
-        case INS_sve_ld2w:
-        case INS_sve_ld3w:
-        case INS_sve_ld4w:
-        case INS_sve_ld2d:
-        case INS_sve_ld3d:
-        case INS_sve_ld4d:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-
-#ifdef DEBUG
-            switch (ins)
-            {
-                case INS_sve_ld2b:
-                case INS_sve_ld2h:
-                case INS_sve_ld2w:
-                case INS_sve_ld2d:
-                    assert(isValidSimm4_MultipleOf2(imm));
-                    break;
-
-                case INS_sve_ld3b:
-                case INS_sve_ld3h:
-                case INS_sve_ld3w:
-                case INS_sve_ld3d:
-                    assert(isValidSimm4_MultipleOf3(imm));
-                    break;
-
-                case INS_sve_ld4b:
-                case INS_sve_ld4h:
-                case INS_sve_ld4w:
-                case INS_sve_ld4d:
-                    assert(isValidSimm4_MultipleOf4(imm));
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-
-            switch (ins)
-            {
-                case INS_sve_ld2b:
-                case INS_sve_ld3b:
-                case INS_sve_ld4b:
-                    assert(opt == INS_OPTS_SCALABLE_B);
-                    break;
-
-                case INS_sve_ld2h:
-                case INS_sve_ld3h:
-                case INS_sve_ld4h:
-                    assert(opt == INS_OPTS_SCALABLE_H);
-                    break;
-
-                case INS_sve_ld2w:
-                case INS_sve_ld3w:
-                case INS_sve_ld4w:
-                    assert(opt == INS_OPTS_SCALABLE_S);
-                    break;
-
-                case INS_sve_ld2d:
-                case INS_sve_ld3d:
-                case INS_sve_ld4d:
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-#endif // DEBUG
-
-            fmt = IF_SVE_IS_3A;
-            break;
-
-        case INS_sve_st2q:
-        case INS_sve_st3q:
-        case INS_sve_st4q:
-            assert(opt == INS_OPTS_SCALABLE_Q);
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-
-#ifdef DEBUG
-            switch (ins)
-            {
-                case INS_sve_st2q:
-                    assert(isValidSimm4_MultipleOf2(imm));
-                    break;
-
-                case INS_sve_st3q:
-                    assert(isValidSimm4_MultipleOf3(imm));
-                    break;
-
-                case INS_sve_st4q:
-                    assert(isValidSimm4_MultipleOf4(imm));
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-#endif // DEBUG
-
-            fmt = IF_SVE_JE_3A;
-            break;
-
-        case INS_sve_stnt1b:
-        case INS_sve_stnt1h:
-        case INS_sve_stnt1w:
-        case INS_sve_stnt1d:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-
-#ifdef DEBUG
-            switch (ins)
-            {
-                case INS_sve_stnt1b:
-                    assert(opt == INS_OPTS_SCALABLE_B);
-                    break;
-
-                case INS_sve_stnt1h:
-                    assert(opt == INS_OPTS_SCALABLE_H);
-                    break;
-
-                case INS_sve_stnt1w:
-                    assert(opt == INS_OPTS_SCALABLE_S);
-                    break;
-
-                case INS_sve_stnt1d:
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-#endif // DEBUG
-
-            fmt = IF_SVE_JM_3A;
-            break;
-
-        case INS_sve_st1w:
-        case INS_sve_st1d:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-
-            if (opt == INS_OPTS_SCALABLE_Q && (ins == INS_sve_st1d))
-            {
-                fmt = IF_SVE_JN_3C_D;
-            }
-            else
-            {
-                if ((ins == INS_sve_st1w) && insOptsScalableWords(opt))
-                {
-                    fmt = IF_SVE_JN_3B;
-                }
-                else
-                {
-#if DEBUG
-                    if (ins == INS_sve_st1w)
-                    {
-                        assert(opt == INS_OPTS_SCALABLE_Q);
-                    }
-                    else
-                    {
-                        assert(opt == INS_OPTS_SCALABLE_D);
-                    }
-#endif // DEBUG
-                    fmt = IF_SVE_JN_3C;
-                }
-            }
-            break;
-
-        case INS_sve_st2b:
-        case INS_sve_st3b:
-        case INS_sve_st4b:
-        case INS_sve_st2h:
-        case INS_sve_st3h:
-        case INS_sve_st4h:
-        case INS_sve_st2w:
-        case INS_sve_st3w:
-        case INS_sve_st4w:
-        case INS_sve_st2d:
-        case INS_sve_st3d:
-        case INS_sve_st4d:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-
-#ifdef DEBUG
-            switch (ins)
-            {
-                case INS_sve_st2b:
-                case INS_sve_st2h:
-                case INS_sve_st2w:
-                case INS_sve_st2d:
-                    assert(isValidSimm4_MultipleOf2(imm));
-                    break;
-
-                case INS_sve_st3b:
-                case INS_sve_st3h:
-                case INS_sve_st3w:
-                case INS_sve_st3d:
-                    assert(isValidSimm4_MultipleOf3(imm));
-                    break;
-
-                case INS_sve_st4b:
-                case INS_sve_st4h:
-                case INS_sve_st4w:
-                case INS_sve_st4d:
-                    assert(isValidSimm4_MultipleOf4(imm));
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-
-            switch (ins)
-            {
-                case INS_sve_st2b:
-                case INS_sve_st3b:
-                case INS_sve_st4b:
-                    assert(opt == INS_OPTS_SCALABLE_B);
-                    break;
-
-                case INS_sve_st2h:
-                case INS_sve_st3h:
-                case INS_sve_st4h:
-                    assert(opt == INS_OPTS_SCALABLE_H);
-                    break;
-
-                case INS_sve_st2w:
-                case INS_sve_st3w:
-                case INS_sve_st4w:
-                    assert(opt == INS_OPTS_SCALABLE_S);
-                    break;
-
-                case INS_sve_st2d:
-                case INS_sve_st3d:
-                case INS_sve_st4d:
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-#endif // DEBUG
-
-            fmt = IF_SVE_JO_3A;
-            break;
-
-        case INS_sve_st1b:
-        case INS_sve_st1h:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isValidSimm4(imm));
-            // st1h is reserved for scalable B
-            assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) : insOptsScalableStandard(opt));
-            fmt = IF_SVE_JN_3A;
-            break;
-
-        case INS_sve_fmla:
-        case INS_sve_fmls:
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3));
-
-            if (opt == INS_OPTS_SCALABLE_S)
-            {
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
-                assert(isValidUimm2(imm));                    // ii
-                fmt = IF_SVE_GU_3A;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_D);
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm
-                assert(isValidImm1(imm));                      // i
-                fmt = IF_SVE_GU_3B;
-            }
-            break;
-
-        case INS_sve_bfmla:
-        case INS_sve_bfmls:
-            assert(opt == INS_OPTS_SCALABLE_H);
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3)); // mmm
-            assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
-            assert(isValidUimm3(imm)); // i ii
-            fmt = IF_SVE_GU_3C;
-            break;
-
-        case INS_sve_fmul:
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3));
-
-            if (opt == INS_OPTS_SCALABLE_S)
-            {
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
-                assert(isValidUimm2(imm));                    // ii
-                fmt = IF_SVE_GX_3A;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_D);
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm
-                assert(isValidImm1(imm));                      // i
-                fmt = IF_SVE_GX_3B;
-            }
-            break;
-
-        case INS_sve_bfmul:
-            assert(opt = INS_OPTS_SCALABLE_H);
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3)); // mmm
-            assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
-            assert(isValidUimm3(imm)); // i ii
-            fmt = IF_SVE_GX_3C;
-            break;
-
-        case INS_sve_fdot:
-        case INS_sve_bfdot:
-            assert(opt = INS_OPTS_SCALABLE_H);
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3)); // mmm
-            assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
-            assert(isValidUimm2(imm)); // ii
-            fmt = IF_SVE_GY_3B;
-            break;
-
-        case INS_sve_mla:
-        case INS_sve_mls:
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3));
-
-            if (opt == INS_OPTS_SCALABLE_H)
-            {
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
-                assert(isValidUimm3(imm));                    // i ii
-                fmt = IF_SVE_FF_3A;
-            }
-            else if (opt == INS_OPTS_SCALABLE_S)
-            {
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
-                assert(isValidUimm2(imm));                    // ii
-                fmt = IF_SVE_FF_3B;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_D);
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm
-                assert(isValidImm1(imm));                      // i
-                fmt = IF_SVE_FF_3C;
-            }
-            break;
-
-        case INS_sve_smullb:
-        case INS_sve_smullt:
-        case INS_sve_umullb:
-        case INS_sve_umullt:
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3));
-
-            if (opt == INS_OPTS_SCALABLE_H)
-            {
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
-                assert(isValidUimm3(imm));                    // ii i
-                fmt = IF_SVE_FE_3A;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_S);
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm
-                assert(isValidUimm2(imm));                     // i i
-                fmt = IF_SVE_FE_3B;
-            }
-            break;
-
-        case INS_sve_smlalb:
-        case INS_sve_smlalt:
-        case INS_sve_umlalb:
-        case INS_sve_umlalt:
-        case INS_sve_smlslb:
-        case INS_sve_smlslt:
-        case INS_sve_umlslb:
-        case INS_sve_umlslt:
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3));
-
-            if (opt == INS_OPTS_SCALABLE_H)
-            {
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
-                assert(isValidUimm3(imm));                    // ii i
-                fmt = IF_SVE_FG_3A;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_S);
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm
-                assert(isValidUimm2(imm));                     // i i
-                fmt = IF_SVE_FG_3B;
-            }
-            break;
-
-        case INS_sve_sqdmullb:
-        case INS_sve_sqdmullt:
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3));
-
-            if (opt == INS_OPTS_SCALABLE_H)
-            {
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
-                assert(isValidUimm3(imm));                    // ii i
-                fmt = IF_SVE_FH_3A;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_S);
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm
-                assert(isValidUimm2(imm));                     // i i
-                fmt = IF_SVE_FH_3B;
-            }
-            break;
-
-        case INS_sve_sqdmulh:
-        case INS_sve_sqrdmulh:
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3));
-
-            if (opt == INS_OPTS_SCALABLE_H)
-            {
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
-                assert(isValidUimm3(imm));                    // ii i
-                fmt = IF_SVE_FI_3A;
-            }
-            else if (opt == INS_OPTS_SCALABLE_S)
-            {
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
-                assert(isValidUimm2(imm));                    // ii
-                fmt = IF_SVE_FI_3B;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_D);
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm
-                assert(isValidImm1(imm));                      // i
-                fmt = IF_SVE_FI_3C;
-            }
-            break;
-
-        case INS_sve_sqdmlalb:
-        case INS_sve_sqdmlalt:
-        case INS_sve_sqdmlslb:
-        case INS_sve_sqdmlslt:
-            assert(isVectorRegister(reg1)); // ddddd
-            assert(isVectorRegister(reg2)); // nnnnn
-            assert(isVectorRegister(reg3));
-
-            if (opt == INS_OPTS_SCALABLE_H)
-            {
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
-                assert(isValidUimm3(imm));                    // ii i
-                fmt = IF_SVE_FJ_3A;
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_S);
-                assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm
-                assert(isValidUimm2(imm));                     // ii
-                fmt = IF_SVE_FJ_3B;
-            }
-            break;
-
-        case INS_sve_fcadd:
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isScalableVectorSize(size));
-            imm = emitEncodeRotationImm90_or_270(imm);
-            fmt = IF_SVE_GP_3A;
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-
-    if (isLdSt)
-    {
-        assert(!isAddSub);
-        assert(isGeneralRegisterOrSP(reg3));
-        assert(insOptsNone(opt) || insOptsIndexed(opt));
-
-        if (isSIMD)
-        {
-            assert(isValidVectorLSPDatasize(size));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert((scale >= 2) && (scale <= 4));
-        }
-        else
-        {
-            assert(isValidGeneralDatasize(size));
-            assert(isGeneralRegisterOrZR(reg1));
-            assert(isGeneralRegisterOrZR(reg2));
-            assert((scale == 2) || (scale == 3));
-        }
-
-        // Load/Store Pair reserved encodings:
-        if (emitInsIsLoad(ins))
-        {
-            assert(reg1 != reg2);
-        }
-        if (insOptsIndexed(opt))
-        {
-            assert(reg1 != reg3);
-            assert(reg2 != reg3);
-        }
-
-        reg3 = encodingSPtoZR(reg3);
-
-        ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
-        if (imm == 0)
-        {
-            assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero
-
-            fmt = IF_LS_3B;
-        }
-        else
-        {
-            if ((imm & mask) == 0)
-            {
-                imm >>= scale; // The immediate is scaled by the size of the ld/st
-
-                if ((imm >= -64) && (imm <= 63))
-                {
-                    fmt = IF_LS_3C;
-                }
-            }
-#ifdef DEBUG
-            if (fmt != IF_LS_3C)
-            {
-                assert(!"Instruction cannot be encoded: IF_LS_3C");
-            }
-#endif
-        }
-    }
-    else if (isAddSub)
-    {
-        bool reg2IsSP = (reg2 == REG_SP);
-        assert(!isLdSt);
-        assert(isValidGeneralDatasize(size));
-        assert(isGeneralRegister(reg3));
-
-        if (setFlags || insOptsAluShift(opt)) // Can't encode SP in reg1 with setFlags or AluShift option
-        {
-            assert(isGeneralRegisterOrZR(reg1));
-        }
-        else
-        {
-            assert(isGeneralRegisterOrSP(reg1));
-            reg1 = encodingSPtoZR(reg1);
-        }
-
-        if (insOptsAluShift(opt)) // Can't encode SP in reg2 with AluShift option
-        {
-            assert(isGeneralRegister(reg2));
-        }
-        else
-        {
-            assert(isGeneralRegisterOrSP(reg2));
-            reg2 = encodingSPtoZR(reg2);
-        }
-
-        if (insOptsAnyExtend(opt))
-        {
-            assert((imm >= 0) && (imm <= 4));
-
-            fmt = IF_DR_3C;
-        }
-        else if (insOptsAluShift(opt))
-        {
-            // imm should be non-zero and in [1..63]
-            assert(isValidImmShift(imm, size) && (imm != 0));
-            fmt = IF_DR_3B;
-        }
-        else if (imm == 0)
-        {
-            assert(insOptsNone(opt));
-
-            if (reg2IsSP)
-            {
-                // To encode the SP register as reg2 we must use the IF_DR_3C encoding
-                // and also specify a LSL of zero (imm == 0)
-                opt = INS_OPTS_LSL;
-                fmt = IF_DR_3C;
-            }
-            else
-            {
-                fmt = IF_DR_3A;
-            }
-        }
-        else
-        {
-            assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A");
-        }
-    }
-
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstrCns(attr, imm);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-    id->idReg3(reg3);
-
-    // Record the attribute for the second register in the pair
-    id->idGCrefReg2(GCT_NONE);
-    if (attrReg2 != EA_UNKNOWN)
-    {
-        // Record the attribute for the second register in the pair
-        assert((fmt == IF_LS_3B) || (fmt == IF_LS_3C));
-        if (EA_IS_GCREF(attrReg2))
-        {
-            id->idGCrefReg2(GCT_GCREF);
-        }
-        else if (EA_IS_BYREF(attrReg2))
-        {
-            id->idGCrefReg2(GCT_BYREF);
-        }
-    }
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing three registers and two constants.
- */
-
-void emitter::emitIns_R_R_R_I_I(instruction ins,
-                                emitAttr    attr,
-                                regNumber   reg1,
-                                regNumber   reg2,
-                                regNumber   reg3,
-                                ssize_t     imm1,
-                                ssize_t     imm2,
-                                insOpts     opt)
-{
-    switch (ins)
-    {
-        case INS_sve_cdot:
-        {
-            if (opt == INS_OPTS_SCALABLE_B)
-            {
-                assert(isValidUimm2(imm1)); // ii
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_H);
-                assert(isValidImm1(imm1)); // i
-            }
-
-            assert(isValidUimm2(imm2)); // rr
-            const ssize_t imm = (imm1 << 2) | imm2;
-            emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, imm, opt);
-            break;
-        }
-
-        case INS_sve_cmla:
-        case INS_sve_sqrdcmlah:
-        {
-            if (opt == INS_OPTS_SCALABLE_H)
-            {
-                assert(isValidUimm2(imm1)); // ii
-            }
-            else
-            {
-                assert(opt == INS_OPTS_SCALABLE_S);
-                assert(isValidImm1(imm1)); // i
-            }
-
-            assert(isValidUimm2(imm2)); // rr
-            const ssize_t imm = (imm1 << 2) | imm2;
-            emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, imm, opt);
-            break;
-        }
-
-        default:
-            unreached();
-            break;
-    }
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing three registers, with an extend option
- */
-
-void emitter::emitIns_R_R_R_Ext(instruction ins,
-                                emitAttr    attr,
-                                regNumber   reg1,
-                                regNumber   reg2,
-                                regNumber   reg3,
-                                insOpts     opt,         /* = INS_OPTS_NONE */
-                                int         shiftAmount) /* = -1 -- unset   */
-{
-    emitAttr  size   = EA_SIZE(attr);
-    insFormat fmt    = IF_NONE;
-    bool      isSIMD = false;
-    int       scale  = -1;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_ldrb:
-        case INS_ldrsb:
-        case INS_strb:
-            scale = 0;
-            break;
-
-        case INS_ldrh:
-        case INS_ldrsh:
-        case INS_strh:
-            scale = 1;
-            break;
-
-        case INS_ldrsw:
-            scale = 2;
-            break;
-
-        case INS_ldr:
-        case INS_str:
-            // Is the target a vector register?
-            if (isVectorRegister(reg1))
-            {
-                assert(isValidVectorLSDatasize(size));
-                scale  = NaturalScale_helper(size);
-                isSIMD = true;
-            }
-            else
-            {
-                assert(isValidGeneralDatasize(size));
-                scale = (size == EA_8BYTE) ? 3 : 2;
-            }
-
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-
-    assert(scale != -1);
-    assert(insOptsLSExtend(opt));
-
-    if (isSIMD)
-    {
-        assert(isValidVectorLSDatasize(size));
-        assert(isVectorRegister(reg1));
-    }
-    else
-    {
-        assert(isValidGeneralLSDatasize(size));
-        assert(isGeneralRegisterOrZR(reg1));
-    }
-
-    assert(isGeneralRegisterOrSP(reg2));
-    assert(isGeneralRegister(reg3));
-
-    // Load/Store reserved encodings:
-    if (insOptsIndexed(opt))
-    {
-        assert(reg1 != reg2);
-    }
-
-    if (shiftAmount == -1)
-    {
-        shiftAmount = insOptsLSL(opt) ? scale : 0;
-    }
-
-    assert((shiftAmount == scale) || (shiftAmount == 0));
-
-    reg2 = encodingSPtoZR(reg2);
-    fmt  = IF_LS_3A;
-
-    instrDesc* id = emitNewInstr(attr);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-    id->idReg3(reg3);
-    id->idReg3Scaled(shiftAmount == scale);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing two registers and two constants.
- */
-
-void emitter::emitIns_R_R_I_I(
-    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt)
-{
-    emitAttr  size     = EA_SIZE(attr);
-    emitAttr  elemsize = EA_UNKNOWN;
-    insFormat fmt      = IF_NONE;
-    size_t    immOut   = 0; // composed from imm1 and imm2 and stored in the instrDesc
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        int        lsb;
-        int        width;
-        bitMaskImm bmi;
-        unsigned   registerListSize;
-
-        case INS_bfm:
-        case INS_sbfm:
-        case INS_ubfm:
-            assert(isGeneralRegister(reg1));
-            assert((ins == INS_bfm) ? isGeneralRegisterOrZR(reg2) : isGeneralRegister(reg2));
-            assert(isValidImmShift(imm1, size));
-            assert(isValidImmShift(imm2, size));
-            assert(insOptsNone(opt));
-            bmi.immNRS = 0;
-            bmi.immN   = (size == EA_8BYTE);
-            bmi.immR   = imm1;
-            bmi.immS   = imm2;
-            immOut     = bmi.immNRS;
-            fmt        = IF_DI_2D;
-            break;
-
-        case INS_bfi:
-        case INS_sbfiz:
-        case INS_ubfiz:
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegister(reg2));
-            lsb   = getBitWidth(size) - imm1;
-            width = imm2 - 1;
-            assert(isValidImmShift(lsb, size));
-            assert(isValidImmShift(width, size));
-            assert(insOptsNone(opt));
-            bmi.immNRS = 0;
-            bmi.immN   = (size == EA_8BYTE);
-            bmi.immR   = lsb;
-            bmi.immS   = width;
-            immOut     = bmi.immNRS;
-            fmt        = IF_DI_2D;
-            break;
-
-        case INS_bfxil:
-        case INS_sbfx:
-        case INS_ubfx:
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegister(reg2));
-            lsb   = imm1;
-            width = imm2 + imm1 - 1;
-            assert(isValidImmShift(lsb, size));
-            assert(isValidImmShift(width, size));
-            assert(insOptsNone(opt));
-            bmi.immNRS = 0;
-            bmi.immN   = (size == EA_8BYTE);
-            bmi.immR   = imm1;
-            bmi.immS   = imm2 + imm1 - 1;
-            immOut     = bmi.immNRS;
-            fmt        = IF_DI_2D;
-            break;
-
-        case INS_mov:
-        case INS_ins:
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            elemsize = size;
-            assert(isValidVectorElemsize(elemsize));
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm1));
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm2));
-            assert(insOptsNone(opt));
-            immOut = (imm1 << 4) + imm2;
-            fmt    = IF_DV_2F;
-            break;
-
-        case INS_ld1:
-        case INS_ld2:
-        case INS_ld3:
-        case INS_ld4:
-        case INS_st1:
-        case INS_st2:
-        case INS_st3:
-        case INS_st4:
-            assert(isVectorRegister(reg1));
-            assert(isGeneralRegisterOrSP(reg2));
-
-            elemsize = size;
-            assert(isValidVectorElemsize(elemsize));
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm1));
-
-            registerListSize = insGetRegisterListSize(ins);
-            assert((elemsize * registerListSize) == (unsigned)imm2);
-            assert(insOptsPostIndex(opt));
-
-            // Load/Store single structure  post-indexed by an immediate
-            reg2   = encodingSPtoZR(reg2);
-            immOut = imm1;
-            fmt    = IF_LS_2G;
-            break;
-
-        default:
-            unreached();
-            break;
-    }
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstrSC(attr, immOut);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing four registers.
- */
-
-void emitter::emitIns_R_R_R_R(instruction     ins,
-                              emitAttr        attr,
-                              regNumber       reg1,
-                              regNumber       reg2,
-                              regNumber       reg3,
-                              regNumber       reg4,
-                              insOpts         opt /* = INS_OPTS_NONE*/,
-                              insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
-{
-    emitAttr  size = EA_SIZE(attr);
-    insFormat fmt  = IF_NONE;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_madd:
-        case INS_msub:
-        case INS_smaddl:
-        case INS_smsubl:
-        case INS_umaddl:
-        case INS_umsubl:
-            assert(isValidGeneralDatasize(size));
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isGeneralRegister(reg4));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_DR_4A;
-            break;
-
-        case INS_fmadd:
-        case INS_fmsub:
-        case INS_fnmadd:
-        case INS_fnmsub:
-            // Scalar operation
-            assert(isValidScalarDatasize(size));
-            assert(isVectorRegister(reg1));
-            assert(isVectorRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isVectorRegister(reg4));
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_DV_4A;
-            break;
-
-        case INS_invalid:
-            fmt = IF_NONE;
-            break;
-
-        // Fallback handles emitting the SVE instructions.
-        default:
-            return emitInsSve_R_R_R_R(ins, attr, reg1, reg2, reg3, reg4, opt, sopt);
-    }
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstr(attr);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-    id->idReg3(reg3);
-    id->idReg4(reg4);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add a SVE instruction referencing four registers.
- *  Do not call this directly. Use 'emitIns_R_R_R_R' instead.
- */
-
-void emitter::emitInsSve_R_R_R_R(instruction     ins,
-                                 emitAttr        attr,
-                                 regNumber       reg1,
-                                 regNumber       reg2,
-                                 regNumber       reg3,
-                                 regNumber       reg4,
-                                 insOpts         opt /* = INS_OPTS_NONE*/,
-                                 insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
-{
-    emitAttr  size = EA_SIZE(attr);
-    insFormat fmt  = IF_NONE;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_sve_cmpeq:
-        case INS_sve_cmpgt:
-        case INS_sve_cmpge:
-        case INS_sve_cmphi:
-        case INS_sve_cmphs:
-        case INS_sve_cmpne:
-        case INS_sve_cmple:
-        case INS_sve_cmplo:
-        case INS_sve_cmpls:
-        case INS_sve_cmplt:
-            assert(isPredicateRegister(reg1));    // DDDD
-            assert(isLowPredicateRegister(reg2)); // ggg
-            assert(isVectorRegister(reg3));       // nnnnn
-            assert(isVectorRegister(reg4));       // mmmmm
-            assert(isScalableVectorSize(attr));   // xx
-            if (sopt == INS_SCALABLE_OPTS_WIDE)
-            {
-                assert(insOptsScalableWide(opt));
-                fmt = IF_SVE_CX_4A_A;
-            }
-            else
-            {
-                assert(insScalableOptsNone(sopt));
-                assert(insOptsScalableStandard(opt));
-                fmt = IF_SVE_CX_4A;
-            }
-            break;
-
-        case INS_sve_and:
-        case INS_sve_orr:
-        case INS_sve_eor:
-        case INS_sve_ands:
-        case INS_sve_bic:
-        case INS_sve_orn:
-        case INS_sve_bics:
-        case INS_sve_sel:
-        case INS_sve_eors:
-        case INS_sve_nor:
-        case INS_sve_nand:
-        case INS_sve_orrs:
-        case INS_sve_orns:
-        case INS_sve_nors:
-        case INS_sve_nands:
-            assert(opt == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(reg1)); // dddd
-            assert(isPredicateRegister(reg2)); // gggg
-            assert(isPredicateRegister(reg3)); // nnnn
-            assert(isPredicateRegister(reg4)); // mmmm
-            fmt = IF_SVE_CZ_4A;
-            break;
-
-        case INS_sve_brkpa:
-        case INS_sve_brkpb:
-        case INS_sve_brkpas:
-        case INS_sve_brkpbs:
-            assert(opt == INS_OPTS_SCALABLE_B);
-            assert(isPredicateRegister(reg1)); // dddd
-            assert(isPredicateRegister(reg2)); // gggg
-            assert(isPredicateRegister(reg3)); // nnnn
-            assert(isPredicateRegister(reg4)); // mmmm
-            fmt = IF_SVE_DA_4A;
-            break;
-
-        case INS_sve_fcmeq:
-        case INS_sve_fcmge:
-        case INS_sve_facge:
-        case INS_sve_fcmgt:
-        case INS_sve_facgt:
-        case INS_sve_fcmlt:
-        case INS_sve_fcmle:
-        case INS_sve_fcmne:
-        case INS_sve_fcmuo:
-        case INS_sve_facle:
-        case INS_sve_faclt:
-            assert(insOptsScalableFloat(opt));
-            assert(isVectorRegister(reg3));       // nnnnn
-            assert(isVectorRegister(reg4));       // mmmmm
-            assert(isPredicateRegister(reg1));    // DDDD
-            assert(isLowPredicateRegister(reg2)); // ggg
-            assert(isScalableVectorSize(attr));   // xx
-            fmt = IF_SVE_HT_4A;
-            break;
-
-        case INS_sve_match:
-        case INS_sve_nmatch:
-            assert(insOptsScalableAtMaxHalf(opt));
-            assert(isPredicateRegister(reg1));    // DDDD
-            assert(isLowPredicateRegister(reg2)); // ggg
-            assert(isVectorRegister(reg3));       // nnnnn
-            assert(isVectorRegister(reg4));       // mmmmm
-            assert(isScalableVectorSize(attr));   // xx
-            fmt = IF_SVE_GE_4A;
-            break;
-
-        case INS_sve_mla:
-        case INS_sve_mls:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1));       // ddddd
-            assert(isLowPredicateRegister(reg2)); // ggg
-            assert(isVectorRegister(reg3));       // nnnnn
-            assert(isVectorRegister(reg4));       // mmmmm
-            assert(isScalableVectorSize(size));
-            fmt = IF_SVE_AR_4A;
-            break;
-
-        case INS_sve_mad:
-        case INS_sve_msb:
-            assert(insOptsScalableStandard(opt));
-            assert(isVectorRegister(reg1));       // ddddd
-            assert(isLowPredicateRegister(reg2)); // ggg
-            assert(isVectorRegister(reg3));       // mmmmm
-            assert(isVectorRegister(reg4));       // aaaaa
-            assert(isScalableVectorSize(size));
-            fmt = IF_SVE_AS_4A;
-            break;
-
-        case INS_sve_st1b:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isScalableVectorSize(size));
-            assert(insScalableOptsNone(sopt));
-
-            if (insOptsScalableStandard(opt))
-            {
-                if (isGeneralRegister(reg4))
-                {
-                    fmt = IF_SVE_JD_4A;
-                }
-                else
-                {
-                    assert(isVectorRegister(reg4));
-                    fmt = IF_SVE_JK_4B;
-                }
-            }
-            else
-            {
-                assert(insOptsScalable32bitExtends(opt));
-                switch (opt)
-                {
-                    case INS_OPTS_SCALABLE_S_UXTW:
-                    case INS_OPTS_SCALABLE_S_SXTW:
-                        fmt = IF_SVE_JK_4A_B;
-                        break;
-
-                    case INS_OPTS_SCALABLE_D_UXTW:
-                    case INS_OPTS_SCALABLE_D_SXTW:
-                        fmt = IF_SVE_JK_4A;
-                        break;
-
-                    default:
-                        assert(!"Invalid options for scalable");
-                        break;
-                }
-            }
-            break;
-
-        case INS_sve_st1h:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isScalableVectorSize(size));
-
-            if (insOptsScalableStandard(opt))
-            {
-                if (sopt == INS_SCALABLE_OPTS_LSL_N)
-                {
-                    if (isGeneralRegister(reg4))
-                    {
-                        // st1h is reserved for scalable B
-                        assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) : true);
-                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                        fmt = IF_SVE_JD_4A;
-                    }
-                    else
-                    {
-                        assert(isVectorRegister(reg4));
-                        fmt = IF_SVE_JJ_4B;
-                    }
-                }
-                else
-                {
-                    assert(isVectorRegister(reg4));
-                    assert(insScalableOptsNone(sopt));
-                    fmt = IF_SVE_JJ_4B_E;
-                }
-            }
-            else
-            {
-                assert(insOptsScalable32bitExtends(opt));
-                switch (opt)
-                {
-                    case INS_OPTS_SCALABLE_S_UXTW:
-                    case INS_OPTS_SCALABLE_S_SXTW:
-                        if (insScalableOptsNone(sopt))
-                        {
-                            fmt = IF_SVE_JJ_4A_D;
-                        }
-                        else
-                        {
-                            assert(sopt == INS_SCALABLE_OPTS_MOD_N);
-                            fmt = IF_SVE_JJ_4A;
-                        }
-                        break;
-
-                    case INS_OPTS_SCALABLE_D_UXTW:
-                    case INS_OPTS_SCALABLE_D_SXTW:
-                        if (insScalableOptsNone(sopt))
-                        {
-                            fmt = IF_SVE_JJ_4A_C;
-                        }
-                        else
-                        {
-                            assert(sopt == INS_SCALABLE_OPTS_MOD_N);
-                            fmt = IF_SVE_JJ_4A_B;
-                        }
-                        break;
-
-                    default:
-                        assert(!"Invalid options for scalable");
-                        break;
-                }
-            }
-            break;
-
-        case INS_sve_st1w:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isScalableVectorSize(size));
-
-            if (insOptsScalableStandard(opt))
-            {
-                if (sopt == INS_SCALABLE_OPTS_LSL_N)
-                {
-                    if (isGeneralRegister(reg4))
-                    {
-                        fmt = IF_SVE_JD_4B;
-                    }
-                    else
-                    {
-                        assert(isVectorRegister(reg4));
-                        fmt = IF_SVE_JJ_4B;
-                    }
-                }
-                else
-                {
-                    assert(isVectorRegister(reg4));
-                    assert(insScalableOptsNone(sopt));
-                    fmt = IF_SVE_JJ_4B_E;
-                }
-            }
-            else if (opt == INS_OPTS_SCALABLE_Q)
-            {
-                assert(isGeneralRegister(reg4));
-                assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                fmt = IF_SVE_JD_4C;
-            }
-            else
-            {
-                assert(insOptsScalable32bitExtends(opt));
-                assert(isVectorRegister(reg4));
-                switch (opt)
-                {
-                    case INS_OPTS_SCALABLE_S_UXTW:
-                    case INS_OPTS_SCALABLE_S_SXTW:
-                        if (insScalableOptsNone(sopt))
-                        {
-                            fmt = IF_SVE_JJ_4A_D;
-                        }
-                        else
-                        {
-                            assert(sopt == INS_SCALABLE_OPTS_MOD_N);
-                            fmt = IF_SVE_JJ_4A;
-                        }
-                        break;
-
-                    case INS_OPTS_SCALABLE_D_UXTW:
-                    case INS_OPTS_SCALABLE_D_SXTW:
-                        if (insScalableOptsNone(sopt))
-                        {
-                            fmt = IF_SVE_JJ_4A_C;
-                        }
-                        else
-                        {
-                            assert(sopt == INS_SCALABLE_OPTS_MOD_N);
-                            fmt = IF_SVE_JJ_4A_B;
-                        }
-                        break;
-
-                    default:
-                        assert(!"Invalid options for scalable");
-                        break;
-                }
-            }
-            break;
-
-        case INS_sve_st1d:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isScalableVectorSize(size));
-
-            if (isGeneralRegister(reg4))
-            {
-                assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                if (opt == INS_OPTS_SCALABLE_Q)
-                {
-                    fmt = IF_SVE_JD_4C_A;
-                }
-                else
-                {
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    fmt = IF_SVE_JD_4C;
-                }
-            }
-            else
-            {
-                assert(isVectorRegister(reg4));
-
-                if (opt == INS_OPTS_SCALABLE_D)
-                {
-                    if (sopt == INS_SCALABLE_OPTS_LSL_N)
-                    {
-                        fmt = IF_SVE_JJ_4B;
-                    }
-                    else
-                    {
-                        assert(insScalableOptsNone(sopt));
-                        fmt = IF_SVE_JJ_4B_C;
-                    }
-                }
-                else
-                {
-                    assert(insOptsScalable32bitExtends(opt));
-                    switch (opt)
-                    {
-                        case INS_OPTS_SCALABLE_D_UXTW:
-                        case INS_OPTS_SCALABLE_D_SXTW:
-                            if (sopt == INS_SCALABLE_OPTS_MOD_N)
-                            {
-                                fmt = IF_SVE_JJ_4A;
-                            }
-                            else
-                            {
-                                assert(insScalableOptsNone(sopt));
-                                fmt = IF_SVE_JJ_4A_B;
-                            }
-                            break;
-
-                        default:
-                            assert(!"Invalid options for scalable");
-                            break;
-                    }
-                }
-            }
-            break;
-
-        case INS_sve_ld1b:
-        case INS_sve_ld1sb:
-        case INS_sve_ldff1b:
-        case INS_sve_ldff1sb:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isScalableVectorSize(size));
-            assert(insScalableOptsNone(sopt));
-
-            if (isGeneralRegisterOrZR(reg4))
-            {
-                switch (ins)
-                {
-                    case INS_sve_ldff1b:
-                        assert(insOptsScalableStandard(opt));
-                        fmt = IF_SVE_IG_4A_E;
-                        break;
-
-                    case INS_sve_ldff1sb:
-                        assert(insOptsScalableAtLeastHalf(opt));
-                        fmt = IF_SVE_IG_4A_D;
-                        break;
-
-                    case INS_sve_ld1sb:
-                        assert(insOptsScalableAtLeastHalf(opt));
-                        fmt = IF_SVE_IK_4A_F;
-                        break;
-
-                    case INS_sve_ld1b:
-                        assert(insOptsScalableStandard(opt));
-                        fmt = IF_SVE_IK_4A_H;
-                        break;
-
-                    default:
-                        assert(!"Invalid instruction");
-                        break;
-                }
-            }
-            else
-            {
-                assert(isVectorRegister(reg4));
-
-                if (insOptsScalableDoubleWord32bitExtends(opt))
-                {
-                    fmt = IF_SVE_HW_4A;
-                }
-                else if (insOptsScalableSingleWord32bitExtends(opt))
-                {
-                    fmt = IF_SVE_HW_4A_A;
-                }
-                else
-                {
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    fmt = IF_SVE_HW_4B;
-                }
-            }
-            break;
-
-        case INS_sve_ld1h:
-        case INS_sve_ld1sh:
-        case INS_sve_ldff1h:
-        case INS_sve_ldff1sh:
-        case INS_sve_ld1w:
-        case INS_sve_ldff1w:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isScalableVectorSize(size));
-
-            if (isGeneralRegisterOrZR(reg4))
-            {
-                assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-
-                switch (ins)
-                {
-                    case INS_sve_ldff1h:
-                        assert(insOptsScalableStandard(opt));
-                        fmt = IF_SVE_IG_4A_G;
-                        break;
-
-                    case INS_sve_ldff1sh:
-                    case INS_sve_ldff1w:
-                        assert(insOptsScalableWords(opt));
-                        fmt = IF_SVE_IG_4A_F;
-                        break;
-
-                    case INS_sve_ld1w:
-                        assert(insOptsScalableWordsOrQuadwords(opt));
-                        fmt = IF_SVE_II_4A_H;
-                        break;
-
-                    case INS_sve_ld1sh:
-                        assert(insOptsScalableWords(opt));
-                        fmt = IF_SVE_IK_4A_G;
-                        break;
-
-                    case INS_sve_ld1h:
-                        assert(insOptsScalableAtLeastHalf(opt));
-                        fmt = IF_SVE_IK_4A_I;
-                        break;
-
-                    default:
-                        assert(!"Invalid instruction");
-                        break;
-                }
-            }
-            else
-            {
-                assert(isVectorRegister(reg4));
-
-                if (insOptsScalableDoubleWord32bitExtends(opt))
-                {
-                    if (sopt == INS_SCALABLE_OPTS_MOD_N)
-                    {
-                        fmt = IF_SVE_HW_4A_A;
-                    }
-                    else
-                    {
-                        assert(insScalableOptsNone(sopt));
-                        fmt = IF_SVE_HW_4A_B;
-                    }
-                }
-                else if (insOptsScalableSingleWord32bitExtends(opt))
-                {
-                    if (sopt == INS_SCALABLE_OPTS_MOD_N)
-                    {
-                        fmt = IF_SVE_HW_4A;
-                    }
-                    else
-                    {
-                        assert(insScalableOptsNone(sopt));
-                        fmt = IF_SVE_HW_4A_C;
-                    }
-                }
-                else
-                {
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    if (sopt == INS_SCALABLE_OPTS_LSL_N)
-                    {
-                        fmt = IF_SVE_HW_4B;
-                    }
-                    else
-                    {
-                        assert(insScalableOptsNone(sopt));
-                        fmt = IF_SVE_HW_4B_D;
-                    }
-                }
-            }
-            break;
-
-        case INS_sve_ld1d:
-        case INS_sve_ld1sw:
-        case INS_sve_ldff1d:
-        case INS_sve_ldff1sw:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isScalableVectorSize(size));
-
-            if (isGeneralRegisterOrZR(reg4))
-            {
-                assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-
-                if (opt == INS_OPTS_SCALABLE_Q)
-                {
-                    assert(reg4 != REG_ZR);
-                    assert(ins == INS_sve_ld1d);
-                    fmt = IF_SVE_II_4A_B;
-                }
-                else
-                {
-                    assert(opt == INS_OPTS_SCALABLE_D);
-
-                    switch (ins)
-                    {
-                        case INS_sve_ldff1d:
-                        case INS_sve_ldff1sw:
-                            fmt = IF_SVE_IG_4A;
-                            break;
-
-                        case INS_sve_ld1d:
-                            assert(reg4 != REG_ZR);
-                            fmt = IF_SVE_II_4A;
-                            break;
-
-                        case INS_sve_ld1sw:
-                            assert(reg4 != REG_ZR);
-                            fmt = IF_SVE_IK_4A;
-                            break;
-
-                        default:
-                            assert(!"Invalid instruction");
-                            break;
-                    }
-                }
-            }
-            else if (insOptsScalableDoubleWord32bitExtends(opt))
-            {
-                assert(isVectorRegister(reg4));
-
-                if (sopt == INS_SCALABLE_OPTS_MOD_N)
-                {
-                    fmt = IF_SVE_IU_4A;
-                }
-                else
-                {
-                    assert(insScalableOptsNone(sopt));
-
-                    if (ins == INS_sve_ld1d)
-                    {
-                        fmt = IF_SVE_IU_4A_C;
-                    }
-                    else
-                    {
-                        fmt = IF_SVE_IU_4A_A;
-                    }
-                }
-            }
-            else if (sopt == INS_SCALABLE_OPTS_LSL_N)
-            {
-                assert(isVectorRegister(reg4));
-                assert(opt == INS_OPTS_SCALABLE_D);
-                fmt = IF_SVE_IU_4B;
-            }
-            else
-            {
-                assert(isVectorRegister(reg4));
-                assert(opt == INS_OPTS_SCALABLE_D);
-                assert(insScalableOptsNone(sopt));
-
-                if (ins == INS_sve_ld1d)
-                {
-                    fmt = IF_SVE_IU_4B_D;
-                }
-                else
-                {
-                    fmt = IF_SVE_IU_4B_B;
-                }
-            }
-            break;
-
-        case INS_sve_ldnt1b:
-        case INS_sve_ldnt1h:
-        case INS_sve_ldnt1w:
-        case INS_sve_ldnt1d:
-        case INS_sve_ldnt1sb:
-        case INS_sve_ldnt1sh:
-        case INS_sve_ldnt1sw:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isScalableVectorSize(size));
-
-            if (isGeneralRegister(reg3))
-            {
-                assert(isGeneralRegister(reg4));
-
-#ifdef DEBUG
-                switch (ins)
-                {
-                    case INS_sve_ldnt1b:
-                        assert(opt == INS_OPTS_SCALABLE_B);
-                        assert(insScalableOptsNone(sopt));
-                        break;
-
-                    case INS_sve_ldnt1h:
-                        assert(opt == INS_OPTS_SCALABLE_H);
-                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                        break;
-
-                    case INS_sve_ldnt1w:
-                        assert(opt == INS_OPTS_SCALABLE_S);
-                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                        break;
-
-                    case INS_sve_ldnt1d:
-                        assert(opt == INS_OPTS_SCALABLE_D);
-                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                        break;
-
-                    default:
-                        assert(!"Invalid instruction");
-                        break;
-                }
-#endif // DEBUG
-
-                fmt = IF_SVE_IN_4A;
-            }
-            else if ((ins == INS_sve_ldnt1d) || (ins == INS_sve_ldnt1sw))
-            {
-                assert(insOptsScalableWords(opt));
-                assert(isVectorRegister(reg3));
-                assert(isGeneralRegisterOrZR(reg4));
-                assert(insScalableOptsNone(sopt));
-                assert(opt == INS_OPTS_SCALABLE_D);
-                fmt = IF_SVE_IX_4A;
-            }
-            else
-            {
-                assert(insOptsScalableWords(opt));
-                assert(isVectorRegister(reg3));
-                assert(isGeneralRegisterOrZR(reg4));
-                assert(insScalableOptsNone(sopt));
-
-                if (opt == INS_OPTS_SCALABLE_S)
-                {
-                    fmt = IF_SVE_IF_4A;
-                }
-                else
-                {
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    fmt = IF_SVE_IF_4A_A;
-                }
-            }
-            break;
-
-        case INS_sve_ld1rob:
-        case INS_sve_ld1roh:
-        case INS_sve_ld1row:
-        case INS_sve_ld1rod:
-        case INS_sve_ld1rqb:
-        case INS_sve_ld1rqh:
-        case INS_sve_ld1rqw:
-        case INS_sve_ld1rqd:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isGeneralRegister(reg4));
-            assert(isScalableVectorSize(size));
-
-#ifdef DEBUG
-            switch (ins)
-            {
-                case INS_sve_ld1rob:
-                case INS_sve_ld1rqb:
-                    assert(opt == INS_OPTS_SCALABLE_B);
-                    assert(insScalableOptsNone(sopt));
-                    break;
-
-                case INS_sve_ld1roh:
-                case INS_sve_ld1rqh:
-                    assert(opt == INS_OPTS_SCALABLE_H);
-                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                    break;
-
-                case INS_sve_ld1row:
-                case INS_sve_ld1rqw:
-                    assert(opt == INS_OPTS_SCALABLE_S);
-                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                    break;
-
-                case INS_sve_ld1rod:
-                case INS_sve_ld1rqd:
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-#endif // DEBUG
-
-            fmt = IF_SVE_IP_4A;
-            break;
-
-        case INS_sve_ld1q:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isGeneralRegisterOrZR(reg4));
-            assert(isScalableVectorSize(size));
-            assert(opt == INS_OPTS_SCALABLE_Q);
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_IW_4A;
-            break;
-
-        case INS_sve_ld2q:
-        case INS_sve_ld3q:
-        case INS_sve_ld4q:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isGeneralRegister(reg4));
-            assert(isScalableVectorSize(size));
-            assert(opt == INS_OPTS_SCALABLE_Q);
-            assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-            fmt = IF_SVE_IR_4A;
-            break;
-
-        case INS_sve_ld2b:
-        case INS_sve_ld3b:
-        case INS_sve_ld4b:
-        case INS_sve_ld2h:
-        case INS_sve_ld3h:
-        case INS_sve_ld4h:
-        case INS_sve_ld2w:
-        case INS_sve_ld3w:
-        case INS_sve_ld4w:
-        case INS_sve_ld2d:
-        case INS_sve_ld3d:
-        case INS_sve_ld4d:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isGeneralRegister(reg4));
-            assert(isScalableVectorSize(size));
-
-#ifdef DEBUG
-            switch (ins)
-            {
-                case INS_sve_ld2b:
-                case INS_sve_ld3b:
-                case INS_sve_ld4b:
-                    assert(opt == INS_OPTS_SCALABLE_B);
-                    assert(insScalableOptsNone(sopt));
-                    break;
-
-                case INS_sve_ld2h:
-                case INS_sve_ld3h:
-                case INS_sve_ld4h:
-                    assert(opt == INS_OPTS_SCALABLE_H);
-                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                    break;
-
-                case INS_sve_ld2w:
-                case INS_sve_ld3w:
-                case INS_sve_ld4w:
-                    assert(opt == INS_OPTS_SCALABLE_S);
-                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                    break;
-
-                case INS_sve_ld2d:
-                case INS_sve_ld3d:
-                case INS_sve_ld4d:
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-#endif // DEBUG
-
-            fmt = IF_SVE_IT_4A;
-            break;
-
-        case INS_sve_st1q:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isGeneralRegisterOrZR(reg4));
-            assert(isScalableVectorSize(size));
-            assert(opt == INS_OPTS_SCALABLE_Q);
-            assert(insScalableOptsNone(sopt));
-            fmt = IF_SVE_IY_4A;
-            break;
-
-        case INS_sve_stnt1b:
-        case INS_sve_stnt1h:
-        case INS_sve_stnt1w:
-        case INS_sve_stnt1d:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isScalableVectorSize(size));
-
-            if (isGeneralRegister(reg3))
-            {
-                assert(isGeneralRegister(reg4));
-#ifdef DEBUG
-                switch (ins)
-                {
-                    case INS_sve_stnt1b:
-                        assert(opt == INS_OPTS_SCALABLE_B);
-                        assert(insScalableOptsNone(sopt));
-                        break;
-
-                    case INS_sve_stnt1h:
-                        assert(opt == INS_OPTS_SCALABLE_H);
-                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                        break;
-
-                    case INS_sve_stnt1w:
-                        assert(opt == INS_OPTS_SCALABLE_S);
-                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                        break;
-
-                    case INS_sve_stnt1d:
-                        assert(opt == INS_OPTS_SCALABLE_D);
-                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                        break;
-
-                    default:
-                        assert(!"Invalid instruction");
-                        break;
-                }
-#endif // DEBUG
-                fmt = IF_SVE_JB_4A;
-            }
-            else
-            {
-                assert(isVectorRegister(reg3));
-                assert(isGeneralRegisterOrZR(reg4));
-                assert(isScalableVectorSize(size));
-                assert(insScalableOptsNone(sopt));
-
-                if (opt == INS_OPTS_SCALABLE_S)
-                {
-                    fmt = IF_SVE_IZ_4A;
-                }
-                else
-                {
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    if (ins == INS_sve_stnt1d)
-                    {
-                        fmt = IF_SVE_JA_4A;
-                    }
-                    else
-                    {
-                        fmt = IF_SVE_IZ_4A_A;
-                    }
-                }
-            }
-            break;
-
-        case INS_sve_st2b:
-        case INS_sve_st3b:
-        case INS_sve_st4b:
-        case INS_sve_st2h:
-        case INS_sve_st3h:
-        case INS_sve_st4h:
-        case INS_sve_st2w:
-        case INS_sve_st3w:
-        case INS_sve_st4w:
-        case INS_sve_st2d:
-        case INS_sve_st3d:
-        case INS_sve_st4d:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isGeneralRegister(reg4));
-            assert(isScalableVectorSize(size));
-
-#ifdef DEBUG
-            switch (ins)
-            {
-                case INS_sve_st2b:
-                case INS_sve_st3b:
-                case INS_sve_st4b:
-                    assert(opt == INS_OPTS_SCALABLE_B);
-                    assert(insScalableOptsNone(sopt));
-                    break;
-
-                case INS_sve_st2h:
-                case INS_sve_st3h:
-                case INS_sve_st4h:
-                    assert(opt == INS_OPTS_SCALABLE_H);
-                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                    break;
-
-                case INS_sve_st2w:
-                case INS_sve_st3w:
-                case INS_sve_st4w:
-                    assert(opt == INS_OPTS_SCALABLE_S);
-                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                    break;
-
-                case INS_sve_st2d:
-                case INS_sve_st3d:
-                case INS_sve_st4d:
-                    assert(opt == INS_OPTS_SCALABLE_D);
-                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
-                    break;
-
-                default:
-                    assert(!"Invalid instruction");
-                    break;
-            }
-#endif // DEBUG
-            fmt = IF_SVE_JC_4A;
-            break;
-
-        case INS_sve_st2q:
-        case INS_sve_st3q:
-        case INS_sve_st4q:
-            assert(isVectorRegister(reg1));
-            assert(isPredicateRegister(reg2));
-            assert(isGeneralRegister(reg3));
-            assert(isGeneralRegister(reg4));
-            assert(isScalableVectorSize(size));
-            assert(opt == INS_OPTS_SCALABLE_Q);
-            fmt = IF_SVE_JF_4A;
-            break;
-
-        case INS_sve_bfmla:
-        case INS_sve_bfmls:
-            assert(opt == INS_OPTS_SCALABLE_H);
-            assert(insScalableOptsNone(sopt));
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isVectorRegister(reg4));
-            fmt = IF_SVE_HU_4B;
-            break;
-
-        case INS_sve_fmad:
-        case INS_sve_fmsb:
-        case INS_sve_fnmad:
-        case INS_sve_fnmsb:
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(insScalableOptsNone(sopt));
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isVectorRegister(reg4));
-            fmt = IF_SVE_HV_4A;
-            break;
-
-        default:
-            unreached();
-            break;
-    }
-    assert(fmt != IF_NONE);
-
-    // Use aliases.
-    switch (ins)
-    {
-        case INS_sve_cmple:
-            std::swap(reg3, reg4);
-            ins = INS_sve_cmpge;
-            break;
-        case INS_sve_cmplo:
-            std::swap(reg3, reg4);
-            ins = INS_sve_cmphi;
-            break;
-        case INS_sve_cmpls:
-            std::swap(reg3, reg4);
-            ins = INS_sve_cmphs;
-            break;
-        case INS_sve_cmplt:
-            std::swap(reg3, reg4);
-            ins = INS_sve_cmpgt;
-            break;
-        case INS_sve_facle:
-            std::swap(reg3, reg4);
-            ins = INS_sve_facge;
-            break;
-        case INS_sve_faclt:
-            std::swap(reg3, reg4);
-            ins = INS_sve_facgt;
-            break;
-        case INS_sve_fcmle:
-            std::swap(reg3, reg4);
-            ins = INS_sve_fcmge;
-            break;
-        case INS_sve_fcmlt:
-            std::swap(reg3, reg4);
-            ins = INS_sve_fcmgt;
-            break;
-        default:
-            break;
-    }
-
-    instrDesc* id = emitNewInstr(attr);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-    id->idReg3(reg3);
-    id->idReg4(reg4);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing four registers and a constant.
- */
-
-void emitter::emitIns_R_R_R_R_I(instruction ins,
-                                emitAttr    attr,
-                                regNumber   reg1,
-                                regNumber   reg2,
-                                regNumber   reg3,
-                                regNumber   reg4,
-                                ssize_t     imm,
-                                insOpts     opt /* = INS_OPT_NONE*/)
-{
-    emitAttr  size = EA_SIZE(attr);
-    insFormat fmt  = IF_NONE;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_sve_fcmla:
-            assert(insOptsScalableAtLeastHalf(opt));
-            assert(isVectorRegister(reg1));
-            assert(isLowPredicateRegister(reg2));
-            assert(isVectorRegister(reg3));
-            assert(isVectorRegister(reg4));
-            assert(isScalableVectorSize(size));
-            imm = emitEncodeRotationImm0_to_270(imm);
-            fmt = IF_SVE_GT_4A;
-            break;
-
-        default:
-            unreached();
-            break;
-    }
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstrCns(attr, imm);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(opt);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-    id->idReg3(reg3);
-    id->idReg4(reg4);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing a register and a condition code
- */
-
-void emitter::emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond)
-{
-    insFormat    fmt = IF_NONE;
-    condFlagsImm cfi;
-    cfi.immCFVal = 0;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_cset:
-        case INS_csetm:
-            assert(isGeneralRegister(reg));
-            cfi.cond = cond;
-            fmt      = IF_DR_1D;
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-
-    assert(fmt != IF_NONE);
-    assert(isValidImmCond(cfi.immCFVal));
-
-    instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
-
-    id->idReg1(reg);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing two registers and a condition code
- */
-
-void emitter::emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond)
-{
-    insFormat    fmt = IF_NONE;
-    condFlagsImm cfi;
-    cfi.immCFVal = 0;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_cinc:
-        case INS_cinv:
-        case INS_cneg:
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegisterOrZR(reg2));
-            cfi.cond = cond;
-            fmt      = IF_DR_2D;
-            break;
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-
-    assert(fmt != IF_NONE);
-    assert(isValidImmCond(cfi.immCFVal));
-
-    instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing two registers and a condition code
- */
-
-void emitter::emitIns_R_R_R_COND(
-    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insCond cond)
-{
-    insFormat    fmt = IF_NONE;
-    condFlagsImm cfi;
-    cfi.immCFVal = 0;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_csel:
-        case INS_csinc:
-        case INS_csinv:
-        case INS_csneg:
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegisterOrZR(reg2));
-            assert(isGeneralRegisterOrZR(reg3));
-            cfi.cond = cond;
-            fmt      = IF_DR_3D;
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-
-    assert(fmt != IF_NONE);
-    assert(isValidImmCond(cfi.immCFVal));
-
-    instrDesc* id = emitNewInstr(attr);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-    id->idReg3(reg3);
-    id->idSmallCns(cfi.immCFVal);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing two registers the flags and a condition code
- */
-
-void emitter::emitIns_R_R_FLAGS_COND(
-    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCflags flags, insCond cond)
-{
-    insFormat    fmt = IF_NONE;
-    condFlagsImm cfi;
-    cfi.immCFVal = 0;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_ccmp:
-        case INS_ccmn:
-            assert(isGeneralRegister(reg1));
-            assert(isGeneralRegister(reg2));
-            cfi.flags = flags;
-            cfi.cond  = cond;
-            fmt       = IF_DR_2I;
-            break;
-        default:
-            unreached();
-            break;
-    } // end switch (ins)
-
-    assert(fmt != IF_NONE);
-    assert(isValidImmCondFlags(cfi.immCFVal));
-
-    instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing a register, an immediate, the flags and a condition code
- */
-
-void emitter::emitIns_R_I_FLAGS_COND(
-    instruction ins, emitAttr attr, regNumber reg, int imm, insCflags flags, insCond cond)
-{
-    insFormat    fmt = IF_NONE;
-    condFlagsImm cfi;
-    cfi.immCFVal = 0;
-
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
-    {
-        case INS_ccmp:
-        case INS_ccmn:
-            assert(isGeneralRegister(reg));
-            if (imm < 0)
-            {
-                ins = insReverse(ins);
-                imm = -imm;
-            }
-            if (isValidUimm5(imm))
-            {
-                cfi.imm5  = imm;
-                cfi.flags = flags;
-                cfi.cond  = cond;
-                fmt       = IF_DI_1F;
-            }
-            else
-            {
-                assert(!"Instruction cannot be encoded: ccmp/ccmn imm5");
-            }
-            break;
-        default:
-            unreached();
-            break;
-    } // end switch (ins)
-
-    assert(fmt != IF_NONE);
-    assert(isValidImmCondFlagsImm5(cfi.immCFVal));
-
-    instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
-
-    id->idReg1(reg);
-
-    dispIns(id);
-    appendToCurIG(id);
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing a register, a SVE Pattern.
- */
-
-void emitter::emitIns_R_PATTERN(
-    instruction ins, emitAttr attr, regNumber reg1, insOpts opt, insSvePattern pattern /* = SVE_PATTERN_ALL*/)
-{
-    emitAttr  elemsize = EA_UNKNOWN;
-    insFormat fmt      = IF_NONE;
+        }
+    }
 
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
+    if (canEncode)
     {
-        case INS_sve_ptrue:
-        case INS_sve_ptrues:
-            assert(isPredicateRegister(reg1));
-            assert(isScalableVectorSize(attr));
-            assert(insOptsScalableStandard(opt));
-            fmt = IF_SVE_DE_1A;
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstr(attr);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-
-    id->idReg1(reg1);
-    id->idInsOpt(opt);
-    id->idSvePattern(pattern);
+        // Does the caller want us to return the imm(i8,bySh) encoding values?
+        //
+        if (wbBSI != nullptr)
+        {
+            wbBSI->immOnes = onesShift;
+            wbBSI->immBY   = bySh;
+            wbBSI->immVal  = imm8;
 
-    dispIns(id);
-    appendToCurIG(id);
+            // Verify that what we are returning is correct.
+            assert(imm == emitDecodeByteShiftedImm(*wbBSI, size));
+        }
+        // Tell the caller that we can successfully encode this immediate
+        // using a 'byteShifted immediate'.
+        //
+        return true;
+    }
+    return false;
 }
 
-/*****************************************************************************
+/************************************************************************
  *
- *  Add an instruction referencing a register, a SVE Pattern and an immediate.
+ *  Convert a 'float 8-bit immediate' into a double.
+ *  inputs 'fpImm' a floatImm8 struct
  */
 
-void emitter::emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1, insSvePattern pattern, int imm)
+/*static*/ double emitter::emitDecodeFloatImm8(const emitter::floatImm8 fpImm)
 {
-    emitAttr  size     = EA_SIZE(attr);
-    emitAttr  elemsize = EA_UNKNOWN;
-    insFormat fmt      = IF_NONE;
+    unsigned sign  = fpImm.immSign;
+    unsigned exp   = fpImm.immExp ^ 0x4;
+    unsigned mant  = fpImm.immMant + 16;
+    unsigned scale = 16 * 8;
 
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
+    while (exp > 0)
     {
-        case INS_sve_cntb:
-        case INS_sve_cntd:
-        case INS_sve_cnth:
-        case INS_sve_cntw:
-            assert(isGeneralRegister(reg1));
-            assert(size == EA_8BYTE);
-            assert(isValidUimm4From1(imm));
-            fmt = IF_SVE_BL_1A;
-            break;
-
-        default:
-            unreached();
-            break;
-
-    } // end switch (ins)
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstrCns(attr, imm);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
+        scale /= 2;
+        exp--;
+    }
 
-    id->idReg1(reg1);
-    id->idSvePattern(pattern);
+    double result = ((double)mant) / ((double)scale);
+    if (sign == 1)
+    {
+        result = -result;
+    }
 
-    dispIns(id);
-    appendToCurIG(id);
+    return result;
 }
 
-/*****************************************************************************
+/************************************************************************
+ *
+ *  returns true if the 'immDbl' can be encoded using the 'float 8-bit immediate' form.
+ *  also returns the encoding if wbFPI is non-null
  *
- *  Add a memory barrier instruction with a 'barrier' immediate
  */
 
-void emitter::emitIns_BARR(instruction ins, insBarrier barrier)
+/*static*/ bool emitter::canEncodeFloatImm8(double immDbl, emitter::floatImm8* wbFPI)
 {
-    insFormat fmt = IF_NONE;
-    ssize_t   imm = 0;
+    bool   canEncode = false;
+    double val       = immDbl;
 
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
+    int sign = 0;
+    if (val < 0.0)
     {
-        case INS_dsb:
-        case INS_dmb:
-        case INS_isb:
-
-            fmt = IF_SI_0B;
-            imm = (ssize_t)barrier;
-            break;
-        default:
-            unreached();
-            break;
-    } // end switch (ins)
-
-    assert(fmt != IF_NONE);
+        val  = -val;
+        sign = 1;
+    }
 
-    instrDesc* id = emitNewInstrSC(EA_8BYTE, imm);
+    int exp = 0;
+    while ((val < 1.0) && (exp >= -4))
+    {
+        val *= 2.0;
+        exp--;
+    }
+    while ((val >= 2.0) && (exp <= 5))
+    {
+        val *= 0.5;
+        exp++;
+    }
+    exp += 3;
+    val *= 16.0;
+    int ival = (int)val;
 
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
+    if ((exp >= 0) && (exp <= 7))
+    {
+        if (val == (double)ival)
+        {
+            canEncode = true;
 
-    dispIns(id);
-    appendToCurIG(id);
-}
+            if (wbFPI != nullptr)
+            {
+                ival -= 16;
+                assert((ival >= 0) && (ival <= 15));
 
-/*****************************************************************************
- *
- *  Add an instruction with a static data member operand. If 'size' is 0, the
- *  instruction operates on the address of the static member instead of its
- *  value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
- */
+                wbFPI->immSign = sign;
+                wbFPI->immExp  = exp ^ 0x4;
+                wbFPI->immMant = ival;
+                unsigned imm8  = wbFPI->immFPIVal;
+                assert((imm8 >= 0) && (imm8 <= 0xff));
+            }
+        }
+    }
 
-void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
-{
-    NYI("emitIns_C");
+    return canEncode;
 }
 
 /*****************************************************************************
  *
- *  Add an instruction referencing stack-based local variable.
+ *  For the given 'ins' returns the reverse instruction
+ *  if one exists, otherwise returns INS_INVALID
  */
 
-void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
-{
-    NYI("emitIns_S");
-}
-
-/*****************************************************************************
- *
- *  Add an instruction referencing a register and a stack-based local variable.
- */
-void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
+/*static*/ instruction emitter::insReverse(instruction ins)
 {
-    emitAttr  size     = EA_SIZE(attr);
-    insFormat fmt      = IF_NONE;
-    int       disp     = 0;
-    unsigned  scale    = 0;
-    bool      isLdrStr = false;
-
-    assert(offs >= 0);
-
-    // TODO-ARM64-CQ: use unscaled loads?
-    /* Figure out the encoding format of the instruction */
     switch (ins)
     {
-        case INS_strb:
-        case INS_ldrb:
-        case INS_ldrsb:
-            scale = 0;
-            break;
-
-        case INS_strh:
-        case INS_ldrh:
-        case INS_ldrsh:
-            scale = 1;
-            break;
-
-        case INS_ldrsw:
-            scale = 2;
-            break;
-
-        case INS_str:
-        case INS_ldr:
-            assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size));
-            scale    = genLog2(EA_SIZE_IN_BYTES(size));
-            isLdrStr = true;
-            break;
+        case INS_add:
+            return INS_sub;
+        case INS_adds:
+            return INS_subs;
 
-        case INS_lea:
-            assert(size == EA_8BYTE);
-            scale = 0;
-            break;
+        case INS_sub:
+            return INS_add;
+        case INS_subs:
+            return INS_adds;
 
-        default:
-            NYI("emitIns_R_S"); // FP locals?
-            return;
+        case INS_cmp:
+            return INS_cmn;
+        case INS_cmn:
+            return INS_cmp;
 
-    } // end switch (ins)
+        case INS_ccmp:
+            return INS_ccmn;
+        case INS_ccmn:
+            return INS_ccmp;
 
-    /* Figure out the variable's frame position */
-    ssize_t imm;
-    int     base;
-    bool    FPbased;
+        default:
+            return INS_invalid;
+    }
+}
 
-    base = emitComp->lvaFrameAddress(varx, &FPbased);
-    disp = base + offs;
-    assert((scale >= 0) && (scale <= 4));
+/*****************************************************************************
+ *
+ *  For the given 'datasize' and 'elemsize', make the proper arrangement option
+ *  returns the insOpts that specifies the vector register arrangement
+ *  if one does not exist returns INS_OPTS_NONE
+ */
 
-    bool      useRegForImm = false;
-    regNumber reg2         = FPbased ? REG_FPBASE : REG_SPBASE;
-    reg2                   = encodingSPtoZR(reg2);
+/*static*/ insOpts emitter::optMakeArrangement(emitAttr datasize, emitAttr elemsize)
+{
+    insOpts result = INS_OPTS_NONE;
 
-    if (ins == INS_lea)
+    if (datasize == EA_8BYTE)
     {
-        if (disp >= 0)
+        switch (elemsize)
         {
-            ins = INS_add;
-            imm = disp;
+            case EA_1BYTE:
+                result = INS_OPTS_8B;
+                break;
+            case EA_2BYTE:
+                result = INS_OPTS_4H;
+                break;
+            case EA_4BYTE:
+                result = INS_OPTS_2S;
+                break;
+            case EA_8BYTE:
+                result = INS_OPTS_1D;
+                break;
+            default:
+                unreached();
+                break;
         }
-        else
+    }
+    else if (datasize == EA_16BYTE)
+    {
+        switch (elemsize)
         {
-            ins = INS_sub;
-            imm = -disp;
+            case EA_1BYTE:
+                result = INS_OPTS_16B;
+                break;
+            case EA_2BYTE:
+                result = INS_OPTS_8H;
+                break;
+            case EA_4BYTE:
+                result = INS_OPTS_4S;
+                break;
+            case EA_8BYTE:
+                result = INS_OPTS_2D;
+                break;
+            default:
+                unreached();
+                break;
         }
+    }
+    return result;
+}
 
-        if (imm <= 0x0fff)
-        {
-            fmt = IF_DI_2A; // add reg1,reg2,#disp
-        }
-        else
+/*****************************************************************************
+ *
+ *  For the given 'datasize' and arrangement 'opts'
+ *  returns true is the pair specifies a valid arrangement
+ */
+/*static*/ bool emitter::isValidArrangement(emitAttr datasize, insOpts opt)
+{
+    if (datasize == EA_8BYTE)
+    {
+        if ((opt == INS_OPTS_8B) || (opt == INS_OPTS_4H) || (opt == INS_OPTS_2S) || (opt == INS_OPTS_1D))
         {
-            regNumber rsvdReg = codeGen->rsGetRsvdReg();
-            codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
-            fmt = IF_DR_3A; // add reg1,reg2,rsvdReg
+            return true;
         }
     }
-    else
+    else if (datasize == EA_16BYTE)
     {
-        ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
-        imm          = disp;
-        if (imm == 0)
-        {
-            fmt = IF_LS_2A;
-        }
-        else if ((imm < 0) || ((imm & mask) != 0))
-        {
-            if ((imm >= -256) && (imm <= 255))
-            {
-                fmt = IF_LS_2C;
-            }
-            else
-            {
-                useRegForImm = true;
-            }
-        }
-        else if (imm > 0)
-        {
-            if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
-            {
-                imm >>= scale; // The immediate is scaled by the size of the ld/st
-
-                fmt = IF_LS_2B;
-            }
-            else
-            {
-                useRegForImm = true;
-            }
-        }
-
-        if (useRegForImm)
+        if ((opt == INS_OPTS_16B) || (opt == INS_OPTS_8H) || (opt == INS_OPTS_4S) || (opt == INS_OPTS_2D))
         {
-            regNumber rsvdReg = codeGen->rsGetRsvdReg();
-            codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
-            fmt = IF_LS_3A;
+            return true;
         }
     }
+    return false;
+}
 
-    assert(fmt != IF_NONE);
+//------------------------------------------------------------------------
+// insGetRegisterListSize: Returns a size of the register list a given instruction operates on.
+//
+// Arguments:
+//   ins - An instruction which uses a register list
+//         (e.g. ld1 (2 registers), ld1r, st1, tbl, tbx).
+//
+// Return value:
+//   A number of consecutive SIMD and floating-point registers the instruction loads to/store from.
+//
+/*static*/ unsigned emitter::insGetRegisterListSize(instruction ins)
+{
+    unsigned registerListSize = 0;
 
-    // Try to optimize a load/store with an alternative instruction.
-    if (isLdrStr && emitComp->opts.OptimizationEnabled() &&
-        OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs DEBUG_ARG(useRegForImm)))
+    switch (ins)
     {
-        return;
-    }
+        case INS_ld1:
+        case INS_ld1r:
+        case INS_st1:
+        case INS_tbl:
+        case INS_tbx:
+            registerListSize = 1;
+            break;
 
-    instrDesc* id = emitNewInstrCns(attr, imm);
+        case INS_ld1_2regs:
+        case INS_ld2:
+        case INS_ld2r:
+        case INS_st1_2regs:
+        case INS_st2:
+        case INS_tbl_2regs:
+        case INS_tbx_2regs:
+            registerListSize = 2;
+            break;
 
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
+        case INS_ld1_3regs:
+        case INS_ld3:
+        case INS_ld3r:
+        case INS_st1_3regs:
+        case INS_st3:
+        case INS_tbl_3regs:
+        case INS_tbx_3regs:
+            registerListSize = 3;
+            break;
 
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-    id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
-    id->idSetIsLclVar();
+        case INS_ld1_4regs:
+        case INS_ld4:
+        case INS_ld4r:
+        case INS_st1_4regs:
+        case INS_st4:
+        case INS_tbl_4regs:
+        case INS_tbx_4regs:
+            registerListSize = 4;
+            break;
 
-#ifdef DEBUG
-    id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
-#endif
+        default:
+            assert(!"Unexpected instruction");
+            break;
+    }
 
-    dispIns(id);
-    appendToCurIG(id);
+    return registerListSize;
 }
 
-/*****************************************************************************
- *
- *  Add an instruction referencing two register and consecutive stack-based local variable slots.
- */
-void emitter::emitIns_R_R_S_S(
-    instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs)
+//  For the given 'arrangement' returns the 'datasize' specified by the vector register arrangement
+//  asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
+//
+/*static*/ emitAttr emitter::optGetDatasize(insOpts arrangement)
 {
-    assert((ins == INS_ldp) || (ins == INS_ldnp));
-    assert(EA_8BYTE == EA_SIZE(attr1));
-    assert(EA_8BYTE == EA_SIZE(attr2));
-    assert(isGeneralRegisterOrZR(reg1));
-    assert(isGeneralRegisterOrZR(reg2));
-    assert(offs >= 0);
-
-    insFormat      fmt   = IF_LS_3B;
-    int            disp  = 0;
-    const unsigned scale = 3;
-
-    /* Figure out the variable's frame position */
-    int  base;
-    bool FPbased;
-
-    base = emitComp->lvaFrameAddress(varx, &FPbased);
-    disp = base + offs;
-
-    // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead?
-    regNumber reg3 = FPbased ? REG_FPBASE : REG_SPBASE;
-    reg3           = encodingSPtoZR(reg3);
-
-    bool    useRegForAdr = true;
-    ssize_t imm          = disp;
-    ssize_t mask         = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
-    if (imm == 0)
+    if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_2S) ||
+        (arrangement == INS_OPTS_1D))
     {
-        useRegForAdr = false;
+        return EA_8BYTE;
+    }
+    else if ((arrangement == INS_OPTS_16B) || (arrangement == INS_OPTS_8H) || (arrangement == INS_OPTS_4S) ||
+             (arrangement == INS_OPTS_2D))
+    {
+        return EA_16BYTE;
     }
     else
     {
-        if ((imm & mask) == 0)
-        {
-            ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st
-
-            if ((immShift >= -64) && (immShift <= 63))
-            {
-                fmt          = IF_LS_3C;
-                useRegForAdr = false;
-                imm          = immShift;
-            }
-        }
+        assert(!" invalid 'arrangement' value");
+        return EA_UNKNOWN;
     }
+}
 
-    if (useRegForAdr)
+//  For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement
+//  asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
+//
+/*static*/ emitAttr emitter::optGetElemsize(insOpts arrangement)
+{
+    if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_16B))
+    {
+        return EA_1BYTE;
+    }
+    else if ((arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_8H))
     {
-        regNumber rsvd = codeGen->rsGetRsvdReg();
-        emitIns_R_R_Imm(INS_add, EA_PTRSIZE, rsvd, reg3, imm);
-        reg3 = rsvd;
-        imm  = 0;
+        return EA_2BYTE;
     }
-
-    assert(fmt != IF_NONE);
-
-    instrDesc* id = emitNewInstrCns(attr1, imm);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
-
-    // Record the attribute for the second register in the pair
-    if (EA_IS_GCREF(attr2))
+    else if ((arrangement == INS_OPTS_2S) || (arrangement == INS_OPTS_4S))
     {
-        id->idGCrefReg2(GCT_GCREF);
+        return EA_4BYTE;
     }
-    else if (EA_IS_BYREF(attr2))
+    else if ((arrangement == INS_OPTS_1D) || (arrangement == INS_OPTS_2D))
     {
-        id->idGCrefReg2(GCT_BYREF);
+        return EA_8BYTE;
     }
     else
     {
-        id->idGCrefReg2(GCT_NONE);
+        assert(!" invalid 'arrangement' value");
+        return EA_UNKNOWN;
     }
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-    id->idReg3(reg3);
-    id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
-    id->idSetIsLclVar();
-
-#ifdef DEBUG
-    id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
-#endif
-
-    dispIns(id);
-    appendToCurIG(id);
 }
 
-/*****************************************************************************
- *
- *  Add an instruction referencing a stack-based local variable and a register
- */
-void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
+/*static*/ insOpts emitter::optWidenElemsizeArrangement(insOpts arrangement)
 {
-    assert(offs >= 0);
-    emitAttr  size          = EA_SIZE(attr);
-    insFormat fmt           = IF_NONE;
-    int       disp          = 0;
-    unsigned  scale         = 0;
-    bool      isVectorStore = false;
-    bool      isStr         = false;
-
-    // TODO-ARM64-CQ: use unscaled loads?
-    /* Figure out the encoding format of the instruction */
-    switch (ins)
+    if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_16B))
     {
-        case INS_strb:
-            scale = 0;
-            assert(isGeneralRegisterOrZR(reg1));
-            break;
-
-        case INS_strh:
-            scale = 1;
-            assert(isGeneralRegisterOrZR(reg1));
-            break;
-
-        case INS_str:
-            if (isGeneralRegisterOrZR(reg1))
-            {
-                assert(isValidGeneralDatasize(size));
-                scale = (size == EA_8BYTE) ? 3 : 2;
-            }
-            else
-            {
-                assert(isVectorRegister(reg1));
-                assert(isValidVectorLSDatasize(size));
-                scale         = NaturalScale_helper(size);
-                isVectorStore = true;
-            }
-            isStr = true;
-            break;
-
-        default:
-            NYI("emitIns_S_R"); // FP locals?
-            return;
-
-    } // end switch (ins)
-
-    /* Figure out the variable's frame position */
-    int  base;
-    bool FPbased;
-
-    base = emitComp->lvaFrameAddress(varx, &FPbased);
-    disp = base + offs;
-    assert(scale >= 0);
-    if (isVectorStore)
+        return INS_OPTS_8H;
+    }
+    else if ((arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_8H))
     {
-        assert(scale <= 4);
+        return INS_OPTS_4S;
+    }
+    else if ((arrangement == INS_OPTS_2S) || (arrangement == INS_OPTS_4S))
+    {
+        return INS_OPTS_2D;
     }
     else
     {
-        assert(scale <= 3);
+        assert(!" invalid 'arrangement' value");
+        return INS_OPTS_NONE;
     }
+}
 
-    // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead?
-    regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE;
-    reg2           = encodingSPtoZR(reg2);
-
-    bool    useRegForImm = false;
-    ssize_t imm          = disp;
-    ssize_t mask         = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
-    if (imm == 0)
+/*static*/ emitAttr emitter::widenDatasize(emitAttr datasize)
+{
+    if (datasize == EA_1BYTE)
     {
-        fmt = IF_LS_2A;
+        return EA_2BYTE;
     }
-    else if ((imm < 0) || ((imm & mask) != 0))
+    else if (datasize == EA_2BYTE)
     {
-        if ((imm >= -256) && (imm <= 255))
-        {
-            fmt = IF_LS_2C;
-        }
-        else
-        {
-            useRegForImm = true;
-        }
+        return EA_4BYTE;
     }
-    else if (imm > 0)
+    else if (datasize == EA_4BYTE)
     {
-        if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
-        {
-            imm >>= scale; // The immediate is scaled by the size of the ld/st
-
-            fmt = IF_LS_2B;
-        }
-        else
-        {
-            useRegForImm = true;
-        }
+        return EA_8BYTE;
     }
-
-    if (useRegForImm)
+    else
     {
-        // The reserved register is not stored in idReg3() since that field overlaps with iiaLclVar.
-        // It is instead implicit when idSetIsLclVar() is set, with this encoding format.
-        regNumber rsvdReg = codeGen->rsGetRsvdReg();
-        codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
-        fmt = IF_LS_3A;
+        assert(!" invalid 'datasize' value");
+        return EA_UNKNOWN;
     }
+}
 
-    assert(fmt != IF_NONE);
+//  For the given 'srcArrangement' returns the "widen" 'dstArrangement' specifying the destination vector register
+//  arrangement
+//  asserts and returns INS_OPTS_NONE if an invalid 'srcArrangement' value is passed
+//
+/*static*/ insOpts emitter::optWidenDstArrangement(insOpts srcArrangement)
+{
+    insOpts dstArrangement = INS_OPTS_NONE;
 
-    // Try to optimize a store with an alternative instruction.
-    if (isStr && emitComp->opts.OptimizationEnabled() &&
-        OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs DEBUG_ARG(useRegForImm)))
+    switch (srcArrangement)
     {
-        return;
-    }
-
-    instrDesc* id = emitNewInstrCns(attr, imm);
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
-
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-    id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
-    id->idSetIsLclVar();
+        case INS_OPTS_8B:
+            dstArrangement = INS_OPTS_4H;
+            break;
 
-#ifdef DEBUG
-    id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
-#endif
+        case INS_OPTS_16B:
+            dstArrangement = INS_OPTS_8H;
+            break;
 
-    dispIns(id);
-    appendToCurIG(id);
-}
+        case INS_OPTS_4H:
+            dstArrangement = INS_OPTS_2S;
+            break;
 
-/*****************************************************************************
- *
- *  Add an instruction referencing consecutive stack-based local variable slots and two registers
- */
-void emitter::emitIns_S_S_R_R(
-    instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs)
-{
-    assert((ins == INS_stp) || (ins == INS_stnp));
-    assert(EA_8BYTE == EA_SIZE(attr1));
-    assert(EA_8BYTE == EA_SIZE(attr2));
-    assert(isGeneralRegisterOrZR(reg1));
-    assert(isGeneralRegisterOrZR(reg2));
-    assert(offs >= 0);
+        case INS_OPTS_8H:
+            dstArrangement = INS_OPTS_4S;
+            break;
 
-    insFormat      fmt   = IF_LS_3B;
-    int            disp  = 0;
-    const unsigned scale = 3;
+        case INS_OPTS_2S:
+            dstArrangement = INS_OPTS_1D;
+            break;
 
-    /* Figure out the variable's frame position */
-    int  base;
-    bool FPbased;
+        case INS_OPTS_4S:
+            dstArrangement = INS_OPTS_2D;
+            break;
 
-    base = emitComp->lvaFrameAddress(varx, &FPbased);
-    disp = base + offs;
+        default:
+            assert(!" invalid 'srcArrangement' value");
+            break;
+    }
 
-    // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead?
-    regNumber reg3 = FPbased ? REG_FPBASE : REG_SPBASE;
+    return dstArrangement;
+}
 
-    bool    useRegForAdr = true;
-    ssize_t imm          = disp;
-    ssize_t mask         = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
-    if (imm == 0)
-    {
-        useRegForAdr = false;
-    }
-    else
+//  For the given 'conversion' returns the 'dstsize' specified by the conversion option
+/*static*/ emitAttr emitter::optGetDstsize(insOpts conversion)
+{
+    switch (conversion)
     {
-        if ((imm & mask) == 0)
-        {
-            ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st
+        case INS_OPTS_S_TO_8BYTE:
+        case INS_OPTS_D_TO_8BYTE:
+        case INS_OPTS_4BYTE_TO_D:
+        case INS_OPTS_8BYTE_TO_D:
+        case INS_OPTS_S_TO_D:
+        case INS_OPTS_H_TO_D:
 
-            if ((immShift >= -64) && (immShift <= 63))
-            {
-                fmt          = IF_LS_3C;
-                useRegForAdr = false;
-                imm          = immShift;
-            }
-        }
-    }
+            return EA_8BYTE;
 
-    if (useRegForAdr)
-    {
-        regNumber rsvd = codeGen->rsGetRsvdReg();
-        emitIns_R_R_Imm(INS_add, EA_PTRSIZE, rsvd, reg3, imm);
-        reg3 = rsvd;
-        imm  = 0;
-    }
+        case INS_OPTS_S_TO_4BYTE:
+        case INS_OPTS_D_TO_4BYTE:
+        case INS_OPTS_4BYTE_TO_S:
+        case INS_OPTS_8BYTE_TO_S:
+        case INS_OPTS_D_TO_S:
+        case INS_OPTS_H_TO_S:
 
-    assert(fmt != IF_NONE);
+            return EA_4BYTE;
 
-    instrDesc* id = emitNewInstrCns(attr1, imm);
+        case INS_OPTS_S_TO_H:
+        case INS_OPTS_D_TO_H:
 
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
+            return EA_2BYTE;
 
-    // Record the attribute for the second register in the pair
-    if (EA_IS_GCREF(attr2))
-    {
-        id->idGCrefReg2(GCT_GCREF);
-    }
-    else if (EA_IS_BYREF(attr2))
-    {
-        id->idGCrefReg2(GCT_BYREF);
+        default:
+            assert(!" invalid 'conversion' value");
+            return EA_UNKNOWN;
     }
-    else
+}
+
+//  For the given 'conversion' returns the 'srcsize' specified by the conversion option
+/*static*/ emitAttr emitter::optGetSrcsize(insOpts conversion)
+{
+    switch (conversion)
     {
-        id->idGCrefReg2(GCT_NONE);
-    }
+        case INS_OPTS_D_TO_8BYTE:
+        case INS_OPTS_D_TO_4BYTE:
+        case INS_OPTS_8BYTE_TO_D:
+        case INS_OPTS_8BYTE_TO_S:
+        case INS_OPTS_D_TO_S:
+        case INS_OPTS_D_TO_H:
 
-    reg3 = encodingSPtoZR(reg3);
+            return EA_8BYTE;
 
-    id->idReg1(reg1);
-    id->idReg2(reg2);
-    id->idReg3(reg3);
-    id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
-    id->idSetIsLclVar();
+        case INS_OPTS_S_TO_8BYTE:
+        case INS_OPTS_S_TO_4BYTE:
+        case INS_OPTS_4BYTE_TO_S:
+        case INS_OPTS_4BYTE_TO_D:
+        case INS_OPTS_S_TO_D:
+        case INS_OPTS_S_TO_H:
 
-#ifdef DEBUG
-    id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
-#endif
+            return EA_4BYTE;
 
-    dispIns(id);
-    appendToCurIG(id);
-}
+        case INS_OPTS_H_TO_S:
+        case INS_OPTS_H_TO_D:
 
-/*****************************************************************************
- *
- *  Add an instruction referencing stack-based local variable and an immediate
- */
-void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
-{
-    NYI("emitIns_S_I");
+            return EA_2BYTE;
+
+        default:
+            assert(!" invalid 'conversion' value");
+            return EA_UNKNOWN;
+    }
 }
 
-/*****************************************************************************
- *
- *  Add an instruction with a register + static member operands.
- *  Constant is stored into JIT data which is adjacent to code.
- *  No relocation is needed. PC-relative offset will be encoded directly into instruction.
- *
- */
-void emitter::emitIns_R_C(
-    instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs)
+//    For the given 'size' and 'index' returns true if it specifies a valid index for a vector register of 'size'
+/*static*/ bool emitter::isValidVectorIndex(emitAttr datasize, emitAttr elemsize, ssize_t index)
 {
-    assert(offs >= 0);
-    assert(instrDesc::fitsInSmallCns(offs));
-
-    emitAttr      size = EA_SIZE(attr);
-    insFormat     fmt  = IF_NONE;
-    instrDescJmp* id   = emitNewInstrJmp();
+    assert(isValidVectorDatasize(datasize));
+    assert(isValidVectorElemsize(elemsize));
 
-    switch (ins)
+    bool result = false;
+    if (index >= 0)
     {
-        case INS_adr:
-            // This is case to get address to the constant data.
-            fmt = IF_LARGEADR;
-            assert(isGeneralRegister(reg));
-            assert(isValidGeneralDatasize(size));
-            break;
-
-        case INS_ldr:
-            fmt = IF_LARGELDC;
-            if (isVectorRegister(reg))
+        if (datasize == EA_8BYTE)
+        {
+            switch (elemsize)
             {
-                assert(isValidVectorLSDatasize(size));
-                // For vector (float/double) register, we should have an integer address reg to
-                // compute long address which consists of page address and page offset.
-                // For integer constant, this is not needed since the dest reg can be used to
-                // compute address as well as contain the final contents.
-                assert(isGeneralRegister(reg) || (addrReg != REG_NA));
+                case EA_1BYTE:
+                    result = (index < 8);
+                    break;
+                case EA_2BYTE:
+                    result = (index < 4);
+                    break;
+                case EA_4BYTE:
+                    result = (index < 2);
+                    break;
+                case EA_8BYTE:
+                    result = (index < 1);
+                    break;
+                default:
+                    unreached();
+                    break;
             }
-            else
+        }
+        else if (datasize == EA_16BYTE)
+        {
+            switch (elemsize)
             {
-                assert(isGeneralRegister(reg));
-                assert(isValidGeneralDatasize(size));
+                case EA_1BYTE:
+                    result = (index < 16);
+                    break;
+                case EA_2BYTE:
+                    result = (index < 8);
+                    break;
+                case EA_4BYTE:
+                    result = (index < 4);
+                    break;
+                case EA_8BYTE:
+                    result = (index < 2);
+                    break;
+                default:
+                    unreached();
+                    break;
             }
-            break;
-
-        default:
-            unreached();
+        }
     }
+    return result;
+}
 
-    assert(fmt != IF_NONE);
+/*****************************************************************************
+ *
+ *  Add an instruction with no operands.
+ */
 
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
-    id->idSmallCns(offs);
-    id->idOpSize(size);
-    id->idAddr()->iiaFieldHnd = fldHnd;
-    id->idSetIsBound(); // We won't patch address since we will know the exact distance once JIT code and data are
-                        // allocated together.
+void emitter::emitIns(instruction ins)
+{
+    instrDesc* id  = emitNewInstrSmall(EA_8BYTE);
+    insFormat  fmt = emitInsFormat(ins);
 
-    id->idReg1(reg); // destination register that will get the constant value.
-    if (addrReg != REG_NA)
+    if (ins != INS_BREAKPOINT)
     {
-        id->idReg2(addrReg); // integer register to compute long address (used for vector dest when we end up with long
-                             // address)
+        assert(fmt == IF_SN_0A);
     }
-    id->idjShort = false; // Assume loading constant from long address
-
-    // Keep it long if it's in cold code.
-    id->idjKeepLong = emitComp->fgIsBlockCold(emitComp->compCurBB);
-
-#ifdef DEBUG
-    if (emitComp->opts.compLongAddress)
-        id->idjKeepLong = 1;
-#endif // DEBUG
-
-    // If it's possible to be shortened, then put it in jump list
-    // to be revisited by emitJumpDistBind.
-    if (!id->idjKeepLong)
-    {
-        /* Record the jump's IG and offset within it */
-        id->idjIG   = emitCurIG;
-        id->idjOffs = emitCurIGsize;
-
-        /* Append this jump to this IG's jump list */
-        id->idjNext      = emitCurIGjmpList;
-        emitCurIGjmpList = id;
 
-#if EMITTER_STATS
-        emitTotalIGjmps++;
-#endif
-    }
+    id->idIns(ins);
+    id->idInsFmt(fmt);
 
     dispIns(id);
     appendToCurIG(id);
@@ -14874,212 +3663,350 @@ void emitter::emitIns_R_C(
 
 /*****************************************************************************
  *
- *  Add an instruction with a static member + constant.
- */
-
-void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val)
-{
-    NYI("emitIns_C_I");
-}
-
-/*****************************************************************************
- *
- *  Add an instruction with a static member + register operands.
+ *  Add an instruction with a single immediate value.
  */
 
-void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
-{
-    assert(!"emitIns_C_R not supported for RyuJIT backend");
-}
-
-void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs)
-{
-    NYI("emitIns_R_AR");
-}
-
-// This computes address from the immediate which is relocatable.
-void emitter::emitIns_R_AI(instruction ins,
-                           emitAttr    attr,
-                           regNumber   ireg,
-                           ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
+void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm)
 {
-    assert(EA_IS_RELOC(attr));
-    emitAttr      size    = EA_SIZE(attr);
-    insFormat     fmt     = IF_DI_1E;
-    bool          needAdd = false;
-    instrDescJmp* id      = emitNewInstrJmp();
+    insFormat fmt = IF_NONE;
 
-    switch (ins)
+    /* Figure out the encoding format of the instruction */
+    if (ins == INS_BREAKPOINT)
     {
-        case INS_adrp:
-            // This computes page address.
-            // page offset is needed using add.
-            needAdd = true;
-            break;
-        case INS_adr:
-            break;
-        default:
-            unreached();
+        if ((imm & 0x0000ffff) == imm)
+        {
+            fmt = IF_SI_0A;
+        }
+        else
+        {
+            assert(!"Instruction cannot be encoded: IF_SI_0A");
+        }
+    }
+    else
+    {
+        // fallback to emit SVE instructions.
+        return emitInsSve_I(ins, attr, imm);
     }
+    assert(fmt != IF_NONE);
+
+    instrDesc* id = emitNewInstrSC(attr, imm);
 
     id->idIns(ins);
     id->idInsFmt(fmt);
-    id->idInsOpt(INS_OPTS_NONE);
-    id->idOpSize(size);
-    id->idAddr()->iiaAddr = (BYTE*)addr;
-    id->idReg1(ireg);
-    id->idSetIsDspReloc();
-#ifdef DEBUG
-    id->idDebugOnlyInfo()->idMemCookie = targetHandle;
-    id->idDebugOnlyInfo()->idFlags     = gtFlags;
-#endif
 
     dispIns(id);
     appendToCurIG(id);
+}
 
-    if (needAdd)
+/*****************************************************************************
+ *
+ *  Add an instruction referencing a single register.
+ */
+
+void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts opt /* = INS_OPTS_NONE */)
+{
+    insFormat fmt = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
     {
-        // add reg, reg, imm
-        ins           = INS_add;
-        fmt           = IF_DI_2A;
-        instrDesc* id = emitNewInstr(attr);
-        assert(id->idIsReloc());
+        case INS_br:
+        case INS_ret:
+            assert(isGeneralRegister(reg));
+            fmt = IF_BR_1A;
+            break;
 
-        id->idIns(ins);
-        id->idInsFmt(fmt);
-        id->idInsOpt(INS_OPTS_NONE);
-        id->idOpSize(size);
-        id->idAddr()->iiaAddr = (BYTE*)addr;
-        id->idReg1(ireg);
-        id->idReg2(ireg);
+        case INS_dczva:
+            assert(isGeneralRegister(reg));
+            assert(attr == EA_8BYTE);
+            fmt = IF_SR_1A;
+            break;
 
-        dispIns(id);
-        appendToCurIG(id);
+        case INS_mrs_tpid0:
+            fmt = IF_SR_1A;
+            break;
+
+        default:
+            // fallback to emit SVE instructions.
+            return emitInsSve_R(ins, attr, reg, opt);
     }
-}
 
-void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs)
-{
-    NYI("emitIns_AR_R");
-}
+    assert(fmt != IF_NONE);
 
-void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp)
-{
-    NYI("emitIns_R_ARR");
-}
+    instrDesc* id = emitNewInstrSmall(attr);
 
-void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp)
-{
-    NYI("emitIns_R_ARR");
-}
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idReg1(reg);
 
-void emitter::emitIns_R_ARX(
-    instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp)
-{
-    NYI("emitIns_R_ARR");
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- *  Record that a jump instruction uses the short encoding
- *
+ *  Add an instruction referencing a register and a constant.
  */
-void emitter::emitSetShortJump(instrDescJmp* id)
+
+void emitter::emitIns_R_I(instruction ins,
+                          emitAttr    attr,
+                          regNumber   reg,
+                          ssize_t     imm,
+                          insOpts     opt,     /* = INS_OPTS_NONE */
+                          insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */
+                              DEBUGARG(size_t targetHandle /* = 0 */) DEBUGARG(GenTreeFlags gtFlags /* = GTF_EMPTY */))
 {
-    if (id->idjKeepLong)
-        return;
+    emitAttr  size      = EA_SIZE(attr);
+    emitAttr  elemsize  = EA_UNKNOWN;
+    insFormat fmt       = IF_NONE;
+    bool      canEncode = false;
 
-    insFormat fmt = IF_NONE;
-    if (emitIsCondJump(id))
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
     {
-        switch (id->idIns())
-        {
-            case INS_cbz:
-            case INS_cbnz:
-                fmt = IF_BI_1A;
+        bitMaskImm     bmi;
+        halfwordImm    hwi;
+        byteShiftedImm bsi;
+        ssize_t        notOfImm;
+
+        case INS_tst:
+            assert(insOptsNone(opt));
+            assert(isGeneralRegister(reg));
+            bmi.immNRS = 0;
+            canEncode  = canEncodeBitMaskImm(imm, size, &bmi);
+            if (canEncode)
+            {
+                imm = bmi.immNRS;
+                assert(isValidImmNRS(imm, size));
+                fmt = IF_DI_1C;
+            }
+            break;
+
+        case INS_movk:
+        case INS_movn:
+        case INS_movz:
+            assert(isValidGeneralDatasize(size));
+            assert(insOptsNone(opt)); // No LSL here (you must use emitIns_R_I_I if a shift is needed)
+            assert(isGeneralRegister(reg));
+            assert(isValidUimm<16>(imm));
+
+            hwi.immHW  = 0;
+            hwi.immVal = imm;
+            assert(imm == emitDecodeHalfwordImm(hwi, size));
+
+            imm       = hwi.immHWVal;
+            canEncode = true;
+            fmt       = IF_DI_1B;
+            break;
+
+        case INS_mov:
+            assert(isValidGeneralDatasize(size));
+            assert(insOptsNone(opt)); // No explicit LSL here
+            // We will automatically determine the shift based upon the imm
+
+            // First try the standard 'halfword immediate' imm(i16,hw)
+            hwi.immHWVal = 0;
+            canEncode    = canEncodeHalfwordImm(imm, size, &hwi);
+            if (canEncode)
+            {
+                // uses a movz encoding
+                assert(isGeneralRegister(reg));
+                imm = hwi.immHWVal;
+                assert(isValidImmHWVal(imm, size));
+                fmt = IF_DI_1B;
                 break;
-            case INS_tbz:
-            case INS_tbnz:
-                fmt = IF_BI_1B;
+            }
+
+            // Next try the ones-complement form of 'halfword immediate' imm(i16,hw)
+            notOfImm  = NOT_helper(imm, getBitWidth(size));
+            canEncode = canEncodeHalfwordImm(notOfImm, size, &hwi);
+            if (canEncode)
+            {
+                assert(isGeneralRegister(reg));
+                imm = hwi.immHWVal;
+                ins = INS_movn; // uses a movn encoding
+                assert(isValidImmHWVal(imm, size));
+                fmt = IF_DI_1B;
                 break;
-            default:
-                fmt = IF_BI_0B;
+            }
+
+            // Finally try the 'bitmask immediate' imm(N,r,s)
+            bmi.immNRS = 0;
+            canEncode  = canEncodeBitMaskImm(imm, size, &bmi);
+            if (canEncode)
+            {
+                assert(isGeneralRegisterOrSP(reg));
+                reg = encodingSPtoZR(reg);
+                imm = bmi.immNRS;
+                assert(isValidImmNRS(imm, size));
+                fmt = IF_DI_1D;
                 break;
-        }
-    }
-    else if (emitIsLoadLabel(id))
-    {
-        fmt = IF_DI_1E;
-    }
-    else if (emitIsLoadConstant(id))
-    {
-        fmt = IF_LS_1A;
-    }
-    else
-    {
-        unreached();
-    }
+            }
+            else
+            {
+                assert(!"Instruction cannot be encoded: mov imm");
+            }
+
+            break;
+
+        case INS_movi:
+            assert(isValidVectorDatasize(size));
+            assert(isVectorRegister(reg));
+            if (insOptsNone(opt) && (size == EA_8BYTE))
+            {
+                opt = INS_OPTS_1D;
+            }
+            assert(isValidArrangement(size, opt));
+            elemsize = optGetElemsize(opt);
+
+            if (elemsize == EA_8BYTE)
+            {
+                size_t   uimm = imm;
+                ssize_t  imm8 = 0;
+                unsigned pos  = 0;
+                canEncode     = true;
+                while (uimm != 0)
+                {
+                    INT64 loByte = uimm & 0xFF;
+                    if (((loByte == 0) || (loByte == 0xFF)) && (pos < 8))
+                    {
+                        if (loByte == 0xFF)
+                        {
+                            imm8 |= (ssize_t{1} << pos);
+                        }
+                        uimm >>= 8;
+                        pos++;
+                    }
+                    else
+                    {
+                        canEncode = false;
+                        break;
+                    }
+                }
+                imm = imm8;
+                assert(isValidUimm<8>(imm));
+                fmt = IF_DV_1B;
+                break;
+            }
+            else
+            {
+                // Vector operation
+
+                // No explicit LSL/MSL is used for the immediate
+                // We will automatically determine the shift based upon the value of imm
+
+                // First try the standard 'byteShifted immediate' imm(i8,bySh)
+                bsi.immBSVal = 0;
+                canEncode    = canEncodeByteShiftedImm(imm, elemsize, true, &bsi);
+                if (canEncode)
+                {
+                    imm = bsi.immBSVal;
+                    assert(isValidImmBSVal(imm, size));
+                    fmt = IF_DV_1B;
+                    break;
+                }
 
-    id->idInsFmt(fmt);
-    id->idjShort = true;
-}
+                // Next try the ones-complement form of the 'immediate' imm(i8,bySh)
+                if ((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)) // Only EA_2BYTE or EA_4BYTE forms
+                {
+                    notOfImm  = NOT_helper(imm, getBitWidth(elemsize));
+                    canEncode = canEncodeByteShiftedImm(notOfImm, elemsize, true, &bsi);
+                    if (canEncode)
+                    {
+                        imm = bsi.immBSVal;
+                        ins = INS_mvni; // uses a mvni encoding
+                        assert(isValidImmBSVal(imm, size));
+                        fmt = IF_DV_1B;
+                        break;
+                    }
+                }
+            }
+            break;
 
-/*****************************************************************************
- *
- *  Add a label instruction.
- */
+        case INS_orr:
+        case INS_bic:
+        case INS_mvni:
+            assert(isValidVectorDatasize(size));
+            assert(isVectorRegister(reg));
+            assert(isValidArrangement(size, opt));
+            elemsize = optGetElemsize(opt);
+            assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms
 
-void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
-{
-    assert(dst->HasFlag(BBF_HAS_LABEL));
+            // Vector operation
 
-    insFormat fmt = IF_NONE;
+            // No explicit LSL/MSL is used for the immediate
+            // We will automatically determine the shift based upon the value of imm
 
-    switch (ins)
-    {
-        case INS_adr:
-            fmt = IF_LARGEADR;
+            // First try the standard 'byteShifted immediate' imm(i8,bySh)
+            bsi.immBSVal = 0;
+            canEncode    = canEncodeByteShiftedImm(imm, elemsize,
+                                                   (ins == INS_mvni), // mvni supports the ones shifting variant (aka MSL)
+                                                   &bsi);
+            if (canEncode)
+            {
+                imm = bsi.immBSVal;
+                assert(isValidImmBSVal(imm, size));
+                fmt = IF_DV_1B;
+                break;
+            }
             break;
-        default:
-            unreached();
-    }
-
-    instrDescJmp* id = emitNewInstrJmp();
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idjShort             = false;
-    id->idAddr()->iiaBBlabel = dst;
-    id->idReg1(reg);
-    id->idOpSize(EA_PTRSIZE);
 
-#ifdef DEBUG
-    // Mark the catch return
-    if (emitComp->compCurBB->KindIs(BBJ_EHCATCHRET))
-    {
-        id->idDebugOnlyInfo()->idCatchRet = true;
-    }
-#endif // DEBUG
+        case INS_cmp:
+        case INS_cmn:
+            assert(insOptsNone(opt));
+            assert(isGeneralRegister(reg));
 
-    id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+            if (unsigned_abs(imm) <= 0x0fff)
+            {
+                if (imm < 0)
+                {
+                    ins = insReverse(ins);
+                    imm = -imm;
+                }
+                assert(isValidUimm<12>(imm));
+                canEncode = true;
+                fmt       = IF_DI_1A;
+            }
+            else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding
+            {
+                // Encoding will use a 12-bit left shift of the immediate
+                opt = INS_OPTS_LSL12;
+                if (imm < 0)
+                {
+                    ins = insReverse(ins);
+                    imm = -imm;
+                }
+                assert((imm & 0xfff) == 0);
+                imm >>= 12;
+                assert(isValidUimm<12>(imm));
+                canEncode = true;
+                fmt       = IF_DI_1A;
+            }
+            else
+            {
+                assert(!"Instruction cannot be encoded: IF_DI_1A");
+            }
+            break;
 
-#ifdef DEBUG
-    if (emitComp->opts.compLongAddress)
-        id->idjKeepLong = 1;
-#endif // DEBUG
+        default:
+            // fallback to emit SVE instructions.
+            return emitInsSve_R_I(ins, attr, reg, imm, opt, sopt);
+    } // end switch (ins)
 
-    /* Record the jump's IG and offset within it */
+    assert(canEncode);
+    assert(fmt != IF_NONE);
 
-    id->idjIG   = emitCurIG;
-    id->idjOffs = emitCurIGsize;
+    instrDesc* id = emitNewInstrSC(attr, imm);
 
-    /* Append this jump to this IG's jump list */
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
 
-    id->idjNext      = emitCurIGjmpList;
-    emitCurIGjmpList = id;
+    id->idReg1(reg);
 
-#if EMITTER_STATS
-    emitTotalIGjmps++;
+#ifdef DEBUG
+    id->idDebugOnlyInfo()->idMemCookie = targetHandle;
+    id->idDebugOnlyInfo()->idFlags     = gtFlags;
 #endif
 
     dispIns(id);
@@ -15088,12171 +4015,10814 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu
 
 /*****************************************************************************
  *
- *  Add a data label instruction.
+ *  Add an instruction referencing a register and a floating point constant.
  */
 
-void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg)
-{
-    NYI("emitIns_R_D");
-}
+void emitter::emitIns_R_F(
+    instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt /* = INS_OPTS_NONE */)
 
-void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
 {
-    assert((ins == INS_cbz) || (ins == INS_cbnz));
-
-    assert(dst != nullptr);
-    assert(dst->HasFlag(BBF_HAS_LABEL));
-
-    insFormat fmt = IF_LARGEJMP;
-
-    instrDescJmp* id = emitNewInstrJmp();
-
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idReg1(reg);
-    id->idjShort = false;
-    id->idOpSize(EA_SIZE(attr));
-
-    id->idAddr()->iiaBBlabel = dst;
-    id->idjKeepLong          = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
-
-    /* Record the jump's IG and offset within it */
-
-    id->idjIG   = emitCurIG;
-    id->idjOffs = emitCurIGsize;
-
-    /* Append this jump to this IG's jump list */
-
-    id->idjNext      = emitCurIGjmpList;
-    emitCurIGjmpList = id;
-
-#if EMITTER_STATS
-    emitTotalIGjmps++;
-#endif
+    emitAttr  size      = EA_SIZE(attr);
+    emitAttr  elemsize  = EA_UNKNOWN;
+    insFormat fmt       = IF_NONE;
+    ssize_t   imm       = 0;
+    bool      canEncode = false;
 
-    dispIns(id);
-    appendToCurIG(id);
-}
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        floatImm8 fpi;
 
-void emitter::emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg, int imm)
-{
-    assert((ins == INS_tbz) || (ins == INS_tbnz));
+        case INS_fcmp:
+        case INS_fcmpe:
+            assert(insOptsNone(opt));
+            assert(isValidVectorElemsizeFloat(size));
+            assert(isVectorRegister(reg));
+            if (immDbl == 0.0)
+            {
+                canEncode = true;
+                fmt       = IF_DV_1C;
+            }
+            break;
 
-    assert(dst != nullptr);
-    assert(dst->HasFlag(BBF_HAS_LABEL));
-    assert((EA_SIZE(attr) == EA_4BYTE) || (EA_SIZE(attr) == EA_8BYTE));
-    assert(imm < ((EA_SIZE(attr) == EA_4BYTE) ? 32 : 64));
+        case INS_fmov:
+            assert(isVectorRegister(reg));
+            fpi.immFPIVal = 0;
+            canEncode     = canEncodeFloatImm8(immDbl, &fpi);
 
-    insFormat fmt = IF_LARGEJMP;
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                elemsize = optGetElemsize(opt);
+                assert(isValidVectorElemsizeFloat(elemsize));
+                assert(opt != INS_OPTS_1D); // Reserved encoding
 
-    instrDescJmp* id = emitNewInstrJmp();
+                if (canEncode)
+                {
+                    imm = fpi.immFPIVal;
+                    assert((imm >= 0) && (imm <= 0xff));
+                    fmt = IF_DV_1B;
+                }
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(isValidVectorElemsizeFloat(size));
 
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idReg1(reg);
-    id->idjShort = false;
-    id->idSmallCns(imm);
-    id->idOpSize(EA_SIZE(attr));
+                if (canEncode)
+                {
+                    imm = fpi.immFPIVal;
+                    assert((imm >= 0) && (imm <= 0xff));
+                    fmt = IF_DV_1A;
+                }
+            }
+            break;
 
-    id->idAddr()->iiaBBlabel = dst;
-    id->idjKeepLong          = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+        default:
+            // fallback to emit SVE instructions.
+            return emitInsSve_R_F(ins, attr, reg, immDbl, opt);
 
-    /* Record the jump's IG and offset within it */
+    } // end switch (ins)
 
-    id->idjIG   = emitCurIG;
-    id->idjOffs = emitCurIGsize;
+    assert(canEncode);
+    assert(fmt != IF_NONE);
 
-    /* Append this jump to this IG's jump list */
+    instrDesc* id = emitNewInstrSC(attr, imm);
 
-    id->idjNext      = emitCurIGjmpList;
-    emitCurIGjmpList = id;
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
 
-#if EMITTER_STATS
-    emitTotalIGjmps++;
-#endif
+    id->idReg1(reg);
 
     dispIns(id);
     appendToCurIG(id);
 }
 
-void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount)
+//------------------------------------------------------------------------
+// emitIns_Mov: Emits a move instruction
+//
+// Arguments:
+//    ins       -- The instruction being emitted
+//    attr      -- The emit attribute
+//    dstReg    -- The destination register
+//    srcReg    -- The source register
+//    canSkip   -- true if the move can be elided when dstReg == srcReg, otherwise false
+//    insOpts   -- The instruction options
+//
+void emitter::emitIns_Mov(
+    instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */)
 {
-    insFormat fmt = IF_NONE;
+    assert(IsMovInstruction(ins));
 
-    if (dst != nullptr)
-    {
-        assert(dst->HasFlag(BBF_HAS_LABEL));
-    }
-    else
-    {
-        assert(instrCount != 0);
-    }
+    emitAttr  size     = EA_SIZE(attr);
+    emitAttr  elemsize = EA_UNKNOWN;
+    insFormat fmt      = IF_NONE;
 
     /* Figure out the encoding format of the instruction */
-
     switch (ins)
     {
-        case INS_bl_local:
-        case INS_b:
-            // Unconditional jump is a single form.
-            // Assume is long in case we cross hot/cold sections.
-            fmt = IF_BI_0A;
-            break;
-
-        case INS_beq:
-        case INS_bne:
-        case INS_bhs:
-        case INS_blo:
-        case INS_bmi:
-        case INS_bpl:
-        case INS_bvs:
-        case INS_bvc:
-        case INS_bhi:
-        case INS_bls:
-        case INS_bge:
-        case INS_blt:
-        case INS_bgt:
-        case INS_ble:
-            // Assume conditional jump is long.
-            fmt = IF_LARGEJMP;
-            break;
-
-        default:
-            unreached();
-            break;
-    }
-
-    instrDescJmp* id = emitNewInstrJmp();
+        case INS_mov:
+        {
+            assert(insOptsNone(opt));
 
-    id->idIns(ins);
-    id->idInsFmt(fmt);
-    id->idjShort = false;
+            if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip))
+            {
+                // These instructions have no side effect and can be skipped
+                return;
+            }
 
-#ifdef DEBUG
-    // Mark the finally call
-    if (ins == INS_bl_local && emitComp->compCurBB->KindIs(BBJ_CALLFINALLY))
-    {
-        id->idDebugOnlyInfo()->idFinallyCall = true;
-    }
-#endif // DEBUG
+            // Check for the 'mov' aliases for the vector registers
+            if (isVectorRegister(dstReg))
+            {
+                if (isVectorRegister(srcReg) && isValidVectorDatasize(size))
+                {
+                    return emitIns_R_R_R(INS_mov, size, dstReg, srcReg, srcReg);
+                }
+                else
+                {
+                    return emitIns_R_R_I(INS_mov, size, dstReg, srcReg, 0);
+                }
+            }
+            else
+            {
+                if (isVectorRegister(srcReg))
+                {
+                    assert(isGeneralRegister(dstReg));
+                    return emitIns_R_R_I(INS_mov, size, dstReg, srcReg, 0);
+                }
+            }
 
-    if (dst != nullptr)
-    {
-        id->idAddr()->iiaBBlabel = dst;
+            // Is this a MOV to/from SP instruction?
+            if ((dstReg == REG_SP) || (srcReg == REG_SP))
+            {
+                assert(isGeneralRegisterOrSP(dstReg));
+                assert(isGeneralRegisterOrSP(srcReg));
+                dstReg = encodingSPtoZR(dstReg);
+                srcReg = encodingSPtoZR(srcReg);
+                fmt    = IF_DR_2G;
+            }
+            else
+            {
+                assert(insOptsNone(opt));
+                assert(isGeneralRegister(dstReg));
+                assert(isGeneralRegisterOrZR(srcReg));
+                fmt = IF_DR_2E;
+            }
+            break;
+        }
 
-        // Skip unconditional jump that has a single form.
-        // The target needs to be relocated.
-        id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+        case INS_sxtw:
+        {
+            assert((size == EA_8BYTE) || (size == EA_4BYTE));
+            FALLTHROUGH;
+        }
 
-#ifdef DEBUG
-        if (emitComp->opts.compLongAddress) // Force long branches
+        case INS_sxtb:
+        case INS_sxth:
+        case INS_uxtb:
+        case INS_uxth:
         {
-            id->idjKeepLong = true;
+            if (canSkip && (dstReg == srcReg))
+            {
+                // There are scenarios such as in genCallInstruction where the sign/zero extension should be elided
+                return;
+            }
+
+            assert(insOptsNone(opt));
+            assert(isValidGeneralDatasize(size));
+            assert(isGeneralRegister(dstReg));
+            assert(isGeneralRegister(srcReg));
+            fmt = IF_DR_2H;
+            break;
         }
-#endif // DEBUG
-    }
-    else
-    {
-        id->idAddr()->iiaSetInstrCount(instrCount);
-        id->idjKeepLong = false;
-        /* This jump must be short */
-        emitSetShortJump(id);
-        id->idSetIsBound();
-    }
 
-    /* Record the jump's IG and offset within it */
+        case INS_fmov:
+        {
+            assert(isValidVectorElemsizeFloat(size));
 
-    id->idjIG   = emitCurIG;
-    id->idjOffs = emitCurIGsize;
+            if (canSkip && (dstReg == srcReg))
+            {
+                // These instructions have no side effect and can be skipped
+                return;
+            }
 
-    /* Append this jump to this IG's jump list */
+            if (isVectorRegister(dstReg))
+            {
+                if (isVectorRegister(srcReg))
+                {
+                    assert(insOptsNone(opt));
+                    fmt = IF_DV_2G;
+                }
+                else
+                {
+                    assert(isGeneralRegister(srcReg));
 
-    id->idjNext      = emitCurIGjmpList;
-    emitCurIGjmpList = id;
+                    // if the optional conversion specifier is not present we calculate it
+                    if (opt == INS_OPTS_NONE)
+                    {
+                        opt = (size == EA_4BYTE) ? INS_OPTS_4BYTE_TO_S : INS_OPTS_8BYTE_TO_D;
+                    }
+                    assert(insOptsConvertIntToFloat(opt));
 
-#if EMITTER_STATS
-    emitTotalIGjmps++;
-#endif
+                    fmt = IF_DV_2I;
+                }
+            }
+            else
+            {
+                assert(isGeneralRegister(dstReg));
+                assert(isVectorRegister(srcReg));
 
-    dispIns(id);
-    appendToCurIG(id);
-}
+                // if the optional conversion specifier is not present we calculate it
+                if (opt == INS_OPTS_NONE)
+                {
+                    opt = (size == EA_4BYTE) ? INS_OPTS_S_TO_4BYTE : INS_OPTS_D_TO_8BYTE;
+                }
+                assert(insOptsConvertFloatToInt(opt));
 
-/*****************************************************************************
- *
- *  Add a call instruction (direct or indirect).
- *      argSize<0 means that the caller will pop the arguments
- *
- * The other arguments are interpreted depending on callType as shown:
- * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
- *
- * EC_FUNC_TOKEN       : addr is the method address
- * EC_FUNC_ADDR        : addr is the absolute address of the function
- *
- * If callType is one of these emitCallTypes, addr has to be NULL.
- * EC_INDIR_R          : "call ireg".
- *
- * For ARM xreg, xmul and disp are never used and should always be 0/REG_NA.
- *
- *  Please consult the "debugger team notification" comment in genFnProlog().
- */
+                fmt = IF_DV_2H;
+            }
+            break;
+        }
 
-void emitter::emitIns_Call(EmitCallType          callType,
-                           CORINFO_METHOD_HANDLE methHnd,
-                           INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
-                           void*            addr,
-                           ssize_t          argSize,
-                           emitAttr         retSize,
-                           emitAttr         secondRetSize,
-                           VARSET_VALARG_TP ptrVars,
-                           regMaskTP        gcrefRegs,
-                           regMaskTP        byrefRegs,
-                           const DebugInfo& di /* = DebugInfo() */,
-                           regNumber        ireg /* = REG_NA */,
-                           regNumber        xreg /* = REG_NA */,
-                           unsigned         xmul /* = 0     */,
-                           ssize_t          disp /* = 0     */,
-                           bool             isJump /* = false */)
-{
-    /* Sanity check the arguments depending on callType */
+        default:
+        {
+            unreached();
+        }
+    }
 
-    assert(callType < EC_COUNT);
-    assert((callType != EC_FUNC_TOKEN) || (addr != nullptr && ireg == REG_NA));
-    assert(callType != EC_INDIR_R || (addr == nullptr && ireg < REG_COUNT));
+    assert(fmt != IF_NONE);
 
-    // ARM never uses these
-    assert(xreg == REG_NA && xmul == 0 && disp == 0);
+    instrDesc* id = emitNewInstrSmall(attr);
 
-    // Our stack level should be always greater than the bytes of arguments we push. Just
-    // a sanity test.
-    assert((unsigned)abs(argSize) <= codeGen->genStackLevel);
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
 
-    // Trim out any callee-trashed registers from the live set.
-    regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd);
-    gcrefRegs &= savedSet;
-    byrefRegs &= savedSet;
+    id->idReg1(dstReg);
+    id->idReg2(srcReg);
 
-#ifdef DEBUG
-    if (EMIT_GC_VERBOSE)
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add an instruction referencing two registers
+ */
+
+void emitter::emitIns_R_R(instruction     ins,
+                          emitAttr        attr,
+                          regNumber       reg1,
+                          regNumber       reg2,
+                          insOpts         opt /* = INS_OPTS_NONE */,
+                          insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
+{
+    if (IsMovInstruction(ins))
     {
-        printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
-        dumpConvertedVarSet(emitComp, ptrVars);
-        printf(", gcrefRegs=");
-        printRegMaskInt(gcrefRegs);
-        emitDispRegSet(gcrefRegs);
-        printf(", byrefRegs=");
-        printRegMaskInt(byrefRegs);
-        emitDispRegSet(byrefRegs);
-        printf("\n");
+        assert(!"Please use emitIns_Mov() to correctly handle move elision");
+        emitIns_Mov(ins, attr, reg1, reg2, /* canSkip */ false, opt);
     }
-#endif
 
-    /* Managed RetVal: emit sequence point for the call */
-    if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid())
+    emitAttr  size     = EA_SIZE(attr);
+    emitAttr  elemsize = EA_UNKNOWN;
+    insFormat fmt      = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
     {
-        codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false);
-    }
+        case INS_dup:
+            // Vector operation
+            assert(insOptsAnyArrangement(opt));
+            assert(isVectorRegister(reg1));
+            assert(isGeneralRegisterOrZR(reg2));
+            assert(isValidVectorDatasize(size));
+            assert(isValidArrangement(size, opt));
+            assert(opt != INS_OPTS_1D); // Reserved encoding
+            fmt = IF_DV_2C;
+            break;
 
-    /*
-        We need to allocate the appropriate instruction descriptor based
-        on whether this is a direct/indirect call, and whether we need to
-        record an updated set of live GC variables.
-     */
-    instrDesc* id;
+        case INS_abs:
+        case INS_not:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            if (ins == INS_not)
+            {
+                assert(isValidVectorDatasize(size));
+                // Bitwise behavior is independent of element size, but is always encoded as 1 Byte
+                opt = optMakeArrangement(size, EA_1BYTE);
+            }
+            if (insOptsNone(opt))
+            {
+                // Scalar operation
+                assert(size == EA_8BYTE); // Only type D is supported
+                fmt = IF_DV_2L;
+            }
+            else
+            {
+                // Vector operation
+                assert(insOptsAnyArrangement(opt));
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                elemsize = optGetElemsize(opt);
+                fmt      = IF_DV_2M;
+            }
+            break;
 
-    assert(argSize % REGSIZE_BYTES == 0);
-    int argCnt = (int)(argSize / (int)REGSIZE_BYTES);
+        case INS_mvn:
+        case INS_neg:
+            if (isVectorRegister(reg1))
+            {
+                assert(isVectorRegister(reg2));
+                if (ins == INS_mvn)
+                {
+                    assert(isValidVectorDatasize(size));
+                    // Bitwise behavior is independent of element size, but is always encoded as 1 Byte
+                    opt = optMakeArrangement(size, EA_1BYTE);
+                }
+                if (insOptsNone(opt))
+                {
+                    // Scalar operation
+                    assert(size == EA_8BYTE); // Only type D is supported
+                    fmt = IF_DV_2L;
+                }
+                else
+                {
+                    // Vector operation
+                    assert(isValidVectorDatasize(size));
+                    assert(isValidArrangement(size, opt));
+                    elemsize = optGetElemsize(opt);
+                    fmt      = IF_DV_2M;
+                }
+                break;
+            }
+            FALLTHROUGH;
 
-    if (callType == EC_INDIR_R)
-    {
-        /* Indirect call, virtual calls */
+        case INS_negs:
+            assert(insOptsNone(opt));
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegisterOrZR(reg2));
+            fmt = IF_DR_2E;
+            break;
 
-        id = emitNewInstrCallInd(argCnt, 0 /* disp */, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize);
-    }
-    else
-    {
-        /* Helper/static/nonvirtual/function calls (direct or through handle),
-           and calls to an absolute addr. */
+        case INS_sxtl:
+        case INS_sxtl2:
+        case INS_uxtl:
+        case INS_uxtl2:
+            return emitIns_R_R_I(ins, size, reg1, reg2, 0, opt);
+
+        case INS_cls:
+        case INS_clz:
+        case INS_rbit:
+        case INS_rev16:
+        case INS_rev32:
+        case INS_cnt:
+            if (isVectorRegister(reg1))
+            {
+                assert(isVectorRegister(reg2));
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                elemsize = optGetElemsize(opt);
+                if ((ins == INS_cls) || (ins == INS_clz))
+                {
+                    assert(elemsize != EA_8BYTE); // No encoding for type D
+                }
+                else if (ins == INS_rev32)
+                {
+                    assert((elemsize == EA_2BYTE) || (elemsize == EA_1BYTE));
+                }
+                else
+                {
+                    assert(elemsize == EA_1BYTE); // Only supports 8B or 16B
+                }
+                fmt = IF_DV_2M;
+                break;
+            }
+            if (ins == INS_cnt)
+            {
+                // Doesn't have general register version(s)
+                break;
+            }
+
+            FALLTHROUGH;
+
+        case INS_rev:
+            assert(insOptsNone(opt));
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegister(reg2));
+            if (ins == INS_rev32)
+            {
+                assert(size == EA_8BYTE);
+            }
+            else
+            {
+                assert(isValidGeneralDatasize(size));
+            }
+            fmt = IF_DR_2G;
+            break;
+
+        case INS_addv:
+        case INS_saddlv:
+        case INS_smaxv:
+        case INS_sminv:
+        case INS_uaddlv:
+        case INS_umaxv:
+        case INS_uminv:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isValidVectorDatasize(size));
+            assert(isValidArrangement(size, opt));
+            assert((opt != INS_OPTS_2S) && (opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // Reserved encodings
+            fmt = IF_DV_2T;
+            break;
+
+        case INS_rev64:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isValidVectorDatasize(size));
+            assert(isValidArrangement(size, opt));
+            elemsize = optGetElemsize(opt);
+            assert(elemsize != EA_8BYTE); // No encoding for type D
+            fmt = IF_DV_2M;
+            break;
+
+        case INS_sqxtn:
+        case INS_sqxtun:
+        case INS_uqxtn:
+            if (insOptsNone(opt))
+            {
+                // Scalar operation
+                assert(isVectorRegister(reg1));
+                assert(isVectorRegister(reg2));
+                assert(isValidVectorElemsize(size));
+                assert(size != EA_8BYTE); // The encoding size = 11 is reserved.
+                fmt = IF_DV_2L;
+                break;
+            }
+            FALLTHROUGH;
+
+        case INS_xtn:
+            // Vector operation
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(size == EA_8BYTE);
+            assert(isValidArrangement(size, opt));
+            assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = x is reserved
+            fmt = IF_DV_2M;
+            break;
+
+        case INS_sqxtn2:
+        case INS_sqxtun2:
+        case INS_uqxtn2:
+        case INS_xtn2:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(size == EA_16BYTE);
+            assert(isValidArrangement(size, opt));
+            assert(opt != INS_OPTS_2D); // The encoding size = 11, Q = x is reserved
+            fmt = IF_DV_2M;
+            break;
 
-        assert(callType == EC_FUNC_TOKEN);
+        case INS_ldar:
+        case INS_ldapr:
+        case INS_ldaxr:
+        case INS_ldxr:
+        case INS_stlr:
+            assert(isValidGeneralDatasize(size));
 
-        id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize);
-    }
+            FALLTHROUGH;
 
-    /* Update the emitter's live GC ref sets */
+        case INS_ldarb:
+        case INS_ldaprb:
+        case INS_ldaxrb:
+        case INS_ldxrb:
+        case INS_ldarh:
+        case INS_ldaprh:
+        case INS_ldaxrh:
+        case INS_ldxrh:
+        case INS_stlrb:
+        case INS_stlrh:
+            assert(isValidGeneralLSDatasize(size));
+            assert(isGeneralRegisterOrZR(reg1));
+            assert(isGeneralRegisterOrSP(reg2));
+            assert(insOptsNone(opt));
 
-    VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
-    emitThisGCrefRegs = gcrefRegs;
-    emitThisByrefRegs = byrefRegs;
+            reg2 = encodingSPtoZR(reg2);
 
-    id->idSetIsNoGC(emitNoGChelper(methHnd));
+            fmt = IF_LS_2A;
+            break;
 
-    /* Set the instruction - special case jumping a function */
-    instruction ins;
-    insFormat   fmt = IF_NONE;
+        case INS_ldr:
+        case INS_ldrb:
+        case INS_ldrh:
+        case INS_ldrsb:
+        case INS_ldrsh:
+        case INS_ldrsw:
+        case INS_str:
+        case INS_strb:
+        case INS_strh:
+        case INS_cmn:
+        case INS_tst:
+            assert(insOptsNone(opt));
+            emitIns_R_R_I(ins, attr, reg1, reg2, 0, INS_OPTS_NONE);
+            return;
 
-    /* Record the address: method, indirection, or funcptr */
+        case INS_cmp:
+            emitIns_R_R_I(ins, attr, reg1, reg2, 0, opt);
+            return;
 
-    if (callType == EC_INDIR_R)
-    {
-        /* This is an indirect call (either a virtual call or func ptr call) */
+        case INS_staddb:
+            emitIns_R_R_R(INS_ldaddb, attr, reg1, REG_ZR, reg2);
+            return;
+        case INS_staddlb:
+            emitIns_R_R_R(INS_ldaddlb, attr, reg1, REG_ZR, reg2);
+            return;
+        case INS_staddh:
+            emitIns_R_R_R(INS_ldaddh, attr, reg1, REG_ZR, reg2);
+            return;
+        case INS_staddlh:
+            emitIns_R_R_R(INS_ldaddlh, attr, reg1, REG_ZR, reg2);
+            return;
+        case INS_stadd:
+            emitIns_R_R_R(INS_ldadd, attr, reg1, REG_ZR, reg2);
+            return;
+        case INS_staddl:
+            emitIns_R_R_R(INS_ldaddl, attr, reg1, REG_ZR, reg2);
+            return;
 
-        if (isJump)
-        {
-            ins = INS_br_tail; // INS_br_tail  Reg
-        }
-        else
-        {
-            ins = INS_blr; // INS_blr Reg
-        }
-        fmt = IF_BR_1B;
+        case INS_fcmp:
+        case INS_fcmpe:
+            assert(insOptsNone(opt));
+            assert(isValidVectorElemsizeFloat(size));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            fmt = IF_DV_2K;
+            break;
 
-        id->idIns(ins);
-        id->idInsFmt(fmt);
+        case INS_fcvtns:
+        case INS_fcvtnu:
+        case INS_fcvtas:
+        case INS_fcvtau:
+        case INS_fcvtps:
+        case INS_fcvtpu:
+        case INS_fcvtms:
+        case INS_fcvtmu:
+        case INS_fcvtzs:
+        case INS_fcvtzu:
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isVectorRegister(reg1));
+                assert(isVectorRegister(reg2));
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                elemsize = optGetElemsize(opt);
+                assert(isValidVectorElemsizeFloat(elemsize));
+                assert(opt != INS_OPTS_1D); // Reserved encoding
+                fmt = IF_DV_2A;
+            }
+            else
+            {
+                // Scalar operation
+                assert(isVectorRegister(reg2));
+                if (isVectorRegister(reg1))
+                {
+                    assert(insOptsNone(opt));
+                    assert(isValidVectorElemsizeFloat(size));
+                    fmt = IF_DV_2G;
+                }
+                else
+                {
+                    assert(isGeneralRegister(reg1));
+                    assert(insOptsConvertFloatToInt(opt));
+                    assert(isValidVectorElemsizeFloat(size));
+                    fmt = IF_DV_2H;
+                }
+            }
+            break;
 
-        id->idReg3(ireg);
-        assert(xreg == REG_NA);
-    }
-    else
-    {
-        /* This is a simple direct call: "call helper/method/addr" */
+        case INS_fcvtl:
+        case INS_fcvtn:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(size == EA_8BYTE);
+            assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
+            fmt = IF_DV_2A;
+            break;
 
-        assert(callType == EC_FUNC_TOKEN);
+        case INS_fcvtl2:
+        case INS_fcvtn2:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(size == EA_16BYTE);
+            assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S));
+            fmt = IF_DV_2A;
+            break;
 
-        assert(addr != NULL);
+        case INS_fcvtxn:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
 
-        if (isJump)
-        {
-            ins = INS_b_tail; // INS_b_tail imm28
-        }
-        else
-        {
-            ins = INS_bl; // INS_bl imm28
-        }
-        fmt = IF_BI_0C;
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(size == EA_8BYTE);
+                assert(opt == INS_OPTS_2S);
+                fmt = IF_DV_2A;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(size == EA_4BYTE);
+                fmt = IF_DV_2G;
+            }
+            break;
 
-        id->idIns(ins);
-        id->idInsFmt(fmt);
+        case INS_fcvtxn2:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(size == EA_16BYTE);
+            assert(opt == INS_OPTS_4S);
+            fmt = IF_DV_2A;
+            break;
 
-        id->idAddr()->iiaAddr = (BYTE*)addr;
+        case INS_scvtf:
+        case INS_ucvtf:
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isVectorRegister(reg1));
+                assert(isVectorRegister(reg2));
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                elemsize = optGetElemsize(opt);
+                assert(isValidVectorElemsizeFloat(elemsize));
+                assert(opt != INS_OPTS_1D); // Reserved encoding
+                fmt = IF_DV_2A;
+            }
+            else
+            {
+                // Scalar operation
+                assert(isVectorRegister(reg1));
+                if (isVectorRegister(reg2))
+                {
+                    assert(insOptsNone(opt));
+                    assert(isValidVectorElemsizeFloat(size));
+                    fmt = IF_DV_2G;
+                }
+                else
+                {
+                    assert(isGeneralRegister(reg2));
+                    assert(insOptsConvertIntToFloat(opt));
+                    assert(isValidVectorElemsizeFloat(size));
+                    fmt = IF_DV_2I;
+                }
+            }
+            break;
 
-        if (emitComp->opts.compReloc)
-        {
-            id->idSetIsDspReloc();
-        }
-    }
+        case INS_fabs:
+        case INS_fneg:
+        case INS_fsqrt:
+        case INS_frinta:
+        case INS_frinti:
+        case INS_frintm:
+        case INS_frintn:
+        case INS_frintp:
+        case INS_frintx:
+        case INS_frintz:
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isVectorRegister(reg1));
+                assert(isVectorRegister(reg2));
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                elemsize = optGetElemsize(opt);
+                assert(isValidVectorElemsizeFloat(elemsize));
+                assert(opt != INS_OPTS_1D); // Reserved encoding
+                fmt = IF_DV_2A;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(isValidVectorElemsizeFloat(size));
+                assert(isVectorRegister(reg1));
+                assert(isVectorRegister(reg2));
+                fmt = IF_DV_2G;
+            }
+            break;
 
-#ifdef DEBUG
-    if (EMIT_GC_VERBOSE)
-    {
-        if (id->idIsLargeCall())
-        {
-            printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
-                   VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
-        }
-    }
-#endif
+        case INS_faddp:
+        case INS_fmaxnmp:
+        case INS_fmaxp:
+        case INS_fminnmp:
+        case INS_fminp:
+            // Scalar operation
+            assert(((size == EA_8BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_16BYTE) && (opt == INS_OPTS_2D)));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            fmt = IF_DV_2Q;
+            break;
 
-    if (m_debugInfoSize > 0)
-    {
-        INDEBUG(id->idDebugOnlyInfo()->idCallSig = sigInfo);
-        id->idDebugOnlyInfo()->idMemCookie       = (size_t)methHnd; // method token
-    }
+        case INS_fmaxnmv:
+        case INS_fmaxv:
+        case INS_fminnmv:
+        case INS_fminv:
+            assert(size == EA_16BYTE);
+            assert(opt == INS_OPTS_4S);
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            fmt = IF_DV_2R;
+            break;
 
-#ifdef LATE_DISASM
-    if (addr != nullptr)
-    {
-        codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
-    }
-#endif // LATE_DISASM
+        case INS_addp:
+            assert(size == EA_16BYTE);
+            assert(opt == INS_OPTS_2D);
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            fmt = IF_DV_2S;
+            break;
 
-    dispIns(id);
-    appendToCurIG(id);
-    emitLastMemBarrier = nullptr; // Cannot optimize away future memory barriers
-}
+        case INS_fcvt:
+            assert(insOptsConvertFloatToFloat(opt));
+            assert(isValidVectorFcvtsize(size));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            fmt = IF_DV_2J;
+            break;
 
-/*****************************************************************************
- *
- *  Returns true if 'imm' is valid Cond encoding
- */
+        case INS_cmeq:
+        case INS_cmge:
+        case INS_cmgt:
+        case INS_cmle:
+        case INS_cmlt:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
 
-/*static*/ bool emitter::isValidImmCond(ssize_t imm)
-{
-    // range check the ssize_t value, to make sure it is a small unsigned value
-    // and that only the bits in the cfi.cond are set
-    if ((imm < 0) || (imm > 0xF))
-        return false;
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                elemsize = optGetElemsize(opt);
+                fmt      = IF_DV_2M;
+            }
+            else
+            {
+                // Scalar operation
+                assert(size == EA_8BYTE);
+                assert(insOptsNone(opt));
+                fmt = IF_DV_2L;
+            }
+            break;
 
-    condFlagsImm cfi;
-    cfi.immCFVal = (unsigned)imm;
+        case INS_fcmeq:
+        case INS_fcmge:
+        case INS_fcmgt:
+        case INS_fcmle:
+        case INS_fcmlt:
+        case INS_frecpe:
+        case INS_frsqrte:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
 
-    return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV).
-}
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                elemsize = optGetElemsize(opt);
+                assert(isValidVectorElemsizeFloat(elemsize)); // Only Double/Float supported
+                assert(opt != INS_OPTS_1D);                   // Reserved encoding
+                fmt = IF_DV_2A;
+            }
+            else
+            {
+                // Scalar operation
+                assert(isValidScalarDatasize(size)); // Only Double/Float supported
+                assert(insOptsNone(opt));
+                fmt = IF_DV_2G;
+            }
+            break;
 
-/*****************************************************************************
- *
- *  Returns true if 'imm' is valid Cond/Flags encoding
- */
+        case INS_aesd:
+        case INS_aese:
+        case INS_aesmc:
+        case INS_aesimc:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isValidVectorDatasize(size));
+            elemsize = optGetElemsize(opt);
+            assert(elemsize == EA_1BYTE);
+            fmt = IF_DV_2P;
+            break;
 
-/*static*/ bool emitter::isValidImmCondFlags(ssize_t imm)
-{
-    // range check the ssize_t value, to make sure it is a small unsigned value
-    // and that only the bits in the cfi.cond or cfi.flags are set
-    if ((imm < 0) || (imm > 0xFF))
-        return false;
+        case INS_sha1h:
+            assert(insOptsNone(opt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            fmt = IF_DV_2U;
+            break;
 
-    condFlagsImm cfi;
-    cfi.immCFVal = (unsigned)imm;
+        case INS_sha256su0:
+        case INS_sha1su1:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isValidVectorDatasize(size));
+            elemsize = optGetElemsize(opt);
+            assert(elemsize == EA_4BYTE);
+            fmt = IF_DV_2P;
+            break;
 
-    return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV).
-}
+        case INS_ld2:
+        case INS_ld3:
+        case INS_ld4:
+        case INS_st2:
+        case INS_st3:
+        case INS_st4:
+            assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1
+            FALLTHROUGH;
 
-/*****************************************************************************
- *
- *  Returns true if 'imm' is valid Cond/Flags/Imm5 encoding
- */
+        case INS_ld1:
+        case INS_ld1_2regs:
+        case INS_ld1_3regs:
+        case INS_ld1_4regs:
+        case INS_st1:
+        case INS_st1_2regs:
+        case INS_st1_3regs:
+        case INS_st1_4regs:
+        case INS_ld1r:
+        case INS_ld2r:
+        case INS_ld3r:
+        case INS_ld4r:
+            assert(isVectorRegister(reg1));
+            assert(isGeneralRegisterOrSP(reg2));
+            assert(isValidVectorDatasize(size));
+            assert(isValidArrangement(size, opt));
 
-/*static*/ bool emitter::isValidImmCondFlagsImm5(ssize_t imm)
-{
-    // range check the ssize_t value, to make sure it is a small unsigned value
-    // and that only the bits in the cfi.cond, cfi.flags or cfi.imm5 are set
-    if ((imm < 0) || (imm > 0x1FFF))
-        return false;
+            // Load/Store multiple structures       base register
+            // Load single structure and replicate  base register
+            reg2 = encodingSPtoZR(reg2);
+            fmt  = IF_LS_2D;
+            break;
 
-    condFlagsImm cfi;
-    cfi.immCFVal = (unsigned)imm;
+        case INS_urecpe:
+        case INS_ursqrte:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isValidVectorDatasize(size));
+            assert(isValidArrangement(size, opt));
+            elemsize = optGetElemsize(opt);
+            assert(elemsize == EA_4BYTE);
+            fmt = IF_DV_2A;
+            break;
 
-    return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV).
-}
+        case INS_frecpx:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isValidScalarDatasize(size));
+            assert(insOptsNone(opt));
+            fmt = IF_DV_2G;
+            break;
 
-/*****************************************************************************
- *
- *  Returns an encoding for the specified register used in the 'Rd' position
- */
+        case INS_sadalp:
+        case INS_saddlp:
+        case INS_uadalp:
+        case INS_uaddlp:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isValidArrangement(size, opt));
+            assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved
+            fmt = IF_DV_2T;
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_Rd(regNumber reg)
-{
-    assert(isIntegerRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg;
-}
+        case INS_sqabs:
+        case INS_sqneg:
+        case INS_suqadd:
+        case INS_usqadd:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
 
-/*****************************************************************************
- *
- *  Returns an encoding for the specified register used in the 'Rt' position
- */
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidArrangement(size, opt));
+                assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved
+                fmt = IF_DV_2M;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(isValidVectorElemsize(size));
+                fmt = IF_DV_2L;
+            }
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_Rt(regNumber reg)
-{
-    assert(isIntegerRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg;
-}
+        default:
+            // fallback to emit SVE instructions.
+            return emitInsSve_R_R(ins, attr, reg1, reg2, opt, sopt);
 
-/*****************************************************************************
- *
- *  Returns an encoding for the specified register used in the 'Rn' position
- */
+    } // end switch (ins)
 
-/*static*/ emitter::code_t emitter::insEncodeReg_Rn(regNumber reg)
-{
-    assert(isIntegerRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 5;
-}
+    assert(fmt != IF_NONE);
 
-/*****************************************************************************
- *
- *  Returns an encoding for the specified register used in the 'Rm' position
- */
+    instrDesc* id = emitNewInstrSmall(attr);
 
-/*static*/ emitter::code_t emitter::insEncodeReg_Rm(regNumber reg)
-{
-    assert(isIntegerRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 16;
-}
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
 
-/*****************************************************************************
- *
- *  Returns an encoding for the specified register used in the 'Ra' position
- */
+    id->idReg1(reg1);
+    id->idReg2(reg2);
 
-/*static*/ emitter::code_t emitter::insEncodeReg_Ra(regNumber reg)
-{
-    assert(isIntegerRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 10;
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- *  Returns an encoding for the specified register used in the 'Vd' position
+ *  Add an instruction referencing a register and two constants.
  */
 
-/*static*/ emitter::code_t emitter::insEncodeReg_Vd(regNumber reg)
+void emitter::emitIns_R_I_I(instruction ins,
+                            emitAttr    attr,
+                            regNumber   reg,
+                            ssize_t     imm1,
+                            ssize_t     imm2,
+                            insOpts opt /* = INS_OPTS_NONE */
+                                DEBUGARG(size_t targetHandle /* = 0 */) DEBUGARG(GenTreeFlags gtFlags /* = 0 */))
 {
-    assert(emitter::isVectorRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg;
-}
+    emitAttr  size   = EA_SIZE(attr);
+    insFormat fmt    = IF_NONE;
+    size_t    immOut = 0; // composed from imm1 and imm2 and stored in the instrDesc
 
-/*****************************************************************************
- *
- *  Returns an encoding for the specified register used in the 'Vt' position
- */
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        bool        canEncode;
+        halfwordImm hwi;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_Vt(regNumber reg)
-{
-    assert(emitter::isVectorRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg;
-}
+        case INS_mov:
+            ins = INS_movz; // INS_mov with LSL is an alias for INS_movz LSL
+            FALLTHROUGH;
 
-/*****************************************************************************
- *
- *  Returns an encoding for the specified register used in the 'Vn' position
- */
+        case INS_movk:
+        case INS_movn:
+        case INS_movz:
+            assert(isValidGeneralDatasize(size));
+            assert(isGeneralRegister(reg));
+            assert(isValidUimm<16>(imm1));
+            assert(insOptsLSL(opt)); // Must be INS_OPTS_LSL
 
-/*static*/ emitter::code_t emitter::insEncodeReg_Vn(regNumber reg)
-{
-    assert(emitter::isVectorRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 5;
-}
+            if (size == EA_8BYTE)
+            {
+                assert((imm2 == 0) || (imm2 == 16) || // shift amount: 0, 16, 32 or 48
+                       (imm2 == 32) || (imm2 == 48));
+            }
+            else // EA_4BYTE
+            {
+                assert((imm2 == 0) || (imm2 == 16)); // shift amount: 0 or 16
+            }
 
-/*****************************************************************************
- *
- *  Returns an encoding for the specified register used in the 'Vm' position
- */
+            hwi.immHWVal = 0;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_Vm(regNumber reg)
-{
-    assert(emitter::isVectorRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 16;
-}
+            switch (imm2)
+            {
+                case 0:
+                    hwi.immHW = 0;
+                    canEncode = true;
+                    break;
 
-/*****************************************************************************
- *
- *  Returns an encoding for the specified register used in the 'Va' position
- */
+                case 16:
+                    hwi.immHW = 1;
+                    canEncode = true;
+                    break;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_Va(regNumber reg)
-{
-    assert(emitter::isVectorRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 10;
-}
+                case 32:
+                    hwi.immHW = 2;
+                    canEncode = true;
+                    break;
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'V' register used in '4' thru '0' position.
- */
+                case 48:
+                    hwi.immHW = 3;
+                    canEncode = true;
+                    break;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_V_4_to_0(regNumber reg)
-{
-    assert(isVectorRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 0;
-}
+                default:
+                    canEncode = false;
+            }
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'V' register used in '9' thru '5' position.
- */
+            if (canEncode)
+            {
+                hwi.immVal = imm1;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_V_9_to_5(regNumber reg)
-{
-    assert(isVectorRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 5;
-}
+                immOut = hwi.immHWVal;
+                assert(isValidImmHWVal(immOut, size));
+                fmt = IF_DI_1B;
+            }
+            break;
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'P' register used in '12' thru '10' position.
- */
+        default:
+            // fallback to emit SVE instructions.
+            return emitInsSve_R_I_I(ins, attr, reg, imm1, imm2, opt);
 
-/*static*/ emitter::code_t emitter::insEncodeReg_P_12_to_10(regNumber reg)
-{
-    assert(isLowPredicateRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0;
-    assert((ureg >= 0) && (ureg <= 15));
-    return ureg << 10;
-}
+    } // end switch (ins)
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'V' register used in '20' thru '16' position.
- */
+    assert(fmt != IF_NONE);
 
-/*static*/ emitter::code_t emitter::insEncodeReg_V_20_to_16(regNumber reg)
-{
-    assert(isVectorRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 16;
-}
+    instrDesc* id = emitNewInstrSC(attr, immOut);
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'R' register used in '20' thru '16' position.
- */
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
 
-/*static*/ emitter::code_t emitter::insEncodeReg_R_20_to_16(regNumber reg)
-{
-    assert(isIntegerRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 16;
-}
+    id->idReg1(reg);
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'R' register used in '9' thru '5' position.
- */
+#ifdef DEBUG
+    id->idDebugOnlyInfo()->idFlags     = gtFlags;
+    id->idDebugOnlyInfo()->idMemCookie = targetHandle;
+#endif
 
-/*static*/ emitter::code_t emitter::insEncodeReg_R_9_to_5(regNumber reg)
-{
-    assert(isIntegerRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 5;
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- *  Return an encoding for the specified 'R' register used in '4' thru '0' position.
+ *  Add an instruction referencing two registers and a constant.
  */
 
-/*static*/ emitter::code_t emitter::insEncodeReg_R_4_to_0(regNumber reg)
+void emitter::emitIns_R_R_I(instruction     ins,
+                            emitAttr        attr,
+                            regNumber       reg1,
+                            regNumber       reg2,
+                            ssize_t         imm,
+                            insOpts         opt /* = INS_OPTS_NONE */,
+                            insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
 {
-    assert(isIntegerRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 0;
-}
+    emitAttr  size       = EA_SIZE(attr);
+    emitAttr  elemsize   = EA_UNKNOWN;
+    insFormat fmt        = IF_NONE;
+    bool      isLdSt     = false;
+    bool      isLdrStr   = false;
+    bool      isSIMD     = false;
+    bool      isAddSub   = false;
+    bool      setFlags   = false;
+    unsigned  scale      = 0;
+    bool      unscaledOp = false;
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'P' register used in '19' thru '16' position.
- */
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        bool       canEncode;
+        bitMaskImm bmi;
+        unsigned   registerListSize;
+        bool       isRightShift;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_P_19_to_16(regNumber reg)
-{
-    assert(isPredicateRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0;
-    assert((ureg >= 0) && (ureg <= 15));
-    return ureg << 16;
-}
+        case INS_mov:
+            // Check for the 'mov' aliases for the vector registers
+            assert(insOptsNone(opt));
+            assert(isValidVectorElemsize(size));
+            elemsize = size;
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+
+            if (isVectorRegister(reg1))
+            {
+                if (isGeneralRegisterOrZR(reg2))
+                {
+                    fmt = IF_DV_2C; // Alias for 'ins'
+                    break;
+                }
+                else if (isVectorRegister(reg2))
+                {
+                    fmt = IF_DV_2E; // Alias for 'dup'
+                    break;
+                }
+            }
+            else // isGeneralRegister(reg1)
+            {
+                assert(isGeneralRegister(reg1));
+                if (isVectorRegister(reg2))
+                {
+                    fmt = IF_DV_2B; // Alias for 'umov'
+                    break;
+                }
+            }
+            assert(!" invalid INS_mov operands");
+            break;
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'P' register used in '3' thru '0' position.
- */
+        case INS_lsl:
+        case INS_lsr:
+        case INS_asr:
+            assert(insOptsNone(opt));
+            assert(isValidGeneralDatasize(size));
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegister(reg2));
+            assert(isValidImmShift(imm, size));
+            fmt = IF_DI_2D;
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_P_3_to_0(regNumber reg)
-{
-    assert(isPredicateRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0;
-    assert((ureg >= 0) && (ureg <= 15));
-    return ureg;
-}
+        case INS_ror:
+            assert(insOptsNone(opt));
+            assert(isValidGeneralDatasize(size));
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegister(reg2));
+            assert(isValidImmShift(imm, size));
+            fmt = IF_DI_2B;
+            break;
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'P' register used in '8' thru '5' position.
- */
+        case INS_shl:
+        case INS_sli:
+        case INS_sri:
+        case INS_srshr:
+        case INS_srsra:
+        case INS_sshr:
+        case INS_ssra:
+        case INS_urshr:
+        case INS_ursra:
+        case INS_ushr:
+        case INS_usra:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            isRightShift = emitInsIsVectorRightShift(ins);
 
-/*static*/ emitter::code_t emitter::insEncodeReg_P_8_to_5(regNumber reg)
-{
-    assert(isPredicateRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0;
-    assert((ureg >= 0) && (ureg <= 15));
-    return ureg << 5;
-}
+            assert(!isRightShift ||
+                   (imm != 0 && "instructions for vector right-shift do not allow zero as an immediate value"));
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'P' register used in '13' thru '10' position.
- */
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                elemsize = optGetElemsize(opt);
+                assert(isValidVectorElemsize(elemsize));
+                assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
+                assert(opt != INS_OPTS_1D); // Reserved encoding
+                fmt = IF_DV_2O;
+                break;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(size == EA_8BYTE); // only supported size
+                assert(isValidVectorShiftAmount(imm, size, isRightShift));
+                fmt = IF_DV_2N;
+            }
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_P_13_to_10(regNumber reg)
-{
-    assert(isPredicateRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0;
-    assert((ureg >= 0) && (ureg <= 15));
-    return ureg << 10;
-}
+        case INS_sqshl:
+        case INS_uqshl:
+        case INS_sqshlu:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            isRightShift = emitInsIsVectorRightShift(ins);
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'R' register used in '17' thru '16' position.
- */
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidArrangement(size, opt));
+                assert(opt != INS_OPTS_1D); // The encoding immh = 1xxx, Q = 0 is reserved
+                elemsize = optGetElemsize(opt);
+                assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
+                fmt = IF_DV_2O;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(isValidVectorElemsize(size));
+                assert(isValidVectorShiftAmount(imm, size, isRightShift));
+                fmt = IF_DV_2N;
+            }
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_R_17_to_16(regNumber reg)
-{
-    assert(isIntegerRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg;
-    assert((ureg >= 12) && (ureg <= 15));
-    return ureg << 16;
-}
+        case INS_sqrshrn:
+        case INS_sqrshrun:
+        case INS_sqshrn:
+        case INS_sqshrun:
+        case INS_uqrshrn:
+        case INS_uqshrn:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            isRightShift = emitInsIsVectorRightShift(ins);
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'P' register used in '7' thru '5' position.
- */
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidArrangement(size, opt));
+                assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding immh = 1xxx, Q = x is reserved
+                elemsize = optGetElemsize(opt);
+                assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
+                fmt = IF_DV_2O;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(isValidVectorElemsize(size));
+                assert(size != EA_8BYTE); // The encoding immh = 1xxx is reserved
+                assert(isValidVectorShiftAmount(imm, size, isRightShift));
+                fmt = IF_DV_2N;
+            }
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_P_7_to_5(regNumber reg)
-{
-    assert(isHighPredicateRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P8;
-    assert((ureg >= 0) && (ureg <= 7));
-    return ureg << 5;
-}
+        case INS_sxtl:
+        case INS_uxtl:
+            assert(imm == 0);
+            FALLTHROUGH;
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'P' register used in '3' thru '1' position.
- */
+        case INS_rshrn:
+        case INS_shrn:
+        case INS_sshll:
+        case INS_ushll:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            isRightShift = emitInsIsVectorRightShift(ins);
+            // Vector operation
+            assert(size == EA_8BYTE);
+            assert(isValidArrangement(size, opt));
+            elemsize = optGetElemsize(opt);
+            assert(elemsize != EA_8BYTE); // Reserved encodings
+            assert(isValidVectorElemsize(elemsize));
+            assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
+            fmt = IF_DV_2O;
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_P_3_to_1(regNumber reg)
-{
-    assert(isLowPredicateRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0;
-    assert((ureg >= 0) && (ureg <= 15));
-    return ureg << 1;
-}
+        case INS_sxtl2:
+        case INS_uxtl2:
+            assert(imm == 0);
+            FALLTHROUGH;
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'P' register used in '2' thru '0' position.
- */
+        case INS_rshrn2:
+        case INS_shrn2:
+        case INS_sqrshrn2:
+        case INS_sqrshrun2:
+        case INS_sqshrn2:
+        case INS_sqshrun2:
+        case INS_sshll2:
+        case INS_uqrshrn2:
+        case INS_uqshrn2:
+        case INS_ushll2:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            isRightShift = emitInsIsVectorRightShift(ins);
 
-/*static*/ emitter::code_t emitter::insEncodeReg_P_2_to_0(regNumber reg)
-{
-    assert(isPredicateRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0;
-    assert((ureg >= 8) && (ureg <= 15));
-    return (ureg - 8) << 0;
-}
+            // Vector operation
+            assert(size == EA_16BYTE);
+            assert(isValidArrangement(size, opt));
+            elemsize = optGetElemsize(opt);
+            assert(elemsize != EA_8BYTE); // The encoding immh = 1xxx, Q = x is reserved
+            assert(isValidVectorElemsize(elemsize));
+            assert(isValidVectorShiftAmount(imm, elemsize, isRightShift));
+            fmt = IF_DV_2O;
+            break;
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified predicate type used in '16' position.
- */
+        case INS_mvn:
+        case INS_neg:
+        case INS_negs:
+            assert(isValidGeneralDatasize(size));
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegisterOrZR(reg2));
 
-/*static*/ emitter::code_t emitter::insEncodePredQualifier_16(bool merge)
-{
-    return merge ? 1 << 16 : 0;
-}
+            if (imm == 0)
+            {
+                assert(insOptsNone(opt)); // a zero imm, means no alu shift kind
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified predicate type used in '4' position.
- */
+                fmt = IF_DR_2E;
+            }
+            else
+            {
+                if (ins == INS_mvn)
+                {
+                    assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind
+                }
+                else // neg or negs
+                {
+                    assert(insOptsAluShift(opt)); // a non-zero imm, must select shift kind, can't use ROR
+                }
+                assert(isValidImmShift(imm, size));
+                fmt = IF_DR_2F;
+            }
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodePredQualifier_4(bool merge)
-{
-    return merge ? 1 << 4 : 0;
-}
+        case INS_tst:
+            assert(isValidGeneralDatasize(size));
+            assert(isGeneralRegisterOrZR(reg1));
+            assert(isGeneralRegister(reg2));
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'V' register used in '18' thru '16' position.
- */
+            if (insOptsAnyShift(opt))
+            {
+                assert(isValidImmShift(imm, size) && (imm != 0));
+                fmt = IF_DR_2B;
+            }
+            else
+            {
+                assert(insOptsNone(opt)); // a zero imm, means no alu shift kind
+                assert(imm == 0);
+                fmt = IF_DR_2A;
+            }
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_V_18_to_16(regNumber reg)
-{
-    assert(isVectorRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
-    assert((ureg >= 0) && (ureg <= 7));
-    return ureg << 16;
-}
+        case INS_cmp:
+        case INS_cmn:
+            assert(isValidGeneralDatasize(size));
+            assert(isGeneralRegisterOrSP(reg1));
+            assert(isGeneralRegister(reg2));
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'V' register used in '19' thru '16' position.
- */
+            reg1 = encodingSPtoZR(reg1);
+            if (insOptsAnyExtend(opt))
+            {
+                assert((imm >= 0) && (imm <= 4));
 
-/*static*/ emitter::code_t emitter::insEncodeReg_V_19_to_16(regNumber reg)
-{
-    assert(isVectorRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
-    assert((ureg >= 0) && (ureg <= 15));
-    return ureg << 16;
-}
+                fmt = IF_DR_2C;
+            }
+            else if (imm == 0)
+            {
+                assert(insOptsNone(opt)); // a zero imm, means no alu shift kind
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'V' register used in '9' thru '6' position.
- */
+                fmt = IF_DR_2A;
+            }
+            else
+            {
+                assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind
+                assert(isValidImmShift(imm, size));
+                fmt = IF_DR_2B;
+            }
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_V_9_to_6(regNumber reg)
-{
-    assert(isVectorRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 6;
-}
+        case INS_ands:
+        case INS_and:
+        case INS_eor:
+        case INS_orr:
+            assert(insOptsNone(opt));
+            assert(isGeneralRegister(reg2));
+            if (ins == INS_ands)
+            {
+                assert(isGeneralRegister(reg1));
+            }
+            else
+            {
+                assert(isGeneralRegisterOrSP(reg1));
+                reg1 = encodingSPtoZR(reg1);
+            }
 
-/*****************************************************************************
- *
- *  Return an encoding for the specified 'V' register used in '9' thru '6' position with the times two encoding.
- *  This encoding requires that the register number be divisible by two.
- */
+            bmi.immNRS = 0;
+            canEncode  = canEncodeBitMaskImm(imm, size, &bmi);
+            if (canEncode)
+            {
+                imm = bmi.immNRS;
+                assert(isValidImmNRS(imm, size));
+                fmt = IF_DI_2C;
+            }
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeReg_V_9_to_6_Times_Two(regNumber reg)
-{
-    assert(isVectorRegister(reg));
-    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
-    assert(ureg % 2 == 0);
-    ureg /= 2u;
-    assert((ureg >= 0) && (ureg <= 31));
-    return ureg << 6;
-}
+        case INS_dup: // by element, imm selects the element of reg2
+            assert(isVectorRegister(reg1));
+            if (isVectorRegister(reg2))
+            {
+                if (insOptsAnyArrangement(opt))
+                {
+                    // The size and opt were modified to be based on the
+                    // return type but the immediate is based on the operand
+                    // which can be of a larger size. As such, we don't
+                    // assert the index is valid here and instead do it in
+                    // codegen.
 
-/*****************************************************************************
- *
- *  Returns an encoding for the specified condition code.
- */
+                    // Vector operation
+                    assert(isValidVectorDatasize(size));
+                    assert(isValidArrangement(size, opt));
+                    elemsize = optGetElemsize(opt);
+                    assert(isValidVectorElemsize(elemsize));
+                    assert(opt != INS_OPTS_1D); // Reserved encoding
+                    fmt = IF_DV_2D;
+                    break;
+                }
+                else
+                {
+                    // Scalar operation
+                    assert(insOptsNone(opt));
+                    elemsize = size;
+                    assert(isValidVectorElemsize(elemsize));
+                    assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+                    fmt = IF_DV_2E;
+                    break;
+                }
+            }
+            FALLTHROUGH;
 
-/*static*/ emitter::code_t emitter::insEncodeCond(insCond cond)
-{
-    emitter::code_t uimm = (emitter::code_t)cond;
-    return uimm << 12;
-}
+        case INS_ins: // (MOV from general)
+            assert(insOptsNone(opt));
+            assert(isValidVectorElemsize(size));
+            assert(isVectorRegister(reg1));
+            assert(isGeneralRegisterOrZR(reg2));
+            elemsize = size;
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+            fmt = IF_DV_2C;
+            break;
 
-/*****************************************************************************
- *
- *  Returns an encoding for the condition code with the lowest bit inverted (marked by invert(<cond>) in the
- *  architecture manual).
- */
+        case INS_umov: // (MOV to general)
+            assert(insOptsNone(opt));
+            assert(isValidVectorElemsize(size));
+            assert(isGeneralRegister(reg1));
+            assert(isVectorRegister(reg2));
+            elemsize = size;
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+            fmt = IF_DV_2B;
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeInvertedCond(insCond cond)
-{
-    emitter::code_t uimm = (emitter::code_t)cond;
-    uimm ^= 1; // invert the lowest bit
-    return uimm << 12;
-}
+        case INS_smov:
+            assert(insOptsNone(opt));
+            assert(isValidVectorElemsize(size));
+            assert(size != EA_8BYTE); // no encoding, use INS_umov
+            assert(isGeneralRegister(reg1));
+            assert(isVectorRegister(reg2));
+            elemsize = size;
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+            fmt = IF_DV_2B;
+            break;
 
-/*****************************************************************************
- *
- *  Returns an encoding for the specified flags.
- */
+        case INS_add:
+        case INS_sub:
+            setFlags = false;
+            isAddSub = true;
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeFlags(insCflags flags)
-{
-    emitter::code_t uimm = (emitter::code_t)flags;
-    return uimm;
-}
+        case INS_adds:
+        case INS_subs:
+            setFlags = true;
+            isAddSub = true;
+            break;
 
-/*****************************************************************************
- *
- *  Returns the encoding for the Shift Count bits to be used for Arm64 encodings
- */
+        case INS_ldrsb:
+        case INS_ldursb:
+            // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register
+            assert(isValidGeneralDatasize(size));
+            unscaledOp = (ins == INS_ldursb);
+            scale      = 0;
+            isLdSt     = true;
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeShiftCount(ssize_t imm, emitAttr size)
-{
-    assert((imm & 0x003F) == imm);
-    assert(((imm & 0x0020) == 0) || (size == EA_8BYTE));
+        case INS_ldrsh:
+        case INS_ldursh:
+            // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register
+            assert(isValidGeneralDatasize(size));
+            unscaledOp = (ins == INS_ldursh);
+            scale      = 1;
+            isLdSt     = true;
+            break;
 
-    return (emitter::code_t)imm << 10;
-}
+        case INS_ldrsw:
+        case INS_ldursw:
+            // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register
+            assert(size == EA_8BYTE);
+            unscaledOp = (ins == INS_ldursw);
+            scale      = 2;
+            isLdSt     = true;
+            break;
 
-/*****************************************************************************
- *
- *  Returns the encoding to select a 64-bit datasize for an Arm64 instruction
- */
+        case INS_ldrb:
+        case INS_strb:
+            // size is ignored
+            unscaledOp = false;
+            scale      = 0;
+            isLdSt     = true;
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeDatasize(emitAttr size)
-{
-    if (size == EA_8BYTE)
-    {
-        return 0x80000000; // set the bit at location 31
-    }
-    else
-    {
-        assert(size == EA_4BYTE);
-        return 0;
-    }
-}
+        case INS_ldapurb:
+        case INS_stlurb:
+        case INS_ldurb:
+        case INS_sturb:
+            // size is ignored
+            unscaledOp = true;
+            scale      = 0;
+            isLdSt     = true;
+            break;
 
-/*****************************************************************************
- *
- *  Returns the encoding to select the datasize for the general load/store Arm64 instructions
- *
- */
+        case INS_ldrh:
+        case INS_strh:
+            // size is ignored
+            unscaledOp = false;
+            scale      = 1;
+            isLdSt     = true;
+            break;
+
+        case INS_ldurh:
+        case INS_ldapurh:
+        case INS_sturh:
+        case INS_stlurh:
+            // size is ignored
+            unscaledOp = true;
+            scale      = 0;
+            isLdSt     = true;
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeDatasizeLS(emitter::code_t code, emitAttr size)
-{
-    bool exclusive = ((code & 0x35000000) == 0);
-    bool atomic    = ((code & 0x31200C00) == 0x30200000);
+        case INS_ldr:
+        case INS_str:
+            // Is the target a vector register?
+            if (isVectorRegister(reg1))
+            {
+                assert(isValidVectorLSDatasize(size));
+                assert(isGeneralRegisterOrSP(reg2));
+                isSIMD = true;
+            }
+            else
+            {
+                assert(isValidGeneralDatasize(size));
+            }
+            unscaledOp = false;
+            scale      = NaturalScale_helper(size);
+            isLdSt     = true;
+            isLdrStr   = true;
+            break;
 
-    if ((code & 0x00800000) && !exclusive && !atomic) // Is this a sign-extending opcode? (i.e. ldrsw, ldrsh, ldrsb)
-    {
-        if ((code & 0x80000000) == 0) // Is it a ldrsh or ldrsb and not ldrsw ?
-        {
-            if (EA_SIZE(size) != EA_8BYTE) // Do we need to encode the 32-bit Rt size bit?
+        case INS_ldur:
+        case INS_stur:
+        case INS_ldapur:
+        case INS_stlur:
+            // Is the target a vector register?
+            if (isVectorRegister(reg1))
             {
-                return 0x00400000; // set the bit at location 22
+                assert(isValidVectorLSDatasize(size));
+                assert(isGeneralRegisterOrSP(reg2));
+                isSIMD = true;
             }
-        }
-    }
-    else if (code & 0x80000000) // Is this a ldr/str/ldur/stur opcode?
-    {
-        if (EA_SIZE(size) == EA_8BYTE) // Do we need to encode the 64-bit size bit?
-        {
-            return 0x40000000; // set the bit at location 30
-        }
-    }
-    return 0;
-}
+            else
+            {
+                assert(isValidGeneralDatasize(size));
+            }
+            unscaledOp = true;
+            scale      = 0;
+            isLdSt     = true;
+            break;
 
-/*****************************************************************************
- *
- *  Returns the encoding to select the datasize for the vector load/store Arm64 instructions
- *
- */
+        case INS_ld2:
+        case INS_ld3:
+        case INS_ld4:
+        case INS_st2:
+        case INS_st3:
+        case INS_st4:
+            assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1
+            FALLTHROUGH;
 
-/*static*/ emitter::code_t emitter::insEncodeDatasizeVLS(emitter::code_t code, emitAttr size)
-{
-    code_t result = 0;
+        case INS_ld1:
+        case INS_ld1_2regs:
+        case INS_ld1_3regs:
+        case INS_ld1_4regs:
+        case INS_st1:
+        case INS_st1_2regs:
+        case INS_st1_3regs:
+        case INS_st1_4regs:
+            assert(isVectorRegister(reg1));
+            assert(isGeneralRegisterOrSP(reg2));
 
-    // Check bit 29
-    if ((code & 0x20000000) == 0)
-    {
-        // LDR literal
+            reg2 = encodingSPtoZR(reg2);
 
-        if (size == EA_16BYTE)
-        {
-            // set the operation size in bit 31
-            result = 0x80000000;
-        }
-        else if (size == EA_8BYTE)
-        {
-            // set the operation size in bit 30
-            result = 0x40000000;
-        }
-        else
-        {
-            assert(size == EA_4BYTE);
-            // no bits are set
-            result = 0x00000000;
-        }
-    }
-    else
-    {
-        // LDR non-literal
+            if (insOptsAnyArrangement(opt))
+            {
+                registerListSize = insGetRegisterListSize(ins);
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                assert((size * registerListSize) == imm);
 
-        if (size == EA_16BYTE)
-        {
-            // The operation size in bits 31 and 30 are zero
-            // Bit 23 specifies a 128-bit Load/Store
-            result = 0x00800000;
-        }
-        else if (size == EA_8BYTE)
-        {
-            // set the operation size in bits 31 and 30
-            result = 0xC0000000;
-        }
-        else if (size == EA_4BYTE)
-        {
-            // set the operation size in bit 31
-            result = 0x80000000;
-        }
-        else if (size == EA_2BYTE)
-        {
-            // set the operation size in bit 30
-            result = 0x40000000;
-        }
-        else
-        {
-            assert(size == EA_1BYTE);
-            // The operation size in bits 31 and 30 are zero
-            result = 0x00000000;
-        }
-    }
+                // Load/Store multiple structures  post-indexed by an immediate
+                fmt = IF_LS_2E;
+            }
+            else
+            {
+                assert(insOptsNone(opt));
+                assert((ins != INS_ld1_2regs) && (ins != INS_ld1_3regs) && (ins != INS_ld1_4regs) &&
+                       (ins != INS_st1_2regs) && (ins != INS_st1_3regs) && (ins != INS_st1_4regs));
 
-    // Or in bit 26 to indicate a Vector register is used as 'target'
-    result |= 0x04000000;
+                elemsize = size;
+                assert(isValidVectorElemsize(elemsize));
+                assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
 
-    return result;
-}
+                // Load/Store single structure  base register
+                fmt = IF_LS_2F;
+            }
+            break;
 
-/*****************************************************************************
- *
- *  Returns the encoding to select the datasize for the vector load/store Arm64 instructions
- *
- */
+        case INS_ld1r:
+        case INS_ld2r:
+        case INS_ld3r:
+        case INS_ld4r:
+            assert(isVectorRegister(reg1));
+            assert(isGeneralRegisterOrSP(reg2));
 
-/*static*/ emitter::code_t emitter::insEncodeDatasizeVPLS(emitter::code_t code, emitAttr size)
-{
-    code_t result = 0;
+            assert(isValidVectorDatasize(size));
+            assert(isValidArrangement(size, opt));
 
-    if (size == EA_16BYTE)
-    {
-        // The operation size in bits 31 and 30 are zero
-        // Bit 23 specifies a 128-bit Load/Store
-        result = 0x80000000;
-    }
-    else if (size == EA_8BYTE)
-    {
-        // set the operation size in bits 31 and 30
-        result = 0x40000000;
-    }
-    else if (size == EA_4BYTE)
-    {
-        // set the operation size in bit 31
-        result = 0x00000000;
-    }
+            elemsize         = optGetElemsize(opt);
+            registerListSize = insGetRegisterListSize(ins);
+            assert((elemsize * registerListSize) == imm);
 
-    // Or in bit 26 to indicate a Vector register is used as 'target'
-    result |= 0x04000000;
+            // Load single structure and replicate  post-indexed by an immediate
+            reg2 = encodingSPtoZR(reg2);
+            fmt  = IF_LS_2E;
+            break;
 
-    return result;
-}
+        default:
+            // fallback to emit SVE instructions.
+            return emitInsSve_R_R_I(ins, attr, reg1, reg2, imm, opt, sopt);
 
-/*****************************************************************************
- *
- *  Returns the encoding to set the size bit and the N bits for a 'bitfield' instruction
- *
- */
+    } // end switch (ins)
 
-/*static*/ emitter::code_t emitter::insEncodeDatasizeBF(emitter::code_t code, emitAttr size)
-{
-    // is bit 30 equal to 0?
-    if ((code & 0x40000000) == 0) // is the opcode one of extr, sxtb, sxth or sxtw
+    if (isLdSt)
     {
-        if (size == EA_8BYTE) // Do we need to set the sf and N bits?
+        assert(!isAddSub);
+
+        if (isSIMD)
         {
-            return 0x80400000; // set the sf-bit at location 31 and the N-bit at location 22
+            assert(isValidVectorLSDatasize(size));
+            assert(isVectorRegister(reg1));
+            assert((scale >= 0) && (scale <= 4));
+        }
+        else
+        {
+            assert(isValidGeneralLSDatasize(size));
+            assert(isGeneralRegisterOrZR(reg1));
+            assert((scale >= 0) && (scale <= 3));
         }
-    }
-    return 0; // don't set any bits
-}
 
-/*****************************************************************************
- *
- *  Returns the encoding to select the 64/128-bit datasize for an Arm64 vector instruction
- */
+        assert(isGeneralRegisterOrSP(reg2));
 
-/*static*/ emitter::code_t emitter::insEncodeVectorsize(emitAttr size)
-{
-    if (size == EA_16BYTE)
-    {
-        return 0x40000000; // set the bit at location 30
-    }
-    else
-    {
-        assert(size == EA_8BYTE);
-        return 0;
-    }
-}
+        // Load/Store reserved encodings:
+        if (insOptsIndexed(opt))
+        {
+            assert(reg1 != reg2);
+        }
 
-/*****************************************************************************
- *
- *  Returns the encoding to set the vector length specifier (vl) for an Arm64 SVE instruction
- */
+        reg2 = encodingSPtoZR(reg2);
 
-/*static*/ emitter::code_t emitter::insEncodeVectorLengthSpecifier(instrDesc* id)
-{
-    assert(id != nullptr);
-    assert(insOptsScalableStandard(id->idInsOpt()));
+        ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+        if (imm == 0 || EA_IS_CNS_TLSGD_RELOC(attr))
+        {
+            assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero
 
-    if (id->idVectorLength4x())
-    {
-        switch (id->idInsFmt())
+            fmt = IF_LS_2A;
+        }
+        else if (insOptsIndexed(opt) || unscaledOp || (imm < 0) || ((imm & mask) != 0))
         {
-            case IF_SVE_DL_2A:
-                return 0x400; // set the bit at location 10
-            case IF_SVE_DY_3A:
-                return 0x2000; // set the bit at location 13
-            default:
-                assert(!"Unexpected format");
-                break;
+            if (isValidSimm<9>(imm))
+            {
+                fmt = IF_LS_2C;
+            }
+            else
+            {
+                assert(!"Instruction cannot be encoded: IF_LS_2C");
+            }
         }
-    }
-
-    return 0;
-}
+        else if (imm > 0)
+        {
+            assert(insOptsNone(opt));
+            assert(!unscaledOp);
 
-/*****************************************************************************
- *
- *  Returns the encoding to select 'index' for an Arm64 vector elem instruction
- */
-/*static*/ emitter::code_t emitter::insEncodeVectorIndex(emitAttr elemsize, ssize_t index)
-{
-    code_t bits = (code_t)index;
-    if (elemsize == EA_1BYTE)
-    {
-        bits <<= 1;
-        bits |= 1;
-    }
-    else if (elemsize == EA_2BYTE)
-    {
-        bits <<= 2;
-        bits |= 2;
-    }
-    else if (elemsize == EA_4BYTE)
-    {
-        bits <<= 3;
-        bits |= 4;
-    }
-    else
-    {
-        assert(elemsize == EA_8BYTE);
-        bits <<= 4;
-        bits |= 8;
-    }
-    assert((bits >= 1) && (bits <= 0x1f));
+            if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
+            {
+                imm >>= scale; // The immediate is scaled by the size of the ld/st
 
-    return (bits << 16); // bits at locations [20,19,18,17,16]
-}
+                fmt = IF_LS_2B;
+            }
+            else
+            {
+                assert(!"Instruction cannot be encoded: IF_LS_2B");
+            }
+        }
 
-/*****************************************************************************
- *
- *  Returns the encoding to select 'index2' for an Arm64 'ins' elem instruction
- */
-/*static*/ emitter::code_t emitter::insEncodeVectorIndex2(emitAttr elemsize, ssize_t index2)
-{
-    code_t bits = (code_t)index2;
-    if (elemsize == EA_1BYTE)
-    {
-        // bits are correct
-    }
-    else if (elemsize == EA_2BYTE)
-    {
-        bits <<= 1;
-    }
-    else if (elemsize == EA_4BYTE)
-    {
-        bits <<= 2;
+        // Try to optimize a load/store with an alternative instruction.
+        if (isLdrStr && emitComp->opts.OptimizationEnabled() &&
+            OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, false, -1, -1 DEBUG_ARG(false)))
+        {
+            return;
+        }
     }
-    else
+    else if (isAddSub)
     {
-        assert(elemsize == EA_8BYTE);
-        bits <<= 3;
-    }
-    assert((bits >= 0) && (bits <= 0xf));
-
-    return (bits << 11); // bits at locations [14,13,12,11]
-}
-
-/*****************************************************************************
- *
- *  Returns the encoding to select the 'index' for an Arm64 'mul' by element instruction
- */
-/*static*/ emitter::code_t emitter::insEncodeVectorIndexLMH(emitAttr elemsize, ssize_t index)
-{
-    code_t bits = 0;
+        assert(!isLdSt);
+        assert(insOptsNone(opt));
 
-    if (elemsize == EA_2BYTE)
-    {
-        assert((index >= 0) && (index <= 7));
-        if (index & 0x4)
+        if (setFlags) // Can't encode SP with setFlags
         {
-            bits |= (1 << 11); // set bit 11 'H'
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegister(reg2));
         }
-        if (index & 0x2)
+        else
         {
-            bits |= (1 << 21); // set bit 21 'L'
+            assert(isGeneralRegisterOrSP(reg1));
+            assert(isGeneralRegisterOrSP(reg2));
+
+            // Is it just a mov?
+            if (imm == 0)
+            {
+                emitIns_Mov(INS_mov, attr, reg1, reg2, /* canSkip */ true);
+                return;
+            }
+
+            reg1 = encodingSPtoZR(reg1);
+            reg2 = encodingSPtoZR(reg2);
         }
-        if (index & 0x1)
+
+        if (unsigned_abs(imm) <= 0x0fff)
         {
-            bits |= (1 << 20); // set bit 20 'M'
+            if (imm < 0)
+            {
+                ins = insReverse(ins);
+                imm = -imm;
+            }
+            assert(isValidUimm<12>(imm));
+            fmt = IF_DI_2A;
         }
-    }
-    else if (elemsize == EA_4BYTE)
-    {
-        assert((index >= 0) && (index <= 3));
-        if (index & 0x2)
+        else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding
         {
-            bits |= (1 << 11); // set bit 11 'H'
+            // Encoding will use a 12-bit left shift of the immediate
+            opt = INS_OPTS_LSL12;
+            if (imm < 0)
+            {
+                ins = insReverse(ins);
+                imm = -imm;
+            }
+            assert((imm & 0xfff) == 0);
+            imm >>= 12;
+            assert(isValidUimm<12>(imm));
+            fmt = IF_DI_2A;
         }
-        if (index & 0x1)
+        else
         {
-            bits |= (1 << 21); // set bit 21 'L'
+            assert(!"Instruction cannot be encoded: IF_DI_2A");
         }
     }
-    else
-    {
-        assert(!"Invalid 'elemsize' value");
-    }
 
-    return bits;
-}
+    assert(fmt != IF_NONE);
 
-/*****************************************************************************
- *
- *  Returns the encoding for a shift instruction, ready for insertion into an instruction.
- */
-/*static*/ emitter::code_t emitter::insEncodeShiftImmediate(emitAttr size, bool isRightShift, ssize_t shiftAmount)
-{
-    if (isRightShift)
-    {
-        // The right shift amount must be in the range 1 to the destination element width in bits.
-        assert((shiftAmount > 0) && (shiftAmount <= getBitWidth(size)));
-        return (code_t)(2 * getBitWidth(size) - shiftAmount);
-    }
-    else
+    instrDesc* id = emitNewInstrSC(attr, imm);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+
+    if (EA_IS_CNS_TLSGD_RELOC(attr))
     {
-        // The left shift amount must in the range 0 to the element width in bits minus 1.
-        assert(shiftAmount < getBitWidth(size));
-        return (code_t)(getBitWidth(size) + shiftAmount);
+        assert(imm != 0);
+        id->idSetTlsGD();
     }
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 vector instruction
+ *  Add an instruction referencing two registers and a floating point constant.
  */
 
-/*static*/ emitter::code_t emitter::insEncodeElemsize(emitAttr size)
+void emitter::emitIns_R_R_F(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, double immDbl, insOpts opt /* = INS_OPTS_NONE */)
 {
-    if (size == EA_8BYTE)
-    {
-        return 0x00C00000; // set the bit at location 23 and 22
-    }
-    else if (size == EA_4BYTE)
-    {
-        return 0x00800000; // set the bit at location 23
-    }
-    else if (size == EA_2BYTE)
-    {
-        return 0x00400000; // set the bit at location 22
-    }
-    assert(size == EA_1BYTE);
-    return 0x00000000;
+    // Currently, only SVE instructions use this format.
+    emitInsSve_R_R_F(ins, attr, reg1, reg2, immDbl, opt);
 }
 
 /*****************************************************************************
  *
- *  Returns the encoding to select the 4/8 byte elemsize for an Arm64 float vector instruction
+ *  Add an instruction referencing two registers and a constant.
+ *  Also checks for a large immediate that needs a second instruction
+ *  and will load it in reg1
+ *
+ *  - Supports instructions: add, adds, sub, subs, and, ands, eor and orr
+ *  - Requires that reg1 is a general register and not SP or ZR
+ *  - Requires that reg1 != reg2
  */
-
-/*static*/ emitter::code_t emitter::insEncodeFloatElemsize(emitAttr size)
+void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm)
 {
-    if (size == EA_8BYTE)
+    assert(isGeneralRegister(reg1));
+    assert(reg1 != reg2);
+
+    bool immFits = true;
+
+    switch (ins)
     {
-        return 0x00400000; // set the bit at location 22
-    }
-    assert(size == EA_4BYTE);
-    return 0x00000000;
-}
+        case INS_add:
+        case INS_adds:
+        case INS_sub:
+        case INS_subs:
+            immFits = emitter::emitIns_valid_imm_for_add(imm, attr);
+            break;
+
+        case INS_ands:
+        case INS_and:
+        case INS_eor:
+        case INS_orr:
+            immFits = emitter::emitIns_valid_imm_for_alu(imm, attr);
+            break;
+
+        default:
+            assert(!"Unsupported instruction in emitIns_R_R_Imm");
+    }
 
-// Returns the encoding to select the index for an Arm64 float vector by element instruction
-/*static*/ emitter::code_t emitter::insEncodeFloatIndex(emitAttr elemsize, ssize_t index)
-{
-    code_t result = 0x00000000;
-    if (elemsize == EA_8BYTE)
+    if (immFits)
     {
-        assert((index >= 0) && (index <= 1));
-        if (index == 1)
-        {
-            result |= 0x00000800; // 'H' - set the bit at location 11
-        }
+        emitIns_R_R_I(ins, attr, reg1, reg2, imm);
     }
     else
     {
-        assert(elemsize == EA_4BYTE);
-        assert((index >= 0) && (index <= 3));
-        if (index & 2)
-        {
-            result |= 0x00000800; // 'H' - set the bit at location 11
-        }
-        if (index & 1)
-        {
-            result |= 0x00200000; // 'L' - set the bit at location 21
-        }
+        // Load 'imm' into the reg1 register
+        // then issue:   'ins'  reg1, reg2, reg1
+        //
+        codeGen->instGen_Set_Reg_To_Imm(attr, reg1, imm);
+        emitIns_R_R_R(ins, attr, reg1, reg2, reg1);
     }
-    return result;
 }
 
 /*****************************************************************************
  *
- *  Returns the encoding to select the vector elemsize for an Arm64 ld/st# vector instruction
+ *  Add an instruction referencing three registers.
  */
 
-/*static*/ emitter::code_t emitter::insEncodeVLSElemsize(emitAttr size)
+void emitter::emitIns_R_R_R(instruction     ins,
+                            emitAttr        attr,
+                            regNumber       reg1,
+                            regNumber       reg2,
+                            regNumber       reg3,
+                            insOpts         opt /* = INS_OPTS_NONE */,
+                            insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
 {
-    code_t result = 0x00000000;
+    emitAttr  size     = EA_SIZE(attr);
+    emitAttr  elemsize = EA_UNKNOWN;
+    insFormat fmt      = IF_NONE;
 
-    switch (size)
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
     {
-        case EA_1BYTE:
-        {
-            result |= 0x0000; // clear bits 10 and 11
-            break;
-        }
+        case INS_mul:
+        case INS_smull:
+        case INS_umull:
+            if (insOptsAnyArrangement(opt))
+            {
+                // ASIMD instruction
+                assert(isVectorRegister(reg1));
+                assert(isVectorRegister(reg2));
+                assert(isVectorRegister(reg3));
+                assert(isValidArrangement(size, opt));
+                assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved
+                fmt = IF_DV_3A;
+                break;
+            }
+            // Base instruction
+            FALLTHROUGH;
 
-        case EA_2BYTE:
-        {
-            result |= 0x0400; // set bit at location 10, clear bit at location 11
+        case INS_lsl:
+        case INS_lsr:
+        case INS_asr:
+        case INS_ror:
+        case INS_adc:
+        case INS_adcs:
+        case INS_sbc:
+        case INS_sbcs:
+        case INS_udiv:
+        case INS_sdiv:
+        case INS_mneg:
+        case INS_smnegl:
+        case INS_smulh:
+        case INS_umnegl:
+        case INS_umulh:
+        case INS_lslv:
+        case INS_lsrv:
+        case INS_asrv:
+        case INS_rorv:
+        case INS_crc32b:
+        case INS_crc32h:
+        case INS_crc32w:
+        case INS_crc32x:
+        case INS_crc32cb:
+        case INS_crc32ch:
+        case INS_crc32cw:
+        case INS_crc32cx:
+            assert(insOptsNone(opt));
+            assert(isValidGeneralDatasize(size));
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            fmt = IF_DR_3A;
             break;
-        }
 
-        case EA_4BYTE:
-        {
-            result |= 0x0800; // clear bit at location 10, set bit at location 11
-            break;
-        }
+        case INS_add:
+        case INS_sub:
+            if (isVectorRegister(reg1))
+            {
+                // ASIMD instruction
+                assert(isVectorRegister(reg2));
+                assert(isVectorRegister(reg3));
 
-        case EA_8BYTE:
-        {
-            result |= 0x0C00; // set bits at location 10 and 11
-            break;
-        }
+                if (insOptsAnyArrangement(opt))
+                {
+                    // Vector operation
+                    assert(opt != INS_OPTS_1D); // Reserved encoding
+                    assert(isValidVectorDatasize(size));
+                    assert(isValidArrangement(size, opt));
+                    fmt = IF_DV_3A;
+                }
+                else
+                {
+                    // Scalar operation
+                    assert(insOptsNone(opt));
+                    assert(size == EA_8BYTE);
+                    fmt = IF_DV_3E;
+                }
+                break;
+            }
+            // Base instruction
+            FALLTHROUGH;
 
-        default:
-        {
-            assert(!"Invalid element size");
+        case INS_adds:
+        case INS_subs:
+            emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, opt);
+            return;
+
+        case INS_cmeq:
+        case INS_cmge:
+        case INS_cmgt:
+        case INS_cmhi:
+        case INS_cmhs:
+        case INS_cmtst:
+        case INS_srshl:
+        case INS_sshl:
+        case INS_urshl:
+        case INS_ushl:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidArrangement(size, opt));
+                assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved
+                fmt = IF_DV_3A;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(size == EA_8BYTE); // Only Int64/UInt64 supported
+                fmt = IF_DV_3E;
+            }
             break;
-        }
-    }
 
-    return result;
-}
+        case INS_sqadd:
+        case INS_sqrshl:
+        case INS_sqshl:
+        case INS_sqsub:
+        case INS_uqadd:
+        case INS_uqrshl:
+        case INS_uqshl:
+        case INS_uqsub:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
 
-/*****************************************************************************
- *
- *  Returns the encoding to select the index for an Arm64 ld/st# vector by element instruction
- */
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidArrangement(size, opt));
+                assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved
+                fmt = IF_DV_3A;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(isValidVectorElemsize(size));
+                fmt = IF_DV_3E;
+            }
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeVLSIndex(emitAttr size, ssize_t index)
-{
-    code_t result = 0x00000000;
+        case INS_fcmeq:
+        case INS_fcmge:
+        case INS_fcmgt:
+        case INS_frecps:
+        case INS_frsqrts:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
 
-    switch (size)
-    {
-        case EA_1BYTE:
-        {
-            // Q  = ?   - bit location 30
-            // xx = 00  - bit location 14 and 15
-            // S = ?    - bit location 12
-            // ss = ?0  - bit location 10 and 11
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                elemsize = optGetElemsize(opt);
+                assert((elemsize == EA_8BYTE) || (elemsize == EA_4BYTE)); // Only Double/Float supported
+                assert(opt != INS_OPTS_1D);                               // Reserved encoding
+                fmt = IF_DV_3B;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert((size == EA_8BYTE) || (size == EA_4BYTE)); // Only Double/Float supported
+                fmt = IF_DV_3D;
+            }
+            break;
 
-            result |= (index & 0x8) << 27;
-            result |= (index & 0x4) << 10;
-            result |= (index & 0x3) << 10;
+        case INS_mla:
+        case INS_mls:
+        case INS_saba:
+        case INS_sabd:
+        case INS_shadd:
+        case INS_shsub:
+        case INS_smax:
+        case INS_smaxp:
+        case INS_smin:
+        case INS_sminp:
+        case INS_srhadd:
+        case INS_uaba:
+        case INS_uabd:
+        case INS_uhadd:
+        case INS_uhsub:
+        case INS_umax:
+        case INS_umaxp:
+        case INS_umin:
+        case INS_uminp:
+        case INS_urhadd:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isValidArrangement(size, opt));
+            assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved
+            fmt = IF_DV_3A;
             break;
-        }
 
-        case EA_2BYTE:
-        {
-            // Q  = ?   - bit location 30
-            // xx = 01  - bit location 14 and 15
-            // S = ?    - bit location 12
-            // ss = ??  - bit location 10 and 11
+        case INS_addp:
+        case INS_uzp1:
+        case INS_uzp2:
+        case INS_zip1:
+        case INS_zip2:
+        case INS_trn1:
+        case INS_trn2:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isValidArrangement(size, opt));
+            assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved
+            fmt = IF_DV_3A;
+            break;
 
-            result |= (index & 0x4) << 28;
-            result |= 0x4000;
-            result |= (index & 0x2) << 11;
-            result |= (index & 0x1) << 11;
+        case INS_mov:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(reg2 == reg3);
+            assert(isValidVectorDatasize(size));
+            // INS_mov is an alias for INS_orr (vector register)
+            if (opt == INS_OPTS_NONE)
+            {
+                elemsize = EA_1BYTE;
+                opt      = optMakeArrangement(size, elemsize);
+            }
+            assert(isValidArrangement(size, opt));
+            fmt = IF_DV_3C;
             break;
-        }
 
-        case EA_4BYTE:
-        {
-            // Q  = ?   - bit location 30
-            // xx = 10  - bit location 14 and 15
-            // S = ?    - bit location 12
-            // ss = 00  - bit location 10 and 11
+        case INS_and:
+        case INS_bic:
+        case INS_eor:
+        case INS_orr:
+        case INS_orn:
+        case INS_tbl:
+        case INS_tbl_2regs:
+        case INS_tbl_3regs:
+        case INS_tbl_4regs:
+        case INS_tbx:
+        case INS_tbx_2regs:
+        case INS_tbx_3regs:
+        case INS_tbx_4regs:
+            if (isVectorRegister(reg1))
+            {
+                assert(isValidVectorDatasize(size));
+                assert(isVectorRegister(reg2));
+                assert(isVectorRegister(reg3));
+                if (opt == INS_OPTS_NONE)
+                {
+                    elemsize = EA_1BYTE;
+                    opt      = optMakeArrangement(size, elemsize);
+                }
+                assert(isValidArrangement(size, opt));
+                fmt = IF_DV_3C;
+                break;
+            }
+            FALLTHROUGH;
 
-            result |= (index & 0x2) << 29;
-            result |= 0x8000;
-            result |= (index & 0x1) << 12;
-            break;
-        }
+        case INS_ands:
+        case INS_bics:
+        case INS_eon:
+            emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, INS_OPTS_NONE);
+            return;
 
-        case EA_8BYTE:
-        {
-            // Q  = ?   - bit location 30
-            // xx = 10  - bit location 14 and 15
-            // S = 0    - bit location 12
-            // ss = 01  - bit location 10 and 11
+        case INS_bsl:
+        case INS_bit:
+        case INS_bif:
+            assert(isValidVectorDatasize(size));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            if (opt == INS_OPTS_NONE)
+            {
+                elemsize = EA_1BYTE;
+                opt      = optMakeArrangement(size, elemsize);
+            }
+            assert(isValidArrangement(size, opt));
+            fmt = IF_DV_3C;
+            break;
 
-            result |= (index & 0x1) << 30;
-            result |= 0x8400;
+        case INS_fadd:
+        case INS_fsub:
+        case INS_fdiv:
+        case INS_fmax:
+        case INS_fmaxnm:
+        case INS_fmin:
+        case INS_fminnm:
+        case INS_fabd:
+        case INS_fmul:
+        case INS_fmulx:
+        case INS_facge:
+        case INS_facgt:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                elemsize = optGetElemsize(opt);
+                assert(isValidVectorElemsizeFloat(elemsize));
+                assert(opt != INS_OPTS_1D); // Reserved encoding
+                fmt = IF_DV_3B;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(isValidScalarDatasize(size));
+                fmt = IF_DV_3D;
+            }
             break;
-        }
 
-        default:
-        {
-            assert(!"Invalid element size");
+        case INS_fnmul:
+            // Scalar operation
+            assert(insOptsNone(opt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isValidScalarDatasize(size));
+            fmt = IF_DV_3D;
             break;
-        }
-    }
 
-    return result;
-}
+        case INS_faddp:
+        case INS_fmaxnmp:
+        case INS_fmaxp:
+        case INS_fminnmp:
+        case INS_fminp:
 
-/*****************************************************************************
- *
- *  Returns the encoding to select the fcvt operation for Arm64 instructions
- */
-/*static*/ emitter::code_t emitter::insEncodeConvertOpt(insFormat fmt, insOpts conversion)
-{
-    code_t result = 0;
-    switch (conversion)
-    {
-        case INS_OPTS_S_TO_D: // Single to Double
-            assert(fmt == IF_DV_2J);
-            result = 0x00008000; // type=00, opc=01
-            break;
+        case INS_fmla:
+        case INS_fmls:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsAnyArrangement(opt)); // no scalar encoding, use 4-operand 'fmadd' or 'fmsub'
 
-        case INS_OPTS_D_TO_S: // Double to Single
-            assert(fmt == IF_DV_2J);
-            result = 0x00400000; // type=01, opc=00
+            // Vector operation
+            assert(isValidVectorDatasize(size));
+            assert(isValidArrangement(size, opt));
+            elemsize = optGetElemsize(opt);
+            assert(isValidVectorElemsizeFloat(elemsize));
+            assert(opt != INS_OPTS_1D); // Reserved encoding
+            fmt = IF_DV_3B;
             break;
 
-        case INS_OPTS_H_TO_S: // Half to Single
-            assert(fmt == IF_DV_2J);
-            result = 0x00C00000; // type=11, opc=00
-            break;
+        case INS_ldr:
+        case INS_ldrb:
+        case INS_ldrh:
+        case INS_ldrsb:
+        case INS_ldrsh:
+        case INS_ldrsw:
+        case INS_str:
+        case INS_strb:
+        case INS_strh:
+            emitIns_R_R_R_Ext(ins, attr, reg1, reg2, reg3, opt);
+            return;
 
-        case INS_OPTS_H_TO_D: // Half to Double
-            assert(fmt == IF_DV_2J);
-            result = 0x00C08000; // type=11, opc=01
-            break;
+        case INS_ldp:
+        case INS_ldpsw:
+        case INS_ldnp:
+        case INS_stp:
+        case INS_stnp:
+            emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0);
+            return;
 
-        case INS_OPTS_S_TO_H: // Single to Half
-            assert(fmt == IF_DV_2J);
-            result = 0x00018000; // type=00, opc=11
+        case INS_stxr:
+        case INS_stxrb:
+        case INS_stxrh:
+        case INS_stlxr:
+        case INS_stlxrb:
+        case INS_stlxrh:
+            assert(isGeneralRegisterOrZR(reg1));
+            assert(isGeneralRegisterOrZR(reg2));
+            assert(isGeneralRegisterOrSP(reg3));
+            fmt = IF_LS_3D;
             break;
 
-        case INS_OPTS_D_TO_H: // Double to Half
-            assert(fmt == IF_DV_2J);
-            result = 0x00418000; // type=01, opc=11
+        case INS_casb:
+        case INS_casab:
+        case INS_casalb:
+        case INS_caslb:
+        case INS_cash:
+        case INS_casah:
+        case INS_casalh:
+        case INS_caslh:
+        case INS_cas:
+        case INS_casa:
+        case INS_casal:
+        case INS_casl:
+        case INS_ldaddb:
+        case INS_ldaddab:
+        case INS_ldaddalb:
+        case INS_ldaddlb:
+        case INS_ldaddh:
+        case INS_ldaddah:
+        case INS_ldaddalh:
+        case INS_ldaddlh:
+        case INS_ldadd:
+        case INS_ldadda:
+        case INS_ldaddal:
+        case INS_ldaddl:
+        case INS_ldclral:
+        case INS_ldsetal:
+        case INS_swpb:
+        case INS_swpab:
+        case INS_swpalb:
+        case INS_swplb:
+        case INS_swph:
+        case INS_swpah:
+        case INS_swpalh:
+        case INS_swplh:
+        case INS_swp:
+        case INS_swpa:
+        case INS_swpal:
+        case INS_swpl:
+            assert(isGeneralRegisterOrZR(reg1));
+            assert(isGeneralRegisterOrZR(reg2));
+            assert(isGeneralRegisterOrSP(reg3));
+            fmt = IF_LS_3E;
             break;
 
-        case INS_OPTS_S_TO_4BYTE: // Single to INT32
-            assert(fmt == IF_DV_2H);
-            result = 0x00000000; // sf=0, type=00
+        case INS_sha256h:
+        case INS_sha256h2:
+        case INS_sha256su1:
+        case INS_sha1su0:
+        case INS_sha1c:
+        case INS_sha1p:
+        case INS_sha1m:
+            assert(isValidVectorDatasize(size));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            if (opt == INS_OPTS_NONE)
+            {
+                elemsize = EA_4BYTE;
+                opt      = optMakeArrangement(size, elemsize);
+            }
+            assert(isValidArrangement(size, opt));
+            fmt = IF_DV_3F;
             break;
 
-        case INS_OPTS_D_TO_4BYTE: // Double to INT32
-            assert(fmt == IF_DV_2H);
-            result = 0x00400000; // sf=0, type=01
-            break;
+        case INS_ld2:
+        case INS_ld3:
+        case INS_ld4:
+        case INS_st2:
+        case INS_st3:
+        case INS_st4:
+            assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1
+            FALLTHROUGH;
 
-        case INS_OPTS_S_TO_8BYTE: // Single to INT64
-            assert(fmt == IF_DV_2H);
-            result = 0x80000000; // sf=1, type=00
-            break;
+        case INS_ld1:
+        case INS_ld1_2regs:
+        case INS_ld1_3regs:
+        case INS_ld1_4regs:
+        case INS_st1:
+        case INS_st1_2regs:
+        case INS_st1_3regs:
+        case INS_st1_4regs:
+        case INS_ld1r:
+        case INS_ld2r:
+        case INS_ld3r:
+        case INS_ld4r:
+            assert(isVectorRegister(reg1));
+            assert(isGeneralRegisterOrSP(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isValidArrangement(size, opt));
 
-        case INS_OPTS_D_TO_8BYTE: // Double to INT64
-            assert(fmt == IF_DV_2H);
-            result = 0x80400000; // sf=1, type=01
+            // Load/Store multiple structures       post-indexed by a register
+            // Load single structure and replicate  post-indexed by a register
+            reg2 = encodingSPtoZR(reg2);
+            fmt  = IF_LS_3F;
             break;
 
-        case INS_OPTS_4BYTE_TO_S: // INT32 to Single
-            assert(fmt == IF_DV_2I);
-            result = 0x00000000; // sf=0, type=00
+        case INS_addhn:
+        case INS_raddhn:
+        case INS_rsubhn:
+        case INS_subhn:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(size == EA_8BYTE);
+            assert(isValidArrangement(size, opt));
+            assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = x is reserved.
+            fmt = IF_DV_3A;
             break;
 
-        case INS_OPTS_4BYTE_TO_D: // INT32 to Double
-            assert(fmt == IF_DV_2I);
-            result = 0x00400000; // sf=0, type=01
+        case INS_addhn2:
+        case INS_raddhn2:
+        case INS_rsubhn2:
+        case INS_subhn2:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(size == EA_16BYTE);
+            assert(isValidArrangement(size, opt));
+            assert(opt != INS_OPTS_2D); // The encoding size = 11, Q = x is reserved.
+            fmt = IF_DV_3A;
             break;
 
-        case INS_OPTS_8BYTE_TO_S: // INT64 to Single
-            assert(fmt == IF_DV_2I);
-            result = 0x80000000; // sf=1, type=00
+        case INS_sabal:
+        case INS_sabdl:
+        case INS_saddl:
+        case INS_saddw:
+        case INS_smlal:
+        case INS_smlsl:
+        case INS_ssubl:
+        case INS_ssubw:
+        case INS_uabal:
+        case INS_uabdl:
+        case INS_uaddl:
+        case INS_uaddw:
+        case INS_umlal:
+        case INS_umlsl:
+        case INS_usubl:
+        case INS_usubw:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(size == EA_8BYTE);
+            assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
+            fmt = IF_DV_3A;
             break;
 
-        case INS_OPTS_8BYTE_TO_D: // INT64 to Double
-            assert(fmt == IF_DV_2I);
-            result = 0x80400000; // sf=1, type=01
+        case INS_sabal2:
+        case INS_sabdl2:
+        case INS_saddl2:
+        case INS_saddw2:
+        case INS_smlal2:
+        case INS_smlsl2:
+        case INS_ssubl2:
+        case INS_ssubw2:
+        case INS_umlal2:
+        case INS_umlsl2:
+        case INS_smull2:
+        case INS_uabal2:
+        case INS_uabdl2:
+        case INS_uaddl2:
+        case INS_uaddw2:
+        case INS_usubl2:
+        case INS_umull2:
+        case INS_usubw2:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(size == EA_16BYTE);
+            assert((opt == INS_OPTS_16B) || (opt == INS_OPTS_8H) || (opt == INS_OPTS_4S));
+            fmt = IF_DV_3A;
             break;
 
-        default:
-            assert(!"Invalid 'conversion' value");
+        case INS_sqdmlal:
+        case INS_sqdmlsl:
+        case INS_sqdmull:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            if (insOptsAnyArrangement(opt))
+            {
+                // Vector operation
+                assert(size == EA_8BYTE);
+                assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
+                fmt = IF_DV_3A;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert((size == EA_2BYTE) || (size == EA_4BYTE));
+                fmt = IF_DV_3E;
+            }
             break;
-    }
-    return result;
-}
-
-/*****************************************************************************
- *
- *  Returns the encoding to have the Rn register be updated Pre/Post indexed
- *  or not updated
- */
-
-/*static*/ emitter::code_t emitter::insEncodeIndexedOpt(insOpts opt)
-{
-    assert(emitter::insOptsNone(opt) || emitter::insOptsIndexed(opt));
-
-    if (emitter::insOptsIndexed(opt))
-    {
-        if (emitter::insOptsPostIndex(opt))
-        {
-            return 0x00000400; // set the bit at location 10
-        }
-        else
-        {
-            assert(emitter::insOptsPreIndex(opt));
-            return 0x00000C00; // set the bit at location 10 and 11
-        }
-    }
-    else
-    {
-        assert(emitter::insOptsNone(opt));
-        return 0; // bits 10 and 11 are zero
-    }
-}
-
-/*****************************************************************************
- *
- *  Returns the encoding for a ldp/stp instruction to have the Rn register
- *  be updated Pre/Post indexed or not updated
- */
-
-/*static*/ emitter::code_t emitter::insEncodePairIndexedOpt(instruction ins, insOpts opt)
-{
-    assert(emitter::insOptsNone(opt) || emitter::insOptsIndexed(opt));
 
-    if ((ins == INS_ldnp) || (ins == INS_stnp))
-    {
-        assert(emitter::insOptsNone(opt));
-        return 0; // bits 23 and 24 are zero
-    }
-    else
-    {
-        if (emitter::insOptsIndexed(opt))
-        {
-            if (emitter::insOptsPostIndex(opt))
+        case INS_sqdmulh:
+        case INS_sqrdmlah:
+        case INS_sqrdmlsh:
+        case INS_sqrdmulh:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            if (insOptsAnyArrangement(opt))
             {
-                return 0x00800000; // set the bit at location 23
+                // Vector operation
+                assert(isValidVectorDatasize(size));
+                elemsize = optGetElemsize(opt);
+                assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE));
+                fmt = IF_DV_3A;
             }
             else
             {
-                assert(emitter::insOptsPreIndex(opt));
-                return 0x01800000; // set the bit at location 24 and 23
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert((size == EA_2BYTE) || (size == EA_4BYTE));
+                fmt = IF_DV_3E;
             }
-        }
-        else
-        {
-            assert(emitter::insOptsNone(opt));
-            return 0x01000000; // set the bit at location 24
-        }
-    }
-}
-
-/*****************************************************************************
- *
- *  Returns the encoding to apply a Shift Type on the Rm register
- */
-
-/*static*/ emitter::code_t emitter::insEncodeShiftType(insOpts opt)
-{
-    if (emitter::insOptsNone(opt))
-    {
-        // None implies the we encode LSL (with a zero immediate)
-        opt = INS_OPTS_LSL;
-    }
-    assert(emitter::insOptsAnyShift(opt));
-
-    emitter::code_t option = (emitter::code_t)opt - (emitter::code_t)INS_OPTS_LSL;
-    assert(option <= 3);
-
-    return option << 22; // bits 23, 22
-}
-
-/*****************************************************************************
- *
- *  Returns the encoding to apply a 12 bit left shift to the immediate
- */
-
-/*static*/ emitter::code_t emitter::insEncodeShiftImm12(insOpts opt)
-{
-    if (emitter::insOptsLSL12(opt))
-    {
-        return 0x00400000; // set the bit at location 22
-    }
-    return 0;
-}
-
-/*****************************************************************************
- *
- *  Returns the encoding to have the Rm register use an extend operation
- */
-
-/*static*/ emitter::code_t emitter::insEncodeExtend(insOpts opt)
-{
-    if (emitter::insOptsNone(opt) || (opt == INS_OPTS_LSL))
-    {
-        // None or LSL implies the we encode UXTX
-        opt = INS_OPTS_UXTX;
-    }
-    assert(emitter::insOptsAnyExtend(opt));
-
-    emitter::code_t option = (emitter::code_t)opt - (emitter::code_t)INS_OPTS_UXTB;
-    assert(option <= 7);
-
-    return option << 13; // bits 15,14,13
-}
-
-/*****************************************************************************
- *
- *  Returns the encoding to scale the Rm register by {0,1,2,3,4}
- *  when using an extend operation
- */
-
-/*static*/ emitter::code_t emitter::insEncodeExtendScale(ssize_t imm)
-{
-    assert((imm >= 0) && (imm <= 4));
-
-    return (emitter::code_t)imm << 10; // bits 12,11,10
-}
-
-/*****************************************************************************
- *
- *  Returns the encoding to have the Rm register be auto scaled by the ld/st size
- */
-
-/*static*/ emitter::code_t emitter::insEncodeReg3Scale(bool isScaled)
-{
-    if (isScaled)
-    {
-        return 0x00001000; // set the bit at location 12
-    }
-    else
-    {
-        return 0;
-    }
-}
+            break;
 
-/*****************************************************************************
- *
- *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
- */
+        case INS_sqdmlal2:
+        case INS_sqdmlsl2:
+        case INS_sqdmull2:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(size == EA_16BYTE);
+            assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S));
+            fmt = IF_DV_3A;
+            break;
 
-/*static*/ emitter::code_t emitter::insEncodeSveElemsize(emitAttr size)
-{
-    switch (size)
-    {
-        case EA_1BYTE:
-            return 0x00000000;
+        case INS_pmul:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isValidArrangement(size, opt));
+            assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_16B));
+            fmt = IF_DV_3A;
+            break;
 
-        case EA_2BYTE:
-            return 0x00400000; // set the bit at location 22
+        case INS_pmull:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(size == EA_8BYTE);
+            assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_1D));
+            fmt = IF_DV_3A;
+            break;
 
-        case EA_4BYTE:
-            return 0x00800000; // set the bit at location 23
+        case INS_pmull2:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(size == EA_16BYTE);
+            assert((opt == INS_OPTS_16B) || (opt == INS_OPTS_2D));
+            fmt = IF_DV_3A;
+            break;
 
-        case EA_8BYTE:
-            return 0x00C00000; // set the bit at location 23 and 22
+        case INS_sdot:
+        case INS_udot:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(((size == EA_8BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_16BYTE) && (opt == INS_OPTS_4S)));
+            fmt = IF_DV_3A;
+            break;
 
         default:
-            assert(!"Invalid insOpt for vector register");
-    }
-    return 0;
-}
+            // fallback to emit SVE instructions.
+            return emitInsSve_R_R_R(ins, attr, reg1, reg2, reg3, opt, sopt);
 
-/*****************************************************************************
- *
- *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
- *  This specifically encodes the size at bit locations '22-21'.
- */
+    } // end switch (ins)
 
-/*static*/ emitter::code_t emitter::insEncodeSveElemsize_22_to_21(emitAttr size)
-{
-    switch (size)
-    {
-        case EA_1BYTE:
-            return 0;
+    assert(fmt != IF_NONE);
 
-        case EA_2BYTE:
-            return (1 << 21); // set the bit at location 21
+    instrDesc* id = emitNewInstr(attr);
 
-        case EA_4BYTE:
-            return (1 << 22); // set the bit at location 22
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
 
-        case EA_8BYTE:
-            return (1 << 22) | (1 << 21); // set the bit at location 22 and 21
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
 
-        default:
-            assert(!"Invalid insOpt for vector register");
-    }
-    return 0;
+    dispIns(id);
+    appendToCurIG(id);
 }
 
-/*****************************************************************************
- *
- *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
- *  This specifically encodes the size at bit locations '18-17'.
- */
-
-/*static*/ emitter::code_t emitter::insEncodeSveElemsize_18_to_17(emitAttr size)
+//-----------------------------------------------------------------------------------
+// emitIns_R_R_R_I_LdStPair: Add an instruction storing 2 registers into a memory
+//                     (pointed by reg3) and the offset (immediate).
+//
+// Arguments:
+//     ins      - The instruction code
+//     attr     - The emit attribute for register 1
+//     attr2    - The emit attribute for register 2
+//     reg1     - Register 1
+//     reg2     - Register 2
+//     reg3     - Register 3
+//     imm      - Immediate offset, prior to scaling by operand size
+//     varx1    - LclVar number 1
+//     varx2    - LclVar number 2
+//     offs1    - Memory offset of lclvar number 1
+//     offs2    - Memory offset of lclvar number 2
+//
+void emitter::emitIns_R_R_R_I_LdStPair(instruction ins,
+                                       emitAttr    attr,
+                                       emitAttr    attr2,
+                                       regNumber   reg1,
+                                       regNumber   reg2,
+                                       regNumber   reg3,
+                                       ssize_t     imm,
+                                       int         varx1,
+                                       int         varx2,
+                                       int         offs1,
+                                       int offs2   DEBUG_ARG(unsigned var1RefsOffs) DEBUG_ARG(unsigned var2RefsOffs))
 {
-    switch (size)
-    {
-        case EA_1BYTE:
-            return 0;
-
-        case EA_2BYTE:
-            return (1 << 17); // set the bit at location 17
-
-        case EA_4BYTE:
-            return (1 << 18); // set the bit at location 18
+    assert((ins == INS_stp) || (ins == INS_ldp));
+    emitAttr  size  = EA_SIZE(attr);
+    insFormat fmt   = IF_NONE;
+    unsigned  scale = 0;
 
-        case EA_8BYTE:
-            return (1 << 18) | (1 << 17); // set the bit at location 18 and 17
+    // Is the target a vector register?
+    if (isVectorRegister(reg1))
+    {
+        assert(isValidVectorLSPDatasize(size));
+        assert(isVectorRegister(reg2));
 
-        default:
-            assert(!"Invalid insOpt for vector register");
+        scale = NaturalScale_helper(size);
+        assert((scale >= 2) && (scale <= 4));
     }
-    return 0;
-}
-
-/*****************************************************************************
- *
- *  Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction
- *  This specifically encodes the field 'sz' at bit location '21'.
- */
-
-/*static*/ emitter::code_t emitter::insEncodeSveElemsize_sz_21(emitAttr size)
-{
-    switch (size)
+    else
     {
-        case EA_4BYTE:
-            return 0;
-
-        case EA_8BYTE:
-            return (1 << 21);
-
-        default:
-            assert(!"Invalid insOpt for vector register");
+        assert(isValidGeneralDatasize(size));
+        assert(isGeneralRegisterOrZR(reg2));
+        scale = (size == EA_8BYTE) ? 3 : 2;
     }
-    return 0;
-}
 
-/*****************************************************************************
- *
- *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
- *  This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'.
- */
+    reg3 = encodingSPtoZR(reg3);
 
-/*static*/ emitter::code_t emitter::insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size)
-{
-    switch (size)
+    fmt          = IF_LS_3C;
+    ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+    if (imm == 0)
     {
-        case EA_1BYTE:
-            return 0x080000; // set the bit at location 19
-
-        case EA_2BYTE:
-            return 0x100000; // set the bit at location 20
-
-        case EA_4BYTE:
-            return 0x400000; // set the bit at location 22
-
-        default:
-            assert(!"Invalid size for vector register");
+        fmt = IF_LS_3B;
     }
-    return 0;
-}
-
-/*****************************************************************************
- *
- *  Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate.
- *  This specifically encodes the field 'tszh:tszl' at bit locations '23-22:9-8'.
- */
-
-/*static*/ emitter::code_t emitter::insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift, size_t imm)
-{
-    code_t encodedSize = 0;
-
-    switch (size)
+    else
     {
-        case EA_1BYTE:
-            encodedSize = 0x100; // set the bit at location 8
-            break;
+        if ((imm & mask) == 0)
+        {
+            imm >>= scale; // The immediate is scaled by the size of the ld/st
+        }
+        else
+        {
+            // Unlike emitIns_S_S_R_R(), we would never come here when
+            // (imm & mask) != 0.
+            unreached();
+        }
+    }
 
-        case EA_2BYTE:
-            encodedSize = 0x200; // set the bit at location 9
-            break;
+    bool validVar1 = varx1 != -1;
+    bool validVar2 = varx2 != -1;
 
-        case EA_4BYTE:
-            encodedSize = 0x400000; // set the bit at location 22
-            break;
+    instrDesc* id;
 
-        case EA_8BYTE:
-            encodedSize = 0x800000; // set the bit at location 23
-            break;
+    if (validVar1 && validVar2)
+    {
+        id = emitNewInstrLclVarPair(attr, imm);
+        id->idAddr()->iiaLclVar.initLclVarAddr(varx1, offs1);
+        id->idSetIsLclVar();
 
-        default:
-            assert(!"Invalid esize for vector register");
+        emitGetLclVarPairLclVar2(id)->initLclVarAddr(varx2, offs2);
+    }
+    else
+    {
+        id = emitNewInstrCns(attr, imm);
+        if (validVar1)
+        {
+            id->idAddr()->iiaLclVar.initLclVarAddr(varx1, offs1);
+            id->idSetIsLclVar();
+        }
+        if (validVar2)
+        {
+            id->idAddr()->iiaLclVar.initLclVarAddr(varx2, offs2);
+            id->idSetIsLclVar();
+        }
     }
 
-    code_t encodedImm = insEncodeShiftImmediate(size, isRightShift, imm);
-    code_t imm3High   = (encodedImm & 0x60) << 17;
-    code_t imm3Low    = (encodedImm & 0x1f) << 5;
-    return encodedSize | imm3High | imm3Low;
-}
-
-/*****************************************************************************
- *
- *  Returns the encoding to select the constant values 90 or 270 for an Arm64 SVE vector instruction
- *  This specifically encode the field 'rot' at bit location '16'.
- */
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
 
-/*static*/ emitter::code_t emitter::insEncodeSveImm90_or_270_rot(ssize_t imm)
-{
-    assert(emitIsValidEncodedRotationImm90_or_270(imm));
-    return (code_t)(imm << 16);
-}
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
 
-/*****************************************************************************
- *
- *  Returns the encoding to select the constant values 0, 90, 180 or 270 for an Arm64 SVE vector instruction
- *  This specifically encode the field 'rot' at bit locations '14-13'.
- */
+    // Record the attribute for the second register in the pair
+    if (EA_IS_GCREF(attr2))
+    {
+        id->idGCrefReg2(GCT_GCREF);
+    }
+    else if (EA_IS_BYREF(attr2))
+    {
+        id->idGCrefReg2(GCT_BYREF);
+    }
+    else
+    {
+        id->idGCrefReg2(GCT_NONE);
+    }
 
-/*static*/ emitter::code_t emitter::insEncodeSveImm0_to_270_rot(ssize_t imm)
-{
-    assert(emitIsValidEncodedRotationImm0_to_270(imm));
-    return (code_t)(imm << 13);
+#ifdef DEBUG
+    id->idDebugOnlyInfo()->idVarRefOffs  = var1RefsOffs;
+    id->idDebugOnlyInfo()->idVarRefOffs2 = var2RefsOffs;
+#endif
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- *  Returns the encoding to select the constant float values 0, 0.5, 1.0 or 2.0 for an Arm64 SVE vector instruction
- *  This specifically encode the field 'i1' at bit location '5'.
+ *  Add an instruction referencing three registers and a constant.
  */
 
-/*static*/ emitter::code_t emitter::insEncodeSveSmallFloatImm(ssize_t imm)
+void emitter::emitIns_R_R_R_I(instruction     ins,
+                              emitAttr        attr,
+                              regNumber       reg1,
+                              regNumber       reg2,
+                              regNumber       reg3,
+                              ssize_t         imm,
+                              insOpts         opt /* = INS_OPTS_NONE */,
+                              emitAttr        attrReg2 /* = EA_UNKNOWN */,
+                              insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
 {
-    assert(emitIsValidEncodedSmallFloatImm(imm));
-    return (code_t)(imm << 5);
-}
-
-/*****************************************************************************
- *
- *  Returns the register list size for the given SVE instruction.
- */
+    emitAttr  size     = EA_SIZE(attr);
+    emitAttr  elemsize = EA_UNKNOWN;
+    insFormat fmt      = IF_NONE;
+    bool      isLdSt   = false;
+    bool      isSIMD   = false;
+    bool      isAddSub = false;
+    bool      setFlags = false;
+    unsigned  scale    = 0;
 
-/*static*/ int emitter::insGetSveReg1ListSize(instruction ins)
-{
+    /* Figure out the encoding format of the instruction */
     switch (ins)
     {
-        case INS_sve_ld1d:
-        case INS_sve_ld1w:
-        case INS_sve_ld1sw:
-        case INS_sve_ld1sb:
-        case INS_sve_ld1b:
-        case INS_sve_ld1sh:
-        case INS_sve_ld1h:
-        case INS_sve_ldnf1d:
-        case INS_sve_ldnf1sw:
-        case INS_sve_ldnf1sh:
-        case INS_sve_ldnf1w:
-        case INS_sve_ldnf1h:
-        case INS_sve_ldnf1sb:
-        case INS_sve_ldnf1b:
-        case INS_sve_ldnt1b:
-        case INS_sve_ldnt1d:
-        case INS_sve_ldnt1h:
-        case INS_sve_ldnt1w:
-        case INS_sve_ld1rob:
-        case INS_sve_ld1rod:
-        case INS_sve_ld1roh:
-        case INS_sve_ld1row:
-        case INS_sve_ld1rqb:
-        case INS_sve_ld1rqd:
-        case INS_sve_ld1rqh:
-        case INS_sve_ld1rqw:
-        case INS_sve_stnt1b:
-        case INS_sve_stnt1d:
-        case INS_sve_stnt1h:
-        case INS_sve_stnt1w:
-        case INS_sve_st1d:
-        case INS_sve_st1w:
-        case INS_sve_ldff1sh:
-        case INS_sve_ldff1w:
-        case INS_sve_ldff1h:
-        case INS_sve_ldff1d:
-        case INS_sve_ldff1sw:
-        case INS_sve_st1b:
-        case INS_sve_st1h:
-        case INS_sve_ldff1sb:
-        case INS_sve_ldff1b:
-        case INS_sve_ldnt1sb:
-        case INS_sve_ldnt1sh:
-        case INS_sve_ld1rd:
-        case INS_sve_ld1rsw:
-        case INS_sve_ld1rh:
-        case INS_sve_ld1rsb:
-        case INS_sve_ld1rsh:
-        case INS_sve_ld1rw:
-        case INS_sve_ld1q:
-        case INS_sve_ldnt1sw:
-        case INS_sve_st1q:
-            return 1;
-
-        case INS_sve_ld2b:
-        case INS_sve_ld2h:
-        case INS_sve_ld2w:
-        case INS_sve_ld2d:
-        case INS_sve_ld2q:
-        case INS_sve_splice: // SVE_CV_3A
-        case INS_sve_st2b:
-        case INS_sve_st2h:
-        case INS_sve_st2w:
-        case INS_sve_st2d:
-        case INS_sve_st2q:
-        case INS_sve_whilege: // SVE_DX_3A
-        case INS_sve_whilegt: // SVE_DX_3A
-        case INS_sve_whilehi: // SVE_DX_3A
-        case INS_sve_whilehs: // SVE_DX_3A
-        case INS_sve_whilele: // SVE_DX_3A
-        case INS_sve_whilels: // SVE_DX_3A
-        case INS_sve_whilelt: // SVE_DX_3A
-        case INS_sve_pext:    // SVE_DW_2B
-            return 2;
-
-        case INS_sve_ld3b:
-        case INS_sve_ld3h:
-        case INS_sve_ld3w:
-        case INS_sve_ld3d:
-        case INS_sve_ld3q:
-        case INS_sve_st3b:
-        case INS_sve_st3h:
-        case INS_sve_st3w:
-        case INS_sve_st3d:
-        case INS_sve_st3q:
-            return 3;
-
-        case INS_sve_ld4b:
-        case INS_sve_ld4h:
-        case INS_sve_ld4w:
-        case INS_sve_ld4d:
-        case INS_sve_ld4q:
-        case INS_sve_st4b:
-        case INS_sve_st4h:
-        case INS_sve_st4w:
-        case INS_sve_st4d:
-        case INS_sve_st4q:
-            return 4;
-
-        default:
-            assert(!"Unexpected instruction");
-            return 1;
-    }
-}
-
-/*****************************************************************************
- *
- *  Returns the predicate type for the given SVE format.
- */
-
-/*static*/ emitter::PredicateType emitter::insGetPredicateType(insFormat fmt, int regpos /* =0 */)
-{
-    switch (fmt)
-    {
-        case IF_SVE_BV_2A:
-        case IF_SVE_HW_4A:
-        case IF_SVE_HW_4A_A:
-        case IF_SVE_HW_4A_B:
-        case IF_SVE_HW_4A_C:
-        case IF_SVE_HW_4B:
-        case IF_SVE_HW_4B_D:
-        case IF_SVE_HX_3A_E:
-        case IF_SVE_IJ_3A_D:
-        case IF_SVE_IJ_3A_E:
-        case IF_SVE_IJ_3A_F:
-        case IF_SVE_IK_4A_G:
-        case IF_SVE_IJ_3A_G:
-        case IF_SVE_IK_4A_I:
-        case IF_SVE_IH_3A_F:
-        case IF_SVE_II_4A_H:
-        case IF_SVE_IH_3A:
-        case IF_SVE_IH_3A_A:
-        case IF_SVE_II_4A:
-        case IF_SVE_II_4A_B:
-        case IF_SVE_IU_4A:
-        case IF_SVE_IU_4A_C:
-        case IF_SVE_IU_4B:
-        case IF_SVE_IU_4B_D:
-        case IF_SVE_IV_3A:
-        case IF_SVE_IG_4A_F:
-        case IF_SVE_IG_4A_G:
-        case IF_SVE_IJ_3A:
-        case IF_SVE_IK_4A:
-        case IF_SVE_IK_4A_F:
-        case IF_SVE_IK_4A_H:
-        case IF_SVE_IU_4A_A:
-        case IF_SVE_IU_4B_B:
-        case IF_SVE_HX_3A_B:
-        case IF_SVE_IG_4A:
-        case IF_SVE_IG_4A_D:
-        case IF_SVE_IG_4A_E:
-        case IF_SVE_IF_4A:
-        case IF_SVE_IF_4A_A:
-        case IF_SVE_IM_3A:
-        case IF_SVE_IN_4A:
-        case IF_SVE_IX_4A:
-        case IF_SVE_IO_3A:
-        case IF_SVE_IP_4A:
-        case IF_SVE_IQ_3A:
-        case IF_SVE_IR_4A:
-        case IF_SVE_IS_3A:
-        case IF_SVE_IT_4A:
-        case IF_SVE_GI_4A:
-        case IF_SVE_IC_3A_C:
-        case IF_SVE_IC_3A:
-        case IF_SVE_IC_3A_B:
-        case IF_SVE_IC_3A_A:
-        case IF_SVE_IL_3A_C:
-        case IF_SVE_IL_3A:
-        case IF_SVE_IL_3A_B:
-        case IF_SVE_IL_3A_A:
-        case IF_SVE_IW_4A:
-            return PREDICATE_ZERO;
-
-        case IF_SVE_BV_2A_J:
-        case IF_SVE_CP_3A:
-        case IF_SVE_CQ_3A:
-        case IF_SVE_AM_2A:
-        case IF_SVE_AN_3A:
-        case IF_SVE_AO_3A:
-        case IF_SVE_HL_3A:
-        case IF_SVE_HM_2A:
-        case IF_SVE_AA_3A:
-        case IF_SVE_BU_2A:
-        case IF_SVE_BV_2B:
-        case IF_SVE_HS_3A:
-        case IF_SVE_HS_3A_H:
-        case IF_SVE_HS_3A_I:
-        case IF_SVE_HS_3A_J:
-        case IF_SVE_HP_3B:
-        case IF_SVE_HP_3B_H:
-        case IF_SVE_HP_3B_I:
-        case IF_SVE_HP_3B_J:
-        case IF_SVE_AR_4A:
-        case IF_SVE_BV_2A_A:
-        case IF_SVE_AB_3A:
-        case IF_SVE_ET_3A:
-        case IF_SVE_HU_4A:
-        case IF_SVE_HL_3B:
-        case IF_SVE_AD_3A:
-        case IF_SVE_AB_3B:
-        case IF_SVE_AE_3A:
-        case IF_SVE_EU_3A:
-        case IF_SVE_GT_4A:
-        case IF_SVE_AP_3A:
-        case IF_SVE_HO_3A:
-        case IF_SVE_HO_3A_B:
-        case IF_SVE_GQ_3A:
-        case IF_SVE_HU_4B:
-        case IF_SVE_AQ_3A:
-        case IF_SVE_CU_3A:
-        case IF_SVE_AC_3A:
-        case IF_SVE_ER_3A:
-        case IF_SVE_GR_3A:
-        case IF_SVE_ES_3A:
-        case IF_SVE_HR_3A:
-        case IF_SVE_EP_3A:
-        case IF_SVE_GP_3A:
-        case IF_SVE_EQ_3A:
-        case IF_SVE_HQ_3A:
-        case IF_SVE_AS_4A:
-        case IF_SVE_CT_3A:
-        case IF_SVE_HP_3A:
-        case IF_SVE_HV_4A:
-            return PREDICATE_MERGE;
-
-        case IF_SVE_CZ_4A_A:
-        case IF_SVE_CZ_4A_L:
-        case IF_SVE_CF_2A:
-        case IF_SVE_CF_2B:
-        case IF_SVE_CF_2C:
-        case IF_SVE_CF_2D:
-        case IF_SVE_CI_3A:
-        case IF_SVE_CJ_2A:
-        case IF_SVE_DE_1A:
-        case IF_SVE_DH_1A:
-        case IF_SVE_DJ_1A:
-        case IF_SVE_DM_2A:
-        case IF_SVE_DN_2A:
-        case IF_SVE_DO_2A:
-        case IF_SVE_DP_2A:
-        case IF_SVE_DR_1A:
-        case IF_SVE_DT_3A:
-        case IF_SVE_DU_3A:
-        case IF_SVE_CK_2A:
-            return PREDICATE_SIZED;
-
-        case IF_SVE_DB_3A:
-            // Second register could be ZERO or MERGE so handled at source.
-            assert(regpos != 2);
-            return PREDICATE_SIZED;
-
-        case IF_SVE_DL_2A:
-        case IF_SVE_DY_3A:
-        case IF_SVE_DZ_1A:
-            return PREDICATE_N_SIZED;
-
-        // This is a special case as the second register could be ZERO or MERGE.
-        // <Pg>/<ZM>
-        // Therefore, by default return NONE due to ambiguity.
-        case IF_SVE_AH_3A:
-            // TODO: Handle these cases.
-            assert(false);
-            break;
-
-        case IF_SVE_JD_4B:
-        case IF_SVE_JD_4C:
-        case IF_SVE_JI_3A_A:
-        case IF_SVE_JJ_4A:
-        case IF_SVE_JJ_4A_B:
-        case IF_SVE_JJ_4A_C:
-        case IF_SVE_JJ_4A_D:
-        case IF_SVE_JJ_4B:
-        case IF_SVE_JJ_4B_E:
-        case IF_SVE_JN_3B:
-        case IF_SVE_JN_3C:
-        case IF_SVE_JD_4A:
-        case IF_SVE_JN_3A:
-        case IF_SVE_JD_4C_A:
-        case IF_SVE_JJ_4B_C:
-        case IF_SVE_JL_3A:
-        case IF_SVE_JN_3C_D:
-        case IF_SVE_HY_3A:
-        case IF_SVE_HY_3A_A:
-        case IF_SVE_HY_3B:
-        case IF_SVE_HZ_2A_B:
-        case IF_SVE_IA_2A:
-        case IF_SVE_IB_3A:
-        case IF_SVE_JK_4A:
-        case IF_SVE_JK_4A_B:
-        case IF_SVE_JK_4B:
-        case IF_SVE_IZ_4A:
-        case IF_SVE_IZ_4A_A:
-        case IF_SVE_JB_4A:
-        case IF_SVE_JM_3A:
-        case IF_SVE_CM_3A:
-        case IF_SVE_CN_3A:
-        case IF_SVE_CO_3A:
-        case IF_SVE_JA_4A:
-        case IF_SVE_CR_3A:
-        case IF_SVE_CS_3A:
-        case IF_SVE_CV_3A:
-        case IF_SVE_CV_3B:
-        case IF_SVE_DW_2A: // <PNn>[<imm>]
-        case IF_SVE_DW_2B: // <PNn>[<imm>]
-        case IF_SVE_JC_4A:
-        case IF_SVE_JO_3A:
-        case IF_SVE_JE_3A:
-        case IF_SVE_JF_4A:
-        case IF_SVE_AK_3A:
-        case IF_SVE_HE_3A:
-        case IF_SVE_AF_3A:
-        case IF_SVE_AG_3A:
-        case IF_SVE_AI_3A:
-        case IF_SVE_AJ_3A:
-        case IF_SVE_AL_3A:
-        case IF_SVE_CL_3A:
-        case IF_SVE_GS_3A:
-        case IF_SVE_HJ_3A:
-        case IF_SVE_IY_4A:
-            return PREDICATE_NONE;
-
-        case IF_SVE_CX_4A:
-        case IF_SVE_CX_4A_A:
-        case IF_SVE_CY_3A:
-        case IF_SVE_CY_3B:
-        case IF_SVE_GE_4A:
-        case IF_SVE_HT_4A:
-            assert((regpos == 1) || (regpos == 2));
-            return (regpos == 2 ? PREDICATE_ZERO : PREDICATE_SIZED);
-
-        case IF_SVE_CZ_4A:
-        case IF_SVE_DA_4A:
-        case IF_SVE_DB_3B:
-        case IF_SVE_DC_3A:
-            assert((regpos >= 1) && (regpos <= 4));
-            return (regpos == 2 ? PREDICATE_ZERO : PREDICATE_SIZED);
-
-        case IF_SVE_CZ_4A_K:
-            assert((regpos >= 1) && (regpos <= 3));
-            return (regpos == 2 ? PREDICATE_MERGE : PREDICATE_SIZED);
-
-        case IF_SVE_DD_2A:
-        case IF_SVE_DF_2A:
-            assert((regpos >= 1) && (regpos <= 3));
-            return ((regpos == 2) ? PREDICATE_NONE : PREDICATE_SIZED);
-
-        case IF_SVE_DG_2A:
-            return (regpos == 2 ? PREDICATE_ZERO : PREDICATE_SIZED);
-
-        case IF_SVE_DI_2A:
-            return (regpos == 1 ? PREDICATE_NONE : PREDICATE_SIZED);
-
-        case IF_SVE_DK_3A:
-            assert((regpos == 2) || (regpos == 3));
-            return ((regpos == 2) ? PREDICATE_NONE : PREDICATE_SIZED);
-
-        case IF_SVE_HI_3A:
-            assert((regpos == 1) || (regpos == 2));
-            return ((regpos == 2) ? PREDICATE_ZERO : PREDICATE_SIZED);
-
-        case IF_SVE_ID_2A:
-        case IF_SVE_JG_2A:
-            return PREDICATE_NONE;
-
-        default:
+        case INS_extr:
+            assert(insOptsNone(opt));
+            assert(isValidGeneralDatasize(size));
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isValidImmShift(imm, size));
+            fmt = IF_DR_3E;
             break;
-    }
-
-    assert(!"Unexpected instruction format");
-    return PREDICATE_NONE;
-}
 
-/*****************************************************************************
- *
- *  Returns true if the SVE instruction has a LSL addr.
- *  This is for formats that have [<Xn|SP>, <Xm>, LSL #N], [<Xn|SP>{, <Xm>, LSL #N}]
- */
-/*static*/ bool emitter::insSveIsLslN(instruction ins, insFormat fmt)
-{
-    switch (fmt)
-    {
-        case IF_SVE_JD_4A:
-            switch (ins)
+        case INS_and:
+        case INS_ands:
+        case INS_eor:
+        case INS_orr:
+        case INS_bic:
+        case INS_bics:
+        case INS_eon:
+        case INS_orn:
+            assert(isValidGeneralDatasize(size));
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isValidImmShift(imm, size));
+            if (imm == 0)
             {
-                case INS_sve_st1h:
-                    return true;
-
-                default:
-                    break;
+                assert(insOptsNone(opt)); // a zero imm, means no shift kind
+                fmt = IF_DR_3A;
+            }
+            else
+            {
+                assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind
+                fmt = IF_DR_3B;
             }
             break;
 
-        case IF_SVE_JD_4B:
-            switch (ins)
+        case INS_fmul: // by element, imm[0..3] selects the element of reg3
+        case INS_fmla:
+        case INS_fmls:
+        case INS_fmulx:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            if (insOptsAnyArrangement(opt))
             {
-                case INS_sve_st1w:
-                    return true;
+                // Vector operation
+                assert(isValidVectorDatasize(size));
+                assert(isValidArrangement(size, opt));
+                elemsize = optGetElemsize(opt);
+                assert(isValidVectorElemsizeFloat(elemsize));
+                assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+                assert(opt != INS_OPTS_1D); // Reserved encoding
+                fmt = IF_DV_3BI;
+            }
+            else
+            {
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert(isValidScalarDatasize(size));
+                elemsize = size;
+                assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+                fmt = IF_DV_3DI;
+            }
+            break;
 
-                default:
-                    break;
+        case INS_mul: // by element, imm[0..7] selects the element of reg3
+        case INS_mla:
+        case INS_mls:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            // Vector operation
+            assert(insOptsAnyArrangement(opt));
+            assert(isValidVectorDatasize(size));
+            assert(isValidArrangement(size, opt));
+            elemsize = optGetElemsize(opt);
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+            // Only has encodings for H or S elemsize
+            assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE));
+            // Only has encodings for V0..V15
+            if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0))
+            {
+                noway_assert(!"Invalid reg3");
             }
+            fmt = IF_DV_3AI;
+            break;
+
+        case INS_add:
+        case INS_sub:
+            setFlags = false;
+            isAddSub = true;
+            break;
+
+        case INS_adds:
+        case INS_subs:
+            setFlags = true;
+            isAddSub = true;
+            break;
+
+        case INS_ldpsw:
+            scale  = 2;
+            isLdSt = true;
             break;
 
-        case IF_SVE_HW_4B:
-            switch (ins)
+        case INS_ldnp:
+        case INS_stnp:
+            assert(insOptsNone(opt)); // Can't use Pre/Post index on these two instructions
+            FALLTHROUGH;
+
+        case INS_ldp:
+        case INS_stp:
+            // Is the target a vector register?
+            if (isVectorRegister(reg1))
             {
-                case INS_sve_ld1h:
-                case INS_sve_ld1sh:
-                case INS_sve_ldff1h:
-                case INS_sve_ldff1sh:
-                case INS_sve_ld1w:
-                case INS_sve_ldff1w:
-                    return true;
-
-                default:
-                    break;
+                scale  = NaturalScale_helper(size);
+                isSIMD = true;
             }
-            break;
-
-        case IF_SVE_IG_4A:
-            switch (ins)
+            else
             {
-                case INS_sve_ldff1d:
-                case INS_sve_ldff1sw:
-                    return true;
-
-                default:
-                    break;
+                scale = (size == EA_8BYTE) ? 3 : 2;
             }
+            isLdSt = true;
+            fmt    = IF_LS_3C;
             break;
 
-        case IF_SVE_IG_4A_F:
-            switch (ins)
-            {
-                case INS_sve_ldff1sh:
-                case INS_sve_ldff1w:
-                    return true;
+        case INS_ld1:
+        case INS_ld2:
+        case INS_ld3:
+        case INS_ld4:
+        case INS_st1:
+        case INS_st2:
+        case INS_st3:
+        case INS_st4:
+            assert(isVectorRegister(reg1));
+            assert(isGeneralRegisterOrSP(reg2));
+            assert(isGeneralRegister(reg3));
 
-                default:
-                    break;
-            }
-            break;
+            assert(insOptsPostIndex(opt));
 
-        case IF_SVE_IG_4A_G:
-            switch (ins)
-            {
-                case INS_sve_ldff1h:
-                    return true;
+            elemsize = size;
+            assert(isValidVectorElemsize(elemsize));
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
 
-                default:
-                    break;
-            }
+            // Load/Store single structure  post-indexed by a register
+            reg2 = encodingSPtoZR(reg2);
+            fmt  = IF_LS_3G;
             break;
 
-        case IF_SVE_II_4A:
-        case IF_SVE_II_4A_B:
-            switch (ins)
-            {
-                case INS_sve_ld1d:
-                    return true;
-
-                default:
-                    break;
-            }
+        case INS_ext:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isValidVectorDatasize(size));
+            assert(isValidArrangement(size, opt));
+            assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_16B));
+            assert(isValidVectorIndex(size, EA_1BYTE, imm));
+            fmt = IF_DV_3G;
             break;
 
-        case IF_SVE_II_4A_H:
-            switch (ins)
+        case INS_smlal:
+        case INS_smlsl:
+        case INS_smull:
+        case INS_umlal:
+        case INS_umlsl:
+        case INS_umull:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(size == EA_8BYTE);
+            assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
+            elemsize = optGetElemsize(opt);
+            // Restricted to V0-V15 when element size is H.
+            if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0))
             {
-                case INS_sve_ld1w:
-                    return true;
-
-                default:
-                    break;
+                assert(!"Invalid reg3");
             }
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+            fmt = IF_DV_3AI;
             break;
 
-        case IF_SVE_IK_4A:
-            switch (ins)
+        case INS_sqdmlal:
+        case INS_sqdmlsl:
+        case INS_sqdmull:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            if (insOptsAnyArrangement(opt))
             {
-                case INS_sve_ld1sw:
-                    return true;
-
-                default:
-                    break;
+                // Vector operation
+                assert(size == EA_8BYTE);
+                assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S));
+                elemsize = optGetElemsize(opt);
+                fmt      = IF_DV_3AI;
             }
-            break;
-
-        case IF_SVE_IK_4A_G:
-            switch (ins)
+            else
             {
-                case INS_sve_ld1sh:
-                    return true;
-
-                default:
-                    break;
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert((size == EA_2BYTE) || (size == EA_4BYTE));
+                elemsize = size;
+                fmt      = IF_DV_3EI;
             }
-            break;
-
-        case IF_SVE_IK_4A_I:
-            switch (ins)
+            // Restricted to V0-V15 when element size is H.
+            if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0))
             {
-                case INS_sve_ld1h:
-                    return true;
-
-                default:
-                    break;
+                assert(!"Invalid reg3");
             }
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
             break;
 
-        case IF_SVE_IN_4A:
-            switch (ins)
+        case INS_sqdmulh:
+        case INS_sqrdmlah:
+        case INS_sqrdmlsh:
+        case INS_sqrdmulh:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            if (insOptsAnyArrangement(opt))
             {
-                case INS_sve_ldnt1d:
-                case INS_sve_ldnt1h:
-                case INS_sve_ldnt1w:
-                    return true;
-
-                default:
-                    break;
+                // Vector operation
+                assert(isValidVectorDatasize(size));
+                elemsize = optGetElemsize(opt);
+                assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE));
+                fmt = IF_DV_3AI;
             }
-            break;
-
-        case IF_SVE_IP_4A:
-            switch (ins)
+            else
             {
-                case INS_sve_ld1roh:
-                case INS_sve_ld1row:
-                case INS_sve_ld1rod:
-                case INS_sve_ld1rqh:
-                case INS_sve_ld1rqw:
-                case INS_sve_ld1rqd:
-                    return true;
-
-                default:
-                    break;
+                // Scalar operation
+                assert(insOptsNone(opt));
+                assert((size == EA_2BYTE) || (size == EA_4BYTE));
+                elemsize = size;
+                fmt      = IF_DV_3EI;
             }
-            break;
-
-        case IF_SVE_IR_4A:
-            switch (ins)
+            // Restricted to V0-V15 when element size is H.
+            if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0))
             {
-                case INS_sve_ld2q:
-                case INS_sve_ld3q:
-                case INS_sve_ld4q:
-                    return true;
-
-                default:
-                    break;
+                assert(!"Invalid reg3");
             }
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
             break;
 
-        case IF_SVE_IT_4A:
-            switch (ins)
+        case INS_smlal2:
+        case INS_smlsl2:
+        case INS_smull2:
+        case INS_sqdmlal2:
+        case INS_sqdmlsl2:
+        case INS_sqdmull2:
+        case INS_umlal2:
+        case INS_umlsl2:
+        case INS_umull2:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(size == EA_16BYTE);
+            assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S));
+            elemsize = optGetElemsize(opt);
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+            // Restricted to V0-V15 when element size is H
+            if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0))
             {
-                case INS_sve_ld2h:
-                case INS_sve_ld2w:
-                case INS_sve_ld2d:
-                case INS_sve_ld3h:
-                case INS_sve_ld3w:
-                case INS_sve_ld3d:
-                case INS_sve_ld4h:
-                case INS_sve_ld4w:
-                case INS_sve_ld4d:
-                    return true;
-
-                default:
-                    break;
+                assert(!"Invalid reg3");
             }
+            fmt = IF_DV_3AI;
             break;
 
-        case IF_SVE_IU_4B:
-            switch (ins)
-            {
-                case INS_sve_ld1sw:
-                case INS_sve_ldff1sw:
-                case INS_sve_ld1d:
-                case INS_sve_ldff1d:
-                    return true;
-
-                default:
-                    break;
-            }
+        case INS_sdot:
+        case INS_udot:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(((size == EA_8BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_16BYTE) && (opt == INS_OPTS_4S)));
+            assert(isValidVectorIndex(EA_16BYTE, EA_4BYTE, imm));
+            fmt = IF_DV_3AI;
             break;
 
-        case IF_SVE_JB_4A:
-            switch (ins)
-            {
-                case INS_sve_stnt1h:
-                case INS_sve_stnt1w:
-                case INS_sve_stnt1d:
-                    return true;
+        default:
+            // fallback to emit SVE instructions.
+            return emitInsSve_R_R_R_I(ins, attr, reg1, reg2, reg3, imm, opt, sopt);
 
-                default:
-                    break;
-            }
-            break;
+    } // end switch (ins)
 
-        case IF_SVE_JC_4A:
-            switch (ins)
-            {
-                case INS_sve_st2h:
-                case INS_sve_st2w:
-                case INS_sve_st2d:
-                case INS_sve_st3h:
-                case INS_sve_st3w:
-                case INS_sve_st3d:
-                case INS_sve_st4h:
-                case INS_sve_st4w:
-                case INS_sve_st4d:
-                    return true;
+    assert(insScalableOptsNone(sopt));
 
-                default:
-                    break;
-            }
-            break;
+    if (isLdSt)
+    {
+        assert(!isAddSub);
+        assert(isGeneralRegisterOrSP(reg3));
+        assert(insOptsNone(opt) || insOptsIndexed(opt));
 
-        case IF_SVE_JD_4C:
-            switch (ins)
-            {
-                case INS_sve_st1w:
-                case INS_sve_st1d:
-                    return true;
+        if (isSIMD)
+        {
+            assert(isValidVectorLSPDatasize(size));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert((scale >= 2) && (scale <= 4));
+        }
+        else
+        {
+            assert(isValidGeneralDatasize(size));
+            assert(isGeneralRegisterOrZR(reg1));
+            assert(isGeneralRegisterOrZR(reg2));
+            assert((scale == 2) || (scale == 3));
+        }
 
-                default:
-                    break;
-            }
-            break;
+        // Load/Store Pair reserved encodings:
+        if (emitInsIsLoad(ins))
+        {
+            assert(reg1 != reg2);
+        }
+        if (insOptsIndexed(opt))
+        {
+            assert(reg1 != reg3);
+            assert(reg2 != reg3);
+        }
 
-        case IF_SVE_JD_4C_A:
-            switch (ins)
-            {
-                case INS_sve_st1d:
-                    return true;
+        reg3 = encodingSPtoZR(reg3);
 
-                default:
-                    break;
-            }
-            break;
+        ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+        if (imm == 0)
+        {
+            assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero
 
-        case IF_SVE_JF_4A:
-            switch (ins)
+            fmt = IF_LS_3B;
+        }
+        else
+        {
+            if ((imm & mask) == 0)
             {
-                case INS_sve_st2q:
-                case INS_sve_st3q:
-                case INS_sve_st4q:
-                    return true;
+                imm >>= scale; // The immediate is scaled by the size of the ld/st
 
-                default:
-                    break;
+                if ((imm >= -64) && (imm <= 63))
+                {
+                    fmt = IF_LS_3C;
+                }
             }
-            break;
-
-        case IF_SVE_JJ_4B:
-            switch (ins)
+#ifdef DEBUG
+            if (fmt != IF_LS_3C)
             {
-                case INS_sve_st1h:
-                case INS_sve_st1w:
-                case INS_sve_st1d:
-                    return true;
-
-                default:
-                    break;
+                assert(!"Instruction cannot be encoded: IF_LS_3C");
             }
-            break;
-
-        default:
-            break;
+#endif
+        }
     }
-
-    return false;
-}
-
-/*****************************************************************************
- *
- *  Returns true if the SVE instruction has a <mod> addr.
- *  This is for formats that have [<Xn|SP>, <Zm>.T, <mod>], [<Xn|SP>, <Zm>.T, <mod> #N]
- */
-/*static*/ bool emitter::insSveIsModN(instruction ins, insFormat fmt)
-{
-    switch (fmt)
+    else if (isAddSub)
     {
-        case IF_SVE_JJ_4A:
-        case IF_SVE_JJ_4A_B:
-            switch (ins)
-            {
-                case INS_sve_st1d:
-                case INS_sve_st1h:
-                case INS_sve_st1w:
-                    return true;
-
-                default:
-                    break;
-            }
-            break;
+        bool reg2IsSP = (reg2 == REG_SP);
+        assert(!isLdSt);
+        assert(isValidGeneralDatasize(size));
+        assert(isGeneralRegister(reg3));
 
-        case IF_SVE_JJ_4A_C:
-        case IF_SVE_JJ_4A_D:
-            switch (ins)
-            {
-                case INS_sve_st1h:
-                case INS_sve_st1w:
-                    return true;
+        if (setFlags || insOptsAluShift(opt)) // Can't encode SP in reg1 with setFlags or AluShift option
+        {
+            assert(isGeneralRegisterOrZR(reg1));
+        }
+        else
+        {
+            assert(isGeneralRegisterOrSP(reg1));
+            reg1 = encodingSPtoZR(reg1);
+        }
 
-                default:
-                    break;
-            }
-            break;
+        if (insOptsAluShift(opt)) // Can't encode SP in reg2 with AluShift option
+        {
+            assert(isGeneralRegister(reg2));
+        }
+        else
+        {
+            assert(isGeneralRegisterOrSP(reg2));
+            reg2 = encodingSPtoZR(reg2);
+        }
 
-        case IF_SVE_JK_4A:
-        case IF_SVE_JK_4A_B:
-            switch (ins)
-            {
-                case INS_sve_st1b:
-                    return true;
+        if (insOptsAnyExtend(opt))
+        {
+            assert((imm >= 0) && (imm <= 4));
 
-                default:
-                    break;
-            }
-            break;
+            fmt = IF_DR_3C;
+        }
+        else if (insOptsAluShift(opt))
+        {
+            // imm should be non-zero and in [1..63]
+            assert(isValidImmShift(imm, size) && (imm != 0));
+            fmt = IF_DR_3B;
+        }
+        else if (imm == 0)
+        {
+            assert(insOptsNone(opt));
 
-        case IF_SVE_HW_4A:
-        case IF_SVE_HW_4A_A:
-            switch (ins)
+            if (reg2IsSP)
             {
-                case INS_sve_ld1b:
-                case INS_sve_ld1h:
-                case INS_sve_ld1sb:
-                case INS_sve_ld1sh:
-                case INS_sve_ld1w:
-                case INS_sve_ldff1b:
-                case INS_sve_ldff1h:
-                case INS_sve_ldff1sb:
-                case INS_sve_ldff1sh:
-                case INS_sve_ldff1w:
-                    return true;
-
-                default:
-                    break;
+                // To encode the SP register as reg2 we must use the IF_DR_3C encoding
+                // and also specify a LSL of zero (imm == 0)
+                opt = INS_OPTS_LSL;
+                fmt = IF_DR_3C;
             }
-            break;
-
-        case IF_SVE_HW_4A_B:
-        case IF_SVE_HW_4A_C:
-            switch (ins)
+            else
             {
-                case INS_sve_ld1h:
-                case INS_sve_ld1sh:
-                case INS_sve_ld1w:
-                case INS_sve_ldff1h:
-                case INS_sve_ldff1sh:
-                case INS_sve_ldff1w:
-                    return true;
-
-                default:
-                    break;
+                fmt = IF_DR_3A;
             }
-            break;
+        }
+        else
+        {
+            assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A");
+        }
+    }
 
-        case IF_SVE_IU_4A:
-            switch (ins)
-            {
-                case INS_sve_ld1d:
-                case INS_sve_ld1sw:
-                case INS_sve_ldff1d:
-                case INS_sve_ldff1sw:
-                    return true;
+    assert(fmt != IF_NONE);
 
-                default:
-                    break;
-            }
-            break;
+    instrDesc* id = emitNewInstrCns(attr, imm);
 
-        case IF_SVE_IU_4A_A:
-            switch (ins)
-            {
-                case INS_sve_ld1sw:
-                case INS_sve_ldff1d:
-                case INS_sve_ldff1sw:
-                    return true;
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
 
-                default:
-                    break;
-            }
-            break;
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
 
-        case IF_SVE_IU_4A_C:
-            switch (ins)
-            {
-                case INS_sve_ld1d:
-                    return true;
+    // Record the attribute for the second register in the pair
+    id->idGCrefReg2(GCT_NONE);
+    if (attrReg2 != EA_UNKNOWN)
+    {
+        // Record the attribute for the second register in the pair
+        assert((fmt == IF_LS_3B) || (fmt == IF_LS_3C));
+        if (EA_IS_GCREF(attrReg2))
+        {
+            id->idGCrefReg2(GCT_GCREF);
+        }
+        else if (EA_IS_BYREF(attrReg2))
+        {
+            id->idGCrefReg2(GCT_BYREF);
+        }
+    }
 
-                default:
-                    break;
-            }
-            break;
+    dispIns(id);
+    appendToCurIG(id);
+}
 
-        default:
-            break;
-    }
+/*****************************************************************************
+ *
+ *  Add an instruction referencing three registers and two constants.
+ */
 
-    return false;
+void emitter::emitIns_R_R_R_I_I(instruction ins,
+                                emitAttr    attr,
+                                regNumber   reg1,
+                                regNumber   reg2,
+                                regNumber   reg3,
+                                ssize_t     imm1,
+                                ssize_t     imm2,
+                                insOpts     opt)
+{
+    // Currently, only SVE instructions use this format.
+    emitInsSve_R_R_R_I_I(ins, attr, reg1, reg2, reg3, imm1, imm2, opt);
 }
 
 /*****************************************************************************
  *
- *  Returns 0, 1, 2, 3 or 4 depending on the instruction and format.
- *  This is for formats that have [<Xn|SP>, <Zm>.T, <mod>], [<Xn|SP>, <Zm>.T, <mod> #N], [<Xn|SP>, <Xm>, LSL #N],
- * [<Xn|SP>{, <Xm>, LSL #N}]
+ *  Add an instruction referencing three registers, with an extend option
  */
 
-/*static*/ int emitter::insSveGetLslOrModN(instruction ins, insFormat fmt)
+void emitter::emitIns_R_R_R_Ext(instruction ins,
+                                emitAttr    attr,
+                                regNumber   reg1,
+                                regNumber   reg2,
+                                regNumber   reg3,
+                                insOpts     opt, /* = INS_OPTS_NONE */
+                                int         shiftAmount) /* = -1 -- unset   */
 {
-    switch (fmt)
+    emitAttr  size   = EA_SIZE(attr);
+    insFormat fmt    = IF_NONE;
+    bool      isSIMD = false;
+    int       scale  = -1;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
     {
-        case IF_SVE_JD_4A:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_st1h:
-                    return 1;
+        case INS_ldrb:
+        case INS_ldrsb:
+        case INS_strb:
+            scale = 0;
+            break;
 
-                default:
-                    break;
-            }
+        case INS_ldrh:
+        case INS_ldrsh:
+        case INS_strh:
+            scale = 1;
             break;
 
-        case IF_SVE_JD_4B:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_st1w:
-                    return 2;
+        case INS_ldrsw:
+            scale = 2;
+            break;
 
-                default:
-                    break;
+        case INS_ldr:
+        case INS_str:
+            // Is the target a vector register?
+            if (isVectorRegister(reg1))
+            {
+                assert(isValidVectorLSDatasize(size));
+                scale  = NaturalScale_helper(size);
+                isSIMD = true;
             }
-            break;
-
-        case IF_SVE_HW_4B:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
+            else
             {
-                case INS_sve_ld1h:
-                case INS_sve_ld1sh:
-                case INS_sve_ldff1h:
-                case INS_sve_ldff1sh:
-                    return 1;
-
-                case INS_sve_ld1w:
-                case INS_sve_ldff1w:
-                    return 2;
-
-                default:
-                    break;
+                assert(isValidGeneralDatasize(size));
+                scale = (size == EA_8BYTE) ? 3 : 2;
             }
+
             break;
 
-        case IF_SVE_JJ_4A:
-        case IF_SVE_JJ_4A_B:
-        case IF_SVE_JJ_4A_C:
-        case IF_SVE_JJ_4A_D:
-        case IF_SVE_JK_4A:
-        case IF_SVE_JK_4A_B:
-        case IF_SVE_HW_4A:
-        case IF_SVE_HW_4A_A:
-        case IF_SVE_HW_4A_B:
-        case IF_SVE_HW_4A_C:
-        case IF_SVE_IU_4A:
-        case IF_SVE_IU_4A_A:
-        case IF_SVE_IU_4A_C:
-            assert(!insSveIsLslN(ins, fmt));
-            assert(insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ld1h:
-                case INS_sve_ld1sh:
-                case INS_sve_ldff1h:
-                case INS_sve_ldff1sh:
-                    switch (fmt)
-                    {
-                        case IF_SVE_HW_4A:
-                        case IF_SVE_HW_4A_A:
-                            return 1;
+        default:
+            unreached();
+            break;
 
-                        default:
-                            break;
-                    }
-                    return 0;
+    } // end switch (ins)
 
-                case INS_sve_ld1w:
-                case INS_sve_ldff1w:
-                case INS_sve_ld1sw:
-                case INS_sve_ldff1sw:
-                    switch (fmt)
-                    {
-                        case IF_SVE_HW_4A:
-                        case IF_SVE_HW_4A_A:
-                        case IF_SVE_IU_4A:
-                            return 2;
+    assert(scale != -1);
+    assert(insOptsLSExtend(opt));
 
-                        default:
-                            break;
-                    }
-                    return 0;
+    if (isSIMD)
+    {
+        assert(isValidVectorLSDatasize(size));
+        assert(isVectorRegister(reg1));
+    }
+    else
+    {
+        assert(isValidGeneralLSDatasize(size));
+        assert(isGeneralRegisterOrZR(reg1));
+    }
 
-                case INS_sve_ld1d:
-                case INS_sve_ldff1d:
-                    switch (fmt)
-                    {
-                        case IF_SVE_IU_4A:
-                            return 3;
+    assert(isGeneralRegisterOrSP(reg2));
+    assert(isGeneralRegister(reg3));
 
-                        default:
-                            break;
-                    }
-                    return 0;
+    // Load/Store reserved encodings:
+    if (insOptsIndexed(opt))
+    {
+        assert(reg1 != reg2);
+    }
 
-                case INS_sve_st1h:
-                    switch (fmt)
-                    {
-                        case IF_SVE_JJ_4A_C:
-                        case IF_SVE_JJ_4A_D:
-                            return 0;
+    if (shiftAmount == -1)
+    {
+        shiftAmount = insOptsLSL(opt) ? scale : 0;
+    }
 
-                        default:
-                            break;
-                    }
-                    return 1;
+    assert((shiftAmount == scale) || (shiftAmount == 0));
 
-                case INS_sve_st1w:
-                    switch (fmt)
-                    {
-                        case IF_SVE_JJ_4A_C:
-                        case IF_SVE_JJ_4A_D:
-                            return 0;
+    reg2 = encodingSPtoZR(reg2);
+    fmt  = IF_LS_3A;
 
-                        default:
-                            break;
-                    }
-                    return 2;
+    instrDesc* id = emitNewInstr(attr);
 
-                case INS_sve_st1d:
-                    if (fmt == IF_SVE_JJ_4A_B)
-                    {
-                        return 0;
-                    }
-                    return 3;
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
 
-                default:
-                    break;
-            }
-            return 0;
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
+    id->idReg3Scaled(shiftAmount == scale);
 
-        case IF_SVE_IG_4A:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ldff1sw:
-                    return 2;
+    dispIns(id);
+    appendToCurIG(id);
+}
 
-                case INS_sve_ldff1d:
-                    return 3;
+/*****************************************************************************
+ *
+ *  Add an instruction referencing two registers and two constants.
+ */
 
-                default:
-                    break;
-            }
-            break;
+void emitter::emitIns_R_R_I_I(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt)
+{
+    emitAttr  size     = EA_SIZE(attr);
+    emitAttr  elemsize = EA_UNKNOWN;
+    insFormat fmt      = IF_NONE;
+    size_t    immOut   = 0; // composed from imm1 and imm2 and stored in the instrDesc
 
-        case IF_SVE_IG_4A_F:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ldff1sh:
-                    return 1;
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        int        lsb;
+        int        width;
+        bitMaskImm bmi;
+        unsigned   registerListSize;
 
-                case INS_sve_ldff1w:
-                    return 2;
+        case INS_bfm:
+        case INS_sbfm:
+        case INS_ubfm:
+            assert(isGeneralRegister(reg1));
+            assert((ins == INS_bfm) ? isGeneralRegisterOrZR(reg2) : isGeneralRegister(reg2));
+            assert(isValidImmShift(imm1, size));
+            assert(isValidImmShift(imm2, size));
+            assert(insOptsNone(opt));
+            bmi.immNRS = 0;
+            bmi.immN   = (size == EA_8BYTE);
+            bmi.immR   = imm1;
+            bmi.immS   = imm2;
+            immOut     = bmi.immNRS;
+            fmt        = IF_DI_2D;
+            break;
 
-                default:
-                    break;
-            }
+        case INS_bfi:
+        case INS_sbfiz:
+        case INS_ubfiz:
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegister(reg2));
+            lsb   = getBitWidth(size) - imm1;
+            width = imm2 - 1;
+            assert(isValidImmShift(lsb, size));
+            assert(isValidImmShift(width, size));
+            assert(insOptsNone(opt));
+            bmi.immNRS = 0;
+            bmi.immN   = (size == EA_8BYTE);
+            bmi.immR   = lsb;
+            bmi.immS   = width;
+            immOut     = bmi.immNRS;
+            fmt        = IF_DI_2D;
             break;
 
-        case IF_SVE_IG_4A_G:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ldff1h:
-                    return 1;
+        case INS_bfxil:
+        case INS_sbfx:
+        case INS_ubfx:
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegister(reg2));
+            lsb   = imm1;
+            width = imm2 + imm1 - 1;
+            assert(isValidImmShift(lsb, size));
+            assert(isValidImmShift(width, size));
+            assert(insOptsNone(opt));
+            bmi.immNRS = 0;
+            bmi.immN   = (size == EA_8BYTE);
+            bmi.immR   = imm1;
+            bmi.immS   = imm2 + imm1 - 1;
+            immOut     = bmi.immNRS;
+            fmt        = IF_DI_2D;
+            break;
 
-                default:
-                    break;
-            }
+        case INS_mov:
+        case INS_ins:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            elemsize = size;
+            assert(isValidVectorElemsize(elemsize));
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm1));
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm2));
+            assert(insOptsNone(opt));
+            immOut = (imm1 << 4) + imm2;
+            fmt    = IF_DV_2F;
             break;
 
-        case IF_SVE_II_4A:
-        case IF_SVE_II_4A_B:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ld1d:
-                    return 3;
+        case INS_ld1:
+        case INS_ld2:
+        case INS_ld3:
+        case INS_ld4:
+        case INS_st1:
+        case INS_st2:
+        case INS_st3:
+        case INS_st4:
+            assert(isVectorRegister(reg1));
+            assert(isGeneralRegisterOrSP(reg2));
 
-                default:
-                    break;
-            }
-            break;
+            elemsize = size;
+            assert(isValidVectorElemsize(elemsize));
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm1));
 
-        case IF_SVE_II_4A_H:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ld1w:
-                    return 2;
+            registerListSize = insGetRegisterListSize(ins);
+            assert((elemsize * registerListSize) == (unsigned)imm2);
+            assert(insOptsPostIndex(opt));
 
-                default:
-                    break;
-            }
+            // Load/Store single structure  post-indexed by an immediate
+            reg2   = encodingSPtoZR(reg2);
+            immOut = imm1;
+            fmt    = IF_LS_2G;
             break;
 
-        case IF_SVE_IK_4A:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ld1sw:
-                    return 2;
-
-                default:
-                    break;
-            }
+        default:
+            unreached();
             break;
+    }
+    assert(fmt != IF_NONE);
 
-        case IF_SVE_IK_4A_G:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ld1sh:
-                    return 1;
+    instrDesc* id = emitNewInstrSC(attr, immOut);
 
-                default:
-                    break;
-            }
-            break;
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
 
-        case IF_SVE_IK_4A_I:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ld1h:
-                    return 1;
+    id->idReg1(reg1);
+    id->idReg2(reg2);
 
-                default:
-                    break;
-            }
-            break;
+    dispIns(id);
+    appendToCurIG(id);
+}
 
-        case IF_SVE_IN_4A:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ldnt1h:
-                    return 1;
-                case INS_sve_ldnt1w:
-                    return 2;
-                case INS_sve_ldnt1d:
-                    return 3;
+/*****************************************************************************
+ *
+ *  Add an instruction referencing four registers.
+ */
 
-                default:
-                    break;
-            }
+void emitter::emitIns_R_R_R_R(instruction     ins,
+                              emitAttr        attr,
+                              regNumber       reg1,
+                              regNumber       reg2,
+                              regNumber       reg3,
+                              regNumber       reg4,
+                              insOpts         opt /* = INS_OPTS_NONE*/,
+                              insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
+{
+    emitAttr  size = EA_SIZE(attr);
+    insFormat fmt  = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_madd:
+        case INS_msub:
+        case INS_smaddl:
+        case INS_smsubl:
+        case INS_umaddl:
+        case INS_umsubl:
+            assert(isValidGeneralDatasize(size));
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isGeneralRegister(reg4));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_DR_4A;
             break;
 
-        case IF_SVE_IP_4A:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ld1roh:
-                case INS_sve_ld1rqh:
-                    return 1;
-
-                case INS_sve_ld1row:
-                case INS_sve_ld1rqw:
-                    return 2;
-                case INS_sve_ld1rod:
-                case INS_sve_ld1rqd:
-                    return 3;
-
-                default:
-                    break;
-            }
+        case INS_fmadd:
+        case INS_fmsub:
+        case INS_fnmadd:
+        case INS_fnmsub:
+            // Scalar operation
+            assert(isValidScalarDatasize(size));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isVectorRegister(reg4));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_DV_4A;
             break;
 
-        case IF_SVE_IR_4A:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ld2q:
-                case INS_sve_ld3q:
-                case INS_sve_ld4q:
-                    return 4;
-
-                default:
-                    break;
-            }
+        case INS_invalid:
+            fmt = IF_NONE;
             break;
 
-        case IF_SVE_IT_4A:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ld2h:
-                case INS_sve_ld3h:
-                case INS_sve_ld4h:
-                    return 1;
+        // Fallback handles emitting the SVE instructions.
+        default:
+            return emitInsSve_R_R_R_R(ins, attr, reg1, reg2, reg3, reg4, opt, sopt);
+    }
+    assert(fmt != IF_NONE);
 
-                case INS_sve_ld2w:
-                case INS_sve_ld3w:
-                case INS_sve_ld4w:
-                    return 2;
+    instrDesc* id = emitNewInstr(attr);
 
-                case INS_sve_ld2d:
-                case INS_sve_ld3d:
-                case INS_sve_ld4d:
-                    return 3;
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
 
-                default:
-                    break;
-            }
-            break;
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
+    id->idReg4(reg4);
 
-        case IF_SVE_IU_4B:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_ld1sw:
-                case INS_sve_ldff1sw:
-                    return 2;
+    dispIns(id);
+    appendToCurIG(id);
+}
 
-                case INS_sve_ld1d:
-                case INS_sve_ldff1d:
-                    return 3;
+/*****************************************************************************
+ *
+ *  Add an instruction referencing four registers and a constant.
+ */
 
-                default:
-                    break;
-            }
-            break;
+void emitter::emitIns_R_R_R_R_I(instruction ins,
+                                emitAttr    attr,
+                                regNumber   reg1,
+                                regNumber   reg2,
+                                regNumber   reg3,
+                                regNumber   reg4,
+                                ssize_t     imm,
+                                insOpts     opt /* = INS_OPT_NONE*/)
+{
+    // Currently, only SVE instructions use this format.
+    emitInsSve_R_R_R_R_I(ins, attr, reg1, reg2, reg3, reg4, imm, opt);
+}
 
-        case IF_SVE_JB_4A:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_stnt1h:
-                    return 1;
+/*****************************************************************************
+ *
+ *  Add an instruction referencing a register and a condition code
+ */
 
-                case INS_sve_stnt1w:
-                    return 2;
+void emitter::emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond)
+{
+    insFormat    fmt = IF_NONE;
+    condFlagsImm cfi;
+    cfi.immCFVal = 0;
 
-                case INS_sve_stnt1d:
-                    return 3;
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_cset:
+        case INS_csetm:
+            assert(isGeneralRegister(reg));
+            cfi.cond = cond;
+            fmt      = IF_DR_1D;
+            break;
 
-                default:
-                    break;
-            }
+        default:
+            unreached();
             break;
 
-        case IF_SVE_JC_4A:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_st2h:
-                case INS_sve_st3h:
-                case INS_sve_st4h:
-                    return 1;
+    } // end switch (ins)
 
-                case INS_sve_st2w:
-                case INS_sve_st3w:
-                case INS_sve_st4w:
-                    return 2;
+    assert(fmt != IF_NONE);
+    assert(isValidImmCond(cfi.immCFVal));
 
-                case INS_sve_st2d:
-                case INS_sve_st3d:
-                case INS_sve_st4d:
-                    return 3;
+    instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
 
-                default:
-                    break;
-            }
-            break;
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
 
-        case IF_SVE_JD_4C:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_st1w:
-                    return 2;
+    id->idReg1(reg);
 
-                case INS_sve_st1d:
-                    return 3;
+    dispIns(id);
+    appendToCurIG(id);
+}
 
-                default:
-                    break;
-            }
-            break;
+/*****************************************************************************
+ *
+ *  Add an instruction referencing two registers and a condition code
+ */
 
-        case IF_SVE_JD_4C_A:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_st1d:
-                    return 3;
+void emitter::emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond)
+{
+    insFormat    fmt = IF_NONE;
+    condFlagsImm cfi;
+    cfi.immCFVal = 0;
 
-                default:
-                    break;
-            }
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_cinc:
+        case INS_cinv:
+        case INS_cneg:
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegisterOrZR(reg2));
+            cfi.cond = cond;
+            fmt      = IF_DR_2D;
             break;
-
-        case IF_SVE_JF_4A:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_st2q:
-                case INS_sve_st3q:
-                case INS_sve_st4q:
-                    return 4;
-
-                default:
-                    break;
-            }
+        default:
+            unreached();
             break;
 
-        case IF_SVE_JJ_4B:
-            assert(insSveIsLslN(ins, fmt));
-            assert(!insSveIsModN(ins, fmt));
-            switch (ins)
-            {
-                case INS_sve_st1h:
-                    return 1;
+    } // end switch (ins)
 
-                case INS_sve_st1w:
-                    return 2;
+    assert(fmt != IF_NONE);
+    assert(isValidImmCond(cfi.immCFVal));
 
-                case INS_sve_st1d:
-                    return 3;
+    instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
 
-                default:
-                    break;
-            }
-            break;
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
 
-        default:
-            break;
-    }
+    id->idReg1(reg1);
+    id->idReg2(reg2);
 
-    assert(!"Unexpected instruction format");
-    return 0;
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- *  Returns true if the specified instruction can encode the 'dtype' field.
+ *  Add an instruction referencing two registers and a condition code
  */
 
-/*static*/ bool emitter::canEncodeSveElemsize_dtype(instruction ins)
+void emitter::emitIns_R_R_R_COND(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insCond cond)
 {
+    insFormat    fmt = IF_NONE;
+    condFlagsImm cfi;
+    cfi.immCFVal = 0;
+
+    /* Figure out the encoding format of the instruction */
     switch (ins)
     {
-        case INS_sve_ld1w:
-        case INS_sve_ld1sb:
-        case INS_sve_ld1b:
-        case INS_sve_ld1sh:
-        case INS_sve_ld1h:
-        case INS_sve_ldnf1sh:
-        case INS_sve_ldnf1w:
-        case INS_sve_ldnf1h:
-        case INS_sve_ldnf1sb:
-        case INS_sve_ldnf1b:
-        case INS_sve_ldff1b:
-        case INS_sve_ldff1sb:
-        case INS_sve_ldff1h:
-        case INS_sve_ldff1sh:
-        case INS_sve_ldff1w:
-            return true;
+        case INS_csel:
+        case INS_csinc:
+        case INS_csinv:
+        case INS_csneg:
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegisterOrZR(reg2));
+            assert(isGeneralRegisterOrZR(reg3));
+            cfi.cond = cond;
+            fmt      = IF_DR_3D;
+            break;
 
         default:
-            return false;
-    }
+            unreached();
+            break;
+
+    } // end switch (ins)
+
+    assert(fmt != IF_NONE);
+    assert(isValidImmCond(cfi.immCFVal));
+
+    instrDesc* id = emitNewInstr(attr);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
+    id->idSmallCns(cfi.immCFVal);
+
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
- *  for the 'dtype' field.
+ *  Add an instruction referencing two registers the flags and a condition code
  */
 
-/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t code)
+void emitter::emitIns_R_R_FLAGS_COND(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCflags flags, insCond cond)
 {
-    assert(canEncodeSveElemsize_dtype(ins));
-    assert(ins != INS_sve_ld1w);
-    switch (size)
-    {
-        case EA_1BYTE:
-            switch (ins)
-            {
-                case INS_sve_ld1b:
-                case INS_sve_ldnf1b:
-                case INS_sve_ldff1b:
-                    return code; // By default, the instruction already encodes 8-bit.
-
-                default:
-                    assert(!"Invalid instruction for encoding dtype.");
-            }
-            return code;
-
-        case EA_2BYTE:
-            switch (ins)
-            {
-                case INS_sve_ld1b:
-                case INS_sve_ld1h:
-                case INS_sve_ldnf1b:
-                case INS_sve_ldnf1h:
-                case INS_sve_ldff1b:
-                case INS_sve_ldff1h:
-                    return code | (1 << 21); // Set bit '21' to 1.
-
-                case INS_sve_ld1sb:
-                case INS_sve_ldnf1sb:
-                case INS_sve_ldff1sb:
-                    return code | (1 << 22); // Set bit '22' to 1.
-
-                default:
-                    assert(!"Invalid instruction for encoding dtype.");
-            }
-            return code;
+    insFormat    fmt = IF_NONE;
+    condFlagsImm cfi;
+    cfi.immCFVal = 0;
 
-        case EA_4BYTE:
-            switch (ins)
-            {
-                case INS_sve_ldnf1w:
-                case INS_sve_ldff1w:
-                    return code; // By default, the instruction already encodes 32-bit.
-
-                case INS_sve_ld1b:
-                case INS_sve_ld1h:
-                case INS_sve_ldnf1b:
-                case INS_sve_ldnf1h:
-                case INS_sve_ldff1b:
-                case INS_sve_ldff1h:
-                    return code | (1 << 22); // Set bit '22' to 1.
-
-                case INS_sve_ld1sb:
-                case INS_sve_ld1sh:
-                case INS_sve_ldnf1sb:
-                case INS_sve_ldnf1sh:
-                case INS_sve_ldff1sb:
-                case INS_sve_ldff1sh:
-                    return code | (1 << 21); // Set bit '21' to 1.
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_ccmp:
+        case INS_ccmn:
+            assert(isGeneralRegister(reg1));
+            assert(isGeneralRegister(reg2));
+            cfi.flags = flags;
+            cfi.cond  = cond;
+            fmt       = IF_DR_2I;
+            break;
+        default:
+            unreached();
+            break;
+    } // end switch (ins)
 
-                default:
-                    assert(!"Invalid instruction for encoding dtype.");
-            }
-            return code;
+    assert(fmt != IF_NONE);
+    assert(isValidImmCondFlags(cfi.immCFVal));
 
-        case EA_8BYTE:
-            switch (ins)
-            {
-                case INS_sve_ldnf1w:
-                case INS_sve_ldff1w:
-                    return code | (1 << 21); // Set bit '21' to 1. Set bit '15' to 1.
-
-                case INS_sve_ld1b:
-                case INS_sve_ld1h:
-                case INS_sve_ldnf1b:
-                case INS_sve_ldnf1h:
-                case INS_sve_ldff1b:
-                case INS_sve_ldff1h:
-                    return (code | (1 << 22)) | (1 << 21); // Set bit '22' and '21' to 1.
-
-                case INS_sve_ld1sb:
-                case INS_sve_ld1sh:
-                case INS_sve_ldnf1sb:
-                case INS_sve_ldnf1sh:
-                case INS_sve_ldff1sb:
-                case INS_sve_ldff1sh:
-                    return code; // By default, the instruction already encodes 64-bit.
+    instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
 
-                default:
-                    assert(!"Invalid instruction for encoding dtype.");
-            }
-            return code;
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
 
-        default:
-            assert(!"Invalid size for encoding dtype.");
-    }
+    id->idReg1(reg1);
+    id->idReg2(reg2);
 
-    return code;
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- * Returns the encoding to select the 4/8/16 byte elemsize for the Arm64 Sve vector instruction 'ld1w'
- * for the 'dtype' field.
+ *  Add an instruction referencing a register, an immediate, the flags and a condition code
  */
 
-/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtype_ld1w(instruction ins,
-                                                                    insFormat   fmt,
-                                                                    emitAttr    size,
-                                                                    code_t      code)
+void emitter::emitIns_R_I_FLAGS_COND(
+    instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insCflags flags, insCond cond)
 {
-    assert(canEncodeSveElemsize_dtype(ins));
-    assert(ins == INS_sve_ld1w);
-    switch (size)
+    insFormat    fmt = IF_NONE;
+    condFlagsImm cfi;
+    cfi.immCFVal = 0;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
     {
-        case EA_4BYTE:
-            switch (fmt)
+        case INS_ccmp:
+        case INS_ccmn:
+            assert(isGeneralRegister(reg));
+            if (imm < 0)
             {
-                case IF_SVE_IH_3A_F:
-                    // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the
-                    // proper encoding for S.
-                    return (code | (1 << 15)) | (1 << 22); // Set bit '22' and '15' to 1.
-
-                case IF_SVE_II_4A_H:
-                    // Note: Bit '14' is not actually part of 'dtype', but it is necessary to set to '1' to get the
-                    // proper encoding for S.
-                    return (code | (1 << 14)) | (1 << 22); // Set bit '22' and '14' to 1.
-
-                default:
-                    break;
+                ins = insReverse(ins);
+                imm = -imm;
             }
-            break;
-
-        case EA_8BYTE:
-            switch (fmt)
+            if (isValidUimm<5>(imm))
             {
-                case IF_SVE_IH_3A_F:
-                    // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the
-                    // proper encoding for D.
-                    return ((code | (1 << 15)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '15' to 1.
-
-                case IF_SVE_II_4A_H:
-                    // Note: Bit '14' is not actually part of 'dtype', but it is necessary to set to '1' to get the
-                    // proper encoding for D.
-                    return ((code | (1 << 14)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '14' to 1.
-
-                default:
-                    break;
+                cfi.imm5  = imm;
+                cfi.flags = flags;
+                cfi.cond  = cond;
+                fmt       = IF_DI_1F;
             }
-            break;
-
-        case EA_16BYTE:
-            switch (fmt)
+            else
             {
-                case IF_SVE_IH_3A_F:
-                    return code | (1 << 20); // Set bit '20' to 1.
-
-                case IF_SVE_II_4A_H:
-                    // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the
-                    // proper encoding for Q.
-                    return code | (1 << 15); // Set bit '15' to 1.
-
-                default:
-                    break;
+                assert(!"Instruction cannot be encoded: ccmp/ccmn imm5");
             }
             break;
-
         default:
-            assert(!"Invalid size for encoding dtype.");
+            unreached();
             break;
-    }
+    } // end switch (ins)
 
-    assert(!"Invalid instruction format");
-    return code;
-}
+    assert(fmt != IF_NONE);
+    assert(isValidImmCondFlagsImm5(cfi.immCFVal));
 
-/*****************************************************************************
- *
- *  Returns the encoding for the immediate value as 4-bits at bit locations '19-16'.
- */
+    instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
 
-/*static*/ emitter::code_t emitter::insEncodeSimm4_19_to_16(ssize_t imm)
-{
-    assert(isValidSimm4(imm));
-    if (imm < 0)
-    {
-        imm = (imm & 0xF);
-    }
-    return (code_t)imm << 16;
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
+
+    id->idReg1(reg);
+
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- *  Returns the encoding for the immediate value as 9-bits at bit locations '21-16' for high and '12-10' for low.
+ *  Add a memory barrier instruction with a 'barrier' immediate
  */
 
-/*static*/ emitter::code_t emitter::insEncodeSimm9h9l_21_to_16_and_12_to_10(ssize_t imm)
+void emitter::emitIns_BARR(instruction ins, insBarrier barrier)
 {
-    assert(isValidSimm9(imm));
+    insFormat fmt = IF_NONE;
+    ssize_t   imm = 0;
 
-    if (imm < 0)
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
     {
-        imm = (imm & 0x1FF);
-    }
+        case INS_dsb:
+        case INS_dmb:
+        case INS_isb:
 
-    code_t h = (code_t)(imm & 0x1F8) << 13;          // encode high 6-bits at locations '21-16'
-    code_t l = (code_t)((imm & ~0x1F8) & 0x7) << 10; // encode low 3-bits at locations '12-10'
+            fmt = IF_SI_0B;
+            imm = (ssize_t)barrier;
+            break;
+        default:
+            unreached();
+            break;
+    } // end switch (ins)
 
-    return (h | l);
-}
+    assert(fmt != IF_NONE);
 
-/*****************************************************************************
- *
- *  Returns the encoding for the immediate value that is a multiple of 2 as 4-bits at bit locations '19-16'.
- */
+    instrDesc* id = emitNewInstrSC(EA_8BYTE, imm);
 
-/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf2_19_to_16(ssize_t imm)
-{
-    assert(isValidSimm4_MultipleOf2(imm));
-    return insEncodeSimm4_19_to_16(imm / 2);
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
+
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- *  Returns the encoding for the immediate value that is a multiple of 3 as 4-bits at bit locations '19-16'.
+ *  Add an instruction with a static data member operand. If 'size' is 0, the
+ *  instruction operates on the address of the static member instead of its
+ *  value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
  */
 
-/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf3_19_to_16(ssize_t imm)
+void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
 {
-    assert(isValidSimm4_MultipleOf3(imm));
-    return insEncodeSimm4_19_to_16(imm / 3);
+    NYI("emitIns_C");
 }
 
 /*****************************************************************************
  *
- *  Returns the encoding for the immediate value that is a multiple of 4 as 4-bits at bit locations '19-16'.
+ *  Add an instruction referencing stack-based local variable.
  */
 
-/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf4_19_to_16(ssize_t imm)
+void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
 {
-    assert(isValidSimm4_MultipleOf4(imm));
-    return insEncodeSimm4_19_to_16(imm / 4);
+    NYI("emitIns_S");
 }
 
 /*****************************************************************************
  *
- *  Returns the encoding for the immediate value that is a multiple of 16 as 4-bits at bit locations '19-16'.
+ *  Add an instruction referencing a register and a stack-based local variable.
  */
-
-/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf16_19_to_16(ssize_t imm)
+void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
 {
-    assert(isValidSimm4_MultipleOf16(imm));
-    return insEncodeSimm4_19_to_16(imm / 16);
-}
+    emitAttr  size         = EA_SIZE(attr);
+    insFormat fmt          = IF_NONE;
+    unsigned  scale        = 0;
+    bool      isLdrStr     = false;
+    bool      isSimple     = true;
+    bool      useRegForImm = false;
 
-/*****************************************************************************
- *
- *  Returns the encoding for the immediate value that is a multiple of 32 as 4-bits at bit locations '19-16'.
- */
+    assert(offs >= 0);
 
-/*static*/ emitter::code_t emitter::insEncodeSimm4_MultipleOf32_19_to_16(ssize_t imm)
-{
-    assert(isValidSimm4_MultipleOf32(imm));
-    return insEncodeSimm4_19_to_16(imm / 32);
-}
+    /* Figure out the variable's frame position */
+    bool    FPbased;
+    int     base = emitComp->lvaFrameAddress(varx, &FPbased);
+    int     disp = base + offs;
+    ssize_t imm  = disp;
+
+    regNumber reg2 = encodingSPtoZR(FPbased ? REG_FPBASE : REG_SPBASE);
+
+    // TODO-ARM64-CQ: use unscaled loads?
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_strb:
+        case INS_ldrb:
+        case INS_ldrsb:
+            scale = 0;
+            break;
+
+        case INS_strh:
+        case INS_ldrh:
+        case INS_ldrsh:
+            scale = 1;
+            break;
+
+        case INS_ldrsw:
+            scale = 2;
+            break;
+
+        case INS_str:
+        case INS_ldr:
+            assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size));
+            scale    = genLog2(EA_SIZE_IN_BYTES(size));
+            isLdrStr = true;
+            break;
+
+        case INS_lea:
+            assert(size == EA_8BYTE);
+            isSimple = false;
+            scale    = 0;
+
+            if (disp >= 0)
+            {
+                ins = INS_add;
+            }
+            else
+            {
+                ins = INS_sub;
+                imm = -disp;
+            }
+
+            if (imm <= 0x0fff)
+            {
+                fmt = IF_DI_2A; // add reg1,reg2,#disp
+            }
+            else
+            {
+                regNumber rsvdReg = codeGen->rsGetRsvdReg();
+                codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
+                fmt = IF_DR_3A; // add reg1,reg2,rsvdReg
+            }
+            break;
+
+        case INS_sve_ldr:
+        {
+            assert(isVectorRegister(reg1));
+            isSimple = false;
+            size     = EA_SCALABLE;
+            attr     = size;
+            fmt      = IF_SVE_IE_2A;
+
+            // TODO-SVE: Don't assume 128bit vectors
+            scale        = NaturalScale_helper(EA_16BYTE);
+            ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+
+            if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale)))
+            {
+                imm >>= scale; // The immediate is scaled by the size of the ld/st
+            }
+            else
+            {
+                useRegForImm      = true;
+                regNumber rsvdReg = codeGen->rsGetRsvdReg();
+                codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
+            }
+        }
+        break;
+
+        // TODO-SVE: Fold into INS_sve_ldr once REG_V0 and REG_P0 are distinct
+        case INS_sve_ldr_mask:
+        {
+            assert(isPredicateRegister(reg1));
+            isSimple = false;
+            size     = EA_SCALABLE;
+            attr     = size;
+            fmt      = IF_SVE_ID_2A;
+            ins      = INS_sve_ldr;
+
+            // TODO-SVE: Don't assume 128bit vectors
+            // Predicate size is vector length / 8
+            scale        = NaturalScale_helper(EA_2BYTE);
+            ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+
+            if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale)))
+            {
+                imm >>= scale; // The immediate is scaled by the size of the ld/st
+            }
+            else
+            {
+                useRegForImm      = true;
+                regNumber rsvdReg = codeGen->rsGetRsvdReg();
+                codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
+            }
+        }
+        break;
+
+        default:
+            NYI("emitIns_R_S"); // FP locals?
+            return;
+
+    } // end switch (ins)
+
+    assert((scale >= 0) && (scale <= 4));
+
+    if (isSimple)
+    {
+        ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+
+        if (imm == 0)
+        {
+            fmt = IF_LS_2A;
+        }
+        else if ((imm < 0) || ((imm & mask) != 0))
+        {
+            if (isValidSimm<9>(imm))
+            {
+                fmt = IF_LS_2C;
+            }
+            else
+            {
+                useRegForImm = true;
+            }
+        }
+        else if (imm > 0)
+        {
+            if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
+            {
+                imm >>= scale; // The immediate is scaled by the size of the ld/st
+
+                fmt = IF_LS_2B;
+            }
+            else
+            {
+                useRegForImm = true;
+            }
+        }
+
+        if (useRegForImm)
+        {
+            regNumber rsvdReg = codeGen->rsGetRsvdReg();
+            codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
+            fmt = IF_LS_3A;
+        }
+    }
 
-/*****************************************************************************
- *
- *  Returns the encoding for the immediate value as 5-bits at bit locations '20-16'.
- */
+    assert(fmt != IF_NONE);
 
-/*static*/ emitter::code_t emitter::insEncodeSimm5_20_to_16(ssize_t imm)
-{
-    assert(isValidSimm5(imm));
-    if (imm < 0)
+    // Try to optimize a load/store with an alternative instruction.
+    if (isLdrStr && emitComp->opts.OptimizationEnabled() &&
+        OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs DEBUG_ARG(useRegForImm)))
     {
-        imm = (imm & 0x1F);
+        return;
     }
-    return (code_t)imm << 16;
-}
-
-/*****************************************************************************
- *
- *  Returns the encoding for the immediate value as 2-bits at bit locations '9-8'.
- */
 
-/*static*/ emitter::code_t emitter::insEncodeUimm2_9_to_8(ssize_t imm)
-{
-    assert(isValidUimm2(imm));
-    return (code_t)imm << 8;
-}
+    instrDesc* id = emitNewInstrCns(attr, imm);
 
-/*****************************************************************************
- *
- *  Returns the encoding for the immediate value as 2-bits at bit locations '11-10'.
- */
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
 
-/*static*/ emitter::code_t emitter::insEncodeUimm2_11_to_10(ssize_t imm)
-{
-    assert(isValidUimm2(imm));
-    return (code_t)imm << 10;
-}
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+    id->idSetIsLclVar();
 
-/*****************************************************************************
- *
- *  Returns the encoding for the immediate value as 2-bits at bit locations '20-19'.
- */
+#ifdef DEBUG
+    id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
 
-/*static*/ emitter::code_t emitter::insEncodeUimm2_20_to_19(ssize_t imm)
-{
-    assert(isValidUimm2(imm));
-    return (code_t)imm << 19;
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- *  Returns the encoding for the immediate value as 1 bit at bit location '11'.
+ *  Add an instruction referencing two register and consecutive stack-based local variable slots.
  */
-
-/*static*/ emitter::code_t emitter::insEncodeImm1_11(ssize_t imm)
+void emitter::emitIns_R_R_S_S(
+    instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs)
 {
-    assert(isValidImm1(imm));
-    return (code_t)imm << 11;
-}
+    assert((ins == INS_ldp) || (ins == INS_ldnp));
+    assert(EA_8BYTE == EA_SIZE(attr1));
+    assert(EA_8BYTE == EA_SIZE(attr2));
+    assert(isGeneralRegisterOrZR(reg1));
+    assert(isGeneralRegisterOrZR(reg2));
+    assert(offs >= 0);
 
-/*****************************************************************************
- *
- *  Returns the encoding for the immediate value as 1 bit at bit location '22'.
- */
+    insFormat      fmt   = IF_LS_3B;
+    int            disp  = 0;
+    const unsigned scale = 3;
 
-/*static*/ emitter::code_t emitter::insEncodeImm1_22(ssize_t imm)
-{
-    assert(isValidImm1(imm));
-    return (code_t)imm << 22;
-}
+    /* Figure out the variable's frame position */
+    int  base;
+    bool FPbased;
 
-/*****************************************************************************
- *
- *  Returns the encoding for the immediate value as 7-bits at bit locations '20-14'.
- */
+    base = emitComp->lvaFrameAddress(varx, &FPbased);
+    disp = base + offs;
 
-/*static*/ emitter::code_t emitter::insEncodeUimm7_20_to_14(ssize_t imm)
-{
-    assert(isValidUimm7(imm));
-    return (code_t)imm << 14;
-}
+    // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead?
+    regNumber reg3 = FPbased ? REG_FPBASE : REG_SPBASE;
+    reg3           = encodingSPtoZR(reg3);
 
-/*****************************************************************************
- *
- *  Returns the encoding for the immediate value as 4-bits starting from 1, at bit locations '19-16'.
- */
+    bool    useRegForAdr = true;
+    ssize_t imm          = disp;
+    ssize_t mask         = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+    if (imm == 0)
+    {
+        useRegForAdr = false;
+    }
+    else
+    {
+        if ((imm & mask) == 0)
+        {
+            ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st
 
-/*static*/ emitter::code_t emitter::insEncodeUimm4From1_19_to_16(ssize_t imm)
-{
-    assert(isValidUimm4From1(imm));
-    return (code_t)(imm - 1) << 16;
-}
+            if ((immShift >= -64) && (immShift <= 63))
+            {
+                fmt          = IF_LS_3C;
+                useRegForAdr = false;
+                imm          = immShift;
+            }
+        }
+    }
 
-/*****************************************************************************
- *
- *  Returns the encoding for the immediate value as 8-bits at bit locations '12-5'.
- */
+    if (useRegForAdr)
+    {
+        regNumber rsvd = codeGen->rsGetRsvdReg();
+        emitIns_R_R_Imm(INS_add, EA_PTRSIZE, rsvd, reg3, imm);
+        reg3 = rsvd;
+        imm  = 0;
+    }
 
-/*static*/ emitter::code_t emitter::insEncodeImm8_12_to_5(ssize_t imm)
-{
-    assert(isValidSimm8(imm) || isValidUimm8(imm));
-    return (code_t)((imm & 0xFF) << 5);
-}
+    assert(fmt != IF_NONE);
 
-/*****************************************************************************
- *
- *  Returns the encoding for the unsigned immediate value as 3-bits at bit locations '18-16'.
- */
+    instrDesc* id = emitNewInstrCns(attr1, imm);
 
-/*static*/ emitter::code_t emitter::insEncodeUimm3_18_to_16(ssize_t imm)
-{
-    assert(isValidUimm3(imm));
-    return (code_t)imm << 16;
-}
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
 
-/*****************************************************************************
- *
- *  Returns the encoding to select the <R> 4/8-byte width specifier <R>
- *  at bit location 22 for an Arm64 Sve instruction.
- */
-/*static*/ emitter::code_t emitter::insEncodeSveElemsize_R_22(emitAttr size)
-{
-    if (size == EA_8BYTE)
+    // Record the attribute for the second register in the pair
+    if (EA_IS_GCREF(attr2))
     {
-        return 0x400000; // set the bit at location 22
+        id->idGCrefReg2(GCT_GCREF);
     }
-
-    assert(size == EA_4BYTE);
-    return 0;
-}
-
-/*****************************************************************************
- *
- * Returns the encoding to select an insSvePattern
- */
-/*static*/ emitter::code_t emitter::insEncodeSvePattern(insSvePattern pattern)
-{
-    return (code_t)((unsigned)pattern << 5);
-}
-
-BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id)
-{
-    instruction ins    = id->idIns();
-    insFormat   fmt    = id->idInsFmt();
-    regNumber   dstReg = id->idReg1();
-    if (id->idjShort)
+    else if (EA_IS_BYREF(attr2))
     {
-        // adr x, [rel addr] --  compute address: current addr(ip) + rel addr.
-        assert(ins == INS_adr);
-        assert(fmt == IF_DI_1E);
-        ssize_t distVal = (ssize_t)(dstAddr - srcAddr);
-        dst             = emitOutputShortAddress(dst, ins, fmt, distVal, dstReg);
+        id->idGCrefReg2(GCT_BYREF);
     }
     else
     {
-        // adrp x, [rel page addr] -- compute page address: current page addr + rel page addr
-        assert(fmt == IF_LARGEADR);
-        ssize_t relPageAddr = computeRelPageAddr((size_t)dstAddr, (size_t)srcAddr);
-        dst                 = emitOutputShortAddress(dst, INS_adrp, IF_DI_1E, relPageAddr, dstReg);
-
-        // add x, x, page offs -- compute address = page addr + page offs
-        ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits
-        assert(isValidUimm12(imm12));
-        code_t code =
-            emitInsCode(INS_add, IF_DI_2A);  // DI_2A  X0010001shiiiiii iiiiiinnnnnddddd   1100 0000   imm(i12, sh)
-        code |= insEncodeDatasize(EA_8BYTE); // X
-        code |= ((code_t)imm12 << 10);       // iiiiiiiiiiii
-        code |= insEncodeReg_Rd(dstReg);     // ddddd
-        code |= insEncodeReg_Rn(dstReg);     // nnnnn
-        dst += emitOutput_Instr(dst, code);
+        id->idGCrefReg2(GCT_NONE);
     }
-    return dst;
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
+    id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+    id->idSetIsLclVar();
+
+#ifdef DEBUG
+    id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- *  Output a local jump or other instruction with a pc-relative immediate.
- *  Note that this may be invoked to overwrite an existing jump instruction at 'dst'
- *  to handle forward branch patching.
+ *  Add an instruction referencing a stack-based local variable and a register
  */
-
-BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
+void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
 {
-    instrDescJmp* id = (instrDescJmp*)i;
-
-    unsigned srcOffs;
-    unsigned dstOffs;
-    BYTE*    srcAddr;
-    BYTE*    dstAddr;
-    ssize_t  distVal;
-
-    // Set default ins/fmt from id.
-    instruction ins = id->idIns();
-    insFormat   fmt = id->idInsFmt();
-
-    bool loadLabel    = false;
-    bool isJump       = false;
-    bool loadConstant = false;
-
-    switch (ins)
-    {
-        default:
-            isJump = true;
-            break;
-
-        case INS_tbz:
-        case INS_tbnz:
-        case INS_cbz:
-        case INS_cbnz:
-            isJump = true;
-            break;
-
-        case INS_ldr:
-        case INS_ldrsw:
-            loadConstant = true;
-            break;
-
-        case INS_adr:
-        case INS_adrp:
-            loadLabel = true;
-            break;
-    }
+    assert(offs >= 0);
+    emitAttr  size          = EA_SIZE(attr);
+    insFormat fmt           = IF_NONE;
+    unsigned  scale         = 0;
+    bool      isVectorStore = false;
+    bool      isStr         = false;
+    bool      isSimple      = true;
+    bool      useRegForImm  = false;
 
-    /* Figure out the distance to the target */
+    /* Figure out the variable's frame position */
+    bool    FPbased;
+    int     base = emitComp->lvaFrameAddress(varx, &FPbased);
+    int     disp = base + offs;
+    ssize_t imm  = disp;
 
-    srcOffs = emitCurCodeOffs(dst);
-    srcAddr = emitOffsetToPtr(srcOffs);
+    // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead?
+    regNumber reg2 = encodingSPtoZR(FPbased ? REG_FPBASE : REG_SPBASE);
 
-    if (id->idAddr()->iiaIsJitDataOffset())
-    {
-        assert(loadConstant || loadLabel);
-        int doff = id->idAddr()->iiaGetJitDataOffset();
-        assert(doff >= 0);
-        ssize_t imm = emitGetInsSC(id);
-        assert((imm >= 0) && (imm < 0x1000)); // 0x1000 is arbitrary, currently 'imm' is always 0
+    // TODO-ARM64-CQ: use unscaled loads?
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_strb:
+            scale = 0;
+            assert(isGeneralRegisterOrZR(reg1));
+            break;
 
-        unsigned dataOffs = (unsigned)(doff + imm);
-        assert(dataOffs < emitDataSize());
-        dstAddr = emitDataOffsetToPtr(dataOffs);
+        case INS_strh:
+            scale = 1;
+            assert(isGeneralRegisterOrZR(reg1));
+            break;
 
-        regNumber dstReg  = id->idReg1();
-        regNumber addrReg = dstReg; // an integer register to compute long address.
-        emitAttr  opSize  = id->idOpSize();
+        case INS_str:
+            if (isGeneralRegisterOrZR(reg1))
+            {
+                assert(isValidGeneralDatasize(size));
+                scale = (size == EA_8BYTE) ? 3 : 2;
+            }
+            else
+            {
+                assert(isVectorRegister(reg1));
+                assert(isValidVectorLSDatasize(size));
+                scale         = NaturalScale_helper(size);
+                isVectorStore = true;
+            }
+            isStr = true;
+            break;
 
-        if (loadConstant)
+        case INS_sve_str:
         {
-            if (id->idjShort)
+            assert(isVectorRegister(reg1));
+            isSimple = false;
+            size     = EA_SCALABLE;
+            attr     = size;
+            fmt      = IF_SVE_JH_2A;
+
+            // TODO-SVE: Don't assume 128bit vectors
+            scale        = NaturalScale_helper(EA_16BYTE);
+            ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+
+            if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale)))
             {
-                // ldr x/v, [rel addr] -- load constant from current addr(ip) + rel addr.
-                assert(ins == INS_ldr);
-                assert(fmt == IF_LS_1A);
-                distVal = (ssize_t)(dstAddr - srcAddr);
-                dst     = emitOutputShortConstant(dst, ins, fmt, distVal, dstReg, opSize);
+                imm >>= scale; // The immediate is scaled by the size of the ld/st
             }
             else
             {
-                // adrp x, [rel page addr] -- compute page address: current page addr + rel page addr
-                assert(fmt == IF_LARGELDC);
-                ssize_t relPageAddr = computeRelPageAddr((size_t)dstAddr, (size_t)srcAddr);
-                if (isVectorRegister(dstReg))
-                {
-                    // Update addrReg with the reserved integer register
-                    // since we cannot use dstReg (vector) to load constant directly from memory.
-
-                    // If loading a 16-byte value, we will need to load directly into dstReg.
-                    // Thus, encode addrReg for the ld1 instruction.
-                    if (opSize == EA_16BYTE)
-                    {
-                        addrReg = encodingSPtoZR(id->idReg2());
-                    }
-                    else
-                    {
-                        addrReg = id->idReg2();
-                    }
+                useRegForImm      = true;
+                regNumber rsvdReg = codeGen->rsGetRsvdReg();
+                codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
+            }
+        }
+        break;
 
-                    assert(isGeneralRegister(addrReg));
-                }
+        // TODO-SVE: Fold into INS_sve_str once REG_V0 and REG_P0 are distinct
+        case INS_sve_str_mask:
+        {
+            assert(isPredicateRegister(reg1));
+            isSimple = false;
+            size     = EA_SCALABLE;
+            attr     = size;
+            fmt      = IF_SVE_JG_2A;
+            ins      = INS_sve_str;
 
-                ins = INS_adrp;
-                fmt = IF_DI_1E;
-                dst = emitOutputShortAddress(dst, ins, fmt, relPageAddr, addrReg);
+            // TODO-SVE: Don't assume 128bit vectors
+            // Predicate size is vector length / 8
+            scale        = NaturalScale_helper(EA_2BYTE);
+            ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
 
-                ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits
-                assert(isValidUimm12(imm12));
+            if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale)))
+            {
+                imm >>= scale; // The immediate is scaled by the size of the ld/st
+            }
+            else
+            {
+                useRegForImm      = true;
+                regNumber rsvdReg = codeGen->rsGetRsvdReg();
+                codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
+            }
+        }
+        break;
 
-                // Special case: emit add + ld1 instructions for loading 16-byte data into vector register.
-                if (isVectorRegister(dstReg) && (opSize == EA_16BYTE))
-                {
-                    const emitAttr elemSize = EA_1BYTE;
-                    const insOpts  opt      = optMakeArrangement(opSize, elemSize);
+        default:
+            NYI("emitIns_S_R"); // FP locals?
+            return;
 
-                    assert(isGeneralRegisterOrSP(addrReg));
-                    assert(isValidVectorElemsize(elemSize));
-                    assert(isValidArrangement(opSize, opt));
+    } // end switch (ins)
 
-                    // Calculate page addr + page offs, then emit ld1 instruction.
-                    dst = emitOutputVectorConstant(dst, imm12, dstReg, addrReg, opSize, elemSize);
-                }
-                else
-                {
-                    // ldr x, [x, 0] -- load constant from address into integer register.
-                    ins = INS_ldr;
-                    fmt = IF_LS_2B;
-                    dst = emitOutputShortConstant(dst, ins, fmt, imm12, addrReg, opSize);
+    if (isVectorStore || !isSimple)
+    {
+        assert(scale <= 4);
+    }
+    else
+    {
+        assert(scale <= 3);
+    }
 
-                    // fmov v, d -- copy constant in integer register to vector register.
-                    // This is needed only for vector constant.
-                    if (addrReg != dstReg)
-                    {
-                        //  fmov    Vd,Rn                DV_2I  X00111100X100111 000000nnnnnddddd   1E27 0000   Vd,Rn
-                        //  (scalar, from general)
-                        assert(isVectorRegister(dstReg) && isGeneralRegister(addrReg));
-                        ins         = INS_fmov;
-                        fmt         = IF_DV_2I;
-                        code_t code = emitInsCode(ins, fmt);
+    if (isSimple)
+    {
+        ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
 
-                        code |= insEncodeReg_Vd(dstReg);  // ddddd
-                        code |= insEncodeReg_Rn(addrReg); // nnnnn
-                        if (id->idOpSize() == EA_8BYTE)
-                        {
-                            code |= 0x80400000; // X ... X
-                        }
-                        dst += emitOutput_Instr(dst, code);
-                    }
-                }
+        if (imm == 0)
+        {
+            fmt = IF_LS_2A;
+        }
+        else if ((imm < 0) || ((imm & mask) != 0))
+        {
+            if (isValidSimm<9>(imm))
+            {
+                fmt = IF_LS_2C;
+            }
+            else
+            {
+                useRegForImm = true;
             }
         }
-        else
+        else if (imm > 0)
         {
-            assert(loadLabel);
-            dst = emitOutputLoadLabel(dst, srcAddr, dstAddr, id);
+            if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
+            {
+                imm >>= scale; // The immediate is scaled by the size of the ld/st
+                fmt = IF_LS_2B;
+            }
+            else
+            {
+                useRegForImm = true;
+            }
         }
 
-        return dst;
-    }
-
-    assert(loadLabel || isJump);
-
-    if (id->idAddr()->iiaHasInstrCount())
-    {
-        assert(ig != NULL);
-        int      instrCount = id->idAddr()->iiaGetInstrCount();
-        unsigned insNum     = emitFindInsNum(ig, id);
-        if (instrCount < 0)
+        if (useRegForImm)
         {
-            // Backward branches using instruction count must be within the same instruction group.
-            assert(insNum + 1 >= (unsigned)(-instrCount));
+            // The reserved register is not stored in idReg3() since that field overlaps with iiaLclVar.
+            // It is instead implicit when idSetIsLclVar() is set, with this encoding format.
+            regNumber rsvdReg = codeGen->rsGetRsvdReg();
+            codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
+            fmt = IF_LS_3A;
         }
-
-        dstOffs = ig->igOffs + emitFindOffset(ig, (insNum + 1 + instrCount));
-        dstAddr = emitOffsetToPtr(dstOffs);
     }
-    else
+
+    assert(fmt != IF_NONE);
+
+    // Try to optimize a store with an alternative instruction.
+    if (isStr && emitComp->opts.OptimizationEnabled() &&
+        OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs DEBUG_ARG(useRegForImm)))
     {
-        dstOffs = id->idAddr()->iiaIGlabel->igOffs;
-        dstAddr = emitOffsetToPtr(dstOffs);
+        return;
     }
 
-    distVal = (ssize_t)(dstAddr - srcAddr);
+    instrDesc* id = emitNewInstrCns(attr, imm);
 
-    if (dstOffs <= srcOffs)
-    {
-#if DEBUG_EMIT
-        /* This is a backward jump - distance is known at this point */
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
 
-        if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
-        {
-            size_t blkOffs = id->idjIG->igOffs;
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+    id->idSetIsLclVar();
 
-            if (INTERESTING_JUMP_NUM == 0)
-                printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
-            printf("[3] Jump  block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
-            printf("[3] Jump        is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
-            printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
-        }
+#ifdef DEBUG
+    id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
 #endif
-    }
-    else
-    {
-        /* This is a  forward jump - distance will be an upper limit */
 
-        emitFwdJumps = true;
+    dispIns(id);
+    appendToCurIG(id);
+}
 
-        /* The target offset will be closer by at least 'emitOffsAdj', but only if this
-           jump doesn't cross the hot-cold boundary. */
+/*****************************************************************************
+ *
+ *  Add an instruction referencing consecutive stack-based local variable slots and two registers
+ */
+void emitter::emitIns_S_S_R_R(
+    instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs)
+{
+    assert((ins == INS_stp) || (ins == INS_stnp));
+    assert(EA_8BYTE == EA_SIZE(attr1));
+    assert(EA_8BYTE == EA_SIZE(attr2));
+    assert(isGeneralRegisterOrZR(reg1));
+    assert(isGeneralRegisterOrZR(reg2));
+    assert(offs >= 0);
 
-        if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
-        {
-            dstOffs -= emitOffsAdj;
-            distVal -= emitOffsAdj;
-        }
+    insFormat      fmt   = IF_LS_3B;
+    int            disp  = 0;
+    const unsigned scale = 3;
 
-        /* Record the location of the jump for later patching */
+    /* Figure out the variable's frame position */
+    int  base;
+    bool FPbased;
 
-        id->idjOffs = dstOffs;
+    base = emitComp->lvaFrameAddress(varx, &FPbased);
+    disp = base + offs;
 
-        /* Are we overflowing the id->idjOffs bitfield? */
-        if (id->idjOffs != dstOffs)
-            IMPL_LIMITATION("Method is too large");
+    // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead?
+    regNumber reg3 = FPbased ? REG_FPBASE : REG_SPBASE;
 
-#if DEBUG_EMIT
-        if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+    bool    useRegForAdr = true;
+    ssize_t imm          = disp;
+    ssize_t mask         = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+    if (imm == 0)
+    {
+        useRegForAdr = false;
+    }
+    else
+    {
+        if ((imm & mask) == 0)
         {
-            size_t blkOffs = id->idjIG->igOffs;
+            ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st
 
-            if (INTERESTING_JUMP_NUM == 0)
-                printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
-            printf("[4] Jump  block is at %08X\n", blkOffs);
-            printf("[4] Jump        is at %08X\n", srcOffs);
-            printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
+            if ((immShift >= -64) && (immShift <= 63))
+            {
+                fmt          = IF_LS_3C;
+                useRegForAdr = false;
+                imm          = immShift;
+            }
         }
-#endif
     }
 
-#ifdef DEBUG
-    if (0 && emitComp->verbose)
+    if (useRegForAdr)
     {
-        size_t sz          = 4;
-        int    distValSize = id->idjShort ? 4 : 8;
-        printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = 0x%08X\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
-               dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs, distVal);
+        regNumber rsvd = codeGen->rsGetRsvdReg();
+        emitIns_R_R_Imm(INS_add, EA_PTRSIZE, rsvd, reg3, imm);
+        reg3 = rsvd;
+        imm  = 0;
     }
-#endif
 
-    /* For forward jumps, record the address of the distance value */
-    id->idjTemp.idjAddr = (distVal > 0) ? dst : NULL;
+    assert(fmt != IF_NONE);
 
-    assert(insOptsNone(id->idInsOpt()));
+    instrDesc* id = emitNewInstrCns(attr1, imm);
 
-    if (isJump)
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
+
+    // Record the attribute for the second register in the pair
+    if (EA_IS_GCREF(attr2))
     {
-        if (id->idjShort)
-        {
-            // Short conditional/unconditional jump
-            assert(!id->idjKeepLong);
-            assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
-            assert((fmt == IF_BI_0A) || (fmt == IF_BI_0B) || (fmt == IF_BI_1A) || (fmt == IF_BI_1B));
-            dst = emitOutputShortBranch(dst, ins, fmt, distVal, id);
-        }
-        else
-        {
-            // Long conditional/unconditional jump
+        id->idGCrefReg2(GCT_GCREF);
+    }
+    else if (EA_IS_BYREF(attr2))
+    {
+        id->idGCrefReg2(GCT_BYREF);
+    }
+    else
+    {
+        id->idGCrefReg2(GCT_NONE);
+    }
 
-            if (fmt == IF_LARGEJMP)
-            {
-                // This is a pseudo-instruction format representing a large conditional branch, to allow
-                // us to get a greater branch target range than we can get by using a straightforward conditional
-                // branch. It is encoded as a short conditional branch that branches around a long unconditional
-                // branch.
-                //
-                // Conceptually, we have:
-                //
-                //      b<cond> L_target
-                //
-                // The code we emit is:
-                //
-                //      b<!cond> L_not  // 4 bytes. Note that we reverse the condition.
-                //      b L_target      // 4 bytes
-                //   L_not:
-                //
-                // Note that we don't actually insert any blocks: we simply encode "b <!cond> L_not" as a branch with
-                // the correct offset. Note also that this works for both integer and floating-point conditions, because
-                // the condition inversion takes ordered/unordered into account, preserving NaN behavior. For example,
-                // "GT" (greater than) is inverted to "LE" (less than, equal, or unordered).
+    reg3 = encodingSPtoZR(reg3);
 
-                instruction reverseIns;
-                insFormat   reverseFmt;
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
+    id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+    id->idSetIsLclVar();
 
-                switch (ins)
-                {
-                    case INS_cbz:
-                        reverseIns = INS_cbnz;
-                        reverseFmt = IF_BI_1A;
-                        break;
-                    case INS_cbnz:
-                        reverseIns = INS_cbz;
-                        reverseFmt = IF_BI_1A;
-                        break;
-                    case INS_tbz:
-                        reverseIns = INS_tbnz;
-                        reverseFmt = IF_BI_1B;
-                        break;
-                    case INS_tbnz:
-                        reverseIns = INS_tbz;
-                        reverseFmt = IF_BI_1B;
-                        break;
-                    default:
-                        reverseIns = emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(ins)));
-                        reverseFmt = IF_BI_0B;
-                }
+#ifdef DEBUG
+    id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
 
-                dst = emitOutputShortBranch(dst,
-                                            reverseIns,    // reverse the conditional instruction
-                                            reverseFmt, 8, /* 8 bytes from start of this large conditional
-                                                              pseudo-instruction to L_not. */
-                                            id);
+    dispIns(id);
+    appendToCurIG(id);
+}
 
-                // Now, pretend we've got a normal unconditional branch, and fall through to the code to emit that.
-                ins = INS_b;
-                fmt = IF_BI_0A;
+/*****************************************************************************
+ *
+ *  Add an instruction referencing stack-based local variable and an immediate
+ */
+void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
+{
+    NYI("emitIns_S_I");
+}
 
-                // The distVal was computed based on the beginning of the pseudo-instruction,
-                // So subtract the size of the conditional branch so that it is relative to the
-                // unconditional branch.
-                distVal -= 4;
-            }
+/*****************************************************************************
+ *
+ *  Add an instruction with a register + static member operands.
+ *  Constant is stored into JIT data which is adjacent to code.
+ *  No relocation is needed. PC-relative offset will be encoded directly into instruction.
+ *
+ */
+void emitter::emitIns_R_C(
+    instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+    assert(offs >= 0);
+    assert(instrDesc::fitsInSmallCns(offs));
 
-            assert(fmt == IF_BI_0A);
-            assert((distVal & 1) == 0);
-            code_t     code               = emitInsCode(ins, fmt);
-            const bool doRecordRelocation = emitComp->opts.compReloc && emitJumpCrossHotColdBoundary(srcOffs, dstOffs);
+    emitAttr      size = EA_SIZE(attr);
+    insFormat     fmt  = IF_NONE;
+    instrDescJmp* id   = emitNewInstrJmp();
 
-            if (doRecordRelocation)
+    switch (ins)
+    {
+        case INS_adr:
+            // This is case to get address to the constant data.
+            fmt = IF_LARGEADR;
+            assert(isGeneralRegister(reg));
+            assert(isValidGeneralDatasize(size));
+            break;
+
+        case INS_ldr:
+            fmt = IF_LARGELDC;
+            if (isVectorRegister(reg))
             {
-                // dst isn't an actual final target location, just some intermediate
-                // location.  Thus we cannot make any guarantees about distVal (not
-                // even the direction/sign).  Instead we don't encode any offset and
-                // rely on the relocation to do all the work
+                assert(isValidVectorLSDatasize(size));
+                // For vector (float/double) register, we should have an integer address reg to
+                // compute long address which consists of page address and page offset.
+                // For integer constant, this is not needed since the dest reg can be used to
+                // compute address as well as contain the final contents.
+                assert(isGeneralRegister(reg) || (addrReg != REG_NA));
             }
             else
             {
-                // Branch offset encodings are scaled by 4.
-                noway_assert((distVal & 3) == 0);
-                distVal >>= 2;
-                noway_assert(isValidSimm26(distVal));
-
-                // Insert offset into unconditional branch instruction
-                distVal &= 0x3FFFFFFLL;
-                code |= distVal;
+                assert(isGeneralRegister(reg));
+                assert(isValidGeneralDatasize(size));
             }
+            break;
 
-            const unsigned instrSize = emitOutput_Instr(dst, code);
+        default:
+            unreached();
+    }
 
-            if (doRecordRelocation)
-            {
-                assert(id->idjKeepLong);
-                if (emitComp->info.compMatchedVM)
-                {
-                    void* target = emitOffsetToPtr(dstOffs);
-                    emitRecordRelocation((void*)dst, target, IMAGE_REL_ARM64_BRANCH26);
-                }
-            }
+    assert(fmt != IF_NONE);
 
-            dst += instrSize;
-        }
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
+    id->idSmallCns(offs);
+    id->idOpSize(size);
+    id->idAddr()->iiaFieldHnd = fldHnd;
+    id->idSetIsBound(); // We won't patch address since we will know the exact distance once JIT code and data are
+                        // allocated together.
+
+    id->idReg1(reg); // destination register that will get the constant value.
+    if (addrReg != REG_NA)
+    {
+        id->idReg2(addrReg); // integer register to compute long address (used for vector dest when we end up with long
+                             // address)
     }
-    else if (loadLabel)
+    id->idjShort = false; // Assume loading constant from long address
+
+    // Keep it long if it's in cold code.
+    id->idjKeepLong = emitComp->fgIsBlockCold(emitComp->compCurBB);
+
+#ifdef DEBUG
+    if (emitComp->opts.compLongAddress)
+        id->idjKeepLong = 1;
+#endif // DEBUG
+
+    // If it's possible to be shortened, then put it in jump list
+    // to be revisited by emitJumpDistBind.
+    if (!id->idjKeepLong)
     {
-        dst = emitOutputLoadLabel(dst, srcAddr, dstAddr, id);
+        /* Record the jump's IG and offset within it */
+        id->idjIG   = emitCurIG;
+        id->idjOffs = emitCurIGsize;
+
+        /* Append this jump to this IG's jump list */
+        id->idjNext      = emitCurIGjmpList;
+        emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+        emitTotalIGjmps++;
+#endif
     }
 
-    return dst;
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add an instruction with a static member + constant.
+ */
+
+void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val)
+{
+    NYI("emitIns_C_I");
 }
 
 /*****************************************************************************
-*
-*  Output a short branch instruction.
-*/
-BYTE* emitter::emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id)
+ *
+ *  Add an instruction with a static member + register operands.
+ */
+
+void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
 {
-    code_t code = emitInsCode(ins, fmt);
+    assert(!"emitIns_C_R not supported for RyuJIT backend");
+}
 
-    ssize_t loBits = (distVal & 3);
-    noway_assert(loBits == 0);
-    distVal >>= 2; // branch offset encodings are scaled by 4.
+void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs)
+{
+    NYI("emitIns_R_AR");
+}
 
-    if (fmt == IF_BI_0A)
-    {
-        // INS_b or INS_bl_local
-        noway_assert(isValidSimm26(distVal));
-        distVal &= 0x3FFFFFFLL;
-        code |= distVal;
-    }
-    else if (fmt == IF_BI_0B) // BI_0B   01010100iiiiiiii iiiiiiiiiiiXXXXX      simm19:00
-    {
-        // INS_beq, INS_bne, etc...
-        noway_assert(isValidSimm19(distVal));
-        distVal &= 0x7FFFFLL;
-        code |= distVal << 5;
-    }
-    else if (fmt == IF_BI_1A) // BI_1A   X.......iiiiiiii iiiiiiiiiiittttt      Rt simm19:00
-    {
-        // INS_cbz or INS_cbnz
-        assert(id != nullptr);
-        code |= insEncodeDatasize(id->idOpSize()); // X
-        code |= insEncodeReg_Rt(id->idReg1());     // ttttt
+// This generates code to populate the access for TLS on linux
+void emitter::emitIns_Adrp_Ldr_Add(emitAttr     attr,
+                                   regNumber    reg1,
+                                   regNumber    reg2,
+                                   ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
+{
+    assert(emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI));
+    assert(TargetOS::IsUnix);
+    assert(EA_IS_RELOC(attr));
+    assert(EA_IS_CNS_TLSGD_RELOC(attr));
 
-        noway_assert(isValidSimm19(distVal));
-        distVal &= 0x7FFFFLL; // 19 bits
-        code |= distVal << 5;
-    }
-    else if (fmt == IF_BI_1B) // BI_1B   B.......bbbbbiii iiiiiiiiiiittttt      Rt imm6, simm14:00
-    {
-        // INS_tbz or INS_tbnz
-        assert(id != nullptr);
-        ssize_t imm = emitGetInsSC(id);
-        assert(isValidImmShift(imm, id->idOpSize()));
+    emitAttr      size    = EA_SIZE(attr);
+    insFormat     fmt     = IF_DI_1E;
+    bool          needAdd = false;
+    instrDescJmp* id      = emitNewInstrJmp();
 
-        if (imm & 0x20) // test bit 32-63 ?
-        {
-            code |= 0x80000000; // B
-        }
-        code |= ((imm & 0x1F) << 19);          // bbbbb
-        code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+    // adrp
+    id->idIns(INS_adrp);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
+    id->idOpSize(size);
+    id->idAddr()->iiaAddr = (BYTE*)addr;
+    id->idReg1(reg1);
+    id->idSetIsDspReloc();
+    id->idSetTlsGD();
 
-        noway_assert(isValidSimm14(distVal));
-        distVal &= 0x3FFFLL; // 14 bits
-        code |= distVal << 5;
-    }
-    else
-    {
-        assert(!"Unknown fmt for emitOutputShortBranch");
-    }
+#ifdef DEBUG
+    id->idDebugOnlyInfo()->idMemCookie = targetHandle;
+    id->idDebugOnlyInfo()->idFlags     = gtFlags;
+#endif
 
-    dst += emitOutput_Instr(dst, code);
+    dispIns(id);
+    appendToCurIG(id);
 
-    return dst;
+    // ldr
+    emitIns_R_R_I(INS_ldr, attr, reg2, reg1, (ssize_t)addr);
+
+    // add
+    fmt              = IF_DI_2A;
+    instrDesc* addId = emitNewInstr(attr);
+    assert(id->idIsReloc());
+
+    addId->idIns(INS_add);
+    addId->idInsFmt(fmt);
+    addId->idInsOpt(INS_OPTS_NONE);
+    addId->idOpSize(size);
+    addId->idAddr()->iiaAddr = (BYTE*)addr;
+    addId->idReg1(reg1);
+    addId->idReg2(reg1);
+    addId->idSetTlsGD();
+
+    dispIns(addId);
+    appendToCurIG(addId);
 }
 
-/*****************************************************************************
-*
-*  Output a short address instruction.
-*/
-BYTE* emitter::emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg)
+// This computes address from the immediate which is relocatable.
+void emitter::emitIns_R_AI(instruction  ins,
+                           emitAttr     attr,
+                           regNumber    ireg,
+                           ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
 {
-    ssize_t loBits = (distVal & 3);
-    distVal >>= 2;
+    assert(EA_IS_RELOC(attr));
+    emitAttr      size    = EA_SIZE(attr);
+    insFormat     fmt     = IF_DI_1E;
+    bool          needAdd = false;
+    instrDescJmp* id      = emitNewInstrJmp();
 
-    code_t code = emitInsCode(ins, fmt);
-    if (fmt == IF_DI_1E) // DI_1E   .ii.....iiiiiiii iiiiiiiiiiiddddd      Rd simm21
+    switch (ins)
     {
-        // INS_adr or INS_adrp
-        code |= insEncodeReg_Rd(reg); // ddddd
-
-        noway_assert(isValidSimm19(distVal));
-        distVal &= 0x7FFFFLL; // 19 bits
-        code |= distVal << 5;
-        code |= loBits << 29; //  2 bits
+        case INS_adrp:
+            // This computes page address.
+            // page offset is needed using add.
+            needAdd = true;
+            break;
+        case INS_adr:
+            break;
+        default:
+            unreached();
     }
-    else
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(INS_OPTS_NONE);
+    id->idOpSize(size);
+    id->idAddr()->iiaAddr = (BYTE*)addr;
+    id->idReg1(ireg);
+    id->idSetIsDspReloc();
+#ifdef DEBUG
+    id->idDebugOnlyInfo()->idMemCookie = targetHandle;
+    id->idDebugOnlyInfo()->idFlags     = gtFlags;
+#endif
+
+    dispIns(id);
+    appendToCurIG(id);
+
+    if (needAdd)
     {
-        assert(!"Unknown fmt for emitOutputShortAddress");
-    }
+        // add reg, reg, imm
+        ins           = INS_add;
+        fmt           = IF_DI_2A;
+        instrDesc* id = emitNewInstr(attr);
+        assert(id->idIsReloc());
 
-    dst += emitOutput_Instr(dst, code);
+        id->idIns(ins);
+        id->idInsFmt(fmt);
+        id->idInsOpt(INS_OPTS_NONE);
+        id->idOpSize(size);
+        id->idAddr()->iiaAddr = (BYTE*)addr;
+        id->idReg1(ireg);
+        id->idReg2(ireg);
 
-    return dst;
+        dispIns(id);
+        appendToCurIG(id);
+    }
 }
 
-/*****************************************************************************
-*
-*  Output a short constant instruction.
-*/
-BYTE* emitter::emitOutputShortConstant(
-    BYTE* dst, instruction ins, insFormat fmt, ssize_t imm, regNumber reg, emitAttr opSize)
+void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs)
 {
-    code_t code = emitInsCode(ins, fmt);
-
-    if (fmt == IF_LS_1A)
-    {
-        // LS_1A   XX...V..iiiiiiii iiiiiiiiiiittttt      Rt simm21
-        // INS_ldr or INS_ldrsw (PC-Relative)
+    NYI("emitIns_AR_R");
+}
 
-        ssize_t loBits = (imm & 3);
-        noway_assert(loBits == 0);
-        ssize_t distVal = imm >> 2; // load offset encodings are scaled by 4.
+void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp)
+{
+    NYI("emitIns_R_ARR");
+}
 
-        noway_assert(isValidSimm19(distVal));
+void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp)
+{
+    NYI("emitIns_R_ARR");
+}
 
-        // Is the target a vector register?
-        if (isVectorRegister(reg))
-        {
-            code |= insEncodeDatasizeVLS(code, opSize); // XX V
-            code |= insEncodeReg_Vt(reg);               // ttttt
-        }
-        else
-        {
-            assert(isGeneralRegister(reg));
-            // insEncodeDatasizeLS is not quite right for this case.
-            // So just specialize it.
-            if ((ins == INS_ldr) && (opSize == EA_8BYTE))
-            {
-                // set the operation size in bit 30
-                code |= 0x40000000;
-            }
+void emitter::emitIns_R_ARX(
+    instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp)
+{
+    NYI("emitIns_R_ARR");
+}
 
-            code |= insEncodeReg_Rt(reg); // ttttt
-        }
+/*****************************************************************************
+ *
+ *  Record that a jump instruction uses the short encoding
+ *
+ */
+void emitter::emitSetShortJump(instrDescJmp* id)
+{
+    if (id->idjKeepLong)
+        return;
 
-        distVal &= 0x7FFFFLL; // 19 bits
-        code |= distVal << 5;
-    }
-    else if (fmt == IF_LS_2B)
+    insFormat fmt = IF_NONE;
+    if (emitIsCondJump(id))
     {
-        //  ldr     Rt,[Xn+pimm12]       LS_2B  1X11100101iiiiii iiiiiinnnnnttttt   B940 0000   imm(0-4095<<{2,3})
-        // INS_ldr or INS_ldrsw (PC-Relative)
-        noway_assert(isValidUimm12(imm));
-        assert(isGeneralRegister(reg));
-
-        if (opSize == EA_8BYTE)
-        {
-            // insEncodeDatasizeLS is not quite right for this case.
-            // So just specialize it.
-            if (ins == INS_ldr)
-            {
-                // set the operation size in bit 30
-                code |= 0x40000000;
-            }
-            // Low 3 bits should be 0 -- 8 byte JIT data should be aligned on 8 byte.
-            assert((imm & 7) == 0);
-            imm >>= 3;
-        }
-        else
+        switch (id->idIns())
         {
-            assert(opSize == EA_4BYTE);
-            // Low 2 bits should be 0 -- 4 byte aligned data.
-            assert((imm & 3) == 0);
-            imm >>= 2;
-        }
-
-        code |= insEncodeReg_Rt(reg); // ttttt
-        code |= insEncodeReg_Rn(reg); // nnnnn
-        code |= imm << 10;
+            case INS_cbz:
+            case INS_cbnz:
+                fmt = IF_BI_1A;
+                break;
+            case INS_tbz:
+            case INS_tbnz:
+                fmt = IF_BI_1B;
+                break;
+            default:
+                fmt = IF_BI_0B;
+                break;
+        }
+    }
+    else if (emitIsLoadLabel(id))
+    {
+        fmt = IF_DI_1E;
+    }
+    else if (emitIsLoadConstant(id))
+    {
+        fmt = IF_LS_1A;
     }
     else
     {
-        assert(!"Unknown fmt for emitOutputShortConstant");
+        unreached();
     }
 
-    dst += emitOutput_Instr(dst, code);
-
-    return dst;
-}
-
-/*****************************************************************************
- *
- *  Output instructions to load a constant into a vector register.
- */
-BYTE* emitter::emitOutputVectorConstant(
-    BYTE* dst, ssize_t imm, regNumber dstReg, regNumber addrReg, emitAttr opSize, emitAttr elemSize)
-{
-    // add addrReg, addrReg, page offs -- compute address = page addr + page offs.
-    code_t code = emitInsCode(INS_add, IF_DI_2A); // DI_2A  X0010001shiiiiii iiiiiinnnnnddddd   1100 0000   imm(i12, sh)
-    code |= insEncodeDatasize(EA_8BYTE);          // X - use EA_8BYTE, as we are calculating 64-bit address
-    code |= ((code_t)imm << 10);                  // iiiiiiiiiiii
-    code |= insEncodeReg_Rd(addrReg);             // ddddd
-    code |= insEncodeReg_Rn(addrReg);             // nnnnn
-    dst += emitOutput_Instr(dst, code);
-
-    // ld1 dstReg, addrReg -- load constant at address in addrReg into dstReg.
-    code = emitInsCode(INS_ld1, IF_LS_2D);  // LS_2D   .Q.............. ....ssnnnnnttttt      Vt Rn
-    code |= insEncodeVectorsize(opSize);    // Q
-    code |= insEncodeVLSElemsize(elemSize); // ss
-    code |= insEncodeReg_Rn(addrReg);       // nnnnn
-    code |= insEncodeReg_Vt(dstReg);        // ttttt
-    dst += emitOutput_Instr(dst, code);
-
-    return dst;
+    id->idInsFmt(fmt);
+    id->idjShort = true;
 }
 
 /*****************************************************************************
  *
- *  Output a call instruction.
+ *  Add a label instruction.
  */
 
-unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code)
+void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
 {
-    const unsigned char callInstrSize = sizeof(code_t); // 4 bytes
-    regMaskTP           gcrefRegs;
-    regMaskTP           byrefRegs;
+    assert(dst->HasFlag(BBF_HAS_LABEL));
 
-    VARSET_TP GCvars(VarSetOps::UninitVal());
+    insFormat fmt = IF_NONE;
 
-    // Is this a "fat" call descriptor?
-    if (id->idIsLargeCall())
-    {
-        instrDescCGCA* idCall = (instrDescCGCA*)id;
-        gcrefRegs             = idCall->idcGcrefRegs;
-        byrefRegs             = idCall->idcByrefRegs;
-        VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
-    }
-    else
+    switch (ins)
     {
-        assert(!id->idIsLargeDsp());
-        assert(!id->idIsLargeCns());
-
-        gcrefRegs = emitDecodeCallGCregs(id);
-        byrefRegs = 0;
-        VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+        case INS_adr:
+            fmt = IF_LARGEADR;
+            break;
+        default:
+            unreached();
     }
 
-    /* We update the GC info before the call as the variables cannot be
-        used by the call. Killing variables before the call helps with
-        boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
-        If we ever track aliased variables (which could be used by the
-        call), we would have to keep them alive past the call. */
+    instrDescJmp* id = emitNewInstrJmp();
 
-    emitUpdateLiveGCvars(GCvars, dst);
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idjShort             = false;
+    id->idAddr()->iiaBBlabel = dst;
+    id->idReg1(reg);
+    id->idOpSize(EA_PTRSIZE);
 
 #ifdef DEBUG
-    // Output any delta in GC variable info, corresponding to the before-call GC var updates done above.
-    if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC)
+    // Mark the catch return
+    if (emitComp->compCurBB->KindIs(BBJ_EHCATCHRET))
     {
-        emitDispGCVarDelta();
+        id->idDebugOnlyInfo()->idCatchRet = true;
     }
 #endif // DEBUG
 
-    // Now output the call instruction and update the 'dst' pointer
-    //
-    unsigned outputInstrSize = emitOutput_Instr(dst, code);
-    dst += outputInstrSize;
+    id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
 
-    // All call instructions are 4-byte in size on ARM64
-    //
-    assert(outputInstrSize == callInstrSize);
+#ifdef DEBUG
+    if (emitComp->opts.compLongAddress)
+        id->idjKeepLong = 1;
+#endif // DEBUG
 
-    // If the method returns a GC ref, mark INTRET (R0) appropriately.
-    if (id->idGCref() == GCT_GCREF)
-    {
-        gcrefRegs |= RBM_INTRET;
-    }
-    else if (id->idGCref() == GCT_BYREF)
-    {
-        byrefRegs |= RBM_INTRET;
-    }
+    /* Record the jump's IG and offset within it */
 
-    // If is a multi-register return method is called, mark INTRET_1 (X1) appropriately
-    if (id->idIsLargeCall())
-    {
-        instrDescCGCA* idCall = (instrDescCGCA*)id;
-        if (idCall->idSecondGCref() == GCT_GCREF)
-        {
-            gcrefRegs |= RBM_INTRET_1;
-        }
-        else if (idCall->idSecondGCref() == GCT_BYREF)
-        {
-            byrefRegs |= RBM_INTRET_1;
-        }
-    }
+    id->idjIG   = emitCurIG;
+    id->idjOffs = emitCurIGsize;
 
-    // If the GC register set has changed, report the new set.
-    if (gcrefRegs != emitThisGCrefRegs)
-    {
-        emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
-    }
-    // If the Byref register set has changed, report the new set.
-    if (byrefRegs != emitThisByrefRegs)
-    {
-        emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
-    }
+    /* Append this jump to this IG's jump list */
 
-    // Some helper calls may be marked as not requiring GC info to be recorded.
-    if ((!id->idIsNoGC()))
-    {
-        // On ARM64, as on AMD64, we don't change the stack pointer to push/pop args.
-        // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism
-        // to record the call for GC info purposes.  (It might be best to use an alternate call,
-        // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.)
-        emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0);
+    id->idjNext      = emitCurIGjmpList;
+    emitCurIGjmpList = id;
 
-        // Do we need to record a call location for GC purposes?
-        //
-        if (!emitFullGCinfo)
-        {
-            emitRecordGCcall(dst, callInstrSize);
-        }
-    }
-    return callInstrSize;
+#if EMITTER_STATS
+    emitTotalIGjmps++;
+#endif
+
+    dispIns(id);
+    appendToCurIG(id);
 }
 
 /*****************************************************************************
  *
- *  Emit a 32-bit Arm64 instruction
+ *  Add a data label instruction.
  */
 
-unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code)
+void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg)
 {
-    assert(sizeof(code_t) == 4);
-    BYTE* dstRW       = dst + writeableOffset;
-    *((code_t*)dstRW) = code;
-
-    return sizeof(code_t);
+    NYI("emitIns_R_D");
 }
 
-/*****************************************************************************
-*
- *  Append the machine code corresponding to the given instruction descriptor
- *  to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
- *  is the instruction group that contains the instruction. Updates '*dp' to
- *  point past the generated code, and returns the size of the instruction
- *  descriptor in bytes.
- */
-
-size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
+void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
 {
-    BYTE*       dst  = *dp;
-    BYTE*       odst = dst;
-    code_t      code = 0;
-    size_t      sz   = emitGetInstrDescSize(id); // TODO-ARM64-Cleanup: on ARM, this is set in each case. why?
-    instruction ins  = id->idIns();
-    insFormat   fmt  = id->idInsFmt();
-    emitAttr    size = id->idOpSize();
-
-#ifdef DEBUG
-#if DUMP_GC_TABLES
-    bool dspOffs = emitComp->opts.dspGCtbls;
-#else
-    bool dspOffs = !emitComp->opts.disDiffable;
-#endif
-#endif // DEBUG
+    assert((ins == INS_cbz) || (ins == INS_cbnz));
 
-    assert(REG_NA == (int)REG_NA);
+    assert(dst != nullptr);
+    assert(dst->HasFlag(BBF_HAS_LABEL));
 
-    /* What instruction format have we got? */
+    insFormat fmt = IF_LARGEJMP;
 
-    switch (fmt)
-    {
-        ssize_t  imm;
-        ssize_t  index;
-        ssize_t  index2;
-        unsigned cmode;
-        unsigned immShift;
-        emitAttr elemsize;
-        emitAttr datasize;
+    instrDescJmp* id = emitNewInstrJmp();
 
-        case IF_BI_0A: // BI_0A   ......iiiiiiiiii iiiiiiiiiiiiiiii               simm26:00
-        case IF_BI_0B: // BI_0B   ......iiiiiiiiii iiiiiiiiiii.....               simm19:00
-        case IF_LARGEJMP:
-            assert(id->idGCref() == GCT_NONE);
-            assert(id->idIsBound());
-            dst = emitOutputLJ(ig, dst, id);
-            sz  = sizeof(instrDescJmp);
-            break;
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idReg1(reg);
+    id->idjShort = false;
+    id->idOpSize(EA_SIZE(attr));
 
-        case IF_BI_0C: // BI_0C   ......iiiiiiiiii iiiiiiiiiiiiiiii               simm26:00
-            code = emitInsCode(ins, fmt);
-            sz   = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc);
-            dst += emitOutputCall(ig, dst, id, code);
-            // Always call RecordRelocation so that we wire in a JumpStub when we don't reach
-            emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_BRANCH26);
-            break;
+    id->idAddr()->iiaBBlabel = dst;
+    id->idjKeepLong          = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
 
-        case IF_BI_1A: // BI_1A   ......iiiiiiiiii iiiiiiiiiiittttt      Rt       simm19:00
-            assert(insOptsNone(id->idInsOpt()));
-            assert(id->idIsBound());
+    /* Record the jump's IG and offset within it */
 
-            dst = emitOutputLJ(ig, dst, id);
-            sz  = sizeof(instrDescJmp);
-            break;
+    id->idjIG   = emitCurIG;
+    id->idjOffs = emitCurIGsize;
 
-        case IF_BI_1B: // BI_1B   B.......bbbbbiii iiiiiiiiiiittttt      Rt imm6, simm14:00
-            assert(insOptsNone(id->idInsOpt()));
-            assert(id->idIsBound());
+    /* Append this jump to this IG's jump list */
 
-            dst = emitOutputLJ(ig, dst, id);
-            sz  = sizeof(instrDescJmp);
-            break;
+    id->idjNext      = emitCurIGjmpList;
+    emitCurIGjmpList = id;
 
-        case IF_BR_1A: // BR_1A   ................ ......nnnnn.....         Rn
-            assert(insOptsNone(id->idInsOpt()));
-            assert((ins == INS_ret) || (ins == INS_br));
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+#if EMITTER_STATS
+    emitTotalIGjmps++;
+#endif
 
-            dst += emitOutput_Instr(dst, code);
-            break;
+    dispIns(id);
+    appendToCurIG(id);
+}
 
-        case IF_BR_1B: // BR_1B   ................ ......nnnnn.....         Rn
-            assert(insOptsNone(id->idInsOpt()));
-            assert((ins == INS_br_tail) || (ins == INS_blr));
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeReg_Rn(id->idReg3()); // nnnnn
+void emitter::emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg, int imm)
+{
+    assert((ins == INS_tbz) || (ins == INS_tbnz));
 
-            sz = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc);
-            dst += emitOutputCall(ig, dst, id, code);
-            break;
+    assert(dst != nullptr);
+    assert(dst->HasFlag(BBF_HAS_LABEL));
+    assert((EA_SIZE(attr) == EA_4BYTE) || (EA_SIZE(attr) == EA_8BYTE));
+    assert(imm < ((EA_SIZE(attr) == EA_4BYTE) ? 32 : 64));
 
-        case IF_LS_1A: // LS_1A   XX...V..iiiiiiii iiiiiiiiiiittttt      Rt    PC imm(1MB)
-        case IF_LARGELDC:
-            assert(insOptsNone(id->idInsOpt()));
-            assert(id->idIsBound());
+    insFormat fmt = IF_LARGEJMP;
 
-            dst = emitOutputLJ(ig, dst, id);
-            sz  = sizeof(instrDescJmp);
-            break;
+    instrDescJmp* id = emitNewInstrJmp();
 
-        case IF_LS_2A: // LS_2A   .X.......X...... ......nnnnnttttt      Rt Rn
-            assert(insOptsNone(id->idInsOpt()));
-            code = emitInsCode(ins, fmt);
-            // Is the target a vector register?
-            if (isVectorRegister(id->idReg1()))
-            {
-                code &= 0x3FFFFFFF;                                 // clear the size bits
-                code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
-                code |= insEncodeReg_Vt(id->idReg1());              // ttttt
-            }
-            else
-            {
-                code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
-                code |= insEncodeReg_Rt(id->idReg1());             // ttttt
-            }
-            code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idReg1(reg);
+    id->idjShort = false;
+    id->idSmallCns(imm);
+    id->idOpSize(EA_SIZE(attr));
 
-        case IF_LS_2B: // LS_2B   .X.......Xiiiiii iiiiiinnnnnttttt      Rt Rn    imm(0-4095)
-            assert(insOptsNone(id->idInsOpt()));
-            imm = emitGetInsSC(id);
-            assert(isValidUimm12(imm));
-            code = emitInsCode(ins, fmt);
-            // Is the target a vector register?
-            if (isVectorRegister(id->idReg1()))
-            {
-                code &= 0x3FFFFFFF;                                 // clear the size bits
-                code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
-                code |= insEncodeReg_Vt(id->idReg1());              // ttttt
-            }
-            else
-            {
-                code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
-                code |= insEncodeReg_Rt(id->idReg1());             // ttttt
-            }
-            code |= ((code_t)imm << 10);           // iiiiiiiiiiii
-            code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+    id->idAddr()->iiaBBlabel = dst;
+    id->idjKeepLong          = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
 
-        case IF_LS_2C: // LS_2C   .X.......X.iiiii iiiiPPnnnnnttttt      Rt Rn    imm(-256..+255) no/pre/post inc
-            assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
-            imm = emitGetInsSC(id);
-            assert((imm >= -256) && (imm <= 255)); // signed 9 bits
-            imm &= 0x1ff;                          // force into unsigned 9 bit representation
-            code = emitInsCode(ins, fmt);
-            // Is the target a vector register?
-            if (isVectorRegister(id->idReg1()))
-            {
-                code &= 0x3FFFFFFF;                                 // clear the size bits
-                code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
-                code |= insEncodeReg_Vt(id->idReg1());              // ttttt
-            }
-            else
-            {
-                code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
-                code |= insEncodeReg_Rt(id->idReg1());             // ttttt
-            }
-            code |= insEncodeIndexedOpt(id->idInsOpt()); // PP
-            code |= ((code_t)imm << 12);                 // iiiiiiiii
-            code |= insEncodeReg_Rn(id->idReg2());       // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+    /* Record the jump's IG and offset within it */
 
-        case IF_LS_2D: // LS_2D   .Q.............. ....ssnnnnnttttt      Vt Rn
-        case IF_LS_2E: // LS_2E   .Q.............. ....ssnnnnnttttt      Vt Rn
-            elemsize = optGetElemsize(id->idInsOpt());
-            code     = emitInsCode(ins, fmt);
+    id->idjIG   = emitCurIG;
+    id->idjOffs = emitCurIGsize;
 
-            code |= insEncodeVectorsize(id->idOpSize()); // Q
-            code |= insEncodeVLSElemsize(elemsize);      // ss
-            code |= insEncodeReg_Rn(id->idReg2());       // nnnnn
-            code |= insEncodeReg_Vt(id->idReg1());       // ttttt
+    /* Append this jump to this IG's jump list */
 
-            dst += emitOutput_Instr(dst, code);
-            break;
+    id->idjNext      = emitCurIGjmpList;
+    emitCurIGjmpList = id;
 
-        case IF_LS_2F: // LS_2F   .Q.............. xx.Sssnnnnnttttt      Vt[] Rn
-        case IF_LS_2G: // LS_2G   .Q.............. xx.Sssnnnnnttttt      Vt[] Rn
-            elemsize = id->idOpSize();
-            index    = id->idSmallCns();
-            code     = emitInsCode(ins, fmt);
+#if EMITTER_STATS
+    emitTotalIGjmps++;
+#endif
 
-            code |= insEncodeVLSIndex(elemsize, index); // Q xx S ss
-            code |= insEncodeReg_Rn(id->idReg2());      // nnnnn
-            code |= insEncodeReg_Vt(id->idReg1());      // ttttt
+    dispIns(id);
+    appendToCurIG(id);
+}
 
-            dst += emitOutput_Instr(dst, code);
-            break;
+void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount)
+{
+    insFormat fmt = IF_NONE;
 
-        case IF_LS_3A: // LS_3A   .X.......X.mmmmm oooS..nnnnnttttt      Rt Rn Rm ext(Rm) LSL {}
-            assert(insOptsLSExtend(id->idInsOpt()));
-            code = emitInsCode(ins, fmt);
-            // Is the target a vector register?
-            if (isVectorRegister(id->idReg1()))
-            {
-                code &= 0x3FFFFFFF;                                 // clear the size bits
-                code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
-                code |= insEncodeReg_Vt(id->idReg1());              // ttttt
-            }
-            else
-            {
-                code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
-                code |= insEncodeReg_Rt(id->idReg1());             // ttttt
-            }
-            code |= insEncodeExtend(id->idInsOpt()); // ooo
-            code |= insEncodeReg_Rn(id->idReg2());   // nnnnn
-            if (id->idIsLclVar())
-            {
-                code |= insEncodeReg_Rm(codeGen->rsGetRsvdReg()); // mmmmm
-            }
-            else
-            {
-                code |= insEncodeReg3Scale(id->idReg3Scaled()); // S
-                code |= insEncodeReg_Rm(id->idReg3());          // mmmmm
-            }
-            dst += emitOutput_Instr(dst, code);
-            break;
+    if (dst != nullptr)
+    {
+        assert(dst->HasFlag(BBF_HAS_LABEL));
+    }
+    else
+    {
+        assert(instrCount != 0);
+    }
 
-        case IF_LS_3B: // LS_3B   X............... .aaaaannnnnddddd      Rd Ra Rn
-            assert(insOptsNone(id->idInsOpt()));
-            code = emitInsCode(ins, fmt);
-            // Is the target a vector register?
-            if (isVectorRegister(id->idReg1()))
-            {
-                code &= 0x3FFFFFFF;                                  // clear the size bits
-                code |= insEncodeDatasizeVPLS(code, id->idOpSize()); // XX
-                code |= insEncodeReg_Vt(id->idReg1());               // ttttt
-                code |= insEncodeReg_Va(id->idReg2());               // aaaaa
-            }
-            else
-            {
-                code |= insEncodeDatasize(id->idOpSize()); // X
-                code |= insEncodeReg_Rt(id->idReg1());     // ttttt
-                code |= insEncodeReg_Ra(id->idReg2());     // aaaaa
-            }
-            code |= insEncodeReg_Rn(id->idReg3()); // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+    /* Figure out the encoding format of the instruction */
 
-        case IF_LS_3C: // LS_3C   X......PP.iiiiii iaaaaannnnnddddd      Rd Ra Rn imm(im7,sh)
-            assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
-            imm = emitGetInsSC(id);
-            assert((imm >= -64) && (imm <= 63)); // signed 7 bits
-            imm &= 0x7f;                         // force into unsigned 7 bit representation
-            code = emitInsCode(ins, fmt);
-            // Is the target a vector register?
-            if (isVectorRegister(id->idReg1()))
-            {
-                code &= 0x3FFFFFFF;                                  // clear the size bits
-                code |= insEncodeDatasizeVPLS(code, id->idOpSize()); // XX
-                code |= insEncodeReg_Vt(id->idReg1());               // ttttt
-                code |= insEncodeReg_Va(id->idReg2());               // aaaaa
-            }
-            else
-            {
-                code |= insEncodeDatasize(id->idOpSize()); // X
-                code |= insEncodeReg_Rt(id->idReg1());     // ttttt
-                code |= insEncodeReg_Ra(id->idReg2());     // aaaaa
-            }
-            code |= insEncodePairIndexedOpt(ins, id->idInsOpt()); // PP
-            code |= ((code_t)imm << 15);                          // iiiiiiiii
-            code |= insEncodeReg_Rn(id->idReg3());                // nnnnn
-            dst += emitOutput_Instr(dst, code);
+    switch (ins)
+    {
+        case INS_bl_local:
+        case INS_b:
+            // Unconditional jump is a single form.
+            // Assume is long in case we cross hot/cold sections.
+            fmt = IF_BI_0A;
             break;
 
-        case IF_LS_3D: // LS_3D   .X.......X.mmmmm ......nnnnnttttt      Wm Rt Rn
-            code = emitInsCode(ins, fmt);
-            // Arm64 store exclusive unpredictable cases
-            assert(id->idReg1() != id->idReg2());
-            assert(id->idReg1() != id->idReg3());
-            code |= insEncodeDatasizeLS(code, id->idOpSize()); // X
-            code |= insEncodeReg_Rm(id->idReg1());             // mmmmm
-            code |= insEncodeReg_Rt(id->idReg2());             // ttttt
-            code |= insEncodeReg_Rn(id->idReg3());             // nnnnn
-            dst += emitOutput_Instr(dst, code);
+        case INS_beq:
+        case INS_bne:
+        case INS_bhs:
+        case INS_blo:
+        case INS_bmi:
+        case INS_bpl:
+        case INS_bvs:
+        case INS_bvc:
+        case INS_bhi:
+        case INS_bls:
+        case INS_bge:
+        case INS_blt:
+        case INS_bgt:
+        case INS_ble:
+            // Assume conditional jump is long.
+            fmt = IF_LARGEJMP;
+            break;
+
+        default:
+            unreached();
             break;
+    }
 
-        case IF_LS_3E: // LS_3E   .X.........mmmmm ......nnnnnttttt      Rm Rt Rn ARMv8.1 LSE Atomics
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeDatasizeLS(code, id->idOpSize()); // X
-            code |= insEncodeReg_Rm(id->idReg1());             // mmmmm
-            code |= insEncodeReg_Rt(id->idReg2());             // ttttt
-            code |= insEncodeReg_Rn(id->idReg3());             // nnnnn
-            dst += emitOutput_Instr(dst, code);
+    instrDescJmp* id = emitNewInstrJmp();
 
-            // Some instructions with this encoding return their result in the
-            // second operand register instead of the first so we special case
-            // the GC update here and skip the common path down below.
-            if (emitInsDestIsOp2(ins))
-            {
-                if (id->idGCref() != GCT_NONE)
-                {
-                    emitGCregLiveUpd(id->idGCref(), id->idReg2(), dst);
-                }
-                else
-                {
-                    emitGCregDeadUpd(id->idReg2(), dst);
-                }
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idjShort = false;
 
-                goto SKIP_GC_UPDATE;
-            }
+#ifdef DEBUG
+    // Mark the finally call
+    if (ins == INS_bl_local && emitComp->compCurBB->KindIs(BBJ_CALLFINALLY))
+    {
+        id->idDebugOnlyInfo()->idFinallyCall = true;
+    }
+#endif // DEBUG
 
-            break;
+    if (dst != nullptr)
+    {
+        id->idAddr()->iiaBBlabel = dst;
 
-        case IF_LS_3F: // LS_3F   .Q.........mmmmm ....ssnnnnnttttt      Vt Rn Rm
-            elemsize = optGetElemsize(id->idInsOpt());
-            code     = emitInsCode(ins, fmt);
+        // Skip unconditional jump that has a single form.
+        // The target needs to be relocated.
+        id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
 
-            code |= insEncodeVectorsize(id->idOpSize()); // Q
-            code |= insEncodeReg_Rm(id->idReg3());       // mmmmm
-            code |= insEncodeVLSElemsize(elemsize);      // ss
-            code |= insEncodeReg_Rn(id->idReg2());       // nnnnn
-            code |= insEncodeReg_Vt(id->idReg1());       // ttttt
+#ifdef DEBUG
+        if (emitComp->opts.compLongAddress) // Force long branches
+        {
+            id->idjKeepLong = true;
+        }
+#endif // DEBUG
+    }
+    else
+    {
+        id->idAddr()->iiaSetInstrCount(instrCount);
+        id->idjKeepLong = false;
+        /* This jump must be short */
+        emitSetShortJump(id);
+        id->idSetIsBound();
+    }
 
-            dst += emitOutput_Instr(dst, code);
-            break;
+    /* Record the jump's IG and offset within it */
 
-        case IF_LS_3G: // LS_3G   .Q.........mmmmm ...Sssnnnnnttttt      Vt[] Rn Rm
-            elemsize = id->idOpSize();
-            index    = id->idSmallCns();
-            code     = emitInsCode(ins, fmt);
+    id->idjIG   = emitCurIG;
+    id->idjOffs = emitCurIGsize;
 
-            code |= insEncodeVLSIndex(elemsize, index); // Q xx S ss
-            code |= insEncodeReg_Rm(id->idReg3());      // mmmmm
-            code |= insEncodeReg_Rn(id->idReg2());      // nnnnn
-            code |= insEncodeReg_Vt(id->idReg1());      // ttttt
+    /* Append this jump to this IG's jump list */
 
-            dst += emitOutput_Instr(dst, code);
-            break;
+    id->idjNext      = emitCurIGjmpList;
+    emitCurIGjmpList = id;
 
-        case IF_DI_1A: // DI_1A   X.......shiiiiii iiiiiinnnnn.....         Rn    imm(i12,sh)
-            assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
-            imm = emitGetInsSC(id);
-            assert(isValidUimm12(imm));
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeDatasize(id->idOpSize());   // X
-            code |= insEncodeShiftImm12(id->idInsOpt()); // sh
-            code |= ((code_t)imm << 10);                 // iiiiiiiiiiii
-            code |= insEncodeReg_Rn(id->idReg1());       // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+#if EMITTER_STATS
+    emitTotalIGjmps++;
+#endif
 
-        case IF_DI_1B: // DI_1B   X........hwiiiii iiiiiiiiiiiddddd      Rd       imm(i16,hw)
-            imm = emitGetInsSC(id);
-            assert(isValidImmHWVal(imm, id->idOpSize()));
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeDatasize(id->idOpSize()); // X
-            code |= ((code_t)imm << 5);                // hwiiiii iiiiiiiiiii
-            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
-            dst += emitOutput_Instr(dst, code);
-            break;
+    dispIns(id);
+    appendToCurIG(id);
+}
 
-        case IF_DI_1C: // DI_1C   X........Nrrrrrr ssssssnnnnn.....         Rn    imm(N,r,s)
-            imm = emitGetInsSC(id);
-            assert(isValidImmNRS(imm, id->idOpSize()));
-            code = emitInsCode(ins, fmt);
-            code |= ((code_t)imm << 10);               // Nrrrrrrssssss
-            code |= insEncodeDatasize(id->idOpSize()); // X
-            code |= insEncodeReg_Rn(id->idReg1());     // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Add a call instruction (direct or indirect).
+ *      argSize<0 means that the caller will pop the arguments
+ *
+ * The other arguments are interpreted depending on callType as shown:
+ * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
+ *
+ * EC_FUNC_TOKEN       : addr is the method address
+ * EC_FUNC_ADDR        : addr is the absolute address of the function
+ *
+ * If callType is one of these emitCallTypes, addr has to be NULL.
+ * EC_INDIR_R          : "call ireg".
+ *
+ * For ARM xreg, xmul and disp are never used and should always be 0/REG_NA.
+ *
+ *  Please consult the "debugger team notification" comment in genFnProlog().
+ */
 
-        case IF_DI_1D: // DI_1D   X........Nrrrrrr ssssss.....ddddd      Rd       imm(N,r,s)
-            imm = emitGetInsSC(id);
-            assert(isValidImmNRS(imm, id->idOpSize()));
-            code = emitInsCode(ins, fmt);
-            code |= ((code_t)imm << 10);               // Nrrrrrrssssss
-            code |= insEncodeDatasize(id->idOpSize()); // X
-            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
-            dst += emitOutput_Instr(dst, code);
-            break;
+void emitter::emitIns_Call(EmitCallType          callType,
+                           CORINFO_METHOD_HANDLE methHnd,
+                           INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+                           void*            addr,
+                           ssize_t          argSize,
+                           emitAttr         retSize,
+                           emitAttr         secondRetSize,
+                           VARSET_VALARG_TP ptrVars,
+                           regMaskTP        gcrefRegs,
+                           regMaskTP        byrefRegs,
+                           const DebugInfo& di /* = DebugInfo() */,
+                           regNumber        ireg /* = REG_NA */,
+                           regNumber        xreg /* = REG_NA */,
+                           unsigned         xmul /* = 0     */,
+                           ssize_t          disp /* = 0     */,
+                           bool             isJump /* = false */)
+{
+    /* Sanity check the arguments depending on callType */
 
-        case IF_DI_1E: // DI_1E   .ii.....iiiiiiii iiiiiiiiiiiddddd      Rd       simm21
-        case IF_LARGEADR:
-            assert(insOptsNone(id->idInsOpt()));
-            if (id->idIsReloc())
-            {
-                code = emitInsCode(ins, fmt);
-                code |= insEncodeReg_Rd(id->idReg1()); // ddddd
-                dst += emitOutput_Instr(dst, code);
-                emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEBASE_REL21);
-            }
-            else
-            {
-                // Local jmp/load case which does not need a relocation.
-                assert(id->idIsBound());
-                dst = emitOutputLJ(ig, dst, id);
-            }
-            sz = sizeof(instrDescJmp);
-            break;
+    assert(callType < EC_COUNT);
+    assert((callType != EC_FUNC_TOKEN) || (addr != nullptr && ireg == REG_NA));
+    assert(callType != EC_INDIR_R || (addr == nullptr && ireg < REG_COUNT));
 
-        case IF_DI_1F: // DI_1F   X..........iiiii cccc..nnnnn.nzcv      Rn imm5  nzcv cond
-            imm = emitGetInsSC(id);
-            assert(isValidImmCondFlagsImm5(imm));
-            {
-                condFlagsImm cfi;
-                cfi.immCFVal = (unsigned)imm;
-                code         = emitInsCode(ins, fmt);
-                code |= insEncodeDatasize(id->idOpSize()); // X
-                code |= insEncodeReg_Rn(id->idReg1());     // nnnnn
-                code |= ((code_t)cfi.imm5 << 16);          // iiiii
-                code |= insEncodeFlags(cfi.flags);         // nzcv
-                code |= insEncodeCond(cfi.cond);           // cccc
-                dst += emitOutput_Instr(dst, code);
-            }
-            break;
+    // ARM never uses these
+    assert(xreg == REG_NA && xmul == 0 && disp == 0);
 
-        case IF_DI_2A: // DI_2A   X.......shiiiiii iiiiiinnnnnddddd      Rd Rn    imm(i12,sh)
-            assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
-            imm = emitGetInsSC(id);
-            assert(isValidUimm12(imm));
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeDatasize(id->idOpSize());   // X
-            code |= insEncodeShiftImm12(id->idInsOpt()); // sh
-            code |= ((code_t)imm << 10);                 // iiiiiiiiiiii
-            code |= insEncodeReg_Rd(id->idReg1());       // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());       // nnnnn
-            dst += emitOutput_Instr(dst, code);
+    // Our stack level should be always greater than the bytes of arguments we push. Just
+    // a sanity test.
+    assert((unsigned)std::abs(argSize) <= codeGen->genStackLevel);
 
-            if (id->idIsReloc())
-            {
-                assert(sz == sizeof(instrDesc));
-                assert(id->idAddr()->iiaAddr != nullptr);
-                emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEOFFSET_12A);
-            }
-            break;
+    // Trim out any callee-trashed registers from the live set.
+    regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd);
+    gcrefRegs &= savedSet;
+    byrefRegs &= savedSet;
 
-        case IF_DI_2B: // DI_2B   X.........Xnnnnn ssssssnnnnnddddd      Rd Rn    imm(0-63)
-            code = emitInsCode(ins, fmt);
-            imm  = emitGetInsSC(id);
-            assert(isValidImmShift(imm, id->idOpSize()));
-            code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X
-            code |= insEncodeReg_Rd(id->idReg1());             // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());             // nnnnn
-            code |= insEncodeReg_Rm(id->idReg2());             // Reg2 also in mmmmm
-            code |= insEncodeShiftCount(imm, id->idOpSize());  // ssssss
-            dst += emitOutput_Instr(dst, code);
-            break;
+#ifdef DEBUG
+    if (EMIT_GC_VERBOSE)
+    {
+        printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
+        dumpConvertedVarSet(emitComp, ptrVars);
+        printf(", gcrefRegs=");
+        printRegMaskInt(gcrefRegs);
+        emitDispRegSet(gcrefRegs);
+        printf(", byrefRegs=");
+        printRegMaskInt(byrefRegs);
+        emitDispRegSet(byrefRegs);
+        printf("\n");
+    }
+#endif
+
+    /* Managed RetVal: emit sequence point for the call */
+    if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid())
+    {
+        codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false);
+    }
+
+    /*
+        We need to allocate the appropriate instruction descriptor based
+        on whether this is a direct/indirect call, and whether we need to
+        record an updated set of live GC variables.
+     */
+    instrDesc* id;
 
-        case IF_DI_2C: // DI_2C   X........Nrrrrrr ssssssnnnnnddddd      Rd Rn    imm(N,r,s)
-            imm = emitGetInsSC(id);
-            assert(isValidImmNRS(imm, id->idOpSize()));
-            code = emitInsCode(ins, fmt);
-            code |= ((code_t)imm << 10);               // Nrrrrrrssssss
-            code |= insEncodeDatasize(id->idOpSize()); // X
-            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+    assert(argSize % REGSIZE_BYTES == 0);
+    int argCnt = (int)(argSize / (int)REGSIZE_BYTES);
 
-        case IF_DI_2D: // DI_2D   X........Nrrrrrr ssssssnnnnnddddd      Rd Rn    imr, imms   (N,r,s)
-            if (ins == INS_asr || ins == INS_lsl || ins == INS_lsr)
-            {
-                imm = emitGetInsSC(id);
-                assert(isValidImmShift(imm, id->idOpSize()));
+    if (callType == EC_INDIR_R)
+    {
+        /* Indirect call, virtual calls */
 
-                // Shift immediates are aliases of the SBFM/UBFM instructions
-                // that actually take 2 registers and 2 constants,
-                // Since we stored the shift immediate value
-                // we need to calculate the N,R and S values here.
+        id = emitNewInstrCallInd(argCnt, 0 /* disp */, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize);
+    }
+    else
+    {
+        /* Helper/static/nonvirtual/function calls (direct or through handle),
+           and calls to an absolute addr. */
 
-                bitMaskImm bmi;
-                bmi.immNRS = 0;
+        assert(callType == EC_FUNC_TOKEN);
 
-                bmi.immN = (size == EA_8BYTE) ? 1 : 0;
-                bmi.immR = imm;
-                bmi.immS = (size == EA_8BYTE) ? 0x3f : 0x1f;
+        id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize);
+    }
 
-                // immR and immS are now set correctly for INS_asr and INS_lsr
-                // but for INS_lsl we have to adjust the values for immR and immS
-                //
-                if (ins == INS_lsl)
-                {
-                    bmi.immR = -imm & bmi.immS;
-                    bmi.immS = bmi.immS - imm;
-                }
+    /* Update the emitter's live GC ref sets */
 
-                // setup imm with the proper 13 bit value N:R:S
-                //
-                imm = bmi.immNRS;
-            }
-            else
-            {
-                // The other instructions have already have encoded N,R and S values
-                imm = emitGetInsSC(id);
-            }
-            assert(isValidImmNRS(imm, id->idOpSize()));
+    VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
+    emitThisGCrefRegs = gcrefRegs;
+    emitThisByrefRegs = byrefRegs;
 
-            code = emitInsCode(ins, fmt);
-            code |= ((code_t)imm << 10);               // Nrrrrrrssssss
-            code |= insEncodeDatasize(id->idOpSize()); // X
-            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+    id->idSetIsNoGC(emitNoGChelper(methHnd));
 
-        case IF_DR_1D: // DR_1D   X............... cccc.......ddddd      Rd       cond
-            imm = emitGetInsSC(id);
-            assert(isValidImmCond(imm));
-            {
-                condFlagsImm cfi;
-                cfi.immCFVal = (unsigned)imm;
-                code         = emitInsCode(ins, fmt);
-                code |= insEncodeDatasize(id->idOpSize()); // X
-                code |= insEncodeReg_Rd(id->idReg1());     // ddddd
-                code |= insEncodeInvertedCond(cfi.cond);   // cccc
-                dst += emitOutput_Instr(dst, code);
-            }
-            break;
+    /* Set the instruction - special case jumping a function */
+    instruction ins;
+    insFormat   fmt = IF_NONE;
 
-        case IF_DR_2A: // DR_2A   X..........mmmmm ......nnnnn.....         Rn Rm
-            assert(insOptsNone(id->idInsOpt()));
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeDatasize(id->idOpSize()); // X
-            code |= insEncodeReg_Rn(id->idReg1());     // nnnnn
-            code |= insEncodeReg_Rm(id->idReg2());     // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+    /* Record the address: method, indirection, or funcptr */
 
-        case IF_DR_2B: // DR_2B   X.......sh.mmmmm ssssssnnnnn.....         Rn Rm {LSL,LSR,ASR,ROR} imm(0-63)
-            code = emitInsCode(ins, fmt);
-            imm  = emitGetInsSC(id);
-            assert(isValidImmShift(imm, id->idOpSize()));
-            code |= insEncodeDatasize(id->idOpSize());        // X
-            code |= insEncodeShiftType(id->idInsOpt());       // sh
-            code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
-            code |= insEncodeReg_Rn(id->idReg1());            // nnnnn
-            code |= insEncodeReg_Rm(id->idReg2());            // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+    if (callType == EC_INDIR_R)
+    {
+        /* This is an indirect call (either a virtual call or func ptr call) */
 
-        case IF_DR_2C: // DR_2C   X..........mmmmm ooosssnnnnn.....         Rn Rm ext(Rm) LSL imm(0-4)
-            code = emitInsCode(ins, fmt);
-            imm  = emitGetInsSC(id);
-            assert((imm >= 0) && (imm <= 4));          // imm [0..4]
-            code |= insEncodeDatasize(id->idOpSize()); // X
-            code |= insEncodeExtend(id->idInsOpt());   // ooo
-            code |= insEncodeExtendScale(imm);         // sss
-            code |= insEncodeReg_Rn(id->idReg1());     // nnnnn
-            code |= insEncodeReg_Rm(id->idReg2());     // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+        if (isJump)
+        {
+            ins = INS_br_tail; // INS_br_tail  Reg
+        }
+        else
+        {
+            ins = INS_blr; // INS_blr Reg
+        }
+        fmt = IF_BR_1B;
 
-        case IF_DR_2D: // DR_2D   X..........nnnnn cccc..nnnnnddddd      Rd Rn    cond
-            imm = emitGetInsSC(id);
-            assert(isValidImmCond(imm));
-            {
-                condFlagsImm cfi;
-                cfi.immCFVal = (unsigned)imm;
-                code         = emitInsCode(ins, fmt);
-                code |= insEncodeDatasize(id->idOpSize()); // X
-                code |= insEncodeReg_Rd(id->idReg1());     // ddddd
-                code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
-                code |= insEncodeReg_Rm(id->idReg2());     // mmmmm
-                code |= insEncodeInvertedCond(cfi.cond);   // cccc
-                dst += emitOutput_Instr(dst, code);
-            }
-            break;
+        id->idIns(ins);
+        id->idInsFmt(fmt);
 
-        case IF_DR_2E: // DR_2E   X..........mmmmm ...........ddddd      Rd    Rm
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeDatasize(id->idOpSize()); // X
-            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
-            code |= insEncodeReg_Rm(id->idReg2());     // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+        assert(xreg == REG_NA);
+        if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && EA_IS_CNS_TLSGD_RELOC(retSize))
+        {
+            // For NativeAOT linux/arm64, we need to also record the relocation of methHnd.
+            // Since we do not have space to embed it in instrDesc, we store the register in
+            // reg1 and instead use the `iiaAdd` to store the method handle. Likewise, during
+            // emitOutputInstr, we retrieve the register from reg1 for this specific case.
+            id->idSetTlsGD();
+            id->idReg1(ireg);
+            id->idAddr()->iiaAddr = (BYTE*)methHnd;
+        }
+        else
+        {
+            id->idReg3(ireg);
+        }
+    }
+    else
+    {
+        /* This is a simple direct call: "call helper/method/addr" */
 
-        case IF_DR_2F: // DR_2F   X.......sh.mmmmm ssssss.....ddddd      Rd    Rm {LSL,LSR,ASR} imm(0-63)
-            code = emitInsCode(ins, fmt);
-            imm  = emitGetInsSC(id);
-            assert(isValidImmShift(imm, id->idOpSize()));
-            code |= insEncodeDatasize(id->idOpSize());        // X
-            code |= insEncodeShiftType(id->idInsOpt());       // sh
-            code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
-            code |= insEncodeReg_Rd(id->idReg1());            // ddddd
-            code |= insEncodeReg_Rm(id->idReg2());            // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+        assert(callType == EC_FUNC_TOKEN);
 
-        case IF_DR_2G: // DR_2G   X............... .....xnnnnnddddd      Rd Rn
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeDatasize(id->idOpSize()); // X
-            if (ins == INS_rev)
-            {
-                if (size == EA_8BYTE)
-                {
-                    code |= 0x00000400; // x - bit at location 10
-                }
-            }
-            code |= insEncodeReg_Rd(id->idReg1()); // ddddd
-            code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+        assert(addr != NULL);
 
-        case IF_DR_2H: // DR_2H   X........X...... ......nnnnnddddd      Rd Rn
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X
-            code |= insEncodeReg_Rd(id->idReg1());             // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());             // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+        if (isJump)
+        {
+            ins = INS_b_tail; // INS_b_tail imm28
+        }
+        else
+        {
+            ins = INS_bl; // INS_bl imm28
+        }
+        fmt = IF_BI_0C;
 
-        case IF_DR_2I: // DR_2I   X..........mmmmm cccc..nnnnn.nzcv      Rn Rm    nzcv cond
-            imm = emitGetInsSC(id);
-            assert(isValidImmCondFlags(imm));
-            {
-                condFlagsImm cfi;
-                cfi.immCFVal = (unsigned)imm;
-                code         = emitInsCode(ins, fmt);
-                code |= insEncodeDatasize(id->idOpSize()); // X
-                code |= insEncodeReg_Rn(id->idReg1());     // nnnnn
-                code |= insEncodeReg_Rm(id->idReg2());     // mmmmm
-                code |= insEncodeFlags(cfi.flags);         // nzcv
-                code |= insEncodeCond(cfi.cond);           // cccc
-                dst += emitOutput_Instr(dst, code);
-            }
-            break;
+        id->idIns(ins);
+        id->idInsFmt(fmt);
 
-        case IF_DR_3A: // DR_3A   X..........mmmmm ......nnnnnmmmmm      Rd Rn Rm
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeDatasize(id->idOpSize()); // X
-            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
-            if (id->idIsLclVar())
-            {
-                code |= insEncodeReg_Rm(codeGen->rsGetRsvdReg()); // mmmmm
-            }
-            else
-            {
-                code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
-            }
-            dst += emitOutput_Instr(dst, code);
-            break;
+        id->idAddr()->iiaAddr = (BYTE*)addr;
+
+        if (emitComp->opts.compReloc)
+        {
+            id->idSetIsDspReloc();
+        }
+    }
+
+#ifdef DEBUG
+    if (EMIT_GC_VERBOSE)
+    {
+        if (id->idIsLargeCall())
+        {
+            printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
+                   VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
+        }
+    }
+#endif
 
-        case IF_DR_3B: // DR_3B   X.......sh.mmmmm ssssssnnnnnddddd      Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
-            code = emitInsCode(ins, fmt);
-            imm  = emitGetInsSC(id);
-            assert(isValidImmShift(imm, id->idOpSize()));
-            code |= insEncodeDatasize(id->idOpSize());        // X
-            code |= insEncodeReg_Rd(id->idReg1());            // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());            // nnnnn
-            code |= insEncodeReg_Rm(id->idReg3());            // mmmmm
-            code |= insEncodeShiftType(id->idInsOpt());       // sh
-            code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
-            dst += emitOutput_Instr(dst, code);
-            break;
+    if (m_debugInfoSize > 0)
+    {
+        INDEBUG(id->idDebugOnlyInfo()->idCallSig = sigInfo);
+        id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
+    }
 
-        case IF_DR_3C: // DR_3C   X..........mmmmm ooosssnnnnnddddd      Rd Rn Rm ext(Rm) LSL imm(0-4)
-            code = emitInsCode(ins, fmt);
-            imm  = emitGetInsSC(id);
-            assert((imm >= 0) && (imm <= 4));          // imm [0..4]
-            code |= insEncodeDatasize(id->idOpSize()); // X
-            code |= insEncodeExtend(id->idInsOpt());   // ooo
-            code |= insEncodeExtendScale(imm);         // sss
-            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
-            code |= insEncodeReg_Rm(id->idReg3());     // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+#ifdef LATE_DISASM
+    if (addr != nullptr)
+    {
+        codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
+    }
+#endif // LATE_DISASM
 
-        case IF_DR_3D: // DR_3D   X..........mmmmm cccc..nnnnnddddd      Rd Rn Rm cond
-            imm = emitGetInsSC(id);
-            assert(isValidImmCond(imm));
-            {
-                condFlagsImm cfi;
-                cfi.immCFVal = (unsigned)imm;
-                code         = emitInsCode(ins, fmt);
-                code |= insEncodeDatasize(id->idOpSize()); // X
-                code |= insEncodeReg_Rd(id->idReg1());     // ddddd
-                code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
-                code |= insEncodeReg_Rm(id->idReg3());     // mmmmm
-                code |= insEncodeCond(cfi.cond);           // cccc
-                dst += emitOutput_Instr(dst, code);
-            }
-            break;
+    dispIns(id);
+    appendToCurIG(id);
+    emitLastMemBarrier = nullptr; // Cannot optimize away future memory barriers
+}
 
-        case IF_DR_3E: // DR_3E   X........X.mmmmm ssssssnnnnnddddd      Rd Rn Rm imm(0-63)
-            code = emitInsCode(ins, fmt);
-            imm  = emitGetInsSC(id);
-            assert(isValidImmShift(imm, id->idOpSize()));
-            code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X
-            code |= insEncodeReg_Rd(id->idReg1());             // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());             // nnnnn
-            code |= insEncodeReg_Rm(id->idReg3());             // mmmmm
-            code |= insEncodeShiftCount(imm, id->idOpSize());  // ssssss
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Returns true if 'imm' is valid Cond encoding
+ */
 
-        case IF_DR_4A: // DR_4A   X..........mmmmm .aaaaannnnnmmmmm      Rd Rn Rm Ra
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeDatasize(id->idOpSize()); // X
-            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
-            code |= insEncodeReg_Rm(id->idReg3());     // mmmmm
-            code |= insEncodeReg_Ra(id->idReg4());     // aaaaa
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ bool emitter::isValidImmCond(ssize_t imm)
+{
+    // range check the ssize_t value, to make sure it is a small unsigned value
+    // and that only the bits in the cfi.cond are set
+    if ((imm < 0) || (imm > 0xF))
+        return false;
 
-        case IF_DV_1A: // DV_1A   .........X.iiiii iii........ddddd      Vd imm8    (fmov - immediate scalar)
-            imm      = emitGetInsSC(id);
-            elemsize = id->idOpSize();
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeFloatElemsize(elemsize); // X
-            code |= ((code_t)imm << 13);              // iiiii iii
-            code |= insEncodeReg_Vd(id->idReg1());    // ddddd
-            dst += emitOutput_Instr(dst, code);
-            break;
+    condFlagsImm cfi;
+    cfi.immCFVal = (unsigned)imm;
 
-        case IF_DV_1B: // DV_1B   .QX..........iii cmod..iiiiiddddd      Vd imm8    (immediate vector)
-            imm      = emitGetInsSC(id) & 0x0ff;
-            immShift = (emitGetInsSC(id) & 0x700) >> 8;
-            elemsize = optGetElemsize(id->idInsOpt());
-            cmode    = 0;
-            switch (elemsize)
-            { // cmode
-                case EA_1BYTE:
-                    cmode = 0xE; // 1110
-                    break;
-                case EA_2BYTE:
-                    cmode = 0x8;
-                    cmode |= (immShift << 1); // 10x0
-                    break;
-                case EA_4BYTE:
-                    if (immShift < 4)
-                    {
-                        cmode = 0x0;
-                        cmode |= (immShift << 1); // 0xx0
-                    }
-                    else // MSL
-                    {
-                        cmode = 0xC;
-                        if (immShift & 2)
-                            cmode |= 1; // 110x
-                    }
-                    break;
-                case EA_8BYTE:
-                    cmode = 0xE; // 1110
-                    break;
-                default:
-                    unreached();
-                    break;
-            }
+    return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV).
+}
 
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeVectorsize(id->idOpSize()); // Q
-            if ((ins == INS_fmov) || (ins == INS_movi))
-            {
-                if (elemsize == EA_8BYTE)
-                {
-                    code |= 0x20000000; // X
-                }
-            }
-            if (ins != INS_fmov)
-            {
-                assert((cmode >= 0) && (cmode <= 0xF));
-                code |= (cmode << 12); // cmod
-            }
-            code |= (((code_t)imm >> 5) << 16);    // iii
-            code |= (((code_t)imm & 0x1f) << 5);   // iiiii
-            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Returns true if 'imm' is valid Cond/Flags encoding
+ */
 
-        case IF_DV_1C: // DV_1C   .........X...... ......nnnnn.....      Vn #0.0    (fcmp - with zero)
-            elemsize = id->idOpSize();
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeFloatElemsize(elemsize); // X
-            code |= insEncodeReg_Vn(id->idReg1());    // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ bool emitter::isValidImmCondFlags(ssize_t imm)
+{
+    // range check the ssize_t value, to make sure it is a small unsigned value
+    // and that only the bits in the cfi.cond or cfi.flags are set
+    if ((imm < 0) || (imm > 0xFF))
+        return false;
 
-        case IF_DV_2A: // DV_2A   .Q.......X...... ......nnnnnddddd      Vd Vn      (fabs, fcvt - vector)
-        case IF_DV_2R: // DV_2R   .Q.......X...... ......nnnnnddddd      Sd Vn      (fmaxnmv, fmaxv, fminnmv, fminv)
-            elemsize = optGetElemsize(id->idInsOpt());
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeVectorsize(id->idOpSize()); // Q
-            if ((ins == INS_fcvtl) || (ins == INS_fcvtl2) || (ins == INS_fcvtn) || (ins == INS_fcvtn2))
-            {
-                // fcvtl{2} and fcvtn{2} encode the element size as
-                //   esize = 16 << UInt(sz)
-                if (elemsize == EA_4BYTE)
-                {
-                    code |= 0x00400000; // X
-                }
-                else
-                {
-                    assert(elemsize == EA_2BYTE);
-                }
-            }
-            else
-            {
-                code |= insEncodeFloatElemsize(elemsize); // X
-            }
-            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
-            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+    condFlagsImm cfi;
+    cfi.immCFVal = (unsigned)imm;
 
-        case IF_DV_2B: // DV_2B   .Q.........iiiii ......nnnnnddddd      Rd Vn[] (umov/smov    - to general)
-            elemsize = id->idOpSize();
-            index    = emitGetInsSC(id);
-            datasize = (elemsize == EA_8BYTE) ? EA_16BYTE : EA_8BYTE;
-            if (ins == INS_smov)
-            {
-                datasize = EA_16BYTE;
-            }
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeVectorsize(datasize);         // Q
-            code |= insEncodeVectorIndex(elemsize, index); // iiiii
-            code |= insEncodeReg_Rd(id->idReg1());         // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());         // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+    return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV).
+}
 
-        case IF_DV_2C: // DV_2C   .Q.........iiiii ......nnnnnddddd      Vd Rn   (dup/ins - vector from general)
-            if (ins == INS_dup)
-            {
-                datasize = id->idOpSize();
-                elemsize = optGetElemsize(id->idInsOpt());
-                index    = 0;
-            }
-            else // INS_ins
-            {
-                datasize = EA_16BYTE;
-                elemsize = id->idOpSize();
-                index    = emitGetInsSC(id);
-            }
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeVectorsize(datasize);         // Q
-            code |= insEncodeVectorIndex(elemsize, index); // iiiii
-            code |= insEncodeReg_Vd(id->idReg1());         // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());         // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Returns true if 'imm' is valid Cond/Flags/Imm5 encoding
+ */
 
-        case IF_DV_2D: // DV_2D   .Q.........iiiii ......nnnnnddddd      Vd Vn[]   (dup - vector)
-            index    = emitGetInsSC(id);
-            elemsize = optGetElemsize(id->idInsOpt());
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeVectorsize(id->idOpSize());   // Q
-            code |= insEncodeVectorIndex(elemsize, index); // iiiii
-            code |= insEncodeReg_Vd(id->idReg1());         // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());         // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ bool emitter::isValidImmCondFlagsImm5(ssize_t imm)
+{
+    // range check the ssize_t value, to make sure it is a small unsigned value
+    // and that only the bits in the cfi.cond, cfi.flags or cfi.imm5 are set
+    if ((imm < 0) || (imm > 0x1FFF))
+        return false;
+
+    condFlagsImm cfi;
+    cfi.immCFVal = (unsigned)imm;
 
-        case IF_DV_2E: // DV_2E   ...........iiiii ......nnnnnddddd      Vd Vn[]   (dup - scalar)
-            index    = emitGetInsSC(id);
-            elemsize = id->idOpSize();
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeVectorIndex(elemsize, index); // iiiii
-            code |= insEncodeReg_Vd(id->idReg1());         // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());         // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+    return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV).
+}
 
-        case IF_DV_2F: // DV_2F   ...........iiiii .jjjj.nnnnnddddd      Vd[] Vn[] (ins - element)
-            elemsize = id->idOpSize();
-            imm      = emitGetInsSC(id);
-            index    = (imm >> 4) & 0xf;
-            index2   = imm & 0xf;
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeVectorIndex(elemsize, index);   // iiiii
-            code |= insEncodeVectorIndex2(elemsize, index2); // jjjj
-            code |= insEncodeReg_Vd(id->idReg1());           // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());           // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Return an encoding for the specified 'V' register used in '9' thru '6' position with the times two encoding.
+ *  This encoding requires that the register number be divisible by two.
+ */
 
-        case IF_DV_2G: // DV_2G   .........X...... ......nnnnnddddd      Vd Vn      (fmov, fcvtXX - register)
-            elemsize = id->idOpSize();
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeFloatElemsize(elemsize); // X
-            code |= insEncodeReg_Vd(id->idReg1());    // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());    // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ emitter::code_t emitter::insEncodeReg_V_9_to_6_Times_Two(regNumber reg)
+{
+    assert(isVectorRegister(reg));
+    emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
+    assert(ureg % 2 == 0);
+    ureg /= 2u;
+    assert((ureg >= 0) && (ureg <= 31));
+    return ureg << 6;
+}
 
-        case IF_DV_2H: // DV_2H   X........X...... ......nnnnnddddd      Rd Vn      (fmov - to general)
-            elemsize = id->idOpSize();
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // X   X
-            code |= insEncodeReg_Rd(id->idReg1());            // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());            // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Returns an encoding for the specified condition code.
+ */
 
-        case IF_DV_2I: // DV_2I   X........X...... ......nnnnnddddd      Vd Rn      (fmov - from general)
-            elemsize = id->idOpSize();
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // X   X
-            code |= insEncodeReg_Vd(id->idReg1());            // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());            // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ emitter::code_t emitter::insEncodeCond(insCond cond)
+{
+    emitter::code_t uimm = (emitter::code_t)cond;
+    return uimm << 12;
+}
 
-        case IF_DV_2J: // DV_2J   ........SS.....D D.....nnnnnddddd      Vd Vn      (fcvt)
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // SS DD
-            code |= insEncodeReg_Vd(id->idReg1());            // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());            // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Returns an encoding for the condition code with the lowest bit inverted (marked by invert(<cond>) in the
+ *  architecture manual).
+ */
 
-        case IF_DV_2K: // DV_2K   .........X.mmmmm ......nnnnn.....      Vn Vm      (fcmp)
-            elemsize = id->idOpSize();
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeFloatElemsize(elemsize); // X
-            code |= insEncodeReg_Vn(id->idReg1());    // nnnnn
-            code |= insEncodeReg_Vm(id->idReg2());    // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ emitter::code_t emitter::insEncodeInvertedCond(insCond cond)
+{
+    emitter::code_t uimm = (emitter::code_t)cond;
+    uimm ^= 1; // invert the lowest bit
+    return uimm << 12;
+}
 
-        case IF_DV_2L: // DV_2L   ........XX...... ......nnnnnddddd      Vd Vn      (abs, neg - scalar)
-            elemsize = id->idOpSize();
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeElemsize(elemsize);   // XX
-            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
-            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Returns an encoding for the specified flags.
+ */
 
-        case IF_DV_2M: // DV_2M   .Q......XX...... ......nnnnnddddd      Vd Vn      (abs, neg   - vector)
-        case IF_DV_2T: // DV_2T   .Q......XX...... ......nnnnnddddd      Sd Vn      (addv, saddlv, smaxv, sminv, uaddlv,
-                       // umaxv, uminv)
-            elemsize = optGetElemsize(id->idInsOpt());
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeVectorsize(id->idOpSize()); // Q
-            code |= insEncodeElemsize(elemsize);         // XX
-            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ emitter::code_t emitter::insEncodeFlags(insCflags flags)
+{
+    emitter::code_t uimm = (emitter::code_t)flags;
+    return uimm;
+}
 
-        case IF_DV_2N: // DV_2N   .........iiiiiii ......nnnnnddddd      Vd Vn imm   (shift - scalar)
-            imm      = emitGetInsSC(id);
-            elemsize = id->idOpSize();
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeVectorShift(elemsize, emitInsIsVectorRightShift(ins), imm); // iiiiiii
-            code |= insEncodeReg_Vd(id->idReg1());                                       // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());                                       // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Returns the encoding for the Shift Count bits to be used for Arm64 encodings
+ */
 
-        case IF_DV_2O: // DV_2O   .Q.......iiiiiii ......nnnnnddddd      Vd Vn imm   (shift - vector)
-            imm      = emitGetInsSC(id);
-            elemsize = optGetElemsize(id->idInsOpt());
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeVectorsize(id->idOpSize());                                 // Q
-            code |= insEncodeVectorShift(elemsize, emitInsIsVectorRightShift(ins), imm); // iiiiiii
-            code |= insEncodeReg_Vd(id->idReg1());                                       // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());                                       // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ emitter::code_t emitter::insEncodeShiftCount(ssize_t imm, emitAttr size)
+{
+    assert((imm & 0x003F) == imm);
+    assert(((imm & 0x0020) == 0) || (size == EA_8BYTE));
 
-        case IF_DV_2P: // DV_2P   ............... ......nnnnnddddd      Vd Vn      (aes*, sha1su1)
-            elemsize = optGetElemsize(id->idInsOpt());
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
-            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+    return (emitter::code_t)imm << 10;
+}
 
-        case IF_DV_2Q: // DV_2Q   .........X...... ......nnnnnddddd      Vd Vn      (faddp, fmaxnmp, fmaxp, fminnmp,
-                       // fminp - scalar)
-            elemsize = optGetElemsize(id->idInsOpt());
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeFloatElemsize(elemsize); // X
-            code |= insEncodeReg_Vd(id->idReg1());    // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());    // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Returns the encoding to select a 64-bit datasize for an Arm64 instruction
+ */
 
-        case IF_DV_2S: // DV_2S   ........XX...... ......nnnnnddddd      Sd Vn      (addp - scalar)
-            elemsize = optGetElemsize(id->idInsOpt());
-            code     = emitInsCode(ins, fmt);
-            code |= insEncodeElemsize(elemsize);   // XX
-            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
-            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ emitter::code_t emitter::insEncodeDatasize(emitAttr size)
+{
+    if (size == EA_8BYTE)
+    {
+        return 0x80000000; // set the bit at location 31
+    }
+    else
+    {
+        assert(size == EA_4BYTE);
+        return 0;
+    }
+}
 
-        case IF_DV_2U: // DV_2U   ................ ......nnnnnddddd      Sd Sn   (sha1h)
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
-            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the datasize for the general load/store Arm64 instructions
+ *
+ */
 
-        case IF_DV_3A: // DV_3A   .Q......XX.mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
-            code     = emitInsCode(ins, fmt);
-            elemsize = optGetElemsize(id->idInsOpt());
-            code |= insEncodeVectorsize(id->idOpSize()); // Q
-            code |= insEncodeElemsize(elemsize);         // XX
-            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
-            code |= insEncodeReg_Vm(id->idReg3());       // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ emitter::code_t emitter::insEncodeDatasizeLS(emitter::code_t code, emitAttr size)
+{
+    bool exclusive = ((code & 0x35000000) == 0);
+    bool atomic    = ((code & 0x31200C00) == 0x30200000);
 
-        case IF_DV_3AI: // DV_3AI  .Q......XXLMmmmm ....H.nnnnnddddd      Vd Vn Vm[] (vector)
-            code     = emitInsCode(ins, fmt);
-            imm      = emitGetInsSC(id);
-            elemsize = optGetElemsize(id->idInsOpt());
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-            code |= insEncodeVectorsize(id->idOpSize());    // Q
-            code |= insEncodeElemsize(elemsize);            // XX
-            code |= insEncodeVectorIndexLMH(elemsize, imm); // LM H
-            code |= insEncodeReg_Vd(id->idReg1());          // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());          // nnnnn
-            code |= insEncodeReg_Vm(id->idReg3());          // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+    if ((code & 0x00800000) && !exclusive && !atomic) // Is this a sign-extending opcode? (i.e. ldrsw, ldrsh, ldrsb)
+    {
+        if ((code & 0x80000000) == 0) // Is it a ldrsh or ldrsb and not ldrsw ?
+        {
+            if (EA_SIZE(size) != EA_8BYTE) // Do we need to encode the 32-bit Rt size bit?
+            {
+                return 0x00400000; // set the bit at location 22
+            }
+        }
+    }
+    else if (code & 0x80000000) // Is this a ldr/str/ldur/stur opcode?
+    {
+        if (EA_SIZE(size) == EA_8BYTE) // Do we need to encode the 64-bit size bit?
+        {
+            return 0x40000000; // set the bit at location 30
+        }
+    }
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the datasize for the vector load/store Arm64 instructions
+ *
+ */
 
-        case IF_DV_3B: // DV_3B   .Q.......X.mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
-            code     = emitInsCode(ins, fmt);
-            elemsize = optGetElemsize(id->idInsOpt());
-            code |= insEncodeVectorsize(id->idOpSize()); // Q
-            code |= insEncodeFloatElemsize(elemsize);    // X
-            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
-            code |= insEncodeReg_Vm(id->idReg3());       // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ emitter::code_t emitter::insEncodeDatasizeVLS(emitter::code_t code, emitAttr size)
+{
+    code_t result = 0;
 
-        case IF_DV_3BI: // DV_3BI  .Q.......XLmmmmm ....H.nnnnnddddd      Vd Vn Vm[] (vector by element)
-            code     = emitInsCode(ins, fmt);
-            imm      = emitGetInsSC(id);
-            elemsize = optGetElemsize(id->idInsOpt());
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-            code |= insEncodeVectorsize(id->idOpSize()); // Q
-            code |= insEncodeFloatElemsize(elemsize);    // X
-            code |= insEncodeFloatIndex(elemsize, imm);  // L H
-            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
-            code |= insEncodeReg_Vm(id->idReg3());       // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+    // Check bit 29
+    if ((code & 0x20000000) == 0)
+    {
+        // LDR literal
 
-        case IF_DV_3C: // DV_3C   .Q.........mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeVectorsize(id->idOpSize()); // Q
-            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
-            code |= insEncodeReg_Vm(id->idReg3());       // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+        if (size == EA_16BYTE)
+        {
+            // set the operation size in bit 31
+            result = 0x80000000;
+        }
+        else if (size == EA_8BYTE)
+        {
+            // set the operation size in bit 30
+            result = 0x40000000;
+        }
+        else
+        {
+            assert(size == EA_4BYTE);
+            // no bits are set
+            result = 0x00000000;
+        }
+    }
+    else
+    {
+        // LDR non-literal
 
-        case IF_DV_3D: // DV_3D   .........X.mmmmm ......nnnnnddddd      Vd Vn Vm   (scalar)
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeFloatElemsize(id->idOpSize()); // X
-            code |= insEncodeReg_Vd(id->idReg1());          // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());          // nnnnn
-            code |= insEncodeReg_Vm(id->idReg3());          // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+        if (size == EA_16BYTE)
+        {
+            // The operation size in bits 31 and 30 are zero
+            // Bit 23 specifies a 128-bit Load/Store
+            result = 0x00800000;
+        }
+        else if (size == EA_8BYTE)
+        {
+            // set the operation size in bits 31 and 30
+            result = 0xC0000000;
+        }
+        else if (size == EA_4BYTE)
+        {
+            // set the operation size in bit 31
+            result = 0x80000000;
+        }
+        else if (size == EA_2BYTE)
+        {
+            // set the operation size in bit 30
+            result = 0x40000000;
+        }
+        else
+        {
+            assert(size == EA_1BYTE);
+            // The operation size in bits 31 and 30 are zero
+            result = 0x00000000;
+        }
+    }
 
-        case IF_DV_3DI: // DV_3DI  .........XLmmmmm ....H.nnnnnddddd      Vd Vn Vm[] (scalar by element)
-            code     = emitInsCode(ins, fmt);
-            imm      = emitGetInsSC(id);
-            elemsize = id->idOpSize();
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-            code |= insEncodeFloatElemsize(elemsize);   // X
-            code |= insEncodeFloatIndex(elemsize, imm); // L H
-            code |= insEncodeReg_Vd(id->idReg1());      // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());      // nnnnn
-            code |= insEncodeReg_Vm(id->idReg3());      // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+    // Or in bit 26 to indicate a Vector register is used as 'target'
+    result |= 0x04000000;
 
-        case IF_DV_3E: // DV_3E   ........XX.mmmmm ......nnnnnddddd      Vd Vn Vm   (scalar)
-            code     = emitInsCode(ins, fmt);
-            elemsize = id->idOpSize();
-            code |= insEncodeElemsize(elemsize);   // XX
-            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
-            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
-            code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+    return result;
+}
 
-        case IF_DV_3EI: // DV_3EI ........XXLMmmmm ....H.nnnnnddddd      Vd Vn Vm[] (scalar by element)
-            code     = emitInsCode(ins, fmt);
-            imm      = emitGetInsSC(id);
-            elemsize = id->idOpSize();
-            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
-            code |= insEncodeElemsize(elemsize);            // XX
-            code |= insEncodeVectorIndexLMH(elemsize, imm); // LM H
-            code |= insEncodeReg_Vd(id->idReg1());          // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());          // nnnnn
-            code |= insEncodeReg_Vm(id->idReg3());          // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the datasize for the vector load/store Arm64 instructions
+ *
+ */
 
-        case IF_DV_3F: // DV_3F   ...........mmmmm ......nnnnnddddd      Vd Vn Vm   (vector) - source dest regs overlap
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
-            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
-            code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ emitter::code_t emitter::insEncodeDatasizeVPLS(emitter::code_t code, emitAttr size)
+{
+    code_t result = 0;
 
-        case IF_DV_3G: // DV_3G   .Q.........mmmmm .iiii.nnnnnddddd      Vd Vn Vm imm (vector)
-            imm  = emitGetInsSC(id);
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeVectorsize(id->idOpSize()); // Q
-            code |= insEncodeReg_Vm(id->idReg3());       // mmmmm
-            code |= ((code_t)imm << 11);                 // iiii
-            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
-            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
-            dst += emitOutput_Instr(dst, code);
-            break;
+    if (size == EA_16BYTE)
+    {
+        // The operation size in bits 31 and 30 are zero
+        // Bit 23 specifies a 128-bit Load/Store
+        result = 0x80000000;
+    }
+    else if (size == EA_8BYTE)
+    {
+        // set the operation size in bits 31 and 30
+        result = 0x40000000;
+    }
+    else if (size == EA_4BYTE)
+    {
+        // set the operation size in bit 31
+        result = 0x00000000;
+    }
 
-        case IF_DV_4A: // DV_4A   .........X.mmmmm .aaaaannnnnddddd      Vd Va Vn Vm (scalar)
-            code     = emitInsCode(ins, fmt);
-            elemsize = id->idOpSize();
-            code |= insEncodeFloatElemsize(elemsize); // X
-            code |= insEncodeReg_Vd(id->idReg1());    // ddddd
-            code |= insEncodeReg_Vn(id->idReg2());    // nnnnn
-            code |= insEncodeReg_Vm(id->idReg3());    // mmmmm
-            code |= insEncodeReg_Va(id->idReg4());    // aaaaa
-            dst += emitOutput_Instr(dst, code);
-            break;
+    // Or in bit 26 to indicate a Vector register is used as 'target'
+    result |= 0x04000000;
 
-        case IF_SN_0A: // SN_0A   ................ ................
-        {
-            bool skipIns = false;
-#if FEATURE_LOOP_ALIGN
-            if (id->idIns() == INS_align)
-            {
-                // IG can be marked as not needing alignment after emitting align instruction.
-                // Alternatively, there are fewer align instructions needed than emitted.
-                // If that is the case, skip outputting alignment.
-                if (!ig->endsWithAlignInstr() || id->idIsEmptyAlign())
-                {
-                    skipIns = true;
-                }
+    return result;
+}
 
-#ifdef DEBUG
-                if (!ig->endsWithAlignInstr())
-                {
-                    // Validate if the state is correctly updated
-                    assert(id->idIsEmptyAlign());
-                }
-#endif
-                sz  = sizeof(instrDescAlign);
-                ins = INS_nop;
+/*****************************************************************************
+ *
+ *  Returns the encoding to set the size bit and the N bits for a 'bitfield' instruction
+ *
+ */
 
-#ifdef DEBUG
-                // Under STRESS_EMITTER, if this is the 'align' before the 'jmp' instruction,
-                // then add "bkpt" instruction.
-                instrDescAlign* alignInstr = (instrDescAlign*)id;
+/*static*/ emitter::code_t emitter::insEncodeDatasizeBF(emitter::code_t code, emitAttr size)
+{
+    // is bit 30 equal to 0?
+    if ((code & 0x40000000) == 0) // is the opcode one of extr, sxtb, sxth or sxtw
+    {
+        if (size == EA_8BYTE) // Do we need to set the sf and N bits?
+        {
+            return 0x80400000; // set the sf-bit at location 31 and the N-bit at location 22
+        }
+    }
+    return 0; // don't set any bits
+}
 
-                if (emitComp->compStressCompile(Compiler::STRESS_EMITTER, 50) && alignInstr->isPlacedAfterJmp &&
-                    !skipIns)
-                {
-                    // There is no good way to squeeze in "bkpt" as well as display it
-                    // in the disassembly because there is no corresponding instrDesc for
-                    // it. As such, leave it as is, the "0xD43E0000" bytecode will be seen
-                    // next to the nop instruction in disasm.
-                    // e.g. D43E0000          align   [4 bytes for IG07]
-                    ins = INS_BREAKPOINT;
-                    fmt = IF_SI_0A;
-                }
-#endif
-            }
-#endif // FEATURE_LOOP_ALIGN
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 64/128-bit datasize for an Arm64 vector instruction
+ */
 
-            if (!skipIns)
-            {
-                code = emitInsCode(ins, fmt);
-                dst += emitOutput_Instr(dst, code);
-            }
+/*static*/ emitter::code_t emitter::insEncodeVectorsize(emitAttr size)
+{
+    if (size == EA_16BYTE)
+    {
+        return 0x40000000; // set the bit at location 30
+    }
+    else
+    {
+        assert(size == EA_8BYTE);
+        return 0;
+    }
+}
 
-            break;
-        }
+/*****************************************************************************
+ *
+ *  Returns the encoding to select 'index' for an Arm64 vector elem instruction
+ */
+/*static*/ emitter::code_t emitter::insEncodeVectorIndex(emitAttr elemsize, ssize_t index)
+{
+    code_t bits = (code_t)index;
+    if (elemsize == EA_1BYTE)
+    {
+        bits <<= 1;
+        bits |= 1;
+    }
+    else if (elemsize == EA_2BYTE)
+    {
+        bits <<= 2;
+        bits |= 2;
+    }
+    else if (elemsize == EA_4BYTE)
+    {
+        bits <<= 3;
+        bits |= 4;
+    }
+    else
+    {
+        assert(elemsize == EA_8BYTE);
+        bits <<= 4;
+        bits |= 8;
+    }
+    assert((bits >= 1) && (bits <= 0x1f));
 
-        case IF_SI_0A: // SI_0A   ...........iiiii iiiiiiiiiii.....               imm16
-            imm = emitGetInsSC(id);
-            assert(isValidUimm16(imm));
-            code = emitInsCode(ins, fmt);
-            code |= ((code_t)imm << 5); // iiiii iiiiiiiiiii
-            dst += emitOutput_Instr(dst, code);
-            break;
+    return (bits << 16); // bits at locations [20,19,18,17,16]
+}
 
-        case IF_SI_0B: // SI_0B   ................ ....bbbb........               imm4 - barrier
-            imm = emitGetInsSC(id);
-            assert((imm >= 0) && (imm <= 15));
-            code = emitInsCode(ins, fmt);
-            code |= ((code_t)imm << 8); // bbbb
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Returns the encoding to select 'index2' for an Arm64 'ins' elem instruction
+ */
+/*static*/ emitter::code_t emitter::insEncodeVectorIndex2(emitAttr elemsize, ssize_t index2)
+{
+    code_t bits = (code_t)index2;
+    if (elemsize == EA_1BYTE)
+    {
+        // bits are correct
+    }
+    else if (elemsize == EA_2BYTE)
+    {
+        bits <<= 1;
+    }
+    else if (elemsize == EA_4BYTE)
+    {
+        bits <<= 2;
+    }
+    else
+    {
+        assert(elemsize == EA_8BYTE);
+        bits <<= 3;
+    }
+    assert((bits >= 0) && (bits <= 0xf));
 
-        case IF_SR_1A: // SR_1A   ................ ...........ttttt      Rt       (dc zva, mrs)
-            assert(insOptsNone(id->idInsOpt()));
-            code = emitInsCode(ins, fmt);
-            code |= insEncodeReg_Rt(id->idReg1()); // ttttt
-            dst += emitOutput_Instr(dst, code);
-            break;
+    return (bits << 11); // bits at locations [14,13,12,11]
+}
 
-        default:
-            dst = emitOutput_InstrSve(dst, id);
-            break;
-    }
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 'index' for an Arm64 'mul' by element instruction
+ */
+/*static*/ emitter::code_t emitter::insEncodeVectorIndexLMH(emitAttr elemsize, ssize_t index)
+{
+    code_t bits = 0;
 
-    // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref.
-    // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a
-    // GC ref to register "id->idReg1()".  (It may, apparently, also not be GC_NONE in other cases, such as
-    // for stores, but we ignore those cases here.)
-    if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register than can hold GC ref.
+    if (elemsize == EA_2BYTE)
     {
-        // We assume that "idReg1" is the primary destination register for all instructions
-        assert(!emitInsDestIsOp2(ins));
-        if (id->idGCref() != GCT_NONE)
+        assert((index >= 0) && (index <= 7));
+        if (index & 0x4)
         {
-            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+            bits |= (1 << 11); // set bit 11 'H'
         }
-        else
+        if (index & 0x2)
         {
-            emitGCregDeadUpd(id->idReg1(), dst);
+            bits |= (1 << 21); // set bit 21 'L'
         }
-
-        if (emitInsMayWriteMultipleRegs(id))
+        if (index & 0x1)
         {
-            // INS_ldp etc...
-            // "idReg2" is the secondary destination register
-            if (id->idGCrefReg2() != GCT_NONE)
-            {
-                emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst);
-            }
-            else
-            {
-                emitGCregDeadUpd(id->idReg2(), dst);
-            }
+            bits |= (1 << 20); // set bit 20 'M'
         }
     }
-
-SKIP_GC_UPDATE:
-    // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
-    // ref or overwritten one.
-    if (emitInsWritesToLclVarStackLoc(id) || emitInsWritesToLclVarStackLocPair(id))
+    else if (elemsize == EA_4BYTE)
     {
-        int      varNum = id->idAddr()->iiaLclVar.lvaVarNum();
-        unsigned ofs    = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
-        bool     FPbased;
-        int      adr = emitComp->lvaFrameAddress(varNum, &FPbased);
-        if (id->idGCref() != GCT_NONE)
-        {
-            emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst DEBUG_ARG(varNum));
-        }
-        else
+        assert((index >= 0) && (index <= 3));
+        if (index & 0x2)
         {
-            // If the type of the local is a gc ref type, update the liveness.
-            var_types vt;
-            if (varNum >= 0)
-            {
-                // "Regular" (non-spill-temp) local.
-                vt = var_types(emitComp->lvaTable[varNum].lvType);
-            }
-            else
-            {
-                TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum);
-                vt              = tmpDsc->tdTempType();
-            }
-            if (vt == TYP_REF || vt == TYP_BYREF)
-            {
-                emitGCvarDeadUpd(adr + ofs, dst DEBUG_ARG(varNum));
-            }
+            bits |= (1 << 11); // set bit 11 'H'
         }
-        if (emitInsWritesToLclVarStackLocPair(id))
+        if (index & 0x1)
         {
-            int      varNum2 = varNum;
-            int      adr2    = adr;
-            unsigned ofs2    = ofs;
-            unsigned ofs2Dist;
-
-            if (id->idIsLclVarPair())
-            {
-                bool FPbased2;
-
-                emitLclVarAddr* lclVarAddr2 = emitGetLclVarPairLclVar2(id);
-                varNum2                     = lclVarAddr2->lvaVarNum();
-                ofs2                        = lclVarAddr2->lvaOffset();
-
-                // If there are 2 GC vars in this instrDesc, get the 2nd variable
-                // that should be tracked.
-                adr2     = emitComp->lvaFrameAddress(varNum2, &FPbased2);
-                ofs2Dist = EA_SIZE_IN_BYTES(size);
-#ifdef DEBUG
-                assert(FPbased == FPbased2);
-                if (FPbased)
-                {
-                    assert(id->idReg3() == REG_FP);
-                }
-                else
-                {
-                    assert(id->idReg3() == REG_SP);
-                }
-                assert(varNum2 != -1);
-#endif // DEBUG
-            }
-            else
-            {
-                ofs2Dist = TARGET_POINTER_SIZE;
-                ofs2 += ofs2Dist;
-            }
-
-            ofs2 = AlignDown(ofs2, ofs2Dist);
-
-            if (id->idGCrefReg2() != GCT_NONE)
-            {
-#ifdef DEBUG
-                if (id->idGCref() != GCT_NONE)
-                {
-                    // If 1st register was a gc-var, then make sure the offset
-                    // are correctly set for the 2nd register that is holding
-                    // another gc-var.
-                    assert((adr + ofs + ofs2Dist) == (adr2 + ofs2));
-                }
-#endif
-                emitGCvarLiveUpd(adr2 + ofs2, varNum2, id->idGCrefReg2(), dst DEBUG_ARG(varNum2));
-            }
-            else
-            {
-                // If the type of the local is a gc ref type, update the liveness.
-                var_types vt;
-                if (varNum2 >= 0)
-                {
-                    // "Regular" (non-spill-temp) local.
-                    vt = var_types(emitComp->lvaTable[varNum2].lvType);
-                }
-                else
-                {
-                    TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum2);
-                    vt              = tmpDsc->tdTempType();
-                }
-                if (vt == TYP_REF || vt == TYP_BYREF)
-                {
-                    emitGCvarDeadUpd(adr2 + ofs2, dst DEBUG_ARG(varNum2));
-                }
-            }
+            bits |= (1 << 21); // set bit 21 'L'
         }
     }
+    else
+    {
+        assert(!"Invalid 'elemsize' value");
+    }
 
-#ifdef DEBUG
-    /* Make sure we set the instruction descriptor size correctly */
-
-    size_t expected = emitSizeOfInsDsc(id);
-    assert(sz == expected);
+    return bits;
+}
 
-    if (emitComp->opts.disAsm || emitComp->verbose)
+/*****************************************************************************
+ *
+ *  Returns the encoding for a shift instruction, ready for insertion into an instruction.
+ */
+/*static*/ emitter::code_t emitter::insEncodeShiftImmediate(emitAttr size, bool isRightShift, ssize_t shiftAmount)
+{
+    if (isRightShift)
     {
-        emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig);
+        // The right shift amount must be in the range 1 to the destination element width in bits.
+        assert((shiftAmount > 0) && (shiftAmount <= getBitWidth(size)));
+        return (code_t)(2 * getBitWidth(size) - shiftAmount);
     }
-
-    if (emitComp->compDebugBreak)
+    else
     {
-        // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for
-        // emitting instruction a6, (i.e. IN00a6 in jitdump).
-        if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
-        {
-            assert(!"JitBreakEmitOutputInstr reached");
-        }
+        // The left shift amount must in the range 0 to the element width in bits minus 1.
+        assert(shiftAmount < getBitWidth(size));
+        return (code_t)(getBitWidth(size) + shiftAmount);
     }
+}
 
-    // Output any delta in GC info.
-    if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC)
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 vector instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeElemsize(emitAttr size)
+{
+    if (size == EA_8BYTE)
     {
-        emitDispGCInfoDelta();
+        return 0x00C00000; // set the bit at location 23 and 22
     }
-#else
-    if (emitComp->opts.disAsm)
+    else if (size == EA_4BYTE)
     {
-        size_t expected = emitSizeOfInsDsc(id);
-        assert(sz == expected);
-        emitDispIns(id, false, 0, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig);
+        return 0x00800000; // set the bit at location 23
     }
-#endif
-
-    /* All instructions are expected to generate code */
-
-    assert(*dp != dst || id->idIsEmptyAlign());
-
-    *dp = dst;
-
-    return sz;
+    else if (size == EA_2BYTE)
+    {
+        return 0x00400000; // set the bit at location 22
+    }
+    assert(size == EA_1BYTE);
+    return 0x00000000;
 }
 
 /*****************************************************************************
  *
- *  Append the machine code corresponding to the given SVE instruction descriptor.
+ *  Returns the encoding to select the 4/8 byte elemsize for an Arm64 float vector instruction
  */
-BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
-{
-    code_t      code = 0;
-    instruction ins  = id->idIns();
-    insFormat   fmt  = id->idInsFmt();
-    emitAttr    size = id->idOpSize();
-
-    ssize_t imm;
 
-    switch (fmt)
+/*static*/ emitter::code_t emitter::insEncodeFloatElemsize(emitAttr size)
+{
+    if (size == EA_8BYTE)
     {
-        // Scalable.
-        case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated)
-        case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated)
-        case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated)
-        case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated)
-        case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated)
-        case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated)
-        case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords)
-        case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated)
-        case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords)
-        case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated)
-        case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords)
-        case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated)
-        case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated)
-        case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated)
-        case IF_SVE_AQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer unary operations (predicated)
-        case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements
-        case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector
-        case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar
-        case IF_SVE_CP_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector
-                           // (predicated)
-        case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register
-        case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements
-        case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated)
-        case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long
-        case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic
-        case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated)
-        case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract
-        case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left
-                           // (predicated)
-        case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations
-        case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords)
-        case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction
-        case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated)
-        case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated)
-        case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value
-        case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                   // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                     // mmmmm or nnnnn
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        // Scalable with Merge or Zero predicate
-        case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                     // nnnnn
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                   // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                     // ddddd
-            code |= insEncodePredQualifier_16(id->idPredicateReg2Merge());   // M
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
+        return 0x00400000; // set the bit at location 22
+    }
+    assert(size == EA_4BYTE);
+    return 0x00000000;
+}
 
-        // Scalable with shift immediate
-        case IF_SVE_AM_2A: // ........xx...... ...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated)
+// Returns the encoding to select the index for an Arm64 float vector by element instruction
+/*static*/ emitter::code_t emitter::insEncodeFloatIndex(emitAttr elemsize, ssize_t index)
+{
+    code_t result = 0x00000000;
+    if (elemsize == EA_8BYTE)
+    {
+        assert((index >= 0) && (index <= 1));
+        if (index == 1)
         {
-            bool isRightShift = emitInsIsVectorRightShift(ins);
-            imm               = emitGetInsSC(id);
-            code              = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
-            code |=
-                insEncodeSveShift_23_to_22_9_to_0(optGetSveElemsize(id->idInsOpt()), isRightShift, imm); // xx, xxiii
-            dst += emitOutput_Instr(dst, code);
+            result |= 0x00000800; // 'H' - set the bit at location 11
         }
-        break;
-
-        // Scalable, 4 regs. Reg4 in mmmmm.
-        case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend
-                           // (predicated)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                   // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                     // nnnnn
-            code |= insEncodeReg_V_20_to_16(id->idReg4());                   // mmmmm
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        // Scalable, 4 regs. Reg4 in aaaaa.
-        case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand
-                           // (predicated)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                   // ggg
-            code |= insEncodeReg_V_20_to_16(id->idReg3());                   // mmmmm
-            code |= insEncodeReg_V_9_to_5(id->idReg4());                     // aaaaa
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        // Scalable, 3 regs, no predicates
-        case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated)
-        case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
-        case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high
-                           // (unpredicated)
-        case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated)
-        case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_V_9_to_5(id->idReg2());                     // nnnnn
-            code |= insEncodeReg_V_20_to_16(id->idReg3());                   // mmmmm
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        // Scalable, 3 regs, no predicates. General purpose source registers
-        case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register
-                           // increment)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_Rn(id->idReg2());                           // nnnnn
-            code |= insEncodeReg_Rm(id->idReg3());                           // mmmmm
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        // Immediate and patterm to general purpose.
-        case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_Rd(id->idReg1());           // ddddd
-            code |= insEncodeSvePattern(id->idSvePattern()); // ppppp
-            code |= insEncodeUimm4From1_19_to_16(imm);       // iiii
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());                  // DDDD
-            code |= insEncodeReg_P_8_to_5(id->idReg2());                  // NNNN
-            code |= insEncodeReg_P_19_to_16(id->idReg3());                // MMMM
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_CJ_2A: // ........xx...... .......nnnn.dddd -- SVE reverse predicate elements
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());                  // DDDD
-            code |= insEncodeReg_P_8_to_5(id->idReg2());                  // NNNN
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD
-            code |= insEncodeReg_P_8_to_5(id->idReg2()); // NNNN
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        // Scalable to general register.
-        case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register
-        case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_Rd(id->idReg1());                           // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                   // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                     // mmmmm
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        // Scalable from general register.
-        case IF_SVE_CQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy general register to vector (predicated)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                   // ggg
-            code |= insEncodeReg_Rn(id->idReg3());                           // mmmmm
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_CT_3A: // ................ ...gggnnnnnddddd -- SVE reverse doublewords
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());   // nnnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
-        case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                  // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                // VVV
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                  // nnnnn/mmmmm
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_CX_4A:   // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
-        case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
-        case IF_SVE_GE_4A:   // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
-        case IF_SVE_HT_4A:   // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());                  // DDDD
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                  // mmmmm
-            code |= insEncodeReg_V_20_to_16(id->idReg4());                // nnnnn
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());                  // DDDD
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                  // nnnnn
-            code |= insEncodeSimm5_20_to_16(imm);                         // iiiii
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());                  // DDDD
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                  // nnnnn
-            code |= insEncodeUimm7_20_to_14(imm);                         // iiiii
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed)
-        case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed)
-        case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed)
-        case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
-        case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
-        case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
-        case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
-        case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
-        case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());       // ddddd
-            code |= insEncodeReg_V_9_to_5(id->idReg2());       // nnnnn
-            code |= insEncodeReg_V_18_to_16(id->idReg3());     // mmm
-            code |= insEncodeUimm2_20_to_19(emitGetInsSC(id)); // ii
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
-        case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
-        case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
-        case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
-        case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ddddd
-            code |= insEncodeReg_V_9_to_5(id->idReg2());   // nnnnn
-            code |= insEncodeReg_V_18_to_16(id->idReg3()); // mmm
-            code |= insEncodeUimm2_20_to_19(imm & 0b11);   // ii
-            code |= insEncodeImm1_22(imm >> 2);            // i
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
-        case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
-        case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
-        case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ddddd
-            code |= insEncodeReg_V_9_to_5(id->idReg2());   // nnnnn
-            code |= insEncodeImm1_11(imm & 1);             // i
-            code |= insEncodeReg_V_18_to_16(id->idReg3()); // mmm
-            code |= insEncodeUimm2_20_to_19(imm >> 1);     // ii
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
-        case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
-        case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
-        case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ddddd
-            code |= insEncodeReg_V_9_to_5(id->idReg2());   // nnnnn
-            code |= insEncodeImm1_11(imm & 1);             // i
-            code |= insEncodeReg_V_19_to_16(id->idReg3()); // mmmm
-            code |= insEncodeUimm2_20_to_19(imm & 0b10);   // i
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
-        case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
-        case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
-        case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
-        case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ddddd
-            code |= insEncodeReg_V_9_to_5(id->idReg2());   // nnnnn
-            code |= insEncodeReg_V_19_to_16(id->idReg3()); // mmmm
-
-            // index is encoded at bit location 20;
-            // left-shift by one bit so we can reuse insEncodeUimm2_20_to_19 without modifying bit location 19
-            code |= insEncodeUimm2_20_to_19(emitGetInsSC(id) << 1); // i
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-        case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition
+    }
+    else
+    {
+        assert(elemsize == EA_4BYTE);
+        assert((index >= 0) && (index <= 3));
+        if (index & 2)
         {
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());   // DDDD
-            code |= insEncodeReg_P_13_to_10(id->idReg2()); // gggg
-            code |= insEncodeReg_P_8_to_5(id->idReg3());   // NNNN
-
-            regNumber regm;
-            switch (ins)
-            {
-                case INS_sve_mov:
-                case INS_sve_movs:
-                    regm = id->idReg3();
-                    break;
-
-                case INS_sve_not:
-                case INS_sve_nots:
-                    regm = id->idReg2();
-                    break;
-
-                default:
-                    regm = id->idReg4();
-            }
-
-            code |= insEncodeReg_P_19_to_16(regm); // MMMM
-            dst += emitOutput_Instr(dst, code);
-            break;
+            result |= 0x00000800; // 'H' - set the bit at location 11
         }
+        if (index & 1)
+        {
+            result |= 0x00200000; // 'L' - set the bit at location 21
+        }
+    }
+    return result;
+}
 
-        case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-        case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());   // DDDD
-            code |= insEncodeReg_P_13_to_10(id->idReg2()); // NNNN
-            code |= insEncodeReg_P_8_to_5(id->idReg2());   // NNNN
-            code |= insEncodeReg_P_19_to_16(id->idReg2()); // NNNN
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());   // DDDD
-            code |= insEncodeReg_P_13_to_10(id->idReg2()); // gggg
-            code |= insEncodeReg_P_8_to_5(id->idReg3());   // NNNN
-            code |= insEncodeReg_P_19_to_16(id->idReg1()); // DDDD
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DB_3A: // ................ ..gggg.NNNNMDDDD -- SVE partition break condition
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());                  // DDDD
-            code |= insEncodeReg_P_13_to_10(id->idReg2());                // gggg
-            code |= insEncodeReg_P_8_to_5(id->idReg3());                  // NNNN
-            code |= insEncodePredQualifier_4(id->idPredicateReg2Merge()); // M
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DB_3B: // ................ ..gggg.NNNN.DDDD -- SVE partition break condition
-        case IF_SVE_DC_3A: // ................ ..gggg.NNNN.MMMM -- SVE propagate break to next partition
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());   // DDDD
-            code |= insEncodeReg_P_13_to_10(id->idReg2()); // gggg
-            code |= insEncodeReg_P_8_to_5(id->idReg3());   // NNNN
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active
-        case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD
-            code |= insEncodeReg_P_8_to_5(id->idReg2()); // gggg
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DE_1A: // ........xx...... ......ppppp.DDDD -- SVE predicate initialize
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeSvePattern(id->idSvePattern());              // ppppp
-            code |= insEncodeReg_P_3_to_0(id->idReg1());                  // DDDD
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DF_2A: // ........xx...... .......VVVV.DDDD -- SVE predicate next active
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());                  // DDDD
-            code |= insEncodeReg_P_8_to_5(id->idReg2());                  // VVVV
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated)
-        case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_13_to_10(id->idReg1()); // gggg
-            code |= insEncodeReg_P_8_to_5(id->idReg2());   // NNNN
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DK_3A: // ........xx...... ..gggg.NNNNddddd -- SVE predicate count
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_R_4_to_0(id->idReg1());                  // ddddd
-            code |= insEncodeReg_P_13_to_10(id->idReg2());                // gggg
-            code |= insEncodeReg_P_8_to_5(id->idReg3());                  // NNNN
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow
-            imm = emitGetInsSC(id);
-            assert(id->idInsOpt() == INS_OPTS_SCALABLE_H);
-            assert(emitInsIsVectorRightShift(id->idIns()));
-            assert(isValidVectorShiftAmount(imm, EA_4BYTE, /* rightShift */ true));
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeVectorShift(EA_4BYTE, true /* right-shift */, imm); // iiii
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                         // ddddd
-            code |= insEncodeReg_V_9_to_6_Times_Two(id->idReg2());               // nnnn
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeVectorLengthSpecifier(id);                      // l
-            code |= insEncodeReg_R_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_P_8_to_5(id->idReg2());                     // NNNN
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_R_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_P_8_to_5(id->idReg2());                     // MMMM
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
-        case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_P_8_to_5(id->idReg2());                     // MMMM
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_R_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_P_8_to_5(id->idReg2());                     // MMMM
-            code |= insEncodeVLSElemsize(id->idOpSize());                    // X
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise
-            code = emitInsCodeSve(ins, fmt);
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_8_to_5(id->idReg1()); // NNNN
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_R_9_to_5(id->idReg1());       // nnnnn
-            code |= insEncodeReg_R_20_to_16(id->idReg2());     // mmmmm
-            code |= insEncodeSveElemsize_R_22(id->idOpSize()); // x
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                                           // ddddd
-            code |= insEncodeReg_V_9_to_5(id->idReg2());                                           // nnnnn
-            code |= insEncodeSveElemsize_tszh_22_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx
-                                                                                                   // x
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd
-            code |= insEncodeReg_V_9_to_5(id->idReg2()); // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_GL_1A: // ................ ...........ddddd -- SVE2 crypto unary operations
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());                     // DDDD
-            code |= insEncodeReg_R_9_to_5(id->idReg2());                     // nnnnn
-            code |= (id->idOpSize() == EA_8BYTE) ? (1 << 12) : 0;            // X
-            code |= insEncodeReg_R_20_to_16(id->idReg3());                   // mmmmm
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
-        case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());                  // DDDD
-            code |= insEncodeReg_P_7_to_5(id->idReg2());                  // NNN
-            code |= insEncodeUimm2_9_to_8(emitGetInsSC(id));              // ii (or i)
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. -- SVE integer compare scalar count and limit (predicate
-                           // pair)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_1(id->idReg1());                     // DDD
-            code |= insEncodeReg_R_9_to_5(id->idReg2());                     // nnnnn
-            code |= insEncodeReg_R_20_to_16(id->idReg3());                   // mmmmm
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit
-                           // (predicate-as-counter)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeVectorLengthSpecifier(id);                   // l
-            code |= insEncodeReg_P_2_to_0(id->idReg1());                  // DDD
-            code |= insEncodeReg_R_9_to_5(id->idReg2());                  // nnnnn
-            code |= insEncodeReg_R_20_to_16(id->idReg3());                // mmmmm
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the vector elemsize for an Arm64 ld/st# vector instruction
+ */
 
-        case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_2_to_0(id->idReg1());                  // DDD
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
+/*static*/ emitter::code_t emitter::insEncodeVLSElemsize(emitAttr size)
+{
+    code_t result = 0x00000000;
 
-        case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated)
-        case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated)
-        case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated)
+    switch (size)
+    {
+        case EA_1BYTE:
         {
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                  // ddddd
-            code |= insEncodeImm8_12_to_5(imm);                           // iiiiiiii
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
+            result |= 0x0000; // clear bits 10 and 11
             break;
         }
 
-        case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
-        case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
-        case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
+        case EA_2BYTE:
         {
-            const ssize_t imm   = emitGetInsSC(id);
-            const ssize_t rot   = (imm & 0b11);
-            const ssize_t index = (imm >> 2);
-            code                = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ddddd
-            code |= insEncodeReg_V_9_to_5(id->idReg2());   // nnnnn
-            code |= insEncodeReg_V_18_to_16(id->idReg3()); // mmm
-            code |= insEncodeUimm2_11_to_10(rot);          // rr
-            code |= insEncodeUimm2_20_to_19(index);        // ii
-            dst += emitOutput_Instr(dst, code);
+            result |= 0x0400; // set bit at location 10, clear bit at location 11
             break;
         }
 
-        case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
-        case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
-        case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
+        case EA_4BYTE:
         {
-            const ssize_t imm   = emitGetInsSC(id);
-            const ssize_t rot   = (imm & 0b11);
-            const ssize_t index = (imm >> 2);
-            code                = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ddddd
-            code |= insEncodeReg_V_9_to_5(id->idReg2());   // nnnnn
-            code |= insEncodeReg_V_19_to_16(id->idReg3()); // mmmm
-            code |= insEncodeUimm2_11_to_10(rot);          // rr
-
-            // index is encoded at bit location 20;
-            // left-shift by one bit so we can reuse insEncodeUimm2_20_to_19 without modifying bit location 19
-            code |= insEncodeUimm2_20_to_19(index << 1); // i
-            dst += emitOutput_Instr(dst, code);
+            result |= 0x0800; // clear bit at location 10, set bit at location 11
             break;
         }
 
-        case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated)
-        case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated)
+        case EA_8BYTE:
         {
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                  // ddddd
-            code |= insEncodeImm8_12_to_5(imm);                           // iiiiiiii
-            code |= (id->idOptionalShift() ? 0x2000 : 0);                 // h
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
+            result |= 0x0C00; // set bits at location 10 and 11
             break;
         }
 
-        case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated)
-            // ins is MOV for this encoding, as it is the preferred disassembly, so pass FMOV to emitInsCodeSve
-            code = emitInsCodeSve(INS_sve_fmov, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                  // ddddd
-            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
-
-        case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());                     // DDDD
-            code |= insEncodeReg_R_9_to_5(id->idReg2());                     // nnnnn
-            code |= insEncodeReg_R_20_to_16(id->idReg3());                   // mmmmm
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
+        default:
+        {
+            assert(!"Invalid element size");
             break;
+        }
+    }
 
-        case IF_SVE_IH_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
-                             // immediate)
-        case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
-                             // immediate)
-        case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
-                             // immediate)
-        case IF_SVE_IJ_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate)
-        case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
-                             // immediate)
-        case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
-                             // immediate)
-        case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
-                             // immediate)
-        case IF_SVE_IM_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus
-                             // immediate)
-        case IF_SVE_IO_3A:   // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus
-                             // immediate)
-        case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
-                           // immediate)
-        case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate)
-        case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
-                           // immediate)
-        case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
-                           // immediate)
-        case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-        case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-        case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate)
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ttttt
-            code |= insEncodeReg_R_9_to_5(id->idReg3());   // nnnnn
-            code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
-
-            switch (ins)
-            {
-                case INS_sve_ld2b:
-                case INS_sve_ld2h:
-                case INS_sve_ld2w:
-                case INS_sve_ld2d:
-                case INS_sve_ld2q:
-                case INS_sve_st2b:
-                case INS_sve_st2h:
-                case INS_sve_st2w:
-                case INS_sve_st2d:
-                case INS_sve_st2q:
-                    code |= insEncodeSimm4_MultipleOf2_19_to_16(imm); // iiii
-                    break;
-
-                case INS_sve_ld3b:
-                case INS_sve_ld3h:
-                case INS_sve_ld3w:
-                case INS_sve_ld3d:
-                case INS_sve_ld3q:
-                case INS_sve_st3b:
-                case INS_sve_st3h:
-                case INS_sve_st3w:
-                case INS_sve_st3d:
-                case INS_sve_st3q:
-                    code |= insEncodeSimm4_MultipleOf3_19_to_16(imm); // iiii
-                    break;
-
-                case INS_sve_ld4b:
-                case INS_sve_ld4h:
-                case INS_sve_ld4w:
-                case INS_sve_ld4d:
-                case INS_sve_ld4q:
-                case INS_sve_st4b:
-                case INS_sve_st4h:
-                case INS_sve_st4w:
-                case INS_sve_st4d:
-                case INS_sve_st4q:
-                    code |= insEncodeSimm4_MultipleOf4_19_to_16(imm); // iiii
-                    break;
-
-                case INS_sve_ld1rqb:
-                case INS_sve_ld1rqd:
-                case INS_sve_ld1rqh:
-                case INS_sve_ld1rqw:
-                    code |= insEncodeSimm4_MultipleOf16_19_to_16(imm); // iiii
-                    break;
+    return result;
+}
 
-                case INS_sve_ld1rob:
-                case INS_sve_ld1rod:
-                case INS_sve_ld1roh:
-                case INS_sve_ld1row:
-                    code |= insEncodeSimm4_MultipleOf32_19_to_16(imm); // iiii
-                    break;
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the index for an Arm64 ld/st# vector by element instruction
+ */
 
-                default:
-                    code |= insEncodeSimm4_19_to_16(imm); // iiii
-                    break;
-            }
+/*static*/ emitter::code_t emitter::insEncodeVLSIndex(emitAttr size, ssize_t index)
+{
+    code_t result = 0x00000000;
 
-            if (canEncodeSveElemsize_dtype(ins))
-            {
-                if (ins == INS_sve_ld1w)
-                {
-                    code = insEncodeSveElemsize_dtype_ld1w(ins, fmt, optGetSveElemsize(id->idInsOpt()), code);
-                }
-                else
-                {
-                    code = insEncodeSveElemsize_dtype(ins, optGetSveElemsize(id->idInsOpt()), code);
-                }
-            }
+    switch (size)
+    {
+        case EA_1BYTE:
+        {
+            // Q  = ?   - bit location 30
+            // xx = 00  - bit location 14 and 15
+            // S = ?    - bit location 12
+            // ss = ?0  - bit location 10 and 11
 
-            dst += emitOutput_Instr(dst, code);
+            result |= (index & 0x8) << 27;
+            result |= (index & 0x4) << 10;
+            result |= (index & 0x3) << 10;
             break;
+        }
 
-        case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                              // ttttt
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                            // ggg
-            code |= insEncodeReg_R_9_to_5(id->idReg3());                              // nnnnn
-            code |= insEncodeReg_R_20_to_16(id->idReg4());                            // mmmmm
-            code |= insEncodeSveElemsize_22_to_21(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
+        case EA_2BYTE:
+        {
+            // Q  = ?   - bit location 30
+            // xx = 01  - bit location 14 and 15
+            // S = ?    - bit location 12
+            // ss = ??  - bit location 10 and 11
 
-        case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                           // ttttt
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                         // ggg
-            code |= insEncodeReg_R_9_to_5(id->idReg3());                           // nnnnn
-            code |= insEncodeReg_R_20_to_16(id->idReg4());                         // mmmmm
-            code |= insEncodeSveElemsize_sz_21(optGetSveElemsize(id->idInsOpt())); // x
-            dst += emitOutput_Instr(dst, code);
+            result |= (index & 0x4) << 28;
+            result |= 0x4000;
+            result |= (index & 0x2) << 11;
+            result |= (index & 0x1) << 11;
             break;
+        }
 
-        case IF_SVE_JJ_4A:   // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             //                     // offsets)
-        case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
-                           // offsets)
-        case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
-                             // unscaled offsets)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ttttt
-            code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
-            code |= insEncodeReg_R_9_to_5(id->idReg3());   // nnnnn
-            code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm
-
-            switch (id->idInsOpt())
-            {
-                case INS_OPTS_SCALABLE_S_SXTW:
-                case INS_OPTS_SCALABLE_D_SXTW:
-                    code |= (1 << 14); // h
-                    break;
-
-                default:
-                    break;
-            }
+        case EA_4BYTE:
+        {
+            // Q  = ?   - bit location 30
+            // xx = 10  - bit location 14 and 15
+            // S = ?    - bit location 12
+            // ss = 00  - bit location 10 and 11
 
-            dst += emitOutput_Instr(dst, code);
+            result |= (index & 0x2) << 29;
+            result |= 0x8000;
+            result |= (index & 0x1) << 12;
             break;
+        }
 
-        case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                              // ttttt
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                            // ggg
-            code |= insEncodeReg_R_9_to_5(id->idReg3());                              // nnnnn
-            code |= insEncodeSimm4_19_to_16(imm);                                     // iiii
-            code |= insEncodeSveElemsize_22_to_21(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
-            break;
+        case EA_8BYTE:
+        {
+            // Q  = ?   - bit location 30
+            // xx = 10  - bit location 14 and 15
+            // S = 0    - bit location 12
+            // ss = 01  - bit location 10 and 11
 
-        case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                           // ttttt
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                         // ggg
-            code |= insEncodeReg_R_9_to_5(id->idReg3());                           // nnnnn
-            code |= insEncodeSimm4_19_to_16(imm);                                  // iiii
-            code |= insEncodeSveElemsize_sz_21(optGetSveElemsize(id->idInsOpt())); // x
-            dst += emitOutput_Instr(dst, code);
+            result |= (index & 0x1) << 30;
+            result |= 0x8400;
             break;
+        }
 
-        case IF_SVE_HW_4A:   // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_IU_4A:   // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ttttt
-            code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
-            code |= insEncodeReg_R_9_to_5(id->idReg3());   // nnnnn
-            code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm
-
-            switch (id->idInsOpt())
-            {
-                case INS_OPTS_SCALABLE_S_SXTW:
-                case INS_OPTS_SCALABLE_D_SXTW:
-                    code |= (1 << 22); // h
-                    break;
-
-                default:
-                    break;
-            }
-
-            dst += emitOutput_Instr(dst, code);
+        default:
+        {
+            assert(!"Invalid element size");
             break;
+        }
+    }
 
-        case IF_SVE_HW_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ttttt
-            code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
-            code |= insEncodeReg_R_9_to_5(id->idReg3());   // nnnnn
-            code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm
-            dst += emitOutput_Instr(dst, code);
-            break;
+    return result;
+}
 
-        case IF_SVE_IF_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
-                             // scalar)
-        case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
-                             // scalar)
-        case IF_SVE_IW_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar)
-        case IF_SVE_IX_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus
-                             // scalar)
-        case IF_SVE_IY_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar)
-        case IF_SVE_IZ_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
-                             // scalar)
-        case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
-                             // scalar)
-        case IF_SVE_JA_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus
-                             // scalar)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ttttt
-            code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());   // nnnnn
-            code |= insEncodeReg_R_20_to_16(id->idReg4()); // mmmmm
-            dst += emitOutput_Instr(dst, code);
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the fcvt operation for Arm64 instructions
+ */
+/*static*/ emitter::code_t emitter::insEncodeConvertOpt(insFormat fmt, insOpts conversion)
+{
+    code_t result = 0;
+    switch (conversion)
+    {
+        case INS_OPTS_S_TO_D: // Single to Double
+            assert(fmt == IF_DV_2J);
+            result = 0x00008000; // type=00, opc=01
             break;
 
-        case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
-        case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ttttt
-            code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
-            code |= insEncodeReg_R_9_to_5(id->idReg3());   // nnnnn
-            code |= insEncodeReg_R_20_to_16(id->idReg4()); // mmmmm
-
-            if (canEncodeSveElemsize_dtype(ins))
-            {
-                if (ins == INS_sve_ld1w)
-                {
-                    code = insEncodeSveElemsize_dtype_ld1w(ins, fmt, optGetSveElemsize(id->idInsOpt()), code);
-                }
-                else
-                {
-                    code = insEncodeSveElemsize_dtype(ins, optGetSveElemsize(id->idInsOpt()), code);
-                }
-            }
-
-            dst += emitOutput_Instr(dst, code);
+        case INS_OPTS_D_TO_S: // Double to Single
+            assert(fmt == IF_DV_2J);
+            result = 0x00400000; // type=01, opc=00
             break;
 
-        case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar)
-        case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
-        case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
-        case IF_SVE_IK_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar)
-        case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar)
-        case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
-                           // scalar)
-        case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar)
-        case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
-                           // scalar)
-        case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar)
-        case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-        case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-        case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
-                           // scalar)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ttttt
-            code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
-            code |= insEncodeReg_R_9_to_5(id->idReg3());   // nnnnn
-            code |= insEncodeReg_R_20_to_16(id->idReg4()); // mmmmm
-            dst += emitOutput_Instr(dst, code);
+        case INS_OPTS_H_TO_S: // Half to Single
+            assert(fmt == IF_DV_2J);
+            result = 0x00C00000; // type=11, opc=00
             break;
 
-        case IF_SVE_IU_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_JJ_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
-                           // offsets)
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ttttt
-            code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
-            code |= insEncodeReg_R_9_to_5(id->idReg3());   // nnnnn
-            code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm
-            dst += emitOutput_Instr(dst, code);
+        case INS_OPTS_H_TO_D: // Half to Double
+            assert(fmt == IF_DV_2J);
+            result = 0x00C08000; // type=11, opc=01
             break;
 
-        case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated)
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                   // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                     // mmmmm
-            code |= insEncodeSveImm90_or_270_rot(imm);                       // r
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
+        case INS_OPTS_S_TO_H: // Single to Half
+            assert(fmt == IF_DV_2J);
+            result = 0x00018000; // type=00, opc=11
             break;
 
-        case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated)
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                   // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                     // nnnnn
-            code |= insEncodeReg_V_20_to_16(id->idReg4());                   // mmmmm
-            code |= insEncodeSveImm0_to_270_rot(imm);                        // rr
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
+        case INS_OPTS_D_TO_H: // Double to Half
+            assert(fmt == IF_DV_2J);
+            result = 0x00418000; // type=01, opc=11
             break;
 
-        case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());                     // DDDD
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                   // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                     // nnnnn
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
+        case INS_OPTS_S_TO_4BYTE: // Single to INT32
+            assert(fmt == IF_DV_2H);
+            result = 0x00000000; // sf=0, type=00
             break;
 
-        case IF_SVE_HM_2A: // ........xx...... ...ggg....iddddd -- SVE floating-point arithmetic with immediate
-                           // (predicated)
-            {
-                imm  = emitGetInsSC(id);
-                code = emitInsCodeSve(ins, fmt);
-                code |= insEncodeReg_V_4_to_0(id->idReg1());                     // ddddd
-                code |= insEncodeReg_P_12_to_10(id->idReg2());                   // ggg
-                code |= insEncodeSveSmallFloatImm(imm);                          // i
-                code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-                dst += emitOutput_Instr(dst, code);
-            }
+        case INS_OPTS_D_TO_4BYTE: // Double to INT32
+            assert(fmt == IF_DV_2H);
+            result = 0x00400000; // sf=0, type=01
             break;
 
-        case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_V_9_to_5(id->idReg2());                     // mmmmm
-            code |= insEncodeUimm3_18_to_16(imm);                            // iii
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
+        case INS_OPTS_S_TO_8BYTE: // Single to INT64
+            assert(fmt == IF_DV_2H);
+            result = 0x80000000; // sf=1, type=00
             break;
 
-        case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                              // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                            // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                              // nnnnn
-            code |= insEncodeSveElemsize_18_to_17(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
+        case INS_OPTS_D_TO_8BYTE: // Double to INT64
+            assert(fmt == IF_DV_2H);
+            result = 0x80400000; // sf=1, type=01
             break;
 
-        case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());   // nnnnn
-            code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm
-            dst += emitOutput_Instr(dst, code);
+        case INS_OPTS_4BYTE_TO_S: // INT32 to Single
+            assert(fmt == IF_DV_2I);
+            result = 0x00000000; // sf=0, type=00
             break;
 
-        case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing
-                           // multiplicand
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());                     // ddddd
-            code |= insEncodeReg_P_12_to_10(id->idReg2());                   // ggg
-            code |= insEncodeReg_V_9_to_5(id->idReg3());                     // mmmmm
-            code |= insEncodeReg_V_20_to_16(id->idReg4());                   // aaaaa
-            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
-            dst += emitOutput_Instr(dst, code);
+        case INS_OPTS_4BYTE_TO_D: // INT32 to Double
+            assert(fmt == IF_DV_2I);
+            result = 0x00400000; // sf=0, type=01
             break;
 
-        case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register
-        case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_P_3_to_0(id->idReg1());          // TTTT
-            code |= insEncodeReg_R_9_to_5(id->idReg2());          // nnnnn
-            code |= insEncodeSimm9h9l_21_to_16_and_12_to_10(imm); // iii
-                                                                  // iiiiii
-            dst += emitOutput_Instr(dst, code);
+        case INS_OPTS_8BYTE_TO_S: // INT64 to Single
+            assert(fmt == IF_DV_2I);
+            result = 0x80000000; // sf=1, type=00
             break;
 
-        case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register
-        case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register
-            imm  = emitGetInsSC(id);
-            code = emitInsCodeSve(ins, fmt);
-            code |= insEncodeReg_V_4_to_0(id->idReg1());          // ttttt
-            code |= insEncodeReg_R_9_to_5(id->idReg2());          // nnnnn
-            code |= insEncodeSimm9h9l_21_to_16_and_12_to_10(imm); // iii
-                                                                  // iiiiii
-            dst += emitOutput_Instr(dst, code);
+        case INS_OPTS_8BYTE_TO_D: // INT64 to Double
+            assert(fmt == IF_DV_2I);
+            result = 0x80400000; // sf=1, type=01
             break;
 
         default:
-            assert(!"Unexpected format");
+            assert(!"Invalid 'conversion' value");
             break;
     }
-
-    return dst;
+    return result;
 }
 
-/*****************************************************************************/
-/*****************************************************************************/
-
 /*****************************************************************************
  *
- *  Display a comma
+ *  Returns the encoding to have the Rn register be updated Pre/Post indexed
+ *  or not updated
  */
-void emitter::emitDispComma()
-{
-    printf(", ");
-}
 
-/*****************************************************************************
- *
- *  Display the instruction name
- */
-void emitter::emitDispInst(instruction ins)
+/*static*/ emitter::code_t emitter::insEncodeIndexedOpt(insOpts opt)
 {
-    const char* insstr = codeGen->genInsName(ins);
-    size_t      len    = strlen(insstr);
-
-    /* Display the instruction name */
-
-    printf("%s", insstr);
+    assert(emitter::insOptsNone(opt) || emitter::insOptsIndexed(opt));
 
-    //
-    // Add at least one space after the instruction name
-    // and add spaces until we have reach the normal size of 8
-    do
+    if (emitter::insOptsIndexed(opt))
+    {
+        if (emitter::insOptsPostIndex(opt))
+        {
+            return 0x00000400; // set the bit at location 10
+        }
+        else
+        {
+            assert(emitter::insOptsPreIndex(opt));
+            return 0x00000C00; // set the bit at location 10 and 11
+        }
+    }
+    else
     {
-        printf(" ");
-        len++;
-    } while (len < 8);
+        assert(emitter::insOptsNone(opt));
+        return 0; // bits 10 and 11 are zero
+    }
 }
 
 /*****************************************************************************
  *
- *  Display an immediate value
+ *  Returns the encoding for a ldp/stp instruction to have the Rn register
+ *  be updated Pre/Post indexed or not updated
  */
-void emitter::emitDispImm(ssize_t imm, bool addComma, bool alwaysHex /* =false */, bool isAddrOffset /* =false */)
-{
-    if (isAddrOffset)
-    {
-        alwaysHex = true;
-    }
-    else if (imm == 0)
-    {
-        // Non-offset values of zero are never displayed as hex.
-        alwaysHex = false;
-    }
-
-    if (strictArmAsm)
-    {
-        printf("#");
-    }
 
-    // Munge any pointers if we want diff-able disassembly.
-    // Since some may be emitted as partial words, print as diffable anything that has
-    // significant bits beyond the lowest 8-bits.
-    if (emitComp->opts.disDiffable)
-    {
-        ssize_t top56bits = (imm >> 8);
-        if ((top56bits != 0) && (top56bits != -1))
-            imm = 0xD1FFAB1E;
-    }
+/*static*/ emitter::code_t emitter::insEncodePairIndexedOpt(instruction ins, insOpts opt)
+{
+    assert(emitter::insOptsNone(opt) || emitter::insOptsIndexed(opt));
 
-    if (!alwaysHex && (imm > -1000) && (imm < 1000))
+    if ((ins == INS_ldnp) || (ins == INS_stnp))
     {
-        printf("%d", (int)imm);
+        assert(emitter::insOptsNone(opt));
+        return 0; // bits 23 and 24 are zero
     }
     else
     {
-        if ((imm < 0) && ((imm & 0xFFFFFFFF00000000LL) == 0xFFFFFFFF00000000LL))
-        {
-            printf("-");
-            imm = -imm;
-        }
-
-        if ((imm & 0xFFFFFFFF00000000LL) != 0)
+        if (emitter::insOptsIndexed(opt))
         {
-            if (isAddrOffset)
+            if (emitter::insOptsPostIndex(opt))
             {
-                printf("0x%llX", imm);
+                return 0x00800000; // set the bit at location 23
             }
             else
             {
-                printf("0x%llx", imm);
+                assert(emitter::insOptsPreIndex(opt));
+                return 0x01800000; // set the bit at location 24 and 23
             }
         }
         else
         {
-            printf("0x%02X", (unsigned)imm);
+            assert(emitter::insOptsNone(opt));
+            return 0x01000000; // set the bit at location 24
         }
     }
-
-    if (addComma)
-        emitDispComma();
-}
-
-/*****************************************************************************
- *
- *  Display an immediate value as an index operation
- */
-void emitter::emitDispElementIndex(const ssize_t imm, const bool addComma)
-{
-    printf("[%d]", imm);
-
-    if (addComma)
-    {
-        emitDispComma();
-    }
 }
 
 /*****************************************************************************
  *
- *  Display a float zero constant
+ *  Returns the encoding to apply a Shift Type on the Rm register
  */
-void emitter::emitDispFloatZero()
-{
-    if (strictArmAsm)
-    {
-        printf("#");
-    }
-    printf("0.0");
-}
 
-/*****************************************************************************
- *
- *  Display an encoded float constant value
- */
-void emitter::emitDispFloatImm(ssize_t imm8)
+/*static*/ emitter::code_t emitter::insEncodeShiftType(insOpts opt)
 {
-    assert((0 <= imm8) && (imm8 <= 0x0ff));
-    if (strictArmAsm)
+    if (emitter::insOptsNone(opt))
     {
-        printf("#");
+        // None implies the we encode LSL (with a zero immediate)
+        opt = INS_OPTS_LSL;
     }
+    assert(emitter::insOptsAnyShift(opt));
 
-    floatImm8 fpImm;
-    fpImm.immFPIVal = (unsigned)imm8;
-    double result   = emitDecodeFloatImm8(fpImm);
+    emitter::code_t option = (emitter::code_t)opt - (emitter::code_t)INS_OPTS_LSL;
+    assert(option <= 3);
 
-    printf("%.4f", result);
+    return option << 22; // bits 23, 22
 }
 
 /*****************************************************************************
  *
- *  Display an encoded small float constant value
+ *  Returns the encoding to apply a 12 bit left shift to the immediate
  */
-void emitter::emitDispSmallFloatImm(ssize_t imm, instruction ins)
-{
-    if (strictArmAsm)
-    {
-        printf("#");
-    }
-    printf("%.4f", emitDecodeSmallFloatImm(imm, ins));
-}
 
-/*****************************************************************************
- *
- *  Display an immediate with an optional left-shift.
- */
-void emitter::emitDispImmOptsLSL(ssize_t imm, bool hasShift, unsigned shiftAmount)
+/*static*/ emitter::code_t emitter::insEncodeShiftImm12(insOpts opt)
 {
-    if (!strictArmAsm && hasShift)
-    {
-        imm <<= shiftAmount;
-    }
-    emitDispImm(imm, false);
-    if (strictArmAsm && hasShift)
+    if (emitter::insOptsLSL12(opt))
     {
-        printf(", LSL #%u", shiftAmount);
+        return 0x00400000; // set the bit at location 22
     }
+    return 0;
 }
 
 /*****************************************************************************
  *
- *  Display an ARM64 condition code for the conditional instructions
- */
-void emitter::emitDispCond(insCond cond)
-{
-    const static char* armCond[16] = {"eq", "ne", "hs", "lo", "mi", "pl", "vs", "vc",
-                                      "hi", "ls", "ge", "lt", "gt", "le", "AL", "NV"}; // The last two are invalid
-    unsigned imm = (unsigned)cond;
-    assert((0 <= imm) && (imm < ArrLen(armCond)));
-    printf(armCond[imm]);
-}
-
-/*****************************************************************************
- *
- *  Display an ARM64 flags for the conditional instructions
- */
-void emitter::emitDispFlags(insCflags flags)
-{
-    const static char* armFlags[16] = {"0", "v",  "c",  "cv",  "z",  "zv",  "zc",  "zcv",
-                                       "n", "nv", "nc", "ncv", "nz", "nzv", "nzc", "nzcv"};
-    unsigned imm = (unsigned)flags;
-    assert((0 <= imm) && (imm < ArrLen(armFlags)));
-    printf(armFlags[imm]);
-}
-
-/*****************************************************************************
- *
- *  Display an ARM64 'barrier' for the memory barrier instructions
- */
-void emitter::emitDispBarrier(insBarrier barrier)
-{
-    const static char* armBarriers[16] = {"#0", "oshld", "oshst", "osh", "#4",  "nshld", "nshst", "nsh",
-                                          "#8", "ishld", "ishst", "ish", "#12", "ld",    "st",    "sy"};
-    unsigned imm = (unsigned)barrier;
-    assert((0 <= imm) && (imm < ArrLen(armBarriers)));
-    printf(armBarriers[imm]);
-}
-
-/*****************************************************************************
- *
- *  Prints the encoding for the Shift Type encoding
- */
-
-void emitter::emitDispShiftOpts(insOpts opt)
-{
-    if (opt == INS_OPTS_LSL)
-        printf(" LSL ");
-    else if (opt == INS_OPTS_LSR)
-        printf(" LSR ");
-    else if (opt == INS_OPTS_ASR)
-        printf(" ASR ");
-    else if (opt == INS_OPTS_ROR)
-        printf(" ROR ");
-    else if (opt == INS_OPTS_MSL)
-        printf(" MSL ");
-    else
-        assert(!"Bad value");
-}
-
-/*****************************************************************************
- *
- *  Prints the encoding for the Extend Type encoding
- */
-
-void emitter::emitDispExtendOpts(insOpts opt)
-{
-    if (opt == INS_OPTS_UXTB)
-        printf("UXTB");
-    else if (opt == INS_OPTS_UXTH)
-        printf("UXTH");
-    else if (opt == INS_OPTS_UXTW)
-        printf("UXTW");
-    else if (opt == INS_OPTS_UXTX)
-        printf("UXTX");
-    else if (opt == INS_OPTS_SXTB)
-        printf("SXTB");
-    else if (opt == INS_OPTS_SXTH)
-        printf("SXTH");
-    else if (opt == INS_OPTS_SXTW)
-        printf("SXTW");
-    else if (opt == INS_OPTS_SXTX)
-        printf("SXTX");
-    else
-        assert(!"Bad value");
-}
-
-/*****************************************************************************
- *
- *  Prints the encoding for the Extend Type encoding
+ *  Returns the encoding to have the Rm register use an extend operation
  */
 
-void emitter::emitDispSveExtendOpts(insOpts opt)
+/*static*/ emitter::code_t emitter::insEncodeExtend(insOpts opt)
 {
-    switch (opt)
+    if (emitter::insOptsNone(opt) || (opt == INS_OPTS_LSL))
     {
-        case INS_OPTS_SCALABLE_S_UXTW:
-        case INS_OPTS_SCALABLE_D_UXTW:
-            printf("uxtw");
-            break;
+        // None or LSL implies the we encode UXTX
+        opt = INS_OPTS_UXTX;
+    }
+    assert(emitter::insOptsAnyExtend(opt));
 
-        case INS_OPTS_SCALABLE_S_SXTW:
-        case INS_OPTS_SCALABLE_D_SXTW:
-            printf("sxtw");
-            break;
+    emitter::code_t option = (emitter::code_t)opt - (emitter::code_t)INS_OPTS_UXTB;
+    assert(option <= 7);
 
-        default:
-            assert(!"Bad value");
-            break;
-    }
+    return option << 13; // bits 15,14,13
 }
 
 /*****************************************************************************
  *
- *  Prints the encoding for the Extend Type encoding along with the N value
+ *  Returns the encoding to scale the Rm register by {0,1,2,3,4}
+ *  when using an extend operation
  */
 
-void emitter::emitDispSveExtendOptsModN(insOpts opt, int n)
+/*static*/ emitter::code_t emitter::insEncodeExtendScale(ssize_t imm)
 {
-    assert(n >= 0 && n <= 3);
-
-    emitDispSveExtendOpts(opt);
-    switch (n)
-    {
-        case 3:
-            printf(" #3");
-            break;
-
-        case 2:
-            printf(" #2");
-            break;
-
-        case 1:
-            printf(" #1");
-            break;
+    assert((imm >= 0) && (imm <= 4));
 
-        default:
-            break;
-    }
+    return (emitter::code_t)imm << 10; // bits 12,11,10
 }
 
 /*****************************************************************************
  *
- *  Prints the encoding for the <mod> or LSL encoding along with the N value
- *  This is for formats that have [<Xn|SP>, <Zm>.T, <mod>], [<Xn|SP>, <Zm>.T, <mod> #N], [<Xn|SP>, <Xm>, LSL #N],
- * [<Xn|SP>{, <Xm>, LSL #N}]
+ *  Returns the encoding to have the Rm register be auto scaled by the ld/st size
  */
-void emitter::emitDispSveModAddr(instruction ins, regNumber reg1, regNumber reg2, insOpts opt, insFormat fmt)
-{
-    printf("[");
 
-    if (isVectorRegister(reg1))
+/*static*/ emitter::code_t emitter::insEncodeReg3Scale(bool isScaled)
+{
+    if (isScaled)
     {
-        // If the overall instruction is working on 128-bit
-        // registers, the size of this register for
-        // the mod addr is always 64-bit.
-        // Example: LD1Q    {<Zt>.Q }, <Pg>/Z, [<Zn>.D{, <Xm>}]
-        if (opt == INS_OPTS_SCALABLE_Q)
-        {
-            emitDispSveReg(reg1, INS_OPTS_SCALABLE_D, reg2 != REG_ZR);
-        }
-        else
-        {
-            emitDispSveReg(reg1, opt, reg2 != REG_ZR);
-        }
+        return 0x00001000; // set the bit at location 12
     }
     else
     {
-        emitDispReg(reg1, EA_8BYTE, reg2 != REG_ZR);
+        return 0;
     }
+}
 
-    if (isVectorRegister(reg2))
-    {
-        emitDispSveReg(reg2, opt, false);
-    }
-    else if (reg2 != REG_ZR)
-    {
-        emitDispReg(reg2, EA_8BYTE, false);
-    }
+/*****************************************************************************
+ *
+ *  Returns the encoding for the immediate value as 9-bits at bit locations '21-16' for high and '12-10' for low.
+ */
 
-    if (insOptsScalable32bitExtends(opt))
+/*static*/ emitter::code_t emitter::insEncodeSimm9h9l_21_to_16_and_12_to_10(ssize_t imm)
+{
+    assert(isValidSimm<9>(imm));
+
+    if (imm < 0)
     {
-        emitDispComma();
-        emitDispSveExtendOptsModN(opt, insSveGetLslOrModN(ins, fmt));
+        imm = (imm & 0x1FF);
     }
-    // Omit 'lsl #N' only if the second register is ZR.
-    else if ((reg2 != REG_ZR) && insSveIsLslN(ins, fmt))
-    {
-        emitDispComma();
-        switch (insSveGetLslOrModN(ins, fmt))
-        {
-            case 4:
-                printf("lsl #4");
-                break;
-
-            case 3:
-                printf("lsl #3");
-                break;
 
-            case 2:
-                printf("lsl #2");
-                break;
-
-            case 1:
-                printf("lsl #1");
-                break;
+    code_t h = (code_t)(imm & 0x1F8) << 13;          // encode high 6-bits at locations '21-16'
+    code_t l = (code_t)((imm & ~0x1F8) & 0x7) << 10; // encode low 3-bits at locations '12-10'
 
-            default:
-                assert(!"Invalid instruction");
-                break;
-        }
-    }
-    printf("]");
+    return (h | l);
 }
 
 /*****************************************************************************
  *
- *  Prints the encoding for format [<Xn|SP>{, #<imm>, MUL VL}]
+ *  Returns the encoding for the immediate value as 3-bits at bit locations '23-22' for high and '12' for low.
  */
-void emitter::emitDispSveImmMulVl(regNumber reg1, ssize_t imm)
+
+/*static*/ emitter::code_t emitter::insEncodeUimm3h3l_23_to_22_and_12(ssize_t imm)
 {
-    printf("[");
-    emitDispReg(reg1, EA_8BYTE, imm != 0);
-    if (imm != 0)
-    {
-        emitDispImm(imm, true);
-        printf("mul vl");
-    }
-    printf("]");
+    assert(isValidUimm<3>(imm));
+
+    code_t h = (code_t)(imm & 0x6) << 21; // encode high 2-bits at locations '23-22'
+    code_t l = (code_t)(imm & 0x1) << 12; // encode low 1-bit at locations '12'
+
+    return (h | l);
 }
 
 /*****************************************************************************
  *
- *  Prints the encoding for the Extend Type encoding in loads/stores
+ *  Returns the encoding for the immediate value as 8-bits at bit locations '12-5'.
  */
 
-void emitter::emitDispLSExtendOpts(insOpts opt)
+/*static*/ emitter::code_t emitter::insEncodeImm8_12_to_5(ssize_t imm)
 {
-    if (opt == INS_OPTS_LSL)
-        printf("LSL");
-    else if (opt == INS_OPTS_UXTW)
-        printf("UXTW");
-    else if (opt == INS_OPTS_UXTX)
-        printf("UXTX");
-    else if (opt == INS_OPTS_SXTW)
-        printf("SXTW");
-    else if (opt == INS_OPTS_SXTX)
-        printf("SXTX");
-    else
-        assert(!"Bad value");
+    assert(isValidSimm<8>(imm) || isValidUimm<8>(imm));
+    return (code_t)((imm & 0xFF) << 5);
 }
 
-//------------------------------------------------------------------------
-// emitDispReg: Display a general-purpose register name or SIMD and floating-point scalar register name
-//
-void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma)
+BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id)
 {
-    emitAttr size = EA_SIZE(attr);
-    printf(emitRegName(reg, size));
+    instruction ins    = id->idIns();
+    insFormat   fmt    = id->idInsFmt();
+    regNumber   dstReg = id->idReg1();
+    if (id->idjShort)
+    {
+        // adr x, [rel addr] --  compute address: current addr(ip) + rel addr.
+        assert(ins == INS_adr);
+        assert(fmt == IF_DI_1E);
+        ssize_t distVal = (ssize_t)(dstAddr - srcAddr);
+        dst             = emitOutputShortAddress(dst, ins, fmt, distVal, dstReg);
+    }
+    else
+    {
+        // adrp x, [rel page addr] -- compute page address: current page addr + rel page addr
+        assert(fmt == IF_LARGEADR);
+        ssize_t relPageAddr = computeRelPageAddr((size_t)dstAddr, (size_t)srcAddr);
+        dst                 = emitOutputShortAddress(dst, INS_adrp, IF_DI_1E, relPageAddr, dstReg);
 
-    if (addComma)
-        emitDispComma();
+        // add x, x, page offs -- compute address = page addr + page offs
+        ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits
+        assert(isValidUimm<12>(imm12));
+        code_t code =
+            emitInsCode(INS_add, IF_DI_2A);  // DI_2A  X0010001shiiiiii iiiiiinnnnnddddd   1100 0000   imm(i12, sh)
+        code |= insEncodeDatasize(EA_8BYTE); // X
+        code |= ((code_t)imm12 << 10);       // iiiiiiiiiiii
+        code |= insEncodeReg_Rd(dstReg);     // ddddd
+        code |= insEncodeReg_Rn(dstReg);     // nnnnn
+        dst += emitOutput_Instr(dst, code);
+    }
+    return dst;
 }
 
-//------------------------------------------------------------------------
-// emitDispSveReg: Display a scalable vector register name with an arrangement suffix
-//
-void emitter::emitDispSveReg(regNumber reg, insOpts opt, bool addComma)
-{
-    assert(insOptsScalable(opt) || insOptsScalable32bitExtends(opt));
-    assert(isVectorRegister(reg));
-    printf(emitSveRegName(reg));
-    emitDispArrangement(opt);
-
-    if (addComma)
-        emitDispComma();
-}
+/*****************************************************************************
+ *
+ *  Output a local jump or other instruction with a pc-relative immediate.
+ *  Note that this may be invoked to overwrite an existing jump instruction at 'dst'
+ *  to handle forward branch patching.
+ */
 
-//------------------------------------------------------------------------
-// emitDispVectorReg: Display a SIMD vector register name with an arrangement suffix
-//
-void emitter::emitDispVectorReg(regNumber reg, insOpts opt, bool addComma)
+BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
 {
-    assert(isVectorRegister(reg));
-    printf(emitVectorRegName(reg));
-    emitDispArrangement(opt);
+    instrDescJmp* id = (instrDescJmp*)i;
 
-    if (addComma)
-        emitDispComma();
-}
+    unsigned srcOffs;
+    unsigned dstOffs;
+    BYTE*    srcAddr;
+    BYTE*    dstAddr;
+    ssize_t  distVal;
 
-//------------------------------------------------------------------------
-// emitDispVectorRegIndex: Display a SIMD vector register name with element index
-//
-void emitter::emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma)
-{
-    assert(isVectorRegister(reg));
-    printf(emitVectorRegName(reg));
-    emitDispElemsize(elemsize);
-    printf("[%d]", (int)index);
+    // Set default ins/fmt from id.
+    instruction ins = id->idIns();
+    insFormat   fmt = id->idInsFmt();
 
-    if (addComma)
-        emitDispComma();
-}
+    bool loadLabel    = false;
+    bool isJump       = false;
+    bool loadConstant = false;
 
-//------------------------------------------------------------------------
-// emitDispVectorRegList: Display a SIMD vector register list
-//
-void emitter::emitDispVectorRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma)
-{
-    assert(isVectorRegister(firstReg));
+    switch (ins)
+    {
+        default:
+            isJump = true;
+            break;
 
-    regNumber currReg = firstReg;
+        case INS_tbz:
+        case INS_tbnz:
+        case INS_cbz:
+        case INS_cbnz:
+            isJump = true;
+            break;
 
-    printf("{");
-    for (unsigned i = 0; i < listSize; i++)
-    {
-        const bool notLastRegister = (i != listSize - 1);
-        emitDispVectorReg(currReg, opt, notLastRegister);
-        currReg = (currReg == REG_V31) ? REG_V0 : REG_NEXT(currReg);
+        case INS_ldr:
+        case INS_ldrsw:
+            loadConstant = true;
+            break;
+
+        case INS_adr:
+        case INS_adrp:
+            loadLabel = true;
+            break;
     }
-    printf("}");
 
-    if (addComma)
+    /* Figure out the distance to the target */
+
+    srcOffs = emitCurCodeOffs(dst);
+    srcAddr = emitOffsetToPtr(srcOffs);
+
+    if (id->idAddr()->iiaIsJitDataOffset())
     {
-        emitDispComma();
-    }
-}
+        assert(loadConstant || loadLabel);
+        int doff = id->idAddr()->iiaGetJitDataOffset();
+        assert(doff >= 0);
+        ssize_t imm = emitGetInsSC(id);
+        assert((imm >= 0) && (imm < 0x1000)); // 0x1000 is arbitrary, currently 'imm' is always 0
+
+        unsigned dataOffs = (unsigned)(doff + imm);
+        assert(dataOffs < emitDataSize());
+        dstAddr = emitDataOffsetToPtr(dataOffs);
+
+        regNumber dstReg  = id->idReg1();
+        regNumber addrReg = dstReg; // an integer register to compute long address.
+        emitAttr  opSize  = id->idOpSize();
+
+        if (loadConstant)
+        {
+            if (id->idjShort)
+            {
+                // ldr x/v, [rel addr] -- load constant from current addr(ip) + rel addr.
+                assert(ins == INS_ldr);
+                assert(fmt == IF_LS_1A);
+                distVal = (ssize_t)(dstAddr - srcAddr);
+                dst     = emitOutputShortConstant(dst, ins, fmt, distVal, dstReg, opSize);
+            }
+            else
+            {
+                // adrp x, [rel page addr] -- compute page address: current page addr + rel page addr
+                assert(fmt == IF_LARGELDC);
+                ssize_t relPageAddr = computeRelPageAddr((size_t)dstAddr, (size_t)srcAddr);
+                if (isVectorRegister(dstReg))
+                {
+                    // Update addrReg with the reserved integer register
+                    // since we cannot use dstReg (vector) to load constant directly from memory.
+
+                    // If loading a 16-byte value, we will need to load directly into dstReg.
+                    // Thus, encode addrReg for the ld1 instruction.
+                    if (opSize == EA_16BYTE)
+                    {
+                        addrReg = encodingSPtoZR(id->idReg2());
+                    }
+                    else
+                    {
+                        addrReg = id->idReg2();
+                    }
+
+                    assert(isGeneralRegister(addrReg));
+                }
+
+                ins = INS_adrp;
+                fmt = IF_DI_1E;
+                dst = emitOutputShortAddress(dst, ins, fmt, relPageAddr, addrReg);
+
+                ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits
+                assert(isValidUimm<12>(imm12));
+
+                // Special case: emit add + ld1 instructions for loading 16-byte data into vector register.
+                if (isVectorRegister(dstReg) && (opSize == EA_16BYTE))
+                {
+                    const emitAttr elemSize = EA_1BYTE;
+                    const insOpts  opt      = optMakeArrangement(opSize, elemSize);
 
-//------------------------------------------------------------------------
-// emitDispVectorElemList: Display a SIMD vector element list
-//
-void emitter::emitDispVectorElemList(
-    regNumber firstReg, unsigned listSize, emitAttr elemsize, unsigned index, bool addComma)
-{
-    assert(isVectorRegister(firstReg));
+                    assert(isGeneralRegisterOrSP(addrReg));
+                    assert(isValidVectorElemsize(elemSize));
+                    assert(isValidArrangement(opSize, opt));
 
-    regNumber currReg = firstReg;
+                    // Calculate page addr + page offs, then emit ld1 instruction.
+                    dst = emitOutputVectorConstant(dst, imm12, dstReg, addrReg, opSize, elemSize);
+                }
+                else
+                {
+                    // ldr x, [x, 0] -- load constant from address into integer register.
+                    ins = INS_ldr;
+                    fmt = IF_LS_2B;
+                    dst = emitOutputShortConstant(dst, ins, fmt, imm12, addrReg, opSize);
 
-    printf("{");
-    for (unsigned i = 0; i < listSize; i++)
-    {
-        printf(emitVectorRegName(currReg));
-        emitDispElemsize(elemsize);
-        const bool notLastRegister = (i != listSize - 1);
-        if (notLastRegister)
+                    // fmov v, d -- copy constant in integer register to vector register.
+                    // This is needed only for vector constant.
+                    if (addrReg != dstReg)
+                    {
+                        //  fmov    Vd,Rn                DV_2I  X00111100X100111 000000nnnnnddddd   1E27 0000   Vd,Rn
+                        //  (scalar, from general)
+                        assert(isVectorRegister(dstReg) && isGeneralRegister(addrReg));
+                        ins         = INS_fmov;
+                        fmt         = IF_DV_2I;
+                        code_t code = emitInsCode(ins, fmt);
+
+                        code |= insEncodeReg_Vd(dstReg);  // ddddd
+                        code |= insEncodeReg_Rn(addrReg); // nnnnn
+                        if (id->idOpSize() == EA_8BYTE)
+                        {
+                            code |= 0x80400000; // X ... X
+                        }
+                        dst += emitOutput_Instr(dst, code);
+                    }
+                }
+            }
+        }
+        else
         {
-            emitDispComma();
+            assert(loadLabel);
+            dst = emitOutputLoadLabel(dst, srcAddr, dstAddr, id);
         }
-        currReg = (currReg == REG_V31) ? REG_V0 : REG_NEXT(currReg);
-    }
-    printf("}");
-    printf("[%d]", index);
 
-    if (addComma)
-    {
-        emitDispComma();
+        return dst;
     }
-}
-
-//------------------------------------------------------------------------
-// emitDispSveConsecutiveRegList: Display a SVE consecutive vector register list
-//
-void emitter::emitDispSveConsecutiveRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma)
-{
-    assert(isVectorRegister(firstReg));
-
-    regNumber currReg = firstReg;
 
-    assert(listSize > 0);
+    assert(loadLabel || isJump);
 
-    printf("{ ");
-    // We do not want the short-hand for list size of 1 or 2.
-    if ((listSize <= 2) || (((unsigned)currReg + listSize - 1) > (unsigned)REG_V31))
+    if (id->idAddr()->iiaHasInstrCount())
     {
-        for (unsigned i = 0; i < listSize; i++)
+        assert(ig != NULL);
+        int      instrCount = id->idAddr()->iiaGetInstrCount();
+        unsigned insNum     = emitFindInsNum(ig, id);
+        if (instrCount < 0)
         {
-            const bool notLastRegister = (i != listSize - 1);
-            emitDispSveReg(currReg, opt, notLastRegister);
-            currReg = (currReg == REG_V31) ? REG_V0 : REG_NEXT(currReg);
+            // Backward branches using instruction count must be within the same instruction group.
+            assert(insNum + 1 >= (unsigned)(-instrCount));
         }
+
+        dstOffs = ig->igOffs + emitFindOffset(ig, (insNum + 1 + instrCount));
+        dstAddr = emitOffsetToPtr(dstOffs);
     }
     else
     {
-        // short-hand. example: { z0.s - z2.s } which is the same as { z0.s, z1.s, z2.s }
-        emitDispSveReg(currReg, opt, false);
-        printf(" - ");
-        emitDispSveReg((regNumber)(currReg + listSize - 1), opt, false);
-    }
-    printf(" }");
-
-    if (addComma)
-    {
-        emitDispComma();
+        dstOffs = id->idAddr()->iiaIGlabel->igOffs;
+        dstAddr = emitOffsetToPtr(dstOffs);
     }
-}
 
-//------------------------------------------------------------------------
-// emitDispPredicateReg: Display a predicate register name with with an arrangement suffix
-//
-void emitter::emitDispPredicateReg(regNumber reg, PredicateType ptype, insOpts opt, bool addComma)
-{
-    assert(isPredicateRegister(reg));
-    printf(emitPredicateRegName(reg, ptype));
+    distVal = (ssize_t)(dstAddr - srcAddr);
 
-    if (ptype == PREDICATE_MERGE)
-    {
-        printf("/m");
-    }
-    else if (ptype == PREDICATE_ZERO)
-    {
-        printf("/z");
-    }
-    else if (ptype == PREDICATE_SIZED || ptype == PREDICATE_N_SIZED)
+    if (dstOffs <= srcOffs)
     {
-        emitDispElemsize(optGetSveElemsize(opt));
-    }
-
-    if (addComma)
-        emitDispComma();
-}
-
-//------------------------------------------------------------------------
-// emitDispPredicateRegPair: Display a pair of predicate registers
-//
-void emitter::emitDispPredicateRegPair(regNumber reg, insOpts opt)
-{
-    printf("{ ");
-    emitDispPredicateReg(reg, PREDICATE_SIZED, opt, true);
-    emitDispPredicateReg((regNumber)((unsigned)reg + 1), PREDICATE_SIZED, opt, false);
-    printf(" }, ");
-}
-
-//------------------------------------------------------------------------
-// emitDispLowPredicateReg: Display a low predicate register name with with an arrangement suffix
-//
-void emitter::emitDispLowPredicateReg(regNumber reg, PredicateType ptype, insOpts opt, bool addComma)
-{
-    assert(isLowPredicateRegister(reg));
-    reg = (regNumber)((((unsigned)reg - REG_PREDICATE_FIRST) & 0x7) + REG_PREDICATE_FIRST);
-    emitDispPredicateReg(reg, ptype, opt, addComma);
-}
-
-//------------------------------------------------------------------------
-// emitDispLowPredicateRegPair: Display a pair of low predicate registers
-//
-void emitter::emitDispLowPredicateRegPair(regNumber reg, insOpts opt)
-{
-    assert(isLowPredicateRegister(reg));
-
-    printf("{ ");
-    const unsigned baseRegNum = ((unsigned)reg - REG_PREDICATE_FIRST) & 0x7;
-    const unsigned regNum     = (baseRegNum * 2) + REG_PREDICATE_FIRST;
-    emitDispPredicateReg((regNumber)regNum, PREDICATE_SIZED, opt, true);
-    emitDispPredicateReg((regNumber)(regNum + 1), PREDICATE_SIZED, opt, false);
-    printf(" }, ");
-}
+#if DEBUG_EMIT
+        /* This is a backward jump - distance is known at this point */
 
-//------------------------------------------------------------------------
-// emitDispVectorLengthSpecifier: Display the vector length specifier
-//
-void emitter::emitDispVectorLengthSpecifier(instrDesc* id)
-{
-    assert(id != nullptr);
-    assert(insOptsScalableStandard(id->idInsOpt()));
+        if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+        {
+            size_t blkOffs = id->idjIG->igOffs;
 
-    if (id->idVectorLength4x())
-    {
-        printf("vlx4");
+            if (INTERESTING_JUMP_NUM == 0)
+                printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+            printf("[3] Jump  block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
+            printf("[3] Jump        is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
+            printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
+        }
+#endif
     }
     else
     {
-        printf("vlx2");
-    }
-}
-
-//------------------------------------------------------------------------
-// emitDispArrangement: Display a SIMD vector arrangement suffix
-//
-void emitter::emitDispArrangement(insOpts opt)
-{
-    const char* str = "???";
+        /* This is a  forward jump - distance will be an upper limit */
 
-    switch (opt)
-    {
-        case INS_OPTS_8B:
-            str = "8b";
-            break;
-        case INS_OPTS_16B:
-            str = "16b";
-            break;
-        case INS_OPTS_SCALABLE_B:
-            str = "b";
-            break;
-        case INS_OPTS_4H:
-            str = "4h";
-            break;
-        case INS_OPTS_8H:
-            str = "8h";
-            break;
-        case INS_OPTS_SCALABLE_H:
-            str = "h";
-            break;
-        case INS_OPTS_2S:
-            str = "2s";
-            break;
-        case INS_OPTS_4S:
-            str = "4s";
-            break;
-        case INS_OPTS_SCALABLE_S:
-        case INS_OPTS_SCALABLE_S_UXTW:
-        case INS_OPTS_SCALABLE_S_SXTW:
-            str = "s";
-            break;
-        case INS_OPTS_1D:
-            str = "1d";
-            break;
-        case INS_OPTS_2D:
-            str = "2d";
-            break;
-        case INS_OPTS_SCALABLE_D:
-        case INS_OPTS_SCALABLE_D_UXTW:
-        case INS_OPTS_SCALABLE_D_SXTW:
-            str = "d";
-            break;
-        case INS_OPTS_SCALABLE_Q:
-            str = "q";
-            break;
+        emitFwdJumps = true;
 
-        default:
-            assert(!"Invalid SVE insOpt");
-    }
-    printf(".");
-    printf(str);
-}
+        /* The target offset will be closer by at least 'emitOffsAdj', but only if this
+           jump doesn't cross the hot-cold boundary. */
 
-//------------------------------------------------------------------------
-// emitDispElemsize: Display a SIMD vector element suffix
-//
-void emitter::emitDispElemsize(emitAttr elemsize)
-{
-    const char* str = "???";
+        if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+        {
+            dstOffs -= emitOffsAdj;
+            distVal -= emitOffsAdj;
+        }
 
-    switch (elemsize)
-    {
-        case EA_1BYTE:
-            str = ".b";
-            break;
-        case EA_2BYTE:
-            str = ".h";
-            break;
-        case EA_4BYTE:
-            str = ".s";
-            break;
-        case EA_8BYTE:
-            str = ".d";
-            break;
+        /* Record the location of the jump for later patching */
 
-        default:
-            assert(!"invalid elemsize");
-            break;
-    }
+        id->idjOffs = dstOffs;
 
-    printf(str);
-}
+        /* Are we overflowing the id->idjOffs bitfield? */
+        if (id->idjOffs != dstOffs)
+            IMPL_LIMITATION("Method is too large");
 
-//------------------------------------------------------------------------
-// emitDispShiftedReg: Display a register with an optional shift operation
-//
-void emitter::emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr)
-{
-    emitAttr size = EA_SIZE(attr);
-    assert((imm & 0x003F) == imm);
-    assert(((imm & 0x0020) == 0) || (size == EA_8BYTE));
+#if DEBUG_EMIT
+        if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+        {
+            size_t blkOffs = id->idjIG->igOffs;
 
-    printf(emitRegName(reg, size));
+            if (INTERESTING_JUMP_NUM == 0)
+                printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+            printf("[4] Jump  block is at %08X\n", blkOffs);
+            printf("[4] Jump        is at %08X\n", srcOffs);
+            printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
+        }
+#endif
+    }
 
-    if (imm > 0)
+#ifdef DEBUG
+    if (0 && emitComp->verbose)
     {
-        if (strictArmAsm)
-        {
-            emitDispComma();
-        }
-        emitDispShiftOpts(opt);
-        emitDispImm(imm, false);
+        size_t sz          = 4;
+        int    distValSize = id->idjShort ? 4 : 8;
+        printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = 0x%08X\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
+               dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs, distVal);
     }
-}
+#endif
 
-/*****************************************************************************
- *
- *  Display a register with an optional extend and scale operations
- */
-void emitter::emitDispExtendReg(regNumber reg, insOpts opt, ssize_t imm)
-{
-    assert((imm >= 0) && (imm <= 4));
-    assert(insOptsNone(opt) || insOptsAnyExtend(opt) || (opt == INS_OPTS_LSL));
+    /* For forward jumps, record the address of the distance value */
+    id->idjTemp.idjAddr = (distVal > 0) ? dst : NULL;
 
-    // size is based on the extend option, not the instr size.
-    // Assume INS_OPTS_NONE and INS_OPTS_LSL are 64bit as they usually are.
-    emitAttr size = (insOptsNone(opt) || insOptsLSL(opt) || insOpts64BitExtend(opt)) ? EA_8BYTE : EA_4BYTE;
+    assert(insOptsNone(id->idInsOpt()));
 
-    if (strictArmAsm)
+    if (isJump)
     {
-        if (insOptsNone(opt) || (insOptsLSL(opt) && imm == 0))
+        if (id->idjShort)
         {
-            emitDispReg(reg, size, false);
+            // Short conditional/unconditional jump
+            assert(!id->idjKeepLong);
+            assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
+            assert((fmt == IF_BI_0A) || (fmt == IF_BI_0B) || (fmt == IF_BI_1A) || (fmt == IF_BI_1B));
+            dst = emitOutputShortBranch(dst, ins, fmt, distVal, id);
         }
         else
         {
-            emitDispReg(reg, size, true);
+            // Long conditional/unconditional jump
 
-            if (insOptsLSL(opt))
-                printf("LSL");
+            if (fmt == IF_LARGEJMP)
+            {
+                // This is a pseudo-instruction format representing a large conditional branch, to allow
+                // us to get a greater branch target range than we can get by using a straightforward conditional
+                // branch. It is encoded as a short conditional branch that branches around a long unconditional
+                // branch.
+                //
+                // Conceptually, we have:
+                //
+                //      b<cond> L_target
+                //
+                // The code we emit is:
+                //
+                //      b<!cond> L_not  // 4 bytes. Note that we reverse the condition.
+                //      b L_target      // 4 bytes
+                //   L_not:
+                //
+                // Note that we don't actually insert any blocks: we simply encode "b <!cond> L_not" as a branch with
+                // the correct offset. Note also that this works for both integer and floating-point conditions, because
+                // the condition inversion takes ordered/unordered into account, preserving NaN behavior. For example,
+                // "GT" (greater than) is inverted to "LE" (less than, equal, or unordered).
+
+                instruction reverseIns;
+                insFormat   reverseFmt;
+
+                switch (ins)
+                {
+                    case INS_cbz:
+                        reverseIns = INS_cbnz;
+                        reverseFmt = IF_BI_1A;
+                        break;
+                    case INS_cbnz:
+                        reverseIns = INS_cbz;
+                        reverseFmt = IF_BI_1A;
+                        break;
+                    case INS_tbz:
+                        reverseIns = INS_tbnz;
+                        reverseFmt = IF_BI_1B;
+                        break;
+                    case INS_tbnz:
+                        reverseIns = INS_tbz;
+                        reverseFmt = IF_BI_1B;
+                        break;
+                    default:
+                        reverseIns = emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(ins)));
+                        reverseFmt = IF_BI_0B;
+                }
+
+                dst = emitOutputShortBranch(dst,
+                                            reverseIns,    // reverse the conditional instruction
+                                            reverseFmt, 8, /* 8 bytes from start of this large conditional
+                                                              pseudo-instruction to L_not. */
+                                            id);
+
+                // Now, pretend we've got a normal unconditional branch, and fall through to the code to emit that.
+                ins = INS_b;
+                fmt = IF_BI_0A;
+
+                // The distVal was computed based on the beginning of the pseudo-instruction,
+                // So subtract the size of the conditional branch so that it is relative to the
+                // unconditional branch.
+                distVal -= 4;
+            }
+
+            assert(fmt == IF_BI_0A);
+            assert((distVal & 1) == 0);
+            code_t     code               = emitInsCode(ins, fmt);
+            const bool doRecordRelocation = emitComp->opts.compReloc && emitJumpCrossHotColdBoundary(srcOffs, dstOffs);
+
+            if (doRecordRelocation)
+            {
+                // dst isn't an actual final target location, just some intermediate
+                // location.  Thus we cannot make any guarantees about distVal (not
+                // even the direction/sign).  Instead we don't encode any offset and
+                // rely on the relocation to do all the work
+            }
             else
-                emitDispExtendOpts(opt);
+            {
+                // Branch offset encodings are scaled by 4.
+                noway_assert((distVal & 3) == 0);
+                distVal >>= 2;
+                noway_assert(isValidSimm<26>(distVal));
 
-            if (imm > 0)
+                // Insert offset into unconditional branch instruction
+                distVal &= 0x3FFFFFFLL;
+                code |= distVal;
+            }
+
+            const unsigned instrSize = emitOutput_Instr(dst, code);
+
+            if (doRecordRelocation)
             {
-                printf(" ");
-                emitDispImm(imm, false);
+                assert(id->idjKeepLong);
+                if (emitComp->info.compMatchedVM)
+                {
+                    void* target = emitOffsetToPtr(dstOffs);
+                    emitRecordRelocation((void*)dst, target, IMAGE_REL_ARM64_BRANCH26);
+                }
             }
+
+            dst += instrSize;
         }
     }
-    else // !strictArmAsm
+    else if (loadLabel)
     {
-        if (insOptsNone(opt))
-        {
-            emitDispReg(reg, size, false);
-        }
-        else
-        {
-            if (opt != INS_OPTS_LSL)
-            {
-                emitDispExtendOpts(opt);
-                printf("(");
-                emitDispReg(reg, size, false);
-                printf(")");
-            }
-        }
-        if (imm > 0)
-        {
-            printf("*");
-            emitDispImm(ssize_t{1} << imm, false);
-        }
+        dst = emitOutputLoadLabel(dst, srcAddr, dstAddr, id);
     }
+
+    return dst;
 }
 
 /*****************************************************************************
  *
- *  Display an addressing operand [reg + imm]
+ *  Output a short branch instruction.
  */
-void emitter::emitDispAddrRI(regNumber reg, insOpts opt, ssize_t imm)
+BYTE* emitter::emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id)
 {
-    reg = encodingZRtoSP(reg); // ZR (R31) encodes the SP register
-
-    if (strictArmAsm)
-    {
-        printf("[");
-
-        emitDispReg(reg, EA_8BYTE, false);
+    code_t code = emitInsCode(ins, fmt);
 
-        if (!insOptsPostIndex(opt) && (imm != 0))
-        {
-            emitDispComma();
-            emitDispImm(imm, false, true, true);
-        }
-        printf("]");
+    ssize_t loBits = (distVal & 3);
+    noway_assert(loBits == 0);
+    distVal >>= 2; // branch offset encodings are scaled by 4.
 
-        if (insOptsPreIndex(opt))
-        {
-            printf("!");
-        }
-        else if (insOptsPostIndex(opt))
-        {
-            emitDispComma();
-            emitDispImm(imm, false, true, true);
-        }
+    if (fmt == IF_BI_0A)
+    {
+        // INS_b or INS_bl_local
+        noway_assert(isValidSimm<26>(distVal));
+        distVal &= 0x3FFFFFFLL;
+        code |= distVal;
     }
-    else // !strictArmAsm
+    else if (fmt == IF_BI_0B) // BI_0B   01010100iiiiiiii iiiiiiiiiiiXXXXX      simm19:00
     {
-        printf("[");
-
-        const char* operStr = "++";
-        if (imm < 0)
-        {
-            operStr = "--";
-            imm     = -imm;
-        }
-
-        if (insOptsPreIndex(opt))
-        {
-            printf(operStr);
-        }
+        // INS_beq, INS_bne, etc...
+        noway_assert(isValidSimm<19>(distVal));
+        distVal &= 0x7FFFFLL;
+        code |= distVal << 5;
+    }
+    else if (fmt == IF_BI_1A) // BI_1A   X.......iiiiiiii iiiiiiiiiiittttt      Rt simm19:00
+    {
+        // INS_cbz or INS_cbnz
+        assert(id != nullptr);
+        code |= insEncodeDatasize(id->idOpSize()); // X
+        code |= insEncodeReg_Rt(id->idReg1());     // ttttt
 
-        emitDispReg(reg, EA_8BYTE, false);
+        noway_assert(isValidSimm<19>(distVal));
+        distVal &= 0x7FFFFLL; // 19 bits
+        code |= distVal << 5;
+    }
+    else if (fmt == IF_BI_1B) // BI_1B   B.......bbbbbiii iiiiiiiiiiittttt      Rt imm6, simm14:00
+    {
+        // INS_tbz or INS_tbnz
+        assert(id != nullptr);
+        ssize_t imm = emitGetInsSC(id);
+        assert(isValidImmShift(imm, id->idOpSize()));
 
-        if (insOptsPostIndex(opt))
+        if (imm & 0x20) // test bit 32-63 ?
         {
-            printf(operStr);
+            code |= 0x80000000; // B
         }
+        code |= ((imm & 0x1F) << 19);          // bbbbb
+        code |= insEncodeReg_Rt(id->idReg1()); // ttttt
 
-        if (insOptsIndexed(opt))
-        {
-            emitDispComma();
-        }
-        else
-        {
-            printf("%c", operStr[1]);
-        }
-        emitDispImm(imm, false, true, true);
-        printf("]");
+        noway_assert(isValidSimm<14>(distVal));
+        distVal &= 0x3FFFLL; // 14 bits
+        code |= distVal << 5;
+    }
+    else
+    {
+        assert(!"Unknown fmt for emitOutputShortBranch");
     }
+
+    dst += emitOutput_Instr(dst, code);
+
+    return dst;
 }
 
 /*****************************************************************************
  *
- *  Display an addressing operand [reg + extended reg]
+ *  Output a short address instruction.
  */
-void emitter::emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, bool isScaled, emitAttr size)
+BYTE* emitter::emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg)
 {
-    reg1 = encodingZRtoSP(reg1); // ZR (R31) encodes the SP register
+    ssize_t loBits = (distVal & 3);
+    distVal >>= 2;
 
-    unsigned scale = 0;
-    if (isScaled)
+    code_t code = emitInsCode(ins, fmt);
+    if (fmt == IF_DI_1E) // DI_1E   .ii.....iiiiiiii iiiiiiiiiiiddddd      Rd simm21
     {
-        scale = NaturalScale_helper(size);
-    }
-
-    printf("[");
+        // INS_adr or INS_adrp
+        code |= insEncodeReg_Rd(reg); // ddddd
 
-    if (strictArmAsm)
-    {
-        emitDispReg(reg1, EA_8BYTE, true);
-        emitDispExtendReg(reg2, opt, scale);
+        noway_assert(isValidSimm<19>(distVal));
+        distVal &= 0x7FFFFLL; // 19 bits
+        code |= distVal << 5;
+        code |= loBits << 29; //  2 bits
     }
-    else // !strictArmAsm
+    else
     {
-        emitDispReg(reg1, EA_8BYTE, false);
-        printf("+");
-        emitDispExtendReg(reg2, opt, scale);
+        assert(!"Unknown fmt for emitOutputShortAddress");
     }
 
-    printf("]");
+    dst += emitOutput_Instr(dst, code);
+
+    return dst;
 }
 
 /*****************************************************************************
  *
- *  Display an insSvePattern
+ *  Output a short constant instruction.
  */
-void emitter::emitDispSvePattern(insSvePattern pattern, bool addComma)
+BYTE* emitter::emitOutputShortConstant(
+    BYTE* dst, instruction ins, insFormat fmt, ssize_t imm, regNumber reg, emitAttr opSize)
 {
-    printf("%s", svePatternNames[pattern]);
+    code_t code = emitInsCode(ins, fmt);
 
-    if (addComma)
+    if (fmt == IF_LS_1A)
     {
-        emitDispComma();
-    }
-}
+        // LS_1A   XX...V..iiiiiiii iiiiiiiiiiittttt      Rt simm21
+        // INS_ldr or INS_ldrsw (PC-Relative)
 
-/*****************************************************************************
- *
- *  Display (optionally) the instruction encoding in hex
- */
+        ssize_t loBits = (imm & 3);
+        noway_assert(loBits == 0);
+        ssize_t distVal = imm >> 2; // load offset encodings are scaled by 4.
 
-void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz)
-{
-    if (!emitComp->opts.disCodeBytes)
-    {
-        return;
-    }
+        noway_assert(isValidSimm<19>(distVal));
 
-    // We do not display the instruction hex if we want diff-able disassembly
-    if (!emitComp->opts.disDiffable)
+        // Is the target a vector register?
+        if (isVectorRegister(reg))
+        {
+            code |= insEncodeDatasizeVLS(code, opSize); // XX V
+            code |= insEncodeReg_Vt(reg);               // ttttt
+        }
+        else
+        {
+            assert(isGeneralRegister(reg));
+            // insEncodeDatasizeLS is not quite right for this case.
+            // So just specialize it.
+            if ((ins == INS_ldr) && (opSize == EA_8BYTE))
+            {
+                // set the operation size in bit 30
+                code |= 0x40000000;
+            }
+
+            code |= insEncodeReg_Rt(reg); // ttttt
+        }
+
+        distVal &= 0x7FFFFLL; // 19 bits
+        code |= distVal << 5;
+    }
+    else if (fmt == IF_LS_2B)
     {
-        if (sz == 4)
+        //  ldr     Rt,[Xn+pimm12]       LS_2B  1X11100101iiiiii iiiiiinnnnnttttt   B940 0000   imm(0-4095<<{2,3})
+        // INS_ldr or INS_ldrsw (PC-Relative)
+        noway_assert(isValidUimm<12>(imm));
+        assert(isGeneralRegister(reg));
+
+        if (opSize == EA_8BYTE)
         {
-            printf("  %08X    ", (*((code_t*)code)));
+            // insEncodeDatasizeLS is not quite right for this case.
+            // So just specialize it.
+            if (ins == INS_ldr)
+            {
+                // set the operation size in bit 30
+                code |= 0x40000000;
+            }
+            // Low 3 bits should be 0 -- 8 byte JIT data should be aligned on 8 byte.
+            assert((imm & 7) == 0);
+            imm >>= 3;
         }
         else
         {
-            printf("              ");
+            assert(opSize == EA_4BYTE);
+            // Low 2 bits should be 0 -- 4 byte aligned data.
+            assert((imm & 3) == 0);
+            imm >>= 2;
         }
+
+        code |= insEncodeReg_Rt(reg); // ttttt
+        code |= insEncodeReg_Rn(reg); // nnnnn
+        code |= imm << 10;
+    }
+    else
+    {
+        assert(!"Unknown fmt for emitOutputShortConstant");
     }
+
+    dst += emitOutput_Instr(dst, code);
+
+    return dst;
 }
 
 /*****************************************************************************
  *
- *  Handles printing of LARGEJMP pseudo-instruction.
+ *  Output instructions to load a constant into a vector register.
  */
-
-void emitter::emitDispLargeJmp(
-    instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig)
+BYTE* emitter::emitOutputVectorConstant(
+    BYTE* dst, ssize_t imm, regNumber dstReg, regNumber addrReg, emitAttr opSize, emitAttr elemSize)
 {
-    // Note: don't touch the actual instrDesc. If we accidentally messed it up, it would create a very
-    // difficult-to-find bug.
+    // add addrReg, addrReg, page offs -- compute address = page addr + page offs.
+    code_t code = emitInsCode(INS_add, IF_DI_2A); // DI_2A  X0010001shiiiiii iiiiiinnnnnddddd   1100 0000   imm(i12, sh)
+    code |= insEncodeDatasize(EA_8BYTE);          // X - use EA_8BYTE, as we are calculating 64-bit address
+    code |= ((code_t)imm << 10);                  // iiiiiiiiiiii
+    code |= insEncodeReg_Rd(addrReg);             // ddddd
+    code |= insEncodeReg_Rn(addrReg);             // nnnnn
+    dst += emitOutput_Instr(dst, code);
 
-    inlineInstrDesc<instrDescJmp> idJmp;
-    instrDescJmp*                 pidJmp = idJmp.id();
+    // ld1 dstReg, addrReg -- load constant at address in addrReg into dstReg.
+    code = emitInsCode(INS_ld1, IF_LS_2D);  // LS_2D   .Q.............. ....ssnnnnnttttt      Vt Rn
+    code |= insEncodeVectorsize(opSize);    // Q
+    code |= insEncodeVLSElemsize(elemSize); // ss
+    code |= insEncodeReg_Rn(addrReg);       // nnnnn
+    code |= insEncodeReg_Vt(dstReg);        // ttttt
+    dst += emitOutput_Instr(dst, code);
 
-    const instruction ins = id->idIns();
-    instruction       reverseIns;
-    insFormat         reverseFmt;
+    return dst;
+}
 
-    // Reverse the conditional instruction.
-    switch (ins)
+/*****************************************************************************
+ *
+ *  Output a call instruction.
+ */
+
+unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code)
+{
+    const unsigned char callInstrSize = sizeof(code_t); // 4 bytes
+    regMaskTP           gcrefRegs;
+    regMaskTP           byrefRegs;
+
+    VARSET_TP GCvars(VarSetOps::UninitVal());
+
+    // Is this a "fat" call descriptor?
+    if (id->idIsLargeCall())
     {
-        case INS_cbz:
-            reverseIns = INS_cbnz;
-            reverseFmt = IF_BI_1A;
-            break;
-        case INS_cbnz:
-            reverseIns = INS_cbz;
-            reverseFmt = IF_BI_1A;
-            break;
-        case INS_tbz:
-            reverseIns = INS_tbnz;
-            reverseFmt = IF_BI_1B;
-            break;
-        case INS_tbnz:
-            reverseIns = INS_tbz;
-            reverseFmt = IF_BI_1B;
-            break;
-        default:
-            reverseIns = emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(ins)));
-            reverseFmt = IF_BI_0B;
+        instrDescCGCA* idCall = (instrDescCGCA*)id;
+        gcrefRegs             = idCall->idcGcrefRegs;
+        byrefRegs             = idCall->idcByrefRegs;
+        VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
     }
+    else
+    {
+        assert(!id->idIsLargeDsp());
+        assert(!id->idIsLargeCns());
 
-    pidJmp->idIns(reverseIns);
-    pidJmp->idInsFmt(reverseFmt);
-    pidJmp->idOpSize(id->idOpSize());
-    pidJmp->idAddr()->iiaSetInstrCount(1);
-    pidJmp->idDebugOnlyInfo(id->idDebugOnlyInfo()); // Share the idDebugOnlyInfo() field.
+        gcrefRegs = emitDecodeCallGCregs(id);
+        byrefRegs = 0;
+        VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+    }
 
-    const size_t bcondSizeOrZero = (pCode == NULL) ? 0 : 4; // Branch is 4 bytes.
-    emitDispInsHelp(pidJmp, false, doffs, asmfm, offset, pCode, bcondSizeOrZero,
-                    NULL /* force display of pc-relative branch */);
+    /* We update the GC info before the call as the variables cannot be
+        used by the call. Killing variables before the call helps with
+        boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
+        If we ever track aliased variables (which could be used by the
+        call), we would have to keep them alive past the call. */
 
-    pCode += bcondSizeOrZero;
-    offset += 4;
+    emitUpdateLiveGCvars(GCvars, dst);
 
-    // Next, display the unconditional branch.
+#ifdef DEBUG
+    // Output any delta in GC variable info, corresponding to the before-call GC var updates done above.
+    if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC)
+    {
+        emitDispGCVarDelta();
+    }
+#endif // DEBUG
 
-    // Reset the local instrDesc.
-    memset(pidJmp, 0, sizeof(instrDescJmp));
+    // Now output the call instruction and update the 'dst' pointer
+    //
+    unsigned outputInstrSize = emitOutput_Instr(dst, code);
+    dst += outputInstrSize;
 
-    pidJmp->idIns(INS_b);
-    pidJmp->idInsFmt(IF_LARGEJMP);
+    // All call instructions are 4-byte in size on ARM64
+    //
+    assert(outputInstrSize == callInstrSize);
+
+    // If the method returns a GC ref, mark INTRET (R0) appropriately.
+    if (id->idGCref() == GCT_GCREF)
+    {
+        gcrefRegs |= RBM_INTRET;
+    }
+    else if (id->idGCref() == GCT_BYREF)
+    {
+        byrefRegs |= RBM_INTRET;
+    }
+
+    // If is a multi-register return method is called, mark INTRET_1 (X1) appropriately
+    if (id->idIsLargeCall())
+    {
+        instrDescCGCA* idCall = (instrDescCGCA*)id;
+        if (idCall->idSecondGCref() == GCT_GCREF)
+        {
+            gcrefRegs |= RBM_INTRET_1;
+        }
+        else if (idCall->idSecondGCref() == GCT_BYREF)
+        {
+            byrefRegs |= RBM_INTRET_1;
+        }
+    }
 
-    if (id->idIsBound())
+    // If the GC register set has changed, report the new set.
+    if (gcrefRegs != emitThisGCrefRegs)
     {
-        pidJmp->idSetIsBound();
-        pidJmp->idAddr()->iiaIGlabel = id->idAddr()->iiaIGlabel;
+        emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
     }
-    else
+    // If the Byref register set has changed, report the new set.
+    if (byrefRegs != emitThisByrefRegs)
     {
-        pidJmp->idAddr()->iiaBBlabel = id->idAddr()->iiaBBlabel;
+        emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
     }
 
-    pidJmp->idDebugOnlyInfo(id->idDebugOnlyInfo()); // Share the idDebugOnlyInfo() field.
+    // Some helper calls may be marked as not requiring GC info to be recorded.
+    if ((!id->idIsNoGC()))
+    {
+        // On ARM64, as on AMD64, we don't change the stack pointer to push/pop args.
+        // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism
+        // to record the call for GC info purposes.  (It might be best to use an alternate call,
+        // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.)
+        emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0);
 
-    const size_t brSizeOrZero = (pCode == NULL) ? 0 : 4; // Unconditional branch is 4 bytes.
-    emitDispInsHelp(pidJmp, isNew, doffs, asmfm, offset, pCode, brSizeOrZero, ig);
+        // Do we need to record a call location for GC purposes?
+        //
+        if (!emitFullGCinfo)
+        {
+            emitRecordGCcall(dst, callInstrSize);
+        }
+    }
+    return callInstrSize;
 }
 
 /*****************************************************************************
  *
- *  Wrapper for emitter::emitDispInsHelp() that handles special large jump
- *  pseudo-instruction.
+ *  Emit a 32-bit Arm64 instruction
  */
 
-void emitter::emitDispIns(
-    instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig)
+unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code)
 {
-    // Special case: IF_LARGEJMP
+    assert(sizeof(code_t) == 4);
+    BYTE* dstRW       = dst + writeableOffset;
+    *((code_t*)dstRW) = code;
 
-    if ((id->idInsFmt() == IF_LARGEJMP) && id->idIsBound())
-    {
-        // This is a pseudo-instruction format representing a large conditional branch. See the comment
-        // in emitter::emitOutputLJ() for the full description.
-        //
-        // For this pseudo-instruction, we will actually generate:
-        //
-        //      b<!cond> L_not  // 4 bytes. Note that we reverse the condition.
-        //      b L_target      // 4 bytes.
-        //   L_not:
-        //
-        // These instructions don't exist in the actual instruction stream, so we need to fake them
-        // up to display them.
-        emitDispLargeJmp(id, isNew, doffs, asmfm, offset, pCode, sz, ig);
-    }
-    else
-    {
-        emitDispInsHelp(id, isNew, doffs, asmfm, offset, pCode, sz, ig);
-    }
+    return sizeof(code_t);
 }
 
-//--------------------------------------------------------------------
-// emitDispInsHelp: Dump the given instruction to jitstdout.
-//
-// Arguments:
-//   id - The instruction
-//   isNew - Whether the instruction is newly generated (before encoding).
-//   doffs - If true, always display the passed-in offset.
-//   asmfm - Whether the instruction should be displayed in assembly format.
-//           If false some additional information may be printed for the instruction.
-//   offset - The offset of the instruction. Only displayed if doffs is true or if
-//            !isNew && !asmfm.
-//   code - Pointer to the actual code, used for displaying the address and encoded bytes
-//          if turned on.
-//   sz - The size of the instruction, used to display the encoded bytes.
-//   ig - The instruction group containing the instruction.
-//
-void emitter::emitDispInsHelp(
-    instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig)
+/*****************************************************************************
+ *
+ *  Append the machine code corresponding to the given instruction descriptor
+ *  to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
+ *  is the instruction group that contains the instruction. Updates '*dp' to
+ *  point past the generated code, and returns the size of the instruction
+ *  descriptor in bytes.
+ */
+
+size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
 {
-#ifdef DEBUG
-    if (EMITVERBOSE)
-    {
-        unsigned idNum =
-            id->idDebugOnlyInfo()->idNum; // Do not remove this!  It is needed for VisualStudio conditional breakpoints
+    BYTE*       dst  = *dp;
+    BYTE*       odst = dst;
+    code_t      code = 0;
+    size_t      sz   = emitGetInstrDescSize(id); // TODO-ARM64-Cleanup: on ARM, this is set in each case. why?
+    instruction ins  = id->idIns();
+    insFormat   fmt  = id->idInsFmt();
+    emitAttr    size = id->idOpSize();
 
-        printf("IN%04x: ", idNum);
-    }
+#ifdef DEBUG
+#if DUMP_GC_TABLES
+    bool dspOffs = emitComp->opts.dspGCtbls;
+#else
+    bool dspOffs = !emitComp->opts.disDiffable;
 #endif
+#endif // DEBUG
 
-    if (pCode == NULL)
-    {
-        sz = 0;
-    }
+    assert(REG_NA == (int)REG_NA);
 
-    if (!isNew && !asmfm && sz)
+    /* What instruction format have we got? */
+
+    switch (fmt)
     {
-        doffs = true;
-    }
+        ssize_t  imm;
+        ssize_t  index;
+        ssize_t  index2;
+        unsigned cmode;
+        unsigned immShift;
+        emitAttr elemsize;
+        emitAttr datasize;
 
-    /* Display the instruction address */
+        case IF_BI_0A: // BI_0A   ......iiiiiiiiii iiiiiiiiiiiiiiii               simm26:00
+        case IF_BI_0B: // BI_0B   ......iiiiiiiiii iiiiiiiiiii.....               simm19:00
+        case IF_LARGEJMP:
+            assert(id->idGCref() == GCT_NONE);
+            assert(id->idIsBound());
+            dst = emitOutputLJ(ig, dst, id);
+            sz  = sizeof(instrDescJmp);
+            break;
 
-    emitDispInsAddr(pCode);
+        case IF_BI_0C: // BI_0C   ......iiiiiiiiii iiiiiiiiiiiiiiii               simm26:00
+            code = emitInsCode(ins, fmt);
+            sz   = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc);
+            dst += emitOutputCall(ig, dst, id, code);
+            // Always call RecordRelocation so that we wire in a JumpStub when we don't reach
+            emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_BRANCH26);
+            break;
 
-    /* Display the instruction offset */
+        case IF_BI_1A: // BI_1A   ......iiiiiiiiii iiiiiiiiiiittttt      Rt       simm19:00
+            assert(insOptsNone(id->idInsOpt()));
+            assert(id->idIsBound());
 
-    emitDispInsOffs(offset, doffs);
+            dst = emitOutputLJ(ig, dst, id);
+            sz  = sizeof(instrDescJmp);
+            break;
 
-    BYTE* pCodeRW = nullptr;
-    if (pCode != nullptr)
-    {
-        /* Display the instruction hex code */
-        assert(((pCode >= emitCodeBlock) && (pCode < emitCodeBlock + emitTotalHotCodeSize)) ||
-               ((pCode >= emitColdCodeBlock) && (pCode < emitColdCodeBlock + emitTotalColdCodeSize)));
+        case IF_BI_1B: // BI_1B   B.......bbbbbiii iiiiiiiiiiittttt      Rt imm6, simm14:00
+            assert(insOptsNone(id->idInsOpt()));
+            assert(id->idIsBound());
 
-        pCodeRW = pCode + writeableOffset;
-    }
+            dst = emitOutputLJ(ig, dst, id);
+            sz  = sizeof(instrDescJmp);
+            break;
 
-    emitDispInsHex(id, pCodeRW, sz);
+        case IF_BR_1A: // BR_1A   ................ ......nnnnn.....         Rn
+            assert(insOptsNone(id->idInsOpt()));
+            assert((ins == INS_ret) || (ins == INS_br));
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
 
-    printf("      ");
+            dst += emitOutput_Instr(dst, code);
+            break;
 
-    /* Get the instruction and format */
+        case IF_BR_1B: // BR_1B   ................ ......nnnnn.....         Rn
+            assert(insOptsNone(id->idInsOpt()));
+            assert((ins == INS_br_tail) || (ins == INS_blr));
+            code = emitInsCode(ins, fmt);
+
+            if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && id->idIsTlsGD())
+            {
+                emitRecordRelocation(odst, (CORINFO_METHOD_HANDLE)id->idAddr()->iiaAddr,
+                                     IMAGE_REL_AARCH64_TLSDESC_CALL);
+                code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+            }
+            else
+            {
+                code |= insEncodeReg_Rn(id->idReg3()); // nnnnn
+            }
+            dst += emitOutputCall(ig, dst, id, code);
+            sz = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc);
+            break;
+
+        case IF_LS_1A: // LS_1A   XX...V..iiiiiiii iiiiiiiiiiittttt      Rt    PC imm(1MB)
+        case IF_LARGELDC:
+            assert(insOptsNone(id->idInsOpt()));
+            assert(id->idIsBound());
+
+            dst = emitOutputLJ(ig, dst, id);
+            sz  = sizeof(instrDescJmp);
+            break;
+
+        case IF_LS_2A: // LS_2A   .X.......X...... ......nnnnnttttt      Rt Rn
+            assert(insOptsNone(id->idInsOpt()));
+            code = emitInsCode(ins, fmt);
+            // Is the target a vector register?
+            if (isVectorRegister(id->idReg1()))
+            {
+                code &= 0x3FFFFFFF;                                 // clear the size bits
+                code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
+                code |= insEncodeReg_Vt(id->idReg1());              // ttttt
+            }
+            else
+            {
+                code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
+                code |= insEncodeReg_Rt(id->idReg1());             // ttttt
+            }
+            code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            if (id->idIsTlsGD())
+            {
+                emitRecordRelocation(odst, (void*)emitGetInsSC(id), IMAGE_REL_AARCH64_TLSDESC_LD64_LO12);
+            }
+            break;
+
+        case IF_LS_2B: // LS_2B   .X.......Xiiiiii iiiiiinnnnnttttt      Rt Rn    imm(0-4095)
+            assert(insOptsNone(id->idInsOpt()));
+            imm = emitGetInsSC(id);
+            assert(isValidUimm<12>(imm));
+            code = emitInsCode(ins, fmt);
+            // Is the target a vector register?
+            if (isVectorRegister(id->idReg1()))
+            {
+                code &= 0x3FFFFFFF;                                 // clear the size bits
+                code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
+                code |= insEncodeReg_Vt(id->idReg1());              // ttttt
+            }
+            else
+            {
+                code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
+                code |= insEncodeReg_Rt(id->idReg1());             // ttttt
+            }
+            code |= ((code_t)imm << 10);           // iiiiiiiiiiii
+            code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_LS_2C: // LS_2C   .X.......X.iiiii iiiiPPnnnnnttttt      Rt Rn    imm(-256..+255) no/pre/post inc
+            assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+            imm = emitGetInsSC(id);
+            assert((imm >= -256) && (imm <= 255)); // signed 9 bits
+            imm &= 0x1ff;                          // force into unsigned 9 bit representation
+            code = emitInsCode(ins, fmt);
+            // Is the target a vector register?
+            if (isVectorRegister(id->idReg1()))
+            {
+                code &= 0x3FFFFFFF;                                 // clear the size bits
+                code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
+                code |= insEncodeReg_Vt(id->idReg1());              // ttttt
+            }
+            else
+            {
+                code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
+                code |= insEncodeReg_Rt(id->idReg1());             // ttttt
+            }
+            code |= insEncodeIndexedOpt(id->idInsOpt()); // PP
+            code |= ((code_t)imm << 12);                 // iiiiiiiii
+            code |= insEncodeReg_Rn(id->idReg2());       // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
 
-    instruction ins = id->idIns();
-    insFormat   fmt = id->idInsFmt();
+        case IF_LS_2D: // LS_2D   .Q.............. ....ssnnnnnttttt      Vt Rn
+        case IF_LS_2E: // LS_2E   .Q.............. ....ssnnnnnttttt      Vt Rn
+            elemsize = optGetElemsize(id->idInsOpt());
+            code     = emitInsCode(ins, fmt);
 
-    emitDispInst(ins);
+            code |= insEncodeVectorsize(id->idOpSize()); // Q
+            code |= insEncodeVLSElemsize(elemsize);      // ss
+            code |= insEncodeReg_Rn(id->idReg2());       // nnnnn
+            code |= insEncodeReg_Vt(id->idReg1());       // ttttt
 
-    /* If this instruction has just been added, check its size */
+            dst += emitOutput_Instr(dst, code);
+            break;
 
-    assert(isNew == false || (int)emitSizeOfInsDsc(id) == emitCurIGfreeNext - (BYTE*)id);
+        case IF_LS_2F: // LS_2F   .Q.............. xx.Sssnnnnnttttt      Vt[] Rn
+        case IF_LS_2G: // LS_2G   .Q.............. xx.Sssnnnnnttttt      Vt[] Rn
+            elemsize = id->idOpSize();
+            index    = id->idSmallCns();
+            code     = emitInsCode(ins, fmt);
 
-    /* Figure out the operand size */
-    emitAttr size = id->idOpSize();
-    emitAttr attr = size;
-    if (id->idGCref() == GCT_GCREF)
-        attr = EA_GCREF;
-    else if (id->idGCref() == GCT_BYREF)
-        attr = EA_BYREF;
+            code |= insEncodeVLSIndex(elemsize, index); // Q xx S ss
+            code |= insEncodeReg_Rn(id->idReg2());      // nnnnn
+            code |= insEncodeReg_Vt(id->idReg1());      // ttttt
 
-    switch (fmt)
-    {
-        ssize_t      imm;
-        int          doffs;
-        bitMaskImm   bmi;
-        halfwordImm  hwi;
-        condFlagsImm cfi;
-        unsigned     scale;
-        unsigned     immShift;
-        bool         hasShift;
-        const char*  methodName;
-        emitAttr     elemsize;
-        emitAttr     datasize;
-        emitAttr     srcsize;
-        emitAttr     dstsize;
-        ssize_t      index;
-        ssize_t      index2;
-        unsigned     registerListSize;
-        const char*  targetName;
+            dst += emitOutput_Instr(dst, code);
+            break;
 
-        case IF_BI_0A: // BI_0A   ......iiiiiiiiii iiiiiiiiiiiiiiii               simm26:00
-        case IF_BI_0B: // BI_0B   ......iiiiiiiiii iiiiiiiiiii.....               simm19:00
-        case IF_LARGEJMP:
-        {
-            if (fmt == IF_LARGEJMP)
+        case IF_LS_3A: // LS_3A   .X.......X.mmmmm oooS..nnnnnttttt      Rt Rn Rm ext(Rm) LSL {}
+            assert(insOptsLSExtend(id->idInsOpt()));
+            code = emitInsCode(ins, fmt);
+            // Is the target a vector register?
+            if (isVectorRegister(id->idReg1()))
             {
-                printf("(LARGEJMP)");
+                code &= 0x3FFFFFFF;                                 // clear the size bits
+                code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
+                code |= insEncodeReg_Vt(id->idReg1());              // ttttt
             }
-            if (id->idAddr()->iiaHasInstrCount())
+            else
             {
-                int instrCount = id->idAddr()->iiaGetInstrCount();
-
-                if (ig == nullptr)
-                {
-                    printf("pc%s%d instructions", (instrCount >= 0) ? "+" : "", instrCount);
-                }
-                else
-                {
-                    unsigned       insNum  = emitFindInsNum(ig, id);
-                    UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1);
-                    UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount);
-                    ssize_t        relOffs = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
-                    printf("pc%s%d (%d instructions)", (relOffs >= 0) ? "+" : "", (int)relOffs, (int)instrCount);
-                }
+                code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
+                code |= insEncodeReg_Rt(id->idReg1());             // ttttt
             }
-            else if (id->idIsBound())
+            code |= insEncodeExtend(id->idInsOpt()); // ooo
+            code |= insEncodeReg_Rn(id->idReg2());   // nnnnn
+            if (id->idIsLclVar())
             {
-                emitPrintLabel(id->idAddr()->iiaIGlabel);
+                code |= insEncodeReg_Rm(codeGen->rsGetRsvdReg()); // mmmmm
             }
             else
             {
-                printf("L_M%03u_" FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum);
+                code |= insEncodeReg3Scale(id->idReg3Scaled()); // S
+                code |= insEncodeReg_Rm(id->idReg3());          // mmmmm
             }
-        }
-        break;
-
-        case IF_BI_0C: // BI_0C   ......iiiiiiiiii iiiiiiiiiiiiiiii               simm26:00
-            methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
-            printf("%s", methodName);
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_BI_1A: // BI_1A   ......iiiiiiiiii iiiiiiiiiiittttt      Rt       simm19:00
-        case IF_BI_1B: // BI_1B   B.......bbbbbiii iiiiiiiiiiittttt      Rt imm6, simm14:00
-        {
+        case IF_LS_3B: // LS_3B   X............... .aaaaannnnnddddd      Rd Ra Rn
             assert(insOptsNone(id->idInsOpt()));
-            emitDispReg(id->idReg1(), size, true);
-
-            if (fmt == IF_BI_1B)
+            code = emitInsCode(ins, fmt);
+            // Is the target a vector register?
+            if (isVectorRegister(id->idReg1()))
             {
-                emitDispImm(emitGetInsSC(id), true);
+                code &= 0x3FFFFFFF;                                  // clear the size bits
+                code |= insEncodeDatasizeVPLS(code, id->idOpSize()); // XX
+                code |= insEncodeReg_Vt(id->idReg1());               // ttttt
+                code |= insEncodeReg_Va(id->idReg2());               // aaaaa
+            }
+            else
+            {
+                code |= insEncodeDatasize(id->idOpSize()); // X
+                code |= insEncodeReg_Rt(id->idReg1());     // ttttt
+                code |= insEncodeReg_Ra(id->idReg2());     // aaaaa
             }
+            code |= insEncodeReg_Rn(id->idReg3()); // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
 
-            if (id->idAddr()->iiaHasInstrCount())
+        case IF_LS_3C: // LS_3C   X......PP.iiiiii iaaaaannnnnddddd      Rd Ra Rn imm(im7,sh)
+            assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+            imm = emitGetInsSC(id);
+            assert((imm >= -64) && (imm <= 63)); // signed 7 bits
+            imm &= 0x7f;                         // force into unsigned 7 bit representation
+            code = emitInsCode(ins, fmt);
+            // Is the target a vector register?
+            if (isVectorRegister(id->idReg1()))
             {
-                int instrCount = id->idAddr()->iiaGetInstrCount();
+                code &= 0x3FFFFFFF;                                  // clear the size bits
+                code |= insEncodeDatasizeVPLS(code, id->idOpSize()); // XX
+                code |= insEncodeReg_Vt(id->idReg1());               // ttttt
+                code |= insEncodeReg_Va(id->idReg2());               // aaaaa
+            }
+            else
+            {
+                code |= insEncodeDatasize(id->idOpSize()); // X
+                code |= insEncodeReg_Rt(id->idReg1());     // ttttt
+                code |= insEncodeReg_Ra(id->idReg2());     // aaaaa
+            }
+            code |= insEncodePairIndexedOpt(ins, id->idInsOpt()); // PP
+            code |= ((code_t)imm << 15);                          // iiiiiiiii
+            code |= insEncodeReg_Rn(id->idReg3());                // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
 
-                if (ig == nullptr)
+        case IF_LS_3D: // LS_3D   .X.......X.mmmmm ......nnnnnttttt      Wm Rt Rn
+            code = emitInsCode(ins, fmt);
+            // Arm64 store exclusive unpredictable cases
+            assert(id->idReg1() != id->idReg2());
+            assert(id->idReg1() != id->idReg3());
+            code |= insEncodeDatasizeLS(code, id->idOpSize()); // X
+            code |= insEncodeReg_Rm(id->idReg1());             // mmmmm
+            code |= insEncodeReg_Rt(id->idReg2());             // ttttt
+            code |= insEncodeReg_Rn(id->idReg3());             // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_LS_3E: // LS_3E   .X.........mmmmm ......nnnnnttttt      Rm Rt Rn ARMv8.1 LSE Atomics
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeDatasizeLS(code, id->idOpSize()); // X
+            code |= insEncodeReg_Rm(id->idReg1());             // mmmmm
+            code |= insEncodeReg_Rt(id->idReg2());             // ttttt
+            code |= insEncodeReg_Rn(id->idReg3());             // nnnnn
+            dst += emitOutput_Instr(dst, code);
+
+            // Some instructions with this encoding return their result in the
+            // second operand register instead of the first so we special case
+            // the GC update here and skip the common path down below.
+            if (emitInsDestIsOp2(ins))
+            {
+                if (id->idGCref() != GCT_NONE)
                 {
-                    printf("pc%s%d instructions", (instrCount >= 0) ? "+" : "", instrCount);
+                    emitGCregLiveUpd(id->idGCref(), id->idReg2(), dst);
                 }
                 else
                 {
-                    unsigned       insNum  = emitFindInsNum(ig, id);
-                    UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1);
-                    UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount);
-                    ssize_t        relOffs = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
-                    printf("pc%s%d (%d instructions)", (relOffs >= 0) ? "+" : "", (int)relOffs, (int)instrCount);
+                    emitGCregDeadUpd(id->idReg2(), dst);
                 }
+
+                goto SKIP_GC_UPDATE;
             }
-            else if (id->idIsBound())
-            {
-                emitPrintLabel(id->idAddr()->iiaIGlabel);
-            }
-            else
-            {
-                printf("L_M%03u_" FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum);
-            }
-        }
-        break;
 
-        case IF_BR_1A: // BR_1A   ................ ......nnnnn.....         Rn
-            assert(insOptsNone(id->idInsOpt()));
-            emitDispReg(id->idReg1(), size, false);
             break;
 
-        case IF_BR_1B: // BR_1B   ................ ......nnnnn.....         Rn
-            // The size of a branch target is always EA_PTRSIZE
-            assert(insOptsNone(id->idInsOpt()));
-            emitDispReg(id->idReg3(), EA_PTRSIZE, false);
+        case IF_LS_3F: // LS_3F   .Q.........mmmmm ....ssnnnnnttttt      Vt Rn Rm
+            elemsize = optGetElemsize(id->idInsOpt());
+            code     = emitInsCode(ins, fmt);
+
+            code |= insEncodeVectorsize(id->idOpSize()); // Q
+            code |= insEncodeReg_Rm(id->idReg3());       // mmmmm
+            code |= insEncodeVLSElemsize(elemsize);      // ss
+            code |= insEncodeReg_Rn(id->idReg2());       // nnnnn
+            code |= insEncodeReg_Vt(id->idReg1());       // ttttt
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_LS_3G: // LS_3G   .Q.........mmmmm ...Sssnnnnnttttt      Vt[] Rn Rm
+            elemsize = id->idOpSize();
+            index    = id->idSmallCns();
+            code     = emitInsCode(ins, fmt);
+
+            code |= insEncodeVLSIndex(elemsize, index); // Q xx S ss
+            code |= insEncodeReg_Rm(id->idReg3());      // mmmmm
+            code |= insEncodeReg_Rn(id->idReg2());      // nnnnn
+            code |= insEncodeReg_Vt(id->idReg1());      // ttttt
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_DI_1A: // DI_1A   X.......shiiiiii iiiiiinnnnn.....         Rn    imm(i12,sh)
+            assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
+            imm = emitGetInsSC(id);
+            assert(isValidUimm<12>(imm));
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeDatasize(id->idOpSize());   // X
+            code |= insEncodeShiftImm12(id->idInsOpt()); // sh
+            code |= ((code_t)imm << 10);                 // iiiiiiiiiiii
+            code |= insEncodeReg_Rn(id->idReg1());       // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_DI_1B: // DI_1B   X........hwiiiii iiiiiiiiiiiddddd      Rd       imm(i16,hw)
+            imm = emitGetInsSC(id);
+            assert(isValidImmHWVal(imm, id->idOpSize()));
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeDatasize(id->idOpSize()); // X
+            code |= ((code_t)imm << 5);                // hwiiiii iiiiiiiiiii
+            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_DI_1C: // DI_1C   X........Nrrrrrr ssssssnnnnn.....         Rn    imm(N,r,s)
+            imm = emitGetInsSC(id);
+            assert(isValidImmNRS(imm, id->idOpSize()));
+            code = emitInsCode(ins, fmt);
+            code |= ((code_t)imm << 10);               // Nrrrrrrssssss
+            code |= insEncodeDatasize(id->idOpSize()); // X
+            code |= insEncodeReg_Rn(id->idReg1());     // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_DI_1D: // DI_1D   X........Nrrrrrr ssssss.....ddddd      Rd       imm(N,r,s)
+            imm = emitGetInsSC(id);
+            assert(isValidImmNRS(imm, id->idOpSize()));
+            code = emitInsCode(ins, fmt);
+            code |= ((code_t)imm << 10);               // Nrrrrrrssssss
+            code |= insEncodeDatasize(id->idOpSize()); // X
+            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_LS_1A: // LS_1A   XX...V..iiiiiiii iiiiiiiiiiittttt      Rt    PC imm(1MB)
         case IF_DI_1E: // DI_1E   .ii.....iiiiiiii iiiiiiiiiiiddddd      Rd       simm21
-        case IF_LARGELDC:
         case IF_LARGEADR:
             assert(insOptsNone(id->idInsOpt()));
-            emitDispReg(id->idReg1(), size, true);
-            imm        = emitGetInsSC(id);
-            targetName = nullptr;
-
-            /* Is this actually a reference to a data section? */
-            if (fmt == IF_LARGEADR)
+            if (id->idIsReloc())
             {
-                printf("(LARGEADR)");
+                code = emitInsCode(ins, fmt);
+                code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+                dst += emitOutput_Instr(dst, code);
+                emitRecordRelocation(odst, id->idAddr()->iiaAddr,
+                                     id->idIsTlsGD() ? IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21
+                                                     : IMAGE_REL_ARM64_PAGEBASE_REL21);
             }
-            else if (fmt == IF_LARGELDC)
+            else
             {
-                printf("(LARGELDC)");
+                // Local jmp/load case which does not need a relocation.
+                assert(id->idIsBound());
+                dst = emitOutputLJ(ig, dst, id);
             }
+            sz = sizeof(instrDescJmp);
+            break;
 
-            printf("[");
-            if (id->idAddr()->iiaIsJitDataOffset())
+        case IF_DI_1F: // DI_1F   X..........iiiii cccc..nnnnn.nzcv      Rn imm5  nzcv cond
+            imm = emitGetInsSC(id);
+            assert(isValidImmCondFlagsImm5(imm));
             {
-                doffs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd);
-                /* Display a data section reference */
-
-                if (doffs & 1)
-                    printf("@CNS%02u", doffs - 1);
-                else
-                    printf("@RWD%02u", doffs);
-
-                if (imm != 0)
-                    printf("%+Id", imm);
+                condFlagsImm cfi;
+                cfi.immCFVal = (unsigned)imm;
+                code         = emitInsCode(ins, fmt);
+                code |= insEncodeDatasize(id->idOpSize()); // X
+                code |= insEncodeReg_Rn(id->idReg1());     // nnnnn
+                code |= ((code_t)cfi.imm5 << 16);          // iiiii
+                code |= insEncodeFlags(cfi.flags);         // nzcv
+                code |= insEncodeCond(cfi.cond);           // cccc
+                dst += emitOutput_Instr(dst, code);
             }
-            else
-            {
-                assert(imm == 0);
-                if (id->idIsReloc())
-                {
-                    printf("HIGH RELOC ");
-                    emitDispImm((ssize_t)id->idAddr()->iiaAddr, false);
-                    size_t targetHandle = id->idDebugOnlyInfo()->idMemCookie;
+            break;
 
-#ifdef DEBUG
-                    if (targetHandle == THT_InitializeArrayIntrinsics)
-                    {
-                        targetName = "InitializeArrayIntrinsics";
-                    }
-                    else if (targetHandle == THT_GSCookieCheck)
-                    {
-                        targetName = "GlobalSecurityCookieCheck";
-                    }
-                    else if (targetHandle == THT_SetGSCookie)
-                    {
-                        targetName = "SetGlobalSecurityCookie";
-                    }
-#endif
-                }
-                else if (id->idIsBound())
-                {
-                    emitPrintLabel(id->idAddr()->iiaIGlabel);
-                }
-                else
-                {
-                    printf("L_M%03u_" FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum);
-                }
-            }
-            printf("]");
-            if (targetName != nullptr)
-            {
-                printf("      // [%s]", targetName);
-            }
-            else
+        case IF_DI_2A: // DI_2A   X.......shiiiiii iiiiiinnnnnddddd      Rd Rn    imm(i12,sh)
+            assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
+            imm = emitGetInsSC(id);
+            assert(isValidUimm<12>(imm));
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeDatasize(id->idOpSize());   // X
+            code |= insEncodeShiftImm12(id->idInsOpt()); // sh
+            code |= ((code_t)imm << 10);                 // iiiiiiiiiiii
+            code |= insEncodeReg_Rd(id->idReg1());       // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());       // nnnnn
+            dst += emitOutput_Instr(dst, code);
+
+            if (id->idIsReloc())
             {
-                emitDispCommentForHandle(id->idDebugOnlyInfo()->idMemCookie, 0, id->idDebugOnlyInfo()->idFlags);
+                assert(sz == sizeof(instrDesc));
+                assert(id->idAddr()->iiaAddr != nullptr);
+                emitRecordRelocation(odst, id->idAddr()->iiaAddr,
+                                     id->idIsTlsGD() ? IMAGE_REL_AARCH64_TLSDESC_ADD_LO12
+                                                     : IMAGE_REL_ARM64_PAGEOFFSET_12A);
             }
             break;
 
-        case IF_LS_2A: // LS_2A   .X.......X...... ......nnnnnttttt      Rt Rn
-            assert(insOptsNone(id->idInsOpt()));
-            assert(emitGetInsSC(id) == 0);
-            emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
-            emitDispAddrRI(id->idReg2(), id->idInsOpt(), 0);
-            break;
-
-        case IF_LS_2B: // LS_2B   .X.......Xiiiiii iiiiiinnnnnttttt      Rt Rn    imm(0-4095)
-            assert(insOptsNone(id->idInsOpt()));
-            imm   = emitGetInsSC(id);
-            scale = NaturalScale_helper(emitInsLoadStoreSize(id));
-            imm <<= scale; // The immediate is scaled by the size of the ld/st
-            emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
-            emitDispAddrRI(id->idReg2(), id->idInsOpt(), imm);
+        case IF_DI_2B: // DI_2B   X.........Xnnnnn ssssssnnnnnddddd      Rd Rn    imm(0-63)
+            code = emitInsCode(ins, fmt);
+            imm  = emitGetInsSC(id);
+            assert(isValidImmShift(imm, id->idOpSize()));
+            code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X
+            code |= insEncodeReg_Rd(id->idReg1());             // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());             // nnnnn
+            code |= insEncodeReg_Rm(id->idReg2());             // Reg2 also in mmmmm
+            code |= insEncodeShiftCount(imm, id->idOpSize());  // ssssss
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_LS_2C: // LS_2C   .X.......X.iiiii iiiiPPnnnnnttttt      Rt Rn    imm(-256..+255) no/pre/post inc
-            assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+        case IF_DI_2C: // DI_2C   X........Nrrrrrr ssssssnnnnnddddd      Rd Rn    imm(N,r,s)
             imm = emitGetInsSC(id);
-            emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
-            emitDispAddrRI(id->idReg2(), id->idInsOpt(), imm);
+            assert(isValidImmNRS(imm, id->idOpSize()));
+            code = emitInsCode(ins, fmt);
+            code |= ((code_t)imm << 10);               // Nrrrrrrssssss
+            code |= insEncodeDatasize(id->idOpSize()); // X
+            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_LS_2D: // LS_2D   .Q.............. ....ssnnnnnttttt      Vt Rn
-        case IF_LS_2E: // LS_2E   .Q.............. ....ssnnnnnttttt      Vt Rn
-            registerListSize = insGetRegisterListSize(id->idIns());
-            emitDispVectorRegList(id->idReg1(), registerListSize, id->idInsOpt(), true);
-
-            if (fmt == IF_LS_2D)
+        case IF_DI_2D: // DI_2D   X........Nrrrrrr ssssssnnnnnddddd      Rd Rn    imr, imms   (N,r,s)
+            if (ins == INS_asr || ins == INS_lsl || ins == INS_lsr)
             {
-                // Load/Store multiple structures       base register
-                // Load single structure and replicate  base register
-                emitDispAddrRI(id->idReg2(), INS_OPTS_NONE, 0);
+                imm = emitGetInsSC(id);
+                assert(isValidImmShift(imm, id->idOpSize()));
+
+                // Shift immediates are aliases of the SBFM/UBFM instructions
+                // that actually take 2 registers and 2 constants,
+                // Since we stored the shift immediate value
+                // we need to calculate the N,R and S values here.
+
+                bitMaskImm bmi;
+                bmi.immNRS = 0;
+
+                bmi.immN = (size == EA_8BYTE) ? 1 : 0;
+                bmi.immR = imm;
+                bmi.immS = (size == EA_8BYTE) ? 0x3f : 0x1f;
+
+                // immR and immS are now set correctly for INS_asr and INS_lsr
+                // but for INS_lsl we have to adjust the values for immR and immS
+                //
+                if (ins == INS_lsl)
+                {
+                    bmi.immR = -imm & bmi.immS;
+                    bmi.immS = bmi.immS - imm;
+                }
+
+                // setup imm with the proper 13 bit value N:R:S
+                //
+                imm = bmi.immNRS;
             }
             else
             {
-                // Load/Store multiple structures       post-indexed by an immediate
-                // Load single structure and replicate  post-indexed by an immediate
-                emitDispAddrRI(id->idReg2(), INS_OPTS_POST_INDEX, id->idSmallCns());
+                // The other instructions have already have encoded N,R and S values
+                imm = emitGetInsSC(id);
             }
-            break;
+            assert(isValidImmNRS(imm, id->idOpSize()));
 
-        case IF_LS_2F: // LS_2F   .Q.............. xx.Sssnnnnnttttt      Vt[] Rn
-        case IF_LS_2G: // LS_2G   .Q.............. xx.Sssnnnnnttttt      Vt[] Rn
-            registerListSize = insGetRegisterListSize(id->idIns());
-            elemsize         = id->idOpSize();
-            emitDispVectorElemList(id->idReg1(), registerListSize, elemsize, id->idSmallCns(), true);
+            code = emitInsCode(ins, fmt);
+            code |= ((code_t)imm << 10);               // Nrrrrrrssssss
+            code |= insEncodeDatasize(id->idOpSize()); // X
+            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
 
-            if (fmt == IF_LS_2F)
-            {
-                // Load/Store single structure  base register
-                emitDispAddrRI(id->idReg2(), INS_OPTS_NONE, 0);
-            }
-            else
+        case IF_DR_1D: // DR_1D   X............... cccc.......ddddd      Rd       cond
+            imm = emitGetInsSC(id);
+            assert(isValidImmCond(imm));
             {
-                // Load/Store single structure  post-indexed by an immediate
-                emitDispAddrRI(id->idReg2(), INS_OPTS_POST_INDEX, (registerListSize * elemsize));
+                condFlagsImm cfi;
+                cfi.immCFVal = (unsigned)imm;
+                code         = emitInsCode(ins, fmt);
+                code |= insEncodeDatasize(id->idOpSize()); // X
+                code |= insEncodeReg_Rd(id->idReg1());     // ddddd
+                code |= insEncodeInvertedCond(cfi.cond);   // cccc
+                dst += emitOutput_Instr(dst, code);
             }
             break;
 
-        case IF_LS_3A: // LS_3A   .X.......X.mmmmm oooS..nnnnnttttt      Rt Rn Rm ext(Rm) LSL {}
-            assert(insOptsLSExtend(id->idInsOpt()));
-            emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
-            if (id->idIsLclVar())
-            {
-                emitDispAddrRRExt(id->idReg2(), codeGen->rsGetRsvdReg(), id->idInsOpt(), false, size);
-            }
-            else
+        case IF_DR_2A: // DR_2A   X..........mmmmm ......nnnnn.....         Rn Rm
+            assert(insOptsNone(id->idInsOpt()));
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeDatasize(id->idOpSize()); // X
+            code |= insEncodeReg_Rn(id->idReg1());     // nnnnn
+            code |= insEncodeReg_Rm(id->idReg2());     // mmmmm
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_DR_2B: // DR_2B   X.......sh.mmmmm ssssssnnnnn.....         Rn Rm {LSL,LSR,ASR,ROR} imm(0-63)
+            code = emitInsCode(ins, fmt);
+            imm  = emitGetInsSC(id);
+            assert(isValidImmShift(imm, id->idOpSize()));
+            code |= insEncodeDatasize(id->idOpSize());        // X
+            code |= insEncodeShiftType(id->idInsOpt());       // sh
+            code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+            code |= insEncodeReg_Rn(id->idReg1());            // nnnnn
+            code |= insEncodeReg_Rm(id->idReg2());            // mmmmm
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_DR_2C: // DR_2C   X..........mmmmm ooosssnnnnn.....         Rn Rm ext(Rm) LSL imm(0-4)
+            code = emitInsCode(ins, fmt);
+            imm  = emitGetInsSC(id);
+            assert((imm >= 0) && (imm <= 4));          // imm [0..4]
+            code |= insEncodeDatasize(id->idOpSize()); // X
+            code |= insEncodeExtend(id->idInsOpt());   // ooo
+            code |= insEncodeExtendScale(imm);         // sss
+            code |= insEncodeReg_Rn(id->idReg1());     // nnnnn
+            code |= insEncodeReg_Rm(id->idReg2());     // mmmmm
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_DR_2D: // DR_2D   X..........nnnnn cccc..nnnnnddddd      Rd Rn    cond
+            imm = emitGetInsSC(id);
+            assert(isValidImmCond(imm));
             {
-                emitDispAddrRRExt(id->idReg2(), id->idReg3(), id->idInsOpt(), id->idReg3Scaled(), size);
+                condFlagsImm cfi;
+                cfi.immCFVal = (unsigned)imm;
+                code         = emitInsCode(ins, fmt);
+                code |= insEncodeDatasize(id->idOpSize()); // X
+                code |= insEncodeReg_Rd(id->idReg1());     // ddddd
+                code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
+                code |= insEncodeReg_Rm(id->idReg2());     // mmmmm
+                code |= insEncodeInvertedCond(cfi.cond);   // cccc
+                dst += emitOutput_Instr(dst, code);
             }
             break;
 
-        case IF_LS_3B: // LS_3B   X............... .aaaaannnnnddddd      Rt Ra Rn
-            assert(insOptsNone(id->idInsOpt()));
-            assert(emitGetInsSC(id) == 0);
-            emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
-            emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true);
-            emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0);
+        case IF_DR_2E: // DR_2E   X..........mmmmm ...........ddddd      Rd    Rm
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeDatasize(id->idOpSize()); // X
+            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
+            code |= insEncodeReg_Rm(id->idReg2());     // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_LS_3C: // LS_3C   X.........iiiiii iaaaaannnnnddddd      Rt Ra Rn imm(im7,sh)
-            assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
-            imm   = emitGetInsSC(id);
-            scale = NaturalScale_helper(emitInsLoadStoreSize(id));
-            imm <<= scale;
-            emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
-            emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true);
-            emitDispAddrRI(id->idReg3(), id->idInsOpt(), imm);
+        case IF_DR_2F: // DR_2F   X.......sh.mmmmm ssssss.....ddddd      Rd    Rm {LSL,LSR,ASR} imm(0-63)
+            code = emitInsCode(ins, fmt);
+            imm  = emitGetInsSC(id);
+            assert(isValidImmShift(imm, id->idOpSize()));
+            code |= insEncodeDatasize(id->idOpSize());        // X
+            code |= insEncodeShiftType(id->idInsOpt());       // sh
+            code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+            code |= insEncodeReg_Rd(id->idReg1());            // ddddd
+            code |= insEncodeReg_Rm(id->idReg2());            // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_LS_3D: // LS_3D   .X.......X.mmmmm ......nnnnnttttt      Wm Rt Rn
-            assert(insOptsNone(id->idInsOpt()));
-            emitDispReg(id->idReg1(), EA_4BYTE, true);
-            emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true);
-            emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0);
+        case IF_DR_2G: // DR_2G   X............... .....xnnnnnddddd      Rd Rn
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeDatasize(id->idOpSize()); // X
+            if (ins == INS_rev)
+            {
+                if (size == EA_8BYTE)
+                {
+                    code |= 0x00000400; // x - bit at location 10
+                }
+            }
+            code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+            code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_LS_3E: // LS_3E   .X.........mmmmm ......nnnnnttttt      Rm Rt Rn ARMv8.1 LSE Atomics
-            assert(insOptsNone(id->idInsOpt()));
-            assert((EA_SIZE(size) == 4) || (EA_SIZE(size) == 8));
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, true);
-            emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0);
+        case IF_DR_2H: // DR_2H   X........X...... ......nnnnnddddd      Rd Rn
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X
+            code |= insEncodeReg_Rd(id->idReg1());             // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());             // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_LS_3F: // LS_3F   .Q.........mmmmm ....ssnnnnnttttt      Vt Rn Rm
-        case IF_LS_3G: // LS_3G   .Q.........mmmmm ...Sssnnnnnttttt      Vt[] Rn Rm
-            registerListSize = insGetRegisterListSize(id->idIns());
+        case IF_DR_2I: // DR_2I   X..........mmmmm cccc..nnnnn.nzcv      Rn Rm    nzcv cond
+            imm = emitGetInsSC(id);
+            assert(isValidImmCondFlags(imm));
+            {
+                condFlagsImm cfi;
+                cfi.immCFVal = (unsigned)imm;
+                code         = emitInsCode(ins, fmt);
+                code |= insEncodeDatasize(id->idOpSize()); // X
+                code |= insEncodeReg_Rn(id->idReg1());     // nnnnn
+                code |= insEncodeReg_Rm(id->idReg2());     // mmmmm
+                code |= insEncodeFlags(cfi.flags);         // nzcv
+                code |= insEncodeCond(cfi.cond);           // cccc
+                dst += emitOutput_Instr(dst, code);
+            }
+            break;
 
-            if (fmt == IF_LS_3F)
+        case IF_DR_3A: // DR_3A   X..........mmmmm ......nnnnnmmmmm      Rd Rn Rm
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeDatasize(id->idOpSize()); // X
+            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
+            if (id->idIsLclVar())
             {
-                // Load/Store multiple structures       post-indexed by a register
-                // Load single structure and replicate  post-indexed by a register
-                emitDispVectorRegList(id->idReg1(), registerListSize, id->idInsOpt(), true);
+                code |= insEncodeReg_Rm(codeGen->rsGetRsvdReg()); // mmmmm
             }
             else
             {
-                // Load/Store single structure          post-indexed by a register
-                elemsize = id->idOpSize();
-                emitDispVectorElemList(id->idReg1(), registerListSize, elemsize, id->idSmallCns(), true);
+                code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
             }
+            dst += emitOutput_Instr(dst, code);
+            break;
 
-            printf("[");
-            emitDispReg(encodingZRtoSP(id->idReg2()), EA_8BYTE, false);
-            printf("], ");
-            emitDispReg(id->idReg3(), EA_8BYTE, false);
+        case IF_DR_3B: // DR_3B   X.......sh.mmmmm ssssssnnnnnddddd      Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
+            code = emitInsCode(ins, fmt);
+            imm  = emitGetInsSC(id);
+            assert(isValidImmShift(imm, id->idOpSize()));
+            code |= insEncodeDatasize(id->idOpSize());        // X
+            code |= insEncodeReg_Rd(id->idReg1());            // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());            // nnnnn
+            code |= insEncodeReg_Rm(id->idReg3());            // mmmmm
+            code |= insEncodeShiftType(id->idInsOpt());       // sh
+            code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DI_1A: // DI_1A   X.......shiiiiii iiiiiinnnnn.....      Rn       imm(i12,sh)
-            emitDispReg(id->idReg1(), size, true);
-            emitDispImmOptsLSL(emitGetInsSC(id), insOptsLSL12(id->idInsOpt()), 12);
-            emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags);
+        case IF_DR_3C: // DR_3C   X..........mmmmm ooosssnnnnnddddd      Rd Rn Rm ext(Rm) LSL imm(0-4)
+            code = emitInsCode(ins, fmt);
+            imm  = emitGetInsSC(id);
+            assert((imm >= 0) && (imm <= 4));          // imm [0..4]
+            code |= insEncodeDatasize(id->idOpSize()); // X
+            code |= insEncodeExtend(id->idInsOpt());   // ooo
+            code |= insEncodeExtendScale(imm);         // sss
+            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
+            code |= insEncodeReg_Rm(id->idReg3());     // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DI_1B: // DI_1B   X........hwiiiii iiiiiiiiiiiddddd      Rd       imm(i16,hw)
-            emitDispReg(id->idReg1(), size, true);
-            hwi.immHWVal = (unsigned)emitGetInsSC(id);
-            if (ins == INS_mov)
-            {
-                emitDispImm(emitDecodeHalfwordImm(hwi, size), false);
-            }
-            else // movz, movn, movk
+        case IF_DR_3D: // DR_3D   X..........mmmmm cccc..nnnnnddddd      Rd Rn Rm cond
+            imm = emitGetInsSC(id);
+            assert(isValidImmCond(imm));
             {
-                emitDispImm(hwi.immVal, false);
-                if (hwi.immHW != 0)
-                {
-                    emitDispShiftOpts(INS_OPTS_LSL);
-                    emitDispImm(hwi.immHW * 16, false);
-                }
+                condFlagsImm cfi;
+                cfi.immCFVal = (unsigned)imm;
+                code         = emitInsCode(ins, fmt);
+                code |= insEncodeDatasize(id->idOpSize()); // X
+                code |= insEncodeReg_Rd(id->idReg1());     // ddddd
+                code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
+                code |= insEncodeReg_Rm(id->idReg3());     // mmmmm
+                code |= insEncodeCond(cfi.cond);           // cccc
+                dst += emitOutput_Instr(dst, code);
             }
-            emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags);
             break;
 
-        case IF_DI_1C: // DI_1C   X........Nrrrrrr ssssssnnnnn.....         Rn    imm(N,r,s)
-            emitDispReg(id->idReg1(), size, true);
-            bmi.immNRS = (unsigned)emitGetInsSC(id);
-            emitDispImm(emitDecodeBitMaskImm(bmi, size), false);
-            emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags);
+        case IF_DR_3E: // DR_3E   X........X.mmmmm ssssssnnnnnddddd      Rd Rn Rm imm(0-63)
+            code = emitInsCode(ins, fmt);
+            imm  = emitGetInsSC(id);
+            assert(isValidImmShift(imm, id->idOpSize()));
+            code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X
+            code |= insEncodeReg_Rd(id->idReg1());             // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());             // nnnnn
+            code |= insEncodeReg_Rm(id->idReg3());             // mmmmm
+            code |= insEncodeShiftCount(imm, id->idOpSize());  // ssssss
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DI_1D: // DI_1D   X........Nrrrrrr ssssss.....ddddd      Rd       imm(N,r,s)
-            emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
-            bmi.immNRS = (unsigned)emitGetInsSC(id);
-            emitDispImm(emitDecodeBitMaskImm(bmi, size), false);
-            emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags);
+        case IF_DR_4A: // DR_4A   X..........mmmmm .aaaaannnnnmmmmm      Rd Rn Rm Ra
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeDatasize(id->idOpSize()); // X
+            code |= insEncodeReg_Rd(id->idReg1());     // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());     // nnnnn
+            code |= insEncodeReg_Rm(id->idReg3());     // mmmmm
+            code |= insEncodeReg_Ra(id->idReg4());     // aaaaa
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DI_2A: // DI_2A   X.......shiiiiii iiiiiinnnnnddddd      Rd Rn    imm(i12,sh)
-            if ((ins == INS_add) || (ins == INS_sub))
-            {
-                emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
-                emitDispReg(encodingZRtoSP(id->idReg2()), size, true);
-            }
-            else
-            {
-                emitDispReg(id->idReg1(), size, true);
-                emitDispReg(id->idReg2(), size, true);
+        case IF_DV_1A: // DV_1A   .........X.iiiii iii........ddddd      Vd imm8    (fmov - immediate scalar)
+            imm      = emitGetInsSC(id);
+            elemsize = id->idOpSize();
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeFloatElemsize(elemsize); // X
+            code |= ((code_t)imm << 13);              // iiiii iii
+            code |= insEncodeReg_Vd(id->idReg1());    // ddddd
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_DV_1B: // DV_1B   .QX..........iii cmod..iiiiiddddd      Vd imm8    (immediate vector)
+            imm      = emitGetInsSC(id) & 0x0ff;
+            immShift = (emitGetInsSC(id) & 0x700) >> 8;
+            elemsize = optGetElemsize(id->idInsOpt());
+            cmode    = 0;
+            switch (elemsize)
+            { // cmode
+                case EA_1BYTE:
+                    cmode = 0xE; // 1110
+                    break;
+                case EA_2BYTE:
+                    cmode = 0x8;
+                    cmode |= (immShift << 1); // 10x0
+                    break;
+                case EA_4BYTE:
+                    if (immShift < 4)
+                    {
+                        cmode = 0x0;
+                        cmode |= (immShift << 1); // 0xx0
+                    }
+                    else // MSL
+                    {
+                        cmode = 0xC;
+                        if (immShift & 2)
+                            cmode |= 1; // 110x
+                    }
+                    break;
+                case EA_8BYTE:
+                    cmode = 0xE; // 1110
+                    break;
+                default:
+                    unreached();
+                    break;
             }
-            if (id->idIsReloc())
+
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeVectorsize(id->idOpSize()); // Q
+            if ((ins == INS_fmov) || (ins == INS_movi))
             {
-                assert(ins == INS_add);
-                printf("[LOW RELOC ");
-                emitDispImm((ssize_t)id->idAddr()->iiaAddr, false);
-                printf("]");
+                if (elemsize == EA_8BYTE)
+                {
+                    code |= 0x20000000; // X
+                }
             }
-            else
+            if (ins != INS_fmov)
             {
-                emitDispImmOptsLSL(emitGetInsSC(id), insOptsLSL12(id->idInsOpt()), 12);
+                assert((cmode >= 0) && (cmode <= 0xF));
+                code |= (cmode << 12); // cmod
             }
+            code |= (((code_t)imm >> 5) << 16);    // iii
+            code |= (((code_t)imm & 0x1f) << 5);   // iiiii
+            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DI_2B: // DI_2B   X........X.nnnnn ssssssnnnnnddddd      Rd Rn    imm(0-63)
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, true);
-            emitDispImm(emitGetInsSC(id), false);
+        case IF_DV_1C: // DV_1C   .........X...... ......nnnnn.....      Vn #0.0    (fcmp - with zero)
+            elemsize = id->idOpSize();
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeFloatElemsize(elemsize); // X
+            code |= insEncodeReg_Vn(id->idReg1());    // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DI_2C: // DI_2C   X........Nrrrrrr ssssssnnnnnddddd      Rd Rn    imm(N,r,s)
-            if (ins == INS_ands)
+        case IF_DV_2A: // DV_2A   .Q.......X...... ......nnnnnddddd      Vd Vn      (fabs, fcvt - vector)
+        case IF_DV_2R: // DV_2R   .Q.......X...... ......nnnnnddddd      Sd Vn      (fmaxnmv, fmaxv, fminnmv, fminv)
+            elemsize = optGetElemsize(id->idInsOpt());
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeVectorsize(id->idOpSize()); // Q
+            if ((ins == INS_fcvtl) || (ins == INS_fcvtl2) || (ins == INS_fcvtn) || (ins == INS_fcvtn2))
             {
-                emitDispReg(id->idReg1(), size, true);
+                // fcvtl{2} and fcvtn{2} encode the element size as
+                //   esize = 16 << UInt(sz)
+                if (elemsize == EA_4BYTE)
+                {
+                    code |= 0x00400000; // X
+                }
+                else
+                {
+                    assert(elemsize == EA_2BYTE);
+                }
             }
             else
             {
-                emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+                code |= insEncodeFloatElemsize(elemsize); // X
             }
-            emitDispReg(id->idReg2(), size, true);
-            bmi.immNRS = (unsigned)emitGetInsSC(id);
-            emitDispImm(emitDecodeBitMaskImm(bmi, size), false);
+            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DI_2D: // DI_2D   X........Nrrrrrr ssssssnnnnnddddd      Rd Rn    imr, ims   (N,r,s)
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, true);
-
-            imm        = emitGetInsSC(id);
-            bmi.immNRS = (unsigned)imm;
-
-            switch (ins)
+        case IF_DV_2B: // DV_2B   .Q.........iiiii ......nnnnnddddd      Rd Vn[] (umov/smov    - to general)
+            elemsize = id->idOpSize();
+            index    = emitGetInsSC(id);
+            datasize = (elemsize == EA_8BYTE) ? EA_16BYTE : EA_8BYTE;
+            if (ins == INS_smov)
             {
-                case INS_bfm:
-                case INS_sbfm:
-                case INS_ubfm:
-                    emitDispImm(bmi.immR, true);
-                    emitDispImm(bmi.immS, false);
-                    break;
-
-                case INS_bfi:
-                case INS_sbfiz:
-                case INS_ubfiz:
-                    emitDispImm(getBitWidth(size) - bmi.immR, true);
-                    emitDispImm(bmi.immS + 1, false);
-                    break;
+                datasize = EA_16BYTE;
+            }
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeVectorsize(datasize);         // Q
+            code |= insEncodeVectorIndex(elemsize, index); // iiiii
+            code |= insEncodeReg_Rd(id->idReg1());         // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());         // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
 
-                case INS_bfxil:
-                case INS_sbfx:
-                case INS_ubfx:
-                    emitDispImm(bmi.immR, true);
-                    emitDispImm(bmi.immS - bmi.immR + 1, false);
-                    break;
+        case IF_DV_2C: // DV_2C   .Q.........iiiii ......nnnnnddddd      Vd Rn   (dup/ins - vector from general)
+            if (ins == INS_dup)
+            {
+                datasize = id->idOpSize();
+                elemsize = optGetElemsize(id->idInsOpt());
+                index    = 0;
+            }
+            else // INS_ins
+            {
+                datasize = EA_16BYTE;
+                elemsize = id->idOpSize();
+                index    = emitGetInsSC(id);
+            }
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeVectorsize(datasize);         // Q
+            code |= insEncodeVectorIndex(elemsize, index); // iiiii
+            code |= insEncodeReg_Vd(id->idReg1());         // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());         // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
 
-                case INS_asr:
-                case INS_lsr:
-                case INS_lsl:
-                    emitDispImm(imm, false);
-                    break;
+        case IF_DV_2D: // DV_2D   .Q.........iiiii ......nnnnnddddd      Vd Vn[]   (dup - vector)
+            index    = emitGetInsSC(id);
+            elemsize = optGetElemsize(id->idInsOpt());
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeVectorsize(id->idOpSize());   // Q
+            code |= insEncodeVectorIndex(elemsize, index); // iiiii
+            code |= insEncodeReg_Vd(id->idReg1());         // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());         // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
 
-                default:
-                    assert(!"Unexpected instruction in IF_DI_2D");
-            }
+        case IF_DV_2E: // DV_2E   ...........iiiii ......nnnnnddddd      Vd Vn[]   (dup - scalar)
+            index    = emitGetInsSC(id);
+            elemsize = id->idOpSize();
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeVectorIndex(elemsize, index); // iiiii
+            code |= insEncodeReg_Vd(id->idReg1());         // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());         // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
 
+        case IF_DV_2F: // DV_2F   ...........iiiii .jjjj.nnnnnddddd      Vd[] Vn[] (ins - element)
+            elemsize = id->idOpSize();
+            imm      = emitGetInsSC(id);
+            index    = (imm >> 4) & 0xf;
+            index2   = imm & 0xf;
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeVectorIndex(elemsize, index);   // iiiii
+            code |= insEncodeVectorIndex2(elemsize, index2); // jjjj
+            code |= insEncodeReg_Vd(id->idReg1());           // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());           // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DI_1F: // DI_1F   X..........iiiii cccc..nnnnn.nzcv      Rn imm5  nzcv cond
-            emitDispReg(id->idReg1(), size, true);
-            cfi.immCFVal = (unsigned)emitGetInsSC(id);
-            emitDispImm(cfi.imm5, true);
-            emitDispFlags(cfi.flags);
-            emitDispComma();
-            emitDispCond(cfi.cond);
+        case IF_DV_2G: // DV_2G   .........X...... ......nnnnnddddd      Vd Vn      (fmov, fcvtXX - register)
+            elemsize = id->idOpSize();
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeFloatElemsize(elemsize); // X
+            code |= insEncodeReg_Vd(id->idReg1());    // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());    // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_1D: // DR_1D   X............... cccc.......mmmmm      Rd       cond
-            emitDispReg(id->idReg1(), size, true);
-            cfi.immCFVal = (unsigned)emitGetInsSC(id);
-            emitDispCond(cfi.cond);
+        case IF_DV_2H: // DV_2H   X........X...... ......nnnnnddddd      Rd Vn      (fmov - to general)
+            elemsize = id->idOpSize();
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // X   X
+            code |= insEncodeReg_Rd(id->idReg1());            // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());            // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_2A: // DR_2A   X..........mmmmm ......nnnnn.....         Rn Rm
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, false);
+        case IF_DV_2I: // DV_2I   X........X...... ......nnnnnddddd      Vd Rn      (fmov - from general)
+            elemsize = id->idOpSize();
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // X   X
+            code |= insEncodeReg_Vd(id->idReg1());            // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());            // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_2B: // DR_2B   X.......sh.mmmmm ssssssnnnnn.....         Rn Rm {LSL,LSR,ASR,ROR} imm(0-63)
-            emitDispReg(id->idReg1(), size, true);
-            emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size);
+        case IF_DV_2J: // DV_2J   ........SS.....D D.....nnnnnddddd      Vd Vn      (fcvt)
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // SS DD
+            code |= insEncodeReg_Vd(id->idReg1());            // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());            // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_2C: // DR_2C   X..........mmmmm ooosssnnnnn.....         Rn Rm ext(Rm) LSL imm(0-4)
-            emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
-            imm = emitGetInsSC(id);
-            emitDispExtendReg(id->idReg2(), id->idInsOpt(), imm);
+        case IF_DV_2K: // DV_2K   .........X.mmmmm ......nnnnn.....      Vn Vm      (fcmp)
+            elemsize = id->idOpSize();
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeFloatElemsize(elemsize); // X
+            code |= insEncodeReg_Vn(id->idReg1());    // nnnnn
+            code |= insEncodeReg_Vm(id->idReg2());    // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_2D: // DR_2D   X..........nnnnn cccc..nnnnnddddd      Rd Rn    cond
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, true);
-            cfi.immCFVal = (unsigned)emitGetInsSC(id);
-            emitDispCond(cfi.cond);
+        case IF_DV_2L: // DV_2L   ........XX...... ......nnnnnddddd      Vd Vn      (abs, neg - scalar)
+            elemsize = id->idOpSize();
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeElemsize(elemsize);   // XX
+            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_2E: // DR_2E   X..........mmmmm ...........ddddd      Rd    Rm
-        case IF_DV_2U: // DV_2U   ................ ......nnnnnddddd      Sd    Sn
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, false);
+        case IF_DV_2M: // DV_2M   .Q......XX...... ......nnnnnddddd      Vd Vn      (abs, neg   - vector)
+        case IF_DV_2T: // DV_2T   .Q......XX...... ......nnnnnddddd      Sd Vn      (addv, saddlv, smaxv, sminv, uaddlv,
+                       // umaxv, uminv)
+            elemsize = optGetElemsize(id->idInsOpt());
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeVectorsize(id->idOpSize()); // Q
+            code |= insEncodeElemsize(elemsize);         // XX
+            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_2F: // DR_2F   X.......sh.mmmmm ssssss.....ddddd      Rd    Rm {LSL,LSR,ASR} imm(0-63)
-            emitDispReg(id->idReg1(), size, true);
-            emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size);
+        case IF_DV_2N: // DV_2N   .........iiiiiii ......nnnnnddddd      Vd Vn imm   (shift - scalar)
+            imm      = emitGetInsSC(id);
+            elemsize = id->idOpSize();
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeVectorShift(elemsize, emitInsIsVectorRightShift(ins), imm); // iiiiiii
+            code |= insEncodeReg_Vd(id->idReg1());                                       // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());                                       // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_2G: // DR_2G   X............... ......nnnnnddddd      Rd Rn
-            emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
-            emitDispReg(encodingZRtoSP(id->idReg2()), size, false);
+        case IF_DV_2O: // DV_2O   .Q.......iiiiiii ......nnnnnddddd      Vd Vn imm   (shift - vector)
+            imm      = emitGetInsSC(id);
+            elemsize = optGetElemsize(id->idInsOpt());
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeVectorsize(id->idOpSize());                                 // Q
+            code |= insEncodeVectorShift(elemsize, emitInsIsVectorRightShift(ins), imm); // iiiiiii
+            code |= insEncodeReg_Vd(id->idReg1());                                       // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());                                       // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_2H: // DR_2H   X........X...... ......nnnnnddddd      Rd Rn
-            if ((ins == INS_uxtb) || (ins == INS_uxth))
-            {
-                // There is no 64-bit variant of uxtb and uxth
-                // However, we allow idOpSize() to have EA_8BYTE value for these instruction
-                emitDispReg(id->idReg1(), EA_4BYTE, true);
-                emitDispReg(id->idReg2(), EA_4BYTE, false);
-            }
-            else
-            {
-                emitDispReg(id->idReg1(), size, true);
-                // sxtb, sxth and sxtb always operate on 32-bit source register
-                emitDispReg(id->idReg2(), EA_4BYTE, false);
-            }
+        case IF_DV_2P: // DV_2P   ............... ......nnnnnddddd      Vd Vn      (aes*, sha1su1)
+            elemsize = optGetElemsize(id->idInsOpt());
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_2I: // DR_2I   X..........mmmmm cccc..nnnnn.nzcv      Rn Rm    nzcv cond
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, true);
-            cfi.immCFVal = (unsigned)emitGetInsSC(id);
-            emitDispFlags(cfi.flags);
-            emitDispComma();
-            emitDispCond(cfi.cond);
+        case IF_DV_2Q: // DV_2Q   .........X...... ......nnnnnddddd      Vd Vn      (faddp, fmaxnmp, fmaxp, fminnmp,
+                       // fminp - scalar)
+            elemsize = optGetElemsize(id->idInsOpt());
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeFloatElemsize(elemsize); // X
+            code |= insEncodeReg_Vd(id->idReg1());    // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());    // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_3A: // DR_3A   X..........mmmmm ......nnnnnmmmmm      Rd Rn Rm
-            if ((ins == INS_add) || (ins == INS_sub))
-            {
-                emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
-                emitDispReg(encodingZRtoSP(id->idReg2()), size, true);
-            }
-            else if ((ins == INS_smulh) || (ins == INS_umulh))
-            {
-                size = EA_8BYTE;
-                // smulh Xd, Xn, Xm
-                emitDispReg(id->idReg1(), size, true);
-                emitDispReg(id->idReg2(), size, true);
-            }
-            else if ((ins == INS_smull) || (ins == INS_umull) || (ins == INS_smnegl) || (ins == INS_umnegl))
-            {
-                // smull Xd, Wn, Wm
-                emitDispReg(id->idReg1(), EA_8BYTE, true);
-                size = EA_4BYTE;
-                emitDispReg(id->idReg2(), size, true);
-            }
-            else
-            {
-                emitDispReg(id->idReg1(), size, true);
-                emitDispReg(id->idReg2(), size, true);
-            }
-
-            if (id->idIsLclVar())
-            {
-                emitDispReg(codeGen->rsGetRsvdReg(), size, false);
-            }
-            else
-            {
-                emitDispReg(id->idReg3(), size, false);
-            }
+        case IF_DV_2S: // DV_2S   ........XX...... ......nnnnnddddd      Sd Vn      (addp - scalar)
+            elemsize = optGetElemsize(id->idInsOpt());
+            code     = emitInsCode(ins, fmt);
+            code |= insEncodeElemsize(elemsize);   // XX
+            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
 
+        case IF_DV_2U: // DV_2U   ................ ......nnnnnddddd      Sd Sn   (sha1h)
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_3B: // DR_3B   X.......sh.mmmmm ssssssnnnnnddddd      Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, true);
-            emitDispShiftedReg(id->idReg3(), id->idInsOpt(), emitGetInsSC(id), size);
+        case IF_DV_3A: // DV_3A   .Q......XX.mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
+            code     = emitInsCode(ins, fmt);
+            elemsize = optGetElemsize(id->idInsOpt());
+            code |= insEncodeVectorsize(id->idOpSize()); // Q
+            code |= insEncodeElemsize(elemsize);         // XX
+            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
+            code |= insEncodeReg_Vm(id->idReg3());       // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_3C: // DR_3C   X..........mmmmm ooosssnnnnnddddd      Rd Rn Rm ext(Rm) LSL imm(0-4)
-            emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
-            emitDispReg(encodingZRtoSP(id->idReg2()), size, true);
-            imm = emitGetInsSC(id);
-            emitDispExtendReg(id->idReg3(), id->idInsOpt(), imm);
+        case IF_DV_3AI: // DV_3AI  .Q......XXLMmmmm ....H.nnnnnddddd      Vd Vn Vm[] (vector)
+            code     = emitInsCode(ins, fmt);
+            imm      = emitGetInsSC(id);
+            elemsize = optGetElemsize(id->idInsOpt());
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+            code |= insEncodeVectorsize(id->idOpSize());    // Q
+            code |= insEncodeElemsize(elemsize);            // XX
+            code |= insEncodeVectorIndexLMH(elemsize, imm); // LM H
+            code |= insEncodeReg_Vd(id->idReg1());          // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());          // nnnnn
+            code |= insEncodeReg_Vm(id->idReg3());          // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_3D: // DR_3D   X..........mmmmm cccc..nnnnnmmmmm      Rd Rn Rm cond
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, true);
-            emitDispReg(id->idReg3(), size, true);
-            cfi.immCFVal = (unsigned)emitGetInsSC(id);
-            emitDispCond(cfi.cond);
+        case IF_DV_3B: // DV_3B   .Q.......X.mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
+            code     = emitInsCode(ins, fmt);
+            elemsize = optGetElemsize(id->idInsOpt());
+            code |= insEncodeVectorsize(id->idOpSize()); // Q
+            code |= insEncodeFloatElemsize(elemsize);    // X
+            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
+            code |= insEncodeReg_Vm(id->idReg3());       // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_3E: // DR_3E   X........X.mmmmm ssssssnnnnnddddd      Rd Rn Rm imm(0-63)
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, true);
-            emitDispReg(id->idReg3(), size, true);
-            emitDispImm(emitGetInsSC(id), false);
+        case IF_DV_3BI: // DV_3BI  .Q.......XLmmmmm ....H.nnnnnddddd      Vd Vn Vm[] (vector by element)
+            code     = emitInsCode(ins, fmt);
+            imm      = emitGetInsSC(id);
+            elemsize = optGetElemsize(id->idInsOpt());
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+            code |= insEncodeVectorsize(id->idOpSize()); // Q
+            code |= insEncodeFloatElemsize(elemsize);    // X
+            code |= insEncodeFloatIndex(elemsize, imm);  // L H
+            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
+            code |= insEncodeReg_Vm(id->idReg3());       // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DR_4A: // DR_4A   X..........mmmmm .aaaaannnnnmmmmm      Rd Rn Rm Ra
-            if ((ins == INS_smaddl) || (ins == INS_smsubl) || (ins == INS_umaddl) || (ins == INS_umsubl))
-            {
-                // smaddl Xd, Wn, Wm, Xa
-                emitDispReg(id->idReg1(), EA_8BYTE, true);
-                emitDispReg(id->idReg2(), EA_4BYTE, true);
-                emitDispReg(id->idReg3(), EA_4BYTE, true);
-                emitDispReg(id->idReg4(), EA_8BYTE, false);
-            }
-            else
-            {
-                emitDispReg(id->idReg1(), size, true);
-                emitDispReg(id->idReg2(), size, true);
-                emitDispReg(id->idReg3(), size, true);
-                emitDispReg(id->idReg4(), size, false);
-            }
+        case IF_DV_3C: // DV_3C   .Q.........mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeVectorsize(id->idOpSize()); // Q
+            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
+            code |= insEncodeReg_Vm(id->idReg3());       // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DV_1A: // DV_1A   .........X.iiiii iii........ddddd      Vd imm8 (fmov - immediate scalar)
-            elemsize = id->idOpSize();
-            emitDispReg(id->idReg1(), elemsize, true);
-            emitDispFloatImm(emitGetInsSC(id));
+        case IF_DV_3D: // DV_3D   .........X.mmmmm ......nnnnnddddd      Vd Vn Vm   (scalar)
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeFloatElemsize(id->idOpSize()); // X
+            code |= insEncodeReg_Vd(id->idReg1());          // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());          // nnnnn
+            code |= insEncodeReg_Vm(id->idReg3());          // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DV_1B: // DV_1B   .QX..........iii cmod..iiiiiddddd      Vd imm8 (immediate vector)
-            imm      = emitGetInsSC(id) & 0x0ff;
-            immShift = (emitGetInsSC(id) & 0x700) >> 8;
-            hasShift = (immShift != 0);
-            elemsize = optGetElemsize(id->idInsOpt());
-            if (id->idInsOpt() == INS_OPTS_1D)
-            {
-                assert(elemsize == size);
-                emitDispReg(id->idReg1(), size, true);
-            }
-            else
-            {
-                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-            }
-            if (ins == INS_fmov)
-            {
-                emitDispFloatImm(imm);
-                assert(hasShift == false);
-            }
-            else
-            {
-                if (elemsize == EA_8BYTE)
-                {
-                    assert(ins == INS_movi);
-                    ssize_t       imm64 = 0;
-                    const ssize_t mask8 = 0xFF;
-                    for (unsigned b = 0; b < 8; b++)
-                    {
-                        if (imm & (ssize_t{1} << b))
-                        {
-                            imm64 |= (mask8 << (b * 8));
-                        }
-                    }
-                    emitDispImm(imm64, hasShift, true);
-                }
-                else
-                {
-                    emitDispImm(imm, hasShift, true);
-                }
-                if (hasShift)
-                {
-                    insOpts  opt   = (immShift & 0x4) ? INS_OPTS_MSL : INS_OPTS_LSL;
-                    unsigned shift = (immShift & 0x3) * 8;
-                    emitDispShiftOpts(opt);
-                    emitDispImm(shift, false);
-                }
-            }
+        case IF_DV_3DI: // DV_3DI  .........XLmmmmm ....H.nnnnnddddd      Vd Vn Vm[] (scalar by element)
+            code     = emitInsCode(ins, fmt);
+            imm      = emitGetInsSC(id);
+            elemsize = id->idOpSize();
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+            code |= insEncodeFloatElemsize(elemsize);   // X
+            code |= insEncodeFloatIndex(elemsize, imm); // L H
+            code |= insEncodeReg_Vd(id->idReg1());      // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());      // nnnnn
+            code |= insEncodeReg_Vm(id->idReg3());      // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DV_1C: // DV_1C   .........X...... ......nnnnn.....      Vn #0.0 (fcmp - with zero)
+        case IF_DV_3E: // DV_3E   ........XX.mmmmm ......nnnnnddddd      Vd Vn Vm   (scalar)
+            code     = emitInsCode(ins, fmt);
             elemsize = id->idOpSize();
-            emitDispReg(id->idReg1(), elemsize, true);
-            emitDispFloatZero();
+            code |= insEncodeElemsize(elemsize);   // XX
+            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+            code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DV_2A: // DV_2A   .Q.......X...... ......nnnnnddddd      Vd Vn   (fabs, fcvt - vector)
-            if (emitInsIsVectorLong(ins))
-            {
-                emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
-                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
-            }
-            else if (emitInsIsVectorNarrow(ins))
-            {
-                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-                emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), false);
-            }
-            else
-            {
-                assert(!emitInsIsVectorWide(ins));
-                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
-            }
-            if (ins == INS_fcmeq || ins == INS_fcmge || ins == INS_fcmgt || ins == INS_fcmle || ins == INS_fcmlt)
-            {
-                emitDispComma();
-                emitDispFloatZero();
-            }
+        case IF_DV_3EI: // DV_3EI ........XXLMmmmm ....H.nnnnnddddd      Vd Vn Vm[] (scalar by element)
+            code     = emitInsCode(ins, fmt);
+            imm      = emitGetInsSC(id);
+            elemsize = id->idOpSize();
+            assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+            code |= insEncodeElemsize(elemsize);            // XX
+            code |= insEncodeVectorIndexLMH(elemsize, imm); // LM H
+            code |= insEncodeReg_Vd(id->idReg1());          // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());          // nnnnn
+            code |= insEncodeReg_Vm(id->idReg3());          // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DV_2P: // DV_2P   ................ ......nnnnnddddd      Vd Vn   (aes*, sha1su1)
-            emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+        case IF_DV_3F: // DV_3F   ...........mmmmm ......nnnnnddddd      Vd Vn Vm   (vector) - source dest regs overlap
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+            code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+            code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DV_2M: // DV_2M   .Q......XX...... ......nnnnnddddd      Vd Vn   (abs, neg - vector)
-            if (emitInsIsVectorNarrow(ins))
-            {
-                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-                emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), false);
-            }
-            else
-            {
-                assert(!emitInsIsVectorLong(ins) && !emitInsIsVectorWide(ins));
-                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
-            }
-            if (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt)
-            {
-                emitDispComma();
-                emitDispImm(0, false);
-            }
+        case IF_DV_3G: // DV_3G   .Q.........mmmmm .iiii.nnnnnddddd      Vd Vn Vm imm (vector)
+            imm  = emitGetInsSC(id);
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeVectorsize(id->idOpSize()); // Q
+            code |= insEncodeReg_Vm(id->idReg3());       // mmmmm
+            code |= ((code_t)imm << 11);                 // iiii
+            code |= insEncodeReg_Vn(id->idReg2());       // nnnnn
+            code |= insEncodeReg_Vd(id->idReg1());       // ddddd
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DV_2N: // DV_2N   .........iiiiiii ......nnnnnddddd      Vd Vn imm   (shift - scalar)
+        case IF_DV_4A: // DV_4A   .........X.mmmmm .aaaaannnnnddddd      Vd Va Vn Vm (scalar)
+            code     = emitInsCode(ins, fmt);
             elemsize = id->idOpSize();
-            if (emitInsIsVectorLong(ins))
-            {
-                emitDispReg(id->idReg1(), widenDatasize(elemsize), true);
-                emitDispReg(id->idReg2(), elemsize, true);
-            }
-            else if (emitInsIsVectorNarrow(ins))
-            {
-                emitDispReg(id->idReg1(), elemsize, true);
-                emitDispReg(id->idReg2(), widenDatasize(elemsize), true);
-            }
-            else
-            {
-                assert(!emitInsIsVectorWide(ins));
-                emitDispReg(id->idReg1(), elemsize, true);
-                emitDispReg(id->idReg2(), elemsize, true);
-            }
-            imm = emitGetInsSC(id);
-            emitDispImm(imm, false);
+            code |= insEncodeFloatElemsize(elemsize); // X
+            code |= insEncodeReg_Vd(id->idReg1());    // ddddd
+            code |= insEncodeReg_Vn(id->idReg2());    // nnnnn
+            code |= insEncodeReg_Vm(id->idReg3());    // mmmmm
+            code |= insEncodeReg_Va(id->idReg4());    // aaaaa
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DV_2O: // DV_2O   .Q.......iiiiiii ......nnnnnddddd      Vd Vn imm   (shift - vector)
-            if ((ins == INS_sxtl) || (ins == INS_sxtl2) || (ins == INS_uxtl) || (ins == INS_uxtl2))
-            {
-                assert((emitInsIsVectorLong(ins)));
-                emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
-                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
-            }
-            else
+        case IF_SN_0A: // SN_0A   ................ ................
+        {
+            bool skipIns = false;
+#if FEATURE_LOOP_ALIGN
+            if (id->idIns() == INS_align)
             {
-                if (emitInsIsVectorLong(ins))
+                // IG can be marked as not needing alignment after emitting align instruction.
+                // Alternatively, there are fewer align instructions needed than emitted.
+                // If that is the case, skip outputting alignment.
+                if (!ig->endsWithAlignInstr() || id->idIsEmptyAlign())
                 {
-                    emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
-                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+                    skipIns = true;
                 }
-                else if (emitInsIsVectorNarrow(ins))
+
+#ifdef DEBUG
+                if (!ig->endsWithAlignInstr())
                 {
-                    emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-                    emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                    // Validate if the state is correctly updated
+                    assert(id->idIsEmptyAlign());
                 }
-                else
+#endif
+                sz  = sizeof(instrDescAlign);
+                ins = INS_nop;
+
+#ifdef DEBUG
+                // Under STRESS_EMITTER, if this is the 'align' before the 'jmp' instruction,
+                // then add "bkpt" instruction.
+                instrDescAlign* alignInstr = (instrDescAlign*)id;
+
+                if (emitComp->compStressCompile(Compiler::STRESS_EMITTER, 50) && alignInstr->isPlacedAfterJmp &&
+                    !skipIns)
                 {
-                    assert(!emitInsIsVectorWide(ins));
-                    emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+                    // There is no good way to squeeze in "bkpt" as well as display it
+                    // in the disassembly because there is no corresponding instrDesc for
+                    // it. As such, leave it as is, the "0xD43E0000" bytecode will be seen
+                    // next to the nop instruction in disasm.
+                    // e.g. D43E0000          align   [4 bytes for IG07]
+                    ins = INS_BREAKPOINT;
+                    fmt = IF_SI_0A;
                 }
-
-                imm = emitGetInsSC(id);
-                emitDispImm(imm, false);
+#endif
             }
-            break;
+#endif // FEATURE_LOOP_ALIGN
 
-        case IF_DV_2B: // DV_2B   .Q.........iiiii ......nnnnnddddd      Rd Vn[] (umov/smov    - to general)
-            srcsize = id->idOpSize();
-            index   = emitGetInsSC(id);
-            if (ins == INS_smov)
-            {
-                dstsize = EA_8BYTE;
-            }
-            else // INS_umov or INS_mov
+            if (!skipIns)
             {
-                dstsize = (srcsize == EA_8BYTE) ? EA_8BYTE : EA_4BYTE;
+                code = emitInsCode(ins, fmt);
+                dst += emitOutput_Instr(dst, code);
             }
-            emitDispReg(id->idReg1(), dstsize, true);
-            emitDispVectorRegIndex(id->idReg2(), srcsize, index, false);
+
             break;
+        }
 
-        case IF_DV_2C: // DV_2C   .Q.........iiiii ......nnnnnddddd      Vd Rn   (dup/ins - vector from general)
-            if (ins == INS_dup)
-            {
-                datasize = id->idOpSize();
-                assert(isValidVectorDatasize(datasize));
-                assert(isValidArrangement(datasize, id->idInsOpt()));
-                elemsize = optGetElemsize(id->idInsOpt());
-                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-            }
-            else // INS_ins
-            {
-                elemsize = id->idOpSize();
-                index    = emitGetInsSC(id);
-                assert(isValidVectorElemsize(elemsize));
-                emitDispVectorRegIndex(id->idReg1(), elemsize, index, true);
-            }
-            emitDispReg(id->idReg2(), (elemsize == EA_8BYTE) ? EA_8BYTE : EA_4BYTE, false);
+        case IF_SI_0A: // SI_0A   ...........iiiii iiiiiiiiiii.....               imm16
+            imm = emitGetInsSC(id);
+            assert(isValidUimm<16>(imm));
+            code = emitInsCode(ins, fmt);
+            code |= ((code_t)imm << 5); // iiiii iiiiiiiiiii
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DV_2D: // DV_2D   .Q.........iiiii ......nnnnnddddd      Vd Vn[]   (dup - vector)
-            datasize = id->idOpSize();
-            assert(isValidVectorDatasize(datasize));
-            assert(isValidArrangement(datasize, id->idInsOpt()));
-            elemsize = optGetElemsize(id->idInsOpt());
-            index    = emitGetInsSC(id);
-            emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispVectorRegIndex(id->idReg2(), elemsize, index, false);
+        case IF_SI_0B: // SI_0B   ................ ....bbbb........               imm4 - barrier
+            imm = emitGetInsSC(id);
+            assert((imm >= 0) && (imm <= 15));
+            code = emitInsCode(ins, fmt);
+            code |= ((code_t)imm << 8); // bbbb
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DV_2E: // DV_2E   ...........iiiii ......nnnnnddddd      Vd Vn[]   (dup - scalar)
-            elemsize = id->idOpSize();
-            index    = emitGetInsSC(id);
-            emitDispReg(id->idReg1(), elemsize, true);
-            emitDispVectorRegIndex(id->idReg2(), elemsize, index, false);
+        case IF_SR_1A: // SR_1A   ................ ...........ttttt      Rt       (dc zva, mrs)
+            assert(insOptsNone(id->idInsOpt()));
+            code = emitInsCode(ins, fmt);
+            code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+            dst += emitOutput_Instr(dst, code);
             break;
 
-        case IF_DV_2F: // DV_2F   ...........iiiii .jjjj.nnnnnddddd      Vd[] Vn[] (ins - element)
-            imm      = emitGetInsSC(id);
-            index    = (imm >> 4) & 0xf;
-            index2   = imm & 0xf;
-            elemsize = id->idOpSize();
-            emitDispVectorRegIndex(id->idReg1(), elemsize, index, true);
-            emitDispVectorRegIndex(id->idReg2(), elemsize, index2, false);
+        default:
+            dst = emitOutput_InstrSve(dst, id);
             break;
+    }
 
-        case IF_DV_2G: // DV_2G   .........X...... ......nnnnnddddd      Vd Vn      (fmov, fcvtXX - register)
-        case IF_DV_2K: // DV_2K   .........X.mmmmm ......nnnnn.....      Vn Vm      (fcmp)
-        case IF_DV_2L: // DV_2L   ........XX...... ......nnnnnddddd      Vd Vn      (abs, neg - scalar)
-            size = id->idOpSize();
-            if ((ins == INS_fcmeq) || (ins == INS_fcmge) || (ins == INS_fcmgt) || (ins == INS_fcmle) ||
-                (ins == INS_fcmlt))
+    // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref.
+    // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a
+    // GC ref to register "id->idReg1()".  (It may, apparently, also not be GC_NONE in other cases, such as
+    // for stores, but we ignore those cases here.)
+    if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register than can hold GC ref.
+    {
+        // We assume that "idReg1" is the primary destination register for all instructions
+        assert(!emitInsDestIsOp2(ins));
+        if (id->idGCref() != GCT_NONE)
+        {
+            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+        }
+        else
+        {
+            emitGCregDeadUpd(id->idReg1(), dst);
+        }
+
+        if (emitInsMayWriteMultipleRegs(id))
+        {
+            // INS_ldp etc...
+            // "idReg2" is the secondary destination register
+            if (id->idGCrefReg2() != GCT_NONE)
+            {
+                emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst);
+            }
+            else
             {
-                emitDispReg(id->idReg1(), size, true);
-                emitDispReg(id->idReg2(), size, true);
-                emitDispFloatZero();
+                emitGCregDeadUpd(id->idReg2(), dst);
             }
-            else if (emitInsIsVectorNarrow(ins))
+        }
+    }
+
+SKIP_GC_UPDATE:
+    // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
+    // ref or overwritten one.
+    if (emitInsWritesToLclVarStackLoc(id) || emitInsWritesToLclVarStackLocPair(id))
+    {
+        int      varNum = id->idAddr()->iiaLclVar.lvaVarNum();
+        unsigned ofs    = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
+        bool     FPbased;
+        int      adr = emitComp->lvaFrameAddress(varNum, &FPbased);
+        if (id->idGCref() != GCT_NONE)
+        {
+            emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst DEBUG_ARG(varNum));
+        }
+        else
+        {
+            // If the type of the local is a gc ref type, update the liveness.
+            var_types vt;
+            if (varNum >= 0)
             {
-                emitDispReg(id->idReg1(), size, true);
-                emitDispReg(id->idReg2(), widenDatasize(size), false);
+                // "Regular" (non-spill-temp) local.
+                vt = var_types(emitComp->lvaTable[varNum].lvType);
             }
             else
             {
-                emitDispReg(id->idReg1(), size, true);
-                emitDispReg(id->idReg2(), size, false);
+                TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum);
+                vt              = tmpDsc->tdTempType();
             }
-            if (fmt == IF_DV_2L &&
-                (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt))
+            if (vt == TYP_REF || vt == TYP_BYREF)
             {
-                emitDispComma();
-                emitDispImm(0, false);
+                emitGCvarDeadUpd(adr + ofs, dst DEBUG_ARG(varNum));
             }
-            break;
+        }
+        if (emitInsWritesToLclVarStackLocPair(id))
+        {
+            int      varNum2 = varNum;
+            int      adr2    = adr;
+            unsigned ofs2    = ofs;
+            unsigned ofs2Dist;
 
-        case IF_DV_2H: // DV_2H   X........X...... ......nnnnnddddd      Rd Vn      (fmov, fcvtXX - to general)
-        case IF_DV_2I: // DV_2I   X........X...... ......nnnnnddddd      Vd Rn      (fmov, Xcvtf - from general)
-        case IF_DV_2J: // DV_2J   ........SS.....D D.....nnnnnddddd      Vd Vn      (fcvt)
-            dstsize = optGetDstsize(id->idInsOpt());
-            srcsize = optGetSrcsize(id->idInsOpt());
+            if (id->idIsLclVarPair())
+            {
+                bool FPbased2;
 
-            emitDispReg(id->idReg1(), dstsize, true);
-            emitDispReg(id->idReg2(), srcsize, false);
-            break;
+                emitLclVarAddr* lclVarAddr2 = emitGetLclVarPairLclVar2(id);
+                varNum2                     = lclVarAddr2->lvaVarNum();
+                ofs2                        = lclVarAddr2->lvaOffset();
 
-        case IF_DV_2Q: // DV_2Q   .........X...... ......nnnnnddddd      Sd Vn      (faddp, fmaxnmp, fmaxp, fminnmp,
-                       // fminp - scalar)
-        case IF_DV_2R: // DV_2R   .Q.......X...... ......nnnnnddddd      Sd Vn      (fmaxnmv, fmaxv, fminnmv, fminv)
-        case IF_DV_2S: // DV_2S   ........XX...... ......nnnnnddddd      Sd Vn      (addp - scalar)
-        case IF_DV_2T: // DV_2T   .Q......XX...... ......nnnnnddddd      Sd Vn      (addv, saddlv, smaxv, sminv, uaddlv,
-                       // umaxv, uminv)
-            if ((ins == INS_sadalp) || (ins == INS_saddlp) || (ins == INS_uadalp) || (ins == INS_uaddlp))
-            {
-                emitDispVectorReg(id->idReg1(), optWidenDstArrangement(id->idInsOpt()), true);
-                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
-            }
-            else
-            {
-                if ((ins == INS_saddlv) || (ins == INS_uaddlv))
+                // If there are 2 GC vars in this instrDesc, get the 2nd variable
+                // that should be tracked.
+                adr2     = emitComp->lvaFrameAddress(varNum2, &FPbased2);
+                ofs2Dist = EA_SIZE_IN_BYTES(size);
+#ifdef DEBUG
+                assert(FPbased == FPbased2);
+                if (FPbased)
                 {
-                    elemsize = optGetElemsize(optWidenDstArrangement(id->idInsOpt()));
+                    assert(id->idReg3() == REG_FP);
                 }
                 else
                 {
-                    elemsize = optGetElemsize(id->idInsOpt());
+                    assert(id->idReg3() == REG_SP);
                 }
-                emitDispReg(id->idReg1(), elemsize, true);
-                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
-            }
-            break;
-
-        case IF_DV_3A: // DV_3A   .Q......XX.mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
-            if ((ins == INS_sdot) || (ins == INS_udot))
-            {
-                // sdot/udot Vd.2s, Vn.8b, Vm.8b
-                // sdot/udot Vd.4s, Vn.16b, Vm.16b
-                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-                size = id->idOpSize();
-                emitDispVectorReg(id->idReg2(), (size == EA_8BYTE) ? INS_OPTS_8B : INS_OPTS_16B, true);
-                emitDispVectorReg(id->idReg3(), (size == EA_8BYTE) ? INS_OPTS_8B : INS_OPTS_16B, false);
+                assert(varNum2 != -1);
+#endif // DEBUG
             }
-            else if (((ins == INS_pmull) && (id->idInsOpt() == INS_OPTS_1D)) ||
-                     ((ins == INS_pmull2) && (id->idInsOpt() == INS_OPTS_2D)))
+            else
             {
-                // pmull Vd.1q, Vn.1d, Vm.1d
-                // pmull2 Vd.1q, Vn.2d, Vm.2d
-                printf("%s.1q, ", emitVectorRegName(id->idReg1()));
-                emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
-                emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
+                ofs2Dist = TARGET_POINTER_SIZE;
+                ofs2 += ofs2Dist;
             }
-            else if (emitInsIsVectorNarrow(ins))
+
+            ofs2 = AlignDown(ofs2, ofs2Dist);
+
+            if (id->idGCrefReg2() != GCT_NONE)
             {
-                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-                emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true);
-                emitDispVectorReg(id->idReg3(), optWidenElemsizeArrangement(id->idInsOpt()), false);
+#ifdef DEBUG
+                if (id->idGCref() != GCT_NONE)
+                {
+                    // If 1st register was a gc-var, then make sure the offset
+                    // are correctly set for the 2nd register that is holding
+                    // another gc-var.
+                    assert((adr + ofs + ofs2Dist) == (adr2 + ofs2));
+                }
+#endif
+                emitGCvarLiveUpd(adr2 + ofs2, varNum2, id->idGCrefReg2(), dst DEBUG_ARG(varNum2));
             }
             else
             {
-                if (emitInsIsVectorLong(ins))
+                // If the type of the local is a gc ref type, update the liveness.
+                var_types vt;
+                if (varNum2 >= 0)
                 {
-                    emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
-                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+                    // "Regular" (non-spill-temp) local.
+                    vt = var_types(emitComp->lvaTable[varNum2].lvType);
                 }
-                else if (emitInsIsVectorWide(ins))
+                else
                 {
-                    emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
-                    emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                    TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum2);
+                    vt              = tmpDsc->tdTempType();
                 }
-                else
+                if (vt == TYP_REF || vt == TYP_BYREF)
                 {
-                    emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+                    emitGCvarDeadUpd(adr2 + ofs2, dst DEBUG_ARG(varNum2));
                 }
+            }
+        }
+    }
+
+#ifdef DEBUG
+    /* Make sure we set the instruction descriptor size correctly */
+
+    size_t expected = emitSizeOfInsDsc(id);
+    assert(sz == expected);
+
+    if (emitComp->opts.disAsm || emitComp->verbose)
+    {
+        emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig);
+    }
+
+    if (emitComp->compDebugBreak)
+    {
+        // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for
+        // emitting instruction a6, (i.e. IN00a6 in jitdump).
+        if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
+        {
+            assert(!"JitBreakEmitOutputInstr reached");
+        }
+    }
+
+    // Output any delta in GC info.
+    if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC)
+    {
+        emitDispGCInfoDelta();
+    }
+#else
+    if (emitComp->opts.disAsm)
+    {
+        size_t expected = emitSizeOfInsDsc(id);
+        assert(sz == expected);
+        emitDispIns(id, false, 0, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig);
+    }
+#endif
+
+    /* All instructions are expected to generate code */
+
+    assert(*dp != dst || id->idIsEmptyAlign());
+
+    *dp = dst;
+
+    return sz;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ *  Display a comma
+ */
+void emitter::emitDispComma()
+{
+    printf(", ");
+}
+
+/*****************************************************************************
+ *
+ *  Display the instruction name
+ */
+void emitter::emitDispInst(instruction ins)
+{
+    const char* insstr = codeGen->genInsName(ins);
+    size_t      len    = strlen(insstr);
+
+    /* Display the instruction name */
+
+    printf("%s", insstr);
+
+    //
+    // Add at least one space after the instruction name
+    // and add spaces until we have reach the normal size of 8
+    do
+    {
+        printf(" ");
+        len++;
+    } while (len < 8);
+}
+
+/*****************************************************************************
+ *
+ *  Display an immediate value
+ */
+void emitter::emitDispImm(ssize_t imm, bool addComma, bool alwaysHex /* =false */, bool isAddrOffset /* =false */)
+{
+    if (isAddrOffset)
+    {
+        alwaysHex = true;
+    }
+    else if (imm == 0)
+    {
+        // Non-offset values of zero are never displayed as hex.
+        alwaysHex = false;
+    }
+
+    if (strictArmAsm)
+    {
+        printf("#");
+    }
 
-                emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
-            }
-            break;
+    // Munge any pointers if we want diff-able disassembly.
+    // Since some may be emitted as partial words, print as diffable anything that has
+    // significant bits beyond the lowest 8-bits.
+    if (emitComp->opts.disDiffable)
+    {
+        ssize_t top56bits = (imm >> 8);
+        if ((top56bits != 0) && (top56bits != -1))
+            imm = 0xD1FFAB1E;
+    }
 
-        case IF_DV_3AI: // DV_3AI  .Q......XXLMmmmm ....H.nnnnnddddd      Vd Vn Vm[] (vector by element)
-            if ((ins == INS_sdot) || (ins == INS_udot))
+    if (!alwaysHex && (imm > -1000) && (imm < 1000))
+    {
+        printf("%d", (int)imm);
+    }
+    else
+    {
+        if ((imm < 0) && ((imm & 0xFFFFFFFF00000000LL) == 0xFFFFFFFF00000000LL))
+        {
+            printf("-");
+            imm = -imm;
+        }
+
+        if ((imm & 0xFFFFFFFF00000000LL) != 0)
+        {
+            if (isAddrOffset)
             {
-                // sdot/udot Vd.2s, Vn.8b, Vm.4b[index]
-                // sdot/udot Vd.4s, Vn.16b, Vm.4b[index]
-                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-                size = id->idOpSize();
-                emitDispVectorReg(id->idReg2(), (size == EA_8BYTE) ? INS_OPTS_8B : INS_OPTS_16B, true);
-                index = emitGetInsSC(id);
-                printf("%s.4b[%d]", emitVectorRegName(id->idReg3()), (int)index);
+                printf("0x%llX", imm);
             }
             else
             {
-                if (emitInsIsVectorLong(ins))
-                {
-                    emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
-                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
-                }
-                else if (emitInsIsVectorWide(ins))
-                {
-                    emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
-                    emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true);
-                }
-                else
-                {
-                    assert(!emitInsIsVectorNarrow(ins));
-                    emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
-                }
-
-                elemsize = optGetElemsize(id->idInsOpt());
-                index    = emitGetInsSC(id);
-                emitDispVectorRegIndex(id->idReg3(), elemsize, index, false);
+                printf("0x%llx", imm);
             }
-            break;
+        }
+        else
+        {
+            printf("0x%02X", (unsigned)imm);
+        }
+    }
 
-        case IF_DV_3B: // DV_3B   .Q.........mmmmm ......nnnnnddddd      Vd Vn Vm  (vector)
-            emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
-            emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
-            break;
+    if (addComma)
+        emitDispComma();
+}
 
-        case IF_DV_3C: // DV_3C   .Q.........mmmmm ......nnnnnddddd      Vd Vn Vm  (vector)
-            emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-            switch (ins)
-            {
-                case INS_tbl:
-                case INS_tbl_2regs:
-                case INS_tbl_3regs:
-                case INS_tbl_4regs:
-                case INS_tbx:
-                case INS_tbx_2regs:
-                case INS_tbx_3regs:
-                case INS_tbx_4regs:
-                    registerListSize = insGetRegisterListSize(ins);
-                    emitDispVectorRegList(id->idReg2(), registerListSize, INS_OPTS_16B, true);
-                    break;
-                case INS_mov:
-                    break;
-                default:
-                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
-                    break;
-            }
-            emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
-            break;
+/*****************************************************************************
+ *
+ *  Display an immediate value as an index operation
+ */
+void emitter::emitDispElementIndex(const ssize_t imm, const bool addComma)
+{
+    printf("[%d]", imm);
 
-        case IF_DV_3BI: // DV_3BI  .Q........Lmmmmm ....H.nnnnnddddd      Vd Vn Vm[] (vector by element)
-            emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
-            elemsize = optGetElemsize(id->idInsOpt());
-            emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false);
-            break;
+    if (addComma)
+    {
+        emitDispComma();
+    }
+}
 
-        case IF_DV_3D: // DV_3D   .........X.mmmmm ......nnnnnddddd      Vd Vn Vm  (scalar)
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, true);
-            emitDispReg(id->idReg3(), size, false);
-            break;
+/*****************************************************************************
+ *
+ *  Display a float zero constant
+ */
+void emitter::emitDispFloatZero()
+{
+    if (strictArmAsm)
+    {
+        printf("#");
+    }
+    printf("0.0");
+}
 
-        case IF_DV_3E: // DV_3E   ........XX.mmmmm ......nnnnnddddd      Vd Vn Vm  (scalar)
-            if (emitInsIsVectorLong(ins))
-            {
-                emitDispReg(id->idReg1(), widenDatasize(size), true);
-            }
-            else
-            {
-                assert(!emitInsIsVectorNarrow(ins) && !emitInsIsVectorWide(ins));
-                emitDispReg(id->idReg1(), size, true);
-            }
+/*****************************************************************************
+ *
+ *  Display an encoded float constant value
+ */
+void emitter::emitDispFloatImm(ssize_t imm8)
+{
+    assert((0 <= imm8) && (imm8 <= 0x0ff));
+    if (strictArmAsm)
+    {
+        printf("#");
+    }
 
-            emitDispReg(id->idReg2(), size, true);
-            emitDispReg(id->idReg3(), size, false);
-            break;
+    floatImm8 fpImm;
+    fpImm.immFPIVal = (unsigned)imm8;
+    double result   = emitDecodeFloatImm8(fpImm);
 
-        case IF_DV_3EI: // DV_3EI ........XXLMmmmm ....H.nnnnnddddd      Vd Vn Vm[] (scalar by element)
-            if (emitInsIsVectorLong(ins))
-            {
-                emitDispReg(id->idReg1(), widenDatasize(size), true);
-            }
-            else
-            {
-                assert(!emitInsIsVectorNarrow(ins) && !emitInsIsVectorWide(ins));
-                emitDispReg(id->idReg1(), size, true);
-            }
-            emitDispReg(id->idReg2(), size, true);
-            elemsize = id->idOpSize();
-            index    = emitGetInsSC(id);
-            emitDispVectorRegIndex(id->idReg3(), elemsize, index, false);
-            break;
+    printf("%.4f", result);
+}
 
-        case IF_DV_3F: // DV_3F   ..........mmmmm ......nnnnnddddd       Vd Vn Vm (vector)
-            if ((ins == INS_sha1c) || (ins == INS_sha1m) || (ins == INS_sha1p))
-            {
-                // Qd, Sn, Vm (vector)
-                emitDispReg(id->idReg1(), size, true);
-                emitDispReg(id->idReg2(), EA_4BYTE, true);
-                emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
-            }
-            else if ((ins == INS_sha256h) || (ins == INS_sha256h2))
-            {
-                // Qd Qn Vm (vector)
-                emitDispReg(id->idReg1(), size, true);
-                emitDispReg(id->idReg2(), size, true);
-                emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
-            }
-            else // INS_sha1su0, INS_sha256su1
-            {
-                // Vd, Vn, Vm   (vector)
-                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-                emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
-                emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
-            }
-            break;
+/*****************************************************************************
+ *
+ *  Display an encoded small float constant value
+ */
+void emitter::emitDispSmallFloatImm(ssize_t imm, instruction ins)
+{
+    if (strictArmAsm)
+    {
+        printf("#");
+    }
+    printf("%.4f", emitDecodeSmallFloatImm(imm, ins));
+}
 
-        case IF_DV_3DI: // DV_3DI  .........XLmmmmm ....H.nnnnnddddd      Vd Vn Vm[] (scalar by element)
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, true);
-            elemsize = size;
-            emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false);
-            break;
+/*****************************************************************************
+ *
+ *  Display an immediate with an optional left-shift.
+ */
+void emitter::emitDispImmOptsLSL(ssize_t imm, bool hasShift, unsigned shiftAmount)
+{
+    if (!strictArmAsm && hasShift)
+    {
+        imm <<= shiftAmount;
+    }
+    emitDispImm(imm, false);
+    if (strictArmAsm && hasShift)
+    {
+        printf(", LSL #%u", shiftAmount);
+    }
+}
 
-        case IF_DV_3G: // DV_3G   .Q.........mmmmm .iiii.nnnnnddddd      Vd Vn Vm imm (vector)
-            emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
-            emitDispVectorReg(id->idReg3(), id->idInsOpt(), true);
-            emitDispImm(emitGetInsSC(id), false);
-            break;
+/*****************************************************************************
+ *
+ *  Display an ARM64 condition code for the conditional instructions
+ */
+void emitter::emitDispCond(insCond cond)
+{
+    const static char* armCond[16] = {"eq", "ne", "hs", "lo", "mi", "pl", "vs", "vc",
+                                      "hi", "ls", "ge", "lt", "gt", "le", "AL", "NV"}; // The last two are invalid
+    unsigned           imm         = (unsigned)cond;
+    assert((0 <= imm) && (imm < ArrLen(armCond)));
+    printf(armCond[imm]);
+}
+
+/*****************************************************************************
+ *
+ *  Display an ARM64 flags for the conditional instructions
+ */
+void emitter::emitDispFlags(insCflags flags)
+{
+    const static char* armFlags[16] = {"0", "v",  "c",  "cv",  "z",  "zv",  "zc",  "zcv",
+                                       "n", "nv", "nc", "ncv", "nz", "nzv", "nzc", "nzcv"};
+    unsigned           imm          = (unsigned)flags;
+    assert((0 <= imm) && (imm < ArrLen(armFlags)));
+    printf(armFlags[imm]);
+}
 
-        case IF_DV_4A: // DV_4A   .........X.mmmmm .aaaaannnnnddddd      Vd Va Vn Vm (scalar)
-            emitDispReg(id->idReg1(), size, true);
-            emitDispReg(id->idReg2(), size, true);
-            emitDispReg(id->idReg3(), size, true);
-            emitDispReg(id->idReg4(), size, false);
-            break;
+/*****************************************************************************
+ *
+ *  Display an ARM64 'barrier' for the memory barrier instructions
+ */
+void emitter::emitDispBarrier(insBarrier barrier)
+{
+    const static char* armBarriers[16] = {"#0", "oshld", "oshst", "osh", "#4",  "nshld", "nshst", "nsh",
+                                          "#8", "ishld", "ishst", "ish", "#12", "ld",    "st",    "sy"};
+    unsigned           imm             = (unsigned)barrier;
+    assert((0 <= imm) && (imm < ArrLen(armBarriers)));
+    printf(armBarriers[imm]);
+}
 
-        case IF_SN_0A: // SN_0A   ................ ................
-            if (ins == INS_align)
-            {
-                instrDescAlign* alignInstrId = (instrDescAlign*)id;
-                printf("[%d bytes", id->idIsEmptyAlign() ? 0 : INSTR_ENCODED_SIZE);
+/*****************************************************************************
+ *
+ *  Prints the encoding for the Shift Type encoding
+ */
 
-                // targetIG is only set for 1st of the series of align instruction
-                if ((alignInstrId->idaLoopHeadPredIG != nullptr) && (alignInstrId->loopHeadIG() != nullptr))
-                {
-                    printf(" for IG%02u", alignInstrId->loopHeadIG()->igNum);
-                }
-                printf("]");
-            }
-            break;
+void emitter::emitDispShiftOpts(insOpts opt)
+{
+    if (opt == INS_OPTS_LSL)
+        printf(" LSL ");
+    else if (opt == INS_OPTS_LSR)
+        printf(" LSR ");
+    else if (opt == INS_OPTS_ASR)
+        printf(" ASR ");
+    else if (opt == INS_OPTS_ROR)
+        printf(" ROR ");
+    else if (opt == INS_OPTS_MSL)
+        printf(" MSL ");
+    else
+        assert(!"Bad value");
+}
 
-        case IF_SI_0A: // SI_0A   ...........iiiii iiiiiiiiiii.....               imm16
-            emitDispImm(emitGetInsSC(id), false);
-            break;
+/*****************************************************************************
+ *
+ *  Prints the encoding for the Extend Type encoding
+ */
 
-        case IF_SI_0B: // SI_0B   ................ ....bbbb........               imm4 - barrier
-            emitDispBarrier((insBarrier)emitGetInsSC(id));
-            break;
+void emitter::emitDispExtendOpts(insOpts opt)
+{
+    if (opt == INS_OPTS_UXTB)
+        printf("UXTB");
+    else if (opt == INS_OPTS_UXTH)
+        printf("UXTH");
+    else if (opt == INS_OPTS_UXTW)
+        printf("UXTW");
+    else if (opt == INS_OPTS_UXTX)
+        printf("UXTX");
+    else if (opt == INS_OPTS_SXTB)
+        printf("SXTB");
+    else if (opt == INS_OPTS_SXTH)
+        printf("SXTH");
+    else if (opt == INS_OPTS_SXTW)
+        printf("SXTW");
+    else if (opt == INS_OPTS_SXTX)
+        printf("SXTX");
+    else
+        assert(!"Bad value");
+}
 
-        case IF_SR_1A: // SR_1A   ................ ...........ttttt      Rt       (dc zva, mrs)
-            if (ins == INS_mrs_tpid0)
-            {
-                emitDispReg(id->idReg1(), size, true);
-                printf("tpidr_el0");
-            }
-            else
-            {
-                emitDispReg(id->idReg1(), size, false);
-            }
-            break;
+//------------------------------------------------------------------------
+// emitDispReg: Display a general-purpose register name or SIMD and floating-point scalar register name
+//
+void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma)
+{
+    emitAttr size = EA_SIZE(attr);
+    printf(emitRegName(reg, size));
 
-        //  <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
-        case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated)
-        case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated)
-        case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated)
-        case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated)
-        case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated)
-        case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated)
-        case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated)
-        case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic
-        case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract
-        case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left
-                           // (predicated)
-        case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations
-        case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
-            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                   // mmmmm
-            break;
-
-        // <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T>
-        case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated)
-        {
-            PredicateType ptype = (id->idPredicateReg2Merge()) ? PREDICATE_MERGE : PREDICATE_ZERO;
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                 // nnnnn
-            emitDispLowPredicateReg(id->idReg2(), ptype, id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                // ddddd
-            break;
-        }
+    if (addComma)
+        emitDispComma();
+}
 
-        // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const>
-        case IF_SVE_AM_2A: // ........xx...... ...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
-            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
-            emitDispImm(emitGetInsSC(id), false);                                                  // iiii
-            break;
-
-        // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D
-        case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
-            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
-            emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false);                              // mmmmm
-            break;
-
-        // <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
-        // <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T>
-        case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend
-                           // (predicated)
-        case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand
-                           // (predicated)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
-            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);
-            emitDispSveReg(id->idReg4(), id->idInsOpt(), false);
-            break;
-
-        // <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
-        case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated)
-        case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
-        case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high
-                           // (unpredicated)
-        case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
-            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);  // nnnnn
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm
-            break;
-
-        // <Zd>.<T>, <R><n>, <R><m>
-        case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register
-                           // increment)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
-            emitDispReg(id->idReg2(), size, true);              // nnnnn
-            emitDispReg(id->idReg3(), size, false);             // mmmmm
-            break;
-
-        case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count
-            imm = emitGetInsSC(id);
-            emitDispReg(id->idReg1(), size, true);             // ddddd
-            emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp
-            if (imm > 1)
-            {
-                printf("mul ");
-                emitDispImm(emitGetInsSC(id), false, false); // iiii
-            }
-            break;
-
-        // <Zd>.<T>, <Zn>.<T>, <Zm>.D
-        case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);       // ddddd
-            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);       // nnnnn
-            emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm
-            break;
-
-        // <Pd>.H, <Pn>.B
-        case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_H, true);  // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_B, false); // NNNN
-            break;
+//------------------------------------------------------------------------
+// emitDispVectorReg: Display a SIMD vector register name with an arrangement suffix
+//
+void emitter::emitDispVectorReg(regNumber reg, insOpts opt, bool addComma)
+{
+    assert(isVectorRegister(reg));
+    printf(emitVectorRegName(reg));
+    emitDispArrangement(opt);
 
-        // <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T>
-        case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
-            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                   // mmmmm
-            break;
+    if (addComma)
+        emitDispComma();
+}
 
-        // <V><dn>, <Pg>, <V><dn>, <Zm>.<T>
-        // <R><dn>, <Pg>, <R><dn>, <Zm>.<T>
-        case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar
-        case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register
-        case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated)
-            emitDispReg(id->idReg1(), size, true);                                                 // ddddd
-            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispReg(id->idReg1(), size, true);                                                 // ddddd
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                   // mmmmm
-            break;
-
-        // <V><d>, <Pg>, <Zn>.<T>
-        // <R><d>, <Pg>, <Zn>.<T>
-        case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated)
-        case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated)
-        case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register
-        case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register
-        case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction
-            emitDispReg(id->idReg1(), size, true);                                              // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                // mmmmm
-            break;
+//------------------------------------------------------------------------
+// emitDispVectorRegIndex: Display a SIMD vector register name with element index
+//
+void emitter::emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma)
+{
+    assert(isVectorRegister(reg));
+    printf(emitVectorRegName(reg));
+    emitDispElemsize(elemsize);
+    printf("[%d]", (int)index);
 
-        // <Vd>.<T>, <Pg>, <Zn>.<Tb>
-        case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords)
-        case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords)
-        case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords)
-        case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords)
-            emitDispVectorReg(id->idReg1(), optSveToQuadwordElemsizeArrangement(id->idInsOpt()), true); // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);         // ggg
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                        // mmmmm
-            break;
+    if (addComma)
+        emitDispComma();
+}
 
-        // <Dd>, <Pg>, <Zn>.<T>
-        case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated)
-            emitDispReg(id->idReg1(), EA_8BYTE, true);                                          // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                // mmmmm
-            break;
-
-        // <Zd>.<T>, <Pg>/M, <Zn>.<T>
-        case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated)
-        case IF_SVE_AQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer unary operations (predicated)
-        case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements
-        case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated)
-        case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value
-        case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                // mmmmm
-            break;
-
-        // <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
-        case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // NNNN
-            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // MMMM
-            break;
-
-        // <Zd>.<T>, <Pg>, <Zn>.<T>
-        case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                // mmmmm
-            break;
-
-        // <Zd>.<T>, <Pg>/M, <V><n>
-        case IF_SVE_CP_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector
-                           // (predicated)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispReg(id->idReg3(), size, false);                                             // mmmmm
-            break;
-
-        // <Zd>.<T>, <Pg>/M, <R><n|SP>
-        case IF_SVE_CQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy general register to vector (predicated)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispReg(encodingZRtoSP(id->idReg3()), size, false);                             // mmmmm
-            break;
-
-        // <Zd>.Q, <Pg>/M, <Zn>.Q
-        case IF_SVE_CT_3A: // ................ ...gggnnnnnddddd -- SVE reverse doublewords
-            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_Q, true);                            // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_Q, false);                           // nnnnn
-            break;
-
-        // <Zd>.<T>, <Pv>, {<Zn1>.<T>, <Zn2>.<T>}
-        case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                             // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);             // VVV
-            emitDispSveConsecutiveRegList(id->idReg3(), insGetSveReg1ListSize(ins), id->idInsOpt(), false); // nnnnn
-            break;
-
-        // <Zdn>.<T>, <Pv>, <Zdn>.<T>, <Zm>.<T>
-        case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // VVV
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                // mmmmm
-            break;
-
-        // <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
-        case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
-        case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
-        case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);                                    // nnnnn
-            emitDispSveReg(id->idReg4(), id->idInsOpt(), false);                                   // mmmmm
-            break;
-
-        // <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D
-        case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);                                    // nnnnn
-            emitDispSveReg(id->idReg4(), INS_OPTS_SCALABLE_D, false);                              // mmmmm
-            break;
-
-        // <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm>
-        case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate
-        case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);                                    // nnnnn
-            emitDispImm(emitGetInsSC(id), false, (fmt == IF_SVE_CY_3B));                           // iiiii
-            break;
-
-        // <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
-        case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed)
-        case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
-        case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
-        // <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
-        case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed)
-        case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed)
-        // <Zd>.S, <Zn>.H, <Zm>.H[<imm>]
-        case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
-        case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
-        case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
-        // <Zda>.S, <Zn>.S, <Zm>.S[<imm>]
-        case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
-        case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
-        case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
-            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd
-            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);     // mmm
-            emitDispElementIndex(emitGetInsSC(id), false);           // ii/iii
-            break;
-
-        // <Zd>.D, <Zn>.S, <Zm>.S[<imm>]
-        case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
-        case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
-        // <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
-        case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
-        case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
-            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd
-            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);     // mmmm
-            emitDispElementIndex(emitGetInsSC(id), false);           // ii
-            break;
-
-        // <Zd>.H, <Zn>.H, <Zm>.H[<imm>]
-        case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
-        case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
-        // <Zd>.S, <Zn>.S, <Zm>.S[<imm>]
-        case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
-        case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
-        // <Zd>.D, <Zn>.D, <Zm>.D[<imm>]
-        case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
-        case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
-        // <Zda>.D, <Zn>.D, <Zm>.D[<imm>]
-        case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
-        case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
-        case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
-        // <Zda>.H, <Zn>.H, <Zm>.H[<imm>]
-        case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
-        case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
-        case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
-            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);  // nnnnn
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm
-            emitDispElementIndex(emitGetInsSC(id), false);       // i/ii/iii
-            break;
-
-        // <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
-        case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-        {
-            bool isFourReg =
-                !((ins == INS_sve_mov) || (ins == INS_sve_movs) || (ins == INS_sve_not) || (ins == INS_sve_nots));
-            PredicateType ptype = (ins == INS_sve_sel) ? PREDICATE_NONE : insGetPredicateType(fmt, 2);
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);      // DDDD
-            emitDispPredicateReg(id->idReg2(), ptype, id->idInsOpt(), true);                            // gggg
-            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), isFourReg); // NNNN
+//------------------------------------------------------------------------
+// emitDispVectorRegList: Display a SIMD vector register list
+//
+void emitter::emitDispVectorRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma)
+{
+    assert(isVectorRegister(firstReg));
 
-            if (isFourReg)
-            {
-                emitDispPredicateReg(id->idReg4(), insGetPredicateType(fmt, 4), id->idInsOpt(), false); // MMMM
-            }
+    regNumber currReg = firstReg;
 
-            break;
-        }
+    printf("{");
+    for (unsigned i = 0; i < listSize; i++)
+    {
+        const bool notLastRegister = (i != listSize - 1);
+        emitDispVectorReg(currReg, opt, notLastRegister);
+        currReg = (currReg == REG_V31) ? REG_V0 : REG_NEXT(currReg);
+    }
+    printf("}");
 
-        // <Pd>.B, <Pn>.B
-        case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-        case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // NNNN
-            break;
+    if (addComma)
+    {
+        emitDispComma();
+    }
+}
 
-        //  <Pd>.B, <Pg>/M, <Pn>.B
-        case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // gggg
-            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // NNNN
-            break;
+//------------------------------------------------------------------------
+// emitDispVectorElemList: Display a SIMD vector element list
+//
+void emitter::emitDispVectorElemList(
+    regNumber firstReg, unsigned listSize, emitAttr elemsize, unsigned index, bool addComma)
+{
+    assert(isVectorRegister(firstReg));
 
-        //  <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
-        case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition
-        {
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // gggg
-            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), true);  // NNNN
-            emitDispPredicateReg(id->idReg4(), insGetPredicateType(fmt, 4), id->idInsOpt(), false); // MMMM
-            break;
-        }
+    regNumber currReg = firstReg;
 
-        // <Pd>.B, <Pg>/<ZM>, <Pn>.B
-        case IF_SVE_DB_3A: // ................ ..gggg.NNNNMDDDD -- SVE partition break condition
-        case IF_SVE_DB_3B: // ................ ..gggg.NNNN.DDDD -- SVE partition break condition
+    printf("{");
+    for (unsigned i = 0; i < listSize; i++)
+    {
+        printf(emitVectorRegName(currReg));
+        emitDispElemsize(elemsize);
+        const bool notLastRegister = (i != listSize - 1);
+        if (notLastRegister)
         {
-            PredicateType ptype = (id->idPredicateReg2Merge()) ? PREDICATE_MERGE : PREDICATE_ZERO;
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
-            emitDispPredicateReg(id->idReg2(), ptype, id->idInsOpt(), true);                        // gggg
-            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // NNNN
-            break;
+            emitDispComma();
         }
+        currReg = (currReg == REG_V31) ? REG_V0 : REG_NEXT(currReg);
+    }
+    printf("}");
+    printf("[%d]", index);
 
-        // <Pdm>.B, <Pg>/Z, <Pn>.B, <Pdm>.B
-        case IF_SVE_DC_3A: // ................ ..gggg.NNNN.MMMM -- SVE propagate break to next partition
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // gggg
-            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), true);  // NNNN
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 4), id->idInsOpt(), false); // MMMM
-            break;
+    if (addComma)
+    {
+        emitDispComma();
+    }
+}
 
-        // <Pdn>.B, <Pg>, <Pdn>.B
-        case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // gggg
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // DDDD
-            break;
+//------------------------------------------------------------------------
+// emitDispArrangement: Display a SIMD vector arrangement suffix
+//
+void emitter::emitDispArrangement(insOpts opt)
+{
+    const char* str = "???";
 
-        // <Pd>.<T>{, <pattern>}
-        case IF_SVE_DE_1A: // ........xx...... ......ppppp.DDDD -- SVE predicate initialize
-        {
-            bool dispPattern = (id->idSvePattern() != SVE_PATTERN_ALL);
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), dispPattern); // DDDD
-            if (dispPattern)
-            {
-                emitDispSvePattern(id->idSvePattern(), false); // ppppp
-            }
+    switch (opt)
+    {
+        case INS_OPTS_8B:
+            str = "8b";
             break;
-        }
-
-        // <Pd>.<T>, <Pn>.<T>
-        case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // NNNN
+        case INS_OPTS_16B:
+            str = "16b";
             break;
-
-        // <Pdn>.<T>, <Pv>, <Pdn>.<T>
-        case IF_SVE_DF_2A: // ........xx...... .......VVVV.DDDD -- SVE predicate next active
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // VVVV
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // DDDD
+        case INS_OPTS_SCALABLE_B:
+            str = "b";
             break;
-
-        // <Pd>.B, <Pg>/Z
-        case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated)
-        case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // gggg
+        case INS_OPTS_4H:
+            str = "4h";
             break;
-
-        // <Pd>.B
-        case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated)
-        case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), false); // DDDD
+        case INS_OPTS_8H:
+            str = "8h";
             break;
-
-        // <Xd>, <Pg>, <Pn>.<T>
-        case IF_SVE_DK_3A:                         // ........xx...... ..gggg.NNNNddddd -- SVE predicate count
-            emitDispReg(id->idReg1(), size, true); // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // gggg
-            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // NNNN
+        case INS_OPTS_SCALABLE_H:
+            str = "h";
             break;
-
-        // <Zda>.<T>, <Pg>/M, <Zn>.<Tb>
-        case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
-            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
-            emitDispSveReg(id->idReg3(), (insOpts)((unsigned)id->idInsOpt() - 1), false);          // mmmmm
+        case INS_OPTS_2S:
+            str = "2s";
             break;
-
-        // <Zd>.H, { <Zn1>.S-<Zn2>.S }, #<const>
-        case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                        // ddddd
-            emitDispSveConsecutiveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_S, true); // nnnn
-            emitDispImm(emitGetInsSC(id), false);                                      // iiii
+        case INS_OPTS_4S:
+            str = "4s";
             break;
-
-        // <Xd>, <PNn>.<T>, <vl>
-        case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter)
-            emitDispReg(id->idReg1(), id->idOpSize(), true);                                    // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // NNNN
-            emitDispVectorLengthSpecifier(id);
+        case INS_OPTS_SCALABLE_S:
+        case INS_OPTS_SCALABLE_S_UXTW:
+        case INS_OPTS_SCALABLE_S_SXTW:
+            str = "s";
             break;
-
-        // <Xdn>, <Pm>.<T>
-        case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
-            emitDispReg(id->idReg1(), id->idOpSize(), true);                                     // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), false); // MMMM
+        case INS_OPTS_1D:
+            str = "1d";
             break;
-
-        // <Zdn>.<T>, <Pm>.<T>
-        case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
-        case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                  // ddddd
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), false); // MMMM
+        case INS_OPTS_2D:
+            str = "2d";
             break;
-
-        // <Xdn>, <Pm>.<T>, <Wdn>
-        // <Xdn>, <Pm>.<T>
-        case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
-            if ((ins == INS_sve_sqdecp) || (ins == INS_sve_sqincp))
-            {
-                // 32-bit result: <Xdn>, <Pm>.<T>, <Wdn>
-                // 64-bit result: <Xdn>, <Pm>.<T>
-                const bool is32BitResult = (id->idOpSize() == EA_4BYTE);                                     // X
-                emitDispReg(id->idReg1(), EA_8BYTE, true);                                                   // ddddd
-                emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), is32BitResult); // MMMM
-
-                if (is32BitResult)
-                {
-                    emitDispReg(id->idReg1(), EA_4BYTE, false);
-                }
-            }
-            else
-            {
-                assert((ins == INS_sve_uqdecp) || (ins == INS_sve_uqincp));
-                emitDispReg(id->idReg1(), id->idOpSize(), true);                                     // ddddd
-                emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), false); // MMMM
-            }
+        case INS_OPTS_SCALABLE_D:
+        case INS_OPTS_SCALABLE_D_UXTW:
+        case INS_OPTS_SCALABLE_D_SXTW:
+            str = "d";
             break;
-
-        // none
-        case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise
+        case INS_OPTS_SCALABLE_Q:
+            str = "q";
             break;
 
-        // <Pn>.B
-        case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), false); // NNNN
-            break;
+        default:
+            assert(!"Invalid insOpt");
+    }
+    printf(".");
+    printf(str);
+}
 
-        // <R><n>, <R><m>
-        case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars
-            emitDispReg(id->idReg1(), id->idOpSize(), true);  // nnnnn
-            emitDispReg(id->idReg2(), id->idOpSize(), false); // mmmmm
-            break;
+//------------------------------------------------------------------------
+// emitDispElemsize: Display a SIMD vector element suffix
+//
+void emitter::emitDispElemsize(emitAttr elemsize)
+{
+    const char* str = "???";
 
-        // <Zd>.<T>, <Zn>.<Tb>
-        case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                  // ddddd
-            emitDispSveReg(id->idReg2(), optWidenSveElemsizeArrangement(id->idInsOpt()), false); // nnnnn
+    switch (elemsize)
+    {
+        case EA_1BYTE:
+            str = ".b";
             break;
-
-        // <Zdn>.B, <Zdn>.B, <Zm>.B
-        // <Zdn>.S, <Zdn>.S, <Zm>.S
-        case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
-            emitDispSveReg(id->idReg2(), id->idInsOpt(), false); // mmmmm
+        case EA_2BYTE:
+            str = ".h";
             break;
-
-        // <Zdn>.B, <Zdn>.B
-        case IF_SVE_GL_1A: // ................ ...........ddddd -- SVE2 crypto unary operations
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), false); // ddddd
+        case EA_4BYTE:
+            str = ".s";
             break;
-
-        // <Pd>.<T>, <R><n>, <R><m>
-        case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit
-        // <Pd>.<T>, <Xn>, <Xm>
-        case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); // DDDD
-            emitDispReg(id->idReg2(), id->idOpSize(), true);                                    // nnnnn
-            emitDispReg(id->idReg3(), id->idOpSize(), false);                                   // mmmmm
+        case EA_8BYTE:
+            str = ".d";
             break;
 
-        // <Pd>.<T>, <PNn>[<imm>]
-        case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
-            emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD
-            emitDispPredicateReg(id->idReg2(), PREDICATE_N, id->idInsOpt(), false);    // NNN
-            emitDispElementIndex(emitGetInsSC(id), false);                             // ii
+        default:
+            assert(!"invalid elemsize");
             break;
+    }
 
-        // {<Pd1>.<T>, <Pd2>.<T>}, <PNn>[<imm>]
-        case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
-            emitDispPredicateRegPair(id->idReg1(), id->idInsOpt());                 // DDDD
-            emitDispPredicateReg(id->idReg2(), PREDICATE_N, id->idInsOpt(), false); // NNN
-            emitDispElementIndex(emitGetInsSC(id), false);                          // i
-            break;
+    printf(str);
+}
 
-        // {<Pd1>.<T>, <Pd2>.<T>}, <Xn>, <Xm>
-        case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. -- SVE integer compare scalar count and limit (predicate
-                           // pair)
-            emitDispLowPredicateRegPair(id->idReg1(), id->idInsOpt());
-            emitDispReg(id->idReg2(), id->idOpSize(), true);  // nnnnn
-            emitDispReg(id->idReg3(), id->idOpSize(), false); // mmmmm
-            break;
+//------------------------------------------------------------------------
+// emitDispShiftedReg: Display a register with an optional shift operation
+//
+void emitter::emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr)
+{
+    emitAttr size = EA_SIZE(attr);
+    assert((imm & 0x003F) == imm);
+    assert(((imm & 0x0020) == 0) || (size == EA_8BYTE));
 
-        // <PNd>.<T>, <Xn>, <Xm>, <vl>
-        case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit
-                           // (predicate-as-counter)
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); // DDD
-            emitDispReg(id->idReg2(), id->idOpSize(), true);                                    // nnnnn
-            emitDispReg(id->idReg3(), id->idOpSize(), true);                                    // mmmmm
-            emitDispVectorLengthSpecifier(id);
-            break;
+    printf(emitRegName(reg, size));
 
-        // PTRUE <PNd>.<T>
-        case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), false); // DDD
-            break;
+    if (imm > 0)
+    {
+        if (strictArmAsm)
+        {
+            emitDispComma();
+        }
+        emitDispShiftOpts(opt);
+        emitDispImm(imm, false);
+    }
+}
 
-        // FDUP <Zd>.<T>, #<const>
-        // FMOV <Zd>.<T>, #<const>
-        case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
-            emitDispFloatImm(emitGetInsSC(id));                 // iiiiiiii
-            break;
+/*****************************************************************************
+ *
+ *  Display a register with an optional extend and scale operations
+ */
+void emitter::emitDispExtendReg(regNumber reg, insOpts opt, ssize_t imm)
+{
+    assert((imm >= 0) && (imm <= 4));
+    assert(insOptsNone(opt) || insOptsAnyExtend(opt) || (opt == INS_OPTS_LSL));
 
-        // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-        // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-        // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-        // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-        // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-        // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-        // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
-        case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+    // size is based on the extend option, not the instr size.
+    // Assume INS_OPTS_NONE and INS_OPTS_LSL are 64bit as they usually are.
+    emitAttr size = (insOptsNone(opt) || insOptsLSL(opt) || insOpts64BitExtend(opt)) ? EA_8BYTE : EA_4BYTE;
 
-            FALLTHROUGH;
-        // DUP <Zd>.<T>, #<imm>{, <shift>}
-        // MOV <Zd>.<T>, #<imm>{, <shift>}
-        case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);             // ddddd
-            emitDispImmOptsLSL(emitGetInsSC(id), id->idOptionalShift(), 8); // iiiiiiii, h
-            break;
-
-        // FMOV <Zd>.<T>, #0.0
-        // (Preferred disassembly: FMOV <Zd>.<T>, #0)
-        case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
-            emitDispImm(0, false);
-            break;
-
-        // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm>
-        // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm>
-        // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm>
-        // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm>
-        case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated)
-        // MUL <Zdn>.<T>, <Zdn>.<T>, #<imm>
-        case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated)
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
-            emitDispImm(emitGetInsSC(id), false);               // iiiiiiii
-            break;
-
-        // CDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>], <const>
-        case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
+    if (strictArmAsm)
+    {
+        if (insOptsNone(opt) || (insOptsLSL(opt) && imm == 0))
         {
-            const ssize_t imm   = emitGetInsSC(id);
-            const ssize_t rot   = (imm & 0b11);
-            const ssize_t index = (imm >> 2);
-            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd
-            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);     // mmm
-            emitDispElementIndex(index, true);                       // ii
-
-            // rot specifies a multiple of 90-degree rotations
-            emitDispImm(rot * 90, false); // rr
-            break;
+            emitDispReg(reg, size, false);
         }
-
-        // CDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>], <const>
-        case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
+        else
         {
-            const ssize_t imm   = emitGetInsSC(id);
-            const ssize_t rot   = (imm & 0b11);
-            const ssize_t index = (imm >> 2);
-            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd
-            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);     // mmm
-            emitDispElementIndex(index, true);                       // i
+            emitDispReg(reg, size, true);
 
-            // rot specifies a multiple of 90-degree rotations
-            emitDispImm(rot * 90, false); // rr
-            break;
-        }
+            if (insOptsLSL(opt))
+                printf("LSL");
+            else
+                emitDispExtendOpts(opt);
 
-        // CMLA <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
-        case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
-        // CMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
-        case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
-        // SQRDCMLAH <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
-        case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
-        // SQRDCMLAH <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
-        case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
+            if (imm > 0)
+            {
+                printf(" ");
+                emitDispImm(imm, false);
+            }
+        }
+    }
+    else // !strictArmAsm
+    {
+        if (insOptsNone(opt))
         {
-            const ssize_t imm   = emitGetInsSC(id);
-            const ssize_t rot   = (imm & 0b11);
-            const ssize_t index = (imm >> 2);
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
-            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);  // nnnnn
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm
-            emitDispElementIndex(index, true);                   // i
-
-            // rot specifies a multiple of 90-degree rotations
-            emitDispImm(rot * 90, false); // rr
-            break;
+            emitDispReg(reg, size, false);
         }
-
-        // { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // Some of these formats may allow changing the element size instead of using 'D' for all instructions.
-        case IF_SVE_IH_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
-                             // immediate)
-        case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
-                             // immediate)
-        case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
-                             // immediate)
-        case IF_SVE_IJ_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate)
-        case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
-                             // immediate)
-        case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
-                             // immediate)
-        case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
-                             // immediate)
-        case IF_SVE_IM_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus
-                             // immediate)
-        // { <Zt>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
-        // { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
-        // { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
-        // { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
-        case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus
-                           // immediate)
-        // { <Zt1>.Q, <Zt2>.Q }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.Q, <Zt2>.Q, <Zt3>.Q }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.Q, <Zt2>.Q, <Zt3>.Q, <Zt4>.Q }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
-                           // immediate)
-        // { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
-        case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate)
-        // { <Zt1>.Q, <Zt2>.Q }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.Q, <Zt2>.Q, <Zt3>.Q }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.Q, <Zt2>.Q, <Zt3>.Q, <Zt4>.Q }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
-                           // immediate)
-        // { <Zt>.B }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt>.H }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt>.S }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
-                           // immediate)
-        // { <Zt>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt>.Q }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        case IF_SVE_JN_3C:   // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-        case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-        // { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        // { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate)
-            imm = emitGetInsSC(id);
-            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);            // ggg
-            printf("[");
-            emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn
-            if (imm != 0)
+        else
+        {
+            if (opt != INS_OPTS_LSL)
             {
-                switch (fmt)
-                {
-                    case IF_SVE_IO_3A:
-                        // This does not have to be printed as hex.
-                        // We only do it because the capstone disassembly displays this immediate as hex.
-                        // We could not modify capstone without affecting other cases.
-                        emitDispImm(emitGetInsSC(id), false, /* alwaysHex */ true); // iiii
-                        break;
-
-                    case IF_SVE_IQ_3A:
-                    case IF_SVE_IS_3A:
-                    case IF_SVE_JE_3A:
-                    case IF_SVE_JO_3A:
-                        // This does not have to be printed as hex.
-                        // We only do it because the capstone disassembly displays this immediate as hex.
-                        // We could not modify capstone without affecting other cases.
-                        emitDispImm(emitGetInsSC(id), true, /* alwaysHex */ true); // iiii
-                        printf("mul vl");
-                        break;
-
-                    default:
-                        emitDispImm(emitGetInsSC(id), true); // iiii
-                        printf("mul vl");
-                        break;
-                }
+                emitDispExtendOpts(opt);
+                printf("(");
+                emitDispReg(reg, size, false);
+                printf(")");
             }
-            printf("]");
-            break;
-
-        // {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>]
-        // {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
-        case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-        // {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
-        case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3]
-        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1]
-        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2]
-        case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                           // offsets)
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1]
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2]
-        case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
-        case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
-        case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
-        case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
-                           // offsets)
-        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
-        case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
-                             // unscaled offsets)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1]
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2]
-        case IF_SVE_HW_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                           // offsets)
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]
-        case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]
-        case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>]
-        case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3]
-        case IF_SVE_IU_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                           // scaled offsets)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]
-        case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]
-        case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]
-        case IF_SVE_HW_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                           // offsets)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
-        case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        // {<Zt>.S }, <Pg>/Z, [<Zn>.S{, <Xm>}]
-        case IF_SVE_IF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
-                           // scalar)
-        // {<Zt>.D }, <Pg>/Z, [<Zn>.D{, <Xm>}]
-        case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
-                             // scalar)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #3}]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #2}]
-        case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar)
-        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
-        case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        // {<Zt>.B }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
-        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
-        case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #2}]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #2}]
-        case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
-        case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
-        case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
-        // {<Zt>.Q }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
-        case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2]
-        case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2
-        case IF_SVE_IK_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
-        case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        // {<Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
-        case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        // {<Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar)
-        // {<Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar)
-        // {<Zt1>.Q, <Zt2>.Q }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #4]
-        // {<Zt1>.Q, <Zt2>.Q, <Zt3>.Q }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #4]
-        // {<Zt1>.Q, <Zt2>.Q, <Zt3>.Q, <Zt4>.Q }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #4]
-        case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
-                           // scalar)
-        // {<Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
-        // {<Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2]
-        // {<Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
-        // {<Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
-        // {<Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2]
-        // {<Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
-        // {<Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
-        // {<Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
-        // {<Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2]
-        // {<Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
-        case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3]
-        case IF_SVE_IU_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                           // scaled offsets)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
-        case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
-        case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        // {<Zt>.Q }, <Pg>/Z, [<Zn>.D{, <Xm>}]
-        case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar)
-        // {<Zt>.D }, <Pg>/Z, [<Zn>.D{, <Xm>}]
-        case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus
-                           // scalar)
-        // {<Zt>.Q }, <Pg>, [<Zn>.D{, <Xm>}]
-        case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar)
-        // {<Zt>.S }, <Pg>, [<Zn>.S{, <Xm>}]
-        case IF_SVE_IZ_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
-                           // scalar)
-        // {<Zt>.D }, <Pg>, [<Zn>.D{, <Xm>}]
-        case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
-                             // scalar)
-        // {<Zt>.D }, <Pg>, [<Zn>.D{, <Xm>}]
-        case IF_SVE_JA_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus
-                           // scalar)
-        // {<Zt>.B }, <Pg>, [<Xn|SP>, <Xm>]
-        // {<Zt>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
-        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
-        case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
-                           // scalar)
-        // {<Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, <Xm>]
-        // {<Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
-        // {<Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
-        // {<Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
-        // {<Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, <Xm>]
-        // {<Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
-        // {<Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
-        // {<Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
-        // {<Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, <Xm>]
-        // {<Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
-        // {<Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
-        // {<Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
-        case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar)
-        // {<Zt>.Q }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
-        case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-        // {<Zt>.Q }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
-        case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-        // {<Zt1>.Q, <Zt2>.Q }, <Pg>, [<Xn|SP>, <Xm>, LSL #4]
-        // {<Zt1>.Q, <Zt2>.Q, <Zt3>.Q }, <Pg>, [<Xn|SP>, <Xm>, LSL #4]
-        // {<Zt1>.Q, <Zt2>.Q, <Zt3>.Q, <Zt4>.Q }, <Pg>, [<Xn|SP>, <Xm>, LSL #4]
-        case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
-                           // scalar)
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1]
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2]
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3]
-        case IF_SVE_JJ_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                           // offsets)
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D]
-        case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D]
-        case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D]
-        case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
-                           // offsets)
-            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);            // ggg
-            emitDispSveModAddr(ins, id->idReg3(), id->idReg4(), id->idInsOpt(), fmt);                      // nnnnn
-                                                                                                           // mmmmm
-            break;
-
-        // {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-            imm = emitGetInsSC(id);
-            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);            // ggg
-            emitDispSveImmMulVl(id->idReg3(), imm);
-            break;
-
-        // {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
-        case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-            imm = emitGetInsSC(id);
-            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);            // ggg
-            emitDispSveImmMulVl(id->idReg3(), imm);
-            break;
+        }
+        if (imm > 0)
+        {
+            printf("*");
+            emitDispImm(ssize_t{1} << imm, false);
+        }
+    }
+}
 
-        // <Pt>, [<Xn|SP>{, #<imm>, MUL VL}]
-        case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register
-        // <Pt>, [<Xn|SP>{, #<imm>, MUL VL}]
-        case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register
-            imm = emitGetInsSC(id);
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); // TTTT
-            emitDispSveImmMulVl(id->idReg2(), imm);
-            break;
+/*****************************************************************************
+ *
+ *  Display an addressing operand [reg + imm]
+ */
+void emitter::emitDispAddrRI(regNumber reg, insOpts opt, ssize_t imm)
+{
+    reg = encodingZRtoSP(reg); // ZR (R31) encodes the SP register
 
-        // <Zt>, [<Xn|SP>{, #<imm>, MUL VL}]
-        case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register
-        // <Zt>, [<Xn|SP>{, #<imm>, MUL VL}]
-        case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register
-            imm = emitGetInsSC(id);
-            emitDispReg(id->idReg1(), EA_SCALABLE, true); // ttttt
-            emitDispSveImmMulVl(id->idReg2(), imm);
-            break;
+    if (strictArmAsm)
+    {
+        printf("[");
 
-        // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const>
-        case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated)
-            imm = emitGetInsSC(id);
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);
-            emitDispImm(emitDecodeRotationImm90_or_270(imm), false);
-            break;
+        emitDispReg(reg, EA_8BYTE, false);
 
-        // <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const>
-        case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated)
-            imm = emitGetInsSC(id);
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);
-            emitDispSveReg(id->idReg4(), id->idInsOpt(), true);
-            emitDispImm(emitDecodeRotationImm0_to_270(imm), false);
-            break;
-
-        // <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0
-        case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero
-            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);
-            emitDispFloatZero();
-            break;
+        if (!insOptsPostIndex(opt) && (imm != 0))
+        {
+            emitDispComma();
+            emitDispImm(imm, false, true, true);
+        }
+        printf("]");
 
-        // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const>
-        case IF_SVE_HM_2A: // ........xx...... ...ggg....iddddd -- SVE floating-point arithmetic with immediate
-                           // (predicated)
-            imm = emitGetInsSC(id);
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispSmallFloatImm(imm, id->idIns());
-            break;
+        if (insOptsPreIndex(opt))
+        {
+            printf("!");
+        }
+        else if (insOptsPostIndex(opt))
+        {
+            emitDispComma();
+            emitDispImm(imm, false, true, true);
+        }
+    }
+    else // !strictArmAsm
+    {
+        printf("[");
 
-        // <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm>
-        case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);
-            emitDispImm(emitGetInsSC(id), false);
-            break;
+        const char* operStr = "++";
+        if (imm < 0)
+        {
+            operStr = "--";
+            imm     = -imm;
+        }
 
-        // <Zd>.<T>, <Pg>/M, <Zn>.<T>
-        case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);
-            break;
+        if (insOptsPreIndex(opt))
+        {
+            printf(operStr);
+        }
 
-        // <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H
-        case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend
-        // <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T>
-        case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing
-                           // multiplicand
-            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
-            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);
-            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);
-            emitDispSveReg(id->idReg4(), id->idInsOpt(), false);
-            break;
+        emitDispReg(reg, EA_8BYTE, false);
 
-        default:
-            printf("unexpected format %s", emitIfName(id->idInsFmt()));
-            assert(!"unexpectedFormat");
-            break;
-    }
+        if (insOptsPostIndex(opt))
+        {
+            printf(operStr);
+        }
 
-    if (id->idIsLclVar())
-    {
-        printf("\t// ");
-        emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
-                         id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
-        if (id->idIsLclVarPair())
+        if (insOptsIndexed(opt))
         {
-            printf(", ");
-            emitLclVarAddr* iiaLclVar2 = emitGetLclVarPairLclVar2(id);
-            emitDispFrameRef(iiaLclVar2->lvaVarNum(), iiaLclVar2->lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs2,
-                             asmfm);
+            emitDispComma();
         }
+        else
+        {
+            printf("%c", operStr[1]);
+        }
+        emitDispImm(imm, false, true, true);
+        printf("]");
     }
-
-    printf("\n");
 }
 
 /*****************************************************************************
  *
- *  Display a stack frame reference.
+ *  Display an addressing operand [reg + extended reg]
  */
-
-void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
+void emitter::emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, bool isScaled, emitAttr size)
 {
-#ifdef DEBUG
-    printf("[");
+    reg1 = encodingZRtoSP(reg1); // ZR (R31) encodes the SP register
 
-    if (varx < 0)
-        printf("TEMP_%02u", -varx);
-    else
-        emitComp->gtDispLclVar(+varx, false);
+    unsigned scale = 0;
+    if (isScaled)
+    {
+        scale = NaturalScale_helper(size);
+    }
 
-    if (disp < 0)
-        printf("-0x%02x", -disp);
-    else if (disp > 0)
-        printf("+0x%02x", +disp);
+    printf("[");
+
+    if (strictArmAsm)
+    {
+        emitDispReg(reg1, EA_8BYTE, true);
+        emitDispExtendReg(reg2, opt, scale);
+    }
+    else // !strictArmAsm
+    {
+        emitDispReg(reg1, EA_8BYTE, false);
+        printf("+");
+        emitDispExtendReg(reg2, opt, scale);
+    }
 
     printf("]");
+}
+
+/*****************************************************************************
+ *
+ *  Display (optionally) the instruction encoding in hex
+ */
 
-    if ((varx >= 0) && emitComp->opts.varNames && (((IL_OFFSET)offs) != BAD_IL_OFFSET))
+void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz)
+{
+    if (!emitComp->opts.disCodeBytes)
     {
-        const char* varName = emitComp->compLocalVarName(varx, offs);
+        return;
+    }
 
-        if (varName)
+    // We do not display the instruction hex if we want diff-able disassembly
+    if (!emitComp->opts.disDiffable)
+    {
+        if (sz == 4)
         {
-            printf("'%s", varName);
-
-            if (disp < 0)
-                printf("-%d", -disp);
-            else if (disp > 0)
-                printf("+%d", +disp);
-
-            printf("'");
+            printf("  %08X    ", (*((code_t*)code)));
+        }
+        else
+        {
+            printf("              ");
         }
     }
-#endif
 }
 
-// Generate code for a load or store operation with a potentially complex addressing mode
-// This method handles the case of a GT_IND with contained GT_LEA op1 of the x86 form [base + index*scale + offset]
-// Since Arm64 does not directly support this complex of an addressing mode
-// we may generates up to three instructions for this for Arm64
-//
-void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir)
-{
-    GenTree* addr = indir->Addr();
-
-    if (addr->isContained())
-    {
-        assert(addr->OperIs(GT_LCL_ADDR, GT_LEA) || (addr->IsIconHandle(GTF_ICON_TLS_HDL)));
+/*****************************************************************************
+ *
+ *  Handles printing of LARGEJMP pseudo-instruction.
+ */
 
-        int   offset = 0;
-        DWORD lsl    = 0;
+void emitter::emitDispLargeJmp(
+    instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig)
+{
+    // Note: don't touch the actual instrDesc. If we accidentally messed it up, it would create a very
+    // difficult-to-find bug.
 
-        if (addr->OperGet() == GT_LEA)
-        {
-            offset = addr->AsAddrMode()->Offset();
-            if (addr->AsAddrMode()->gtScale > 0)
-            {
-                assert(isPow2(addr->AsAddrMode()->gtScale));
-                BitScanForward(&lsl, addr->AsAddrMode()->gtScale);
-            }
-        }
+    inlineInstrDesc<instrDescJmp> idJmp;
+    instrDescJmp*                 pidJmp = idJmp.id();
 
-        GenTree* memBase = indir->Base();
+    const instruction ins = id->idIns();
+    instruction       reverseIns;
+    insFormat         reverseFmt;
 
-        if (indir->HasIndex())
-        {
-            GenTree* index = indir->Index();
+    // Reverse the conditional instruction.
+    switch (ins)
+    {
+        case INS_cbz:
+            reverseIns = INS_cbnz;
+            reverseFmt = IF_BI_1A;
+            break;
+        case INS_cbnz:
+            reverseIns = INS_cbz;
+            reverseFmt = IF_BI_1A;
+            break;
+        case INS_tbz:
+            reverseIns = INS_tbnz;
+            reverseFmt = IF_BI_1B;
+            break;
+        case INS_tbnz:
+            reverseIns = INS_tbz;
+            reverseFmt = IF_BI_1B;
+            break;
+        default:
+            reverseIns = emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(ins)));
+            reverseFmt = IF_BI_0B;
+    }
 
-            if (offset != 0)
-            {
-                regNumber tmpReg = indir->GetSingleTempReg();
+    pidJmp->idIns(reverseIns);
+    pidJmp->idInsFmt(reverseFmt);
+    pidJmp->idOpSize(id->idOpSize());
+    pidJmp->idAddr()->iiaSetInstrCount(1);
+    pidJmp->idDebugOnlyInfo(id->idDebugOnlyInfo()); // Share the idDebugOnlyInfo() field.
 
-                emitAttr addType = varTypeIsGC(memBase) ? EA_BYREF : EA_PTRSIZE;
+    const size_t bcondSizeOrZero = (pCode == NULL) ? 0 : 4; // Branch is 4 bytes.
+    emitDispInsHelp(pidJmp, false, doffs, asmfm, offset, pCode, bcondSizeOrZero,
+                    NULL /* force display of pc-relative branch */);
 
-                if (emitIns_valid_imm_for_add(offset, EA_8BYTE))
-                {
-                    if (lsl > 0)
-                    {
-                        // Generate code to set tmpReg = base + index*scale
-                        emitIns_R_R_R_I(INS_add, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum(), lsl,
-                                        INS_OPTS_LSL);
-                    }
-                    else // no scale
-                    {
-                        // Generate code to set tmpReg = base + index
-                        emitIns_R_R_R(INS_add, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum());
-                    }
+    pCode += bcondSizeOrZero;
+    offset += 4;
 
-                    noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
+    // Next, display the unconditional branch.
 
-                    // Then load/store dataReg from/to [tmpReg + offset]
-                    emitIns_R_R_I(ins, attr, dataReg, tmpReg, offset);
-                }
-                else // large offset
-                {
-                    // First load/store tmpReg with the large offset constant
-                    codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
-                    // Then add the base register
-                    //      rd = rd + base
-                    emitIns_R_R_R(INS_add, addType, tmpReg, tmpReg, memBase->GetRegNum());
+    // Reset the local instrDesc.
+    memset(pidJmp, 0, sizeof(instrDescJmp));
 
-                    noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
-                    noway_assert(tmpReg != index->GetRegNum());
+    pidJmp->idIns(INS_b);
+    pidJmp->idInsFmt(IF_LARGEJMP);
 
-                    // Then load/store dataReg from/to [tmpReg + index*scale]
-                    emitIns_R_R_R_I(ins, attr, dataReg, tmpReg, index->GetRegNum(), lsl, INS_OPTS_LSL);
-                }
-            }
-            else // (offset == 0)
-            {
-                if (lsl > 0)
-                {
-                    // Then load/store dataReg from/to [memBase + index*scale]
-                    emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum(), INS_OPTS_LSL, lsl);
-                }
-                else // no scale
-                {
-                    if (index->OperIs(GT_BFIZ, GT_CAST) && index->isContained())
-                    {
-                        // Then load/store dataReg from/to [memBase + index*scale with sign/zero extension]
-                        GenTreeCast* cast;
-                        int          cns;
+    if (id->idIsBound())
+    {
+        pidJmp->idSetIsBound();
+        pidJmp->idAddr()->iiaIGlabel = id->idAddr()->iiaIGlabel;
+    }
+    else
+    {
+        pidJmp->idAddr()->iiaBBlabel = id->idAddr()->iiaBBlabel;
+    }
 
-                        if (index->OperIs(GT_BFIZ))
-                        {
-                            cast = index->gtGetOp1()->AsCast();
-                            cns  = (int)index->gtGetOp2()->AsIntCon()->IconValue();
-                        }
-                        else
-                        {
-                            cast = index->AsCast();
-                            cns  = 0;
-                        }
+    pidJmp->idDebugOnlyInfo(id->idDebugOnlyInfo()); // Share the idDebugOnlyInfo() field.
 
-                        // For now, this code only supports extensions from i32/u32
-                        assert(cast->isContained());
+    const size_t brSizeOrZero = (pCode == NULL) ? 0 : 4; // Unconditional branch is 4 bytes.
+    emitDispInsHelp(pidJmp, isNew, doffs, asmfm, offset, pCode, brSizeOrZero, ig);
+}
 
-                        emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), cast->CastOp()->GetRegNum(),
-                                          cast->IsUnsigned() ? INS_OPTS_UXTW : INS_OPTS_SXTW, cns);
-                    }
-                    else
-                    {
-                        // Then load/store dataReg from/to [memBase + index]
-                        emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum());
-                    }
-                }
-            }
-        }
-        else // no Index register
-        {
-            if (addr->OperIs(GT_LCL_ADDR))
-            {
-                GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
-                unsigned             lclNum  = varNode->GetLclNum();
-                unsigned             offset  = varNode->GetLclOffs();
-                if (emitInsIsStore(ins))
-                {
-                    emitIns_S_R(ins, attr, dataReg, lclNum, offset);
-                }
-                else
-                {
-                    emitIns_R_S(ins, attr, dataReg, lclNum, offset);
-                }
-            }
-            else if (addr->IsIconHandle(GTF_ICON_TLS_HDL))
-            {
-                // On Arm64, TEB is in r18, so load from the r18 as base.
-                emitIns_R_R_I(ins, attr, dataReg, REG_R18, addr->AsIntCon()->IconValue());
-            }
-            else if (emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(indir->TypeGet())))
-            {
-                // Then load/store dataReg from/to [memBase + offset]
-                emitIns_R_R_I(ins, attr, dataReg, memBase->GetRegNum(), offset);
-            }
-            else
-            {
-                // We require a tmpReg to hold the offset
-                regNumber tmpReg = indir->GetSingleTempReg();
+/*****************************************************************************
+ *
+ *  Wrapper for emitter::emitDispInsHelp() that handles special large jump
+ *  pseudo-instruction.
+ */
 
-                // First load/store tmpReg with the large offset constant
-                codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+void emitter::emitDispIns(
+    instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig)
+{
+    // Special case: IF_LARGEJMP
 
-                // Then load/store dataReg from/to [memBase + tmpReg]
-                emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), tmpReg);
-            }
-        }
-    }
-    else // addr is not contained, so we evaluate it into a register
+    if ((id->idInsFmt() == IF_LARGEJMP) && id->idIsBound())
     {
-#ifdef DEBUG
-        if (addr->OperIs(GT_LCL_ADDR))
-        {
-            // If the local var is a gcref or byref, the local var better be untracked, because we have
-            // no logic here to track local variable lifetime changes, like we do in the contained case
-            // above. E.g., for a `str r0,[r1]` for byref `r1` to local `V01`, we won't store the local
-            // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth.
-            LclVarDsc* varDsc = emitComp->lvaGetDesc(addr->AsLclVarCommon());
-            assert(!varDsc->lvTracked);
-        }
-#endif // DEBUG
-
-        // Then load/store dataReg from/to [addrReg]
-        emitIns_R_R(ins, attr, dataReg, addr->GetRegNum());
+        // This is a pseudo-instruction format representing a large conditional branch. See the comment
+        // in emitter::emitOutputLJ() for the full description.
+        //
+        // For this pseudo-instruction, we will actually generate:
+        //
+        //      b<!cond> L_not  // 4 bytes. Note that we reverse the condition.
+        //      b L_target      // 4 bytes.
+        //   L_not:
+        //
+        // These instructions don't exist in the actual instruction stream, so we need to fake them
+        // up to display them.
+        emitDispLargeJmp(id, isNew, doffs, asmfm, offset, pCode, sz, ig);
+    }
+    else
+    {
+        emitDispInsHelp(id, isNew, doffs, asmfm, offset, pCode, sz, ig);
     }
 }
 
-// The callee must call genConsumeReg() for any non-contained srcs
-// and genProduceReg() for any non-contained dsts.
-
-regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
+//--------------------------------------------------------------------
+// emitDispInsHelp: Dump the given instruction to jitstdout.
+//
+// Arguments:
+//   id - The instruction
+//   isNew - Whether the instruction is newly generated (before encoding).
+//   doffs - If true, always display the passed-in offset.
+//   asmfm - Whether the instruction should be displayed in assembly format.
+//           If false some additional information may be printed for the instruction.
+//   offset - The offset of the instruction. Only displayed if doffs is true or if
+//            !isNew && !asmfm.
+//   code - Pointer to the actual code, used for displaying the address and encoded bytes
+//          if turned on.
+//   sz - The size of the instruction, used to display the encoded bytes.
+//   ig - The instruction group containing the instruction.
+//
+void emitter::emitDispInsHelp(
+    instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig)
 {
-    // dst can only be a reg
-    assert(!dst->isContained());
-
-    // src can be immed or reg
-    assert(!src->isContained() || src->isContainedIntOrIImmed());
-
-    // find immed (if any) - it cannot be a dst
-    GenTreeIntConCommon* intConst = nullptr;
-    if (src->isContainedIntOrIImmed())
+#ifdef DEBUG
+    if (EMITVERBOSE)
     {
-        intConst = src->AsIntConCommon();
+        unsigned idNum =
+            id->idDebugOnlyInfo()->idNum; // Do not remove this!  It is needed for VisualStudio conditional breakpoints
+
+        printf("IN%04x: ", idNum);
     }
+#endif
 
-    if (intConst)
+    if (pCode == NULL)
     {
-        emitIns_R_I(ins, attr, dst->GetRegNum(), intConst->IconValue());
-        return dst->GetRegNum();
+        sz = 0;
     }
-    else
+
+    if (!isNew && !asmfm && sz)
     {
-        emitIns_R_R(ins, attr, dst->GetRegNum(), src->GetRegNum());
-        return dst->GetRegNum();
+        doffs = true;
     }
-}
 
-// The callee must call genConsumeReg() for any non-contained srcs
-// and genProduceReg() for any non-contained dsts.
+    /* Display the instruction address */
 
-regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2)
-{
-    // dst can only be a reg
-    assert(!dst->isContained());
+    emitDispInsAddr(pCode);
 
-    // find immed (if any) - it cannot be a dst
-    // Only one src can be an int.
-    GenTreeIntConCommon* intConst  = nullptr;
-    GenTree*             nonIntReg = nullptr;
+    /* Display the instruction offset */
 
-    if (varTypeIsFloating(dst))
+    emitDispInsOffs(offset, doffs);
+
+    BYTE* pCodeRW = nullptr;
+    if (pCode != nullptr)
     {
-        // src1 can only be a reg
-        assert(!src1->isContained());
-        // src2 can only be a reg
-        assert(!src2->isContained());
+        /* Display the instruction hex code */
+        assert(((pCode >= emitCodeBlock) && (pCode < emitCodeBlock + emitTotalHotCodeSize)) ||
+               ((pCode >= emitColdCodeBlock) && (pCode < emitColdCodeBlock + emitTotalColdCodeSize)));
+
+        pCodeRW = pCode + writeableOffset;
     }
-    else // not floating point
+
+    emitDispInsHex(id, pCodeRW, sz);
+
+    printf("      ");
+
+    /* Get the instruction and format */
+
+    instruction ins = id->idIns();
+    insFormat   fmt = id->idInsFmt();
+
+    emitDispInst(ins);
+
+    /* If this instruction has just been added, check its size */
+
+    assert(isNew == false || (int)emitSizeOfInsDsc(id) == emitCurIGfreeNext - (BYTE*)id);
+
+    /* Figure out the operand size */
+    emitAttr size = id->idOpSize();
+    emitAttr attr = size;
+    if (id->idGCref() == GCT_GCREF)
+        attr = EA_GCREF;
+    else if (id->idGCref() == GCT_BYREF)
+        attr = EA_BYREF;
+
+    switch (fmt)
     {
-        // src2 can be immed or reg
-        assert(!src2->isContained() || src2->isContainedIntOrIImmed());
+        ssize_t      imm;
+        int          doffs;
+        bitMaskImm   bmi;
+        halfwordImm  hwi;
+        condFlagsImm cfi;
+        unsigned     scale;
+        unsigned     immShift;
+        bool         hasShift;
+        const char*  methodName;
+        emitAttr     elemsize;
+        emitAttr     datasize;
+        emitAttr     srcsize;
+        emitAttr     dstsize;
+        ssize_t      index;
+        ssize_t      index2;
+        unsigned     registerListSize;
+        const char*  targetName;
 
-        // Check src2 first as we can always allow it to be a contained immediate
-        if (src2->isContainedIntOrIImmed())
-        {
-            intConst  = src2->AsIntConCommon();
-            nonIntReg = src1;
-        }
-        // Only for commutative operations do we check src1 and allow it to be a contained immediate
-        else if (dst->OperIsCommutative())
+        case IF_BI_0A: // BI_0A   ......iiiiiiiiii iiiiiiiiiiiiiiii               simm26:00
+        case IF_BI_0B: // BI_0B   ......iiiiiiiiii iiiiiiiiiii.....               simm19:00
+        case IF_LARGEJMP:
         {
-            // src1 can be immed or reg
-            assert(!src1->isContained() || src1->isContainedIntOrIImmed());
+            if (fmt == IF_LARGEJMP)
+            {
+                printf("(LARGEJMP)");
+            }
+            if (id->idAddr()->iiaHasInstrCount())
+            {
+                int instrCount = id->idAddr()->iiaGetInstrCount();
 
-            // Check src1 and allow it to be a contained immediate
-            if (src1->isContainedIntOrIImmed())
+                if (ig == nullptr)
+                {
+                    printf("pc%s%d instructions", (instrCount >= 0) ? "+" : "", instrCount);
+                }
+                else
+                {
+                    unsigned       insNum  = emitFindInsNum(ig, id);
+                    UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1);
+                    UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount);
+                    ssize_t        relOffs = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
+                    printf("pc%s%d (%d instructions)", (relOffs >= 0) ? "+" : "", (int)relOffs, (int)instrCount);
+                }
+            }
+            else if (id->idIsBound())
             {
-                assert(!src2->isContainedIntOrIImmed());
-                intConst  = src1->AsIntConCommon();
-                nonIntReg = src2;
+                emitPrintLabel(id->idAddr()->iiaIGlabel);
+            }
+            else
+            {
+                printf("L_M%03u_" FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum);
             }
         }
-        else
-        {
-            // src1 can only be a reg
-            assert(!src1->isContained());
-        }
-    }
+        break;
 
-    bool isMulOverflow = false;
-    if (dst->gtOverflowEx())
-    {
-        if ((ins == INS_add) || (ins == INS_adds))
-        {
-            ins = INS_adds;
-        }
-        else if ((ins == INS_sub) || (ins == INS_subs))
-        {
-            ins = INS_subs;
-        }
-        else if (ins == INS_mul)
-        {
-            isMulOverflow = true;
-            assert(intConst == nullptr); // overflow format doesn't support an int constant operand
-        }
-        else
+        case IF_BI_0C: // BI_0C   ......iiiiiiiiii iiiiiiiiiiiiiiii               simm26:00
+            methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
+            printf("%s", methodName);
+            break;
+
+        case IF_BI_1A: // BI_1A   ......iiiiiiiiii iiiiiiiiiiittttt      Rt       simm19:00
+        case IF_BI_1B: // BI_1B   B.......bbbbbiii iiiiiiiiiiittttt      Rt imm6, simm14:00
         {
-            assert(!"Invalid ins for overflow check");
+            assert(insOptsNone(id->idInsOpt()));
+            emitDispReg(id->idReg1(), size, true);
+
+            if (fmt == IF_BI_1B)
+            {
+                emitDispImm(emitGetInsSC(id), true);
+            }
+
+            if (id->idAddr()->iiaHasInstrCount())
+            {
+                int instrCount = id->idAddr()->iiaGetInstrCount();
+
+                if (ig == nullptr)
+                {
+                    printf("pc%s%d instructions", (instrCount >= 0) ? "+" : "", instrCount);
+                }
+                else
+                {
+                    unsigned       insNum  = emitFindInsNum(ig, id);
+                    UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1);
+                    UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount);
+                    ssize_t        relOffs = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
+                    printf("pc%s%d (%d instructions)", (relOffs >= 0) ? "+" : "", (int)relOffs, (int)instrCount);
+                }
+            }
+            else if (id->idIsBound())
+            {
+                emitPrintLabel(id->idAddr()->iiaIGlabel);
+            }
+            else
+            {
+                printf("L_M%03u_" FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum);
+            }
         }
-    }
-    if (intConst != nullptr)
-    {
-        emitIns_R_R_I(ins, attr, dst->GetRegNum(), nonIntReg->GetRegNum(), intConst->IconValue());
-    }
-    else
-    {
-        if (isMulOverflow)
-        {
-            regNumber extraReg = dst->GetSingleTempReg();
-            assert(extraReg != dst->GetRegNum());
+        break;
+
+        case IF_BR_1A: // BR_1A   ................ ......nnnnn.....         Rn
+            assert(insOptsNone(id->idInsOpt()));
+            emitDispReg(id->idReg1(), size, false);
+            break;
 
-            if ((dst->gtFlags & GTF_UNSIGNED) != 0)
+        case IF_BR_1B: // BR_1B   ................ ......nnnnn.....         Rn
+            // The size of a branch target is always EA_PTRSIZE
+            assert(insOptsNone(id->idInsOpt()));
+            emitDispReg(id->idReg3(), EA_PTRSIZE, false);
+            break;
+
+        case IF_LS_1A: // LS_1A   XX...V..iiiiiiii iiiiiiiiiiittttt      Rt    PC imm(1MB)
+        case IF_DI_1E: // DI_1E   .ii.....iiiiiiii iiiiiiiiiiiddddd      Rd       simm21
+        case IF_LARGELDC:
+        case IF_LARGEADR:
+            assert(insOptsNone(id->idInsOpt()));
+            emitDispReg(id->idReg1(), size, true);
+            imm        = emitGetInsSC(id);
+            targetName = nullptr;
+
+            /* Is this actually a reference to a data section? */
+            if (fmt == IF_LARGEADR)
             {
-                if (attr == EA_4BYTE)
-                {
-                    // Compute 8 byte results from 4 byte by 4 byte multiplication.
-                    emitIns_R_R_R(INS_umull, EA_8BYTE, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum());
+                printf("(LARGEADR)");
+            }
+            else if (fmt == IF_LARGELDC)
+            {
+                printf("(LARGELDC)");
+            }
 
-                    // Get the high result by shifting dst.
-                    emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->GetRegNum(), 32);
-                }
-                else
-                {
-                    assert(attr == EA_8BYTE);
-                    // Compute the high result.
-                    emitIns_R_R_R(INS_umulh, attr, extraReg, src1->GetRegNum(), src2->GetRegNum());
+            printf("[");
+            if (id->idAddr()->iiaIsJitDataOffset())
+            {
+                doffs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd);
+                /* Display a data section reference */
 
-                    // Now multiply without skewing the high result.
-                    emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum());
-                }
+                if (doffs & 1)
+                    printf("@CNS%02u", doffs - 1);
+                else
+                    printf("@RWD%02u", doffs);
 
-                // zero-sign bit comparison to detect overflow.
-                emitIns_R_I(INS_cmp, attr, extraReg, 0);
+                if (imm != 0)
+                    printf("%+Id", imm);
             }
             else
             {
-                int bitShift = 0;
-                if (attr == EA_4BYTE)
+                assert(imm == 0);
+                if (id->idIsReloc())
                 {
-                    // Compute 8 byte results from 4 byte by 4 byte multiplication.
-                    emitIns_R_R_R(INS_smull, EA_8BYTE, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum());
-
-                    // Get the high result by shifting dst.
-                    emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->GetRegNum(), 32);
+                    printf("HIGH RELOC ");
+                    emitDispImm((ssize_t)id->idAddr()->iiaAddr, false);
+                    size_t targetHandle = id->idDebugOnlyInfo()->idMemCookie;
 
-                    bitShift = 31;
+#ifdef DEBUG
+                    if (targetHandle == THT_InitializeArrayIntrinsics)
+                    {
+                        targetName = "InitializeArrayIntrinsics";
+                    }
+                    else if (targetHandle == THT_GSCookieCheck)
+                    {
+                        targetName = "GlobalSecurityCookieCheck";
+                    }
+                    else if (targetHandle == THT_SetGSCookie)
+                    {
+                        targetName = "SetGlobalSecurityCookie";
+                    }
+#endif
+                }
+                else if (id->idIsBound())
+                {
+                    emitPrintLabel(id->idAddr()->iiaIGlabel);
                 }
                 else
                 {
-                    assert(attr == EA_8BYTE);
-                    // Save the high result in a temporary register.
-                    emitIns_R_R_R(INS_smulh, attr, extraReg, src1->GetRegNum(), src2->GetRegNum());
-
-                    // Now multiply without skewing the high result.
-                    emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum());
-
-                    bitShift = 63;
+                    printf("L_M%03u_" FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum);
                 }
-
-                // Sign bit comparison to detect overflow.
-                emitIns_R_R_I(INS_cmp, attr, extraReg, dst->GetRegNum(), bitShift, INS_OPTS_ASR);
             }
-        }
-        else
-        {
-            // We can just multiply.
-            emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum());
-        }
-    }
-
-    if (dst->gtOverflowEx())
-    {
-        assert(!varTypeIsFloating(dst));
-        codeGen->genCheckOverflow(dst);
-    }
-
-    return dst->GetRegNum();
-}
-
-#if defined(DEBUG) || defined(LATE_DISASM)
-
-void emitter::getMemoryOperation(instrDesc* id, unsigned* pMemAccessKind, bool* pIsLocalAccess)
-{
-    unsigned    memAccessKind = PERFSCORE_MEMORY_NONE;
-    bool        isLocalAccess = false;
-    instruction ins           = id->idIns();
-
-    if (emitInsIsLoadOrStore(ins))
-    {
-        if (emitInsIsLoad(ins))
-        {
-            if (emitInsIsStore(ins))
+            printf("]");
+            if (targetName != nullptr)
             {
-                memAccessKind = PERFSCORE_MEMORY_READ_WRITE;
+                printf("      // [%s]", targetName);
             }
             else
             {
-                memAccessKind = PERFSCORE_MEMORY_READ;
+                emitDispCommentForHandle(id->idDebugOnlyInfo()->idMemCookie, 0, id->idDebugOnlyInfo()->idFlags);
             }
-        }
-        else
-        {
-            assert(emitInsIsStore(ins));
-            memAccessKind = PERFSCORE_MEMORY_WRITE;
-        }
+            break;
 
-        insFormat insFmt = id->idInsFmt();
+        case IF_LS_2A: // LS_2A   .X.......X...... ......nnnnnttttt      Rt Rn
+            assert(insOptsNone(id->idInsOpt()));
+            assert((emitGetInsSC(id) == 0) || id->idIsTlsGD());
+            emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+            emitDispAddrRI(id->idReg2(), id->idInsOpt(), 0);
+            break;
 
-        switch (insFmt)
-        {
-            case IF_LS_1A:
-                isLocalAccess = true;
-                break;
+        case IF_LS_2B: // LS_2B   .X.......Xiiiiii iiiiiinnnnnttttt      Rt Rn    imm(0-4095)
+            assert(insOptsNone(id->idInsOpt()));
+            imm   = emitGetInsSC(id);
+            scale = NaturalScale_helper(emitInsLoadStoreSize(id));
+            imm <<= scale; // The immediate is scaled by the size of the ld/st
+            emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+            emitDispAddrRI(id->idReg2(), id->idInsOpt(), imm);
+            break;
 
-            case IF_LS_2A:
-            case IF_LS_2B:
-            case IF_LS_2C:
-            case IF_LS_2D:
-            case IF_LS_2E:
-            case IF_LS_2F:
-            case IF_LS_2G:
-            case IF_LS_3A:
-            case IF_LS_3F:
-            case IF_LS_3G:
-                if (isStackRegister(id->idReg2()))
-                {
-                    isLocalAccess = true;
-                }
-                break;
+        case IF_LS_2C: // LS_2C   .X.......X.iiiii iiiiPPnnnnnttttt      Rt Rn    imm(-256..+255) no/pre/post inc
+            assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+            imm = emitGetInsSC(id);
+            emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+            emitDispAddrRI(id->idReg2(), id->idInsOpt(), imm);
+            break;
 
-            case IF_LS_3B:
-            case IF_LS_3C:
-            case IF_LS_3D:
-            case IF_LS_3E:
-                if (isStackRegister(id->idReg3()))
-                {
-                    isLocalAccess = true;
-                }
-                break;
-            case IF_LARGELDC:
-                isLocalAccess = false;
-                break;
+        case IF_LS_2D: // LS_2D   .Q.............. ....ssnnnnnttttt      Vt Rn
+        case IF_LS_2E: // LS_2E   .Q.............. ....ssnnnnnttttt      Vt Rn
+            registerListSize = insGetRegisterListSize(id->idIns());
+            emitDispVectorRegList(id->idReg1(), registerListSize, id->idInsOpt(), true);
 
-            default:
-                assert(!"Logic Error");
-                memAccessKind = PERFSCORE_MEMORY_NONE;
-                break;
-        }
-    }
+            if (fmt == IF_LS_2D)
+            {
+                // Load/Store multiple structures       base register
+                // Load single structure and replicate  base register
+                emitDispAddrRI(id->idReg2(), INS_OPTS_NONE, 0);
+            }
+            else
+            {
+                // Load/Store multiple structures       post-indexed by an immediate
+                // Load single structure and replicate  post-indexed by an immediate
+                emitDispAddrRI(id->idReg2(), INS_OPTS_POST_INDEX, id->idSmallCns());
+            }
+            break;
 
-    *pMemAccessKind = memAccessKind;
-    *pIsLocalAccess = isLocalAccess;
-}
+        case IF_LS_2F: // LS_2F   .Q.............. xx.Sssnnnnnttttt      Vt[] Rn
+        case IF_LS_2G: // LS_2G   .Q.............. xx.Sssnnnnnttttt      Vt[] Rn
+            registerListSize = insGetRegisterListSize(id->idIns());
+            elemsize         = id->idOpSize();
+            emitDispVectorElemList(id->idReg1(), registerListSize, elemsize, id->idSmallCns(), true);
 
-//----------------------------------------------------------------------------------------
-// getInsExecutionCharacteristics:
-//    Returns the current instruction execution characteristics
-//
-// Arguments:
-//    id  - The current instruction descriptor to be evaluated
-//
-// Return Value:
-//    A struct containing the current instruction execution characteristics
-//
-// Notes:
-//    The instruction latencies and throughput values returned by this function
-//    are from
-//
-//    The Arm Cortex-A55 Software Optimization Guide:
-//    https://static.docs.arm.com/epm128372/20/arm_cortex_a55_software_optimization_guide_v2.pdf
-//
-emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(instrDesc* id)
-{
-    insExecutionCharacteristics result;
-    instruction                 ins    = id->idIns();
-    insFormat                   insFmt = id->idInsFmt();
+            if (fmt == IF_LS_2F)
+            {
+                // Load/Store single structure  base register
+                emitDispAddrRI(id->idReg2(), INS_OPTS_NONE, 0);
+            }
+            else
+            {
+                // Load/Store single structure  post-indexed by an immediate
+                emitDispAddrRI(id->idReg2(), INS_OPTS_POST_INDEX, (registerListSize * elemsize));
+            }
+            break;
 
-    unsigned memAccessKind;
-    bool     isLocalAccess;
-    getMemoryOperation(id, &memAccessKind, &isLocalAccess);
+        case IF_LS_3A: // LS_3A   .X.......X.mmmmm oooS..nnnnnttttt      Rt Rn Rm ext(Rm) LSL {}
+            assert(insOptsLSExtend(id->idInsOpt()));
+            emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+            if (id->idIsLclVar())
+            {
+                emitDispAddrRRExt(id->idReg2(), codeGen->rsGetRsvdReg(), id->idInsOpt(), false, size);
+            }
+            else
+            {
+                emitDispAddrRRExt(id->idReg2(), id->idReg3(), id->idInsOpt(), id->idReg3Scaled(), size);
+            }
+            break;
 
-    result.insThroughput = PERFSCORE_THROUGHPUT_ILLEGAL;
-    result.insLatency    = PERFSCORE_LATENCY_ILLEGAL;
+        case IF_LS_3B: // LS_3B   X............... .aaaaannnnnddddd      Rt Ra Rn
+            assert(insOptsNone(id->idInsOpt()));
+            assert(emitGetInsSC(id) == 0);
+            emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+            emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true);
+            emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0);
+            break;
 
-    // Initialize insLatency based upon the instruction's memAccessKind and local access values
-    //
-    if (memAccessKind == PERFSCORE_MEMORY_READ)
-    {
-        result.insLatency = isLocalAccess ? PERFSCORE_LATENCY_RD_STACK : PERFSCORE_LATENCY_RD_GENERAL;
-    }
-    else if (memAccessKind == PERFSCORE_MEMORY_WRITE)
-    {
-        result.insLatency = isLocalAccess ? PERFSCORE_LATENCY_WR_STACK : PERFSCORE_LATENCY_WR_GENERAL;
-    }
-    else if (memAccessKind == PERFSCORE_MEMORY_READ_WRITE)
-    {
-        result.insLatency = isLocalAccess ? PERFSCORE_LATENCY_RD_WR_STACK : PERFSCORE_LATENCY_RD_WR_GENERAL;
-    }
+        case IF_LS_3C: // LS_3C   X.........iiiiii iaaaaannnnnddddd      Rt Ra Rn imm(im7,sh)
+            assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+            imm   = emitGetInsSC(id);
+            scale = NaturalScale_helper(emitInsLoadStoreSize(id));
+            imm <<= scale;
+            emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+            emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true);
+            emitDispAddrRI(id->idReg3(), id->idInsOpt(), imm);
+            break;
 
-    switch (insFmt)
-    {
-        //
-        //  Branch Instructions
-        //
+        case IF_LS_3D: // LS_3D   .X.......X.mmmmm ......nnnnnttttt      Wm Rt Rn
+            assert(insOptsNone(id->idInsOpt()));
+            emitDispReg(id->idReg1(), EA_4BYTE, true);
+            emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true);
+            emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0);
+            break;
 
-        case IF_BI_0A:                                      // b, bl_local
-        case IF_BI_0C:                                      // bl, b_tail
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C; // but is Dual Issue
-            result.insLatency    = PERFSCORE_LATENCY_1C;
+        case IF_LS_3E: // LS_3E   .X.........mmmmm ......nnnnnttttt      Rm Rt Rn ARMv8.1 LSE Atomics
+            assert(insOptsNone(id->idInsOpt()));
+            assert((EA_SIZE(size) == 4) || (EA_SIZE(size) == 8));
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, true);
+            emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0);
             break;
 
-        case IF_BI_0B: // beq, bne, bge, blt, bgt, ble, ...
-        case IF_BI_1A: // cbz, cbnz
-        case IF_BI_1B: // tbz, tbnz
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_1C;
+        case IF_LS_3F: // LS_3F   .Q.........mmmmm ....ssnnnnnttttt      Vt Rn Rm
+        case IF_LS_3G: // LS_3G   .Q.........mmmmm ...Sssnnnnnttttt      Vt[] Rn Rm
+            registerListSize = insGetRegisterListSize(id->idIns());
+
+            if (fmt == IF_LS_3F)
+            {
+                // Load/Store multiple structures       post-indexed by a register
+                // Load single structure and replicate  post-indexed by a register
+                emitDispVectorRegList(id->idReg1(), registerListSize, id->idInsOpt(), true);
+            }
+            else
+            {
+                // Load/Store single structure          post-indexed by a register
+                elemsize = id->idOpSize();
+                emitDispVectorElemList(id->idReg1(), registerListSize, elemsize, id->idSmallCns(), true);
+            }
+
+            printf("[");
+            emitDispReg(encodingZRtoSP(id->idReg2()), EA_8BYTE, false);
+            printf("], ");
+            emitDispReg(id->idReg3(), EA_8BYTE, false);
             break;
 
-        case IF_LARGEJMP: // bcc + b
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
+        case IF_DI_1A: // DI_1A   X.......shiiiiii iiiiiinnnnn.....      Rn       imm(i12,sh)
+            emitDispReg(id->idReg1(), size, true);
+            emitDispImmOptsLSL(emitGetInsSC(id), insOptsLSL12(id->idInsOpt()), 12);
+            emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags);
             break;
 
-        case IF_BR_1B: // blr, br_tail
-            if (ins == INS_blr)
+        case IF_DI_1B: // DI_1B   X........hwiiiii iiiiiiiiiiiddddd      Rd       imm(i16,hw)
+            emitDispReg(id->idReg1(), size, true);
+            hwi.immHWVal = (unsigned)emitGetInsSC(id);
+            if (ins == INS_mov)
             {
-                result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                result.insLatency    = PERFSCORE_LATENCY_1C;
-                break;
+                emitDispImm(emitDecodeHalfwordImm(hwi, size), false);
             }
-            // otherwise we should have a br_tail instruction
-            assert(ins == INS_br_tail);
-            FALLTHROUGH;
-        case IF_BR_1A: // ret, br
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_1C;
+            else // movz, movn, movk
+            {
+                emitDispImm(hwi.immVal, false);
+                if (hwi.immHW != 0)
+                {
+                    emitDispShiftOpts(INS_OPTS_LSL);
+                    emitDispImm(hwi.immHW * 16, false);
+                }
+            }
+            emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags);
             break;
 
-        //
-        //  Arithmetic and logical instructions
-        //
+        case IF_DI_1C: // DI_1C   X........Nrrrrrr ssssssnnnnn.....         Rn    imm(N,r,s)
+            emitDispReg(id->idReg1(), size, true);
+            bmi.immNRS = (unsigned)emitGetInsSC(id);
+            emitDispImm(emitDecodeBitMaskImm(bmi, size), false);
+            emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags);
+            break;
 
-        // ALU, basic
-        case IF_DR_3A: // add, adds, adc, adcs, and, ands, bic, bics,
-                       // eon, eor, orn, orr, sub, subs, sbc, sbcs
-                       // asr, asrv, lsl, lslv, lsr, lsrv, ror, rorv
-                       // sdiv, udiv, mul, smull, smulh, umull, umulh, mneg
-        case IF_DR_2A: // cmp, cmn, tst
+        case IF_DI_1D: // DI_1D   X........Nrrrrrr ssssss.....ddddd      Rd       imm(N,r,s)
+            emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+            bmi.immNRS = (unsigned)emitGetInsSC(id);
+            emitDispImm(emitDecodeBitMaskImm(bmi, size), false);
+            emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags);
+            break;
 
-            switch (ins)
+        case IF_DI_2A: // DI_2A   X.......shiiiiii iiiiiinnnnnddddd      Rd Rn    imm(i12,sh)
+            if ((ins == INS_add) || (ins == INS_sub))
             {
-                case INS_mul:
-                case INS_smull:
-                case INS_umull:
-                case INS_mneg:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
-                    break;
+                emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+                emitDispReg(encodingZRtoSP(id->idReg2()), size, true);
+            }
+            else
+            {
+                emitDispReg(id->idReg1(), size, true);
+                emitDispReg(id->idReg2(), size, true);
+            }
+            if (id->idIsReloc())
+            {
+                assert(ins == INS_add);
+                printf("[LOW RELOC ");
+                emitDispImm((ssize_t)id->idAddr()->iiaAddr, false);
+                printf("]");
+            }
+            else
+            {
+                emitDispImmOptsLSL(emitGetInsSC(id), insOptsLSL12(id->idInsOpt()), 12);
+            }
+            break;
 
-                case INS_smulh:
-                case INS_umulh:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_6C;
-                    break;
+        case IF_DI_2B: // DI_2B   X........X.nnnnn ssssssnnnnnddddd      Rd Rn    imm(0-63)
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, true);
+            emitDispImm(emitGetInsSC(id), false);
+            break;
 
-                case INS_sdiv:
-                case INS_udiv:
-                    if (id->idOpSize() == EA_4BYTE)
-                    {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
-                        result.insLatency    = PERFSCORE_LATENCY_12C;
-                        break;
-                    }
-                    else
-                    {
-                        assert(id->idOpSize() == EA_8BYTE);
-                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
-                        result.insLatency    = PERFSCORE_LATENCY_20C;
-                        break;
-                    }
+        case IF_DI_2C: // DI_2C   X........Nrrrrrr ssssssnnnnnddddd      Rd Rn    imm(N,r,s)
+            if (ins == INS_ands)
+            {
+                emitDispReg(id->idReg1(), size, true);
+            }
+            else
+            {
+                emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+            }
+            emitDispReg(id->idReg2(), size, true);
+            bmi.immNRS = (unsigned)emitGetInsSC(id);
+            emitDispImm(emitDecodeBitMaskImm(bmi, size), false);
+            break;
 
-                case INS_add:
-                case INS_adds:
-                case INS_adc:
-                case INS_adcs:
-                case INS_and:
-                case INS_ands:
-                case INS_bic:
-                case INS_bics:
-                case INS_eon:
-                case INS_eor:
-                case INS_orn:
-                case INS_orr:
-                case INS_sub:
-                case INS_subs:
-                case INS_sbc:
-                case INS_sbcs:
-                case INS_asr:
-                case INS_lsl:
-                case INS_lsr:
-                case INS_ror:
-                case INS_cmp:
-                case INS_cmn:
-                case INS_tst:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_1C;
-                    break;
+        case IF_DI_2D: // DI_2D   X........Nrrrrrr ssssssnnnnnddddd      Rd Rn    imr, ims   (N,r,s)
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, true);
 
-                case INS_asrv:
-                case INS_lslv:
-                case INS_lsrv:
-                case INS_rorv:
-                    // variable shift by register
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_1C;
+            imm        = emitGetInsSC(id);
+            bmi.immNRS = (unsigned)imm;
+
+            switch (ins)
+            {
+                case INS_bfm:
+                case INS_sbfm:
+                case INS_ubfm:
+                    emitDispImm(bmi.immR, true);
+                    emitDispImm(bmi.immS, false);
                     break;
 
-                case INS_crc32b:
-                case INS_crc32h:
-                case INS_crc32cb:
-                case INS_crc32ch:
-                case INS_crc32x:
-                case INS_crc32cx:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                case INS_bfi:
+                case INS_sbfiz:
+                case INS_ubfiz:
+                    emitDispImm(getBitWidth(size) - bmi.immR, true);
+                    emitDispImm(bmi.immS + 1, false);
                     break;
 
-                case INS_crc32w:
-                case INS_crc32cw:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_1C;
+                case INS_bfxil:
+                case INS_sbfx:
+                case INS_ubfx:
+                    emitDispImm(bmi.immR, true);
+                    emitDispImm(bmi.immS - bmi.immR + 1, false);
                     break;
 
-                case INS_smaddl:
-                case INS_smsubl:
-                case INS_smnegl:
-                case INS_umaddl:
-                case INS_umsubl:
-                case INS_umnegl:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                case INS_asr:
+                case INS_lsr:
+                case INS_lsl:
+                    emitDispImm(imm, false);
                     break;
 
                 default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
+                    assert(!"Unexpected instruction in IF_DI_2D");
             }
+
             break;
 
-        // ALU, basic immediate
-        case IF_DI_1A: // cmp, cmn
-        case IF_DI_1C: // tst
-        case IF_DI_1D: // mov reg, imm(N,r,s)
-        case IF_DI_1E: // adr, adrp
-        case IF_DI_1F: // ccmp, ccmn
-        case IF_DI_2A: // add, adds, suv, subs
-        case IF_DI_2C: // and, ands, eor, orr
+        case IF_DI_1F: // DI_1F   X..........iiiii cccc..nnnnn.nzcv      Rn imm5  nzcv cond
+            emitDispReg(id->idReg1(), size, true);
+            cfi.immCFVal = (unsigned)emitGetInsSC(id);
+            emitDispImm(cfi.imm5, true);
+            emitDispFlags(cfi.flags);
+            emitDispComma();
+            emitDispCond(cfi.cond);
+            break;
 
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_1C;
+        case IF_DR_1D: // DR_1D   X............... cccc.......mmmmm      Rd       cond
+            emitDispReg(id->idReg1(), size, true);
+            cfi.immCFVal = (unsigned)emitGetInsSC(id);
+            emitDispCond(cfi.cond);
             break;
 
-        case IF_DR_2D: // cinc, cinv, cneg
-        case IF_DR_2E: // mov, neg, mvn, negs
-        case IF_DI_1B: // mov, movk, movn, movz
+        case IF_DR_2A: // DR_2A   X..........mmmmm ......nnnnn.....         Rn Rm
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, false);
+            break;
 
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_1C;
+        case IF_DR_2B: // DR_2B   X.......sh.mmmmm ssssssnnnnn.....         Rn Rm {LSL,LSR,ASR,ROR} imm(0-63)
+            emitDispReg(id->idReg1(), size, true);
+            emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size);
             break;
 
-        case IF_LARGEADR: // adrp + add
-        case IF_LARGELDC: // adrp + ldr
+        case IF_DR_2C: // DR_2C   X..........mmmmm ooosssnnnnn.....         Rn Rm ext(Rm) LSL imm(0-4)
+            emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+            imm = emitGetInsSC(id);
+            emitDispExtendReg(id->idReg2(), id->idInsOpt(), imm);
+            break;
 
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
+        case IF_DR_2D: // DR_2D   X..........nnnnn cccc..nnnnnddddd      Rd Rn    cond
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, true);
+            cfi.immCFVal = (unsigned)emitGetInsSC(id);
+            emitDispCond(cfi.cond);
             break;
 
-        // ALU, shift by immediate
-        case IF_DR_3B: // add, adds, and, ands, bic, bics,
-                       // eon, eor, orn, orr, sub, subs
-        case IF_DR_2B: // cmp, cmn, tst
-        case IF_DR_2F: // neg, negs, mvn
-        case IF_DI_2B: // ror
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
+        case IF_DR_2E: // DR_2E   X..........mmmmm ...........ddddd      Rd    Rm
+        case IF_DV_2U: // DV_2U   ................ ......nnnnnddddd      Sd    Sn
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, false);
             break;
 
-        // ALU, extend, scale
-        case IF_DR_3C: // add, adc, and, bic, eon, eor, orn, orr, sub, sbc
-        case IF_DR_2C: // cmp
-        case IF_DV_2U: // sha1h
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
+        case IF_DR_2F: // DR_2F   X.......sh.mmmmm ssssss.....ddddd      Rd    Rm {LSL,LSR,ASR} imm(0-63)
+            emitDispReg(id->idReg1(), size, true);
+            emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size);
             break;
-        // ALU, Conditional select
-        case IF_DR_1D: // cset, csetm
-        case IF_DR_3D: // csel, csinc, csinv, csneg
 
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_1C;
+        case IF_DR_2G: // DR_2G   X............... ......nnnnnddddd      Rd Rn
+            emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+            emitDispReg(encodingZRtoSP(id->idReg2()), size, false);
             break;
 
-        // ALU, Conditional compare
-        case IF_DR_2I: // ccmp , ccmn
+        case IF_DR_2H: // DR_2H   X........X...... ......nnnnnddddd      Rd Rn
+            if ((ins == INS_uxtb) || (ins == INS_uxth))
+            {
+                // There is no 64-bit variant of uxtb and uxth
+                // However, we allow idOpSize() to have EA_8BYTE value for these instruction
+                emitDispReg(id->idReg1(), EA_4BYTE, true);
+                emitDispReg(id->idReg2(), EA_4BYTE, false);
+            }
+            else
+            {
+                emitDispReg(id->idReg1(), size, true);
+                // sxtb, sxth and sxtb always operate on 32-bit source register
+                emitDispReg(id->idReg2(), EA_4BYTE, false);
+            }
+            break;
 
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_1C;
+        case IF_DR_2I: // DR_2I   X..........mmmmm cccc..nnnnn.nzcv      Rn Rm    nzcv cond
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, true);
+            cfi.immCFVal = (unsigned)emitGetInsSC(id);
+            emitDispFlags(cfi.flags);
+            emitDispComma();
+            emitDispCond(cfi.cond);
             break;
 
-        // Multiply accumulate
-        case IF_DR_4A: // madd, msub, smaddl, smsubl, umaddl, umsubl
-            if (id->idOpSize() == EA_4BYTE)
+        case IF_DR_3A: // DR_3A   X..........mmmmm ......nnnnnmmmmm      Rd Rn Rm
+            if ((ins == INS_add) || (ins == INS_sub))
+            {
+                emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+                emitDispReg(encodingZRtoSP(id->idReg2()), size, true);
+            }
+            else if ((ins == INS_smulh) || (ins == INS_umulh))
+            {
+                size = EA_8BYTE;
+                // smulh Xd, Xn, Xm
+                emitDispReg(id->idReg1(), size, true);
+                emitDispReg(id->idReg2(), size, true);
+            }
+            else if ((ins == INS_smull) || (ins == INS_umull) || (ins == INS_smnegl) || (ins == INS_umnegl))
+            {
+                // smull Xd, Wn, Wm
+                emitDispReg(id->idReg1(), EA_8BYTE, true);
+                size = EA_4BYTE;
+                emitDispReg(id->idReg2(), size, true);
+            }
+            else
+            {
+                emitDispReg(id->idReg1(), size, true);
+                emitDispReg(id->idReg2(), size, true);
+            }
+
+            if (id->idIsLclVar())
+            {
+                emitDispReg(codeGen->rsGetRsvdReg(), size, false);
+            }
+            else
+            {
+                emitDispReg(id->idReg3(), size, false);
+            }
+
+            break;
+
+        case IF_DR_3B: // DR_3B   X.......sh.mmmmm ssssssnnnnnddddd      Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, true);
+            emitDispShiftedReg(id->idReg3(), id->idInsOpt(), emitGetInsSC(id), size);
+            break;
+
+        case IF_DR_3C: // DR_3C   X..........mmmmm ooosssnnnnnddddd      Rd Rn Rm ext(Rm) LSL imm(0-4)
+            emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+            emitDispReg(encodingZRtoSP(id->idReg2()), size, true);
+            imm = emitGetInsSC(id);
+            emitDispExtendReg(id->idReg3(), id->idInsOpt(), imm);
+            break;
+
+        case IF_DR_3D: // DR_3D   X..........mmmmm cccc..nnnnnmmmmm      Rd Rn Rm cond
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, true);
+            emitDispReg(id->idReg3(), size, true);
+            cfi.immCFVal = (unsigned)emitGetInsSC(id);
+            emitDispCond(cfi.cond);
+            break;
+
+        case IF_DR_3E: // DR_3E   X........X.mmmmm ssssssnnnnnddddd      Rd Rn Rm imm(0-63)
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, true);
+            emitDispReg(id->idReg3(), size, true);
+            emitDispImm(emitGetInsSC(id), false);
+            break;
+
+        case IF_DR_4A: // DR_4A   X..........mmmmm .aaaaannnnnmmmmm      Rd Rn Rm Ra
+            if ((ins == INS_smaddl) || (ins == INS_smsubl) || (ins == INS_umaddl) || (ins == INS_umsubl))
+            {
+                // smaddl Xd, Wn, Wm, Xa
+                emitDispReg(id->idReg1(), EA_8BYTE, true);
+                emitDispReg(id->idReg2(), EA_4BYTE, true);
+                emitDispReg(id->idReg3(), EA_4BYTE, true);
+                emitDispReg(id->idReg4(), EA_8BYTE, false);
+            }
+            else
+            {
+                emitDispReg(id->idReg1(), size, true);
+                emitDispReg(id->idReg2(), size, true);
+                emitDispReg(id->idReg3(), size, true);
+                emitDispReg(id->idReg4(), size, false);
+            }
+            break;
+
+        case IF_DV_1A: // DV_1A   .........X.iiiii iii........ddddd      Vd imm8 (fmov - immediate scalar)
+            elemsize = id->idOpSize();
+            emitDispReg(id->idReg1(), elemsize, true);
+            emitDispFloatImm(emitGetInsSC(id));
+            break;
+
+        case IF_DV_1B: // DV_1B   .QX..........iii cmod..iiiiiddddd      Vd imm8 (immediate vector)
+            imm      = emitGetInsSC(id) & 0x0ff;
+            immShift = (emitGetInsSC(id) & 0x700) >> 8;
+            hasShift = (immShift != 0);
+            elemsize = optGetElemsize(id->idInsOpt());
+            if (id->idInsOpt() == INS_OPTS_1D)
+            {
+                assert(elemsize == size);
+                emitDispReg(id->idReg1(), size, true);
+            }
+            else
+            {
+                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+            }
+            if (ins == INS_fmov)
             {
-                result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                result.insLatency    = PERFSCORE_LATENCY_3C;
-                break;
+                emitDispFloatImm(imm);
+                assert(hasShift == false);
             }
             else
             {
-                assert(id->idOpSize() == EA_8BYTE);
-                result.insThroughput = PERFSCORE_THROUGHPUT_5C;
-                result.insLatency    = PERFSCORE_LATENCY_3C;
-                break;
+                if (elemsize == EA_8BYTE)
+                {
+                    assert(ins == INS_movi);
+                    ssize_t       imm64 = 0;
+                    const ssize_t mask8 = 0xFF;
+                    for (unsigned b = 0; b < 8; b++)
+                    {
+                        if (imm & (ssize_t{1} << b))
+                        {
+                            imm64 |= (mask8 << (b * 8));
+                        }
+                    }
+                    emitDispImm(imm64, hasShift, true);
+                }
+                else
+                {
+                    emitDispImm(imm, hasShift, true);
+                }
+                if (hasShift)
+                {
+                    insOpts  opt   = (immShift & 0x4) ? INS_OPTS_MSL : INS_OPTS_LSL;
+                    unsigned shift = (immShift & 0x3) * 8;
+                    emitDispShiftOpts(opt);
+                    emitDispImm(shift, false);
+                }
             }
-
-        // Miscellaneous Data Preocessing instructions
-        case IF_DR_3E: // extr
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
-        case IF_DR_2H: // sxtb, sxth, sxtw, uxtb, uxth, sha1h
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_1C;
             break;
 
-        case IF_DI_2D: // lsl, lsr, asr, sbfm, bfm, ubfm, sbfiz, bfi, ubfiz, sbfx, bfxil, ubfx
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
+        case IF_DV_1C: // DV_1C   .........X...... ......nnnnn.....      Vn #0.0 (fcmp - with zero)
+            elemsize = id->idOpSize();
+            emitDispReg(id->idReg1(), elemsize, true);
+            emitDispFloatZero();
             break;
 
-        case IF_DR_2G: // mov sp, cls, clz, rbit, rev16, rev32, rev
-            if (ins == INS_rbit)
+        case IF_DV_2A: // DV_2A   .Q.......X...... ......nnnnnddddd      Vd Vn   (fabs, fcvt - vector)
+            if (emitInsIsVectorLong(ins))
             {
-                result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                result.insLatency    = PERFSCORE_LATENCY_2C;
-                break;
+                emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+            }
+            else if (emitInsIsVectorNarrow(ins))
+            {
+                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), false);
             }
             else
             {
-                result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                result.insLatency    = PERFSCORE_LATENCY_1C;
-                break;
+                assert(!emitInsIsVectorWide(ins));
+                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+            }
+            if (ins == INS_fcmeq || ins == INS_fcmge || ins == INS_fcmgt || ins == INS_fcmle || ins == INS_fcmlt)
+            {
+                emitDispComma();
+                emitDispFloatZero();
             }
-
-        //
-        //  Load/Store Instructions
-        //
-
-        case IF_LS_1A: // ldr, ldrsw (literal, pc relative immediate)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
-
-        case IF_LS_2A: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh (no immediate)
-                       // ldar, ldarb, ldarh, ldapr, ldaprb, ldaprh, ldxr, ldxrb, ldxrh,
-                       // ldaxr, ldaxrb, ldaxrh, stlr, stlrb, stlrh
-
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            // ToDo: store release have 2/4 cycle latency
-            break;
-
-        case IF_LS_2B: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh (scaled immediate)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
-
-        case IF_LS_2C: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh
-                       // ldur, ldurb, ldurh, ldursb, ldursh, ldursw, stur, sturb, sturh
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             break;
 
-        case IF_LS_3A: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb strh (register extend, scale 2,4,8)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+        case IF_DV_2P: // DV_2P   ................ ......nnnnnddddd      Vd Vn   (aes*, sha1su1)
+            emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
             break;
 
-        case IF_LS_3B: // ldp, ldpsw, ldnp, stp, stnp  (load/store pair zero offset)
-        case IF_LS_3C: // load/store pair with offset pre/post inc
-            if (memAccessKind == PERFSCORE_MEMORY_READ)
+        case IF_DV_2M: // DV_2M   .Q......XX...... ......nnnnnddddd      Vd Vn   (abs, neg - vector)
+            if (emitInsIsVectorNarrow(ins))
             {
-                // ldp, ldpsw, ldnp
-                result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                if (emitIGisInEpilog(emitCurIG) && (ins == INS_ldp))
-                {
-                    // Reduce latency for ldp instructions in the epilog
-                    //
-                    result.insLatency = PERFSCORE_LATENCY_2C;
-                }
-                else if (id->idOpSize() == EA_8BYTE) // X-form
-                {
-                    // the X-reg variant has an extra cycle of latency
-                    // and two cycle throughput
-                    result.insLatency += 1.0;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                }
+                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), false);
             }
-            else // store instructions
+            else
             {
-                // stp, stnp
-                assert(memAccessKind == PERFSCORE_MEMORY_WRITE);
-                result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                assert(!emitInsIsVectorLong(ins) && !emitInsIsVectorWide(ins));
+                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+            }
+            if (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt)
+            {
+                emitDispComma();
+                emitDispImm(0, false);
             }
             break;
 
-        case IF_LS_3D: // stxr, stxrb, stxrh, stlxr, stlxrb, srlxrh
-            // Store exclusive register, returning status
-            assert(emitInsIsStore(ins));
-            // @ToDo - find out the actual latency
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = max(PERFSCORE_LATENCY_4C, result.insLatency);
-            break;
-
-        case IF_LS_3E: //  ARMv8.1 LSE Atomics
-            if (memAccessKind == PERFSCORE_MEMORY_WRITE)
+        case IF_DV_2N: // DV_2N   .........iiiiiii ......nnnnnddddd      Vd Vn imm   (shift - scalar)
+            elemsize = id->idOpSize();
+            if (emitInsIsVectorLong(ins))
             {
-                // staddb, staddlb, staddh, staddlh, stadd. staddl
-                result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                result.insLatency    = PERFSCORE_LATENCY_2C;
+                emitDispReg(id->idReg1(), widenDatasize(elemsize), true);
+                emitDispReg(id->idReg2(), elemsize, true);
+            }
+            else if (emitInsIsVectorNarrow(ins))
+            {
+                emitDispReg(id->idReg1(), elemsize, true);
+                emitDispReg(id->idReg2(), widenDatasize(elemsize), true);
             }
             else
             {
-                assert(memAccessKind == PERFSCORE_MEMORY_READ_WRITE);
-                result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                result.insLatency    = max(PERFSCORE_LATENCY_3C, result.insLatency);
+                assert(!emitInsIsVectorWide(ins));
+                emitDispReg(id->idReg1(), elemsize, true);
+                emitDispReg(id->idReg2(), elemsize, true);
             }
+            imm = emitGetInsSC(id);
+            emitDispImm(imm, false);
             break;
 
-        case IF_LS_2D:
-        case IF_LS_2E:
-        case IF_LS_3F:
-            // Load/Store multiple structures
-            // Load single structure and replicate
-            switch (ins)
+        case IF_DV_2O: // DV_2O   .Q.......iiiiiii ......nnnnnddddd      Vd Vn imm   (shift - vector)
+            if ((ins == INS_sxtl) || (ins == INS_sxtl2) || (ins == INS_uxtl) || (ins == INS_uxtl2))
             {
-                case INS_ld1:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                        result.insLatency    = PERFSCORE_LATENCY_3C;
-                    }
-                    else
-                    {
-                        // Q-form
-                        assert(id->idOpSize() == EA_16BYTE);
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
-                    break;
-
-                case INS_ld1_2regs:
-                case INS_ld2:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
-                    else
-                    {
-                        // Q-form
-                        assert(id->idOpSize() == EA_16BYTE);
-                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
-                        result.insLatency    = PERFSCORE_LATENCY_6C;
-                    }
-                    break;
-
-                case INS_ld1_3regs:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                        result.insLatency    = PERFSCORE_LATENCY_5C;
-                    }
-                    else
-                    {
-                        // Q-form
-                        assert(id->idOpSize() == EA_16BYTE);
-                        result.insThroughput = PERFSCORE_THROUGHPUT_6C;
-                        result.insLatency    = PERFSCORE_LATENCY_8C;
-                    }
-                    break;
-
-                case INS_ld1_4regs:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
-                        result.insLatency    = PERFSCORE_LATENCY_6C;
-                    }
-                    else
-                    {
-                        // Q-form
-                        assert(id->idOpSize() == EA_16BYTE);
-                        result.insThroughput = PERFSCORE_THROUGHPUT_8C;
-                        result.insLatency    = PERFSCORE_LATENCY_10C;
-                    }
-                    break;
+                assert((emitInsIsVectorLong(ins)));
+                emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+            }
+            else
+            {
+                if (emitInsIsVectorLong(ins))
+                {
+                    emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+                }
+                else if (emitInsIsVectorNarrow(ins))
+                {
+                    emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                    emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                }
+                else
+                {
+                    assert(!emitInsIsVectorWide(ins));
+                    emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+                }
 
-                case INS_ld3:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D-form
-                        if (optGetElemsize(id->idInsOpt()) == EA_4BYTE)
-                        {
-                            // S
-                            result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                            result.insLatency    = PERFSCORE_LATENCY_5C;
-                        }
-                        else
-                        {
-                            // B/H
-                            result.insThroughput = PERFSCORE_THROUGHPUT_4C;
-                            result.insLatency    = PERFSCORE_LATENCY_6C;
-                        }
-                    }
-                    else
-                    {
-                        // Q-form
-                        assert(id->idOpSize() == EA_16BYTE);
-                        if ((optGetElemsize(id->idInsOpt()) == EA_4BYTE) ||
-                            (optGetElemsize(id->idInsOpt()) == EA_8BYTE))
-                        {
-                            // S/D
-                            result.insThroughput = PERFSCORE_THROUGHPUT_6C;
-                            result.insLatency    = PERFSCORE_LATENCY_8C;
-                        }
-                        else
-                        {
-                            // B/H
-                            result.insThroughput = PERFSCORE_THROUGHPUT_7C;
-                            result.insLatency    = PERFSCORE_LATENCY_9C;
-                        }
-                    }
-                    break;
+                imm = emitGetInsSC(id);
+                emitDispImm(imm, false);
+            }
+            break;
 
-                case INS_ld4:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D-form
-                        if (optGetElemsize(id->idInsOpt()) == EA_4BYTE)
-                        {
-                            // S
-                            result.insThroughput = PERFSCORE_THROUGHPUT_4C;
-                            result.insLatency    = PERFSCORE_LATENCY_6C;
-                        }
-                        else
-                        {
-                            // B/H
-                            result.insThroughput = PERFSCORE_THROUGHPUT_5C;
-                            result.insLatency    = PERFSCORE_LATENCY_7C;
-                        }
-                    }
-                    else
-                    {
-                        // Q-form
-                        assert(id->idOpSize() == EA_16BYTE);
-                        if ((optGetElemsize(id->idInsOpt()) == EA_4BYTE) ||
-                            (optGetElemsize(id->idInsOpt()) == EA_8BYTE))
-                        {
-                            // S/D
-                            result.insThroughput = PERFSCORE_THROUGHPUT_8C;
-                            result.insLatency    = PERFSCORE_LATENCY_10C;
-                        }
-                        else
-                        {
-                            // B/H
-                            result.insThroughput = PERFSCORE_THROUGHPUT_9C;
-                            result.insLatency    = PERFSCORE_LATENCY_11C;
-                        }
-                    }
-                    break;
+        case IF_DV_2B: // DV_2B   .Q.........iiiii ......nnnnnddddd      Rd Vn[] (umov/smov    - to general)
+            srcsize = id->idOpSize();
+            index   = emitGetInsSC(id);
+            if (ins == INS_smov)
+            {
+                dstsize = EA_8BYTE;
+            }
+            else // INS_umov or INS_mov
+            {
+                dstsize = (srcsize == EA_8BYTE) ? EA_8BYTE : EA_4BYTE;
+            }
+            emitDispReg(id->idReg1(), dstsize, true);
+            emitDispVectorRegIndex(id->idReg2(), srcsize, index, false);
+            break;
 
-                case INS_ld1r:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
-                    break;
+        case IF_DV_2C: // DV_2C   .Q.........iiiii ......nnnnnddddd      Vd Rn   (dup/ins - vector from general)
+            if (ins == INS_dup)
+            {
+                datasize = id->idOpSize();
+                assert(isValidVectorDatasize(datasize));
+                assert(isValidArrangement(datasize, id->idInsOpt()));
+                elemsize = optGetElemsize(id->idInsOpt());
+                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+            }
+            else // INS_ins
+            {
+                elemsize = id->idOpSize();
+                index    = emitGetInsSC(id);
+                assert(isValidVectorElemsize(elemsize));
+                emitDispVectorRegIndex(id->idReg1(), elemsize, index, true);
+            }
+            emitDispReg(id->idReg2(), (elemsize == EA_8BYTE) ? EA_8BYTE : EA_4BYTE, false);
+            break;
 
-                case INS_ld2r:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
-                    else
-                    {
-                        // B/H/S
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                        result.insLatency    = PERFSCORE_LATENCY_3C;
-                    }
-                    break;
+        case IF_DV_2D: // DV_2D   .Q.........iiiii ......nnnnnddddd      Vd Vn[]   (dup - vector)
+            datasize = id->idOpSize();
+            assert(isValidVectorDatasize(datasize));
+            assert(isValidArrangement(datasize, id->idInsOpt()));
+            elemsize = optGetElemsize(id->idInsOpt());
+            index    = emitGetInsSC(id);
+            emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispVectorRegIndex(id->idReg2(), elemsize, index, false);
+            break;
 
-                case INS_ld3r:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D
-                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                        result.insLatency    = PERFSCORE_LATENCY_5C;
-                    }
-                    else
-                    {
-                        // B/H/S
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
-                    break;
+        case IF_DV_2E: // DV_2E   ...........iiiii ......nnnnnddddd      Vd Vn[]   (dup - scalar)
+            elemsize = id->idOpSize();
+            index    = emitGetInsSC(id);
+            emitDispReg(id->idReg1(), elemsize, true);
+            emitDispVectorRegIndex(id->idReg2(), elemsize, index, false);
+            break;
 
-                case INS_ld4r:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D
-                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
-                        result.insLatency    = PERFSCORE_LATENCY_6C;
-                    }
-                    else
-                    {
-                        // B/H/S
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
-                    break;
+        case IF_DV_2F: // DV_2F   ...........iiiii .jjjj.nnnnnddddd      Vd[] Vn[] (ins - element)
+            imm      = emitGetInsSC(id);
+            index    = (imm >> 4) & 0xf;
+            index2   = imm & 0xf;
+            elemsize = id->idOpSize();
+            emitDispVectorRegIndex(id->idReg1(), elemsize, index, true);
+            emitDispVectorRegIndex(id->idReg2(), elemsize, index2, false);
+            break;
 
-                case INS_st1:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_1C;
-                    break;
+        case IF_DV_2G: // DV_2G   .........X...... ......nnnnnddddd      Vd Vn      (fmov, fcvtXX - register)
+        case IF_DV_2K: // DV_2K   .........X.mmmmm ......nnnnn.....      Vn Vm      (fcmp)
+        case IF_DV_2L: // DV_2L   ........XX...... ......nnnnnddddd      Vd Vn      (abs, neg - scalar)
+            size = id->idOpSize();
+            if ((ins == INS_fcmeq) || (ins == INS_fcmge) || (ins == INS_fcmgt) || (ins == INS_fcmle) ||
+                (ins == INS_fcmlt))
+            {
+                emitDispReg(id->idReg1(), size, true);
+                emitDispReg(id->idReg2(), size, true);
+                emitDispFloatZero();
+            }
+            else if (emitInsIsVectorNarrow(ins))
+            {
+                emitDispReg(id->idReg1(), size, true);
+                emitDispReg(id->idReg2(), widenDatasize(size), false);
+            }
+            else
+            {
+                emitDispReg(id->idReg1(), size, true);
+                emitDispReg(id->idReg2(), size, false);
+            }
+            if (fmt == IF_DV_2L &&
+                (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt))
+            {
+                emitDispComma();
+                emitDispImm(0, false);
+            }
+            break;
 
-                case INS_st1_2regs:
-                case INS_st2:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                        result.insLatency    = PERFSCORE_LATENCY_1C;
-                    }
-                    else
-                    {
-                        // Q-form
-                        assert(id->idOpSize() == EA_16BYTE);
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                        result.insLatency    = PERFSCORE_LATENCY_2C;
-                    }
-                    break;
+        case IF_DV_2H: // DV_2H   X........X...... ......nnnnnddddd      Rd Vn      (fmov, fcvtXX - to general)
+        case IF_DV_2I: // DV_2I   X........X...... ......nnnnnddddd      Vd Rn      (fmov, Xcvtf - from general)
+        case IF_DV_2J: // DV_2J   ........SS.....D D.....nnnnnddddd      Vd Vn      (fcvt)
+            dstsize = optGetDstsize(id->idInsOpt());
+            srcsize = optGetSrcsize(id->idInsOpt());
 
-                case INS_st1_3regs:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                        result.insLatency    = PERFSCORE_LATENCY_2C;
-                    }
-                    else
-                    {
-                        // Q-form
-                        assert(id->idOpSize() == EA_16BYTE);
-                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                        result.insLatency    = PERFSCORE_LATENCY_3C;
-                    }
-                    break;
+            emitDispReg(id->idReg1(), dstsize, true);
+            emitDispReg(id->idReg2(), srcsize, false);
+            break;
 
-                case INS_st1_4regs:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                        result.insLatency    = PERFSCORE_LATENCY_2C;
-                    }
-                    else
-                    {
-                        // Q-form
-                        assert(id->idOpSize() == EA_16BYTE);
-                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
-                    break;
+        case IF_DV_2Q: // DV_2Q   .........X...... ......nnnnnddddd      Sd Vn      (faddp, fmaxnmp, fmaxp, fminnmp,
+                       // fminp - scalar)
+        case IF_DV_2R: // DV_2R   .Q.......X...... ......nnnnnddddd      Sd Vn      (fmaxnmv, fmaxv, fminnmv, fminv)
+        case IF_DV_2S: // DV_2S   ........XX...... ......nnnnnddddd      Sd Vn      (addp - scalar)
+        case IF_DV_2T: // DV_2T   .Q......XX...... ......nnnnnddddd      Sd Vn      (addv, saddlv, smaxv, sminv, uaddlv,
+                       // umaxv, uminv)
+            if ((ins == INS_sadalp) || (ins == INS_saddlp) || (ins == INS_uadalp) || (ins == INS_uaddlp))
+            {
+                emitDispVectorReg(id->idReg1(), optWidenDstArrangement(id->idInsOpt()), true);
+                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+            }
+            else
+            {
+                if ((ins == INS_saddlv) || (ins == INS_uaddlv))
+                {
+                    elemsize = optGetElemsize(optWidenDstArrangement(id->idInsOpt()));
+                }
+                else
+                {
+                    elemsize = optGetElemsize(id->idInsOpt());
+                }
+                emitDispReg(id->idReg1(), elemsize, true);
+                emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+            }
+            break;
 
-                case INS_st3:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
-                    break;
+        case IF_DV_3A: // DV_3A   .Q......XX.mmmmm ......nnnnnddddd      Vd Vn Vm   (vector)
+            if ((ins == INS_sdot) || (ins == INS_udot))
+            {
+                // sdot/udot Vd.2s, Vn.8b, Vm.8b
+                // sdot/udot Vd.4s, Vn.16b, Vm.16b
+                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                size = id->idOpSize();
+                emitDispVectorReg(id->idReg2(), (size == EA_8BYTE) ? INS_OPTS_8B : INS_OPTS_16B, true);
+                emitDispVectorReg(id->idReg3(), (size == EA_8BYTE) ? INS_OPTS_8B : INS_OPTS_16B, false);
+            }
+            else if (((ins == INS_pmull) && (id->idInsOpt() == INS_OPTS_1D)) ||
+                     ((ins == INS_pmull2) && (id->idInsOpt() == INS_OPTS_2D)))
+            {
+                // pmull Vd.1q, Vn.1d, Vm.1d
+                // pmull2 Vd.1q, Vn.2d, Vm.2d
+                printf("%s.1q, ", emitVectorRegName(id->idReg1()));
+                emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+                emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
+            }
+            else if (emitInsIsVectorNarrow(ins))
+            {
+                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                emitDispVectorReg(id->idReg3(), optWidenElemsizeArrangement(id->idInsOpt()), false);
+            }
+            else
+            {
+                if (emitInsIsVectorLong(ins))
+                {
+                    emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+                }
+                else if (emitInsIsVectorWide(ins))
+                {
+                    emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                    emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                }
+                else
+                {
+                    emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+                }
 
-                case INS_st4:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                        result.insLatency    = PERFSCORE_LATENCY_3C;
-                    }
-                    else
-                    {
-                        assert(id->idOpSize() == EA_16BYTE);
-                        if (optGetElemsize(id->idInsOpt()) == EA_8BYTE)
-                        {
-                            // D
-                            result.insThroughput = PERFSCORE_THROUGHPUT_4C;
-                            result.insLatency    = PERFSCORE_LATENCY_4C;
-                        }
-                        else
-                        {
-                            // B/H/S
-                            result.insThroughput = PERFSCORE_THROUGHPUT_5C;
-                            result.insLatency    = PERFSCORE_LATENCY_5C;
-                        }
-                    }
-                    break;
+                emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
+            }
+            break;
 
-                default:
-                    unreached();
+        case IF_DV_3AI: // DV_3AI  .Q......XXLMmmmm ....H.nnnnnddddd      Vd Vn Vm[] (vector by element)
+            if ((ins == INS_sdot) || (ins == INS_udot))
+            {
+                // sdot/udot Vd.2s, Vn.8b, Vm.4b[index]
+                // sdot/udot Vd.4s, Vn.16b, Vm.4b[index]
+                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                size = id->idOpSize();
+                emitDispVectorReg(id->idReg2(), (size == EA_8BYTE) ? INS_OPTS_8B : INS_OPTS_16B, true);
+                index = emitGetInsSC(id);
+                printf("%s.4b[%d]", emitVectorRegName(id->idReg3()), (int)index);
+            }
+            else
+            {
+                if (emitInsIsVectorLong(ins))
+                {
+                    emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+                }
+                else if (emitInsIsVectorWide(ins))
+                {
+                    emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                    emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true);
+                }
+                else
+                {
+                    assert(!emitInsIsVectorNarrow(ins));
+                    emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+                }
+
+                elemsize = optGetElemsize(id->idInsOpt());
+                index    = emitGetInsSC(id);
+                emitDispVectorRegIndex(id->idReg3(), elemsize, index, false);
             }
             break;
 
-        case IF_LS_2F:
-        case IF_LS_2G:
-        case IF_LS_3G:
-            // Load/Store single structure
+        case IF_DV_3B: // DV_3B   .Q.........mmmmm ......nnnnnddddd      Vd Vn Vm  (vector)
+            emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+            emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
+            break;
+
+        case IF_DV_3C: // DV_3C   .Q.........mmmmm ......nnnnnddddd      Vd Vn Vm  (vector)
+            emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
             switch (ins)
             {
-                case INS_ld1:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                case INS_tbl:
+                case INS_tbl_2regs:
+                case INS_tbl_3regs:
+                case INS_tbl_4regs:
+                case INS_tbx:
+                case INS_tbx_2regs:
+                case INS_tbx_3regs:
+                case INS_tbx_4regs:
+                    registerListSize = insGetRegisterListSize(ins);
+                    emitDispVectorRegList(id->idReg2(), registerListSize, INS_OPTS_16B, true);
                     break;
-
-                case INS_ld2:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
-                    else
-                    {
-                        // B/H/S
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                        result.insLatency    = PERFSCORE_LATENCY_3C;
-                    }
+                case INS_mov:
                     break;
-
-                case INS_ld3:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D
-                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                        result.insLatency    = PERFSCORE_LATENCY_5C;
-                    }
-                    else
-                    {
-                        // B/H/S
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
+                default:
+                    emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
                     break;
+            }
+            emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
+            break;
 
-                case INS_ld4:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D
-                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
-                        result.insLatency    = PERFSCORE_LATENCY_6C;
-                    }
-                    else
-                    {
-                        // B/H/S
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
-                    break;
+        case IF_DV_3BI: // DV_3BI  .Q........Lmmmmm ....H.nnnnnddddd      Vd Vn Vm[] (vector by element)
+            emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+            elemsize = optGetElemsize(id->idInsOpt());
+            emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false);
+            break;
 
-                case INS_st1:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_1C;
-                    break;
+        case IF_DV_3D: // DV_3D   .........X.mmmmm ......nnnnnddddd      Vd Vn Vm  (scalar)
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, true);
+            emitDispReg(id->idReg3(), size, false);
+            break;
 
-                case INS_st2:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                        result.insLatency    = PERFSCORE_LATENCY_2C;
-                    }
-                    else
-                    {
-                        // B/H/S
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                        result.insLatency    = PERFSCORE_LATENCY_1C;
-                    }
-                    break;
+        case IF_DV_3E: // DV_3E   ........XX.mmmmm ......nnnnnddddd      Vd Vn Vm  (scalar)
+            if (emitInsIsVectorLong(ins))
+            {
+                emitDispReg(id->idReg1(), widenDatasize(size), true);
+            }
+            else
+            {
+                assert(!emitInsIsVectorNarrow(ins) && !emitInsIsVectorWide(ins));
+                emitDispReg(id->idReg1(), size, true);
+            }
 
-                case INS_st3:
-                case INS_st4:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    break;
+            emitDispReg(id->idReg2(), size, true);
+            emitDispReg(id->idReg3(), size, false);
+            break;
 
-                default:
-                    unreached();
+        case IF_DV_3EI: // DV_3EI ........XXLMmmmm ....H.nnnnnddddd      Vd Vn Vm[] (scalar by element)
+            if (emitInsIsVectorLong(ins))
+            {
+                emitDispReg(id->idReg1(), widenDatasize(size), true);
+            }
+            else
+            {
+                assert(!emitInsIsVectorNarrow(ins) && !emitInsIsVectorWide(ins));
+                emitDispReg(id->idReg1(), size, true);
             }
+            emitDispReg(id->idReg2(), size, true);
+            elemsize = id->idOpSize();
+            index    = emitGetInsSC(id);
+            emitDispVectorRegIndex(id->idReg3(), elemsize, index, false);
             break;
 
-        case IF_SN_0A: // nop, yield, align
-
-            if (id->idIns() == INS_align)
+        case IF_DV_3F: // DV_3F   ..........mmmmm ......nnnnnddddd       Vd Vn Vm (vector)
+            if ((ins == INS_sha1c) || (ins == INS_sha1m) || (ins == INS_sha1p))
             {
-                if ((id->idInsOpt() == INS_OPTS_NONE) || ((instrDescAlign*)id)->isPlacedAfterJmp)
-                {
-                    // Either we're not going to generate 'align' instruction, or the 'align'
-                    // instruction is placed immediately after unconditional jmp.
-                    // In both cases, don't count for PerfScore.
-
-                    result.insThroughput = PERFSCORE_THROUGHPUT_ZERO;
-                    result.insLatency    = PERFSCORE_LATENCY_ZERO;
-                    break;
-                }
+                // Qd, Sn, Vm (vector)
+                emitDispReg(id->idReg1(), size, true);
+                emitDispReg(id->idReg2(), EA_4BYTE, true);
+                emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
             }
-            else if (ins == INS_yield)
+            else if ((ins == INS_sha256h) || (ins == INS_sha256h2))
             {
-                // @ToDo - find out the actual latency, match x86/x64 for now
-                result.insThroughput = PERFSCORE_THROUGHPUT_140C;
-                result.insLatency    = PERFSCORE_LATENCY_140C;
+                // Qd Qn Vm (vector)
+                emitDispReg(id->idReg1(), size, true);
+                emitDispReg(id->idReg2(), size, true);
+                emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
+            }
+            else // INS_sha1su0, INS_sha256su1
+            {
+                // Vd, Vn, Vm   (vector)
+                emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+                emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+                emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
             }
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_ZERO;
             break;
 
-        case IF_SI_0B: // dmb, dsb, isb
-            // @ToDo - find out the actual latency
-            result.insThroughput = PERFSCORE_THROUGHPUT_10C;
-            result.insLatency    = PERFSCORE_LATENCY_10C;
+        case IF_DV_3DI: // DV_3DI  .........XLmmmmm ....H.nnnnnddddd      Vd Vn Vm[] (scalar by element)
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, true);
+            elemsize = size;
+            emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false);
             break;
 
-        case IF_DV_2J: // fcvt  Vd Vn
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_4C;
+        case IF_DV_3G: // DV_3G   .Q.........mmmmm .iiii.nnnnnddddd      Vd Vn Vm imm (vector)
+            emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+            emitDispVectorReg(id->idReg3(), id->idInsOpt(), true);
+            emitDispImm(emitGetInsSC(id), false);
             break;
 
-        case IF_DV_2K: // fcmp  Vd Vn
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_1C;
+        case IF_DV_4A: // DV_4A   .........X.mmmmm .aaaaannnnnddddd      Vd Va Vn Vm (scalar)
+            emitDispReg(id->idReg1(), size, true);
+            emitDispReg(id->idReg2(), size, true);
+            emitDispReg(id->idReg3(), size, true);
+            emitDispReg(id->idReg4(), size, false);
             break;
 
-        case IF_DV_1A: // fmov - immediate (scalar)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_1C;
+        case IF_SN_0A: // SN_0A   ................ ................
+            if (ins == INS_align)
+            {
+                instrDescAlign* alignInstrId = (instrDescAlign*)id;
+                printf("[%d bytes", id->idIsEmptyAlign() ? 0 : INSTR_ENCODED_SIZE);
+
+                // targetIG is only set for 1st of the series of align instruction
+                if ((alignInstrId->idaLoopHeadPredIG != nullptr) && (alignInstrId->loopHeadIG() != nullptr))
+                {
+                    printf(" for IG%02u", alignInstrId->loopHeadIG()->igNum);
+                }
+                printf("]");
+            }
             break;
 
-        case IF_DV_1B: // fmov, orr, bic, movi, mvni  (immediate vector)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_1C;
+        case IF_SI_0A: // SI_0A   ...........iiiii iiiiiiiiiii.....               imm16
+            emitDispImm(emitGetInsSC(id), false);
             break;
 
-        case IF_DV_1C: // fcmp vn, #0.0
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_3C;
+        case IF_SI_0B: // SI_0B   ................ ....bbbb........               imm4 - barrier
+            emitDispBarrier((insBarrier)emitGetInsSC(id));
             break;
 
-        case IF_DV_2A: // fabs, fneg, fsqrt, fcvtXX, frintX, scvtf, ucvtf, fcmXX (vector)
-            switch (ins)
+        case IF_SR_1A: // SR_1A   ................ ...........ttttt      Rt       (dc zva, mrs)
+            if (ins == INS_mrs_tpid0)
             {
-                case INS_fabs:
-                case INS_fneg:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency = (id->idOpSize() == EA_8BYTE) ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_3C / 2;
-                    break;
+                emitDispReg(id->idReg1(), size, true);
+                printf("tpidr_el0");
+            }
+            else
+            {
+                emitDispReg(id->idReg1(), size, false);
+            }
+            break;
 
-                case INS_fsqrt:
-                    if ((id->idInsOpt() == INS_OPTS_2S) || (id->idInsOpt() == INS_OPTS_4S))
-                    {
-                        // S-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                        result.insLatency    = PERFSCORE_LATENCY_11C;
-                    }
-                    else
-                    {
-                        // D-form
-                        assert(id->idInsOpt() == INS_OPTS_2D);
-                        result.insThroughput = PERFSCORE_THROUGHPUT_6C;
-                        result.insLatency    = PERFSCORE_LATENCY_18C;
-                    }
-                    break;
+        default:
+            // fallback to display SVE instructions.
+            emitDispInsSveHelp(id);
+            break;
+    }
 
-                case INS_fcvtas:
-                case INS_fcvtau:
-                case INS_fcvtms:
-                case INS_fcvtmu:
-                case INS_fcvtns:
-                case INS_fcvtnu:
-                case INS_fcvtps:
-                case INS_fcvtpu:
-                case INS_fcvtzs:
-                case INS_fcvtzu:
-                case INS_frinta:
-                case INS_frinti:
-                case INS_frintm:
-                case INS_frintn:
-                case INS_frintp:
-                case INS_frintx:
-                case INS_frintz:
-                case INS_scvtf:
-                case INS_ucvtf:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+    if (id->idIsLclVar())
+    {
+        printf("\t// ");
+        emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+                         id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+        if (id->idIsLclVarPair())
+        {
+            printf(", ");
+            emitLclVarAddr* iiaLclVar2 = emitGetLclVarPairLclVar2(id);
+            emitDispFrameRef(iiaLclVar2->lvaVarNum(), iiaLclVar2->lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs2,
+                             asmfm);
+        }
+    }
 
-                case INS_fcmeq:
-                case INS_fcmge:
-                case INS_fcmgt:
-                case INS_fcmle:
-                case INS_fcmlt:
-                case INS_frecpe:
-                case INS_frsqrte:
-                case INS_urecpe:
-                case INS_ursqrte:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    break;
+    printf("\n");
+}
 
-                case INS_fcvtl:
-                case INS_fcvtl2:
-                case INS_fcvtn:
-                case INS_fcvtn2:
-                case INS_fcvtxn:
-                case INS_fcvtxn2:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+/*****************************************************************************
+ *
+ *  Display a stack frame reference.
+ */
+
+void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
+{
+#ifdef DEBUG
+    printf("[");
+
+    if (varx < 0)
+        printf("TEMP_%02u", -varx);
+    else
+        emitComp->gtDispLclVar(+varx, false);
+
+    if (disp < 0)
+        printf("-0x%02x", -disp);
+    else if (disp > 0)
+        printf("+0x%02x", +disp);
+
+    printf("]");
+
+    if ((varx >= 0) && emitComp->opts.varNames && (((IL_OFFSET)offs) != BAD_IL_OFFSET))
+    {
+        const char* varName = emitComp->compLocalVarName(varx, offs);
+
+        if (varName)
+        {
+            printf("'%s", varName);
+
+            if (disp < 0)
+                printf("-%d", -disp);
+            else if (disp > 0)
+                printf("+%d", +disp);
+
+            printf("'");
+        }
+    }
+#endif
+}
+
+// Generate code for a load or store operation with a potentially complex addressing mode
+// This method handles the case of a GT_IND with contained GT_LEA op1 of the x86 form [base + index*scale + offset]
+// Since Arm64 does not directly support this complex of an addressing mode
+// we may generates up to three instructions for this for Arm64
+//
+void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir)
+{
+    GenTree* addr = indir->Addr();
 
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
-            }
-            break;
+    if (addr->isContained())
+    {
+        assert(addr->OperIs(GT_LCL_ADDR, GT_LEA) || (addr->IsIconHandle(GTF_ICON_TLS_HDL)));
 
-        case IF_DV_2G: // fmov, fabs, fneg, fsqrt, fcmXX, fcvtXX, frintX, scvtf, ucvtf (scalar)
-            switch (ins)
+        int   offset = 0;
+        DWORD lsl    = 0;
+
+        if (addr->OperGet() == GT_LEA)
+        {
+            offset = addr->AsAddrMode()->Offset();
+            if (addr->AsAddrMode()->gtScale > 0)
             {
-                case INS_fmov:
-                    // FP move, vector register
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_1C;
-                    break;
+                assert(isPow2(addr->AsAddrMode()->gtScale));
+                BitScanForward(&lsl, addr->AsAddrMode()->gtScale);
+            }
+        }
 
-                case INS_fabs:
-                case INS_fneg:
+        GenTree* memBase = indir->Base();
 
-                case INS_fcvtas:
-                case INS_fcvtau:
-                case INS_fcvtms:
-                case INS_fcvtmu:
-                case INS_fcvtns:
-                case INS_fcvtnu:
-                case INS_fcvtps:
-                case INS_fcvtpu:
-                case INS_fcvtzs:
-                case INS_fcvtzu:
-                case INS_scvtf:
-                case INS_ucvtf:
+        if (indir->HasIndex())
+        {
+            GenTree* index = indir->Index();
 
-                case INS_frinta:
-                case INS_frinti:
-                case INS_frintm:
-                case INS_frintn:
-                case INS_frintp:
-                case INS_frintx:
-                case INS_frintz:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
-                    break;
+            if (offset != 0)
+            {
+                regNumber tmpReg = indir->GetSingleTempReg();
 
-                case INS_fcvtxn:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+                emitAttr addType = varTypeIsGC(memBase) ? EA_BYREF : EA_PTRSIZE;
 
-                case INS_fcmeq:
-                case INS_fcmge:
-                case INS_fcmgt:
-                case INS_fcmle:
-                case INS_fcmlt:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    break;
+                if (emitIns_valid_imm_for_add(offset, EA_8BYTE))
+                {
+                    if (lsl > 0)
+                    {
+                        // Generate code to set tmpReg = base + index*scale
+                        emitIns_R_R_R_I(INS_add, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum(), lsl,
+                                        INS_OPTS_LSL);
+                    }
+                    else // no scale
+                    {
+                        // Generate code to set tmpReg = base + index
+                        emitIns_R_R_R(INS_add, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum());
+                    }
 
-                case INS_frecpe:
-                case INS_frecpx:
-                case INS_frsqrte:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+                    noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
 
-                case INS_fsqrt:
-                    if (id->idOpSize() == EA_8BYTE)
+                    // Then load/store dataReg from/to [tmpReg + offset]
+                    emitIns_R_R_I(ins, attr, dataReg, tmpReg, offset);
+                }
+                else // large offset
+                {
+                    // First load/store tmpReg with the large offset constant
+                    codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+                    // Then add the base register
+                    //      rd = rd + base
+                    emitIns_R_R_R(INS_add, addType, tmpReg, tmpReg, memBase->GetRegNum());
+
+                    noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
+                    noway_assert(tmpReg != index->GetRegNum());
+
+                    // Then load/store dataReg from/to [tmpReg + index*scale]
+                    emitIns_R_R_R_I(ins, attr, dataReg, tmpReg, index->GetRegNum(), lsl, INS_OPTS_LSL);
+                }
+            }
+            else // (offset == 0)
+            {
+                if (lsl > 0)
+                {
+                    // Then load/store dataReg from/to [memBase + index*scale]
+                    emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum(), INS_OPTS_LSL, lsl);
+                }
+                else // no scale
+                {
+                    if (index->OperIs(GT_BFIZ, GT_CAST) && index->isContained())
                     {
-                        // D-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_19C;
-                        result.insLatency    = PERFSCORE_LATENCY_22C;
+                        // Then load/store dataReg from/to [memBase + index*scale with sign/zero extension]
+                        GenTreeCast* cast;
+                        int          cns;
+
+                        if (index->OperIs(GT_BFIZ))
+                        {
+                            cast = index->gtGetOp1()->AsCast();
+                            cns  = (int)index->gtGetOp2()->AsIntCon()->IconValue();
+                        }
+                        else
+                        {
+                            cast = index->AsCast();
+                            cns  = 0;
+                        }
+
+                        // For now, this code only supports extensions from i32/u32
+                        assert(cast->isContained());
+
+                        emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), cast->CastOp()->GetRegNum(),
+                                          cast->IsUnsigned() ? INS_OPTS_UXTW : INS_OPTS_SXTW, cns);
                     }
                     else
                     {
-                        // S-form
-                        assert(id->idOpSize() == EA_4BYTE);
-                        result.insThroughput = PERFSCORE_THROUGHPUT_9C;
-                        result.insLatency    = PERFSCORE_LATENCY_12C;
+                        // Then load/store dataReg from/to [memBase + index]
+                        emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum());
                     }
-                    break;
+                }
+            }
+        }
+        else // no Index register
+        {
+            if (addr->OperIs(GT_LCL_ADDR))
+            {
+                GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
+                unsigned             lclNum  = varNode->GetLclNum();
+                unsigned             offset  = varNode->GetLclOffs();
+                if (emitInsIsStore(ins))
+                {
+                    emitIns_S_R(ins, attr, dataReg, lclNum, offset);
+                }
+                else
+                {
+                    emitIns_R_S(ins, attr, dataReg, lclNum, offset);
+                }
+            }
+            else if (addr->IsIconHandle(GTF_ICON_TLS_HDL))
+            {
+                // On Arm64, TEB is in r18, so load from the r18 as base.
+                emitIns_R_R_I(ins, attr, dataReg, REG_R18, addr->AsIntCon()->IconValue());
+            }
+            else if (emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(indir->TypeGet())))
+            {
+                // Then load/store dataReg from/to [memBase + offset]
+                emitIns_R_R_I(ins, attr, dataReg, memBase->GetRegNum(), offset);
+            }
+            else
+            {
+                // We require a tmpReg to hold the offset
+                regNumber tmpReg = indir->GetSingleTempReg();
 
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
+                // First load/store tmpReg with the large offset constant
+                codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+
+                // Then load/store dataReg from/to [memBase + tmpReg]
+                emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), tmpReg);
             }
-            break;
+        }
+    }
+    else // addr is not contained, so we evaluate it into a register
+    {
+#ifdef DEBUG
+        if (addr->OperIs(GT_LCL_ADDR))
+        {
+            // If the local var is a gcref or byref, the local var better be untracked, because we have
+            // no logic here to track local variable lifetime changes, like we do in the contained case
+            // above. E.g., for a `str r0,[r1]` for byref `r1` to local `V01`, we won't store the local
+            // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth.
+            LclVarDsc* varDsc = emitComp->lvaGetDesc(addr->AsLclVarCommon());
+            assert(!varDsc->lvTracked);
+        }
+#endif // DEBUG
 
-        case IF_DV_2Q: // faddp, fmaxnmp, fmaxp, fminnmp, fminp (scalar)
-        case IF_DV_2R: // fmaxnmv, fmaxv, fminnmv, fminv
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_4C;
-            break;
+        // Then load/store dataReg from/to [addrReg]
+        emitIns_R_R(ins, attr, dataReg, addr->GetRegNum());
+    }
+}
 
-        case IF_DV_2S: // addp (scalar)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            break;
+// The callee must call genConsumeReg() for any non-contained srcs
+// and genProduceReg() for any non-contained dsts.
 
-        case IF_DV_3B: // fadd, fsub, fdiv, fmul, fmulx, fmla, fmls, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX
-                       // faddp, fmaxnmp, fmaxp, fminnmp, fminp, addp (vector)
-            switch (ins)
-            {
-                case INS_fmin:
-                case INS_fminnm:
-                case INS_fmax:
-                case INS_fmaxnm:
-                case INS_fabd:
-                case INS_fadd:
-                case INS_fsub:
-                case INS_fmul:
-                case INS_fmulx:
-                case INS_fmla:
-                case INS_fmls:
-                case INS_frecps:
-                case INS_frsqrts:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
+{
+    // dst can only be a reg
+    assert(!dst->isContained());
+
+    // src can be immed or reg
+    assert(!src->isContained() || src->isContainedIntOrIImmed());
+
+    // find immed (if any) - it cannot be a dst
+    GenTreeIntConCommon* intConst = nullptr;
+    if (src->isContainedIntOrIImmed())
+    {
+        intConst = src->AsIntConCommon();
+    }
 
-                case INS_faddp:
-                case INS_fmaxnmp:
-                case INS_fmaxp:
-                case INS_fminnmp:
-                case INS_fminp:
-                    if (id->idOpSize() == EA_16BYTE)
-                    {
-                        // Q-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
-                    else
-                    {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
-                    break;
+    if (intConst)
+    {
+        emitIns_R_I(ins, attr, dst->GetRegNum(), intConst->IconValue());
+        return dst->GetRegNum();
+    }
+    else
+    {
+        emitIns_R_R(ins, attr, dst->GetRegNum(), src->GetRegNum());
+        return dst->GetRegNum();
+    }
+}
 
-                case INS_facge:
-                case INS_facgt:
-                case INS_fcmeq:
-                case INS_fcmge:
-                case INS_fcmgt:
-                case INS_fcmle:
-                case INS_fcmlt:
-                    if (id->idOpSize() == EA_16BYTE)
-                    {
-                        // Q-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                        result.insLatency    = PERFSCORE_LATENCY_2C;
-                    }
-                    else
-                    {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                        result.insLatency    = PERFSCORE_LATENCY_2C;
-                    }
-                    break;
+// The callee must call genConsumeReg() for any non-contained srcs
+// and genProduceReg() for any non-contained dsts.
 
-                case INS_fdiv:
-                    if ((id->idInsOpt() == INS_OPTS_2S) || (id->idInsOpt() == INS_OPTS_4S))
-                    {
-                        // S-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_10C;
-                        result.insLatency    = PERFSCORE_LATENCY_13C;
-                    }
-                    else
-                    {
-                        // D-form
-                        assert(id->idInsOpt() == INS_OPTS_2D);
-                        result.insThroughput = PERFSCORE_THROUGHPUT_10C;
-                        result.insLatency    = PERFSCORE_LATENCY_22C;
-                    }
-                    break;
+regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2)
+{
+    // dst can only be a reg
+    assert(!dst->isContained());
 
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
-            }
-            break;
+    // find immed (if any) - it cannot be a dst
+    // Only one src can be an int.
+    GenTreeIntConCommon* intConst  = nullptr;
+    GenTree*             nonIntReg = nullptr;
 
-        case IF_DV_3AI: // mul, mla, mls (vector by element)
-        case IF_DV_3BI: // fmul, fmulx, fmla, fmls (vector by element)
-        case IF_DV_3EI: // sqdmlal, sqdmlsl, sqdmulh, sqdmull (scalar by element)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_4C;
-            break;
+    if (varTypeIsFloating(dst))
+    {
+        // src1 can only be a reg
+        assert(!src1->isContained());
+        // src2 can only be a reg
+        assert(!src2->isContained());
+    }
+    else // not floating point
+    {
+        // src2 can be immed or reg
+        assert(!src2->isContained() || src2->isContainedIntOrIImmed());
 
-        case IF_DV_4A: // fmadd, fmsub, fnmadd, fnsub (scalar)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_4C;
-            break;
+        // Check src2 first as we can always allow it to be a contained immediate
+        if (src2->isContainedIntOrIImmed())
+        {
+            intConst  = src2->AsIntConCommon();
+            nonIntReg = src1;
+        }
+        // Only for commutative operations do we check src1 and allow it to be a contained immediate
+        else if (dst->OperIsCommutative())
+        {
+            // src1 can be immed or reg
+            assert(!src1->isContained() || src1->isContainedIntOrIImmed());
 
-        case IF_DV_3D: // fadd, fsub, fdiv, fmul, fmulx, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX (scalar)
-            switch (ins)
+            // Check src1 and allow it to be a contained immediate
+            if (src1->isContainedIntOrIImmed())
             {
-                case INS_fadd:
-                case INS_fsub:
-                case INS_fabd:
-                case INS_fmax:
-                case INS_fmaxnm:
-                case INS_fmin:
-                case INS_fminnm:
-                case INS_fmul:
-                case INS_fmulx:
-                case INS_fnmul:
-                case INS_frecps:
-                case INS_frsqrts:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+                assert(!src2->isContainedIntOrIImmed());
+                intConst  = src1->AsIntConCommon();
+                nonIntReg = src2;
+            }
+        }
+        else
+        {
+            // src1 can only be a reg
+            assert(!src1->isContained());
+        }
+    }
 
-                case INS_facge:
-                case INS_facgt:
-                case INS_fcmeq:
-                case INS_fcmge:
-                case INS_fcmgt:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    break;
+    bool isMulOverflow = false;
+    if (dst->gtOverflowEx())
+    {
+        if ((ins == INS_add) || (ins == INS_adds))
+        {
+            ins = INS_adds;
+        }
+        else if ((ins == INS_sub) || (ins == INS_subs))
+        {
+            ins = INS_subs;
+        }
+        else if (ins == INS_mul)
+        {
+            isMulOverflow = true;
+            assert(intConst == nullptr); // overflow format doesn't support an int constant operand
+        }
+        else
+        {
+            assert(!"Invalid ins for overflow check");
+        }
+    }
+    if (intConst != nullptr)
+    {
+        emitIns_R_R_I(ins, attr, dst->GetRegNum(), nonIntReg->GetRegNum(), intConst->IconValue());
+    }
+    else
+    {
+        if (isMulOverflow)
+        {
+            regNumber extraReg = dst->GetSingleTempReg();
+            assert(extraReg != dst->GetRegNum());
 
-                case INS_fdiv:
-                    if (id->idOpSize() == EA_8BYTE)
-                    {
-                        // D-form
-                        result.insThroughput = PERFSCORE_THROUGHPUT_6C;
-                        result.insLatency    = PERFSCORE_LATENCY_15C;
-                    }
-                    else
-                    {
-                        // S-form
-                        assert(id->idOpSize() == EA_4BYTE);
-                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                        result.insLatency    = PERFSCORE_LATENCY_10C;
-                    }
-                    break;
+            if ((dst->gtFlags & GTF_UNSIGNED) != 0)
+            {
+                if (attr == EA_4BYTE)
+                {
+                    // Compute 8 byte results from 4 byte by 4 byte multiplication.
+                    emitIns_R_R_R(INS_umull, EA_8BYTE, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum());
 
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
-            }
-            break;
+                    // Get the high result by shifting dst.
+                    emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->GetRegNum(), 32);
+                }
+                else
+                {
+                    assert(attr == EA_8BYTE);
+                    // Compute the high result.
+                    emitIns_R_R_R(INS_umulh, attr, extraReg, src1->GetRegNum(), src2->GetRegNum());
 
-        case IF_DV_2H: // fmov, fcvtXX - to general
-            // fmov : FP transfer to general register
-            // fcvtaXX : FP convert from vector to general
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            break;
+                    // Now multiply without skewing the high result.
+                    emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum());
+                }
 
-        case IF_DV_2I: // fmov, Xcvtf - from general
-            switch (ins)
+                // zero-sign bit comparison to detect overflow.
+                emitIns_R_I(INS_cmp, attr, extraReg, 0);
+            }
+            else
             {
-                case INS_fmov:
-                    // FP transfer from general register
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    break;
+                int bitShift = 0;
+                if (attr == EA_4BYTE)
+                {
+                    // Compute 8 byte results from 4 byte by 4 byte multiplication.
+                    emitIns_R_R_R(INS_smull, EA_8BYTE, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum());
+
+                    // Get the high result by shifting dst.
+                    emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->GetRegNum(), 32);
+
+                    bitShift = 31;
+                }
+                else
+                {
+                    assert(attr == EA_8BYTE);
+                    // Save the high result in a temporary register.
+                    emitIns_R_R_R(INS_smulh, attr, extraReg, src1->GetRegNum(), src2->GetRegNum());
+
+                    // Now multiply without skewing the high result.
+                    emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum());
 
-                case INS_scvtf:
-                case INS_ucvtf:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_5C;
-                    break;
+                    bitShift = 63;
+                }
 
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
+                // Sign bit comparison to detect overflow.
+                emitIns_R_R_I(INS_cmp, attr, extraReg, dst->GetRegNum(), bitShift, INS_OPTS_ASR);
             }
-            break;
+        }
+        else
+        {
+            // We can just multiply.
+            emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum());
+        }
+    }
 
-        case IF_DV_3C: // mov,and, bic, eor, mov,mvn, orn, bsl, bit, bif,
-                       // tbl, tbx (vector)
-            switch (ins)
-            {
-                case INS_tbl:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_1C;
-                    break;
-                case INS_tbl_2regs:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3X;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    break;
-                case INS_tbl_3regs:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_4X;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
-                    break;
-                case INS_tbl_4regs:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3X;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
-                case INS_tbx:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3X;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    break;
-                case INS_tbx_2regs:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_4X;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
-                    break;
-                case INS_tbx_3regs:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_5X;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
-                case INS_tbx_4regs:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_6X;
-                    result.insLatency    = PERFSCORE_LATENCY_5C;
-                    break;
-                default:
-                    // All other instructions
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_1C;
-                    break;
-            }
-            break;
+    if (dst->gtOverflowEx())
+    {
+        assert(!varTypeIsFloating(dst));
+        codeGen->genCheckOverflow(dst);
+    }
 
-        case IF_DV_2E: // mov, dup (scalar)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
+    return dst->GetRegNum();
+}
 
-        case IF_DV_2F: // mov, ins (element)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
+#if defined(DEBUG) || defined(LATE_DISASM)
 
-        case IF_DV_2B: // smov, umov - to general)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
+void emitter::getMemoryOperation(instrDesc* id, unsigned* pMemAccessKind, bool* pIsLocalAccess)
+{
+    unsigned    memAccessKind = PERFSCORE_MEMORY_NONE;
+    bool        isLocalAccess = false;
+    instruction ins           = id->idIns();
 
-        case IF_DV_2C: // mov, dup, ins - from general)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            if (ins == INS_dup)
+    if (emitInsIsLoadOrStore(ins))
+    {
+        if (emitInsIsLoad(ins))
+        {
+            if (emitInsIsStore(ins))
             {
-                result.insLatency = PERFSCORE_LATENCY_3C;
+                memAccessKind = PERFSCORE_MEMORY_READ_WRITE;
             }
             else
             {
-                assert((ins == INS_ins) || (ins == INS_mov));
-                result.insLatency = PERFSCORE_LATENCY_2C;
+                memAccessKind = PERFSCORE_MEMORY_READ;
             }
-            break;
-
-        case IF_DV_2D: // dup (dvector)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
+        }
+        else
+        {
+            assert(emitInsIsStore(ins));
+            memAccessKind = PERFSCORE_MEMORY_WRITE;
+        }
 
-        case IF_DV_3A: // (vector)
-            // add, sub, mul, mla, mls, cmeq, cmge, cmgt, cmhi, cmhs, ctst,
-            // pmul, saba, uaba, sabd, uabd, umin, uminp, umax, umaxp, smin, sminp, smax, smaxp
-            switch (ins)
-            {
-                case INS_add:
-                case INS_sub:
-                case INS_cmeq:
-                case INS_cmge:
-                case INS_cmgt:
-                case INS_cmhi:
-                case INS_cmhs:
-                case INS_shadd:
-                case INS_shsub:
-                case INS_srhadd:
-                case INS_srshl:
-                case INS_sshl:
-                case INS_smax:
-                case INS_smaxp:
-                case INS_smin:
-                case INS_sminp:
-                case INS_umax:
-                case INS_umaxp:
-                case INS_umin:
-                case INS_uminp:
-                case INS_uhadd:
-                case INS_uhsub:
-                case INS_urhadd:
-                case INS_urshl:
-                case INS_ushl:
-                case INS_uzp1:
-                case INS_uzp2:
-                case INS_zip1:
-                case INS_zip2:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    break;
+        insFormat insFmt = id->idInsFmt();
 
-                case INS_trn1:
-                case INS_trn2:
-                    if (id->idInsOpt() == INS_OPTS_2D)
-                    {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    }
-                    else
-                    {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    }
+        switch (insFmt)
+        {
+            case IF_LS_1A:
+                isLocalAccess = true;
+                break;
 
-                    result.insLatency = PERFSCORE_LATENCY_2C;
-                    break;
+            case IF_LS_2A:
+            case IF_LS_2B:
+            case IF_LS_2C:
+            case IF_LS_2D:
+            case IF_LS_2E:
+            case IF_LS_2F:
+            case IF_LS_2G:
+            case IF_LS_3A:
+            case IF_LS_3F:
+            case IF_LS_3G:
+                if (isStackRegister(id->idReg2()))
+                {
+                    isLocalAccess = true;
+                }
+                break;
 
-                case INS_addp:
-                case INS_cmtst:
-                case INS_pmul:
-                case INS_sabd:
-                case INS_sqadd:
-                case INS_sqsub:
-                case INS_uabd:
-                case INS_uqadd:
-                case INS_uqsub:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
-                    break;
+            case IF_LS_3B:
+            case IF_LS_3C:
+            case IF_LS_3D:
+            case IF_LS_3E:
+                if (isStackRegister(id->idReg3()))
+                {
+                    isLocalAccess = true;
+                }
+                break;
+            case IF_LARGELDC:
+                isLocalAccess = false;
+                break;
 
-                case INS_mla:
-                case INS_mls:
-                case INS_mul:
-                case INS_sqdmulh:
-                case INS_sqrdmulh:
-                case INS_sqrshl:
-                case INS_sqshl:
-                case INS_uqrshl:
-                case INS_uqshl:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+            default:
+                assert(!"Logic Error");
+                memAccessKind = PERFSCORE_MEMORY_NONE;
+                break;
+        }
+    }
 
-                case INS_saba:
-                case INS_uaba:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+    *pMemAccessKind = memAccessKind;
+    *pIsLocalAccess = isLocalAccess;
+}
 
-                case INS_sdot:
-                case INS_udot:
-                    result.insLatency = PERFSCORE_LATENCY_4C;
-                    if (id->idOpSize() == EA_16BYTE)
-                    {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    }
-                    else
-                    {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    }
-                    break;
+//----------------------------------------------------------------------------------------
+// getInsExecutionCharacteristics:
+//    Returns the current instruction execution characteristics
+//
+// Arguments:
+//    id  - The current instruction descriptor to be evaluated
+//
+// Return Value:
+//    A struct containing the current instruction execution characteristics
+//
+// Notes:
+//    The instruction latencies and throughput values returned by this function
+//    are from
+//
+//    The Arm Cortex-A55 Software Optimization Guide:
+//    https://static.docs.arm.com/epm128372/20/arm_cortex_a55_software_optimization_guide_v2.pdf
+//
+emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(instrDesc* id)
+{
+    insExecutionCharacteristics result;
+    instruction                 ins    = id->idIns();
+    insFormat                   insFmt = id->idInsFmt();
 
-                case INS_addhn:
-                case INS_addhn2:
-                case INS_sabdl:
-                case INS_sabdl2:
-                case INS_saddl2:
-                case INS_saddl:
-                case INS_saddw:
-                case INS_saddw2:
-                case INS_ssubl:
-                case INS_ssubl2:
-                case INS_ssubw:
-                case INS_ssubw2:
-                case INS_subhn:
-                case INS_subhn2:
-                case INS_uabdl:
-                case INS_uabdl2:
-                case INS_uaddl:
-                case INS_uaddl2:
-                case INS_uaddw:
-                case INS_uaddw2:
-                case INS_usubl:
-                case INS_usubl2:
-                case INS_usubw:
-                case INS_usubw2:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
-                    break;
+    unsigned memAccessKind;
+    bool     isLocalAccess;
+    getMemoryOperation(id, &memAccessKind, &isLocalAccess);
 
-                case INS_raddhn:
-                case INS_raddhn2:
-                case INS_rsubhn:
-                case INS_rsubhn2:
-                case INS_sabal:
-                case INS_sabal2:
-                case INS_uabal:
-                case INS_uabal2:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+    result.insThroughput = PERFSCORE_THROUGHPUT_ILLEGAL;
+    result.insLatency    = PERFSCORE_LATENCY_ILLEGAL;
 
-                case INS_smlal:
-                case INS_smlal2:
-                case INS_smlsl:
-                case INS_smlsl2:
-                case INS_smull:
-                case INS_smull2:
-                case INS_sqdmlal:
-                case INS_sqdmlal2:
-                case INS_sqdmlsl:
-                case INS_sqdmlsl2:
-                case INS_sqdmull:
-                case INS_sqdmull2:
-                case INS_sqrdmlah:
-                case INS_sqrdmlsh:
-                case INS_umlal:
-                case INS_umlal2:
-                case INS_umlsl:
-                case INS_umlsl2:
-                case INS_umull:
-                case INS_umull2:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+    // Initialize insLatency based upon the instruction's memAccessKind and local access values
+    //
+    if (memAccessKind == PERFSCORE_MEMORY_READ)
+    {
+        result.insLatency = isLocalAccess ? PERFSCORE_LATENCY_RD_STACK : PERFSCORE_LATENCY_RD_GENERAL;
+    }
+    else if (memAccessKind == PERFSCORE_MEMORY_WRITE)
+    {
+        result.insLatency = isLocalAccess ? PERFSCORE_LATENCY_WR_STACK : PERFSCORE_LATENCY_WR_GENERAL;
+    }
+    else if (memAccessKind == PERFSCORE_MEMORY_READ_WRITE)
+    {
+        result.insLatency = isLocalAccess ? PERFSCORE_LATENCY_RD_WR_STACK : PERFSCORE_LATENCY_RD_WR_GENERAL;
+    }
 
-                case INS_pmull:
-                case INS_pmull2:
-                    if ((id->idInsOpt() == INS_OPTS_8B) || (id->idInsOpt() == INS_OPTS_16B))
-                    {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                        result.insLatency    = PERFSCORE_LATENCY_3C;
-                    }
-                    else
-                    {
-                        // Crypto polynomial (64x64) multiply long
-                        assert((id->idInsOpt() == INS_OPTS_1D) || (id->idInsOpt() == INS_OPTS_2D));
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                        result.insLatency    = PERFSCORE_LATENCY_2C;
-                    }
-                    break;
+    switch (insFmt)
+    {
+            //
+            //  Branch Instructions
+            //
 
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
-            }
+        case IF_BI_0A:                                      // b, bl_local
+        case IF_BI_0C:                                      // bl, b_tail
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C; // but is Dual Issue
+            result.insLatency    = PERFSCORE_LATENCY_1C;
             break;
 
-        case IF_DV_3DI: // fmul, fmulx, fmla, fmls (scalar by element)
+        case IF_BI_0B: // beq, bne, bge, blt, bgt, ble, ...
+        case IF_BI_1A: // cbz, cbnz
+        case IF_BI_1B: // tbz, tbnz
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_4C;
+            result.insLatency    = PERFSCORE_LATENCY_1C;
             break;
 
-        case IF_DV_3E: // add, sub, cmeq, cmge, cmgt, cmhi, cmhs, ctst, (scalar)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+        case IF_LARGEJMP: // bcc + b
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
             result.insLatency    = PERFSCORE_LATENCY_2C;
             break;
 
-        case IF_DV_3G: // ext
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
+        case IF_BR_1B: // blr, br_tail
+            if (ins == INS_blr)
+            {
+                result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                result.insLatency    = PERFSCORE_LATENCY_1C;
+                break;
+            }
+            // otherwise we should have a br_tail instruction
+            assert(ins == INS_br_tail);
+            FALLTHROUGH;
+        case IF_BR_1A: // ret, br
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_1C;
             break;
 
-        case IF_DV_2L: // abs, neg, cmeq, cmge, cmgt, cmle, cmlt (scalar)
-        case IF_DV_2M: // (vector)
-            // abs, neg, mvn, not, cmeq, cmge, cmgt, cmle, cmlt,
-            // addv, saddlv,  uaddlv, smaxv, sminv, umaxv, uminv
-            // cls, clz, cnt, rbit, rev16, rev32, rev64,
-            // xtn, xtn2, shll, shll2
+        //
+        //  Arithmetic and logical instructions
+        //
+
+        // ALU, basic
+        case IF_DR_3A: // add, adds, adc, adcs, and, ands, bic, bics,
+                       // eon, eor, orn, orr, sub, subs, sbc, sbcs
+                       // asr, asrv, lsl, lslv, lsr, lsrv, ror, rorv
+                       // sdiv, udiv, mul, smull, smulh, umull, umulh, mneg
+        case IF_DR_2A: // cmp, cmn, tst
+
             switch (ins)
             {
-                case INS_abs:
-                case INS_sqneg:
-                case INS_suqadd:
-                case INS_usqadd:
-                    if (id->idOpSize() == EA_16BYTE)
+                case INS_mul:
+                case INS_smull:
+                case INS_umull:
+                case INS_mneg:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                    break;
+
+                case INS_smulh:
+                case INS_umulh:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_6C;
+                    break;
+
+                case INS_sdiv:
+                case INS_udiv:
+                    if (id->idOpSize() == EA_4BYTE)
                     {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
+                        result.insLatency    = PERFSCORE_LATENCY_12C;
+                        break;
                     }
                     else
                     {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                        assert(id->idOpSize() == EA_8BYTE);
+                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
+                        result.insLatency    = PERFSCORE_LATENCY_20C;
+                        break;
                     }
 
-                    result.insLatency = PERFSCORE_LATENCY_3C;
-                    break;
-
-                case INS_addv:
-                case INS_saddlv:
-                case INS_uaddlv:
-                case INS_cls:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                case INS_add:
+                case INS_adds:
+                case INS_adc:
+                case INS_adcs:
+                case INS_and:
+                case INS_ands:
+                case INS_bic:
+                case INS_bics:
+                case INS_eon:
+                case INS_eor:
+                case INS_orn:
+                case INS_orr:
+                case INS_sub:
+                case INS_subs:
+                case INS_sbc:
+                case INS_sbcs:
+                case INS_asr:
+                case INS_lsl:
+                case INS_lsr:
+                case INS_ror:
+                case INS_cmp:
+                case INS_cmn:
+                case INS_tst:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_1C;
                     break;
 
-                case INS_sminv:
-                case INS_smaxv:
-                case INS_uminv:
-                case INS_umaxv:
+                case INS_asrv:
+                case INS_lslv:
+                case INS_lsrv:
+                case INS_rorv:
+                    // variable shift by register
                     result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    result.insLatency    = PERFSCORE_LATENCY_1C;
                     break;
 
-                case INS_cmeq:
-                case INS_cmge:
-                case INS_cmgt:
-                case INS_cmle:
-                case INS_cmlt:
-
-                case INS_clz:
-                case INS_cnt:
-                case INS_rbit:
-                case INS_rev16:
-                case INS_rev32:
-                case INS_rev64:
-                case INS_xtn:
-                case INS_xtn2:
+                case INS_crc32b:
+                case INS_crc32h:
+                case INS_crc32cb:
+                case INS_crc32ch:
+                case INS_crc32x:
+                case INS_crc32cx:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
                     result.insLatency    = PERFSCORE_LATENCY_2C;
                     break;
 
-                case INS_mvn:
-                case INS_not:
-                case INS_neg:
-                case INS_shll:
-                case INS_shll2:
+                case INS_crc32w:
+                case INS_crc32cw:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
                     result.insLatency    = PERFSCORE_LATENCY_1C;
                     break;
 
-                case INS_sqabs:
-                case INS_sqxtn:
-                case INS_sqxtn2:
-                case INS_sqxtun:
-                case INS_sqxtun2:
-                case INS_uqxtn:
-                case INS_uqxtn2:
+                case INS_smaddl:
+                case INS_smsubl:
+                case INS_smnegl:
+                case INS_umaddl:
+                case INS_umsubl:
+                case INS_umnegl:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
                     break;
 
                 default:
@@ -27262,733 +14832,689 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             }
             break;
 
-        case IF_DV_2N: // sshr, ssra, srshr, srsra, shl, ushr, usra, urshr, ursra, sri, sli (shift by immediate -
-                       // scalar)
-        case IF_DV_2O: // sshr, ssra, srshr, srsra, shl, ushr, usra, urshr, ursra, sri, sli (shift by immediate -
-                       // vector)
-                       // sshll, sshll2, ushll, ushll2, shrn, shrn2, rshrn, rshrn2, sxrl, sxl2, uxtl, uxtl2
-            switch (ins)
-            {
-                case INS_shl:
-                case INS_shrn:
-                case INS_shrn2:
-                case INS_sli:
-                case INS_sri:
-                case INS_sshr:
-                case INS_ushr:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    break;
-
-                case INS_shll:
-                case INS_shll2:
-                case INS_sshll:
-                case INS_sshll2:
-                case INS_ushll:
-                case INS_ushll2:
-                case INS_sxtl:
-                case INS_sxtl2:
-                case INS_uxtl:
-                case INS_uxtl2:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    break;
-
-                case INS_rshrn:
-                case INS_rshrn2:
-                case INS_srshr:
-                case INS_sqshrn:
-                case INS_sqshrn2:
-                case INS_ssra:
-                case INS_urshr:
-                case INS_uqshrn:
-                case INS_uqshrn2:
-                case INS_usra:
-                    if (id->idOpSize() == EA_16BYTE)
-                    {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                        result.insLatency    = PERFSCORE_LATENCY_3C;
-                    }
-                    else
-                    {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                        result.insLatency    = PERFSCORE_LATENCY_3C;
-                    }
-                    break;
+        // ALU, basic immediate
+        case IF_DI_1A: // cmp, cmn
+        case IF_DI_1C: // tst
+        case IF_DI_1D: // mov reg, imm(N,r,s)
+        case IF_DI_1E: // adr, adrp
+        case IF_DI_1F: // ccmp, ccmn
+        case IF_DI_2A: // add, adds, suv, subs
+        case IF_DI_2C: // and, ands, eor, orr
 
-                case INS_srsra:
-                case INS_ursra:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_1C;
+            break;
 
-                case INS_sqrshrn:
-                case INS_sqrshrn2:
-                case INS_sqrshrun:
-                case INS_sqrshrun2:
-                case INS_sqshrun:
-                case INS_sqshrun2:
-                case INS_sqshl:
-                case INS_sqshlu:
-                case INS_uqrshrn:
-                case INS_uqrshrn2:
-                case INS_uqshl:
-                    if (id->idOpSize() == EA_16BYTE)
-                    {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
-                    else
-                    {
-                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                        result.insLatency    = PERFSCORE_LATENCY_4C;
-                    }
-                    break;
+        case IF_DR_2D: // cinc, cinv, cneg
+        case IF_DR_2E: // mov, neg, mvn, negs
+        case IF_DI_1B: // mov, movk, movn, movz
 
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
-            }
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_1C;
             break;
 
-        case IF_DV_2P: // aese, aesd, aesmc, aesimc, sha1su1, sha256su0
+        case IF_LARGEADR: // adrp + add
+        case IF_LARGELDC: // adrp + ldr
+
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency    = PERFSCORE_LATENCY_2C;
             break;
 
-        case IF_DV_3F: // sha1c, sha1m, sha1p, sha1su0, sha256h, sha256h2, sha256su1 (vector)
-            switch (ins)
-            {
-                case INS_sha1su0:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    break;
-
-                case INS_sha256su0:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
-                    break;
-
-                case INS_sha1c:
-                case INS_sha1m:
-                case INS_sha1p:
-                case INS_sha256h:
-                case INS_sha256h2:
-                case INS_sha256su1:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+        // ALU, shift by immediate
+        case IF_DR_3B: // add, adds, and, ands, bic, bics,
+                       // eon, eor, orn, orr, sub, subs
+        case IF_DR_2B: // cmp, cmn, tst
+        case IF_DR_2F: // neg, negs, mvn
+        case IF_DI_2B: // ror
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
 
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
-            }
+        // ALU, extend, scale
+        case IF_DR_3C: // add, adc, and, bic, eon, eor, orn, orr, sub, sbc
+        case IF_DR_2C: // cmp
+        case IF_DV_2U: // sha1h
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
             break;
+        // ALU, Conditional select
+        case IF_DR_1D: // cset, csetm
+        case IF_DR_3D: // csel, csinc, csinv, csneg
 
-        case IF_SI_0A: // brk   imm16
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
             result.insLatency    = PERFSCORE_LATENCY_1C;
             break;
 
-        case IF_SR_1A:
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+        // ALU, Conditional compare
+        case IF_DR_2I: // ccmp , ccmn
+
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
             result.insLatency    = PERFSCORE_LATENCY_1C;
             break;
 
-        case IF_DV_2T: // addv, saddlv, smaxv, sminv, uaddlv, umaxv, uminv
-            switch (ins)
+        // Multiply accumulate
+        case IF_DR_4A: // madd, msub, smaddl, smsubl, umaddl, umsubl
+            if (id->idOpSize() == EA_4BYTE)
             {
-                case INS_addv:
-                case INS_saddlv:
-                case INS_uaddlv:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
-                    break;
+                result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                result.insLatency    = PERFSCORE_LATENCY_3C;
+                break;
+            }
+            else
+            {
+                assert(id->idOpSize() == EA_8BYTE);
+                result.insThroughput = PERFSCORE_THROUGHPUT_5C;
+                result.insLatency    = PERFSCORE_LATENCY_3C;
+                break;
+            }
 
-                case INS_smaxv:
-                case INS_sminv:
-                case INS_umaxv:
-                case INS_uminv:
-                case INS_sha256h2:
-                case INS_sha256su1:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+        // Miscellaneous Data Preocessing instructions
+        case IF_DR_3E: // extr
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
 
-                case INS_sadalp:
-                case INS_uadalp:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
+        case IF_DR_2H: // sxtb, sxth, sxtw, uxtb, uxth, sha1h
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_1C;
+            break;
 
-                case INS_saddlp:
-                case INS_uaddlp:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
-                    break;
+        case IF_DI_2D: // lsl, lsr, asr, sbfm, bfm, ubfm, sbfiz, bfi, ubfiz, sbfx, bfxil, ubfx
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
 
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
+        case IF_DR_2G: // mov sp, cls, clz, rbit, rev16, rev32, rev
+            if (ins == INS_rbit)
+            {
+                result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                result.insLatency    = PERFSCORE_LATENCY_2C;
+                break;
             }
+            else
+            {
+                result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                result.insLatency    = PERFSCORE_LATENCY_1C;
+                break;
+            }
+
+            //
+            //  Load/Store Instructions
+            //
+
+        case IF_LS_1A: // ldr, ldrsw (literal, pc relative immediate)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             break;
 
-        // SVE latencies from Arm Neoverse N2 Software Optimization Guide, Issue 5.0, Revision: r0p3
+        case IF_LS_2A: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh (no immediate)
+                       // ldar, ldarb, ldarh, ldapr, ldaprb, ldaprh, ldxr, ldxrb, ldxrh,
+                       // ldaxr, ldaxrb, ldaxrh, stlr, stlrb, stlrh
 
-        // Predicate logical
-        case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated)
-            result.insLatency    = PERFSCORE_LATENCY_1C;
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            // ToDo: store release have 2/4 cycle latency
             break;
 
-        // Arithmetic, basic
-        case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated)
-        case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated)
-        // Max/min, basic and pairwise
-        case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated)
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+        case IF_LS_2B: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh (scaled immediate)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             break;
 
-        // Divides, 32 bit (Note: worse for 64 bit)
-        case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated)
-            result.insLatency    = PERFSCORE_LATENCY_12C;    // 7 to 12
-            result.insThroughput = PERFSCORE_THROUGHPUT_11C; // 1/11 to 1/7
+        case IF_LS_2C: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh
+                       // ldur, ldurb, ldurh, ldursb, ldursh, ldursw, stur, sturb, sturh
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             break;
 
-        // Multiply, B, H, S element size (Note: D element size is slightly slower)
-        case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated)
-            result.insLatency    = PERFSCORE_LATENCY_4C;
+        case IF_LS_3A: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb strh (register extend, scale 2,4,8)
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             break;
 
-        // Reduction, logical
-        case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated)
-            result.insLatency    = PERFSCORE_LATENCY_6C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+        case IF_LS_3B: // ldp, ldpsw, ldnp, stp, stnp  (load/store pair zero offset)
+        case IF_LS_3C: // load/store pair with offset pre/post inc
+            if (memAccessKind == PERFSCORE_MEMORY_READ)
+            {
+                // ldp, ldpsw, ldnp
+                result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                if (emitIGisInEpilog(emitCurIG) && (ins == INS_ldp))
+                {
+                    // Reduce latency for ldp instructions in the epilog
+                    //
+                    result.insLatency = PERFSCORE_LATENCY_2C;
+                }
+                else if (id->idOpSize() == EA_8BYTE) // X-form
+                {
+                    // the X-reg variant has an extra cycle of latency
+                    // and two cycle throughput
+                    result.insLatency += 1.0;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                }
+            }
+            else // store instructions
+            {
+                // stp, stnp
+                assert(memAccessKind == PERFSCORE_MEMORY_WRITE);
+                result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            }
             break;
 
-        case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated)
+        case IF_LS_3D: // stxr, stxrb, stxrh, stlxr, stlxrb, srlxrh
+            // Store exclusive register, returning status
+            assert(emitInsIsStore(ins));
+            // @ToDo - find out the actual latency
             result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insLatency    = max(PERFSCORE_LATENCY_4C, result.insLatency);
             break;
 
-        // Reduction, arithmetic, D form (worse for B, S and H)
-        case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated)
-        // Reduction, arithmetic, D form (worse for B, S and H)
-        case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated)
-            result.insLatency    = PERFSCORE_LATENCY_4C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+        case IF_LS_3E: //  ARMv8.1 LSE Atomics
+            if (memAccessKind == PERFSCORE_MEMORY_WRITE)
+            {
+                // staddb, staddlb, staddh, staddlh, stadd. staddl
+                result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                result.insLatency    = PERFSCORE_LATENCY_2C;
+            }
+            else
+            {
+                assert(memAccessKind == PERFSCORE_MEMORY_READ_WRITE);
+                result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                result.insLatency    = max(PERFSCORE_LATENCY_3C, result.insLatency);
+            }
             break;
 
-        case IF_SVE_AM_2A: // ........xx...... ...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated)
+        case IF_LS_2D:
+        case IF_LS_2E:
+        case IF_LS_3F:
+            // Load/Store multiple structures
+            // Load single structure and replicate
             switch (ins)
             {
-                case INS_sve_asr:
-                case INS_sve_lsl:
-                case INS_sve_lsr:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                case INS_ld1:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insLatency    = PERFSCORE_LATENCY_3C;
+                    }
+                    else
+                    {
+                        // Q-form
+                        assert(id->idOpSize() == EA_16BYTE);
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
                     break;
-                case INS_sve_srshr:
-                case INS_sve_sqshl:
-                case INS_sve_urshr:
-                case INS_sve_sqshlu:
-                case INS_sve_uqshl:
-                case INS_sve_asrd:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
+
+                case INS_ld1_2regs:
+                case INS_ld2:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
+                    else
+                    {
+                        // Q-form
+                        assert(id->idOpSize() == EA_16BYTE);
+                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
+                        result.insLatency    = PERFSCORE_LATENCY_6C;
+                    }
                     break;
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
+
+                case INS_ld1_3regs:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                        result.insLatency    = PERFSCORE_LATENCY_5C;
+                    }
+                    else
+                    {
+                        // Q-form
+                        assert(id->idOpSize() == EA_16BYTE);
+                        result.insThroughput = PERFSCORE_THROUGHPUT_6C;
+                        result.insLatency    = PERFSCORE_LATENCY_8C;
+                    }
                     break;
-            }
-            break;
 
-        // Arithmetic, shift
-        case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated)
-        case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated)
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
+                case INS_ld1_4regs:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
+                        result.insLatency    = PERFSCORE_LATENCY_6C;
+                    }
+                    else
+                    {
+                        // Q-form
+                        assert(id->idOpSize() == EA_16BYTE);
+                        result.insThroughput = PERFSCORE_THROUGHPUT_8C;
+                        result.insLatency    = PERFSCORE_LATENCY_10C;
+                    }
+                    break;
 
-        // Count/reverse bits
-        // Arithmetic, basic
-        // Floating point absolute value/difference
-        // Floating point arithmetic
-        // Logical
-        case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated)
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            break;
+                case INS_ld3:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D-form
+                        if (optGetElemsize(id->idInsOpt()) == EA_4BYTE)
+                        {
+                            // S
+                            result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                            result.insLatency    = PERFSCORE_LATENCY_5C;
+                        }
+                        else
+                        {
+                            // B/H
+                            result.insThroughput = PERFSCORE_THROUGHPUT_4C;
+                            result.insLatency    = PERFSCORE_LATENCY_6C;
+                        }
+                    }
+                    else
+                    {
+                        // Q-form
+                        assert(id->idOpSize() == EA_16BYTE);
+                        if ((optGetElemsize(id->idInsOpt()) == EA_4BYTE) ||
+                            (optGetElemsize(id->idInsOpt()) == EA_8BYTE))
+                        {
+                            // S/D
+                            result.insThroughput = PERFSCORE_THROUGHPUT_6C;
+                            result.insLatency    = PERFSCORE_LATENCY_8C;
+                        }
+                        else
+                        {
+                            // B/H
+                            result.insThroughput = PERFSCORE_THROUGHPUT_7C;
+                            result.insLatency    = PERFSCORE_LATENCY_9C;
+                        }
+                    }
+                    break;
 
-        case IF_SVE_AQ_3A:
-            switch (ins)
-            {
-                // Arithmetic, basic
-                case INS_sve_abs:
-                case INS_sve_neg:
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                case INS_ld4:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D-form
+                        if (optGetElemsize(id->idInsOpt()) == EA_4BYTE)
+                        {
+                            // S
+                            result.insThroughput = PERFSCORE_THROUGHPUT_4C;
+                            result.insLatency    = PERFSCORE_LATENCY_6C;
+                        }
+                        else
+                        {
+                            // B/H
+                            result.insThroughput = PERFSCORE_THROUGHPUT_5C;
+                            result.insLatency    = PERFSCORE_LATENCY_7C;
+                        }
+                    }
+                    else
+                    {
+                        // Q-form
+                        assert(id->idOpSize() == EA_16BYTE);
+                        if ((optGetElemsize(id->idInsOpt()) == EA_4BYTE) ||
+                            (optGetElemsize(id->idInsOpt()) == EA_8BYTE))
+                        {
+                            // S/D
+                            result.insThroughput = PERFSCORE_THROUGHPUT_8C;
+                            result.insLatency    = PERFSCORE_LATENCY_10C;
+                        }
+                        else
+                        {
+                            // B/H
+                            result.insThroughput = PERFSCORE_THROUGHPUT_9C;
+                            result.insLatency    = PERFSCORE_LATENCY_11C;
+                        }
+                    }
+                    break;
+
+                case INS_ld1r:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                    break;
+
+                case INS_ld2r:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
+                    else
+                    {
+                        // B/H/S
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insLatency    = PERFSCORE_LATENCY_3C;
+                    }
+                    break;
+
+                case INS_ld3r:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D
+                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                        result.insLatency    = PERFSCORE_LATENCY_5C;
+                    }
+                    else
+                    {
+                        // B/H/S
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
                     break;
 
-                // Extend, sign or zero
-                case INS_sve_sxtb:
-                case INS_sve_sxth:
-                case INS_sve_sxtw:
-                case INS_sve_uxtb:
-                case INS_sve_uxth:
-                case INS_sve_uxtw:
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                case INS_ld4r:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D
+                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
+                        result.insLatency    = PERFSCORE_LATENCY_6C;
+                    }
+                    else
+                    {
+                        // B/H/S
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
                     break;
 
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
+                case INS_st1:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_1C;
                     break;
-            }
-            break;
-
-        case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend
-                           // (predicated)
-        case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand
-                           // (predicated)
-        case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
-        case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high
-                           // (unpredicated)
-        case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
-        case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
-        case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
-        case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
-        case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
-        case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
-        case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
-        case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
-        case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_5C;
-            break;
-
-        case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
-        case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_4C;
-            break;
-
-        case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
-        case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            break;
 
-        case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
-            switch (ins)
-            {
-                case INS_sve_fdot:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                case INS_sve_bfdot:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
+                case INS_st1_2regs:
+                case INS_st2:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insLatency    = PERFSCORE_LATENCY_1C;
+                    }
+                    else
+                    {
+                        // Q-form
+                        assert(id->idOpSize() == EA_16BYTE);
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                        result.insLatency    = PERFSCORE_LATENCY_2C;
+                    }
                     break;
-            }
-            break;
-
-        case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
-        case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-            result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-            break;
-
-        case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
-        case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register
-                           // increment)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_8C;
-            break;
-
-        case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
-        case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            break;
-
-        case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
-        case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements
-        case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements
-        case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
-        // Conditional extract operations, SIMD&FP scalar and vector forms
-        case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements
-        case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector
-        case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
-
-        // Conditional extract operations, scalar form
-        case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register
-            result.insLatency    = PERFSCORE_LATENCY_8C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
-
-        // Copy, scalar SIMD&FP or imm
-        case IF_SVE_CP_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector
-                           // (predicated)
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            break;
 
-        // Copy, scalar
-        case IF_SVE_CQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy general register to vector (predicated)
-            result.insLatency    = PERFSCORE_LATENCY_5C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
-
-        case IF_SVE_CT_3A: // ................ ...gggnnnnnddddd -- SVE reverse doublewords
-            result.insThroughput = PERFSCORE_THROUGHPUT_140C; // @ToDo Currently undocumented.
-            result.insLatency    = PERFSCORE_LATENCY_140C;
-            break;
-
-        case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
-        case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
-
-        case IF_SVE_CX_4A:   // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
-        case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
-        case IF_SVE_CY_3A:   // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate
-        case IF_SVE_CY_3B:   // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate
-        case IF_SVE_EG_3A:   // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed)
-        case IF_SVE_EY_3A:   // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed)
-        case IF_SVE_FE_3A:   // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
-        case IF_SVE_FE_3B:   // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
-        case IF_SVE_FG_3A:   // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
-        case IF_SVE_FG_3B:   // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
-        case IF_SVE_FH_3A:   // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
-        case IF_SVE_FH_3B:   // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
-        case IF_SVE_FJ_3A:   // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
-        case IF_SVE_FJ_3B:   // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
-            result.insLatency    = PERFSCORE_LATENCY_4C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
-
-        case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed)
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            break;
-
-        case IF_SVE_CZ_4A:   // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-        case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-        case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-        case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
-            switch (ins)
-            {
-                case INS_sve_mov:
-                case INS_sve_and:
-                case INS_sve_orr:
-                case INS_sve_eor:
-                case INS_sve_bic:
-                case INS_sve_orn:
-                case INS_sve_not:
-                case INS_sve_sel:
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                case INS_st1_3regs:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                        result.insLatency    = PERFSCORE_LATENCY_2C;
+                    }
+                    else
+                    {
+                        // Q-form
+                        assert(id->idOpSize() == EA_16BYTE);
+                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                        result.insLatency    = PERFSCORE_LATENCY_3C;
+                    }
                     break;
 
-                case INS_sve_bics:
-                case INS_sve_eors:
-                case INS_sve_nots:
-                case INS_sve_ands:
-                case INS_sve_orrs:
-                case INS_sve_orns:
-                case INS_sve_nors:
-                case INS_sve_nands:
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                case INS_st1_4regs:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                        result.insLatency    = PERFSCORE_LATENCY_2C;
+                    }
+                    else
+                    {
+                        // Q-form
+                        assert(id->idOpSize() == EA_16BYTE);
+                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
                     break;
 
-                case INS_sve_nor:
-                case INS_sve_nand:
-                    result.insLatency    = PERFSCORE_LATENCY_1C;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                case INS_st3:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
                     break;
 
-                case INS_sve_movs:
-                    result.insLatency    = PERFSCORE_LATENCY_1C;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                case INS_st4:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                        result.insLatency    = PERFSCORE_LATENCY_3C;
+                    }
+                    else
+                    {
+                        assert(id->idOpSize() == EA_16BYTE);
+                        if (optGetElemsize(id->idInsOpt()) == EA_8BYTE)
+                        {
+                            // D
+                            result.insThroughput = PERFSCORE_THROUGHPUT_4C;
+                            result.insLatency    = PERFSCORE_LATENCY_4C;
+                        }
+                        else
+                        {
+                            // B/H/S
+                            result.insThroughput = PERFSCORE_THROUGHPUT_5C;
+                            result.insLatency    = PERFSCORE_LATENCY_5C;
+                        }
+                    }
                     break;
 
                 default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
+                    unreached();
             }
             break;
 
-        case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition
-        case IF_SVE_DC_3A: // ................ ..gggg.NNNN.MMMM -- SVE propagate break to next partition
+        case IF_LS_2F:
+        case IF_LS_2G:
+        case IF_LS_3G:
+            // Load/Store single structure
             switch (ins)
             {
-                case INS_sve_brkpa:
-                case INS_sve_brkpb:
-                case INS_sve_brkn:
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                case INS_ld1:
                     result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    break;
-
-                case INS_sve_brkpas:
-                case INS_sve_brkpbs:
-                case INS_sve_brkns:
                     result.insLatency    = PERFSCORE_LATENCY_3C;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
                     break;
 
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
+                case INS_ld2:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
+                    else
+                    {
+                        // B/H/S
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insLatency    = PERFSCORE_LATENCY_3C;
+                    }
                     break;
-            }
-            break;
 
-        case IF_SVE_DB_3A: // ................ ..gggg.NNNNMDDDD -- SVE partition break condition
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            break;
+                case INS_ld3:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D
+                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                        result.insLatency    = PERFSCORE_LATENCY_5C;
+                    }
+                    else
+                    {
+                        // B/H/S
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
+                    break;
 
-        case IF_SVE_DB_3B: // ................ ..gggg.NNNN.DDDD -- SVE partition break condition
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            break;
+                case INS_ld4:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D
+                        result.insThroughput = PERFSCORE_THROUGHPUT_4C;
+                        result.insLatency    = PERFSCORE_LATENCY_6C;
+                    }
+                    else
+                    {
+                        // B/H/S
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
+                    break;
 
-        case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            break;
+                case INS_st1:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_1C;
+                    break;
 
-        case IF_SVE_DE_1A: // ........xx...... ......ppppp.DDDD -- SVE predicate initialize
-            switch (ins)
-            {
-                case INS_sve_ptrue:
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                case INS_st2:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                        result.insLatency    = PERFSCORE_LATENCY_2C;
+                    }
+                    else
+                    {
+                        // B/H/S
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insLatency    = PERFSCORE_LATENCY_1C;
+                    }
                     break;
 
-                case INS_sve_ptrues:
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                case INS_st3:
+                case INS_st4:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
                     break;
 
                 default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
+                    unreached();
             }
             break;
 
-        case IF_SVE_DF_2A: // ........xx...... .......VVVV.DDDD -- SVE predicate next active
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            break;
+        case IF_SN_0A: // nop, yield, align
 
-        case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated)
-            switch (ins)
+            if (id->idIns() == INS_align)
             {
-                case INS_sve_rdffr:
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    break;
-
-                case INS_sve_rdffrs:
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    break;
+                if ((id->idInsOpt() == INS_OPTS_NONE) || ((instrDescAlign*)id)->isPlacedAfterJmp)
+                {
+                    // Either we're not going to generate 'align' instruction, or the 'align'
+                    // instruction is placed immediately after unconditional jmp.
+                    // In both cases, don't count for PerfScore.
 
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
+                    result.insThroughput = PERFSCORE_THROUGHPUT_ZERO;
+                    result.insLatency    = PERFSCORE_LATENCY_ZERO;
                     break;
+                }
             }
-            break;
-
-        case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated)
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
-
-        case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            break;
-
-        case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test
-            result.insLatency    = PERFSCORE_LATENCY_1C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            break;
-
-        case IF_SVE_DK_3A: // ........xx...... ..gggg.NNNNddddd -- SVE predicate count
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            break;
-
-        case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
-        case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
-
-        // Extract/insert operation, SIMD and FP scalar form
-        case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
-
-        // Extract/insert operation, scalar
-        case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register
-            result.insLatency    = PERFSCORE_LATENCY_5C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
-
-        // Count/reverse bits
-        // Reverse, vector
-        case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            break;
-
-        // Arithmetic, pairwise add
-        // Max/min, basic and pairwise
-        case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            break;
-
-        case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated)
-            switch (ins)
+            else if (ins == INS_yield)
             {
-                // Arithmetic, complex
-                case INS_sve_sqabs:
-                case INS_sve_sqneg:
-                    // Reciprocal estimate
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                    break;
-
-                // Reciprocal estimate
-                case INS_sve_urecpe:
-                case INS_sve_ursqrte:
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    break;
-
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
+                // @ToDo - find out the actual latency, match x86/x64 for now
+                result.insThroughput = PERFSCORE_THROUGHPUT_140C;
+                result.insLatency    = PERFSCORE_LATENCY_140C;
             }
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_ZERO;
             break;
 
-        // Arithmetic, complex
-        case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+        case IF_SI_0B: // dmb, dsb, isb
+            // @ToDo - find out the actual latency
+            result.insThroughput = PERFSCORE_THROUGHPUT_10C;
+            result.insLatency    = PERFSCORE_LATENCY_10C;
             break;
 
-        // Arithmetic, shift complex
-        case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left
-                           // (predicated)
+        case IF_DV_2J: // fcvt  Vd Vn
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
             result.insLatency    = PERFSCORE_LATENCY_4C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             break;
 
-        // Arithmetic, pairwise add and accum long
-        case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long
-            result.insLatency    = PERFSCORE_LATENCY_4C;
+        case IF_DV_2K: // fcmp  Vd Vn
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_1C;
             break;
 
-        // Floating point arithmetic
-        // Floating point min/max pairwise
-        case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations
-            result.insLatency    = PERFSCORE_LATENCY_2C;
+        case IF_DV_1A: // fmov - immediate (scalar)
             result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_1C;
             break;
 
-        // Floating point reduction, F64. (Note: Worse for F32 and F16)
-        case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+        case IF_DV_1B: // fmov, orr, bic, movi, mvni  (immediate vector)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_1C;
             break;
 
-        // Floating point associative add, F64. (Note: Worse for F32 and F16)
-        case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated)
-            result.insLatency    = PERFSCORE_LATENCY_4C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+        case IF_DV_1C: // fcmp vn, #0.0
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
             break;
 
-        case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated)
+        case IF_DV_2A: // fabs, fneg, fsqrt, fcvtXX, frintX, scvtf, ucvtf, fcmXX (vector)
             switch (ins)
             {
-                // Floating point absolute value/difference
-                case INS_sve_fabd:
-                // Floating point min/max
-                case INS_sve_fmax:
-                case INS_sve_fmaxnm:
-                case INS_sve_fmin:
-                case INS_sve_fminnm:
-                // Floating point arithmetic
-                case INS_sve_fadd:
-                case INS_sve_fsub:
-                case INS_sve_fsubr:
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                case INS_fabs:
+                case INS_fneg:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency = (id->idOpSize() == EA_8BYTE) ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_3C / 2;
                     break;
 
-                // Floating point divide, F64 (Note: Worse for F32, F16)
-                case INS_sve_fdiv:
-                case INS_sve_fdivr:
-                    result.insLatency    = PERFSCORE_LATENCY_15C;    // 7 to 15
-                    result.insThroughput = PERFSCORE_THROUGHPUT_14C; // 1/14 to 1/7
+                case INS_fsqrt:
+                    if ((id->idInsOpt() == INS_OPTS_2S) || (id->idInsOpt() == INS_OPTS_4S))
+                    {
+                        // S-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                        result.insLatency    = PERFSCORE_LATENCY_11C;
+                    }
+                    else
+                    {
+                        // D-form
+                        assert(id->idInsOpt() == INS_OPTS_2D);
+                        result.insThroughput = PERFSCORE_THROUGHPUT_6C;
+                        result.insLatency    = PERFSCORE_LATENCY_18C;
+                    }
                     break;
 
-                // Floating point multiply
-                case INS_sve_fmul:
-                case INS_sve_fmulx:
-                case INS_sve_fscale:
-                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                case INS_fcvtas:
+                case INS_fcvtau:
+                case INS_fcvtms:
+                case INS_fcvtmu:
+                case INS_fcvtns:
+                case INS_fcvtnu:
+                case INS_fcvtps:
+                case INS_fcvtpu:
+                case INS_fcvtzs:
+                case INS_fcvtzu:
+                case INS_frinta:
+                case INS_frinti:
+                case INS_frintm:
+                case INS_frintn:
+                case INS_frintp:
+                case INS_frintx:
+                case INS_frintz:
+                case INS_scvtf:
+                case INS_ucvtf:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+
+                case INS_fcmeq:
+                case INS_fcmge:
+                case INS_fcmgt:
+                case INS_fcmle:
+                case INS_fcmlt:
+                case INS_frecpe:
+                case INS_frsqrte:
+                case INS_urecpe:
+                case INS_ursqrte:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
                     break;
 
-                case INS_sve_famax:
-                case INS_sve_famin:
-                    result.insLatency    = PERFSCORE_LATENCY_20C;    // TODO-SVE: Placeholder
-                    result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder
+                case INS_fcvtl:
+                case INS_fcvtl2:
+                case INS_fcvtn:
+                case INS_fcvtn2:
+                case INS_fcvtxn:
+                case INS_fcvtxn2:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
 
                 default:
@@ -27998,198 +15524,172 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             }
             break;
 
-        // Floating point round to integral, F64. (Note: Worse for F32 and F16)
-        case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            break;
-
-        case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations
+        case IF_DV_2G: // fmov, fabs, fneg, fsqrt, fcmXX, fcvtXX, frintX, scvtf, ucvtf (scalar)
             switch (ins)
             {
-                // Floating point reciprocal estimate, F64. (Note: Worse for F32 and F16)
-                case INS_sve_frecpx:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                case INS_fmov:
+                    // FP move, vector register
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
                     result.insLatency    = PERFSCORE_LATENCY_1C;
                     break;
 
-                // Floating point square root F64. (Note: Worse for F32 and F16)
-                case INS_sve_fsqrt:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_16C;
-                    result.insLatency    = PERFSCORE_LATENCY_14C;
-                    break;
-
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
-            }
-            break;
-
-        case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
-        case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
-        case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
-        case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
-        case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_7C;
-            break;
-
-        case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise
-        case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
-        case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
-        case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
-        case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_1C;
-            break;
-
-        // Not available in Arm Neoverse N2 Software Optimization Guide.
-        case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords)
-        case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords)
-        case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords)
-        case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords)
-            result.insLatency    = PERFSCORE_LATENCY_20C;    // TODO-SVE: Placeholder
-            result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder
-            break;
+                case INS_fabs:
+                case INS_fneg:
 
-        // Not available in Arm Neoverse N2 Software Optimization Guide.
-        case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow
-            result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder
-            result.insLatency    = PERFSCORE_LATENCY_20C;    // TODO-SVE: Placeholder
-            break;
+                case INS_fcvtas:
+                case INS_fcvtau:
+                case INS_fcvtms:
+                case INS_fcvtmu:
+                case INS_fcvtns:
+                case INS_fcvtnu:
+                case INS_fcvtps:
+                case INS_fcvtpu:
+                case INS_fcvtzs:
+                case INS_fcvtzu:
+                case INS_scvtf:
+                case INS_ucvtf:
 
-        case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow
-        case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
-        case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_4C;
-            break;
+                case INS_frinta:
+                case INS_frinti:
+                case INS_frintm:
+                case INS_frintn:
+                case INS_frintp:
+                case INS_frintx:
+                case INS_frintz:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                    break;
 
-        case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations
-        case IF_SVE_GL_1A: // ................ ...........ddddd -- SVE2 crypto unary operations
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
+                case INS_fcvtxn:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
 
-        case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit
-        case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. -- SVE integer compare scalar count and limit (predicate
-                           // pair)
-        case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit
-                           // (predicate-as-counter)
-        case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            break;
+                case INS_fcmeq:
+                case INS_fcmge:
+                case INS_fcmgt:
+                case INS_fcmle:
+                case INS_fcmlt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
 
-        case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue
-        case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated)
-        case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated)
-        case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated)
-        case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
+                case INS_frecpe:
+                case INS_frecpx:
+                case INS_frsqrte:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
 
-        case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated)
-            switch (ins)
-            {
-                case INS_sve_umin:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                case INS_fsqrt:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_19C;
+                        result.insLatency    = PERFSCORE_LATENCY_22C;
+                    }
+                    else
+                    {
+                        // S-form
+                        assert(id->idOpSize() == EA_4BYTE);
+                        result.insThroughput = PERFSCORE_THROUGHPUT_9C;
+                        result.insLatency    = PERFSCORE_LATENCY_12C;
+                    }
                     break;
+
                 default:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
                     break;
             }
             break;
 
-        case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated)
-        case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
-        case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
-        case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
-        case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
+        case IF_DV_2Q: // faddp, fmaxnmp, fmaxp, fminnmp, fminp (scalar)
+        case IF_DV_2R: // fmaxnmv, fmaxv, fminnmv, fminv
             result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_5C;
-            break;
-
-        case IF_SVE_IH_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
-                             // immediate)
-        case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
-                             // immediate)
-        case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
-                             // immediate)
-        case IF_SVE_IJ_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-        case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_9C;
-            break;
-
-        case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate)
-        case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
-                             // immediate)
-        case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
-                             // immediate)
-        case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
-                             // immediate)
-            result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-            result.insLatency    = PERFSCORE_LATENCY_6C;
+            result.insLatency    = PERFSCORE_LATENCY_4C;
             break;
 
-        case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus
-                           // immediate)
+        case IF_DV_2S: // addp (scalar)
             result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_10C;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
             break;
 
-        case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus
-                           // immediate)
+        case IF_DV_3B: // fadd, fsub, fdiv, fmul, fmulx, fmla, fmls, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX
+                       // faddp, fmaxnmp, fmaxp, fminnmp, fminp, addp (vector)
             switch (ins)
             {
-                case INS_sve_ld1rqb:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_6C;
-                    break;
-                case INS_sve_ld1rob:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                case INS_sve_ld1rqh:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_6C;
-                    break;
-                case INS_sve_ld1roh:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                case INS_sve_ld1rqw:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_6C;
+                case INS_fmin:
+                case INS_fminnm:
+                case INS_fmax:
+                case INS_fmaxnm:
+                case INS_fabd:
+                case INS_fadd:
+                case INS_fsub:
+                case INS_fmul:
+                case INS_fmulx:
+                case INS_fmla:
+                case INS_fmls:
+                case INS_frecps:
+                case INS_frsqrts:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
-                case INS_sve_ld1row:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+
+                case INS_faddp:
+                case INS_fmaxnmp:
+                case INS_fmaxp:
+                case INS_fminnmp:
+                case INS_fminp:
+                    if (id->idOpSize() == EA_16BYTE)
+                    {
+                        // Q-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
+                    else
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
                     break;
-                case INS_sve_ld1rqd:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_6C;
+
+                case INS_facge:
+                case INS_facgt:
+                case INS_fcmeq:
+                case INS_fcmge:
+                case INS_fcmgt:
+                case INS_fcmle:
+                case INS_fcmlt:
+                    if (id->idOpSize() == EA_16BYTE)
+                    {
+                        // Q-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insLatency    = PERFSCORE_LATENCY_2C;
+                    }
+                    else
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                        result.insLatency    = PERFSCORE_LATENCY_2C;
+                    }
                     break;
-                case INS_sve_ld1rod:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+
+                case INS_fdiv:
+                    if ((id->idInsOpt() == INS_OPTS_2S) || (id->idInsOpt() == INS_OPTS_4S))
+                    {
+                        // S-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_10C;
+                        result.insLatency    = PERFSCORE_LATENCY_13C;
+                    }
+                    else
+                    {
+                        // D-form
+                        assert(id->idInsOpt() == INS_OPTS_2D);
+                        result.insThroughput = PERFSCORE_THROUGHPUT_10C;
+                        result.insLatency    = PERFSCORE_LATENCY_22C;
+                    }
                     break;
+
                 default:
                     // all other instructions
                     perfScoreUnhandledInstruction(id, &result);
@@ -28197,80 +15697,62 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             }
             break;
 
-        case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
-                           // immediate)
-            switch (ins)
-            {
-                case INS_sve_ld2q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                case INS_sve_ld3q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                case INS_sve_ld4q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
-            }
+        case IF_DV_3AI: // mul, mla, mls (vector by element)
+        case IF_DV_3BI: // fmul, fmulx, fmla, fmls (vector by element)
+        case IF_DV_3EI: // sqdmlal, sqdmlsl, sqdmulh, sqdmull (scalar by element)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_4C;
+            break;
+
+        case IF_DV_4A: // fmadd, fmsub, fnmadd, fnsub (scalar)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_4C;
             break;
 
-        case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate)
+        case IF_DV_3D: // fadd, fsub, fdiv, fmul, fmulx, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX (scalar)
             switch (ins)
             {
-                case INS_sve_ld2b:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_9C;
-                    break;
-                case INS_sve_ld3b:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
-                    break;
-                case INS_sve_ld4b:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
-                    break;
-                case INS_sve_ld2h:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_9C;
-                    break;
-                case INS_sve_ld3h:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
-                    break;
-                case INS_sve_ld4h:
+                case INS_fadd:
+                case INS_fsub:
+                case INS_fabd:
+                case INS_fmax:
+                case INS_fmaxnm:
+                case INS_fmin:
+                case INS_fminnm:
+                case INS_fmul:
+                case INS_fmulx:
+                case INS_fnmul:
+                case INS_frecps:
+                case INS_frsqrts:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
-                    break;
-                case INS_sve_ld2w:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_9C;
-                    break;
-                case INS_sve_ld3w:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
-                case INS_sve_ld4w:
+
+                case INS_facge:
+                case INS_facgt:
+                case INS_fcmeq:
+                case INS_fcmge:
+                case INS_fcmgt:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
-                    break;
-                case INS_sve_ld2d:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_9C;
-                    break;
-                case INS_sve_ld3d:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
                     break;
-                case INS_sve_ld4d:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
+
+                case INS_fdiv:
+                    if (id->idOpSize() == EA_8BYTE)
+                    {
+                        // D-form
+                        result.insThroughput = PERFSCORE_THROUGHPUT_6C;
+                        result.insLatency    = PERFSCORE_LATENCY_15C;
+                    }
+                    else
+                    {
+                        // S-form
+                        assert(id->idOpSize() == EA_4BYTE);
+                        result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                        result.insLatency    = PERFSCORE_LATENCY_10C;
+                    }
                     break;
+
                 default:
                     // all other instructions
                     perfScoreUnhandledInstruction(id, &result);
@@ -28278,22 +15760,28 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             }
             break;
 
-        case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
-                           // immediate)
+        case IF_DV_2H: // fmov, fcvtXX - to general
+            // fmov : FP transfer to general register
+            // fcvtaXX : FP convert from vector to general
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            break;
+
+        case IF_DV_2I: // fmov, Xcvtf - from general
             switch (ins)
             {
-                case INS_sve_st2q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                case INS_sve_st3q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                case INS_fmov:
+                    // FP transfer from general register
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
                     break;
-                case INS_sve_st4q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+
+                case INS_scvtf:
+                case INS_ucvtf:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_5C;
                     break;
+
                 default:
                     // all other instructions
                     perfScoreUnhandledInstruction(id, &result);
@@ -28301,296 +15789,260 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             }
             break;
 
-        case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
-                           // immediate)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
-        case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
-        case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
-        case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate)
+        case IF_DV_3C: // mov,and, bic, eor, mov,mvn, orn, bsl, bit, bif,
+                       // tbl, tbx (vector)
             switch (ins)
             {
-                case INS_sve_st2b:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
-                case INS_sve_st3b:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
-                    result.insLatency    = PERFSCORE_LATENCY_7C;
-                    break;
-                case INS_sve_st4b:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
-                    result.insLatency    = PERFSCORE_LATENCY_11C;
-                    break;
-                case INS_sve_st2h:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                case INS_tbl:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_1C;
                     break;
-                case INS_sve_st3h:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
-                    result.insLatency    = PERFSCORE_LATENCY_7C;
+                case INS_tbl_2regs:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3X;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
                     break;
-                case INS_sve_st4h:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
-                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                case INS_tbl_3regs:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_4X;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
                     break;
-                case INS_sve_st2w:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                case INS_tbl_4regs:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3X;
                     result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
-                case INS_sve_st3w:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
-                    result.insLatency    = PERFSCORE_LATENCY_7C;
+                case INS_tbx:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3X;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
                     break;
-                case INS_sve_st4w:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
-                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                case INS_tbx_2regs:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_4X;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
                     break;
-                case INS_sve_st2d:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                case INS_tbx_3regs:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_5X;
                     result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
-                case INS_sve_st3d:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
-                    result.insLatency    = PERFSCORE_LATENCY_7C;
-                    break;
-                case INS_sve_st4d:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
-                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                case INS_tbx_4regs:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_6X;
+                    result.insLatency    = PERFSCORE_LATENCY_5C;
                     break;
                 default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
+                    // All other instructions
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_1C;
                     break;
             }
             break;
 
-        case IF_SVE_JD_4A:   // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-        case IF_SVE_JD_4B:   // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-        case IF_SVE_JJ_4A:   // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
-                           // offsets)
-        case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
-                             // unscaled offsets)
-        case IF_SVE_JN_3A:   // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-        case IF_SVE_JN_3B:   // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+        case IF_DV_2E: // mov, dup (scalar)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
             result.insLatency    = PERFSCORE_LATENCY_2C;
             break;
 
-        case IF_SVE_HW_4A:   // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_IU_4A:   // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_HW_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-        case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
-                             // offsets)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_9C;
-            break;
-
-        case IF_SVE_IF_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
-                             // scalar)
-        case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
-                             // scalar)
+        case IF_DV_2F: // mov, ins (element)
             result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_10C;
-            break;
-
-        case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar)
-        case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
-                             // scalar)
-        case IF_SVE_II_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
-        case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
-        case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
-        case IF_SVE_IK_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-        case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_9C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
             break;
 
-        case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar)
+        case IF_DV_2B: // smov, umov - to general)
             result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_10C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
             break;
 
-        case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar)
-            switch (ins)
+        case IF_DV_2C: // mov, dup, ins - from general)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            if (ins == INS_dup)
             {
-                case INS_sve_ld1rqb:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_6C;
-                    break;
-                case INS_sve_ld1rob:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                case INS_sve_ld1rqh:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_6C;
-                    break;
-                case INS_sve_ld1roh:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                case INS_sve_ld1rqw:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_6C;
-                    break;
-                case INS_sve_ld1row:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                case INS_sve_ld1rqd:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-                    result.insLatency    = PERFSCORE_LATENCY_6C;
-                    break;
-                case INS_sve_ld1rod:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
+                result.insLatency = PERFSCORE_LATENCY_3C;
             }
-            break;
-
-        case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
-                           // scalar)
-            switch (ins)
+            else
             {
-                case INS_sve_ld2q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                case INS_sve_ld3q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                case INS_sve_ld4q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
+                assert((ins == INS_ins) || (ins == INS_mov));
+                result.insLatency = PERFSCORE_LATENCY_2C;
             }
             break;
 
-        case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar)
+        case IF_DV_2D: // dup (dvector)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_DV_3A: // (vector)
+            // add, sub, mul, mla, mls, cmeq, cmge, cmgt, cmhi, cmhs, ctst,
+            // pmul, saba, uaba, sabd, uabd, umin, uminp, umax, umaxp, smin, sminp, smax, smaxp
             switch (ins)
             {
-                case INS_sve_ld2b:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_9C;
-                    break;
-                case INS_sve_ld3b:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
-                    break;
-                case INS_sve_ld4b:
+                case INS_add:
+                case INS_sub:
+                case INS_cmeq:
+                case INS_cmge:
+                case INS_cmgt:
+                case INS_cmhi:
+                case INS_cmhs:
+                case INS_shadd:
+                case INS_shsub:
+                case INS_srhadd:
+                case INS_srshl:
+                case INS_sshl:
+                case INS_smax:
+                case INS_smaxp:
+                case INS_smin:
+                case INS_sminp:
+                case INS_umax:
+                case INS_umaxp:
+                case INS_umin:
+                case INS_uminp:
+                case INS_uhadd:
+                case INS_uhsub:
+                case INS_urhadd:
+                case INS_urshl:
+                case INS_ushl:
+                case INS_uzp1:
+                case INS_uzp2:
+                case INS_zip1:
+                case INS_zip2:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
                     break;
-                case INS_sve_ld2h:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_9C;
+
+                case INS_trn1:
+                case INS_trn2:
+                    if (id->idInsOpt() == INS_OPTS_2D)
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    }
+                    else
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    }
+
+                    result.insLatency = PERFSCORE_LATENCY_2C;
                     break;
-                case INS_sve_ld3h:
+
+                case INS_addp:
+                case INS_cmtst:
+                case INS_pmul:
+                case INS_sabd:
+                case INS_sqadd:
+                case INS_sqsub:
+                case INS_uabd:
+                case INS_uqadd:
+                case INS_uqsub:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
                     break;
-                case INS_sve_ld4h:
+
+                case INS_mla:
+                case INS_mls:
+                case INS_mul:
+                case INS_sqdmulh:
+                case INS_sqrdmulh:
+                case INS_sqrshl:
+                case INS_sqshl:
+                case INS_uqrshl:
+                case INS_uqshl:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
-                    break;
-                case INS_sve_ld2w:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_9C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
-                case INS_sve_ld3w:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
+
+                case INS_saba:
+                case INS_uaba:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
-                case INS_sve_ld4w:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
+
+                case INS_sdot:
+                case INS_udot:
+                    result.insLatency = PERFSCORE_LATENCY_4C;
+                    if (id->idOpSize() == EA_16BYTE)
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    }
+                    else
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    }
                     break;
-                case INS_sve_ld2d:
+
+                case INS_addhn:
+                case INS_addhn2:
+                case INS_sabdl:
+                case INS_sabdl2:
+                case INS_saddl2:
+                case INS_saddl:
+                case INS_saddw:
+                case INS_saddw2:
+                case INS_ssubl:
+                case INS_ssubl2:
+                case INS_ssubw:
+                case INS_ssubw2:
+                case INS_subhn:
+                case INS_subhn2:
+                case INS_uabdl:
+                case INS_uabdl2:
+                case INS_uaddl:
+                case INS_uaddl2:
+                case INS_uaddw:
+                case INS_uaddw2:
+                case INS_usubl:
+                case INS_usubl2:
+                case INS_usubw:
+                case INS_usubw2:
                     result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_9C;
-                    break;
-                case INS_sve_ld3d:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
-                    break;
-                case INS_sve_ld4d:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
                     break;
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
+
+                case INS_raddhn:
+                case INS_raddhn2:
+                case INS_rsubhn:
+                case INS_rsubhn2:
+                case INS_sabal:
+                case INS_sabal2:
+                case INS_uabal:
+                case INS_uabal2:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
-            }
-            break;
 
-        case IF_SVE_IU_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-        case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
-                             // scaled offsets)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_9C;
-            break;
+                case INS_smlal:
+                case INS_smlal2:
+                case INS_smlsl:
+                case INS_smlsl2:
+                case INS_smull:
+                case INS_smull2:
+                case INS_sqdmlal:
+                case INS_sqdmlal2:
+                case INS_sqdmlsl:
+                case INS_sqdmlsl2:
+                case INS_sqdmull:
+                case INS_sqdmull2:
+                case INS_sqrdmlah:
+                case INS_sqrdmlsh:
+                case INS_umlal:
+                case INS_umlal2:
+                case INS_umlsl:
+                case INS_umlsl2:
+                case INS_umull:
+                case INS_umull2:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
 
-        case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar)
-            switch (ins)
-            {
-                case INS_sve_ld1q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                case INS_pmull:
+                case INS_pmull2:
+                    if ((id->idInsOpt() == INS_OPTS_8B) || (id->idInsOpt() == INS_OPTS_16B))
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insLatency    = PERFSCORE_LATENCY_3C;
+                    }
+                    else
+                    {
+                        // Crypto polynomial (64x64) multiply long
+                        assert((id->idInsOpt() == INS_OPTS_1D) || (id->idInsOpt() == INS_OPTS_2D));
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insLatency    = PERFSCORE_LATENCY_2C;
+                    }
                     break;
+
                 default:
                     // all other instructions
                     perfScoreUnhandledInstruction(id, &result);
@@ -28598,91 +16050,99 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             }
             break;
 
-        case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus
-                           // scalar)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-            result.insLatency    = PERFSCORE_LATENCY_10C;
+        case IF_DV_3DI: // fmul, fmulx, fmla, fmls (scalar by element)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_4C;
             break;
 
-        case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar)
-            switch (ins)
-            {
-                case INS_sve_st1q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
-                    break;
-                default:
-                    // all other instructions
-                    perfScoreUnhandledInstruction(id, &result);
-                    break;
-            }
+        case IF_DV_3E: // add, sub, cmeq, cmge, cmgt, cmhi, cmhs, ctst, (scalar)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
             break;
 
-        case IF_SVE_IZ_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
-                             // scalar)
-        case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
-                             // scalar)
-        case IF_SVE_JA_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus
-                             // scalar)
-        case IF_SVE_JB_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
-                             // scalar)
-        case IF_SVE_JD_4C:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-        case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+        case IF_DV_3G: // ext
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
             result.insLatency    = PERFSCORE_LATENCY_2C;
             break;
 
-        case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar)
+        case IF_DV_2L: // abs, neg, cmeq, cmge, cmgt, cmle, cmlt (scalar)
+        case IF_DV_2M: // (vector)
+            // abs, neg, mvn, not, cmeq, cmge, cmgt, cmle, cmlt,
+            // addv, saddlv,  uaddlv, smaxv, sminv, umaxv, uminv
+            // cls, clz, cnt, rbit, rev16, rev32, rev64,
+            // xtn, xtn2, shll, shll2
             switch (ins)
             {
-                case INS_sve_st2b:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
-                case INS_sve_st3b:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_7C;
-                    break;
-                case INS_sve_st4b:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_9X;
-                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                case INS_abs:
+                case INS_sqneg:
+                case INS_suqadd:
+                case INS_usqadd:
+                    if (id->idOpSize() == EA_16BYTE)
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    }
+                    else
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    }
+
+                    result.insLatency = PERFSCORE_LATENCY_3C;
                     break;
-                case INS_sve_st2h:
+
+                case INS_addv:
+                case INS_saddlv:
+                case INS_uaddlv:
+                case INS_cls:
                     result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
-                    break;
-                case INS_sve_st3h:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_7C;
-                    break;
-                case INS_sve_st4h:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_9X;
-                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
                     break;
-                case INS_sve_st2w:
+
+                case INS_sminv:
+                case INS_smaxv:
+                case INS_uminv:
+                case INS_umaxv:
                     result.insThroughput = PERFSCORE_THROUGHPUT_1C;
                     result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
-                case INS_sve_st3w:
+
+                case INS_cmeq:
+                case INS_cmge:
+                case INS_cmgt:
+                case INS_cmle:
+                case INS_cmlt:
+
+                case INS_clz:
+                case INS_cnt:
+                case INS_rbit:
+                case INS_rev16:
+                case INS_rev32:
+                case INS_rev64:
+                case INS_xtn:
+                case INS_xtn2:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_7C;
-                    break;
-                case INS_sve_st4w:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_9X;
-                    result.insLatency    = PERFSCORE_LATENCY_11C;
-                    break;
-                case INS_sve_st2d:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
                     break;
-                case INS_sve_st3d:
+
+                case INS_mvn:
+                case INS_not:
+                case INS_neg:
+                case INS_shll:
+                case INS_shll2:
                     result.insThroughput = PERFSCORE_THROUGHPUT_2X;
-                    result.insLatency    = PERFSCORE_LATENCY_7C;
+                    result.insLatency    = PERFSCORE_LATENCY_1C;
                     break;
-                case INS_sve_st4d:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_9X;
-                    result.insLatency    = PERFSCORE_LATENCY_11C;
+
+                case INS_sqabs:
+                case INS_sqxtn:
+                case INS_sqxtn2:
+                case INS_sqxtun:
+                case INS_sqxtun2:
+                case INS_uqxtn:
+                case INS_uqxtn2:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
+
                 default:
                     // all other instructions
                     perfScoreUnhandledInstruction(id, &result);
@@ -28690,22 +16150,89 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             }
             break;
 
-        case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
-                           // scalar)
+        case IF_DV_2N: // sshr, ssra, srshr, srsra, shl, ushr, usra, urshr, ursra, sri, sli (shift by immediate -
+                       // scalar)
+        case IF_DV_2O: // sshr, ssra, srshr, srsra, shl, ushr, usra, urshr, ursra, sri, sli (shift by immediate -
+                       // vector)
+                       // sshll, sshll2, ushll, ushll2, shrn, shrn2, rshrn, rshrn2, sxrl, sxl2, uxtl, uxtl2
             switch (ins)
             {
-                case INS_sve_st2q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                case INS_shl:
+                case INS_shrn:
+                case INS_shrn2:
+                case INS_sli:
+                case INS_sri:
+                case INS_sshr:
+                case INS_ushr:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+
+                case INS_shll:
+                case INS_shll2:
+                case INS_sshll:
+                case INS_sshll2:
+                case INS_ushll:
+                case INS_ushll2:
+                case INS_sxtl:
+                case INS_sxtl2:
+                case INS_uxtl:
+                case INS_uxtl2:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+
+                case INS_rshrn:
+                case INS_rshrn2:
+                case INS_srshr:
+                case INS_sqshrn:
+                case INS_sqshrn2:
+                case INS_ssra:
+                case INS_urshr:
+                case INS_uqshrn:
+                case INS_uqshrn2:
+                case INS_usra:
+                    if (id->idOpSize() == EA_16BYTE)
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insLatency    = PERFSCORE_LATENCY_3C;
+                    }
+                    else
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                        result.insLatency    = PERFSCORE_LATENCY_3C;
+                    }
                     break;
-                case INS_sve_st3q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+
+                case INS_srsra:
+                case INS_ursra:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
-                case INS_sve_st4q:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+
+                case INS_sqrshrn:
+                case INS_sqrshrn2:
+                case INS_sqrshrun:
+                case INS_sqrshrun2:
+                case INS_sqshrun:
+                case INS_sqshrun2:
+                case INS_sqshl:
+                case INS_sqshlu:
+                case INS_uqrshrn:
+                case INS_uqrshrn2:
+                case INS_uqshl:
+                    if (id->idOpSize() == EA_16BYTE)
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
+                    else
+                    {
+                        result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                        result.insLatency    = PERFSCORE_LATENCY_4C;
+                    }
                     break;
+
                 default:
                     // all other instructions
                     perfScoreUnhandledInstruction(id, &result);
@@ -28713,70 +16240,81 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             }
             break;
 
-        case IF_SVE_JJ_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
-                             // offsets)
-        case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
-                           // offsets)
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
-        case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_3C;
-            break;
-
-        case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated)
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_5C;
-            break;
-
-        case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero
+        case IF_DV_2P: // aese, aesd, aesmc, aesimc, sha1su1, sha256su0
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency    = PERFSCORE_LATENCY_2C;
             break;
 
-        case IF_SVE_HM_2A: // ........xx...... ...ggg....iddddd -- SVE floating-point arithmetic with immediate
-                           // (predicated)
+        case IF_DV_3F: // sha1c, sha1m, sha1p, sha1su0, sha256h, sha256h2, sha256su1 (vector)
             switch (ins)
             {
-                case INS_sve_fmul:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                case INS_sha1su0:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+
+                case INS_sha256su0:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
                     result.insLatency    = PERFSCORE_LATENCY_3C;
                     break;
 
+                case INS_sha1c:
+                case INS_sha1m:
+                case INS_sha1p:
+                case INS_sha256h:
+                case INS_sha256h2:
+                case INS_sha256su1:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+
                 default:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
                     break;
             }
             break;
 
-        case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_4C;
+        case IF_SI_0A: // brk   imm16
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_1C;
             break;
 
-        case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer
+        case IF_SR_1A:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_3C;
+            result.insLatency    = PERFSCORE_LATENCY_1C;
             break;
 
-        case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend
+        case IF_DV_2T: // addv, saddlv, smaxv, sminv, uaddlv, umaxv, uminv
             switch (ins)
             {
-                case INS_sve_bfmla:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                case INS_addv:
+                case INS_saddlv:
+                case INS_uaddlv:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                    break;
+
+                case INS_smaxv:
+                case INS_sminv:
+                case INS_umaxv:
+                case INS_uminv:
+                case INS_sha256h2:
+                case INS_sha256su1:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+
+                case INS_sadalp:
+                case INS_uadalp:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
                     break;
 
-                case INS_sve_bfmls:
-                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
-                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                case INS_saddlp:
+                case INS_uaddlp:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
                     break;
 
                 default:
@@ -28786,27 +16324,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             }
             break;
 
-        case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing
-                           // multiplicand
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_4C;
-            break;
-
-        case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register
-        case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register
-            result.insThroughput = PERFSCORE_THROUGHPUT_3C;
-            result.insLatency    = PERFSCORE_LATENCY_6C;
-            break;
-
-        case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register
-        case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register
-            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
         default:
-            // all other instructions
-            perfScoreUnhandledInstruction(id, &result);
+            // fallback to SVE instructions
+            getInsSveExecutionCharacteristics(id, result);
             break;
     }
 
@@ -29099,7 +16619,7 @@ bool emitter::OptimizeLdrStr(instruction ins,
                              insFormat   fmt,
                              bool        localVar,
                              int         varx,
-                             int offs DEBUG_ARG(bool useRsvdReg))
+                             int offs    DEBUG_ARG(bool useRsvdReg))
 {
     assert(ins == INS_ldr || ins == INS_str);
 
diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h
index 6868f27f5bab..cc3254c06810 100644
--- a/src/coreclr/jit/emitarm64.h
+++ b/src/coreclr/jit/emitarm64.h
@@ -29,12 +29,21 @@ enum PredicateType
     PREDICATE_N_SIZED,  // Predicate printed as counter with element size
 };
 
-const char* emitSveRegName(regNumber reg);
+const char* emitSveRegName(regNumber reg) const;
 const char* emitVectorRegName(regNumber reg);
 const char* emitPredicateRegName(regNumber reg, PredicateType ptype);
 
+#ifdef DEBUG
+void emitInsSveSanityCheck(instrDesc* id);
+#endif // DEBUG
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+void getInsSveExecutionCharacteristics(instrDesc* id, insExecutionCharacteristics& result);
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
 void emitDispInsHelp(
     instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig);
+void emitDispInsSveHelp(instrDesc* id);
 void emitDispLargeJmp(
     instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig);
 void emitDispComma();
@@ -51,12 +60,15 @@ void emitDispBarrier(insBarrier barrier);
 void emitDispShiftOpts(insOpts opt);
 void emitDispExtendOpts(insOpts opt);
 void emitDispSveExtendOpts(insOpts opt);
-void emitDispSveExtendOptsModN(insOpts opt, int n);
+void emitDispSveExtendOptsModN(insOpts opt, ssize_t imm);
 void emitDispSveModAddr(instruction ins, regNumber reg1, regNumber reg2, insOpts opt, insFormat fmt);
+void emitDispSveImm(regNumber reg1, ssize_t imm, insOpts opt);
 void emitDispSveImmMulVl(regNumber reg1, ssize_t imm);
-void emitDispLSExtendOpts(insOpts opt);
+void emitDispSveImmIndex(regNumber reg1, insOpts opt, ssize_t imm);
 void emitDispReg(regNumber reg, emitAttr attr, bool addComma);
+void emitDispSveReg(regNumber reg, bool addComma);
 void emitDispSveReg(regNumber reg, insOpts opt, bool addComma);
+void emitDispSveRegIndex(regNumber reg, ssize_t index, bool addComma);
 void emitDispVectorReg(regNumber reg, insOpts opt, bool addComma);
 void emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma);
 void emitDispVectorRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma);
@@ -74,6 +86,7 @@ void emitDispExtendReg(regNumber reg, insOpts opt, ssize_t imm);
 void emitDispAddrRI(regNumber reg, insOpts opt, ssize_t imm);
 void emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, bool isScaled, emitAttr size);
 void emitDispSvePattern(insSvePattern pattern, bool addComma);
+void emitDispSvePrfop(insSvePrfop prfop, bool addComma);
 
 /************************************************************************/
 /*  Private members that deal with target-dependent instr. descriptors  */
@@ -111,21 +124,21 @@ enum RegisterOrder
 /************************************************************************/
 
 private:
-bool emitInsIsCompare(instruction ins);
-bool emitInsIsLoad(instruction ins);
-bool emitInsIsStore(instruction ins);
-bool emitInsIsLoadOrStore(instruction ins);
-bool emitInsIsVectorRightShift(instruction ins);
-bool emitInsIsVectorLong(instruction ins);
-bool emitInsIsVectorNarrow(instruction ins);
-bool emitInsIsVectorWide(instruction ins);
-bool emitInsDestIsOp2(instruction ins);
+bool     emitInsIsCompare(instruction ins);
+bool     emitInsIsLoad(instruction ins);
+bool     emitInsIsStore(instruction ins);
+bool     emitInsIsLoadOrStore(instruction ins);
+bool     emitInsIsVectorRightShift(instruction ins);
+bool     emitInsIsVectorLong(instruction ins);
+bool     emitInsIsVectorNarrow(instruction ins);
+bool     emitInsIsVectorWide(instruction ins);
+bool     emitInsDestIsOp2(instruction ins);
 emitAttr emitInsTargetRegSize(instrDesc* id);
 emitAttr emitInsLoadStoreSize(instrDesc* id);
 
 emitter::insFormat emitInsFormat(instruction ins);
-emitter::code_t emitInsCode(instruction ins, insFormat fmt);
-emitter::code_t emitInsCodeSve(instruction ins, insFormat fmt);
+emitter::code_t    emitInsCode(instruction ins, insFormat fmt);
+emitter::code_t    emitInsCodeSve(instruction ins, insFormat fmt);
 
 // Generate code for a load or store operation and handle the case of contained GT_LEA op1 with [base + index<<scale +
 // offset]
@@ -152,7 +165,7 @@ static UINT64 Replicate_helper(UINT64 value, unsigned width, emitAttr size);
 // Method to do check if mov is redundant with respect to the last instruction.
 // If yes, the caller of this method can choose to omit current mov instruction.
 static bool IsMovInstruction(instruction ins);
-bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip);
+bool        IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip);
 
 // Methods to optimize a Ldr or Str with an alternative instruction.
 bool IsRedundantLdStr(instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);
@@ -178,7 +191,7 @@ FORCEINLINE bool OptimizeLdrStr(instruction ins,
                                 insFormat   fmt,
                                 bool        localVar = false,
                                 int         varx     = -1,
-                                int offs = -1 DEBUG_ARG(bool useRsvdReg = false));
+                                int         offs     = -1 DEBUG_ARG(bool useRsvdReg = false));
 
 emitLclVarAddr* emitGetLclVarPairLclVar2(instrDesc* id)
 {
@@ -194,12 +207,13 @@ emitLclVarAddr* emitGetLclVarPairLclVar2(instrDesc* id)
 }
 
 /************************************************************************
-*
-* This union is used to encode/decode the special ARM64 immediate values
-* that is listed as imm(N,r,s) and referred to as 'bitmask immediate'
-*/
+ *
+ * This union is used to encode/decode the special ARM64 immediate values
+ * that is listed as imm(N,r,s) and referred to as 'bitmask immediate'
+ */
 
-union bitMaskImm {
+union bitMaskImm
+{
     struct
     {
         unsigned immS : 6; // bits 0..5
@@ -210,88 +224,83 @@ union bitMaskImm {
 };
 
 /************************************************************************
-*
-*  Convert between a 64-bit immediate and its 'bitmask immediate'
-*   representation imm(i16,hw)
-*/
-
-static emitter::bitMaskImm emitEncodeBitMaskImm(INT64 imm, emitAttr size);
+ *
+ *  Convert between a 64-bit immediate and its 'bitmask immediate'
+ *   representation imm(i16,hw)
+ */
 
 static INT64 emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size);
 
 /************************************************************************
-*
-* This union is used to encode/decode the special ARM64 immediate values
-* that is listed as imm(i16,hw) and referred to as 'halfword immediate'
-*/
+ *
+ * This union is used to encode/decode the special ARM64 immediate values
+ * that is listed as imm(i16,hw) and referred to as 'halfword immediate'
+ */
 
-union halfwordImm {
+union halfwordImm
+{
     struct
     {
         unsigned immVal : 16; // bits  0..15
-        unsigned immHW : 2;   // bits 16..17
+        unsigned immHW  : 2;  // bits 16..17
     };
     unsigned immHWVal; // concat HW:Val forming a 18-bit unsigned immediate
 };
 
 /************************************************************************
-*
-*  Convert between a 64-bit immediate and its 'halfword immediate'
-*   representation imm(i16,hw)
-*/
-
-static emitter::halfwordImm emitEncodeHalfwordImm(INT64 imm, emitAttr size);
+ *
+ *  Convert between a 64-bit immediate and its 'halfword immediate'
+ *   representation imm(i16,hw)
+ */
 
 static INT64 emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size);
 
 /************************************************************************
-*
-* This union is used to encode/decode the special ARM64 immediate values
-* that is listed as imm(i16,by) and referred to as 'byteShifted immediate'
-*/
+ *
+ * This union is used to encode/decode the special ARM64 immediate values
+ * that is listed as imm(i16,by) and referred to as 'byteShifted immediate'
+ */
 
-union byteShiftedImm {
+union byteShiftedImm
+{
     struct
     {
-        unsigned immVal : 8;  // bits  0..7
-        unsigned immBY : 2;   // bits  8..9
+        unsigned immVal  : 8; // bits  0..7
+        unsigned immBY   : 2; // bits  8..9
         unsigned immOnes : 1; // bit   10
     };
     unsigned immBSVal; // concat Ones:BY:Val forming a 10-bit unsigned immediate
 };
 
 /************************************************************************
-*
-*  Convert between a 16/32-bit immediate and its 'byteShifted immediate'
-*   representation imm(i8,by)
-*/
-
-static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL);
+ *
+ *  Convert between a 16/32-bit immediate and its 'byteShifted immediate'
+ *   representation imm(i8,by)
+ */
 
 static UINT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size);
 
 /************************************************************************
-*
-* This union is used to encode/decode the special ARM64 immediate values
-* that are use for FMOV immediate and referred to as 'float 8-bit immediate'
-*/
+ *
+ * This union is used to encode/decode the special ARM64 immediate values
+ * that are use for FMOV immediate and referred to as 'float 8-bit immediate'
+ */
 
-union floatImm8 {
+union floatImm8
+{
     struct
     {
         unsigned immMant : 4; // bits 0..3
-        unsigned immExp : 3;  // bits 4..6
+        unsigned immExp  : 3; // bits 4..6
         unsigned immSign : 1; // bits 7
     };
     unsigned immFPIVal; // concat Sign:Exp:Mant forming an 8-bit unsigned immediate
 };
 
 /************************************************************************
-*
-*  Convert between a double and its 'float 8-bit immediate' representation
-*/
-
-static emitter::floatImm8 emitEncodeFloatImm8(double immDbl);
+ *
+ *  Convert between a double and its 'float 8-bit immediate' representation
+ */
 
 static double emitDecodeFloatImm8(const emitter::floatImm8 fpImm);
 
@@ -314,95 +323,126 @@ static double emitDecodeSmallFloatImm(ssize_t imm, instruction ins);
 static bool emitIsValidEncodedSmallFloatImm(size_t imm);
 
 /************************************************************************
-*
-*  This union is used to encode/decode the cond, nzcv and imm5 values for
-*   instructions that use them in the small constant immediate field
-*/
+ *
+ *  This union is used to encode/decode the cond, nzcv and imm5 values for
+ *   instructions that use them in the small constant immediate field
+ */
 
-union condFlagsImm {
+union condFlagsImm
+{
     struct
     {
-        insCond   cond : 4;  // bits  0..3
+        insCond   cond  : 4; // bits  0..3
         insCflags flags : 4; // bits  4..7
-        unsigned  imm5 : 5;  // bits  8..12
+        unsigned  imm5  : 5; // bits  8..12
     };
     unsigned immCFVal; // concat imm5:flags:cond forming an 13-bit unsigned immediate
 };
 
 // Returns an encoding for the specified register used in the 'Rd' position
-static code_t insEncodeReg_Rd(regNumber reg);
+static code_t insEncodeReg_Rd(regNumber reg)
+{
+    return insEncodeReg_R<4, 0>(reg);
+}
 
 // Returns an encoding for the specified register used in the 'Rt' position
-static code_t insEncodeReg_Rt(regNumber reg);
+static code_t insEncodeReg_Rt(regNumber reg)
+{
+    return insEncodeReg_R<4, 0>(reg);
+}
 
 // Returns an encoding for the specified register used in the 'Rn' position
-static code_t insEncodeReg_Rn(regNumber reg);
+static code_t insEncodeReg_Rn(regNumber reg)
+{
+    return insEncodeReg_R<9, 5>(reg);
+}
 
 // Returns an encoding for the specified register used in the 'Rm' position
-static code_t insEncodeReg_Rm(regNumber reg);
+static code_t insEncodeReg_Rm(regNumber reg)
+{
+    return insEncodeReg_R<20, 16>(reg);
+}
 
 // Returns an encoding for the specified register used in the 'Ra' position
-static code_t insEncodeReg_Ra(regNumber reg);
+static code_t insEncodeReg_Ra(regNumber reg)
+{
+    return insEncodeReg_R<14, 10>(reg);
+}
 
 // Returns an encoding for the specified register used in the 'Vd' position
-static code_t insEncodeReg_Vd(regNumber reg);
+static code_t insEncodeReg_Vd(regNumber reg)
+{
+    return insEncodeReg_V<4, 0>(reg);
+}
 
 // Returns an encoding for the specified register used in the 'Vt' position
-static code_t insEncodeReg_Vt(regNumber reg);
+static code_t insEncodeReg_Vt(regNumber reg)
+{
+    return insEncodeReg_V<4, 0>(reg);
+}
 
 // Returns an encoding for the specified register used in the 'Vn' position
-static code_t insEncodeReg_Vn(regNumber reg);
+static code_t insEncodeReg_Vn(regNumber reg)
+{
+    return insEncodeReg_V<9, 5>(reg);
+}
 
 // Returns an encoding for the specified register used in the 'Vm' position
-static code_t insEncodeReg_Vm(regNumber reg);
+static code_t insEncodeReg_Vm(regNumber reg)
+{
+    return insEncodeReg_V<20, 16>(reg);
+}
 
 // Returns an encoding for the specified register used in the 'Va' position
-static code_t insEncodeReg_Va(regNumber reg);
-
-// Return an encoding for the specified 'V' register used in '4' thru '0' position.
-static code_t insEncodeReg_V_4_to_0(regNumber reg);
-
-// Return an encoding for the specified 'V' register used in '9' thru '5' position.
-static code_t insEncodeReg_V_9_to_5(regNumber reg);
-
-// Return an encoding for the specified 'P' register used in '12' thru '10' position.
-static code_t insEncodeReg_P_12_to_10(regNumber reg);
-
-// Return an encoding for the specified 'V' register used in '20' thru '16' position.
-static code_t insEncodeReg_V_20_to_16(regNumber reg);
-
-// Return an encoding for the specified 'R' register used in '20' thru '16' position.
-static code_t insEncodeReg_R_20_to_16(regNumber reg);
-
-// Return an encoding for the specified 'R' register used in '9' thru '5' position.
-static code_t insEncodeReg_R_9_to_5(regNumber reg);
-
-// Return an encoding for the specified 'R' register used in '4' thru '0' position.
-static code_t insEncodeReg_R_4_to_0(regNumber reg);
-
-// Return an encoding for the specified 'P' register used in '19' thru '16' position.
-static code_t insEncodeReg_P_19_to_16(regNumber reg);
-
-// Return an encoding for the specified 'P' register used in '3' thru '0' position.
-static code_t insEncodeReg_P_3_to_0(regNumber reg);
+static code_t insEncodeReg_Va(regNumber reg)
+{
+    return insEncodeReg_V<14, 10>(reg);
+}
 
-// Return an encoding for the specified 'P' register used in '8' thru '5' position.
-static code_t insEncodeReg_P_8_to_5(regNumber reg);
+// Returns an encoding for the specified 'V' register used in 'hi' thru 'lo' position.
+template <const size_t hi, const size_t lo>
+static code_t insEncodeReg_V(regNumber reg)
+{
+    // lo <= hi < 32
+    static_assert((hi >= lo) && (hi < sizeof(code_t) * BITS_PER_BYTE));
+    assert(isVectorRegister(reg));
+    code_t ureg = (code_t)reg - (code_t)REG_V0;
 
-// Return an encoding for the specified 'P' register used in '13' thru '10' position.
-static code_t insEncodeReg_P_13_to_10(regNumber reg);
+    constexpr size_t bits = hi - lo + 1;
+    static_assert(bits <= 5);
+    constexpr size_t mask = (1 << bits) - 1;
+    return (ureg & mask) << lo;
+}
 
-// Return an encoding for the specified 'R' register used in '17' thru '16' position.
-static code_t insEncodeReg_R_17_to_16(regNumber reg);
+// Returns an encoding for the specified 'P' register used in 'hi' thru 'lo' position.
+template <const size_t hi, const size_t lo>
+static code_t insEncodeReg_P(regNumber reg)
+{
+    // lo <= hi < 32
+    static_assert((hi >= lo) && (hi < sizeof(code_t) * BITS_PER_BYTE));
+    assert(isPredicateRegister(reg));
+    code_t ureg = (code_t)reg - (code_t)REG_P0;
 
-// Return an encoding for the specified 'P' register used in '7' thru '5' position.
-static code_t insEncodeReg_P_7_to_5(regNumber reg);
+    constexpr size_t bits = hi - lo + 1;
+    static_assert(bits <= 4);
+    constexpr size_t mask = (1 << bits) - 1;
+    return (ureg & mask) << lo;
+}
 
-// Return an encoding for the specified 'P' register used in '3' thru '1' position.
-static code_t insEncodeReg_P_3_to_1(regNumber reg);
+// Returns an encoding for the specified 'R' register used in 'hi' thru 'lo' position.
+template <const size_t hi, const size_t lo>
+static code_t insEncodeReg_R(regNumber reg)
+{
+    // lo <= hi < 32
+    static_assert((hi >= lo) && (hi < sizeof(code_t) * BITS_PER_BYTE));
+    assert(isIntegerRegister(reg));
+    code_t ureg = (code_t)reg;
 
-// Return an encoding for the specified 'P' register used in '2' thru '0' position.
-static code_t insEncodeReg_P_2_to_0(regNumber reg);
+    constexpr size_t bits = hi - lo + 1;
+    static_assert(bits <= 5);
+    constexpr size_t mask = (1 << bits) - 1;
+    return (ureg & mask) << lo;
+}
 
 // Return an encoding for the specified predicate type used in '16' position.
 static code_t insEncodePredQualifier_16(bool merge);
@@ -410,15 +450,6 @@ static code_t insEncodePredQualifier_16(bool merge);
 // Return an encoding for the specified predicate type used in '4' position.
 static code_t insEncodePredQualifier_4(bool merge);
 
-// Return an encoding for the specified 'V' register used in '18' thru '16' position.
-static code_t insEncodeReg_V_18_to_16(regNumber reg);
-
-// Return an encoding for the specified 'V' register used in '19' thru '16' position.
-static code_t insEncodeReg_V_19_to_16(regNumber reg);
-
-// Return an encoding for the specified 'V' register used in '9' thru '6' position.
-static code_t insEncodeReg_V_9_to_6(regNumber reg);
-
 // Return an encoding for the specified 'V' register used in '9' thru '6' position with the times two encoding.
 // This encoding requires that the register number be divisible by two.
 static code_t insEncodeReg_V_9_to_6_Times_Two(regNumber reg);
@@ -463,6 +494,9 @@ static code_t insEncodeVectorIndex(emitAttr elemsize, ssize_t index);
 // Returns the encoding to select 'index2' for an Arm64 'ins' elem instruction
 static code_t insEncodeVectorIndex2(emitAttr elemsize, ssize_t index2);
 
+// Returns the encoding for an immediate in the SVE variant of dup (indexed)
+static code_t insEncodeSveBroadcastIndex(emitAttr elemsize, ssize_t index);
+
 // Returns the encoding to select 'index' for an Arm64 'mul' elem instruction
 static code_t insEncodeVectorIndexLMH(emitAttr elemsize, ssize_t index);
 
@@ -525,13 +559,31 @@ static code_t insEncodeSveElemsize_22_to_21(emitAttr size);
 // This specifically encodes the size at bit locations '18-17'.
 static code_t insEncodeSveElemsize_18_to_17(emitAttr size);
 
+// Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction
+// This specifically encodes the field 'sz' at bit location '20'.
+static code_t insEncodeSveElemsize_sz_20(emitAttr size);
+
 // Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction
 // This specifically encodes the field 'sz' at bit location '21'.
 static code_t insEncodeSveElemsize_sz_21(emitAttr size);
 
 // Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 SVE vector instruction
-// This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'.
-static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size);
+// This specifically encodes the field 'tszh:tszl' at bit locations '23-22:20-19'.
+static code_t insEncodeSveElemsize_tszh_23_tszl_20_to_19(emitAttr size);
+
+// Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction at bit location '30' or
+// '21'.
+// This only works on select formats.
+static code_t insEncodeSveElemsize_30_or_21(insFormat fmt, emitAttr size);
+
+// Returns the encoding for the field 'i1:tszh:tszl' at bit locations '23-22:20-18'.
+static code_t insEncodeSveElemsize_tszh_tszl_and_imm(const insOpts opt, const ssize_t imm);
+
+// Returns the encoding for the field 'tszh:tszl:imm3' at bit locations '23-22:20-19:18-16'.
+static code_t insEncodeSveElemsizeWithShift_tszh_tszl_imm3(const insOpts opt, ssize_t imm, bool isRightShift);
+
+// Returns the encoding for the field 'i1:tsz' at bit locations '20:19-16'.
+static code_t insEncodeSveElemsizeWithImmediate_i1_tsz(const insOpts opt, ssize_t imm);
 
 // Returns the encoding to select the constant values 90 or 270 for an Arm64 SVE vector instruction
 // This specifically encode the field 'rot' at bit location '16'.
@@ -576,57 +628,120 @@ static code_t insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t
 // for the 'dtype' field.
 static code_t insEncodeSveElemsize_dtype_ld1w(instruction ins, insFormat fmt, emitAttr size, code_t code);
 
-// Returns the encoding for the immediate value as 4-bits at bit locations '19-16'.
-static code_t insEncodeSimm4_19_to_16(ssize_t imm);
+// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
+// for the 'dtypeh' and 'dtypel' fields.
+static code_t insEncodeSveElemsize_dtypeh_dtypel(instruction ins, insFormat fmt, emitAttr size, code_t code);
 
-// Returns the encoding for the immediate value as 9-bits at bit locations '21-16' for high and '12-10' for low.
-static code_t insEncodeSimm9h9l_21_to_16_and_12_to_10(ssize_t imm);
+// Encodes an immediate value in consecutive bits from most significant position 'hi' to least significant
+// position 'lo'.
+template <const size_t hi, const size_t lo>
+static code_t insEncodeUimm(size_t imm)
+{
+    // lo <= hi < 32
+    static_assert((hi >= lo) && (hi < sizeof(code_t) * BITS_PER_BYTE));
+
+    const size_t imm_bits = hi - lo + 1;
+    static_assert(imm_bits < sizeof(code_t) * BITS_PER_BYTE);
+
+    const size_t imm_max = 1 << imm_bits;
+    assert(imm < imm_max);
+
+    code_t result = static_cast<code_t>(imm << lo);
+    assert((result >> lo) == imm);
+    return result;
+}
+
+// Encodes an immediate value across two ranges of consecutive bits, splitting the bits of the immediate
+// value between them. The bit ranges are from hi1-lo1, and hi2-lo2 where the second range is at a less
+// significant position relative to the first.
+template <const size_t hi1, const size_t lo1, const size_t hi2, const size_t lo2>
+static code_t insEncodeSplitUimm(size_t imm)
+{
+    static_assert((hi1 >= lo1) && (lo1 > hi2) && (hi2 >= lo2));
+    static_assert(hi1 < sizeof(code_t) * BITS_PER_BYTE);
+
+    const size_t hi_bits = hi1 - lo1 + 1;
+    const size_t lo_bits = hi2 - lo2 + 1;
 
-// Returns the encoding for the immediate value that is a multiple of 2 as 4-bits at bit locations '19-16'.
-static code_t insEncodeSimm4_MultipleOf2_19_to_16(ssize_t imm);
+    const size_t imm_max = 1 << (hi_bits + lo_bits);
+    assert(imm < imm_max);
 
-// Returns the encoding for the immediate value that is a multiple of 3 as 4-bits at bit locations '19-16'.
-static code_t insEncodeSimm4_MultipleOf3_19_to_16(ssize_t imm);
+    const size_t hi_max = 1 << hi_bits;
+    const size_t lo_max = 1 << lo_bits;
 
-// Returns the encoding for the immediate value that is a multiple of 4 as 4-bits at bit locations '19-16'.
-static code_t insEncodeSimm4_MultipleOf4_19_to_16(ssize_t imm);
+    size_t immhi = (imm >> lo_bits) & (hi_max - 1);
+    size_t immlo = imm & (lo_max - 1);
 
-// Returns the encoding for the immediate value that is a multiple of 16 as 4-bits at bit locations '19-16'.
-static code_t insEncodeSimm4_MultipleOf16_19_to_16(ssize_t imm);
+    code_t result = insEncodeUimm<hi1, lo1>(immhi) | insEncodeUimm<hi2, lo2>(immlo);
+
+    // Calculate and generate a mask for the number of bits between hi2-lo1, and assert that these bits
+    // are not set in the result. Note if between_bits == 0 then the mask will always be 0 and this will
+    // pass.
+    size_t between_bits = lo1 - hi2 - 1;
+    code_t between_mask = ((1 << between_bits) - 1) << (hi2 + 1);
+    assert((result & between_mask) == 0);
+
+    return result;
+}
 
-// Returns the encoding for the immediate value that is a multiple of 32 as 4-bits at bit locations '19-16'.
-static code_t insEncodeSimm4_MultipleOf32_19_to_16(ssize_t imm);
+// Signed variant of insEncodeUimm, preserves the sign bit as the most significant bit of the immediate.
+// The immediate will be encoded into a 32-bit integer where bits in the range [hi, lo] are equal to the
+// bits of the signed immediate.
+template <const size_t hi, const size_t lo>
+static code_t insEncodeSimm(ssize_t imm)
+{
+    // lo <= hi < 32
+    static_assert((hi >= lo) && (hi < sizeof(code_t) * BITS_PER_BYTE));
 
-// Returns the encoding for the immediate value as 5-bits at bit locations '20-16'.
-static code_t insEncodeSimm5_20_to_16(ssize_t imm);
+    constexpr size_t imm_bits = hi - lo + 1;
+    static_assert(imm_bits < sizeof(code_t) * BITS_PER_BYTE);
 
-// Returns the encoding for the immediate value as 2-bits at bit locations '9-8'.
-static code_t insEncodeUimm2_9_to_8(ssize_t imm);
+    const ssize_t imm_max = 1 << (imm_bits - 1);
+    const ssize_t imm_min = -imm_max;
+    assert(imm_min <= imm && imm < imm_max);
 
-// Returns the encoding for the immediate value as 2-bits at bit locations '11-10'.
-static code_t insEncodeUimm2_11_to_10(ssize_t imm);
+    union
+    {
+        ssize_t simm;
+        size_t  uimm;
+    } conv;
 
-// Returns the encoding for the immediate value as 2-bits at bit locations '20-19'.
-static code_t insEncodeUimm2_20_to_19(ssize_t imm);
+    conv.simm     = imm;
+    code_t result = conv.uimm & ((1 << imm_bits) - 1);
 
-// Returns the encoding for the immediate value as 1 bit at bit location '11'.
-static code_t insEncodeImm1_11(ssize_t imm);
+    return result << lo;
+}
 
-// Returns the encoding for the immediate value as 1 bit at bit location '22'.
-static code_t insEncodeImm1_22(ssize_t imm);
+// Returns the encoding for unsigned immediate `imm` that is a multiple of `mul` with `bits` number of bits,
+// for bit locations `hi-lo`.
+template <const size_t hi, const size_t lo, const ssize_t mul>
+static code_t insEncodeUimm_MultipleOf(ssize_t imm)
+{
 
-// Returns the encoding for the immediate value as 7-bits at bit locations '20-14'.
-static code_t insEncodeUimm7_20_to_14(ssize_t imm);
+    constexpr size_t bits = hi - lo + 1;
+    assert((isValidUimm_MultipleOf<bits, mul>(imm)));
+    return insEncodeUimm<hi, lo>(imm / mul);
+}
 
-// Returns the encoding for the immediate value as 4-bits starting from 1, at bit locations '19-16'.
-static code_t insEncodeUimm4From1_19_to_16(ssize_t imm);
+// Returns the encoding for signed immediate `imm` that is a multiple of `mul` with `bits` number of bits,
+// for bit locations `hi-lo`.
+template <const size_t hi, const size_t lo, const ssize_t mul>
+static code_t insEncodeSimm_MultipleOf(ssize_t imm)
+{
+    constexpr size_t bits = hi - lo + 1;
+    assert((isValidSimm_MultipleOf<bits, mul>(imm)));
+    return insEncodeSimm<hi, lo>(imm / mul);
+}
+
+// Returns the encoding for the immediate value as 9-bits at bit locations '21-16' for high and '12-10' for low.
+static code_t insEncodeSimm9h9l_21_to_16_and_12_to_10(ssize_t imm);
+
+// Returns the encoding for the immediate value as 3-bits at bit locations '23-22' for high and '12' for low.
+static code_t insEncodeUimm3h3l_23_to_22_and_12(ssize_t imm);
 
 // Returns the encoding for the immediate value as 8-bits at bit locations '12-5'.
 static code_t insEncodeImm8_12_to_5(ssize_t imm);
 
-// Returns the encoding for the unsigned immediate value as 3-bits at bit locations '18-16'.
-static code_t insEncodeUimm3_18_to_16(ssize_t imm);
-
 // Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate.
 // This specifically encodes the field 'tszh:tszl' at bit locations '23-22:9-8'.
 static code_t insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift, size_t imm);
@@ -635,6 +750,15 @@ static code_t insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift
 // for an Arm64 Sve instruction.
 static code_t insEncodeSveElemsize_R_22(emitAttr size);
 
+// Returns the immediate value for SVE instructions that encode it as a difference from tszh:tszl:imm3.
+static ssize_t insSveGetImmDiff(const ssize_t imm, const insOpts opt);
+
+// Returns the two 5-bit signed immediates encoded as one ssize_t.
+static ssize_t insSveEncodeTwoSimm5(ssize_t imm1, ssize_t imm2);
+
+// Decodes imm into two 5-bit signed immediates, using the encoding format from insSveEncodeTwoSimm5.
+static void insSveDecodeTwoSimm5(ssize_t imm, /* OUT */ ssize_t* const imm1, /* OUT */ ssize_t* const imm2);
+
 // Returns the encoding to select an insSvePattern
 static code_t insEncodeSvePattern(insSvePattern pattern);
 
@@ -650,143 +774,172 @@ static bool isStackRegister(regNumber reg)
     return (reg == REG_ZR) || (reg == REG_FP);
 } // ZR (R31) encodes the SP register
 
-// Returns true if 'value' is a legal signed immediate 4 bit encoding (such as for LDNF1SW).
-static bool isValidSimm4(ssize_t value)
-{
-    return (-8 <= value) && (value <= 7);
-};
-
-// Returns true if 'value' is a legal signed immediate 9 bit encoding (such as for LDR).
-static bool isValidSimm9(ssize_t value)
-{
-    return (-256 <= value) && (value <= 255);
-};
-
-// Returns true if 'value' is a legal signed multiple of 2 immediate 4 bit encoding (such as for LD2Q).
-static bool isValidSimm4_MultipleOf2(ssize_t value)
+// Returns true if 'value' is a legal unsigned immediate with 'bits' number of bits.
+template <const size_t bits>
+static bool isValidUimm(ssize_t value)
 {
-    return (-16 <= value) && (value <= 14) && (value % 2 == 0);
-};
+    constexpr size_t max = 1 << bits;
+    return (0 <= value) && (value < max);
+}
 
-// Returns true if 'value' is a legal signed multiple of 3 immediate 4 bit encoding (such as for LD3Q).
-static bool isValidSimm4_MultipleOf3(ssize_t value)
+// Returns true if 'value' is a legal unsigned immediate with 'bits' number of bits, starting from 1.
+template <const size_t bits>
+static bool isValidUimmFrom1(ssize_t value)
 {
-    return (-24 <= value) && (value <= 21) && (value % 3 == 0);
-};
+    return isValidUimm<bits>(value - 1);
+}
 
-// Returns true if 'value' is a legal signed multiple of 4 immediate 4 bit encoding (such as for LD4Q).
-static bool isValidSimm4_MultipleOf4(ssize_t value)
+// Returns true if 'value' is a legal unsigned multiple of 'mod' immediate with 'bits' number of bits.
+template <const size_t bits, const size_t mod>
+static bool isValidUimm_MultipleOf(ssize_t value)
 {
-    return (-32 <= value) && (value <= 28) && (value % 4 == 0);
-};
+    static_assert(mod != 0);
+    return isValidUimm<bits>(value / mod) && (value % mod == 0);
+}
 
-// Returns true if 'value' is a legal signed multiple of 16 immediate 4 bit encoding (such as for LD1RQB).
-static bool isValidSimm4_MultipleOf16(ssize_t value)
+// Returns true if 'value' is a legal signed immediate with 'bits' number of bits.
+template <const size_t bits>
+static bool isValidSimm(ssize_t value)
 {
-    return (-128 <= value) && (value <= 112) && (value % 16 == 0);
-};
+    constexpr ssize_t max = 1 << (bits - 1);
+    return (-max <= value) && (value < max);
+}
 
-// Returns true if 'value' is a legal signed multiple of 32 immediate 4 bit encoding (such as for LD1ROB).
-static bool isValidSimm4_MultipleOf32(ssize_t value)
+// Returns true if 'value' is a legal signed multiple of 'mod' immediate with 'bits' number of bits.
+template <const size_t bits, const ssize_t mod>
+static bool isValidSimm_MultipleOf(ssize_t value)
 {
-    return (-256 <= value) && (value <= 224) && (value % 32 == 0);
-};
+    static_assert(mod != 0);
+    return isValidSimm<bits>(value / mod) && (value % mod == 0);
+}
 
-// Returns true if 'value' is a legal immediate 1 bit encoding (such as for PEXT).
-static bool isValidImm1(ssize_t value)
+// Returns true if 'imm' is a valid broadcast immediate for some SVE DUP variants
+static bool isValidBroadcastImm(ssize_t imm, emitAttr laneSize)
 {
-    return (value == 0) || (value == 1);
-};
+    // imm fits within 0 <= imm < 2**(7 - (log2(bytes_in_lane) + 1))
+    // e.g. for B => imm < 2**6, H => imm < 2**5, ...
+    ssize_t max = 0;
+    switch (laneSize)
+    {
+        case EA_16BYTE:
+            max = 4;
+            break;
+        case EA_8BYTE:
+            max = 8;
+            break;
+        case EA_4BYTE:
+            max = 16;
+            break;
+        case EA_2BYTE:
+            max = 32;
+            break;
+        case EA_1BYTE:
+            max = 64;
+            break;
+        default:
+            unreached();
+    };
 
-// Returns true if 'value' is a legal unsigned immediate 2 bit encoding (such as for PEXT).
-static bool isValidUimm2(ssize_t value)
-{
-    return (0 <= value) || (value <= 3);
-};
+    return (imm >= 0) && (imm < max);
+}
 
-// Returns true if 'value' is a legal unsigned immediate 3 bit encoding.
-static bool isValidUimm3(ssize_t value)
+// Returns true if 'value' is a legal rotation value (such as for CDOT, CMLA).
+static bool isValidRot(ssize_t value)
 {
-    return (0 <= value) && (value <= 7);
-};
+    return (value == 0) || (value == 90) || (value == 180) || (value == 270);
+}
 
-// Returns true if 'value' is a legal unsigned immediate 4 bit encoding.
-static bool isValidUimm4(ssize_t value)
+// Returns true if 'value' represents a valid 'bitmask immediate' encoding.
+static bool isValidImmNRS(size_t value, emitAttr size)
 {
-    return (0 <= value) && (value <= 15);
-};
+    return (value >= 0) && (value < 0x2000);
+} // any unsigned 13-bit immediate
 
-// Returns true if 'value' is a legal unsigned immediate 4 bit encoding, starting from 1 (such as for CNTB).
-static bool isValidUimm4From1(ssize_t value)
+// Returns one of the following patterns, depending on width, where `mn` is imm:
+// 0xFFFFFFFFFFFFFFmn, 0xFFFFFFmnFFFFFFmn, 0xFFmnFFmnFFmnFFmn,
+// 0xFFFFFFFFFFFFmnFF, 0xFFFFmnFFFFFFmnFF, 0xmnFFmnFFmnFFmnFF,
+// 0xmnmnmnmnmnmnmnmn
+static ssize_t getBitMaskOnes(const ssize_t imm, const unsigned width)
 {
-    return (1 <= value) && (value <= 16);
-};
+    assert(isValidUimm<16>(imm));
+    assert((width % 8) == 0);
+    assert(isValidGeneralLSDatasize((emitAttr)(width / 8)));
+    const unsigned immWidth = isValidUimm<8>(imm) ? 8 : 16;
 
-// Returns true if 'value' is a legal unsigned immediate 5 bit encoding (such as for CCMP).
-static bool isValidUimm5(ssize_t value)
-{
-    return (0 <= value) && (value <= 0x1FLL);
-};
+    const unsigned numIterations = 64 / width;
+    const ssize_t  ones          = ((UINT64)-1) >> (64 - width + immWidth);
+    ssize_t        mask          = 0;
 
-// Returns true if 'value' is a legal unsigned immediate 7 bit encoding (such as for CMPLT, CMPNE).
-static bool isValidUimm7(ssize_t value)
-{
-    return (0 <= value) && (value <= 0x7FLL);
-};
+    for (unsigned i = 0; i < numIterations; i++)
+    {
+        mask <<= width;
+        mask |= (ones << immWidth) | imm;
+    }
 
-// Returns true if 'value' is a legal unsigned immediate 8 bit encoding (such as for FMOV).
-static bool isValidUimm8(ssize_t value)
-{
-    return (0 <= value) && (value <= 0xFFLL);
-};
+    return mask;
+}
 
-// Returns true if 'value' is a legal signed immediate 8 bit encoding (such as for SMAX, SMIN).
-static bool isValidSimm8(ssize_t value)
+// Returns one of the following patterns, depending on width, where `mn` is imm:
+// 0x00000000000000mn, 0x000000mn000000mn, 0x00mn00mn00mn00mn,
+// 0x000000000000mn00, 0x0000mn000000mn00, 0xmn00mn00mn00mn00,
+// 0xmnmnmnmnmnmnmnmn
+static ssize_t getBitMaskZeroes(const ssize_t imm, const unsigned width)
 {
-    return (-128 <= value) && (value <= 127);
-};
+    assert(isValidUimm<16>(imm));
+    assert((width % 8) == 0);
+    assert(isValidGeneralLSDatasize((emitAttr)(width / 8)));
+    const unsigned numIterations = 64 / width;
+    ssize_t        mask          = 0;
 
-// Returns true if 'value' is a legal unsigned immediate 12 bit encoding (such as for CMP, CMN).
-static bool isValidUimm12(ssize_t value)
-{
-    return (0 <= value) && (value <= 0xFFFLL);
-};
+    for (unsigned i = 0; i < numIterations; i++)
+    {
+        mask <<= width;
+        mask |= imm;
+    }
 
-// Returns true if 'value' is a legal unsigned immediate 16 bit encoding (such as for MOVZ, MOVN, MOVK).
-static bool isValidUimm16(ssize_t value)
-{
-    return (0 <= value) && (value <= 0xFFFFLL);
-};
+    return mask;
+}
 
-// Returns true if 'value' is a legal signed immediate 26 bit encoding (such as for B or BL).
-static bool isValidSimm26(ssize_t value)
+// For the IF_SVE_BT_1A encoding, we prefer the DUPM disasm for the following immediate patterns,
+// where 'mn' is some nonzero value:
+// 0xFFFFFFFFFFFFFFmn, 0x00000000000000mn, 0xFFFFFFFFFFFFmn00, 0x000000000000mn00
+// 0xFFFFFFmnFFFFFFmn, 0x000000mn000000mn, 0xFFFFmn00FFFFmn00, 0x0000mn000000mn00
+// 0xFFmnFFmnFFmnFFmn, 0x00mn00mn00mn00mn, 0xmn00mn00mn00mn00
+// 0xmnmnmnmnmnmnmnmn
+// Else, we prefer the MOV disasm.
+static bool useMovDisasmForBitMask(const ssize_t value)
 {
-    return (-0x2000000LL <= value) && (value <= 0x1FFFFFFLL);
-};
+    ssize_t  imm = value & 0xFF;
+    unsigned minFieldSize;
 
-// Returns true if 'value' is a legal signed immediate 19 bit encoding (such as for B.cond, CBNZ, CBZ).
-static bool isValidSimm19(ssize_t value)
-{
-    return (-0x40000LL <= value) && (value <= 0x3FFFFLL);
-};
+    if (imm == 0)
+    {
+        imm          = value & 0xFF00;
+        minFieldSize = 16;
+    }
+    else
+    {
+        minFieldSize = 8;
+    }
 
-// Returns true if 'value' is a legal signed immediate 14 bit encoding (such as for TBNZ, TBZ).
-static bool isValidSimm14(ssize_t value)
-{
-    return (-0x2000LL <= value) && (value <= 0x1FFFLL);
-};
+    assert(isValidUimm<16>(imm));
 
-// Returns true if 'value' is a legal signed immediate 5 bit encoding (such as for CMPLO, CMPHI).
-static bool isValidSimm5(ssize_t value)
-{
-    return (-0x10LL <= value) && (value <= 0xFLL);
-};
+    // Check for all possible bit field sizes
+    for (unsigned width = minFieldSize; width <= 64; width <<= 1)
+    {
+        if (value == getBitMaskZeroes(imm, width))
+        {
+            return false;
+        }
+
+        if (value == getBitMaskOnes(imm, width))
+        {
+            return false;
+        }
+    }
 
-// Returns true if 'value' represents a valid 'bitmask immediate' encoding.
-static bool isValidImmNRS(size_t value, emitAttr size)
-{
-    return (value >= 0) && (value < 0x2000);
-} // any unsigned 13-bit immediate
+    return true;
+}
 
 // Returns true if 'value' represents a valid 'halfword immediate' encoding.
 static bool isValidImmHWVal(size_t value, emitAttr size)
@@ -821,12 +974,19 @@ static insOpts optMakeArrangement(emitAttr datasize, emitAttr elemsize);
 //    For the given 'datasize' and 'opt' returns true if it specifies a valid vector register arrangement
 static bool isValidArrangement(emitAttr datasize, insOpts opt);
 
+// Expands an option that has different size operands (INS_OPTS_*_TO_*) into a pair of scalable options where
+// the first describes the size of the destination operand and the second describes the size of the source operand.
+static void optExpandConversionPair(insOpts opt, insOpts& dst, insOpts& src);
+
 //  For the given 'arrangement' returns the 'datasize' specified by the vector register arrangement
 static emitAttr optGetDatasize(insOpts arrangement);
 
 //  For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement
 static emitAttr optGetElemsize(insOpts arrangement);
 
+//  For the given 'elemsize' returns the 'arrangement' when used in a SVE vector register arrangement.
+static insOpts optGetSveInsOpt(emitAttr elemsize);
+
 //  For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement
 static emitAttr optGetSveElemsize(insOpts arrangement);
 
@@ -903,9 +1063,6 @@ static bool canEncodeWithShiftImmBy12(INT64 imm);
 // Normalize the 'imm' so that the upper bits, as defined by 'size' are zero
 static INT64 normalizeImm64(INT64 imm, emitAttr size);
 
-// Normalize the 'imm' so that the upper bits, as defined by 'size' are zero
-static INT32 normalizeImm32(INT32 imm, emitAttr size);
-
 // true if 'imm' can be encoded using a 'bitmask immediate', also returns the encoding if wbBMI is non-null
 static bool canEncodeBitMaskImm(INT64 imm, emitAttr size, emitter::bitMaskImm* wbBMI = nullptr);
 
@@ -1023,6 +1180,11 @@ inline static bool isVectorRegister(regNumber reg)
     return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST);
 }
 
+inline static bool isLowVectorRegister(regNumber reg)
+{
+    return (reg >= FIRST_FP_ARGREG) && (reg <= LAST_FP_ARGREG);
+}
+
 inline static bool isFloatReg(regNumber reg)
 {
     return isVectorRegister(reg);
@@ -1043,6 +1205,23 @@ inline static bool isHighPredicateRegister(regNumber reg)
     return (reg >= REG_PREDICATE_HIGH_FIRST) && (reg <= REG_PREDICATE_HIGH_LAST);
 }
 
+inline static bool isEvenRegister(regNumber reg)
+{
+    if (isGeneralRegister(reg))
+    {
+        return ((reg - REG_INT_FIRST) % 2 == 0);
+    }
+    else if (isVectorRegister(reg))
+    {
+        return ((reg - REG_FP_FIRST) % 2) == 0;
+    }
+    else
+    {
+        assert(isPredicateRegister(reg));
+        return ((reg - REG_PREDICATE_FIRST) % 2) == 0;
+    }
+}
+
 inline static bool insOptsNone(insOpts opt)
 {
     return (opt == INS_OPTS_NONE);
@@ -1124,6 +1303,11 @@ inline static bool insOptsAnyArrangement(insOpts opt)
     return ((opt >= INS_OPTS_8B) && (opt <= INS_OPTS_2D));
 }
 
+inline static bool insOptsConvertFloatStepwise(insOpts opt)
+{
+    return (opt == INS_OPTS_H_TO_S || opt == INS_OPTS_S_TO_H || opt == INS_OPTS_D_TO_S || opt == INS_OPTS_S_TO_D);
+}
+
 inline static bool insOptsConvertFloatToFloat(insOpts opt)
 {
     return ((opt >= INS_OPTS_S_TO_D) && (opt <= INS_OPTS_D_TO_H));
@@ -1253,8 +1437,12 @@ void emitIns(instruction ins);
 
 void emitIns_I(instruction ins, emitAttr attr, ssize_t imm);
 
+void emitInsSve_I(instruction ins, emitAttr attr, ssize_t imm);
+
 void emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts opt = INS_OPTS_NONE);
 
+void emitInsSve_R(instruction ins, emitAttr attr, regNumber reg, insOpts opt = INS_OPTS_NONE);
+
 void emitIns_R_I(instruction     ins,
                  emitAttr        attr,
                  regNumber       reg,
@@ -1263,8 +1451,17 @@ void emitIns_R_I(instruction     ins,
                  insScalableOpts sopt = INS_SCALABLE_OPTS_NONE DEBUGARG(size_t targetHandle = 0)
                      DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
 
+void emitInsSve_R_I(instruction     ins,
+                    emitAttr        attr,
+                    regNumber       reg,
+                    ssize_t         imm,
+                    insOpts         opt  = INS_OPTS_NONE,
+                    insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);
+
 void emitIns_R_F(instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt = INS_OPTS_NONE);
 
+void emitInsSve_R_F(instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt = INS_OPTS_NONE);
+
 void emitIns_Mov(
     instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt = INS_OPTS_NONE);
 
@@ -1275,6 +1472,13 @@ void emitIns_R_R(instruction     ins,
                  insOpts         opt  = INS_OPTS_NONE,
                  insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);
 
+void emitInsSve_R_R(instruction     ins,
+                    emitAttr        attr,
+                    regNumber       reg1,
+                    regNumber       reg2,
+                    insOpts         opt  = INS_OPTS_NONE,
+                    insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);
+
 void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags)
 {
     emitIns_R_R(ins, attr, reg1, reg2);
@@ -1288,6 +1492,9 @@ void emitIns_R_I_I(instruction ins,
                    insOpts opt = INS_OPTS_NONE DEBUGARG(size_t targetHandle = 0)
                        DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
 
+void emitInsSve_R_I_I(
+    instruction ins, emitAttr attr, regNumber reg1, ssize_t imm1, ssize_t imm2, insOpts opt = INS_OPTS_NONE);
+
 void emitIns_R_R_I(instruction     ins,
                    emitAttr        attr,
                    regNumber       reg1,
@@ -1296,9 +1503,20 @@ void emitIns_R_R_I(instruction     ins,
                    insOpts         opt  = INS_OPTS_NONE,
                    insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);
 
+void emitInsSve_R_R_I(instruction     ins,
+                      emitAttr        attr,
+                      regNumber       reg1,
+                      regNumber       reg2,
+                      ssize_t         imm,
+                      insOpts         opt  = INS_OPTS_NONE,
+                      insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);
+
 void emitIns_R_R_F(
     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, double immDbl, insOpts opt = INS_OPTS_NONE);
 
+void emitInsSve_R_R_F(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, double immDbl, insOpts opt = INS_OPTS_NONE);
+
 // Checks for a large immediate that needs a second instruction
 void emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm);
 
@@ -1310,14 +1528,32 @@ void emitIns_R_R_R(instruction     ins,
                    insOpts         opt  = INS_OPTS_NONE,
                    insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);
 
-void emitIns_R_R_R_I(instruction ins,
-                     emitAttr    attr,
-                     regNumber   reg1,
-                     regNumber   reg2,
-                     regNumber   reg3,
-                     ssize_t     imm,
-                     insOpts     opt      = INS_OPTS_NONE,
-                     emitAttr    attrReg2 = EA_UNKNOWN);
+void emitInsSve_R_R_R(instruction     ins,
+                      emitAttr        attr,
+                      regNumber       reg1,
+                      regNumber       reg2,
+                      regNumber       reg3,
+                      insOpts         opt  = INS_OPTS_NONE,
+                      insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);
+
+void emitIns_R_R_R_I(instruction     ins,
+                     emitAttr        attr,
+                     regNumber       reg1,
+                     regNumber       reg2,
+                     regNumber       reg3,
+                     ssize_t         imm,
+                     insOpts         opt      = INS_OPTS_NONE,
+                     emitAttr        attrReg2 = EA_UNKNOWN,
+                     insScalableOpts sopt     = INS_SCALABLE_OPTS_NONE);
+
+void emitInsSve_R_R_R_I(instruction     ins,
+                        emitAttr        attr,
+                        regNumber       reg1,
+                        regNumber       reg2,
+                        regNumber       reg3,
+                        ssize_t         imm,
+                        insOpts         opt  = INS_OPTS_NONE,
+                        insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);
 
 void emitIns_R_R_R_I_I(instruction ins,
                        emitAttr    attr,
@@ -1328,6 +1564,15 @@ void emitIns_R_R_R_I_I(instruction ins,
                        ssize_t     imm2,
                        insOpts     opt);
 
+void emitInsSve_R_R_R_I_I(instruction ins,
+                          emitAttr    attr,
+                          regNumber   reg1,
+                          regNumber   reg2,
+                          regNumber   reg3,
+                          ssize_t     imm1,
+                          ssize_t     imm2,
+                          insOpts     opt);
+
 void emitIns_R_R_R_Ext(instruction ins,
                        emitAttr    attr,
                        regNumber   reg1,
@@ -1366,6 +1611,15 @@ void emitIns_R_R_R_R_I(instruction ins,
                        ssize_t     imm,
                        insOpts     opt = INS_OPTS_NONE);
 
+void emitInsSve_R_R_R_R_I(instruction ins,
+                          emitAttr    attr,
+                          regNumber   reg1,
+                          regNumber   reg2,
+                          regNumber   reg3,
+                          regNumber   reg4,
+                          ssize_t     imm,
+                          insOpts     opt = INS_OPTS_NONE);
+
 void emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond);
 
 void emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond);
@@ -1375,12 +1629,30 @@ void emitIns_R_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumbe
 void emitIns_R_R_FLAGS_COND(
     instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCflags flags, insCond cond);
 
-void emitIns_R_I_FLAGS_COND(instruction ins, emitAttr attr, regNumber reg1, int imm, insCflags flags, insCond cond);
+void emitIns_R_I_FLAGS_COND(instruction ins, emitAttr attr, regNumber reg1, ssize_t imm, insCflags flags, insCond cond);
 
 void emitIns_R_PATTERN(
     instruction ins, emitAttr attr, regNumber reg1, insOpts opt, insSvePattern pattern = SVE_PATTERN_ALL);
 
-void emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1, insSvePattern pattern, int imm);
+void emitIns_R_PATTERN_I(
+    instruction ins, emitAttr attr, regNumber reg1, insSvePattern pattern, ssize_t imm, insOpts opt = INS_OPTS_NONE);
+
+void emitIns_PRFOP_R_R_R(instruction     ins,
+                         emitAttr        attr,
+                         insSvePrfop     prfop,
+                         regNumber       reg1,
+                         regNumber       reg2,
+                         regNumber       reg3,
+                         insOpts         opt  = INS_OPTS_NONE,
+                         insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);
+
+void emitIns_PRFOP_R_R_I(instruction ins,
+                         emitAttr    attr,
+                         insSvePrfop prfop,
+                         regNumber   reg1,
+                         regNumber   reg2,
+                         int         imm,
+                         insOpts     opt = INS_OPTS_NONE);
 
 void emitIns_BARR(instruction ins, insBarrier barrier);
 
@@ -1403,7 +1675,7 @@ void emitIns_R_R_R_I_LdStPair(instruction ins,
                               int         varx1 = -1,
                               int         varx2 = -1,
                               int         offs1 = -1,
-                              int offs2 = -1 DEBUG_ARG(unsigned var1RefsOffs = BAD_IL_OFFSET)
+                              int         offs2 = -1 DEBUG_ARG(unsigned var1RefsOffs = BAD_IL_OFFSET)
                                               DEBUG_ARG(unsigned var2RefsOffs = BAD_IL_OFFSET));
 
 void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
@@ -1432,9 +1704,14 @@ void emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int of
 
 void emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs);
 
-void emitIns_R_AI(instruction ins,
-                  emitAttr    attr,
-                  regNumber   ireg,
+void emitIns_Adrp_Ldr_Add(emitAttr     attr,
+                          regNumber    reg1,
+                          regNumber    reg2,
+                          ssize_t addr DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
+
+void emitIns_R_AI(instruction  ins,
+                  emitAttr     attr,
+                  regNumber    ireg,
                   ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
 
 void emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs);
@@ -1470,13 +1747,13 @@ void emitIns_Call(EmitCallType          callType,
                   ssize_t          disp,
                   bool             isJump);
 
-BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i);
+BYTE*    emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i);
 unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* i, code_t code);
-BYTE* emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id);
-BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id);
-BYTE* emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg);
-BYTE* emitOutputShortConstant(
-    BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg, emitAttr opSize);
+BYTE*    emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id);
+BYTE*    emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id);
+BYTE*    emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg);
+BYTE*    emitOutputShortConstant(
+       BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg, emitAttr opSize);
 BYTE* emitOutputVectorConstant(
     BYTE* dst, ssize_t distVal, regNumber dstReg, regNumber addrReg, emitAttr opSize, emitAttr elemSize);
 
@@ -1523,9 +1800,9 @@ inline bool emitIsLoadLabel(instrDesc* jmp)
 }
 
 /*****************************************************************************
-*
-*  Given a instrDesc, return true if it's a load constant instruction.
-*/
+ *
+ *  Given a instrDesc, return true if it's a load constant instruction.
+ */
 
 inline bool emitIsLoadConstant(instrDesc* jmp)
 {
diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp
new file mode 100644
index 000000000000..1b90b26ba799
--- /dev/null
+++ b/src/coreclr/jit/emitarm64sve.cpp
@@ -0,0 +1,18400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX                                                                           XX
+XX                             emitArm64sve.cpp                              XX
+XX                                                                           XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifdef TARGET_ARM64
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#include "instr.h"
+
+/*****************************************************************************/
+
+// clang-format off
+static const char * const  zRegNames[] =
+{
+    "z0",  "z1",  "z2",  "z3",  "z4",
+    "z5",  "z6",  "z7",  "z8",  "z9",
+    "z10", "z11", "z12", "z13", "z14",
+    "z15", "z16", "z17", "z18", "z19",
+    "z20", "z21", "z22", "z23", "z24",
+    "z25", "z26", "z27", "z28", "z29",
+    "z30", "z31"
+};
+
+static const char * const  pRegNames[] =
+{
+    "p0",  "p1",  "p2",  "p3",  "p4",
+    "p5",  "p6",  "p7",  "p8",  "p9",
+    "p10", "p11", "p12", "p13", "p14",
+    "p15"
+};
+
+static const char * const  pnRegNames[] =
+{
+    "pn0",  "pn1",  "pn2",  "pn3",  "pn4",
+    "pn5",  "pn6",  "pn7",  "pn8",  "pn9",
+    "pn10", "pn11", "pn12", "pn13", "pn14",
+    "pn15"
+};
+
+static const char * const  svePatternNames[] =
+{
+    "pow2", "vl1", "vl2", "vl3",
+    "vl4", "vl5", "vl6", "vl7",
+    "vl8", "vl16", "vl32", "vl64",
+    "vl128", "vl256", "invalid", "invalid",
+    "invalid", "invalid", "invalid", "invalid",
+    "invalid", "invalid", "invalid", "invalid",
+    "invalid", "invalid", "invalid", "invalid",
+    "invalid", "mul4", "mul3", "all"
+};
+
+// clang-format on
+
+/*****************************************************************************
+ *
+ *  Returns the specific encoding of the given CPU instruction and format
+ */
+
+emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt)
+{
+    // clang-format off
+    const static code_t insCodes1[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) e1,
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) e1,
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) e1,
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) e1,
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) e1,
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) e1,
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e1,
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e1,
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e1,
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e1,
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e1,
+        #include "instrsarm64sve.h"
+    };
+
+    const static code_t insCodes2[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) 
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) e2,
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) e2,
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) e2,
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) e2,
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) e2,
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e2,
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e2,
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e2,
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e2,
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e2,
+        #include "instrsarm64sve.h"
+    };
+
+    const static code_t insCodes3[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) 
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) e3,
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) e3,
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) e3,
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) e3,
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e3,
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e3,
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e3,
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e3,
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e3,
+        #include "instrsarm64sve.h"
+    };
+
+    const static code_t insCodes4[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) 
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) e4,
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) e4,
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) e4,
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e4,
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e4,
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e4,
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e4,
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e4,
+        #include "instrsarm64sve.h"
+    };
+
+    const static code_t insCodes5[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) 
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) e5,
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) e5,
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e5,
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e5,
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e5,
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e5,
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e5,
+        #include "instrsarm64sve.h"
+    };
+
+    const static code_t insCodes6[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) 
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) e6,
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e6,
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e6,
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e6,
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e6,
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e6,
+        #include "instrsarm64sve.h"
+    };
+
+    const static code_t insCodes7[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) 
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) e7,
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e7,
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e7,
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e7,
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e7,
+        #include "instrsarm64sve.h"
+    };
+
+    const static code_t insCodes8[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) 
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) 
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) e8,
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e8,
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e8,
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e8,
+        #include "instrsarm64sve.h"
+    };
+
+    const static code_t insCodes9[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) 
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) 
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) 
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) e9,
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e9,
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e9,
+        #include "instrsarm64sve.h"
+    };
+
+    const static code_t insCodes10[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) 
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) 
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) 
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) 
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e10,
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e10,
+        #include "instrsarm64sve.h"
+    };
+
+    const static code_t insCodes11[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) 
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) 
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) 
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) 
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) e11,
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e11,
+        #include "instrsarm64sve.h"
+    };
+
+    const static code_t insCodes12[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) 
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) 
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) 
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) 
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) 
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e12,
+        #include "instrsarm64sve.h"
+    };
+
+    const static code_t insCodes13[] =
+    {
+        #define   INST1(id, nm, info, fmt, e1                                                       ) 
+        #define   INST2(id, nm, info, fmt, e1, e2                                                   ) 
+        #define   INST3(id, nm, info, fmt, e1, e2, e3                                               ) 
+        #define   INST4(id, nm, info, fmt, e1, e2, e3, e4                                           ) 
+        #define   INST5(id, nm, info, fmt, e1, e2, e3, e4, e5                                       ) 
+        #define   INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6                                   ) 
+        #define   INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7                               ) 
+        #define   INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8                           ) 
+        #define   INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9                       ) 
+        #define  INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11             ) 
+        #define  INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13   ) e13,
+        #include "instrsarm64sve.h"
+    };
+
+    // clang-format on
+    const static insFormat formatEncode13A[13] = {IF_SVE_AU_3A, IF_SVE_BT_1A, IF_SVE_BV_2A,   IF_SVE_BV_2A_J,
+                                                  IF_SVE_BW_2A, IF_SVE_CB_2A, IF_SVE_CP_3A,   IF_SVE_CQ_3A,
+                                                  IF_SVE_CW_4A, IF_SVE_CZ_4A, IF_SVE_CZ_4A_K, IF_SVE_CZ_4A_L,
+                                                  IF_SVE_EB_1A};
+    const static insFormat formatEncode11A[11] = {IF_SVE_JD_4B,   IF_SVE_JD_4C,   IF_SVE_JI_3A_A, IF_SVE_JJ_4A,
+                                                  IF_SVE_JJ_4A_B, IF_SVE_JJ_4A_C, IF_SVE_JJ_4A_D, IF_SVE_JJ_4B,
+                                                  IF_SVE_JJ_4B_E, IF_SVE_JN_3B,   IF_SVE_JN_3C};
+    const static insFormat formatEncode9A[9]   = {IF_SVE_HW_4A,   IF_SVE_HW_4A_A, IF_SVE_HW_4A_B,
+                                                  IF_SVE_HW_4A_C, IF_SVE_HW_4B,   IF_SVE_HW_4B_D,
+                                                  IF_SVE_HX_3A_E, IF_SVE_IJ_3A_F, IF_SVE_IK_4A_G};
+    const static insFormat formatEncode9B[9]   = {IF_SVE_HW_4A,   IF_SVE_HW_4A_A, IF_SVE_HW_4A_B,
+                                                  IF_SVE_HW_4A_C, IF_SVE_HW_4B,   IF_SVE_HW_4B_D,
+                                                  IF_SVE_HX_3A_E, IF_SVE_IJ_3A_G, IF_SVE_IK_4A_I};
+    const static insFormat formatEncode9C[9]   = {IF_SVE_HW_4A,   IF_SVE_HW_4A_A, IF_SVE_HW_4A_B,
+                                                  IF_SVE_HW_4A_C, IF_SVE_HW_4B,   IF_SVE_HW_4B_D,
+                                                  IF_SVE_HX_3A_E, IF_SVE_IH_3A_F, IF_SVE_II_4A_H};
+    const static insFormat formatEncode9D[9]   = {IF_SVE_IH_3A,   IF_SVE_IH_3A_A, IF_SVE_II_4A,
+                                                  IF_SVE_II_4A_B, IF_SVE_IU_4A,   IF_SVE_IU_4A_C,
+                                                  IF_SVE_IU_4B,   IF_SVE_IU_4B_D, IF_SVE_IV_3A};
+    const static insFormat formatEncode9E[9]   = {IF_SVE_JD_4A,   IF_SVE_JI_3A_A, IF_SVE_JJ_4A,
+                                                  IF_SVE_JJ_4A_B, IF_SVE_JJ_4A_C, IF_SVE_JJ_4A_D,
+                                                  IF_SVE_JJ_4B,   IF_SVE_JJ_4B_E, IF_SVE_JN_3A};
+    const static insFormat formatEncode9F[9]   = {IF_SVE_JD_4C,   IF_SVE_JD_4C_A, IF_SVE_JJ_4A,
+                                                  IF_SVE_JJ_4A_B, IF_SVE_JJ_4B,   IF_SVE_JJ_4B_C,
+                                                  IF_SVE_JL_3A,   IF_SVE_JN_3C,   IF_SVE_JN_3C_D};
+    const static insFormat formatEncode8A[8]   = {IF_SVE_CE_2A, IF_SVE_CE_2B, IF_SVE_CE_2C, IF_SVE_CE_2D,
+                                                  IF_SVE_CF_2A, IF_SVE_CF_2B, IF_SVE_CF_2C, IF_SVE_CF_2D};
+    const static insFormat formatEncode8B[8]   = {IF_SVE_HW_4A, IF_SVE_HW_4A_A, IF_SVE_HW_4A_B, IF_SVE_HW_4A_C,
+                                                  IF_SVE_HW_4B, IF_SVE_HW_4B_D, IF_SVE_HX_3A_E, IF_SVE_IG_4A_F};
+    const static insFormat formatEncode8C[8]   = {IF_SVE_HW_4A, IF_SVE_HW_4A_A, IF_SVE_HW_4A_B, IF_SVE_HW_4A_C,
+                                                  IF_SVE_HW_4B, IF_SVE_HW_4B_D, IF_SVE_HX_3A_E, IF_SVE_IG_4A_G};
+    const static insFormat formatEncode7A[7]   = {IF_SVE_IJ_3A, IF_SVE_IK_4A,   IF_SVE_IU_4A, IF_SVE_IU_4A_A,
+                                                  IF_SVE_IU_4B, IF_SVE_IU_4B_B, IF_SVE_IV_3A};
+    const static insFormat formatEncode6A[6]   = {IF_SVE_AE_3A, IF_SVE_BD_3A, IF_SVE_EE_1A,
+                                                  IF_SVE_FD_3A, IF_SVE_FD_3B, IF_SVE_FD_3C};
+    const static insFormat formatEncode6B[6]   = {IF_SVE_GY_3A, IF_SVE_GY_3B,   IF_SVE_GY_3B_D,
+                                                  IF_SVE_HA_3A, IF_SVE_HA_3A_E, IF_SVE_HA_3A_F};
+    const static insFormat formatEncode6C[6]   = {IF_SVE_HW_4A,   IF_SVE_HW_4A_A, IF_SVE_HW_4B,
+                                                  IF_SVE_HX_3A_B, IF_SVE_IJ_3A_D, IF_SVE_IK_4A_F};
+    const static insFormat formatEncode6D[6]   = {IF_SVE_HW_4A,   IF_SVE_HW_4A_A, IF_SVE_HW_4B,
+                                                  IF_SVE_HX_3A_B, IF_SVE_IJ_3A_E, IF_SVE_IK_4A_H};
+    const static insFormat formatEncode6E[6]   = {IF_SVE_HY_3A,   IF_SVE_HY_3A_A, IF_SVE_HY_3B,
+                                                  IF_SVE_HZ_2A_B, IF_SVE_IA_2A,   IF_SVE_IB_3A};
+    const static insFormat formatEncode6F[6]   = {IF_SVE_IG_4A, IF_SVE_IU_4A,   IF_SVE_IU_4A_A,
+                                                  IF_SVE_IU_4B, IF_SVE_IU_4B_B, IF_SVE_IV_3A};
+    const static insFormat formatEncode6G[6]   = {IF_SVE_JD_4A,   IF_SVE_JI_3A_A, IF_SVE_JK_4A,
+                                                  IF_SVE_JK_4A_B, IF_SVE_JK_4B,   IF_SVE_JN_3A};
+    const static insFormat formatEncode5A[5]   = {IF_SVE_AM_2A, IF_SVE_AN_3A, IF_SVE_AO_3A, IF_SVE_BF_2A, IF_SVE_BG_3A};
+    const static insFormat formatEncode5B[5]   = {IF_SVE_GX_3A, IF_SVE_GX_3B, IF_SVE_HK_3A, IF_SVE_HL_3A, IF_SVE_HM_2A};
+    const static insFormat formatEncode5C[5]   = {IF_SVE_EF_3A, IF_SVE_EG_3A, IF_SVE_EH_3A, IF_SVE_EY_3A, IF_SVE_EY_3B};
+    const static insFormat formatEncode5D[5]   = {IF_SVE_HW_4A, IF_SVE_HW_4A_A, IF_SVE_HW_4B, IF_SVE_HX_3A_B,
+                                                  IF_SVE_IG_4A_D};
+    const static insFormat formatEncode5E[5]   = {IF_SVE_HW_4A, IF_SVE_HW_4A_A, IF_SVE_HW_4B, IF_SVE_HX_3A_B,
+                                                  IF_SVE_IG_4A_E};
+    const static insFormat formatEncode4A[4]   = {IF_SVE_AA_3A, IF_SVE_AU_3A, IF_SVE_BS_1A, IF_SVE_CZ_4A};
+    const static insFormat formatEncode4B[4]   = {IF_SVE_BU_2A, IF_SVE_BV_2B, IF_SVE_EA_1A, IF_SVE_EB_1B};
+    const static insFormat formatEncode4E[4]   = {IF_SVE_BE_3A, IF_SVE_FI_3A, IF_SVE_FI_3B, IF_SVE_FI_3C};
+    const static insFormat formatEncode4F[4]   = {IF_SVE_EM_3A, IF_SVE_FK_3A, IF_SVE_FK_3B, IF_SVE_FK_3C};
+    const static insFormat formatEncode4G[4]   = {IF_SVE_AR_4A, IF_SVE_FF_3A, IF_SVE_FF_3B, IF_SVE_FF_3C};
+    const static insFormat formatEncode4H[4]   = {IF_SVE_GM_3A, IF_SVE_GN_3A, IF_SVE_GZ_3A, IF_SVE_HB_3A};
+    const static insFormat formatEncode4I[4]   = {IF_SVE_AX_1A, IF_SVE_AY_2A, IF_SVE_AZ_2A, IF_SVE_BA_3A};
+    const static insFormat formatEncode4J[4]   = {IF_SVE_BV_2A, IF_SVE_BV_2A_A, IF_SVE_CP_3A, IF_SVE_CQ_3A};
+    const static insFormat formatEncode4K[4]   = {IF_SVE_IF_4A, IF_SVE_IF_4A_A, IF_SVE_IM_3A, IF_SVE_IN_4A};
+    const static insFormat formatEncode4L[4]   = {IF_SVE_IZ_4A, IF_SVE_IZ_4A_A, IF_SVE_JB_4A, IF_SVE_JM_3A};
+    const static insFormat formatEncode3A[3]   = {IF_SVE_AB_3A, IF_SVE_AT_3A, IF_SVE_EC_1A};
+    const static insFormat formatEncode3B[3]   = {IF_SVE_BH_3A, IF_SVE_BH_3B, IF_SVE_BH_3B_A};
+    const static insFormat formatEncode3C[3]   = {IF_SVE_BW_2A, IF_SVE_CB_2A, IF_SVE_EB_1A};
+    const static insFormat formatEncode3D[3]   = {IF_SVE_BR_3A, IF_SVE_BR_3B, IF_SVE_CI_3A};
+    const static insFormat formatEncode3E[3]   = {IF_SVE_AT_3A, IF_SVE_EC_1A, IF_SVE_ET_3A};
+    const static insFormat formatEncode3F[3]   = {IF_SVE_GU_3A, IF_SVE_GU_3B, IF_SVE_HU_4A};
+    const static insFormat formatEncode3G[3]   = {IF_SVE_GH_3A, IF_SVE_GH_3B, IF_SVE_GH_3B_B};
+    const static insFormat formatEncode3H[3]   = {IF_SVE_HK_3A, IF_SVE_HL_3A, IF_SVE_HM_2A};
+    const static insFormat formatEncode3I[3]   = {IF_SVE_CM_3A, IF_SVE_CN_3A, IF_SVE_CO_3A};
+    const static insFormat formatEncode3J[3]   = {IF_SVE_CX_4A, IF_SVE_CX_4A_A, IF_SVE_CY_3A};
+    const static insFormat formatEncode3K[3]   = {IF_SVE_CX_4A, IF_SVE_CX_4A_A, IF_SVE_CY_3B};
+    const static insFormat formatEncode3L[3]   = {IF_SVE_DT_3A, IF_SVE_DX_3A, IF_SVE_DY_3A};
+    const static insFormat formatEncode3M[3]   = {IF_SVE_EJ_3A, IF_SVE_FA_3A, IF_SVE_FA_3B};
+    const static insFormat formatEncode3N[3]   = {IF_SVE_EK_3A, IF_SVE_FB_3A, IF_SVE_FB_3B};
+    const static insFormat formatEncode3O[3]   = {IF_SVE_EK_3A, IF_SVE_FC_3A, IF_SVE_FC_3B};
+    const static insFormat formatEncode3P[3]   = {IF_SVE_EL_3A, IF_SVE_FG_3A, IF_SVE_FG_3B};
+    const static insFormat formatEncode3Q[3]   = {IF_SVE_EO_3A, IF_SVE_FJ_3A, IF_SVE_FJ_3B};
+    const static insFormat formatEncode3R[3]   = {IF_SVE_FE_3A, IF_SVE_FE_3B, IF_SVE_FN_3A};
+    const static insFormat formatEncode3S[3]   = {IF_SVE_FH_3A, IF_SVE_FH_3B, IF_SVE_FN_3A};
+    const static insFormat formatEncode3T[3]   = {IF_SVE_GX_3C, IF_SVE_HK_3B, IF_SVE_HL_3B};
+    const static insFormat formatEncode3U[3]   = {IF_SVE_IM_3A, IF_SVE_IN_4A, IF_SVE_IX_4A};
+    const static insFormat formatEncode3V[3]   = {IF_SVE_JA_4A, IF_SVE_JB_4A, IF_SVE_JM_3A};
+    const static insFormat formatEncode2AA[2]  = {IF_SVE_ID_2A, IF_SVE_IE_2A};
+    const static insFormat formatEncode2AB[2]  = {IF_SVE_JG_2A, IF_SVE_JH_2A};
+    const static insFormat formatEncode2AC[2]  = {IF_SVE_AD_3A, IF_SVE_ED_1A};
+    const static insFormat formatEncode2AD[2]  = {IF_SVE_AB_3B, IF_SVE_AT_3B};
+    const static insFormat formatEncode2AE[2]  = {IF_SVE_CG_2A, IF_SVE_CJ_2A};
+    const static insFormat formatEncode2AF[2]  = {IF_SVE_AE_3A, IF_SVE_BD_3A};
+    const static insFormat formatEncode2AG[2]  = {IF_SVE_BS_1A, IF_SVE_CZ_4A};
+    const static insFormat formatEncode2AH[2]  = {IF_SVE_BQ_2A, IF_SVE_BQ_2B};
+    const static insFormat formatEncode2AI[2]  = {IF_SVE_AM_2A, IF_SVE_EU_3A};
+    const static insFormat formatEncode2AJ[2]  = {IF_SVE_HI_3A, IF_SVE_HT_4A};
+    const static insFormat formatEncode2AK[2]  = {IF_SVE_BZ_3A, IF_SVE_BZ_3A_A};
+    const static insFormat formatEncode2AL[2]  = {IF_SVE_GG_3A, IF_SVE_GG_3B};
+    const static insFormat formatEncode2AM[2]  = {IF_SVE_HL_3A, IF_SVE_HM_2A};
+    const static insFormat formatEncode2AN[2]  = {IF_SVE_EI_3A, IF_SVE_EZ_3A};
+    const static insFormat formatEncode2AO[2]  = {IF_SVE_GT_4A, IF_SVE_GV_3A};
+    const static insFormat formatEncode2AP[2]  = {IF_SVE_GY_3B, IF_SVE_HA_3A};
+    const static insFormat formatEncode2AQ[2]  = {IF_SVE_GO_3A, IF_SVE_HC_3A};
+    const static insFormat formatEncode2AR[2]  = {IF_SVE_AP_3A, IF_SVE_CZ_4A};
+    const static insFormat formatEncode2AT[2]  = {IF_SVE_AB_3A, IF_SVE_EC_1A};
+    const static insFormat formatEncode2AU[2]  = {IF_SVE_AH_3A, IF_SVE_BI_2A};
+    const static insFormat formatEncode2AV[2]  = {IF_SVE_BM_1A, IF_SVE_BN_1A};
+    const static insFormat formatEncode2AW[2]  = {IF_SVE_BO_1A, IF_SVE_BP_1A};
+    const static insFormat formatEncode2AX[2]  = {IF_SVE_CC_2A, IF_SVE_CD_2A};
+    const static insFormat formatEncode2AY[2]  = {IF_SVE_CR_3A, IF_SVE_CS_3A};
+    const static insFormat formatEncode2AZ[2]  = {IF_SVE_CV_3A, IF_SVE_CV_3B};
+    const static insFormat formatEncode2BA[2]  = {IF_SVE_CW_4A, IF_SVE_CZ_4A};
+    const static insFormat formatEncode2BB[2]  = {IF_SVE_CZ_4A, IF_SVE_CZ_4A_A};
+    const static insFormat formatEncode2BC[2]  = {IF_SVE_DE_1A, IF_SVE_DZ_1A};
+    const static insFormat formatEncode2BD[2]  = {IF_SVE_DG_2A, IF_SVE_DH_1A};
+    const static insFormat formatEncode2BE[2]  = {IF_SVE_DK_3A, IF_SVE_DL_2A};
+    const static insFormat formatEncode2BF[2]  = {IF_SVE_DM_2A, IF_SVE_DN_2A};
+    const static insFormat formatEncode2BG[2]  = {IF_SVE_DO_2A, IF_SVE_DP_2A};
+    const static insFormat formatEncode2BH[2]  = {IF_SVE_DW_2A, IF_SVE_DW_2B};
+    const static insFormat formatEncode2BI[2]  = {IF_SVE_FN_3A, IF_SVE_FN_3B};
+    const static insFormat formatEncode2BJ[2]  = {IF_SVE_GQ_3A, IF_SVE_HG_2A};
+    const static insFormat formatEncode2BK[2]  = {IF_SVE_GU_3C, IF_SVE_HU_4B};
+    const static insFormat formatEncode2BL[2]  = {IF_SVE_GZ_3A, IF_SVE_HB_3A};
+    const static insFormat formatEncode2BM[2]  = {IF_SVE_HK_3B, IF_SVE_HL_3B};
+    const static insFormat formatEncode2BN[2]  = {IF_SVE_IF_4A, IF_SVE_IF_4A_A};
+    const static insFormat formatEncode2BO[2]  = {IF_SVE_IO_3A, IF_SVE_IP_4A};
+    const static insFormat formatEncode2BP[2]  = {IF_SVE_IQ_3A, IF_SVE_IR_4A};
+    const static insFormat formatEncode2BQ[2]  = {IF_SVE_IS_3A, IF_SVE_IT_4A};
+    const static insFormat formatEncode2BR[2]  = {IF_SVE_JC_4A, IF_SVE_JO_3A};
+    const static insFormat formatEncode2BS[2]  = {IF_SVE_JE_3A, IF_SVE_JF_4A};
+
+    code_t    code           = BAD_CODE;
+    insFormat insFmt         = emitInsFormat(ins);
+    bool      encoding_found = false;
+    int       index          = -1;
+
+    switch (insFmt)
+    {
+        case IF_SVE_13A:
+            for (index = 0; index < 13; index++)
+            {
+                if (fmt == formatEncode13A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_11A:
+            for (index = 0; index < 11; index++)
+            {
+                if (fmt == formatEncode11A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_9A:
+            for (index = 0; index < 9; index++)
+            {
+                if (fmt == formatEncode9A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_9B:
+            for (index = 0; index < 9; index++)
+            {
+                if (fmt == formatEncode9B[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_9C:
+            for (index = 0; index < 9; index++)
+            {
+                if (fmt == formatEncode9C[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_9D:
+            for (index = 0; index < 9; index++)
+            {
+                if (fmt == formatEncode9D[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_9E:
+            for (index = 0; index < 9; index++)
+            {
+                if (fmt == formatEncode9E[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_9F:
+            for (index = 0; index < 9; index++)
+            {
+                if (fmt == formatEncode9F[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_8A:
+            for (index = 0; index < 8; index++)
+            {
+                if (fmt == formatEncode8A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_8B:
+            for (index = 0; index < 8; index++)
+            {
+                if (fmt == formatEncode8B[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_8C:
+            for (index = 0; index < 8; index++)
+            {
+                if (fmt == formatEncode8C[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_7A:
+            for (index = 0; index < 7; index++)
+            {
+                if (fmt == formatEncode7A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_6A:
+            for (index = 0; index < 6; index++)
+            {
+                if (fmt == formatEncode6A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_6B:
+            for (index = 0; index < 6; index++)
+            {
+                if (fmt == formatEncode6B[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_6C:
+            for (index = 0; index < 6; index++)
+            {
+                if (fmt == formatEncode6C[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_6D:
+            for (index = 0; index < 6; index++)
+            {
+                if (fmt == formatEncode6D[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_6E:
+            for (index = 0; index < 6; index++)
+            {
+                if (fmt == formatEncode6E[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_6F:
+            for (index = 0; index < 6; index++)
+            {
+                if (fmt == formatEncode6F[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_6G:
+            for (index = 0; index < 6; index++)
+            {
+                if (fmt == formatEncode6G[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_5A:
+            for (index = 0; index < 5; index++)
+            {
+                if (fmt == formatEncode5A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_5B:
+            for (index = 0; index < 5; index++)
+            {
+                if (fmt == formatEncode5B[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_5C:
+            for (index = 0; index < 5; index++)
+            {
+                if (fmt == formatEncode5C[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_5D:
+            for (index = 0; index < 5; index++)
+            {
+                if (fmt == formatEncode5D[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_5E:
+            for (index = 0; index < 5; index++)
+            {
+                if (fmt == formatEncode5E[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_4A:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_4B:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4B[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_4E:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4E[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_4F:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4F[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_4G:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4G[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_4H:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4H[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_4I:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4I[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_4J:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4J[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_4K:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4K[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_4L:
+            for (index = 0; index < 4; index++)
+            {
+                if (fmt == formatEncode4L[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3A:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3A[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3B:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3B[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3C:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3C[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3D:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3D[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3E:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3E[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3F:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3F[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3G:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3G[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3H:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3H[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3I:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3I[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3J:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3J[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3K:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3K[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3L:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3L[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3M:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3M[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3N:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3N[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3O:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3O[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3P:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3P[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3Q:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3Q[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3R:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3R[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3S:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3S[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3T:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3T[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3U:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3U[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_3V:
+            for (index = 0; index < 3; index++)
+            {
+                if (fmt == formatEncode3V[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AA:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AA[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AB:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AB[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AC:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AC[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AD:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AD[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AE:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AE[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AF:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AF[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AG:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AG[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AH:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AH[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AI:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AI[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AJ:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AJ[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AK:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AK[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AL:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AL[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AM:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AM[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AN:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AN[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AO:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AO[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AP:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AP[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AQ:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AQ[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AR:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AR[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AT:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AT[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AU:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AU[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AV:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AV[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AW:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AW[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AX:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AX[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AY:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AY[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2AZ:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2AZ[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BA:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BA[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BB:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BB[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BC:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BC[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BD:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BD[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BE:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BE[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BF:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BF[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BG:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BG[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BH:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BH[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BI:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BI[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BJ:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BJ[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BK:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BK[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BL:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BL[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BM:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BM[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BN:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BN[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BO:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BO[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BP:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BP[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BQ:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BQ[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BR:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BR[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        case IF_SVE_2BS:
+            for (index = 0; index < 2; index++)
+            {
+                if (fmt == formatEncode2BS[index])
+                {
+                    encoding_found = true;
+                    break;
+                }
+            }
+            break;
+        default:
+            if (fmt == insFmt)
+            {
+                encoding_found = true;
+                index          = 0;
+            }
+            else
+            {
+                encoding_found = false;
+            }
+            break;
+    }
+
+    assert(encoding_found);
+    const unsigned sve_ins_offset = ((unsigned)ins - INS_sve_invalid);
+
+    switch (index)
+    {
+        case 0:
+            assert(sve_ins_offset < ArrLen(insCodes1));
+            code = insCodes1[sve_ins_offset];
+            break;
+        case 1:
+            assert(sve_ins_offset < ArrLen(insCodes2));
+            code = insCodes2[sve_ins_offset];
+            break;
+        case 2:
+            assert(sve_ins_offset < ArrLen(insCodes3));
+            code = insCodes3[sve_ins_offset];
+            break;
+        case 3:
+            assert(sve_ins_offset < ArrLen(insCodes4));
+            code = insCodes4[sve_ins_offset];
+            break;
+        case 4:
+            assert(sve_ins_offset < ArrLen(insCodes5));
+            code = insCodes5[sve_ins_offset];
+            break;
+        case 5:
+            assert(sve_ins_offset < ArrLen(insCodes6));
+            code = insCodes6[sve_ins_offset];
+            break;
+        case 6:
+            assert(sve_ins_offset < ArrLen(insCodes7));
+            code = insCodes7[sve_ins_offset];
+            break;
+        case 7:
+            assert(sve_ins_offset < ArrLen(insCodes8));
+            code = insCodes8[sve_ins_offset];
+            break;
+        case 8:
+            assert(sve_ins_offset < ArrLen(insCodes9));
+            code = insCodes9[sve_ins_offset];
+            break;
+        case 9:
+            assert(sve_ins_offset < ArrLen(insCodes10));
+            code = insCodes10[sve_ins_offset];
+            break;
+        case 10:
+            assert(sve_ins_offset < ArrLen(insCodes11));
+            code = insCodes11[sve_ins_offset];
+            break;
+        case 11:
+            assert(sve_ins_offset < ArrLen(insCodes12));
+            code = insCodes12[sve_ins_offset];
+            break;
+        case 12:
+            assert(sve_ins_offset < ArrLen(insCodes13));
+            code = insCodes13[sve_ins_offset];
+            break;
+    }
+
+    assert((code != BAD_CODE));
+
+    return code;
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction with a single immediate value.
+ */
+
+void emitter::emitInsSve_I(instruction ins, emitAttr attr, ssize_t imm)
+{
+    insFormat fmt;
+
+    /* Figure out the encoding format of the instruction */
+    if (ins == INS_sve_setffr)
+    {
+        fmt  = IF_SVE_DQ_0A;
+        attr = EA_PTRSIZE;
+        imm  = 0;
+    }
+    else
+    {
+        unreached();
+    }
+
+    instrDesc* id = emitNewInstrSC(attr, imm);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing a single register.
+ */
+
+void emitter::emitInsSve_R(instruction ins, emitAttr attr, regNumber reg, insOpts opt /* = INS_OPTS_NONE */)
+{
+    insFormat fmt;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_sve_aesmc:
+        case INS_sve_aesimc:
+            opt = INS_OPTS_SCALABLE_B;
+            assert(isVectorRegister(reg)); // ddddd
+            assert(isScalableVectorSize(attr));
+            fmt = IF_SVE_GL_1A;
+            break;
+
+        case INS_sve_rdffr:
+            opt = INS_OPTS_SCALABLE_B;
+            assert(isPredicateRegister(reg)); // DDDD
+            fmt = IF_SVE_DH_1A;
+            break;
+
+        case INS_sve_pfalse:
+            opt = INS_OPTS_SCALABLE_B;
+            assert(isPredicateRegister(reg)); // DDDD
+            fmt = IF_SVE_DJ_1A;
+            break;
+
+        case INS_sve_wrffr:
+            opt = INS_OPTS_SCALABLE_B;
+            assert(isPredicateRegister(reg)); // NNNN
+            fmt = IF_SVE_DR_1A;
+            break;
+
+        case INS_sve_ptrue:
+            assert(insOptsScalableStandard(opt));
+            assert(isHighPredicateRegister(reg));                  // DDD
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_DZ_1A;
+            break;
+
+        case INS_sve_fmov:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg));                         // ddddd
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_EB_1B;
+
+            // FMOV is a pseudo-instruction for DUP, which is aliased by MOV;
+            // MOV is the preferred disassembly
+            ins = INS_sve_mov;
+            break;
+
+        default:
+            unreached();
+            break;
+    }
+
+    instrDesc* id = emitNewInstrSmall(attr);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+    id->idReg1(reg);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing a register and a constant.
+ */
+
+void emitter::emitInsSve_R_I(instruction     ins,
+                             emitAttr        attr,
+                             regNumber       reg,
+                             ssize_t         imm,
+                             insOpts         opt, /* = INS_OPTS_NONE */
+                             insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
+{
+    emitAttr  size      = EA_SIZE(attr);
+    bool      canEncode = false;
+    bool      signedImm = false;
+    bool      hasShift  = false;
+    insFormat fmt;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        bitMaskImm bmi;
+
+        case INS_sve_rdvl:
+            assert(insOptsNone(opt));
+            assert(size == EA_8BYTE);
+            assert(isGeneralRegister(reg)); // ddddd
+            assert(isValidSimm<6>(imm));    // iiiiii
+            fmt       = IF_SVE_BC_1A;
+            canEncode = true;
+            break;
+
+        case INS_sve_smax:
+        case INS_sve_smin:
+            signedImm = true;
+
+            FALLTHROUGH;
+        case INS_sve_umax:
+        case INS_sve_umin:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg));                         // ddddd
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+
+            if (signedImm)
+            {
+                assert(isValidSimm<8>(imm)); // iiiiiiii
+            }
+            else
+            {
+                assert(isValidUimm<8>(imm)); // iiiiiiii
+            }
+
+            fmt       = IF_SVE_ED_1A;
+            canEncode = true;
+            break;
+
+        case INS_sve_mul:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg));                         // ddddd
+            assert(isValidSimm<8>(imm));                           // iiiiiiii
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt       = IF_SVE_EE_1A;
+            canEncode = true;
+            break;
+
+        case INS_sve_mov:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg)); // ddddd
+
+            if (sopt == INS_SCALABLE_OPTS_IMM_BITMASK)
+            {
+                bmi.immNRS = 0;
+                canEncode  = canEncodeBitMaskImm(imm, optGetSveElemsize(opt), &bmi);
+
+                if (!useMovDisasmForBitMask(imm))
+                {
+                    ins = INS_sve_dupm;
+                }
+
+                imm = bmi.immNRS; // iiiiiiiiiiiii
+                assert(isValidImmNRS(imm, optGetSveElemsize(opt)));
+                fmt = IF_SVE_BT_1A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+
+                if (!isValidSimm<8>(imm))
+                {
+                    // Size specifier must be able to fit a left-shifted immediate
+                    assert((isValidSimm_MultipleOf<8, 256>(imm))); // iiiiiiii
+                    assert(insOptsScalableAtLeastHalf(opt));
+                    hasShift = true;
+                    imm >>= 8;
+                }
+
+                fmt       = IF_SVE_EB_1A;
+                canEncode = true;
+            }
+            break;
+
+        case INS_sve_dup:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg));                         // ddddd
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+
+            if (!isValidSimm<8>(imm))
+            {
+                // Size specifier must be able to fit a left-shifted immediate
+                assert((isValidSimm_MultipleOf<8, 256>(imm))); // iiiiiiii
+                assert(insOptsScalableAtLeastHalf(opt));
+                hasShift = true;
+                imm >>= 8;
+            }
+
+            fmt       = IF_SVE_EB_1A;
+            canEncode = true;
+
+            // MOV is an alias for DUP, and is always the preferred disassembly.
+            ins = INS_sve_mov;
+            break;
+
+        case INS_sve_add:
+        case INS_sve_sub:
+        case INS_sve_sqadd:
+        case INS_sve_sqsub:
+        case INS_sve_uqadd:
+        case INS_sve_uqsub:
+        case INS_sve_subr:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg));                         // ddddd
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            if (!isValidUimm<8>(imm))
+            {
+                // Size specifier must be able to fit left-shifted immediate
+                assert((isValidUimm_MultipleOf<8, 256>(imm))); // iiiiiiii
+                assert(insOptsScalableAtLeastHalf(opt));
+                hasShift = true;
+                imm >>= 8;
+            }
+
+            fmt       = IF_SVE_EC_1A;
+            canEncode = true;
+            break;
+
+        case INS_sve_and:
+        case INS_sve_orr:
+        case INS_sve_eor:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg)); // ddddd
+
+            bmi.immNRS = 0;
+            canEncode  = canEncodeBitMaskImm(imm, optGetSveElemsize(opt), &bmi);
+            imm        = bmi.immNRS; // iiiiiiiiiiiii
+            assert(isValidImmNRS(imm, optGetSveElemsize(opt)));
+            fmt = IF_SVE_BS_1A;
+            break;
+
+        case INS_sve_bic:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg)); // ddddd
+
+            // AND is an alias for BIC, and is always the preferred disassembly.
+            ins = INS_sve_and;
+            imm = -imm - 1;
+
+            bmi.immNRS = 0;
+            canEncode  = canEncodeBitMaskImm(imm, optGetSveElemsize(opt), &bmi);
+            imm        = bmi.immNRS; // iiiiiiiiiiiii
+            assert(isValidImmNRS(imm, optGetSveElemsize(opt)));
+            fmt = IF_SVE_BS_1A;
+            break;
+
+        case INS_sve_eon:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg)); // ddddd
+
+            // EOR is an alias for EON, and is always the preferred disassembly.
+            ins = INS_sve_eor;
+            imm = -imm - 1;
+
+            bmi.immNRS = 0;
+            canEncode  = canEncodeBitMaskImm(imm, optGetSveElemsize(opt), &bmi);
+            imm        = bmi.immNRS; // iiiiiiiiiiiii
+            assert(isValidImmNRS(imm, optGetSveElemsize(opt)));
+            fmt = IF_SVE_BS_1A;
+            break;
+
+        case INS_sve_orn:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg)); // ddddd
+
+            // ORR is an alias for ORN, and is always the preferred disassembly.
+            ins = INS_sve_orr;
+            imm = -imm - 1;
+
+            bmi.immNRS = 0;
+            canEncode  = canEncodeBitMaskImm(imm, optGetSveElemsize(opt), &bmi);
+            imm        = bmi.immNRS; // iiiiiiiiiiiii
+            assert(isValidImmNRS(imm, optGetSveElemsize(opt)));
+            fmt = IF_SVE_BS_1A;
+            break;
+
+        case INS_sve_dupm:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg)); // ddddd
+
+            bmi.immNRS = 0;
+            canEncode  = canEncodeBitMaskImm(imm, optGetSveElemsize(opt), &bmi);
+            fmt        = IF_SVE_BT_1A;
+
+            if (useMovDisasmForBitMask(imm))
+            {
+                ins = INS_sve_mov;
+            }
+
+            imm = bmi.immNRS; // iiiiiiiiiiiii
+            assert(isValidImmNRS(imm, optGetSveElemsize(opt)));
+            break;
+
+        default:
+            unreached();
+            break;
+    }
+
+    assert(canEncode);
+
+    // For encodings with shifted immediates, we need a way to determine if the immediate has been shifted or not.
+    // We could just leave the immediate in its unshifted form, and call emitNewInstrSC,
+    // but that would allocate unnecessarily large descriptors. Therefore:
+    // - For encodings without any shifting, just call emitNewInstrSC.
+    // - For unshifted immediates, call emitNewInstrSC.
+    //   If it allocates a small descriptor, idHasShift() will always return false.
+    //   Else, idHasShift still returns false, as we set the dedicated bit in large descriptors to false.
+    // - For immediates that need a shift, call emitNewInstrCns so a normal or large descriptor is used.
+    //   idHasShift will always check the dedicated bit, as it is always available. We set this bit to true below.
+    instrDesc* id = !hasShift ? emitNewInstrSC(attr, imm) : emitNewInstrCns(attr, imm);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+
+    id->idReg1(reg);
+
+    id->idHasShift(hasShift);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing a register and a floating point constant.
+ */
+
+void emitter::emitInsSve_R_F(
+    instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt /* = INS_OPTS_NONE */)
+{
+    ssize_t   imm       = 0;
+    bool      canEncode = false;
+    insFormat fmt;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        floatImm8 fpi;
+
+        case INS_sve_fmov:
+        case INS_sve_fdup:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg));                         // ddddd
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+
+            fpi.immFPIVal = 0;
+            canEncode     = canEncodeFloatImm8(immDbl, &fpi);
+            imm           = fpi.immFPIVal;
+            fmt           = IF_SVE_EA_1A;
+
+            // FMOV is an alias for FDUP, and is always the preferred disassembly.
+            ins = INS_sve_fmov;
+            break;
+
+        default:
+            unreached();
+            break;
+    }
+
+    assert(canEncode);
+
+    instrDesc* id = emitNewInstrSC(attr, imm);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+
+    id->idReg1(reg);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing two registers
+ */
+
+void emitter::emitInsSve_R_R(instruction     ins,
+                             emitAttr        attr,
+                             regNumber       reg1,
+                             regNumber       reg2,
+                             insOpts         opt /* = INS_OPTS_NONE */,
+                             insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
+{
+    emitAttr  size = EA_SIZE(attr);
+    insFormat fmt  = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_sve_pmov:
+            if (opt != INS_OPTS_SCALABLE_B)
+            {
+                assert(insOptsScalableStandard(opt));
+                return emitInsSve_R_R_I(INS_sve_pmov, attr, reg1, reg2, 0, opt, sopt);
+            }
+            if (sopt == INS_SCALABLE_OPTS_TO_PREDICATE)
+            {
+                assert(isPredicateRegister(reg1));
+                assert(isVectorRegister(reg2));
+                fmt = IF_SVE_CE_2A;
+            }
+            else if (sopt == INS_SCALABLE_OPTS_TO_VECTOR)
+            {
+                assert(isVectorRegister(reg1));
+                assert(isPredicateRegister(reg2));
+                fmt = IF_SVE_CF_2A;
+            }
+            else
+            {
+                assert(!"invalid instruction");
+            }
+            break;
+
+        case INS_sve_movs:
+        {
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(reg1)); // dddd
+            assert(isPredicateRegister(reg2)); // nnnn
+            fmt = IF_SVE_CZ_4A_A;
+            break;
+        }
+
+        case INS_sve_mov:
+        {
+            if (isGeneralRegisterOrSP(reg2))
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(insOptsScalableStandard(opt));
+                assert(isVectorRegister(reg1));
+#ifdef DEBUG
+                if (opt == INS_OPTS_SCALABLE_D)
+                {
+                    assert(size == EA_8BYTE);
+                }
+                else
+                {
+                    assert(size == EA_4BYTE);
+                }
+#endif // DEBUG
+                reg2 = encodingSPtoZR(reg2);
+                fmt  = IF_SVE_CB_2A;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_B);
+                assert(isPredicateRegister(reg1)); // dddd
+                assert(isPredicateRegister(reg2)); // nnnn
+                fmt = IF_SVE_CZ_4A_L;
+            }
+            break;
+        }
+
+        case INS_sve_insr:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1)); // ddddd
+            if (isVectorRegister(reg2))
+            {
+                fmt = IF_SVE_CC_2A;
+            }
+            else if (isGeneralRegisterOrZR(reg2))
+            {
+                fmt = IF_SVE_CD_2A;
+            }
+            else
+            {
+                unreached();
+            }
+            break;
+
+        case INS_sve_pfirst:
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(reg1)); // DDDD
+            assert(isPredicateRegister(reg2)); // gggg
+            fmt = IF_SVE_DD_2A;
+            break;
+
+        case INS_sve_pnext:
+            assert(insOptsScalableStandard(opt));
+            assert(isPredicateRegister(reg1));                     // DDDD
+            assert(isPredicateRegister(reg2));                     // VVVV
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_DF_2A;
+            break;
+
+        case INS_sve_punpkhi:
+        case INS_sve_punpklo:
+            assert(isPredicateRegister(reg1)); // DDDD
+            assert(isPredicateRegister(reg2)); // NNNN
+            fmt = IF_SVE_CK_2A;
+            break;
+
+        case INS_sve_rdffr:
+        case INS_sve_rdffrs:
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(reg1)); // DDDD
+            assert(isPredicateRegister(reg2)); // gggg
+            fmt = IF_SVE_DG_2A;
+            break;
+
+        case INS_sve_rev:
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(insOptsScalableStandard(opt));
+                assert(isVectorRegister(reg1));
+                assert(isVectorRegister(reg2));
+                assert(isScalableVectorSize(size));
+                fmt = IF_SVE_CG_2A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(insOptsScalableStandard(opt));
+                assert(isPredicateRegister(reg1)); // DDDD
+                assert(isPredicateRegister(reg2)); // NNNN
+                fmt = IF_SVE_CJ_2A;
+            }
+            break;
+
+        case INS_sve_ptest:
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(reg1)); // gggg
+            assert(isPredicateRegister(reg2)); // NNNN
+            fmt = IF_SVE_DI_2A;
+            break;
+
+        case INS_sve_cntp:
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsWithVectorLength(sopt));         // l
+            assert(isGeneralRegister(reg1));                       // ddddd
+            assert(isPredicateRegister(reg2));                     // NNNN
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_DL_2A;
+            break;
+
+        case INS_sve_incp:
+        case INS_sve_decp:
+            assert(isPredicateRegister(reg2)); // MMMM
+
+            if (isGeneralRegister(reg1)) // ddddd
+            {
+                assert(insOptsScalableStandard(opt)); // xx
+                assert(size == EA_8BYTE);
+                fmt = IF_SVE_DM_2A;
+            }
+            else
+            {
+                assert(insOptsScalableAtLeastHalf(opt)); // xx
+                assert(isVectorRegister(reg1));          // ddddd
+                assert(isScalableVectorSize(size));
+                fmt = IF_SVE_DN_2A;
+            }
+            break;
+
+        case INS_sve_sqincp:
+        case INS_sve_uqincp:
+        case INS_sve_sqdecp:
+        case INS_sve_uqdecp:
+            assert(isPredicateRegister(reg2)); // MMMM
+
+            if (isGeneralRegister(reg1)) // ddddd
+            {
+                assert(insOptsScalableStandard(opt)); // xx
+                assert(isValidGeneralDatasize(size));
+                fmt = IF_SVE_DO_2A;
+            }
+            else
+            {
+                assert(insOptsScalableAtLeastHalf(opt)); // xx
+                assert(isVectorRegister(reg1));          // ddddd
+                assert(isScalableVectorSize(size));
+                fmt = IF_SVE_DP_2A;
+            }
+            break;
+
+        case INS_sve_ctermeq:
+        case INS_sve_ctermne:
+            assert(insOptsNone(opt));
+            assert(isGeneralRegister(reg1));      // nnnnn
+            assert(isGeneralRegister(reg2));      // mmmmm
+            assert(isValidGeneralDatasize(size)); // x
+            fmt = IF_SVE_DS_2A;
+            break;
+
+        case INS_sve_sqcvtn:
+        case INS_sve_uqcvtn:
+        case INS_sve_sqcvtun:
+            assert(insOptsNone(opt));
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnn
+            assert(isEvenRegister(reg2));
+            fmt = IF_SVE_FZ_2A;
+            break;
+
+        case INS_sve_fcvtn:
+        case INS_sve_bfcvtn:
+        case INS_sve_fcvtnt:
+        case INS_sve_fcvtnb:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(insOptsNone(opt));
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnn
+            assert(isEvenRegister(reg2));
+            fmt = IF_SVE_HG_2A;
+            break;
+
+        case INS_sve_sqxtnb:
+        case INS_sve_sqxtnt:
+        case INS_sve_uqxtnb:
+        case INS_sve_uqxtnt:
+        case INS_sve_sqxtunb:
+        case INS_sve_sqxtunt:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(optGetSveElemsize(opt) != EA_8BYTE);
+            assert(isValidVectorElemsize(optGetSveElemsize(opt)));
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_GD_2A;
+            break;
+
+        case INS_sve_aese:
+        case INS_sve_aesd:
+        case INS_sve_sm4e:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+#ifdef DEBUG
+            if (opt == INS_OPTS_SCALABLE_S)
+            {
+                assert(ins == INS_sve_sm4e);
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_B);
+            }
+#endif // DEBUG
+            fmt = IF_SVE_GK_2A;
+            break;
+
+        case INS_sve_frecpe:
+        case INS_sve_frsqrte:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_HF_2A;
+            break;
+
+        case INS_sve_sunpkhi:
+        case INS_sve_sunpklo:
+        case INS_sve_uunpkhi:
+        case INS_sve_uunpklo:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_CH_2A;
+            break;
+
+        case INS_sve_fexpa:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_BJ_2A;
+            break;
+
+        case INS_sve_dup:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));
+            assert(isGeneralRegisterOrSP(reg2));
+#ifdef DEBUG
+            if (opt == INS_OPTS_SCALABLE_D)
+            {
+                assert(size == EA_8BYTE);
+            }
+            else
+            {
+                assert(size == EA_4BYTE);
+            }
+#endif // DEBUG
+            reg2 = encodingSPtoZR(reg2);
+            fmt  = IF_SVE_CB_2A;
+
+            // DUP is an alias for MOV;
+            // MOV is the preferred disassembly
+            ins = INS_sve_mov;
+            break;
+
+        case INS_sve_bf1cvt:
+        case INS_sve_bf1cvtlt:
+        case INS_sve_bf2cvt:
+        case INS_sve_bf2cvtlt:
+        case INS_sve_f1cvt:
+        case INS_sve_f1cvtlt:
+        case INS_sve_f2cvt:
+        case INS_sve_f2cvtlt:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_HH_2A;
+            unreached(); // not supported yet
+            break;
+
+        case INS_sve_movprfx:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsNone(opt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_BI_2A;
+            break;
+
+        case INS_sve_fmov:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isPredicateRegister(reg2));                     // gggg
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_BV_2B;
+
+            // CPY is an alias for FMOV, and MOV is an alias for CPY.
+            // Thus, MOV is the preferred disassembly.
+            ins = INS_sve_mov;
+            break;
+
+        default:
+            unreached();
+            break;
+    }
+
+    assert(fmt != IF_NONE);
+
+    instrDesc* id;
+
+    if (insScalableOptsWithVectorLength(sopt))
+    {
+        id = emitNewInstr(attr);
+        id->idVectorLength4x(sopt == INS_SCALABLE_OPTS_VL_4X);
+    }
+    else
+    {
+        id = emitNewInstrSmall(attr);
+    }
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing a register and two constants.
+ */
+
+void emitter::emitInsSve_R_I_I(
+    instruction ins, emitAttr attr, regNumber reg, ssize_t imm1, ssize_t imm2, insOpts opt /* = INS_OPTS_NONE */)
+{
+    insFormat fmt;
+    ssize_t   immOut;
+
+    if (ins == INS_sve_index)
+    {
+        assert(insOptsScalableStandard(opt));
+        assert(isVectorRegister(reg));                         // ddddd
+        assert(isValidSimm<5>(imm1));                          // iiiii
+        assert(isValidSimm<5>(imm2));                          // iiiii
+        assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+        immOut = insSveEncodeTwoSimm5(imm1, imm2);
+        fmt    = IF_SVE_AX_1A;
+    }
+    else
+    {
+        unreached();
+    }
+
+    instrDesc* id = emitNewInstrSC(attr, immOut);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+
+    id->idReg1(reg);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing two registers and a constant.
+ */
+
+void emitter::emitInsSve_R_R_I(instruction     ins,
+                               emitAttr        attr,
+                               regNumber       reg1,
+                               regNumber       reg2,
+                               ssize_t         imm,
+                               insOpts         opt /* = INS_OPTS_NONE */,
+                               insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
+{
+    emitAttr  size     = EA_SIZE(attr);
+    bool      hasShift = false;
+    insFormat fmt;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        bool isRightShift;
+
+        case INS_sve_asr:
+        case INS_sve_lsl:
+        case INS_sve_lsr:
+        case INS_sve_srshr:
+        case INS_sve_sqshl:
+        case INS_sve_urshr:
+        case INS_sve_sqshlu:
+        case INS_sve_uqshl:
+        case INS_sve_asrd:
+            isRightShift = emitInsIsVectorRightShift(ins);
+            assert(isValidVectorShiftAmount(imm, optGetSveElemsize(opt), isRightShift));
+            assert(insOptsScalableStandard(opt));
+            assert(isScalableVectorSize(size));
+
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert((ins == INS_sve_asr) || (ins == INS_sve_lsl) || (ins == INS_sve_lsr));
+                assert(isVectorRegister(reg1));
+                assert(isVectorRegister(reg2));
+                fmt = IF_SVE_BF_2A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isVectorRegister(reg1));       // ddddd
+                assert(isLowPredicateRegister(reg2)); // ggg
+                fmt = IF_SVE_AM_2A;
+            }
+            break;
+
+        case INS_sve_xar:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx xx
+
+            switch (opt)
+            {
+                case INS_OPTS_SCALABLE_B:
+                    assert(isValidUimmFrom1<3>(imm)); // iii
+                    break;
+
+                case INS_OPTS_SCALABLE_H:
+                    assert(isValidUimmFrom1<4>(imm)); // xiii
+                    break;
+
+                case INS_OPTS_SCALABLE_S:
+                    assert(isValidUimmFrom1<5>(imm)); // xxiii
+                    break;
+
+                case INS_OPTS_SCALABLE_D:
+                    assert(isValidUimmFrom1<6>(imm)); // x xxiii
+                    break;
+
+                default:
+                    unreached();
+                    break;
+            }
+
+            fmt = IF_SVE_AW_2A;
+            break;
+
+        case INS_sve_index:
+            assert(insOptsScalable(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isValidSimm<5>(imm));                           // iiiii
+            assert(isIntegerRegister(reg2));                       // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+
+            if (sopt == INS_SCALABLE_OPTS_IMM_FIRST)
+            {
+                fmt = IF_SVE_AY_2A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                fmt = IF_SVE_AZ_2A;
+            }
+            break;
+
+        case INS_sve_addvl:
+        case INS_sve_addpl:
+            assert(insOptsNone(opt));
+            assert(size == EA_8BYTE);
+            assert(isGeneralRegisterOrSP(reg1)); // ddddd
+            assert(isGeneralRegisterOrSP(reg2)); // nnnnn
+            assert(isValidSimm<6>(imm));         // iiiiii
+            reg1 = encodingSPtoZR(reg1);
+            reg2 = encodingSPtoZR(reg2);
+            fmt  = IF_SVE_BB_2A;
+            break;
+
+        case INS_sve_mov:
+            if (sopt == INS_SCALABLE_OPTS_BROADCAST)
+            {
+                return emitInsSve_R_R_I(INS_sve_dup, attr, reg1, reg2, imm, opt, sopt);
+            }
+            FALLTHROUGH;
+        case INS_sve_cpy:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));    // DDDDD
+            assert(isPredicateRegister(reg2)); // GGGG
+
+            if (!isValidSimm<8>(imm))
+            {
+                // Size specifier must be able to fit a left-shifted immediate
+                assert((isValidSimm_MultipleOf<8, 256>(imm))); // iiiiiiii
+                assert(insOptsScalableAtLeastHalf(opt));
+                hasShift = true;
+                imm >>= 8;
+            }
+
+            if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE)
+            {
+                fmt = IF_SVE_BV_2A_J;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                fmt = IF_SVE_BV_2A;
+            }
+
+            // MOV is an alias for CPY, and is always the preferred disassembly.
+            ins = INS_sve_mov;
+            break;
+
+        case INS_sve_dup:
+            assert(insOptsScalable(opt));
+            assert(isVectorRegister(reg1)); // DDDDD
+            assert(isVectorRegister(reg2)); // GGGG
+            assert(isValidBroadcastImm(imm, optGetSveElemsize(opt)));
+            fmt = IF_SVE_BW_2A;
+            ins = INS_sve_mov; // Set preferred alias for disassembly
+            break;
+
+        case INS_sve_pmov:
+            if (sopt == INS_SCALABLE_OPTS_TO_PREDICATE)
+            {
+                assert(isPredicateRegister(reg1));
+                assert(isVectorRegister(reg2));
+                switch (opt)
+                {
+                    case INS_OPTS_SCALABLE_D:
+                        assert(isValidUimm<3>(imm));
+                        fmt = IF_SVE_CE_2B;
+                        break;
+                    case INS_OPTS_SCALABLE_S:
+                        assert(isValidUimm<2>(imm));
+                        fmt = IF_SVE_CE_2D;
+                        break;
+                    case INS_OPTS_SCALABLE_H:
+                        assert(isValidUimm<1>(imm));
+                        fmt = IF_SVE_CE_2C;
+                        break;
+                    default:
+                        unreached();
+                }
+            }
+            else if (sopt == INS_SCALABLE_OPTS_TO_VECTOR)
+            {
+                assert(isVectorRegister(reg1));
+                assert(isPredicateRegister(reg2));
+                switch (opt)
+                {
+                    case INS_OPTS_SCALABLE_D:
+                        assert(isValidUimm<3>(imm));
+                        fmt = IF_SVE_CF_2B;
+                        break;
+                    case INS_OPTS_SCALABLE_S:
+                        assert(isValidUimm<2>(imm));
+                        fmt = IF_SVE_CF_2D;
+                        break;
+                    case INS_OPTS_SCALABLE_H:
+                        assert(isValidUimm<1>(imm));
+                        fmt = IF_SVE_CF_2C;
+                        break;
+                    default:
+                        unreached();
+                }
+            }
+            else
+            {
+                unreached();
+            }
+            break;
+
+        case INS_sve_sqrshrn:
+        case INS_sve_sqrshrun:
+        case INS_sve_uqrshrn:
+            isRightShift = emitInsIsVectorRightShift(ins);
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isEvenRegister(reg2));
+            assert(opt == INS_OPTS_SCALABLE_H);
+            assert(isRightShift); // These are always right-shift.
+            assert(isValidVectorShiftAmount(imm, EA_4BYTE, isRightShift));
+            fmt = IF_SVE_GA_2A;
+            break;
+
+        case INS_sve_pext:
+            assert(insOptsScalableStandard(opt));
+            assert(isPredicateRegister(reg1));                     // DDDD
+            assert(isHighPredicateRegister(reg2));                 // NNN
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+
+            if (sopt == INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR)
+            {
+                assert(isValidUimm<1>(imm)); // i
+                fmt = IF_SVE_DW_2B;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isValidUimm<2>(imm)); // ii
+                fmt = IF_SVE_DW_2A;
+            }
+            break;
+
+        case INS_sve_sshllb:
+        case INS_sve_sshllt:
+        case INS_sve_ushllb:
+        case INS_sve_ushllt:
+            assert(insOptsScalableWide(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // x xx
+
+            switch (opt)
+            {
+                case INS_OPTS_SCALABLE_B:
+                    assert(isValidUimm<3>(imm)); // iii
+                    break;
+
+                case INS_OPTS_SCALABLE_H:
+                    assert(isValidUimm<4>(imm)); // x iii
+                    break;
+
+                case INS_OPTS_SCALABLE_S:
+                    assert(isValidUimm<5>(imm)); // xx iii
+                    break;
+
+                default:
+                    unreached();
+                    break;
+            }
+
+            fmt = IF_SVE_FR_2A;
+            break;
+
+        case INS_sve_sqshrunb:
+        case INS_sve_sqshrunt:
+        case INS_sve_sqrshrunb:
+        case INS_sve_sqrshrunt:
+        case INS_sve_shrnb:
+        case INS_sve_shrnt:
+        case INS_sve_rshrnb:
+        case INS_sve_rshrnt:
+        case INS_sve_sqshrnb:
+        case INS_sve_sqshrnt:
+        case INS_sve_sqrshrnb:
+        case INS_sve_sqrshrnt:
+        case INS_sve_uqshrnb:
+        case INS_sve_uqshrnt:
+        case INS_sve_uqrshrnb:
+        case INS_sve_uqrshrnt:
+            assert(insOptsScalableWide(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // x xx
+
+            switch (opt)
+            {
+                case INS_OPTS_SCALABLE_B:
+                    assert(isValidUimmFrom1<3>(imm)); // iii
+                    break;
+
+                case INS_OPTS_SCALABLE_H:
+                    assert(isValidUimmFrom1<4>(imm)); // x iii
+                    break;
+
+                case INS_OPTS_SCALABLE_S:
+                    assert(isValidUimmFrom1<5>(imm)); // xx iii
+                    break;
+
+                default:
+                    unreached();
+                    break;
+            }
+
+            fmt = IF_SVE_GB_2A;
+            break;
+
+        case INS_sve_cadd:
+        case INS_sve_sqcadd:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+
+            // Convert rot to bitwise representation: 0 if 90, 1 if 270
+            imm = emitEncodeRotationImm90_or_270(imm); // r
+            fmt = IF_SVE_FV_2A;
+            break;
+
+        case INS_sve_ftmad:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isValidUimm<3>(imm));
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_HN_2A;
+            break;
+
+        case INS_sve_ldr:
+            assert(insOptsNone(opt));
+            assert(isScalableVectorSize(size));
+            assert(isGeneralRegister(reg2)); // nnnnn
+            assert(isValidSimm<9>(imm));     // iii
+                                             // iiiiii
+
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(isVectorRegister(reg1));
+                fmt = IF_SVE_IE_2A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isPredicateRegister(reg1));
+                fmt = IF_SVE_ID_2A;
+            }
+            break;
+
+        case INS_sve_str:
+            assert(insOptsNone(opt));
+            assert(isScalableVectorSize(size));
+            assert(isGeneralRegister(reg2)); // nnnnn
+            assert(isValidSimm<9>(imm));     // iii
+                                             // iiiiii
+
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(isVectorRegister(reg1));
+                fmt = IF_SVE_JH_2A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isPredicateRegister(reg1));
+                fmt = IF_SVE_JG_2A;
+            }
+            break;
+
+        case INS_sve_sli:
+        case INS_sve_sri:
+            isRightShift = emitInsIsVectorRightShift(ins);
+            assert(isValidVectorShiftAmount(imm, optGetSveElemsize(opt), isRightShift));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_FT_2A;
+            break;
+
+        case INS_sve_srsra:
+        case INS_sve_ssra:
+        case INS_sve_ursra:
+        case INS_sve_usra:
+            isRightShift = emitInsIsVectorRightShift(ins);
+            assert(isValidVectorShiftAmount(imm, optGetSveElemsize(opt), isRightShift));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_FU_2A;
+            break;
+
+        case INS_sve_ext:
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isValidUimm<8>(imm));    // iiiii iii
+
+            if (sopt == INS_SCALABLE_OPTS_WITH_VECTOR_PAIR)
+            {
+                fmt = IF_SVE_BQ_2A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                fmt = IF_SVE_BQ_2B;
+            }
+            break;
+
+        case INS_sve_dupq:
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isScalableVectorSize(size));
+#ifdef DEBUG
+            switch (opt)
+            {
+                case INS_OPTS_SCALABLE_B:
+                    assert(isValidUimm<4>(imm));
+                    break;
+
+                case INS_OPTS_SCALABLE_H:
+                    assert(isValidUimm<3>(imm));
+                    break;
+
+                case INS_OPTS_SCALABLE_S:
+                    assert(isValidUimm<2>(imm));
+                    break;
+
+                case INS_OPTS_SCALABLE_D:
+                    assert(isValidUimm<1>(imm));
+                    break;
+
+                default:
+                    break;
+            }
+#endif // DEBUG
+            fmt = IF_SVE_BX_2A;
+            break;
+
+        case INS_sve_extq:
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isScalableVectorSize(size));
+            assert(isValidUimm<4>(imm));
+            fmt = IF_SVE_BY_2A;
+            break;
+
+        default:
+            unreached();
+            break;
+    }
+
+    // For encodings with shifted immediates, we need a way to determine if the immediate has been shifted or not.
+    // We could just leave the immediate in its unshifted form, and call emitNewInstrSC,
+    // but that would allocate unnecessarily large descriptors. Therefore:
+    // - For encodings without any shifting, just call emitNewInstrSC.
+    // - For unshifted immediates, call emitNewInstrSC.
+    //   If it allocates a small descriptor, idHasShift() will always return false.
+    //   Else, idHasShift still returns false, as we set the dedicated bit in large descriptors to false.
+    // - For immediates that need a shift, call emitNewInstrCns so a normal or large descriptor is used.
+    //   idHasShift will always check the dedicated bit, as it is always available. We set this bit to true below.
+    instrDesc* id = !hasShift ? emitNewInstrSC(attr, imm) : emitNewInstrCns(attr, imm);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+
+    id->idHasShift(hasShift);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing two registers and a floating point constant.
+ */
+
+void emitter::emitInsSve_R_R_F(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, double immDbl, insOpts opt /* = INS_OPTS_NONE */)
+{
+    ssize_t   imm  = 0;
+    emitAttr  size = EA_SIZE(attr);
+    insFormat fmt  = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_sve_fmul:
+        case INS_sve_fmaxnm:
+        case INS_sve_fadd:
+        case INS_sve_fmax:
+        case INS_sve_fminnm:
+        case INS_sve_fsub:
+        case INS_sve_fmin:
+        case INS_sve_fsubr:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isScalableVectorSize(size));
+            imm = emitEncodeSmallFloatImm(immDbl, ins);
+            fmt = IF_SVE_HM_2A;
+            break;
+
+        case INS_sve_fmov:
+        case INS_sve_fcpy:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isPredicateRegister(reg2));                     // gggg
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            floatImm8 fpi;
+            fpi.immFPIVal = 0;
+            canEncodeFloatImm8(immDbl, &fpi);
+            imm = fpi.immFPIVal;
+            fmt = IF_SVE_BU_2A;
+
+            // FMOV is an alias for FCPY, and is always the preferred disassembly.
+            ins = INS_sve_fmov;
+            break;
+
+        default:
+            unreached();
+            break;
+
+    } // end switch (ins)
+
+    assert(fmt != IF_NONE);
+
+    instrDesc* id = emitNewInstrSC(attr, imm);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing three registers.
+ */
+
+void emitter::emitInsSve_R_R_R(instruction     ins,
+                               emitAttr        attr,
+                               regNumber       reg1,
+                               regNumber       reg2,
+                               regNumber       reg3,
+                               insOpts         opt /* = INS_OPTS_NONE */,
+                               insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
+{
+    emitAttr  size           = EA_SIZE(attr);
+    bool      pmerge         = false;
+    bool      vectorLength4x = false;
+    insFormat fmt            = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_sve_and:
+        case INS_sve_bic:
+        case INS_sve_eor:
+        case INS_sve_orr:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1)); // mmmmm
+            assert(isVectorRegister(reg3)); // ddddd
+
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(opt == INS_OPTS_SCALABLE_D);
+                assert(isVectorRegister(reg2)); // nnnnn
+                fmt = IF_SVE_AU_3A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isLowPredicateRegister(reg2)); // ggg
+                fmt = IF_SVE_AA_3A;
+            }
+            break;
+
+        case INS_sve_add:
+        case INS_sve_sub:
+        case INS_sve_subr:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(isVectorRegister(reg2));
+                assert(ins != INS_sve_subr);
+                fmt = IF_SVE_AT_3A;
+            }
+            else
+            {
+                assert(isLowPredicateRegister(reg2));
+                assert(insScalableOptsNone(sopt));
+                fmt = IF_SVE_AB_3A;
+            }
+            break;
+
+        case INS_sve_addpt:
+        case INS_sve_subpt:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(opt == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg3)); // mmmmm
+
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(isVectorRegister(reg2)); // nnnnn
+                fmt = IF_SVE_AT_3B;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isLowPredicateRegister(reg2)); // ggg
+                fmt = IF_SVE_AB_3B;
+            }
+            break;
+
+        case INS_sve_sdiv:
+        case INS_sve_sdivr:
+        case INS_sve_udiv:
+        case INS_sve_udivr:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableWords(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AC_3A;
+            break;
+
+        case INS_sve_sabd:
+        case INS_sve_smax:
+        case INS_sve_smin:
+        case INS_sve_uabd:
+        case INS_sve_umax:
+        case INS_sve_umin:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AD_3A;
+            break;
+
+        case INS_sve_mul:
+        case INS_sve_smulh:
+        case INS_sve_umulh:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(isVectorRegister(reg2));
+                fmt = IF_SVE_BD_3A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isLowPredicateRegister(reg2));
+                fmt = IF_SVE_AE_3A;
+            }
+            break;
+
+        case INS_sve_pmul:
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+            fmt = IF_SVE_BD_3B;
+            break;
+
+        case INS_sve_andv:
+        case INS_sve_eorv:
+        case INS_sve_orv:
+            assert(isFloatReg(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AF_3A;
+            break;
+
+        case INS_sve_andqv:
+        case INS_sve_eorqv:
+        case INS_sve_orqv:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AG_3A;
+            break;
+
+        case INS_sve_movprfx:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE)
+            {
+                pmerge = true;
+            }
+            fmt = IF_SVE_AH_3A;
+            break;
+
+        case INS_sve_saddv:
+        case INS_sve_uaddv:
+            assert(isFloatReg(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableWide(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AI_3A;
+            break;
+
+        case INS_sve_addqv:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AJ_3A;
+            break;
+
+        case INS_sve_smaxv:
+        case INS_sve_sminv:
+        case INS_sve_umaxv:
+        case INS_sve_uminv:
+            assert(isFloatReg(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AK_3A;
+            break;
+
+        case INS_sve_smaxqv:
+        case INS_sve_sminqv:
+        case INS_sve_umaxqv:
+        case INS_sve_uminqv:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AL_3A;
+            break;
+
+        case INS_sve_asrr:
+        case INS_sve_lslr:
+        case INS_sve_lsrr:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AN_3A;
+            break;
+
+        case INS_sve_asr:
+        case INS_sve_lsl:
+        case INS_sve_lsr:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg3));
+            if (sopt == INS_SCALABLE_OPTS_WIDE)
+            {
+                assert(isLowPredicateRegister(reg2));
+                assert(insOptsScalableWide(opt));
+                fmt = IF_SVE_AO_3A;
+            }
+            else if (sopt == INS_SCALABLE_OPTS_UNPREDICATED_WIDE)
+            {
+                assert(isVectorRegister(reg2));
+                assert(insOptsScalableWide(opt));
+                fmt = IF_SVE_BG_3A;
+            }
+            else
+            {
+                assert(isLowPredicateRegister(reg2));
+                assert(insScalableOptsNone(sopt));
+                assert(insOptsScalableStandard(opt));
+                fmt = IF_SVE_AN_3A;
+            }
+            break;
+
+        case INS_sve_uzp1:
+        case INS_sve_trn1:
+        case INS_sve_zip1:
+        case INS_sve_uzp2:
+        case INS_sve_trn2:
+        case INS_sve_zip2:
+            assert(insOptsScalable(opt));
+
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(isVectorRegister(reg1)); // ddddd
+                assert(isVectorRegister(reg2)); // nnnnn
+                assert(isVectorRegister(reg3)); // mmmmm
+
+                if (opt == INS_OPTS_SCALABLE_Q)
+                {
+                    fmt = IF_SVE_BR_3B;
+                }
+                else
+                {
+                    assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+                    fmt = IF_SVE_BR_3A;
+                }
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isPredicateRegister(reg1)); // DDDD
+                assert(isPredicateRegister(reg2)); // NNNN
+                assert(isPredicateRegister(reg3)); // MMMM
+                fmt = IF_SVE_CI_3A;
+            }
+            break;
+
+        case INS_sve_tbl:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+
+            if (sopt == INS_SCALABLE_OPTS_WITH_VECTOR_PAIR)
+            {
+                fmt = IF_SVE_BZ_3A_A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                fmt = IF_SVE_BZ_3A;
+            }
+            break;
+
+        case INS_sve_tbx:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_BZ_3A;
+            break;
+
+        case INS_sve_tbxq:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_CA_3A;
+            break;
+
+        case INS_sve_sdot:
+        case INS_sve_udot:
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                fmt = IF_SVE_EF_3A;
+            }
+            else
+            {
+                fmt = IF_SVE_EH_3A;
+                assert(insOptsScalableWords(opt));
+                assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            }
+            break;
+
+        case INS_sve_usdot:
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+            fmt = IF_SVE_EI_3A;
+            break;
+
+        case INS_sve_smlalb:
+        case INS_sve_smlalt:
+        case INS_sve_umlalb:
+        case INS_sve_umlalt:
+        case INS_sve_smlslb:
+        case INS_sve_smlslt:
+        case INS_sve_umlslb:
+        case INS_sve_umlslt:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_EL_3A;
+            break;
+
+        case INS_sve_sqrdmlah:
+        case INS_sve_sqrdmlsh:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_EM_3A;
+            break;
+
+        case INS_sve_sqdmlalbt:
+        case INS_sve_sqdmlslbt:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_EN_3A;
+            break;
+
+        case INS_sve_sqdmlalb:
+        case INS_sve_sqdmlalt:
+        case INS_sve_sqdmlslb:
+        case INS_sve_sqdmlslt:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_EO_3A;
+            break;
+
+        case INS_sve_sclamp:
+        case INS_sve_uclamp:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_EV_3A;
+            break;
+
+        case INS_sve_zipq1:
+        case INS_sve_zipq2:
+        case INS_sve_uzpq1:
+        case INS_sve_uzpq2:
+        case INS_sve_tblq:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_EX_3A;
+            break;
+
+        case INS_sve_saddlb:
+        case INS_sve_saddlt:
+        case INS_sve_uaddlb:
+        case INS_sve_uaddlt:
+        case INS_sve_ssublb:
+        case INS_sve_ssublt:
+        case INS_sve_usublb:
+        case INS_sve_usublt:
+        case INS_sve_sabdlb:
+        case INS_sve_sabdlt:
+        case INS_sve_uabdlb:
+        case INS_sve_uabdlt:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_FL_3A;
+            break;
+
+        case INS_sve_saddwb:
+        case INS_sve_saddwt:
+        case INS_sve_uaddwb:
+        case INS_sve_uaddwt:
+        case INS_sve_ssubwb:
+        case INS_sve_ssubwt:
+        case INS_sve_usubwb:
+        case INS_sve_usubwt:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_FM_3A;
+            break;
+
+        case INS_sve_smullb:
+        case INS_sve_smullt:
+        case INS_sve_umullb:
+        case INS_sve_umullt:
+        case INS_sve_sqdmullb:
+        case INS_sve_sqdmullt:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_FN_3A;
+            break;
+
+        case INS_sve_pmullb:
+        case INS_sve_pmullt:
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+
+            if (opt == INS_OPTS_SCALABLE_Q)
+            {
+                fmt = IF_SVE_FN_3B;
+            }
+            else
+            {
+                assert((opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_D));
+                assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+                fmt = IF_SVE_FN_3A;
+            }
+            break;
+
+        case INS_sve_smmla:
+        case INS_sve_usmmla:
+        case INS_sve_ummla:
+            assert(opt == INS_OPTS_SCALABLE_S);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+            fmt = IF_SVE_FO_3A;
+            break;
+
+        case INS_sve_rax1:
+        case INS_sve_sm4ekey:
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+
+            if (ins == INS_sve_rax1)
+            {
+                assert(opt == INS_OPTS_SCALABLE_D);
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_S);
+            }
+
+            fmt = IF_SVE_GJ_3A;
+            break;
+
+        case INS_sve_fmlalb:
+        case INS_sve_fmlalt:
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+
+            if (opt == INS_OPTS_SCALABLE_B)
+            {
+                unreached(); // TODO-SVE: Not yet supported.
+                fmt = IF_SVE_GN_3A;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_H);
+                fmt = IF_SVE_HB_3A;
+            }
+            break;
+
+        case INS_sve_fmlslb:
+        case INS_sve_fmlslt:
+        case INS_sve_bfmlalb:
+        case INS_sve_bfmlalt:
+        case INS_sve_bfmlslb:
+        case INS_sve_bfmlslt:
+            assert(opt == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+            fmt = IF_SVE_HB_3A;
+            break;
+
+        case INS_sve_bfmmla:
+            assert(opt == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+            fmt = IF_SVE_HD_3A;
+            break;
+
+        case INS_sve_fmmla:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(opt == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+            fmt = IF_SVE_HD_3A_A;
+            break;
+
+        case INS_sve_fmlallbb:
+        case INS_sve_fmlallbt:
+        case INS_sve_fmlalltb:
+        case INS_sve_fmlalltt:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+            fmt = IF_SVE_GO_3A;
+            break;
+
+        case INS_sve_bfclamp:
+            assert(opt == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+            fmt = IF_SVE_GW_3B;
+            break;
+
+        case INS_sve_bfdot:
+            assert(opt == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+            fmt = IF_SVE_HA_3A;
+            break;
+
+        case INS_sve_fdot:
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                fmt = IF_SVE_HA_3A;
+            }
+            else if (opt == INS_OPTS_SCALABLE_B)
+            {
+                unreached(); // TODO-SVE: Not yet supported.
+                fmt = IF_SVE_HA_3A_E;
+            }
+            else
+            {
+                unreached(); // TODO-SVE: Not yet supported.
+                assert(insOptsNone(opt));
+                fmt = IF_SVE_HA_3A_F;
+            }
+            break;
+
+        case INS_sve_eorbt:
+        case INS_sve_eortb:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_FP_3A;
+            break;
+
+        case INS_sve_bext:
+        case INS_sve_bdep:
+        case INS_sve_bgrp:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_FQ_3A;
+            break;
+
+        case INS_sve_saddlbt:
+        case INS_sve_ssublbt:
+        case INS_sve_ssubltb:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_FS_3A;
+            break;
+
+        case INS_sve_saba:
+        case INS_sve_uaba:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_FW_3A;
+            break;
+
+        case INS_sve_sabalb:
+        case INS_sve_sabalt:
+        case INS_sve_uabalb:
+        case INS_sve_uabalt:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_FX_3A;
+            break;
+
+        case INS_sve_addhnb:
+        case INS_sve_addhnt:
+        case INS_sve_raddhnb:
+        case INS_sve_raddhnt:
+        case INS_sve_subhnb:
+        case INS_sve_subhnt:
+        case INS_sve_rsubhnb:
+        case INS_sve_rsubhnt:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(insOptsScalableWide(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_GC_3A;
+            break;
+
+        case INS_sve_histseg:
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_GF_3A;
+            break;
+
+        case INS_sve_fclamp:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_GW_3A;
+            break;
+
+        case INS_sve_clz:
+        case INS_sve_cls:
+        case INS_sve_cnt:
+        case INS_sve_cnot:
+        case INS_sve_not:
+        case INS_sve_nots:
+            if (isPredicateRegister(reg1) && sopt != INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(opt == INS_OPTS_SCALABLE_B);
+                assert(isPredicateRegister(reg1)); // DDDD
+                assert(isPredicateRegister(reg2)); // gggg
+                assert(isPredicateRegister(reg3)); // NNNN
+                fmt = IF_SVE_CZ_4A;
+            }
+            else
+            {
+                assert(isVectorRegister(reg1));
+                assert(isLowPredicateRegister(reg2));
+                assert(isVectorRegister(reg3));
+                assert(insOptsScalableStandard(opt));
+                assert(insScalableOptsNone(sopt));
+                fmt = IF_SVE_AP_3A;
+            }
+            break;
+
+        case INS_sve_fabs:
+        case INS_sve_fneg:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableFloat(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AP_3A;
+            break;
+
+        case INS_sve_abs:
+        case INS_sve_neg:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AQ_3A;
+            break;
+
+        case INS_sve_sxtb:
+        case INS_sve_uxtb:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AQ_3A;
+            break;
+
+        case INS_sve_sxth:
+        case INS_sve_uxth:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableWords(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AQ_3A;
+            break;
+
+        case INS_sve_sxtw:
+        case INS_sve_uxtw:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(opt == INS_OPTS_SCALABLE_D);
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_AQ_3A;
+            break;
+
+        case INS_sve_index:
+            assert(isValidScalarDatasize(size));
+            assert(isVectorRegister(reg1));
+            assert(isGeneralRegisterOrZR(reg2));
+            assert(isGeneralRegisterOrZR(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_BA_3A;
+            break;
+
+        case INS_sve_sqdmulh:
+        case INS_sve_sqrdmulh:
+            assert(isScalableVectorSize(size));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_BE_3A;
+            break;
+
+        case INS_sve_ftssel:
+            assert(isScalableVectorSize(size));
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableFloat(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_BK_3A;
+            break;
+
+        case INS_sve_compact:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableWords(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_CL_3A;
+            break;
+
+        case INS_sve_clasta:
+        case INS_sve_clastb:
+            assert(insOptsScalableStandard(opt));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            if (isGeneralRegister(reg1))
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isValidScalarDatasize(size));
+                fmt = IF_SVE_CO_3A;
+            }
+            else if (sopt == INS_SCALABLE_OPTS_WITH_SIMD_SCALAR)
+            {
+                assert(isFloatReg(reg1));
+                assert(isValidVectorElemsize(size));
+                fmt = IF_SVE_CN_3A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isVectorRegister(reg1));
+                fmt = IF_SVE_CM_3A;
+            }
+            break;
+
+        case INS_sve_cpy:
+        case INS_sve_mov:
+            assert(insOptsScalableStandard(opt));
+            // TODO-SVE: Following checks can be simplified to check reg1 as predicate register only after adding
+            // definitions for predicate registers. Currently, predicate registers P0 to P15 are aliased to simd
+            // registers V0 to V15.
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(ins == INS_sve_mov);
+                assert(opt == INS_OPTS_SCALABLE_D);
+                assert(isVectorRegister(reg1)); // ddddd
+                assert(isVectorRegister(reg2)); // nnnnn
+                assert(isVectorRegister(reg3)); // mmmmm
+                fmt = IF_SVE_AU_3A;
+                // ORR is an alias for MOV, and is always the preferred disassembly.
+                ins = INS_sve_orr;
+            }
+            else if (isPredicateRegister(reg3) &&
+                     (sopt == INS_SCALABLE_OPTS_NONE || sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE))
+            {
+                assert(opt == INS_OPTS_SCALABLE_B);
+                assert(isPredicateRegister(reg1)); // DDDD
+                assert(isPredicateRegister(reg2)); // gggg
+                assert(isPredicateRegister(reg3)); // NNNN
+                fmt = sopt == INS_SCALABLE_OPTS_NONE ? IF_SVE_CZ_4A : IF_SVE_CZ_4A_K;
+                // MOV is an alias for CPY, and is always the preferred disassembly.
+                ins = INS_sve_mov;
+            }
+            else if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE)
+            {
+                assert(isVectorRegister(reg1));
+                assert(isPredicateRegister(reg2));
+                assert(isVectorRegister(reg3));
+                assert(insOptsScalableStandard(opt));
+                fmt = IF_SVE_CW_4A;
+            }
+            else
+            {
+                assert(isVectorRegister(reg1));
+                assert(isLowPredicateRegister(reg2));
+                if (isGeneralRegisterOrSP(reg3))
+                {
+                    assert(insScalableOptsNone(sopt));
+                    fmt  = IF_SVE_CQ_3A;
+                    reg3 = encodingSPtoZR(reg3);
+                }
+                else
+                {
+                    assert(sopt == INS_SCALABLE_OPTS_WITH_SIMD_SCALAR);
+                    assert(isVectorRegister(reg3));
+                    fmt = IF_SVE_CP_3A;
+                }
+
+                // MOV is an alias for CPY, and is always the preferred disassembly.
+                ins = INS_sve_mov;
+            }
+            break;
+
+        case INS_sve_lasta:
+        case INS_sve_lastb:
+            assert(insOptsScalableStandard(opt));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            if (isGeneralRegister(reg1))
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isGeneralRegister(reg1));
+                fmt = IF_SVE_CS_3A;
+            }
+            else if (sopt == INS_SCALABLE_OPTS_WITH_SIMD_SCALAR)
+            {
+                assert(isVectorRegister(reg1));
+                fmt = IF_SVE_CR_3A;
+            }
+            break;
+
+        case INS_sve_revd:
+            assert(isVectorRegister(reg1));       // ddddd
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // nnnnn
+            fmt = IF_SVE_CT_3A;
+            break;
+
+        case INS_sve_rbit:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_CU_3A;
+            break;
+
+        case INS_sve_revb:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_CU_3A;
+            break;
+
+        case INS_sve_revh:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableWords(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_CU_3A;
+            break;
+
+        case INS_sve_revw:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(opt == INS_OPTS_SCALABLE_D);
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_CU_3A;
+            break;
+
+        case INS_sve_splice:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            fmt = (sopt == INS_SCALABLE_OPTS_WITH_VECTOR_PAIR) ? IF_SVE_CV_3A : IF_SVE_CV_3B;
+            break;
+
+        case INS_sve_brka:
+        case INS_sve_brkb:
+            assert(isPredicateRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isPredicateRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE)
+            {
+                pmerge = true;
+            }
+            fmt = IF_SVE_DB_3A;
+            break;
+
+        case INS_sve_brkas:
+        case INS_sve_brkbs:
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isPredicateRegister(reg3));
+            fmt = IF_SVE_DB_3B;
+            break;
+
+        case INS_sve_brkn:
+        case INS_sve_brkns:
+            assert(insOptsScalable(opt));
+            assert(isPredicateRegister(reg1)); // MMMM
+            assert(isPredicateRegister(reg2)); // gggg
+            assert(isPredicateRegister(reg3)); // NNNN
+            fmt = IF_SVE_DC_3A;
+            break;
+
+        case INS_sve_cntp:
+            assert(size == EA_8BYTE);
+            assert(isGeneralRegister(reg1));                       // ddddd
+            assert(isPredicateRegister(reg2));                     // gggg
+            assert(isPredicateRegister(reg3));                     // NNNN
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_DK_3A;
+            break;
+
+        case INS_sve_shadd:
+        case INS_sve_shsub:
+        case INS_sve_shsubr:
+        case INS_sve_srhadd:
+        case INS_sve_uhadd:
+        case INS_sve_uhsub:
+        case INS_sve_uhsubr:
+        case INS_sve_urhadd:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_EP_3A;
+            break;
+
+        case INS_sve_sadalp:
+        case INS_sve_uadalp:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_EQ_3A;
+            break;
+
+        case INS_sve_addp:
+        case INS_sve_smaxp:
+        case INS_sve_sminp:
+        case INS_sve_umaxp:
+        case INS_sve_uminp:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_ER_3A;
+            break;
+
+        case INS_sve_sqabs:
+        case INS_sve_sqneg:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_ES_3A;
+            break;
+
+        case INS_sve_urecpe:
+        case INS_sve_ursqrte:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(opt == INS_OPTS_SCALABLE_S);
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_ES_3A;
+            break;
+
+        case INS_sve_sqadd:
+        case INS_sve_sqsub:
+        case INS_sve_uqadd:
+        case INS_sve_uqsub:
+            assert(isVectorRegister(reg1));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(isScalableVectorSize(size));
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(isVectorRegister(reg2));
+                fmt = IF_SVE_AT_3A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isLowPredicateRegister(reg2));
+                fmt = IF_SVE_ET_3A;
+            }
+            break;
+
+        case INS_sve_sqsubr:
+        case INS_sve_suqadd:
+        case INS_sve_uqsubr:
+        case INS_sve_usqadd:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_ET_3A;
+            break;
+
+        case INS_sve_sqrshl:
+        case INS_sve_sqrshlr:
+        case INS_sve_sqshl:
+        case INS_sve_sqshlr:
+        case INS_sve_srshl:
+        case INS_sve_srshlr:
+        case INS_sve_uqrshl:
+        case INS_sve_uqrshlr:
+        case INS_sve_uqshl:
+        case INS_sve_uqshlr:
+        case INS_sve_urshl:
+        case INS_sve_urshlr:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableStandard(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_EU_3A;
+            break;
+
+        case INS_sve_fcvtnt:
+        case INS_sve_fcvtlt:
+            assert(insOptsConvertFloatStepwise(opt));
+            FALLTHROUGH;
+        case INS_sve_fcvtxnt:
+        case INS_sve_bfcvtnt:
+            assert(isVectorRegister(reg1));       // ddddd
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // nnnnn
+            fmt = IF_SVE_GQ_3A;
+            break;
+
+        case INS_sve_faddp:
+        case INS_sve_fmaxnmp:
+        case INS_sve_fmaxp:
+        case INS_sve_fminnmp:
+        case INS_sve_fminp:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableFloat(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_GR_3A;
+            break;
+
+        case INS_sve_faddqv:
+        case INS_sve_fmaxnmqv:
+        case INS_sve_fminnmqv:
+        case INS_sve_fmaxqv:
+        case INS_sve_fminqv:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableFloat(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_GS_3A;
+            break;
+
+        case INS_sve_fmaxnmv:
+        case INS_sve_fmaxv:
+        case INS_sve_fminnmv:
+        case INS_sve_fminv:
+        case INS_sve_faddv:
+            assert(isFloatReg(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableFloat(opt));
+            assert(isValidVectorElemsizeSveFloat(size));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_HE_3A;
+            break;
+
+        case INS_sve_fadda:
+            assert(isFloatReg(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableFloat(opt));
+            assert(isValidVectorElemsizeSveFloat(size));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_HJ_3A;
+            break;
+
+        case INS_sve_frecps:
+        case INS_sve_frsqrts:
+        case INS_sve_ftsmul:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_HK_3A;
+            break;
+
+        case INS_sve_fadd:
+        case INS_sve_fsub:
+        case INS_sve_fmul:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(isVectorRegister(reg2)); // nnnnn
+                fmt = IF_SVE_HK_3A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isLowPredicateRegister(reg2)); // ggg
+                fmt = IF_SVE_HL_3A;
+            }
+            break;
+
+        case INS_sve_fabd:
+        case INS_sve_fdiv:
+        case INS_sve_fdivr:
+        case INS_sve_fmax:
+        case INS_sve_fmaxnm:
+        case INS_sve_fmin:
+        case INS_sve_fminnm:
+        case INS_sve_fmulx:
+        case INS_sve_fscale:
+        case INS_sve_fsubr:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_HL_3A;
+            break;
+
+        case INS_sve_famax:
+        case INS_sve_famin:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableFloat(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_HL_3A;
+            break;
+
+        case INS_sve_bfmul:
+        case INS_sve_bfadd:
+        case INS_sve_bfsub:
+        case INS_sve_bfmaxnm:
+        case INS_sve_bfminnm:
+        case INS_sve_bfmax:
+        case INS_sve_bfmin:
+            assert(opt == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg3)); // mmmmm
+
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                assert(isVectorRegister(reg2)); // nnnnn
+                fmt = IF_SVE_HK_3B;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(isLowPredicateRegister(reg2)); // ggg
+                fmt = IF_SVE_HL_3B;
+            }
+            break;
+
+        case INS_sve_bsl:
+        case INS_sve_eor3:
+        case INS_sve_bcax:
+        case INS_sve_bsl1n:
+        case INS_sve_bsl2n:
+        case INS_sve_nbsl:
+            assert(opt == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // mmmmm
+            assert(isVectorRegister(reg3)); // kkkkk
+            fmt = IF_SVE_AV_3A;
+            break;
+
+        case INS_sve_frintn:
+        case INS_sve_frintm:
+        case INS_sve_frintp:
+        case INS_sve_frintz:
+        case INS_sve_frinta:
+        case INS_sve_frintx:
+        case INS_sve_frinti:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableFloat(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_HQ_3A;
+            break;
+
+        case INS_sve_bfcvt:
+            assert(isVectorRegister(reg1));       // ddddd
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // nnnnn
+            fmt = IF_SVE_HO_3A;
+            break;
+
+        case INS_sve_fcvt:
+            assert(isVectorRegister(reg1));       // ddddd
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // nnnnn
+            fmt = IF_SVE_HO_3B;
+            break;
+
+        case INS_sve_fcvtx:
+            assert(isVectorRegister(reg1));       // ddddd
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // nnnnn
+            fmt = IF_SVE_HO_3C;
+            break;
+
+        case INS_sve_fcvtzs:
+        case INS_sve_fcvtzu:
+            assert(insOptsScalableFloat(opt) || opt == INS_OPTS_H_TO_S || opt == INS_OPTS_H_TO_D ||
+                   opt == INS_OPTS_S_TO_D || opt == INS_OPTS_D_TO_S);
+            assert(isVectorRegister(reg1));       // ddddd
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // nnnnn
+            fmt = IF_SVE_HP_3B;
+            break;
+
+        case INS_sve_scvtf:
+        case INS_sve_ucvtf:
+            assert(insOptsScalableAtLeastHalf(opt) || opt == INS_OPTS_S_TO_H || opt == INS_OPTS_S_TO_D ||
+                   opt == INS_OPTS_D_TO_H || opt == INS_OPTS_D_TO_S);
+            assert(isVectorRegister(reg1));       // ddddd
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // nnnnn
+            fmt = IF_SVE_HS_3A;
+            break;
+
+        case INS_sve_frecpx:
+        case INS_sve_fsqrt:
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(insOptsScalableFloat(opt));
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_HR_3A;
+            break;
+
+        case INS_sve_whilege:
+        case INS_sve_whilegt:
+        case INS_sve_whilelt:
+        case INS_sve_whilele:
+        case INS_sve_whilehs:
+        case INS_sve_whilehi:
+        case INS_sve_whilelo:
+        case INS_sve_whilels:
+            assert(isGeneralRegister(reg2));                       // nnnnn
+            assert(isGeneralRegister(reg3));                       // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            assert(insOptsScalableStandard(opt));
+
+            if (insScalableOptsNone(sopt))
+            {
+                assert(isPredicateRegister(reg1));    // DDDD
+                assert(isValidGeneralDatasize(size)); // X
+                fmt = IF_SVE_DT_3A;
+            }
+            else if (insScalableOptsWithPredicatePair(sopt))
+            {
+                assert(isLowPredicateRegister(reg1)); // DDD
+                assert(size == EA_8BYTE);
+                fmt = IF_SVE_DX_3A;
+            }
+            else
+            {
+                assert(insScalableOptsWithVectorLength(sopt)); // l
+                assert(isHighPredicateRegister(reg1));         // DDD
+                assert(size == EA_8BYTE);
+                vectorLength4x = (sopt == INS_SCALABLE_OPTS_VL_4X);
+                fmt            = IF_SVE_DY_3A;
+            }
+            break;
+
+        case INS_sve_whilewr:
+        case INS_sve_whilerw:
+            assert(insOptsScalableStandard(opt));
+            assert(isPredicateRegister(reg1)); // DDDD
+            assert(isGeneralRegister(reg2));   // nnnnn
+            assert(size == EA_8BYTE);
+            assert(isGeneralRegister(reg3));                       // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_DU_3A;
+            break;
+
+        case INS_sve_movs:
+            assert(insOptsScalable(opt));
+            assert(isPredicateRegister(reg1)); // DDDD
+            assert(isPredicateRegister(reg2)); // gggg
+            assert(isPredicateRegister(reg3)); // NNNN
+            fmt = IF_SVE_CZ_4A;
+            break;
+
+        case INS_sve_adclb:
+        case INS_sve_adclt:
+        case INS_sve_sbclb:
+        case INS_sve_sbclt:
+            assert(insOptsScalableWords(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // x
+            fmt = IF_SVE_FY_3A;
+            break;
+
+        case INS_sve_mlapt:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(insOptsNone(opt));
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+            fmt = IF_SVE_EW_3A;
+            break;
+
+        case INS_sve_madpt:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(insOptsNone(opt));
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // mmmmm
+            assert(isVectorRegister(reg3)); // aaaaa
+            fmt = IF_SVE_EW_3B;
+            break;
+
+        case INS_sve_fcmeq:
+        case INS_sve_fcmge:
+        case INS_sve_fcmgt:
+        case INS_sve_fcmlt:
+        case INS_sve_fcmle:
+        case INS_sve_fcmne:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(insScalableOptsNone(sopt));
+            assert(isPredicateRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_HI_3A;
+            break;
+
+        case INS_sve_flogb:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_HP_3A;
+            break;
+
+        case INS_sve_ld1b:
+        case INS_sve_ld1h:
+        case INS_sve_ld1w:
+        case INS_sve_ld1d:
+            return emitIns_R_R_R_I(ins, size, reg1, reg2, reg3, 0, opt);
+
+        default:
+            unreached();
+            break;
+    }
+
+    assert(fmt != IF_NONE);
+
+    instrDesc* id = emitNewInstr(attr);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
+
+    if (pmerge)
+    {
+        id->idPredicateReg2Merge(pmerge);
+    }
+    else if (vectorLength4x)
+    {
+        id->idVectorLength4x(vectorLength4x);
+    }
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing three registers and a constant.
+ *  Do not call this directly. Use 'emitIns_R_R_R_I' instead.
+ */
+
+void emitter::emitInsSve_R_R_R_I(instruction     ins,
+                                 emitAttr        attr,
+                                 regNumber       reg1,
+                                 regNumber       reg2,
+                                 regNumber       reg3,
+                                 ssize_t         imm,
+                                 insOpts         opt /* = INS_OPTS_NONE */,
+                                 insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
+{
+    emitAttr  size     = EA_SIZE(attr);
+    emitAttr  elemsize = EA_UNKNOWN;
+    insFormat fmt      = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_sve_adr:
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+            assert(isValidUimm<2>(imm));
+            switch (opt)
+            {
+                case INS_OPTS_SCALABLE_S:
+                case INS_OPTS_SCALABLE_D:
+                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                    fmt = IF_SVE_BH_3A;
+                    break;
+                case INS_OPTS_SCALABLE_D_SXTW:
+                    fmt = IF_SVE_BH_3B;
+                    break;
+                case INS_OPTS_SCALABLE_D_UXTW:
+                    fmt = IF_SVE_BH_3B_A;
+                    break;
+                default:
+                    assert(!"invalid instruction");
+                    break;
+            }
+            break;
+
+        case INS_sve_cmpeq:
+        case INS_sve_cmpgt:
+        case INS_sve_cmpge:
+        case INS_sve_cmpne:
+        case INS_sve_cmple:
+        case INS_sve_cmplt:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableStandard(opt));
+            assert(isPredicateRegister(reg1));    // DDDD
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // nnnnn
+            assert(isValidSimm<5>(imm));          // iiiii
+            fmt = IF_SVE_CY_3A;
+            break;
+
+        case INS_sve_cmphi:
+        case INS_sve_cmphs:
+        case INS_sve_cmplo:
+        case INS_sve_cmpls:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableStandard(opt));
+            assert(isPredicateRegister(reg1));    // DDDD
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // nnnnn
+            assert(isValidUimm<7>(imm));          // iiiii
+            fmt = IF_SVE_CY_3B;
+            break;
+
+        case INS_sve_sdot:
+        case INS_sve_udot:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+
+            if (opt == INS_OPTS_SCALABLE_B)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<2>(imm));                  // ii
+                fmt = IF_SVE_EY_3A;
+            }
+            else if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<2>(imm));                  // ii
+                fmt = IF_SVE_EG_3A;
+            }
+            else
+            {
+                assert(insOptsNone(opt));
+                assert(isValidUimm<1>(imm)); // i
+                opt = INS_OPTS_SCALABLE_H;
+                fmt = IF_SVE_EY_3B;
+            }
+            break;
+
+        case INS_sve_usdot:
+        case INS_sve_sudot:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmm
+            assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
+            assert(isValidUimm<2>(imm)); // ii
+            fmt = IF_SVE_EZ_3A;
+            break;
+
+        case INS_sve_mul:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+
+            switch (opt)
+            {
+                case INS_OPTS_SCALABLE_H:
+                    assert(isValidUimm<3>(imm));                  // iii
+                    assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                    fmt = IF_SVE_FD_3A;
+                    break;
+
+                case INS_OPTS_SCALABLE_S:
+                    assert(isValidUimm<2>(imm));                  // ii
+                    assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                    fmt = IF_SVE_FD_3B;
+                    break;
+
+                case INS_OPTS_SCALABLE_D:
+                    assert(isValidUimm<1>(imm)); // i
+                    fmt = IF_SVE_FD_3C;
+                    break;
+
+                default:
+                    unreached();
+                    break;
+            }
+            break;
+
+        case INS_sve_cdot:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableWords(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidRot(imm));                               // rr
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+
+            // Convert rot to bitwise representation
+            imm = emitEncodeRotationImm0_to_270(imm);
+            fmt = IF_SVE_EJ_3A;
+            break;
+
+        case INS_sve_cmla:
+        case INS_sve_sqrdcmlah:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isVectorRegister(reg2));                        // nnnnn
+            assert(isVectorRegister(reg3));                        // mmmmm
+            assert(isValidRot(imm));                               // rr
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+
+            // Convert rot to bitwise representation
+            imm = emitEncodeRotationImm0_to_270(imm);
+            fmt = IF_SVE_EK_3A;
+            break;
+
+        case INS_sve_ld1d:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalable(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+
+            if (isGeneralRegister(reg3))
+            {
+                assert(isValidSimm<4>(imm));
+                if (opt == INS_OPTS_SCALABLE_Q)
+                {
+                    fmt = IF_SVE_IH_3A_A;
+                }
+                else
+                {
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    fmt = IF_SVE_IH_3A;
+                }
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_D);
+                assert(isVectorRegister(reg3));
+                assert((isValidUimm_MultipleOf<5, 8>(imm)));
+                fmt = IF_SVE_IV_3A;
+            }
+            break;
+
+        case INS_sve_ldff1d:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert((isValidUimm_MultipleOf<5, 8>(imm)));
+            fmt = IF_SVE_IV_3A;
+            break;
+
+        case INS_sve_ld1w:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableWordsOrQuadwords(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+
+            if (isGeneralRegister(reg3))
+            {
+                assert(isValidSimm<4>(imm));
+                fmt = IF_SVE_IH_3A_F;
+            }
+            else
+            {
+                assert(insOptsScalableWords(opt));
+                assert(isVectorRegister(reg3));
+                assert((isValidUimm_MultipleOf<5, 4>(imm)));
+                fmt = IF_SVE_HX_3A_E;
+            }
+            break;
+
+        case INS_sve_ld1sw:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+
+            if (isGeneralRegister(reg3))
+            {
+                assert(isValidSimm<4>(imm));
+                fmt = IF_SVE_IJ_3A;
+            }
+            else
+            {
+                assert(isVectorRegister(reg3));
+                assert((isValidUimm_MultipleOf<5, 4>(imm)));
+                fmt = IF_SVE_IV_3A;
+            }
+            break;
+
+        case INS_sve_ldff1sw:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert((isValidUimm_MultipleOf<5, 4>(imm)));
+            fmt = IF_SVE_IV_3A;
+            break;
+
+        case INS_sve_ld1sb:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+
+            if (isGeneralRegister(reg3))
+            {
+                assert(isGeneralRegister(reg3));
+                assert(isValidSimm<4>(imm));
+                fmt = IF_SVE_IJ_3A_D;
+            }
+            else
+            {
+                assert(insOptsScalableWords(opt));
+                assert(isVectorRegister(reg3));
+                assert(isValidUimm<5>(imm));
+                fmt = IF_SVE_HX_3A_B;
+            }
+            break;
+
+        case INS_sve_ld1b:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+
+            if (isGeneralRegister(reg3))
+            {
+                assert(isValidSimm<4>(imm));
+                fmt = IF_SVE_IJ_3A_E;
+            }
+            else
+            {
+                assert(insOptsScalableWords(opt));
+                assert(isVectorRegister(reg3));
+                assert(isValidUimm<5>(imm));
+                fmt = IF_SVE_HX_3A_B;
+            }
+            break;
+
+        case INS_sve_ldff1b:
+        case INS_sve_ldff1sb:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableWords(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isValidUimm<5>(imm));
+            fmt = IF_SVE_HX_3A_B;
+            break;
+
+        case INS_sve_ld1sh:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableWords(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+
+            if (isGeneralRegister(reg3))
+            {
+                assert(isValidSimm<4>(imm));
+                fmt = IF_SVE_IJ_3A_F;
+            }
+            else
+            {
+                assert(isVectorRegister(reg3));
+                assert((isValidUimm_MultipleOf<5, 2>(imm)));
+                fmt = IF_SVE_HX_3A_E;
+            }
+            break;
+
+        case INS_sve_ld1h:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+
+            if (isGeneralRegister(reg3))
+            {
+                assert(isValidSimm<4>(imm));
+                fmt = IF_SVE_IJ_3A_G;
+            }
+            else
+            {
+                assert(isVectorRegister(reg3));
+                assert((isValidUimm_MultipleOf<5, 2>(imm)));
+                fmt = IF_SVE_HX_3A_E;
+            }
+            break;
+
+        case INS_sve_ldff1h:
+        case INS_sve_ldff1sh:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableWords(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert((isValidUimm_MultipleOf<5, 2>(imm)));
+            fmt = IF_SVE_HX_3A_E;
+            break;
+
+        case INS_sve_ldff1w:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableWords(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert((isValidUimm_MultipleOf<5, 4>(imm)));
+            fmt = IF_SVE_HX_3A_E;
+            break;
+
+        case INS_sve_ldnf1sw:
+        case INS_sve_ldnf1d:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isValidSimm<4>(imm));
+            fmt = IF_SVE_IL_3A;
+            break;
+
+        case INS_sve_ldnf1sh:
+        case INS_sve_ldnf1w:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableWords(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isValidSimm<4>(imm));
+            fmt = IF_SVE_IL_3A_A;
+            break;
+
+        case INS_sve_ldnf1h:
+        case INS_sve_ldnf1sb:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isValidSimm<4>(imm));
+            fmt = IF_SVE_IL_3A_B;
+            break;
+
+        case INS_sve_ldnf1b:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isValidSimm<4>(imm));
+            fmt = IF_SVE_IL_3A_C;
+            break;
+
+        case INS_sve_ldnt1b:
+        case INS_sve_ldnt1h:
+        case INS_sve_ldnt1w:
+        case INS_sve_ldnt1d:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isValidSimm<4>(imm));
+
+#ifdef DEBUG
+            switch (ins)
+            {
+                case INS_sve_ldnt1b:
+                    assert(opt == INS_OPTS_SCALABLE_B);
+                    break;
+
+                case INS_sve_ldnt1h:
+                    assert(opt == INS_OPTS_SCALABLE_H);
+                    break;
+
+                case INS_sve_ldnt1w:
+                    assert(opt == INS_OPTS_SCALABLE_S);
+                    break;
+
+                case INS_sve_ldnt1d:
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+#endif // DEBUG
+
+            fmt = IF_SVE_IM_3A;
+            break;
+
+        case INS_sve_ld1rqb:
+        case INS_sve_ld1rob:
+        case INS_sve_ld1rqh:
+        case INS_sve_ld1roh:
+        case INS_sve_ld1rqw:
+        case INS_sve_ld1row:
+        case INS_sve_ld1rqd:
+        case INS_sve_ld1rod:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+
+#ifdef DEBUG
+            switch (ins)
+            {
+                case INS_sve_ld1rqb:
+                case INS_sve_ld1rqd:
+                case INS_sve_ld1rqh:
+                case INS_sve_ld1rqw:
+                    assert((isValidSimm_MultipleOf<4, 16>(imm)));
+                    break;
+
+                case INS_sve_ld1rob:
+                case INS_sve_ld1rod:
+                case INS_sve_ld1roh:
+                case INS_sve_ld1row:
+                    assert((isValidSimm_MultipleOf<4, 32>(imm)));
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+
+            switch (ins)
+            {
+                case INS_sve_ld1rqb:
+                case INS_sve_ld1rob:
+                    assert(opt == INS_OPTS_SCALABLE_B);
+                    break;
+
+                case INS_sve_ld1rqh:
+                case INS_sve_ld1roh:
+                    assert(opt == INS_OPTS_SCALABLE_H);
+                    break;
+
+                case INS_sve_ld1rqw:
+                case INS_sve_ld1row:
+                    assert(opt == INS_OPTS_SCALABLE_S);
+                    break;
+
+                case INS_sve_ld1rqd:
+                case INS_sve_ld1rod:
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+#endif // DEBUG
+
+            fmt = IF_SVE_IO_3A;
+            break;
+
+        case INS_sve_ld2q:
+        case INS_sve_ld3q:
+        case INS_sve_ld4q:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_Q);
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+
+#ifdef DEBUG
+            switch (ins)
+            {
+                case INS_sve_ld2q:
+                    assert((isValidSimm_MultipleOf<4, 2>(imm)));
+                    break;
+
+                case INS_sve_ld3q:
+                    assert((isValidSimm_MultipleOf<4, 3>(imm)));
+                    break;
+
+                case INS_sve_ld4q:
+                    assert((isValidSimm_MultipleOf<4, 4>(imm)));
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+#endif // DEBUG
+
+            fmt = IF_SVE_IQ_3A;
+            break;
+
+        case INS_sve_ld2b:
+        case INS_sve_ld3b:
+        case INS_sve_ld4b:
+        case INS_sve_ld2h:
+        case INS_sve_ld3h:
+        case INS_sve_ld4h:
+        case INS_sve_ld2w:
+        case INS_sve_ld3w:
+        case INS_sve_ld4w:
+        case INS_sve_ld2d:
+        case INS_sve_ld3d:
+        case INS_sve_ld4d:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+
+#ifdef DEBUG
+            switch (ins)
+            {
+                case INS_sve_ld2b:
+                case INS_sve_ld2h:
+                case INS_sve_ld2w:
+                case INS_sve_ld2d:
+                    assert((isValidSimm_MultipleOf<4, 2>(imm)));
+                    break;
+
+                case INS_sve_ld3b:
+                case INS_sve_ld3h:
+                case INS_sve_ld3w:
+                case INS_sve_ld3d:
+                    assert((isValidSimm_MultipleOf<4, 3>(imm)));
+                    break;
+
+                case INS_sve_ld4b:
+                case INS_sve_ld4h:
+                case INS_sve_ld4w:
+                case INS_sve_ld4d:
+                    assert((isValidSimm_MultipleOf<4, 4>(imm)));
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+
+            switch (ins)
+            {
+                case INS_sve_ld2b:
+                case INS_sve_ld3b:
+                case INS_sve_ld4b:
+                    assert(opt == INS_OPTS_SCALABLE_B);
+                    break;
+
+                case INS_sve_ld2h:
+                case INS_sve_ld3h:
+                case INS_sve_ld4h:
+                    assert(opt == INS_OPTS_SCALABLE_H);
+                    break;
+
+                case INS_sve_ld2w:
+                case INS_sve_ld3w:
+                case INS_sve_ld4w:
+                    assert(opt == INS_OPTS_SCALABLE_S);
+                    break;
+
+                case INS_sve_ld2d:
+                case INS_sve_ld3d:
+                case INS_sve_ld4d:
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+#endif // DEBUG
+
+            fmt = IF_SVE_IS_3A;
+            break;
+
+        case INS_sve_st2q:
+        case INS_sve_st3q:
+        case INS_sve_st4q:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_Q);
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+
+#ifdef DEBUG
+            switch (ins)
+            {
+                case INS_sve_st2q:
+                    assert((isValidSimm_MultipleOf<4, 2>(imm)));
+                    break;
+
+                case INS_sve_st3q:
+                    assert((isValidSimm_MultipleOf<4, 3>(imm)));
+                    break;
+
+                case INS_sve_st4q:
+                    assert((isValidSimm_MultipleOf<4, 4>(imm)));
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+#endif // DEBUG
+
+            fmt = IF_SVE_JE_3A;
+            break;
+
+        case INS_sve_stnt1b:
+        case INS_sve_stnt1h:
+        case INS_sve_stnt1w:
+        case INS_sve_stnt1d:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isValidSimm<4>(imm));
+
+#ifdef DEBUG
+            switch (ins)
+            {
+                case INS_sve_stnt1b:
+                    assert(opt == INS_OPTS_SCALABLE_B);
+                    break;
+
+                case INS_sve_stnt1h:
+                    assert(opt == INS_OPTS_SCALABLE_H);
+                    break;
+
+                case INS_sve_stnt1w:
+                    assert(opt == INS_OPTS_SCALABLE_S);
+                    break;
+
+                case INS_sve_stnt1d:
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+#endif // DEBUG
+
+            fmt = IF_SVE_JM_3A;
+            break;
+
+        case INS_sve_st1w:
+        case INS_sve_st1d:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+
+            if (isGeneralRegister(reg3))
+            {
+                assert(isValidSimm<4>(imm));
+
+                if (opt == INS_OPTS_SCALABLE_Q && (ins == INS_sve_st1d))
+                {
+                    fmt = IF_SVE_JN_3C_D;
+                }
+                else
+                {
+                    if ((ins == INS_sve_st1w) && insOptsScalableWords(opt))
+                    {
+                        fmt = IF_SVE_JN_3B;
+                    }
+                    else
+                    {
+#if DEBUG
+                        if (ins == INS_sve_st1w)
+                        {
+                            assert(opt == INS_OPTS_SCALABLE_Q);
+                        }
+                        else
+                        {
+                            assert(opt == INS_OPTS_SCALABLE_D);
+                        }
+#endif // DEBUG
+                        fmt = IF_SVE_JN_3C;
+                    }
+                }
+            }
+            else
+            {
+                assert(isVectorRegister(reg3));
+                if ((ins == INS_sve_st1w) && insOptsScalableWords(opt))
+                {
+                    assert((isValidUimm_MultipleOf<5, 4>(imm)));
+                    fmt = IF_SVE_JI_3A_A;
+                }
+                else
+                {
+                    assert(ins == INS_sve_st1d);
+                    assert((isValidUimm_MultipleOf<5, 8>(imm)));
+                    fmt = IF_SVE_JL_3A;
+                }
+            }
+            break;
+
+        case INS_sve_st2b:
+        case INS_sve_st3b:
+        case INS_sve_st4b:
+        case INS_sve_st2h:
+        case INS_sve_st3h:
+        case INS_sve_st4h:
+        case INS_sve_st2w:
+        case INS_sve_st3w:
+        case INS_sve_st4w:
+        case INS_sve_st2d:
+        case INS_sve_st3d:
+        case INS_sve_st4d:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+
+#ifdef DEBUG
+            switch (ins)
+            {
+                case INS_sve_st2b:
+                case INS_sve_st2h:
+                case INS_sve_st2w:
+                case INS_sve_st2d:
+                    assert((isValidSimm_MultipleOf<4, 2>(imm)));
+                    break;
+
+                case INS_sve_st3b:
+                case INS_sve_st3h:
+                case INS_sve_st3w:
+                case INS_sve_st3d:
+                    assert((isValidSimm_MultipleOf<4, 3>(imm)));
+                    break;
+
+                case INS_sve_st4b:
+                case INS_sve_st4h:
+                case INS_sve_st4w:
+                case INS_sve_st4d:
+                    assert((isValidSimm_MultipleOf<4, 4>(imm)));
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+
+            switch (ins)
+            {
+                case INS_sve_st2b:
+                case INS_sve_st3b:
+                case INS_sve_st4b:
+                    assert(opt == INS_OPTS_SCALABLE_B);
+                    break;
+
+                case INS_sve_st2h:
+                case INS_sve_st3h:
+                case INS_sve_st4h:
+                    assert(opt == INS_OPTS_SCALABLE_H);
+                    break;
+
+                case INS_sve_st2w:
+                case INS_sve_st3w:
+                case INS_sve_st4w:
+                    assert(opt == INS_OPTS_SCALABLE_S);
+                    break;
+
+                case INS_sve_st2d:
+                case INS_sve_st3d:
+                case INS_sve_st4d:
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+#endif // DEBUG
+
+            fmt = IF_SVE_JO_3A;
+            break;
+
+        case INS_sve_st1b:
+        case INS_sve_st1h:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+
+            if (isGeneralRegister(reg3))
+            {
+                assert(isValidSimm<4>(imm));
+                // st1h is reserved for scalable B
+                assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) : insOptsScalableStandard(opt));
+                fmt = IF_SVE_JN_3A;
+            }
+            else
+            {
+                assert(insOptsScalableWords(opt));
+                assert(isVectorRegister(reg3));
+
+#ifdef DEBUG
+                switch (ins)
+                {
+                    case INS_sve_st1b:
+                        assert(isValidUimm<5>(imm));
+                        break;
+
+                    case INS_sve_st1h:
+                        assert((isValidUimm_MultipleOf<5, 2>(imm)));
+                        break;
+
+                    default:
+                        assert(!"Invalid instruction");
+                        break;
+                }
+#endif // DEBUG
+
+                fmt = IF_SVE_JI_3A_A;
+            }
+            break;
+
+        case INS_sve_fmla:
+        case INS_sve_fmls:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+
+            if (opt == INS_OPTS_SCALABLE_S)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<2>(imm));                  // ii
+                fmt = IF_SVE_GU_3A;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_D);
+                assert(isValidUimm<1>(imm)); // i
+                fmt = IF_SVE_GU_3B;
+            }
+            break;
+
+        case INS_sve_bfmla:
+        case INS_sve_bfmls:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmm
+            assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
+            assert(isValidUimm<3>(imm)); // i ii
+            fmt = IF_SVE_GU_3C;
+            break;
+
+        case INS_sve_fmul:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+
+            if (opt == INS_OPTS_SCALABLE_S)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<2>(imm));                  // ii
+                fmt = IF_SVE_GX_3A;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_D);
+                assert(isValidUimm<1>(imm)); // i
+                fmt = IF_SVE_GX_3B;
+            }
+            break;
+
+        case INS_sve_bfmul:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmm
+            assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
+            assert(isValidUimm<3>(imm)); // i ii
+            fmt = IF_SVE_GX_3C;
+            break;
+
+        case INS_sve_fdot:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmm
+            assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
+            assert(isValidUimm<2>(imm)); // ii
+
+            if (opt == INS_OPTS_SCALABLE_B)
+            {
+                unreached();                 // TODO-SVE: Not yet supported.
+                assert(isValidUimm<2>(imm)); // ii
+                fmt = IF_SVE_GY_3B_D;
+            }
+            else if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert(isValidUimm<2>(imm)); // ii
+                fmt = IF_SVE_GY_3B;
+            }
+            else
+            {
+                unreached(); // TODO-SVE: Not yet supported.
+                assert(insOptsNone(opt));
+                assert(isValidUimm<3>(imm)); // i ii
+
+                // Simplify emitDispInsHelp logic by setting insOpt
+                opt = INS_OPTS_SCALABLE_B;
+                fmt = IF_SVE_GY_3A;
+            }
+            break;
+
+        case INS_sve_bfdot:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmm
+            assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
+            assert(isValidUimm<2>(imm)); // ii
+            fmt = IF_SVE_GY_3B;
+            break;
+
+        case INS_sve_mla:
+        case INS_sve_mls:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<3>(imm));                  // i ii
+                fmt = IF_SVE_FF_3A;
+            }
+            else if (opt == INS_OPTS_SCALABLE_S)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<2>(imm));                  // ii
+                fmt = IF_SVE_FF_3B;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_D);
+                assert(isValidUimm<1>(imm)); // i
+                fmt = IF_SVE_FF_3C;
+            }
+            break;
+
+        case INS_sve_smullb:
+        case INS_sve_smullt:
+        case INS_sve_umullb:
+        case INS_sve_umullt:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<3>(imm));                  // ii i
+                fmt = IF_SVE_FE_3A;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_S);
+                assert(isValidUimm<2>(imm)); // i i
+                fmt = IF_SVE_FE_3B;
+            }
+            break;
+
+        case INS_sve_smlalb:
+        case INS_sve_smlalt:
+        case INS_sve_umlalb:
+        case INS_sve_umlalt:
+        case INS_sve_smlslb:
+        case INS_sve_smlslt:
+        case INS_sve_umlslb:
+        case INS_sve_umlslt:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<3>(imm));                  // ii i
+                fmt = IF_SVE_FG_3A;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_S);
+                assert(isValidUimm<2>(imm)); // i i
+                fmt = IF_SVE_FG_3B;
+            }
+            break;
+
+        case INS_sve_sqdmullb:
+        case INS_sve_sqdmullt:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<3>(imm));                  // ii i
+                fmt = IF_SVE_FH_3A;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_S);
+                assert(isValidUimm<2>(imm)); // i i
+                fmt = IF_SVE_FH_3B;
+            }
+            break;
+
+        case INS_sve_sqdmulh:
+        case INS_sve_sqrdmulh:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<3>(imm));                  // ii i
+                fmt = IF_SVE_FI_3A;
+            }
+            else if (opt == INS_OPTS_SCALABLE_S)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<2>(imm));                  // ii
+                fmt = IF_SVE_FI_3B;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_D);
+                assert(isValidUimm<1>(imm)); // i
+                fmt = IF_SVE_FI_3C;
+            }
+            break;
+
+        case INS_sve_sqdmlalb:
+        case INS_sve_sqdmlalt:
+        case INS_sve_sqdmlslb:
+        case INS_sve_sqdmlslt:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<3>(imm));                  // ii i
+                fmt = IF_SVE_FJ_3A;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_S);
+                assert(isValidUimm<2>(imm)); // ii
+                fmt = IF_SVE_FJ_3B;
+            }
+            break;
+
+        case INS_sve_sqrdmlah:
+        case INS_sve_sqrdmlsh:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<3>(imm));                  // i ii
+                fmt = IF_SVE_FK_3A;
+            }
+            else if (opt == INS_OPTS_SCALABLE_S)
+            {
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                assert(isValidUimm<2>(imm));                  // ii
+                fmt = IF_SVE_FK_3B;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_D);
+                assert(isValidUimm<1>(imm)); // i
+                fmt = IF_SVE_FK_3C;
+            }
+            break;
+
+        case INS_sve_fcadd:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isScalableVectorSize(size));
+            imm = emitEncodeRotationImm90_or_270(imm);
+            fmt = IF_SVE_GP_3A;
+            break;
+
+        case INS_sve_ld1rd:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert((isValidUimm_MultipleOf<6, 8>(imm)));
+            fmt = IF_SVE_IC_3A;
+            break;
+
+        case INS_sve_ld1rsw:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert((isValidUimm_MultipleOf<6, 4>(imm)));
+            fmt = IF_SVE_IC_3A;
+            break;
+
+        case INS_sve_ld1rsh:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableWords(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert((isValidUimm_MultipleOf<6, 2>(imm)));
+            fmt = IF_SVE_IC_3A_A;
+            break;
+
+        case INS_sve_ld1rw:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableWords(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert((isValidUimm_MultipleOf<6, 4>(imm)));
+            fmt = IF_SVE_IC_3A_A;
+            break;
+
+        case INS_sve_ld1rh:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert((isValidUimm_MultipleOf<6, 2>(imm)));
+            fmt = IF_SVE_IC_3A_B;
+            break;
+
+        case INS_sve_ld1rsb:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isValidUimm<6>(imm));
+            fmt = IF_SVE_IC_3A_B;
+            break;
+
+        case INS_sve_ld1rb:
+            assert(insScalableOptsNone(sopt));
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isValidUimm<6>(imm));
+            fmt = IF_SVE_IC_3A_C;
+            break;
+
+        case INS_sve_fmlalb:
+        case INS_sve_fmlalt:
+        case INS_sve_fmlslb:
+        case INS_sve_fmlslt:
+        case INS_sve_bfmlalb:
+        case INS_sve_bfmlalt:
+        case INS_sve_bfmlslb:
+        case INS_sve_bfmlslt:
+            assert(insScalableOptsNone(sopt));
+            assert(opt == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmm
+            assert((REG_V0 <= reg3) && (reg3 <= REG_V7));
+            assert(isValidUimm<3>(imm)); // ii i
+            fmt = IF_SVE_GZ_3A;
+            break;
+
+        case INS_sve_luti2:
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert(isValidUimm<3>(imm)); // iii
+                fmt = IF_SVE_GG_3B;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_B);
+                assert(isValidUimm<2>(imm)); // ii i
+                fmt = IF_SVE_GG_3A;
+            }
+            unreached();
+            break;
+
+        case INS_sve_luti4:
+            assert(isVectorRegister(reg1)); // ddddd
+            assert(isVectorRegister(reg2)); // nnnnn
+            assert(isVectorRegister(reg3)); // mmmmm
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert(isValidUimm<2>(imm));
+
+                if (sopt == INS_SCALABLE_OPTS_WITH_VECTOR_PAIR)
+                {
+                    fmt = IF_SVE_GH_3B;
+                }
+                else
+                {
+                    assert(insScalableOptsNone(sopt));
+                    fmt = IF_SVE_GH_3B_B;
+                }
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_B);
+                assert(insScalableOptsNone(sopt));
+                assert(isValidUimm<1>(imm)); // i
+                fmt = IF_SVE_GH_3A;
+            }
+            unreached();
+            break;
+
+        default:
+            unreached();
+            break;
+
+    } // end switch (ins)
+    assert(fmt != IF_NONE);
+
+    instrDesc* id = emitNewInstrCns(attr, imm);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing three registers and two constants.
+ */
+
+void emitter::emitInsSve_R_R_R_I_I(instruction ins,
+                                   emitAttr    attr,
+                                   regNumber   reg1,
+                                   regNumber   reg2,
+                                   regNumber   reg3,
+                                   ssize_t     imm1,
+                                   ssize_t     imm2,
+                                   insOpts     opt)
+{
+    insFormat fmt = IF_NONE;
+    ssize_t   imm;
+
+    switch (ins)
+    {
+        case INS_sve_cdot:
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+            assert(isValidRot(imm2));          // rr
+            // Convert imm2 from rotation value (0-270) to bitwise representation (0-3)
+            imm = (imm1 << 2) | emitEncodeRotationImm0_to_270(imm2);
+
+            if (opt == INS_OPTS_SCALABLE_B)
+            {
+                assert(isValidUimm<2>(imm1));                 // ii
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                fmt = IF_SVE_FA_3A;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_H);
+                assert(isValidUimm<1>(imm1)); // i
+                fmt = IF_SVE_FA_3B;
+            }
+            break;
+
+        case INS_sve_cmla:
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+            assert(isValidRot(imm2));          // rr
+            // Convert imm2 from rotation value (0-270) to bitwise representation (0-3)
+            imm = (imm1 << 2) | emitEncodeRotationImm0_to_270(imm2);
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert(isValidUimm<2>(imm1));                 // ii
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                fmt = IF_SVE_FB_3A;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_S);
+                assert(isValidUimm<1>(imm1)); // i
+                fmt = IF_SVE_FB_3B;
+            }
+            break;
+
+        case INS_sve_sqrdcmlah:
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+            assert(isValidRot(imm2));          // rr
+            // Convert imm2 from rotation value (0-270) to bitwise representation (0-3)
+            imm = (imm1 << 2) | emitEncodeRotationImm0_to_270(imm2);
+
+            if (opt == INS_OPTS_SCALABLE_H)
+            {
+                assert(isValidUimm<2>(imm1));                 // ii
+                assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
+                fmt = IF_SVE_FC_3A;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_S);
+                assert(isValidUimm<1>(imm1)); // i
+                fmt = IF_SVE_FC_3B;
+            }
+            break;
+
+        case INS_sve_fcmla:
+            assert(opt == INS_OPTS_SCALABLE_S);
+            assert(isVectorRegister(reg1));    // ddddd
+            assert(isVectorRegister(reg2));    // nnnnn
+            assert(isLowVectorRegister(reg3)); // mmmm
+            assert(isValidUimm<1>(imm1));      // i
+            assert(isValidRot(imm2));          // rr
+
+            // Convert imm2 from rotation value (0-270) to bitwise representation (0-3)
+            imm = (imm1 << 2) | emitEncodeRotationImm0_to_270(imm2);
+            fmt = IF_SVE_GV_3A;
+            break;
+
+        default:
+            unreached();
+            break;
+    }
+
+    assert(fmt != IF_NONE);
+
+    instrDesc* id = emitNewInstrCns(attr, imm);
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing four registers.
+ *  Do not call this directly. Use 'emitIns_R_R_R_R' instead.
+ */
+
+void emitter::emitInsSve_R_R_R_R(instruction     ins,
+                                 emitAttr        attr,
+                                 regNumber       reg1,
+                                 regNumber       reg2,
+                                 regNumber       reg3,
+                                 regNumber       reg4,
+                                 insOpts         opt /* = INS_OPTS_NONE*/,
+                                 insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
+{
+    emitAttr  size = EA_SIZE(attr);
+    insFormat fmt  = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_sve_sel:
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                if (reg1 == reg4)
+                {
+                    // mov is a preferred alias for sel
+                    return emitInsSve_R_R_R(INS_sve_mov, attr, reg1, reg2, reg3, opt,
+                                            INS_SCALABLE_OPTS_PREDICATE_MERGE);
+                }
+
+                assert(insOptsScalableStandard(opt));
+                assert(isVectorRegister(reg1));    // ddddd
+                assert(isPredicateRegister(reg2)); // VVVV
+                assert(isVectorRegister(reg3));    // nnnnn
+                assert(isVectorRegister(reg4));    // mmmmm
+                fmt = IF_SVE_CW_4A;
+            }
+            else
+            {
+                assert(opt == INS_OPTS_SCALABLE_B);
+                assert(isPredicateRegister(reg1)); // dddd
+                assert(isPredicateRegister(reg2)); // gggg
+                assert(isPredicateRegister(reg3)); // nnnn
+                assert(isPredicateRegister(reg4)); // mmmm
+                fmt = IF_SVE_CZ_4A;
+            }
+            break;
+
+        case INS_sve_cmpeq:
+        case INS_sve_cmpgt:
+        case INS_sve_cmpge:
+        case INS_sve_cmphi:
+        case INS_sve_cmphs:
+        case INS_sve_cmpne:
+        case INS_sve_cmple:
+        case INS_sve_cmplo:
+        case INS_sve_cmpls:
+        case INS_sve_cmplt:
+            assert(isPredicateRegister(reg1));    // DDDD
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // nnnnn
+            assert(isVectorRegister(reg4));       // mmmmm
+            assert(isScalableVectorSize(attr));   // xx
+            if (sopt == INS_SCALABLE_OPTS_WIDE)
+            {
+                assert(insOptsScalableWide(opt));
+                fmt = IF_SVE_CX_4A_A;
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                assert(insOptsScalableStandard(opt));
+                fmt = IF_SVE_CX_4A;
+            }
+            break;
+
+        case INS_sve_and:
+        case INS_sve_orr:
+        case INS_sve_eor:
+        case INS_sve_ands:
+        case INS_sve_bic:
+        case INS_sve_orn:
+        case INS_sve_bics:
+        case INS_sve_eors:
+        case INS_sve_nor:
+        case INS_sve_nand:
+        case INS_sve_orrs:
+        case INS_sve_orns:
+        case INS_sve_nors:
+        case INS_sve_nands:
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(reg1)); // dddd
+            assert(isPredicateRegister(reg2)); // gggg
+            assert(isPredicateRegister(reg3)); // nnnn
+            assert(isPredicateRegister(reg4)); // mmmm
+            fmt = IF_SVE_CZ_4A;
+            break;
+
+        case INS_sve_brkpa:
+        case INS_sve_brkpb:
+        case INS_sve_brkpas:
+        case INS_sve_brkpbs:
+            assert(opt == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(reg1)); // dddd
+            assert(isPredicateRegister(reg2)); // gggg
+            assert(isPredicateRegister(reg3)); // nnnn
+            assert(isPredicateRegister(reg4)); // mmmm
+            fmt = IF_SVE_DA_4A;
+            break;
+
+        case INS_sve_fcmeq:
+        case INS_sve_fcmge:
+        case INS_sve_facge:
+        case INS_sve_fcmgt:
+        case INS_sve_facgt:
+        case INS_sve_fcmlt:
+        case INS_sve_fcmle:
+        case INS_sve_fcmne:
+        case INS_sve_fcmuo:
+        case INS_sve_facle:
+        case INS_sve_faclt:
+            assert(insOptsScalableFloat(opt));
+            assert(isVectorRegister(reg3));       // nnnnn
+            assert(isVectorRegister(reg4));       // mmmmm
+            assert(isPredicateRegister(reg1));    // DDDD
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isScalableVectorSize(attr));   // xx
+            fmt = IF_SVE_HT_4A;
+            break;
+
+        case INS_sve_match:
+        case INS_sve_nmatch:
+            assert(insOptsScalableAtMaxHalf(opt));
+            assert(isPredicateRegister(reg1));    // DDDD
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // nnnnn
+            assert(isVectorRegister(reg4));       // mmmmm
+            assert(isScalableVectorSize(attr));   // xx
+            fmt = IF_SVE_GE_4A;
+            break;
+
+        case INS_sve_mla:
+        case INS_sve_mls:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));       // ddddd
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // nnnnn
+            assert(isVectorRegister(reg4));       // mmmmm
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_AR_4A;
+            break;
+
+        case INS_sve_histcnt:
+            assert(insOptsScalableWords(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isLowPredicateRegister(reg2));                  // ggg
+            assert(isVectorRegister(reg3));                        // nnnnn
+            assert(isVectorRegister(reg4));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_GI_4A;
+            break;
+
+        case INS_sve_fmla:
+        case INS_sve_fmls:
+        case INS_sve_fnmla:
+        case INS_sve_fnmls:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));                        // ddddd
+            assert(isLowPredicateRegister(reg2));                  // ggg
+            assert(isVectorRegister(reg3));                        // nnnnn
+            assert(isVectorRegister(reg4));                        // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
+            fmt = IF_SVE_HU_4A;
+            break;
+
+        case INS_sve_mad:
+        case INS_sve_msb:
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));       // ddddd
+            assert(isLowPredicateRegister(reg2)); // ggg
+            assert(isVectorRegister(reg3));       // mmmmm
+            assert(isVectorRegister(reg4));       // aaaaa
+            assert(isScalableVectorSize(size));
+            fmt = IF_SVE_AS_4A;
+            break;
+
+        case INS_sve_st1b:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isScalableVectorSize(size));
+            assert(insScalableOptsNone(sopt));
+
+            if (insOptsScalableStandard(opt))
+            {
+                if (isGeneralRegister(reg4))
+                {
+                    fmt = IF_SVE_JD_4A;
+                }
+                else
+                {
+                    assert(isVectorRegister(reg4));
+                    fmt = IF_SVE_JK_4B;
+                }
+            }
+            else
+            {
+                assert(insOptsScalable32bitExtends(opt));
+                switch (opt)
+                {
+                    case INS_OPTS_SCALABLE_S_UXTW:
+                    case INS_OPTS_SCALABLE_S_SXTW:
+                        fmt = IF_SVE_JK_4A_B;
+                        break;
+
+                    case INS_OPTS_SCALABLE_D_UXTW:
+                    case INS_OPTS_SCALABLE_D_SXTW:
+                        fmt = IF_SVE_JK_4A;
+                        break;
+
+                    default:
+                        assert(!"Invalid options for scalable");
+                        break;
+                }
+            }
+            break;
+
+        case INS_sve_st1h:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isScalableVectorSize(size));
+
+            if (insOptsScalableStandard(opt))
+            {
+                if (sopt == INS_SCALABLE_OPTS_LSL_N)
+                {
+                    if (isGeneralRegister(reg4))
+                    {
+                        // st1h is reserved for scalable B
+                        assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) : true);
+                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                        fmt = IF_SVE_JD_4A;
+                    }
+                    else
+                    {
+                        assert(isVectorRegister(reg4));
+                        fmt = IF_SVE_JJ_4B;
+                    }
+                }
+                else
+                {
+                    assert(isVectorRegister(reg4));
+                    assert(insScalableOptsNone(sopt));
+                    fmt = IF_SVE_JJ_4B_E;
+                }
+            }
+            else
+            {
+                assert(insOptsScalable32bitExtends(opt));
+                switch (opt)
+                {
+                    case INS_OPTS_SCALABLE_S_UXTW:
+                    case INS_OPTS_SCALABLE_S_SXTW:
+                        if (insScalableOptsNone(sopt))
+                        {
+                            fmt = IF_SVE_JJ_4A_D;
+                        }
+                        else
+                        {
+                            assert(sopt == INS_SCALABLE_OPTS_MOD_N);
+                            fmt = IF_SVE_JJ_4A;
+                        }
+                        break;
+
+                    case INS_OPTS_SCALABLE_D_UXTW:
+                    case INS_OPTS_SCALABLE_D_SXTW:
+                        if (insScalableOptsNone(sopt))
+                        {
+                            fmt = IF_SVE_JJ_4A_C;
+                        }
+                        else
+                        {
+                            assert(sopt == INS_SCALABLE_OPTS_MOD_N);
+                            fmt = IF_SVE_JJ_4A_B;
+                        }
+                        break;
+
+                    default:
+                        assert(!"Invalid options for scalable");
+                        break;
+                }
+            }
+            break;
+
+        case INS_sve_st1w:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isScalableVectorSize(size));
+
+            if (insOptsScalableStandard(opt))
+            {
+                if (sopt == INS_SCALABLE_OPTS_LSL_N)
+                {
+                    if (isGeneralRegister(reg4))
+                    {
+                        fmt = IF_SVE_JD_4B;
+                    }
+                    else
+                    {
+                        assert(isVectorRegister(reg4));
+                        fmt = IF_SVE_JJ_4B;
+                    }
+                }
+                else
+                {
+                    assert(isVectorRegister(reg4));
+                    assert(insScalableOptsNone(sopt));
+                    fmt = IF_SVE_JJ_4B_E;
+                }
+            }
+            else if (opt == INS_OPTS_SCALABLE_Q)
+            {
+                assert(isGeneralRegister(reg4));
+                assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                fmt = IF_SVE_JD_4C;
+            }
+            else
+            {
+                assert(insOptsScalable32bitExtends(opt));
+                assert(isVectorRegister(reg4));
+                switch (opt)
+                {
+                    case INS_OPTS_SCALABLE_S_UXTW:
+                    case INS_OPTS_SCALABLE_S_SXTW:
+                        if (insScalableOptsNone(sopt))
+                        {
+                            fmt = IF_SVE_JJ_4A_D;
+                        }
+                        else
+                        {
+                            assert(sopt == INS_SCALABLE_OPTS_MOD_N);
+                            fmt = IF_SVE_JJ_4A;
+                        }
+                        break;
+
+                    case INS_OPTS_SCALABLE_D_UXTW:
+                    case INS_OPTS_SCALABLE_D_SXTW:
+                        if (insScalableOptsNone(sopt))
+                        {
+                            fmt = IF_SVE_JJ_4A_C;
+                        }
+                        else
+                        {
+                            assert(sopt == INS_SCALABLE_OPTS_MOD_N);
+                            fmt = IF_SVE_JJ_4A_B;
+                        }
+                        break;
+
+                    default:
+                        assert(!"Invalid options for scalable");
+                        break;
+                }
+            }
+            break;
+
+        case INS_sve_st1d:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isScalableVectorSize(size));
+
+            if (isGeneralRegister(reg4))
+            {
+                assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                if (opt == INS_OPTS_SCALABLE_Q)
+                {
+                    fmt = IF_SVE_JD_4C_A;
+                }
+                else
+                {
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    fmt = IF_SVE_JD_4C;
+                }
+            }
+            else
+            {
+                assert(isVectorRegister(reg4));
+
+                if (opt == INS_OPTS_SCALABLE_D)
+                {
+                    if (sopt == INS_SCALABLE_OPTS_LSL_N)
+                    {
+                        fmt = IF_SVE_JJ_4B;
+                    }
+                    else
+                    {
+                        assert(insScalableOptsNone(sopt));
+                        fmt = IF_SVE_JJ_4B_C;
+                    }
+                }
+                else
+                {
+                    assert(insOptsScalable32bitExtends(opt));
+                    switch (opt)
+                    {
+                        case INS_OPTS_SCALABLE_D_UXTW:
+                        case INS_OPTS_SCALABLE_D_SXTW:
+                            if (sopt == INS_SCALABLE_OPTS_MOD_N)
+                            {
+                                fmt = IF_SVE_JJ_4A;
+                            }
+                            else
+                            {
+                                assert(insScalableOptsNone(sopt));
+                                fmt = IF_SVE_JJ_4A_B;
+                            }
+                            break;
+
+                        default:
+                            assert(!"Invalid options for scalable");
+                            break;
+                    }
+                }
+            }
+            break;
+
+        case INS_sve_ld1b:
+        case INS_sve_ld1sb:
+        case INS_sve_ldff1b:
+        case INS_sve_ldff1sb:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isScalableVectorSize(size));
+            assert(insScalableOptsNone(sopt));
+
+            if (isGeneralRegisterOrZR(reg4))
+            {
+                switch (ins)
+                {
+                    case INS_sve_ldff1b:
+                        assert(insOptsScalableStandard(opt));
+                        fmt = IF_SVE_IG_4A_E;
+                        break;
+
+                    case INS_sve_ldff1sb:
+                        assert(insOptsScalableAtLeastHalf(opt));
+                        fmt = IF_SVE_IG_4A_D;
+                        break;
+
+                    case INS_sve_ld1sb:
+                        assert(insOptsScalableAtLeastHalf(opt));
+                        fmt = IF_SVE_IK_4A_F;
+                        break;
+
+                    case INS_sve_ld1b:
+                        assert(insOptsScalableStandard(opt));
+                        fmt = IF_SVE_IK_4A_H;
+                        break;
+
+                    default:
+                        assert(!"Invalid instruction");
+                        break;
+                }
+            }
+            else
+            {
+                assert(isVectorRegister(reg4));
+
+                if (insOptsScalableDoubleWord32bitExtends(opt))
+                {
+                    fmt = IF_SVE_HW_4A;
+                }
+                else if (insOptsScalableSingleWord32bitExtends(opt))
+                {
+                    fmt = IF_SVE_HW_4A_A;
+                }
+                else
+                {
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    fmt = IF_SVE_HW_4B;
+                }
+            }
+            break;
+
+        case INS_sve_ld1h:
+        case INS_sve_ld1sh:
+        case INS_sve_ldff1h:
+        case INS_sve_ldff1sh:
+        case INS_sve_ld1w:
+        case INS_sve_ldff1w:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isScalableVectorSize(size));
+
+            if (isGeneralRegisterOrZR(reg4))
+            {
+                assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+
+                switch (ins)
+                {
+                    case INS_sve_ldff1h:
+                        assert(insOptsScalableStandard(opt));
+                        fmt = IF_SVE_IG_4A_G;
+                        break;
+
+                    case INS_sve_ldff1sh:
+                    case INS_sve_ldff1w:
+                        assert(insOptsScalableWords(opt));
+                        fmt = IF_SVE_IG_4A_F;
+                        break;
+
+                    case INS_sve_ld1w:
+                        assert(insOptsScalableWordsOrQuadwords(opt));
+                        fmt = IF_SVE_II_4A_H;
+                        break;
+
+                    case INS_sve_ld1sh:
+                        assert(insOptsScalableWords(opt));
+                        fmt = IF_SVE_IK_4A_G;
+                        break;
+
+                    case INS_sve_ld1h:
+                        assert(insOptsScalableAtLeastHalf(opt));
+                        fmt = IF_SVE_IK_4A_I;
+                        break;
+
+                    default:
+                        assert(!"Invalid instruction");
+                        break;
+                }
+            }
+            else
+            {
+                assert(isVectorRegister(reg4));
+
+                if (insOptsScalableDoubleWord32bitExtends(opt))
+                {
+                    if (sopt == INS_SCALABLE_OPTS_MOD_N)
+                    {
+                        fmt = IF_SVE_HW_4A_A;
+                    }
+                    else
+                    {
+                        assert(insScalableOptsNone(sopt));
+                        fmt = IF_SVE_HW_4A_B;
+                    }
+                }
+                else if (insOptsScalableSingleWord32bitExtends(opt))
+                {
+                    if (sopt == INS_SCALABLE_OPTS_MOD_N)
+                    {
+                        fmt = IF_SVE_HW_4A;
+                    }
+                    else
+                    {
+                        assert(insScalableOptsNone(sopt));
+                        fmt = IF_SVE_HW_4A_C;
+                    }
+                }
+                else
+                {
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    if (sopt == INS_SCALABLE_OPTS_LSL_N)
+                    {
+                        fmt = IF_SVE_HW_4B;
+                    }
+                    else
+                    {
+                        assert(insScalableOptsNone(sopt));
+                        fmt = IF_SVE_HW_4B_D;
+                    }
+                }
+            }
+            break;
+
+        case INS_sve_ld1d:
+        case INS_sve_ld1sw:
+        case INS_sve_ldff1d:
+        case INS_sve_ldff1sw:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isScalableVectorSize(size));
+
+            if (isGeneralRegisterOrZR(reg4))
+            {
+                assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+
+                if (opt == INS_OPTS_SCALABLE_Q)
+                {
+                    assert(reg4 != REG_ZR);
+                    assert(ins == INS_sve_ld1d);
+                    fmt = IF_SVE_II_4A_B;
+                }
+                else
+                {
+                    assert(opt == INS_OPTS_SCALABLE_D);
+
+                    switch (ins)
+                    {
+                        case INS_sve_ldff1d:
+                        case INS_sve_ldff1sw:
+                            fmt = IF_SVE_IG_4A;
+                            break;
+
+                        case INS_sve_ld1d:
+                            assert(reg4 != REG_ZR);
+                            fmt = IF_SVE_II_4A;
+                            break;
+
+                        case INS_sve_ld1sw:
+                            assert(reg4 != REG_ZR);
+                            fmt = IF_SVE_IK_4A;
+                            break;
+
+                        default:
+                            assert(!"Invalid instruction");
+                            break;
+                    }
+                }
+            }
+            else if (insOptsScalableDoubleWord32bitExtends(opt))
+            {
+                assert(isVectorRegister(reg4));
+
+                if (sopt == INS_SCALABLE_OPTS_MOD_N)
+                {
+                    fmt = IF_SVE_IU_4A;
+                }
+                else
+                {
+                    assert(insScalableOptsNone(sopt));
+
+                    if (ins == INS_sve_ld1d)
+                    {
+                        fmt = IF_SVE_IU_4A_C;
+                    }
+                    else
+                    {
+                        fmt = IF_SVE_IU_4A_A;
+                    }
+                }
+            }
+            else if (sopt == INS_SCALABLE_OPTS_LSL_N)
+            {
+                assert(isVectorRegister(reg4));
+                assert(opt == INS_OPTS_SCALABLE_D);
+                fmt = IF_SVE_IU_4B;
+            }
+            else
+            {
+                assert(isVectorRegister(reg4));
+                assert(opt == INS_OPTS_SCALABLE_D);
+                assert(insScalableOptsNone(sopt));
+
+                if (ins == INS_sve_ld1d)
+                {
+                    fmt = IF_SVE_IU_4B_D;
+                }
+                else
+                {
+                    fmt = IF_SVE_IU_4B_B;
+                }
+            }
+            break;
+
+        case INS_sve_ldnt1b:
+        case INS_sve_ldnt1h:
+        case INS_sve_ldnt1w:
+        case INS_sve_ldnt1d:
+        case INS_sve_ldnt1sb:
+        case INS_sve_ldnt1sh:
+        case INS_sve_ldnt1sw:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isScalableVectorSize(size));
+
+            if (isGeneralRegister(reg3))
+            {
+                assert(isGeneralRegister(reg4));
+
+#ifdef DEBUG
+                switch (ins)
+                {
+                    case INS_sve_ldnt1b:
+                        assert(opt == INS_OPTS_SCALABLE_B);
+                        assert(insScalableOptsNone(sopt));
+                        break;
+
+                    case INS_sve_ldnt1h:
+                        assert(opt == INS_OPTS_SCALABLE_H);
+                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                        break;
+
+                    case INS_sve_ldnt1w:
+                        assert(opt == INS_OPTS_SCALABLE_S);
+                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                        break;
+
+                    case INS_sve_ldnt1d:
+                        assert(opt == INS_OPTS_SCALABLE_D);
+                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                        break;
+
+                    default:
+                        assert(!"Invalid instruction");
+                        break;
+                }
+#endif // DEBUG
+
+                fmt = IF_SVE_IN_4A;
+            }
+            else if ((ins == INS_sve_ldnt1d) || (ins == INS_sve_ldnt1sw))
+            {
+                assert(insOptsScalableWords(opt));
+                assert(isVectorRegister(reg3));
+                assert(isGeneralRegisterOrZR(reg4));
+                assert(insScalableOptsNone(sopt));
+                assert(opt == INS_OPTS_SCALABLE_D);
+                fmt = IF_SVE_IX_4A;
+            }
+            else
+            {
+                assert(insOptsScalableWords(opt));
+                assert(isVectorRegister(reg3));
+                assert(isGeneralRegisterOrZR(reg4));
+                assert(insScalableOptsNone(sopt));
+
+                if (opt == INS_OPTS_SCALABLE_S)
+                {
+                    fmt = IF_SVE_IF_4A;
+                }
+                else
+                {
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    fmt = IF_SVE_IF_4A_A;
+                }
+            }
+            break;
+
+        case INS_sve_ld1rob:
+        case INS_sve_ld1roh:
+        case INS_sve_ld1row:
+        case INS_sve_ld1rod:
+        case INS_sve_ld1rqb:
+        case INS_sve_ld1rqh:
+        case INS_sve_ld1rqw:
+        case INS_sve_ld1rqd:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isGeneralRegister(reg4));
+            assert(isScalableVectorSize(size));
+
+#ifdef DEBUG
+            switch (ins)
+            {
+                case INS_sve_ld1rob:
+                case INS_sve_ld1rqb:
+                    assert(opt == INS_OPTS_SCALABLE_B);
+                    assert(insScalableOptsNone(sopt));
+                    break;
+
+                case INS_sve_ld1roh:
+                case INS_sve_ld1rqh:
+                    assert(opt == INS_OPTS_SCALABLE_H);
+                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                    break;
+
+                case INS_sve_ld1row:
+                case INS_sve_ld1rqw:
+                    assert(opt == INS_OPTS_SCALABLE_S);
+                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                    break;
+
+                case INS_sve_ld1rod:
+                case INS_sve_ld1rqd:
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+#endif // DEBUG
+
+            fmt = IF_SVE_IP_4A;
+            break;
+
+        case INS_sve_ld1q:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isGeneralRegisterOrZR(reg4));
+            assert(isScalableVectorSize(size));
+            assert(opt == INS_OPTS_SCALABLE_Q);
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_IW_4A;
+            break;
+
+        case INS_sve_ld2q:
+        case INS_sve_ld3q:
+        case INS_sve_ld4q:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isGeneralRegister(reg4));
+            assert(isScalableVectorSize(size));
+            assert(opt == INS_OPTS_SCALABLE_Q);
+            assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+            fmt = IF_SVE_IR_4A;
+            break;
+
+        case INS_sve_ld2b:
+        case INS_sve_ld3b:
+        case INS_sve_ld4b:
+        case INS_sve_ld2h:
+        case INS_sve_ld3h:
+        case INS_sve_ld4h:
+        case INS_sve_ld2w:
+        case INS_sve_ld3w:
+        case INS_sve_ld4w:
+        case INS_sve_ld2d:
+        case INS_sve_ld3d:
+        case INS_sve_ld4d:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isGeneralRegister(reg4));
+            assert(isScalableVectorSize(size));
+
+#ifdef DEBUG
+            switch (ins)
+            {
+                case INS_sve_ld2b:
+                case INS_sve_ld3b:
+                case INS_sve_ld4b:
+                    assert(opt == INS_OPTS_SCALABLE_B);
+                    assert(insScalableOptsNone(sopt));
+                    break;
+
+                case INS_sve_ld2h:
+                case INS_sve_ld3h:
+                case INS_sve_ld4h:
+                    assert(opt == INS_OPTS_SCALABLE_H);
+                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                    break;
+
+                case INS_sve_ld2w:
+                case INS_sve_ld3w:
+                case INS_sve_ld4w:
+                    assert(opt == INS_OPTS_SCALABLE_S);
+                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                    break;
+
+                case INS_sve_ld2d:
+                case INS_sve_ld3d:
+                case INS_sve_ld4d:
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+#endif // DEBUG
+
+            fmt = IF_SVE_IT_4A;
+            break;
+
+        case INS_sve_st1q:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isGeneralRegisterOrZR(reg4));
+            assert(isScalableVectorSize(size));
+            assert(opt == INS_OPTS_SCALABLE_Q);
+            assert(insScalableOptsNone(sopt));
+            fmt = IF_SVE_IY_4A;
+            break;
+
+        case INS_sve_stnt1b:
+        case INS_sve_stnt1h:
+        case INS_sve_stnt1w:
+        case INS_sve_stnt1d:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isScalableVectorSize(size));
+
+            if (isGeneralRegister(reg3))
+            {
+                assert(isGeneralRegister(reg4));
+#ifdef DEBUG
+                switch (ins)
+                {
+                    case INS_sve_stnt1b:
+                        assert(opt == INS_OPTS_SCALABLE_B);
+                        assert(insScalableOptsNone(sopt));
+                        break;
+
+                    case INS_sve_stnt1h:
+                        assert(opt == INS_OPTS_SCALABLE_H);
+                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                        break;
+
+                    case INS_sve_stnt1w:
+                        assert(opt == INS_OPTS_SCALABLE_S);
+                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                        break;
+
+                    case INS_sve_stnt1d:
+                        assert(opt == INS_OPTS_SCALABLE_D);
+                        assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                        break;
+
+                    default:
+                        assert(!"Invalid instruction");
+                        break;
+                }
+#endif // DEBUG
+                fmt = IF_SVE_JB_4A;
+            }
+            else
+            {
+                assert(isVectorRegister(reg3));
+                assert(isGeneralRegisterOrZR(reg4));
+                assert(isScalableVectorSize(size));
+                assert(insScalableOptsNone(sopt));
+
+                if (opt == INS_OPTS_SCALABLE_S)
+                {
+                    fmt = IF_SVE_IZ_4A;
+                }
+                else
+                {
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    if (ins == INS_sve_stnt1d)
+                    {
+                        fmt = IF_SVE_JA_4A;
+                    }
+                    else
+                    {
+                        fmt = IF_SVE_IZ_4A_A;
+                    }
+                }
+            }
+            break;
+
+        case INS_sve_st2b:
+        case INS_sve_st3b:
+        case INS_sve_st4b:
+        case INS_sve_st2h:
+        case INS_sve_st3h:
+        case INS_sve_st4h:
+        case INS_sve_st2w:
+        case INS_sve_st3w:
+        case INS_sve_st4w:
+        case INS_sve_st2d:
+        case INS_sve_st3d:
+        case INS_sve_st4d:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isGeneralRegister(reg4));
+            assert(isScalableVectorSize(size));
+
+#ifdef DEBUG
+            switch (ins)
+            {
+                case INS_sve_st2b:
+                case INS_sve_st3b:
+                case INS_sve_st4b:
+                    assert(opt == INS_OPTS_SCALABLE_B);
+                    assert(insScalableOptsNone(sopt));
+                    break;
+
+                case INS_sve_st2h:
+                case INS_sve_st3h:
+                case INS_sve_st4h:
+                    assert(opt == INS_OPTS_SCALABLE_H);
+                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                    break;
+
+                case INS_sve_st2w:
+                case INS_sve_st3w:
+                case INS_sve_st4w:
+                    assert(opt == INS_OPTS_SCALABLE_S);
+                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                    break;
+
+                case INS_sve_st2d:
+                case INS_sve_st3d:
+                case INS_sve_st4d:
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+#endif // DEBUG
+            fmt = IF_SVE_JC_4A;
+            break;
+
+        case INS_sve_st2q:
+        case INS_sve_st3q:
+        case INS_sve_st4q:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isGeneralRegister(reg4));
+            assert(isScalableVectorSize(size));
+            assert(opt == INS_OPTS_SCALABLE_Q);
+            fmt = IF_SVE_JF_4A;
+            break;
+
+        case INS_sve_bfmla:
+        case INS_sve_bfmls:
+            assert(opt == INS_OPTS_SCALABLE_H);
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isVectorRegister(reg4));
+            fmt = IF_SVE_HU_4B;
+            break;
+
+        case INS_sve_fmad:
+        case INS_sve_fmsb:
+        case INS_sve_fnmad:
+        case INS_sve_fnmsb:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(insScalableOptsNone(sopt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isVectorRegister(reg4));
+            fmt = IF_SVE_HV_4A;
+            break;
+
+        default:
+            unreached();
+            break;
+    }
+    assert(fmt != IF_NONE);
+
+    // Use aliases.
+    switch (ins)
+    {
+        case INS_sve_cmple:
+            std::swap(reg3, reg4);
+            ins = INS_sve_cmpge;
+            break;
+        case INS_sve_cmplo:
+            std::swap(reg3, reg4);
+            ins = INS_sve_cmphi;
+            break;
+        case INS_sve_cmpls:
+            std::swap(reg3, reg4);
+            ins = INS_sve_cmphs;
+            break;
+        case INS_sve_cmplt:
+            std::swap(reg3, reg4);
+            ins = INS_sve_cmpgt;
+            break;
+        case INS_sve_facle:
+            std::swap(reg3, reg4);
+            ins = INS_sve_facge;
+            break;
+        case INS_sve_faclt:
+            std::swap(reg3, reg4);
+            ins = INS_sve_facgt;
+            break;
+        case INS_sve_fcmle:
+            std::swap(reg3, reg4);
+            ins = INS_sve_fcmge;
+            break;
+        case INS_sve_fcmlt:
+            std::swap(reg3, reg4);
+            ins = INS_sve_fcmgt;
+            break;
+        default:
+            break;
+    }
+
+    instrDesc* id = emitNewInstr(attr);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
+    id->idReg4(reg4);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing four registers and a constant.
+ */
+
+void emitter::emitInsSve_R_R_R_R_I(instruction ins,
+                                   emitAttr    attr,
+                                   regNumber   reg1,
+                                   regNumber   reg2,
+                                   regNumber   reg3,
+                                   regNumber   reg4,
+                                   ssize_t     imm,
+                                   insOpts     opt /* = INS_OPT_NONE*/)
+{
+    emitAttr  size = EA_SIZE(attr);
+    insFormat fmt  = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_sve_fcmla:
+            assert(insOptsScalableAtLeastHalf(opt));
+            assert(isVectorRegister(reg1));
+            assert(isLowPredicateRegister(reg2));
+            assert(isVectorRegister(reg3));
+            assert(isVectorRegister(reg4));
+            assert(isScalableVectorSize(size));
+            imm = emitEncodeRotationImm0_to_270(imm);
+            fmt = IF_SVE_GT_4A;
+            break;
+
+        case INS_sve_psel:
+            unreached(); // TODO-SVE: Not yet supported.
+            assert(insOptsScalableStandard(opt));
+            assert(isPredicateRegister(reg1)); // DDDD
+            assert(isPredicateRegister(reg2)); // NNNN
+            assert(isPredicateRegister(reg3)); // MMMM
+            assert(isGeneralRegister(reg4));   // vv
+            assert((REG_R12 <= reg4) && (reg4 <= REG_R15));
+
+            switch (opt)
+            {
+                case INS_OPTS_SCALABLE_B:
+                    assert(isValidUimm<4>(imm));
+                    break;
+
+                case INS_OPTS_SCALABLE_H:
+                    assert(isValidUimm<3>(imm));
+                    break;
+
+                case INS_OPTS_SCALABLE_S:
+                    assert(isValidUimm<2>(imm));
+                    break;
+
+                case INS_OPTS_SCALABLE_D:
+                    assert(isValidUimm<1>(imm));
+                    break;
+
+                default:
+                    unreached();
+                    break;
+            }
+
+            fmt = IF_SVE_DV_4A;
+            break;
+
+        default:
+            unreached();
+            break;
+    }
+    assert(fmt != IF_NONE);
+
+    instrDesc* id = emitNewInstrCns(attr, imm);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
+    id->idReg4(reg4);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing a register, a SVE Pattern.
+ */
+
+void emitter::emitIns_R_PATTERN(
+    instruction ins, emitAttr attr, regNumber reg1, insOpts opt, insSvePattern pattern /* = SVE_PATTERN_ALL*/)
+{
+    insFormat fmt = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_sve_ptrue:
+        case INS_sve_ptrues:
+            assert(isPredicateRegister(reg1));
+            assert(isScalableVectorSize(attr));
+            assert(insOptsScalableStandard(opt));
+            fmt = IF_SVE_DE_1A;
+            break;
+
+        default:
+            unreached();
+            break;
+
+    } // end switch (ins)
+    assert(fmt != IF_NONE);
+
+    instrDesc* id = emitNewInstr(attr);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+
+    id->idReg1(reg1);
+    id->idInsOpt(opt);
+    id->idSvePattern(pattern);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing a register, a SVE Pattern and an immediate.
+ */
+
+void emitter::emitIns_R_PATTERN_I(instruction   ins,
+                                  emitAttr      attr,
+                                  regNumber     reg1,
+                                  insSvePattern pattern,
+                                  ssize_t       imm,
+                                  insOpts       opt /* = INS_OPTS_NONE */)
+{
+    emitAttr  size = EA_SIZE(attr);
+    insFormat fmt  = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_sve_cntb:
+        case INS_sve_cntd:
+        case INS_sve_cnth:
+        case INS_sve_cntw:
+            assert(insOptsNone(opt));
+            assert(isGeneralRegister(reg1));  // ddddd
+            assert(isValidUimmFrom1<4>(imm)); // iiii
+            assert(size == EA_8BYTE);
+            fmt = IF_SVE_BL_1A;
+            break;
+
+        case INS_sve_incd:
+        case INS_sve_inch:
+        case INS_sve_incw:
+        case INS_sve_decd:
+        case INS_sve_dech:
+        case INS_sve_decw:
+            assert(isValidUimmFrom1<4>(imm)); // iiii
+
+            if (insOptsNone(opt))
+            {
+                assert(isGeneralRegister(reg1)); // ddddd
+                assert(size == EA_8BYTE);
+                fmt = IF_SVE_BM_1A;
+            }
+            else
+            {
+                assert(insOptsScalableAtLeastHalf(opt));
+                assert(isVectorRegister(reg1)); // ddddd
+                fmt = IF_SVE_BN_1A;
+            }
+            break;
+
+        case INS_sve_incb:
+        case INS_sve_decb:
+            assert(isGeneralRegister(reg1));  // ddddd
+            assert(isValidUimmFrom1<4>(imm)); // iiii
+            assert(size == EA_8BYTE);
+            fmt = IF_SVE_BM_1A;
+            break;
+
+        case INS_sve_sqincb:
+        case INS_sve_uqincb:
+        case INS_sve_sqdecb:
+        case INS_sve_uqdecb:
+            assert(insOptsNone(opt));
+            assert(isGeneralRegister(reg1));      // ddddd
+            assert(isValidUimmFrom1<4>(imm));     // iiii
+            assert(isValidGeneralDatasize(size)); // X
+            fmt = IF_SVE_BO_1A;
+            break;
+
+        case INS_sve_sqinch:
+        case INS_sve_uqinch:
+        case INS_sve_sqdech:
+        case INS_sve_uqdech:
+        case INS_sve_sqincw:
+        case INS_sve_uqincw:
+        case INS_sve_sqdecw:
+        case INS_sve_uqdecw:
+        case INS_sve_sqincd:
+        case INS_sve_uqincd:
+        case INS_sve_sqdecd:
+        case INS_sve_uqdecd:
+            assert(isValidUimmFrom1<4>(imm)); // iiii
+
+            if (insOptsNone(opt))
+            {
+                assert(isGeneralRegister(reg1));      // ddddd
+                assert(isValidGeneralDatasize(size)); // X
+                fmt = IF_SVE_BO_1A;
+            }
+            else
+            {
+                assert(insOptsScalableAtLeastHalf(opt));
+                assert(isVectorRegister(reg1)); // ddddd
+                assert(isScalableVectorSize(size));
+                fmt = IF_SVE_BP_1A;
+            }
+            break;
+
+        default:
+            unreached();
+            break;
+
+    } // end switch (ins)
+    assert(fmt != IF_NONE);
+
+    instrDesc* id = emitNewInstrCns(attr, imm);
+
+    id->idIns(ins);
+    id->idInsFmt(fmt);
+    id->idInsOpt(opt);
+    id->idOpSize(size);
+
+    id->idReg1(reg1);
+    id->idSvePattern(pattern);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing three registers and a SVE 'prfop'.
+ */
+
+void emitter::emitIns_PRFOP_R_R_R(instruction     ins,
+                                  emitAttr        attr,
+                                  insSvePrfop     prfop,
+                                  regNumber       reg1,
+                                  regNumber       reg2,
+                                  regNumber       reg3,
+                                  insOpts         opt /* = INS_OPTS_NONE */,
+                                  insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
+{
+    emitAttr  size = EA_SIZE(attr);
+    insFormat fmt  = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_sve_prfb:
+            assert(insScalableOptsNone(sopt));
+            assert(isLowPredicateRegister(reg1));
+            assert(isGeneralRegister(reg2));
+            assert(isScalableVectorSize(size));
+
+            if (insOptsScalable32bitExtends(opt))
+            {
+                assert(isVectorRegister(reg3));
+
+                if (insOptsScalableSingleWord32bitExtends(opt))
+                {
+                    fmt = IF_SVE_HY_3A;
+                }
+                else
+                {
+                    assert(insOptsScalableDoubleWord32bitExtends(opt));
+                    fmt = IF_SVE_HY_3A_A;
+                }
+            }
+            else if (isVectorRegister(reg3))
+            {
+                assert(opt == INS_OPTS_SCALABLE_D);
+                fmt = IF_SVE_HY_3B;
+            }
+            else
+            {
+                assert(insOptsNone(opt));
+                assert(isGeneralRegister(reg3));
+                fmt = IF_SVE_IB_3A;
+            }
+            break;
+
+        case INS_sve_prfh:
+        case INS_sve_prfw:
+        case INS_sve_prfd:
+            assert(isLowPredicateRegister(reg1));
+            assert(isGeneralRegister(reg2));
+            assert(isScalableVectorSize(size));
+
+            if (sopt == INS_SCALABLE_OPTS_MOD_N)
+            {
+                if (insOptsScalableSingleWord32bitExtends(opt))
+                {
+                    fmt = IF_SVE_HY_3A;
+                }
+                else
+                {
+                    assert(insOptsScalableDoubleWord32bitExtends(opt));
+                    fmt = IF_SVE_HY_3A_A;
+                }
+            }
+            else
+            {
+                assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                if (isVectorRegister(reg3))
+                {
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                    fmt = IF_SVE_HY_3B;
+                }
+                else
+                {
+                    assert(insOptsNone(opt));
+                    assert(isGeneralRegister(reg3));
+                    fmt = IF_SVE_IB_3A;
+                }
+            }
+            break;
+
+        default:
+            unreached();
+            break;
+
+    } // end switch (ins)
+    assert(fmt != IF_NONE);
+
+    instrDesc* id = emitNewInstr(attr);
+
+    id->idIns(ins);
+    id->idInsOpt(opt);
+    id->idInsFmt(fmt);
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idReg3(reg3);
+    id->idSvePrfop(prfop);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a SVE instruction referencing two registers, a SVE 'prfop' and an immediate.
+ */
+
+void emitter::emitIns_PRFOP_R_R_I(instruction ins,
+                                  emitAttr    attr,
+                                  insSvePrfop prfop,
+                                  regNumber   reg1,
+                                  regNumber   reg2,
+                                  int         imm,
+                                  insOpts     opt /* = INS_OPTS_NONE */)
+{
+    emitAttr  size = EA_SIZE(attr);
+    insFormat fmt  = IF_NONE;
+
+    /* Figure out the encoding format of the instruction */
+    switch (ins)
+    {
+        case INS_sve_prfb:
+        case INS_sve_prfh:
+        case INS_sve_prfw:
+        case INS_sve_prfd:
+            assert(isLowPredicateRegister(reg1));
+            assert(isScalableVectorSize(size));
+
+            if (isVectorRegister(reg2))
+            {
+                assert(insOptsScalableWords(opt));
+
+#ifdef DEBUG
+                switch (ins)
+                {
+                    case INS_sve_prfb:
+                        assert(isValidUimm<5>(imm));
+                        break;
+
+                    case INS_sve_prfh:
+                        assert((isValidUimm_MultipleOf<5, 2>(imm)));
+                        break;
+
+                    case INS_sve_prfw:
+                        assert((isValidUimm_MultipleOf<5, 4>(imm)));
+                        break;
+
+                    case INS_sve_prfd:
+                        assert((isValidUimm_MultipleOf<5, 8>(imm)));
+                        break;
+
+                    default:
+                        assert(!"Invalid instruction");
+                        break;
+                }
+#endif // DEBUG
+                fmt = IF_SVE_HZ_2A_B;
+            }
+            else
+            {
+                assert(insOptsNone(opt));
+                assert(isGeneralRegister(reg2));
+                assert(isValidSimm<6>(imm));
+                fmt = IF_SVE_IA_2A;
+            }
+            break;
+
+        default:
+            unreached();
+            break;
+
+    } // end switch (ins)
+    assert(fmt != IF_NONE);
+
+    instrDesc* id = emitNewInstrCns(attr, imm);
+
+    id->idIns(ins);
+    id->idInsOpt(opt);
+    id->idInsFmt(fmt);
+
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idSvePrfop(prfop);
+
+    dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize(emitAttr size)
+{
+    switch (size)
+    {
+        case EA_1BYTE:
+            return 0x00000000;
+
+        case EA_2BYTE:
+            return 0x00400000; // set the bit at location 22
+
+        case EA_4BYTE:
+            return 0x00800000; // set the bit at location 23
+
+        case EA_8BYTE:
+            return 0x00C00000; // set the bit at location 23 and 22
+
+        default:
+            assert(!"Invalid insOpt for vector register");
+    }
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
+ *  This specifically encodes the size at bit locations '22-21'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_22_to_21(emitAttr size)
+{
+    switch (size)
+    {
+        case EA_1BYTE:
+            return 0;
+
+        case EA_2BYTE:
+            return (1 << 21); // set the bit at location 21
+
+        case EA_4BYTE:
+            return (1 << 22); // set the bit at location 22
+
+        case EA_8BYTE:
+            return (1 << 22) | (1 << 21); // set the bit at location 22 and 21
+
+        default:
+            assert(!"Invalid insOpt for vector register");
+    }
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
+ *  This specifically encodes the size at bit locations '18-17'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_18_to_17(emitAttr size)
+{
+    switch (size)
+    {
+        case EA_1BYTE:
+            return 0;
+
+        case EA_2BYTE:
+            return (1 << 17); // set the bit at location 17
+
+        case EA_4BYTE:
+            return (1 << 18); // set the bit at location 18
+
+        case EA_8BYTE:
+            return (1 << 18) | (1 << 17); // set the bit at location 18 and 17
+
+        default:
+            assert(!"Invalid insOpt for vector register");
+    }
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction
+ *  This specifically encodes the field 'sz' at bit location '20'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_sz_20(emitAttr size)
+{
+    switch (size)
+    {
+        case EA_4BYTE:
+            return 0;
+
+        case EA_8BYTE:
+            return (1 << 20);
+
+        default:
+            assert(!"Invalid insOpt for vector register");
+    }
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction
+ *  This specifically encodes the field 'sz' at bit location '21'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_sz_21(emitAttr size)
+{
+    switch (size)
+    {
+        case EA_4BYTE:
+            return 0;
+
+        case EA_8BYTE:
+            return (1 << 21);
+
+        default:
+            assert(!"Invalid insOpt for vector register");
+    }
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
+ *  This specifically encodes the field 'tszh:tszl' at bit locations '23-22:20-19'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_tszh_23_tszl_20_to_19(emitAttr size)
+{
+    switch (size)
+    {
+        case EA_1BYTE:
+            return 0x080000; // set the bit at location 19
+
+        case EA_2BYTE:
+            return 0x100000; // set the bit at location 20
+
+        case EA_4BYTE:
+            return 0x400000; // set the bit at location 22
+
+        case EA_8BYTE:
+            return 0x800000; // set the bit at location 23
+
+        default:
+            assert(!"Invalid size for vector register");
+    }
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction at bit location '30'.
+ *  This only works on select formats.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_30_or_21(insFormat fmt, emitAttr size)
+{
+    switch (fmt)
+    {
+        case IF_SVE_HX_3A_B:
+        case IF_SVE_HX_3A_E:
+            switch (size)
+            {
+                case EA_4BYTE:
+                    return 0;
+
+                case EA_8BYTE:
+                    return (1 << 30);
+
+                default:
+                    break;
+            }
+
+            assert(!"Invalid size for vector register");
+            return 0;
+
+        case IF_SVE_IV_3A:
+            assert(size == EA_8BYTE);
+            return 0;
+
+        case IF_SVE_JI_3A_A:
+            switch (size)
+            {
+                case EA_4BYTE:
+                    return (1 << 21);
+
+                case EA_8BYTE:
+                    return 0;
+
+                default:
+                    break;
+            }
+
+            assert(!"Invalid size for vector register");
+            return 0;
+
+        default:
+            break;
+    }
+
+    assert(!"Unexpected instruction format");
+    return 0;
+}
+/*****************************************************************************
+ *
+ *  Returns the encoding for the field 'i1:tszh:tszl' at bit locations '23-22:20-18'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_tszh_tszl_and_imm(const insOpts opt, const ssize_t imm)
+{
+    code_t encoding = 0;
+
+    switch (opt)
+    {
+        case INS_OPTS_SCALABLE_B:
+            assert(isValidUimm<4>(imm));
+            encoding = 0x040000; // set the bit at location 18
+            // encode immediate at location 23-22:20-19
+            encoding |= ((imm & 0b1100) << 22);
+            encoding |= ((imm & 0b11) << 19);
+            break;
+
+        case INS_OPTS_SCALABLE_H:
+            assert(isValidUimm<3>(imm));
+            encoding = 0x080000; // set the bit at location 19
+            // encode immediate at location 23-22:20
+            encoding |= ((imm & 0b110) << 22);
+            encoding |= ((imm & 1) << 20);
+            break;
+
+        case INS_OPTS_SCALABLE_S:
+            assert(isValidUimm<2>(imm));
+            encoding = 0x100000;     // set the bit at location 20
+            encoding |= (imm << 22); // encode immediate at location 23:22
+            break;
+
+        case INS_OPTS_SCALABLE_D:
+            assert(isValidUimm<1>(imm));
+            encoding = 0x400000;     // set the bit at location 22
+            encoding |= (imm << 23); // encode immediate at location 23
+            break;
+
+        default:
+            assert(!"Invalid size for vector register");
+            break;
+    }
+
+    return encoding;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding for the field 'tszh:tszl:imm3' at bit locations '23-22:20-19:18-16'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsizeWithShift_tszh_tszl_imm3(const insOpts opt,
+                                                                                 ssize_t       imm,
+                                                                                 bool          isRightShift)
+{
+    code_t encoding = 0;
+
+    imm = insEncodeShiftImmediate(optGetSveElemsize(opt), isRightShift, imm);
+
+    switch (opt)
+    {
+        case INS_OPTS_SCALABLE_B:
+            imm = imm & 0b111;     // bits 18-16
+            encoding |= (1 << 19); // bit 19
+            break;
+
+        case INS_OPTS_SCALABLE_H:
+            imm = imm & 0b1111;    // bits 19-16
+            encoding |= (1 << 20); // bit 20
+            break;
+
+        case INS_OPTS_SCALABLE_S:
+            imm = imm & 0b11111;   // bits 20-16
+            encoding |= (1 << 22); // bit 22
+            break;
+
+        case INS_OPTS_SCALABLE_D:
+            // this gets the last bit of 'imm' and tries to set bit 22
+            encoding |= ((imm >> 5) << 22);
+            imm = imm & 0b11111;   // bits 20-16
+            encoding |= (1 << 23); // bit 23
+            break;
+
+        default:
+            assert(!"Invalid size for vector register");
+            break;
+    }
+
+    return (encoding | (code_t)(imm << 16));
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding for the field 'i1:tsz' at bit locations '20:19-16'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsizeWithImmediate_i1_tsz(const insOpts opt, ssize_t imm)
+{
+    code_t encoding = 0;
+
+    switch (opt)
+    {
+        case INS_OPTS_SCALABLE_B:
+            assert(isValidUimm<4>(imm));
+            encoding |= (1 << 16);   // bit 16
+            encoding |= (imm << 17); // bits 20-17
+            break;
+
+        case INS_OPTS_SCALABLE_H:
+            assert(isValidUimm<3>(imm));
+            encoding |= (1 << 17);   // bit 17
+            encoding |= (imm << 18); // bits 20-18
+            break;
+
+        case INS_OPTS_SCALABLE_S:
+            assert(isValidUimm<2>(imm));
+            encoding |= (1 << 18);   // bit 18
+            encoding |= (imm << 19); // bits 20-19
+            break;
+
+        case INS_OPTS_SCALABLE_D:
+            assert(isValidUimm<1>(imm));
+            encoding |= (1 << 19);   // bit 19
+            encoding |= (imm << 20); // bit 20
+            break;
+
+        default:
+            assert(!"Invalid size for vector register");
+            break;
+    }
+
+    return encoding;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate.
+ *  This specifically encodes the field 'tszh:tszl' at bit locations '23-22:9-8'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift, size_t imm)
+{
+    code_t encodedSize = 0;
+
+    switch (size)
+    {
+        case EA_1BYTE:
+            encodedSize = 0x100; // set the bit at location 8
+            break;
+
+        case EA_2BYTE:
+            encodedSize = 0x200; // set the bit at location 9
+            break;
+
+        case EA_4BYTE:
+            encodedSize = 0x400000; // set the bit at location 22
+            break;
+
+        case EA_8BYTE:
+            encodedSize = 0x800000; // set the bit at location 23
+            break;
+
+        default:
+            assert(!"Invalid esize for vector register");
+    }
+
+    code_t encodedImm = insEncodeShiftImmediate(size, isRightShift, imm);
+    code_t imm3High   = (encodedImm & 0x60) << 17;
+    code_t imm3Low    = (encodedImm & 0x1f) << 5;
+    return encodedSize | imm3High | imm3Low;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the constant values 90 or 270 for an Arm64 SVE vector instruction
+ *  This specifically encode the field 'rot' at bit location '16'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveImm90_or_270_rot(ssize_t imm)
+{
+    assert(emitIsValidEncodedRotationImm90_or_270(imm));
+    return (code_t)(imm << 16);
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the constant values 0, 90, 180 or 270 for an Arm64 SVE vector instruction
+ *  This specifically encode the field 'rot' at bit locations '14-13'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveImm0_to_270_rot(ssize_t imm)
+{
+    assert(emitIsValidEncodedRotationImm0_to_270(imm));
+    return (code_t)(imm << 13);
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the constant float values 0, 0.5, 1.0 or 2.0 for an Arm64 SVE vector instruction
+ *  This specifically encode the field 'i1' at bit location '5'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveSmallFloatImm(ssize_t imm)
+{
+    assert(emitIsValidEncodedSmallFloatImm(imm));
+    return (code_t)(imm << 5);
+}
+
+/*****************************************************************************
+ *
+ *  Returns the register list size for the given SVE instruction.
+ */
+
+/*static*/ int emitter::insGetSveReg1ListSize(instruction ins)
+{
+    switch (ins)
+    {
+        case INS_sve_ld1d:
+        case INS_sve_ld1w:
+        case INS_sve_ld1sw:
+        case INS_sve_ld1sb:
+        case INS_sve_ld1b:
+        case INS_sve_ld1sh:
+        case INS_sve_ld1h:
+        case INS_sve_ldnf1d:
+        case INS_sve_ldnf1sw:
+        case INS_sve_ldnf1sh:
+        case INS_sve_ldnf1w:
+        case INS_sve_ldnf1h:
+        case INS_sve_ldnf1sb:
+        case INS_sve_ldnf1b:
+        case INS_sve_ldnt1b:
+        case INS_sve_ldnt1d:
+        case INS_sve_ldnt1h:
+        case INS_sve_ldnt1w:
+        case INS_sve_ld1rob:
+        case INS_sve_ld1rod:
+        case INS_sve_ld1roh:
+        case INS_sve_ld1row:
+        case INS_sve_ld1rqb:
+        case INS_sve_ld1rqd:
+        case INS_sve_ld1rqh:
+        case INS_sve_ld1rqw:
+        case INS_sve_stnt1b:
+        case INS_sve_stnt1d:
+        case INS_sve_stnt1h:
+        case INS_sve_stnt1w:
+        case INS_sve_st1d:
+        case INS_sve_st1w:
+        case INS_sve_ldff1sh:
+        case INS_sve_ldff1w:
+        case INS_sve_ldff1h:
+        case INS_sve_ldff1d:
+        case INS_sve_ldff1sw:
+        case INS_sve_st1b:
+        case INS_sve_st1h:
+        case INS_sve_ldff1sb:
+        case INS_sve_ldff1b:
+        case INS_sve_ldnt1sb:
+        case INS_sve_ldnt1sh:
+        case INS_sve_ld1rd:
+        case INS_sve_ld1rsw:
+        case INS_sve_ld1rh:
+        case INS_sve_ld1rsb:
+        case INS_sve_ld1rsh:
+        case INS_sve_ld1rw:
+        case INS_sve_ld1q:
+        case INS_sve_ldnt1sw:
+        case INS_sve_st1q:
+        case INS_sve_ld1rb:
+            return 1;
+
+        case INS_sve_ld2b:
+        case INS_sve_ld2h:
+        case INS_sve_ld2w:
+        case INS_sve_ld2d:
+        case INS_sve_ld2q:
+        case INS_sve_splice: // SVE_CV_3A
+        case INS_sve_st2b:
+        case INS_sve_st2h:
+        case INS_sve_st2w:
+        case INS_sve_st2d:
+        case INS_sve_st2q:
+        case INS_sve_whilege: // SVE_DX_3A
+        case INS_sve_whilegt: // SVE_DX_3A
+        case INS_sve_whilehi: // SVE_DX_3A
+        case INS_sve_whilehs: // SVE_DX_3A
+        case INS_sve_whilele: // SVE_DX_3A
+        case INS_sve_whilels: // SVE_DX_3A
+        case INS_sve_whilelt: // SVE_DX_3A
+        case INS_sve_pext:    // SVE_DW_2B
+            return 2;
+
+        case INS_sve_ld3b:
+        case INS_sve_ld3h:
+        case INS_sve_ld3w:
+        case INS_sve_ld3d:
+        case INS_sve_ld3q:
+        case INS_sve_st3b:
+        case INS_sve_st3h:
+        case INS_sve_st3w:
+        case INS_sve_st3d:
+        case INS_sve_st3q:
+            return 3;
+
+        case INS_sve_ld4b:
+        case INS_sve_ld4h:
+        case INS_sve_ld4w:
+        case INS_sve_ld4d:
+        case INS_sve_ld4q:
+        case INS_sve_st4b:
+        case INS_sve_st4h:
+        case INS_sve_st4w:
+        case INS_sve_st4d:
+        case INS_sve_st4q:
+            return 4;
+
+        default:
+            assert(!"Unexpected instruction");
+            return 1;
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Returns the predicate type for the given SVE format.
+ */
+
+/*static*/ emitter::PredicateType emitter::insGetPredicateType(insFormat fmt, int regpos /* =0 */)
+{
+    switch (fmt)
+    {
+        case IF_SVE_BV_2A:
+        case IF_SVE_HW_4A:
+        case IF_SVE_HW_4A_A:
+        case IF_SVE_HW_4A_B:
+        case IF_SVE_HW_4A_C:
+        case IF_SVE_HW_4B:
+        case IF_SVE_HW_4B_D:
+        case IF_SVE_HX_3A_E:
+        case IF_SVE_IJ_3A_D:
+        case IF_SVE_IJ_3A_E:
+        case IF_SVE_IJ_3A_F:
+        case IF_SVE_IK_4A_G:
+        case IF_SVE_IJ_3A_G:
+        case IF_SVE_IK_4A_I:
+        case IF_SVE_IH_3A_F:
+        case IF_SVE_II_4A_H:
+        case IF_SVE_IH_3A:
+        case IF_SVE_IH_3A_A:
+        case IF_SVE_II_4A:
+        case IF_SVE_II_4A_B:
+        case IF_SVE_IU_4A:
+        case IF_SVE_IU_4A_C:
+        case IF_SVE_IU_4B:
+        case IF_SVE_IU_4B_D:
+        case IF_SVE_IV_3A:
+        case IF_SVE_IG_4A_F:
+        case IF_SVE_IG_4A_G:
+        case IF_SVE_IJ_3A:
+        case IF_SVE_IK_4A:
+        case IF_SVE_IK_4A_F:
+        case IF_SVE_IK_4A_H:
+        case IF_SVE_IU_4A_A:
+        case IF_SVE_IU_4B_B:
+        case IF_SVE_HX_3A_B:
+        case IF_SVE_IG_4A:
+        case IF_SVE_IG_4A_D:
+        case IF_SVE_IG_4A_E:
+        case IF_SVE_IF_4A:
+        case IF_SVE_IF_4A_A:
+        case IF_SVE_IM_3A:
+        case IF_SVE_IN_4A:
+        case IF_SVE_IX_4A:
+        case IF_SVE_IO_3A:
+        case IF_SVE_IP_4A:
+        case IF_SVE_IQ_3A:
+        case IF_SVE_IR_4A:
+        case IF_SVE_IS_3A:
+        case IF_SVE_IT_4A:
+        case IF_SVE_GI_4A:
+        case IF_SVE_IC_3A_C:
+        case IF_SVE_IC_3A:
+        case IF_SVE_IC_3A_B:
+        case IF_SVE_IC_3A_A:
+        case IF_SVE_IL_3A_C:
+        case IF_SVE_IL_3A:
+        case IF_SVE_IL_3A_B:
+        case IF_SVE_IL_3A_A:
+        case IF_SVE_IW_4A:
+            return PREDICATE_ZERO;
+
+        case IF_SVE_BV_2A_J:
+        case IF_SVE_CP_3A:
+        case IF_SVE_CQ_3A:
+        case IF_SVE_AM_2A:
+        case IF_SVE_AN_3A:
+        case IF_SVE_AO_3A:
+        case IF_SVE_HL_3A:
+        case IF_SVE_HM_2A:
+        case IF_SVE_AA_3A:
+        case IF_SVE_BU_2A:
+        case IF_SVE_BV_2B:
+        case IF_SVE_HS_3A:
+        case IF_SVE_HP_3A:
+        case IF_SVE_HP_3B:
+        case IF_SVE_AR_4A:
+        case IF_SVE_BV_2A_A:
+        case IF_SVE_AB_3A:
+        case IF_SVE_ET_3A:
+        case IF_SVE_HU_4A:
+        case IF_SVE_HL_3B:
+        case IF_SVE_AD_3A:
+        case IF_SVE_AB_3B:
+        case IF_SVE_AE_3A:
+        case IF_SVE_EU_3A:
+        case IF_SVE_GT_4A:
+        case IF_SVE_AP_3A:
+        case IF_SVE_HO_3A:
+        case IF_SVE_HO_3B:
+        case IF_SVE_HO_3C:
+        case IF_SVE_GQ_3A:
+        case IF_SVE_HU_4B:
+        case IF_SVE_AQ_3A:
+        case IF_SVE_CU_3A:
+        case IF_SVE_AC_3A:
+        case IF_SVE_ER_3A:
+        case IF_SVE_GR_3A:
+        case IF_SVE_ES_3A:
+        case IF_SVE_HR_3A:
+        case IF_SVE_EP_3A:
+        case IF_SVE_GP_3A:
+        case IF_SVE_EQ_3A:
+        case IF_SVE_HQ_3A:
+        case IF_SVE_AS_4A:
+        case IF_SVE_CT_3A:
+        case IF_SVE_HV_4A:
+            return PREDICATE_MERGE;
+
+        case IF_SVE_CZ_4A_A:
+        case IF_SVE_CZ_4A_L:
+        case IF_SVE_CE_2A:
+        case IF_SVE_CE_2B:
+        case IF_SVE_CE_2C:
+        case IF_SVE_CE_2D:
+        case IF_SVE_CF_2A:
+        case IF_SVE_CF_2B:
+        case IF_SVE_CF_2C:
+        case IF_SVE_CF_2D:
+        case IF_SVE_CI_3A:
+        case IF_SVE_CJ_2A:
+        case IF_SVE_DE_1A:
+        case IF_SVE_DH_1A:
+        case IF_SVE_DJ_1A:
+        case IF_SVE_DM_2A:
+        case IF_SVE_DN_2A:
+        case IF_SVE_DO_2A:
+        case IF_SVE_DP_2A:
+        case IF_SVE_DR_1A:
+        case IF_SVE_DT_3A:
+        case IF_SVE_DU_3A:
+        case IF_SVE_CK_2A:
+            return PREDICATE_SIZED;
+
+        case IF_SVE_DB_3A:
+            // Second register could be ZERO or MERGE so handled at source.
+            assert(regpos != 2);
+            return PREDICATE_SIZED;
+
+        case IF_SVE_DL_2A:
+        case IF_SVE_DY_3A:
+        case IF_SVE_DZ_1A:
+            return PREDICATE_N_SIZED;
+
+        // This is a special case as the second register could be ZERO or MERGE.
+        // <Pg>/<ZM>
+        // Therefore, by default return NONE due to ambiguity.
+        case IF_SVE_AH_3A:
+            // TODO: Handle these cases.
+            assert(false);
+            break;
+
+        case IF_SVE_JD_4B:
+        case IF_SVE_JD_4C:
+        case IF_SVE_JI_3A_A:
+        case IF_SVE_JJ_4A:
+        case IF_SVE_JJ_4A_B:
+        case IF_SVE_JJ_4A_C:
+        case IF_SVE_JJ_4A_D:
+        case IF_SVE_JJ_4B:
+        case IF_SVE_JJ_4B_E:
+        case IF_SVE_JN_3B:
+        case IF_SVE_JN_3C:
+        case IF_SVE_JD_4A:
+        case IF_SVE_JN_3A:
+        case IF_SVE_JD_4C_A:
+        case IF_SVE_JJ_4B_C:
+        case IF_SVE_JL_3A:
+        case IF_SVE_JN_3C_D:
+        case IF_SVE_HY_3A:
+        case IF_SVE_HY_3A_A:
+        case IF_SVE_HY_3B:
+        case IF_SVE_HZ_2A_B:
+        case IF_SVE_IA_2A:
+        case IF_SVE_IB_3A:
+        case IF_SVE_JK_4A:
+        case IF_SVE_JK_4A_B:
+        case IF_SVE_JK_4B:
+        case IF_SVE_IZ_4A:
+        case IF_SVE_IZ_4A_A:
+        case IF_SVE_JB_4A:
+        case IF_SVE_JM_3A:
+        case IF_SVE_CM_3A:
+        case IF_SVE_CN_3A:
+        case IF_SVE_CO_3A:
+        case IF_SVE_JA_4A:
+        case IF_SVE_CR_3A:
+        case IF_SVE_CS_3A:
+        case IF_SVE_CV_3A:
+        case IF_SVE_CV_3B:
+        case IF_SVE_DW_2A: // <PNn>[<imm>]
+        case IF_SVE_DW_2B: // <PNn>[<imm>]
+        case IF_SVE_JC_4A:
+        case IF_SVE_JO_3A:
+        case IF_SVE_JE_3A:
+        case IF_SVE_JF_4A:
+        case IF_SVE_AK_3A:
+        case IF_SVE_HE_3A:
+        case IF_SVE_AF_3A:
+        case IF_SVE_AG_3A:
+        case IF_SVE_AI_3A:
+        case IF_SVE_AJ_3A:
+        case IF_SVE_AL_3A:
+        case IF_SVE_CL_3A:
+        case IF_SVE_GS_3A:
+        case IF_SVE_HJ_3A:
+        case IF_SVE_IY_4A:
+            return PREDICATE_NONE;
+
+        case IF_SVE_CX_4A:
+        case IF_SVE_CX_4A_A:
+        case IF_SVE_CY_3A:
+        case IF_SVE_CY_3B:
+        case IF_SVE_GE_4A:
+        case IF_SVE_HT_4A:
+            assert((regpos == 1) || (regpos == 2));
+            return (regpos == 2 ? PREDICATE_ZERO : PREDICATE_SIZED);
+
+        case IF_SVE_CZ_4A:
+        case IF_SVE_DA_4A:
+        case IF_SVE_DB_3B:
+        case IF_SVE_DC_3A:
+            assert((regpos >= 1) && (regpos <= 4));
+            return (regpos == 2 ? PREDICATE_ZERO : PREDICATE_SIZED);
+
+        case IF_SVE_CZ_4A_K:
+            assert((regpos >= 1) && (regpos <= 3));
+            return (regpos == 2 ? PREDICATE_MERGE : PREDICATE_SIZED);
+
+        case IF_SVE_DD_2A:
+        case IF_SVE_DF_2A:
+            assert((regpos >= 1) && (regpos <= 3));
+            return ((regpos == 2) ? PREDICATE_NONE : PREDICATE_SIZED);
+
+        case IF_SVE_DG_2A:
+            return (regpos == 2 ? PREDICATE_ZERO : PREDICATE_SIZED);
+
+        case IF_SVE_DI_2A:
+            return (regpos == 1 ? PREDICATE_NONE : PREDICATE_SIZED);
+
+        case IF_SVE_DK_3A:
+            assert((regpos == 2) || (regpos == 3));
+            return ((regpos == 2) ? PREDICATE_NONE : PREDICATE_SIZED);
+
+        case IF_SVE_HI_3A:
+            assert((regpos == 1) || (regpos == 2));
+            return ((regpos == 2) ? PREDICATE_ZERO : PREDICATE_SIZED);
+
+        case IF_SVE_DV_4A:
+            assert((regpos >= 1) && (regpos <= 3));
+            return ((regpos == 3) ? PREDICATE_SIZED : PREDICATE_NONE);
+
+        case IF_SVE_ID_2A:
+        case IF_SVE_JG_2A:
+            return PREDICATE_NONE;
+
+        default:
+            break;
+    }
+
+    assert(!"Unexpected instruction format");
+    return PREDICATE_NONE;
+}
+
+/*****************************************************************************
+ *
+ *  Returns true if the SVE instruction has a LSL addr.
+ *  This is for formats that have [<Xn|SP>, <Xm>, LSL #N], [<Xn|SP>{, <Xm>, LSL #N}]
+ */
+/*static*/ bool emitter::insSveIsLslN(instruction ins, insFormat fmt)
+{
+    switch (fmt)
+    {
+        case IF_SVE_JD_4A:
+            switch (ins)
+            {
+                case INS_sve_st1h:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JD_4B:
+            switch (ins)
+            {
+                case INS_sve_st1w:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_HW_4B:
+            switch (ins)
+            {
+                case INS_sve_ld1h:
+                case INS_sve_ld1sh:
+                case INS_sve_ldff1h:
+                case INS_sve_ldff1sh:
+                case INS_sve_ld1w:
+                case INS_sve_ldff1w:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IG_4A:
+            switch (ins)
+            {
+                case INS_sve_ldff1d:
+                case INS_sve_ldff1sw:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IG_4A_F:
+            switch (ins)
+            {
+                case INS_sve_ldff1sh:
+                case INS_sve_ldff1w:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IG_4A_G:
+            switch (ins)
+            {
+                case INS_sve_ldff1h:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_II_4A:
+        case IF_SVE_II_4A_B:
+            switch (ins)
+            {
+                case INS_sve_ld1d:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_II_4A_H:
+            switch (ins)
+            {
+                case INS_sve_ld1w:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IK_4A:
+            switch (ins)
+            {
+                case INS_sve_ld1sw:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IK_4A_G:
+            switch (ins)
+            {
+                case INS_sve_ld1sh:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IK_4A_I:
+            switch (ins)
+            {
+                case INS_sve_ld1h:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IN_4A:
+            switch (ins)
+            {
+                case INS_sve_ldnt1d:
+                case INS_sve_ldnt1h:
+                case INS_sve_ldnt1w:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IP_4A:
+            switch (ins)
+            {
+                case INS_sve_ld1roh:
+                case INS_sve_ld1row:
+                case INS_sve_ld1rod:
+                case INS_sve_ld1rqh:
+                case INS_sve_ld1rqw:
+                case INS_sve_ld1rqd:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IR_4A:
+            switch (ins)
+            {
+                case INS_sve_ld2q:
+                case INS_sve_ld3q:
+                case INS_sve_ld4q:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IT_4A:
+            switch (ins)
+            {
+                case INS_sve_ld2h:
+                case INS_sve_ld2w:
+                case INS_sve_ld2d:
+                case INS_sve_ld3h:
+                case INS_sve_ld3w:
+                case INS_sve_ld3d:
+                case INS_sve_ld4h:
+                case INS_sve_ld4w:
+                case INS_sve_ld4d:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IU_4B:
+            switch (ins)
+            {
+                case INS_sve_ld1sw:
+                case INS_sve_ldff1sw:
+                case INS_sve_ld1d:
+                case INS_sve_ldff1d:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JB_4A:
+            switch (ins)
+            {
+                case INS_sve_stnt1h:
+                case INS_sve_stnt1w:
+                case INS_sve_stnt1d:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JC_4A:
+            switch (ins)
+            {
+                case INS_sve_st2h:
+                case INS_sve_st2w:
+                case INS_sve_st2d:
+                case INS_sve_st3h:
+                case INS_sve_st3w:
+                case INS_sve_st3d:
+                case INS_sve_st4h:
+                case INS_sve_st4w:
+                case INS_sve_st4d:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JD_4C:
+            switch (ins)
+            {
+                case INS_sve_st1w:
+                case INS_sve_st1d:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JD_4C_A:
+            switch (ins)
+            {
+                case INS_sve_st1d:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JF_4A:
+            switch (ins)
+            {
+                case INS_sve_st2q:
+                case INS_sve_st3q:
+                case INS_sve_st4q:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JJ_4B:
+            switch (ins)
+            {
+                case INS_sve_st1h:
+                case INS_sve_st1w:
+                case INS_sve_st1d:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_HY_3B:
+        case IF_SVE_IB_3A:
+            switch (ins)
+            {
+                case INS_sve_prfh:
+                case INS_sve_prfw:
+                case INS_sve_prfd:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        default:
+            break;
+    }
+
+    return false;
+}
+
+/*****************************************************************************
+ *
+ *  Returns true if the SVE instruction has a <mod> addr.
+ *  This is for formats that have [<Xn|SP>, <Zm>.T, <mod>], [<Xn|SP>, <Zm>.T, <mod> #N]
+ */
+/*static*/ bool emitter::insSveIsModN(instruction ins, insFormat fmt)
+{
+    switch (fmt)
+    {
+        case IF_SVE_JJ_4A:
+        case IF_SVE_JJ_4A_B:
+            switch (ins)
+            {
+                case INS_sve_st1d:
+                case INS_sve_st1h:
+                case INS_sve_st1w:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JJ_4A_C:
+        case IF_SVE_JJ_4A_D:
+            switch (ins)
+            {
+                case INS_sve_st1h:
+                case INS_sve_st1w:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JK_4A:
+        case IF_SVE_JK_4A_B:
+            switch (ins)
+            {
+                case INS_sve_st1b:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_HW_4A:
+        case IF_SVE_HW_4A_A:
+            switch (ins)
+            {
+                case INS_sve_ld1b:
+                case INS_sve_ld1h:
+                case INS_sve_ld1sb:
+                case INS_sve_ld1sh:
+                case INS_sve_ld1w:
+                case INS_sve_ldff1b:
+                case INS_sve_ldff1h:
+                case INS_sve_ldff1sb:
+                case INS_sve_ldff1sh:
+                case INS_sve_ldff1w:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_HW_4A_B:
+        case IF_SVE_HW_4A_C:
+            switch (ins)
+            {
+                case INS_sve_ld1h:
+                case INS_sve_ld1sh:
+                case INS_sve_ld1w:
+                case INS_sve_ldff1h:
+                case INS_sve_ldff1sh:
+                case INS_sve_ldff1w:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IU_4A:
+            switch (ins)
+            {
+                case INS_sve_ld1d:
+                case INS_sve_ld1sw:
+                case INS_sve_ldff1d:
+                case INS_sve_ldff1sw:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IU_4A_A:
+            switch (ins)
+            {
+                case INS_sve_ld1sw:
+                case INS_sve_ldff1d:
+                case INS_sve_ldff1sw:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IU_4A_C:
+            switch (ins)
+            {
+                case INS_sve_ld1d:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_HY_3A:
+        case IF_SVE_HY_3A_A:
+            switch (ins)
+            {
+                case INS_sve_prfb:
+                case INS_sve_prfh:
+                case INS_sve_prfw:
+                case INS_sve_prfd:
+                    return true;
+
+                default:
+                    break;
+            }
+            break;
+
+        default:
+            break;
+    }
+
+    return false;
+}
+
+/*****************************************************************************
+ *
+ *  Returns 0, 1, 2, 3 or 4 depending on the instruction and format.
+ *  This is for formats that have [<Xn|SP>, <Zm>.T, <mod>], [<Xn|SP>, <Zm>.T, <mod> #N], [<Xn|SP>, <Xm>, LSL #N],
+ * [<Xn|SP>{, <Xm>, LSL #N}]
+ */
+
+/*static*/ int emitter::insSveGetLslOrModN(instruction ins, insFormat fmt)
+{
+    switch (fmt)
+    {
+        case IF_SVE_JD_4A:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_st1h:
+                    return 1;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JD_4B:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_st1w:
+                    return 2;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_HW_4B:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ld1h:
+                case INS_sve_ld1sh:
+                case INS_sve_ldff1h:
+                case INS_sve_ldff1sh:
+                    return 1;
+
+                case INS_sve_ld1w:
+                case INS_sve_ldff1w:
+                    return 2;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JJ_4A:
+        case IF_SVE_JJ_4A_B:
+        case IF_SVE_JJ_4A_C:
+        case IF_SVE_JJ_4A_D:
+        case IF_SVE_JK_4A:
+        case IF_SVE_JK_4A_B:
+        case IF_SVE_HW_4A:
+        case IF_SVE_HW_4A_A:
+        case IF_SVE_HW_4A_B:
+        case IF_SVE_HW_4A_C:
+        case IF_SVE_IU_4A:
+        case IF_SVE_IU_4A_A:
+        case IF_SVE_IU_4A_C:
+            assert(!insSveIsLslN(ins, fmt));
+            assert(insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ld1h:
+                case INS_sve_ld1sh:
+                case INS_sve_ldff1h:
+                case INS_sve_ldff1sh:
+                    switch (fmt)
+                    {
+                        case IF_SVE_HW_4A:
+                        case IF_SVE_HW_4A_A:
+                            return 1;
+
+                        default:
+                            break;
+                    }
+                    return 0;
+
+                case INS_sve_ld1w:
+                case INS_sve_ldff1w:
+                case INS_sve_ld1sw:
+                case INS_sve_ldff1sw:
+                    switch (fmt)
+                    {
+                        case IF_SVE_HW_4A:
+                        case IF_SVE_HW_4A_A:
+                        case IF_SVE_IU_4A:
+                            return 2;
+
+                        default:
+                            break;
+                    }
+                    return 0;
+
+                case INS_sve_ld1d:
+                case INS_sve_ldff1d:
+                    switch (fmt)
+                    {
+                        case IF_SVE_IU_4A:
+                            return 3;
+
+                        default:
+                            break;
+                    }
+                    return 0;
+
+                case INS_sve_st1h:
+                    switch (fmt)
+                    {
+                        case IF_SVE_JJ_4A_C:
+                        case IF_SVE_JJ_4A_D:
+                            return 0;
+
+                        default:
+                            break;
+                    }
+                    return 1;
+
+                case INS_sve_st1w:
+                    switch (fmt)
+                    {
+                        case IF_SVE_JJ_4A_C:
+                        case IF_SVE_JJ_4A_D:
+                            return 0;
+
+                        default:
+                            break;
+                    }
+                    return 2;
+
+                case INS_sve_st1d:
+                    if (fmt == IF_SVE_JJ_4A_B)
+                    {
+                        return 0;
+                    }
+                    return 3;
+
+                default:
+                    break;
+            }
+            return 0;
+
+        case IF_SVE_IG_4A:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ldff1sw:
+                    return 2;
+
+                case INS_sve_ldff1d:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IG_4A_F:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ldff1sh:
+                    return 1;
+
+                case INS_sve_ldff1w:
+                    return 2;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IG_4A_G:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ldff1h:
+                    return 1;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_II_4A:
+        case IF_SVE_II_4A_B:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ld1d:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_II_4A_H:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ld1w:
+                    return 2;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IK_4A:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ld1sw:
+                    return 2;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IK_4A_G:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ld1sh:
+                    return 1;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IK_4A_I:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ld1h:
+                    return 1;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IN_4A:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ldnt1h:
+                    return 1;
+                case INS_sve_ldnt1w:
+                    return 2;
+                case INS_sve_ldnt1d:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IP_4A:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ld1roh:
+                case INS_sve_ld1rqh:
+                    return 1;
+
+                case INS_sve_ld1row:
+                case INS_sve_ld1rqw:
+                    return 2;
+                case INS_sve_ld1rod:
+                case INS_sve_ld1rqd:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IR_4A:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ld2q:
+                case INS_sve_ld3q:
+                case INS_sve_ld4q:
+                    return 4;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IT_4A:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ld2h:
+                case INS_sve_ld3h:
+                case INS_sve_ld4h:
+                    return 1;
+
+                case INS_sve_ld2w:
+                case INS_sve_ld3w:
+                case INS_sve_ld4w:
+                    return 2;
+
+                case INS_sve_ld2d:
+                case INS_sve_ld3d:
+                case INS_sve_ld4d:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IU_4B:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_ld1sw:
+                case INS_sve_ldff1sw:
+                    return 2;
+
+                case INS_sve_ld1d:
+                case INS_sve_ldff1d:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JB_4A:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_stnt1h:
+                    return 1;
+
+                case INS_sve_stnt1w:
+                    return 2;
+
+                case INS_sve_stnt1d:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JC_4A:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_st2h:
+                case INS_sve_st3h:
+                case INS_sve_st4h:
+                    return 1;
+
+                case INS_sve_st2w:
+                case INS_sve_st3w:
+                case INS_sve_st4w:
+                    return 2;
+
+                case INS_sve_st2d:
+                case INS_sve_st3d:
+                case INS_sve_st4d:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JD_4C:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_st1w:
+                    return 2;
+
+                case INS_sve_st1d:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JD_4C_A:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_st1d:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JF_4A:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_st2q:
+                case INS_sve_st3q:
+                case INS_sve_st4q:
+                    return 4;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_JJ_4B:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_st1h:
+                    return 1;
+
+                case INS_sve_st1w:
+                    return 2;
+
+                case INS_sve_st1d:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_HY_3A:
+        case IF_SVE_HY_3A_A:
+            assert(!insSveIsLslN(ins, fmt));
+            assert(insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_prfb:
+                    return 0;
+
+                case INS_sve_prfh:
+                    return 1;
+
+                case INS_sve_prfw:
+                    return 2;
+
+                case INS_sve_prfd:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_HY_3B:
+        case IF_SVE_IB_3A:
+            assert(insSveIsLslN(ins, fmt));
+            assert(!insSveIsModN(ins, fmt));
+            switch (ins)
+            {
+                case INS_sve_prfh:
+                    return 1;
+
+                case INS_sve_prfw:
+                    return 2;
+
+                case INS_sve_prfd:
+                    return 3;
+
+                default:
+                    break;
+            }
+            break;
+
+        default:
+            break;
+    }
+
+    assert(!"Unexpected instruction format");
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Returns true if the specified instruction can encode the 'dtype' field.
+ */
+
+/*static*/ bool emitter::canEncodeSveElemsize_dtype(instruction ins)
+{
+    switch (ins)
+    {
+        case INS_sve_ld1w:
+        case INS_sve_ld1sb:
+        case INS_sve_ld1b:
+        case INS_sve_ld1sh:
+        case INS_sve_ld1h:
+        case INS_sve_ldnf1sh:
+        case INS_sve_ldnf1w:
+        case INS_sve_ldnf1h:
+        case INS_sve_ldnf1sb:
+        case INS_sve_ldnf1b:
+        case INS_sve_ldff1b:
+        case INS_sve_ldff1sb:
+        case INS_sve_ldff1h:
+        case INS_sve_ldff1sh:
+        case INS_sve_ldff1w:
+            return true;
+
+        default:
+            return false;
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
+ *  for the 'dtype' field.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t code)
+{
+    assert(canEncodeSveElemsize_dtype(ins));
+    assert(ins != INS_sve_ld1w);
+    switch (size)
+    {
+        case EA_1BYTE:
+            switch (ins)
+            {
+                case INS_sve_ld1b:
+                case INS_sve_ldnf1b:
+                case INS_sve_ldff1b:
+                    return code; // By default, the instruction already encodes 8-bit.
+
+                default:
+                    assert(!"Invalid instruction for encoding dtype.");
+            }
+            return code;
+
+        case EA_2BYTE:
+            switch (ins)
+            {
+                case INS_sve_ld1b:
+                case INS_sve_ld1h:
+                case INS_sve_ldnf1b:
+                case INS_sve_ldnf1h:
+                case INS_sve_ldff1b:
+                case INS_sve_ldff1h:
+                    return code | (1 << 21); // Set bit '21' to 1.
+
+                case INS_sve_ld1sb:
+                case INS_sve_ldnf1sb:
+                case INS_sve_ldff1sb:
+                    return code | (1 << 22); // Set bit '22' to 1.
+
+                default:
+                    assert(!"Invalid instruction for encoding dtype.");
+            }
+            return code;
+
+        case EA_4BYTE:
+            switch (ins)
+            {
+                case INS_sve_ldnf1w:
+                case INS_sve_ldff1w:
+                    return code; // By default, the instruction already encodes 32-bit.
+
+                case INS_sve_ld1b:
+                case INS_sve_ld1h:
+                case INS_sve_ldnf1b:
+                case INS_sve_ldnf1h:
+                case INS_sve_ldff1b:
+                case INS_sve_ldff1h:
+                    return code | (1 << 22); // Set bit '22' to 1.
+
+                case INS_sve_ld1sb:
+                case INS_sve_ld1sh:
+                case INS_sve_ldnf1sb:
+                case INS_sve_ldnf1sh:
+                case INS_sve_ldff1sb:
+                case INS_sve_ldff1sh:
+                    return code | (1 << 21); // Set bit '21' to 1.
+
+                default:
+                    assert(!"Invalid instruction for encoding dtype.");
+            }
+            return code;
+
+        case EA_8BYTE:
+            switch (ins)
+            {
+                case INS_sve_ldnf1w:
+                case INS_sve_ldff1w:
+                    return code | (1 << 21); // Set bit '21' to 1. Set bit '15' to 1.
+
+                case INS_sve_ld1b:
+                case INS_sve_ld1h:
+                case INS_sve_ldnf1b:
+                case INS_sve_ldnf1h:
+                case INS_sve_ldff1b:
+                case INS_sve_ldff1h:
+                    return (code | (1 << 22)) | (1 << 21); // Set bit '22' and '21' to 1.
+
+                case INS_sve_ld1sb:
+                case INS_sve_ld1sh:
+                case INS_sve_ldnf1sb:
+                case INS_sve_ldnf1sh:
+                case INS_sve_ldff1sb:
+                case INS_sve_ldff1sh:
+                    return code; // By default, the instruction already encodes 64-bit.
+
+                default:
+                    assert(!"Invalid instruction for encoding dtype.");
+            }
+            return code;
+
+        default:
+            assert(!"Invalid size for encoding dtype.");
+    }
+
+    return code;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the 4/8/16 byte elemsize for the Arm64 Sve vector instruction 'ld1w'
+ * for the 'dtype' field.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtype_ld1w(instruction ins,
+                                                                    insFormat   fmt,
+                                                                    emitAttr    size,
+                                                                    code_t      code)
+{
+    assert(canEncodeSveElemsize_dtype(ins));
+    assert(ins == INS_sve_ld1w);
+    switch (size)
+    {
+        case EA_4BYTE:
+            switch (fmt)
+            {
+                case IF_SVE_IH_3A_F:
+                    // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the
+                    // proper encoding for S.
+                    return (code | (1 << 15)) | (1 << 22); // Set bit '22' and '15' to 1.
+
+                case IF_SVE_II_4A_H:
+                    // Note: Bit '14' is not actually part of 'dtype', but it is necessary to set to '1' to get the
+                    // proper encoding for S.
+                    return (code | (1 << 14)) | (1 << 22); // Set bit '22' and '14' to 1.
+
+                default:
+                    break;
+            }
+            break;
+
+        case EA_8BYTE:
+            switch (fmt)
+            {
+                case IF_SVE_IH_3A_F:
+                    // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the
+                    // proper encoding for D.
+                    return ((code | (1 << 15)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '15' to 1.
+
+                case IF_SVE_II_4A_H:
+                    // Note: Bit '14' is not actually part of 'dtype', but it is necessary to set to '1' to get the
+                    // proper encoding for D.
+                    return ((code | (1 << 14)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '14' to 1.
+
+                default:
+                    break;
+            }
+            break;
+
+        case EA_16BYTE:
+            switch (fmt)
+            {
+                case IF_SVE_IH_3A_F:
+                    return code | (1 << 20); // Set bit '20' to 1.
+
+                case IF_SVE_II_4A_H:
+                    // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the
+                    // proper encoding for Q.
+                    return code | (1 << 15); // Set bit '15' to 1.
+
+                default:
+                    break;
+            }
+            break;
+
+        default:
+            assert(!"Invalid size for encoding dtype.");
+            break;
+    }
+
+    assert(!"Invalid instruction format");
+    return code;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
+ *  for the 'dtypeh' and 'dtypel' fields.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtypeh_dtypel(instruction ins,
+                                                                       insFormat   fmt,
+                                                                       emitAttr    size,
+                                                                       code_t      code)
+{
+    switch (fmt)
+    {
+        case IF_SVE_IC_3A_A:
+            switch (size)
+            {
+                case EA_4BYTE:
+                    switch (ins)
+                    {
+                        case INS_sve_ld1rsh:
+                            return code | (1 << 13); // set bit '13'
+
+                        case INS_sve_ld1rw:
+                            return code | (1 << 14); // set bit '14'
+
+                        default:
+                            break;
+                    }
+                    break;
+
+                case EA_8BYTE:
+                    switch (ins)
+                    {
+                        case INS_sve_ld1rsh:
+                            return code;
+
+                        case INS_sve_ld1rw:
+                            return code | (1 << 14) | (1 << 13); // set bits '14' and '13'
+
+                        default:
+                            break;
+                    }
+                    break;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IC_3A_B:
+            switch (size)
+            {
+                case EA_2BYTE:
+                    switch (ins)
+                    {
+                        case INS_sve_ld1rh:
+                            return code | (1 << 13); // set bit '13'
+
+                        case INS_sve_ld1rsb:
+                            return code | (1 << 24) | (1 << 14); // set bit '24' and '14'
+
+                        default:
+                            break;
+                    }
+                    break;
+
+                case EA_4BYTE:
+                    switch (ins)
+                    {
+                        case INS_sve_ld1rh:
+                            return code | (1 << 14); // set bit '14'
+
+                        case INS_sve_ld1rsb:
+                            return code | (1 << 24) | (1 << 13); // set bit '24' and '13'
+
+                        default:
+                            break;
+                    }
+                    break;
+
+                case EA_8BYTE:
+                    switch (ins)
+                    {
+                        case INS_sve_ld1rh:
+                            return code | (1 << 14) | (1 << 13); // set bits '14' and '13'
+
+                        case INS_sve_ld1rsb:
+                            return code | (1 << 24); // set bit '24'
+
+                        default:
+                            break;
+                    }
+                    break;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_IC_3A_C:
+            assert(ins == INS_sve_ld1rb);
+            switch (size)
+            {
+                case EA_1BYTE:
+                    return code;
+
+                case EA_2BYTE:
+                    return code | (1 << 13); // set bit '13'
+
+                case EA_4BYTE:
+                    return code | (1 << 14); // set bit '14'
+
+                case EA_8BYTE:
+                    return code | (1 << 14) | (1 << 13); // set bits '14' and '13'
+
+                default:
+                    break;
+            }
+            break;
+
+        default:
+            break;
+    }
+
+    assert(!"Unexpected instruction format");
+    return code;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the <R> 4/8-byte width specifier <R>
+ *  at bit location 22 for an Arm64 Sve instruction.
+ */
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_R_22(emitAttr size)
+{
+    if (size == EA_8BYTE)
+    {
+        return 0x400000; // set the bit at location 22
+    }
+
+    assert(size == EA_4BYTE);
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the immediate value for SVE instructions that encode it as a difference
+ *  from tszh:tszl:imm3.
+ */
+/*static*/ ssize_t emitter::insSveGetImmDiff(const ssize_t imm, const insOpts opt)
+{
+    switch (opt)
+    {
+        case INS_OPTS_SCALABLE_B:
+            assert(isValidUimmFrom1<3>(imm));
+            return (8 - imm);
+
+        case INS_OPTS_SCALABLE_H:
+            assert(isValidUimmFrom1<4>(imm));
+            return (16 - imm);
+
+        case INS_OPTS_SCALABLE_S:
+            assert(isValidUimmFrom1<5>(imm));
+            return (32 - imm);
+
+        case INS_OPTS_SCALABLE_D:
+            assert(isValidUimmFrom1<6>(imm));
+            return (64 - imm);
+
+        default:
+            unreached();
+            break;
+    }
+
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the two 5-bit signed immediates encoded in the following format:
+ *  njjj jjmi iiii
+ *  - iiiii: the absolute value of imm1
+ *  - m: 1 if imm1 is negative, 0 otherwise
+ *  - jjjjj: the absolute value of imm2
+ *  - n: 1 if imm2 is negative, 0 otherwise
+ */
+/*static*/ ssize_t emitter::insSveEncodeTwoSimm5(ssize_t imm1, ssize_t imm2)
+{
+    assert(isValidSimm<5>(imm1));
+    assert(isValidSimm<5>(imm2));
+    ssize_t immOut = 0;
+
+    if (imm1 < 0)
+    {
+        // Set bit location 5 to indicate imm1 is negative
+        immOut |= 0x20;
+        imm1 *= -1;
+    }
+
+    if (imm2 < 0)
+    {
+        // Set bit location 11 to indicate imm2 is negative
+        immOut |= 0x800;
+        imm2 *= -1;
+    }
+
+    immOut |= imm1;
+    immOut |= (imm2 << 6);
+    return immOut;
+}
+
+/*****************************************************************************
+ *
+ *  Decodes imm into two 5-bit signed immediates,
+ *  using the encoding format from insSveEncodeTwoSimm5.
+ */
+/*static*/ void emitter::insSveDecodeTwoSimm5(ssize_t imm, /* OUT */ ssize_t* const imm1, /* OUT */ ssize_t* const imm2)
+{
+    assert(imm1 != nullptr);
+    assert(imm2 != nullptr);
+
+    *imm1 = (imm & 0x1F);
+
+    if ((imm & 0x20) != 0)
+    {
+        *imm1 *= -1;
+    }
+
+    imm >>= 6;
+    *imm2 = (imm & 0x1F);
+
+    if ((imm & 0x20) != 0)
+    {
+        *imm2 *= -1;
+    }
+
+    assert(isValidSimm<5>(*imm1));
+    assert(isValidSimm<5>(*imm2));
+}
+
+/************************************************************************
+ *
+ *  Convert a small immediate float value to an encoded version that matches one-to-one with the instructions.
+ *  The instruction determines the value.
+ */
+
+/*static*/ ssize_t emitter::emitEncodeSmallFloatImm(double immDbl, instruction ins)
+{
+#ifdef DEBUG
+    switch (ins)
+    {
+        case INS_sve_fadd:
+        case INS_sve_fsub:
+        case INS_sve_fsubr:
+            assert((immDbl == 0.5) || (immDbl == 1.0));
+            break;
+
+        case INS_sve_fmax:
+        case INS_sve_fmaxnm:
+        case INS_sve_fmin:
+        case INS_sve_fminnm:
+            assert((immDbl == 0) || (immDbl == 1.0));
+            break;
+
+        case INS_sve_fmul:
+            assert((immDbl == 0.5) || (immDbl == 2.0));
+            break;
+
+        default:
+            assert(!"Invalid instruction");
+            break;
+    }
+#endif // DEBUG
+    if (immDbl < 1.0)
+    {
+        return 0;
+    }
+    return 1;
+}
+
+/************************************************************************
+ *
+ *  Convert an encoded small float immediate value. The instruction determines the value.
+ */
+
+/*static*/ double emitter::emitDecodeSmallFloatImm(ssize_t imm, instruction ins)
+{
+    assert(emitIsValidEncodedSmallFloatImm(imm));
+    switch (ins)
+    {
+        case INS_sve_fadd:
+        case INS_sve_fsub:
+        case INS_sve_fsubr:
+            if (imm == 0)
+            {
+                return 0.5;
+            }
+            else
+            {
+                return 1.0;
+            }
+
+        case INS_sve_fmax:
+        case INS_sve_fmaxnm:
+        case INS_sve_fmin:
+        case INS_sve_fminnm:
+            if (imm == 0)
+            {
+                return 0.0;
+            }
+            else
+            {
+                return 1.0;
+            }
+            break;
+
+        case INS_sve_fmul:
+            if (imm == 0)
+            {
+                return 0.5;
+            }
+            else
+            {
+                return 2.0;
+            }
+            break;
+
+        default:
+            break;
+    }
+
+    assert(!"Invalid instruction");
+    return 0.0;
+}
+
+/************************************************************************
+ *
+ *  Check if the immediate value is a valid encoded small float.
+ */
+
+/*static*/ bool emitter::emitIsValidEncodedSmallFloatImm(size_t imm)
+{
+    return (imm == 0) || (imm == 1);
+}
+
+/************************************************************************
+ *
+ *  Convert a rotation value that is 90 or 270 into a smaller encoding that matches one-to-one with the 'rot' field.
+ */
+
+/*static*/ ssize_t emitter::emitEncodeRotationImm90_or_270(ssize_t imm)
+{
+    switch (imm)
+    {
+        case 90:
+            return 0;
+
+        case 270:
+            return 1;
+
+        default:
+            break;
+    }
+
+    assert(!"Invalid rotation value");
+    return 0;
+}
+
+/************************************************************************
+ *
+ *  Convert an encoded rotation value to 90 or 270.
+ */
+
+/*static*/ ssize_t emitter::emitDecodeRotationImm90_or_270(ssize_t imm)
+{
+    assert(emitIsValidEncodedRotationImm0_to_270(imm));
+    switch (imm)
+    {
+        case 0:
+            return 90;
+
+        case 1:
+            return 270;
+
+        default:
+            break;
+    }
+
+    return 0;
+}
+
+/************************************************************************
+ *
+ *  Check if the immediate value is a valid encoded rotation value for 90 or 270.
+ */
+
+/*static*/ bool emitter::emitIsValidEncodedRotationImm90_or_270(ssize_t imm)
+{
+    return (imm == 0) || (imm == 1);
+}
+
+/************************************************************************
+ *
+ *  Convert a rotation value that is 0, 90, 180 or 270 into a smaller encoding that matches one-to-one with the 'rot'
+ * field.
+ */
+
+/*static*/ ssize_t emitter::emitEncodeRotationImm0_to_270(ssize_t imm)
+{
+    switch (imm)
+    {
+        case 0:
+            return 0;
+
+        case 90:
+            return 1;
+
+        case 180:
+            return 2;
+
+        case 270:
+            return 3;
+
+        default:
+            break;
+    }
+
+    assert(!"Invalid rotation value");
+    return 0;
+}
+
+/************************************************************************
+ *
+ *  Convert an encoded rotation value to 0, 90, 180 or 270.
+ */
+
+/*static*/ ssize_t emitter::emitDecodeRotationImm0_to_270(ssize_t imm)
+{
+    assert(emitIsValidEncodedRotationImm0_to_270(imm));
+    switch (imm)
+    {
+        case 0:
+            return 0;
+
+        case 1:
+            return 90;
+
+        case 2:
+            return 180;
+
+        case 3:
+            return 270;
+
+        default:
+            break;
+    }
+
+    return 0;
+}
+
+/************************************************************************
+ *
+ *  Check if the immediate value is a valid encoded rotation value for 0, 90, 180 or 270.
+ */
+
+/*static*/ bool emitter::emitIsValidEncodedRotationImm0_to_270(ssize_t imm)
+{
+    return (imm >= 0) && (imm <= 3);
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select an insSvePattern
+ */
+/*static*/ emitter::code_t emitter::insEncodeSvePattern(insSvePattern pattern)
+{
+    return (code_t)((unsigned)pattern << 5);
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding for an immediate in the SVE variant of dup (indexed)
+ */
+/*static*/ emitter::code_t emitter::insEncodeSveBroadcastIndex(emitAttr elemsize, ssize_t index)
+{
+    unsigned lane_bytes = genLog2(elemsize) + 1;
+    code_t   tsz        = (1 << (lane_bytes - 1));
+    code_t   imm        = (code_t)index << lane_bytes | tsz;
+    return insEncodeSplitUimm<23, 22, 20, 16>(imm);
+}
+
+/*****************************************************************************
+ *
+ *  Append the machine code corresponding to the given SVE instruction descriptor.
+ */
+BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
+{
+    code_t      code = 0;
+    instruction ins  = id->idIns();
+    insFormat   fmt  = id->idInsFmt();
+    emitAttr    size = id->idOpSize();
+
+    ssize_t imm;
+
+    switch (fmt)
+    {
+        // Scalable.
+        case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated)
+        case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated)
+        case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated)
+        case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated)
+        case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated)
+        case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated)
+        case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords)
+        case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated)
+        case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords)
+        case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated)
+        case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords)
+        case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated)
+        case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated)
+        case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated)
+        case IF_SVE_AQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer unary operations (predicated)
+        case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements
+        case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector
+        case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar
+        case IF_SVE_CP_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector
+                           // (predicated)
+        case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register
+        case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements
+        case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated)
+        case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long
+        case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic
+        case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated)
+        case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract
+        case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left
+                           // (predicated)
+        case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations
+        case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords)
+        case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction
+        case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated)
+        case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated)
+        case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value
+        case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                    // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                      // mmmmm or nnnnn
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_AB_3B: // ................ ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated)
+        case IF_SVE_HL_3B: // ................ ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // mmmmm
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        // Scalable with Merge or Zero predicate
+        case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // nnnnn
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                    // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                      // ddddd
+            code |= insEncodePredQualifier_16(id->idPredicateReg2Merge());   // M
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        // Scalable with shift immediate
+        case IF_SVE_AM_2A: // ........xx...... ...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated)
+        {
+            bool isRightShift = emitInsIsVectorRightShift(ins);
+            imm               = emitGetInsSC(id);
+            code              = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |=
+                insEncodeSveShift_23_to_22_9_to_0(optGetSveElemsize(id->idInsOpt()), isRightShift, imm); // xx, xxiii
+            dst += emitOutput_Instr(dst, code);
+        }
+        break;
+
+        // Scalable, 4 regs. Reg4 in mmmmm.
+        case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend
+                           // (predicated)
+        case IF_SVE_GI_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE2 histogram generation (vector)
+        case IF_SVE_HU_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                    // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                      // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg4());                    // mmmmm
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        // Scalable, 4 regs. Reg4 in aaaaa.
+        case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand
+                           // (predicated)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                    // ggg
+            code |= insEncodeReg_V<20, 16>(id->idReg3());                    // mmmmm
+            code |= insEncodeReg_V<9, 5>(id->idReg4());                      // aaaaa
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        // Scalable, 3 regs, no predicates
+        case IF_SVE_AT_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated)
+        case IF_SVE_BD_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
+        case IF_SVE_BE_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high
+                             // (unpredicated)
+        case IF_SVE_BG_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated)
+        case IF_SVE_BK_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient
+        case IF_SVE_BR_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector segments
+        case IF_SVE_BZ_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources)
+        case IF_SVE_BZ_3A_A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources)
+        case IF_SVE_CA_3A:   // ........xx.mmmmm ......nnnnnddddd -- sve_int_perm_tbxquads
+        case IF_SVE_EH_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE integer dot product (unpredicated)
+        case IF_SVE_EL_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply-add long
+        case IF_SVE_EM_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add high
+        case IF_SVE_EN_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add interleaved long
+        case IF_SVE_EO_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long
+        case IF_SVE_EV_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp
+        case IF_SVE_EX_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector elements (quadwords)
+        case IF_SVE_FL_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long
+        case IF_SVE_FM_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide
+        case IF_SVE_FN_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long
+        case IF_SVE_FP_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved
+        case IF_SVE_FQ_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute
+        case IF_SVE_FS_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long
+        case IF_SVE_FW_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate
+        case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long
+        case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part
+        case IF_SVE_GF_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment)
+        case IF_SVE_GW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE FP clamp
+        case IF_SVE_HK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());                      // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg3());                    // mmmmm
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        // Scalable, 3 regs, no predicates. General purpose source registers
+        case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register
+                           // increment)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());                           // nnnnn
+            code |= insEncodeReg_Rm(id->idReg3());                           // mmmmm
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BH_3A: // .........x.mmmmm ....hhnnnnnddddd -- SVE address generation
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());      // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());      // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg3());    // mmmmm
+            code |= insEncodeUimm<11, 10>(emitGetInsSC(id)); // hh
+            code |= insEncodeUimm<22, 22>(id->idInsOpt() == INS_OPTS_SCALABLE_D ? 1 : 0);
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BH_3B:   // ...........mmmmm ....hhnnnnnddddd -- SVE address generation
+        case IF_SVE_BH_3B_A: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());      // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());      // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg3());    // mmmmm
+            code |= insEncodeUimm<11, 10>(emitGetInsSC(id)); // hh
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        // Immediate and pattern to general purpose.
+        case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count
+        case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_Rd(id->idReg1());           // ddddd
+            code |= insEncodeSvePattern(id->idSvePattern()); // ppppp
+            code |= insEncodeUimm<19, 16>(imm - 1);          // iiii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_Rd(id->idReg1());              // ddddd
+            code |= insEncodeSvePattern(id->idSvePattern());    // ppppp
+            code |= insEncodeUimm<19, 16>(imm - 1);             // iiii
+            code |= insEncodeSveElemsize_sz_20(id->idOpSize()); // X
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive)
+        case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn/mmmmm
+            code |= insEncodeUimm<12, 10>(imm & 0b111); // iii
+            code |= insEncodeUimm<20, 16>(imm >> 3);    // iiiii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count
+        case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());      // ddddd
+            code |= insEncodeSvePattern(id->idSvePattern()); // ppppp
+            code |= insEncodeUimm<19, 16>(imm - 1);          // iiii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BS_1A: // ..............ii iiiiiiiiiiiddddd -- SVE bitwise logical with immediate (unpredicated)
+        case IF_SVE_BT_1A: // ..............ii iiiiiiiiiiiddddd -- SVE broadcast bitmask immediate
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
+            code |= (imm << 5);
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeImm8_12_to_5(imm);                           // iiiiiiii
+            code |= insEncodeReg_P<19, 16>(id->idReg2());                 // gggg
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BV_2A:   // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+        case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeReg_P<19, 16>(id->idReg2());                 // gggg
+            code |= insEncodeImm8_12_to_5(imm);                           // iiiiiiii
+            code |= (id->idHasShift() ? 0x2000 : 0);                      // h
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BV_2B: // ........xx..gggg ...........ddddd -- SVE copy integer immediate (predicated)
+            // In emitIns, we set this format's instruction to MOV, as that is the preferred disassembly.
+            // However, passing (MOV, IF_SVE_BV_2B) to emitInsCodeSve will assert with "encoding_found",
+            // as FMOV is the only instruction associated with this encoding format.
+            // Thus, always pass FMOV here, and use MOV elsewhere for simplicity.
+            code = emitInsCodeSve(INS_sve_fmov, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeReg_P<19, 16>(id->idReg2());                 // gggg
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BW_2A: // ........ii.xxxxx ......nnnnnddddd -- SVE broadcast indexed element
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn
+            code |= insEncodeSveBroadcastIndex(optGetSveElemsize(id->idInsOpt()), imm);
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1()); // DDDD
+            code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CE_2B: // .........i...ii. ......nnnnn.DDDD -- SVE move predicate from vector
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                   // DDDD
+            code |= insEncodeReg_V<9, 5>(id->idReg2());                   // nnnnn
+            code |= insEncodeSplitUimm<22, 22, 18, 17>(emitGetInsSC(id)); // i...ii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CE_2C: // ..............i. ......nnnnn.DDDD -- SVE move predicate from vector
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());      // DDDD
+            code |= insEncodeReg_V<9, 5>(id->idReg2());      // nnnnn
+            code |= insEncodeUimm<17, 17>(emitGetInsSC(id)); // i
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CE_2D: // .............ii. ......nnnnn.DDDD -- SVE move predicate from vector
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());      // DDDD
+            code |= insEncodeReg_V<9, 5>(id->idReg2());      // nnnnn
+            code |= insEncodeUimm<18, 17>(emitGetInsSC(id)); // ii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CF_2A: // ................ .......NNNNddddd -- SVE move predicate into vector
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
+            code |= insEncodeReg_P<8, 5>(id->idReg2()); // NNNN
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CF_2B: // .........i...ii. .......NNNNddddd -- SVE move predicate into vector
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeReg_P<8, 5>(id->idReg2());                   // NNNN
+            code |= insEncodeSplitUimm<22, 22, 18, 17>(emitGetInsSC(id)); // i...ii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CF_2C: // ..............i. .......NNNNddddd -- SVE move predicate into vector
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());      // ddddd
+            code |= insEncodeReg_P<8, 5>(id->idReg2());      // NNNN
+            code |= insEncodeUimm<17, 17>(emitGetInsSC(id)); // i
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CF_2D: // .............ii. .......NNNNddddd -- SVE move predicate into vector
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());      // ddddd
+            code |= insEncodeReg_P<8, 5>(id->idReg2());      // NNNN
+            code |= insEncodeUimm<18, 17>(emitGetInsSC(id)); // ii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CC_2A: // ........xx...... ......mmmmmddddd -- SVE insert SIMD&FP scalar register
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());                      // mmmmm
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CD_2A: // ........xx...... ......mmmmmddddd -- SVE insert general register
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_R<9, 5>(id->idReg2());                      // mmmmm
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                   // DDDD
+            code |= insEncodeReg_P<8, 5>(id->idReg2());                   // NNNN
+            code |= insEncodeReg_P<19, 16>(id->idReg3());                 // MMMM
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CJ_2A: // ........xx...... .......nnnn.dddd -- SVE reverse predicate elements
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                   // DDDD
+            code |= insEncodeReg_P<8, 5>(id->idReg2());                   // NNNN
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1()); // DDDD
+            code |= insEncodeReg_P<8, 5>(id->idReg2()); // NNNN
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_GQ_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision odd elements
+            code = emitInsCodeSve(ins, fmt);
+
+            if (ins == INS_sve_fcvtnt && id->idInsOpt() == INS_OPTS_D_TO_S)
+            {
+                code |= (1 << 22 | 1 << 17);
+            }
+            else if (ins == INS_sve_fcvtlt && id->idInsOpt() == INS_OPTS_S_TO_D)
+            {
+                code |= (1 << 22 | 1 << 17);
+            }
+
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        // Scalable to general register.
+        case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register
+        case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_Rd(id->idReg1());                           // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                    // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                      // mmmmm
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        // Scalable from general register.
+        case IF_SVE_CQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy general register to vector (predicated)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                    // ggg
+            code |= insEncodeReg_Rn(id->idReg3());                           // mmmmm
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CT_3A: // ................ ...gggnnnnnddddd -- SVE reverse doublewords
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
+        case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                 // VVV
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                   // nnnnn/mmmmm
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CW_4A: // ........xx.mmmmm ..VVVVnnnnnddddd -- SVE select vector elements (predicated)
+        {
+            regNumber reg4 = (ins == INS_sve_mov ? id->idReg1() : id->idReg4());
+            code           = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeReg_P<13, 10>(id->idReg2());                 // VVVV
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                   // nnnnn
+            code |= insEncodeReg_V<20, 16>(reg4);                         // mmmmm
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+        }
+
+        case IF_SVE_CX_4A:   // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
+        case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
+        case IF_SVE_GE_4A:   // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
+        case IF_SVE_HT_4A:   // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                   // DDDD
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                 // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                   // mmmmm
+            code |= insEncodeReg_V<20, 16>(id->idReg4());                 // nnnnn
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                   // DDDD
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                 // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                   // nnnnn
+            code |= insEncodeSimm<20, 16>(imm);                           // iiiii
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                   // DDDD
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                 // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                   // nnnnn
+            code |= insEncodeUimm<20, 14>(imm);                           // iiiii
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_EW_3A:   // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer)
+        case IF_SVE_BR_3B:   // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments
+        case IF_SVE_FN_3B:   // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long
+        case IF_SVE_FO_3A:   // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate
+        case IF_SVE_AT_3B:   // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated)
+        case IF_SVE_AU_3A:   // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated)
+        case IF_SVE_BD_3B:   // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
+        case IF_SVE_EF_3A:   // ...........mmmmm ......nnnnnddddd -- SVE two-way dot product
+        case IF_SVE_EI_3A:   // ...........mmmmm ......nnnnnddddd -- SVE mixed sign dot product
+        case IF_SVE_GJ_3A:   // ...........mmmmm ......nnnnnddddd -- SVE2 crypto constructive binary operations
+        case IF_SVE_GN_3A:   // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long
+        case IF_SVE_GO_3A:   // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long long
+        case IF_SVE_GW_3B:   // ...........mmmmm ......nnnnnddddd -- SVE FP clamp
+        case IF_SVE_HA_3A:   // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product
+        case IF_SVE_HA_3A_E: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product
+        case IF_SVE_HA_3A_F: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product
+        case IF_SVE_HB_3A:   // ...........mmmmm ......nnnnnddddd -- SVE floating-point multiply-add long
+        case IF_SVE_HD_3A:   // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate
+        case IF_SVE_HD_3A_A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate
+        case IF_SVE_HK_3B:   // ...........mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg3()); // mmmmm
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_AV_3A: // ...........mmmmm ......kkkkkddddd -- SVE2 bitwise ternary operations
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_V<20, 16>(id->idReg2()); // mmmmm
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // kkkkk
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm
+            imm  = insSveGetImmDiff(emitGetInsSC(id), id->idInsOpt());
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                                            // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());                                            // mmmmm
+            code |= insEncodeUimm<20, 16>(imm & 0b11111);                                          // xxiii
+            code |= insEncodeUimm<22, 22>(imm >> 5);                                               // x
+            code |= insEncodeSveElemsize_tszh_23_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate
+                           // increment)
+        {
+            ssize_t imm1;
+            ssize_t imm2;
+            insSveDecodeTwoSimm5(emitGetInsSC(id), &imm1, &imm2);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeSimm<9, 5>(imm1);                            // iiiii
+            code |= insEncodeSimm<20, 16>(imm2);                          // iiiii
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+        }
+
+        case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register
+                           // increment)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeSimm<9, 5>(emitGetInsSC(id));                // iiiii
+            code |= insEncodeReg_R<20, 16>(id->idReg2());                 // mmmmm
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate
+                           // increment)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeReg_R<9, 5>(id->idReg2());                   // mmmmm
+            code |= insEncodeSimm<20, 16>(emitGetInsSC(id));              // iiiii
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BB_2A: // ...........nnnnn .....iiiiiiddddd -- SVE stack frame adjustment
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_R<4, 0>(id->idReg1());     // ddddd
+            code |= insEncodeSimm<10, 5>(emitGetInsSC(id)); // iiiiii
+            code |= insEncodeReg_R<20, 16>(id->idReg2());   // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BC_1A: // ................ .....iiiiiiddddd -- SVE stack frame size
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_R<4, 0>(id->idReg1());     // ddddd
+            code |= insEncodeSimm<10, 5>(emitGetInsSC(id)); // iiiiii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // aaaaa
+            code |= insEncodeReg_V<20, 16>(id->idReg2()); // mmmmm
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_EG_3A:   // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed)
+        case IF_SVE_EY_3A:   // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed)
+        case IF_SVE_EZ_3A:   // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed)
+        case IF_SVE_FD_3B:   // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
+        case IF_SVE_FF_3B:   // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
+        case IF_SVE_FI_3B:   // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
+        case IF_SVE_GU_3A:   // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
+        case IF_SVE_GX_3A:   // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
+        case IF_SVE_GY_3B:   // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
+        case IF_SVE_GY_3B_D: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
+        case IF_SVE_FK_3B:   // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());      // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());      // nnnnn
+            code |= insEncodeReg_V<18, 16>(id->idReg3());    // mmm
+            code |= insEncodeUimm<20, 19>(emitGetInsSC(id)); // ii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
+        case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
+        case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
+        case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
+        case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
+        case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeReg_V<18, 16>(id->idReg3()); // mmm
+            code |= insEncodeUimm<20, 19>(imm & 0b11);    // ii
+            code |= insEncodeUimm<22, 22>(imm >> 2);      // i
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
+        case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
+        case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
+        case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
+        case IF_SVE_GY_3A: // ...........iimmm ....i.nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
+        case IF_SVE_GZ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE floating-point multiply-add long (indexed)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeUimm<11, 11>(imm & 1);       // i
+            code |= insEncodeReg_V<18, 16>(id->idReg3()); // mmm
+            code |= insEncodeUimm<20, 19>(imm >> 1);      // ii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
+        case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
+        case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
+        case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeUimm<11, 11>(imm & 1);       // i
+            code |= insEncodeReg_V<19, 16>(id->idReg3()); // mmmm
+            code |= insEncodeUimm<20, 19>(imm & 0b10);    // i
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed)
+        case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
+        case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
+        case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
+        case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
+        case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
+        case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeReg_V<19, 16>(id->idReg3()); // mmmm
+
+            // index is encoded at bit location 20;
+            // left-shift by one bit so we can reuse insEncodeUimm<20, 19> without modifying bit location 19
+            code |= insEncodeUimm<20, 19>(emitGetInsSC(id) << 1); // i
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+        case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition
+        {
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());   // DDDD
+            code |= insEncodeReg_P<13, 10>(id->idReg2()); // gggg
+            code |= insEncodeReg_P<8, 5>(id->idReg3());   // NNNN
+
+            regNumber regm;
+            switch (ins)
+            {
+                case INS_sve_mov:
+                case INS_sve_movs:
+                    regm = id->idReg3();
+                    break;
+
+                case INS_sve_not:
+                case INS_sve_nots:
+                    regm = id->idReg2();
+                    break;
+
+                default:
+                    regm = id->idReg4();
+            }
+
+            code |= insEncodeReg_P<19, 16>(regm); // MMMM
+            dst += emitOutput_Instr(dst, code);
+            break;
+        }
+
+        case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+        case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());   // DDDD
+            code |= insEncodeReg_P<13, 10>(id->idReg2()); // NNNN
+            code |= insEncodeReg_P<8, 5>(id->idReg2());   // NNNN
+            code |= insEncodeReg_P<19, 16>(id->idReg2()); // NNNN
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());   // DDDD
+            code |= insEncodeReg_P<13, 10>(id->idReg2()); // gggg
+            code |= insEncodeReg_P<8, 5>(id->idReg3());   // NNNN
+            code |= insEncodeReg_P<19, 16>(id->idReg1()); // DDDD
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DB_3A: // ................ ..gggg.NNNNMDDDD -- SVE partition break condition
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                   // DDDD
+            code |= insEncodeReg_P<13, 10>(id->idReg2());                 // gggg
+            code |= insEncodeReg_P<8, 5>(id->idReg3());                   // NNNN
+            code |= insEncodePredQualifier_4(id->idPredicateReg2Merge()); // M
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DB_3B: // ................ ..gggg.NNNN.DDDD -- SVE partition break condition
+        case IF_SVE_DC_3A: // ................ ..gggg.NNNN.MMMM -- SVE propagate break to next partition
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());   // DDDD
+            code |= insEncodeReg_P<13, 10>(id->idReg2()); // gggg
+            code |= insEncodeReg_P<8, 5>(id->idReg3());   // NNNN
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active
+        case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1()); // DDDD
+            code |= insEncodeReg_P<8, 5>(id->idReg2()); // gggg
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DE_1A: // ........xx...... ......ppppp.DDDD -- SVE predicate initialize
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeSvePattern(id->idSvePattern());              // ppppp
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                   // DDDD
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DF_2A: // ........xx...... .......VVVV.DDDD -- SVE predicate next active
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                   // DDDD
+            code |= insEncodeReg_P<8, 5>(id->idReg2());                   // VVVV
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated)
+        case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1()); // DDDD
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<13, 10>(id->idReg1()); // gggg
+            code |= insEncodeReg_P<8, 5>(id->idReg2());   // NNNN
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DK_3A: // ........xx...... ..gggg.NNNNddddd -- SVE predicate count
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_R<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeReg_P<13, 10>(id->idReg2());                 // gggg
+            code |= insEncodeReg_P<8, 5>(id->idReg3());                   // NNNN
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow
+            imm = emitGetInsSC(id);
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_H);
+            assert(emitInsIsVectorRightShift(id->idIns()));
+            assert(isValidVectorShiftAmount(imm, EA_4BYTE, /* rightShift */ true));
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeVectorShift(EA_4BYTE, true /* right-shift */, imm); // iiii
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                          // ddddd
+            code |= insEncodeReg_V_9_to_6_Times_Two(id->idReg2());               // nnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeVectorLengthSpecifier(id);                      // l
+            code |= insEncodeReg_R<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_P<8, 5>(id->idReg2());                      // NNNN
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_R<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_P<8, 5>(id->idReg2());                      // MMMM
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
+        case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_P<8, 5>(id->idReg2());                      // MMMM
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_R<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_P<8, 5>(id->idReg2());                      // MMMM
+            code |= insEncodeVLSElemsize(id->idOpSize());                    // X
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise
+            code = emitInsCodeSve(ins, fmt);
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<8, 5>(id->idReg1()); // NNNN
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_R<9, 5>(id->idReg1());        // nnnnn
+            code |= insEncodeReg_R<20, 16>(id->idReg2());      // mmmmm
+            code |= insEncodeSveElemsize_R_22(id->idOpSize()); // x
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_FZ_2A: // ................ ......nnnn.ddddd -- SME2 multi-vec extract narrow
+        case IF_SVE_HG_2A: // ................ ......nnnn.ddddd -- SVE2 FP8 downconverts
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());            // ddddd
+            code |= insEncodeReg_V_9_to_6_Times_Two(id->idReg2()); // nnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn
+            // Bit 23 should not be set by below call
+            assert(insOptsScalableWide(id->idInsOpt()));
+            code |= insEncodeSveElemsize_tszh_23_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx
+                                                                                                   // x
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_FR_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());      // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());      // nnnnn
+            code |= insEncodeUimm<20, 16>(emitGetInsSC(id)); // iii
+            // Bit 23 should not be set by below call
+            assert(insOptsScalableWide(id->idInsOpt()));
+            code |= insEncodeSveElemsize_tszh_23_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx
+                                                                                                   // x
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_GB_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift right narrow
+            // Bit 23 should not be set by call to insEncodeSveElemsize_tszh_23_tszl_20_to_19,
+            // nor should we pass INS_OPTS_SCALABLE_D to insGetImmDiff.
+            assert(insOptsScalableWide(id->idInsOpt()));
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                                            // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());                                            // nnnnn
+            code |= insEncodeUimm<20, 16>(insSveGetImmDiff(emitGetInsSC(id), id->idInsOpt()));     // iii
+            code |= insEncodeSveElemsize_tszh_23_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx
+                                                                                                   // x
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_FV_2A: // ........xx...... .....rmmmmmddddd -- SVE2 complex integer add
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());                   // mmmmm
+            code |= insEncodeUimm<10, 10>(emitGetInsSC(id));              // r
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_FY_3A: // .........x.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long with carry
+        {
+            // Size encoding: 1 if INS_OPTS_SCALABLE_D, 0 if INS_OPTS_SCALABLE_S
+            const ssize_t sizeEncoding = (id->idInsOpt() == INS_OPTS_SCALABLE_D) ? 1 : 0;
+            code                       = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg3()); // mmmmm
+            code |= insEncodeUimm<22, 22>(sizeEncoding);  // x
+            dst += emitOutput_Instr(dst, code);
+            break;
+        }
+
+        case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2()); // mmmmm
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_GL_1A: // ................ ...........ddddd -- SVE2 crypto unary operations
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                      // DDDD
+            code |= insEncodeReg_R<9, 5>(id->idReg2());                      // nnnnn
+            code |= (id->idOpSize() == EA_8BYTE) ? (1 << 12) : 0;            // X
+            code |= insEncodeReg_R<20, 16>(id->idReg3());                    // mmmmm
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
+        case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                   // DDDD
+            code |= insEncodeReg_P<7, 5>(id->idReg2());                   // NNN
+            code |= insEncodeUimm<9, 8>(emitGetInsSC(id));                // ii (or i)
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. -- SVE integer compare scalar count and limit (predicate
+                           // pair)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 1>(id->idReg1());                      // DDD
+            code |= insEncodeReg_R<9, 5>(id->idReg2());                      // nnnnn
+            code |= insEncodeReg_R<20, 16>(id->idReg3());                    // mmmmm
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit
+                           // (predicate-as-counter)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeVectorLengthSpecifier(id);                   // l
+            code |= insEncodeReg_P<2, 0>(id->idReg1());                   // DDD
+            code |= insEncodeReg_R<9, 5>(id->idReg2());                   // nnnnn
+            code |= insEncodeReg_R<20, 16>(id->idReg3());                 // mmmmm
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<2, 0>(id->idReg1());                   // DDD
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated)
+        case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated)
+        case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated)
+        {
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeImm8_12_to_5(imm);                           // iiiiiiii
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+        }
+
+        case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
+        case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
+        case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
+        {
+            const ssize_t imm   = emitGetInsSC(id);
+            const ssize_t rot   = (imm & 0b11);
+            const ssize_t index = (imm >> 2);
+            code                = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeUimm<11, 10>(rot);           // rr
+            code |= insEncodeReg_V<18, 16>(id->idReg3()); // mmm
+            code |= insEncodeUimm<20, 19>(index);         // ii
+            dst += emitOutput_Instr(dst, code);
+            break;
+        }
+
+        case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product
+        case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());                   // nnnnn
+            code |= insEncodeUimm<11, 10>(emitGetInsSC(id));              // rr
+            code |= insEncodeReg_V<20, 16>(id->idReg3());                 // mmmmm
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
+        case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
+        case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
+        case IF_SVE_GV_3A: // ...........immmm ....rrnnnnnddddd -- SVE floating-point complex multiply-add (indexed)
+        {
+            const ssize_t imm   = emitGetInsSC(id);
+            const ssize_t rot   = (imm & 0b11);
+            const ssize_t index = (imm >> 2);
+            code                = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeReg_V<19, 16>(id->idReg3()); // mmmm
+            code |= insEncodeUimm<11, 10>(rot);           // rr
+
+            // index is encoded at bit location 20;
+            // left-shift by one bit so we can reuse insEncodeUimm<20, 19> without modifying bit location 19
+            code |= insEncodeUimm<20, 19>(index << 1); // i
+            dst += emitOutput_Instr(dst, code);
+            break;
+        }
+
+        case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated)
+        case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            code |= insEncodeImm8_12_to_5(imm);                           // iiiiiiii
+            code |= (id->idHasShift() ? 0x2000 : 0);                      // h
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated)
+            // ins is MOV for this encoding, as it is the preferred disassembly, so pass FMOV to emitInsCodeSve
+            code = emitInsCodeSve(INS_sve_fmov, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                   // ddddd
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                      // DDDD
+            code |= insEncodeReg_R<9, 5>(id->idReg2());                      // nnnnn
+            code |= insEncodeReg_R<20, 16>(id->idReg3());                    // mmmmm
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_DV_4A: // ........ix.xxxvv ..NNNN.MMMM.DDDD -- SVE broadcast predicate element
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                                       // DDDD
+            code |= insEncodeReg_P<13, 10>(id->idReg2());                                     // NNNN
+            code |= insEncodeReg_P<8, 5>(id->idReg3());                                       // MMMM
+            code |= insEncodeReg_R<17, 16>(id->idReg4());                                     // vv
+            code |= insEncodeSveElemsize_tszh_tszl_and_imm(id->idInsOpt(), emitGetInsSC(id)); // ix xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HO_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HO_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // nnnnn
+            switch (id->idInsOpt())
+            {
+                case INS_OPTS_H_TO_S:
+                    code |= (1 << 16);
+                    break;
+                case INS_OPTS_H_TO_D:
+                    code |= (1 << 22) | (1 << 16);
+                    break;
+                case INS_OPTS_S_TO_H:
+                    break;
+                case INS_OPTS_S_TO_D:
+                    code |= (1 << 22) | (3 << 16);
+                    break;
+                case INS_OPTS_D_TO_H:
+                    code |= (1 << 22);
+                    break;
+                case INS_OPTS_D_TO_S:
+                    code |= (1 << 22) | (1 << 17);
+                    break;
+                default:
+                    unreached();
+            }
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HO_3C: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HP_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert to integer
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // nnnnn
+
+            switch (id->idInsOpt())
+            {
+                case INS_OPTS_SCALABLE_H:
+                    code |= (1 << 22) | (1 << 17);
+                    break;
+                case INS_OPTS_H_TO_S:
+                    code |= (1 << 22) | (1 << 18);
+                    break;
+                case INS_OPTS_H_TO_D:
+                    code |= (1 << 22) | (3 << 17);
+                    break;
+                case INS_OPTS_SCALABLE_S:
+                    code |= (1 << 23) | (1 << 18);
+                    break;
+                case INS_OPTS_S_TO_D:
+                    code |= (3 << 22) | (1 << 18);
+                    break;
+                case INS_OPTS_D_TO_S:
+                    code |= (3 << 22);
+                    break;
+                case INS_OPTS_SCALABLE_D:
+                    code |= (3 << 22) | (3 << 17);
+                    break;
+                default:
+                    unreached();
+                    break;
+            }
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HS_3A: // ................ ...gggnnnnnddddd -- SVE integer convert to floating-point
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // nnnnn
+
+            switch (id->idInsOpt())
+            {
+                case INS_OPTS_SCALABLE_H:
+                    code |= (1 << 22) | (1 << 17);
+                    break;
+                case INS_OPTS_S_TO_H:
+                    code |= (1 << 22) | (1 << 18);
+                    break;
+                case INS_OPTS_SCALABLE_S:
+                    code |= (1 << 23) | (1 << 18);
+                    break;
+                case INS_OPTS_S_TO_D:
+                    code |= (1 << 23) | (1 << 22);
+                    break;
+                case INS_OPTS_D_TO_H:
+                    code |= (1 << 22) | (3 << 17);
+                    break;
+                case INS_OPTS_D_TO_S:
+                    code |= (3 << 22) | (1 << 18);
+                    break;
+                case INS_OPTS_SCALABLE_D:
+                    code |= (3 << 22) | (3 << 17);
+                    break;
+                default:
+                    unreached();
+                    break;
+            }
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_IH_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
+                             // immediate)
+        case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
+                             // immediate)
+        case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
+                             // immediate)
+        case IF_SVE_IJ_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate)
+        case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
+                             // immediate)
+        case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
+                             // immediate)
+        case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
+                             // immediate)
+        case IF_SVE_IM_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus
+                             // immediate)
+        case IF_SVE_IO_3A:   // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus
+                             // immediate)
+        case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
+                           // immediate)
+        case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate)
+        case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
+                           // immediate)
+        case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
+                           // immediate)
+        case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+        case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+        case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_R<9, 5>(id->idReg3());   // nnnnn
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+
+            switch (ins)
+            {
+                case INS_sve_ld2b:
+                case INS_sve_ld2h:
+                case INS_sve_ld2w:
+                case INS_sve_ld2d:
+                case INS_sve_ld2q:
+                case INS_sve_st2b:
+                case INS_sve_st2h:
+                case INS_sve_st2w:
+                case INS_sve_st2d:
+                case INS_sve_st2q:
+                    code |= insEncodeSimm_MultipleOf<19, 16, 2>(imm); // iiii
+                    break;
+
+                case INS_sve_ld3b:
+                case INS_sve_ld3h:
+                case INS_sve_ld3w:
+                case INS_sve_ld3d:
+                case INS_sve_ld3q:
+                case INS_sve_st3b:
+                case INS_sve_st3h:
+                case INS_sve_st3w:
+                case INS_sve_st3d:
+                case INS_sve_st3q:
+                    code |= insEncodeSimm_MultipleOf<19, 16, 3>(imm); // iiii
+                    break;
+
+                case INS_sve_ld4b:
+                case INS_sve_ld4h:
+                case INS_sve_ld4w:
+                case INS_sve_ld4d:
+                case INS_sve_ld4q:
+                case INS_sve_st4b:
+                case INS_sve_st4h:
+                case INS_sve_st4w:
+                case INS_sve_st4d:
+                case INS_sve_st4q:
+                    code |= insEncodeSimm_MultipleOf<19, 16, 4>(imm); // iiii
+                    break;
+
+                case INS_sve_ld1rqb:
+                case INS_sve_ld1rqd:
+                case INS_sve_ld1rqh:
+                case INS_sve_ld1rqw:
+                    code |= insEncodeSimm_MultipleOf<19, 16, 16>(imm); // iiii
+                    break;
+
+                case INS_sve_ld1rob:
+                case INS_sve_ld1rod:
+                case INS_sve_ld1roh:
+                case INS_sve_ld1row:
+                    code |= insEncodeSimm_MultipleOf<19, 16, 32>(imm); // iiii
+                    break;
+
+                default:
+                    code |= insEncodeSimm<19, 16>(imm); // iiii
+                    break;
+            }
+
+            if (canEncodeSveElemsize_dtype(ins))
+            {
+                if (ins == INS_sve_ld1w)
+                {
+                    code = insEncodeSveElemsize_dtype_ld1w(ins, fmt, optGetSveElemsize(id->idInsOpt()), code);
+                }
+                else
+                {
+                    code = insEncodeSveElemsize_dtype(ins, optGetSveElemsize(id->idInsOpt()), code);
+                }
+            }
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                               // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                             // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg3());                               // nnnnn
+            code |= insEncodeReg_R<20, 16>(id->idReg4());                             // mmmmm
+            code |= insEncodeSveElemsize_22_to_21(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                            // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                          // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg3());                            // nnnnn
+            code |= insEncodeReg_R<20, 16>(id->idReg4());                          // mmmmm
+            code |= insEncodeSveElemsize_sz_21(optGetSveElemsize(id->idInsOpt())); // x
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_JJ_4A:   // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             //                     // offsets)
+        case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
+                           // offsets)
+        case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // unscaled offsets)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg3());   // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg4()); // mmmmm
+
+            switch (id->idInsOpt())
+            {
+                case INS_OPTS_SCALABLE_S_SXTW:
+                case INS_OPTS_SCALABLE_D_SXTW:
+                    code |= (1 << 14); // h
+                    break;
+
+                default:
+                    break;
+            }
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                               // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                             // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg3());                               // nnnnn
+            code |= insEncodeSimm<19, 16>(imm);                                       // iiii
+            code |= insEncodeSveElemsize_22_to_21(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                            // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                          // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg3());                            // nnnnn
+            code |= insEncodeSimm<19, 16>(imm);                                    // iiii
+            code |= insEncodeSveElemsize_sz_21(optGetSveElemsize(id->idInsOpt())); // x
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HW_4A:   // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_IU_4A:   // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg3());   // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg4()); // mmmmm
+
+            switch (id->idInsOpt())
+            {
+                case INS_OPTS_SCALABLE_S_SXTW:
+                case INS_OPTS_SCALABLE_D_SXTW:
+                    code |= (1 << 22); // h
+                    break;
+
+                default:
+                    break;
+            }
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HW_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg3());   // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg4()); // mmmmm
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_IF_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
+                             // scalar)
+        case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
+                             // scalar)
+        case IF_SVE_IW_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar)
+        case IF_SVE_IX_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus
+                             // scalar)
+        case IF_SVE_IY_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar)
+        case IF_SVE_IZ_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
+                             // scalar)
+        case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
+                             // scalar)
+        case IF_SVE_JA_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus
+                             // scalar)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // nnnnn
+            code |= insEncodeReg_R<20, 16>(id->idReg4()); // mmmmm
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
+        case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg3());   // nnnnn
+            code |= insEncodeReg_R<20, 16>(id->idReg4()); // mmmmm
+
+            if (canEncodeSveElemsize_dtype(ins))
+            {
+                if (ins == INS_sve_ld1w)
+                {
+                    code = insEncodeSveElemsize_dtype_ld1w(ins, fmt, optGetSveElemsize(id->idInsOpt()), code);
+                }
+                else
+                {
+                    code = insEncodeSveElemsize_dtype(ins, optGetSveElemsize(id->idInsOpt()), code);
+                }
+            }
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar)
+        case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
+        case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
+        case IF_SVE_IK_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar)
+        case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar)
+        case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
+                           // scalar)
+        case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar)
+        case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
+                           // scalar)
+        case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar)
+        case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+        case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+        case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
+                           // scalar)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg3());   // nnnnn
+            code |= insEncodeReg_R<20, 16>(id->idReg4()); // mmmmm
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_IU_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_JJ_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
+                           // offsets)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg3());   // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg4()); // mmmmm
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                    // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                      // mmmmm
+            code |= insEncodeSveImm90_or_270_rot(imm);                       // r
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                    // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                      // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg4());                    // mmmmm
+            code |= insEncodeSveImm0_to_270_rot(imm);                        // rr
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());                      // DDDD
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                    // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                      // nnnnn
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HM_2A: // ........xx...... ...ggg....iddddd -- SVE floating-point arithmetic with immediate
+                           // (predicated)
+        {
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                    // ggg
+            code |= insEncodeSveSmallFloatImm(imm);                          // i
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+        }
+        break;
+
+        case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());                      // mmmmm
+            code |= insEncodeUimm<18, 16>(imm);                              // iii
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                               // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                             // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                               // nnnnn
+            code |= insEncodeSveElemsize_18_to_17(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg4()); // mmmmm
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing
+                           // multiplicand
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_P<12, 10>(id->idReg2());                    // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());                      // mmmmm
+            code |= insEncodeReg_V<20, 16>(id->idReg4());                    // aaaaa
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register
+        case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<3, 0>(id->idReg1());           // TTTT
+            code |= insEncodeReg_R<9, 5>(id->idReg2());           // nnnnn
+            code |= insEncodeSimm9h9l_21_to_16_and_12_to_10(imm); // iii
+                                                                  // iiiiii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register
+        case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());           // ttttt
+            code |= insEncodeReg_R<9, 5>(id->idReg2());           // nnnnn
+            code |= insEncodeSimm9h9l_21_to_16_and_12_to_10(imm); // iii
+                                                                  // iiiiii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_GG_3A:   // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit
+                             // element size
+        case IF_SVE_GH_3B:   // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit
+                             // element size
+        case IF_SVE_GH_3B_B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit
+                             // element size
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg3()); // mmmmm
+            code |= insEncodeUimm<23, 22>(imm);           // ii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_GG_3B: // ........ii.mmmmm ...i..nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit
+                           // element size
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());     // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());     // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg3());   // mmmmm
+            code |= insEncodeUimm3h3l_23_to_22_and_12(imm); // ii
+                                                            // i
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_GH_3A: // ........i..mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit
+                           // element size
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg3()); // mmmmm
+            code |= insEncodeUimm<23, 23>(imm);           // i
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled
+                           // offsets)
+        case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit
+                             // scaled offsets)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<12, 10>(id->idReg1()); // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg3()); // mmmmm
+            code |= id->idSvePrfop();                     // oooo
+
+            switch (id->idInsOpt())
+            {
+                case INS_OPTS_SCALABLE_S_SXTW:
+                case INS_OPTS_SCALABLE_D_SXTW:
+                    code |= (1 << 22); // h
+                    break;
+
+                default:
+                    break;
+            }
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled
+                           // offsets)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<12, 10>(id->idReg1()); // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeReg_V<20, 16>(id->idReg3()); // mmmmm
+            code |= id->idSvePrfop();                     // oooo
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<12, 10>(id->idReg1()); // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg2());   // nnnnn
+            code |= insEncodeReg_R<20, 16>(id->idReg3()); // mmmmm
+            code |= id->idSvePrfop();                     // oooo
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<12, 10>(id->idReg1()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg2());   // nnnnn
+            code |= id->idSvePrfop();                     // oooo
+
+            if (id->idInsOpt() == INS_OPTS_SCALABLE_D)
+            {
+                code |= (1 << 30); // set bit '30' to make it a double-word
+            }
+
+            switch (ins)
+            {
+                case INS_sve_prfh:
+                    code |= insEncodeUimm_MultipleOf<20, 16, 2>(imm); // iiiii
+                    break;
+
+                case INS_sve_prfw:
+                    code |= insEncodeUimm_MultipleOf<20, 16, 4>(imm); // iiiii
+                    break;
+
+                case INS_sve_prfd:
+                    code |= insEncodeUimm_MultipleOf<20, 16, 8>(imm); // iiiii
+                    break;
+
+                default:
+                    assert(ins == INS_sve_prfb);
+            }
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // nnnnn
+            code |= insEncodeUimm<20, 16>(imm);           // iiiii
+            code |= insEncodeSveElemsize_30_or_21(fmt, optGetSveElemsize(id->idInsOpt()));
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate)
+        case IF_SVE_IV_3A:   // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // nnnnn
+            code |= insEncodeSveElemsize_30_or_21(fmt, optGetSveElemsize(id->idInsOpt()));
+
+            switch (ins)
+            {
+                case INS_sve_ld1d:
+                case INS_sve_ldff1d:
+                    code |= insEncodeUimm_MultipleOf<20, 16, 8>(imm); // iiiii
+                    break;
+
+                case INS_sve_ld1w:
+                case INS_sve_ld1sw:
+                case INS_sve_ldff1w:
+                case INS_sve_ldff1sw:
+                    code |= insEncodeUimm_MultipleOf<20, 16, 4>(imm); // iiiii
+                    break;
+
+                default:
+                    code |= insEncodeUimm_MultipleOf<20, 16, 2>(imm); // iiiii
+                    break;
+            }
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());       // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2());     // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());       // nnnnn
+            code |= insEncodeUimm_MultipleOf<20, 16, 8>(imm); // iiiii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_V<9, 5>(id->idReg3());   // nnnnn
+            code |= insEncodeSveElemsize_30_or_21(fmt, optGetSveElemsize(id->idInsOpt()));
+
+            switch (ins)
+            {
+                case INS_sve_st1h:
+                    code |= insEncodeUimm_MultipleOf<20, 16, 2>(imm); // iiiii
+                    break;
+
+                case INS_sve_st1w:
+                    code |= insEncodeUimm_MultipleOf<20, 16, 4>(imm); // iiiii
+                    break;
+
+                default:
+                    assert(ins == INS_sve_st1b);
+                    code |= insEncodeUimm<20, 16>(imm); // iiiii
+                    break;
+            }
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_P<12, 10>(id->idReg1()); // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg2());   // nnnnn
+            code |= id->idSvePrfop();                     // oooo
+            code |= insEncodeSimm<21, 16>(imm);           // iiiiii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg3());   // nnnnn
+
+            switch (ins)
+            {
+                case INS_sve_ld1rd:
+                    code |= insEncodeUimm_MultipleOf<21, 16, 8>(imm); // iiiiii
+                    break;
+
+                default:
+                    assert(ins == INS_sve_ld1rsw);
+                    code |= insEncodeUimm_MultipleOf<21, 16, 4>(imm); // iiiiii
+                    break;
+            }
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+        case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+        case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());   // ttttt
+            code |= insEncodeReg_P<12, 10>(id->idReg2()); // ggg
+            code |= insEncodeReg_R<9, 5>(id->idReg3());   // nnnnn
+            code = insEncodeSveElemsize_dtypeh_dtypel(ins, fmt, optGetSveElemsize(id->idInsOpt()), code);
+
+            switch (ins)
+            {
+                case INS_sve_ld1rw:
+                    code |= insEncodeUimm_MultipleOf<21, 16, 4>(imm); // iiiiii
+                    break;
+
+                case INS_sve_ld1rh:
+                case INS_sve_ld1rsh:
+                    code |= insEncodeUimm_MultipleOf<21, 16, 2>(imm); // iiiiii
+                    break;
+
+                default:
+                    code |= insEncodeUimm<21, 16>(imm); // iiiiii
+                    break;
+            }
+
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BI_2A: // ................ ......nnnnnddddd -- SVE constructive prefix (unpredicated)
+        case IF_SVE_HH_2A: // ................ ......nnnnnddddd -- SVE2 FP8 upconverts
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_CB_2A: // ........xx...... ......nnnnnddddd -- SVE broadcast general register
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_Rn(id->idReg2());                           // nnnnn
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator
+        case IF_SVE_CG_2A: // ........xx...... ......nnnnnddddd -- SVE reverse vector elements
+        case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
+        case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                      // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());                      // nnnnn
+            code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated)
+        case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert
+        case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn
+            code |= insEncodeSveElemsizeWithShift_tszh_tszl_imm3(id->idInsOpt(), imm,
+                                                                 emitInsIsVectorRightShift(ins)); // xx xxiii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1());                            // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2());                            // nnnnn
+            code |= insEncodeSveElemsizeWithImmediate_i1_tsz(id->idInsOpt(), imm); // ixxxx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
+            code |= insEncodeReg_V<9, 5>(id->idReg2()); // mmmmm
+            code |= insEncodeUimm<19, 16>(imm);         // iiii
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        default:
+            assert(!"Unexpected format");
+            break;
+    }
+
+    return dst;
+}
+
+/*****************************************************************************
+ *
+ *  Prints the encoding for the Extend Type encoding
+ */
+
+void emitter::emitDispSveExtendOpts(insOpts opt)
+{
+    switch (opt)
+    {
+        case INS_OPTS_LSL:
+            printf("lsl");
+            break;
+
+        case INS_OPTS_UXTW:
+        case INS_OPTS_SCALABLE_S_UXTW:
+        case INS_OPTS_SCALABLE_D_UXTW:
+            printf("uxtw");
+            break;
+
+        case INS_OPTS_SXTW:
+        case INS_OPTS_SCALABLE_S_SXTW:
+        case INS_OPTS_SCALABLE_D_SXTW:
+            printf("sxtw");
+            break;
+
+        default:
+            assert(!"Bad value");
+            break;
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Prints the encoding for the Extend Type encoding along with the N value
+ */
+
+void emitter::emitDispSveExtendOptsModN(insOpts opt, ssize_t imm)
+{
+    assert(imm >= 0 && imm <= 3);
+
+    if (imm == 0 && opt != INS_OPTS_LSL)
+    {
+        emitDispSveExtendOpts(opt);
+    }
+    else if (imm > 0)
+    {
+        emitDispSveExtendOpts(opt);
+        printf(" #%d", (int)imm);
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Prints the encoding for the <mod> or LSL encoding along with the N value
+ *  This is for formats that have [<Xn|SP>, <Zm>.T, <mod>], [<Xn|SP>, <Zm>.T, <mod> #N], [<Xn|SP>, <Xm>, LSL #N],
+ * [<Xn|SP>{, <Xm>, LSL #N}]
+ */
+void emitter::emitDispSveModAddr(instruction ins, regNumber reg1, regNumber reg2, insOpts opt, insFormat fmt)
+{
+    printf("[");
+
+    if (isVectorRegister(reg1))
+    {
+        // If the overall instruction is working on 128-bit
+        // registers, the size of this register for
+        // the mod addr is always 64-bit.
+        // Example: LD1Q    {<Zt>.Q }, <Pg>/Z, [<Zn>.D{, <Xm>}]
+        if (opt == INS_OPTS_SCALABLE_Q)
+        {
+            emitDispSveReg(reg1, INS_OPTS_SCALABLE_D, reg2 != REG_ZR);
+        }
+        else
+        {
+            emitDispSveReg(reg1, opt, reg2 != REG_ZR);
+        }
+    }
+    else
+    {
+        emitDispReg(reg1, EA_8BYTE, reg2 != REG_ZR);
+    }
+
+    if (isVectorRegister(reg2))
+    {
+        emitDispSveReg(reg2, opt, false);
+    }
+    else if (reg2 != REG_ZR)
+    {
+        emitDispReg(reg2, EA_8BYTE, false);
+    }
+
+    if (insOptsScalable32bitExtends(opt))
+    {
+        emitDispComma();
+        emitDispSveExtendOptsModN(opt, insSveGetLslOrModN(ins, fmt));
+    }
+    // Omit 'lsl #N' only if the second register is ZR.
+    else if ((reg2 != REG_ZR) && insSveIsLslN(ins, fmt))
+    {
+        emitDispComma();
+        switch (insSveGetLslOrModN(ins, fmt))
+        {
+            case 4:
+                printf("lsl #4");
+                break;
+
+            case 3:
+                printf("lsl #3");
+                break;
+
+            case 2:
+                printf("lsl #2");
+                break;
+
+            case 1:
+                printf("lsl #1");
+                break;
+
+            default:
+                assert(!"Invalid instruction");
+                break;
+        }
+    }
+    printf("]");
+}
+
+/*****************************************************************************
+ *
+ *  Prints the encoding for format [<Zn>.S{, #<imm>}]
+ */
+void emitter::emitDispSveImm(regNumber reg1, ssize_t imm, insOpts opt)
+{
+    printf("[");
+    emitDispSveReg(reg1, opt, imm != 0);
+    if (imm != 0)
+    {
+        // This does not have to be printed as hex.
+        // We only do it because the capstone disassembly displays this immediate as hex.
+        // We could not modify capstone without affecting other cases.
+        emitDispImm(imm, false, /* alwaysHex */ true);
+    }
+    printf("]");
+}
+
+/*****************************************************************************
+ *
+ *  Prints the encoding for format [<Xn|SP>{, #<imm>, MUL VL}]
+ */
+void emitter::emitDispSveImmMulVl(regNumber reg1, ssize_t imm)
+{
+    printf("[");
+    emitDispReg(reg1, EA_8BYTE, imm != 0);
+    if (imm != 0)
+    {
+        emitDispImm(imm, true);
+        printf("mul vl");
+    }
+    printf("]");
+}
+
+/*****************************************************************************
+ *
+ *  Prints the encoding for format [<Zn>.D{, #<imm>}]
+ */
+void emitter::emitDispSveImmIndex(regNumber reg1, insOpts opt, ssize_t imm)
+{
+    printf("[");
+    if (isVectorRegister(reg1))
+    {
+        emitDispSveReg(reg1, opt, imm != 0);
+    }
+    else
+    {
+        emitDispReg(reg1, EA_8BYTE, imm != 0);
+    }
+    if (imm != 0)
+    {
+        // This does not have to be printed as hex.
+        // We only do it because the capstone disassembly displays this immediate as hex.
+        // We could not modify capstone without affecting other cases.
+        emitDispImm(imm, false, /* alwaysHex */ (imm > 31));
+    }
+    printf("]");
+}
+
+//------------------------------------------------------------------------
+// emitDispSveReg: Display a scalable vector register name
+//
+void emitter::emitDispSveReg(regNumber reg, bool addComma)
+{
+    assert(isVectorRegister(reg));
+    printf(emitSveRegName(reg));
+
+    if (addComma)
+        emitDispComma();
+}
+
+//------------------------------------------------------------------------
+// emitDispSveReg: Display a scalable vector register name with an arrangement suffix
+//
+void emitter::emitDispSveReg(regNumber reg, insOpts opt, bool addComma)
+{
+    assert(isVectorRegister(reg));
+    printf(emitSveRegName(reg));
+
+    if (opt != INS_OPTS_NONE)
+    {
+        assert(insOptsScalable(opt) || insOptsScalable32bitExtends(opt));
+        emitDispArrangement(opt);
+    }
+
+    if (addComma)
+        emitDispComma();
+}
+
+//------------------------------------------------------------------------
+// emitDispSveRegIndex: Display a scalable vector register with indexed element
+//
+void emitter::emitDispSveRegIndex(regNumber reg, ssize_t index, bool addComma)
+{
+    assert(isVectorRegister(reg));
+    printf(emitSveRegName(reg));
+    emitDispElementIndex(index, addComma);
+}
+
+//------------------------------------------------------------------------
+// emitDispSveConsecutiveRegList: Display a SVE consecutive vector register list
+//
+void emitter::emitDispSveConsecutiveRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma)
+{
+    assert(isVectorRegister(firstReg));
+
+    regNumber currReg = firstReg;
+
+    assert(listSize > 0);
+
+    printf("{ ");
+    // We do not want the short-hand for list size of 1 or 2.
+    if ((listSize <= 2) || (((unsigned)currReg + listSize - 1) > (unsigned)REG_V31))
+    {
+        for (unsigned i = 0; i < listSize; i++)
+        {
+            const bool notLastRegister = (i != listSize - 1);
+            emitDispSveReg(currReg, opt, notLastRegister);
+            currReg = (currReg == REG_V31) ? REG_V0 : REG_NEXT(currReg);
+        }
+    }
+    else
+    {
+        // short-hand. example: { z0.s - z2.s } which is the same as { z0.s, z1.s, z2.s }
+        emitDispSveReg(currReg, opt, false);
+        printf(" - ");
+        emitDispSveReg((regNumber)(currReg + listSize - 1), opt, false);
+    }
+    printf(" }");
+
+    if (addComma)
+    {
+        emitDispComma();
+    }
+}
+
+//------------------------------------------------------------------------
+// emitSveRegName: Returns a scalable vector register name.
+//
+// Arguments:
+//    reg - A SIMD and floating-point register.
+//
+// Return value:
+//    A string that represents a scalable vector register name.
+//
+const char* emitter::emitSveRegName(regNumber reg) const
+{
+    assert((reg >= REG_V0) && (reg <= REG_V31));
+
+    int index = (int)reg - (int)REG_V0;
+
+    return zRegNames[index];
+}
+
+//------------------------------------------------------------------------
+// emitPredicateRegName: Returns a predicate register name.
+//
+// Arguments:
+//    reg - A predicate register.
+//
+// Return value:
+//    A string that represents a predicate register name.
+//
+const char* emitter::emitPredicateRegName(regNumber reg, PredicateType ptype)
+{
+    assert((reg >= REG_P0) && (reg <= REG_P15));
+
+    const int  index     = (int)reg - (int)REG_P0;
+    const bool usePnRegs = (ptype == PREDICATE_N) || (ptype == PREDICATE_N_SIZED);
+
+    return usePnRegs ? pnRegNames[index] : pRegNames[index];
+}
+
+//------------------------------------------------------------------------
+// emitDispPredicateReg: Display a predicate register name with with an arrangement suffix
+//
+void emitter::emitDispPredicateReg(regNumber reg, PredicateType ptype, insOpts opt, bool addComma)
+{
+    assert(isPredicateRegister(reg));
+    printf(emitPredicateRegName(reg, ptype));
+
+    if (ptype == PREDICATE_MERGE)
+    {
+        printf("/m");
+    }
+    else if (ptype == PREDICATE_ZERO)
+    {
+        printf("/z");
+    }
+    else if (ptype == PREDICATE_SIZED || ptype == PREDICATE_N_SIZED)
+    {
+        emitDispElemsize(optGetSveElemsize(opt));
+    }
+
+    if (addComma)
+        emitDispComma();
+}
+
+//------------------------------------------------------------------------
+// emitDispPredicateRegPair: Display a pair of predicate registers
+//
+void emitter::emitDispPredicateRegPair(regNumber reg, insOpts opt)
+{
+    printf("{ ");
+    emitDispPredicateReg(reg, PREDICATE_SIZED, opt, true);
+    emitDispPredicateReg((regNumber)((unsigned)reg + 1), PREDICATE_SIZED, opt, false);
+    printf(" }, ");
+}
+
+//------------------------------------------------------------------------
+// emitDispLowPredicateReg: Display a low predicate register name with with an arrangement suffix
+//
+void emitter::emitDispLowPredicateReg(regNumber reg, PredicateType ptype, insOpts opt, bool addComma)
+{
+    assert(isLowPredicateRegister(reg));
+    reg = (regNumber)((((unsigned)reg - REG_PREDICATE_FIRST) & 0x7) + REG_PREDICATE_FIRST);
+    emitDispPredicateReg(reg, ptype, opt, addComma);
+}
+
+//------------------------------------------------------------------------
+// emitDispLowPredicateRegPair: Display a pair of low predicate registers
+//
+void emitter::emitDispLowPredicateRegPair(regNumber reg, insOpts opt)
+{
+    assert(isLowPredicateRegister(reg));
+
+    printf("{ ");
+    const unsigned baseRegNum = ((unsigned)reg - REG_PREDICATE_FIRST) & 0x7;
+    const unsigned regNum     = (baseRegNum * 2) + REG_PREDICATE_FIRST;
+    emitDispPredicateReg((regNumber)regNum, PREDICATE_SIZED, opt, true);
+    emitDispPredicateReg((regNumber)(regNum + 1), PREDICATE_SIZED, opt, false);
+    printf(" }, ");
+}
+
+//------------------------------------------------------------------------
+// emitDispVectorLengthSpecifier: Display the vector length specifier
+//
+void emitter::emitDispVectorLengthSpecifier(instrDesc* id)
+{
+    assert(id != nullptr);
+    assert(insOptsScalableStandard(id->idInsOpt()));
+
+    if (id->idVectorLength4x())
+    {
+        printf("vlx4");
+    }
+    else
+    {
+        printf("vlx2");
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Display an insSvePattern
+ */
+void emitter::emitDispSvePattern(insSvePattern pattern, bool addComma)
+{
+    printf("%s", svePatternNames[pattern]);
+
+    if (addComma)
+    {
+        emitDispComma();
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Display an insSvePrfop
+ */
+void emitter::emitDispSvePrfop(insSvePrfop prfop, bool addComma)
+{
+    switch (prfop)
+    {
+        case SVE_PRFOP_PLDL1KEEP:
+            printf("pldl1keep");
+            break;
+
+        case SVE_PRFOP_PLDL1STRM:
+            printf("pldl1strm");
+            break;
+
+        case SVE_PRFOP_PLDL2KEEP:
+            printf("pldl2keep");
+            break;
+
+        case SVE_PRFOP_PLDL2STRM:
+            printf("pldl2strm");
+            break;
+
+        case SVE_PRFOP_PLDL3KEEP:
+            printf("pldl3keep");
+            break;
+
+        case SVE_PRFOP_PLDL3STRM:
+            printf("pldl3strm");
+            break;
+
+        case SVE_PRFOP_PSTL1KEEP:
+            printf("pstl1keep");
+            break;
+
+        case SVE_PRFOP_PSTL1STRM:
+            printf("pstl1strm");
+            break;
+
+        case SVE_PRFOP_PSTL2KEEP:
+            printf("pstl2keep");
+            break;
+
+        case SVE_PRFOP_PSTL2STRM:
+            printf("pstl2strm");
+            break;
+
+        case SVE_PRFOP_PSTL3KEEP:
+            printf("pstl3keep");
+            break;
+
+        case SVE_PRFOP_PSTL3STRM:
+            printf("pstl3strm");
+            break;
+
+        case SVE_PRFOP_CONST6:
+            printf("#6");
+            break;
+
+        case SVE_PRFOP_CONST7:
+            printf("#7");
+            break;
+
+        case SVE_PRFOP_CONST14:
+            printf("#0xE");
+            break;
+
+        case SVE_PRFOP_CONST15:
+            printf("#0xF");
+            break;
+
+        default:
+            assert(!"Invalid prfop");
+            break;
+    }
+
+    if (addComma)
+    {
+        emitDispComma();
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to set the vector length specifier (vl) for an Arm64 SVE instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeVectorLengthSpecifier(instrDesc* id)
+{
+    assert(id != nullptr);
+    assert(insOptsScalableStandard(id->idInsOpt()));
+
+    if (id->idVectorLength4x())
+    {
+        switch (id->idInsFmt())
+        {
+            case IF_SVE_DL_2A:
+                return 0x400; // set the bit at location 10
+            case IF_SVE_DY_3A:
+                return 0x2000; // set the bit at location 13
+            default:
+                assert(!"Unexpected format");
+                break;
+        }
+    }
+
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Return an encoding for the specified predicate type used in '16' position.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodePredQualifier_16(bool merge)
+{
+    return merge ? 1 << 16 : 0;
+}
+
+/*****************************************************************************
+ *
+ *  Return an encoding for the specified predicate type used in '4' position.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodePredQualifier_4(bool merge)
+{
+    return merge ? 1 << 4 : 0;
+}
+
+//  For the given 'elemsize' returns the 'arrangement' when used in a SVE vector register arrangement.
+//  Asserts and returns INS_OPTS_NONE if an invalid 'elemsize' is passed
+//
+/*static*/ insOpts emitter::optGetSveInsOpt(emitAttr elemsize)
+{
+    switch (elemsize)
+    {
+        case EA_1BYTE:
+            return INS_OPTS_SCALABLE_B;
+
+        case EA_2BYTE:
+            return INS_OPTS_SCALABLE_H;
+
+        case EA_4BYTE:
+            return INS_OPTS_SCALABLE_S;
+
+        case EA_8BYTE:
+            return INS_OPTS_SCALABLE_D;
+
+        case EA_16BYTE:
+            return INS_OPTS_SCALABLE_Q;
+
+        default:
+            assert(!"Invalid emitAttr for sve vector register");
+            return INS_OPTS_NONE;
+    }
+}
+
+//  For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement
+//  asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
+//
+/*static*/ emitAttr emitter::optGetSveElemsize(insOpts arrangement)
+{
+    switch (arrangement)
+    {
+        case INS_OPTS_SCALABLE_B:
+            return EA_1BYTE;
+
+        case INS_OPTS_SCALABLE_H:
+            return EA_2BYTE;
+
+        case INS_OPTS_SCALABLE_S:
+        case INS_OPTS_SCALABLE_S_UXTW:
+        case INS_OPTS_SCALABLE_S_SXTW:
+            return EA_4BYTE;
+
+        case INS_OPTS_SCALABLE_D:
+        case INS_OPTS_SCALABLE_D_UXTW:
+        case INS_OPTS_SCALABLE_D_SXTW:
+            return EA_8BYTE;
+
+        case INS_OPTS_SCALABLE_Q:
+            return EA_16BYTE;
+
+        default:
+            assert(!"Invalid insOpt for vector register");
+            return EA_UNKNOWN;
+    }
+}
+
+/*static*/ insOpts emitter::optWidenSveElemsizeArrangement(insOpts arrangement)
+{
+    switch (arrangement)
+    {
+        case INS_OPTS_SCALABLE_B:
+            return INS_OPTS_SCALABLE_H;
+
+        case INS_OPTS_SCALABLE_H:
+            return INS_OPTS_SCALABLE_S;
+
+        case INS_OPTS_SCALABLE_S:
+            return INS_OPTS_SCALABLE_D;
+
+        default:
+            assert(!" invalid 'arrangement' value");
+            return INS_OPTS_NONE;
+    }
+}
+
+/*static*/ insOpts emitter::optSveToQuadwordElemsizeArrangement(insOpts arrangement)
+{
+    switch (arrangement)
+    {
+        case INS_OPTS_SCALABLE_B:
+            return INS_OPTS_16B;
+
+        case INS_OPTS_SCALABLE_H:
+            return INS_OPTS_8H;
+
+        case INS_OPTS_SCALABLE_S:
+            return INS_OPTS_4S;
+
+        case INS_OPTS_SCALABLE_D:
+            return INS_OPTS_2D;
+
+        default:
+            assert(!" invalid 'arrangement' value");
+            return INS_OPTS_NONE;
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Expands an option that has different size operands (INS_OPTS_*_TO_*) into
+ *  a pair of scalable options where the first describes the size of the
+ *  destination operand and the second describes the size of the source operand.
+ */
+
+/*static*/ void emitter::optExpandConversionPair(insOpts opt, insOpts& dst, insOpts& src)
+{
+    dst = INS_OPTS_NONE;
+    src = INS_OPTS_NONE;
+
+    switch (opt)
+    {
+        case INS_OPTS_H_TO_S:
+            dst = INS_OPTS_SCALABLE_S;
+            src = INS_OPTS_SCALABLE_H;
+            break;
+        case INS_OPTS_S_TO_H:
+            dst = INS_OPTS_SCALABLE_H;
+            src = INS_OPTS_SCALABLE_S;
+            break;
+        case INS_OPTS_S_TO_D:
+            dst = INS_OPTS_SCALABLE_D;
+            src = INS_OPTS_SCALABLE_S;
+            break;
+        case INS_OPTS_D_TO_S:
+            dst = INS_OPTS_SCALABLE_S;
+            src = INS_OPTS_SCALABLE_D;
+            break;
+        case INS_OPTS_H_TO_D:
+            dst = INS_OPTS_SCALABLE_D;
+            src = INS_OPTS_SCALABLE_H;
+            break;
+        case INS_OPTS_D_TO_H:
+            dst = INS_OPTS_SCALABLE_H;
+            src = INS_OPTS_SCALABLE_D;
+            break;
+        case INS_OPTS_SCALABLE_H:
+            dst = INS_OPTS_SCALABLE_H;
+            src = INS_OPTS_SCALABLE_H;
+            break;
+        case INS_OPTS_SCALABLE_S:
+            dst = INS_OPTS_SCALABLE_S;
+            src = INS_OPTS_SCALABLE_S;
+            break;
+        case INS_OPTS_SCALABLE_D:
+            dst = INS_OPTS_SCALABLE_D;
+            src = INS_OPTS_SCALABLE_D;
+            break;
+        default:
+            noway_assert(!"unreachable");
+            break;
+    }
+
+    assert(dst != INS_OPTS_NONE && src != INS_OPTS_NONE);
+    return;
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ *  The following is called for each recorded SVE instruction -- use for debugging.
+ */
+void emitter::emitInsSveSanityCheck(instrDesc* id)
+{
+    switch (id->idInsFmt())
+    {
+        ssize_t imm;
+
+        case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isPredicateRegister(id->idReg2())); // NNNN
+            break;
+
+        // Scalable.
+        case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated)
+        case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated)
+        case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated)
+        case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated)
+        case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated)
+        case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector
+        case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated)
+        case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic
+        case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract
+        case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left
+                           // (predicated)
+            assert(insOptsScalableStandard(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));          // ddddd
+            assert(isLowPredicateRegister(id->idReg2()));    // ggg
+            assert(isVectorRegister(id->idReg3()));          // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        // Scalable, .S or .D.
+        case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated)
+        case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements
+            assert(insOptsScalableWords(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        // Scalable, Merge or Zero predicate.
+        case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));       // nnnnn
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // ddddd
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        // Scalable, with shift immediate.
+        case IF_SVE_AM_2A: // ........xx...... ...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isValidVectorShiftAmount(emitGetInsSC(id), optGetSveElemsize(id->idInsOpt()), true));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        // Scalable Wide.
+        case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated)
+            assert(insOptsScalableWide(id->idInsOpt()));  // xx
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        // Scalable to/from SIMD scalar.
+        case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated)
+        case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated)
+        case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar
+        case IF_SVE_CP_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector
+                           // (predicated)
+        case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register
+            assert(insOptsScalableStandard(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));          // ddddd
+            assert(isLowPredicateRegister(id->idReg2()));    // ggg
+            assert(isVectorRegister(id->idReg3()));          // mmmmm
+            assert(isValidVectorElemsize(id->idOpSize()));
+            break;
+
+        // Scalable to FP SIMD scalar.
+        case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction
+        case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated)
+            assert(insOptsScalableFloat(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // mmmmm
+            assert(isValidVectorElemsizeSveFloat(id->idOpSize()));
+            break;
+
+        // Scalable to general register.
+        case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register
+        case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register
+            assert(insOptsScalableStandard(id->idInsOpt())); // xx
+            assert(isGeneralRegister(id->idReg1()));         // ddddd
+            assert(isLowPredicateRegister(id->idReg2()));    // ggg
+            assert(isVectorRegister(id->idReg3()));          // mmmmm
+            assert(isValidScalarDatasize(id->idOpSize()));
+            break;
+
+        // Scalable, 4 regs (location of reg3 and reg4 can switch)
+        case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend
+                           // (predicated)
+        case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand
+                           // (predicated)
+        case IF_SVE_GI_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE2 histogram generation (vector)
+        case IF_SVE_HU_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend
+            assert(insOptsScalableStandard(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));          // ddddd
+            assert(isLowPredicateRegister(id->idReg2()));    // ggg
+            assert(isVectorRegister(id->idReg3()));
+            assert(isVectorRegister(id->idReg4()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        // Scalable, unpredicated
+        case IF_SVE_AT_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated)
+        case IF_SVE_BD_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
+        case IF_SVE_BE_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high
+                             // (unpredicated)
+        case IF_SVE_BG_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated)
+        case IF_SVE_BK_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient
+        case IF_SVE_BR_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector segments
+        case IF_SVE_BZ_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources)
+        case IF_SVE_BZ_3A_A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources)
+        case IF_SVE_CA_3A:   // ........xx.mmmmm ......nnnnnddddd -- sve_int_perm_tbxquads
+        case IF_SVE_EH_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE integer dot product (unpredicated)
+        case IF_SVE_EL_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply-add long
+        case IF_SVE_EM_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add high
+        case IF_SVE_EN_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add interleaved long
+        case IF_SVE_EO_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long
+        case IF_SVE_EV_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp
+        case IF_SVE_EX_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector elements (quadwords)
+        case IF_SVE_FL_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long
+        case IF_SVE_FM_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide
+        case IF_SVE_FN_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long
+        case IF_SVE_FP_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved
+        case IF_SVE_FQ_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute
+        case IF_SVE_FS_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long
+        case IF_SVE_FW_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate
+        case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long
+        case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part
+        case IF_SVE_GF_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment)
+        case IF_SVE_GW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE FP clamp
+        case IF_SVE_HK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated)
+            assert(insOptsScalableStandard(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));          // ddddd
+            assert(isVectorRegister(id->idReg2()));          // nnnnn
+            assert(isVectorRegister(id->idReg3()));          // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        // Scalable, no predicates. General purpose source registers
+        case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register
+                           // increment)
+            assert(insOptsScalableStandard(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));          // ddddd
+            assert(isGeneralRegisterOrZR(id->idReg2()));     // nnnnn
+            assert(isGeneralRegisterOrZR(id->idReg3()));     // mmmmm
+            assert(isValidScalarDatasize(id->idOpSize()));
+            break;
+
+        case IF_SVE_BH_3A: // .........x.mmmmm ....hhnnnnnddddd -- SVE address generation
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_S || id->idInsOpt() == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(id->idReg1()));   // ddddd
+            assert(isVectorRegister(id->idReg2()));   // nnnnn
+            assert(isVectorRegister(id->idReg3()));   // mmmmm
+            assert(isValidUimm<2>(emitGetInsSC(id))); // hh
+            break;
+
+        case IF_SVE_BH_3B:   // ...........mmmmm ....hhnnnnnddddd -- SVE address generation
+        case IF_SVE_BH_3B_A: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D_SXTW || id->idInsOpt() == INS_OPTS_SCALABLE_D_UXTW);
+            assert(isVectorRegister(id->idReg1()));   // ddddd
+            assert(isVectorRegister(id->idReg2()));   // nnnnn
+            assert(isVectorRegister(id->idReg3()));   // mmmmm
+            assert(isValidUimm<2>(emitGetInsSC(id))); // hh
+            break;
+
+        case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count
+        case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count
+            assert(id->idInsOpt() == INS_OPTS_NONE);
+            assert(isGeneralRegister(id->idReg1()));
+            assert(id->idOpSize() == EA_8BYTE);
+            assert(isValidUimmFrom1<4>(emitGetInsSC(id)));
+            break;
+
+        case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count
+        case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isValidUimmFrom1<4>(emitGetInsSC(id)));
+            break;
+
+        case IF_SVE_BS_1A: // ..............ii iiiiiiiiiiiddddd -- SVE bitwise logical with immediate (unpredicated)
+        case IF_SVE_BT_1A: // ..............ii iiiiiiiiiiiddddd -- SVE broadcast bitmask immediate
+            imm = emitGetInsSC(id);
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1())); // ddddd
+            assert(isValidImmNRS(imm, optGetSveElemsize(id->idInsOpt())));
+            break;
+
+        case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count
+            assert(id->idInsOpt() == INS_OPTS_NONE);
+            assert(isGeneralRegister(id->idReg1()));
+            assert(isValidGeneralDatasize(id->idOpSize()));
+            assert(isValidUimmFrom1<4>(emitGetInsSC(id)));
+            break;
+
+        case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive)
+        case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
+            assert(isVectorRegister(id->idReg1()));   // ddddd
+            assert(isVectorRegister(id->idReg2()));   // nnnnn
+            assert(isValidUimm<8>(emitGetInsSC(id))); // iiiii iii
+            break;
+
+        case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated)
+        {
+            imm = emitGetInsSC(id);
+            floatImm8 fpImm;
+            fpImm.immFPIVal = (unsigned)imm;
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isValidSimm<8>((ssize_t)emitDecodeFloatImm8(fpImm)));      // iiiiiiii
+            assert(isPredicateRegister(id->idReg2()));                        // gggg
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+        }
+
+        case IF_SVE_BV_2A:   // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+        case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+            imm = emitGetInsSC(id);
+            assert(insOptsScalableStandard(id->idInsOpt()));                  // xx
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isPredicateRegister(id->idReg2()));                        // gggg
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            assert(isValidSimm<8>(imm));                                      // iiiiiiii
+            break;
+
+        case IF_SVE_BV_2B: // ........xx..gggg ...........ddddd -- SVE copy integer immediate (predicated)
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));             // ddddd
+            assert(isPredicateRegister(id->idReg2()));          // gggg
+            break;
+
+        case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isVectorRegister(id->idReg2()));    // nnnnn
+            break;
+
+        case IF_SVE_CE_2B: // .........i...ii. ......nnnnn.DDDD -- SVE move predicate from vector
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isVectorRegister(id->idReg2()));    // nnnnn
+            assert(isValidUimm<3>(emitGetInsSC(id)));
+            break;
+
+        case IF_SVE_CE_2C: // ..............i. ......nnnnn.DDDD -- SVE move predicate from vector
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isVectorRegister(id->idReg2()));    // nnnnn
+            assert(isValidUimm<1>(emitGetInsSC(id)));  // i
+            break;
+
+        case IF_SVE_CE_2D: // .............ii. ......nnnnn.DDDD -- SVE move predicate from vector
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isVectorRegister(id->idReg2()));    // nnnnn
+            assert(isValidUimm<3>(emitGetInsSC(id)));  // ii
+            break;
+
+        case IF_SVE_CF_2A: // ................ .......NNNNddddd -- SVE move predicate into vector
+            assert(isVectorRegister(id->idReg1()));    // ddddd
+            assert(isPredicateRegister(id->idReg2())); // NNNN
+            break;
+
+        case IF_SVE_CF_2B: // .........i...ii. .......NNNNddddd -- SVE move predicate into vector
+            assert(isVectorRegister(id->idReg1()));    // ddddd
+            assert(isPredicateRegister(id->idReg2())); // NNNN
+            assert(isValidUimm<3>(emitGetInsSC(id)));
+            break;
+
+        case IF_SVE_CF_2C: // ..............i. .......NNNNddddd -- SVE move predicate into vector
+            assert(isVectorRegister(id->idReg1()));    // ddddd
+            assert(isPredicateRegister(id->idReg2())); // NNNN
+            assert(isValidUimm<1>(emitGetInsSC(id)));  // i
+            break;
+
+        case IF_SVE_CF_2D: // .............ii. .......NNNNddddd -- SVE move predicate into vector
+            assert(isVectorRegister(id->idReg1()));    // ddddd
+            assert(isPredicateRegister(id->idReg2())); // NNNN
+            assert(isValidUimm<2>(emitGetInsSC(id)));  // ii
+            break;
+
+        case IF_SVE_CC_2A: // ........xx...... ......mmmmmddddd -- SVE insert SIMD&FP scalar register
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1())); // ddddd
+            assert(isVectorRegister(id->idReg2())); // mmmmm
+            break;
+
+        case IF_SVE_CD_2A: // ........xx...... ......mmmmmddddd -- SVE insert general register
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));      // ddddd
+            assert(isGeneralRegisterOrZR(id->idReg2())); // mmmmm
+            break;
+
+        case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isPredicateRegister(id->idReg2())); // NNNN
+            assert(isPredicateRegister(id->idReg3())); // MMMM
+            break;
+
+        case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(insOptsScalableStandard(id->idInsOpt())); // xx
+            assert(isPredicateRegister(id->idReg1()));       // DDDD
+            assert(isPredicateRegister(id->idReg2()));       // NNNN
+            break;
+
+        case IF_SVE_CT_3A:                          // ................ ...gggnnnnnddddd -- SVE reverse doublewords
+            assert(isVectorRegister(id->idReg1())); // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // nnnnn
+            break;
+
+        // Scalable, 4 regs, to predicate register.
+        case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(insOptsScalableStandard(id->idInsOpt())); // xx
+            assert(isPredicateRegister(id->idReg1()));       // DDDD
+            assert(isLowPredicateRegister(id->idReg2()));    // ggg
+            assert(isVectorRegister(id->idReg3()));          // nnnnn
+            assert(isVectorRegister(id->idReg4()));          // mmmmm
+            break;
+
+        case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(insOptsScalableWide(id->idInsOpt()));  // xx
+            assert(isPredicateRegister(id->idReg1()));    // DDDD
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // nnnnn
+            assert(isVectorRegister(id->idReg4()));       // mmmmm
+            break;
+
+        case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isPredicateRegister(id->idReg1()));    // DDDD
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // nnnnn
+            assert(isValidSimm<5>(emitGetInsSC(id)));     // iiiii
+            break;
+
+        case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isPredicateRegister(id->idReg1()));    // DDDD
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // nnnnn
+            assert(isValidUimm<7>(emitGetInsSC(id)));     // iiiii
+            break;
+
+        case IF_SVE_BR_3B:   // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments
+        case IF_SVE_FN_3B:   // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long
+        case IF_SVE_FO_3A:   // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate
+        case IF_SVE_AT_3B:   // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated)
+        case IF_SVE_AU_3A:   // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated)
+        case IF_SVE_BD_3B:   // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
+        case IF_SVE_EF_3A:   // ...........mmmmm ......nnnnnddddd -- SVE two-way dot product
+        case IF_SVE_EI_3A:   // ...........mmmmm ......nnnnnddddd -- SVE mixed sign dot product
+        case IF_SVE_GJ_3A:   // ...........mmmmm ......nnnnnddddd -- SVE2 crypto constructive binary operations
+        case IF_SVE_GN_3A:   // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long
+        case IF_SVE_GO_3A:   // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long long
+        case IF_SVE_GW_3B:   // ...........mmmmm ......nnnnnddddd -- SVE FP clamp
+        case IF_SVE_HA_3A:   // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product
+        case IF_SVE_HA_3A_E: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product
+        case IF_SVE_HB_3A:   // ...........mmmmm ......nnnnnddddd -- SVE floating-point multiply-add long
+        case IF_SVE_HD_3A:   // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate
+        case IF_SVE_HD_3A_A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate
+        case IF_SVE_HK_3B:   // ...........mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated)
+        case IF_SVE_AV_3A:   // ...........mmmmm ......kkkkkddddd -- SVE2 bitwise ternary operations
+            assert(insOptsScalable(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1())); // ddddd
+            assert(isVectorRegister(id->idReg2())); // nnnnn/mmmmm
+            assert(isVectorRegister(id->idReg3())); // mmmmm/aaaaa
+            break;
+
+        case IF_SVE_HA_3A_F: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product
+        case IF_SVE_EW_3A:   // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer)
+        case IF_SVE_EW_3B:   // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer)
+            assert(insOptsNone(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1())); // ddddd
+            assert(isVectorRegister(id->idReg2())); // nnnnn/aaaaa
+            assert(isVectorRegister(id->idReg3())); // mmmmm
+            break;
+
+        case IF_SVE_EG_3A:   // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed)
+        case IF_SVE_EY_3A:   // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed)
+        case IF_SVE_EZ_3A:   // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed)
+        case IF_SVE_FD_3B:   // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
+        case IF_SVE_FF_3B:   // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
+        case IF_SVE_FI_3B:   // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
+        case IF_SVE_GU_3A:   // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
+        case IF_SVE_GX_3A:   // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
+        case IF_SVE_GY_3B:   // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
+        case IF_SVE_GY_3B_D: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
+        case IF_SVE_FK_3B:   // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1())); // ddddd
+            assert(isVectorRegister(id->idReg2())); // nnnnn
+            assert(isVectorRegister(id->idReg3())); // mmm
+            assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7));
+            assert(isValidUimm<2>(emitGetInsSC(id))); // ii
+            break;
+
+        case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
+        case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
+        case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
+        case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
+        case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
+        case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
+        case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
+        case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed)
+        case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
+        case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
+        case IF_SVE_GY_3A: // ...........iimmm ....i.nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
+        case IF_SVE_GZ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE floating-point multiply-add long (indexed)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1())); // ddddd
+            assert(isVectorRegister(id->idReg2())); // nnnnn
+            assert(isVectorRegister(id->idReg3())); // mmm
+            assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7));
+            assert(isValidUimm<3>(emitGetInsSC(id))); // iii
+            break;
+
+        case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
+        case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
+        case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
+        case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_S);
+            assert(isVectorRegister(id->idReg1()));    // ddddd
+            assert(isVectorRegister(id->idReg2()));    // nnnnn
+            assert(isLowVectorRegister(id->idReg3())); // mmmm
+            assert(isValidUimm<2>(emitGetInsSC(id)));  // ii
+            break;
+
+        case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed)
+        case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
+        case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
+        case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
+        case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
+        case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
+        case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ddddd
+            assert(isVectorRegister(id->idReg2()));    // nnnnn
+            assert(isLowVectorRegister(id->idReg3())); // mmmm
+            assert(isValidUimm<1>(emitGetInsSC(id)));  // i
+            break;
+
+        case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isPredicateRegister(id->idReg2())); // gggg
+            assert(isPredicateRegister(id->idReg3())); // NNNN
+
+            switch (id->idIns())
+            {
+                case INS_sve_and:
+                case INS_sve_ands:
+                case INS_sve_bic:
+                case INS_sve_bics:
+                case INS_sve_eor:
+                case INS_sve_eors:
+                case INS_sve_nand:
+                case INS_sve_nands:
+                case INS_sve_nor:
+                case INS_sve_nors:
+                case INS_sve_orn:
+                case INS_sve_orns:
+                case INS_sve_orr:
+                case INS_sve_orrs:
+                case INS_sve_sel:
+                    assert(isPredicateRegister(id->idReg4())); // MMMM
+                    break;
+
+                case INS_sve_mov:
+                case INS_sve_movs:
+                case INS_sve_not:
+                case INS_sve_nots:
+                    // no fourth register
+                    break;
+
+                default:
+                    unreached();
+                    break;
+            }
+            break;
+
+        case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+        case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isPredicateRegister(id->idReg2())); // NNNN
+            break;
+
+        case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+        case IF_SVE_DB_3A:   // ................ ..gggg.NNNNMDDDD -- SVE partition break condition
+        case IF_SVE_DB_3B:   // ................ ..gggg.NNNN.DDDD -- SVE partition break condition
+        case IF_SVE_DC_3A:   // ................ ..gggg.NNNN.MMMM -- SVE propagate break to next partition
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isPredicateRegister(id->idReg2())); // gggg
+            assert(isPredicateRegister(id->idReg3())); // NNNN
+            break;
+
+        case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isPredicateRegister(id->idReg2())); // gggg
+            assert(isPredicateRegister(id->idReg3())); // NNNN
+            assert(isPredicateRegister(id->idReg4())); // MMMM
+            break;
+
+        case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active
+        case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isPredicateRegister(id->idReg2())); // gggg
+            break;
+
+        case IF_SVE_DE_1A: // ........xx...... ......ppppp.DDDD -- SVE predicate initialize
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isPredicateRegister(id->idReg1()));       // DDDD
+            assert(insOptsScalableStandard(id->idInsOpt())); // xx
+            break;
+
+        case IF_SVE_DF_2A: // ........xx...... .......VVVV.DDDD -- SVE predicate next active
+        case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isPredicateRegister(id->idReg2())); // gggg
+            break;
+
+        case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated)
+        case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            break;
+
+        case IF_SVE_DK_3A: // ........xx...... ..gggg.NNNNddddd -- SVE predicate count
+            assert(id->idOpSize() == EA_8BYTE);
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isGeneralRegister(id->idReg1()));   // ddddd
+            assert(isPredicateRegister(id->idReg2())); // gggg
+            assert(isPredicateRegister(id->idReg3())); // NNNN
+            break;
+
+        case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(insOptsScalableAtMaxHalf(id->idInsOpt()));
+            assert(isPredicateRegister(id->idReg1()));    // DDDD
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // nnnnn
+            assert(isVectorRegister(id->idReg4()));       // mmmmm
+            break;
+
+        case IF_SVE_GQ_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision odd elements
+            switch (id->idIns())
+            {
+                case INS_sve_fcvtnt:
+                case INS_sve_fcvtlt:
+                    assert(insOptsConvertFloatStepwise(id->idInsOpt()));
+                    FALLTHROUGH;
+                case INS_sve_fcvtxnt:
+                case INS_sve_bfcvtnt:
+                    assert(isVectorRegister(id->idReg1()));       // ddddd
+                    assert(isLowPredicateRegister(id->idReg2())); // ggg
+                    assert(isVectorRegister(id->idReg3()));       // nnnnn
+                    break;
+                default:
+                    assert(!"unreachable");
+                    break;
+            }
+            break;
+
+        case IF_SVE_HO_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision
+            assert(id->idInsOpt() == INS_OPTS_S_TO_H);
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // nnnnn
+            break;
+
+        case IF_SVE_HO_3B:
+            assert(insOptsConvertFloatToFloat(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // nnnnn
+            break;
+
+        case IF_SVE_HO_3C:
+            assert(id->idInsOpt() == INS_OPTS_D_TO_S);
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // nnnnn
+            break;
+
+        case IF_SVE_HP_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert to integer
+            assert(insOptsScalableFloat(id->idInsOpt()) || id->idInsOpt() == INS_OPTS_H_TO_S ||
+                   id->idInsOpt() == INS_OPTS_H_TO_D || id->idInsOpt() == INS_OPTS_S_TO_D ||
+                   id->idInsOpt() == INS_OPTS_D_TO_S);
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // nnnnn
+            break;
+
+        case IF_SVE_HS_3A: // ................ ...gggnnnnnddddd -- SVE integer convert to floating-point
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()) || id->idInsOpt() == INS_OPTS_S_TO_H ||
+                   id->idInsOpt() == INS_OPTS_S_TO_D || id->idInsOpt() == INS_OPTS_D_TO_H ||
+                   id->idInsOpt() == INS_OPTS_D_TO_S);
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // nnnnn
+            break;
+
+        case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(insOptsScalableFloat(id->idInsOpt()));
+            assert(isPredicateRegister(id->idReg1()));    // DDDD
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // nnnnn
+            assert(isVectorRegister(id->idReg4()));       // mmmmm
+            break;
+
+        // Scalable FP.
+        case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations
+        case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated)
+        case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations
+            assert(insOptsScalableFloat(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_AB_3B: // ................ ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated)
+        case IF_SVE_HL_3B: // ................ ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        // Scalable to Simd Vector.
+        case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords)
+        case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords)
+        case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords)
+            assert(insOptsScalableStandard(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));          // ddddd
+            assert(isLowPredicateRegister(id->idReg2()));    // ggg
+            assert(isVectorRegister(id->idReg3()));          // mmmmm
+            assert(id->idOpSize() == EA_8BYTE);
+            break;
+
+        // Scalable FP to Simd Vector.
+        case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords)
+            assert(insOptsScalableFloat(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // mmmmm
+            assert(id->idOpSize() == EA_8BYTE);
+            break;
+
+        // Scalable, widening to scalar SIMD.
+        case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated)
+            assert(insOptsScalableWide(id->idInsOpt()));  // xx
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // mmmmm
+            assert(isValidVectorElemsizeWidening(id->idOpSize()));
+            break;
+
+        // Scalable, possibly FP.
+        case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated)
+            switch (id->idIns())
+            {
+                case INS_sve_fabs:
+                case INS_sve_fneg:
+                    assert(insOptsScalableFloat(id->idInsOpt())); // xx
+                    break;
+
+                default:
+                    assert(insOptsScalableStandard(id->idInsOpt())); // xx
+                    break;
+            }
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        // Scalable, various sizes.
+        case IF_SVE_AQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer unary operations (predicated)
+        case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements
+            switch (id->idIns())
+            {
+                case INS_sve_abs:
+                case INS_sve_neg:
+                case INS_sve_rbit:
+                    assert(insOptsScalableStandard(id->idInsOpt()));
+                    break;
+
+                case INS_sve_sxtb:
+                case INS_sve_uxtb:
+                case INS_sve_revb:
+                    assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+                    break;
+
+                case INS_sve_sxth:
+                case INS_sve_uxth:
+                case INS_sve_revh:
+                    assert(insOptsScalableWords(id->idInsOpt()));
+                    break;
+
+                default:
+                    assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
+                    break;
+            }
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
+        case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
+            assert(isScalableVectorSize(id->idOpSize())); // xx
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // VVV
+            assert(isVectorRegister(id->idReg3()));       // nnnnn
+            break;
+
+        case IF_SVE_CW_4A: // ........xx.mmmmm ..VVVVnnnnnddddd -- SVE select vector elements (predicated)
+            assert(isScalableVectorSize(id->idOpSize())); // xx
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ddddd
+            assert(isPredicateRegister(id->idReg2())); // VVVV
+            assert(isVectorRegister(id->idReg3()));    // nnnnn
+            if (id->idIns() == INS_sve_sel)
+            {
+                assert(isVectorRegister(id->idReg4())); // mmmmm
+            }
+            break;
+
+        // Scalable from general scalar (possibly SP)
+        case IF_SVE_CQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy general register to vector (predicated)
+            assert(insOptsScalableStandard(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));          // ddddd
+            assert(isLowPredicateRegister(id->idReg2()));    // ggg
+            assert(isGeneralRegisterOrZR(id->idReg3()));     // mmmmm
+            assert(isValidScalarDatasize(id->idOpSize()));
+            break;
+
+        // Scalable, .H, .S or .D
+        case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long
+        case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));             // ddddd
+            assert(isLowPredicateRegister(id->idReg2()));       // ggg
+            assert(isVectorRegister(id->idReg3()));             // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        // Scalable, possibly fixed to .S
+        case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated)
+            switch (id->idIns())
+            {
+                case INS_sve_sqabs:
+                case INS_sve_sqneg:
+                    assert(insOptsScalableStandard(id->idInsOpt()));
+                    break;
+
+                default:
+                    assert(id->idInsOpt() == INS_OPTS_SCALABLE_S);
+                    break;
+            }
+            assert(isVectorRegister(id->idReg1()));       // ddddd
+            assert(isLowPredicateRegister(id->idReg2())); // ggg
+            assert(isVectorRegister(id->idReg3()));       // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(id->idReg1())); // nnnn
+            assert(isVectorRegister(id->idReg2())); // ddddd
+            assert(isEvenRegister(id->idReg2()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter)
+            assert(id->idOpSize() == EA_8BYTE);
+
+            FALLTHROUGH;
+        case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
+        case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            assert(isGeneralRegister(id->idReg1()));                          // ddddd
+            assert(isPredicateRegister(id->idReg2()));                        // MMMM
+            assert(isValidGeneralDatasize(id->idOpSize()));
+            break;
+
+        case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
+        case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt())); // xx
+            assert(isVectorRegister(id->idReg1()));             // ddddd
+            assert(isPredicateRegister(id->idReg2()));          // MMMM
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise
+            break;
+
+        case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
+            assert(isPredicateRegister(id->idReg1())); // NNNN
+            break;
+
+        case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars
+            assert(insOptsNone(id->idInsOpt()));
+            assert(isGeneralRegister(id->idReg1()));        // nnnnn
+            assert(isGeneralRegister(id->idReg2()));        // mmmmm
+            assert(isValidGeneralDatasize(id->idOpSize())); // x
+            break;
+
+        case IF_SVE_FZ_2A: // ................ ......nnnn.ddddd -- SME2 multi-vec extract narrow
+            assert(insOptsNone(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1())); // ddddd
+            assert(isVectorRegister(id->idReg2())); // nnnn
+            assert(isEvenRegister(id->idReg2()));
+            break;
+
+        case IF_SVE_HG_2A: // ................ ......nnnn.ddddd -- SVE2 FP8 downconverts
+            assert(insOptsNone(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1())); // ddddd
+            assert(isVectorRegister(id->idReg2())); // nnnn
+            assert(isEvenRegister(id->idReg2()));
+            break;
+
+        case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1())); // nnnnn
+            assert(isVectorRegister(id->idReg2())); // ddddd
+            assert(optGetSveElemsize(id->idInsOpt()) != EA_8BYTE);
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+                                                                              // x
+            break;
+
+        case IF_SVE_BB_2A: // ...........nnnnn .....iiiiiiddddd -- SVE stack frame adjustment
+            assert(insOptsNone(id->idInsOpt()));
+            assert(id->idOpSize() == EA_8BYTE);
+            assert(isGeneralRegisterOrZR(id->idReg1())); // ddddd
+            assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn
+            assert(isValidSimm<6>(emitGetInsSC(id)));    // iiiiii
+            break;
+
+        case IF_SVE_BC_1A: // ................ .....iiiiiiddddd -- SVE stack frame size
+            assert(insOptsNone(id->idInsOpt()));
+            assert(id->idOpSize() == EA_8BYTE);
+            assert(isGeneralRegister(id->idReg1()));  // ddddd
+            assert(isValidSimm<6>(emitGetInsSC(id))); // iiiiii
+            break;
+
+        case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm
+        {
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isVectorRegister(id->idReg2()));                           // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx xx
+            imm = emitGetInsSC(id);
+
+            switch (id->idInsOpt())
+            {
+                case INS_OPTS_SCALABLE_B:
+                    assert(isValidUimmFrom1<3>(imm)); // iii
+                    break;
+
+                case INS_OPTS_SCALABLE_H:
+                    assert(isValidUimmFrom1<4>(imm)); // xiii
+                    break;
+
+                case INS_OPTS_SCALABLE_S:
+                    assert(isValidUimmFrom1<5>(imm)); // xxiii
+                    break;
+
+                case INS_OPTS_SCALABLE_D:
+                    assert(isValidUimmFrom1<6>(imm)); // xx xiii
+                    break;
+
+                default:
+                    unreached();
+                    break;
+            }
+            break;
+        }
+
+        case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate
+                           // increment)
+        {
+            ssize_t imm1;
+            ssize_t imm2;
+            insSveDecodeTwoSimm5(emitGetInsSC(id), &imm1, &imm2);
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isValidSimm<5>(imm1));                                     // iiiii
+            assert(isValidSimm<5>(imm2));                                     // iiiii
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+        }
+
+        case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register
+                           // increment)
+        case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate
+                           // increment)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isValidSimm<5>(emitGetInsSC(id)));                         // iiiii
+            assert(isIntegerRegister(id->idReg2()));                          // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+
+        case IF_SVE_FR_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long
+        {
+            assert(insOptsScalableWide(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isVectorRegister(id->idReg2()));                           // nnnnn
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // x xx
+            imm = emitGetInsSC(id);
+
+            switch (id->idInsOpt())
+            {
+                case INS_OPTS_SCALABLE_B:
+                    assert(isValidUimm<3>(imm)); // iii
+                    break;
+
+                case INS_OPTS_SCALABLE_H:
+                    assert(isValidUimm<4>(imm)); // x iii
+                    break;
+
+                case INS_OPTS_SCALABLE_S:
+                    assert(isValidUimm<5>(imm)); // xx iii
+                    break;
+
+                default:
+                    unreached();
+                    break;
+            }
+            break;
+        }
+
+        case IF_SVE_GB_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift right narrow
+        {
+            assert(insOptsScalableWide(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isVectorRegister(id->idReg2()));                           // nnnnn
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // x xx
+            imm = emitGetInsSC(id);
+
+            switch (id->idInsOpt())
+            {
+                case INS_OPTS_SCALABLE_B:
+                    assert(isValidUimmFrom1<3>(imm)); // iii
+                    break;
+
+                case INS_OPTS_SCALABLE_H:
+                    assert(isValidUimmFrom1<4>(imm)); // x iii
+                    break;
+
+                case INS_OPTS_SCALABLE_S:
+                    assert(isValidUimmFrom1<5>(imm)); // xx iii
+                    break;
+
+                default:
+                    unreached();
+                    break;
+            }
+            break;
+        }
+
+        case IF_SVE_FV_2A: // ........xx...... .....rmmmmmddddd -- SVE2 complex integer add
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isVectorRegister(id->idReg2()));                           // nnnnn
+            assert(emitIsValidEncodedRotationImm90_or_270(emitGetInsSC(id))); // r
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+
+        case IF_SVE_FY_3A: // .........x.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long with carry
+            assert(insOptsScalableWords(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isVectorRegister(id->idReg2()));                           // nnnnn
+            assert(isVectorRegister(id->idReg3()));                           // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // x
+            break;
+
+        case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1())); // ddddd
+            assert(isVectorRegister(id->idReg2())); // mmmmm
+            if (id->idInsOpt() == INS_OPTS_SCALABLE_S)
+            {
+                assert(id->idIns() == INS_sve_sm4e);
+            }
+            else
+            {
+                assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
+            }
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_GL_1A: // ................ ...........ddddd -- SVE2 crypto unary operations
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1())); // ddddd
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare
+            assert(id->idOpSize() == EA_8BYTE);
+
+            FALLTHROUGH;
+        case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isPredicateRegister(id->idReg1()));                        // DDDD
+            assert(isGeneralRegister(id->idReg2()));                          // nnnnn
+            assert(isValidGeneralDatasize(id->idOpSize()));                   // X
+            assert(isGeneralRegister(id->idReg3()));                          // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+
+        case IF_SVE_DV_4A: // ........ix.xxxvv ..NNNN.MMMM.DDDD -- SVE broadcast predicate element
+        {
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isPredicateRegister(id->idReg1())); // DDDD
+            assert(isPredicateRegister(id->idReg2())); // NNNN
+            assert(isPredicateRegister(id->idReg3())); // MMMM
+            assert(isGeneralRegister(id->idReg4()));   // vv
+            assert((REG_R12 <= id->idReg4()) && (id->idReg4() <= REG_R15));
+            imm = emitGetInsSC(id);
+
+            switch (id->idInsOpt())
+            {
+                case INS_OPTS_SCALABLE_B:
+                    assert(isValidUimm<4>(imm));
+                    break;
+
+                case INS_OPTS_SCALABLE_H:
+                    assert(isValidUimm<3>(imm));
+                    break;
+
+                case INS_OPTS_SCALABLE_S:
+                    assert(isValidUimm<2>(imm));
+                    break;
+
+                case INS_OPTS_SCALABLE_D:
+                    assert(isValidUimm<1>(imm));
+                    break;
+
+                default:
+                    unreached();
+                    break;
+            }
+
+            break;
+        }
+
+        case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
+            assert(isValidUimm<1>(emitGetInsSC(id))); // i
+
+            FALLTHROUGH;
+        case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isPredicateRegister(id->idReg1()));                        // DDDD
+            assert(isHighPredicateRegister(id->idReg2()));                    // NNN
+            assert(isValidUimm<2>(emitGetInsSC(id)));                         // ii
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+
+        case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. -- SVE integer compare scalar count and limit (predicate
+                           // pair)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isLowPredicateRegister(id->idReg1()));                     // DDD
+            assert(isGeneralRegister(id->idReg2()));                          // nnnnn
+            assert(isGeneralRegister(id->idReg3()));                          // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+
+        case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit
+                           // (predicate-as-counter)
+            assert(insOptsScalableStandard(id->idInsOpt()));                  // L
+            assert(isHighPredicateRegister(id->idReg1()));                    // DDD
+            assert(isGeneralRegister(id->idReg2()));                          // nnnnn
+            assert(isGeneralRegister(id->idReg3()));                          // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+
+        case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isHighPredicateRegister(id->idReg1()));                    // DDD
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+
+        case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated)
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isValidUimm<8>(emitGetInsSC(id)));                         // iiiiiiii
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+
+        case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated)
+            imm = emitGetInsSC(id);
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            assert(isValidSimm<8>(imm));                                      // iiiiiiii
+            break;
+
+        case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated)
+            imm = emitGetInsSC(id);
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            assert(isValidUimm<8>(imm));                                      // iiiiiiii
+            break;
+
+        case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+
+        case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                                       // ddddd
+            assert(isValidSimm<8>(emitGetInsSC(id)) || isValidUimm<8>(emitGetInsSC(id))); // iiiiiiii
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt())));             // xx
+            break;
+
+        case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isValidSimm<8>(emitGetInsSC(id)));                         // iiiiiiii
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+
+        case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product
+            assert(insOptsScalableWords(id->idInsOpt()));
+
+            FALLTHROUGH;
+        case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isVectorRegister(id->idReg2()));                           // nnnnn
+            assert(emitIsValidEncodedRotationImm0_to_270(emitGetInsSC(id)));  // rr
+            assert(isVectorRegister(id->idReg3()));                           // mmmmm
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+
+        case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
+        case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
+        case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1())); // ddddd
+            assert(isVectorRegister(id->idReg2())); // nnnnn
+            assert(isVectorRegister(id->idReg3())); // mmm
+            assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7));
+            assert(isValidUimm<4>(emitGetInsSC(id))); // ii rr
+            break;
+
+        case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
+        case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
+        case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
+        case IF_SVE_GV_3A: // ...........immmm ....rrnnnnnddddd -- SVE floating-point complex multiply-add (indexed)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ddddd
+            assert(isVectorRegister(id->idReg2()));    // nnnnn
+            assert(isLowVectorRegister(id->idReg3())); // mmm
+            assert(isValidUimm<3>(emitGetInsSC(id)));  // i rr
+            break;
+
+        case IF_SVE_IH_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
+                             // immediate)
+        case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
+                             // immediate)
+        case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
+                             // immediate)
+        case IF_SVE_IJ_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate)
+        case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
+                             // immediate)
+        case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
+                             // immediate)
+        case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
+                             // immediate)
+        case IF_SVE_IM_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus
+                             // immediate)
+        case IF_SVE_IO_3A:   // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus
+                             // immediate)
+        case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
+                           // immediate)
+        case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate)
+        case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
+                           // immediate)
+        case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
+                           // immediate)
+        case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+        case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+        case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate)
+            assert(insOptsScalable(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isScalableVectorSize(id->idOpSize()));
+
+            switch (id->idIns())
+            {
+                case INS_sve_ld2b:
+                case INS_sve_ld2h:
+                case INS_sve_ld2w:
+                case INS_sve_ld2d:
+                case INS_sve_ld2q:
+                case INS_sve_st2b:
+                case INS_sve_st2h:
+                case INS_sve_st2w:
+                case INS_sve_st2d:
+                case INS_sve_st2q:
+                    assert((isValidSimm_MultipleOf<4, 2>(emitGetInsSC(id)))); // iiii
+                    break;
+
+                case INS_sve_ld3b:
+                case INS_sve_ld3h:
+                case INS_sve_ld3w:
+                case INS_sve_ld3d:
+                case INS_sve_ld3q:
+                case INS_sve_st3b:
+                case INS_sve_st3h:
+                case INS_sve_st3w:
+                case INS_sve_st3d:
+                case INS_sve_st3q:
+                    assert((isValidSimm_MultipleOf<4, 3>(emitGetInsSC(id)))); // iiii
+                    break;
+
+                case INS_sve_ld4b:
+                case INS_sve_ld4h:
+                case INS_sve_ld4w:
+                case INS_sve_ld4d:
+                case INS_sve_ld4q:
+                case INS_sve_st4b:
+                case INS_sve_st4h:
+                case INS_sve_st4w:
+                case INS_sve_st4d:
+                case INS_sve_st4q:
+                    assert((isValidSimm_MultipleOf<4, 4>(emitGetInsSC(id)))); // iiii
+                    break;
+
+                case INS_sve_ld1rqb:
+                case INS_sve_ld1rqd:
+                case INS_sve_ld1rqh:
+                case INS_sve_ld1rqw:
+                    assert((isValidSimm_MultipleOf<4, 16>(emitGetInsSC(id)))); // iiii
+                    break;
+
+                case INS_sve_ld1rob:
+                case INS_sve_ld1rod:
+                case INS_sve_ld1roh:
+                case INS_sve_ld1row:
+                    assert((isValidSimm_MultipleOf<4, 32>(emitGetInsSC(id)))); // iiii
+                    break;
+
+                default:
+                    assert(isValidSimm<4>(emitGetInsSC(id))); // iiii
+                    break;
+            }
+            break;
+
+        case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+            assert(isVectorRegister(id->idReg1()));       // ttttt
+            assert(isPredicateRegister(id->idReg2()));    // ggg
+            assert(isGeneralRegister(id->idReg3()));      // nnnnn
+            assert(isGeneralRegister(id->idReg4()));      // mmmmm
+            assert(isScalableVectorSize(id->idOpSize())); // xx
+            // st1h is reserved for scalable B
+            assert((id->idIns() == INS_sve_st1h) ? insOptsScalableAtLeastHalf(id->idInsOpt())
+                                                 : insOptsScalableStandard(id->idInsOpt()));
+            break;
+
+        case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+            assert(insOptsScalableWords(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));       // ttttt
+            assert(isPredicateRegister(id->idReg2()));    // ggg
+            assert(isGeneralRegister(id->idReg3()));      // nnnnn
+            assert(isGeneralRegister(id->idReg4()));      // mmmmm
+            assert(isScalableVectorSize(id->idOpSize())); // x
+            break;
+
+        case IF_SVE_JJ_4A:   // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
+                           // offsets)
+        case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // unscaled offsets)
+            assert(insOptsScalable32bitExtends(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            imm = emitGetInsSC(id);
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));       // ttttt
+            assert(isPredicateRegister(id->idReg2()));    // ggg
+            assert(isGeneralRegister(id->idReg3()));      // nnnnn
+            assert(isScalableVectorSize(id->idOpSize())); // xx
+            assert(isValidSimm<4>(imm));                  // iiii
+            break;
+
+        case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            imm = emitGetInsSC(id);
+            assert(insOptsScalableWords(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));       // ttttt
+            assert(isPredicateRegister(id->idReg2()));    // ggg
+            assert(isGeneralRegister(id->idReg3()));      // nnnnn
+            assert(isScalableVectorSize(id->idOpSize())); // x
+            assert(isValidSimm<4>(imm));                  // iiii
+            break;
+
+        case IF_SVE_HW_4A:   // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_IU_4A:   // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+            assert(insOptsScalable32bitExtends(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isVectorRegister(id->idReg4()));    // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_HW_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isVectorRegister(id->idReg4()));    // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_IF_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
+                             // scalar)
+        case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
+                             // scalar)
+            assert(insOptsScalableWords(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));      // ttttt
+            assert(isPredicateRegister(id->idReg2()));   // ggg
+            assert(isVectorRegister(id->idReg3()));      // nnnnn
+            assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar)
+        case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));      // ttttt
+            assert(isPredicateRegister(id->idReg2()));   // ggg
+            assert(isGeneralRegister(id->idReg3()));     // nnnnn
+            assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isGeneralRegister(id->idReg4()));   // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q);
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isGeneralRegister(id->idReg4()));   // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
+            assert(insOptsScalableWordsOrQuadwords(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isGeneralRegister(id->idReg4()));   // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_IK_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isGeneralRegister(id->idReg4()));   // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
+                           // scalar)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q);
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isGeneralRegister(id->idReg4()));   // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_IU_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isVectorRegister(id->idReg4()));    // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar)
+        case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q);
+            assert(isVectorRegister(id->idReg1()));      // ttttt
+            assert(isPredicateRegister(id->idReg2()));   // ggg
+            assert(isVectorRegister(id->idReg3()));      // nnnnn
+            assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus
+                           // scalar)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(id->idReg1()));      // ttttt
+            assert(isPredicateRegister(id->idReg2()));   // ggg
+            assert(isVectorRegister(id->idReg3()));      // nnnnn
+            assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_IZ_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
+                             // scalar)
+        case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
+                             // scalar)
+        case IF_SVE_JA_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus
+                             // scalar)
+            assert(insOptsScalableWords(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));      // ttttt
+            assert(isPredicateRegister(id->idReg2()));   // ggg
+            assert(isVectorRegister(id->idReg3()));      // nnnnn
+            assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_JD_4C:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+        case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+            assert(insOptsScalableDoubleWordsOrQuadword(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isGeneralRegister(id->idReg4()));   // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
+                           // scalar)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q);
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isGeneralRegister(id->idReg4()));   // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar)
+        case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar)
+        case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar)
+        case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
+                           // scalar)
+        case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar)
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isGeneralRegister(id->idReg4()));   // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_JJ_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
+                           // offsets)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isVectorRegister(id->idReg4()));    // mmmmm
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated)
+            imm = emitGetInsSC(id);
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            assert(emitIsValidEncodedRotationImm90_or_270(imm));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated)
+            imm = emitGetInsSC(id);
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            assert(isVectorRegister(id->idReg4()));
+            assert(emitIsValidEncodedRotationImm0_to_270(imm));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+            assert(isPredicateRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_HM_2A: // ........xx...... ...ggg....iddddd -- SVE floating-point arithmetic with immediate
+                           // (predicated)
+            imm = emitGetInsSC(id);
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(emitIsValidEncodedSmallFloatImm(imm));
+            break;
+
+        case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient
+            imm = emitGetInsSC(id);
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isVectorRegister(id->idReg2()));
+            assert(isValidUimm<3>(imm));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            assert(isVectorRegister(id->idReg4()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing
+                           // multiplicand
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            assert(isVectorRegister(id->idReg4()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register
+        case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register
+            assert(insOptsNone(id->idInsOpt()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isPredicateRegister(id->idReg1()));   // TTTT
+            assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn
+            assert(isValidSimm<9>(emitGetInsSC(id)));    // iii
+            break;
+
+        case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register
+        case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register
+            assert(insOptsNone(id->idInsOpt()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isVectorRegister(id->idReg1()));      // ttttt
+            assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn
+            assert(isValidSimm<9>(emitGetInsSC(id)));    // iii
+            break;
+
+        case IF_SVE_GG_3A: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit
+                           // element size
+            assert(isVectorRegister(id->idReg1()));   // ddddd
+            assert(isVectorRegister(id->idReg2()));   // nnnnn
+            assert(isVectorRegister(id->idReg3()));   // mmmmm
+            assert(isValidUimm<2>(emitGetInsSC(id))); // ii
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
+            break;
+
+        case IF_SVE_GH_3B:   // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit
+                             // element size
+        case IF_SVE_GH_3B_B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit
+                             // element size
+            assert(isVectorRegister(id->idReg1()));   // ddddd
+            assert(isVectorRegister(id->idReg2()));   // nnnnn
+            assert(isVectorRegister(id->idReg3()));   // mmmmm
+            assert(isValidUimm<2>(emitGetInsSC(id))); // ii
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_H);
+            break;
+
+        case IF_SVE_GG_3B: // ........ii.mmmmm ...i..nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit
+                           // element size
+            assert(isVectorRegister(id->idReg1()));   // ddddd
+            assert(isVectorRegister(id->idReg2()));   // nnnnn
+            assert(isVectorRegister(id->idReg3()));   // mmmmm
+            assert(isValidUimm<3>(emitGetInsSC(id))); // ii
+                                                      // i
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_H);
+            break;
+
+        case IF_SVE_GH_3A: // ........i..mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit
+                           // element size
+            assert(insOptsScalable(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));   // ddddd
+            assert(isVectorRegister(id->idReg2()));   // nnnnn
+            assert(isVectorRegister(id->idReg3()));   // mmmmm
+            assert(isValidUimm<1>(emitGetInsSC(id))); // i
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
+            break;
+
+        case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled
+                           // offsets)
+        case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit
+                             // scaled offsets)
+            assert(insOptsScalable32bitExtends(id->idInsOpt()));
+            assert(isLowPredicateRegister(id->idReg1()));
+            assert(isGeneralRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled
+                           // offsets)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
+            assert(isLowPredicateRegister(id->idReg1()));
+            assert(isGeneralRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar)
+            assert(insOptsNone(id->idInsOpt()));
+            assert(isLowPredicateRegister(id->idReg1()));
+            assert(isGeneralRegister(id->idReg2()));
+            assert(isGeneralRegister(id->idReg3()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate)
+            assert(insOptsScalableWords(id->idInsOpt()));
+            assert(isLowPredicateRegister(id->idReg1()));
+            assert(isVectorRegister(id->idReg2()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate)
+            assert(insOptsNone(id->idInsOpt()));
+            assert(isLowPredicateRegister(id->idReg1()));
+            assert(isGeneralRegister(id->idReg2()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate)
+            assert(insOptsScalableWords(id->idInsOpt()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            assert(isValidUimm<5>(emitGetInsSC(id)));
+            break;
+
+        case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate)
+            assert(insOptsScalableWords(id->idInsOpt()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            break;
+
+        case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            break;
+
+        case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate)
+            assert(insOptsScalableWords(id->idInsOpt()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            break;
+
+        case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate)
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isVectorRegister(id->idReg3()));
+            assert((isValidUimm_MultipleOf<5, 8>(emitGetInsSC(id))));
+            break;
+
+        case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_D);
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isGeneralRegister(id->idReg3()));
+            break;
+
+        case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+            assert(insOptsScalableWords(id->idInsOpt()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isGeneralRegister(id->idReg3()));
+            break;
+
+        case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isGeneralRegister(id->idReg3()));
+            break;
+
+        case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isLowPredicateRegister(id->idReg2()));
+            assert(isGeneralRegister(id->idReg3()));
+            break;
+
+        case IF_SVE_BI_2A: // ................ ......nnnnnddddd -- SVE constructive prefix (unpredicated)
+            assert(insOptsNone(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isVectorRegister(id->idReg2()));
+            break;
+
+        case IF_SVE_HH_2A: // ................ ......nnnnnddddd -- SVE2 FP8 upconverts
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_H);
+            assert(isVectorRegister(id->idReg1()));
+            assert(isVectorRegister(id->idReg2()));
+            break;
+
+        case IF_SVE_CB_2A: // ........xx...... ......nnnnnddddd -- SVE broadcast general register
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isGeneralRegisterOrZR(id->idReg2())); // ZR is SP
+            break;
+
+        case IF_SVE_CG_2A: // ........xx...... ......nnnnnddddd -- SVE reverse vector elements
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isVectorRegister(id->idReg2()));
+            break;
+
+        case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator
+        case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
+        case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated)
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isVectorRegister(id->idReg2()));
+            break;
+
+        case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated)
+        case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert
+        case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate
+            imm = emitGetInsSC(id);
+            assert(isValidVectorShiftAmount(imm, optGetSveElemsize(id->idInsOpt()),
+                                            emitInsIsVectorRightShift(id->idIns())));
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isVectorRegister(id->idReg2()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            break;
+
+        case IF_SVE_BW_2A: // ........ii.xxxxx ......nnnnnddddd -- SVE broadcast indexed element
+            imm = emitGetInsSC(id);
+            assert(insOptsScalable(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isVectorRegister(id->idReg2()));
+            assert(isValidBroadcastImm(imm, optGetSveElemsize(id->idInsOpt())));
+            break;
+
+        case IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i
+            imm = emitGetInsSC(id);
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));
+            assert(isVectorRegister(id->idReg2()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            switch (id->idInsOpt())
+            {
+                case INS_OPTS_SCALABLE_B:
+                    assert(isValidUimm<4>(imm));
+                    break;
+
+                case INS_OPTS_SCALABLE_H:
+                    assert(isValidUimm<3>(imm));
+                    break;
+
+                case INS_OPTS_SCALABLE_S:
+                    assert(isValidUimm<2>(imm));
+                    break;
+
+                case INS_OPTS_SCALABLE_D:
+                    assert(isValidUimm<1>(imm));
+                    break;
+
+                default:
+                    break;
+            }
+            break;
+
+        case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq
+            imm = emitGetInsSC(id);
+            assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
+            assert(isVectorRegister(id->idReg1()));
+            assert(isVectorRegister(id->idReg2()));
+            assert(isScalableVectorSize(id->idOpSize()));
+            assert(isValidUimm<4>(imm));
+            break;
+
+        default:
+            printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
+            assert(!"Unexpected format");
+            break;
+    }
+}
+#endif // DEBUG
+
+//--------------------------------------------------------------------
+// emitDispInsSveHelp: Dump the given SVE instruction to jitstdout.
+//
+// Arguments:
+//   id - The instruction
+//
+void emitter::emitDispInsSveHelp(instrDesc* id)
+{
+    instruction ins  = id->idIns();
+    insFormat   fmt  = id->idInsFmt();
+    emitAttr    size = id->idOpSize();
+
+    switch (fmt)
+    {
+        ssize_t    imm;
+        bitMaskImm bmi;
+
+        //  <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+        case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated)
+        case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated)
+        case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated)
+        case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated)
+        case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated)
+        case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated)
+        case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated)
+        case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic
+        case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract
+        case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left
+                           // (predicated)
+        case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations
+        case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated)
+        // <Zdn>.D, <Pg>/M, <Zdn>.D, <Zm>.D
+        case IF_SVE_AB_3B: // ................ ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated)
+        // <Zdn>.H, <Pg>/M, <Zdn>.H, <Zm>.H
+        case IF_SVE_HL_3B: // ................ ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
+            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                   // mmmmm
+            break;
+
+        // <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T>
+        case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated)
+        {
+            PredicateType ptype = (id->idPredicateReg2Merge()) ? PREDICATE_MERGE : PREDICATE_ZERO;
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                 // nnnnn
+            emitDispLowPredicateReg(id->idReg2(), ptype, id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                // ddddd
+            break;
+        }
+
+        // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const>
+        case IF_SVE_AM_2A: // ........xx...... ...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
+            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
+            emitDispImm(emitGetInsSC(id), false);                                                  // iiii
+            break;
+
+        // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D
+        case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
+            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
+            emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false);                              // mmmmm
+            break;
+
+        // <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
+        // <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T>
+        case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend
+                           // (predicated)
+        case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand
+                           // (predicated)
+        case IF_SVE_HU_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend
+        // <Zd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+        case IF_SVE_GI_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE2 histogram generation (vector)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
+            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg4(), id->idInsOpt(), false);
+            break;
+
+        // <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+        case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated)
+        case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
+        case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high
+                           // (unpredicated)
+        case IF_SVE_FP_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved
+        case IF_SVE_FQ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute
+        case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient
+        case IF_SVE_BR_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector segments
+        case IF_SVE_CA_3A: // ........xx.mmmmm ......nnnnnddddd -- sve_int_perm_tbxquads
+        case IF_SVE_EV_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp
+        case IF_SVE_GW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE FP clamp
+        case IF_SVE_HK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated)
+        // <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+        case IF_SVE_EM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add high
+        case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate
+        // <Zd>.Q, <Zn>.Q, <Zm>.Q
+        case IF_SVE_BR_3B: // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments
+        // <Zda>.D, <Zn>.D, <Zm>.D
+        case IF_SVE_HD_3A_A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate
+        // <Zd>.D, <Zn>.D, <Zm>.D
+        case IF_SVE_AT_3B: // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated)
+        case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated)
+        // <Zd>.B, <Zn>.B, <Zm>.B
+        case IF_SVE_GF_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment)
+        case IF_SVE_BD_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
+        // <Zd>.D, <Zn>.D, <Zm>.D
+        // <Zd>.S, <Zn>.S, <Zm>.S
+        case IF_SVE_GJ_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 crypto constructive binary operations
+        // <Zd>.H, <Zn>.H, <Zm>.H
+        case IF_SVE_GW_3B: // ...........mmmmm ......nnnnnddddd -- SVE FP clamp
+        case IF_SVE_HK_3B: // ...........mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);  // nnnnn/mmmmm
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm/aaaaa
+            break;
+
+        // <Zda>.D, <Zn>.D, <Zm>.D
+        case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer)
+        // <Zdn>.D, <Zm>.D, <Za>.D
+        case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer)
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true);  // ddddd
+            emitDispSveReg(id->idReg2(), INS_OPTS_SCALABLE_D, true);  // nnnnn
+            emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm
+            break;
+
+        // <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+        case IF_SVE_AV_3A: // ...........mmmmm ......kkkkkddddd -- SVE2 bitwise ternary operations
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);  // mmmmm
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // kkkkk
+            break;
+
+        // <Zd>.<T>, #<imm1>, #<imm2>
+        case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate
+                           // increment)
+        {
+            ssize_t imm1;
+            ssize_t imm2;
+            insSveDecodeTwoSimm5(emitGetInsSC(id), &imm1, &imm2);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispImm(imm1, true);                            // iiiii
+            emitDispImm(imm2, false);                           // iiiii
+            break;
+        }
+
+        // <Zd>.<T>, #<imm>, <R><m>
+        case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register
+                           // increment)
+        {
+            const emitAttr intRegSize = (id->idInsOpt() == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE;
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispImm(emitGetInsSC(id), true);                // iiiii
+            emitDispReg(id->idReg2(), intRegSize, false);       // mmmmm
+            break;
+        }
+
+        // <Zd>.<T>, <R><n>, #<imm>
+        case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate
+                           // increment)
+        {
+            const emitAttr intRegSize = (id->idInsOpt() == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE;
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispReg(id->idReg2(), intRegSize, true);        // mmmmm
+            emitDispImm(emitGetInsSC(id), false);               // iiiii
+            break;
+        }
+
+        // <Zda>.H, <Zn>.B, <Zm>.B
+        case IF_SVE_GN_3A:   // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long
+        case IF_SVE_HA_3A_E: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_H, true); // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);     // mmmmm
+            break;
+
+        // <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+        // <Zd>.<T>, {<Zn>.<T>}, <Zm>.<T>
+        case IF_SVE_BZ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            if (id->idIns() == INS_sve_tbl)
+            {
+                emitDispSveConsecutiveRegList(id->idReg2(), 1, id->idInsOpt(), true); // nnnnn
+            }
+            else
+            {
+                assert(id->idIns() == INS_sve_tbx);
+                emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn
+            }
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm
+            break;
+
+        // <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+        // <Zd>.<T>, {<Zn>.<T>}, <Zm>.<T>
+        case IF_SVE_EX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector elements (quadwords)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            if (id->idIns() == INS_sve_tblq)
+            {
+                emitDispSveConsecutiveRegList(id->idReg2(), 1, id->idInsOpt(), true); // nnnnn
+            }
+            else
+            {
+                emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn
+            }
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm
+            break;
+
+        // <Zd>.<T>, {<Zn1>.<T>, <Zn2>.<T>}, <Zm>.<T>
+        case IF_SVE_BZ_3A_A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                   // ddddd
+            emitDispSveConsecutiveRegList(id->idReg2(), 2, id->idInsOpt(), true); // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                  // mmmmm
+            break;
+
+        // <Zd>.<T>, <R><n>, <R><m>
+        case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register
+                           // increment)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispReg(id->idReg2(), size, true);              // nnnnn
+            emitDispReg(id->idReg3(), size, false);             // mmmmm
+            break;
+
+        // <Xd>{, <pattern>{, MUL #<imm>}}
+        case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count
+        // <Xdn>{, <pattern>{, MUL #<imm>}}
+        case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count
+            imm = emitGetInsSC(id);
+            emitDispReg(id->idReg1(), size, true);             // ddddd
+            emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp
+            if (imm > 1)
+            {
+                printf("mul ");
+                emitDispImm(imm, false, false); // iiii
+            }
+            break;
+
+        // <Zdn>.D{, <pattern>{, MUL #<imm>}}
+        // <Zdn>.H{, <pattern>{, MUL #<imm>}}
+        // <Zdn>.S{, <pattern>{, MUL #<imm>}}
+        case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count
+        case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispSvePattern(id->idSvePattern(), (imm > 1));  // ppppp
+            if (imm > 1)
+            {
+                printf("mul ");
+                emitDispImm(imm, false, false); // iiii
+            }
+            break;
+
+        // <Zdn>.<T>, <Zdn>.<T>, #<const>
+        case IF_SVE_BS_1A: // ..............ii iiiiiiiiiiiddddd -- SVE bitwise logical with immediate (unpredicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+
+            FALLTHROUGH;
+        // <Zd>.<T>, #<const>
+        case IF_SVE_BT_1A: // ..............ii iiiiiiiiiiiddddd -- SVE broadcast bitmask immediate
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            bmi.immNRS = (unsigned)emitGetInsSC(id);
+            imm        = emitDecodeBitMaskImm(bmi, optGetSveElemsize(id->idInsOpt()));
+            emitDispImm(imm, false); // iiiiiiiiiiiii
+            break;
+
+        // <Xdn>, <Wdn>{, <pattern>{, MUL #<imm>}}
+        // <Xdn>{, <pattern>{, MUL #<imm>}}
+        // <Wdn>{, <pattern>{, MUL #<imm>}}
+        case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count
+            switch (id->idIns())
+            {
+                case INS_sve_sqincb:
+                case INS_sve_sqdecb:
+                case INS_sve_sqinch:
+                case INS_sve_sqdech:
+                case INS_sve_sqincw:
+                case INS_sve_sqdecw:
+                case INS_sve_sqincd:
+                case INS_sve_sqdecd:
+                    emitDispReg(id->idReg1(), EA_8BYTE, true); // ddddd
+
+                    if (size == EA_4BYTE)
+                    {
+                        emitDispReg(id->idReg1(), EA_4BYTE, true);
+                    }
+                    break;
+
+                default:
+                    emitDispReg(id->idReg1(), size, true); // ddddd
+                    break;
+            }
+
+            imm = emitGetInsSC(id);
+            emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp
+            if (imm > 1)
+            {
+                printf("mul ");
+                emitDispImm(imm, false, false); // iiii
+            }
+            break;
+
+        // <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm>
+        case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive)
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);           // ddddd
+            emitDispVectorRegList(id->idReg2(), 2, id->idInsOpt(), true); // nnnnn
+            emitDispImm(imm, false);                                      // iiiii iii
+            break;
+
+        // <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm>
+        case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive)
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // mmmmm
+            emitDispImm(imm, false);                            // iiiii iii
+            break;
+
+        // <Zd>.<T>, <Pg>/M, #<const>
+        case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                           // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(id->idInsFmt()), INS_OPTS_NONE, true); // gggg
+            emitDispFloatImm(emitGetInsSC(id));                                                           // iiiiiiii
+            break;
+
+        // <Zd>.<T>, <Zn>.<T>, <Zm>.D
+        case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);       // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);       // nnnnn
+            emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm
+            break;
+
+        // <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>{, <mod> <amount>}]
+        case IF_SVE_BH_3A: // .........x.mmmmm ....hhnnnnnddddd -- SVE address generation
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            printf("[");
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), emitGetInsSC(id) > 0);
+            emitDispSveExtendOptsModN(INS_OPTS_LSL, emitGetInsSC(id));
+            printf("]");
+            break;
+
+        // <Zd>.D, [<Zn>.D, <Zm>.D, SXTW{ <amount>}]
+        case IF_SVE_BH_3B: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            printf("[");
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);
+            emitDispSveExtendOptsModN(INS_OPTS_SXTW, emitGetInsSC(id));
+            printf("]");
+            break;
+
+        // <Zd>.D, [<Zn>.D, <Zm>.D, UXTW{ <amount>}]
+        case IF_SVE_BH_3B_A: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            printf("[");
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);
+            emitDispSveExtendOptsModN(INS_OPTS_UXTW, emitGetInsSC(id));
+            printf("]");
+            break;
+
+        // <Zdn>.<T>, <V><m>
+        case IF_SVE_CC_2A: // ........xx...... ......mmmmmddddd -- SVE insert SIMD&FP scalar register
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                  // ddddd
+            emitDispReg(id->idReg2(), optGetSveElemsize(id->idInsOpt()), false); // mmmmm
+            break;
+
+        // <Zdn>.<T>, <R><m>
+        case IF_SVE_CD_2A: // ........xx...... ......mmmmmddddd -- SVE insert general register
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                            // ddddd
+            emitDispReg(id->idReg2(), id->idInsOpt() == INS_OPTS_SCALABLE_D ? EA_8BYTE : EA_4BYTE, false); // mmmmm
+            break;
+
+        // <Pd>.H, <Pn>.B
+        case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_H, true);  // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_B, false); // NNNN
+            break;
+
+        // <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T>
+        case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
+            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                   // mmmmm
+            break;
+
+        // <V><dn>, <Pg>, <V><dn>, <Zm>.<T>
+        // <R><dn>, <Pg>, <R><dn>, <Zm>.<T>
+        case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar
+        case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register
+        case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated)
+            emitDispReg(id->idReg1(), size, true);                                                 // ddddd
+            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispReg(id->idReg1(), size, true);                                                 // ddddd
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                   // mmmmm
+            break;
+
+        // <V><d>, <Pg>, <Zn>.<T>
+        // <R><d>, <Pg>, <Zn>.<T>
+        case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated)
+        case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated)
+        case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register
+        case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register
+        case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction
+            emitDispReg(id->idReg1(), size, true);                                              // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                // mmmmm
+            break;
+
+        // <Vd>.<T>, <Pg>, <Zn>.<Tb>
+        case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords)
+        case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords)
+        case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords)
+        case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords)
+            emitDispVectorReg(id->idReg1(), optSveToQuadwordElemsizeArrangement(id->idInsOpt()), true); // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);         // ggg
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                        // mmmmm
+            break;
+
+        // <Dd>, <Pg>, <Zn>.<T>
+        case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated)
+            emitDispReg(id->idReg1(), EA_8BYTE, true);                                          // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                // mmmmm
+            break;
+
+        // <Zd>.<T>, <Pg>/M, <Zn>.<T>
+        case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated)
+        case IF_SVE_AQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer unary operations (predicated)
+        case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements
+        case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated)
+        case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value
+        case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                // mmmmm
+            break;
+
+        case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_B, true); // DDDD
+            emitDispSveReg(id->idReg2(), false);                                                     // nnnnn
+            break;
+        case IF_SVE_CE_2B: // .........i...ii. ......nnnnn.DDDD -- SVE move predicate from vector
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_D, true); // DDDD
+            emitDispSveRegIndex(id->idReg2(), emitGetInsSC(id), false);                              // nnnnn
+            break;
+        case IF_SVE_CE_2C: // ..............i. ......nnnnn.DDDD -- SVE move predicate from vector
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_H, true); // DDDD
+            emitDispSveRegIndex(id->idReg2(), emitGetInsSC(id), false);                              // nnnnn
+            break;
+        case IF_SVE_CE_2D: // .............ii. ......nnnnn.DDDD -- SVE move predicate from vector
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_S, true); // DDDD
+            emitDispSveRegIndex(id->idReg2(), emitGetInsSC(id), false);                              // nnnnn
+            break;
+        case IF_SVE_CF_2A:                      // ................ .......NNNNddddd -- SVE move predicate into vector
+            emitDispSveReg(id->idReg1(), true); // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_B, false); // NNNN
+            break;
+        case IF_SVE_CF_2B: // .........i...ii. .......NNNNddddd -- SVE move predicate into vector
+            emitDispSveRegIndex(id->idReg1(), emitGetInsSC(id), true);                                // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_D, false); // NNNN
+            break;
+        case IF_SVE_CF_2C: // ..............i. .......NNNNddddd -- SVE move predicate into vector
+            emitDispSveRegIndex(id->idReg1(), emitGetInsSC(id), true);                                // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_H, false); // NNNN
+            break;
+        case IF_SVE_CF_2D: // .............ii. .......NNNNddddd -- SVE move predicate into vector
+            emitDispSveRegIndex(id->idReg1(), emitGetInsSC(id), true);                                // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_S, false); // NNNN
+            break;
+
+        // <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
+        case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // NNNN
+            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // MMMM
+            break;
+
+        // <Zd>.<T>, <Pg>, <Zn>.<T>
+        case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                // mmmmm
+            break;
+
+        // <Zd>.<T>, <Pg>/M, <V><n>
+        case IF_SVE_CP_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector
+                           // (predicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispReg(id->idReg3(), size, false);                                             // mmmmm
+            break;
+
+        // <Zd>.<T>, <Pg>/M, <R><n|SP>
+        case IF_SVE_CQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy general register to vector (predicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispReg(encodingZRtoSP(id->idReg3()), size, false);                             // mmmmm
+            break;
+
+        // <Zd>.Q, <Pg>/M, <Zn>.Q
+        case IF_SVE_CT_3A: // ................ ...gggnnnnnddddd -- SVE reverse doublewords
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_Q, true);                            // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_Q, false);                           // nnnnn
+            break;
+
+        // <Zd>.<T>, <Pv>, {<Zn1>.<T>, <Zn2>.<T>}
+        case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                             // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);             // VVV
+            emitDispSveConsecutiveRegList(id->idReg3(), insGetSveReg1ListSize(ins), id->idInsOpt(), false); // nnnnn
+            break;
+
+        // <Zdn>.<T>, <Pv>, <Zdn>.<T>, <Zm>.<T>
+        case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // VVV
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                                // mmmmm
+            break;
+
+        // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T> or SEL <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T>
+        case IF_SVE_CW_4A: // ........xx.mmmmm ..VVVVnnnnnddddd -- SVE select vector elements (predicated)
+        {
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+
+            if (id->idIns() == INS_sve_mov)
+            {
+                emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, id->idInsOpt(), true); // VVVV
+                emitDispSveReg(id->idReg3(), id->idInsOpt(), false);                       // nnnnn
+            }
+            else
+            {
+                emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // VVVV
+                emitDispSveReg(id->idReg3(), id->idInsOpt(), true);                       // nnnnn
+                emitDispSveReg(id->idReg4(), id->idInsOpt(), false);                      // mmmmm
+            }
+            break;
+        }
+
+        // <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+        case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
+        case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
+        case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);                                    // nnnnn
+            emitDispSveReg(id->idReg4(), id->idInsOpt(), false);                                   // mmmmm
+            break;
+
+        // <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D
+        case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);                                    // nnnnn
+            emitDispSveReg(id->idReg4(), INS_OPTS_SCALABLE_D, false);                              // mmmmm
+            break;
+
+        // <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm>
+        case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate
+        case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);                                    // nnnnn
+            emitDispImm(emitGetInsSC(id), false, (fmt == IF_SVE_CY_3B));                           // iiiii
+            break;
+
+        // <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
+        case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed)
+        case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
+        case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
+        case IF_SVE_GZ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE floating-point multiply-add long (indexed)
+        // <Zda>.S, <Zn>.B, <Zm>.B[<imm>]
+        case IF_SVE_EY_3A:   // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed)
+        case IF_SVE_EZ_3A:   // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed)
+        case IF_SVE_GY_3B_D: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
+        // <Zd>.S, <Zn>.H, <Zm>.H[<imm>]
+        case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
+        case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
+        case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
+        // <Zda>.S, <Zn>.S, <Zm>.S[<imm>]
+        case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
+        case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
+        case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
+        case IF_SVE_FK_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed)
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);     // mmm
+            emitDispElementIndex(emitGetInsSC(id), false);           // ii/iii
+            break;
+
+        // <Zda>.S, <Zn>.H, <Zm>.H
+        case IF_SVE_EF_3A: // ...........mmmmm ......nnnnnddddd -- SVE two-way dot product
+        case IF_SVE_HA_3A: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product
+        case IF_SVE_HB_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating-point multiply-add long
+        case IF_SVE_HD_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate
+        case IF_SVE_EI_3A: // ...........mmmmm ......nnnnnddddd -- SVE mixed sign dot product
+        case IF_SVE_GO_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long long
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);     // mmmmm
+            break;
+
+        // <Zda>.S, <Zn>.B, <Zm>.B
+        case IF_SVE_HA_3A_F: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true);  // ddddd
+            emitDispSveReg(id->idReg2(), INS_OPTS_SCALABLE_B, true);  // nnnnn
+            emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_B, false); // mmmmm
+            break;
+
+        // <Zd>.D, <Zn>.S, <Zm>.S[<imm>]
+        case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
+        case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
+        // <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
+        case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
+        case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);     // mmmm
+            emitDispElementIndex(emitGetInsSC(id), false);           // ii
+            break;
+
+        // <Zda>.D, <Zn>.H, <Zm>.H[<imm>]
+        case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed)
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);     // mmm
+            emitDispElementIndex(emitGetInsSC(id), false);           // ii
+            break;
+
+        // <Zda>.H, <Zn>.B, <Zm>.B[<imm>]
+        case IF_SVE_GY_3A: // ...........iimmm ....i.nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_H, true); // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);     // mmm
+            emitDispElementIndex(emitGetInsSC(id), false);           // iii
+            break;
+
+        // <Zd>.H, <Zn>.H, <Zm>.H[<imm>]
+        case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
+        case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
+        // <Zd>.S, <Zn>.S, <Zm>.S[<imm>]
+        case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
+        case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
+        // <Zd>.D, <Zn>.D, <Zm>.D[<imm>]
+        case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
+        case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
+        // <Zda>.D, <Zn>.D, <Zm>.D[<imm>]
+        case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
+        case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
+        case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
+        case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed)
+        // <Zda>.H, <Zn>.H, <Zm>.H[<imm>]
+        case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
+        case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
+        case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
+        case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);  // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm
+            emitDispElementIndex(emitGetInsSC(id), false);       // i/ii/iii
+            break;
+
+        // <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
+        case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+        {
+            bool isFourReg =
+                !((ins == INS_sve_mov) || (ins == INS_sve_movs) || (ins == INS_sve_not) || (ins == INS_sve_nots));
+            PredicateType ptype = (ins == INS_sve_sel) ? PREDICATE_NONE : insGetPredicateType(fmt, 2);
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);      // DDDD
+            emitDispPredicateReg(id->idReg2(), ptype, id->idInsOpt(), true);                            // gggg
+            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), isFourReg); // NNNN
+
+            if (isFourReg)
+            {
+                emitDispPredicateReg(id->idReg4(), insGetPredicateType(fmt, 4), id->idInsOpt(), false); // MMMM
+            }
+
+            break;
+        }
+
+        // <Pd>.B, <Pn>.B
+        case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+        case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // NNNN
+            break;
+
+        //  <Pd>.B, <Pg>/M, <Pn>.B
+        case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // gggg
+            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // NNNN
+            break;
+
+        //  <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
+        case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition
+        {
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // gggg
+            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), true);  // NNNN
+            emitDispPredicateReg(id->idReg4(), insGetPredicateType(fmt, 4), id->idInsOpt(), false); // MMMM
+            break;
+        }
+
+        // <Pd>.B, <Pg>/<ZM>, <Pn>.B
+        case IF_SVE_DB_3A: // ................ ..gggg.NNNNMDDDD -- SVE partition break condition
+        case IF_SVE_DB_3B: // ................ ..gggg.NNNN.DDDD -- SVE partition break condition
+        {
+            PredicateType ptype = (id->idPredicateReg2Merge()) ? PREDICATE_MERGE : PREDICATE_ZERO;
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
+            emitDispPredicateReg(id->idReg2(), ptype, id->idInsOpt(), true);                        // gggg
+            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // NNNN
+            break;
+        }
+
+        // <Pdm>.B, <Pg>/Z, <Pn>.B, <Pdm>.B
+        case IF_SVE_DC_3A: // ................ ..gggg.NNNN.MMMM -- SVE propagate break to next partition
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // gggg
+            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), true);  // NNNN
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 4), id->idInsOpt(), false); // MMMM
+            break;
+
+        // <Pdn>.B, <Pg>, <Pdn>.B
+        case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // gggg
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // DDDD
+            break;
+
+        // <Pd>.<T>{, <pattern>}
+        case IF_SVE_DE_1A: // ........xx...... ......ppppp.DDDD -- SVE predicate initialize
+        {
+            bool dispPattern = (id->idSvePattern() != SVE_PATTERN_ALL);
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), dispPattern); // DDDD
+            if (dispPattern)
+            {
+                emitDispSvePattern(id->idSvePattern(), false); // ppppp
+            }
+            break;
+        }
+
+        // <Pd>.<T>, <Pn>.<T>
+        case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // NNNN
+            break;
+
+        // <Pdn>.<T>, <Pv>, <Pdn>.<T>
+        case IF_SVE_DF_2A: // ........xx...... .......VVVV.DDDD -- SVE predicate next active
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // VVVV
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // DDDD
+            break;
+
+        // <Pd>.B, <Pg>/Z
+        case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated)
+        case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // gggg
+            break;
+
+        // <Pd>.B
+        case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated)
+        case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), false); // DDDD
+            break;
+
+        // <Xd>, <Pg>, <Pn>.<T>
+        case IF_SVE_DK_3A:                         // ........xx...... ..gggg.NNNNddddd -- SVE predicate count
+            emitDispReg(id->idReg1(), size, true); // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // gggg
+            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // NNNN
+            break;
+
+        // <Zda>.<T>, <Pg>/M, <Zn>.<Tb>
+        case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                    // ddddd
+            emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg3(), (insOpts)((unsigned)id->idInsOpt() - 1), false);          // mmmmm
+            break;
+
+        // <Zd>.H, { <Zn1>.S-<Zn2>.S }, #<const>
+        case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                        // ddddd
+            emitDispSveConsecutiveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_S, true); // nnnn
+            emitDispImm(emitGetInsSC(id), false);                                      // iiii
+            break;
+
+        // <Xd>, <PNn>.<T>, <vl>
+        case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter)
+            emitDispReg(id->idReg1(), id->idOpSize(), true);                                    // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // NNNN
+            emitDispVectorLengthSpecifier(id);
+            break;
+
+        // <Xdn>, <Pm>.<T>
+        case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
+            emitDispReg(id->idReg1(), id->idOpSize(), true);                                     // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), false); // MMMM
+            break;
+
+        // <Zdn>.<T>, <Pm>.<T>
+        case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
+        case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                  // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), false); // MMMM
+            break;
+
+        // <Xdn>, <Pm>.<T>, <Wdn>
+        // <Xdn>, <Pm>.<T>
+        case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
+            if ((ins == INS_sve_sqdecp) || (ins == INS_sve_sqincp))
+            {
+                // 32-bit result: <Xdn>, <Pm>.<T>, <Wdn>
+                // 64-bit result: <Xdn>, <Pm>.<T>
+                const bool is32BitResult = (id->idOpSize() == EA_4BYTE);                                     // X
+                emitDispReg(id->idReg1(), EA_8BYTE, true);                                                   // ddddd
+                emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), is32BitResult); // MMMM
+
+                if (is32BitResult)
+                {
+                    emitDispReg(id->idReg1(), EA_4BYTE, false);
+                }
+            }
+            else
+            {
+                assert((ins == INS_sve_uqdecp) || (ins == INS_sve_uqincp));
+                emitDispReg(id->idReg1(), id->idOpSize(), true);                                     // ddddd
+                emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), false); // MMMM
+            }
+            break;
+
+        // none
+        case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise
+            break;
+
+        // <Pn>.B
+        case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), false); // NNNN
+            break;
+
+        // <R><n>, <R><m>
+        case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars
+            emitDispReg(id->idReg1(), id->idOpSize(), true);  // nnnnn
+            emitDispReg(id->idReg2(), id->idOpSize(), false); // mmmmm
+            break;
+
+        // <Zd>.H, {<Zn1>.S-<Zn2>.S }
+        case IF_SVE_FZ_2A: // ................ ......nnnn.ddddd -- SME2 multi-vec extract narrow
+        {
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_H, true);
+            emitDispSveConsecutiveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_S, false);
+            break;
+        }
+
+        // <Zd>.B, {<Zn1>.H-<Zn2>.H }
+        case IF_SVE_HG_2A: // ................ ......nnnn.ddddd -- SVE2 FP8 downconverts
+        {
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_B, true);
+            emitDispSveConsecutiveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_H, false);
+            break;
+        }
+
+        // <Zd>.<T>, <Zn>.<Tb>
+        case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                  // ddddd
+            emitDispSveReg(id->idReg2(), optWidenSveElemsizeArrangement(id->idInsOpt()), false); // nnnnn
+            break;
+
+        // <Xd|SP>, <Xn|SP>, #<imm>
+        case IF_SVE_BB_2A: // ...........nnnnn .....iiiiiiddddd -- SVE stack frame adjustment
+        {
+            const regNumber reg1 = (id->idReg1() == REG_ZR) ? REG_SP : id->idReg1();
+            const regNumber reg2 = (id->idReg2() == REG_ZR) ? REG_SP : id->idReg2();
+            emitDispReg(reg1, id->idOpSize(), true); // ddddd
+            emitDispReg(reg2, id->idOpSize(), true); // nnnnn
+            emitDispImm(emitGetInsSC(id), false);    // iiiiii
+            break;
+        }
+
+        // <Xd>, #<imm>
+        case IF_SVE_BC_1A: // ................ .....iiiiiiddddd -- SVE stack frame size
+            emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd
+            emitDispImm(emitGetInsSC(id), false);            // iiiiii
+            break;
+
+        // <Zd>.<T>, <Zn>.<Tb>, #<const>
+        case IF_SVE_FR_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long
+        {
+            const insOpts largeSizeSpecifier = (insOpts)(id->idInsOpt() + 1);
+            emitDispSveReg(id->idReg1(), largeSizeSpecifier, true); // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);     // nnnnn
+            emitDispImm(emitGetInsSC(id), false);                   // iii
+            break;
+        }
+
+        // <Zd>.<T>, <Zn>.<Tb>, #<const>
+        case IF_SVE_GB_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift right narrow
+        {
+            const insOpts largeSizeSpecifier = (insOpts)(id->idInsOpt() + 1);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);     // ddddd
+            emitDispSveReg(id->idReg2(), largeSizeSpecifier, true); // nnnnn
+            emitDispImm(emitGetInsSC(id), false);                   // iii
+            break;
+        }
+
+        // <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const>
+        case IF_SVE_FV_2A: // ........xx...... .....rmmmmmddddd -- SVE2 complex integer add
+        {
+            // Rotation bit implies rotation is 270 if set, else rotation is 90
+            const ssize_t rot = emitDecodeRotationImm90_or_270(emitGetInsSC(id));
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // mmmmm
+            emitDispImm(rot, false);                            // r
+            break;
+        }
+
+        // <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+        case IF_SVE_FY_3A: // .........x.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long with carry
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);  // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm
+            break;
+
+        // <Zdn>.B, <Zdn>.B, <Zm>.B
+        // <Zdn>.S, <Zdn>.S, <Zm>.S
+        case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), false); // mmmmm
+            break;
+
+        // <Zdn>.B, <Zdn>.B
+        case IF_SVE_GL_1A: // ................ ...........ddddd -- SVE2 crypto unary operations
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), false); // ddddd
+            break;
+
+        // <Pd>.<T>, <R><n>, <R><m>
+        case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit
+        // <Pd>.<T>, <Xn>, <Xm>
+        case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); // DDDD
+            emitDispReg(id->idReg2(), id->idOpSize(), true);                                    // nnnnn
+            emitDispReg(id->idReg3(), id->idOpSize(), false);                                   // mmmmm
+            break;
+
+        // <Pd>, <Pn>, <Pm>.<T>[<Wv>, <imm>]
+        case IF_SVE_DV_4A: // ........ix.xxxvv ..NNNN.MMMM.DDDD -- SVE broadcast predicate element
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);  // DDDD
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);  // NNNN
+            emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // MMMM
+            printf("[");
+            emitDispReg(id->idReg4(), EA_4BYTE, true); // vv
+            emitDispImm(emitGetInsSC(id), false);      // ix xx
+            printf("]");
+            break;
+
+        // <Pd>.<T>, <PNn>[<imm>]
+        case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
+            emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD
+            emitDispPredicateReg(id->idReg2(), PREDICATE_N, id->idInsOpt(), false);    // NNN
+            emitDispElementIndex(emitGetInsSC(id), false);                             // ii
+            break;
+
+        // {<Pd1>.<T>, <Pd2>.<T>}, <PNn>[<imm>]
+        case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
+            emitDispPredicateRegPair(id->idReg1(), id->idInsOpt());                 // DDDD
+            emitDispPredicateReg(id->idReg2(), PREDICATE_N, id->idInsOpt(), false); // NNN
+            emitDispElementIndex(emitGetInsSC(id), false);                          // i
+            break;
+
+        // {<Pd1>.<T>, <Pd2>.<T>}, <Xn>, <Xm>
+        case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. -- SVE integer compare scalar count and limit (predicate
+                           // pair)
+            emitDispLowPredicateRegPair(id->idReg1(), id->idInsOpt());
+            emitDispReg(id->idReg2(), id->idOpSize(), true);  // nnnnn
+            emitDispReg(id->idReg3(), id->idOpSize(), false); // mmmmm
+            break;
+
+        // <PNd>.<T>, <Xn>, <Xm>, <vl>
+        case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit
+                           // (predicate-as-counter)
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); // DDD
+            emitDispReg(id->idReg2(), id->idOpSize(), true);                                    // nnnnn
+            emitDispReg(id->idReg3(), id->idOpSize(), true);                                    // mmmmm
+            emitDispVectorLengthSpecifier(id);
+            break;
+
+        // PTRUE <PNd>.<T>
+        case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), false); // DDD
+            break;
+
+        // FDUP <Zd>.<T>, #<const>
+        // FMOV <Zd>.<T>, #<const>
+        case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispFloatImm(emitGetInsSC(id));                 // iiiiiiii
+            break;
+
+        // DUP <Zd>.<T>, #<imm>{, <shift>}
+        // MOV <Zd>.<T>, #<imm>{, <shift>}
+        case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated)
+        {
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispImmOptsLSL(imm, id->idHasShift(), 8);       // h iiiiiiii
+            break;
+        }
+
+        // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+        // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+        // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+        // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+        // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+        // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+        // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+        case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated)
+        {
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispImmOptsLSL(imm, id->idHasShift(), 8);       // h iiiiiiii
+            break;
+        }
+
+        // FMOV <Zd>.<T>, #0.0
+        // (Preferred disassembly: FMOV <Zd>.<T>, #0)
+        case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispImm(0, false);
+            break;
+
+        // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm>
+        // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm>
+        // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm>
+        // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm>
+        case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated)
+        // MUL <Zdn>.<T>, <Zdn>.<T>, #<imm>
+        case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispImm(emitGetInsSC(id), false);               // iiiiiiii
+            break;
+
+        // <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+        case IF_SVE_EH_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer dot product (unpredicated)
+        // <Zda>.S, <Zn>.B, <Zm>.B
+        case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate
+        {
+            const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 2);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);      // ddddd
+            emitDispSveReg(id->idReg2(), smallSizeSpecifier, true);  // nnnnn
+            emitDispSveReg(id->idReg3(), smallSizeSpecifier, false); // mmmmm
+            break;
+        }
+
+        // <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+        case IF_SVE_EL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply-add long
+        case IF_SVE_EN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add interleaved long
+        case IF_SVE_EO_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long
+        case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long
+        // <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+        case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long
+        case IF_SVE_FN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long
+        case IF_SVE_FS_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long
+        // <Zd>.Q, <Zn>.D, <Zm>.D
+        case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long
+        {
+            const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 1);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);      // ddddd
+            emitDispSveReg(id->idReg2(), smallSizeSpecifier, true);  // nnnnn
+            emitDispSveReg(id->idReg3(), smallSizeSpecifier, false); // mmmmm
+            break;
+        }
+
+        // <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+        case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part
+        {
+            const insOpts largeSizeSpecifier = (insOpts)(id->idInsOpt() + 1);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);      // ddddd
+            emitDispSveReg(id->idReg2(), largeSizeSpecifier, true);  // nnnnn
+            emitDispSveReg(id->idReg3(), largeSizeSpecifier, false); // mmmmm
+            break;
+        }
+
+        // <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+        case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide
+        {
+            const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 1);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);      // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
+            emitDispSveReg(id->idReg3(), smallSizeSpecifier, false); // mmmmm
+            break;
+        }
+
+        // CDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>, <const>
+        case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product
+        {
+            const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 2);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);     // ddddd
+            emitDispSveReg(id->idReg2(), smallSizeSpecifier, true); // nnnnn
+            emitDispSveReg(id->idReg3(), smallSizeSpecifier, true); // mmmmm
+
+            // rot specifies a multiple of 90-degree rotations
+            emitDispImm(emitDecodeRotationImm0_to_270(emitGetInsSC(id)), false); // rr
+            break;
+        }
+
+        // CMLA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
+        // SQRDCMLAH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
+        case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // mmmmm
+
+            // rot specifies a multiple of 90-degree rotations
+            emitDispImm(emitDecodeRotationImm0_to_270(emitGetInsSC(id)), false); // rr
+            break;
+
+        // CDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>], <const>
+        case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
+        {
+            const ssize_t imm   = emitGetInsSC(id);
+            const ssize_t rot   = (imm & 0b11);
+            const ssize_t index = (imm >> 2);
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);     // mmm
+            emitDispElementIndex(index, true);                       // ii
+
+            // rot specifies a multiple of 90-degree rotations
+            emitDispImm(emitDecodeRotationImm0_to_270(rot), false); // rr
+            break;
+        }
+
+        // CDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>], <const>
+        case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
+        {
+            const ssize_t imm   = emitGetInsSC(id);
+            const ssize_t rot   = (imm & 0b11);
+            const ssize_t index = (imm >> 2);
+            emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);      // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);     // mmm
+            emitDispElementIndex(index, true);                       // i
+
+            // rot specifies a multiple of 90-degree rotations
+            emitDispImm(emitDecodeRotationImm0_to_270(rot), false); // rr
+            break;
+        }
+
+        // CMLA <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
+        case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
+        // CMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
+        case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
+        // SQRDCMLAH <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
+        case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
+        // SQRDCMLAH <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
+        case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
+        // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
+        case IF_SVE_GV_3A: // ...........immmm ....rrnnnnnddddd -- SVE floating-point complex multiply-add (indexed)
+        {
+            const ssize_t imm   = emitGetInsSC(id);
+            const ssize_t rot   = (imm & 0b11);
+            const ssize_t index = (imm >> 2);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);  // ddddd
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);  // nnnnn
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm
+            emitDispElementIndex(index, true);                   // i
+
+            // rot specifies a multiple of 90-degree rotations
+            emitDispImm(emitDecodeRotationImm0_to_270(rot), false); // rr
+            break;
+        }
+
+        // <Zd>.H, <Pg>/M, <Zn>.S
+        // <Zd>.S, <Pg>/M, <Zn>.D
+        // <Zd>.D, <Pg>/M, <Zn>.S
+        // <Zd>.S, <Pg>/M, <Zn>.H
+        // <Zd>.D, <Pg>/M, <Zn>.D
+        // <Zd>.S, <Pg>/M, <Zn>.S
+        // <Zd>.D, <Pg>/M, <Zn>.H
+        // <Zd>.H, <Pg>/M, <Zn>.H
+        // <Zd>.H, <Pg>/M, <Zn>.D
+        // <Zd>.H, <Pg>/M, <Zn>.S
+        case IF_SVE_GQ_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision odd elements
+        case IF_SVE_HO_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision
+        case IF_SVE_HO_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision
+        case IF_SVE_HO_3C: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision
+        case IF_SVE_HP_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert to integer
+        case IF_SVE_HS_3A: // ................ ...gggnnnnnddddd -- SVE integer convert to floating-point
+        {
+            insOpts opt = id->idInsOpt();
+
+            switch (ins)
+            {
+                // These cases have only one combination of operands so the option may be omitted.
+                case INS_sve_fcvtxnt:
+                    opt = INS_OPTS_D_TO_S;
+                    break;
+                case INS_sve_bfcvtnt:
+                    opt = INS_OPTS_S_TO_H;
+                    break;
+                case INS_sve_fcvtx:
+                    opt = INS_OPTS_D_TO_S;
+                    break;
+                case INS_sve_bfcvt:
+                    opt = INS_OPTS_S_TO_H;
+                    break;
+                default:
+                    break;
+            }
+
+            insOpts dst = INS_OPTS_NONE;
+            insOpts src = INS_OPTS_NONE;
+            optExpandConversionPair(opt, dst, src);
+
+            emitDispSveReg(id->idReg1(), dst, true);                                            // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
+            emitDispSveReg(id->idReg3(), src, false);                                           // nnnnn
+            break;
+        }
+
+        // { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // Some of these formats may allow changing the element size instead of using 'D' for all instructions.
+        case IF_SVE_IH_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
+                             // immediate)
+        case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
+                             // immediate)
+        case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
+                             // immediate)
+        case IF_SVE_IJ_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate)
+        case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
+                             // immediate)
+        case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
+                             // immediate)
+        case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
+                             // immediate)
+        case IF_SVE_IM_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus
+                             // immediate)
+        // { <Zt>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+        // { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+        // { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+        // { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+        case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus
+                           // immediate)
+        // { <Zt1>.Q, <Zt2>.Q }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.Q, <Zt2>.Q, <Zt3>.Q }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.Q, <Zt2>.Q, <Zt3>.Q, <Zt4>.Q }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
+                           // immediate)
+        // { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate)
+        // { <Zt1>.Q, <Zt2>.Q }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.Q, <Zt2>.Q, <Zt3>.Q }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.Q, <Zt2>.Q, <Zt3>.Q, <Zt4>.Q }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
+                           // immediate)
+        // { <Zt>.B }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt>.H }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt>.S }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
+                           // immediate)
+        // { <Zt>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt>.Q }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_JN_3C:   // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+        case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+        // { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        // { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate)
+            imm = emitGetInsSC(id);
+            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);            // ggg
+            printf("[");
+            emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn
+            if (imm != 0)
+            {
+                switch (fmt)
+                {
+                    case IF_SVE_IO_3A:
+                        // This does not have to be printed as hex.
+                        // We only do it because the capstone disassembly displays this immediate as hex.
+                        // We could not modify capstone without affecting other cases.
+                        emitDispImm(emitGetInsSC(id), false, /* alwaysHex */ true); // iiii
+                        break;
+
+                    case IF_SVE_IQ_3A:
+                    case IF_SVE_IS_3A:
+                    case IF_SVE_JE_3A:
+                    case IF_SVE_JO_3A:
+                        // This does not have to be printed as hex.
+                        // We only do it because the capstone disassembly displays this immediate as hex.
+                        // We could not modify capstone without affecting other cases.
+                        emitDispImm(emitGetInsSC(id), true, /* alwaysHex */ true); // iiii
+                        printf("mul vl");
+                        break;
+
+                    default:
+                        emitDispImm(emitGetInsSC(id), true); // iiii
+                        printf("mul vl");
+                        break;
+                }
+            }
+            printf("]");
+            break;
+
+        // {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>]
+        // {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+        case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+        // {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+        case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3]
+        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1]
+        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2]
+        case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                           // offsets)
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1]
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2]
+        case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+        case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+        case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+        case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
+                           // offsets)
+        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+        case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // unscaled offsets)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1]
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2]
+        case IF_SVE_HW_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                           // offsets)
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]
+        case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]
+        case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>]
+        case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3]
+        case IF_SVE_IU_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                           // scaled offsets)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]
+        case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]
+        case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]
+        case IF_SVE_HW_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                           // offsets)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
+        case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        // {<Zt>.S }, <Pg>/Z, [<Zn>.S{, <Xm>}]
+        case IF_SVE_IF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
+                           // scalar)
+        // {<Zt>.D }, <Pg>/Z, [<Zn>.D{, <Xm>}]
+        case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
+                             // scalar)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #3}]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #2}]
+        case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar)
+        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
+        case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        // {<Zt>.B }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
+        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
+        case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #2}]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #2}]
+        case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
+        case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
+        case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
+        // {<Zt>.Q }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
+        case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2]
+        case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2
+        case IF_SVE_IK_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
+        case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        // {<Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
+        case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        // {<Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar)
+        // {<Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar)
+        // {<Zt1>.Q, <Zt2>.Q }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #4]
+        // {<Zt1>.Q, <Zt2>.Q, <Zt3>.Q }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #4]
+        // {<Zt1>.Q, <Zt2>.Q, <Zt3>.Q, <Zt4>.Q }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #4]
+        case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
+                           // scalar)
+        // {<Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
+        // {<Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2]
+        // {<Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
+        // {<Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
+        // {<Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2]
+        // {<Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
+        // {<Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
+        // {<Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
+        // {<Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2]
+        // {<Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
+        case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3]
+        case IF_SVE_IU_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                           // scaled offsets)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
+        case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
+        case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        // {<Zt>.Q }, <Pg>/Z, [<Zn>.D{, <Xm>}]
+        case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar)
+        // {<Zt>.D }, <Pg>/Z, [<Zn>.D{, <Xm>}]
+        case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus
+                           // scalar)
+        // {<Zt>.Q }, <Pg>, [<Zn>.D{, <Xm>}]
+        case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar)
+        // {<Zt>.S }, <Pg>, [<Zn>.S{, <Xm>}]
+        case IF_SVE_IZ_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
+                           // scalar)
+        // {<Zt>.D }, <Pg>, [<Zn>.D{, <Xm>}]
+        case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
+                             // scalar)
+        // {<Zt>.D }, <Pg>, [<Zn>.D{, <Xm>}]
+        case IF_SVE_JA_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus
+                           // scalar)
+        // {<Zt>.B }, <Pg>, [<Xn|SP>, <Xm>]
+        // {<Zt>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
+        case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
+                           // scalar)
+        // {<Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>, <Xm>]
+        // {<Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+        // {<Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+        // {<Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
+        // {<Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>, <Xm>]
+        // {<Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+        // {<Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+        // {<Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
+        // {<Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>, <Xm>]
+        // {<Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+        // {<Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+        // {<Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
+        case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar)
+        // {<Zt>.Q }, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
+        case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+        // {<Zt>.Q }, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
+        case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+        // {<Zt1>.Q, <Zt2>.Q }, <Pg>, [<Xn|SP>, <Xm>, LSL #4]
+        // {<Zt1>.Q, <Zt2>.Q, <Zt3>.Q }, <Pg>, [<Xn|SP>, <Xm>, LSL #4]
+        // {<Zt1>.Q, <Zt2>.Q, <Zt3>.Q, <Zt4>.Q }, <Pg>, [<Xn|SP>, <Xm>, LSL #4]
+        case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
+                           // scalar)
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1]
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2]
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3]
+        case IF_SVE_JJ_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                           // offsets)
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D]
+        case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D]
+        case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D]
+        case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
+                           // offsets)
+            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);            // ggg
+            emitDispSveModAddr(ins, id->idReg3(), id->idReg4(), id->idInsOpt(), fmt);                      // nnnnn
+                                                                                                           // mmmmm
+            break;
+
+        // {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            imm = emitGetInsSC(id);
+            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);            // ggg
+            emitDispSveImmMulVl(id->idReg3(), imm);
+            break;
+
+        // {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            imm = emitGetInsSC(id);
+            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);            // ggg
+            emitDispSveImmMulVl(id->idReg3(), imm);
+            break;
+
+        // <Pt>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register
+        // <Pt>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register
+            imm = emitGetInsSC(id);
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); // TTTT
+            emitDispSveImmMulVl(id->idReg2(), imm);
+            break;
+
+        // <Zt>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register
+        // <Zt>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register
+            imm = emitGetInsSC(id);
+            emitDispReg(id->idReg1(), EA_SCALABLE, true); // ttttt
+            emitDispSveImmMulVl(id->idReg2(), imm);
+            break;
+
+        // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const>
+        case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated)
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);
+            emitDispImm(emitDecodeRotationImm90_or_270(imm), false);
+            break;
+
+        // <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const>
+        case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated)
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg4(), id->idInsOpt(), true);
+            emitDispImm(emitDecodeRotationImm0_to_270(imm), false);
+            break;
+
+        // <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0
+        case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true);
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);
+            emitDispFloatZero();
+            break;
+
+        // <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const>
+        case IF_SVE_HM_2A: // ........xx...... ...ggg....iddddd -- SVE floating-point arithmetic with immediate
+                           // (predicated)
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSmallFloatImm(imm, id->idIns());
+            break;
+
+        // <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm>
+        case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient
+        case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);
+            emitDispImm(emitGetInsSC(id), false);
+            break;
+
+        // <Zd>.<T>, <Pg>/M, <Zn>.<T>
+        case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), false);
+            break;
+
+        // <Zda>.H, <Pg>/M, <Zn>.H, <Zm>.H
+        case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend
+        // <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T>
+        case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing
+                           // multiplicand
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg3(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg4(), id->idInsOpt(), false);
+            break;
+
+        // <Zd>.B, { <Zn>.B }, <Zm>[<index>]
+        case IF_SVE_GG_3A: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit
+                           // element size
+        // <Zd>.B, { <Zn>.B }, <Zm>[<index>]
+        case IF_SVE_GH_3A: // ........i..mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit
+                           // element size
+        // <Zd>.H, { <Zn>.H }, <Zm>[<index>]
+        case IF_SVE_GG_3B: // ........ii.mmmmm ...i..nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit
+                           // element size
+        // <Zd>.H, { <Zn1>.H, <Zn2>.H }, <Zm>[<index>]
+        case IF_SVE_GH_3B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit
+                           // element size
+        // <Zd>.H, {<Zn>.H }, <Zm>[<index>]
+        case IF_SVE_GH_3B_B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit
+                             // element size
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSveConsecutiveRegList(id->idReg1(), 1, id->idInsOpt(), true);
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), false);
+            emitDispElementIndex(imm, false);
+            break;
+
+        // <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+        // <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1]
+        // <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2]
+        // <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #3]
+        case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled
+                           // offsets)
+        // <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+        // <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1]
+        // <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2]
+        // <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3]
+        case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit
+                             // scaled offsets)
+        // <prfop>, <Pg>, [<Xn|SP>, <Zm>.D]
+        // <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1]
+        // <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2]
+        // <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3]
+        case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled
+                           // offsets)
+        // <prfop>, <Pg>, [<Xn|SP>, <Xm>]
+        // <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+        // <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+        // <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #3]
+        case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar)
+            emitDispSvePrfop(id->idSvePrfop(), true);
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true);
+            emitDispSveModAddr(ins, id->idReg2(), id->idReg3(), id->idInsOpt(), fmt);
+            break;
+
+        // <prfop>, <Pg>, [<Zn>.S{, #<imm>}]
+        // <prfop>, <Pg>, [<Zn>.D{, #<imm>}]
+        case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate)
+            imm = emitGetInsSC(id);
+            emitDispSvePrfop(id->idSvePrfop(), true);
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true);
+            emitDispSveImm(id->idReg2(), imm, id->idInsOpt());
+            break;
+
+        // <prfop>, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate)
+            imm = emitGetInsSC(id);
+            emitDispSvePrfop(id->idSvePrfop(), true);
+            emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true);
+            emitDispSveImmMulVl(id->idReg2(), imm);
+            break;
+
+        // {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]
+        // {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+        case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate)
+        // {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]
+        // {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+        case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate)
+        // {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+        case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate)
+        // {<Zt>.S }, <Pg>, [<Zn>.S{, #<imm>}]
+        // {<Zt>.D }, <Pg>, [<Zn>.D{, #<imm>}]
+        case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate)
+        // {<Zt>.D }, <Pg>, [<Zn>.D{, #<imm>}]
+        case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate)
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+        case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+        case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+        case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+        // {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
+        case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+            imm = emitGetInsSC(id);
+            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(id->idIns()), id->idInsOpt(), true);
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);
+            emitDispSveImmIndex(id->idReg3(), id->idInsOpt(), imm);
+            break;
+
+        // <Zd>, <Zn>
+        case IF_SVE_BI_2A: // ................ ......nnnnnddddd -- SVE constructive prefix (unpredicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), false);
+            break;
+
+        // <Zd>.<T>, <R><n|SP>
+        case IF_SVE_CB_2A: // ........xx...... ......nnnnnddddd -- SVE broadcast general register
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispReg(encodingZRtoSP(id->idReg2()), size, false);
+            break;
+
+        // <Zd>.H, <Zn>.B
+        case IF_SVE_HH_2A: // ................ ......nnnnnddddd -- SVE2 FP8 upconverts
+        // <Zd>.<T>, <Zn>.<Tb>
+        case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg2(), (insOpts)((unsigned)id->idInsOpt() - 1), false);
+            break;
+
+        // <Zd>.<T>, <Zn>.<T>
+        case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator
+        // <Zd>.<T>, <Zn>.<T>
+        case IF_SVE_CG_2A: // ........xx...... ......nnnnnddddd -- SVE reverse vector elements
+        // <Zd>.<T>, <Zn>.<T>
+        case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), false);
+            break;
+
+        // <Zd>.<T>, <Zn>.<T>, #<const>
+        case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated)
+        // <Zd>.<T>, <Zn>.<T>, #<const>
+        case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert
+        // <Zda>.<T>, <Zn>.<T>, #<const>
+        case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);
+            emitDispImm(imm, false);
+            break;
+
+        // <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+        // <Zd>.<T>, <Pg>/M, #<imm>{, <shift>}
+        case IF_SVE_BV_2A:   // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+        case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+        {
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // gggg
+            emitDispImmOptsLSL(imm, id->idHasShift(), 8);                                       // iiiiiiii, h
+            break;
+        }
+
+        // <Zd>.<T>, <Pg>/M, #<imm>
+        case IF_SVE_BV_2B: // ........xx..gggg ...........ddddd -- SVE copy integer immediate (predicated)
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // gggg
+            emitDispImm(0, false);
+            break;
+
+        // <Zd>.<T>, <Zn>.<T>[<imm>]
+        // <Zd>.<T>, <V><n>
+        case IF_SVE_BW_2A: // ........ii.xxxxx ......nnnnnddddd -- SVE broadcast indexed element
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
+            if (imm > 0)
+            {
+                emitDispSveReg(id->idReg2(), id->idInsOpt(), false); // nnnnn
+                emitDispElementIndex(imm, false);
+            }
+            else
+            {
+                assert(imm == 0);
+                emitDispReg(id->idReg2(), optGetSveElemsize(id->idInsOpt()), false);
+            }
+            break;
+
+        // <Zd>.<T>, <Zn>.<T>[<imm>]
+        case IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), false);
+            emitDispElementIndex(imm, false);
+            break;
+
+        // <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm>
+        case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
+            emitDispSveReg(id->idReg2(), id->idInsOpt(), true);
+            emitDispImm(imm, false);
+            break;
+
+        default:
+            printf("unexpected format %s", emitIfName(id->idInsFmt()));
+            assert(!"unexpectedFormat");
+            break;
+    }
+}
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+//----------------------------------------------------------------------------------------
+// getInsSveExecutionCharacteristics:
+//    Returns the current SVE instruction's execution characteristics
+//
+// Arguments:
+//    id  - The current instruction descriptor to be evaluated
+//    result - out parameter for execution characteristics struct
+//    (only insLatency and insThroughput will be set)
+//
+// Notes:
+//    SVE latencies from Arm Neoverse N2 Software Optimization Guide, Issue 5.0, Revision: r0p3
+//
+void emitter::getInsSveExecutionCharacteristics(instrDesc* id, insExecutionCharacteristics& result)
+{
+    instruction ins = id->idIns();
+    switch (id->idInsFmt())
+    {
+        // Predicate logical
+        case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated)
+            result.insLatency    = PERFSCORE_LATENCY_1C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        // Arithmetic, basic
+        case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated)
+        case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated)
+        // Max/min, basic and pairwise
+        case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated)
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            break;
+
+        // Divides, 32 bit (Note: worse for 64 bit)
+        case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated)
+            result.insLatency    = PERFSCORE_LATENCY_12C;    // 7 to 12
+            result.insThroughput = PERFSCORE_THROUGHPUT_11C; // 1/11 to 1/7
+            break;
+
+        // Multiply, B, H, S element size (Note: D element size is slightly slower)
+        case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated)
+            result.insLatency    = PERFSCORE_LATENCY_4C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        // Reduction, logical
+        case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated)
+            result.insLatency    = PERFSCORE_LATENCY_6C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        // Reduction, arithmetic, D form (worse for B, S and H)
+        case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated)
+        // Reduction, arithmetic, D form (worse for B, S and H)
+        case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated)
+            result.insLatency    = PERFSCORE_LATENCY_4C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        case IF_SVE_AM_2A: // ........xx...... ...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated)
+            switch (ins)
+            {
+                case INS_sve_asr:
+                case INS_sve_lsl:
+                case INS_sve_lsr:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+                case INS_sve_srshr:
+                case INS_sve_sqshl:
+                case INS_sve_urshr:
+                case INS_sve_sqshlu:
+                case INS_sve_uqshl:
+                case INS_sve_asrd:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        // Arithmetic, shift
+        case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated)
+        case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated)
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        // Count/reverse bits
+        // Arithmetic, basic
+        // Floating point absolute value/difference
+        // Floating point arithmetic
+        // Logical
+        case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated)
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            break;
+
+        case IF_SVE_AQ_3A:
+            switch (ins)
+            {
+                // Arithmetic, basic
+                case INS_sve_abs:
+                case INS_sve_neg:
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    break;
+
+                // Extend, sign or zero
+                case INS_sve_sxtb:
+                case INS_sve_sxth:
+                case INS_sve_sxtw:
+                case INS_sve_uxtb:
+                case INS_sve_uxth:
+                case INS_sve_uxtw:
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    break;
+
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend
+                           // (predicated)
+        case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand
+                           // (predicated)
+        case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
+        case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high
+                           // (unpredicated)
+        case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
+        case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
+        case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
+        case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
+        case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
+        case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed)
+        case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
+        case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
+        case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed)
+        case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed)
+        case IF_SVE_FK_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed)
+        case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed)
+        case IF_SVE_EM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add high
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_5C;
+            break;
+
+        case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
+        case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
+        case IF_SVE_GN_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_4C;
+            break;
+
+        case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
+        case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            break;
+
+        case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
+            switch (ins)
+            {
+                case INS_sve_fdot:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_bfdot:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_HA_3A: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product
+            switch (ins)
+            {
+                case INS_sve_fdot:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_bfdot:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_HB_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating-point multiply-add long
+            switch (ins)
+            {
+                case INS_sve_fmlalb:
+                case INS_sve_fmlalt:
+                case INS_sve_fmlslb:
+                case INS_sve_fmlslt:
+                case INS_sve_bfmlalb:
+                case INS_sve_bfmlalt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_bfmlslb:
+                case INS_sve_bfmlslt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_AV_3A: // ...........mmmmm ......kkkkkddddd -- SVE2 bitwise ternary operations
+            switch (ins)
+            {
+                case INS_sve_eor3:
+                case INS_sve_bcax:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+                case INS_sve_bsl:
+                case INS_sve_bsl1n:
+                case INS_sve_bsl2n:
+                case INS_sve_nbsl:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_GU_3C:   // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed)
+        case IF_SVE_GX_3C:   // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed)
+        case IF_SVE_EW_3A:   // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer)
+        case IF_SVE_EW_3B:   // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer)
+        case IF_SVE_CA_3A:   // ........xx.mmmmm ......nnnnnddddd -- sve_int_perm_tbxquads
+        case IF_SVE_EV_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp
+        case IF_SVE_EX_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector elements (quadwords)
+        case IF_SVE_GW_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE FP clamp
+        case IF_SVE_AT_3B:   // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated)
+        case IF_SVE_AB_3B:   // ................ ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated)
+        case IF_SVE_HL_3B:   // ................ ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated)
+        case IF_SVE_GO_3A:   // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long long
+        case IF_SVE_GW_3B:   // ...........mmmmm ......nnnnnddddd -- SVE FP clamp
+        case IF_SVE_HA_3A_E: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product
+        case IF_SVE_HA_3A_F: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product
+        case IF_SVE_HD_3A_A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate
+        case IF_SVE_HK_3B:   // ...........mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+            result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+            break;
+
+        case IF_SVE_AT_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated)
+        case IF_SVE_BR_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector segments
+        case IF_SVE_BR_3B:   // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments
+        case IF_SVE_BZ_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources)
+        case IF_SVE_BZ_3A_A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources)
+        case IF_SVE_FL_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long
+        case IF_SVE_FM_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide
+        case IF_SVE_FP_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved
+        case IF_SVE_FS_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long
+        case IF_SVE_GC_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part
+        case IF_SVE_GF_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment)
+        case IF_SVE_AU_3A:   // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated)
+        case IF_SVE_GI_4A:   // ........xx.mmmmm ...gggnnnnnddddd -- SVE2 histogram generation (vector)
+        case IF_SVE_BB_2A:   // ...........nnnnn .....iiiiiiddddd -- SVE stack frame adjustment
+        case IF_SVE_BC_1A:   // ................ .....iiiiiiddddd -- SVE stack frame size
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_FQ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_6C;
+            break;
+
+        case IF_SVE_FN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long
+            switch (ins)
+            {
+                case INS_sve_smullb:
+                case INS_sve_smullt:
+                case INS_sve_umullb:
+                case INS_sve_umullt:
+                case INS_sve_sqdmullb:
+                case INS_sve_sqdmullt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_pmullb:
+                case INS_sve_pmullt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register
+                           // increment)
+        case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate
+                           // increment)
+        case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register
+                           // increment)
+        case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate
+                           // increment)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_8C;
+            break;
+
+        case IF_SVE_BH_3A:   // .........x.mmmmm ....hhnnnnnddddd -- SVE address generation
+        case IF_SVE_BH_3B:   // ...........mmmmm ....hhnnnnnddddd -- SVE address generation
+        case IF_SVE_BH_3B_A: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count
+        case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count
+        case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count
+        case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count
+        case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count
+        case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive)
+        case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive)
+        case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated)
+        case IF_SVE_BS_1A: // ..............ii iiiiiiiiiiiddddd -- SVE bitwise logical with immediate (unpredicated)
+        case IF_SVE_BT_1A: // ..............ii iiiiiiiiiiiddddd -- SVE broadcast bitmask immediate
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient
+        case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            break;
+
+        case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated)
+        case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long
+        case IF_SVE_BD_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
+        case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_BV_2A:   // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+        case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+        case IF_SVE_BV_2B:   // ........xx..gggg ...........ddddd -- SVE copy integer immediate (predicated)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_BW_2A: // ........ii.xxxxx ......nnnnnddddd -- SVE broadcast indexed element
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            break;
+
+        case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector
+        case IF_SVE_CE_2B: // .........i...ii. ......nnnnn.DDDD -- SVE move predicate from vector
+        case IF_SVE_CE_2C: // ..............i. ......nnnnn.DDDD -- SVE move predicate from vector
+        case IF_SVE_CE_2D: // .............ii. ......nnnnn.DDDD -- SVE move predicate from vector
+        case IF_SVE_CF_2A: // ................ .......NNNNddddd -- SVE move predicate into vector
+        case IF_SVE_CF_2B: // .........i...ii. .......NNNNddddd -- SVE move predicate into vector
+        case IF_SVE_CF_2C: // ..............i. .......NNNNddddd -- SVE move predicate into vector
+        case IF_SVE_CF_2D: // .............ii. .......NNNNddddd -- SVE move predicate into vector
+            result.insThroughput = PERFSCORE_THROUGHPUT_140C; // @ToDo currently undocumented
+            result.insLatency    = PERFSCORE_LATENCY_140C;
+            break;
+
+        case IF_SVE_CC_2A: // ........xx...... ......mmmmmddddd -- SVE insert SIMD&FP scalar register
+        case IF_SVE_CD_2A: // ........xx...... ......mmmmmddddd -- SVE insert general register
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_5C;
+            break;
+
+        case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements
+        case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements
+        case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        // Conditional extract operations, SIMD&FP scalar and vector forms
+        case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements
+        case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector
+        case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        // Conditional extract operations, scalar form
+        case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register
+            result.insLatency    = PERFSCORE_LATENCY_8C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        // Copy, scalar SIMD&FP or imm
+        case IF_SVE_CP_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector
+                           // (predicated)
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            break;
+
+        // Copy, scalar
+        case IF_SVE_CQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy general register to vector (predicated)
+            result.insLatency    = PERFSCORE_LATENCY_5C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        case IF_SVE_CT_3A: // ................ ...gggnnnnnddddd -- SVE reverse doublewords
+            result.insThroughput = PERFSCORE_THROUGHPUT_140C; // @ToDo Currently undocumented.
+            result.insLatency    = PERFSCORE_LATENCY_140C;
+            break;
+
+        case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
+        case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        case IF_SVE_CW_4A: // ........xx.mmmmm ..VVVVnnnnnddddd -- SVE select vector elements (predicated)
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            break;
+
+        case IF_SVE_CX_4A:   // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
+        case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
+        case IF_SVE_CY_3A:   // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate
+        case IF_SVE_CY_3B:   // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate
+        case IF_SVE_EG_3A:   // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed)
+        case IF_SVE_EY_3A:   // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed)
+        case IF_SVE_EY_3B:   // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed)
+        case IF_SVE_FE_3A:   // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
+        case IF_SVE_FE_3B:   // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed)
+        case IF_SVE_FG_3A:   // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
+        case IF_SVE_FG_3B:   // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed)
+        case IF_SVE_FH_3A:   // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
+        case IF_SVE_FH_3B:   // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed)
+        case IF_SVE_FJ_3A:   // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
+        case IF_SVE_FJ_3B:   // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed)
+        case IF_SVE_EH_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE integer dot product (unpredicated)
+        case IF_SVE_EL_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply-add long
+        case IF_SVE_EN_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add interleaved long
+        case IF_SVE_EO_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long
+        case IF_SVE_FW_3A:   // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate
+        case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long
+            result.insLatency    = PERFSCORE_LATENCY_4C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        case IF_SVE_GJ_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 crypto constructive binary operations
+            switch (ins)
+            {
+                case INS_sve_rax1:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+                case INS_sve_sm4ekey:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_GZ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE floating-point multiply-add long (indexed)
+            switch (ins)
+            {
+                case INS_sve_fmlalb:
+                case INS_sve_fmlalt:
+                case INS_sve_fmlslb:
+                case INS_sve_fmlslt:
+                case INS_sve_bfmlalb:
+                case INS_sve_bfmlalt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_bfmlslb:
+                case INS_sve_bfmlslt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed)
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            break;
+
+        case IF_SVE_CZ_4A:   // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+        case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+        case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+        case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
+            switch (ins)
+            {
+                case INS_sve_mov:
+                case INS_sve_and:
+                case INS_sve_orr:
+                case INS_sve_eor:
+                case INS_sve_bic:
+                case INS_sve_orn:
+                case INS_sve_not:
+                case INS_sve_sel:
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    break;
+
+                case INS_sve_bics:
+                case INS_sve_eors:
+                case INS_sve_nots:
+                case INS_sve_ands:
+                case INS_sve_orrs:
+                case INS_sve_orns:
+                case INS_sve_nors:
+                case INS_sve_nands:
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    break;
+
+                case INS_sve_nor:
+                case INS_sve_nand:
+                    result.insLatency    = PERFSCORE_LATENCY_1C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    break;
+
+                case INS_sve_movs:
+                    result.insLatency    = PERFSCORE_LATENCY_1C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    break;
+
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition
+        case IF_SVE_DC_3A: // ................ ..gggg.NNNN.MMMM -- SVE propagate break to next partition
+            switch (ins)
+            {
+                case INS_sve_brkpa:
+                case INS_sve_brkpb:
+                case INS_sve_brkn:
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    break;
+
+                case INS_sve_brkpas:
+                case INS_sve_brkpbs:
+                case INS_sve_brkns:
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    break;
+
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_DB_3A: // ................ ..gggg.NNNNMDDDD -- SVE partition break condition
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            break;
+
+        case IF_SVE_DB_3B: // ................ ..gggg.NNNN.DDDD -- SVE partition break condition
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            break;
+
+        case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            break;
+
+        case IF_SVE_DE_1A: // ........xx...... ......ppppp.DDDD -- SVE predicate initialize
+            switch (ins)
+            {
+                case INS_sve_ptrue:
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    break;
+
+                case INS_sve_ptrues:
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    break;
+
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_DF_2A: // ........xx...... .......VVVV.DDDD -- SVE predicate next active
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            break;
+
+        case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated)
+            switch (ins)
+            {
+                case INS_sve_rdffr:
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    break;
+
+                case INS_sve_rdffrs:
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    break;
+
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated)
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            break;
+
+        case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test
+            result.insLatency    = PERFSCORE_LATENCY_1C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            break;
+
+        case IF_SVE_DK_3A: // ........xx...... ..gggg.NNNNddddd -- SVE predicate count
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            break;
+
+        case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
+        case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        // Extract/insert operation, SIMD and FP scalar form
+        case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        // Extract/insert operation, scalar
+        case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register
+            result.insLatency    = PERFSCORE_LATENCY_5C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        // Count/reverse bits
+        // Reverse, vector
+        case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            break;
+
+        // Arithmetic, pairwise add
+        // Max/min, basic and pairwise
+        case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            break;
+
+        case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated)
+            switch (ins)
+            {
+                // Arithmetic, complex
+                case INS_sve_sqabs:
+                case INS_sve_sqneg:
+                    // Reciprocal estimate
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    break;
+
+                // Reciprocal estimate
+                case INS_sve_urecpe:
+                case INS_sve_ursqrte:
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    break;
+
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        // Arithmetic, complex
+        case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            break;
+
+        // Arithmetic, shift complex
+        case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left
+                           // (predicated)
+        // Arithmetic, pairwise add and accum long
+        case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long
+        case IF_SVE_EF_3A: // ...........mmmmm ......nnnnnddddd -- SVE two-way dot product
+            result.insLatency    = PERFSCORE_LATENCY_4C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        case IF_SVE_GQ_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision odd elements
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            break;
+
+        // Floating point arithmetic
+        // Floating point min/max pairwise
+        case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            break;
+
+        // Floating point reduction, F64. (Note: Worse for F32 and F16)
+        case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            break;
+
+        // Floating point associative add, F64. (Note: Worse for F32 and F16)
+        case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated)
+            result.insLatency    = PERFSCORE_LATENCY_4C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            break;
+
+        case IF_SVE_HK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated)
+            switch (ins)
+            {
+                case INS_sve_frecps:
+                case INS_sve_frsqrts:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+
+                case INS_sve_fmul:
+                case INS_sve_ftsmul:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                    break;
+
+                case INS_sve_fadd:
+                case INS_sve_fsub:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated)
+            switch (ins)
+            {
+                // Floating point absolute value/difference
+                case INS_sve_fabd:
+                // Floating point min/max
+                case INS_sve_fmax:
+                case INS_sve_fmaxnm:
+                case INS_sve_fmin:
+                case INS_sve_fminnm:
+                // Floating point arithmetic
+                case INS_sve_fadd:
+                case INS_sve_fsub:
+                case INS_sve_fsubr:
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    break;
+
+                // Floating point divide, F64 (Note: Worse for F32, F16)
+                case INS_sve_fdiv:
+                case INS_sve_fdivr:
+                    result.insLatency    = PERFSCORE_LATENCY_15C;    // 7 to 15
+                    result.insThroughput = PERFSCORE_THROUGHPUT_14C; // 1/14 to 1/7
+                    break;
+
+                // Floating point multiply
+                case INS_sve_fmul:
+                case INS_sve_fmulx:
+                case INS_sve_fscale:
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    break;
+
+                case INS_sve_famax:
+                case INS_sve_famin:
+                    result.insLatency    = PERFSCORE_LATENCY_20C;    // TODO-SVE: Placeholder
+                    result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder
+                    break;
+
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_HO_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision
+        case IF_SVE_HO_3B:
+        case IF_SVE_HO_3C:
+        case IF_SVE_HP_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert to integer
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            break;
+
+        // Floating point round to integral, F64. (Note: Worse for F32 and F16)
+        case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            break;
+
+        case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations
+            switch (ins)
+            {
+                // Floating point reciprocal estimate, F64. (Note: Worse for F32 and F16)
+                case INS_sve_frecpx:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_1C;
+                    break;
+
+                // Floating point square root F64. (Note: Worse for F32 and F16)
+                case INS_sve_fsqrt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_16C;
+                    result.insLatency    = PERFSCORE_LATENCY_14C;
+                    break;
+
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_HS_3A: // ................ ...gggnnnnnddddd -- SVE integer convert to floating-point
+            result.insThroughput = PERFSCORE_THROUGHPUT_4X;
+            result.insLatency    = PERFSCORE_LATENCY_6C;
+            break;
+
+        case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count
+        case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count
+        case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count
+        case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_7C;
+            break;
+
+        case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise
+        case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
+        case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter
+        case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_1C;
+            break;
+
+        case IF_SVE_DV_4A:   // ........ix.xxxvv ..NNNN.MMMM.DDDD -- SVE broadcast predicate element
+        case IF_SVE_FZ_2A:   // ................ ......nnnn.ddddd -- SME2 multi-vec extract narrow
+        case IF_SVE_GY_3A:   // ...........iimmm ....i.nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
+        case IF_SVE_GY_3B_D: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+            result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+            break;
+
+        case IF_SVE_HG_2A: // ................ ......nnnn.ddddd -- SVE2 FP8 downconverts
+            switch (ins)
+            {
+                case INS_sve_fcvtnt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                    break;
+                case INS_sve_fcvtn:
+                case INS_sve_bfcvtn:
+                case INS_sve_fcvtnb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        // Not available in Arm Neoverse N2 Software Optimization Guide.
+        case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords)
+        case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords)
+        case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords)
+        case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords)
+            result.insLatency    = PERFSCORE_LATENCY_20C;    // TODO-SVE: Placeholder
+            result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder
+            break;
+
+        // Not available in Arm Neoverse N2 Software Optimization Guide.
+        case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow
+            result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder
+            result.insLatency    = PERFSCORE_LATENCY_20C;    // TODO-SVE: Placeholder
+            break;
+
+        case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow
+        case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
+        case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed)
+        case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_4C;
+            break;
+
+        case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations
+        case IF_SVE_GL_1A: // ................ ...........ddddd -- SVE2 crypto unary operations
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit
+        case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. -- SVE integer compare scalar count and limit (predicate
+                           // pair)
+        case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit
+                           // (predicate-as-counter)
+        case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            break;
+
+        case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue
+        case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated)
+        case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated)
+        case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated)
+        case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated)
+        case IF_SVE_FV_2A: // ........xx...... .....rmmmmmddddd -- SVE2 complex integer add
+        case IF_SVE_FY_3A: // .........x.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long with carry
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated)
+            switch (ins)
+            {
+                case INS_sve_umin:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+            }
+            break;
+
+        case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated)
+        case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
+        case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed)
+        case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
+        case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed)
+        case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_5C;
+            break;
+
+        case IF_SVE_IH_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
+                             // immediate)
+        case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
+                             // immediate)
+        case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
+                             // immediate)
+        case IF_SVE_IJ_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+        case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_9C;
+            break;
+
+        case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate)
+        case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
+                             // immediate)
+        case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
+                             // immediate)
+        case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus
+                             // immediate)
+            result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+            result.insLatency    = PERFSCORE_LATENCY_6C;
+            break;
+
+        case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus
+                           // immediate)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_10C;
+            break;
+
+        case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus
+                           // immediate)
+            switch (ins)
+            {
+                case INS_sve_ld1rqb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_6C;
+                    break;
+                case INS_sve_ld1rob:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_ld1rqh:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_6C;
+                    break;
+                case INS_sve_ld1roh:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_ld1rqw:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_6C;
+                    break;
+                case INS_sve_ld1row:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_ld1rqd:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_6C;
+                    break;
+                case INS_sve_ld1rod:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
+                           // immediate)
+            switch (ins)
+            {
+                case INS_sve_ld2q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_ld3q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_ld4q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate)
+            switch (ins)
+            {
+                case INS_sve_ld2b:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_9C;
+                    break;
+                case INS_sve_ld3b:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld4b:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld2h:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_9C;
+                    break;
+                case INS_sve_ld3h:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld4h:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld2w:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_9C;
+                    break;
+                case INS_sve_ld3w:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld4w:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld2d:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_9C;
+                    break;
+                case INS_sve_ld3d:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld4d:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
+                           // immediate)
+            switch (ins)
+            {
+                case INS_sve_st2q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_st3q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_st4q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_FR_2A:   // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long
+        case IF_SVE_JM_3A:   // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
+                             // immediate)
+        case IF_SVE_JN_3C:   // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+        case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_GB_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift right narrow
+            switch (ins)
+            {
+                case INS_sve_sqshrunb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_sqshrunt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_sqrshrunb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_sqrshrunt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_shrnb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+                case INS_sve_shrnt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+                case INS_sve_rshrnb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_rshrnt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_sqshrnb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_sqshrnt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_sqrshrnb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_sqrshrnt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_uqshrnb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_uqshrnt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_uqrshrnb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_uqrshrnt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate)
+            switch (ins)
+            {
+                case INS_sve_st2b:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_st3b:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
+                    result.insLatency    = PERFSCORE_LATENCY_7C;
+                    break;
+                case INS_sve_st4b:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
+                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                    break;
+                case INS_sve_st2h:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_st3h:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
+                    result.insLatency    = PERFSCORE_LATENCY_7C;
+                    break;
+                case INS_sve_st4h:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
+                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                    break;
+                case INS_sve_st2w:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_st3w:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
+                    result.insLatency    = PERFSCORE_LATENCY_7C;
+                    break;
+                case INS_sve_st4w:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
+                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                    break;
+                case INS_sve_st2d:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_st3d:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
+                    result.insLatency    = PERFSCORE_LATENCY_7C;
+                    break;
+                case INS_sve_st4d:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_9C;
+                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_JD_4A:   // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+        case IF_SVE_JD_4B:   // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+        case IF_SVE_JJ_4A:   // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
+                           // offsets)
+        case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // unscaled offsets)
+        case IF_SVE_JN_3A:   // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+        case IF_SVE_JN_3B:   // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_HW_4A:   // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_IU_4A:   // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_HW_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+        case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled
+                             // offsets)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_9C;
+            break;
+
+        case IF_SVE_IF_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
+                             // scalar)
+        case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus
+                             // scalar)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_10C;
+            break;
+
+        case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar)
+        case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus
+                             // scalar)
+        case IF_SVE_II_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
+        case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
+        case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar)
+        case IF_SVE_IK_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+        case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_9C;
+            break;
+
+        case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_10C;
+            break;
+
+        case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar)
+            switch (ins)
+            {
+                case INS_sve_ld1rqb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_6C;
+                    break;
+                case INS_sve_ld1rob:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_ld1rqh:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_6C;
+                    break;
+                case INS_sve_ld1roh:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_ld1rqw:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_6C;
+                    break;
+                case INS_sve_ld1row:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_ld1rqd:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+                    result.insLatency    = PERFSCORE_LATENCY_6C;
+                    break;
+                case INS_sve_ld1rod:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus
+                           // scalar)
+            switch (ins)
+            {
+                case INS_sve_ld2q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_ld3q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_ld4q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar)
+            switch (ins)
+            {
+                case INS_sve_ld2b:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_9C;
+                    break;
+                case INS_sve_ld3b:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld4b:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld2h:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_9C;
+                    break;
+                case INS_sve_ld3h:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld4h:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld2w:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_9C;
+                    break;
+                case INS_sve_ld3w:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld4w:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld2d:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_9C;
+                    break;
+                case INS_sve_ld3d:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                case INS_sve_ld4d:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_10C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_IU_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+        case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked
+                             // scaled offsets)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_9C;
+            break;
+
+        case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar)
+            switch (ins)
+            {
+                case INS_sve_ld1q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus
+                           // scalar)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+            result.insLatency    = PERFSCORE_LATENCY_10C;
+            break;
+
+        case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar)
+            switch (ins)
+            {
+                case INS_sve_st1q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_IZ_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
+                             // scalar)
+        case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus
+                             // scalar)
+        case IF_SVE_JA_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus
+                             // scalar)
+        case IF_SVE_JB_4A:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus
+                             // scalar)
+        case IF_SVE_JD_4C:   // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+        case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar)
+            switch (ins)
+            {
+                case INS_sve_st2b:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_st3b:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_7C;
+                    break;
+                case INS_sve_st4b:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_9X;
+                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                    break;
+                case INS_sve_st2h:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_st3h:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_7C;
+                    break;
+                case INS_sve_st4h:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_9X;
+                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                    break;
+                case INS_sve_st2w:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_st3w:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_7C;
+                    break;
+                case INS_sve_st4w:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_9X;
+                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                    break;
+                case INS_sve_st2d:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_4C;
+                    break;
+                case INS_sve_st3d:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2X;
+                    result.insLatency    = PERFSCORE_LATENCY_7C;
+                    break;
+                case INS_sve_st4d:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_9X;
+                    result.insLatency    = PERFSCORE_LATENCY_11C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus
+                           // scalar)
+            switch (ins)
+            {
+                case INS_sve_st2q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_st3q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_st4q:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_JJ_4B:   // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled
+                             // offsets)
+        case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled
+                           // offsets)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated)
+        case IF_SVE_EI_3A: // ...........mmmmm ......nnnnnddddd -- SVE mixed sign dot product
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            break;
+
+        case IF_SVE_GV_3A: // ...........immmm ....rrnnnnnddddd -- SVE floating-point complex multiply-add (indexed)
+        case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated)
+        case IF_SVE_HD_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_5C;
+            break;
+
+        case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_HM_2A: // ........xx...... ...ggg....iddddd -- SVE floating-point arithmetic with immediate
+                           // (predicated)
+            switch (ins)
+            {
+                case INS_sve_fmul:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                    break;
+
+                default:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+            }
+            break;
+
+        case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_4C;
+            break;
+
+        case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            break;
+
+        case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend
+            switch (ins)
+            {
+                case INS_sve_bfmla:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+
+                case INS_sve_bfmls:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing
+                           // multiplicand
+        case IF_SVE_HU_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_4C;
+            break;
+
+        case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register
+        case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register
+            result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+            result.insLatency    = PERFSCORE_LATENCY_6C;
+            break;
+
+        case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register
+        case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_GG_3A: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit
+                           // element size
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+            result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+            break;
+
+        case IF_SVE_GH_3B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit
+                           // element size
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+            result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+            break;
+
+        case IF_SVE_GH_3B_B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit
+                             // element size
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+            result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+            break;
+
+        case IF_SVE_GG_3B: // ........ii.mmmmm ...i..nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit
+                           // element size
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+            result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+            break;
+
+        case IF_SVE_GH_3A: // ........i..mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit
+                           // element size
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+            result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+            break;
+
+        case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled
+                           // offsets)
+            switch (ins)
+            {
+                case INS_sve_prfb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfh:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfw:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfd:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit
+                             // scaled offsets)
+            switch (ins)
+            {
+                case INS_sve_prfb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfh:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfw:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfd:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled
+                           // offsets)
+            switch (ins)
+            {
+                case INS_sve_prfb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfh:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfw:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfd:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar)
+            switch (ins)
+            {
+                case INS_sve_prfb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfh:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfw:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfd:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate)
+            switch (ins)
+            {
+                case INS_sve_prfb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfh:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfw:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfd:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate)
+            switch (ins)
+            {
+                case INS_sve_prfb:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfh:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfw:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_prfd:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate)
+        case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate)
+        case IF_SVE_IV_3A:   // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_9C;
+            break;
+
+        case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate)
+        case IF_SVE_JL_3A:   // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_IC_3A:   // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+        case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+        case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+        case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element
+            result.insThroughput = PERFSCORE_THROUGHPUT_3C;
+            result.insLatency    = PERFSCORE_LATENCY_6C;
+            break;
+
+        case IF_SVE_BI_2A: // ................ ......nnnnnddddd -- SVE constructive prefix (unpredicated)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_HH_2A: // ................ ......nnnnnddddd -- SVE2 FP8 upconverts
+            switch (ins)
+            {
+                case INS_sve_f1cvt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_f2cvt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_bf1cvt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_bf2cvt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_f1cvtlt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_f2cvtlt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_bf1cvtlt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                case INS_sve_bf2cvtlt:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+                    result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            break;
+
+        case IF_SVE_CB_2A: // ........xx...... ......nnnnnddddd -- SVE broadcast general register
+            switch (ins)
+            {
+                case INS_sve_mov:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+                case INS_sve_dup:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+                    result.insLatency    = PERFSCORE_LATENCY_3C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_CG_2A: // ........xx...... ......nnnnnddddd -- SVE reverse vector elements
+            switch (ins)
+            {
+                case INS_sve_rev:
+                    result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+                    result.insLatency    = PERFSCORE_LATENCY_2C;
+                    break;
+                default:
+                    // all other instructions
+                    perfScoreUnhandledInstruction(id, &result);
+                    break;
+            }
+            break;
+
+        case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_3C;
+            break;
+
+        case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated)
+        case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
+        case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_4C;
+            break;
+
+        case IF_SVE_BX_2A:                                  // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+            result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+            break;
+
+        case IF_SVE_BY_2A:                                  // ............iiii ......mmmmmddddd -- sve_int_perm_extq
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix
+            result.insLatency    = PERFSCORE_LATENCY_1C;    // need to fix
+            break;
+
+        default:
+            // all other instructions
+            perfScoreUnhandledInstruction(id, &result);
+            break;
+    }
+}
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+#endif // TARGET_ARM64
diff --git a/src/coreclr/jit/emitfmtsarm64sve.h b/src/coreclr/jit/emitfmtsarm64sve.h
index 13137166b7d7..cd27f567478c 100644
--- a/src/coreclr/jit/emitfmtsarm64sve.h
+++ b/src/coreclr/jit/emitfmtsarm64sve.h
@@ -395,18 +395,13 @@ IF_DEF(SVE_HL_3B,   IS_NONE, NONE) // SVE_HL_3B  ................ ...gggmmmmmddd
 IF_DEF(SVE_HM_2A,   IS_NONE, NONE) // SVE_HM_2A  ........xx...... ...ggg....iddddd  -- SVE floating-point arithmetic with immediate (predicated)
 IF_DEF(SVE_HN_2A,   IS_NONE, NONE) // SVE_HN_2A  ........xx...iii ......mmmmmddddd  -- SVE floating-point trig multiply-add coefficient
 IF_DEF(SVE_HO_3A,   IS_NONE, NONE) // SVE_HO_3A  ................ ...gggnnnnnddddd  -- SVE floating-point convert precision
-IF_DEF(SVE_HO_3A_B, IS_NONE, NONE) // SVE_HO_3A_B  ................ ...gggnnnnnddddd  -- 
+IF_DEF(SVE_HO_3B,   IS_NONE, NONE) // SVE_HO_3B  ................ ...gggnnnnnddddd  -- 
+IF_DEF(SVE_HO_3C,   IS_NONE, NONE) // SVE_HO_3C  ................ ...gggnnnnnddddd  -- 
 IF_DEF(SVE_HP_3A,   IS_NONE, NONE) // SVE_HP_3A  .............xx. ...gggnnnnnddddd  -- SVE floating-point convert to integer
-IF_DEF(SVE_HP_3B,   IS_NONE, NONE) // SVE_HP_3B  ................ ...gggnnnnnddddd  -- SVE floating-point convert to integer
-IF_DEF(SVE_HP_3B_H, IS_NONE, NONE) // SVE_HP_3B_H  ................ ...gggnnnnnddddd  -- 
-IF_DEF(SVE_HP_3B_I, IS_NONE, NONE) // SVE_HP_3B_I  ................ ...gggnnnnnddddd  -- 
-IF_DEF(SVE_HP_3B_J, IS_NONE, NONE) // SVE_HP_3B_J  ................ ...gggnnnnnddddd  -- 
+IF_DEF(SVE_HP_3B,   IS_NONE, NONE) // SVE_HP_3B  .............xx. ...gggnnnnnddddd  -- SVE floating-point convert to integer
 IF_DEF(SVE_HQ_3A,   IS_NONE, NONE) // SVE_HQ_3A  ........xx...... ...gggnnnnnddddd  -- SVE floating-point round to integral value
 IF_DEF(SVE_HR_3A,   IS_NONE, NONE) // SVE_HR_3A  ........xx...... ...gggnnnnnddddd  -- SVE floating-point unary operations
 IF_DEF(SVE_HS_3A,   IS_NONE, NONE) // SVE_HS_3A  ................ ...gggnnnnnddddd  -- SVE integer convert to floating-point
-IF_DEF(SVE_HS_3A_H, IS_NONE, NONE) // SVE_HS_3A_H  ................ ...gggnnnnnddddd  -- 
-IF_DEF(SVE_HS_3A_I, IS_NONE, NONE) // SVE_HS_3A_I  ................ ...gggnnnnnddddd  -- 
-IF_DEF(SVE_HS_3A_J, IS_NONE, NONE) // SVE_HS_3A_J  ................ ...gggnnnnnddddd  -- 
 IF_DEF(SVE_HT_4A,   IS_NONE, NONE) // SVE_HT_4A  ........xx.mmmmm ...gggnnnnn.DDDD  -- SVE floating-point compare vectors
 IF_DEF(SVE_HU_4A,   IS_NONE, NONE) // SVE_HU_4A  ........xx.mmmmm ...gggnnnnnddddd  -- SVE floating-point multiply-accumulate writing addend
 IF_DEF(SVE_HU_4B,   IS_NONE, NONE) // SVE_HU_4B  ...........mmmmm ...gggnnnnnddddd  -- SVE floating-point multiply-accumulate writing addend
diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp
index 0ea2546213a7..c69ea7c5a36e 100644
--- a/src/coreclr/jit/emitloongarch64.cpp
+++ b/src/coreclr/jit/emitloongarch64.cpp
@@ -4,7 +4,7 @@
 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 XX                                                                           XX
-XX                             emitloongarch64.cpp                                XX
+XX                             emitloongarch64.cpp                           XX
 XX                                                                           XX
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@@ -51,9 +51,9 @@ const emitJumpKind emitReverseJumpKinds[] = {
 }
 
 /*****************************************************************************
-* Look up the jump kind for an instruction. It better be a conditional
-* branch instruction with a jump kind!
-*/
+ * Look up the jump kind for an instruction. It better be a conditional
+ * branch instruction with a jump kind!
+ */
 
 /*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins)
 {
@@ -2047,9 +2047,9 @@ void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNu
 }
 
 // This computes address from the immediate which is relocatable.
-void emitter::emitIns_R_AI(instruction ins,
-                           emitAttr    attr,
-                           regNumber   reg,
+void emitter::emitIns_R_AI(instruction  ins,
+                           emitAttr     attr,
+                           regNumber    reg,
                            ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
 {
     assert(EA_IS_RELOC(attr)); // EA_PTR_DSP_RELOC
@@ -2381,8 +2381,8 @@ void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm)
 void emitter::emitIns_Call(EmitCallType          callType,
                            CORINFO_METHOD_HANDLE methHnd,
                            INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
-                           void*    addr,
-                           ssize_t  argSize,
+                           void*            addr,
+                           ssize_t          argSize,
                            emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
                            VARSET_VALARG_TP ptrVars,
                            regMaskTP        gcrefRegs,
@@ -2786,9 +2786,9 @@ void emitter::emitJumpDistBind()
         B_DIST_SMALL_MAX_POS -
         emitCounts_INS_OPTS_J * (3 << 2); // the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J).
 
-/*****************************************************************************/
-/* If the default small encoding is not enough, we start again here.     */
-/*****************************************************************************/
+    /*****************************************************************************/
+    /* If the default small encoding is not enough, we start again here.     */
+    /*****************************************************************************/
 
 AGAIN:
 
@@ -2819,7 +2819,7 @@ void emitter::emitJumpDistBind()
         UNATIVE_OFFSET dstOffs;
         NATIVE_OFFSET  jmpDist; // the relative jump distance, as it will be encoded
 
-/* Make sure the jumps are properly ordered */
+        /* Make sure the jumps are properly ordered */
 
 #ifdef DEBUG
         assert(lastSJ == nullptr || lastIG != jmp->idjIG || lastSJ->idjOffs < (jmp->idjOffs + adjSJ));
@@ -2873,7 +2873,6 @@ void emitter::emitJumpDistBind()
         jmp->idjOffs += adjSJ;
 
         // If this is a jump via register, the instruction size does not change, so we are done.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
         /* Have we bound this jump's target already? */
 
@@ -2894,7 +2893,6 @@ void emitter::emitJumpDistBind()
         else
         {
             /* First time we've seen this label, convert its target */
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
             tgtIG = (insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel);
 
@@ -2997,8 +2995,8 @@ void emitter::emitJumpDistBind()
                 instruction ins = jmp->idIns();
                 assert((INS_bceqz <= ins) && (ins <= INS_bl));
 
-                if (ins <
-                    INS_beqz) //   bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez  // See instrsloongarch64.h.
+                if (ins < INS_beqz) //   bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez  // See
+                                    //   instrsloongarch64.h.
                 {
                     if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000)
                     {
@@ -3085,8 +3083,8 @@ void emitter::emitJumpDistBind()
                 instruction ins = jmp->idIns();
                 assert((INS_bceqz <= ins) && (ins <= INS_bl));
 
-                if (ins <
-                    INS_beqz) //   bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez  // See instrsloongarch64.h.
+                if (ins < INS_beqz) //   bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez  // See
+                                    //   instrsloongarch64.h.
                 {
                     if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000)
                     {
@@ -3181,7 +3179,7 @@ void emitter::emitJumpDistBind()
 }
 
 /*****************************************************************************
-*
+ *
  *  Append the machine code corresponding to the given instruction descriptor
  *  to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
  *  is the instruction group that contains the instruction. Updates '*dp' to
@@ -4039,15 +4037,15 @@ void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id)
             {
                 printf("%s, %s, 0x%lx\n", RegNames[regd], RegNames[regj], offs16);
             }
-            else if (INS_OPTS_NONE == id->idInsOpt())
+            else if ((unsigned)(addr - emitCodeBlock) < emitPrologIG->igSize) // only for prolog
             {
                 if (offs16 < 0)
                 {
-                    printf("-%d ins\n", -offs16 >> 2);
+                    printf("%s, %s, -%d ins\n", RegNames[regj], RegNames[regd], -offs16 >> 2);
                 }
                 else
                 {
-                    printf("+%d ins\n", offs16 >> 2);
+                    printf("%s, %s, +%d ins\n", RegNames[regj], RegNames[regd], offs16 >> 2);
                 }
             }
             else
@@ -4060,12 +4058,12 @@ void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id)
         {
             tmp = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11;
             tmp >>= 9;
-            if (INS_OPTS_NONE == id->idInsOpt())
+            if ((unsigned)(addr - emitCodeBlock) < emitPrologIG->igSize) // only for prolog
             {
                 tmp >>= 2;
                 if (tmp < 0)
                 {
-                    printf("%s, -%d ins\n", RegNames[regj], tmp);
+                    printf("%s, -%d ins\n", RegNames[regj], -tmp);
                 }
                 else
                 {
@@ -4089,12 +4087,12 @@ void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id)
                 methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
                 printf("# %s\n", methodName);
             }
-            else if (INS_OPTS_NONE == id->idInsOpt())
+            else if ((unsigned)(addr - emitCodeBlock) < emitPrologIG->igSize) // only for prolog
             {
                 tmp >>= 2;
                 if (tmp < 0)
                 {
-                    printf("-%d ins\n", tmp);
+                    printf("-%d ins\n", -tmp);
                 }
                 else
                 {
@@ -4134,7 +4132,12 @@ void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id)
             tmp >>= 20;
             if (ins == INS_preld)
             {
-                printf("0x%x, %s, 0x%x\n", regd, RegNames[regj], tmp);
+                printf("0x%x, %s, %d\n", regd, RegNames[regj], tmp);
+                return;
+            }
+            else if (ins == INS_lu52i_d)
+            {
+                printf("%s, %s, 0x%x\n", RegNames[regd], RegNames[regj], tmp & 0xfff);
                 return;
             }
             printf("%s, %s, %d\n", RegNames[regd], RegNames[regj], tmp);
@@ -4598,7 +4601,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
         int   offset = 0;
         DWORD lsl    = 0;
 
-        if (addr->OperGet() == GT_LEA)
+        if (addr->OperIs(GT_LEA))
         {
             offset = addr->AsAddrMode()->Offset();
             if (addr->AsAddrMode()->gtScale > 0)
@@ -4980,7 +4983,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
     {
         emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum());
     }
-    else if (dst->OperGet() == GT_MUL)
+    else if (dst->OperIs(GT_MUL))
     {
         if (!needCheckOv)
         {
@@ -5048,10 +5051,14 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
 
         // TODO-LOONGARCH64-CQ: here sign-extend dst when deal with 32bit data is too conservative.
         if (EA_SIZE(attr) == EA_4BYTE)
+        {
             emitIns_R_R_I(INS_slli_w, attr, dst->GetRegNum(), dst->GetRegNum(), 0);
+        }
     }
     else
     {
+        assert(dst->OperIs(GT_ADD, GT_SUB));
+
         regNumber regOp1       = src1->GetRegNum();
         regNumber regOp2       = src2->GetRegNum();
         regNumber saveOperReg1 = REG_NA;
@@ -5064,26 +5071,38 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
             assert(REG_R21 != dst->GetRegNum());
             assert(REG_RA != dst->GetRegNum());
 
-            if (dst->GetRegNum() == regOp1)
+            if (dst->OperIs(GT_ADD))
             {
-                assert(REG_R21 != regOp1);
-                assert(REG_RA != regOp1);
-                saveOperReg1 = REG_R21;
-                saveOperReg2 = regOp2;
-                emitIns_R_R_R(INS_or, attr, REG_R21, regOp1, REG_R0);
+                saveOperReg1 = (dst->GetRegNum() == regOp1) ? regOp2 : regOp1;
             }
-            else if (dst->GetRegNum() == regOp2)
+            else
             {
-                assert(REG_R21 != regOp2);
-                assert(REG_RA != regOp2);
-                saveOperReg1 = regOp1;
-                saveOperReg2 = REG_R21;
-                emitIns_R_R_R(INS_or, attr, REG_R21, regOp2, REG_R0);
+                if (dst->GetRegNum() == regOp1)
+                {
+                    assert(REG_R21 != regOp1);
+                    assert(REG_RA != regOp1);
+                    saveOperReg1 = REG_R21;
+                    emitIns_R_R_R(INS_or, attr, REG_R21, regOp1, REG_R0);
+                }
+                else
+                {
+                    saveOperReg1 = regOp1;
+                }
             }
-            else
+
+            if ((dst->gtFlags & GTF_UNSIGNED) == 0)
             {
-                saveOperReg1 = regOp1;
-                saveOperReg2 = regOp2;
+                saveOperReg2 = dst->GetSingleTempReg();
+                assert((saveOperReg2 != REG_RA) && (saveOperReg2 != REG_R21));
+                assert(REG_RA != regOp1);
+                assert(saveOperReg2 != regOp2);
+
+                ssize_t ui6 = (attr == EA_4BYTE) ? 31 : 63;
+                if (dst->OperIs(GT_ADD))
+                {
+                    emitIns_R_R_I(INS_srli_d, attr, REG_RA, regOp1, ui6);
+                }
+                emitIns_R_R_I(INS_srli_d, attr, saveOperReg2, regOp2, ui6);
             }
         }
 
@@ -5091,86 +5110,56 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
 
         if (needCheckOv)
         {
-            if (dst->OperGet() == GT_ADD || dst->OperGet() == GT_SUB)
+            // ADD : A = B + C
+            // SUB : A = B - C <=> B = A + C
+            if ((dst->gtFlags & GTF_UNSIGNED) != 0)
             {
-                ssize_t   imm;
-                regNumber tempReg1;
-                regNumber tempReg2;
-                // ADD : A = B + C
-                // SUB : C = A - B
-                if ((dst->gtFlags & GTF_UNSIGNED) != 0)
+                // ADD: if A < B, goto overflow
+                // SUB: if B < A, goto overflow
+                codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu,
+                                                 dst->OperIs(GT_ADD) ? dst->GetRegNum() : saveOperReg1, nullptr,
+                                                 dst->OperIs(GT_ADD) ? saveOperReg1 : dst->GetRegNum());
+            }
+            else
+            {
+                if (dst->OperIs(GT_SUB))
                 {
-                    // if A < B, goto overflow
-                    if (dst->OperGet() == GT_ADD)
-                    {
-                        tempReg1 = dst->GetRegNum();
-                        tempReg2 = saveOperReg1;
-                    }
-                    else
-                    {
-                        tempReg1 = saveOperReg1;
-                        tempReg2 = saveOperReg2;
-                    }
-                    codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu, tempReg1, nullptr, tempReg2);
+                    emitIns_R_R_I(INS_srli_d, attr, REG_RA, dst->GetRegNum(), (attr == EA_4BYTE) ? 31 : 63);
                 }
-                else
-                {
-                    tempReg1 = REG_RA;
-                    tempReg2 = dst->GetSingleTempReg();
-                    assert(tempReg1 != tempReg2);
-                    assert(tempReg1 != saveOperReg1);
-                    assert(tempReg2 != saveOperReg2);
-
-                    ssize_t ui6 = (attr == EA_4BYTE) ? 31 : 63;
-                    if (dst->OperGet() == GT_ADD)
-                    {
-                        emitIns_R_R_I(INS_srli_d, attr, tempReg1, saveOperReg1, ui6);
-                    }
-                    else
-                    {
-                        emitIns_R_R_I(INS_srli_d, attr, tempReg1, dst->GetRegNum(), ui6);
-                    }
-                    emitIns_R_R_I(INS_srli_d, attr, tempReg2, saveOperReg2, ui6);
 
-                    emitIns_R_R_R(INS_xor, attr, tempReg1, tempReg1, tempReg2);
-                    if (attr == EA_4BYTE)
-                    {
-                        imm = 1;
-                        emitIns_R_R_I(INS_andi, attr, tempReg1, tempReg1, imm);
-                        emitIns_R_R_I(INS_andi, attr, tempReg2, tempReg2, imm);
-                    }
-                    // if (B > 0 && C < 0) || (B < 0  && C > 0), skip overflow
-                    BasicBlock* tmpLabel  = codeGen->genCreateTempLabel();
-                    BasicBlock* tmpLabel2 = codeGen->genCreateTempLabel();
-                    BasicBlock* tmpLabel3 = codeGen->genCreateTempLabel();
+                emitIns_R_R_R(INS_xor, attr, REG_RA, REG_RA, saveOperReg2);
+                if (attr == EA_4BYTE)
+                {
+                    emitIns_R_R_I(INS_andi, attr, REG_RA, REG_RA, 1);
+                    emitIns_R_R_I(INS_andi, attr, saveOperReg2, saveOperReg2, 1);
+                }
+                // ADD: if (B > 0 && C < 0) || (B < 0  && C > 0), skip overflow
+                // SUB: if (A > 0 && C < 0) || (A < 0  && C > 0), skip overflow
+                BasicBlock* tmpLabel1 = codeGen->genCreateTempLabel();
+                BasicBlock* tmpLabel2 = codeGen->genCreateTempLabel();
+                BasicBlock* tmpLabel3 = codeGen->genCreateTempLabel();
 
-                    emitIns_J_cond_la(INS_bne, tmpLabel, tempReg1, REG_R0);
+                emitIns_J_cond_la(INS_bne, tmpLabel1, REG_RA, REG_R0);
 
-                    emitIns_J_cond_la(INS_bne, tmpLabel3, tempReg2, REG_R0);
+                emitIns_J_cond_la(INS_bne, tmpLabel3, saveOperReg2, REG_R0);
 
-                    // B > 0 and C > 0, if A < B, goto overflow
-                    emitIns_J_cond_la(INS_bge, tmpLabel, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1,
-                                      dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2);
+                // ADD: B > 0 and C > 0, if A < B, goto overflow
+                // SUB: A > 0 and C > 0, if B < A, goto overflow
+                emitIns_J_cond_la(INS_bge, tmpLabel1, dst->OperIs(GT_ADD) ? dst->GetRegNum() : saveOperReg1,
+                                  dst->OperIs(GT_ADD) ? saveOperReg1 : dst->GetRegNum());
 
-                    codeGen->genDefineTempLabel(tmpLabel2);
+                codeGen->genDefineTempLabel(tmpLabel2);
 
-                    codeGen->genJumpToThrowHlpBlk(EJ_jmp, SCK_OVERFLOW);
+                codeGen->genJumpToThrowHlpBlk(EJ_jmp, SCK_OVERFLOW);
 
-                    codeGen->genDefineTempLabel(tmpLabel3);
+                codeGen->genDefineTempLabel(tmpLabel3);
 
-                    // B < 0 and C < 0, if A > B, goto overflow
-                    emitIns_J_cond_la(INS_blt, tmpLabel2, dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2,
-                                      dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1);
+                // ADD: B < 0 and C < 0, if A > B, goto overflow
+                // SUB: A < 0 and C < 0, if B > A, goto overflow
+                emitIns_J_cond_la(INS_blt, tmpLabel2, dst->OperIs(GT_ADD) ? saveOperReg1 : dst->GetRegNum(),
+                                  dst->OperIs(GT_ADD) ? dst->GetRegNum() : saveOperReg1);
 
-                    codeGen->genDefineTempLabel(tmpLabel);
-                }
-            }
-            else
-            {
-#ifdef DEBUG
-                printf("---------[LOONGARCH64]-NOTE: UnsignedOverflow instruction %d\n", ins);
-#endif
-                assert(!"unimplemented on LOONGARCH yet");
+                codeGen->genDefineTempLabel(tmpLabel1);
             }
         }
     }
diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h
index 11a2f9ee9071..135f9cf40067 100644
--- a/src/coreclr/jit/emitloongarch64.h
+++ b/src/coreclr/jit/emitloongarch64.h
@@ -104,10 +104,10 @@ enum insDisasmFmt
 #endif
 };
 
-code_t emitGetInsMask(int ins);
+code_t       emitGetInsMask(int ins);
 insDisasmFmt emitGetInsFmt(instruction ins);
-void emitDispInst(instruction ins);
-void emitDisInsName(code_t code, const BYTE* addr, instrDesc* id);
+void         emitDispInst(instruction ins);
+void         emitDisInsName(code_t code, const BYTE* addr, instrDesc* id);
 #endif // DEBUG
 
 void emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 = REG_R0, regNumber reg2 = REG_R0);
@@ -316,9 +316,9 @@ void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
 
 void emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs);
 
-void emitIns_R_AI(instruction ins,
-                  emitAttr    attr,
-                  regNumber   reg,
+void emitIns_R_AI(instruction  ins,
+                  emitAttr     attr,
+                  regNumber    reg,
                   ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
 
 enum EmitCallType
@@ -343,8 +343,8 @@ enum EmitCallType
 void emitIns_Call(EmitCallType          callType,
                   CORINFO_METHOD_HANDLE methHnd,
                   INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
-                  void*    addr,
-                  ssize_t  argSize,
+                  void*            addr,
+                  ssize_t          argSize,
                   emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
                   VARSET_VALARG_TP ptrVars,
                   regMaskTP        gcrefRegs,
diff --git a/src/coreclr/jit/emitpub.h b/src/coreclr/jit/emitpub.h
index 674b98a7f4ba..bf15ba33667c 100644
--- a/src/coreclr/jit/emitpub.h
+++ b/src/coreclr/jit/emitpub.h
@@ -16,24 +16,24 @@ void emitBegFN(bool hasFramePtr
                ,
                bool checkAlign
 #endif
-               );
+);
 
 void emitEndFN();
 
 void emitComputeCodeSizes();
 
-unsigned emitEndCodeGen(Compiler* comp,
-                        bool      contTrkPtrLcls,
-                        bool      fullyInt,
-                        bool      fullPtrMap,
-                        unsigned  xcptnsCount,
-                        unsigned* prologSize,
-                        unsigned* epilogSize,
-                        void**    codeAddr,
-                        void**    codeAddrRW,
-                        void**    coldCodeAddr,
-                        void**    coldCodeAddrRW,
-                        void**    consAddr,
+unsigned emitEndCodeGen(Compiler*         comp,
+                        bool              contTrkPtrLcls,
+                        bool              fullyInt,
+                        bool              fullPtrMap,
+                        unsigned          xcptnsCount,
+                        unsigned*         prologSize,
+                        unsigned*         epilogSize,
+                        void**            codeAddr,
+                        void**            codeAddrRW,
+                        void**            coldCodeAddr,
+                        void**            coldCodeAddrRW,
+                        void**            consAddr,
                         void** consAddrRW DEBUGARG(unsigned* instrCount));
 
 /************************************************************************/
@@ -102,36 +102,14 @@ UNATIVE_OFFSET emitDataSize();
 /************************************************************************/
 
 #ifdef TARGET_XARCH
-static bool instrIs3opImul(instruction ins);
-static bool instrIsExtendedReg3opImul(instruction ins);
-static bool instrHasImplicitRegPairDest(instruction ins);
-static void      check3opImulValues();
-static regNumber inst3opImulReg(instruction ins);
+static bool        instrIs3opImul(instruction ins);
+static bool        instrIsExtendedReg3opImul(instruction ins);
+static bool        instrHasImplicitRegPairDest(instruction ins);
+static void        check3opImulValues();
+static regNumber   inst3opImulReg(instruction ins);
 static instruction inst3opImulForReg(regNumber reg);
 #endif
 
-/************************************************************************/
-/*                   Emit PDB offset translation information            */
-/************************************************************************/
-
-#ifdef TRANSLATE_PDB
-
-static void SetILBaseOfCode(BYTE* pTextBase);
-static void SetILMethodBase(BYTE* pMethodEntry);
-static void SetILMethodStart(BYTE* pMethodCode);
-static void SetImgBaseOfCode(BYTE* pTextBase);
-
-void SetIDBaseToProlog();
-void SetIDBaseToOffset(int methodOffset);
-
-static void DisablePDBTranslation();
-static bool IsPDBEnabled();
-
-static void InitTranslationMaps(int ilCodeSize);
-static void DeleteTranslationMaps();
-static void InitTranslator(PDBRewriter* pPDB, int* rgSecMap, IMAGE_SECTION_HEADER** rgpHeader, int numSections);
-#endif
-
 /************************************************************************/
 /*                   Interface for generating unwind information        */
 /************************************************************************/
diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp
index 3e756a01e683..533d26ef2307 100644
--- a/src/coreclr/jit/emitriscv64.cpp
+++ b/src/coreclr/jit/emitriscv64.cpp
@@ -625,7 +625,8 @@ void emitter::emitIns_R_R(
         assert(isGeneralRegisterOrR0(reg2));
         code |= (reg1 & 0x1f) << 7;
         code |= reg2 << 15;
-        code |= 0x7 << 12;
+        if (INS_fcvt_d_w != ins && INS_fcvt_d_wu != ins) // fcvt.d.w[u] always produces an exact result
+            code |= 0x7 << 12;                           // round according to frm status register
     }
     else if (INS_fcvt_s_d == ins || INS_fcvt_d_s == ins)
     {
@@ -633,7 +634,8 @@ void emitter::emitIns_R_R(
         assert(isFloatReg(reg2));
         code |= (reg1 & 0x1f) << 7;
         code |= (reg2 & 0x1f) << 15;
-        code |= 0x7 << 12;
+        if (INS_fcvt_d_s != ins) // fcvt.d.s never rounds
+            code |= 0x7 << 12;   // round according to frm status register
     }
     else
     {
@@ -691,7 +693,7 @@ void emitter::emitIns_R_R_I(
         code |= ((imm >> 5) & 0x3f) << 25;
         code |= ((imm >> 12) & 0x1) << 31;
         // TODO-RISCV64: Move jump logic to emitIns_J
-        id->idAddr()->iiaSetInstrCount(imm / sizeof(code_t));
+        id->idAddr()->iiaSetInstrCount(static_cast<int>(imm / sizeof(code_t)));
     }
     else if (ins == INS_csrrs || ins == INS_csrrw || ins == INS_csrrc)
     {
@@ -986,9 +988,9 @@ void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNu
 }
 
 // This computes address from the immediate which is relocatable.
-void emitter::emitIns_R_AI(instruction ins,
-                           emitAttr    attr,
-                           regNumber   reg,
+void emitter::emitIns_R_AI(instruction  ins,
+                           emitAttr     attr,
+                           regNumber    reg,
                            ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
 {
     assert(EA_IS_RELOC(attr)); // EA_PTR_DSP_RELOC
@@ -1288,8 +1290,8 @@ void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm)
 void emitter::emitIns_Call(EmitCallType          callType,
                            CORINFO_METHOD_HANDLE methHnd,
                            INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
-                           void*    addr,
-                           ssize_t  argSize,
+                           void*            addr,
+                           ssize_t          argSize,
                            emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
                            VARSET_VALARG_TP ptrVars,
                            regMaskTP        gcrefRegs,
@@ -1758,9 +1760,9 @@ void emitter::emitJumpDistBind()
         emitCounts_INS_OPTS_J * (6 << 2); // the max placeholder sizeof(INS_OPTS_JALR) - sizeof(INS_OPTS_J)
     NATIVE_OFFSET psd = B_DIST_SMALL_MAX_POS - maxPlaceholderSize;
 
-/*****************************************************************************/
-/* If the default small encoding is not enough, we start again here.     */
-/*****************************************************************************/
+    /*****************************************************************************/
+    /* If the default small encoding is not enough, we start again here.     */
+    /*****************************************************************************/
 
 AGAIN:
 
@@ -1791,7 +1793,7 @@ void emitter::emitJumpDistBind()
         UNATIVE_OFFSET dstOffs;
         NATIVE_OFFSET  jmpDist; // the relative jump distance, as it will be encoded
 
-/* Make sure the jumps are properly ordered */
+        /* Make sure the jumps are properly ordered */
 
 #ifdef DEBUG
         assert(lastSJ == nullptr || lastIG != jmp->idjIG || lastSJ->idjOffs < (jmp->idjOffs + adjSJ));
@@ -1845,7 +1847,6 @@ void emitter::emitJumpDistBind()
         jmp->idjOffs += adjSJ;
 
         // If this is a jump via register, the instruction size does not change, so we are done.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
         /* Have we bound this jump's target already? */
 
@@ -1866,7 +1867,6 @@ void emitter::emitJumpDistBind()
         else
         {
             /* First time we've seen this label, convert its target */
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
             tgtIG = (insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel);
 
@@ -1946,8 +1946,8 @@ void emitter::emitJumpDistBind()
                 instruction ins = jmp->idIns();
                 assert((INS_jal <= ins) && (ins <= INS_bgeu));
 
-                if (ins > INS_jalr ||
-                    (ins < INS_jalr && ins > INS_j)) // jal < beqz < bnez < jalr < beq/bne/blt/bltu/bge/bgeu
+                if (ins > INS_jalr || (ins < INS_jalr && ins > INS_j)) // jal < beqz < bnez < jalr <
+                                                                       // beq/bne/blt/bltu/bge/bgeu
                 {
                     if (isValidSimm13(jmpDist + maxPlaceholderSize))
                     {
@@ -2020,8 +2020,8 @@ void emitter::emitJumpDistBind()
                 instruction ins = jmp->idIns();
                 assert((INS_jal <= ins) && (ins <= INS_bgeu));
 
-                if (ins > INS_jalr ||
-                    (ins < INS_jalr && ins > INS_j)) // jal < beqz < bnez < jalr < beq/bne/blt/bltu/bge/bgeu
+                if (ins > INS_jalr || (ins < INS_jalr && ins > INS_j)) // jal < beqz < bnez < jalr <
+                                                                       // beq/bne/blt/bltu/bge/bgeu
                 {
                     if (isValidSimm13(jmpDist + maxPlaceholderSize))
                     {
@@ -2118,12 +2118,12 @@ void emitter::emitJumpDistBind()
 unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code) const
 {
     assert(dst != nullptr);
-    assert(sizeof(code_t) == 4);
-    memcpy(dst + writeableOffset, &code, sizeof(code_t));
+    static_assert(sizeof(code_t) == 4, "code_t must be 4 bytes");
+    memcpy(dst + writeableOffset, &code, sizeof(code));
     return sizeof(code_t);
 }
 
-static inline void assertCodeLength(unsigned code, uint8_t size)
+static inline void assertCodeLength(size_t code, uint8_t size)
 {
     assert((code >> size) == 0);
 }
@@ -2298,7 +2298,9 @@ static inline void assertCodeLength(unsigned code, uint8_t size)
 
 static constexpr unsigned kInstructionOpcodeMask = 0x7f;
 static constexpr unsigned kInstructionFunct3Mask = 0x7000;
+static constexpr unsigned kInstructionFunct5Mask = 0xf8000000;
 static constexpr unsigned kInstructionFunct7Mask = 0xfe000000;
+static constexpr unsigned kInstructionFunct2Mask = 0x06000000;
 
 #ifdef DEBUG
 
@@ -2338,34 +2340,44 @@ static constexpr unsigned kInstructionFunct7Mask = 0xfe000000;
             assert(isGeneralRegisterOrR0(rs1));
             assert(isGeneralRegisterOrR0(rs2));
             break;
-        case INS_fadd_s:
-        case INS_fsub_s:
-        case INS_fmul_s:
-        case INS_fdiv_s:
         case INS_fsgnj_s:
         case INS_fsgnjn_s:
         case INS_fsgnjx_s:
         case INS_fmin_s:
         case INS_fmax_s:
-        case INS_feq_s:
-        case INS_flt_s:
-        case INS_fle_s:
-        case INS_fadd_d:
-        case INS_fsub_d:
-        case INS_fmul_d:
-        case INS_fdiv_d:
         case INS_fsgnj_d:
         case INS_fsgnjn_d:
         case INS_fsgnjx_d:
         case INS_fmin_d:
         case INS_fmax_d:
+            assert(isFloatReg(rd));
+            assert(isFloatReg(rs1));
+            assert(isFloatReg(rs2));
+            break;
+        case INS_feq_s:
         case INS_feq_d:
         case INS_flt_d:
+        case INS_flt_s:
+        case INS_fle_s:
         case INS_fle_d:
-            assert(isFloatReg(rd));
+            assert(isGeneralRegisterOrR0(rd));
             assert(isFloatReg(rs1));
             assert(isFloatReg(rs2));
             break;
+        case INS_fmv_w_x:
+        case INS_fmv_d_x:
+            assert(isFloatReg(rd));
+            assert(isGeneralRegisterOrR0(rs1));
+            assert(rs2 == 0);
+            break;
+        case INS_fmv_x_d:
+        case INS_fmv_x_w:
+        case INS_fclass_s:
+        case INS_fclass_d:
+            assert(isGeneralRegisterOrR0(rd));
+            assert(isFloatReg(rs1));
+            assert(rs2 == 0);
+            break;
         default:
             NO_WAY("Illegal ins within emitOutput_RTypeInstr!");
             break;
@@ -2377,6 +2389,7 @@ static constexpr unsigned kInstructionFunct7Mask = 0xfe000000;
 {
     switch (ins)
     {
+        case INS_mov:
         case INS_jalr:
         case INS_lb:
         case INS_lh:
@@ -2392,7 +2405,6 @@ static constexpr unsigned kInstructionFunct7Mask = 0xfe000000;
         case INS_lwu:
         case INS_ld:
         case INS_addiw:
-        case INS_fence_i:
         case INS_csrrw:
         case INS_csrrs:
         case INS_csrrc:
@@ -2427,6 +2439,15 @@ static constexpr unsigned kInstructionFunct7Mask = 0xfe000000;
             assert(rs1 < 32);
             assert((opcode & kInstructionFunct7Mask) == 0);
             break;
+        case INS_fence:
+        {
+            assert(rd == REG_ZERO);
+            assert(rs1 == REG_ZERO);
+            ssize_t format = immediate >> 8;
+            assert((format == 0) || (format == 0x8));
+            assert((opcode & kInstructionFunct7Mask) == 0);
+        }
+        break;
         default:
             NO_WAY("Illegal ins within emitOutput_ITypeInstr!");
             break;
@@ -2725,48 +2746,48 @@ ssize_t emitter::emitOutputInstrJumpDistance(const BYTE* src, const insGroup* ig
     return distVal;
 }
 
-static constexpr size_t NBitMask(uint8_t bits)
+static inline constexpr unsigned WordMask(uint8_t bits)
 {
-    return (static_cast<size_t>(1) << bits) - 1;
+    return static_cast<unsigned>((1ull << bits) - 1);
 }
 
 template <uint8_t MaskSize>
-static ssize_t LowerNBitsOfWord(ssize_t word)
+static unsigned LowerNBitsOfWord(ssize_t word)
 {
     static_assert(MaskSize < 32, "Given mask size is bigger than the word itself");
     static_assert(MaskSize > 0, "Given mask size cannot be zero");
 
-    static constexpr size_t kMask = NBitMask(MaskSize);
+    static constexpr unsigned kMask = WordMask(MaskSize);
 
-    return word & kMask;
+    return static_cast<unsigned>(word & kMask);
 }
 
 template <uint8_t MaskSize>
-static ssize_t UpperNBitsOfWord(ssize_t word)
+static unsigned UpperNBitsOfWord(ssize_t word)
 {
-    static constexpr size_t kShift = 32 - MaskSize;
+    static constexpr unsigned kShift = 32 - MaskSize;
 
     return LowerNBitsOfWord<MaskSize>(word >> kShift);
 }
 
 template <uint8_t MaskSize>
-static ssize_t UpperNBitsOfWordSignExtend(ssize_t word)
+static unsigned UpperNBitsOfWordSignExtend(ssize_t word)
 {
     static constexpr unsigned kSignExtend = 1 << (31 - MaskSize);
 
     return UpperNBitsOfWord<MaskSize>(word + kSignExtend);
 }
 
-static ssize_t UpperWordOfDoubleWord(ssize_t immediate)
+static unsigned UpperWordOfDoubleWord(ssize_t immediate)
 {
-    return immediate >> 32;
+    return static_cast<unsigned>(immediate >> 32);
 }
 
-static ssize_t LowerWordOfDoubleWord(ssize_t immediate)
+static unsigned LowerWordOfDoubleWord(ssize_t immediate)
 {
-    static constexpr size_t kWordMask = NBitMask(32);
+    static constexpr size_t kWordMask = WordMask(32);
 
-    return immediate & kWordMask;
+    return static_cast<unsigned>(immediate & kWordMask);
 }
 
 template <uint8_t UpperMaskSize, uint8_t LowerMaskSize>
@@ -2792,28 +2813,28 @@ static ssize_t UpperWordOfDoubleWordDoubleSignExtend(ssize_t doubleWord)
     return UpperWordOfDoubleWord(DoubleWordSignExtend<UpperMaskSize, LowerMaskSize>(doubleWord));
 }
 
-/*static*/ unsigned emitter::TrimSignedToImm12(int imm12)
+/*static*/ unsigned emitter::TrimSignedToImm12(ssize_t imm12)
 {
     assert(isValidSimm12(imm12));
 
     return static_cast<unsigned>(LowerNBitsOfWord<12>(imm12));
 }
 
-/*static*/ unsigned emitter::TrimSignedToImm13(int imm13)
+/*static*/ unsigned emitter::TrimSignedToImm13(ssize_t imm13)
 {
     assert(isValidSimm13(imm13));
 
     return static_cast<unsigned>(LowerNBitsOfWord<13>(imm13));
 }
 
-/*static*/ unsigned emitter::TrimSignedToImm20(int imm20)
+/*static*/ unsigned emitter::TrimSignedToImm20(ssize_t imm20)
 {
     assert(isValidSimm20(imm20));
 
     return static_cast<unsigned>(LowerNBitsOfWord<20>(imm20));
 }
 
-/*static*/ unsigned emitter::TrimSignedToImm21(int imm21)
+/*static*/ unsigned emitter::TrimSignedToImm21(ssize_t imm21)
 {
     assert(isValidSimm21(imm21));
 
@@ -2867,8 +2888,8 @@ BYTE* emitter::emitOutputInstr_OptsI8(BYTE* dst, const instrDesc* id, ssize_t im
     if (id->idReg2())
     {
         // special for INT64_MAX or UINT32_MAX
-        dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, REG_R0, 0xfff);
-        const ssize_t shiftValue = (immediate == INT64_MAX) ? 1 : 32;
+        dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, REG_R0, WordMask(12));
+        const unsigned shiftValue = (immediate == INT64_MAX) ? 1 : 32;
         dst += emitOutput_ITypeInstr(dst, INS_srli, reg1, reg1, shiftValue);
     }
     else
@@ -2881,10 +2902,10 @@ BYTE* emitter::emitOutputInstr_OptsI8(BYTE* dst, const instrDesc* id, ssize_t im
 
 BYTE* emitter::emitOutputInstr_OptsI32(BYTE* dst, ssize_t immediate, regNumber reg1)
 {
-    ssize_t upperWord = UpperWordOfDoubleWord(immediate);
+    const unsigned upperWord = UpperWordOfDoubleWord(immediate);
     dst += emitOutput_UTypeInstr(dst, INS_lui, reg1, UpperNBitsOfWordSignExtend<20>(upperWord));
     dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<12>(upperWord));
-    ssize_t lowerWord = LowerWordOfDoubleWord(immediate);
+    const unsigned lowerWord = LowerWordOfDoubleWord(immediate);
     dst += emitOutput_ITypeInstr(dst, INS_slli, reg1, reg1, 11);
     dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<11>(lowerWord >> 21));
     dst += emitOutput_ITypeInstr(dst, INS_slli, reg1, reg1, 11);
@@ -2899,39 +2920,37 @@ BYTE* emitter::emitOutputInstr_OptsRc(BYTE* dst, const instrDesc* id, instructio
     assert(id->idAddr()->iiaIsJitDataOffset());
     assert(id->idGCref() == GCT_NONE);
 
-    int dataOffs = id->idAddr()->iiaGetJitDataOffset();
+    const int dataOffs = id->idAddr()->iiaGetJitDataOffset();
     assert(dataOffs >= 0);
 
-    ssize_t immediate = emitGetInsSC(id);
+    const ssize_t immediate = emitGetInsSC(id);
     assert((immediate >= 0) && (immediate < 0x4000)); // 0x4000 is arbitrary, currently 'imm' is always 0.
 
-    unsigned offset = static_cast<unsigned>(dataOffs + immediate);
+    const unsigned offset = static_cast<unsigned>(dataOffs + immediate);
     assert(offset < emitDataSize());
 
-    *ins           = id->idIns();
-    regNumber reg1 = id->idReg1();
+    *ins                 = id->idIns();
+    const regNumber reg1 = id->idReg1();
 
     if (id->idIsReloc())
     {
-        return emitOutputInstr_OptsRcReloc(dst, ins, reg1);
+        return emitOutputInstr_OptsRcReloc(dst, ins, offset, reg1);
     }
     return emitOutputInstr_OptsRcNoReloc(dst, ins, offset, reg1);
 }
 
-BYTE* emitter::emitOutputInstr_OptsRcReloc(BYTE* dst, instruction* ins, regNumber reg1)
+BYTE* emitter::emitOutputInstr_OptsRcReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1)
 {
-    ssize_t immediate = emitConsBlock - dst;
-    assert(immediate > 0);
-    assert((immediate & 0x03) == 0);
+    const ssize_t immediate = (emitConsBlock - dst) + offset;
+    assert((immediate > 0) && ((immediate & 0x03) == 0));
 
-    regNumber rsvdReg = codeGen->rsGetRsvdReg();
+    const regNumber rsvdReg = codeGen->rsGetRsvdReg();
     dst += emitOutput_UTypeInstr(dst, INS_auipc, rsvdReg, UpperNBitsOfWordSignExtend<20>(immediate));
 
     instruction lastIns = *ins;
 
     if (*ins == INS_jal)
     {
-        assert(isGeneralRegister(reg1));
         *ins = lastIns = INS_addi;
     }
     dst += emitOutput_ITypeInstr(dst, lastIns, reg1, rsvdReg, LowerNBitsOfWord<12>(immediate));
@@ -2940,12 +2959,12 @@ BYTE* emitter::emitOutputInstr_OptsRcReloc(BYTE* dst, instruction* ins, regNumbe
 
 BYTE* emitter::emitOutputInstr_OptsRcNoReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1)
 {
-    ssize_t immediate = reinterpret_cast<ssize_t>(emitConsBlock) + offset;
-    assert((immediate >> 40) == 0);
-    regNumber rsvdReg = codeGen->rsGetRsvdReg();
+    const ssize_t immediate = reinterpret_cast<ssize_t>(emitConsBlock) + offset;
+    assertCodeLength(static_cast<size_t>(immediate), 40);
+    const regNumber rsvdReg = codeGen->rsGetRsvdReg();
 
-    instruction lastIns = (*ins == INS_jal) ? (*ins = INS_addi) : *ins;
-    UINT32      high = immediate >> 11;
+    const instruction lastIns = (*ins == INS_jal) ? (*ins = INS_addi) : *ins;
+    const ssize_t     high    = immediate >> 11;
 
     dst += emitOutput_UTypeInstr(dst, INS_lui, rsvdReg, UpperNBitsOfWordSignExtend<20>(high));
     dst += emitOutput_ITypeInstr(dst, INS_addi, rsvdReg, rsvdReg, LowerNBitsOfWord<12>(high));
@@ -2959,9 +2978,8 @@ BYTE* emitter::emitOutputInstr_OptsRl(BYTE* dst, instrDesc* id, instruction* ins
     insGroup* targetInsGroup = static_cast<insGroup*>(emitCodeGetCookie(id->idAddr()->iiaBBlabel));
     id->idAddr()->iiaIGlabel = targetInsGroup;
 
-    regNumber reg1 = id->idReg1();
-    assert(isGeneralRegister(reg1));
-    ssize_t igOffs = targetInsGroup->igOffs;
+    const regNumber reg1   = id->idReg1();
+    const ssize_t   igOffs = targetInsGroup->igOffs;
 
     if (id->idIsReloc())
     {
@@ -2974,7 +2992,7 @@ BYTE* emitter::emitOutputInstr_OptsRl(BYTE* dst, instrDesc* id, instruction* ins
 
 BYTE* emitter::emitOutputInstr_OptsRlReloc(BYTE* dst, ssize_t igOffs, regNumber reg1)
 {
-    ssize_t immediate = (emitCodeBlock - dst) + igOffs;
+    const ssize_t immediate = (emitCodeBlock - dst) + igOffs;
     assert((immediate & 0x03) == 0);
 
     dst += emitOutput_UTypeInstr(dst, INS_auipc, reg1, UpperNBitsOfWordSignExtend<20>(immediate));
@@ -2984,11 +3002,11 @@ BYTE* emitter::emitOutputInstr_OptsRlReloc(BYTE* dst, ssize_t igOffs, regNumber
 
 BYTE* emitter::emitOutputInstr_OptsRlNoReloc(BYTE* dst, ssize_t igOffs, regNumber reg1)
 {
-    ssize_t immediate = reinterpret_cast<ssize_t>(emitCodeBlock) + igOffs;
-    assert((immediate >> (32 + 20)) == 0);
+    const ssize_t immediate = reinterpret_cast<ssize_t>(emitCodeBlock) + igOffs;
+    assertCodeLength(static_cast<size_t>(immediate), 32 + 20);
 
-    regNumber rsvdReg      = codeGen->rsGetRsvdReg();
-    ssize_t   upperSignExt = UpperWordOfDoubleWordDoubleSignExtend<32, 52>(immediate);
+    const regNumber rsvdReg      = codeGen->rsGetRsvdReg();
+    const ssize_t   upperSignExt = UpperWordOfDoubleWordDoubleSignExtend<32, 52>(immediate);
 
     dst += emitOutput_UTypeInstr(dst, INS_lui, rsvdReg, UpperNBitsOfWordSignExtend<20>(immediate));
     dst += emitOutput_ITypeInstr(dst, INS_addi, rsvdReg, rsvdReg, LowerNBitsOfWord<12>(immediate));
@@ -3000,32 +3018,32 @@ BYTE* emitter::emitOutputInstr_OptsRlNoReloc(BYTE* dst, ssize_t igOffs, regNumbe
 
 BYTE* emitter::emitOutputInstr_OptsJalr(BYTE* dst, instrDescJmp* jmp, const insGroup* ig, instruction* ins)
 {
-    ssize_t immediate = emitOutputInstrJumpDistance(dst, ig, jmp) - 4;
+    const ssize_t immediate = emitOutputInstrJumpDistance(dst, ig, jmp) - 4;
     assert((immediate & 0x03) == 0);
 
     *ins = jmp->idIns();
-    assert(jmp->idCodeSize() > 4); // The original INS_OPTS_JALR: not used by now!!!
     switch (jmp->idCodeSize())
     {
         case 8:
-            return emitOutputInstr_OptsJalr8(dst, jmp, *ins, immediate);
+            return emitOutputInstr_OptsJalr8(dst, jmp, immediate);
         case 24:
-            assert((*ins == INS_jal) || (*ins == INS_j));
+            assert(jmp->idInsIs(INS_jal, INS_j));
             return emitOutputInstr_OptsJalr24(dst, immediate);
         case 28:
-            return emitOutputInstr_OptsJalr28(dst, jmp, *ins, immediate);
+            return emitOutputInstr_OptsJalr28(dst, jmp, immediate);
         default:
+            // case 0 - 4: The original INS_OPTS_JALR: not used by now!!!
             break;
     }
     unreached();
     return nullptr;
 }
 
-BYTE* emitter::emitOutputInstr_OptsJalr8(BYTE* dst, const instrDescJmp* jmp, instruction ins, ssize_t immediate)
+BYTE* emitter::emitOutputInstr_OptsJalr8(BYTE* dst, const instrDescJmp* jmp, ssize_t immediate)
 {
-    regNumber reg2 = ((ins != INS_beqz) && (ins != INS_bnez)) ? jmp->idReg2() : REG_R0;
+    const regNumber reg2 = jmp->idInsIs(INS_beqz, INS_bnez) ? REG_R0 : jmp->idReg2();
 
-    dst += emitOutput_BTypeInstr_InvertComparation(dst, ins, jmp->idReg1(), reg2, 0x8);
+    dst += emitOutput_BTypeInstr_InvertComparation(dst, jmp->idIns(), jmp->idReg1(), reg2, 0x8);
     dst += emitOutput_JTypeInstr(dst, INS_jal, REG_ZERO, TrimSignedToImm21(immediate));
     return dst;
 }
@@ -3034,14 +3052,14 @@ BYTE* emitter::emitOutputInstr_OptsJalr24(BYTE* dst, ssize_t immediate)
 {
     // Make target address with offset, then jump (JALR) with the target address
     immediate -= 2 * 4;
-    ssize_t high = UpperWordOfDoubleWordSingleSignExtend<0>(immediate);
+    const ssize_t high = UpperWordOfDoubleWordSingleSignExtend<0>(immediate);
 
     dst += emitOutput_UTypeInstr(dst, INS_lui, REG_RA, UpperNBitsOfWordSignExtend<20>(high));
     dst += emitOutput_ITypeInstr(dst, INS_addi, REG_RA, REG_RA, LowerNBitsOfWord<12>(high));
     dst += emitOutput_ITypeInstr(dst, INS_slli, REG_RA, REG_RA, 32);
 
-    regNumber rsvdReg = codeGen->rsGetRsvdReg();
-    ssize_t   low     = LowerWordOfDoubleWord(immediate);
+    const regNumber rsvdReg = codeGen->rsGetRsvdReg();
+    const ssize_t   low     = LowerWordOfDoubleWord(immediate);
 
     dst += emitOutput_UTypeInstr(dst, INS_auipc, rsvdReg, UpperNBitsOfWordSignExtend<20>(low));
     dst += emitOutput_RTypeInstr(dst, INS_add, rsvdReg, REG_RA, rsvdReg);
@@ -3050,17 +3068,18 @@ BYTE* emitter::emitOutputInstr_OptsJalr24(BYTE* dst, ssize_t immediate)
     return dst;
 }
 
-BYTE* emitter::emitOutputInstr_OptsJalr28(BYTE* dst, const instrDescJmp* jmp, instruction ins, ssize_t immediate)
+BYTE* emitter::emitOutputInstr_OptsJalr28(BYTE* dst, const instrDescJmp* jmp, ssize_t immediate)
 {
-    regNumber reg2 = ((ins != INS_beqz) && (ins != INS_bnez)) ? jmp->idReg2() : REG_R0;
-    dst += emitOutput_BTypeInstr_InvertComparation(dst, ins, jmp->idReg1(), reg2, 0x1c);
+    regNumber reg2 = jmp->idInsIs(INS_beqz, INS_bnez) ? REG_R0 : jmp->idReg2();
+
+    dst += emitOutput_BTypeInstr_InvertComparation(dst, jmp->idIns(), jmp->idReg1(), reg2, 0x1c);
 
     return emitOutputInstr_OptsJalr24(dst, immediate);
 }
 
 BYTE* emitter::emitOutputInstr_OptsJCond(BYTE* dst, instrDesc* id, const insGroup* ig, instruction* ins)
 {
-    ssize_t immediate = emitOutputInstrJumpDistance(dst, ig, static_cast<instrDescJmp*>(id));
+    const ssize_t immediate = emitOutputInstrJumpDistance(dst, ig, static_cast<instrDescJmp*>(id));
 
     *ins = id->idIns();
 
@@ -3070,7 +3089,7 @@ BYTE* emitter::emitOutputInstr_OptsJCond(BYTE* dst, instrDesc* id, const insGrou
 
 BYTE* emitter::emitOutputInstr_OptsJ(BYTE* dst, instrDesc* id, const insGroup* ig, instruction* ins)
 {
-    ssize_t immediate = emitOutputInstrJumpDistance(dst, ig, static_cast<instrDescJmp*>(id));
+    const ssize_t immediate = emitOutputInstrJumpDistance(dst, ig, static_cast<instrDescJmp*>(id));
     assert((immediate & 0x03) == 0);
 
     *ins = id->idIns();
@@ -3133,11 +3152,12 @@ BYTE* emitter::emitOutputInstr_OptsC(BYTE* dst, instrDesc* id, const insGroup* i
 size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
 {
     BYTE*             dst  = *dp;
+    BYTE*             dst2 = dst + 4;
     const BYTE* const odst = *dp;
     instruction       ins;
     size_t            sz = 0;
 
-    assert(REG_NA == static_cast<int>(REG_NA));
+    static_assert(REG_NA == static_cast<int>(REG_NA), "REG_NA must fit in an int");
 
     insOpts insOp = id->idInsOpt();
 
@@ -3174,8 +3194,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             sz  = sizeof(instrDescJmp);
             break;
         case INS_OPTS_C:
-            dst = emitOutputInstr_OptsC(dst, id, ig, &sz);
-            ins = INS_nop;
+            dst  = emitOutputInstr_OptsC(dst, id, ig, &sz);
+            dst2 = dst;
+            ins  = INS_nop;
             break;
         default: // case INS_OPTS_NONE:
             dst += emitOutput_Instr(dst, id->idAddr()->iiaGetInstrEncode());
@@ -3193,11 +3214,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
         // We assume that "idReg1" is the primary destination register for all instructions
         if (id->idGCref() != GCT_NONE)
         {
-            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst2);
         }
         else
         {
-            emitGCregDeadUpd(id->idReg1(), dst);
+            emitGCregDeadUpd(id->idReg1(), dst2);
         }
     }
 
@@ -3211,7 +3232,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
         int      adr = emitComp->lvaFrameAddress(varNum, &FPbased);
         if (id->idGCref() != GCT_NONE)
         {
-            emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst DEBUG_ARG(varNum));
+            emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst2 DEBUG_ARG(varNum));
         }
         else
         {
@@ -3228,7 +3249,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
                 vt              = tmpDsc->tdTempType();
             }
             if (vt == TYP_REF || vt == TYP_BYREF)
-                emitGCvarDeadUpd(adr + ofs, dst DEBUG_ARG(varNum));
+                emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum));
         }
         // if (emitInsWritesToLclVarStackLocPair(id))
         //{
diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h
index 688f9d1f757a..07e603a70afb 100644
--- a/src/coreclr/jit/emitriscv64.h
+++ b/src/coreclr/jit/emitriscv64.h
@@ -82,17 +82,17 @@ void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTr
 unsigned emitOutput_Instr(BYTE* dst, code_t code) const;
 
 ssize_t emitOutputInstrJumpDistance(const BYTE* src, const insGroup* ig, instrDescJmp* jmp);
-void emitOutputInstrJumpDistanceHelper(const insGroup* ig,
-                                       instrDescJmp*   jmp,
-                                       UNATIVE_OFFSET& dstOffs,
-                                       const BYTE*&    dstAddr) const;
+void    emitOutputInstrJumpDistanceHelper(const insGroup* ig,
+                                          instrDescJmp*   jmp,
+                                          UNATIVE_OFFSET& dstOffs,
+                                          const BYTE*&    dstAddr) const;
 
 // Method to do check if mov is redundant with respect to the last instruction.
 // If yes, the caller of this method can choose to omit current mov instruction.
 static bool IsMovInstruction(instruction ins);
-bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip);
-bool IsRedundantLdStr(
-    instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); // New functions end.
+bool        IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip);
+bool        IsRedundantLdStr(
+           instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); // New functions end.
 
 static code_t insEncodeRTypeInstr(
     unsigned opcode, unsigned rd, unsigned funct3, unsigned rs1, unsigned rs2, unsigned funct7);
@@ -128,23 +128,23 @@ BYTE* emitOutputInstr_OptsI(BYTE* dst, const instrDesc* id);
 BYTE* emitOutputInstr_OptsI8(BYTE* dst, const instrDesc* id, ssize_t immediate, regNumber reg1);
 BYTE* emitOutputInstr_OptsI32(BYTE* dst, ssize_t immediate, regNumber reg1);
 BYTE* emitOutputInstr_OptsRc(BYTE* dst, const instrDesc* id, instruction* ins);
-BYTE* emitOutputInstr_OptsRcReloc(BYTE* dst, instruction* ins, regNumber reg1);
+BYTE* emitOutputInstr_OptsRcReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1);
 BYTE* emitOutputInstr_OptsRcNoReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1);
 BYTE* emitOutputInstr_OptsRl(BYTE* dst, instrDesc* id, instruction* ins);
 BYTE* emitOutputInstr_OptsRlReloc(BYTE* dst, ssize_t igOffs, regNumber reg1);
 BYTE* emitOutputInstr_OptsRlNoReloc(BYTE* dst, ssize_t igOffs, regNumber reg1);
 BYTE* emitOutputInstr_OptsJalr(BYTE* dst, instrDescJmp* jmp, const insGroup* ig, instruction* ins);
-BYTE* emitOutputInstr_OptsJalr8(BYTE* dst, const instrDescJmp* jmp, instruction ins, ssize_t immediate);
+BYTE* emitOutputInstr_OptsJalr8(BYTE* dst, const instrDescJmp* jmp, ssize_t immediate);
 BYTE* emitOutputInstr_OptsJalr24(BYTE* dst, ssize_t immediate);
-BYTE* emitOutputInstr_OptsJalr28(BYTE* dst, const instrDescJmp* jmp, instruction ins, ssize_t immediate);
+BYTE* emitOutputInstr_OptsJalr28(BYTE* dst, const instrDescJmp* jmp, ssize_t immediate);
 BYTE* emitOutputInstr_OptsJCond(BYTE* dst, instrDesc* id, const insGroup* ig, instruction* ins);
 BYTE* emitOutputInstr_OptsJ(BYTE* dst, instrDesc* id, const insGroup* ig, instruction* ins);
 BYTE* emitOutputInstr_OptsC(BYTE* dst, instrDesc* id, const insGroup* ig, size_t* size);
 
-static unsigned TrimSignedToImm12(int imm12);
-static unsigned TrimSignedToImm13(int imm13);
-static unsigned TrimSignedToImm20(int imm20);
-static unsigned TrimSignedToImm21(int imm21);
+static unsigned TrimSignedToImm12(ssize_t imm12);
+static unsigned TrimSignedToImm13(ssize_t imm13);
+static unsigned TrimSignedToImm20(ssize_t imm20);
+static unsigned TrimSignedToImm21(ssize_t imm21);
 
 /************************************************************************/
 /*           Public inline informational methods                        */
@@ -293,9 +293,9 @@ void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
 
 void emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs);
 
-void emitIns_R_AI(instruction ins,
-                  emitAttr    attr,
-                  regNumber   reg,
+void emitIns_R_AI(instruction  ins,
+                  emitAttr     attr,
+                  regNumber    reg,
                   ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
 
 enum EmitCallType
@@ -324,8 +324,8 @@ enum EmitCallType
 void emitIns_Call(EmitCallType          callType,
                   CORINFO_METHOD_HANDLE methHnd,
                   INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
-                  void*    addr,
-                  ssize_t  argSize,
+                  void*            addr,
+                  ssize_t          argSize,
                   emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
                   VARSET_VALARG_TP ptrVars,
                   regMaskTP        gcrefRegs,
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index d5dc2fd9530a..6bf148cf2d88 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -1287,10 +1287,10 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
 #define DEFAULT_BYTE_EVEX_PREFIX 0x62F07C0800000000ULL
 
 #define DEFAULT_BYTE_EVEX_PREFIX_MASK 0xFFFFFFFF00000000ULL
-#define BBIT_IN_BYTE_EVEX_PREFIX 0x0000001000000000ULL
-#define LBIT_IN_BYTE_EVEX_PREFIX 0x0000002000000000ULL
+#define BBIT_IN_BYTE_EVEX_PREFIX      0x0000001000000000ULL
+#define LBIT_IN_BYTE_EVEX_PREFIX      0x0000002000000000ULL
 #define LPRIMEBIT_IN_BYTE_EVEX_PREFIX 0x0000004000000000ULL
-#define ZBIT_IN_BYTE_EVEX_PREFIX 0x0000008000000000ULL
+#define ZBIT_IN_BYTE_EVEX_PREFIX      0x0000008000000000ULL
 
 //------------------------------------------------------------------------
 // AddEvexPrefix: Add default EVEX prefix with only LL' bits set.
@@ -1460,9 +1460,9 @@ bool emitter::TakesVexPrefix(instruction ins) const
 //   01  - 66     (66 0F - packed double)
 //   10  - F3     (F3 0F - scalar float
 //   11  - F2     (F2 0F - scalar double)
-#define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL
+#define DEFAULT_3BYTE_VEX_PREFIX      0xC4E07800000000ULL
 #define DEFAULT_3BYTE_VEX_PREFIX_MASK 0xFFFFFF00000000ULL
-#define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL
+#define LBIT_IN_3BYTE_VEX_PREFIX      0x00000400000000ULL
 emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr attr)
 {
     // The 2-byte VEX encoding is preferred when possible, but actually emitting
@@ -1522,9 +1522,11 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
         switch (ins)
         {
             case INS_cvtss2si:
-            case INS_cvttss2si:
+            case INS_cvttss2si32:
+            case INS_cvttss2si64:
             case INS_cvtsd2si:
-            case INS_cvttsd2si:
+            case INS_cvttsd2si32:
+            case INS_cvttsd2si64:
             case INS_movd:
             case INS_movnti:
             case INS_andn:
@@ -1544,7 +1546,6 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
 #endif // TARGET_AMD64
             case INS_vcvtsd2usi:
             case INS_vcvtss2usi:
-            case INS_vcvttsd2usi:
             {
                 if (attr == EA_8BYTE)
                 {
@@ -2723,8 +2724,10 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
         case INS_blsmsk:
         case INS_blsr:
         case INS_bzhi:
-        case INS_cvttsd2si:
-        case INS_cvttss2si:
+        case INS_cvttsd2si32:
+        case INS_cvttsd2si64:
+        case INS_cvttss2si32:
+        case INS_cvttss2si64:
         case INS_cvtsd2si:
         case INS_cvtss2si:
         case INS_extractps:
@@ -2748,7 +2751,8 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
 #endif
         case INS_vcvtsd2usi:
         case INS_vcvtss2usi:
-        case INS_vcvttsd2usi:
+        case INS_vcvttsd2usi32:
+        case INS_vcvttsd2usi64:
         case INS_vcvttss2usi32:
         case INS_vcvttss2usi64:
         {
@@ -3597,7 +3601,7 @@ bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1
 #ifdef FEATURE_HW_INTRINSICS
         && (ins != INS_crc32)
 #endif
-            )
+    )
     {
         // reg1 must be a byte-able register
         if ((genRegMask(reg1) & RBM_BYTE_REGS) == 0)
@@ -3827,11 +3831,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSVCalcDisp(instrDesc* id, code_t code,
 
         /* Is this a stack parameter reference? */
 
-        if ((emitComp->lvaIsParameter(var)
-#if !defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI)
-             && !emitComp->lvaIsRegArgument(var)
-#endif // !TARGET_AMD64 || UNIX_AMD64_ABI
-                 ) ||
+        if ((emitComp->lvaIsParameter(var) && !emitComp->lvaParamHasLocalStackSpace(var)) ||
             (static_cast<unsigned>(var) == emitComp->lvaRetAddrVar))
         {
             /* If no EBP frame, arguments and ret addr are off of ESP, above temps */
@@ -3863,7 +3863,6 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSVCalcDisp(instrDesc* id, code_t code,
 #endif
                 {
                     // Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes
-                    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef UNIX_AMD64_ABI
                     const LclVarDsc* varDsc         = emitComp->lvaGetDesc(var);
@@ -4108,7 +4107,8 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
 
         assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE)                               // Only for x64
                || (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) || (attrSize == EA_64BYTE) // only for x64
-               || (ins == INS_movzx) || (ins == INS_movsx) || (ins == INS_cmpxchg)
+               || (ins == INS_movzx) || (ins == INS_movsx) ||
+               (ins == INS_cmpxchg)
                // The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded
                || isPrefetch(ins));
 
@@ -4145,7 +4145,6 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
         if (reg == REG_NA)
         {
             /* The address is of the form "[disp]" */
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef TARGET_X86
             // Special case: "mov eax, [disp]" and "mov [disp], eax" can use a smaller 1-byte encoding.
@@ -4489,9 +4488,9 @@ emitter::instrDesc* emitter::emitNewInstrAmdCns(emitAttr size, ssize_t dsp, int
 }
 
 /*****************************************************************************
-*
-*  Add a data16 instruction of the 1 byte.
-*/
+ *
+ *  Add a data16 instruction of the 1 byte.
+ */
 
 void emitter::emitIns_Data16()
 {
@@ -4539,7 +4538,8 @@ void emitter::emitIns(instruction ins)
             (ins == INS_cdq || ins == INS_int3 || ins == INS_lock || ins == INS_leave || ins == INS_movsb ||
              ins == INS_movsd || ins == INS_movsp || ins == INS_nop || ins == INS_r_movsb || ins == INS_r_movsd ||
              ins == INS_r_movsp || ins == INS_r_stosb || ins == INS_r_stosd || ins == INS_r_stosp || ins == INS_ret ||
-             ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp
+             ins == INS_sahf || ins == INS_stosb || ins == INS_stosd ||
+             ins == INS_stosp
              // These instructions take zero operands
              || ins == INS_vzeroupper || ins == INS_lfence || ins == INS_mfence || ins == INS_sfence ||
              ins == INS_pause || ins == INS_serialize);
@@ -4849,7 +4849,7 @@ void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, G
 
     GenTree* addr = mem->Addr();
 
-    if (addr->OperIs(GT_LCL_ADDR))
+    if (addr->isContained() && addr->OperIs(GT_LCL_ADDR))
     {
         GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
         unsigned             offset  = varNode->GetLclOffs();
@@ -4899,7 +4899,7 @@ void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* m
         data = data->gtGetOp1();
     }
 
-    if (addr->OperIs(GT_LCL_ADDR))
+    if (addr->isContained() && addr->OperIs(GT_LCL_ADDR))
     {
         GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
         unsigned             offset  = varNode->GetLclOffs();
@@ -5140,18 +5140,18 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
             switch (memBase->OperGet())
             {
                 case GT_LCL_ADDR:
-                {
-                    assert(memBase->isContained());
-                    varNum = memBase->AsLclFld()->GetLclNum();
-                    offset = memBase->AsLclFld()->GetLclOffs();
-
-                    // Ensure that all the GenTreeIndir values are set to their defaults.
-                    assert(!memIndir->HasIndex());
-                    assert(memIndir->Scale() == 1);
-                    assert(memIndir->Offset() == 0);
+                    if (memBase->isContained())
+                    {
+                        varNum = memBase->AsLclFld()->GetLclNum();
+                        offset = memBase->AsLclFld()->GetLclOffs();
 
-                    break;
-                }
+                        // Ensure that all the GenTreeIndir values are set to their defaults.
+                        assert(!memIndir->HasIndex());
+                        assert(memIndir->Scale() == 1);
+                        assert(memIndir->Offset() == 0);
+                        break;
+                    }
+                    FALLTHROUGH;
 
                 default: // Addressing mode [base + index * scale + offset]
                 {
@@ -6565,7 +6565,7 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN
  *  Add an instruction with two register operands.
  */
 
-void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2)
+void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts instOptions)
 {
     if (IsMovInstruction(ins))
     {
@@ -6587,6 +6587,13 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum
     id->idReg1(reg1);
     id->idReg2(reg2);
 
+    if ((instOptions & INS_OPTS_EVEX_b_MASK) != INS_OPTS_NONE)
+    {
+        // if EVEX.b needs to be set in this path, then it should be embedded rounding.
+        assert(UseEvexEncoding());
+        id->idSetEvexbContext(instOptions);
+    }
+
     UNATIVE_OFFSET sz = emitInsSizeRR(id);
     id->idCodeSize(sz);
 
@@ -6962,9 +6969,9 @@ void emitter::emitIns_R_R_C(instruction          ins,
 }
 
 /*****************************************************************************
-*
-*  Add an instruction with three register operands.
-*/
+ *
+ *  Add an instruction with three register operands.
+ */
 
 void emitter::emitIns_R_R_R(
     instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, insOpts instOptions)
@@ -7095,16 +7102,16 @@ void emitter::emitIns_R_R_C_I(
 }
 
 /**********************************************************************************
-* emitIns_R_R_R_I: Add an instruction with three register operands and an immediate.
-*
-* Arguments:
-*    ins       - the instruction to add
-*    attr      - the emitter attribute for instruction
-*    targetReg - the target (destination) register
-*    reg1      - the first source register
-*    reg2      - the second source register
-*    ival      - the immediate value
-*/
+ * emitIns_R_R_R_I: Add an instruction with three register operands and an immediate.
+ *
+ * Arguments:
+ *    ins       - the instruction to add
+ *    attr      - the emitter attribute for instruction
+ *    targetReg - the target (destination) register
+ *    reg1      - the first source register
+ *    reg2      - the second source register
+ *    ival      - the immediate value
+ */
 
 void emitter::emitIns_R_R_R_I(
     instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, int ival)
@@ -7738,9 +7745,9 @@ void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber reg, regNum
     emitIns_R_ARX(ins, attr, reg, base, REG_NA, 1, disp);
 }
 
-void emitter::emitIns_R_AI(instruction ins,
-                           emitAttr    attr,
-                           regNumber   ireg,
+void emitter::emitIns_R_AI(instruction  ins,
+                           emitAttr     attr,
+                           regNumber    ireg,
                            ssize_t disp DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
 {
     assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
@@ -8545,20 +8552,32 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction          ins,
 //    op1Reg    -- The register of the first operand
 //    op2Reg    -- The register of the second operand
 //    op3Reg    -- The register of the second operand
+//    instOptions - The options that modify how the instruction is generated
 //
-void emitter::emitIns_SIMD_R_R_R_R(
-    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg)
+void emitter::emitIns_SIMD_R_R_R_R(instruction ins,
+                                   emitAttr    attr,
+                                   regNumber   targetReg,
+                                   regNumber   op1Reg,
+                                   regNumber   op2Reg,
+                                   regNumber   op3Reg,
+                                   insOpts     instOptions)
 {
     if (IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins))
     {
         assert(UseSimdEncoding());
 
+        if (instOptions != INS_OPTS_NONE)
+        {
+            // insOpts is currently available only in EVEX encoding.
+            assert(UseEvexEncoding());
+        }
+
         // Ensure we aren't overwriting op2 or op3
         assert((op2Reg != targetReg) || (op1Reg == targetReg));
         assert((op3Reg != targetReg) || (op1Reg == targetReg));
 
         emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
-        emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg);
+        emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg, instOptions);
     }
     else if (UseSimdEncoding())
     {
@@ -9687,7 +9706,7 @@ void emitter::emitIns_Call(EmitCallType          callType,
     if (m_debugInfoSize > 0)
     {
         INDEBUG(id->idDebugOnlyInfo()->idCallSig = sigInfo);
-        id->idDebugOnlyInfo()->idMemCookie       = (size_t)methHnd; // method token
+        id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
     }
 
 #ifdef LATE_DISASM
@@ -11586,22 +11605,20 @@ void emitter::emitDispIns(
                     break;
                 }
 
-                case INS_cvttsd2si:
+                case INS_cvttsd2si32:
+                case INS_cvttsd2si64:
                 case INS_cvtss2si:
                 case INS_cvtsd2si:
-                case INS_cvttss2si:
+                case INS_cvttss2si32:
+                case INS_cvttss2si64:
                 case INS_vcvtsd2usi:
                 case INS_vcvtss2usi:
-                case INS_vcvttsd2usi:
-                {
-                    printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
-                    break;
-                }
-
+                case INS_vcvttsd2usi32:
+                case INS_vcvttsd2usi64:
                 case INS_vcvttss2usi32:
                 case INS_vcvttss2usi64:
                 {
-                    printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_4BYTE));
+                    printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
                     break;
                 }
 
@@ -11659,6 +11676,7 @@ void emitter::emitDispIns(
                 default:
                 {
                     printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
+                    emitDispEmbRounding(id);
                     break;
                 }
             }
@@ -11678,7 +11696,7 @@ void emitter::emitDispIns(
 #ifdef TARGET_AMD64
                 || ins == INS_shrx || ins == INS_shlx || ins == INS_sarx
 #endif
-                )
+            )
             {
                 // BMI bextr,bzhi, shrx, shlx and sarx encode the reg2 in VEX.vvvv and reg3 in modRM,
                 // which is different from most of other instructions
@@ -12455,10 +12473,10 @@ BYTE* emitter::emitOutputAlign(insGroup* ig, instrDesc* id, BYTE* dst)
             assert(paddingToAdd == paddingNeeded);
         }
     }
-
-    emitComp->loopsAligned++;
 #endif
 
+    emitComp->Metrics.LoopsAligned++;
+
 #ifdef DEBUG
     // Under STRESS_EMITTER, if this is the 'align' before the 'jmp' instruction,
     // then add "int3" instruction. Since int3 takes 1 byte, we would only add
@@ -12979,9 +12997,9 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
                         dst += emitOutputWord(dst, code | 0x0500);
                     }
 #else  // TARGET_AMD64
-                    // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
-                    // This addr mode should never be used while generating relocatable ngen code nor if
-                    // the addr can be encoded as pc-relative address.
+       // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
+       // This addr mode should never be used while generating relocatable ngen code nor if
+       // the addr can be encoded as pc-relative address.
                     noway_assert(!emitComp->opts.compReloc);
                     noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32);
                     noway_assert((int)dsp == dsp);
@@ -13905,7 +13923,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
             case IF_SRW_CNS:
             case IF_SRW_RRD:
             case IF_SRW_RRW:
-            // += -= of a byref, no change
+                // += -= of a byref, no change
 
             case IF_SRW:
                 break;
@@ -15310,13 +15328,10 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
 
         if (id->idIsCnsReloc())
         {
-            if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI))
+            if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && id->idAddr()->iiaSecRel)
             {
-                if (id->idAddr()->iiaSecRel)
-                {
-                    // For section relative, the immediate offset is relocatable and hence need IMAGE_REL_SECREL
-                    emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_SECREL);
-                }
+                // For section relative, the immediate offset is relocatable and hence need IMAGE_REL_SECREL
+                emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_SECREL);
             }
             else
             {
@@ -15751,7 +15766,6 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
     if (dstOffs <= srcOffs)
     {
         // This is a backward jump - distance is known at this point
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if DEBUG_EMIT
         if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
@@ -16420,9 +16434,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             break;
         }
 
-        /********************************************************************/
-        /*                Simple constant, local label, method              */
-        /********************************************************************/
+            /********************************************************************/
+            /*                Simple constant, local label, method              */
+            /********************************************************************/
 
         case IF_CNS:
         {
@@ -16540,9 +16554,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
 #ifdef TARGET_X86
                     dst += emitOutputWord(dst, code | 0x0500);
 #else  // TARGET_AMD64
-                    // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
-                    // This addr mode should never be used while generating relocatable ngen code nor if
-                    // the addr can be encoded as pc-relative address.
+       // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
+       // This addr mode should never be used while generating relocatable ngen code nor if
+       // the addr can be encoded as pc-relative address.
                     noway_assert(!emitComp->opts.compReloc);
                     noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32);
                     noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (ssize_t)addr);
@@ -16695,9 +16709,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             break;
         }
 
-        /********************************************************************/
-        /*                      One register operand                        */
-        /********************************************************************/
+            /********************************************************************/
+            /*                      One register operand                        */
+            /********************************************************************/
 
         case IF_RRD:
         case IF_RWR:
@@ -16708,9 +16722,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             break;
         }
 
-        /********************************************************************/
-        /*                 Register and register/constant                   */
-        /********************************************************************/
+            /********************************************************************/
+            /*                 Register and register/constant                   */
+            /********************************************************************/
 
         case IF_RRW_SHF:
         {
@@ -16935,9 +16949,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             break;
         }
 
-        /********************************************************************/
-        /*                      Address mode operand                        */
-        /********************************************************************/
+            /********************************************************************/
+            /*                      Address mode operand                        */
+            /********************************************************************/
 
         case IF_ARD:
         case IF_AWR:
@@ -17174,9 +17188,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             break;
         }
 
-        /********************************************************************/
-        /*                      Stack-based operand                         */
-        /********************************************************************/
+            /********************************************************************/
+            /*                      Stack-based operand                         */
+            /********************************************************************/
 
         case IF_SRD:
         case IF_SWR:
@@ -17186,7 +17200,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             if (ins == INS_pop)
             {
                 // The offset in "pop [ESP+xxx]" is relative to the new ESP value
-                CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if !FEATURE_FIXED_OUT_ARGS
                 emitCurStackLvl -= sizeof(int);
@@ -17438,9 +17451,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             unreached();
         }
 
-        /********************************************************************/
-        /*                    Direct memory address                         */
-        /********************************************************************/
+            /********************************************************************/
+            /*                    Direct memory address                         */
+            /********************************************************************/
 
         case IF_MRD:
         case IF_MRW:
@@ -17740,9 +17753,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             unreached();
         }
 
-        /********************************************************************/
-        /*                            oops                                  */
-        /********************************************************************/
+            /********************************************************************/
+            /*                            oops                                  */
+            /********************************************************************/
 
         default:
 
@@ -17765,9 +17778,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
 #if !FEATURE_FIXED_OUT_ARGS
     bool updateStackLevel = !emitIGisInProlog(ig) && !emitIGisInEpilog(ig);
 
-#if defined(FEATURE_EH_FUNCLETS)
     updateStackLevel = updateStackLevel && !emitIGisInFuncletProlog(ig) && !emitIGisInFuncletEpilog(ig);
-#endif // FEATURE_EH_FUNCLETS
 
     // Make sure we keep the current stack level up to date
     if (updateStackLevel)
@@ -18207,7 +18218,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
 #ifdef TARGET_AMD64
                          || ins == INS_movsxd
 #endif
-                         )
+                )
                 {
                     result.insLatency += PERFSCORE_LATENCY_2C;
                 }
@@ -19033,7 +19044,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             break;
         }
 
-        case INS_cvttsd2si:
+        case INS_cvttsd2si32:
+        case INS_cvttsd2si64:
         case INS_cvtsd2si:
         case INS_cvtsi2sd32:
         case INS_cvtsi2ss32:
@@ -19042,7 +19054,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
         case INS_vcvtsd2usi:
         case INS_vcvtusi2ss32:
         case INS_vcvtusi2ss64:
-        case INS_vcvttsd2usi:
+        case INS_vcvttsd2usi32:
+        case INS_vcvttsd2usi64:
         case INS_vcvttss2usi32:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency += PERFSCORE_LATENCY_7C;
@@ -19054,7 +19067,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             result.insLatency += PERFSCORE_LATENCY_5C;
             break;
 
-        case INS_cvttss2si:
+        case INS_cvttss2si32:
+        case INS_cvttss2si64:
         case INS_cvtss2si:
         case INS_vcvtss2usi:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h
index d842f91f06a5..e32cab66254f 100644
--- a/src/coreclr/jit/emitxarch.h
+++ b/src/coreclr/jit/emitxarch.h
@@ -93,7 +93,7 @@ code_t emitExtractEvexPrefix(instruction ins, code_t& code) const;
 
 unsigned insEncodeReg012(const instrDesc* id, regNumber reg, emitAttr size, code_t* code);
 unsigned insEncodeReg345(const instrDesc* id, regNumber reg, emitAttr size, code_t* code);
-code_t insEncodeReg3456(const instrDesc* id, regNumber reg, emitAttr size, code_t code);
+code_t   insEncodeReg3456(const instrDesc* id, regNumber reg, emitAttr size, code_t code);
 unsigned insEncodeRegSIB(const instrDesc* id, regNumber reg, code_t* code);
 
 code_t insEncodeMRreg(const instrDesc* id, code_t code);
@@ -116,11 +116,11 @@ static bool IsKInstruction(instruction ins);
 
 static regNumber getBmiRegNumber(instruction ins);
 static regNumber getSseShiftRegNumber(instruction ins);
-bool HasVexEncoding(instruction ins) const;
-bool HasEvexEncoding(instruction ins) const;
-bool IsVexEncodableInstruction(instruction ins) const;
-bool IsEvexEncodableInstruction(instruction ins) const;
-bool IsVexOrEvexEncodableInstruction(instruction ins) const;
+bool             HasVexEncoding(instruction ins) const;
+bool             HasEvexEncoding(instruction ins) const;
+bool             IsVexEncodableInstruction(instruction ins) const;
+bool             IsEvexEncodableInstruction(instruction ins) const;
+bool             IsVexOrEvexEncodableInstruction(instruction ins) const;
 
 code_t insEncodeMIreg(const instrDesc* id, regNumber reg, emitAttr size, code_t code);
 
@@ -130,15 +130,15 @@ code_t AddRexXPrefix(const instrDesc* id, code_t code);
 code_t AddRexBPrefix(const instrDesc* id, code_t code);
 code_t AddRexPrefix(instruction ins, code_t code);
 
-bool EncodedBySSE38orSSE3A(instruction ins) const;
-bool Is4ByteSSEInstruction(instruction ins) const;
+bool   EncodedBySSE38orSSE3A(instruction ins) const;
+bool   Is4ByteSSEInstruction(instruction ins) const;
 code_t AddEvexVPrimePrefix(code_t code);
 code_t AddEvexRPrimePrefix(code_t code);
 
 static bool IsMovInstruction(instruction ins);
-bool HasSideEffect(instruction ins, emitAttr size);
-bool IsRedundantMov(
-    instruction ins, insFormat fmt, emitAttr size, regNumber dst, regNumber src, bool canIgnoreSideEffects);
+bool        HasSideEffect(instruction ins, emitAttr size);
+bool        IsRedundantMov(
+           instruction ins, insFormat fmt, emitAttr size, regNumber dst, regNumber src, bool canIgnoreSideEffects);
 bool EmitMovsxAsCwde(instruction ins, emitAttr size, regNumber dst, regNumber src);
 
 bool IsRedundantStackMov(instruction ins, insFormat fmt, emitAttr size, regNumber ireg, int varx, int offs);
@@ -468,15 +468,25 @@ void SetContains256bitOrMoreAVX(bool value)
     contains256bitOrMoreAVXInstruction = value;
 }
 
-bool IsDstDstSrcAVXInstruction(instruction ins) const;
-bool IsDstSrcSrcAVXInstruction(instruction ins) const;
-bool IsThreeOperandAVXInstruction(instruction ins) const;
+bool containsCallNeedingVzeroupper = false;
+bool ContainsCallNeedingVzeroupper() const
+{
+    return containsCallNeedingVzeroupper;
+}
+void SetContainsCallNeedingVzeroupper(bool value)
+{
+    containsCallNeedingVzeroupper = value;
+}
+
+bool        IsDstDstSrcAVXInstruction(instruction ins) const;
+bool        IsDstSrcSrcAVXInstruction(instruction ins) const;
+bool        IsThreeOperandAVXInstruction(instruction ins) const;
 static bool HasRegularWideForm(instruction ins);
 static bool HasRegularWideImmediateForm(instruction ins);
 static bool DoesWriteZeroFlag(instruction ins);
 static bool DoesWriteSignFlag(instruction ins);
 static bool DoesResetOverflowAndCarryFlags(instruction ins);
-bool IsFlagsAlwaysModified(instrDesc* id);
+bool        IsFlagsAlwaysModified(instrDesc* id);
 static bool IsRexW0Instruction(instruction ins);
 static bool IsRexW1Instruction(instruction ins);
 static bool IsRexWXInstruction(instruction ins);
@@ -518,7 +528,7 @@ const char* emitZMMregName(unsigned reg) const;
 /************************************************************************/
 
 private:
-void emitSetAmdDisp(instrDescAmd* id, ssize_t dsp);
+void       emitSetAmdDisp(instrDescAmd* id, ssize_t dsp);
 instrDesc* emitNewInstrAmd(emitAttr attr, ssize_t dsp);
 instrDesc* emitNewInstrAmdCns(emitAttr attr, ssize_t dsp, int cns);
 
@@ -535,9 +545,9 @@ instrDesc* emitNewInstrCallInd(int              argCnt,
                                regMaskTP        byrefRegs,
                                emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize));
 
-void emitGetInsCns(const instrDesc* id, CnsVal* cv) const;
+void    emitGetInsCns(const instrDesc* id, CnsVal* cv) const;
 ssize_t emitGetInsAmdCns(const instrDesc* id, CnsVal* cv) const;
-void emitGetInsDcmCns(const instrDesc* id, CnsVal* cv) const;
+void    emitGetInsDcmCns(const instrDesc* id, CnsVal* cv) const;
 ssize_t emitGetInsAmdAny(const instrDesc* id) const;
 
 /************************************************************************/
@@ -570,10 +580,10 @@ size_t emitSizeOfInsDsc_NONE(instrDesc* id) const;
 size_t emitSizeOfInsDsc_SPEC(instrDesc* id) const;
 
 /*****************************************************************************
-*
-*  Convert between an index scale in bytes to a smaller encoding used for
-*  storage in instruction descriptors.
-*/
+ *
+ *  Convert between an index scale in bytes to a smaller encoding used for
+ *  storage in instruction descriptors.
+ */
 
 inline emitter::opSize emitEncodeScale(size_t scale)
 {
@@ -625,7 +635,7 @@ void emitIns_R_I(instruction ins,
 
 void emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regNumber srgReg, bool canSkip);
 
-void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2);
+void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts instOptions = INS_OPTS_NONE);
 
 void emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival);
 
@@ -742,9 +752,9 @@ void emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp);
 
 void emitIns_R_AR(instruction ins, emitAttr attr, regNumber reg, regNumber base, int disp);
 
-void emitIns_R_AI(instruction ins,
-                  emitAttr    attr,
-                  regNumber   ireg,
+void emitIns_R_AI(instruction  ins,
+                  emitAttr     attr,
+                  regNumber    ireg,
                   ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
 
 void emitIns_AR_R(instruction ins, emitAttr attr, regNumber reg, regNumber base, cnsval_ssize_t disp);
@@ -829,8 +839,13 @@ void emitIns_SIMD_R_R_R_C(instruction          ins,
                           regNumber            op2Reg,
                           CORINFO_FIELD_HANDLE fldHnd,
                           int                  offs);
-void emitIns_SIMD_R_R_R_R(
-    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg);
+void emitIns_SIMD_R_R_R_R(instruction ins,
+                          emitAttr    attr,
+                          regNumber   targetReg,
+                          regNumber   op1Reg,
+                          regNumber   op2Reg,
+                          regNumber   op3Reg,
+                          insOpts     instOptions = INS_OPTS_NONE);
 void emitIns_SIMD_R_R_R_S(
     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs);
 
diff --git a/src/coreclr/jit/error.cpp b/src/coreclr/jit/error.cpp
index a45ad7c7df0e..5ae6cea056ef 100644
--- a/src/coreclr/jit/error.cpp
+++ b/src/coreclr/jit/error.cpp
@@ -250,7 +250,9 @@ void debugError(const char* msg, const char* file, unsigned line)
 }
 
 /*****************************************************************************/
-LogEnv::LogEnv(ICorJitInfo* aCompHnd) : compHnd(aCompHnd), compiler(nullptr)
+LogEnv::LogEnv(ICorJitInfo* aCompHnd)
+    : compHnd(aCompHnd)
+    , compiler(nullptr)
 {
 }
 
diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp
index 6f9eba855f83..37683b188c30 100644
--- a/src/coreclr/jit/fgbasic.cpp
+++ b/src/coreclr/jit/fgbasic.cpp
@@ -30,8 +30,6 @@ void Compiler::fgInit()
     fgRangeUsedInEdgeWeights = true;
     fgCalledCount            = BB_ZERO_WEIGHT;
 
-    fgReturnBlocksComputed = false;
-
     /* Initialize the basic block list */
 
     fgFirstBB          = nullptr;
@@ -41,16 +39,13 @@ void Compiler::fgInit()
     fgOSREntryBB       = nullptr;
     fgEntryBBExtraRefs = 0;
 
-#if defined(FEATURE_EH_FUNCLETS)
     fgFirstFuncletBB  = nullptr;
     fgFuncletsCreated = false;
-#endif // FEATURE_EH_FUNCLETS
 
     fgBBcount = 0;
 
 #ifdef DEBUG
-    fgBBcountAtCodegen = 0;
-    fgBBOrder          = nullptr;
+    fgBBOrder = nullptr;
 #endif // DEBUG
 
     fgMightHaveNaturalLoops = false;
@@ -116,9 +111,9 @@ void Compiler::fgInit()
 
     fgUsedSharedTemps = nullptr;
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
     ehMaxHndNestingCount = 0;
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
     /* Init the fgBigOffsetMorphingTemps to be BAD_VAR_NUM. */
     for (int i = 0; i < TYP_COUNT; i++)
@@ -207,17 +202,10 @@ bool Compiler::fgEnsureFirstBBisScratch()
 
     assert(fgFirstBBScratch == nullptr);
 
-    BasicBlock* block = BasicBlock::New(this, BBJ_ALWAYS, fgFirstBB);
-    block->SetFlags(BBF_NONE_QUIRK);
+    BasicBlock* block;
 
     if (fgFirstBB != nullptr)
     {
-        // If we have profile data the new block will inherit fgFirstBlock's weight
-        if (fgFirstBB->hasProfileWeight())
-        {
-            block->inheritWeight(fgFirstBB);
-        }
-
         // The first block has an implicit ref count which we must
         // remove. Note the ref count could be greater than one, if
         // the first block is not scratch and is targeted by a
@@ -225,14 +213,23 @@ bool Compiler::fgEnsureFirstBBisScratch()
         assert(fgFirstBB->bbRefs >= 1);
         fgFirstBB->bbRefs--;
 
+        block = BasicBlock::New(this);
+
+        // If we have profile data the new block will inherit fgFirstBlock's weight
+        if (fgFirstBB->hasProfileWeight())
+        {
+            block->inheritWeight(fgFirstBB);
+        }
+
         // The new scratch bb will fall through to the old first bb
         FlowEdge* const edge = fgAddRefPred(fgFirstBB, block);
-        edge->setLikelihood(1.0);
+        block->SetKindAndTargetEdge(BBJ_ALWAYS, edge);
         fgInsertBBbefore(fgFirstBB, block);
     }
     else
     {
         noway_assert(fgLastBB == nullptr);
+        block     = BasicBlock::New(this, BBJ_ALWAYS);
         fgFirstBB = block;
         fgLastBB  = block;
     }
@@ -358,7 +355,7 @@ void Compiler::fgConvertBBToThrowBB(BasicBlock* block)
     fgRemoveBlockAsPred(block);
 
     // Update jump kind after the scrub.
-    block->SetKindAndTarget(BBJ_THROW);
+    block->SetKindAndTargetEdge(BBJ_THROW);
     block->RemoveFlags(BBF_RETLESS_CALL); // no longer a BBJ_CALLFINALLY
 
     // Any block with a throw is rare
@@ -382,36 +379,26 @@ void Compiler::fgChangeSwitchBlock(BasicBlock* oldSwitchBlock, BasicBlock* newSw
     assert(fgPredsComputed);
 
     // Walk the switch's jump table, updating the predecessor for each branch.
-    for (BasicBlock* const bJump : oldSwitchBlock->SwitchTargets())
-    {
-        noway_assert(bJump != nullptr);
+    BBswtDesc* swtDesc = oldSwitchBlock->GetSwitchTargets();
 
-        // Note that if there are duplicate branch targets in the switch jump table,
-        // fgRemoveRefPred()/fgAddRefPred() will do the right thing: the second and
-        // subsequent duplicates will simply subtract from and add to the duplicate
-        // count (respectively).
-        //
-        // However this does the "wrong" thing with respect to edge profile
-        // data; the old edge is not returned by fgRemoveRefPred until it has
-        // a dup count of 0, and the fgAddRefPred only uses the optional
-        // old edge arg when the new edge is first created.
-        //
-        // Remove the old edge [oldSwitchBlock => bJump]
-        //
-        assert(bJump->countOfInEdges() > 0);
-        FlowEdge* const oldEdge = fgRemoveRefPred(bJump, oldSwitchBlock);
-
-        //
-        // Create the new edge [newSwitchBlock => bJump]
-        //
-        FlowEdge* const newEdge = fgAddRefPred(bJump, newSwitchBlock);
+    for (unsigned i = 0; i < swtDesc->bbsCount; i++)
+    {
+        FlowEdge* succEdge = swtDesc->bbsDstTab[i];
+        assert(succEdge != nullptr);
 
-        // Handle the profile update, once we get our hands on the old edge.
-        //
-        if (oldEdge != nullptr)
+        if (succEdge->getSourceBlock() != oldSwitchBlock)
+        {
+            // swtDesc can have duplicate targets, so we may have updated this edge already
+            //
+            assert(succEdge->getSourceBlock() == newSwitchBlock);
+            assert(succEdge->getDupCount() > 1);
+        }
+        else
         {
-            assert(!newEdge->hasLikelihood());
-            newEdge->setLikelihood(oldEdge->getLikelihood());
+            // Redirect edge's source block from oldSwitchBlock to newSwitchBlock,
+            // and keep successor block's pred list in order
+            //
+            fgReplacePred(succEdge, newSwitchBlock);
         }
     }
 
@@ -449,18 +436,12 @@ void Compiler::fgChangeEhfBlock(BasicBlock* oldBlock, BasicBlock* newBlock)
     assert(oldBlock->KindIs(BBJ_EHFINALLYRET));
     assert(fgPredsComputed);
 
-    for (BasicBlock* const succ : oldBlock->EHFinallyRetSuccs())
-    {
-        assert(succ != nullptr);
-
-        // Remove the old edge [oldBlock => succ]
-        //
-        assert(succ->countOfInEdges() > 0);
-        fgRemoveRefPred(succ, oldBlock);
+    BBehfDesc* ehfDesc = oldBlock->GetEhfTargets();
 
-        // Create the new edge [newBlock => succ]
-        //
-        fgAddRefPred(succ, newBlock);
+    for (unsigned i = 0; i < ehfDesc->bbeCount; i++)
+    {
+        FlowEdge* succEdge = ehfDesc->bbeSuccs[i];
+        fgReplacePred(succEdge, newBlock);
     }
 }
 
@@ -484,22 +465,26 @@ void Compiler::fgReplaceEhfSuccessor(BasicBlock* block, BasicBlock* oldSucc, Bas
     assert(block->KindIs(BBJ_EHFINALLYRET));
     assert(fgPredsComputed);
 
-    BBehfDesc* const   ehfDesc   = block->GetEhfTargets();
-    const unsigned     succCount = ehfDesc->bbeCount;
-    BasicBlock** const succTab   = ehfDesc->bbeSuccs;
+    BBehfDesc* const ehfDesc   = block->GetEhfTargets();
+    const unsigned   succCount = ehfDesc->bbeCount;
+    FlowEdge** const succTab   = ehfDesc->bbeSuccs;
 
-    // Walk the successor table looking for the old successor, which we expect to find.
+    // Walk the successor table looking for the old successor, which we expect to find only once.
     unsigned oldSuccNum = UINT_MAX;
     unsigned newSuccNum = UINT_MAX;
     for (unsigned i = 0; i < succCount; i++)
     {
-        if (succTab[i] == newSucc)
+        assert(succTab[i]->getSourceBlock() == block);
+
+        if (succTab[i]->getDestinationBlock() == newSucc)
         {
+            assert(newSuccNum == UINT_MAX);
             newSuccNum = i;
         }
 
-        if (succTab[i] == oldSucc)
+        if (succTab[i]->getDestinationBlock() == oldSucc)
         {
+            assert(oldSuccNum == UINT_MAX);
             oldSuccNum = i;
         }
     }
@@ -509,7 +494,7 @@ void Compiler::fgReplaceEhfSuccessor(BasicBlock* block, BasicBlock* oldSucc, Bas
     if (newSuccNum != UINT_MAX)
     {
         // The new successor is already in the table; simply remove the old one.
-        fgRemoveEhfSuccessor(block, oldSucc);
+        fgRemoveEhfSuccessor(block, oldSuccNum);
 
         JITDUMP("Remove existing BBJ_EHFINALLYRET " FMT_BB " successor " FMT_BB "; replacement successor " FMT_BB
                 " already exists in list\n",
@@ -517,17 +502,17 @@ void Compiler::fgReplaceEhfSuccessor(BasicBlock* block, BasicBlock* oldSucc, Bas
     }
     else
     {
-        // Replace the old one with the new one.
-
-        succTab[oldSuccNum] = newSucc;
-
         // Remove the old edge [block => oldSucc]
         //
         fgRemoveAllRefPreds(oldSucc, block);
 
         // Create the new edge [block => newSucc]
         //
-        fgAddRefPred(newSucc, block);
+        FlowEdge* const newEdge = fgAddRefPred(newSucc, block);
+
+        // Replace the old one with the new one.
+        //
+        succTab[oldSuccNum] = newEdge;
 
         JITDUMP("Replace BBJ_EHFINALLYRET " FMT_BB " successor " FMT_BB " with " FMT_BB "\n", block->bbNum,
                 oldSucc->bbNum, newSucc->bbNum);
@@ -535,60 +520,96 @@ void Compiler::fgReplaceEhfSuccessor(BasicBlock* block, BasicBlock* oldSucc, Bas
 }
 
 //------------------------------------------------------------------------
-// fgRemoveEhfSuccessor: update BBJ_EHFINALLYRET block to remove `succ` as a successor.
-// Updates the predecessor list of `succ`.
+// fgRemoveEhfSuccessor: update BBJ_EHFINALLYRET block to remove the successor at `succIndex`
+// in the block's jump table.
+// Updates the predecessor list of the successor, if necessary.
 //
 // Arguments:
-//   block   - BBJ_EHFINALLYRET block
-//   succ    - successor
+//   block     - BBJ_EHFINALLYRET block
+//   succIndex - index of the successor in block->GetEhfTargets()->bbeSuccs
 //
-void Compiler::fgRemoveEhfSuccessor(BasicBlock* block, BasicBlock* succ)
+void Compiler::fgRemoveEhfSuccessor(BasicBlock* block, const unsigned succIndex)
 {
     assert(block != nullptr);
-    assert(succ != nullptr);
-    assert(fgPredsComputed);
     assert(block->KindIs(BBJ_EHFINALLYRET));
+    assert(fgPredsComputed);
+
+    BBehfDesc* const ehfDesc   = block->GetEhfTargets();
+    const unsigned   succCount = ehfDesc->bbeCount;
+    FlowEdge**       succTab   = ehfDesc->bbeSuccs;
+    assert(succIndex < succCount);
+    FlowEdge* succEdge = succTab[succIndex];
+
+    fgRemoveRefPred(succEdge);
 
-    // Don't `assert(succ->isBBCallFinallyPairTail())`; we've already unlinked the CALLFINALLY
-    assert(succ->KindIs(BBJ_CALLFINALLYRET));
+    // If succEdge not the last entry, move everything after in the table down one slot.
+    if ((succIndex + 1) < succCount)
+    {
+        memmove_s(&succTab[succIndex], (succCount - succIndex) * sizeof(FlowEdge*), &succTab[succIndex + 1],
+                  (succCount - succIndex - 1) * sizeof(FlowEdge*));
+    }
 
-    fgRemoveRefPred(succ, block);
+#ifdef DEBUG
+    // We only expect to see a successor once in the table.
+    for (unsigned i = succIndex; i < (succCount - 1); i++)
+    {
+        assert(succTab[i]->getDestinationBlock() != succEdge->getDestinationBlock());
+    }
+#endif // DEBUG
+
+    ehfDesc->bbeCount--;
+}
+
+//------------------------------------------------------------------------
+// fgRemoveEhfSuccessor: Removes `succEdge` from its BBJ_EHFINALLYRET source block's jump table.
+// Updates the predecessor list of the successor block, if necessary.
+//
+// Arguments:
+//   block     - BBJ_EHFINALLYRET block
+//   succEdge - FlowEdge* to be removed from predecessor block's jump table
+//
+void Compiler::fgRemoveEhfSuccessor(FlowEdge* succEdge)
+{
+    assert(succEdge != nullptr);
+    assert(fgPredsComputed);
+
+    BasicBlock* block = succEdge->getSourceBlock();
+    assert(block != nullptr);
+    assert(block->KindIs(BBJ_EHFINALLYRET));
+
+    fgRemoveRefPred(succEdge);
 
     BBehfDesc* const ehfDesc   = block->GetEhfTargets();
-    unsigned         succCount = ehfDesc->bbeCount;
-    BasicBlock**     succTab   = ehfDesc->bbeSuccs;
+    const unsigned   succCount = ehfDesc->bbeCount;
+    FlowEdge**       succTab   = ehfDesc->bbeSuccs;
     bool             found     = false;
 
-    // Walk the successor table looking for the specified successor block.
+    // Search succTab for succEdge so we can splice it out of the table.
     for (unsigned i = 0; i < succCount; i++)
     {
-        if (succTab[i] == succ)
+        if (succTab[i] == succEdge)
         {
-            // If it's not the last one, move everything after in the table down one slot.
-            if (i + 1 < succCount)
+            // If succEdge not the last entry, move everything after in the table down one slot.
+            if ((i + 1) < succCount)
             {
-                memmove_s(&succTab[i], (succCount - i) * sizeof(BasicBlock*), &succTab[i + 1],
-                          (succCount - i - 1) * sizeof(BasicBlock*));
+                memmove_s(&succTab[i], (succCount - i) * sizeof(FlowEdge*), &succTab[i + 1],
+                          (succCount - i - 1) * sizeof(FlowEdge*));
             }
 
-            --succCount;
-
             found = true;
 
 #ifdef DEBUG
             // We only expect to see a successor once in the table.
-            for (; i < succCount; i++)
+            for (; i < (succCount - 1); i++)
             {
-                assert(succTab[i] != succ);
+                assert(succTab[i]->getDestinationBlock() != succEdge->getDestinationBlock());
             }
 #endif // DEBUG
-
-            break;
         }
     }
-    assert(found);
 
-    ehfDesc->bbeCount = succCount;
+    assert(found);
+    ehfDesc->bbeCount--;
 }
 
 //------------------------------------------------------------------------
@@ -620,95 +641,105 @@ void Compiler::fgReplaceJumpTarget(BasicBlock* block, BasicBlock* oldTarget, Bas
         case BBJ_EHCATCHRET:
         case BBJ_EHFILTERRET:
         case BBJ_LEAVE: // This function can be called before import, so we still have BBJ_LEAVE
-
-            if (block->TargetIs(oldTarget))
-            {
-                block->SetTarget(newTarget);
-                FlowEdge* const oldEdge = fgRemoveRefPred(oldTarget, block);
-                fgAddRefPred(newTarget, block, oldEdge);
-            }
+            assert(block->TargetIs(oldTarget));
+            fgRedirectTargetEdge(block, newTarget);
             break;
 
         case BBJ_COND:
-
             if (block->TrueTargetIs(oldTarget))
             {
-                if (block->FalseTargetIs(oldTarget))
+                if (block->FalseEdgeIs(block->GetTrueEdge()))
                 {
-                    // fgRemoveRefPred returns nullptr for BBJ_COND blocks with two flow edges to target
+                    // Branch was degenerate, simplify it first
+                    //
                     fgRemoveConditionalJump(block);
                     assert(block->KindIs(BBJ_ALWAYS));
                     assert(block->TargetIs(oldTarget));
-                    block->SetTarget(newTarget);
+                    fgRedirectTargetEdge(block, newTarget);
                 }
                 else
                 {
-                    block->SetTrueTarget(newTarget);
-                }
-
-                // fgRemoveRefPred should have removed the flow edge
-                FlowEdge* oldEdge = fgRemoveRefPred(oldTarget, block);
-                assert(oldEdge != nullptr);
-
-                // TODO-NoFallThrough: Proliferate weight from oldEdge
-                // (as a quirk, we avoid doing so for the true target to reduce diffs for now)
-                FlowEdge* const newEdge = fgAddRefPred(newTarget, block);
-                if (block->KindIs(BBJ_ALWAYS))
-                {
-                    newEdge->setLikelihood(1.0);
-                }
-                else if (oldEdge->hasLikelihood())
-                {
-                    newEdge->setLikelihood(oldEdge->getLikelihood());
+                    fgRedirectTrueEdge(block, newTarget);
                 }
             }
             else
             {
+                // Already degenerate cases should have taken the true path above
+                //
                 assert(block->FalseTargetIs(oldTarget));
+                assert(!block->TrueEdgeIs(block->GetFalseEdge()));
+                fgRedirectFalseEdge(block, newTarget);
+            }
 
-                // fgRemoveRefPred should have removed the flow edge
-                FlowEdge* oldEdge = fgRemoveRefPred(oldTarget, block);
-                assert(oldEdge != nullptr);
-                block->SetFalseTarget(newTarget);
-                fgAddRefPred(newTarget, block, oldEdge);
+            if (block->KindIs(BBJ_COND) && block->TrueEdgeIs(block->GetFalseEdge()))
+            {
+                // Block became degenerate, simplify
+                //
+                fgRemoveConditionalJump(block);
+                assert(block->KindIs(BBJ_ALWAYS));
+                assert(block->TargetIs(newTarget));
             }
+
             break;
 
         case BBJ_SWITCH:
         {
-            unsigned const     jumpCnt = block->GetSwitchTargets()->bbsCount;
-            BasicBlock** const jumpTab = block->GetSwitchTargets()->bbsDstTab;
-            bool               changed = false;
+            unsigned const   jumpCnt      = block->GetSwitchTargets()->bbsCount;
+            FlowEdge** const jumpTab      = block->GetSwitchTargets()->bbsDstTab;
+            bool             existingEdge = false;
+            FlowEdge*        oldEdge      = nullptr;
+            FlowEdge*        newEdge      = nullptr;
+            bool             changed      = false;
 
             for (unsigned i = 0; i < jumpCnt; i++)
             {
-                if (jumpTab[i] == oldTarget)
+                if (jumpTab[i]->getDestinationBlock() == newTarget)
                 {
-                    jumpTab[i]              = newTarget;
-                    changed                 = true;
-                    FlowEdge* const oldEdge = fgRemoveRefPred(oldTarget, block);
-                    FlowEdge* const newEdge = fgAddRefPred(newTarget, block, oldEdge);
+                    // The new target already has an edge from this switch statement.
+                    // We'll need to add the likelihood from the edge we're redirecting
+                    // to the existing edge. Note that if there is no existing edge,
+                    // then we'll copy the likelihood from the existing edge we pass to
+                    // `fgAddRefPred`. Note also that we can visit the same edge multiple
+                    // times if there are multiple switch cases with the same target. The
+                    // edge has a dup count and a single likelihood for all the possible
+                    // paths to the target, so we only want to add the likelihood once
+                    // despite visiting the duplicated edges in the `jumpTab` array
+                    // multiple times.
+                    existingEdge = true;
+                }
 
-                    // Handle the profile update, once we get our hands on the old edge.
-                    // (see notes in fgChangeSwitchBlock for why this extra step is necessary)
-                    //
-                    // We do it slightly differently here so we don't lose the old
-                    // edge weight propagation that would sometimes happen
-                    //
-                    if ((oldEdge != nullptr) && !newEdge->hasLikelihood())
-                    {
-                        newEdge->setLikelihood(oldEdge->getLikelihood());
-                    }
+                if (jumpTab[i]->getDestinationBlock() == oldTarget)
+                {
+                    assert((oldEdge == nullptr) || (oldEdge == jumpTab[i]));
+                    oldEdge = jumpTab[i];
+                    fgRemoveRefPred(oldEdge);
+                    newEdge    = fgAddRefPred(newTarget, block, oldEdge);
+                    jumpTab[i] = newEdge;
+                    changed    = true;
                 }
             }
 
-            if (changed)
+            if (existingEdge)
             {
-                InvalidateUniqueSwitchSuccMap();
+                assert(oldEdge != nullptr);
+                assert(oldEdge->getSourceBlock() == block);
+                assert(oldEdge->getDestinationBlock() == oldTarget);
+                assert(newEdge != nullptr);
+                assert(newEdge->getSourceBlock() == block);
+                assert(newEdge->getDestinationBlock() == newTarget);
+
+                newEdge->addLikelihood(oldEdge->getLikelihood());
             }
+
+            assert(changed);
+            InvalidateUniqueSwitchSuccMap();
             break;
         }
 
+        case BBJ_EHFINALLYRET:
+            fgReplaceEhfSuccessor(block, oldTarget, newTarget);
+            break;
+
         default:
             assert(!"Block doesn't have a jump target!");
             unreached();
@@ -717,51 +748,36 @@ void Compiler::fgReplaceJumpTarget(BasicBlock* block, BasicBlock* oldTarget, Bas
 }
 
 //------------------------------------------------------------------------
-// fgReplacePred: update the predecessor list, swapping one pred for another
+// fgReplacePred: redirects the given edge to a new predecessor block
 //
 // Arguments:
-//   block - block with the pred list we want to update
-//   oldPred - pred currently appearing in block's pred list
-//   newPred - pred that will take oldPred's place.
+//   edge - the edge whose source block we want to update
+//   newPred - the new predecessor block for edge
 //
 // Notes:
 //
-// A block can only appear once in the preds list. If a predecessor has multiple
-// ways to get to this block, then the pred edge DupCount will be >1.
-//
 // This function assumes that all branches from the predecessor (practically, that all
-// switch cases that target this block) are changed to branch from the new predecessor,
+// switch cases that target the successor block) are changed to branch from the new predecessor,
 // with the same dup count.
 //
-// Note that the block bbRefs is not changed, since 'block' has the same number of
+// Note that the successor block's bbRefs is not changed, since it has the same number of
 // references as before, just from a different predecessor block.
 //
 // Also note this may cause sorting of the pred list.
 //
-void Compiler::fgReplacePred(BasicBlock* block, BasicBlock* oldPred, BasicBlock* newPred)
+void Compiler::fgReplacePred(FlowEdge* edge, BasicBlock* const newPred)
 {
-    noway_assert(block != nullptr);
-    noway_assert(oldPred != nullptr);
-    noway_assert(newPred != nullptr);
-
-    bool modified = false;
+    assert(edge != nullptr);
+    assert(newPred != nullptr);
+    assert(edge->getSourceBlock() != newPred);
 
-    for (FlowEdge* const pred : block->PredEdges())
-    {
-        if (oldPred == pred->getSourceBlock())
-        {
-            pred->setSourceBlock(newPred);
-            modified = true;
-            break;
-        }
-    }
+    edge->setSourceBlock(newPred);
 
     // We may now need to reorder the pred list.
     //
-    if (modified)
-    {
-        block->ensurePredListOrder(this);
-    }
+    BasicBlock* succBlock = edge->getDestinationBlock();
+    assert(succBlock != nullptr);
+    succBlock->ensurePredListOrder(this);
 }
 
 /*****************************************************************************
@@ -881,7 +897,10 @@ BasicBlock* Compiler::fgLookupBB(unsigned addr)
 class FgStack
 {
 public:
-    FgStack() : slot0(SLOT_INVALID), slot1(SLOT_INVALID), depth(0)
+    FgStack()
+        : slot0(SLOT_INVALID)
+        , slot1(SLOT_INVALID)
+        , depth(0)
     {
         // Empty
     }
@@ -1282,8 +1301,10 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
                                 break;
                             }
 
+                            case NI_System_SpanHelpers_ClearWithoutReferences:
+                            case NI_System_SpanHelpers_Fill:
                             case NI_System_SpanHelpers_SequenceEqual:
-                            case NI_System_Buffer_Memmove:
+                            case NI_System_SpanHelpers_Memmove:
                             {
                                 if (FgStack::IsConstArgument(pushedStack.Top(), impInlineInfo))
                                 {
@@ -2916,10 +2937,21 @@ void Compiler::fgLinkBasicBlocks()
             {
                 BasicBlock* const trueTarget  = fgLookupBB(curBBdesc->GetTargetOffs());
                 BasicBlock* const falseTarget = curBBdesc->Next();
-                curBBdesc->SetTrueTarget(trueTarget);
-                curBBdesc->SetFalseTarget(falseTarget);
-                fgAddRefPred<initializingPreds>(trueTarget, curBBdesc);
-                fgAddRefPred<initializingPreds>(falseTarget, curBBdesc);
+                FlowEdge* const   trueEdge    = fgAddRefPred<initializingPreds>(trueTarget, curBBdesc);
+                FlowEdge* const   falseEdge   = fgAddRefPred<initializingPreds>(falseTarget, curBBdesc);
+                curBBdesc->SetTrueEdge(trueEdge);
+                curBBdesc->SetFalseEdge(falseEdge);
+
+                if (trueEdge == falseEdge)
+                {
+                    assert(trueEdge->getDupCount() == 2);
+                    trueEdge->setLikelihood(1.0);
+                }
+                else
+                {
+                    trueEdge->setLikelihood(0.5);
+                    falseEdge->setLikelihood(0.5);
+                }
 
                 if (trueTarget->bbNum <= curBBdesc->bbNum)
                 {
@@ -2942,10 +2974,10 @@ void Compiler::fgLinkBasicBlocks()
                 assert(!(curBBdesc->IsLast() && jumpsToNext));
                 BasicBlock* const jumpDest = jumpsToNext ? curBBdesc->Next() : fgLookupBB(curBBdesc->GetTargetOffs());
 
-                // Redundantly use SetKindAndTarget() instead of SetTarget() just this once,
-                // so we don't break the HasInitializedTarget() invariant of SetTarget().
-                curBBdesc->SetKindAndTarget(curBBdesc->GetKind(), jumpDest);
-                fgAddRefPred<initializingPreds>(jumpDest, curBBdesc);
+                // Redundantly use SetKindAndTargetEdge() instead of SetTargetEdge() just this once,
+                // so we don't break the HasInitializedTarget() invariant of SetTargetEdge().
+                FlowEdge* const newEdge = fgAddRefPred<initializingPreds>(jumpDest, curBBdesc);
+                curBBdesc->SetKindAndTargetEdge(curBBdesc->GetKind(), newEdge);
 
                 if (curBBdesc->GetTarget()->bbNum <= curBBdesc->bbNum)
                 {
@@ -2971,23 +3003,26 @@ void Compiler::fgLinkBasicBlocks()
 
             case BBJ_SWITCH:
             {
-                unsigned     jumpCnt = curBBdesc->GetSwitchTargets()->bbsCount;
-                BasicBlock** jumpPtr = curBBdesc->GetSwitchTargets()->bbsDstTab;
+                const unsigned numSucc = curBBdesc->GetSwitchTargets()->bbsCount;
+                unsigned       jumpCnt = numSucc;
+                FlowEdge**     jumpPtr = curBBdesc->GetSwitchTargets()->bbsDstTab;
 
                 do
                 {
-                    BasicBlock* jumpDest = fgLookupBB((unsigned)*(size_t*)jumpPtr);
-                    *jumpPtr             = jumpDest;
-                    fgAddRefPred<initializingPreds>(jumpDest, curBBdesc);
-                    if ((*jumpPtr)->bbNum <= curBBdesc->bbNum)
+                    BasicBlock*     jumpDest = fgLookupBB((unsigned)*(size_t*)jumpPtr);
+                    FlowEdge* const newEdge  = fgAddRefPred<initializingPreds>(jumpDest, curBBdesc);
+
+                    newEdge->setLikelihood((1.0 / numSucc) * newEdge->getDupCount());
+                    *jumpPtr = newEdge;
+                    if (jumpDest->bbNum <= curBBdesc->bbNum)
                     {
-                        fgMarkBackwardJump(*jumpPtr, curBBdesc);
+                        fgMarkBackwardJump(jumpDest, curBBdesc);
                     }
                 } while (++jumpPtr, --jumpCnt);
 
                 /* Default case of CEE_SWITCH (next block), is at end of jumpTab[] */
 
-                noway_assert(curBBdesc->NextIs(*(jumpPtr - 1)));
+                noway_assert(curBBdesc->NextIs((*(jumpPtr - 1))->getDestinationBlock()));
                 break;
             }
 
@@ -3088,7 +3123,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
                 codeAddr += sizeof(__int8);
                 goto DECODE_OPCODE;
 
-            /* Check to see if we have a jump/return opcode */
+                /* Check to see if we have a jump/return opcode */
 
             case CEE_BRFALSE:
             case CEE_BRFALSE_S:
@@ -3150,8 +3185,8 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
                 unsigned jmpBase;
                 unsigned jmpCnt; // # of switch cases (excluding default)
 
-                BasicBlock** jmpTab;
-                BasicBlock** jmpPtr;
+                FlowEdge** jmpTab;
+                FlowEdge** jmpPtr;
 
                 /* Allocate the switch descriptor */
 
@@ -3168,7 +3203,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
 
                 /* Allocate the jump table */
 
-                jmpPtr = jmpTab = new (this, CMK_BasicBlock) BasicBlock*[jmpCnt + 1];
+                jmpPtr = jmpTab = new (this, CMK_FlowEdge) FlowEdge*[jmpCnt + 1];
 
                 /* Fill in the jump table */
 
@@ -3178,12 +3213,12 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
                     codeAddr += 4;
 
                     // store the offset in the pointer.  We change these in fgLinkBasicBlocks().
-                    *jmpPtr++ = (BasicBlock*)(size_t)(jmpBase + jmpDist);
+                    *jmpPtr++ = (FlowEdge*)(size_t)(jmpBase + jmpDist);
                 }
 
                 /* Append the default label to the target table */
 
-                *jmpPtr++ = (BasicBlock*)(size_t)jmpBase;
+                *jmpPtr++ = (FlowEdge*)(size_t)jmpBase;
 
                 /* Make sure we found the right number of labels */
 
@@ -3271,7 +3306,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
                     // statement in the block.
                     // Otherwise, we will assert at the following line in fgMorphCall()
                     //     noway_assert(fgMorphStmt->GetNextStmt() == NULL);
-                    )
+                )
                 {
                     // Neither .tailcall prefix, no tailcall stress. So move on.
                     break;
@@ -3466,7 +3501,6 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
             // Jump to the next block
             jmpKind = BBJ_ALWAYS;
             jmpAddr = nxtBBoffs;
-            bbFlags |= BBF_NONE_QUIRK;
         }
 
         assert(jmpKind != BBJ_COUNT);
@@ -3520,7 +3554,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
 
     noway_assert(codeAddr == codeEndp);
 
-    /* Finally link up the bbTarget of the blocks together */
+    /* Finally link up the targets of the blocks together */
 
     fgLinkBasicBlocks();
 
@@ -3838,10 +3872,9 @@ void Compiler::fgFindBasicBlocks()
                 if (block->KindIs(BBJ_EHFILTERRET))
                 {
                     // Mark catch handler as successor.
-                    block->SetTarget(hndBegBB);
                     FlowEdge* const newEdge = fgAddRefPred(hndBegBB, block);
-                    newEdge->setLikelihood(1.0);
-                    assert(block->GetTarget()->bbCatchTyp == BBCT_FILTER_HANDLER);
+                    block->SetTargetEdge(newEdge);
+                    assert(hndBegBB->bbCatchTyp == BBCT_FILTER_HANDLER);
                     break;
                 }
             }
@@ -3998,15 +4031,15 @@ void Compiler::fgFindBasicBlocks()
 #endif
         }
 
-/*  Init ebdHandlerNestingLevel of current clause, and bump up value for all
- *  enclosed clauses (which have to be before it in the table).
- *  Innermost try-finally blocks must precede outermost
- *  try-finally blocks.
- */
+        /*  Init ebdHandlerNestingLevel of current clause, and bump up value for all
+         *  enclosed clauses (which have to be before it in the table).
+         *  Innermost try-finally blocks must precede outermost
+         *  try-finally blocks.
+         */
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
         HBtab->ebdHandlerNestingLevel = 0;
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
         HBtab->ebdEnclosingTryIndex = EHblkDsc::NO_ENCLOSING_INDEX;
         HBtab->ebdEnclosingHndIndex = EHblkDsc::NO_ENCLOSING_INDEX;
@@ -4016,12 +4049,12 @@ void Compiler::fgFindBasicBlocks()
 
         for (EHblkDsc* xtab = compHndBBtab; xtab < HBtab; xtab++)
         {
-#if !defined(FEATURE_EH_FUNCLETS)
-            if (jitIsBetween(xtab->ebdHndBegOffs(), hndBegOff, hndEndOff))
+#if defined(FEATURE_EH_WINDOWS_X86)
+            if (!UsesFunclets() && jitIsBetween(xtab->ebdHndBegOffs(), hndBegOff, hndEndOff))
             {
                 xtab->ebdHandlerNestingLevel++;
             }
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
             /* If we haven't recorded an enclosing try index for xtab then see
              *  if this EH region should be recorded.  We check if the
@@ -4054,15 +4087,16 @@ void Compiler::fgFindBasicBlocks()
 
     } // end foreach handler table entry
 
-#if !defined(FEATURE_EH_FUNCLETS)
-
-    for (EHblkDsc* const HBtab : EHClauses(this))
+#if defined(FEATURE_EH_WINDOWS_X86)
+    if (!UsesFunclets())
     {
-        if (ehMaxHndNestingCount <= HBtab->ebdHandlerNestingLevel)
-            ehMaxHndNestingCount = HBtab->ebdHandlerNestingLevel + 1;
+        for (EHblkDsc* const HBtab : EHClauses(this))
+        {
+            if (ehMaxHndNestingCount <= HBtab->ebdHandlerNestingLevel)
+                ehMaxHndNestingCount = HBtab->ebdHandlerNestingLevel + 1;
+        }
     }
-
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
     {
         // always run these checks for a debug build
@@ -4173,10 +4207,7 @@ void Compiler::fgFixEntryFlowForOSR()
     //
     fgEnsureFirstBBisScratch();
     assert(fgFirstBB->KindIs(BBJ_ALWAYS) && fgFirstBB->JumpsToNext());
-    fgRemoveRefPred(fgFirstBB->GetTarget(), fgFirstBB);
-    fgFirstBB->SetKindAndTarget(BBJ_ALWAYS, fgOSREntryBB);
-    FlowEdge* const edge = fgAddRefPred(fgOSREntryBB, fgFirstBB);
-    edge->setLikelihood(1.0);
+    fgRedirectTargetEdge(fgFirstBB, fgOSREntryBB);
 
     // We don't know the right weight for this block, since
     // execution of the method was interrupted within the
@@ -4300,7 +4331,7 @@ void Compiler::fgCheckBasicBlockControlFlow()
                 }
                 break;
 
-            case BBJ_EHCATCHRET:  // block ends with a leave out of a catch (only #if defined(FEATURE_EH_FUNCLETS))
+            case BBJ_EHCATCHRET:  // block ends with a leave out of a catch (only if UsesFunclets() == true)
             case BBJ_CALLFINALLY: // block always calls the target finally
             default:
                 noway_assert(!"Unexpected bbKind"); // these blocks don't get created until importing
@@ -4725,14 +4756,15 @@ BasicBlock* Compiler::fgSplitBlockAtEnd(BasicBlock* curr)
     {
         // For each successor of the original block, set the new block as their predecessor.
 
-        for (BasicBlock* const succ : curr->Succs(this))
+        for (FlowEdge* const succEdge : curr->SuccEdges())
         {
-            if (succ != newBlock)
-            {
-                JITDUMP(FMT_BB " previous predecessor was " FMT_BB ", now is " FMT_BB "\n", succ->bbNum, curr->bbNum,
-                        newBlock->bbNum);
-                fgReplacePred(succ, curr, newBlock);
-            }
+            // For non-switch blocks, successor iterator should not iterate duplicates.
+            assert(succEdge->getSourceBlock() != newBlock);
+
+            BasicBlock* const succBlock = succEdge->getDestinationBlock();
+            JITDUMP(FMT_BB " previous predecessor was " FMT_BB ", now is " FMT_BB "\n", succBlock->bbNum, curr->bbNum,
+                    newBlock->bbNum);
+            fgReplacePred(succEdge, newBlock);
         }
     }
 
@@ -4764,19 +4796,16 @@ BasicBlock* Compiler::fgSplitBlockAtEnd(BasicBlock* curr)
     // Remove flags from the old block that are no longer possible.
     curr->RemoveFlags(BBF_HAS_JMP | BBF_RETLESS_CALL);
 
+    // Default to fallthrough, and add the arc for that.
+    FlowEdge* const newEdge = fgAddRefPred(newBlock, curr);
+
     // Transfer the kind and target. Do this after the code above, to avoid null-ing out the old targets used by the
-    // above code (and so newBlock->bbNext is valid, so SetCond() can initialize bbFalseTarget if newBlock is a
-    // BBJ_COND).
+    // above code.
     newBlock->TransferTarget(curr);
 
-    // Default to fallthrough, and add the arc for that.
-    curr->SetKindAndTarget(BBJ_ALWAYS, newBlock);
-    curr->SetFlags(BBF_NONE_QUIRK);
+    curr->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
     assert(curr->JumpsToNext());
 
-    FlowEdge* const newEdge = fgAddRefPred(newBlock, curr);
-    newEdge->setLikelihood(1.0);
-
     return newBlock;
 }
 
@@ -4866,7 +4895,6 @@ BasicBlock* Compiler::fgSplitBlockBeforeTree(
 
     // prevBb should flow into block
     assert(prevBb->KindIs(BBJ_ALWAYS) && prevBb->JumpsToNext() && prevBb->NextIs(block));
-    prevBb->SetFlags(BBF_NONE_QUIRK);
 
     return block;
 }
@@ -4999,15 +5027,12 @@ BasicBlock* Compiler::fgSplitEdge(BasicBlock* curr, BasicBlock* succ)
         // an immediately following block of a BBJ_SWITCH (which has
         // no fall-through path). For this case, simply insert a new
         // fall-through block after 'curr'.
-        // TODO-NoFallThrough: Once bbFalseTarget can diverge from bbNext, this will be unnecessary for BBJ_COND
-        newBlock = fgNewBBafter(BBJ_ALWAYS, curr, true /* extendRegion */, /* jumpDest */ succ);
-        newBlock->SetFlags(BBF_NONE_QUIRK);
-        assert(newBlock->JumpsToNext());
+        newBlock = fgNewBBafter(BBJ_ALWAYS, curr, true /* extendRegion */);
     }
     else
     {
         // The new block always jumps to 'succ'
-        newBlock = fgNewBBinRegion(BBJ_ALWAYS, curr, /* jumpDest */ succ, /* isRunRarely */ curr->isRunRarely());
+        newBlock = fgNewBBinRegion(BBJ_ALWAYS, curr, /* isRunRarely */ curr->isRunRarely());
     }
     newBlock->CopyFlags(curr, succ->GetFlagsRaw() & BBF_BACKWARD_JUMP);
 
@@ -5019,7 +5044,7 @@ BasicBlock* Compiler::fgSplitEdge(BasicBlock* curr, BasicBlock* succ)
 
     // And 'succ' has 'newBlock' as a new predecessor.
     FlowEdge* const newEdge = fgAddRefPred(succ, newBlock);
-    newEdge->setLikelihood(1.0);
+    newBlock->SetTargetEdge(newEdge);
 
     // This isn't accurate, but it is complex to compute a reasonable number so just assume that we take the
     // branch 50% of the time.
@@ -5124,18 +5149,19 @@ void Compiler::fgUnlinkRange(BasicBlock* bBeg, BasicBlock* bEnd)
         fgFirstColdBlock = bPrev->Next();
     }
 
-#if defined(FEATURE_EH_FUNCLETS)
 #ifdef DEBUG
-    // You can't unlink a range that includes the first funclet block. A range certainly
-    // can't cross the non-funclet/funclet region. And you can't unlink the first block
-    // of the first funclet with this, either. (If that's necessary, it could be allowed
-    // by updating fgFirstFuncletBB to bEnd->bbNext.)
-    for (BasicBlock* tempBB = bBeg; tempBB != bEnd->Next(); tempBB = tempBB->Next())
+    if (UsesFunclets())
     {
-        assert(tempBB != fgFirstFuncletBB);
+        // You can't unlink a range that includes the first funclet block. A range certainly
+        // can't cross the non-funclet/funclet region. And you can't unlink the first block
+        // of the first funclet with this, either. (If that's necessary, it could be allowed
+        // by updating fgFirstFuncletBB to bEnd->bbNext.)
+        for (BasicBlock* tempBB = bBeg; tempBB != bEnd->Next(); tempBB = tempBB->Next())
+        {
+            assert(tempBB != fgFirstFuncletBB);
+        }
     }
 #endif // DEBUG
-#endif // FEATURE_EH_FUNCLETS
 }
 
 //------------------------------------------------------------------------
@@ -5170,13 +5196,11 @@ BasicBlock* Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable)
 
         fgUnreachableBlock(block);
 
-#if defined(FEATURE_EH_FUNCLETS)
         // If block was the fgFirstFuncletBB then set fgFirstFuncletBB to block->bbNext
         if (block == fgFirstFuncletBB)
         {
             fgFirstFuncletBB = block->Next();
         }
-#endif // FEATURE_EH_FUNCLETS
 
         // If this is the first Cold basic block update fgFirstColdBlock
         if (block->IsFirstColdBlock(this))
@@ -5246,13 +5270,11 @@ BasicBlock* Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable)
             fgFirstColdBlock = block->Next();
         }
 
-#if defined(FEATURE_EH_FUNCLETS)
         // Update fgFirstFuncletBB if necessary
         if (block == fgFirstFuncletBB)
         {
             fgFirstFuncletBB = block->Next();
         }
-#endif // FEATURE_EH_FUNCLETS
 
         // Update successor block start IL offset, if empty predecessor
         // covers the immediately preceding range.
@@ -5280,9 +5302,9 @@ BasicBlock* Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable)
          * First, remove 'block' from the predecessor list of succBlock.
          */
 
-        fgRemoveRefPred(succBlock, block);
+        fgRemoveRefPred(block->GetTargetEdge());
 
-        for (BasicBlock* const predBlock : block->PredBlocks())
+        for (BasicBlock* const predBlock : block->PredBlocksEditing())
         {
             /* change all jumps/refs to the removed block */
             switch (predBlock->GetKind())
@@ -5297,11 +5319,8 @@ BasicBlock* Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable)
                 case BBJ_ALWAYS:
                 case BBJ_EHCATCHRET:
                 case BBJ_SWITCH:
-                    fgReplaceJumpTarget(predBlock, block, succBlock);
-                    break;
-
                 case BBJ_EHFINALLYRET:
-                    fgReplaceEhfSuccessor(predBlock, block, succBlock);
+                    fgReplaceJumpTarget(predBlock, block, succBlock);
                     break;
             }
         }
@@ -5325,7 +5344,7 @@ BasicBlock* Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable)
                 assert(!bPrev->FalseTargetIs(block));
 
                 /* Check if both sides of the BBJ_COND now jump to the same block */
-                if (bPrev->TrueTargetIs(bPrev->GetFalseTarget()))
+                if (bPrev->TrueEdgeIs(bPrev->GetFalseEdge()))
                 {
                     fgRemoveConditionalJump(bPrev);
                 }
@@ -5366,9 +5385,9 @@ void Compiler::fgPrepareCallFinallyRetForRemoval(BasicBlock* block)
     // However, we might not have marked the BBJ_CALLFINALLY as BBF_RETLESS_CALL even though it is.
     // (Some early flow optimization should probably aggressively mark these as BBF_RETLESS_CALL
     // and not depend on fgRemoveBlock() to do that.)
-    for (BasicBlock* const leavePredBlock : block->PredBlocks())
+    for (FlowEdge* leavePredEdge : block->PredEdges())
     {
-        fgRemoveEhfSuccessor(leavePredBlock, block);
+        fgRemoveEhfSuccessor(leavePredEdge);
     }
     assert(block->bbRefs == 0);
     assert(block->bbPreds == nullptr);
@@ -5401,16 +5420,21 @@ BasicBlock* Compiler::fgConnectFallThrough(BasicBlock* bSrc, BasicBlock* bDst)
     if (bSrc->KindIs(BBJ_COND) && bSrc->FalseTargetIs(bDst) && !bSrc->NextIs(bDst))
     {
         // Add a new block after bSrc which jumps to 'bDst'
-        jmpBlk = fgNewBBafter(BBJ_ALWAYS, bSrc, true, bDst);
-        bSrc->SetFalseTarget(jmpBlk);
-        fgAddRefPred(jmpBlk, bSrc, fgGetPredForBlock(bDst, bSrc));
+        jmpBlk                  = fgNewBBafter(BBJ_ALWAYS, bSrc, true);
+        FlowEdge* const oldEdge = bSrc->GetFalseEdge();
+        // Access the likelihood of oldEdge before
+        // it gets reset by SetTargetEdge below.
+        //
+        FlowEdge* const newEdge = fgAddRefPred(jmpBlk, bSrc, oldEdge);
+        fgReplacePred(oldEdge, jmpBlk);
+        jmpBlk->SetTargetEdge(oldEdge);
+        assert(jmpBlk->TargetIs(bDst));
+        bSrc->SetFalseEdge(newEdge);
 
         // When adding a new jmpBlk we will set the bbWeight and bbFlags
         //
         if (fgHaveValidEdgeWeights && fgHaveProfileWeights())
         {
-            FlowEdge* const newEdge = fgGetPredForBlock(jmpBlk, bSrc);
-
             jmpBlk->bbWeight = (newEdge->edgeWeightMin() + newEdge->edgeWeightMax()) / 2;
             if (bSrc->bbWeight == BB_ZERO_WEIGHT)
             {
@@ -5448,15 +5472,9 @@ BasicBlock* Compiler::fgConnectFallThrough(BasicBlock* bSrc, BasicBlock* bDst)
             }
         }
 
-        fgReplacePred(bDst, bSrc, jmpBlk);
-
         JITDUMP("Added an unconditional jump to " FMT_BB " after block " FMT_BB "\n", jmpBlk->GetTarget()->bbNum,
                 bSrc->bbNum);
     }
-    else if (bSrc->KindIs(BBJ_ALWAYS) && bSrc->HasInitializedTarget() && bSrc->JumpsToNext())
-    {
-        bSrc->SetFlags(BBF_NONE_QUIRK);
-    }
 
     return jmpBlk;
 }
@@ -5613,7 +5631,6 @@ bool Compiler::fgEhAllowsMoveBlock(BasicBlock* bBefore, BasicBlock* bAfter)
 void Compiler::fgMoveBlocksAfter(BasicBlock* bStart, BasicBlock* bEnd, BasicBlock* insertAfterBlk)
 {
     /* We have decided to insert the block(s) after 'insertAfterBlk' */
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     if (verbose)
@@ -5666,10 +5683,8 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r
     BasicBlock* bLast   = nullptr;
     BasicBlock* bPrev   = nullptr;
 
-#if defined(FEATURE_EH_FUNCLETS)
     // We don't support moving try regions... yet?
-    noway_assert(relocateType == FG_RELOCATE_HANDLER);
-#endif // FEATURE_EH_FUNCLETS
+    noway_assert(!UsesFunclets() || relocateType == FG_RELOCATE_HANDLER);
 
     HBtab = ehGetDsc(regionIndex);
 
@@ -5707,25 +5722,24 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r
         goto FAILURE;
     }
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
     // In the funclets case, we still need to set some information on the handler blocks
-    if (bLast->IsLast())
+    if (!UsesFunclets() && bLast->IsLast())
     {
         INDEBUG(reason = "region is already at the end of the method";)
         goto FAILURE;
     }
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
     // Walk the block list for this purpose:
     // 1. Verify that all the blocks in the range are either all rarely run or not rarely run.
     // When creating funclets, we ignore the run rarely flag, as we need to be able to move any blocks
     // in the range.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
     bool isRare;
     isRare = bStart->isRunRarely();
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
     block = fgFirstBB;
     while (true)
     {
@@ -5743,14 +5757,14 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r
 
         if (inTheRange)
         {
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
             // Unless all blocks are (not) run rarely we must return false.
-            if (isRare != block->isRunRarely())
+            if (!UsesFunclets() && isRare != block->isRunRarely())
             {
                 INDEBUG(reason = "this region contains both rarely run and non-rarely run blocks";)
                 goto FAILURE;
             }
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
             validRange = true;
         }
@@ -5778,11 +5792,10 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r
         fgDispHandlerTab();
     }
 
-#if !defined(FEATURE_EH_FUNCLETS)
-
+#if defined(FEATURE_EH_WINDOWS_X86)
     // This is really expensive, and quickly becomes O(n^n) with funclets
     // so only do it once after we've created them (see fgCreateFunclets)
-    if (expensiveDebugCheckLevel >= 2)
+    if (!UsesFunclets() && expensiveDebugCheckLevel >= 2)
     {
         fgDebugCheckBBlist();
     }
@@ -5790,16 +5803,15 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r
 
 #endif // DEBUG
 
-#if defined(FEATURE_EH_FUNCLETS)
-
-    bStart->SetFlags(BBF_FUNCLET_BEG); // Mark the start block of the funclet
-
-    if (bMiddle != nullptr)
+    if (UsesFunclets())
     {
-        bMiddle->SetFlags(BBF_FUNCLET_BEG); // Also mark the start block of a filter handler as a funclet
-    }
+        bStart->SetFlags(BBF_FUNCLET_BEG); // Mark the start block of the funclet
 
-#endif // FEATURE_EH_FUNCLETS
+        if (bMiddle != nullptr)
+        {
+            bMiddle->SetFlags(BBF_FUNCLET_BEG); // Also mark the start block of a filter handler as a funclet
+        }
+    }
 
     BasicBlock* bNext;
     bNext = bLast->Next();
@@ -5810,60 +5822,134 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r
     BasicBlock* insertAfterBlk;
     insertAfterBlk = fgLastBB;
 
-#if defined(FEATURE_EH_FUNCLETS)
-
-    // There are several cases we need to consider when moving an EH range.
-    // If moving a range X, we must consider its relationship to every other EH
-    // range A in the table. Note that each entry in the table represents both
-    // a protected region and a handler region (possibly including a filter region
-    // that must live before and adjacent to the handler region), so we must
-    // consider try and handler regions independently. These are the cases:
-    // 1. A is completely contained within X (where "completely contained" means
-    //    that the 'begin' and 'last' parts of A are strictly between the 'begin'
-    //    and 'end' parts of X, and aren't equal to either, for example, they don't
-    //    share 'last' blocks). In this case, when we move X, A moves with it, and
-    //    the EH table doesn't need to change.
-    // 2. X is completely contained within A. In this case, X gets extracted from A,
-    //    and the range of A shrinks, but because A is strictly within X, the EH
-    //    table doesn't need to change.
-    // 3. A and X have exactly the same range. In this case, A is moving with X and
-    //    the EH table doesn't need to change.
-    // 4. A and X share the 'last' block. There are two sub-cases:
-    //    (a) A is a larger range than X (such that the beginning of A precedes the
-    //        beginning of X): in this case, we are moving the tail of A. We set the
-    //        'last' block of A to the block preceding the beginning block of X.
-    //    (b) A is a smaller range than X. Thus, we are moving the entirety of A along
-    //        with X. In this case, nothing in the EH record for A needs to change.
-    // 5. A and X share the 'beginning' block (but aren't the same range, as in #3).
-    //    This can never happen here, because we are only moving handler ranges (we don't
-    //    move try ranges), and handler regions cannot start at the beginning of a try
-    //    range or handler range and be a subset.
-    //
-    // Note that A and X must properly nest for the table to be well-formed. For example,
-    // the beginning of A can't be strictly within the range of X (that is, the beginning
-    // of A isn't shared with the beginning of X) and the end of A outside the range.
+    if (UsesFunclets())
+    {
+        // There are several cases we need to consider when moving an EH range.
+        // If moving a range X, we must consider its relationship to every other EH
+        // range A in the table. Note that each entry in the table represents both
+        // a protected region and a handler region (possibly including a filter region
+        // that must live before and adjacent to the handler region), so we must
+        // consider try and handler regions independently. These are the cases:
+        // 1. A is completely contained within X (where "completely contained" means
+        //    that the 'begin' and 'last' parts of A are strictly between the 'begin'
+        //    and 'end' parts of X, and aren't equal to either, for example, they don't
+        //    share 'last' blocks). In this case, when we move X, A moves with it, and
+        //    the EH table doesn't need to change.
+        // 2. X is completely contained within A. In this case, X gets extracted from A,
+        //    and the range of A shrinks, but because A is strictly within X, the EH
+        //    table doesn't need to change.
+        // 3. A and X have exactly the same range. In this case, A is moving with X and
+        //    the EH table doesn't need to change.
+        // 4. A and X share the 'last' block. There are two sub-cases:
+        //    (a) A is a larger range than X (such that the beginning of A precedes the
+        //        beginning of X): in this case, we are moving the tail of A. We set the
+        //        'last' block of A to the block preceding the beginning block of X.
+        //    (b) A is a smaller range than X. Thus, we are moving the entirety of A along
+        //        with X. In this case, nothing in the EH record for A needs to change.
+        // 5. A and X share the 'beginning' block (but aren't the same range, as in #3).
+        //    This can never happen here, because we are only moving handler ranges (we don't
+        //    move try ranges), and handler regions cannot start at the beginning of a try
+        //    range or handler range and be a subset.
+        //
+        // Note that A and X must properly nest for the table to be well-formed. For example,
+        // the beginning of A can't be strictly within the range of X (that is, the beginning
+        // of A isn't shared with the beginning of X) and the end of A outside the range.
 
-    for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+        for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+        {
+            if (XTnum != regionIndex) // we don't need to update our 'last' pointer
+            {
+                if (HBtab->ebdTryLast == bLast)
+                {
+                    // If we moved a set of blocks that were at the end of
+                    // a different try region then we may need to update ebdTryLast
+                    for (block = HBtab->ebdTryBeg; block != nullptr; block = block->Next())
+                    {
+                        if (block == bPrev)
+                        {
+                            // We were contained within it, so shrink its region by
+                            // setting its 'last'
+                            fgSetTryEnd(HBtab, bPrev);
+                            break;
+                        }
+                        else if (HBtab->ebdTryLast->NextIs(block))
+                        {
+                            // bPrev does not come after the TryBeg, thus we are larger, and
+                            // it is moving with us.
+                            break;
+                        }
+                    }
+                }
+                if (HBtab->ebdHndLast == bLast)
+                {
+                    // If we moved a set of blocks that were at the end of
+                    // a different handler region then we must update ebdHndLast
+                    for (block = HBtab->ebdHndBeg; block != nullptr; block = block->Next())
+                    {
+                        if (block == bPrev)
+                        {
+                            fgSetHndEnd(HBtab, bPrev);
+                            break;
+                        }
+                        else if (HBtab->ebdHndLast->NextIs(block))
+                        {
+                            // bPrev does not come after the HndBeg
+                            break;
+                        }
+                    }
+                }
+            }
+        } // end exception table iteration
+
+        // Insert the block(s) we are moving after fgLastBlock
+        fgMoveBlocksAfter(bStart, bLast, insertAfterBlk);
+
+        if (fgFirstFuncletBB == nullptr) // The funclet region isn't set yet
+        {
+            fgFirstFuncletBB = bStart;
+        }
+        else
+        {
+            assert(fgFirstFuncletBB != insertAfterBlk->Next()); // We insert at the end, not at the beginning, of the
+                                                                // funclet region.
+        }
+
+#ifdef DEBUG
+        if (verbose)
+        {
+            printf("Create funclets: moved region\n");
+            fgDispHandlerTab();
+        }
+
+// We have to wait to do this until we've created all the additional regions
+// Because this relies on ebdEnclosingTryIndex and ebdEnclosingHndIndex
+#endif // DEBUG
+    }
+    else
     {
-        if (XTnum != regionIndex) // we don't need to update our 'last' pointer
+#if defined(FEATURE_EH_WINDOWS_X86)
+        for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
         {
+            if (XTnum == regionIndex)
+            {
+                // Don't update our handler's Last info
+                continue;
+            }
+
             if (HBtab->ebdTryLast == bLast)
             {
                 // If we moved a set of blocks that were at the end of
                 // a different try region then we may need to update ebdTryLast
-                for (block = HBtab->ebdTryBeg; block != nullptr; block = block->Next())
+                for (block = HBtab->ebdTryBeg; block != NULL; block = block->Next())
                 {
                     if (block == bPrev)
                     {
-                        // We were contained within it, so shrink its region by
-                        // setting its 'last'
                         fgSetTryEnd(HBtab, bPrev);
                         break;
                     }
                     else if (HBtab->ebdTryLast->NextIs(block))
                     {
-                        // bPrev does not come after the TryBeg, thus we are larger, and
-                        // it is moving with us.
+                        // bPrev does not come after the TryBeg
                         break;
                     }
                 }
@@ -5872,7 +5958,7 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r
             {
                 // If we moved a set of blocks that were at the end of
                 // a different handler region then we must update ebdHndLast
-                for (block = HBtab->ebdHndBeg; block != nullptr; block = block->Next())
+                for (block = HBtab->ebdHndBeg; block != NULL; block = block->Next())
                 {
                     if (block == bPrev)
                     {
@@ -5886,101 +5972,12 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r
                     }
                 }
             }
-        }
-    } // end exception table iteration
+        } // end exception table iteration
 
-    // Insert the block(s) we are moving after fgLastBlock
-    fgMoveBlocksAfter(bStart, bLast, insertAfterBlk);
-
-    if (fgFirstFuncletBB == nullptr) // The funclet region isn't set yet
-    {
-        fgFirstFuncletBB = bStart;
+        // We have decided to insert the block(s) after fgLastBlock
+        fgMoveBlocksAfter(bStart, bLast, insertAfterBlk);
+#endif // FEATURE_EH_WINDOWS_X86
     }
-    else
-    {
-        assert(fgFirstFuncletBB !=
-               insertAfterBlk->Next()); // We insert at the end, not at the beginning, of the funclet region.
-    }
-
-    // These asserts assume we aren't moving try regions (which we might need to do). Only
-    // try regions can have fall through into or out of the region.
-
-    noway_assert(!bPrev->bbFallsThrough()); // There can be no fall through into a filter or handler region
-    noway_assert(!bLast->bbFallsThrough()); // There can be no fall through out of a handler region
-
-#ifdef DEBUG
-    if (verbose)
-    {
-        printf("Create funclets: moved region\n");
-        fgDispHandlerTab();
-    }
-
-// We have to wait to do this until we've created all the additional regions
-// Because this relies on ebdEnclosingTryIndex and ebdEnclosingHndIndex
-#endif // DEBUG
-
-#else // !FEATURE_EH_FUNCLETS
-
-    for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
-    {
-        if (XTnum == regionIndex)
-        {
-            // Don't update our handler's Last info
-            continue;
-        }
-
-        if (HBtab->ebdTryLast == bLast)
-        {
-            // If we moved a set of blocks that were at the end of
-            // a different try region then we may need to update ebdTryLast
-            for (block = HBtab->ebdTryBeg; block != NULL; block = block->Next())
-            {
-                if (block == bPrev)
-                {
-                    fgSetTryEnd(HBtab, bPrev);
-                    break;
-                }
-                else if (HBtab->ebdTryLast->NextIs(block))
-                {
-                    // bPrev does not come after the TryBeg
-                    break;
-                }
-            }
-        }
-        if (HBtab->ebdHndLast == bLast)
-        {
-            // If we moved a set of blocks that were at the end of
-            // a different handler region then we must update ebdHndLast
-            for (block = HBtab->ebdHndBeg; block != NULL; block = block->Next())
-            {
-                if (block == bPrev)
-                {
-                    fgSetHndEnd(HBtab, bPrev);
-                    break;
-                }
-                else if (HBtab->ebdHndLast->NextIs(block))
-                {
-                    // bPrev does not come after the HndBeg
-                    break;
-                }
-            }
-        }
-    } // end exception table iteration
-
-    // We have decided to insert the block(s) after fgLastBlock
-    fgMoveBlocksAfter(bStart, bLast, insertAfterBlk);
-
-    if (bPrev->KindIs(BBJ_ALWAYS) && bPrev->JumpsToNext())
-    {
-        bPrev->SetFlags(BBF_NONE_QUIRK);
-    }
-
-    if (bLast->KindIs(BBJ_ALWAYS) && bLast->JumpsToNext())
-    {
-        bLast->SetFlags(BBF_NONE_QUIRK);
-    }
-
-#endif // !FEATURE_EH_FUNCLETS
 
     goto DONE;
 
@@ -6006,25 +6003,17 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r
  * Insert a BasicBlock before the given block.
  */
 
-BasicBlock* Compiler::fgNewBBbefore(BBKinds     jumpKind,
-                                    BasicBlock* block,
-                                    bool        extendRegion,
-                                    BasicBlock* jumpDest /* = nullptr */)
+BasicBlock* Compiler::fgNewBBbefore(BBKinds jumpKind, BasicBlock* block, bool extendRegion)
 {
     // Create a new BasicBlock and chain it in
 
-    BasicBlock* newBlk = BasicBlock::New(this, jumpKind, jumpDest);
+    BasicBlock* newBlk = BasicBlock::New(this, jumpKind);
     newBlk->SetFlags(BBF_INTERNAL);
 
     fgInsertBBbefore(block, newBlk);
 
     newBlk->bbRefs = 0;
 
-    if (newBlk->bbFallsThrough() && block->isRunRarely())
-    {
-        newBlk->bbSetRunRarely();
-    }
-
     if (extendRegion)
     {
         fgExtendEHRegionBefore(block);
@@ -6048,25 +6037,17 @@ BasicBlock* Compiler::fgNewBBbefore(BBKinds     jumpKind,
  * Insert a BasicBlock after the given block.
  */
 
-BasicBlock* Compiler::fgNewBBafter(BBKinds     jumpKind,
-                                   BasicBlock* block,
-                                   bool        extendRegion,
-                                   BasicBlock* jumpDest /* = nullptr */)
+BasicBlock* Compiler::fgNewBBafter(BBKinds jumpKind, BasicBlock* block, bool extendRegion)
 {
     // Create a new BasicBlock and chain it in
 
-    BasicBlock* newBlk = BasicBlock::New(this, jumpKind, jumpDest);
+    BasicBlock* newBlk = BasicBlock::New(this, jumpKind);
     newBlk->SetFlags(BBF_INTERNAL);
 
     fgInsertBBafter(block, newBlk);
 
     newBlk->bbRefs = 0;
 
-    if (block->bbFallsThrough() && block->isRunRarely())
-    {
-        newBlk->bbSetRunRarely();
-    }
-
     if (extendRegion)
     {
         fgExtendEHRegionAfter(block);
@@ -6095,7 +6076,6 @@ BasicBlock* Compiler::fgNewBBafter(BBKinds     jumpKind,
 //    tree              - tree that will be wrapped into a statement and
 //                        inserted in the new block.
 //    debugInfo         - debug info to propagate into the new statement.
-//    jumpDest          - the jump target of the new block. Defaults to nullptr.
 //    updateSideEffects - update side effects for the whole statement.
 //
 // Return Value:
@@ -6104,14 +6084,10 @@ BasicBlock* Compiler::fgNewBBafter(BBKinds     jumpKind,
 // Notes:
 //    The new block will have BBF_INTERNAL flag and EH region will be extended
 //
-BasicBlock* Compiler::fgNewBBFromTreeAfter(BBKinds     jumpKind,
-                                           BasicBlock* block,
-                                           GenTree*    tree,
-                                           DebugInfo&  debugInfo,
-                                           BasicBlock* jumpDest /* = nullptr */,
-                                           bool        updateSideEffects /* = false */)
+BasicBlock* Compiler::fgNewBBFromTreeAfter(
+    BBKinds jumpKind, BasicBlock* block, GenTree* tree, DebugInfo& debugInfo, bool updateSideEffects /* = false */)
 {
-    BasicBlock* newBlock = fgNewBBafter(jumpKind, block, true, jumpDest);
+    BasicBlock* newBlock = fgNewBBafter(jumpKind, block, true);
     newBlock->SetFlags(BBF_INTERNAL);
     Statement* stmt = fgNewStmtFromTree(tree, debugInfo);
     fgInsertStmtAtEnd(newBlock, stmt);
@@ -6145,16 +6121,11 @@ void Compiler::fgInsertBBbefore(BasicBlock* insertBeforeBlk, BasicBlock* newBlk)
         fgInsertBBafter(insertBeforeBlk->Prev(), newBlk);
     }
 
-#if defined(FEATURE_EH_FUNCLETS)
-
     /* Update fgFirstFuncletBB if insertBeforeBlk is the first block of the funclet region. */
-
     if (fgFirstFuncletBB == insertBeforeBlk)
     {
         fgFirstFuncletBB = newBlk;
     }
-
-#endif // FEATURE_EH_FUNCLETS
 }
 
 /*****************************************************************************
@@ -6263,8 +6234,8 @@ BasicBlock* Compiler::fgFindInsertPoint(unsigned    regionIndex,
     noway_assert(startBlk != nullptr);
     noway_assert(startBlk != endBlk);
     noway_assert((regionIndex == 0 && putInTryRegion) || // Search in the main method
-                 (putInTryRegion && regionIndex > 0 &&
-                  startBlk->bbTryIndex == regionIndex) || // Search in the specified try     region
+                 (putInTryRegion && regionIndex > 0 && startBlk->bbTryIndex == regionIndex) || // Search in the
+                                                                                               // specified try region
                  (!putInTryRegion && regionIndex > 0 &&
                   startBlk->bbHndIndex == regionIndex)); // Search in the specified handler region
 
@@ -6386,12 +6357,13 @@ BasicBlock* Compiler::fgFindInsertPoint(unsigned    regionIndex,
             }
         }
 
-        // Look for an insert location. We want blocks that don't end with a fall through.
-        // Quirk: Manually check for BBJ_COND fallthrough behavior
-        const bool blkFallsThrough =
-            blk->bbFallsThrough() && (!blk->KindIs(BBJ_COND) || blk->NextIs(blk->GetFalseTarget()));
-        const bool blkJumpsToNext = blk->KindIs(BBJ_ALWAYS) && blk->HasFlag(BBF_NONE_QUIRK) && blk->JumpsToNext();
-        if (!blkFallsThrough && !blkJumpsToNext)
+        // Look for an insert location.
+        // Avoid splitting up call-finally pairs, or jumps/false branches to the next block.
+        // (We need the HasInitializedTarget() call because fgFindInsertPoint can be called during importation,
+        // before targets are set)
+        const bool jumpsToNext       = blk->KindIs(BBJ_ALWAYS) && blk->HasInitializedTarget() && blk->JumpsToNext();
+        const bool falseBranchToNext = blk->KindIs(BBJ_COND) && blk->NextIs(blk->GetFalseTarget());
+        if (!blk->isBBCallFinallyPair() && !jumpsToNext && !falseBranchToNext)
         {
             bool updateBestBlk = true; // We will probably update the bestBlk
 
@@ -6533,7 +6505,6 @@ BasicBlock* Compiler::fgFindInsertPoint(unsigned    regionIndex,
 //               [0..compHndBBtabCount].
 //    nearBlk  - insert the new block closely after this block, if possible. If nullptr, put the new block anywhere
 //               in the requested region.
-//    jumpDest - the jump target of the new block. Defaults to nullptr.
 //    putInFilter - put the new block in the filter region given by hndIndex, as described above.
 //    runRarely - 'true' if the new block is run rarely.
 //    insertAtEnd - 'true' if the block should be inserted at the end of the region. Note: this is currently only
@@ -6546,7 +6517,6 @@ BasicBlock* Compiler::fgNewBBinRegion(BBKinds     jumpKind,
                                       unsigned    tryIndex,
                                       unsigned    hndIndex,
                                       BasicBlock* nearBlk,
-                                      BasicBlock* jumpDest /* = nullptr */,
                                       bool        putInFilter /* = false */,
                                       bool        runRarely /* = false */,
                                       bool        insertAtEnd /* = false */)
@@ -6626,7 +6596,7 @@ BasicBlock* Compiler::fgNewBBinRegion(BBKinds     jumpKind,
 
         // Figure out the start and end block range to search for an insertion location. Pick the beginning and
         // ending blocks of the target EH region (the 'endBlk' is one past the last block of the EH region, to make
-        // loop iteration easier). Note that, after funclets have been created (for FEATURE_EH_FUNCLETS),
+        // loop iteration easier). Note that, after funclets have been created (for UsesFunclets() == true),
         // this linear block range will not include blocks of handlers for try/handler clauses nested within
         // this EH region, as those blocks have been extracted as funclets. That is ok, though, because we don't
         // want to insert a block in any nested EH region.
@@ -6672,7 +6642,7 @@ _FoundAfterBlk:;
             bbKindNames[jumpKind], tryIndex, hndIndex, dspBool(putInFilter), dspBool(runRarely), dspBool(insertAtEnd),
             afterBlk->bbNum);
 
-    return fgNewBBinRegionWorker(jumpKind, afterBlk, regionIndex, putInTryRegion, jumpDest);
+    return fgNewBBinRegionWorker(jumpKind, afterBlk, regionIndex, putInTryRegion);
 }
 
 //------------------------------------------------------------------------
@@ -6683,7 +6653,6 @@ _FoundAfterBlk:;
 // Arguments:
 //    jumpKind - the jump kind of the new block to create.
 //    srcBlk   - insert the new block in the same EH region as this block, and closely after it if possible.
-//    jumpDest - the jump target of the new block. Defaults to nullptr.
 //    runRarely - 'true' if the new block is run rarely.
 //    insertAtEnd - 'true' if the block should be inserted at the end of the region. Note: this is currently only
 //                  implemented when inserting into the main function (not into any EH region).
@@ -6693,7 +6662,6 @@ _FoundAfterBlk:;
 
 BasicBlock* Compiler::fgNewBBinRegion(BBKinds     jumpKind,
                                       BasicBlock* srcBlk,
-                                      BasicBlock* jumpDest /* = nullptr */,
                                       bool        runRarely /* = false */,
                                       bool        insertAtEnd /* = false */)
 {
@@ -6712,7 +6680,7 @@ BasicBlock* Compiler::fgNewBBinRegion(BBKinds     jumpKind,
         putInFilter = ehGetDsc(hndIndex - 1)->InFilterRegionBBRange(srcBlk);
     }
 
-    return fgNewBBinRegion(jumpKind, tryIndex, hndIndex, srcBlk, jumpDest, putInFilter, runRarely, insertAtEnd);
+    return fgNewBBinRegion(jumpKind, tryIndex, hndIndex, srcBlk, putInFilter, runRarely, insertAtEnd);
 }
 
 //------------------------------------------------------------------------
@@ -6722,14 +6690,13 @@ BasicBlock* Compiler::fgNewBBinRegion(BBKinds     jumpKind,
 //
 // Arguments:
 //    jumpKind - the jump kind of the new block to create.
-//    jumpDest - the jump target of the new block. Defaults to nullptr.
 //
 // Return Value:
 //    The new block.
 
-BasicBlock* Compiler::fgNewBBinRegion(BBKinds jumpKind, BasicBlock* jumpDest /* = nullptr */)
+BasicBlock* Compiler::fgNewBBinRegion(BBKinds jumpKind)
 {
-    return fgNewBBinRegion(jumpKind, 0, 0, nullptr, jumpDest, /* putInFilter */ false, /* runRarely */ false,
+    return fgNewBBinRegion(jumpKind, 0, 0, nullptr, /* putInFilter */ false, /* runRarely */ false,
                            /* insertAtEnd */ true);
 }
 
@@ -6748,7 +6715,6 @@ BasicBlock* Compiler::fgNewBBinRegion(BBKinds jumpKind, BasicBlock* jumpDest /*
 //          set its handler index to the most nested handler region enclosing that 'try' region.
 //          Otherwise, put the block in the handler region specified by 'regionIndex', and set its 'try'
 //          index to the most nested 'try' region enclosing that handler region.
-//    jumpDest - the jump target of the new block. Defaults to nullptr.
 //
 // Return Value:
 //    The new block.
@@ -6756,13 +6722,12 @@ BasicBlock* Compiler::fgNewBBinRegion(BBKinds jumpKind, BasicBlock* jumpDest /*
 BasicBlock* Compiler::fgNewBBinRegionWorker(BBKinds     jumpKind,
                                             BasicBlock* afterBlk,
                                             unsigned    regionIndex,
-                                            bool        putInTryRegion,
-                                            BasicBlock* jumpDest /* = nullptr */)
+                                            bool        putInTryRegion)
 {
     /* Insert the new block */
     BasicBlock* afterBlkNext = afterBlk->Next();
     (void)afterBlkNext; // prevent "unused variable" error from GCC
-    BasicBlock* newBlk = fgNewBBafter(jumpKind, afterBlk, false, jumpDest);
+    BasicBlock* newBlk = fgNewBBafter(jumpKind, afterBlk, false);
 
     if (putInTryRegion)
     {
diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp
index d9461f2597d7..c214a5e5b634 100644
--- a/src/coreclr/jit/fgdiagnostic.cpp
+++ b/src/coreclr/jit/fgdiagnostic.cpp
@@ -134,7 +134,7 @@ void Compiler::fgDebugCheckUpdate()
 
         // Check for an unnecessary jumps to the next block.
         // A conditional branch should never jump to the next block as it can be folded into a BBJ_ALWAYS.
-        if (block->KindIs(BBJ_COND) && block->TrueTargetIs(block->GetFalseTarget()))
+        if (block->KindIs(BBJ_COND) && block->TrueEdgeIs(block->GetFalseEdge()))
         {
             noway_assert(!"Unnecessary jump to the next block!");
         }
@@ -389,7 +389,7 @@ const char* ConvertToUtf8(LPCWSTR wideString, CompAllocator& allocator)
 
     return alloc;
 }
-}
+} // namespace
 #endif
 
 //------------------------------------------------------------------------
@@ -546,7 +546,7 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, PhasePositi
 
     ONE_FILE_PER_METHOD:;
 
-#define FILENAME_PATTERN "%s-%s-%s-%s.%s"
+#define FILENAME_PATTERN             "%s-%s-%s-%s.%s"
 #define FILENAME_PATTERN_WITH_NUMBER "%s-%s-%s-%s~%d.%s"
 
         const size_t MaxFileNameLength = MAX_PATH_FNAME - 20 /* give us some extra buffer */;
@@ -1101,7 +1101,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase, PhasePosition pos)
                         {
                             fprintf(fgxFile, "\n            switchCases=\"%d\"", edge->getDupCount());
                         }
-                        if (bSource->GetSwitchTargets()->getDefault() == bTarget)
+                        if (bSource->GetSwitchTargets()->getDefault()->getDestinationBlock() == bTarget)
                         {
                             fprintf(fgxFile, "\n            switchDefault=\"true\"");
                         }
@@ -1249,7 +1249,10 @@ bool Compiler::fgDumpFlowGraph(Phases phase, PhasePosition pos)
 
             public:
                 RegionGraph(Compiler* comp, unsigned* blkMap, unsigned blkMapSize)
-                    : m_comp(comp), m_rgnRoot(nullptr), m_blkMap(blkMap), m_blkMapSize(blkMapSize)
+                    : m_comp(comp)
+                    , m_rgnRoot(nullptr)
+                    , m_blkMap(blkMap)
+                    , m_blkMapSize(blkMapSize)
                 {
                     // Create a root region that encompasses the whole function.
                     m_rgnRoot =
@@ -1808,6 +1811,7 @@ void Compiler::fgDumpFlowGraphLoops(FILE* file)
 
 void Compiler::fgTableDispBasicBlock(const BasicBlock* block,
                                      const BasicBlock* nextBlock /* = nullptr */,
+                                     bool              printEdgeLikelihoods /* = true */,
                                      int               blockTargetFieldWidth /* = 21 */,
                                      int               ibcColWidth /* = 0 */)
 {
@@ -1933,27 +1937,41 @@ void Compiler::fgTableDispBasicBlock(const BasicBlock* block,
     // Call `dspBlockNum()` to get the block number to print, and update `printedBlockWidth` with the width
     // of the generated string. Note that any computation using `printedBlockWidth` must be done after all
     // calls to this function.
-    auto dspBlockNum = [terseNext, nextBlock, &printedBlockWidth](const BasicBlock* b) -> const char* {
+    auto dspBlockNum = [printEdgeLikelihoods, terseNext, nextBlock,
+                        &printedBlockWidth](const FlowEdge* e) -> const char* {
         static char buffers[3][64]; // static array of 3 to allow 3 concurrent calls in one printf()
         static int  nextBufferIndex = 0;
 
-        auto& buffer    = buffers[nextBufferIndex];
-        nextBufferIndex = (nextBufferIndex + 1) % ArrLen(buffers);
+        auto& buffer              = buffers[nextBufferIndex];
+        nextBufferIndex           = (nextBufferIndex + 1) % ArrLen(buffers);
+        const size_t sizeOfBuffer = ArrLen(buffer);
+        int          written;
 
+        const BasicBlock* b = e->getDestinationBlock();
         if (b == nullptr)
         {
-            _snprintf_s(buffer, ArrLen(buffer), ArrLen(buffer), "NULL");
-            printedBlockWidth += 4;
+            written = _snprintf_s(buffer, sizeOfBuffer, sizeOfBuffer, "NULL");
+            printedBlockWidth += written;
         }
         else if (terseNext && (b == nextBlock))
         {
-            _snprintf_s(buffer, ArrLen(buffer), ArrLen(buffer), "*");
-            printedBlockWidth += 1;
+            written = _snprintf_s(buffer, sizeOfBuffer, sizeOfBuffer, "*");
+            printedBlockWidth += written;
         }
         else
         {
-            _snprintf_s(buffer, ArrLen(buffer), ArrLen(buffer), FMT_BB, b->bbNum);
-            printedBlockWidth += 2 /* BB */ + max(CountDigits(b->bbNum), 2);
+            written = _snprintf_s(buffer, sizeOfBuffer, sizeOfBuffer, FMT_BB, b->bbNum);
+            printedBlockWidth += written;
+        }
+
+        if (printEdgeLikelihoods)
+        {
+            if (e->hasLikelihood())
+            {
+                written = _snprintf_s(buffer + written, sizeOfBuffer - written, sizeOfBuffer - written,
+                                      "(" FMT_WT_NARROW ")", e->getLikelihood());
+                printedBlockWidth += written;
+            }
         }
 
         return buffer;
@@ -1970,19 +1988,19 @@ void Compiler::fgTableDispBasicBlock(const BasicBlock* block,
         {
             case BBJ_COND:
                 printedBlockWidth = 3 /* "-> " */ + 1 /* comma */ + 9 /* kind */;
-                printf("-> %s,%s", dspBlockNum(block->GetTrueTargetRaw()), dspBlockNum(block->GetFalseTargetRaw()));
+                printf("-> %s,%s", dspBlockNum(block->GetTrueEdgeRaw()), dspBlockNum(block->GetFalseEdgeRaw()));
                 printf("%*s ( cond )", blockTargetFieldWidth - printedBlockWidth, "");
                 break;
 
             case BBJ_CALLFINALLY:
                 printedBlockWidth = 3 /* "-> " */ + 9 /* kind */;
-                printf("-> %s", dspBlockNum(block->GetTargetRaw()));
+                printf("-> %s", dspBlockNum(block->GetTargetEdgeRaw()));
                 printf("%*s (callf )", blockTargetFieldWidth - printedBlockWidth, "");
                 break;
 
             case BBJ_CALLFINALLYRET:
                 printedBlockWidth = 3 /* "-> " */ + 9 /* kind */;
-                printf("-> %s", dspBlockNum(block->GetFinallyContinuation()));
+                printf("-> %s", dspBlockNum(block->GetTargetEdgeRaw()));
                 printf("%*s (callfr)", blockTargetFieldWidth - printedBlockWidth, "");
                 break;
 
@@ -1990,13 +2008,13 @@ void Compiler::fgTableDispBasicBlock(const BasicBlock* block,
                 const char* label;
                 label             = (flags & BBF_KEEP_BBJ_ALWAYS) ? "ALWAYS" : "always";
                 printedBlockWidth = 3 /* "-> " */ + 9 /* kind */;
-                printf("-> %s", dspBlockNum(block->GetTargetRaw()));
+                printf("-> %s", dspBlockNum(block->GetTargetEdgeRaw()));
                 printf("%*s (%s)", blockTargetFieldWidth - printedBlockWidth, "", label);
                 break;
 
             case BBJ_LEAVE:
                 printedBlockWidth = 3 /* "-> " */ + 9 /* kind */;
-                printf("-> %s", dspBlockNum(block->GetTargetRaw()));
+                printf("-> %s", dspBlockNum(block->GetTargetEdgeRaw()));
                 printf("%*s (leave )", blockTargetFieldWidth - printedBlockWidth, "");
                 break;
 
@@ -2015,8 +2033,8 @@ void Compiler::fgTableDispBasicBlock(const BasicBlock* block,
                 {
                     // Very early in compilation, we won't have fixed up the BBJ_EHFINALLYRET successors yet.
 
-                    const unsigned     jumpCnt = ehfDesc->bbeCount;
-                    BasicBlock** const jumpTab = ehfDesc->bbeSuccs;
+                    const unsigned   jumpCnt = ehfDesc->bbeCount;
+                    FlowEdge** const jumpTab = ehfDesc->bbeSuccs;
 
                     for (unsigned i = 0; i < jumpCnt; i++)
                     {
@@ -2041,13 +2059,13 @@ void Compiler::fgTableDispBasicBlock(const BasicBlock* block,
 
             case BBJ_EHFILTERRET:
                 printedBlockWidth = 3 /* "-> " */ + 9 /* kind */;
-                printf("-> %s", dspBlockNum(block->GetTargetRaw()));
+                printf("-> %s", dspBlockNum(block->GetTargetEdgeRaw()));
                 printf("%*s (fltret)", blockTargetFieldWidth - printedBlockWidth, "");
                 break;
 
             case BBJ_EHCATCHRET:
                 printedBlockWidth = 3 /* "-> " */ + 9 /* kind */;
-                printf("-> %s", dspBlockNum(block->GetTargetRaw()));
+                printf("-> %s", dspBlockNum(block->GetTargetEdgeRaw()));
                 printf("%*s ( cret )", blockTargetFieldWidth - printedBlockWidth, "");
                 break;
 
@@ -2068,7 +2086,7 @@ void Compiler::fgTableDispBasicBlock(const BasicBlock* block,
 
                 const BBswtDesc* const jumpSwt = block->GetSwitchTargets();
                 const unsigned         jumpCnt = jumpSwt->bbsCount;
-                BasicBlock** const     jumpTab = jumpSwt->bbsDstTab;
+                FlowEdge** const       jumpTab = jumpSwt->bbsDstTab;
 
                 for (unsigned i = 0; i < jumpCnt; i++)
                 {
@@ -2323,10 +2341,16 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock,
     maxBlockNumWidth          = max(maxBlockNumWidth, 2);
     int padWidth              = maxBlockNumWidth - 2; // Account for functions with a large number of blocks.
 
+    const bool printEdgeLikelihoods = true; // TODO: parameterize?
+
+    // Edge likelihoods are printed as "(0.123)", so take 7 characters maxmimum.
+    int edgeLikelihoodsWidth = printEdgeLikelihoods ? 7 : 0;
+
     // Calculate the field width allocated for the block target. The field width is allocated to allow for two blocks
     // for BBJ_COND. It does not include any extra space for variable-sized BBJ_EHFINALLYRET and BBJ_SWITCH.
-    int blockTargetFieldWidth = 3 /* "-> " */ + 2 /* BB */ + maxBlockNumWidth + 1 /* comma */ + 2 /* BB */ +
-                                maxBlockNumWidth + 1 /* space */ + 8 /* kind: "(xxxxxx)" */;
+    int blockTargetFieldWidth = 3 /* "-> " */ + 2 /* BB */ + maxBlockNumWidth + edgeLikelihoodsWidth + 1 /* comma */ +
+                                2 /* BB */ + maxBlockNumWidth + edgeLikelihoodsWidth + 1 /* space */ +
+                                8 /* kind: "(xxxxxx)" */;
 
     // clang-format off
 
@@ -2334,7 +2358,7 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock,
     printf("------%*s-------------------------------------%*s--------------------------%*s--------------------------\n",
         padWidth, "------------", //
         ibcColWidth, "------------", //
-        blockTargetFieldWidth, "-----------------------"); //
+        blockTargetFieldWidth, "----------------------------------------------"); //
     printf("BBnum %*sBBid ref try hnd %s     weight  %*s%s [IL range]   [jump]%*s [EH region]        [flags]\n",
         padWidth, "",
         (fgPredsComputed        ? "preds      "
@@ -2347,7 +2371,7 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock,
     printf("------%*s-------------------------------------%*s--------------------------%*s--------------------------\n",
         padWidth, "------------", //
         ibcColWidth, "------------", //
-        blockTargetFieldWidth, "-----------------------"); //
+        blockTargetFieldWidth, "----------------------------------------------"); //
 
     // clang-format on
 
@@ -2379,23 +2403,21 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock,
         {
             printf("~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~"
                    "~~~~~~~~~~~~~~~~\n",
-                   padWidth, "~~~~~~~~~~~~",                          //
-                   ibcColWidth, "~~~~~~~~~~~~",                       //
-                   blockTargetFieldWidth, "~~~~~~~~~~~~~~~~~~~~~~~"); //
+                   padWidth, "~~~~~~~~~~~~",                                                 //
+                   ibcColWidth, "~~~~~~~~~~~~",                                              //
+                   blockTargetFieldWidth, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); //
         }
 
-#if defined(FEATURE_EH_FUNCLETS)
         if (inDefaultOrder && (block == fgFirstFuncletBB))
         {
             printf("++++++%*s+++++++++++++++++++++++++++++++++++++%*s++++++++++++++++++++++++++%*s++++++++++"
                    "++++++++++++++++ funclets follow\n",
-                   padWidth, "++++++++++++",                          //
-                   ibcColWidth, "++++++++++++",                       //
-                   blockTargetFieldWidth, "+++++++++++++++++++++++"); //
+                   padWidth, "++++++++++++",                                                 //
+                   ibcColWidth, "++++++++++++",                                              //
+                   blockTargetFieldWidth, "++++++++++++++++++++++++++++++++++++++++++++++"); //
         }
-#endif // FEATURE_EH_FUNCLETS
 
-        fgTableDispBasicBlock(block, nextBlock, blockTargetFieldWidth, ibcColWidth);
+        fgTableDispBasicBlock(block, nextBlock, printEdgeLikelihoods, blockTargetFieldWidth, ibcColWidth);
 
         if (block == lastBlock)
         {
@@ -2405,9 +2427,9 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock,
 
     printf("------%*s-------------------------------------%*s--------------------------%*s------------------"
            "--------\n",
-           padWidth, "------------",                          //
-           ibcColWidth, "------------",                       //
-           blockTargetFieldWidth, "-----------------------"); //
+           padWidth, "------------",                                                 //
+           ibcColWidth, "------------",                                              //
+           blockTargetFieldWidth, "----------------------------------------------"); //
 
     if (dumpTrees)
     {
@@ -2631,7 +2653,8 @@ void Compiler::fgStress64RsltMul()
 class BBPredsChecker
 {
 public:
-    BBPredsChecker(Compiler* compiler) : comp(compiler)
+    BBPredsChecker(Compiler* compiler)
+        : comp(compiler)
     {
     }
 
@@ -2796,6 +2819,7 @@ bool BBPredsChecker::CheckJump(BasicBlock* blockPred, BasicBlock* block)
         case BBJ_EHCATCHRET:
         case BBJ_EHFILTERRET:
             assert(blockPred->TargetIs(block));
+            assert(blockPred->GetTargetEdge()->getLikelihood() == 1.0);
             return true;
 
         case BBJ_EHFINALLYRET:
@@ -2871,8 +2895,6 @@ bool BBPredsChecker::CheckEHFinallyRet(BasicBlock* blockPred, BasicBlock* block)
         }
     }
 
-#if defined(FEATURE_EH_FUNCLETS)
-
     if (!found && comp->fgFuncletsCreated)
     {
         // There is no easy way to search just the funclets that were pulled out of
@@ -2891,8 +2913,6 @@ bool BBPredsChecker::CheckEHFinallyRet(BasicBlock* blockPred, BasicBlock* block)
         }
     }
 
-#endif // FEATURE_EH_FUNCLETS
-
     assert(found && "BBJ_EHFINALLYRET predecessor of block that doesn't follow a BBJ_CALLFINALLY!");
     return found;
 }
@@ -2953,7 +2973,6 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
         return;
     }
 
-#if defined(FEATURE_EH_FUNCLETS)
     bool reachedFirstFunclet = false;
     if (fgFuncletsCreated)
     {
@@ -2967,7 +2986,6 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
             assert(fgFirstFuncletBB->HasFlag(BBF_FUNCLET_BEG));
         }
     }
-#endif // FEATURE_EH_FUNCLETS
 
     /* Check bbNum, bbRefs and bbPreds */
     // First, pick a traversal stamp, and label all the blocks with it.
@@ -3013,7 +3031,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
                 {
                     for (unsigned i = 0; i < sd.numDistinctSuccs; i++)
                     {
-                        const BasicBlock* const nonDuplicateSucc = sd.nonDuplicates[i];
+                        const BasicBlock* const nonDuplicateSucc = sd.nonDuplicates[i]->getDestinationBlock();
                         assert(nonDuplicateSucc != nullptr);
                         assert(nonDuplicateSucc->bbTraversalStamp == curTraversalStamp);
                     }
@@ -3055,7 +3073,6 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
             assert(block->bbPreds == nullptr);
         }
 
-#if defined(FEATURE_EH_FUNCLETS)
         if (fgFuncletsCreated)
         {
             //
@@ -3080,7 +3097,6 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
                 assert(block->hasHndIndex() == true);
             }
         }
-#endif // FEATURE_EH_FUNCLETS
 
         if (checkBBRefs)
         {
@@ -3164,7 +3180,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
                 //    try {
                 //        try {
                 //            LEAVE L_OUTER; // this becomes a branch to a BBJ_CALLFINALLY in an outer try region
-                //                           // (in the FEATURE_EH_CALLFINALLY_THUNKS case)
+                //                           // (in the UsesCallFinallyThunks case)
                 //        } catch {
                 //        }
                 //    } finally {
@@ -3175,7 +3191,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
                 if (ehDsc->ebdTryBeg == succBlock)
                 {
                     // The BBJ_CALLFINALLY is the first block of it's `try` region. Don't check the predecessor.
-                    // Note that this case won't occur in the FEATURE_EH_CALLFINALLY_THUNKS case, since the
+                    // Note that this case won't occur in the UsesCallFinallyThunks case, since the
                     // BBJ_CALLFINALLY in that case won't exist in the `try` region of the `finallyIndex`.
                 }
                 else
@@ -3228,7 +3244,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
 #ifndef JIT32_GCENCODER
     copiedForGenericsCtxt = ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0);
 #else  // JIT32_GCENCODER
-    copiedForGenericsCtxt        = false;
+    copiedForGenericsCtxt = false;
 #endif // JIT32_GCENCODER
 
     // This if only in support of the noway_asserts it contains.
@@ -3272,7 +3288,8 @@ void Compiler::fgDebugCheckTypes(GenTree* tree)
             DoPostOrder = true,
         };
 
-        NodeTypeValidator(Compiler* comp) : GenTreeVisitor(comp)
+        NodeTypeValidator(Compiler* comp)
+            : GenTreeVisitor(comp)
         {
         }
 
@@ -3399,20 +3416,17 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block)
                 GenTreeFlags handleKind = op1->GetIconHandleFlag();
 
                 // Some of these aren't handles to invariant data...
-                if ((handleKind == GTF_ICON_STATIC_HDL) || // Pointer to a mutable class Static variable
-                    (handleKind == GTF_ICON_BBC_PTR) ||    // Pointer to a mutable basic block count value
-                    (handleKind == GTF_ICON_FTN_ADDR) ||   // Pointer to a potentially mutable VM slot
-                    (handleKind == GTF_ICON_GLOBAL_PTR))   // Pointer to mutable data from the VM state
+                if (GenTree::HandleKindDataIsInvariant(handleKind) && (handleKind != GTF_ICON_FTN_ADDR))
+                {
+                    expectedFlags |= GTF_IND_INVARIANT;
+                }
+                else
                 {
                     // For statics, we expect the GTF_GLOB_REF to be set. However, we currently
                     // fail to set it in a number of situations, and so this check is disabled.
                     // TODO: enable checking of GTF_GLOB_REF.
                     // expectedFlags |= GTF_GLOB_REF;
                 }
-                else // All the other handle indirections are considered invariant
-                {
-                    expectedFlags |= GTF_IND_INVARIANT;
-                }
 
                 // Currently we expect all indirections with constant addresses to be nonfaulting.
                 expectedFlags |= GTF_IND_NONFAULTING;
@@ -3724,7 +3738,9 @@ void Compiler::fgDebugCheckLinkedLocals()
             UseExecutionOrder = true,
         };
 
-        DebugLocalSequencer(Compiler* comp) : GenTreeVisitor(comp), m_locals(comp->getAllocator(CMK_DebugOnly))
+        DebugLocalSequencer(Compiler* comp)
+            : GenTreeVisitor(comp)
+            , m_locals(comp->getAllocator(CMK_DebugOnly))
         {
         }
 
@@ -3993,7 +4009,8 @@ void Compiler::fgDebugCheckBlockLinks()
                 assert(uniqueSuccSet.numDistinctSuccs == count);
                 for (unsigned i = 0; i < uniqueSuccSet.numDistinctSuccs; i++)
                 {
-                    assert(BitVecOps::IsMember(&bitVecTraits, succBlocks, uniqueSuccSet.nonDuplicates[i]->bbNum));
+                    assert(BitVecOps::IsMember(&bitVecTraits, succBlocks,
+                                               uniqueSuccSet.nonDuplicates[i]->getDestinationBlock()->bbNum));
                 }
             }
         }
@@ -4006,7 +4023,9 @@ class UniquenessCheckWalker
 {
 public:
     UniquenessCheckWalker(Compiler* comp)
-        : comp(comp), nodesVecTraits(comp->compGenTreeID, comp), uniqueNodes(BitVecOps::MakeEmpty(&nodesVecTraits))
+        : comp(comp)
+        , nodesVecTraits(comp->compGenTreeID, comp)
+        , uniqueNodes(BitVecOps::MakeEmpty(&nodesVecTraits))
     {
     }
 
@@ -4124,11 +4143,15 @@ class SsaCheckVisitor : public GenTreeVisitor<SsaCheckVisitor>
         unsigned m_ssaNum;
 
     public:
-        SsaKey() : m_lclNum(BAD_VAR_NUM), m_ssaNum(SsaConfig::RESERVED_SSA_NUM)
+        SsaKey()
+            : m_lclNum(BAD_VAR_NUM)
+            , m_ssaNum(SsaConfig::RESERVED_SSA_NUM)
         {
         }
 
-        SsaKey(unsigned lclNum, unsigned ssaNum) : m_lclNum(lclNum), m_ssaNum(ssaNum)
+        SsaKey(unsigned lclNum, unsigned ssaNum)
+            : m_lclNum(lclNum)
+            , m_ssaNum(ssaNum)
         {
         }
 
@@ -4735,30 +4758,51 @@ void Compiler::fgDebugCheckLoops()
     {
         return;
     }
-    if (optLoopsRequirePreHeaders)
+    if (optLoopsCanonical)
     {
         for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder())
         {
             assert(loop->EntryEdges().size() == 1);
             assert(loop->EntryEdge(0)->getSourceBlock()->KindIs(BBJ_ALWAYS));
+
+            loop->VisitRegularExitBlocks([=](BasicBlock* exit) {
+                for (BasicBlock* pred : exit->PredBlocks())
+                {
+                    assert(loop->ContainsBlock(pred));
+                }
+                return BasicBlockVisit::Continue;
+            });
         }
     }
 }
 
 //------------------------------------------------------------------------------
-// fgDebugCheckDfsTree: Checks that the DFS tree matches the current flow graph.
+// fgDebugCheckFlowGraphAnnotations: Checks that all flow graph annotations
+// that are currently non-null are valid.
 //
-void Compiler::fgDebugCheckDfsTree()
+void Compiler::fgDebugCheckFlowGraphAnnotations()
 {
-    unsigned count =
-        fgRunDfs([](BasicBlock* block, unsigned preorderNum) { assert(block->bbPreorderNum == preorderNum); },
-                 [=](BasicBlock* block, unsigned postorderNum) {
-                     assert(block->bbPostorderNum == postorderNum);
-                     assert(m_dfsTree->GetPostOrder(postorderNum) == block);
-                 },
-                 [](BasicBlock* block, BasicBlock* succ) {});
+    if (m_dfsTree == nullptr)
+    {
+        assert((m_loops == nullptr) && (m_domTree == nullptr) && (m_reachabilitySets == nullptr));
+        return;
+    }
+
+    unsigned count = fgRunDfs(
+        [](BasicBlock* block, unsigned preorderNum) {
+        assert(block->bbPreorderNum == preorderNum);
+    },
+        [=](BasicBlock* block, unsigned postorderNum) {
+        assert(block->bbPostorderNum == postorderNum);
+        assert(m_dfsTree->GetPostOrder(postorderNum) == block);
+    },
+        [](BasicBlock* block, BasicBlock* succ) {});
 
     assert(m_dfsTree->GetPostOrderCount() == count);
+
+    assert((m_loops == nullptr) || (m_loops->GetDfsTree() == m_dfsTree));
+    assert((m_domTree == nullptr) || (m_domTree->GetDfsTree() == m_dfsTree));
+    assert((m_reachabilitySets == nullptr) || (m_reachabilitySets->GetDfsTree() == m_dfsTree));
 }
 
 /*****************************************************************************/
diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp
index c72927f589fb..47127fc0ad20 100644
--- a/src/coreclr/jit/fgehopt.cpp
+++ b/src/coreclr/jit/fgehopt.cpp
@@ -32,10 +32,8 @@
 //
 PhaseStatus Compiler::fgRemoveEmptyFinally()
 {
-#if defined(FEATURE_EH_FUNCLETS)
     // We need to do this transformation before funclets are created.
     assert(!fgFuncletsCreated);
-#endif // FEATURE_EH_FUNCLETS
 
     // We need to update the bbPreds lists.
     assert(fgPredsComputed);
@@ -167,12 +165,10 @@ PhaseStatus Compiler::fgRemoveEmptyFinally()
                 fgPrepareCallFinallyRetForRemoval(leaveBlock);
                 fgRemoveBlock(leaveBlock, /* unreachable */ true);
 
-                currentBlock->SetKindAndTarget(BBJ_ALWAYS, postTryFinallyBlock);
-                currentBlock->RemoveFlags(BBF_RETLESS_CALL); // no longer a BBJ_CALLFINALLY
-
                 // Ref count updates.
-                fgAddRefPred(postTryFinallyBlock, currentBlock);
-                fgRemoveRefPred(firstBlock, currentBlock);
+                fgRedirectTargetEdge(currentBlock, postTryFinallyBlock);
+                currentBlock->SetKind(BBJ_ALWAYS);
+                currentBlock->RemoveFlags(BBF_RETLESS_CALL); // no longer a BBJ_CALLFINALLY
 
                 // Cleanup the postTryFinallyBlock
                 fgCleanupContinuation(postTryFinallyBlock);
@@ -273,10 +269,8 @@ PhaseStatus Compiler::fgRemoveEmptyTry()
 {
     JITDUMP("\n*************** In fgRemoveEmptyTry()\n");
 
-#if defined(FEATURE_EH_FUNCLETS)
     // We need to do this transformation before funclets are created.
     assert(!fgFuncletsCreated);
-#endif // FEATURE_EH_FUNCLETS
 
     // We need to update the bbPreds lists.
     assert(fgPredsComputed);
@@ -343,6 +337,7 @@ PhaseStatus Compiler::fgRemoveEmptyTry()
         BasicBlock* const lastTryBlock      = HBtab->ebdTryLast;
         BasicBlock* const firstHandlerBlock = HBtab->ebdHndBeg;
         BasicBlock* const lastHandlerBlock  = HBtab->ebdHndLast;
+        BasicBlock*       callFinally;
 
         assert(firstTryBlock->getTryIndex() == XTnum);
 
@@ -355,63 +350,64 @@ PhaseStatus Compiler::fgRemoveEmptyTry()
             continue;
         }
 
-#if FEATURE_EH_CALLFINALLY_THUNKS
-
-        // Look for blocks that are always jumps to a call finally
-        // pair that targets the finally
-        if (!firstTryBlock->KindIs(BBJ_ALWAYS))
+        if (UsesCallFinallyThunks())
         {
-            JITDUMP("EH#%u first try block " FMT_BB " not jump to a callfinally; skipping.\n", XTnum,
-                    firstTryBlock->bbNum);
-            XTnum++;
-            continue;
-        }
+            // Look for blocks that are always jumps to a call finally
+            // pair that targets the finally
+            if (!firstTryBlock->KindIs(BBJ_ALWAYS))
+            {
+                JITDUMP("EH#%u first try block " FMT_BB " not jump to a callfinally; skipping.\n", XTnum,
+                        firstTryBlock->bbNum);
+                XTnum++;
+                continue;
+            }
 
-        BasicBlock* const callFinally = firstTryBlock->GetTarget();
+            callFinally = firstTryBlock->GetTarget();
 
-        // Look for call finally pair. Note this will also disqualify
-        // empty try removal in cases where the finally doesn't
-        // return.
-        if (!callFinally->isBBCallFinallyPair() || !callFinally->TargetIs(firstHandlerBlock))
-        {
-            JITDUMP("EH#%u first try block " FMT_BB " always jumps but not to a callfinally; skipping.\n", XTnum,
-                    firstTryBlock->bbNum);
-            XTnum++;
-            continue;
-        }
+            // Look for call finally pair. Note this will also disqualify
+            // empty try removal in cases where the finally doesn't
+            // return.
+            if (!callFinally->isBBCallFinallyPair() || !callFinally->TargetIs(firstHandlerBlock))
+            {
+                JITDUMP("EH#%u first try block " FMT_BB " always jumps but not to a callfinally; skipping.\n", XTnum,
+                        firstTryBlock->bbNum);
+                XTnum++;
+                continue;
+            }
 
-        // Try itself must be a single block.
-        if (firstTryBlock != lastTryBlock)
-        {
-            JITDUMP("EH#%u first try block " FMT_BB " not only block in try; skipping.\n", XTnum,
-                    firstTryBlock->Next()->bbNum);
-            XTnum++;
-            continue;
+            // Try itself must be a single block.
+            if (firstTryBlock != lastTryBlock)
+            {
+                JITDUMP("EH#%u first try block " FMT_BB " not only block in try; skipping.\n", XTnum,
+                        firstTryBlock->Next()->bbNum);
+                XTnum++;
+                continue;
+            }
         }
-
-#else
-        // Look for call finally pair within the try itself. Note this
-        // will also disqualify empty try removal in cases where the
-        // finally doesn't return.
-        if (!firstTryBlock->isBBCallFinallyPair() || !firstTryBlock->TargetIs(firstHandlerBlock))
+        else
         {
-            JITDUMP("EH#%u first try block " FMT_BB " not a callfinally; skipping.\n", XTnum, firstTryBlock->bbNum);
-            XTnum++;
-            continue;
-        }
+            // Look for call finally pair within the try itself. Note this
+            // will also disqualify empty try removal in cases where the
+            // finally doesn't return.
+            if (!firstTryBlock->isBBCallFinallyPair() || !firstTryBlock->TargetIs(firstHandlerBlock))
+            {
+                JITDUMP("EH#%u first try block " FMT_BB " not a callfinally; skipping.\n", XTnum, firstTryBlock->bbNum);
+                XTnum++;
+                continue;
+            }
 
-        BasicBlock* const callFinally = firstTryBlock;
+            callFinally = firstTryBlock;
 
-        // Try must be a callalways pair of blocks.
-        if (!firstTryBlock->NextIs(lastTryBlock))
-        {
-            JITDUMP("EH#%u block " FMT_BB " not last block in try; skipping.\n", XTnum, firstTryBlock->Next()->bbNum);
-            XTnum++;
-            continue;
+            // Try must be a callalways pair of blocks.
+            if (!firstTryBlock->NextIs(lastTryBlock))
+            {
+                JITDUMP("EH#%u block " FMT_BB " not last block in try; skipping.\n", XTnum,
+                        firstTryBlock->Next()->bbNum);
+                XTnum++;
+                continue;
+            }
         }
 
-#endif // FEATURE_EH_CALLFINALLY_THUNKS
-
         JITDUMP("EH#%u has empty try, removing the try region and promoting the finally.\n", XTnum);
 
         // There should be just one callfinally that invokes this
@@ -524,26 +520,29 @@ PhaseStatus Compiler::fgRemoveEmptyTry()
                     GenTree*   finallyRetExpr = finallyRet->GetRootNode();
                     assert(finallyRetExpr->gtOper == GT_RETFILT);
                     fgRemoveStmt(block, finallyRet);
-                    block->SetKindAndTarget(BBJ_ALWAYS, continuation);
-                    fgAddRefPred(continuation, block);
+                    FlowEdge* const newEdge = fgAddRefPred(continuation, block);
+                    block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
                 }
             }
 
-#if !defined(FEATURE_EH_FUNCLETS)
-            // If we're in a non-funclet model, decrement the nesting
-            // level of any GT_END_LFIN we find in the handler region,
-            // since we're removing the enclosing handler.
-            for (Statement* const stmt : block->Statements())
+#if defined(FEATURE_EH_WINDOWS_X86)
+            if (!UsesFunclets())
             {
-                GenTree* expr = stmt->GetRootNode();
-                if (expr->gtOper == GT_END_LFIN)
+                // If we're in a non-funclet model, decrement the nesting
+                // level of any GT_END_LFIN we find in the handler region,
+                // since we're removing the enclosing handler.
+                for (Statement* const stmt : block->Statements())
                 {
-                    const size_t nestLevel = expr->AsVal()->gtVal1;
-                    assert(nestLevel > 0);
-                    expr->AsVal()->gtVal1 = nestLevel - 1;
+                    GenTree* expr = stmt->GetRootNode();
+                    if (expr->gtOper == GT_END_LFIN)
+                    {
+                        const size_t nestLevel = expr->AsVal()->gtVal1;
+                        assert(nestLevel > 0);
+                        expr->AsVal()->gtVal1 = nestLevel - 1;
+                    }
                 }
             }
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
         }
 
         // (6) Remove the try-finally EH region. This will compact the
@@ -607,10 +606,8 @@ PhaseStatus Compiler::fgRemoveEmptyTry()
 //
 PhaseStatus Compiler::fgCloneFinally()
 {
-#if defined(FEATURE_EH_FUNCLETS)
     // We need to do this transformation before funclets are created.
     assert(!fgFuncletsCreated);
-#endif // FEATURE_EH_FUNCLETS
 
     // We need to update the bbPreds lists.
     assert(fgPredsComputed);
@@ -797,25 +794,29 @@ PhaseStatus Compiler::fgCloneFinally()
 
         for (BasicBlock* block = lastTryBlock; block != beforeTryBlock; block = block->Prev())
         {
-#if FEATURE_EH_CALLFINALLY_THUNKS
-            // Blocks that transfer control to callfinallies are usually
-            // BBJ_ALWAYS blocks, but the last block of a try may fall
-            // through to a callfinally, or could be the target of a BBJ_CALLFINALLYRET,
-            // indicating a chained callfinally.
             BasicBlock* jumpDest = nullptr;
 
-            if (block->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET))
+            if (UsesCallFinallyThunks())
             {
-                jumpDest = block->GetTarget();
-            }
+                // Blocks that transfer control to callfinallies are usually
+                // BBJ_ALWAYS blocks, but the last block of a try may fall
+                // through to a callfinally, or could be the target of a BBJ_CALLFINALLYRET,
+                // indicating a chained callfinally.
+
+                if (block->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET))
+                {
+                    jumpDest = block->GetTarget();
+                }
 
-            if (jumpDest == nullptr)
+                if (jumpDest == nullptr)
+                {
+                    continue;
+                }
+            }
+            else
             {
-                continue;
+                jumpDest = block;
             }
-#else
-            BasicBlock* const jumpDest = block;
-#endif // FEATURE_EH_CALLFINALLY_THUNKS
 
             // The jumpDest must be a callfinally that in turn invokes the
             // finally of interest.
@@ -882,29 +883,32 @@ PhaseStatus Compiler::fgCloneFinally()
                 isUpdate                = true;
             }
 
-#if FEATURE_EH_CALLFINALLY_THUNKS
-            // When there are callfinally thunks, we don't expect to see the
-            // callfinally within a handler region either.
-            assert(!jumpDest->hasHndIndex());
-
-            // Update the clone insertion point to just after the
-            // call always pair.
-            cloneInsertAfter = finallyReturnBlock;
-
-            // We will consider moving the callfinally so we can fall
-            // through from the try into the clone.
-            tryToRelocateCallFinally = true;
-
-            JITDUMP("%s path to clone: try block " FMT_BB " jumps to callfinally at " FMT_BB ";"
-                    " the call returns to " FMT_BB " which jumps to " FMT_BB "\n",
-                    isUpdate ? "Updating" : "Choosing", block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum,
-                    postTryFinallyBlock->bbNum);
-#else
-            JITDUMP("%s path to clone: try block " FMT_BB " is a callfinally;"
-                    " the call returns to " FMT_BB " which jumps to " FMT_BB "\n",
-                    isUpdate ? "Updating" : "Choosing", block->bbNum, finallyReturnBlock->bbNum,
-                    postTryFinallyBlock->bbNum);
-#endif // FEATURE_EH_CALLFINALLY_THUNKS
+            if (UsesCallFinallyThunks())
+            {
+                // When there are callfinally thunks, we don't expect to see the
+                // callfinally within a handler region either.
+                assert(!jumpDest->hasHndIndex());
+
+                // Update the clone insertion point to just after the
+                // call always pair.
+                cloneInsertAfter = finallyReturnBlock;
+
+                // We will consider moving the callfinally so we can fall
+                // through from the try into the clone.
+                tryToRelocateCallFinally = true;
+
+                JITDUMP("%s path to clone: try block " FMT_BB " jumps to callfinally at " FMT_BB ";"
+                        " the call returns to " FMT_BB " which jumps to " FMT_BB "\n",
+                        isUpdate ? "Updating" : "Choosing", block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum,
+                        postTryFinallyBlock->bbNum);
+            }
+            else
+            {
+                JITDUMP("%s path to clone: try block " FMT_BB " is a callfinally;"
+                        " the call returns to " FMT_BB " which jumps to " FMT_BB "\n",
+                        isUpdate ? "Updating" : "Choosing", block->bbNum, finallyReturnBlock->bbNum,
+                        postTryFinallyBlock->bbNum);
+            }
 
             // For non-pgo just take the first one we find.
             // For pgo, keep searching in case we find one we like better.
@@ -1093,14 +1097,13 @@ PhaseStatus Compiler::fgCloneFinally()
                 GenTree*   finallyRetExpr = finallyRet->GetRootNode();
                 assert(finallyRetExpr->gtOper == GT_RETFILT);
                 fgRemoveStmt(newBlock, finallyRet);
-                newBlock->SetKindAndTarget(BBJ_ALWAYS, normalCallFinallyReturn);
 
-                fgAddRefPred(normalCallFinallyReturn, newBlock);
+                FlowEdge* const newEdge = fgAddRefPred(normalCallFinallyReturn, newBlock);
+                newBlock->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
             }
             else
             {
-                newBlock->CopyTarget(this, block);
-                optRedirectBlock(newBlock, &blockMap, RedirectBlockOption::AddToPredLists);
+                optSetMappedBlockTargets(block, newBlock, &blockMap);
             }
         }
 
@@ -1136,13 +1139,12 @@ PhaseStatus Compiler::fgCloneFinally()
                     fgPrepareCallFinallyRetForRemoval(leaveBlock);
                     fgRemoveBlock(leaveBlock, /* unreachable */ true);
 
+                    // Ref count updates.
+                    fgRedirectTargetEdge(currentBlock, firstCloneBlock);
+
                     // This call returns to the expected spot, so retarget it to branch to the clone.
-                    currentBlock->SetKindAndTarget(BBJ_ALWAYS, firstCloneBlock);
                     currentBlock->RemoveFlags(BBF_RETLESS_CALL); // no longer a BBJ_CALLFINALLY
-
-                    // Ref count updates.
-                    fgAddRefPred(firstCloneBlock, currentBlock);
-                    fgRemoveRefPred(firstBlock, currentBlock);
+                    currentBlock->SetKind(BBJ_ALWAYS);
 
                     // Make sure iteration isn't going off the deep end.
                     assert(leaveBlock != endCallFinallyRangeBlock);
@@ -1339,19 +1341,15 @@ void Compiler::fgDebugCheckTryFinallyExits()
                     continue;
                 }
 
-#if FEATURE_EH_CALLFINALLY_THUNKS
-
                 // When there are callfinally thunks, callfinallies
                 // logically "belong" to a child region and the exit
                 // path validity will be checked when looking at the
                 // try blocks in that region.
-                if (block->KindIs(BBJ_CALLFINALLY))
+                if (UsesCallFinallyThunks() && block->KindIs(BBJ_CALLFINALLY))
                 {
                     continue;
                 }
 
-#endif // FEATURE_EH_CALLFINALLY_THUNKS
-
                 // Now we know block lies directly within the try of a
                 // try-finally, and succBlock is in an enclosing
                 // region (possibly the method region). So this path
@@ -1369,19 +1367,16 @@ void Compiler::fgDebugCheckTryFinallyExits()
                 // (e) via an always jump clonefinally exit
                 bool isCallToFinally = false;
 
-#if FEATURE_EH_CALLFINALLY_THUNKS
-                if (succBlock->KindIs(BBJ_CALLFINALLY))
+                if (UsesCallFinallyThunks() && succBlock->KindIs(BBJ_CALLFINALLY))
                 {
                     // case (a1)
                     isCallToFinally = isFinally && succBlock->TargetIs(finallyBlock);
                 }
-#else  // !FEATURE_EH_CALLFINALLY_THUNKS
-                if (block->KindIs(BBJ_CALLFINALLY))
+                else if (!UsesCallFinallyThunks() && block->KindIs(BBJ_CALLFINALLY))
                 {
                     // case (a2)
                     isCallToFinally = isFinally && block->TargetIs(finallyBlock);
                 }
-#endif // !FEATURE_EH_CALLFINALLY_THUNKS
 
                 bool isJumpToClonedFinally = false;
 
@@ -1459,27 +1454,30 @@ void Compiler::fgDebugCheckTryFinallyExits()
 //
 void Compiler::fgCleanupContinuation(BasicBlock* continuation)
 {
-#if !defined(FEATURE_EH_FUNCLETS)
-    // The continuation may be a finalStep block.
-    // It is now a normal block, so clear the special keep
-    // always flag.
-    continuation->RemoveFlags(BBF_KEEP_BBJ_ALWAYS);
-
-    // Remove the GT_END_LFIN from the continuation,
-    // Note we only expect to see one such statement.
-    bool foundEndLFin = false;
-    for (Statement* const stmt : continuation->Statements())
+#if defined(FEATURE_EH_WINDOWS_X86)
+    if (!UsesFunclets())
     {
-        GenTree* expr = stmt->GetRootNode();
-        if (expr->gtOper == GT_END_LFIN)
+        // The continuation may be a finalStep block.
+        // It is now a normal block, so clear the special keep
+        // always flag.
+        continuation->RemoveFlags(BBF_KEEP_BBJ_ALWAYS);
+
+        // Remove the GT_END_LFIN from the continuation,
+        // Note we only expect to see one such statement.
+        bool foundEndLFin = false;
+        for (Statement* const stmt : continuation->Statements())
         {
-            assert(!foundEndLFin);
-            fgRemoveStmt(continuation, stmt);
-            foundEndLFin = true;
+            GenTree* expr = stmt->GetRootNode();
+            if (expr->gtOper == GT_END_LFIN)
+            {
+                assert(!foundEndLFin);
+                fgRemoveStmt(continuation, stmt);
+                foundEndLFin = true;
+            }
         }
+        assert(foundEndLFin);
     }
-    assert(foundEndLFin);
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 }
 
 //------------------------------------------------------------------------
@@ -1497,10 +1495,8 @@ void Compiler::fgCleanupContinuation(BasicBlock* continuation)
 //
 PhaseStatus Compiler::fgMergeFinallyChains()
 {
-#if defined(FEATURE_EH_FUNCLETS)
     // We need to do this transformation before funclets are created.
     assert(!fgFuncletsCreated);
-#endif // FEATURE_EH_FUNCLETS
 
     // We need to update the bbPreds lists.
     assert(fgPredsComputed);
@@ -1525,22 +1521,26 @@ PhaseStatus Compiler::fgMergeFinallyChains()
 
     bool enableMergeFinallyChains = true;
 
-#if !defined(FEATURE_EH_FUNCLETS)
-    // For non-funclet models (x86) the callfinallys may contain
-    // statements and the continuations contain GT_END_LFINs.  So no
-    // merging is possible until the GT_END_LFIN blocks can be merged
-    // and merging is not safe unless the callfinally blocks are split.
-    JITDUMP("EH using non-funclet model; merging not yet implemented.\n");
-    enableMergeFinallyChains = false;
-#endif // !FEATURE_EH_FUNCLETS
-
-#if !FEATURE_EH_CALLFINALLY_THUNKS
-    // For non-thunk EH models (x86) the callfinallys may contain
-    // statements, and merging is not safe unless the callfinally
-    // blocks are split.
-    JITDUMP("EH using non-callfinally thunk model; merging not yet implemented.\n");
-    enableMergeFinallyChains = false;
-#endif
+#if defined(FEATURE_EH_WINDOWS_X86)
+    if (!UsesFunclets())
+    {
+        // For non-funclet models (x86) the callfinallys may contain
+        // statements and the continuations contain GT_END_LFINs.  So no
+        // merging is possible until the GT_END_LFIN blocks can be merged
+        // and merging is not safe unless the callfinally blocks are split.
+        JITDUMP("EH using non-funclet model; merging not yet implemented.\n");
+        enableMergeFinallyChains = false;
+    }
+#endif // FEATURE_EH_WINDOWS_X86
+
+    if (!UsesCallFinallyThunks())
+    {
+        // For non-thunk EH models (x86) the callfinallys may contain
+        // statements, and merging is not safe unless the callfinally
+        // blocks are split.
+        JITDUMP("EH using non-callfinally thunk model; merging not yet implemented.\n");
+        enableMergeFinallyChains = false;
+    }
 
     if (!enableMergeFinallyChains)
     {
@@ -1758,10 +1758,8 @@ bool Compiler::fgRetargetBranchesToCanonicalCallFinally(BasicBlock*      block,
     JITDUMP("Redirecting branch in " FMT_BB " from " FMT_BB " to " FMT_BB ".\n", block->bbNum, callFinally->bbNum,
             canonicalCallFinally->bbNum);
 
-    block->SetTarget(canonicalCallFinally);
-    fgAddRefPred(canonicalCallFinally, block);
     assert(callFinally->bbRefs > 0);
-    fgRemoveRefPred(callFinally, block);
+    fgRedirectTargetEdge(block, canonicalCallFinally);
 
     // Update profile counts
     //
@@ -1873,11 +1871,15 @@ PhaseStatus Compiler::fgTailMergeThrows()
         BasicBlock*  m_block;
         GenTreeCall* m_call;
 
-        ThrowHelper() : m_block(nullptr), m_call(nullptr)
+        ThrowHelper()
+            : m_block(nullptr)
+            , m_call(nullptr)
         {
         }
 
-        ThrowHelper(BasicBlock* block, GenTreeCall* call) : m_block(block), m_call(call)
+        ThrowHelper(BasicBlock* block, GenTreeCall* call)
+            : m_block(block)
+            , m_call(call)
         {
         }
 
@@ -2008,36 +2010,12 @@ PhaseStatus Compiler::fgTailMergeThrows()
 
         // Walk pred list of the non canonical block, updating flow to target
         // the canonical block instead.
-        for (FlowEdge* predEdge = nonCanonicalBlock->bbPreds; predEdge != nullptr; predEdge = nextPredEdge)
+        for (BasicBlock* const predBlock : nonCanonicalBlock->PredBlocksEditing())
         {
-            BasicBlock* const predBlock = predEdge->getSourceBlock();
-            nextPredEdge                = predEdge->getNextPredEdge();
-
             switch (predBlock->GetKind())
             {
                 case BBJ_ALWAYS:
-                {
-                    fgTailMergeThrowsJumpToHelper(predBlock, nonCanonicalBlock, canonicalBlock, predEdge);
-                    updated = true;
-                }
-                break;
-
                 case BBJ_COND:
-                {
-                    // Flow to non canonical block could be via fall through or jump or both.
-                    if (predBlock->FalseTargetIs(nonCanonicalBlock))
-                    {
-                        fgTailMergeThrowsFallThroughHelper(predBlock, nonCanonicalBlock, canonicalBlock, predEdge);
-                    }
-
-                    if (predBlock->TrueTargetIs(nonCanonicalBlock))
-                    {
-                        fgTailMergeThrowsJumpToHelper(predBlock, nonCanonicalBlock, canonicalBlock, predEdge);
-                    }
-                    updated = true;
-                }
-                break;
-
                 case BBJ_SWITCH:
                 {
                     JITDUMP("*** " FMT_BB " now branching to " FMT_BB "\n", predBlock->bbNum, canonicalBlock->bbNum);
@@ -2080,87 +2058,3 @@ PhaseStatus Compiler::fgTailMergeThrows()
     fgModified = false;
     return PhaseStatus::MODIFIED_EVERYTHING;
 }
-
-//------------------------------------------------------------------------
-// fgTailMergeThrowsFallThroughHelper: fixup flow for fall throughs to mergeable throws
-//
-// Arguments:
-//    predBlock - block falling through to the throw helper
-//    nonCanonicalBlock - original fall through target
-//    canonicalBlock - new (jump) target
-//    predEdge - original flow edge
-//
-// Notes:
-//    Alters fall through flow of predBlock so it jumps to the
-//    canonicalBlock via a new basic block.  Does not try and fix
-//    jump-around flow; we leave that to optOptimizeFlow which runs
-//    just afterwards.
-//
-void Compiler::fgTailMergeThrowsFallThroughHelper(BasicBlock* predBlock,
-                                                  BasicBlock* nonCanonicalBlock,
-                                                  BasicBlock* canonicalBlock,
-                                                  FlowEdge*   predEdge)
-{
-    assert(predBlock->KindIs(BBJ_COND));
-    assert(predBlock->FalseTargetIs(nonCanonicalBlock));
-
-    BasicBlock* const newBlock = fgNewBBafter(BBJ_ALWAYS, predBlock, true, canonicalBlock);
-    predBlock->SetFalseTarget(newBlock);
-
-    JITDUMP("*** " FMT_BB " now falling through to empty " FMT_BB " and then to " FMT_BB "\n", predBlock->bbNum,
-            newBlock->bbNum, canonicalBlock->bbNum);
-
-    // Remove the old flow
-    fgRemoveRefPred(nonCanonicalBlock, predBlock);
-
-    // Wire up the new flow
-    fgAddRefPred(newBlock, predBlock, predEdge);
-
-    fgAddRefPred(canonicalBlock, newBlock, predEdge);
-
-    // If nonCanonicalBlock has only one pred, all its flow transfers.
-    // If it has multiple preds, then we need edge counts or likelihoods
-    // to figure things out.
-    //
-    // For now just do a minimal update.
-    //
-    newBlock->inheritWeight(nonCanonicalBlock);
-}
-
-//------------------------------------------------------------------------
-// fgTailMergeThrowsJumpToHelper: fixup flow for jumps to mergeable throws
-//
-// Arguments:
-//    predBlock - block jumping to the throw helper
-//    nonCanonicalBlock - original jump target
-//    canonicalBlock - new jump target
-//    predEdge - original flow edge
-//
-// Notes:
-//    Alters jumpDest of predBlock so it jumps to the canonicalBlock.
-//
-void Compiler::fgTailMergeThrowsJumpToHelper(BasicBlock* predBlock,
-                                             BasicBlock* nonCanonicalBlock,
-                                             BasicBlock* canonicalBlock,
-                                             FlowEdge*   predEdge)
-{
-    JITDUMP("*** " FMT_BB " now branching to " FMT_BB "\n", predBlock->bbNum, canonicalBlock->bbNum);
-
-    // Remove the old flow
-    fgRemoveRefPred(nonCanonicalBlock, predBlock);
-
-    // Wire up the new flow
-    if (predBlock->KindIs(BBJ_ALWAYS))
-    {
-        assert(predBlock->TargetIs(nonCanonicalBlock));
-        predBlock->SetTarget(canonicalBlock);
-    }
-    else
-    {
-        assert(predBlock->KindIs(BBJ_COND));
-        assert(predBlock->TrueTargetIs(nonCanonicalBlock));
-        predBlock->SetTrueTarget(canonicalBlock);
-    }
-
-    fgAddRefPred(canonicalBlock, predBlock, predEdge);
-}
diff --git a/src/coreclr/jit/fgflow.cpp b/src/coreclr/jit/fgflow.cpp
index 3b6afa82bed5..f4f650f6b5d3 100644
--- a/src/coreclr/jit/fgflow.cpp
+++ b/src/coreclr/jit/fgflow.cpp
@@ -117,8 +117,8 @@ FlowEdge* Compiler::fgAddRefPred(BasicBlock* block, BasicBlock* blockPred, FlowE
     // dependency on this order. Note also that we don't allow duplicates in the list; we maintain a DupCount
     // count of duplication. This also necessitates walking the flow list for every edge we add.
     //
-    FlowEdge*  flow  = nullptr;
-    FlowEdge** listp = &block->bbPreds;
+    FlowEdge*  flow = nullptr;
+    FlowEdge** listp;
 
     if (initializingPreds)
     {
@@ -126,6 +126,7 @@ FlowEdge* Compiler::fgAddRefPred(BasicBlock* block, BasicBlock* blockPred, FlowE
         // increasing blockPred->bbNum order. The only possible
         // dup list entry is the last one.
         //
+        listp              = &block->bbPreds;
         FlowEdge* flowLast = block->bbLastPred;
         if (flowLast != nullptr)
         {
@@ -136,32 +137,6 @@ FlowEdge* Compiler::fgAddRefPred(BasicBlock* block, BasicBlock* blockPred, FlowE
             if (flowLast->getSourceBlock() == blockPred)
             {
                 flow = flowLast;
-
-                // This edge should have been given a likelihood when it was created.
-                // Since we're increasing its duplicate count, update the likelihood.
-                //
-                assert(flow->hasLikelihood());
-                const unsigned numSucc = blockPred->NumSucc();
-                assert(numSucc > 0);
-
-                if (numSucc == 1)
-                {
-                    // BasicBlock::NumSucc() returns 1 for BBJ_CONDs with the same true/false target.
-                    // For blocks that only ever have one successor (BBJ_ALWAYS, BBJ_LEAVE, etc.),
-                    // their successor edge should never have a duplicate count over 1.
-                    //
-                    assert(blockPred->KindIs(BBJ_COND));
-                    assert(blockPred->TrueTargetIs(blockPred->GetFalseTarget()));
-                    flow->setLikelihood(1.0);
-                }
-                else
-                {
-                    // Duplicate count isn't updated until later, so add 1 for now.
-                    //
-                    const unsigned dupCount = flow->getDupCount() + 1;
-                    assert(dupCount > 1);
-                    flow->setLikelihood((1.0 / numSucc) * dupCount);
-                }
             }
         }
     }
@@ -169,10 +144,7 @@ FlowEdge* Compiler::fgAddRefPred(BasicBlock* block, BasicBlock* blockPred, FlowE
     {
         // References are added randomly, so we have to search.
         //
-        while ((*listp != nullptr) && ((*listp)->getSourceBlock()->bbNum < blockPred->bbNum))
-        {
-            listp = (*listp)->getNextPredEdgeRef();
-        }
+        listp = fgGetPredInsertPoint(blockPred, block);
 
         if ((*listp != nullptr) && ((*listp)->getSourceBlock() == blockPred))
         {
@@ -183,6 +155,7 @@ FlowEdge* Compiler::fgAddRefPred(BasicBlock* block, BasicBlock* blockPred, FlowE
     if (flow != nullptr)
     {
         // The predecessor block already exists in the flow list; simply add to its duplicate count.
+        //
         noway_assert(flow->getDupCount());
         flow->incrementDupCount();
     }
@@ -211,18 +184,10 @@ FlowEdge* Compiler::fgAddRefPred(BasicBlock* block, BasicBlock* blockPred, FlowE
         if (initializingPreds)
         {
             block->bbLastPred = flow;
-
-            // When initializing preds, ensure edge likelihood is set,
-            // such that this edge is as likely as any other successor edge
-            //
-            const unsigned numSucc = blockPred->NumSucc();
-            assert(numSucc > 0);
-            assert(flow->getDupCount() == 1);
-            flow->setLikelihood(1.0 / numSucc);
         }
-        else if ((oldEdge != nullptr) && oldEdge->hasLikelihood())
+        else if (oldEdge != nullptr)
         {
-            // Copy likelihood from old edge, if any.
+            // Copy likelihood from old edge.
             //
             flow->setLikelihood(oldEdge->getLikelihood());
         }
@@ -268,10 +233,6 @@ FlowEdge* Compiler::fgAddRefPred(BasicBlock* block, BasicBlock* blockPred, FlowE
     //
     assert(block->checkPredListOrder());
 
-    // When initializing preds, edge likelihood should always be set.
-    //
-    assert(!initializingPreds || flow->hasLikelihood());
-
     return flow;
 }
 
@@ -284,58 +245,40 @@ template FlowEdge* Compiler::fgAddRefPred<true>(BasicBlock* block,
                                                 FlowEdge*   oldEdge /* = nullptr */);
 
 //------------------------------------------------------------------------
-// fgRemoveRefPred: Decrements the reference count of a predecessor edge from "blockPred" to "block",
-// removing the edge if it is no longer necessary.
+// fgRemoveRefPred: Decrements the reference count of `edge`, removing it from its successor block's pred list
+// if the reference count is zero.
 //
 // Arguments:
-//    block -- A block to operate on.
-//    blockPred -- The predecessor block to remove from the predecessor list. It must be a predecessor of "block".
-//
-// Return Value:
-//    If the flow edge was removed (the predecessor has a "dup count" of 1),
-//        returns the flow graph edge that was removed. This means "blockPred" is no longer a predecessor of "block".
-//    Otherwise, returns nullptr. This means that "blockPred" is still a predecessor of "block" (because "blockPred"
-//        is a switch with multiple cases jumping to "block", or a BBJ_COND with both conditional and fall-through
-//        paths leading to "block").
-//
-// Assumptions:
-//    -- "blockPred" must be a predecessor block of "block".
+//    edge -- The FlowEdge* to decrement the reference count of.
 //
 // Notes:
-//    -- block->bbRefs is decremented by one to account for the reduction in incoming edges.
-//    -- block->bbRefs is adjusted even if preds haven't been computed. If preds haven't been computed,
-//       the preds themselves aren't touched.
-//    -- fgModified is set if a flow edge is removed (but not if an existing flow edge dup count is decremented),
-//       indicating that the flow graph shape has changed.
+//    -- succBlock->bbRefs is decremented by one to account for the reduction in incoming edges.
+//    -- fgModified is set if a flow edge is removed, indicating that the flow graph shape has changed.
 //
-FlowEdge* Compiler::fgRemoveRefPred(BasicBlock* block, BasicBlock* blockPred)
+void Compiler::fgRemoveRefPred(FlowEdge* edge)
 {
-    noway_assert(block != nullptr);
-    noway_assert(blockPred != nullptr);
-    noway_assert(block->countOfInEdges() > 0);
+    assert(edge != nullptr);
     assert(fgPredsComputed);
-    block->bbRefs--;
 
-    FlowEdge** ptrToPred;
-    FlowEdge*  pred = fgGetPredForBlock(block, blockPred, &ptrToPred);
-    noway_assert(pred != nullptr);
-    noway_assert(pred->getDupCount() > 0);
+    BasicBlock* predBlock = edge->getSourceBlock();
+    BasicBlock* succBlock = edge->getDestinationBlock();
+    assert(predBlock != nullptr);
+    assert(succBlock != nullptr);
+
+    succBlock->bbRefs--;
 
-    pred->decrementDupCount();
+    assert(edge->getDupCount() > 0);
+    edge->decrementDupCount();
 
-    if (pred->getDupCount() == 0)
+    if (edge->getDupCount() == 0)
     {
-        // Splice out the predecessor edge since it's no longer necessary.
-        *ptrToPred = pred->getNextPredEdge();
+        // Splice out the predecessor edge in succBlock's pred list, since it's no longer necessary.
+        FlowEdge** ptrToPred;
+        FlowEdge*  pred = fgGetPredForBlock(succBlock, predBlock, &ptrToPred);
+        *ptrToPred      = pred->getNextPredEdge();
 
         // Any changes to the flow graph invalidate the dominator sets.
         fgModified = true;
-
-        return pred;
-    }
-    else
-    {
-        return nullptr;
     }
 }
 
@@ -397,20 +340,23 @@ void Compiler::fgRemoveBlockAsPred(BasicBlock* block)
         case BBJ_ALWAYS:
         case BBJ_EHCATCHRET:
         case BBJ_EHFILTERRET:
-            fgRemoveRefPred(block->GetTarget(), block);
+            fgRemoveRefPred(block->GetTargetEdge());
             break;
 
         case BBJ_COND:
-            fgRemoveRefPred(block->GetTrueTarget(), block);
-            fgRemoveRefPred(block->GetFalseTarget(), block);
+            fgRemoveRefPred(block->GetTrueEdge());
+            fgRemoveRefPred(block->GetFalseEdge());
             break;
 
         case BBJ_EHFINALLYRET:
-            for (BasicBlock* const succ : block->EHFinallyRetSuccs())
+        {
+            BBehfDesc* const ehfDesc = block->GetEhfTargets();
+            for (unsigned i = 0; i < ehfDesc->bbeCount; i++)
             {
-                fgRemoveRefPred(succ, block);
+                fgRemoveRefPred(ehfDesc->bbeSuccs[i]);
             }
             break;
+        }
 
         case BBJ_EHFAULTRET:
         case BBJ_THROW:
@@ -418,11 +364,14 @@ void Compiler::fgRemoveBlockAsPred(BasicBlock* block)
             break;
 
         case BBJ_SWITCH:
-            for (BasicBlock* const bTarget : block->SwitchTargets())
+        {
+            BBswtDesc* const swtDesc = block->GetSwitchTargets();
+            for (unsigned i = 0; i < swtDesc->bbsCount; i++)
             {
-                fgRemoveRefPred(bTarget, block);
+                fgRemoveRefPred(swtDesc->bbsDstTab[i]);
             }
             break;
+        }
 
         default:
             noway_assert(!"Block doesn't have a valid bbKind!!!!");
@@ -430,6 +379,188 @@ void Compiler::fgRemoveBlockAsPred(BasicBlock* block)
     }
 }
 
+//------------------------------------------------------------------------
+// fgGetPredInsertPoint: Searches newTarget->bbPreds for where to insert an edge from blockPred.
+//
+// Arguments:
+//    blockPred -- The block we want to make a predecessor of newTarget (it could already be one).
+//    newTarget -- The block whose pred list we will search.
+//
+// Return Value:
+//    Returns a pointer to the next pointer of an edge in newTarget's pred list.
+//    A new edge from blockPred to newTarget can be inserted here
+//    without disrupting bbPreds' sorting invariant.
+//
+FlowEdge** Compiler::fgGetPredInsertPoint(BasicBlock* blockPred, BasicBlock* newTarget)
+{
+    assert(blockPred != nullptr);
+    assert(newTarget != nullptr);
+    assert(fgPredsComputed);
+
+    FlowEdge** listp = &newTarget->bbPreds;
+
+    // Search pred list for insertion point
+    //
+    while ((*listp != nullptr) && ((*listp)->getSourceBlock()->bbNum < blockPred->bbNum))
+    {
+        listp = (*listp)->getNextPredEdgeRef();
+    }
+
+    return listp;
+}
+
+//------------------------------------------------------------------------
+// fgRedirectTargetEdge: Sets block->bbTargetEdge's target block to newTarget,
+// updating pred lists as necessary.
+//
+// Arguments:
+//    block -- The block we want to make a predecessor of newTarget.
+//             It could be one already, in which case nothing changes.
+//    newTarget -- The new successor of block.
+//
+void Compiler::fgRedirectTargetEdge(BasicBlock* block, BasicBlock* newTarget)
+{
+    assert(block != nullptr);
+    assert(newTarget != nullptr);
+
+    FlowEdge* edge = block->GetTargetEdge();
+    assert(edge->getDupCount() == 1);
+
+    // Update oldTarget's pred list.
+    // We could call fgRemoveRefPred, but since we're removing the one and only ref from block to oldTarget,
+    // fgRemoveAllRefPreds is slightly more efficient (one fewer branch, doesn't update edge's dup count, etc).
+    //
+    BasicBlock* oldTarget = edge->getDestinationBlock();
+    fgRemoveAllRefPreds(oldTarget, block);
+
+    // Splice edge into new target block's pred list
+    //
+    FlowEdge** predListPtr = fgGetPredInsertPoint(block, newTarget);
+    edge->setNextPredEdge(*predListPtr);
+    edge->setDestinationBlock(newTarget);
+    *predListPtr = edge;
+
+    // Pred list of target should (still) be ordered
+    //
+    assert(newTarget->checkPredListOrder());
+
+    // Edge should still have only one ref
+    assert(edge->getDupCount() == 1);
+    newTarget->bbRefs++;
+}
+
+//------------------------------------------------------------------------
+// fgRedirectTrueEdge: Sets block->bbTrueEdge's target block to newTarget,
+// updating pred lists as necessary.
+//
+// Arguments:
+//    block -- The block we want to make a predecessor of newTarget.
+//             It could be one already, in which case nothing changes.
+//    newTarget -- The new successor of block.
+//
+// Notes:
+//    This assumes block's true and false targets are different.
+//    If setting block's true target to its false target,
+//    fgRedirectTrueEdge increments the false edge's dup count,
+//    and ensures block->bbTrueEdge == block->bbFalseEdge.
+//    We don't update newTarget->bbPreds in this case,
+//    as we don't want to have more than one edge from the same predecessor.
+//
+void Compiler::fgRedirectTrueEdge(BasicBlock* block, BasicBlock* newTarget)
+{
+    assert(block != nullptr);
+    assert(newTarget != nullptr);
+    assert(block->KindIs(BBJ_COND));
+    assert(!block->TrueEdgeIs(block->GetFalseEdge()));
+
+    // Update oldTarget's pred list.
+    // We could call fgRemoveRefPred, but since we're removing the one and only ref from block to oldTarget,
+    // fgRemoveAllRefPreds is slightly more efficient (one fewer branch, doesn't update edge's dup count, etc).
+    //
+    FlowEdge*   trueEdge  = block->GetTrueEdge();
+    BasicBlock* oldTarget = trueEdge->getDestinationBlock();
+    fgRemoveAllRefPreds(oldTarget, block);
+
+    // Splice edge into new target block's pred list
+    //
+    FlowEdge** predListPtr = fgGetPredInsertPoint(block, newTarget);
+    FlowEdge*  predEdge    = *predListPtr;
+
+    if (block->FalseEdgeIs(predEdge))
+    {
+        block->SetTrueEdge(predEdge);
+        predEdge->incrementDupCount();
+    }
+    else
+    {
+        trueEdge->setNextPredEdge(predEdge);
+        trueEdge->setDestinationBlock(newTarget);
+        *predListPtr = trueEdge;
+    }
+
+    newTarget->bbRefs++;
+
+    // Pred list of target should (still) be ordered
+    //
+    assert(newTarget->checkPredListOrder());
+}
+
+//------------------------------------------------------------------------
+// fgRedirectFalseEdge: Sets block->bbFalseEdge's target block to newTarget,
+// updating pred lists as necessary.
+//
+// Arguments:
+//    block -- The block we want to make a predecessor of newTarget.
+//             It could be one already, in which case nothing changes.
+//    newTarget -- The new successor of block.
+//
+// Notes:
+//    This assumes block's true and false targets are different.
+//    If setting block's false target to its true target,
+//    fgRedirectFalseEdge increments the true edge's dup count,
+//    and ensures block->bbTrueEdge == block->bbFalseEdge.
+//    We don't update newTarget->bbPreds in this case,
+//    as we don't want to have more than one edge from the same predecessor.
+//
+void Compiler::fgRedirectFalseEdge(BasicBlock* block, BasicBlock* newTarget)
+{
+    assert(block != nullptr);
+    assert(newTarget != nullptr);
+    assert(block->KindIs(BBJ_COND));
+    assert(!block->TrueEdgeIs(block->GetFalseEdge()));
+
+    // Update oldTarget's pred list.
+    // We could call fgRemoveRefPred, but since we're removing the one and only ref from block to oldTarget,
+    // fgRemoveAllRefPreds is slightly more efficient (one fewer branch, doesn't update edge's dup count, etc).
+    //
+    FlowEdge*   falseEdge = block->GetFalseEdge();
+    BasicBlock* oldTarget = falseEdge->getDestinationBlock();
+    fgRemoveAllRefPreds(oldTarget, block);
+
+    // Splice edge into new target block's pred list
+    //
+    FlowEdge** predListPtr = fgGetPredInsertPoint(block, newTarget);
+    FlowEdge*  predEdge    = *predListPtr;
+
+    if (block->TrueEdgeIs(predEdge))
+    {
+        block->SetFalseEdge(predEdge);
+        predEdge->incrementDupCount();
+    }
+    else
+    {
+        falseEdge->setNextPredEdge(predEdge);
+        falseEdge->setDestinationBlock(newTarget);
+        *predListPtr = falseEdge;
+    }
+
+    newTarget->bbRefs++;
+
+    // Pred list of target should (still) be ordered
+    //
+    assert(newTarget->checkPredListOrder());
+}
+
 Compiler::SwitchUniqueSuccSet Compiler::GetDescriptorForSwitch(BasicBlock* switchBlk)
 {
     assert(switchBlk->KindIs(BBJ_SWITCH));
@@ -458,16 +589,20 @@ Compiler::SwitchUniqueSuccSet Compiler::GetDescriptorForSwitch(BasicBlock* switc
         // Now we have a set of unique successors.
         unsigned numNonDups = BitVecOps::Count(&blockVecTraits, uniqueSuccBlocks);
 
-        BasicBlock** nonDups = new (getAllocator()) BasicBlock*[numNonDups];
+        FlowEdge** nonDups = new (getAllocator()) FlowEdge*[numNonDups];
 
         unsigned nonDupInd = 0;
+
         // At this point, all unique targets are in "uniqueSuccBlocks".  As we encounter each,
         // add to nonDups, remove from "uniqueSuccBlocks".
-        for (BasicBlock* const targ : switchBlk->SwitchTargets())
+        BBswtDesc* const swtDesc = switchBlk->GetSwitchTargets();
+        for (unsigned i = 0; i < swtDesc->bbsCount; i++)
         {
+            FlowEdge* const   succEdge = swtDesc->bbsDstTab[i];
+            BasicBlock* const targ     = succEdge->getDestinationBlock();
             if (BitVecOps::IsMember(&blockVecTraits, uniqueSuccBlocks, targ->bbNum))
             {
-                nonDups[nonDupInd] = targ;
+                nonDups[nonDupInd] = succEdge;
                 nonDupInd++;
                 BitVecOps::RemoveElemD(&blockVecTraits, uniqueSuccBlocks, targ->bbNum);
             }
@@ -482,87 +617,6 @@ Compiler::SwitchUniqueSuccSet Compiler::GetDescriptorForSwitch(BasicBlock* switc
     }
 }
 
-void Compiler::SwitchUniqueSuccSet::UpdateTarget(CompAllocator alloc,
-                                                 BasicBlock*   switchBlk,
-                                                 BasicBlock*   from,
-                                                 BasicBlock*   to)
-{
-    assert(switchBlk->KindIs(BBJ_SWITCH)); // Precondition.
-
-    // Is "from" still in the switch table (because it had more than one entry before?)
-    bool fromStillPresent = false;
-    for (BasicBlock* const bTarget : switchBlk->SwitchTargets())
-    {
-        if (bTarget == from)
-        {
-            fromStillPresent = true;
-            break;
-        }
-    }
-
-    // Is "to" already in "this"?
-    bool toAlreadyPresent = false;
-    for (unsigned i = 0; i < numDistinctSuccs; i++)
-    {
-        if (nonDuplicates[i] == to)
-        {
-            toAlreadyPresent = true;
-            break;
-        }
-    }
-
-    // Four cases:
-    //   If "from" is still present, and "to" is already present, do nothing
-    //   If "from" is still present, and "to" is not, must reallocate to add an entry.
-    //   If "from" is not still present, and "to" is not present, write "to" where "from" was.
-    //   If "from" is not still present, but "to" is present, remove "from".
-    if (fromStillPresent && toAlreadyPresent)
-    {
-        return;
-    }
-    else if (fromStillPresent && !toAlreadyPresent)
-    {
-        // reallocate to add an entry
-        BasicBlock** newNonDups = new (alloc) BasicBlock*[numDistinctSuccs + 1];
-        memcpy(newNonDups, nonDuplicates, numDistinctSuccs * sizeof(BasicBlock*));
-        newNonDups[numDistinctSuccs] = to;
-        numDistinctSuccs++;
-        nonDuplicates = newNonDups;
-    }
-    else if (!fromStillPresent && !toAlreadyPresent)
-    {
-        // write "to" where "from" was
-        INDEBUG(bool foundFrom = false);
-        for (unsigned i = 0; i < numDistinctSuccs; i++)
-        {
-            if (nonDuplicates[i] == from)
-            {
-                nonDuplicates[i] = to;
-                INDEBUG(foundFrom = true);
-                break;
-            }
-        }
-        assert(foundFrom);
-    }
-    else
-    {
-        assert(!fromStillPresent && toAlreadyPresent);
-        // remove "from".
-        INDEBUG(bool foundFrom = false);
-        for (unsigned i = 0; i < numDistinctSuccs; i++)
-        {
-            if (nonDuplicates[i] == from)
-            {
-                nonDuplicates[i] = nonDuplicates[numDistinctSuccs - 1];
-                numDistinctSuccs--;
-                INDEBUG(foundFrom = true);
-                break;
-            }
-        }
-        assert(foundFrom);
-    }
-}
-
 /*****************************************************************************
  *
  *  Simple utility function to remove an entry for a block in the switch desc
@@ -577,20 +631,3 @@ void Compiler::fgInvalidateSwitchDescMapEntry(BasicBlock* block)
         m_switchDescMap->Remove(block);
     }
 }
-
-void Compiler::UpdateSwitchTableTarget(BasicBlock* switchBlk, BasicBlock* from, BasicBlock* to)
-{
-    if (m_switchDescMap == nullptr)
-    {
-        return; // No mappings, nothing to do.
-    }
-
-    // Otherwise...
-    BlockToSwitchDescMap* switchMap = GetSwitchDescMap();
-    SwitchUniqueSuccSet*  res       = switchMap->LookupPointer(switchBlk);
-    if (res != nullptr)
-    {
-        // If no result, nothing to do. Otherwise, update it.
-        res->UpdateTarget(getAllocator(), switchBlk, from, to);
-    }
-}
diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp
index 54003637e7e3..0f0fb3c7c484 100644
--- a/src/coreclr/jit/fginline.cpp
+++ b/src/coreclr/jit/fginline.cpp
@@ -214,7 +214,8 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
         UseExecutionOrder = true,
     };
 
-    SubstitutePlaceholdersAndDevirtualizeWalker(Compiler* comp) : GenTreeVisitor(comp)
+    SubstitutePlaceholdersAndDevirtualizeWalker(Compiler* comp)
+        : GenTreeVisitor(comp)
     {
     }
 
@@ -261,10 +262,8 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
             {
                 GenTree* effectiveValue = value->gtEffectiveVal();
 
-                noway_assert(
-                    !varTypeIsStruct(effectiveValue) || (effectiveValue->OperGet() != GT_RET_EXPR) ||
-                    !m_compiler->IsMultiRegReturnedType(effectiveValue->AsRetExpr()->gtInlineCandidate->gtRetClsHnd,
-                                                        CorInfoCallConvExtension::Managed));
+                noway_assert(!varTypeIsStruct(effectiveValue) || (effectiveValue->OperGet() != GT_RET_EXPR) ||
+                             !effectiveValue->AsRetExpr()->gtInlineCandidate->HasMultiRegRetVal());
             }
         }
 
@@ -318,18 +317,9 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
     //
     void UpdateInlineReturnExpressionPlaceHolder(GenTree** use, GenTree* parent)
     {
-        CORINFO_CLASS_HANDLE retClsHnd = NO_CLASS_HANDLE;
-
         while ((*use)->OperIs(GT_RET_EXPR))
         {
             GenTree* tree = *use;
-            // We are going to copy the tree from the inlinee,
-            // so record the handle now.
-            //
-            if (varTypeIsStruct(tree))
-            {
-                retClsHnd = tree->AsRetExpr()->gtInlineCandidate->gtRetClsHnd;
-            }
 
             // Skip through chains of GT_RET_EXPRs (say from nested inlines)
             // to the actual tree to use.
@@ -397,6 +387,7 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
 #endif // DEBUG
         }
 
+#if FEATURE_MULTIREG_RET
         // If an inline was rejected and the call returns a struct, we may
         // have deferred some work when importing call for cases where the
         // struct is returned in multiple registers.
@@ -405,55 +396,23 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
         // candidates.
         //
         // Do the deferred work now.
-        if (retClsHnd != NO_CLASS_HANDLE)
+        if ((*use)->IsCall() && varTypeIsStruct(*use) && (*use)->AsCall()->HasMultiRegRetVal())
         {
-            Compiler::structPassingKind howToReturnStruct;
-            var_types                   returnType =
-                m_compiler->getReturnTypeForStruct(retClsHnd, CorInfoCallConvExtension::Managed, &howToReturnStruct);
-
-            switch (howToReturnStruct)
+            // See assert below, we only look one level above for a store parent.
+            if (parent->OperIsStore())
             {
-#if FEATURE_MULTIREG_RET
-                // Force multi-reg nodes into the "lcl = node()" form if necessary.
-                // TODO-ASG: this code could be improved substantially. There is no need
-                // to introduce temps if the inlinee is not actually a multi-reg node.
-                //
-                case Compiler::SPK_ByValue:
-                case Compiler::SPK_ByValueAsHfa:
-                {
-                    // See assert below, we only look one level above for a store parent.
-                    if (parent->OperIsStore())
-                    {
-                        // The inlinee can only be the value.
-                        assert(parent->Data() == *use);
-                        AttachStructInlineeToStore(parent, retClsHnd);
-                    }
-                    else
-                    {
-                        // Just store the inlinee to a variable to keep it simple.
-                        *use = StoreStructInlineeToVar(*use, retClsHnd);
-                    }
-                    m_madeChanges = true;
-                }
-                break;
-
-#endif // FEATURE_MULTIREG_RET
-
-                case Compiler::SPK_EnclosingType:
-                case Compiler::SPK_PrimitiveType:
-                    // No work needs to be done, the call has struct type and should keep it.
-                    break;
-
-                case Compiler::SPK_ByReference:
-                    // We should have already added the return buffer
-                    // when we first imported the call
-                    break;
-
-                default:
-                    noway_assert(!"Unexpected struct passing kind");
-                    break;
+                // The inlinee can only be the value.
+                assert(parent->Data() == *use);
+                AttachStructInlineeToStore(parent, (*use)->AsCall()->gtRetClsHnd);
             }
+            else
+            {
+                // Just store the inlinee to a variable to keep it simple.
+                *use = StoreStructInlineeToVar(*use, (*use)->AsCall()->gtRetClsHnd);
+            }
+            m_madeChanges = true;
         }
+#endif
     }
 
 #if FEATURE_MULTIREG_RET
@@ -474,7 +433,7 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
         GenTree* dst     = store;
         GenTree* inlinee = store->Data();
 
-        // We need to force all assignments from multi-reg nodes into the "lcl = node()" form.
+        // We need to force all stores from multi-reg nodes into the "lcl = node()" form.
         if (inlinee->IsMultiRegNode())
         {
             // Special case: we already have a local, the only thing to do is mark it appropriately. Except
@@ -503,7 +462,7 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
     //
     GenTree* StoreStructInlineeToVar(GenTree* inlinee, CORINFO_CLASS_HANDLE retClsHnd)
     {
-        assert(!inlinee->OperIs(GT_MKREFANY, GT_RET_EXPR));
+        assert(!inlinee->OperIs(GT_RET_EXPR));
 
         unsigned   lclNum = m_compiler->lvaGrabTemp(false DEBUGARG("RetBuf for struct inline return candidates."));
         LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
@@ -632,9 +591,9 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
 
                 if (lcl->lvSingleDef)
                 {
-                    bool                 isExact   = false;
-                    bool                 isNonNull = false;
-                    CORINFO_CLASS_HANDLE newClass  = m_compiler->gtGetClassHandle(value, &isExact, &isNonNull);
+                    bool                 isExact;
+                    bool                 isNonNull;
+                    CORINFO_CLASS_HANDLE newClass = m_compiler->gtGetClassHandle(value, &isExact, &isNonNull);
 
                     if (newClass != NO_CLASS_HANDLE)
                     {
@@ -648,7 +607,7 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
             //
             if (value->OperIs(GT_LCL_VAR) && (value->AsLclVar()->GetLclNum() == lclNum))
             {
-                JITDUMP("... removing self-assignment\n");
+                JITDUMP("... removing self-store\n");
                 DISPTREE(tree);
                 tree->gtBashToNOP();
                 m_madeChanges = true;
@@ -675,14 +634,13 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
 
                 if (condTree->IsIntegralConst(0))
                 {
-                    m_compiler->fgRemoveRefPred(block->GetTrueTarget(), block);
-                    block->SetKindAndTarget(BBJ_ALWAYS, block->Next());
-                    block->SetFlags(BBF_NONE_QUIRK);
+                    m_compiler->fgRemoveRefPred(block->GetTrueEdge());
+                    block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetFalseEdge());
                 }
                 else
                 {
-                    m_compiler->fgRemoveRefPred(block->GetFalseTarget(), block);
-                    block->SetKind(BBJ_ALWAYS);
+                    m_compiler->fgRemoveRefPred(block->GetFalseEdge());
+                    block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetTrueEdge());
                 }
             }
         }
@@ -1177,6 +1135,7 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineRe
     inlineInfo.retExprClassHnd        = nullptr;
     inlineInfo.retExprClassHndIsExact = false;
     inlineInfo.inlineResult           = inlineResult;
+    inlineInfo.inlInstParamArgInfo    = nullptr;
 #ifdef FEATURE_SIMD
     inlineInfo.hasSIMDTypeArgLocalOrReturn = false;
 #endif // FEATURE_SIMD
@@ -1217,80 +1176,80 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineRe
     param.inlineInfo          = &inlineInfo;
     bool success              = eeRunWithErrorTrap<Param>(
         [](Param* pParam) {
-            // Init the local var info of the inlinee
-            pParam->pThis->impInlineInitVars(pParam->inlineInfo);
+        // Init the local var info of the inlinee
+        pParam->pThis->impInlineInitVars(pParam->inlineInfo);
 
-            if (pParam->inlineInfo->inlineResult->IsCandidate())
-            {
-                /* Clear the temp table */
-                memset(pParam->inlineInfo->lclTmpNum, -1, sizeof(pParam->inlineInfo->lclTmpNum));
+        if (pParam->inlineInfo->inlineResult->IsCandidate())
+        {
+            /* Clear the temp table */
+            memset(pParam->inlineInfo->lclTmpNum, -1, sizeof(pParam->inlineInfo->lclTmpNum));
 
-                //
-                // Prepare the call to jitNativeCode
-                //
+            //
+            // Prepare the call to jitNativeCode
+            //
 
-                pParam->inlineInfo->InlinerCompiler = pParam->pThis;
-                if (pParam->pThis->impInlineInfo == nullptr)
-                {
-                    pParam->inlineInfo->InlineRoot = pParam->pThis;
-                }
-                else
-                {
-                    pParam->inlineInfo->InlineRoot = pParam->pThis->impInlineInfo->InlineRoot;
-                }
+            pParam->inlineInfo->InlinerCompiler = pParam->pThis;
+            if (pParam->pThis->impInlineInfo == nullptr)
+            {
+                pParam->inlineInfo->InlineRoot = pParam->pThis;
+            }
+            else
+            {
+                pParam->inlineInfo->InlineRoot = pParam->pThis->impInlineInfo->InlineRoot;
+            }
 
-                // The inline context is part of debug info and must be created
-                // before we start creating statements; we lazily create it as
-                // late as possible, which is here.
-                pParam->inlineInfo->inlineContext =
-                    pParam->inlineInfo->InlineRoot->m_inlineStrategy
-                        ->NewContext(pParam->inlineInfo->inlineCandidateInfo->inlinersContext,
-                                     pParam->inlineInfo->iciStmt, pParam->inlineInfo->iciCall);
-                pParam->inlineInfo->argCnt                   = pParam->inlineCandidateInfo->methInfo.args.totalILArgs();
-                pParam->inlineInfo->tokenLookupContextHandle = pParam->inlineCandidateInfo->exactContextHnd;
-
-                JITLOG_THIS(pParam->pThis,
-                            (LL_INFO100000, "INLINER: inlineInfo.tokenLookupContextHandle for %s set to 0x%p:\n",
-                             pParam->pThis->eeGetMethodFullName(pParam->fncHandle),
-                             pParam->pThis->dspPtr(pParam->inlineInfo->tokenLookupContextHandle)));
-
-                JitFlags compileFlagsForInlinee = *pParam->pThis->opts.jitFlags;
-
-                // The following flags are lost when inlining.
-                // (This is checked in Compiler::compInitOptions().)
-                compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_BBINSTR);
-                compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_BBINSTR_IF_LOOPS);
-                compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_PROF_ENTERLEAVE);
-                compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_DEBUG_EnC);
-                compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_REVERSE_PINVOKE);
-                compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_TRACK_TRANSITIONS);
+            // The inline context is part of debug info and must be created
+            // before we start creating statements; we lazily create it as
+            // late as possible, which is here.
+            pParam->inlineInfo->inlineContext =
+                pParam->inlineInfo->InlineRoot->m_inlineStrategy
+                    ->NewContext(pParam->inlineInfo->inlineCandidateInfo->inlinersContext, pParam->inlineInfo->iciStmt,
+                                              pParam->inlineInfo->iciCall);
+            pParam->inlineInfo->argCnt                   = pParam->inlineCandidateInfo->methInfo.args.totalILArgs();
+            pParam->inlineInfo->tokenLookupContextHandle = pParam->inlineCandidateInfo->exactContextHnd;
+
+            JITLOG_THIS(pParam->pThis,
+                                     (LL_INFO100000, "INLINER: inlineInfo.tokenLookupContextHandle for %s set to 0x%p:\n",
+                         pParam->pThis->eeGetMethodFullName(pParam->fncHandle),
+                         pParam->pThis->dspPtr(pParam->inlineInfo->tokenLookupContextHandle)));
+
+            JitFlags compileFlagsForInlinee = *pParam->pThis->opts.jitFlags;
+
+            // The following flags are lost when inlining.
+            // (This is checked in Compiler::compInitOptions().)
+            compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_BBINSTR);
+            compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_BBINSTR_IF_LOOPS);
+            compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_PROF_ENTERLEAVE);
+            compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_DEBUG_EnC);
+            compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_REVERSE_PINVOKE);
+            compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_TRACK_TRANSITIONS);
 
 #ifdef DEBUG
-                if (pParam->pThis->verbose)
-                {
-                    printf("\nInvoking compiler for the inlinee method %s :\n",
-                           pParam->pThis->eeGetMethodFullName(pParam->fncHandle));
-                }
+            if (pParam->pThis->verbose)
+            {
+                printf("\nInvoking compiler for the inlinee method %s :\n",
+                       pParam->pThis->eeGetMethodFullName(pParam->fncHandle));
+            }
 #endif // DEBUG
 
-                int result =
-                    jitNativeCode(pParam->fncHandle, pParam->inlineCandidateInfo->methInfo.scope,
-                                  pParam->pThis->info.compCompHnd, &pParam->inlineCandidateInfo->methInfo,
-                                  (void**)pParam->inlineInfo, nullptr, &compileFlagsForInlinee, pParam->inlineInfo);
+            int result =
+                jitNativeCode(pParam->fncHandle, pParam->inlineCandidateInfo->methInfo.scope,
+                              pParam->pThis->info.compCompHnd, &pParam->inlineCandidateInfo->methInfo,
+                              (void**)pParam->inlineInfo, nullptr, &compileFlagsForInlinee, pParam->inlineInfo);
 
-                if (result != CORJIT_OK)
-                {
-                    // If we haven't yet determined why this inline fails, use
-                    // a catch-all something bad happened observation.
-                    InlineResult* innerInlineResult = pParam->inlineInfo->inlineResult;
+            if (result != CORJIT_OK)
+            {
+                // If we haven't yet determined why this inline fails, use
+                // a catch-all something bad happened observation.
+                InlineResult* innerInlineResult = pParam->inlineInfo->inlineResult;
 
-                    if (!innerInlineResult->IsFailure())
-                    {
-                        innerInlineResult->NoteFatal(InlineObservation::CALLSITE_COMPILATION_FAILURE);
-                    }
+                if (!innerInlineResult->IsFailure())
+                {
+                    innerInlineResult->NoteFatal(InlineObservation::CALLSITE_COMPILATION_FAILURE);
                 }
             }
-        },
+        }
+    },
         &param);
     if (!success)
     {
@@ -1533,14 +1492,8 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
                 JITDUMP("\nConvert bbKind of " FMT_BB " to BBJ_ALWAYS to bottomBlock " FMT_BB "\n", block->bbNum,
                         bottomBlock->bbNum);
 
-                block->SetKindAndTarget(BBJ_ALWAYS, bottomBlock);
                 FlowEdge* const newEdge = fgAddRefPred(bottomBlock, block);
-                newEdge->setLikelihood(1.0);
-
-                if (block == InlineeCompiler->fgLastBB)
-                {
-                    block->SetFlags(BBF_NONE_QUIRK);
-                }
+                block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
             }
         }
 
@@ -1551,11 +1504,9 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
         // Insert inlinee's blocks into inliner's block list.
         assert(topBlock->KindIs(BBJ_ALWAYS));
         assert(topBlock->TargetIs(bottomBlock));
+        fgRedirectTargetEdge(topBlock, InlineeCompiler->fgFirstBB);
+
         topBlock->SetNext(InlineeCompiler->fgFirstBB);
-        topBlock->SetTarget(topBlock->Next());
-        topBlock->SetFlags(BBF_NONE_QUIRK);
-        FlowEdge* const oldEdge = fgRemoveRefPred(bottomBlock, topBlock);
-        fgAddRefPred(InlineeCompiler->fgFirstBB, topBlock, oldEdge);
         InlineeCompiler->fgLastBB->SetNext(bottomBlock);
 
         //
@@ -1631,7 +1582,6 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
     }
 
     // Update optMethodFlags
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     unsigned optMethodFlagsBefore = optMethodFlags;
@@ -1666,6 +1616,175 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
     iciStmt->SetRootNode(gtNewNothingNode());
 }
 
+//------------------------------------------------------------------------
+// fgInsertInlineeArgument: wire up the given argument from the callsite with the inlinee
+//
+// Arguments:
+//    argInfo   - information about the argument
+//    block     - block to insert the argument into
+//    afterStmt - statement to insert the argument after
+//    newStmt   - updated with the new statement
+//    callDI    - debug info for the call
+//
+void Compiler::fgInsertInlineeArgument(
+    const InlArgInfo& argInfo, BasicBlock* block, Statement** afterStmt, Statement** newStmt, const DebugInfo& callDI)
+{
+    const bool argIsSingleDef = !argInfo.argHasLdargaOp && !argInfo.argHasStargOp;
+    CallArg*   arg            = argInfo.arg;
+    GenTree*   argNode        = arg->GetNode();
+
+    assert(!argNode->OperIs(GT_RET_EXPR));
+
+    if (argInfo.argHasTmp)
+    {
+        noway_assert(argInfo.argIsUsed);
+
+        /* argBashTmpNode is non-NULL iff the argument's value was
+           referenced exactly once by the original IL. This offers an
+           opportunity to avoid an intermediate temp and just insert
+           the original argument tree.
+
+           However, if the temp node has been cloned somewhere while
+           importing (e.g. when handling isinst or dup), or if the IL
+           took the address of the argument, then argBashTmpNode will
+           be set (because the value was only explicitly retrieved
+           once) but the optimization cannot be applied.
+         */
+
+        GenTree* argSingleUseNode = argInfo.argBashTmpNode;
+
+        if ((argSingleUseNode != nullptr) && !(argSingleUseNode->gtFlags & GTF_VAR_MOREUSES) && argIsSingleDef)
+        {
+            // Change the temp in-place to the actual argument.
+            // We currently do not support this for struct arguments, so it must not be a GT_BLK.
+            assert(argNode->gtOper != GT_BLK);
+            argSingleUseNode->ReplaceWith(argNode, this);
+            return;
+        }
+        else
+        {
+            // We're going to assign the argument value to the temp we use for it in the inline body.
+            GenTree* store = gtNewTempStore(argInfo.argTmpNum, argNode);
+
+            *newStmt = gtNewStmt(store, callDI);
+            fgInsertStmtAfter(block, *afterStmt, *newStmt);
+            *afterStmt = *newStmt;
+            DISPSTMT(*afterStmt);
+        }
+    }
+    else if (argInfo.argIsByRefToStructLocal)
+    {
+        // Do nothing. Arg was directly substituted as we read
+        // the inlinee.
+    }
+    else
+    {
+        // The argument is either not used or a const or lcl var
+        noway_assert(!argInfo.argIsUsed || argInfo.argIsInvariant || argInfo.argIsLclVar);
+        noway_assert((argInfo.argIsLclVar == 0) ==
+                     (argNode->gtOper != GT_LCL_VAR || (argNode->gtFlags & GTF_GLOB_REF)));
+
+        // If the argument has side effects, append it
+        if (argInfo.argHasSideEff)
+        {
+            noway_assert(argInfo.argIsUsed == false);
+            *newStmt    = nullptr;
+            bool append = true;
+
+            if (argNode->gtOper == GT_BLK)
+            {
+                // Don't put GT_BLK node under a GT_COMMA.
+                // Codegen can't deal with it.
+                // Just hang the address here in case there are side-effect.
+                *newStmt = gtNewStmt(gtUnusedValNode(argNode->AsOp()->gtOp1), callDI);
+            }
+            else
+            {
+                // In some special cases, unused args with side effects can
+                // trigger further changes.
+                //
+                // (1) If the arg is a static field access and the field access
+                // was produced by a call to EqualityComparer<T>.get_Default, the
+                // helper call to ensure the field has a value can be suppressed.
+                // This helper call is marked as a "Special DCE" helper during
+                // importation, over in fgGetStaticsCCtorHelper.
+                //
+                // (2) NYI. If we find that the actual arg expression
+                // has no side effects, we can skip appending all
+                // together. This will help jit TP a bit.
+                //
+                assert(!argNode->OperIs(GT_RET_EXPR));
+
+                // For case (1)
+                //
+                // Look for the following tree shapes
+                // prejit: (IND (ADD (CONST, CALL(special dce helper...))))
+                // jit   : (COMMA (CALL(special dce helper...), (FIELD ...)))
+                if (argNode->gtOper == GT_COMMA)
+                {
+                    // Look for (COMMA (CALL(special dce helper...), (FIELD ...)))
+                    GenTree* op1 = argNode->AsOp()->gtOp1;
+                    GenTree* op2 = argNode->AsOp()->gtOp2;
+                    if (op1->IsCall() && ((op1->AsCall()->gtCallMoreFlags & GTF_CALL_M_HELPER_SPECIAL_DCE) != 0) &&
+                        op2->OperIs(GT_IND) && op2->gtGetOp1()->IsIconHandle() && ((op2->gtFlags & GTF_EXCEPT) == 0))
+                    {
+                        JITDUMP("\nPerforming special dce on unused arg [%06u]:"
+                                " actual arg [%06u] helper call [%06u]\n",
+                                argNode->gtTreeID, argNode->gtTreeID, op1->gtTreeID);
+                        // Drop the whole tree
+                        append = false;
+                    }
+                }
+                else if (argNode->gtOper == GT_IND)
+                {
+                    // Look for (IND (ADD (CONST, CALL(special dce helper...))))
+                    GenTree* addr = argNode->AsOp()->gtOp1;
+
+                    if (addr->gtOper == GT_ADD)
+                    {
+                        GenTree* op1 = addr->AsOp()->gtOp1;
+                        GenTree* op2 = addr->AsOp()->gtOp2;
+                        if (op1->IsCall() && ((op1->AsCall()->gtCallMoreFlags & GTF_CALL_M_HELPER_SPECIAL_DCE) != 0) &&
+                            op2->IsCnsIntOrI())
+                        {
+                            // Drop the whole tree
+                            JITDUMP("\nPerforming special dce on unused arg [%06u]:"
+                                    " actual arg [%06u] helper call [%06u]\n",
+                                    argNode->gtTreeID, argNode->gtTreeID, op1->gtTreeID);
+                            append = false;
+                        }
+                    }
+                }
+            }
+
+            if (!append)
+            {
+                assert(*newStmt == nullptr);
+                JITDUMP("Arg tree side effects were discardable, not appending anything for arg\n");
+            }
+            else
+            {
+                // If we don't have something custom to append,
+                // just append the arg node as an unused value.
+                if (*newStmt == nullptr)
+                {
+                    *newStmt = gtNewStmt(gtUnusedValNode(argNode), callDI);
+                }
+
+                fgInsertStmtAfter(block, *afterStmt, *newStmt);
+                *afterStmt = *newStmt;
+                DISPSTMT(*afterStmt);
+            }
+        }
+        else if (argNode->IsBoxedValue())
+        {
+            // Try to clean up any unnecessary boxing side effects
+            // since the box itself will be ignored.
+            gtTryRemoveBoxUpstreamEffects(argNode);
+        }
+    }
+}
+
 //------------------------------------------------------------------------
 // fgInlinePrependStatements: prepend statements needed to match up
 // caller and inlined callee
@@ -1687,28 +1806,18 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
 //    Newly added statements are placed just after the original call
 //    and are are given the same inline context as the call any calls
 //    added here will appear to have been part of the immediate caller.
-
+//
 Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
 {
     BasicBlock*      block     = inlineInfo->iciBlock;
     Statement*       callStmt  = inlineInfo->iciStmt;
     const DebugInfo& callDI    = callStmt->GetDebugInfo();
-    Statement*       postStmt  = callStmt->GetNextStmt();
     Statement*       afterStmt = callStmt; // afterStmt is the place where the new statements should be inserted after.
     Statement*       newStmt   = nullptr;
     GenTreeCall*     call      = inlineInfo->iciCall->AsCall();
 
     noway_assert(call->gtOper == GT_CALL);
 
-#ifdef DEBUG
-    if (0 && verbose)
-    {
-        printf("\nfgInlinePrependStatements for iciCall= ");
-        printTreeID(call);
-        printf(":\n");
-    }
-#endif
-
     // Prepend statements for any initialization / side effects
 
     InlArgInfo*    inlArgInfo = inlineInfo->inlArgInfo;
@@ -1729,7 +1838,7 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
     if (call->gtFlags & GTF_CALL_NULLCHECK && !inlineInfo->thisDereferencedFirst)
     {
         // Call impInlineFetchArg to "reserve" a temp for the "this" pointer.
-        GenTree* thisOp = impInlineFetchArg(0, inlArgInfo, lclVarInfo);
+        GenTree* thisOp = impInlineFetchArg(inlArgInfo[0], lclVarInfo[0]);
         if (fgAddrCouldBeNull(thisOp))
         {
             nullcheck = gtNewNullCheck(thisOp, block);
@@ -1738,183 +1847,19 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
         }
     }
 
-    /* Treat arguments that had to be assigned to temps */
-    if (inlineInfo->argCnt)
+    // Append the InstParam
+    if (inlineInfo->inlInstParamArgInfo != nullptr)
     {
+        fgInsertInlineeArgument(*inlineInfo->inlInstParamArgInfo, block, &afterStmt, &newStmt, callDI);
+    }
 
-#ifdef DEBUG
-        if (verbose)
-        {
-            printf("\nArguments setup:\n");
-        }
-#endif // DEBUG
-
+    // Treat arguments that had to be assigned to temps
+    if (inlineInfo->argCnt)
+    {
+        JITDUMP("\nArguments setup:\n");
         for (unsigned argNum = 0; argNum < inlineInfo->argCnt; argNum++)
         {
-            const InlArgInfo& argInfo        = inlArgInfo[argNum];
-            const bool        argIsSingleDef = !argInfo.argHasLdargaOp && !argInfo.argHasStargOp;
-            CallArg*          arg            = argInfo.arg;
-            GenTree*          argNode        = arg->GetNode();
-
-            assert(!argNode->OperIs(GT_RET_EXPR));
-
-            if (argInfo.argHasTmp)
-            {
-                noway_assert(argInfo.argIsUsed);
-
-                /* argBashTmpNode is non-NULL iff the argument's value was
-                   referenced exactly once by the original IL. This offers an
-                   opportunity to avoid an intermediate temp and just insert
-                   the original argument tree.
-
-                   However, if the temp node has been cloned somewhere while
-                   importing (e.g. when handling isinst or dup), or if the IL
-                   took the address of the argument, then argBashTmpNode will
-                   be set (because the value was only explicitly retrieved
-                   once) but the optimization cannot be applied.
-                 */
-
-                GenTree* argSingleUseNode = argInfo.argBashTmpNode;
-
-                if ((argSingleUseNode != nullptr) && !(argSingleUseNode->gtFlags & GTF_VAR_MOREUSES) && argIsSingleDef)
-                {
-                    // Change the temp in-place to the actual argument.
-                    // We currently do not support this for struct arguments, so it must not be a GT_BLK.
-                    assert(argNode->gtOper != GT_BLK);
-                    argSingleUseNode->ReplaceWith(argNode, this);
-                    continue;
-                }
-                else
-                {
-                    // We're going to assign the argument value to the temp we use for it in the inline body.
-                    GenTree* store = gtNewTempStore(argInfo.argTmpNum, argNode);
-
-                    newStmt = gtNewStmt(store, callDI);
-                    fgInsertStmtAfter(block, afterStmt, newStmt);
-                    afterStmt = newStmt;
-
-                    DISPSTMT(afterStmt);
-                }
-            }
-            else if (argInfo.argIsByRefToStructLocal)
-            {
-                // Do nothing. Arg was directly substituted as we read
-                // the inlinee.
-            }
-            else
-            {
-                // The argument is either not used or a const or lcl var
-                noway_assert(!argInfo.argIsUsed || argInfo.argIsInvariant || argInfo.argIsLclVar);
-                noway_assert((argInfo.argIsLclVar == 0) ==
-                             (argNode->gtOper != GT_LCL_VAR || (argNode->gtFlags & GTF_GLOB_REF)));
-
-                // If the argument has side effects, append it
-                if (argInfo.argHasSideEff)
-                {
-                    noway_assert(argInfo.argIsUsed == false);
-                    newStmt     = nullptr;
-                    bool append = true;
-
-                    if (argNode->gtOper == GT_BLK || argNode->gtOper == GT_MKREFANY)
-                    {
-                        // Don't put GT_BLK node under a GT_COMMA.
-                        // Codegen can't deal with it.
-                        // Just hang the address here in case there are side-effect.
-                        newStmt = gtNewStmt(gtUnusedValNode(argNode->AsOp()->gtOp1), callDI);
-                    }
-                    else
-                    {
-                        // In some special cases, unused args with side effects can
-                        // trigger further changes.
-                        //
-                        // (1) If the arg is a static field access and the field access
-                        // was produced by a call to EqualityComparer<T>.get_Default, the
-                        // helper call to ensure the field has a value can be suppressed.
-                        // This helper call is marked as a "Special DCE" helper during
-                        // importation, over in fgGetStaticsCCtorHelper.
-                        //
-                        // (2) NYI. If we find that the actual arg expression
-                        // has no side effects, we can skip appending all
-                        // together. This will help jit TP a bit.
-                        //
-                        assert(!argNode->OperIs(GT_RET_EXPR));
-
-                        // For case (1)
-                        //
-                        // Look for the following tree shapes
-                        // prejit: (IND (ADD (CONST, CALL(special dce helper...))))
-                        // jit   : (COMMA (CALL(special dce helper...), (FIELD ...)))
-                        if (argNode->gtOper == GT_COMMA)
-                        {
-                            // Look for (COMMA (CALL(special dce helper...), (FIELD ...)))
-                            GenTree* op1 = argNode->AsOp()->gtOp1;
-                            GenTree* op2 = argNode->AsOp()->gtOp2;
-                            if (op1->IsCall() &&
-                                ((op1->AsCall()->gtCallMoreFlags & GTF_CALL_M_HELPER_SPECIAL_DCE) != 0) &&
-                                op2->OperIs(GT_IND) && op2->gtGetOp1()->IsIconHandle() &&
-                                ((op2->gtFlags & GTF_EXCEPT) == 0))
-                            {
-                                JITDUMP("\nPerforming special dce on unused arg [%06u]:"
-                                        " actual arg [%06u] helper call [%06u]\n",
-                                        argNode->gtTreeID, argNode->gtTreeID, op1->gtTreeID);
-                                // Drop the whole tree
-                                append = false;
-                            }
-                        }
-                        else if (argNode->gtOper == GT_IND)
-                        {
-                            // Look for (IND (ADD (CONST, CALL(special dce helper...))))
-                            GenTree* addr = argNode->AsOp()->gtOp1;
-
-                            if (addr->gtOper == GT_ADD)
-                            {
-                                GenTree* op1 = addr->AsOp()->gtOp1;
-                                GenTree* op2 = addr->AsOp()->gtOp2;
-                                if (op1->IsCall() &&
-                                    ((op1->AsCall()->gtCallMoreFlags & GTF_CALL_M_HELPER_SPECIAL_DCE) != 0) &&
-                                    op2->IsCnsIntOrI())
-                                {
-                                    // Drop the whole tree
-                                    JITDUMP("\nPerforming special dce on unused arg [%06u]:"
-                                            " actual arg [%06u] helper call [%06u]\n",
-                                            argNode->gtTreeID, argNode->gtTreeID, op1->gtTreeID);
-                                    append = false;
-                                }
-                            }
-                        }
-                    }
-
-                    if (!append)
-                    {
-                        assert(newStmt == nullptr);
-                        JITDUMP("Arg tree side effects were discardable, not appending anything for arg\n");
-                    }
-                    else
-                    {
-                        // If we don't have something custom to append,
-                        // just append the arg node as an unused value.
-                        if (newStmt == nullptr)
-                        {
-                            newStmt = gtNewStmt(gtUnusedValNode(argNode), callDI);
-                        }
-
-                        fgInsertStmtAfter(block, afterStmt, newStmt);
-                        afterStmt = newStmt;
-#ifdef DEBUG
-                        if (verbose)
-                        {
-                            gtDispStmt(afterStmt);
-                        }
-#endif // DEBUG
-                    }
-                }
-                else if (argNode->IsBoxedValue())
-                {
-                    // Try to clean up any unnecessary boxing side effects
-                    // since the box itself will be ignored.
-                    gtTryRemoveBoxUpstreamEffects(argNode);
-                }
-            }
+            fgInsertInlineeArgument(inlArgInfo[argNum], block, &afterStmt, &newStmt, callDI);
         }
     }
 
diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp
index 37c880892a1b..adb3df9327a2 100644
--- a/src/coreclr/jit/fgopt.cpp
+++ b/src/coreclr/jit/fgopt.cpp
@@ -30,8 +30,6 @@ void Compiler::fgComputeReturnBlocks()
         }
     }
 
-    fgReturnBlocksComputed = true;
-
 #ifdef DEBUG
     if (verbose)
     {
@@ -132,7 +130,7 @@ bool Compiler::fgRemoveUnreachableBlocks(CanRemoveBlockBody canRemoveBlock)
 
             block->RemoveFlags(BBF_REMOVED | BBF_INTERNAL);
             block->SetFlags(BBF_IMPORTED);
-            block->SetKindAndTarget(BBJ_THROW);
+            block->SetKindAndTargetEdge(BBJ_THROW);
             block->bbSetRunRarely();
         }
         else
@@ -164,35 +162,6 @@ bool Compiler::fgRemoveUnreachableBlocks(CanRemoveBlockBody canRemoveBlock)
     return changed;
 }
 
-//------------------------------------------------------------------------
-// fgComputeReachability: Compute the dominator and reachable sets.
-//
-// Returns:
-//    Suitable phase status
-//
-// Notes:
-//   Also computes the list of return blocks `fgReturnBlocks`
-//   and set of enter  blocks `fgEnterBlks`.
-//
-//   Delete unreachable blocks.
-//
-//   Assumes the predecessor lists are computed and correct.
-//
-PhaseStatus Compiler::fgComputeReachability()
-{
-    assert(fgPredsComputed);
-
-    bool madeChanges = fgDfsBlocksAndRemove() != PhaseStatus::MODIFIED_NOTHING;
-
-    madeChanges |= fgRenumberBlocks();
-
-    fgComputeReturnBlocks();
-    m_reachabilitySets = BlockReachabilitySets::Build(m_dfsTree);
-    m_domTree          = FlowGraphDominatorTree::Build(m_dfsTree);
-
-    return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
-}
-
 //------------------------------------------------------------------------
 // fgRemoveDeadBlocks: Identify all the unreachable blocks and remove them.
 //
@@ -307,7 +276,10 @@ bool Compiler::fgRemoveDeadBlocks()
 PhaseStatus Compiler::fgComputeDominators()
 {
     assert(m_dfsTree != nullptr);
-    m_domTree = FlowGraphDominatorTree::Build(m_dfsTree);
+    if (m_domTree == nullptr)
+    {
+        m_domTree = FlowGraphDominatorTree::Build(m_dfsTree);
+    }
 
     bool anyHandlers = false;
     for (EHblkDsc* const HBtab : EHClauses(this))
@@ -623,8 +595,8 @@ PhaseStatus Compiler::fgPostImportationCleanup()
 
                         // What follows is similar to fgNewBBInRegion, but we can't call that
                         // here as the oldTryEntry is no longer in the main bb list.
-                        newTryEntry = BasicBlock::New(this, BBJ_ALWAYS, tryEntryPrev->Next());
-                        newTryEntry->SetFlags(BBF_IMPORTED | BBF_INTERNAL | BBF_NONE_QUIRK);
+                        newTryEntry = BasicBlock::New(this);
+                        newTryEntry->SetFlags(BBF_IMPORTED | BBF_INTERNAL);
                         newTryEntry->bbRefs = 0;
 
                         // Set the right EH region indices on this new block.
@@ -643,12 +615,12 @@ PhaseStatus Compiler::fgPostImportationCleanup()
                         // plausible flow target. Simplest is to just mark it as a throw.
                         if (bbIsHandlerBeg(newTryEntry->Next()))
                         {
-                            newTryEntry->SetKindAndTarget(BBJ_THROW);
+                            newTryEntry->SetKindAndTargetEdge(BBJ_THROW);
                         }
                         else
                         {
                             FlowEdge* const newEdge = fgAddRefPred(newTryEntry->Next(), newTryEntry);
-                            newEdge->setLikelihood(1.0);
+                            newTryEntry->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
                         }
 
                         JITDUMP("OSR: changing start of try region #%u from " FMT_BB " to new " FMT_BB "\n",
@@ -759,7 +731,6 @@ PhaseStatus Compiler::fgPostImportationCleanup()
                 //
                 auto addConditionalFlow = [this, entryStateVar, &entryJumpTarget, &addedBlocks](BasicBlock* fromBlock,
                                                                                                 BasicBlock* toBlock) {
-
                     // We may have previously though this try entry was unreachable, but now we're going to
                     // step through it on the way to the OSR entry. So ensure it has plausible profile weight.
                     //
@@ -774,7 +745,7 @@ PhaseStatus Compiler::fgPostImportationCleanup()
                     fromBlock->SetFlags(BBF_INTERNAL);
                     newBlock->RemoveFlags(BBF_DONT_REMOVE);
                     addedBlocks++;
-                    FlowEdge* const normalTryEntryEdge = fgGetPredForBlock(newBlock, fromBlock);
+                    FlowEdge* const normalTryEntryEdge = fromBlock->GetTargetEdge();
 
                     GenTree* const entryStateLcl = gtNewLclvNode(entryStateVar, TYP_INT);
                     GenTree* const compareEntryStateToZero =
@@ -782,9 +753,9 @@ PhaseStatus Compiler::fgPostImportationCleanup()
                     GenTree* const jumpIfEntryStateZero = gtNewOperNode(GT_JTRUE, TYP_VOID, compareEntryStateToZero);
                     fgNewStmtAtBeg(fromBlock, jumpIfEntryStateZero);
 
-                    fromBlock->SetCond(toBlock, newBlock);
                     FlowEdge* const osrTryEntryEdge = fgAddRefPred(toBlock, fromBlock);
                     newBlock->inheritWeight(fromBlock);
+                    fromBlock->SetCond(osrTryEntryEdge, normalTryEntryEdge);
 
                     // Not sure what the correct edge likelihoods are just yet;
                     // for now we'll say the OSR path is the likely one.
@@ -833,9 +804,7 @@ PhaseStatus Compiler::fgPostImportationCleanup()
 
                 if (entryJumpTarget != osrEntry)
                 {
-                    fgFirstBB->SetTarget(entryJumpTarget);
-                    FlowEdge* const oldEdge = fgRemoveRefPred(osrEntry, fgFirstBB);
-                    fgAddRefPred(entryJumpTarget, fgFirstBB, oldEdge);
+                    fgRedirectTargetEdge(fgFirstBB, entryJumpTarget);
 
                     JITDUMP("OSR: redirecting flow from method entry " FMT_BB " to OSR entry " FMT_BB
                             " via step blocks.\n",
@@ -1006,7 +975,7 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext)
     noway_assert(block->hasTryIndex() == bNext->hasTryIndex());
 
     JITDUMP("\nCompacting " FMT_BB " into " FMT_BB ":\n", bNext->bbNum, block->bbNum);
-    fgRemoveRefPred(bNext, block);
+    fgRemoveRefPred(block->GetTargetEdge());
 
     if (bNext->countOfInEdges() > 0)
     {
@@ -1286,24 +1255,31 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext)
         case BBJ_ALWAYS:
         case BBJ_EHCATCHRET:
         case BBJ_EHFILTERRET:
-            block->SetKindAndTarget(bNext->GetKind(), bNext->GetTarget());
+        {
+            /* Update the predecessor list for bNext's target */
+            FlowEdge* const targetEdge = bNext->GetTargetEdge();
+            fgReplacePred(targetEdge, block);
 
-            /* Update the predecessor list for 'bNext->bbTarget' */
-            fgReplacePred(bNext->GetTarget(), bNext, block);
+            block->SetKindAndTargetEdge(bNext->GetKind(), targetEdge);
             break;
+        }
 
         case BBJ_COND:
-            block->SetCond(bNext->GetTrueTarget(), bNext->GetFalseTarget());
-
-            /* Update the predecessor list for 'bNext->bbTrueTarget' */
-            fgReplacePred(bNext->GetTrueTarget(), bNext, block);
+        {
+            /* Update the predecessor list for bNext's true target */
+            FlowEdge* const trueEdge  = bNext->GetTrueEdge();
+            FlowEdge* const falseEdge = bNext->GetFalseEdge();
+            fgReplacePred(trueEdge, block);
 
-            /* Update the predecessor list for 'bNext->bbFalseTarget' if it is different than 'bNext->bbTrueTarget' */
-            if (!bNext->TrueTargetIs(bNext->GetFalseTarget()))
+            /* Update the predecessor list for bNext's false target if it is different from the true target */
+            if (trueEdge != falseEdge)
             {
-                fgReplacePred(bNext->GetFalseTarget(), bNext, block);
+                fgReplacePred(falseEdge, block);
             }
+
+            block->SetCond(trueEdge, falseEdge);
             break;
+        }
 
         case BBJ_EHFINALLYRET:
             block->SetEhf(bNext->GetEhfTargets());
@@ -1331,17 +1307,6 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext)
 
     assert(block->KindIs(bNext->GetKind()));
 
-    if (block->KindIs(BBJ_ALWAYS))
-    {
-        // Propagate BBF_NONE_QUIRK flag
-        block->CopyFlags(bNext, BBF_NONE_QUIRK);
-    }
-    else
-    {
-        // It's no longer a BBJ_ALWAYS; remove the BBF_NONE_QUIRK flag.
-        block->RemoveFlags(BBF_NONE_QUIRK);
-    }
-
 #if DEBUG
     if (verbose && 0)
     {
@@ -1525,7 +1490,7 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc
                 bDest->SetFlags(BBF_RUN_RARELY); // Set the RarelyRun flag
             }
 
-            FlowEdge* edge2 = fgGetPredForBlock(bDest->GetTarget(), bDest);
+            FlowEdge* edge2 = bDest->GetTargetEdge();
 
             if (edge2 != nullptr)
             {
@@ -1561,19 +1526,21 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc
         {
             case BBJ_ALWAYS:
             case BBJ_CALLFINALLYRET:
-                block->SetTarget(bDest->GetTarget());
+            {
+                fgRedirectTargetEdge(block, bDest->GetTarget());
                 break;
+            }
 
             case BBJ_COND:
                 if (block->TrueTargetIs(bDest))
                 {
                     assert(!block->FalseTargetIs(bDest));
-                    block->SetTrueTarget(bDest->GetTarget());
+                    fgRedirectTrueEdge(block, bDest->GetTarget());
                 }
                 else
                 {
                     assert(block->FalseTargetIs(bDest));
-                    block->SetFalseTarget(bDest->GetTarget());
+                    fgRedirectFalseEdge(block, bDest->GetTarget());
                 }
                 break;
 
@@ -1581,8 +1548,6 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc
                 unreached();
         }
 
-        fgAddRefPred(bDest->GetTarget(), block, fgRemoveRefPred(bDest, block));
-
         return true;
     }
     return false;
@@ -1639,15 +1604,6 @@ bool Compiler::fgOptimizeEmptyBlock(BasicBlock* block)
                     break;
                 }
             }
-            else
-            {
-                // TODO-NoFallThrough: Once BBJ_COND blocks have pointers to their false branches,
-                // allow removing empty BBJ_ALWAYS and pointing bPrev's false branch to block->bbTarget.
-                if (bPrev->bbFallsThrough() && !block->JumpsToNext())
-                {
-                    break;
-                }
-            }
 
             /* Do not remove a block that jumps to itself - used for while (true){} */
             if (block->TargetIs(block))
@@ -1674,7 +1630,6 @@ bool Compiler::fgOptimizeEmptyBlock(BasicBlock* block)
                 break;
             }
 
-#if defined(FEATURE_EH_FUNCLETS)
             /* Don't remove an empty block that is in a different EH region
              * from its successor block, if the block is the target of a
              * catch return. It is required that the return address of a
@@ -1682,6 +1637,7 @@ bool Compiler::fgOptimizeEmptyBlock(BasicBlock* block)
              * abort exceptions to work. Insert a NOP in the empty block
              * to ensure we generate code for the block, if we keep it.
              */
+            if (UsesFunclets())
             {
                 BasicBlock* succBlock = block->GetTarget();
 
@@ -1739,7 +1695,6 @@ bool Compiler::fgOptimizeEmptyBlock(BasicBlock* block)
                     }
                 }
             }
-#endif // FEATURE_EH_FUNCLETS
 
             if (!ehCanDeleteEmptyBlock(block))
             {
@@ -1809,16 +1764,16 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
 {
     assert(block->KindIs(BBJ_SWITCH));
 
-    unsigned     jmpCnt = block->GetSwitchTargets()->bbsCount;
-    BasicBlock** jmpTab = block->GetSwitchTargets()->bbsDstTab;
-    BasicBlock*  bNewDest; // the new jump target for the current switch case
-    BasicBlock*  bDest;
-    bool         returnvalue = false;
+    unsigned    jmpCnt = block->GetSwitchTargets()->bbsCount;
+    FlowEdge**  jmpTab = block->GetSwitchTargets()->bbsDstTab;
+    BasicBlock* bNewDest; // the new jump target for the current switch case
+    BasicBlock* bDest;
+    bool        modified = false;
 
     do
     {
     REPEAT_SWITCH:;
-        bDest    = *jmpTab;
+        bDest    = (*jmpTab)->getDestinationBlock();
         bNewDest = bDest;
 
         // Do we have a JUMP to an empty unconditional JUMP block?
@@ -1858,7 +1813,7 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
             {
                 if (fgHaveValidEdgeWeights)
                 {
-                    FlowEdge* edge                = fgGetPredForBlock(bDest, block);
+                    FlowEdge* edge                = *jmpTab;
                     weight_t  branchThroughWeight = edge->edgeWeightMin();
 
                     if (bDest->bbWeight > branchThroughWeight)
@@ -1874,20 +1829,43 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
             }
 
             // Update the switch jump table
-            *jmpTab = bNewDest;
+            FlowEdge* const oldEdge = *jmpTab;
+            fgRemoveRefPred(oldEdge);
+            FlowEdge* const newEdge = fgAddRefPred(bNewDest, block, oldEdge);
+            *jmpTab                 = newEdge;
+
+            // Update edge likelihoods
+            // Note old edge may still be "in use" so we decrease its likelihood.
+            //
+
+            // We want to move this much likelihood from old->new
+            //
+            const weight_t likelihoodFraction = oldEdge->getLikelihood() / (oldEdge->getDupCount() + 1);
 
-            // Maintain, if necessary, the set of unique targets of "block."
-            UpdateSwitchTableTarget(block, bDest, bNewDest);
+            if (newEdge->getDupCount() == 1)
+            {
+                newEdge->setLikelihood(likelihoodFraction);
+            }
+            else
+            {
+                newEdge->addLikelihood(likelihoodFraction);
+            }
 
-            fgAddRefPred(bNewDest, block, fgRemoveRefPred(bDest, block));
+            oldEdge->addLikelihood(-likelihoodFraction);
 
             // we optimized a Switch label - goto REPEAT_SWITCH to follow this new jump
-            returnvalue = true;
+            modified = true;
 
             goto REPEAT_SWITCH;
         }
     } while (++jmpTab, --jmpCnt);
 
+    if (modified)
+    {
+        // Invalidate the set of unique targets for block, since we modified the targets
+        fgInvalidateSwitchDescMapEntry(block);
+    }
+
     Statement*  switchStmt = nullptr;
     LIR::Range* blockRange = nullptr;
 
@@ -1920,13 +1898,13 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
     if (block->NumSucc(this) == 1)
     {
         // Use BBJ_ALWAYS for a switch with only a default clause, or with only one unique successor.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
         if (verbose)
         {
             printf("\nRemoving a switch jump with a single target (" FMT_BB ")\n", block->bbNum);
             printf("BEFORE:\n");
+            fgDispBasicBlocks();
         }
 #endif // DEBUG
 
@@ -1998,18 +1976,16 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
         }
 
         // Change the switch jump into a BBJ_ALWAYS
-        block->SetKindAndTarget(BBJ_ALWAYS, block->GetSwitchTargets()->bbsDstTab[0]);
-        if (jmpCnt > 1)
+        block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetSwitchTargets()->bbsDstTab[0]);
+        for (unsigned i = 1; i < jmpCnt; ++i)
         {
-            for (unsigned i = 1; i < jmpCnt; ++i)
-            {
-                (void)fgRemoveRefPred(jmpTab[i], block);
-            }
+            fgRemoveRefPred(jmpTab[i]);
         }
 
         return true;
     }
-    else if ((block->GetSwitchTargets()->bbsCount == 2) && block->NextIs(block->GetSwitchTargets()->bbsDstTab[1]))
+    else if ((block->GetSwitchTargets()->bbsCount == 2) &&
+             block->NextIs(block->GetSwitchTargets()->bbsDstTab[1]->getDestinationBlock()))
     {
         /* Use a BBJ_COND(switchVal==0) for a switch with only one
            significant clause besides the default clause, if the
@@ -2031,7 +2007,6 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
         // replace it with a COMMA node.  In such a case we will end up with GT_JTRUE node pointing to
         // a COMMA node which results in noway asserts in fgMorphSmpOp(), optAssertionGen() and rpPredictTreeRegUse().
         // For the same reason fgMorphSmpOp() marks GT_JTRUE nodes with RELOP children as GTF_DONT_CSE.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
         if (verbose)
@@ -2064,14 +2039,16 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
             fgSetStmtSeq(switchStmt);
         }
 
-        block->SetCond(block->GetSwitchTargets()->bbsDstTab[0], block->GetSwitchTargets()->bbsDstTab[1]);
+        FlowEdge* const trueEdge  = block->GetSwitchTargets()->bbsDstTab[0];
+        FlowEdge* const falseEdge = block->GetSwitchTargets()->bbsDstTab[1];
+        block->SetCond(trueEdge, falseEdge);
 
         JITDUMP("After:\n");
         DISPNODE(switchTree);
 
         return true;
     }
-    return returnvalue;
+    return modified;
 }
 
 //-------------------------------------------------------------
@@ -2119,7 +2096,7 @@ bool Compiler::fgBlockEndFavorsTailDuplication(BasicBlock* block, unsigned lclNu
     }
 
     // Tail duplication tends to pay off when the last statement
-    // is an assignment of a constant, arraylength, or a relop.
+    // is a local store of a constant, arraylength, or a relop.
     // This is because these statements produce information about values
     // that would otherwise be lost at the upcoming merge point.
     //
@@ -2135,8 +2112,8 @@ bool Compiler::fgBlockEndFavorsTailDuplication(BasicBlock* block, unsigned lclNu
         GenTree* const tree = stmt->GetRootNode();
         if (tree->OperIsLocalStore() && !tree->OperIsBlkOp() && (tree->AsLclVarCommon()->GetLclNum() == lclNum))
         {
-            GenTree* const data = tree->Data();
-            if (data->OperIsArrLength() || data->OperIsConst() || data->OperIsCompare())
+            GenTree* const value = tree->Data();
+            if (value->OperIsArrLength() || value->OperIsConst() || value->OperIsCompare())
             {
                 return true;
             }
@@ -2185,7 +2162,7 @@ bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target, unsigne
     // ultimately feeds a simple conditional branch.
     //
     // These blocks are small, and when duplicated onto the tail of blocks that end in
-    // assignments, there is a high probability of the branch completely going away.
+    // local stores, there is a high probability of the branch completely going away.
     //
     // This is by no means the only kind of tail that it is beneficial to duplicate,
     // just the only one we recognize for now.
@@ -2472,27 +2449,30 @@ bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock*
         fgInsertStmtAtEnd(block, cloneStmt);
     }
 
-    // add an unconditional block after this block to jump to the target block's fallthrough block
+    // Fix up block's flow.
+    // Assume edge likelihoods transfer over.
     //
-    assert(!target->IsLast());
-    BasicBlock* next = fgNewBBafter(BBJ_ALWAYS, block, true, target->GetFalseTarget());
+    fgRedirectTargetEdge(block, target->GetTrueTarget());
+    block->GetTargetEdge()->setLikelihood(target->GetTrueEdge()->getLikelihood());
 
-    // Fix up block's flow
-    //
-    block->SetCond(target->GetTrueTarget(), next);
-    fgAddRefPred(block->GetTrueTarget(), block);
-    fgRemoveRefPred(target, block);
-
-    // The new block 'next' will inherit its weight from 'block'
-    //
-    next->inheritWeight(block);
-    fgAddRefPred(next, block);
-    fgAddRefPred(next->GetTarget(), next);
+    FlowEdge* const falseEdge = fgAddRefPred(target->GetFalseTarget(), block, target->GetFalseEdge());
+    block->SetCond(block->GetTargetEdge(), falseEdge);
 
-    JITDUMP("fgOptimizeUncondBranchToSimpleCond(from " FMT_BB " to cond " FMT_BB "), created new uncond " FMT_BB "\n",
-            block->bbNum, target->bbNum, next->bbNum);
+    JITDUMP("fgOptimizeUncondBranchToSimpleCond(from " FMT_BB " to cond " FMT_BB "), modified " FMT_BB "\n",
+            block->bbNum, target->bbNum, block->bbNum);
     JITDUMP("   expecting opts to key off V%02u in " FMT_BB "\n", lclNum, block->bbNum);
 
+    if (target->hasProfileWeight() && block->hasProfileWeight())
+    {
+        // Remove weight from target since block now bypasses it...
+        //
+        weight_t targetWeight = target->bbWeight;
+        weight_t blockWeight  = block->bbWeight;
+        target->setBBProfileWeight(max(0.0, targetWeight - blockWeight));
+        JITDUMP("Decreased " FMT_BB " profile weight from " FMT_WT " to " FMT_WT "\n", target->bbNum, targetWeight,
+                target->bbWeight);
+    }
+
     return true;
 }
 
@@ -2506,7 +2486,7 @@ bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock*
 void Compiler::fgRemoveConditionalJump(BasicBlock* block)
 {
     assert(block->KindIs(BBJ_COND));
-    assert(block->TrueTargetIs(block->GetFalseTarget()));
+    assert(block->TrueEdgeIs(block->GetFalseEdge()));
 
     BasicBlock* target = block->GetTrueTarget();
 
@@ -2616,17 +2596,14 @@ void Compiler::fgRemoveConditionalJump(BasicBlock* block)
 
     /* Conditional is gone - always jump to target */
 
-    block->SetKind(BBJ_ALWAYS);
+    block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetTrueEdge());
     assert(block->TargetIs(target));
 
-    // TODO-NoFallThrough: Set BBF_NONE_QUIRK only when false target is the next block
-    block->SetFlags(BBF_NONE_QUIRK);
-
     /* Update bbRefs and bbNum - Conditional predecessors to the same
-        * block are counted twice so we have to remove one of them */
+     * block are counted twice so we have to remove one of them */
 
     noway_assert(target->countOfInEdges() > 1);
-    fgRemoveRefPred(target, block);
+    fgRemoveRefPred(block->GetTargetEdge());
 }
 
 //-------------------------------------------------------------
@@ -2886,21 +2863,31 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
     // We need to update the following flags of the bJump block if they were set in the bDest block
     bJump->CopyFlags(bDest, BBF_COPY_PROPAGATE);
 
-    bJump->SetCond(bDestNormalTarget, bJump->Next());
-
-    /* Update bbRefs and bbPreds */
-
-    // bJump now falls through into the next block
+    // Update bbRefs and bbPreds
+    //
+    // For now we set the likelihood of the new branch to match
+    // the likelihood of the old branch.
+    //
+    // This may or may not match the block weight adjustments we're
+    // making. All this becomes easier to reconcile once we rely on
+    // edge likelihoods more and have synthesis running.
+    //
+    // Until then we won't worry that edges and blocks are potentially
+    // out of sync.
     //
-    fgAddRefPred(bJump->GetFalseTarget(), bJump);
+    FlowEdge* const destFalseEdge = bDest->GetFalseEdge();
+    FlowEdge* const destTrueEdge  = bDest->GetTrueEdge();
 
-    // bJump no longer jumps to bDest
+    // bJump now falls through into the next block
     //
-    fgRemoveRefPred(bDest, bJump);
+    FlowEdge* const falseEdge = fgAddRefPred(bJump->Next(), bJump, destFalseEdge);
 
     // bJump now jumps to bDest's normal jump target
     //
-    fgAddRefPred(bDestNormalTarget, bJump);
+    fgRedirectTargetEdge(bJump, bDestNormalTarget);
+    bJump->GetTargetEdge()->setLikelihood(destTrueEdge->getLikelihood());
+
+    bJump->SetCond(bJump->GetTargetEdge(), falseEdge);
 
     if (weightJump > 0)
     {
@@ -3003,7 +2990,7 @@ bool Compiler::fgOptimizeSwitchJumps()
         // The dominant case should not be the default case, as we already peel that one.
         //
         assert(dominantCase < (block->GetSwitchTargets()->bbsCount - 1));
-        BasicBlock* const dominantTarget = block->GetSwitchTargets()->bbsDstTab[dominantCase];
+        BasicBlock* const dominantTarget = block->GetSwitchTargets()->bbsDstTab[dominantCase]->getDestinationBlock();
         Statement* const  switchStmt     = block->lastStmt();
         GenTree* const    switchTree     = switchStmt->GetRootNode();
         assert(switchTree->OperIs(GT_SWITCH));
@@ -3046,11 +3033,9 @@ bool Compiler::fgOptimizeSwitchJumps()
 
         // Wire up the new control flow.
         //
-        block->SetCond(dominantTarget, newBlock);
         FlowEdge* const blockToTargetEdge   = fgAddRefPred(dominantTarget, block);
         FlowEdge* const blockToNewBlockEdge = newBlock->bbPreds;
-        assert(blockToNewBlockEdge->getSourceBlock() == block);
-        assert(blockToTargetEdge->getSourceBlock() == block);
+        block->SetCond(blockToTargetEdge, blockToNewBlockEdge);
 
         // Update profile data
         //
@@ -3061,7 +3046,9 @@ bool Compiler::fgOptimizeSwitchJumps()
         newBlock->setBBProfileWeight(blockToNewBlockWeight);
 
         blockToTargetEdge->setEdgeWeights(blockToTargetWeight, blockToTargetWeight, dominantTarget);
+        blockToTargetEdge->setLikelihood(fraction);
         blockToNewBlockEdge->setEdgeWeights(blockToNewBlockWeight, blockToNewBlockWeight, block);
+        blockToNewBlockEdge->setLikelihood(max(0.0, 1.0 - fraction));
 
         // There may be other switch cases that lead to this same block, but there's just
         // one edge in the flowgraph. So we need to subtract off the profile data that now flows
@@ -3470,9 +3457,7 @@ bool Compiler::fgReorderBlocks(bool useProfile)
 {
     noway_assert(opts.compDbgCode == false);
 
-#if defined(FEATURE_EH_FUNCLETS)
-    assert(fgFuncletsCreated);
-#endif // FEATURE_EH_FUNCLETS
+    assert(UsesFunclets() == fgFuncletsCreated);
 
     // We can't relocate anything if we only have one block
     if (fgFirstBB->IsLast())
@@ -3488,9 +3473,12 @@ bool Compiler::fgReorderBlocks(bool useProfile)
     // First let us expand the set of run rarely blocks
     newRarelyRun |= fgExpandRarelyRunBlocks();
 
-#if !defined(FEATURE_EH_FUNCLETS)
-    movedBlocks |= fgRelocateEHRegions();
-#endif // !FEATURE_EH_FUNCLETS
+#if defined(FEATURE_EH_WINDOWS_X86)
+    if (!UsesFunclets())
+    {
+        movedBlocks |= fgRelocateEHRegions();
+    }
+#endif // FEATURE_EH_WINDOWS_X86
 
     //
     // If we are using profile weights we can change some
@@ -3519,11 +3507,11 @@ bool Compiler::fgReorderBlocks(bool useProfile)
                     assert(test->OperIsConditionalJump());
                     test->AsOp()->gtOp1 = gtReverseCond(test->AsOp()->gtOp1);
 
-                    BasicBlock* newFalseTarget = block->GetTrueTarget();
-                    BasicBlock* newTrueTarget  = block->GetFalseTarget();
-                    block->SetTrueTarget(newTrueTarget);
-                    block->SetFalseTarget(newFalseTarget);
-                    assert(block->CanRemoveJumpToTarget(newFalseTarget, this));
+                    FlowEdge* const newFalseEdge = block->GetTrueEdge();
+                    FlowEdge* const newTrueEdge  = block->GetFalseEdge();
+                    block->SetTrueEdge(newTrueEdge);
+                    block->SetFalseEdge(newFalseEdge);
+                    assert(block->CanRemoveJumpToTarget(block->GetFalseTarget(), this));
                 }
                 else
                 {
@@ -3655,7 +3643,7 @@ bool Compiler::fgReorderBlocks(bool useProfile)
                             // The edge bPrev -> bDest must have a higher minimum weight
                             // than every other edge into bDest
                             //
-                            FlowEdge* edgeFromPrev = fgGetPredForBlock(bDest, bPrev);
+                            FlowEdge* edgeFromPrev = bPrev->GetTargetEdge();
                             noway_assert(edgeFromPrev != nullptr);
 
                             // Examine all of the other edges into bDest
@@ -3747,8 +3735,9 @@ bool Compiler::fgReorderBlocks(bool useProfile)
                         //                                                   V
                         //                  bDest --------------->   [BB08, weight 21]
                         //
-                        FlowEdge* edgeToDest  = fgGetPredForBlock(bDest, bPrev);
-                        FlowEdge* edgeToBlock = fgGetPredForBlock(block, bPrev);
+                        assert(bPrev->FalseTargetIs(block));
+                        FlowEdge* edgeToDest  = bPrev->GetTrueEdge();
+                        FlowEdge* edgeToBlock = bPrev->GetFalseEdge();
                         noway_assert(edgeToDest != nullptr);
                         noway_assert(edgeToBlock != nullptr);
                         //
@@ -3981,8 +3970,8 @@ bool Compiler::fgReorderBlocks(bool useProfile)
         bNext                = bEnd->Next();
         bool connected_bDest = false;
 
-        if ((backwardBranch && !isRare) ||
-            block->HasFlag(BBF_DONT_REMOVE)) // Don't choose option #1 when block is the start of a try region
+        if ((backwardBranch && !isRare) || block->HasFlag(BBF_DONT_REMOVE)) // Don't choose option #1 when block is the
+                                                                            // start of a try region
         {
             bStart = nullptr;
             bEnd   = nullptr;
@@ -4008,13 +3997,11 @@ bool Compiler::fgReorderBlocks(bool useProfile)
                     break;
                 }
 
-#if defined(FEATURE_EH_FUNCLETS)
                 // Check if we've reached the funclets region, at the end of the function
                 if (bEnd->NextIs(fgFirstFuncletBB))
                 {
                     break;
                 }
-#endif // FEATURE_EH_FUNCLETS
 
                 if (bNext == bDest)
                 {
@@ -4309,16 +4296,6 @@ bool Compiler::fgReorderBlocks(bool useProfile)
         const bool bStartPrevJumpsToNext = bStartPrev->KindIs(BBJ_ALWAYS) && bStartPrev->JumpsToNext();
         fgUnlinkRange(bStart, bEnd);
 
-        // If bStartPrev is a BBJ_ALWAYS to some block after bStart, unlinking bStart can move
-        // bStartPrev's jump destination up, making bStartPrev jump to the next block for now.
-        // This can lead us to make suboptimal decisions in Compiler::fgFindInsertPoint,
-        // so make sure the BBF_NONE_QUIRK flag is unset for bStartPrev beforehand.
-        // TODO: Remove quirk.
-        if (bStartPrev->KindIs(BBJ_ALWAYS) && (bStartPrevJumpsToNext != bStartPrev->JumpsToNext()))
-        {
-            bStartPrev->RemoveFlags(BBF_NONE_QUIRK);
-        }
-
         if (insertAfterBlk == nullptr)
         {
             // Find new location for the unlinked block(s)
@@ -4580,10 +4557,10 @@ bool Compiler::fgReorderBlocks(bool useProfile)
             noway_assert(condTest->gtOper == GT_JTRUE);
             condTest->AsOp()->gtOp1 = gtReverseCond(condTest->AsOp()->gtOp1);
 
-            BasicBlock* trueTarget  = bPrev->GetTrueTarget();
-            BasicBlock* falseTarget = bPrev->GetFalseTarget();
-            bPrev->SetTrueTarget(falseTarget);
-            bPrev->SetFalseTarget(trueTarget);
+            FlowEdge* const trueEdge  = bPrev->GetTrueEdge();
+            FlowEdge* const falseEdge = bPrev->GetFalseEdge();
+            bPrev->SetTrueEdge(falseEdge);
+            bPrev->SetFalseEdge(trueEdge);
 
             // may need to rethread
             //
@@ -4801,11 +4778,11 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication /* = false */, bool isPh
                 continue;
             }
 
-        /*  We jump to the REPEAT label if we performed a change involving the current block
-         *  This is in case there are other optimizations that can show up
-         *  (e.g. - compact 3 blocks in a row)
-         *  If nothing happens, we then finish the iteration and move to the next block
-         */
+            /*  We jump to the REPEAT label if we performed a change involving the current block
+             *  This is in case there are other optimizations that can show up
+             *  (e.g. - compact 3 blocks in a row)
+             *  If nothing happens, we then finish the iteration and move to the next block
+             */
 
         REPEAT:;
 
@@ -4819,13 +4796,11 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication /* = false */, bool isPh
                 if (doTailDuplication && fgOptimizeUncondBranchToSimpleCond(block, bDest))
                 {
                     assert(block->KindIs(BBJ_COND));
-                    change   = true;
-                    modified = true;
-                    bDest    = block->GetTrueTarget();
-                    bNext    = block->GetFalseTarget();
-
-                    // TODO-NoFallThrough: Adjust the above logic once bbFalseTarget can diverge from bbNext
-                    assert(block->NextIs(bNext));
+                    assert(bNext == block->Next());
+                    change     = true;
+                    modified   = true;
+                    bDest      = block->GetTrueTarget();
+                    bFalseDest = block->GetFalseTarget();
                 }
             }
 
@@ -4837,10 +4812,6 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication /* = false */, bool isPh
                 if (bDest == bNext)
                 {
                     // Skip jump optimizations, and try to compact block and bNext later
-                    if (!block->isBBCallFinallyPairTail())
-                    {
-                        block->SetFlags(BBF_NONE_QUIRK);
-                    }
                     bDest = nullptr;
                 }
             }
@@ -4864,9 +4835,8 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication /* = false */, bool isPh
                 if (bDest->KindIs(BBJ_ALWAYS) && !bDest->TargetIs(bDest) && // special case for self jumps
                     bDest->isEmpty())
                 {
-                    // TODO: Allow optimizing branches to blocks that jump to the next block
-                    const bool optimizeBranch = !bDest->JumpsToNext() || !bDest->HasFlag(BBF_NONE_QUIRK);
-                    if (optimizeBranch && fgOptimizeBranchToEmptyUnconditional(block, bDest))
+                    // Empty blocks that jump to the next block can probably be compacted instead
+                    if (!bDest->JumpsToNext() && fgOptimizeBranchToEmptyUnconditional(block, bDest))
                     {
                         change   = true;
                         modified = true;
@@ -4980,20 +4950,6 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication /* = false */, bool isPh
                             {
                                 ehUpdateLastBlocks(bNext, bDest);
                             }
-
-                            // Add fall through fixup block, if needed.
-                            //
-                            if (bDest->KindIs(BBJ_COND) && !bDest->NextIs(bDest->GetFalseTarget()))
-                            {
-                                BasicBlock* const bDestFalseTarget = bDest->GetFalseTarget();
-                                BasicBlock* const bFixup = fgNewBBafter(BBJ_ALWAYS, bDest, true, bDestFalseTarget);
-                                bDest->SetFalseTarget(bFixup);
-                                bFixup->inheritWeight(bDestFalseTarget);
-
-                                fgRemoveRefPred(bDestFalseTarget, bDest);
-                                fgAddRefPred(bFixup, bDest);
-                                fgAddRefPred(bDestFalseTarget, bFixup);
-                            }
                         }
                     }
 
@@ -5020,10 +4976,20 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication /* = false */, bool isPh
                         }
 
                         // Optimize the Conditional JUMP to go to the new target
-                        block->SetTrueTarget(bNext->GetTarget());
-                        block->SetFalseTarget(bNext->Next());
+                        //
+                        FlowEdge* const oldFalseEdge = block->GetFalseEdge();
+                        FlowEdge* const oldTrueEdge  = block->GetTrueEdge();
+                        FlowEdge* const oldNextEdge  = bNext->GetTargetEdge();
+
+                        // bNext no longer flows to target
+                        //
+                        fgRemoveRefPred(oldNextEdge);
 
-                        fgAddRefPred(bNext->GetTarget(), block, fgRemoveRefPred(bNext->GetTarget(), bNext));
+                        // Rewire flow from block
+                        //
+                        block->SetFalseEdge(oldTrueEdge);
+                        block->SetTrueEdge(oldFalseEdge);
+                        fgRedirectTrueEdge(block, bNext->GetTarget());
 
                         /*
                           Unlink bNext from the BasicBlock list; note that we can
@@ -5035,7 +5001,6 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication /* = false */, bool isPh
                           to the final target by the time we're done here.
                         */
 
-                        fgRemoveRefPred(bNext, block);
                         fgUnlinkBlockForRemoval(bNext);
 
                         /* Mark the block as removed */
@@ -5265,7 +5230,7 @@ PhaseStatus Compiler::fgDfsBlocksAndRemove()
 #ifdef DEBUG
         if (verbose)
         {
-            printf("%u/%u blocks are unreachable and will be removed\n", fgBBcount - m_dfsTree->GetPostOrderCount(),
+            printf("%u/%u blocks are unreachable and will be removed:\n", fgBBcount - m_dfsTree->GetPostOrderCount(),
                    fgBBcount);
             for (BasicBlock* block : Blocks())
             {
@@ -5275,7 +5240,7 @@ PhaseStatus Compiler::fgDfsBlocksAndRemove()
                 }
             }
         }
-#endif
+#endif // DEBUG
 
         // The DFS we run is not precise around call-finally, so
         // `fgRemoveUnreachableBlocks` can expose newly unreachable blocks
@@ -5303,6 +5268,24 @@ PhaseStatus Compiler::fgDfsBlocksAndRemove()
             m_dfsTree = fgComputeDfs();
         }
 
+#ifdef DEBUG
+        // Did we actually remove all the blocks we said we were going to?
+        if (verbose)
+        {
+            if (m_dfsTree->GetPostOrderCount() != fgBBcount)
+            {
+                printf("%u unreachable blocks were not removed:\n", fgBBcount - m_dfsTree->GetPostOrderCount());
+                for (BasicBlock* block : Blocks())
+                {
+                    if (!m_dfsTree->Contains(block))
+                    {
+                        printf("  " FMT_BB "\n", block->bbNum);
+                    }
+                }
+            }
+        }
+#endif // DEBUG
+
         status = PhaseStatus::MODIFIED_EVERYTHING;
     }
 
@@ -5380,12 +5363,13 @@ unsigned Compiler::fgMeasureIR()
         {
             for (Statement* const stmt : block->Statements())
             {
-                fgWalkTreePre(stmt->GetRootNodePointer(),
-                              [](GenTree** slot, fgWalkData* data) -> Compiler::fgWalkResult {
-                                  (*reinterpret_cast<unsigned*>(data->pCallbackData))++;
-                                  return Compiler::WALK_CONTINUE;
-                              },
-                              &nodeCount);
+                fgWalkTreePre(
+                    stmt->GetRootNodePointer(),
+                    [](GenTree** slot, fgWalkData* data) -> Compiler::fgWalkResult {
+                    (*reinterpret_cast<unsigned*>(data->pCallbackData))++;
+                    return Compiler::WALK_CONTINUE;
+                },
+                    &nodeCount);
             }
         }
         else
@@ -5460,7 +5444,9 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early)
 
     struct PredInfo
     {
-        PredInfo(BasicBlock* block, Statement* stmt) : m_block(block), m_stmt(stmt)
+        PredInfo(BasicBlock* block, Statement* stmt)
+            : m_block(block)
+            , m_stmt(stmt)
         {
         }
         BasicBlock* m_block;
@@ -5668,13 +5654,16 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early)
 
                 // Fix up the flow.
                 //
-                predBlock->SetKindAndTarget(BBJ_ALWAYS, crossJumpTarget);
-
                 if (commSucc != nullptr)
                 {
-                    fgRemoveRefPred(commSucc, predBlock);
+                    assert(predBlock->KindIs(BBJ_ALWAYS));
+                    fgRedirectTargetEdge(predBlock, crossJumpTarget);
+                }
+                else
+                {
+                    FlowEdge* const newEdge = fgAddRefPred(crossJumpTarget, predBlock);
+                    predBlock->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
                 }
-                fgAddRefPred(crossJumpTarget, predBlock);
             }
 
             // We changed things
@@ -5763,7 +5752,6 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early)
     };
 
     auto iterateTailMerge = [&](BasicBlock* block) -> void {
-
         int numOpts = 0;
 
         while (tailMerge(block))
@@ -5843,7 +5831,7 @@ bool Compiler::fgTryOneHeadMerge(BasicBlock* block, bool early)
     // ternaries in C#).
     // The logic below could be generalized to BBJ_SWITCH, but this currently
     // has almost no CQ benefit but does have a TP impact.
-    if (!block->KindIs(BBJ_COND) || block->TrueTargetIs(block->GetFalseTarget()))
+    if (!block->KindIs(BBJ_COND) || block->TrueEdgeIs(block->GetFalseEdge()))
     {
         return false;
     }
@@ -5981,7 +5969,8 @@ bool Compiler::gtTreeContainsTailCall(GenTree* tree)
             DoPreOrder = true
         };
 
-        HasTailCallCandidateVisitor(Compiler* comp) : GenTreeVisitor(comp)
+        HasTailCallCandidateVisitor(Compiler* comp)
+            : GenTreeVisitor(comp)
         {
         }
 
diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp
index 9e29cc257927..9fa4e7273862 100644
--- a/src/coreclr/jit/fgprofile.cpp
+++ b/src/coreclr/jit/fgprofile.cpp
@@ -309,7 +309,11 @@ class Instrumentor
     bool      m_modifiedFlow;
 
 protected:
-    Instrumentor(Compiler* comp) : m_comp(comp), m_schemaCount(0), m_instrCount(0), m_modifiedFlow(false)
+    Instrumentor(Compiler* comp)
+        : m_comp(comp)
+        , m_schemaCount(0)
+        , m_instrCount(0)
+        , m_modifiedFlow(false)
     {
     }
 
@@ -360,7 +364,8 @@ class Instrumentor
 class NonInstrumentor : public Instrumentor
 {
 public:
-    NonInstrumentor(Compiler* comp) : Instrumentor(comp)
+    NonInstrumentor(Compiler* comp)
+        : Instrumentor(comp)
     {
     }
 };
@@ -376,7 +381,9 @@ class BlockCountInstrumentor : public Instrumentor
     BasicBlock* m_entryBlock;
 
 public:
-    BlockCountInstrumentor(Compiler* comp) : Instrumentor(comp), m_entryBlock(nullptr)
+    BlockCountInstrumentor(Compiler* comp)
+        : Instrumentor(comp)
+        , m_entryBlock(nullptr)
     {
     }
     bool ShouldProcess(BasicBlock* block) override
@@ -507,12 +514,11 @@ void BlockCountInstrumentor::RelocateProbes()
         //
         if (criticalPreds.Height() > 0)
         {
-            BasicBlock* const intermediary =
-                m_comp->fgNewBBbefore(BBJ_ALWAYS, block, /* extendRegion */ true, /* jumpDest */ block);
-            intermediary->SetFlags(BBF_IMPORTED | BBF_MARKED | BBF_NONE_QUIRK);
+            BasicBlock* const intermediary = m_comp->fgNewBBbefore(BBJ_ALWAYS, block, /* extendRegion */ true);
+            intermediary->SetFlags(BBF_IMPORTED | BBF_MARKED);
             intermediary->inheritWeight(block);
             FlowEdge* const newEdge = m_comp->fgAddRefPred(block, intermediary);
-            newEdge->setLikelihood(1.0);
+            intermediary->SetTargetEdge(newEdge);
             SetModifiedFlow();
 
             while (criticalPreds.Height() > 0)
@@ -567,8 +573,8 @@ void BlockCountInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche
     schemaElem.InstrumentationKind = m_comp->opts.compCollect64BitCounts
                                          ? ICorJitInfo::PgoInstrumentationKind::BasicBlockLongCount
                                          : ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount;
-    schemaElem.ILOffset = offset;
-    schemaElem.Offset   = 0;
+    schemaElem.ILOffset            = offset;
+    schemaElem.Offset              = 0;
 
     schema.push_back(schemaElem);
 
@@ -842,9 +848,9 @@ class SpanningTreeVisitor
         Duplicate
     };
 
-    virtual void Badcode()                     = 0;
-    virtual void VisitBlock(BasicBlock* block) = 0;
-    virtual void VisitTreeEdge(BasicBlock* source, BasicBlock* target) = 0;
+    virtual void Badcode()                                                               = 0;
+    virtual void VisitBlock(BasicBlock* block)                                           = 0;
+    virtual void VisitTreeEdge(BasicBlock* source, BasicBlock* target)                   = 0;
     virtual void VisitNonTreeEdge(BasicBlock* source, BasicBlock* target, EdgeKind kind) = 0;
 };
 
@@ -1240,7 +1246,9 @@ static int32_t EfficientEdgeCountBlockToKey(BasicBlock* block)
 // Based on "Optimally Profiling and Tracing Programs,"
 // Ball and Larus PLDI '92.
 //
-class EfficientEdgeCountInstrumentor : public Instrumentor, public SpanningTreeVisitor
+class EfficientEdgeCountInstrumentor
+    : public Instrumentor
+    , public SpanningTreeVisitor
 {
 private:
     // A particular edge probe. These are linked
@@ -1679,12 +1687,11 @@ void EfficientEdgeCountInstrumentor::RelocateProbes()
         //
         if (criticalPreds.Height() > 0)
         {
-            BasicBlock* intermediary =
-                m_comp->fgNewBBbefore(BBJ_ALWAYS, block, /* extendRegion */ true, /* jumpDest */ block);
-            intermediary->SetFlags(BBF_IMPORTED | BBF_NONE_QUIRK);
+            BasicBlock* intermediary = m_comp->fgNewBBbefore(BBJ_ALWAYS, block, /* extendRegion */ true);
+            intermediary->SetFlags(BBF_IMPORTED);
             intermediary->inheritWeight(block);
             FlowEdge* const newEdge = m_comp->fgAddRefPred(block, intermediary);
-            newEdge->setLikelihood(1.0);
+            intermediary->SetTargetEdge(newEdge);
             NewRelocatedProbe(intermediary, probe->source, probe->target, &leader);
             SetModifiedFlow();
 
@@ -1755,8 +1762,8 @@ void EfficientEdgeCountInstrumentor::BuildSchemaElements(BasicBlock* block, Sche
         schemaElem.InstrumentationKind = m_comp->opts.compCollect64BitCounts
                                              ? ICorJitInfo::PgoInstrumentationKind::EdgeLongCount
                                              : ICorJitInfo::PgoInstrumentationKind::EdgeIntCount;
-        schemaElem.ILOffset = sourceKey;
-        schemaElem.Offset   = 0;
+        schemaElem.ILOffset            = sourceKey;
+        schemaElem.Offset              = 0;
 
         schema.push_back(schemaElem);
 
@@ -1905,7 +1912,9 @@ class HandleHistogramProbeVisitor final : public GenTreeVisitor<HandleHistogramP
     Compiler* m_compiler;
 
     HandleHistogramProbeVisitor(Compiler* compiler, TFunctor& functor)
-        : GenTreeVisitor<HandleHistogramProbeVisitor>(compiler), m_functor(functor), m_compiler(compiler)
+        : GenTreeVisitor<HandleHistogramProbeVisitor>(compiler)
+        , m_functor(functor)
+        , m_compiler(compiler)
     {
     }
     Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
@@ -1937,7 +1946,9 @@ class ValueHistogramProbeVisitor final : public GenTreeVisitor<ValueHistogramPro
     Compiler* m_compiler;
 
     ValueHistogramProbeVisitor(Compiler* compiler, TFunctor& functor)
-        : GenTreeVisitor<ValueHistogramProbeVisitor>(compiler), m_functor(functor), m_compiler(compiler)
+        : GenTreeVisitor<ValueHistogramProbeVisitor>(compiler)
+        , m_functor(functor)
+        , m_compiler(compiler)
     {
     }
 
@@ -1947,7 +1958,7 @@ class ValueHistogramProbeVisitor final : public GenTreeVisitor<ValueHistogramPro
         if (node->IsCall() && node->AsCall()->IsSpecialIntrinsic())
         {
             const NamedIntrinsic ni = m_compiler->lookupNamedIntrinsic(node->AsCall()->gtCallMethHnd);
-            if ((ni == NI_System_Buffer_Memmove) || (ni == NI_System_SpanHelpers_SequenceEqual))
+            if ((ni == NI_System_SpanHelpers_Memmove) || (ni == NI_System_SpanHelpers_SequenceEqual))
             {
                 m_functor(m_compiler, node);
             }
@@ -1967,7 +1978,8 @@ class BuildHandleHistogramProbeSchemaGen
 
 public:
     BuildHandleHistogramProbeSchemaGen(Schema& schema, unsigned& schemaCount)
-        : m_schema(schema), m_schemaCount(schemaCount)
+        : m_schema(schema)
+        , m_schemaCount(schemaCount)
     {
     }
 
@@ -2005,8 +2017,8 @@ class BuildHandleHistogramProbeSchemaGen
         schemaElem.InstrumentationKind = compiler->opts.compCollect64BitCounts
                                              ? ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount
                                              : ICorJitInfo::PgoInstrumentationKind::HandleHistogramIntCount;
-        schemaElem.ILOffset = (int32_t)call->gtHandleHistogramProfileCandidateInfo->ilOffset;
-        schemaElem.Offset   = 0;
+        schemaElem.ILOffset            = (int32_t)call->gtHandleHistogramProfileCandidateInfo->ilOffset;
+        schemaElem.Offset              = 0;
 
         m_schema.push_back(schemaElem);
 
@@ -2015,7 +2027,7 @@ class BuildHandleHistogramProbeSchemaGen
         // Re-using ILOffset and Other fields from schema item for TypeHandleHistogramCount
         schemaElem.InstrumentationKind = isTypeHistogram ? ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes
                                                          : ICorJitInfo::PgoInstrumentationKind::HandleHistogramMethods;
-        schemaElem.Count = ICorJitInfo::HandleHistogram32::SIZE;
+        schemaElem.Count               = ICorJitInfo::HandleHistogram32::SIZE;
         m_schema.push_back(schemaElem);
 
         m_schemaCount++;
@@ -2029,7 +2041,8 @@ class BuildValueHistogramProbeSchemaGen
 
 public:
     BuildValueHistogramProbeSchemaGen(Schema& schema, unsigned& schemaCount)
-        : m_schema(schema), m_schemaCount(schemaCount)
+        : m_schema(schema)
+        , m_schemaCount(schemaCount)
     {
     }
 
@@ -2038,8 +2051,8 @@ class BuildValueHistogramProbeSchemaGen
         ICorJitInfo::PgoInstrumentationSchema schemaElem = {};
         schemaElem.Count                                 = 1;
         schemaElem.InstrumentationKind                   = compiler->opts.compCollect64BitCounts
-                                             ? ICorJitInfo::PgoInstrumentationKind::ValueHistogramLongCount
-                                             : ICorJitInfo::PgoInstrumentationKind::ValueHistogramIntCount;
+                                                               ? ICorJitInfo::PgoInstrumentationKind::ValueHistogramLongCount
+                                                               : ICorJitInfo::PgoInstrumentationKind::ValueHistogramIntCount;
         schemaElem.ILOffset = (int32_t)call->AsCall()->gtHandleHistogramProfileCandidateInfo->ilOffset;
         m_schema.push_back(schemaElem);
         m_schemaCount++;
@@ -2274,7 +2287,7 @@ class ValueHistogramProbeInserter
             return;
         }
 
-        assert(node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_Buffer_Memmove) ||
+        assert(node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_SpanHelpers_Memmove) ||
                node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_SpanHelpers_SequenceEqual));
 
         const ICorJitInfo::PgoInstrumentationSchema& countEntry = m_schema[*m_currentSchemaIndex];
@@ -2334,7 +2347,8 @@ class ValueHistogramProbeInserter
 class HandleHistogramProbeInstrumentor : public Instrumentor
 {
 public:
-    HandleHistogramProbeInstrumentor(Compiler* comp) : Instrumentor(comp)
+    HandleHistogramProbeInstrumentor(Compiler* comp)
+        : Instrumentor(comp)
     {
     }
     bool ShouldProcess(BasicBlock* block) override
@@ -2352,7 +2366,8 @@ class HandleHistogramProbeInstrumentor : public Instrumentor
 class ValueInstrumentor : public Instrumentor
 {
 public:
-    ValueInstrumentor(Compiler* comp) : Instrumentor(comp)
+    ValueInstrumentor(Compiler* comp)
+        : Instrumentor(comp)
     {
     }
     bool ShouldProcess(BasicBlock* block) override
@@ -2540,10 +2555,13 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod()
             // These are marked as [Intrinsic] only to be handled (unrolled) for constant inputs.
             // In other cases they have large managed implementations we want to profile.
             case NI_System_String_Equals:
-            case NI_System_Buffer_Memmove:
+            case NI_System_SpanHelpers_Memmove:
             case NI_System_MemoryExtensions_Equals:
             case NI_System_MemoryExtensions_SequenceEqual:
             case NI_System_MemoryExtensions_StartsWith:
+            case NI_System_SpanHelpers_Fill:
+            case NI_System_SpanHelpers_SequenceEqual:
+            case NI_System_SpanHelpers_ClearWithoutReferences:
 
             // Same here, these are only folded when JIT knows the exact types
             case NI_System_Type_IsAssignableFrom:
@@ -2726,7 +2744,7 @@ PhaseStatus Compiler::fgInstrumentMethod()
     //
     uint8_t* profileMemory;
     HRESULT  res = info.compCompHnd->allocPgoInstrumentationBySchema(info.compMethodHnd, schema.data(),
-                                                                    (UINT32)schema.size(), &profileMemory);
+                                                                     (UINT32)schema.size(), &profileMemory);
 
     // Deal with allocation failures.
     //
@@ -2948,8 +2966,8 @@ PhaseStatus Compiler::fgIncorporateProfileData()
         //
         if (fgPgoHaveWeights && !dataIsGood)
         {
-            JITDUMP("\nIncorporated count data had inconsistencies; blending profile...\n");
-            ProfileSynthesis::Run(this, ProfileSynthesisOption::BlendLikelihoods);
+            JITDUMP("\nIncorporated count data had inconsistencies; repairing profile...\n");
+            ProfileSynthesis::Run(this, ProfileSynthesisOption::RepairLikelihoods);
         }
     }
 
@@ -3101,7 +3119,7 @@ class EfficientEdgeCountReconstructor : public SpanningTreeVisitor
     // Map correlating block keys to blocks.
     //
     typedef JitHashTable<int32_t, JitSmallPrimitiveKeyFuncs<int32_t>, BasicBlock*> KeyToBlockMap;
-    KeyToBlockMap m_keyToBlockMap;
+    KeyToBlockMap                                                                  m_keyToBlockMap;
 
     // Key for finding an edge based on schema info.
     //
@@ -3110,7 +3128,9 @@ class EfficientEdgeCountReconstructor : public SpanningTreeVisitor
         int32_t const m_sourceKey;
         int32_t const m_targetKey;
 
-        EdgeKey(int32_t sourceKey, int32_t targetKey) : m_sourceKey(sourceKey), m_targetKey(targetKey)
+        EdgeKey(int32_t sourceKey, int32_t targetKey)
+            : m_sourceKey(sourceKey)
+            , m_targetKey(targetKey)
         {
         }
 
@@ -3158,7 +3178,7 @@ class EfficientEdgeCountReconstructor : public SpanningTreeVisitor
     // Map for correlating EdgeIntCount schema entries with edges
     //
     typedef JitHashTable<EdgeKey, EdgeKey, Edge*> EdgeKeyToEdgeMap;
-    EdgeKeyToEdgeMap m_edgeKeyToEdgeMap;
+    EdgeKeyToEdgeMap                              m_edgeKeyToEdgeMap;
 
     // Per block data
     //
@@ -3265,15 +3285,9 @@ class EfficientEdgeCountReconstructor : public SpanningTreeVisitor
 
     // Are there are reparable issues with the reconstruction?
     //
-    // Ideally we'd also have || !m_negativeCount here, but this
-    // leads to lots of diffs in async methods.
-    //
-    // Looks like we might first need to resolve reconstruction
-    // shortcomings with irreducible loops.
-    //
     bool IsGood() const
     {
-        return !m_entryWeightZero;
+        return !(m_entryWeightZero || m_negativeCount);
     }
 
     void VisitBlock(BasicBlock*) override
@@ -3524,8 +3538,9 @@ void EfficientEdgeCountReconstructor::Solve()
     //
     if (m_badcode || m_mismatch || m_allWeightsZero)
     {
-        JITDUMP("... not solving because of the %s\n",
-                m_badcode ? "badcode" : m_allWeightsZero ? "zero counts" : "mismatch");
+        JITDUMP("... not solving because of the %s\n", m_badcode          ? "badcode"
+                                                       : m_allWeightsZero ? "zero counts"
+                                                                          : "mismatch");
         return;
     }
 
@@ -3859,18 +3874,22 @@ void EfficientEdgeCountReconstructor::PropagateOSREntryEdges(BasicBlock* block,
 {
     // We expect one pseudo-edge and at least one normal edge.
     //
-    Edge*    pseudoEdge = nullptr;
-    unsigned nEdges     = 0;
+    Edge*    pseudoEdge       = nullptr;
+    weight_t pseudoEdgeWeight = 0;
+    unsigned nEdges           = 0;
+    weight_t successorWeight  = BB_ZERO_WEIGHT;
 
     for (Edge* edge = info->m_outgoingEdges; edge != nullptr; edge = edge->m_nextOutgoingEdge)
     {
         if (edge->m_isPseudoEdge)
         {
             assert(pseudoEdge == nullptr);
-            pseudoEdge = edge;
+            pseudoEdge       = edge;
+            pseudoEdgeWeight = edge->m_weight;
             continue;
         }
 
+        successorWeight += edge->m_weight;
         nEdges++;
     }
 
@@ -3887,28 +3906,25 @@ void EfficientEdgeCountReconstructor::PropagateOSREntryEdges(BasicBlock* block,
 
     assert(nEdges == nSucc);
 
-    if (info->m_weight == BB_ZERO_WEIGHT)
+    if ((info->m_weight == BB_ZERO_WEIGHT) || (successorWeight == BB_ZERO_WEIGHT))
     {
-        JITDUMP("\nPropagate: OSR entry block weight is zero\n");
+        JITDUMP("\nPropagate: OSR entry block or successor weight is zero\n");
         EntryWeightZero();
         return;
     }
 
     // Transfer model edge weight onto the FlowEdges as likelihoods.
     //
-    assert(nEdges == nSucc);
-    weight_t totalLikelihood = 0;
+    JITDUMP("Normalizing OSR successor likelihoods with factor 1/" FMT_WT "\n", successorWeight);
 
     for (Edge* edge = info->m_outgoingEdges; edge != nullptr; edge = edge->m_nextOutgoingEdge)
     {
         assert(block == edge->m_sourceBlock);
 
-        // The pseudo edge doesn't correspond to a flow edge,
-        // but it carries away some flow.
+        // The pseudo edge doesn't correspond to a flow edge.
         //
         if (edge == pseudoEdge)
         {
-            totalLikelihood += edge->m_weight / info->m_weight;
             continue;
         }
 
@@ -3917,58 +3933,26 @@ void EfficientEdgeCountReconstructor::PropagateOSREntryEdges(BasicBlock* block,
 
         assert(flowEdge != nullptr);
 
-        // Naive likelihood should have been set during pred initialization in fgAddRefPred
+        // Naive likelihood should have been set during pred initialization in fgLinkBasicBlocks
         //
         assert(flowEdge->hasLikelihood());
         weight_t likelihood = 0;
 
         if (nEdges == 1)
         {
-            // Conceptually we could assert(edge->m_weight == info->m_weight);
-            // but we can have inconsistencies.
-            //
             // Go with what we know for sure, edge should be 100% likely.
             //
             likelihood = 1.0;
             JITDUMP("Setting likelihood of " FMT_BB " -> " FMT_BB " to " FMT_WT " (uniq)\n", block->bbNum,
                     edge->m_targetBlock->bbNum, likelihood);
             flowEdge->setLikelihood(likelihood);
-            totalLikelihood += likelihood;
             break;
         }
 
-        assert(info->m_weight != BB_ZERO_WEIGHT);
-
-        // We may see nonsensical weights here, cap likelihood.
-        //
-        bool capped = false;
-        if (edge->m_weight > info->m_weight)
-        {
-            capped     = true;
-            likelihood = 1.0;
-        }
-        else
-        {
-            likelihood = edge->m_weight / info->m_weight;
-        }
-        JITDUMP("Setting likelihood of " FMT_BB " -> " FMT_BB " to " FMT_WT " (%s)\n", block->bbNum,
-                edge->m_targetBlock->bbNum, likelihood, capped ? "pgo -- capped" : "pgo");
+        likelihood = edge->m_weight / successorWeight;
+        JITDUMP("Setting likelihood of " FMT_BB " -> " FMT_BB " to " FMT_WT " (pgo)\n", block->bbNum,
+                edge->m_targetBlock->bbNum, likelihood);
         flowEdge->setLikelihood(likelihood);
-        totalLikelihood += likelihood;
-    }
-
-    // Note we expect real flow imbalances here as it's likely there
-    // was no observed flow from the OSR entry to some of its successors.
-    // Since we added in the pseudo edge likelihood above, the check below
-    // probably won't flag this.
-    //
-    // Seems like for OSR we will always want to run synthesis/repair.
-    //
-    if (totalLikelihood != 1.0)
-    {
-        // Consider what to do here... flag this method as needing immediate profile repairs?
-        //
-        JITDUMP(FMT_BB " total outgoing likelihood inaccurate: " FMT_WT "\n", block->bbNum, totalLikelihood);
     }
 }
 
@@ -3981,10 +3965,6 @@ void EfficientEdgeCountReconstructor::PropagateOSREntryEdges(BasicBlock* block,
 //    info - model info for the block
 //    nSucc - number of successors of the block in the flow graph
 //
-// Notes:
-//    This block requires special handling because original method flow
-//    was interrupted here.
-//
 void EfficientEdgeCountReconstructor::PropagateEdges(BasicBlock* block, BlockInfo* info, unsigned nSucc)
 {
     // There is at least one FlowEdge.
@@ -3992,8 +3972,9 @@ void EfficientEdgeCountReconstructor::PropagateEdges(BasicBlock* block, BlockInf
     // Check the reconstruction graph edges. For normal blocks, if we have
     // any pseudo-edges there should be only one pseudo-edge, and no regular edges.
     //
-    Edge*    pseudoEdge = nullptr;
-    unsigned nEdges     = 0;
+    Edge*    pseudoEdge      = nullptr;
+    unsigned nEdges          = 0;
+    weight_t successorWeight = BB_ZERO_WEIGHT;
 
     for (Edge* edge = info->m_outgoingEdges; edge != nullptr; edge = edge->m_nextOutgoingEdge)
     {
@@ -4004,14 +3985,15 @@ void EfficientEdgeCountReconstructor::PropagateEdges(BasicBlock* block, BlockInf
             continue;
         }
 
+        successorWeight += edge->m_weight;
         nEdges++;
     }
 
     // If there is a pseudo edge,
     // There should be only one successor for block. The flow
     // from block to successor will not represent real flow.
-    // We set likelihood anyways so we can assert later
-    // that all flow edges have known likelihood.
+    // Likelihood should be set to 1.0 already, as we already know
+    // this block has only one successor.
     //
     // Note the flowEdge target may not be the same as the pseudo edge target.
     //
@@ -4020,9 +4002,9 @@ void EfficientEdgeCountReconstructor::PropagateEdges(BasicBlock* block, BlockInf
         assert(nSucc == 1);
         assert(block == pseudoEdge->m_sourceBlock);
         assert(block->HasInitializedTarget());
-        FlowEdge* const flowEdge = m_comp->fgGetPredForBlock(block->GetTarget(), block);
+        FlowEdge* const flowEdge = block->GetTargetEdge();
         assert(flowEdge != nullptr);
-        flowEdge->setLikelihood(1.0);
+        assert(flowEdge->getLikelihood() == 1.0);
         return;
     }
 
@@ -4030,7 +4012,7 @@ void EfficientEdgeCountReconstructor::PropagateEdges(BasicBlock* block, BlockInf
     //
     // This can happen because bome BBJ_LEAVE blocks may have been missed during
     // our spanning tree walk since we don't know where all the finallies can return
-    // to just yet (specially, in WalkSpanningTree, we may not add the bbTarget of
+    // to just yet (specially, in WalkSpanningTree, we may not add the target of
     // a BBJ_LEAVE to the worklist).
     //
     // Worst case those missed blocks dominate other blocks so we can't limit
@@ -4043,19 +4025,19 @@ void EfficientEdgeCountReconstructor::PropagateEdges(BasicBlock* block, BlockInf
     //
     // (TODO: use synthesis here)
     //
-    if ((nEdges != nSucc) || (info->m_weight == BB_ZERO_WEIGHT))
+    if ((nEdges != nSucc) || (info->m_weight == BB_ZERO_WEIGHT) || (successorWeight == BB_ZERO_WEIGHT))
     {
         JITDUMP(FMT_BB " %s , setting outgoing likelihoods heuristically\n", block->bbNum,
                 (nEdges != nSucc) ? "has inaccurate flow model" : "has zero weight");
 
         weight_t equalLikelihood = 1.0 / nSucc;
 
-        for (BasicBlock* succ : block->Succs(m_comp))
+        for (FlowEdge* const succEdge : block->SuccEdges(m_comp))
         {
-            FlowEdge* const flowEdge = m_comp->fgGetPredForBlock(succ, block);
-            JITDUMP("Setting likelihood of " FMT_BB " -> " FMT_BB " to " FMT_WT " (heur)\n", block->bbNum, succ->bbNum,
-                    equalLikelihood);
-            flowEdge->setLikelihood(equalLikelihood);
+            BasicBlock* const succBlock = succEdge->getDestinationBlock();
+            JITDUMP("Setting likelihood of " FMT_BB " -> " FMT_BB " to " FMT_WT " (heur)\n", block->bbNum,
+                    succBlock->bbNum, equalLikelihood);
+            succEdge->setLikelihood(equalLikelihood);
         }
 
         return;
@@ -4064,7 +4046,7 @@ void EfficientEdgeCountReconstructor::PropagateEdges(BasicBlock* block, BlockInf
     // Transfer model edge weight onto the FlowEdges as likelihoods.
     //
     assert(nEdges == nSucc);
-    weight_t totalLikelihood = 0;
+    JITDUMP("Normalizing successor likelihoods with factor 1/" FMT_WT "\n", successorWeight);
 
     for (Edge* edge = info->m_outgoingEdges; edge != nullptr; edge = edge->m_nextOutgoingEdge)
     {
@@ -4076,45 +4058,17 @@ void EfficientEdgeCountReconstructor::PropagateEdges(BasicBlock* block, BlockInf
         if (nEdges == 1)
         {
             assert(nSucc == 1);
-
-            // Conceptually we could assert(edge->m_weight == info->m_weight);
-            // but we can have inconsistencies.
-            //
-            // Go with what we know for sure, edge should be 100% likely.
-            //
             likelihood = 1.0;
             JITDUMP("Setting likelihood of " FMT_BB " -> " FMT_BB " to " FMT_WT " (uniq)\n", block->bbNum,
                     edge->m_targetBlock->bbNum, likelihood);
             flowEdge->setLikelihood(likelihood);
-            totalLikelihood += likelihood;
             break;
         }
 
-        assert(info->m_weight != BB_ZERO_WEIGHT);
-
-        // We may see nonsensical weights here, cap likelihood.
-        //
-        bool capped = false;
-        if (edge->m_weight > info->m_weight)
-        {
-            capped     = true;
-            likelihood = 1.0;
-        }
-        else
-        {
-            likelihood = edge->m_weight / info->m_weight;
-        }
-        JITDUMP("Setting likelihood of " FMT_BB " -> " FMT_BB " to " FMT_WT " (%s)\n", block->bbNum,
-                edge->m_targetBlock->bbNum, likelihood, capped ? "pgo -- capped" : "pgo");
+        likelihood = edge->m_weight / successorWeight;
+        JITDUMP("Setting likelihood of " FMT_BB " -> " FMT_BB " to " FMT_WT " (pgo)\n", block->bbNum,
+                edge->m_targetBlock->bbNum, likelihood);
         flowEdge->setLikelihood(likelihood);
-        totalLikelihood += likelihood;
-    }
-
-    if (totalLikelihood != 1.0)
-    {
-        // Consider what to do here... flag this method as needing immediate profile repairs?
-        //
-        JITDUMP(FMT_BB " total outgoing likelihood inaccurate: " FMT_WT "\n", block->bbNum, totalLikelihood);
     }
 }
 
@@ -4231,18 +4185,19 @@ void EfficientEdgeCountReconstructor::MarkInterestingSwitches(BasicBlock* block,
     // If it turns out often we fail at this stage, we might consider building a histogram of switch case
     // values at runtime, similar to what we do for classes at virtual call sites.
     //
-    const unsigned     caseCount    = block->GetSwitchTargets()->bbsCount;
-    BasicBlock** const jumpTab      = block->GetSwitchTargets()->bbsDstTab;
-    unsigned           dominantCase = caseCount;
+    const unsigned   caseCount    = block->GetSwitchTargets()->bbsCount;
+    FlowEdge** const jumpTab      = block->GetSwitchTargets()->bbsDstTab;
+    unsigned         dominantCase = caseCount;
 
     for (unsigned i = 0; i < caseCount; i++)
     {
-        if (jumpTab[i] == dominantEdge->m_targetBlock)
+        BasicBlock* jumpTarget = jumpTab[i]->getDestinationBlock();
+        if (jumpTarget == dominantEdge->m_targetBlock)
         {
             if (dominantCase != caseCount)
             {
                 JITDUMP("Both case %u and %u lead to " FMT_BB "-- can't optimize\n", i, dominantCase,
-                        jumpTab[i]->bbNum);
+                        jumpTarget->bbNum);
                 dominantCase = caseCount;
                 break;
             }
@@ -4974,13 +4929,13 @@ PhaseStatus Compiler::fgComputeEdgeWeights()
                     BasicBlock* otherDst;
                     if (bSrc->FalseTargetIs(bDst))
                     {
-                        otherDst = bSrc->GetTrueTarget();
+                        otherEdge = bSrc->GetTrueEdge();
                     }
                     else
                     {
-                        otherDst = bSrc->GetFalseTarget();
+                        otherEdge = bSrc->GetFalseEdge();
                     }
-                    otherEdge = fgGetPredForBlock(otherDst, bSrc);
+                    otherDst = otherEdge->getDestinationBlock();
 
                     // If we see min/max violations, just give up on the computations
                     //
@@ -5292,7 +5247,8 @@ void Compiler::fgDebugCheckProfileWeights()
     }
     else
     {
-        ProfileChecks checks = ProfileChecks::CHECK_HASLIKELIHOOD | ProfileChecks::RAISE_ASSERT;
+        ProfileChecks checks =
+            ProfileChecks::CHECK_HASLIKELIHOOD | ProfileChecks::CHECK_LIKELIHOODSUM | ProfileChecks::RAISE_ASSERT;
         fgDebugCheckProfileWeights(checks);
     }
 }
@@ -5304,6 +5260,9 @@ void Compiler::fgDebugCheckProfileWeights()
 // Arguments:
 //   checks - checker options
 //
+// Returns:
+//   True if all enabled checks pass
+//
 // Notes:
 //   For each profiled block, check that the flow of counts into
 //   the block matches the flow of counts out of the block.
@@ -5315,7 +5274,7 @@ void Compiler::fgDebugCheckProfileWeights()
 //   There's no point checking until we've built pred lists, as
 //   we can't easily reason about consistency without them.
 //
-void Compiler::fgDebugCheckProfileWeights(ProfileChecks checks)
+bool Compiler::fgDebugCheckProfileWeights(ProfileChecks checks)
 {
     // We can check classic (min/max, late computed) weights
     //   and/or
@@ -5324,13 +5283,14 @@ void Compiler::fgDebugCheckProfileWeights(ProfileChecks checks)
     const bool verifyClassicWeights = fgEdgeWeightsComputed && hasFlag(checks, ProfileChecks::CHECK_CLASSIC);
     const bool verifyLikelyWeights  = hasFlag(checks, ProfileChecks::CHECK_LIKELY);
     const bool verifyHasLikelihood  = hasFlag(checks, ProfileChecks::CHECK_HASLIKELIHOOD);
+    const bool verifyLikelihoodSum  = hasFlag(checks, ProfileChecks::CHECK_LIKELIHOODSUM);
     const bool assertOnFailure      = hasFlag(checks, ProfileChecks::RAISE_ASSERT);
     const bool checkAllBlocks       = hasFlag(checks, ProfileChecks::CHECK_ALL_BLOCKS);
 
     if (!(verifyClassicWeights || verifyLikelyWeights || verifyHasLikelihood))
     {
         JITDUMP("[profile weight checks disabled]\n");
-        return;
+        return true;
     }
 
     JITDUMP("Checking Profile Weights (flags:0x%x)\n", checks);
@@ -5474,6 +5434,10 @@ void Compiler::fgDebugCheckProfileWeights(ProfileChecks checks)
             JITDUMP("Profile is self-consistent (%d profiled blocks, %d unprofiled)\n", profiledBlocks,
                     unprofiledBlocks);
         }
+        else if (verifyLikelihoodSum)
+        {
+            JITDUMP("All block successor flow edge likelihoods sum to 1.0\n");
+        }
         else if (verifyHasLikelihood)
         {
             JITDUMP("All flow edges have likelihoods\n");
@@ -5489,6 +5453,8 @@ void Compiler::fgDebugCheckProfileWeights(ProfileChecks checks)
             assert(!"Inconsistent profile data");
         }
     }
+
+    return (problemBlocks == 0);
 }
 
 //------------------------------------------------------------------------
@@ -5614,10 +5580,10 @@ bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block, ProfileChecks
 bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block, ProfileChecks checks)
 {
     const bool verifyClassicWeights = fgEdgeWeightsComputed && hasFlag(checks, ProfileChecks::CHECK_CLASSIC);
-    const bool verifyLikelyWeights  = hasFlag(checks, ProfileChecks::CHECK_LIKELY);
     const bool verifyHasLikelihood  = hasFlag(checks, ProfileChecks::CHECK_HASLIKELIHOOD);
+    const bool verifyLikelihoodSum  = hasFlag(checks, ProfileChecks::CHECK_LIKELIHOODSUM);
 
-    if (!(verifyClassicWeights || verifyLikelyWeights || verifyHasLikelihood))
+    if (!(verifyClassicWeights || verifyHasLikelihood || verifyLikelihoodSum))
     {
         return true;
     }
@@ -5641,17 +5607,10 @@ bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block, ProfileChecks
         unsigned missingEdges      = 0;
         unsigned missingLikelihood = 0;
 
-        for (unsigned i = 0; i < numSuccs; i++)
+        for (FlowEdge* succEdge : block->SuccEdges(this))
         {
-            BasicBlock* succBlock = block->GetSucc(i, this);
-            FlowEdge*   succEdge  = fgGetPredForBlock(succBlock, block);
-
-            if (succEdge == nullptr)
-            {
-                missingEdges++;
-                JITDUMP("  " FMT_BB " can't find successor edge to " FMT_BB "\n", block->bbNum, succBlock->bbNum);
-                continue;
-            }
+            assert(succEdge != nullptr);
+            BasicBlock* succBlock = succEdge->getDestinationBlock();
 
             outgoingWeightMin += succEdge->edgeWeightMin();
             outgoingWeightMax += succEdge->edgeWeightMax();
@@ -5707,7 +5666,7 @@ bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block, ProfileChecks
             }
         }
 
-        if (verifyLikelyWeights)
+        if (verifyLikelihoodSum)
         {
             if (!fgProfileWeightsConsistent(outgoingLikelihood, 1.0))
             {
@@ -5724,6 +5683,25 @@ bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block, ProfileChecks
                 else
                 {
                     likelyWeightsValid = false;
+
+#ifdef DEBUG
+                    if (verbose)
+                    {
+                        for (const FlowEdge* succEdge : block->SuccEdges(this))
+                        {
+                            const BasicBlock* succBlock = succEdge->getDestinationBlock();
+                            if (succEdge->hasLikelihood())
+                            {
+                                printf("  " FMT_BB " -> " FMT_BB ": " FMT_WT "\n", block->bbNum, succBlock->bbNum,
+                                       succEdge->getLikelihood());
+                            }
+                            else
+                            {
+                                printf("  " FMT_BB " -> " FMT_BB ": no likelihood\n", block->bbNum, succBlock->bbNum);
+                            }
+                        }
+                    }
+#endif // DEBUG
                 }
             }
         }
diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp
index e315e33015e1..70b7ab6448b3 100644
--- a/src/coreclr/jit/fgprofilesynthesis.cpp
+++ b/src/coreclr/jit/fgprofilesynthesis.cpp
@@ -11,11 +11,9 @@
 
 // TODO
 //
-// * faster way of doing fgGetPredForBlock
 // * vet against some real data
 // * IR based heuristics (perhaps)
 // * During Cp, avoid repeatedly propagating through nested loops
-// * Fake BB0 or always force scratch BB
 // * Stop the upweight/downweight of loops in rest of jit
 // * Durable edge properties (exit, back)
 // * Tweak RunRarely to be at or near zero
@@ -32,8 +30,9 @@
 //
 void ProfileSynthesis::Run(ProfileSynthesisOption option)
 {
-    m_dfsTree = m_comp->fgComputeDfs();
-    m_loops   = FlowGraphNaturalLoops::Find(m_dfsTree);
+    m_dfsTree             = m_comp->fgComputeDfs();
+    m_loops               = FlowGraphNaturalLoops::Find(m_dfsTree);
+    m_improperLoopHeaders = m_loops->ImproperLoopHeaders();
 
     // Retain or compute edge likelihood information
     //
@@ -100,6 +99,8 @@ void ProfileSynthesis::Run(ProfileSynthesisOption option)
 
     m_comp->fgPgoHaveWeights = true;
     m_comp->fgPgoSource      = newSource;
+    m_comp->fgPgoSynthesized = true;
+    m_comp->fgPgoConsistent  = !m_approximate;
 
 #ifdef DEBUG
     if (JitConfig.JitCheckSynthesizedCounts() > 0)
@@ -107,11 +108,15 @@ void ProfileSynthesis::Run(ProfileSynthesisOption option)
         // Verify consistency, provided we didn't see any improper headers
         // or cap any Cp values.
         //
-        if ((m_improperLoopHeaders == 0) && (m_cappedCyclicProbabilities == 0))
+        // Unfortunately invalid IL may also cause inconsistencies,
+        // so if we are running before the importer, we can't reliably
+        // assert. So we check now, but defer asserting until the end of fgImport.
+        //
+        if (m_comp->fgPgoConsistent)
         {
             // verify likely weights, assert on failure, check all blocks
-            m_comp->fgDebugCheckProfileWeights(ProfileChecks::CHECK_LIKELY | ProfileChecks::RAISE_ASSERT |
-                                               ProfileChecks::CHECK_ALL_BLOCKS);
+            m_comp->fgPgoConsistentCheck =
+                m_comp->fgDebugCheckProfileWeights(ProfileChecks::CHECK_LIKELY | ProfileChecks::CHECK_ALL_BLOCKS);
         }
     }
 #endif
@@ -142,14 +147,6 @@ void ProfileSynthesis::AssignLikelihoods()
                 break;
 
             case BBJ_CALLFINALLY:
-                // Single successor next cases
-                //
-                // Note we handle flow to the finally
-                // specially; this represents return
-                // from the finally.
-                AssignLikelihoodNext(block);
-                break;
-
             case BBJ_ALWAYS:
             case BBJ_CALLFINALLYRET:
             case BBJ_LEAVE:
@@ -175,29 +172,16 @@ void ProfileSynthesis::AssignLikelihoods()
     }
 }
 
-//------------------------------------------------------------------------
-// AssignLikelihoodNext: update edge likelihood for block that always
-//   transfers control to bbNext
-//
-// Arguments;
-//   block -- block in question
-//
-void ProfileSynthesis::AssignLikelihoodNext(BasicBlock* block)
-{
-    FlowEdge* const edge = m_comp->fgGetPredForBlock(block->Next(), block);
-    edge->setLikelihood(1.0);
-}
-
 //------------------------------------------------------------------------
 // AssignLikelihoodJump: update edge likelihood for a block that always
-//   transfers control to bbTarget
+//   transfers control to its target block
 //
 // Arguments;
 //   block -- block in question
 //
 void ProfileSynthesis::AssignLikelihoodJump(BasicBlock* block)
 {
-    FlowEdge* const edge = m_comp->fgGetPredForBlock(block->GetTarget(), block);
+    FlowEdge* const edge = block->GetTargetEdge();
     edge->setLikelihood(1.0);
 }
 
@@ -210,36 +194,37 @@ void ProfileSynthesis::AssignLikelihoodJump(BasicBlock* block)
 //
 void ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block)
 {
-    BasicBlock* const jump = block->GetTrueTarget();
-    BasicBlock* const next = block->GetFalseTarget();
+    FlowEdge* const trueEdge  = block->GetTrueEdge();
+    FlowEdge* const falseEdge = block->GetFalseEdge();
 
     // Watch for degenerate case
     //
-    if (jump == next)
+    if (trueEdge == falseEdge)
     {
-        AssignLikelihoodNext(block);
+        assert(trueEdge->getDupCount() == 2);
+        trueEdge->setLikelihood(1.0);
         return;
     }
 
-    FlowEdge* const jumpEdge = m_comp->fgGetPredForBlock(jump, block);
-    FlowEdge* const nextEdge = m_comp->fgGetPredForBlock(next, block);
+    BasicBlock* trueTarget  = trueEdge->getDestinationBlock();
+    BasicBlock* falseTarget = falseEdge->getDestinationBlock();
 
     // THROW heuristic
     //
-    bool const isJumpThrow = jump->KindIs(BBJ_THROW);
-    bool const isNextThrow = next->KindIs(BBJ_THROW);
+    bool const isTrueThrow  = trueTarget->KindIs(BBJ_THROW);
+    bool const isFalseThrow = falseTarget->KindIs(BBJ_THROW);
 
-    if (isJumpThrow != isNextThrow)
+    if (isTrueThrow != isFalseThrow)
     {
-        if (isJumpThrow)
+        if (isTrueThrow)
         {
-            jumpEdge->setLikelihood(0.0);
-            nextEdge->setLikelihood(1.0);
+            trueEdge->setLikelihood(0.0);
+            falseEdge->setLikelihood(1.0);
         }
         else
         {
-            jumpEdge->setLikelihood(1.0);
-            nextEdge->setLikelihood(0.0);
+            trueEdge->setLikelihood(1.0);
+            falseEdge->setLikelihood(0.0);
         }
 
         return;
@@ -247,22 +232,22 @@ void ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block)
 
     // LOOP BACK EDGE heuristic
     //
-    bool const isJumpEdgeBackEdge = m_loops->IsLoopBackEdge(jumpEdge);
-    bool const isNextEdgeBackEdge = m_loops->IsLoopBackEdge(nextEdge);
+    bool const isTrueEdgeBackEdge  = m_loops->IsLoopBackEdge(trueEdge);
+    bool const isFalseEdgeBackEdge = m_loops->IsLoopBackEdge(falseEdge);
 
-    if (isJumpEdgeBackEdge != isNextEdgeBackEdge)
+    if (isTrueEdgeBackEdge != isFalseEdgeBackEdge)
     {
-        if (isJumpEdgeBackEdge)
+        if (isTrueEdgeBackEdge)
         {
-            JITDUMP(FMT_BB "->" FMT_BB " is loop back edge\n", block->bbNum, jump->bbNum);
-            jumpEdge->setLikelihood(loopBackLikelihood);
-            nextEdge->setLikelihood(1.0 - loopBackLikelihood);
+            JITDUMP(FMT_BB "->" FMT_BB " is loop back edge\n", block->bbNum, trueTarget->bbNum);
+            trueEdge->setLikelihood(loopBackLikelihood);
+            falseEdge->setLikelihood(1.0 - loopBackLikelihood);
         }
         else
         {
-            JITDUMP(FMT_BB "->" FMT_BB " is loop back edge\n", block->bbNum, next->bbNum);
-            jumpEdge->setLikelihood(1.0 - loopBackLikelihood);
-            nextEdge->setLikelihood(loopBackLikelihood);
+            JITDUMP(FMT_BB "->" FMT_BB " is loop back edge\n", block->bbNum, falseTarget->bbNum);
+            trueEdge->setLikelihood(1.0 - loopBackLikelihood);
+            falseEdge->setLikelihood(loopBackLikelihood);
         }
 
         return;
@@ -273,22 +258,22 @@ void ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block)
     // Consider: adjust probability if loop has multiple exit edges, so that
     // overall exit probability is around 0.1.
     //
-    bool const isJumpEdgeExitEdge = m_loops->IsLoopExitEdge(jumpEdge);
-    bool const isNextEdgeExitEdge = m_loops->IsLoopExitEdge(nextEdge);
+    bool const isTrueEdgeExitEdge  = m_loops->IsLoopExitEdge(trueEdge);
+    bool const isFalseEdgeExitEdge = m_loops->IsLoopExitEdge(falseEdge);
 
-    if (isJumpEdgeExitEdge != isNextEdgeExitEdge)
+    if (isTrueEdgeExitEdge != isFalseEdgeExitEdge)
     {
-        if (isJumpEdgeExitEdge)
+        if (isTrueEdgeExitEdge)
         {
-            JITDUMP(FMT_BB "->" FMT_BB " is loop exit edge\n", block->bbNum, jump->bbNum);
-            jumpEdge->setLikelihood(1.0 - loopExitLikelihood);
-            nextEdge->setLikelihood(loopExitLikelihood);
+            JITDUMP(FMT_BB "->" FMT_BB " is loop exit edge\n", block->bbNum, trueTarget->bbNum);
+            trueEdge->setLikelihood(1.0 - loopExitLikelihood);
+            falseEdge->setLikelihood(loopExitLikelihood);
         }
         else
         {
-            JITDUMP(FMT_BB "->" FMT_BB " is loop exit edge\n", block->bbNum, next->bbNum);
-            jumpEdge->setLikelihood(loopExitLikelihood);
-            nextEdge->setLikelihood(1.0 - loopExitLikelihood);
+            JITDUMP(FMT_BB "->" FMT_BB " is loop exit edge\n", block->bbNum, falseTarget->bbNum);
+            trueEdge->setLikelihood(loopExitLikelihood);
+            falseEdge->setLikelihood(1.0 - loopExitLikelihood);
         }
 
         return;
@@ -296,20 +281,20 @@ void ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block)
 
     // RETURN heuristic
     //
-    bool const isJumpReturn = jump->KindIs(BBJ_RETURN);
-    bool const isNextReturn = next->KindIs(BBJ_RETURN);
+    bool const isJumpReturn = trueTarget->KindIs(BBJ_RETURN);
+    bool const isNextReturn = falseTarget->KindIs(BBJ_RETURN);
 
     if (isJumpReturn != isNextReturn)
     {
         if (isJumpReturn)
         {
-            jumpEdge->setLikelihood(returnLikelihood);
-            nextEdge->setLikelihood(1.0 - returnLikelihood);
+            trueEdge->setLikelihood(returnLikelihood);
+            falseEdge->setLikelihood(1.0 - returnLikelihood);
         }
         else
         {
-            jumpEdge->setLikelihood(1.0 - returnLikelihood);
-            nextEdge->setLikelihood(returnLikelihood);
+            trueEdge->setLikelihood(1.0 - returnLikelihood);
+            falseEdge->setLikelihood(returnLikelihood);
         }
 
         return;
@@ -319,8 +304,8 @@ void ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block)
     //
     // Give slight preference to bbNext
     //
-    jumpEdge->setLikelihood(1.0 - ilNextLikelihood);
-    nextEdge->setLikelihood(ilNextLikelihood);
+    trueEdge->setLikelihood(1.0 - ilNextLikelihood);
+    falseEdge->setLikelihood(ilNextLikelihood);
 }
 
 //------------------------------------------------------------------------
@@ -342,10 +327,9 @@ void ProfileSynthesis::AssignLikelihoodSwitch(BasicBlock* block)
 
     // Each unique edge gets some multiple of that basic probability
     //
-    for (BasicBlock* const succ : block->Succs(m_comp))
+    for (FlowEdge* const succEdge : block->SuccEdges(m_comp))
     {
-        FlowEdge* const edge = m_comp->fgGetPredForBlock(succ, block);
-        edge->setLikelihood(p * edge->getDupCount());
+        succEdge->setLikelihood(p * succEdge->getDupCount());
     }
 }
 
@@ -368,10 +352,9 @@ weight_t ProfileSynthesis::SumOutgoingLikelihoods(BasicBlock* block, WeightVecto
         likelihoods->clear();
     }
 
-    for (BasicBlock* const succ : block->Succs(m_comp))
+    for (FlowEdge* const succEdge : block->SuccEdges(m_comp))
     {
-        FlowEdge* const edge       = m_comp->fgGetPredForBlock(succ, block);
-        weight_t        likelihood = edge->getLikelihood();
+        weight_t likelihood = succEdge->getLikelihood();
         if (likelihoods != nullptr)
         {
             likelihoods->push_back(likelihood);
@@ -406,11 +389,6 @@ void ProfileSynthesis::RepairLikelihoods()
                 break;
 
             case BBJ_CALLFINALLY:
-                // Single successor next cases.
-                // Just assign 1.0
-                AssignLikelihoodNext(block);
-                break;
-
             case BBJ_ALWAYS:
             case BBJ_CALLFINALLYRET:
             case BBJ_LEAVE:
@@ -498,11 +476,6 @@ void ProfileSynthesis::BlendLikelihoods()
                 break;
 
             case BBJ_CALLFINALLY:
-                // Single successor next cases.
-                // Just assign 1.0
-                AssignLikelihoodNext(block);
-                break;
-
             case BBJ_ALWAYS:
             case BBJ_CALLFINALLYRET:
             case BBJ_LEAVE:
@@ -560,15 +533,15 @@ void ProfileSynthesis::BlendLikelihoods()
                 JITDUMP("Blending likelihoods in " FMT_BB " with blend factor " FMT_WT " \n", block->bbNum,
                         blendFactor);
                 iter = likelihoods.begin();
-                for (BasicBlock* const succ : block->Succs(m_comp))
+                for (FlowEdge* const succEdge : block->SuccEdges(m_comp))
                 {
-                    FlowEdge* const edge          = m_comp->fgGetPredForBlock(succ, block);
-                    weight_t        newLikelihood = edge->getLikelihood();
-                    weight_t        oldLikelihood = *iter;
+                    weight_t newLikelihood = succEdge->getLikelihood();
+                    weight_t oldLikelihood = *iter;
 
-                    edge->setLikelihood((blendFactor * oldLikelihood) + ((1.0 - blendFactor) * newLikelihood));
-                    JITDUMP(FMT_BB " -> " FMT_BB " was " FMT_WT " now " FMT_WT "\n", block->bbNum, succ->bbNum,
-                            oldLikelihood, edge->getLikelihood());
+                    succEdge->setLikelihood((blendFactor * oldLikelihood) + ((1.0 - blendFactor) * newLikelihood));
+                    BasicBlock* const succBlock = succEdge->getDestinationBlock();
+                    JITDUMP(FMT_BB " -> " FMT_BB " was " FMT_WT " now " FMT_WT "\n", block->bbNum, succBlock->bbNum,
+                            oldLikelihood, succEdge->getLikelihood());
 
                     iter++;
                 }
@@ -588,10 +561,9 @@ void ProfileSynthesis::ClearLikelihoods()
 {
     for (BasicBlock* const block : m_comp->Blocks())
     {
-        for (BasicBlock* const succ : block->Succs(m_comp))
+        for (FlowEdge* const succEdge : block->SuccEdges(m_comp))
         {
-            FlowEdge* const edge = m_comp->fgGetPredForBlock(succ, block);
-            edge->clearLikelihood();
+            succEdge->clearLikelihood();
         }
     }
 }
@@ -664,10 +636,9 @@ void ProfileSynthesis::RandomizeLikelihoods()
         }
 
         i = 0;
-        for (BasicBlock* const succ : block->Succs(m_comp))
+        for (FlowEdge* const succEdge : block->SuccEdges(m_comp))
         {
-            FlowEdge* const edge = m_comp->fgGetPredForBlock(succ, block);
-            edge->setLikelihood(likelihoods[i++] / sum);
+            succEdge->setLikelihood(likelihoods[i++] / sum);
         }
     }
 #endif // DEBUG
@@ -859,28 +830,26 @@ void ProfileSynthesis::ComputeCyclicProbabilities(FlowGraphNaturalLoop* loop)
                             " to reflect capping; current likelihood is " FMT_WT "\n",
                             exitBlock->bbNum, exitEdge->getLikelihood());
 
-                    BasicBlock* const jump               = exitBlock->GetTrueTarget();
-                    BasicBlock* const next               = exitBlock->GetFalseTarget();
-                    FlowEdge* const   jumpEdge           = m_comp->fgGetPredForBlock(jump, exitBlock);
-                    FlowEdge* const   nextEdge           = m_comp->fgGetPredForBlock(next, exitBlock);
-                    weight_t const    exitLikelihood     = (missingExitWeight + currentExitWeight) / exitBlockWeight;
-                    weight_t const    continueLikelihood = 1.0 - exitLikelihood;
+                    FlowEdge* const trueEdge           = exitBlock->GetTrueEdge();
+                    FlowEdge* const falseEdge          = exitBlock->GetFalseEdge();
+                    weight_t const  exitLikelihood     = (missingExitWeight + currentExitWeight) / exitBlockWeight;
+                    weight_t const  continueLikelihood = 1.0 - exitLikelihood;
 
                     // We are making it more likely that the loop exits, so the new exit likelihood
                     // should be greater than the old.
                     //
                     assert(exitLikelihood > exitEdge->getLikelihood());
 
-                    if (jumpEdge == exitEdge)
+                    if (trueEdge == exitEdge)
                     {
-                        jumpEdge->setLikelihood(exitLikelihood);
-                        nextEdge->setLikelihood(continueLikelihood);
+                        trueEdge->setLikelihood(exitLikelihood);
+                        falseEdge->setLikelihood(continueLikelihood);
                     }
                     else
                     {
-                        assert(nextEdge == exitEdge);
-                        jumpEdge->setLikelihood(continueLikelihood);
-                        nextEdge->setLikelihood(exitLikelihood);
+                        assert(falseEdge == exitEdge);
+                        trueEdge->setLikelihood(continueLikelihood);
+                        falseEdge->setLikelihood(exitLikelihood);
                     }
                     adjustedExit = true;
 
@@ -920,9 +889,9 @@ void ProfileSynthesis::ComputeCyclicProbabilities(FlowGraphNaturalLoop* loop)
 //
 void ProfileSynthesis::AssignInputWeights(ProfileSynthesisOption option)
 {
-    // Determine input weight for entire method.
+    // Determine input weight for method entry
     //
-    BasicBlock* const entryBlock  = m_comp->fgFirstBB;
+    BasicBlock* const entryBlock  = m_comp->opts.IsOSR() ? m_comp->fgEntryBB : m_comp->fgFirstBB;
     weight_t          entryWeight = BB_UNITY_WEIGHT;
 
     switch (option)
@@ -930,7 +899,7 @@ void ProfileSynthesis::AssignInputWeights(ProfileSynthesisOption option)
         case ProfileSynthesisOption::BlendLikelihoods:
         case ProfileSynthesisOption::RepairLikelihoods:
         {
-            // Try and retain fgEntryBB's weight.
+            // Try and retain entryBlock's weight.
             // Easiest to do when the block has no preds.
             //
             if (entryBlock->hasProfileWeight())
@@ -965,27 +934,64 @@ void ProfileSynthesis::AssignInputWeights(ProfileSynthesisOption option)
             break;
     }
 
-    // Determine input weight for EH regions.
+    // Reset existing weights
     //
-    const weight_t ehWeight = entryWeight * exceptionScale;
-
     for (BasicBlock* block : m_comp->Blocks())
     {
         block->setBBProfileWeight(0.0);
     }
 
+    // Set entry weight
+    //
+    JITDUMP("Synthesis: entry " FMT_BB " has input weight " FMT_WT "\n", entryBlock->bbNum, entryWeight);
     entryBlock->setBBProfileWeight(entryWeight);
 
-    if (!m_comp->compIsForInlining())
+    // Determine input weight for EH regions, if any.
+    //
+    weight_t exceptionScaleFactor = exceptionScale;
+
+#ifdef DEBUG
+    if (JitConfig.JitSynthesisExceptionScale() != nullptr)
+    {
+        ConfigDoubleArray JitSynthesisExceptionScaleArray;
+        JitSynthesisExceptionScaleArray.EnsureInit(JitConfig.JitSynthesisExceptionScale());
+        weight_t newFactor = JitSynthesisExceptionScaleArray.GetData()[0];
+
+        if ((newFactor >= 0) && (newFactor <= 1.0))
+        {
+            exceptionScaleFactor = newFactor;
+        }
+    }
+#endif
+
+    JITDUMP("Synthesis: exception scale factor " FMT_WT "\n", exceptionScaleFactor);
+    const weight_t ehWeight = entryWeight * exceptionScaleFactor;
+
+    if (ehWeight != 0)
     {
-        for (EHblkDsc* const HBtab : EHClauses(m_comp))
+        // We can't inline methods with EH, also inlinees share the parent
+        // EH tab, so we can't rely on this being empty.
+        //
+        if (!m_comp->compIsForInlining())
         {
-            if (HBtab->HasFilter())
+            for (EHblkDsc* const HBtab : EHClauses(m_comp))
             {
-                HBtab->ebdFilter->setBBProfileWeight(ehWeight);
-            }
+                // Only set weights on the filter/hander entries
+                // if the associated try is reachable.
+                //
+                BasicBlock* const tryBlock = HBtab->ebdTryBeg;
+                if (!m_dfsTree->Contains(tryBlock))
+                {
+                    continue;
+                }
+
+                if (HBtab->HasFilter())
+                {
+                    HBtab->ebdFilter->setBBProfileWeight(ehWeight);
+                }
 
-            HBtab->ebdHndBeg->setBBProfileWeight(ehWeight);
+                HBtab->ebdHndBeg->setBBProfileWeight(ehWeight);
+            }
         }
     }
 }
@@ -998,11 +1004,25 @@ void ProfileSynthesis::ComputeBlockWeights()
 {
     JITDUMP("Computing block weights\n");
 
+    bool useSolver = true;
+
+#ifdef DEBUG
+    useSolver = JitConfig.JitSynthesisUseSolver() > 0;
+#endif
+
+    if (useSolver)
+    {
+        GaussSeidelSolver();
+        return;
+    }
+
     for (unsigned i = m_dfsTree->GetPostOrderCount(); i != 0; i--)
     {
         BasicBlock* block = m_dfsTree->GetPostOrder(i - 1);
         ComputeBlockWeight(block);
     }
+
+    m_approximate = (m_cappedCyclicProbabilities) || (m_improperLoopHeaders > 0);
 }
 
 //------------------------------------------------------------------------
@@ -1036,7 +1056,7 @@ void ProfileSynthesis::ComputeBlockWeight(BasicBlock* block)
     }
     else
     {
-        // Sum all incoming edges that aren't EH flow
+        // Sum all incoming edges that aren't EH flow.
         //
         for (FlowEdge* const edge : block->PredEdges())
         {
@@ -1066,3 +1086,275 @@ void ProfileSynthesis::ComputeBlockWeight(BasicBlock* block)
         }
     }
 }
+
+//------------------------------------------------------------------------
+// GaussSeidelSolver: solve for block weights iteratively
+//
+void ProfileSynthesis::GaussSeidelSolver()
+{
+    // The computed block weights.
+    //
+    jitstd::vector<weight_t> countVector(m_comp->fgBBNumMax + 1, 0, m_comp->getAllocator(CMK_Pgo));
+
+    // The algorithm.
+    //
+    bool                          converged        = false;
+    weight_t                      previousResidual = 0;
+    weight_t                      residual         = 0;
+    weight_t                      relResidual      = 0;
+    weight_t                      oldRelResidual   = 0;
+    weight_t                      eigenvalue       = 0;
+    weight_t const                stopRelResidual  = 0.002;
+    BasicBlock*                   residualBlock    = nullptr;
+    BasicBlock*                   relResidualBlock = nullptr;
+    const FlowGraphDfsTree* const dfs              = m_loops->GetDfsTree();
+    unsigned const                blockCount       = dfs->GetPostOrderCount();
+
+    // Remember the entry block
+    //
+    BasicBlock* const entryBlock = m_comp->opts.IsOSR() ? m_comp->fgEntryBB : m_comp->fgFirstBB;
+    JITDUMP("Synthesis solver: flow graph has %u improper loop headers\n", m_improperLoopHeaders);
+
+    // This is an iterative solver, and it may require a lot of iterations
+    // to converge. We don't have time for that, so we will give up
+    // fairly quickly.
+    //
+    // This can be mitgated somewhat by using blend mode for repairs, as that tends
+    // to shift likelihoods off of the extremes (say 0.999) can lead to high
+    // iteration counts.
+    //
+    // If we have existing inconsistent data, we might consider starting from
+    // that data, rather than from mostly 0.
+    //
+    // It is possible that a more sophisticated solver (say GMRES or BiCGStab)
+    // might be more effective and run in acceptable time.
+    //
+    unsigned const iterationLimit = (m_improperLoopHeaders > 0) ? 20 : 1;
+
+    // Push weights forward in flow, iterate until convergence.
+    //
+    unsigned i = 0;
+    for (; i < iterationLimit; i++)
+    {
+        residualBlock    = nullptr;
+        relResidualBlock = nullptr;
+        residual         = 0;
+        relResidual      = 0;
+
+        // Compute new counts based on Gauss-Seidel iteration
+        //
+        // Todo: after 1st iteration we can start at the postorder
+        // num of the first improper SCC block, as anything "above"
+        // this will no longer change.
+        //
+        // Likewise we can stop at the postorder num of the last block that is
+        // part of any improper SCC, if we knew what that was,
+        // and ony run through the tail blocks on the last iteration.
+        //
+        // (or more generally we can go SCC by SCC...)
+        //
+        for (unsigned j = m_dfsTree->GetPostOrderCount(); j != 0; j--)
+        {
+            BasicBlock* const block     = dfs->GetPostOrder(j - 1);
+            weight_t          newWeight = 0;
+
+            // Some blocks have additional profile weights that don't come from flow edges.
+            //
+            if (block == entryBlock)
+            {
+                newWeight = block->bbWeight;
+            }
+            else
+            {
+                EHblkDsc* const ehDsc = m_comp->ehGetBlockHndDsc(block);
+
+                if (ehDsc != nullptr)
+                {
+                    if (ehDsc->HasFilter() && (block == ehDsc->ebdFilter))
+                    {
+                        newWeight = block->bbWeight;
+                    }
+                    else if (block == ehDsc->ebdHndBeg)
+                    {
+                        newWeight = block->bbWeight;
+
+                        // Finallies also add in the weight of their try.
+                        //
+                        if (ehDsc->HasFinallyHandler())
+                        {
+                            newWeight += countVector[ehDsc->ebdTryBeg->bbNum];
+                        }
+                    }
+                }
+            }
+
+            // Blocks with no preds are simple to handle
+            //
+            if (block->bbPreds != nullptr)
+            {
+                // Leverage Cp for existing loop headers, provided that
+                // all contained loops are proper.
+                //
+                // This is an optimization to speed convergence.
+                //
+                FlowGraphNaturalLoop* const loop = m_loops->GetLoopByHeader(block);
+
+                if ((loop != nullptr) && !loop->ContainsImproperHeader())
+                {
+                    // Sum all entry edges that aren't EH flow
+                    //
+                    for (FlowEdge* const edge : loop->EntryEdges())
+                    {
+                        BasicBlock* const predBlock = edge->getSourceBlock();
+
+                        if (BasicBlock::sameHndRegion(block, predBlock))
+                        {
+                            newWeight += edge->getLikelihood() * countVector[predBlock->bbNum];
+                        }
+                    }
+
+                    // Scale by cyclic probability
+                    //
+                    newWeight *= m_cyclicProbabilities[loop->GetIndex()];
+                }
+                else
+                {
+                    // A self-edge that's part of a bigger SCC may
+                    // not be detected as simple loop.
+                    //
+                    FlowEdge* selfEdge = nullptr;
+
+                    for (FlowEdge* const edge : block->PredEdges())
+                    {
+                        BasicBlock* const predBlock = edge->getSourceBlock();
+
+                        if (predBlock == block)
+                        {
+                            // We might see a degenerate self BBJ_COND. Hoepfully not.
+                            //
+                            assert(selfEdge == nullptr);
+                            selfEdge = edge;
+                            continue;
+                        }
+
+                        if (BasicBlock::sameHndRegion(block, predBlock))
+                        {
+                            newWeight += edge->getLikelihood() * countVector[predBlock->bbNum];
+                        }
+                    }
+
+                    if (selfEdge != nullptr)
+                    {
+                        weight_t selfLikelihood = selfEdge->getLikelihood();
+                        if (selfLikelihood > cappedLikelihood)
+                        {
+                            m_cappedCyclicProbabilities++;
+                            selfLikelihood = cappedLikelihood;
+                        }
+                        newWeight = newWeight / (1.0 - selfLikelihood);
+                    }
+                }
+            }
+
+            // Note we can't use SOR to accelerate convergence, as our coefficient matrix is an M-matrix
+            // and so it is risky to use \omega > 1 -- our dominant eigenvalue may be very close to 1.
+            // Also even if safe, SOR may over-correct and give negative results.
+            //
+            weight_t const oldWeight = countVector[block->bbNum];
+            weight_t const change    = newWeight - oldWeight;
+
+            // Hence counts will not decrease.
+            //
+            assert(change >= 0);
+
+            JITDUMP("iteration %u: " FMT_BB " :: old " FMT_WT " new " FMT_WT " change " FMT_WT "\n", i, block->bbNum,
+                    oldWeight, newWeight, change);
+            countVector[block->bbNum] = newWeight;
+
+            // Remember max absolute and relative change
+            // (note rel residual will be infinite on the first pass, that's ok)
+            //
+            // Note we are using a "point" bound here ("infinity norm") rather than say
+            // computing the l2-norm of the entire residual vector.
+            //
+            weight_t const blockRelResidual = change / oldWeight;
+
+            if ((relResidualBlock == nullptr) || ((oldWeight > 0) && (blockRelResidual > relResidual)))
+            {
+                relResidual      = blockRelResidual;
+                relResidualBlock = block;
+            }
+
+            if ((residualBlock == nullptr) || (change > residual))
+            {
+                residual      = change;
+                residualBlock = block;
+            }
+
+            if (newWeight >= maxCount)
+            {
+                JITDUMP("count overflow in " FMT_BB ": " FMT_WT "\n", block->bbNum, newWeight);
+                m_overflow = true;
+            }
+        }
+
+        // If there were no improper headers, we will have converged in one pass.
+        // (profile may still be inconsistent, if there were capped cyclic probabilities).
+        //
+        if (m_improperLoopHeaders == 0)
+        {
+            converged = true;
+            break;
+        }
+
+        JITDUMP("iteration %u: max residual is at " FMT_BB " : " FMT_WT "\n", i, residualBlock->bbNum, residual);
+        JITDUMP("iteration %u: max rel residual is at " FMT_BB " : " FMT_WT "\n", i, relResidualBlock->bbNum,
+                relResidual);
+
+        // If max relative residual is sufficiently small, then stop.
+        //
+        if (relResidual < stopRelResidual)
+        {
+            converged = true;
+            break;
+        }
+
+        if (m_overflow)
+        {
+            break;
+        }
+
+        // If we have been iterating for a bit, estimate the dominant GS
+        // eigenvalue. (we might want to start with Jacobi iterations
+        // to get the Jacobi eigenvalue instead).
+        //
+        if ((i > 3) && (oldRelResidual > 0))
+        {
+            eigenvalue = relResidual / oldRelResidual;
+            JITDUMP(" eigenvalue " FMT_WT, eigenvalue);
+        }
+        JITDUMP("\n");
+        oldRelResidual = relResidual;
+    }
+
+    JITDUMP("%s at iteration %u rel residual " FMT_WT " eigenvalue " FMT_WT "\n",
+            converged ? "converged" : "failed to converge", i, relResidual, eigenvalue);
+
+    // TODO: computation above may be on the edge of diverging as there is
+    // nothing preventing a general cycle from having 1.0 likelihood. That
+    // is, there is nothing analogous to the capped cyclic check for more
+    // general cycles.
+    //
+    // We should track if the overall residual error (say L1 or L2 norm).
+    // If it is not decreasing, consider not using the data.
+    //
+    // Propagate the computed weights to the blocks.
+    //
+    for (unsigned j = m_dfsTree->GetPostOrderCount(); j != 0; j--)
+    {
+        BasicBlock* const block = dfs->GetPostOrder(j - 1);
+        block->setBBProfileWeight(max(0.0, countVector[block->bbNum]));
+    }
+
+    m_approximate = !converged || (m_cappedCyclicProbabilities > 0);
+}
diff --git a/src/coreclr/jit/fgprofilesynthesis.h b/src/coreclr/jit/fgprofilesynthesis.h
index 9297357049e8..e2e7c58cbac4 100644
--- a/src/coreclr/jit/fgprofilesynthesis.h
+++ b/src/coreclr/jit/fgprofilesynthesis.h
@@ -41,7 +41,7 @@ class ProfileSynthesis
 
 private:
     ProfileSynthesis(Compiler* compiler)
-        : m_comp(compiler), m_loops(nullptr), m_improperLoopHeaders(0), m_cappedCyclicProbabilities(0)
+        : m_comp(compiler)
     {
     }
 
@@ -52,13 +52,13 @@ class ProfileSynthesis
     static constexpr weight_t ilNextLikelihood   = 0.52;
     static constexpr weight_t loopBackLikelihood = 0.9;
     static constexpr weight_t loopExitLikelihood = 0.9;
+    static constexpr weight_t maxCount           = 1e12;
 
     void Run(ProfileSynthesisOption option);
 
     weight_t SumOutgoingLikelihoods(BasicBlock* block, WeightVector* likelihoods = nullptr);
 
     void AssignLikelihoods();
-    void AssignLikelihoodNext(BasicBlock* block);
     void AssignLikelihoodJump(BasicBlock* block);
     void AssignLikelihoodCond(BasicBlock* block);
     void AssignLikelihoodSwitch(BasicBlock* block);
@@ -76,13 +76,17 @@ class ProfileSynthesis
     void ComputeBlockWeights();
     void ComputeBlockWeight(BasicBlock* block);
 
+    void GaussSeidelSolver();
+
 private:
     Compiler* const        m_comp;
-    FlowGraphDfsTree*      m_dfsTree;
-    FlowGraphNaturalLoops* m_loops;
-    weight_t*              m_cyclicProbabilities;
-    unsigned               m_improperLoopHeaders;
-    unsigned               m_cappedCyclicProbabilities;
+    FlowGraphDfsTree*      m_dfsTree                   = nullptr;
+    FlowGraphNaturalLoops* m_loops                     = nullptr;
+    weight_t*              m_cyclicProbabilities       = nullptr;
+    unsigned               m_improperLoopHeaders       = 0;
+    unsigned               m_cappedCyclicProbabilities = 0;
+    bool                   m_approximate               = false;
+    bool                   m_overflow                  = false;
 };
 
 #endif // !_FGPROFILESYNTHESIS_H_
diff --git a/src/coreclr/jit/fgstmt.cpp b/src/coreclr/jit/fgstmt.cpp
index fead5b82e0b3..2189955a6f97 100644
--- a/src/coreclr/jit/fgstmt.cpp
+++ b/src/coreclr/jit/fgstmt.cpp
@@ -49,9 +49,9 @@ bool Compiler::fgBlockContainsStatementBounded(BasicBlock* block,
 //   stmt  - the statement to be inserted.
 //
 // Notes:
-//    We always insert phi statements at the beginning.
-//    In other cases, if there are any phi assignments and/or an assignment of
-//    the GT_CATCH_ARG, we insert after those.
+//    We always insert phi statements at the beginning. In other cases, if
+//    there are any phi stores and/or a store of the GT_CATCH_ARG, we insert
+//    after those.
 //
 void Compiler::fgInsertStmtAtBeg(BasicBlock* block, Statement* stmt)
 {
@@ -538,9 +538,9 @@ inline bool OperIsControlFlow(genTreeOps oper)
 
         case GT_RETURN:
         case GT_RETFILT:
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
         case GT_END_LFIN:
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
             return true;
 
         default:
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index 3d3f9bd1da29..7bc003dbf442 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -272,17 +272,11 @@ BasicBlock* Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
         // I want to create:
         // top -> poll -> bottom (lexically)
         // so that we jump over poll to get to bottom.
-        BasicBlock* top         = block;
-        BBKinds     oldJumpKind = top->GetKind();
+        BasicBlock* top = block;
 
         BasicBlock* poll = fgNewBBafter(BBJ_ALWAYS, top, true);
         bottom           = fgNewBBafter(top->GetKind(), poll, true);
 
-        poll->SetTarget(bottom);
-        assert(poll->JumpsToNext());
-
-        bottom->TransferTarget(top);
-
         // Update block flags
         const BasicBlockFlags originalFlags = top->GetFlagsRaw() | BBF_GC_SAFE_POINT;
 
@@ -306,7 +300,7 @@ BasicBlock* Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
         }
 
         // Remove the last statement from Top and add it to Bottom if necessary.
-        if ((oldJumpKind == BBJ_COND) || (oldJumpKind == BBJ_RETURN) || (oldJumpKind == BBJ_THROW))
+        if (top->KindIs(BBJ_COND, BBJ_RETURN, BBJ_THROW))
         {
             Statement* stmt = top->firstStmt();
             while (stmt->GetNextStmt() != nullptr)
@@ -322,7 +316,6 @@ BasicBlock* Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
         // Create a GT_EQ node that checks against g_TrapReturningThreads.  True jumps to Bottom,
         // false falls through to poll.  Add this to the end of Top.  Top is now BBJ_COND.  Bottom is
         // now a jump target
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef ENABLE_FAST_GCPOLL_HELPER
         // Prefer the fast gc poll helepr over the double indirection
@@ -370,38 +363,49 @@ BasicBlock* Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
         }
 #endif
 
-        top->SetCond(bottom, poll);
         // Bottom has Top and Poll as its predecessors.  Poll has just Top as a predecessor.
-        fgAddRefPred(bottom, poll);
-        fgAddRefPred(bottom, top);
-        fgAddRefPred(poll, top);
+        FlowEdge* const trueEdge  = fgAddRefPred(bottom, top);
+        FlowEdge* const falseEdge = fgAddRefPred(poll, top);
+        trueEdge->setLikelihood(1.0);
+        falseEdge->setLikelihood(0.0);
+
+        FlowEdge* const newEdge = fgAddRefPred(bottom, poll);
+        poll->SetTargetEdge(newEdge);
+        assert(poll->JumpsToNext());
 
         // Replace Top with Bottom in the predecessor list of all outgoing edges from Bottom
         // (1 for unconditional branches, 2 for conditional branches, N for switches).
-        switch (oldJumpKind)
+        switch (top->GetKind())
         {
             case BBJ_RETURN:
             case BBJ_THROW:
                 // no successors
                 break;
+
             case BBJ_COND:
                 // replace predecessor in true/false successors.
                 noway_assert(!bottom->IsLast());
-                fgReplacePred(bottom->GetFalseTarget(), top, bottom);
-                fgReplacePred(bottom->GetTrueTarget(), top, bottom);
+                fgReplacePred(top->GetFalseEdge(), bottom);
+                fgReplacePred(top->GetTrueEdge(), bottom);
                 break;
 
             case BBJ_ALWAYS:
             case BBJ_CALLFINALLY:
-                fgReplacePred(bottom->GetTarget(), top, bottom);
+                fgReplacePred(top->GetTargetEdge(), bottom);
                 break;
+
             case BBJ_SWITCH:
                 NO_WAY("SWITCH should be a call rather than an inlined poll.");
                 break;
+
             default:
                 NO_WAY("Unknown block type for updating predecessor lists.");
+                break;
         }
 
+        bottom->TransferTarget(top);
+        top->SetCond(trueEdge, falseEdge);
+
         if (compCurBB == top)
         {
             compCurBB = bottom;
@@ -583,6 +587,15 @@ PhaseStatus Compiler::fgImport()
         compInlineResult->SetImportedILSize(info.compILImportSize);
     }
 
+    // Now that we've made it through the importer, we know the IL was valid.
+    // If we synthesized profile data and though it should be consistent,
+    // verify that it was consistent.
+    //
+    if (fgPgoSynthesized && fgPgoConsistent)
+    {
+        assert(fgPgoConsistentCheck);
+    }
+
     return PhaseStatus::MODIFIED_EVERYTHING;
 }
 
@@ -1039,7 +1052,7 @@ GenTree* Compiler::fgOptimizeDelegateConstructor(GenTreeCall*            call,
                 GenTree*       targetObjPointers = call->gtArgs.GetArgByIndex(1)->GetNode();
                 CORINFO_LOOKUP pLookup;
                 info.compCompHnd->getReadyToRunDelegateCtorHelper(&ldftnToken->m_token, ldftnToken->m_tokenConstraint,
-                                                                  clsHnd, &pLookup);
+                                                                  clsHnd, info.compMethodHnd, &pLookup);
                 if (!pLookup.lookupKind.needsRuntimeLookup)
                 {
                     call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_DELEGATE_CTOR, TYP_VOID, thisPointer,
@@ -1051,10 +1064,11 @@ GenTree* Compiler::fgOptimizeDelegateConstructor(GenTreeCall*            call,
                     assert(oper != GT_FTN_ADDR);
                     CORINFO_CONST_LOOKUP genericLookup;
                     info.compCompHnd->getReadyToRunHelper(&ldftnToken->m_token, &pLookup.lookupKind,
-                                                          CORINFO_HELP_READYTORUN_GENERIC_HANDLE, &genericLookup);
+                                                          CORINFO_HELP_READYTORUN_GENERIC_HANDLE, info.compMethodHnd,
+                                                          &genericLookup);
                     GenTree* ctxTree = getRuntimeContextTree(pLookup.lookupKind.runtimeLookupKind);
                     call             = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_DELEGATE_CTOR, TYP_VOID, thisPointer,
-                                               targetObjPointers, ctxTree);
+                                                           targetObjPointers, ctxTree);
                     call->setEntryPoint(genericLookup);
                 }
             }
@@ -1074,7 +1088,7 @@ GenTree* Compiler::fgOptimizeDelegateConstructor(GenTreeCall*            call,
 
             CORINFO_LOOKUP entryPoint;
             info.compCompHnd->getReadyToRunDelegateCtorHelper(&ldftnToken->m_token, ldftnToken->m_tokenConstraint,
-                                                              clsHnd, &entryPoint);
+                                                              clsHnd, info.compMethodHnd, &entryPoint);
             assert(!entryPoint.lookupKind.needsRuntimeLookup);
             call->setEntryPoint(entryPoint.constLookup);
         }
@@ -1151,14 +1165,26 @@ bool Compiler::fgCastNeeded(GenTree* tree, var_types toType)
     //
     // Is the tree as GT_CAST or a GT_CALL ?
     //
-    if (tree->OperGet() == GT_CAST)
+    if (tree->OperIs(GT_CAST))
     {
         fromType = tree->CastToType();
     }
-    else if (tree->OperGet() == GT_CALL)
+    else if (tree->OperIs(GT_CALL))
     {
         fromType = (var_types)tree->AsCall()->gtReturnType;
     }
+    else if (tree->OperIs(GT_LCL_VAR))
+    {
+        LclVarDsc* varDsc = lvaGetDesc(tree->AsLclVarCommon());
+        if (varDsc->lvNormalizeOnStore())
+        {
+            fromType = varDsc->TypeGet();
+        }
+        else
+        {
+            fromType = tree->TypeGet();
+        }
+    }
     else
     {
         fromType = tree->TypeGet();
@@ -1267,8 +1293,6 @@ GenTree* Compiler::fgGetCritSectOfStaticMethod()
     return tree;
 }
 
-#if defined(FEATURE_EH_FUNCLETS)
-
 /*****************************************************************************
  *
  *  Add monitor enter/exit calls for synchronized methods, and a try/fault
@@ -1331,6 +1355,8 @@ GenTree* Compiler::fgGetCritSectOfStaticMethod()
 
 void Compiler::fgAddSyncMethodEnterExit()
 {
+    assert(UsesFunclets());
+
     assert((info.compFlags & CORINFO_FLG_SYNCH) != 0);
 
     // We need to do this transformation before funclets are created.
@@ -1626,14 +1652,13 @@ void Compiler::fgConvertSyncReturnToLeave(BasicBlock* block)
                                                // try/finally, which must be the last EH region.
 
     EHblkDsc* ehDsc = ehGetDsc(tryIndex);
-    assert(ehDsc->ebdEnclosingTryIndex ==
-           EHblkDsc::NO_ENCLOSING_INDEX); // There are no enclosing regions of the BBJ_RETURN block
+    assert(ehDsc->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX); // There are no enclosing regions of the
+                                                                         // BBJ_RETURN block
     assert(ehDsc->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX);
 
     // Convert the BBJ_RETURN to BBJ_ALWAYS, jumping to genReturnBB.
-    block->SetKindAndTarget(BBJ_ALWAYS, genReturnBB);
     FlowEdge* const newEdge = fgAddRefPred(genReturnBB, block);
-    newEdge->setLikelihood(1.0);
+    block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
 
 #ifdef DEBUG
     if (verbose)
@@ -1644,8 +1669,6 @@ void Compiler::fgConvertSyncReturnToLeave(BasicBlock* block)
 #endif
 }
 
-#endif // FEATURE_EH_FUNCLETS
-
 //------------------------------------------------------------------------
 // fgAddReversePInvokeEnterExit: Add enter/exit calls for reverse PInvoke methods
 //
@@ -1803,7 +1826,8 @@ class MergedReturns
     bool mergingReturns = false;
 
 public:
-    MergedReturns(Compiler* comp) : comp(comp)
+    MergedReturns(Compiler* comp)
+        : comp(comp)
     {
         comp->fgReturnCount = 0;
     }
@@ -2103,9 +2127,8 @@ class MergedReturns
 
                     // Change BBJ_RETURN to BBJ_ALWAYS targeting const return block.
                     assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0);
-                    returnBlock->SetKindAndTarget(BBJ_ALWAYS, constReturnBlock);
                     FlowEdge* const newEdge = comp->fgAddRefPred(constReturnBlock, returnBlock);
-                    newEdge->setLikelihood(1.0);
+                    returnBlock->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
 
                     // Remove GT_RETURN since constReturnBlock returns the constant.
                     assert(returnBlock->lastStmt()->GetRootNode()->OperIs(GT_RETURN));
@@ -2247,7 +2270,7 @@ class MergedReturns
         return nullptr;
     }
 };
-}
+} // namespace
 
 //------------------------------------------------------------------------
 // fgAddInternal: add blocks and trees to express special method semantics
@@ -2274,8 +2297,9 @@ PhaseStatus Compiler::fgAddInternal()
     madeChanges |= fgCreateFiltersForGenericExceptions();
 
     // The backend requires a scratch BB into which it can safely insert a P/Invoke method prolog if one is
-    // required. Similarly, we need a scratch BB for poisoning. Create it here.
-    if (compMethodRequiresPInvokeFrame() || compShouldPoisonFrame())
+    // required. Similarly, we need a scratch BB for poisoning and when we have Swift parameters to reassemble.
+    // Create it here.
+    if (compMethodRequiresPInvokeFrame() || compShouldPoisonFrame() || lvaHasAnySwiftStackParamToReassemble())
     {
         madeChanges |= fgEnsureFirstBBisScratch();
         fgFirstBB->SetFlags(BBF_DONT_REMOVE);
@@ -2306,7 +2330,7 @@ PhaseStatus Compiler::fgAddInternal()
 #ifndef JIT32_GCENCODER
             lva0CopiedForGenericsCtxt = ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0);
 #else  // JIT32_GCENCODER
-            lva0CopiedForGenericsCtxt          = false;
+            lva0CopiedForGenericsCtxt = false;
 #endif // JIT32_GCENCODER
             noway_assert(lva0CopiedForGenericsCtxt || !lvaTable[info.compThisArg].IsAddressExposed());
             noway_assert(!lvaTable[info.compThisArg].lvHasILStoreOp);
@@ -2334,17 +2358,15 @@ PhaseStatus Compiler::fgAddInternal()
     m_llvm->AddUnhandledExceptionHandler();
 #endif // TARGET_WASM
 
-#if defined(FEATURE_EH_FUNCLETS)
     // Add the synchronized method enter/exit calls and try/finally protection. Note
     // that this must happen before the one BBJ_RETURN block is created below, so the
     // BBJ_RETURN block gets placed at the top-level, not within an EH region. (Otherwise,
     // we'd have to be really careful when creating the synchronized method try/finally
     // not to include the BBJ_RETURN block.)
-    if ((info.compFlags & CORINFO_FLG_SYNCH) != 0)
+    if (UsesFunclets() && (info.compFlags & CORINFO_FLG_SYNCH) != 0)
     {
         fgAddSyncMethodEnterExit();
     }
-#endif // FEATURE_EH_FUNCLETS
 
     //
     //  We will generate just one epilog (return block)
@@ -2455,11 +2477,11 @@ PhaseStatus Compiler::fgAddInternal()
         madeChanges = true;
     }
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
 
     /* Is this a 'synchronized' method? */
 
-    if (info.compFlags & CORINFO_FLG_SYNCH)
+    if (!UsesFunclets() && (info.compFlags & CORINFO_FLG_SYNCH))
     {
         GenTree* tree = nullptr;
 
@@ -2527,7 +2549,7 @@ PhaseStatus Compiler::fgAddInternal()
         madeChanges         = true;
     }
 
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
     if (opts.IsReversePInvoke())
     {
@@ -2713,15 +2735,11 @@ BasicBlock* Compiler::fgGetDomSpeculatively(const BasicBlock* block)
 //
 BasicBlock* Compiler::fgLastBBInMainFunction()
 {
-#if defined(FEATURE_EH_FUNCLETS)
-
     if (fgFirstFuncletBB != nullptr)
     {
         return fgFirstFuncletBB->Prev();
     }
 
-#endif // FEATURE_EH_FUNCLETS
-
     assert(fgLastBB->IsLast());
     return fgLastBB;
 }
@@ -2733,21 +2751,15 @@ BasicBlock* Compiler::fgLastBBInMainFunction()
 //
 BasicBlock* Compiler::fgEndBBAfterMainFunction()
 {
-#if defined(FEATURE_EH_FUNCLETS)
-
     if (fgFirstFuncletBB != nullptr)
     {
         return fgFirstFuncletBB;
     }
 
-#endif // FEATURE_EH_FUNCLETS
-
     assert(fgLastBB->IsLast());
     return nullptr;
 }
 
-#if defined(FEATURE_EH_FUNCLETS)
-
 /*****************************************************************************
  * Introduce a new head block of the handler for the prolog to be put in, ahead
  * of the current handler head 'block'.
@@ -2763,26 +2775,26 @@ void Compiler::fgInsertFuncletPrologBlock(BasicBlock* block)
     }
 #endif
 
+    assert(UsesFunclets());
     assert(block->hasHndIndex());
     assert(fgFirstBlockOfHandler(block) == block); // this block is the first block of a handler
 
     /* Allocate a new basic block */
 
-    BasicBlock* newHead = BasicBlock::New(this, BBJ_ALWAYS, block);
-    newHead->SetFlags(BBF_INTERNAL | BBF_NONE_QUIRK);
+    BasicBlock* newHead = BasicBlock::New(this);
+    newHead->SetFlags(BBF_INTERNAL);
     newHead->inheritWeight(block);
     newHead->bbRefs = 0;
 
     fgInsertBBbefore(block, newHead); // insert the new block in the block list
-    assert(newHead->JumpsToNext());
-    fgExtendEHRegionBefore(block); // Update the EH table to make the prolog block the first block in the block's EH
-                                   // block.
+    fgExtendEHRegionBefore(block);    // Update the EH table to make the prolog block the first block in the block's EH
+                                      // block.
 
     // Distribute the pred list between newHead and block. Incoming edges coming from outside
     // the handler go to the prolog. Edges coming from with the handler are back-edges, and
     // go to the existing 'block'.
 
-    for (BasicBlock* const predBlock : block->PredBlocks())
+    for (BasicBlock* const predBlock : block->PredBlocksEditing())
     {
         if (!fgIsIntraHandlerPred(predBlock, block))
         {
@@ -2792,11 +2804,11 @@ void Compiler::fgInsertFuncletPrologBlock(BasicBlock* block)
             switch (predBlock->GetKind())
             {
                 case BBJ_CALLFINALLY:
+                {
                     noway_assert(predBlock->TargetIs(block));
-                    predBlock->SetTarget(newHead);
-                    fgRemoveRefPred(block, predBlock);
-                    fgAddRefPred(newHead, predBlock);
+                    fgRedirectTargetEdge(predBlock, newHead);
                     break;
+                }
 
                 default:
                     // The only way into the handler is via a BBJ_CALLFINALLY (to a finally handler), or
@@ -2807,10 +2819,10 @@ void Compiler::fgInsertFuncletPrologBlock(BasicBlock* block)
         }
     }
 
-    assert(nullptr == fgGetPredForBlock(block, newHead));
-    fgAddRefPred(block, newHead);
-
-    assert(newHead->HasFlag(BBF_INTERNAL));
+    assert(fgGetPredForBlock(block, newHead) == nullptr);
+    FlowEdge* const newEdge = fgAddRefPred(block, newHead);
+    newHead->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
+    assert(newHead->JumpsToNext());
 }
 
 //------------------------------------------------------------------------
@@ -2826,6 +2838,7 @@ void Compiler::fgInsertFuncletPrologBlock(BasicBlock* block)
 //
 void Compiler::fgCreateFuncletPrologBlocks()
 {
+    assert(UsesFunclets());
     noway_assert(fgPredsComputed);
     assert(!fgFuncletsCreated);
 
@@ -2890,6 +2903,7 @@ void Compiler::fgCreateFuncletPrologBlocks()
 //
 PhaseStatus Compiler::fgCreateFunclets()
 {
+    assert(UsesFunclets());
     assert(!fgFuncletsCreated);
 
     fgCreateFuncletPrologBlocks();
@@ -2965,6 +2979,8 @@ PhaseStatus Compiler::fgCreateFunclets()
 //
 bool Compiler::fgFuncletsAreCold()
 {
+    assert(UsesFunclets());
+
     for (BasicBlock* block = fgFirstFuncletBB; block != nullptr; block = block->Next())
     {
         if (!block->isRunRarely())
@@ -2976,8 +2992,6 @@ bool Compiler::fgFuncletsAreCold()
     return true;
 }
 
-#endif // defined(FEATURE_EH_FUNCLETS)
-
 //------------------------------------------------------------------------
 // fgDetermineFirstColdBlock: figure out where we might split the block
 //    list to put some blocks into the cold code section
@@ -3047,14 +3061,12 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock()
             }
 #endif // HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
 
-#ifdef FEATURE_EH_FUNCLETS
             // Make note of if we're in the funclet section,
             // so we can stop the search early.
             if (block == fgFirstFuncletBB)
             {
                 inFuncletSection = true;
             }
-#endif // FEATURE_EH_FUNCLETS
 
             // Do we have a candidate for the first cold block?
             if (firstColdBlock != nullptr)
@@ -3068,7 +3080,6 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock()
                     firstColdBlock       = nullptr;
                     prevToFirstColdBlock = nullptr;
 
-#ifdef FEATURE_EH_FUNCLETS
                     // If we're already in the funclet section, try to split
                     // at fgFirstFuncletBB, and stop the search.
                     if (inFuncletSection)
@@ -3081,13 +3092,10 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock()
 
                         break;
                     }
-#endif // FEATURE_EH_FUNCLETS
                 }
             }
             else // (firstColdBlock == NULL) -- we don't have a candidate for first cold block
             {
-
-#ifdef FEATURE_EH_FUNCLETS
                 //
                 // If a function has exception handling and we haven't found the first cold block yet,
                 // consider splitting at the first funclet; do not consider splitting between funclets,
@@ -3103,7 +3111,6 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock()
 
                     break;
                 }
-#endif // FEATURE_EH_FUNCLETS
 
                 // Is this a cold block?
                 if (!blockMustBeInHotSection && block->isRunRarely())
@@ -3384,7 +3391,7 @@ PhaseStatus Compiler::fgCreateThrowHelperBlocks()
         assert((add->acdKind == SCK_FAIL_FAST) || (bbThrowIndex(srcBlk) == add->acdData));
         assert(add->acdKind != SCK_NONE);
 
-        BasicBlock* const newBlk = fgNewBBinRegion(jumpKinds[add->acdKind], srcBlk, /* jumpDest */ nullptr,
+        BasicBlock* const newBlk = fgNewBBinRegion(jumpKinds[add->acdKind], srcBlk,
                                                    /* runRarely */ true, /* insertAtEnd */ true);
 
         // Update the descriptor
@@ -3448,7 +3455,7 @@ PhaseStatus Compiler::fgCreateThrowHelperBlocks()
 #endif // DEBUG
 
         //  Mark the block as added by the compiler and not removable by future flow
-        // graph optimizations. Note that no bbTarget points to these blocks.
+        // graph optimizations. Note that no target block points to these blocks.
         //
         newBlk->SetFlags(BBF_IMPORTED | BBF_DONT_REMOVE);
 
@@ -3575,7 +3582,9 @@ GenTree* Compiler::fgSetTreeSeq(GenTree* tree, bool isLIR)
         };
 
         SetTreeSeqVisitor(Compiler* compiler, GenTree* tree, bool isLIR)
-            : GenTreeVisitor<SetTreeSeqVisitor>(compiler), m_prevNode(tree), m_isLIR(isLIR)
+            : GenTreeVisitor<SetTreeSeqVisitor>(compiler)
+            , m_prevNode(tree)
+            , m_isLIR(isLIR)
         {
             INDEBUG(tree->gtSeqNum = 0);
         }
@@ -3663,7 +3672,8 @@ PhaseStatus Compiler::fgSetBlockOrder()
 class GCSafePointSuccessorEnumerator
 {
     BasicBlock* m_block;
-    union {
+    union
+    {
         BasicBlock*  m_successors[2];
         BasicBlock** m_pSuccessors;
     };
@@ -3674,7 +3684,8 @@ class GCSafePointSuccessorEnumerator
 public:
     // Constructs an enumerator of successors to be used for checking for GC
     // safe point cycles.
-    GCSafePointSuccessorEnumerator(Compiler* comp, BasicBlock* block) : m_block(block)
+    GCSafePointSuccessorEnumerator(Compiler* comp, BasicBlock* block)
+        : m_block(block)
     {
         m_numSuccs = 0;
         block->VisitRegularSuccs(comp, [this](BasicBlock* succ) {
@@ -3906,15 +3917,26 @@ void Compiler::fgSetBlockOrder(BasicBlock* block)
     return firstNode;
 }
 
-void Compiler::fgLclFldAssign(unsigned lclNum)
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// FlowGraphDfsTree::Dump: Dump a textual representation of the DFS tree.
+//
+void FlowGraphDfsTree::Dump() const
 {
-    assert(varTypeIsStruct(lvaTable[lclNum].lvType));
-    if (lvaTable[lclNum].lvPromoted && lvaTable[lclNum].lvFieldCnt > 1)
+    printf("DFS tree. %s.\n", HasCycle() ? "Has cycle" : "No cycle");
+    printf("PO RPO -> BB [pre, post]\n");
+    for (unsigned i = 0; i < GetPostOrderCount(); i++)
     {
-        lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LocalField));
+        unsigned          rpoNum = GetPostOrderCount() - i - 1;
+        BasicBlock* const block  = GetPostOrder(i);
+        printf("%02u %02u -> " FMT_BB "[%u, %u]\n", i, rpoNum, block->bbNum, block->bbPreorderNum,
+               block->bbPostorderNum);
     }
 }
 
+#endif // DEBUG
+
 //------------------------------------------------------------------------
 // FlowGraphDfsTree::Contains: Check if a block is contained in the DFS tree;
 // i.e., if it is reachable.
@@ -4178,10 +4200,13 @@ unsigned FlowGraphNaturalLoop::NumLoopBlocks()
 //   dfs - A DFS tree.
 //
 FlowGraphNaturalLoops::FlowGraphNaturalLoops(const FlowGraphDfsTree* dfsTree)
-    : m_dfsTree(dfsTree), m_loops(m_dfsTree->GetCompiler()->getAllocator(CMK_Loops))
+    : m_dfsTree(dfsTree)
+    , m_loops(m_dfsTree->GetCompiler()->getAllocator(CMK_Loops))
+    , m_improperLoopHeaders(0)
 {
 }
 
+//------------------------------------------------------------------------
 // GetLoopByIndex: Get loop by a specified index.
 //
 // Parameters:
@@ -4196,6 +4221,7 @@ FlowGraphNaturalLoop* FlowGraphNaturalLoops::GetLoopByIndex(unsigned index)
     return m_loops[index];
 }
 
+//------------------------------------------------------------------------
 // GetLoopByHeader: See if a block is a loop header, and if so return the
 // associated loop.
 //
@@ -4295,7 +4321,6 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfsTr
         JITDUMP("%02u -> " FMT_BB "[%u, %u]\n", rpoNum, block->bbNum, block->bbPreorderNum, block->bbPostorderNum);
     }
 
-    unsigned improperLoopHeaders = 0;
 #endif
 
     FlowGraphNaturalLoops* loops = new (comp, CMK_Loops) FlowGraphNaturalLoops(dfsTree);
@@ -4350,7 +4375,16 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfsTr
 
         if (!FindNaturalLoopBlocks(loop, worklist))
         {
-            INDEBUG(improperLoopHeaders++);
+            loops->m_improperLoopHeaders++;
+
+            for (FlowGraphNaturalLoop* const otherLoop : loops->InPostOrder())
+            {
+                if (otherLoop->ContainsBlock(header))
+                {
+                    otherLoop->m_containsImproperHeader = true;
+                }
+            }
+
             continue;
         }
 
@@ -4455,9 +4489,9 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfsTr
         JITDUMP("\nFound %zu loops\n", loops->m_loops.size());
     }
 
-    if (improperLoopHeaders > 0)
+    if (loops->m_improperLoopHeaders > 0)
     {
-        JITDUMP("Rejected %u loop headers\n", improperLoopHeaders);
+        JITDUMP("Rejected %u loop headers\n", loops->m_improperLoopHeaders);
     }
 
     JITDUMPEXEC(Dump(loops));
@@ -4720,14 +4754,8 @@ void FlowGraphNaturalLoop::Dump(FlowGraphNaturalLoop* loop)
         for (FlowEdge* const edge : loop->ExitEdges())
         {
             BasicBlock* const exitingBlock = edge->getSourceBlock();
-            printf("%s" FMT_BB " ->", first ? "" : "; ", exitingBlock->bbNum);
-            exitingBlock->VisitRegularSuccs(comp, [=](BasicBlock* succ) {
-                if (comp->fgGetPredForBlock(succ, exitingBlock) == edge)
-                {
-                    printf(" " FMT_BB, succ->bbNum);
-                }
-                return BasicBlockVisit::Continue;
-            });
+            BasicBlock* const exitBlock    = edge->getDestinationBlock();
+            printf("%s" FMT_BB " -> " FMT_BB, first ? "" : "; ", exitingBlock->bbNum, exitBlock->bbNum);
             first = false;
         }
     }
@@ -4809,7 +4837,9 @@ bool FlowGraphNaturalLoop::VisitDefs(TFunc func)
             DoPreOrder = true,
         };
 
-        VisitDefsVisitor(Compiler* comp, TFunc& func) : GenTreeVisitor<VisitDefsVisitor>(comp), m_func(func)
+        VisitDefsVisitor(Compiler* comp, TFunc& func)
+            : GenTreeVisitor<VisitDefsVisitor>(comp)
+            , m_func(func)
         {
         }
 
@@ -5003,7 +5033,7 @@ bool FlowGraphNaturalLoop::AnalyzeIteration(NaturalLoopIterInfo* info)
         info->TestBlock    = cond;
         info->IterVar      = iterVar;
         info->IterTree     = iterTree;
-        info->ExitedOnTrue = !ContainsBlock(cond->GetTrueTarget());
+        info->ExitedOnTrue = exitEdge->getDestinationBlock() == cond->GetTrueTarget();
         break;
     }
 
@@ -5618,36 +5648,9 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap*
         // Jump target should not be set yet
         assert(!newBlk->HasInitializedTarget());
 
-        // First copy the jump destination(s) from "blk".
-        newBlk->CopyTarget(comp, blk);
-
-        // Now redirect the new block according to "blockMap".
-        comp->optRedirectBlock(newBlk, map);
-
-        // Add predecessor edges for the new successors, as well as the fall-through paths.
-        switch (newBlk->GetKind())
-        {
-            case BBJ_ALWAYS:
-            case BBJ_CALLFINALLY:
-            case BBJ_CALLFINALLYRET:
-                comp->fgAddRefPred(newBlk->GetTarget(), newBlk);
-                break;
-
-            case BBJ_COND:
-                comp->fgAddRefPred(newBlk->GetFalseTarget(), newBlk);
-                comp->fgAddRefPred(newBlk->GetTrueTarget(), newBlk);
-                break;
-
-            case BBJ_SWITCH:
-                for (BasicBlock* const switchDest : newBlk->SwitchTargets())
-                {
-                    comp->fgAddRefPred(switchDest, newBlk);
-                }
-                break;
-
-            default:
-                break;
-        }
+        // Redirect the new block according to "blockMap".
+        // optSetMappedBlockTargets will set newBlk's successors, and add pred edges for the successors.
+        comp->optSetMappedBlockTargets(blk, newBlk, map);
 
         return BasicBlockVisit::Continue;
     });
@@ -6087,7 +6090,9 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* df
 
     public:
         NumberDomTreeVisitor(Compiler* comp, unsigned* preorderNums, unsigned* postorderNums)
-            : DomTreeVisitor(comp), m_preorderNums(preorderNums), m_postorderNums(postorderNums)
+            : DomTreeVisitor(comp)
+            , m_preorderNums(preorderNums)
+            , m_postorderNums(postorderNums)
         {
         }
 
@@ -6172,6 +6177,37 @@ BlockToNaturalLoopMap* BlockToNaturalLoopMap::Build(FlowGraphNaturalLoops* loops
     return new (comp, CMK_Loops) BlockToNaturalLoopMap(loops, indices);
 }
 
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// BlockToNaturalLoopMap::Dump: Dump a textual representation of the map.
+//
+void BlockToNaturalLoopMap::Dump() const
+{
+    const FlowGraphDfsTree* dfs        = m_loops->GetDfsTree();
+    unsigned                blockCount = dfs->GetPostOrderCount();
+
+    printf("Block -> natural loop map: %u blocks\n", blockCount);
+    if (blockCount > 0)
+    {
+        printf("block : loop index\n");
+        for (unsigned i = 0; i < blockCount; i++)
+        {
+            if (m_indices[i] == UINT_MAX)
+            {
+                // Just leave the loop space empty if there is no enclosing loop
+                printf(FMT_BB " : \n", dfs->GetPostOrder(i)->bbNum);
+            }
+            else
+            {
+                printf(FMT_BB " : " FMT_LP "\n", dfs->GetPostOrder(i)->bbNum, m_indices[i]);
+            }
+        }
+    }
+}
+
+#endif // DEBUG
+
 //------------------------------------------------------------------------
 // BlockReachabilitySets::Build: Build the reachability sets.
 //
@@ -6185,7 +6221,7 @@ BlockToNaturalLoopMap* BlockToNaturalLoopMap::Build(FlowGraphNaturalLoops* loops
 //   This algorithm consumes O(n^2) memory because we're using dense
 //   bitsets to represent reachability.
 //
-BlockReachabilitySets* BlockReachabilitySets::Build(FlowGraphDfsTree* dfsTree)
+BlockReachabilitySets* BlockReachabilitySets::Build(const FlowGraphDfsTree* dfsTree)
 {
     Compiler*    comp            = dfsTree->GetCompiler();
     BitVecTraits postOrderTraits = dfsTree->PostOrderTraits();
diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp
index cfb113572062..de4ac5fe8a47 100644
--- a/src/coreclr/jit/forwardsub.cpp
+++ b/src/coreclr/jit/forwardsub.cpp
@@ -191,7 +191,9 @@ class ForwardSubVisitor final : public GenTreeVisitor<ForwardSubVisitor>
         UseExecutionOrder = true
     };
 
-    ForwardSubVisitor(Compiler* compiler, unsigned lclNum) : GenTreeVisitor(compiler), m_lclNum(lclNum)
+    ForwardSubVisitor(Compiler* compiler, unsigned lclNum)
+        : GenTreeVisitor(compiler)
+        , m_lclNum(lclNum)
     {
         LclVarDsc* dsc = compiler->lvaGetDesc(m_lclNum);
         if (dsc->lvIsStructField)
@@ -221,7 +223,7 @@ class ForwardSubVisitor final : public GenTreeVisitor<ForwardSubVisitor>
                 // fgGetStubAddrArg cannot handle complex trees (it calls gtClone)
                 //
                 bool isCallTarget = false;
-                if (parent->IsCall())
+                if ((parent != nullptr) && parent->IsCall())
                 {
                     GenTreeCall* const parentCall = parent->AsCall();
                     isCallTarget = (parentCall->gtCallType == CT_INDIRECT) && (parentCall->gtCallAddr == node);
@@ -319,7 +321,7 @@ class ForwardSubVisitor final : public GenTreeVisitor<ForwardSubVisitor>
 
     bool IsCallArg() const
     {
-        return m_parentNode->IsCall();
+        return (m_parentNode != nullptr) && m_parentNode->IsCall();
     }
 
     unsigned GetComplexity() const
@@ -399,7 +401,9 @@ class EffectsVisitor final : public GenTreeVisitor<EffectsVisitor>
         UseExecutionOrder = true
     };
 
-    EffectsVisitor(Compiler* compiler) : GenTreeVisitor<EffectsVisitor>(compiler), m_flags(GTF_EMPTY)
+    EffectsVisitor(Compiler* compiler)
+        : GenTreeVisitor<EffectsVisitor>(compiler)
+        , m_flags(GTF_EMPTY)
     {
     }
 
@@ -746,10 +750,9 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)
     //
     // Don't substitute nodes args morphing doesn't handle into struct args.
     //
-    if (fsv.IsCallArg() && fsv.GetNode()->TypeIs(TYP_STRUCT) &&
-        !fwdSubNode->OperIs(GT_BLK, GT_LCL_VAR, GT_LCL_FLD, GT_MKREFANY))
+    if (fsv.IsCallArg() && fsv.GetNode()->TypeIs(TYP_STRUCT) && !fwdSubNode->OperIs(GT_BLK, GT_LCL_VAR, GT_LCL_FLD))
     {
-        JITDUMP(" use is a struct arg; fwd sub node is not OBJ/LCL_VAR/LCL_FLD/MKREFANY\n");
+        JITDUMP(" use is a struct arg; fwd sub node is not BLK/LCL_VAR/LCL_FLD\n");
         return false;
     }
 
@@ -772,7 +775,7 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)
     {
         GenTree* const parentNode = fsv.GetParentNode();
 
-        if (!parentNode->OperIs(GT_STORE_LCL_VAR))
+        if ((parentNode == nullptr) || !parentNode->OperIs(GT_STORE_LCL_VAR))
         {
             JITDUMP(" multi-reg struct node, parent not STORE_LCL_VAR\n");
             return false;
@@ -794,7 +797,8 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)
     // for them on all 32 bit targets is a CQ regression due to some bad
     // interaction between decomposition and RA.
     //
-    if (compMethodReturnsMultiRegRetType() && fsv.GetParentNode()->OperIs(GT_RETURN))
+    if (compMethodReturnsMultiRegRetType() && (fsv.GetParentNode() != nullptr) &&
+        fsv.GetParentNode()->OperIs(GT_RETURN))
     {
 #if defined(TARGET_X86)
         if (fwdSubNode->TypeGet() == TYP_LONG)
diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp
index 96005e605766..b7972f216d53 100644
--- a/src/coreclr/jit/gcencode.cpp
+++ b/src/coreclr/jit/gcencode.cpp
@@ -62,8 +62,6 @@ ReturnKind GCInfo::getReturnKind()
     }
 }
 
-#if !defined(JIT32_GCENCODER) || defined(FEATURE_EH_FUNCLETS)
-
 // gcMarkFilterVarsPinned - Walk all lifetimes and make it so that anything
 //     live in a filter is marked as pinned (often by splitting the lifetime
 //     so that *only* the filter region is pinned).  This should only be
@@ -86,6 +84,7 @@ ReturnKind GCInfo::getReturnKind()
 //
 void GCInfo::gcMarkFilterVarsPinned()
 {
+    assert(compiler->UsesFunclets());
     assert(compiler->ehAnyFunclets());
 
     for (EHblkDsc* const HBtab : EHClauses(compiler))
@@ -134,7 +133,6 @@ void GCInfo::gcMarkFilterVarsPinned()
                         //     (2) a regular one for after the filter
                         // and then adjust the original lifetime to end before
                         // the filter.
-                        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
                         if (compiler->verbose)
@@ -177,7 +175,6 @@ void GCInfo::gcMarkFilterVarsPinned()
                         // somewhere inside it, so we only create 1 new lifetime,
                         // and then adjust the original lifetime to end before
                         // the filter.
-                        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
                         if (compiler->verbose)
@@ -216,7 +213,6 @@ void GCInfo::gcMarkFilterVarsPinned()
                         // lifetime for the part inside the filter and adjust
                         // the start of the original lifetime to be the end
                         // of the filter
-                        CLANG_FORMAT_COMMENT_ANCHOR;
 #ifdef DEBUG
                         if (compiler->verbose)
                         {
@@ -259,7 +255,6 @@ void GCInfo::gcMarkFilterVarsPinned()
                     {
                         // The variable lifetime is completely within the filter,
                         // so just add the pinned flag.
-                        CLANG_FORMAT_COMMENT_ANCHOR;
 #ifdef DEBUG
                         if (compiler->verbose)
                         {
@@ -297,6 +292,8 @@ void GCInfo::gcMarkFilterVarsPinned()
 
 void GCInfo::gcInsertVarPtrDscSplit(varPtrDsc* desc, varPtrDsc* begin)
 {
+    assert(compiler->UsesFunclets());
+
 #ifndef JIT32_GCENCODER
     (void)begin;
     desc->vpdNext = gcVarPtrList;
@@ -335,6 +332,8 @@ void GCInfo::gcDumpVarPtrDsc(varPtrDsc* desc)
     const GCtype gcType = (desc->vpdVarNum & byref_OFFSET_FLAG) ? GCT_BYREF : GCT_GCREF;
     const bool   isPin  = (desc->vpdVarNum & pinned_OFFSET_FLAG) != 0;
 
+    assert(compiler->UsesFunclets());
+
     printf("[%08X] %s%s var at [%s", dspPtr(desc), GCtypeStr(gcType), isPin ? "pinned-ptr" : "",
            compiler->isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
 
@@ -352,8 +351,6 @@ void GCInfo::gcDumpVarPtrDsc(varPtrDsc* desc)
 
 #endif // DEBUG
 
-#endif // !defined(JIT32_GCENCODER) || defined(FEATURE_EH_FUNCLETS)
-
 #ifdef JIT32_GCENCODER
 
 #include "emit.h"
@@ -433,12 +430,13 @@ static void regenLog(unsigned encoding, InfoHdr* header, InfoHdr* state)
 
     EnterCriticalSection(&logFileLock);
 
-    fprintf(logFile, "InfoHdr( %2d, %2d, %1d, %1d, %1d,"
-                     " %1d, %1d, %1d, %1d, %1d,"
-                     " %1d, %1d, %1d, %1d, %1d, %1d,"
-                     " %1d, %1d, %1d,"
-                     " %1d, %2d, %2d,"
-                     " %2d, %2d, %2d, %2d, %2d, %2d), \n",
+    fprintf(logFile,
+            "InfoHdr( %2d, %2d, %1d, %1d, %1d,"
+            " %1d, %1d, %1d, %1d, %1d,"
+            " %1d, %1d, %1d, %1d, %1d, %1d,"
+            " %1d, %1d, %1d,"
+            " %1d, %2d, %2d,"
+            " %2d, %2d, %2d, %2d, %2d, %2d), \n",
             state->prologSize, state->epilogSize, state->epilogCount, state->epilogAtEnd, state->ediSaved,
             state->esiSaved, state->ebxSaved, state->ebpSaved, state->ebpFrame, state->interruptible,
             state->doubleAlign, state->security, state->handlers, state->localloc, state->editNcontinue, state->varargs,
@@ -1462,7 +1460,6 @@ size_t GCInfo::gcInfoBlockHdrSave(
 #endif
 
     /* Write the method size first (using between 1 and 5 bytes) */
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     if (compiler->verbose)
@@ -1564,9 +1561,9 @@ size_t GCInfo::gcInfoBlockHdrSave(
     header->syncStartOffset = INVALID_SYNC_OFFSET;
     header->syncEndOffset   = INVALID_SYNC_OFFSET;
 
-#ifndef UNIX_X86_ABI
+#if defined(FEATURE_EH_WINDOWS_X86)
     // JIT is responsible for synchronization on funclet-based EH model that x86/Linux uses.
-    if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+    if (!compiler->UsesFunclets() && compiler->info.compFlags & CORINFO_FLG_SYNCH)
     {
         assert(compiler->syncStartEmitCookie != nullptr);
         header->syncStartOffset = compiler->GetEmitter()->emitCodeOffset(compiler->syncStartEmitCookie, 0);
@@ -1817,7 +1814,7 @@ static int (*zeroFunc)() = zeroFN;
  */
 
 typedef unsigned pasMaskType;
-#define BITS_IN_pasMask (BITS_PER_BYTE * sizeof(pasMaskType))
+#define BITS_IN_pasMask     (BITS_PER_BYTE * sizeof(pasMaskType))
 #define HIGHEST_pasMask_BIT (((pasMaskType)0x1) << (BITS_IN_pasMask - 1))
 
 //-----------------------------------------------------------------------------
@@ -1850,8 +1847,8 @@ class PendingArgsStack
     // Use these in the case where there actually are more ptrs than pasArgMask
     unsigned pasEnumGCoffsCount();
 #define pasENUM_START ((unsigned)-1)
-#define pasENUM_LAST ((unsigned)-2)
-#define pasENUM_END ((unsigned)-3)
+#define pasENUM_LAST  ((unsigned)-2)
+#define pasENUM_END   ((unsigned)-3)
     unsigned pasEnumGCoffs(unsigned iter, unsigned* offs);
 
 protected:
@@ -2319,8 +2316,8 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un
 
     if (header.varPtrTableSize != 0)
     {
-#if !defined(FEATURE_EH_FUNCLETS)
-        if (keepThisAlive)
+#if defined(FEATURE_EH_WINDOWS_X86)
+        if (!compiler->UsesFunclets() && keepThisAlive)
         {
             // Encoding of untracked variables does not support reporting
             // "this". So report it as a tracked variable with a liveness
@@ -2331,7 +2328,7 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un
             unsigned varOffs = compiler->lvaTable[compiler->info.compThisArg].GetStackOffset();
 
             /* For negative stack offsets we must reset the low bits,
-                * take abs and then set them back */
+             * take abs and then set them back */
 
             varOffs = abs(static_cast<int>(varOffs));
             varOffs |= this_OFFSET_FLAG;
@@ -2344,7 +2341,7 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un
             dest += (sz & mask);
             totalSize += sz;
         }
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
         /* We'll use a delta encoding for the lifetime offsets */
 
@@ -3285,7 +3282,7 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un
 
                     assert(regMask || argMask || callArgCnt || pasStk.pasCurDepth());
 
-// Emit IPtrMask if needed
+                    // Emit IPtrMask if needed
 
 #define CHK_NON_INTRPT_ESP_IPtrMask                                                                                    \
                                                                                                                        \
@@ -3571,7 +3568,7 @@ size_t GCInfo::gcInfoBlockHdrDump(const BYTE* table, InfoHdr* header, unsigned*
 #ifdef DEBUG
     gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM)
 #else
-    gcDump.gcPrintf       = printf;
+    gcDump.gcPrintf = printf;
 #endif
 
     printf("Method info block:\n");
@@ -3590,7 +3587,7 @@ size_t GCInfo::gcDumpPtrTable(const BYTE* table, const InfoHdr& header, unsigned
 #ifdef DEBUG
     gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM)
 #else
-    gcDump.gcPrintf       = printf;
+    gcDump.gcPrintf = printf;
 #endif
 
     return gcDump.DumpGCTable(table, header, methodSize, verifyGCTables);
@@ -3608,7 +3605,7 @@ void GCInfo::gcFindPtrsInFrame(const void* infoBlock, const void* codeBlock, uns
 #ifdef DEBUG
     gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM)
 #else
-    gcDump.gcPrintf       = printf;
+    gcDump.gcPrintf = printf;
 #endif
 
     gcDump.DumpPtrsInFrame((PTR_CBYTE)infoBlock, (const BYTE*)codeBlock, offs, verifyGCTables);
@@ -3646,7 +3643,8 @@ class GcInfoEncoderWithLogging
 
 public:
     GcInfoEncoderWithLogging(GcInfoEncoder* gcInfoEncoder, bool verbose)
-        : m_gcInfoEncoder(gcInfoEncoder), m_doLogging(verbose INDEBUG(|| JitConfig.JitGCInfoLogging() != 0))
+        : m_gcInfoEncoder(gcInfoEncoder)
+        , m_doLogging(verbose INDEBUG(|| JitConfig.JitGCInfoLogging() != 0))
     {
     }
 
@@ -3960,7 +3958,6 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz
         gcInfoEncoderWithLog->SetPrologSize(prologSize);
     }
 
-#if defined(FEATURE_EH_FUNCLETS)
     if (compiler->lvaPSPSym != BAD_VAR_NUM)
     {
 #ifdef TARGET_AMD64
@@ -3979,8 +3976,6 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz
     }
 #endif // TARGET_AMD64
 
-#endif // FEATURE_EH_FUNCLETS
-
 #ifdef TARGET_ARMARCH
     if (compiler->codeGen->GetHasTailCalls())
     {
@@ -4024,7 +4019,8 @@ struct InterruptibleRangeReporter
     Encoder* gcInfoEncoderWithLog;
 
     InterruptibleRangeReporter(unsigned _prevStart, Encoder* _gcInfo)
-        : prevStart(_prevStart), gcInfoEncoderWithLog(_gcInfo)
+        : prevStart(_prevStart)
+        , gcInfoEncoderWithLog(_gcInfo)
     {
     }
 
@@ -4109,7 +4105,6 @@ void GCInfo::gcMakeRegPtrTable(
                 // pointers" section of the GC info even if lvTracked==true
 
                 // Has this argument been fully enregistered?
-                CLANG_FORMAT_COMMENT_ANCHOR;
 
                 if (!varDsc->lvOnFrame)
                 {
@@ -4138,7 +4133,6 @@ void GCInfo::gcMakeRegPtrTable(
             }
 
             // If we haven't continued to the next variable, we should report this as an untracked local.
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
             GcSlotFlags flags = GC_SLOT_UNTRACKED;
 
@@ -4698,8 +4692,8 @@ void GCInfo::gcMakeVarPtrTable(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode
     // unused by alignment
     C_ASSERT((OFFSET_MASK + 1) <= sizeof(int));
 
-#ifdef DEBUG
-    if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+#if defined(DEBUG) && defined(JIT32_GCENCODER) && defined(FEATURE_EH_WINDOWS_X86)
+    if (!compiler->UsesFunclets() && mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
     {
         // Tracked variables can't be pinned, and the encoding takes
         // advantage of that by using the same bit for 'pinned' and 'this'
@@ -4712,7 +4706,7 @@ void GCInfo::gcMakeVarPtrTable(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode
             assert((flags & this_OFFSET_FLAG) == 0);
         }
     }
-#endif // DEBUG
+#endif
 
     // Only need to do this once, and only if we have EH.
     if ((mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS) && compiler->ehAnyFunclets())
@@ -4793,7 +4787,7 @@ void GCInfo::gcInfoRecordGCStackArgLive(GcInfoEncoder* gcInfoEncoder, MakeRegPtr
 
     StackSlotIdKey sskey(genStackPtr->rpdPtrArg, false,
                          GcSlotFlags(genStackPtr->rpdGCtypeGet() == GCT_BYREF ? GC_SLOT_INTERIOR : GC_SLOT_BASE));
-    GcSlotId varSlotId;
+    GcSlotId       varSlotId;
     if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
     {
         if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
@@ -4841,8 +4835,8 @@ void GCInfo::gcInfoRecordGCStackArgsDead(GcInfoEncoder* gcInfoEncoder,
 
         StackSlotIdKey sskey(genRegPtrTemp->rpdPtrArg, false,
                              genRegPtrTemp->rpdGCtypeGet() == GCT_BYREF ? GC_SLOT_INTERIOR : GC_SLOT_BASE);
-        GcSlotId varSlotId;
-        bool     b = m_stackSlotMap->Lookup(sskey, &varSlotId);
+        GcSlotId       varSlotId;
+        bool           b = m_stackSlotMap->Lookup(sskey, &varSlotId);
         assert(b); // Should have been added in the first pass.
         // Live until the call.
         gcInfoEncoderWithLog->SetSlotState(instrOffset, varSlotId, GC_SLOT_DEAD);
diff --git a/src/coreclr/jit/gcinfo.cpp b/src/coreclr/jit/gcinfo.cpp
index 64988ec8954e..8fc398a1d1b3 100644
--- a/src/coreclr/jit/gcinfo.cpp
+++ b/src/coreclr/jit/gcinfo.cpp
@@ -46,7 +46,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 */
 
-GCInfo::GCInfo(Compiler* theCompiler) : compiler(theCompiler)
+GCInfo::GCInfo(Compiler* theCompiler)
+    : compiler(theCompiler)
 {
     regSet         = nullptr;
     gcVarPtrList   = nullptr;
@@ -242,8 +243,8 @@ GCInfo::WriteBarrierForm GCInfo::gcIsWriteBarrierCandidate(GenTreeStoreInd* stor
     }
 
     // Ignore any assignments of NULL or nongc object
-    GenTree* const data = store->Data()->gtSkipReloadOrCopy();
-    if (data->IsIntegralConst(0) || data->IsIconHandle(GTF_ICON_OBJ_HDL))
+    GenTree* const value = store->Data()->gtSkipReloadOrCopy();
+    if (value->IsIntegralConst(0) || value->IsIconHandle(GTF_ICON_OBJ_HDL))
     {
         return WBF_NoBarrier;
     }
@@ -566,7 +567,7 @@ void GCInfo::gcCountForHeader(UNALIGNED unsigned int* pUntrackedCount, UNALIGNED
 //
 // Arguments:
 //   varNum - the variable number to check;
-//   pKeepThisAlive - if !FEATURE_EH_FUNCLETS and the argument != nullptr remember
+//   pKeepThisAlive - if !UsesFunclets() and the argument != nullptr remember
 //   if `this` should be kept alive and considered tracked.
 //
 // Return value:
@@ -615,16 +616,16 @@ bool GCInfo::gcIsUntrackedLocalOrNonEnregisteredArg(unsigned varNum, bool* pKeep
         }
     }
 
-#if !defined(FEATURE_EH_FUNCLETS)
-    if (compiler->lvaIsOriginalThisArg(varNum) && compiler->lvaKeepAliveAndReportThis())
+#if defined(FEATURE_EH_WINDOWS_X86)
+    if (!compiler->UsesFunclets() && compiler->lvaIsOriginalThisArg(varNum) && compiler->lvaKeepAliveAndReportThis())
     {
         // "this" is in the untracked variable area, but encoding of untracked variables does not support reporting
         // "this". So report it as a tracked variable with a liveness extending over the entire method.
         //
         // TODO-x86-Cleanup: the semantic here is not clear, it would be useful to check different cases and
         // add a description where "this" is saved and how it is tracked in each of them:
-        // 1) when FEATURE_EH_FUNCLETS defined (x86 Linux);
-        // 2) when FEATURE_EH_FUNCLETS not defined, lvaKeepAliveAndReportThis == true, compJmpOpUsed == true;
+        // 1) when UsesFunclets() == true (x86 Linux);
+        // 2) when UsesFunclets() == false, lvaKeepAliveAndReportThis == true, compJmpOpUsed == true;
         // 3) when there is regPtrDsc for "this", but keepThisAlive == true;
         // etc.
 
@@ -634,7 +635,7 @@ bool GCInfo::gcIsUntrackedLocalOrNonEnregisteredArg(unsigned varNum, bool* pKeep
         }
         return false;
     }
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
     return true;
 }
 
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 706a11fe881c..ead6b4d83c21 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -21,13 +21,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 /*****************************************************************************/
 
 const unsigned char GenTree::gtOperKindTable[] = {
-#define GTNODE(en, st, cm, ivn, ok) ((ok)&GTK_MASK) + GTK_COMMUTE *cm,
+#define GTNODE(en, st, cm, ivn, ok) ((ok) & GTK_MASK) + GTK_COMMUTE *cm,
 #include "gtlist.h"
 };
 
 #ifdef DEBUG
 const GenTreeDebugOperKind GenTree::gtDebugOperKindTable[] = {
-#define GTNODE(en, st, cm, ivn, ok) static_cast<GenTreeDebugOperKind>((ok)&DBK_MASK),
+#define GTNODE(en, st, cm, ivn, ok) static_cast<GenTreeDebugOperKind>((ok) & DBK_MASK),
 #include "gtlist.h"
 };
 #endif // DEBUG
@@ -78,7 +78,8 @@ struct IndentStack
     const char**    indents;
 
     // Constructor for IndentStack.  Uses 'compiler' to determine the mode of printing.
-    IndentStack(Compiler* compiler) : stack(compiler->getAllocator(CMK_DebugOnly))
+    IndentStack(Compiler* compiler)
+        : stack(compiler->getAllocator(CMK_DebugOnly))
     {
         if (compiler->asciiTrees)
         {
@@ -252,7 +253,6 @@ void GenTree::InitNodeSize()
     GenTree::s_gtNodeSizes[GT_FIELD_ADDR]    = TREE_NODE_SZ_LARGE;
     GenTree::s_gtNodeSizes[GT_CMPXCHG]       = TREE_NODE_SZ_LARGE;
     GenTree::s_gtNodeSizes[GT_QMARK]         = TREE_NODE_SZ_LARGE;
-    GenTree::s_gtNodeSizes[GT_STORE_DYN_BLK] = TREE_NODE_SZ_LARGE;
     GenTree::s_gtNodeSizes[GT_INTRINSIC]     = TREE_NODE_SZ_LARGE;
     GenTree::s_gtNodeSizes[GT_ALLOCOBJ]      = TREE_NODE_SZ_LARGE;
 #if USE_HELPERS_FOR_INT_DIV
@@ -318,7 +318,6 @@ void GenTree::InitNodeSize()
     static_assert_no_msg(sizeof(GenTreeStoreInd)     <= TREE_NODE_SZ_SMALL);
     static_assert_no_msg(sizeof(GenTreeAddrMode)     <= TREE_NODE_SZ_SMALL);
     static_assert_no_msg(sizeof(GenTreeBlk)          <= TREE_NODE_SZ_SMALL);
-    static_assert_no_msg(sizeof(GenTreeStoreDynBlk)  <= TREE_NODE_SZ_LARGE); // *** large node
     static_assert_no_msg(sizeof(GenTreeRetExpr)      <= TREE_NODE_SZ_LARGE); // *** large node
     static_assert_no_msg(sizeof(GenTreeILOffset)     <= TREE_NODE_SZ_SMALL);
     static_assert_no_msg(sizeof(GenTreePhiArg)       <= TREE_NODE_SZ_SMALL);
@@ -733,10 +732,6 @@ ClassLayout* GenTree::GetLayout(Compiler* compiler) const
             return AsHWIntrinsic()->GetLayout(compiler);
 #endif // FEATURE_HW_INTRINSICS
 
-        case GT_MKREFANY:
-            structHnd = compiler->impGetRefAnyClass();
-            break;
-
         case GT_CALL:
             structHnd = AsCall()->gtRetClsHnd;
             break;
@@ -896,7 +891,6 @@ int GenTree::GetRegisterDstCount(Compiler* compiler) const
         // A MultiRegOp is a GT_MUL_LONG, GT_PUTARG_REG, or GT_BITCAST.
         // For the latter two (ARM-only), they only have multiple registers if they produce a long value
         // (GT_MUL_LONG always produces a long value).
-        CLANG_FORMAT_COMMENT_ANCHOR;
 #ifdef TARGET_ARM
         return (TypeGet() == TYP_LONG) ? 2 : 1;
 #else
@@ -1810,13 +1804,9 @@ regNumber CallArgs::GetCustomRegister(Compiler* comp, CorInfoCallConvExtension c
             return REG_LNGARG_HI;
 #endif
         case WellKnownArg::RetBuffer:
-            if (hasFixedRetBuffReg())
+            if (hasFixedRetBuffReg(cc))
             {
-                // Windows does not use fixed ret buff arg for instance calls, but does otherwise.
-                if (!TargetOS::IsWindows || !callConvIsInstanceMethodCallConv(cc))
-                {
-                    return theFixedRetBuffReg();
-                }
+                return theFixedRetBuffReg(cc);
             }
 
             break;
@@ -1847,6 +1837,17 @@ regNumber CallArgs::GetCustomRegister(Compiler* comp, CorInfoCallConvExtension c
         case WellKnownArg::DispatchIndirectCallTarget:
             return REG_DISPATCH_INDIRECT_CALL_ADDR;
 #endif
+
+#ifdef SWIFT_SUPPORT
+        case WellKnownArg::SwiftError:
+            assert(cc == CorInfoCallConvExtension::Swift);
+            return REG_SWIFT_ERROR;
+
+        case WellKnownArg::SwiftSelf:
+            assert(cc == CorInfoCallConvExtension::Swift);
+            return REG_SWIFT_SELF;
+#endif // SWIFT_SUPPORT
+
         default:
             break;
     }
@@ -2080,7 +2081,7 @@ void CallArgs::Remove(CallArg* arg)
     assert(!"Did not find arg to remove in CallArgs::Remove");
 }
 
-#if TARGET_WASM
+#ifdef TARGET_WASM
 //------------------------------------------------------------------------
 // MoveLateToEarly: Sets all late nodes as the early nodes
 //
@@ -2101,6 +2102,74 @@ void CallArgs::MoveLateToEarly()
     m_lateHead = nullptr;
 }
 #endif
+#ifdef TARGET_XARCH
+//---------------------------------------------------------------
+// NeedsVzeroupper: Determines if the call needs a vzeroupper emitted before it is invoked
+//
+// Parameters:
+//   comp - the compiler
+//
+// Returns:
+//   true if a vzeroupper needs to be emitted; otherwise, false
+//
+bool GenTreeCall::NeedsVzeroupper(Compiler* comp)
+{
+    bool needsVzeroupper = false;
+
+    if (IsPInvoke() && comp->canUseVexEncoding())
+    {
+        // The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states:
+        //   Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean
+        //   between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a
+        //   VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX
+        //   register) and before any call to an unknown function.
+
+        switch (gtCallType)
+        {
+            case CT_USER_FUNC:
+            case CT_INDIRECT:
+            {
+                // Since P/Invokes are not compiled by the runtime, they are typically "unknown" since they
+                // may use the legacy encoding. This includes both CT_USER_FUNC and CT_INDIRECT
+
+                needsVzeroupper = true;
+                break;
+            }
+
+            case CT_HELPER:
+            {
+                // Most helpers are well known to not use any floating-point or SIMD logic internally, but
+                // a few do exist so we need to ensure they are handled. They are identified by taking or
+                // returning a floating-point or SIMD type, regardless of how it is actually passed/returned.
+
+                if (varTypeUsesFloatReg(this))
+                {
+                    needsVzeroupper = true;
+                }
+                else
+                {
+                    for (CallArg& arg : gtArgs.Args())
+                    {
+                        if (varTypeUsesFloatReg(arg.GetSignatureType()))
+                        {
+                            needsVzeroupper = true;
+                            break;
+                        }
+                    }
+                }
+                break;
+            }
+
+            default:
+            {
+                unreached();
+            }
+        }
+    }
+
+    return needsVzeroupper;
+}
+#endif // TARGET_XARCH
 
 //---------------------------------------------------------------
 // GetOtherRegMask: Get the reg mask of gtOtherRegs of call node
@@ -2449,9 +2518,6 @@ int GenTreeCall::GetNonStandardAddedArgCount(Compiler* compiler) const
 //-------------------------------------------------------------------------
 // TreatAsShouldHaveRetBufArg:
 //
-// Arguments:
-//     compiler, the compiler instance so that we can call eeGetHelperNum
-//
 // Return Value:
 //     Returns true if we treat the call as if it has a retBuf argument
 //     This method may actually have a retBuf argument
@@ -2467,7 +2533,7 @@ int GenTreeCall::GetNonStandardAddedArgCount(Compiler* compiler) const
 //     aren't actually defined to return a struct, so they don't expect
 //     their RetBuf to be passed in x8, instead they  expect it in x0.
 //
-bool GenTreeCall::TreatAsShouldHaveRetBufArg(Compiler* compiler) const
+bool GenTreeCall::TreatAsShouldHaveRetBufArg() const
 {
     if (ShouldHaveRetBufArg())
     {
@@ -2479,27 +2545,22 @@ bool GenTreeCall::TreatAsShouldHaveRetBufArg(Compiler* compiler) const
     //
     if (IsHelperCall() && (gtReturnType == TYP_STRUCT))
     {
-        // There are three possible helper calls that use this path:
-        //  CORINFO_HELP_GETFIELDSTRUCT,  CORINFO_HELP_UNBOX_NULLABLE
-        //  CORINFO_HELP_PINVOKE_CALLI
-        CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(gtCallMethHnd);
+        // There are two helpers that return structs through an argument,
+        // ignoring the ABI, but where we want to handle them during import as
+        // if they have return buffers:
+        //   - CORINFO_HELP_GETFIELDSTRUCT
+        //   - CORINFO_HELP_UNBOX_NULLABLE
+        //
+        // Other TYP_STRUCT returning helpers follow the ABI normally and
+        // should return true for `ShouldHaveRetBufArg` if they need a retbuf
+        // arg, so when we get here, those cases never need retbufs. They
+        // include:
+        //   - CORINFO_HELP_PINVOKE_CALLI
+        //   - CORINFO_HELP_DISPATCH_INDIRECT_CALL
+        //
+        CorInfoHelpFunc helpFunc = Compiler::eeGetHelperNum(gtCallMethHnd);
 
-        if (helpFunc == CORINFO_HELP_GETFIELDSTRUCT)
-        {
-            return true;
-        }
-        else if (helpFunc == CORINFO_HELP_UNBOX_NULLABLE)
-        {
-            return true;
-        }
-        else if (helpFunc == CORINFO_HELP_PINVOKE_CALLI)
-        {
-            return false;
-        }
-        else
-        {
-            assert(!"Unexpected JIT helper in TreatAsShouldHaveRetBufArg");
-        }
+        return (helpFunc == CORINFO_HELP_GETFIELDSTRUCT) || (helpFunc == CORINFO_HELP_UNBOX_NULLABLE);
     }
     return false;
 }
@@ -2858,6 +2919,7 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK)
 
             case GT_NOP:
             case GT_LABEL:
+            case GT_SWIFT_ERROR:
                 return true;
 
             default:
@@ -3108,11 +3170,6 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK)
                    Compare(op1->AsCmpXchg()->Data(), op2->AsCmpXchg()->Data()) &&
                    Compare(op1->AsCmpXchg()->Comparand(), op2->AsCmpXchg()->Comparand());
 
-        case GT_STORE_DYN_BLK:
-            return Compare(op1->AsStoreDynBlk()->Addr(), op2->AsStoreDynBlk()->Addr()) &&
-                   Compare(op1->AsStoreDynBlk()->Data(), op2->AsStoreDynBlk()->Data()) &&
-                   Compare(op1->AsStoreDynBlk()->gtDynamicSize, op2->AsStoreDynBlk()->gtDynamicSize);
-
         default:
             assert(!"unexpected operator");
     }
@@ -3201,7 +3258,8 @@ bool Compiler::gtHasLocalsWithAddrOp(GenTree* tree)
             DoLclVarsOnly = true,
         };
 
-        LocalsWithAddrOpVisitor(Compiler* comp) : GenTreeVisitor(comp)
+        LocalsWithAddrOpVisitor(Compiler* comp)
+            : GenTreeVisitor(comp)
         {
         }
 
@@ -3242,7 +3300,8 @@ bool Compiler::gtHasAddressExposedLocals(GenTree* tree)
             DoLclVarsOnly = true,
         };
 
-        Visitor(Compiler* comp) : GenTreeVisitor(comp)
+        Visitor(Compiler* comp)
+            : GenTreeVisitor(comp)
         {
         }
 
@@ -3331,7 +3390,7 @@ unsigned Compiler::gtHashValue(GenTree* tree)
 #ifdef HOST_64BIT
                 add = bits;
 #else // 32-bit host
-                add      = genTreeHashAdd(uhi32(bits), ulo32(bits));
+                add = genTreeHashAdd(uhi32(bits), ulo32(bits));
 #endif
                 break;
             case GT_CNS_DBL:
@@ -3341,7 +3400,7 @@ unsigned Compiler::gtHashValue(GenTree* tree)
 #ifdef HOST_64BIT
                 add = bits;
 #else // 32-bit host
-                add      = genTreeHashAdd(uhi32(bits), ulo32(bits));
+                add = genTreeHashAdd(uhi32(bits), ulo32(bits));
 #endif
                 break;
             }
@@ -3359,6 +3418,13 @@ unsigned Compiler::gtHashValue(GenTree* tree)
                 {
 #if defined(FEATURE_SIMD)
 #if defined(TARGET_XARCH)
+                    case TYP_MASK:
+                    {
+                        add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[1]);
+                        add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[0]);
+                        break;
+                    }
+
                     case TYP_SIMD64:
                     {
                         add = genTreeHashAdd(ulo32(add), vecCon->gtSimdVal.u32[15]);
@@ -3663,12 +3729,6 @@ unsigned Compiler::gtHashValue(GenTree* tree)
             hash = genTreeHashAdd(hash, gtHashValue(tree->AsCmpXchg()->Comparand()));
             break;
 
-        case GT_STORE_DYN_BLK:
-            hash = genTreeHashAdd(hash, gtHashValue(tree->AsStoreDynBlk()->Data()));
-            hash = genTreeHashAdd(hash, gtHashValue(tree->AsStoreDynBlk()->Addr()));
-            hash = genTreeHashAdd(hash, gtHashValue(tree->AsStoreDynBlk()->gtDynamicSize));
-            break;
-
         default:
 #ifdef DEBUG
             gtDispTree(tree);
@@ -4479,12 +4539,6 @@ bool Compiler::gtGetIndNodeCost(GenTreeIndir* node, int* pCostEx, int* pCostSz)
     {
         // See if we can form a complex addressing mode.
         bool doAddrMode = true;
-
-        // TODO-1stClassStructs: delete once IND<struct> nodes are no more.
-        if (node->TypeGet() == TYP_STRUCT)
-        {
-            doAddrMode = false;
-        }
 #ifdef TARGET_ARM64
         if (node->IsVolatile())
         {
@@ -4573,7 +4627,7 @@ bool Compiler::gtCanSwapOrder(GenTree* firstNode, GenTree* secondNode)
         else
         {
             // No side effects in op2 - we can swap iff op1 has no way of modifying op2,
-            // i.e. through byref assignments or calls or op2 is a constant.
+            // i.e. through indirect stores or calls or op2 is a constant.
 
             if (firstNode->gtFlags & strictEffects & GTF_PERSISTENT_SIDE_EFFECTS)
             {
@@ -4687,8 +4741,6 @@ bool genCreateAddrMode(Compiler* compiler,
        constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
        here if we find a scaled index.
     */
-    CLANG_FORMAT_COMMENT_ANCHOR;
-
     assert(mul == 0);
 
     /* Special case: keep constants as 'op2' */
@@ -5298,7 +5350,6 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ
         //   [base + idx * mul + cns]  // mul can be 0, 2, 4, or 8
         // Note that mul == 0 is semantically equivalent to mul == 1.
         // Note that cns can be zero.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
         assert((base != nullptr) || (idx != nullptr && mul >= 2));
 
@@ -6040,7 +6091,6 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
                         case NI_System_Math_Cosh:
                         case NI_System_Math_Exp:
                         case NI_System_Math_Floor:
-                        case NI_System_Math_FMod:
                         case NI_System_Math_FusedMultiplyAdd:
                         case NI_System_Math_ILogB:
                         case NI_System_Math_Log:
@@ -6129,9 +6179,8 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
                     costSz = 2;
                     break;
 
-                case GT_MKREFANY:
                 case GT_BLK:
-                    // We estimate the cost of a GT_BLK or GT_MKREFANY to be two loads (GT_INDs)
+                    // We estimate the cost of a GT_BLK to be two loads (GT_INDs)
                     costEx = 2 * IND_COST_EX;
                     costSz = 2 * 2;
                     break;
@@ -6173,7 +6222,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
                     {
                         // Store to an enregistered local.
                         costEx = op1->GetCostEx();
-                        costSz = max(3, op1->GetCostSz()); // 3 is an estimate for a reg-reg move.
+                        costSz = max(3, (int)op1->GetCostSz()); // 3 is an estimate for a reg-reg move.
                         goto DONE;
                     }
 
@@ -6575,7 +6624,6 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
 
                     case GT_QMARK:
                     case GT_COLON:
-                    case GT_MKREFANY:
                         break;
 
                     default:
@@ -6748,10 +6796,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
                 assert(use.GetNode()->GetCostEx() == 0);
                 assert(use.GetNode()->GetCostSz() == 0);
             }
-            // Give it a level of 2, just to be sure that it's greater than the LHS of
-            // the parent assignment and the PHI gets evaluated first in linear order.
-            // See also SsaBuilder::InsertPhi and SsaBuilder::AddPhiArg.
-            level  = 2;
+            level  = 1;
             costEx = 0;
             costSz = 0;
             break;
@@ -6797,22 +6842,6 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
         }
         break;
 
-        case GT_STORE_DYN_BLK:
-            level  = gtSetEvalOrder(tree->AsStoreDynBlk()->Addr());
-            costEx = tree->AsStoreDynBlk()->Addr()->GetCostEx();
-            costSz = tree->AsStoreDynBlk()->Addr()->GetCostSz();
-
-            lvl2  = gtSetEvalOrder(tree->AsStoreDynBlk()->Data());
-            level = max(level, lvl2);
-            costEx += tree->AsStoreDynBlk()->Data()->GetCostEx();
-            costSz += tree->AsStoreDynBlk()->Data()->GetCostSz();
-
-            lvl2  = gtSetEvalOrder(tree->AsStoreDynBlk()->gtDynamicSize);
-            level = max(level, lvl2);
-            costEx += tree->AsStoreDynBlk()->gtDynamicSize->GetCostEx();
-            costSz += tree->AsStoreDynBlk()->gtDynamicSize->GetCostSz();
-            break;
-
         case GT_SELECT:
             level  = gtSetEvalOrder(tree->AsConditional()->gtCond);
             costEx = tree->AsConditional()->gtCond->GetCostEx();
@@ -6884,7 +6913,9 @@ bool Compiler::gtMayHaveStoreInterference(GenTree* treeWithStores, GenTree* tree
             DoPreOrder = true,
         };
 
-        Visitor(Compiler* compiler, GenTree* readTree) : GenTreeVisitor(compiler), m_readTree(readTree)
+        Visitor(Compiler* compiler, GenTree* readTree)
+            : GenTreeVisitor(compiler)
+            , m_readTree(readTree)
         {
         }
 
@@ -6945,7 +6976,9 @@ bool Compiler::gtTreeHasLocalRead(GenTree* tree, unsigned lclNum)
         unsigned   m_lclNum;
         LclVarDsc* m_lclDsc;
 
-        Visitor(Compiler* compiler, unsigned lclNum) : GenTreeVisitor(compiler), m_lclNum(lclNum)
+        Visitor(Compiler* compiler, unsigned lclNum)
+            : GenTreeVisitor(compiler)
+            , m_lclNum(lclNum)
         {
             m_lclDsc = compiler->lvaGetDesc(lclNum);
         }
@@ -7115,9 +7148,9 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse)
         case GT_START_NONGC:
         case GT_START_PREEMPTGC:
         case GT_PROF_HOOK:
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
         case GT_END_LFIN:
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
         case GT_PHI_ARG:
         case GT_JMPTABLE:
         case GT_PHYSREG:
@@ -7126,6 +7159,7 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse)
         case GT_PINVOKE_EPILOG:
         case GT_IL_OFFSET:
         case GT_NOP:
+        case GT_SWIFT_ERROR:
             return false;
 
         // Standard unary operators
@@ -7259,27 +7293,6 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse)
             return false;
         }
 
-        case GT_STORE_DYN_BLK:
-        {
-            GenTreeStoreDynBlk* const dynBlock = this->AsStoreDynBlk();
-            if (operand == dynBlock->gtOp1)
-            {
-                *pUse = &dynBlock->gtOp1;
-                return true;
-            }
-            if (operand == dynBlock->gtOp2)
-            {
-                *pUse = &dynBlock->gtOp2;
-                return true;
-            }
-            if (operand == dynBlock->gtDynamicSize)
-            {
-                *pUse = &dynBlock->gtDynamicSize;
-                return true;
-            }
-            return false;
-        }
-
         case GT_CALL:
         {
             GenTreeCall* const call = this->AsCall();
@@ -7439,7 +7452,6 @@ bool GenTree::OperRequiresAsgFlag() const
         case GT_STORE_LCL_FLD:
         case GT_STOREIND:
         case GT_STORE_BLK:
-        case GT_STORE_DYN_BLK:
         case GT_XADD:
         case GT_XORR:
         case GT_XAND:
@@ -7450,7 +7462,7 @@ bool GenTree::OperRequiresAsgFlag() const
             return true;
 
         // If the call has return buffer argument, it produced a definition and hence
-        // should be marked with assignment.
+        // should be marked with GTF_ASG.
         case GT_CALL:
             return AsCall()->IsOptimizingRetBufAsLocal();
 
@@ -7479,6 +7491,9 @@ bool GenTree::OperRequiresCallFlag(Compiler* comp) const
         case GT_KEEPALIVE:
             return true;
 
+        case GT_SWIFT_ERROR:
+            return true;
+
         case GT_INTRINSIC:
             return comp->IsIntrinsicImplementedByUserCall(this->AsIntrinsic()->gtIntrinsicName);
 
@@ -7549,7 +7564,6 @@ bool GenTree::OperIsImplicitIndir() const
         case GT_CMPXCHG:
         case GT_BLK:
         case GT_STORE_BLK:
-        case GT_STORE_DYN_BLK:
         case GT_BOX:
         case GT_ARR_ELEM:
         case GT_ARR_LENGTH:
@@ -7646,7 +7660,6 @@ ExceptionSetFlags GenTree::OperExceptions(Compiler* comp)
         case GT_BLK:
         case GT_NULLCHECK:
         case GT_STORE_BLK:
-        case GT_STORE_DYN_BLK:
         case GT_ARR_LENGTH:
         case GT_MDARR_LENGTH:
         case GT_MDARR_LOWER_BOUND:
@@ -7766,7 +7779,6 @@ bool GenTree::OperRequiresGlobRefFlag(Compiler* comp) const
 
         case GT_STOREIND:
         case GT_STORE_BLK:
-        case GT_STORE_DYN_BLK:
         case GT_XADD:
         case GT_XORR:
         case GT_XAND:
@@ -7775,6 +7787,7 @@ bool GenTree::OperRequiresGlobRefFlag(Compiler* comp) const
         case GT_CMPXCHG:
         case GT_MEMORYBARRIER:
         case GT_KEEPALIVE:
+        case GT_SWIFT_ERROR:
             return true;
 
         case GT_CALL:
@@ -7824,7 +7837,6 @@ bool GenTree::OperSupportsOrderingSideEffect() const
         case GT_STOREIND:
         case GT_NULLCHECK:
         case GT_STORE_BLK:
-        case GT_STORE_DYN_BLK:
         case GT_XADD:
         case GT_XORR:
         case GT_XAND:
@@ -7833,6 +7845,7 @@ bool GenTree::OperSupportsOrderingSideEffect() const
         case GT_CMPXCHG:
         case GT_MEMORYBARRIER:
         case GT_CATCH_ARG:
+        case GT_SWIFT_ERROR:
             return true;
         default:
             return false;
@@ -7948,7 +7961,7 @@ GenTree::VtablePtr GenTree::GetVtableForOper(genTreeOps oper)
     switch (oper)
     {
 
-// clang-format off
+        // clang-format off
 
 #define GTSTRUCT_0(nm, tag)                             /*handle explicitly*/
 #define GTSTRUCT_1(nm, tag)                             \
@@ -8010,8 +8023,8 @@ GenTree::VtablePtr GenTree::GetVtableForOper(genTreeOps oper)
         }
         break;
 
-        // We don't need to handle GTSTRUCT_N for LclVarCommon, since all those allowed opers are specified
-        // in their proper subtype. Similarly for GenTreeIndir.
+            // We don't need to handle GTSTRUCT_N for LclVarCommon, since all those allowed opers are specified
+            // in their proper subtype. Similarly for GenTreeIndir.
 
         default:
         {
@@ -8166,8 +8179,8 @@ GenTreeFlags Compiler::gtTokenToIconFlags(unsigned token)
 //    Returns a GT_IND node representing value at the address provided by 'addr'
 //
 // Notes:
-//    The GT_IND node is marked as non-faulting
-//    If the indType is GT_REF we also mark the indNode as GTF_GLOB_REF
+//    The GT_IND node is marked as non-faulting.
+//    If the indirection is not invariant, we also mark the indNode as GTF_GLOB_REF.
 //
 GenTree* Compiler::gtNewIndOfIconHandleNode(var_types indType, size_t addr, GenTreeFlags iconFlags, bool isInvariant)
 {
@@ -8176,9 +8189,7 @@ GenTree* Compiler::gtNewIndOfIconHandleNode(var_types indType, size_t addr, GenT
 
     if (isInvariant)
     {
-        assert(iconFlags != GTF_ICON_STATIC_HDL); // Pointer to a mutable class Static variable
-        assert(iconFlags != GTF_ICON_BBC_PTR);    // Pointer to a mutable basic block count value
-        assert(iconFlags != GTF_ICON_GLOBAL_PTR); // Pointer to mutable data from the VM state
+        assert(GenTree::HandleKindDataIsInvariant(iconFlags));
 
         // This indirection also is invariant.
         indirFlags |= GTF_IND_INVARIANT;
@@ -8696,28 +8707,51 @@ GenTree* Compiler::gtNewConWithPattern(var_types type, uint8_t pattern)
     }
 }
 
-GenTreeLclVar* Compiler::gtNewStoreLclVarNode(unsigned lclNum, GenTree* data)
+//------------------------------------------------------------------------
+// gtNewStoreLclVarNode: Create a local store node.
+//
+// Arguments:
+//    lclNum - Number of the local being stored to
+//    value  - Value to store
+//
+// Return Value:
+//    The created STORE_LCL_VAR node.
+//
+GenTreeLclVar* Compiler::gtNewStoreLclVarNode(unsigned lclNum, GenTree* value)
 {
     LclVarDsc*     varDsc = lvaGetDesc(lclNum);
     var_types      type   = varDsc->lvNormalizeOnLoad() ? varDsc->TypeGet() : genActualType(varDsc);
-    GenTreeLclVar* store  = new (this, GT_STORE_LCL_VAR) GenTreeLclVar(type, lclNum, data);
+    GenTreeLclVar* store  = new (this, GT_STORE_LCL_VAR) GenTreeLclVar(type, lclNum, value);
     store->gtFlags |= (GTF_VAR_DEF | GTF_ASG);
     if (varDsc->IsAddressExposed())
     {
         store->gtFlags |= GTF_GLOB_REF;
     }
 
-    gtInitializeStoreNode(store, data);
+    gtInitializeStoreNode(store, value);
 
     return store;
 }
 
+//------------------------------------------------------------------------
+// gtNewStoreLclFldNode: Create a local field store node.
+//
+// Arguments:
+//    lclNum - Number of the local being stored to
+//    type   - Type of the store
+//    layout - Struct layout of the store
+//    offset - Offset of the store
+//    value  - Value to store
+//
+// Return Value:
+//    The created STORE_LCL_FLD node.
+//
 GenTreeLclFld* Compiler::gtNewStoreLclFldNode(
-    unsigned lclNum, var_types type, ClassLayout* layout, unsigned offset, GenTree* data)
+    unsigned lclNum, var_types type, ClassLayout* layout, unsigned offset, GenTree* value)
 {
     assert((type == TYP_STRUCT) == (layout != nullptr));
 
-    GenTreeLclFld* store = new (this, GT_STORE_LCL_FLD) GenTreeLclFld(type, lclNum, offset, data, layout);
+    GenTreeLclFld* store = new (this, GT_STORE_LCL_FLD) GenTreeLclFld(type, lclNum, offset, value, layout);
     store->gtFlags |= (GTF_VAR_DEF | GTF_ASG);
     if (store->IsPartialLclFld(this))
     {
@@ -8728,7 +8762,7 @@ GenTreeLclFld* Compiler::gtNewStoreLclFldNode(
         store->gtFlags |= GTF_GLOB_REF;
     }
 
-    gtInitializeStoreNode(store, data);
+    gtInitializeStoreNode(store, value);
 
     return store;
 }
@@ -8831,7 +8865,7 @@ GenTreeLclVar* Compiler::gtNewLclvNode(unsigned lnum, var_types type DEBUGARG(IL
 {
     assert(type != TYP_VOID);
     // We need to ensure that all struct values are normalized.
-    // It might be nice to assert this in general, but we have assignments of int to long.
+    // It might be nice to assert this in general, but we have stores of int to long.
     if (varTypeIsStruct(type))
     {
         // Make an exception for implicit by-ref parameters during global morph, since
@@ -8877,7 +8911,7 @@ GenTreeLclVar* Compiler::gtNewLclVarNode(unsigned lclNum, var_types type)
 GenTreeLclVar* Compiler::gtNewLclLNode(unsigned lnum, var_types type DEBUGARG(IL_OFFSET offs))
 {
     // We need to ensure that all struct values are normalized.
-    // It might be nice to assert this in general, but we have assignments of int to long.
+    // It might be nice to assert this in general, but we have stores of int to long.
     if (varTypeIsStruct(type))
     {
         // Make an exception for implicit by-ref parameters during global morph, since
@@ -8988,28 +9022,28 @@ GenTreeFieldAddr* Compiler::gtNewFieldAddrNode(var_types type, CORINFO_FIELD_HAN
 //    store - The store node
 //    data  - The value to store
 //
-void Compiler::gtInitializeStoreNode(GenTree* store, GenTree* data)
+void Compiler::gtInitializeStoreNode(GenTree* store, GenTree* value)
 {
     // TODO-ASG: add asserts that the types match here.
-    assert(store->Data() == data);
+    assert(store->Data() == value);
 
 #if defined(FEATURE_SIMD)
 #ifndef TARGET_X86
     if (varTypeIsSIMD(store))
     {
         // TODO-ASG: delete this zero-diff quirk.
-        if (!data->IsCall() || !data->AsCall()->ShouldHaveRetBufArg())
+        if (!value->IsCall() || !value->AsCall()->ShouldHaveRetBufArg())
         {
-            // We want to track SIMD assignments as being intrinsics since they
-            // are functionally SIMD `mov` instructions and are more efficient
-            // when we don't promote, particularly when it occurs due to inlining.
+            // We want to track SIMD stores as being intrinsics since they are
+            // functionally SIMD `mov` instructions and are more efficient when
+            // we don't promote, particularly when it occurs due to inlining.
             SetOpLclRelatedToSIMDIntrinsic(store);
-            SetOpLclRelatedToSIMDIntrinsic(data);
+            SetOpLclRelatedToSIMDIntrinsic(value);
         }
     }
 #else  // TARGET_X86
     // TODO-Cleanup: merge into the all-arch.
-    if (varTypeIsSIMD(data) && data->OperIs(GT_HWINTRINSIC, GT_CNS_VEC))
+    if (varTypeIsSIMD(value) && value->OperIs(GT_HWINTRINSIC, GT_CNS_VEC))
     {
         SetOpLclRelatedToSIMDIntrinsic(store);
     }
@@ -9064,6 +9098,26 @@ GenTreeBlk* Compiler::gtNewBlkIndir(ClassLayout* layout, GenTree* addr, GenTreeF
     return blkNode;
 }
 
+//------------------------------------------------------------------------
+// gtNewMemoryBarrier: Create a memory barrier node
+//
+// Arguments:
+//    loadOnly - relaxes the full memory barrier to be load-only
+//
+// Return Value:
+//    The created GT_MEMORYBARRIER node.
+//
+GenTree* Compiler::gtNewMemoryBarrier(bool loadOnly)
+{
+    GenTree* tree = new (this, GT_MEMORYBARRIER) GenTree(GT_MEMORYBARRIER, TYP_VOID);
+    tree->gtFlags |= GTF_GLOB_REF | GTF_ASG;
+    if (loadOnly)
+    {
+        tree->gtFlags |= GTF_MEMORYBARRIER_LOAD;
+    }
+    return tree;
+}
+
 //------------------------------------------------------------------------------
 // gtNewIndir : Create an indirection node.
 //
@@ -9114,54 +9168,26 @@ GenTree* Compiler::gtNewLoadValueNode(var_types type, ClassLayout* layout, GenTr
 }
 
 //------------------------------------------------------------------------------
-// gtNewStoreBlkNode : Create an indirect struct store node.
+// gtNewStoreBlkNode: Create an indirect struct store node.
 //
 // Arguments:
 //    layout     - The struct layout
 //    addr       - Destination address
-//    data       - Value to store
+//    value      - Value to store
 //    indirFlags - Indirection flags
 //
 // Return Value:
 //    The created GT_STORE_BLK node.
 //
-GenTreeBlk* Compiler::gtNewStoreBlkNode(ClassLayout* layout, GenTree* addr, GenTree* data, GenTreeFlags indirFlags)
+GenTreeBlk* Compiler::gtNewStoreBlkNode(ClassLayout* layout, GenTree* addr, GenTree* value, GenTreeFlags indirFlags)
 {
     assert((indirFlags & GTF_IND_INVARIANT) == 0);
-    assert(data->IsInitVal() || ClassLayout::AreCompatible(layout, data->GetLayout(this)));
+    assert(value->IsInitVal() || ClassLayout::AreCompatible(layout, value->GetLayout(this)));
 
-    GenTreeBlk* store = new (this, GT_STORE_BLK) GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, addr, data, layout);
+    GenTreeBlk* store = new (this, GT_STORE_BLK) GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, addr, value, layout);
     store->gtFlags |= GTF_ASG;
     gtInitializeIndirNode(store, indirFlags);
-    gtInitializeStoreNode(store, data);
-
-    return store;
-}
-
-//------------------------------------------------------------------------------
-// gtNewStoreDynBlkNode : Create a dynamic block store node.
-//
-// Arguments:
-//    addr        - Destination address
-//    data        - Value to store (init val or indirection representing a location)
-//    dynamicSize - Node that computes number of bytes to store
-//    indirFlags  - Indirection flags
-//
-// Return Value:
-//    The created GT_STORE_DYN_BLK node.
-//
-GenTreeStoreDynBlk* Compiler::gtNewStoreDynBlkNode(GenTree*     addr,
-                                                   GenTree*     data,
-                                                   GenTree*     dynamicSize,
-                                                   GenTreeFlags indirFlags)
-{
-    assert((indirFlags & GTF_IND_INVARIANT) == 0);
-    assert(data->IsInitVal() || data->OperIs(GT_IND));
-
-    GenTreeStoreDynBlk* store = new (this, GT_STORE_DYN_BLK) GenTreeStoreDynBlk(addr, data, dynamicSize);
-    store->gtFlags |= GTF_ASG;
-    gtInitializeIndirNode(store, indirFlags);
-    gtInitializeStoreNode(store, data);
+    gtInitializeStoreNode(store, value);
 
     return store;
 }
@@ -9171,21 +9197,21 @@ GenTreeStoreDynBlk* Compiler::gtNewStoreDynBlkNode(GenTree*     addr,
 //
 // Arguments:
 //    type       - Type of the store
-//    addr       - Destionation address
-//    data       - Value to store
+//    addr       - Destination address
+//    value      - Value to store
 //    indirFlags - Indirection flags
 //
 // Return Value:
 //    The created GT_STOREIND node.
 //
-GenTreeStoreInd* Compiler::gtNewStoreIndNode(var_types type, GenTree* addr, GenTree* data, GenTreeFlags indirFlags)
+GenTreeStoreInd* Compiler::gtNewStoreIndNode(var_types type, GenTree* addr, GenTree* value, GenTreeFlags indirFlags)
 {
     assert(((indirFlags & GTF_IND_INVARIANT) == 0) && (type != TYP_STRUCT));
 
-    GenTreeStoreInd* store = new (this, GT_STOREIND) GenTreeStoreInd(type, addr, data);
+    GenTreeStoreInd* store = new (this, GT_STOREIND) GenTreeStoreInd(type, addr, value);
     store->gtFlags |= GTF_ASG;
     gtInitializeIndirNode(store, indirFlags);
-    gtInitializeStoreNode(store, data);
+    gtInitializeStoreNode(store, value);
 
     return store;
 }
@@ -9197,7 +9223,7 @@ GenTreeStoreInd* Compiler::gtNewStoreIndNode(var_types type, GenTree* addr, GenT
 //    type       - Type to store
 //    layout     - Struct layout for the store
 //    addr       - Destination address
-//    data       - Value to store
+//    value      - Value to store
 //    indirFlags - Indirection flags
 //
 // Return Value:
@@ -9205,7 +9231,7 @@ GenTreeStoreInd* Compiler::gtNewStoreIndNode(var_types type, GenTree* addr, GenT
 //    a compatible local.
 //
 GenTree* Compiler::gtNewStoreValueNode(
-    var_types type, ClassLayout* layout, GenTree* addr, GenTree* data, GenTreeFlags indirFlags)
+    var_types type, ClassLayout* layout, GenTree* addr, GenTree* value, GenTreeFlags indirFlags)
 {
     assert((type != TYP_STRUCT) || (layout != nullptr));
 
@@ -9216,18 +9242,18 @@ GenTree* Compiler::gtNewStoreValueNode(
         if ((varDsc->TypeGet() == type) &&
             ((type != TYP_STRUCT) || ClassLayout::AreCompatible(layout, varDsc->GetLayout())))
         {
-            return gtNewStoreLclVarNode(lclNum, data);
+            return gtNewStoreLclVarNode(lclNum, value);
         }
     }
 
     GenTree* store;
     if (type == TYP_STRUCT)
     {
-        store = gtNewStoreBlkNode(layout, addr, data, indirFlags);
+        store = gtNewStoreBlkNode(layout, addr, value, indirFlags);
     }
     else
     {
-        store = gtNewStoreIndNode(type, addr, data, indirFlags);
+        store = gtNewStoreIndNode(type, addr, value, indirFlags);
     }
 
     return store;
@@ -9277,10 +9303,9 @@ GenTree* Compiler::gtNewAtomicNode(genTreeOps oper, var_types type, GenTree* add
 //    value for the initblk.
 //
 // Notes:
-//    The initBlk MSIL instruction takes a byte value, which must be
-//    extended to the size of the assignment when an initBlk is transformed
-//    to an assignment of a primitive type.
-//    This performs the appropriate extension.
+//    The initBlk MSIL instruction takes a byte value, which must be extended
+//    to the size of the store when an initBlk is transformed to a store of
+//    a primitive type. This performs the appropriate extension.
 //
 void GenTreeIntCon::FixupInitBlkValue(var_types type)
 {
@@ -9504,7 +9529,7 @@ GenTree* Compiler::gtNewPutArgReg(var_types type, GenTree* arg, regNumber argReg
         node->AsMultiRegOp()->gtOtherReg = REG_NEXT(argReg);
     }
 #else
-    node          = gtNewOperNode(GT_PUTARG_REG, type, arg);
+    node = gtNewOperNode(GT_PUTARG_REG, type, arg);
 #endif
     node->SetRegNum(argReg);
 
@@ -9535,7 +9560,7 @@ GenTree* Compiler::gtNewBitCastNode(var_types type, GenTree* arg)
     // A BITCAST could be a MultiRegOp on arm since we could move a double register to two int registers.
     node = new (this, GT_BITCAST) GenTreeMultiRegOp(GT_BITCAST, type, arg, nullptr);
 #else
-    node          = gtNewOperNode(GT_BITCAST, type, arg);
+    node = gtNewOperNode(GT_BITCAST, type, arg);
 #endif
 
     return node;
@@ -9556,7 +9581,9 @@ GenTree* Compiler::gtNewBitCastNode(var_types type, GenTree* arg)
 //    can't be represented in jitted code. If this happens, this method will return
 //    nullptr.
 //
-GenTreeAllocObj* Compiler::gtNewAllocObjNode(CORINFO_RESOLVED_TOKEN* pResolvedToken, bool useParent)
+GenTreeAllocObj* Compiler::gtNewAllocObjNode(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+                                             CORINFO_METHOD_HANDLE   callerHandle,
+                                             bool                    useParent)
 {
     const bool      mustRestoreHandle     = true;
     bool* const     pRuntimeLookup        = nullptr;
@@ -9572,7 +9599,7 @@ GenTreeAllocObj* Compiler::gtNewAllocObjNode(CORINFO_RESOLVED_TOKEN* pResolvedTo
         helper                                        = CORINFO_HELP_READYTORUN_NEW;
         CORINFO_LOOKUP_KIND* const pGenericLookupKind = nullptr;
         usingReadyToRunHelper =
-            info.compCompHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, helper, &lookup);
+            info.compCompHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, helper, callerHandle, &lookup);
     }
 #endif
 
@@ -9890,12 +9917,13 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree)
             case GT_NO_OP:
             case GT_NOP:
             case GT_LABEL:
+            case GT_SWIFT_ERROR:
                 copy = new (this, oper) GenTree(oper, tree->gtType);
                 goto DONE;
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
             case GT_END_LFIN:
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
             case GT_JMP:
                 copy = new (this, oper) GenTreeVal(oper, tree->gtType, tree->AsVal()->gtVal1);
                 goto DONE;
@@ -9918,7 +9946,6 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree)
     if (kind & GTK_SMPOP)
     {
         /* If necessary, make sure we allocate a "fat" tree node */
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
         switch (oper)
         {
@@ -9937,7 +9964,7 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree)
                                   tree->AsLclFld()->Data(), tree->AsLclFld()->GetLayout());
                 break;
 
-            /* These nodes sometimes get bashed to "fat" ones */
+                /* These nodes sometimes get bashed to "fat" ones */
 
             case GT_MUL:
             case GT_DIV:
@@ -10203,12 +10230,6 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree)
                                gtCloneExpr(tree->AsCmpXchg()->Data()), gtCloneExpr(tree->AsCmpXchg()->Comparand()));
             break;
 
-        case GT_STORE_DYN_BLK:
-            copy = new (this, oper) GenTreeStoreDynBlk(gtCloneExpr(tree->AsStoreDynBlk()->Addr()),
-                                                       gtCloneExpr(tree->AsStoreDynBlk()->Data()),
-                                                       gtCloneExpr(tree->AsStoreDynBlk()->gtDynamicSize));
-            break;
-
         case GT_SELECT:
             copy =
                 new (this, oper) GenTreeConditional(oper, tree->TypeGet(), gtCloneExpr(tree->AsConditional()->gtCond),
@@ -10328,7 +10349,9 @@ GenTreeCall* Compiler::gtCloneExprCallHelper(GenTreeCall* tree)
     copy->gtCallMoreFlags = tree->gtCallMoreFlags;
     INDEBUG(copy->gtCallDebugFlags = tree->gtCallDebugFlags);
 
-    copy->gtArgs.InternalCopyFrom(this, &tree->gtArgs, [=](GenTree* node) { return gtCloneExpr(node); });
+    copy->gtArgs.InternalCopyFrom(this, &tree->gtArgs, [=](GenTree* node) {
+        return gtCloneExpr(node);
+    });
 
     // The call sig comes from the EE and doesn't change throughout the compilation process, meaning
     // we only really need one physical copy of it. Therefore a shallow pointer copy will suffice.
@@ -10481,7 +10504,8 @@ void Compiler::gtUpdateStmtSideEffects(Statement* stmt)
             DoPostOrder = true,
         };
 
-        UpdateSideEffectsWalker(Compiler* comp) : GenTreeVisitor(comp)
+        UpdateSideEffectsWalker(Compiler* comp)
+            : GenTreeVisitor(comp)
         {
         }
 
@@ -10678,12 +10702,20 @@ bool GenTree::gtRequestSetFlags()
 }
 
 GenTreeUseEdgeIterator::GenTreeUseEdgeIterator()
-    : m_advance(nullptr), m_node(nullptr), m_edge(nullptr), m_statePtr(nullptr), m_state(-1)
+    : m_advance(nullptr)
+    , m_node(nullptr)
+    , m_edge(nullptr)
+    , m_statePtr(nullptr)
+    , m_state(-1)
 {
 }
 
 GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
-    : m_advance(nullptr), m_node(node), m_edge(nullptr), m_statePtr(nullptr), m_state(0)
+    : m_advance(nullptr)
+    , m_node(node)
+    , m_edge(nullptr)
+    , m_statePtr(nullptr)
+    , m_state(0)
 {
     assert(m_node != nullptr);
 
@@ -10712,9 +10744,9 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
         case GT_START_NONGC:
         case GT_START_PREEMPTGC:
         case GT_PROF_HOOK:
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
         case GT_END_LFIN:
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
         case GT_PHI_ARG:
         case GT_JMPTABLE:
         case GT_PHYSREG:
@@ -10723,6 +10755,7 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
         case GT_PINVOKE_EPILOG:
         case GT_IL_OFFSET:
         case GT_NOP:
+        case GT_SWIFT_ERROR:
             m_state = -1;
             return;
 
@@ -10826,12 +10859,6 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
             m_advance = &GenTreeUseEdgeIterator::AdvanceArrElem;
             return;
 
-        case GT_STORE_DYN_BLK:
-            m_edge = &m_node->AsStoreDynBlk()->Addr();
-            assert(*m_edge != nullptr);
-            m_advance = &GenTreeUseEdgeIterator::AdvanceStoreDynBlk;
-            return;
-
         case GT_CALL:
             m_statePtr = m_node->AsCall()->gtArgs.Args().begin().GetArg();
             m_advance  = &GenTreeUseEdgeIterator::AdvanceCall<CALL_ARGS>;
@@ -10897,29 +10924,6 @@ void GenTreeUseEdgeIterator::AdvanceArrElem()
     }
 }
 
-//------------------------------------------------------------------------
-// GenTreeUseEdgeIterator::AdvanceStoreDynBlk: produces the next operand of a StoreDynBlk node and advances the state.
-//
-void GenTreeUseEdgeIterator::AdvanceStoreDynBlk()
-{
-    GenTreeStoreDynBlk* const dynBlock = m_node->AsStoreDynBlk();
-    switch (m_state)
-    {
-        case 0:
-            m_edge  = &dynBlock->Data();
-            m_state = 1;
-            break;
-        case 1:
-            m_edge    = &dynBlock->gtDynamicSize;
-            m_advance = &GenTreeUseEdgeIterator::Terminate;
-            break;
-        default:
-            unreached();
-    }
-
-    assert(*m_edge != nullptr);
-}
-
 //------------------------------------------------------------------------
 // GenTreeUseEdgeIterator::AdvanceFieldList: produces the next operand of a FieldList node and advances the state.
 //
@@ -10995,7 +10999,7 @@ void GenTreeUseEdgeIterator::AdvanceConditional()
 // `GTF_REVERSE_OPS` flag.
 //
 template <bool ReverseOperands>
-void           GenTreeUseEdgeIterator::AdvanceBinOp()
+void GenTreeUseEdgeIterator::AdvanceBinOp()
 {
     assert(ReverseOperands == ((m_node->gtFlags & GTF_REVERSE_OPS) != 0));
 
@@ -11118,7 +11122,7 @@ void GenTreeUseEdgeIterator::SetEntryStateForMultiOp()
 // component operands.
 //
 template <int state>
-void          GenTreeUseEdgeIterator::AdvanceCall()
+void GenTreeUseEdgeIterator::AdvanceCall()
 {
     GenTreeCall* const call = m_node->AsCall();
 
@@ -11275,7 +11279,7 @@ bool GenTree::Precedes(GenTree* other)
 //
 void GenTree::SetIndirExceptionFlags(Compiler* comp)
 {
-    assert(OperIsIndirOrArrMetaData() && (OperIsSimple() || OperIs(GT_CMPXCHG, GT_STORE_DYN_BLK)));
+    assert(OperIsIndirOrArrMetaData() && (OperIsSimple() || OperIs(GT_CMPXCHG)));
 
     if (IndirMayFault(comp))
     {
@@ -11297,11 +11301,27 @@ void GenTree::SetIndirExceptionFlags(Compiler* comp)
         gtFlags |= AsCmpXchg()->Data()->gtFlags & GTF_EXCEPT;
         gtFlags |= AsCmpXchg()->Comparand()->gtFlags & GTF_EXCEPT;
     }
-    else if (OperIs(GT_STORE_DYN_BLK))
-    {
-        gtFlags |= AsStoreDynBlk()->Data()->gtFlags & GTF_EXCEPT;
-        gtFlags |= AsStoreDynBlk()->gtDynamicSize->gtFlags & GTF_EXCEPT;
-    }
+}
+
+//------------------------------------------------------------------------------
+// HandleKindDataIsInvariant: Returns true if the data referred to by a handle
+// address is guaranteed to be invariant. Note that GTF_ICON_FTN_ADDR handles may
+// or may not point to invariant data.
+//
+// Arguments:
+//    flags - GenTree flags for handle.
+//
+/* static */
+bool GenTree::HandleKindDataIsInvariant(GenTreeFlags flags)
+{
+    GenTreeFlags handleKind = flags & GTF_ICON_HDL_MASK;
+    assert(handleKind != GTF_EMPTY);
+
+    // All handle types are assumed invariant except those specifically listed here.
+
+    return (handleKind != GTF_ICON_STATIC_HDL) && // Pointer to a mutable class Static variable
+           (handleKind != GTF_ICON_BBC_PTR) &&    // Pointer to a mutable basic block count value
+           (handleKind != GTF_ICON_GLOBAL_PTR);   // Pointer to mutable data from the VM state
 }
 
 #ifdef DEBUG
@@ -11315,10 +11335,12 @@ void GenTree::SetIndirExceptionFlags(Compiler* comp)
     printf("%c", (flags & GTF_EXCEPT) ? 'X' : '-');
     printf("%c", (flags & GTF_GLOB_REF) ? 'G' : '-');
     printf("%c", (debugFlags & GTF_DEBUG_NODE_MORPHED) ? '+' : // First print '+' if GTF_DEBUG_NODE_MORPHED is set
-                     (flags & GTF_ORDER_SIDEEFF) ? 'O' : '-'); // otherwise print 'O' or '-'
+                     (flags & GTF_ORDER_SIDEEFF) ? 'O'
+                                                 : '-'); // otherwise print 'O' or '-'
     printf("%c", (flags & GTF_COLON_COND) ? '?' : '-');
-    printf("%c", (flags & GTF_DONT_CSE) ? 'N' :           // N is for No cse
-                     (flags & GTF_MAKE_CSE) ? 'H' : '-'); // H is for Hoist this expr
+    printf("%c", (flags & GTF_DONT_CSE) ? 'N' : // N is for No cse
+                     (flags & GTF_MAKE_CSE) ? 'H'
+                                            : '-'); // H is for Hoist this expr
     printf("%c", (flags & GTF_REVERSE_OPS) ? 'R' : '-');
     printf("%c", (flags & GTF_UNSIGNED) ? 'U' : (flags & GTF_BOOLEAN) ? 'B' : '-');
 #if FEATURE_SET_FLAGS
@@ -11724,7 +11746,6 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, _In_ _In_opt_
             case GT_IND:
             case GT_STOREIND:
             case GT_STORE_BLK:
-            case GT_STORE_DYN_BLK:
                 // We prefer printing V or U
                 if ((tree->gtFlags & (GTF_IND_VOLATILE | GTF_IND_UNALIGNED)) == 0)
                 {
@@ -12168,8 +12189,8 @@ void Compiler::gtDispRegVal(GenTree* tree)
 {
     switch (tree->GetRegTag())
     {
-        // Don't display anything for the GT_REGTAG_NONE case;
-        // the absence of printed register values will imply this state.
+            // Don't display anything for the GT_REGTAG_NONE case;
+            // the absence of printed register values will imply this state.
 
         case GenTree::GT_REGTAG_REG:
             printf(" REG %s", compRegVarName(tree->GetRegNum()));
@@ -12197,9 +12218,9 @@ void Compiler::gtDispRegVal(GenTree* tree)
 }
 
 // We usually/commonly don't expect to print anything longer than this string,
-#define LONGEST_COMMON_LCL_VAR_DISPLAY "V99 PInvokeFrame"
+#define LONGEST_COMMON_LCL_VAR_DISPLAY        "V99 PInvokeFrame"
 #define LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH (sizeof(LONGEST_COMMON_LCL_VAR_DISPLAY))
-#define BUF_SIZE (LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH * 2)
+#define BUF_SIZE                              (LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH * 2)
 
 void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, const char** ilNameOut, unsigned* ilNumOut)
 {
@@ -12258,24 +12279,22 @@ void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, cons
                 ilName = "OutArgs";
             }
 #endif // FEATURE_FIXED_OUT_ARGS
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
             else if (lclNum == lvaShadowSPslotsVar)
             {
                 ilName = "EHSlots";
             }
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 #ifdef JIT32_GCENCODER
             else if (lclNum == lvaLocAllocSPvar)
             {
                 ilName = "LocAllocSP";
             }
 #endif // JIT32_GCENCODER
-#if defined(FEATURE_EH_FUNCLETS)
             else if (lclNum == lvaPSPSym)
             {
                 ilName = "PSPSym";
             }
-#endif // FEATURE_EH_FUNCLETS
             else
             {
                 ilKind = "tmp";
@@ -12434,7 +12453,7 @@ static const char* InsCflagsToString(insCflags flags)
 {
     const static char* s_table[16] = {"0", "v",  "c",  "cv",  "z",  "zv",  "zc",  "zcv",
                                       "n", "nv", "nc", "ncv", "nz", "nzv", "nzc", "nzcv"};
-    unsigned index = (unsigned)flags;
+    unsigned           index       = (unsigned)flags;
     assert((0 <= index) && (index < ArrLen(s_table)));
     return s_table[index];
 }
@@ -12519,12 +12538,12 @@ void Compiler::gtDispConst(GenTree* tree)
             }
             else
             {
-                ssize_t dspIconVal =
-                    tree->IsIconHandle() ? dspPtr(tree->AsIntCon()->gtIconVal) : tree->AsIntCon()->gtIconVal;
+                ssize_t iconVal    = tree->AsIntCon()->gtIconVal;
+                ssize_t dspIconVal = tree->IsIconHandle() ? dspPtr(iconVal) : iconVal;
 
                 if (tree->TypeGet() == TYP_REF)
                 {
-                    if (tree->AsIntCon()->gtIconVal == 0)
+                    if (iconVal == 0)
                     {
                         printf(" null");
                     }
@@ -12534,12 +12553,12 @@ void Compiler::gtDispConst(GenTree* tree)
                         printf(" 0x%llx", dspIconVal);
                     }
                 }
-                else if ((tree->AsIntCon()->gtIconVal > -1000) && (tree->AsIntCon()->gtIconVal < 1000))
+                else if ((iconVal > -1000) && (iconVal < 1000))
                 {
                     printf(" %ld", dspIconVal);
                 }
 #ifdef TARGET_64BIT
-                else if ((tree->AsIntCon()->gtIconVal & 0xFFFFFFFF00000000LL) != 0)
+                else if ((iconVal & 0xFFFFFFFF00000000LL) != 0)
                 {
                     if (dspIconVal >= 0)
                     {
@@ -12571,13 +12590,34 @@ void Compiler::gtDispConst(GenTree* tree)
                             printf(" scope");
                             break;
                         case GTF_ICON_CLASS_HDL:
-                            printf(" class");
+                            if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) || opts.IsReadyToRun())
+                            {
+                                printf(" class");
+                            }
+                            else
+                            {
+                                printf(" class %s", eeGetClassName((CORINFO_CLASS_HANDLE)iconVal));
+                            }
                             break;
                         case GTF_ICON_METHOD_HDL:
-                            printf(" method");
+                            if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) || opts.IsReadyToRun())
+                            {
+                                printf(" method");
+                            }
+                            else
+                            {
+                                printf(" method %s", eeGetMethodFullName((CORINFO_METHOD_HANDLE)iconVal));
+                            }
                             break;
                         case GTF_ICON_FIELD_HDL:
-                            printf(" field");
+                            if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) || opts.IsReadyToRun())
+                            {
+                                printf(" field");
+                            }
+                            else
+                            {
+                                printf(" field %s", eeGetFieldName((CORINFO_FIELD_HANDLE)iconVal, true));
+                            }
                             break;
                         case GTF_ICON_STATIC_HDL:
                             printf(" static");
@@ -12723,6 +12763,12 @@ void Compiler::gtDispConst(GenTree* tree)
                            vecCon->gtSimdVal.u64[6], vecCon->gtSimdVal.u64[7]);
                     break;
                 }
+
+                case TYP_MASK:
+                {
+                    printf("<0x%08x, 0x%08x>", vecCon->gtSimdVal.u32[0], vecCon->gtSimdVal.u32[1]);
+                    break;
+                }
 #endif // TARGET_XARCH
 
                 default:
@@ -12812,13 +12858,13 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack)
         }
         break;
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
         case GT_END_LFIN:
             printf(" endNstLvl=%d", tree->AsVal()->gtVal1);
             break;
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
-        // Vanilla leaves. No qualifying information available. So do nothing
+            // Vanilla leaves. No qualifying information available. So do nothing
 
         case GT_NOP:
         case GT_NO_OP:
@@ -12829,6 +12875,7 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack)
         case GT_MEMORYBARRIER:
         case GT_PINVOKE_PROLOG:
         case GT_JMPTABLE:
+        case GT_SWIFT_ERROR:
             break;
 
         case GT_RET_EXPR:
@@ -12969,8 +13016,8 @@ void Compiler::gtDispLocal(GenTreeLclVarCommon* tree, IndentStack* indentStack)
 void Compiler::gtDispChild(GenTree*             child,
                            IndentStack*         indentStack,
                            IndentInfo           arcType,
-                           _In_opt_ const char* msg,     /* = nullptr  */
-                           bool                 topOnly) /* = false */
+                           _In_opt_ const char* msg, /* = nullptr  */
+                           bool                 topOnly)             /* = false */
 {
     indentStack->Push(arcType);
     gtDispTree(child, indentStack, msg, topOnly);
@@ -12979,11 +13026,11 @@ void Compiler::gtDispChild(GenTree*             child,
 
 /*****************************************************************************/
 
-void Compiler::gtDispTree(GenTree*     tree,
-                          IndentStack* indentStack,            /* = nullptr */
-                          _In_ _In_opt_z_ const char* msg,     /* = nullptr  */
-                          bool                        topOnly, /* = false */
-                          bool                        isLIR)   /* = false */
+void Compiler::gtDispTree(GenTree*                    tree,
+                          IndentStack*                indentStack, /* = nullptr */
+                          _In_ _In_opt_z_ const char* msg,         /* = nullptr  */
+                          bool                        topOnly,     /* = false */
+                          bool                        isLIR)                              /* = false */
 {
     if (tree == nullptr)
     {
@@ -13122,11 +13169,6 @@ void Compiler::gtDispTree(GenTree*     tree,
                     case GenTreeBlk::BlkOpKindUnrollMemmove:
                         printf(" (Memmove)");
                         break;
-#ifndef TARGET_X86
-                    case GenTreeBlk::BlkOpKindHelper:
-                        printf(" (Helper)");
-                        break;
-#endif
 
                     case GenTreeBlk::BlkOpKindLoop:
                         printf(" (Loop)");
@@ -13229,9 +13271,6 @@ void Compiler::gtDispTree(GenTree*     tree,
                 case NI_System_Math_Floor:
                     printf(" floor");
                     break;
-                case NI_System_Math_FMod:
-                    printf(" fmod");
-                    break;
                 case NI_System_Math_FusedMultiplyAdd:
                     printf(" fma");
                     break;
@@ -13488,9 +13527,10 @@ void Compiler::gtDispTree(GenTree*     tree,
         case GT_HWINTRINSIC:
             if (tree->OperIs(GT_HWINTRINSIC))
             {
-                printf(" %s %s", tree->AsHWIntrinsic()->GetSimdBaseType() == TYP_UNKNOWN
-                                     ? ""
-                                     : varTypeName(tree->AsHWIntrinsic()->GetSimdBaseType()),
+                printf(" %s %s",
+                       tree->AsHWIntrinsic()->GetSimdBaseType() == TYP_UNKNOWN
+                           ? ""
+                           : varTypeName(tree->AsHWIntrinsic()->GetSimdBaseType()),
                        HWIntrinsicInfo::lookupName(tree->AsHWIntrinsic()->GetHWIntrinsicId()));
             }
 
@@ -13535,28 +13575,6 @@ void Compiler::gtDispTree(GenTree*     tree,
             }
             break;
 
-        case GT_STORE_DYN_BLK:
-            if (tree->OperIsCopyBlkOp())
-            {
-                printf(" (copy)");
-            }
-            else if (tree->OperIsInitBlkOp())
-            {
-                printf(" (init)");
-            }
-            gtDispCommonEndLine(tree);
-
-            if (!topOnly)
-            {
-                gtDispChild(tree->AsStoreDynBlk()->Addr(), indentStack, IIArc, nullptr, topOnly);
-                if (tree->AsStoreDynBlk()->Data() != nullptr)
-                {
-                    gtDispChild(tree->AsStoreDynBlk()->Data(), indentStack, IIArc, nullptr, topOnly);
-                }
-                gtDispChild(tree->AsStoreDynBlk()->gtDynamicSize, indentStack, IIArcBottom, nullptr, topOnly);
-            }
-            break;
-
         case GT_SELECT:
             gtDispCommonEndLine(tree);
 
@@ -13619,6 +13637,10 @@ const char* Compiler::gtGetWellKnownArgNameForArgMsg(WellKnownArg arg)
         case WellKnownArg::ValidateIndirectCallTarget:
         case WellKnownArg::DispatchIndirectCallTarget:
             return "cfg tgt";
+        case WellKnownArg::SwiftError:
+            return "swift error";
+        case WellKnownArg::SwiftSelf:
+            return "swift self";
         default:
             return nullptr;
     }
@@ -13690,8 +13712,9 @@ void Compiler::gtGetArgMsg(GenTreeCall* call, CallArg* arg, char* bufp, unsigned
                 }
                 else
                 {
-                    unsigned lastRegNum = genMapIntRegNumToRegArgNum(firstReg) + arg->AbiInfo.NumRegs - 1;
-                    lastReg             = genMapIntRegArgNumToRegNum(lastRegNum);
+                    unsigned lastRegNum =
+                        genMapIntRegNumToRegArgNum(firstReg, call->GetUnmanagedCallConv()) + arg->AbiInfo.NumRegs - 1;
+                    lastReg = genMapIntRegArgNumToRegNum(lastRegNum, call->GetUnmanagedCallConv());
                 }
                 sprintf_s(bufp, bufLength, " %s%c%s out+%02x", compRegVarName(firstReg), separator,
                           compRegVarName(lastReg), arg->AbiInfo.ByteOffset);
@@ -13754,8 +13777,9 @@ void Compiler::gtGetLateArgMsg(GenTreeCall* call, CallArg* arg, char* bufp, unsi
                 }
                 else
                 {
-                    unsigned lastRegNum = genMapIntRegNumToRegArgNum(firstReg) + arg->AbiInfo.NumRegs - 1;
-                    lastReg             = genMapIntRegArgNumToRegNum(lastRegNum);
+                    unsigned lastRegNum =
+                        genMapIntRegNumToRegArgNum(firstReg, call->GetUnmanagedCallConv()) + arg->AbiInfo.NumRegs - 1;
+                    lastReg = genMapIntRegArgNumToRegNum(lastRegNum, call->GetUnmanagedCallConv());
                 }
                 sprintf_s(bufp, bufLength, " %s%c%s out+%02x", compRegVarName(firstReg), separator,
                           compRegVarName(lastReg), arg->AbiInfo.ByteOffset);
@@ -13809,48 +13833,45 @@ void Compiler::gtDispArgList(GenTreeCall* call, GenTree* lastCallOperand, Indent
 //
 void Compiler::gtDispStmt(Statement* stmt, const char* msg /* = nullptr */)
 {
-    if (opts.compDbgInfo)
+    if (msg != nullptr)
     {
-        if (msg != nullptr)
-        {
-            printf("%s ", msg);
-        }
-        printStmtID(stmt);
-        printf(" ( ");
-        const DebugInfo& di = stmt->GetDebugInfo();
-        // For statements in the root we display just the location without the
-        // inline context info.
-        if (di.GetInlineContext() == nullptr || di.GetInlineContext()->IsRoot())
-        {
-            di.GetLocation().Dump();
-        }
-        else
-        {
-            stmt->GetDebugInfo().Dump(false);
-        }
-        printf(" ... ");
-
-        IL_OFFSET lastILOffs = stmt->GetLastILOffset();
-        if (lastILOffs == BAD_IL_OFFSET)
-        {
-            printf("???");
-        }
-        else
-        {
-            printf("0x%03X", lastILOffs);
-        }
+        printf("%s ", msg);
+    }
+    printStmtID(stmt);
+    printf(" ( ");
+    const DebugInfo& di = stmt->GetDebugInfo();
+    // For statements in the root we display just the location without the
+    // inline context info.
+    if (di.GetInlineContext() == nullptr || di.GetInlineContext()->IsRoot())
+    {
+        di.GetLocation().Dump();
+    }
+    else
+    {
+        stmt->GetDebugInfo().Dump(false);
+    }
+    printf(" ... ");
 
-        printf(" )");
+    IL_OFFSET lastILOffs = stmt->GetLastILOffset();
+    if (lastILOffs == BAD_IL_OFFSET)
+    {
+        printf("???");
+    }
+    else
+    {
+        printf("0x%03X", lastILOffs);
+    }
 
-        DebugInfo par;
-        if (stmt->GetDebugInfo().GetParent(&par))
-        {
-            printf(" <- ");
-            par.Dump(true);
-        }
+    printf(" )");
 
-        printf("\n");
+    DebugInfo par;
+    if (stmt->GetDebugInfo().GetParent(&par))
+    {
+        printf(" <- ");
+        par.Dump(true);
     }
+    printf("\n");
+
     gtDispTree(stmt->GetRootNode());
 }
 
@@ -13988,23 +14009,7 @@ void Compiler::gtDispLIRNode(GenTree* node, const char* prefixMsg /* = nullptr *
                 displayOperand(operand, buf, operandArc, indentStack, prefixIndent);
             }
         }
-        else if (node->OperIs(GT_STORE_DYN_BLK))
-        {
-            if (operand == node->AsBlk()->Addr())
-            {
-                displayOperand(operand, "lhs", operandArc, indentStack, prefixIndent);
-            }
-            else if (operand == node->AsBlk()->Data())
-            {
-                displayOperand(operand, "rhs", operandArc, indentStack, prefixIndent);
-            }
-            else
-            {
-                assert(operand == node->AsStoreDynBlk()->gtDynamicSize);
-                displayOperand(operand, "size", operandArc, indentStack, prefixIndent);
-            }
-        }
-        else
+        else
         {
             displayOperand(operand, "", operandArc, indentStack, prefixIndent);
         }
@@ -14603,6 +14608,18 @@ GenTree* Compiler::gtFoldTypeCompare(GenTree* tree)
         return tree;
     }
 
+    // Check if an object of this type can even exist
+    if (info.compCompHnd->getExactClasses(clsHnd, 0, nullptr) == 0)
+    {
+        JITDUMP("Runtime reported %p (%s) is never allocated\n", dspPtr(clsHnd), eeGetClassName(clsHnd));
+
+        const bool operatorIsEQ  = (oper == GT_EQ);
+        const int  compareResult = operatorIsEQ ? 0 : 1;
+        JITDUMP("Runtime reports comparison is known at jit time: %u\n", compareResult);
+        GenTree* result = gtNewIconNode(compareResult);
+        return result;
+    }
+
     // We're good to go.
     JITDUMP("Optimizing compare of obj.GetType()"
             " and type-from-handle to compare method table pointer\n");
@@ -14623,13 +14640,13 @@ GenTree* Compiler::gtFoldTypeCompare(GenTree* tree)
         objOp = opOther->AsCall()->gtArgs.GetThisArg()->GetNode();
     }
 
-    bool                 pIsExact   = false;
-    bool                 pIsNonNull = false;
-    CORINFO_CLASS_HANDLE objCls     = gtGetClassHandle(objOp, &pIsExact, &pIsNonNull);
+    bool                 isExact   = false;
+    bool                 isNonNull = false;
+    CORINFO_CLASS_HANDLE objCls    = gtGetClassHandle(objOp, &isExact, &isNonNull);
 
     // if both classes are "final" (e.g. System.String[]) we can replace the comparison
     // with `true/false` + null check.
-    if ((objCls != NO_CLASS_HANDLE) && (pIsExact || info.compCompHnd->isExactType(objCls)))
+    if ((objCls != NO_CLASS_HANDLE) && (isExact || info.compCompHnd->isExactType(objCls)))
     {
         TypeCompareState tcs = info.compCompHnd->compareTypesForEquality(objCls, clsHnd);
         if (tcs != TypeCompareState::May)
@@ -14638,7 +14655,7 @@ GenTree* Compiler::gtFoldTypeCompare(GenTree* tree)
             const bool typesAreEqual = tcs == TypeCompareState::Must;
             GenTree*   compareResult = gtNewIconNode((operatorIsEQ ^ typesAreEqual) ? 0 : 1);
 
-            if (!pIsNonNull)
+            if (!isNonNull)
             {
                 // we still have to emit a null-check
                 // obj.GetType == typeof() -> (nullcheck) true/false
@@ -15343,7 +15360,7 @@ GenTree* Compiler::gtTryRemoveBoxUpstreamEffects(GenTree* op, BoxRemovalOptions
         const bool isUnsafeValueClass = false;
         lvaSetStruct(boxTempLcl, boxClass, isUnsafeValueClass);
 
-        // Remove the newobj and assignment to box temp
+        // Remove the newobj and store to box temp
         JITDUMP("Bashing NEWOBJ [%06u] to NOP\n", dspTreeID(boxLclDef));
         boxLclDef->gtBashToNOP();
 
@@ -15400,7 +15417,7 @@ GenTree* Compiler::gtTryRemoveBoxUpstreamEffects(GenTree* op, BoxRemovalOptions
 
     // Otherwise, proceed with the optimization.
     //
-    // Change the assignment expression to a NOP.
+    // Change the store expression to a NOP.
     JITDUMP("\nBashing NEWOBJ [%06u] to NOP\n", dspTreeID(boxLclDef));
     boxLclDef->gtBashToNOP();
 
@@ -16067,8 +16084,8 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree)
 
     switch (switchType)
     {
-        // Fold constant REF of BYREF binary operator.
-        // These can only be comparisons or null pointers.
+            // Fold constant REF of BYREF binary operator.
+            // These can only be comparisons or null pointers.
 
         case TYP_REF:
 
@@ -16137,11 +16154,11 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree)
 
             return tree;
 
-        // Fold constant INT binary operator.
+            // Fold constant INT binary operator.
 
         case TYP_INT:
 
-            assert(tree->TypeIs(TYP_INT) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY));
+            assert(tree->TypeIs(TYP_INT) || varTypeIsGC(tree));
             // No GC pointer types should be folded here...
             assert(!varTypeIsGC(op1->TypeGet()) && !varTypeIsGC(op2->TypeGet()));
 
@@ -16264,8 +16281,8 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree)
                     i1 = (i1 << ((32 - i2) & 0x1f)) | (UINT32(i1) >> (i2 & 0x1f));
                     break;
 
-                // DIV and MOD can throw an exception - if the division is by 0
-                // or there is overflow - when dividing MIN by -1.
+                    // DIV and MOD can throw an exception - if the division is by 0
+                    // or there is overflow - when dividing MIN by -1.
 
                 case GT_DIV:
                 case GT_MOD:
@@ -16334,7 +16351,7 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree)
 
             goto DONE;
 
-        // Fold constant LONG binary operator.
+            // Fold constant LONG binary operator.
 
         case TYP_LONG:
 
@@ -16557,7 +16574,7 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree)
 
             goto DONE;
 
-        // Fold constant FLOAT or DOUBLE binary operator
+            // Fold constant FLOAT or DOUBLE binary operator
 
         case TYP_FLOAT:
         case TYP_DOUBLE:
@@ -16579,7 +16596,7 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree)
             // For unordered operations (i.e. the GTF_RELOP_NAN_UN flag is set)
             // the result is always true - return 1.
 
-            if (_isnan(d1) || _isnan(d2))
+            if (FloatingPointUtils::isNaN(d1) || FloatingPointUtils::isNaN(d2))
             {
                 JITDUMP("Double operator(s) is NaN\n");
 
@@ -16838,25 +16855,23 @@ GenTree* Compiler::gtFoldIndirConst(GenTreeIndir* indir)
 }
 
 //------------------------------------------------------------------------
-// gtNewTempStore: Create an assignment of the given value to a temp.
+// gtNewTempStore: Create a store of the given value to a temp.
 //
 // Arguments:
 //    tmp         - local number for a compiler temp
-//    val         - value to assign to the temp
+//    val         - value to store to the temp
 //    curLevel    - stack level to spill at (importer-only)
 //    pAfterStmt  - statement to insert any additional statements after
 //    di          - debug info for new statements
 //    block       - block to insert any additional statements in
 //
 // Return Value:
-//    Normally a new assignment node.
+//    Normally a new store node.
 //    However may return a nop node if val is simply a reference to the temp.
 //
 // Notes:
-//    Self-assignments may be represented via NOPs.
-//
+//    Self-stores may be represented via NOPs.
 //    May update the type of the temp, if it was previously unknown.
-//
 //    May set compFloatingPointUsed.
 //
 GenTree* Compiler::gtNewTempStore(
@@ -16936,7 +16951,7 @@ GenTree* Compiler::gtNewTempStore(
         noway_assert(!"Incompatible types for gtNewTempStore");
     }
 
-    // Floating Point assignments can be created during inlining
+    // Floating Point stores can be created during inlining
     // see "Zero init inlinee locals:" in fgInlinePrependStatements
     // thus we may need to set compFloatingPointUsed to true here.
     //
@@ -16960,8 +16975,8 @@ GenTree* Compiler::gtNewTempStore(
 
 /*****************************************************************************
  *
- *  Create a helper call to access a COM field (iff 'assg' is non-zero this is
- *  an assignment and 'assg' is the new value).
+ *  Create a helper call to access a COM field (iff 'value' is non-zero this is
+ *  a store and 'value' is the new value).
  */
 
 GenTree* Compiler::gtNewRefCOMfield(GenTree*                objPtr,
@@ -16969,7 +16984,7 @@ GenTree* Compiler::gtNewRefCOMfield(GenTree*                objPtr,
                                     CORINFO_ACCESS_FLAGS    access,
                                     CORINFO_FIELD_INFO*     pFieldInfo,
                                     var_types               lclTyp,
-                                    GenTree*                assg)
+                                    GenTree*                value)
 {
     assert(pFieldInfo->fieldAccessor == CORINFO_FIELD_INSTANCE_HELPER ||
            pFieldInfo->fieldAccessor == CORINFO_FIELD_INSTANCE_ADDR_HELPER ||
@@ -16986,24 +17001,24 @@ GenTree* Compiler::gtNewRefCOMfield(GenTree*                objPtr,
     {
         if (access & CORINFO_ACCESS_SET)
         {
-            assert(assg != nullptr);
+            assert(value != nullptr);
             // helper needs pointer to struct, not struct itself
             if (pFieldInfo->helper == CORINFO_HELP_SETFIELDSTRUCT)
             {
                 // TODO-Bug?: verify if flags matter here
                 GenTreeFlags indirFlags = GTF_EMPTY;
-                assg                    = impGetNodeAddr(assg, CHECK_SPILL_ALL, &indirFlags);
+                value                   = impGetNodeAddr(value, CHECK_SPILL_ALL, &indirFlags);
             }
-            else if (lclTyp == TYP_DOUBLE && assg->TypeGet() == TYP_FLOAT)
+            else if (lclTyp == TYP_DOUBLE && value->TypeGet() == TYP_FLOAT)
             {
-                assg = gtNewCastNode(TYP_DOUBLE, assg, false, TYP_DOUBLE);
+                value = gtNewCastNode(TYP_DOUBLE, value, false, TYP_DOUBLE);
             }
-            else if (lclTyp == TYP_FLOAT && assg->TypeGet() == TYP_DOUBLE)
+            else if (lclTyp == TYP_FLOAT && value->TypeGet() == TYP_DOUBLE)
             {
-                assg = gtNewCastNode(TYP_FLOAT, assg, false, TYP_FLOAT);
+                value = gtNewCastNode(TYP_FLOAT, value, false, TYP_FLOAT);
             }
 
-            args[nArgs++] = assg;
+            args[nArgs++] = value;
             helperType    = TYP_VOID;
         }
         else if (access & CORINFO_ACCESS_GET)
@@ -17087,8 +17102,8 @@ GenTree* Compiler::gtNewRefCOMfield(GenTree*                objPtr,
 
         if ((access & CORINFO_ACCESS_SET) != 0)
         {
-            result = (lclTyp == TYP_STRUCT) ? gtNewStoreBlkNode(layout, result, assg)->AsIndir()
-                                            : gtNewStoreIndNode(lclTyp, result, assg);
+            result = (lclTyp == TYP_STRUCT) ? gtNewStoreBlkNode(layout, result, value)->AsIndir()
+                                            : gtNewStoreIndNode(lclTyp, result, value);
             if (varTypeIsStruct(lclTyp))
             {
                 result = impStoreStruct(result, CHECK_SPILL_ALL);
@@ -17109,9 +17124,6 @@ GenTree* Compiler::gtNewRefCOMfield(GenTree*                objPtr,
  *  Return true if the given node (excluding children trees) contains side effects.
  *  Note that it does not recurse, and children need to be handled separately.
  *  It may return false even if the node has GTF_SIDE_EFFECT (because of its children).
- *
- *  Similar to OperMayThrow() (but handles GT_CALLs specially), but considers
- *  assignments too.
  */
 
 bool Compiler::gtNodeHasSideEffects(GenTree* tree, GenTreeFlags flags)
@@ -17360,22 +17372,6 @@ bool Compiler::gtSplitTree(
         }
 
     private:
-        bool IsLocation(const UseInfo& useInf)
-        {
-            if (useInf.User == nullptr)
-            {
-                return false;
-            }
-
-            if (useInf.User->OperIs(GT_STORE_DYN_BLK) && !(*useInf.Use)->OperIs(GT_CNS_INT, GT_INIT_VAL) &&
-                (useInf.Use == &useInf.User->AsStoreDynBlk()->Data()))
-            {
-                return true;
-            }
-
-            return false;
-        }
-
         bool IsReturned(const UseInfo& useInf, bool userIsReturned)
         {
             if (useInf.User != nullptr)
@@ -17469,18 +17465,6 @@ bool Compiler::gtSplitTree(
                 return;
             }
 
-            if (IsLocation(useInf))
-            {
-                // Only a handful of nodes can be location, and they are all unary or nullary.
-                assert((*use)->OperIs(GT_IND, GT_BLK, GT_LCL_VAR, GT_LCL_FLD));
-                if ((*use)->OperIsUnary())
-                {
-                    SplitOutUse(UseInfo{&(*use)->AsUnOp()->gtOp1, user}, false);
-                }
-
-                return;
-            }
-
 #ifndef TARGET_64BIT
             // GT_MUL with GTF_MUL_64RSLT is required to stay with casts on the
             // operands. Note that one operand may also be a constant, but we
@@ -17551,6 +17535,41 @@ bool Compiler::gtSplitTree(
     return splitter.MadeChanges;
 }
 
+//------------------------------------------------------------------------
+// gtWrapWithSideEffects: Extracts side effects from sideEffectSource (if any)
+//    and wraps the input tree with a COMMA node with them.
+//
+// Arguments:
+//    tree              - the expression tree to wrap with side effects (if any)
+//                        it has to be either a side effect free subnode of sideEffectsSource
+//                        or any tree outside sideEffectsSource's hierarchy
+//    sideEffectsSource - the expression tree to extract side effects from
+//    sideEffectsFlags  - side effect flags to be considered
+//    ignoreRoot        - ignore side effects on the expression root node
+//
+// Return Value:
+//    The original tree wrapped with a COMMA node that contains the side effects
+//    or just the tree itself if sideEffectSource has no side effects.
+//
+GenTree* Compiler::gtWrapWithSideEffects(GenTree*     tree,
+                                         GenTree*     sideEffectsSource,
+                                         GenTreeFlags sideEffectsFlags,
+                                         bool         ignoreRoot)
+{
+    GenTree* sideEffects = nullptr;
+    gtExtractSideEffList(sideEffectsSource, &sideEffects, sideEffectsFlags, ignoreRoot);
+    if (sideEffects != nullptr)
+    {
+        // TODO: assert if tree is a subnode of sideEffectsSource and the tree has its own side effects
+        // otherwise the resulting COMMA might have some side effects to be duplicated
+        // It should be possible to be smarter here and allow such cases by extracting the side effects
+        // properly for this particular case. For now, caller is responsible for avoiding such cases.
+
+        tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), sideEffects, tree);
+    }
+    return tree;
+}
+
 //------------------------------------------------------------------------
 // gtExtractSideEffList: Extracts side effects from the given expression.
 //
@@ -17588,7 +17607,9 @@ void Compiler::gtExtractSideEffList(GenTree*     expr,
             return m_result;
         }
 
-        SideEffectExtractor(Compiler* compiler, GenTreeFlags flags) : GenTreeVisitor(compiler), m_flags(flags)
+        SideEffectExtractor(Compiler* compiler, GenTreeFlags flags)
+            : GenTreeVisitor(compiler)
+            , m_flags(flags)
         {
         }
 
@@ -17643,8 +17664,6 @@ void Compiler::gtExtractSideEffList(GenTree*     expr,
                         colon->gtOp2  = (elseSideEffects != nullptr) ? elseSideEffects : m_compiler->gtNewNothingNode();
                         qmark->gtType = TYP_VOID;
                         colon->gtType = TYP_VOID;
-
-                        qmark->gtFlags &= ~GTF_QMARK_CAST_INSTOF;
                         Append(qmark);
                     }
 
@@ -17704,7 +17723,7 @@ void Compiler::gtExtractSideEffList(GenTree*     expr,
 
             // Set the ValueNumber 'gtVNPair' for the new GT_COMMA node
             //
-            if (m_result->gtVNPair.BothDefined() && node->gtVNPair.BothDefined())
+            if ((m_compiler->vnStore != nullptr) && m_result->gtVNPair.BothDefined() && node->gtVNPair.BothDefined())
             {
                 // The result of a GT_COMMA node is op2, the normal value number is op2vnp
                 // But we also need to include the union of side effects from op1 and op2.
@@ -17871,7 +17890,9 @@ Compiler::FindLinkData Compiler::gtFindLink(Statement* stmt, GenTree* node)
             DoPreOrder = true,
         };
 
-        FindLinkWalker(Compiler* comp, GenTree* node) : GenTreeVisitor(comp), m_node(node)
+        FindLinkWalker(Compiler* comp, GenTree* node)
+            : GenTreeVisitor(comp)
+            , m_node(node)
         {
         }
 
@@ -18057,7 +18078,9 @@ bool Compiler::gtTreeContainsOper(GenTree* tree, genTreeOps oper)
         genTreeOps m_oper;
 
     public:
-        Visitor(Compiler* comp, genTreeOps oper) : GenTreeVisitor(comp), m_oper(oper)
+        Visitor(Compiler* comp, genTreeOps oper)
+            : GenTreeVisitor(comp)
+            , m_oper(oper)
         {
         }
 
@@ -18098,7 +18121,8 @@ ExceptionSetFlags Compiler::gtCollectExceptions(GenTree* tree)
         ExceptionSetFlags m_preciseExceptions = ExceptionSetFlags::None;
 
     public:
-        ExceptionsWalker(Compiler* comp) : GenTreeVisitor<ExceptionsWalker>(comp)
+        ExceptionsWalker(Compiler* comp)
+            : GenTreeVisitor<ExceptionsWalker>(comp)
         {
         }
 
@@ -18156,7 +18180,9 @@ bool Compiler::gtComplexityExceeds(GenTree* tree, unsigned limit)
             DoPreOrder = true,
         };
 
-        ComplexityVisitor(Compiler* comp, unsigned limit) : GenTreeVisitor(comp), m_limit(limit)
+        ComplexityVisitor(Compiler* comp, unsigned limit)
+            : GenTreeVisitor(comp)
+            , m_limit(limit)
         {
         }
 
@@ -18382,7 +18408,7 @@ GenTreeLclVar* GenTree::IsImplicitByrefParameterValuePostMorph(Compiler* compile
 }
 
 //------------------------------------------------------------------------
-// IsLclVarUpdateTree: Determine whether this is an assignment tree of the
+// IsLclVarUpdateTree: Determine whether this is a local store tree of the
 //                     form Vn = Vn 'oper' 'otherTree' where Vn is a lclVar
 //
 // Arguments:
@@ -18830,18 +18856,6 @@ bool GenTree::IsFieldAddr(Compiler* comp, GenTree** pBaseAddr, FieldSeq** pFldSe
     return false;
 }
 
-bool Compiler::gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_FIELD_HANDLE fldHnd)
-{
-    if (fieldNodeType != TYP_REF)
-    {
-        return false;
-    }
-    noway_assert(fldHnd != nullptr);
-    CorInfoType cit      = info.compCompHnd->getFieldType(fldHnd);
-    var_types   fieldTyp = JITtype2varType(cit);
-    return fieldTyp != TYP_REF;
-}
-
 //------------------------------------------------------------------------
 // gtStoreDefinesField: Does the given parent store modify the given field?
 //
@@ -18892,24 +18906,23 @@ bool Compiler::gtStoreDefinesField(
 //        otherwise actual type may be a subtype.
 //    *pIsNonNull set true if tree value is known not to be null,
 //        otherwise a null value is possible.
-
+//
 CORINFO_CLASS_HANDLE Compiler::gtGetClassHandle(GenTree* tree, bool* pIsExact, bool* pIsNonNull)
 {
     // Set default values for our out params.
-    *pIsNonNull                   = false;
-    *pIsExact                     = false;
-    CORINFO_CLASS_HANDLE objClass = nullptr;
+    *pIsNonNull = false;
+    *pIsExact   = false;
 
     // Bail out if the tree is not a ref type.
-    var_types treeType = tree->TypeGet();
-    if (treeType != TYP_REF)
+    if (!tree->TypeIs(TYP_REF))
     {
-        return objClass;
+        return NO_CLASS_HANDLE;
     }
 
     // Tunnel through commas.
-    GenTree*         obj   = tree->gtEffectiveVal();
-    const genTreeOps objOp = obj->OperGet();
+    GenTree*             obj      = tree->gtEffectiveVal();
+    const genTreeOps     objOp    = obj->OperGet();
+    CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
 
     switch (objOp)
     {
@@ -19058,7 +19071,6 @@ CORINFO_CLASS_HANDLE Compiler::gtGetClassHandle(GenTree* tree, bool* pIsExact, b
                 assert(runtimeType != NO_CLASS_HANDLE);
 
                 objClass    = runtimeType;
-                *pIsExact   = false;
                 *pIsNonNull = true;
             }
 
@@ -19102,9 +19114,6 @@ CORINFO_CLASS_HANDLE Compiler::gtGetClassHandle(GenTree* tree, bool* pIsExact, b
                 {
                     objClass = gtGetArrayElementClassHandle(base->AsArrElem()->gtArrObj);
                 }
-
-                *pIsExact   = false;
-                *pIsNonNull = false;
             }
             else if (base->OperGet() == GT_ADD)
             {
@@ -19141,7 +19150,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetClassHandle(GenTree* tree, bool* pIsExact, b
                 FieldSeq* fldSeq = base->AsIntCon()->gtFieldSeq;
                 if ((fldSeq != nullptr) && (fldSeq->GetOffset() == base->AsIntCon()->IconValue()))
                 {
-                    CORINFO_FIELD_HANDLE fldHandle = base->AsIntCon()->gtFieldSeq->GetFieldHandle();
+                    CORINFO_FIELD_HANDLE fldHandle = fldSeq->GetFieldHandle();
                     objClass                       = gtGetFieldClassHandle(fldHandle, pIsExact, pIsNonNull);
                 }
             }
@@ -19173,6 +19182,12 @@ CORINFO_CLASS_HANDLE Compiler::gtGetClassHandle(GenTree* tree, bool* pIsExact, b
         }
     }
 
+    if ((objClass == NO_CLASS_HANDLE) && (vnStore != nullptr))
+    {
+        // Try VN if we haven't found a class handle yet
+        objClass = vnStore->GetObjectType(tree->gtVNPair.GetConservative(), pIsExact, pIsNonNull);
+    }
+
     if ((objClass != NO_CLASS_HANDLE) && !*pIsExact && JitConfig.JitEnableExactDevirtualization())
     {
         CORINFO_CLASS_HANDLE exactClass;
@@ -19202,7 +19217,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetClassHandle(GenTree* tree, bool* pIsExact, b
 // Return Value:
 //    nullptr if helper call result is not a ref class, or the class handle
 //    is unknown, otherwise the class handle.
-
+//
 CORINFO_CLASS_HANDLE Compiler::gtGetHelperCallClassHandle(GenTreeCall* call, bool* pIsExact, bool* pIsNonNull)
 {
     assert(call->gtCallType == CT_HELPER);
@@ -19344,7 +19359,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetHelperCallClassHandle(GenTreeCall* call, boo
 //
 // Return Value:
 //    nullptr if element class handle is unknown, otherwise the class handle.
-
+//
 CORINFO_CLASS_HANDLE Compiler::gtGetArrayElementClassHandle(GenTree* array)
 {
     bool                 isArrayExact   = false;
@@ -19386,10 +19401,13 @@ CORINFO_CLASS_HANDLE Compiler::gtGetArrayElementClassHandle(GenTree* array)
 //    is unknown, otherwise the class handle.
 //
 //    May examine runtime state of static field instances.
-
+//
 CORINFO_CLASS_HANDLE Compiler::gtGetFieldClassHandle(CORINFO_FIELD_HANDLE fieldHnd, bool* pIsExact, bool* pIsNonNull)
 {
-    CORINFO_CLASS_HANDLE fieldClass   = nullptr;
+    *pIsExact   = false;
+    *pIsNonNull = false;
+
+    CORINFO_CLASS_HANDLE fieldClass   = NO_CLASS_HANDLE;
     CorInfoType          fieldCorType = info.compCompHnd->getFieldType(fieldHnd, &fieldClass);
 
     if (fieldCorType == CORINFO_TYPE_CLASS)
@@ -19401,11 +19419,14 @@ CORINFO_CLASS_HANDLE Compiler::gtGetFieldClassHandle(CORINFO_FIELD_HANDLE fieldH
         if (queryForCurrentClass)
         {
 #if DEBUG
-            char fieldNameBuffer[128];
-            char classNameBuffer[128];
-            JITDUMP("Querying runtime about current class of field %s (declared as %s)\n",
-                    eeGetFieldName(fieldHnd, true, fieldNameBuffer, sizeof(fieldNameBuffer)),
-                    eeGetClassName(fieldClass, classNameBuffer, sizeof(classNameBuffer)));
+            if (verbose || JitConfig.EnableExtraSuperPmiQueries())
+            {
+                char        fieldNameBuffer[128];
+                char        classNameBuffer[128];
+                const char* fieldName = eeGetFieldName(fieldHnd, true, fieldNameBuffer, sizeof(fieldNameBuffer));
+                const char* className = eeGetClassName(fieldClass, classNameBuffer, sizeof(classNameBuffer));
+                JITDUMP("\nQuerying runtime about current class of field %s (declared as %s)\n", fieldName, className);
+            }
 #endif // DEBUG
 
             // Is this a fully initialized init-only static field?
@@ -19420,10 +19441,13 @@ CORINFO_CLASS_HANDLE Compiler::gtGetFieldClassHandle(CORINFO_FIELD_HANDLE fieldH
                 *pIsExact   = true;
                 *pIsNonNull = true;
 #ifdef DEBUG
-                char buffer[128];
-                JITDUMP("Runtime reports field is init-only and initialized and has class %s\n",
-                        eeGetClassName(fieldClass, buffer, sizeof(buffer)));
-#endif
+                if (verbose || JitConfig.EnableExtraSuperPmiQueries())
+                {
+                    char        classNameBuffer2[128];
+                    const char* className2 = eeGetClassName(fieldClass, classNameBuffer2, sizeof(classNameBuffer2));
+                    JITDUMP("Runtime reports field is init-only and initialized and has class %s\n", className2);
+                }
+#endif // DEBUG
             }
             else
             {
@@ -19526,6 +19550,27 @@ GenTreeLclVarCommon* Compiler::gtCallGetDefinedRetBufLclAddr(GenTreeCall* call)
 // Return Value:
 //    Will set "*pArr" to "nullptr" if this array address is not parseable.
 //
+// Notes:
+// Instead of (or in addition to) parsing the GenTree, maybe we should be parsing the VN
+// "trees": if optimization has replaced the index expression with a CSE def, it's harder
+// to parse, but the VN tree for the CSE def GT_COMMA has all the same info. For example:
+//
+// \--*  ARR_ADDR  byref System.Collections.Hashtable+Bucket[] $80
+//    \--*  ADD       byref
+//       +--*  LCL_VAR   ref    V01 arg1         u:1
+//       \--*  COMMA     long
+//          +--*  STORE_LCL_VAR long   V21 cse2         d:1
+//          |  \--*  ADD       long
+//          |     +--*  MUL       long
+//          |     |  +--*  CAST      long <- uint
+//          |     |  |  \--*  LCL_VAR   int    V07 loc2         u:2
+//          |     |  \--*  CNS_INT   long   24
+//          |     \--*  CNS_INT   long   16
+//          \--*  LCL_VAR   long   V21 cse2         u:1
+//
+// Here, the COMMA represents the index + offset VN, and we could pull out the index VN
+// from the COMMA VN.
+//
 void GenTreeArrAddr::ParseArrayAddress(Compiler* comp, GenTree** pArr, ValueNum* pInxVN)
 {
     *pArr                 = nullptr;
@@ -19540,13 +19585,23 @@ void GenTreeArrAddr::ParseArrayAddress(Compiler* comp, GenTree** pArr, ValueNum*
     }
 
     // OK, new we have to figure out if any part of the "offset" is a constant contribution to the index.
-    target_ssize_t elemOffset = GetFirstElemOffset();
-    unsigned       elemSizeUn = (GetElemType() == TYP_STRUCT) ? comp->typGetObjLayout(GetElemClassHandle())->GetSize()
+    target_ssize_t firstElemOffset = GetFirstElemOffset();
+    assert(firstElemOffset > 0);
+
+    // If we didn't parse any offset, or the offset we parsed doesn't make sense, then give up on
+    // parsing the array address. (This can happen with JitOptRepeat.)
+    if (offset < firstElemOffset)
+    {
+        *pArr = nullptr;
+        return;
+    }
+
+    unsigned elemSizeUn = (GetElemType() == TYP_STRUCT) ? comp->typGetObjLayout(GetElemClassHandle())->GetSize()
                                                         : genTypeSize(GetElemType());
 
     assert(FitsIn<target_ssize_t>(elemSizeUn));
     target_ssize_t elemSize         = static_cast<target_ssize_t>(elemSizeUn);
-    target_ssize_t constIndexOffset = offset - elemOffset;
+    target_ssize_t constIndexOffset = offset - firstElemOffset;
 
     // This should be divisible by the element size...
     assert((constIndexOffset % elemSize) == 0);
@@ -19622,6 +19677,7 @@ void GenTreeArrAddr::ParseArrayAddress(Compiler* comp, GenTree** pArr, ValueNum*
     if (tree->TypeIs(TYP_REF))
     {
         // This must be the array pointer.
+        assert(*pArr == nullptr);
         *pArr = tree;
         assert(inputMul == 1); // Can't multiply the array pointer by anything.
     }
@@ -19717,7 +19773,10 @@ void GenTreeArrAddr::ParseArrayAddress(Compiler* comp, GenTree** pArr, ValueNum*
             default:
                 break;
         }
+
         // If we didn't return above, must be a contribution to the non-constant part of the index VN.
+        // We don't get here for GT_CNS_INT, GT_ADD, or GT_SUB, or for GT_MUL by constant, or GT_LSH of
+        // constant shift. Thus, the generated index VN does not include the parsed constant offset.
         ValueNum vn = comp->GetValueNumStore()->VNLiberalNormalValue(tree->gtVNPair);
         if (inputMul != 1)
         {
@@ -19880,7 +19939,8 @@ FieldSeq* FieldSeqStore::Append(FieldSeq* a, FieldSeq* b)
     return nullptr;
 }
 
-FieldSeq::FieldSeq(CORINFO_FIELD_HANDLE fieldHnd, ssize_t offset, FieldKind fieldKind) : m_offset(offset)
+FieldSeq::FieldSeq(CORINFO_FIELD_HANDLE fieldHnd, ssize_t offset, FieldKind fieldKind)
+    : m_offset(offset)
 {
     assert(fieldHnd != NO_FIELD_HANDLE);
 
@@ -21096,8 +21156,8 @@ GenTree* Compiler::gtNewSimdBinOpNode(
                             assert(!compIsaSupportedDebugOnly(InstructionSet_AVX512F_VL));
 
                             // Vector256<short> maskedProduct = Avx2.And(widenedProduct, vecCon1).AsInt16()
-                            GenTree* maskedProduct = gtNewSimdBinOpNode(GT_AND, widenedType, widenedProduct, vecCon1,
-                                                                        widenedSimdBaseJitType, widenedSimdSize);
+                            GenTree* maskedProduct    = gtNewSimdBinOpNode(GT_AND, widenedType, widenedProduct, vecCon1,
+                                                                           widenedSimdBaseJitType, widenedSimdSize);
                             GenTree* maskedProductDup = fgMakeMultiUse(&maskedProduct);
 
                             // Vector256<ulong> packedProduct = Avx2.PackUnsignedSaturate(maskedProduct,
@@ -21771,6 +21831,236 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s
     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
 }
 
+#if defined(TARGET_XARCH)
+GenTree* Compiler::gtNewSimdCvtNode(var_types   type,
+                                    GenTree*    op1,
+                                    CorInfoType simdTargetBaseJitType,
+                                    CorInfoType simdSourceBaseJitType,
+                                    unsigned    simdSize)
+{
+    assert(varTypeIsSIMD(type));
+    assert(getSIMDTypeForSize(simdSize) == type);
+    assert(op1 != nullptr);
+    assert(op1->TypeIs(type));
+
+    var_types simdSourceBaseType = JitType2PreciseVarType(simdSourceBaseJitType);
+    var_types simdTargetBaseType = JitType2PreciseVarType(simdTargetBaseJitType);
+    assert(varTypeIsFloating(simdSourceBaseType));
+    assert(varTypeIsIntegral(simdTargetBaseType));
+
+    assert(IsBaselineSimdIsaSupportedDebugOnly());
+    assert(IsBaselineVector512IsaSupportedDebugOnly() ||
+           ((simdTargetBaseType == TYP_INT) && ((simdSize == 16 && compIsaSupportedDebugOnly(InstructionSet_SSE41)) ||
+                                                (simdSize == 32 && compIsaSupportedDebugOnly(InstructionSet_AVX)))));
+
+    // Generate intrinsic needed for conversion
+    NamedIntrinsic hwIntrinsicID = NI_Illegal;
+    switch (simdSourceBaseJitType)
+    {
+        case CORINFO_TYPE_FLOAT:
+        {
+            switch (simdTargetBaseJitType)
+            {
+                case CORINFO_TYPE_INT:
+                {
+                    switch (simdSize)
+                    {
+                        case 64:
+                        {
+                            hwIntrinsicID = NI_AVX512F_ConvertToVector512Int32WithTruncation;
+                            break;
+                        }
+                        case 32:
+                        {
+                            hwIntrinsicID = NI_AVX_ConvertToVector256Int32WithTruncation;
+                            break;
+                        }
+                        case 16:
+                        {
+                            hwIntrinsicID = NI_SSE2_ConvertToVector128Int32WithTruncation;
+                            break;
+                        }
+                        default:
+                            unreached();
+                    }
+                    break;
+                }
+                case CORINFO_TYPE_UINT:
+                {
+                    switch (simdSize)
+                    {
+                        case 64:
+                        {
+                            hwIntrinsicID = NI_AVX512F_ConvertToVector512UInt32WithTruncation;
+                            break;
+                        }
+                        case 32:
+                        {
+                            hwIntrinsicID = NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation;
+                            break;
+                        }
+                        case 16:
+                        {
+                            hwIntrinsicID = NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation;
+                            break;
+                        }
+                        default:
+                            unreached();
+                    }
+                    break;
+                }
+                default:
+                    unreached();
+            }
+            break;
+        }
+        case CORINFO_TYPE_DOUBLE:
+        {
+            switch (simdTargetBaseJitType)
+            {
+                case CORINFO_TYPE_LONG:
+                {
+                    switch (simdSize)
+                    {
+                        case 64:
+                        {
+                            hwIntrinsicID = NI_AVX512DQ_ConvertToVector512Int64WithTruncation;
+                            break;
+                        }
+                        case 32:
+                        {
+                            hwIntrinsicID = NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation;
+                            break;
+                        }
+                        case 16:
+                        {
+                            hwIntrinsicID = NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation;
+                            break;
+                        }
+                        default:
+                            unreached();
+                    }
+                    break;
+                }
+                case CORINFO_TYPE_ULONG:
+                {
+                    switch (simdSize)
+                    {
+                        case 64:
+                        {
+                            hwIntrinsicID = NI_AVX512DQ_ConvertToVector512UInt64WithTruncation;
+                            break;
+                        }
+                        case 32:
+                        {
+                            hwIntrinsicID = NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation;
+                            break;
+                        }
+                        case 16:
+                        {
+                            hwIntrinsicID = NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation;
+                            break;
+                        }
+                        default:
+                            unreached();
+                    }
+                    break;
+                }
+                default:
+                    unreached();
+            }
+            break;
+        }
+        default:
+            unreached();
+    }
+    assert(hwIntrinsicID != NI_Illegal);
+
+    GenTree* fixupVal;
+
+    if (IsBaselineVector512IsaSupportedOpportunistically())
+    {
+        /*Generate the control table for VFIXUPIMMSD/SS
+        - For conversion to unsigned
+                    // QNAN: 0b1000: Saturate to Zero
+                    // SNAN: 0b1000: Saturate to Zero
+                    // ZERO: 0b0000
+                    // +ONE: 0b0000
+                    // -INF: 0b1000: Saturate to Zero
+                    // +INF: 0b0000
+                    // -VAL: 0b1000: Saturate to Zero
+                    // +VAL: 0b0000
+        - For conversion to signed
+                    // QNAN: 0b1000: Saturate to Zero
+                    // SNAN: 0b1000: Saturate to Zero
+                    // ZERO: 0b0000
+                    // +ONE: 0b0000
+                    // -INF: 0b0000
+                    // +INF: 0b0000
+                    // -VAL: 0b0000
+                    // +VAL: 0b0000
+        */
+        int32_t  iconVal = varTypeIsUnsigned(simdTargetBaseType) ? 0x08080088 : 0x00000088;
+        GenTree* tblCon  = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(iconVal), simdTargetBaseJitType, simdSize);
+
+        // We need op1Clone to run fixup
+        GenTree* op1Clone = fgMakeMultiUse(&op1);
+
+        // run vfixupimmsd base on table and no flags reporting
+        fixupVal = gtNewSimdHWIntrinsicNode(type, op1, op1Clone, tblCon, gtNewIconNode(0), NI_AVX512F_Fixup,
+                                            simdSourceBaseJitType, simdSize);
+    }
+    else
+    {
+        // Zero out NaN values from the input.
+        // mask1 contains the output either 0xFFFFFFFF or 0.
+        // FixupVal zeros out any NaN values in the input by ANDing input with mask1.
+        GenTree* op1Clone1 = fgMakeMultiUse(&op1);
+        GenTree* op1Clone2 = fgMakeMultiUse(&op1);
+        GenTree* mask1     = gtNewSimdCmpOpNode(GT_EQ, type, op1, op1Clone1, simdSourceBaseJitType, simdSize);
+        fixupVal           = gtNewSimdBinOpNode(GT_AND, type, op1Clone2, mask1, simdSourceBaseJitType, simdSize);
+    }
+
+    if (varTypeIsSigned(simdTargetBaseType))
+    {
+        GenTree* maxVal;
+        GenTree* maxValDup;
+        if (varTypeIsLong(simdTargetBaseType))
+        {
+            int64_t actualMaxVal = INT64_MAX;
+            maxVal               = gtNewDconNode(static_cast<double>(actualMaxVal), simdSourceBaseType);
+            maxVal               = gtNewSimdCreateBroadcastNode(type, maxVal, simdSourceBaseJitType, simdSize);
+            maxValDup =
+                gtNewSimdCreateBroadcastNode(type, gtNewLconNode(actualMaxVal), simdTargetBaseJitType, simdSize);
+        }
+        else
+        {
+            ssize_t actualMaxVal = INT32_MAX;
+            maxVal               = gtNewDconNode(static_cast<double>(actualMaxVal), simdSourceBaseType);
+            maxVal               = gtNewSimdCreateBroadcastNode(type, maxVal, simdSourceBaseJitType, simdSize);
+            maxValDup =
+                gtNewSimdCreateBroadcastNode(type, gtNewIconNode(actualMaxVal), simdTargetBaseJitType, simdSize);
+        }
+
+        // we will be using the input value twice
+        GenTree* fixupValDup = fgMakeMultiUse(&fixupVal);
+
+        // compare with max value of integer/long
+        fixupVal = gtNewSimdCmpOpNode(GT_GE, type, fixupVal, maxVal, simdSourceBaseJitType, simdSize);
+
+        // cast it
+        GenTree* castNode = gtNewSimdHWIntrinsicNode(type, fixupValDup, hwIntrinsicID, simdSourceBaseJitType, simdSize);
+
+        // use the fixupVal mask with input value and max value to blend
+        return gtNewSimdCndSelNode(type, fixupVal, maxValDup, castNode, simdTargetBaseJitType, simdSize);
+    }
+    else
+    {
+        return gtNewSimdHWIntrinsicNode(type, fixupVal, hwIntrinsicID, simdSourceBaseJitType, simdSize);
+    }
+}
+#endif // TARGET_XARCH
+
 GenTree* Compiler::gtNewSimdCmpOpNode(
     genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
 {
@@ -22094,10 +22384,10 @@ GenTree* Compiler::gtNewSimdCmpOpNode(
 
                     op1 = gtNewSimdHWIntrinsicNode(type, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle,
                                                    CORINFO_TYPE_INT, simdSize);
-                    u = gtNewSimdHWIntrinsicNode(type, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle,
-                                                 CORINFO_TYPE_INT, simdSize);
-                    v = gtNewSimdHWIntrinsicNode(type, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle,
-                                                 CORINFO_TYPE_INT, simdSize);
+                    u   = gtNewSimdHWIntrinsicNode(type, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle,
+                                                   CORINFO_TYPE_INT, simdSize);
+                    v   = gtNewSimdHWIntrinsicNode(type, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle,
+                                                   CORINFO_TYPE_INT, simdSize);
 
                     // Validate we can't use AVX512F_VL_TernaryLogic here
                     assert(!compIsaSupportedDebugOnly(InstructionSet_AVX512F_VL));
@@ -22349,10 +22639,10 @@ GenTree* Compiler::gtNewSimdCmpOpNode(
 
                     op1 = gtNewSimdHWIntrinsicNode(type, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle,
                                                    CORINFO_TYPE_INT, simdSize);
-                    u = gtNewSimdHWIntrinsicNode(type, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle,
-                                                 CORINFO_TYPE_INT, simdSize);
-                    v = gtNewSimdHWIntrinsicNode(type, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle,
-                                                 CORINFO_TYPE_INT, simdSize);
+                    u   = gtNewSimdHWIntrinsicNode(type, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle,
+                                                   CORINFO_TYPE_INT, simdSize);
+                    v   = gtNewSimdHWIntrinsicNode(type, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle,
+                                                   CORINFO_TYPE_INT, simdSize);
 
                     // Validate we can't use AVX512F_VL_TernaryLogic here
                     assert(!compIsaSupportedDebugOnly(InstructionSet_AVX512F_VL));
@@ -25498,8 +25788,8 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si
         tmp       = fgMakeMultiUse(&op1);
         opShifted = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode(shiftVal, TYP_INT),
                                              NI_SSE2_ShiftRightLogical128BitLane, simdBaseJitType, simdSize);
-        op1      = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD16, opShifted, tmp, simdBaseJitType, simdSize);
-        shiftVal = shiftVal / 2;
+        op1       = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD16, opShifted, tmp, simdBaseJitType, simdSize);
+        shiftVal  = shiftVal / 2;
     }
 
     return gtNewSimdToScalarNode(type, op1, simdBaseJitType, simdSize);
@@ -26585,9 +26875,12 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
             case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
             case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
             case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
-
                 addr = Op(3);
                 break;
+
+            case NI_Sve_LoadVector:
+                addr = Op(2);
+                break;
 #endif // TARGET_ARM64
 
             default:
@@ -26868,15 +27161,103 @@ bool GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic() const
     return Oper == GT_AND || Oper == GT_OR || Oper == GT_XOR || Oper == GT_AND_NOT;
 }
 
+//------------------------------------------------------------------------
+// OperIsEmbRoundingEnabled: Is this HWIntrinsic a node with embedded rounding feature.
+//
+// Return Value:
+//    Whether "this" is a node with embedded rounding feature.
+//
+bool GenTreeHWIntrinsic::OperIsEmbRoundingEnabled() const
+{
+#if defined(TARGET_XARCH)
+    NamedIntrinsic intrinsicId = GetHWIntrinsicId();
+
+    if (!HWIntrinsicInfo::IsEmbRoundingCompatible(intrinsicId))
+    {
+        return false;
+    }
+
+    size_t numArgs = GetOperandCount();
+    switch (intrinsicId)
+    {
+        // these intrinsics only have the embedded rounding enabled implementation.
+        case NI_AVX512F_AddScalar:
+        case NI_AVX512F_DivideScalar:
+        case NI_AVX512F_MultiplyScalar:
+        case NI_AVX512F_SubtractScalar:
+        case NI_AVX512F_SqrtScalar:
+        {
+            return true;
+        }
+
+        case NI_AVX512F_FusedMultiplyAdd:
+        case NI_AVX512F_FusedMultiplyAddScalar:
+        case NI_AVX512F_FusedMultiplyAddNegated:
+        case NI_AVX512F_FusedMultiplyAddNegatedScalar:
+        case NI_AVX512F_FusedMultiplyAddSubtract:
+        case NI_AVX512F_FusedMultiplySubtract:
+        case NI_AVX512F_FusedMultiplySubtractAdd:
+        case NI_AVX512F_FusedMultiplySubtractNegated:
+        case NI_AVX512F_FusedMultiplySubtractNegatedScalar:
+        case NI_AVX512F_FusedMultiplySubtractScalar:
+        {
+            return numArgs == 4;
+        }
+
+        case NI_AVX512F_Add:
+        case NI_AVX512F_Divide:
+        case NI_AVX512F_Multiply:
+        case NI_AVX512F_Subtract:
+
+        case NI_AVX512F_Scale:
+        case NI_AVX512F_ScaleScalar:
+
+        case NI_AVX512F_ConvertScalarToVector128Single:
+#if defined(TARGET_AMD64)
+        case NI_AVX512F_X64_ConvertScalarToVector128Double:
+        case NI_AVX512F_X64_ConvertScalarToVector128Single:
+#endif // TARGET_AMD64
+        {
+            return numArgs == 3;
+        }
+
+        case NI_AVX512F_Sqrt:
+        case NI_AVX512F_ConvertToInt32:
+        case NI_AVX512F_ConvertToUInt32:
+        case NI_AVX512F_ConvertToVector256Int32:
+        case NI_AVX512F_ConvertToVector256Single:
+        case NI_AVX512F_ConvertToVector256UInt32:
+        case NI_AVX512F_ConvertToVector512Single:
+        case NI_AVX512F_ConvertToVector512UInt32:
+        case NI_AVX512F_ConvertToVector512Int32:
+#if defined(TARGET_AMD64)
+        case NI_AVX512F_X64_ConvertToInt64:
+        case NI_AVX512F_X64_ConvertToUInt64:
+#endif // TARGET_AMD64
+        case NI_AVX512DQ_ConvertToVector256Single:
+        case NI_AVX512DQ_ConvertToVector512Double:
+        case NI_AVX512DQ_ConvertToVector512Int64:
+        case NI_AVX512DQ_ConvertToVector512UInt64:
+        {
+            return numArgs == 2;
+        }
+
+        default:
+            unreached();
+    }
+#else  // !TARGET_XARCH
+    return false;
+#endif // TARGET_XARCH
+}
+
 //------------------------------------------------------------------------------
 // OperRequiresAsgFlag : Check whether the operation requires GTF_ASG flag regardless
 //                       of the children's flags.
 //
 bool GenTreeHWIntrinsic::OperRequiresAsgFlag() const
 {
-    // A MemoryStore operation is an assignment and barriers, while they
-    // don't technically do an assignment are modeled the same as
-    // GT_MEMORYBARRIER which tracks itself as requiring the GTF_ASG flag
+    // Barriers, while they don't technically do an assignment are modeled the same
+    // as GT_MEMORYBARRIER which tracks itself as requiring the GTF_ASG flag.
     return OperIsMemoryStoreOrBarrier();
 }
 
@@ -27058,7 +27439,7 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId)
             case NI_SSE2_MemoryFence:
             case NI_X86Serialize_Serialize:
             {
-                // Mark as an assignment and global reference, much as is done for GT_MEMORYBARRIER
+                // Mark as a store and global reference, much as is done for GT_MEMORYBARRIER
                 gtFlags |= (GTF_ASG | GTF_GLOB_REF);
                 break;
             }
@@ -27160,7 +27541,7 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet() const
             return GT_AND_NOT;
         }
 #endif
-        // TODO: Handle other cases
+            // TODO: Handle other cases
 
         default:
         {
@@ -27247,7 +27628,7 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler*                comp,
             assert(varTypeIsValidHfaType(hfaType));
 
             // Note that the retail build issues a warning about a potential divsion by zero without this "max",
-            unsigned elemSize = max(1, genTypeSize(hfaType));
+            unsigned elemSize = max(1u, genTypeSize(hfaType));
 
             // The size of this struct should be evenly divisible by elemSize
             assert((structSize % elemSize) == 0);
@@ -27266,6 +27647,14 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler*                comp,
         {
             assert(varTypeIsStruct(returnType));
 
+#ifdef SWIFT_SUPPORT
+            if (callConv == CorInfoCallConvExtension::Swift)
+            {
+                InitializeSwiftReturnRegs(comp, retClsHnd);
+                break;
+            }
+#endif
+
 #ifdef UNIX_AMD64_ABI
 
             SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
@@ -27299,7 +27688,7 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler*                comp,
 #else
             uint32_t floatFieldFlags = comp->info.compCompHnd->getRISCV64PassStructInRegisterFlags(retClsHnd);
 #endif
-            BYTE     gcPtrs[2]       = {TYPE_GC_NONE, TYPE_GC_NONE};
+            BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
             comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]);
 
             if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO)
@@ -27369,6 +27758,31 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler*                comp,
 #endif
 }
 
+#ifdef SWIFT_SUPPORT
+//---------------------------------------------------------------------------------------
+// InitializeSwiftReturnRegs:
+//   Initialize the Return Type Descriptor for a method that returns with the
+//   Swift calling convention.
+//
+// Parameters:
+//   comp   - Compiler instance
+//   clsHnd - Struct type being returned
+//
+void ReturnTypeDesc::InitializeSwiftReturnRegs(Compiler* comp, CORINFO_CLASS_HANDLE clsHnd)
+{
+    const CORINFO_SWIFT_LOWERING* lowering = comp->GetSwiftLowering(clsHnd);
+    assert(!lowering->byReference);
+
+    static_assert_no_msg(MAX_SWIFT_LOWERED_ELEMENTS <= MAX_RET_REG_COUNT);
+    assert(lowering->numLoweredElements <= MAX_RET_REG_COUNT);
+
+    for (size_t i = 0; i < lowering->numLoweredElements; i++)
+    {
+        m_regType[i] = JITtype2varType(lowering->loweredElements[i]);
+    }
+}
+#endif
+
 //---------------------------------------------------------------------------------------
 // InitializeLongReturnType:
 //    Initialize the Return Type Descriptor for a method that returns a TYP_LONG
@@ -27432,8 +27846,9 @@ void ReturnTypeDesc::InitializeReturnType(Compiler*                comp,
 // GetABIReturnReg:  Return i'th return register as per target ABI
 //
 // Arguments:
-//     idx   -   Index of the return register.
-//               The first return register has an index of 0 and so on.
+//     idx       -   Index of the return register.
+//                   The first return register has an index of 0 and so on.
+//     callConv  -   Associated calling convention
 //
 // Return Value:
 //     Returns i'th return register as per target ABI.
@@ -27442,13 +27857,44 @@ void ReturnTypeDesc::InitializeReturnType(Compiler*                comp,
 //     x86 and ARM return long in multiple registers.
 //     ARM and ARM64 return HFA struct in multiple registers.
 //
-regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const
+regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx, CorInfoCallConvExtension callConv) const
 {
     unsigned count = GetReturnRegCount();
     assert(idx < count);
 
     regNumber resultReg = REG_NA;
 
+#ifdef SWIFT_SUPPORT
+    if (callConv == CorInfoCallConvExtension::Swift)
+    {
+        static const regNumber swiftIntReturnRegs[]   = {REG_SWIFT_INTRET_ORDER};
+        static const regNumber swiftFloatReturnRegs[] = {REG_SWIFT_FLOATRET_ORDER};
+        assert((idx < ArrLen(swiftIntReturnRegs)) && (idx < ArrLen(swiftFloatReturnRegs)));
+        unsigned intRegIdx   = 0;
+        unsigned floatRegIdx = 0;
+        for (unsigned i = 0; i < idx; i++)
+        {
+            if (varTypeUsesIntReg(GetReturnRegType(i)))
+            {
+                intRegIdx++;
+            }
+            else
+            {
+                floatRegIdx++;
+            }
+        }
+
+        if (varTypeUsesIntReg(GetReturnRegType(idx)))
+        {
+            return swiftIntReturnRegs[intRegIdx];
+        }
+        else
+        {
+            return swiftFloatReturnRegs[floatRegIdx];
+        }
+    }
+#endif
+
 #ifdef UNIX_AMD64_ABI
     var_types regType0 = GetReturnRegType(0);
 
@@ -27596,7 +28042,7 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const
 // GetABIReturnRegs: get the mask of return registers as per target arch ABI.
 //
 // Arguments:
-//    None
+//    callConv - The calling convention
 //
 // Return Value:
 //    reg mask of return registers in which the return type is returned.
@@ -27606,14 +28052,14 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const
 //    of return registers and wants to know the set of return registers.
 //
 // static
-regMaskTP ReturnTypeDesc::GetABIReturnRegs() const
+regMaskTP ReturnTypeDesc::GetABIReturnRegs(CorInfoCallConvExtension callConv) const
 {
     regMaskTP resultMask = RBM_NONE;
 
     unsigned count = GetReturnRegCount();
     for (unsigned i = 0; i < count; ++i)
     {
-        resultMask |= genRegMask(GetABIReturnReg(i));
+        resultMask |= genRegMask(GetABIReturnReg(i, callConv));
     }
 
     return resultMask;
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 6394eb4c99c8..daaa0051e41a 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -158,10 +158,10 @@ inline ExceptionSetFlags& operator&=(ExceptionSetFlags& a, ExceptionSetFlags b)
 
 #ifdef DEBUG
 /*****************************************************************************
-*
-*  TargetHandleTypes are used to determine the type of handle present inside GenTreeIntCon node.
-*  The values are such that they don't overlap with helper's or user function's handle.
-*/
+ *
+ *  TargetHandleTypes are used to determine the type of handle present inside GenTreeIntCon node.
+ *  The values are such that they don't overlap with helper's or user function's handle.
+ */
 enum TargetHandleType : BYTE
 {
     THT_Unknown                   = 2,
@@ -196,23 +196,26 @@ inline AssertionIndex GetAssertionIndex(unsigned index)
 
 class AssertionInfo
 {
-    // true if the assertion holds on the bbNext edge instead of the bbTarget edge (for GT_JTRUE nodes)
-    unsigned short m_isNextEdgeAssertion : 1;
+    // true if the assertion holds on the false edge instead of the true edge (for GT_JTRUE nodes)
+    unsigned short m_assertionHoldsOnFalseEdge : 1;
     // 1-based index of the assertion
     unsigned short m_assertionIndex : 15;
 
-    AssertionInfo(bool isNextEdgeAssertion, AssertionIndex assertionIndex)
-        : m_isNextEdgeAssertion(isNextEdgeAssertion), m_assertionIndex(assertionIndex)
+    AssertionInfo(bool assertionHoldsOnFalseEdge, AssertionIndex assertionIndex)
+        : m_assertionHoldsOnFalseEdge(assertionHoldsOnFalseEdge)
+        , m_assertionIndex(assertionIndex)
     {
         assert(m_assertionIndex == assertionIndex);
     }
 
 public:
-    AssertionInfo() : AssertionInfo(false, 0)
+    AssertionInfo()
+        : AssertionInfo(false, 0)
     {
     }
 
-    AssertionInfo(AssertionIndex assertionIndex) : AssertionInfo(false, assertionIndex)
+    AssertionInfo(AssertionIndex assertionIndex)
+        : AssertionInfo(false, assertionIndex)
     {
     }
 
@@ -225,8 +228,8 @@ class AssertionInfo
 
     void Clear()
     {
-        m_isNextEdgeAssertion = 0;
-        m_assertionIndex      = NO_ASSERTION_INDEX;
+        m_assertionHoldsOnFalseEdge = 0;
+        m_assertionIndex            = NO_ASSERTION_INDEX;
     }
 
     bool HasAssertion() const
@@ -239,9 +242,9 @@ class AssertionInfo
         return m_assertionIndex;
     }
 
-    bool IsNextEdgeAssertion() const
+    bool AssertionHoldsOnFalseEdge() const
     {
-        return m_isNextEdgeAssertion;
+        return m_assertionHoldsOnFalseEdge;
     }
 };
 
@@ -316,7 +319,8 @@ class FieldSeqStore
     JitHashTable<CORINFO_FIELD_HANDLE, JitPtrKeyFuncs<CORINFO_FIELD_STRUCT_>, FieldSeq> m_map;
 
 public:
-    FieldSeqStore(CompAllocator alloc) : m_map(alloc)
+    FieldSeqStore(CompAllocator alloc)
+        : m_map(alloc)
     {
     }
 
@@ -333,13 +337,13 @@ struct Statement;
 /*****************************************************************************/
 
 // Forward declarations of the subtypes
-#define GTSTRUCT_0(fn, en) struct GenTree##fn;
-#define GTSTRUCT_1(fn, en) struct GenTree##fn;
-#define GTSTRUCT_2(fn, en, en2) struct GenTree##fn;
-#define GTSTRUCT_3(fn, en, en2, en3) struct GenTree##fn;
-#define GTSTRUCT_4(fn, en, en2, en3, en4) struct GenTree##fn;
-#define GTSTRUCT_N(fn, ...) struct GenTree##fn;
-#define GTSTRUCT_2_SPECIAL(fn, en, en2) GTSTRUCT_2(fn, en, en2)
+#define GTSTRUCT_0(fn, en)                   struct GenTree##fn;
+#define GTSTRUCT_1(fn, en)                   struct GenTree##fn;
+#define GTSTRUCT_2(fn, en, en2)              struct GenTree##fn;
+#define GTSTRUCT_3(fn, en, en2, en3)         struct GenTree##fn;
+#define GTSTRUCT_4(fn, en, en2, en3, en4)    struct GenTree##fn;
+#define GTSTRUCT_N(fn, ...)                  struct GenTree##fn;
+#define GTSTRUCT_2_SPECIAL(fn, en, en2)      GTSTRUCT_2(fn, en, en2)
 #define GTSTRUCT_3_SPECIAL(fn, en, en2, en3) GTSTRUCT_3(fn, en, en2, en3)
 #include "gtstructs.h"
 
@@ -361,8 +365,8 @@ enum GenTreeFlags : unsigned int
 //  expression node for one of these flags.
 //---------------------------------------------------------------------
 
-    GTF_ASG           = 0x00000001, // sub-expression contains an assignment
-    GTF_CALL          = 0x00000002, // sub-expression contains a  func. call
+    GTF_ASG           = 0x00000001, // sub-expression contains a store
+    GTF_CALL          = 0x00000002, // sub-expression contains a func. call
     GTF_EXCEPT        = 0x00000004, // sub-expression might throw an exception
     GTF_GLOB_REF      = 0x00000008, // sub-expression uses global variable(s)
     GTF_ORDER_SIDEEFF = 0x00000010, // sub-expression has a re-ordering side effect
@@ -509,9 +513,6 @@ enum GenTreeFlags : unsigned int
 
     GTF_RET_MERGED              = 0x80000000, // GT_RETURN -- This is a return generated during epilog merging.
 
-    GTF_QMARK_CAST_INSTOF       = 0x80000000, // GT_QMARK -- Is this a top (not nested) level qmark created for
-                                              //             castclass or instanceof?
-
     GTF_BOX_CLONED              = 0x40000000, // GT_BOX -- this box and its operand has been cloned, cannot assume it to be single-use anymore
     GTF_BOX_VALUE               = 0x80000000, // GT_BOX -- "box" is on a value type
 
@@ -594,14 +595,13 @@ inline GenTreeFlags& operator ^=(GenTreeFlags& a, GenTreeFlags b)
 }
 
 // Can any side-effects be observed externally, say by a caller method?
-// For assignments, only assignments to global memory can be observed
-// externally, whereas simple assignments to local variables can not.
+// For stores, only stores to global memory can be observed externally,
+// whereas simple stores to local variables can not.
 //
 // Be careful when using this inside a "try" protected region as the
-// order of assignments to local variables would need to be preserved
-// wrt side effects if the variables are alive on entry to the
-// "catch/finally" region. In such cases, even assignments to locals
-// will have to be restricted.
+// order of stores to local variables would need to be preserved wrt
+// side effects if the variables are alive on entry to the handler
+// region. In such cases, even stores to locals will have to be restricted.
 #define GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(flags) \
     (((flags) & (GTF_CALL | GTF_EXCEPT)) || (((flags) & (GTF_ASG | GTF_GLOB_REF)) == (GTF_ASG | GTF_GLOB_REF)))
 
@@ -695,11 +695,11 @@ struct GenTree
         return *As##fn();                                                                                              \
     }
 
-#define GTSTRUCT_1(fn, en) GTSTRUCT_N(fn, en)
-#define GTSTRUCT_2(fn, en, en2) GTSTRUCT_N(fn, en, en2)
-#define GTSTRUCT_3(fn, en, en2, en3) GTSTRUCT_N(fn, en, en2, en3)
-#define GTSTRUCT_4(fn, en, en2, en3, en4) GTSTRUCT_N(fn, en, en2, en3, en4)
-#define GTSTRUCT_2_SPECIAL(fn, en, en2) GTSTRUCT_2(fn, en, en2)
+#define GTSTRUCT_1(fn, en)                   GTSTRUCT_N(fn, en)
+#define GTSTRUCT_2(fn, en, en2)              GTSTRUCT_N(fn, en, en2)
+#define GTSTRUCT_3(fn, en, en2, en3)         GTSTRUCT_N(fn, en, en2, en3)
+#define GTSTRUCT_4(fn, en, en2, en3, en4)    GTSTRUCT_N(fn, en, en2, en3, en4)
+#define GTSTRUCT_2_SPECIAL(fn, en, en2)      GTSTRUCT_2(fn, en, en2)
 #define GTSTRUCT_3_SPECIAL(fn, en, en2, en3) GTSTRUCT_3(fn, en, en2, en3)
 
 #include "gtstructs.h"
@@ -724,11 +724,11 @@ struct GenTree
 
 #define NO_CSE (0)
 
-#define IS_CSE_INDEX(x) ((x) != 0)
-#define IS_CSE_USE(x) ((x) > 0)
-#define IS_CSE_DEF(x) ((x) < 0)
+#define IS_CSE_INDEX(x)  ((x) != 0)
+#define IS_CSE_USE(x)    ((x) > 0)
+#define IS_CSE_DEF(x)    ((x) < 0)
 #define GET_CSE_INDEX(x) (((x) > 0) ? x : -(x))
-#define TO_CSE_DEF(x) (-(x))
+#define TO_CSE_DEF(x)    (-(x))
 
     signed char gtCSEnum; // 0 or the CSE index (negated if def)
                           // valid only for CSE expressions
@@ -771,7 +771,7 @@ struct GenTree
     bool gtCostsInitialized;
 #endif // DEBUG
 
-#define MAX_COST UCHAR_MAX
+#define MAX_COST    UCHAR_MAX
 #define IND_COST_EX 3 // execution cost for an indirection
 
     unsigned char GetCostEx() const
@@ -827,7 +827,6 @@ struct GenTree
     //
     // Register or register pair number of the node.
     //
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
 
@@ -859,7 +858,6 @@ struct GenTree
 public:
     // The register number is stored in a small format (8 bits), but the getters return and the setters take
     // a full-size (unsigned) format, to localize the casts here.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     bool canBeContained() const;
@@ -912,16 +910,6 @@ struct GenTree
                 isUsedFromSpillTemp());
     }
 
-    bool isLclVarUsedFromMemory() const
-    {
-        return (OperGet() == GT_LCL_VAR) && (isContained() || isUsedFromSpillTemp());
-    }
-
-    bool isLclFldUsedFromMemory() const
-    {
-        return isLclField() && (isContained() || isUsedFromSpillTemp());
-    }
-
     bool isUsedFromReg() const
     {
         return !isContained() && !isUsedFromSpillTemp();
@@ -972,7 +960,7 @@ struct GenTree
 
     regMaskSmall gtRsvdRegs; // set of fixed trashed  registers
 
-    unsigned AvailableTempRegCount(regMaskTP mask = (regMaskTP)-1) const;
+    unsigned  AvailableTempRegCount(regMaskTP mask = (regMaskTP)-1) const;
     regNumber GetSingleTempReg(regMaskTP mask = (regMaskTP)-1);
     regNumber ExtractTempReg(regMaskTP mask = (regMaskTP)-1);
 
@@ -1221,7 +1209,7 @@ struct GenTree
 
     static bool OperIsStoreBlk(genTreeOps gtOper)
     {
-        return StaticOperIs(gtOper, GT_STORE_BLK, GT_STORE_DYN_BLK);
+        return StaticOperIs(gtOper, GT_STORE_BLK);
     }
 
     bool OperIsStoreBlk() const
@@ -1320,7 +1308,6 @@ struct GenTree
     {
         // Note that only GT_EQ to GT_GT are HIR nodes, GT_TEST and GT_BITTEST
         // nodes are backend nodes only.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 #ifdef TARGET_XARCH
         static_assert_no_msg(AreContiguous(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT, GT_TEST_EQ, GT_TEST_NE,
                                            GT_BITTEST_EQ, GT_BITTEST_NE));
@@ -1426,18 +1413,6 @@ struct GenTree
         return OperIsMul(gtOper);
     }
 
-    bool OperIsArithmetic() const
-    {
-        genTreeOps op = OperGet();
-        return op == GT_ADD || op == GT_SUB || op == GT_MUL || op == GT_DIV || op == GT_MOD
-
-               || op == GT_UDIV || op == GT_UMOD
-
-               || op == GT_OR || op == GT_XOR || op == GT_AND
-
-               || OperIsShiftOrRotate(op);
-    }
-
 #ifdef TARGET_XARCH
     static bool OperIsRMWMemOp(genTreeOps gtOper)
     {
@@ -1536,7 +1511,7 @@ struct GenTree
 #if !defined(TARGET_64BIT)
                 || (gtOper == GT_ADD_HI) || (gtOper == GT_SUB_HI)
 #endif
-                    );
+        );
     }
 
     bool OperMayOverflow() const
@@ -1550,7 +1525,7 @@ struct GenTree
     static bool OperIsIndir(genTreeOps gtOper)
     {
         static_assert_no_msg(AreContiguous(GT_LOCKADD, GT_XAND, GT_XORR, GT_XADD, GT_XCHG, GT_CMPXCHG, GT_IND,
-                                           GT_STOREIND, GT_BLK, GT_STORE_BLK, GT_STORE_DYN_BLK, GT_NULLCHECK));
+                                           GT_STOREIND, GT_BLK, GT_STORE_BLK, GT_NULLCHECK));
         return (GT_LOCKADD <= gtOper) && (gtOper <= GT_NULLCHECK);
     }
 
@@ -1781,9 +1756,9 @@ struct GenTree
         return (DebugOperKind() & DBK_NOTLIR) == 0;
     }
 
-    bool OperSupportsReverseOpEvalOrder(Compiler* comp) const;
+    bool        OperSupportsReverseOpEvalOrder(Compiler* comp) const;
     static bool RequiresNonNullOp2(genTreeOps oper);
-    bool IsValidCallArgument();
+    bool        IsValidCallArgument();
 #endif // DEBUG
 
     inline bool IsIntegralConst(ssize_t constVal) const;
@@ -1884,7 +1859,7 @@ struct GenTree
     bool OperRequiresCallFlag(Compiler* comp) const;
 
     ExceptionSetFlags OperExceptions(Compiler* comp);
-    bool OperMayThrow(Compiler* comp);
+    bool              OperMayThrow(Compiler* comp);
 
     bool OperRequiresGlobRefFlag(Compiler* comp) const;
 
@@ -1921,7 +1896,7 @@ struct GenTree
 
     static bool Compare(GenTree* op1, GenTree* op2, bool swapOK = false);
 
-//---------------------------------------------------------------------
+    //---------------------------------------------------------------------
 
 #if defined(DEBUG) || CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_MEM_ALLOC ||     \
     NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS || DUMP_FLOWGRAPHS
@@ -1965,8 +1940,8 @@ struct GenTree
     }
 
     template <typename T>
-    void BashToConst(T value, var_types type = TYP_UNDEF);
-    void BashToZeroConst(var_types type);
+    void           BashToConst(T value, var_types type = TYP_UNDEF);
+    void           BashToZeroConst(var_types type);
     GenTreeLclVar* BashToLclVar(Compiler* comp, unsigned lclNum);
 
 #if NODEBASH_STATS
@@ -2004,10 +1979,8 @@ struct GenTree
                       unsigned*             pSize     = nullptr);
 
     GenTreeLclVarCommon* IsImplicitByrefParameterValuePreMorph(Compiler* compiler);
-    GenTreeLclVar* IsImplicitByrefParameterValuePostMorph(Compiler* compiler, GenTree** addr);
+    GenTreeLclVar*       IsImplicitByrefParameterValuePostMorph(Compiler* compiler, GenTree** addr);
 
-    // Determine whether this is an assignment tree of the form X = X (op) Y,
-    // where Y is an arbitrary tree, and X is a lclVar.
     unsigned IsLclVarUpdateTree(GenTree** otherTree, genTreeOps* updateOper);
 
     // Determine whether this tree is a basic block profile count update.
@@ -2022,7 +1995,6 @@ struct GenTree
     // These are only used for dumping.
     // The GetRegNum() is only valid in LIR, but the dumping methods are not easily
     // modified to check this.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     bool InReg() const
@@ -2236,6 +2208,16 @@ struct GenTree
         return (gtOper == GT_CNS_INT) ? (gtFlags & GTF_ICON_HDL_MASK) : GTF_EMPTY;
     }
 
+    bool IsTlsIconHandle()
+    {
+        if (IsIconHandle())
+        {
+            GenTreeFlags tlsFlags = (GTF_ICON_TLSGD_OFFSET | GTF_ICON_TLS_HDL);
+            return ((gtFlags & tlsFlags) == tlsFlags);
+        }
+        return false;
+    }
+
     // Mark this node as no longer being a handle; clear its GTF_ICON_*_HDL bits.
     void ClearIconHandleMask()
     {
@@ -2244,21 +2226,23 @@ struct GenTree
     }
 
 #if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
+
     bool IsEmbMaskOp()
     {
-        bool result = (gtFlags & GTF_HW_EM_OP) != 0;
-        assert(!result || (gtOper == GT_HWINTRINSIC));
-        return result;
+        return OperIsHWIntrinsic() && ((gtFlags & GTF_HW_EM_OP) != 0);
     }
 
     void MakeEmbMaskOp()
     {
+        assert(OperIsHWIntrinsic());
         assert(!IsEmbMaskOp());
         gtFlags |= GTF_HW_EM_OP;
     }
 
 #endif // TARGET_XARCH && FEATURE_HW_INTRINSICS
 
+    static bool HandleKindDataIsInvariant(GenTreeFlags flags);
+
     bool IsCall() const
     {
         return OperGet() == GT_CALL;
@@ -2271,7 +2255,7 @@ struct GenTree
     bool gtRequestSetFlags();
 
 #ifdef DEBUG
-    static int gtDispFlags(GenTreeFlags flags, GenTreeDebugFlags debugFlags);
+    static int         gtDispFlags(GenTreeFlags flags, GenTreeDebugFlags debugFlags);
     static const char* gtGetHandleKindString(GenTreeFlags flags);
 #endif
 
@@ -2390,7 +2374,7 @@ struct GenTree
     typedef void* VtablePtr;
 
     VtablePtr GetVtableForOper(genTreeOps oper);
-    void SetVtableForOper(genTreeOps oper);
+    void      SetVtableForOper(genTreeOps oper);
 
     static VtablePtr s_vtablesForOpers[GT_COUNT];
     static VtablePtr s_vtableForOp;
@@ -2424,7 +2408,9 @@ struct GenTreePhi final : public GenTree
         Use*     m_next;
 
     public:
-        Use(GenTree* node, Use* next = nullptr) : m_node(node), m_next(next)
+        Use(GenTree* node, Use* next = nullptr)
+            : m_node(node)
+            , m_next(next)
         {
             assert(node->OperIs(GT_PHI_ARG));
         }
@@ -2462,7 +2448,8 @@ struct GenTreePhi final : public GenTree
         Use* m_use;
 
     public:
-        UseIterator(Use* use) : m_use(use)
+        UseIterator(Use* use)
+            : m_use(use)
         {
         }
 
@@ -2498,7 +2485,8 @@ struct GenTreePhi final : public GenTree
         Use* m_uses;
 
     public:
-        UseList(Use* uses) : m_uses(uses)
+        UseList(Use* uses)
+            : m_uses(uses)
         {
         }
 
@@ -2515,7 +2503,9 @@ struct GenTreePhi final : public GenTree
 
     Use* gtUses;
 
-    GenTreePhi(var_types type) : GenTree(GT_PHI, type), gtUses(nullptr)
+    GenTreePhi(var_types type)
+        : GenTree(GT_PHI, type)
+        , gtUses(nullptr)
     {
     }
 
@@ -2564,7 +2554,8 @@ struct GenTreePhi final : public GenTree
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreePhi() : GenTree()
+    GenTreePhi()
+        : GenTree()
     {
     }
 #endif
@@ -2583,7 +2574,10 @@ struct GenTreeFieldList : public GenTree
 
     public:
         Use(GenTree* node, unsigned offset, var_types type)
-            : m_node(node), m_next(nullptr), m_offset(static_cast<uint16_t>(offset)), m_type(type)
+            : m_node(node)
+            , m_next(nullptr)
+            , m_offset(static_cast<uint16_t>(offset))
+            , m_type(type)
         {
             // We can save space on 32 bit hosts by storing the offset as uint16_t. Struct promotion
             // only accepts structs which are much smaller than that - 128 bytes = max 4 fields * max
@@ -2643,7 +2637,8 @@ struct GenTreeFieldList : public GenTree
         Use* use;
 
     public:
-        UseIterator(Use* use) : use(use)
+        UseIterator(Use* use)
+            : use(use)
         {
         }
 
@@ -2679,7 +2674,9 @@ struct GenTreeFieldList : public GenTree
         Use* m_tail;
 
     public:
-        UseList() : m_head(nullptr), m_tail(nullptr)
+        UseList()
+            : m_head(nullptr)
+            , m_tail(nullptr)
         {
         }
 
@@ -2759,7 +2756,8 @@ struct GenTreeFieldList : public GenTree
     UseList m_uses;
 
 public:
-    GenTreeFieldList() : GenTree(GT_FIELD_LIST, TYP_STRUCT)
+    GenTreeFieldList()
+        : GenTree(GT_FIELD_LIST, TYP_STRUCT)
     {
         SetContained();
     }
@@ -2858,18 +2856,17 @@ class GenTreeUseEdgeIterator final
     // Advance functions for special nodes
     void AdvanceCmpXchg();
     void AdvanceArrElem();
-    void AdvanceStoreDynBlk();
     void AdvanceFieldList();
     void AdvancePhi();
     void AdvanceConditional();
 
     template <bool ReverseOperands>
-    void           AdvanceBinOp();
-    void           SetEntryStateForBinOp();
+    void AdvanceBinOp();
+    void SetEntryStateForBinOp();
 
     // The advance function for call nodes
     template <int state>
-    void          AdvanceCall();
+    void AdvanceCall();
 
 #if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS)
     void AdvanceMultiOp();
@@ -2928,12 +2925,14 @@ class GenTreeOperandIterator final
 
     GenTreeUseEdgeIterator m_useEdges;
 
-    GenTreeOperandIterator(GenTree* node) : m_useEdges(node)
+    GenTreeOperandIterator(GenTree* node)
+        : m_useEdges(node)
     {
     }
 
 public:
-    GenTreeOperandIterator() : m_useEdges()
+    GenTreeOperandIterator()
+        : m_useEdges()
     {
     }
 
@@ -2976,12 +2975,14 @@ struct GenTreeUnOp : public GenTree
 
 protected:
     GenTreeUnOp(genTreeOps oper, var_types type DEBUGARG(bool largeNode = false))
-        : GenTree(oper, type DEBUGARG(largeNode)), gtOp1(nullptr)
+        : GenTree(oper, type DEBUGARG(largeNode))
+        , gtOp1(nullptr)
     {
     }
 
     GenTreeUnOp(genTreeOps oper, var_types type, GenTree* op1 DEBUGARG(bool largeNode = false))
-        : GenTree(oper, type DEBUGARG(largeNode)), gtOp1(op1)
+        : GenTree(oper, type DEBUGARG(largeNode))
+        , gtOp1(op1)
     {
         assert(op1 != nullptr || NullOp1Legal());
         if (op1 != nullptr)
@@ -2991,7 +2992,9 @@ struct GenTreeUnOp : public GenTree
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeUnOp() : GenTree(), gtOp1(nullptr)
+    GenTreeUnOp()
+        : GenTree()
+        , gtOp1(nullptr)
     {
     }
 #endif
@@ -3002,7 +3005,8 @@ struct GenTreeOp : public GenTreeUnOp
     GenTree* gtOp2;
 
     GenTreeOp(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2 DEBUGARG(bool largeNode = false))
-        : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode)), gtOp2(op2)
+        : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
+        , gtOp2(op2)
     {
         // comparisons are always integral types
         assert(!GenTree::OperIsCompare(oper) || varTypeIsIntegral(type));
@@ -3021,7 +3025,8 @@ struct GenTreeOp : public GenTreeUnOp
     // A small set of types are unary operators with optional arguments.  We use
     // this constructor to build those.
     GenTreeOp(genTreeOps oper, var_types type DEBUGARG(bool largeNode = false))
-        : GenTreeUnOp(oper, type DEBUGARG(largeNode)), gtOp2(nullptr)
+        : GenTreeUnOp(oper, type DEBUGARG(largeNode))
+        , gtOp2(nullptr)
     {
         // Unary operators with optional arguments:
         assert(oper == GT_RETURN || oper == GT_RETFILT || OperIsBlk(oper));
@@ -3043,7 +3048,9 @@ struct GenTreeOp : public GenTreeUnOp
 #endif
 
 #if DEBUGGABLE_GENTREE
-    GenTreeOp() : GenTreeUnOp(), gtOp2(nullptr)
+    GenTreeOp()
+        : GenTreeUnOp()
+        , gtOp2(nullptr)
     {
     }
 #endif
@@ -3053,11 +3060,14 @@ struct GenTreeVal : public GenTree
 {
     size_t gtVal1;
 
-    GenTreeVal(genTreeOps oper, var_types type, ssize_t val) : GenTree(oper, type), gtVal1(val)
+    GenTreeVal(genTreeOps oper, var_types type, ssize_t val)
+        : GenTree(oper, type)
+        , gtVal1(val)
     {
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeVal() : GenTree()
+    GenTreeVal()
+        : GenTree()
     {
     }
 #endif
@@ -3065,12 +3075,12 @@ struct GenTreeVal : public GenTree
 
 struct GenTreeIntConCommon : public GenTree
 {
-    inline INT64 LngValue() const;
-    inline void SetLngValue(INT64 val);
+    inline INT64   LngValue() const;
+    inline void    SetLngValue(INT64 val);
     inline ssize_t IconValue() const;
-    inline void SetIconValue(ssize_t val);
-    inline INT64 IntegralValue() const;
-    inline void SetIntegralValue(int64_t value);
+    inline void    SetIconValue(ssize_t val);
+    inline INT64   IntegralValue() const;
+    inline void    SetIntegralValue(int64_t value);
 
     template <typename T>
     inline void SetValueTruncating(T value);
@@ -3113,7 +3123,8 @@ struct GenTreeIntConCommon : public GenTree
 #endif
 
 #if DEBUGGABLE_GENTREE
-    GenTreeIntConCommon() : GenTree()
+    GenTreeIntConCommon()
+        : GenTree()
     {
     }
 #endif
@@ -3126,11 +3137,14 @@ struct GenTreePhysReg : public GenTree
     // GetRegNum() indicates the destination (and can be changed)
     // whereas reg indicates the source
     regNumber gtSrcReg;
-    GenTreePhysReg(regNumber r, var_types type = TYP_I_IMPL) : GenTree(GT_PHYSREG, type), gtSrcReg(r)
+    GenTreePhysReg(regNumber r, var_types type = TYP_I_IMPL)
+        : GenTree(GT_PHYSREG, type)
+        , gtSrcReg(r)
     {
     }
 #if DEBUGGABLE_GENTREE
-    GenTreePhysReg() : GenTree()
+    GenTreePhysReg()
+        : GenTree()
     {
     }
 #endif
@@ -3189,7 +3203,8 @@ struct GenTreeIntCon : public GenTreeIntConCommon
     void FixupInitBlkValue(var_types type);
 
 #if DEBUGGABLE_GENTREE
-    GenTreeIntCon() : GenTreeIntConCommon()
+    GenTreeIntCon()
+        : GenTreeIntConCommon()
     {
     }
 #endif
@@ -3210,12 +3225,14 @@ struct GenTreeLngCon : public GenTreeIntConCommon
         return (INT32)(gtLconVal >> 32);
     }
 
-    GenTreeLngCon(INT64 val) : GenTreeIntConCommon(GT_CNS_NATIVELONG, TYP_LONG)
+    GenTreeLngCon(INT64 val)
+        : GenTreeIntConCommon(GT_CNS_NATIVELONG, TYP_LONG)
     {
         SetLngValue(val);
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeLngCon() : GenTreeIntConCommon()
+    GenTreeLngCon()
+        : GenTreeIntConCommon()
     {
     }
 #endif
@@ -3346,13 +3363,15 @@ struct GenTreeDblCon : public GenTree
         return (bits == otherBits);
     }
 
-    GenTreeDblCon(double val, var_types type = TYP_DOUBLE) : GenTree(GT_CNS_DBL, type)
+    GenTreeDblCon(double val, var_types type = TYP_DOUBLE)
+        : GenTree(GT_CNS_DBL, type)
     {
         assert(varTypeIsFloating(type));
         SetDconValue(val);
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeDblCon() : GenTree()
+    GenTreeDblCon()
+        : GenTree()
     {
     }
 #endif
@@ -3376,11 +3395,14 @@ struct GenTreeStrCon : public GenTree
     // Because this node can come from an inlined method we need to
     // have the scope handle, since it will become a helper call.
     GenTreeStrCon(unsigned sconCPX, CORINFO_MODULE_HANDLE mod DEBUGARG(bool largeNode = false))
-        : GenTree(GT_CNS_STR, TYP_REF DEBUGARG(largeNode)), gtSconCPX(sconCPX), gtScpHnd(mod)
+        : GenTree(GT_CNS_STR, TYP_REF DEBUGARG(largeNode))
+        , gtSconCPX(sconCPX)
+        , gtScpHnd(mod)
     {
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeStrCon() : GenTree()
+    GenTreeStrCon()
+        : GenTree()
     {
     }
 #endif
@@ -3423,12 +3445,14 @@ class SsaNumInfo final
 
     int m_value;
 
-    SsaNumInfo(int value) : m_value(value)
+    SsaNumInfo(int value)
+        : m_value(value)
     {
     }
 
 public:
-    SsaNumInfo() : m_value(SsaConfig::RESERVED_SSA_NUM)
+    SsaNumInfo()
+        : m_value(SsaConfig::RESERVED_SSA_NUM)
     {
     }
 
@@ -3561,7 +3585,8 @@ struct GenTreeLclVarCommon : public GenTreeUnOp
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeLclVarCommon() : GenTreeUnOp()
+    GenTreeLclVarCommon()
+        : GenTreeUnOp()
     {
     }
 #endif
@@ -3700,7 +3725,7 @@ struct GenTreeLclVar : public GenTreeLclVarCommon
     }
 
     unsigned int GetFieldCount(Compiler* compiler) const;
-    var_types GetFieldTypeByIndex(Compiler* compiler, unsigned idx);
+    var_types    GetFieldTypeByIndex(Compiler* compiler, unsigned idx);
 
     bool IsNeverNegative(Compiler* comp) const;
 
@@ -3739,8 +3764,8 @@ struct GenTreeLclVar : public GenTreeLclVarCommon
     }
 #endif
 
-    GenTreeLclVar(genTreeOps oper,
-                  var_types  type,
+    GenTreeLclVar(genTreeOps      oper,
+                  var_types       type,
                   unsigned lclNum DEBUGARG(IL_OFFSET ilOffs = BAD_IL_OFFSET) DEBUGARG(bool largeNode = false))
         : GenTreeLclVarCommon(oper, type, lclNum DEBUGARG(largeNode)) DEBUGARG(gtLclILoffs(ilOffs))
     {
@@ -3753,7 +3778,8 @@ struct GenTreeLclVar : public GenTreeLclVarCommon
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeLclVar() : GenTreeLclVarCommon()
+    GenTreeLclVar()
+        : GenTreeLclVarCommon()
     {
     }
 #endif
@@ -3769,14 +3795,16 @@ struct GenTreeLclFld : public GenTreeLclVarCommon
 
 public:
     GenTreeLclFld(genTreeOps oper, var_types type, unsigned lclNum, unsigned lclOffs, ClassLayout* layout = nullptr)
-        : GenTreeLclVarCommon(oper, type, lclNum), m_lclOffs(static_cast<uint16_t>(lclOffs))
+        : GenTreeLclVarCommon(oper, type, lclNum)
+        , m_lclOffs(static_cast<uint16_t>(lclOffs))
     {
         assert(lclOffs <= UINT16_MAX);
         SetLayout(layout);
     }
 
     GenTreeLclFld(var_types type, unsigned lclNum, unsigned lclOffs, GenTree* data, ClassLayout* layout)
-        : GenTreeLclVarCommon(GT_STORE_LCL_FLD, type, lclNum, data), m_lclOffs(static_cast<uint16_t>(lclOffs))
+        : GenTreeLclVarCommon(GT_STORE_LCL_FLD, type, lclNum, data)
+        , m_lclOffs(static_cast<uint16_t>(lclOffs))
     {
         assert(lclOffs <= UINT16_MAX);
         SetLayout(layout);
@@ -3811,7 +3839,8 @@ struct GenTreeLclFld : public GenTreeLclVarCommon
 #endif // TARGET_ARM
 
 #if DEBUGGABLE_GENTREE
-    GenTreeLclFld() : GenTreeLclVarCommon()
+    GenTreeLclFld()
+        : GenTreeLclVarCommon()
     {
     }
 #endif
@@ -3853,7 +3882,8 @@ struct GenTreeCast : public GenTreeOp
     var_types gtCastType;
 
     GenTreeCast(var_types type, GenTree* op, bool fromUnsigned, var_types castType DEBUGARG(bool largeNode = false))
-        : GenTreeOp(GT_CAST, type, op, nullptr DEBUGARG(largeNode)), gtCastType(castType)
+        : GenTreeOp(GT_CAST, type, op, nullptr DEBUGARG(largeNode))
+        , gtCastType(castType)
     {
         // We do not allow casts from floating point types to be treated as from
         // unsigned to avoid bugs related to wrong GTF_UNSIGNED in case the
@@ -3863,7 +3893,8 @@ struct GenTreeCast : public GenTreeOp
         gtFlags |= fromUnsigned ? GTF_UNSIGNED : GTF_EMPTY;
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeCast() : GenTreeOp()
+    GenTreeCast()
+        : GenTreeOp()
     {
     }
 #endif
@@ -3896,8 +3927,8 @@ struct GenTreeBox : public GenTreeUnOp
     {
         return gtOp1;
     }
-    // This is the statement that contains the assignment tree when the node is an inlined GT_BOX on a value
-    // type
+    // This is the statement that contains the definition tree when the node is an inlined GT_BOX
+    // on a value type
     Statement* gtDefStmtWhenInlinedBoxValue;
     // And this is the statement that copies from the value being boxed to the box payload
     Statement* gtCopyStmtWhenInlinedBoxValue;
@@ -3912,7 +3943,8 @@ struct GenTreeBox : public GenTreeUnOp
     {
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeBox() : GenTreeUnOp()
+    GenTreeBox()
+        : GenTreeUnOp()
     {
     }
 #endif
@@ -3956,7 +3988,8 @@ struct GenTreeFieldAddr : public GenTreeUnOp
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeFieldAddr() : GenTreeUnOp()
+    GenTreeFieldAddr()
+        : GenTreeUnOp()
     {
     }
 #endif
@@ -4026,12 +4059,14 @@ struct GenTreeColon : public GenTreeOp
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeColon() : GenTreeOp()
+    GenTreeColon()
+        : GenTreeOp()
     {
     }
 #endif
 
-    GenTreeColon(var_types typ, GenTree* thenNode, GenTree* elseNode) : GenTreeOp(GT_COLON, typ, elseNode, thenNode)
+    GenTreeColon(var_types typ, GenTree* thenNode, GenTree* elseNode)
+        : GenTreeOp(GT_COLON, typ, elseNode, thenNode)
     {
     }
 };
@@ -4044,13 +4079,15 @@ struct GenTreeConditional : public GenTreeOp
 
     GenTreeConditional(
         genTreeOps oper, var_types type, GenTree* cond, GenTree* op1, GenTree* op2 DEBUGARG(bool largeNode = false))
-        : GenTreeOp(oper, type, op1, op2 DEBUGARG(largeNode)), gtCond(cond)
+        : GenTreeOp(oper, type, op1, op2 DEBUGARG(largeNode))
+        , gtCond(cond)
     {
         assert(cond != nullptr);
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeConditional() : GenTreeOp()
+    GenTreeConditional()
+        : GenTreeOp()
     {
     }
 #endif
@@ -4199,6 +4236,8 @@ struct ReturnTypeDesc
     bool m_inited;
 #endif
 
+    void InitializeSwiftReturnRegs(Compiler* comp, CORINFO_CLASS_HANDLE retClsHnd);
+
 public:
     ReturnTypeDesc()
     {
@@ -4320,16 +4359,16 @@ struct ReturnTypeDesc
     }
 
     // Get i'th ABI return register
-    regNumber GetABIReturnReg(unsigned idx) const;
+    regNumber GetABIReturnReg(unsigned idx, CorInfoCallConvExtension callConv) const;
 
     // Get reg mask of ABI return registers
-    regMaskTP GetABIReturnRegs() const;
+    regMaskTP GetABIReturnRegs(CorInfoCallConvExtension callConv) const;
 };
 
 class TailCallSiteInfo
 {
     bool                   m_isCallvirt : 1;
-    bool                   m_isCalli : 1;
+    bool                   m_isCalli    : 1;
     CORINFO_SIG_INFO       m_sig;
     CORINFO_RESOLVED_TOKEN m_token;
 
@@ -4394,7 +4433,7 @@ enum class CFGCallKind
 
 class CallArgs;
 
-enum class WellKnownArg
+enum class WellKnownArg : unsigned
 {
     None,
     ThisPointer,
@@ -4412,6 +4451,8 @@ enum class WellKnownArg
     R2RIndirectionCell,
     ValidateIndirectCallTarget,
     DispatchIndirectCallTarget,
+    SwiftError,
+    SwiftSelf,
 };
 
 #ifdef DEBUG
@@ -4425,10 +4466,6 @@ struct CallArgABIInformation
         , ByteOffset(0)
         , ByteSize(0)
         , ByteAlignment(0)
-#ifdef UNIX_AMD64_ABI
-        , StructIntRegs(0)
-        , StructFloatRegs(0)
-#endif
 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
         , StructFloatFieldType()
 #endif
@@ -4467,8 +4504,6 @@ struct CallArgABIInformation
     // Unix amd64 will split floating point types and integer types in structs
     // between floating point and general purpose registers. Keep track of that
     // information so we do not need to recompute it later.
-    unsigned                                            StructIntRegs;
-    unsigned                                            StructFloatRegs;
     SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR StructDesc;
 #endif // UNIX_AMD64_ABI
 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
@@ -4526,7 +4561,7 @@ struct CallArgABIInformation
     bool      IsHfaArg() const;
     bool      IsHfaRegArg() const;
     var_types GetHfaType() const;
-    void SetHfaType(var_types type, unsigned hfaSlots);
+    void      SetHfaType(var_types type, unsigned hfaSlots);
 
     regNumber GetRegNum() const
     {
@@ -4580,7 +4615,7 @@ struct CallArgABIInformation
     bool IsMismatchedArgType() const
     {
 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-        return isValidIntArgReg(GetRegNum()) && varTypeUsesFloatReg(ArgType);
+        return genIsValidIntReg(GetRegNum()) && varTypeUsesFloatReg(ArgType);
 #else
         return false;
 #endif // TARGET_LOONGARCH64 || TARGET_RISCV64
@@ -4723,7 +4758,8 @@ class CallArg
 public:
     CallArgABIInformation AbiInfo;
 
-    CallArg(const NewCallArg& arg) : CallArg()
+    CallArg(const NewCallArg& arg)
+        : CallArg()
     {
         m_earlyNode       = arg.Node;
         m_wellKnownArg    = arg.WellKnownArg;
@@ -4734,7 +4770,7 @@ class CallArg
         m_signatureClsHnd = arg.SignatureClsHnd;
     }
 
-    CallArg(const CallArg&) = delete;
+    CallArg(const CallArg&)      = delete;
     CallArg& operator=(CallArg&) = delete;
 
     // clang-format off
@@ -4799,9 +4835,9 @@ class CallArgs
     // made for this call.
     unsigned m_padStkAlign;
 #endif
-    bool m_hasThisPointer : 1;
-    bool m_hasRetBuffer : 1;
-    bool m_isVarArgs : 1;
+    bool m_hasThisPointer           : 1;
+    bool m_hasRetBuffer             : 1;
+    bool m_isVarArgs                : 1;
     bool m_abiInformationDetermined : 1;
     // True if we have one or more register arguments.
     bool m_hasRegArgs : 1;
@@ -4815,15 +4851,15 @@ class CallArgs
     bool m_alignmentDone : 1;
 #endif
 
-    void AddedWellKnownArg(WellKnownArg arg);
-    void RemovedWellKnownArg(WellKnownArg arg);
+    void      AddedWellKnownArg(WellKnownArg arg);
+    void      RemovedWellKnownArg(WellKnownArg arg);
     regNumber GetCustomRegister(Compiler* comp, CorInfoCallConvExtension cc, WellKnownArg arg);
-    void SplitArg(CallArg* arg, unsigned numRegs, unsigned numSlots);
-    void SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs);
+    void      SplitArg(CallArg* arg, unsigned numRegs, unsigned numSlots);
+    void      SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs);
 
 public:
     CallArgs();
-    CallArgs(const CallArgs&) = delete;
+    CallArgs(const CallArgs&)      = delete;
     CallArgs& operator=(CallArgs&) = delete;
 
     CallArg* FindByNode(GenTree* node);
@@ -4848,10 +4884,10 @@ class CallArgs
     CallArg* InsertAfterUnchecked(Compiler* comp, CallArg* after, const NewCallArg& arg);
     CallArg* InsertInstParam(Compiler* comp, GenTree* node);
     CallArg* InsertAfterThisOrFirst(Compiler* comp, const NewCallArg& arg);
-    void PushLateBack(CallArg* arg);
-    void Remove(CallArg* arg);
-#if TARGET_WASM
-    void MoveLateToEarly();
+    void     PushLateBack(CallArg* arg);
+    void     Remove(CallArg* arg);
+#ifdef TARGET_WASM
+    void     MoveLateToEarly();
 #endif
 
     template <typename CopyNodeFunc>
@@ -4873,7 +4909,7 @@ class CallArgs
     bool IsNonStandard(Compiler* comp, GenTreeCall* call, CallArg* arg);
 
     GenTree* MakeTmpArgNode(Compiler* comp, CallArg* arg);
-    void SetTemp(CallArg* arg, unsigned tmpNum);
+    void     SetTemp(CallArg* arg, unsigned tmpNum);
 
     // clang-format off
     bool HasThisPointer() const { return m_hasThisPointer; }
@@ -4911,7 +4947,8 @@ class CallArgs
         CallArg* m_arg;
 
     public:
-        explicit CallArgIterator(CallArg* arg) : m_arg(arg)
+        explicit CallArgIterator(CallArg* arg)
+            : m_arg(arg)
         {
         }
 
@@ -4955,7 +4992,8 @@ class CallArgs
         }
 
     public:
-        explicit EarlyArgIterator(CallArg* arg) : m_arg(arg)
+        explicit EarlyArgIterator(CallArg* arg)
+            : m_arg(arg)
         {
         }
 
@@ -5011,7 +5049,8 @@ struct GenTreeCall final : public GenTree
     CORINFO_SIG_INFO* callSig;
 #endif
 
-    union {
+    union
+    {
         TailCallSiteInfo* tailCallInfo;
         // Only used for unmanaged calls, which cannot be tail-called
         CorInfoCallConvExtension unmgdCallConv;
@@ -5165,6 +5204,10 @@ struct GenTreeCall final : public GenTree
 #endif
     }
 
+#ifdef TARGET_XARCH
+    bool NeedsVzeroupper(Compiler* comp);
+#endif // TARGET_XARCH
+
     // Get reg mask of all the valid registers of gtOtherRegs array
     regMaskTP GetOtherRegMask() const;
 
@@ -5246,6 +5289,15 @@ struct GenTreeCall final : public GenTree
         return (gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0;
     }
 
+#ifdef SWIFT_SUPPORT
+    bool HasSwiftErrorHandling()
+    {
+        // Most calls aren't Swift calls, so short-circuit this check by checking the calling convention first.
+        return (GetUnmanagedCallConv() == CorInfoCallConvExtension::Swift) &&
+               (gtArgs.FindWellKnownArg(WellKnownArg::SwiftError) != nullptr);
+    }
+#endif // SWIFT_SUPPORT
+
     bool IsR2ROrVirtualStubRelativeIndir()
     {
 #if defined(FEATURE_READYTORUN)
@@ -5259,7 +5311,7 @@ struct GenTreeCall final : public GenTree
     }
 
     bool HasNonStandardAddedArgs(Compiler* compiler) const;
-    int GetNonStandardAddedArgCount(Compiler* compiler) const;
+    int  GetNonStandardAddedArgCount(Compiler* compiler) const;
 
     // Returns true if the ABI dictates that this call should get a ret buf
     // arg. This may be out of sync with gtArgs.HasRetBuffer during import
@@ -5269,24 +5321,7 @@ struct GenTreeCall final : public GenTree
         return (gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0;
     }
 
-    //-------------------------------------------------------------------------
-    // TreatAsShouldHaveRetBufArg:
-    //
-    // Arguments:
-    //     compiler, the compiler instance so that we can call eeGetHelperNum
-    //
-    // Return Value:
-    //     Returns true if we treat the call as if it has a retBuf argument
-    //     This method may actually have a retBuf argument
-    //     or it could be a JIT helper that we are still transforming during
-    //     the importer phase.
-    //
-    // Notes:
-    //     On ARM64 marking the method with the GTF_CALL_M_RETBUFFARG flag
-    //     will make ShouldHaveRetBufArg() return true, but will also force the
-    //     use of register x8 to pass the RetBuf argument.
-    //
-    bool TreatAsShouldHaveRetBufArg(Compiler* compiler) const;
+    bool TreatAsShouldHaveRetBufArg() const;
 
     //-----------------------------------------------------------------------------------------
     // HasMultiRegRetVal: whether the call node returns its value in multiple return registers.
@@ -5311,7 +5346,7 @@ struct GenTreeCall final : public GenTree
         }
 #endif
 
-        if (!varTypeIsStruct(gtType) || ShouldHaveRetBufArg())
+        if (!varTypeIsStruct(gtType) || TreatAsShouldHaveRetBufArg())
         {
             return false;
         }
@@ -5643,7 +5678,7 @@ struct GenTreeCall final : public GenTree
     }
 
     GenTreeCallFlags gtCallMoreFlags;  // in addition to gtFlags
-    gtCallTypes      gtCallType : 3;   // value from the gtCallTypes enumeration
+    gtCallTypes      gtCallType   : 3; // value from the gtCallTypes enumeration
     var_types        gtReturnType : 5; // exact return type
 
     uint8_t gtInlineInfoCount; // number of inline candidates for the given call
@@ -5652,13 +5687,15 @@ struct GenTreeCall final : public GenTree
 #if defined(TARGET_WASM)
     CorInfoType gtCorInfoType = CORINFO_TYPE_UNDEF; // the precise return type used to construct the signature
 #endif                                              // defined(TARGET_WASM)
-    union {
+    union
+    {
         void*                gtStubCallStubAddr;   // GTF_CALL_VIRT_STUB - these are never inlined
         CORINFO_CLASS_HANDLE gtInitClsHnd;         // Used by static init helpers, represents a class they init
         IL_OFFSET            gtCastHelperILOffset; // Used by cast helpers to save corresponding IL offset
     };
 
-    union {
+    union
+    {
         // only used for CALLI unmanaged calls (CT_INDIRECT)
         GenTree* gtCallCookie;
 
@@ -5676,7 +5713,8 @@ struct GenTreeCall final : public GenTree
     // expression evaluated after args are placed which determines the control target
     GenTree* gtControlExpr;
 
-    union {
+    union
+    {
         CORINFO_METHOD_HANDLE gtCallMethHnd; // CT_USER_FUNC or CT_HELPER
         GenTree*              gtCallAddr;    // CT_INDIRECT
     };
@@ -5729,11 +5767,13 @@ struct GenTreeCall final : public GenTree
 
     static bool Equals(GenTreeCall* c1, GenTreeCall* c2);
 
-    GenTreeCall(var_types type) : GenTree(GT_CALL, type)
+    GenTreeCall(var_types type)
+        : GenTree(GT_CALL, type)
     {
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeCall() : GenTree()
+    GenTreeCall()
+        : GenTree()
     {
     }
 #endif
@@ -5752,7 +5792,8 @@ struct GenTreeMultiRegOp : public GenTreeOp
     MultiRegSpillFlags gtSpillFlags;
 
     GenTreeMultiRegOp(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2)
-        : GenTreeOp(oper, type, op1, op2), gtOtherReg(REG_NA)
+        : GenTreeOp(oper, type, op1, op2)
+        , gtOtherReg(REG_NA)
     {
         ClearOtherRegFlags();
     }
@@ -5835,7 +5876,8 @@ struct GenTreeMultiRegOp : public GenTreeOp
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeMultiRegOp() : GenTreeOp()
+    GenTreeMultiRegOp()
+        : GenTreeOp()
     {
     }
 #endif
@@ -5853,7 +5895,9 @@ struct GenTreeFptrVal : public GenTree
 #endif
 
     GenTreeFptrVal(var_types type, CORINFO_METHOD_HANDLE meth)
-        : GenTree(GT_FTN_ADDR, type), gtFptrMethod(meth), gtFptrDelegateTarget(false)
+        : GenTree(GT_FTN_ADDR, type)
+        , gtFptrMethod(meth)
+        , gtFptrDelegateTarget(false)
     {
 #ifdef FEATURE_READYTORUN
         gtEntryPoint.addr       = nullptr;
@@ -5861,7 +5905,8 @@ struct GenTreeFptrVal : public GenTree
 #endif
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeFptrVal() : GenTree()
+    GenTreeFptrVal()
+        : GenTree()
     {
     }
 #endif
@@ -5873,7 +5918,8 @@ struct GenTreeQmark : public GenTreeOp
     unsigned gtThenLikelihood;
 
     GenTreeQmark(var_types type, GenTree* cond, GenTreeColon* colon, unsigned thenLikelihood = 50)
-        : GenTreeOp(GT_QMARK, type, cond, colon), gtThenLikelihood(thenLikelihood)
+        : GenTreeOp(GT_QMARK, type, cond, colon)
+        , gtThenLikelihood(thenLikelihood)
     {
         // These must follow a specific form.
         assert((cond != nullptr) && cond->TypeIs(TYP_INT));
@@ -5909,7 +5955,8 @@ struct GenTreeQmark : public GenTreeOp
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeQmark() : GenTreeOp()
+    GenTreeQmark()
+        : GenTreeOp()
     {
     }
 #endif
@@ -5928,20 +5975,25 @@ struct GenTreeIntrinsic : public GenTreeOp
 #endif
 
     GenTreeIntrinsic(var_types type, GenTree* op1, NamedIntrinsic intrinsicName, CORINFO_METHOD_HANDLE methodHandle)
-        : GenTreeOp(GT_INTRINSIC, type, op1, nullptr), gtIntrinsicName(intrinsicName), gtMethodHandle(methodHandle)
+        : GenTreeOp(GT_INTRINSIC, type, op1, nullptr)
+        , gtIntrinsicName(intrinsicName)
+        , gtMethodHandle(methodHandle)
     {
         assert(intrinsicName != NI_Illegal);
     }
 
     GenTreeIntrinsic(
         var_types type, GenTree* op1, GenTree* op2, NamedIntrinsic intrinsicName, CORINFO_METHOD_HANDLE methodHandle)
-        : GenTreeOp(GT_INTRINSIC, type, op1, op2), gtIntrinsicName(intrinsicName), gtMethodHandle(methodHandle)
+        : GenTreeOp(GT_INTRINSIC, type, op1, op2)
+        , gtIntrinsicName(intrinsicName)
+        , gtMethodHandle(methodHandle)
     {
         assert(intrinsicName != NI_Illegal);
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeIntrinsic() : GenTreeOp()
+    GenTreeIntrinsic()
+        : GenTreeOp()
     {
     }
 #endif
@@ -5961,7 +6013,8 @@ struct GenTreeMultiOp : public GenTree
     protected:
         GenTree** m_use;
 
-        Iterator(GenTree** use) : m_use(use)
+        Iterator(GenTree** use)
+            : m_use(use)
         {
         }
 
@@ -5986,7 +6039,8 @@ struct GenTreeMultiOp : public GenTree
     class OperandsIterator final : public Iterator
     {
     public:
-        OperandsIterator(GenTree** use) : Iterator(use)
+        OperandsIterator(GenTree** use)
+            : Iterator(use)
         {
         }
 
@@ -5999,7 +6053,8 @@ struct GenTreeMultiOp : public GenTree
     class UseEdgesIterator final : public Iterator
     {
     public:
-        UseEdgesIterator(GenTree** use) : Iterator(use)
+        UseEdgesIterator(GenTree** use)
+            : Iterator(use)
         {
         }
 
@@ -6046,7 +6101,8 @@ struct GenTreeMultiOp : public GenTree
 
 public:
 #if DEBUGGABLE_GENTREE
-    GenTreeMultiOp() : GenTree()
+    GenTreeMultiOp()
+        : GenTree()
     {
     }
 #endif
@@ -6119,7 +6175,8 @@ class IntrinsicNodeBuilder final
     GenTree*  m_inlineOperands[2];
 
 public:
-    IntrinsicNodeBuilder(CompAllocator allocator, size_t operandCount) : m_operandCount(operandCount)
+    IntrinsicNodeBuilder(CompAllocator allocator, size_t operandCount)
+        : m_operandCount(operandCount)
     {
         m_operands =
             (operandCount <= ArrLen(m_inlineOperands)) ? m_inlineOperands : allocator.allocate<GenTree*>(operandCount);
@@ -6131,7 +6188,8 @@ class IntrinsicNodeBuilder final
 #endif // DEBUG
     }
 
-    IntrinsicNodeBuilder(CompAllocator allocator, GenTreeMultiOp* source) : m_operandCount(source->GetOperandCount())
+    IntrinsicNodeBuilder(CompAllocator allocator, GenTreeMultiOp* source)
+        : m_operandCount(source->GetOperandCount())
     {
         m_operands = (m_operandCount <= ArrLen(m_inlineOperands)) ? m_inlineOperands
                                                                   : allocator.allocate<GenTree*>(m_operandCount);
@@ -6343,7 +6401,8 @@ struct GenTreeJitIntrinsic : public GenTreeMultiOp
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeJitIntrinsic() : GenTreeMultiOp()
+    GenTreeJitIntrinsic()
+        : GenTreeMultiOp()
     {
     }
 #endif
@@ -6404,7 +6463,8 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeHWIntrinsic() : GenTreeJitIntrinsic()
+    GenTreeHWIntrinsic()
+        : GenTreeJitIntrinsic()
     {
     }
 #endif
@@ -6417,13 +6477,36 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
     bool OperIsBroadcastScalar() const;
     bool OperIsCreateScalarUnsafe() const;
     bool OperIsBitwiseHWIntrinsic() const;
+    bool OperIsEmbRoundingEnabled() const;
+
+    bool OperIsConvertMaskToVector() const
+    {
+#if defined(TARGET_XARCH)
+        return GetHWIntrinsicId() == NI_AVX512F_ConvertMaskToVector;
+#elif defined(TARGET_ARM64)
+        return GetHWIntrinsicId() == NI_Sve_ConvertMaskToVector;
+#else
+        return false;
+#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS
+    }
+
+    bool OperIsConvertVectorToMask() const
+    {
+#if defined(TARGET_XARCH)
+        return GetHWIntrinsicId() == NI_AVX512F_ConvertVectorToMask;
+#elif defined(TARGET_ARM64)
+        return GetHWIntrinsicId() == NI_Sve_ConvertVectorToMask;
+#else
+        return false;
+#endif
+    }
 
     bool OperRequiresAsgFlag() const;
     bool OperRequiresCallFlag() const;
     bool OperRequiresGlobRefFlag() const;
 
     unsigned GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3);
-    uint8_t GetTernaryControlByte(GenTreeHWIntrinsic* second) const;
+    uint8_t  GetTernaryControlByte(GenTreeHWIntrinsic* second) const;
 
     ClassLayout* GetLayout(Compiler* compiler) const;
 
@@ -6526,14 +6609,16 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
 //
 struct GenTreeVecCon : public GenTree
 {
-    union {
+    union
+    {
         simd8_t  gtSimd8Val;
         simd12_t gtSimd12Val;
         simd16_t gtSimd16Val;
 
 #if defined(TARGET_XARCH)
-        simd32_t gtSimd32Val;
-        simd64_t gtSimd64Val;
+        simd32_t   gtSimd32Val;
+        simd64_t   gtSimd64Val;
+        simdmask_t gtSimdMaskVal;
 #endif // TARGET_XARCH
 
         simd_t gtSimdVal;
@@ -6577,7 +6662,7 @@ struct GenTreeVecCon : public GenTree
                 // These intrinsics are meant to set the same value to every element.
                 if ((argCnt == 1) && HandleArgForHWIntrinsicCreate<simdTypename>(node->Op(1), 0, simdVal, simdBaseType))
                 {
-// CreateScalar leaves the upper bits as zero
+                    // CreateScalar leaves the upper bits as zero
 
 #if defined(TARGET_XARCH)
                     if ((intrinsic != NI_Vector128_CreateScalar) && (intrinsic != NI_Vector256_CreateScalar) &&
@@ -6785,6 +6870,11 @@ struct GenTreeVecCon : public GenTree
             {
                 return gtSimd64Val.IsAllBitsSet();
             }
+
+            case TYP_MASK:
+            {
+                return gtSimdMaskVal.IsAllBitsSet();
+            }
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -6832,6 +6922,11 @@ struct GenTreeVecCon : public GenTree
             {
                 return left->gtSimd64Val == right->gtSimd64Val;
             }
+
+            case TYP_MASK:
+            {
+                return left->gtSimdMaskVal == right->gtSimdMaskVal;
+            }
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -6872,6 +6967,11 @@ struct GenTreeVecCon : public GenTree
             {
                 return gtSimd64Val.IsZero();
             }
+
+            case TYP_MASK:
+            {
+                return gtSimdMaskVal.IsZero();
+            }
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -6882,7 +6982,8 @@ struct GenTreeVecCon : public GenTree
         }
     }
 
-    GenTreeVecCon(var_types type) : GenTree(GT_CNS_VEC, type)
+    GenTreeVecCon(var_types type)
+        : GenTree(GT_CNS_VEC, type)
     {
         assert(varTypeIsSIMD(type));
 
@@ -6898,7 +6999,8 @@ struct GenTreeVecCon : public GenTree
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeVecCon() : GenTree()
+    GenTreeVecCon()
+        : GenTree()
     {
     }
 #endif
@@ -6955,7 +7057,8 @@ struct GenTreeIndexAddr : public GenTreeOp
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeIndexAddr() : GenTreeOp()
+    GenTreeIndexAddr()
+        : GenTreeOp()
     {
     }
 #endif
@@ -6995,7 +7098,8 @@ struct GenTreeArrAddr : GenTreeUnOp
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeArrAddr() : GenTreeUnOp()
+    GenTreeArrAddr()
+        : GenTreeUnOp()
     {
     }
 #endif
@@ -7042,12 +7146,14 @@ struct GenTreeArrCommon : public GenTreeUnOp
         return gtOp1;
     }
 
-    GenTreeArrCommon(genTreeOps oper, var_types type, GenTree* arrRef) : GenTreeUnOp(oper, type, arrRef)
+    GenTreeArrCommon(genTreeOps oper, var_types type, GenTree* arrRef)
+        : GenTreeUnOp(oper, type, arrRef)
     {
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeArrCommon() : GenTreeUnOp()
+    GenTreeArrCommon()
+        : GenTreeUnOp()
     {
     }
 #endif
@@ -7072,12 +7178,14 @@ struct GenTreeArrLen : public GenTreeArrCommon
     }
 
     GenTreeArrLen(var_types type, GenTree* arrRef, int lenOffset)
-        : GenTreeArrCommon(GT_ARR_LENGTH, type, arrRef), gtArrLenOffset(lenOffset)
+        : GenTreeArrCommon(GT_ARR_LENGTH, type, arrRef)
+        , gtArrLenOffset(lenOffset)
     {
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeArrLen() : GenTreeArrCommon()
+    GenTreeArrLen()
+        : GenTreeArrCommon()
     {
     }
 #endif
@@ -7104,13 +7212,16 @@ struct GenTreeMDArr : public GenTreeArrCommon
     }
 
     GenTreeMDArr(genTreeOps oper, GenTree* arrRef, unsigned dim, unsigned rank)
-        : GenTreeArrCommon(oper, TYP_INT, arrRef), gtDim(dim), gtRank(rank)
+        : GenTreeArrCommon(oper, TYP_INT, arrRef)
+        , gtDim(dim)
+        , gtRank(rank)
     {
         assert(OperIs(GT_MDARR_LENGTH, GT_MDARR_LOWER_BOUND));
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeMDArr() : GenTreeArrCommon()
+    GenTreeMDArr()
+        : GenTreeArrCommon()
     {
     }
 #endif
@@ -7142,7 +7253,8 @@ struct GenTreeBoundsChk : public GenTreeOp
         gtFlags |= GTF_EXCEPT;
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeBoundsChk() : GenTreeOp()
+    GenTreeBoundsChk()
+        : GenTreeOp()
     {
     }
 #endif
@@ -7192,7 +7304,10 @@ struct GenTreeArrElem : public GenTree
 
     // Requires that "inds" is a pointer to an array of "rank" nodes for the indices.
     GenTreeArrElem(var_types type, GenTree* arr, unsigned char rank, unsigned char elemSize, GenTree** inds)
-        : GenTree(GT_ARR_ELEM, type), gtArrObj(arr), gtArrRank(rank), gtArrElemSize(elemSize)
+        : GenTree(GT_ARR_ELEM, type)
+        , gtArrObj(arr)
+        , gtArrRank(rank)
+        , gtArrElemSize(elemSize)
     {
         assert(rank <= ArrLen(gtArrInds));
         gtFlags |= (arr->gtFlags & GTF_ALL_EFFECT);
@@ -7204,7 +7319,8 @@ struct GenTreeArrElem : public GenTree
         gtFlags |= GTF_EXCEPT;
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeArrElem() : GenTree()
+    GenTreeArrElem()
+        : GenTree()
     {
     }
 #endif
@@ -7298,7 +7414,8 @@ struct GenTreeAddrMode : public GenTreeOp
 protected:
     friend GenTree;
     // Used only for GenTree::GetVtableForOper()
-    GenTreeAddrMode() : GenTreeOp()
+    GenTreeAddrMode()
+        : GenTreeOp()
     {
     }
 #endif
@@ -7334,7 +7451,8 @@ struct GenTreeIndir : public GenTreeOp
 
     unsigned Size() const;
 
-    GenTreeIndir(genTreeOps oper, var_types type, GenTree* addr, GenTree* data) : GenTreeOp(oper, type, addr, data)
+    GenTreeIndir(genTreeOps oper, var_types type, GenTree* addr, GenTree* data)
+        : GenTreeOp(oper, type, addr, data)
     {
     }
 
@@ -7360,12 +7478,14 @@ struct GenTreeIndir : public GenTreeOp
 
 #if DEBUGGABLE_GENTREE
     // Used only for GenTree::GetVtableForOper()
-    GenTreeIndir() : GenTreeOp()
+    GenTreeIndir()
+        : GenTreeOp()
     {
     }
 #else
     // Used by XARCH codegen to construct temporary trees to pass to the emitter.
-    GenTreeIndir() : GenTreeOp(GT_NOP, TYP_UNDEF)
+    GenTreeIndir()
+        : GenTreeOp(GT_NOP, TYP_UNDEF)
     {
     }
 #endif
@@ -7386,14 +7506,17 @@ struct GenTreeBlk : public GenTreeIndir
 public:
     ClassLayout* GetLayout() const
     {
+        assert(m_layout != nullptr);
         return m_layout;
     }
 
+#ifdef TARGET_WASM
     void SetLayout(ClassLayout* layout)
     {
-        assert((layout != nullptr) || OperIs(GT_STORE_DYN_BLK));
+        assert((layout != nullptr));
         m_layout = layout;
     }
+#endif // TARGET_WASM
 
     // The data to be stored (null for GT_BLK)
     GenTree*& Data()
@@ -7408,8 +7531,7 @@ struct GenTreeBlk : public GenTreeIndir
     // The size of the buffer to be copied.
     unsigned Size() const
     {
-        assert((m_layout != nullptr) || OperIs(GT_STORE_DYN_BLK));
-        return (m_layout != nullptr) ? m_layout->GetSize() : 0;
+        return m_layout->GetSize();
     }
 
     // Instruction selection: during codegen time, what code sequence we will be using
@@ -7421,9 +7543,6 @@ struct GenTreeBlk : public GenTreeIndir
 #ifdef TARGET_XARCH
         BlkOpKindCpObjRepInstr,
 #endif
-#ifndef TARGET_X86
-        BlkOpKindHelper,
-#endif
 #ifdef TARGET_XARCH
         BlkOpKindRepInstr,
 #endif
@@ -7438,7 +7557,7 @@ struct GenTreeBlk : public GenTreeIndir
 
     bool ContainsReferences()
     {
-        return (m_layout != nullptr) && m_layout->HasGCPtr();
+        return m_layout->HasGCPtr();
     }
 
     bool IsOnHeapAndContainsReferences()
@@ -7469,8 +7588,8 @@ struct GenTreeBlk : public GenTreeIndir
 
     void Initialize(ClassLayout* layout)
     {
-        assert(OperIsBlk(OperGet()) && ((layout != nullptr) || OperIs(GT_STORE_DYN_BLK)));
-        assert((layout == nullptr) || (layout->GetSize() != 0));
+        assert(layout != nullptr);
+        assert(layout->GetSize() != 0);
 
         m_layout    = layout;
         gtBlkOpKind = BlkOpKindInvalid;
@@ -7482,36 +7601,8 @@ struct GenTreeBlk : public GenTreeIndir
 #if DEBUGGABLE_GENTREE
 protected:
     friend GenTree;
-    GenTreeBlk() : GenTreeIndir()
-    {
-    }
-#endif // DEBUGGABLE_GENTREE
-};
-
-// GenTreeStoreDynBlk  -- 'dynamic block store' (GT_STORE_DYN_BLK).
-//
-// This node is used to represent stores that have a dynamic size - the "cpblk" and "initblk"
-// IL instructions are implemented with it. Note that such stores assume the input has no GC
-// pointers in it, and as such do not ever use write barriers.
-//
-// The "Data()" member of this node will either be a "dummy" IND(struct) node, for "cpblk", or
-// the zero constant/INIT_VAL for "initblk".
-//
-struct GenTreeStoreDynBlk : public GenTreeBlk
-{
-public:
-    GenTree* gtDynamicSize;
-
-    GenTreeStoreDynBlk(GenTree* dstAddr, GenTree* data, GenTree* dynamicSize)
-        : GenTreeBlk(GT_STORE_DYN_BLK, TYP_VOID, dstAddr, data, nullptr), gtDynamicSize(dynamicSize)
-    {
-        gtFlags |= dynamicSize->gtFlags & GTF_ALL_EFFECT;
-    }
-
-#if DEBUGGABLE_GENTREE
-protected:
-    friend GenTree;
-    GenTreeStoreDynBlk() : GenTreeBlk()
+    GenTreeBlk()
+        : GenTreeIndir()
     {
     }
 #endif // DEBUGGABLE_GENTREE
@@ -7617,7 +7708,8 @@ struct GenTreeStoreInd : public GenTreeIndir
         return gtOp2;
     }
 
-    GenTreeStoreInd(var_types type, GenTree* destPtr, GenTree* data) : GenTreeIndir(GT_STOREIND, type, destPtr, data)
+    GenTreeStoreInd(var_types type, GenTree* destPtr, GenTree* data)
+        : GenTreeIndir(GT_STOREIND, type, destPtr, data)
     {
         SetRMWStatusDefault();
     }
@@ -7626,7 +7718,8 @@ struct GenTreeStoreInd : public GenTreeIndir
 protected:
     friend GenTree;
     // Used only for GenTree::GetVtableForOper()
-    GenTreeStoreInd() : GenTreeIndir()
+    GenTreeStoreInd()
+        : GenTreeIndir()
     {
         SetRMWStatusDefault();
     }
@@ -7640,13 +7733,15 @@ struct GenTreeCmpXchg : public GenTreeIndir
 
 public:
     GenTreeCmpXchg(var_types type, GenTree* loc, GenTree* val, GenTree* comparand)
-        : GenTreeIndir(GT_CMPXCHG, type, loc, val), m_comparand(comparand)
+        : GenTreeIndir(GT_CMPXCHG, type, loc, val)
+        , m_comparand(comparand)
     {
         gtFlags |= comparand->gtFlags & GTF_ALL_EFFECT;
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeCmpXchg() : GenTreeIndir()
+    GenTreeCmpXchg()
+        : GenTreeIndir()
     {
     }
 #endif
@@ -7674,11 +7769,13 @@ struct GenTreeRetExpr : public GenTree
     // nullptr for cases where gtSubstExpr is not a tree from the inlinee.
     BasicBlock* gtSubstBB;
 
-    GenTreeRetExpr(var_types type) : GenTree(GT_RET_EXPR, type)
+    GenTreeRetExpr(var_types type)
+        : GenTree(GT_RET_EXPR, type)
     {
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeRetExpr() : GenTree()
+    GenTreeRetExpr()
+        : GenTree()
     {
     }
 #endif
@@ -7702,7 +7799,8 @@ struct GenTreeILOffset : public GenTree
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeILOffset() : GenTree(GT_IL_OFFSET, TYP_VOID)
+    GenTreeILOffset()
+        : GenTree(GT_IL_OFFSET, TYP_VOID)
     {
     }
 #endif
@@ -7724,7 +7822,8 @@ class GenTreeList
         GenTree* m_tree;
 
     public:
-        explicit iterator(GenTree* tree) : m_tree(tree)
+        explicit iterator(GenTree* tree)
+            : m_tree(tree)
         {
         }
 
@@ -7745,7 +7844,8 @@ class GenTreeList
         }
     };
 
-    explicit GenTreeList(GenTree* trees) : m_trees(trees)
+    explicit GenTreeList(GenTree* trees)
+        : m_trees(trees)
     {
     }
 
@@ -7770,7 +7870,8 @@ class LocalsGenTreeList
         GenTreeLclVarCommon* m_tree;
 
     public:
-        explicit iterator(GenTreeLclVarCommon* tree) : m_tree(tree)
+        explicit iterator(GenTreeLclVarCommon* tree)
+            : m_tree(tree)
         {
         }
 
@@ -7799,7 +7900,8 @@ class LocalsGenTreeList
         }
     };
 
-    explicit LocalsGenTreeList(Statement* stmt) : m_stmt(stmt)
+    explicit LocalsGenTreeList(Statement* stmt)
+        : m_stmt(stmt)
     {
     }
 
@@ -7999,7 +8101,8 @@ class StatementList
         Statement* m_stmt;
 
     public:
-        iterator(Statement* stmt) : m_stmt(stmt)
+        iterator(Statement* stmt)
+            : m_stmt(stmt)
         {
         }
 
@@ -8021,7 +8124,8 @@ class StatementList
     };
 
 public:
-    StatementList(Statement* stmts) : m_stmts(stmts)
+    StatementList(Statement* stmts)
+        : m_stmts(stmts)
     {
     }
 
@@ -8046,13 +8150,15 @@ struct GenTreePhiArg : public GenTreeLclVarCommon
     BasicBlock* gtPredBB;
 
     GenTreePhiArg(var_types type, unsigned lclNum, unsigned ssaNum, BasicBlock* block)
-        : GenTreeLclVarCommon(GT_PHI_ARG, type, lclNum), gtPredBB(block)
+        : GenTreeLclVarCommon(GT_PHI_ARG, type, lclNum)
+        , gtPredBB(block)
     {
         SetSsaNum(ssaNum);
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreePhiArg() : GenTreeLclVarCommon()
+    GenTreePhiArg()
+        : GenTreeLclVarCommon()
     {
     }
 #endif
@@ -8090,8 +8196,13 @@ struct GenTreePutArgStk : public GenTreeUnOp
     // TODO-Throughput: The following information should be obtained from the child
     // block node.
 
-    enum class Kind : int8_t{
-        Invalid, RepInstr, PartialRepInstr, Unroll, Push,
+    enum class Kind : int8_t
+    {
+        Invalid,
+        RepInstr,
+        PartialRepInstr,
+        Unroll,
+        Push,
     };
     Kind gtPutArgStkKind;
 
@@ -8223,7 +8334,8 @@ struct GenTreePutArgStk : public GenTreeUnOp
 #endif // !FEATURE_PUT_STRUCT_ARG_STK
 
 #if DEBUGGABLE_GENTREE
-    GenTreePutArgStk() : GenTreeUnOp()
+    GenTreePutArgStk()
+        : GenTreeUnOp()
     {
     }
 #endif
@@ -8373,7 +8485,8 @@ struct GenTreePutArgSplit : public GenTreePutArgStk
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreePutArgSplit() : GenTreePutArgStk()
+    GenTreePutArgSplit()
+        : GenTreePutArgStk()
     {
     }
 #endif
@@ -8500,7 +8613,8 @@ struct GenTreeCopyOrReload : public GenTreeUnOp
         return 1;
     }
 
-    GenTreeCopyOrReload(genTreeOps oper, var_types type, GenTree* op1) : GenTreeUnOp(oper, type, op1)
+    GenTreeCopyOrReload(genTreeOps oper, var_types type, GenTree* op1)
+        : GenTreeUnOp(oper, type, op1)
     {
         assert(type != TYP_STRUCT || op1->IsMultiRegNode());
         SetRegNum(REG_NA);
@@ -8508,7 +8622,8 @@ struct GenTreeCopyOrReload : public GenTreeUnOp
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeCopyOrReload() : GenTreeUnOp()
+    GenTreeCopyOrReload()
+        : GenTreeUnOp()
     {
     }
 #endif
@@ -8538,7 +8653,8 @@ struct GenTreeAllocObj final : public GenTreeUnOp
 #endif
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeAllocObj() : GenTreeUnOp()
+    GenTreeAllocObj()
+        : GenTreeUnOp()
     {
     }
 #endif
@@ -8552,12 +8668,15 @@ struct GenTreeRuntimeLookup final : public GenTreeUnOp
     CorInfoGenericHandleType gtHndType;
 
     GenTreeRuntimeLookup(CORINFO_GENERIC_HANDLE hnd, CorInfoGenericHandleType hndTyp, GenTree* tree)
-        : GenTreeUnOp(GT_RUNTIMELOOKUP, tree->gtType, tree DEBUGARG(/*largeNode*/ FALSE)), gtHnd(hnd), gtHndType(hndTyp)
+        : GenTreeUnOp(GT_RUNTIMELOOKUP, tree->gtType, tree DEBUGARG(/*largeNode*/ FALSE))
+        , gtHnd(hnd)
+        , gtHndType(hndTyp)
     {
         assert(hnd != nullptr);
     }
 #if DEBUGGABLE_GENTREE
-    GenTreeRuntimeLookup() : GenTreeUnOp()
+    GenTreeRuntimeLookup()
+        : GenTreeUnOp()
     {
     }
 #endif
@@ -8721,11 +8840,13 @@ struct GenCondition
         return names[m_code];
     }
 
-    GenCondition() : m_code()
+    GenCondition()
+        : m_code()
     {
     }
 
-    GenCondition(Code cond) : m_code(cond)
+    GenCondition(Code cond)
+        : m_code(cond)
     {
     }
 
@@ -8849,13 +8970,15 @@ struct GenTreeCC final : public GenTree
     GenCondition gtCondition;
 
     GenTreeCC(genTreeOps oper, var_types type, GenCondition condition)
-        : GenTree(oper, type DEBUGARG(/*largeNode*/ FALSE)), gtCondition(condition)
+        : GenTree(oper, type DEBUGARG(/*largeNode*/ FALSE))
+        , gtCondition(condition)
     {
         assert(OperIs(GT_JCC, GT_SETCC));
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeCC() : GenTree()
+    GenTreeCC()
+        : GenTree()
     {
     }
 #endif // DEBUGGABLE_GENTREE
@@ -8867,7 +8990,8 @@ struct GenTreeOpCC : public GenTreeOp
     GenCondition gtCondition;
 
     GenTreeOpCC(genTreeOps oper, var_types type, GenCondition condition, GenTree* op1 = nullptr, GenTree* op2 = nullptr)
-        : GenTreeOp(oper, type, op1, op2 DEBUGARG(/*largeNode*/ FALSE)), gtCondition(condition)
+        : GenTreeOp(oper, type, op1, op2 DEBUGARG(/*largeNode*/ FALSE))
+        , gtCondition(condition)
     {
 #ifdef TARGET_ARM64
         assert(OperIs(GT_SELECTCC, GT_SELECT_INCCC, GT_SELECT_INVCC, GT_SELECT_NEGCC));
@@ -8877,7 +9001,8 @@ struct GenTreeOpCC : public GenTreeOp
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeOpCC() : GenTreeOp()
+    GenTreeOpCC()
+        : GenTreeOp()
     {
     }
 #endif // DEBUGGABLE_GENTREE
@@ -8912,12 +9037,14 @@ struct GenTreeCCMP final : public GenTreeOpCC
     insCflags gtFlagsVal;
 
     GenTreeCCMP(var_types type, GenCondition condition, GenTree* op1, GenTree* op2, insCflags flagsVal)
-        : GenTreeOpCC(GT_CCMP, type, condition, op1, op2), gtFlagsVal(flagsVal)
+        : GenTreeOpCC(GT_CCMP, type, condition, op1, op2)
+        , gtFlagsVal(flagsVal)
     {
     }
 
 #if DEBUGGABLE_GENTREE
-    GenTreeCCMP() : GenTreeOpCC()
+    GenTreeCCMP()
+        : GenTreeOpCC()
     {
     }
 #endif // DEBUGGABLE_GENTREE
@@ -8931,10 +9058,6 @@ struct GenTreeCCMP final : public GenTreeOpCC
 
 inline bool GenTree::OperIsBlkOp()
 {
-    if (OperIs(GT_STORE_DYN_BLK))
-    {
-        return true;
-    }
     if (OperIsStore())
     {
         return varTypeIsStruct(this);
@@ -9306,7 +9429,6 @@ inline GenTree* GenTree::gtGetOp1() const
         case GT_QMARK:
         case GT_COLON:
         case GT_INDEX_ADDR:
-        case GT_MKREFANY:
             return true;
         default:
             return false;
@@ -9342,7 +9464,7 @@ inline GenTree* GenTree::gtGetOp2IfPresent() const
 
 inline GenTree*& GenTree::Data()
 {
-    assert(OperIsStore() || OperIs(GT_STORE_DYN_BLK));
+    assert(OperIsStore());
     return OperIsLocalStore() ? AsLclVarCommon()->Data() : AsIndir()->Data();
 }
 
diff --git a/src/coreclr/jit/gschecks.cpp b/src/coreclr/jit/gschecks.cpp
index 12c610ceaefa..0b4deec87d22 100644
--- a/src/coreclr/jit/gschecks.cpp
+++ b/src/coreclr/jit/gschecks.cpp
@@ -94,7 +94,7 @@ void Compiler::gsCopyShadowParams()
 
     // Find groups of variables assigned to each other, and also
     // tracks variables which are dereferenced and marks them as ptrs.
-    // Look for assignments to *p, and ptrs passed to functions
+    // Look for stores to *p, and ptrs passed to functions
     //
     if (gsFindVulnerableParams())
     {
@@ -117,7 +117,7 @@ struct MarkPtrsInfo
 {
     Compiler* comp;
     unsigned  lvStoreDef;   // Which local variable is the tree being assigned to?
-    bool      isStoreSrc;   // Is this the source value for an assignment?
+    bool      isStoreSrc;   // Is this the source value for a local store?
     bool      isUnderIndir; // Is this a pointer value tree that is being dereferenced?
     bool      skipNextNode; // Skip a single node during the tree-walk
 
@@ -455,7 +455,8 @@ void Compiler::gsParamsToShadows()
             DoPostOrder = true
         };
 
-        ReplaceShadowParamsVisitor(Compiler* compiler) : GenTreeVisitor<ReplaceShadowParamsVisitor>(compiler)
+        ReplaceShadowParamsVisitor(Compiler* compiler)
+            : GenTreeVisitor<ReplaceShadowParamsVisitor>(compiler)
         {
         }
 
@@ -527,7 +528,7 @@ void Compiler::gsParamsToShadows()
     if (compJmpOpUsed)
     {
         // There could be more than one basic block ending with a "Jmp" type tail call.
-        // We would have to insert assignments in all such blocks, just before GT_JMP stmnt.
+        // We would have to insert stores in all such blocks, just before GT_JMP stmnt.
         for (BasicBlock* const block : Blocks())
         {
             if (!block->KindIs(BBJ_RETURN))
diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h
index 597a9e471d5b..1d442f276737 100644
--- a/src/coreclr/jit/gtlist.h
+++ b/src/coreclr/jit/gtlist.h
@@ -76,13 +76,12 @@ GTNODE(XAND             , GenTreeOp          ,0,1,GTK_BINOP)
 GTNODE(XORR             , GenTreeOp          ,0,1,GTK_BINOP)
 GTNODE(XADD             , GenTreeOp          ,0,1,GTK_BINOP)
 GTNODE(XCHG             , GenTreeOp          ,0,1,GTK_BINOP)
-GTNODE(CMPXCHG          , GenTreeCmpXchg     ,0,1,GTK_SPECIAL)
+GTNODE(CMPXCHG          , GenTreeCmpXchg     ,0,1,GTK_SPECIAL)            // atomic CAS, small types need the comparand to be zero extended
 
 GTNODE(IND              , GenTreeIndir       ,0,1,GTK_UNOP)                                 // Load indirection
 GTNODE(STOREIND         , GenTreeStoreInd    ,0,1,GTK_BINOP|GTK_EXOP|GTK_NOVALUE|GTK_STORE) // Store indirection
 GTNODE(BLK              , GenTreeBlk         ,0,1,GTK_UNOP|GTK_EXOP)                        // Struct load
 GTNODE(STORE_BLK        , GenTreeBlk         ,0,1,GTK_BINOP|GTK_EXOP|GTK_NOVALUE|GTK_STORE) // Struct store
-GTNODE(STORE_DYN_BLK    , GenTreeStoreDynBlk ,0,1,GTK_SPECIAL|GTK_NOVALUE)                  // Dynamically sized block store, with native uint size
 GTNODE(NULLCHECK        , GenTreeIndir       ,0,1,GTK_UNOP|GTK_NOVALUE)                     // Null checks the source
 
 GTNODE(ARR_LENGTH       , GenTreeArrLen      ,0,0,GTK_UNOP|GTK_EXOP)            // single-dimension (SZ) array length
@@ -150,7 +149,6 @@ GTNODE(QMARK            , GenTreeQmark       ,0,1,GTK_BINOP|GTK_EXOP|DBK_NOTLIR)
 GTNODE(COLON            , GenTreeColon       ,0,1,GTK_BINOP|DBK_NOTLIR)
 
 GTNODE(INDEX_ADDR       , GenTreeIndexAddr   ,0,0,GTK_BINOP|GTK_EXOP)   // Address of SZ-array-element.
-GTNODE(MKREFANY         , GenTreeOp          ,0,0,GTK_BINOP|DBK_NOTLIR)
 GTNODE(LEA              , GenTreeAddrMode    ,0,0,GTK_BINOP|GTK_EXOP|DBK_NOTHIR)
 
 #if !defined(TARGET_64BIT)
@@ -284,9 +282,15 @@ GTNODE(START_PREEMPTGC  , GenTree            ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHI
 GTNODE(PROF_HOOK        , GenTree            ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Profiler Enter/Leave/TailCall hook.
 
 GTNODE(RETFILT          , GenTreeOp          ,0,1,GTK_UNOP|GTK_NOVALUE) // End filter with TYP_I_IMPL return value.
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
 GTNODE(END_LFIN         , GenTreeVal         ,0,0,GTK_LEAF|GTK_NOVALUE) // End locally-invoked finally.
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
+
+//-----------------------------------------------------------------------------
+//  Swift interop-specific nodes:
+//-----------------------------------------------------------------------------
+
+GTNODE(SWIFT_ERROR      , GenTree            ,0,0,GTK_LEAF)             // Error register value post-Swift call
 
 //-----------------------------------------------------------------------------
 //  Nodes used by Lower to generate a closer CPU representation of other nodes
diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h
index 5b6e0b07f81e..7bab0328b976 100644
--- a/src/coreclr/jit/gtstructs.h
+++ b/src/coreclr/jit/gtstructs.h
@@ -50,7 +50,7 @@
 
 GTSTRUCT_0(UnOp        , GT_OP)
 GTSTRUCT_0(Op          , GT_OP)
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
 GTSTRUCT_2(Val         , GT_END_LFIN, GT_JMP)
 #else
 GTSTRUCT_1(Val         , GT_JMP)
@@ -88,9 +88,8 @@ GTSTRUCT_1(AddrMode    , GT_LEA)
 GTSTRUCT_1(Qmark       , GT_QMARK)
 GTSTRUCT_1(PhiArg      , GT_PHI_ARG)
 GTSTRUCT_1(Phi         , GT_PHI)
-GTSTRUCT_N(Indir       , GT_IND, GT_NULLCHECK, GT_BLK, GT_STORE_BLK, GT_STORE_DYN_BLK, GT_LOCKADD, GT_XAND, GT_XORR, GT_XADD, GT_XCHG, GT_CMPXCHG, GT_STOREIND)
-GTSTRUCT_N(Blk         , GT_BLK, GT_STORE_BLK, GT_STORE_DYN_BLK)
-GTSTRUCT_1(StoreDynBlk , GT_STORE_DYN_BLK)
+GTSTRUCT_N(Indir       , GT_IND, GT_NULLCHECK, GT_BLK, GT_STORE_BLK, GT_LOCKADD, GT_XAND, GT_XORR, GT_XADD, GT_XCHG, GT_CMPXCHG, GT_STOREIND)
+GTSTRUCT_N(Blk         , GT_BLK, GT_STORE_BLK)
 GTSTRUCT_1(StoreInd    , GT_STOREIND)
 GTSTRUCT_1(CmpXchg     , GT_CMPXCHG)
 #ifdef TARGET_ARM64
diff --git a/src/coreclr/jit/hashbv.cpp b/src/coreclr/jit/hashbv.cpp
index 854215235261..87acddf099bc 100644
--- a/src/coreclr/jit/hashbv.cpp
+++ b/src/coreclr/jit/hashbv.cpp
@@ -824,7 +824,7 @@ void hashBv::setAll(indexType numToSet)
     for (unsigned int i = 0; i < numToSet; i += BITS_PER_NODE)
     {
         hashBvNode* node        = getOrAddNodeForIndex(i);
-        indexType   bits_to_set = min(BITS_PER_NODE, numToSet - i);
+        indexType   bits_to_set = min((indexType)BITS_PER_NODE, numToSet - i);
         node->setLowest(bits_to_set);
     }
 }
@@ -1948,7 +1948,7 @@ indexType hashBvIterator::nextBit()
         current_element++;
         // printf("current element is %d\n", current_element);
         // reached the end of this node
-        if (current_element == (indexType) this->currNode->numElements())
+        if (current_element == (indexType)this->currNode->numElements())
         {
             // printf("going to next node\n");
             this->nextNode();
@@ -1956,7 +1956,7 @@ indexType hashBvIterator::nextBit()
         }
         else
         {
-            assert(current_element < (indexType) this->currNode->numElements());
+            assert(current_element < (indexType)this->currNode->numElements());
             // printf("getting more data\n");
             current_data = this->currNode->elements[current_element];
             current_base = this->currNode->baseIndex + current_element * BITS_PER_ELEMENT;
diff --git a/src/coreclr/jit/hashbv.h b/src/coreclr/jit/hashbv.h
index 7ad95998add8..561a1c5641e4 100644
--- a/src/coreclr/jit/hashbv.h
+++ b/src/coreclr/jit/hashbv.h
@@ -13,15 +13,15 @@
 #include <memory.h>
 #include <windows.h>
 
-//#define TESTING 1
+// #define TESTING 1
 
-#define LOG2_BITS_PER_ELEMENT 5
+#define LOG2_BITS_PER_ELEMENT  5
 #define LOG2_ELEMENTS_PER_NODE 2
-#define LOG2_BITS_PER_NODE (LOG2_BITS_PER_ELEMENT + LOG2_ELEMENTS_PER_NODE)
+#define LOG2_BITS_PER_NODE     (LOG2_BITS_PER_ELEMENT + LOG2_ELEMENTS_PER_NODE)
 
-#define BITS_PER_ELEMENT (1 << LOG2_BITS_PER_ELEMENT)
+#define BITS_PER_ELEMENT  (1 << LOG2_BITS_PER_ELEMENT)
 #define ELEMENTS_PER_NODE (1 << LOG2_ELEMENTS_PER_NODE)
-#define BITS_PER_NODE (1 << LOG2_BITS_PER_NODE)
+#define BITS_PER_NODE     (1 << LOG2_BITS_PER_NODE)
 
 #ifdef TARGET_AMD64
 typedef unsigned __int64 elemType;
@@ -128,8 +128,8 @@ class hashBvNode
     {
     }
     static hashBvNode* Create(indexType base, Compiler* comp);
-    void Reconstruct(indexType base);
-    int numElements()
+    void               Reconstruct(indexType base);
+    int                numElements()
     {
         return ELEMENTS_PER_NODE;
     }
@@ -172,7 +172,8 @@ class hashBv
     hashBvNode** nodeArr;
     hashBvNode*  initialVector[1];
 
-    union {
+    union
+    {
         Compiler* compiler;
         // for freelist
         hashBv* next;
@@ -186,9 +187,9 @@ class hashBv
 public:
     hashBv(Compiler* comp);
     static hashBv* Create(Compiler* comp);
-    static void Init(Compiler* comp);
+    static void    Init(Compiler* comp);
     static hashBv* CreateFrom(hashBv* other, Compiler* comp);
-    void hbvFree();
+    void           hbvFree();
 #ifdef DEBUG
     void dump();
     void dumpFancy();
@@ -201,18 +202,18 @@ class hashBv
     hashBvGlobalData* globalData();
 
     static hashBvNode*& nodeFreeList(hashBvGlobalData* globalData);
-    static hashBv*& hbvFreeList(hashBvGlobalData* data);
+    static hashBv*&     hbvFreeList(hashBvGlobalData* data);
 
     hashBvNode** getInsertionPointForIndex(indexType index);
 
 private:
     hashBvNode* getNodeForIndexHelper(indexType index, bool canAdd);
-    int getHashForIndex(indexType index, int table_size);
-    int getRehashForIndex(indexType thisIndex, int thisTableSize, int newTableSize);
+    int         getHashForIndex(indexType index, int table_size);
+    int         getRehashForIndex(indexType thisIndex, int thisTableSize, int newTableSize);
 
     // maintain free lists for vectors
     hashBvNode** getNewVector(int vectorLength);
-    int getNodeCount();
+    int          getNodeCount();
 
 public:
     inline hashBvNode* getOrAddNodeForIndex(indexType index)
@@ -221,7 +222,7 @@ class hashBv
         return temp;
     }
     hashBvNode* getNodeForIndex(indexType index);
-    void removeNodeAtBase(indexType index);
+    void        removeNodeAtBase(indexType index);
 
 public:
     void setBit(indexType index);
diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp
index f0b042b16ca9..6c8251eee257 100644
--- a/src/coreclr/jit/helperexpansion.cpp
+++ b/src/coreclr/jit/helperexpansion.cpp
@@ -319,21 +319,11 @@ bool Compiler::fgExpandRuntimeLookupsForCall(BasicBlock** pBlock, Statement* stm
 
     // Fallback basic block
     GenTree*    fallbackValueDef = gtNewStoreLclVarNode(rtLookupLcl->GetLclNum(), call);
-    BasicBlock* fallbackBb =
-        fgNewBBFromTreeAfter(BBJ_ALWAYS, nullcheckBb, fallbackValueDef, debugInfo, nullcheckBb->Next(), true);
-
-    assert(fallbackBb->JumpsToNext());
-    fallbackBb->SetFlags(BBF_NONE_QUIRK);
-
-    // Set nullcheckBb's true jump target
-    nullcheckBb->SetTrueTarget(fallbackBb);
+    BasicBlock* fallbackBb       = fgNewBBFromTreeAfter(BBJ_ALWAYS, nullcheckBb, fallbackValueDef, debugInfo, true);
 
     // Fast-path basic block
     GenTree*    fastpathValueDef = gtNewStoreLclVarNode(rtLookupLcl->GetLclNum(), fastPathValueClone);
-    BasicBlock* fastPathBb       = fgNewBBFromTreeAfter(BBJ_ALWAYS, nullcheckBb, fastpathValueDef, debugInfo, block);
-
-    // Set nullcheckBb's false jump target
-    nullcheckBb->SetFalseTarget(fastPathBb);
+    BasicBlock* fastPathBb       = fgNewBBFromTreeAfter(BBJ_ALWAYS, nullcheckBb, fastpathValueDef, debugInfo);
 
     BasicBlock* sizeCheckBb = nullptr;
     if (needsSizeCheck)
@@ -375,45 +365,63 @@ bool Compiler::fgExpandRuntimeLookupsForCall(BasicBlock** pBlock, Statement* stm
 
         GenTree* jtrue = gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck);
         // sizeCheckBb fails - jump to fallbackBb
-        sizeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, prevBb, jtrue, debugInfo, fallbackBb);
-        sizeCheckBb->SetFalseTarget(nullcheckBb);
+        sizeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, prevBb, jtrue, debugInfo);
     }
 
     //
     // Update preds in all new blocks
     //
-    fgRemoveRefPred(block, prevBb);
-    fgAddRefPred(block, fastPathBb);
-    fgAddRefPred(block, fallbackBb);
     assert(prevBb->KindIs(BBJ_ALWAYS));
 
+    {
+        FlowEdge* const newEdge = fgAddRefPred(block, fastPathBb);
+        fastPathBb->SetTargetEdge(newEdge);
+    }
+
+    {
+        FlowEdge* const newEdge = fgAddRefPred(block, fallbackBb);
+        fallbackBb->SetTargetEdge(newEdge);
+        assert(fallbackBb->JumpsToNext());
+    }
+
     if (needsSizeCheck)
     {
         // sizeCheckBb is the first block after prevBb
-        prevBb->SetTarget(sizeCheckBb);
-        fgAddRefPred(sizeCheckBb, prevBb);
+        fgRedirectTargetEdge(prevBb, sizeCheckBb);
+
         // sizeCheckBb flows into nullcheckBb in case if the size check passes
-        fgAddRefPred(nullcheckBb, sizeCheckBb);
+        {
+            FlowEdge* const trueEdge  = fgAddRefPred(fallbackBb, sizeCheckBb);
+            FlowEdge* const falseEdge = fgAddRefPred(nullcheckBb, sizeCheckBb);
+            sizeCheckBb->SetTrueEdge(trueEdge);
+            sizeCheckBb->SetFalseEdge(falseEdge);
+            trueEdge->setLikelihood(0.2);
+            falseEdge->setLikelihood(0.8);
+        }
+
         // fallbackBb is reachable from both nullcheckBb and sizeCheckBb
-        fgAddRefPred(fallbackBb, nullcheckBb);
-        fgAddRefPred(fallbackBb, sizeCheckBb);
         // fastPathBb is only reachable from successful nullcheckBb
-        fgAddRefPred(fastPathBb, nullcheckBb);
     }
     else
     {
         // nullcheckBb is the first block after prevBb
-        prevBb->SetTarget(nullcheckBb);
-        fgAddRefPred(nullcheckBb, prevBb);
+        fgRedirectTargetEdge(prevBb, nullcheckBb);
+
         // No size check, nullcheckBb jumps to fast path
-        fgAddRefPred(fastPathBb, nullcheckBb);
         // fallbackBb is only reachable from nullcheckBb (jump destination)
-        fgAddRefPred(fallbackBb, nullcheckBb);
     }
 
+    FlowEdge* const trueEdge  = fgAddRefPred(fallbackBb, nullcheckBb);
+    FlowEdge* const falseEdge = fgAddRefPred(fastPathBb, nullcheckBb);
+    nullcheckBb->SetTrueEdge(trueEdge);
+    nullcheckBb->SetFalseEdge(falseEdge);
+    trueEdge->setLikelihood(0.2);
+    falseEdge->setLikelihood(0.8);
+
     //
     // Re-distribute weights (see '[weight: X]' on the diagrams above)
     // TODO: consider marking fallbackBb as rarely-taken
+    // TODO: derive block weights from edge likelihoods.
     //
     block->inheritWeight(prevBb);
     if (needsSizeCheck)
@@ -576,8 +584,8 @@ bool Compiler::fgExpandThreadLocalAccessForCallNativeAOT(BasicBlock** pBlock, St
     //      use(tlsRoot);
     // ...
 
-    GenTree*             tlsRootAddr   = nullptr;
-    CORINFO_CONST_LOOKUP tlsRootObject = threadStaticInfo.tlsRootObject;
+    GenTree*               tlsRootAddr   = nullptr;
+    CORINFO_GENERIC_HANDLE tlsRootObject = threadStaticInfo.tlsRootObject.handle;
 
     if (TargetOS::IsWindows)
     {
@@ -598,7 +606,7 @@ bool Compiler::fgExpandThreadLocalAccessForCallNativeAOT(BasicBlock** pBlock, St
         tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
 
         // This resolves to an offset which is TYP_INT
-        GenTree* tlsRootOffset = gtNewIconNode((size_t)tlsRootObject.handle, TYP_INT);
+        GenTree* tlsRootOffset = gtNewIconNode((size_t)tlsRootObject, TYP_INT);
         tlsRootOffset->gtFlags |= GTF_ICON_SECREL_OFFSET;
 
         // Add the tlsValue and tlsRootOffset to produce tlsRootAddr.
@@ -606,34 +614,63 @@ bool Compiler::fgExpandThreadLocalAccessForCallNativeAOT(BasicBlock** pBlock, St
     }
     else if (TargetOS::IsUnix)
     {
-        // Code sequence to access thread local variable on linux/x64:
-        //      data16
-        //      lea      rdi, 0x7FE5C418CD28  ; tlsRootObject
-        //      data16 data16
-        //      call     _tls_get_addr
-        //
-        // This sequence along with `data16` prefix is expected by the linker so it
-        // will patch these with TLS access.
-        GenTree* tls_get_addr_val =
-            gtNewIconHandleNode((size_t)threadStaticInfo.tlsGetAddrFtnPtr.handle, GTF_ICON_FTN_ADDR);
-        tls_get_addr_val->SetContained();
+        if (TargetArchitecture::IsX64)
+        {
+            // Code sequence to access thread local variable on linux/x64:
+            //      data16
+            //      lea      rdi, 0x7FE5C418CD28  ; tlsRootObject
+            //      data16 data16
+            //      call     _tls_get_addr
+            //
+            // This sequence along with `data16` prefix is expected by the linker so it
+            // will patch these with TLS access.
+            GenTree* tls_get_addr_val =
+                gtNewIconHandleNode((size_t)threadStaticInfo.tlsGetAddrFtnPtr.handle, GTF_ICON_FTN_ADDR);
+            tls_get_addr_val->SetContained();
+
+            GenTreeCall* tlsRefCall = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL);
+            tlsRefCall->gtFlags |= GTF_TLS_GET_ADDR;
+
+            // This is an indirect call which takes an argument.
+            // Populate and set the ABI appropriately.
+            assert(tlsRootObject != 0);
+            GenTree* tlsArg = gtNewIconNode((size_t)tlsRootObject, TYP_I_IMPL);
+            tlsArg->gtFlags |= GTF_ICON_TLSGD_OFFSET;
+            tlsRefCall->gtArgs.PushBack(this, NewCallArg::Primitive(tlsArg));
+
+            fgMorphArgs(tlsRefCall);
+
+            tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT);
+            tlsRootAddr = tlsRefCall;
+        }
+        else if (TargetArchitecture::IsArm64)
+        {
+            /*
+            x0 = adrp :tlsdesc:tlsRoot ; 1st parameter
+            x0 += tlsdesc_lo12:tlsRoot ; update 1st parameter
 
-        // GenTreeCall* tlsRefCall = gtNewCallNode(CT_ tls_get_addr_val, TYP_I_IMPL);
-        GenTreeCall* tlsRefCall = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL);
-        tlsRefCall->gtFlags |= GTF_TLS_GET_ADDR;
-        // //
+            x1 = tpidr_el0             ; 2nd parameter
 
-        // This is an indirect call which takes an argument.
-        // Populate and set the ABI appropriately.
-        assert(tlsRootObject.handle != 0);
-        GenTree* tlsArg = gtNewIconNode((size_t)tlsRootObject.handle, TYP_I_IMPL);
-        tlsArg->gtFlags |= GTF_ICON_TLSGD_OFFSET;
-        tlsRefCall->gtArgs.PushBack(this, NewCallArg::Primitive(tlsArg));
+            x2 = [x0]                  ; call
+            blr x2
 
-        fgMorphArgs(tlsRefCall);
+            */
 
-        tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT);
-        tlsRootAddr = tlsRefCall;
+            GenTree* tlsRootOffset = gtNewIconHandleNode((size_t)tlsRootObject, GTF_ICON_TLS_HDL);
+            tlsRootOffset->gtFlags |= GTF_ICON_TLSGD_OFFSET;
+
+            GenTree*     tlsCallIndir = gtCloneExpr(tlsRootOffset);
+            GenTreeCall* tlsRefCall   = gtNewIndCallNode(tlsCallIndir, TYP_I_IMPL);
+            tlsRefCall->gtFlags |= GTF_TLS_GET_ADDR;
+            fgMorphArgs(tlsRefCall);
+
+            tlsRefCall->gtFlags |= GTF_EXCEPT | (tlsCallIndir->gtFlags & GTF_GLOB_EFFECT);
+            tlsRootAddr = tlsRefCall;
+        }
+        else
+        {
+            unreached();
+        }
     }
     else
     {
@@ -670,11 +707,10 @@ bool Compiler::fgExpandThreadLocalAccessForCallNativeAOT(BasicBlock** pBlock, St
 
     // fallbackBb
     GenTree*    fallbackValueDef = gtNewStoreLclVarNode(finalLclNum, slowHelper);
-    BasicBlock* fallbackBb =
-        fgNewBBFromTreeAfter(BBJ_ALWAYS, tlsRootNullCondBB, fallbackValueDef, debugInfo, block, true);
+    BasicBlock* fallbackBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, tlsRootNullCondBB, fallbackValueDef, debugInfo, true);
 
     GenTree*    fastPathValueDef = gtNewStoreLclVarNode(finalLclNum, gtCloneExpr(finalLcl));
-    BasicBlock* fastPathBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, fastPathValueDef, debugInfo, block, true);
+    BasicBlock* fastPathBb       = fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, fastPathValueDef, debugInfo, true);
 
     *callUse = finalLcl;
 
@@ -684,14 +720,22 @@ bool Compiler::fgExpandThreadLocalAccessForCallNativeAOT(BasicBlock** pBlock, St
     //
     // Update preds in all new blocks
     //
-    fgAddRefPred(fallbackBb, tlsRootNullCondBB);
-    fgAddRefPred(fastPathBb, tlsRootNullCondBB);
+    FlowEdge* const trueEdge  = fgAddRefPred(fastPathBb, tlsRootNullCondBB);
+    FlowEdge* const falseEdge = fgAddRefPred(fallbackBb, tlsRootNullCondBB);
+    tlsRootNullCondBB->SetTrueEdge(trueEdge);
+    tlsRootNullCondBB->SetFalseEdge(falseEdge);
+    trueEdge->setLikelihood(1.0);
+    falseEdge->setLikelihood(0.0);
 
-    fgAddRefPred(block, fallbackBb);
-    fgAddRefPred(block, fastPathBb);
+    {
+        FlowEdge* const newEdge = fgAddRefPred(block, fallbackBb);
+        fallbackBb->SetTargetEdge(newEdge);
+    }
 
-    tlsRootNullCondBB->SetTrueTarget(fastPathBb);
-    tlsRootNullCondBB->SetFalseTarget(fallbackBb);
+    {
+        FlowEdge* const newEdge = fgAddRefPred(block, fastPathBb);
+        fastPathBb->SetTargetEdge(newEdge);
+    }
 
     // Inherit the weights
     block->inheritWeight(prevBb);
@@ -701,9 +745,7 @@ bool Compiler::fgExpandThreadLocalAccessForCallNativeAOT(BasicBlock** pBlock, St
     // fallback will just execute first time
     fallbackBb->bbSetRunRarely();
 
-    fgRemoveRefPred(block, prevBb);
-    fgAddRefPred(tlsRootNullCondBB, prevBb);
-    prevBb->SetTarget(tlsRootNullCondBB);
+    fgRedirectTargetEdge(prevBb, tlsRootNullCondBB);
 
     // All blocks are expected to be in the same EH region
     assert(BasicBlock::sameEHRegion(prevBb, block));
@@ -1027,7 +1069,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement*
     // fallbackBb
     GenTree*    fallbackValueDef = gtNewStoreLclVarNode(threadStaticBlockLclNum, call);
     BasicBlock* fallbackBb =
-        fgNewBBFromTreeAfter(BBJ_ALWAYS, threadStaticBlockNullCondBB, fallbackValueDef, debugInfo, block, true);
+        fgNewBBFromTreeAfter(BBJ_ALWAYS, threadStaticBlockNullCondBB, fallbackValueDef, debugInfo, true);
 
     // fastPathBb
     if (isGCThreadStatic)
@@ -1042,32 +1084,41 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement*
 
     GenTree* fastPathValueDef =
         gtNewStoreLclVarNode(threadStaticBlockLclNum, gtCloneExpr(threadStaticBlockBaseLclValueUse));
-    BasicBlock* fastPathBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, fastPathValueDef, debugInfo, block, true);
-
-    // Set maxThreadStaticBlocksCondBB's jump targets
-    maxThreadStaticBlocksCondBB->SetTrueTarget(fallbackBb);
-    maxThreadStaticBlocksCondBB->SetFalseTarget(threadStaticBlockNullCondBB);
-
-    // Set threadStaticBlockNullCondBB's jump targets
-    threadStaticBlockNullCondBB->SetTrueTarget(fastPathBb);
-    threadStaticBlockNullCondBB->SetFalseTarget(fallbackBb);
+    BasicBlock* fastPathBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, fastPathValueDef, debugInfo, true);
 
     //
     // Update preds in all new blocks
     //
     assert(prevBb->KindIs(BBJ_ALWAYS));
-    prevBb->SetTarget(maxThreadStaticBlocksCondBB);
-    fgRemoveRefPred(block, prevBb);
-    fgAddRefPred(maxThreadStaticBlocksCondBB, prevBb);
+    fgRedirectTargetEdge(prevBb, maxThreadStaticBlocksCondBB);
 
-    fgAddRefPred(threadStaticBlockNullCondBB, maxThreadStaticBlocksCondBB);
-    fgAddRefPred(fallbackBb, maxThreadStaticBlocksCondBB);
+    {
+        FlowEdge* const trueEdge  = fgAddRefPred(fallbackBb, maxThreadStaticBlocksCondBB);
+        FlowEdge* const falseEdge = fgAddRefPred(threadStaticBlockNullCondBB, maxThreadStaticBlocksCondBB);
+        maxThreadStaticBlocksCondBB->SetTrueEdge(trueEdge);
+        maxThreadStaticBlocksCondBB->SetFalseEdge(falseEdge);
+        trueEdge->setLikelihood(0.0);
+        falseEdge->setLikelihood(1.0);
+    }
 
-    fgAddRefPred(fastPathBb, threadStaticBlockNullCondBB);
-    fgAddRefPred(fallbackBb, threadStaticBlockNullCondBB);
+    {
+        FlowEdge* const trueEdge  = fgAddRefPred(fastPathBb, threadStaticBlockNullCondBB);
+        FlowEdge* const falseEdge = fgAddRefPred(fallbackBb, threadStaticBlockNullCondBB);
+        threadStaticBlockNullCondBB->SetTrueEdge(trueEdge);
+        threadStaticBlockNullCondBB->SetFalseEdge(falseEdge);
+        trueEdge->setLikelihood(1.0);
+        falseEdge->setLikelihood(0.0);
+    }
 
-    fgAddRefPred(block, fastPathBb);
-    fgAddRefPred(block, fallbackBb);
+    {
+        FlowEdge* const newEdge = fgAddRefPred(block, fastPathBb);
+        fastPathBb->SetTargetEdge(newEdge);
+    }
+
+    {
+        FlowEdge* const newEdge = fgAddRefPred(block, fallbackBb);
+        fallbackBb->SetTargetEdge(newEdge);
+    }
 
     // Inherit the weights
     block->inheritWeight(prevBb);
@@ -1347,14 +1398,12 @@ bool Compiler::fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, G
     GenTree* isInitedCmp = gtNewOperNode(GT_EQ, TYP_INT, isInitedActualValueNode, isInitedExpectedValue);
     isInitedCmp->gtFlags |= GTF_RELOP_JMP_USED;
     BasicBlock* isInitedBb =
-        fgNewBBFromTreeAfter(BBJ_COND, prevBb, gtNewOperNode(GT_JTRUE, TYP_VOID, isInitedCmp), debugInfo, block);
+        fgNewBBFromTreeAfter(BBJ_COND, prevBb, gtNewOperNode(GT_JTRUE, TYP_VOID, isInitedCmp), debugInfo);
 
     // Fallback basic block
     // TODO-CQ: for JIT we can replace the original call with CORINFO_HELP_INITCLASS
     // that only accepts a single argument
-    BasicBlock* helperCallBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, isInitedBb, call, debugInfo, isInitedBb->Next(), true);
-    assert(helperCallBb->JumpsToNext());
-    helperCallBb->SetFlags(BBF_NONE_QUIRK);
+    BasicBlock* helperCallBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, isInitedBb, call, debugInfo, true);
 
     GenTree* replacementNode = nullptr;
     if (retValKind == SHRV_STATIC_BASE_PTR)
@@ -1413,23 +1462,26 @@ bool Compiler::fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, G
     // Update preds in all new blocks
     //
 
-    // Unlink block and prevBb
-    fgRemoveRefPred(block, prevBb);
-
-    // Block has two preds now: either isInitedBb or helperCallBb
-    fgAddRefPred(block, isInitedBb);
-    fgAddRefPred(block, helperCallBb);
-
-    // prevBb always flows into isInitedBb
-    assert(prevBb->KindIs(BBJ_ALWAYS));
-    prevBb->SetTarget(isInitedBb);
-    prevBb->SetFlags(BBF_NONE_QUIRK);
+    // Redirect prevBb from block to isInitedBb
+    fgRedirectTargetEdge(prevBb, isInitedBb);
     assert(prevBb->JumpsToNext());
-    fgAddRefPred(isInitedBb, prevBb);
 
-    // Both fastPathBb and helperCallBb have a single common pred - isInitedBb
-    isInitedBb->SetFalseTarget(helperCallBb);
-    fgAddRefPred(helperCallBb, isInitedBb);
+    {
+        // Block has two preds now: either isInitedBb or helperCallBb
+        FlowEdge* const newEdge = fgAddRefPred(block, helperCallBb);
+        helperCallBb->SetTargetEdge(newEdge);
+        assert(helperCallBb->JumpsToNext());
+    }
+
+    {
+        // Both fastPathBb and helperCallBb have a single common pred - isInitedBb
+        FlowEdge* const trueEdge  = fgAddRefPred(block, isInitedBb);
+        FlowEdge* const falseEdge = fgAddRefPred(helperCallBb, isInitedBb);
+        isInitedBb->SetTrueEdge(trueEdge);
+        isInitedBb->SetFalseEdge(falseEdge);
+        trueEdge->setLikelihood(1.0);
+        falseEdge->setLikelihood(0.0);
+    }
 
     //
     // Re-distribute weights
@@ -1451,6 +1503,10 @@ bool Compiler::fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, G
 
     // Clear gtInitClsHnd as a mark that we've already visited this call
     call->gtInitClsHnd = NO_CLASS_HANDLE;
+
+    // The blocks and statements have been modified enough to make the ending offset invalid.
+    block->bbCodeOffsEnd = BAD_IL_OFFSET;
+
     return true;
 }
 
@@ -1658,7 +1714,7 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock,
     //
     // Block 1: lengthCheckBb (we check that dstLen < srcLen)
     //
-    BasicBlock* lengthCheckBb = fgNewBBafter(BBJ_COND, prevBb, true, block);
+    BasicBlock* const lengthCheckBb = fgNewBBafter(BBJ_COND, prevBb, true);
     lengthCheckBb->SetFlags(BBF_INTERNAL);
 
     // Set bytesWritten -1 by default, if the fast path is not taken we'll return it as the result.
@@ -1680,9 +1736,8 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock,
     // In theory, we could just emit the const U8 data to the data section and use GT_BLK here
     // but that would be a bit less efficient since we would have to load the data from memory.
     //
-    BasicBlock* fastpathBb = fgNewBBafter(BBJ_ALWAYS, lengthCheckBb, true, lengthCheckBb->Next());
-    assert(fastpathBb->JumpsToNext());
-    fastpathBb->SetFlags(BBF_INTERNAL | BBF_NONE_QUIRK);
+    BasicBlock* fastpathBb = fgNewBBafter(BBJ_ALWAYS, lengthCheckBb, true);
+    fastpathBb->SetFlags(BBF_INTERNAL);
 
     // The widest type we can use for loads
     const var_types maxLoadType = roundDownMaxType(srcLenU8);
@@ -1734,20 +1789,28 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock,
     //
     // Update preds in all new blocks
     //
-    // block is no longer a predecessor of prevBb
-    fgRemoveRefPred(block, prevBb);
-    // prevBb flows into lengthCheckBb
-    assert(prevBb->KindIs(BBJ_ALWAYS));
-    prevBb->SetTarget(lengthCheckBb);
-    prevBb->SetFlags(BBF_NONE_QUIRK);
+    // Redirect prevBb to lengthCheckBb
+    fgRedirectTargetEdge(prevBb, lengthCheckBb);
     assert(prevBb->JumpsToNext());
-    fgAddRefPred(lengthCheckBb, prevBb);
-    // lengthCheckBb has two successors: block and fastpathBb
-    lengthCheckBb->SetFalseTarget(fastpathBb);
-    fgAddRefPred(fastpathBb, lengthCheckBb);
-    fgAddRefPred(block, lengthCheckBb);
-    // fastpathBb flows into block
-    fgAddRefPred(block, fastpathBb);
+
+    {
+        // lengthCheckBb has two successors: block and fastpathBb
+        FlowEdge* const trueEdge  = fgAddRefPred(block, lengthCheckBb);
+        FlowEdge* const falseEdge = fgAddRefPred(fastpathBb, lengthCheckBb);
+        lengthCheckBb->SetTrueEdge(trueEdge);
+        lengthCheckBb->SetFalseEdge(falseEdge);
+
+        // review: we assume length check always succeeds??
+        trueEdge->setLikelihood(1.0);
+        falseEdge->setLikelihood(0.0);
+    }
+
+    {
+        // fastpathBb flows into block
+        FlowEdge* const newEdge = fgAddRefPred(block, fastpathBb);
+        fastpathBb->SetTargetEdge(newEdge);
+        assert(fastpathBb->JumpsToNext());
+    }
 
     //
     // Re-distribute weights
@@ -1805,6 +1868,7 @@ PhaseStatus Compiler::fgLateCastExpansion()
 
 enum class TypeCheckFailedAction
 {
+    Unknown,
     ReturnNull,
     CallHelper,
     CallHelper_Specialized,
@@ -1813,8 +1877,10 @@ enum class TypeCheckFailedAction
 
 enum class TypeCheckPassedAction
 {
+    Unknown,
     ReturnObj,
     ReturnNull,
+    CallHelper_AlwaysThrows,
 };
 
 // Some arbitrary limit on the number of guesses we can make
@@ -1846,6 +1912,9 @@ static int PickCandidatesForTypeCheck(Compiler*              comp,
                                       TypeCheckFailedAction* typeCheckFailed,
                                       TypeCheckPassedAction* typeCheckPassed)
 {
+    *typeCheckFailed = TypeCheckFailedAction::Unknown;
+    *typeCheckPassed = TypeCheckPassedAction::Unknown;
+
     if (!castHelper->IsHelperCall() || ((castHelper->gtCallMoreFlags & GTF_CALL_M_CAST_CAN_BE_EXPANDED) == 0))
     {
         // It's not eligible for expansion (already expanded in importer)
@@ -1876,13 +1945,13 @@ static int PickCandidatesForTypeCheck(Compiler*              comp,
             isCastClass = false;
             break;
 
-        // These are never expanded:
-        // CORINFO_HELP_ISINSTANCEOF_EXCEPTION
-        // CORINFO_HELP_CHKCASTCLASS_SPECIAL
-        // CORINFO_HELP_READYTORUN_ISINSTANCEOF,
-        // CORINFO_HELP_READYTORUN_CHKCAST,
+            // These are never expanded:
+            // CORINFO_HELP_ISINSTANCEOF_EXCEPTION
+            // CORINFO_HELP_CHKCASTCLASS_SPECIAL
+            // CORINFO_HELP_READYTORUN_ISINSTANCEOF,
+            // CORINFO_HELP_READYTORUN_CHKCAST,
 
-        // Other helper calls are not cast helpers
+            // Other helper calls are not cast helpers
 
         default:
             return 0;
@@ -1891,13 +1960,35 @@ static int PickCandidatesForTypeCheck(Compiler*              comp,
     // First, let's grab the expected class we're casting to/checking instance of:
     // E.g. "call CORINFO_HELP_ISINSTANCEOFCLASS(castToCls, obj)"
     GenTree*             clsArg    = castHelper->gtArgs.GetUserArgByIndex(0)->GetNode();
+    GenTree*             objArg    = castHelper->gtArgs.GetUserArgByIndex(1)->GetNode();
     CORINFO_CLASS_HANDLE castToCls = comp->gtGetHelperArgClassHandle(clsArg);
     if (castToCls == NO_CLASS_HANDLE)
     {
-        // clsArg doesn't represent a class handle - bail out
-        // TODO-InlineCast: if CSE becomes a problem - move the whole phase after assertion prop,
-        // so we can still rely on VN to get the class handle.
-        JITDUMP("clsArg is not a constant handle - bail out.\n");
+        // If we don't see the constant class handle, we still can speculatively expand it
+        // for castclass case (we'll just take the unknown tree as a type check tree)
+        switch (helper)
+        {
+            case CORINFO_HELP_CHKCASTCLASS:
+            case CORINFO_HELP_CHKCASTARRAY:
+            case CORINFO_HELP_CHKCASTANY:
+                likelihoods[0] = 50; // 50% speculative guess
+                candidates[0]  = NO_CLASS_HANDLE;
+                return 1;
+
+            default:
+                // Otherwise, bail out. We don't expect the constant handles to be CSE'd as they normally
+                // have GTF_DONT_CSE flag set on them for cast helpers.
+                // TODO-InlineCast: One missing case to handle is isinst against Class<_Canon>
+                return 0;
+        }
+    }
+
+    if ((objArg->gtFlags & GTF_ALL_EFFECT) != 0 && comp->lvaHaveManyLocals())
+    {
+        // TODO: Revise this:
+        //  * Some casts are profitable even when ran out of tracked locals
+        //  * We might want to use a shared local in all casts (similar to what we do boxing)
+        JITDUMP("lvaHaveManyLocals() is true and objArg has side effects - bail out.")
         return 0;
     }
 
@@ -1917,6 +2008,31 @@ static int PickCandidatesForTypeCheck(Compiler*              comp,
 
     const unsigned isAbstractFlags = CORINFO_FLG_INTERFACE | CORINFO_FLG_ABSTRACT;
 
+    // See what we already know about the type of the object being cast.
+    bool                 fromClassIsExact   = false;
+    bool                 fromClassIsNonNull = false;
+    CORINFO_CLASS_HANDLE fromClass          = comp->gtGetClassHandle(objArg, &fromClassIsExact, &fromClassIsNonNull);
+    if ((fromClass != NO_CLASS_HANDLE) && fromClassIsExact)
+    {
+        if (fromClassIsNonNull)
+        {
+            // An additional hint for the expansion that the object is not null
+            castHelper->gtCallMoreFlags |= GTF_CALL_M_CAST_OBJ_NONNULL;
+        }
+
+        const TypeCompareState castResult = comp->info.compCompHnd->compareTypesForCast(fromClass, castToCls);
+        if (isCastClass && (castResult == TypeCompareState::MustNot))
+        {
+            // The cast is guaranteed to fail, the expansion logic can skip the type check entirely
+            *typeCheckPassed = TypeCheckPassedAction::CallHelper_AlwaysThrows;
+            return 0;
+        }
+
+        // TODO-InlineCast:
+        // isinst and MustNot         -> just return null
+        // isinst/castclass and Must  -> just return obj
+    }
+
     //
     // Now we need to figure out what classes to use for the fast path, we have 4 options:
     //  1) If "cast to" class is already exact we can go ahead and make some decisions
@@ -2119,36 +2235,29 @@ static int PickCandidatesForTypeCheck(Compiler*              comp,
             return 0;
 
         case CORINFO_HELP_CHKCASTARRAY:
-            // CHKCASTARRAY against exact classes is already handled above, so it's not exact here.
-            //
-            //   (int[])obj - can we use int[] as a guess? No! It's an overhead if obj is uint[]
-            //                or any int-backed enum
-            return 0;
-
         case CORINFO_HELP_CHKCASTCLASS:
-            // CHKCASTCLASS against exact classes is already handled above, so it's not exact here.
+        case CORINFO_HELP_CHKCASTANY:
+            // These casts against exact classes are already handled above, so it's not exact here.
             //
             // let's use castToCls as a guess, we might regress some cases, but at least we know that unrelated
             // types are going to throw InvalidCastException, so we can assume the overhead happens rarely.
-            candidates[0] = castToCls;
-            // 50% chance of successful type check (speculative guess)
-            likelihoods[0] = 50;
             //
-            // A small optimization - use a slightly faster fallback which assumes that we've already checked
-            // for null and for castToCls itself, so it won't do it again.
-            *typeCheckFailed = TypeCheckFailedAction::CallHelper_Specialized;
-            return 1;
-
-        case CORINFO_HELP_CHKCASTANY:
-            // Same as CORINFO_HELP_CHKCASTCLASS above, the only difference - let's check castToCls for
-            // being non-abstract and non-interface first as it makes no sense to speculate on those.
             if ((comp->info.compCompHnd->getClassAttribs(castToCls) & isAbstractFlags) != 0)
             {
+                // The guess is abstract - it will never pass the type check
                 return 0;
             }
             candidates[0] = castToCls;
             // 50% chance of successful type check (speculative guess)
             likelihoods[0] = 50;
+            //
+            // A small optimization - use a slightly faster fallback which assumes that we've already checked
+            // for null and for castToCls itself, so it won't do it again.
+            //
+            if (helper == CORINFO_HELP_CHKCASTCLASS)
+            {
+                *typeCheckFailed = TypeCheckFailedAction::CallHelper_Specialized;
+            }
             return 1;
 
         case CORINFO_HELP_ISINSTANCEOFINTERFACE:
@@ -2201,7 +2310,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,
 
     const int numOfCandidates = PickCandidatesForTypeCheck(this, call, expectedExactClasses, &commonCls, likelihoods,
                                                            &typeCheckFailedAction, &typeCheckPassedAction);
-    if (numOfCandidates == 0)
+    if ((numOfCandidates == 0) && (typeCheckPassedAction != TypeCheckPassedAction::CallHelper_AlwaysThrows))
     {
         return false;
     }
@@ -2256,13 +2365,21 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,
     //     use(tmp);
     //
 
+    // NOTE: if the cast is known to always fail (TypeCheckPassedAction::CallHelper_AlwaysThrows)
+    // we can omit the typeCheckBb and typeCheckSucceedBb and only have:
+    //
+    // if (obj == null) goto lastBb;
+    // throw InvalidCastException;
+    //
+    // if obj is known to be non-null, then it will be just the throw block.
+
     // Block 1: nullcheckBb
     // TODO-InlineCast: assertionprop should leave us a mark that objArg is never null, so we can omit this check
     // it's too late to rely on upstream phases to do this for us (unless we do optRepeat).
     GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, tmpNode, gtNewNull());
     nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED;
-    BasicBlock* nullcheckBb = fgNewBBFromTreeAfter(BBJ_COND, firstBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp),
-                                                   debugInfo, lastBb, true);
+    BasicBlock* nullcheckBb =
+        fgNewBBFromTreeAfter(BBJ_COND, firstBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), debugInfo, true);
 
     // The very first statement in the whole expansion is to assign obj to tmp.
     // We assume it's the value we're going to return in most cases.
@@ -2278,27 +2395,59 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,
     BasicBlock* lastTypeCheckBb                 = nullcheckBb;
     for (int candidateId = 0; candidateId < numOfCandidates; candidateId++)
     {
-        GenTree* likelyClsNode = gtNewIconEmbClsHndNode(expectedExactClasses[candidateId]);
-        GenTree* mtCheck = gtNewOperNode(GT_EQ, TYP_INT, gtNewMethodTableLookup(gtCloneExpr(tmpNode)), likelyClsNode);
+        const CORINFO_CLASS_HANDLE expectedCls = expectedExactClasses[candidateId];
+        // if expectedCls is NO_CLASS_HANDLE, it means we should just use the original clsArg
+        GenTree* expectedClsNode = expectedCls != NO_CLASS_HANDLE
+                                       ? gtNewIconEmbClsHndNode(expectedCls)
+                                       : gtCloneExpr(call->gtArgs.GetUserArgByIndex(0)->GetNode());
+
+        // Manually CSE the expectedClsNode for first type check if it's the same as the original clsArg
+        // TODO-InlineCast: consider not doing this if the helper call is cold
+        GenTree* storeCseVal = nullptr;
+        if (candidateId == 0)
+        {
+            GenTree*& castArg = call->gtArgs.GetUserArgByIndex(0)->LateNodeRef();
+            if (GenTree::Compare(castArg, expectedClsNode))
+            {
+                const unsigned clsTmp = lvaGrabTemp(true DEBUGARG("CSE for expectedClsNode"));
+                storeCseVal           = gtNewTempStore(clsTmp, expectedClsNode);
+                expectedClsNode       = gtNewLclvNode(clsTmp, TYP_I_IMPL);
+                castArg               = gtNewLclvNode(clsTmp, TYP_I_IMPL);
+            }
+        }
+
+        GenTree* mtCheck = gtNewOperNode(GT_EQ, TYP_INT, gtNewMethodTableLookup(gtCloneExpr(tmpNode)), expectedClsNode);
         mtCheck->gtFlags |= GTF_RELOP_JMP_USED;
         GenTree* jtrue             = gtNewOperNode(GT_JTRUE, TYP_VOID, mtCheck);
-        typeChecksBbs[candidateId] = fgNewBBFromTreeAfter(BBJ_COND, lastTypeCheckBb, jtrue, debugInfo, lastBb, true);
+        typeChecksBbs[candidateId] = fgNewBBFromTreeAfter(BBJ_COND, lastTypeCheckBb, jtrue, debugInfo, true);
         lastTypeCheckBb            = typeChecksBbs[candidateId];
+
+        // Insert the CSE node as the first statement in the block
+        if (storeCseVal != nullptr)
+        {
+            Statement* clsStmt = fgNewStmtAtBeg(typeChecksBbs[0], storeCseVal, debugInfo);
+            gtSetStmtInfo(clsStmt);
+            fgSetStmtSeq(clsStmt);
+        }
     }
 
+    // numOfCandidates being 0 means that we don't need any type checks
+    // as we already know that the cast is going to fail.
+    const bool typeCheckNotNeeded = numOfCandidates == 0;
+
     // Block 3: fallbackBb
     BasicBlock* fallbackBb;
-    if (typeCheckFailedAction == TypeCheckFailedAction::CallHelper_AlwaysThrows)
+    if (typeCheckNotNeeded || (typeCheckFailedAction == TypeCheckFailedAction::CallHelper_AlwaysThrows))
     {
         // fallback call is used only to throw InvalidCastException
         call->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN;
-        fallbackBb = fgNewBBFromTreeAfter(BBJ_THROW, lastTypeCheckBb, call, debugInfo, nullptr, true);
+        fallbackBb = fgNewBBFromTreeAfter(BBJ_THROW, lastTypeCheckBb, call, debugInfo, true);
     }
     else if (typeCheckFailedAction == TypeCheckFailedAction::ReturnNull)
     {
         // if fallback call is not needed, we just assign null to tmp
         GenTree* fallbackTree = gtNewTempStore(tmpNum, gtNewNull());
-        fallbackBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, lastTypeCheckBb, fallbackTree, debugInfo, lastBb, true);
+        fallbackBb            = fgNewBBFromTreeAfter(BBJ_ALWAYS, lastTypeCheckBb, fallbackTree, debugInfo, true);
     }
     else
     {
@@ -2309,7 +2458,7 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,
             call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_CHKCASTCLASS_SPECIAL);
         }
         GenTree* fallbackTree = gtNewTempStore(tmpNum, call);
-        fallbackBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, lastTypeCheckBb, fallbackTree, debugInfo, lastBb, true);
+        fallbackBb            = fgNewBBFromTreeAfter(BBJ_ALWAYS, lastTypeCheckBb, fallbackTree, debugInfo, true);
     }
 
     // Block 4: typeCheckSucceedBb
@@ -2320,19 +2469,50 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,
     }
     else
     {
-        assert(typeCheckPassedAction == TypeCheckPassedAction::ReturnObj);
         // No-op because tmp was already assigned to obj
         typeCheckSucceedTree = gtNewNothingNode();
     }
-    BasicBlock* typeCheckSucceedBb =
-        fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, typeCheckSucceedTree, debugInfo, lastBb);
 
     //
     // Wire up the blocks
     //
-    firstBb->SetTarget(nullcheckBb);
-    nullcheckBb->SetTrueTarget(lastBb);
-    nullcheckBb->SetFalseTarget(typeChecksBbs[0]);
+
+    // We assume obj is 50%/50% null/not-null (TODO-InlineCast: rely on PGO)
+    // and rely on profile for the slow path.
+    //
+    // Alternatively we could profile nulls in the reservoir sample and
+    // treat that as another "option".
+    //
+    // True out of the null check means obj is null.
+    //
+    const weight_t nullcheckTrueLikelihood  = 0.5;
+    const weight_t nullcheckFalseLikelihood = 0.5;
+    BasicBlock*    typeCheckSucceedBb;
+
+    {
+        FlowEdge* const trueEdge = fgAddRefPred(lastBb, nullcheckBb);
+        nullcheckBb->SetTrueEdge(trueEdge);
+        trueEdge->setLikelihood(nullcheckTrueLikelihood);
+    }
+
+    if (typeCheckNotNeeded)
+    {
+        FlowEdge* const falseEdge = fgAddRefPred(fallbackBb, nullcheckBb);
+        nullcheckBb->SetFalseEdge(falseEdge);
+        falseEdge->setLikelihood(nullcheckFalseLikelihood);
+
+        typeCheckSucceedBb = nullptr;
+    }
+    else
+    {
+        FlowEdge* const falseEdge = fgAddRefPred(typeChecksBbs[0], nullcheckBb);
+        nullcheckBb->SetFalseEdge(falseEdge);
+        falseEdge->setLikelihood(nullcheckFalseLikelihood);
+
+        typeCheckSucceedBb      = fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, typeCheckSucceedTree, debugInfo);
+        FlowEdge* const newEdge = fgAddRefPred(lastBb, typeCheckSucceedBb);
+        typeCheckSucceedBb->SetTargetEdge(newEdge);
+    }
 
     // Tricky case - wire up multiple type check blocks (in most cases there is only one)
     for (int candidateId = 0; candidateId < numOfCandidates; candidateId++)
@@ -2340,59 +2520,116 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,
         BasicBlock* curTypeCheckBb = typeChecksBbs[candidateId];
 
         // All type checks jump straight to the typeCheckSucceedBb on success
-        curTypeCheckBb->SetTrueTarget(typeCheckSucceedBb);
-        fgAddRefPred(typeCheckSucceedBb, curTypeCheckBb);
+        FlowEdge* const trueEdge = fgAddRefPred(typeCheckSucceedBb, curTypeCheckBb);
+        curTypeCheckBb->SetTrueEdge(trueEdge);
 
         // or ...
         if (candidateId == numOfCandidates - 1)
         {
             // ... jump to the fallbackBb on last type check's failure
-            curTypeCheckBb->SetFalseTarget(fallbackBb);
-            fgAddRefPred(fallbackBb, curTypeCheckBb);
+            FlowEdge* const falseEdge = fgAddRefPred(fallbackBb, curTypeCheckBb);
+            curTypeCheckBb->SetFalseEdge(falseEdge);
         }
         else
         {
             // ... jump to the next type check on failure
-            curTypeCheckBb->SetFalseTarget(typeChecksBbs[candidateId + 1]);
-            fgAddRefPred(typeChecksBbs[candidateId + 1], curTypeCheckBb);
+            FlowEdge* const falseEdge = fgAddRefPred(typeChecksBbs[candidateId + 1], curTypeCheckBb);
+            curTypeCheckBb->SetFalseEdge(falseEdge);
         }
     }
 
-    fgRemoveRefPred(lastBb, firstBb);
-    fgAddRefPred(nullcheckBb, firstBb);
-    fgAddRefPred(typeChecksBbs[0], nullcheckBb);
-    fgAddRefPred(lastBb, nullcheckBb);
-    fgAddRefPred(lastBb, typeCheckSucceedBb);
-    if (typeCheckFailedAction != TypeCheckFailedAction::CallHelper_AlwaysThrows)
-    {
-        // if fallbackBb is BBJ_THROW then it has no successors
-        fgAddRefPred(lastBb, fallbackBb);
-    }
+    fgRedirectTargetEdge(firstBb, nullcheckBb);
 
     //
-    // Re-distribute weights
+    // Re-distribute weights and set edge likelihoods.
+    //
+    // We have the likelihood estimate for each class to test against.
+    // As with multi-guess GDV, once we've failed the first check, the
+    // likelihood of the other checks will increase.
+    //
+    // For instance, suppose we have 3 classes A, B, C, with likelihoods 0.5, 0.2, 0.1,
+    // and a residual 0.2 likelihood for none of the above.
+    //
+    // We first test for A, this is a 0.5 likelihood of success.
+    //
+    // If the test for A fails, we test for B. Because we only reach this second
+    // test with relative likelihood 0.5, we need to divide (scale up) the remaining
+    // likelihoods by 0.5, so we end up with 0.4, 0.2, (none of the above: 0.4).
+    //
+    // If the test for B also fails, we test for C. Because we only reach
+    // this second test with relative likelihood 0.6, we need to divide (scale up)
+    // the remaining likelihoods by by 0.6, so we end up with 0.33, (none of the above: 0.67).
+    //
+    // In the code below, instead of updating all the likelihoods each time,
+    // we remember how much  we need to divide by to fix the next likelihood. This divisor is
+    // 1.0 - (sumOfPreviousLikelihoods)
+    //
+    // So for the above, we have
+    //
+    //   Test  |  Likelihood  |  Sum of Previous  |   Rel. Likelihood
+    //    A    |      0.5     |        0.0        |   0.5 / (1 - 0.0) = 0.50
+    //    B    |      0.2     |        0.5        |   0.2 / (1 - 0.5) = 0.40
+    //    C    |      0.1     |        0.7        |   0.1 / (1 - 0.7) = 0.33
+    //   n/a   |      0.2     |        0.8        |   0.2 / (1 - 0.8) = 1.00
+    //
+    // The same goes for inherited weights -- the block where we test for B will have
+    // the weight of A times the likelihood that A's test fails, etc.
     //
     nullcheckBb->inheritWeight(firstBb);
-    unsigned totalLikelihood = 0;
+    weight_t sumOfPreviousLikelihood = 0;
     for (int candidateId = 0; candidateId < numOfCandidates; candidateId++)
     {
-        unsigned    likelihood     = likelihoods[candidateId];
         BasicBlock* curTypeCheckBb = typeChecksBbs[candidateId];
         if (candidateId == 0)
         {
-            // We assume obj is 50%/50% null/not-null (TODO-InlineCast: rely on PGO)
-            // and rely on profile for the slow path.
-            curTypeCheckBb->inheritWeightPercentage(nullcheckBb, 50);
+            // Predecessor is the nullcheck, control reaches on false.
+            //
+            curTypeCheckBb->inheritWeight(nullcheckBb);
+            curTypeCheckBb->scaleBBWeight(nullcheckBb->GetFalseEdge()->getLikelihood());
         }
         else
         {
-            BasicBlock* prevTypeCheckBb = typeChecksBbs[candidateId - 1];
-            curTypeCheckBb->inheritWeightPercentage(prevTypeCheckBb, likelihood);
+            // Predecessor is the prior type check, control reaches on false.
+            //
+            BasicBlock* const prevTypeCheckBb           = typeChecksBbs[candidateId - 1];
+            weight_t          prevCheckFailedLikelihood = prevTypeCheckBb->GetFalseEdge()->getLikelihood();
+            curTypeCheckBb->inheritWeight(prevTypeCheckBb);
+            curTypeCheckBb->scaleBBWeight(prevCheckFailedLikelihood);
         }
-        totalLikelihood += likelihood;
+
+        // Fix likelihood of block's outgoing edges.
+        //
+        weight_t likelihood    = (weight_t)likelihoods[candidateId] / 100;
+        weight_t relLikelihood = likelihood / (1.0 - sumOfPreviousLikelihood);
+
+        JITDUMP("Candidate %d: likelihood " FMT_WT " relative likelihood " FMT_WT "\n", candidateId, likelihood,
+                relLikelihood);
+
+        curTypeCheckBb->GetTrueEdge()->setLikelihood(relLikelihood);
+        curTypeCheckBb->GetFalseEdge()->setLikelihood(1.0 - relLikelihood);
+        sumOfPreviousLikelihood += likelihood;
     }
-    fallbackBb->inheritWeightPercentage(lastTypeCheckBb, fallbackBb->KindIs(BBJ_THROW) ? 0 : 100 - totalLikelihood);
-    typeCheckSucceedBb->inheritWeightPercentage(typeChecksBbs[0], totalLikelihood);
+
+    if (fallbackBb->KindIs(BBJ_THROW))
+    {
+        fallbackBb->bbSetRunRarely();
+    }
+    else
+    {
+        assert(fallbackBb->KindIs(BBJ_ALWAYS));
+        FlowEdge* const newEdge = fgAddRefPred(lastBb, fallbackBb);
+        fallbackBb->SetTargetEdge(newEdge);
+        fallbackBb->inheritWeight(lastTypeCheckBb);
+        weight_t lastTypeCheckFailedLikelihood = lastTypeCheckBb->GetFalseEdge()->getLikelihood();
+        fallbackBb->scaleBBWeight(lastTypeCheckFailedLikelihood);
+    }
+
+    if (!typeCheckNotNeeded)
+    {
+        typeCheckSucceedBb->inheritWeight(typeChecksBbs[0]);
+        typeCheckSucceedBb->scaleBBWeight(sumOfPreviousLikelihood);
+    }
+
     lastBb->inheritWeight(firstBb);
 
     //
@@ -2401,15 +2638,14 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,
     assert(BasicBlock::sameEHRegion(firstBb, lastBb));
     assert(BasicBlock::sameEHRegion(firstBb, nullcheckBb));
     assert(BasicBlock::sameEHRegion(firstBb, fallbackBb));
-    assert(BasicBlock::sameEHRegion(firstBb, lastTypeCheckBb));
 
     // call guarantees that obj is never null, we can drop the nullcheck
-    // by converting it to a BBJ_ALWAYS to typeCheckBb.
+    // by converting it to a BBJ_ALWAYS to its false target.
     if ((call->gtCallMoreFlags & GTF_CALL_M_CAST_OBJ_NONNULL) != 0)
     {
         fgRemoveStmt(nullcheckBb, nullcheckBb->lastStmt());
-        nullcheckBb->SetKindAndTarget(BBJ_ALWAYS, typeChecksBbs[0]);
-        fgRemoveRefPred(lastBb, nullcheckBb);
+        fgRemoveRefPred(nullcheckBb->GetTrueEdge());
+        nullcheckBb->SetKindAndTargetEdge(BBJ_ALWAYS, nullcheckBb->GetFalseEdge());
     }
 
     // Bonus step: merge prevBb with nullcheckBb as they are likely to be mergeable
diff --git a/src/coreclr/jit/host.h b/src/coreclr/jit/host.h
index 6667fbb3994a..d10eb93ca9a1 100644
--- a/src/coreclr/jit/host.h
+++ b/src/coreclr/jit/host.h
@@ -28,10 +28,10 @@ class LogEnv
 };
 
 bool vlogf(unsigned level, const char* fmt, va_list args);
-int vflogf(FILE* file, const char* fmt, va_list args);
+int  vflogf(FILE* file, const char* fmt, va_list args);
 
-int logf(const char* fmt, ...);
-int flogf(FILE* file, const char* fmt, ...);
+int  logf(const char* fmt, ...);
+int  flogf(FILE* file, const char* fmt, ...);
 void gcDump_logf(const char* fmt, ...);
 
 void logf(unsigned level, const char* fmt, ...);
diff --git a/src/coreclr/jit/hostallocator.h b/src/coreclr/jit/hostallocator.h
index a91f7f1fb4ab..0e8f192063fb 100644
--- a/src/coreclr/jit/hostallocator.h
+++ b/src/coreclr/jit/hostallocator.h
@@ -37,7 +37,7 @@ class HostAllocator final
 
 private:
     void* allocateHostMemory(size_t size);
-    void freeHostMemory(void* p);
+    void  freeHostMemory(void* p);
 };
 
 // Global operator new overloads that work with HostAllocator
diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp
index f771a9ec978e..53970ef4a746 100644
--- a/src/coreclr/jit/hwintrinsic.cpp
+++ b/src/coreclr/jit/hwintrinsic.cpp
@@ -778,7 +778,7 @@ GenTree* Compiler::getArgForHWIntrinsic(var_types            argType,
             {
                 arg = impSIMDPopStack();
             }
-            assert(varTypeIsSIMD(arg));
+            assert(varTypeIsSIMDOrMask(arg));
         }
         else
         {
@@ -832,7 +832,7 @@ GenTree* Compiler::addRangeCheckIfNeeded(
 #ifdef TARGET_XARCH
         && !HWIntrinsicInfo::isAVX2GatherIntrinsic(intrinsic) && !HWIntrinsicInfo::HasFullRangeImm(intrinsic)
 #endif
-            )
+    )
     {
         assert(!immOp->IsCnsIntOrI());
         assert(varTypeIsUnsigned(immOp));
@@ -1356,6 +1356,15 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
         compFloatingPointUsed = true;
     }
 
+    var_types nodeRetType = retType;
+#if defined(TARGET_ARM64)
+    if (HWIntrinsicInfo::ReturnsPerElementMask(intrinsic))
+    {
+        // Ensure the result is generated to a mask.
+        nodeRetType = TYP_MASK;
+    }
+#endif // defined(TARGET_ARM64)
+
     // table-driven importer of simple intrinsics
     if (impIsTableDrivenHWIntrinsic(intrinsic, category))
     {
@@ -1392,7 +1401,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
             case 0:
             {
                 assert(!isScalar);
-                retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, simdBaseJitType, simdSize);
+                retNode = gtNewSimdHWIntrinsicNode(nodeRetType, intrinsic, simdBaseJitType, simdSize);
                 break;
             }
 
@@ -1410,8 +1419,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
                     }
                 }
 
-                retNode = isScalar ? gtNewScalarHWIntrinsicNode(retType, op1, intrinsic)
-                                   : gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
+                retNode = isScalar ? gtNewScalarHWIntrinsicNode(nodeRetType, op1, intrinsic)
+                                   : gtNewSimdHWIntrinsicNode(nodeRetType, op1, intrinsic, simdBaseJitType, simdSize);
 
 #if defined(TARGET_XARCH)
                 switch (intrinsic)
@@ -1462,8 +1471,9 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
                 op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound);
                 op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);
 
-                retNode = isScalar ? gtNewScalarHWIntrinsicNode(retType, op1, op2, intrinsic)
-                                   : gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
+                retNode = isScalar
+                              ? gtNewScalarHWIntrinsicNode(nodeRetType, op1, op2, intrinsic)
+                              : gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, intrinsic, simdBaseJitType, simdSize);
 
 #ifdef TARGET_XARCH
                 if ((intrinsic == NI_SSE42_Crc32) || (intrinsic == NI_SSE42_X64_Crc32))
@@ -1543,9 +1553,9 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
                     op3 = addRangeCheckIfNeeded(intrinsic, op3, mustExpand, immLowerBound, immUpperBound);
                 }
 
-                retNode = isScalar
-                              ? gtNewScalarHWIntrinsicNode(retType, op1, op2, op3, intrinsic)
-                              : gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
+                retNode = isScalar ? gtNewScalarHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic)
+                                   : gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType,
+                                                              simdSize);
 
 #ifdef TARGET_XARCH
                 if ((intrinsic == NI_AVX2_GatherVector128) || (intrinsic == NI_AVX2_GatherVector256))
@@ -1566,7 +1576,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
                 op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd);
 
                 assert(!isScalar);
-                retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize);
+                retNode =
+                    gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize);
                 break;
             }
 
@@ -1576,8 +1587,29 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
     }
     else
     {
-        retNode = impSpecialIntrinsic(intrinsic, clsHnd, method, sig, simdBaseJitType, retType, simdSize);
+        retNode = impSpecialIntrinsic(intrinsic, clsHnd, method, sig, simdBaseJitType, nodeRetType, simdSize);
+    }
+
+#if defined(TARGET_ARM64)
+    if (HWIntrinsicInfo::IsMaskedOperation(intrinsic))
+    {
+        assert(numArgs > 0);
+        GenTree* op1 = retNode->AsHWIntrinsic()->Op(1);
+        if (!varTypeIsMask(op1))
+        {
+            // Op1 input is a vector. HWInstrinsic requires a mask.
+            retNode->AsHWIntrinsic()->Op(1) = gtNewSimdConvertVectorToMaskNode(retType, op1, simdBaseJitType, simdSize);
+        }
+    }
+
+    if (retType != nodeRetType)
+    {
+        // HWInstrinsic returns a mask, but all returns must be vectors, so convert mask to vector.
+        assert(HWIntrinsicInfo::ReturnsPerElementMask(intrinsic));
+        assert(nodeRetType == TYP_MASK);
+        retNode = gtNewSimdConvertMaskToVectorNode(retNode->AsHWIntrinsic(), retType);
     }
+#endif // defined(TARGET_ARM64)
 
     if ((retNode != nullptr) && retNode->OperIs(GT_HWINTRINSIC))
     {
diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h
index 4df1aace5287..5ca302e126f3 100644
--- a/src/coreclr/jit/hwintrinsic.h
+++ b/src/coreclr/jit/hwintrinsic.h
@@ -58,6 +58,7 @@ enum HWIntrinsicCategory : uint8_t
     HW_Category_ShiftLeftByImmediate,
     HW_Category_ShiftRightByImmediate,
     HW_Category_SIMDByIndexedElement,
+    HW_Category_EnumPattern,
 
     // Helper intrinsics
     // - do not directly correspond to a instruction, such as Vector64.AllBitsSet
@@ -175,6 +176,21 @@ enum HWIntrinsicFlag : unsigned int
 
     // The intrinsic needs consecutive registers
     HW_Flag_NeedsConsecutiveRegisters = 0x4000,
+
+    // The intrinsic uses scalable registers
+    HW_Flag_Scalable = 0x8000,
+
+    // Returns Per-Element Mask
+    // the intrinsic returns a vector containing elements that are either "all bits set" or "all bits clear"
+    // this output can be used as a per-element mask
+    HW_Flag_ReturnsPerElementMask = 0x10000,
+
+    // The intrinsic uses a mask in arg1 to select elements present in the result
+    HW_Flag_MaskedOperation = 0x20000,
+
+    // The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low register.
+    HW_Flag_LowMaskedOperation = 0x40000,
+
 #else
 #error Unsupported platform
 #endif
@@ -435,13 +451,13 @@ struct TernaryLogicInfo
     // We have 256 entries, so we compress as much as possible
     // This gives us 3-bytes per entry (21-bits)
 
-    TernaryLogicOperKind oper1 : 4;
+    TernaryLogicOperKind oper1    : 4;
     TernaryLogicUseFlags oper1Use : 3;
 
-    TernaryLogicOperKind oper2 : 4;
+    TernaryLogicOperKind oper2    : 4;
     TernaryLogicUseFlags oper2Use : 3;
 
-    TernaryLogicOperKind oper3 : 4;
+    TernaryLogicOperKind oper3    : 4;
     TernaryLogicUseFlags oper3Use : 3;
 
     static const TernaryLogicInfo& lookup(uint8_t control);
@@ -475,11 +491,11 @@ struct HWIntrinsicInfo
 
     static const HWIntrinsicInfo& lookup(NamedIntrinsic id);
 
-    static NamedIntrinsic lookupId(Compiler*         comp,
-                                   CORINFO_SIG_INFO* sig,
-                                   const char*       className,
-                                   const char*       methodName,
-                                   const char*       enclosingClassName);
+    static NamedIntrinsic         lookupId(Compiler*         comp,
+                                           CORINFO_SIG_INFO* sig,
+                                           const char*       className,
+                                           const char*       methodName,
+                                           const char*       enclosingClassName);
     static CORINFO_InstructionSet lookupIsa(const char* className, const char* enclosingClassName);
 
     static unsigned lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORINFO_SIG_INFO* sig);
@@ -498,7 +514,7 @@ struct HWIntrinsicInfo
     static bool isScalarIsa(CORINFO_InstructionSet isa);
 
 #ifdef TARGET_XARCH
-    static bool isAVX2GatherIntrinsic(NamedIntrinsic id);
+    static bool                isAVX2GatherIntrinsic(NamedIntrinsic id);
     static FloatComparisonMode lookupFloatComparisonModeForSwappedArgs(FloatComparisonMode comparison);
 #endif
 
@@ -607,21 +623,6 @@ struct HWIntrinsicInfo
         HWIntrinsicFlag flags = lookupFlags(id);
         return (flags & HW_Flag_EmbMaskingIncompatible) == 0;
     }
-
-    static size_t EmbRoundingArgPos(NamedIntrinsic id)
-    {
-        // This helper function returns the expected position,
-        // where the embedded rounding control argument should be.
-        assert(IsEmbRoundingCompatible(id));
-        switch (id)
-        {
-            case NI_AVX512F_Add:
-                return 3;
-
-            default:
-                unreached();
-        }
-    }
 #endif // TARGET_XARCH
 
     static bool CanBenefitFromConstantProp(NamedIntrinsic id)
@@ -669,10 +670,8 @@ struct HWIntrinsicInfo
     static bool ReturnsPerElementMask(NamedIntrinsic id)
     {
         HWIntrinsicFlag flags = lookupFlags(id);
-#if defined(TARGET_XARCH)
+#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
         return (flags & HW_Flag_ReturnsPerElementMask) != 0;
-#elif defined(TARGET_ARM64)
-        unreached();
 #else
 #error Unsupported platform
 #endif
@@ -863,6 +862,25 @@ struct HWIntrinsicInfo
         const HWIntrinsicFlag flags = lookupFlags(id);
         return (flags & HW_Flag_HasImmediateOperand) != 0;
     }
+
+    static bool IsScalable(NamedIntrinsic id)
+    {
+        const HWIntrinsicFlag flags = lookupFlags(id);
+        return (flags & HW_Flag_Scalable) != 0;
+    }
+
+    static bool IsMaskedOperation(NamedIntrinsic id)
+    {
+        const HWIntrinsicFlag flags = lookupFlags(id);
+        return ((flags & HW_Flag_MaskedOperation) != 0) || IsLowMaskedOperation(id);
+    }
+
+    static bool IsLowMaskedOperation(NamedIntrinsic id)
+    {
+        const HWIntrinsicFlag flags = lookupFlags(id);
+        return (flags & HW_Flag_LowMaskedOperation) != 0;
+    }
+
 #endif // TARGET_ARM64
 
     static bool HasSpecialSideEffect(NamedIntrinsic id)
@@ -909,7 +927,12 @@ struct HWIntrinsicInfo
 struct HWIntrinsic final
 {
     HWIntrinsic(const GenTreeHWIntrinsic* node)
-        : op1(nullptr), op2(nullptr), op3(nullptr), op4(nullptr), numOperands(0), baseType(TYP_UNDEF)
+        : op1(nullptr)
+        , op2(nullptr)
+        , op3(nullptr)
+        , op4(nullptr)
+        , numOperands(0)
+        , baseType(TYP_UNDEF)
     {
         assert(node != nullptr);
 
@@ -922,7 +945,7 @@ struct HWIntrinsic final
         InitializeBaseType(node);
     }
 
-    bool IsTableDriven() const
+    bool codeGenIsTableDriven() const
     {
         // TODO-Arm64-Cleanup - make more categories to the table-driven framework
         bool isTableDrivenCategory = category != HW_Category_Helper;
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 0561ac2adadd..8e3288f75d70 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -280,6 +280,20 @@ void HWIntrinsicInfo::lookupImmBounds(
                 immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType) - 1;
                 break;
 
+            case NI_Sve_CreateTrueMaskByte:
+            case NI_Sve_CreateTrueMaskDouble:
+            case NI_Sve_CreateTrueMaskInt16:
+            case NI_Sve_CreateTrueMaskInt32:
+            case NI_Sve_CreateTrueMaskInt64:
+            case NI_Sve_CreateTrueMaskSByte:
+            case NI_Sve_CreateTrueMaskSingle:
+            case NI_Sve_CreateTrueMaskUInt16:
+            case NI_Sve_CreateTrueMaskUInt32:
+            case NI_Sve_CreateTrueMaskUInt64:
+                immLowerBound = (int)SVE_PATTERN_POW2;
+                immUpperBound = (int)SVE_PATTERN_ALL;
+                break;
+
             default:
                 unreached();
         }
@@ -1822,7 +1836,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             {
                 unsigned tmp = lvaGrabTemp(true DEBUGARG("StoreVectorNx2 temp tree"));
 
-                impStoreTemp(tmp, op2, CHECK_SPILL_NONE);
+                impStoreToTemp(tmp, op2, CHECK_SPILL_NONE);
                 op2 = gtNewLclvNode(tmp, argType);
             }
             op2 = gtConvertTableOpToFieldList(op2, fieldCount);
@@ -1876,7 +1890,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                 {
                     unsigned tmp = lvaGrabTemp(true DEBUGARG("StoreSelectedScalarN"));
 
-                    impStoreTemp(tmp, op2, CHECK_SPILL_NONE);
+                    impStoreToTemp(tmp, op2, CHECK_SPILL_NONE);
                     op2 = gtNewLclvNode(tmp, argType);
                 }
                 op2 = gtConvertTableOpToFieldList(op2, fieldCount);
@@ -2092,7 +2106,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             {
                 unsigned tmp = lvaGrabTemp(true DEBUGARG("LoadAndInsertScalar temp tree"));
 
-                impStoreTemp(tmp, op1, CHECK_SPILL_NONE);
+                impStoreToTemp(tmp, op1, CHECK_SPILL_NONE);
                 op1 = gtNewLclvNode(tmp, argType);
             }
 
@@ -2125,7 +2139,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                 {
                     unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree"));
 
-                    impStoreTemp(tmp, op1, CHECK_SPILL_NONE);
+                    impStoreToTemp(tmp, op1, CHECK_SPILL_NONE);
                     op1 = gtNewLclvNode(tmp, argType);
                 }
 
@@ -2165,7 +2179,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                 {
                     unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookupExtension temp tree"));
 
-                    impStoreTemp(tmp, op2, CHECK_SPILL_NONE);
+                    impStoreToTemp(tmp, op2, CHECK_SPILL_NONE);
                     op2 = gtNewLclvNode(tmp, argType);
                 }
 
@@ -2179,6 +2193,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
             break;
         }
+
         default:
         {
             return nullptr;
@@ -2188,4 +2203,47 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
     return retNode;
 }
 
+//------------------------------------------------------------------------
+// gtNewSimdConvertMaskToVectorNode: Convert a HW instrinsic vector node to a mask
+//
+// Arguments:
+//    node            -- The node to convert
+//    simdBaseJitType -- the base jit type of the converted node
+//    simdSize        -- the simd size of the converted node
+//
+// Return Value:
+//    The node converted to the a mask type
+//
+GenTree* Compiler::gtNewSimdConvertVectorToMaskNode(var_types   type,
+                                                    GenTree*    node,
+                                                    CorInfoType simdBaseJitType,
+                                                    unsigned    simdSize)
+{
+    assert(varTypeIsSIMD(node));
+
+    // ConvertVectorToMask uses cmpne which requires an embedded mask.
+    GenTree* embeddedMask = gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize);
+    return gtNewSimdHWIntrinsicNode(TYP_MASK, embeddedMask, node, NI_Sve_ConvertVectorToMask, simdBaseJitType,
+                                    simdSize);
+}
+
+//------------------------------------------------------------------------
+// gtNewSimdConvertMaskToVectorNode: Convert a HW instrinsic mask node to a vector
+//
+// Arguments:
+//    node          -- The node to convert
+//    type          -- The type of the node to convert to
+//
+// Return Value:
+//    The node converted to the given type
+//
+GenTree* Compiler::gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type)
+{
+    assert(varTypeIsMask(node));
+    assert(varTypeIsSIMD(type));
+
+    return gtNewSimdHWIntrinsicNode(type, node, NI_Sve_ConvertMaskToVector, node->GetSimdBaseJitType(),
+                                    node->GetSimdSize());
+}
+
 #endif // FEATURE_HW_INTRINSICS
diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
index eba1b6f33a09..9a3a98e087a2 100644
--- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -36,7 +36,10 @@
 //       of a for-loop.
 //
 CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper(CodeGen* codeGen, GenTree* immOp, GenTreeHWIntrinsic* intrin)
-    : codeGen(codeGen), endLabel(nullptr), nonZeroLabel(nullptr), branchTargetReg(REG_NA)
+    : codeGen(codeGen)
+    , endLabel(nullptr)
+    , nonZeroLabel(nullptr)
+    , branchTargetReg(REG_NA)
 {
     assert(codeGen != nullptr);
     assert(varTypeIsIntegral(immOp));
@@ -265,6 +268,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
         emitSize = EA_UNKNOWN;
         opt      = INS_OPTS_NONE;
     }
+    else if (HWIntrinsicInfo::IsScalable(intrin.id))
+    {
+        emitSize = EA_SCALABLE;
+        opt      = emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType));
+    }
     else
     {
         emitSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize()));
@@ -276,7 +284,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
 
     genConsumeMultiOpOperands(node);
 
-    if (intrin.IsTableDriven())
+    if (intrin.codeGenIsTableDriven())
     {
         const instruction ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType);
         assert(ins != INS_invalid);
@@ -372,6 +380,27 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 emitShift(intrin.op2, op1Reg);
             }
         }
+        else if (intrin.category == HW_Category_EnumPattern)
+        {
+            assert(hasImmediateOperand);
+
+            switch (intrin.numOperands)
+            {
+                case 1:
+                {
+                    HWIntrinsicImmOpHelper helper(this, intrin.op1, node);
+                    for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
+                    {
+                        const insSvePattern pattern = (insSvePattern)helper.ImmValue();
+                        GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, pattern);
+                    }
+                };
+                break;
+
+                default:
+                    unreached();
+            }
+        }
         else
         {
             assert(!hasImmediateOperand);
@@ -1254,6 +1283,23 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg);
                 break;
 
+            case NI_Sve_ConvertMaskToVector:
+                // PMOV would be ideal here, but it is in SVE2.1.
+                // Instead, use a predicated move: MOV <Zd>.<T>, <Pg>/Z, #1
+                GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op1Reg, 1, opt);
+                break;
+
+            case NI_Sve_ConvertVectorToMask:
+                // PMOV would be ideal here, but it is in SVE2.1.
+                // Instead, use a compare: CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0
+                GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op2Reg, 0, opt);
+                break;
+
+            case NI_Sve_CreateTrueMaskAll:
+                // Must use the pattern variant, as the non-pattern varient is SVE2.1.
+                GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, SVE_PATTERN_ALL);
+                break;
+
             default:
                 unreached();
         }
diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
index 6e85d98fea12..79e6b497c368 100644
--- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
@@ -253,57 +253,82 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             }
         }
 
-        if (HWIntrinsicInfo::IsEmbRoundingCompatible(intrinsicId))
+        if (node->OperIsEmbRoundingEnabled())
         {
-            assert(isTableDriven);
-            size_t expectedArgNum = HWIntrinsicInfo::EmbRoundingArgPos(intrinsicId);
+            GenTree* lastOp = node->Op(numArgs);
 
-            if (numArgs == expectedArgNum)
-            {
-                GenTree* lastOp = node->Op(numArgs);
-
-                // Now that we've extracted the rounding mode, we'll remove the
-                // last operand, adjust the arg count, and continue. This allows
-                // us to reuse all the existing logic without having to add new
-                // specialized handling everywhere.
+            // Now that we've extracted the rounding mode, we'll remove the
+            // last operand, adjust the arg count, and continue. This allows
+            // us to reuse all the existing logic without having to add new
+            // specialized handling everywhere.
 
-                switch (numArgs)
+            switch (numArgs)
+            {
+                case 2:
                 {
-                    case 3:
-                    {
-                        numArgs = 2;
-                        node->ResetHWIntrinsicId(intrinsicId, compiler, node->Op(1), node->Op(2));
-                        break;
-                    }
-
-                    default:
-                    {
-                        unreached();
-                    }
+                    numArgs = 1;
+                    node->ResetHWIntrinsicId(intrinsicId, compiler, node->Op(1));
+                    break;
                 }
 
-                if (lastOp->isContained())
+                case 3:
                 {
-                    assert(lastOp->IsCnsIntOrI());
+                    numArgs = 2;
+                    node->ResetHWIntrinsicId(intrinsicId, compiler, node->Op(1), node->Op(2));
+                    break;
+                }
 
-                    int8_t mode = static_cast<int8_t>(lastOp->AsIntCon()->IconValue());
-                    instOptions = AddEmbRoundingMode(instOptions, mode);
+                case 4:
+                {
+                    numArgs = 3;
+                    node->ResetHWIntrinsicId(intrinsicId, compiler, node->Op(1), node->Op(2), node->Op(3));
+                    break;
                 }
-                else
+
+                default:
                 {
-                    var_types baseType = node->GetSimdBaseType();
+                    unreached();
+                }
+            }
 
-                    instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
-                    assert(ins != INS_invalid);
+            if (lastOp->isContained())
+            {
+                assert(lastOp->IsCnsIntOrI());
+
+                int8_t mode = static_cast<int8_t>(lastOp->AsIntCon()->IconValue());
+                instOptions = AddEmbRoundingMode(instOptions, mode);
+            }
+            else
+            {
+                var_types baseType = node->GetSimdBaseType();
+
+                instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
+                assert(ins != INS_invalid);
 
-                    emitAttr simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize()));
-                    assert(simdSize != 0);
+                emitAttr simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize()));
+                assert(simdSize != 0);
 
-                    genConsumeMultiOpOperands(node);
-                    genConsumeRegs(lastOp);
+                genConsumeMultiOpOperands(node);
+                genConsumeRegs(lastOp);
 
+                if (isTableDriven)
+                {
                     switch (numArgs)
                     {
+                        case 1:
+                        {
+                            regNumber targetReg  = node->GetRegNum();
+                            GenTree*  rmOp       = node->Op(1);
+                            auto      emitSwCase = [&](int8_t i) {
+                                insOpts newInstOptions = AddEmbRoundingMode(instOptions, i);
+                                genHWIntrinsic_R_RM(node, ins, simdSize, targetReg, rmOp, newInstOptions);
+                            };
+                            regNumber baseReg = node->ExtractTempReg();
+                            regNumber offsReg = node->GetSingleTempReg();
+                            genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg,
+                                                            emitSwCase);
+                            break;
+                        }
                         case 2:
                         {
                             auto emitSwCase = [&](int8_t i) {
@@ -322,10 +347,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                             unreached();
                         }
                     }
-
-                    genProduceReg(node);
-                    return;
                 }
+                else
+                {
+                    // There are a few embedded rounding intrinsics that need to be emitted with special handling.
+                    genNonTableDrivenHWIntrinsicsJumpTableFallback(node, lastOp);
+                }
+
+                genProduceReg(node);
+                return;
             }
         }
     }
@@ -396,7 +426,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                     }
                     else
                     {
-                        genHWIntrinsic_R_RM(node, ins, simdSize, targetReg, op1);
+                        genHWIntrinsic_R_RM(node, ins, simdSize, targetReg, op1, instOptions);
                     }
                 }
                 break;
@@ -529,7 +559,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
 
                 if (HWIntrinsicInfo::isImmOp(intrinsicId, op3))
                 {
-                    auto emitSwCase = [&](int8_t i) { genHWIntrinsic_R_R_RM_I(node, ins, simdSize, i); };
+                    auto emitSwCase = [&](int8_t i) {
+                        genHWIntrinsic_R_R_RM_I(node, ins, simdSize, i);
+                    };
 
                     if (op3->IsCnsIntOrI())
                     {
@@ -623,7 +655,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
 
                 if (HWIntrinsicInfo::isImmOp(intrinsicId, op4))
                 {
-                    auto emitSwCase = [&](int8_t i) { genHWIntrinsic_R_R_R_RM_I(node, ins, simdSize, i); };
+                    auto emitSwCase = [&](int8_t i) {
+                        genHWIntrinsic_R_R_R_RM_I(node, ins, simdSize, i);
+                    };
 
                     if (op4->IsCnsIntOrI())
                     {
@@ -715,7 +749,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             genBMI1OrBMI2Intrinsic(node, instOptions);
             break;
         case InstructionSet_FMA:
-            genFMAIntrinsic(node);
+            genFMAIntrinsic(node, instOptions);
             break;
         case InstructionSet_LZCNT:
         case InstructionSet_LZCNT_X64:
@@ -749,13 +783,23 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
 //    attr - The emit attribute for the instruction being generated
 //    reg  - The register
 //    rmOp - The register/memory operand node
-//
+//    instOptions - the existing intOpts
 void CodeGen::genHWIntrinsic_R_RM(
-    GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, regNumber reg, GenTree* rmOp)
+    GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, regNumber reg, GenTree* rmOp, insOpts instOptions)
 {
     emitter*    emit     = GetEmitter();
     OperandDesc rmOpDesc = genOperandDesc(rmOp);
 
+    if (((instOptions & INS_OPTS_EVEX_b_MASK) != 0) && (rmOpDesc.GetKind() == OperandKind::Reg))
+    {
+        // As embedded rounding only appies in R_R case, we can skip other checks for different paths.
+        regNumber op1Reg = rmOp->GetRegNum();
+        assert(op1Reg != REG_NA);
+
+        emit->emitIns_R_R(ins, attr, reg, op1Reg, instOptions);
+        return;
+    }
+
     if (rmOpDesc.IsContained())
     {
         assert(HWIntrinsicInfo::SupportsContainment(node->GetHWIntrinsicId()));
@@ -1081,9 +1125,15 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins,
 //    op1Reg    - The register of the first operand
 //    op2Reg    - The register of the second operand
 //    op3       - The third operand
+//    instOptions - The options that modify how the instruction is generated
 //
-void CodeGen::genHWIntrinsic_R_R_R_RM(
-    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTree* op3)
+void CodeGen::genHWIntrinsic_R_R_R_RM(instruction ins,
+                                      emitAttr    attr,
+                                      regNumber   targetReg,
+                                      regNumber   op1Reg,
+                                      regNumber   op2Reg,
+                                      GenTree*    op3,
+                                      insOpts     instOptions)
 {
     assert(targetReg != REG_NA);
     assert(op1Reg != REG_NA);
@@ -1092,6 +1142,16 @@ void CodeGen::genHWIntrinsic_R_R_R_RM(
     emitter*    emit    = GetEmitter();
     OperandDesc op3Desc = genOperandDesc(op3);
 
+    if (((instOptions & INS_OPTS_EVEX_b_MASK) != 0) && (op3Desc.GetKind() == OperandKind::Reg))
+    {
+        // As embedded rounding only appies in R_R case, we can skip other checks for different paths.
+        regNumber op3Reg = op3->GetRegNum();
+        assert(op3Reg != REG_NA);
+
+        emit->emitIns_SIMD_R_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, op3Desc.GetReg(), instOptions);
+        return;
+    }
+
     switch (op3Desc.GetKind())
     {
         case OperandKind::ClsVar:
@@ -1152,10 +1212,10 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I(GenTreeHWIntrinsic* node, instruction in
 
         if (op2->isContained())
         {
-// op2 is never selected by the table so
-// we can contain and ignore any register
-// allocated to it resulting in better
-// non-RMW based codegen.
+            // op2 is never selected by the table so
+            // we can contain and ignore any register
+            // allocated to it resulting in better
+            // non-RMW based codegen.
 
 #if defined(DEBUG)
             NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
@@ -1285,6 +1345,113 @@ void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic            intrinsi
     genDefineTempLabel(switchTableEnd);
 }
 
+void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* node, GenTree* lastOp)
+{
+    NamedIntrinsic      intrinsicId = node->GetHWIntrinsicId();
+    HWIntrinsicCategory category    = HWIntrinsicInfo::lookupCategory(intrinsicId);
+
+    assert(HWIntrinsicInfo::IsEmbRoundingCompatible(intrinsicId));
+    assert(!lastOp->isContained());
+    assert(!genIsTableDrivenHWIntrinsic(intrinsicId, category));
+
+    var_types   baseType   = node->GetSimdBaseType();
+    emitAttr    attr       = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize()));
+    var_types   targetType = node->TypeGet();
+    instruction ins        = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
+    regNumber   targetReg  = node->GetRegNum();
+
+    insOpts instOptions = INS_OPTS_NONE;
+    switch (intrinsicId)
+    {
+        case NI_AVX512F_ConvertToVector256Int32:
+        case NI_AVX512F_ConvertToVector256UInt32:
+        {
+            // This intrinsic has several overloads, only the ones with floating number inputs should reach this part.
+            assert(varTypeIsFloating(baseType));
+            GenTree* rmOp       = node->Op(1);
+            auto     emitSwCase = [&](int8_t i) {
+                insOpts newInstOptions = AddEmbRoundingMode(instOptions, i);
+                genHWIntrinsic_R_RM(node, ins, attr, targetReg, rmOp, newInstOptions);
+            };
+            regNumber baseReg = node->ExtractTempReg();
+            regNumber offsReg = node->GetSingleTempReg();
+            genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase);
+            break;
+        }
+
+        case NI_AVX512F_ConvertToInt32:
+        case NI_AVX512F_ConvertToUInt32:
+#if defined(TARGET_AMD64)
+        case NI_AVX512F_X64_ConvertToInt64:
+        case NI_AVX512F_X64_ConvertToUInt64:
+#endif // TARGET_AMD64
+        {
+            assert(varTypeIsFloating(baseType));
+            attr          = emitTypeSize(targetType);
+            GenTree* rmOp = node->Op(1);
+
+            auto emitSwCase = [&](int8_t i) {
+                insOpts newInstOptions = AddEmbRoundingMode(instOptions, i);
+                genHWIntrinsic_R_RM(node, ins, attr, targetReg, rmOp, newInstOptions);
+            };
+            regNumber baseReg = node->ExtractTempReg();
+            regNumber offsReg = node->GetSingleTempReg();
+            genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase);
+            break;
+        }
+
+        case NI_AVX512F_X64_ConvertScalarToVector128Single:
+        case NI_AVX512F_X64_ConvertScalarToVector128Double:
+        {
+            assert(varTypeIsLong(baseType));
+            auto emitSwCase = [&](int8_t i) {
+                insOpts newInstOptions = AddEmbRoundingMode(instOptions, i);
+                genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE, newInstOptions);
+            };
+            regNumber baseReg = node->ExtractTempReg();
+            regNumber offsReg = node->GetSingleTempReg();
+            genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase);
+            break;
+        }
+
+        case NI_AVX512F_FusedMultiplyAdd:
+        case NI_AVX512F_FusedMultiplyAddScalar:
+        case NI_AVX512F_FusedMultiplyAddNegated:
+        case NI_AVX512F_FusedMultiplyAddNegatedScalar:
+        case NI_AVX512F_FusedMultiplyAddSubtract:
+        case NI_AVX512F_FusedMultiplySubtract:
+        case NI_AVX512F_FusedMultiplySubtractAdd:
+        case NI_AVX512F_FusedMultiplySubtractNegated:
+        case NI_AVX512F_FusedMultiplySubtractNegatedScalar:
+        case NI_AVX512F_FusedMultiplySubtractScalar:
+        {
+            // For FMA intrinsics, since it is not possible to get any contained operand in this case: embedded rounding
+            // is limited in register-to-register form, and the control byte is dynamic, we don't need to do any swap.
+            assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinsicId));
+
+            GenTree* op1 = node->Op(1);
+            GenTree* op2 = node->Op(2);
+            GenTree* op3 = node->Op(3);
+
+            regNumber op1Reg = op1->GetRegNum();
+            regNumber op2Reg = op2->GetRegNum();
+
+            auto emitSwCase = [&](int8_t i) {
+                insOpts newInstOptions = AddEmbRoundingMode(instOptions, i);
+                genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3, newInstOptions);
+            };
+            regNumber baseReg = node->ExtractTempReg();
+            regNumber offsReg = node->GetSingleTempReg();
+            genHWIntrinsicJumpTableFallback(intrinsicId, lastOp->GetRegNum(), baseReg, offsReg, emitSwCase);
+            break;
+        }
+
+        default:
+            unreached();
+            break;
+    }
+}
+
 //------------------------------------------------------------------------
 // genBaseIntrinsic: Generates the code for a base hardware intrinsic node
 //
@@ -1892,7 +2059,9 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node)
             instruction ins  = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
             emitAttr    attr = emitActualTypeSize(node->TypeGet());
 
-            auto emitSwCase = [&](int8_t i) { inst_RV_TT_IV(ins, attr, targetReg, op1, i); };
+            auto emitSwCase = [&](int8_t i) {
+                inst_RV_TT_IV(ins, attr, targetReg, op1, i);
+            };
 
             if (op2->IsCnsIntOrI())
             {
@@ -1988,7 +2157,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption
 
     if (HWIntrinsicInfo::IsFmaIntrinsic(intrinsicId))
     {
-        genFMAIntrinsic(node);
+        genFMAIntrinsic(node, instOptions);
         return;
     }
 
@@ -2550,8 +2719,10 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption
             break;
         }
 
+        case NI_AVX512F_ConvertToInt32:
         case NI_AVX512F_ConvertToUInt32:
         case NI_AVX512F_ConvertToUInt32WithTruncation:
+        case NI_AVX512F_X64_ConvertToInt64:
         case NI_AVX512F_X64_ConvertToUInt64:
         case NI_AVX512F_X64_ConvertToUInt64WithTruncation:
         {
@@ -2559,7 +2730,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption
             emitAttr attr = emitTypeSize(targetType);
 
             instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
-            genHWIntrinsic_R_RM(node, ins, attr, targetReg, node->Op(1));
+            genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions);
             break;
         }
 
@@ -2571,7 +2742,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption
             if (varTypeIsFloating(baseType))
             {
                 instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
-                genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1);
+                genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions);
                 break;
             }
             FALLTHROUGH;
@@ -2623,7 +2794,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption
         case NI_AVX512F_X64_ConvertScalarToVector128Double:
         case NI_AVX512F_X64_ConvertScalarToVector128Single:
         {
-            assert(baseType == TYP_ULONG);
+            assert(baseType == TYP_ULONG || baseType == TYP_LONG);
             instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
             genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE, instOptions);
             break;
@@ -2773,7 +2944,7 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptio
 // Arguments:
 //    node - The hardware intrinsic node
 //
-void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
+void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
 {
     NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
     assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinsicId));
@@ -2880,7 +3051,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
     }
 
     assert(ins != INS_invalid);
-    genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, emitOp1->GetRegNum(), emitOp2->GetRegNum(), emitOp3);
+    genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, emitOp1->GetRegNum(), emitOp2->GetRegNum(), emitOp3, instOptions);
     genProduceReg(node);
 }
 
diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h
index f8263c40bb0c..ac110c2a0e1b 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64sve.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -16,6 +16,32 @@
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SVE Intrinsics
 
+// Sve
+HARDWARE_INTRINSIC(Sve,           CreateTrueMaskByte,                                               -1,      1,      false, {INS_invalid,        INS_sve_ptrue,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_EnumPattern,           HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Sve,           CreateTrueMaskDouble,                                             -1,      1,      false, {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sve_ptrue},   HW_Category_EnumPattern,           HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Sve,           CreateTrueMaskInt16,                                              -1,      1,      false, {INS_invalid,        INS_invalid,        INS_sve_ptrue,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_EnumPattern,           HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Sve,           CreateTrueMaskInt32,                                              -1,      1,      false, {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sve_ptrue,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_EnumPattern,           HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Sve,           CreateTrueMaskInt64,                                              -1,      1,      false, {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sve_ptrue,      INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_EnumPattern,           HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Sve,           CreateTrueMaskSByte,                                              -1,      1,      false, {INS_sve_ptrue,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_EnumPattern,           HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Sve,           CreateTrueMaskSingle,                                             -1,      1,      false, {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sve_ptrue,      INS_invalid},     HW_Category_EnumPattern,           HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Sve,           CreateTrueMaskUInt16,                                             -1,      1,      false, {INS_invalid,        INS_invalid,        INS_invalid,        INS_sve_ptrue,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_EnumPattern,           HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Sve,           CreateTrueMaskUInt32,                                             -1,      1,      false, {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sve_ptrue,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_EnumPattern,           HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Sve,           CreateTrueMaskUInt64,                                             -1,      1,      false, {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sve_ptrue,      INS_invalid,        INS_invalid},     HW_Category_EnumPattern,           HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
+
+HARDWARE_INTRINSIC(Sve,           LoadVector,                                                       -1,      2,      true,  {INS_sve_ld1b,       INS_sve_ld1b,       INS_sve_ld1h,       INS_sve_ld1h,       INS_sve_ld1w,       INS_sve_ld1w,       INS_sve_ld1d,       INS_sve_ld1d,       INS_sve_ld1w,       INS_sve_ld1d},    HW_Category_MemoryLoad,            HW_Flag_Scalable|HW_Flag_LowMaskedOperation)
+
+
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 ISA            Function name                                              SIMD size  NumArg  EncodesExtraTypeArg                                                                                            Instructions                                                                                        Category                           Flags
+//                                                                                                                          {TYP_BYTE,           TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// Special intrinsics that are generated during importing or lowering
+
+HARDWARE_INTRINSIC(Sve,           ConvertMaskToVector,                                              -1,      1,      true,  {INS_sve_mov,        INS_sve_mov,        INS_sve_mov,        INS_sve_mov,        INS_sve_mov,        INS_sve_mov,        INS_sve_mov,        INS_sve_mov,        INS_sve_mov,        INS_sve_mov},     HW_Category_Helper,                HW_Flag_Scalable|HW_Flag_MaskedOperation)
+HARDWARE_INTRINSIC(Sve,           ConvertVectorToMask,                                              -1,      2,      true,  {INS_sve_cmpne,      INS_sve_cmpne,      INS_sve_cmpne,      INS_sve_cmpne,      INS_sve_cmpne,      INS_sve_cmpne,      INS_sve_cmpne,      INS_sve_cmpne,      INS_sve_cmpne,      INS_sve_cmpne},   HW_Category_Helper,                HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation)
+
+HARDWARE_INTRINSIC(Sve,           CreateTrueMaskAll,                                                -1,     -1,      false, {INS_sve_ptrue,      INS_sve_ptrue,      INS_sve_ptrue,      INS_sve_ptrue,      INS_sve_ptrue,      INS_sve_ptrue,      INS_sve_ptrue,      INS_sve_ptrue,      INS_sve_ptrue,      INS_sve_ptrue},   HW_Category_Helper,           	  HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask)
 
 #endif // FEATURE_HW_INTRINSIC
 
diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h
index 65df8c14c053..07bc2e4838c8 100644
--- a/src/coreclr/jit/hwintrinsiclistxarch.h
+++ b/src/coreclr/jit/hwintrinsiclistxarch.h
@@ -273,8 +273,12 @@ HARDWARE_INTRINSIC(Vector512,       Create,
 HARDWARE_INTRINSIC(Vector512,       CreateScalar,                               64,            -1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector512,       CreateScalarUnsafe,                         64,             1,       true,  {INS_movd,              INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movss,              INS_movsd_simd},        HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector512,       CreateSequence,                             64,             2,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector512,       ConvertToDouble,                            64,             1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512,       ConvertToSingle,                            64,             1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512,       ConvertToInt32,                             64,             1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector512,       ConvertToInt64,                             64,             1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector512,       ConvertToUInt32,                            64,             1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector512,       ConvertToUInt64,                            64,             1,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector512,       Divide,                                     64,             2,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector512,       Equals,                                     64,             2,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector512,       EqualsAll,                                  64,             2,      false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
@@ -411,7 +415,7 @@ HARDWARE_INTRINSIC(SSE,             CompareUnordered,
 HARDWARE_INTRINSIC(SSE,             CompareScalarUnordered,                     16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
 HARDWARE_INTRINSIC(SSE,             ConvertToInt32,                             16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2si,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE,             ConvertScalarToVector128Single,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2ss32,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE,             ConvertToInt32WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttss2si,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             ConvertToInt32WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttss2si32,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE,             Divide,                                     16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_EmbBroadcastCompatible)
 HARDWARE_INTRINSIC(SSE,             DivideScalar,                               16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE,             LoadAlignedVector128,                       16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movaps,             INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
@@ -460,7 +464,7 @@ HARDWARE_INTRINSIC(SSE,             Xor,
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE 64-bit-only Intrinsics
 HARDWARE_INTRINSIC(SSE_X64,         ConvertToInt64,                             16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2si,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(SSE_X64,         ConvertToInt64WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttss2si,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE_X64,         ConvertToInt64WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttss2si64,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(SSE_X64,         ConvertScalarToVector128Single,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2ss64,         INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
@@ -511,7 +515,7 @@ HARDWARE_INTRINSIC(SSE2,            CompareScalarOrdered,
 HARDWARE_INTRINSIC(SSE2,            CompareUnordered,                           16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarUnordered,                     16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertToInt32,                             16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2,            ConvertToInt32WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertToInt32WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si32},       HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertToUInt32,                            16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Double,                   16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2pd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2pd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128Double,             16,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2sd32,         INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2sd,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg)
@@ -578,7 +582,7 @@ HARDWARE_INTRINSIC(SSE2,            Xor,
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE2 64-bit-only Intrinsics
 HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64,                             16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64WithTruncation,               16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si64},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_X64,        ConvertToUInt64,                            16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128Double,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2sd64,         INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
 HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128Int64,              16,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
@@ -708,9 +712,9 @@ HARDWARE_INTRINSIC(AVX,             CompareNotLessThanOrEqual,
 HARDWARE_INTRINSIC(AVX,             CompareOrdered,                             32,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(AVX,             CompareUnordered,                           32,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_ReturnsPerElementMask)
 HARDWARE_INTRINSIC(AVX,             CompareScalar,                              16,              3,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_cmpsd},             HW_Category_IMM,                    HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
-HARDWARE_INTRINSIC(AVX,             ConvertToVector128Int32,                    32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             ConvertToVector128Int32,                    32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2dq},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX,             ConvertToVector128Single,                   32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2ps},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX,             ConvertToVector256Int32,                    32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             ConvertToVector256Int32,                    32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX,             ConvertToVector256Single,                   32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2ps,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX,             ConvertToVector256Double,                   32,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2pd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2pd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX,             ConvertToVector128Int32WithTruncation,      32,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttpd2dq,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
@@ -836,6 +840,7 @@ HARDWARE_INTRINSIC(AVX2,            Xor,
 //  AVX512F Intrinsics
 HARDWARE_INTRINSIC(AVX512F,         Abs,                                        64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pabsd,              INS_invalid,            INS_vpabsq,             INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX512F,         Add,                                        64,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_paddd,              INS_paddd,              INS_paddq,              INS_paddq,              INS_addps,              INS_addpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         AddScalar,                                  16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addss,              INS_addsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         AlignRight32,                               64,              3,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_valignd,            INS_valignd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX512F,         AlignRight64,                               64,              3,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_valignq,            INS_valignq,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX512F,         And,                                        64,              2,      true,  {INS_pand,              INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_vpandq,             INS_vpandq,             INS_andps,              INS_andpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible)
@@ -856,10 +861,11 @@ HARDWARE_INTRINSIC(AVX512F,         CompareNotLessThan,
 HARDWARE_INTRINSIC(AVX512F,         CompareNotLessThanOrEqual,                  64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcmpps,             INS_vcmppd},            HW_Category_SimpleSIMD,             HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         CompareOrdered,                             64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcmpps,             INS_vcmppd},            HW_Category_SimpleSIMD,             HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         CompareUnordered,                           64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcmpps,             INS_vcmppd},            HW_Category_SimpleSIMD,             HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(AVX512F,         ConvertScalarToVector128Double,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtusi2sd32,       INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(AVX512F,         ConvertScalarToVector128Single,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtusi2ss32,       INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(AVX512F,         ConvertToUInt32,                            16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtss2usi,         INS_vcvtsd2usi},        HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F,         ConvertToUInt32WithTruncation,              16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttss2usi32,      INS_vcvttsd2usi},       HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F,         ConvertScalarToVector128Double,             16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2sd32,         INS_vcvtusi2sd32,       INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(AVX512F,         ConvertScalarToVector128Single,             16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2ss32,         INS_vcvtusi2ss32,       INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsd2ss},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         ConvertToInt32,                             16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2si,           INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         ConvertToUInt32,                            16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtss2usi,         INS_vcvtsd2usi},        HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         ConvertToUInt32WithTruncation,              16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttss2usi32,      INS_vcvttsd2usi32},     HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector128Byte,                     64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovdb,            INS_vpmovdb,            INS_vpmovqb,            INS_vpmovqb,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector128ByteWithSaturation,       64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovusdb,          INS_invalid,            INS_vpmovusqb,          INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector128Int16,                    64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovqw,            INS_vpmovqw,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
@@ -870,36 +876,41 @@ HARDWARE_INTRINSIC(AVX512F,         ConvertToVector128UInt16,
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector128UInt16WithSaturation,     64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovusqw,          INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256Int16,                    64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovdw,            INS_vpmovdw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256Int16WithSaturation,      64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovsdw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256Int32,                    64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovqd,            INS_vpmovqd,            INS_invalid,            INS_cvtpd2dq},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256Int32,                    64,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovqd,            INS_vpmovqd,            INS_invalid,            INS_cvtpd2dq},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256Int32WithSaturation,      64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovsqd,           INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256Int32WithTruncation,      64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttpd2dq},         HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256Single,                   64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2ps},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256Single,                   64,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2ps},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256UInt16,                   64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovdw,            INS_vpmovdw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256UInt16WithSaturation,     64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovusdw,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256UInt32,                   64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovqd,            INS_vpmovqd,            INS_invalid,            INS_vcvtpd2udq},        HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256UInt32,                   64,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovqd,            INS_vpmovqd,            INS_invalid,            INS_vcvtpd2udq},        HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256UInt32WithSaturation,     64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmovusqd,          INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector256UInt32WithTruncation,     64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttpd2udq},       HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector512Double,                   64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2pd,           INS_vcvtudq2pd,         INS_invalid,            INS_invalid,            INS_cvtps2pd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512F,         ConvertToVector512Int32,                    64,              1,      true,  {INS_pmovsxbd,          INS_pmovzxbd,           INS_pmovsxwd,           INS_pmovzxwd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F,         ConvertToVector512Int32,                    64,             -1,     false,  {INS_pmovsxbd,          INS_pmovzxbd,           INS_pmovsxwd,           INS_pmovzxwd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector512Int32WithTruncation,      64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttps2dq,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector512Int64,                    64,              1,      true,  {INS_pmovsxbq,          INS_pmovzxbq,           INS_pmovsxwq,           INS_pmovzxwq,           INS_pmovsxdq,           INS_pmovzxdq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512F,         ConvertToVector512Single,                   64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2ps,           INS_vcvtudq2ps,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512F,         ConvertToVector512UInt32,                   64,              1,      true,  {INS_pmovsxbd,          INS_pmovzxbd,           INS_pmovsxwd,           INS_pmovzxwd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtps2udq,         INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512F,         ConvertToVector512Single,                   64,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2ps,           INS_vcvtudq2ps,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         ConvertToVector512UInt32,                   64,             -1,     false,  {INS_pmovsxbd,          INS_pmovzxbd,           INS_pmovsxwd,           INS_pmovzxwd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtps2udq,         INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector512UInt32WithTruncation,     64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttps2udq,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX512F,         ConvertToVector512UInt64,                   64,              1,      true,  {INS_pmovsxbq,          INS_pmovzxbq,           INS_pmovsxwq,           INS_pmovzxwq,           INS_pmovsxdq,           INS_pmovzxdq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512F,         Divide,                                     64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divps,              INS_divpd},             HW_Category_SimpleSIMD,             HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(AVX512F,         Divide,                                     64,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divps,              INS_divpd},             HW_Category_SimpleSIMD,             HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         DivideScalar,                               16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divss,              INS_divsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         DuplicateEvenIndexed,                       64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movsldup,           INS_movddup},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX512F,         DuplicateOddIndexed,                        64,              1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movshdup,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX512F,         ExtractVector128,                           64,              2,      true,  {INS_vextracti128,      INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextractf128,       INS_vextractf128},      HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX512F,         ExtractVector256,                           64,              2,      true,  {INS_vextracti64x4,     INS_vextracti64x4,      INS_vextracti64x4,      INS_vextracti64x4,      INS_vextracti64x4,      INS_vextracti64x4,      INS_vextracti64x4,      INS_vextracti64x4,      INS_vextractf64x4,      INS_vextractf64x4},     HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX512F,         Fixup,                                      64,              4,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfixupimmps,        INS_vfixupimmpd},       HW_Category_IMM,                    HW_Flag_SpecialImport|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX512F,         FixupScalar,                                16,              4,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfixupimmss,        INS_vfixupimmsd},       HW_Category_IMM,                    HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(AVX512F,         FusedMultiplyAdd,                           64,              3,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmadd213ps,        INS_vfmadd213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic)
-HARDWARE_INTRINSIC(AVX512F,         FusedMultiplyAddNegated,                    64,              3,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmadd213ps,       INS_vfnmadd213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic)
-HARDWARE_INTRINSIC(AVX512F,         FusedMultiplyAddSubtract,                   64,              3,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmaddsub213ps,     INS_vfmaddsub213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic)
-HARDWARE_INTRINSIC(AVX512F,         FusedMultiplySubtract,                      64,              3,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsub213ps,        INS_vfmsub213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic)
-HARDWARE_INTRINSIC(AVX512F,         FusedMultiplySubtractAdd,                   64,              3,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsubadd213ps,     INS_vfmsubadd213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic)
-HARDWARE_INTRINSIC(AVX512F,         FusedMultiplySubtractNegated,               64,              3,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmsub213ps,       INS_vfnmsub213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic)
+HARDWARE_INTRINSIC(AVX512F,         FusedMultiplyAdd,                           64,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmadd213ps,        INS_vfmadd213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         FusedMultiplyAddScalar,                     16,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmadd213ss,        INS_vfmadd213sd},       HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         FusedMultiplyAddNegated,                    64,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmadd213ps,       INS_vfnmadd213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         FusedMultiplyAddNegatedScalar,              16,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmadd213ss,       INS_vfnmadd213sd},      HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         FusedMultiplyAddSubtract,                   64,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmaddsub213ps,     INS_vfmaddsub213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         FusedMultiplySubtract,                      64,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsub213ps,        INS_vfmsub213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         FusedMultiplySubtractScalar,                16,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsub213ss,        INS_vfmsub213sd},       HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         FusedMultiplySubtractAdd,                   64,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsubadd213ps,     INS_vfmsubadd213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         FusedMultiplySubtractNegated,               64,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmsub213ps,       INS_vfnmsub213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         FusedMultiplySubtractNegatedScalar,         16,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmsub213ss,       INS_vfnmsub213sd},      HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         GetExponent,                                64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vgetexpps,          INS_vgetexppd},         HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX512F,         GetExponentScalar,                          16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vgetexpss,          INS_vgetexpsd},         HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(AVX512F,         GetMantissa,                                64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vgetmantps,         INS_vgetmantpd},        HW_Category_IMM,                    HW_Flag_NoFlag)
@@ -911,7 +922,8 @@ HARDWARE_INTRINSIC(AVX512F,         LoadAlignedVector512NonTemporal,
 HARDWARE_INTRINSIC(AVX512F,         LoadVector512,                              64,              1,      true,  {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_vmovdqu64,          INS_vmovdqu64,          INS_movups,             INS_movupd},            HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         Max,                                        64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pmaxsd,             INS_pmaxud,             INS_vpmaxsq,            INS_vpmaxuq,            INS_maxps,              INS_maxpd},             HW_Category_SimpleSIMD,             HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible)
 HARDWARE_INTRINSIC(AVX512F,         Min,                                        64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pminsd,             INS_pminud,             INS_vpminsq,            INS_vpminuq,            INS_minps,              INS_minpd},             HW_Category_SimpleSIMD,             HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(AVX512F,         Multiply,                                   64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pmuldq,             INS_pmuludq,            INS_mulps,              INS_mulpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(AVX512F,         Multiply,                                   64,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pmuldq,             INS_pmuludq,            INS_mulps,              INS_mulpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         MultiplyScalar,                             16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mulss,              INS_mulsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         MultiplyLow,                                64,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pmulld,             INS_pmulld,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible)
 HARDWARE_INTRINSIC(AVX512F,         Or,                                         64,              2,      true,  {INS_por,               INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_vporq,              INS_vporq,              INS_orps,               INS_orpd},              HW_Category_SimpleSIMD,             HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible)
 HARDWARE_INTRINSIC(AVX512F,         Permute2x64,                                64,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermilpd},         HW_Category_IMM,                    HW_Flag_FullRangeIMM)
@@ -933,8 +945,8 @@ HARDWARE_INTRINSIC(AVX512F,         RotateRight,
 HARDWARE_INTRINSIC(AVX512F,         RotateRightVariable,                        64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vprorvd,            INS_vprorvd,            INS_vprorvq,            INS_vprorvq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX512F,         RoundScale,                                 64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vrndscaleps,        INS_vrndscalepd},       HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX512F,         RoundScaleScalar,                           16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vrndscaless,        INS_vrndscalesd},       HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(AVX512F,         Scale,                                      64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vscalefps,          INS_vscalefpd},         HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX512F,         ScaleScalar,                                16,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vscalefss,          INS_vscalefsd},         HW_Category_SimpleSIMD,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(AVX512F,         Scale,                                      64,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vscalefps,          INS_vscalefpd},         HW_Category_SimpleSIMD,             HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         ScaleScalar,                                16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vscalefss,          INS_vscalefsd},         HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         ShiftLeftLogical,                           64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pslld,              INS_pslld,              INS_psllq,              INS_psllq,              INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX512F,         ShiftLeftLogicalVariable,                   64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsllvd,            INS_vpsllvd,            INS_vpsllvq,            INS_vpsllvq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_EmbBroadcastCompatible)
 HARDWARE_INTRINSIC(AVX512F,         ShiftRightArithmetic,                       64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_psrad,              INS_invalid,            INS_vpsraq,             INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
@@ -943,11 +955,13 @@ HARDWARE_INTRINSIC(AVX512F,         ShiftRightLogical,
 HARDWARE_INTRINSIC(AVX512F,         ShiftRightLogicalVariable,                  64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsrlvd,            INS_vpsrlvd,            INS_vpsrlvq,            INS_vpsrlvq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_EmbBroadcastCompatible)
 HARDWARE_INTRINSIC(AVX512F,         Shuffle,                                    64,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pshufd,             INS_pshufd,             INS_invalid,            INS_invalid,            INS_shufps,             INS_shufpd},            HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX512F,         Shuffle4x128,                               64,              3,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vshufi32x4,         INS_vshufi32x4,         INS_vshufi64x2,         INS_vshufi64x2,         INS_vshuff32x4,         INS_vshuff64x2},        HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX512F,         Sqrt,                                       64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtps,             INS_sqrtpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX512F,         Sqrt,                                       64,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtps,             INS_sqrtpd},            HW_Category_SimpleSIMD,             HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         SqrtScalar,                                 16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtss,             INS_sqrtsd},            HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         Store,                                      64,              2,      true,  {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_vmovdqu64,          INS_vmovdqu64,          INS_movups,             INS_movupd},            HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(AVX512F,         StoreAligned,                               64,              2,      true,  {INS_movdqa,            INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_vmovdqa64,          INS_vmovdqa64,          INS_movaps,             INS_movapd},            HW_Category_MemoryStore,            HW_Flag_BaseTypeFromSecondArg)
 HARDWARE_INTRINSIC(AVX512F,         StoreAlignedNonTemporal,                    64,              2,      true,  {INS_movntdq,           INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntps,            INS_movntpd},           HW_Category_MemoryStore,            HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(AVX512F,         Subtract,                                   64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_psubd,              INS_psubd,              INS_psubq,              INS_psubq,              INS_subps,              INS_subpd},             HW_Category_SimpleSIMD,             HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(AVX512F,         Subtract,                                   64,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_psubd,              INS_psubd,              INS_psubq,              INS_psubq,              INS_subps,              INS_subpd},             HW_Category_SimpleSIMD,             HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F,         SubtractScalar,                             16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_subss,              INS_subsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512F,         UnpackHigh,                                 64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_punpckhdq,          INS_punpckhdq,          INS_punpckhqdq,         INS_punpckhqdq,         INS_unpckhps,           INS_unpckhpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX512F,         TernaryLogic,                               64,              4,      true,  {INS_vpternlogd,        INS_vpternlogd,         INS_vpternlogd,         INS_vpternlogd,         INS_vpternlogd,         INS_vpternlogd,         INS_vpternlogq,         INS_vpternlogq,         INS_vpternlogd,         INS_vpternlogq},        HW_Category_IMM,                    HW_Flag_SpecialImport|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX512F,         UnpackLow,                                  64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_punpckldq,          INS_punpckldq,          INS_punpcklqdq,         INS_punpcklqdq,         INS_unpcklps,           INS_unpcklpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
@@ -1013,10 +1027,11 @@ HARDWARE_INTRINSIC(AVX512F_VL,      TernaryLogic,
 //                                                                                                              {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  AVX512F.X64 Intrinsics
-HARDWARE_INTRINSIC(AVX512F_X64,     ConvertScalarToVector128Double,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtusi2sd64,       INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F_X64,     ConvertScalarToVector128Single,             16,              2,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtusi2ss64,       INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F_X64,     ConvertToUInt64,                            16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtss2usi,         INS_vcvtsd2usi},        HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AVX512F_X64,     ConvertToUInt64WithTruncation,              16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttss2usi64,      INS_vcvttsd2usi},       HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX512F_X64,     ConvertScalarToVector128Double,             16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2sd64,         INS_vcvtusi2sd64,       INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F_X64,     ConvertScalarToVector128Single,             16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2ss64,         INS_vcvtusi2ss64,       INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F_X64,     ConvertToInt64,                             16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2si,           INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F_X64,     ConvertToUInt64,                            16,             -1,     false,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtss2usi,         INS_vcvtsd2usi},        HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512F_X64,     ConvertToUInt64WithTruncation,              16,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttss2usi64,      INS_vcvttsd2usi64},     HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 ISA              Function name                               SIMD size       NumArg  EncodesExtraTypeArg                                                                                                       Instructions                                                                                                                  Category                            Flags
@@ -1121,11 +1136,11 @@ HARDWARE_INTRINSIC(AVX512DQ,        AndNot,
 HARDWARE_INTRINSIC(AVX512DQ,        BroadcastPairScalarToVector512,             64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vbroadcasti32x2,    INS_vbroadcasti32x2,    INS_invalid,            INS_invalid,            INS_vbroadcastf32x2,    INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX512DQ,        BroadcastVector128ToVector512,              64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vbroadcasti64x2,    INS_vbroadcasti64x2,    INS_invalid,            INS_vbroadcastf64x2},   HW_Category_MemoryLoad,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX512DQ,        BroadcastVector256ToVector512,              64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vbroadcasti32x8,    INS_vbroadcasti32x8,    INS_invalid,            INS_invalid,            INS_vbroadcastf32x8,    INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX512DQ,        ConvertToVector256Single,                   64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtqq2ps,          INS_vcvtuqq2ps,         INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512DQ,        ConvertToVector512Double,                   64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtqq2pd,          INS_vcvtuqq2pd,         INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512DQ,        ConvertToVector512Int64,                    64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtps2qq,          INS_vcvtpd2qq},         HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ,        ConvertToVector256Single,                   64,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtqq2ps,          INS_vcvtuqq2ps,         INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512DQ,        ConvertToVector512Double,                   64,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtqq2pd,          INS_vcvtuqq2pd,         INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
+HARDWARE_INTRINSIC(AVX512DQ,        ConvertToVector512Int64,                    64,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtps2qq,          INS_vcvtpd2qq},         HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512DQ,        ConvertToVector512Int64WithTruncation,      64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttps2qq,         INS_vcvttpd2qq},        HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX512DQ,        ConvertToVector512UInt64,                   64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtps2uqq,         INS_vcvtpd2uqq},        HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512DQ,        ConvertToVector512UInt64,                   64,             -1,      false, {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvtps2uqq,         INS_vcvtpd2uqq},        HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible)
 HARDWARE_INTRINSIC(AVX512DQ,        ConvertToVector512UInt64WithTruncation,     64,              1,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vcvttps2uqq,        INS_vcvttpd2uqq},       HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX512DQ,        ExtractVector128,                           64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vextracti64x2,      INS_vextracti64x2,      INS_invalid,            INS_vextractf64x2},     HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX512DQ,        ExtractVector256,                           64,              2,      true,  {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vextracti32x8,      INS_vextracti32x8,      INS_invalid,            INS_invalid,            INS_vextractf32x8,      INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index f88cf6ec99ec..fc3c01e4c31d 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -371,7 +371,7 @@ FloatComparisonMode HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(Flo
 {
     switch (comparison)
     {
-        // These comparison modes are the same even if the operands are swapped
+            // These comparison modes are the same even if the operands are swapped
 
         case FloatComparisonMode::OrderedEqualNonSignaling:
             return FloatComparisonMode::OrderedEqualNonSignaling;
@@ -406,7 +406,7 @@ FloatComparisonMode HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(Flo
         case FloatComparisonMode::UnorderedTrueSignaling:
             return FloatComparisonMode::UnorderedTrueSignaling;
 
-        // These comparison modes need a different mode if the operands are swapped
+            // These comparison modes need a different mode if the operands are swapped
 
         case FloatComparisonMode::OrderedLessThanSignaling:
             return FloatComparisonMode::OrderedGreaterThanSignaling;
@@ -1415,15 +1415,70 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
 
         case NI_Vector128_ConvertToDouble:
         case NI_Vector256_ConvertToDouble:
+        case NI_Vector512_ConvertToDouble:
+        {
+            assert(sig->numArgs == 1);
+            assert(varTypeIsLong(simdBaseType));
+            if (IsBaselineVector512IsaSupportedOpportunistically())
+            {
+                if (simdSize == 64)
+                {
+                    intrinsic = NI_AVX512DQ_ConvertToVector512Double;
+                }
+                else if (simdSize == 32)
+                {
+                    intrinsic = NI_AVX512DQ_VL_ConvertToVector256Double;
+                }
+                else
+                {
+                    assert(simdSize == 16);
+                    intrinsic = NI_AVX512DQ_VL_ConvertToVector128Double;
+                }
+                op1     = impSIMDPopStack();
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
+            }
+            break;
+        }
+
         case NI_Vector128_ConvertToInt64:
         case NI_Vector256_ConvertToInt64:
+        case NI_Vector512_ConvertToInt64:
+        {
+            assert(sig->numArgs == 1);
+            assert(simdBaseType == TYP_DOUBLE);
+            if (IsBaselineVector512IsaSupportedOpportunistically())
+            {
+                op1     = impSIMDPopStack();
+                retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize);
+            }
+            break;
+        }
+
         case NI_Vector128_ConvertToUInt32:
         case NI_Vector256_ConvertToUInt32:
+        case NI_Vector512_ConvertToUInt32:
+        {
+            assert(sig->numArgs == 1);
+            assert(simdBaseType == TYP_FLOAT);
+            if (IsBaselineVector512IsaSupportedOpportunistically())
+            {
+                op1     = impSIMDPopStack();
+                retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize);
+            }
+            break;
+        }
+
         case NI_Vector128_ConvertToUInt64:
         case NI_Vector256_ConvertToUInt64:
+        case NI_Vector512_ConvertToUInt64:
         {
             assert(sig->numArgs == 1);
-            // TODO-XARCH-CQ: These intrinsics should be accelerated
+            assert(simdBaseType == TYP_DOUBLE);
+            if (IsBaselineVector512IsaSupportedOpportunistically())
+            {
+                op1     = impSIMDPopStack();
+                retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize);
+            }
             break;
         }
 
@@ -1433,24 +1488,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
         {
             assert(sig->numArgs == 1);
             assert(simdBaseType == TYP_FLOAT);
-
-            switch (simdSize)
+            if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
             {
-                case 16:
-                    intrinsic = NI_SSE2_ConvertToVector128Int32WithTruncation;
-                    break;
-                case 32:
-                    intrinsic = NI_AVX_ConvertToVector256Int32WithTruncation;
-                    break;
-                case 64:
-                    intrinsic = NI_AVX512F_ConvertToVector512Int32WithTruncation;
-                    break;
-                default:
-                    unreached();
+                op1     = impSIMDPopStack();
+                retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_INT, simdBaseJitType, simdSize);
             }
-
-            op1     = impSIMDPopStack();
-            retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
             break;
         }
 
@@ -1459,7 +1501,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
         case NI_Vector512_ConvertToSingle:
         {
             assert(sig->numArgs == 1);
-
+            assert(varTypeIsInt(simdBaseType));
+            intrinsic = NI_Illegal;
             if (simdBaseType == TYP_INT)
             {
                 switch (simdSize)
@@ -1476,14 +1519,28 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                     default:
                         unreached();
                 }
-
-                op1     = impSIMDPopStack();
-                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
             }
-            else
+            else if (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically())
             {
-                // TODO-XARCH-CQ: These intrinsics should be accelerated
-                assert(simdBaseType == TYP_UINT);
+                switch (simdSize)
+                {
+                    case 16:
+                        intrinsic = NI_AVX512F_VL_ConvertToVector128Single;
+                        break;
+                    case 32:
+                        intrinsic = NI_AVX512F_VL_ConvertToVector256Single;
+                        break;
+                    case 64:
+                        intrinsic = NI_AVX512F_ConvertToVector512Single;
+                        break;
+                    default:
+                        unreached();
+                }
+            }
+            if (intrinsic != NI_Illegal)
+            {
+                op1     = impSIMDPopStack();
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
             }
             break;
         }
@@ -2498,7 +2555,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                     // TODO-XARCH-CQ: We should support long/ulong multiplication
                     break;
                 }
-// else if simdSize == 64 then above assert would check if baseline isa supported
+                // else if simdSize == 64 then above assert would check if baseline isa supported
 
 #if defined(TARGET_X86)
                 // TODO-XARCH-CQ: We need to support 64-bit CreateBroadcast
@@ -3274,13 +3331,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
 
                 int ival = HWIntrinsicInfo::lookupIval(this, intrinsic, simdBaseType);
                 retNode  = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(ival), NI_AVX_CompareScalar,
-                                                   simdBaseJitType, simdSize);
+                                                    simdBaseJitType, simdSize);
             }
             else
             {
                 GenTree* clonedOp1 = nullptr;
                 op1                = impCloneExpr(op1, &clonedOp1, CHECK_SPILL_ALL,
-                                   nullptr DEBUGARG("Clone op1 for Sse.CompareScalarGreaterThan"));
+                                                  nullptr DEBUGARG("Clone op1 for Sse.CompareScalarGreaterThan"));
 
                 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, simdBaseJitType, simdSize);
                 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE_MoveScalar, simdBaseJitType,
@@ -3333,13 +3390,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
 
                 int ival = HWIntrinsicInfo::lookupIval(this, intrinsic, simdBaseType);
                 retNode  = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(ival), NI_AVX_CompareScalar,
-                                                   simdBaseJitType, simdSize);
+                                                    simdBaseJitType, simdSize);
             }
             else
             {
                 GenTree* clonedOp1 = nullptr;
                 op1                = impCloneExpr(op1, &clonedOp1, CHECK_SPILL_ALL,
-                                   nullptr DEBUGARG("Clone op1 for Sse2.CompareScalarGreaterThan"));
+                                                  nullptr DEBUGARG("Clone op1 for Sse2.CompareScalarGreaterThan"));
 
                 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, simdBaseJitType, simdSize);
                 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE2_MoveScalar, simdBaseJitType,
diff --git a/src/coreclr/jit/ifconversion.cpp b/src/coreclr/jit/ifconversion.cpp
index f9cb5af17925..6d7ead881a91 100644
--- a/src/coreclr/jit/ifconversion.cpp
+++ b/src/coreclr/jit/ifconversion.cpp
@@ -33,7 +33,7 @@ class OptIfConversionDsc
     BasicBlock* m_startBlock;           // First block in the If Conversion.
     BasicBlock* m_finalBlock = nullptr; // Block where the flows merge. In a return case, this can be nullptr.
 
-    // The node, statement and block of an assignment.
+    // The node, statement and block of an operation.
     struct IfConvertOperation
     {
         BasicBlock* block = nullptr;
@@ -208,15 +208,15 @@ void OptIfConversionDsc::IfConvertFindFlow()
 // IfConvertCheckStmts
 //
 // From the given block to the final block, check all the statements and nodes are
-// valid for an If conversion. Chain of blocks must contain only a single assignment
-// and no other operations.
+// valid for an If conversion. Chain of blocks must contain only a single local
+// store and no other operations.
 //
 // Arguments:
-//   fromBlock  -- Block inside the if statement to start from (Either Then or Else path).
-//   foundOperation -- Returns the found operation.
+//   fromBlock      - Block inside the if statement to start from (Either Then or Else path).
+//   foundOperation - Returns the found operation.
 //
 // Returns:
-//   If everything is valid, then set foundOperation to the assignment and return true.
+//   If everything is valid, then set foundOperation to the store and return true.
 //   Otherwise return false.
 //
 bool OptIfConversionDsc::IfConvertCheckStmts(BasicBlock* fromBlock, IfConvertOperation* foundOperation)
@@ -739,7 +739,7 @@ bool OptIfConversionDsc::optIfConvert()
 
     // Update the flow from the original block.
     m_comp->fgRemoveAllRefPreds(m_startBlock->GetFalseTarget(), m_startBlock);
-    m_startBlock->SetKindAndTarget(BBJ_ALWAYS, m_startBlock->GetTrueTarget());
+    m_startBlock->SetKindAndTargetEdge(BBJ_ALWAYS, m_startBlock->GetTrueEdge());
 
 #ifdef DEBUG
     if (m_comp->verbose)
@@ -774,7 +774,7 @@ PhaseStatus Compiler::optIfConversion()
 
     bool madeChanges = false;
 
-    // This phase does not respect SSA: assignments are deleted/moved.
+    // This phase does not respect SSA: local stores are deleted/moved.
     assert(!fgSsaValid);
     optReachableBitVecTraits = nullptr;
 
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 3fa43c9c4972..4928204635d1 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -71,16 +71,16 @@ bool Compiler::impILConsumesAddr(const BYTE* codeAddr)
 
     switch (opcode)
     {
-        // case CEE_LDFLDA: We're taking this one out as if you have a sequence
-        // like
-        //
-        //          ldloca.0
-        //          ldflda whatever
-        //
-        // of a primitivelike struct, you end up after morphing with addr of a local
-        // that's not marked as addrtaken, which is wrong. Also ldflda is usually used
-        // for structs that contain other structs, which isnt a case we handle very
-        // well now for other reasons.
+            // case CEE_LDFLDA: We're taking this one out as if you have a sequence
+            // like
+            //
+            //          ldloca.0
+            //          ldflda whatever
+            //
+            // of a primitivelike struct, you end up after morphing with addr of a local
+            // that's not marked as addrtaken, which is wrong. Also ldflda is usually used
+            // for structs that contain other structs, which isnt a case we handle very
+            // well now for other reasons.
 
         case CEE_LDFLD:
         {
@@ -431,7 +431,7 @@ void Compiler::impAppendStmt(Statement* stmt, unsigned chkLevel, bool checkConsu
         {
             GenTree* call = expr->OperIs(GT_RET_EXPR) ? expr->AsRetExpr()->gtInlineCandidate : expr;
 
-            if (call->TypeIs(TYP_VOID) && call->AsCall()->TreatAsShouldHaveRetBufArg(this))
+            if (call->TypeIs(TYP_VOID) && call->AsCall()->TreatAsShouldHaveRetBufArg())
             {
                 GenTree* retBuf;
                 if (call->AsCall()->ShouldHaveRetBufArg())
@@ -465,7 +465,7 @@ void Compiler::impAppendStmt(Statement* stmt, unsigned chkLevel, bool checkConsu
 
             if (expr->OperIsLocalStore())
             {
-                // For assignments, limit the checking to what the value could modify/interfere with.
+                // For stores, limit the checking to what the value could modify/interfere with.
                 GenTree* value = expr->AsLclVarCommon()->Data();
                 flags          = value->gtFlags & GTF_GLOB_EFFECT;
 
@@ -661,17 +661,17 @@ Statement* Compiler::impAppendTree(GenTree* tree, unsigned chkLevel, const Debug
 
 /*****************************************************************************
  *
- *  Append an assignment of the given value to a temp to the current tree list.
+ *  Append a store of the given value to a temp to the current tree list.
  *  curLevel is the stack level for which the spill to the temp is being done.
  */
 
-void Compiler::impStoreTemp(unsigned         lclNum,
-                            GenTree*         val,
-                            unsigned         curLevel,
-                            Statement**      pAfterStmt, /* = NULL */
-                            const DebugInfo& di,         /* = DebugInfo() */
-                            BasicBlock*      block       /* = NULL */
-                            )
+void Compiler::impStoreToTemp(unsigned         lclNum,
+                              GenTree*         val,
+                              unsigned         curLevel,
+                              Statement**      pAfterStmt, /* = NULL */
+                              const DebugInfo& di,         /* = DebugInfo() */
+                              BasicBlock*      block       /* = NULL */
+)
 {
     GenTree* store = gtNewTempStore(lclNum, val, curLevel, pAfterStmt, di, block);
 
@@ -816,7 +816,7 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
                                   Statement**      pAfterStmt, /* = nullptr */
                                   const DebugInfo& di,         /* = DebugInfo() */
                                   BasicBlock*      block       /* = nullptr */
-                                  )
+)
 {
     assert(varTypeIsStruct(store) && store->OperIsStore());
 
@@ -837,7 +837,7 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
     if (src->IsCall())
     {
         GenTreeCall* srcCall = src->AsCall();
-        if (srcCall->TreatAsShouldHaveRetBufArg(this))
+        if (srcCall->TreatAsShouldHaveRetBufArg())
         {
             // Case of call returning a struct via hidden retbuf arg.
             // Some calls have an "out buffer" that is not actually a ret buff
@@ -966,39 +966,6 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
             return src;
         }
     }
-    else if (src->OperIs(GT_MKREFANY))
-    {
-        // Since we are assigning the result of a GT_MKREFANY, "destAddr" must point to a refany.
-        // TODO-CQ: we can do this without address-exposing the local on the LHS.
-        GenTreeFlags indirFlags = GTF_EMPTY;
-        GenTree*     destAddr   = impGetNodeAddr(store, CHECK_SPILL_ALL, &indirFlags);
-        GenTree*     destAddrClone;
-        destAddr = impCloneExpr(destAddr, &destAddrClone, curLevel, pAfterStmt DEBUGARG("MKREFANY assignment"));
-
-        assert(OFFSETOF__CORINFO_TypedReference__dataPtr == 0);
-        assert(destAddr->gtType == TYP_I_IMPL || destAddr->gtType == TYP_BYREF);
-
-        // Append the store of the pointer value.
-        // TODO-Bug: the pointer value can be a byref. Use its actual type here instead of TYP_I_IMPL.
-        GenTree* ptrFieldStore = gtNewStoreIndNode(TYP_I_IMPL, destAddr, src->AsOp()->gtOp1, indirFlags);
-        if (pAfterStmt)
-        {
-            Statement* newStmt = gtNewStmt(ptrFieldStore, usedDI);
-            fgInsertStmtAfter(block, *pAfterStmt, newStmt);
-            *pAfterStmt = newStmt;
-        }
-        else
-        {
-            impAppendTree(ptrFieldStore, curLevel, usedDI);
-        }
-
-        GenTree* typeFieldOffset = gtNewIconNode(OFFSETOF__CORINFO_TypedReference__type, TYP_I_IMPL);
-        GenTree* typeFieldAddr   = gtNewOperNode(GT_ADD, genActualType(destAddr), destAddrClone, typeFieldOffset);
-        GenTree* typeFieldStore  = gtNewStoreIndNode(TYP_I_IMPL, typeFieldAddr, src->AsOp()->gtOp2);
-
-        // Return the store of the type value, to be appended.
-        return typeFieldStore;
-    }
     else if (src->OperIs(GT_COMMA))
     {
         if (pAfterStmt)
@@ -1020,8 +987,8 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
             // Instead, we're going to sink the store below the COMMA.
             store->Data()      = src->AsOp()->gtOp2;
             src->AsOp()->gtOp2 = impStoreStruct(store, curLevel, pAfterStmt, usedDI, block);
-            src->SetAllEffectsFlags(src->AsOp()->gtOp1, src->AsOp()->gtOp2);
             gtUpdateNodeSideEffects(store);
+            src->SetAllEffectsFlags(src->AsOp()->gtOp1, src->AsOp()->gtOp2);
 
             return src;
         }
@@ -1117,7 +1084,7 @@ GenTree* Compiler::impGetNodeAddr(GenTree* val, unsigned curLevel, GenTreeFlags*
     }
 
     unsigned lclNum = lvaGrabTemp(true DEBUGARG("location for address-of(RValue)"));
-    impStoreTemp(lclNum, val, curLevel);
+    impStoreToTemp(lclNum, val, curLevel);
 
     // The 'return value' is now address of the temp itself.
     return gtNewLclVarAddrNode(lclNum, TYP_BYREF);
@@ -1200,7 +1167,7 @@ GenTree* Compiler::impNormStructVal(GenTree* structVal, unsigned curLevel)
         case GT_RET_EXPR:
         {
             unsigned lclNum = lvaGrabTemp(true DEBUGARG("spilled call-like call argument"));
-            impStoreTemp(lclNum, structVal, curLevel);
+            impStoreToTemp(lclNum, structVal, curLevel);
 
             // The structVal is now the temp itself
             structVal = gtNewLclvNode(lclNum, structType);
@@ -1274,7 +1241,7 @@ GenTree* Compiler::impTokenToHandle(CORINFO_RESOLVED_TOKEN* pResolvedToken,
     assert(!fgGlobalMorph);
 
     CORINFO_GENERICHANDLE_RESULT embedInfo;
-    info.compCompHnd->embedGenericHandle(pResolvedToken, importParent, &embedInfo);
+    info.compCompHnd->embedGenericHandle(pResolvedToken, importParent, info.compMethodHnd, &embedInfo);
 
     if (pRuntimeLookup)
     {
@@ -1486,7 +1453,7 @@ GenTreeCall* Compiler::impReadyToRunHelperToTree(CORINFO_RESOLVED_TOKEN* pResolv
                                                  GenTree*                arg1)
 {
     CORINFO_CONST_LOOKUP lookup;
-    if (!info.compCompHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, helper, &lookup))
+    if (!info.compCompHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, helper, info.compMethodHnd, &lookup))
     {
         return nullptr;
     }
@@ -1549,30 +1516,56 @@ GenTree* Compiler::impMethodPointer(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORI
 
 GenTree* Compiler::getRuntimeContextTree(CORINFO_RUNTIME_LOOKUP_KIND kind)
 {
-    GenTree* ctxTree = nullptr;
+    GenTree* ctxTree;
 
     // Collectible types requires that for shared generic code, if we use the generic context parameter
-    // that we report it. (This is a conservative approach, we could detect some cases particularly when the
-    // context parameter is this that we don't need the eager reporting logic.)
-    lvaGenericsContextInUse = true;
+    // that we report it. Conservatively mark the root method as using generic context, MARK_LOCAL_VARS phase
+    // will clean it up if it turns out to be unnecessary.
+    impInlineRoot()->lvaGenericsContextInUse = true;
 
-    Compiler* pRoot = impInlineRoot();
-
-    if (kind == CORINFO_LOOKUP_THISOBJ)
+    // Always use generic context from the callsite if we're inlining and it's available.
+    if (compIsForInlining() && (impInlineInfo->inlInstParamArgInfo != nullptr))
     {
-        // this Object
-        ctxTree = gtNewLclvNode(pRoot->info.compThisArg, TYP_REF);
-        ctxTree->gtFlags |= GTF_VAR_CONTEXT;
+        // Create a dummy lclInfo node, we know that nobody's going to do stloc or take address
+        // of the generic context, so we don't need to scan IL for it.
+        InlLclVarInfo lclInfo = {};
+        lclInfo.lclTypeInfo   = TYP_I_IMPL;
+        ctxTree               = impInlineFetchArg(*impInlineInfo->inlInstParamArgInfo, lclInfo);
+        assert(ctxTree != nullptr);
+        assert(ctxTree->TypeIs(TYP_I_IMPL));
+        // We don't need to worry about GTF_VAR_CONTEXT here, it should be set on the callsite anyway.
+    }
+    else if (kind == CORINFO_LOOKUP_THISOBJ)
+    {
+        // Use "this" from the callsite if we're inlining
+        if (compIsForInlining())
+        {
+            // "this" is always the first argument in inlArgInfo
+            assert(impInlineInfo->argCnt > 0);
+            assert(impInlineInfo->inlArgInfo[0].argIsThis);
+
+            ctxTree = impInlineFetchArg(impInlineInfo->inlArgInfo[0], impInlineInfo->lclVarInfo[0]);
+
+            // "this" is expected to be always a local, and we must mark it as a context
+            assert(ctxTree->OperIs(GT_LCL_VAR));
+            ctxTree->gtFlags |= GTF_VAR_CONTEXT;
+        }
+        else
+        {
+            assert(info.compThisArg != BAD_VAR_NUM);
+            ctxTree = gtNewLclvNode(info.compThisArg, TYP_REF);
+            ctxTree->gtFlags |= GTF_VAR_CONTEXT;
+        }
 
         // context is the method table pointer of the this object
         ctxTree = gtNewMethodTableLookup(ctxTree);
     }
     else
     {
-        assert(kind == CORINFO_LOOKUP_METHODPARAM || kind == CORINFO_LOOKUP_CLASSPARAM);
+        assert((kind == CORINFO_LOOKUP_METHODPARAM) || (kind == CORINFO_LOOKUP_CLASSPARAM));
 
         // Exact method descriptor as passed in
-        ctxTree = gtNewLclvNode(pRoot->info.compTypeCtxtArg, TYP_I_IMPL);
+        ctxTree = gtNewLclvNode(impInlineRoot()->info.compTypeCtxtArg, TYP_I_IMPL);
         ctxTree->gtFlags |= GTF_VAR_CONTEXT;
     }
     return ctxTree;
@@ -1633,7 +1626,7 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken
 
         // Spilling it to a temp improves CQ (mainly in Tier0)
         unsigned callLclNum = lvaGrabTemp(true DEBUGARG("spilling helperCall"));
-        impStoreTemp(callLclNum, helperCall, CHECK_SPILL_NONE);
+        impStoreToTemp(callLclNum, helperCall, CHECK_SPILL_NONE);
         return gtNewLclvNode(callLclNum, helperCall->TypeGet());
     }
 
@@ -1644,6 +1637,9 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken
     GenTree* slotPtrTree = ctxTree;
     GenTree* indOffTree  = nullptr;
 
+    // TODO-CQ: consider relaxing where it's safe to do so
+    const bool ctxTreeIsInvariant = !compIsForInlining();
+
     // Applied repeated indirections
     for (WORD i = 0; i < pRuntimeLookup->indirections; i++)
     {
@@ -1655,7 +1651,8 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken
 
         if (i != 0)
         {
-            slotPtrTree = gtNewIndir(TYP_I_IMPL, slotPtrTree, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
+            slotPtrTree = gtNewIndir(TYP_I_IMPL, slotPtrTree,
+                                     ctxTreeIsInvariant ? (GTF_IND_NONFAULTING | GTF_IND_INVARIANT) : GTF_EMPTY);
         }
 
         if ((i == 1 && pRuntimeLookup->indirectFirstOffset) || (i == 2 && pRuntimeLookup->indirectSecondOffset))
@@ -1678,8 +1675,8 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken
         return slotPtrTree;
     }
 
-    slotPtrTree = gtNewIndir(TYP_I_IMPL, slotPtrTree, GTF_IND_NONFAULTING);
-    slotPtrTree->gtFlags &= ~GTF_GLOB_REF; // TODO-Bug?: this is a quirk. Can we mark this indirection invariant?
+    slotPtrTree =
+        gtNewIndir(TYP_I_IMPL, slotPtrTree, ctxTreeIsInvariant ? (GTF_IND_NONFAULTING | GTF_IND_INVARIANT) : GTF_EMPTY);
 
     return slotPtrTree;
 }
@@ -1722,7 +1719,7 @@ bool Compiler::impSpillStackEntry(unsigned level,
                                   bool        bAssertOnRecursion,
                                   const char* reason
 #endif
-                                  )
+)
 {
 
 #ifdef DEBUG
@@ -1748,7 +1745,7 @@ bool Compiler::impSpillStackEntry(unsigned level,
     }
 
     /* Assign the spilled entry to the temp */
-    impStoreTemp(tnum, tree, level);
+    impStoreToTemp(tnum, tree, level);
 
     if (isNewTemp)
     {
@@ -1783,7 +1780,7 @@ bool Compiler::impSpillStackEntry(unsigned level,
         }
     }
 
-    // The tree type may be modified by impStoreTemp, so use the type of the lclVar.
+    // The tree type may be modified by impStoreToTemp, so use the type of the lclVar.
     var_types type                     = genActualType(lvaTable[tnum].TypeGet());
     GenTree*  temp                     = gtNewLclvNode(tnum, type);
     verCurrentState.esStack[level].val = temp;
@@ -1826,7 +1823,7 @@ void Compiler::impSpillStackEnsure(bool spillLeaves)
 /*****************************************************************************
  *
  *  If the stack contains any trees with side effects in them, assign those
- *  trees to temps and append the assignments to the statement list.
+ *  trees to temps and append the stores to the statement list.
  *  On return the stack is guaranteed to be empty.
  */
 
@@ -1940,9 +1937,8 @@ void Compiler::impSpillLclRefs(unsigned lclNum, unsigned chkLevel)
         GenTree* tree = verCurrentState.esStack[level].val;
 
         /* If the tree may throw an exception, and the block has a handler,
-           then we need to spill assignments to the local if the local is
-           live on entry to the handler.
-           Just spill 'em all without considering the liveness */
+           then we need to spill stores to the local if the local is on entry
+           to the handler. Just spill 'em all without considering the liveness */
 
         bool xcptnCaught = ehBlockHasExnFlowDsc(compCurBB) && (tree->gtFlags & (GTF_CALL | GTF_EXCEPT));
 
@@ -2022,13 +2018,13 @@ BasicBlock* Compiler::impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_H
     {
         // Create extra basic block for the spill
         //
-        BasicBlock* newBlk = fgNewBBbefore(BBJ_ALWAYS, hndBlk, /* extendRegion */ true, /* jumpDest */ hndBlk);
-        newBlk->SetFlags(BBF_IMPORTED | BBF_DONT_REMOVE | BBF_NONE_QUIRK);
+        BasicBlock* newBlk = fgNewBBbefore(BBJ_ALWAYS, hndBlk, /* extendRegion */ true);
+        newBlk->SetFlags(BBF_IMPORTED | BBF_DONT_REMOVE);
         newBlk->inheritWeight(hndBlk);
         newBlk->bbCodeOffs = hndBlk->bbCodeOffs;
 
         FlowEdge* const newEdge = fgAddRefPred(hndBlk, newBlk);
-        newEdge->setLikelihood(1.0);
+        newBlk->SetTargetEdge(newEdge);
 
         // Spill into a temp.
         unsigned tempNum         = lvaGrabTemp(false DEBUGARG("SpillCatchArg"));
@@ -2071,9 +2067,9 @@ BasicBlock* Compiler::impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_H
  *  If the tree has side-effects, it will be spilled to a temp.
  */
 
-GenTree* Compiler::impCloneExpr(GenTree*    tree,
-                                GenTree**   pClone,
-                                unsigned    curLevel,
+GenTree* Compiler::impCloneExpr(GenTree*               tree,
+                                GenTree**              pClone,
+                                unsigned               curLevel,
                                 Statement** pAfterStmt DEBUGARG(const char* reason))
 {
     if (!(tree->gtFlags & GTF_GLOB_EFFECT))
@@ -2091,12 +2087,12 @@ GenTree* Compiler::impCloneExpr(GenTree*    tree,
 
     unsigned temp = lvaGrabTemp(true DEBUGARG(reason));
 
-    // impStoreTemp() may change tree->gtType to TYP_VOID for calls which
+    // impStoreToTemp() may change tree->gtType to TYP_VOID for calls which
     // return a struct type. It also may modify the struct type to a more
     // specialized type (e.g. a SIMD type).  So we will get the type from
-    // the lclVar AFTER calling impStoreTemp().
+    // the lclVar AFTER calling impStoreToTemp().
 
-    impStoreTemp(temp, tree, curLevel, pAfterStmt, impCurStmtDI);
+    impStoreToTemp(temp, tree, curLevel, pAfterStmt, impCurStmtDI);
     var_types type = genActualType(lvaTable[temp].TypeGet());
 
     *pClone = gtNewLclvNode(temp, type);
@@ -2495,7 +2491,7 @@ GenTree* Compiler::impTypeIsAssignable(GenTree* typeTo, GenTree* typeFrom)
 
 void Compiler::verConvertBBToThrowVerificationException(BasicBlock* block DEBUGARG(bool logMsg))
 {
-    block->SetKindAndTarget(BBJ_THROW);
+    block->SetKindAndTargetEdge(BBJ_THROW);
     block->SetFlags(BBF_FAILED_VERIFICATION);
     block->RemoveFlags(BBF_IMPORTED);
 
@@ -3238,7 +3234,7 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken)
         Statement* const cursor = impLastStmt;
 
         const bool useParent = false;
-        op1                  = gtNewAllocObjNode(pResolvedToken, useParent);
+        op1                  = gtNewAllocObjNode(pResolvedToken, info.compMethodHnd, useParent);
         if (op1 == nullptr)
         {
             // If we fail to create the newobj node, we must be inlining
@@ -3260,7 +3256,7 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken)
         Statement* allocBoxStmt  = impAppendTree(allocBoxStore, CHECK_SPILL_NONE, impCurStmtDI);
 
         // If the exprToBox is a call that returns its value via a ret buf arg,
-        // move the assignment statement(s) before the call (which must be a top level tree).
+        // move the store statement(s) before the call (which must be a top level tree).
         //
         // We do this because impStoreStructPtr (invoked below) will
         // back-substitute into a call when it sees a GT_RET_EXPR and the call
@@ -3279,7 +3275,7 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken)
                 // that has this call as the root node.
                 //
                 // Because gtNewTempStore (above) may have added statements that
-                // feed into the actual assignment we need to move this set of added
+                // feed into the actual store we need to move this set of added
                 // statements as a group.
                 //
                 // Note boxed allocations are side-effect free (no com or finalizer) so
@@ -3314,7 +3310,7 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken)
                     insertBeforeStmt = insertBeforeStmt->GetPrevStmt();
                 }
 
-                // Found the call. Move the statements comprising the assignment.
+                // Found the call. Move the statements comprising the store.
                 //
                 JITDUMP("Moving " FMT_STMT "..." FMT_STMT " before " FMT_STMT "\n", cursor->GetNextStmt()->GetID(),
                         allocBoxStmt->GetID(), insertBeforeStmt->GetID());
@@ -3377,7 +3373,7 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken)
         // Spill eval stack to flush out any pending side effects.
         impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("impImportAndPushBox"));
 
-        // Set up this copy as a second assignment.
+        // Set up this copy as a second store.
         Statement* copyStmt = impAppendTree(op1, CHECK_SPILL_NONE, impCurStmtDI);
 
         op1 = gtNewLclvNode(impBoxTemp, TYP_REF);
@@ -3655,7 +3651,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI
                 unsigned structTempNum = lvaGrabTemp(true DEBUGARG("folding static readonly field empty struct"));
                 lvaSetStruct(structTempNum, fieldClsHnd, false);
 
-                impStoreTemp(structTempNum, gtNewIconNode(0), CHECK_SPILL_NONE);
+                impStoreToTemp(structTempNum, gtNewIconNode(0), CHECK_SPILL_NONE);
 
                 return gtNewLclVarNode(structTempNum);
             }
@@ -4228,7 +4224,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op)
     JITDUMP("\nimpFixupStructReturnType: retyping\n");
     DISPTREE(op);
 
-    if (op->IsCall() && op->AsCall()->TreatAsShouldHaveRetBufArg(this))
+    if (op->IsCall() && op->AsCall()->TreatAsShouldHaveRetBufArg())
     {
         // This must be one of those 'special' helpers that don't really have a return buffer, but instead
         // use it as a way to keep the trees cleaner with fewer address-taken temps. Well now we have to
@@ -4237,7 +4233,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op)
         unsigned tmpNum = lvaGrabTemp(true DEBUGARG("pseudo return buffer"));
 
         // No need to spill anything as we're about to return.
-        impStoreTemp(tmpNum, op, CHECK_SPILL_NONE);
+        impStoreToTemp(tmpNum, op, CHECK_SPILL_NONE);
 
         op = gtNewLclvNode(tmpNum, info.compRetType);
         JITDUMP("\nimpFixupStructReturnType: created a pseudo-return buffer for a special helper\n");
@@ -4264,12 +4260,13 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op)
 
         // In contrast, we can only use multi-reg calls directly if they have the exact same ABI.
         // Calling convention equality is a conservative approximation for that check.
-        if (op->IsCall() && (op->AsCall()->GetUnmanagedCallConv() == info.compCallConv)
+        if (op->IsCall() &&
+            (op->AsCall()->GetUnmanagedCallConv() == info.compCallConv)
 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
             // TODO-Review: this seems unnecessary. Return ABI doesn't change under varargs.
             && !op->AsCall()->IsVarargs()
 #endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-                )
+        )
         {
             return op;
         }
@@ -4312,9 +4309,9 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op)
 //   After this function, the BBJ_LEAVE block has been converted to a different type.
 //
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
 
-void Compiler::impImportLeave(BasicBlock* block)
+void Compiler::impImportLeaveEHRegions(BasicBlock* block)
 {
 #ifdef DEBUG
     if (verbose)
@@ -4405,11 +4402,10 @@ void Compiler::impImportLeave(BasicBlock* block)
                 assert(step == DUMMY_INIT(NULL));
                 callBlock = block;
 
+                // callBlock calls the finally handler
                 assert(callBlock->HasInitializedTarget());
-                fgRemoveRefPred(callBlock->GetTarget(), callBlock);
-
-                // callBlock will call the finally handler. Convert the BBJ_LEAVE to BBJ_CALLFINALLY.
-                callBlock->SetKindAndTarget(BBJ_CALLFINALLY, HBtab->ebdHndBeg);
+                fgRedirectTargetEdge(callBlock, HBtab->ebdHndBeg);
+                callBlock->SetKind(BBJ_CALLFINALLY);
 
                 if (endCatches)
                 {
@@ -4431,16 +4427,22 @@ void Compiler::impImportLeave(BasicBlock* block)
 
                 // Calling the finally block.
 
-                // callBlock will call the finally handler
-                callBlock = fgNewBBinRegion(BBJ_CALLFINALLY, XTnum + 1, 0, step, HBtab->ebdHndBeg);
+                // callBlock calls the finally handler
+                callBlock = fgNewBBinRegion(BBJ_CALLFINALLY, XTnum + 1, 0, step);
+
+                {
+                    FlowEdge* const newEdge = fgAddRefPred(HBtab->ebdHndBeg, callBlock);
+                    callBlock->SetTargetEdge(newEdge);
+                }
 
                 // step's jump target shouldn't be set yet
                 assert(!step->HasInitializedTarget());
 
-                // the previous call to a finally returns to this call (to the next finally in the chain)
-                step->SetTarget(callBlock);
-                FlowEdge* const newEdge = fgAddRefPred(callBlock, step);
-                newEdge->setLikelihood(1.0);
+                {
+                    // the previous call to a finally returns to this call (to the next finally in the chain)
+                    FlowEdge* const newEdge = fgAddRefPred(callBlock, step);
+                    step->SetTargetEdge(newEdge);
+                }
 
                 // The new block will inherit this block's weight.
                 callBlock->inheritWeight(block);
@@ -4470,6 +4472,9 @@ void Compiler::impImportLeave(BasicBlock* block)
                 impEndTreeList(callBlock, endLFinStmt, lastStmt);
             }
 
+            // callBlock should be set up at this point
+            assert(callBlock->TargetIs(HBtab->ebdHndBeg));
+
             // Note: we don't know the jump target yet
             step = fgNewBBafter(BBJ_CALLFINALLYRET, callBlock, true);
             // The new block will inherit this block's weight.
@@ -4488,11 +4493,6 @@ void Compiler::impImportLeave(BasicBlock* block)
             unsigned finallyNesting = compHndBBtab[XTnum].ebdHandlerNestingLevel;
             assert(finallyNesting <= compHndBBtabCount);
 
-            assert(callBlock->KindIs(BBJ_CALLFINALLY));
-            assert(callBlock->TargetIs(HBtab->ebdHndBeg));
-            FlowEdge* const newEdge = fgAddRefPred(callBlock->GetTarget(), callBlock);
-            newEdge->setLikelihood(1.0);
-
             GenTree* endLFin = new (this, GT_END_LFIN) GenTreeVal(GT_END_LFIN, TYP_VOID, finallyNesting);
             endLFinStmt      = gtNewStmt(endLFin);
             endCatches       = NULL;
@@ -4534,16 +4534,15 @@ void Compiler::impImportLeave(BasicBlock* block)
         // Insert a new BB either in the try region indicated by tryIndex or
         // the handler region indicated by leaveTarget->bbHndIndex,
         // depending on which is the inner region.
-        BasicBlock* finalStep = fgNewBBinRegion(BBJ_ALWAYS, tryIndex, leaveTarget->bbHndIndex, step, leaveTarget);
+        BasicBlock* finalStep = fgNewBBinRegion(BBJ_ALWAYS, tryIndex, leaveTarget->bbHndIndex, step);
         finalStep->SetFlags(BBF_KEEP_BBJ_ALWAYS);
 
         // step's jump target shouldn't be set yet
         assert(!step->HasInitializedTarget());
 
-        step->SetTarget(finalStep);
         {
             FlowEdge* const newEdge = fgAddRefPred(finalStep, step);
-            newEdge->setLikelihood(1.0);
+            step->SetTargetEdge(newEdge);
         }
 
         // The new block will inherit this block's weight.
@@ -4575,7 +4574,7 @@ void Compiler::impImportLeave(BasicBlock* block)
         // this is the ultimate destination of the LEAVE
         {
             FlowEdge* const newEdge = fgAddRefPred(leaveTarget, finalStep);
-            newEdge->setLikelihood(1.0);
+            finalStep->SetTargetEdge(newEdge);
         }
 
         // Queue up the jump target for importing
@@ -4595,10 +4594,17 @@ void Compiler::impImportLeave(BasicBlock* block)
 #endif // DEBUG
 }
 
-#else // FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
 void Compiler::impImportLeave(BasicBlock* block)
 {
+#if defined(FEATURE_EH_WINDOWS_X86)
+    if (!UsesFunclets())
+    {
+        return impImportLeaveEHRegions(block);
+    }
+#endif
+
 #ifdef DEBUG
     if (verbose)
     {
@@ -4690,12 +4696,15 @@ void Compiler::impImportLeave(BasicBlock* block)
                 assert((step == block) || !step->HasInitializedTarget());
                 if (step == block)
                 {
-                    fgRemoveRefPred(step->GetTarget(), step);
+                    fgRedirectTargetEdge(step, exitBlock);
+                }
+                else
+                {
+                    FlowEdge* const newEdge = fgAddRefPred(exitBlock, step);
+                    step->SetTargetEdge(newEdge); // the previous step (maybe a call to a nested finally, or a nested
+                                                  // catch
+                                                  // exit) returns to this block
                 }
-                step->SetTarget(exitBlock); // the previous step (maybe a call to a nested finally, or a nested catch
-                                            // exit) returns to this block
-                FlowEdge* const newEdge = fgAddRefPred(exitBlock, step);
-                newEdge->setLikelihood(1.0);
 
                 // The new block will inherit this block's weight.
                 exitBlock->inheritWeight(block);
@@ -4721,31 +4730,30 @@ void Compiler::impImportLeave(BasicBlock* block)
 
             BasicBlock* callBlock;
 
-            if (step == nullptr)
+            if (step == nullptr && UsesCallFinallyThunks())
             {
-#if FEATURE_EH_CALLFINALLY_THUNKS
-
                 // Put the call to the finally in the enclosing region.
                 unsigned callFinallyTryIndex =
                     (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : HBtab->ebdEnclosingTryIndex + 1;
                 unsigned callFinallyHndIndex =
                     (HBtab->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : HBtab->ebdEnclosingHndIndex + 1;
-                callBlock =
-                    fgNewBBinRegion(BBJ_CALLFINALLY, callFinallyTryIndex, callFinallyHndIndex, block, HBtab->ebdHndBeg);
+                callBlock = fgNewBBinRegion(BBJ_CALLFINALLY, callFinallyTryIndex, callFinallyHndIndex, block);
 
                 // Convert the BBJ_LEAVE to BBJ_ALWAYS, jumping to the new BBJ_CALLFINALLY. This is because
                 // the new BBJ_CALLFINALLY is in a different EH region, thus it can't just replace the BBJ_LEAVE,
                 // which might be in the middle of the "try". In most cases, the BBJ_ALWAYS will jump to the
                 // next block, and flow optimizations will remove it.
-                fgRemoveRefPred(block->GetTarget(), block);
-                block->SetKindAndTarget(BBJ_ALWAYS, callBlock);
-                FlowEdge* const newEdge = fgAddRefPred(callBlock, block);
-                newEdge->setLikelihood(1.0);
+                fgRedirectTargetEdge(block, callBlock);
+                block->SetKind(BBJ_ALWAYS);
 
                 // The new block will inherit this block's weight.
                 callBlock->inheritWeight(block);
                 callBlock->SetFlags(BBF_IMPORTED);
 
+                // callBlock calls the finally handler
+                FlowEdge* const newEdge = fgAddRefPred(HBtab->ebdHndBeg, callBlock);
+                callBlock->SetKindAndTargetEdge(BBJ_CALLFINALLY, newEdge);
+
 #ifdef DEBUG
                 if (verbose)
                 {
@@ -4754,16 +4762,15 @@ void Compiler::impImportLeave(BasicBlock* block)
                            XTnum, block->bbNum, callBlock->bbNum);
                 }
 #endif
-
-#else // !FEATURE_EH_CALLFINALLY_THUNKS
-
+            }
+            else if (step == nullptr) // && !UsesCallFinallyThunks()
+            {
                 callBlock = block;
 
+                // callBlock calls the finally handler
                 assert(callBlock->HasInitializedTarget());
-                fgRemoveRefPred(callBlock->GetTarget(), callBlock);
-
-                // callBlock will call the finally handler. Convert the BBJ_LEAVE to BBJ_CALLFINALLY
-                callBlock->SetKindAndTarget(BBJ_CALLFINALLY, HBtab->ebdHndBeg);
+                fgRedirectTargetEdge(callBlock, HBtab->ebdHndBeg);
+                callBlock->SetKind(BBJ_CALLFINALLY);
 
 #ifdef DEBUG
                 if (verbose)
@@ -4773,8 +4780,6 @@ void Compiler::impImportLeave(BasicBlock* block)
                            XTnum, callBlock->bbNum);
                 }
 #endif
-
-#endif // !FEATURE_EH_CALLFINALLY_THUNKS
             }
             else
             {
@@ -4797,8 +4802,7 @@ void Compiler::impImportLeave(BasicBlock* block)
                 assert(step->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET, BBJ_EHCATCHRET));
                 assert((step == block) || !step->HasInitializedTarget());
 
-#if FEATURE_EH_CALLFINALLY_THUNKS
-                if (step->KindIs(BBJ_EHCATCHRET))
+                if (UsesCallFinallyThunks() && step->KindIs(BBJ_EHCATCHRET))
                 {
                     // Need to create another step block in the 'try' region that will actually branch to the
                     // call-to-finally thunk.
@@ -4806,11 +4810,14 @@ void Compiler::impImportLeave(BasicBlock* block)
                     BasicBlock* step2 = fgNewBBinRegion(BBJ_ALWAYS, XTnum + 1, 0, step);
                     if (step == block)
                     {
-                        fgRemoveRefPred(step->GetTarget(), step);
+                        fgRedirectTargetEdge(step, step2);
+                    }
+                    else
+                    {
+                        FlowEdge* const newEdge = fgAddRefPred(step2, step);
+                        step->SetTargetEdge(newEdge);
                     }
-                    step->SetTarget(step2);
-                    FlowEdge* const newEdge = fgAddRefPred(step2, step);
-                    newEdge->setLikelihood(1.0);
+
                     step2->inheritWeight(block);
                     step2->CopyFlags(block, BBF_RUN_RARELY);
                     step2->SetFlags(BBF_IMPORTED);
@@ -4827,37 +4834,49 @@ void Compiler::impImportLeave(BasicBlock* block)
                     step = step2;
                     assert(stepType == ST_Catch); // Leave it as catch type for now.
                 }
-#endif // FEATURE_EH_CALLFINALLY_THUNKS
 
-#if FEATURE_EH_CALLFINALLY_THUNKS
-                unsigned callFinallyTryIndex =
-                    (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : HBtab->ebdEnclosingTryIndex + 1;
-                unsigned callFinallyHndIndex =
-                    (HBtab->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : HBtab->ebdEnclosingHndIndex + 1;
-#else  // !FEATURE_EH_CALLFINALLY_THUNKS
-                unsigned callFinallyTryIndex = XTnum + 1;
-                unsigned callFinallyHndIndex = 0; // don't care
-#endif // !FEATURE_EH_CALLFINALLY_THUNKS
+                unsigned callFinallyTryIndex;
+                unsigned callFinallyHndIndex;
+
+                if (UsesCallFinallyThunks())
+                {
+                    callFinallyTryIndex = (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+                                              ? 0
+                                              : HBtab->ebdEnclosingTryIndex + 1;
+                    callFinallyHndIndex = (HBtab->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+                                              ? 0
+                                              : HBtab->ebdEnclosingHndIndex + 1;
+                }
+                else
+                {
+                    callFinallyTryIndex = XTnum + 1;
+                    callFinallyHndIndex = 0; // don't care
+                }
 
                 assert(step->KindIs(BBJ_ALWAYS, BBJ_CALLFINALLYRET, BBJ_EHCATCHRET));
                 assert((step == block) || !step->HasInitializedTarget());
 
                 // callBlock will call the finally handler
-                callBlock =
-                    fgNewBBinRegion(BBJ_CALLFINALLY, callFinallyTryIndex, callFinallyHndIndex, step, HBtab->ebdHndBeg);
+                callBlock = fgNewBBinRegion(BBJ_CALLFINALLY, callFinallyTryIndex, callFinallyHndIndex, step);
                 if (step == block)
                 {
-                    fgRemoveRefPred(step->GetTarget(), step);
+                    fgRedirectTargetEdge(step, callBlock);
+                }
+                else
+                {
+                    FlowEdge* const newEdge = fgAddRefPred(callBlock, step);
+                    step->SetTargetEdge(newEdge); // the previous call to a finally returns to this call (to the next
+                                                  // finally in the chain)
                 }
-                step->SetTarget(callBlock); // the previous call to a finally returns to this call (to the next
-                                            // finally in the chain)
-                FlowEdge* const newEdge = fgAddRefPred(callBlock, step);
-                newEdge->setLikelihood(1.0);
 
                 // The new block will inherit this block's weight.
                 callBlock->inheritWeight(block);
                 callBlock->SetFlags(BBF_IMPORTED);
 
+                // callBlock calls the finally handler
+                FlowEdge* const newEdge = fgAddRefPred(HBtab->ebdHndBeg, callBlock);
+                callBlock->SetKindAndTargetEdge(BBJ_CALLFINALLY, newEdge);
+
 #ifdef DEBUG
                 if (verbose)
                 {
@@ -4868,6 +4887,9 @@ void Compiler::impImportLeave(BasicBlock* block)
 #endif
             }
 
+            // callBlock should be set up at this point
+            assert(callBlock->TargetIs(HBtab->ebdHndBeg));
+
             // Note: we don't know the jump target yet
             step     = fgNewBBafter(BBJ_CALLFINALLYRET, callBlock, true);
             stepType = ST_FinallyReturn;
@@ -4885,11 +4907,6 @@ void Compiler::impImportLeave(BasicBlock* block)
                        XTnum, step->bbNum);
             }
 #endif
-
-            assert(callBlock->KindIs(BBJ_CALLFINALLY));
-            assert(callBlock->TargetIs(HBtab->ebdHndBeg));
-            FlowEdge* const newEdge = fgAddRefPred(callBlock->GetTarget(), callBlock);
-            newEdge->setLikelihood(1.0);
         }
         else if (HBtab->HasCatchHandler() && jitIsBetween(blkAddr, tryBeg, tryEnd) &&
                  !jitIsBetween(jmpAddr, tryBeg, tryEnd))
@@ -4953,11 +4970,13 @@ void Compiler::impImportLeave(BasicBlock* block)
 
                 if (step == block)
                 {
-                    fgRemoveRefPred(step->GetTarget(), step);
+                    fgRedirectTargetEdge(step, catchStep);
+                }
+                else
+                {
+                    FlowEdge* const newEdge = fgAddRefPred(catchStep, step);
+                    step->SetTargetEdge(newEdge);
                 }
-                step->SetTarget(catchStep);
-                FlowEdge* const newEdge = fgAddRefPred(catchStep, step);
-                newEdge->setLikelihood(1.0);
 
                 // The new block will inherit this block's weight.
                 catchStep->inheritWeight(block);
@@ -5006,13 +5025,16 @@ void Compiler::impImportLeave(BasicBlock* block)
     {
         assert((step == block) || !step->HasInitializedTarget());
 
+        // leaveTarget is the ultimate destination of the LEAVE
         if (step == block)
         {
-            fgRemoveRefPred(step->GetTarget(), step);
+            fgRedirectTargetEdge(step, leaveTarget);
+        }
+        else
+        {
+            FlowEdge* const newEdge = fgAddRefPred(leaveTarget, step);
+            step->SetTargetEdge(newEdge);
         }
-        step->SetTarget(leaveTarget); // this is the ultimate destination of the LEAVE
-        FlowEdge* const newEdge = fgAddRefPred(leaveTarget, step);
-        newEdge->setLikelihood(1.0);
 
 #ifdef DEBUG
         if (verbose)
@@ -5038,15 +5060,12 @@ void Compiler::impImportLeave(BasicBlock* block)
 #endif // DEBUG
 }
 
-#endif // FEATURE_EH_FUNCLETS
-
 /*****************************************************************************/
 // This is called when reimporting a leave block. It resets the JumpKind,
 // JumpDest, and bbNext to the original values
 
 void Compiler::impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr)
 {
-#if defined(FEATURE_EH_FUNCLETS)
     // With EH Funclets, while importing leave opcode we create another block ending with BBJ_ALWAYS (call it B1)
     // and the block containing leave (say B0) is marked as BBJ_CALLFINALLY.   Say for some reason we reimport B0,
     // it is reset (in this routine) by marking as ending with BBJ_LEAVE and further down when B0 is reimported, we
@@ -5069,12 +5088,12 @@ void Compiler::impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr)
     // work around this we will duplicate B0 (call it B0Dup) before resetting. B0Dup is marked as BBJ_CALLFINALLY and
     // only serves to pair up with B1 (BBJ_ALWAYS) that got orphaned. Now during orphan block deletion B0Dup and B1
     // will be treated as pair and handled correctly.
-    if (block->KindIs(BBJ_CALLFINALLY))
+    if (UsesFunclets() && block->KindIs(BBJ_CALLFINALLY))
     {
-        BasicBlock* dupBlock = BasicBlock::New(this, BBJ_CALLFINALLY, block->GetTarget());
+        BasicBlock* dupBlock = BasicBlock::New(this);
         dupBlock->CopyFlags(block);
-        FlowEdge* const newEdge = fgAddRefPred(dupBlock->GetTarget(), dupBlock);
-        newEdge->setLikelihood(1.0);
+        FlowEdge* const newEdge = fgAddRefPred(block->GetTarget(), dupBlock);
+        dupBlock->SetKindAndTargetEdge(BBJ_CALLFINALLY, newEdge);
         dupBlock->copyEHRegion(block);
         dupBlock->bbCatchTyp = block->bbCatchTyp;
 
@@ -5099,14 +5118,11 @@ void Compiler::impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr)
         }
 #endif
     }
-#endif // FEATURE_EH_FUNCLETS
 
     fgInitBBLookup();
 
-    fgRemoveRefPred(block->GetTarget(), block);
-    block->SetKindAndTarget(BBJ_LEAVE, fgLookupBB(jmpAddr));
-    FlowEdge* const newEdge = fgAddRefPred(block->GetTarget(), block);
-    newEdge->setLikelihood(1.0);
+    fgRedirectTargetEdge(block, fgLookupBB(jmpAddr));
+    block->SetKind(BBJ_LEAVE);
 
     // We will leave the BBJ_ALWAYS block we introduced. When it's reimported
     // the BBJ_ALWAYS block will be unreachable, and will be removed after. The
@@ -5227,7 +5243,6 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr
             // <BUGNUM> VSW 318822 </BUGNUM>
             //
             // So here we decide to make the resulting type to be a native int.
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
             // Insert an explicit upcast if needed.
             op1 = *pOp1 = impImplicitIorI4Cast(op1, TYP_I_IMPL, fUnsigned);
@@ -5337,6 +5352,32 @@ GenTree* Compiler::impOptimizeCastClassOrIsInst(GenTree* op1, CORINFO_RESOLVED_T
         return nullptr;
     }
 
+    CORINFO_CLASS_HANDLE toClass = pResolvedToken->hClass;
+    if (info.compCompHnd->getExactClasses(toClass, 0, nullptr) == 0)
+    {
+        JITDUMP("\nClass %p (%s) can never be allocated\n", dspPtr(toClass), eeGetClassName(toClass));
+
+        if (!isCastClass)
+        {
+            JITDUMP("Cast will fail, optimizing to return null\n");
+
+            // If the cast was fed by a box, we can remove that too.
+            if (op1->IsBoxedValue())
+            {
+                JITDUMP("Also removing upstream box\n");
+                gtTryRemoveBoxUpstreamEffects(op1);
+            }
+
+            if (gtTreeHasSideEffects(op1, GTF_SIDE_EFFECT))
+            {
+                impAppendTree(op1, CHECK_SPILL_ALL, impCurStmtDI);
+            }
+            return gtNewNull();
+        }
+
+        JITDUMP("Cast will always throw, but not optimizing yet\n");
+    }
+
     // See what we know about the type of the object being cast.
     bool                 isExact   = false;
     bool                 isNonNull = false;
@@ -5344,7 +5385,6 @@ GenTree* Compiler::impOptimizeCastClassOrIsInst(GenTree* op1, CORINFO_RESOLVED_T
 
     if (fromClass != nullptr)
     {
-        CORINFO_CLASS_HANDLE toClass = pResolvedToken->hClass;
         JITDUMP("\nConsidering optimization of %s from %s%p (%s) to %p (%s)\n", isCastClass ? "castclass" : "isinst",
                 isExact ? "exact " : "", dspPtr(fromClass), eeGetClassName(fromClass), dspPtr(toClass),
                 eeGetClassName(toClass));
@@ -5422,15 +5462,14 @@ GenTree* Compiler::impOptimizeCastClassOrIsInst(GenTree* op1, CORINFO_RESOLVED_T
 //
 // Notes:
 //   May expand into a series of runtime checks or a helper call.
-
+//
 GenTree* Compiler::impCastClassOrIsInstToTree(
     GenTree* op1, GenTree* op2, CORINFO_RESOLVED_TOKEN* pResolvedToken, bool isCastClass, IL_OFFSET ilOffset)
 {
     assert(op1->TypeGet() == TYP_REF);
 
     // Optimistically assume the jit should expand this as an inline test
-    bool shouldExpandInline = true;
-    bool isClassExact       = info.compCompHnd->isExactType(pResolvedToken->hClass);
+    bool isClassExact = info.compCompHnd->isExactType(pResolvedToken->hClass);
 
     // ECMA-335 III.4.3:  If typeTok is a nullable type, Nullable<T>, it is interpreted as "boxed" T
     // We can convert constant-ish tokens of nullable to its underlying type.
@@ -5439,7 +5478,6 @@ GenTree* Compiler::impCastClassOrIsInstToTree(
     if (isClassExact && !(info.compCompHnd->getClassAttribs(pResolvedToken->hClass) & CORINFO_FLG_SHAREDINST))
     {
         CORINFO_CLASS_HANDLE hClass = info.compCompHnd->getTypeForBox(pResolvedToken->hClass);
-
         if (hClass != pResolvedToken->hClass)
         {
             bool runtimeLookup;
@@ -5449,88 +5487,38 @@ GenTree* Compiler::impCastClassOrIsInstToTree(
         }
     }
 
-    // Profitability check.
-    //
-    // Don't bother with inline expansion when jit is trying to generate code quickly
-    if (opts.OptimizationDisabled())
-    {
-        // not worth the code expansion if jitting fast or in a rarely run block
-        shouldExpandInline = false;
-    }
-    else if ((op1->gtFlags & GTF_GLOB_EFFECT) && lvaHaveManyLocals())
-    {
-        // not worth creating an untracked local variable
-        shouldExpandInline = false;
-    }
-    else if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) && (JitConfig.JitProfileCasts() == 1))
-    {
-        // Optimizations are enabled but we're still instrumenting (including casts)
-        if (isCastClass && !isClassExact)
-        {
-            // Usually, we make a speculative assumption that it makes sense to expand castclass
-            // even for non-sealed classes, but let's rely on PGO in this specific case
-            shouldExpandInline = false;
-        }
-    }
-
-    if (shouldExpandInline && compCurBB->isRunRarely())
-    {
-        // For cold blocks we only expand castclass against exact classes because it's cheap
-        shouldExpandInline = isCastClass && isClassExact;
-    }
-
-    // Pessimistically assume the jit cannot expand this as an inline test
-    bool                  canExpandInline = false;
-    bool                  reversedMTCheck = false;
-    const CorInfoHelpFunc helper          = info.compCompHnd->getCastingHelper(pResolvedToken, isCastClass);
-
-    CORINFO_CLASS_HANDLE exactCls = NO_CLASS_HANDLE;
-
-    // By default, we assume it's 50/50 with the slow path.
-    unsigned fastPathLikelihood = 50;
+    const CorInfoHelpFunc helper = info.compCompHnd->getCastingHelper(pResolvedToken, isCastClass);
 
-    // Legality check.
-    //
-    // Not all classclass/isinst operations can be inline expanded.
-    // Check legality only if an inline expansion is desirable.
-    if (shouldExpandInline)
+    bool       shouldExpandEarly = false;
+    const bool tooManyLocals     = (((op1->gtFlags & GTF_GLOB_EFFECT) != 0) && lvaHaveManyLocals());
+    if (isClassExact && opts.OptimizationEnabled() && !compCurBB->isRunRarely() && !tooManyLocals)
     {
-        if (isCastClass)
+        // TODO-InlineCast: Fix size regressions for these two cases if they're moved to the
+        // late cast expansion path and remove this early expansion entirely.
+        if (helper == CORINFO_HELP_ISINSTANCEOFCLASS)
         {
-            // Jit can only inline expand CHKCASTCLASS and CHKCASTARRAY helpers.
-            canExpandInline = (helper == CORINFO_HELP_CHKCASTCLASS) || (helper == CORINFO_HELP_CHKCASTARRAY);
-
-            // For ChkCastAny we ignore cases where the class is known to be abstract or is an interface.
-            if (helper == CORINFO_HELP_CHKCASTANY)
-            {
-                const bool isAbstract = (info.compCompHnd->getClassAttribs(pResolvedToken->hClass) &
-                                         (CORINFO_FLG_INTERFACE | CORINFO_FLG_ABSTRACT)) != 0;
-                canExpandInline = !isAbstract;
-            }
+            shouldExpandEarly = true;
         }
-        else if ((helper == CORINFO_HELP_ISINSTANCEOFCLASS) || (helper == CORINFO_HELP_ISINSTANCEOFARRAY))
+        else if (helper == CORINFO_HELP_ISINSTANCEOFARRAY && !op2->IsIconHandle(GTF_ICON_CLASS_HDL))
         {
-            // If the class is exact, the jit can expand the IsInst check inline.
-            canExpandInline = isClassExact;
+            shouldExpandEarly = true;
         }
     }
 
-    const bool expandInline = canExpandInline && shouldExpandInline;
-
-    if (!expandInline)
+    if (!shouldExpandEarly)
     {
-        JITDUMP("\nExpanding %s as call because %s\n", isCastClass ? "castclass" : "isinst",
-                canExpandInline ? "want smaller code or faster jitting" : "inline expansion not legal");
+        JITDUMP("\nImporting %s as call\n", isCastClass ? "castclass" : "isinst");
 
         // If we CSE this class handle we prevent assertionProp from making SubType assertions
         // so instead we force the CSE logic to not consider CSE-ing this class handle.
         //
         op2->gtFlags |= GTF_DONT_CSE;
-
-        GenTreeCall* call = gtNewHelperCallNode(helper, TYP_REF, op2, op1);
+        GenTreeCall* call          = gtNewHelperCallNode(helper, TYP_REF, op2, op1);
+        call->gtCastHelperILOffset = ilOffset;
 
         // Instrument this castclass/isinst
-        if ((JitConfig.JitClassProfiling() > 0) && impIsCastHelperEligibleForClassProbe(call) && !isClassExact)
+        if ((JitConfig.JitClassProfiling() > 0) && impIsCastHelperEligibleForClassProbe(call) && !isClassExact &&
+            !compCurBB->isRunRarely())
         {
             // It doesn't make sense to instrument "x is T" or "(T)x" for shared T
             if ((info.compCompHnd->getClassAttribs(pResolvedToken->hClass) & CORINFO_FLG_SHAREDINST) == 0)
@@ -5552,129 +5540,41 @@ GenTree* Compiler::impCastClassOrIsInstToTree(
         return call;
     }
 
-    JITDUMP("\nExpanding %s inline\n", isCastClass ? "castclass" : "isinst");
-
-    impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark2"));
-
-    GenTree* temp;
-    GenTree* condMT;
-    //
-    // expand the methodtable match:
-    //
-    //  condMT ==>   GT_NE
-    //               /    \.
-    //           GT_IND   op2 (typically CNS_INT)
-    //              |
-    //           op1Copy
-    //
-
-    // This can replace op1 with a GT_COMMA that evaluates op1 into a local
-    //
-    op1 = impCloneExpr(op1, &temp, CHECK_SPILL_ALL, nullptr DEBUGARG("CASTCLASS eval op1"));
-    //
-    // op1 is now known to be a non-complex tree
-    // thus we can use gtClone(op1) from now on
-    //
-
-    GenTree* op2Var = op2;
-    if (isCastClass && (exactCls == NO_CLASS_HANDLE))
-    {
-        // if exactCls is not null we won't have to clone op2 (it will be used only for the fallback)
-        op2Var                                                  = fgInsertCommaFormTemp(&op2);
-        lvaTable[op2Var->AsLclVarCommon()->GetLclNum()].lvIsCSE = true;
-    }
-    temp = gtNewMethodTableLookup(temp);
-    condMT =
-        gtNewOperNode(GT_NE, TYP_INT, temp, (exactCls != NO_CLASS_HANDLE) ? gtNewIconEmbClsHndNode(exactCls) : op2);
-
-    GenTree* condNull;
-    //
-    // expand the null check:
-    //
-    //  condNull ==>   GT_EQ
-    //                 /    \.
-    //             op1Copy CNS_INT
-    //                      null
-    //
-    condNull = gtNewOperNode(GT_EQ, TYP_INT, gtClone(op1), gtNewNull());
-
-    //
-    // expand the true and false trees for the condMT
-    //
-    GenTree* condFalse = reversedMTCheck ? gtNewNull() : gtClone(op1);
-    GenTree* condTrue;
-    if (isCastClass)
-    {
-        assert((helper == CORINFO_HELP_CHKCASTCLASS) || (helper == CORINFO_HELP_CHKCASTARRAY) ||
-               (helper == CORINFO_HELP_CHKCASTANY) || (helper == CORINFO_HELP_CHKCASTINTERFACE));
+    JITDUMP("\nExpanding isinst inline\n");
 
-        CorInfoHelpFunc specialHelper = helper;
-        if ((helper == CORINFO_HELP_CHKCASTCLASS) &&
-            ((exactCls == nullptr) || (exactCls == gtGetHelperArgClassHandle(op2))))
-        {
-            // use the special helper that skips the cases checked by our inlined cast
-            specialHelper = CORINFO_HELP_CHKCASTCLASS_SPECIAL;
-        }
-        condTrue = gtNewHelperCallNode(specialHelper, TYP_REF, op2Var, gtClone(op1));
-    }
-    else
-    {
-        condTrue = gtNewIconNode(0, TYP_REF);
-    }
+    impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling "));
 
-    GenTreeQmark* qmarkMT;
+    // Now we import it as two QMark nodes representing this:
     //
-    // Generate first QMARK - COLON tree
+    //  tmp = op1;
+    //  if (tmp != null) // qmarkNull
+    //  {
+    //      if (tmp->pMT == op2) // qmarkMT
+    //          result = tmp;
+    //      else
+    //          result = null;
+    //  }
+    //  else
+    //      result = null;
     //
-    //  qmarkMT ==>   GT_QMARK
-    //                 /     \.
-    //            condMT   GT_COLON
-    //                      /     \.
-    //                condFalse  condTrue
-    //
-    temp    = new (this, GT_COLON) GenTreeColon(TYP_REF, condTrue, condFalse);
-    qmarkMT = gtNewQmarkNode(TYP_REF, condMT, temp->AsColon());
-    qmarkMT->SetThenNodeLikelihood(fastPathLikelihood);
 
-    if (isCastClass && isClassExact && condTrue->OperIs(GT_CALL))
-    {
-        if (helper == CORINFO_HELP_CHKCASTCLASS)
-        {
-            // condTrue is used only for throwing InvalidCastException in case of casting to an exact class.
-            condTrue->AsCall()->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN;
-
-            // defer calling setMethodHasNoReturnCalls until qmark expasion
-        }
-    }
+    // Spill op1 if it's a complex expression
+    GenTree* op1Clone;
+    op1 = impCloneExpr(op1, &op1Clone, CHECK_SPILL_ALL, nullptr DEBUGARG("ISINST eval op1"));
 
-    GenTree* qmarkNull;
-    //
-    // Generate second QMARK - COLON tree
-    //
-    //  qmarkNull ==>  GT_QMARK
-    //                 /     \.
-    //           condNull  GT_COLON
-    //                      /     \.
-    //                qmarkMT   op1Copy
-    //
-    temp      = new (this, GT_COLON) GenTreeColon(TYP_REF, reversedMTCheck ? gtNewNull() : gtClone(op1), qmarkMT);
-    qmarkNull = gtNewQmarkNode(TYP_REF, condNull, temp->AsColon());
-    qmarkNull->gtFlags |= GTF_QMARK_CAST_INSTOF;
+    GenTreeOp*    condMT    = gtNewOperNode(GT_NE, TYP_INT, gtNewMethodTableLookup(op1Clone), op2);
+    GenTreeOp*    condNull  = gtNewOperNode(GT_EQ, TYP_INT, gtClone(op1), gtNewNull());
+    GenTreeQmark* qmarkMT   = gtNewQmarkNode(TYP_REF, condMT, gtNewColonNode(TYP_REF, gtNewNull(), gtClone(op1)));
+    GenTreeQmark* qmarkNull = gtNewQmarkNode(TYP_REF, condNull, gtNewColonNode(TYP_REF, gtNewNull(), qmarkMT));
 
     // Make QMark node a top level node by spilling it.
-    unsigned tmp = lvaGrabTemp(true DEBUGARG("spilling QMark2"));
-    impStoreTemp(tmp, qmarkNull, CHECK_SPILL_NONE);
+    const unsigned result = lvaGrabTemp(true DEBUGARG("spilling qmarkNull"));
+    impStoreToTemp(result, qmarkNull, CHECK_SPILL_NONE);
 
-    // TODO-CQ: Is it possible op1 has a better type?
-    //
     // See also gtGetHelperCallClassHandle where we make the same
     // determination for the helper call variants.
-    LclVarDsc* lclDsc = lvaGetDesc(tmp);
-    assert(lclDsc->lvSingleDef == 0);
-    lclDsc->lvSingleDef = 1;
-    JITDUMP("Marked V%02u as a single def temp\n", tmp);
-    lvaSetClass(tmp, pResolvedToken->hClass);
-    return gtNewLclvNode(tmp, TYP_REF);
+    lvaSetClass(result, pResolvedToken->hClass);
+    return gtNewLclvNode(result, TYP_REF);
 }
 
 #ifndef DEBUG
@@ -6007,7 +5907,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
 
             // Change block to BBJ_THROW so we won't trigger importation of successors.
             //
-            block->SetKindAndTarget(BBJ_THROW);
+            block->SetKindAndTargetEdge(BBJ_THROW);
 
             // If this method has a explicit generic context, the only uses of it may be in
             // the IL for this block. So assume it's used.
@@ -6277,7 +6177,8 @@ void Compiler::impImportBlockCode(BasicBlock* block)
             bool                 ovfl, unordered, callNode;
             CORINFO_CLASS_HANDLE tokenType;
 
-            union {
+            union
+            {
                 int     intVal;
                 float   fltVal;
                 __int64 lngVal;
@@ -6414,7 +6315,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
 
                 if (compIsForInlining())
                 {
-                    op1 = impInlineFetchArg(lclNum, impInlineInfo->inlArgInfo, impInlineInfo->lclVarInfo);
+                    op1 = impInlineFetchArg(impInlineInfo->inlArgInfo[lclNum], impInlineInfo->lclVarInfo[lclNum]);
                     noway_assert(op1->gtOper == GT_LCL_VAR);
                     lclNum = op1->AsLclVar()->GetLclNum();
 
@@ -6477,7 +6378,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
 
             VAR_ST_VALID:
 
-                /* if it is a struct assignment, make certain we don't overflow the buffer */
+                /* if it is a struct store, make certain we don't overflow the buffer */
                 assert(lclTyp != TYP_STRUCT || lvaLclSize(lclNum) >= info.compCompHnd->getClassSize(clsHnd));
 
                 if (lvaTable[lclNum].lvNormalizeOnLoad())
@@ -6539,7 +6440,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     }
                 }
 
-                /* Filter out simple assignments to itself */
+                /* Filter out simple stores to itself */
 
                 if (op1->gtOper == GT_LCL_VAR && lclNum == op1->AsLclVarCommon()->GetLclNum())
                 {
@@ -6619,7 +6520,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     // In IL, LDARGA(_S) is used to load the byref managed pointer of struct argument,
                     // followed by a ldfld to load the field.
 
-                    op1 = impInlineFetchArg(lclNum, impInlineInfo->inlArgInfo, impInlineInfo->lclVarInfo);
+                    op1 = impInlineFetchArg(impInlineInfo->inlArgInfo[lclNum], impInlineInfo->lclVarInfo[lclNum]);
                     if (op1->gtOper != GT_LCL_VAR)
                     {
                         compInlineResult->NoteFatal(InlineObservation::CALLSITE_LDARGA_NOT_LOCAL_VAR);
@@ -6955,7 +6856,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     }
                 }
 
-                // Else call a helper function to do the assignment
+                // Else call a helper function to do the store
                 impPopStack(3);
 
                 // The CLI Spec allows an array to be indexed by either an int32 or a native int.
@@ -7024,7 +6925,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                 // Create the store node and append it.
                 ClassLayout* layout = (lclTyp == TYP_STRUCT) ? typGetObjLayout(stelemClsHnd) : nullptr;
                 op1                 = (lclTyp == TYP_STRUCT) ? gtNewStoreBlkNode(layout, op1, op2)->AsIndir()
-                                             : gtNewStoreIndNode(lclTyp, op1, op2);
+                                                             : gtNewStoreIndNode(lclTyp, op1, op2);
                 if (varTypeIsStruct(op1))
                 {
                     op1 = impStoreStruct(op1, CHECK_SPILL_ALL);
@@ -7082,7 +6983,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                 oper = GT_MUL;
                 goto MATH_MAYBE_CALL_OVF;
 
-            // Other binary math operations
+                // Other binary math operations
 
             case CEE_DIV:
                 oper = GT_DIV;
@@ -7323,16 +7224,12 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     // We may have already modified `block`'s jump kind, if this is a re-importation.
                     //
                     bool jumpToNextOptimization = false;
-                    if (block->KindIs(BBJ_COND) && block->TrueTargetIs(block->GetFalseTarget()))
+                    if (block->KindIs(BBJ_COND) && block->TrueEdgeIs(block->GetFalseEdge()))
                     {
                         JITDUMP(FMT_BB " always branches to " FMT_BB ", changing to BBJ_ALWAYS\n", block->bbNum,
                                 block->GetFalseTarget()->bbNum);
-                        fgRemoveRefPred(block->GetFalseTarget(), block);
-                        block->SetKind(BBJ_ALWAYS);
-
-                        // TODO-NoFallThrough: Once bbFalseTarget can diverge from bbNext, it may not make sense to
-                        // set BBF_NONE_QUIRK
-                        block->SetFlags(BBF_NONE_QUIRK);
+                        fgRemoveRefPred(block->GetFalseEdge());
+                        block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetTrueEdge());
 
                         jumpToNextOptimization = true;
                     }
@@ -7375,7 +7272,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     op1  = gtNewOperNode(oper, TYP_INT, op1, op2);
                 }
 
-            // fall through
+                // fall through
 
             COND_JUMP:
 
@@ -7402,20 +7299,15 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                         {
                             JITDUMP("\nThe conditional jump becomes an unconditional jump to " FMT_BB "\n",
                                     block->GetTrueTarget()->bbNum);
-                            fgRemoveRefPred(block->GetFalseTarget(), block);
-                            block->SetKind(BBJ_ALWAYS);
+                            fgRemoveRefPred(block->GetFalseEdge());
+                            block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetTrueEdge());
                         }
                         else
                         {
-                            // TODO-NoFallThrough: Update once bbFalseTarget can diverge from bbNext
                             assert(block->NextIs(block->GetFalseTarget()));
                             JITDUMP("\nThe block jumps to the next " FMT_BB "\n", block->Next()->bbNum);
-                            fgRemoveRefPred(block->GetTrueTarget(), block);
-                            block->SetKindAndTarget(BBJ_ALWAYS, block->Next());
-
-                            // TODO-NoFallThrough: Once bbFalseTarget can diverge from bbNext, it may not make sense
-                            // to set BBF_NONE_QUIRK
-                            block->SetFlags(BBF_NONE_QUIRK);
+                            fgRemoveRefPred(block->GetTrueEdge());
+                            block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetFalseEdge());
                         }
                     }
 
@@ -7585,16 +7477,12 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     // We may have already modified `block`'s jump kind, if this is a re-importation.
                     //
                     bool jumpToNextOptimization = false;
-                    if (block->KindIs(BBJ_COND) && block->TrueTargetIs(block->GetFalseTarget()))
+                    if (block->KindIs(BBJ_COND) && block->TrueEdgeIs(block->GetFalseEdge()))
                     {
                         JITDUMP(FMT_BB " always branches to " FMT_BB ", changing to BBJ_ALWAYS\n", block->bbNum,
                                 block->GetFalseTarget()->bbNum);
-                        fgRemoveRefPred(block->GetFalseTarget(), block);
-                        block->SetKind(BBJ_ALWAYS);
-
-                        // TODO-NoFallThrough: Once bbFalseTarget can diverge from bbNext, it may not make sense to
-                        // set BBF_NONE_QUIRK
-                        block->SetFlags(BBF_NONE_QUIRK);
+                        fgRemoveRefPred(block->GetFalseEdge());
+                        block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetTrueEdge());
 
                         jumpToNextOptimization = true;
                     }
@@ -7660,16 +7548,16 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                 if (opts.OptimizationEnabled() && (op1->gtOper == GT_CNS_INT))
                 {
                     // Find the jump target
-                    size_t       switchVal = (size_t)op1->AsIntCon()->gtIconVal;
-                    unsigned     jumpCnt   = block->GetSwitchTargets()->bbsCount;
-                    BasicBlock** jumpTab   = block->GetSwitchTargets()->bbsDstTab;
-                    bool         foundVal  = false;
+                    size_t     switchVal = (size_t)op1->AsIntCon()->gtIconVal;
+                    unsigned   jumpCnt   = block->GetSwitchTargets()->bbsCount;
+                    FlowEdge** jumpTab   = block->GetSwitchTargets()->bbsDstTab;
+                    bool       foundVal  = false;
 
                     for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
                     {
-                        BasicBlock* curJump = *jumpTab;
+                        FlowEdge* curEdge = *jumpTab;
 
-                        assert(curJump->countOfInEdges() > 0);
+                        assert(curEdge->getDestinationBlock()->countOfInEdges() > 0);
 
                         // If val matches switchVal or we are at the last entry and
                         // we never found the switch value then set the new jump dest
@@ -7677,22 +7565,17 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                         if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
                         {
                             // transform the basic block into a BBJ_ALWAYS
-                            block->SetKindAndTarget(BBJ_ALWAYS, curJump);
+                            block->SetKindAndTargetEdge(BBJ_ALWAYS, curEdge);
                             foundVal = true;
                         }
                         else
                         {
-                            // Remove 'block' from the predecessor list of 'curJump'
-                            fgRemoveRefPred(curJump, block);
+                            // Remove 'curEdge'
+                            fgRemoveRefPred(curEdge);
                         }
                     }
 
                     assert(foundVal);
-                    if (block->JumpsToNext())
-                    {
-                        block->SetFlags(BBF_NONE_QUIRK);
-                    }
-
 #ifdef DEBUG
                     if (verbose)
                     {
@@ -7718,7 +7601,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
 
                 goto SPILL_APPEND;
 
-            /************************** Casting OPCODES ***************************/
+                /************************** Casting OPCODES ***************************/
 
             case CEE_CONV_OVF_I1:
                 lclTyp = TYP_BYTE;
@@ -7860,12 +7743,13 @@ void Compiler::impImportBlockCode(BasicBlock* block)
 
                 if (varTypeIsFloating(lclTyp))
                 {
-                    callNode = varTypeIsLong(impStackTop().val) || uns // uint->dbl gets turned into uint->long->dbl
+                    callNode = varTypeIsLong(impStackTop().val) ||
+                               uns // uint->dbl gets turned into uint->long->dbl
 #ifdef TARGET_64BIT
-                               // TODO-ARM64-Bug?: This was AMD64; I enabled it for ARM64 also. OK?
-                               // TYP_BYREF could be used as TYP_I_IMPL which is long.
-                               // TODO-CQ: remove this when we lower casts long/ulong --> float/double
-                               // and generate SSE2 code instead of going through helper calls.
+                                   // TODO-ARM64-Bug?: This was AMD64; I enabled it for ARM64 also. OK?
+                                   // TYP_BYREF could be used as TYP_I_IMPL which is long.
+                                   // TODO-CQ: remove this when we lower casts long/ulong --> float/double
+                                   // and generate SSE2 code instead of going through helper calls.
                                || (impStackTop().val->TypeGet() == TYP_BYREF)
 #endif
                         ;
@@ -8001,31 +7885,19 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     {
                         JITDUMP("\n ... CEE_POP struct ...\n");
                         DISPTREE(op1);
-#ifdef UNIX_AMD64_ABI
-                        // Non-calls, such as obj or ret_expr, have to go through this.
-                        // Calls with large struct return value have to go through this.
-                        // Helper calls with small struct return value also have to go
-                        // through this since they do not follow Unix calling convention.
-                        if (!op1->IsCall() ||
-                            !IsMultiRegReturnedType(op1->AsCall()->gtRetClsHnd,
-                                                    op1->AsCall()->GetUnmanagedCallConv()) ||
-                            op1->IsHelperCall())
-#endif // UNIX_AMD64_ABI
+                        // If the value being produced comes from loading
+                        // via an underlying address, just null check the address.
+                        if (op1->OperIs(GT_IND, GT_BLK))
                         {
-                            // If the value being produced comes from loading
-                            // via an underlying address, just null check the address.
-                            if (op1->OperIs(GT_IND, GT_BLK))
-                            {
-                                gtChangeOperToNullCheck(op1, block);
-                            }
-                            else
-                            {
-                                op1 = impGetNodeAddr(op1, CHECK_SPILL_ALL, nullptr);
-                            }
-
-                            JITDUMP("\n ... optimized to ...\n");
-                            DISPTREE(op1);
+                            gtChangeOperToNullCheck(op1, block);
+                        }
+                        else
+                        {
+                            op1 = impGetNodeAddr(op1, CHECK_SPILL_ALL, nullptr);
                         }
+
+                        JITDUMP("\n ... optimized to ...\n");
+                        DISPTREE(op1);
                     }
 
                     // If op1 is non-overflow cast, throw it away since it is useless.
@@ -8097,7 +7969,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     else
                     {
                         const unsigned tmpNum = lvaGrabTemp(true DEBUGARG("dup spill"));
-                        impStoreTemp(tmpNum, op1, CHECK_SPILL_ALL);
+                        impStoreToTemp(tmpNum, op1, CHECK_SPILL_ALL);
                         var_types type = genActualType(lvaTable[tmpNum].TypeGet());
 
                         assert(lvaTable[tmpNum].lvSingleDef == 0);
@@ -8583,7 +8455,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                             GenTree* newObjInit =
                                 gtNewZeroConNode((lclDsc->TypeGet() == TYP_STRUCT) ? TYP_INT : lclDsc->TypeGet());
 
-                            impStoreTemp(lclNum, newObjInit, CHECK_SPILL_NONE);
+                            impStoreToTemp(lclNum, newObjInit, CHECK_SPILL_NONE);
                         }
                         else
                         {
@@ -8615,7 +8487,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                         }
 
                         const bool useParent = true;
-                        op1                  = gtNewAllocObjNode(&resolvedToken, useParent);
+                        op1                  = gtNewAllocObjNode(&resolvedToken, info.compMethodHnd, useParent);
                         if (op1 == nullptr)
                         {
                             return;
@@ -8625,16 +8497,16 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                         block->SetFlags(BBF_HAS_NEWOBJ);
                         optMethodFlags |= OMF_HAS_NEWOBJ;
 
-                        // Append the assignment to the temp/local. Dont need to spill
-                        // at all as we are just calling an EE-Jit helper which can only
-                        // cause an (async) OutOfMemoryException.
+                        // Append the store to the temp/local. Dont need to spill at all as
+                        // we are just calling an EE-Jit helper which can only cause
+                        // an (async) OutOfMemoryException.
 
                         // We assign the newly allocated object (by a GT_ALLOCOBJ node)
                         // to a temp. Note that the pattern "temp = allocObj" is required
                         // by ObjectAllocator phase to be able to determine GT_ALLOCOBJ nodes
                         // without exhaustive walk over all expressions.
 
-                        impStoreTemp(lclNum, op1, CHECK_SPILL_NONE);
+                        impStoreToTemp(lclNum, op1, CHECK_SPILL_NONE);
 
                         assert(lvaTable[lclNum].lvSingleDef == 0);
                         lvaTable[lclNum].lvSingleDef = 1;
@@ -9057,7 +8929,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
 #if BIGENDIAN
                         op1 = gtNewIconNode(0, lclTyp);
 #else
-                        op1                     = gtNewIconNode(1, lclTyp);
+                        op1 = gtNewIconNode(1, lclTyp);
 #endif
                         goto FIELD_DONE;
                     }
@@ -9072,7 +8944,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     ClassLayout* layout;
                     lclTyp = TypeHandleToVarType(fieldInfo.fieldType, clsHnd, &layout);
                     op1    = (lclTyp == TYP_STRUCT) ? gtNewBlkIndir(layout, op1, indirFlags)
-                                                 : gtNewIndir(lclTyp, op1, indirFlags);
+                                                    : gtNewIndir(lclTyp, op1, indirFlags);
                     if ((indirFlags & GTF_IND_INVARIANT) != 0)
                     {
                         // TODO-ASG: delete this zero-diff quirk.
@@ -9299,20 +9171,19 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                         assert(!"Unexpected fieldAccessor");
                 }
 
-                /* V4.0 allows assignment of i4 constant values to i8 type vars when IL verifier is bypassed (full
-                trust apps). The reason this works is that JIT stores an i4 constant in GenTree union during
-                importation and reads from the union as if it were a long during code generation. Though this
-                can potentially read garbage, one can get lucky to have this working correctly.
+                    /* V4.0 allows stores of i4 constant values to i8 type vars when IL verifier is bypassed (full
+                    trust apps). The reason this works is that JIT stores an i4 constant in GenTree union during
+                    importation and reads from the union as if it were a long during code generation. Though this
+                    can potentially read garbage, one can get lucky to have this working correctly.
 
-                This code pattern is generated by Dev10 MC++ compiler while storing to fields when compiled with
-                /O2 switch (default when compiling retail configs in Dev10) and a customer app has taken a
-                dependency on it. To be backward compatible, we will explicitly add an upward cast here so that
-                it works correctly always.
+                    This code pattern is generated by Dev10 MC++ compiler while storing to fields when compiled with
+                    /O2 switch (default when compiling retail configs in Dev10) and a customer app has taken a
+                    dependency on it. To be backward compatible, we will explicitly add an upward cast here so that
+                    it works correctly always.
 
-                Note that this is limited to x86 alone as there is no back compat to be addressed for Arm JIT
-                for V4.0.
-                */
-                CLANG_FORMAT_COMMENT_ANCHOR;
+                    Note that this is limited to x86 alone as there is no back compat to be addressed for Arm JIT
+                    for V4.0.
+                    */
 
 #ifndef TARGET_64BIT
                 // In UWP6.0 and beyond (post-.NET Core 2.0), we decided to let this cast from int to long be
@@ -9416,7 +9287,8 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                             CORINFO_FIELD_INFO fi;
                             eeGetFieldInfo(&fldToken, CORINFO_ACCESS_SET, &fi);
                             unsigned flagsToCheck = CORINFO_FLG_FIELD_STATIC | CORINFO_FLG_FIELD_FINAL;
-                            if ((fi.fieldFlags & flagsToCheck) == flagsToCheck)
+                            if (((fi.fieldFlags & flagsToCheck) == flagsToCheck) &&
+                                ((info.compCompHnd->getClassAttribs(info.compClassHnd) & CORINFO_FLG_SHAREDINST) == 0))
                             {
 #ifdef FEATURE_READYTORUN
                                 if (opts.IsReadyToRun())
@@ -9544,7 +9416,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                                 {
                                     // Explicitly zero out the local if we're inlining a method with InitLocals into a
                                     // method without InitLocals.
-                                    impStoreTemp(stackallocAsLocal, gtNewIconNode(0), CHECK_SPILL_ALL);
+                                    impStoreToTemp(stackallocAsLocal, gtNewIconNode(0), CHECK_SPILL_ALL);
                                 }
 
                                 if (!this->opts.compDbgEnC)
@@ -9703,31 +9575,14 @@ void Compiler::impImportBlockCode(BasicBlock* block)
             {
                 op1 = impPopStack().val;
 
-                if (op1->OperIs(GT_MKREFANY))
-                {
-                    // The pointer may have side-effects
-                    if (op1->AsOp()->gtOp1->gtFlags & GTF_SIDE_EFFECT)
-                    {
-                        impAppendTree(op1->AsOp()->gtOp1, CHECK_SPILL_ALL, impCurStmtDI);
-#ifdef DEBUG
-                        impNoteLastILoffs();
-#endif
-                    }
-
-                    // We already have the class handle
-                    op1 = op1->AsOp()->gtOp2;
-                }
-                else
-                {
-                    // Get the address of the refany
-                    GenTreeFlags indirFlags = GTF_EMPTY;
-                    op1                     = impGetNodeAddr(op1, CHECK_SPILL_ALL, &indirFlags);
+                // Get the address of the refany
+                GenTreeFlags indirFlags = GTF_EMPTY;
+                op1                     = impGetNodeAddr(op1, CHECK_SPILL_ALL, &indirFlags);
 
-                    // Fetch the type from the correct slot
-                    op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1,
-                                        gtNewIconNode(OFFSETOF__CORINFO_TypedReference__type, TYP_I_IMPL));
-                    op1 = gtNewIndir(TYP_BYREF, op1, indirFlags);
-                }
+                // Fetch the type from the correct slot
+                op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1,
+                                    gtNewIconNode(OFFSETOF__CORINFO_TypedReference__type, TYP_I_IMPL));
+                op1 = gtNewIndir(TYP_BYREF, op1, indirFlags);
 
                 // Convert native TypeHandle to RuntimeTypeHandle.
                 op1 = gtNewHelperCallNode(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPEHANDLE_MAYBENULL, TYP_STRUCT, op1);
@@ -9954,10 +9809,10 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     }
                 }
 
-                assert((helper == CORINFO_HELP_UNBOX && op1->gtType == TYP_BYREF) || // Unbox helper returns a byref.
-                       (helper == CORINFO_HELP_UNBOX_NULLABLE &&
-                        varTypeIsStruct(op1)) // UnboxNullable helper returns a struct.
-                       );
+                assert((helper == CORINFO_HELP_UNBOX && op1->gtType == TYP_BYREF) ||   // Unbox helper returns a byref.
+                       (helper == CORINFO_HELP_UNBOX_NULLABLE && varTypeIsStruct(op1)) // UnboxNullable helper returns a
+                                                                                       // struct.
+                );
 
                 /*
                   ----------------------------------------------------------------------
@@ -10221,7 +10076,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                 // Pop the exception object and create the 'throw' helper call
                 op1 = gtNewHelperCallNode(CORINFO_HELP_THROW, TYP_VOID, impPopStack().val);
 
-            // Fall through to clear out the eval stack.
+                // Fall through to clear out the eval stack.
 
             EVAL_APPEND:
                 if (verCurrentState.esStackDepth > 0)
@@ -10275,9 +10130,19 @@ void Compiler::impImportBlockCode(BasicBlock* block)
             case CEE_CPBLK:
             {
                 GenTreeFlags indirFlags = impPrefixFlagsToIndirFlags(prefixFlags);
-                op3                     = impPopStack().val; // Size
-                op2                     = impPopStack().val; // Value / Src addr
-                op1                     = impPopStack().val; // Dst addr
+                const bool   isVolatile = (indirFlags & GTF_IND_VOLATILE) != 0;
+#ifndef TARGET_X86
+                if (isVolatile && !impStackTop(0).val->IsCnsIntOrI())
+                {
+                    // We're going to emit a helper call surrounded by memory barriers, so we need to spill any side
+                    // effects.
+                    impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("spilling side-effects"));
+                }
+#endif
+
+                op3 = gtFoldExpr(impPopStack().val); // Size
+                op2 = gtFoldExpr(impPopStack().val); // Value / Src addr
+                op1 = impPopStack().val;             // Dst addr
 
                 if (op3->IsCnsIntOrI())
                 {
@@ -10314,24 +10179,41 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                 }
                 else
                 {
+                    if (TARGET_POINTER_SIZE == 8)
+                    {
+                        // Cast size to TYP_LONG on 64-bit targets
+                        op3 = gtNewCastNode(TYP_LONG, op3, /* fromUnsigned */ true, TYP_LONG);
+                    }
+
+                    GenTreeCall* call;
                     if (opcode == CEE_INITBLK)
                     {
-                        if (!op2->IsIntegralConst(0))
+                        // value is zero -> memzero, otherwise -> memset
+                        if (op2->IsIntegralConst(0))
                         {
-                            op2 = gtNewOperNode(GT_INIT_VAL, TYP_INT, op2);
+                            call = gtNewHelperCallNode(CORINFO_HELP_MEMZERO, TYP_VOID, op1, op3);
+                        }
+                        else
+                        {
+                            call = gtNewHelperCallNode(CORINFO_HELP_MEMSET, TYP_VOID, op1, op2, op3);
                         }
                     }
                     else
                     {
-                        op2 = gtNewIndir(TYP_STRUCT, op2);
+                        call = gtNewHelperCallNode(CORINFO_HELP_MEMCPY, TYP_VOID, op1, op2, op3);
                     }
 
-#ifdef TARGET_64BIT
-                    // STORE_DYN_BLK takes a native uint size as it turns into call to memcpy.
-                    op3 = gtNewCastNode(TYP_I_IMPL, op3, /* fromUnsigned */ true, TYP_I_IMPL);
-#endif
-
-                    op1 = gtNewStoreDynBlkNode(op1, op2, op3, indirFlags);
+                    if (isVolatile)
+                    {
+                        // Wrap with memory barriers: full-barrier + call + load-barrier
+                        impAppendTree(gtNewMemoryBarrier(), CHECK_SPILL_ALL, impCurStmtDI);
+                        impAppendTree(call, CHECK_SPILL_ALL, impCurStmtDI);
+                        op1 = gtNewMemoryBarrier(true);
+                    }
+                    else
+                    {
+                        op1 = call;
+                    }
                 }
                 goto SPILL_APPEND;
             }
@@ -10388,16 +10270,9 @@ void Compiler::impImportBlockCode(BasicBlock* block)
             }
 
             case CEE_MKREFANY:
-
+            {
                 assert(!compIsForInlining());
 
-                // Being lazy here. Refanys are tricky in terms of gc tracking.
-                // Since it is uncommon, just don't perform struct promotion in any method that contains mkrefany.
-
-                JITDUMP("disabling struct promotion because of mkrefany\n");
-                fgNoStructPromotion = true;
-
-                oper = GT_MKREFANY;
                 assertImp(sz == sizeof(unsigned));
 
                 _impResolveToken(CORINFO_TOKENKIND_Class);
@@ -10418,13 +10293,21 @@ void Compiler::impImportBlockCode(BasicBlock* block)
 
                 // @SPECVIOLATION: TYP_INT should not be allowed here by a strict reading of the spec.
                 // But JIT32 allowed it, so we continue to allow it.
-                assertImp(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_INT);
+                assertImp(op1->TypeIs(TYP_BYREF, TYP_I_IMPL, TYP_INT));
+
+                unsigned refAnyLcl = lvaGrabTemp(false DEBUGARG("mkrefany temp"));
+                lvaSetStruct(refAnyLcl, impGetRefAnyClass(), false);
 
-                // MKREFANY returns a struct.  op2 is the class token.
-                op1 = gtNewOperNode(oper, TYP_STRUCT, op1, op2);
+                GenTree* storeData =
+                    gtNewStoreLclFldNode(refAnyLcl, op1->TypeGet(), OFFSETOF__CORINFO_TypedReference__dataPtr, op1);
+                GenTree* storeType =
+                    gtNewStoreLclFldNode(refAnyLcl, op2->TypeGet(), OFFSETOF__CORINFO_TypedReference__type, op2);
+                impAppendTree(storeData, CHECK_SPILL_ALL, impCurStmtDI);
+                impAppendTree(storeType, CHECK_SPILL_ALL, impCurStmtDI);
 
-                impPushOnStack(op1, verMakeTypeInfo(impGetRefAnyClass()));
+                impPushOnStack(gtNewLclVarNode(refAnyLcl, TYP_STRUCT), verMakeTypeInfo(impGetRefAnyClass()));
                 break;
+            }
 
             case CEE_LDOBJ:
             {
@@ -10480,7 +10363,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                 }
                 break;
 
-            /******************************** NYI *******************************/
+                /******************************** NYI *******************************/
 
             case 0xCC:
                 OutputDebugStringA("CLR: Invalid x86 breakpoint in IL stream\n");
@@ -10570,7 +10453,8 @@ void Compiler::impLoadArg(unsigned ilArgNum, IL_OFFSET offset)
             tiRetVal = typeInfo(type);
         }
 
-        impPushOnStack(impInlineFetchArg(ilArgNum, impInlineInfo->inlArgInfo, impInlineInfo->lclVarInfo), tiRetVal);
+        impPushOnStack(impInlineFetchArg(impInlineInfo->inlArgInfo[ilArgNum], impInlineInfo->lclVarInfo[ilArgNum]),
+                       tiRetVal);
     }
     else
     {
@@ -10585,6 +10469,21 @@ void Compiler::impLoadArg(unsigned ilArgNum, IL_OFFSET offset)
         {
             lclNum = lvaArg0Var;
         }
+#ifdef SWIFT_SUPPORT
+        else if (lclNum == lvaSwiftErrorArg)
+        {
+            // Convert any usages of the SwiftError pointer/ref parameter to pointers/refs to the SwiftError pseudolocal
+            // (set side effect flags so usages of references to pseudolocal aren't removed)
+            assert(info.compCallConv == CorInfoCallConvExtension::Swift);
+            assert(lvaSwiftErrorArg != BAD_VAR_NUM);
+            assert(lvaSwiftErrorLocal != BAD_VAR_NUM);
+            const var_types type               = lvaGetDesc(lvaSwiftErrorArg)->TypeGet();
+            GenTree* const  swiftErrorLocalRef = gtNewLclVarAddrNode(lvaSwiftErrorLocal, type);
+            impPushOnStack(swiftErrorLocalRef, typeInfo(type));
+            JITDUMP("\nCreated GT_LCL_ADDR of SwiftError pseudolocal\n");
+            return;
+        }
+#endif // SWIFT_SUPPORT
 
         impLoadVar(lclNum, offset);
     }
@@ -10632,13 +10531,13 @@ void Compiler::impLoadLoc(unsigned ilLclNum, IL_OFFSET offset)
 // Returns:
 //     Tree with reference to struct local to use as call return value.
 
-GenTree* Compiler::impStoreMultiRegValueToVar(GenTree*             op,
+GenTree* Compiler::impStoreMultiRegValueToVar(GenTree*                    op,
                                               CORINFO_CLASS_HANDLE hClass DEBUGARG(CorInfoCallConvExtension callConv))
 {
     unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for multireg return"));
     lvaSetStruct(tmpNum, hClass, false);
 
-    impStoreTemp(tmpNum, op, CHECK_SPILL_ALL);
+    impStoreToTemp(tmpNum, op, CHECK_SPILL_ALL);
 
     LclVarDsc* varDsc = lvaGetDesc(tmpNum);
 
@@ -10838,7 +10737,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode)
                         }
                     }
 
-                    impStoreTemp(lvaInlineeReturnSpillTemp, op2, CHECK_SPILL_ALL);
+                    impStoreToTemp(lvaInlineeReturnSpillTemp, op2, CHECK_SPILL_ALL);
 
                     var_types lclRetType = lvaGetDesc(lvaInlineeReturnSpillTemp)->lvType;
                     GenTree*  tmpOp2     = gtNewLclvNode(lvaInlineeReturnSpillTemp, lclRetType);
@@ -10882,7 +10781,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode)
                     assert(info.compRetNativeType != TYP_VOID);
                     assert(fgMoreThanOneReturnBlock() || impInlineInfo->HasGcRefLocals());
 
-                    impStoreTemp(lvaInlineeReturnSpillTemp, op2, CHECK_SPILL_ALL);
+                    impStoreToTemp(lvaInlineeReturnSpillTemp, op2, CHECK_SPILL_ALL);
                 }
 
                 if (compMethodReturnsMultiRegRetType())
@@ -11460,7 +11359,7 @@ void Compiler::impImportBlock(BasicBlock* block)
                     if (gtHasRef(relOp->AsOp()->gtOp1, tempNum))
                     {
                         unsigned temp = lvaGrabTemp(true DEBUGARG("spill addStmt JTRUE ref Op1"));
-                        impStoreTemp(temp, relOp->AsOp()->gtOp1, level);
+                        impStoreToTemp(temp, relOp->AsOp()->gtOp1, level);
                         type                 = genActualType(lvaTable[temp].TypeGet());
                         relOp->AsOp()->gtOp1 = gtNewLclvNode(temp, type);
                     }
@@ -11468,7 +11367,7 @@ void Compiler::impImportBlock(BasicBlock* block)
                     if (gtHasRef(relOp->AsOp()->gtOp2, tempNum))
                     {
                         unsigned temp = lvaGrabTemp(true DEBUGARG("spill addStmt JTRUE ref Op2"));
-                        impStoreTemp(temp, relOp->AsOp()->gtOp2, level);
+                        impStoreToTemp(temp, relOp->AsOp()->gtOp2, level);
                         type                 = genActualType(lvaTable[temp].TypeGet());
                         relOp->AsOp()->gtOp2 = gtNewLclvNode(temp, type);
                     }
@@ -11478,7 +11377,7 @@ void Compiler::impImportBlock(BasicBlock* block)
                     assert(addTree->OperIs(GT_SWITCH) && genActualTypeIsIntOrI(addTree->AsOp()->gtOp1->TypeGet()));
 
                     unsigned temp = lvaGrabTemp(true DEBUGARG("spill addStmt SWITCH"));
-                    impStoreTemp(temp, addTree->AsOp()->gtOp1, level);
+                    impStoreToTemp(temp, addTree->AsOp()->gtOp1, level);
                     addTree->AsOp()->gtOp1 = gtNewLclvNode(temp, genActualType(addTree->AsOp()->gtOp1->TypeGet()));
                 }
             }
@@ -11921,7 +11820,7 @@ unsigned Compiler::impGetSpillTmpBase(BasicBlock* block)
 
     // Otherwise, choose one, and propagate to all members of the spill clique.
     // Grab enough temps for the whole stack.
-    unsigned baseTmp = lvaGrabTemps(verCurrentState.esStackDepth DEBUGARG("IL Stack Entries"));
+    unsigned          baseTmp = lvaGrabTemps(verCurrentState.esStackDepth DEBUGARG("IL Stack Entries"));
     SetSpillTempsBase callback(baseTmp);
 
     // We do *NOT* need to reset the SpillClique*Members because a block can only be the predecessor
@@ -12224,7 +12123,7 @@ void Compiler::impFixPredLists()
     unsigned XTnum = 0;
     bool     added = false;
 
-    for (EHblkDsc *HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+    for (EHblkDsc* HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
     {
         if (HBtab->HasFinallyHandler())
         {
@@ -12272,7 +12171,7 @@ void Compiler::impFixPredLists()
                 if (predCount > 0)
                 {
                     jumpEhf->bbeCount = predCount;
-                    jumpEhf->bbeSuccs = new (this, CMK_BasicBlock) BasicBlock*[predCount];
+                    jumpEhf->bbeSuccs = new (this, CMK_FlowEdge) FlowEdge*[predCount];
 
                     unsigned predNum = 0;
                     for (BasicBlock* const predBlock : finallyBegBlock->PredBlocks())
@@ -12288,7 +12187,7 @@ void Compiler::impFixPredLists()
                         FlowEdge* const   newEdge      = fgAddRefPred(continuation, finallyBlock);
                         newEdge->setLikelihood(1.0 / predCount);
 
-                        jumpEhf->bbeSuccs[predNum] = continuation;
+                        jumpEhf->bbeSuccs[predNum] = newEdge;
                         ++predNum;
 
                         if (!added)
@@ -12373,7 +12272,7 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I
 {
     assert((pInlineInfo != nullptr && compIsForInlining()) || // Perform the actual inlining.
            (pInlineInfo == nullptr && !compIsForInlining())   // Calculate the static inlining hint for ngen.
-           );
+    );
 
     // If we're really inlining, we should just have one result in play.
     assert((pInlineInfo == nullptr) || (inlineResult == pInlineInfo->inlineResult));
@@ -12718,12 +12617,6 @@ void Compiler::impInlineRecordArgInfo(InlineInfo*   pInlineInfo,
 
     assert(!curArgVal->OperIs(GT_RET_EXPR));
 
-    if (curArgVal->gtOper == GT_MKREFANY)
-    {
-        inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_IS_MKREFANY);
-        return;
-    }
-
     GenTree*   lclVarTree;
     const bool isAddressInLocal = impIsAddressInLocal(curArgVal, &lclVarTree);
     if (isAddressInLocal)
@@ -12894,9 +12787,27 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo)
                 inlArgInfo[ilArgCnt].argIsThis = true;
                 break;
             case WellKnownArg::RetBuffer:
+                // This does not appear in the table of inline arg info; do not include them
+                continue;
             case WellKnownArg::InstParam:
-                // These do not appear in the table of inline arg info; do not include them
+            {
+                InlArgInfo* ctxInfo  = new (this, CMK_Inlining) InlArgInfo{};
+                ctxInfo->arg         = &arg;
+                ctxInfo->argTmpNum   = BAD_VAR_NUM;
+                ctxInfo->argIsLclVar = arg.GetNode()->OperIs(GT_LCL_VAR);
+                if (arg.GetNode()->IsCnsIntOrI())
+                {
+                    ctxInfo->argIsInvariant = true;
+                }
+                else
+                {
+                    // Conservative approach
+                    ctxInfo->argHasSideEff = true;
+                    ctxInfo->argHasGlobRef = true;
+                }
+                pInlineInfo->inlInstParamArgInfo = ctxInfo;
                 continue;
+            }
             default:
                 break;
         }
@@ -13267,9 +13178,8 @@ unsigned Compiler::impInlineFetchLocal(unsigned lclNum DEBUGARG(const char* reas
 // impInlineFetchArg: return tree node for argument value in an inlinee
 //
 // Arguments:
-//    lclNum -- argument number in inlinee IL
-//    inlArgInfo -- argument info for inlinee
-//    lclVarInfo -- var info for inlinee
+//    argInfo -- argument info for inlinee
+//    lclInfo -- var info for inlinee
 //
 // Returns:
 //    Tree for the argument's value. Often an inlinee-scoped temp
@@ -13296,15 +13206,13 @@ unsigned Compiler::impInlineFetchLocal(unsigned lclNum DEBUGARG(const char* reas
 //    This method will side effect inlArgInfo. It should only be called
 //    for actual uses of the argument in the inlinee.
 
-GenTree* Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, InlLclVarInfo* lclVarInfo)
+GenTree* Compiler::impInlineFetchArg(InlArgInfo& argInfo, const InlLclVarInfo& lclInfo)
 {
     // Cache the relevant arg and lcl info for this argument.
     // We will modify argInfo but not lclVarInfo.
-    InlArgInfo&          argInfo          = inlArgInfo[lclNum];
-    const InlLclVarInfo& lclInfo          = lclVarInfo[lclNum];
-    const bool           argCanBeModified = argInfo.argHasLdargaOp || argInfo.argHasStargOp;
-    const var_types      lclTyp           = lclInfo.lclTypeInfo;
-    GenTree*             op1              = nullptr;
+    const bool      argCanBeModified = argInfo.argHasLdargaOp || argInfo.argHasStargOp;
+    const var_types lclTyp           = lclInfo.lclTypeInfo;
+    GenTree*        op1              = nullptr;
 
     GenTree* argNode = argInfo.arg->GetNode();
     assert(!argNode->OperIs(GT_RET_EXPR));
@@ -13355,7 +13263,6 @@ GenTree* Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, In
         if (argInfo.argIsUsed || ((lclTyp == TYP_BYREF) && (op1->TypeGet() != TYP_BYREF)))
         {
             assert(op1->gtOper == GT_LCL_VAR);
-            assert(lclNum == op1->AsLclVar()->gtLclILoffs);
 
             // Create a new lcl var node - remember the argument lclNum
             op1 = impCreateLocalNode(argLclNum DEBUGARG(op1->AsLclVar()->gtLclILoffs));
@@ -13410,7 +13317,7 @@ GenTree* Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, In
             assert(!argInfo.argIsUsed);
 
             /* Reserve a temp for the expression.
-            * Use a large size node as we may change it later */
+             * Use a large size node as we may change it later */
 
             const unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Inlining Arg"));
 
@@ -13468,7 +13375,7 @@ GenTree* Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, In
                  !argInfo.argHasCallerLocalRef))
             {
                 /* Get a *LARGE* LCL_VAR node */
-                op1 = gtNewLclLNode(tmpNum, genActualType(lclTyp) DEBUGARG(lclNum));
+                op1 = gtNewLclLNode(tmpNum, genActualType(lclTyp));
 
                 /* Record op1 as the very first use of this argument.
                 If there are no further uses of the arg, we may be
@@ -13615,7 +13522,7 @@ bool Compiler::impCanSkipCovariantStoreCheck(GenTree* value, GenTree* array)
     // We should only call this when optimizing.
     assert(opts.OptimizationEnabled());
 
-    // Check for assignment to same array, ie. arrLcl[i] = arrLcl[j]
+    // Check for store to same array, ie. arrLcl[i] = arrLcl[j]
     if (value->OperIs(GT_IND) && value->AsIndir()->Addr()->OperIs(GT_INDEX_ADDR) && array->OperIs(GT_LCL_VAR))
     {
         GenTree* valueArray = value->AsIndir()->Addr()->AsIndexAddr()->Arr();
@@ -13631,7 +13538,7 @@ bool Compiler::impCanSkipCovariantStoreCheck(GenTree* value, GenTree* array)
         }
     }
 
-    // Check for assignment of NULL.
+    // Check for store of NULL.
     if (value->OperIs(GT_CNS_INT))
     {
         assert(value->gtType == TYP_REF);
diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp
index fd72bf42238c..e7d6ef877068 100644
--- a/src/coreclr/jit/importercalls.cpp
+++ b/src/coreclr/jit/importercalls.cpp
@@ -104,6 +104,10 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
     CORINFO_SIG_INFO calliSig;
     NewCallArg       extraArg;
 
+    // Swift calls that might throw use a SwiftError* arg that requires additional IR to handle,
+    // so if we're importing a Swift call, look for this type in the signature
+    GenTree* swiftErrorNode = nullptr;
+
     /*-------------------------------------------------------------------------
      * First create the call node
      */
@@ -325,7 +329,7 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
                         return TYP_UNDEF;
                     }
 
-                    impStoreTemp(lclNum, stubAddr, CHECK_SPILL_NONE);
+                    impStoreToTemp(lclNum, stubAddr, CHECK_SPILL_NONE);
                     stubAddr = gtNewLclvNode(lclNum, TYP_I_IMPL);
 
                     // Create the actual call node
@@ -419,7 +423,7 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
                 // Now make an indirect call through the function pointer
 
                 unsigned lclNum = lvaGrabTemp(true DEBUGARG("VirtualCall through function pointer"));
-                impStoreTemp(lclNum, fptr, CHECK_SPILL_ALL);
+                impStoreToTemp(lclNum, fptr, CHECK_SPILL_ALL);
                 fptr = gtNewLclvNode(lclNum, TYP_I_IMPL);
 
                 call->AsCall()->gtCallAddr = fptr;
@@ -490,7 +494,7 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
                 // Now make an indirect call through the function pointer
 
                 unsigned lclNum = lvaGrabTemp(true DEBUGARG("Indirect call through function pointer"));
-                impStoreTemp(lclNum, fptr, CHECK_SPILL_ALL);
+                impStoreToTemp(lclNum, fptr, CHECK_SPILL_ALL);
                 fptr = gtNewLclvNode(lclNum, TYP_I_IMPL);
 
                 call = gtNewIndCallNode(fptr, callRetTyp, di);
@@ -589,7 +593,6 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
            tailcall to a function with a different number of arguments, we
            are hosed. There are ways around this (caller remembers esp value,
            varargs is not caller-pop, etc), but not worth it. */
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef TARGET_X86
         if (canTailCall)
@@ -657,6 +660,8 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
 
     if (call->gtFlags & GTF_CALL_UNMANAGED)
     {
+        assert(call->IsCall());
+
         // We set up the unmanaged call by linking the frame, disabling GC, etc
         // This needs to be cleaned up on return.
         // In addition, native calls have different normalization rules than managed code
@@ -669,7 +674,7 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
 
         checkForSmallType = true;
 
-        impPopArgsForUnmanagedCall(call->AsCall(), sig);
+        impPopArgsForUnmanagedCall(call->AsCall(), sig, &swiftErrorNode);
 
         goto DONE;
     }
@@ -1296,7 +1301,7 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
             impAppendTree(call, verCurrentState.esStackDepth - 1, impCurStmtDI);
         }
         else if (JitConfig.JitProfileValues() && call->IsCall() &&
-                 call->AsCall()->IsSpecialIntrinsic(this, NI_System_Buffer_Memmove))
+                 call->AsCall()->IsSpecialIntrinsic(this, NI_System_SpanHelpers_Memmove))
         {
             if (opts.IsOptimizedWithProfile())
             {
@@ -1397,7 +1402,7 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
                 if (origCall->IsVirtual() && (origCall->gtCallType != CT_INDIRECT) && (exactContextHnd != nullptr) &&
                     (origCall->gtHandleHistogramProfileCandidateInfo == nullptr))
                 {
-                    JITDUMP("\nSaving context %p for call [%06u]\n", exactContextHnd, dspTreeID(origCall));
+                    JITDUMP("\nSaving context %p for call [%06u]\n", dspPtr(exactContextHnd), dspTreeID(origCall));
                     origCall->gtCallMoreFlags |= GTF_CALL_M_HAS_LATE_DEVIRT_INFO;
                     LateDevirtualizationInfo* const info = new (this, CMK_Inlining) LateDevirtualizationInfo;
                     info->exactContextHnd                = exactContextHnd;
@@ -1410,13 +1415,21 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
                     // Such form allows to find statements with fat calls without walking through whole trees
                     // and removes problems with cutting trees.
                     assert(IsTargetAbi(CORINFO_NATIVEAOT_ABI));
-                    if (call->OperGet() != GT_LCL_VAR) // can be already converted by impFixupCallStructReturn.
+                    if (!call->OperIs(GT_LCL_VAR)) // can be already converted by impFixupCallStructReturn.
                     {
                         unsigned   calliSlot = lvaGrabTemp(true DEBUGARG("calli"));
                         LclVarDsc* varDsc    = lvaGetDesc(calliSlot);
+                        // Keep the information about small typedness to avoid
+                        // inserting unnecessary casts around normalization.
+                        if (call->IsCall() && varTypeIsSmall(call->AsCall()->gtReturnType))
+                        {
+                            assert(call->AsCall()->NormalizesSmallTypesOnReturn());
+                            varDsc->lvType = call->AsCall()->gtReturnType;
+                        }
 
-                        impStoreTemp(calliSlot, call, CHECK_SPILL_NONE);
-                        // impStoreTemp can change src arg list and return type for call that returns struct.
+                        // TODO-Bug: CHECK_SPILL_NONE here looks wrong.
+                        impStoreToTemp(calliSlot, call, CHECK_SPILL_NONE);
+                        // impStoreToTemp can change src arg list and return type for call that returns struct.
                         var_types type = genActualType(lvaTable[calliSlot].TypeGet());
                         call           = gtNewLclvNode(calliSlot, type);
                     }
@@ -1460,7 +1473,7 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
                         {
                             // QMARK has to be a root node
                             unsigned tmp = lvaGrabTemp(true DEBUGARG("Grabbing temp for Qmark"));
-                            impStoreTemp(tmp, call, CHECK_SPILL_ALL);
+                            impStoreToTemp(tmp, call, CHECK_SPILL_ALL);
                             call = gtNewLclvNode(tmp, call->TypeGet());
                         }
                     }
@@ -1495,6 +1508,15 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
         impPushOnStack(call, tiRetVal);
     }
 
+#ifdef SWIFT_SUPPORT
+    // If call is a Swift call with error handling, append additional IR
+    // to handle storing the error register's value post-call.
+    if (swiftErrorNode != nullptr)
+    {
+        impAppendSwiftErrorStore(swiftErrorNode);
+    }
+#endif // SWIFT_SUPPORT
+
     return callRetTyp;
 }
 #ifdef _PREFAST_
@@ -1565,7 +1587,7 @@ GenTree* Compiler::impDuplicateWithProfiledArg(GenTreeCall* call, IL_OFFSET ilOf
         unsigned argNum   = 0;
         ssize_t  minValue = 0;
         ssize_t  maxValue = 0;
-        if (call->IsSpecialIntrinsic(this, NI_System_Buffer_Memmove))
+        if (call->IsSpecialIntrinsic(this, NI_System_SpanHelpers_Memmove))
         {
             // dst(0), src(1), len(2)
             argNum = 2;
@@ -1743,7 +1765,7 @@ GenTree* Compiler::impFixupCallStructReturn(GenTreeCall* call, CORINFO_CLASS_HAN
             // This is allowed by the managed ABI and impStoreStruct will
             // never introduce copies due to this.
             unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Retbuf for unmanaged call"));
-            impStoreTemp(tmpNum, call, CHECK_SPILL_ALL);
+            impStoreToTemp(tmpNum, call, CHECK_SPILL_ALL);
             return gtNewLclvNode(tmpNum, lvaGetDesc(tmpNum)->TypeGet());
         }
 
@@ -1760,12 +1782,7 @@ GenTree* Compiler::impFixupCallStructReturn(GenTreeCall* call, CORINFO_CLASS_HAN
     assert(returnType == TYP_STRUCT);
     assert((howToReturnStruct == SPK_ByValueAsHfa) || (howToReturnStruct == SPK_ByValue));
 
-#ifdef UNIX_AMD64_ABI
-    // must be a struct returned in two registers
-    assert(retRegCount == 2);
-#else  // not UNIX_AMD64_ABI
     assert(retRegCount >= 2);
-#endif // not UNIX_AMD64_ABI
 
     if (!call->CanTailCall() && !call->IsInlineCandidate())
     {
@@ -1830,12 +1847,28 @@ GenTreeCall* Compiler::impImportIndirectCall(CORINFO_SIG_INFO* sig, const DebugI
     return call;
 }
 
-/*****************************************************************************/
-
-void Compiler::impPopArgsForUnmanagedCall(GenTreeCall* call, CORINFO_SIG_INFO* sig)
+//------------------------------------------------------------------------
+// impPopArgsForUnmanagedCall: Pop arguments from IL stack to a pinvoke call.
+//
+// Arguments:
+//    call - The unmanaged call
+//    sig  - The signature of the call site
+//    swiftErrorNode - [out] If this is a Swift call with a SwiftError* argument,
+//                     then swiftErrorNode points to the node.
+//                     Otherwise left at its existing value.
+//
+void Compiler::impPopArgsForUnmanagedCall(GenTreeCall* call, CORINFO_SIG_INFO* sig, GenTree** swiftErrorNode)
 {
     assert(call->gtFlags & GTF_CALL_UNMANAGED);
 
+#ifdef SWIFT_SUPPORT
+    if (call->unmgdCallConv == CorInfoCallConvExtension::Swift)
+    {
+        impPopArgsForSwiftCall(call, sig, swiftErrorNode);
+        return;
+    }
+#endif
+
     /* Since we push the arguments in reverse order (i.e. right -> left)
      * spill any side effects from the stack
      *
@@ -1852,7 +1885,7 @@ void Compiler::impPopArgsForUnmanagedCall(GenTreeCall* call, CORINFO_SIG_INFO* s
 
     if (call->unmgdCallConv == CorInfoCallConvExtension::Thiscall)
     {
-        assert(argsToReverse);
+        assert(argsToReverse != 0);
         argsToReverse--;
     }
 
@@ -1902,6 +1935,23 @@ void Compiler::impPopArgsForUnmanagedCall(GenTreeCall* call, CORINFO_SIG_INFO* s
         assert(thisPtr->TypeGet() == TYP_I_IMPL || thisPtr->TypeGet() == TYP_BYREF);
     }
 
+    impRetypeUnmanagedCallArgs(call);
+}
+
+//------------------------------------------------------------------------
+// impRetypeUnmanagedCallArgs: Retype unmanaged call arguments from managed
+// pointers to unmanaged ones.
+//
+// Arguments:
+//    call - The call
+//
+// Remarks:
+//   This makes use of the fact that TYP_I_IMPL <-> TYP_BYREF casts are
+//   implicit in JIT IR, allowing us to change the types directly without
+//   inserting a cast node.
+//
+void Compiler::impRetypeUnmanagedCallArgs(GenTreeCall* call)
+{
     for (CallArg& arg : call->gtArgs.Args())
     {
         GenTree* argNode = arg.GetEarlyNode();
@@ -1927,6 +1977,364 @@ void Compiler::impPopArgsForUnmanagedCall(GenTreeCall* call, CORINFO_SIG_INFO* s
     }
 }
 
+#ifdef SWIFT_SUPPORT
+
+//------------------------------------------------------------------------
+// GetSwiftLowering: Get the CORINFO_SWIFT_LOWERING associated with a struct.
+//
+// Arguments:
+//   call - The class
+//
+// Return Value:
+//   Pointer to lowering
+//
+const CORINFO_SWIFT_LOWERING* Compiler::GetSwiftLowering(CORINFO_CLASS_HANDLE hClass)
+{
+    if (m_swiftLoweringCache == nullptr)
+    {
+        m_swiftLoweringCache = new (this, CMK_CallArgs) SwiftLoweringMap(getAllocator(CMK_CallArgs));
+    }
+
+    CORINFO_SWIFT_LOWERING* lowering;
+    if (!m_swiftLoweringCache->Lookup(hClass, &lowering))
+    {
+        lowering = new (this, CMK_CallArgs) CORINFO_SWIFT_LOWERING;
+        info.compCompHnd->getSwiftLowering(hClass, lowering);
+        m_swiftLoweringCache->Set(hClass, lowering);
+    }
+
+    return lowering;
+}
+
+//------------------------------------------------------------------------
+// impPopArgsForSwiftCall: Pop arguments from IL stack to a Swift pinvoke node.
+//
+// Arguments:
+//    call           - The Swift call
+//    sig            - The signature of the call site
+//    swiftErrorNode - [out] Pointer to the SwiftError* argument.
+//                     Left at its existing value if no such argument exists.
+//
+void Compiler::impPopArgsForSwiftCall(GenTreeCall* call, CORINFO_SIG_INFO* sig, GenTree** swiftErrorNode)
+{
+    JITDUMP("Creating args for Swift call [%06u]\n", dspTreeID(call));
+
+    unsigned short swiftErrorIndex = sig->numArgs;
+    unsigned short swiftSelfIndex  = sig->numArgs;
+
+    // We are importing an unmanaged Swift call, which might require special parameter handling
+    bool checkEntireStack = false;
+
+    // Check the signature of the Swift call for the special types
+    CORINFO_ARG_LIST_HANDLE sigArg = sig->args;
+
+    for (unsigned short argIndex = 0; argIndex < sig->numArgs;
+         sigArg                  = info.compCompHnd->getArgNext(sigArg), argIndex++)
+    {
+        CORINFO_CLASS_HANDLE argClass;
+        CorInfoType          argType         = strip(info.compCompHnd->getArgType(sig, sigArg, &argClass));
+        const bool           argIsByrefOrPtr = (argType == CORINFO_TYPE_BYREF) || (argType == CORINFO_TYPE_PTR);
+
+        if (argIsByrefOrPtr)
+        {
+            argClass = info.compCompHnd->getArgClass(sig, sigArg);
+            argType  = info.compCompHnd->getChildType(argClass, &argClass);
+        }
+
+        if (argType != CORINFO_TYPE_VALUECLASS)
+        {
+            continue;
+        }
+
+        if (info.compCompHnd->isIntrinsicType(argClass))
+        {
+            const char* namespaceName;
+            const char* className = info.compCompHnd->getClassNameFromMetadata(argClass, &namespaceName);
+
+            if ((strcmp(className, "SwiftError") == 0) &&
+                (strcmp(namespaceName, "System.Runtime.InteropServices.Swift") == 0))
+            {
+                // For error handling purposes, we expect a pointer/reference to a SwiftError to be passed
+                if (!argIsByrefOrPtr)
+                {
+                    BADCODE("Expected SwiftError pointer/reference, got struct");
+                }
+
+                if (swiftErrorIndex != sig->numArgs)
+                {
+                    BADCODE("Duplicate SwiftError* parameter");
+                }
+
+                swiftErrorIndex  = argIndex;
+                checkEntireStack = true;
+            }
+            else if ((strcmp(className, "SwiftSelf") == 0) &&
+                     (strcmp(namespaceName, "System.Runtime.InteropServices.Swift") == 0))
+            {
+                // We expect a SwiftSelf struct to be passed, not a pointer/reference
+                if (argIsByrefOrPtr)
+                {
+                    BADCODE("Expected SwiftSelf struct, got pointer/reference");
+                }
+
+                if (swiftSelfIndex != sig->numArgs)
+                {
+                    BADCODE("Duplicate SwiftSelf parameter");
+                }
+
+                swiftSelfIndex = argIndex;
+                // Fall through to make sure the struct value becomes a local.
+            }
+            // TODO: Handle SwiftAsync
+        }
+
+        if (argIsByrefOrPtr)
+        {
+            continue;
+        }
+
+        if (argIndex != swiftSelfIndex)
+        {
+            // This is a struct type. Check if it needs to be lowered.
+            // TODO-Bug: SIMD types are not handled correctly by this.
+        }
+
+        // We must spill this struct to a local to be able to expand it into primitives.
+        GenTree* node = impStackTop(sig->numArgs - 1 - argIndex).val;
+        if (!node->OperIsLocalRead())
+        {
+            // TODO-CQ: If we enable FEATURE_IMPLICIT_BYREFS on all platforms
+            // where we support Swift we can probably let normal implicit byref
+            // handling handle the unlowered case.
+            impSpillStackEntry(verCurrentState.esStackDepth - sig->numArgs + argIndex,
+                               BAD_VAR_NUM DEBUGARG(false) DEBUGARG("Swift struct arg with lowering"));
+        }
+    }
+
+    // If using SwiftError*, spill entire stack as we will need to reuse the
+    // error argument after the call.
+    if (checkEntireStack)
+    {
+        impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("Spill for swift call"));
+    }
+
+    impPopCallArgs(sig, call);
+
+    JITDUMP("Node after popping args:\n");
+    DISPTREE(call);
+    JITDUMP("\n");
+
+    // Get SwiftError* arg (if it exists) before modifying the arg list
+    CallArg* const swiftErrorArg =
+        (swiftErrorIndex != sig->numArgs) ? call->gtArgs.GetArgByIndex(swiftErrorIndex) : nullptr;
+
+    // Now expand struct args that must be lowered into primitives
+    unsigned argIndex = 0;
+    for (CallArg* arg = call->gtArgs.Args().begin().GetArg(); arg != nullptr; argIndex++)
+    {
+        if (!varTypeIsStruct(arg->GetSignatureType()))
+        {
+            arg = arg->GetNext();
+            continue;
+        }
+
+        if (varTypeIsSIMD(arg->GetSignatureType()))
+        {
+            IMPL_LIMITATION("SIMD types are currently unsupported in Swift calls");
+        }
+
+        JITDUMP("  Argument %u is a struct [%06u]\n", argIndex, dspTreeID(arg->GetNode()));
+
+        assert(arg->GetNode()->OperIsLocalRead());
+        GenTreeLclVarCommon* structVal = arg->GetNode()->AsLclVarCommon();
+
+        CallArg* insertAfter = arg;
+        // For the self arg, change it from the SwiftSelf struct to a
+        // TYP_I_IMPL primitive directly. It must also be marked as a well
+        // known arg because it has a non-standard calling convention.
+        if (argIndex == swiftSelfIndex)
+        {
+            assert(arg->GetNode()->OperIsLocalRead());
+            GenTree*   primitiveSelf = gtNewLclFldNode(structVal->GetLclNum(), TYP_I_IMPL, structVal->GetLclOffs());
+            NewCallArg newArg = NewCallArg::Primitive(primitiveSelf, TYP_I_IMPL).WellKnown(WellKnownArg::SwiftSelf);
+            insertAfter       = call->gtArgs.InsertAfter(this, insertAfter, newArg);
+        }
+        else
+        {
+            const CORINFO_SWIFT_LOWERING* lowering = GetSwiftLowering(arg->GetSignatureClassHandle());
+            if (lowering->byReference)
+            {
+                JITDUMP("  Argument %d of type %s must be passed by reference\n", argIndex,
+                        typGetObjLayout(arg->GetSignatureClassHandle())->GetClassName());
+            }
+            else
+            {
+                JITDUMP("  Argument %d of type %s must be passed as %d primitive(s)\n", argIndex,
+                        typGetObjLayout(arg->GetSignatureClassHandle())->GetClassName(), lowering->numLoweredElements);
+                for (size_t i = 0; i < lowering->numLoweredElements; i++)
+                {
+                    JITDUMP("    [%zu] @ +%02u: %s\n", i, lowering->offsets[i],
+                            varTypeName(JitType2PreciseVarType(lowering->loweredElements[i])));
+                }
+            }
+
+            if (lowering->byReference)
+            {
+                GenTree* addrNode = gtNewLclAddrNode(structVal->GetLclNum(), structVal->GetLclOffs());
+                JITDUMP("    Passing by reference\n");
+
+                insertAfter = call->gtArgs.InsertAfter(this, insertAfter, NewCallArg::Primitive(addrNode, TYP_I_IMPL));
+            }
+            else
+            {
+                for (size_t i = 0; i < lowering->numLoweredElements; i++)
+                {
+                    var_types loweredType = JITtype2varType(lowering->loweredElements[i]);
+                    unsigned  offset      = lowering->offsets[i];
+
+                    GenTree* loweredNode = nullptr;
+
+                    // It's possible for the lowering to require us to pass the
+                    // tail of the sequence as a 64-bit value, even if the tail
+                    // of the struct is smaller than 8 bytes. In that case we
+                    // reconstruct the value using bitwise operations.
+                    // Alternatively we could create IND(LCL_ADDR), assuming
+                    // that the upper bits are undefined. This would result in
+                    // address exposure instead.
+                    unsigned sizeToRead = min(structVal->GetLayout(this)->GetSize() - offset, genTypeSize(loweredType));
+                    assert(sizeToRead > 0);
+
+                    if (sizeToRead == genTypeSize(loweredType))
+                    {
+                        loweredNode =
+                            gtNewLclFldNode(structVal->GetLclNum(), loweredType, structVal->GetLclOffs() + offset);
+                    }
+                    else
+                    {
+                        unsigned relOffset  = 0;
+                        auto     addSegment = [=, &loweredNode, &relOffset](var_types type) {
+                            GenTree* val = gtNewLclFldNode(structVal->GetLclNum(), type,
+                                                               structVal->GetLclOffs() + offset + relOffset);
+
+                            if (loweredType == TYP_LONG)
+                            {
+                                val = gtNewCastNode(TYP_LONG, val, true, TYP_LONG);
+                            }
+
+                            if (relOffset > 0)
+                            {
+                                val = gtNewOperNode(GT_LSH, genActualType(loweredType), val,
+                                                        gtNewIconNode(relOffset * 8));
+                            }
+
+                            if (loweredNode == nullptr)
+                            {
+                                loweredNode = val;
+                            }
+                            else
+                            {
+                                loweredNode = gtNewOperNode(GT_OR, genActualType(loweredType), loweredNode, val);
+                            }
+
+                            relOffset += genTypeSize(type);
+                        };
+
+                        if (sizeToRead - relOffset >= 4)
+                        {
+                            addSegment(TYP_INT);
+                        }
+                        if (sizeToRead - relOffset >= 2)
+                        {
+                            addSegment(TYP_USHORT);
+                        }
+                        if (sizeToRead - relOffset >= 1)
+                        {
+                            addSegment(TYP_UBYTE);
+                        }
+
+                        assert(relOffset == sizeToRead);
+                    }
+
+                    JITDUMP("    Adding expanded primitive argument [%06u]\n", dspTreeID(loweredNode));
+                    DISPTREE(loweredNode);
+
+                    insertAfter =
+                        call->gtArgs.InsertAfter(this, insertAfter, NewCallArg::Primitive(loweredNode, loweredType));
+                }
+            }
+        }
+
+        JITDUMP("  Removing plain struct argument [%06u]\n", dspTreeID(structVal));
+        call->gtArgs.Remove(arg);
+        arg = insertAfter->GetNext();
+    }
+
+    if (swiftErrorArg != nullptr)
+    {
+        // Before calling a Swift method that may throw, the error register must be cleared,
+        // as we will check for a nonzero error value after the call returns.
+        // By adding a well-known "sentinel" argument that uses the error register,
+        // the JIT will emit code for clearing the error register before the call,
+        // and will mark the error register as busy so it isn't used to hold the function call's address.
+        GenTree* const errorSentinelValueNode = gtNewIconNode(0);
+        call->gtArgs.InsertAfter(this, swiftErrorArg,
+                                 NewCallArg::Primitive(errorSentinelValueNode).WellKnown(WellKnownArg::SwiftError));
+
+        // Swift call isn't going to use the SwiftError* arg, so don't bother emitting it
+        assert(swiftErrorNode != nullptr);
+        *swiftErrorNode = swiftErrorArg->GetNode();
+        call->gtArgs.Remove(swiftErrorArg);
+    }
+
+#ifdef DEBUG
+    if (verbose && call->TypeIs(TYP_STRUCT) && (sig->retTypeClass != NO_CLASS_HANDLE))
+    {
+        const CORINFO_SWIFT_LOWERING* lowering = GetSwiftLowering(sig->retTypeClass);
+        if (lowering->byReference)
+        {
+            printf("  Call returns %s by reference\n", typGetObjLayout(sig->retTypeClass)->GetClassName());
+        }
+        else
+        {
+            printf("  Call returns %s as %d primitive(s) in registers\n",
+                   typGetObjLayout(sig->retTypeClass)->GetClassName(), lowering->numLoweredElements);
+            for (size_t i = 0; i < lowering->numLoweredElements; i++)
+            {
+                printf("    [%zu] @ +%02u: %s\n", i, lowering->offsets[i],
+                       varTypeName(JitType2PreciseVarType(lowering->loweredElements[i])));
+            }
+        }
+    }
+#endif
+
+    JITDUMP("Final result after Swift call lowering:\n");
+    DISPTREE(call);
+    JITDUMP("\n");
+
+    impRetypeUnmanagedCallArgs(call);
+}
+
+//------------------------------------------------------------------------
+// impAppendSwiftErrorStore: Append IR to store the Swift error register value
+// to the SwiftError* argument represented by swiftErrorNode, post-Swift call
+//
+// Arguments:
+//    swiftErrorNode - the SwiftError* argument
+//
+void Compiler::impAppendSwiftErrorStore(GenTree* const swiftErrorNode)
+{
+    assert(swiftErrorNode != nullptr);
+
+    // Store the error register value to where the SwiftError* points to
+    GenTree* errorRegNode = new (this, GT_SWIFT_ERROR) GenTree(GT_SWIFT_ERROR, TYP_I_IMPL);
+    errorRegNode->SetHasOrderingSideEffect();
+    errorRegNode->gtFlags |= (GTF_CALL | GTF_GLOB_REF);
+
+    GenTreeStoreInd* swiftErrorStore = gtNewStoreIndNode(swiftErrorNode->TypeGet(), swiftErrorNode, errorRegNode);
+    impAppendTree(swiftErrorStore, CHECK_SPILL_ALL, impCurStmtDI, false);
+}
+#endif // SWIFT_SUPPORT
+
 //------------------------------------------------------------------------
 // impInitializeArrayIntrinsic: Attempts to replace a call to InitializeArray
 //    with a GT_COPYBLK node.
@@ -2260,8 +2668,8 @@ GenTree* Compiler::impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig)
 
     //
     // At this point we are ready to commit to implementing the InitializeArray
-    // intrinsic using a struct assignment.  Pop the arguments from the stack and
-    // return the struct assignment node.
+    // intrinsic using a struct store.  Pop the arguments from the stack and
+    // return the store node.
     //
 
     impPopStack();
@@ -2771,7 +3179,7 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
                 betterToExpand = true;
                 break;
 
-            case NI_System_Buffer_Memmove:
+            case NI_System_SpanHelpers_Memmove:
             case NI_System_SpanHelpers_SequenceEqual:
                 // We're going to instrument these
                 betterToExpand = opts.IsInstrumented();
@@ -2837,26 +3245,38 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
 
             case NI_System_String_Equals:
             {
-                retNode = impStringEqualsOrStartsWith(/*startsWith:*/ false, sig, methodFlags);
+                retNode = impUtf16StringComparison(StringComparisonKind::Equals, sig, methodFlags);
                 break;
             }
 
             case NI_System_MemoryExtensions_Equals:
             case NI_System_MemoryExtensions_SequenceEqual:
             {
-                retNode = impSpanEqualsOrStartsWith(/*startsWith:*/ false, sig, methodFlags);
+                retNode = impUtf16SpanComparison(StringComparisonKind::Equals, sig, methodFlags);
                 break;
             }
 
             case NI_System_String_StartsWith:
             {
-                retNode = impStringEqualsOrStartsWith(/*startsWith:*/ true, sig, methodFlags);
+                retNode = impUtf16StringComparison(StringComparisonKind::StartsWith, sig, methodFlags);
+                break;
+            }
+
+            case NI_System_String_EndsWith:
+            {
+                retNode = impUtf16StringComparison(StringComparisonKind::EndsWith, sig, methodFlags);
                 break;
             }
 
             case NI_System_MemoryExtensions_StartsWith:
             {
-                retNode = impSpanEqualsOrStartsWith(/*startsWith:*/ true, sig, methodFlags);
+                retNode = impUtf16SpanComparison(StringComparisonKind::StartsWith, sig, methodFlags);
+                break;
+            }
+
+            case NI_System_MemoryExtensions_EndsWith:
+            {
+                retNode = impUtf16SpanComparison(StringComparisonKind::EndsWith, sig, methodFlags);
                 break;
             }
 
@@ -2874,7 +3294,7 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
                 GenTree* op1  = impPopStack().val;
                 GenTree* addr = gtNewIndexAddr(op1, op2, TYP_USHORT, NO_CLASS_HANDLE, OFFSETOF__CORINFO_String__chars,
                                                OFFSETOF__CORINFO_String__stringLen);
-                retNode = gtNewIndexIndir(addr->AsIndexAddr());
+                retNode       = gtNewIndexIndir(addr->AsIndexAddr());
                 break;
             }
 
@@ -2999,7 +3419,7 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
                 resolvedToken.tokenType    = CORINFO_TOKENKIND_Method;
 
                 CORINFO_GENERICHANDLE_RESULT embedInfo;
-                info.compCompHnd->expandRawHandleIntrinsic(&resolvedToken, &embedInfo);
+                info.compCompHnd->expandRawHandleIntrinsic(&resolvedToken, info.compMethodHnd, &embedInfo);
 
                 GenTree* rawHandle = impLookupToTree(&resolvedToken, &embedInfo.lookup, gtTokenToIconFlags(memberRef),
                                                      embedInfo.compileTimeHandle);
@@ -3011,7 +3431,7 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
                 noway_assert(genTypeSize(rawHandle->TypeGet()) == genTypeSize(TYP_I_IMPL));
 
                 unsigned rawHandleSlot = lvaGrabTemp(true DEBUGARG("rawHandle"));
-                impStoreTemp(rawHandleSlot, rawHandle, CHECK_SPILL_NONE);
+                impStoreToTemp(rawHandleSlot, rawHandle, CHECK_SPILL_NONE);
 
                 GenTree*  lclVarAddr = gtNewLclVarAddrNode(rawHandleSlot);
                 var_types resultType = JITtype2varType(sig->retType);
@@ -3221,8 +3641,8 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
                         typeHandleHelper = CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL;
                     }
                     assert(op1->AsCall()->gtArgs.CountArgs() == 1);
-                    op1 = gtNewHelperCallNode(typeHandleHelper, TYP_REF,
-                                              op1->AsCall()->gtArgs.GetArgByIndex(0)->GetEarlyNode());
+                    op1         = gtNewHelperCallNode(typeHandleHelper, TYP_REF,
+                                                      op1->AsCall()->gtArgs.GetArgByIndex(0)->GetEarlyNode());
                     op1->gtType = TYP_REF;
                     retNode     = op1;
                 }
@@ -3481,6 +3901,11 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
                 assert(sig->numArgs == 3);
 
                 GenTree* op3 = impPopStack().val; // comparand
+                if (varTypeIsSmall(callType))
+                {
+                    // small types need the comparand to have its upper bits zeroed
+                    op3 = gtNewCastNode(genActualType(callType), op3, /* uns */ false, varTypeToUnsigned(callType));
+                }
                 GenTree* op2 = impPopStack().val; // value
                 GenTree* op1 = impPopStack().val; // location
                 retNode      = gtNewAtomicNode(GT_CMPXCHG, callType, op1, op2, op3);
@@ -3539,18 +3964,9 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
             case NI_System_Threading_Interlocked_ReadMemoryBarrier:
             {
                 assert(sig->numArgs == 0);
-
-                GenTree* op1 = new (this, GT_MEMORYBARRIER) GenTree(GT_MEMORYBARRIER, TYP_VOID);
-                op1->gtFlags |= GTF_GLOB_REF | GTF_ASG;
-
                 // On XARCH `NI_System_Threading_Interlocked_ReadMemoryBarrier` fences need not be emitted.
                 // However, we still need to capture the effect on reordering.
-                if (ni == NI_System_Threading_Interlocked_ReadMemoryBarrier)
-                {
-                    op1->gtFlags |= GTF_MEMORYBARRIER_LOAD;
-                }
-
-                retNode = op1;
+                retNode = gtNewMemoryBarrier(ni == NI_System_Threading_Interlocked_ReadMemoryBarrier);
                 break;
             }
 
@@ -3628,7 +4044,6 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
             case NI_System_Math_Cosh:
             case NI_System_Math_Exp:
             case NI_System_Math_Floor:
-            case NI_System_Math_FMod:
             case NI_System_Math_ILogB:
             case NI_System_Math_Log:
             case NI_System_Math_Log2:
@@ -3987,7 +4402,8 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
 
             case NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8:
             case NI_System_SpanHelpers_SequenceEqual:
-            case NI_System_Buffer_Memmove:
+            case NI_System_SpanHelpers_ClearWithoutReferences:
+            case NI_System_SpanHelpers_Memmove:
             {
                 if (sig->sigInst.methInstCount == 0)
                 {
@@ -3998,6 +4414,16 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
                 break;
             }
 
+            case NI_System_SpanHelpers_Fill:
+            {
+                if (sig->sigInst.methInstCount == 1)
+                {
+                    // We'll try to unroll this in lower for constant input.
+                    isSpecial = true;
+                }
+                break;
+            }
+
             case NI_System_BitConverter_DoubleToInt64Bits:
             {
                 GenTree* op1 = impStackTop().val;
@@ -4255,7 +4681,7 @@ GenTree* Compiler::impSRCSUnsafeIntrinsic(NamedIntrinsic          intrinsic,
                 // In order to change the class handle of the object we need to spill it to a temp
                 // and update class info for that temp.
                 unsigned localNum = lvaGrabTemp(true DEBUGARG("updating class info"));
-                impStoreTemp(localNum, op, CHECK_SPILL_ALL);
+                impStoreToTemp(localNum, op, CHECK_SPILL_ALL);
 
                 // NOTE: we still can't say for sure that it is the exact type of the argument
                 lvaSetClass(localNum, inst, /*isExact*/ false);
@@ -4401,7 +4827,7 @@ GenTree* Compiler::impSRCSUnsafeIntrinsic(NamedIntrinsic          intrinsic,
             if (varTypeIsIntegral(valType) && (genTypeSize(valType) < fromSize))
             {
                 unsigned lclNum = lvaGrabTemp(true DEBUGARG("bitcast small type extension"));
-                impStoreTemp(lclNum, op1, CHECK_SPILL_ALL);
+                impStoreToTemp(lclNum, op1, CHECK_SPILL_ALL);
                 addr = gtNewLclVarAddrNode(lclNum, TYP_I_IMPL);
             }
             else
@@ -4897,7 +5323,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic        intrinsic,
                 result = gtNewQmarkNode(baseType, cond, colon);
 
                 unsigned tmp = lvaGrabTemp(true DEBUGARG("Grabbing temp for LeadingZeroCount Qmark"));
-                impStoreTemp(tmp, result, CHECK_SPILL_NONE);
+                impStoreToTemp(tmp, result, CHECK_SPILL_NONE);
                 result = gtNewLclvNode(tmp, baseType);
             }
 #elif defined(TARGET_ARM64)
@@ -5228,7 +5654,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic        intrinsic,
                 result = gtNewQmarkNode(baseType, cond, colon);
 
                 unsigned tmp = lvaGrabTemp(true DEBUGARG("Grabbing temp for TrailingZeroCount Qmark"));
-                impStoreTemp(tmp, result, CHECK_SPILL_NONE);
+                impStoreToTemp(tmp, result, CHECK_SPILL_NONE);
                 result = gtNewLclvNode(tmp, baseType);
             }
 #elif defined(TARGET_ARM64)
@@ -5623,8 +6049,7 @@ void Compiler::impCheckForPInvokeCall(
     // return here without inlining the native call.
     if (unmanagedCallConv == CorInfoCallConvExtension::Managed ||
         unmanagedCallConv == CorInfoCallConvExtension::Fastcall ||
-        unmanagedCallConv == CorInfoCallConvExtension::FastcallMemberFunction ||
-        unmanagedCallConv == CorInfoCallConvExtension::Swift)
+        unmanagedCallConv == CorInfoCallConvExtension::FastcallMemberFunction)
     {
         return;
     }
@@ -5701,7 +6126,8 @@ void Compiler::impCheckForPInvokeCall(
 class SpillRetExprHelper
 {
 public:
-    SpillRetExprHelper(Compiler* comp) : comp(comp)
+    SpillRetExprHelper(Compiler* comp)
+        : comp(comp)
     {
     }
 
@@ -5737,7 +6163,7 @@ class SpillRetExprHelper
         assert(retExpr->OperGet() == GT_RET_EXPR);
         const unsigned tmp = comp->lvaGrabTemp(true DEBUGARG("spilling ret_expr"));
         JITDUMP("Storing return expression [%06u] to a local var V%02u.\n", comp->dspTreeID(retExpr), tmp);
-        comp->impStoreTemp(tmp, retExpr, Compiler::CHECK_SPILL_NONE);
+        comp->impStoreToTemp(tmp, retExpr, Compiler::CHECK_SPILL_NONE);
         *pRetExpr = comp->gtNewLclvNode(tmp, retExpr->TypeGet());
 
         assert(comp->lvaTable[tmp].lvSingleDef == 0);
@@ -5832,15 +6258,19 @@ void Compiler::pickGDV(GenTreeCall*           call,
 #ifdef DEBUG
     if ((verbose || JitConfig.EnableExtraSuperPmiQueries()) && (numberOfClasses > 0))
     {
-        bool                 isExact;
-        bool                 isNonNull;
-        CallArg*             thisArg            = call->gtArgs.GetThisArg();
-        CORINFO_CLASS_HANDLE declaredThisClsHnd = gtGetClassHandle(thisArg->GetNode(), &isExact, &isNonNull);
         JITDUMP("Likely classes for call [%06u]", dspTreeID(call));
-        if (declaredThisClsHnd != NO_CLASS_HANDLE)
+        if (!call->IsHelperCall())
         {
-            const char* baseClassName = eeGetClassName(declaredThisClsHnd);
-            JITDUMP(" on class %p (%s)", declaredThisClsHnd, baseClassName);
+            bool     isExact;
+            bool     isNonNull;
+            CallArg* thisArg = call->gtArgs.GetThisArg();
+            assert(thisArg != nullptr);
+            CORINFO_CLASS_HANDLE declaredThisClsHnd = gtGetClassHandle(thisArg->GetNode(), &isExact, &isNonNull);
+            if (declaredThisClsHnd != NO_CLASS_HANDLE)
+            {
+                const char* baseClassName = eeGetClassName(declaredThisClsHnd);
+                JITDUMP(" on class %p (%s)", declaredThisClsHnd, baseClassName);
+            }
         }
         JITDUMP("\n");
 
@@ -6177,7 +6607,7 @@ void Compiler::considerGuardedDevirtualization(GenTreeCall*            call,
         {
             JITDUMP("No exact classes implementing %s\n", eeGetClassName(baseClass))
         }
-        else if (numExactClasses > maxTypeChecks)
+        else if (numExactClasses < 0 || numExactClasses > maxTypeChecks)
         {
             JITDUMP("Too many exact classes implementing %s (%d > %d)\n", eeGetClassName(baseClass), numExactClasses,
                     maxTypeChecks)
@@ -6365,7 +6795,9 @@ void Compiler::considerGuardedDevirtualization(GenTreeCall*            call,
 #ifdef DEBUG
         char buffer[256];
         JITDUMP("%s call would invoke method %s\n",
-                isInterface ? "interface" : call->IsDelegateInvoke() ? "delegate" : "virtual",
+                isInterface                ? "interface"
+                : call->IsDelegateInvoke() ? "delegate"
+                                           : "virtual",
                 eeGetMethodFullName(likelyMethod, true, true, buffer, sizeof(buffer)));
 #endif
 
@@ -6860,8 +7292,8 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
 #if defined(TARGET_XARCH)
     switch (intrinsicName)
     {
-        // AMD64/x86 has SSE2 instructions to directly compute sqrt/abs and SSE4.1
-        // instructions to directly compute round/ceiling/floor/truncate.
+            // AMD64/x86 has SSE2 instructions to directly compute sqrt/abs and SSE4.1
+            // instructions to directly compute round/ceiling/floor/truncate.
 
         case NI_System_Math_Abs:
         case NI_System_Math_Sqrt:
@@ -6952,7 +7384,6 @@ bool Compiler::IsMathIntrinsic(NamedIntrinsic intrinsicName)
         case NI_System_Math_Cosh:
         case NI_System_Math_Exp:
         case NI_System_Math_Floor:
-        case NI_System_Math_FMod:
         case NI_System_Math_FusedMultiplyAdd:
         case NI_System_Math_ILogB:
         case NI_System_Math_Log:
@@ -7067,8 +7498,8 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
     // Optionally, print info on devirtualization
     Compiler* const rootCompiler = impInlineRoot();
     const bool      doPrint      = JitConfig.JitPrintDevirtualizedMethods().contains(rootCompiler->info.compMethodHnd,
-                                                                           rootCompiler->info.compClassHnd,
-                                                                           &rootCompiler->info.compMethodInfo->args);
+                                                                                     rootCompiler->info.compClassHnd,
+                                                                                     &rootCompiler->info.compMethodInfo->args);
 #endif // DEBUG
 
     // Fetch information about the virtual method we're calling.
@@ -8017,160 +8448,160 @@ void Compiler::impCheckCanInline(GenTreeCall*           call,
 
     bool success = eeRunWithErrorTrap<Param>(
         [](Param* pParam) {
-
-            // Cache some frequently accessed state.
-            //
-            Compiler* const       compiler     = pParam->pThis;
-            COMP_HANDLE           compCompHnd  = compiler->info.compCompHnd;
-            CORINFO_METHOD_HANDLE ftn          = pParam->fncHandle;
-            InlineResult* const   inlineResult = pParam->result;
+        // Cache some frequently accessed state.
+        //
+        Compiler* const       compiler     = pParam->pThis;
+        COMP_HANDLE           compCompHnd  = compiler->info.compCompHnd;
+        CORINFO_METHOD_HANDLE ftn          = pParam->fncHandle;
+        InlineResult* const   inlineResult = pParam->result;
 
 #ifdef DEBUG
-            if (JitConfig.JitNoInline())
-            {
-                inlineResult->NoteFatal(InlineObservation::CALLEE_IS_JIT_NOINLINE);
-                return;
-            }
+        if (JitConfig.JitNoInline())
+        {
+            inlineResult->NoteFatal(InlineObservation::CALLEE_IS_JIT_NOINLINE);
+            return;
+        }
 #endif
-            JITDUMP("\nCheckCanInline: fetching method info for inline candidate %s -- context %p\n",
-                    compiler->eeGetMethodName(ftn), pParam->exactContextHnd);
 
-            if (pParam->exactContextHnd == METHOD_BEING_COMPILED_CONTEXT())
-            {
-                JITDUMP("Current method context\n");
-            }
-            else if ((((size_t)pParam->exactContextHnd & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_METHOD))
-            {
-                JITDUMP("Method context: %s\n",
-                        compiler->eeGetMethodFullName((CORINFO_METHOD_HANDLE)pParam->exactContextHnd));
-            }
-            else
-            {
-                JITDUMP("Class context: %s\n", compiler->eeGetClassName((CORINFO_CLASS_HANDLE)(
-                                                   (size_t)pParam->exactContextHnd & ~CORINFO_CONTEXTFLAGS_MASK)));
-            }
+        JITDUMP("\nCheckCanInline: fetching method info for inline candidate %s -- context %p\n",
+                compiler->eeGetMethodName(ftn), compiler->dspPtr(pParam->exactContextHnd));
 
-            // Fetch method info. This may fail, if the method doesn't have IL.
-            //
-            CORINFO_METHOD_INFO methInfo;
-            if (!compCompHnd->getMethodInfo(ftn, &methInfo, pParam->exactContextHnd))
-            {
-                inlineResult->NoteFatal(InlineObservation::CALLEE_NO_METHOD_INFO);
-                return;
-            }
+        if (pParam->exactContextHnd == METHOD_BEING_COMPILED_CONTEXT())
+        {
+            JITDUMP("Current method context\n");
+        }
+        else if ((((size_t)pParam->exactContextHnd & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_METHOD))
+        {
+            JITDUMP("Method context: %s\n",
+                    compiler->eeGetMethodFullName((CORINFO_METHOD_HANDLE)pParam->exactContextHnd));
+        }
+        else
+        {
+            JITDUMP("Class context: %s\n",
+                    compiler->eeGetClassName(
+                        (CORINFO_CLASS_HANDLE)((size_t)pParam->exactContextHnd & ~CORINFO_CONTEXTFLAGS_MASK)));
+        }
 
-            // Profile data allows us to avoid early "too many IL bytes" outs.
-            //
-            inlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE_WEIGHTS,
-                                   compiler->fgHaveSufficientProfileWeights());
-            inlineResult->NoteBool(InlineObservation::CALLSITE_INSIDE_THROW_BLOCK,
-                                   compiler->compCurBB->KindIs(BBJ_THROW));
+        // Fetch method info. This may fail, if the method doesn't have IL.
+        //
+        CORINFO_METHOD_INFO methInfo;
+        if (!compCompHnd->getMethodInfo(ftn, &methInfo, pParam->exactContextHnd))
+        {
+            inlineResult->NoteFatal(InlineObservation::CALLEE_NO_METHOD_INFO);
+            return;
+        }
 
-            bool const forceInline = (pParam->methAttr & CORINFO_FLG_FORCEINLINE) != 0;
+        // Profile data allows us to avoid early "too many IL bytes" outs.
+        //
+        inlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE_WEIGHTS,
+                               compiler->fgHaveSufficientProfileWeights());
+        inlineResult->NoteBool(InlineObservation::CALLSITE_INSIDE_THROW_BLOCK, compiler->compCurBB->KindIs(BBJ_THROW));
 
-            compiler->impCanInlineIL(ftn, &methInfo, forceInline, inlineResult);
+        bool const forceInline = (pParam->methAttr & CORINFO_FLG_FORCEINLINE) != 0;
 
-            if (inlineResult->IsFailure())
-            {
-                assert(inlineResult->IsNever());
-                return;
-            }
+        compiler->impCanInlineIL(ftn, &methInfo, forceInline, inlineResult);
 
-            // Speculatively check if initClass() can be done.
-            // If it can be done, we will try to inline the method.
-            CorInfoInitClassResult const initClassResult =
-                compCompHnd->initClass(nullptr /* field */, ftn /* method */, pParam->exactContextHnd /* context */);
+        if (inlineResult->IsFailure())
+        {
+            assert(inlineResult->IsNever());
+            return;
+        }
 
-            if (initClassResult & CORINFO_INITCLASS_DONT_INLINE)
-            {
-                inlineResult->NoteFatal(InlineObservation::CALLSITE_CANT_CLASS_INIT);
-                return;
-            }
+        // Speculatively check if initClass() can be done.
+        // If it can be done, we will try to inline the method.
+        CorInfoInitClassResult const initClassResult =
+            compCompHnd->initClass(nullptr /* field */, ftn /* method */, pParam->exactContextHnd /* context */);
 
-            // Given the VM the final say in whether to inline or not.
-            // This should be last since for verifiable code, this can be expensive
-            //
-            CorInfoInline const vmResult = compCompHnd->canInline(compiler->info.compMethodHnd, ftn);
+        if (initClassResult & CORINFO_INITCLASS_DONT_INLINE)
+        {
+            inlineResult->NoteFatal(InlineObservation::CALLSITE_CANT_CLASS_INIT);
+            return;
+        }
 
-            if (vmResult == INLINE_FAIL)
-            {
-                inlineResult->NoteFatal(InlineObservation::CALLSITE_IS_VM_NOINLINE);
-            }
-            else if (vmResult == INLINE_NEVER)
-            {
-                inlineResult->NoteFatal(InlineObservation::CALLEE_IS_VM_NOINLINE);
-            }
+        // Given the VM the final say in whether to inline or not.
+        // This should be last since for verifiable code, this can be expensive
+        //
+        CorInfoInline const vmResult = compCompHnd->canInline(compiler->info.compMethodHnd, ftn);
 
-            if (inlineResult->IsFailure())
-            {
-                // The VM already self-reported this failure, so mark it specially
-                // so the JIT doesn't also try reporting it.
-                //
-                inlineResult->SetVMFailure();
-                return;
-            }
+        if (vmResult == INLINE_FAIL)
+        {
+            inlineResult->NoteFatal(InlineObservation::CALLSITE_IS_VM_NOINLINE);
+        }
+        else if (vmResult == INLINE_NEVER)
+        {
+            inlineResult->NoteFatal(InlineObservation::CALLEE_IS_VM_NOINLINE);
+        }
 
-            // Get the method's class properties
+        if (inlineResult->IsFailure())
+        {
+            // The VM already self-reported this failure, so mark it specially
+            // so the JIT doesn't also try reporting it.
             //
-            CORINFO_CLASS_HANDLE clsHandle = compCompHnd->getMethodClass(ftn);
-            unsigned const       clsAttr   = compCompHnd->getClassAttribs(clsHandle);
+            inlineResult->SetVMFailure();
+            return;
+        }
 
-            // Return type
-            //
-            var_types const fncRetType = pParam->call->TypeGet();
+        // Get the method's class properties
+        //
+        CORINFO_CLASS_HANDLE clsHandle = compCompHnd->getMethodClass(ftn);
+        unsigned const       clsAttr   = compCompHnd->getClassAttribs(clsHandle);
+
+        // Return type
+        //
+        var_types const fncRetType = pParam->call->TypeGet();
 
 #ifdef DEBUG
-            var_types fncRealRetType = JITtype2varType(methInfo.args.retType);
+        var_types fncRealRetType = JITtype2varType(methInfo.args.retType);
 
-            assert((genActualType(fncRealRetType) == genActualType(fncRetType)) ||
-                   // <BUGNUM> VSW 288602 </BUGNUM>
-                   // In case of IJW, we allow to assign a native pointer to a BYREF.
-                   (fncRetType == TYP_BYREF && methInfo.args.retType == CORINFO_TYPE_PTR) ||
-                   (varTypeIsStruct(fncRetType) && (fncRealRetType == TYP_STRUCT)));
+        assert((genActualType(fncRealRetType) == genActualType(fncRetType)) ||
+               // <BUGNUM> VSW 288602 </BUGNUM>
+               // In case of IJW, we allow to assign a native pointer to a BYREF.
+               (fncRetType == TYP_BYREF && methInfo.args.retType == CORINFO_TYPE_PTR) ||
+               (varTypeIsStruct(fncRetType) && (fncRealRetType == TYP_STRUCT)));
 #endif
 
-            // Allocate an InlineCandidateInfo structure,
-            //
-            // Or, reuse the existing GuardedDevirtualizationCandidateInfo,
-            // which was pre-allocated to have extra room.
-            //
-            InlineCandidateInfo* pInfo;
+        // Allocate an InlineCandidateInfo structure,
+        //
+        // Or, reuse the existing GuardedDevirtualizationCandidateInfo,
+        // which was pre-allocated to have extra room.
+        //
+        InlineCandidateInfo* pInfo;
 
-            if (pParam->call->IsGuardedDevirtualizationCandidate())
-            {
-                pInfo = pParam->call->GetGDVCandidateInfo(pParam->candidateIndex);
-            }
-            else
-            {
-                pInfo = new (pParam->pThis, CMK_Inlining) InlineCandidateInfo;
+        if (pParam->call->IsGuardedDevirtualizationCandidate())
+        {
+            pInfo = pParam->call->GetGDVCandidateInfo(pParam->candidateIndex);
+        }
+        else
+        {
+            pInfo = new (pParam->pThis, CMK_Inlining) InlineCandidateInfo;
 
-                // Null out bits we don't use when we're just inlining
-                //
-                pInfo->guardedClassHandle              = nullptr;
-                pInfo->guardedMethodHandle             = nullptr;
-                pInfo->guardedMethodUnboxedEntryHandle = nullptr;
-                pInfo->likelihood                      = 0;
-                pInfo->requiresInstMethodTableArg      = false;
-            }
-
-            pInfo->methInfo                       = methInfo;
-            pInfo->ilCallerHandle                 = pParam->pThis->info.compMethodHnd;
-            pInfo->clsHandle                      = clsHandle;
-            pInfo->exactContextHnd                = pParam->exactContextHnd;
-            pInfo->retExpr                        = nullptr;
-            pInfo->preexistingSpillTemp           = BAD_VAR_NUM;
-            pInfo->clsAttr                        = clsAttr;
-            pInfo->methAttr                       = pParam->methAttr;
-            pInfo->initClassResult                = initClassResult;
-            pInfo->fncRetType                     = fncRetType;
-            pInfo->exactContextNeedsRuntimeLookup = false;
-            pInfo->inlinersContext                = pParam->pThis->compInlineContext;
-
-            // Note exactContextNeedsRuntimeLookup is reset later on,
-            // over in impMarkInlineCandidate.
+            // Null out bits we don't use when we're just inlining
             //
-            *(pParam->ppInlineCandidateInfo) = pInfo;
-        },
+            pInfo->guardedClassHandle              = nullptr;
+            pInfo->guardedMethodHandle             = nullptr;
+            pInfo->guardedMethodUnboxedEntryHandle = nullptr;
+            pInfo->likelihood                      = 0;
+            pInfo->requiresInstMethodTableArg      = false;
+        }
+
+        pInfo->methInfo                       = methInfo;
+        pInfo->ilCallerHandle                 = pParam->pThis->info.compMethodHnd;
+        pInfo->clsHandle                      = clsHandle;
+        pInfo->exactContextHnd                = pParam->exactContextHnd;
+        pInfo->retExpr                        = nullptr;
+        pInfo->preexistingSpillTemp           = BAD_VAR_NUM;
+        pInfo->clsAttr                        = clsAttr;
+        pInfo->methAttr                       = pParam->methAttr;
+        pInfo->initClassResult                = initClassResult;
+        pInfo->fncRetType                     = fncRetType;
+        pInfo->exactContextNeedsRuntimeLookup = false;
+        pInfo->inlinersContext                = pParam->pThis->compInlineContext;
+
+        // Note exactContextNeedsRuntimeLookup is reset later on,
+        // over in impMarkInlineCandidate.
+        //
+        *(pParam->ppInlineCandidateInfo) = pInfo;
+    },
         &param);
 
     if (!success)
@@ -8866,13 +9297,6 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
                             result = NI_System_BitConverter_Int64BitsToDouble;
                         }
                     }
-                    else if (strcmp(className, "Buffer") == 0)
-                    {
-                        if (strcmp(methodName, "Memmove") == 0)
-                        {
-                            result = NI_System_Buffer_Memmove;
-                        }
-                    }
                     break;
                 }
 
@@ -8943,6 +9367,10 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
                         {
                             result = NI_System_MemoryExtensions_StartsWith;
                         }
+                        else if (strcmp(methodName, "EndsWith") == 0)
+                        {
+                            result = NI_System_MemoryExtensions_EndsWith;
+                        }
                     }
                     break;
                 }
@@ -9020,6 +9448,18 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
                         {
                             result = NI_System_SpanHelpers_SequenceEqual;
                         }
+                        else if (strcmp(methodName, "Fill") == 0)
+                        {
+                            result = NI_System_SpanHelpers_Fill;
+                        }
+                        else if (strcmp(methodName, "ClearWithoutReferences") == 0)
+                        {
+                            result = NI_System_SpanHelpers_ClearWithoutReferences;
+                        }
+                        else if (strcmp(methodName, "Memmove") == 0)
+                        {
+                            result = NI_System_SpanHelpers_Memmove;
+                        }
                     }
                     else if (strcmp(className, "String") == 0)
                     {
@@ -9043,6 +9483,10 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
                         {
                             result = NI_System_String_StartsWith;
                         }
+                        else if (strcmp(methodName, "EndsWith") == 0)
+                        {
+                            result = NI_System_String_EndsWith;
+                        }
                     }
                     break;
                 }
@@ -9131,372 +9575,372 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
             else
 #endif // defined(TARGET_XARCH) || defined(TARGET_ARM64)
                 if (strcmp(namespaceName, "Collections.Generic") == 0)
-            {
-                if (strcmp(className, "Comparer`1") == 0)
                 {
-                    if (strcmp(methodName, "get_Default") == 0)
+                    if (strcmp(className, "Comparer`1") == 0)
                     {
-                        result = NI_System_Collections_Generic_Comparer_get_Default;
+                        if (strcmp(methodName, "get_Default") == 0)
+                        {
+                            result = NI_System_Collections_Generic_Comparer_get_Default;
+                        }
                     }
-                }
-                else if (strcmp(className, "EqualityComparer`1") == 0)
-                {
-                    if (strcmp(methodName, "get_Default") == 0)
+                    else if (strcmp(className, "EqualityComparer`1") == 0)
                     {
-                        result = NI_System_Collections_Generic_EqualityComparer_get_Default;
+                        if (strcmp(methodName, "get_Default") == 0)
+                        {
+                            result = NI_System_Collections_Generic_EqualityComparer_get_Default;
+                        }
                     }
                 }
-            }
-            else if (strcmp(namespaceName, "Numerics") == 0)
-            {
-                if (strcmp(className, "BitOperations") == 0)
-                {
-                    result = lookupPrimitiveIntNamedIntrinsic(method, methodName);
-                }
-                else
+                else if (strcmp(namespaceName, "Numerics") == 0)
                 {
+                    if (strcmp(className, "BitOperations") == 0)
+                    {
+                        result = lookupPrimitiveIntNamedIntrinsic(method, methodName);
+                    }
+                    else
+                    {
 #ifdef FEATURE_HW_INTRINSICS
-                    CORINFO_SIG_INFO sig;
-                    info.compCompHnd->getMethodSig(method, &sig);
+                        CORINFO_SIG_INFO sig;
+                        info.compCompHnd->getMethodSig(method, &sig);
 
-                    result = SimdAsHWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName);
+                        result = SimdAsHWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName);
 #endif // FEATURE_HW_INTRINSICS
 
-                    if (result == NI_Illegal)
-                    {
-                        // This allows the relevant code paths to be dropped as dead code even
-                        // on platforms where FEATURE_HW_INTRINSICS is not supported.
-
-                        if (strcmp(methodName, "get_IsSupported") == 0)
-                        {
-                            assert(strcmp(className, "Vector`1") == 0);
-                            result = NI_IsSupported_Type;
-                        }
-                        else if (strcmp(methodName, "get_IsHardwareAccelerated") == 0)
+                        if (result == NI_Illegal)
                         {
-                            result = NI_IsSupported_False;
-                        }
-                        else if (strcmp(methodName, "get_Count") == 0)
-                        {
-                            assert(strcmp(className, "Vector`1") == 0);
-                            result = NI_Vector_GetCount;
-                        }
-                        else if (gtIsRecursiveCall(method))
-                        {
-                            // For the framework itself, any recursive intrinsics will either be
-                            // only supported on a single platform or will be guarded by a relevant
-                            // IsSupported check so the throw PNSE will be valid or dropped.
+                            // This allows the relevant code paths to be dropped as dead code even
+                            // on platforms where FEATURE_HW_INTRINSICS is not supported.
+
+                            if (strcmp(methodName, "get_IsSupported") == 0)
+                            {
+                                assert(strcmp(className, "Vector`1") == 0);
+                                result = NI_IsSupported_Type;
+                            }
+                            else if (strcmp(methodName, "get_IsHardwareAccelerated") == 0)
+                            {
+                                result = NI_IsSupported_False;
+                            }
+                            else if (strcmp(methodName, "get_Count") == 0)
+                            {
+                                assert(strcmp(className, "Vector`1") == 0);
+                                result = NI_Vector_GetCount;
+                            }
+                            else if (gtIsRecursiveCall(method))
+                            {
+                                // For the framework itself, any recursive intrinsics will either be
+                                // only supported on a single platform or will be guarded by a relevant
+                                // IsSupported check so the throw PNSE will be valid or dropped.
 
-                            result = NI_Throw_PlatformNotSupportedException;
+                                result = NI_Throw_PlatformNotSupportedException;
+                            }
                         }
                     }
                 }
-            }
-            else if (strncmp(namespaceName, "Runtime.", 8) == 0)
-            {
-                namespaceName += 8;
-
-                if (strcmp(namespaceName, "CompilerServices") == 0)
+                else if (strncmp(namespaceName, "Runtime.", 8) == 0)
                 {
-                    if (strcmp(className, "RuntimeHelpers") == 0)
+                    namespaceName += 8;
+
+                    if (strcmp(namespaceName, "CompilerServices") == 0)
                     {
-                        if (strcmp(methodName, "CreateSpan") == 0)
-                        {
-                            result = NI_System_Runtime_CompilerServices_RuntimeHelpers_CreateSpan;
-                        }
-                        else if (strcmp(methodName, "InitializeArray") == 0)
+                        if (strcmp(className, "RuntimeHelpers") == 0)
                         {
-                            result = NI_System_Runtime_CompilerServices_RuntimeHelpers_InitializeArray;
+                            if (strcmp(methodName, "CreateSpan") == 0)
+                            {
+                                result = NI_System_Runtime_CompilerServices_RuntimeHelpers_CreateSpan;
+                            }
+                            else if (strcmp(methodName, "InitializeArray") == 0)
+                            {
+                                result = NI_System_Runtime_CompilerServices_RuntimeHelpers_InitializeArray;
+                            }
+                            else if (strcmp(methodName, "IsKnownConstant") == 0)
+                            {
+                                result = NI_System_Runtime_CompilerServices_RuntimeHelpers_IsKnownConstant;
+                            }
                         }
-                        else if (strcmp(methodName, "IsKnownConstant") == 0)
+                        else if (strcmp(className, "Unsafe") == 0)
                         {
-                            result = NI_System_Runtime_CompilerServices_RuntimeHelpers_IsKnownConstant;
+                            if (strcmp(methodName, "Add") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_Add;
+                            }
+                            else if (strcmp(methodName, "AddByteOffset") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_AddByteOffset;
+                            }
+                            else if (strcmp(methodName, "AreSame") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_AreSame;
+                            }
+                            else if (strcmp(methodName, "As") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_As;
+                            }
+                            else if (strcmp(methodName, "AsPointer") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_AsPointer;
+                            }
+                            else if (strcmp(methodName, "AsRef") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_AsRef;
+                            }
+                            else if (strcmp(methodName, "BitCast") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_BitCast;
+                            }
+                            else if (strcmp(methodName, "ByteOffset") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_ByteOffset;
+                            }
+                            else if (strcmp(methodName, "Copy") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_Copy;
+                            }
+                            else if (strcmp(methodName, "CopyBlock") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_CopyBlock;
+                            }
+                            else if (strcmp(methodName, "CopyBlockUnaligned") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_CopyBlockUnaligned;
+                            }
+                            else if (strcmp(methodName, "InitBlock") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_InitBlock;
+                            }
+                            else if (strcmp(methodName, "InitBlockUnaligned") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_InitBlockUnaligned;
+                            }
+                            else if (strcmp(methodName, "IsAddressGreaterThan") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_IsAddressGreaterThan;
+                            }
+                            else if (strcmp(methodName, "IsAddressLessThan") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_IsAddressLessThan;
+                            }
+                            else if (strcmp(methodName, "IsNullRef") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_IsNullRef;
+                            }
+                            else if (strcmp(methodName, "NullRef") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_NullRef;
+                            }
+                            else if (strcmp(methodName, "Read") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_Read;
+                            }
+                            else if (strcmp(methodName, "ReadUnaligned") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_ReadUnaligned;
+                            }
+                            else if (strcmp(methodName, "SizeOf") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_SizeOf;
+                            }
+                            else if (strcmp(methodName, "SkipInit") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_SkipInit;
+                            }
+                            else if (strcmp(methodName, "Subtract") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_Subtract;
+                            }
+                            else if (strcmp(methodName, "SubtractByteOffset") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_SubtractByteOffset;
+                            }
+                            else if (strcmp(methodName, "Unbox") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_Unbox;
+                            }
+                            else if (strcmp(methodName, "Write") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_Write;
+                            }
+                            else if (strcmp(methodName, "WriteUnaligned") == 0)
+                            {
+                                result = NI_SRCS_UNSAFE_WriteUnaligned;
+                            }
                         }
                     }
-                    else if (strcmp(className, "Unsafe") == 0)
+                    else if (strcmp(namespaceName, "InteropServices") == 0)
                     {
-                        if (strcmp(methodName, "Add") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_Add;
-                        }
-                        else if (strcmp(methodName, "AddByteOffset") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_AddByteOffset;
-                        }
-                        else if (strcmp(methodName, "AreSame") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_AreSame;
-                        }
-                        else if (strcmp(methodName, "As") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_As;
-                        }
-                        else if (strcmp(methodName, "AsPointer") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_AsPointer;
-                        }
-                        else if (strcmp(methodName, "AsRef") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_AsRef;
-                        }
-                        else if (strcmp(methodName, "BitCast") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_BitCast;
-                        }
-                        else if (strcmp(methodName, "ByteOffset") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_ByteOffset;
-                        }
-                        else if (strcmp(methodName, "Copy") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_Copy;
-                        }
-                        else if (strcmp(methodName, "CopyBlock") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_CopyBlock;
-                        }
-                        else if (strcmp(methodName, "CopyBlockUnaligned") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_CopyBlockUnaligned;
-                        }
-                        else if (strcmp(methodName, "InitBlock") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_InitBlock;
-                        }
-                        else if (strcmp(methodName, "InitBlockUnaligned") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_InitBlockUnaligned;
-                        }
-                        else if (strcmp(methodName, "IsAddressGreaterThan") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_IsAddressGreaterThan;
-                        }
-                        else if (strcmp(methodName, "IsAddressLessThan") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_IsAddressLessThan;
-                        }
-                        else if (strcmp(methodName, "IsNullRef") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_IsNullRef;
-                        }
-                        else if (strcmp(methodName, "NullRef") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_NullRef;
-                        }
-                        else if (strcmp(methodName, "Read") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_Read;
-                        }
-                        else if (strcmp(methodName, "ReadUnaligned") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_ReadUnaligned;
-                        }
-                        else if (strcmp(methodName, "SizeOf") == 0)
+                        if (strcmp(className, "MemoryMarshal") == 0)
                         {
-                            result = NI_SRCS_UNSAFE_SizeOf;
-                        }
-                        else if (strcmp(methodName, "SkipInit") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_SkipInit;
-                        }
-                        else if (strcmp(methodName, "Subtract") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_Subtract;
-                        }
-                        else if (strcmp(methodName, "SubtractByteOffset") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_SubtractByteOffset;
-                        }
-                        else if (strcmp(methodName, "Unbox") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_Unbox;
-                        }
-                        else if (strcmp(methodName, "Write") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_Write;
-                        }
-                        else if (strcmp(methodName, "WriteUnaligned") == 0)
-                        {
-                            result = NI_SRCS_UNSAFE_WriteUnaligned;
+                            if (strcmp(methodName, "GetArrayDataReference") == 0)
+                            {
+                                result = NI_System_Runtime_InteropService_MemoryMarshal_GetArrayDataReference;
+                            }
                         }
                     }
-                }
-                else if (strcmp(namespaceName, "InteropServices") == 0)
-                {
-                    if (strcmp(className, "MemoryMarshal") == 0)
+                    else if (strncmp(namespaceName, "Intrinsics", 10) == 0)
                     {
-                        if (strcmp(methodName, "GetArrayDataReference") == 0)
-                        {
-                            result = NI_System_Runtime_InteropService_MemoryMarshal_GetArrayDataReference;
-                        }
-                    }
-                }
-                else if (strncmp(namespaceName, "Intrinsics", 10) == 0)
-                {
-                    // We go down this path even when FEATURE_HW_INTRINSICS isn't enabled
-                    // so we can specially handle IsSupported and recursive calls.
-
-                    // This is required to appropriately handle the intrinsics on platforms
-                    // which don't support them. On such a platform methods like Vector64.Create
-                    // will be seen as `Intrinsic` and `mustExpand` due to having a code path
-                    // which is recursive. When such a path is hit we expect it to be handled by
-                    // the importer and we fire an assert if it wasn't and in previous versions
-                    // of the JIT would fail fast. This was changed to throw a PNSE instead but
-                    // we still assert as most intrinsics should have been recognized/handled.
-
-                    // In order to avoid the assert, we specially handle the IsSupported checks
-                    // (to better allow dead-code optimizations) and we explicitly throw a PNSE
-                    // as we know that is the desired behavior for the HWIntrinsics when not
-                    // supported. For cases like Vector64.Create, this is fine because it will
-                    // be behind a relevant IsSupported check and will never be hit and the
-                    // software fallback will be executed instead.
-
-                    CLANG_FORMAT_COMMENT_ANCHOR;
+                        // We go down this path even when FEATURE_HW_INTRINSICS isn't enabled
+                        // so we can specially handle IsSupported and recursive calls.
+
+                        // This is required to appropriately handle the intrinsics on platforms
+                        // which don't support them. On such a platform methods like Vector64.Create
+                        // will be seen as `Intrinsic` and `mustExpand` due to having a code path
+                        // which is recursive. When such a path is hit we expect it to be handled by
+                        // the importer and we fire an assert if it wasn't and in previous versions
+                        // of the JIT would fail fast. This was changed to throw a PNSE instead but
+                        // we still assert as most intrinsics should have been recognized/handled.
+
+                        // In order to avoid the assert, we specially handle the IsSupported checks
+                        // (to better allow dead-code optimizations) and we explicitly throw a PNSE
+                        // as we know that is the desired behavior for the HWIntrinsics when not
+                        // supported. For cases like Vector64.Create, this is fine because it will
+                        // be behind a relevant IsSupported check and will never be hit and the
+                        // software fallback will be executed instead.
 
 #ifdef FEATURE_HW_INTRINSICS
-                    namespaceName += 10;
-                    const char* platformNamespaceName;
+                        namespaceName += 10;
+                        const char* platformNamespaceName;
 
 #if defined(TARGET_XARCH)
-                    platformNamespaceName = ".X86";
+                        platformNamespaceName = ".X86";
 #elif defined(TARGET_ARM64)
-                    platformNamespaceName = ".Arm";
+                        platformNamespaceName = ".Arm";
 #else
 #error Unsupported platform
 #endif
 
-                    if ((namespaceName[0] == '\0') || (strcmp(namespaceName, platformNamespaceName) == 0))
-                    {
-                        CORINFO_SIG_INFO sig;
-                        info.compCompHnd->getMethodSig(method, &sig);
+                        if ((namespaceName[0] == '\0') || (strcmp(namespaceName, platformNamespaceName) == 0))
+                        {
+                            CORINFO_SIG_INFO sig;
+                            info.compCompHnd->getMethodSig(method, &sig);
 
-                        result = HWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName);
-                    }
+                            result = HWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName);
+                        }
 #endif // FEATURE_HW_INTRINSICS
 
-                    if (result == NI_Illegal)
-                    {
-                        // This allows the relevant code paths to be dropped as dead code even
-                        // on platforms where FEATURE_HW_INTRINSICS is not supported.
-
-                        if (strcmp(methodName, "get_IsSupported") == 0)
+                        if (result == NI_Illegal)
                         {
-                            if (strncmp(className, "Vector", 6) == 0)
+                            // This allows the relevant code paths to be dropped as dead code even
+                            // on platforms where FEATURE_HW_INTRINSICS is not supported.
+
+                            if (strcmp(methodName, "get_IsSupported") == 0)
                             {
-                                assert(
-                                    (strcmp(className, "Vector64`1") == 0) || (strcmp(className, "Vector128`1") == 0) ||
-                                    (strcmp(className, "Vector256`1") == 0) || (strcmp(className, "Vector512`1") == 0));
+                                if (strncmp(className, "Vector", 6) == 0)
+                                {
+                                    assert((strcmp(className, "Vector64`1") == 0) ||
+                                           (strcmp(className, "Vector128`1") == 0) ||
+                                           (strcmp(className, "Vector256`1") == 0) ||
+                                           (strcmp(className, "Vector512`1") == 0));
 
-                                result = NI_IsSupported_Type;
+                                    result = NI_IsSupported_Type;
+                                }
+                                else
+                                {
+                                    result = NI_IsSupported_False;
+                                }
                             }
-                            else
+                            else if (strcmp(methodName, "get_IsHardwareAccelerated") == 0)
                             {
                                 result = NI_IsSupported_False;
                             }
-                        }
-                        else if (strcmp(methodName, "get_IsHardwareAccelerated") == 0)
-                        {
-                            result = NI_IsSupported_False;
-                        }
-                        else if (strcmp(methodName, "get_Count") == 0)
-                        {
-                            assert((strcmp(className, "Vector64`1") == 0) || (strcmp(className, "Vector128`1") == 0) ||
-                                   (strcmp(className, "Vector256`1") == 0) || (strcmp(className, "Vector512`1") == 0));
+                            else if (strcmp(methodName, "get_Count") == 0)
+                            {
+                                assert(
+                                    (strcmp(className, "Vector64`1") == 0) || (strcmp(className, "Vector128`1") == 0) ||
+                                    (strcmp(className, "Vector256`1") == 0) || (strcmp(className, "Vector512`1") == 0));
 
-                            result = NI_Vector_GetCount;
-                        }
-                        else if (gtIsRecursiveCall(method))
-                        {
-                            // For the framework itself, any recursive intrinsics will either be
-                            // only supported on a single platform or will be guarded by a relevant
-                            // IsSupported check so the throw PNSE will be valid or dropped.
+                                result = NI_Vector_GetCount;
+                            }
+                            else if (gtIsRecursiveCall(method))
+                            {
+                                // For the framework itself, any recursive intrinsics will either be
+                                // only supported on a single platform or will be guarded by a relevant
+                                // IsSupported check so the throw PNSE will be valid or dropped.
 
-                            result = NI_Throw_PlatformNotSupportedException;
+                                result = NI_Throw_PlatformNotSupportedException;
+                            }
                         }
                     }
                 }
-            }
-            else if (strcmp(namespaceName, "StubHelpers") == 0)
-            {
-                if (strcmp(className, "StubHelpers") == 0)
+                else if (strcmp(namespaceName, "StubHelpers") == 0)
                 {
-                    if (strcmp(methodName, "GetStubContext") == 0)
+                    if (strcmp(className, "StubHelpers") == 0)
                     {
-                        result = NI_System_StubHelpers_GetStubContext;
-                    }
-                    else if (strcmp(methodName, "NextCallReturnAddress") == 0)
-                    {
-                        result = NI_System_StubHelpers_NextCallReturnAddress;
-                    }
-                }
-            }
-            else if (strcmp(namespaceName, "Text") == 0)
-            {
-                if (strcmp(className, "UTF8EncodingSealed") == 0)
-                {
-                    if (strcmp(methodName, "ReadUtf8") == 0)
-                    {
-                        assert(strcmp(enclosingClassName, "UTF8Encoding") == 0);
-                        result = NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8;
+                        if (strcmp(methodName, "GetStubContext") == 0)
+                        {
+                            result = NI_System_StubHelpers_GetStubContext;
+                        }
+                        else if (strcmp(methodName, "NextCallReturnAddress") == 0)
+                        {
+                            result = NI_System_StubHelpers_NextCallReturnAddress;
+                        }
                     }
                 }
-            }
-            else if (strcmp(namespaceName, "Threading") == 0)
-            {
-                if (strcmp(className, "Interlocked") == 0)
+                else if (strcmp(namespaceName, "Text") == 0)
                 {
-                    if (strcmp(methodName, "And") == 0)
-                    {
-                        result = NI_System_Threading_Interlocked_And;
-                    }
-                    else if (strcmp(methodName, "Or") == 0)
-                    {
-                        result = NI_System_Threading_Interlocked_Or;
-                    }
-                    else if (strcmp(methodName, "CompareExchange") == 0)
-                    {
-                        result = NI_System_Threading_Interlocked_CompareExchange;
-                    }
-                    else if (strcmp(methodName, "Exchange") == 0)
+                    if (strcmp(className, "UTF8EncodingSealed") == 0)
                     {
-                        result = NI_System_Threading_Interlocked_Exchange;
-                    }
-                    else if (strcmp(methodName, "ExchangeAdd") == 0)
-                    {
-                        result = NI_System_Threading_Interlocked_ExchangeAdd;
-                    }
-                    else if (strcmp(methodName, "MemoryBarrier") == 0)
-                    {
-                        result = NI_System_Threading_Interlocked_MemoryBarrier;
-                    }
-                    else if (strcmp(methodName, "ReadMemoryBarrier") == 0)
-                    {
-                        result = NI_System_Threading_Interlocked_ReadMemoryBarrier;
+                        if (strcmp(methodName, "ReadUtf8") == 0)
+                        {
+                            assert(strcmp(enclosingClassName, "UTF8Encoding") == 0);
+                            result = NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8;
+                        }
                     }
                 }
-                else if (strcmp(className, "Thread") == 0)
+                else if (strcmp(namespaceName, "Threading") == 0)
                 {
-                    if (strcmp(methodName, "get_CurrentThread") == 0)
-                    {
-                        result = NI_System_Threading_Thread_get_CurrentThread;
-                    }
-                    else if (strcmp(methodName, "get_ManagedThreadId") == 0)
+                    if (strcmp(className, "Interlocked") == 0)
                     {
-                        result = NI_System_Threading_Thread_get_ManagedThreadId;
+                        if (strcmp(methodName, "And") == 0)
+                        {
+                            result = NI_System_Threading_Interlocked_And;
+                        }
+                        else if (strcmp(methodName, "Or") == 0)
+                        {
+                            result = NI_System_Threading_Interlocked_Or;
+                        }
+                        else if (strcmp(methodName, "CompareExchange") == 0)
+                        {
+                            result = NI_System_Threading_Interlocked_CompareExchange;
+                        }
+                        else if (strcmp(methodName, "Exchange") == 0)
+                        {
+                            result = NI_System_Threading_Interlocked_Exchange;
+                        }
+                        else if (strcmp(methodName, "ExchangeAdd") == 0)
+                        {
+                            result = NI_System_Threading_Interlocked_ExchangeAdd;
+                        }
+                        else if (strcmp(methodName, "MemoryBarrier") == 0)
+                        {
+                            result = NI_System_Threading_Interlocked_MemoryBarrier;
+                        }
+                        else if (strcmp(methodName, "ReadMemoryBarrier") == 0)
+                        {
+                            result = NI_System_Threading_Interlocked_ReadMemoryBarrier;
+                        }
                     }
-                }
-                else if (strcmp(className, "Volatile") == 0)
-                {
-                    if (strcmp(methodName, "Read") == 0)
+                    else if (strcmp(className, "Thread") == 0)
                     {
-                        result = NI_System_Threading_Volatile_Read;
+                        if (strcmp(methodName, "get_CurrentThread") == 0)
+                        {
+                            result = NI_System_Threading_Thread_get_CurrentThread;
+                        }
+                        else if (strcmp(methodName, "get_ManagedThreadId") == 0)
+                        {
+                            result = NI_System_Threading_Thread_get_ManagedThreadId;
+                        }
                     }
-                    else if (strcmp(methodName, "Write") == 0)
+                    else if (strcmp(className, "Volatile") == 0)
                     {
-                        result = NI_System_Threading_Volatile_Write;
+                        if (strcmp(methodName, "Read") == 0)
+                        {
+                            result = NI_System_Threading_Volatile_Read;
+                        }
+                        else if (strcmp(methodName, "Write") == 0)
+                        {
+                            result = NI_System_Threading_Volatile_Write;
+                        }
                     }
                 }
-            }
         }
     }
     else if (strcmp(namespaceName, "Internal.Runtime") == 0)
@@ -9653,10 +10097,6 @@ NamedIntrinsic Compiler::lookupPrimitiveFloatNamedIntrinsic(CORINFO_METHOD_HANDL
             {
                 result = NI_System_Math_Floor;
             }
-            else if (strcmp(methodName, "FMod") == 0)
-            {
-                result = NI_System_Math_FMod;
-            }
             else if (strcmp(methodName, "FusedMultiplyAdd") == 0)
             {
                 result = NI_System_Math_FusedMultiplyAdd;
@@ -10028,7 +10468,7 @@ GenTree* Compiler::impArrayAccessIntrinsic(
 
     if (intrinsicName == NI_Array_Set)
     {
-        // Assignment of a struct is more work, and there are more gets than sets.
+        // Stores of structs require more work, and there are more gets than sets.
         // TODO-CQ: support SET (`a[i,j,k] = s`) for struct element arrays.
         if (varTypeIsStruct(elemType))
         {
diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp
index 01ee4916d4ee..dddc14dec3b9 100644
--- a/src/coreclr/jit/importervectorization.cpp
+++ b/src/coreclr/jit/importervectorization.cpp
@@ -25,6 +25,10 @@
 //   8) MemoryExtensions.StartsWith<char>(ROS<char>, ROS<char>)
 //   9) MemoryExtensions.StartsWith(ROS<char>, ROS<char>, Ordinal or OrdinalIgnoreCase)
 //
+//   10) str.EndsWith(string, Ordinal or OrdinalIgnoreCase)
+//   11) MemoryExtensions.EndsWith<char>(ROS<char>, ROS<char>)
+//   12) MemoryExtensions.EndsWith(ROS<char>, ROS<char>, Ordinal or OrdinalIgnoreCase)
+//
 // When one of the arguments is a constant string of a [0..32] size so we can inline
 // a vectorized comparison against it using SWAR or SIMD techniques (e.g. via two V256 vectors)
 //
@@ -178,7 +182,7 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD(
         xor1 = gtNewSimdBinOpNode(GT_XOR, simdType, vec1, cnsVec1, baseType, simdSize);
     }
 
-// ((v1 ^ cns1) | (v2 ^ cns2)) == zero
+    // ((v1 ^ cns1) | (v2 ^ cns2)) == zero
 
 #if defined(TARGET_XARCH)
     if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
@@ -278,12 +282,12 @@ GenTree* Compiler::impCreateCompareInd(GenTreeLclVarCommon*  obj,
     }
 
     GenTree* valueTree = gtNewIconNode(value, actualType);
-    if (joint == Xor)
+    if (joint == StringComparisonJoint::Xor)
     {
         // XOR is better than CMP if we want to join multiple comparisons
         return gtNewOperNode(GT_XOR, actualType, indirTree, valueTree);
     }
-    assert(joint == Eq);
+    assert(joint == StringComparisonJoint::Eq);
     return gtNewOperNode(GT_EQ, TYP_INT, indirTree, valueTree);
 }
 
@@ -313,7 +317,7 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR(
     assert(len >= 1 && len <= 8);
 
 // Compose Int32 or Int64 values from ushort components
-#define MAKEINT32(c1, c2) ((UINT64)c2 << 16) | ((UINT64)c1 << 0)
+#define MAKEINT32(c1, c2)         ((UINT64)c2 << 16) | ((UINT64)c1 << 0)
 #define MAKEINT64(c1, c2, c3, c4) ((UINT64)c4 << 48) | ((UINT64)c3 << 32) | ((UINT64)c2 << 16) | ((UINT64)c1 << 0)
 
     if (len == 1)
@@ -342,11 +346,13 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR(
         //
         // where offset for value2 is 2 bytes (1 char)
         //
-        UINT32   value1      = MAKEINT32(cns[0], cns[1]);
-        UINT32   value2      = MAKEINT32(cns[1], cns[2]);
-        GenTree* firstIndir  = impCreateCompareInd(data, TYP_INT, dataOffset, value1, cmpMode, Xor);
-        GenTree* secondIndir = impCreateCompareInd(gtClone(data)->AsLclVarCommon(), TYP_INT,
-                                                   dataOffset + sizeof(USHORT), value2, cmpMode, Xor);
+        UINT32   value1 = MAKEINT32(cns[0], cns[1]);
+        UINT32   value2 = MAKEINT32(cns[1], cns[2]);
+        GenTree* firstIndir =
+            impCreateCompareInd(data, TYP_INT, dataOffset, value1, cmpMode, StringComparisonJoint::Xor);
+        GenTree* secondIndir =
+            impCreateCompareInd(gtClone(data)->AsLclVarCommon(), TYP_INT, dataOffset + sizeof(USHORT), value2, cmpMode,
+                                StringComparisonJoint::Xor);
 
         if ((firstIndir == nullptr) || (secondIndir == nullptr))
         {
@@ -377,12 +383,13 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR(
     // For 5..6 the overlapping part is 4 bytes
     if (len <= 6)
     {
-        UINT32   value2     = MAKEINT32(cns[len - 2], cns[len - 1]);
-        GenTree* firstIndir = impCreateCompareInd(data, TYP_LONG, dataOffset, value1, cmpMode, Xor);
+        UINT32   value2 = MAKEINT32(cns[len - 2], cns[len - 1]);
+        GenTree* firstIndir =
+            impCreateCompareInd(data, TYP_LONG, dataOffset, value1, cmpMode, StringComparisonJoint::Xor);
 
-        ssize_t  offset = dataOffset + len * sizeof(WCHAR) - sizeof(UINT32);
-        GenTree* secondIndir =
-            impCreateCompareInd(gtClone(data)->AsLclVarCommon(), TYP_INT, offset, value2, cmpMode, Xor);
+        ssize_t  offset      = dataOffset + len * sizeof(WCHAR) - sizeof(UINT32);
+        GenTree* secondIndir = impCreateCompareInd(gtClone(data)->AsLclVarCommon(), TYP_INT, offset, value2, cmpMode,
+                                                   StringComparisonJoint::Xor);
 
         if ((firstIndir == nullptr) || (secondIndir == nullptr))
         {
@@ -398,10 +405,11 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR(
     assert((len == 7) || (len == 8));
 
     UINT64   value2     = MAKEINT64(cns[len - 4], cns[len - 3], cns[len - 2], cns[len - 1]);
-    GenTree* firstIndir = impCreateCompareInd(data, TYP_LONG, dataOffset, value1, cmpMode, Xor);
+    GenTree* firstIndir = impCreateCompareInd(data, TYP_LONG, dataOffset, value1, cmpMode, StringComparisonJoint::Xor);
 
     ssize_t  offset      = dataOffset + len * sizeof(WCHAR) - sizeof(UINT64);
-    GenTree* secondIndir = impCreateCompareInd(gtClone(data)->AsLclVarCommon(), TYP_LONG, offset, value2, cmpMode, Xor);
+    GenTree* secondIndir = impCreateCompareInd(gtClone(data)->AsLclVarCommon(), TYP_LONG, offset, value2, cmpMode,
+                                               StringComparisonJoint::Xor);
 
     if ((firstIndir == nullptr) || (secondIndir == nullptr))
     {
@@ -426,7 +434,7 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR(
 //    data         - Pointer (LCL_VAR) to a data to vectorize
 //    lengthFld    - Pointer (LCL_VAR or GT_IND) to Length field
 //    checkForNull - Check data for null
-//    startsWith   - Is it StartsWith or Equals?
+//    kind         - Is it StartsWith, Equals or EndsWith?
 //    cns          - Constant data (array of 2-byte chars)
 //    len          - Number of 2-byte chars in the cns
 //    dataOffset   - Offset for data
@@ -439,7 +447,7 @@ GenTree* Compiler::impExpandHalfConstEqualsSWAR(
 GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data,
                                             GenTree*             lengthFld,
                                             bool                 checkForNull,
-                                            bool                 startsWith,
+                                            StringComparisonKind kind,
                                             WCHAR*               cnsData,
                                             int                  len,
                                             int                  dataOffset,
@@ -454,14 +462,14 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data,
         return nullptr;
     }
 
-    const genTreeOps cmpOp         = startsWith ? GT_GE : GT_EQ;
+    const genTreeOps cmpOp         = kind == StringComparisonKind::Equals ? GT_EQ : GT_GE;
     GenTree*         elementsCount = gtNewIconNode(len);
     GenTree*         lenCheckNode;
     if (len == 0)
     {
         // For zero length we don't need to compare content, the following expression is enough:
         //
-        //   varData != null && lengthFld == 0
+        //   varData != null && lengthFld cmpOp 0
         //
         lenCheckNode = gtNewOperNode(cmpOp, TYP_INT, lengthFld, elementsCount);
     }
@@ -469,15 +477,26 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data,
     {
         assert(cnsData != nullptr);
 
+        GenTreeLclVarCommon* dataAddr = gtClone(data)->AsLclVarCommon();
+
+        if (kind == StringComparisonKind::EndsWith)
+        {
+            // For EndsWith we need to adjust dataAddr to point to the end of the string minus value's length
+            // We spawn a local that we're going to set below
+            unsigned dataTmp         = lvaGrabTemp(true DEBUGARG("clonning data ptr"));
+            lvaTable[dataTmp].lvType = TYP_BYREF;
+            dataAddr                 = gtNewLclvNode(dataTmp, TYP_BYREF);
+        }
+
         GenTree* indirCmp = nullptr;
         if (len < 8) // SWAR impl supports len == 8 but we'd better give it to SIMD
         {
-            indirCmp = impExpandHalfConstEqualsSWAR(gtClone(data)->AsLclVarCommon(), cnsData, len, dataOffset, cmpMode);
+            indirCmp = impExpandHalfConstEqualsSWAR(dataAddr, cnsData, len, dataOffset, cmpMode);
         }
 #if defined(FEATURE_HW_INTRINSICS)
         else if (IsBaselineSimdIsaSupported())
         {
-            indirCmp = impExpandHalfConstEqualsSIMD(gtClone(data)->AsLclVarCommon(), cnsData, len, dataOffset, cmpMode);
+            indirCmp = impExpandHalfConstEqualsSIMD(dataAddr, cnsData, len, dataOffset, cmpMode);
         }
 #endif
 
@@ -488,9 +507,24 @@ GenTree* Compiler::impExpandHalfConstEquals(GenTreeLclVarCommon* data,
         }
         assert(indirCmp->TypeIs(TYP_INT, TYP_UBYTE));
 
+        if (kind == StringComparisonKind::EndsWith)
+        {
+            // len is expected to be small, so no overflow is possible
+            assert(!CheckedOps::MulOverflows(len, 2, CheckedOps::Signed));
+
+            // dataAddr = dataAddr + (length * 2 - len * 2)
+            GenTree*   castedLen = gtNewCastNode(TYP_I_IMPL, gtCloneExpr(lengthFld), false, TYP_I_IMPL);
+            GenTree*   byteLen   = gtNewOperNode(GT_MUL, TYP_I_IMPL, castedLen, gtNewIconNode(2, TYP_I_IMPL));
+            GenTreeOp* cmpStart  = gtNewOperNode(GT_ADD, TYP_BYREF, gtClone(data),
+                                                 gtNewOperNode(GT_SUB, TYP_I_IMPL, byteLen,
+                                                               gtNewIconNode((ssize_t)(len * 2), TYP_I_IMPL)));
+            GenTree*   storeTmp  = gtNewTempStore(dataAddr->GetLclNum(), cmpStart);
+            indirCmp             = gtNewOperNode(GT_COMMA, indirCmp->TypeGet(), storeTmp, indirCmp);
+        }
+
         GenTreeColon* lenCheckColon = gtNewColonNode(TYP_INT, indirCmp, gtNewFalse());
 
-        // For StartsWith we use GT_GE, e.g.: `x.Length >= 10`
+        // For StartsWith/EndsWith we use GT_GE, e.g.: `x.Length >= 10`
         lenCheckNode = gtNewQmarkNode(TYP_INT, gtNewOperNode(cmpOp, TYP_INT, lengthFld, elementsCount), lenCheckColon);
     }
 
@@ -556,7 +590,7 @@ GenTreeStrCon* Compiler::impGetStrConFromSpan(GenTree* span)
 }
 
 //------------------------------------------------------------------------
-// impStringEqualsOrStartsWith: The main entry-point for String methods
+// impUtf16StringComparison: The main entry-point for String methods
 //   We're going to unroll & vectorize the following cases:
 //    1) String.Equals(obj, "cns")
 //    2) String.Equals(obj, "cns", Ordinal or OrdinalIgnoreCase)
@@ -570,18 +604,21 @@ GenTreeStrCon* Compiler::impGetStrConFromSpan(GenTree* span)
 //    9) obj.StartsWith("cns", Ordinal or OrdinalIgnoreCase)
 //   10) "cns".StartsWith(obj, Ordinal or OrdinalIgnoreCase)
 //
+//   11) obj.EndsWith("cns", Ordinal or OrdinalIgnoreCase)
+//   12) "cns".EndsWith(obj, Ordinal or OrdinalIgnoreCase)
+//
 //   For cases 5, 6 and 9 we don't emit "obj != null"
 //   NOTE: String.Equals(object) is not supported currently
 //
 // Arguments:
-//    startsWith  - Is it StartsWith or Equals?
-//    sig         - signature of StartsWith or Equals method
+//    kind        - Is it StartsWith, EndsWith or Equals?
+//    sig         - signature of StartsWith, EndsWith or Equals method
 //    methodFlags - its flags
 //
 // Returns:
 //    GenTree representing vectorized comparison or nullptr
 //
-GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO* sig, unsigned methodFlags)
+GenTree* Compiler::impUtf16StringComparison(StringComparisonKind kind, CORINFO_SIG_INFO* sig, unsigned methodFlags)
 {
     const bool isStatic  = methodFlags & CORINFO_FLG_STATIC;
     const int  argsCount = sig->numArgs + (isStatic ? 0 : 1);
@@ -589,7 +626,7 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO
     // This optimization spawns several temps so make sure we have a room
     if (lvaHaveManyLocals(0.75))
     {
-        JITDUMP("impSpanEqualsOrStartsWith: Method has too many locals - bail out.\n")
+        JITDUMP("impUtf16StringComparison: Method has too many locals - bail out.\n")
         return nullptr;
     }
 
@@ -630,9 +667,9 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO
     }
     else
     {
-        if (startsWith)
+        if (kind != StringComparisonKind::Equals)
         {
-            // StartsWith is not commutative
+            // StartsWith and EndsWith are not commutative
             return nullptr;
         }
         cnsStr = op1->AsStrCon();
@@ -647,6 +684,7 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO
         //  obj.Equals("cns")
         //  obj.Equals("cns", Ordinal or OrdinalIgnoreCase)
         //  obj.StartsWith("cns", Ordinal or OrdinalIgnoreCase)
+        //  obj.EndsWith("cns", Ordinal or OrdinalIgnoreCase)
         //
         // instead, it should throw NRE if it's null
         needsNullcheck = false;
@@ -658,7 +696,7 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO
     {
         // check for fake "" first
         cnsLength = 0;
-        JITDUMP("Trying to unroll String.Equals|StartsWith(op1, \"\")...\n", str)
+        JITDUMP("Trying to unroll String.Equals|StartsWith|EndsWith(op1, \"\")...\n", str)
     }
     else
     {
@@ -668,7 +706,7 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO
             // We were unable to get the literal (e.g. dynamic context)
             return nullptr;
         }
-        JITDUMP("Trying to unroll String.Equals|StartsWith(op1, \"cns\")...\n")
+        JITDUMP("Trying to unroll String.Equals|StartsWith|EndsWith(op1, \"cns\")...\n")
     }
 
     // Create a temp which is safe to gtClone for varStr
@@ -682,16 +720,16 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO
     GenTree* lenNode      = gtNewArrLen(TYP_INT, varStrLcl, strLenOffset, compCurBB);
     varStrLcl             = gtClone(varStrLcl)->AsLclVar();
 
-    GenTree* unrolled = impExpandHalfConstEquals(varStrLcl, lenNode, needsNullcheck, startsWith, (WCHAR*)str, cnsLength,
+    GenTree* unrolled = impExpandHalfConstEquals(varStrLcl, lenNode, needsNullcheck, kind, (WCHAR*)str, cnsLength,
                                                  strLenOffset + sizeof(int), cmpMode);
     if (unrolled != nullptr)
     {
-        impStoreTemp(varStrTmp, varStr, CHECK_SPILL_NONE);
+        impStoreToTemp(varStrTmp, varStr, CHECK_SPILL_NONE);
         if (unrolled->OperIs(GT_QMARK))
         {
             // QMARK nodes cannot reside on the evaluation stack
             unsigned rootTmp = lvaGrabTemp(true DEBUGARG("spilling unroll qmark"));
-            impStoreTemp(rootTmp, unrolled, CHECK_SPILL_NONE);
+            impStoreToTemp(rootTmp, unrolled, CHECK_SPILL_NONE);
             unrolled = gtNewLclvNode(rootTmp, TYP_INT);
         }
 
@@ -706,7 +744,7 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO
 }
 
 //------------------------------------------------------------------------
-// impSpanEqualsOrStartsWith: The main entry-point for [ReadOnly]Span<char> methods
+// impUtf16SpanComparison: The main entry-point for [ReadOnly]Span<char> methods
 //    We're going to unroll & vectorize the following cases:
 //    1) MemoryExtensions.SequenceEqual<char>(var, "cns")
 //    2) MemoryExtensions.SequenceEqual<char>("cns", var)
@@ -717,15 +755,20 @@ GenTree* Compiler::impStringEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO
 //    7) MemoryExtensions.StartsWith("cns", var, Ordinal or OrdinalIgnoreCase)
 //    8) MemoryExtensions.StartsWith(var, "cns", Ordinal or OrdinalIgnoreCase)
 //
+//    9) MemoryExtensions.EndsWith<char>("cns", var)
+//    10) MemoryExtensions.EndsWith<char>(var, "cns")
+//    11) MemoryExtensions.EndsWith("cns", var, Ordinal or OrdinalIgnoreCase)
+//    12) MemoryExtensions.EndsWith(var, "cns", Ordinal or OrdinalIgnoreCase)
+//
 // Arguments:
-//    startsWith  - Is it StartsWith or Equals?
-//    sig         - signature of StartsWith or Equals method
+//    kind        - Is it StartsWith, EndsWith or Equals?
+//    sig         - signature of StartsWith, EndsWith or Equals method
 //    methodFlags - its flags
 //
 // Returns:
 //    GenTree representing vectorized comparison or nullptr
 //
-GenTree* Compiler::impSpanEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO* sig, unsigned methodFlags)
+GenTree* Compiler::impUtf16SpanComparison(StringComparisonKind kind, CORINFO_SIG_INFO* sig, unsigned methodFlags)
 {
     const bool isStatic  = methodFlags & CORINFO_FLG_STATIC;
     const int  argsCount = sig->numArgs + (isStatic ? 0 : 1);
@@ -733,7 +776,7 @@ GenTree* Compiler::impSpanEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO*
     // This optimization spawns several temps so make sure we have a room
     if (lvaHaveManyLocals(0.75))
     {
-        JITDUMP("impSpanEqualsOrStartsWith: Method has too many locals - bail out.\n")
+        JITDUMP("impUtf16SpanComparison: Method has too many locals - bail out.\n")
         return nullptr;
     }
 
@@ -760,7 +803,7 @@ GenTree* Compiler::impSpanEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO*
         op2 = impStackTop(0).val;
     }
 
-    // For generic StartsWith and Equals we need to make sure T is char
+    // For generic StartsWith, EndsWith and Equals we need to make sure T is char
     if (sig->sigInst.methInstCount != 0)
     {
         assert(sig->sigInst.methInstCount == 1);
@@ -790,9 +833,9 @@ GenTree* Compiler::impSpanEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO*
     }
     else
     {
-        if (startsWith)
+        if (kind != StringComparisonKind::Equals)
         {
-            // StartsWith is not commutative
+            // StartsWith and EndsWith are not commutative
             return nullptr;
         }
         cnsStr  = op1Str;
@@ -835,21 +878,21 @@ GenTree* Compiler::impSpanEqualsOrStartsWith(bool startsWith, CORINFO_SIG_INFO*
 
     GenTreeLclFld* spanReferenceFld = gtNewLclFldNode(spanLclNum, TYP_BYREF, OFFSETOF__CORINFO_Span__reference);
     GenTreeLclFld* spanLengthFld    = gtNewLclFldNode(spanLclNum, TYP_INT, OFFSETOF__CORINFO_Span__length);
-    GenTree*       unrolled = impExpandHalfConstEquals(spanReferenceFld, spanLengthFld, false, startsWith, (WCHAR*)str,
-                                                 cnsLength, 0, cmpMode);
+    GenTree*       unrolled =
+        impExpandHalfConstEquals(spanReferenceFld, spanLengthFld, false, kind, (WCHAR*)str, cnsLength, 0, cmpMode);
 
     if (unrolled != nullptr)
     {
         if (!spanObj->OperIs(GT_LCL_VAR))
         {
-            impStoreTemp(spanLclNum, spanObj, CHECK_SPILL_NONE);
+            impStoreToTemp(spanLclNum, spanObj, CHECK_SPILL_NONE);
         }
 
         if (unrolled->OperIs(GT_QMARK))
         {
             // QMARK can't be a root node, spill it to a temp
             unsigned rootTmp = lvaGrabTemp(true DEBUGARG("spilling unroll qmark"));
-            impStoreTemp(rootTmp, unrolled, CHECK_SPILL_NONE);
+            impStoreToTemp(rootTmp, unrolled, CHECK_SPILL_NONE);
             unrolled = gtNewLclvNode(rootTmp, TYP_INT);
         }
 
diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp
index f3fc562edb51..ae9335513c9e 100644
--- a/src/coreclr/jit/indirectcalltransformer.cpp
+++ b/src/coreclr/jit/indirectcalltransformer.cpp
@@ -67,7 +67,8 @@
 class IndirectCallTransformer
 {
 public:
-    IndirectCallTransformer(Compiler* compiler) : compiler(compiler)
+    IndirectCallTransformer(Compiler* compiler)
+        : compiler(compiler)
     {
     }
 
@@ -157,7 +158,9 @@ class IndirectCallTransformer
     {
     public:
         Transformer(Compiler* compiler, BasicBlock* block, Statement* stmt)
-            : compiler(compiler), currBlock(block), stmt(stmt)
+            : compiler(compiler)
+            , currBlock(block)
+            , stmt(stmt)
         {
             remainderBlock = nullptr;
             checkBlock     = nullptr;
@@ -197,7 +200,7 @@ class IndirectCallTransformer
         virtual const char*  Name()                       = 0;
         virtual void         ClearFlag()                  = 0;
         virtual GenTreeCall* GetCall(Statement* callStmt) = 0;
-        virtual void FixupRetExpr()                       = 0;
+        virtual void         FixupRetExpr()               = 0;
 
         //------------------------------------------------------------------------
         // CreateRemainder: split current block at the call stmt and
@@ -207,6 +210,10 @@ class IndirectCallTransformer
         {
             remainderBlock = compiler->fgSplitBlockAfterStatement(currBlock, stmt);
             remainderBlock->SetFlags(BBF_INTERNAL);
+
+            // We will be adding more blocks after currBlock, so remove edge to remainderBlock.
+            //
+            compiler->fgRemoveRefPred(currBlock->GetTargetEdge());
         }
 
         virtual void CreateCheck(uint8_t checkIdx) = 0;
@@ -218,13 +225,12 @@ class IndirectCallTransformer
         // Arguments:
         //    jumpKind - jump kind for the new basic block
         //    insertAfter - basic block, after which compiler has to insert the new one.
-        //    jumpDest - jump target for the new basic block. Defaults to nullptr.
         //
         // Return Value:
         //    new basic block.
-        BasicBlock* CreateAndInsertBasicBlock(BBKinds jumpKind, BasicBlock* insertAfter, BasicBlock* jumpDest = nullptr)
+        BasicBlock* CreateAndInsertBasicBlock(BBKinds jumpKind, BasicBlock* insertAfter)
         {
-            BasicBlock* block = compiler->fgNewBBafter(jumpKind, insertAfter, true, jumpDest);
+            BasicBlock* block = compiler->fgNewBBafter(jumpKind, insertAfter, true);
             block->SetFlags(BBF_IMPORTED);
             return block;
         }
@@ -267,37 +273,35 @@ class IndirectCallTransformer
             assert(compiler->fgPredsComputed);
 
             // currBlock
-            compiler->fgRemoveRefPred(remainderBlock, currBlock);
-
             if (checkBlock != currBlock)
             {
                 assert(currBlock->KindIs(BBJ_ALWAYS));
-                currBlock->SetTarget(checkBlock);
                 FlowEdge* const newEdge = compiler->fgAddRefPred(checkBlock, currBlock);
-                newEdge->setLikelihood(1.0);
+                currBlock->SetTargetEdge(newEdge);
             }
 
             // checkBlock
             // Todo: get likelihoods right
             //
             assert(checkBlock->KindIs(BBJ_ALWAYS));
-            checkBlock->SetCond(elseBlock, thenBlock);
             FlowEdge* const thenEdge = compiler->fgAddRefPred(thenBlock, checkBlock);
             thenEdge->setLikelihood(0.5);
             FlowEdge* const elseEdge = compiler->fgAddRefPred(elseBlock, checkBlock);
             elseEdge->setLikelihood(0.5);
+            checkBlock->SetCond(elseEdge, thenEdge);
 
             // thenBlock
-            assert(thenBlock->TargetIs(remainderBlock));
             {
+                assert(thenBlock->KindIs(BBJ_ALWAYS));
                 FlowEdge* const newEdge = compiler->fgAddRefPred(remainderBlock, thenBlock);
-                newEdge->setLikelihood(1.0);
+                thenBlock->SetTargetEdge(newEdge);
             }
 
             // elseBlock
             {
+                assert(elseBlock->KindIs(BBJ_ALWAYS));
                 FlowEdge* const newEdge = compiler->fgAddRefPred(remainderBlock, elseBlock);
-                newEdge->setLikelihood(1.0);
+                elseBlock->SetTargetEdge(newEdge);
             }
         }
 
@@ -376,8 +380,7 @@ class IndirectCallTransformer
         {
             assert(checkIdx == 0);
 
-            checkBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, currBlock, currBlock->Next());
-            checkBlock->SetFlags(BBF_NONE_QUIRK);
+            checkBlock                 = CreateAndInsertBasicBlock(BBJ_ALWAYS, currBlock);
             GenTree*   fatPointerMask  = new (compiler, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, FAT_POINTER_MASK);
             GenTree*   fptrAddressCopy = compiler->gtCloneExpr(fptrAddress);
             GenTree*   fatPointerAnd   = compiler->gtNewOperNode(GT_AND, TYP_I_IMPL, fptrAddressCopy, fatPointerMask);
@@ -395,7 +398,7 @@ class IndirectCallTransformer
         virtual void CreateThen(uint8_t checkIdx)
         {
             assert(remainderBlock != nullptr);
-            thenBlock                     = CreateAndInsertBasicBlock(BBJ_ALWAYS, checkBlock, remainderBlock);
+            thenBlock                     = CreateAndInsertBasicBlock(BBJ_ALWAYS, checkBlock);
             Statement* copyOfOriginalStmt = compiler->gtCloneStmt(stmt);
             compiler->fgInsertStmtAtEnd(thenBlock, copyOfOriginalStmt);
         }
@@ -405,8 +408,7 @@ class IndirectCallTransformer
         //
         virtual void CreateElse()
         {
-            elseBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, thenBlock, thenBlock->Next());
-            elseBlock->SetFlags(BBF_NONE_QUIRK);
+            elseBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, thenBlock);
 
             GenTree* fixedFptrAddress  = GetFixedFptrAddress();
             GenTree* actualCallAddress = compiler->gtNewIndir(pointerType, fixedFptrAddress);
@@ -481,7 +483,8 @@ class IndirectCallTransformer
     {
     public:
         GuardedDevirtualizationTransformer(Compiler* compiler, BasicBlock* block, Statement* stmt)
-            : Transformer(compiler, block, stmt), returnTemp(BAD_VAR_NUM)
+            : Transformer(compiler, block, stmt)
+            , returnTemp(BAD_VAR_NUM)
         {
         }
 
@@ -574,10 +577,7 @@ class IndirectCallTransformer
         {
             assert(compiler->fgPredsComputed);
 
-            // currBlock
-            compiler->fgRemoveRefPred(remainderBlock, currBlock);
-
-            // The rest of chaining is done in-place.
+            // Chaining is done in-place.
         }
 
         virtual void SetWeights()
@@ -608,23 +608,24 @@ class IndirectCallTransformer
                 checkBlock                 = CreateAndInsertBasicBlock(BBJ_ALWAYS, thenBlock);
                 checkFallsThrough          = false;
 
-                // Calculate the total likelihood for this check as a sum of likelihoods
-                // of all previous candidates (thenBlocks)
-                unsigned checkLikelihood = 100;
-                for (uint8_t previousCandidate = 0; previousCandidate < checkIdx; previousCandidate++)
-                {
-                    checkLikelihood -= origCall->GetGDVCandidateInfo(previousCandidate)->likelihood;
-                }
+                // We computed the "then" likelihood in CreateThen, so we
+                // just use that to figure out the "else" likelihood.
+                //
+                assert(prevCheckBlock->KindIs(BBJ_ALWAYS));
+                assert(prevCheckBlock->JumpsToNext());
+                FlowEdge* const prevCheckThenEdge = prevCheckBlock->GetTargetEdge();
+                weight_t        checkLikelihood   = max(0.0, 1.0 - prevCheckThenEdge->getLikelihood());
 
-                // Make sure we didn't overflow
-                assert(checkLikelihood <= 100);
-                weight_t checkLikelihoodWt = ((weight_t)checkLikelihood) / 100.0;
+                JITDUMP("Level %u Check block " FMT_BB " success likelihood " FMT_WT "\n", checkIdx, checkBlock->bbNum,
+                        checkLikelihood);
 
                 // prevCheckBlock is expected to jump to this new check (if its type check doesn't succeed)
-                prevCheckBlock->SetCond(checkBlock, prevCheckBlock->Next());
-                FlowEdge* const checkEdge = compiler->fgAddRefPred(checkBlock, prevCheckBlock);
-                checkEdge->setLikelihood(checkLikelihoodWt);
-                checkBlock->inheritWeightPercentage(currBlock, checkLikelihood);
+                //
+                FlowEdge* const prevCheckCheckEdge = compiler->fgAddRefPred(checkBlock, prevCheckBlock);
+                prevCheckCheckEdge->setLikelihood(checkLikelihood);
+                checkBlock->inheritWeight(prevCheckBlock);
+                checkBlock->scaleBBWeight(checkLikelihood);
+                prevCheckBlock->SetCond(prevCheckCheckEdge, prevCheckThenEdge);
             }
 
             // Find last arg with a side effect. All args with any effect
@@ -742,7 +743,7 @@ class IndirectCallTransformer
         // SpillArgToTempBeforeGuard: spill an argument into a temp in the guard/check block.
         //
         // Parameters
-        //   arg - The arg to create a temp and assignment for.
+        //   arg - The arg to create a temp and local store for.
         //
         void SpillArgToTempBeforeGuard(CallArg* arg)
         {
@@ -1023,25 +1024,63 @@ class IndirectCallTransformer
         {
             // Compute likelihoods
             //
-            unsigned const thenLikelihood   = origCall->GetGDVCandidateInfo(checkIdx)->likelihood;
-            weight_t       thenLikelihoodWt = min(((weight_t)thenLikelihood) / 100.0, 100.0);
-            weight_t       elseLikelihoodWt = max(1.0 - thenLikelihoodWt, 0.0);
+            // If this is the first check the likelihood is just the candidate likelihood.
+            // If there are multiple checks things are a bit more complicated.
+            //
+            // Say we had three candidates with likelihoods 0.5, 0.3, and 0.1.
+            //
+            // The first one's likelihood is 0.5.
+            //
+            // The second one (given that we've already checked the first and failed)
+            // is (0.3) / (1.0 - 0.5) = 0.6.
+            //
+            // The third one is (0.1) / (1.0 - (0.5 + 0.3)) = (0.1)/(0.2) = 0.5
+            //
+            // So to figure out the proper divisor, we start with 1.0 and subtract off each
+            // preceeding test's likelihood of success.
+            //
+            unsigned const thenLikelihood = origCall->GetGDVCandidateInfo(checkIdx)->likelihood;
+            unsigned       baseLikelihood = 0;
+
+            for (uint8_t i = 0; i < checkIdx; i++)
+            {
+                baseLikelihood += origCall->GetGDVCandidateInfo(i)->likelihood;
+            }
+            assert(baseLikelihood < 100);
+            baseLikelihood = 100 - baseLikelihood;
+
+            weight_t adjustedThenLikelihood = min(((weight_t)thenLikelihood) / baseLikelihood, 100.0);
+            JITDUMP("For check in " FMT_BB ": orig likelihood " FMT_WT ", base likelihood " FMT_WT
+                    ", adjusted likelihood " FMT_WT "\n",
+                    checkBlock->bbNum, (weight_t)thenLikelihood / 100.0, (weight_t)baseLikelihood / 100.0,
+                    adjustedThenLikelihood);
 
             // thenBlock always jumps to remainderBlock
-            thenBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, checkBlock, remainderBlock);
+            //
+            thenBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, checkBlock);
             thenBlock->CopyFlags(currBlock, BBF_SPLIT_GAINED);
-            thenBlock->inheritWeightPercentage(currBlock, thenLikelihood);
+            thenBlock->inheritWeight(currBlock);
+            thenBlock->scaleBBWeight(adjustedThenLikelihood);
+            FlowEdge* const thenRemainderEdge = compiler->fgAddRefPred(remainderBlock, thenBlock);
+            thenBlock->SetTargetEdge(thenRemainderEdge);
 
-            // Also, thenBlock has a single pred - last checkBlock
+            // thenBlock has a single pred - last checkBlock.
+            //
             assert(checkBlock->KindIs(BBJ_ALWAYS));
-            checkBlock->SetTarget(thenBlock);
-            checkBlock->SetFlags(BBF_NONE_QUIRK);
+            FlowEdge* const checkThenEdge = compiler->fgAddRefPred(thenBlock, checkBlock);
+            checkBlock->SetTargetEdge(checkThenEdge);
             assert(checkBlock->JumpsToNext());
-            FlowEdge* const thenEdge = compiler->fgAddRefPred(thenBlock, checkBlock);
-            thenEdge->setLikelihood(thenLikelihoodWt);
-            FlowEdge* const elseEdge = compiler->fgAddRefPred(remainderBlock, thenBlock);
-            elseEdge->setLikelihood(elseLikelihoodWt);
 
+            // SetTargetEdge() gave checkThenEdge a (correct) likelihood of 1.0.
+            // Later on, we might convert this checkBlock into a BBJ_COND.
+            // Since we have the adjusted likelihood calculated here, set it prematurely.
+            // If we leave this block as a BBJ_ALWAYS, we'll assert later that the likelihood is 1.0.
+            //
+            checkThenEdge->setLikelihood(adjustedThenLikelihood);
+
+            // We will set the "else edge" likelihood in CreateElse later,
+            // based on the thenEdge likelihood.
+            //
             DevirtualizeCall(thenBlock, checkIdx);
         }
 
@@ -1050,28 +1089,25 @@ class IndirectCallTransformer
         //
         virtual void CreateElse()
         {
-            // Calculate the likelihood of the else block as a remainder of the sum
-            // of all the other likelihoods.
-            unsigned elseLikelihood = 100;
-            for (uint8_t i = 0; i < origCall->GetInlineCandidatesCount(); i++)
-            {
-                elseLikelihood -= origCall->GetGDVCandidateInfo(i)->likelihood;
-            }
-            // Make sure it didn't overflow
-            assert(elseLikelihood <= 100);
-            weight_t elseLikelihoodDbl = ((weight_t)elseLikelihood) / 100.0;
-
-            elseBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, thenBlock, thenBlock->Next());
+            elseBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, thenBlock);
             elseBlock->CopyFlags(currBlock, BBF_SPLIT_GAINED);
-            elseBlock->SetFlags(BBF_NONE_QUIRK);
+
+            // We computed the "then" likelihood in CreateThen, so we
+            // just use that to figure out the "else" likelihood.
+            //
+            assert(checkBlock->KindIs(BBJ_ALWAYS));
+            FlowEdge* const checkThenEdge  = checkBlock->GetTargetEdge();
+            weight_t        elseLikelihood = max(0.0, 1.0 - checkThenEdge->getLikelihood());
 
             // CheckBlock flows into elseBlock unless we deal with the case
             // where we know the last check is always true (in case of "exact" GDV)
+            //
             if (!checkFallsThrough)
             {
-                checkBlock->SetCond(elseBlock, checkBlock->Next());
-                FlowEdge* const checkEdge = compiler->fgAddRefPred(elseBlock, checkBlock);
-                checkEdge->setLikelihood(elseLikelihoodDbl);
+                assert(checkBlock->JumpsToNext());
+                FlowEdge* const checkElseEdge = compiler->fgAddRefPred(elseBlock, checkBlock);
+                checkElseEdge->setLikelihood(elseLikelihood);
+                checkBlock->SetCond(checkElseEdge, checkThenEdge);
             }
             else
             {
@@ -1079,16 +1115,21 @@ class IndirectCallTransformer
                 // and is NativeAOT-only, we just assume the unreached block will be removed
                 // by other phases.
                 assert(origCall->gtCallMoreFlags & GTF_CALL_M_GUARDED_DEVIRT_EXACT);
+
+                // We aren't converting checkBlock to a BBJ_COND. Its successor edge likelihood should remain 1.0.
+                //
+                assert(checkThenEdge->getLikelihood() == 1.0);
             }
 
             // elseBlock always flows into remainderBlock
-            FlowEdge* const elseEdge = compiler->fgAddRefPred(remainderBlock, elseBlock);
-            elseEdge->setLikelihood(1.0);
+            FlowEdge* const elseRemainderEdge = compiler->fgAddRefPred(remainderBlock, elseBlock);
+            elseBlock->SetTargetEdge(elseRemainderEdge);
 
             // Remove everything related to inlining from the original call
             origCall->ClearInlineInfo();
 
-            elseBlock->inheritWeightPercentage(currBlock, elseLikelihood);
+            elseBlock->inheritWeight(checkBlock);
+            elseBlock->scaleBBWeight(elseLikelihood);
 
             GenTreeCall* call    = origCall;
             Statement*   newStmt = compiler->gtNewStmt(call, stmt->GetDebugInfo());
@@ -1183,12 +1224,10 @@ class IndirectCallTransformer
             // Finally, rewire the cold block to jump to the else block,
             // not fall through to the check block.
             //
-            FlowEdge* const oldEdge = compiler->fgRemoveRefPred(checkBlock, coldBlock);
-            coldBlock->SetKindAndTarget(BBJ_ALWAYS, elseBlock);
-            compiler->fgAddRefPred(elseBlock, coldBlock, oldEdge);
+            compiler->fgRedirectTargetEdge(coldBlock, elseBlock);
         }
 
-        // When the current candidate hads sufficiently high likelihood, scan
+        // When the current candidate has sufficiently high likelihood, scan
         // the remainer block looking for another GDV candidate.
         //
         // (also consider: if currBlock has sufficiently high execution frequency)
@@ -1231,7 +1270,9 @@ class IndirectCallTransformer
                 unsigned m_nodeCount;
 
                 ClonabilityVisitor(Compiler* compiler)
-                    : GenTreeVisitor(compiler), m_unclonableNode(nullptr), m_nodeCount(0)
+                    : GenTreeVisitor(compiler)
+                    , m_unclonableNode(nullptr)
+                    , m_nodeCount(0)
                 {
                 }
 
diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp
new file mode 100644
index 000000000000..a1ab0c58ecd9
--- /dev/null
+++ b/src/coreclr/jit/inductionvariableopts.cpp
@@ -0,0 +1,686 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// This file contains code to optimize induction variables in loops based on
+// scalar evolution analysis (see scev.h and scev.cpp for more information
+// about the scalar evolution analysis).
+//
+// Currently the only optimization done is widening of primary induction
+// variables from 32 bits into 64 bits. This is generally only profitable on
+// x64 that does not allow zero extension of 32-bit values in addressing modes
+// (in contrast, arm64 does have the capability of including zero extensions in
+// addressing modes). For x64 this saves a zero extension for every array
+// access inside the loop, in exchange for some widening or narrowing stores
+// outside the loop:
+//   - To make sure the new widened IV starts at the right value it is
+//   initialized to the value of the narrow IV outside the loop (either in the
+//   preheader or at the def location of the narrow IV). Usually the start
+//   value is a constant, in which case the widened IV is just initialized to
+//   the constant value.
+//   - If the narrow IV is used after the loop we need to store it back from
+//   the widened IV in the exits. We depend on liveness sets to figure out
+//   which exits to insert IR into.
+//
+// These steps ensure that the wide IV has the right value to begin with and
+// the old narrow IV still has the right value after the loop. Additionally,
+// we must replace every use of the narrow IV inside the loop with the widened
+// IV. This is done by a traversal of the IR inside the loop. We do not
+// actually widen the uses of the IV; rather, we keep all uses and defs as
+// 32-bit, which the backend is able to handle efficiently on x64. Because of
+// this we do not need to worry about overflow.
+//
+
+#include "jitpch.h"
+#include "scev.h"
+
+//------------------------------------------------------------------------
+// optCanSinkWidenedIV: Check to see if we are able to sink a store to the old
+// local into the exits of a loop if we decide to widen.
+//
+// Parameters:
+//   lclNum - The primary induction variable
+//   loop   - The loop
+//
+// Returns:
+//   True if we can sink a store to the old local after widening.
+//
+// Remarks:
+//   This handles the situation where the primary induction variable is used
+//   after the loop. In those cases we need to store the widened local back
+//   into the old one in the exits where the IV variable is live.
+//
+//   We are able to sink when none of the exits are critical blocks, in the
+//   sense that all their predecessors must come from inside the loop. Loop
+//   exit canonicalization guarantees this for regular exit blocks. It is not
+//   guaranteed for exceptional exits, but we do not expect to widen IVs that
+//   are live into exceptional exits since those are marked DNER which makes it
+//   unprofitable anyway.
+//
+//   Note that there may be natural loops that have not had their regular exits
+//   canonicalized at the time when IV opts run, in particular if RBO/assertion
+//   prop makes a previously unnatural loop natural. This function accounts for
+//   and rejects these cases.
+//
+bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop)
+{
+    LclVarDsc* dsc = lvaGetDesc(lclNum);
+
+    BasicBlockVisit result = loop->VisitRegularExitBlocks([=](BasicBlock* exit) {
+        if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex))
+        {
+            JITDUMP("  Exit " FMT_BB " does not need a sink; V%02u is not live-in\n", exit->bbNum, lclNum);
+            return BasicBlockVisit::Continue;
+        }
+
+        for (BasicBlock* pred : exit->PredBlocks())
+        {
+            if (!loop->ContainsBlock(pred))
+            {
+                JITDUMP("  Cannot safely sink widened version of V%02u into exit " FMT_BB " of " FMT_LP
+                        "; it has a non-loop pred " FMT_BB "\n",
+                        lclNum, exit->bbNum, loop->GetIndex(), pred->bbNum);
+                return BasicBlockVisit::Abort;
+            }
+        }
+
+        return BasicBlockVisit::Continue;
+    });
+
+#ifdef DEBUG
+    // We currently do not expect to ever widen IVs that are live into
+    // exceptional exits. Such IVs are expected to have been marked DNER
+    // previously (EH write-thru is only for single def locals) which makes it
+    // unprofitable. If this ever changes we need some more expansive handling
+    // here.
+    loop->VisitLoopBlocks([=](BasicBlock* block) {
+        block->VisitAllSuccs(this, [=](BasicBlock* succ) {
+            if (!loop->ContainsBlock(succ) && bbIsHandlerBeg(succ))
+            {
+                assert(!VarSetOps::IsMember(this, succ->bbLiveIn, dsc->lvVarIndex) &&
+                       "Candidate IV for widening is live into exceptional exit");
+            }
+
+            return BasicBlockVisit::Continue;
+        });
+
+        return BasicBlockVisit::Continue;
+    });
+#endif
+
+    return result != BasicBlockVisit::Abort;
+}
+
+//------------------------------------------------------------------------
+// optIsIVWideningProfitable: Check to see if IV widening is profitable.
+//
+// Parameters:
+//   lclNum           - The primary induction variable
+//   initBlock        - The block in where the new IV would be initialized
+//   initedToConstant - Whether or not the new IV will be initialized to a constant
+//   loop             - The loop
+//   ivUses           - Statements in which "lclNum" appears will be added to this list
+//
+//
+// Returns:
+//   True if IV widening is profitable.
+//
+// Remarks:
+//   IV widening is generally profitable when it allows us to remove casts
+//   inside the loop. However, it may also introduce other reg-reg moves:
+//     1. We may need to store the narrow IV into the wide one in the
+//     preheader. This is necessary when the start value is not constant. If
+//     the start value _is_ constant then we assume that the constant store to
+//     the narrow local will be a DCE'd.
+//     2. We need to store the wide IV back into the narrow one in each of
+//     the exits where the narrow IV is live-in.
+//
+bool Compiler::optIsIVWideningProfitable(unsigned                lclNum,
+                                         BasicBlock*             initBlock,
+                                         bool                    initedToConstant,
+                                         FlowGraphNaturalLoop*   loop,
+                                         ArrayStack<Statement*>& ivUses)
+{
+    for (FlowGraphNaturalLoop* otherLoop : m_loops->InReversePostOrder())
+    {
+        if (otherLoop == loop)
+            continue;
+
+        for (Statement* stmt : otherLoop->GetHeader()->Statements())
+        {
+            if (!stmt->IsPhiDefnStmt())
+                break;
+
+            if (stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() == lclNum)
+            {
+                JITDUMP("  V%02u has a phi [%06u] in " FMT_LP "'s header " FMT_BB "\n", lclNum,
+                        dspTreeID(stmt->GetRootNode()), otherLoop->GetIndex(), otherLoop->GetHeader()->bbNum);
+                // TODO-CQ: We can legally widen these cases, but LSRA is
+                // unhappy about some of the lifetimes we create when we do
+                // this. This particularly affects cloned loops.
+                return false;
+            }
+        }
+    }
+
+    const weight_t ExtensionCost = 2;
+    const int      ExtensionSize = 3;
+
+    weight_t savedCost = 0;
+    int      savedSize = 0;
+
+    loop->VisitLoopBlocks([&](BasicBlock* block) {
+        for (Statement* stmt : block->NonPhiStatements())
+        {
+            bool hasUse        = false;
+            int  numExtensions = 0;
+            for (GenTree* node : stmt->TreeList())
+            {
+                if (!node->OperIs(GT_CAST))
+                {
+                    hasUse |= node->OperIsLocal() && (node->AsLclVarCommon()->GetLclNum() == lclNum);
+                    continue;
+                }
+
+                GenTreeCast* cast = node->AsCast();
+                if ((cast->gtCastType != TYP_LONG) || !cast->IsUnsigned() || cast->gtOverflow())
+                {
+                    continue;
+                }
+
+                GenTree* op = cast->CastOp();
+                if (!op->OperIs(GT_LCL_VAR) || (op->AsLclVarCommon()->GetLclNum() != lclNum))
+                {
+                    continue;
+                }
+
+                // If this is already the source of a store then it is going to be
+                // free in our backends regardless.
+                GenTree* parent = node->gtGetParent(nullptr);
+                if ((parent != nullptr) && parent->OperIs(GT_STORE_LCL_VAR))
+                {
+                    continue;
+                }
+
+                numExtensions++;
+            }
+
+            if (hasUse)
+            {
+                ivUses.Push(stmt);
+            }
+
+            if (numExtensions > 0)
+            {
+                JITDUMP("  Found %d zero extensions in " FMT_STMT "\n", numExtensions, stmt->GetID());
+
+                savedSize += numExtensions * ExtensionSize;
+                savedCost += numExtensions * block->getBBWeight(this) * ExtensionCost;
+            }
+        }
+
+        return BasicBlockVisit::Continue;
+    });
+
+    if (!initedToConstant)
+    {
+        // We will need to store the narrow IV into the wide one in the init
+        // block. We only cost this when init value is not a constant since
+        // otherwise we assume that constant initialization of the narrow local
+        // will be DCE'd.
+        savedSize -= ExtensionSize;
+        savedCost -= initBlock->getBBWeight(this) * ExtensionCost;
+    }
+
+    // Now account for the cost of sinks.
+    LclVarDsc* dsc = lvaGetDesc(lclNum);
+    loop->VisitRegularExitBlocks([&](BasicBlock* exit) {
+        if (VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex))
+        {
+            savedSize -= ExtensionSize;
+            savedCost -= exit->getBBWeight(this) * ExtensionCost;
+        }
+        return BasicBlockVisit::Continue;
+    });
+
+    const weight_t ALLOWED_SIZE_REGRESSION_PER_CYCLE_IMPROVEMENT = 2;
+    weight_t       cycleImprovementPerInvoc                      = savedCost / fgFirstBB->getBBWeight(this);
+
+    JITDUMP("  Estimated cycle improvement: " FMT_WT " cycles per invocation\n", cycleImprovementPerInvoc);
+    JITDUMP("  Estimated size improvement: %d bytes\n", savedSize);
+
+    if ((cycleImprovementPerInvoc > 0) &&
+        ((cycleImprovementPerInvoc * ALLOWED_SIZE_REGRESSION_PER_CYCLE_IMPROVEMENT) >= -savedSize))
+    {
+        JITDUMP("    Widening is profitable (cycle improvement)\n");
+        return true;
+    }
+
+    const weight_t ALLOWED_CYCLE_REGRESSION_PER_SIZE_IMPROVEMENT = 0.01;
+
+    if ((savedSize > 0) && ((savedSize * ALLOWED_CYCLE_REGRESSION_PER_SIZE_IMPROVEMENT) >= -cycleImprovementPerInvoc))
+    {
+        JITDUMP("  Widening is profitable (size improvement)\n");
+        return true;
+    }
+
+    JITDUMP("  Widening is not profitable\n");
+    return false;
+}
+
+//------------------------------------------------------------------------
+// optSinkWidenedIV: Create stores back to the narrow IV in the exits where
+// that is necessary.
+//
+// Parameters:
+//   lclNum    - Narrow version of primary induction variable
+//   newLclNum - Wide version of primary induction variable
+//   loop      - The loop
+//
+// Returns:
+//   True if any store was created in any exit block.
+//
+void Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop)
+{
+    LclVarDsc* dsc = lvaGetDesc(lclNum);
+    loop->VisitRegularExitBlocks([=](BasicBlock* exit) {
+        if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex))
+        {
+            return BasicBlockVisit::Continue;
+        }
+
+        GenTree*   narrowing = gtNewCastNode(TYP_INT, gtNewLclvNode(newLclNum, TYP_LONG), false, TYP_INT);
+        GenTree*   store     = gtNewStoreLclVarNode(lclNum, narrowing);
+        Statement* newStmt   = fgNewStmtFromTree(store);
+        JITDUMP("Narrow IV local V%02u live into exit block " FMT_BB "; sinking a narrowing\n", lclNum, exit->bbNum);
+        DISPSTMT(newStmt);
+        fgInsertStmtAtBeg(exit, newStmt);
+
+        return BasicBlockVisit::Continue;
+    });
+}
+
+//------------------------------------------------------------------------
+// optReplaceWidenedIV: Replace uses of the narrow IV with the wide IV in the
+// specified statement.
+//
+// Parameters:
+//   lclNum    - Narrow version of primary induction variable
+//   newLclNum - Wide version of primary induction variable
+//   stmt      - The statement to replace uses in.
+//
+void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt)
+{
+    struct ReplaceVisitor : GenTreeVisitor<ReplaceVisitor>
+    {
+    private:
+        unsigned m_lclNum;
+        unsigned m_ssaNum;
+        unsigned m_newLclNum;
+
+        bool IsLocal(GenTreeLclVarCommon* tree)
+        {
+            return (tree->GetLclNum() == m_lclNum) &&
+                   ((m_ssaNum == SsaConfig::RESERVED_SSA_NUM) || (tree->GetSsaNum() == m_ssaNum));
+        }
+
+    public:
+        bool MadeChanges = false;
+
+        enum
+        {
+            DoPreOrder = true,
+        };
+
+        ReplaceVisitor(Compiler* comp, unsigned lclNum, unsigned ssaNum, unsigned newLclNum)
+            : GenTreeVisitor(comp)
+            , m_lclNum(lclNum)
+            , m_ssaNum(ssaNum)
+            , m_newLclNum(newLclNum)
+        {
+        }
+
+        fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
+        {
+            GenTree* node = *use;
+            if (node->OperIs(GT_CAST))
+            {
+                GenTreeCast* cast = node->AsCast();
+                if ((cast->gtCastType == TYP_LONG) && cast->IsUnsigned() && !cast->gtOverflow())
+                {
+                    GenTree* op = cast->CastOp();
+                    if (op->OperIs(GT_LCL_VAR) && IsLocal(op->AsLclVarCommon()))
+                    {
+                        *use        = m_compiler->gtNewLclvNode(m_newLclNum, TYP_LONG);
+                        MadeChanges = true;
+                        return fgWalkResult::WALK_SKIP_SUBTREES;
+                    }
+                }
+            }
+            else if (node->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR, GT_LCL_FLD, GT_STORE_LCL_FLD) &&
+                     IsLocal(node->AsLclVarCommon()))
+            {
+                switch (node->OperGet())
+                {
+                    case GT_LCL_VAR:
+                        node->AsLclVarCommon()->SetLclNum(m_newLclNum);
+                        // No cast needed -- the backend allows TYP_INT uses of TYP_LONG locals.
+                        break;
+                    case GT_STORE_LCL_VAR:
+                    {
+                        node->AsLclVarCommon()->SetLclNum(m_newLclNum);
+                        node->gtType = TYP_LONG;
+                        node->AsLclVarCommon()->Data() =
+                            m_compiler->gtNewCastNode(TYP_LONG, node->AsLclVarCommon()->Data(), true, TYP_LONG);
+                        break;
+                    }
+                    case GT_LCL_FLD:
+                    case GT_STORE_LCL_FLD:
+                        assert(!"Unexpected field use for local not marked as DNER");
+                        break;
+                    default:
+                        break;
+                }
+
+                MadeChanges = true;
+            }
+
+            return fgWalkResult::WALK_CONTINUE;
+        }
+    };
+
+    ReplaceVisitor visitor(this, lclNum, ssaNum, newLclNum);
+    visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
+    if (visitor.MadeChanges)
+    {
+        gtSetStmtInfo(stmt);
+        fgSetStmtSeq(stmt);
+        JITDUMP("New tree:\n", dspTreeID(stmt->GetRootNode()));
+        DISPTREE(stmt->GetRootNode());
+        JITDUMP("\n");
+    }
+    else
+    {
+        JITDUMP("No replacements made\n");
+    }
+}
+
+//------------------------------------------------------------------------
+// optBestEffortReplaceNarrowIVUses: Try to find and replace uses of the specified
+// SSA def with a new local.
+//
+// Parameters:
+//   lclNum    - Previous local
+//   ssaNum    - Previous local SSA num
+//   newLclNum - New local to replace with
+//   block     - Block to replace in
+//   firstStmt - First statement in "block" to start replacing in
+//
+// Remarks:
+//   This function is best effort; it might not find all uses of the provided
+//   SSA num, particularly because it does not follow into joins. Note that we
+//   only use this to replace uses of the narrow IV outside the loop; inside
+//   the loop we do ensure that all uses/defs are replaced.
+//   Keeping it best-effort outside the loop is ok; there is no correctness
+//   issue since we do not invalidate the value of the old narrow IV in any
+//   way, but it may mean we end up leaving the narrow IV live concurrently
+//   with the new widened IV, increasing register pressure.
+//
+void Compiler::optBestEffortReplaceNarrowIVUses(
+    unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt)
+{
+    JITDUMP("Replacing V%02u -> V%02u in " FMT_BB " starting at " FMT_STMT "\n", lclNum, newLclNum, block->bbNum,
+            firstStmt == nullptr ? 0 : firstStmt->GetID());
+
+    for (Statement* stmt = firstStmt; stmt != nullptr; stmt = stmt->GetNextStmt())
+    {
+        JITDUMP("Replacing V%02u -> V%02u in [%06u]\n", lclNum, newLclNum, dspTreeID(stmt->GetRootNode()));
+        DISPSTMT(stmt);
+        JITDUMP("\n");
+
+        optReplaceWidenedIV(lclNum, ssaNum, newLclNum, stmt);
+    }
+
+    block->VisitRegularSuccs(this, [=](BasicBlock* succ) {
+        if (succ->GetUniquePred(this) == block)
+        {
+            optBestEffortReplaceNarrowIVUses(lclNum, ssaNum, newLclNum, succ, succ->firstStmt());
+        }
+
+        return BasicBlockVisit::Continue;
+    });
+}
+
+//------------------------------------------------------------------------
+// optInductionVariables: Try and optimize induction variables in the method.
+//
+// Returns:
+//   PhaseStatus indicating if anything changed.
+//
+PhaseStatus Compiler::optInductionVariables()
+{
+    JITDUMP("*************** In optInductionVariables()\n");
+
+#ifdef DEBUG
+    static ConfigMethodRange s_range;
+    s_range.EnsureInit(JitConfig.JitEnableInductionVariableOptsRange());
+
+    if (!s_range.Contains(info.compMethodHash()))
+    {
+        return PhaseStatus::MODIFIED_NOTHING;
+    }
+#endif
+
+    if (!fgMightHaveNaturalLoops)
+    {
+        JITDUMP("  Skipping since this method has no natural loops\n");
+        return PhaseStatus::MODIFIED_NOTHING;
+    }
+
+    bool changed = false;
+
+    // Currently we only do IV widening which generally is only profitable for
+    // x64 because arm64 addressing modes can include the zero/sign-extension
+    // of the index for free.
+#if defined(TARGET_XARCH) && defined(TARGET_64BIT)
+    m_dfsTree = fgComputeDfs();
+    m_loops   = FlowGraphNaturalLoops::Find(m_dfsTree);
+
+    ScalarEvolutionContext scevContext(this);
+    JITDUMP("Widening primary induction variables:\n");
+    ArrayStack<Statement*> ivUses(getAllocator(CMK_LoopIVOpts));
+    for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder())
+    {
+        JITDUMP("Processing ");
+        DBEXEC(verbose, FlowGraphNaturalLoop::Dump(loop));
+        scevContext.ResetForLoop(loop);
+
+        int numWidened = 0;
+
+        for (Statement* stmt : loop->GetHeader()->Statements())
+        {
+            if (!stmt->IsPhiDefnStmt())
+            {
+                break;
+            }
+
+            JITDUMP("\n");
+
+            DISPSTMT(stmt);
+
+            GenTreeLclVarCommon* lcl    = stmt->GetRootNode()->AsLclVarCommon();
+            LclVarDsc*           lclDsc = lvaGetDesc(lcl);
+            if (lclDsc->TypeGet() != TYP_INT)
+            {
+                JITDUMP("  Type is %s, no widening to be done\n", varTypeName(lclDsc->TypeGet()));
+                continue;
+            }
+
+            // If the IV is not enregisterable then uses/defs are going to go
+            // to stack regardless. This check also filters out IVs that may be
+            // live into exceptional exits since those are always marked DNER.
+            if (lclDsc->lvDoNotEnregister)
+            {
+                JITDUMP("  V%02u is marked DNER\n", lcl->GetLclNum());
+                continue;
+            }
+
+            Scev* scev = scevContext.Analyze(loop->GetHeader(), stmt->GetRootNode());
+            if (scev == nullptr)
+            {
+                JITDUMP("  Could not analyze header PHI\n");
+                continue;
+            }
+
+            scev = scevContext.Simplify(scev);
+            JITDUMP("  => ");
+            DBEXEC(verbose, scev->Dump(this));
+            JITDUMP("\n");
+            if (!scev->OperIs(ScevOper::AddRec))
+            {
+                JITDUMP("  Not an addrec\n");
+                continue;
+            }
+
+            ScevAddRec* addRec = (ScevAddRec*)scev;
+
+            JITDUMP("  V%02u is a primary induction variable in " FMT_LP "\n", lcl->GetLclNum(), loop->GetIndex());
+
+            if (!optCanSinkWidenedIV(lcl->GetLclNum(), loop))
+            {
+                continue;
+            }
+
+            // Start value should always be an SSA use from outside the loop
+            // since we only widen primary IVs.
+            assert(addRec->Start->OperIs(ScevOper::Local));
+            ScevLocal*    startLocal     = (ScevLocal*)addRec->Start;
+            int64_t       startConstant  = 0;
+            bool          initToConstant = startLocal->GetConstantValue(this, &startConstant);
+            LclSsaVarDsc* startSsaDsc    = lclDsc->GetPerSsaData(startLocal->SsaNum);
+
+            BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock();
+            BasicBlock* initBlock = preheader;
+            if ((startSsaDsc->GetBlock() != nullptr) && (startSsaDsc->GetDefNode() != nullptr))
+            {
+                initBlock = startSsaDsc->GetBlock();
+            }
+
+            ivUses.Reset();
+            if (!optIsIVWideningProfitable(lcl->GetLclNum(), initBlock, initToConstant, loop, ivUses))
+            {
+                continue;
+            }
+
+            changed = true;
+
+            Statement* insertInitAfter = nullptr;
+            if (initBlock != preheader)
+            {
+                GenTree* narrowInitRoot = startSsaDsc->GetDefNode();
+                while (true)
+                {
+                    GenTree* parent = narrowInitRoot->gtGetParent(nullptr);
+                    if (parent == nullptr)
+                        break;
+
+                    narrowInitRoot = parent;
+                }
+
+                for (Statement* stmt : initBlock->Statements())
+                {
+                    if (stmt->GetRootNode() == narrowInitRoot)
+                    {
+                        insertInitAfter = stmt;
+                        break;
+                    }
+                }
+
+                assert(insertInitAfter != nullptr);
+
+                if (insertInitAfter->IsPhiDefnStmt())
+                {
+                    while ((insertInitAfter->GetNextStmt() != nullptr) &&
+                           insertInitAfter->GetNextStmt()->IsPhiDefnStmt())
+                    {
+                        insertInitAfter = insertInitAfter->GetNextStmt();
+                    }
+                }
+            }
+
+            Statement* initStmt  = nullptr;
+            unsigned   newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum())));
+            INDEBUG(lclDsc = nullptr);
+            assert(startLocal->LclNum == lcl->GetLclNum());
+
+            if (initBlock != preheader)
+            {
+                JITDUMP("Adding initialization of new widened local to same block as reaching def outside loop, " FMT_BB
+                        "\n",
+                        initBlock->bbNum);
+            }
+            else
+            {
+                JITDUMP("Adding initialization of new widened local to preheader " FMT_BB "\n", initBlock->bbNum);
+            }
+
+            GenTree* initVal;
+            if (initToConstant)
+            {
+                initVal = gtNewIconNode((int64_t)(uint32_t)startConstant, TYP_LONG);
+            }
+            else
+            {
+                initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), TYP_INT), true, TYP_LONG);
+            }
+
+            GenTree* widenStore = gtNewTempStore(newLclNum, initVal);
+            initStmt            = fgNewStmtFromTree(widenStore);
+            if (insertInitAfter != nullptr)
+            {
+                fgInsertStmtAfter(initBlock, insertInitAfter, initStmt);
+            }
+            else
+            {
+                fgInsertStmtNearEnd(initBlock, initStmt);
+            }
+
+            DISPSTMT(initStmt);
+            JITDUMP("\n");
+
+            JITDUMP("  Replacing uses of V%02u with widened version V%02u\n", lcl->GetLclNum(), newLclNum);
+
+            if (initStmt != nullptr)
+            {
+                JITDUMP("    Replacing on the way to the loop\n");
+                optBestEffortReplaceNarrowIVUses(lcl->GetLclNum(), startLocal->SsaNum, newLclNum, initBlock,
+                                                 initStmt->GetNextStmt());
+            }
+
+            JITDUMP("    Replacing in the loop; %d statements with appearances\n", ivUses.Height());
+            for (int i = 0; i < ivUses.Height(); i++)
+            {
+                Statement* stmt = ivUses.Bottom(i);
+                JITDUMP("Replacing V%02u -> V%02u in [%06u]\n", lcl->GetLclNum(), newLclNum,
+                        dspTreeID(stmt->GetRootNode()));
+                DISPSTMT(stmt);
+                JITDUMP("\n");
+                optReplaceWidenedIV(lcl->GetLclNum(), SsaConfig::RESERVED_SSA_NUM, newLclNum, stmt);
+            }
+
+            optSinkWidenedIV(lcl->GetLclNum(), newLclNum, loop);
+
+            numWidened++;
+        }
+
+        Metrics.WidenedIVs += numWidened;
+        if (numWidened > 0)
+        {
+            Metrics.LoopsIVWidened++;
+        }
+    }
+
+    fgInvalidateDfsTree();
+#endif
+
+    return changed ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
+}
diff --git a/src/coreclr/jit/inline.cpp b/src/coreclr/jit/inline.cpp
index 06ca71126f85..c8831a75b39b 100644
--- a/src/coreclr/jit/inline.cpp
+++ b/src/coreclr/jit/inline.cpp
@@ -383,7 +383,7 @@ void InlineContext::Dump(bool verbose, unsigned indent)
 #if defined(DEBUG)
         calleeName = compiler->eeGetMethodFullName(m_Callee);
 #else
-        calleeName         = "callee";
+        calleeName = "callee";
 #endif // defined(DEBUG)
     }
 
diff --git a/src/coreclr/jit/inline.def b/src/coreclr/jit/inline.def
index 9371e655bde1..b1152d6f9ecc 100644
--- a/src/coreclr/jit/inline.def
+++ b/src/coreclr/jit/inline.def
@@ -122,7 +122,6 @@ INLINE_OBSERVATION(HAS_NEWOBJ,                bool,   "has newobj",
 // ------ Call Site Correctness -------
 
 INLINE_OBSERVATION(ARG_HAS_NULL_THIS,         bool,   "this pointer argument is null",        FATAL,       CALLSITE)
-INLINE_OBSERVATION(ARG_IS_MKREFANY,           bool,   "argument is mkrefany",                 FATAL,       CALLSITE)
 INLINE_OBSERVATION(ARG_NO_BASH_TO_INT,        bool,   "argument can't bash to int",           FATAL,       CALLSITE)
 INLINE_OBSERVATION(ARG_NO_BASH_TO_REF,        bool,   "argument can't bash to ref",           FATAL,       CALLSITE)
 INLINE_OBSERVATION(ARG_TYPES_INCOMPATIBLE,    bool,   "argument types incompatible",          FATAL,       CALLSITE)
diff --git a/src/coreclr/jit/inline.h b/src/coreclr/jit/inline.h
index dca92e39241e..8c1cb56124ad 100644
--- a/src/coreclr/jit/inline.h
+++ b/src/coreclr/jit/inline.h
@@ -222,9 +222,9 @@ class InlinePolicy
     }
 
     // Policy observations
-    virtual void NoteSuccess() = 0;
-    virtual void NoteBool(InlineObservation obs, bool value) = 0;
-    virtual void NoteFatal(InlineObservation obs) = 0;
+    virtual void NoteSuccess()                                   = 0;
+    virtual void NoteBool(InlineObservation obs, bool value)     = 0;
+    virtual void NoteFatal(InlineObservation obs)                = 0;
     virtual void NoteInt(InlineObservation obs, int value)       = 0;
     virtual void NoteDouble(InlineObservation obs, double value) = 0;
 
@@ -321,7 +321,7 @@ class InlinePolicy
 
 private:
     // No copying or assignment supported
-    InlinePolicy(const InlinePolicy&) = delete;
+    InlinePolicy(const InlinePolicy&)            = delete;
     InlinePolicy& operator=(const InlinePolicy&) = delete;
 
 protected:
@@ -558,7 +558,7 @@ class InlineResult
 
 private:
     // No copying or assignment allowed.
-    InlineResult(const InlineResult&) = delete;
+    InlineResult(const InlineResult&)            = delete;
     InlineResult& operator=(const InlineResult&) = delete;
 
     // Report/log/dump decision as appropriate
@@ -637,16 +637,16 @@ struct InlArgInfo
     CallArg* arg;                         // the caller argument
     GenTree* argBashTmpNode;              // tmp node created, if it may be replaced with actual arg
     unsigned argTmpNum;                   // the argument tmp number
-    unsigned argIsUsed : 1;               // is this arg used at all?
-    unsigned argIsInvariant : 1;          // the argument is a constant or a local variable address
-    unsigned argIsLclVar : 1;             // the argument is a local variable
-    unsigned argIsThis : 1;               // the argument is the 'this' pointer
-    unsigned argHasSideEff : 1;           // the argument has side effects
-    unsigned argHasGlobRef : 1;           // the argument has a global ref
-    unsigned argHasCallerLocalRef : 1;    // the argument value depends on an aliased caller local
-    unsigned argHasTmp : 1;               // the argument will be evaluated to a temp
-    unsigned argHasLdargaOp : 1;          // Is there LDARGA(s) operation on this argument?
-    unsigned argHasStargOp : 1;           // Is there STARG(s) operation on this argument?
+    unsigned argIsUsed               : 1; // is this arg used at all?
+    unsigned argIsInvariant          : 1; // the argument is a constant or a local variable address
+    unsigned argIsLclVar             : 1; // the argument is a local variable
+    unsigned argIsThis               : 1; // the argument is the 'this' pointer
+    unsigned argHasSideEff           : 1; // the argument has side effects
+    unsigned argHasGlobRef           : 1; // the argument has a global ref
+    unsigned argHasCallerLocalRef    : 1; // the argument value depends on an aliased caller local
+    unsigned argHasTmp               : 1; // the argument will be evaluated to a temp
+    unsigned argHasLdargaOp          : 1; // Is there LDARGA(s) operation on this argument?
+    unsigned argHasStargOp           : 1; // Is there STARG(s) operation on this argument?
     unsigned argIsByRefToStructLocal : 1; // Is this arg an address of a struct local or a normed struct local or a
                                           // field in them?
     unsigned argIsExact : 1;              // Is this arg of an exact class?
@@ -658,10 +658,10 @@ struct InlLclVarInfo
 {
     CORINFO_CLASS_HANDLE lclTypeHandle;             // Type handle from the signature. Available for structs and REFs.
     var_types            lclTypeInfo;               // Type from the signature.
-    unsigned char        lclHasLdlocaOp : 1;        // Is there LDLOCA(s) operation on this local?
-    unsigned char        lclHasStlocOp : 1;         // Is there a STLOC on this local?
+    unsigned char        lclHasLdlocaOp        : 1; // Is there LDLOCA(s) operation on this local?
+    unsigned char        lclHasStlocOp         : 1; // Is there a STLOC on this local?
     unsigned char        lclHasMultipleStlocOp : 1; // Is there more than one STLOC on this local
-    unsigned char        lclIsPinned : 1;
+    unsigned char        lclIsPinned           : 1;
 };
 
 // InlineInfo provides detailed information about a particular inline candidate.
@@ -686,6 +686,7 @@ struct InlineInfo
 
     unsigned      argCnt;
     InlArgInfo    inlArgInfo[MAX_INL_ARGS + 1];
+    InlArgInfo*   inlInstParamArgInfo;
     int           lclTmpNum[MAX_INL_LCLS];                     // map local# -> temp# (-1 if unused)
     InlLclVarInfo lclVarInfo[MAX_INL_LCLS + MAX_INL_ARGS + 1]; // type information from local sig
 
@@ -886,8 +887,8 @@ class InlineContext
     InlinePolicy* m_Policy;            // policy that evaluated this inline
     unsigned      m_TreeID;            // ID of the GenTreeCall in the parent
     bool          m_Devirtualized : 1; // true if this was a devirtualized call
-    bool          m_Guarded : 1;       // true if this was a guarded call
-    bool          m_Unboxed : 1;       // true if this call now invokes the unboxed entry
+    bool          m_Guarded       : 1; // true if this was a guarded call
+    bool          m_Unboxed       : 1; // true if this call now invokes the unboxed entry
 
 #endif // defined(DEBUG)
 
@@ -1025,7 +1026,7 @@ class InlineStrategy
     void DumpDataContents(FILE* file);
 
     // Dump xml-formatted description of inlines
-    void DumpXml(FILE* file = stderr, unsigned indent = 0);
+    void        DumpXml(FILE* file = stderr, unsigned indent = 0);
     static void FinalizeXml(FILE* file = stderr);
 
     // Cache for file position of this method in the inline xml
@@ -1048,7 +1049,7 @@ class InlineStrategy
     enum
     {
         ALWAYS_INLINE_SIZE              = 16,
-        IMPLEMENTATION_MAX_INLINE_SIZE  = _UI16_MAX,
+        IMPLEMENTATION_MAX_INLINE_SIZE  = UINT16_MAX,
         IMPLEMENTATION_MAX_INLINE_DEPTH = 1000
     };
 
diff --git a/src/coreclr/jit/inlinepolicy.cpp b/src/coreclr/jit/inlinepolicy.cpp
index 9a4086dc5af8..3b771f291607 100644
--- a/src/coreclr/jit/inlinepolicy.cpp
+++ b/src/coreclr/jit/inlinepolicy.cpp
@@ -945,8 +945,9 @@ void DefaultPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
     {
         // Inline appears to be unprofitable
         JITLOG_THIS(m_RootCompiler,
-                    (LL_INFO100000, "Native estimate for function size exceeds threshold"
-                                    " for inlining %g > %g (multiplier = %g)\n",
+                    (LL_INFO100000,
+                     "Native estimate for function size exceeds threshold"
+                     " for inlining %g > %g (multiplier = %g)\n",
                      (double)m_CalleeNativeSizeEstimate / SIZE_SCALE, (double)threshold / SIZE_SCALE, m_Multiplier));
 
         // Fail the inline
@@ -963,8 +964,9 @@ void DefaultPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
     {
         // Inline appears to be profitable
         JITLOG_THIS(m_RootCompiler,
-                    (LL_INFO100000, "Native estimate for function size is within threshold"
-                                    " for inlining %g <= %g (multiplier = %g)\n",
+                    (LL_INFO100000,
+                     "Native estimate for function size is within threshold"
+                     " for inlining %g <= %g (multiplier = %g)\n",
                      (double)m_CalleeNativeSizeEstimate / SIZE_SCALE, (double)threshold / SIZE_SCALE, m_Multiplier));
 
         // Update candidacy
@@ -1072,7 +1074,8 @@ bool DefaultPolicy::PropagateNeverToRuntime() const
 //    compiler -- compiler instance doing the inlining (root compiler)
 //    isPrejitRoot -- true if this compiler is prejitting the root method
 
-RandomPolicy::RandomPolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot)
+RandomPolicy::RandomPolicy(Compiler* compiler, bool isPrejitRoot)
+    : DiscretionaryPolicy(compiler, isPrejitRoot)
 {
     m_Random = compiler->m_inlineStrategy->GetRandom();
 }
@@ -1122,7 +1125,7 @@ void RandomPolicy::NoteInt(InlineObservation obs, int value)
 //    methodInfo -- method info for the callee
 //
 // Notes:
-//    The random policy makes random decisions about profitablity.
+//    The random policy makes random decisions about profitability.
 //    Generally we aspire to inline differently, not necessarily to
 //    inline more.
 
@@ -1131,6 +1134,20 @@ void RandomPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
     assert(InlDecisionIsCandidate(m_Decision));
     assert(m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
 
+#if defined(DEBUG)
+
+    // Punt if we're inlining and we've reached the acceptance limit.
+    int      limit   = JitConfig.JitInlineLimit();
+    unsigned current = m_RootCompiler->m_inlineStrategy->GetInlineCount();
+
+    if (!m_IsPrejitRoot && (limit >= 0) && (current >= static_cast<unsigned>(limit)))
+    {
+        SetFailure(InlineObservation::CALLSITE_OVER_INLINE_LIMIT);
+        return;
+    }
+
+#endif // defined(DEBUG)
+
     // Budget check.
     const bool overBudget = this->BudgetCheck();
     if (overBudget)
@@ -2754,7 +2771,8 @@ void DiscretionaryPolicy::DumpData(FILE* file) const
 //    compiler -- compiler instance doing the inlining (root compiler)
 //    isPrejitRoot -- true if this compiler is prejitting the root method
 
-ModelPolicy::ModelPolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot)
+ModelPolicy::ModelPolicy(Compiler* compiler, bool isPrejitRoot)
+    : DiscretionaryPolicy(compiler, isPrejitRoot)
 {
     // Empty
 }
@@ -2955,7 +2973,8 @@ void ModelPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
 //    compiler -- compiler instance doing the inlining (root compiler)
 //    isPrejitRoot -- true if this compiler is prejitting the root method
 
-ProfilePolicy::ProfilePolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot)
+ProfilePolicy::ProfilePolicy(Compiler* compiler, bool isPrejitRoot)
+    : DiscretionaryPolicy(compiler, isPrejitRoot)
 {
     // Empty
 }
@@ -3155,7 +3174,8 @@ void ProfilePolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
 //    compiler -- compiler instance doing the inlining (root compiler)
 //    isPrejitRoot -- true if this compiler is prejitting the root method
 
-FullPolicy::FullPolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot)
+FullPolicy::FullPolicy(Compiler* compiler, bool isPrejitRoot)
+    : DiscretionaryPolicy(compiler, isPrejitRoot)
 {
     // Empty
 }
@@ -3222,7 +3242,8 @@ void FullPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
 //    compiler -- compiler instance doing the inlining (root compiler)
 //    isPrejitRoot -- true if this compiler is prejitting the root method
 
-SizePolicy::SizePolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot)
+SizePolicy::SizePolicy(Compiler* compiler, bool isPrejitRoot)
+    : DiscretionaryPolicy(compiler, isPrejitRoot)
 {
     // Empty
 }
diff --git a/src/coreclr/jit/inlinepolicy.h b/src/coreclr/jit/inlinepolicy.h
index 52333d5aacac..a8d8e67f1db3 100644
--- a/src/coreclr/jit/inlinepolicy.h
+++ b/src/coreclr/jit/inlinepolicy.h
@@ -48,7 +48,8 @@ class LegalPolicy : public InlinePolicy
 
 public:
     // Constructor
-    LegalPolicy(bool isPrejitRoot) : InlinePolicy(isPrejitRoot)
+    LegalPolicy(bool isPrejitRoot)
+        : InlinePolicy(isPrejitRoot)
     {
         // empty
     }
@@ -157,7 +158,7 @@ class DefaultPolicy : public LegalPolicy
     // Helper methods
     virtual double DetermineMultiplier();
     int            DetermineNativeSizeEstimate();
-    int DetermineCallsiteNativeSizeEstimate(CORINFO_METHOD_INFO* methodInfo);
+    int            DetermineCallsiteNativeSizeEstimate(CORINFO_METHOD_INFO* methodInfo);
 
     // Data members
     Compiler*               m_RootCompiler; // root compiler instance
@@ -174,20 +175,20 @@ class DefaultPolicy : public LegalPolicy
     unsigned                m_ConstantArgFeedsConstantTest;
     int                     m_CalleeNativeSizeEstimate;
     int                     m_CallsiteNativeSizeEstimate;
-    bool                    m_IsForceInline : 1;
-    bool                    m_IsForceInlineKnown : 1;
-    bool                    m_IsInstanceCtor : 1;
+    bool                    m_IsForceInline              : 1;
+    bool                    m_IsForceInlineKnown         : 1;
+    bool                    m_IsInstanceCtor             : 1;
     bool                    m_IsFromPromotableValueClass : 1;
-    bool                    m_HasSimd : 1;
-    bool                    m_LooksLikeWrapperMethod : 1;
-    bool                    m_MethodIsMostlyLoadStore : 1;
-    bool                    m_CallsiteIsInTryRegion : 1;
-    bool                    m_CallsiteIsInLoop : 1;
-    bool                    m_IsNoReturn : 1;
-    bool                    m_IsNoReturnKnown : 1;
-    bool                    m_ConstArgFeedsIsKnownConst : 1;
-    bool                    m_ArgFeedsIsKnownConst : 1;
-    bool                    m_InsideThrowBlock : 1;
+    bool                    m_HasSimd                    : 1;
+    bool                    m_LooksLikeWrapperMethod     : 1;
+    bool                    m_MethodIsMostlyLoadStore    : 1;
+    bool                    m_CallsiteIsInTryRegion      : 1;
+    bool                    m_CallsiteIsInLoop           : 1;
+    bool                    m_IsNoReturn                 : 1;
+    bool                    m_IsNoReturnKnown            : 1;
+    bool                    m_ConstArgFeedsIsKnownConst  : 1;
+    bool                    m_ArgFeedsIsKnownConst       : 1;
+    bool                    m_InsideThrowBlock           : 1;
 };
 
 // ExtendedDefaultPolicy is a slightly more aggressive variant of
@@ -271,11 +272,11 @@ class ExtendedDefaultPolicy : public DefaultPolicy
     unsigned m_UnrollableMemop;
     unsigned m_Switch;
     unsigned m_DivByCns;
-    bool     m_ReturnsStructByValue : 1;
-    bool     m_IsFromValueClass : 1;
-    bool     m_NonGenericCallsGeneric : 1;
+    bool     m_ReturnsStructByValue       : 1;
+    bool     m_IsFromValueClass           : 1;
+    bool     m_NonGenericCallsGeneric     : 1;
     bool     m_IsCallsiteInNoReturnRegion : 1;
-    bool     m_HasProfileWeights : 1;
+    bool     m_HasProfileWeights          : 1;
 };
 
 // DiscretionaryPolicy is a variant of the default policy.  It
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp
index caee21bff8d8..79aae2c33454 100644
--- a/src/coreclr/jit/instr.cpp
+++ b/src/coreclr/jit/instr.cpp
@@ -876,7 +876,7 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op)
                         // broadcast -> LCL_VAR(TYP_(U)INT)
                         ssize_t        scalarValue = hwintrinsicChild->AsIntCon()->IconValue();
                         UNATIVE_OFFSET cnum        = emit->emitDataConst(&scalarValue, genTypeSize(simdBaseType),
-                                                                  genTypeSize(simdBaseType), simdBaseType);
+                                                                         genTypeSize(simdBaseType), simdBaseType);
                         return OperandDesc(compiler->eeFindJitDataOffs(cnum));
                     }
                     else
@@ -916,18 +916,14 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op)
 #endif // FEATURE_HW_INTRINSICS
         }
 
-        switch (addr->OperGet())
+        if (addr->isContained() && addr->OperIs(GT_LCL_ADDR))
         {
-            case GT_LCL_ADDR:
-            {
-                assert(addr->isContained());
-                varNum = addr->AsLclFld()->GetLclNum();
-                offset = addr->AsLclFld()->GetLclOffs();
-                break;
-            }
-
-            default:
-                return (memIndir != nullptr) ? OperandDesc(memIndir) : OperandDesc(op->TypeGet(), addr);
+            varNum = addr->AsLclFld()->GetLclNum();
+            offset = addr->AsLclFld()->GetLclOffs();
+        }
+        else
+        {
+            return (memIndir != nullptr) ? OperandDesc(memIndir) : OperandDesc(op->TypeGet(), addr);
         }
     }
     else
@@ -993,6 +989,13 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(GenTree* op)
                         memcpy(&constValue, &op->AsVecCon()->gtSimdVal, sizeof(simd64_t));
                         return OperandDesc(emit->emitSimd64Const(constValue));
                     }
+
+                    case TYP_MASK:
+                    {
+                        simdmask_t constValue;
+                        memcpy(&constValue, &op->AsVecCon()->gtSimdVal, sizeof(simdmask_t));
+                        return OperandDesc(emit->emitSimdMaskConst(constValue));
+                    }
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -1121,9 +1124,9 @@ void CodeGen::inst_RV_TT(instruction ins, emitAttr size, regNumber op1Reg, GenTr
 }
 
 /*****************************************************************************
-*
-*  Generate an instruction of the form "op reg1, reg2, icon".
-*/
+ *
+ *  Generate an instruction of the form "op reg1, reg2, icon".
+ */
 
 void CodeGen::inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival)
 {
@@ -1253,8 +1256,8 @@ void CodeGen::inst_RV_RV_TT(instruction ins,
     emitter* emit = GetEmitter();
     noway_assert(emit->emitVerifyEncodable(ins, EA_SIZE(size), targetReg));
 
-// TODO-XArch-CQ: Commutative operations can have op1 be contained
-// TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
+    // TODO-XArch-CQ: Commutative operations can have op1 be contained
+    // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
 
 #if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
     if (CodeGenInterface::IsEmbeddedBroadcastEnabled(ins, op2))
@@ -1684,12 +1687,17 @@ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg)
         return ins;
     }
 
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#if defined(FEATURE_MASKED_HW_INTRINSICS)
     if (varTypeUsesMaskReg(srcType))
     {
+#if defined(TARGET_XARCH)
         return INS_kmovq_msk;
+#elif defined(TARGET_ARM64)
+        unreached(); // TODO-SVE: This needs testing
+        return INS_sve_mov;
+#endif
     }
-#endif // TARGET_XARCH && FEATURE_SIMD
+#endif // FEATURE_MASKED_HW_INTRINSICS
 
     assert(varTypeUsesFloatReg(srcType));
 
@@ -1834,12 +1842,16 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false*
         return ins;
     }
 
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#if defined(FEATURE_MASKED_HW_INTRINSICS)
     if (varTypeUsesMaskReg(srcType))
     {
+#if defined(TARGET_XARCH)
         return INS_kmovq_msk;
+#elif defined(TARGET_ARM64)
+        return INS_sve_ldr_mask;
+#endif
     }
-#endif // TARGET_XARCH && FEATURE_SIMD
+#endif // FEATURE_MASKED_HW_INTRINSICS
 
     assert(varTypeUsesFloatReg(srcType));
 
@@ -1918,12 +1930,17 @@ instruction CodeGen::ins_Copy(var_types dstType)
 #endif
     }
 
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#if defined(FEATURE_MASKED_HW_INTRINSICS)
     if (varTypeUsesMaskReg(dstType))
     {
+#if defined(TARGET_XARCH)
         return INS_kmovq_msk;
+#elif defined(TARGET_ARM64)
+        unreached(); // TODO-SVE: This needs testing
+        return INS_sve_mov;
+#endif
     }
-#endif // TARGET_XARCH && FEATURE_SIMD
+#endif // FEATURE_MASKED_HW_INTRINSICS
 
     assert(varTypeUsesFloatReg(dstType));
 
@@ -2027,7 +2044,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType)
 #endif
     }
 
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#if defined(FEATURE_MASKED_HW_INTRINSICS)
     if (varTypeUsesMaskReg(dstType))
     {
         if (genIsValidMaskReg(srcReg))
@@ -2038,9 +2055,14 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType)
 
         // mask to int
         assert(genIsValidIntOrFakeReg(srcReg));
+#if defined(TARGET_XARCH)
         return INS_kmovq_gpr;
+#elif defined(TARGET_ARM64)
+        unreached(); // TODO-SVE: This needs testing
+        return INS_sve_mov;
+#endif
     }
-#endif // TARGET_XARCH && FEATURE_SIMD
+#endif // FEATURE_MASKED_HW_INTRINSICS
 
     assert(varTypeUsesFloatReg(dstType));
 
@@ -2142,12 +2164,16 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false
         return ins;
     }
 
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#if defined(FEATURE_MASKED_HW_INTRINSICS)
     if (varTypeUsesMaskReg(dstType))
     {
+#if defined(TARGET_XARCH)
         return INS_kmovq_msk;
+#elif defined(TARGET_ARM64)
+        return INS_sve_str_mask;
+#endif
     }
-#endif // TARGET_XARCH && FEATURE_SIMD
+#endif // FEATURE_MASKED_HW_INTRINSICS
 
     assert(varTypeUsesFloatReg(dstType));
 
@@ -2259,7 +2285,7 @@ instruction CodeGenInterface::ins_StoreFromSrc(regNumber srcReg, var_types dstTy
         return ins_Store(dstType, aligned);
     }
 
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#if defined(FEATURE_MASKED_HW_INTRINSICS)
     if (varTypeUsesMaskReg(dstType))
     {
         if (genIsValidMaskReg(srcReg))
@@ -2272,7 +2298,7 @@ instruction CodeGenInterface::ins_StoreFromSrc(regNumber srcReg, var_types dstTy
         assert(genIsValidIntOrFakeReg(srcReg));
         return ins_Store(dstType, aligned);
     }
-#endif // TARGET_XARCH && FEATURE_SIMD
+#endif // FEATURE_MASKED_HW_INTRINSICS
 
     assert(varTypeUsesFloatReg(dstType));
 
@@ -2382,13 +2408,17 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
             switch (to)
             {
                 case TYP_INT:
-                    return INS_cvttss2si;
+                    return INS_cvttss2si32;
                 case TYP_LONG:
-                    return INS_cvttss2si;
+                    return INS_cvttss2si64;
                 case TYP_FLOAT:
                     return ins_Move_Extend(TYP_FLOAT, false);
                 case TYP_DOUBLE:
                     return INS_cvtss2sd;
+                case TYP_ULONG:
+                    return INS_vcvttss2usi64;
+                case TYP_UINT:
+                    return INS_vcvttss2usi32;
                 default:
                     unreached();
             }
@@ -2398,13 +2428,17 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
             switch (to)
             {
                 case TYP_INT:
-                    return INS_cvttsd2si;
+                    return INS_cvttsd2si32;
                 case TYP_LONG:
-                    return INS_cvttsd2si;
+                    return INS_cvttsd2si64;
                 case TYP_FLOAT:
                     return INS_cvtsd2ss;
                 case TYP_DOUBLE:
                     return ins_Move_Extend(TYP_DOUBLE, false);
+                case TYP_ULONG:
+                    return INS_vcvttsd2usi64;
+                case TYP_UINT:
+                    return INS_vcvttsd2usi32;
                 default:
                     unreached();
             }
diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h
index 4e9515d79aee..d9684e8f96e6 100644
--- a/src/coreclr/jit/instr.h
+++ b/src/coreclr/jit/instr.h
@@ -371,16 +371,22 @@ enum insScalableOpts : unsigned
     INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR, // Variants with {<Pd1>.<T>, <Pd2>.<T>} predicate pair (eg whilege)
     INS_SCALABLE_OPTS_VL_2X,               // Variants with a vector length specifier of 2x (eg whilege)
     INS_SCALABLE_OPTS_VL_4X,               // Variants with a vector length specifier of 4x (eg whilege)
-    INS_SCALABLE_OPTS_SHIFT,               // Variants with an optional shift operation (eg dup)
 
     INS_SCALABLE_OPTS_LSL_N,               // Variants with a LSL #N (eg {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #2])
     INS_SCALABLE_OPTS_MOD_N,               // Variants with a <mod> #N (eg {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2])
 
     INS_SCALABLE_OPTS_WITH_VECTOR_PAIR,    // Variants with {<Zn1>.<T>, <Zn2>.<T>} sve register pair (eg splice)
 
+    INS_SCALABLE_OPTS_IMM_BITMASK,         // Variants with an immediate that is a bitmask
+
+    INS_SCALABLE_OPTS_IMM_FIRST,           // Variants with an immediate and a register, where the immediate comes first
+
     // Removable once REG_V0 and REG_P0 are distinct
     INS_SCALABLE_OPTS_UNPREDICATED,      // Variants without a predicate (eg add)
     INS_SCALABLE_OPTS_UNPREDICATED_WIDE, // Variants without a predicate and wide elements (eg asr)
+    INS_SCALABLE_OPTS_TO_PREDICATE,      // Variants moving to a predicate from a vector (e.g. pmov)
+    INS_SCALABLE_OPTS_TO_VECTOR,         // Variants moving to a vector from a predicate (e.g. pmov)
+    INS_SCALABLE_OPTS_BROADCAST,         // Used to distinguish mov from cpy, where mov is an alias for both
 };
 
 // Maps directly to the pattern used in SVE instructions such as cntb.
@@ -400,11 +406,33 @@ enum insSvePattern : unsigned
     SVE_PATTERN_VL64 = 11,  // 64 elements.
     SVE_PATTERN_VL128 = 12, // 128 elements.
     SVE_PATTERN_VL256 = 13, // 256 elements.
-    SVE_PATTERN_MUL4 = 29,  // The largest multiple of 3.
-    SVE_PATTERN_MUL3 = 30,  // The largest multiple of 4.
+    SVE_PATTERN_MUL4 = 29,  // The largest multiple of 4.
+    SVE_PATTERN_MUL3 = 30,  // The largest multiple of 3.
     SVE_PATTERN_ALL = 31    // All available (implicitly a multiple of two).
 };
 
+// Prefetch operation specifier for SVE instructions such as prfb.
+enum insSvePrfop : unsigned
+{
+    SVE_PRFOP_PLDL1KEEP = 0b0000,
+    SVE_PRFOP_PLDL1STRM = 0b0001, 
+    SVE_PRFOP_PLDL2KEEP = 0b0010, 
+    SVE_PRFOP_PLDL2STRM = 0b0011, 
+    SVE_PRFOP_PLDL3KEEP = 0b0100, 
+    SVE_PRFOP_PLDL3STRM = 0b0101, 
+    SVE_PRFOP_PSTL1KEEP = 0b1000, 
+    SVE_PRFOP_PSTL1STRM = 0b1001, 
+    SVE_PRFOP_PSTL2KEEP = 0b1010, 
+    SVE_PRFOP_PSTL2STRM = 0b1011, 
+    SVE_PRFOP_PSTL3KEEP = 0b1100, 
+    SVE_PRFOP_PSTL3STRM = 0b1101,
+
+    SVE_PRFOP_CONST6    = 0b0110,
+    SVE_PRFOP_CONST7    = 0b0111,
+    SVE_PRFOP_CONST14   = 0b1110,
+    SVE_PRFOP_CONST15   = 0b1111
+};
+
 enum insCond : unsigned
 {
     INS_COND_EQ,
diff --git a/src/coreclr/jit/instrsarm.h b/src/coreclr/jit/instrsarm.h
index 9356150d4b2e..3a1c871d316f 100644
--- a/src/coreclr/jit/instrsarm.h
+++ b/src/coreclr/jit/instrsarm.h
@@ -19,7 +19,7 @@
  *          e8      -- encoding 8
  *          e9      -- encoding 9
  *
-******************************************************************************/
+ ******************************************************************************/
 
 #if !defined(TARGET_ARM)
 #error Unexpected target type
diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h
index c07976f1eca0..c6ac7404c569 100644
--- a/src/coreclr/jit/instrsarm64.h
+++ b/src/coreclr/jit/instrsarm64.h
@@ -18,7 +18,7 @@
  *          e8      -- encoding 8
  *          e9      -- encoding 9
  *
-******************************************************************************/
+ ******************************************************************************/
 
 #if !defined(TARGET_ARM64)
 #error Unexpected target type
diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h
index 710928a35eaf..fb469c0bfdc1 100644
--- a/src/coreclr/jit/instrsarm64sve.h
+++ b/src/coreclr/jit/instrsarm64sve.h
@@ -239,7 +239,6 @@ INST7(ld1sw,             "ld1sw",                 0,                       IF_SV
     // LD1SW   {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]                                      SVE_IU_4B_B         11000101010mmmmm 100gggnnnnnttttt     C540 8000   
     // LD1SW   {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]                                     SVE_IV_3A           11000101001iiiii 100gggnnnnnttttt     C520 8000   
 
-
 //    enum               name                     info                                              SVE_AE_3A        SVE_BD_3A        SVE_EE_1A        SVE_FD_3A        SVE_FD_3B        SVE_FD_3C        
 INST6(mul,               "mul",                   0,                       IF_SVE_6A,               0x04100000,      0x04206000,      0x2530C000,      0x4420F800,      0x44A0F800,      0x44E0F800       )
     // MUL     <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>                                    SVE_AE_3A           00000100xx010000 000gggmmmmmddddd     0410 0000   
@@ -265,7 +264,7 @@ INST6(ld1sb,             "ld1sb",                 0,                       IF_SV
     // LD1SB   {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]                               SVE_HW_4A           110001000h0mmmmm 000gggnnnnnttttt     C400 0000   
     // LD1SB   {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>]                               SVE_HW_4A_A         100001000h0mmmmm 000gggnnnnnttttt     8400 0000   
     // LD1SB   {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]                                      SVE_HW_4B           11000100010mmmmm 100gggnnnnnttttt     C440 8000   
-    // LD1SB   {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]                                     SVE_HX_3A_B         10000100001iiiii 100gggnnnnnttttt     8420 8000   
+    // LD1SB   {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]                                     SVE_HX_3A_B         10000100001iiiii 100gggnnnnnttttt     8420 8000   
     // LD1SB   {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]                            SVE_IJ_3A_D         101001011000iiii 101gggnnnnnttttt     A580 A000   
     // LD1SB   {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>]                                        SVE_IK_4A_F         10100101100mmmmm 010gggnnnnnttttt     A580 4000   
 
@@ -275,7 +274,7 @@ INST6(ld1b,              "ld1b",                  0,                       IF_SV
     // LD1B    {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]                               SVE_HW_4A           110001000h0mmmmm 010gggnnnnnttttt     C400 4000   
     // LD1B    {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>]                               SVE_HW_4A_A         100001000h0mmmmm 010gggnnnnnttttt     8400 4000   
     // LD1B    {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]                                      SVE_HW_4B           11000100010mmmmm 110gggnnnnnttttt     C440 C000   
-    // LD1B    {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]                                     SVE_HX_3A_B         10000100001iiiii 110gggnnnnnttttt     8420 C000   
+    // LD1B    {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]                                     SVE_HX_3A_B         10000100001iiiii 110gggnnnnnttttt     8420 C000   
     // LD1B    {<Zt>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]                            SVE_IJ_3A_E         101001000000iiii 101gggnnnnnttttt     A400 A000   
     // LD1B    {<Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]                                        SVE_IK_4A_H         10100100000mmmmm 010gggnnnnnttttt     A400 4000   
 
@@ -285,7 +284,7 @@ INST6(prfb,              "prfb",                  0,                       IF_SV
     // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]                                   SVE_HY_3A           100001000h1mmmmm 000gggnnnnn0oooo     8420 0000   
     // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]                                   SVE_HY_3A_A         110001000h1mmmmm 000gggnnnnn0oooo     C420 0000   
     // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D]                                          SVE_HY_3B           11000100011mmmmm 100gggnnnnn0oooo     C460 8000   
-    // PRFB    <prfop>, <Pg>, [<Zn>.D{, #<imm>}]                                         SVE_HZ_2A_B         10000100000iiiii 111gggnnnnn0oooo     8400 E000   
+    // PRFB    <prfop>, <Pg>, [<Zn>.S{, #<imm>}]                                         SVE_HZ_2A_B         10000100000iiiii 111gggnnnnn0oooo     8400 E000   
     // PRFB    <prfop>, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]                                SVE_IA_2A           1000010111iiiiii 000gggnnnnn0oooo     85C0 0000   
     // PRFB    <prfop>, <Pg>, [<Xn|SP>, <Xm>]                                            SVE_IB_3A           10000100000mmmmm 110gggnnnnn0oooo     8400 C000   
 
@@ -293,7 +292,7 @@ INST6(prfd,              "prfd",                  0,                       IF_SV
     // PRFD    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #3]                                SVE_HY_3A           100001000h1mmmmm 011gggnnnnn0oooo     8420 6000   
     // PRFD    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3]                                SVE_HY_3A_A         110001000h1mmmmm 011gggnnnnn0oooo     C420 6000   
     // PRFD    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3]                                  SVE_HY_3B           11000100011mmmmm 111gggnnnnn0oooo     C460 E000   
-    // PRFD    <prfop>, <Pg>, [<Zn>.D{, #<imm>}]                                         SVE_HZ_2A_B         10000101100iiiii 111gggnnnnn0oooo     8580 E000   
+    // PRFD    <prfop>, <Pg>, [<Zn>.S{, #<imm>}]                                         SVE_HZ_2A_B         10000101100iiiii 111gggnnnnn0oooo     8580 E000   
     // PRFD    <prfop>, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]                                SVE_IA_2A           1000010111iiiiii 011gggnnnnn0oooo     85C0 6000   
     // PRFD    <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #3]                                    SVE_IB_3A           10000101100mmmmm 110gggnnnnn0oooo     8580 C000   
 
@@ -301,7 +300,7 @@ INST6(prfh,              "prfh",                  0,                       IF_SV
     // PRFH    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1]                                SVE_HY_3A           100001000h1mmmmm 001gggnnnnn0oooo     8420 2000   
     // PRFH    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1]                                SVE_HY_3A_A         110001000h1mmmmm 001gggnnnnn0oooo     C420 2000   
     // PRFH    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #1]                                  SVE_HY_3B           11000100011mmmmm 101gggnnnnn0oooo     C460 A000   
-    // PRFH    <prfop>, <Pg>, [<Zn>.D{, #<imm>}]                                         SVE_HZ_2A_B         10000100100iiiii 111gggnnnnn0oooo     8480 E000   
+    // PRFH    <prfop>, <Pg>, [<Zn>.S{, #<imm>}]                                         SVE_HZ_2A_B         10000100100iiiii 111gggnnnnn0oooo     8480 E000   
     // PRFH    <prfop>, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]                                SVE_IA_2A           1000010111iiiiii 001gggnnnnn0oooo     85C0 2000   
     // PRFH    <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #1]                                    SVE_IB_3A           10000100100mmmmm 110gggnnnnn0oooo     8480 C000   
 
@@ -309,7 +308,7 @@ INST6(prfw,              "prfw",                  0,                       IF_SV
     // PRFW    <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2]                                SVE_HY_3A           100001000h1mmmmm 010gggnnnnn0oooo     8420 4000   
     // PRFW    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2]                                SVE_HY_3A_A         110001000h1mmmmm 010gggnnnnn0oooo     C420 4000   
     // PRFW    <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, LSL #2]                                  SVE_HY_3B           11000100011mmmmm 110gggnnnnn0oooo     C460 C000   
-    // PRFW    <prfop>, <Pg>, [<Zn>.D{, #<imm>}]                                         SVE_HZ_2A_B         10000101000iiiii 111gggnnnnn0oooo     8500 E000   
+    // PRFW    <prfop>, <Pg>, [<Zn>.S{, #<imm>}]                                         SVE_HZ_2A_B         10000101000iiiii 111gggnnnnn0oooo     8500 E000   
     // PRFW    <prfop>, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]                                SVE_IA_2A           1000010111iiiiii 010gggnnnnn0oooo     85C0 4000   
     // PRFW    <prfop>, <Pg>, [<Xn|SP>, <Xm>, LSL #2]                                    SVE_IB_3A           10000101000mmmmm 110gggnnnnn0oooo     8500 C000   
 
@@ -395,7 +394,7 @@ INST5(ldff1sb,           "ldff1sb",               0,                       IF_SV
     // LDFF1SB {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]                               SVE_HW_4A           110001000h0mmmmm 001gggnnnnnttttt     C400 2000   
     // LDFF1SB {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>]                               SVE_HW_4A_A         100001000h0mmmmm 001gggnnnnnttttt     8400 2000   
     // LDFF1SB {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]                                      SVE_HW_4B           11000100010mmmmm 101gggnnnnnttttt     C440 A000   
-    // LDFF1SB {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]                                     SVE_HX_3A_B         10000100001iiiii 101gggnnnnnttttt     8420 A000   
+    // LDFF1SB {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]                                     SVE_HX_3A_B         10000100001iiiii 101gggnnnnnttttt     8420 A000   
     // LDFF1SB {<Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>}]                                      SVE_IG_4A_D         10100101100mmmmm 011gggnnnnnttttt     A580 6000   
 
 
@@ -404,7 +403,7 @@ INST5(ldff1b,            "ldff1b",                0,                       IF_SV
     // LDFF1B  {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]                               SVE_HW_4A           110001000h0mmmmm 011gggnnnnnttttt     C400 6000   
     // LDFF1B  {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>]                               SVE_HW_4A_A         100001000h0mmmmm 011gggnnnnnttttt     8400 6000   
     // LDFF1B  {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]                                      SVE_HW_4B           11000100010mmmmm 111gggnnnnnttttt     C440 E000   
-    // LDFF1B  {<Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]                                     SVE_HX_3A_B         10000100001iiiii 111gggnnnnnttttt     8420 E000   
+    // LDFF1B  {<Zt>.S }, <Pg>/Z, [<Zn>.S{, #<imm>}]                                     SVE_HX_3A_B         10000100001iiiii 111gggnnnnnttttt     8420 E000   
     // LDFF1B  {<Zt>.B }, <Pg>/Z, [<Xn|SP>{, <Xm>}]                                      SVE_IG_4A_E         10100100000mmmmm 011gggnnnnnttttt     A400 6000   
 
 
@@ -442,32 +441,6 @@ INST4(fmov,              "fmov",                  0,                       IF_SV
     // FMOV    <Zd>.<T>, #0.0                                                            SVE_EB_1B           00100101xx111000 11000000000ddddd     2538 C000   
 
 
-//    enum               name                     info                                              SVE_HS_3A        SVE_HS_3A_H      SVE_HS_3A_I      SVE_HS_3A_J      
-INST4(scvtf,             "scvtf",                 0,                       IF_SVE_4C,               0x6594A000,      0x65D0A000,      0x65D4A000,      0x65D6A000       )
-    // SCVTF   <Zd>.S, <Pg>/M, <Zn>.S                                                    SVE_HS_3A           0110010110010100 101gggnnnnnddddd     6594 A000   
-    // SCVTF   <Zd>.D, <Pg>/M, <Zn>.S                                                    SVE_HS_3A_H         0110010111010000 101gggnnnnnddddd     65D0 A000   
-    // SCVTF   <Zd>.S, <Pg>/M, <Zn>.D                                                    SVE_HS_3A_I         0110010111010100 101gggnnnnnddddd     65D4 A000   
-    // SCVTF   <Zd>.D, <Pg>/M, <Zn>.D                                                    SVE_HS_3A_J         0110010111010110 101gggnnnnnddddd     65D6 A000   
-
-INST4(ucvtf,             "ucvtf",                 0,                       IF_SVE_4C,               0x6595A000,      0x65D1A000,      0x65D5A000,      0x65D7A000       )
-    // UCVTF   <Zd>.S, <Pg>/M, <Zn>.S                                                    SVE_HS_3A           0110010110010101 101gggnnnnnddddd     6595 A000   
-    // UCVTF   <Zd>.D, <Pg>/M, <Zn>.S                                                    SVE_HS_3A_H         0110010111010001 101gggnnnnnddddd     65D1 A000   
-    // UCVTF   <Zd>.S, <Pg>/M, <Zn>.D                                                    SVE_HS_3A_I         0110010111010101 101gggnnnnnddddd     65D5 A000   
-    // UCVTF   <Zd>.D, <Pg>/M, <Zn>.D                                                    SVE_HS_3A_J         0110010111010111 101gggnnnnnddddd     65D7 A000   
-
-
-//    enum               name                     info                                              SVE_HP_3B        SVE_HP_3B_H      SVE_HP_3B_I      SVE_HP_3B_J      
-INST4(fcvtzs,            "fcvtzs",                0,                       IF_SVE_4D,               0x659CA000,      0x65DCA000,      0x65D8A000,      0x65DEA000       )
-    // FCVTZS  <Zd>.S, <Pg>/M, <Zn>.S                                                    SVE_HP_3B           0110010110011100 101gggnnnnnddddd     659C A000   
-    // FCVTZS  <Zd>.D, <Pg>/M, <Zn>.S                                                    SVE_HP_3B_H         0110010111011100 101gggnnnnnddddd     65DC A000   
-    // FCVTZS  <Zd>.S, <Pg>/M, <Zn>.D                                                    SVE_HP_3B_I         0110010111011000 101gggnnnnnddddd     65D8 A000   
-    // FCVTZS  <Zd>.D, <Pg>/M, <Zn>.D                                                    SVE_HP_3B_J         0110010111011110 101gggnnnnnddddd     65DE A000   
-
-INST4(fcvtzu,            "fcvtzu",                0,                       IF_SVE_4D,               0x659DA000,      0x65DDA000,      0x65D9A000,      0x65DFA000       )
-    // FCVTZU  <Zd>.S, <Pg>/M, <Zn>.S                                                    SVE_HP_3B           0110010110011101 101gggnnnnnddddd     659D A000   
-    // FCVTZU  <Zd>.D, <Pg>/M, <Zn>.S                                                    SVE_HP_3B_H         0110010111011101 101gggnnnnnddddd     65DD A000   
-    // FCVTZU  <Zd>.S, <Pg>/M, <Zn>.D                                                    SVE_HP_3B_I         0110010111011001 101gggnnnnnddddd     65D9 A000   
-    // FCVTZU  <Zd>.D, <Pg>/M, <Zn>.D                                                    SVE_HP_3B_J         0110010111011111 101gggnnnnnddddd     65DF A000   
 
 
 //    enum               name                     info                                              SVE_BE_3A        SVE_FI_3A        SVE_FI_3B        SVE_FI_3C        
@@ -1121,12 +1094,6 @@ INST2(not,               "not",                   0,                       IF_SV
     // NOT     <Pd>.B, <Pg>/Z, <Pn>.B                                                    SVE_CZ_4A           001001010000MMMM 01gggg1NNNN0DDDD     2500 4200   
 
 
-//    enum               name                     info                                              SVE_HO_3A        SVE_HO_3A_B                 
-INST2(fcvt,              "fcvt",                  0,                       IF_SVE_2AS,              0x65CBA000,      0x65CAA000                  )
-    // FCVT    <Zd>.D, <Pg>/M, <Zn>.S                                                    SVE_HO_3A           0110010111001011 101gggnnnnnddddd     65CB A000   
-    // FCVT    <Zd>.S, <Pg>/M, <Zn>.D                                                    SVE_HO_3A_B         0110010111001010 101gggnnnnnddddd     65CA A000   
-
-
 //    enum               name                     info                                              SVE_AB_3A        SVE_EC_1A                   
 INST2(subr,              "subr",                  0,                       IF_SVE_2AT,              0x04030000,      0x2523C000                  )
     // SUBR    <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>                                    SVE_AB_3A           00000100xx000011 000gggmmmmmddddd     0403 0000   
@@ -1312,8 +1279,8 @@ INST2(pmullt,            "pmullt",                0,                       IF_SV
 
 
 //    enum               name                     info                                              SVE_GQ_3A        SVE_HG_2A                   
-INST2(fcvtnt,            "fcvtnt",                0,                       IF_SVE_2BJ,              0x64CAA000,      0x650A3C00                  )
-    // FCVTNT  <Zd>.S, <Pg>/M, <Zn>.D                                                    SVE_GQ_3A           0110010011001010 101gggnnnnnddddd     64CA A000   
+INST2(fcvtnt,            "fcvtnt",                0,                       IF_SVE_2BJ,              0x6488A000,      0x650A3C00                  )
+    // FCVTNT  <Zd>.H, <Pg>/M, <Zn>.S                                                    SVE_GQ_3A           0110010010001000 101gggnnnnnddddd     6488 A000   
     // FCVTNT  <Zd>.B, {<Zn1>.S-<Zn2>.S }                                                SVE_HG_2A           0110010100001010 001111nnnn0ddddd     650A 3C00   
 
 
@@ -1823,7 +1790,6 @@ INST1(frsqrts,           "frsqrts",               0,                       IF_SV
 INST1(ftsmul,            "ftsmul",                0,                       IF_SVE_HK_3A,            0x65000C00                                   )
     // FTSMUL  <Zd>.<T>, <Zn>.<T>, <Zm>.<T>                                              SVE_HK_3A           01100101xx0mmmmm 000011nnnnnddddd     6500 0C00   
 
-
 //    enum               name                     info                                              SVE_HT_4A                                    
 INST1(facge,             "facge",                 0,                       IF_SVE_HT_4A,            0x6500C010                                   )
     // FACGE   <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>                                      SVE_HT_4A           01100101xx0mmmmm 110gggnnnnn1DDDD     6500 C010   
@@ -2057,14 +2023,6 @@ INST1(xar,               "xar",                   0,                       IF_SV
     // XAR     <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const>                                  SVE_AW_2A           00000100xx1xxiii 001101mmmmmddddd     0420 3400   
 
 
-//    enum               name                     info                                              SVE_HO_3A                                    
-INST1(bfcvt,             "bfcvt",                 0,                       IF_SVE_HO_3A,            0x658AA000                                   )
-    // BFCVT   <Zd>.H, <Pg>/M, <Zn>.S                                                    SVE_HO_3A           0110010110001010 101gggnnnnnddddd     658A A000   
-
-INST1(fcvtx,             "fcvtx",                 0,                       IF_SVE_HO_3A,            0x650AA000                                   )
-    // FCVTX   <Zd>.S, <Pg>/M, <Zn>.D                                                    SVE_HO_3A           0110010100001010 101gggnnnnnddddd     650A A000   
-
-
 //    enum               name                     info                                              SVE_AF_3A                                    
 INST1(andv,              "andv",                  0,                       IF_SVE_AF_3A,            0x041A2000                                   )
     // ANDV    <V><d>, <Pg>, <Zn>.<T>                                                    SVE_AF_3A           00000100xx011010 001gggnnnnnddddd     041A 2000   
@@ -2669,8 +2627,8 @@ INST1(histcnt,           "histcnt",               0,                       IF_SV
 INST1(bfcvtnt,           "bfcvtnt",               0,                       IF_SVE_GQ_3A,            0x648AA000                                   )
     // BFCVTNT <Zd>.H, <Pg>/M, <Zn>.S                                                    SVE_GQ_3A           0110010010001010 101gggnnnnnddddd     648A A000   
 
-INST1(fcvtlt,            "fcvtlt",                0,                       IF_SVE_GQ_3A,            0x64CBA000                                   )
-    // FCVTLT  <Zd>.D, <Pg>/M, <Zn>.S                                                    SVE_GQ_3A           0110010011001011 101gggnnnnnddddd     64CB A000   
+INST1(fcvtlt,            "fcvtlt",                0,                       IF_SVE_GQ_3A,            0x6489A000                                   )
+    // FCVTLT  <Zd>.S, <Pg>/M, <Zn>.H                                                    SVE_GQ_3A           0110010010001001 101gggnnnnnddddd     6489 A000   
 
 INST1(fcvtxnt,           "fcvtxnt",               0,                       IF_SVE_GQ_3A,            0x640AA000                                   )
     // FCVTXNT <Zd>.S, <Pg>/M, <Zn>.D                                                    SVE_GQ_3A           0110010000001010 101gggnnnnnddddd     640A A000   
@@ -2758,10 +2716,35 @@ INST1(ftmad,             "ftmad",                 0,                       IF_SV
     // FTMAD   <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm>                                    SVE_HN_2A           01100101xx010iii 100000mmmmmddddd     6510 8000   
 
 
+//    enum               name                     info                                              SVE_HO_3A                                    
+INST1(bfcvt,             "bfcvt",                 0,                       IF_SVE_HO_3A,            0x658AA000                                   )
+    // BFCVT   <Zd>.H, <Pg>/M, <Zn>.S                                                    SVE_HO_3A           0110010110001010 101gggnnnnnddddd     658A A000   
+
+//    enum               name                     info                                              SVE_HO_3B
+INST1(fcvt,              "fcvt",                  0,                       IF_SVE_HO_3B,            0x6588A000)
+    // FCVT    <Zd>.D, <Pg>/M, <Zn>.S                                                    SVE_HO_3B           0110010110001000 101gggnnnnnddddd     6588 A000   
+
+//    enum               name                     info                                              SVE_HO_3C
+INST1(fcvtx,             "fcvtx",                 0,                       IF_SVE_HO_3C,            0x650AA000                                   )
+    // FCVTX   <Zd>.S, <Pg>/M, <Zn>.D                                                    SVE_HO_3C           0110010100001010 101gggnnnnnddddd     650A A000   
+
 //    enum               name                     info                                              SVE_HP_3A                                    
 INST1(flogb,             "flogb",                 0,                       IF_SVE_HP_3A,            0x6518A000                                   )
     // FLOGB   <Zd>.<T>, <Pg>/M, <Zn>.<T>                                                SVE_HP_3A           0110010100011xx0 101gggnnnnnddddd     6518 A000   
 
+//    enum               name                     info                                              SVE_HP_3B
+INST1(fcvtzs,            "fcvtzs",                0,                       IF_SVE_HP_3B,            0x6518A000)
+    // FCVTZS  <Zd>.<H|S|D>, <Pg>/M, <Zn>.<H|S|D>                                        SVE_HP_3B           0110010100011000 101gggnnnnnddddd     6518 A000   
+
+INST1(fcvtzu,            "fcvtzu",                0,                       IF_SVE_HP_3B,            0x6519A000)
+    // FCVTZU  <Zd>.<H|S|D>, <Pg>/M, <Zn>.<H|S|D>                                        SVE_HP_3B           0110010100011001 101gggnnnnnddddd     6519 A000   
+
+//    enum               name                     info                                              SVE_HS_3A    
+INST1(scvtf,             "scvtf",                 0,                       IF_SVE_HS_3A,            0x6510A000)
+    // SCVTF   <Zd>.<H|S|D>, <Pg>/M, <Zn>.<H|S|D>                                        SVE_HS_3A           0110010100010000 101gggnnnnnddddd     6594 A000   
+
+INST1(ucvtf,             "ucvtf",                 0,                       IF_SVE_HS_3A,            0x6511A000)
+    // UCVTF   <Zd>.<H|S|D>, <Pg>/M, <Zn>.<H|S|D>                                        SVE_HS_3A           0110010100010001 101gggnnnnnddddd     6595 A000   
 
 //    enum               name                     info                                              SVE_HU_4A                                    
 INST1(fnmla,             "fnmla",                 0,                       IF_SVE_HU_4A,            0x65204000                                   )
@@ -2857,6 +2840,11 @@ INST1(ldnt1sw,           "ldnt1sw",               0,                       IF_SV
 INST1(st1q,              "st1q",                  0,                       IF_SVE_IY_4A,            0xE4202000                                   )
     // ST1Q    {<Zt>.Q }, <Pg>, [<Zn>.D{, <Xm>}]                                         SVE_IY_4A           11100100001mmmmm 001gggnnnnnttttt     E420 2000
 
+
+// TODO-SVE: Removable once REG_V0 and REG_P0 are distinct
+INST1(str_mask,          "str_mask",              0,                       IF_SN_0A,                BAD_CODE)
+INST1(ldr_mask,          "ldr_mask",              0,                       IF_SN_0A,                BAD_CODE)
+
 // clang-format on
 
 /*****************************************************************************/
diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h
index 4f94516c5fb9..3794d91e02e3 100644
--- a/src/coreclr/jit/instrsloongarch64.h
+++ b/src/coreclr/jit/instrsloongarch64.h
@@ -11,7 +11,7 @@
  *   mask        -- instruction's mask
  *   fmt         -- disasmbly format
  *
-******************************************************************************/
+ ******************************************************************************/
 
 #if !defined(TARGET_LOONGARCH64)
 #error Unexpected target type
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index 17443cb97849..030bcffd41c6 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -18,7 +18,7 @@
  *          tt      -- the tupletype for the instruction
  *          flags   -- flags, see INS_FLAGS_* enum
  *
-******************************************************************************/
+ ******************************************************************************/
 
 // clang-format off
 #if !defined(TARGET_XARCH)
@@ -201,7 +201,8 @@ INST3(comiss,           "comiss",           IUM_RD, BAD_CODE,     BAD_CODE,
 INST3(cvtsi2ss32,       "cvtsi2ss",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2A),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt DWORD to scalar single
 INST3(cvtsi2ss64,       "cvtsi2ss",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2A),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt QWORD to scalar single
 INST3(cvtss2si,         "cvtss2si",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2D),                            INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt scalar single to DWORD/QWORD
-INST3(cvttss2si,        "cvttss2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar single to DWORD
+INST3(cvttss2si32,      "cvttss2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar single to DWORD
+INST3(cvttss2si64,      "cvttss2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar single to DWORD
 INST3(divps,            "divps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x5E),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Divide packed singles
 INST3(divss,            "divss",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x5E),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Divide scalar singles
 INST3(maxps,            "maxps",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKFLT(0x5F),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Return Maximum packed singles
@@ -260,7 +261,8 @@ INST3(cvtsi2sd64,       "cvtsi2sd",         IUM_WR, BAD_CODE,     BAD_CODE,
 INST3(cvtss2sd,         "cvtss2sd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x5A),                            INS_TT_TUPLE1_SCALAR,                Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // cvt scalar single to scalar doubles
 INST3(cvttpd2dq,        "cvttpd2dq",        IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xE6),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc packed doubles to DWORDs
 INST3(cvttps2dq,        "cvttps2dq",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEFLT(0x5B),                            INS_TT_FULL,                         Input_32Bit    | REX_W0_EVEX  | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc packed singles to DWORDs
-INST3(cvttsd2si,        "cvttsd2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar double to signed DWORDs
+INST3(cvttsd2si32,      "cvttsd2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_W0       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar double to signed DWORDs
+INST3(cvttsd2si64,      "cvttsd2si",        IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x2C),                            INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_W1       | Encoding_VEX  | Encoding_EVEX)                                                                                                                                  // cvt with trunc scalar double to signed DWORDs
 INST3(divpd,            "divpd",            IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x5E),                            INS_TT_FULL,                         Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported)                                                    // Divide packed doubles
 INST3(divsd,            "divsd",            IUM_WR, BAD_CODE,     BAD_CODE,     SSEDBL(0x5E),                            INS_TT_TUPLE1_SCALAR,                Input_64Bit    | REX_W1_EVEX  | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Divide scalar doubles
 INST3(lfence,           "lfence",           IUM_RD, 0x000FE8AE,   BAD_CODE,     BAD_CODE,                                INS_TT_NONE,                                          REX_WIG)
@@ -640,7 +642,8 @@ INST3(vcvtsd2usi,       "cvtsd2usi",        IUM_WR, BAD_CODE,               BAD_
 INST3(vcvtss2usi,       "cvtss2usi",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x79),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt scalar single to unsigned DWORD/QWORD
 INST3(vcvttpd2udq,      "cvttpd2udq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x78),                  INS_TT_FULL,                         Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed doubles to unsigned DWORDs
 INST3(vcvttps2udq,      "cvttps2udq",       IUM_WR, BAD_CODE,               BAD_CODE,     PCKFLT(0x78),                  INS_TT_FULL,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation packed singles to unsigned DWORDs
-INST3(vcvttsd2usi,      "cvttsd2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_WX                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar double to unsigned DWORD/QWORD
+INST3(vcvttsd2usi32,    "cvttsd2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar double to unsigned DWORD
+INST3(vcvttsd2usi64,    "cvttsd2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEDBL(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_64Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar double to unsigned QWORD
 INST3(vcvttss2usi32,    "cvttss2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar single to unsigned DWORD/QWORD
 INST3(vcvttss2usi64,    "cvttss2usi",       IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x78),                  INS_TT_TUPLE1_FIXED,                 Input_32Bit    | REX_W1                       | Encoding_EVEX)                                                                                                                                  // cvt w/ truncation scalar single to unsigned DWORD/QWORD
 INST3(vcvtudq2pd,       "cvtudq2pd",        IUM_WR, BAD_CODE,               BAD_CODE,     SSEFLT(0x7A),                  INS_TT_HALF,                         Input_32Bit    | REX_W0                       | Encoding_EVEX)                                                                                                                                  // cvt packed unsigned DWORDs to doubles
diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h
index 9711f54dbfc7..f34e548a26d4 100644
--- a/src/coreclr/jit/jit.h
+++ b/src/coreclr/jit/jit.h
@@ -17,17 +17,14 @@
 #endif
 #endif
 
-// Clang-format messes with the indentation of comments if they directly precede an
-// ifdef. This macro allows us to anchor the comments to the regular flow of code.
-#define CLANG_FORMAT_COMMENT_ANCHOR ;
-
 // Clang-tidy replaces 0 with nullptr in some templated functions, causing a build
 // break. Replacing those instances with ZERO avoids this change
 #define ZERO 0
 
 #ifdef _MSC_VER
-#define CHECK_STRUCT_PADDING 0 // Set this to '1' to enable warning C4820 "'bytes' bytes padding added after
-                               // construct 'member_name'" on interesting structs/classes
+#define CHECK_STRUCT_PADDING                                                                                           \
+    0 // Set this to '1' to enable warning C4820 "'bytes' bytes padding added after
+      // construct 'member_name'" on interesting structs/classes
 #else
 #define CHECK_STRUCT_PADDING 0 // Never enable it for non-MSFT compilers
 #endif
@@ -302,9 +299,9 @@ typedef ptrdiff_t ssize_t;
 #include "corjit.h"
 #include "jitee.h"
 
-#define __OPERATOR_NEW_INLINE 1 // indicate that I will define these
-#define __PLACEMENT_NEW_INLINE  // don't bring in the global placement new, it is easy to make a mistake
-                                // with our new(compiler*) pattern.
+#define __OPERATOR_NEW_INLINE  1 // indicate that I will define these
+#define __PLACEMENT_NEW_INLINE   // don't bring in the global placement new, it is easy to make a mistake
+                                 // with our new(compiler*) pattern.
 
 #include "utilcode.h" // this defines assert as _ASSERTE
 #include "host.h"     // this redefines assert for the JIT to use assertAbort
@@ -326,7 +323,7 @@ typedef ptrdiff_t ssize_t;
 #endif
 
 #ifdef DEBUG
-#define INDEBUG(x) x
+#define INDEBUG(x)  x
 #define DEBUGARG(x) , x
 #else
 #define INDEBUG(x)
@@ -341,7 +338,7 @@ typedef ptrdiff_t ssize_t;
 
 #if defined(UNIX_AMD64_ABI)
 #define UNIX_AMD64_ABI_ONLY_ARG(x) , x
-#define UNIX_AMD64_ABI_ONLY(x) x
+#define UNIX_AMD64_ABI_ONLY(x)     x
 #else // !defined(UNIX_AMD64_ABI)
 #define UNIX_AMD64_ABI_ONLY_ARG(x)
 #define UNIX_AMD64_ABI_ONLY(x)
@@ -354,7 +351,7 @@ typedef ptrdiff_t ssize_t;
 
 #if defined(TARGET_LOONGARCH64)
 #define UNIX_LOONGARCH64_ONLY_ARG(x) , x
-#define UNIX_LOONGARCH64_ONLY(x) x
+#define UNIX_LOONGARCH64_ONLY(x)     x
 #else // !TARGET_LOONGARCH64
 #define UNIX_LOONGARCH64_ONLY_ARG(x)
 #define UNIX_LOONGARCH64_ONLY(x)
@@ -367,16 +364,16 @@ typedef ptrdiff_t ssize_t;
 
 #if defined(UNIX_AMD64_ABI)
 #define UNIX_AMD64_ABI_ONLY_ARG(x) , x
-#define UNIX_AMD64_ABI_ONLY(x) x
+#define UNIX_AMD64_ABI_ONLY(x)     x
 #else // !defined(UNIX_AMD64_ABI)
 #define UNIX_AMD64_ABI_ONLY_ARG(x)
 #define UNIX_AMD64_ABI_ONLY(x)
 #endif // defined(UNIX_AMD64_ABI)
 
 #if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-#define MULTIREG_HAS_SECOND_GC_RET 1
+#define MULTIREG_HAS_SECOND_GC_RET             1
 #define MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(x) , x
-#define MULTIREG_HAS_SECOND_GC_RET_ONLY(x) x
+#define MULTIREG_HAS_SECOND_GC_RET_ONLY(x)     x
 #else // !defined(UNIX_AMD64_ABI)
 #define MULTIREG_HAS_SECOND_GC_RET 0
 #define MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(x)
@@ -397,7 +394,7 @@ typedef ptrdiff_t ssize_t;
 #define DUMMY_INIT(x) (x)
 
 #define REGEN_SHORTCUTS 0
-#define REGEN_CALLPAT 0
+#define REGEN_CALLPAT   0
 
 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@@ -489,9 +486,9 @@ class GlobalJitOptions
 
 /*****************************************************************************/
 
-#define CSE_INTO_HANDLERS 0
-#define DUMP_FLOWGRAPHS DEBUG                  // Support for creating Xml Flowgraph reports in *.fgx files
-#define HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION 0 // if 1 we must have all handler entry points in the Hot code section
+#define CSE_INTO_HANDLERS                    0
+#define DUMP_FLOWGRAPHS                      DEBUG // Support for creating Xml Flowgraph reports in *.fgx files
+#define HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION 0     // if 1 we must have all handler entry points in the Hot code section
 
 /*****************************************************************************/
 
@@ -499,40 +496,43 @@ class GlobalJitOptions
 
 /*****************************************************************************/
 
-#define DUMP_GC_TABLES DEBUG
+#define DUMP_GC_TABLES   DEBUG
 #define VERIFY_GC_TABLES 0
-#define REARRANGE_ADDS 1
+#define REARRANGE_ADDS   1
 
-#define FUNC_INFO_LOGGING 1 // Support dumping function info to a file. In retail, only NYIs, with no function name,
-                            // are dumped.
+#define FUNC_INFO_LOGGING                                                                                              \
+    1 // Support dumping function info to a file. In retail, only NYIs, with no function name,
+      // are dumped.
 
 /*****************************************************************************/
 /*****************************************************************************/
 /* Set these to 1 to collect and output various statistics about the JIT */
 
-#define CALL_ARG_STATS 0      // Collect stats about calls and call arguments.
-#define COUNT_BASIC_BLOCKS 0  // Create a histogram of basic block sizes, and a histogram of IL sizes in the simple
-                              // case of single block methods.
-#define COUNT_LOOPS 0         // Collect stats about loops, such as the total number of natural loops, a histogram of
+#define CALL_ARG_STATS 0 // Collect stats about calls and call arguments.
+#define COUNT_BASIC_BLOCKS                                                                                             \
+    0 // Create a histogram of basic block sizes, and a histogram of IL sizes in the simple
+      // case of single block methods.
+#define COUNT_LOOPS                                                                                                    \
+    0                         // Collect stats about loops, such as the total number of natural loops, a histogram of
                               // the number of loop exits, etc.
-#define DISPLAY_SIZES 0       // Display generated code, data, and GC information sizes.
-#define MEASURE_BLOCK_SIZE 0  // Collect stats about basic block and FlowEdge node sizes and memory allocations.
-#define MEASURE_FATAL 0       // Count the number of calls to fatal(), including NYIs and noway_asserts.
-#define MEASURE_NODE_SIZE 0   // Collect stats about GenTree node allocations.
+#define DISPLAY_SIZES       0 // Display generated code, data, and GC information sizes.
+#define MEASURE_BLOCK_SIZE  0 // Collect stats about basic block and FlowEdge node sizes and memory allocations.
+#define MEASURE_FATAL       0 // Count the number of calls to fatal(), including NYIs and noway_asserts.
+#define MEASURE_NODE_SIZE   0 // Collect stats about GenTree node allocations.
 #define MEASURE_PTRTAB_SIZE 0 // Collect stats about GC pointer table allocations.
-#define EMITTER_STATS 0       // Collect stats on the emitter.
-#define NODEBASH_STATS 0      // Collect stats on changed gtOper values in GenTree's.
-#define COUNT_AST_OPERS 0     // Display use counts for GenTree operators.
+#define EMITTER_STATS       0 // Collect stats on the emitter.
+#define NODEBASH_STATS      0 // Collect stats on changed gtOper values in GenTree's.
+#define COUNT_AST_OPERS     0 // Display use counts for GenTree operators.
 
 #ifdef DEBUG
 #define MEASURE_MEM_ALLOC 1 // Collect memory allocation stats.
-#define LOOP_HOIST_STATS 1  // Collect loop hoisting stats.
-#define TRACK_LSRA_STATS 1  // Collect LSRA stats
+#define LOOP_HOIST_STATS  1 // Collect loop hoisting stats.
+#define TRACK_LSRA_STATS  1 // Collect LSRA stats
 #define TRACK_ENREG_STATS 1 // Collect enregistration stats
 #else
 #define MEASURE_MEM_ALLOC 0 // You can set this to 1 to get memory stats in retail, as well
-#define LOOP_HOIST_STATS 0  // You can set this to 1 to get loop hoist stats in retail, as well
-#define TRACK_LSRA_STATS 0  // You can set this to 1 to get LSRA stats in retail, as well
+#define LOOP_HOIST_STATS  0 // You can set this to 1 to get loop hoist stats in retail, as well
+#define TRACK_LSRA_STATS  0 // You can set this to 1 to get LSRA stats in retail, as well
 #define TRACK_ENREG_STATS 0
 #endif
 
@@ -618,7 +618,7 @@ const bool dspGCtbls = true;
         JitTls::GetCompiler()->fgTableDispBasicBlock(b);
 #define VERBOSE JitTls::GetCompiler()->verbose
 // Development-time only macros, simplify guards for specified IL methods one wants to debug/add log messages for
-#define ISMETHOD(name) (strcmp(JitTls::GetCompiler()->impInlineRoot()->info.compMethodName, name) == 0)
+#define ISMETHOD(name)     (strcmp(JitTls::GetCompiler()->impInlineRoot()->info.compMethodName, name) == 0)
 #define ISMETHODHASH(hash) (JitTls::GetCompiler()->impInlineRoot()->info.compMethodHash() == hash)
 #else // !DEBUG
 #define JITDUMP(...)
@@ -644,8 +644,9 @@ const bool dspGCtbls = true;
  */
 
 #ifdef TARGET_X86
-#define DOUBLE_ALIGN 1 // permit the double alignment of ESP in prolog,
-                       //  and permit the double alignment of local offsets
+#define DOUBLE_ALIGN                                                                                                   \
+    1 // permit the double alignment of ESP in prolog,
+      //  and permit the double alignment of local offsets
 #else
 #define DOUBLE_ALIGN 0 // no special handling for double alignment
 #endif
@@ -689,7 +690,7 @@ inline bool IsUninitialized(T data);
 #define MISALIGNED_RD_U2(src) (*castto(src, unsigned short*))
 
 #define MISALIGNED_WR_I2(dst, val) *castto(dst, short*) = val;
-#define MISALIGNED_WR_I4(dst, val) *castto(dst, int*)   = val;
+#define MISALIGNED_WR_I4(dst, val) *castto(dst, int*) = val;
 
 #define MISALIGNED_WR_ST(dst, val) *castto(dst, ssize_t*) = val;
 
@@ -713,19 +714,19 @@ inline unsigned int roundUp(unsigned size, unsigned mult)
 
 inline unsigned int unsigned_abs(int x)
 {
-    return ((unsigned int)abs(x));
+    return ((unsigned int)std::abs(x));
 }
 
 #ifdef TARGET_64BIT
 inline size_t unsigned_abs(ssize_t x)
 {
-    return ((size_t)abs((__int64)x));
+    return ((size_t)std::abs((__int64)x));
 }
 
 #ifdef __APPLE__
 inline size_t unsigned_abs(__int64 x)
 {
-    return ((size_t)abs(x));
+    return ((size_t)std::abs(x));
 }
 #endif // __APPLE__
 #endif // TARGET_64BIT
@@ -756,16 +757,16 @@ inline size_t unsigned_abs(__int64 x)
 #define FEATURE_TAILCALL_OPT_SHARED_RETURN 0
 #endif // !FEATURE_TAILCALL_OPT
 
-#define CLFLG_CODESIZE 0x00001
-#define CLFLG_CODESPEED 0x00002
-#define CLFLG_CSE 0x00004
-#define CLFLG_REGVAR 0x00008
-#define CLFLG_RNGCHKOPT 0x00010
-#define CLFLG_DEADSTORE 0x00020
+#define CLFLG_CODESIZE   0x00001
+#define CLFLG_CODESPEED  0x00002
+#define CLFLG_CSE        0x00004
+#define CLFLG_REGVAR     0x00008
+#define CLFLG_RNGCHKOPT  0x00010
+#define CLFLG_DEADSTORE  0x00020
 #define CLFLG_CODEMOTION 0x00040
-#define CLFLG_QMARK 0x00080
-#define CLFLG_TREETRANS 0x00100
-#define CLFLG_INLINING 0x00200
+#define CLFLG_QMARK      0x00080
+#define CLFLG_TREETRANS  0x00100
+#define CLFLG_INLINING   0x00200
 
 #if FEATURE_STRUCTPROMOTE
 #define CLFLG_STRUCTPROMOTE 0x00400
@@ -829,7 +830,7 @@ class JitTls
 #endif
 
     static Compiler* GetCompiler();
-    static void SetCompiler(Compiler* compiler);
+    static void      SetCompiler(Compiler* compiler);
 };
 
 #if defined(DEBUG)
diff --git a/src/coreclr/jit/jitconfig.cpp b/src/coreclr/jit/jitconfig.cpp
index 3c85031cee6c..19730be75c2c 100644
--- a/src/coreclr/jit/jitconfig.cpp
+++ b/src/coreclr/jit/jitconfig.cpp
@@ -193,7 +193,7 @@ void JitConfigValues::initialize(ICorJitHost* host)
     assert(!m_isInitialized);
 
 #define CONFIG_INTEGER(name, key, defaultValue) m_##name = host->getIntConfigValue(key, defaultValue);
-#define CONFIG_STRING(name, key) m_##name = host->getStringConfigValue(key);
+#define CONFIG_STRING(name, key)                m_##name = host->getStringConfigValue(key);
 #define CONFIG_METHODSET(name, key)                                                                                    \
     const WCHAR* name##value = host->getStringConfigValue(key);                                                        \
     m_##name.initialize(name##value, host);                                                                            \
@@ -212,7 +212,7 @@ void JitConfigValues::destroy(ICorJitHost* host)
     }
 
 #define CONFIG_INTEGER(name, key, defaultValue)
-#define CONFIG_STRING(name, key) host->freeStringConfigValue(m_##name);
+#define CONFIG_STRING(name, key)    host->freeStringConfigValue(m_##name);
 #define CONFIG_METHODSET(name, key) m_##name.destroy(host);
 
 #include "jitconfigvalues.h"
diff --git a/src/coreclr/jit/jitconfig.h b/src/coreclr/jit/jitconfig.h
index e19021cd52f2..bd1c552f5943 100644
--- a/src/coreclr/jit/jitconfig.h
+++ b/src/coreclr/jit/jitconfig.h
@@ -31,7 +31,7 @@ class JitConfigValues
         char*       m_list;
         MethodName* m_names;
 
-        MethodSet(const MethodSet& other) = delete;
+        MethodSet(const MethodSet& other)            = delete;
         MethodSet& operator=(const MethodSet& other) = delete;
 
     public:
@@ -56,8 +56,8 @@ class JitConfigValues
 
 private:
 #define CONFIG_INTEGER(name, key, defaultValue) int m_##name;
-#define CONFIG_STRING(name, key) const WCHAR* m_##name;
-#define CONFIG_METHODSET(name, key) MethodSet m_##name;
+#define CONFIG_STRING(name, key)                const WCHAR* m_##name;
+#define CONFIG_METHODSET(name, key)             MethodSet m_##name;
 #include "jitconfigvalues.h"
 
 public:
@@ -81,7 +81,7 @@ class JitConfigValues
 private:
     bool m_isInitialized;
 
-    JitConfigValues(const JitConfigValues& other) = delete;
+    JitConfigValues(const JitConfigValues& other)            = delete;
     JitConfigValues& operator=(const JitConfigValues& other) = delete;
 
 public:
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index d5ea332d0fa1..041e29ad3823 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -71,7 +71,7 @@ CONFIG_INTEGER(JitHideAlignBehindJmp,
                1) // If set, try to hide align instruction (if any) behind an unconditional jump instruction (if any)
                   // that is present before the loop start.
 
-CONFIG_INTEGER(JitOptimizeStructHiddenBuffer, W("JitOptimizeStructHiddenBuffer"), 1) // Track assignments to locals done
+CONFIG_INTEGER(JitOptimizeStructHiddenBuffer, W("JitOptimizeStructHiddenBuffer"), 1) // Track stores to locals done
                                                                                      // through return buffers.
 
 CONFIG_INTEGER(JitUnrollLoopMaxIterationCount,
@@ -125,9 +125,8 @@ CONFIG_INTEGER(JitNoForceFallback, W("JitNoForceFallback"), 0) // Set to non-zer
                                                                // flags.
 CONFIG_INTEGER(JitNoForwardSub, W("JitNoForwardSub"), 0)       // Disables forward sub
 CONFIG_INTEGER(JitNoHoist, W("JitNoHoist"), 0)
-CONFIG_INTEGER(JitNoInline, W("JitNoInline"), 0)                 // Disables inlining of all methods
-CONFIG_INTEGER(JitNoMemoryBarriers, W("JitNoMemoryBarriers"), 0) // If 1, don't generate memory barriers
-CONFIG_INTEGER(JitNoRegLoc, W("JitNoRegLoc"), 0)
+CONFIG_INTEGER(JitNoInline, W("JitNoInline"), 0)                   // Disables inlining of all methods
+CONFIG_INTEGER(JitNoMemoryBarriers, W("JitNoMemoryBarriers"), 0)   // If 1, don't generate memory barriers
 CONFIG_INTEGER(JitNoStructPromotion, W("JitNoStructPromotion"), 0) // Disables struct promotion 1 - for all, 2 - for
                                                                    // params.
 CONFIG_INTEGER(JitNoUnroll, W("JitNoUnroll"), 0)
@@ -137,6 +136,8 @@ CONFIG_INTEGER(JitReportFastTailCallDecisions, W("JitReportFastTailCallDecisions
 CONFIG_INTEGER(JitPInvokeCheckEnabled, W("JITPInvokeCheckEnabled"), 0)
 CONFIG_INTEGER(JitPInvokeEnabled, W("JITPInvokeEnabled"), 1)
 
+CONFIG_INTEGER(JitHoistLimit, W("JitHoistLimit"), -1) // Specifies the maximum number of hoist candidates to hoist
+
 // Controls verbosity for JitPrintInlinedMethods. Ignored for JitDump where
 // it's always set.
 CONFIG_INTEGER(JitPrintInlinedMethodsVerbose, W("JitPrintInlinedMethodsVerboseLevel"), 0)
@@ -144,11 +145,12 @@ CONFIG_INTEGER(JitPrintInlinedMethodsVerbose, W("JitPrintInlinedMethodsVerboseLe
 CONFIG_METHODSET(JitPrintInlinedMethods, W("JitPrintInlinedMethods"))
 
 CONFIG_METHODSET(JitPrintDevirtualizedMethods, W("JitPrintDevirtualizedMethods"))
-// -1: just do internal checks
-// Else bitflag: 0x1 check classic, 0x2 check likely, 0x4 enable asserts
+
+// -1: just do internal checks (CHECK_HASLIKELIHOOD | CHECK_LIKELIHOODSUM | RAISE_ASSERT)
+// Else bitflag of ProfileChecks enum.
 CONFIG_INTEGER(JitProfileChecks, W("JitProfileChecks"), -1)
+
 CONFIG_INTEGER(JitRequired, W("JITRequired"), -1)
-CONFIG_INTEGER(JitRoundFloat, W("JITRoundFloat"), DEFAULT_ROUND_LEVEL)
 CONFIG_INTEGER(JitStackAllocToLocalSize, W("JitStackAllocToLocalSize"), DEFAULT_MAX_LOCALLOC_TO_LOCAL_SIZE)
 CONFIG_INTEGER(JitSkipArrayBoundCheck, W("JitSkipArrayBoundCheck"), 0)
 CONFIG_INTEGER(JitSlowDebugChecksEnabled, W("JitSlowDebugChecksEnabled"), 1) // Turn on slow debug checks
@@ -160,9 +162,7 @@ CONFIG_INTEGER(JitSsaStress, W("JitSsaStress"), 0) // Perturb order of processin
 CONFIG_INTEGER(JitStackChecks, W("JitStackChecks"), 0)
 CONFIG_INTEGER(JitStress, W("JitStress"), 0) // Internal Jit stress mode: 0 = no stress, 2 = all stress, other = vary
                                              // stress based on a hash of the method and this value
-CONFIG_INTEGER(JitStressBBProf, W("JitStressBBProf"), 0)               // Internal Jit stress mode
-CONFIG_INTEGER(JitStressModeNamesOnly, W("JitStressModeNamesOnly"), 0) // Internal Jit stress: if nonzero, only enable
-                                                                       // stress modes listed in JitStressModeNames
+CONFIG_INTEGER(JitStressBBProf, W("JitStressBBProf"), 0)                         // Internal Jit stress mode
 CONFIG_INTEGER(JitStressProcedureSplitting, W("JitStressProcedureSplitting"), 0) // Always split after the first basic
                                                                                  // block.
 CONFIG_INTEGER(JitStressRegs, W("JitStressRegs"), 0)
@@ -175,7 +175,6 @@ CONFIG_INTEGER(RunAltJitCode, W("RunAltJitCode"), 1) // If non-zero, and the com
                                                      // code and fall back to the default compiler.
 CONFIG_INTEGER(RunComponentUnitTests, W("JitComponentUnitTests"), 0) // Run JIT component unit tests
 CONFIG_INTEGER(ShouldInjectFault, W("InjectFault"), 0)
-CONFIG_INTEGER(StressCOMCall, W("StressCOMCall"), 0)
 CONFIG_INTEGER(TailcallStress, W("TailcallStress"), 0)
 CONFIG_INTEGER(TreesBeforeAfterMorph, W("JitDumpBeforeAfterMorph"), 0) // If 1, display each tree before/after morphing
 
@@ -240,12 +239,24 @@ CONFIG_INTEGER(JitDumpFgBlockOrder, W("JitDumpFgBlockOrder"), 0) // 0 == bbNext
 CONFIG_INTEGER(JitDumpFgMemorySsa, W("JitDumpFgMemorySsa"), 0)   // non-zero: show memory phis + SSA/VNs
 
 CONFIG_STRING(JitRange, W("JitRange"))
-CONFIG_STRING(JitStressModeNames, W("JitStressModeNames")) // Internal Jit stress mode: stress using the given set of
-                                                           // stress mode names, e.g. STRESS_REGS, STRESS_TAILCALL
-CONFIG_STRING(JitStressModeNamesNot, W("JitStressModeNamesNot")) // Internal Jit stress mode: do NOT stress using the
-                                                                 // given set of stress mode names, e.g. STRESS_REGS,
-                                                                 // STRESS_TAILCALL
-CONFIG_STRING(JitStressRange, W("JitStressRange"))               // Internal Jit stress mode
+
+// Internal Jit stress mode: stress using the given set of stress mode names, e.g. STRESS_REGS, STRESS_TAILCALL.
+// Unless JitStressModeNamesOnly is non-zero, other stress modes from a JitStress setting may also be invoked.
+CONFIG_STRING(JitStressModeNames, W("JitStressModeNames"))
+
+// Internal Jit stress: if nonzero, only enable stress modes listed in JitStressModeNames.
+CONFIG_INTEGER(JitStressModeNamesOnly, W("JitStressModeNamesOnly"), 0)
+
+// Internal Jit stress mode: only allow stress using the given set of stress mode names, e.g. STRESS_REGS,
+// STRESS_TAILCALL. Note that JitStress must be enabled first, and then only the mentioned stress modes are allowed
+// to be used, at the same percentage weighting as with JitStress -- the stress modes mentioned are NOT
+// unconditionally true for a call to `compStressCompile`. This is basically the opposite of JitStressModeNamesNot.
+CONFIG_STRING(JitStressModeNamesAllow, W("JitStressModeNamesAllow"))
+
+// Internal Jit stress mode: do NOT stress using the given set of stress mode names, e.g. STRESS_REGS, STRESS_TAILCALL
+CONFIG_STRING(JitStressModeNamesNot, W("JitStressModeNamesNot"))
+
+CONFIG_STRING(JitStressRange, W("JitStressRange"))        // Internal Jit stress mode
 CONFIG_METHODSET(JitEmitUnitTests, W("JitEmitUnitTests")) // Generate emitter unit tests in the specified functions
 CONFIG_STRING(JitEmitUnitTestsSections, W("JitEmitUnitTestsSections")) // Generate this set of unit tests
 
@@ -253,7 +264,8 @@ CONFIG_STRING(JitEmitUnitTestsSections, W("JitEmitUnitTestsSections")) // Genera
 /// JIT Hardware Intrinsics
 ///
 CONFIG_INTEGER(EnableIncompleteISAClass, W("EnableIncompleteISAClass"), 0) // Enable testing not-yet-implemented
-#endif                                                                     // defined(DEBUG)
+
+#endif // defined(DEBUG)
 
 CONFIG_METHODSET(JitDisasm, W("JitDisasm"))                // Print codegen for given methods
 CONFIG_INTEGER(JitDisasmTesting, W("JitDisasmTesting"), 0) // Display BEGIN METHOD/END METHOD anchors for disasm testing
@@ -277,13 +289,6 @@ CONFIG_INTEGER(JitAlignLoops, W("JitAlignLoops"), 1) // If set, align inner loop
 CONFIG_INTEGER(JitAlignLoops, W("JitAlignLoops"), 0)
 #endif
 
-///
-/// JIT
-///
-#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
-CONFIG_INTEGER(JitNoRangeChks, W("JitNoRngChks"), 0) // If 1, don't generate range checks
-#endif
-
 // AltJitAssertOnNYI should be 0 on targets where JIT is under development or bring up stage, so as to facilitate
 // fallback to main JIT on hitting a NYI.
 CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 1) // Controls the AltJit behavior of NYI stuff
@@ -326,6 +331,7 @@ CONFIG_INTEGER(EnableAVX512F,               W("EnableAVX512F"),             1) /
 CONFIG_INTEGER(EnableAVX512F_VL,            W("EnableAVX512F_VL"),          1) // Allows AVX512F+ AVX512VL+ hardware intrinsics to be disabled
 CONFIG_INTEGER(EnableAVX512VBMI,            W("EnableAVX512VBMI"),          1) // Allows AVX512VBMI+ hardware intrinsics to be disabled
 CONFIG_INTEGER(EnableAVX512VBMI_VL,         W("EnableAVX512VBMI_VL"),       1) // Allows AVX512VBMI_VL+ hardware intrinsics to be disabled
+CONFIG_INTEGER(EnableAVX10v1,               W("EnableAVX10v1"),             1) // Allows AVX10v1+ hardware intrinsics to be disabled
 CONFIG_INTEGER(EnableAVXVNNI,               W("EnableAVXVNNI"),             1) // Allows AVXVNNI+ hardware intrinsics to be disabled
 CONFIG_INTEGER(EnableBMI1,                  W("EnableBMI1"),                1) // Allows BMI1+ hardware intrinsics to be disabled
 CONFIG_INTEGER(EnableBMI2,                  W("EnableBMI2"),                1) // Allows BMI2+ hardware intrinsics to be disabled
@@ -363,21 +369,30 @@ CONFIG_INTEGER(JitDisableSimdVN, W("JitDisableSimdVN"), 0) // Default 0, ValueNu
                                                            // If 3, disable both SIMD and HW Intrinsic nodes
 #endif                                                     // FEATURE_SIMD
 
-// Default 0, enable the CSE of Constants, including nearby offsets. (only for ARM64)
+// Default 0, enable the CSE of Constants, including nearby offsets. (only for ARM/ARM64)
 // If 1, disable all the CSE of Constants
-// If 2, enable the CSE of Constants but don't combine with nearby offsets. (only for ARM64)
+// If 2, enable the CSE of Constants but don't combine with nearby offsets. (only for ARM/ARM64)
 // If 3, enable the CSE of Constants including nearby offsets. (all platforms)
 // If 4, enable the CSE of Constants but don't combine with nearby offsets. (all platforms)
 //
 CONFIG_INTEGER(JitConstCSE, W("JitConstCSE"), 0)
 
-#define CONST_CSE_ENABLE_ARM 0
-#define CONST_CSE_DISABLE_ALL 1
+#define CONST_CSE_ENABLE_ARM            0
+#define CONST_CSE_DISABLE_ALL           1
 #define CONST_CSE_ENABLE_ARM_NO_SHARING 2
-#define CONST_CSE_ENABLE_ALL 3
+#define CONST_CSE_ENABLE_ALL            3
 #define CONST_CSE_ENABLE_ALL_NO_SHARING 4
 
+// If nonzero, use the greedy RL policy.
+//
+CONFIG_INTEGER(JitRLCSEGreedy, W("JitRLCSEGreedy"), 0)
+
+// If nonzero, dump out details of parameterized policy evaluation and
+// gradient updates
+CONFIG_INTEGER(JitRLCSEVerbose, W("JitRLCSEVerbose"), 0)
+
 #if defined(DEBUG)
+
 // Allow fine-grained controls of CSEs done in a particular method
 //
 // Specify method that will respond to the CSEMask.
@@ -415,7 +430,7 @@ CONFIG_STRING(JitReplayCSE, W("JitReplayCSE"))
 CONFIG_STRING(JitReplayCSEReward, W("JitReplayCSEReward"))
 
 // When set, specifies the initial parameter string for
-// a reinforcement-learning based CSE heuristic.
+// the reinforcement-learning based CSE heuristic.
 //
 // Note you can also set JitReplayCSE and JitReplayCSEPerfScore
 // along with this, in which case we are asking for a policy
@@ -426,17 +441,10 @@ CONFIG_STRING(JitRLCSE, W("JitRLCSE"))
 // use in learning.
 CONFIG_STRING(JitRLCSEAlpha, W("JitRLCSEAlpha"))
 
-// If nonzero, dump out details of policy evaluation and
-// gradient updates
-CONFIG_INTEGER(JitRLCSEVerbose, W("JitRLCSEVerbose"), 0)
-
 // If nonzero, dump candidate feature values
 CONFIG_INTEGER(JitRLCSECandidateFeatures, W("JitRLCSECandidateFeatures"), 0)
 
-// If nonzero, use the greedy policy with current parameters.
-CONFIG_INTEGER(JitRLCSEGreedy, W("JitRLCSEGreedy"), 0)
-
-#endif
+#endif // DEBUG
 
 ///
 /// JIT
@@ -479,9 +487,11 @@ CONFIG_INTEGER(JitNoRngChks, W("JitNoRngChks"), 0) // If 1, don't generate range
 #endif                                             // defined(FEATURE_ENABLE_NO_RANGE_CHECKS)
 
 #if defined(OPT_CONFIG)
+
 CONFIG_INTEGER(JitDoAssertionProp, W("JitDoAssertionProp"), 1) // Perform assertion propagation optimization
-CONFIG_INTEGER(JitDoCopyProp, W("JitDoCopyProp"), 1)   // Perform copy propagation on variables that appear redundant
-CONFIG_INTEGER(JitDoEarlyProp, W("JitDoEarlyProp"), 1) // Perform Early Value Propagation
+CONFIG_INTEGER(JitDoCopyProp, W("JitDoCopyProp"), 1) // Perform copy propagation on variables that appear redundant
+CONFIG_INTEGER(JitDoOptimizeIVs, W("JitDoOptimizeIVs"), 1)     // Perform optimization of induction variables
+CONFIG_INTEGER(JitDoEarlyProp, W("JitDoEarlyProp"), 1)         // Perform Early Value Propagation
 CONFIG_INTEGER(JitDoLoopHoisting, W("JitDoLoopHoisting"), 1)   // Perform loop hoisting on loop invariant values
 CONFIG_INTEGER(JitDoLoopInversion, W("JitDoLoopInversion"), 1) // Perform loop inversion on "for/while" loops
 CONFIG_INTEGER(JitDoRangeAnalysis, W("JitDoRangeAnalysis"), 1) // Perform range check analysis
@@ -496,14 +506,20 @@ CONFIG_STRING(JitOnlyOptimizeRange,
               W("JitOnlyOptimizeRange")) // If set, all methods that do _not_ match are forced into MinOpts
 CONFIG_STRING(JitEnablePhysicalPromotionRange, W("JitEnablePhysicalPromotionRange"))
 CONFIG_STRING(JitEnableCrossBlockLocalAssertionPropRange, W("JitEnableCrossBlockLocalAssertionPropRange"))
+CONFIG_STRING(JitEnableInductionVariableOptsRange, W("JitEnableInductionVariableOptsRange"))
 
 CONFIG_INTEGER(JitDoSsa, W("JitDoSsa"), 1) // Perform Static Single Assignment (SSA) numbering on the variables
 CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value numbering on method expressions
 
-CONFIG_METHODSET(JitOptRepeat, W("JitOptRepeat"))            // Runs optimizer multiple times on the method
-CONFIG_INTEGER(JitOptRepeatCount, W("JitOptRepeatCount"), 2) // Number of times to repeat opts when repeating
+CONFIG_STRING(JitOptRepeatRange, W("JitOptRepeatRange")) // Enable JitOptRepeat based on method hash range
+
 CONFIG_INTEGER(JitDoIfConversion, W("JitDoIfConversion"), 1) // Perform If conversion
-#endif                                                       // defined(OPT_CONFIG)
+
+#endif // defined(OPT_CONFIG)
+
+CONFIG_INTEGER(JitEnableOptRepeat, W("JitEnableOptRepeat"), 1) // If zero, do not allow JitOptRepeat
+CONFIG_METHODSET(JitOptRepeat, W("JitOptRepeat"))              // Runs optimizer multiple times on specified methods
+CONFIG_INTEGER(JitOptRepeatCount, W("JitOptRepeatCount"), 2)   // Number of times to repeat opts when repeating
 
 // Max # of MapSelect's considered for a particular top-level invocation.
 CONFIG_INTEGER(JitVNMapSelBudget, W("JitVNMapSelBudget"), DEFAULT_MAP_SELECT_BUDGET)
@@ -671,6 +687,10 @@ CONFIG_INTEGER(JitCheckSynthesizedCounts, W("JitCheckSynthesizedCounts"), 0)
 // If instrumenting the method, run synthesis and save the synthesis results
 // as edge or block profile data. Do not actually instrument.
 CONFIG_INTEGER(JitPropagateSynthesizedCountsToProfileData, W("JitPropagateSynthesizedCountsToProfileData"), 0)
+// Use general (gauss-seidel) solver
+CONFIG_INTEGER(JitSynthesisUseSolver, W("JitSynthesisUseSolver"), 1)
+// Relative likelihood of exceptions for synthesis
+CONFIG_STRING(JitSynthesisExceptionScale, W("JitSynthesisExceptionScale"))
 #endif
 
 // Devirtualize virtual calls with getExactClasses (NativeAOT only for now)
diff --git a/src/coreclr/jit/jitee.h b/src/coreclr/jit/jitee.h
index 27963ac356ef..71f53b4e10d7 100644
--- a/src/coreclr/jit/jitee.h
+++ b/src/coreclr/jit/jitee.h
@@ -54,13 +54,15 @@ class JitFlags
     };
     // clang-format on
 
-    JitFlags() : m_jitFlags(0)
+    JitFlags()
+        : m_jitFlags(0)
     {
         // empty
     }
 
     // Convenience constructor to set exactly one flags.
-    JitFlags(JitFlag flag) : m_jitFlags(0)
+    JitFlags(JitFlag flag)
+        : m_jitFlags(0)
     {
         Set(flag);
     }
diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp
index 5a331af8fe94..313f42563d7b 100644
--- a/src/coreclr/jit/jiteh.cpp
+++ b/src/coreclr/jit/jiteh.cpp
@@ -243,9 +243,16 @@ void EHblkDsc::DispEntry(unsigned XTnum)
 {
     printf(" %2u  ::", XTnum);
 
-#if !defined(FEATURE_EH_FUNCLETS)
-    printf("  %2u  ", XTnum, ebdHandlerNestingLevel);
-#endif // !FEATURE_EH_FUNCLETS
+#if defined(FEATURE_EH_WINDOWS_X86)
+    if (ebdHandlerNestingLevel == 0)
+    {
+        printf("      ");
+    }
+    else
+    {
+        printf("  %2u  ", ebdHandlerNestingLevel);
+    }
+#endif // FEATURE_EH_WINDOWS_X86
 
     if (ebdEnclosingTryIndex == NO_ENCLOSING_INDEX)
     {
@@ -613,17 +620,19 @@ bool Compiler::bbIsHandlerBeg(const BasicBlock* block)
 
 bool Compiler::ehHasCallableHandlers()
 {
-#if defined(FEATURE_EH_FUNCLETS)
-
-    // Any EH in the function?
-
-    return compHndBBtabCount > 0;
-
-#else // !FEATURE_EH_FUNCLETS
-
-    return ehNeedsShadowSPslots();
-
-#endif // !FEATURE_EH_FUNCLETS
+    if (UsesFunclets())
+    {
+        // Any EH in the function?
+        return compHndBBtabCount > 0;
+    }
+    else
+    {
+#if defined(FEATURE_EH_WINDOWS_X86)
+        return ehNeedsShadowSPslots();
+#else
+        return false;
+#endif // FEATURE_EH_WINDOWS_X86
+    }
 }
 
 /******************************************************************************************
@@ -897,12 +906,15 @@ unsigned Compiler::ehGetCallFinallyRegionIndex(unsigned finallyIndex, bool* inTr
     assert(finallyIndex != EHblkDsc::NO_ENCLOSING_INDEX);
     assert(ehGetDsc(finallyIndex)->HasFinallyHandler());
 
-#if FEATURE_EH_CALLFINALLY_THUNKS
-    return ehGetDsc(finallyIndex)->ebdGetEnclosingRegionIndex(inTryRegion);
-#else
-    *inTryRegion = true;
-    return finallyIndex;
-#endif
+    if (UsesCallFinallyThunks())
+    {
+        return ehGetDsc(finallyIndex)->ebdGetEnclosingRegionIndex(inTryRegion);
+    }
+    else
+    {
+        *inTryRegion = true;
+        return finallyIndex;
+    }
 }
 
 void Compiler::ehGetCallFinallyBlockRange(unsigned finallyIndex, BasicBlock** startBlock, BasicBlock** lastBlock)
@@ -912,35 +924,38 @@ void Compiler::ehGetCallFinallyBlockRange(unsigned finallyIndex, BasicBlock** st
     assert(startBlock != nullptr);
     assert(lastBlock != nullptr);
 
-#if FEATURE_EH_CALLFINALLY_THUNKS
-    bool     inTryRegion;
-    unsigned callFinallyRegionIndex = ehGetCallFinallyRegionIndex(finallyIndex, &inTryRegion);
-
-    if (callFinallyRegionIndex == EHblkDsc::NO_ENCLOSING_INDEX)
-    {
-        *startBlock = fgFirstBB;
-        *lastBlock  = fgLastBBInMainFunction();
-    }
-    else
+    if (UsesCallFinallyThunks())
     {
-        EHblkDsc* ehDsc = ehGetDsc(callFinallyRegionIndex);
+        bool     inTryRegion;
+        unsigned callFinallyRegionIndex = ehGetCallFinallyRegionIndex(finallyIndex, &inTryRegion);
 
-        if (inTryRegion)
+        if (callFinallyRegionIndex == EHblkDsc::NO_ENCLOSING_INDEX)
         {
-            *startBlock = ehDsc->ebdTryBeg;
-            *lastBlock  = ehDsc->ebdTryLast;
+            *startBlock = fgFirstBB;
+            *lastBlock  = fgLastBBInMainFunction();
         }
         else
         {
-            *startBlock = ehDsc->ebdHndBeg;
-            *lastBlock  = ehDsc->ebdHndLast;
+            EHblkDsc* ehDsc = ehGetDsc(callFinallyRegionIndex);
+
+            if (inTryRegion)
+            {
+                *startBlock = ehDsc->ebdTryBeg;
+                *lastBlock  = ehDsc->ebdTryLast;
+            }
+            else
+            {
+                *startBlock = ehDsc->ebdHndBeg;
+                *lastBlock  = ehDsc->ebdHndLast;
+            }
         }
     }
-#else  // !FEATURE_EH_CALLFINALLY_THUNKS
-    EHblkDsc* ehDsc = ehGetDsc(finallyIndex);
-    *startBlock     = ehDsc->ebdTryBeg;
-    *lastBlock      = ehDsc->ebdTryLast;
-#endif // !FEATURE_EH_CALLFINALLY_THUNKS
+    else
+    {
+        EHblkDsc* ehDsc = ehGetDsc(finallyIndex);
+        *startBlock     = ehDsc->ebdTryBeg;
+        *lastBlock      = ehDsc->ebdTryLast;
+    }
 }
 
 #ifdef DEBUG
@@ -989,8 +1004,6 @@ bool Compiler::ehCallFinallyInCorrectRegion(BasicBlock* blockCallFinally, unsign
 
 #endif // DEBUG
 
-#if defined(FEATURE_EH_FUNCLETS)
-
 /*****************************************************************************
  *
  *  Are there (or will there be) any funclets in the function?
@@ -998,7 +1011,14 @@ bool Compiler::ehCallFinallyInCorrectRegion(BasicBlock* blockCallFinally, unsign
 
 bool Compiler::ehAnyFunclets()
 {
-    return compHndBBtabCount > 0; // if there is any EH, there will be funclets
+    if (UsesFunclets())
+    {
+        return compHndBBtabCount > 0; // if there is any EH, there will be funclets
+    }
+    else
+    {
+        return false;
+    }
 }
 
 /*****************************************************************************
@@ -1010,17 +1030,24 @@ bool Compiler::ehAnyFunclets()
 
 unsigned Compiler::ehFuncletCount()
 {
-    unsigned funcletCnt = 0;
-
-    for (EHblkDsc* const HBtab : EHClauses(this))
+    if (UsesFunclets())
     {
-        if (HBtab->HasFilter())
+        unsigned funcletCnt = 0;
+
+        for (EHblkDsc* const HBtab : EHClauses(this))
         {
+            if (HBtab->HasFilter())
+            {
+                ++funcletCnt;
+            }
             ++funcletCnt;
         }
-        ++funcletCnt;
+        return funcletCnt;
+    }
+    else
+    {
+        return 0;
     }
-    return funcletCnt;
 }
 
 /*****************************************************************************
@@ -1037,36 +1064,41 @@ unsigned Compiler::ehFuncletCount()
  */
 unsigned Compiler::bbThrowIndex(BasicBlock* blk)
 {
-    if (!blk->hasTryIndex() && !blk->hasHndIndex())
+    if (UsesFunclets())
     {
-        return -1;
-    }
+        if (!blk->hasTryIndex() && !blk->hasHndIndex())
+        {
+            return -1;
+        }
 
-    const unsigned tryIndex = blk->hasTryIndex() ? blk->getTryIndex() : USHRT_MAX;
-    const unsigned hndIndex = blk->hasHndIndex() ? blk->getHndIndex() : USHRT_MAX;
-    assert(tryIndex != hndIndex);
-    assert(tryIndex != USHRT_MAX || hndIndex != USHRT_MAX);
+        const unsigned tryIndex = blk->hasTryIndex() ? blk->getTryIndex() : USHRT_MAX;
+        const unsigned hndIndex = blk->hasHndIndex() ? blk->getHndIndex() : USHRT_MAX;
+        assert(tryIndex != hndIndex);
+        assert(tryIndex != USHRT_MAX || hndIndex != USHRT_MAX);
 
-    if (tryIndex < hndIndex)
-    {
-        // The most enclosing region is a try body, use it
-        assert(tryIndex <= 0x3FFFFFFF);
-        return tryIndex;
-    }
+        if (tryIndex < hndIndex)
+        {
+            // The most enclosing region is a try body, use it
+            assert(tryIndex <= 0x3FFFFFFF);
+            return tryIndex;
+        }
 
-    // The most enclosing region is a handler which will be a funclet
-    // Now we have to figure out if blk is in the filter or handler
-    assert(hndIndex <= 0x3FFFFFFF);
-    if (ehGetDsc(hndIndex)->InFilterRegionBBRange(blk))
+        // The most enclosing region is a handler which will be a funclet
+        // Now we have to figure out if blk is in the filter or handler
+        assert(hndIndex <= 0x3FFFFFFF);
+        if (ehGetDsc(hndIndex)->InFilterRegionBBRange(blk))
+        {
+            return hndIndex | 0x40000000;
+        }
+
+        return hndIndex | 0x80000000;
+    }
+    else
     {
-        return hndIndex | 0x40000000;
+        return blk->bbTryIndex;
     }
-
-    return hndIndex | 0x80000000;
 }
 
-#endif // FEATURE_EH_FUNCLETS
-
 /*****************************************************************************
  * Determine the emitter code cookie for a block, for unwind purposes.
  */
@@ -1354,28 +1386,26 @@ void Compiler::fgSkipRmvdBlocks(EHblkDsc* handlerTab)
  */
 void Compiler::fgAllocEHTable()
 {
-#if defined(FEATURE_EH_FUNCLETS)
-
-    // We need to allocate space for EH clauses that will be used by funclets
-    // as well as one for each EH clause from the IL. Nested EH clauses pulled
-    // out as funclets create one EH clause for each enclosing region. Thus,
-    // the maximum number of clauses we will need might be very large. We allocate
-    // twice the number of EH clauses in the IL, which should be good in practice.
-    // In extreme cases, we might need to abandon this and reallocate. See
-    // fgAddEHTableEntry() for more details.
-    CLANG_FORMAT_COMMENT_ANCHOR;
+    if (UsesFunclets())
+    {
+        // We need to allocate space for EH clauses that will be used by funclets
+        // as well as one for each EH clause from the IL. Nested EH clauses pulled
+        // out as funclets create one EH clause for each enclosing region. Thus,
+        // the maximum number of clauses we will need might be very large. We allocate
+        // twice the number of EH clauses in the IL, which should be good in practice.
+        // In extreme cases, we might need to abandon this and reallocate. See
+        // fgAddEHTableEntry() for more details.
 
 #ifdef DEBUG
-    compHndBBtabAllocCount = info.compXcptnsCount; // force the resizing code to hit more frequently in DEBUG
-#else                                              // DEBUG
-    compHndBBtabAllocCount = info.compXcptnsCount * 2;
-#endif                                             // DEBUG
-
-#else // !FEATURE_EH_FUNCLETS
-
-    compHndBBtabAllocCount = info.compXcptnsCount;
-
-#endif // !FEATURE_EH_FUNCLETS
+        compHndBBtabAllocCount = info.compXcptnsCount; // force the resizing code to hit more frequently in DEBUG
+#else                                                  // DEBUG
+        compHndBBtabAllocCount = info.compXcptnsCount * 2;
+#endif                                                 // DEBUG
+    }
+    else
+    {
+        compHndBBtabAllocCount = info.compXcptnsCount;
+    }
 
     compHndBBtab = new (this, CMK_BasicBlock) EHblkDsc[compHndBBtabAllocCount];
 
@@ -1495,8 +1525,6 @@ void Compiler::fgRemoveEHTableEntry(unsigned XTnum)
     }
 }
 
-#if defined(FEATURE_EH_FUNCLETS)
-
 /*****************************************************************************
  *
  *  Add a single exception table entry at index 'XTnum', [0 <= XTnum <= compHndBBtabCount].
@@ -1508,6 +1536,8 @@ void Compiler::fgRemoveEHTableEntry(unsigned XTnum)
  */
 EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum)
 {
+    assert(UsesFunclets());
+
     if (XTnum != compHndBBtabCount)
     {
         // Update all enclosing links that will get invalidated by inserting an entry at 'XTnum'
@@ -1556,7 +1586,7 @@ EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum)
         // Double the table size. For stress, we could use +1. Note that if the table isn't allocated
         // yet, such as when we add an EH region for synchronized methods that don't already have one,
         // we start at zero, so we need to make sure the new table has at least one entry.
-        unsigned newHndBBtabAllocCount = max(1, compHndBBtabAllocCount * 2);
+        unsigned newHndBBtabAllocCount = max(1u, compHndBBtabAllocCount * 2);
         noway_assert(compHndBBtabAllocCount < newHndBBtabAllocCount); // check for overflow
 
         if (newHndBBtabAllocCount > MAX_XCPTN_INDEX)
@@ -1603,8 +1633,6 @@ EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum)
     return compHndBBtab + XTnum;
 }
 
-#endif // FEATURE_EH_FUNCLETS
-
 /*****************************************************************************
  *
  *  Sort the EH table if necessary.
@@ -1684,7 +1712,6 @@ void Compiler::fgSortEHTable()
     // but ARM did. It turns out not sorting the table can cause the EH table to incorrectly
     // set the bbHndIndex value in some nested cases, and that can lead to a security exploit
     // that allows the execution of arbitrary code.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     if (verbose)
@@ -1720,7 +1747,7 @@ void Compiler::fgSortEHTable()
                 (hndBegOff >= xtab1->ebdHndBegOffset && hndEndOff <= xtab1->ebdHndEndOffset) ||
                 (xtab1->HasFilter() && (hndBegOff >= xtab1->ebdFilterBegOffset && hndEndOff <= xtab1->ebdHndBegOffset))
                 // Note that end of filter is beginning of handler
-                )
+            )
             {
 #ifdef DEBUG
                 if (verbose)
@@ -1986,10 +2013,10 @@ bool Compiler::fgNormalizeEHCase1()
         {
             // ...then we want to insert an empty, non-removable block outside the try to be the new first block of the
             // handler.
-            BasicBlock* newHndStart = BasicBlock::New(this, BBJ_ALWAYS, handlerStart);
+            BasicBlock* newHndStart = BasicBlock::New(this);
             fgInsertBBbefore(handlerStart, newHndStart);
             FlowEdge* newEdge = fgAddRefPred(handlerStart, newHndStart);
-            newEdge->setLikelihood(1.0);
+            newHndStart->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
 
             // Handler begins have an extra implicit ref count.
             // BasicBlock::New has already handled this for newHndStart.
@@ -2027,7 +2054,7 @@ bool Compiler::fgNormalizeEHCase1()
             newHndStart->bbCodeOffs    = handlerStart->bbCodeOffs;
             newHndStart->bbCodeOffsEnd = newHndStart->bbCodeOffs; // code size = 0. TODO: use BAD_IL_OFFSET instead?
             newHndStart->inheritWeight(handlerStart);
-            newHndStart->SetFlags(BBF_DONT_REMOVE | BBF_INTERNAL | BBF_NONE_QUIRK);
+            newHndStart->SetFlags(BBF_DONT_REMOVE | BBF_INTERNAL);
             modified = true;
 
 #ifdef DEBUG
@@ -2084,7 +2111,7 @@ bool Compiler::fgNormalizeEHCase2()
 
                     if (ehOuter->ebdIsSameTry(mutualTryBeg, mutualTryLast))
                     {
-// clang-format off
+                        // clang-format off
                         // Don't touch mutually-protect regions: their 'try' regions must remain identical!
                         // We want to continue the looping outwards, in case we have something like this:
                         //
@@ -2133,7 +2160,7 @@ bool Compiler::fgNormalizeEHCase2()
                         //
                         // In this case, all the 'try' start at the same block! Note that there are two sets of mutually-protect regions,
                         // separated by some nesting.
-// clang-format on
+                        // clang-format on
 
 #ifdef DEBUG
                         if (verbose)
@@ -2156,11 +2183,11 @@ bool Compiler::fgNormalizeEHCase2()
                         // We've got multiple 'try' blocks starting at the same place!
                         // Add a new first 'try' block for 'ehOuter' that will be outside 'eh'.
 
-                        BasicBlock* newTryStart = BasicBlock::New(this, BBJ_ALWAYS, insertBeforeBlk);
+                        BasicBlock* newTryStart = BasicBlock::New(this);
                         newTryStart->bbRefs     = 0;
                         fgInsertBBbefore(insertBeforeBlk, newTryStart);
                         FlowEdge* const newEdge = fgAddRefPred(insertBeforeBlk, newTryStart);
-                        newEdge->setLikelihood(1.0);
+                        newTryStart->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
 
                         // It's possible for a try to start at the beginning of a method. If so, we need
                         // to adjust the implicit ref counts as we've just created a new first bb
@@ -2196,12 +2223,8 @@ bool Compiler::fgNormalizeEHCase2()
 
                         // Note that we don't need to clear any flags on the old try start, since it is still a 'try'
                         // start.
-                        newTryStart->SetFlags(BBF_DONT_REMOVE | BBF_INTERNAL | BBF_NONE_QUIRK);
-
-                        if (insertBeforeBlk->HasFlag(BBF_BACKWARD_JUMP_TARGET))
-                        {
-                            newTryStart->SetFlags(BBF_BACKWARD_JUMP_TARGET);
-                        }
+                        newTryStart->SetFlags(BBF_DONT_REMOVE | BBF_INTERNAL);
+                        newTryStart->CopyFlags(insertBeforeBlk, BBF_BACKWARD_JUMP_TARGET);
 
                         // Now we need to split any flow edges targeting the old try begin block between the old
                         // and new block. Note that if we are handling a multiply-nested 'try', we may have already
@@ -2339,7 +2362,9 @@ bool Compiler::fgCreateFiltersForGenericExceptions()
             info.compCompHnd->resolveToken(&resolvedToken);
 
             CORINFO_GENERICHANDLE_RESULT embedInfo;
-            info.compCompHnd->embedGenericHandle(&resolvedToken, true, &embedInfo);
+            // NOTE: inlining is done at this point, so we don't know which method contained this token.
+            // It's fine because currently this is never used for something that belongs to an inlinee.
+            info.compCompHnd->embedGenericHandle(&resolvedToken, true, info.compMethodHnd, &embedInfo);
             if (!embedInfo.lookup.lookupKind.needsRuntimeLookup)
             {
                 // Exception type does not need runtime lookup
@@ -2348,7 +2373,7 @@ bool Compiler::fgCreateFiltersForGenericExceptions()
 
             // Create a new bb for the fake filter
             BasicBlock* handlerBb = eh->ebdHndBeg;
-            BasicBlock* filterBb  = BasicBlock::New(this, BBJ_EHFILTERRET, handlerBb);
+            BasicBlock* filterBb  = BasicBlock::New(this);
 
             // Now we need to spill CATCH_ARG (it should be the first thing evaluated)
             GenTree* arg = new (this, GT_CATCH_ARG) GenTree(GT_CATCH_ARG, TYP_REF);
@@ -2365,7 +2390,7 @@ bool Compiler::fgCreateFiltersForGenericExceptions()
             {
                 GenTree* ctxTree = getRuntimeContextTree(embedInfo.lookup.lookupKind.runtimeLookupKind);
                 runtimeLookup    = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_HANDLE,
-                                                          TYP_I_IMPL, &embedInfo.lookup.lookupKind, ctxTree);
+                                                             TYP_I_IMPL, &embedInfo.lookup.lookupKind, ctxTree);
             }
             else
             {
@@ -2377,7 +2402,7 @@ bool Compiler::fgCreateFiltersForGenericExceptions()
             // Insert it right before the handler (and make it a pred of the handler)
             fgInsertBBbefore(handlerBb, filterBb);
             FlowEdge* const newEdge = fgAddRefPred(handlerBb, filterBb);
-            newEdge->setLikelihood(1.0);
+            filterBb->SetKindAndTargetEdge(BBJ_EHFILTERRET, newEdge);
             fgNewStmtAtEnd(filterBb, retFilt, handlerBb->firstStmt()->GetDebugInfo());
 
             filterBb->bbCatchTyp = BBCT_FILTER;
@@ -2546,7 +2571,6 @@ bool Compiler::fgNormalizeEHCase3()
                     if (EHblkDsc::ebdIsSameTry(ehOuter, ehInner))
                     {
                         // We can't touch this 'try', since it's mutual protect.
-                        CLANG_FORMAT_COMMENT_ANCHOR;
 #ifdef DEBUG
                         if (verbose)
                         {
@@ -2634,7 +2658,7 @@ bool Compiler::fgNormalizeEHCase3()
                     // Add a new last block for 'ehOuter' that will be outside the EH region with which it encloses and
                     // shares a 'last' pointer
 
-                    BasicBlock* newLast = BasicBlock::New(this, BBJ_ALWAYS, insertAfterBlk->Next());
+                    BasicBlock* newLast = BasicBlock::New(this);
                     newLast->bbRefs     = 0;
                     assert(insertAfterBlk != nullptr);
                     fgInsertBBafter(insertAfterBlk, newLast);
@@ -2682,9 +2706,9 @@ bool Compiler::fgNormalizeEHCase3()
                     newLast->bbCodeOffs    = insertAfterBlk->bbCodeOffsEnd;
                     newLast->bbCodeOffsEnd = newLast->bbCodeOffs; // code size = 0. TODO: use BAD_IL_OFFSET instead?
                     newLast->inheritWeight(insertAfterBlk);
-                    newLast->SetFlags(BBF_INTERNAL | BBF_NONE_QUIRK);
+                    newLast->SetFlags(BBF_INTERNAL);
                     FlowEdge* const newEdge = fgAddRefPred(newLast, insertAfterBlk);
-                    newEdge->setLikelihood(1.0);
+                    insertAfterBlk->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
 
                     // Move the insert pointer. More enclosing equivalent 'last' blocks will be inserted after this.
                     insertAfterBlk = newLast;
@@ -2733,7 +2757,6 @@ bool Compiler::fgNormalizeEHCase3()
                             if (innerIsTryRegion && ehOuter->ebdIsSameTry(mutualTryBeg, mutualTryLast))
                             {
                                 // We can't touch this 'try', since it's mutual protect.
-                                CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
                                 if (verbose)
@@ -3001,7 +3024,6 @@ void Compiler::fgVerifyHandlerTab()
             assert(!HBtab->ebdFilter->HasFlag(BBF_REMOVED));
         }
 
-#if defined(FEATURE_EH_FUNCLETS)
         if (fgFuncletsCreated)
         {
             assert(HBtab->ebdHndBeg->HasFlag(BBF_FUNCLET_BEG));
@@ -3011,7 +3033,6 @@ void Compiler::fgVerifyHandlerTab()
                 assert(HBtab->ebdFilter->HasFlag(BBF_FUNCLET_BEG));
             }
         }
-#endif // FEATURE_EH_FUNCLETS
     }
 
     // I want to assert things about the relative ordering of blocks in the block list using
@@ -3034,8 +3055,8 @@ void Compiler::fgVerifyHandlerTab()
         assert(blockNumMap[block->bbNum] == 0); // If this fails, we have two blocks with the same block number.
         blockNumMap[block->bbNum] = newBBnum++;
     }
-// Note that there may be some blockNumMap[x] == 0, for a block number 'x' that has been deleted, if the blocks
-// haven't been renumbered since the deletion.
+    // Note that there may be some blockNumMap[x] == 0, for a block number 'x' that has been deleted, if the blocks
+    // haven't been renumbered since the deletion.
 
 #if 0 // Useful for debugging, but don't want to put this in the dump all the time
     if (verbose)
@@ -3065,7 +3086,6 @@ void Compiler::fgVerifyHandlerTab()
         blockHndBegSet[i] = false;
     }
 
-#if defined(FEATURE_EH_FUNCLETS)
     bool     isLegalFirstFunclet = false;
     unsigned bbNumFirstFunclet   = 0;
 
@@ -3081,7 +3101,6 @@ void Compiler::fgVerifyHandlerTab()
     {
         assert(fgFirstFuncletBB == nullptr);
     }
-#endif // FEATURE_EH_FUNCLETS
 
     for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
     {
@@ -3130,7 +3149,6 @@ void Compiler::fgVerifyHandlerTab()
             assert((bbNumHndLast < bbNumTryBeg) || (bbNumTryLast < bbNumHndBeg));
         }
 
-#if defined(FEATURE_EH_FUNCLETS)
         // If funclets have been created, check the first funclet block. The first funclet block must be the
         // first block of a filter or handler. All filter/handler blocks must come after it.
         // Note that 'try' blocks might come either before or after it. If after, they will be nested within
@@ -3179,7 +3197,6 @@ void Compiler::fgVerifyHandlerTab()
                 }
             }
         }
-#endif // FEATURE_EH_FUNCLETS
 
         // Check the 'try' region nesting, using ebdEnclosingTryIndex.
         // Only check one level of nesting, since we'll check the outer EH region (and its nesting) when we get to it
@@ -3203,9 +3220,7 @@ void Compiler::fgVerifyHandlerTab()
                 // blocks in the nested EH region. However, if funclets have been created, this is no longer true, since
                 // this 'try' might be in a handler that is pulled out to the funclet region, while the outer 'try'
                 // remains in the main function region.
-                CLANG_FORMAT_COMMENT_ANCHOR;
 
-#if defined(FEATURE_EH_FUNCLETS)
                 if (fgFuncletsCreated)
                 {
                     // If both the 'try' region and the outer 'try' region are in the main function area, then we can
@@ -3238,7 +3253,6 @@ void Compiler::fgVerifyHandlerTab()
                     assert((bbNumHndLast < bbNumOuterTryBeg) || (bbNumOuterTryLast < bbNumHndBeg));
                 }
                 else
-#endif // FEATURE_EH_FUNCLETS
                 {
                     if (multipleBegBlockNormalizationDone)
                     {
@@ -3282,11 +3296,10 @@ void Compiler::fgVerifyHandlerTab()
             assert(bbNumOuterHndLast != 0);
             assert(bbNumOuterHndBeg <= bbNumOuterHndLast);
 
-// The outer handler must completely contain all the blocks in the EH region nested within it. However, if
-// funclets have been created, it's harder to make any relationship asserts about the order of nested
-// handlers, which also have been made into funclets.
+            // The outer handler must completely contain all the blocks in the EH region nested within it. However, if
+            // funclets have been created, it's harder to make any relationship asserts about the order of nested
+            // handlers, which also have been made into funclets.
 
-#if defined(FEATURE_EH_FUNCLETS)
             if (fgFuncletsCreated)
             {
                 if (handlerBegIsTryBegNormalizationDone)
@@ -3313,7 +3326,6 @@ void Compiler::fgVerifyHandlerTab()
                 assert((bbNumHndLast < bbNumOuterHndBeg) || (bbNumOuterHndLast < bbNumHndBeg));
             }
             else
-#endif // FEATURE_EH_FUNCLETS
             {
                 if (handlerBegIsTryBegNormalizationDone)
                 {
@@ -3373,9 +3385,7 @@ void Compiler::fgVerifyHandlerTab()
         }
     }
 
-#if defined(FEATURE_EH_FUNCLETS)
     assert(!fgFuncletsCreated || isLegalFirstFunclet);
-#endif // FEATURE_EH_FUNCLETS
 
     // Figure out what 'try' and handler index each basic block should have,
     // and check the blocks against that. This depends on the more nested EH
@@ -3415,7 +3425,6 @@ void Compiler::fgVerifyHandlerTab()
         }
     }
 
-#if defined(FEATURE_EH_FUNCLETS)
     if (fgFuncletsCreated)
     {
         // Mark all the funclet 'try' indices correctly, since they do not exist in the linear 'try' region that
@@ -3445,7 +3454,6 @@ void Compiler::fgVerifyHandlerTab()
             }
         }
     }
-#endif // FEATURE_EH_FUNCLETS
 
     // Make sure that all blocks have the right index, including those blocks that should have zero (no EH region).
     for (BasicBlock* const block : Blocks())
@@ -3459,13 +3467,11 @@ void Compiler::fgVerifyHandlerTab()
         {
             assert(block->bbCatchTyp == BBCT_NONE);
 
-#if defined(FEATURE_EH_FUNCLETS)
             if (fgFuncletsCreated)
             {
                 // Make sure blocks that aren't the first block of a funclet do not have the BBF_FUNCLET_BEG flag set.
                 assert(!block->HasFlag(BBF_FUNCLET_BEG));
             }
-#endif // FEATURE_EH_FUNCLETS
         }
 
         // Check for legal block types
@@ -3524,9 +3530,12 @@ void Compiler::fgDispHandlerTab()
     }
 
     printf("\nindex  ");
-#if !defined(FEATURE_EH_FUNCLETS)
-    printf("nest, ");
-#endif // !FEATURE_EH_FUNCLETS
+#if defined(FEATURE_EH_WINDOWS_X86)
+    if (!UsesFunclets())
+    {
+        printf("nest, ");
+    }
+#endif // FEATURE_EH_WINDOWS_X86
     printf("eTry, eHnd\n");
 
     unsigned  XTnum;
@@ -4001,8 +4010,6 @@ void Compiler::verCheckNestingLevel(EHNodeDsc* root)
     }
 }
 
-#if defined(FEATURE_EH_FUNCLETS)
-
 /*****************************************************************************
  * Is this an intra-handler control flow edge?
  *
@@ -4026,14 +4033,14 @@ void Compiler::verCheckNestingLevel(EHNodeDsc* root)
 bool Compiler::fgIsIntraHandlerPred(BasicBlock* predBlock, BasicBlock* block)
 {
     // Some simple preconditions (as stated above)
+    assert(UsesFunclets());
     assert(!fgFuncletsCreated);
     assert(fgGetPredForBlock(block, predBlock) != nullptr);
     assert(block->hasHndIndex());
 
     EHblkDsc* xtab = ehGetDsc(block->getHndIndex());
 
-#if FEATURE_EH_CALLFINALLY_THUNKS
-    if (xtab->HasFinallyHandler())
+    if (UsesCallFinallyThunks() && xtab->HasFinallyHandler())
     {
         assert((xtab->ebdHndBeg == block) || // The normal case
                (xtab->ebdHndBeg->NextIs(block) &&
@@ -4061,7 +4068,6 @@ bool Compiler::fgIsIntraHandlerPred(BasicBlock* predBlock, BasicBlock* block)
             return false;
         }
     }
-#endif // FEATURE_EH_CALLFINALLY_THUNKS
 
     assert(predBlock->hasHndIndex() || predBlock->hasTryIndex());
 
@@ -4130,6 +4136,7 @@ bool Compiler::fgIsIntraHandlerPred(BasicBlock* predBlock, BasicBlock* block)
 
 bool Compiler::fgAnyIntraHandlerPreds(BasicBlock* block)
 {
+    assert(UsesFunclets());
     assert(block->hasHndIndex());
     assert(fgFirstBlockOfHandler(block) == block); // this block is the first block of a handler
 
@@ -4145,7 +4152,7 @@ bool Compiler::fgAnyIntraHandlerPreds(BasicBlock* block)
     return false;
 }
 
-#else // !FEATURE_EH_FUNCLETS
+#if defined(FEATURE_EH_WINDOWS_X86)
 
 /*****************************************************************************
  *
@@ -4158,6 +4165,8 @@ bool Compiler::fgRelocateEHRegions()
 {
     bool result = false; // Our return value
 
+    assert(!UsesFunclets());
+
 #ifdef DEBUG
     if (verbose)
         printf("*************** In fgRelocateEHRegions()\n");
@@ -4211,7 +4220,6 @@ bool Compiler::fgRelocateEHRegions()
                 // Currently it is not good to move the rarely run handler regions to the end of the method
                 // because fgDetermineFirstColdBlock() must put the start of any handler region in the hot
                 // section.
-                CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if 0
                 // Now try to move the entire handler region if it can be moved.
@@ -4263,7 +4271,7 @@ bool Compiler::fgRelocateEHRegions()
     return result;
 }
 
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
 //------------------------------------------------------------------------
 // fgExtendEHRegionBefore: Modify the EH table to account for a new block.
@@ -4321,18 +4329,16 @@ void Compiler::fgExtendEHRegionBefore(BasicBlock* block)
             block->bbRefs--;
             bPrev->bbRefs++;
 
-#if defined(FEATURE_EH_FUNCLETS)
             if (fgFuncletsCreated)
             {
                 assert(block->HasFlag(BBF_FUNCLET_BEG));
                 bPrev->SetFlags(BBF_FUNCLET_BEG);
                 block->RemoveFlags(BBF_FUNCLET_BEG);
             }
-#endif // FEATURE_EH_FUNCLETS
 
             // If this is a handler for a filter, the last block of the filter will end with
-            // a BBJ_EHFILTERRET block that has a bbTarget that jumps to the first block of
-            // its handler. So we need to update it to keep things in sync.
+            // a BBJ_EHFILTERRET block that jumps to the first block of its handler.
+            // So we need to update it to keep things in sync.
             //
             if (HBtab->HasFilter())
             {
@@ -4343,15 +4349,12 @@ void Compiler::fgExtendEHRegionBefore(BasicBlock* block)
 #ifdef DEBUG
                 if (verbose)
                 {
-                    printf("EH#%u: Updating bbTarget for filter ret block: " FMT_BB " => " FMT_BB "\n",
-                           ehGetIndex(HBtab), bFilterLast->bbNum, bPrev->bbNum);
+                    printf("EH#%u: Updating target for filter ret block: " FMT_BB " => " FMT_BB "\n", ehGetIndex(HBtab),
+                           bFilterLast->bbNum, bPrev->bbNum);
                 }
 #endif // DEBUG
-                // Change the bbTarget for bFilterLast from the old first 'block' to the new first 'bPrev'
-                fgRemoveRefPred(bFilterLast->GetTarget(), bFilterLast);
-                bFilterLast->SetTarget(bPrev);
-                FlowEdge* const newEdge = fgAddRefPred(bPrev, bFilterLast);
-                newEdge->setLikelihood(1.0);
+       // Change the target for bFilterLast from the old first 'block' to the new first 'bPrev'
+                fgRedirectTargetEdge(bFilterLast, bPrev);
             }
         }
 
@@ -4371,14 +4374,12 @@ void Compiler::fgExtendEHRegionBefore(BasicBlock* block)
             HBtab->ebdFilter = bPrev;
             bPrev->SetFlags(BBF_DONT_REMOVE);
 
-#if defined(FEATURE_EH_FUNCLETS)
             if (fgFuncletsCreated)
             {
                 assert(block->HasFlag(BBF_FUNCLET_BEG));
                 bPrev->SetFlags(BBF_FUNCLET_BEG);
                 block->RemoveFlags(BBF_FUNCLET_BEG);
             }
-#endif // FEATURE_EH_FUNCLETS
 
             bPrev->bbRefs++;
         }
diff --git a/src/coreclr/jit/jiteh.h b/src/coreclr/jit/jiteh.h
index 4f09f9bcce57..482e6796f549 100644
--- a/src/coreclr/jit/jiteh.h
+++ b/src/coreclr/jit/jiteh.h
@@ -84,18 +84,19 @@ struct EHblkDsc
     BasicBlock* ebdTryLast; // Last block of the try
     BasicBlock* ebdHndBeg;  // First block of the handler
     BasicBlock* ebdHndLast; // Last block of the handler
-    union {
+    union
+    {
         BasicBlock* ebdFilter; // First block of filter,          if HasFilter()
         unsigned    ebdTyp;    // Exception type (a class token), otherwise
     };
 
     EHHandlerType ebdHandlerType;
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
     // How nested is the try/handler within other *handlers* - 0 for outermost clauses, 1 for nesting with a handler,
     // etc.
     unsigned short ebdHandlerNestingLevel;
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
     static const unsigned short NO_ENCLOSING_INDEX = USHRT_MAX;
 
@@ -110,8 +111,6 @@ struct EHblkDsc
     // The index of the enclosing outer handler region, NO_ENCLOSING_INDEX if none.
     unsigned short ebdEnclosingHndIndex;
 
-#if defined(FEATURE_EH_FUNCLETS)
-
     // After funclets are created, this is the index of corresponding FuncInfoDsc
     // Special case for Filter/Filter-handler:
     //   Like the IL the filter funclet immediately precedes the filter-handler funclet.
@@ -125,8 +124,6 @@ struct EHblkDsc
     unsigned short ebdFilterFuncIndex;
 #endif // TARGET_WASM
 
-#endif // FEATURE_EH_FUNCLETS
-
     IL_OFFSET ebdTryBegOffset; // IL offsets of EH try/end regions as they are imported
     IL_OFFSET ebdTryEndOffset;
     IL_OFFSET ebdFilterBegOffset; // only set if HasFilter()
@@ -172,8 +169,8 @@ struct EHblkDsc
     unsigned ebdGetEnclosingRegionIndex(bool* inTryRegion);
 
     static bool ebdIsSameTry(EHblkDsc* h1, EHblkDsc* h2); // Same 'try' region? Compare begin/last blocks.
-    bool ebdIsSameTry(Compiler* comp, unsigned t2);
-    bool ebdIsSameTry(BasicBlock* ebdTryBeg, BasicBlock* ebdTryLast);
+    bool        ebdIsSameTry(Compiler* comp, unsigned t2);
+    bool        ebdIsSameTry(BasicBlock* ebdTryBeg, BasicBlock* ebdTryLast);
 
 #ifdef DEBUG
     void DispEntry(unsigned num); // Display this table entry
diff --git a/src/coreclr/jit/jitexpandarray.h b/src/coreclr/jit/jitexpandarray.h
index 646f9e6747a3..8eaf52705986 100644
--- a/src/coreclr/jit/jitexpandarray.h
+++ b/src/coreclr/jit/jitexpandarray.h
@@ -54,7 +54,10 @@ class JitExpandArray
     //    of size max(`minSize`, `idx`) is allocated.
     //
     JitExpandArray(CompAllocator alloc, unsigned minSize = 1)
-        : m_alloc(alloc), m_members(nullptr), m_size(0), m_minSize(minSize)
+        : m_alloc(alloc)
+        , m_members(nullptr)
+        , m_size(0)
+        , m_minSize(minSize)
     {
         assert(minSize > 0);
     }
@@ -219,7 +222,9 @@ class JitExpandArrayStack : public JitExpandArray<T>
     // Notes:
     //    See JitExpandArray constructor notes.
     //
-    JitExpandArrayStack(CompAllocator alloc, unsigned minSize = 1) : JitExpandArray<T>(alloc, minSize), m_used(0)
+    JitExpandArrayStack(CompAllocator alloc, unsigned minSize = 1)
+        : JitExpandArray<T>(alloc, minSize)
+        , m_used(0)
     {
     }
 
diff --git a/src/coreclr/jit/jitgcinfo.h b/src/coreclr/jit/jitgcinfo.h
index 367d49873fcc..f6b35bd6d75a 100644
--- a/src/coreclr/jit/jitgcinfo.h
+++ b/src/coreclr/jit/jitgcinfo.h
@@ -27,7 +27,9 @@ struct RegSlotIdKey
     {
     }
 
-    RegSlotIdKey(unsigned short regNum, unsigned flags) : m_regNum(regNum), m_flags((unsigned short)flags)
+    RegSlotIdKey(unsigned short regNum, unsigned flags)
+        : m_regNum(regNum)
+        , m_flags((unsigned short)flags)
     {
         assert(m_flags == flags);
     }
@@ -54,7 +56,9 @@ struct StackSlotIdKey
     }
 
     StackSlotIdKey(int offset, bool fpRel, unsigned flags)
-        : m_offset(offset), m_fpRel(fpRel), m_flags((unsigned short)flags)
+        : m_offset(offset)
+        , m_fpRel(fpRel)
+        , m_flags((unsigned short)flags)
     {
         assert(flags == m_flags);
     }
@@ -165,7 +169,7 @@ class GCInfo
         unsigned char rpdCallInstrSize; // Length of the call instruction.
 #endif
 
-        unsigned short rpdArg : 1;     // is this an argument descriptor?
+        unsigned short rpdArg     : 1; // is this an argument descriptor?
         unsigned short rpdArgType : 2; // is this an argument push,pop, or kill?
         rpdArgType_t   rpdArgTypeGet()
         {
@@ -181,8 +185,8 @@ class GCInfo
 #endif // !TARGET_WASM
 
         unsigned short rpdIsThis : 1;                       // is it the 'this' pointer
-        unsigned short rpdCall : 1;                         // is this a true call site?
-        unsigned short : 1;                                 // Padding bit, so next two start on a byte boundary
+        unsigned short rpdCall   : 1;                       // is this a true call site?
+        unsigned short           : 1;                       // Padding bit, so next two start on a byte boundary
         unsigned short rpdCallGCrefRegs : CNT_CALLEE_SAVED; // Callee-saved registers containing GC pointers.
         unsigned short rpdCallByrefRegs : CNT_CALLEE_SAVED; // Callee-saved registers containing byrefs.
 
@@ -263,7 +267,8 @@ class GCInfo
 
         unsigned short cdArgCnt;
 
-        union {
+        union
+        {
             struct // used if cdArgCnt == 0
             {
                 unsigned cdArgMask;      // ptr arg bitfield
@@ -280,7 +285,7 @@ class GCInfo
     CallDsc* gcCallDescList;
     CallDsc* gcCallDescLast;
 
-//-------------------------------------------------------------------------
+    //-------------------------------------------------------------------------
 
 #ifdef JIT32_GCENCODER
     void gcCountForHeader(UNALIGNED unsigned int* pUntrackedCount, UNALIGNED unsigned int* pVarPtrTableSize);
@@ -305,7 +310,7 @@ class GCInfo
 
 #ifdef JIT32_GCENCODER
     size_t gcPtrTableSize(const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset);
-    BYTE* gcPtrTableSave(BYTE* destPtr, const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset);
+    BYTE*  gcPtrTableSave(BYTE* destPtr, const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset);
 #endif
     void gcRegPtrSetInit();
     /*****************************************************************************/
@@ -337,7 +342,6 @@ class GCInfo
     //
     //  These record the info about the procedure in the info-block
     //
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef JIT32_GCENCODER
 private:
@@ -363,8 +367,6 @@ class GCInfo
 
 #endif // JIT32_GCENCODER
 
-#if !defined(JIT32_GCENCODER) || defined(FEATURE_EH_FUNCLETS)
-
     // This method expands the tracked stack variables lifetimes so that any lifetimes within filters
     // are reported as pinned.
     void gcMarkFilterVarsPinned();
@@ -376,15 +378,13 @@ class GCInfo
     void gcDumpVarPtrDsc(varPtrDsc* desc);
 #endif // DEBUG
 
-#endif // !defined(JIT32_GCENCODER) || defined(FEATURE_EH_FUNCLETS)
-
 #if DUMP_GC_TABLES
 
     void gcFindPtrsInFrame(const void* infoBlock, const void* codeBlock, unsigned offs);
 
 #ifdef JIT32_GCENCODER
     size_t gcInfoBlockHdrDump(const BYTE* table,
-                              InfoHdr*    header,      /* OUT */
+                              InfoHdr*    header,    /* OUT */
                               unsigned*   methodSize); /* OUT */
 
     size_t gcDumpPtrTable(const BYTE* table, const InfoHdr& header, unsigned methodSize);
diff --git a/src/coreclr/jit/jithashtable.h b/src/coreclr/jit/jithashtable.h
index 9ad73dbf2f7d..f699c3eee19d 100644
--- a/src/coreclr/jit/jithashtable.h
+++ b/src/coreclr/jit/jithashtable.h
@@ -57,10 +57,16 @@ class JitHashTableBehavior
 class JitPrimeInfo
 {
 public:
-    constexpr JitPrimeInfo() : prime(0), magic(0), shift(0)
+    constexpr JitPrimeInfo()
+        : prime(0)
+        , magic(0)
+        , shift(0)
     {
     }
-    constexpr JitPrimeInfo(unsigned p, unsigned m, unsigned s) : prime(p), magic(m), shift(s)
+    constexpr JitPrimeInfo(unsigned p, unsigned m, unsigned s)
+        : prime(p)
+        , magic(m)
+        , shift(s)
     {
     }
     unsigned prime;
@@ -130,7 +136,10 @@ class JitHashTable
         Value m_val;
 
         template <class... Args>
-        Node(Node* next, Key k, Args&&... args) : m_next(next), m_key(k), m_val(std::forward<Args>(args)...)
+        Node(Node* next, Key k, Args&&... args)
+            : m_next(next)
+            , m_key(k)
+            , m_val(std::forward<Args>(args)...)
         {
         }
 
@@ -166,7 +175,12 @@ class JitHashTable
     //    JitHashTable always starts out empty, with no allocation overhead.
     //    Call Reallocate to prime with an initial size if desired.
     //
-    JitHashTable(Allocator alloc) : m_alloc(alloc), m_table(nullptr), m_tableSizeInfo(), m_tableCount(0), m_tableMax(0)
+    JitHashTable(Allocator alloc)
+        : m_alloc(alloc)
+        , m_table(nullptr)
+        , m_tableSizeInfo()
+        , m_tableCount(0)
+        , m_tableMax(0)
     {
 #ifndef __GNUC__ // these crash GCC
         static_assert_no_msg(Behavior::s_growth_factor_numerator > Behavior::s_growth_factor_denominator);
@@ -492,7 +506,8 @@ class JitHashTable
     class KeyIterator : public NodeIterator
     {
     public:
-        KeyIterator(const JitHashTable* hash, bool begin) : NodeIterator(hash, begin)
+        KeyIterator(const JitHashTable* hash, bool begin)
+            : NodeIterator(hash, begin)
         {
         }
 
@@ -506,7 +521,8 @@ class JitHashTable
     class ValueIterator : public NodeIterator
     {
     public:
-        ValueIterator(const JitHashTable* hash, bool begin) : NodeIterator(hash, begin)
+        ValueIterator(const JitHashTable* hash, bool begin)
+            : NodeIterator(hash, begin)
         {
         }
 
@@ -521,7 +537,8 @@ class JitHashTable
     class KeyValueIterator : public NodeIterator
     {
     public:
-        KeyValueIterator(const JitHashTable* hash, bool begin) : NodeIterator(hash, begin)
+        KeyValueIterator(const JitHashTable* hash, bool begin)
+            : NodeIterator(hash, begin)
         {
         }
 
@@ -538,7 +555,8 @@ class JitHashTable
         const JitHashTable* const m_hash;
 
     public:
-        KeyIteration(const JitHashTable* hash) : m_hash(hash)
+        KeyIteration(const JitHashTable* hash)
+            : m_hash(hash)
         {
         }
 
@@ -559,7 +577,8 @@ class JitHashTable
         const JitHashTable* const m_hash;
 
     public:
-        ValueIteration(const JitHashTable* hash) : m_hash(hash)
+        ValueIteration(const JitHashTable* hash)
+            : m_hash(hash)
         {
         }
 
@@ -580,7 +599,8 @@ class JitHashTable
         const JitHashTable* const m_hash;
 
     public:
-        KeyValueIteration(const JitHashTable* hash) : m_hash(hash)
+        KeyValueIteration(const JitHashTable* hash)
+            : m_hash(hash)
         {
         }
 
diff --git a/src/coreclr/jit/jitmetadata.cpp b/src/coreclr/jit/jitmetadata.cpp
new file mode 100644
index 000000000000..905cdb7317d8
--- /dev/null
+++ b/src/coreclr/jit/jitmetadata.cpp
@@ -0,0 +1,100 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "jitpch.h"
+#include "jitmetadata.h"
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// JitMetadata::report: Report metadata back to the EE.
+//
+// Parameters:
+//   comp - Compiler instance
+//   key  - Key name of metadata
+//   data - Pointer to the value to report back
+//
+void JitMetadata::report(Compiler* comp, const char* key, const void* data, size_t length)
+{
+    comp->info.compCompHnd->reportMetadata(key, data, length);
+}
+
+//------------------------------------------------------------------------
+// reportValue: Report a specific value back to the EE.
+//
+// Parameters:
+//   comp  - Compiler instance
+//   key   - The key
+//   value - Value to report back
+//
+template <typename T>
+static void reportValue(Compiler* comp, const char* key, T value)
+{
+    JitMetadata::report(comp, key, &value, sizeof(value));
+}
+
+//------------------------------------------------------------------------
+// JitMetrics::report: Report all metrics and their values back to the EE.
+//
+// Parameters:
+//   comp - Compiler instance
+//
+void JitMetrics::report(Compiler* comp)
+{
+#define JITMETADATAINFO(name, type, flags)
+#define JITMETADATAMETRIC(name, type, flags) reportValue(comp, #name, name);
+#include "jitmetadatalist.h"
+}
+
+//------------------------------------------------------------------------
+// printMetric: Print a double metric value to jitstdout.
+//
+// Parameters:
+//   value - The value
+//
+static void printMetric(double value)
+{
+    printf("%f", value);
+}
+
+//------------------------------------------------------------------------
+// printMetric: Print an int metric value to jitstdout.
+//
+// Parameters:
+//   value - The value
+//
+static void printMetric(int value)
+{
+    printf("%d", value);
+}
+
+//------------------------------------------------------------------------
+// printMetric: Print an int64_t metric value to jitstdout.
+//
+// Parameters:
+//   value - The value
+//
+static void printMetric(int64_t value)
+{
+    printf("%lld", value);
+}
+
+//------------------------------------------------------------------------
+// JitMetrics::dump: Print the values of all metrics to jitstdout.
+//
+void JitMetrics::dump()
+{
+    int nameMaxWidth = 0;
+#define JITMETADATAINFO(name, type, flags)
+#define JITMETADATAMETRIC(name, type, flags) nameMaxWidth = max(nameMaxWidth, (int)strlen(#name));
+#include "jitmetadatalist.h"
+
+#define JITMETADATAINFO(name, type, flags)
+#define JITMETADATAMETRIC(name, type, flags)                                                                           \
+    printf("%-*s: ", nameMaxWidth + 5, #name);                                                                         \
+    printMetric(name);                                                                                                 \
+    printf("\n");
+#include "jitmetadatalist.h"
+}
+
+#endif
diff --git a/src/coreclr/jit/jitmetadata.h b/src/coreclr/jit/jitmetadata.h
new file mode 100644
index 000000000000..3b4b324497cc
--- /dev/null
+++ b/src/coreclr/jit/jitmetadata.h
@@ -0,0 +1,26 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#pragma once
+
+class Compiler;
+
+class JitMetadata
+{
+public:
+#define JITMETADATA(name, type, flags) static constexpr const char* name = #name;
+#include "jitmetadatalist.h"
+
+    static void report(Compiler* comp, const char* name, const void* data, size_t length);
+};
+
+class JitMetrics
+{
+public:
+#define JITMETADATAINFO(name, type, flags)
+#define JITMETADATAMETRIC(name, type, flags) type name = 0;
+#include "jitmetadatalist.h"
+
+    void report(Compiler* comp);
+    void dump();
+};
diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h
new file mode 100644
index 000000000000..f36c15ab9991
--- /dev/null
+++ b/src/coreclr/jit/jitmetadatalist.h
@@ -0,0 +1,51 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// clang-format off
+
+#ifdef JITMETADATA
+#define JITMETADATAINFO(name, type, flags) JITMETADATA(name, type, flags)
+#define JITMETADATAMETRIC(name, type, flags) JITMETADATA(name, type, flags)
+#endif
+
+#if !defined(JITMETADATAINFO) || !defined(JITMETADATAMETRIC)
+#error Define JITMETADATAINFO and JITMETADATAMETRIC before including this file.
+#endif
+
+// List of metadata that the JIT can report. There are two categories:
+//
+// - JITMETADATAINFO: General info that can be of any type and that cannot be
+//   aggregated in straightforward ways. These properties are not handled
+//   automatically; the JIT must explicitly report them using
+//   JitMetadata::report, and the SPMI side needs to manually handle (or ignore)
+//   them in ICorJitInfo::reportMetadata.
+//
+// - JITMETADATAMETRIC: Metrics which are numeric types (currently int, double
+//   and int64_t types supported). Their reporting is handled automatically and
+//   they will be propagated all the way into SPMI replay/diff results.
+
+//              Name,                          type              flags
+JITMETADATAINFO(MethodFullName,                const char*,      0)
+JITMETADATAINFO(TieringName,                   const char*,      0)
+JITMETADATAMETRIC(PhysicallyPromotedFields,    int,              0)
+JITMETADATAMETRIC(LoopsFoundDuringOpts,        int,              0)
+JITMETADATAMETRIC(LoopsCloned,                 int,              0)
+JITMETADATAMETRIC(LoopsUnrolled,               int,              0)
+JITMETADATAMETRIC(LoopAlignmentCandidates,     int,              0)
+JITMETADATAMETRIC(LoopsAligned,                int,              0)
+JITMETADATAMETRIC(LoopsIVWidened,              int,              0)
+JITMETADATAMETRIC(WidenedIVs,                  int,              0)
+JITMETADATAMETRIC(VarsInSsa,                   int,              0)
+JITMETADATAMETRIC(HoistedExpressions,          int,              0)
+JITMETADATAMETRIC(RedundantBranchesEliminated, int,              JIT_METADATA_HIGHER_IS_BETTER)
+JITMETADATAMETRIC(JumpThreadingsPerformed,     int,              JIT_METADATA_HIGHER_IS_BETTER)
+JITMETADATAMETRIC(CseCount,                    int,              0)
+JITMETADATAMETRIC(BasicBlocksAtCodegen,        int,              0)
+JITMETADATAMETRIC(PerfScore,                   double,           JIT_METADATA_LOWER_IS_BETTER)
+JITMETADATAMETRIC(BytesAllocated,              int64_t,          JIT_METADATA_LOWER_IS_BETTER)
+
+#undef JITMETADATA
+#undef JITMETADATAINFO
+#undef JITMETADATAMETRIC
+
+// clang-format on
diff --git a/src/coreclr/jit/jitpch.h b/src/coreclr/jit/jitpch.h
index 63f12133f61b..6e9a0a6f8002 100644
--- a/src/coreclr/jit/jitpch.h
+++ b/src/coreclr/jit/jitpch.h
@@ -11,7 +11,15 @@
 #include <string.h>
 #include <float.h>
 #include <cstdlib>
+#include <cmath>
 #include <intrin.h>
+#ifdef HOST_WINDOWS
+#include <malloc.h>
+#endif
+#include <algorithm>
+
+using std::max;
+using std::min;
 
 // Don't allow using the windows.h #defines for the BitScan* APIs. Using the #defines means our
 // `BitOperations::BitScan*` functions have their name mapped, which is confusing and messes up
diff --git a/src/coreclr/jit/jitstd/list.h b/src/coreclr/jit/jitstd/list.h
index f00c15964525..77b5f893bea1 100644
--- a/src/coreclr/jit/jitstd/list.h
+++ b/src/coreclr/jit/jitstd/list.h
@@ -14,7 +14,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 #include "iterator.h"
 #include "functional.h"
-#include "clr_std/utility"
+#include <utility>
 
 namespace jitstd
 {
diff --git a/src/coreclr/jit/jitstd/utility.h b/src/coreclr/jit/jitstd/utility.h
index 624bb7bc7c39..0df302a9352a 100644
--- a/src/coreclr/jit/jitstd/utility.h
+++ b/src/coreclr/jit/jitstd/utility.h
@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include "clr_std/type_traits"
+#include <type_traits>
 
 namespace jitstd
 {
diff --git a/src/coreclr/jit/layout.cpp b/src/coreclr/jit/layout.cpp
index 918fd4ab6521..ad4c0077c22b 100644
--- a/src/coreclr/jit/layout.cpp
+++ b/src/coreclr/jit/layout.cpp
@@ -21,7 +21,8 @@ class ClassLayoutTable
     typedef JitHashTable<unsigned, JitSmallPrimitiveKeyFuncs<unsigned>, unsigned>               BlkLayoutIndexMap;
     typedef JitHashTable<CORINFO_CLASS_HANDLE, JitPtrKeyFuncs<CORINFO_CLASS_STRUCT_>, unsigned> ObjLayoutIndexMap;
 
-    union {
+    union
+    {
         // Up to 3 layouts can be stored "inline" and finding a layout by handle/size can be done using linear search.
         // Most methods need no more than 2 layouts.
         ClassLayout* m_layoutArray[3];
@@ -43,7 +44,10 @@ class ClassLayoutTable
     ClassLayout m_zeroSizedBlockLayout;
 
 public:
-    ClassLayoutTable() : m_layoutCount(0), m_layoutLargeCapacity(0), m_zeroSizedBlockLayout(0)
+    ClassLayoutTable()
+        : m_layoutCount(0)
+        , m_layoutLargeCapacity(0)
+        , m_zeroSizedBlockLayout(0)
     {
     }
 
diff --git a/src/coreclr/jit/layout.h b/src/coreclr/jit/layout.h
index 59ecaa940548..3c6487e516b9 100644
--- a/src/coreclr/jit/layout.h
+++ b/src/coreclr/jit/layout.h
@@ -30,7 +30,8 @@ class ClassLayout
     // Array of CorInfoGCType (as BYTE) that describes the GC layout of the class.
     // For small classes the array is stored inline, avoiding an extra allocation
     // and the pointer size overhead.
-    union {
+    union
+    {
         BYTE* m_gcPtrs;
         BYTE  m_gcPtrsArray[sizeof(BYTE*)];
     };
@@ -69,7 +70,7 @@ class ClassLayout
     ClassLayout(CORINFO_CLASS_HANDLE classHandle,
                 bool                 isValueClass,
                 unsigned             size,
-                var_types type DEBUGARG(const char* className) DEBUGARG(const char* shortClassName))
+                var_types type       DEBUGARG(const char* className) DEBUGARG(const char* shortClassName))
         : m_classHandle(classHandle)
         , m_size(size)
         , m_isValueClass(isValueClass)
diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index 7ebba3c14bdc..6a4534fc3f63 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -14,7 +14,9 @@ class LocalSequencer final : public GenTreeVisitor<LocalSequencer>
         UseExecutionOrder = true,
     };
 
-    LocalSequencer(Compiler* comp) : GenTreeVisitor(comp), m_prevNode(nullptr)
+    LocalSequencer(Compiler* comp)
+        : GenTreeVisitor(comp)
+        , m_prevNode(nullptr)
     {
     }
 
@@ -719,7 +721,7 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
         GenTreeCall* callUser           = user->IsCall() ? user->AsCall() : nullptr;
         bool         hasHiddenStructArg = false;
         if (m_compiler->opts.compJitOptimizeStructHiddenBuffer && (callUser != nullptr) &&
-            IsValidLclAddr(lclNum, val.Offset()))
+            m_compiler->IsValidLclAddr(lclNum, val.Offset()))
         {
             // We will only attempt this optimization for locals that are:
             // a) Not susceptible to liveness bugs (see "lvaSetHiddenBufferStructArg").
@@ -805,6 +807,7 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
         unsigned   indirSize = node->AsIndir()->Size();
         bool       isWide;
 
+        // TODO-Cleanup: delete "indirSize == 0", use "Compiler::IsValidLclAddr".
         if ((indirSize == 0) || ((offset + indirSize) > UINT16_MAX))
         {
             // If we can't figure out the indirection size then treat it as a wide indirection.
@@ -823,15 +826,6 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
             else
             {
                 isWide = endOffset.Value() > m_compiler->lvaLclExactSize(lclNum);
-
-                if ((varDsc->TypeGet() == TYP_STRUCT) && varDsc->GetLayout()->IsBlockLayout())
-                {
-                    // TODO-CQ: TYP_BLK used to always be exposed here. This is in principle not necessary, but
-                    // not doing so would require VN changes. For now, exposing gets better CQ as otherwise the
-                    // variable ends up untracked and VN treats untracked-not-exposed locals more conservatively
-                    // than exposed ones.
-                    m_compiler->lvaSetVarAddrExposed(lclNum DEBUGARG(AddressExposedReason::TOO_CONSERVATIVE));
-                }
             }
         }
 
@@ -865,7 +859,7 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
         assert(addr->TypeIs(TYP_BYREF, TYP_I_IMPL));
         assert(m_compiler->lvaVarAddrExposed(lclNum) || m_compiler->lvaGetDesc(lclNum)->IsHiddenBufferStructArg());
 
-        if (IsValidLclAddr(lclNum, offset))
+        if (m_compiler->IsValidLclAddr(lclNum, offset))
         {
             addr->ChangeOper(GT_LCL_ADDR);
             addr->AsLclFld()->SetLclNum(lclNum);
@@ -926,9 +920,9 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
                 break;
 
 #ifdef FEATURE_HW_INTRINSICS
-            // We have two cases we want to handle:
-            // 1. Vector2/3/4 and Quaternion where we have 4x float fields
-            // 2. Plane where we have 1x Vector3 and 1x float field
+                // We have two cases we want to handle:
+                // 1. Vector2/3/4 and Quaternion where we have 4x float fields
+                // 2. Plane where we have 1x Vector3 and 1x float field
 
             case IndirTransform::GetElement:
             {
@@ -942,7 +936,7 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
                     {
                         GenTree* indexNode = m_compiler->gtNewIconNode(offset / genTypeSize(elementType));
                         hwiNode            = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode,
-                                                                      CORINFO_TYPE_FLOAT, genTypeSize(varDsc));
+                                                                                 CORINFO_TYPE_FLOAT, genTypeSize(varDsc));
                         break;
                     }
                     case TYP_SIMD12:
@@ -1053,9 +1047,9 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
                 }
                 if (isDef)
                 {
-                    GenTree* data = indir->Data();
+                    GenTree* value = indir->Data();
                     indir->ChangeOper(GT_STORE_LCL_VAR);
-                    indir->AsLclVar()->Data() = data;
+                    indir->AsLclVar()->Data() = value;
                 }
                 else
                 {
@@ -1068,9 +1062,9 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
             case IndirTransform::LclFld:
                 if (isDef)
                 {
-                    GenTree* data = indir->Data();
+                    GenTree* value = indir->Data();
                     indir->ChangeOper(GT_STORE_LCL_FLD);
-                    indir->AsLclFld()->Data() = data;
+                    indir->AsLclFld()->Data() = value;
                 }
                 else
                 {
@@ -1265,9 +1259,9 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
         {
             if (node->OperIs(GT_STOREIND, GT_STORE_BLK))
             {
-                GenTree* data = node->Data();
+                GenTree* value = node->Data();
                 node->ChangeOper(GT_STORE_LCL_VAR);
-                node->AsLclVar()->Data() = data;
+                node->AsLclVar()->Data() = value;
                 node->gtFlags |= GTF_VAR_DEF;
             }
             else
@@ -1458,24 +1452,6 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
     }
 
 private:
-    //------------------------------------------------------------------------
-    // IsValidLclAddr: Can the given local address be represented as "LCL_FLD_ADDR"?
-    //
-    // Local address nodes cannot point beyond the local and can only store
-    // 16 bits worth of offset.
-    //
-    // Arguments:
-    //    lclNum - The local's number
-    //    offset - The address' offset
-    //
-    // Return Value:
-    //    Whether "LCL_FLD_ADDR<lclNum> [+offset]" would be valid IR.
-    //
-    bool IsValidLclAddr(unsigned lclNum, unsigned offset) const
-    {
-        return (offset < UINT16_MAX) && (offset < m_compiler->lvaLclExactSize(lclNum));
-    }
-
     //------------------------------------------------------------------------
     // IsUnused: is the given node unused?
     //
diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp
index 57e6ab02d6d8..447b4a75f15d 100644
--- a/src/coreclr/jit/lclvars.cpp
+++ b/src/coreclr/jit/lclvars.cpp
@@ -35,6 +35,8 @@ unsigned Compiler::s_lvaDoubleAlignedProcsCount = 0;
 
 void Compiler::lvaInit()
 {
+    lvaParameterPassingInfo = nullptr;
+
     /* We haven't allocated stack variables yet */
     lvaRefCountState = RCS_INVALID;
 
@@ -46,9 +48,9 @@ void Compiler::lvaInit()
     lvaTrackedFixed = false; // false: We can still add new tracked variables
 
     lvaDoneFrameLayout = NO_FRAME_LAYOUT;
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
     lvaShadowSPslotsVar = BAD_VAR_NUM;
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
     lvaInlinedPInvokeFrameVar = BAD_VAR_NUM;
     lvaReversePInvokeFrameVar = BAD_VAR_NUM;
 #if FEATURE_FIXED_OUT_ARGS
@@ -69,12 +71,15 @@ void Compiler::lvaInit()
     lvaMonAcquired      = BAD_VAR_NUM;
     lvaRetAddrVar       = BAD_VAR_NUM;
 
+#ifdef SWIFT_SUPPORT
+    lvaSwiftSelfArg  = BAD_VAR_NUM;
+    lvaSwiftErrorArg = BAD_VAR_NUM;
+#endif
+
     lvaInlineeReturnSpillTemp = BAD_VAR_NUM;
 
     gsShadowVarInfo = nullptr;
-#if defined(FEATURE_EH_FUNCLETS)
-    lvaPSPSym = BAD_VAR_NUM;
-#endif
+    lvaPSPSym       = BAD_VAR_NUM;
 #if FEATURE_SIMD
     lvaSIMDInitTempVarNum = BAD_VAR_NUM;
 #endif // FEATURE_SIMD
@@ -165,6 +170,28 @@ void Compiler::lvaInitTypeRef()
         info.compRetBuffArg = BAD_VAR_NUM;
     }
 
+#if defined(DEBUG) && defined(SWIFT_SUPPORT)
+    if (verbose && (info.compCallConv == CorInfoCallConvExtension::Swift) && varTypeIsStruct(info.compRetType))
+    {
+        CORINFO_CLASS_HANDLE          retTypeHnd = info.compMethodInfo->args.retTypeClass;
+        const CORINFO_SWIFT_LOWERING* lowering   = GetSwiftLowering(retTypeHnd);
+        if (lowering->byReference)
+        {
+            printf("Swift compilation returns %s by reference\n", typGetObjLayout(retTypeHnd)->GetClassName());
+        }
+        else
+        {
+            printf("Swift compilation returns %s as %d primitive(s) in registers\n",
+                   typGetObjLayout(retTypeHnd)->GetClassName(), lowering->numLoweredElements);
+            for (size_t i = 0; i < lowering->numLoweredElements; i++)
+            {
+                printf("    [%zu] @ +%02u: %s\n", i, lowering->offsets[i],
+                       varTypeName(JitType2PreciseVarType(lowering->loweredElements[i])));
+            }
+        }
+    }
+#endif
+
     /* There is a 'hidden' cookie pushed last when the
        calling convention is varargs */
 
@@ -304,9 +331,9 @@ void Compiler::lvaInitTypeRef()
     }
 
     if ( // If there already exist unsafe buffers, don't mark more structs as unsafe
-        // as that will cause them to be placed along with the real unsafe buffers,
-        // unnecessarily exposing them to overruns. This can affect GS tests which
-        // intentionally do buffer-overruns.
+         // as that will cause them to be placed along with the real unsafe buffers,
+         // unnecessarily exposing them to overruns. This can affect GS tests which
+         // intentionally do buffer-overruns.
         !getNeedsGSSecurityCookie() &&
         // GS checks require the stack to be re-ordered, which can't be done with EnC
         !opts.compDbgEnC && compStressCompile(STRESS_UNSAFE_BUFFER_CHECKS, 25))
@@ -387,7 +414,7 @@ void Compiler::lvaInitArgs(InitVarDscInfo* varDscInfo)
 
     //----------------------------------------------------------------------
 
-    /* Is there a "this" pointer ? */
+    // Is there a "this" pointer ?
     lvaInitThisPtr(varDscInfo);
 
     unsigned numUserArgsToSkip = 0;
@@ -412,7 +439,7 @@ void Compiler::lvaInitArgs(InitVarDscInfo* varDscInfo)
         lvaInitRetBuffArg(varDscInfo, true);
     }
 
-//======================================================================
+    //======================================================================
 
 #if USER_ARGS_COME_LAST
     //@GENERICS: final instantiation-info argument for shared generic methods
@@ -440,6 +467,7 @@ void Compiler::lvaInitArgs(InitVarDscInfo* varDscInfo)
 
     // We have set info.compArgsCount in compCompile()
     noway_assert(varDscInfo->varNum == info.compArgsCount);
+
     assert(varDscInfo->intRegArgNum <= MAX_REG_ARG);
 
 #ifndef TARGET_WASM
@@ -447,6 +475,9 @@ void Compiler::lvaInitArgs(InitVarDscInfo* varDscInfo)
     codeGen->floatRegState.rsCalleeRegArgCount = varDscInfo->floatRegArgNum;
 #endif // !TARGET_WASM
 
+    // Now we have parameters created in the right order. Figure out how they're passed.
+    lvaClassifyParameterABI();
+
 #if FEATURE_FASTTAILCALL
     // Save the stack usage information
     // We can get register usage information using codeGen->intRegState and
@@ -492,7 +523,8 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo)
         varDsc->lvIsRegArg = 1;
         noway_assert(varDscInfo->intRegArgNum == 0);
 
-        varDsc->SetArgReg(genMapRegArgNumToRegNum(varDscInfo->allocRegArg(TYP_INT), varDsc->TypeGet()));
+        varDsc->SetArgReg(
+            genMapRegArgNumToRegNum(varDscInfo->allocRegArg(TYP_INT), varDsc->TypeGet(), info.compCallConv));
 #if FEATURE_MULTIREG_ARGS
         varDsc->SetOtherArgReg(REG_NA);
 #endif
@@ -506,8 +538,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo)
 #endif
         compArgSize += TARGET_POINTER_SIZE;
 
-        varDscInfo->varNum++;
-        varDscInfo->varDsc++;
+        varDscInfo->nextParam();
     }
 }
 
@@ -523,16 +554,22 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBuf
         varDsc->lvIsParam  = 1;
         varDsc->lvIsRegArg = 0;
 
-        if (useFixedRetBufReg && hasFixedRetBuffReg())
+        if (useFixedRetBufReg && hasFixedRetBuffReg(info.compCallConv))
         {
             varDsc->lvIsRegArg = 1;
-            varDsc->SetArgReg(theFixedRetBuffReg());
+            varDsc->SetArgReg(theFixedRetBuffReg(info.compCallConv));
         }
         else if (varDscInfo->canEnreg(TYP_INT))
         {
             varDsc->lvIsRegArg     = 1;
             unsigned retBuffArgNum = varDscInfo->allocRegArg(TYP_INT);
-            varDsc->SetArgReg(genMapIntRegArgNumToRegNum(retBuffArgNum));
+            varDsc->SetArgReg(genMapIntRegArgNumToRegNum(retBuffArgNum, info.compCallConv));
+        }
+        else
+        {
+            varDscInfo->stackArgSize = roundUp(varDscInfo->stackArgSize, TARGET_POINTER_SIZE);
+            varDsc->SetStackOffset(varDscInfo->stackArgSize);
+            varDscInfo->stackArgSize += TARGET_POINTER_SIZE;
         }
 
 #if FEATURE_MULTIREG_ARGS
@@ -540,7 +577,7 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBuf
 #endif
         varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
 
-        assert(!varDsc->lvIsRegArg || isValidIntArgReg(varDsc->GetArgReg()));
+        assert(!varDsc->lvIsRegArg || isValidIntArgReg(varDsc->GetArgReg(), info.compCallConv));
 
 #ifdef DEBUG
         if (varDsc->lvIsRegArg && verbose)
@@ -549,11 +586,9 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBuf
         }
 #endif
 
-        /* Update the total argument size, count and varDsc */
-
         compArgSize += TARGET_POINTER_SIZE;
-        varDscInfo->varNum++;
-        varDscInfo->varDsc++;
+
+        varDscInfo->nextParam();
     }
 }
 
@@ -568,9 +603,9 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBuf
 //
 void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, unsigned takeArgs)
 {
-//-------------------------------------------------------------------------
-// Walk the function signature for the explicit arguments
-//-------------------------------------------------------------------------
+    //-------------------------------------------------------------------------
+    // Walk the function signature for the explicit arguments
+    //-------------------------------------------------------------------------
 
 #if defined(TARGET_X86)
     // Only (some of) the implicit args are enregistered for varargs
@@ -588,7 +623,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
     const unsigned argSigLen = info.compMethodInfo->args.numArgs;
 
     // We will process at most takeArgs arguments from the signature after skipping skipArgs arguments
-    const int64_t numUserArgs = min(takeArgs, (argSigLen - (int64_t)skipArgs));
+    const int64_t numUserArgs = min((int64_t)takeArgs, (argSigLen - (int64_t)skipArgs));
 
     // If there are no user args or less than skipArgs args, return here since there's no work to do.
     if (numUserArgs <= 0)
@@ -607,8 +642,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
     }
 
     // Process each user arg.
-    for (unsigned i = 0; i < numUserArgs;
-         i++, varDscInfo->varNum++, varDscInfo->varDsc++, argLst = info.compCompHnd->getArgNext(argLst))
+    for (unsigned i = 0; i < numUserArgs; i++, varDscInfo->nextParam(), argLst = info.compCompHnd->getArgNext(argLst))
     {
         LclVarDsc*           varDsc  = varDscInfo->varDsc;
         CORINFO_CLASS_HANDLE typeHnd = nullptr;
@@ -624,6 +658,33 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
             lvaSetClass(varDscInfo->varNum, clsHnd);
         }
 
+        // The final home for this incoming parameter might be our local stack frame.
+        varDsc->lvOnFrame = true;
+
+#ifdef SWIFT_SUPPORT
+        if (info.compCallConv == CorInfoCallConvExtension::Swift)
+        {
+            if (varTypeIsSIMD(varDsc))
+            {
+                IMPL_LIMITATION("SIMD types are currently unsupported in Swift reverse pinvokes");
+            }
+
+            if (lvaInitSpecialSwiftParam(argLst, varDscInfo, strip(corInfoType), typeHnd))
+            {
+                continue;
+            }
+
+            if (varDsc->TypeGet() == TYP_STRUCT)
+            {
+                // Struct parameters are lowered to separate primitives in the
+                // Swift calling convention. We cannot handle these patterns
+                // efficiently, so we always DNER them and home them to stack
+                // in the prolog.
+                lvaSetVarDoNotEnregister(varDscInfo->varNum DEBUGARG(DoNotEnregisterReason::IsStructArg));
+            }
+        }
+#endif
+
         // For ARM, ARM64, LOONGARCH64, RISCV64 and AMD64 varargs, all arguments go in integer registers
         var_types argType = mangleVarArgsType(varDsc->TypeGet());
 
@@ -633,7 +694,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
         // Otherwise there appear too many surplus pre-spills and other memory operations
         // with the associated locations .
         bool     isSoftFPPreSpill = opts.compUseSoftFP && varTypeIsFloating(varDsc->TypeGet());
-        unsigned argSize          = eeGetArgSize(argLst, &info.compMethodInfo->args);
+        unsigned argSize          = eeGetArgSize(strip(corInfoType), typeHnd);
         unsigned cSlots =
             (argSize + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; // the total number of slots of this argument
         bool      isHfaArg = false;
@@ -697,6 +758,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
                     !varDscInfo->canEnreg(TYP_INT, cSlots)) // The end of the struct can't fit in a register
                 {
                     cSlotsToEnregister = 1; // Force the split
+                    varDscInfo->stackArgSize += TARGET_POINTER_SIZE;
                 }
             }
         }
@@ -720,8 +782,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
                 // arguments passed in the integer registers but get homed immediately after the prolog.
                 if (!isHfaArg)
                 {
-                    // TODO-Arm32-Windows: vararg struct should be forced to split like
-                    // ARM64 above.
                     cSlotsToEnregister = 1; // HFAs must be totally enregistered or not, but other structs can be split.
                     preSpill           = true;
                 }
@@ -823,10 +883,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
         }
 #endif // UNIX_AMD64_ABI
 
-        // The final home for this incoming register might be our local stack frame.
-        // For System V platforms the final home will always be on the local stack frame.
-        varDsc->lvOnFrame = true;
-
         bool canPassArgInRegisters = false;
 
 #if defined(UNIX_AMD64_ABI)
@@ -998,17 +1054,19 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
 #ifdef TARGET_ARM64
             if (argType == TYP_STRUCT)
             {
-                varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL));
+                varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL, info.compCallConv));
                 if (cSlots == 2)
                 {
-                    varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL));
+                    varDsc->SetOtherArgReg(
+                        genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL, info.compCallConv));
                     varDsc->lvIsMultiRegArg = true;
                 }
             }
 #elif defined(UNIX_AMD64_ABI)
             if (varTypeIsStruct(argType))
             {
-                varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType));
+                varDsc->SetArgReg(
+                    genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType, info.compCallConv));
 
                 // If there is a second eightbyte, get a register for it too and map the arg to the reg number.
                 if (structDesc.eightByteCount >= 2)
@@ -1020,20 +1078,25 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
 
                 if (secondEightByteType != TYP_UNDEF)
                 {
-                    varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType));
+                    varDsc->SetOtherArgReg(
+                        genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType, info.compCallConv));
                 }
+
+                assert(structDesc.eightByteCount <= 2);
             }
 #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
             if (argType == TYP_STRUCT)
             {
                 if (argRegTypeInStruct1 != TYP_UNKNOWN)
                 {
-                    varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argRegTypeInStruct1));
+                    varDsc->SetArgReg(
+                        genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argRegTypeInStruct1, info.compCallConv));
                     varDsc->lvIs4Field1 = (genTypeSize(argRegTypeInStruct1) == 4) ? 1 : 0;
                     if (argRegTypeInStruct2 != TYP_UNKNOWN)
                     {
                         secondAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct2, 1);
-                        varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2));
+                        varDsc->SetOtherArgReg(
+                            genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2, info.compCallConv));
                         varDsc->lvIs4Field2 = (genTypeSize(argRegTypeInStruct2) == 4) ? 1 : 0;
                     }
                     else if (cSlots > 1)
@@ -1043,9 +1106,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
                         varDsc->lvIsSplit = 1;
                         varDsc->SetOtherArgReg(REG_STK);
                         varDscInfo->setAllRegArgUsed(argRegTypeInStruct1);
-#if FEATURE_FASTTAILCALL
                         varDscInfo->stackArgSize += TARGET_POINTER_SIZE;
-#endif
 #ifdef TARGET_RISCV64
                         varDscInfo->hasSplitParam = true;
 #endif
@@ -1053,48 +1114,61 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
                 }
                 else
                 {
-                    varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL));
+                    varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL, info.compCallConv));
                     if (cSlots == 2)
                     {
-                        varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL));
+                        varDsc->SetOtherArgReg(
+                            genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL, info.compCallConv));
                     }
+
+                    assert(cSlots <= 2);
                 }
             }
 #else  // ARM32
             if (varTypeIsStruct(argType))
             {
-                varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL));
+                varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL, info.compCallConv));
             }
 #endif // ARM32
             else
 #endif // FEATURE_MULTIREG_ARGS
             {
-                varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType));
+                varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType, info.compCallConv));
             }
 
 #ifdef TARGET_ARM
             if (varDsc->TypeGet() == TYP_LONG)
             {
-                varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_INT));
+                varDsc->SetOtherArgReg(
+                    genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_INT, info.compCallConv));
             }
 
-#if FEATURE_FASTTAILCALL
+            unsigned numEnregistered = 0;
+            unsigned stackSize       = 0;
             // Check if arg was split between registers and stack.
             if (varTypeUsesIntReg(argType))
             {
-                unsigned firstRegArgNum = genMapIntRegNumToRegArgNum(varDsc->GetArgReg());
+                unsigned firstRegArgNum = genMapIntRegNumToRegArgNum(varDsc->GetArgReg(), info.compCallConv);
                 unsigned lastRegArgNum  = firstRegArgNum + cSlots - 1;
                 if (lastRegArgNum >= varDscInfo->maxIntRegArgNum)
                 {
                     assert(varDscInfo->stackArgSize == 0);
-                    unsigned numEnregistered = varDscInfo->maxIntRegArgNum - firstRegArgNum;
+                    numEnregistered = varDscInfo->maxIntRegArgNum - firstRegArgNum;
                     varDsc->SetStackOffset(-(int)numEnregistered * REGSIZE_BYTES);
-                    varDscInfo->stackArgSize += (cSlots - numEnregistered) * REGSIZE_BYTES;
+                    stackSize = (cSlots - numEnregistered) * REGSIZE_BYTES;
+                    varDscInfo->stackArgSize += stackSize;
                     varDscInfo->hasSplitParam = true;
                     JITDUMP("set user arg V%02u offset to %d\n", varDscInfo->varNum, varDsc->GetStackOffset());
                 }
+                else
+                {
+                    numEnregistered = cSlots;
+                }
+            }
+            else
+            {
+                numEnregistered = cSlots;
             }
-#endif
 #endif // TARGET_ARM
 
 #ifdef DEBUG
@@ -1113,7 +1187,8 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
                     else
                     {
                         printf("firstEightByte: %s",
-                               getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType)));
+                               getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType,
+                                                                  info.compCallConv)));
                     }
 
                     if (secondEightByteType == TYP_UNDEF)
@@ -1123,7 +1198,8 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
                     else
                     {
                         printf(", secondEightByte: %s",
-                               getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType)));
+                               getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType,
+                                                                  info.compCallConv)));
                     }
                 }
                 else
@@ -1137,7 +1213,8 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
                     else
                     {
                         printf("first: %s",
-                               getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argRegTypeInStruct1)));
+                               getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argRegTypeInStruct1,
+                                                                  info.compCallConv)));
                     }
                     if (argRegTypeInStruct2 == TYP_UNKNOWN)
                     {
@@ -1146,7 +1223,8 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
                     else
                     {
                         printf(", second: %s",
-                               getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2)));
+                               getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2,
+                                                                  info.compCallConv)));
                     }
                 }
                 else
@@ -1155,7 +1233,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
                     assert(varTypeUsesFloatReg(argType) || varTypeUsesIntReg(argType));
 
                     bool     isFloat   = varTypeUsesFloatReg(argType);
-                    unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->GetArgReg(), argType);
+                    unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->GetArgReg(), argType, info.compCallConv);
 
                     for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++)
                     {
@@ -1164,9 +1242,9 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
                             printf(",");
                         }
 
-                        if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) // a struct has been split between
-                                                                                    // registers and stack
+                        if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum))
                         {
+                            // a struct has been split between registers and stack
                             printf(" stack slots:%d", cSlots - ix);
                             break;
                         }
@@ -1178,8 +1256,9 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
                             if (argType == TYP_DOUBLE)
                             {
                                 // Print both registers, just to be clear
-                                printf("%s/%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType)),
-                                       getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType)));
+                                printf("%s/%s",
+                                       getRegName(genMapRegArgNumToRegNum(regArgNum, argType, info.compCallConv)),
+                                       getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType, info.compCallConv)));
 
                                 // doubles take 2 slots
                                 assert(ix + 1 < cSlots);
@@ -1188,13 +1267,14 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
                             }
                             else
                             {
-                                printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType)));
+                                printf("%s",
+                                       getRegName(genMapRegArgNumToRegNum(regArgNum, argType, info.compCallConv)));
                             }
                         }
                         else
 #endif // TARGET_ARM
                         {
-                            printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType)));
+                            printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType, info.compCallConv)));
                         }
                     }
                 }
@@ -1226,7 +1306,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
 
 #endif // TARGET_XXX
 
-#if FEATURE_FASTTAILCALL
 #ifdef TARGET_ARM
             unsigned argAlignment = cAlign * TARGET_POINTER_SIZE;
 #else
@@ -1242,7 +1321,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
             JITDUMP("set user arg V%02u offset to %u\n", varDscInfo->varNum, varDscInfo->stackArgSize);
             varDsc->SetStackOffset(varDscInfo->stackArgSize);
             varDscInfo->stackArgSize += argSize;
-#endif // FEATURE_FASTTAILCALL
         }
 
 #ifdef UNIX_AMD64_ABI
@@ -1258,8 +1336,8 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
 #if defined(TARGET_X86)
             varDsc->SetStackOffset(compArgSize);
 #else  // !TARGET_X86
-            // TODO-CQ: We shouldn't have to go as far as to declare these
-            // address-exposed -- DoNotEnregister should suffice.
+       // TODO-CQ: We shouldn't have to go as far as to declare these
+       // address-exposed -- DoNotEnregister should suffice.
 
             lvaSetVarAddrExposed(varDscInfo->varNum DEBUGARG(AddressExposedReason::TOO_CONSERVATIVE));
 #endif // !TARGET_X86
@@ -1303,6 +1381,102 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
 #endif // TARGET_ARM
 }
 
+#ifdef SWIFT_SUPPORT
+//-----------------------------------------------------------------------------
+// lvaInitSpecialSwiftParam: Initialize SwiftSelf/SwiftError* parameters.
+//
+// Parameters:
+//   argHnd  - Handle for this parameter in the method's signature
+//   varDsc  - LclVarDsc* for the parameter
+//   type    - Type of the parameter
+//   typeHnd - Class handle for the type of the parameter
+//
+// Returns:
+//   true if parameter was initialized
+//
+bool Compiler::lvaInitSpecialSwiftParam(CORINFO_ARG_LIST_HANDLE argHnd,
+                                        InitVarDscInfo*         varDscInfo,
+                                        CorInfoType             type,
+                                        CORINFO_CLASS_HANDLE    typeHnd)
+{
+    const bool argIsByrefOrPtr = (type == CORINFO_TYPE_BYREF) || (type == CORINFO_TYPE_PTR);
+
+    if (argIsByrefOrPtr)
+    {
+        // For primitive types, we don't expect to be passed a CORINFO_CLASS_HANDLE; look up the actual handle
+        assert(typeHnd == nullptr);
+        CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getArgClass(&info.compMethodInfo->args, argHnd);
+        type                        = info.compCompHnd->getChildType(clsHnd, &typeHnd);
+    }
+
+    if (type != CORINFO_TYPE_VALUECLASS)
+    {
+        return false;
+    }
+
+    if (!info.compCompHnd->isIntrinsicType(typeHnd))
+    {
+        return false;
+    }
+
+    const char* namespaceName;
+    const char* className = info.compCompHnd->getClassNameFromMetadata(typeHnd, &namespaceName);
+    if ((strcmp(className, "SwiftSelf") == 0) && (strcmp(namespaceName, "System.Runtime.InteropServices.Swift") == 0))
+    {
+        if (argIsByrefOrPtr)
+        {
+            BADCODE("Expected SwiftSelf struct, got pointer/reference");
+        }
+
+        if (lvaSwiftSelfArg != BAD_VAR_NUM)
+        {
+            BADCODE("Duplicate SwiftSelf parameter");
+        }
+
+        LclVarDsc* const varDsc = varDscInfo->varDsc;
+        varDsc->SetArgReg(REG_SWIFT_SELF);
+        varDsc->SetOtherArgReg(REG_NA);
+        varDsc->lvIsRegArg = true;
+
+        compArgSize += TARGET_POINTER_SIZE;
+
+        lvaSwiftSelfArg = varDscInfo->varNum;
+        lvaSetVarDoNotEnregister(lvaSwiftSelfArg DEBUGARG(DoNotEnregisterReason::NonStandardParameter));
+        return true;
+    }
+
+    if ((strcmp(className, "SwiftError") == 0) && (strcmp(namespaceName, "System.Runtime.InteropServices.Swift") == 0))
+    {
+        if (!argIsByrefOrPtr)
+        {
+            BADCODE("Expected SwiftError pointer/reference, got struct");
+        }
+
+        if (lvaSwiftErrorArg != BAD_VAR_NUM)
+        {
+            BADCODE("Duplicate SwiftError* parameter");
+        }
+
+        // We won't actually be passing this SwiftError* in REG_SWIFT_ERROR (or any register, for that matter).
+        // We will check for this quirk when generating the prolog,
+        // and ensure this fake parameter doesn't take any registers/stack space
+        LclVarDsc* const varDsc = varDscInfo->varDsc;
+        varDsc->SetArgReg(REG_SWIFT_ERROR);
+        varDsc->SetOtherArgReg(REG_NA);
+        varDsc->lvIsRegArg = true;
+        lvaSwiftErrorArg   = varDscInfo->varNum;
+
+        // Instead, all usages of the SwiftError* parameter will be redirected to this pseudolocal.
+        lvaSwiftErrorLocal = lvaGrabTempWithImplicitUse(false DEBUGARG("SwiftError pseudolocal"));
+        lvaSetStruct(lvaSwiftErrorLocal, typeHnd, false);
+        lvaSetVarAddrExposed(lvaSwiftErrorLocal DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS));
+        return true;
+    }
+
+    return false;
+}
+#endif
+
 /*****************************************************************************/
 void Compiler::lvaInitGenericsCtxt(InitVarDscInfo* varDscInfo)
 {
@@ -1321,7 +1495,8 @@ void Compiler::lvaInitGenericsCtxt(InitVarDscInfo* varDscInfo)
             /* Another register argument */
 
             varDsc->lvIsRegArg = 1;
-            varDsc->SetArgReg(genMapRegArgNumToRegNum(varDscInfo->regArgNum(TYP_INT), varDsc->TypeGet()));
+            varDsc->SetArgReg(
+                genMapRegArgNumToRegNum(varDscInfo->regArgNum(TYP_INT), varDsc->TypeGet(), info.compCallConv));
 #if FEATURE_MULTIREG_ARGS
             varDsc->SetOtherArgReg(REG_NA);
 #endif
@@ -1341,10 +1516,8 @@ void Compiler::lvaInitGenericsCtxt(InitVarDscInfo* varDscInfo)
             // We need to mark these as being on the stack, as this is not done elsewhere in the case that canEnreg
             // returns false.
             varDsc->lvOnFrame = true;
-#if FEATURE_FASTTAILCALL
             varDsc->SetStackOffset(varDscInfo->stackArgSize);
             varDscInfo->stackArgSize += TARGET_POINTER_SIZE;
-#endif // FEATURE_FASTTAILCALL
         }
 
         compArgSize += TARGET_POINTER_SIZE;
@@ -1354,8 +1527,7 @@ void Compiler::lvaInitGenericsCtxt(InitVarDscInfo* varDscInfo)
             varDsc->SetStackOffset(compArgSize);
 #endif // TARGET_X86
 
-        varDscInfo->varNum++;
-        varDscInfo->varDsc++;
+        varDscInfo->nextParam();
     }
 }
 
@@ -1379,8 +1551,6 @@ void Compiler::lvaInitVarArgsHandle(InitVarDscInfo* varDscInfo)
 
         assert(mostRecentlyActivePhase == PHASE_PRE_IMPORT);
 
-        // TODO-Cleanup: this is preImportation phase, why do we try to work with regs here?
-        // Should it be just deleted?
         if (varDscInfo->canEnreg(TYP_I_IMPL))
         {
             /* Another register argument */
@@ -1388,7 +1558,7 @@ void Compiler::lvaInitVarArgsHandle(InitVarDscInfo* varDscInfo)
             unsigned varArgHndArgNum = varDscInfo->allocRegArg(TYP_I_IMPL);
 
             varDsc->lvIsRegArg = 1;
-            varDsc->SetArgReg(genMapRegArgNumToRegNum(varArgHndArgNum, TYP_I_IMPL));
+            varDsc->SetArgReg(genMapRegArgNumToRegNum(varArgHndArgNum, TYP_I_IMPL, info.compCallConv));
 #if FEATURE_MULTIREG_ARGS
             varDsc->SetOtherArgReg(REG_NA);
 #endif
@@ -1415,18 +1585,15 @@ void Compiler::lvaInitVarArgsHandle(InitVarDscInfo* varDscInfo)
             // We need to mark these as being on the stack, as this is not done elsewhere in the case that canEnreg
             // returns false.
             varDsc->lvOnFrame = true;
-#if FEATURE_FASTTAILCALL
             varDsc->SetStackOffset(varDscInfo->stackArgSize);
             varDscInfo->stackArgSize += TARGET_POINTER_SIZE;
-#endif // FEATURE_FASTTAILCALL
         }
 
         /* Update the total argument size, count and varDsc */
 
         compArgSize += TARGET_POINTER_SIZE;
 
-        varDscInfo->varNum++;
-        varDscInfo->varDsc++;
+        varDscInfo->nextParam();
 
 #if defined(TARGET_X86)
         varDsc->SetStackOffset(compArgSize);
@@ -1519,6 +1686,247 @@ void Compiler::lvaInitVarDsc(LclVarDsc*              varDsc,
 #endif // FEATURE_MULTIREG_ARGS
 }
 
+//-----------------------------------------------------------------------------
+// lvaClassifyParameterABI:
+//  Classify the ABI information for all parameters.
+//
+// Type parameters:
+//   Classifier - The type of classifier to use.
+//
+// Parameters:
+//   classifier - The classifier to use
+//
+template <typename Classifier>
+void Compiler::lvaClassifyParameterABI(Classifier& classifier)
+{
+    lvaParameterPassingInfo = new (this, CMK_LvaTable) ABIPassingInformation[info.compArgsCount];
+
+    for (unsigned i = 0; i < info.compArgsCount; i++)
+    {
+        LclVarDsc*   dsc          = lvaGetDesc(i);
+        ClassLayout* structLayout = varTypeIsStruct(dsc) ? dsc->GetLayout() : nullptr;
+
+        WellKnownArg wellKnownArg = WellKnownArg::None;
+        if (i == info.compRetBuffArg)
+        {
+            wellKnownArg = WellKnownArg::RetBuffer;
+        }
+#ifdef SWIFT_SUPPORT
+        else if (i == lvaSwiftSelfArg)
+        {
+            wellKnownArg = WellKnownArg::SwiftSelf;
+        }
+        else if (i == lvaSwiftErrorArg)
+        {
+            wellKnownArg = WellKnownArg::SwiftError;
+        }
+#endif
+
+        lvaParameterPassingInfo[i] = classifier.Classify(this, dsc->TypeGet(), structLayout, wellKnownArg);
+
+#ifdef DEBUG
+        if (verbose)
+        {
+            printf("Parameter #%u ABI info: ", i);
+            lvaParameterPassingInfo[i].Dump();
+        }
+#endif
+    }
+}
+
+//-----------------------------------------------------------------------------
+// lvaClassifyParameterABI:
+//  Classify the ABI information for all parameters.
+//
+void Compiler::lvaClassifyParameterABI()
+{
+    if (info.compArgsCount == 0)
+    {
+        return;
+    }
+
+    ClassifierInfo cInfo;
+    cInfo.CallConv   = info.compCallConv;
+    cInfo.IsVarArgs  = info.compIsVarArgs;
+    cInfo.HasThis    = info.compThisArg != BAD_VAR_NUM;
+    cInfo.HasRetBuff = info.compRetBuffArg != BAD_VAR_NUM;
+
+#ifdef SWIFT_SUPPORT
+    if (info.compCallConv == CorInfoCallConvExtension::Swift)
+    {
+        SwiftABIClassifier classifier(cInfo);
+        lvaClassifyParameterABI(classifier);
+
+        regMaskTP argRegs = RBM_NONE;
+
+        // The calling convention details computed by the old ABI classifier
+        // are wrong since it does not handle the Swift ABI for structs
+        // appropriately. Grab them from the new ABI information.
+        for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
+        {
+            LclVarDsc*                   dsc     = lvaGetDesc(lclNum);
+            const ABIPassingInformation& abiInfo = lvaParameterPassingInfo[lclNum];
+
+            if (dsc->TypeGet() == TYP_STRUCT)
+            {
+                const CORINFO_SWIFT_LOWERING* lowering = GetSwiftLowering(dsc->GetLayout()->GetClassHandle());
+                dsc->lvIsImplicitByRef                 = lowering->byReference;
+            }
+
+            if ((dsc->TypeGet() == TYP_STRUCT) && !lvaIsImplicitByRefLocal(lclNum) &&
+                !abiInfo.HasExactlyOneStackSegment())
+            {
+                dsc->lvIsRegArg = false;
+            }
+            else
+            {
+                assert(abiInfo.NumSegments == 1);
+                if (abiInfo.Segments[0].IsPassedInRegister())
+                {
+                    dsc->lvIsRegArg = true;
+                    dsc->SetArgReg(abiInfo.Segments[0].GetRegister());
+                    dsc->SetOtherArgReg(REG_NA);
+                }
+                else
+                {
+                    dsc->lvIsRegArg = false;
+                    dsc->SetArgReg(REG_STK);
+                    dsc->SetOtherArgReg(REG_NA);
+                    dsc->SetStackOffset(abiInfo.Segments[0].GetStackOffset());
+                }
+            }
+
+            for (unsigned i = 0; i < abiInfo.NumSegments; i++)
+            {
+                const ABIPassingSegment& segment = abiInfo.Segments[i];
+                if (segment.IsPassedInRegister())
+                {
+                    argRegs |= segment.GetRegisterMask();
+                }
+            }
+        }
+
+        // genFnPrologCalleeRegArgs expect these to be the counts of registers it knows how to handle.
+        codeGen->intRegState.rsCalleeRegArgCount   = genCountBits(argRegs & RBM_ARG_REGS);
+        codeGen->floatRegState.rsCalleeRegArgCount = genCountBits(argRegs & RBM_FLTARG_REGS);
+    }
+    else
+#endif
+#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM)
+    {
+        PlatformClassifier classifier(cInfo);
+        lvaClassifyParameterABI(classifier);
+    }
+#endif
+
+#ifdef DEBUG
+    if (lvaParameterPassingInfo == nullptr)
+    {
+        return;
+    }
+
+    for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
+    {
+        LclVarDsc*                   dsc     = lvaGetDesc(lclNum);
+        const ABIPassingInformation& abiInfo = lvaParameterPassingInfo[lclNum];
+
+        assert(abiInfo.NumSegments > 0);
+
+        if ((dsc->TypeGet() == TYP_STRUCT) && (info.compCallConv == CorInfoCallConvExtension::Swift))
+        {
+            continue;
+        }
+
+        unsigned numSegmentsToCompare = abiInfo.NumSegments;
+        if (dsc->lvIsHfa())
+        {
+            // LclVarDsc only has one register set for HFAs
+            numSegmentsToCompare = 1;
+        }
+
+#ifdef TARGET_ARM
+        // On arm the old representation only represents the start register for
+        // struct multireg args.
+        if (varTypeIsStruct(dsc))
+        {
+            numSegmentsToCompare = 1;
+        }
+
+        // And also for TYP_DOUBLE on soft FP
+        if (opts.compUseSoftFP && (dsc->TypeGet() == TYP_DOUBLE))
+        {
+            numSegmentsToCompare = 1;
+        }
+#endif
+
+        for (unsigned i = 0; i < numSegmentsToCompare; i++)
+        {
+            const ABIPassingSegment& expected = abiInfo.Segments[i];
+            regNumber                reg      = REG_NA;
+            if (i == 0)
+            {
+                reg = dsc->GetArgReg();
+            }
+#if FEATURE_MULTIREG_ARGS
+            else if (i == 1)
+            {
+                reg = dsc->GetOtherArgReg();
+            }
+#endif
+
+            if (expected.IsPassedOnStack())
+            {
+                if (i == 0)
+                {
+                    assert(reg == REG_STK);
+
+// On x86, varargs methods access stack args off of a base pointer, and the
+// first stack arg is not considered to be at offset 0.
+// TODO-Cleanup: Unify things so that x86 is consistent with other platforms
+// here and change fgMorphExpandStackArgForVarArgs to account for that.
+#ifndef TARGET_X86
+                    assert((unsigned)dsc->GetStackOffset() == expected.GetStackOffset());
+#endif
+                }
+            }
+            else
+            {
+                assert(reg == expected.GetRegister());
+            }
+        }
+    }
+#endif // DEBUG
+}
+
+//--------------------------------------------------------------------------------------------
+// lvaHaveSwiftStructStackParamsToReassemble:
+//   Check if this compilation has any Swift parameters that are passed on the
+//   stack and that need to be reassembled on the local stack frame.
+//
+// Return value:
+//   True if so.
+//
+bool Compiler::lvaHasAnySwiftStackParamToReassemble()
+{
+#ifdef SWIFT_SUPPORT
+    if (info.compCallConv != CorInfoCallConvExtension::Swift)
+    {
+        return false;
+    }
+
+    for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
+    {
+        const ABIPassingInformation& abiInfo = lvaParameterPassingInfo[lclNum];
+        if (abiInfo.HasAnyStackSegment() && !abiInfo.HasExactlyOneStackSegment())
+        {
+            return true;
+        }
+    }
+#endif
+
+    return false;
+}
+
 /*****************************************************************************
  * Returns our internal varNum for a given IL variable.
  * Asserts assume it is called after lvaTable[] has been set up.
@@ -1546,7 +1954,7 @@ unsigned Compiler::compMapILvarNum(unsigned ILvarNum)
     else if (ILvarNum == (unsigned)ICorDebugInfo::TYPECTXT_ILNUM)
     {
         noway_assert(info.compTypeCtxtArg >= 0);
-        varNum = unsigned(info.compTypeCtxtArg);
+        varNum = info.compTypeCtxtArg;
     }
     else if (ILvarNum < info.compILargsCount)
     {
@@ -1601,7 +2009,7 @@ unsigned Compiler::compMap2ILvarNum(unsigned varNum) const
 
     // We create an extra argument for the type context parameter
     // needed for shared generic code.
-    if ((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) && varNum == (unsigned)info.compTypeCtxtArg)
+    if ((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) && varNum == info.compTypeCtxtArg)
     {
         return (unsigned)ICorDebugInfo::TYPECTXT_ILNUM;
     }
@@ -1614,7 +2022,7 @@ unsigned Compiler::compMap2ILvarNum(unsigned varNum) const
 #endif // FEATURE_FIXED_OUT_ARGS
 
     // Now mutate varNum to remove extra parameters from the count.
-    if ((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) && varNum > (unsigned)info.compTypeCtxtArg)
+    if ((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) && varNum > info.compTypeCtxtArg)
     {
         varNum--;
     }
@@ -1683,7 +2091,9 @@ void Compiler::lvSetMinOptsDoNotEnreg()
 // Arguments:
 //   compiler - pointer to a compiler to get access to an allocator, compHandle etc.
 //
-Compiler::StructPromotionHelper::StructPromotionHelper(Compiler* compiler) : compiler(compiler), structPromotionInfo()
+Compiler::StructPromotionHelper::StructPromotionHelper(Compiler* compiler)
+    : compiler(compiler)
+    , structPromotionInfo()
 {
 }
 
@@ -1755,7 +2165,7 @@ bool Compiler::StructPromotionHelper::CanPromoteStructType(CORINFO_CLASS_HANDLE
 #if defined(FEATURE_SIMD)
     // getMaxVectorByteLength() represents the size of the largest primitive type that we can struct promote.
     const unsigned maxSize =
-        MAX_NumOfFieldsInPromotableStruct * max(compiler->getMaxVectorByteLength(), sizeof(double));
+        MAX_NumOfFieldsInPromotableStruct * max(compiler->getMaxVectorByteLength(), (uint32_t)sizeof(double));
 #else  // !FEATURE_SIMD
     // sizeof(double) represents the size of the largest primitive type that we can struct promote.
     const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * sizeof(double);
@@ -2086,6 +2496,17 @@ bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum)
         return false;
     }
 
+#ifdef SWIFT_SUPPORT
+    // Swift structs are not passed in a way that match their layout and
+    // require reassembling on the local stack frame. Skip promotion for these
+    // (which would result in dependent promotion anyway).
+    if ((compiler->info.compCallConv == CorInfoCallConvExtension::Swift) && varDsc->lvIsParam)
+    {
+        JITDUMP("  struct promotion of V%02u is disabled because it is a parameter to a Swift function");
+        return false;
+    }
+#endif
+
     CORINFO_CLASS_HANDLE typeHnd = varDsc->GetLayout()->GetClassHandle();
     assert(typeHnd != NO_CLASS_HANDLE);
 
@@ -2201,7 +2622,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum)
     //
     // TODO: Ideally we would want to consider the impact of whether the struct is
     // passed as a parameter or assigned the return value of a call. Because once promoted,
-    // struct copying is done by field by field assignment instead of a more efficient
+    // struct copying is done by field by field store instead of a more efficient
     // rep.stos or xmm reg based copy.
     if (structPromotionInfo.fieldCnt > 3 && !varDsc->lvFieldAccessed)
     {
@@ -2262,12 +2683,12 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum)
             //             with something else occupying the same 4-byte slot, it will
             //             overwrite other fields.
             if (structPromotionInfo.fieldCnt != 1)
-        {
-            JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = "
-                    "%d.\n",
-                    lclNum, structPromotionInfo.fieldCnt);
-            shouldPromote = false;
-        }
+            {
+                JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = "
+                        "%d.\n",
+                        lclNum, structPromotionInfo.fieldCnt);
+                shouldPromote = false;
+            }
     }
     else if ((lclNum == compiler->genReturnLocal) && (structPromotionInfo.fieldCnt > 1))
     {
@@ -2289,7 +2710,6 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum)
     // In that case, we would like to avoid promortion.
     // However we haven't yet computed the lvRefCnt values so we can't do that.
     //
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
     return shouldPromote;
 }
@@ -2306,8 +2726,8 @@ void Compiler::StructPromotionHelper::SortStructFields()
     {
         jitstd::sort(structPromotionInfo.fields, structPromotionInfo.fields + structPromotionInfo.fieldCnt,
                      [](const lvaStructFieldInfo& lhs, const lvaStructFieldInfo& rhs) {
-                         return lhs.fldOffset < rhs.fldOffset;
-                     });
+            return lhs.fldOffset < rhs.fldOffset;
+        });
         structPromotionInfo.fieldsSorted = true;
     }
 }
@@ -2362,19 +2782,15 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum)
             compiler->compFloatingPointUsed = true;
         }
 
-// Now grab the temp for the field local.
+        // Now grab the temp for the field local.
 
 #ifdef DEBUG
-        char        buf[200];
         char        fieldNameBuffer[128];
         const char* fieldName =
             compiler->eeGetFieldName(pFieldInfo->diagFldHnd, false, fieldNameBuffer, sizeof(fieldNameBuffer));
-        sprintf_s(buf, sizeof(buf), "field V%02u.%s (fldOffset=0x%x)", lclNum, fieldName, pFieldInfo->fldOffset);
 
-        // We need to copy 'buf' as lvaGrabTemp() below caches a copy to its argument.
-        size_t len  = strlen(buf) + 1;
-        char*  bufp = compiler->getAllocator(CMK_DebugOnly).allocate<char>(len);
-        strcpy_s(bufp, len, buf);
+        const char* bufp =
+            compiler->printfAlloc("field V%02u.%s (fldOffset=0x%x)", lclNum, fieldName, pFieldInfo->fldOffset);
 
         if (index > 0)
         {
@@ -2764,6 +3180,10 @@ void Compiler::lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregister
             JITDUMP("Promoted struct used by a SIMD/HWI node\n");
             break;
 
+        case DoNotEnregisterReason::NonStandardParameter:
+            JITDUMP("Non-standard parameter\n");
+            break;
+
         default:
             unreached();
             break;
@@ -2887,7 +3307,7 @@ void Compiler::lvaSetStruct(unsigned varNum, ClassLayout* layout, bool unsafeVal
             if (varDsc->lvIsParam && !varDsc->lvIsStructField)
             {
                 structPassingKind howToReturnStruct;
-                getArgTypeForStruct(layout->GetClassHandle(), &howToReturnStruct, this->info.compIsVarArgs,
+                getArgTypeForStruct(layout->GetClassHandle(), &howToReturnStruct, info.compIsVarArgs,
                                     varDsc->lvExactSize());
 
                 if (howToReturnStruct == SPK_ByReference)
@@ -3158,7 +3578,7 @@ void Compiler::lvaSetClass(unsigned varNum, GenTree* tree, CORINFO_CLASS_HANDLE
 //
 // Notes:
 //
-//    This method models the type update rule for an assignment.
+//    This method models the type update rule for a store.
 //
 //    Updates currently should only happen for single-def user args or
 //    locals, when we are processing the expression actually being
@@ -3566,8 +3986,8 @@ void Compiler::lvaSortByRefCount()
         if (varDsc->IsAddressExposed())
         {
             varDsc->lvTracked = 0;
-            assert(varDsc->lvType != TYP_STRUCT ||
-                   varDsc->lvDoNotEnregister); // For structs, should have set this when we set m_addrExposed.
+            assert(varDsc->lvType != TYP_STRUCT || varDsc->lvDoNotEnregister); // For structs, should have set this when
+                                                                               // we set m_addrExposed.
         }
         if (varTypeIsStruct(varDsc))
         {
@@ -3624,8 +4044,9 @@ void Compiler::lvaSortByRefCount()
         {
             lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::NoRegVars));
         }
-#if defined(JIT32_GCENCODER) && defined(FEATURE_EH_FUNCLETS)
-        if (lvaIsOriginalThisArg(lclNum) && (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0)
+#if defined(JIT32_GCENCODER)
+        if (UsesFunclets() && lvaIsOriginalThisArg(lclNum) &&
+            (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0)
         {
             // For x86/Linux, we need to track "this".
             // However we cannot have it in tracked variables, so we set "this" pointer always untracked
@@ -3799,8 +4220,8 @@ unsigned LclVarDsc::lvSize() const // Size needed for storage representation. On
 }
 
 /**********************************************************************************
-* Get stack size of the varDsc.
-*/
+ * Get stack size of the varDsc.
+ */
 size_t LclVarDsc::lvArgStackSize() const
 {
     // Make sure this will have a stack size
@@ -4148,7 +4569,6 @@ void Compiler::lvaMarkLclRefs(GenTree* tree, BasicBlock* block, Statement* stmt,
                 {
                     // Variables can be marked as DoNotEngister in earlier stages like LocalAddressVisitor.
                     // No need to track them for single-def.
-                    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
                     // TODO-CQ: If the varType needs partial callee save, conservatively do not enregister
@@ -4220,7 +4640,10 @@ void Compiler::lvaMarkLocalVars(BasicBlock* block, bool isRecompute)
         };
 
         MarkLocalVarsVisitor(Compiler* compiler, BasicBlock* block, Statement* stmt, bool isRecompute)
-            : GenTreeVisitor<MarkLocalVarsVisitor>(compiler), m_block(block), m_stmt(stmt), m_isRecompute(isRecompute)
+            : GenTreeVisitor<MarkLocalVarsVisitor>(compiler)
+            , m_block(block)
+            , m_stmt(stmt)
+            , m_isRecompute(isRecompute)
         {
         }
 
@@ -4274,11 +4697,11 @@ PhaseStatus Compiler::lvaMarkLocalVars()
 
     unsigned const lvaCountOrig = lvaCount;
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
 
     // Grab space for exception handling
 
-    if (ehNeedsShadowSPslots())
+    if (!UsesFunclets() && ehNeedsShadowSPslots())
     {
         // The first slot is reserved for ICodeManager::FixContext(ppEndRegion)
         // ie. the offset of the end-of-last-executed-filter
@@ -4301,43 +4724,41 @@ PhaseStatus Compiler::lvaMarkLocalVars()
         lvaSetVarAddrExposed(lvaShadowSPslotsVar DEBUGARG(AddressExposedReason::EXTERNALLY_VISIBLE_IMPLICITLY));
     }
 
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
-    // PSPSym and LocAllocSPvar are not used by the NativeAOT ABI
+    // PSPSym is not used by the NativeAOT ABI
     if (!IsTargetAbi(CORINFO_NATIVEAOT_ABI))
     {
-#if defined(FEATURE_EH_FUNCLETS)
-        if (ehNeedsPSPSym())
+        if (UsesFunclets() && ehNeedsPSPSym())
         {
             lvaPSPSym            = lvaGrabTempWithImplicitUse(false DEBUGARG("PSPSym"));
             LclVarDsc* lclPSPSym = lvaGetDesc(lvaPSPSym);
             lclPSPSym->lvType    = TYP_I_IMPL;
             lvaSetVarDoNotEnregister(lvaPSPSym DEBUGARG(DoNotEnregisterReason::VMNeedsStackAddr));
         }
-#endif // FEATURE_EH_FUNCLETS
+    }
 
 #ifdef JIT32_GCENCODER
-        // LocAllocSPvar is only required by the implicit frame layout expected by the VM on x86. Whether
-        // a function contains a Localloc is conveyed in the GC information, in the InfoHdrSmall.localloc
-        // field. The function must have an EBP frame. Then, the VM finds the LocAllocSP slot by assuming
-        // the following stack layout:
-        //
-        //      -- higher addresses --
-        //      saved EBP                       <-- EBP points here
-        //      other callee-saved registers    // InfoHdrSmall.savedRegsCountExclFP specifies this size
-        //      optional GS cookie              // InfoHdrSmall.security is 1 if this exists
-        //      LocAllocSP slot
-        //      -- lower addresses --
-        //
-        // See also eetwain.cpp::GetLocallocSPOffset() and its callers.
-        if (compLocallocUsed)
-        {
-            lvaLocAllocSPvar         = lvaGrabTempWithImplicitUse(false DEBUGARG("LocAllocSPvar"));
-            LclVarDsc* locAllocSPvar = lvaGetDesc(lvaLocAllocSPvar);
-            locAllocSPvar->lvType    = TYP_I_IMPL;
-        }
-#endif // JIT32_GCENCODER
+    // LocAllocSPvar is only required by the implicit frame layout expected by the VM on x86. Whether
+    // a function contains a Localloc is conveyed in the GC information, in the InfoHdrSmall.localloc
+    // field. The function must have an EBP frame. Then, the VM finds the LocAllocSP slot by assuming
+    // the following stack layout:
+    //
+    //      -- higher addresses --
+    //      saved EBP                       <-- EBP points here
+    //      other callee-saved registers    // InfoHdrSmall.savedRegsCountExclFP specifies this size
+    //      optional GS cookie              // InfoHdrSmall.security is 1 if this exists
+    //      LocAllocSP slot
+    //      -- lower addresses --
+    //
+    // See also eetwain.cpp::GetLocallocSPOffset() and its callers.
+    if (compLocallocUsed)
+    {
+        lvaLocAllocSPvar         = lvaGrabTempWithImplicitUse(false DEBUGARG("LocAllocSPvar"));
+        LclVarDsc* locAllocSPvar = lvaGetDesc(lvaLocAllocSPvar);
+        locAllocSPvar->lvType    = TYP_I_IMPL;
     }
+#endif // JIT32_GCENCODER
 
     // Ref counting is now enabled normally.
     lvaRefCountState = RCS_NORMAL;
@@ -4369,7 +4790,7 @@ PhaseStatus Compiler::lvaMarkLocalVars()
     else if (lvaReportParamTypeArg())
     {
         // We should have a context arg.
-        assert(info.compTypeCtxtArg != (int)BAD_VAR_NUM);
+        assert(info.compTypeCtxtArg != BAD_VAR_NUM);
         lvaGetDesc(info.compTypeCtxtArg)->lvImplicitlyReferenced = reportParamTypeArg;
     }
 
@@ -4644,11 +5065,11 @@ inline void Compiler::lvaIncrementFrameSize(unsigned size)
 
 #ifndef TARGET_WASM
 /****************************************************************************
-*
-*  Return true if absolute offsets of temps are larger than vars, or in other
-*  words, did we allocate temps before of after vars.  The /GS buffer overrun
-*  checks want temps to be at low stack addresses than buffers
-*/
+ *
+ *  Return true if absolute offsets of temps are larger than vars, or in other
+ *  words, did we allocate temps before of after vars.  The /GS buffer overrun
+ *  checks want temps to be at low stack addresses than buffers
+ */
 bool Compiler::lvaTempsHaveLargerOffsetThanVars()
 {
 #ifdef TARGET_ARM
@@ -4667,10 +5088,10 @@ bool Compiler::lvaTempsHaveLargerOffsetThanVars()
 }
 
 /****************************************************************************
-*
-*  Return an upper bound estimate for the size of the compiler spill temps
-*
-*/
+ *
+ *  Return an upper bound estimate for the size of the compiler spill temps
+ *
+ */
 unsigned Compiler::lvaGetMaxSpillTempSize()
 {
     unsigned result = 0;
@@ -5179,7 +5600,7 @@ void Compiler::lvaFixVirtualFrameOffsets()
 {
     LclVarDsc* varDsc;
 
-#if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_AMD64)
+#if defined(TARGET_AMD64)
     if (lvaPSPSym != BAD_VAR_NUM)
     {
         // We need to fix the offset of the PSPSym so there is no padding between it and the outgoing argument space.
@@ -5277,17 +5698,7 @@ void Compiler::lvaFixVirtualFrameOffsets()
 
         if (!varDsc->lvOnFrame)
         {
-            if (!varDsc->lvIsParam
-#if !defined(TARGET_AMD64)
-                || (varDsc->lvIsRegArg
-#if defined(TARGET_ARM) && defined(PROFILING_SUPPORTED)
-                    && compIsProfilerHookNeeded() &&
-                    !lvaIsPreSpilled(lclNum, codeGen->regSet.rsMaskPreSpillRegs(false)) // We need assign stack offsets
-                                                                                        // for prespilled arguments
-#endif
-                    )
-#endif // !defined(TARGET_AMD64)
-                    )
+            if (!varDsc->lvIsParam || lvaParamHasLocalStackSpace(lclNum))
             {
                 doAssignStkOffs = false; // Not on frame or an incoming stack arg
             }
@@ -5308,8 +5719,8 @@ void Compiler::lvaFixVirtualFrameOffsets()
                     // We need to re-adjust the offsets of the parameters so they are EBP
                     // relative rather than stack/frame pointer relative
 
-                    varDsc->SetStackOffset(varDsc->GetStackOffset() +
-                                           (2 * TARGET_POINTER_SIZE)); // return address and pushed EBP
+                    varDsc->SetStackOffset(varDsc->GetStackOffset() + (2 * TARGET_POINTER_SIZE)); // return address and
+                                                                                                  // pushed EBP
 
                     noway_assert(varDsc->GetStackOffset() >= FIRST_ARG_STACK_OFFS);
                 }
@@ -5461,6 +5872,27 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
     // Update the arg initial register locations.
     lvaUpdateArgsWithInitialReg();
 
+#ifdef SWIFT_SUPPORT
+    if (info.compCallConv == CorInfoCallConvExtension::Swift)
+    {
+        // We already assigned argument offsets in lvaClassifyParameterABI.
+        // Just get them from there.
+        // TODO-Cleanup: We can use similar logic for all backends once we have
+        // the new ABI info for all targets.
+        for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
+        {
+            LclVarDsc*                   dsc     = lvaGetDesc(lclNum);
+            const ABIPassingInformation& abiInfo = lvaParameterPassingInfo[lclNum];
+
+            if (abiInfo.HasExactlyOneStackSegment())
+            {
+                dsc->SetStackOffset(abiInfo.Segments[0].GetStackOffset());
+            }
+        }
+        return;
+    }
+#endif
+
     /* Is there a "this" argument? */
 
     if (!info.compIsStatic)
@@ -5487,7 +5919,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
             argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs);
         }
 #elif !defined(UNIX_AMD64_ABI)
-        argOffs              = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs);
+        argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs);
 #endif // TARGET_X86
         lclNum++;
         userArgsToSkip++;
@@ -5509,7 +5941,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
     //@GENERICS: extra argument for instantiation info
     if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
     {
-        noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
+        noway_assert(lclNum == info.compTypeCtxtArg);
         argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, REGSIZE_BYTES,
                                                    argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
     }
@@ -5569,8 +6001,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
     {
         if (lvaIsPreSpilled(preSpillLclNum, preSpillMask))
         {
-            unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
-            argOffs          = lvaAssignVirtualFrameOffsetToArg(preSpillLclNum, argSize, argOffs);
+            CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
+            CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &argClass));
+            unsigned    argSize    = eeGetArgSize(argTypeJit, argClass);
+            argOffs                = lvaAssignVirtualFrameOffsetToArg(preSpillLclNum, argSize, argOffs);
             argLcls++;
 
             // Early out if we can. If size is 8 and base reg is 2, then the mask is 0x1100
@@ -5592,7 +6026,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
     {
         if (!lvaIsPreSpilled(stkLclNum, preSpillMask))
         {
-            const unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
+            CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
+            CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &argClass));
+            const unsigned argSize = eeGetArgSize(argTypeJit, argClass);
             argOffs                = lvaAssignVirtualFrameOffsetToArg(stkLclNum, argSize, argOffs);
             argLcls++;
         }
@@ -5603,7 +6039,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
 #else  // !TARGET_ARM
     for (unsigned i = 0; i < argSigLen; i++)
     {
-        unsigned argumentSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
+        CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
+        CorInfoType argTypeJit   = strip(info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &argClass));
+        unsigned    argumentSize = eeGetArgSize(argTypeJit, argClass);
 
         assert(compAppleArm64Abi() || argumentSize % TARGET_POINTER_SIZE == 0);
 
@@ -5618,7 +6056,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
     //@GENERICS: extra argument for instantiation info
     if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
     {
-        noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
+        noway_assert(lclNum == info.compTypeCtxtArg);
         argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, REGSIZE_BYTES,
                                                    argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
     }
@@ -5642,8 +6080,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
 //        ret address slot, stack frame padding, alloca instructions, etc.
 //  Note: This is the implementation for UNIX_AMD64 System V platforms.
 //
-int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
-                                               unsigned argSize,
+int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned    lclNum,
+                                               unsigned    argSize,
                                                int argOffs UNIX_AMD64_ABI_ONLY_ARG(int* callerArgOffset))
 {
     noway_assert(lclNum < info.compArgsCount);
@@ -5734,8 +6172,8 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
 //        The final offset is calculated in lvaFixVirtualFrameOffsets method. It accounts for FP existence,
 //        ret address slot, stack frame padding, alloca instructions, etc.
 //  Note: This implementation for all the platforms but UNIX_AMD64 OSs (System V 64 bit.)
-int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
-                                               unsigned argSize,
+int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned    lclNum,
+                                               unsigned    argSize,
                                                int argOffs UNIX_AMD64_ABI_ONLY_ARG(int* callerArgOffset))
 {
     noway_assert(lclNum < info.compArgsCount);
@@ -5756,7 +6194,6 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
     {
         /* Argument is passed in a register, don't count it
          * when updating the current offset on the stack */
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
 #if DEBUG
@@ -5943,7 +6380,6 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
             // r3    int             a2 --> pushed (not pre-spilled) for alignment of a0 by lvaInitUserArgs.
             // r2    struct { int }  a1
             // r0-r1 struct { long } a0
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef PROFILING_SUPPORTED
             // On Arm under profiler, r0-r3 are always prespilled on stack.
@@ -5963,8 +6399,8 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
                             (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST));
 
                 noway_assert(cond);
-                noway_assert(sizeofPreSpillRegArgs <=
-                             argOffs + TARGET_POINTER_SIZE); // at most one register of alignment
+                noway_assert(sizeofPreSpillRegArgs <= argOffs + TARGET_POINTER_SIZE); // at most one register of
+                                                                                      // alignment
             }
             argOffs = sizeofPreSpillRegArgs;
         }
@@ -6007,7 +6443,6 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
     // For struct promoted parameters we need to set the offsets for both LclVars.
     //
     // For a dependent promoted struct we also assign the struct fields stack offset
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
     if (varDsc->lvPromoted)
     {
@@ -6135,8 +6570,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
         stkOffs -= initialStkOffs;
     }
 
-    if (codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() ||
-        !isFramePointerUsed()) // Note that currently we always have a frame pointer
+    if (codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() || !isFramePointerUsed()) // Note that currently we always have
+                                                                                   // a frame pointer
     {
         stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;
     }
@@ -6274,7 +6709,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
         }
     }
 
-#if defined(FEATURE_EH_FUNCLETS) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64))
+#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
     if (lvaPSPSym != BAD_VAR_NUM)
     {
         // On ARM/ARM64, if we need a PSPSym we allocate it early since funclets
@@ -6283,7 +6718,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
         noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer
         stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs);
     }
-#endif // FEATURE_EH_FUNCLETS && (TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64)
+#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64
 
     if (mustDoubleAlign)
     {
@@ -6378,9 +6813,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
     }
 #endif
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
     /* If we need space for slots for shadow SP, reserve it now */
-    if (ehNeedsShadowSPslots())
+    if (!UsesFunclets() && ehNeedsShadowSPslots())
     {
         noway_assert(codeGen->isFramePointerUsed()); // else offsets of locals of frameless methods will be incorrect
         if (!lvaReportParamTypeArg())
@@ -6397,7 +6832,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
         }
         stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaShadowSPslotsVar, lvaLclSize(lvaShadowSPslotsVar), stkOffs);
     }
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 
     if (compGSReorderStackLayout)
     {
@@ -6598,12 +7033,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
 
             // These need to be located as the very first variables (highest memory address)
             // and so they have already been assigned an offset
-            if (
-#if defined(FEATURE_EH_FUNCLETS)
-                lclNum == lvaPSPSym ||
-#else
+            if (lclNum == lvaPSPSym ||
+#if defined(FEATURE_EH_WINDOWS_X86)
                 lclNum == lvaShadowSPslotsVar ||
-#endif // FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
 #ifdef JIT32_GCENCODER
                 lclNum == lvaLocAllocSPvar ||
 #endif // JIT32_GCENCODER
@@ -6636,45 +7069,21 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
 
             if (varDsc->lvIsParam)
             {
-#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
-
-                // On Windows AMD64 we can use the caller-reserved stack area that is already setup
-                assert(varDsc->GetStackOffset() != BAD_STK_OFFS);
-                continue;
-
-#else // !TARGET_AMD64
-
-                //  A register argument that is not enregistered ends up as
-                //  a local variable which will need stack frame space.
-                //
-                if (!varDsc->lvIsRegArg)
-                {
-                    continue;
-                }
-
 #ifdef TARGET_ARM64
-                if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum())
+                if (info.compIsVarArgs && varDsc->lvIsRegArg &&
+                    (varDsc->GetArgReg() != theFixedRetBuffReg(info.compCallConv)))
                 {
                     // Stack offset to varargs (parameters) should point to home area which will be preallocated.
-                    const unsigned regArgNum = genMapIntRegNumToRegArgNum(varDsc->GetArgReg());
+                    const unsigned regArgNum = genMapIntRegNumToRegArgNum(varDsc->GetArgReg(), info.compCallConv);
                     varDsc->SetStackOffset(-initialStkOffs + regArgNum * REGSIZE_BYTES);
                     continue;
                 }
-
 #endif
 
-#ifdef TARGET_ARM
-                // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg
-                // in the prolog, thus they don't need stack frame space.
-                //
-                if ((codeGen->regSet.rsMaskPreSpillRegs(false) & genRegMask(varDsc->GetArgReg())) != 0)
+                if (!lvaParamHasLocalStackSpace(lclNum))
                 {
-                    assert(varDsc->GetStackOffset() != BAD_STK_OFFS);
                     continue;
                 }
-#endif
-
-#endif // !TARGET_AMD64
             }
 
             /* Make sure the type is appropriate */
@@ -6864,7 +7273,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
         }
     }
 
-#if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_AMD64)
+#if defined(TARGET_AMD64)
     if (lvaPSPSym != BAD_VAR_NUM)
     {
         // On AMD64, if we need a PSPSym, allocate it last, immediately above the outgoing argument
@@ -6873,11 +7282,11 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
         noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer
         stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs);
     }
-#endif // FEATURE_EH_FUNCLETS && defined(TARGET_AMD64)
+#endif // TARGET_AMD64
 
 #ifdef TARGET_ARM64
-    if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() &&
-        isFramePointerUsed()) // Note that currently we always have a frame pointer
+    if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() && isFramePointerUsed()) // Note that currently we always have
+                                                                                   // a frame pointer
     {
         // Create space for saving FP and LR.
         stkOffs -= 2 * REGSIZE_BYTES;
@@ -6928,6 +7337,58 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
                  (unsigned)-(stkOffs + (pushedCount * (int)TARGET_POINTER_SIZE)));
 }
 
+//------------------------------------------------------------------------
+// lvaParamHasLocalStackSpace: Check if a local that represents a parameter has
+// space allocated for it in the local stack frame.
+//
+// Arguments:
+//   lclNum - the variable number
+//
+// Return Value:
+//   true if the local does not have reusable stack space created by the caller
+//   already.
+//
+bool Compiler::lvaParamHasLocalStackSpace(unsigned lclNum)
+{
+    LclVarDsc* varDsc = lvaGetDesc(lclNum);
+
+#ifdef SWIFT_SUPPORT
+    if ((info.compCallConv == CorInfoCallConvExtension::Swift) && !lvaIsImplicitByRefLocal(lclNum) &&
+        !lvaParameterPassingInfo[lclNum].HasExactlyOneStackSegment())
+    {
+        return true;
+    }
+#endif
+
+#if defined(WINDOWS_AMD64_ABI)
+    // On Windows AMD64 we can use the caller-reserved stack area that is already setup
+    return false;
+#else // !WINDOWS_AMD64_ABI
+
+    //  A register argument that is not enregistered ends up as
+    //  a local variable which will need stack frame space.
+    //
+    if (!varDsc->lvIsRegArg)
+    {
+        return false;
+    }
+
+#ifdef TARGET_ARM
+    // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg
+    // in the prolog, thus they don't need stack frame space.
+    //
+    if ((codeGen->regSet.rsMaskPreSpillRegs(false) & genRegMask(varDsc->GetArgReg())) != 0)
+    {
+        assert(varDsc->GetStackOffset() != BAD_STK_OFFS);
+        return false;
+    }
+#endif
+
+#endif // !WINDOWS_AMD64_ABI
+
+    return true;
+}
+
 int Compiler::lvaAllocLocalAndSetVirtualOffset(unsigned lclNum, unsigned size, int stkOffs)
 {
     noway_assert(lclNum != BAD_VAR_NUM);
@@ -7074,7 +7535,6 @@ void Compiler::lvaAlignFrame()
 
     // If this isn't the final frame layout, assume we have to push an extra QWORD
     // Just so the offsets are true upper limits.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef UNIX_AMD64_ABI
     // The compNeedToAlignFrame flag  is indicating if there is a need to align the frame.
@@ -7162,9 +7622,9 @@ void Compiler::lvaAlignFrame()
         }
 
         // Align the stack with STACK_ALIGN value.
-        int  adjustFrameSize = compLclFrameSize;
+        int adjustFrameSize = compLclFrameSize;
 #if defined(UNIX_X86_ABI)
-        bool isEbpPushed     = codeGen->isFramePointerUsed();
+        bool isEbpPushed = codeGen->isFramePointerUsed();
 #if DOUBLE_ALIGN
         isEbpPushed |= genDoubleAlign();
 #endif
@@ -7196,8 +7656,6 @@ void Compiler::lvaAssignFrameOffsetsToPromotedStructs()
         // assign their offsets in lvaAssignVirtualFrameOffsetToArg().
         // This is not true for the System V systems since there is no
         // outgoing args space. Assign the dependently promoted fields properly.
-        //
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM) || defined(TARGET_X86)
         // ARM: lo/hi parts of a promoted long arg need to be updated.
@@ -7210,9 +7668,9 @@ void Compiler::lvaAssignFrameOffsetsToPromotedStructs()
         //
         const bool mustProcessParams = true;
 #else
-        // OSR must also assign offsets here.
+        // OSR/Swift must also assign offsets here.
         //
-        const bool mustProcessParams = opts.IsOSR();
+        const bool mustProcessParams = opts.IsOSR() || (info.compCallConv == CorInfoCallConvExtension::Swift);
 #endif // defined(UNIX_AMD64_ABI) || defined(TARGET_ARM) || defined(TARGET_X86)
 
         if (varDsc->lvIsStructField && (!varDsc->lvIsParam || mustProcessParams))
@@ -7273,7 +7731,6 @@ int Compiler::lvaAllocateTemps(int stkOffs, bool mustDoubleAlign)
             /* Figure out and record the stack offset of the temp */
 
             /* Need to align the offset? */
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef TARGET_64BIT
             if (varTypeIsGC(tempType) && ((stkOffs % TARGET_POINTER_SIZE) != 0))
@@ -7655,9 +8112,9 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r
 }
 
 /*****************************************************************************
-*
-*  dump the lvaTable
-*/
+ *
+ *  dump the lvaTable
+ */
 
 void Compiler::lvaTableDump(FrameLayoutState curState)
 {
@@ -7838,7 +8295,7 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState)
 //
 // Return Value:
 //    The offset.
-
+//
 int Compiler::lvaGetSPRelativeOffset(unsigned varNum)
 {
     assert(!compLocallocUsed);
diff --git a/src/coreclr/jit/likelyclass.cpp b/src/coreclr/jit/likelyclass.cpp
index fa0839725c9f..e181a2e9a135 100644
--- a/src/coreclr/jit/likelyclass.cpp
+++ b/src/coreclr/jit/likelyclass.cpp
@@ -255,8 +255,8 @@ static unsigned getLikelyClassesOrMethods(LikelyClassMethodRecord*
                     jitstd::sort(sortedEntries, sortedEntries + knownHandles,
                                  [](const LikelyClassMethodHistogramEntry& h1,
                                     const LikelyClassMethodHistogramEntry& h2) -> bool {
-                                     return h1.m_count > h2.m_count;
-                                 });
+                        return h1.m_count > h2.m_count;
+                    });
 
                     const UINT32 numberOfClasses = min(knownHandles, maxLikelyClasses);
 
@@ -410,7 +410,9 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyValues(LikelyValueRecord*
             // sort by m_count (descending)
             jitstd::sort(sortedEntries, sortedEntries + h.countHistogramElements,
                          [](const LikelyClassMethodHistogramEntry& h1,
-                            const LikelyClassMethodHistogramEntry& h2) -> bool { return h1.m_count > h2.m_count; });
+                            const LikelyClassMethodHistogramEntry& h2) -> bool {
+                return h1.m_count > h2.m_count;
+            });
 
             const UINT32 numberOfLikelyConst = min(h.countHistogramElements, maxLikelyValues);
 
diff --git a/src/coreclr/jit/lir.cpp b/src/coreclr/jit/lir.cpp
index ce2b9a97f495..a401bbb7dc3e 100644
--- a/src/coreclr/jit/lir.cpp
+++ b/src/coreclr/jit/lir.cpp
@@ -9,7 +9,10 @@
 #pragma hdrstop
 #endif
 
-LIR::Use::Use() : m_range(nullptr), m_edge(nullptr), m_user(nullptr)
+LIR::Use::Use()
+    : m_range(nullptr)
+    , m_edge(nullptr)
+    , m_user(nullptr)
 {
 }
 
@@ -30,7 +33,10 @@ LIR::Use::Use(const Use& other)
 //
 // Return Value:
 //
-LIR::Use::Use(Range& range, GenTree** edge, GenTree* user) : m_range(&range), m_edge(edge), m_user(user)
+LIR::Use::Use(Range& range, GenTree** edge, GenTree* user)
+    : m_range(&range)
+    , m_edge(edge)
+    , m_user(user)
 {
     AssertIsValid();
 }
@@ -280,11 +286,15 @@ unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned lclNum, GenTre
     return lclNum;
 }
 
-LIR::ReadOnlyRange::ReadOnlyRange() : m_firstNode(nullptr), m_lastNode(nullptr)
+LIR::ReadOnlyRange::ReadOnlyRange()
+    : m_firstNode(nullptr)
+    , m_lastNode(nullptr)
 {
 }
 
-LIR::ReadOnlyRange::ReadOnlyRange(ReadOnlyRange&& other) : m_firstNode(other.m_firstNode), m_lastNode(other.m_lastNode)
+LIR::ReadOnlyRange::ReadOnlyRange(ReadOnlyRange&& other)
+    : m_firstNode(other.m_firstNode)
+    , m_lastNode(other.m_lastNode)
 {
 #ifdef DEBUG
     other.m_firstNode = nullptr;
@@ -301,7 +311,9 @@ LIR::ReadOnlyRange::ReadOnlyRange(ReadOnlyRange&& other) : m_firstNode(other.m_f
 //    firstNode - The first node in the range.
 //    lastNode  - The last node in the range.
 //
-LIR::ReadOnlyRange::ReadOnlyRange(GenTree* firstNode, GenTree* lastNode) : m_firstNode(firstNode), m_lastNode(lastNode)
+LIR::ReadOnlyRange::ReadOnlyRange(GenTree* firstNode, GenTree* lastNode)
+    : m_firstNode(firstNode)
+    , m_lastNode(lastNode)
 {
     assert((m_firstNode == nullptr) == (m_lastNode == nullptr));
     assert((m_firstNode == m_lastNode) || (Contains(m_lastNode)));
@@ -426,11 +438,13 @@ bool LIR::ReadOnlyRange::Contains(GenTree* node) const
 
 #endif
 
-LIR::Range::Range() : ReadOnlyRange()
+LIR::Range::Range()
+    : ReadOnlyRange()
 {
 }
 
-LIR::Range::Range(Range&& other) : ReadOnlyRange(std::move(other))
+LIR::Range::Range(Range&& other)
+    : ReadOnlyRange(std::move(other))
 {
 }
 
@@ -442,7 +456,8 @@ LIR::Range::Range(Range&& other) : ReadOnlyRange(std::move(other))
 //    firstNode - The first node in the range.
 //    lastNode  - The last node in the range.
 //
-LIR::Range::Range(GenTree* firstNode, GenTree* lastNode) : ReadOnlyRange(firstNode, lastNode)
+LIR::Range::Range(GenTree* firstNode, GenTree* lastNode)
+    : ReadOnlyRange(firstNode, lastNode)
 {
 }
 
@@ -1218,7 +1233,7 @@ bool LIR::Range::TryGetUse(GenTree* node, Use* use)
 // Returns:
 //    The computed subrange.
 //
-template <bool     markFlagsOperands>
+template <bool markFlagsOperands>
 LIR::ReadOnlyRange LIR::Range::GetMarkedRange(unsigned  markCount,
                                               GenTree*  start,
                                               bool*     isClosed,
@@ -1438,8 +1453,8 @@ class CheckLclVarSemanticsHelper
     //    range - a range to do the check.
     //    unusedDefs - map of defs that do no have users.
     //
-    CheckLclVarSemanticsHelper(Compiler*         compiler,
-                               const LIR::Range* range,
+    CheckLclVarSemanticsHelper(Compiler*                            compiler,
+                               const LIR::Range*                    range,
                                SmallHashTable<GenTree*, bool, 32U>& unusedDefs)
         : compiler(compiler)
         , range(range)
@@ -1586,7 +1601,7 @@ class CheckLclVarSemanticsHelper
     void PopLclVarRead(const AliasSet::NodeInfo& defInfo)
     {
         SmallHashTable<GenTree*, GenTree*>* reads;
-        const bool foundReads = unusedLclVarReads.TryGetValue(defInfo.LclNum(), &reads);
+        const bool                          foundReads = unusedLclVarReads.TryGetValue(defInfo.LclNum(), &reads);
         assert(foundReads);
 
         bool found = reads->TryRemove(defInfo.Node());
@@ -1601,11 +1616,11 @@ class CheckLclVarSemanticsHelper
     }
 
 private:
-    Compiler*         compiler;
-    const LIR::Range* range;
-    SmallHashTable<GenTree*, bool, 32U>& unusedDefs;
+    Compiler*                                                     compiler;
+    const LIR::Range*                                             range;
+    SmallHashTable<GenTree*, bool, 32U>&                          unusedDefs;
     SmallHashTable<int, SmallHashTable<GenTree*, GenTree*>*, 16U> unusedLclVarReads;
-    ArrayStack<SmallHashTable<GenTree*, GenTree*>*> lclVarReadsMapsCache;
+    ArrayStack<SmallHashTable<GenTree*, GenTree*>*>               lclVarReadsMapsCache;
 };
 
 //------------------------------------------------------------------------
diff --git a/src/coreclr/jit/lir.h b/src/coreclr/jit/lir.h
index 9605f11669ea..76423427884a 100644
--- a/src/coreclr/jit/lir.h
+++ b/src/coreclr/jit/lir.h
@@ -73,7 +73,7 @@ class LIR final
         void AssertIsValid() const;
         bool IsDummyUse() const;
 
-        void ReplaceWith(GenTree* replacement);
+        void     ReplaceWith(GenTree* replacement);
         unsigned ReplaceWithLclVar(Compiler* compiler, unsigned lclNum = BAD_VAR_NUM, GenTree** pStore = nullptr);
     };
 
@@ -113,7 +113,7 @@ class LIR final
         GenTree* m_firstNode;
         GenTree* m_lastNode;
 
-        ReadOnlyRange(const ReadOnlyRange& other) = delete;
+        ReadOnlyRange(const ReadOnlyRange& other)            = delete;
         ReadOnlyRange& operator=(const ReadOnlyRange& other) = delete;
 
     public:
@@ -125,12 +125,14 @@ class LIR final
 
             GenTree* m_node;
 
-            Iterator(GenTree* begin) : m_node(begin)
+            Iterator(GenTree* begin)
+                : m_node(begin)
             {
             }
 
         public:
-            Iterator() : m_node(nullptr)
+            Iterator()
+                : m_node(nullptr)
             {
             }
 
@@ -167,12 +169,14 @@ class LIR final
 
             GenTree* m_node;
 
-            ReverseIterator(GenTree* begin) : m_node(begin)
+            ReverseIterator(GenTree* begin)
+                : m_node(begin)
             {
             }
 
         public:
-            ReverseIterator() : m_node(nullptr)
+            ReverseIterator()
+                : m_node(nullptr)
             {
             }
 
@@ -245,7 +249,7 @@ class LIR final
     private:
         Range(GenTree* firstNode, GenTree* lastNode);
 
-        Range(const Range& other) = delete;
+        Range(const Range& other)            = delete;
         Range& operator=(const Range& other) = delete;
 
         template <bool markFlagsOperands = false>
@@ -282,7 +286,7 @@ class LIR final
         void InsertAtBeginning(Range&& range);
         void InsertAtEnd(Range&& range);
 
-        void Remove(GenTree* node, bool markOperandsUnused = false);
+        void  Remove(GenTree* node, bool markOperandsUnused = false);
         Range Remove(GenTree* firstNode, GenTree* lastNode);
         Range Remove(ReadOnlyRange&& range);
 
@@ -305,7 +309,7 @@ class LIR final
     };
 
 public:
-    static Range& AsRange(BasicBlock* block);
+    static Range&       AsRange(BasicBlock* block);
     static const Range& AsRange(const BasicBlock* block);
 
     static Range EmptyRange();
diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp
index 49cd663f5d99..060cf4aeb588 100644
--- a/src/coreclr/jit/liveness.cpp
+++ b/src/coreclr/jit/liveness.cpp
@@ -249,7 +249,6 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)
 
         case GT_STOREIND:
         case GT_STORE_BLK:
-        case GT_STORE_DYN_BLK:
         case GT_MEMORYBARRIER: // Similar to Volatile indirections, we must handle this as a memory def.
             fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
             break;
@@ -503,7 +502,6 @@ void Compiler::fgPerBlockLocalVarLiveness()
                 // 32-bit targets always pop the frame in the epilog.
                 // For 64-bit targets, we only do this in the epilog for IL stubs;
                 // for non-IL stubs the frame is popped after every PInvoke call.
-                CLANG_FORMAT_COMMENT_ANCHOR;
 #ifdef TARGET_64BIT
                 if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
 #endif
@@ -668,8 +666,6 @@ void Compiler::fgDispDebugScopes()
  * Mark variables live across their entire scope.
  */
 
-#if defined(FEATURE_EH_FUNCLETS)
-
 void Compiler::fgExtendDbgScopes()
 {
     compResetScopeLists();
@@ -678,124 +674,110 @@ void Compiler::fgExtendDbgScopes()
     if (verbose)
     {
         printf("\nMarking vars alive over their entire scope :\n\n");
-    }
-
-    if (verbose)
-    {
         compDispScopeLists();
     }
 #endif // DEBUG
 
     VARSET_TP inScope(VarSetOps::MakeEmpty(this));
 
-    // Mark all tracked LocalVars live over their scope - walk the blocks
-    // keeping track of the current life, and assign it to the blocks.
-
-    for (BasicBlock* const block : Blocks())
+    if (UsesFunclets())
     {
-        // If we get to a funclet, reset the scope lists and start again, since the block
-        // offsets will be out of order compared to the previous block.
+        // Mark all tracked LocalVars live over their scope - walk the blocks
+        // keeping track of the current life, and assign it to the blocks.
 
-        if (block->HasFlag(BBF_FUNCLET_BEG))
+        for (BasicBlock* const block : Blocks())
         {
-            compResetScopeLists();
-            VarSetOps::ClearD(this, inScope);
-        }
-
-        // Process all scopes up to the current offset
+            // If we get to a funclet, reset the scope lists and start again, since the block
+            // offsets will be out of order compared to the previous block.
 
-        if (block->bbCodeOffs != BAD_IL_OFFSET)
-        {
-            compProcessScopesUntil(block->bbCodeOffs, &inScope, &Compiler::fgBeginScopeLife, &Compiler::fgEndScopeLife);
-        }
-
-        // Assign the current set of variables that are in scope to the block variables tracking this.
+            if (block->HasFlag(BBF_FUNCLET_BEG))
+            {
+                compResetScopeLists();
+                VarSetOps::ClearD(this, inScope);
+            }
 
-        fgMarkInScope(block, inScope);
-    }
+            // Process all scopes up to the current offset
 
-#ifdef DEBUG
-    if (verbose)
-    {
-        fgDispDebugScopes();
-    }
-#endif // DEBUG
-}
+            if (block->bbCodeOffs != BAD_IL_OFFSET)
+            {
+                compProcessScopesUntil(block->bbCodeOffs, &inScope, &Compiler::fgBeginScopeLife,
+                                       &Compiler::fgEndScopeLife);
+            }
 
-#else // !FEATURE_EH_FUNCLETS
+            // Assign the current set of variables that are in scope to the block variables tracking this.
 
-void Compiler::fgExtendDbgScopes()
-{
-    compResetScopeLists();
+            fgMarkInScope(block, inScope);
+        }
 
 #ifdef DEBUG
-    if (verbose)
-    {
-        printf("\nMarking vars alive over their entire scope :\n\n");
-        compDispScopeLists();
-    }
+        if (verbose)
+        {
+            fgDispDebugScopes();
+        }
 #endif // DEBUG
+    }
+#if defined(FEATURE_EH_WINDOWS_X86)
+    else
+    {
+        compProcessScopesUntil(0, &inScope, &Compiler::fgBeginScopeLife, &Compiler::fgEndScopeLife);
 
-    VARSET_TP inScope(VarSetOps::MakeEmpty(this));
-    compProcessScopesUntil(0, &inScope, &Compiler::fgBeginScopeLife, &Compiler::fgEndScopeLife);
-
-    IL_OFFSET lastEndOffs = 0;
+        IL_OFFSET lastEndOffs = 0;
 
-    // Mark all tracked LocalVars live over their scope - walk the blocks
-    // keeping track of the current life, and assign it to the blocks.
+        // Mark all tracked LocalVars live over their scope - walk the blocks
+        // keeping track of the current life, and assign it to the blocks.
 
-    for (BasicBlock* const block : Blocks())
-    {
-        // Find scopes becoming alive. If there is a gap in the instr
-        // sequence, we need to process any scopes on those missing offsets.
-
-        if (block->bbCodeOffs != BAD_IL_OFFSET)
+        for (BasicBlock* const block : Blocks())
         {
-            if (lastEndOffs != block->bbCodeOffs)
-            {
-                noway_assert(lastEndOffs < block->bbCodeOffs);
+            // Find scopes becoming alive. If there is a gap in the instr
+            // sequence, we need to process any scopes on those missing offsets.
 
-                compProcessScopesUntil(block->bbCodeOffs, &inScope, &Compiler::fgBeginScopeLife,
-                                       &Compiler::fgEndScopeLife);
-            }
-            else
+            if (block->bbCodeOffs != BAD_IL_OFFSET)
             {
-                while (VarScopeDsc* varScope = compGetNextEnterScope(block->bbCodeOffs))
+                if (lastEndOffs != block->bbCodeOffs)
+                {
+                    noway_assert(lastEndOffs < block->bbCodeOffs);
+
+                    compProcessScopesUntil(block->bbCodeOffs, &inScope, &Compiler::fgBeginScopeLife,
+                                           &Compiler::fgEndScopeLife);
+                }
+                else
                 {
-                    fgBeginScopeLife(&inScope, varScope);
+                    while (VarScopeDsc* varScope = compGetNextEnterScope(block->bbCodeOffs))
+                    {
+                        fgBeginScopeLife(&inScope, varScope);
+                    }
                 }
             }
-        }
 
-        // Assign the current set of variables that are in scope to the block variables tracking this.
+            // Assign the current set of variables that are in scope to the block variables tracking this.
 
-        fgMarkInScope(block, inScope);
+            fgMarkInScope(block, inScope);
 
-        // Find scopes going dead.
+            // Find scopes going dead.
 
-        if (block->bbCodeOffsEnd != BAD_IL_OFFSET)
-        {
-            VarScopeDsc* varScope;
-            while ((varScope = compGetNextExitScope(block->bbCodeOffsEnd)) != nullptr)
+            if (block->bbCodeOffsEnd != BAD_IL_OFFSET)
             {
-                fgEndScopeLife(&inScope, varScope);
-            }
+                VarScopeDsc* varScope;
+                while ((varScope = compGetNextExitScope(block->bbCodeOffsEnd)) != nullptr)
+                {
+                    fgEndScopeLife(&inScope, varScope);
+                }
 
-            lastEndOffs = block->bbCodeOffsEnd;
+                lastEndOffs = block->bbCodeOffsEnd;
+            }
         }
-    }
 
-    /* Everything should be out of scope by the end of the method. But if the
-       last BB got removed, then inScope may not be empty. */
+        /* Everything should be out of scope by the end of the method. But if the
+        last BB got removed, then inScope may not be empty. */
 
-    noway_assert(VarSetOps::IsEmpty(this, inScope) || lastEndOffs < info.compILCodeSize);
+        noway_assert(VarSetOps::IsEmpty(this, inScope) || lastEndOffs < info.compILCodeSize);
+    }
+#endif // FEATURE_EH_WINDOWS_X86
 }
 
-#endif // !FEATURE_EH_FUNCLETS
-
 /*****************************************************************************
  *
- * For debuggable code, we allow redundant assignments to vars
+ * For debuggable code, we allow redundant stores to vars
  * by marking them live over their entire scope.
  */
 
@@ -816,10 +798,10 @@ void Compiler::fgExtendDbgLifetimes()
 
     fgExtendDbgScopes();
 
-/*-------------------------------------------------------------------------
- * Partly update liveness info so that we handle any funky BBF_INTERNAL
- * blocks inserted out of sequence.
- */
+    /*-------------------------------------------------------------------------
+     * Partly update liveness info so that we handle any funky BBF_INTERNAL
+     * blocks inserted out of sequence.
+     */
 
 #ifdef DEBUG
     if (verbose && 0)
@@ -1012,7 +994,7 @@ void Compiler::fgExtendDbgLifetimes()
     //   So just ensure that they don't have a 0 ref cnt
 
     unsigned lclNum = 0;
-    for (LclVarDsc *varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+    for (LclVarDsc* varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
     {
         if (lclNum >= info.compArgsCount)
         {
@@ -1683,10 +1665,10 @@ GenTree* Compiler::fgTryRemoveDeadStoreEarly(Statement* stmt, GenTreeLclVarCommo
  * or subtree of a statement moving backward from startNode to endNode
  */
 
-void Compiler::fgComputeLife(VARSET_TP&       life,
-                             GenTree*         startNode,
-                             GenTree*         endNode,
-                             VARSET_VALARG_TP volatileVars,
+void Compiler::fgComputeLife(VARSET_TP&           life,
+                             GenTree*             startNode,
+                             GenTree*             endNode,
+                             VARSET_VALARG_TP     volatileVars,
                              bool* pStmtInfoDirty DEBUGARG(bool* treeModf))
 {
     // Don't kill vars in scope
@@ -1905,13 +1887,13 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
 
                 if (isDeadStore && fgTryRemoveDeadStoreLIR(node, lclVarNode, block))
                 {
-                    GenTree* data = lclVarNode->Data();
-                    data->SetUnusedValue();
+                    GenTree* value = lclVarNode->Data();
+                    value->SetUnusedValue();
 
 #ifndef TARGET_WASM
-                    if (data->isIndir())
+                    if (value->isIndir())
                     {
-                        Lowering::TransformUnusedIndirection(data->AsIndir(), this, block);
+                        Lowering::TransformUnusedIndirection(value->AsIndir(), this, block);
                     }
 #endif // TARGET_WASM
                 }
@@ -1947,7 +1929,6 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
             case GT_STOREIND:
             case GT_BOUNDS_CHECK:
             case GT_STORE_BLK:
-            case GT_STORE_DYN_BLK:
             case GT_JCMP:
             case GT_JTEST:
             case GT_JCC:
@@ -1958,9 +1939,9 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
             case GT_START_NONGC:
             case GT_START_PREEMPTGC:
             case GT_PROF_HOOK:
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
             case GT_END_LFIN:
-#endif // !FEATURE_EH_FUNCLETS
+#endif // FEATURE_EH_WINDOWS_X86
             case GT_SWITCH_TABLE:
             case GT_PINVOKE_PROLOG:
             case GT_PINVOKE_EPILOG:
@@ -2055,7 +2036,7 @@ bool Compiler::fgTryRemoveNonLocal(GenTree* node, LIR::Range* blockRange)
     {
         // We are only interested in avoiding the removal of nodes with direct side effects
         // (as opposed to side effects of their children).
-        // This default case should never include calls or assignments.
+        // This default case should never include calls or stores.
         assert(!node->OperRequiresAsgFlag() && !node->OperIs(GT_CALL));
         if (!node->gtSetFlags() && !node->OperMayThrow(this))
         {
@@ -2130,11 +2111,11 @@ bool Compiler::fgTryRemoveDeadStoreLIR(GenTree* store, GenTreeLclVarCommon* lclN
 // Return Value:
 //   true if we should skip the rest of the statement, false if we should continue
 //
-bool Compiler::fgRemoveDeadStore(GenTree**        pTree,
-                                 LclVarDsc*       varDsc,
-                                 VARSET_VALARG_TP life,
-                                 bool*            doAgain,
-                                 bool*            pStmtInfoDirty,
+bool Compiler::fgRemoveDeadStore(GenTree**           pTree,
+                                 LclVarDsc*          varDsc,
+                                 VARSET_VALARG_TP    life,
+                                 bool*               doAgain,
+                                 bool*               pStmtInfoDirty,
                                  bool* pStoreRemoved DEBUGARG(bool* treeModf))
 {
     assert(!compRationalIRForm);
@@ -2156,11 +2137,11 @@ bool Compiler::fgRemoveDeadStore(GenTree**        pTree,
     *pStoreRemoved = true;
 
     GenTreeLclVarCommon* store = tree->AsLclVarCommon();
-    GenTree*             data  = store->Data();
+    GenTree*             value = store->Data();
 
     // Check for side effects.
     GenTree* sideEffList = nullptr;
-    if ((data->gtFlags & GTF_SIDE_EFFECT) != 0)
+    if ((value->gtFlags & GTF_SIDE_EFFECT) != 0)
     {
 #ifdef DEBUG
         if (verbose)
@@ -2171,7 +2152,7 @@ bool Compiler::fgRemoveDeadStore(GenTree**        pTree,
         }
 #endif // DEBUG
 
-        gtExtractSideEffList(data, &sideEffList);
+        gtExtractSideEffList(value, &sideEffList);
     }
 
     // Test for interior statement
@@ -2200,7 +2181,7 @@ bool Compiler::fgRemoveDeadStore(GenTree**        pTree,
 #ifdef DEBUG
             *treeModf = true;
 #endif // DEBUG
-            // Update ordering, costs, FP levels, etc.
+       // Update ordering, costs, FP levels, etc.
             gtSetStmtInfo(compCurStmt);
 
             // Re-link the nodes for this statement
@@ -2292,7 +2273,7 @@ bool Compiler::fgRemoveDeadStore(GenTree**        pTree,
                 printf("\n");
             }
 #endif // DEBUG
-            // No side effects - Change the store to a GT_NOP node
+       // No side effects - Change the store to a GT_NOP node
             store->gtBashToNOP();
 
 #ifdef DEBUG
diff --git a/src/coreclr/jit/llvm.cpp b/src/coreclr/jit/llvm.cpp
index 023c38267069..2218c8d35277 100644
--- a/src/coreclr/jit/llvm.cpp
+++ b/src/coreclr/jit/llvm.cpp
@@ -367,9 +367,9 @@ bool Llvm::helperCallMayPhysicallyThrow(CorInfoHelpFunc helperFunc) const
 
         // Implemented in "Runtime.Base\src\System\Runtime\TypeCast.cs".
         { FUNC(CORINFO_HELP_ARRADDR_ST) CORINFO_TYPE_VOID, { CORINFO_TYPE_CLASS, CORINFO_TYPE_NATIVEINT, CORINFO_TYPE_CLASS }, HFIF_SS_ARG },
-        { FUNC(CORINFO_HELP_LDELEMA_REF) CORINFO_TYPE_BYREF, { CORINFO_TYPE_CLASS, CORINFO_TYPE_NATIVEINT, CORINFO_TYPE_NATIVEINT }, HFIF_SS_ARG }, // Oddity: IntPtr used for MethodTable*.
+        { FUNC(CORINFO_HELP_LDELEMA_REF) CORINFO_TYPE_BYREF, { CORINFO_TYPE_CLASS, CORINFO_TYPE_NATIVEINT, CORINFO_TYPE_PTR }, HFIF_SS_ARG },
 
-        // Runtime exports implemented in "Runtime.Base\src\System\Runtime\ExcetionHandling.wasm.cs".
+        // Runtime exports implemented in "Runtime.Base\src\System\Runtime\ExceptionHandling.wasm.cs".
         { FUNC(CORINFO_HELP_THROW) CORINFO_TYPE_VOID, { CORINFO_TYPE_CLASS }, HFIF_SS_ARG },
         { FUNC(CORINFO_HELP_RETHROW) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR }, HFIF_SS_ARG },
 
@@ -481,9 +481,13 @@ bool Llvm::helperCallMayPhysicallyThrow(CorInfoHelpFunc helperFunc) const
         // Part of the inlined PInvoke frame construction feature which is NYI in NativeAOT.
         { FUNC(CORINFO_HELP_INIT_PINVOKE_FRAME) },
 
-        // Implemented as plain "memset"/"memcpy".
-        { FUNC(CORINFO_HELP_MEMSET) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR, CORINFO_TYPE_INT, CORINFO_TYPE_NATIVEUINT } },
-        { FUNC(CORINFO_HELP_MEMCPY) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR, CORINFO_TYPE_PTR, CORINFO_TYPE_NATIVEUINT } },
+        // Runtime exports implemented in "src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs".
+        { FUNC(CORINFO_HELP_MEMSET) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR, CORINFO_TYPE_BYTE, CORINFO_TYPE_NATIVEUINT }, HFIF_SS_ARG },
+        { FUNC(CORINFO_HELP_MEMZERO) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR, CORINFO_TYPE_NATIVEUINT }, HFIF_SS_ARG },
+        { FUNC(CORINFO_HELP_MEMCPY) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR, CORINFO_TYPE_PTR, CORINFO_TYPE_NATIVEUINT }, HFIF_SS_ARG },
+
+        // Implemented as plain "memset".
+        { FUNC(CORINFO_HELP_NATIVE_MEMSET) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR, CORINFO_TYPE_INT, CORINFO_TYPE_NATIVEUINT } },
 
         // Not used in NativeAOT.
         { FUNC(CORINFO_HELP_RUNTIMEHANDLE_METHOD) },
diff --git a/src/coreclr/jit/llvm.h b/src/coreclr/jit/llvm.h
index 97ad4ffcdc15..96900be26907 100644
--- a/src/coreclr/jit/llvm.h
+++ b/src/coreclr/jit/llvm.h
@@ -19,13 +19,19 @@
 // this breaks StringMap.h
 #undef NumItems
 
-#pragma warning (disable: 4702)
+// Remove these disables where possible and convert to push/pop elsewhere.
+// See https://github.com/dotnet/runtimelab/issues/2554
+#pragma warning(disable : 4146)
+#pragma warning(disable : 4242)
+#pragma warning(disable : 4244)
+#pragma warning(disable : 4267)
+#pragma warning(disable : 4459)
+#pragma warning(disable : 4702)
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
-#pragma warning (error: 4702)
 
 #include <unordered_map>
 
@@ -407,7 +413,6 @@ class Llvm
     void lowerRethrow(GenTreeCall* callNode);
     void lowerIndir(GenTreeIndir* indirNode);
     void lowerStoreBlk(GenTreeBlk* storeBlkNode);
-    void lowerStoreDynBlk(GenTreeStoreDynBlk* storeDynBlkNode);
     void lowerArrLength(GenTreeArrCommon* node);
     void lowerReturn(GenTreeUnOp* retNode);
 
@@ -529,7 +534,6 @@ class Llvm
     void buildBlk(GenTreeBlk* blkNode);
     void buildStoreInd(GenTreeStoreInd* storeIndOp);
     void buildStoreBlk(GenTreeBlk* blockOp);
-    void buildStoreDynBlk(GenTreeStoreDynBlk* blockOp);
     void buildUnaryOperation(GenTree* node);
     void buildBinaryOperation(GenTree* node);
     void buildShift(GenTreeOp* node);
diff --git a/src/coreclr/jit/llvmcodegen.cpp b/src/coreclr/jit/llvmcodegen.cpp
index ac64781272ca..c9b0d4ec487f 100644
--- a/src/coreclr/jit/llvmcodegen.cpp
+++ b/src/coreclr/jit/llvmcodegen.cpp
@@ -1080,9 +1080,6 @@ void Llvm::visitNode(GenTree* node)
         case GT_STORE_BLK:
             buildStoreBlk(node->AsBlk());
             break;
-        case GT_STORE_DYN_BLK:
-            buildStoreDynBlk(node->AsStoreDynBlk());
-            break;
         case GT_MUL:
         case GT_AND:
         case GT_OR:
@@ -1586,14 +1583,14 @@ void Llvm::buildCast(GenTreeCast* cast)
                 case TYP_SHORT:
                 case TYP_INT:
                 case TYP_LONG:
-                    castValue = _builder.CreateFPToSI(castFromValue, castToLlvmType);
+                    castValue = _builder.CreateIntrinsic(castToLlvmType, llvm::Intrinsic::fptosi_sat, castFromValue);
                     break;
 
                 case TYP_UBYTE:
                 case TYP_USHORT:
                 case TYP_UINT:
                 case TYP_ULONG:
-                    castValue = _builder.CreateFPToUI(castFromValue, castToLlvmType);
+                    castValue = _builder.CreateIntrinsic(castToLlvmType, llvm::Intrinsic::fptoui_sat, castFromValue);
                     break;
 
                 default:
@@ -1845,72 +1842,6 @@ void Llvm::buildStoreBlk(GenTreeBlk* blockOp)
     }
 }
 
-void Llvm::buildStoreDynBlk(GenTreeStoreDynBlk* blockOp)
-{
-    bool isCopyBlock = blockOp->OperIsCopyBlkOp();
-    GenTree* srcNode = blockOp->Data();
-    GenTree* sizeNode = blockOp->gtDynamicSize;
-
-    // STORE_DYN_BLK accepts native-sized size operands.
-    Type* sizeLlvmType = getIntPtrLlvmType();
-    Value* sizeValue = consumeValue(sizeNode, sizeLlvmType);
-
-    // STORE_DYN_BLK's contract is that it must not throw any exceptions in case the dynamic size is zero and must throw
-    // NRE otherwise.
-    bool dstAddrMayBeNull = (blockOp->gtFlags & GTF_IND_NONFAULTING) == 0;
-    bool srcAddrMayBeNull = isCopyBlock && ((srcNode->gtFlags & GTF_IND_NONFAULTING) == 0);
-    llvm::BasicBlock* checkSizeLlvmBlock = nullptr;
-    llvm::BasicBlock* nullChecksLlvmBlock = nullptr;
-
-    // TODO-LLVM-CQ: we should use CORINFO_HELP_MEMCPY/CORINFO_HELP_MEMSET here if we need to do the size check (it will
-    // result in smaller code). But currently we cannot because ILC maps these to native "memcpy/memset", which do not
-    // have the right semantics (don't throw NREs).
-    if (dstAddrMayBeNull || srcAddrMayBeNull)
-    {
-        //
-        // if (sizeIsZeroValue) goto PASSED; else goto CHECK_DST; (we'll add this below)
-        // CHECK_DST:
-        //   if (dst is null) Throw();
-        // CHECK_SRC:
-        //   if (src is null) Throw();
-        // COPY:
-        //   memcpy/memset
-        // PASSED:
-        //
-        checkSizeLlvmBlock = _builder.GetInsertBlock();
-        nullChecksLlvmBlock = createInlineLlvmBlock();
-        _builder.SetInsertPoint(nullChecksLlvmBlock);
-    }
-
-    // Technically cpblk/initblk specify that they expect their sources/destinations to be aligned, but in
-    // practice these instructions are used like memcpy/memset, which do not require this. So we do not try
-    // to be more precise with the alignment specification here as well.
-    // TODO-LLVM: volatile STORE_DYN_BLK.
-    Value* dstAddrValue = consumeAddressAndEmitNullCheck(blockOp);
-    if (isCopyBlock)
-    {
-        Value* srcAddrValue = consumeAddressAndEmitNullCheck(srcNode->AsIndir());
-        _builder.CreateMemCpy(dstAddrValue, llvm::MaybeAlign(), srcAddrValue, llvm::MaybeAlign(), sizeValue);
-    }
-    else
-    {
-        Value* initValue = consumeInitVal(srcNode);
-        _builder.CreateMemSet(dstAddrValue, initValue, sizeValue, llvm::MaybeAlign());
-    }
-
-    if (checkSizeLlvmBlock != nullptr)
-    {
-        llvm::BasicBlock* skipOperationLlvmBlock = createInlineLlvmBlock();
-        _builder.CreateBr(skipOperationLlvmBlock);
-
-        _builder.SetInsertPoint(checkSizeLlvmBlock);
-        Value* sizeIsZeroValue = _builder.CreateICmpEQ(sizeValue, llvm::ConstantInt::getNullValue(sizeLlvmType));
-        _builder.CreateCondBr(sizeIsZeroValue, skipOperationLlvmBlock, nullChecksLlvmBlock);
-
-        _builder.SetInsertPoint(skipOperationLlvmBlock);
-    }
-}
-
 void Llvm::buildUnaryOperation(GenTree* node)
 {
     GenTree* op1 = node->gtGetOp1();
@@ -2190,14 +2121,14 @@ void Llvm::buildSwitch(GenTreeUnOp* switchNode)
     unsigned casesCount = switchDesc->bbsCount - 1;
     noway_assert(switchDesc->bbsHasDefault);
 
-    BasicBlock* defaultDestBlock = switchDesc->getDefault();
+    BasicBlock* defaultDestBlock = switchDesc->getDefault()->getDestinationBlock();
     llvm::BasicBlock* defaultDestLlvmBlock = getFirstLlvmBlockForBlock(defaultDestBlock);
     llvm::SwitchInst* switchInst = _builder.CreateSwitch(destValue, defaultDestLlvmBlock, casesCount);
 
     for (unsigned destIndex = 0; destIndex < casesCount; destIndex++)
     {
         llvm::ConstantInt* destIndexValue = llvm::ConstantInt::get(switchLlvmType, destIndex);
-        llvm::BasicBlock* destLlvmBlock = getFirstLlvmBlockForBlock(switchDesc->bbsDstTab[destIndex]);
+        llvm::BasicBlock* destLlvmBlock = getFirstLlvmBlockForBlock(switchDesc->bbsDstTab[destIndex]->getDestinationBlock());
 
         switchInst->addCase(destIndexValue, destLlvmBlock);
     }
@@ -3370,7 +3301,7 @@ llvm::BasicBlock* Llvm::getOrCreatePrologLlvmBlockForFunction(unsigned funcIdx)
     BasicBlock* firstUserBlock = getFirstBlockForFunction(funcIdx);
     llvm::BasicBlock* firstLlvmUserBlock = getFirstLlvmBlockForBlock(firstUserBlock);
     llvm::BasicBlock* prologLlvmBlock = firstLlvmUserBlock->getPrevNode();
-    if ((prologLlvmBlock == nullptr) || !prologLlvmBlock->getName().startswith(PROLOG_BLOCK_NAME))
+    if ((prologLlvmBlock == nullptr) || !prologLlvmBlock->getName().starts_with(PROLOG_BLOCK_NAME))
     {
         Function* llvmFunc = firstLlvmUserBlock->getParent();
         prologLlvmBlock = llvm::BasicBlock::Create(m_context->Context, PROLOG_BLOCK_NAME, llvmFunc, firstLlvmUserBlock);
diff --git a/src/coreclr/jit/llvmlower.cpp b/src/coreclr/jit/llvmlower.cpp
index 6d59d22e6b17..e75bf02187a3 100644
--- a/src/coreclr/jit/llvmlower.cpp
+++ b/src/coreclr/jit/llvmlower.cpp
@@ -301,10 +301,6 @@ void Llvm::lowerNode(GenTree* node)
             lowerStoreBlk(node->AsBlk());
             break;
 
-        case GT_STORE_DYN_BLK:
-            lowerStoreDynBlk(node->AsStoreDynBlk());
-            break;
-
         case GT_ARR_LENGTH:
         case GT_MDARR_LENGTH:
         case GT_MDARR_LOWER_BOUND:
@@ -574,12 +570,6 @@ void Llvm::lowerStoreBlk(GenTreeBlk* storeBlkNode)
     lowerIndir(storeBlkNode);
 }
 
-void Llvm::lowerStoreDynBlk(GenTreeStoreDynBlk* storeDynBlkNode)
-{
-    storeDynBlkNode->Data()->SetContained();
-    lowerIndir(storeDynBlkNode);
-}
-
 // TODO-LLVM: Almost a direct copy from lower.cpp which is not included for Wasm.
 //------------------------------------------------------------------------
 // LowerArrLength: lower an array length
@@ -1573,7 +1563,7 @@ PhaseStatus Llvm::AddVirtualUnwindFrame()
 
                 bool allPredsUseTheSameUnwindIndex = true;
                 unsigned selectedUnwindIndexGroup = blockUnwindIndexGroup;
-                for (BasicBlock* predBlock : PredBlockList(allPredEdges))
+                for (BasicBlock* predBlock : PredBlockList<false>(allPredEdges))
                 {
                     unsigned predBlockUnwindIndex = GetUnwindIndexForBlock(predBlock);
                     unsigned predBlockUnwindIndexGroup = GetGroup(predBlock);
@@ -1628,7 +1618,7 @@ PhaseStatus Llvm::AddVirtualUnwindFrame()
                 bool allPredsDefineTheSameUnwindIndex = allPredsUseTheSameUnwindIndex;
                 if (allPredsUseTheSameUnwindIndex)
                 {
-                    for (BasicBlock* predBlock : PredBlockList(allPredEdges))
+                    for (BasicBlock* predBlock : PredBlockList<false>(allPredEdges))
                     {
                         unsigned predBlockUnwindIndexGroup = GetGroup(predBlock);
                         if (predBlockUnwindIndexGroup != UNWIND_INDEX_GROUP_NONE)
diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp
index 23848154bcff..2001f396c6f2 100644
--- a/src/coreclr/jit/loopcloning.cpp
+++ b/src/coreclr/jit/loopcloning.cpp
@@ -853,24 +853,40 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler*
     noway_assert(conds.Size() > 0);
     assert(slowPreheader != nullptr);
 
+    // For now assume high likelihood for the fast path,
+    // uniformly spread across the gating branches.
+    //
+    // For "normal" cloning this is probably ok. For GDV cloning this
+    // may be inaccurate. We should key off the type test likelihood(s).
+    //
+    const weight_t fastLikelihood = fastPathWeightScaleFactor;
+
     // Choose how to generate the conditions
     const bool generateOneConditionPerBlock = true;
 
     if (generateOneConditionPerBlock)
     {
+        // N = conds.Size() branches must all be true to execute the fast loop.
+        // Use the N'th root....
+        //
+        const weight_t fastLikelihoodPerBlock = exp(log(fastLikelihood) / (weight_t)conds.Size());
+
         for (unsigned i = 0; i < conds.Size(); ++i)
         {
-            BasicBlock* newBlk = comp->fgNewBBafter(BBJ_COND, insertAfter, /*extendRegion*/ true, slowPreheader);
+            BasicBlock* newBlk = comp->fgNewBBafter(BBJ_COND, insertAfter, /*extendRegion*/ true);
             newBlk->inheritWeight(insertAfter);
 
-            JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", newBlk->bbNum, newBlk->GetTrueTarget()->bbNum);
-            comp->fgAddRefPred(newBlk->GetTrueTarget(), newBlk);
+            JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", newBlk->bbNum, slowPreheader->bbNum);
+            FlowEdge* const trueEdge = comp->fgAddRefPred(slowPreheader, newBlk);
+            newBlk->SetTrueEdge(trueEdge);
+            trueEdge->setLikelihood(1 - fastLikelihoodPerBlock);
 
             if (insertAfter->KindIs(BBJ_COND))
             {
                 JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", insertAfter->bbNum, newBlk->bbNum);
-                insertAfter->SetFalseTarget(newBlk);
-                comp->fgAddRefPred(newBlk, insertAfter);
+                FlowEdge* const falseEdge = comp->fgAddRefPred(newBlk, insertAfter);
+                insertAfter->SetFalseEdge(falseEdge);
+                falseEdge->setLikelihood(fastLikelihoodPerBlock);
             }
 
             JITDUMP("Adding conditions %u to " FMT_BB "\n", i, newBlk->bbNum);
@@ -894,16 +910,20 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler*
     }
     else
     {
-        BasicBlock* newBlk = comp->fgNewBBafter(BBJ_COND, insertAfter, /*extendRegion*/ true, slowPreheader);
+        BasicBlock* newBlk = comp->fgNewBBafter(BBJ_COND, insertAfter, /*extendRegion*/ true);
         newBlk->inheritWeight(insertAfter);
 
-        JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", newBlk->bbNum, newBlk->GetTrueTarget()->bbNum);
-        comp->fgAddRefPred(newBlk->GetTrueTarget(), newBlk);
+        JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", newBlk->bbNum, slowPreheader->bbNum);
+        FlowEdge* const trueEdge = comp->fgAddRefPred(slowPreheader, newBlk);
+        newBlk->SetTrueEdge(trueEdge);
+        trueEdge->setLikelihood(1.0 - fastLikelihood);
 
-        if (insertAfter->bbFallsThrough())
+        if (insertAfter->KindIs(BBJ_COND))
         {
             JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", insertAfter->bbNum, newBlk->bbNum);
-            comp->fgAddRefPred(newBlk, insertAfter);
+            FlowEdge* const falseEdge = comp->fgAddRefPred(newBlk, insertAfter);
+            insertAfter->SetFalseEdge(falseEdge);
+            falseEdge->setLikelihood(fastLikelihood);
         }
 
         JITDUMP("Adding conditions to " FMT_BB "\n", newBlk->bbNum);
@@ -1351,7 +1371,7 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
                 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
                 LC_Array arrLen(LC_Array(LC_Array::MdArray, mdArrInfo->GetArrIndexForDim(getAllocator(CMK_LoopClone)),
                                          mdArrInfo->dim, LC_Array::None));
-                LC_Ident     arrLenIdent = LC_Ident::CreateArrAccess(arrLen);
+                LC_Ident arrLenIdent = LC_Ident::CreateArrAccess(arrLen);
                 LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent));
                 context->EnsureConditions(loop->GetIndex())->Push(cond);
 
@@ -1646,7 +1666,7 @@ void Compiler::optDebugLogLoopCloning(BasicBlock* block, Statement* insertBefore
 //      performs the optimizations assuming that the path in which the candidates
 //      were collected is the fast path in which the optimizations will be performed.
 //
-void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop,
+void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop*     loop,
                                              LoopCloneContext* context DEBUGARG(bool dynamicPath))
 {
     JitExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loop->GetIndex());
@@ -1938,13 +1958,6 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex
     // taking the max with the head block's weight.
     ambientWeight = max(ambientWeight, preheader->bbWeight);
 
-    // We assume that the fast path will run 99% of the time, and thus should get 99% of the block weights.
-    // The slow path will, correspondingly, get only 1% of the block weights. It could be argued that we should
-    // mark the slow path as "run rarely", since it really shouldn't execute (given the currently optimized loop
-    // conditions) except under exceptional circumstances.
-    const weight_t fastPathWeightScaleFactor = 0.99;
-    const weight_t slowPathWeightScaleFactor = 1.0 - fastPathWeightScaleFactor;
-
     // We're going to transform this loop:
     //
     // preheader --> header
@@ -1959,20 +1972,17 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex
     // Make a new pre-header block for the fast loop.
     JITDUMP("Create new preheader block for fast loop\n");
 
-    BasicBlock* fastPreheader =
-        fgNewBBafter(BBJ_ALWAYS, preheader, /*extendRegion*/ true, /*jumpDest*/ loop->GetHeader());
+    BasicBlock* fastPreheader = fgNewBBafter(BBJ_ALWAYS, preheader, /*extendRegion*/ true);
     JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", fastPreheader->bbNum, preheader->bbNum);
     fastPreheader->bbWeight = fastPreheader->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight;
 
-    if (fastPreheader->JumpsToNext())
-    {
-        fastPreheader->SetFlags(BBF_NONE_QUIRK);
-    }
-
     assert(preheader->KindIs(BBJ_ALWAYS));
     assert(preheader->TargetIs(loop->GetHeader()));
 
-    fgReplacePred(loop->GetHeader(), preheader, fastPreheader);
+    FlowEdge* const oldEdge = preheader->GetTargetEdge();
+    fgReplacePred(oldEdge, fastPreheader);
+    fastPreheader->SetTargetEdge(oldEdge);
+
     JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", preheader->bbNum,
             loop->GetHeader()->bbNum, fastPreheader->bbNum, loop->GetHeader()->bbNum);
 
@@ -1998,18 +2008,18 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex
     BasicBlock* slowPreheader = fgNewBBafter(BBJ_ALWAYS, newPred, /*extendRegion*/ true);
     JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", slowPreheader->bbNum, newPred->bbNum);
     slowPreheader->bbWeight = newPred->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight;
-    slowPreheader->scaleBBWeight(slowPathWeightScaleFactor);
+    slowPreheader->scaleBBWeight(LoopCloneContext::slowPathWeightScaleFactor);
     newPred = slowPreheader;
 
     // Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path.
 
     BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone));
 
-    loop->Duplicate(&newPred, blockMap, slowPathWeightScaleFactor);
+    loop->Duplicate(&newPred, blockMap, LoopCloneContext::slowPathWeightScaleFactor);
 
     // Scale old blocks to the fast path weight.
     loop->VisitLoopBlocks([=](BasicBlock* block) {
-        block->scaleBBWeight(fastPathWeightScaleFactor);
+        block->scaleBBWeight(LoopCloneContext::fastPathWeightScaleFactor);
         return BasicBlockVisit::Continue;
     });
 
@@ -2039,9 +2049,12 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex
     // We haven't set the jump target yet
     assert(slowPreheader->KindIs(BBJ_ALWAYS));
     assert(!slowPreheader->HasInitializedTarget());
-    slowPreheader->SetTarget(slowHeader);
 
-    fgAddRefPred(slowHeader, slowPreheader);
+    {
+        FlowEdge* const newEdge = fgAddRefPred(slowHeader, slowPreheader);
+        slowPreheader->SetTargetEdge(newEdge);
+    }
+
     JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", slowPreheader->bbNum, slowHeader->bbNum);
 
     BasicBlock* condLast = optInsertLoopChoiceConditions(context, loop, slowPreheader, preheader);
@@ -2049,14 +2062,18 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex
     // Now redirect the old preheader to jump to the first new condition that
     // was inserted by the above function.
     assert(preheader->KindIs(BBJ_ALWAYS));
-    preheader->SetTarget(preheader->Next());
-    fgAddRefPred(preheader->Next(), preheader);
-    preheader->SetFlags(BBF_NONE_QUIRK);
+
+    {
+        FlowEdge* const newEdge = fgAddRefPred(preheader->Next(), preheader);
+        preheader->SetTargetEdge(newEdge);
+    }
 
     // And make sure we insert a pred link for the final fallthrough into the fast preheader.
     assert(condLast->NextIs(fastPreheader));
-    condLast->SetFalseTarget(fastPreheader);
-    fgAddRefPred(fastPreheader, condLast);
+    FlowEdge* const falseEdge = fgAddRefPred(fastPreheader, condLast);
+    condLast->SetFalseEdge(falseEdge);
+    FlowEdge* const trueEdge = condLast->GetTrueEdge();
+    falseEdge->setLikelihood(max(0.0, 1.0 - trueEdge->getLikelihood()));
 }
 
 //-------------------------------------------------------------------------
@@ -2955,31 +2972,38 @@ PhaseStatus Compiler::optCloneLoops()
 #endif
 #endif
 
-    assert(optLoopsCloned == 0); // It should be initialized, but not yet changed.
+    assert(Metrics.LoopsCloned == 0); // It should be initialized, but not yet changed.
     for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder())
     {
         if (context.GetLoopOptInfo(loop->GetIndex()) != nullptr)
         {
-            optLoopsCloned++;
+            Metrics.LoopsCloned++;
             context.OptimizeConditions(loop->GetIndex() DEBUGARG(verbose));
             context.OptimizeBlockConditions(loop->GetIndex() DEBUGARG(verbose));
             optCloneLoop(loop, &context);
         }
     }
 
-    if (optLoopsCloned > 0)
+    if (Metrics.LoopsCloned > 0)
     {
-        fgRenumberBlocks();
-
         fgInvalidateDfsTree();
         m_dfsTree = fgComputeDfs();
         m_loops   = FlowGraphNaturalLoops::Find(m_dfsTree);
+
+        if (optCanonicalizeLoops())
+        {
+            fgInvalidateDfsTree();
+            m_dfsTree = fgComputeDfs();
+            m_loops   = FlowGraphNaturalLoops::Find(m_dfsTree);
+        }
+
+        fgRenumberBlocks();
     }
 
 #ifdef DEBUG
     if (verbose)
     {
-        printf("Loops cloned: %d\n", optLoopsCloned);
+        printf("Loops cloned: %d\n", Metrics.LoopsCloned);
         printf("Loops statically optimized: %d\n", optStaticallyOptimizedLoops);
         printf("After loop cloning:\n");
         fgDispBasicBlocks(/*dumpTrees*/ true);
diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h
index 2333d491764c..20f041eab40a 100644
--- a/src/coreclr/jit/loopcloning.h
+++ b/src/coreclr/jit/loopcloning.h
@@ -196,7 +196,12 @@ struct ArrIndex
     unsigned                      rank;     // Rank of the array
     BasicBlock*                   useBlock; // Block where the [] occurs
 
-    ArrIndex(CompAllocator alloc) : arrLcl(BAD_VAR_NUM), indLcls(alloc), bndsChks(alloc), rank(0), useBlock(nullptr)
+    ArrIndex(CompAllocator alloc)
+        : arrLcl(BAD_VAR_NUM)
+        , indLcls(alloc)
+        , bndsChks(alloc)
+        , rank(0)
+        , useBlock(nullptr)
     {
     }
 
@@ -236,7 +241,8 @@ struct LcOptInfo
     };
 
     OptType optType;
-    LcOptInfo(OptType optType) : optType(optType)
+    LcOptInfo(OptType optType)
+        : optType(optType)
     {
     }
 
@@ -267,7 +273,10 @@ struct LcMdArrayOptInfo : public LcOptInfo
     ArrIndex* index;         // "index" cached computation in the form of an ArrIndex representation.
 
     LcMdArrayOptInfo(GenTreeArrElem* arrElem, unsigned dim)
-        : LcOptInfo(LcMdArray), arrElem(arrElem), dim(dim), index(nullptr)
+        : LcOptInfo(LcMdArray)
+        , arrElem(arrElem)
+        , dim(dim)
+        , index(nullptr)
     {
     }
 
@@ -300,7 +309,10 @@ struct LcJaggedArrayOptInfo : public LcOptInfo
     Statement* stmt;     // "stmt" where the optimization opportunity occurs.
 
     LcJaggedArrayOptInfo(ArrIndex& arrIndex, unsigned dim, Statement* stmt)
-        : LcOptInfo(LcJaggedArray), dim(dim), arrIndex(arrIndex), stmt(stmt)
+        : LcOptInfo(LcJaggedArray)
+        , dim(dim)
+        , arrIndex(arrIndex)
+        , stmt(stmt)
     {
     }
 };
@@ -319,7 +331,11 @@ struct LcTypeTestOptInfo : public LcOptInfo
     CORINFO_CLASS_HANDLE clsHnd;
 
     LcTypeTestOptInfo(Statement* stmt, GenTreeIndir* methodTableIndir, unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd)
-        : LcOptInfo(LcTypeTest), stmt(stmt), methodTableIndir(methodTableIndir), lclNum(lclNum), clsHnd(clsHnd)
+        : LcOptInfo(LcTypeTest)
+        , stmt(stmt)
+        , methodTableIndir(methodTableIndir)
+        , lclNum(lclNum)
+        , clsHnd(clsHnd)
     {
     }
 };
@@ -343,7 +359,7 @@ struct LcMethodAddrTestOptInfo : public LcOptInfo
                             GenTreeIndir* delegateAddressIndir,
                             unsigned      delegateLclNum,
                             void*         methAddr,
-                            bool isSlot DEBUG_ARG(CORINFO_METHOD_HANDLE targetMethHnd))
+                            bool isSlot   DEBUG_ARG(CORINFO_METHOD_HANDLE targetMethHnd))
         : LcOptInfo(LcMethodAddrTest)
         , stmt(stmt)
         , delegateAddressIndir(delegateAddressIndir)
@@ -393,15 +409,24 @@ struct LC_Array
     int dim; // "dim" = which index to invoke arrLen on, if -1 invoke on the whole array
              //     Example 1: a[0][1][2] and dim =  2 implies a[0][1].length
              //     Example 2: a[0][1][2] and dim = -1 implies a[0][1][2].length
-    LC_Array() : type(Invalid), dim(-1)
+    LC_Array()
+        : type(Invalid)
+        , dim(-1)
     {
     }
     LC_Array(ArrType type, ArrIndex* arrIndex, int dim, OperType oper)
-        : type(type), arrIndex(arrIndex), oper(oper), dim(dim)
+        : type(type)
+        , arrIndex(arrIndex)
+        , oper(oper)
+        , dim(dim)
     {
     }
 
-    LC_Array(ArrType type, ArrIndex* arrIndex, OperType oper) : type(type), arrIndex(arrIndex), oper(oper), dim(-1)
+    LC_Array(ArrType type, ArrIndex* arrIndex, OperType oper)
+        : type(type)
+        , arrIndex(arrIndex)
+        , oper(oper)
+        , dim(-1)
     {
     }
 
@@ -464,7 +489,8 @@ struct LC_Ident
     };
 
 private:
-    union {
+    union
+    {
         unsigned constant;
         struct
         {
@@ -482,7 +508,8 @@ struct LC_Ident
         };
     };
 
-    LC_Ident(IdentType type) : type(type)
+    LC_Ident(IdentType type)
+        : type(type)
     {
     }
 
@@ -490,7 +517,8 @@ struct LC_Ident
     // The type of this object
     IdentType type;
 
-    LC_Ident() : type(Invalid)
+    LC_Ident()
+        : type(Invalid)
     {
     }
 
@@ -680,10 +708,13 @@ struct LC_Expr
     }
 #endif
 
-    LC_Expr() : type(Invalid)
+    LC_Expr()
+        : type(Invalid)
     {
     }
-    explicit LC_Expr(const LC_Ident& ident) : ident(ident), type(Ident)
+    explicit LC_Expr(const LC_Ident& ident)
+        : ident(ident)
+        , type(Ident)
     {
     }
 
@@ -724,7 +755,10 @@ struct LC_Condition
     {
     }
     LC_Condition(genTreeOps oper, const LC_Expr& op1, const LC_Expr& op2, bool asUnsigned = false)
-        : op1(op1), op2(op2), oper(oper), compareUnsigned(asUnsigned)
+        : op1(op1)
+        , op2(op2)
+        , oper(oper)
+        , compareUnsigned(asUnsigned)
     {
     }
 
@@ -756,7 +790,10 @@ struct LC_ArrayDeref
 
     unsigned level;
 
-    LC_ArrayDeref(const LC_Array& array, unsigned level) : array(array), children(nullptr), level(level)
+    LC_ArrayDeref(const LC_Array& array, unsigned level)
+        : array(array)
+        , children(nullptr)
+        , level(level)
     {
     }
 
@@ -764,8 +801,8 @@ struct LC_ArrayDeref
 
     unsigned Lcl();
 
-    bool HasChildren();
-    void EnsureChildren(CompAllocator alloc);
+    bool                  HasChildren();
+    void                  EnsureChildren(CompAllocator alloc);
     static LC_ArrayDeref* Find(JitExpandArrayStack<LC_ArrayDeref*>* children, unsigned lcl);
 
     void DeriveLevelConditions(JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* len);
@@ -814,6 +851,14 @@ struct NaturalLoopIterInfo;
  */
 struct LoopCloneContext
 {
+    // We assume that the fast path will run 99% of the time, and thus should get 99% of the block weights.
+    // The slow path will, correspondingly, get only 1% of the block weights. It could be argued that we should
+    // mark the slow path as "run rarely", since it really shouldn't execute (given the currently optimized loop
+    // conditions) except under exceptional circumstances.
+    //
+    static constexpr weight_t fastPathWeightScaleFactor = 0.99;
+    static constexpr weight_t slowPathWeightScaleFactor = 1.0 - fastPathWeightScaleFactor;
+
     CompAllocator alloc; // The allocator
 
     // The array of optimization opportunities found in each loop. (loop x optimization-opportunities)
@@ -851,7 +896,7 @@ struct LoopCloneContext
     }
 
     NaturalLoopIterInfo* GetLoopIterInfo(unsigned loopNum);
-    void SetLoopIterInfo(unsigned loopNum, NaturalLoopIterInfo* info);
+    void                 SetLoopIterInfo(unsigned loopNum, NaturalLoopIterInfo* info);
 
     // Evaluate conditions into a JTRUE stmt and put it in a new block after `insertAfter`.
     BasicBlock* CondToStmtInBlock(Compiler*                          comp,
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 591db3a78a22..0427cf666a3b 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -384,7 +384,7 @@ bool Lowering::IsSafeToMarkRegOptional(GenTree* parentNode, GenTree* childNode)
     LclVarDsc* dsc = comp->lvaGetDesc(childNode->AsLclVarCommon());
     if (!dsc->IsAddressExposed())
     {
-        // Safe by IR invariants (no assignments occur between parent and node).
+        // Safe by IR invariants (no stores occur between parent and node).
         return true;
     }
 
@@ -529,8 +529,16 @@ GenTree* Lowering::LowerNode(GenTree* node)
             break;
 
         case GT_CAST:
-            LowerCast(node);
-            break;
+        {
+            GenTree* nextNode = LowerCast(node);
+#if defined(TARGET_XARCH)
+            if (nextNode != nullptr)
+            {
+                return nextNode;
+            }
+#endif // TARGET_XARCH
+        }
+        break;
 
         case GT_BITCAST:
             ContainCheckBitCast(node);
@@ -570,8 +578,6 @@ GenTree* Lowering::LowerNode(GenTree* node)
                 LowerStoreSingleRegCallStruct(node->AsBlk());
                 break;
             }
-            FALLTHROUGH;
-        case GT_STORE_DYN_BLK:
             LowerBlockStoreCommon(node->AsBlk());
             break;
 
@@ -827,10 +833,6 @@ GenTree* Lowering::LowerArrLength(GenTreeArrCommon* node)
 
 GenTree* Lowering::LowerSwitch(GenTree* node)
 {
-    unsigned     jumpCnt;
-    unsigned     targetCnt;
-    BasicBlock** jumpTab;
-
     assert(node->gtOper == GT_SWITCH);
 
     // The first step is to build the default case conditional construct that is
@@ -844,9 +846,9 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
     // jumpCnt is the number of elements in the jump table array.
     // jumpTab is the actual pointer to the jump table array.
     // targetCnt is the number of unique targets in the jump table array.
-    jumpCnt   = originalSwitchBB->GetSwitchTargets()->bbsCount;
-    jumpTab   = originalSwitchBB->GetSwitchTargets()->bbsDstTab;
-    targetCnt = originalSwitchBB->NumSucc(comp);
+    const unsigned   jumpCnt   = originalSwitchBB->GetSwitchTargets()->bbsCount;
+    FlowEdge** const jumpTab   = originalSwitchBB->GetSwitchTargets()->bbsDstTab;
+    const unsigned   targetCnt = originalSwitchBB->NumSucc(comp);
 
 // GT_SWITCH must be a top-level node with no use.
 #ifdef DEBUG
@@ -865,17 +867,12 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
     {
         JITDUMP("Lowering switch " FMT_BB ": single target; converting to BBJ_ALWAYS\n", originalSwitchBB->bbNum);
         noway_assert(comp->opts.OptimizationDisabled());
-        originalSwitchBB->SetKindAndTarget(BBJ_ALWAYS, jumpTab[0]);
-
-        if (originalSwitchBB->JumpsToNext())
-        {
-            originalSwitchBB->SetFlags(BBF_NONE_QUIRK);
-        }
+        originalSwitchBB->SetKindAndTargetEdge(BBJ_ALWAYS, jumpTab[0]);
 
         // Remove extra predecessor links if there was more than one case.
         for (unsigned i = 1; i < jumpCnt; ++i)
         {
-            (void)comp->fgRemoveRefPred(jumpTab[i], originalSwitchBB);
+            comp->fgRemoveRefPred(jumpTab[i]);
         }
 
         // We have to get rid of the GT_SWITCH node but a child might have side effects so just assign
@@ -909,11 +906,11 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
     unsigned  tempLclNum  = temp->AsLclVarCommon()->GetLclNum();
     var_types tempLclType = temp->TypeGet();
 
-    BasicBlock* defaultBB   = jumpTab[jumpCnt - 1];
+    BasicBlock* defaultBB   = jumpTab[jumpCnt - 1]->getDestinationBlock();
     BasicBlock* followingBB = originalSwitchBB->Next();
 
     /* Is the number of cases right for a test and jump switch? */
-    const bool fFirstCaseFollows = (followingBB == jumpTab[0]);
+    const bool fFirstCaseFollows = (followingBB == jumpTab[0]->getDestinationBlock());
     const bool fDefaultFollows   = (followingBB == defaultBB);
 
     unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
@@ -955,21 +952,34 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
     // originalSwitchBB is now a BBJ_ALWAYS, and there is a predecessor edge in afterDefaultCondBlock
     // representing the fall-through flow from originalSwitchBB.
     assert(originalSwitchBB->KindIs(BBJ_ALWAYS));
-    assert(originalSwitchBB->NextIs(afterDefaultCondBlock));
+    assert(originalSwitchBB->TargetIs(afterDefaultCondBlock));
+    assert(originalSwitchBB->JumpsToNext());
     assert(afterDefaultCondBlock->KindIs(BBJ_SWITCH));
     assert(afterDefaultCondBlock->GetSwitchTargets()->bbsHasDefault);
     assert(afterDefaultCondBlock->isEmpty()); // Nothing here yet.
 
     // The GT_SWITCH code is still in originalSwitchBB (it will be removed later).
 
-    // Turn originalSwitchBB into a BBJ_COND.
-    originalSwitchBB->SetCond(jumpTab[jumpCnt - 1], afterDefaultCondBlock);
-
     // Fix the pred for the default case: the default block target still has originalSwitchBB
     // as a predecessor, but the fgSplitBlockAfterStatement() moved all predecessors to point
     // to afterDefaultCondBlock.
-    FlowEdge* oldEdge = comp->fgRemoveRefPred(jumpTab[jumpCnt - 1], afterDefaultCondBlock);
-    comp->fgAddRefPred(jumpTab[jumpCnt - 1], originalSwitchBB, oldEdge);
+
+    // Note defaultEdge may also be the edge for some switch cases. We only probe edges,
+    // so assume each possibility is equally likely.
+    FlowEdge* const defaultEdge       = jumpTab[jumpCnt - 1];
+    weight_t const  defaultLikelihood = defaultEdge->getLikelihood() / defaultEdge->getDupCount();
+    comp->fgRemoveRefPred(defaultEdge);
+    FlowEdge* const trueEdge = comp->fgAddRefPred(defaultBB, originalSwitchBB);
+    trueEdge->setLikelihood(defaultLikelihood);
+    defaultEdge->setLikelihood(defaultEdge->getLikelihood() - defaultLikelihood);
+
+    // Turn originalSwitchBB into a BBJ_COND.
+    FlowEdge* const falseEdge        = originalSwitchBB->GetTargetEdge();
+    weight_t const  switchLikelihood = 1.0 - defaultLikelihood;
+    falseEdge->setLikelihood(switchLikelihood);
+    originalSwitchBB->SetCond(trueEdge, falseEdge);
+    afterDefaultCondBlock->inheritWeight(originalSwitchBB);
+    afterDefaultCondBlock->scaleBBWeight(switchLikelihood);
 
     bool useJumpSequence = jumpCnt < minSwitchTabJumpCnt;
 
@@ -989,7 +999,7 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
     // If we originally had 2 unique successors, check to see whether there is a unique
     // non-default case, in which case we can eliminate the switch altogether.
     // Note that the single unique successor case is handled above.
-    BasicBlock* uniqueSucc = nullptr;
+    FlowEdge* uniqueSucc = nullptr;
     if (targetCnt == 2)
     {
         uniqueSucc = jumpTab[0];
@@ -1008,22 +1018,17 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
         // If the unique successor immediately follows this block, we have nothing to do -
         // it will simply fall-through after we remove the switch, below.
         // Otherwise, make this a BBJ_ALWAYS.
-        // Now, fixup the predecessor links to uniqueSucc.  In the original jumpTab:
+        // Now, fixup the predecessor links to uniqueSucc's target block.  In the original jumpTab:
         //   jumpTab[i-1] was the default target, which we handled above,
         //   jumpTab[0] is the first target, and we'll leave that predecessor link.
-        // Remove any additional predecessor links to uniqueSucc.
+        // Remove any additional predecessor links to uniqueSucc's target block.
         for (unsigned i = 1; i < jumpCnt - 1; ++i)
         {
             assert(jumpTab[i] == uniqueSucc);
-            (void)comp->fgRemoveRefPred(uniqueSucc, afterDefaultCondBlock);
+            comp->fgRemoveRefPred(uniqueSucc);
         }
 
-        afterDefaultCondBlock->SetKindAndTarget(BBJ_ALWAYS, uniqueSucc);
-
-        if (afterDefaultCondBlock->JumpsToNext())
-        {
-            afterDefaultCondBlock->SetFlags(BBF_NONE_QUIRK);
-        }
+        afterDefaultCondBlock->SetKindAndTargetEdge(BBJ_ALWAYS, uniqueSucc);
     }
     // If the number of possible destinations is small enough, we proceed to expand the switch
     // into a series of conditional branches, otherwise we follow the jump table based switch
@@ -1051,15 +1056,38 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
         // If no case target follows, the last one doesn't need to be a compare/branch: it can be an
         // unconditional branch.
         bool fAnyTargetFollows = false;
+
+        // We need to track how much of the original switch's likelihood has already been
+        // tested for.  We'll use this to adjust the likelihood of the branches we're adding.
+        // So far we've tested for the default case, so we'll start with that.
+        weight_t totalTestLikelihood = defaultLikelihood;
         for (unsigned i = 0; i < jumpCnt - 1; ++i)
         {
             assert(currentBlock != nullptr);
+            BasicBlock* const targetBlock = jumpTab[i]->getDestinationBlock();
 
             // Remove the switch from the predecessor list of this case target's block.
             // We'll add the proper new predecessor edge later.
-            FlowEdge* oldEdge = comp->fgRemoveRefPred(jumpTab[i], afterDefaultCondBlock);
+            FlowEdge* const oldEdge = jumpTab[i];
 
-            if (jumpTab[i] == followingBB)
+            // Compute the likelihood that this test is successful.
+            // Divide by number of cases still sharing this edge (reduces likelihood)
+            // Divide by likelihood of reaching this test (increases likelihood).
+            // But if there is little chance of reaching this test, set the likelihood to 0.5
+            //
+            weight_t const edgeLikelihood          = oldEdge->getLikelihood();
+            weight_t const caseLikelihood          = edgeLikelihood / oldEdge->getDupCount();
+            bool const     unlikelyToReachThisCase = Compiler::fgProfileWeightsEqual(totalTestLikelihood, 1.0, 0.001);
+            weight_t const adjustedCaseLikelihood =
+                unlikelyToReachThisCase ? 0.5 : min(1.0, caseLikelihood / (1.0 - totalTestLikelihood));
+            comp->fgRemoveRefPred(oldEdge);
+
+            // Decrement the likelihood on the old edge, so if other cases are sharing it,
+            // they get the right values later.
+            //
+            oldEdge->setLikelihood(edgeLikelihood - caseLikelihood);
+
+            if (targetBlock == followingBB)
             {
                 // This case label follows the switch; let it fall through.
                 fAnyTargetFollows = true;
@@ -1068,23 +1096,54 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
 
             // We need a block to put in the new compare and/or branch.
             // If we haven't used the afterDefaultCondBlock yet, then use that.
+            //
             if (fUsedAfterDefaultCondBlock)
             {
-                BasicBlock* newBlock = comp->fgNewBBafter(BBJ_ALWAYS, currentBlock, true, currentBlock->Next());
-                newBlock->SetFlags(BBF_NONE_QUIRK);
-                currentBlock->SetFalseTarget(newBlock);
-                comp->fgAddRefPred(newBlock, currentBlock); // The fall-through predecessor.
+                BasicBlock*     newBlock  = comp->fgNewBBafter(BBJ_ALWAYS, currentBlock, true);
+                FlowEdge* const falseEdge = comp->fgAddRefPred(newBlock, currentBlock); // The fall-through predecessor.
+
+                // We set the true edge likelihood earlier, use that to figure out the false edge likelihood
+                // and the block weight.
+                //
+                FlowEdge* const trueEdge        = currentBlock->GetTrueEdge();
+                weight_t const  falseLikelihood = 1.0 - trueEdge->getLikelihood();
+                falseEdge->setLikelihood(falseLikelihood);
+                currentBlock->SetFalseEdge(falseEdge);
+                newBlock->inheritWeight(currentBlock);
+                newBlock->scaleBBWeight(falseLikelihood);
                 currentBlock   = newBlock;
                 currentBBRange = &LIR::AsRange(currentBlock);
             }
             else
             {
                 assert(currentBlock == afterDefaultCondBlock);
+
+                // If the first switch case we peel off has the same target as
+                // other cases (that is, it has nonzero dup count, it's simpler to
+                // just make a new here block, so that as we peel off cases,
+                // we're not sharing edges with the original switch.
+                //
+                // That is, the call to fgAddRefPred below always creates a new edge.
+                //
+                if (oldEdge->getDupCount() > 0)
+                {
+                    BasicBlock* const newBlock = comp->fgNewBBafter(BBJ_ALWAYS, currentBlock, true);
+                    FlowEdge* const   newEdge  = comp->fgAddRefPred(newBlock, currentBlock);
+                    currentBlock               = newBlock;
+                    currentBBRange             = &LIR::AsRange(currentBlock);
+                    afterDefaultCondBlock->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
+                }
+
                 fUsedAfterDefaultCondBlock = true;
             }
 
+            // Update the total test case likelihood.
+            totalTestLikelihood += caseLikelihood;
+
             // Wire up the predecessor list for the "branch" case.
-            comp->fgAddRefPred(jumpTab[i], currentBlock, oldEdge);
+            FlowEdge* const newEdge = comp->fgAddRefPred(targetBlock, currentBlock, oldEdge);
+            // This should truly be a new edge.
+            assert(newEdge->getDupCount() == 1);
 
             if (!fAnyTargetFollows && (i == jumpCnt - 2))
             {
@@ -1093,13 +1152,15 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
                 // case: there is no need to compare against the case index, since it's
                 // guaranteed to be taken (since the default case was handled first, above).
 
-                currentBlock->SetKindAndTarget(BBJ_ALWAYS, jumpTab[i]);
+                currentBlock->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
             }
             else
             {
                 // Otherwise, it's a conditional branch. Set the branch kind, then add the
                 // condition statement.
-                currentBlock->SetCond(jumpTab[i], currentBlock->Next());
+                // We will set the false edge in a later iteration of the loop, or after.
+                currentBlock->SetCond(newEdge);
+                newEdge->setLikelihood(adjustedCaseLikelihood);
 
                 // Now, build the conditional statement for the current case that is
                 // being evaluated:
@@ -1110,8 +1171,8 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
                 //                 |____ (ICon)        (The actual case constant)
                 GenTree* gtCaseCond = comp->gtNewOperNode(GT_EQ, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
                                                           comp->gtNewIconNode(i, genActualType(tempLclType)));
-                GenTree*   gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond);
-                LIR::Range caseRange    = LIR::SeqTree(comp, gtCaseBranch);
+                GenTree* gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond);
+                LIR::Range caseRange  = LIR::SeqTree(comp, gtCaseBranch);
                 currentBBRange->InsertAtEnd(std::move(caseRange));
             }
         }
@@ -1121,7 +1182,14 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
             // There is a fall-through to the following block. In the loop
             // above, we deleted all the predecessor edges from the switch.
             // In this case, we need to add one back.
-            comp->fgAddRefPred(currentBlock->Next(), currentBlock);
+            FlowEdge* const falseEdge = comp->fgAddRefPred(currentBlock->Next(), currentBlock);
+            currentBlock->SetFalseEdge(falseEdge);
+            FlowEdge* const trueEdge        = currentBlock->GetTrueEdge();
+            weight_t const  falseLikelihood = 1.0 - trueEdge->getLikelihood();
+            falseEdge->setLikelihood(falseLikelihood);
+
+            // The following block weight should remain unchanged. All we've done
+            // is alter the various paths that can reach it.
         }
 
         if (!fUsedAfterDefaultCondBlock)
@@ -1132,7 +1200,8 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
             JITDUMP("Lowering switch " FMT_BB ": all switch cases were fall-through\n", originalSwitchBB->bbNum);
             assert(currentBlock == afterDefaultCondBlock);
             assert(currentBlock->KindIs(BBJ_SWITCH));
-            currentBlock->SetKindAndTarget(BBJ_ALWAYS, currentBlock->Next());
+            FlowEdge* const newEdge = comp->fgAddRefPred(currentBlock->Next(), currentBlock);
+            currentBlock->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
             currentBlock->RemoveFlags(BBF_DONT_REMOVE);
             comp->fgRemoveBlock(currentBlock, /* unreachable */ false); // It's an empty block.
         }
@@ -1146,9 +1215,14 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
         LIR::Range& switchBlockRange = LIR::AsRange(afterDefaultCondBlock);
         switchBlockRange.InsertAtEnd(switchValue);
 
+        // We are going to modify the switch, invalidate any desc map.
+        //
+        comp->fgInvalidateSwitchDescMapEntry(afterDefaultCondBlock);
+
         // Try generating a bit test based switch first,
         // if that's not possible a jump table based switch will be generated.
-        if (!TryLowerSwitchToBitTest(jumpTab, jumpCnt, targetCnt, afterDefaultCondBlock, switchValue))
+        if (!TryLowerSwitchToBitTest(jumpTab, jumpCnt, targetCnt, afterDefaultCondBlock, switchValue,
+                                     defaultLikelihood))
         {
             JITDUMP("Lowering switch " FMT_BB ": using jump table expansion\n", originalSwitchBB->bbNum);
 
@@ -1168,9 +1242,49 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
 
             // this block no longer branches to the default block
             afterDefaultCondBlock->GetSwitchTargets()->removeDefault();
-        }
 
-        comp->fgInvalidateSwitchDescMapEntry(afterDefaultCondBlock);
+            // We need to scale up the likelihood of the remaining switch edges, now that we've peeled off
+            // the default case. But if the remaining likelihood is zero (default likelihood was 1.0),
+            // we don't know the case likelihoods. Instead, divide likelihood evenly among all cases.
+            //
+            // First, rebuild the unique succ set
+            //
+            Compiler::SwitchUniqueSuccSet successors = comp->GetDescriptorForSwitch(afterDefaultCondBlock);
+
+            // Then fix each successor edge
+            //
+            if (Compiler::fgProfileWeightsEqual(defaultLikelihood, 1.0, 0.001))
+            {
+                JITDUMP("Zero weight switch block " FMT_BB ", distributing likelihoods equally per case\n",
+                        afterDefaultCondBlock->bbNum);
+                // jumpCnt-1 here because we peeled the default after copying this value.
+                weight_t const newLikelihood = 1.0 / (jumpCnt - 1);
+                for (unsigned i = 0; i < successors.numDistinctSuccs; i++)
+                {
+                    FlowEdge* const edge = successors.nonDuplicates[i];
+                    edge->setLikelihood(newLikelihood * edge->getDupCount());
+                }
+            }
+            else
+            {
+                weight_t const scaleFactor = 1.0 / (1.0 - defaultLikelihood);
+                JITDUMP("Scaling switch block " FMT_BB " likelihoods by " FMT_WT "\n", afterDefaultCondBlock->bbNum,
+                        scaleFactor);
+                for (unsigned i = 0; i < successors.numDistinctSuccs; i++)
+                {
+                    FlowEdge* const edge          = successors.nonDuplicates[i];
+                    weight_t        newLikelihood = scaleFactor * edge->getLikelihood();
+
+                    if (newLikelihood > 1.0)
+                    {
+                        // tolerate small overflows
+                        assert(Compiler::fgProfileWeightsEqual(newLikelihood, 1.0, 0.001));
+                        newLikelihood = 1.0;
+                    }
+                    edge->setLikelihood(newLikelihood);
+                }
+            }
+        }
     }
 
     GenTree* next = node->gtNext;
@@ -1191,6 +1305,7 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
 //    targetCount - The number of distinct blocks in the jump table
 //    bbSwitch - The switch block
 //    switchValue - A LclVar node that provides the switch value
+//    defaultLikelihood - likelihood control flow took the default case (already checked)
 //
 // Return value:
 //    true if the switch has been lowered to a bit test
@@ -1211,8 +1326,12 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
 //    than the traditional jump table base code. And of course, it also avoids the need
 //    to emit the jump table itself that can reach up to 256 bytes (for 64 entries).
 //
-bool Lowering::TryLowerSwitchToBitTest(
-    BasicBlock* jumpTable[], unsigned jumpCount, unsigned targetCount, BasicBlock* bbSwitch, GenTree* switchValue)
+bool Lowering::TryLowerSwitchToBitTest(FlowEdge*   jumpTable[],
+                                       unsigned    jumpCount,
+                                       unsigned    targetCount,
+                                       BasicBlock* bbSwitch,
+                                       GenTree*    switchValue,
+                                       weight_t    defaultLikelihood)
 {
     assert(jumpCount >= 2);
     assert(targetCount >= 2);
@@ -1249,29 +1368,32 @@ bool Lowering::TryLowerSwitchToBitTest(
     // table and/or swap the blocks if it's beneficial.
     //
 
-    BasicBlock* bbCase0  = nullptr;
-    BasicBlock* bbCase1  = jumpTable[0];
-    size_t      bitTable = 1;
+    FlowEdge* case0Edge = nullptr;
+    FlowEdge* case1Edge = jumpTable[0];
+    size_t    bitTable  = 1;
 
     for (unsigned bitIndex = 1; bitIndex < bitCount; bitIndex++)
     {
-        if (jumpTable[bitIndex] == bbCase1)
+        if (jumpTable[bitIndex] == case1Edge)
         {
             bitTable |= (size_t(1) << bitIndex);
         }
-        else if (bbCase0 == nullptr)
+        else if (case0Edge == nullptr)
         {
-            bbCase0 = jumpTable[bitIndex];
+            case0Edge = jumpTable[bitIndex];
         }
-        else if (jumpTable[bitIndex] != bbCase0)
+        else if (jumpTable[bitIndex] != case0Edge)
         {
-            // If it's neither bbCase0 nor bbCase1 then it means we have 3 targets. There can't be more
+            // If it's neither case0Edge nor case`Edge then it means we have 3 targets. There can't be more
             // than 3 because of the check at the start of the function.
             assert(targetCount == 3);
             return false;
         }
     }
 
+    BasicBlock* bbCase0 = case0Edge->getDestinationBlock();
+    BasicBlock* bbCase1 = case1Edge->getDestinationBlock();
+
     //
     // One of the case blocks has to follow the switch block. This requirement could be avoided
     // by adding a BBJ_ALWAYS block after the switch block but doing that sometimes negatively
@@ -1283,6 +1405,8 @@ bool Lowering::TryLowerSwitchToBitTest(
         return false;
     }
 
+    JITDUMP("Lowering switch " FMT_BB " to bit test\n", bbSwitch->bbNum);
+
 #if defined(TARGET_64BIT) && defined(TARGET_XARCH)
     //
     // See if we can avoid a 8 byte immediate on 64 bit targets. If all upper 32 bits are 1
@@ -1307,11 +1431,33 @@ bool Lowering::TryLowerSwitchToBitTest(
     comp->fgRemoveAllRefPreds(bbCase1, bbSwitch);
     comp->fgRemoveAllRefPreds(bbCase0, bbSwitch);
 
+    case0Edge = comp->fgAddRefPred(bbCase0, bbSwitch, case0Edge);
+    case1Edge = comp->fgAddRefPred(bbCase1, bbSwitch, case1Edge);
+
+    // If defaultLikelihood is not ~ 1.0
+    //   up-scale case likelihoods by 1.0 / (1.0 - defaultLikelihood)
+    // else switch block weight should be zero
+    //   edge likelihoods are unknown, use 0.5
+    //
+    bool const likelyToReachSwitch = !Compiler::fgProfileWeightsEqual(defaultLikelihood, 1.0, 0.001);
+
+    if (likelyToReachSwitch)
+    {
+        weight_t const scaleFactor = 1.0 / (1.0 - defaultLikelihood);
+        case0Edge->setLikelihood(min(1.0, scaleFactor * case0Edge->getLikelihood()));
+        case1Edge->setLikelihood(min(1.0, scaleFactor * case1Edge->getLikelihood()));
+    }
+    else
+    {
+        case0Edge->setLikelihood(0.5);
+        case1Edge->setLikelihood(0.5);
+    }
+
     if (bbSwitch->NextIs(bbCase0))
     {
         // GenCondition::C generates JC so we jump to bbCase1 when the bit is set
         bbSwitchCondition = GenCondition::C;
-        bbSwitch->SetCond(bbCase1, bbCase0);
+        bbSwitch->SetCond(case1Edge, case0Edge);
     }
     else
     {
@@ -1319,12 +1465,9 @@ bool Lowering::TryLowerSwitchToBitTest(
 
         // GenCondition::NC generates JNC so we jump to bbCase0 when the bit is not set
         bbSwitchCondition = GenCondition::NC;
-        bbSwitch->SetCond(bbCase0, bbCase1);
+        bbSwitch->SetCond(case0Edge, case1Edge);
     }
 
-    comp->fgAddRefPred(bbCase0, bbSwitch);
-    comp->fgAddRefPred(bbCase1, bbSwitch);
-
     var_types bitTableType = (bitCount <= (genTypeSize(TYP_INT) * 8)) ? TYP_INT : TYP_LONG;
     GenTree*  bitTableIcon = comp->gtNewIconNode(bitTable, bitTableType);
 
@@ -1498,7 +1641,6 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg,
             // Mark this one as tail call arg if it is a fast tail call.
             // This provides the info to put this argument in in-coming arg area slot
             // instead of in out-going arg area slot.
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
             // Make sure state is correct. The PUTARG_STK has TYP_VOID, as it doesn't produce
@@ -1841,8 +1983,160 @@ GenTree* Lowering::AddrGen(void* addr)
     return AddrGen((ssize_t)addr);
 }
 
+// LowerCallMemset: Replaces the following memset-like special intrinsics:
+//
+//    SpanHelpers.Fill<T>(ref dstRef, CNS_SIZE, CNS_VALUE)
+//    CORINFO_HELP_MEMSET(ref dstRef, CNS_VALUE, CNS_SIZE)
+//    SpanHelpers.ClearWithoutReferences(ref dstRef, CNS_SIZE)
+//
+//  with a GT_STORE_BLK node:
+//
+//    *  STORE_BLK struct<CNS_SIZE> (init) (Unroll)
+//    +--*  LCL_VAR   byref  dstRef
+//    \--*  CNS_INT   int    0
+//
+// Arguments:
+//    tree - GenTreeCall node to replace with STORE_BLK
+//    next - [out] Next node to lower if this function returns true
+//
+// Return Value:
+//    false if no changes were made
+//
+bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next)
+{
+    assert(call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_Fill) ||
+           call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_ClearWithoutReferences) ||
+           call->IsHelperCall(comp, CORINFO_HELP_MEMSET));
+
+    JITDUMP("Considering Memset-like call [%06d] for unrolling.. ", comp->dspTreeID(call))
+
+    if (comp->info.compHasNextCallRetAddr)
+    {
+        JITDUMP("compHasNextCallRetAddr=true so we won't be able to remove the call - bail out.\n");
+        return false;
+    }
+
+    GenTree* dstRefArg = call->gtArgs.GetUserArgByIndex(0)->GetNode();
+    GenTree* lengthArg;
+    GenTree* valueArg;
+
+    // Fill<T>'s length is not in bytes, so we need to scale it depending on the signature
+    unsigned lengthScale;
+
+    if (call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_Fill))
+    {
+        // void SpanHelpers::Fill<T>(ref T refData, nuint numElements, T value)
+        //
+        assert(call->gtArgs.CountUserArgs() == 3);
+        lengthArg             = call->gtArgs.GetUserArgByIndex(1)->GetNode();
+        CallArg* valueCallArg = call->gtArgs.GetUserArgByIndex(2);
+        valueArg              = valueCallArg->GetNode();
+
+        // Get that <T> from the signature
+        lengthScale = genTypeSize(valueCallArg->GetSignatureType());
+        // NOTE: structs and TYP_REF will be ignored by the "Value is not a constant" check
+        // Some of those cases can be enabled in future, e.g. s
+    }
+    else if (call->IsHelperCall(comp, CORINFO_HELP_MEMSET))
+    {
+        // void CORINFO_HELP_MEMSET(ref T refData, byte value, nuint numElements)
+        //
+        assert(call->gtArgs.CountUserArgs() == 3);
+        lengthArg   = call->gtArgs.GetUserArgByIndex(2)->GetNode();
+        valueArg    = call->gtArgs.GetUserArgByIndex(1)->GetNode();
+        lengthScale = 1; // it's always in bytes
+    }
+    else
+    {
+        // void SpanHelpers::ClearWithoutReferences(ref byte b, nuint byteLength)
+        //
+        assert(call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_ClearWithoutReferences));
+        assert(call->gtArgs.CountUserArgs() == 2);
+
+        // Simple zeroing
+        lengthArg   = call->gtArgs.GetUserArgByIndex(1)->GetNode();
+        valueArg    = comp->gtNewZeroConNode(TYP_INT);
+        lengthScale = 1; // it's always in bytes
+    }
+
+    if (!lengthArg->IsIntegralConst())
+    {
+        JITDUMP("Length is not a constant - bail out.\n");
+        return false;
+    }
+
+    if (!valueArg->IsCnsIntOrI() || !valueArg->TypeIs(TYP_INT))
+    {
+        JITDUMP("Value is not a constant - bail out.\n");
+        return false;
+    }
+
+    // If value is not zero, we can only unroll for single-byte values
+    if (!valueArg->IsIntegralConst(0) && (lengthScale != 1))
+    {
+        JITDUMP("Value is not unroll-friendly - bail out.\n");
+        return false;
+    }
+
+    // Convert lenCns to bytes
+    ssize_t lenCns = lengthArg->AsIntCon()->IconValue();
+    if (CheckedOps::MulOverflows((target_ssize_t)lenCns, (target_ssize_t)lengthScale, CheckedOps::Signed))
+    {
+        // lenCns overflows
+        JITDUMP("lenCns * lengthScale overflows - bail out.\n")
+        return false;
+    }
+    lenCns *= (ssize_t)lengthScale;
+
+    // TODO-CQ: drop the whole thing in case of lenCns = 0
+    if ((lenCns <= 0) || (lenCns > (ssize_t)comp->getUnrollThreshold(Compiler::UnrollKind::Memset)))
+    {
+        JITDUMP("Size is either 0 or too big to unroll - bail out.\n")
+        return false;
+    }
+
+    JITDUMP("Accepted for unrolling!\nOld tree:\n");
+    DISPTREERANGE(BlockRange(), call);
+
+    if (!valueArg->IsIntegralConst(0))
+    {
+        // Non-zero (byte) value, wrap value with GT_INIT_VAL
+        GenTree* initVal = valueArg;
+        valueArg         = comp->gtNewOperNode(GT_INIT_VAL, TYP_INT, initVal);
+        BlockRange().InsertAfter(initVal, valueArg);
+    }
+
+    GenTreeBlk* storeBlk =
+        comp->gtNewStoreBlkNode(comp->typGetBlkLayout((unsigned)lenCns), dstRefArg, valueArg, GTF_IND_UNALIGNED);
+    storeBlk->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+
+    // Insert/Remove trees into LIR
+    BlockRange().InsertBefore(call, storeBlk);
+    if (call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_ClearWithoutReferences))
+    {
+        // Value didn't exist in LIR previously
+        BlockRange().InsertBefore(storeBlk, valueArg);
+    }
+
+    // Remove the call and mark everything as unused ...
+    BlockRange().Remove(call, true);
+    // ... except the args we're going to re-use
+    dstRefArg->ClearUnusedValue();
+    valueArg->ClearUnusedValue();
+    if (valueArg->OperIs(GT_INIT_VAL))
+    {
+        valueArg->gtGetOp1()->ClearUnusedValue();
+    }
+
+    JITDUMP("\nNew tree:\n");
+    DISPTREERANGE(BlockRange(), storeBlk);
+    *next = storeBlk;
+    return true;
+}
+
 //------------------------------------------------------------------------
 // LowerCallMemmove: Replace Buffer.Memmove(DST, SRC, CNS_SIZE) with a GT_STORE_BLK:
+//    Do the same for CORINFO_HELP_MEMCPY(DST, SRC, CNS_SIZE)
 //
 //    *  STORE_BLK struct<CNS_SIZE> (copy) (Unroll)
 //    +--*  LCL_VAR   byref  dst
@@ -1859,7 +2153,8 @@ GenTree* Lowering::AddrGen(void* addr)
 bool Lowering::LowerCallMemmove(GenTreeCall* call, GenTree** next)
 {
     JITDUMP("Considering Memmove [%06d] for unrolling.. ", comp->dspTreeID(call))
-    assert(comp->lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_Buffer_Memmove);
+    assert(call->IsHelperCall(comp, CORINFO_HELP_MEMCPY) ||
+           (comp->lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_SpanHelpers_Memmove));
 
     assert(call->gtArgs.CountUserArgs() == 3);
 
@@ -1893,7 +2188,8 @@ bool Lowering::LowerCallMemmove(GenTreeCall* call, GenTree** next)
 
             // TODO-CQ: Use GenTreeBlk::BlkOpKindUnroll here if srcAddr and dstAddr don't overlap, thus, we can
             // unroll this memmove as memcpy - it doesn't require lots of temp registers
-            storeBlk->gtBlkOpKind = GenTreeBlk::BlkOpKindUnrollMemmove;
+            storeBlk->gtBlkOpKind = call->IsHelperCall(comp, CORINFO_HELP_MEMCPY) ? GenTreeBlk::BlkOpKindUnroll
+                                                                                  : GenTreeBlk::BlkOpKindUnrollMemmove;
 
             BlockRange().InsertBefore(call, srcBlk);
             BlockRange().InsertBefore(call, storeBlk);
@@ -2214,16 +2510,49 @@ GenTree* Lowering::LowerCall(GenTree* node)
     }
 
 #if defined(TARGET_AMD64) || defined(TARGET_ARM64)
+    GenTree* nextNode = nullptr;
     if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
     {
-        GenTree*       nextNode = nullptr;
-        NamedIntrinsic ni       = comp->lookupNamedIntrinsic(call->gtCallMethHnd);
-        if (((ni == NI_System_Buffer_Memmove) && LowerCallMemmove(call, &nextNode)) ||
-            ((ni == NI_System_SpanHelpers_SequenceEqual) && LowerCallMemcmp(call, &nextNode)))
+        switch (comp->lookupNamedIntrinsic(call->gtCallMethHnd))
         {
-            return nextNode;
+            case NI_System_SpanHelpers_Memmove:
+                if (LowerCallMemmove(call, &nextNode))
+                {
+                    return nextNode;
+                }
+                break;
+
+            case NI_System_SpanHelpers_SequenceEqual:
+                if (LowerCallMemcmp(call, &nextNode))
+                {
+                    return nextNode;
+                }
+                break;
+
+            case NI_System_SpanHelpers_Fill:
+            case NI_System_SpanHelpers_ClearWithoutReferences:
+                if (LowerCallMemset(call, &nextNode))
+                {
+                    return nextNode;
+                }
+                break;
+
+            default:
+                break;
         }
     }
+
+    // Try to lower CORINFO_HELP_MEMCPY to unrollable STORE_BLK
+    if (call->IsHelperCall(comp, CORINFO_HELP_MEMCPY) && LowerCallMemmove(call, &nextNode))
+    {
+        return nextNode;
+    }
+
+    // Try to lower CORINFO_HELP_MEMSET to unrollable STORE_BLK
+    if (call->IsHelperCall(comp, CORINFO_HELP_MEMSET) && LowerCallMemset(call, &nextNode))
+    {
+        return nextNode;
+    }
 #endif
 
     call->ClearOtherRegs();
@@ -3098,23 +3427,7 @@ void Lowering::LowerCFGCall(GenTreeCall* call)
             LowerNode(regNode);
 
             // Finally move all GT_PUTARG_* nodes
-            for (CallArg& arg : call->gtArgs.EarlyArgs())
-            {
-                GenTree* node = arg.GetEarlyNode();
-                // Non-value nodes in early args are setup nodes for late args.
-                if (node->IsValue())
-                {
-                    assert(node->OperIsPutArg() || node->OperIsFieldList());
-                    MoveCFGCallArg(call, node);
-                }
-            }
-
-            for (CallArg& arg : call->gtArgs.LateArgs())
-            {
-                GenTree* node = arg.GetLateNode();
-                assert(node->OperIsPutArg() || node->OperIsFieldList());
-                MoveCFGCallArg(call, node);
-            }
+            MoveCFGCallArgs(call);
             break;
         }
         case CFGCallKind::Dispatch:
@@ -3261,6 +3574,38 @@ void Lowering::MoveCFGCallArg(GenTreeCall* call, GenTree* node)
     BlockRange().InsertBefore(call, node);
 }
 
+//------------------------------------------------------------------------
+// MoveCFGCallArgs: Given a call that will be CFG transformed using the
+// validate+call scheme, move all GT_PUTARG_* or GT_FIELD_LIST nodes right before the call.
+//
+// Arguments:
+//    call - The call that is being CFG transformed
+//
+// Remarks:
+//    See comments in MoveCFGCallArg for more details.
+//
+void Lowering::MoveCFGCallArgs(GenTreeCall* call)
+{
+    // Finally move all GT_PUTARG_* nodes
+    for (CallArg& arg : call->gtArgs.EarlyArgs())
+    {
+        GenTree* node = arg.GetEarlyNode();
+        // Non-value nodes in early args are setup nodes for late args.
+        if (node->IsValue())
+        {
+            assert(node->OperIsPutArg() || node->OperIsFieldList());
+            MoveCFGCallArg(call, node);
+        }
+    }
+
+    for (CallArg& arg : call->gtArgs.LateArgs())
+    {
+        GenTree* node = arg.GetLateNode();
+        assert(node->OperIsPutArg() || node->OperIsFieldList());
+        MoveCFGCallArg(call, node);
+    }
+}
+
 #ifndef TARGET_64BIT
 //------------------------------------------------------------------------
 // Lowering::DecomposeLongCompare: Decomposes a TYP_LONG compare node.
@@ -3549,7 +3894,7 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp)
 #ifdef TARGET_XARCH
                  || IsContainableMemoryOp(castOp)
 #endif
-                     );
+                );
 
             if (removeCast)
             {
@@ -4433,10 +4778,10 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
             }
             convertToStoreObj = false;
 #else  // TARGET_ARM64
-            // This optimization on arm64 allows more SIMD16 vars to be enregistered but it could cause
-            // regressions when there are many calls and before/after each one we have to store/save the upper
-            // half of these registers. So enable this for arm64 only when LSRA is taught not to allocate registers when
-            // it would have to spilled too many times.
+       // This optimization on arm64 allows more SIMD16 vars to be enregistered but it could cause
+       // regressions when there are many calls and before/after each one we have to store/save the upper
+       // half of these registers. So enable this for arm64 only when LSRA is taught not to allocate registers when
+       // it would have to spilled too many times.
             convertToStoreObj = true;
 #endif // TARGET_ARM64
         }
@@ -4455,7 +4800,6 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
 
             addr->gtFlags |= lclStore->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG);
 
-            // Create the assignment node.
             lclStore->ChangeOper(GT_STORE_BLK);
             GenTreeBlk* objStore = lclStore->AsBlk();
             objStore->gtFlags    = GTF_ASG | GTF_IND_NONFAULTING | GTF_IND_TGT_NOT_HEAP;
@@ -4753,8 +5097,8 @@ void Lowering::LowerCallStruct(GenTreeCall* call)
                     break;
                 }
 #endif // FEATURE_SIMD
-                // importer has a separate mechanism to retype calls to helpers,
-                // keep it for now.
+       // importer has a separate mechanism to retype calls to helpers,
+       // keep it for now.
                 assert(user->TypeIs(TYP_REF) || (user->TypeIs(TYP_I_IMPL) && comp->IsTargetAbi(CORINFO_NATIVEAOT_ABI)));
                 assert(call->IsHelperCall());
                 assert(returnType == user->TypeGet());
@@ -5329,7 +5673,6 @@ void Lowering::InsertPInvokeMethodProlog()
     // On 32-bit targets, CORINFO_HELP_INIT_PINVOKE_FRAME initializes the PInvoke frame and then pushes it onto
     // the current thread's Frame stack. On 64-bit targets, it only initializes the PInvoke frame.
     // As a result, don't push the frame onto the frame stack here for any 64-bit targets
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef TARGET_64BIT
 #ifdef USE_PER_FRAME_PINVOKE_INIT
@@ -5394,7 +5737,6 @@ void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTree*
 
     // Pop the frame if necessary. This always happens in the epilog on 32-bit targets. For 64-bit targets, we only do
     // this in the epilog for IL stubs; for non-IL stubs the frame is popped after every PInvoke call.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef USE_PER_FRAME_PINVOKE_INIT
     // For IL stubs, we push the frame once even when we're doing per-pinvoke init
@@ -5544,7 +5886,6 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
 
     // Push the PInvoke frame if necessary. On 32-bit targets this only happens in the method prolog if a method
     // contains PInvokes; on 64-bit targets this is necessary in non-stubs.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef USE_PER_FRAME_PINVOKE_INIT
     if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
@@ -5622,7 +5963,6 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
 
     // Pop the frame if necessary. On 32-bit targets this only happens in the method epilog; on 64-bit targets
     // this happens after every PInvoke call in non-stubs. 32-bit targets instead mark the frame as inactive.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef USE_PER_FRAME_PINVOKE_INIT
     if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
@@ -5793,6 +6133,26 @@ GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call)
         InsertPInvokeCallEpilog(call);
     }
 
+#ifdef SWIFT_SUPPORT
+    // For Swift calls that require error handling, ensure the GT_SWIFT_ERROR node
+    // that consumes the error register is the call node's successor.
+    // This is to simplify logic for marking the error register as busy in LSRA.
+    if (call->HasSwiftErrorHandling())
+    {
+        GenTree* swiftErrorNode = call->gtNext;
+        assert(swiftErrorNode != nullptr);
+
+        while (!swiftErrorNode->OperIs(GT_SWIFT_ERROR))
+        {
+            swiftErrorNode = swiftErrorNode->gtNext;
+            assert(swiftErrorNode != nullptr);
+        }
+
+        BlockRange().Remove(swiftErrorNode);
+        BlockRange().InsertAfter(call, swiftErrorNode);
+    }
+#endif // SWIFT_SUPPORT
+
     return result;
 }
 
@@ -6596,7 +6956,6 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
         // On ARM64 we will use a 32x32->64 bit multiply instead of a 64x64->64 one.
         bool widenToNativeIntForMul = (type != TYP_I_IMPL) && !simpleMul;
 #else
-        CLANG_FORMAT_COMMENT_ANCHOR;
         bool widenToNativeIntForMul = (type != TYP_I_IMPL);
 #endif
 
@@ -6795,7 +7154,7 @@ bool Lowering::TryLowerConstIntDivOrMod(GenTree* node, GenTree** nextNode)
     }
 
     size_t absDivisorValue =
-        (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) : static_cast<size_t>(abs(divisorValue));
+        (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) : static_cast<size_t>(std::abs(divisorValue));
 
     if (!isPow2(absDivisorValue))
     {
@@ -7728,7 +8087,7 @@ void Lowering::ContainCheckNode(GenTree* node)
 #if FEATURE_ARG_SPLIT
         case GT_PUTARG_SPLIT:
 #endif // FEATURE_ARG_SPLIT
-            // The regNum must have been set by the lowering of the call.
+       // The regNum must have been set by the lowering of the call.
             assert(node->GetRegNum() != REG_NA);
             break;
 #ifdef TARGET_XARCH
@@ -7844,6 +8203,141 @@ void Lowering::ContainCheckBitCast(GenTree* node)
     }
 }
 
+//------------------------------------------------------------------------
+// LowerBlockStoreAsHelperCall: Lower a block store node as a memset/memcpy call
+//
+// Arguments:
+//    blkNode - The block store node to lower
+//
+void Lowering::LowerBlockStoreAsHelperCall(GenTreeBlk* blkNode)
+{
+    // We shouldn't be using helper calls for blocks on heap containing GC pointers.
+    // due to atomicity guarantees.
+    assert(!blkNode->IsZeroingGcPointersOnHeap());
+
+    LIR::Use use;
+    assert(!BlockRange().TryGetUse(blkNode, &use));
+
+    const bool isVolatile = blkNode->IsVolatile();
+
+    GenTree* dest = blkNode->Addr();
+    GenTree* data = blkNode->Data();
+    GenTree* size;
+
+    CorInfoHelpFunc helper;
+
+    // Is it Memset ...
+    if (blkNode->OperIsInitBlkOp())
+    {
+        helper = CORINFO_HELP_MEMSET;
+
+        // Drop GT_INIT_VAL nodes
+        if (data->OperIsInitVal())
+        {
+            BlockRange().Remove(data);
+            data = data->gtGetOp1();
+        }
+    }
+    else
+    {
+        // ... or Memcpy?
+        helper = CORINFO_HELP_MEMCPY;
+
+        if (data->OperIs(GT_IND))
+        {
+            // Drop GT_IND nodes
+            BlockRange().Remove(data);
+            data = data->AsIndir()->Addr();
+        }
+        else
+        {
+            assert(data->OperIs(GT_LCL_VAR, GT_LCL_FLD));
+
+            // Convert local to LCL_ADDR
+            unsigned lclOffset = data->AsLclVarCommon()->GetLclOffs();
+
+            data->ChangeOper(GT_LCL_ADDR);
+            data->ChangeType(TYP_I_IMPL);
+            data->AsLclFld()->SetLclOffs(lclOffset);
+            data->ClearContained();
+        }
+    }
+
+    // Size is a constant
+    size = comp->gtNewIconNode(blkNode->Size(), TYP_I_IMPL);
+    BlockRange().InsertBefore(data, size);
+
+    // A hacky way to safely call fgMorphTree in Lower
+    GenTree* destPlaceholder = comp->gtNewZeroConNode(dest->TypeGet());
+    GenTree* dataPlaceholder = comp->gtNewZeroConNode(genActualType(data));
+    GenTree* sizePlaceholder = comp->gtNewZeroConNode(genActualType(size));
+
+    const bool isMemzero = helper == CORINFO_HELP_MEMSET ? data->IsIntegralConst(0) : false;
+
+    GenTreeCall* call;
+    if (isMemzero)
+    {
+        BlockRange().Remove(data);
+        call = comp->gtNewHelperCallNode(CORINFO_HELP_MEMZERO, TYP_VOID, destPlaceholder, sizePlaceholder);
+    }
+    else
+    {
+        call = comp->gtNewHelperCallNode(helper, TYP_VOID, destPlaceholder, dataPlaceholder, sizePlaceholder);
+    }
+    comp->fgMorphArgs(call);
+
+    LIR::Range range      = LIR::SeqTree(comp, call);
+    GenTree*   rangeStart = range.FirstNode();
+    GenTree*   rangeEnd   = range.LastNode();
+
+    BlockRange().InsertBefore(blkNode, std::move(range));
+    blkNode->gtBashToNOP();
+
+    LIR::Use destUse;
+    LIR::Use sizeUse;
+    BlockRange().TryGetUse(destPlaceholder, &destUse);
+    BlockRange().TryGetUse(sizePlaceholder, &sizeUse);
+    destUse.ReplaceWith(dest);
+    sizeUse.ReplaceWith(size);
+    destPlaceholder->SetUnusedValue();
+    sizePlaceholder->SetUnusedValue();
+
+    if (!isMemzero)
+    {
+        LIR::Use dataUse;
+        BlockRange().TryGetUse(dataPlaceholder, &dataUse);
+        dataUse.ReplaceWith(data);
+        dataPlaceholder->SetUnusedValue();
+    }
+
+    LowerRange(rangeStart, rangeEnd);
+
+    // Finally move all GT_PUTARG_* nodes
+    // Re-use the existing logic for CFG call args here
+    MoveCFGCallArgs(call);
+
+    BlockRange().Remove(destPlaceholder);
+    BlockRange().Remove(sizePlaceholder);
+    if (!isMemzero)
+    {
+        BlockRange().Remove(dataPlaceholder);
+    }
+
+// Wrap with memory barriers on weak memory models
+// if the block store was volatile
+#ifndef TARGET_XARCH
+    if (isVolatile)
+    {
+        GenTree* firstBarrier  = comp->gtNewMemoryBarrier();
+        GenTree* secondBarrier = comp->gtNewMemoryBarrier(/*loadOnly*/ true);
+        BlockRange().InsertBefore(call, firstBarrier);
+        BlockRange().InsertAfter(call, secondBarrier);
+        LowerNode(firstBarrier);
+        LowerNode(secondBarrier);
+    }
+#endif
+}
+
 struct StoreCoalescingData
 {
     var_types targetType;
@@ -8306,7 +8800,7 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
     //
     const bool isContainable = IsInvariantInRange(ind->Addr(), ind);
 #else
-    const bool     isContainable         = true;
+    const bool isContainable = true;
 #endif
     TryCreateAddrMode(ind->Addr(), isContainable, ind);
 
@@ -8363,14 +8857,13 @@ GenTree* Lowering::LowerIndir(GenTreeIndir* ind)
         // TODO-Cleanup: We're passing isContainable = true but ContainCheckIndir rejects
         // address containment in some cases so we end up creating trivial (reg + offfset)
         // or (reg + reg) LEAs that are not necessary.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_ARM64)
         // Verify containment safety before creating an LEA that must be contained.
         //
         const bool isContainable = IsInvariantInRange(ind->Addr(), ind);
 #else
-        const bool isContainable         = true;
+        const bool isContainable = true;
 #endif
 
         TryCreateAddrMode(ind->Addr(), isContainable, ind);
@@ -8457,7 +8950,7 @@ bool Lowering::OptimizeForLdp(GenTreeIndir* ind)
 
         JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n",
                 Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), (unsigned)offs, (unsigned)prev.Offset);
-        if (abs(offs - prev.Offset) == genTypeSize(ind))
+        if (std::abs(offs - prev.Offset) == genTypeSize(ind))
         {
             JITDUMP("  ..and they are amenable to ldp optimization\n");
             if (TryMakeIndirsAdjacent(prevIndir, ind))
@@ -8503,7 +8996,7 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi
 
         // We can reorder indirs with some calls, but introducing a LIR edge
         // that spans a call can introduce spills (or callee-saves).
-        if (cur->IsCall() || (cur->OperIsStoreBlk() && (cur->AsBlk()->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper)))
+        if (cur->IsCall())
         {
             JITDUMP("  ..but they are separated by node [%06u] that kills registers\n", Compiler::dspTreeID(cur));
             return false;
@@ -8736,6 +9229,36 @@ void Lowering::UnmarkTree(GenTree* node)
 
 #endif // TARGET_ARM64
 
+//------------------------------------------------------------------------
+// IsContainableLclAddr: Can a given local address be contained?
+//
+// Most local addresses can be contained, however, there are two edge cases
+// where this is not true:
+// 1. When the resulting memory access will go beyond the local's location.
+// 2. When the resulting access may go past a UINT16_MAX.
+// Both of these requirements are imposed by the emitter.
+//
+// Arguments:
+//    lclAddr    - The local address node
+//    accessSize - The access size (of an indirection)
+//
+// Return Value:
+//    Whether an indirection of "accessSize" may contain "lclAddr".
+//
+bool Lowering::IsContainableLclAddr(GenTreeLclFld* lclAddr, unsigned accessSize) const
+{
+    if (CheckedOps::AddOverflows<int32_t>(lclAddr->GetLclOffs(), accessSize, CheckedOps::Unsigned) ||
+        !comp->IsValidLclAddr(lclAddr->GetLclNum(), lclAddr->GetLclOffs() + accessSize - 1))
+    {
+        // We depend on containment for correctness of liveness updates in codegen. Therefore, all
+        // locals that may "return false" here MUST be address-exposed. Local morph ensures this.
+        assert(comp->lvaGetDesc(lclAddr)->IsAddressExposed());
+        return false;
+    }
+
+    return true;
+}
+
 //------------------------------------------------------------------------
 // TransformUnusedIndirection: change the opcode and the type of the unused indirection.
 //
@@ -8771,7 +9294,7 @@ void Lowering::TransformUnusedIndirection(GenTreeIndir* ind, Compiler* comp, Bas
 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
     bool useNullCheck = true;
 #elif defined(TARGET_ARM)
-    bool           useNullCheck          = false;
+    bool useNullCheck = false;
 #else  // TARGET_XARCH
     bool useNullCheck = !ind->Addr()->isContained();
     ind->ClearDontExtend();
@@ -8840,7 +9363,6 @@ void Lowering::LowerLclHeap(GenTree* node)
                     GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, heapLcl, zero, comp->typGetBlkLayout((unsigned)alignedSize));
                 storeBlk->gtFlags |= (GTF_IND_UNALIGNED | GTF_ASG | GTF_EXCEPT | GTF_GLOB_REF);
                 BlockRange().InsertAfter(use.Def(), heapLcl, zero, storeBlk);
-                LowerNode(storeBlk);
             }
             else
             {
@@ -8861,13 +9383,10 @@ void Lowering::LowerLclHeap(GenTree* node)
 //
 void Lowering::LowerBlockStoreCommon(GenTreeBlk* blkNode)
 {
-    assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK));
+    assert(blkNode->OperIs(GT_STORE_BLK));
 
     if (blkNode->ContainsReferences() && !blkNode->OperIsCopyBlkOp())
     {
-        // Make sure we don't use GT_STORE_DYN_BLK
-        assert(blkNode->OperIs(GT_STORE_BLK));
-
         // and we only zero it (and that zero is better to be not hoisted/CSE'd)
         assert(blkNode->Data()->IsIntegralConst(0));
     }
@@ -8903,17 +9422,12 @@ void Lowering::LowerBlockStoreCommon(GenTreeBlk* blkNode)
 //
 bool Lowering::TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode)
 {
-    assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK));
+    assert(blkNode->OperIs(GT_STORE_BLK));
     if (!comp->opts.OptimizationEnabled())
     {
         return false;
     }
 
-    if (blkNode->OperIs(GT_STORE_DYN_BLK))
-    {
-        return false;
-    }
-
     var_types regType = blkNode->GetLayout()->GetRegisterType();
     if (regType == TYP_UNDEF)
     {
@@ -8999,14 +9513,14 @@ void Lowering::TryRetypingFloatingPointStoreToIntegerStore(GenTree* store)
         return;
     }
 
-    GenTree* data = store->Data();
-    assert(store->TypeGet() == data->TypeGet());
+    GenTree* value = store->Data();
+    assert(store->TypeGet() == value->TypeGet());
 
     // Optimize *x = DCON to *x = ICON which can be slightly faster and/or smaller.
     //
-    if (data->IsCnsFltOrDbl())
+    if (value->IsCnsFltOrDbl())
     {
-        double    dblCns = data->AsDblCon()->DconValue();
+        double    dblCns = value->AsDblCon()->DconValue();
         ssize_t   intCns = 0;
         var_types type   = TYP_UNKNOWN;
         // XARCH: we can always contain the immediates.
@@ -9014,12 +9528,11 @@ void Lowering::TryRetypingFloatingPointStoreToIntegerStore(GenTree* store)
         //        section and it is not a clear win to switch them to inline integers.
         // ARM:   FP constants are assembled from integral ones, so it is always profitable
         //        to directly use the integers as it avoids the int -> float conversion.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_XARCH) || defined(TARGET_ARM)
         bool shouldSwitchToInteger = true;
 #else // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
-        bool       shouldSwitchToInteger = FloatingPointUtils::isPositiveZero(dblCns);
+        bool shouldSwitchToInteger = FloatingPointUtils::isPositiveZero(dblCns);
 #endif
 
         if (shouldSwitchToInteger)
@@ -9042,7 +9555,7 @@ void Lowering::TryRetypingFloatingPointStoreToIntegerStore(GenTree* store)
 
         if (type != TYP_UNKNOWN)
         {
-            data->BashToConst(intCns, type);
+            value->BashToConst(intCns, type);
 
             assert(!store->OperIsLocalStore() || comp->lvaGetDesc(store->AsLclVarCommon())->lvDoNotEnregister);
             if (store->OperIs(GT_STORE_LCL_VAR))
diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h
index 3812d3c412eb..4813fb9a7d4a 100644
--- a/src/coreclr/jit/lower.h
+++ b/src/coreclr/jit/lower.h
@@ -88,14 +88,14 @@ class Lowering final : public Phase
     void ContainCheckLclHeap(GenTreeOp* node);
     void ContainCheckRet(GenTreeUnOp* ret);
 #ifdef TARGET_ARM64
-    bool TryLowerAndOrToCCMP(GenTreeOp* tree, GenTree** next);
+    bool      TryLowerAndOrToCCMP(GenTreeOp* tree, GenTree** next);
     insCflags TruthifyingFlags(GenCondition cond);
-    void ContainCheckConditionalCompare(GenTreeCCMP* ccmp);
-    void ContainCheckNeg(GenTreeOp* neg);
-    void TryLowerCnsIntCselToCinc(GenTreeOp* select, GenTree* cond);
-    void TryLowerCselToCSOp(GenTreeOp* select, GenTree* cond);
-    bool TryLowerAddSubToMulLongOp(GenTreeOp* op, GenTree** next);
-    bool TryLowerNegToMulLongOp(GenTreeOp* op, GenTree** next);
+    void      ContainCheckConditionalCompare(GenTreeCCMP* ccmp);
+    void      ContainCheckNeg(GenTreeOp* neg);
+    void      TryLowerCnsIntCselToCinc(GenTreeOp* select, GenTree* cond);
+    void      TryLowerCselToCSOp(GenTreeOp* select, GenTree* cond);
+    bool      TryLowerAddSubToMulLongOp(GenTreeOp* op, GenTree** next);
+    bool      TryLowerNegToMulLongOp(GenTreeOp* op, GenTree** next);
 #endif
     void ContainCheckSelect(GenTreeOp* select);
     void ContainCheckBitCast(GenTree* node);
@@ -114,7 +114,7 @@ class Lowering final : public Phase
     void ContainCheckIntrinsic(GenTreeOp* node);
 #endif // TARGET_XARCH
 #ifdef FEATURE_HW_INTRINSICS
-    void ContainCheckHWIntrinsicAddr(GenTreeHWIntrinsic* node, GenTree* addr);
+    void ContainCheckHWIntrinsicAddr(GenTreeHWIntrinsic* node, GenTree* addr, unsigned size);
     void ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node);
 #ifdef TARGET_XARCH
     void TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode, GenTreeVecCon* childNode);
@@ -129,7 +129,7 @@ class Lowering final : public Phase
     static bool CheckBlock(Compiler* compiler, BasicBlock* block);
 #endif // DEBUG
 
-    void LowerBlock(BasicBlock* block);
+    void     LowerBlock(BasicBlock* block);
     GenTree* LowerNode(GenTree* node);
 
     bool IsCFGCallArgInvariantInRange(GenTree* node, GenTree* endExclusive);
@@ -138,26 +138,28 @@ class Lowering final : public Phase
     // Call Lowering
     // ------------------------------
     GenTree* LowerCall(GenTree* call);
-    bool LowerCallMemmove(GenTreeCall* call, GenTree** next);
-    bool LowerCallMemcmp(GenTreeCall* call, GenTree** next);
-    void LowerCFGCall(GenTreeCall* call);
-    void MoveCFGCallArg(GenTreeCall* call, GenTree* node);
+    bool     LowerCallMemmove(GenTreeCall* call, GenTree** next);
+    bool     LowerCallMemcmp(GenTreeCall* call, GenTree** next);
+    bool     LowerCallMemset(GenTreeCall* call, GenTree** next);
+    void     LowerCFGCall(GenTreeCall* call);
+    void     MoveCFGCallArgs(GenTreeCall* call);
+    void     MoveCFGCallArg(GenTreeCall* call, GenTree* node);
 #ifndef TARGET_64BIT
     GenTree* DecomposeLongCompare(GenTree* cmp);
 #endif
-    GenTree* OptimizeConstCompare(GenTree* cmp);
-    GenTree* LowerCompare(GenTree* cmp);
-    GenTree* LowerJTrue(GenTreeOp* jtrue);
-    GenTree* LowerSelect(GenTreeConditional* cond);
-    bool TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* code);
+    GenTree*   OptimizeConstCompare(GenTree* cmp);
+    GenTree*   LowerCompare(GenTree* cmp);
+    GenTree*   LowerJTrue(GenTreeOp* jtrue);
+    GenTree*   LowerSelect(GenTreeConditional* cond);
+    bool       TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* code);
     GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition);
-    void LowerJmpMethod(GenTree* jmp);
-    void LowerRet(GenTreeUnOp* ret);
-    void LowerStoreLocCommon(GenTreeLclVarCommon* lclVar);
-    void LowerRetStruct(GenTreeUnOp* ret);
-    void LowerRetSingleRegStructLclVar(GenTreeUnOp* ret);
-    void LowerCallStruct(GenTreeCall* call);
-    void LowerStoreSingleRegCallStruct(GenTreeBlk* store);
+    void       LowerJmpMethod(GenTree* jmp);
+    void       LowerRet(GenTreeUnOp* ret);
+    void       LowerStoreLocCommon(GenTreeLclVarCommon* lclVar);
+    void       LowerRetStruct(GenTreeUnOp* ret);
+    void       LowerRetSingleRegStructLclVar(GenTreeUnOp* ret);
+    void       LowerCallStruct(GenTreeCall* call);
+    void       LowerStoreSingleRegCallStruct(GenTreeBlk* store);
 #if !defined(WINDOWS_AMD64_ABI)
     GenTreeLclVar* SpillStructCallResult(GenTreeCall* call) const;
 #endif // WINDOWS_AMD64_ABI
@@ -166,29 +168,29 @@ class Lowering final : public Phase
     GenTree* LowerDirectCall(GenTreeCall* call);
     GenTree* LowerNonvirtPinvokeCall(GenTreeCall* call);
     GenTree* LowerTailCallViaJitHelper(GenTreeCall* callNode, GenTree* callTarget);
-    void LowerFastTailCall(GenTreeCall* callNode);
-    void RehomeArgForFastTailCall(unsigned int lclNum,
-                                  GenTree*     insertTempBefore,
-                                  GenTree*     lookForUsesStart,
-                                  GenTreeCall* callNode);
-    void InsertProfTailCallHook(GenTreeCall* callNode, GenTree* insertionPoint);
+    void     LowerFastTailCall(GenTreeCall* callNode);
+    void     RehomeArgForFastTailCall(unsigned int lclNum,
+                                      GenTree*     insertTempBefore,
+                                      GenTree*     lookForUsesStart,
+                                      GenTreeCall* callNode);
+    void     InsertProfTailCallHook(GenTreeCall* callNode, GenTree* insertionPoint);
     GenTree* FindEarliestPutArg(GenTreeCall* call);
-    size_t MarkPutArgNodes(GenTree* node);
+    size_t   MarkPutArgNodes(GenTree* node);
     GenTree* LowerVirtualVtableCall(GenTreeCall* call);
     GenTree* LowerVirtualStubCall(GenTreeCall* call);
-    void LowerArgsForCall(GenTreeCall* call);
-    void ReplaceArgWithPutArgOrBitcast(GenTree** ppChild, GenTree* newNode);
+    void     LowerArgsForCall(GenTreeCall* call);
+    void     ReplaceArgWithPutArgOrBitcast(GenTree** ppChild, GenTree* newNode);
     GenTree* NewPutArg(GenTreeCall* call, GenTree* arg, CallArg* callArg, var_types type);
-    void LowerArg(GenTreeCall* call, CallArg* callArg, bool late);
+    void     LowerArg(GenTreeCall* call, CallArg* callArg, bool late);
 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
     GenTree* LowerFloatArg(GenTree** pArg, CallArg* callArg);
     GenTree* LowerFloatArgReg(GenTree* arg, regNumber regNum);
 #endif
 
-    void InsertPInvokeCallProlog(GenTreeCall* call);
-    void InsertPInvokeCallEpilog(GenTreeCall* call);
-    void InsertPInvokeMethodProlog();
-    void InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTree* lastExpr));
+    void     InsertPInvokeCallProlog(GenTreeCall* call);
+    void     InsertPInvokeCallEpilog(GenTreeCall* call);
+    void     InsertPInvokeMethodProlog();
+    void     InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTree* lastExpr));
     GenTree* SetGCState(int cns);
     GenTree* CreateReturnTrapSeq();
     enum FrameLinkAction
@@ -240,16 +242,16 @@ class Lowering final : public Phase
         GenTree* oldUseNode = use.Def();
         if ((oldUseNode->gtOper != GT_LCL_VAR) || (tempNum != BAD_VAR_NUM))
         {
-            GenTree* assign;
-            use.ReplaceWithLclVar(comp, tempNum, &assign);
+            GenTree* store;
+            use.ReplaceWithLclVar(comp, tempNum, &store);
 
             GenTree* newUseNode = use.Def();
             ContainCheckRange(oldUseNode->gtNext, newUseNode);
 
-            // We need to lower the LclVar and assignment since there may be certain
+            // We need to lower the LclVar and store since there may be certain
             // types or scenarios, such as TYP_SIMD12, that need special handling
 
-            LowerNode(assign);
+            LowerNode(store);
             LowerNode(newUseNode);
 
             return newUseNode->AsLclVar();
@@ -314,30 +316,31 @@ class Lowering final : public Phase
 #endif // defined(TARGET_XARCH)
 
     // Per tree node member functions
-    void LowerStoreIndirCommon(GenTreeStoreInd* ind);
+    void     LowerStoreIndirCommon(GenTreeStoreInd* ind);
     GenTree* LowerIndir(GenTreeIndir* ind);
-    bool OptimizeForLdp(GenTreeIndir* ind);
-    bool TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir);
-    void MarkTree(GenTree* root);
-    void UnmarkTree(GenTree* root);
-    void LowerStoreIndir(GenTreeStoreInd* node);
-    void LowerStoreIndirCoalescing(GenTreeStoreInd* node);
+    bool     OptimizeForLdp(GenTreeIndir* ind);
+    bool     TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir);
+    void     MarkTree(GenTree* root);
+    void     UnmarkTree(GenTree* root);
+    void     LowerStoreIndir(GenTreeStoreInd* node);
+    void     LowerStoreIndirCoalescing(GenTreeStoreInd* node);
     GenTree* LowerAdd(GenTreeOp* node);
     GenTree* LowerMul(GenTreeOp* mul);
-    bool TryLowerAndNegativeOne(GenTreeOp* node, GenTree** nextNode);
+    bool     TryLowerAndNegativeOne(GenTreeOp* node, GenTree** nextNode);
     GenTree* LowerBinaryArithmetic(GenTreeOp* binOp);
-    bool LowerUnsignedDivOrMod(GenTreeOp* divMod);
-    bool TryLowerConstIntDivOrMod(GenTree* node, GenTree** nextNode);
+    bool     LowerUnsignedDivOrMod(GenTreeOp* divMod);
+    bool     TryLowerConstIntDivOrMod(GenTree* node, GenTree** nextNode);
     GenTree* LowerSignedDivOrMod(GenTree* node);
-    void LowerBlockStore(GenTreeBlk* blkNode);
-    void LowerBlockStoreCommon(GenTreeBlk* blkNode);
-    void LowerLclHeap(GenTree* node);
-    void ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr, GenTree* addrParent);
-    void LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode);
+    void     LowerBlockStore(GenTreeBlk* blkNode);
+    void     LowerBlockStoreCommon(GenTreeBlk* blkNode);
+    void     LowerBlockStoreAsHelperCall(GenTreeBlk* blkNode);
+    void     LowerLclHeap(GenTree* node);
+    void     ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr, GenTree* addrParent);
+    void     LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode);
     GenTree* LowerArrLength(GenTreeArrCommon* node);
 
 #ifdef TARGET_XARCH
-    void LowerPutArgStk(GenTreePutArgStk* putArgStk);
+    void     LowerPutArgStk(GenTreePutArgStk* putArgStk);
     GenTree* TryLowerMulWithConstant(GenTreeOp* node);
 #endif // TARGET_XARCH
 
@@ -348,10 +351,14 @@ class Lowering final : public Phase
     void TryRetypingFloatingPointStoreToIntegerStore(GenTree* store);
 
     GenTree* LowerSwitch(GenTree* node);
-    bool TryLowerSwitchToBitTest(
-        BasicBlock* jumpTable[], unsigned jumpCount, unsigned targetCount, BasicBlock* bbSwitch, GenTree* switchValue);
+    bool     TryLowerSwitchToBitTest(FlowEdge*   jumpTable[],
+                                     unsigned    jumpCount,
+                                     unsigned    targetCount,
+                                     BasicBlock* bbSwitch,
+                                     GenTree*    switchValue,
+                                     weight_t    defaultLikelihood);
 
-    void LowerCast(GenTree* node);
+    GenTree* LowerCast(GenTree* node);
 
 #if !CPU_LOAD_STORE_ARCH
     bool IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd);
@@ -367,12 +374,12 @@ class Lowering final : public Phase
     void LowerShift(GenTreeOp* shift);
 #ifdef FEATURE_HW_INTRINSICS
     GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node);
-    void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition);
+    void     LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition);
     GenTree* LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp);
     GenTree* LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node);
     GenTree* LowerHWIntrinsicDot(GenTreeHWIntrinsic* node);
 #if defined(TARGET_XARCH)
-    void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node);
+    void     LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node);
     GenTree* LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node);
     GenTree* LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
     GenTree* LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node);
@@ -382,7 +389,7 @@ class Lowering final : public Phase
     GenTree* TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode);
     GenTree* TryLowerAndOpToAndNot(GenTreeOp* andNode);
     GenTree* TryLowerXorOpToGetMaskUpToLowestSetBit(GenTreeOp* xorNode);
-    void LowerBswapOp(GenTreeOp* node);
+    void     LowerBswapOp(GenTreeOp* node);
 #elif defined(TARGET_ARM64)
     bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
@@ -489,6 +496,8 @@ class Lowering final : public Phase
         return false;
     }
 
+    bool IsContainableLclAddr(GenTreeLclFld* lclAddr, unsigned accessSize) const;
+
 #ifdef TARGET_ARM64
     bool IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childNode) const;
 #endif // TARGET_ARM64
@@ -580,7 +589,9 @@ class Lowering final : public Phase
         target_ssize_t Offset;
 
         SavedIndir(GenTreeIndir* indir, GenTreeLclVar* addrBase, target_ssize_t offset)
-            : Indir(indir), AddrBase(addrBase), Offset(offset)
+            : Indir(indir)
+            , AddrBase(addrBase)
+            , Offset(offset)
         {
         }
     };
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index 5c0acbbdc401..9731331885be 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -585,8 +585,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
             src = src->AsUnOp()->gtGetOp1();
         }
 
-        if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)) &&
-            src->OperIs(GT_CNS_INT))
+        if ((size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)) && src->OperIs(GT_CNS_INT))
         {
             blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
 
@@ -634,7 +633,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         }
         else
         {
-            blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+            LowerBlockStoreAsHelperCall(blkNode);
+            return;
         }
     }
     else
@@ -650,7 +650,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         }
 
         ClassLayout* layout               = blkNode->GetLayout();
-        bool         doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+        bool         doCpObj              = layout->HasGCPtr();
         unsigned     copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
 
         if (doCpObj && (size <= copyBlockUnrollLimit))
@@ -685,9 +685,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         }
         else
         {
-            assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK));
-
-            blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+            assert(blkNode->OperIs(GT_STORE_BLK));
+            LowerBlockStoreAsHelperCall(blkNode);
         }
     }
 }
@@ -706,7 +705,7 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT
     assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll));
     assert(size < INT32_MAX);
 
-    if (addr->OperIs(GT_LCL_ADDR))
+    if (addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), size))
     {
         addr->SetContained();
         return;
@@ -727,7 +726,7 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT
     {
         return;
     }
-#else // !TARGET_ARM
+#else  // !TARGET_ARM
     if ((ClrSafeInt<int>(offset) + ClrSafeInt<int>(size)).IsOverflow())
     {
         return;
@@ -779,7 +778,7 @@ void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode)
 //    tree - GT_CAST node to be lowered
 //
 // Return Value:
-//    None.
+//    nextNode to be lowered if tree is modified else returns nullptr
 //
 // Notes:
 //    Casts from float/double to a smaller int type are transformed as follows:
@@ -792,7 +791,7 @@ void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode)
 //    don't expect to see them here.
 //    i) GT_CAST(float/double, int type with overflow detection)
 //
-void Lowering::LowerCast(GenTree* tree)
+GenTree* Lowering::LowerCast(GenTree* tree)
 {
     assert(tree->OperGet() == GT_CAST);
 
@@ -815,6 +814,8 @@ void Lowering::LowerCast(GenTree* tree)
 
     // Now determine if we have operands that should be contained.
     ContainCheckCast(tree->AsCast());
+
+    return nullptr;
 }
 
 //------------------------------------------------------------------------
@@ -2061,7 +2062,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode)
             MakeSrcContained(indirNode, addr);
         }
     }
-    else if (addr->OperIs(GT_LCL_ADDR))
+    else if (addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), indirNode->Size()))
     {
         // These nodes go into an addr mode:
         // - GT_LCL_ADDR is a stack addr mode.
@@ -3188,6 +3189,24 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                 break;
             }
 
+            case NI_Sve_CreateTrueMaskByte:
+            case NI_Sve_CreateTrueMaskDouble:
+            case NI_Sve_CreateTrueMaskInt16:
+            case NI_Sve_CreateTrueMaskInt32:
+            case NI_Sve_CreateTrueMaskInt64:
+            case NI_Sve_CreateTrueMaskSByte:
+            case NI_Sve_CreateTrueMaskSingle:
+            case NI_Sve_CreateTrueMaskUInt16:
+            case NI_Sve_CreateTrueMaskUInt32:
+            case NI_Sve_CreateTrueMaskUInt64:
+                assert(hasImmediateOperand);
+                assert(varTypeIsIntegral(intrin.op1));
+                if (intrin.op1->IsCnsIntOrI())
+                {
+                    MakeSrcContained(node, intrin.op1);
+                }
+                break;
+
             default:
                 unreached();
         }
diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp
index 5110442eda10..4e826be0b225 100644
--- a/src/coreclr/jit/lowerloongarch64.cpp
+++ b/src/coreclr/jit/lowerloongarch64.cpp
@@ -296,8 +296,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
             src = src->AsUnOp()->gtGetOp1();
         }
 
-        if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)) &&
-            src->OperIs(GT_CNS_INT))
+        if ((size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)) && src->OperIs(GT_CNS_INT))
         {
             blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
 
@@ -334,7 +333,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         }
         else
         {
-            blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+            LowerBlockStoreAsHelperCall(blkNode);
+            return;
         }
     }
     else
@@ -350,7 +350,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         }
 
         ClassLayout* layout               = blkNode->GetLayout();
-        bool         doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+        bool         doCpObj              = layout->HasGCPtr();
         unsigned     copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
 
         if (doCpObj && (size <= copyBlockUnrollLimit))
@@ -386,9 +386,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         }
         else
         {
-            assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK));
-
-            blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+            assert(blkNode->OperIs(GT_STORE_BLK));
+            LowerBlockStoreAsHelperCall(blkNode);
         }
     }
 }
@@ -408,7 +407,7 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT
     assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll));
     assert(size < INT32_MAX);
 
-    if (addr->OperIs(GT_LCL_ADDR))
+    if (addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), size))
     {
         addr->SetContained();
         return;
@@ -486,7 +485,8 @@ void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode)
         }
 
         // Codegen supports containment of local addresses under BLKs.
-        if (src->OperIs(GT_BLK) && src->AsBlk()->Addr()->IsLclVarAddr())
+        if (src->OperIs(GT_BLK) && src->AsBlk()->Addr()->IsLclVarAddr() &&
+            IsContainableLclAddr(src->AsBlk()->Addr()->AsLclFld(), src->AsBlk()->Size()))
         {
             // TODO-LOONGARCH64-CQ: support containment of LCL_ADDR with non-zero offset too.
             MakeSrcContained(src, src->AsBlk()->Addr());
@@ -515,7 +515,7 @@ void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode)
 //    i) GT_CAST(float/double, int type with overflow detection)
 //
 
-void Lowering::LowerCast(GenTree* tree)
+GenTree* Lowering::LowerCast(GenTree* tree)
 {
     assert(tree->OperGet() == GT_CAST);
 
@@ -538,6 +538,8 @@ void Lowering::LowerCast(GenTree* tree)
 
     // Now determine if we have operands that should be contained.
     ContainCheckCast(tree->AsCast());
+
+    return nullptr;
 }
 
 //------------------------------------------------------------------------
@@ -705,7 +707,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode)
     {
         MakeSrcContained(indirNode, addr);
     }
-    else if (addr->OperIs(GT_LCL_ADDR))
+    else if (addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), indirNode->Size()))
     {
         // These nodes go into an addr mode:
         // - GT_LCL_ADDR is a stack addr mode.
diff --git a/src/coreclr/jit/lowerriscv64.cpp b/src/coreclr/jit/lowerriscv64.cpp
index 4f60458fd251..aa8342ee1af5 100644
--- a/src/coreclr/jit/lowerriscv64.cpp
+++ b/src/coreclr/jit/lowerriscv64.cpp
@@ -245,7 +245,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
             src = src->AsUnOp()->gtGetOp1();
         }
 
-        if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= INITBLK_UNROLL_LIMIT) && src->OperIs(GT_CNS_INT))
+        if ((size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)) && src->OperIs(GT_CNS_INT))
         {
             blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
 
@@ -282,7 +282,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         }
         else
         {
-            blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+            LowerBlockStoreAsHelperCall(blkNode);
+            return;
         }
     }
     else
@@ -297,10 +298,11 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
             comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
         }
 
-        ClassLayout* layout  = blkNode->GetLayout();
-        bool         doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+        ClassLayout* layout               = blkNode->GetLayout();
+        bool         doCpObj              = layout->HasGCPtr();
+        unsigned     copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
 
-        if (doCpObj && (size <= CPBLK_UNROLL_LIMIT))
+        if (doCpObj && (size <= copyBlockUnrollLimit))
         {
             // No write barriers are needed on the stack.
             // If the layout contains a byref, then we know it must live on the stack.
@@ -320,7 +322,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
             assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL));
             blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll;
         }
-        else if (blkNode->OperIs(GT_STORE_BLK) && (size <= CPBLK_UNROLL_LIMIT))
+        else if (blkNode->OperIs(GT_STORE_BLK) && (size <= copyBlockUnrollLimit))
         {
             blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
 
@@ -333,9 +335,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         }
         else
         {
-            assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK));
-
-            blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+            assert(blkNode->OperIs(GT_STORE_BLK));
+            LowerBlockStoreAsHelperCall(blkNode);
         }
     }
 }
@@ -354,7 +355,7 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT
     assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll));
     assert(size < INT32_MAX);
 
-    if (addr->OperIs(GT_LCL_ADDR))
+    if (addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), size))
     {
         addr->SetContained();
         return;
@@ -433,7 +434,7 @@ void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode)
 //    i) GT_CAST(float/double, int type with overflow detection)
 //
 
-void Lowering::LowerCast(GenTree* tree)
+GenTree* Lowering::LowerCast(GenTree* tree)
 {
     assert(tree->OperGet() == GT_CAST);
 
@@ -456,6 +457,8 @@ void Lowering::LowerCast(GenTree* tree)
 
     // Now determine if we have operands that should be contained.
     ContainCheckCast(tree->AsCast());
+
+    return nullptr;
 }
 
 //------------------------------------------------------------------------
@@ -615,7 +618,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode)
     {
         MakeSrcContained(indirNode, addr);
     }
-    else if (addr->OperIs(GT_LCL_ADDR))
+    else if (addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), indirNode->Size()))
     {
         // These nodes go into an addr mode:
         // - GT_LCL_ADDR is a stack addr mode.
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index a7ba98cad90d..5a9b12ca4aa2 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -349,7 +349,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
             src = src->AsUnOp()->gtGetOp1();
         }
 
-        if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)))
+        if (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset))
         {
             if (!src->OperIs(GT_CNS_INT))
             {
@@ -407,14 +407,20 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         else
         {
         TOO_BIG_TO_UNROLL:
+            if (blkNode->IsZeroingGcPointersOnHeap())
+            {
+                blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop;
+            }
+            else
+            {
 #ifdef TARGET_AMD64
-            blkNode->gtBlkOpKind =
-                blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper;
+                LowerBlockStoreAsHelperCall(blkNode);
+                return;
 #else
-            // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86
-            blkNode->gtBlkOpKind =
-                blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindRepInstr;
+                // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86
+                blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
 #endif
+            }
         }
     }
     else
@@ -430,7 +436,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         }
 
         ClassLayout* layout               = blkNode->GetLayout();
-        bool         doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+        bool         doCpObj              = layout->HasGCPtr();
         unsigned     copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy, false);
 
 #ifndef JIT32_GCENCODER
@@ -510,10 +516,11 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         }
         else
         {
-            assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK));
+            assert(blkNode->OperIs(GT_STORE_BLK));
 
 #ifdef TARGET_AMD64
-            blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+            LowerBlockStoreAsHelperCall(blkNode);
+            return;
 #else
             // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86
             blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
@@ -522,13 +529,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
     }
 
     assert(blkNode->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid);
-
-#ifndef TARGET_X86
-    if ((MIN_ARG_AREA_FOR_CALL > 0) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper))
-    {
-        RequireOutgoingArgSpace(blkNode, MIN_ARG_AREA_FOR_CALL);
-    }
-#endif
 }
 
 //------------------------------------------------------------------------
@@ -545,7 +545,7 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT
     assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll));
     assert(size < INT32_MAX);
 
-    if (addr->OperIs(GT_LCL_ADDR))
+    if (addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), size))
     {
         addr->SetContained();
         return;
@@ -690,13 +690,13 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
                 else
 #endif // TARGET_X86
                     if (loadSize <= comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy))
-                {
-                    putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Unroll;
-                }
-                else
-                {
-                    putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::RepInstr;
-                }
+                    {
+                        putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Unroll;
+                    }
+                    else
+                    {
+                        putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::RepInstr;
+                    }
             }
             else // There are GC pointers.
             {
@@ -767,7 +767,7 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
 #if defined(TARGET_AMD64)
         && !src->IsIntegralConst(0)
 #endif // TARGET_AMD64
-            )
+    )
     {
         MakeSrcContained(putArgStk, src);
     }
@@ -815,12 +815,13 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
  * system.windows.forms, scimark, fractals, bio mums). If we ever find evidence that
  * doing this optimization is a win, should consider generating in-lined code.
  */
-void Lowering::LowerCast(GenTree* tree)
+GenTree* Lowering::LowerCast(GenTree* tree)
 {
     assert(tree->OperGet() == GT_CAST);
 
     GenTree*  castOp     = tree->AsCast()->CastOp();
     var_types castToType = tree->CastToType();
+    var_types dstType    = castToType;
     var_types srcType    = castOp->TypeGet();
     var_types tmpType    = TYP_UNDEF;
 
@@ -843,7 +844,7 @@ void Lowering::LowerCast(GenTree* tree)
     if (varTypeIsFloating(srcType))
     {
         noway_assert(!tree->gtOverflow());
-        noway_assert(castToType != TYP_ULONG);
+        assert(castToType != TYP_ULONG || comp->IsBaselineVector512IsaSupportedDebugOnly());
     }
     else if (srcType == TYP_UINT)
     {
@@ -851,8 +852,321 @@ void Lowering::LowerCast(GenTree* tree)
     }
     else if (srcType == TYP_ULONG)
     {
-        assert(castToType != TYP_FLOAT || comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+        assert(castToType != TYP_FLOAT || comp->IsBaselineVector512IsaSupportedDebugOnly());
+    }
+
+#if defined(TARGET_AMD64)
+    // Handle saturation logic for X64
+    if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType) && !varTypeIsSmall(dstType))
+    {
+        // We should have filtered out float -> long conversion and
+        // converted it to float -> double -> long conversion.
+        assert((dstType != TYP_LONG) || (srcType != TYP_FLOAT));
+
+        // we should have handled overflow cases in morph itself
+        assert(!tree->gtOverflow());
+
+        CorInfoType fieldType  = (srcType == TYP_DOUBLE) ? CORINFO_TYPE_DOUBLE : CORINFO_TYPE_FLOAT;
+        GenTree*    castOutput = nullptr;
+        LIR::Use    castOpUse(BlockRange(), &(tree->AsCast()->CastOp()), tree);
+        ReplaceWithLclVar(castOpUse);
+        castOp = tree->AsCast()->CastOp();
+        /*The code below is to introduce saturating conversions on X86/X64.
+        The C# equivalence of the code is given below -->
+
+                // Replace QNaN and SNaN with Zero
+                op1 = Avx512F.Fixup(op1, op1, Vector128.Create<long>(0x88), 0);
+
+                // Convert from double to long, replacing any values that were greater than or equal to MaxValue
+        with MaxValue
+                // Values that were less than or equal to MinValue will already be MinValue
+                return Vector128.ConditionalSelect(
+                    Vector128.LessThan(op1, Vector128.Create<double>(long.MaxValue)).AsInt64(),
+                    Avx512DQ.VL.ConvertToVector128Int64(op1),
+                    Vector128.Create<long>(long.MaxValue)
+                );
+        */
+        if (comp->IsBaselineVector512IsaSupportedOpportunistically())
+        {
+            // Clone the cast operand for usage.
+            GenTree* op1Clone1 = comp->gtClone(castOp);
+            BlockRange().InsertAfter(castOp, op1Clone1);
+
+            // Generate the control table for VFIXUPIMMSD
+            // The behavior we want is to saturate negative values to 0.
+            GenTreeVecCon* tbl    = comp->gtNewVconNode(TYP_SIMD16);
+            tbl->gtSimdVal.i32[0] = (varTypeIsUnsigned(dstType)) ? 0x08080088 : 0x00000088;
+            BlockRange().InsertAfter(op1Clone1, tbl);
+
+            // get a zero int node for control table
+            GenTree* ctrlByte = comp->gtNewIconNode(0);
+            BlockRange().InsertAfter(tbl, ctrlByte);
+
+            if (varTypeIsUnsigned(dstType))
+            {
+                // run vfixupimmsd base on table and no flags reporting
+                GenTree* oper1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, castOp, op1Clone1, tbl, ctrlByte,
+                                                                NI_AVX512F_FixupScalar, fieldType, 16);
+                BlockRange().InsertAfter(ctrlByte, oper1);
+                LowerNode(oper1);
+
+                // Convert to scalar
+                // Here, we try to insert a Vector128 to Scalar node so that the input
+                // can be provided to the scalar cast
+                GenTree* oper2 = comp->gtNewSimdHWIntrinsicNode(srcType, oper1, NI_Vector128_ToScalar, fieldType, 16);
+                BlockRange().InsertAfter(oper1, oper2);
+                LowerNode(oper2);
+
+                castOutput = comp->gtNewCastNode(genActualType(dstType), oper2, false, dstType);
+                BlockRange().InsertAfter(oper2, castOutput);
+            }
+            else
+            {
+                CorInfoType destFieldType = (dstType == TYP_INT) ? CORINFO_TYPE_INT : CORINFO_TYPE_LONG;
+
+                ssize_t actualMaxVal = (dstType == TYP_INT) ? INT32_MAX : INT64_MAX;
+
+                // run vfixupimmsd base on table and no flags reporting
+                GenTree* fixupVal = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, castOp, op1Clone1, tbl, ctrlByte,
+                                                                   NI_AVX512F_FixupScalar, fieldType, 16);
+                BlockRange().InsertAfter(ctrlByte, fixupVal);
+                LowerNode(fixupVal);
+
+                // get the max value vector
+                GenTree* maxValScalar = (srcType == TYP_DOUBLE)
+                                            ? comp->gtNewDconNodeD(static_cast<double>(actualMaxVal))
+                                            : comp->gtNewDconNodeF(static_cast<float>(actualMaxVal));
+                GenTree* maxVal       = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, maxValScalar, fieldType, 16);
+                BlockRange().InsertAfter(fixupVal, maxVal);
+
+                GenTree* maxValDstTypeScalar = (dstType == TYP_INT) ? comp->gtNewIconNode(actualMaxVal, dstType)
+                                                                    : comp->gtNewLconNode(actualMaxVal);
+                GenTree* maxValDstType =
+                    comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, maxValDstTypeScalar, destFieldType, 16);
+                BlockRange().InsertAfter(maxVal, maxValDstType);
+
+                // usage 1 --> compare with max value of integer
+                GenTree* compMask = comp->gtNewSimdCmpOpNode(GT_GE, TYP_SIMD16, fixupVal, maxVal, fieldType, 16);
+                BlockRange().InsertAfter(maxValDstType, compMask);
+
+                // convert fixupVal to local variable and clone it for further use
+                LIR::Use fixupValUse(BlockRange(), &(compMask->AsHWIntrinsic()->Op(1)), compMask);
+                ReplaceWithLclVar(fixupValUse);
+                fixupVal               = compMask->AsHWIntrinsic()->Op(1);
+                GenTree* fixupValClone = comp->gtClone(fixupVal);
+                LowerNode(compMask);
+                BlockRange().InsertAfter(fixupVal, fixupValClone);
+
+                GenTree* FixupValCloneScalar =
+                    comp->gtNewSimdHWIntrinsicNode(srcType, fixupValClone, NI_Vector128_ToScalar, fieldType, 16);
+                BlockRange().InsertAfter(compMask, FixupValCloneScalar);
+                LowerNode(FixupValCloneScalar);
+
+                // cast it
+                GenTreeCast* newCast = comp->gtNewCastNode(dstType, FixupValCloneScalar, false, dstType);
+                BlockRange().InsertAfter(FixupValCloneScalar, newCast);
+
+                GenTree* newTree = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, newCast, destFieldType, 16);
+                BlockRange().InsertAfter(newCast, newTree);
+                LowerNode(newTree);
+
+                // usage 2 --> use thecompared mask with input value and max value to blend
+                GenTree* control = comp->gtNewIconNode(0xCA); // (B & A) | (C & ~A)
+                BlockRange().InsertAfter(newTree, control);
+                GenTree* cndSelect = comp->gtNewSimdTernaryLogicNode(TYP_SIMD16, compMask, maxValDstType, newTree,
+                                                                     control, destFieldType, 16);
+                BlockRange().InsertAfter(control, cndSelect);
+                LowerNode(cndSelect);
+
+                castOutput =
+                    comp->gtNewSimdHWIntrinsicNode(dstType, cndSelect, NI_Vector128_ToScalar, destFieldType, 16);
+                BlockRange().InsertAfter(cndSelect, castOutput);
+                LowerNode(castOutput);
+            }
+        }
+        else if (varTypeIsSigned(dstType) && comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
+        {
+            CorInfoType destFieldType = (dstType == TYP_INT) ? CORINFO_TYPE_INT : CORINFO_TYPE_LONG;
+
+            ssize_t actualMaxVal = (dstType == TYP_INT) ? INT32_MAX : INT64_MAX;
+
+            // create clones for usage
+            GenTree* castOpClone1 = comp->gtClone(castOp);
+            GenTree* castOpClone2 = comp->gtClone(castOp);
+            BlockRange().InsertAfter(castOp, castOpClone1);
+            BlockRange().InsertAfter(castOpClone1, castOpClone2);
+
+            GenTree* oper = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOp, fieldType, 16);
+            BlockRange().InsertAfter(castOpClone2, oper);
+            LowerNode(oper);
+            GenTree* op1Clone1 = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOpClone1, fieldType, 16);
+            BlockRange().InsertAfter(oper, op1Clone1);
+            LowerNode(op1Clone1);
+            GenTree* op1Clone2 = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOpClone2, fieldType, 16);
+            BlockRange().InsertAfter(op1Clone1, op1Clone2);
+            LowerNode(op1Clone2);
+
+            // check NaN
+            GenTree* mask1 = comp->gtNewSimdCmpOpNode(GT_EQ, TYP_SIMD16, oper, op1Clone1, fieldType, 16);
+            BlockRange().InsertAfter(op1Clone2, mask1);
+            LowerNode(mask1);
+            // inp = inp & mask
+            GenTree* maskNaN = comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, op1Clone2, mask1, fieldType, 16);
+            BlockRange().InsertAfter(mask1, maskNaN);
+            LowerNode(maskNaN);
+
+            // get the max value vector
+            GenTree* maxVal = (srcType == TYP_DOUBLE) ? comp->gtNewDconNodeD(static_cast<double>(actualMaxVal))
+                                                      : comp->gtNewDconNodeF(static_cast<float>(actualMaxVal));
+            GenTree* maxValDup =
+                (dstType == TYP_INT) ? comp->gtNewIconNode(actualMaxVal) : comp->gtNewLconNode(actualMaxVal);
+            maxVal = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, maxVal, fieldType, 16);
+            BlockRange().InsertAfter(maskNaN, maxVal);
+            maxValDup = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, maxValDup, destFieldType, 16);
+            BlockRange().InsertAfter(maxVal, maxValDup);
+
+            // usage 1 --> compare with max value of integer
+            GenTree* compMask = comp->gtNewSimdCmpOpNode(GT_GE, TYP_SIMD16, maskNaN, maxVal, fieldType, 16);
+            BlockRange().InsertAfter(maxValDup, compMask);
+
+            // we will be using the maskNaN value twice
+            LIR::Use maskNaNUse(BlockRange(), &(compMask->AsHWIntrinsic()->Op(1)), compMask);
+            ReplaceWithLclVar(maskNaNUse);
+            maskNaN               = compMask->AsHWIntrinsic()->Op(1);
+            GenTree* maskNaNClone = comp->gtClone(maskNaN);
+            LowerNode(compMask);
+            BlockRange().InsertAfter(maskNaN, maskNaNClone);
+
+            // convert to scalar for conversion
+            GenTree* maskNaNCloneScalar =
+                comp->gtNewSimdHWIntrinsicNode(srcType, maskNaNClone, NI_Vector128_ToScalar, fieldType, 16);
+            BlockRange().InsertAfter(compMask, maskNaNCloneScalar);
+            LowerNode(maskNaNCloneScalar);
+
+            // cast it
+            GenTreeCast* newCast = comp->gtNewCastNode(dstType, maskNaNCloneScalar, false, dstType);
+            BlockRange().InsertAfter(maskNaNCloneScalar, newCast);
+            GenTree* newTree = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, newCast, destFieldType, 16);
+            BlockRange().InsertAfter(newCast, newTree);
+            LowerNode(newTree);
+
+            // usage 2 --> use thecompared mask with input value and max value to blend
+            GenTree* cndSelect = comp->gtNewSimdCndSelNode(TYP_SIMD16, compMask, maxValDup, newTree, destFieldType, 16);
+            BlockRange().InsertAfter(newTree, cndSelect);
+            LowerNode(cndSelect);
+
+            castOutput = comp->gtNewSimdHWIntrinsicNode(dstType, cndSelect, NI_Vector128_ToScalar, destFieldType, 16);
+            BlockRange().InsertAfter(cndSelect, castOutput);
+            LowerNode(castOutput);
+        }
+        else
+        {
+            // The remaining case not handled above should be conversion
+            // to TYP_UINT in case where SSE41 is supported.
+            // We should have converted float -> uint conversion to
+            // float -> double -> uint during morph.
+            assert((dstType == TYP_UINT) && comp->compIsaSupportedDebugOnly(InstructionSet_SSE41) &&
+                   (srcType != TYP_FLOAT));
+
+            ssize_t     actualMaxVal  = UINT32_MAX;
+            CorInfoType destFieldType = CORINFO_TYPE_LONG;
+
+            GenTree* castOpClone1 = comp->gtClone(castOp);
+            GenTree* castOpClone2 = comp->gtClone(castOp);
+            GenTree* castOpClone3 = comp->gtClone(castOp);
+            BlockRange().InsertAfter(castOp, castOpClone1);
+            BlockRange().InsertAfter(castOpClone1, castOpClone2);
+            BlockRange().InsertAfter(castOpClone2, castOpClone3);
+
+            GenTree* oper = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOp, fieldType, 16);
+            BlockRange().InsertAfter(castOpClone3, oper);
+            LowerNode(oper);
+            GenTree* op1Clone1 = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOpClone1, fieldType, 16);
+            BlockRange().InsertAfter(oper, op1Clone1);
+            LowerNode(op1Clone1);
+            GenTree* op1Clone2 = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOpClone2, fieldType, 16);
+            BlockRange().InsertAfter(op1Clone1, op1Clone2);
+            LowerNode(op1Clone2);
+            GenTree* op1Clone3 = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, castOpClone3, fieldType, 16);
+            BlockRange().InsertAfter(op1Clone2, op1Clone3);
+            LowerNode(op1Clone3);
+
+            // get the max/min value vector
+            GenTree* minVal = comp->gtNewDconNodeD(static_cast<double>(0));
+            minVal          = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, minVal, fieldType, 16);
+            BlockRange().InsertAfter(op1Clone3, minVal);
+            GenTree* maxVal = comp->gtNewDconNodeD(static_cast<double>(actualMaxVal));
+            maxVal          = comp->gtNewSimdCreateBroadcastNode(TYP_SIMD16, maxVal, fieldType, 16);
+            BlockRange().InsertAfter(minVal, maxVal);
+
+            // check NaN
+            GenTree* mask1 = comp->gtNewSimdCmpOpNode(GT_EQ, TYP_SIMD16, oper, op1Clone1, fieldType, 16);
+            BlockRange().InsertAfter(maxVal, mask1);
+            LowerNode(mask1);
+
+            // check negative
+            GenTree* mask2 = comp->gtNewSimdCmpOpNode(GT_GE, TYP_SIMD16, op1Clone2, minVal, fieldType, 16);
+            BlockRange().InsertAfter(mask1, mask2);
+            LowerNode(mask2);
+
+            // and mask
+            GenTree* mask12 = comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, mask1, mask2, fieldType, 16);
+            BlockRange().InsertAfter(mask2, mask12);
+            LowerNode(mask12);
+
+            // inp = inp & mask
+            GenTree* saturatedVal = comp->gtNewSimdBinOpNode(GT_AND, TYP_SIMD16, op1Clone3, mask12, fieldType, 16);
+            BlockRange().InsertAfter(mask12, saturatedVal);
+            LowerNode(saturatedVal);
+
+            // compare with max value of uint
+            GenTree* mask3 = comp->gtNewSimdCmpOpNode(GT_GE, TYP_SIMD16, saturatedVal, maxVal, fieldType, 16);
+            BlockRange().InsertAfter(saturatedVal, mask3);
+
+            // Convert both the operands of mask3 to local variables for reusage
+            LIR::Use saturatedValUse(BlockRange(), &(mask3->AsHWIntrinsic()->Op(1)), mask3);
+            ReplaceWithLclVar(saturatedValUse);
+            saturatedVal             = mask3->AsHWIntrinsic()->Op(1);
+            GenTree* saturatedValDup = comp->gtClone(saturatedVal);
+            BlockRange().InsertAfter(saturatedVal, saturatedValDup);
+
+            LIR::Use maxValUse(BlockRange(), &(mask3->AsHWIntrinsic()->Op(2)), mask3);
+            ReplaceWithLclVar(maxValUse);
+            maxVal             = mask3->AsHWIntrinsic()->Op(2);
+            GenTree* maxValDup = comp->gtClone(maxVal);
+            LowerNode(mask3);
+            BlockRange().InsertAfter(maxVal, maxValDup);
+
+            // Select based on mask3
+            GenTree* castOpVal =
+                comp->gtNewSimdCndSelNode(TYP_SIMD16, mask3, maxValDup, saturatedValDup, fieldType, 16);
+            BlockRange().InsertAfter(mask3, castOpVal);
+            LowerNode(castOpVal);
+
+            // scalar
+            GenTree* castOpValScalar =
+                comp->gtNewSimdHWIntrinsicNode(srcType, castOpVal, NI_Vector128_ToScalar, fieldType, 16);
+            BlockRange().InsertAfter(castOpVal, castOpValScalar);
+            LowerNode(castOpValScalar);
+
+            // cast it
+            castOutput = comp->gtNewCastNode(TYP_INT, castOpValScalar, false, dstType);
+            BlockRange().InsertAfter(castOpValScalar, castOutput);
+        }
+        assert(castOutput != nullptr);
+        LIR::Use use;
+        if (BlockRange().TryGetUse(tree, &use))
+        {
+            use.ReplaceWith(castOutput);
+        }
+        else
+        {
+            castOutput->SetUnusedValue();
+        }
+        BlockRange().Remove(tree);
+        return castOutput->gtNext;
     }
+#endif // TARGET_AMD64
 
     // Case of src is a small type and dst is a floating point type.
     if (varTypeIsSmall(srcType) && varTypeIsFloating(castToType))
@@ -880,6 +1194,7 @@ void Lowering::LowerCast(GenTree* tree)
 
     // Now determine if we have operands that should be contained.
     ContainCheckCast(tree->AsCast());
+    return nullptr;
 }
 
 #ifdef FEATURE_HW_INTRINSICS
@@ -1068,29 +1383,24 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
 
     NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
 
-    if (HWIntrinsicInfo::IsEmbRoundingCompatible(intrinsicId))
+    if (node->OperIsEmbRoundingEnabled())
     {
-        size_t numArgs        = node->GetOperandCount();
-        size_t expectedArgNum = HWIntrinsicInfo::EmbRoundingArgPos(intrinsicId);
+        size_t   numArgs = node->GetOperandCount();
+        GenTree* lastOp  = node->Op(numArgs);
+        uint8_t  mode    = 0xFF;
 
-        if (numArgs == expectedArgNum)
+        if (lastOp->IsCnsIntOrI())
         {
-            GenTree* lastOp = node->Op(numArgs);
-            uint8_t  mode   = 0xFF;
-
-            if (lastOp->IsCnsIntOrI())
-            {
-                // Mark the constant as contained since it's specially encoded
-                MakeSrcContained(node, lastOp);
+            // Mark the constant as contained since it's specially encoded
+            MakeSrcContained(node, lastOp);
 
-                mode = static_cast<uint8_t>(lastOp->AsIntCon()->IconValue());
-            }
+            mode = static_cast<uint8_t>(lastOp->AsIntCon()->IconValue());
+        }
 
-            if ((mode & 0x03) != 0x00)
-            {
-                // Embedded rounding only works for register-to-register operations, so skip containment
-                return node->gtNext;
-            }
+        if ((mode & 0x03) != 0x00)
+        {
+            // Embedded rounding only works for register-to-register operations, so skip containment
+            return node->gtNext;
         }
     }
 
@@ -1772,8 +2082,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
                     // currently ANDNOT logic cannot be optimized by the ternary node.
                     break;
                 }
-                GenTree* op3 = second->AsHWIntrinsic()->Op(1) == node ? second->AsHWIntrinsic()->Op(2)
-                                                                      : second->AsHWIntrinsic()->Op(1);
+                GenTree*    op3             = second->AsHWIntrinsic()->Op(1) == node ? second->AsHWIntrinsic()->Op(2)
+                                                                                     : second->AsHWIntrinsic()->Op(1);
                 GenTree*    control         = comp->gtNewIconNode(node->GetTernaryControlByte(second->AsHWIntrinsic()));
                 CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
                 unsigned    simdSize        = node->GetSimdSize();
@@ -6655,12 +6965,12 @@ void Lowering::ContainCheckCallOperands(GenTreeCall* call)
         else
 #endif // TARGET_X86
             if (ctrlExpr->isIndir())
-        {
-            // We may have cases where we have set a register target on the ctrlExpr, but if it
-            // contained we must clear it.
-            ctrlExpr->SetRegNum(REG_NA);
-            MakeSrcContained(call, ctrlExpr);
-        }
+            {
+                // We may have cases where we have set a register target on the ctrlExpr, but if it
+                // contained we must clear it.
+                ctrlExpr->SetRegNum(REG_NA);
+                MakeSrcContained(call, ctrlExpr);
+            }
     }
 }
 
@@ -6692,12 +7002,10 @@ void Lowering::ContainCheckIndir(GenTreeIndir* node)
         // The address of an indirection that requires its address in a reg.
         // Skip any further processing that might otherwise make it contained.
     }
-    else if (addr->OperIs(GT_LCL_ADDR))
+    else if (addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), node->Size()))
     {
         // These nodes go into an addr mode:
         // - GT_LCL_ADDR is a stack addr mode.
-
-        // make this contained, it turns into a constant that goes into an addr mode
         MakeSrcContained(node, addr);
     }
     else if (addr->IsCnsIntOrI())
@@ -7635,7 +7943,8 @@ bool Lowering::LowerRMWMemOp(GenTreeIndir* storeInd)
 
         // If it is a GT_LCL_VAR, it still needs the reg to hold the address.
         // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base.
-        if (indirCandidateChild->OperIs(GT_LCL_ADDR))
+        if (indirCandidateChild->OperIs(GT_LCL_ADDR) &&
+            IsContainableLclAddr(indirCandidateChild->AsLclFld(), storeInd->Size()))
         {
             indirDst->SetContained();
         }
@@ -8766,6 +9075,8 @@ void Lowering::TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode,
 void Lowering::TryCompressConstVecData(GenTreeStoreInd* node)
 {
     assert(node->Data()->IsCnsVec());
+    assert(node->Data()->AsVecCon()->TypeIs(TYP_SIMD32, TYP_SIMD64));
+
     GenTreeVecCon*      vecCon    = node->Data()->AsVecCon();
     GenTreeHWIntrinsic* broadcast = nullptr;
 
@@ -8835,16 +9146,24 @@ void Lowering::TryCompressConstVecData(GenTreeStoreInd* node)
 //  Arguments:
 //     node - The hardware intrinsic node
 //     addr - The address node to try contain
+//     size - Size of the memory access (can be an overestimate)
 //
-void Lowering::ContainCheckHWIntrinsicAddr(GenTreeHWIntrinsic* node, GenTree* addr)
+void Lowering::ContainCheckHWIntrinsicAddr(GenTreeHWIntrinsic* node, GenTree* addr, unsigned size)
 {
-    assert((addr->TypeGet() == TYP_I_IMPL) || (addr->TypeGet() == TYP_BYREF));
-    TryCreateAddrMode(addr, true, node);
-    if ((addr->OperIs(GT_LCL_ADDR, GT_LEA) || (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp))) &&
-        IsInvariantInRange(addr, node))
+    assert((genActualType(addr) == TYP_I_IMPL) || (addr->TypeGet() == TYP_BYREF));
+    if ((addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), size)) ||
+        (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp)))
     {
         MakeSrcContained(node, addr);
     }
+    else
+    {
+        TryCreateAddrMode(addr, true, node);
+        if (addr->OperIs(GT_LEA) && IsInvariantInRange(addr, node))
+        {
+            MakeSrcContained(node, addr);
+        }
+    }
 }
 
 //----------------------------------------------------------------------------------------------
@@ -8919,7 +9238,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
         switch (category)
         {
             case HW_Category_MemoryLoad:
-                ContainCheckHWIntrinsicAddr(node, op1);
+                ContainCheckHWIntrinsicAddr(node, op1, simdSize);
                 break;
 
             case HW_Category_SimpleSIMD:
@@ -8977,7 +9296,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                     {
                         if (node->OperIsMemoryLoad())
                         {
-                            ContainCheckHWIntrinsicAddr(node, op1);
+                            ContainCheckHWIntrinsicAddr(node, op1, /* conservative maximum */ 16);
                             return;
                         }
                         break;
@@ -8990,7 +9309,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                     {
                         if (node->OperIsMemoryLoad())
                         {
-                            ContainCheckHWIntrinsicAddr(node, op1);
+                            ContainCheckHWIntrinsicAddr(node, op1, /* conservative maximum */ 8);
                             return;
                         }
 
@@ -9029,7 +9348,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                     {
                         if (node->OperIsMemoryLoad())
                         {
-                            ContainCheckHWIntrinsicAddr(node, op1);
+                            ContainCheckHWIntrinsicAddr(node, op1, /* conservative maximum */ 16);
                             return;
                         }
 
@@ -9128,16 +9447,16 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                 case HW_Category_MemoryLoad:
                     if ((intrinsicId == NI_AVX_MaskLoad) || (intrinsicId == NI_AVX2_MaskLoad))
                     {
-                        ContainCheckHWIntrinsicAddr(node, op1);
+                        ContainCheckHWIntrinsicAddr(node, op1, simdSize);
                     }
                     else
                     {
-                        ContainCheckHWIntrinsicAddr(node, op2);
+                        ContainCheckHWIntrinsicAddr(node, op2, simdSize);
                     }
                     break;
 
                 case HW_Category_MemoryStore:
-                    ContainCheckHWIntrinsicAddr(node, op1);
+                    ContainCheckHWIntrinsicAddr(node, op1, /* conservative maximum */ simdSize);
                     break;
 
                 case HW_Category_SimpleSIMD:
@@ -9491,10 +9810,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
 
             switch (category)
             {
-                case HW_Category_MemoryStore:
-                    ContainCheckHWIntrinsicAddr(node, op1);
-                    break;
-
                 case HW_Category_SimpleSIMD:
                 case HW_Category_SIMDScalar:
                 case HW_Category_Scalar:
@@ -10027,8 +10342,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
 
                                 if (op1->IsVectorZero())
                                 {
-// When op1 is zero, we can contain it and we expect that
-// ival is already in the correct state to account for it
+                                    // When op1 is zero, we can contain it and we expect that
+                                    // ival is already in the correct state to account for it
 
 #if DEBUG
                                     ssize_t ival = lastOp->AsIntConCommon()->IconValue();
@@ -10048,8 +10363,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                                 }
                                 else if (op2->IsVectorZero())
                                 {
-// When op2 is zero, we can contain it and we expect that
-// zmask is already in the correct state to account for it
+                                    // When op2 is zero, we can contain it and we expect that
+                                    // zmask is already in the correct state to account for it
 
 #if DEBUG
                                     ssize_t ival = lastOp->AsIntConCommon()->IconValue();
diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp
index 04ca6149c9fc..831f6c7ac194 100644
--- a/src/coreclr/jit/lsra.cpp
+++ b/src/coreclr/jit/lsra.cpp
@@ -384,9 +384,9 @@ void LinearScan::updateSpillCost(regNumber reg, Interval* interval)
 //    interval - Interval of Refposition.
 //    assignedReg - Assigned register for this refposition.
 //
-void LinearScan::updateRegsFreeBusyState(RefPosition& refPosition,
-                                         regMaskTP    regsBusy,
-                                         regMaskTP*   regsToFree,
+void LinearScan::updateRegsFreeBusyState(RefPosition&               refPosition,
+                                         regMaskTP                  regsBusy,
+                                         regMaskTP*                 regsToFree,
                                          regMaskTP* delayRegsToFree DEBUG_ARG(Interval* interval)
                                              DEBUG_ARG(regNumber assignedReg))
 {
@@ -788,7 +788,7 @@ LinearScan::LinearScan(Compiler* theCompiler)
 
     availableFloatRegs  = RBM_ALLFLOAT;
     availableDoubleRegs = RBM_ALLDOUBLE;
-#if defined(TARGET_XARCH)
+#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
     availableMaskRegs = RBM_ALLMASK;
 #endif
 
@@ -815,7 +815,6 @@ LinearScan::LinearScan(Compiler* theCompiler)
 #endif
 
     // Initialize the availableRegs to use for each TYP_*
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf)                                   \
     availableRegs[static_cast<int>(TYP_##tn)] = &regFld;
@@ -1437,7 +1436,7 @@ PhaseStatus LinearScan::doLinearScan()
 #ifdef DEBUG
         || VERBOSE
 #endif
-        )
+    )
     {
         dumpLsraStats(jitstdout());
     }
@@ -1681,6 +1680,21 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc)
         return false;
     }
 
+    // Avoid allocating parameters that are passed in float regs into integer
+    // registers. We currently home float registers before integer registers,
+    // so that kind of enregistration can trash integer registers containing
+    // other parameters.
+    // We assume that these cases will be homed to float registers if they are
+    // promoted.
+    // TODO-CQ: Combine integer and float register homing to handle these kinds
+    // of conflicts.
+    if ((varDsc->TypeGet() == TYP_STRUCT) && varDsc->lvIsRegArg && !varDsc->lvPromoted &&
+        varTypeUsesIntReg(varDsc->GetRegisterType()) && genIsValidFloatReg(varDsc->GetArgReg()))
+    {
+        compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::IsStructArg));
+        return false;
+    }
+
     //  Are we not optimizing and we have exception handlers?
     //   if so mark all args and locals as volatile, so that they
     //   won't ever get enregistered.
@@ -1756,7 +1770,7 @@ template void LinearScan::identifyCandidates<false>();
 // TODO-Cleanup: This was cloned from Compiler::lvaSortByRefCount() in lclvars.cpp in order
 // to avoid perturbation, but should be merged.
 template <bool localVarsEnregistered>
-void           LinearScan::identifyCandidates()
+void LinearScan::identifyCandidates()
 {
     if (localVarsEnregistered)
     {
@@ -1988,7 +2002,6 @@ void           LinearScan::identifyCandidates()
             // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
             // and those that meet the second (see the definitions of thresholdFPRefCntWtd and maybeFPRefCntWtd
             // above).
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
             // Additionally, when we are generating code for a target with partial SIMD callee-save
@@ -2007,24 +2020,24 @@ void           LinearScan::identifyCandidates()
             else
 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
                 if (regType(type) == FloatRegisterType)
-            {
-                floatVarCount++;
-                weight_t refCntWtd = varDsc->lvRefCntWtd();
-                if (varDsc->lvIsRegArg)
-                {
-                    // Don't count the initial reference for register params.  In those cases,
-                    // using a callee-save causes an extra copy.
-                    refCntWtd -= BB_UNITY_WEIGHT;
-                }
-                if (refCntWtd >= thresholdFPRefCntWtd)
                 {
-                    VarSetOps::AddElemD(compiler, fpCalleeSaveCandidateVars, varDsc->lvVarIndex);
-                }
-                else if (refCntWtd >= maybeFPRefCntWtd)
-                {
-                    VarSetOps::AddElemD(compiler, fpMaybeCandidateVars, varDsc->lvVarIndex);
+                    floatVarCount++;
+                    weight_t refCntWtd = varDsc->lvRefCntWtd();
+                    if (varDsc->lvIsRegArg)
+                    {
+                        // Don't count the initial reference for register params.  In those cases,
+                        // using a callee-save causes an extra copy.
+                        refCntWtd -= BB_UNITY_WEIGHT;
+                    }
+                    if (refCntWtd >= thresholdFPRefCntWtd)
+                    {
+                        VarSetOps::AddElemD(compiler, fpCalleeSaveCandidateVars, varDsc->lvVarIndex);
+                    }
+                    else if (refCntWtd >= maybeFPRefCntWtd)
+                    {
+                        VarSetOps::AddElemD(compiler, fpMaybeCandidateVars, varDsc->lvVarIndex);
+                    }
                 }
-            }
             JITDUMP("  ");
             DBEXEC(VERBOSE, newInt->dump(compiler));
         }
@@ -2065,7 +2078,6 @@ void           LinearScan::identifyCandidates()
     // registers current include the number of fp vars, whether there are loops, and whether there are
     // multiple exits.  These have been selected somewhat empirically, but there is probably room for
     // more tuning.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     if (VERBOSE)
@@ -2483,7 +2495,7 @@ void LinearScan::checkLastUses(BasicBlock* block)
 //                                    the register locations will be "rotated" to stress the resolution and allocation
 //                                    code.
 //
-BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock* block,
+BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock*           block,
                                                BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated))
 {
     BasicBlock* predBlock = nullptr;
@@ -2680,33 +2692,33 @@ void LinearScan::setFrameType()
     else
 #endif // DOUBLE_ALIGN
         if (compiler->codeGen->isFramePointerRequired())
-    {
-        frameType = FT_EBP_FRAME;
-    }
-    else
-    {
-        if (compiler->rpMustCreateEBPCalled == false)
-        {
-#ifdef DEBUG
-            const char* reason;
-#endif // DEBUG
-            compiler->rpMustCreateEBPCalled = true;
-            if (compiler->rpMustCreateEBPFrame(INDEBUG(&reason)))
-            {
-                JITDUMP("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
-                compiler->codeGen->setFrameRequired(true);
-            }
-        }
-
-        if (compiler->codeGen->isFrameRequired())
         {
             frameType = FT_EBP_FRAME;
         }
         else
         {
-            frameType = FT_ESP_FRAME;
+            if (compiler->rpMustCreateEBPCalled == false)
+            {
+#ifdef DEBUG
+                const char* reason;
+#endif // DEBUG
+                compiler->rpMustCreateEBPCalled = true;
+                if (compiler->rpMustCreateEBPFrame(INDEBUG(&reason)))
+                {
+                    JITDUMP("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
+                    compiler->codeGen->setFrameRequired(true);
+                }
+            }
+
+            if (compiler->codeGen->isFrameRequired())
+            {
+                frameType = FT_EBP_FRAME;
+            }
+            else
+            {
+                frameType = FT_ESP_FRAME;
+            }
         }
-    }
 
     switch (frameType)
     {
@@ -2926,7 +2938,7 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo
 // for enregistration. It simply finds the register to be assigned, if it was assigned to something
 // else, then will unassign it and then assign to the currentInterval
 //
-regNumber LinearScan::allocateRegMinimal(Interval*    currentInterval,
+regNumber LinearScan::allocateRegMinimal(Interval*                currentInterval,
                                          RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore))
 {
     assert(!enregisterLocalVars);
@@ -2989,7 +3001,7 @@ regNumber LinearScan::allocateRegMinimal(Interval*    currentInterval,
 //        no such ref position, no register will be allocated.
 //
 template <bool needsConsecutiveRegisters>
-regNumber LinearScan::allocateReg(Interval*    currentInterval,
+regNumber LinearScan::allocateReg(Interval*                currentInterval,
                                   RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore))
 {
     regMaskTP foundRegBit =
@@ -3008,7 +3020,6 @@ regNumber LinearScan::allocateReg(Interval*    currentInterval,
         if (regSelector->isSpilling())
         {
             // We're spilling.
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef TARGET_ARM
             if (currentInterval->registerType == TYP_DOUBLE)
@@ -3984,11 +3995,8 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition)
         bool needsKill = varTypeIsGC(assignedInterval->registerType);
         if (!needsKill)
         {
-            // The importer will assign a GC type to the rhs of an assignment if the lhs type is a GC type,
-            // even if the rhs is not. See the CEE_STLOC* case in impImportBlockCode(). As a result,
-            // we can have a 'GT_LCL_VAR' node with a GC type, when the lclVar itself is an integer type.
+            // We can have a 'GT_LCL_VAR' node with a GC type, when the lclVar itself is an integer type.
             // The emitter will mark this register as holding a GC type. Therefore, we must spill this value.
-            // This was exposed on Arm32 with EH write-thru.
             if ((assignedInterval->recentRefPosition != nullptr) &&
                 (assignedInterval->recentRefPosition->treeNode != nullptr))
             {
@@ -5109,6 +5117,13 @@ void LinearScan::allocateRegistersMinimal()
                 }
                 regsInUseThisLocation |= currentRefPosition.registerAssignment;
                 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg()));
+
+#ifdef SWIFT_SUPPORT
+                if (currentRefPosition.delayRegFree)
+                {
+                    regsInUseNextLocation |= currentRefPosition.registerAssignment;
+                }
+#endif // SWIFT_SUPPORT
             }
             else
             {
@@ -5433,7 +5448,6 @@ void LinearScan::allocateRegistersMinimal()
     }
 
     // Free registers to clear associated intervals for resolution phase
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     if (getLsraExtendLifeTimes())
@@ -5818,6 +5832,13 @@ void LinearScan::allocateRegisters()
                 }
                 regsInUseThisLocation |= currentRefPosition.registerAssignment;
                 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg()));
+
+#ifdef SWIFT_SUPPORT
+                if (currentRefPosition.delayRegFree)
+                {
+                    regsInUseNextLocation |= currentRefPosition.registerAssignment;
+                }
+#endif // SWIFT_SUPPORT
             }
             else
             {
@@ -5922,9 +5943,62 @@ void LinearScan::allocateRegisters()
             assert(lclVarInterval->isLocalVar);
             if (refType == RefTypeUpperVectorSave)
             {
-                if ((lclVarInterval->physReg == REG_NA) ||
+                assert(currentInterval->recentRefPosition == &currentRefPosition);
+
+                // For a given RefTypeUpperVectorSave, there should be a matching RefTypeUpperVectorRestore
+                // If not, probably, this was an extra one we added conservatively and should not need a register.
+                RefPosition* nextRefPosition        = currentRefPosition.nextRefPosition;
+                bool         isExtraUpperVectorSave = currentRefPosition.IsExtraUpperVectorSave();
+
+                if ((lclVarInterval->physReg == REG_NA) || isExtraUpperVectorSave ||
                     (lclVarInterval->isPartiallySpilled && (currentInterval->physReg == REG_STK)))
                 {
+                    if (!currentRefPosition.liveVarUpperSave)
+                    {
+                        if (isExtraUpperVectorSave)
+                        {
+                            // If this was just an extra upperVectorSave that do not have corresponding
+                            // upperVectorRestore, we do not need to mark this as isPartiallySpilled
+                            // or need to insert the save/restore.
+                            currentRefPosition.skipSaveRestore = true;
+                        }
+
+                        if (assignedRegister != REG_NA)
+                        {
+                            // If we ever assigned register to this interval, it was because in the past
+                            // there were valid save/restore RefPositions associated. For non-live vars,
+                            // we want to reduce the affect of their presence and hence, we will
+                            // unassign register from this interval without spilling and free it.
+
+                            // We do not take similar action on upperVectorRestore below because here, we
+                            // have already removed the register association with the interval.
+                            // The "allocate = false" route, will do a no-op.
+                            if (currentInterval->isActive)
+                            {
+                                RegRecord* physRegRecord = getRegisterRecord(assignedRegister);
+                                unassignPhysRegNoSpill(physRegRecord);
+                            }
+                            else
+                            {
+                                updateNextIntervalRef(assignedRegister, currentInterval);
+                                updateSpillCost(assignedRegister, currentInterval);
+                            }
+
+                            regsToFree |= getRegMask(assignedRegister, currentInterval->registerType);
+                        }
+                        INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, nullptr, assignedRegister));
+                        currentRefPosition.registerAssignment = RBM_NONE;
+                        lastAllocatedRefPosition              = &currentRefPosition;
+
+                        continue;
+                    }
+                    else
+                    {
+                        // We should never have an extra upperVectorSave for non-live var because there will
+                        // always be a valid use for which we will add the restore.
+                        assert(!isExtraUpperVectorSave);
+                    }
+
                     allocate = false;
                 }
 #if defined(TARGET_XARCH)
@@ -6674,7 +6748,6 @@ void LinearScan::allocateRegisters()
 #endif // JIT32_GCENCODER
 
     // Free registers to clear associated intervals for resolution phase
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     if (getLsraExtendLifeTimes())
@@ -7747,7 +7820,7 @@ void LinearScan::updateMaxSpill(RefPosition* refPosition)
 // the tree, and performs resolution across joins and back edges.
 //
 template <bool localVarsEnregistered>
-void           LinearScan::resolveRegisters()
+void LinearScan::resolveRegisters()
 {
     // Iterate over the tree and the RefPositions in lockstep
     //  - annotate the tree with register assignments by setting GetRegNum() or gtRegPair (for longs)
@@ -7969,7 +8042,15 @@ void           LinearScan::resolveRegisters()
                             insertUpperVectorSave(treeNode, currentRefPosition, currentRefPosition->getInterval(),
                                                   block);
                         }
-                        localVarInterval->isPartiallySpilled = true;
+
+                        if (!currentRefPosition->IsExtraUpperVectorSave())
+                        {
+                            localVarInterval->isPartiallySpilled = true;
+                        }
+                        else
+                        {
+                            assert(!currentRefPosition->liveVarUpperSave);
+                        }
                     }
                 }
                 else
@@ -8212,8 +8293,8 @@ void           LinearScan::resolveRegisters()
                 {
                     regMaskTP initialRegMask = interval->firstRefPosition->registerAssignment;
                     regNumber initialReg     = (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter)
-                                               ? REG_STK
-                                               : genRegNumFromMask(initialRegMask);
+                                                   ? REG_STK
+                                                   : genRegNumFromMask(initialRegMask);
 
 #ifdef TARGET_ARM
                     if (varTypeIsMultiReg(varDsc))
@@ -8660,12 +8741,12 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock*      fromBlock,
 // Notes:
 //    It inserts at least one move and updates incoming parameter 'location'.
 //
-void LinearScan::addResolutionForDouble(BasicBlock*     block,
-                                        GenTree*        insertionPoint,
-                                        Interval**      sourceIntervals,
-                                        regNumberSmall* location,
-                                        regNumber       toReg,
-                                        regNumber       fromReg,
+void LinearScan::addResolutionForDouble(BasicBlock*             block,
+                                        GenTree*                insertionPoint,
+                                        Interval**              sourceIntervals,
+                                        regNumberSmall*         location,
+                                        regNumber               toReg,
+                                        regNumber               fromReg,
                                         ResolveType resolveType DEBUG_ARG(BasicBlock* fromBlock)
                                             DEBUG_ARG(BasicBlock* toBlock))
 {
@@ -8735,10 +8816,10 @@ void LinearScan::addResolutionForDouble(BasicBlock*     block,
 //    The next time, we want to move from the stack to the destination (toReg),
 //    in which case fromReg will be REG_STK, and we insert at the top.
 //
-void LinearScan::addResolution(BasicBlock* block,
-                               GenTree*    insertionPoint,
-                               Interval*   interval,
-                               regNumber   toReg,
+void LinearScan::addResolution(BasicBlock*       block,
+                               GenTree*          insertionPoint,
+                               Interval*         interval,
+                               regNumber         toReg,
                                regNumber fromReg DEBUG_ARG(BasicBlock* fromBlock) DEBUG_ARG(BasicBlock* toBlock)
                                    DEBUG_ARG(const char* reason))
 {
@@ -9862,7 +9943,7 @@ const char* LinearScan::getStatName(unsigned stat)
 #define LSRA_STAT_DEF(stat, name) name,
 #include "lsra_stats.h"
 #undef LSRA_STAT_DEF
-#define REG_SEL_DEF(stat, value, shortname, orderSeqId) #stat,
+#define REG_SEL_DEF(stat, value, shortname, orderSeqId)      #stat,
 #define BUSY_REG_SEL_DEF(stat, value, shortname, orderSeqId) REG_SEL_DEF(stat, value, shortname, orderSeqId)
 #include "lsra_score.h"
     };
@@ -10096,7 +10177,7 @@ void LinearScan::dumpLsraStatsCsv(FILE* file)
     {
         fprintf(file, ",%u", sumStats[statIndex]);
     }
-    fprintf(file, ",%.2f\n", compiler->info.compPerfScore);
+    fprintf(file, ",%.2f\n", compiler->Metrics.PerfScore);
 }
 
 // -----------------------------------------------------------
@@ -11182,9 +11263,8 @@ void LinearScan::dumpRegRecordHeader()
     //    l is either '*' (if a last use) or ' ' (otherwise)
     //    d is either 'D' (if a delayed use) or ' ' (otherwise)
 
-    maxNodeLocation = (maxNodeLocation == 0)
-                          ? 1
-                          : maxNodeLocation; // corner case of a method with an infinite loop without any GenTree nodes
+    maxNodeLocation = (maxNodeLocation == 0) ? 1 : maxNodeLocation; // corner case of a method with an infinite loop
+                                                                    // without any GenTree nodes
     assert(maxNodeLocation >= 1);
     assert(refPositions.size() >= 1);
     int treeIdWidth               = 9; /* '[XXXXX] '*/
@@ -11745,8 +11825,7 @@ void LinearScan::verifyFinalAllocation()
             }
         }
 
-        LsraLocation newLocation = currentRefPosition.nodeLocation;
-        currentLocation          = newLocation;
+        currentLocation = currentRefPosition.nodeLocation;
 
         switch (currentRefPosition.refType)
         {
@@ -12088,7 +12167,8 @@ void LinearScan::verifyFinalAllocation()
                             (currentRefPosition.refType == RefTypeUpperVectorRestore))
                         {
                             Interval* lclVarInterval = interval->relatedInterval;
-                            assert((lclVarInterval->physReg == REG_NA) || lclVarInterval->isPartiallySpilled);
+                            assert((lclVarInterval->physReg == REG_NA) || lclVarInterval->isPartiallySpilled ||
+                                   currentRefPosition.IsExtraUpperVectorSave());
                         }
                     }
 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
@@ -12314,7 +12394,7 @@ LinearScan::RegisterSelection::RegisterSelection(LinearScan* linearScan)
 #ifdef TARGET_ARM64
             && !linearScan->compiler->info.compNeedsConsecutiveRegisters
 #endif
-            )
+        )
         {
             ordering = W("MQQQQQQQQQQQQQQQQ");
         }
@@ -13031,7 +13111,7 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT()
 
             && !refPosition->needsConsecutive
 #endif
-            )
+        )
         {
             assert(!"Spill candidate has no assignedInterval recentRefPosition");
         }
@@ -13163,7 +13243,7 @@ void LinearScan::RegisterSelection::calculateCoversSets()
 //      Register bit selected (a single register) and REG_NA if no register was selected.
 //
 template <bool needsConsecutiveRegisters>
-regMaskTP LinearScan::RegisterSelection::select(Interval*    currentInterval,
+regMaskTP LinearScan::RegisterSelection::select(Interval*                currentInterval,
                                                 RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore))
 {
 #ifdef DEBUG
@@ -13628,7 +13708,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval*    currentInterval,
 //  select the REG_ORDER heuristics (if there are any free candidates) or REG_NUM (if all registers
 //  are busy).
 //
-regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval*    currentInterval,
+regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval*                currentInterval,
                                                        RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore))
 {
     assert(!linearScan->enregisterLocalVars);
diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h
index dd7a049960ca..778341361929 100644
--- a/src/coreclr/jit/lsra.h
+++ b/src/coreclr/jit/lsra.h
@@ -30,13 +30,13 @@ const unsigned int MaxInternalRegisters = 8;
 const unsigned int RegisterTypeCount    = 2;
 
 /*****************************************************************************
-* Register types
-*****************************************************************************/
+ * Register types
+ *****************************************************************************/
 typedef var_types RegisterType;
 
-#define IntRegisterType TYP_INT
+#define IntRegisterType   TYP_INT
 #define FloatRegisterType TYP_FLOAT
-#define MaskRegisterType TYP_MASK
+#define MaskRegisterType  TYP_MASK
 
 //------------------------------------------------------------------------
 // regType: Return the RegisterType to use for a given type
@@ -51,12 +51,12 @@ RegisterType regType(T type)
     {
         return IntRegisterType;
     }
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#if (defined(TARGET_XARCH) || defined(TARGET_ARM64)) && defined(FEATURE_SIMD)
     else if (varTypeUsesMaskReg(type))
     {
         return MaskRegisterType;
     }
-#endif // TARGET_XARCH && FEATURE_SIMD
+#endif // (TARGET_XARCH || TARGET_ARM64) && FEATURE_SIMD
     else
     {
         assert(varTypeUsesFloatReg(type));
@@ -83,7 +83,9 @@ struct RefInfo
     RefPosition* ref;
     GenTree*     treeNode;
 
-    RefInfo(RefPosition* r, GenTree* t) : ref(r), treeNode(t)
+    RefInfo(RefPosition* r, GenTree* t)
+        : ref(r)
+        , treeNode(t)
     {
     }
 
@@ -107,7 +109,8 @@ class RefInfoListNode final : public RefInfo
     RefInfoListNode* m_next; // The next node in the list
 
 public:
-    RefInfoListNode(RefPosition* r, GenTree* t) : RefInfo(r, t)
+    RefInfoListNode(RefPosition* r, GenTree* t)
+        : RefInfo(r, t)
     {
     }
 
@@ -134,11 +137,15 @@ class RefInfoList final
     RefInfoListNode* m_tail; // The tail of the list
 
 public:
-    RefInfoList() : m_head(nullptr), m_tail(nullptr)
+    RefInfoList()
+        : m_head(nullptr)
+        , m_tail(nullptr)
     {
     }
 
-    RefInfoList(RefInfoListNode* node) : m_head(node), m_tail(node)
+    RefInfoList(RefInfoListNode* node)
+        : m_head(node)
+        , m_tail(node)
     {
         assert(m_head->m_next == nullptr);
     }
@@ -365,7 +372,7 @@ class RefInfoListNodePool final
 public:
     RefInfoListNodePool(Compiler* compiler, unsigned preallocate = defaultPreallocation);
     RefInfoListNode* GetNode(RefPosition* r, GenTree* t);
-    void ReturnNode(RefInfoListNode* listNode);
+    void             ReturnNode(RefInfoListNode* listNode);
 };
 
 #if TRACK_LSRA_STATS
@@ -374,7 +381,7 @@ enum LsraStat
 #define LSRA_STAT_DEF(enum_name, enum_str) enum_name,
 #include "lsra_stats.h"
 #undef LSRA_STAT_DEF
-#define REG_SEL_DEF(enum_name, value, short_str, orderSeqId) STAT_##enum_name,
+#define REG_SEL_DEF(enum_name, value, short_str, orderSeqId)      STAT_##enum_name,
 #define BUSY_REG_SEL_DEF(enum_name, value, short_str, orderSeqId) REG_SEL_DEF(enum_name, value, short_str, orderSeqId)
 #include "lsra_score.h"
     COUNT
@@ -387,11 +394,11 @@ struct LsraBlockInfo
     // 0 for fgFirstBB.
     unsigned int predBBNum;
     weight_t     weight;
-    bool         hasCriticalInEdge : 1;
+    bool         hasCriticalInEdge  : 1;
     bool         hasCriticalOutEdge : 1;
-    bool         hasEHBoundaryIn : 1;
-    bool         hasEHBoundaryOut : 1;
-    bool         hasEHPred : 1;
+    bool         hasEHBoundaryIn    : 1;
+    bool         hasEHBoundaryOut   : 1;
+    bool         hasEHPred          : 1;
 
 #if TRACK_LSRA_STATS
     // Per block maintained LSRA statistics.
@@ -401,7 +408,7 @@ struct LsraBlockInfo
 
 enum RegisterScore
 {
-#define REG_SEL_DEF(enum_name, value, short_str, orderSeqId) enum_name = value,
+#define REG_SEL_DEF(enum_name, value, short_str, orderSeqId)      enum_name = value,
 #define BUSY_REG_SEL_DEF(enum_name, value, short_str, orderSeqId) REG_SEL_DEF(enum_name, value, short_str, orderSeqId)
 #include "lsra_score.h"
     NONE = 0
@@ -635,7 +642,7 @@ class LinearScan : public LinearScanInterface
 
     // This does the dataflow analysis and builds the intervals
     template <bool localVarsEnregistered>
-    void           buildIntervals();
+    void buildIntervals();
 
     // This is where the actual assignment is done for scenarios where
     // no local var enregistration is done.
@@ -648,7 +655,7 @@ class LinearScan : public LinearScanInterface
     void allocateRegisters();
     // This is the resolution phase, where cross-block mismatches are fixed up
     template <bool localVarsEnregistered>
-    void           resolveRegisters();
+    void resolveRegisters();
 
     void writeRegisters(RefPosition* currentRefPosition, GenTree* tree);
 
@@ -658,7 +665,7 @@ class LinearScan : public LinearScanInterface
     void insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned multiRegIdx, RefPosition* refPosition);
 
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-    void makeUpperVectorInterval(unsigned varIndex);
+    void      makeUpperVectorInterval(unsigned varIndex);
     Interval* getUpperVectorInterval(unsigned varIndex);
 
     // Save the upper half of a vector that lives in a callee-save register at the point of a call.
@@ -693,20 +700,20 @@ class LinearScan : public LinearScanInterface
     };
 
 #ifdef TARGET_ARM
-    void addResolutionForDouble(BasicBlock*     block,
-                                GenTree*        insertionPoint,
-                                Interval**      sourceIntervals,
-                                regNumberSmall* location,
-                                regNumber       toReg,
-                                regNumber       fromReg,
+    void addResolutionForDouble(BasicBlock*             block,
+                                GenTree*                insertionPoint,
+                                Interval**              sourceIntervals,
+                                regNumberSmall*         location,
+                                regNumber               toReg,
+                                regNumber               fromReg,
                                 ResolveType resolveType DEBUG_ARG(BasicBlock* fromBlock)
                                     DEBUG_ARG(BasicBlock* toBlock));
 #endif
 
-    void addResolution(BasicBlock* block,
-                       GenTree*    insertionPoint,
-                       Interval*   interval,
-                       regNumber   outReg,
+    void addResolution(BasicBlock*     block,
+                       GenTree*        insertionPoint,
+                       Interval*       interval,
+                       regNumber       outReg,
                        regNumber inReg DEBUG_ARG(BasicBlock* fromBlock) DEBUG_ARG(BasicBlock* toBlock)
                            DEBUG_ARG(const char* reason));
 
@@ -766,7 +773,6 @@ class LinearScan : public LinearScanInterface
     // At least for x86 and AMD64, and potentially other architecture that will support SIMD,
     // we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
     // Hence the "SmallFPSet" has 5 elements.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_AMD64)
 #ifdef UNIX_AMD64_ABI
@@ -816,8 +822,14 @@ class LinearScan : public LinearScanInterface
 
     // This controls the heuristics used to select registers
     // These can be combined.
-    enum LsraSelect{LSRA_SELECT_DEFAULT = 0, LSRA_SELECT_REVERSE_HEURISTICS = 0x04,
-                    LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08, LSRA_SELECT_NEAREST = 0x10, LSRA_SELECT_MASK = 0x1c};
+    enum LsraSelect
+    {
+        LSRA_SELECT_DEFAULT               = 0,
+        LSRA_SELECT_REVERSE_HEURISTICS    = 0x04,
+        LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08,
+        LSRA_SELECT_NEAREST               = 0x10,
+        LSRA_SELECT_MASK                  = 0x1c
+    };
     LsraSelect getSelectionHeuristics()
     {
         return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK);
@@ -836,9 +848,14 @@ class LinearScan : public LinearScanInterface
     }
 
     // This controls the order in which basic blocks are visited during allocation
-    enum LsraTraversalOrder{LSRA_TRAVERSE_LAYOUT = 0x20, LSRA_TRAVERSE_PRED_FIRST = 0x40,
-                            LSRA_TRAVERSE_RANDOM  = 0x60, // NYI
-                            LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST, LSRA_TRAVERSE_MASK = 0x60};
+    enum LsraTraversalOrder
+    {
+        LSRA_TRAVERSE_LAYOUT     = 0x20,
+        LSRA_TRAVERSE_PRED_FIRST = 0x40,
+        LSRA_TRAVERSE_RANDOM     = 0x60, // NYI
+        LSRA_TRAVERSE_DEFAULT    = LSRA_TRAVERSE_PRED_FIRST,
+        LSRA_TRAVERSE_MASK       = 0x60
+    };
     LsraTraversalOrder getLsraTraversalOrder()
     {
         if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0)
@@ -858,7 +875,12 @@ class LinearScan : public LinearScanInterface
 
     // This controls whether lifetimes should be extended to the entire method.
     // Note that this has no effect under MinOpts
-    enum LsraExtendLifetimes{LSRA_DONT_EXTEND = 0, LSRA_EXTEND_LIFETIMES = 0x80, LSRA_EXTEND_LIFETIMES_MASK = 0x80};
+    enum LsraExtendLifetimes
+    {
+        LSRA_DONT_EXTEND           = 0,
+        LSRA_EXTEND_LIFETIMES      = 0x80,
+        LSRA_EXTEND_LIFETIMES_MASK = 0x80
+    };
     LsraExtendLifetimes getLsraExtendLifeTimes()
     {
         return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK);
@@ -871,8 +893,13 @@ class LinearScan : public LinearScanInterface
     // This controls whether variables locations should be set to the previous block in layout order
     // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -
     // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
-    enum LsraBlockBoundaryLocations{LSRA_BLOCK_BOUNDARY_PRED = 0, LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
-                                    LSRA_BLOCK_BOUNDARY_ROTATE = 0x200, LSRA_BLOCK_BOUNDARY_MASK = 0x300};
+    enum LsraBlockBoundaryLocations
+    {
+        LSRA_BLOCK_BOUNDARY_PRED   = 0,
+        LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
+        LSRA_BLOCK_BOUNDARY_ROTATE = 0x200,
+        LSRA_BLOCK_BOUNDARY_MASK   = 0x300
+    };
     LsraBlockBoundaryLocations getLsraBlockBoundaryLocations()
     {
         return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);
@@ -881,7 +908,12 @@ class LinearScan : public LinearScanInterface
 
     // This controls whether we always insert a GT_RELOAD instruction after a spill
     // Note that this can be combined with LSRA_SPILL_ALWAYS (or not)
-    enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400};
+    enum LsraReload
+    {
+        LSRA_NO_RELOAD_IF_SAME    = 0,
+        LSRA_ALWAYS_INSERT_RELOAD = 0x400,
+        LSRA_RELOAD_MASK          = 0x400
+    };
     LsraReload getLsraReload()
     {
         return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK);
@@ -892,7 +924,12 @@ class LinearScan : public LinearScanInterface
     }
 
     // This controls whether we spill everywhere
-    enum LsraSpill{LSRA_DONT_SPILL_ALWAYS = 0, LSRA_SPILL_ALWAYS = 0x800, LSRA_SPILL_MASK = 0x800};
+    enum LsraSpill
+    {
+        LSRA_DONT_SPILL_ALWAYS = 0,
+        LSRA_SPILL_ALWAYS      = 0x800,
+        LSRA_SPILL_MASK        = 0x800
+    };
     LsraSpill getLsraSpill()
     {
         return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK);
@@ -904,8 +941,12 @@ class LinearScan : public LinearScanInterface
 
     // This controls whether RefPositions that lower/codegen indicated as reg optional be
     // allocated a reg at all.
-    enum LsraRegOptionalControl{LSRA_REG_OPTIONAL_DEFAULT = 0, LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000,
-                                LSRA_REG_OPTIONAL_MASK = 0x1000};
+    enum LsraRegOptionalControl
+    {
+        LSRA_REG_OPTIONAL_DEFAULT  = 0,
+        LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000,
+        LSRA_REG_OPTIONAL_MASK     = 0x1000
+    };
 
     LsraRegOptionalControl getLsraRegOptionalControl()
     {
@@ -988,7 +1029,7 @@ class LinearScan : public LinearScanInterface
 private:
     // Determine which locals are candidates for allocation
     template <bool localVarsEnregistered>
-    void           identifyCandidates();
+    void identifyCandidates();
 
     // determine which locals are used in EH constructs we don't want to deal with
     void identifyCandidatesExceptionDataflow();
@@ -997,8 +1038,8 @@ class LinearScan : public LinearScanInterface
 
 #ifdef DEBUG
     void checkLastUses(BasicBlock* block);
-    int ComputeOperandDstCount(GenTree* operand);
-    int ComputeAvailableSrcCount(GenTree* node);
+    int  ComputeOperandDstCount(GenTree* operand);
+    int  ComputeAvailableSrcCount(GenTree* node);
 #endif // DEBUG
 
     void setFrameType();
@@ -1014,20 +1055,20 @@ class LinearScan : public LinearScanInterface
     void resetAllRegistersState();
 
 #ifdef TARGET_ARM
-    bool isSecondHalfReg(RegRecord* regRec, Interval* interval);
+    bool       isSecondHalfReg(RegRecord* regRec, Interval* interval);
     RegRecord* getSecondHalfRegRec(RegRecord* regRec);
     RegRecord* findAnotherHalfRegRec(RegRecord* regRec);
-    regNumber findAnotherHalfRegNum(regNumber regNum);
-    bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation);
-    void unassignDoublePhysReg(RegRecord* doubleRegRecord);
+    regNumber  findAnotherHalfRegNum(regNumber regNum);
+    bool       canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation);
+    void       unassignDoublePhysReg(RegRecord* doubleRegRecord);
 #endif
-    void clearAssignedInterval(RegRecord* reg ARM_ARG(RegisterType regType));
-    void updateAssignedInterval(RegRecord* reg, Interval* interval ARM_ARG(RegisterType regType));
-    void updatePreviousInterval(RegRecord* reg, Interval* interval ARM_ARG(RegisterType regType));
-    bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval);
-    bool isAssignedToInterval(Interval* interval, RegRecord* regRec);
-    bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation);
-    bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation);
+    void     clearAssignedInterval(RegRecord* reg ARM_ARG(RegisterType regType));
+    void     updateAssignedInterval(RegRecord* reg, Interval* interval ARM_ARG(RegisterType regType));
+    void     updatePreviousInterval(RegRecord* reg, Interval* interval ARM_ARG(RegisterType regType));
+    bool     canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval);
+    bool     isAssignedToInterval(Interval* interval, RegRecord* regRec);
+    bool     isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation);
+    bool     canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation);
     weight_t getSpillWeight(RegRecord* physRegRecord);
 
     // insert refpositions representing prolog zero-inits which will be added later
@@ -1214,13 +1255,13 @@ class LinearScan : public LinearScanInterface
 
     void spillGCRefs(RefPosition* killRefPosition);
 
-/*****************************************************************************
-* Register selection
-****************************************************************************/
+    /*****************************************************************************
+     * Register selection
+     ****************************************************************************/
 
 #if defined(TARGET_ARM64)
-    bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned);
-    void assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned);
+    bool      canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned);
+    void      assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned);
     regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition, regMaskTP* busyCandidates);
     regMaskTP filterConsecutiveCandidates(regMaskTP    candidates,
                                           unsigned int registersNeeded,
@@ -1258,10 +1299,10 @@ class LinearScan : public LinearScanInterface
 
         // Perform register selection and update currentInterval or refPosition
         template <bool hasConsecutiveRegister = false>
-        FORCEINLINE regMaskTP select(Interval*    currentInterval,
+        FORCEINLINE regMaskTP select(Interval*                currentInterval,
                                      RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore));
 
-        FORCEINLINE regMaskTP selectMinimal(Interval*    currentInterval,
+        FORCEINLINE regMaskTP selectMinimal(Interval*                currentInterval,
                                             RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore));
 
         // If the register is from unassigned set such that it was not already
@@ -1344,14 +1385,14 @@ class LinearScan : public LinearScanInterface
             return (prevRegBit & preferences) == foundRegBit;
         }
 
-        bool applySelection(int selectionScore, regMaskTP selectionCandidates);
-        bool applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate);
+        bool             applySelection(int selectionScore, regMaskTP selectionCandidates);
+        bool             applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate);
         FORCEINLINE void calculateCoversSets();
         FORCEINLINE void calculateUnassignedSets();
         FORCEINLINE void reset(Interval* interval, RefPosition* refPosition);
         FORCEINLINE void resetMinimal(Interval* interval, RefPosition* refPosition);
 
-#define REG_SEL_DEF(stat, value, shortname, orderSeqId) FORCEINLINE void try_##stat();
+#define REG_SEL_DEF(stat, value, shortname, orderSeqId)      FORCEINLINE void try_##stat();
 #define BUSY_REG_SEL_DEF(stat, value, shortname, orderSeqId) REG_SEL_DEF(stat, value, shortname, orderSeqId)
 #include "lsra_score.h"
     };
@@ -1379,8 +1420,8 @@ class LinearScan : public LinearScanInterface
         unsigned toBBNum;
     };
     typedef JitHashTable<unsigned, JitSmallPrimitiveKeyFuncs<unsigned>, SplitEdgeInfo> SplitBBNumToTargetBBNumMap;
-    SplitBBNumToTargetBBNumMap* splitBBNumToTargetBBNumMap;
-    SplitBBNumToTargetBBNumMap* getSplitBBNumToTargetBBNumMap()
+    SplitBBNumToTargetBBNumMap*                                                        splitBBNumToTargetBBNumMap;
+    SplitBBNumToTargetBBNumMap*                                                        getSplitBBNumToTargetBBNumMap()
     {
         if (splitBBNumToTargetBBNumMap == nullptr)
         {
@@ -1391,13 +1432,13 @@ class LinearScan : public LinearScanInterface
     }
     SplitEdgeInfo getSplitEdgeInfo(unsigned int bbNum);
 
-    void initVarRegMaps();
-    void setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
-    void setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
+    void        initVarRegMaps();
+    void        setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
+    void        setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
     VarToRegMap getInVarToRegMap(unsigned int bbNum);
     VarToRegMap getOutVarToRegMap(unsigned int bbNum);
-    void setVarReg(VarToRegMap map, unsigned int trackedVarIndex, regNumber reg);
-    regNumber getVarReg(VarToRegMap map, unsigned int trackedVarIndex);
+    void        setVarReg(VarToRegMap map, unsigned int trackedVarIndex, regNumber reg);
+    regNumber   getVarReg(VarToRegMap map, unsigned int trackedVarIndex);
     // Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
     // the block)
     VarToRegMap setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap);
@@ -1410,8 +1451,8 @@ class LinearScan : public LinearScanInterface
 
 #ifdef TARGET_ARM64
     typedef JitHashTable<RefPosition*, JitPtrKeyFuncs<RefPosition>, RefPosition*> NextConsecutiveRefPositionsMap;
-    NextConsecutiveRefPositionsMap* nextConsecutiveRefPositionMap;
-    NextConsecutiveRefPositionsMap* getNextConsecutiveRefPositionsMap()
+    NextConsecutiveRefPositionsMap*                                               nextConsecutiveRefPositionMap;
+    NextConsecutiveRefPositionsMap*                                               getNextConsecutiveRefPositionsMap()
     {
         if (nextConsecutiveRefPositionMap == nullptr)
         {
@@ -1439,7 +1480,12 @@ class LinearScan : public LinearScanInterface
     //   - In LSRA_DUMP_POST, which is after register allocation, the registers are
     //     shown.
 
-    enum LsraTupleDumpMode{LSRA_DUMP_PRE, LSRA_DUMP_REFPOS, LSRA_DUMP_POST};
+    enum LsraTupleDumpMode
+    {
+        LSRA_DUMP_PRE,
+        LSRA_DUMP_REFPOS,
+        LSRA_DUMP_POST
+    };
     void lsraGetOperandString(GenTree* tree, LsraTupleDumpMode mode, char* operandString, unsigned operandStringLength);
     void lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDest);
     void DumpOperandDefs(
@@ -1477,7 +1523,7 @@ class LinearScan : public LinearScanInterface
     regMaskTP lastDumpedRegisters;
     regMaskTP registersToDump;
     int       lastUsedRegNumIndex;
-    bool shouldDumpReg(regNumber regNum)
+    bool      shouldDumpReg(regNumber regNum)
     {
         return (registersToDump & genRegMask(regNum)) != 0;
     }
@@ -1498,29 +1544,54 @@ class LinearScan : public LinearScanInterface
     void dumpIntervalName(Interval* interval);
 
     // Events during the allocation phase that cause some dump output
-    enum LsraDumpEvent{
+    enum LsraDumpEvent
+    {
         // Conflicting def/use
-        LSRA_EVENT_DEFUSE_CONFLICT, LSRA_EVENT_DEFUSE_FIXED_DELAY_USE, LSRA_EVENT_DEFUSE_CASE1, LSRA_EVENT_DEFUSE_CASE2,
-        LSRA_EVENT_DEFUSE_CASE3, LSRA_EVENT_DEFUSE_CASE4, LSRA_EVENT_DEFUSE_CASE5, LSRA_EVENT_DEFUSE_CASE6,
+        LSRA_EVENT_DEFUSE_CONFLICT,
+        LSRA_EVENT_DEFUSE_FIXED_DELAY_USE,
+        LSRA_EVENT_DEFUSE_CASE1,
+        LSRA_EVENT_DEFUSE_CASE2,
+        LSRA_EVENT_DEFUSE_CASE3,
+        LSRA_EVENT_DEFUSE_CASE4,
+        LSRA_EVENT_DEFUSE_CASE5,
+        LSRA_EVENT_DEFUSE_CASE6,
 
         // Spilling
-        LSRA_EVENT_SPILL, LSRA_EVENT_SPILL_EXTENDED_LIFETIME, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL,
-        LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, LSRA_EVENT_DONE_KILL_GC_REFS, LSRA_EVENT_NO_GC_KILLS,
+        LSRA_EVENT_SPILL,
+        LSRA_EVENT_SPILL_EXTENDED_LIFETIME,
+        LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL,
+        LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL,
+        LSRA_EVENT_DONE_KILL_GC_REFS,
+        LSRA_EVENT_NO_GC_KILLS,
 
         // Block boundaries
-        LSRA_EVENT_START_BB, LSRA_EVENT_END_BB,
+        LSRA_EVENT_START_BB,
+        LSRA_EVENT_END_BB,
 
         // Miscellaneous
-        LSRA_EVENT_FREE_REGS, LSRA_EVENT_UPPER_VECTOR_SAVE, LSRA_EVENT_UPPER_VECTOR_RESTORE,
+        LSRA_EVENT_FREE_REGS,
+        LSRA_EVENT_UPPER_VECTOR_SAVE,
+        LSRA_EVENT_UPPER_VECTOR_RESTORE,
 
         // Characteristics of the current RefPosition
         LSRA_EVENT_INCREMENT_RANGE_END, // ???
-        LSRA_EVENT_LAST_USE, LSRA_EVENT_LAST_USE_DELAYED, LSRA_EVENT_NEEDS_NEW_REG,
+        LSRA_EVENT_LAST_USE,
+        LSRA_EVENT_LAST_USE_DELAYED,
+        LSRA_EVENT_NEEDS_NEW_REG,
 
         // Allocation decisions
-        LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,
-        LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG,
-        LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG, LSRA_EVENT_REUSE_REG,
+        LSRA_EVENT_FIXED_REG,
+        LSRA_EVENT_EXP_USE,
+        LSRA_EVENT_ZERO_REF,
+        LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,
+        LSRA_EVENT_KEPT_ALLOCATION,
+        LSRA_EVENT_COPY_REG,
+        LSRA_EVENT_MOVE_REG,
+        LSRA_EVENT_ALLOC_REG,
+        LSRA_EVENT_NO_REG_ALLOCATED,
+        LSRA_EVENT_RELOAD,
+        LSRA_EVENT_SPECIAL_PUTARG,
+        LSRA_EVENT_REUSE_REG,
     };
     void dumpLsraAllocationEvent(LsraDumpEvent event,
                                  Interval*     interval      = nullptr,
@@ -1533,14 +1604,14 @@ class LinearScan : public LinearScanInterface
 
 #if TRACK_LSRA_STATS
     unsigned regCandidateVarCount;
-    void updateLsraStat(LsraStat stat, unsigned currentBBNum);
-    void dumpLsraStats(FILE* file);
+    void     updateLsraStat(LsraStat stat, unsigned currentBBNum);
+    void     dumpLsraStats(FILE* file);
     LsraStat getLsraStatFromScore(RegisterScore registerScore);
     LsraStat firstRegSelStat = STAT_FREE;
 
 public:
-    virtual void dumpLsraStatsCsv(FILE* file);
-    virtual void dumpLsraStatsSummary(FILE* file);
+    virtual void       dumpLsraStatsCsv(FILE* file);
+    virtual void       dumpLsraStatsSummary(FILE* file);
     static const char* getStatName(unsigned stat);
 
 #define INTRACK_STATS(x) x
@@ -1576,7 +1647,7 @@ class LinearScan : public LinearScanInterface
 
     // Set of blocks that have been visited.
     BlockSet bbVisitedSet;
-    void markBlockVisited(BasicBlock* block)
+    void     markBlockVisited(BasicBlock* block)
     {
         BlockSetOps::AddElemD(compiler, bbVisitedSet, block->bbNum);
     }
@@ -1603,17 +1674,17 @@ class LinearScan : public LinearScanInterface
     BasicBlock** blockSequence;
     // The verifiedAllBBs flag indicates whether we have verified that all BBs have been
     // included in the blockSeuqence above, during setBlockSequence().
-    bool verifiedAllBBs;
-    void setBlockSequence();
-    int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
+    bool            verifiedAllBBs;
+    void            setBlockSequence();
+    int             compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
     BasicBlockList* blockSequenceWorkList;
     bool            blockSequencingDone;
 #ifdef DEBUG
     // LSRA must not change number of blocks and blockEpoch that it initializes at start.
     unsigned blockEpoch;
 #endif // DEBUG
-    void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet);
-    void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
+    void        addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet);
+    void        removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
     BasicBlock* getNextCandidateFromWorkList();
 
     // Indicates whether the allocation pass has been completed.
@@ -1662,12 +1733,12 @@ class LinearScan : public LinearScanInterface
     PhasedVar<regMaskTP> availableIntRegs;
     PhasedVar<regMaskTP> availableFloatRegs;
     PhasedVar<regMaskTP> availableDoubleRegs;
-#if defined(TARGET_XARCH)
+#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
     PhasedVar<regMaskTP> availableMaskRegs;
 #endif
     PhasedVar<regMaskTP>* availableRegs[TYP_COUNT];
 
-#if defined(TARGET_XARCH)
+#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
 #define allAvailableRegs (availableIntRegs | availableFloatRegs | availableMaskRegs)
 #else
 #define allAvailableRegs (availableIntRegs | availableFloatRegs)
@@ -1714,7 +1785,7 @@ class LinearScan : public LinearScanInterface
 #if defined(TARGET_AMD64)
     static const var_types LargeVectorSaveType = TYP_SIMD16;
 #elif defined(TARGET_ARM64)
-    static const var_types LargeVectorSaveType  = TYP_DOUBLE;
+    static const var_types LargeVectorSaveType = TYP_DOUBLE;
 #endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
     // Set of large vector (TYP_SIMD32 on AVX) variables.
     VARSET_TP largeVectorVars;
@@ -1790,14 +1861,14 @@ class LinearScan : public LinearScanInterface
     void clearSpillCost(regNumber reg, var_types regType);
     void updateSpillCost(regNumber reg, Interval* interval);
 
-    FORCEINLINE void updateRegsFreeBusyState(RefPosition& refPosition,
-                                             regMaskTP    regsBusy,
-                                             regMaskTP*   regsToFree,
+    FORCEINLINE void updateRegsFreeBusyState(RefPosition&               refPosition,
+                                             regMaskTP                  regsBusy,
+                                             regMaskTP*                 regsToFree,
                                              regMaskTP* delayRegsToFree DEBUG_ARG(Interval* interval)
                                                  DEBUG_ARG(regNumber assignedReg));
 
     regMaskTP m_RegistersWithConstants;
-    void clearConstantReg(regNumber reg, var_types regType)
+    void      clearConstantReg(regNumber reg, var_types regType)
     {
         m_RegistersWithConstants &= ~getRegMask(reg, regType);
     }
@@ -1815,7 +1886,7 @@ class LinearScan : public LinearScanInterface
 
     regMaskTP    fixedRegs;
     LsraLocation nextFixedRef[REG_COUNT];
-    void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition);
+    void         updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition);
     LsraLocation getNextFixedRef(regNumber regNum, var_types regType)
     {
         LsraLocation loc = nextFixedRef[regNum];
@@ -1932,11 +2003,11 @@ class LinearScan : public LinearScanInterface
     bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode);
 
     RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0);
-    void setDelayFree(RefPosition* use);
-    int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE);
-    int BuildCastUses(GenTreeCast* cast, regMaskTP candidates);
+    void         setDelayFree(RefPosition* use);
+    int          BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE);
+    int          BuildCastUses(GenTreeCast* cast, regMaskTP candidates);
 #ifdef TARGET_XARCH
-    int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates = RBM_NONE);
+    int              BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates = RBM_NONE);
     inline regMaskTP BuildEvexIncompatibleMask(GenTree* tree);
 #endif // !TARGET_XARCH
     int BuildSelect(GenTreeOp* select);
@@ -1948,19 +2019,19 @@ class LinearScan : public LinearScanInterface
     void getTgtPrefOperands(GenTree* tree, GenTree* op1, GenTree* op2, bool* prefOp1, bool* prefOp2);
     bool supportsSpecialPutArg();
 
-    int BuildSimple(GenTree* tree);
-    int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE);
-    void AddDelayFreeUses(RefPosition* refPosition, GenTree* rmwNode);
-    int BuildDelayFreeUses(GenTree*      node,
-                           GenTree*      rmwNode        = nullptr,
-                           regMaskTP     candidates     = RBM_NONE,
-                           RefPosition** useRefPosition = nullptr);
-    int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE);
-    int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE);
-    void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
+    int          BuildSimple(GenTree* tree);
+    int          BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE);
+    void         AddDelayFreeUses(RefPosition* refPosition, GenTree* rmwNode);
+    int          BuildDelayFreeUses(GenTree*      node,
+                                    GenTree*      rmwNode        = nullptr,
+                                    regMaskTP     candidates     = RBM_NONE,
+                                    RefPosition** useRefPosition = nullptr);
+    int          BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE);
+    int          BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE);
+    void         HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
     RefPosition* BuildDef(GenTree* tree, regMaskTP dstCandidates = RBM_NONE, int multiRegIdx = 0);
-    void BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates = RBM_NONE);
-    void BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask);
+    void         BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates = RBM_NONE);
+    void         BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask);
 
     int BuildReturn(GenTree* tree);
 #ifdef TARGET_XARCH
@@ -1971,24 +2042,24 @@ class LinearScan : public LinearScanInterface
 #ifdef TARGET_ARM
     int BuildShiftLongCarry(GenTree* tree);
 #endif
-    int BuildPutArgReg(GenTreeUnOp* node);
-    int BuildCall(GenTreeCall* call);
-    int BuildCmp(GenTree* tree);
-    int BuildCmpOperands(GenTree* tree);
-    int BuildBlockStore(GenTreeBlk* blkNode);
-    int BuildModDiv(GenTree* tree);
-    int BuildIntrinsic(GenTree* tree);
+    int  BuildPutArgReg(GenTreeUnOp* node);
+    int  BuildCall(GenTreeCall* call);
+    int  BuildCmp(GenTree* tree);
+    int  BuildCmpOperands(GenTree* tree);
+    int  BuildBlockStore(GenTreeBlk* blkNode);
+    int  BuildModDiv(GenTree* tree);
+    int  BuildIntrinsic(GenTree* tree);
     void BuildStoreLocDef(GenTreeLclVarCommon* storeLoc, LclVarDsc* varDsc, RefPosition* singleUseRef, int index);
-    int BuildMultiRegStoreLoc(GenTreeLclVar* storeLoc);
-    int BuildStoreLoc(GenTreeLclVarCommon* tree);
-    int BuildIndir(GenTreeIndir* indirTree);
-    int BuildGCWriteBarrier(GenTree* tree);
-    int BuildCast(GenTreeCast* cast);
+    int  BuildMultiRegStoreLoc(GenTreeLclVar* storeLoc);
+    int  BuildStoreLoc(GenTreeLclVarCommon* tree);
+    int  BuildIndir(GenTreeIndir* indirTree);
+    int  BuildGCWriteBarrier(GenTree* tree);
+    int  BuildCast(GenTreeCast* cast);
 
 #if defined(TARGET_XARCH)
     // returns true if the tree can use the read-modify-write memory instruction form
     bool isRMWRegOper(GenTree* tree);
-    int BuildMul(GenTree* tree);
+    int  BuildMul(GenTree* tree);
     void SetContainsAVXFlags(unsigned sizeOfSIMDVector = 0);
 #endif // defined(TARGET_XARCH)
 
@@ -2017,7 +2088,7 @@ class LinearScan : public LinearScanInterface
 #ifdef FEATURE_HW_INTRINSICS
     int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount);
 #ifdef TARGET_ARM64
-    int BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwNode = nullptr);
+    int  BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwNode = nullptr);
     void BuildConsecutiveRegistersForDef(GenTree* treeNode, int fieldCount);
 #endif // TARGET_ARM64
 #endif // FEATURE_HW_INTRINSICS
@@ -2487,8 +2558,8 @@ class RefPosition
     //    we need an explicit move.
     //  - copyReg and moveReg must not exist with each other.
 
-    unsigned char reload : 1;
-    unsigned char spillAfter : 1;
+    unsigned char reload         : 1;
+    unsigned char spillAfter     : 1;
     unsigned char singleDefSpill : 1;
     unsigned char writeThru : 1; // true if this var is defined in a register and also spilled. spillAfter must NOT be
                                  // set.
@@ -2496,7 +2567,7 @@ class RefPosition
     unsigned char copyReg : 1;
     unsigned char moveReg : 1; // true if this var is moved to a new register
 
-    unsigned char isPhysRegRef : 1; // true if 'referent' points of a RegRecord, false if it points to an Interval
+    unsigned char isPhysRegRef  : 1; // true if 'referent' points of a RegRecord, false if it points to an Interval
     unsigned char isFixedRegRef : 1;
     unsigned char isLocalDefUse : 1;
 
@@ -2517,6 +2588,10 @@ class RefPosition
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     // If upper vector save/restore can be avoided.
     unsigned char skipSaveRestore : 1;
+    // If upper vector save is related to live var
+    // or created just based on bbLiveIn/bbDefs and
+    // whose liveness is not entirely know.
+    unsigned char liveVarUpperSave : 1;
 #endif
 
 #ifdef DEBUG
@@ -2534,9 +2609,9 @@ class RefPosition
     GenTree* buildNode;
 #endif // DEBUG
 
-    RefPosition(unsigned int bbNum,
-                LsraLocation nodeLocation,
-                GenTree*     treeNode,
+    RefPosition(unsigned int    bbNum,
+                LsraLocation    nodeLocation,
+                GenTree*        treeNode,
                 RefType refType DEBUG_ARG(GenTree* buildNode))
         : referent(nullptr)
         , nextRefPosition(nullptr)
@@ -2720,6 +2795,11 @@ class RefPosition
 #endif
 #endif // TARGET_ARM64
 
+    FORCEINLINE bool IsExtraUpperVectorSave() const
+    {
+        assert(refType == RefTypeUpperVectorSave);
+        return (nextRefPosition == nullptr) || (nextRefPosition->refType != RefTypeUpperVectorRestore);
+    }
 #ifdef DEBUG
     // operator= copies everything except 'rpNum', which must remain unique
     RefPosition& operator=(const RefPosition& rp)
diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp
index 30991778868d..2192265984d6 100644
--- a/src/coreclr/jit/lsraarm.cpp
+++ b/src/coreclr/jit/lsraarm.cpp
@@ -579,7 +579,6 @@ int LinearScan::BuildNode(GenTree* tree)
             break;
 
         case GT_STORE_BLK:
-        case GT_STORE_DYN_BLK:
             srcCount = BuildBlockStore(tree->AsBlk());
             break;
 
diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp
index ea3bc9d7fb37..1096d7f11701 100644
--- a/src/coreclr/jit/lsraarm64.cpp
+++ b/src/coreclr/jit/lsraarm64.cpp
@@ -1076,7 +1076,6 @@ int LinearScan::BuildNode(GenTree* tree)
             break;
 
         case GT_STORE_BLK:
-        case GT_STORE_DYN_BLK:
             srcCount = BuildBlockStore(tree->AsBlk());
             break;
 
@@ -1282,6 +1281,20 @@ int LinearScan::BuildNode(GenTree* tree)
             srcCount = BuildSelect(tree->AsOp());
             break;
 
+#ifdef SWIFT_SUPPORT
+        case GT_SWIFT_ERROR:
+            srcCount = 0;
+            assert(dstCount == 1);
+
+            // Any register should do here, but the error register value should immediately
+            // be moved from GT_SWIFT_ERROR's destination register to the SwiftError struct,
+            // and we know REG_SWIFT_ERROR should be busy up to this point, anyway.
+            // By forcing LSRA to use REG_SWIFT_ERROR as both the source and destination register,
+            // we can ensure the redundant move is elided.
+            BuildDef(tree, RBM_SWIFT_ERROR);
+            break;
+#endif // SWIFT_SUPPORT
+
     } // end switch (tree->OperGet())
 
     if (tree->IsUnusedValue() && (dstCount != 0))
@@ -1316,8 +1329,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
 
     const HWIntrinsic intrin(intrinsicTree);
 
-    int srcCount = 0;
-    int dstCount = 0;
+    int       srcCount      = 0;
+    int       dstCount      = 0;
+    regMaskTP dstCandidates = RBM_NONE;
 
     if (HWIntrinsicInfo::IsMultiReg(intrin.id))
     {
@@ -1430,6 +1444,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
                         assert(intrin.op4->isContainedIntOrIImmed());
                         break;
 
+                    case NI_Sve_CreateTrueMaskByte:
+                    case NI_Sve_CreateTrueMaskDouble:
+                    case NI_Sve_CreateTrueMaskInt16:
+                    case NI_Sve_CreateTrueMaskInt32:
+                    case NI_Sve_CreateTrueMaskInt64:
+                    case NI_Sve_CreateTrueMaskSByte:
+                    case NI_Sve_CreateTrueMaskSingle:
+                    case NI_Sve_CreateTrueMaskUInt16:
+                    case NI_Sve_CreateTrueMaskUInt32:
+                    case NI_Sve_CreateTrueMaskUInt64:
+                        needBranchTargetReg = !intrin.op1->isContainedIntOrIImmed();
+                        break;
+
                     default:
                         unreached();
                 }
@@ -1518,6 +1545,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
                 srcCount++;
             }
         }
+        else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id))
+        {
+            regMaskTP predMask = HWIntrinsicInfo::IsLowMaskedOperation(intrin.id) ? RBM_LOWMASK : RBM_ALLMASK;
+            srcCount += BuildOperandUses(intrin.op1, predMask);
+        }
         else if (intrinsicTree->OperIsMemoryLoadOrStore())
         {
             srcCount += BuildAddrUses(intrin.op1);
@@ -1684,6 +1716,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
 
                 assert(intrinsicTree->OperIsMemoryLoadOrStore());
                 srcCount += BuildAddrUses(intrin.op3);
+                buildInternalRegisterUses();
                 FALLTHROUGH;
             }
 
@@ -1716,6 +1749,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
         }
         return srcCount;
     }
+
     else if (intrin.op2 != nullptr)
     {
         // RMW intrinsic operands doesn't have to be delayFree when they can be assigned the same register as op1Reg
@@ -1770,11 +1804,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
 
     if ((dstCount == 1) || (dstCount == 2))
     {
-        BuildDef(intrinsicTree);
+        BuildDef(intrinsicTree, dstCandidates);
 
         if (dstCount == 2)
         {
-            BuildDef(intrinsicTree, RBM_NONE, 1);
+            BuildDef(intrinsicTree, dstCandidates, 1);
         }
     }
     else
@@ -2024,17 +2058,30 @@ bool RefPosition::isLiveAtConsecutiveRegistersLoc(LsraLocation consecutiveRegist
         return true;
     }
 
+    bool atConsecutiveRegsLoc          = consecutiveRegistersLocation == nodeLocation;
+    bool treeNeedsConsecutiveRegisters = false;
+
+    if ((treeNode != nullptr) && treeNode->OperIsHWIntrinsic())
+    {
+        const HWIntrinsic intrin(treeNode->AsHWIntrinsic());
+        treeNeedsConsecutiveRegisters = HWIntrinsicInfo::NeedsConsecutiveRegisters(intrin.id);
+    }
+
     if (refType == RefTypeDef)
     {
-        if (treeNode->OperIsHWIntrinsic())
+        return treeNeedsConsecutiveRegisters;
+    }
+    else if (refType == RefTypeUse)
+    {
+        if (isIntervalRef() && getInterval()->isInternal)
         {
-            const HWIntrinsic intrin(treeNode->AsHWIntrinsic());
-            return HWIntrinsicInfo::NeedsConsecutiveRegisters(intrin.id);
+            return treeNeedsConsecutiveRegisters;
         }
+        return atConsecutiveRegsLoc;
     }
-    else if ((refType == RefTypeUse) || (refType == RefTypeUpperVectorRestore))
+    else if (refType == RefTypeUpperVectorRestore)
     {
-        return consecutiveRegistersLocation == nodeLocation;
+        return atConsecutiveRegsLoc;
     }
     return false;
 }
diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp
index 8b1305caec52..c2b8b7440658 100644
--- a/src/coreclr/jit/lsraarmarch.cpp
+++ b/src/coreclr/jit/lsraarmarch.cpp
@@ -212,7 +212,7 @@ int LinearScan::BuildCall(GenTreeCall* call)
 
     RegisterType registerType = call->TypeGet();
 
-// Set destination candidates for return value of the call.
+    // Set destination candidates for return value of the call.
 
 #ifdef TARGET_ARM
     if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
@@ -224,22 +224,22 @@ int LinearScan::BuildCall(GenTreeCall* call)
     else
 #endif // TARGET_ARM
         if (hasMultiRegRetVal)
-    {
-        assert(retTypeDesc != nullptr);
-        dstCandidates = retTypeDesc->GetABIReturnRegs();
-    }
-    else if (varTypeUsesFloatArgReg(registerType))
-    {
-        dstCandidates = RBM_FLOATRET;
-    }
-    else if (registerType == TYP_LONG)
-    {
-        dstCandidates = RBM_LNGRET;
-    }
-    else
-    {
-        dstCandidates = RBM_INTRET;
-    }
+        {
+            assert(retTypeDesc != nullptr);
+            dstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv());
+        }
+        else if (varTypeUsesFloatArgReg(registerType))
+        {
+            dstCandidates = RBM_FLOATRET;
+        }
+        else if (registerType == TYP_LONG)
+        {
+            dstCandidates = RBM_LNGRET;
+        }
+        else
+        {
+            dstCandidates = RBM_INTRET;
+        }
 
     // First, count reg args
     // Each register argument corresponds to one source.
@@ -368,6 +368,21 @@ int LinearScan::BuildCall(GenTreeCall* call)
 
     if (ctrlExpr != nullptr)
     {
+#ifdef TARGET_ARM64
+        if (compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && TargetOS::IsUnix && (call->gtArgs.CountArgs() == 0) &&
+            ctrlExpr->IsTlsIconHandle())
+        {
+            // For NativeAOT linux/arm64, we generate the needed code as part of
+            // call node because the generated code has to be in specific format
+            // that linker can patch. As such, the code needs specific registers
+            // that we will attach to this node to guarantee that they are available
+            // during generating this node.
+            assert(call->gtFlags & GTF_TLS_GET_ADDR);
+            newRefPosition(REG_R0, currentLoc, RefTypeFixedReg, nullptr, genRegMask(REG_R0));
+            newRefPosition(REG_R1, currentLoc, RefTypeFixedReg, nullptr, genRegMask(REG_R1));
+            ctrlExprCandidates = genRegMask(REG_R2);
+        }
+#endif
         BuildUse(ctrlExpr, ctrlExprCandidates);
         srcCount++;
     }
@@ -378,6 +393,29 @@ int LinearScan::BuildCall(GenTreeCall* call)
     regMaskTP killMask = getKillSetForCall(call);
     BuildDefsWithKills(call, dstCount, dstCandidates, killMask);
 
+#ifdef SWIFT_SUPPORT
+    if (call->HasSwiftErrorHandling())
+    {
+        // Tree is a Swift call with error handling; error register should have been killed
+        assert((killMask & RBM_SWIFT_ERROR) != 0);
+
+        // After a Swift call that might throw returns, we expect the error register to be consumed
+        // by a GT_SWIFT_ERROR node. However, we want to ensure the error register won't be trashed
+        // before GT_SWIFT_ERROR can consume it.
+        // (For example, the PInvoke epilog comes before the error register store.)
+        // To do so, delay the freeing of the error register until the next node.
+        // This only works if the next node after the call is the GT_SWIFT_ERROR node.
+        // (InsertPInvokeCallEpilog should have moved the GT_SWIFT_ERROR node during lowering.)
+        assert(call->gtNext != nullptr);
+        assert(call->gtNext->OperIs(GT_SWIFT_ERROR));
+
+        // We could use RefTypeKill, but RefTypeFixedReg is used less commonly, so the check for delayRegFree
+        // during register allocation should be cheaper in terms of TP.
+        RefPosition* pos = newRefPosition(REG_SWIFT_ERROR, currentLoc, RefTypeFixedReg, call, RBM_SWIFT_ERROR);
+        setDelayFree(pos);
+    }
+#endif // SWIFT_SUPPORT
+
     // No args are placed in registers anymore.
     placedArgRegs      = RBM_NONE;
     numPlacedArgLocals = 0;
@@ -629,13 +667,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                 buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
                 break;
 
-            case GenTreeBlk::BlkOpKindHelper:
-                assert(!src->isContained());
-                dstAddrRegMask = RBM_ARG_0;
-                srcRegMask     = RBM_ARG_1;
-                sizeRegMask    = RBM_ARG_2;
-                break;
-
             default:
                 unreached();
         }
@@ -666,6 +697,13 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                     buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
                 }
 
+                if (size >= 4 * REGSIZE_BYTES && compiler->IsBaselineSimdIsaSupported())
+                {
+                    // We can use 128-bit SIMD ldp/stp for larger block sizes
+                    buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
+                    buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
+                }
+
                 // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
                 dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;
 
@@ -768,22 +806,12 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
             }
             break;
 
-            case GenTreeBlk::BlkOpKindHelper:
-                dstAddrRegMask = RBM_ARG_0;
-                if (srcAddrOrFill != nullptr)
-                {
-                    assert(!srcAddrOrFill->isContained());
-                    srcRegMask = RBM_ARG_1;
-                }
-                sizeRegMask = RBM_ARG_2;
-                break;
-
             default:
                 unreached();
         }
     }
 
-    if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (sizeRegMask != RBM_NONE))
+    if (sizeRegMask != RBM_NONE)
     {
         // Reserve a temp register for the block size argument.
         buildInternalIntRegisterDefForNode(blkNode, sizeRegMask);
@@ -814,12 +842,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
         }
     }
 
-    if (blkNode->OperIs(GT_STORE_DYN_BLK))
-    {
-        useCount++;
-        BuildUse(blkNode->AsStoreDynBlk()->gtDynamicSize, sizeRegMask);
-    }
-
     buildInternalRegisterUses();
     regMaskTP killMask = getKillSetForBlockStore(blkNode);
     BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask);
diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp
index 0c1d3f74475c..62532de8d66f 100644
--- a/src/coreclr/jit/lsrabuild.cpp
+++ b/src/coreclr/jit/lsrabuild.cpp
@@ -78,7 +78,8 @@ RefInfoListNode* RefInfoList::removeListNode(GenTree* node, unsigned multiRegIdx
 //    compiler    - The compiler context.
 //    preallocate - The number of nodes to preallocate.
 //
-RefInfoListNodePool::RefInfoListNodePool(Compiler* compiler, unsigned preallocate) : m_compiler(compiler)
+RefInfoListNodePool::RefInfoListNodePool(Compiler* compiler, unsigned preallocate)
+    : m_compiler(compiler)
 {
     if (preallocate > 0)
     {
@@ -635,7 +636,8 @@ RefPosition* LinearScan::newRefPosition(Interval*    theInterval,
     newRP->setRegOptional(false);
 
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-    newRP->skipSaveRestore = false;
+    newRP->skipSaveRestore  = false;
+    newRP->liveVarUpperSave = false;
 #endif
 
     associateRefPosWithInterval(newRP);
@@ -880,6 +882,16 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call)
     assert(!call->IsVirtualStub() ||
            ((killMask & compiler->virtualStubParamInfo->GetRegMask()) == compiler->virtualStubParamInfo->GetRegMask()));
 #endif // !TARGET_ARM
+
+#ifdef SWIFT_SUPPORT
+    // Swift calls that throw may trash the callee-saved error register,
+    // so don't use the register post-call until it is consumed by SwiftError.
+    if (call->HasSwiftErrorHandling())
+    {
+        killMask |= RBM_SWIFT_ERROR;
+    }
+#endif // SWIFT_SUPPORT
+
     return killMask;
 }
 
@@ -907,18 +919,6 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode)
             killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
             break;
 
-#ifndef TARGET_X86
-        case GenTreeBlk::BlkOpKindHelper:
-            if (isCopyBlk)
-            {
-                killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
-            }
-            else
-            {
-                killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
-            }
-            break;
-#endif
 #ifdef TARGET_XARCH
         case GenTreeBlk::BlkOpKindRepInstr:
             if (isCopyBlk)
@@ -1055,7 +1055,6 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
             break;
 
         case GT_STORE_BLK:
-        case GT_STORE_DYN_BLK:
             killMask = getKillSetForBlockStore(tree->AsBlk());
             break;
 
@@ -1157,9 +1156,9 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo
 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
                     if (varTypeIsFloating(varDsc) &&
                         !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex))
-                {
-                    continue;
-                }
+                    {
+                        continue;
+                    }
                 Interval*  interval   = getIntervalForLocalVar(varIndex);
                 const bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH));
 
@@ -1491,9 +1490,25 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation cu
         assert((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE);
         assert((fpCalleeKillSet & RBM_FLT_CALLEE_SAVED) == RBM_NONE);
 
-        // We only need to save the upper half of any large vector vars that are currently live.
-        VARSET_TP       liveLargeVectors(VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars));
-        VarSetOps::Iter iter(compiler, liveLargeVectors);
+        // We should only save the upper half of any large vector vars that are currently live.
+        // However, the liveness information may not be accurate, specially around the place where
+        // we load the LCL_VAR and the node that uses it. Hence, as a conservative approach, we will
+        // add all variables that are live-in/defined in the block. We need to add variable although
+        // it is not in the live-out set, because a variable may get defined before the call and
+        // (last) used after the call.
+        //
+        // This will create more UpperSave/UpperRestore RefPositions then needed, but we need to do
+        // this for correctness anyway.
+        VARSET_TP bbLiveDefs(VarSetOps::Union(compiler, compiler->compCurBB->bbLiveIn, compiler->compCurBB->bbVarDef));
+
+        VARSET_TP liveDefsLargeVectors(VarSetOps::Intersection(compiler, bbLiveDefs, largeVectorVars));
+
+        // Make sure that `liveLargeVectors` captures the currentLiveVars as well.
+        VARSET_TP liveLargeVectors(VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars));
+
+        assert(VarSetOps::IsSubset(compiler, liveLargeVectors, liveDefsLargeVectors));
+
+        VarSetOps::Iter iter(compiler, liveDefsLargeVectors);
         unsigned        varIndex = 0;
         bool            blockAlwaysReturn =
             compiler->compCurBB->KindIs(BBJ_THROW, BBJ_EHFINALLYRET, BBJ_EHFAULTRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET);
@@ -1508,6 +1523,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation cu
                     newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorSave, tree, RBM_FLT_CALLEE_SAVED);
                 varInterval->isPartiallySpilled = true;
                 pos->skipSaveRestore            = blockAlwaysReturn;
+                pos->liveVarUpperSave           = VarSetOps::IsMember(compiler, liveLargeVectors, varIndex);
 #ifdef TARGET_XARCH
                 pos->regOptional = true;
 #endif
@@ -1585,12 +1601,21 @@ void LinearScan::buildUpperVectorRestoreRefPosition(
 {
     if (lclVarInterval->isPartiallySpilled)
     {
-        unsigned     varIndex            = lclVarInterval->getVarIndex(compiler);
-        Interval*    upperVectorInterval = getUpperVectorInterval(varIndex);
-        RefPosition* savePos             = upperVectorInterval->recentRefPosition;
+        lclVarInterval->isPartiallySpilled = false;
+        unsigned     varIndex              = lclVarInterval->getVarIndex(compiler);
+        Interval*    upperVectorInterval   = getUpperVectorInterval(varIndex);
+        RefPosition* savePos               = upperVectorInterval->recentRefPosition;
+        if (!isUse && !savePos->liveVarUpperSave)
+        {
+            // If we are just restoring upper vector at the block boundary and if this is not
+            // a upperVector related to the liveVar, then ignore creating restore for them.
+            // During allocation, we will detect that this was an extra save-upper and skip
+            // the save/restore altogether.
+            return;
+        }
+
         RefPosition* restorePos =
             newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorRestore, node, RBM_NONE);
-        lclVarInterval->isPartiallySpilled = false;
 
         restorePos->setMultiRegIdx(multiRegIdx);
 
@@ -1598,12 +1623,14 @@ void LinearScan::buildUpperVectorRestoreRefPosition(
         {
             // If there was a use of the restore before end of the block restore,
             // then it is needed and cannot be eliminated
-            savePos->skipSaveRestore = false;
+            savePos->skipSaveRestore  = false;
+            savePos->liveVarUpperSave = true;
         }
         else
         {
             // otherwise, just do the whatever was decided for save position
-            restorePos->skipSaveRestore = savePos->skipSaveRestore;
+            restorePos->skipSaveRestore  = savePos->skipSaveRestore;
+            restorePos->liveVarUpperSave = savePos->liveVarUpperSave;
         }
 
 #ifdef TARGET_XARCH
@@ -2100,14 +2127,14 @@ void LinearScan::UpdateRegStateForStructArg(LclVarDsc* argDsc)
 
     if ((argDsc->GetArgReg() != REG_STK) && (argDsc->GetArgReg() != REG_NA))
     {
-        if (genRegMask(argDsc->GetArgReg()) & (RBM_ALLFLOAT))
+        if ((genRegMask(argDsc->GetArgReg()) & RBM_ALLFLOAT) != RBM_NONE)
         {
-            assert(genRegMask(argDsc->GetArgReg()) & (RBM_FLTARG_REGS));
+            assert((genRegMask(argDsc->GetArgReg()) & RBM_FLTARG_REGS) != RBM_NONE);
             floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetArgReg());
         }
         else
         {
-            assert(genRegMask(argDsc->GetArgReg()) & (RBM_ARG_REGS));
+            assert((genRegMask(argDsc->GetArgReg()) & fullIntArgRegMask(compiler->info.compCallConv)) != RBM_NONE);
             intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetArgReg());
         }
     }
@@ -2121,7 +2148,7 @@ void LinearScan::UpdateRegStateForStructArg(LclVarDsc* argDsc)
         }
         else
         {
-            assert(genRegMask(argDsc->GetOtherArgReg()) & (RBM_ARG_REGS));
+            assert((genRegMask(argDsc->GetOtherArgReg()) & fullIntArgRegMask(compiler->info.compCallConv)) != RBM_NONE);
             intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetOtherArgReg());
         }
     }
@@ -2191,7 +2218,7 @@ template void LinearScan::buildIntervals<false>();
 //                 which we will do register allocation.
 //
 template <bool localVarsEnregistered>
-void           LinearScan::buildIntervals()
+void LinearScan::buildIntervals()
 {
     BasicBlock* block;
 
@@ -2264,6 +2291,32 @@ void           LinearScan::buildIntervals()
     regsInUseThisLocation                   = RBM_NONE;
     regsInUseNextLocation                   = RBM_NONE;
 
+#ifdef SWIFT_SUPPORT
+    if (compiler->info.compCallConv == CorInfoCallConvExtension::Swift)
+    {
+        for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++)
+        {
+            LclVarDsc* argDsc = compiler->lvaGetDesc(lclNum);
+
+            if ((argDsc->lvRefCnt() == 0) && !compiler->opts.compDbgCode)
+            {
+                continue;
+            }
+
+            const ABIPassingInformation& abiInfo = compiler->lvaParameterPassingInfo[lclNum];
+            for (unsigned i = 0; i < abiInfo.NumSegments; i++)
+            {
+                const ABIPassingSegment& seg = abiInfo.Segments[i];
+                if (seg.IsPassedInRegister())
+                {
+                    RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? floatRegState : intRegState;
+                    regState->rsCalleeRegArgMaskLiveIn |= seg.GetRegisterMask();
+                }
+            }
+        }
+    }
+#endif
+
     for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
     {
         LclVarDsc* argDsc = compiler->lvaGetDescByTrackedIndex(varIndex);
@@ -2447,7 +2500,7 @@ void           LinearScan::buildIntervals()
                             assert(isCandidateVar(varDsc));
                             Interval*    interval = getIntervalForLocalVar(varIndex);
                             RefPosition* pos      = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
-                                                              allRegs(interval->registerType));
+                                                                   allRegs(interval->registerType));
                             pos->setRegOptional(true);
                         }
                         JITDUMP("Finished creating dummy definitions\n\n");
@@ -2482,18 +2535,31 @@ void           LinearScan::buildIntervals()
             // assert(block->isRunRarely());
         }
 
+        // For Swift calls there can be an arbitrary amount of codegen related
+        // to homing of decomposed struct parameters passed on stack. We cannot
+        // do that in the prolog. We handle registers in the prolog and the
+        // stack args in the scratch BB that we have ensured exists. The
+        // handling clobbers REG_SCRATCH, so kill it here.
+        if ((block == compiler->fgFirstBB) && compiler->lvaHasAnySwiftStackParamToReassemble())
+        {
+            assert(compiler->fgFirstBBisScratch());
+            addRefsForPhysRegMask(genRegMask(REG_SCRATCH), currentLoc + 1, RefTypeKill, true);
+            currentLoc += 2;
+        }
+
         // For frame poisoning we generate code into scratch BB right after prolog since
         // otherwise the prolog might become too large. In this case we will put the poison immediate
         // into the scratch register, so it will be killed here.
-        if (compiler->compShouldPoisonFrame() && compiler->fgFirstBBisScratch() && block == compiler->fgFirstBB)
+        if (compiler->compShouldPoisonFrame() && (block == compiler->fgFirstBB))
         {
+            assert(compiler->fgFirstBBisScratch());
             regMaskTP killed;
 #if defined(TARGET_XARCH)
             // Poisoning uses EAX for small vars and rep stosd that kills edi, ecx and eax for large vars.
             killed = RBM_EDI | RBM_ECX | RBM_EAX;
 #else
             // Poisoning uses REG_SCRATCH for small vars and memset helper for big vars.
-            killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
+            killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_NATIVE_MEMSET);
 #endif
             addRefsForPhysRegMask(killed, currentLoc + 1, RefTypeKill, true);
             currentLoc += 2;
@@ -2506,7 +2572,6 @@ void           LinearScan::buildIntervals()
             // is at a new location and doesn't interfere with the uses.
             // For multi-reg local stores, the 'BuildMultiRegStoreLoc' method will further increment the
             // location by 2 for each destination register beyond the first.
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
             node->gtSeqNum = currentLoc;
@@ -3032,7 +3097,8 @@ void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates)
             // For all other cases of multi-reg definitions, the registers must be in sequential order.
             if (retTypeDesc != nullptr)
             {
-                thisDstCandidates = genRegMask(tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i));
+                thisDstCandidates = genRegMask(
+                    tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv()));
                 assert((dstCandidates & thisDstCandidates) != RBM_NONE);
             }
             else
@@ -3664,7 +3730,7 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc,
         defCandidates = allRegs(type);
     }
 #else
-    defCandidates  = allRegs(type);
+    defCandidates = allRegs(type);
 #endif // TARGET_X86
 
     RefPosition* def = newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, defCandidates, index);
@@ -3938,113 +4004,114 @@ int LinearScan::BuildReturn(GenTree* tree)
     else
 #endif // !defined(TARGET_64BIT)
         if ((tree->TypeGet() != TYP_VOID) && !op1->isContained())
-    {
-        regMaskTP useCandidates = RBM_NONE;
+        {
+            regMaskTP useCandidates = RBM_NONE;
 
 #if FEATURE_MULTIREG_RET
 #ifdef TARGET_ARM64
-        if (varTypeIsSIMD(tree) && !op1->IsMultiRegLclVar())
-        {
-            BuildUse(op1, RBM_DOUBLERET);
-            return 1;
-        }
-#endif // TARGET_ARM64
-
-        if (varTypeIsStruct(tree))
-        {
-            // op1 has to be either a lclvar or a multi-reg returning call
-            if ((op1->OperGet() == GT_LCL_VAR) && !op1->IsMultiRegLclVar())
+            if (varTypeIsSIMD(tree) && !op1->IsMultiRegLclVar())
             {
-                BuildUse(op1, useCandidates);
+                BuildUse(op1, RBM_DOUBLERET);
+                return 1;
             }
-            else
+#endif // TARGET_ARM64
+
+            if (varTypeIsStruct(tree))
             {
-                noway_assert(op1->IsMultiRegCall() || (op1->IsMultiRegLclVar() && compiler->lvaEnregMultiRegVars));
+                // op1 has to be either a lclvar or a multi-reg returning call
+                if ((op1->OperGet() == GT_LCL_VAR) && !op1->IsMultiRegLclVar())
+                {
+                    BuildUse(op1, useCandidates);
+                }
+                else
+                {
+                    noway_assert(op1->IsMultiRegCall() || (op1->IsMultiRegLclVar() && compiler->lvaEnregMultiRegVars));
 
-                ReturnTypeDesc retTypeDesc = compiler->compRetTypeDesc;
-                const int      srcCount    = retTypeDesc.GetReturnRegCount();
-                assert(op1->GetMultiRegCount(compiler) == static_cast<unsigned>(srcCount));
+                    ReturnTypeDesc retTypeDesc = compiler->compRetTypeDesc;
+                    const int      srcCount    = retTypeDesc.GetReturnRegCount();
+                    assert(op1->GetMultiRegCount(compiler) == static_cast<unsigned>(srcCount));
 
-                // For any source that's coming from a different register file, we need to ensure that
-                // we reserve the specific ABI register we need.
-                bool hasMismatchedRegTypes = false;
-                if (op1->IsMultiRegLclVar())
-                {
-                    for (int i = 0; i < srcCount; i++)
+                    // For any source that's coming from a different register file, we need to ensure that
+                    // we reserve the specific ABI register we need.
+                    bool hasMismatchedRegTypes = false;
+                    if (op1->IsMultiRegLclVar())
                     {
-                        RegisterType srcType = regType(op1->AsLclVar()->GetFieldTypeByIndex(compiler, i));
-                        RegisterType dstType = regType(retTypeDesc.GetReturnRegType(i));
-                        if (srcType != dstType)
+                        for (int i = 0; i < srcCount; i++)
                         {
-                            hasMismatchedRegTypes = true;
-                            regMaskTP dstRegMask  = genRegMask(retTypeDesc.GetABIReturnReg(i));
-
-                            if (varTypeUsesIntReg(dstType))
+                            RegisterType srcType = regType(op1->AsLclVar()->GetFieldTypeByIndex(compiler, i));
+                            RegisterType dstType = regType(retTypeDesc.GetReturnRegType(i));
+                            if (srcType != dstType)
                             {
-                                buildInternalIntRegisterDefForNode(tree, dstRegMask);
-                            }
+                                hasMismatchedRegTypes = true;
+                                regMaskTP dstRegMask =
+                                    genRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv));
+
+                                if (varTypeUsesIntReg(dstType))
+                                {
+                                    buildInternalIntRegisterDefForNode(tree, dstRegMask);
+                                }
 #if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
-                            else if (varTypeUsesMaskReg(dstType))
-                            {
-                                buildInternalMaskRegisterDefForNode(tree, dstRegMask);
-                            }
+                                else if (varTypeUsesMaskReg(dstType))
+                                {
+                                    buildInternalMaskRegisterDefForNode(tree, dstRegMask);
+                                }
 #endif // TARGET_XARCH && FEATURE_SIMD
-                            else
-                            {
-                                assert(varTypeUsesFloatReg(dstType));
-                                buildInternalFloatRegisterDefForNode(tree, dstRegMask);
+                                else
+                                {
+                                    assert(varTypeUsesFloatReg(dstType));
+                                    buildInternalFloatRegisterDefForNode(tree, dstRegMask);
+                                }
                             }
                         }
                     }
-                }
-                for (int i = 0; i < srcCount; i++)
-                {
-                    // We will build uses of the type of the operand registers/fields, and the codegen
-                    // for return will move as needed.
-                    if (!hasMismatchedRegTypes || (regType(op1->AsLclVar()->GetFieldTypeByIndex(compiler, i)) ==
-                                                   regType(retTypeDesc.GetReturnRegType(i))))
+                    for (int i = 0; i < srcCount; i++)
                     {
-                        BuildUse(op1, genRegMask(retTypeDesc.GetABIReturnReg(i)), i);
+                        // We will build uses of the type of the operand registers/fields, and the codegen
+                        // for return will move as needed.
+                        if (!hasMismatchedRegTypes || (regType(op1->AsLclVar()->GetFieldTypeByIndex(compiler, i)) ==
+                                                       regType(retTypeDesc.GetReturnRegType(i))))
+                        {
+                            BuildUse(op1, genRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv)), i);
+                        }
+                        else
+                        {
+                            BuildUse(op1, RBM_NONE, i);
+                        }
                     }
-                    else
+                    if (hasMismatchedRegTypes)
                     {
-                        BuildUse(op1, RBM_NONE, i);
+                        buildInternalRegisterUses();
                     }
+                    return srcCount;
                 }
-                if (hasMismatchedRegTypes)
-                {
-                    buildInternalRegisterUses();
-                }
-                return srcCount;
             }
-        }
-        else
+            else
 #endif // FEATURE_MULTIREG_RET
-        {
-            // Non-struct type return - determine useCandidates
-            switch (tree->TypeGet())
             {
-                case TYP_VOID:
-                    useCandidates = RBM_NONE;
-                    break;
-                case TYP_FLOAT:
-                    useCandidates = RBM_FLOATRET;
-                    break;
-                case TYP_DOUBLE:
-                    // We ONLY want the valid double register in the RBM_DOUBLERET mask.
-                    useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE);
-                    break;
-                case TYP_LONG:
-                    useCandidates = RBM_LNGRET;
-                    break;
-                default:
-                    useCandidates = RBM_INTRET;
-                    break;
+                // Non-struct type return - determine useCandidates
+                switch (tree->TypeGet())
+                {
+                    case TYP_VOID:
+                        useCandidates = RBM_NONE;
+                        break;
+                    case TYP_FLOAT:
+                        useCandidates = RBM_FLOATRET;
+                        break;
+                    case TYP_DOUBLE:
+                        // We ONLY want the valid double register in the RBM_DOUBLERET mask.
+                        useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE);
+                        break;
+                    case TYP_LONG:
+                        useCandidates = RBM_LNGRET;
+                        break;
+                    default:
+                        useCandidates = RBM_INTRET;
+                        break;
+                }
+                BuildUse(op1, useCandidates);
+                return 1;
             }
-            BuildUse(op1, useCandidates);
-            return 1;
         }
-    }
 
     // No kills or defs.
     return 0;
diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp
index 67b27aa51300..1ceb61e53625 100644
--- a/src/coreclr/jit/lsraloongarch64.cpp
+++ b/src/coreclr/jit/lsraloongarch64.cpp
@@ -394,7 +394,6 @@ int LinearScan::BuildNode(GenTree* tree)
             break;
 
         case GT_STORE_BLK:
-        case GT_STORE_DYN_BLK:
             srcCount = BuildBlockStore(tree->AsBlk());
             break;
 
@@ -749,7 +748,7 @@ int LinearScan::BuildCall(GenTreeCall* call)
     if (hasMultiRegRetVal)
     {
         assert(retTypeDesc != nullptr);
-        dstCandidates = retTypeDesc->GetABIReturnRegs();
+        dstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv());
     }
     else if (varTypeUsesFloatArgReg(registerType))
     {
@@ -1104,13 +1103,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                 buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
                 break;
 
-            case GenTreeBlk::BlkOpKindHelper:
-                assert(!src->isContained());
-                dstAddrRegMask = RBM_ARG_0;
-                srcRegMask     = RBM_ARG_1;
-                sizeRegMask    = RBM_ARG_2;
-                break;
-
             default:
                 unreached();
         }
@@ -1159,22 +1151,12 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                 buildInternalIntRegisterDefForNode(blkNode);
                 break;
 
-            case GenTreeBlk::BlkOpKindHelper:
-                dstAddrRegMask = RBM_ARG_0;
-                if (srcAddrOrFill != nullptr)
-                {
-                    assert(!srcAddrOrFill->isContained());
-                    srcRegMask = RBM_ARG_1;
-                }
-                sizeRegMask = RBM_ARG_2;
-                break;
-
             default:
                 unreached();
         }
     }
 
-    if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (sizeRegMask != RBM_NONE))
+    if (sizeRegMask != RBM_NONE)
     {
         // Reserve a temp register for the block size argument.
         buildInternalIntRegisterDefForNode(blkNode, sizeRegMask);
@@ -1205,12 +1187,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
         }
     }
 
-    if (blkNode->OperIs(GT_STORE_DYN_BLK))
-    {
-        useCount++;
-        BuildUse(blkNode->AsStoreDynBlk()->gtDynamicSize, sizeRegMask);
-    }
-
     buildInternalRegisterUses();
     regMaskTP killMask = getKillSetForBlockStore(blkNode);
     BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask);
diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp
index ec4ca4a34972..6af21c06ab20 100644
--- a/src/coreclr/jit/lsrariscv64.cpp
+++ b/src/coreclr/jit/lsrariscv64.cpp
@@ -309,7 +309,7 @@ int LinearScan::BuildNode(GenTree* tree)
                         needTemp = true;
                 }
 
-                if (!needTemp && (tree->gtOper == GT_DIV || tree->gtOper == GT_MOD))
+                if (!needTemp && tree->OperIs(GT_DIV, GT_MOD))
                 {
                     if ((exceptions & ExceptionSetFlags::ArithmeticException) != ExceptionSetFlags::None)
                         needTemp = true;
@@ -512,7 +512,6 @@ int LinearScan::BuildNode(GenTree* tree)
             break;
 
         case GT_STORE_BLK:
-        case GT_STORE_DYN_BLK:
             srcCount = BuildBlockStore(tree->AsBlk());
             break;
 
@@ -912,7 +911,7 @@ int LinearScan::BuildCall(GenTreeCall* call)
     if (hasMultiRegRetVal)
     {
         assert(retTypeDesc != nullptr);
-        dstCandidates = retTypeDesc->GetABIReturnRegs();
+        dstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv());
     }
     else if (varTypeUsesFloatArgReg(registerType))
     {
@@ -1260,13 +1259,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                 buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
                 break;
 
-            case GenTreeBlk::BlkOpKindHelper:
-                assert(!src->isContained());
-                dstAddrRegMask = RBM_ARG_0;
-                srcRegMask     = RBM_ARG_1;
-                sizeRegMask    = RBM_ARG_2;
-                break;
-
             default:
                 unreached();
         }
@@ -1315,22 +1307,12 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                 buildInternalIntRegisterDefForNode(blkNode);
                 break;
 
-            case GenTreeBlk::BlkOpKindHelper:
-                dstAddrRegMask = RBM_ARG_0;
-                if (srcAddrOrFill != nullptr)
-                {
-                    assert(!srcAddrOrFill->isContained());
-                    srcRegMask = RBM_ARG_1;
-                }
-                sizeRegMask = RBM_ARG_2;
-                break;
-
             default:
                 unreached();
         }
     }
 
-    if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (sizeRegMask != RBM_NONE))
+    if (sizeRegMask != RBM_NONE)
     {
         // Reserve a temp register for the block size argument.
         buildInternalIntRegisterDefForNode(blkNode, sizeRegMask);
@@ -1361,12 +1343,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
         }
     }
 
-    if (blkNode->OperIs(GT_STORE_DYN_BLK))
-    {
-        useCount++;
-        BuildUse(blkNode->AsStoreDynBlk()->gtDynamicSize, sizeRegMask);
-    }
-
     buildInternalRegisterUses();
     regMaskTP killMask = getKillSetForBlockStore(blkNode);
     BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask);
diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp
index 4fc56947478a..7fe119ccfd16 100644
--- a/src/coreclr/jit/lsraxarch.cpp
+++ b/src/coreclr/jit/lsraxarch.cpp
@@ -507,7 +507,6 @@ int LinearScan::BuildNode(GenTree* tree)
 #endif // FEATURE_PUT_STRUCT_ARG_STK
 
         case GT_STORE_BLK:
-        case GT_STORE_DYN_BLK:
             srcCount = BuildBlockStore(tree->AsBlk());
             break;
 
@@ -591,7 +590,7 @@ int LinearScan::BuildNode(GenTree* tree)
             BuildDef(tree, RBM_EXCEPTION_OBJECT);
             break;
 
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
         case GT_END_LFIN:
             srcCount = 0;
             assert(dstCount == 0);
@@ -633,11 +632,24 @@ int LinearScan::BuildNode(GenTree* tree)
         }
         break;
 
+#ifdef SWIFT_SUPPORT
+        case GT_SWIFT_ERROR:
+            srcCount = 0;
+            assert(dstCount == 1);
+
+            // Any register should do here, but the error register value should immediately
+            // be moved from GT_SWIFT_ERROR's destination register to the SwiftError struct,
+            // and we know REG_SWIFT_ERROR should be busy up to this point, anyway.
+            // By forcing LSRA to use REG_SWIFT_ERROR as both the source and destination register,
+            // we can ensure the redundant move is elided.
+            BuildDef(tree, RBM_SWIFT_ERROR);
+            break;
+#endif // SWIFT_SUPPORT
+
     } // end switch (tree->OperGet())
 
     // We need to be sure that we've set srcCount and dstCount appropriately.
-    // Not that for XARCH, the maximum number of registers defined is 2.
-    assert((dstCount < 2) || ((dstCount == 2) && tree->IsMultiRegNode()));
+    assert((dstCount < 2) || tree->IsMultiRegNode());
     assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
     assert(!tree->IsValue() || (dstCount != 0));
     assert(dstCount == tree->GetRegisterDstCount(compiler));
@@ -698,7 +710,6 @@ bool LinearScan::isRMWRegOper(GenTree* tree)
 {
     // TODO-XArch-CQ: Make this more accurate.
     // For now, We assume that most binary operators are of the RMW form.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef FEATURE_HW_INTRINSICS
     assert(tree->OperIsBinary() || (tree->OperIsMultiOp() && (tree->AsMultiOp()->GetOperandCount() <= 2)));
@@ -1057,7 +1068,6 @@ int LinearScan::BuildShiftRotate(GenTree* tree)
     // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
     // if the shift count is known to be non-zero and in the range depending on the
     // operand size.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef TARGET_X86
     // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
@@ -1157,7 +1167,6 @@ int LinearScan::BuildCall(GenTreeCall* call)
     RegisterType registerType = regType(call);
 
     // Set destination candidates for return value of the call.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef TARGET_X86
     if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
@@ -1170,33 +1179,33 @@ int LinearScan::BuildCall(GenTreeCall* call)
     else
 #endif // TARGET_X86
         if (hasMultiRegRetVal)
-    {
-        assert(retTypeDesc != nullptr);
-        dstCandidates = retTypeDesc->GetABIReturnRegs();
-        assert((int)genCountBits(dstCandidates) == dstCount);
-    }
-    else if (varTypeUsesFloatReg(registerType))
-    {
+        {
+            assert(retTypeDesc != nullptr);
+            dstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv());
+            assert((int)genCountBits(dstCandidates) == dstCount);
+        }
+        else if (varTypeUsesFloatReg(registerType))
+        {
 #ifdef TARGET_X86
-        // The return value will be on the X87 stack, and we will need to move it.
-        dstCandidates = allRegs(registerType);
+            // The return value will be on the X87 stack, and we will need to move it.
+            dstCandidates = allRegs(registerType);
 #else  // !TARGET_X86
         dstCandidates = RBM_FLOATRET;
 #endif // !TARGET_X86
-    }
-    else
-    {
-        assert(varTypeUsesIntReg(registerType));
-
-        if (registerType == TYP_LONG)
-        {
-            dstCandidates = RBM_LNGRET;
         }
         else
         {
-            dstCandidates = RBM_INTRET;
+            assert(varTypeUsesIntReg(registerType));
+
+            if (registerType == TYP_LONG)
+            {
+                dstCandidates = RBM_LNGRET;
+            }
+            else
+            {
+                dstCandidates = RBM_INTRET;
+            }
         }
-    }
 
     // number of args to a call =
     // callRegArgs + (callargs - placeholders, setup, etc)
@@ -1341,12 +1350,45 @@ int LinearScan::BuildCall(GenTreeCall* call)
         srcCount += BuildOperandUses(ctrlExpr, ctrlExprCandidates);
     }
 
+    if (call->NeedsVzeroupper(compiler))
+    {
+        // Much like for Contains256bitOrMoreAVX, we want to track if any
+        // call needs a vzeroupper inserted. This allows us to reduce
+        // the total number of vzeroupper being inserted for cases where
+        // no 256+ AVX is used directly by the method.
+
+        compiler->GetEmitter()->SetContainsCallNeedingVzeroupper(true);
+    }
+
     buildInternalRegisterUses();
 
     // Now generate defs and kills.
     regMaskTP killMask = getKillSetForCall(call);
     BuildDefsWithKills(call, dstCount, dstCandidates, killMask);
 
+#ifdef SWIFT_SUPPORT
+    if (call->HasSwiftErrorHandling())
+    {
+        // Tree is a Swift call with error handling; error register should have been killed
+        assert((killMask & RBM_SWIFT_ERROR) != 0);
+
+        // After a Swift call that might throw returns, we expect the error register to be consumed
+        // by a GT_SWIFT_ERROR node. However, we want to ensure the error register won't be trashed
+        // before GT_SWIFT_ERROR can consume it.
+        // (For example, the PInvoke epilog comes before the error register store.)
+        // To do so, delay the freeing of the error register until the next node.
+        // This only works if the next node after the call is the GT_SWIFT_ERROR node.
+        // (InsertPInvokeCallEpilog should have moved the GT_SWIFT_ERROR node during lowering.)
+        assert(call->gtNext != nullptr);
+        assert(call->gtNext->OperIs(GT_SWIFT_ERROR));
+
+        // We could use RefTypeKill, but RefTypeFixedReg is used less commonly, so the check for delayRegFree
+        // during register allocation should be cheaper in terms of TP.
+        RefPosition* pos = newRefPosition(REG_SWIFT_ERROR, currentLoc, RefTypeFixedReg, call, RBM_SWIFT_ERROR);
+        setDelayFree(pos);
+    }
+#endif // SWIFT_SUPPORT
+
     // No args are placed in registers anymore.
     placedArgRegs      = RBM_NONE;
     numPlacedArgLocals = 0;
@@ -1424,14 +1466,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                 buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
                 break;
 
-#ifdef TARGET_AMD64
-            case GenTreeBlk::BlkOpKindHelper:
-                dstAddrRegMask = RBM_ARG_0;
-                srcRegMask     = RBM_ARG_1;
-                sizeRegMask    = RBM_ARG_2;
-                break;
-#endif
-
             default:
                 unreached();
         }
@@ -1467,7 +1501,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                     // We need a float temporary if we're doing SIMD operations
 
                     buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
-                    SetContainsAVXFlags(size);
+                    SetContainsAVXFlags(regSize);
 
                     remainder %= regSize;
                 }
@@ -1552,14 +1586,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                 sizeRegMask    = RBM_RCX;
                 break;
 
-#ifdef TARGET_AMD64
-            case GenTreeBlk::BlkOpKindHelper:
-                dstAddrRegMask = RBM_ARG_0;
-                srcRegMask     = RBM_ARG_1;
-                sizeRegMask    = RBM_ARG_2;
-                break;
-#endif
-
             default:
                 unreached();
         }
@@ -1572,7 +1598,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
         }
     }
 
-    if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (sizeRegMask != RBM_NONE))
+    if (sizeRegMask != RBM_NONE)
     {
         // Reserve a temp register for the block size argument.
         buildInternalIntRegisterDefForNode(blkNode, sizeRegMask);
@@ -1603,12 +1629,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
         }
     }
 
-    if (blkNode->OperIs(GT_STORE_DYN_BLK))
-    {
-        useCount++;
-        BuildUse(blkNode->AsStoreDynBlk()->gtDynamicSize, sizeRegMask);
-    }
-
 #ifdef TARGET_X86
     // If we require a byte register on x86, we may run into an over-constrained situation
     // if we have BYTE_REG_COUNT or more uses (currently, it can be at most 4, if both the
@@ -2154,8 +2174,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
             }
         }
 
-        if (HWIntrinsicInfo::IsEmbRoundingCompatible(intrinsicId) &&
-            numArgs == HWIntrinsicInfo::EmbRoundingArgPos(intrinsicId) && !lastOp->IsCnsIntOrI())
+        if (intrinsicTree->OperIsEmbRoundingEnabled() && !lastOp->IsCnsIntOrI())
         {
             buildInternalIntRegisterDefForNode(intrinsicTree);
             buildInternalIntRegisterDefForNode(intrinsicTree);
@@ -2392,13 +2411,17 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
             case NI_FMA_MultiplySubtractNegatedScalar:
             case NI_FMA_MultiplySubtractScalar:
             case NI_AVX512F_FusedMultiplyAdd:
+            case NI_AVX512F_FusedMultiplyAddScalar:
             case NI_AVX512F_FusedMultiplyAddNegated:
+            case NI_AVX512F_FusedMultiplyAddNegatedScalar:
             case NI_AVX512F_FusedMultiplyAddSubtract:
             case NI_AVX512F_FusedMultiplySubtract:
+            case NI_AVX512F_FusedMultiplySubtractScalar:
             case NI_AVX512F_FusedMultiplySubtractAdd:
             case NI_AVX512F_FusedMultiplySubtractNegated:
+            case NI_AVX512F_FusedMultiplySubtractNegatedScalar:
             {
-                assert(numArgs == 3);
+                assert((numArgs == 3) || (intrinsicTree->OperIsEmbRoundingEnabled()));
                 assert(isRMW);
                 assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinsicId));
 
@@ -2496,6 +2519,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
                 srcCount += BuildDelayFreeUses(emitOp2, emitOp1);
                 srcCount += emitOp3->isContained() ? BuildOperandUses(emitOp3) : BuildDelayFreeUses(emitOp3, emitOp1);
 
+                if (intrinsicTree->OperIsEmbRoundingEnabled() && !intrinsicTree->Op(4)->IsCnsIntOrI())
+                {
+                    srcCount += BuildOperandUses(intrinsicTree->Op(4));
+                }
+
                 buildUses = false;
                 break;
             }
@@ -2988,7 +3016,6 @@ int LinearScan::BuildMul(GenTree* tree)
     // three-op form:   reg = r/m * imm
 
     // This special widening 32x32->64 MUL is not used on x64
-    CLANG_FORMAT_COMMENT_ANCHOR;
 #if defined(TARGET_X86)
     if (tree->OperGet() != GT_MUL_LONG)
 #endif
@@ -3053,18 +3080,9 @@ void LinearScan::SetContainsAVXFlags(unsigned sizeOfSIMDVector /* = 0*/)
 
     compiler->GetEmitter()->SetContainsAVX(true);
 
-    if (sizeOfSIMDVector == 32)
-    {
-        compiler->GetEmitter()->SetContains256bitOrMoreAVX(true);
-    }
-
-    if (!compiler->canUseEvexEncoding())
-    {
-        return;
-    }
-
-    if (sizeOfSIMDVector == 64)
+    if (sizeOfSIMDVector >= 32)
     {
+        assert((sizeOfSIMDVector == 32) || ((sizeOfSIMDVector == 64) && compiler->canUseEvexEncoding()));
         compiler->GetEmitter()->SetContains256bitOrMoreAVX(true);
     }
 }
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 3b36a54743ca..c07a412a81a6 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -338,16 +338,22 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
             && tree->gtOverflow()
 #elif defined(TARGET_AMD64)
             // Amd64: src = float, dst = uint64 or overflow conversion.
-            // This goes through helper and hence src needs to be converted to double.
-            && (tree->gtOverflow() || (dstType == TYP_ULONG))
+            // src needs to be converted to double except for the following cases
+            //       dstType = int/uint/ulong for AVX512F
+            //       dstType = int for SSE41
+            // For pre-SSE41, the all src is converted to TYP_DOUBLE
+            // and goes through helpers.
+            && (tree->gtOverflow() || (dstType == TYP_LONG) ||
+                !(compOpportunisticallyDependsOn(InstructionSet_AVX512F) ||
+                  (dstType == TYP_INT && compOpportunisticallyDependsOn(InstructionSet_SSE41))))
 #elif defined(TARGET_ARM)
             // Arm: src = float, dst = int64/uint64 or overflow conversion.
             && (tree->gtOverflow() || varTypeIsLong(dstType))
 #else
             // x86: src = float, dst = uint32/int64/uint64 or overflow conversion.
-            && (tree->gtOverflow() || varTypeIsLong(dstType) || (dstType == TYP_UINT))
+            && (tree->gtOverflow() || varTypeIsIntegral(dstType))
 #endif
-                )
+        )
         {
             oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE);
         }
@@ -371,25 +377,39 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
 #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
                 return nullptr;
 #else
+#if defined(TARGET_AMD64)
+                // Following nodes are handled when lowering the nodes
+                //     float  -> ulong/uint/int for AVX512F
+                //     double -> ulong/uint/long/int for AVX512F
+                //     float  -> int for SSE41
+                //     double -> int/uint/long for SSE41
+                // For all other conversions, we use helper functions.
+                if (compOpportunisticallyDependsOn(InstructionSet_AVX512F) ||
+                    ((dstType != TYP_ULONG) && compOpportunisticallyDependsOn(InstructionSet_SSE41)))
+                {
+                    if (tree->CastOp() != oper)
+                    {
+                        tree->CastOp() = oper;
+                    }
+                    return nullptr;
+                }
+#endif // TARGET_AMD64
                 switch (dstType)
                 {
                     case TYP_INT:
+#ifdef TARGET_XARCH
+                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
+#endif // TARGET_XARCH
                         return nullptr;
 
                     case TYP_UINT:
-#if defined(TARGET_ARM) || defined(TARGET_AMD64)
+#if defined(TARGET_ARM)
                         return nullptr;
-#else  // TARGET_X86
+#endif
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
-#endif // TARGET_X86
 
                     case TYP_LONG:
-#ifdef TARGET_AMD64
-                        // SSE2 has instructions to convert a float/double directly to a long
-                        return nullptr;
-#else  // !TARGET_AMD64
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
-#endif // !TARGET_AMD64
 
                     case TYP_ULONG:
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
@@ -439,7 +459,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
 #ifdef TARGET_ARM
              && !varTypeIsLong(oper->AsCast()->CastOp())
 #endif
-                 )
+    )
     {
         oper->gtType       = TYP_FLOAT;
         oper->CastToType() = TYP_FLOAT;
@@ -566,7 +586,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
         // we fix this by copying the GC pointer to a non-gc pointer temp.
         noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");
 
-        // We generate an assignment to an int and then do the cast from an int. With this we avoid
+        // We generate a store to an int and then do the cast from an int. With this we avoid
         // the gc problem and we allow casts to bytes, longs,  etc...
         unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
         oper->gtType    = TYP_I_IMPL;
@@ -770,6 +790,10 @@ const char* getWellKnownArgName(WellKnownArg arg)
             return "ValidateIndirectCallTarget";
         case WellKnownArg::DispatchIndirectCallTarget:
             return "DispatchIndirectCallTarget";
+        case WellKnownArg::SwiftError:
+            return "SwiftError";
+        case WellKnownArg::SwiftSelf:
+            return "SwiftSelf";
     }
 
     return "N/A";
@@ -917,9 +941,9 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call)
         }
 #endif // FEATURE_ARG_SPLIT
 
-        // If the argument tree contains an assignment (GTF_ASG) then the argument and
+        // If the argument tree contains a store (GTF_ASG) then the argument and
         // and every earlier argument (except constants) must be evaluated into temps
-        // since there may be other arguments that follow and they may use the value being assigned.
+        // since there may be other arguments that follow and they may use the value being defined.
         //
         // EXAMPLE: ArgTab is "a, a=5, a"
         //          -> when we see the second arg "a=5"
@@ -1011,7 +1035,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call)
         // with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
         // has to be kept in the right order since we will move the call to the first position)
 
-        // For calls we don't have to be quite as conservative as we are with an assignment
+        // For calls we don't have to be quite as conservative as we are with stores
         // since the call won't be modifying any non-address taken LclVars.
 
         if (treatLikeCall)
@@ -1138,7 +1162,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call)
             {
                 if ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0)
                 {
-                    // Spill multireg struct arguments that have Assignments or Calls embedded in them.
+                    // Spill multireg struct arguments that have stores or calls embedded in them.
                     SetNeedsTemp(&arg);
                 }
                 else if (!argx->OperIsLocalRead() && !argx->OperIsLoad())
@@ -1635,7 +1659,7 @@ GenTree* CallArgs::MakeTmpArgNode(Compiler* comp, CallArg* arg)
 //
 //   3. Early: <any node>, Late: nullptr
 //        Arguments that are passed on stack and that do not need an explicit
-//        assignment in the early node list do not require any late node.
+//        temp store in the early node list do not require any late node.
 //
 void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call)
 {
@@ -1680,9 +1704,8 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call)
             }
             else
             {
-                // Create a temp assignment for the argument
-                // Put the temp in the gtCallLateArgs list
-                CLANG_FORMAT_COMMENT_ANCHOR;
+                // Create a temp store for the argument
+                // Put the temp in the late arg list
 
 #ifdef DEBUG
                 if (comp->verbose)
@@ -1697,36 +1720,6 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call)
 #endif
 
                 unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect"));
-                if (argx->gtOper == GT_MKREFANY)
-                {
-                    // For GT_MKREFANY, typically the actual struct copying does
-                    // not have any side-effects and can be delayed. So instead
-                    // of using a temp for the whole struct, we can just use a temp
-                    // for operand that has a side-effect.
-                    GenTree* operand;
-                    if ((argx->AsOp()->gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
-                    {
-                        operand = argx->AsOp()->gtOp1;
-
-                        // In the early argument evaluation, place an assignment to the temp
-                        // from the source operand of the mkrefany
-                        setupArg = comp->gtNewTempStore(tmpVarNum, operand);
-
-                        // Replace the operand for the mkrefany with the new temp.
-                        argx->AsOp()->gtOp1 = comp->gtNewLclvNode(tmpVarNum, operand->TypeGet());
-                    }
-                    else if ((argx->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
-                    {
-                        operand = argx->AsOp()->gtOp2;
-
-                        // In the early argument evaluation, place an assignment to the temp
-                        // from the source operand of the mkrefany
-                        setupArg = comp->gtNewTempStore(tmpVarNum, operand);
-
-                        // Replace the operand for the mkrefany with the new temp.
-                        argx->AsOp()->gtOp2 = comp->gtNewLclvNode(tmpVarNum, operand->TypeGet());
-                    }
-                }
 
                 if (setupArg != nullptr)
                 {
@@ -1873,20 +1866,20 @@ void CallArgs::SetNeedsTemp(CallArg* arg)
 }
 
 //------------------------------------------------------------------------------
-// fgMakeTemp: Make a temp variable with a right-hand side expression as the assignment.
+// fgMakeTemp: Make a temp variable and store 'value' into it.
 //
 // Arguments:
-//    rhs - The right-hand side expression.
+//    value - The expression to store to a temp.
 //
 // Return Value:
-//    'TempInfo' data that contains the GT_STORE_LCL_VAR and GT_LCL_VAR nodes for store
-//    and variable load respectively.
+//    'TempInfo' data that contains the GT_STORE_LCL_VAR and GT_LCL_VAR nodes for
+//    store and variable load respectively.
 //
-TempInfo Compiler::fgMakeTemp(GenTree* rhs)
+TempInfo Compiler::fgMakeTemp(GenTree* value)
 {
     unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgMakeTemp is creating a new local variable"));
-    GenTree* store  = gtNewTempStore(lclNum, rhs);
-    GenTree* load   = gtNewLclvNode(lclNum, genActualType(rhs));
+    GenTree* store  = gtNewTempStore(lclNum, value);
+    GenTree* load   = gtNewLclvNode(lclNum, genActualType(value));
 
     TempInfo tempInfo{};
     tempInfo.store = store;
@@ -2015,7 +2008,6 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
     // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
     // in the implementation of fast tail call.
     // *********** END NOTE *********
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_ARM)
     // A non-standard calling convention using wrapper delegate invoke is used on ARM, only, for wrapper
@@ -2048,17 +2040,21 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
         PushBack(comp, NewCallArg::Primitive(newArg).WellKnown(WellKnownArg::WrapperDelegateCell));
     }
 #endif // defined(TARGET_ARM)
-#ifndef TARGET_X86
+
+    bool addStubCellArg = true;
+
+#ifdef TARGET_X86
     // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
     // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
     // convention for x86/SSE.
 
+    addStubCellArg = call->gtCallType != CT_INDIRECT && comp->IsTargetAbi(CORINFO_NATIVEAOT_ABI);
+#endif
+
     // We are allowed to have a ret buffer argument combined
     // with any of the remaining non-standard arguments
-    //
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
-    if (call->IsVirtualStub())
+    if (call->IsVirtualStub() && addStubCellArg)
     {
         if (!call->IsTailCallViaJitHelper())
         {
@@ -2075,9 +2071,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
             // add as a non-standard arg.
         }
     }
-    else
-#endif // !TARGET_X86
-        if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
+    else if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
     {
         assert(!call->IsUnmanaged());
 
@@ -2136,8 +2130,8 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
     unsigned numArgs = CountArgs();
 
 #ifdef TARGET_X86
-// Compute the maximum number of arguments that can be passed in registers.
-// For X86 we handle the varargs and unmanaged calling conventions
+    // Compute the maximum number of arguments that can be passed in registers.
+    // For X86 we handle the varargs and unmanaged calling conventions
 
 #ifndef UNIX_X86_ABI
     if (call->gtFlags & GTF_CALL_POP_ARGS)
@@ -2185,7 +2179,6 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
 #endif // TARGET_X86
 
     /* Morph the user arguments */
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_ARM)
 
@@ -2540,7 +2533,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
 #elif defined(TARGET_X86)
             || (isStructArg && comp->isTrivialPointerSizedStruct(argSigClass))
 #endif
-                )
+        )
         {
 #ifdef TARGET_ARM
             if (passUsingFloatRegs)
@@ -2659,7 +2652,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
                         {
                             assert(size == 1);
                             size            = 2;
-                            nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum);
+                            nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum, call->GetUnmanagedCallConv());
                         }
                     }
                 }
@@ -2674,7 +2667,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
                 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
                 if (!passUsingFloatRegs && isRegArg && (size > 1))
                 {
-                    nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum + 1);
+                    nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum + 1, call->GetUnmanagedCallConv());
                 }
 
                 // Did we run out of registers when we had a 16-byte struct (size===2) ?
@@ -2756,7 +2749,6 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
         }
 
         // Now we know if the argument goes in registers or not and how big it is.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef TARGET_ARM
         // If we ever allocate a floating point argument to the stack, then all
@@ -2813,7 +2805,8 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
                 {
                     if (structDesc.IsIntegralSlot(i))
                     {
-                        *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
+                        *nextRegNumPtrs[i] =
+                            genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs, call->GetUnmanagedCallConv());
                         ++structIntRegs;
                     }
                     else if (structDesc.IsSseSlot(i))
@@ -2827,8 +2820,9 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
             else
             {
                 // fill in or update the argInfo table
-                nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
-                                                : genMapIntRegArgNumToRegNum(intArgRegNum);
+                nextRegNum = passUsingFloatRegs
+                                 ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
+                                 : genMapIntRegArgNumToRegNum(intArgRegNum, call->GetUnmanagedCallConv());
             }
 
 #ifdef WINDOWS_AMD64_ABI
@@ -2841,8 +2835,6 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
             arg.AbiInfo.NumRegs = size;
             arg.AbiInfo.SetByteSize(byteSize, argAlignBytes, isStructArg, isFloatHfa);
 #ifdef UNIX_AMD64_ABI
-            arg.AbiInfo.StructIntRegs   = structIntRegs;
-            arg.AbiInfo.StructFloatRegs = structFloatRegs;
 
             if (isStructArg)
             {
@@ -2961,9 +2953,9 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
 #ifdef WINDOWS_AMD64_ABI
                             // Whenever we pass an integer register argument
                             // we skip the corresponding floating point register argument
-                            intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
+                            intArgRegNum = min(intArgRegNum + size, (unsigned)MAX_REG_ARG);
 #endif // WINDOWS_AMD64_ABI
-                            // No supported architecture supports partial structs using float registers.
+       // No supported architecture supports partial structs using float registers.
                             assert(fltArgRegNum <= MAX_FLOAT_REG_ARG);
                         }
                         else
@@ -2972,7 +2964,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
                             intArgRegNum += size;
 
 #ifdef WINDOWS_AMD64_ABI
-                            fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
+                            fltArgRegNum = min(fltArgRegNum + size, (unsigned)MAX_FLOAT_REG_ARG);
 #endif // WINDOWS_AMD64_ABI
                         }
                     }
@@ -3046,7 +3038,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
 unsigned CallArgs::OutgoingArgsStackSize() const
 {
     unsigned aligned = Compiler::GetOutgoingArgByteSize(m_nextStackByteOffset);
-    return max(aligned, MIN_ARG_AREA_FOR_CALL);
+    return max(aligned, (unsigned)MIN_ARG_AREA_FOR_CALL);
 }
 
 //------------------------------------------------------------------------
@@ -3110,9 +3102,9 @@ unsigned CallArgs::CountUserArgs()
 //    arguments, e.g. into registers or onto the stack.
 //
 //    The "non-late arguments", are doing the in-order evaluation of the
-//    arguments that might have side-effects, such as embedded assignments,
-//    calls or possible throws. In these cases, it and earlier arguments must
-//    be evaluated to temps.
+//    arguments that might have side-effects, such as embedded stores, calls
+//    or possible throws. In these cases, it and earlier arguments must be
+//    evaluated to temps.
 //
 //    On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
 //    if we have any nested calls, we need to defer the copying of the argument
@@ -3198,7 +3190,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
         GenTree* argObj         = argx->gtEffectiveVal();
         bool     makeOutArgCopy = false;
 
-        if (isStructArg && !reMorphing && !argObj->OperIs(GT_MKREFANY))
+        if (isStructArg && !reMorphing)
         {
             unsigned originalSize;
             if (argObj->TypeGet() == TYP_STRUCT)
@@ -3269,12 +3261,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                     assert(arg.AbiInfo.GetStackSlotsNumber() == 1);
                     makeOutArgCopy = true;
 #else  // UNIX_AMD64_ABI
-                    // On Unix, structs are always passed by value.
-                    // We only need a copy if we have one of the following:
-                    // - The sizes don't match for a non-lclVar argument.
-                    // - We have a known struct type (e.g. SIMD) that requires multiple registers.
-                    // TODO-Amd64-Unix-Throughput: We don't need to keep the structDesc in the argEntry if it's not
-                    // actually passed in registers.
+       // On Unix, structs are always passed by value.
+       // We only need a copy if we have one of the following:
+       // - The sizes don't match for a non-lclVar argument.
+       // - We have a known struct type (e.g. SIMD) that requires multiple registers.
+       // TODO-Amd64-Unix-Throughput: We don't need to keep the structDesc in the argEntry if it's not
+       // actually passed in registers.
                     if (arg.AbiInfo.IsPassedInRegisters())
                     {
                         if (argObj->OperIs(GT_BLK))
@@ -3359,9 +3351,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                         }
 #ifdef TARGET_AMD64
                         else if (!argObj->OperIs(GT_LCL_VAR) || !argObj->TypeIs(TYP_SIMD8)) // Handled by lowering.
-#else  // !TARGET_ARM64
+#else                                                                                       // !TARGET_ARM64
                         else
-#endif // !TARGET_ARM64
+#endif                                                                                      // !TARGET_ARM64
                         {
                             // TODO-CQ: perform this transformation in lowering instead of here and
                             // avoid marking enregisterable structs DNER.
@@ -3396,40 +3388,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
             }
         }
 
-        if (argx->gtOper == GT_MKREFANY)
-        {
-            // 'Lower' the MKREFANY tree and insert it.
-            noway_assert(!reMorphing);
-
-#ifdef TARGET_X86
-            // Build the mkrefany as a GT_FIELD_LIST
-            GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList();
-            fieldList->AddField(this, argx->AsOp()->gtGetOp1(), OFFSETOF__CORINFO_TypedReference__dataPtr, TYP_BYREF);
-            fieldList->AddField(this, argx->AsOp()->gtGetOp2(), OFFSETOF__CORINFO_TypedReference__type, TYP_I_IMPL);
-            arg.SetEarlyNode(fieldList);
-#else  // !TARGET_X86
-
-            // Get a new temp
-            // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany
-            unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
-            lvaSetStruct(tmp, impGetRefAnyClass(), false);
-            lvaSetVarAddrExposed(tmp DEBUGARG(AddressExposedReason::TOO_CONSERVATIVE));
-
-            // Build the mkrefany as a comma node: (tmp.ptr=argx),(tmp.type=handle)
-            GenTree* storePtrSlot =
-                gtNewStoreLclFldNode(tmp, TYP_BYREF, OFFSETOF__CORINFO_TypedReference__dataPtr, argx->AsOp()->gtOp1);
-            GenTree* storeTypeSlot =
-                gtNewStoreLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__type, argx->AsOp()->gtOp2);
-            GenTree* store = gtNewOperNode(GT_COMMA, TYP_VOID, storePtrSlot, storeTypeSlot);
-
-            // Change the expression to "(tmp=val)"
-            arg.SetEarlyNode(store);
-            call->gtArgs.SetTemp(&arg, tmp);
-            flagsSummary |= GTF_ASG;
-            hasMultiregStructArgs |= ((arg.AbiInfo.ArgType == TYP_STRUCT) && !arg.AbiInfo.PassedByRef);
-#endif // !TARGET_X86
-        }
-
 #if FEATURE_MULTIREG_ARGS
         if (!isStructArg)
         {
@@ -3946,7 +3904,6 @@ GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl)
 void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
 {
     GenTree* argx = arg->GetEarlyNode();
-    noway_assert(!argx->OperIs(GT_MKREFANY));
 
 #if FEATURE_IMPLICIT_BYREFS
     // If we're optimizing, see if we can avoid making a copy.
@@ -4027,18 +3984,18 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
     if (!opts.MinOpts())
     {
         found = ForEachHbvBitSet(*fgAvailableOutgoingArgTemps, [&](indexType lclNum) {
-                    LclVarDsc*   varDsc = lvaGetDesc((unsigned)lclNum);
-                    ClassLayout* layout = varDsc->GetLayout();
-                    if (!layout->IsBlockLayout() && (layout->GetClassHandle() == copyBlkClass))
-                    {
-                        tmp = (unsigned)lclNum;
-                        JITDUMP("reusing outgoing struct arg V%02u\n", tmp);
-                        fgAvailableOutgoingArgTemps->clearBit(lclNum);
-                        return HbvWalk::Abort;
-                    }
+            LclVarDsc*   varDsc = lvaGetDesc((unsigned)lclNum);
+            ClassLayout* layout = varDsc->GetLayout();
+            if (!layout->IsBlockLayout() && (layout->GetClassHandle() == copyBlkClass))
+            {
+                tmp = (unsigned)lclNum;
+                JITDUMP("reusing outgoing struct arg V%02u\n", tmp);
+                fgAvailableOutgoingArgTemps->clearBit(lclNum);
+                return HbvWalk::Abort;
+            }
 
-                    return HbvWalk::Continue;
-                }) == HbvWalk::Abort;
+            return HbvWalk::Continue;
+        }) == HbvWalk::Abort;
     }
 
     // Create the CopyBlk tree and insert it.
@@ -4081,7 +4038,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
     GenTree* argNode = call->gtArgs.MakeTmpArgNode(this, arg);
 
     // Change the expression to "(tmp=val),tmp"
-    argNode      = gtNewOperNode(GT_COMMA, argNode->TypeGet(), copyBlk, argNode);
+    argNode = gtNewOperNode(GT_COMMA, argNode->TypeGet(), copyBlk, argNode);
 
 #endif // !FEATURE_FIXED_OUT_ARGS
 
@@ -4261,8 +4218,8 @@ void Compiler::fgMoveOpsLeft(GenTree* tree)
 //
 // We expand the GT_INDEX_ADDR node into a larger tree that evaluates the array
 // base and index. The simplest expansion is a GT_COMMA with a GT_BOUNDS_CHECK.
-// For complex array or index expressions one or more GT_COMMA assignments
-// are inserted so that we only evaluate the array or index expressions once.
+// For complex array or index expressions one or more GT_COMMA stores are inserted
+// so that we only evaluate the array or index expressions once.
 //
 // The fully expanded tree is then morphed.  This causes gtFoldExpr to
 // perform local constant prop and reorder the constants in the tree and
@@ -4348,7 +4305,7 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
         GenTree* arrRef2 = nullptr; // The second copy will be used in array address expression
         GenTree* index2  = nullptr;
 
-        // If the arrRef or index expressions involves an assignment, a call, or reads from global memory,
+        // If the arrRef or index expressions involves a store, a call, or reads from global memory,
         // then we *must* allocate a temporary in which to "localize" those values, to ensure that the
         // same values are used in the bounds check and the actual dereference.
         // Also we allocate the temporary when the expression is sufficiently complex/expensive. We special
@@ -4510,7 +4467,7 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
 
     GenTree* tree = addr;
 
-    // Prepend the bounds check and the assignment trees that were created (if any).
+    // Prepend the bounds check and the store trees that were created (if any).
     if (boundsCheck != nullptr)
     {
         // This is changing a value dependency (INDEX_ADDR node) into a flow
@@ -4584,7 +4541,7 @@ GenTree* Compiler::fgMorphLeafLocal(GenTreeLclVarCommon* lclNode)
 #if FEATURE_IMPLICIT_BYREFS
         || varDsc->lvIsLastUseCopyOmissionCandidate
 #endif
-        )
+    )
     {
         lclNode->gtFlags |= GTF_GLOB_REF;
     }
@@ -4656,9 +4613,9 @@ GenTree* Compiler::fgMorphExpandStackArgForVarArgs(GenTreeLclVarCommon* lclNode)
     GenTree* argNode;
     if (lclNode->OperIsLocalStore())
     {
-        GenTree* data = lclNode->Data();
-        argNode       = lclNode->TypeIs(TYP_STRUCT) ? gtNewStoreBlkNode(lclNode->GetLayout(this), argAddr, data)
-                                              : gtNewStoreIndNode(lclNode->TypeGet(), argAddr, data)->AsIndir();
+        GenTree* value = lclNode->Data();
+        argNode        = lclNode->TypeIs(TYP_STRUCT) ? gtNewStoreBlkNode(lclNode->GetLayout(this), argAddr, value)
+                                                     : gtNewStoreIndNode(lclNode->TypeGet(), argAddr, value)->AsIndir();
     }
     else if (lclNode->OperIsLocalRead())
     {
@@ -4686,17 +4643,15 @@ GenTree* Compiler::fgMorphExpandStackArgForVarArgs(GenTreeLclVarCommon* lclNode)
 //
 GenTree* Compiler::fgMorphExpandImplicitByRefArg(GenTreeLclVarCommon* lclNode)
 {
-    if (!fgGlobalMorph)
-    {
-        return nullptr;
-    }
-
     unsigned   lclNum         = lclNode->GetLclNum();
     LclVarDsc* varDsc         = lvaGetDesc(lclNum);
     unsigned   fieldOffset    = 0;
     unsigned   newLclNum      = BAD_VAR_NUM;
     bool       isStillLastUse = false;
 
+    assert(lvaIsImplicitByRefLocal(lclNum) ||
+           (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl)));
+
     if (lvaIsImplicitByRefLocal(lclNum))
     {
         // The SIMD transformation to coalesce contiguous references to SIMD vector fields will re-invoke
@@ -4746,16 +4701,12 @@ GenTree* Compiler::fgMorphExpandImplicitByRefArg(GenTreeLclVarCommon* lclNode)
             }
         }
     }
-    else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl))
+    else
     {
         // This was a field reference to an implicit-by-reference struct parameter that was dependently promoted.
         newLclNum   = varDsc->lvParentLcl;
         fieldOffset = varDsc->lvFldOffset;
     }
-    else
-    {
-        return nullptr;
-    }
 
     // Add a level of indirection to this node. The "base" will be a local node referring to "newLclNum".
     // We will also add an offset, and, if the original "lclNode" represents a location, a dereference.
@@ -4817,7 +4768,16 @@ GenTree* Compiler::fgMorphExpandLocal(GenTreeLclVarCommon* lclNode)
 #ifdef TARGET_X86
     expandedTree = fgMorphExpandStackArgForVarArgs(lclNode);
 #else
-    expandedTree = fgMorphExpandImplicitByRefArg(lclNode);
+#if FEATURE_IMPLICIT_BYREFS
+    if (fgGlobalMorph)
+    {
+        LclVarDsc* dsc = lvaGetDesc(lclNode);
+        if (dsc->lvIsImplicitByRef || (dsc->lvIsStructField && lvaIsImplicitByRefLocal(dsc->lvParentLcl)))
+        {
+            expandedTree = fgMorphExpandImplicitByRefArg(lclNode);
+        }
+    }
+#endif
 #endif
 
     if (expandedTree != nullptr)
@@ -4825,8 +4785,8 @@ GenTree* Compiler::fgMorphExpandLocal(GenTreeLclVarCommon* lclNode)
         return expandedTree;
     }
 
-    // Small-typed arguments and aliased locals are normalized on load. Other small-typed locals are
-    // normalized on store. If it is an assignment to one of the latter, insert the cast on source.
+    // Small-typed arguments and aliased locals are normalized on load. Other small-typed
+    // locals are normalized on store. If it is the latter case, insert the cast on source.
     if (fgGlobalMorph && lclNode->OperIs(GT_STORE_LCL_VAR) && genActualTypeIsInt(lclNode))
     {
         LclVarDsc* varDsc = lvaGetDesc(lclNode);
@@ -5382,7 +5342,6 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason)
 
     // To reach here means that the return types of the caller and callee are tail call compatible.
     // In the case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     if (callee->IsTailPrefixedCall())
@@ -5525,7 +5484,6 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason)
     // We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg
     // method. This is due to the ABI differences for native vararg methods for these platforms. There is
     // work required to shuffle arguments to the correct locations.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
     if (TargetOS::IsWindows && TargetArchitecture::IsArmArch && (info.compIsVarArgs || callee->IsVarargs()))
     {
@@ -6103,19 +6061,11 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
 
     // If this block has a flow successor, make suitable updates.
     //
-    BasicBlock* nextBlock = compCurBB->GetUniqueSucc();
-
-    if (nextBlock == nullptr)
-    {
-        // No unique successor. compCurBB should be a return.
-        //
-        assert(compCurBB->KindIs(BBJ_RETURN));
-    }
-    else
+    if (compCurBB->KindIs(BBJ_ALWAYS))
     {
-        // Flow no longer reaches nextBlock from here.
+        // Flow no longer reaches the target from here.
         //
-        fgRemoveRefPred(nextBlock, compCurBB);
+        fgRemoveRefPred(compCurBB->GetTargetEdge());
 
         // Adjust profile weights of the successor blocks.
         //
@@ -6125,7 +6075,8 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
         BasicBlock* curBlock = compCurBB;
         if (curBlock->hasProfileWeight())
         {
-            weight_t weightLoss = curBlock->bbWeight;
+            weight_t    weightLoss = curBlock->bbWeight;
+            BasicBlock* nextBlock  = curBlock->GetTarget();
 
             while (nextBlock->hasProfileWeight())
             {
@@ -6154,15 +6105,22 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
                             nextBlock->bbNum, nextWeight, compCurBB->bbNum, weightLoss);
                 }
 
-                curBlock  = nextBlock;
-                nextBlock = curBlock->GetUniqueSucc();
-                if (nextBlock == nullptr)
+                if (!nextBlock->KindIs(BBJ_ALWAYS))
                 {
                     break;
                 }
+
+                curBlock  = nextBlock;
+                nextBlock = curBlock->GetTarget();
             }
         }
     }
+    else
+    {
+        // No unique successor. compCurBB should be a return.
+        //
+        assert(compCurBB->KindIs(BBJ_RETURN));
+    }
 
 #if !FEATURE_TAILCALL_OPT_SHARED_RETURN
     // We enable shared-ret tail call optimization for recursive calls even if
@@ -6173,8 +6131,7 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
         // Many tailcalls will have call and ret in the same block, and thus be
         // BBJ_RETURN, but if the call falls through to a ret, and we are doing a
         // tailcall, change it here.
-        // (compCurBB may have a jump target, so use SetKind() to avoid nulling it)
-        compCurBB->SetKind(BBJ_RETURN);
+        compCurBB->SetKindAndTargetEdge(BBJ_RETURN);
     }
 
     GenTree* stmtExpr = fgMorphStmt->GetRootNode();
@@ -6326,7 +6283,7 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
         {
             // We call CORINFO_HELP_TAILCALL which does not return, so we will
             // not need epilogue.
-            compCurBB->SetKindAndTarget(BBJ_THROW);
+            compCurBB->SetKindAndTargetEdge(BBJ_THROW);
         }
 
         if (isRootReplaced)
@@ -6385,7 +6342,10 @@ void Compiler::fgValidateIRForTailCall(GenTreeCall* call)
         };
 
         TailCallIRValidatorVisitor(Compiler* comp, GenTreeCall* tailcall)
-            : GenTreeVisitor(comp), m_tailcall(tailcall), m_lclNum(BAD_VAR_NUM), m_active(false)
+            : GenTreeVisitor(comp)
+            , m_tailcall(tailcall)
+            , m_lclNum(BAD_VAR_NUM)
+            , m_active(false)
         {
         }
 
@@ -7009,7 +6969,8 @@ GenTree* Compiler::getVirtMethodPointerTree(GenTree*                thisPtr,
 }
 
 //------------------------------------------------------------------------
-// getTokenHandleTree: get a handle tree for a token
+// getTokenHandleTree: get a handle tree for a token. This method should never
+//    be called for tokens imported from inlinees.
 //
 // Arguments:
 //    pResolvedToken - token to get a handle for
@@ -7021,7 +6982,14 @@ GenTree* Compiler::getVirtMethodPointerTree(GenTree*                thisPtr,
 GenTree* Compiler::getTokenHandleTree(CORINFO_RESOLVED_TOKEN* pResolvedToken, bool parent)
 {
     CORINFO_GENERICHANDLE_RESULT embedInfo;
-    info.compCompHnd->embedGenericHandle(pResolvedToken, parent, &embedInfo);
+
+    // NOTE: inlining is done at this point, so we don't know which method contained this token.
+    // It's fine because currently this is never used for something that belongs to an inlinee.
+    // Namely, we currently use it for:
+    //   1) Methods with EH are never inlined
+    //   2) Methods with explicit tail calls are never inlined
+    //
+    info.compCompHnd->embedGenericHandle(pResolvedToken, parent, info.compMethodHnd, &embedInfo);
 
     GenTree* result = getLookupTree(pResolvedToken, &embedInfo.lookup, gtTokenToIconFlags(pResolvedToken->token),
                                     embedInfo.compileTimeHandle);
@@ -7399,9 +7367,9 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa
     // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
     if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
     {
-        GenTree*   arg0Assignment     = gtNewStoreLclVarNode(lvaArg0Var, gtNewLclVarNode(info.compThisArg));
-        Statement* arg0AssignmentStmt = gtNewStmt(arg0Assignment, callDI);
-        fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
+        GenTree*   arg0Store     = gtNewStoreLclVarNode(lvaArg0Var, gtNewLclVarNode(info.compThisArg));
+        Statement* arg0StoreStmt = gtNewStmt(arg0Store, callDI);
+        fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0StoreStmt);
     }
 
     // If compInitMem is set, we may need to zero-initialize some locals. Normally it's done in the prolog
@@ -7435,7 +7403,6 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa
                     // Local copy for implicit byref promotion that was undone. Do
                     // not introduce new references to it, all uses have been
                     // morphed to access the parameter.
-                    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
                     LclVarDsc* param = lvaGetDesc(firstField->lvParentLcl);
@@ -7475,7 +7442,8 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa
     {
         // Todo: this may not look like a viable loop header.
         // Might need the moral equivalent of a scratch BB.
-        block->SetKindAndTarget(BBJ_ALWAYS, fgEntryBB);
+        FlowEdge* const newEdge = fgAddRefPred(fgEntryBB, block);
+        block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
     }
     else
     {
@@ -7490,11 +7458,11 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa
         // block removal on it.
         //
         fgFirstBB->SetFlags(BBF_DONT_REMOVE);
-        block->SetKindAndTarget(BBJ_ALWAYS, fgFirstBB->Next());
+        FlowEdge* const newEdge = fgAddRefPred(fgFirstBB->Next(), block);
+        block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
     }
 
     // Finish hooking things up.
-    fgAddRefPred(block->GetTarget(), block);
     block->RemoveFlags(BBF_HAS_JMP);
 }
 
@@ -7502,17 +7470,17 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa
 // fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
 //
 // Arguments:
-//    arg  -  argument to assign
-//    late  -  whether to use early or late arg
-//    lclParamNum - the lcl num of the parameter
-//    block  --- basic block the call is in
-//    callILOffset  -  IL offset of the call
-//    tmpAssignmentInsertionPoint  -  tree before which temp assignment should be inserted (if necessary)
-//    paramAssignmentInsertionPoint  -  tree before which parameter assignment should be inserted
+//    arg                           - argument to assign
+//    callArg                       - the corresponding call argument
+//    lclParamNum                   - the lcl num of the parameter
+//    block                         - basic block the call is in
+//    callILOffset                  - IL offset of the call
+//    tmpAssignmentInsertionPoint   - tree before which temp assignment should be inserted (if necessary)
+//    paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
 //
 // Return Value:
 //    parameter assignment statement if one was inserted; nullptr otherwise.
-
+//
 Statement* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree*         arg,
                                                            CallArg*         callArg,
                                                            unsigned         lclParamNum,
@@ -7561,9 +7529,7 @@ Statement* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree*         arg,
     {
         if (argInTemp == nullptr)
         {
-            // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
-            // TODO: we can avoid a temp assignment if we can prove that the argument tree
-            // doesn't involve any caller parameters.
+            // The argument is not assigned to a temp. We need to create a new temp and insert a store.
             unsigned tmpNum         = lvaGrabTemp(true DEBUGARG("arg temp"));
             lvaTable[tmpNum].lvType = arg->gtType;
             GenTree*   tempSrc      = arg;
@@ -7635,7 +7601,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call)
 
             compCurBB->SetFlags(BBF_HAS_CALL); // This block has a call
 
-            JITDUMP("\nInserting assignment of a multi-reg call result to a temp:\n");
+            JITDUMP("\nInserting store of a multi-reg call result to a temp:\n");
             DISPSTMT(storeStmt);
             INDEBUG(result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
 
@@ -7701,7 +7667,6 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call)
     // In the event the call indicates the block isn't a GC safe point
     // and the call is unmanaged with a GC transition suppression request
     // then insert a GC poll.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
     if (IsGcSafePoint(call))
     {
@@ -7964,7 +7929,7 @@ GenTree* Compiler::fgExpandVirtualVtableCallTarget(GenTreeCall* call)
             // [tmp + vtabOffsOfIndirection]
             GenTree* tmpTree1 = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtNewLclvNode(varNum1, TYP_I_IMPL),
                                               gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
-            tmpTree1 = gtNewIndir(TYP_I_IMPL, tmpTree1, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
+            tmpTree1          = gtNewIndir(TYP_I_IMPL, tmpTree1, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
 
             // var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection
             GenTree* tmpTree2 =
@@ -7980,7 +7945,7 @@ GenTree* Compiler::fgExpandVirtualVtableCallTarget(GenTreeCall* call)
 
             result = gtNewOperNode(GT_ADD, TYP_I_IMPL, result, gtNewLclvNode(varNum2, TYP_I_IMPL)); // [var2] + var2
 
-            // Now stitch together the two assignment and the calculation of result into a single tree
+            // Now stitch together the two stores and the calculation of result into a single tree
             GenTree* commaTree = gtNewOperNode(GT_COMMA, TYP_I_IMPL, storeVar2, result);
             result             = gtNewOperNode(GT_COMMA, TYP_I_IMPL, storeVar1, commaTree);
         }
@@ -8382,7 +8347,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
 #if FEATURE_IMPLICIT_BYREFS
                 || lclDsc->lvIsLastUseCopyOmissionCandidate
 #endif
-                )
+            )
             {
                 tree->AddAllEffectsFlags(GTF_GLOB_REF);
             }
@@ -8627,8 +8592,8 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
 // Note for TARGET_ARMARCH we don't have  a remainder instruction, so we don't do this optimization
 //
 #else  // TARGET_XARCH
-            // If this is an unsigned long mod with a constant divisor,
-            // then don't morph to a helper call - it can be done faster inline using idiv.
+       // If this is an unsigned long mod with a constant divisor,
+       // then don't morph to a helper call - it can be done faster inline using idiv.
 
             noway_assert(op2);
             if ((typ == TYP_LONG) && opts.OptimizationEnabled())
@@ -8904,8 +8869,8 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
     if (op1 != nullptr)
     {
         // If we are entering the "then" part of a Qmark-Colon we must
-        // save the state of the current copy assignment table
-        // so that we can restore this state when entering the "else" part
+        // save the state of the current assertions table so that we can
+        // restore this state when entering the "else" part
         if (isQmarkColon)
         {
             noway_assert(optLocalAssertionProp);
@@ -8946,8 +8911,8 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
         tree->AsOp()->gtOp1 = op1 = fgMorphTree(op1, mac);
 
         // If we are exiting the "then" part of a Qmark-Colon we must
-        // save the state of the current copy assignment table
-        // so that we can merge this state with the "else" part exit
+        // save the state of the current assertions table so that we
+        // can merge this state with the "else" part exit
         if (isQmarkColon)
         {
             noway_assert(optLocalAssertionProp);
@@ -8962,7 +8927,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
     if (op2 != nullptr)
     {
         // If we are entering the "else" part of a Qmark-Colon we must
-        // reset the state of the current copy assignment table
+        // reset the state of the current assertions table
         if (isQmarkColon)
         {
             noway_assert(optLocalAssertionProp);
@@ -8972,8 +8937,8 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
         tree->AsOp()->gtOp2 = op2 = fgMorphTree(op2);
 
         // If we are exiting the "else" part of a Qmark-Colon we must
-        // merge the state of the current copy assignment table with
-        // that of the exit of the "then" part.
+        // merge the state of the current assertions table with that
+        // of the exit of the "then" part.
         //
         if (isQmarkColon)
         {
@@ -9227,11 +9192,11 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
 
             // TODO #4104: there are a lot of other places where
             // this condition is not checked before transformations.
-            if (fgGlobalMorph)
+            noway_assert(op2);
+            if (fgGlobalMorph && !op2->TypeIs(TYP_BYREF))
             {
                 /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
 
-                noway_assert(op2);
                 if (op2->IsCnsIntOrI() && !op2->IsIconHandle())
                 {
                     // Negate the constant and change the node to be "+",
@@ -9249,7 +9214,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
                 noway_assert(op1);
                 if (op1->IsCnsIntOrI())
                 {
-                    noway_assert(varTypeIsIntOrI(tree));
+                    noway_assert(varTypeIsIntegralOrI(tree));
 
                     // The type of the new GT_NEG node cannot just be op2->TypeGet().
                     // Otherwise we may sign-extend incorrectly in cases where the GT_NEG
@@ -9665,22 +9630,8 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
                 }
                 else
                 {
-                    GenTree* op1SideEffects = nullptr;
-                    gtExtractSideEffList(op1, &op1SideEffects, GTF_ALL_EFFECT);
-                    if (op1SideEffects != nullptr)
-                    {
-                        DEBUG_DESTROY_NODE(tree);
-                        // Keep side-effects of op1
-                        tree = gtNewOperNode(GT_COMMA, TYP_INT, op1SideEffects, gtNewIconNode(0));
-                        JITDUMP("false with side effects:\n")
-                        DISPTREE(tree);
-                    }
-                    else
-                    {
-                        JITDUMP("false\n");
-                        DEBUG_DESTROY_NODE(tree, op1);
-                        tree = gtNewIconNode(0);
-                    }
+                    JITDUMP("false\n");
+                    tree = gtWrapWithSideEffects(gtNewIconNode(0), op1, GTF_ALL_EFFECT);
                 }
                 INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
                 return tree;
@@ -9831,11 +9782,11 @@ GenTree* Compiler::fgMorphFinalizeIndir(GenTreeIndir* indir)
             addr->ChangeType(indir->TypeGet());
             if (indir->OperIs(GT_STOREIND))
             {
-                GenTree* data = indir->Data();
+                GenTree* value = indir->Data();
                 addr->SetOper(GT_STORE_LCL_FLD);
-                addr->AsLclFld()->Data() = data;
+                addr->AsLclFld()->Data() = value;
                 addr->gtFlags |= (GTF_ASG | GTF_VAR_DEF);
-                addr->AddAllEffectsFlags(data);
+                addr->AddAllEffectsFlags(value);
             }
             else
             {
@@ -9961,7 +9912,7 @@ GenTree* Compiler::fgOptimizeCast(GenTreeCast* cast)
 }
 
 //------------------------------------------------------------------------
-// fgOptimizeCastOnAssignment: Optimizes the supplied store tree with a GT_CAST node.
+// fgOptimizeCastOnStore: Optimizes the supplied store tree with a GT_CAST node.
 //
 // Arguments:
 //    tree - the store to optimize
@@ -11628,8 +11579,8 @@ GenTree* Compiler::fgMorphRetInd(GenTreeUnOp* ret)
 #if defined(TARGET_64BIT)
         bool canFold = (indSize == lclVarSize);
 #else // !TARGET_64BIT
-        // TODO: improve 32 bit targets handling for LONG returns if necessary, nowadays we do not support `BITCAST
-        // long<->double` there.
+      // TODO: improve 32 bit targets handling for LONG returns if necessary, nowadays we do not support `BITCAST
+      // long<->double` there.
         bool canFold = (indSize == lclVarSize) && (lclVarSize <= REGSIZE_BYTES);
 #endif
 
@@ -12351,9 +12302,9 @@ GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree)
 
     if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
     {
-        // We can't do anything if the tree has assignments, calls, or volatile
-        // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
-        // thrown by the original tree will be thrown by the transformed tree as well.
+        // We can't do anything if the tree has stores, calls, or volatile reads. Note that we allow
+        // GTF_EXCEPT side effect since any exceptions thrown by the original tree will be thrown by
+        // the transformed tree as well.
         return nullptr;
     }
 
@@ -12470,7 +12421,6 @@ GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree)
                             // (x >>> y) | (x << (-y + N))
                             // where N == bitsize(x), M is const, and
                             // M & (N - 1) == N - 1
-                            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifndef TARGET_64BIT
                             if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
@@ -12654,12 +12604,12 @@ GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac)
 
     bool optAssertionPropDone = false;
 
-/*-------------------------------------------------------------------------
- * fgMorphTree() can potentially replace a tree with another, and the
- * caller has to store the return value correctly.
- * Turn this on to always make copy of "tree" here to shake out
- * hidden/unupdated references.
- */
+    /*-------------------------------------------------------------------------
+     * fgMorphTree() can potentially replace a tree with another, and the
+     * caller has to store the return value correctly.
+     * Turn this on to always make copy of "tree" here to shake out
+     * hidden/unupdated references.
+     */
 
 #ifdef DEBUG
 
@@ -12819,10 +12769,6 @@ GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac)
             gtUpdateNodeSideEffects(tree);
             break;
 
-        case GT_STORE_DYN_BLK:
-            tree = fgMorphStoreDynBlock(tree->AsStoreDynBlk());
-            break;
-
         case GT_SELECT:
             tree->AsConditional()->gtCond = fgMorphTree(tree->AsConditional()->gtCond);
             tree->AsConditional()->gtOp1  = fgMorphTree(tree->AsConditional()->gtOp1);
@@ -12884,7 +12830,7 @@ void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTree*
                              ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
                 if (verbose)
                 {
-                    printf("\nThe assignment ");
+                    printf("\nThe store ");
                     printTreeID(tree);
                     printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
                     optPrintAssertion(curAssertion, index);
@@ -13026,7 +12972,7 @@ void Compiler::fgAssertionGen(GenTree* tree)
         AssertionIndex ifFalseAssertionIndex;
         AssertionIndex ifTrueAssertionIndex;
 
-        if (info.IsNextEdgeAssertion())
+        if (info.AssertionHoldsOnFalseEdge())
         {
             ifFalseAssertionIndex = info.GetAssertionIndex();
             ifTrueAssertionIndex  = optFindComplementary(ifFalseAssertionIndex);
@@ -13226,21 +13172,31 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block)
             //
             BasicBlock* bTaken;
             BasicBlock* bNotTaken;
+            FlowEdge*   edgeTaken;
 
             if (cond->AsIntCon()->gtIconVal != 0)
             {
                 // JTRUE 1 - transform the basic block into a BBJ_ALWAYS
                 bTaken    = block->GetTrueTarget();
                 bNotTaken = block->GetFalseTarget();
-                block->SetKind(BBJ_ALWAYS);
+
+                // Remove 'block' from the predecessor list of 'bNotTaken' */
+                fgRemoveRefPred(block->GetFalseEdge());
+
+                edgeTaken = block->GetTrueEdge();
+                block->SetKindAndTargetEdge(BBJ_ALWAYS, edgeTaken);
             }
             else
             {
                 // JTRUE 0 - transform the basic block into a BBJ_ALWAYS
                 bTaken    = block->GetFalseTarget();
                 bNotTaken = block->GetTrueTarget();
-                block->SetKindAndTarget(BBJ_ALWAYS, bTaken);
-                block->SetFlags(BBF_NONE_QUIRK);
+
+                // Remove 'block' from the predecessor list of 'bNotTaken' */
+                fgRemoveRefPred(block->GetTrueEdge());
+
+                edgeTaken = block->GetFalseEdge();
+                block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetFalseEdge());
             }
 
             if (fgHaveValidEdgeWeights)
@@ -13249,8 +13205,7 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block)
                 // and we have already computed the edge weights, so
                 // we will try to adjust some of the weights
                 //
-                FlowEdge*   edgeTaken = fgGetPredForBlock(bTaken, block);
-                BasicBlock* bUpdated  = nullptr; // non-NULL if we updated the weight of an internal block
+                BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block
 
                 // We examine the taken edge (block -> bTaken)
                 // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
@@ -13296,19 +13251,19 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block)
                     switch (bUpdated->GetKind())
                     {
                         case BBJ_COND:
-                            edge         = fgGetPredForBlock(bUpdated->GetFalseTarget(), bUpdated);
+                            edge         = bUpdated->GetFalseEdge();
                             newMaxWeight = bUpdated->bbWeight;
                             newMinWeight = min(edge->edgeWeightMin(), newMaxWeight);
                             edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->GetFalseTarget());
 
-                            edge         = fgGetPredForBlock(bUpdated->GetTrueTarget(), bUpdated);
+                            edge         = bUpdated->GetTrueEdge();
                             newMaxWeight = bUpdated->bbWeight;
                             newMinWeight = min(edge->edgeWeightMin(), newMaxWeight);
                             edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->GetFalseTarget());
                             break;
 
                         case BBJ_ALWAYS:
-                            edge         = fgGetPredForBlock(bUpdated->GetTarget(), bUpdated);
+                            edge         = bUpdated->GetTargetEdge();
                             newMaxWeight = bUpdated->bbWeight;
                             newMinWeight = min(edge->edgeWeightMin(), newMaxWeight);
                             edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->Next());
@@ -13321,11 +13276,6 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block)
                 }
             }
 
-            /* modify the flow graph */
-
-            /* Remove 'block' from the predecessor list of 'bNotTaken' */
-            fgRemoveRefPred(bNotTaken, block);
-
 #ifdef DEBUG
             if (verbose)
             {
@@ -13391,38 +13341,33 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block)
             // modify the flow graph
 
             // Find the actual jump target
-            size_t       switchVal = (size_t)cond->AsIntCon()->gtIconVal;
-            unsigned     jumpCnt   = block->GetSwitchTargets()->bbsCount;
-            BasicBlock** jumpTab   = block->GetSwitchTargets()->bbsDstTab;
-            bool         foundVal  = false;
+            size_t     switchVal = (size_t)cond->AsIntCon()->gtIconVal;
+            unsigned   jumpCnt   = block->GetSwitchTargets()->bbsCount;
+            FlowEdge** jumpTab   = block->GetSwitchTargets()->bbsDstTab;
+            bool       foundVal  = false;
 
             for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
             {
-                BasicBlock* curJump = *jumpTab;
+                FlowEdge* curEdge = *jumpTab;
 
-                assert(curJump->countOfInEdges() > 0);
+                assert(curEdge->getDestinationBlock()->countOfInEdges() > 0);
 
                 // If val matches switchVal or we are at the last entry and
                 // we never found the switch value then set the new jump dest
 
                 if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
                 {
-                    block->SetKindAndTarget(BBJ_ALWAYS, curJump);
+                    block->SetKindAndTargetEdge(BBJ_ALWAYS, curEdge);
                     foundVal = true;
                 }
                 else
                 {
-                    // Remove 'block' from the predecessor list of 'curJump'
-                    fgRemoveRefPred(curJump, block);
+                    // Remove 'curEdge'
+                    fgRemoveRefPred(curEdge);
                 }
             }
 
             assert(foundVal);
-            if (block->JumpsToNext())
-            {
-                block->SetFlags(BBF_NONE_QUIRK);
-            }
-
 #ifdef DEBUG
             if (verbose)
             {
@@ -13587,7 +13532,8 @@ void Compiler::fgMorphStmtBlockOps(BasicBlock* block, Statement* stmt)
             DoPostOrder = true,
         };
 
-        Visitor(Compiler* comp) : GenTreeVisitor(comp)
+        Visitor(Compiler* comp)
+            : GenTreeVisitor(comp)
         {
         }
 
@@ -13595,11 +13541,7 @@ void Compiler::fgMorphStmtBlockOps(BasicBlock* block, Statement* stmt)
         {
             if ((*use)->OperIsBlkOp())
             {
-                if ((*use)->OperIs(GT_STORE_DYN_BLK))
-                {
-                    *use = m_compiler->fgMorphStoreDynBlock((*use)->AsStoreDynBlk());
-                }
-                else if ((*use)->OperIsInitBlkOp())
+                if ((*use)->OperIsInitBlkOp())
                 {
                     *use = m_compiler->fgMorphInitBlock(*use);
                 }
@@ -13679,7 +13621,6 @@ void Compiler::fgMorphStmts(BasicBlock* block)
                 /* This must be a tailcall that caused a GCPoll to get
                 injected. We haven't actually morphed the call yet
                 but the flag still got set, clear it here...  */
-                CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
                 morphedTree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
@@ -14153,7 +14094,6 @@ void Compiler::fgMergeBlockReturn(BasicBlock* block)
     else
     {
         // We'll jump to the genReturnBB.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if !defined(TARGET_X86)
         if (info.compFlags & CORINFO_FLG_SYNCH)
@@ -14163,8 +14103,8 @@ void Compiler::fgMergeBlockReturn(BasicBlock* block)
         else
 #endif // !TARGET_X86
         {
-            block->SetKindAndTarget(BBJ_ALWAYS, genReturnBB);
-            fgAddRefPred(genReturnBB, block);
+            FlowEdge* const newEdge = fgAddRefPred(genReturnBB, block);
+            block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
             fgReturnCount--;
         }
 
@@ -14541,229 +14481,6 @@ GenTreeQmark* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = N
     return topQmark;
 }
 
-//------------------------------------------------------------------------
-// fgExpandQmarkForCastInstOf: expand qmark for cast
-//
-// Arguments:
-//   block - block containing the qmark
-//   stmt  - statement containing the qmark
-//
-// Returns:
-//   true if the expansion introduced a throwing block
-//
-// Notes:
-//
-//  For a castclass helper call,
-//  Importer creates the following tree:
-//      tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
-//
-//  This method splits the qmark expression created by the importer into the
-//  following blocks: (block, asg, cond1, cond2, helper, remainder)
-//  Notice that op1 is the result for both the conditions. So we coalesce these
-//  assignments into a single block instead of two blocks resulting a nested diamond.
-//
-//                       +---------->-----------+
-//                       |          |           |
-//                       ^          ^           v
-//                       |          |           |
-//  block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
-//
-//  We expect to achieve the following codegen:
-//     mov      rsi, rdx                           tmp = op1                  // asgBlock
-//     test     rsi, rsi                           goto skip if tmp == null ? // cond1Block
-//     je       SKIP
-//     mov      rcx, 0x76543210                    cns = op2                  // cond2Block
-//     cmp      qword ptr [rsi], rcx               goto skip if *tmp == op2
-//     je       SKIP
-//     call     CORINFO_HELP_CHKCASTCLASS_SPECIAL  tmp = helper(cns, tmp)     // helperBlock
-//     mov      rsi, rax
-//  SKIP:                                                                     // remainderBlock
-//     tmp has the result.
-//
-bool Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, Statement* stmt)
-{
-#ifdef DEBUG
-    if (verbose)
-    {
-        printf("\nExpanding CastInstOf qmark in " FMT_BB " (before)\n", block->bbNum);
-        fgDispBasicBlocks(block, block, true);
-    }
-#endif // DEBUG
-
-    bool     introducedThrow = false;
-    GenTree* expr            = stmt->GetRootNode();
-
-    GenTree*      dst   = nullptr;
-    GenTreeQmark* qmark = fgGetTopLevelQmark(expr, &dst);
-
-    noway_assert(dst != nullptr);
-    assert(dst->OperIsLocalStore());
-    assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
-
-    // Get cond, true, false exprs for the qmark.
-    GenTree* condExpr  = qmark->gtGetOp1();
-    GenTree* trueExpr  = qmark->gtGetOp2()->AsColon()->ThenNode();
-    GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
-
-    // Get cond, true, false exprs for the nested qmark.
-    GenTree* nestedQmark = falseExpr;
-    GenTree* cond2Expr;
-    GenTree* true2Expr;
-    GenTree* false2Expr;
-
-    unsigned nestedQmarkElseLikelihood = 50;
-    if (nestedQmark->gtOper == GT_QMARK)
-    {
-        cond2Expr                 = nestedQmark->gtGetOp1();
-        true2Expr                 = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
-        false2Expr                = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
-        nestedQmarkElseLikelihood = nestedQmark->AsQmark()->ElseNodeLikelihood();
-    }
-    else
-    {
-        // This is a rare case that arises when we are doing minopts and encounter isinst of null
-        // gtFoldExpr was still is able to optimize away part of the tree (but not all).
-        // That means it does not match our pattern.
-
-        // Rather than write code to handle this case, just fake up some nodes to make it match the common
-        // case.  Synthesize a comparison that is always true, and for the result-on-true, use the
-        // entire subtree we expected to be the nested question op.
-
-        cond2Expr  = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
-        true2Expr  = nestedQmark;
-        false2Expr = gtNewIconNode(0, TYP_I_IMPL);
-    }
-    assert(false2Expr->OperGet() == trueExpr->OperGet());
-
-    // Create the chain of blocks. See method header comment.
-    // The order of blocks after this is the following:
-    //     block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
-    //
-    // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
-    // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this only
-    // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
-    // remainderBlock will still be GC safe.
-    BasicBlockFlags propagateFlags = block->GetFlagsRaw() & BBF_GC_SAFE_POINT;
-    BasicBlock*     remainderBlock = fgSplitBlockAfterStatement(block, stmt);
-    fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
-
-    BasicBlock* helperBlock = fgNewBBafter(BBJ_ALWAYS, block, true, block->Next());
-    BasicBlock* cond2Block  = fgNewBBafter(BBJ_COND, block, true, remainderBlock);
-    BasicBlock* cond1Block  = fgNewBBafter(BBJ_COND, block, true, remainderBlock);
-    BasicBlock* asgBlock    = fgNewBBafter(BBJ_ALWAYS, block, true, block->Next());
-
-    block->RemoveFlags(BBF_NEEDS_GCPOLL);
-    remainderBlock->SetFlags(propagateFlags);
-    helperBlock->SetFlags(BBF_NONE_QUIRK);
-    asgBlock->SetFlags(BBF_NONE_QUIRK);
-
-    // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
-    // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
-    if (!block->HasFlag(BBF_INTERNAL))
-    {
-        helperBlock->RemoveFlags(BBF_INTERNAL);
-        cond2Block->RemoveFlags(BBF_INTERNAL);
-        cond1Block->RemoveFlags(BBF_INTERNAL);
-        asgBlock->RemoveFlags(BBF_INTERNAL);
-        helperBlock->SetFlags(BBF_IMPORTED);
-        cond2Block->SetFlags(BBF_IMPORTED);
-        cond1Block->SetFlags(BBF_IMPORTED);
-        asgBlock->SetFlags(BBF_IMPORTED);
-    }
-
-    // Chain the flow correctly.
-    assert(block->KindIs(BBJ_ALWAYS));
-    block->SetTarget(asgBlock);
-    fgAddRefPred(asgBlock, block);
-    fgAddRefPred(cond1Block, asgBlock);
-    fgAddRefPred(remainderBlock, helperBlock);
-
-    cond1Block->SetFalseTarget(cond2Block);
-    cond2Block->SetFalseTarget(helperBlock);
-    fgAddRefPred(cond2Block, cond1Block);
-    fgAddRefPred(helperBlock, cond2Block);
-    fgAddRefPred(remainderBlock, cond1Block);
-    fgAddRefPred(remainderBlock, cond2Block);
-
-    // Set the weights; some are guesses.
-    asgBlock->inheritWeight(block);
-    cond1Block->inheritWeight(block);
-
-    // We only have likelihood for the fast path (and fallback), but we don't know
-    // how often we have null in the root QMARK (although, we might be able to guess it too)
-    // so leave 50/50 for now. Thus, we have:
-    //
-    //   [weight 1.0]
-    //   if (obj != null)
-    //   {
-    //       [weight 0.5]
-    //       if (obj.GetType() == typeof(FastType))
-    //       {
-    //           [weight 0.5 * <likelihood of FastType>]
-    //       }
-    //       else
-    //       {
-    //           [weight 0.5 * <100 - likelihood of FastType>]
-    //       }
-    //
-    cond2Block->inheritWeightPercentage(cond1Block, 50);
-    helperBlock->inheritWeightPercentage(cond2Block, nestedQmarkElseLikelihood);
-
-    // Append cond1 as JTRUE to cond1Block
-    GenTree*   jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
-    Statement* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->GetDebugInfo());
-    fgInsertStmtAtEnd(cond1Block, jmpStmt);
-
-    // Append cond2 as JTRUE to cond2Block
-    jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
-    jmpStmt = fgNewStmtFromTree(jmpTree, stmt->GetDebugInfo());
-    fgInsertStmtAtEnd(cond2Block, jmpStmt);
-
-    unsigned dstLclNum = dst->AsLclVarCommon()->GetLclNum();
-
-    // AsgBlock should get tmp = op1.
-    GenTree* trueExprStore =
-        dst->OperIs(GT_STORE_LCL_FLD)
-            ? gtNewStoreLclFldNode(dstLclNum, dst->TypeGet(), dst->AsLclFld()->GetLclOffs(), trueExpr)
-            : gtNewStoreLclVarNode(dstLclNum, trueExpr)->AsLclVarCommon();
-    Statement* trueStmt = fgNewStmtFromTree(trueExprStore, stmt->GetDebugInfo());
-    fgInsertStmtAtEnd(asgBlock, trueStmt);
-
-    // Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper.
-    gtReverseCond(cond2Expr);
-
-    if (true2Expr->OperIs(GT_CALL) && (true2Expr->AsCall()->gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN))
-    {
-        Statement* helperStmt = fgNewStmtFromTree(true2Expr, stmt->GetDebugInfo());
-        fgInsertStmtAtEnd(helperBlock, helperStmt);
-        fgConvertBBToThrowBB(helperBlock);
-        setMethodHasNoReturnCalls();
-        introducedThrow = true;
-    }
-    else
-    {
-        GenTree* helperExprStore =
-            dst->OperIs(GT_STORE_LCL_FLD)
-                ? gtNewStoreLclFldNode(dstLclNum, dst->TypeGet(), dst->AsLclFld()->GetLclOffs(), true2Expr)
-                : gtNewStoreLclVarNode(dstLclNum, true2Expr)->AsLclVarCommon();
-        Statement* helperStmt = fgNewStmtFromTree(helperExprStore, stmt->GetDebugInfo());
-        fgInsertStmtAtEnd(helperBlock, helperStmt);
-    }
-
-    // Finally remove the nested qmark stmt.
-    fgRemoveStmt(block, stmt);
-
-#ifdef DEBUG
-    if (verbose)
-    {
-        printf("\nExpanding CastInstOf qmark in " FMT_BB " (after)\n", block->bbNum);
-        fgDispBasicBlocks(block, remainderBlock, true);
-    }
-#endif // DEBUG
-
-    return introducedThrow;
-}
-
 //------------------------------------------------------------------------
 // fgExpandQmarkStmt: expand a qmark into control flow
 //
@@ -14837,11 +14554,6 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt)
         return false;
     }
 
-    if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
-    {
-        return fgExpandQmarkForCastInstOf(block, stmt);
-    }
-
 #ifdef DEBUG
     if (verbose)
     {
@@ -14872,7 +14584,6 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt)
     // Conservatively propagate BBF_COPY_PROPAGATE flags to all blocks
     BasicBlockFlags propagateFlagsToAll = block->GetFlagsRaw() & BBF_COPY_PROPAGATE;
     BasicBlock*     remainderBlock      = fgSplitBlockAfterStatement(block, stmt);
-    fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
 
     BasicBlock* condBlock = fgNewBBafter(BBJ_ALWAYS, block, true);
     BasicBlock* elseBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
@@ -14896,18 +14607,23 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt)
     assert(condBlock->bbWeight == remainderBlock->bbWeight);
 
     assert(block->KindIs(BBJ_ALWAYS));
-    block->SetTarget(condBlock);
-    condBlock->SetTarget(elseBlock);
-    elseBlock->SetTarget(remainderBlock);
+    fgRedirectTargetEdge(block, condBlock);
+
+    {
+        FlowEdge* const newEdge = fgAddRefPred(elseBlock, condBlock);
+        condBlock->SetTargetEdge(newEdge);
+    }
+
+    {
+        FlowEdge* const newEdge = fgAddRefPred(remainderBlock, elseBlock);
+        elseBlock->SetTargetEdge(newEdge);
+    }
+
     assert(condBlock->JumpsToNext());
     assert(elseBlock->JumpsToNext());
 
-    fgAddRefPred(condBlock, block);
-    fgAddRefPred(elseBlock, condBlock);
-    fgAddRefPred(remainderBlock, elseBlock);
-
-    condBlock->SetFlags(propagateFlagsToAll | BBF_NONE_QUIRK);
-    elseBlock->SetFlags(propagateFlagsToAll | BBF_NONE_QUIRK);
+    condBlock->SetFlags(propagateFlagsToAll);
+    elseBlock->SetFlags(propagateFlagsToAll);
 
     BasicBlock* thenBlock = nullptr;
     if (hasTrueExpr && hasFalseExpr)
@@ -14922,20 +14638,28 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt)
         //
         gtReverseCond(condExpr);
 
-        thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true, remainderBlock);
+        thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
         thenBlock->SetFlags(propagateFlagsToAll);
-        condBlock->SetCond(elseBlock, thenBlock);
         if (!block->HasFlag(BBF_INTERNAL))
         {
             thenBlock->RemoveFlags(BBF_INTERNAL);
             thenBlock->SetFlags(BBF_IMPORTED);
         }
 
-        fgAddRefPred(thenBlock, condBlock);
-        fgAddRefPred(remainderBlock, thenBlock);
+        const unsigned thenLikelihood = qmark->ThenNodeLikelihood();
+        const unsigned elseLikelihood = qmark->ElseNodeLikelihood();
 
-        thenBlock->inheritWeightPercentage(condBlock, qmark->ThenNodeLikelihood());
-        elseBlock->inheritWeightPercentage(condBlock, qmark->ElseNodeLikelihood());
+        FlowEdge* const newEdge = fgAddRefPred(remainderBlock, thenBlock);
+        thenBlock->SetTargetEdge(newEdge);
+
+        assert(condBlock->TargetIs(elseBlock));
+        FlowEdge* const elseEdge = fgAddRefPred(thenBlock, condBlock);
+        FlowEdge* const thenEdge = condBlock->GetTargetEdge();
+        condBlock->SetCond(thenEdge, elseEdge);
+        thenBlock->inheritWeightPercentage(condBlock, thenLikelihood);
+        elseBlock->inheritWeightPercentage(condBlock, elseLikelihood);
+        thenEdge->setLikelihood(thenLikelihood / 100.0);
+        elseEdge->setLikelihood(elseLikelihood / 100.0);
     }
     else if (hasTrueExpr)
     {
@@ -14946,13 +14670,22 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt)
         //              bbj_cond(true)
         //
         gtReverseCond(condExpr);
-        condBlock->SetCond(remainderBlock, elseBlock);
-        fgAddRefPred(remainderBlock, condBlock);
+
+        const unsigned thenLikelihood = qmark->ThenNodeLikelihood();
+        const unsigned elseLikelihood = qmark->ElseNodeLikelihood();
+
+        assert(condBlock->TargetIs(elseBlock));
+        FlowEdge* const thenEdge = fgAddRefPred(remainderBlock, condBlock);
+        FlowEdge* const elseEdge = condBlock->GetTargetEdge();
+        condBlock->SetCond(thenEdge, elseEdge);
+
         // Since we have no false expr, use the one we'd already created.
         thenBlock = elseBlock;
         elseBlock = nullptr;
 
-        thenBlock->inheritWeightPercentage(condBlock, qmark->ThenNodeLikelihood());
+        thenBlock->inheritWeightPercentage(condBlock, thenLikelihood);
+        thenEdge->setLikelihood(thenLikelihood / 100.0);
+        elseEdge->setLikelihood(elseLikelihood / 100.0);
     }
     else if (hasFalseExpr)
     {
@@ -14962,10 +14695,17 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt)
         //              +-->------------+
         //              bbj_cond(true)
         //
-        condBlock->SetCond(remainderBlock, elseBlock);
-        fgAddRefPred(remainderBlock, condBlock);
+        const unsigned thenLikelihood = qmark->ThenNodeLikelihood();
+        const unsigned elseLikelihood = qmark->ElseNodeLikelihood();
+
+        assert(condBlock->TargetIs(elseBlock));
+        FlowEdge* const thenEdge = fgAddRefPred(remainderBlock, condBlock);
+        FlowEdge* const elseEdge = condBlock->GetTargetEdge();
+        condBlock->SetCond(thenEdge, elseEdge);
 
-        elseBlock->inheritWeightPercentage(condBlock, qmark->ElseNodeLikelihood());
+        elseBlock->inheritWeightPercentage(condBlock, elseLikelihood);
+        thenEdge->setLikelihood(thenLikelihood / 100.0);
+        elseEdge->setLikelihood(elseLikelihood / 100.0);
     }
 
     assert(condBlock->KindIs(BBJ_COND));
@@ -15255,7 +14995,7 @@ PhaseStatus Compiler::fgPromoteStructs()
 //
 PhaseStatus Compiler::fgMarkImplicitByRefCopyOmissionCandidates()
 {
-#if FEATURE_IMPLICIT_BYREFS
+#if FEATURE_IMPLICIT_BYREFS && !defined(UNIX_AMD64_ABI)
     if (!fgDidEarlyLiveness)
     {
         return PhaseStatus::MODIFIED_NOTHING;
@@ -15269,7 +15009,8 @@ PhaseStatus Compiler::fgMarkImplicitByRefCopyOmissionCandidates()
             UseExecutionOrder = true,
         };
 
-        Visitor(Compiler* comp) : GenTreeVisitor(comp)
+        Visitor(Compiler* comp)
+            : GenTreeVisitor(comp)
         {
         }
 
@@ -15461,7 +15202,7 @@ PhaseStatus Compiler::fgRetypeImplicitByRefArgs()
                                       (nonCallAppearances <= varDsc->lvFieldCnt));
 
 #ifdef DEBUG
-                // Above is a profitability heurisic; either value of
+                // Above is a profitability heuristic; either value of
                 // undoPromotion should lead to correct code. So,
                 // under stress, make different decisions at times.
                 if (compStressCompile(STRESS_BYREF_PROMOTION, 25))
@@ -15481,9 +15222,9 @@ PhaseStatus Compiler::fgRetypeImplicitByRefArgs()
                 {
                     // Insert IR that initializes the temp from the parameter.
                     fgEnsureFirstBBisScratch();
-                    GenTree* addr = gtNewLclvNode(lclNum, TYP_BYREF);
-                    GenTree* data = (varDsc->TypeGet() == TYP_STRUCT) ? gtNewBlkIndir(varDsc->GetLayout(), addr)
-                                                                      : gtNewIndir(varDsc->TypeGet(), addr);
+                    GenTree* addr  = gtNewLclvNode(lclNum, TYP_BYREF);
+                    GenTree* data  = (varDsc->TypeGet() == TYP_STRUCT) ? gtNewBlkIndir(varDsc->GetLayout(), addr)
+                                                                       : gtNewIndir(varDsc->TypeGet(), addr);
                     GenTree* store = gtNewStoreLclVarNode(newLclNum, data);
                     fgNewStmtAtBeg(fgFirstBB, store);
                 }
@@ -15854,7 +15595,10 @@ bool Compiler::fgMorphArrayOpsStmt(MorphMDArrayTempCache* pTempCache, BasicBlock
         };
 
         MorphMDArrayVisitor(Compiler* compiler, BasicBlock* block, MorphMDArrayTempCache* pTempCache)
-            : GenTreeVisitor<MorphMDArrayVisitor>(compiler), m_changed(false), m_block(block), m_pTempCache(pTempCache)
+            : GenTreeVisitor<MorphMDArrayVisitor>(compiler)
+            , m_changed(false)
+            , m_block(block)
+            , m_pTempCache(pTempCache)
         {
         }
 
diff --git a/src/coreclr/jit/morphblock.cpp b/src/coreclr/jit/morphblock.cpp
index 94d10dd5887f..0f49e62c647f 100644
--- a/src/coreclr/jit/morphblock.cpp
+++ b/src/coreclr/jit/morphblock.cpp
@@ -45,7 +45,6 @@ class MorphInitBlockHelper
     GenTreeLclVarCommon* m_dstLclNode           = nullptr;
     LclVarDsc*           m_dstVarDsc            = nullptr;
     unsigned             m_dstLclOffset         = 0;
-    bool                 m_dstUseLclFld         = false;
     bool                 m_dstSingleStoreLclVar = false;
 
     enum class BlockTransformation
@@ -64,7 +63,7 @@ class MorphInitBlockHelper
 };
 
 //------------------------------------------------------------------------
-// MorphInitBlock: Morph a block initialization assignment tree.
+// MorphInitBlock: Morph a block initialization store tree.
 //
 // Arguments:
 //    comp - a compiler instance;
@@ -93,7 +92,8 @@ GenTree* MorphInitBlockHelper::MorphInitBlock(Compiler* comp, GenTree* tree)
 //    Most class members are initialized via in-class member initializers.
 //
 MorphInitBlockHelper::MorphInitBlockHelper(Compiler* comp, GenTree* store, bool initBlock = true)
-    : m_comp(comp), m_initBlock(initBlock)
+    : m_comp(comp)
+    , m_initBlock(initBlock)
 {
     assert(store->OperIsStore());
     assert((m_initBlock == store->OperIsInitBlkOp()) && (!m_initBlock == store->OperIsCopyBlkOp()));
@@ -318,13 +318,13 @@ void MorphInitBlockHelper::MorphStructCases()
 
 //------------------------------------------------------------------------
 // InitFieldByField: Attempts to promote a local block init tree to a tree
-// of promoted field initialization assignments.
+// of promoted field initialization stores.
 //
 // If successful, will set "m_transformationDecision" to "FieldByField" and
 // "m_result" to the final tree.
 //
 // Notes:
-//    This transforms a single block initialization assignment like:
+//    This transforms a single block initialization store like:
 //
 //    *  STORE_BLK struct<12> (init)
 //    +--*  LCL_ADDR  byref V02 loc0
@@ -481,7 +481,7 @@ void MorphInitBlockHelper::TryPrimitiveInit()
 
 //------------------------------------------------------------------------
 // EliminateCommas: Prepare for block morphing by removing commas from the
-// source operand of the assignment.
+// source operand of the store.
 //
 // Parameters:
 //   commaPool - [out] Pool of GT_COMMA nodes linked by their gtNext nodes that
@@ -531,8 +531,8 @@ GenTree* MorphInitBlockHelper::EliminateCommas(GenTree** commaPool)
 {
     *commaPool = nullptr;
 
-    GenTree* sideEffects = nullptr;
-    auto addSideEffect   = [&sideEffects](GenTree* sideEff) {
+    GenTree* sideEffects   = nullptr;
+    auto     addSideEffect = [&sideEffects](GenTree* sideEff) {
         sideEff->gtNext = sideEffects;
         sideEffects     = sideEff;
     };
@@ -609,7 +609,6 @@ class MorphCopyBlockHelper : public MorphInitBlockHelper
     unsigned             m_srcLclNum            = BAD_VAR_NUM;
     LclVarDsc*           m_srcVarDsc            = nullptr;
     GenTreeLclVarCommon* m_srcLclNode           = nullptr;
-    bool                 m_srcUseLclFld         = false;
     unsigned             m_srcLclOffset         = 0;
     bool                 m_srcSingleStoreLclVar = false;
 
@@ -621,7 +620,7 @@ class MorphCopyBlockHelper : public MorphInitBlockHelper
 };
 
 //------------------------------------------------------------------------
-// MorphCopyBlock: Morph a block copy assignment tree.
+// MorphCopyBlock: Morph a block copy tree.
 //
 // Arguments:
 //    comp - a compiler instance;
@@ -647,7 +646,8 @@ GenTree* MorphCopyBlockHelper::MorphCopyBlock(Compiler* comp, GenTree* tree)
 // Notes:
 //    Most class members are initialized via in-class member initializers.
 //
-MorphCopyBlockHelper::MorphCopyBlockHelper(Compiler* comp, GenTree* store) : MorphInitBlockHelper(comp, store, false)
+MorphCopyBlockHelper::MorphCopyBlockHelper(Compiler* comp, GenTree* store)
+    : MorphInitBlockHelper(comp, store, false)
 {
 }
 
@@ -676,7 +676,7 @@ void MorphCopyBlockHelper::PrepareSrc()
 }
 
 // TrySpecialCases: check special cases that require special transformations.
-//    The current special cases include assignments with calls in RHS.
+//    The current special cases include stores with calls as values.
 //
 void MorphCopyBlockHelper::TrySpecialCases()
 {
@@ -707,7 +707,7 @@ void MorphCopyBlockHelper::TrySpecialCases()
 //
 void MorphCopyBlockHelper::MorphStructCases()
 {
-    JITDUMP("block assignment to morph:\n");
+    JITDUMP("block store to morph:\n");
     DISPTREE(m_store);
 
     if (m_dstVarDsc != nullptr)
@@ -775,7 +775,7 @@ void MorphCopyBlockHelper::MorphStructCases()
         requiresCopyBlock = true;
     }
 
-    // Can we use field by field assignment for the dest?
+    // Can we use field by field copy for the dest?
     if (m_dstDoFldStore && m_dstVarDsc->lvAnySignificantPadding)
     {
         JITDUMP(" dest has significant padding");
@@ -783,7 +783,7 @@ void MorphCopyBlockHelper::MorphStructCases()
         requiresCopyBlock = true;
     }
 
-    // Can we use field by field assignment for the src?
+    // Can we use field by field copy for the src?
     if (m_srcDoFldStore && m_srcVarDsc->lvAnySignificantPadding)
     {
         JITDUMP(" src has significant padding");
@@ -805,8 +805,8 @@ void MorphCopyBlockHelper::MorphStructCases()
     }
 #endif // TARGET_ARM
 
-    // Don't use field by field assignment if the src is a call, lowering will handle
-    // it without spilling the call result into memory to access the individual fields.
+    // Don't use field by field store if the src is a call, lowering will handle it
+    // without spilling the call result into memory to access the individual fields.
     // For HWI/SIMD/CNS_VEC, we don't expect promoted destinations - we purposefully
     // mark SIMDs used in such copies as "used in a SIMD intrinsic", to prevent their
     // promotion.
@@ -944,12 +944,12 @@ void MorphCopyBlockHelper::MorphStructCases()
     // If we require a copy block the set both of the field assign bools to false
     if (requiresCopyBlock)
     {
-        // If a copy block is required then we won't do field by field assignments
+        // If a copy block is required then we won't do field by field stores
         m_dstDoFldStore = false;
         m_srcDoFldStore = false;
     }
 
-    JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
+    JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field stores.\n");
 
     if (requiresCopyBlock)
     {
@@ -996,7 +996,7 @@ void MorphCopyBlockHelper::MorphStructCases()
 }
 
 //------------------------------------------------------------------------
-// TryPrimitiveCopy: Attempt to replace a block assignment with a scalar assignment.
+// TryPrimitiveCopy: Attempt to replace a block store with a scalar store.
 //
 // If successful, will set "m_transformationDecision" to "OneStoreBlock".
 //
@@ -1074,7 +1074,7 @@ void MorphCopyBlockHelper::TryPrimitiveCopy()
 }
 
 //------------------------------------------------------------------------
-// CopyFieldByField: transform the copy block to a field by field assignment.
+// CopyFieldByField: transform the copy block to a field by field store.
 //
 // Notes:
 //    We do it for promoted lclVars which fields can be enregistered.
@@ -1108,7 +1108,7 @@ GenTree* MorphCopyBlockHelper::CopyFieldByField()
 
     if (m_dstDoFldStore && m_srcDoFldStore)
     {
-        // To do fieldwise assignments for both sides.
+        // To do fieldwise stores for both sides.
         // The structs do not have to be the same exact types but have to have same field types
         // at the same offsets.
         assert(m_dstLclNum != BAD_VAR_NUM && m_srcLclNum != BAD_VAR_NUM);
@@ -1116,9 +1116,7 @@ GenTree* MorphCopyBlockHelper::CopyFieldByField()
     }
     else if (m_dstDoFldStore)
     {
-        m_srcUseLclFld = m_srcVarDsc != nullptr;
-
-        if (!m_srcUseLclFld)
+        if (m_srcLclNum == BAD_VAR_NUM)
         {
             addr = m_src->AsIndir()->Addr();
 
@@ -1143,8 +1141,7 @@ GenTree* MorphCopyBlockHelper::CopyFieldByField()
     else
     {
         assert(m_srcDoFldStore);
-        fieldCnt       = m_srcVarDsc->lvFieldCnt;
-        m_dstUseLclFld = m_dstVarDsc != nullptr;
+        fieldCnt = m_srcVarDsc->lvFieldCnt;
 
         // Clear the def flags, we'll reuse the node below and reset them.
         if (m_dstLclNode != nullptr)
@@ -1152,7 +1149,7 @@ GenTree* MorphCopyBlockHelper::CopyFieldByField()
             m_dstLclNode->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
         }
 
-        if (!m_dstUseLclFld)
+        if (m_dstLclNum == BAD_VAR_NUM)
         {
             addr = m_store->AsIndir()->Addr();
 
@@ -1242,7 +1239,7 @@ GenTree* MorphCopyBlockHelper::CopyFieldByField()
     {
         JITDUMP("All fields of destination of field-by-field copy are dying, skipping entirely\n");
 
-        if (m_srcUseLclFld)
+        if (m_srcLclNum != BAD_VAR_NUM)
         {
             return m_comp->gtNewNothingNode();
         }
@@ -1316,12 +1313,7 @@ GenTree* MorphCopyBlockHelper::CopyFieldByField()
                 }
                 if (!done)
                 {
-                    if (!m_srcUseLclFld)
-                    {
-                        GenTree* fldAddr = grabAddr(fldOffset);
-                        srcFld           = m_comp->gtNewIndir(destType, fldAddr);
-                    }
-                    else
+                    if (m_srcLclNum != BAD_VAR_NUM)
                     {
                         // If the src was a struct type field "B" in a struct "A" then we add
                         // add offset of ("B" in "A") + current offset in "B".
@@ -1331,6 +1323,11 @@ GenTree* MorphCopyBlockHelper::CopyFieldByField()
                         // TODO-1stClassStructs: remove this and implement reading a field from a struct in a reg.
                         m_comp->lvaSetVarDoNotEnregister(m_srcLclNum DEBUGARG(DoNotEnregisterReason::LocalField));
                     }
+                    else
+                    {
+                        GenTree* fldAddr = grabAddr(fldOffset);
+                        srcFld           = m_comp->gtNewIndir(destType, fldAddr);
+                    }
                 }
             }
         }
@@ -1364,12 +1361,7 @@ GenTree* MorphCopyBlockHelper::CopyFieldByField()
                 unsigned   srcFieldOffset = srcFieldVarDsc->lvFldOffset;
                 var_types  srcType        = srcFieldVarDsc->TypeGet();
 
-                if (!m_dstUseLclFld)
-                {
-                    GenTree* fldAddr = grabAddr(srcFieldOffset);
-                    dstFldStore      = m_comp->gtNewStoreIndNode(srcType, fldAddr, srcFld);
-                }
-                else
+                if (m_dstLclNum != BAD_VAR_NUM)
                 {
                     // If the dst was a struct type field "B" in a struct "A" then we add
                     // add offset of ("B" in "A") + current offset in "B".
@@ -1379,6 +1371,11 @@ GenTree* MorphCopyBlockHelper::CopyFieldByField()
                     // TODO-1stClassStructs: remove this and implement storing to a field in a struct in a reg.
                     m_comp->lvaSetVarDoNotEnregister(m_dstLclNum DEBUGARG(DoNotEnregisterReason::LocalField));
                 }
+                else
+                {
+                    GenTree* fldAddr = grabAddr(srcFieldOffset);
+                    dstFldStore      = m_comp->gtNewStoreIndNode(srcType, fldAddr, srcFld);
+                }
             }
         }
         noway_assert(dstFldStore->TypeGet() == srcFld->TypeGet());
@@ -1471,9 +1468,9 @@ bool MorphCopyBlockHelper::CanReuseAddressForDecomposedStore(GenTree* addrNode)
 //
 // Return Value:
 //    We can return the original block copy unmodified (least desirable, but always correct)
-//    We can return a single assignment, when TryPrimitiveCopy transforms it (most desirable).
+//    We can return a single store, when TryPrimitiveCopy transforms it (most desirable).
 //    If we have performed struct promotion of the Source() or the Dest() then we will try to
-//    perform a field by field assignment for each of the promoted struct fields.
+//    perform a field by field store for each of the promoted struct fields.
 //
 // Assumptions:
 //    The child nodes for tree have already been Morphed.
@@ -1481,10 +1478,10 @@ bool MorphCopyBlockHelper::CanReuseAddressForDecomposedStore(GenTree* addrNode)
 // Notes:
 //    If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on Source() or Dest()
 //    if they cannot be enregistered.
-//    When performing a field by field assignment we can have one of Source() or Dest treated as a blob of bytes
+//    When performing a field by field store we can have one of Source() or Dest treated as a blob of bytes
 //    and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
 //    If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
-//    can not use a field by field assignment and must leave the original block copy unmodified.
+//    can not use a field by field store and must leave the original block copy unmodified.
 //
 GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
 {
@@ -1492,14 +1489,14 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
 }
 
 //------------------------------------------------------------------------
-// fgMorphInitBlock: Morph a block initialization assignment tree.
+// fgMorphInitBlock: Morph a block initialization store tree.
 //
 // Arguments:
 //    tree - A store tree that performs block initialization.
 //
 // Return Value:
 //    If the destination is a promoted struct local variable then we will try to
-//    perform a field by field assignment for each of the promoted struct fields.
+//    perform a field by field store for each of the promoted struct fields.
 //    This is not always possible (e.g. if the struct is address exposed).
 //
 //    Otherwise the original store tree is returned unmodified, note that the
@@ -1512,70 +1509,3 @@ GenTree* Compiler::fgMorphInitBlock(GenTree* tree)
 {
     return MorphInitBlockHelper::MorphInitBlock(this, tree);
 }
-
-//------------------------------------------------------------------------
-// fgMorphStoreDynBlock: Morph a dynamic block store (GT_STORE_DYN_BLK).
-//
-// Performs full (pre-order and post-order) morphing for a STORE_DYN_BLK.
-//
-// Arguments:
-//    tree - The GT_STORE_DYN_BLK tree to morph.
-//
-// Return Value:
-//    In case the size turns into a constant - the store, transformed
-//    into an "ordinary" STORE_BLK<size> one, and further morphed by
-//    "fgMorphInitBlock"/"fgMorphCopyBlock". Otherwise, the original
-//    tree (fully morphed).
-//
-GenTree* Compiler::fgMorphStoreDynBlock(GenTreeStoreDynBlk* tree)
-{
-    if (!tree->Data()->OperIs(GT_CNS_INT, GT_INIT_VAL))
-    {
-        // Data is a location and required to have GTF_DONT_CSE.
-        tree->Data()->gtFlags |= GTF_DONT_CSE;
-    }
-
-    tree->Addr()        = fgMorphTree(tree->Addr());
-    tree->Data()        = fgMorphTree(tree->Data());
-    tree->gtDynamicSize = fgMorphTree(tree->gtDynamicSize);
-
-    if (tree->gtDynamicSize->IsIntegralConst())
-    {
-        int64_t size = tree->gtDynamicSize->AsIntConCommon()->IntegralValue();
-
-        if ((size != 0) && FitsIn<int32_t>(size))
-        {
-            ClassLayout* layout = typGetBlkLayout(static_cast<unsigned>(size));
-            GenTree*     src    = tree->Data();
-            if (src->OperIs(GT_IND))
-            {
-                assert(src->TypeIs(TYP_STRUCT));
-                src->SetOper(GT_BLK);
-                src->AsBlk()->Initialize(layout);
-            }
-
-            GenTree* store = gtNewStoreValueNode(layout, tree->Addr(), src, tree->gtFlags & GTF_IND_FLAGS);
-            store->AddAllEffectsFlags(tree);
-            INDEBUG(store->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
-
-            JITDUMP("MorphStoreDynBlock: transformed STORE_DYN_BLK into STORE_BLK\n");
-
-            return tree->OperIsCopyBlkOp() ? fgMorphCopyBlock(store) : fgMorphInitBlock(store);
-        }
-    }
-
-    tree->SetAllEffectsFlags(tree->Addr(), tree->Data(), tree->gtDynamicSize);
-
-    if (tree->OperMayThrow(this))
-    {
-        tree->gtFlags |= GTF_EXCEPT;
-    }
-    else
-    {
-        tree->gtFlags |= GTF_IND_NONFAULTING;
-    }
-
-    tree->gtFlags |= GTF_ASG;
-
-    return tree;
-}
diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h
index 9c4f44e8e2f0..b3eb292677d8 100644
--- a/src/coreclr/jit/namedintrinsiclist.h
+++ b/src/coreclr/jit/namedintrinsiclist.h
@@ -20,7 +20,7 @@ enum NamedIntrinsic : unsigned short
     NI_System_BitConverter_Int64BitsToDouble,
     NI_System_BitConverter_SingleToInt32Bits,
 
-    NI_System_Buffer_Memmove,
+    NI_System_SpanHelpers_Memmove,
 
     NI_SYSTEM_MATH_START,
     NI_System_Math_Abs,
@@ -37,7 +37,6 @@ enum NamedIntrinsic : unsigned short
     NI_System_Math_Cosh,
     NI_System_Math_Exp,
     NI_System_Math_Floor,
-    NI_System_Math_FMod,
     NI_System_Math_FusedMultiplyAdd,
     NI_System_Math_ILogB,
     NI_System_Math_Log,
@@ -115,8 +114,11 @@ enum NamedIntrinsic : unsigned short
     NI_System_String_get_Length,
     NI_System_String_op_Implicit,
     NI_System_String_StartsWith,
+    NI_System_String_EndsWith,
     NI_System_Span_get_Item,
     NI_System_Span_get_Length,
+    NI_System_SpanHelpers_ClearWithoutReferences,
+    NI_System_SpanHelpers_Fill,
     NI_System_SpanHelpers_SequenceEqual,
     NI_System_ReadOnlySpan_get_Item,
     NI_System_ReadOnlySpan_get_Length,
@@ -125,6 +127,7 @@ enum NamedIntrinsic : unsigned short
     NI_System_MemoryExtensions_Equals,
     NI_System_MemoryExtensions_SequenceEqual,
     NI_System_MemoryExtensions_StartsWith,
+    NI_System_MemoryExtensions_EndsWith,
 
     NI_System_Threading_Interlocked_And,
     NI_System_Threading_Interlocked_Or,
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index a86039dc3338..0c995997d813 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -163,7 +163,8 @@ void ObjectAllocator::MarkEscapingVarsAndBuildConnGraph()
         };
 
         BuildConnGraphVisitor(ObjectAllocator* allocator)
-            : GenTreeVisitor<BuildConnGraphVisitor>(allocator->comp), m_allocator(allocator)
+            : GenTreeVisitor<BuildConnGraphVisitor>(allocator->comp)
+            , m_allocator(allocator)
         {
         }
 
@@ -317,7 +318,7 @@ void ObjectAllocator::ComputeStackObjectPointers(BitVecTraits* bitVecTraits)
 
                             if (DoesLclVarPointToStack(rhsLclNum))
                             {
-                                // The only assignment to lclNum local is definitely-stack-pointing
+                                // The only store to lclNum local is the definitely-stack-pointing
                                 // rhsLclNum local so lclNum local is also definitely-stack-pointing.
                                 MarkLclVarAsDefinitelyStackPointing(lclNum);
                             }
@@ -504,8 +505,8 @@ unsigned int ObjectAllocator::MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* a
     assert(m_AnalysisDone);
 
     const bool         shortLifetime = false;
-    const unsigned int lclNum     = comp->lvaGrabTemp(shortLifetime DEBUGARG("MorphAllocObjNodeIntoStackAlloc temp"));
-    const int unsafeValueClsCheck = true;
+    const unsigned int lclNum = comp->lvaGrabTemp(shortLifetime DEBUGARG("MorphAllocObjNodeIntoStackAlloc temp"));
+    const int          unsafeValueClsCheck = true;
     comp->lvaSetStruct(lclNum, allocObj->gtAllocObjClsHnd, unsafeValueClsCheck);
 
     // Initialize the object memory if necessary.
@@ -766,7 +767,8 @@ void ObjectAllocator::RewriteUses()
         };
 
         RewriteUsesVisitor(ObjectAllocator* allocator)
-            : GenTreeVisitor<RewriteUsesVisitor>(allocator->comp), m_allocator(allocator)
+            : GenTreeVisitor<RewriteUsesVisitor>(allocator->comp)
+            , m_allocator(allocator)
         {
         }
 
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index f4a56cb4ca39..07307161da00 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -47,21 +47,21 @@ class ObjectAllocator final : public Phase
     virtual PhaseStatus DoPhase() override;
 
 private:
-    bool CanAllocateLclVarOnStack(unsigned int lclNum, CORINFO_CLASS_HANDLE clsHnd);
-    bool CanLclVarEscape(unsigned int lclNum);
-    void MarkLclVarAsPossiblyStackPointing(unsigned int lclNum);
-    void MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum);
-    bool MayLclVarPointToStack(unsigned int lclNum);
-    bool DoesLclVarPointToStack(unsigned int lclNum);
-    void DoAnalysis();
-    void MarkLclVarAsEscaping(unsigned int lclNum);
-    void MarkEscapingVarsAndBuildConnGraph();
-    void AddConnGraphEdge(unsigned int sourceLclNum, unsigned int targetLclNum);
-    void ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& escapingNodes);
-    void ComputeStackObjectPointers(BitVecTraits* bitVecTraits);
-    bool     MorphAllocObjNodes();
-    void     RewriteUses();
-    GenTree* MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj);
+    bool         CanAllocateLclVarOnStack(unsigned int lclNum, CORINFO_CLASS_HANDLE clsHnd);
+    bool         CanLclVarEscape(unsigned int lclNum);
+    void         MarkLclVarAsPossiblyStackPointing(unsigned int lclNum);
+    void         MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum);
+    bool         MayLclVarPointToStack(unsigned int lclNum);
+    bool         DoesLclVarPointToStack(unsigned int lclNum);
+    void         DoAnalysis();
+    void         MarkLclVarAsEscaping(unsigned int lclNum);
+    void         MarkEscapingVarsAndBuildConnGraph();
+    void         AddConnGraphEdge(unsigned int sourceLclNum, unsigned int targetLclNum);
+    void         ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& escapingNodes);
+    void         ComputeStackObjectPointers(BitVecTraits* bitVecTraits);
+    bool         MorphAllocObjNodes();
+    void         RewriteUses();
+    GenTree*     MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj);
     unsigned int MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj, BasicBlock* block, Statement* stmt);
     struct BuildConnGraphVisitorCallbackData;
     bool CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parentStack, unsigned int lclNum);
diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp
index 01631f8d4157..41b15792e24a 100644
--- a/src/coreclr/jit/optcse.cpp
+++ b/src/coreclr/jit/optcse.cpp
@@ -18,6 +18,22 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 #include "optcse.h"
 
+#ifdef DEBUG
+#define RLDUMP(...)                                                                                                    \
+    {                                                                                                                  \
+        if (m_verbose)                                                                                                 \
+            logf(__VA_ARGS__);                                                                                         \
+    }
+#define RLDUMPEXEC(x)                                                                                                  \
+    {                                                                                                                  \
+        if (m_verbose)                                                                                                 \
+            x;                                                                                                         \
+    }
+#else
+#define RLDUMP(...)
+#define RLDUMPEXEC(x)
+#endif
+
 /* static */
 const size_t Compiler::s_optCSEhashSizeInitial  = EXPSET_SZ * 2;
 const size_t Compiler::s_optCSEhashGrowthFactor = 2;
@@ -148,6 +164,11 @@ bool Compiler::optUnmarkCSE(GenTree* tree)
         // 2. Unmark the CSE information in the node
 
         tree->gtCSEnum = NO_CSE;
+
+        // 3. Leave breadcrumbs so we know some dsc was altered
+
+        optCSEunmarks++;
+
         return true;
     }
     else
@@ -183,7 +204,9 @@ void Compiler::optCSE_GetMaskData(GenTree* tree, optCSE_MaskData* pMaskData)
             DoPreOrder = true,
         };
 
-        MaskDataWalker(Compiler* comp, optCSE_MaskData* maskData) : GenTreeVisitor(comp), m_maskData(maskData)
+        MaskDataWalker(Compiler* comp, optCSE_MaskData* maskData)
+            : GenTreeVisitor(comp)
+            , m_maskData(maskData)
         {
         }
 
@@ -375,7 +398,9 @@ void CSEdsc::ComputeNumLocals(Compiler* compiler)
         };
 
         LocalCountingVisitor(Compiler* compiler)
-            : GenTreeVisitor<LocalCountingVisitor>(compiler), m_count(0), m_occurrences(0)
+            : GenTreeVisitor<LocalCountingVisitor>(compiler)
+            , m_count(0)
+            , m_occurrences(0)
         {
         }
 
@@ -478,26 +503,11 @@ unsigned optCSEKeyToHashIndex(size_t key, size_t optCSEhashSize)
 //
 unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt)
 {
-    size_t   key;
-    unsigned hval;
-    CSEdsc*  hashDsc;
-    bool     enableSharedConstCSE = false;
-    bool     isSharedConst        = false;
-    int      configValue          = JitConfig.JitConstCSE();
-
-#if defined(TARGET_ARMARCH)
-    // ARMARCH - allow to combine with nearby offsets, when config is not 2 or 4
-    if ((configValue != CONST_CSE_ENABLE_ARM_NO_SHARING) && (configValue != CONST_CSE_ENABLE_ALL_NO_SHARING))
-    {
-        enableSharedConstCSE = true;
-    }
-#endif // TARGET_ARMARCH
-
-    // All Platforms - also allow to combine with nearby offsets, when config is 3
-    if (configValue == CONST_CSE_ENABLE_ALL)
-    {
-        enableSharedConstCSE = true;
-    }
+    size_t     key;
+    unsigned   hval;
+    CSEdsc*    hashDsc;
+    const bool enableSharedConstCSE = optSharedConstantCSEEnabled();
+    bool       isSharedConst        = false;
 
     // We use the liberal Value numbers when building the set of CSE
     ValueNum vnLib     = tree->GetVN(VNK_Liberal);
@@ -609,10 +619,45 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt)
 
             treeStmtLst* newElem;
 
-            /* Have we started the list of matching nodes? */
+            // Have we started the list of matching nodes?
 
             if (hashDsc->csdTreeList == nullptr)
             {
+                // This is the second time we see this value. Handle cases
+                // where the first value dominates the second one and we can
+                // already prove that the first one is _not_ going to be a
+                // valid def for the second one, due to the second one having
+                // more exceptions. This happens for example in code like
+                // CASTCLASS(x, y) where the "CASTCLASS" just adds exceptions
+                // on top of "x". In those cases it is always better to let the
+                // second value be the def.
+                // It also happens for GT_COMMA, but that one is special cased
+                // above; this handling is a less special-casey version of the
+                // GT_COMMA handling above. However, it is quite limited since
+                // it only handles the def/use being in the same block.
+                if (compCurBB == hashDsc->csdBlock)
+                {
+                    GenTree* prevTree  = hashDsc->csdTree;
+                    ValueNum prevVnLib = prevTree->GetVN(VNK_Liberal);
+                    if (prevVnLib != vnLib)
+                    {
+                        ValueNum prevExceptionSet = vnStore->VNExceptionSet(prevVnLib);
+                        ValueNum curExceptionSet  = vnStore->VNExceptionSet(vnLib);
+                        if ((prevExceptionSet != curExceptionSet) &&
+                            vnStore->VNExcIsSubset(curExceptionSet, prevExceptionSet))
+                        {
+                            JITDUMP("Skipping CSE candidate for tree [%06u]; tree [%06u] is a better candidate with "
+                                    "more exceptions\n",
+                                    prevTree->gtTreeID, tree->gtTreeID);
+                            prevTree->gtCSEnum = 0;
+                            hashDsc->csdStmt   = stmt;
+                            hashDsc->csdTree   = tree;
+                            tree->gtCSEnum     = (signed char)hashDsc->csdIndex;
+                            return hashDsc->csdIndex;
+                        }
+                    }
+                }
+
                 // Create the new element based upon the matching hashDsc element.
 
                 newElem = new (this, CMK_TreeStatementList) treeStmtLst;
@@ -1145,7 +1190,9 @@ class CSE_DataFlow
     EXPSET_TP m_preMergeOut;
 
 public:
-    CSE_DataFlow(Compiler* pCompiler) : m_comp(pCompiler), m_preMergeOut(BitVecOps::UninitVal())
+    CSE_DataFlow(Compiler* pCompiler)
+        : m_comp(pCompiler)
+        , m_preMergeOut(BitVecOps::UninitVal())
     {
     }
 
@@ -1701,36 +1748,15 @@ void Compiler::optValnumCSE_Availability()
 // Notes:
 //  This creates the basic CSE heuristic. It never does any CSEs.
 //
-CSE_HeuristicCommon::CSE_HeuristicCommon(Compiler* pCompiler) : m_pCompiler(pCompiler)
+CSE_HeuristicCommon::CSE_HeuristicCommon(Compiler* pCompiler)
+    : m_pCompiler(pCompiler)
 {
-    m_addCSEcount = 0; /* Count of the number of LclVars for CSEs that we added */
-    sortTab       = nullptr;
-    sortSiz       = 0;
-    madeChanges   = false;
-    codeOptKind   = m_pCompiler->compCodeOpt();
-
-    enableConstCSE = true;
-
-    int configValue = JitConfig.JitConstCSE();
-
-    // all platforms - disable CSE of constant values when config is 1
-    if (configValue == CONST_CSE_DISABLE_ALL)
-    {
-        enableConstCSE = false;
-    }
-
-#if !defined(TARGET_ARM64)
-    // non-ARM64 platforms - disable by default
-    //
-    enableConstCSE = false;
-
-    // Check for the two enable cases for all platforms
-    //
-    if ((configValue == CONST_CSE_ENABLE_ALL) || (configValue == CONST_CSE_ENABLE_ALL_NO_SHARING))
-    {
-        enableConstCSE = true;
-    }
-#endif
+    m_addCSEcount  = 0; /* Count of the number of LclVars for CSEs that we added */
+    sortTab        = nullptr;
+    sortSiz        = 0;
+    madeChanges    = false;
+    codeOptKind    = m_pCompiler->compCodeOpt();
+    enableConstCSE = Compiler::optConstantCSEEnabled();
 
 #ifdef DEBUG
     // Track the order of CSEs done (candidate number)
@@ -1772,7 +1798,7 @@ bool CSE_HeuristicCommon::CanConsiderTree(GenTree* tree, bool isReturn)
     }
 
     // Don't allow non-SIMD struct CSEs under a return; we don't fully
-    // re-morph these if we introduce a CSE assignment, and so may create
+    // re-morph these if we introduce a CSE store, and so may create
     // IR that lower is not yet prepared to handle.
     //
     if (isReturn && varTypeIsStruct(tree->gtType) && !varTypeIsSIMD(tree->gtType))
@@ -2055,10 +2081,10 @@ void CSE_HeuristicCommon::DumpMetrics()
 //  This creates the random CSE heuristic. It does CSEs randomly, with some
 //  predetermined likelihood (set by config or by stress).
 //
-CSE_HeuristicRandom::CSE_HeuristicRandom(Compiler* pCompiler) : CSE_HeuristicCommon(pCompiler)
+CSE_HeuristicRandom::CSE_HeuristicRandom(Compiler* pCompiler)
+    : CSE_HeuristicCommon(pCompiler)
 {
     m_cseRNG.Init(m_pCompiler->info.compMethodHash() ^ JitConfig.JitRandomCSE());
-    Announce();
 }
 
 //------------------------------------------------------------------------
@@ -2135,6 +2161,13 @@ void CSE_HeuristicRandom::ConsiderCandidates()
         JITDUMPEXEC(m_pCompiler->gtDispTree(candidate.Expr()));
         JITDUMP("\n");
 
+#ifdef DEBUG
+        if (m_pCompiler->optConfigDisableCSE2())
+        {
+            continue;
+        }
+#endif
+
         if (dsc->defExcSetPromise == ValueNumStore::NoVN)
         {
             JITDUMP("Abandoned " FMT_CSE " because we had defs with different Exc sets\n", candidate.CseIndex());
@@ -2175,9 +2208,9 @@ void CSE_HeuristicRandom::ConsiderCandidates()
 //  This creates the replay CSE heuristic. It does CSEs specifed by
 //  the ArrayConfig parsing of JitReplayCSE.
 //
-CSE_HeuristicReplay::CSE_HeuristicReplay(Compiler* pCompiler) : CSE_HeuristicCommon(pCompiler)
+CSE_HeuristicReplay::CSE_HeuristicReplay(Compiler* pCompiler)
+    : CSE_HeuristicCommon(pCompiler)
 {
-    Announce();
 }
 
 //------------------------------------------------------------------------
@@ -2219,7 +2252,7 @@ void CSE_HeuristicReplay::ConsiderCandidates()
         return;
     }
 
-    static ConfigIntArray JitReplayCSEArray;
+    ConfigIntArray JitReplayCSEArray;
     JitReplayCSEArray.EnsureInit(JitConfig.JitReplayCSE());
 
     for (unsigned i = 0; i < JitReplayCSEArray.GetLength(); i++)
@@ -2253,108 +2286,34 @@ void CSE_HeuristicReplay::ConsiderCandidates()
     }
 }
 
+#endif // DEBUG
+
+// From PolicyGradient
+// Greedy/Base: 35483 methods, 8669 better, 23752 same, 3061 worse,  1.0041 geomean
+
+double CSE_HeuristicParameterized::s_defaultParameters[CSE_HeuristicParameterized::numParameters] =
+    {0.2425,  0.2479, 0.1089,  -0.2363, 0.2472, -0.0559, -0.8418, -0.0585, -0.2773, 0.0000,  0.0213,  -0.4116, 0.0000,
+     -0.0922, 0.2593, -0.0315, -0.0745, 0.2607, 0.3475,  -0.0590, -0.3177, -0.6883, -0.4998, -0.3220, -0.2268};
+
 //------------------------------------------------------------------------
-// CSE_HeuristicRL: construct RL CSE heuristic
+// CSE_HeuristicParameterized: CSE heuristic using parameterized, linear profitability model
 //
 // Arguments;
 //  pCompiler - compiler instance
 //
-// Notes:
-//  This creates the RL CSE heuristic. It does CSEs based on a stochastic
-//  softmax policy, governed by a parameter vector.
-//
-//  JitRLCSE specified the initial parameter values.
-//  JitRandomCSE can be used to supply salt for the RNG.
-//  JitReplayCSE can be used to supply a sequence to follow.
-//  JitReplayCSEReward can be used to supply the perf score for the sequence.
-//
-CSE_HeuristicRL::CSE_HeuristicRL(Compiler* pCompiler)
-    : CSE_HeuristicCommon(pCompiler), m_alpha(0.0), m_updateParameters(false), m_greedy(false), m_verbose(false)
+CSE_HeuristicParameterized::CSE_HeuristicParameterized(Compiler* pCompiler)
+    : CSE_HeuristicCommon(pCompiler)
 {
-    // Set up the random state
-    //
-    m_cseRNG.Init(m_pCompiler->info.compMethodHash() ^ JitConfig.JitRandomCSE());
-
-    // Parameters
+    // Default parameter values...
     //
-    static ConfigDoubleArray initialParameters;
-    initialParameters.EnsureInit(JitConfig.JitRLCSE());
-    const unsigned initialParamLength = initialParameters.GetLength();
-
-    for (unsigned i = 0; (i < initialParamLength) && (i < numParameters); i++)
-    {
-        m_parameters[i] = initialParameters.GetData()[i];
-    }
-
-    if (numParameters > initialParamLength)
-    {
-        JITDUMP("Too few parameters (expected %d), trailing will be zero\n", numParameters);
-        for (unsigned i = initialParamLength; i < numParameters; i++)
-        {
-            m_parameters[i] = 0;
-        }
-    }
-    else if (numParameters < initialParamLength)
+    for (unsigned i = 0; i < numParameters; i++)
     {
-        JITDUMP("Too many parameters (expected %d), trailing will be ignored\n", numParameters);
+        m_parameters[i] = s_defaultParameters[i];
     }
 
-    // Policy sub-behavior: explore / update / greedy
-    //
-    // We may be given a prior sequence and perf score to use to
-    // update the parameters .... if so, we will replay same sequence of CSEs
-    // (like the replay policy) and update the parameters via the policy
-    // gradient algorithm.
-    //
-    // For updates:
-    //
-    // m_alpha controls the "step size" or learning rate; when we want to adjust
-    // the parameters we only partially move them towards the gradient indicated values.
-    //
-    // m_rewards describes the reward associated with each step.
-    //
-    // This "two-pass" technique (first run the current policy and, obtain the perf score
-    // and CSE sequence, then rerun with the same sequence and update the policy
-    // parameters) ensures all the policy model logic is within the
-    // JIT, so the preference computation and its gradient can be kept in sync.
+    // These get set during...
     //
-    if ((JitConfig.JitReplayCSE() != nullptr) && (JitConfig.JitReplayCSEReward() != nullptr))
-    {
-        m_updateParameters = true;
-
-        // Reward
-        //
-        static ConfigDoubleArray rewards;
-        rewards.EnsureInit(JitConfig.JitReplayCSEReward());
-        const unsigned rewardsLength = rewards.GetLength();
-
-        for (unsigned i = 0; (i < rewardsLength) && (i < maxSteps); i++)
-        {
-            m_rewards[i] = rewards.GetData()[i];
-        }
-
-        for (unsigned i = rewardsLength; i < maxSteps; i++)
-        {
-            m_rewards[i] = 0;
-        }
-
-        // Alpha
-        //
-        if (JitConfig.JitRLCSEAlpha() != nullptr)
-        {
-            static ConfigDoubleArray JitRLCSEAlphaArray;
-            JitRLCSEAlphaArray.EnsureInit(JitConfig.JitRLCSEAlpha());
-            m_alpha = JitRLCSEAlphaArray.GetData()[0];
-        }
-        else
-        {
-            m_alpha = 0.001;
-        }
-    }
-    else if (JitConfig.JitRLCSEGreedy() > 0)
-    {
-        m_greedy = true;
-    }
+    m_localWeights = nullptr;
 
     // Stopping "parameter"
     //
@@ -2362,121 +2321,29 @@ CSE_HeuristicRL::CSE_HeuristicRL(Compiler* pCompiler)
 
     // Verbose
     //
-    if (m_pCompiler->verbose || (JitConfig.JitRLCSEVerbose() > 0))
-    {
-        m_verbose = true;
-    }
+    m_verbose = (JitConfig.JitRLCSEVerbose() > 0);
 
 #ifdef DEBUG
+    m_verbose |= m_pCompiler->verbose;
     CompAllocator allocator = m_pCompiler->getAllocator(CMK_CSE);
     m_likelihoods           = new (allocator) jitstd::vector<double>(allocator);
-    m_baseLikelihoods       = new (allocator) jitstd::vector<double>(allocator);
-    m_features              = new (allocator) jitstd::vector<char*>(allocator);
 #endif
-    Announce();
-}
-
-//------------------------------------------------------------------------
-// Name: name this jit heuristic
-//
-// Returns:
-//   descriptive name string
-//
-const char* CSE_HeuristicRL::Name() const
-{
-    if (m_updateParameters)
-    {
-        return "RL Policy Gradient Update";
-    }
-    else if (m_greedy)
-    {
-        return "RL Policy Gradient Greedy";
-    }
-    else
-    {
-        return "RL Policy Gradient Stochastic";
-    }
-}
-
-//------------------------------------------------------------------------
-// Announce: describe heuristic in jit dump
-//
-void CSE_HeuristicRL::Announce()
-{
-    JITDUMP("%s salt %d parameters ", Name(), JitConfig.JitRandomCSE());
-    for (int i = 0; i < numParameters; i++)
-    {
-        JITDUMP("%s%f", (i == 0) ? "" : ",", m_parameters[i]);
-    }
-    JITDUMP("\n");
-
-    if (m_updateParameters)
-    {
-        JITDUMP("Operating in update mode with sequence %ls, rewards %ls, and alpha %f\n", JitConfig.JitReplayCSE(),
-                JitConfig.JitReplayCSEReward(), m_alpha);
-    }
 }
 
 //------------------------------------------------------------------------
-// DumpMetrics: dump post-CSE metrics
+// ConsiderCandidates: examine candidates and perform CSEs.
 //
-void CSE_HeuristicRL::DumpMetrics()
+void CSE_HeuristicParameterized::ConsiderCandidates()
 {
-    CSE_HeuristicCommon::DumpMetrics();
-
-    if (m_updateParameters)
-    {
-        // For update, dump the new parameter values
-        //
-        printf(" updatedparams ");
-        for (int i = 0; i < numParameters; i++)
-        {
-            printf("%s%f", (i == 0) ? "" : ",", m_parameters[i]);
-        }
-
-        if (JitConfig.JitRLCSECandidateFeatures() > 0)
-        {
-            bool first = true;
-            printf(", features ");
-            for (char* f : *m_features)
-            {
-                printf("%s%s", first ? "" : ",", f);
-                first = false;
-            }
-        }
-    }
-    else if (m_greedy)
-    {
-        // Show the parameters used.
-        //
-        printf(" params ");
-        for (int i = 0; i < numParameters; i++)
-        {
-            printf("%s%f", (i == 0) ? "" : ",", m_parameters[i]);
-        }
-    }
-    else
-    {
-        // For evaluation, dump likelihood of the choices made
-        //
-        printf(" likelihoods ");
-        bool first = true;
-        for (double d : *m_likelihoods)
-        {
-            printf("%s%.3f", first ? "" : ",", d);
-            first = false;
-        }
+    const int numCandidates = m_pCompiler->optCSECandidateCount;
+    sortTab                 = new (m_pCompiler, CMK_CSE) CSEdsc*[numCandidates];
+    sortSiz                 = numCandidates * sizeof(*sortTab);
+    memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz);
 
-        // For evaluation, dump initial likelihood each choice
-        //
-        printf(" baseLikelihoods ");
-        first = true;
-        for (double d : *m_baseLikelihoods)
-        {
-            printf("%s%.3f", first ? "" : ",", d);
-            first = false;
-        }
-    }
+    // Capture distribution of enregisterable local var weights.
+    //
+    CaptureLocalWeights();
+    GreedyPolicy();
 }
 
 //------------------------------------------------------------------------
@@ -2489,7 +2356,7 @@ void CSE_HeuristicRL::DumpMetrics()
 // Returns:
 //    true if this tree can be a CSE candidate
 //
-bool CSE_HeuristicRL::ConsiderTree(GenTree* tree, bool isReturn)
+bool CSE_HeuristicParameterized::ConsiderTree(GenTree* tree, bool isReturn)
 {
     return CanConsiderTree(tree, isReturn);
 }
@@ -2502,7 +2369,7 @@ bool CSE_HeuristicRL::ConsiderTree(GenTree* tree, bool isReturn)
 //    Used to estimate where the temp introduced by a CSE would rank compared
 //    to other locals in the method, as they compete for registers.
 //
-void CSE_HeuristicRL::CaptureLocalWeights()
+void CSE_HeuristicParameterized::CaptureLocalWeights()
 {
     JITDUMP("Local weight table...\n");
     CompAllocator allocator = m_pCompiler->getAllocator(CMK_SSA);
@@ -2513,158 +2380,61 @@ void CSE_HeuristicRL::CaptureLocalWeights()
         LclVarDsc* const varDsc = m_pCompiler->lvaGetDescByTrackedIndex(trackedIndex);
 
         // Locals with no references aren't enregistered
+        //
         if (varDsc->lvRefCnt() == 0)
         {
             continue;
         }
 
         // Some LclVars always have stack homes
+        //
         if (varDsc->lvDoNotEnregister)
         {
             continue;
         }
 
+        // Only consider for integral types
+        //
+        if (varTypeIsFloating(varDsc->TypeGet()) || varTypeIsMask(varDsc->TypeGet()))
+        {
+            continue;
+        }
+
         JITDUMP("V%02u," FMT_WT "\n", m_pCompiler->lvaGetLclNum(varDsc), varDsc->lvRefCntWtd());
         m_localWeights->push_back(varDsc->lvRefCntWtd() / BB_UNITY_WEIGHT);
     }
 }
 
 //------------------------------------------------------------------------
-// ConsiderCandidates: examine candidates and perform CSEs.
+// GreedyPolicy: use a greedy policy
 //
-void CSE_HeuristicRL::ConsiderCandidates()
+// Notes:
+//   This always performs the most-preferred choice, using lower candidate number
+//   as a tie-breaker.
+//
+void CSE_HeuristicParameterized::GreedyPolicy()
 {
-    const int numCandidates = m_pCompiler->optCSECandidateCount;
-    sortTab                 = new (m_pCompiler, CMK_CSE) CSEdsc*[numCandidates];
-    sortSiz                 = numCandidates * sizeof(*sortTab);
-    memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz);
-
-    // Capture distribution of enregisterable local var weights.
-    //
-    CaptureLocalWeights();
-
-    if (m_updateParameters)
-    {
-        UpdateParameters();
-        return;
-    }
-
-    if (m_greedy)
-    {
-        GreedyPolicy();
-        return;
-    }
-
-    SoftmaxPolicy();
-}
-
-//------------------------------------------------------------------------
-// GreedyPolicy: use a greedy policy
-//
-// Notes:
-//   This always performs the most-preferred choice, using lower candidate number
-//   as a tie-breaker.
-//
-void CSE_HeuristicRL::GreedyPolicy()
-{
-    if (m_verbose)
-    {
-        printf("RL using greedy policy\n");
-    }
+    RLDUMP("RL using greedy policy\n");
 
     // Number of choices is num candidates + 1, since
     // early stopping is also a choice.
     //
     const int          numCandidates = m_pCompiler->optCSECandidateCount;
     ArrayStack<Choice> choices(m_pCompiler->getAllocator(CMK_CSE), numCandidates + 1);
+    unsigned           numUnmarked       = m_pCompiler->optCSEunmarks;
+    bool               recomputeFeatures = true;
 
     while (true)
     {
-        Choice&       choice = ChooseGreedy(choices);
+        Choice&       choice = ChooseGreedy(choices, recomputeFeatures);
         CSEdsc* const dsc    = choice.m_dsc;
 
-        if (dsc == nullptr)
-        {
-            m_likelihoods->push_back(choice.m_softmax);
-            break;
-        }
-
-        // purge this CSE from sortTab so we won't choose it again
-        //
-        assert(sortTab[dsc->csdIndex - 1] == dsc);
-        sortTab[dsc->csdIndex - 1] = nullptr;
-
-        // ChooseCSE should only choose viable options
-        //
-        assert(dsc->IsViable());
-
-        CSE_Candidate candidate(this, dsc);
-
-        if (m_verbose)
-        {
-            printf("\nRL attempting " FMT_CSE "\n", candidate.CseIndex());
-        }
-
-        JITDUMP("CSE Expression : \n");
-        JITDUMPEXEC(m_pCompiler->gtDispTree(candidate.Expr()));
-        JITDUMP("\n");
-
-        PerformCSE(&candidate);
-        madeChanges = true;
+#ifdef DEBUG
         m_likelihoods->push_back(choice.m_softmax);
-    }
-
-    return;
-}
-
-//------------------------------------------------------------------------
-// SoftmaxPolicy: use a randomized softmax policy
-//
-// Notes:
-//   This converts preferences to likelihoods using softmax, and then
-//   randomly selects a candidate proportional to its likelihood.
-//
-void CSE_HeuristicRL::SoftmaxPolicy()
-{
-    if (m_verbose)
-    {
-        printf("RL using softmax policy\n");
-    }
-
-    // Number of choices is num candidates + 1, since
-    // early stopping is also a choice.
-    //
-    const int          numCandidates = m_pCompiler->optCSECandidateCount;
-    ArrayStack<Choice> choices(m_pCompiler->getAllocator(CMK_CSE), numCandidates + 1);
-    bool               first = true;
-
-    while (true)
-    {
-        Choice& choice = ChooseSoftmax(choices);
-
-        if (first)
-        {
-            for (int i = 0; i < choices.Height(); i++)
-            {
-                Choice& option = choices.TopRef(i);
-                if (option.m_dsc == nullptr)
-                {
-                    m_baseLikelihoods->push_back(0);
-                }
-                else
-                {
-                    m_baseLikelihoods->push_back(option.m_dsc->csdIndex);
-                }
-                m_baseLikelihoods->push_back(option.m_softmax);
-            }
-            first = false;
-        }
-
-        CSEdsc* const dsc = choice.m_dsc;
+#endif
 
         if (dsc == nullptr)
         {
-            m_likelihoods->push_back(choice.m_softmax);
             break;
         }
 
@@ -2689,8 +2459,16 @@ void CSE_HeuristicRL::SoftmaxPolicy()
         JITDUMP("\n");
 
         PerformCSE(&candidate);
-        madeChanges = true;
-        m_likelihoods->push_back(choice.m_softmax);
+        madeChanges        = true;
+        choice.m_performed = true;
+
+        // If performing this CSE impacted other CSEs, we need to
+        // recompute all cse features.
+        //
+        unsigned newNumUnmarked = m_pCompiler->optCSEunmarks;
+        assert(newNumUnmarked >= numUnmarked);
+        recomputeFeatures = (numUnmarked != newNumUnmarked);
+        numUnmarked       = newNumUnmarked;
     }
 
     return;
@@ -2723,8 +2501,19 @@ void CSE_HeuristicRL::SoftmaxPolicy()
 //   14. cse is marked GTF_MAKE_CSE (0/1)
 //   15. cse num distinct locals
 //   16. cse num local occurrences
+//   17. cse has call (0/1)
+//   18. log (cse use count weighted * costEx)
+//   19. log (cse use count weighted * num local occurrences)
+//   20. cse "distance" (max postorder num - min postorder num) / num BBs
+//   21. cse is "containable" (0/1)
+//   22. cse is cheap & containable (0/1)
+//   23. is live across call in possible LSRA ordering (0/1)
+//
+//   -----
 //
-void CSE_HeuristicRL::GetFeatures(CSEdsc* cse, double* features)
+//   24. log (pressure estimate weight)
+//
+void CSE_HeuristicParameterized::GetFeatures(CSEdsc* cse, double* features)
 {
     for (int i = 0; i < numParameters; i++)
     {
@@ -2737,20 +2526,13 @@ void CSE_HeuristicRL::GetFeatures(CSEdsc* cse, double* features)
         return;
     }
 
-    const unsigned char costEx = cse->csdTree->GetCostEx();
+    const unsigned char costEx       = cse->csdTree->GetCostEx();
+    const double        deMinimis    = 1e-3;
+    const double        deMinimusAdj = -log(deMinimis);
 
     features[0] = costEx;
-
-    if (cse->csdUseWtCnt > 0)
-    {
-        features[1] = log(cse->csdUseWtCnt);
-    }
-
-    if (cse->csdDefWtCnt > 0)
-    {
-        features[2] = log(cse->csdDefWtCnt);
-    }
-
+    features[1] = deMinimusAdj + log(max(deMinimis, cse->csdUseWtCnt));
+    features[2] = deMinimusAdj + log(max(deMinimis, cse->csdDefWtCnt));
     features[3] = cse->csdTree->GetCostSz();
     features[4] = cse->csdUseCount;
     features[5] = cse->csdDefCount;
@@ -2770,6 +2552,7 @@ void CSE_HeuristicRL::GetFeatures(CSEdsc* cse, double* features)
     features[9] = booleanScale * isSharedConstant;
 
     const bool isMinCost = (costEx == Compiler::MIN_CSE_COST);
+    const bool isLowCost = (costEx <= Compiler::MIN_CSE_COST + 1);
 
     features[10] = booleanScale * isMinCost;
 
@@ -2780,18 +2563,69 @@ void CSE_HeuristicRL::GetFeatures(CSEdsc* cse, double* features)
     features[13] = booleanScale * (isMinCost & isLiveAcrossCall);
 
     // Is any CSE tree for this candidate marked GTF_MAKE_CSE (hoisting)
+    // Also gather data for "distance" metric.
     //
-    bool isMakeCse = false;
-    for (treeStmtLst* treeList = cse->csdTreeList; treeList != nullptr && !isMakeCse; treeList = treeList->tslNext)
+    const unsigned numBBs            = m_pCompiler->fgBBcount;
+    bool           isMakeCse         = false;
+    unsigned       minPostorderNum   = numBBs;
+    unsigned       maxPostorderNum   = 0;
+    BasicBlock*    minPostorderBlock = nullptr;
+    BasicBlock*    maxPostorderBlock = nullptr;
+    for (treeStmtLst* treeList = cse->csdTreeList; treeList != nullptr; treeList = treeList->tslNext)
     {
-        isMakeCse = ((treeList->tslTree->gtFlags & GTF_MAKE_CSE) != 0);
+        BasicBlock* const treeBlock    = treeList->tslBlock;
+        unsigned          postorderNum = treeBlock->bbPostorderNum;
+        if (postorderNum < minPostorderNum)
+        {
+            minPostorderNum   = postorderNum;
+            minPostorderBlock = treeBlock;
+        }
+
+        if (postorderNum > maxPostorderNum)
+        {
+            maxPostorderNum   = postorderNum;
+            maxPostorderBlock = treeBlock;
+        }
+
+        isMakeCse |= ((treeList->tslTree->gtFlags & GTF_MAKE_CSE) != 0);
     }
+    const unsigned blockSpread = maxPostorderNum - minPostorderNum;
+
     features[14] = booleanScale * isMakeCse;
 
     // Locals data
     //
     features[15] = cse->numDistinctLocals;
     features[16] = cse->numLocalOccurrences;
+
+    // More
+    //
+    features[17] = booleanScale * ((cse->csdTree->gtFlags & GTF_CALL) != 0);
+    features[18] = deMinimusAdj + log(max(deMinimis, cse->csdUseCount * cse->csdUseWtCnt));
+    features[19] = deMinimusAdj + log(max(deMinimis, cse->numLocalOccurrences * cse->csdUseWtCnt));
+    features[20] = booleanScale * ((double)(blockSpread) / numBBs);
+
+    const bool isContainable = cse->csdTree->OperIs(GT_ADD, GT_NOT, GT_MUL, GT_LSH);
+    features[21]             = booleanScale * isContainable;
+    features[22]             = booleanScale * (isContainable && isLowCost);
+
+    // LSRA "is live across call"
+    //
+    bool isLiveAcrossCallLSRA = isLiveAcrossCall;
+    if (!isLiveAcrossCallLSRA)
+    {
+        unsigned count = 0;
+        for (BasicBlock* block                                                            = minPostorderBlock;
+             block != nullptr && block != maxPostorderBlock && count < blockSpread; block = block->Next(), count++)
+        {
+            if (block->HasFlag(BBF_HAS_CALL))
+            {
+                isLiveAcrossCallLSRA = true;
+                break;
+            }
+        }
+    }
+    features[23] = booleanScale * isLiveAcrossCallLSRA;
 }
 
 //------------------------------------------------------------------------
@@ -2804,12 +2638,12 @@ void CSE_HeuristicRL::GetFeatures(CSEdsc* cse, double* features)
 //
 // Stopping features
 //
-//   17. int register pressure weight estimate (log)
+//   24. int register pressure weight estimate (log)
 //
 // All boolean features are scaled up by booleanScale so their
 // numeric range is similar to the non-boolean features
 //
-void CSE_HeuristicRL::GetStoppingFeatures(double* features)
+void CSE_HeuristicParameterized::GetStoppingFeatures(double* features)
 {
     // Estimate the (log) weight at which a new CSE would cause a spill
     // if m_registerPressure registers were initially available.
@@ -2819,8 +2653,9 @@ void CSE_HeuristicRL::GetStoppingFeatures(double* features)
     //  "remove" weight per local use occurrences * weightUses
     //  "add" weight of the CSE temp times * (weigh defs*2) + weightUses
     //
-    double minWeight     = 0.01;
-    double spillAtWeight = minWeight;
+    const double deMinimis     = 1e-3;
+    double       spillAtWeight = deMinimis;
+    const double deMinimusAdj  = -log(deMinimis);
 
     // Assume each already performed cse is occupying a registger
     //
@@ -2845,44 +2680,54 @@ void CSE_HeuristicRL::GetStoppingFeatures(double* features)
     // Large frame...?
     //  todo: scan all vars, not just tracked?
     //
-    features[17] = log(max(spillAtWeight, minWeight));
+
+    features[24] = deMinimusAdj + log(max(deMinimis, spillAtWeight));
 }
 
 //------------------------------------------------------------------------
-// DumpFeatures: dump feature values for a CSE candidate
+// Preference: determine a preference score for this CSE
 //
 // Arguments:
-//    dsc - cse descriptor
-//    features - feature vector for that candidate
-//
-// Notes:
-//    Dumps a comma separated row of data, prefixed by method index.
+//    cse - cse descriptor, or nullptr for the option to stop doing CSEs.
 //
-void CSE_HeuristicRL::DumpFeatures(CSEdsc* dsc, double* features)
+double CSE_HeuristicParameterized::Preference(CSEdsc* cse)
 {
-    printf("features,%d," FMT_CSE, m_pCompiler->info.compMethodSuperPMIIndex, dsc == nullptr ? 0 : dsc->csdIndex);
+    double features[numParameters];
+    GetFeatures(cse, features);
+
+#ifdef DEBUG
+    if (JitConfig.JitRLCSECandidateFeatures() > 0)
+    {
+        DumpFeatures(cse, features);
+    }
+#endif
+
+    double preference = 0;
     for (int i = 0; i < numParameters; i++)
     {
-        printf(",%f", features[i]);
+        preference += features[i] * m_parameters[i];
     }
-    printf("\n");
+
+    return preference;
 }
 
 //------------------------------------------------------------------------
-// Preference: determine a preference score for this CSE
+// StoppingPreference: determine a preference score for this stopping CSE
 //
 // Arguments:
-//    cse - cse descriptor, or nullptr for the option to stop doing CSEs.
+//    regAvail - number of registers threshold
 //
-double CSE_HeuristicRL::Preference(CSEdsc* cse)
+double CSE_HeuristicParameterized::StoppingPreference()
 {
     double features[numParameters];
-    GetFeatures(cse, features);
+    GetFeatures(nullptr, features);
 
+#ifdef DEBUG
     if (JitConfig.JitRLCSECandidateFeatures() > 0)
     {
-        DumpFeatures(cse, features);
+        DumpFeatures(nullptr, features);
     }
+#endif
 
     double preference = 0;
     for (int i = 0; i < numParameters; i++)
@@ -2894,88 +2739,573 @@ double CSE_HeuristicRL::Preference(CSEdsc* cse)
 }
 
 //------------------------------------------------------------------------
-// StoppingPreference: determine a preference score for this stopping CSE
-//
-// Arguments:
-//    regAvail - number of registers threshold
+// ChooseGreedy: examine candidates and choose the next CSE to perform
+//   via greedy policy
+//
+// Arguments:
+//   choices -- array of choices, possibly already filled in
+//   recompute -- if true, rebuild the choice array from scratch
+//
+// Returns:
+//   Choice of CSE to perform
+//
+// Notes:
+//   Picks the most-preferred candidate.
+//   If there is a tie, picks stop, or the lowest cse index.
+//
+CSE_HeuristicParameterized::Choice& CSE_HeuristicParameterized::ChooseGreedy(ArrayStack<Choice>& choices,
+                                                                             bool                recompute)
+{
+    if (recompute)
+    {
+        choices.Reset();
+        BuildChoices(choices);
+    }
+    else
+    {
+        // Always recompute the stopping preference as this
+        // reflects ambient state after each CSE.
+        //
+        // By convention, this is at TopRef(0).
+        //
+        Choice& stopping = choices.TopRef(0);
+        assert(stopping.m_dsc == nullptr);
+        stopping.m_preference = StoppingPreference();
+    }
+
+    // Find the maximally preferred case.
+    //
+    int choiceNum = 0;
+
+    for (int i = 1; i < choices.Height(); i++)
+    {
+        const Choice& choice = choices.TopRef(i);
+
+        if (choice.m_performed == true)
+        {
+            continue;
+        }
+
+        const Choice& bestChoice = choices.TopRef(choiceNum);
+
+        const double delta = choice.m_preference - bestChoice.m_preference;
+
+        bool update = false;
+
+        if (delta > 0)
+        {
+            update = true;
+        }
+        else if (delta == 0)
+        {
+            if (choice.m_dsc == nullptr)
+            {
+                update = true;
+            }
+            else if ((bestChoice.m_dsc != nullptr) && (choice.m_dsc->csdIndex < bestChoice.m_dsc->csdIndex))
+            {
+                update = true;
+            }
+        }
+
+        if (update)
+        {
+            choiceNum = i;
+        }
+    }
+
+    RLDUMP("Greedy candidate evaluation\n");
+    RLDUMPEXEC(DumpChoices(choices, choiceNum));
+
+    return choices.TopRef(choiceNum);
+}
+
+//------------------------------------------------------------------------
+// BuildChoices: fill in the choices currently available
+//
+//   choices - array of choices to be filled in
+//
+// Notes:
+//    Also computes the preference for each choice.
+//
+void CSE_HeuristicParameterized::BuildChoices(ArrayStack<Choice>& choices)
+{
+    JITDUMP("Building choice array...\n");
+
+    for (unsigned i = 0; i < m_pCompiler->optCSECandidateCount; i++)
+    {
+        CSEdsc* const dsc = sortTab[i];
+        if ((dsc == nullptr) || !dsc->IsViable())
+        {
+            // already did this cse,
+            // or the cse is not viable
+            continue;
+        }
+
+        double preference = Preference(dsc);
+        choices.Emplace(dsc, preference);
+    }
+
+    // Doing nothing is also an option.
+    //
+    const double stoppingPreference = StoppingPreference();
+    choices.Emplace(nullptr, stoppingPreference);
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// Announce: describe heuristic in jit dump
+//
+void CSE_HeuristicParameterized::Announce()
+{
+    JITDUMP("%s parameters ", Name());
+    for (int i = 0; i < numParameters; i++)
+    {
+        JITDUMP("%s%f", (i == 0) ? "" : ",", m_parameters[i]);
+    }
+    JITDUMP("\n");
+}
+
+//------------------------------------------------------------------------
+// DumpMetrics: dump post-CSE metrics
+//
+void CSE_HeuristicParameterized::DumpMetrics()
+{
+    CSE_HeuristicCommon::DumpMetrics();
+
+    // Show the parameters used.
+    //
+    printf(" params ");
+    for (int i = 0; i < numParameters; i++)
+    {
+        printf("%s%f", (i == 0) ? "" : ",", m_parameters[i]);
+    }
+}
+
+//------------------------------------------------------------------------
+// DumpFeatures: dump feature values for a CSE candidate
+//
+// Arguments:
+//    dsc - cse descriptor
+//    features - feature vector for that candidate
+//
+// Notes:
+//    Dumps a comma separated row of data, prefixed by method index.
+//
+void CSE_HeuristicParameterized::DumpFeatures(CSEdsc* dsc, double* features)
+{
+    printf("features,%d," FMT_CSE, m_pCompiler->info.compMethodSuperPMIIndex, dsc == nullptr ? 0 : dsc->csdIndex);
+    for (int i = 0; i < numParameters; i++)
+    {
+        printf(",%f", features[i]);
+    }
+    printf("\n");
+}
+
+//------------------------------------------------------------------------
+// DumpChoices: dump out information on current choices
+//
+// Arguments:
+//   choices - array of choices
+//   highlight - highlight this choice
+//
+void CSE_HeuristicParameterized::DumpChoices(ArrayStack<Choice>& choices, int highlight)
+{
+    for (int i = 0; i < choices.Height(); i++)
+    {
+        const Choice& choice = choices.TopRef(i);
+
+        if (choice.m_performed == true)
+        {
+            continue;
+        }
+
+        CSEdsc* const cse = choice.m_dsc;
+        const char*   msg = (i == highlight) ? "=>" : "  ";
+        if (cse != nullptr)
+        {
+            printf("%s%2d: " FMT_CSE " preference %10.7f likelihood %10.7f\n", msg, i, cse->csdIndex,
+                   choice.m_preference, choice.m_softmax);
+        }
+        else
+        {
+            printf("%s%2d: QUIT    preference %10.7f likelihood %10.7f\n", msg, i, choice.m_preference,
+                   choice.m_softmax);
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// DumpChoices: dump out information on current choices
+//
+// Arguments:
+//   choices - array of choices
+//   highlight - highlight this choice
+//
+void CSE_HeuristicParameterized::DumpChoices(ArrayStack<Choice>& choices, CSEdsc* highlight)
+{
+    for (int i = 0; i < choices.Height(); i++)
+    {
+        const Choice& choice = choices.TopRef(i);
+
+        if (choice.m_performed == true)
+        {
+            continue;
+        }
+
+        CSEdsc* const cse = choice.m_dsc;
+        const char*   msg = (cse == highlight) ? "=>" : "  ";
+        if (cse != nullptr)
+        {
+            printf("%s%2d: " FMT_CSE " preference %10.7f likelihood %10.7f\n", msg, i, cse->csdIndex,
+                   choice.m_preference, choice.m_softmax);
+        }
+        else
+        {
+            printf("%s%2d: QUIT    preference %10.7f likelihood %10.7f\n", msg, i, choice.m_preference,
+                   choice.m_softmax);
+        }
+    }
+}
+
+#endif // DEBUG
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// CSE_HeuristicRL: construct RL CSE heuristic
+//
+// Arguments;
+//  pCompiler - compiler instance
+//
+// Notes:
+//  This creates the RL CSE heuristic, selected when JitRLCSE is set.
+//  It has 3 modes of operation:
+//
+//  (1) Stochastic (default) softmax policy, governed by a parameter vector.
+//      * JitRLCSE specifies the initial parameter values.
+//        Missing values default to zero, extra values are ignored.
+//      * JitRandomCSE can be used to supply salt for the RNG.
+//  (2) Update: replay a sequence with known rewards, and compute updated
+//      parameters based on stochastic gradient ascent
+//      * JitReplayCSE specifies the sequence
+//      * JitReplayCSEReward the rewards per step (actor-critic style)
+//  (3) Greedy:
+//      Enable via JitRLCSEGreedy=1.
+//      Uses parameters from JitRLCSE to drive a deterministic greedy policy
+//
+CSE_HeuristicRL::CSE_HeuristicRL(Compiler* pCompiler)
+    : CSE_HeuristicParameterized(pCompiler)
+    , m_alpha(0.0)
+    , m_updateParameters(false)
+    , m_greedy(false)
+{
+    // Set up the random state
+    //
+    m_cseRNG.Init(m_pCompiler->info.compMethodHash() ^ JitConfig.JitRandomCSE());
+
+    // Parameters
+    //
+    ConfigDoubleArray initialParameters;
+    initialParameters.EnsureInit(JitConfig.JitRLCSE());
+    const unsigned initialParamLength = initialParameters.GetLength();
+
+    for (unsigned i = 0; (i < initialParamLength) && (i < numParameters); i++)
+    {
+        m_parameters[i] = initialParameters.GetData()[i];
+    }
+
+    if (numParameters > initialParamLength)
+    {
+        JITDUMP("Too few parameters (expected %d), trailing will be zero\n", numParameters);
+        for (unsigned i = initialParamLength; i < numParameters; i++)
+        {
+            m_parameters[i] = 0;
+        }
+    }
+    else if (numParameters < initialParamLength)
+    {
+        JITDUMP("Too many parameters (expected %d), trailing will be ignored\n", numParameters);
+    }
+
+    // Policy sub-behavior: explore / update / greedy
+    //
+    // We may be given a prior sequence and perf score to use to
+    // update the parameters .... if so, we will replay same sequence of CSEs
+    // (like the replay policy) and update the parameters via the policy
+    // gradient algorithm.
+    //
+    // For updates:
+    //
+    // m_alpha controls the "step size" or learning rate; when we want to adjust
+    // the parameters we only partially move them towards the gradient indicated values.
+    //
+    // m_rewards describes the reward associated with each step.
+    //
+    // This "two-pass" technique (first run the current policy and, obtain the perf score
+    // and CSE sequence, then rerun with the same sequence and update the policy
+    // parameters) ensures all the policy model logic is within the
+    // JIT, so the preference computation and its gradient can be kept in sync.
+    //
+    if ((JitConfig.JitReplayCSE() != nullptr) && (JitConfig.JitReplayCSEReward() != nullptr))
+    {
+        m_updateParameters = true;
+
+        // Reward
+        //
+        ConfigDoubleArray rewards;
+        rewards.EnsureInit(JitConfig.JitReplayCSEReward());
+        const unsigned rewardsLength = rewards.GetLength();
+
+        for (unsigned i = 0; (i < rewardsLength) && (i < maxSteps); i++)
+        {
+            m_rewards[i] = rewards.GetData()[i];
+        }
+
+        for (unsigned i = rewardsLength; i < maxSteps; i++)
+        {
+            m_rewards[i] = 0;
+        }
+
+        // Alpha
+        //
+        if (JitConfig.JitRLCSEAlpha() != nullptr)
+        {
+            ConfigDoubleArray JitRLCSEAlphaArray;
+            JitRLCSEAlphaArray.EnsureInit(JitConfig.JitRLCSEAlpha());
+            m_alpha = JitRLCSEAlphaArray.GetData()[0];
+        }
+        else
+        {
+            m_alpha = 0.001;
+        }
+    }
+    else if (JitConfig.JitRLCSEGreedy() > 0)
+    {
+        m_greedy = true;
+    }
+
+    CompAllocator allocator = m_pCompiler->getAllocator(CMK_CSE);
+    m_baseLikelihoods       = new (allocator) jitstd::vector<double>(allocator);
+    m_features              = new (allocator) jitstd::vector<char*>(allocator);
+}
+
+//------------------------------------------------------------------------
+// Name: name this jit heuristic
+//
+// Returns:
+//   descriptive name string
+//
+const char* CSE_HeuristicRL::Name() const
+{
+    if (m_updateParameters)
+    {
+        return "RL Policy Gradient Update";
+    }
+    else
+    {
+        return "RL Policy Gradient Stochastic";
+    }
+}
+
+//------------------------------------------------------------------------
+// Announce: describe heuristic in jit dump
+//
+void CSE_HeuristicRL::Announce()
+{
+    JITDUMP("%s salt %d parameters ", Name(), JitConfig.JitRandomCSE());
+    for (int i = 0; i < numParameters; i++)
+    {
+        JITDUMP("%s%f", (i == 0) ? "" : ",", m_parameters[i]);
+    }
+    JITDUMP("\n");
+
+    if (m_updateParameters)
+    {
+        JITDUMP("Operating in update mode with sequence %ls, rewards %ls, and alpha %f\n", JitConfig.JitReplayCSE(),
+                JitConfig.JitReplayCSEReward(), m_alpha);
+    }
+}
+
+//------------------------------------------------------------------------
+// DumpMetrics: dump post-CSE metrics
+//
+void CSE_HeuristicRL::DumpMetrics()
+{
+    CSE_HeuristicParameterized::DumpMetrics();
+
+    if (m_updateParameters)
+    {
+        // For update, dump the new parameter values
+        //
+        printf(" updatedparams ");
+        for (int i = 0; i < numParameters; i++)
+        {
+            printf("%s%f", (i == 0) ? "" : ",", m_parameters[i]);
+        }
+
+        if (JitConfig.JitRLCSECandidateFeatures() > 0)
+        {
+            bool first = true;
+            printf(", features ");
+            for (char* f : *m_features)
+            {
+                printf("%s%s", first ? "" : ",", f);
+                first = false;
+            }
+        }
+    }
+    else if (m_greedy)
+    {
+        // handled by base class
+    }
+    else
+    {
+        // For evaluation, dump likelihood of the choices made
+        //
+        printf(" likelihoods ");
+        bool first = true;
+        for (double d : *m_likelihoods)
+        {
+            printf("%s%.3f", first ? "" : ",", d);
+            first = false;
+        }
+
+        // For evaluation, dump initial likelihood each choice
+        //
+        printf(" baseLikelihoods ");
+        first = true;
+        for (double d : *m_baseLikelihoods)
+        {
+            printf("%s%.3f", first ? "" : ",", d);
+            first = false;
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// ConsiderTree: check if this tree can be a CSE candidate
+//
+// Arguments:
+//   tree - tree in question
+//   isReturn - true if tree is part of a return statement
+//
+// Returns:
+//    true if this tree can be a CSE candidate
+//
+bool CSE_HeuristicRL::ConsiderTree(GenTree* tree, bool isReturn)
+{
+    return CanConsiderTree(tree, isReturn);
+}
+
+//------------------------------------------------------------------------
+// ConsiderCandidates: examine candidates and perform CSEs.
 //
-double CSE_HeuristicRL::StoppingPreference()
+void CSE_HeuristicRL::ConsiderCandidates()
 {
-    double features[numParameters];
-    GetFeatures(nullptr, features);
+    const int numCandidates = m_pCompiler->optCSECandidateCount;
+    sortTab                 = new (m_pCompiler, CMK_CSE) CSEdsc*[numCandidates];
+    sortSiz                 = numCandidates * sizeof(*sortTab);
+    memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz);
 
-    if (JitConfig.JitRLCSECandidateFeatures() > 0)
+    // Capture distribution of enregisterable local var weights.
+    //
+    CaptureLocalWeights();
+
+    if (m_updateParameters)
     {
-        DumpFeatures(nullptr, features);
+        UpdateParameters();
+        return;
     }
-
-    double preference = 0;
-    for (int i = 0; i < numParameters; i++)
+    else if (m_greedy)
     {
-        preference += features[i] * m_parameters[i];
+        GreedyPolicy();
+        return;
+    }
+    else
+    {
+        SoftmaxPolicy();
     }
-
-    return preference;
 }
 
 //------------------------------------------------------------------------
-// ChooseGreedy: examine candidates and choose the next CSE to perform
-//   via greedy policy
-//
-// Returns:
-//   Choice of CSE to perform
+// SoftmaxPolicy: use a randomized softmax policy
 //
 // Notes:
-//   Picks the most-preferred candidate.
-//   If there is a tie, picks stop, or the lowest cse index.
+//   This converts preferences to likelihoods using softmax, and then
+//   randomly selects a candidate proportional to its likelihood.
 //
-CSE_HeuristicRL::Choice& CSE_HeuristicRL::ChooseGreedy(ArrayStack<Choice>& choices)
+void CSE_HeuristicRL::SoftmaxPolicy()
 {
-    choices.Reset();
-    BuildChoices(choices);
+    if (m_verbose)
+    {
+        printf("RL using softmax policy\n");
+    }
 
-    // Find the maximally preferred case.
+    // Number of choices is num candidates + 1, since
+    // early stopping is also a choice.
     //
-    Choice& bestChoice = choices.TopRef(0);
-    int     choiceNum  = 0;
+    const int          numCandidates = m_pCompiler->optCSECandidateCount;
+    ArrayStack<Choice> choices(m_pCompiler->getAllocator(CMK_CSE), numCandidates + 1);
+    bool               first = true;
 
-    for (int i = 1; i < choices.Height(); i++)
+    while (true)
     {
-        Choice&      choice = choices.TopRef(i);
-        const double delta  = choice.m_preference - bestChoice.m_preference;
-
-        bool update = false;
+        Choice& choice = ChooseSoftmax(choices);
 
-        if (delta > 0)
-        {
-            update = true;
-        }
-        else if (delta == 0)
+        if (first)
         {
-            if (choice.m_dsc == nullptr)
-            {
-                update = true;
-            }
-            else if ((bestChoice.m_dsc != nullptr) && (choice.m_dsc->csdIndex < bestChoice.m_dsc->csdIndex))
+            for (int i = 0; i < choices.Height(); i++)
             {
-                update = true;
+                Choice& option = choices.TopRef(i);
+                if (option.m_dsc == nullptr)
+                {
+                    m_baseLikelihoods->push_back(0);
+                }
+                else
+                {
+                    m_baseLikelihoods->push_back(option.m_dsc->csdIndex);
+                }
+                m_baseLikelihoods->push_back(option.m_softmax);
             }
+            first = false;
         }
 
-        if (update)
+        CSEdsc* const dsc = choice.m_dsc;
+
+        if (dsc == nullptr)
         {
-            bestChoice = choice;
-            choiceNum  = i;
+            m_likelihoods->push_back(choice.m_softmax);
+            break;
         }
-    }
 
-    if (m_verbose)
-    {
-        printf("Greedy candidate evaluation\n");
-        DumpChoices(choices, choiceNum);
+        // purge this CSE from sortTab so we won't choose it again
+        //
+        assert(sortTab[dsc->csdIndex - 1] == dsc);
+        sortTab[dsc->csdIndex - 1] = nullptr;
+
+        // ChooseCSE should only choose viable options
+        //
+        assert(dsc->IsViable());
+
+        CSE_Candidate candidate(this, dsc);
+
+        if (m_verbose)
+        {
+            printf("\nRL attempting " FMT_CSE "\n", candidate.CseIndex());
+        }
+
+        JITDUMP("CSE Expression : \n");
+        JITDUMPEXEC(m_pCompiler->gtDispTree(candidate.Expr()));
+        JITDUMP("\n");
+
+        PerformCSE(&candidate);
+        madeChanges = true;
+        m_likelihoods->push_back(choice.m_softmax);
     }
 
-    return bestChoice;
+    return;
 }
 
 //------------------------------------------------------------------------
@@ -3036,36 +3366,6 @@ CSE_HeuristicRL::Choice& CSE_HeuristicRL::ChooseSoftmax(ArrayStack<Choice>& choi
     return choices.TopRef(choiceNum);
 }
 
-//------------------------------------------------------------------------
-// BuildChoices: fill in the choices currently available
-//
-//   choices - array of choices to be filled in
-//
-// Notes:
-//    Also computes the preference for each choice.
-//
-void CSE_HeuristicRL::BuildChoices(ArrayStack<Choice>& choices)
-{
-    for (unsigned i = 0; i < m_pCompiler->optCSECandidateCount; i++)
-    {
-        CSEdsc* const dsc = sortTab[i];
-        if ((dsc == nullptr) || !dsc->IsViable())
-        {
-            // already did this cse,
-            // or the cse is not viable
-            continue;
-        }
-
-        double preference = Preference(dsc);
-        choices.Emplace(dsc, preference);
-    }
-
-    // Doing nothing is also an option.
-    //
-    const double stoppingPreference = StoppingPreference();
-    choices.Emplace(nullptr, stoppingPreference);
-}
-
 //------------------------------------------------------------------------
 // Softmax: fill in likelihoods for each choice vis softmax
 //
@@ -3104,60 +3404,6 @@ void CSE_HeuristicRL::Softmax(ArrayStack<Choice>& choices)
     }
 }
 
-//------------------------------------------------------------------------
-// DumpChoices: dump out information on current choices
-//
-// Arguments:
-//   choices - array of choices
-//   highlight - highlight this choice
-//
-void CSE_HeuristicRL::DumpChoices(ArrayStack<Choice>& choices, int highlight)
-{
-    for (int i = 0; i < choices.Height(); i++)
-    {
-        Choice&       choice = choices.TopRef(i);
-        CSEdsc* const cse    = choice.m_dsc;
-        const char*   msg    = i == highlight ? "=>" : "  ";
-        if (cse != nullptr)
-        {
-            printf("%s%2d: " FMT_CSE " preference %10.7f likelihood %10.7f\n", msg, i, cse->csdIndex,
-                   choice.m_preference, choice.m_softmax);
-        }
-        else
-        {
-            printf("%s%2d: QUIT    preference %10.7f likelihood %10.7f\n", msg, i, choice.m_preference,
-                   choice.m_softmax);
-        }
-    }
-}
-
-//------------------------------------------------------------------------
-// DumpChoices: dump out information on current choices
-//
-// Arguments:
-//   choices - array of choices
-//   highlight - highlight this choice
-//
-void CSE_HeuristicRL::DumpChoices(ArrayStack<Choice>& choices, CSEdsc* highlight)
-{
-    for (int i = 0; i < choices.Height(); i++)
-    {
-        Choice&       choice = choices.TopRef(i);
-        CSEdsc* const cse    = choice.m_dsc;
-        const char*   msg    = cse == highlight ? "=>" : "  ";
-        if (cse != nullptr)
-        {
-            printf("%s%2d: " FMT_CSE " preference %10.7f likelihood %10.7f\n", msg, i, cse->csdIndex,
-                   choice.m_preference, choice.m_softmax);
-        }
-        else
-        {
-            printf("%s%2d: QUIT    preference %10.7f likelihood %10.7f\n", msg, i, choice.m_preference,
-                   choice.m_softmax);
-        }
-    }
-}
-
 //------------------------------------------------------------------------
 // UpdateParameters: Replay an existing CSE sequence with known reward,
 //   and update the model parameters via the policy gradient.
@@ -3172,8 +3418,8 @@ void CSE_HeuristicRL::UpdateParameters()
         return;
     }
 
-    ArrayStack<Choice>    choices(m_pCompiler->getAllocator(CMK_CSE));
-    static ConfigIntArray JitReplayCSEArray;
+    ArrayStack<Choice> choices(m_pCompiler->getAllocator(CMK_CSE));
+    ConfigIntArray     JitReplayCSEArray;
     JitReplayCSEArray.EnsureInit(JitConfig.JitReplayCSE());
 
     // We have an undiscounted reward, so it applies equally
@@ -3423,7 +3669,8 @@ CSE_HeuristicRL::Choice* CSE_HeuristicRL::FindChoice(CSEdsc* dsc, ArrayStack<Cho
 // Notes:
 //  This creates the standard CSE heuristic.
 //
-CSE_Heuristic::CSE_Heuristic(Compiler* pCompiler) : CSE_HeuristicCommon(pCompiler)
+CSE_Heuristic::CSE_Heuristic(Compiler* pCompiler)
+    : CSE_HeuristicCommon(pCompiler)
 {
     aggressiveRefCnt = 0;
     moderateRefCnt   = 0;
@@ -4212,50 +4459,62 @@ bool CSE_HeuristicCommon::IsCompatibleType(var_types cseLclVarTyp, var_types exp
     return false;
 }
 
-// PerformCSE() takes a successful candidate and performs  the appropriate replacements:
+//------------------------------------------------------------------------
+// PerformCSE: takes a successful candidate and performs the appropriate replacements
+//
+// Arguments:
+//    successfulCandidate - cse candidate to perform
 //
-// It will replace all of the CSE defs with assignments to a new "cse0" LclVar
+// It will replace all of the CSE defs with writes to a new "cse0" LclVar
 // and will replace all of the CSE uses with reads of the "cse0" LclVar
 //
 // It will also put cse0 into SSA if there is just one def.
+//
 void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate)
 {
     AdjustHeuristic(successfulCandidate);
+    CSEdsc* const dsc = successfulCandidate->CseDsc();
 
 #ifdef DEBUG
     // Setup the message arg for lvaGrabTemp()
     //
-    const char* grabTempMessage = "CSE - unknown";
+    const char* heuristicTempMessage = "";
 
     if (successfulCandidate->IsAggressive())
     {
-        grabTempMessage = "CSE - aggressive";
+        heuristicTempMessage = ": aggressive";
     }
     else if (successfulCandidate->IsModerate())
     {
-        grabTempMessage = "CSE - moderate";
+        heuristicTempMessage = ": moderate";
     }
     else if (successfulCandidate->IsConservative())
     {
-        grabTempMessage = "CSE - conservative";
+        heuristicTempMessage = ": conservative";
     }
     else if (successfulCandidate->IsStressCSE())
     {
-        grabTempMessage = "CSE - stress mode";
+        heuristicTempMessage = ": stress";
     }
     else if (successfulCandidate->IsRandom())
     {
-        grabTempMessage = "CSE - random";
+        heuristicTempMessage = ": random";
     }
-#endif // DEBUG
 
-    /* Introduce a new temp for the CSE */
+    const char* const grabTempMessage = m_pCompiler->printfAlloc(FMT_CSE "%s", dsc->csdIndex, heuristicTempMessage);
+
+    // Add this candidate to the CSE sequence
+    //
+    m_sequence->push_back(dsc->csdIndex);
+
+#endif // DEBUG
 
-    // we will create a  long lifetime temp for the new CSE LclVar
+    //  Allocate a CSE temp
+    //
     unsigned  cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG(grabTempMessage));
     var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet());
 
-    LclVarDsc* lclDsc = m_pCompiler->lvaGetDesc(cseLclVarNum);
+    LclVarDsc* const lclDsc = m_pCompiler->lvaGetDesc(cseLclVarNum);
     if (cseLclVarTyp == TYP_STRUCT)
     {
         m_pCompiler->lvaSetStruct(cseLclVarNum, successfulCandidate->Expr()->GetLayout(m_pCompiler), false);
@@ -4264,20 +4523,20 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate)
     lclDsc->lvIsCSE = true;
 
     // Record that we created a new LclVar for use as a CSE temp
+    //
     m_addCSEcount++;
     m_pCompiler->optCSEcount++;
+    m_pCompiler->Metrics.CseCount++;
 
-    //  Walk all references to this CSE, adding an assignment
-    //  to the CSE temp to all defs and changing all refs to
+    //  Walk all references to this CSE, adding an store to
+    //  the CSE temp to all defs and changing all refs to
     //  a simple use of the CSE temp.
     //
     //  Later we will unmark any nested CSE's for the CSE uses.
     //
-    CSEdsc* dsc = successfulCandidate->CseDsc();
-    INDEBUG(m_sequence->push_back(dsc->csdIndex));
-
     // If there's just a single def for the CSE, we'll put this
     // CSE into SSA form on the fly. We won't need any PHIs.
+    //
     unsigned      cseSsaNum = SsaConfig::RESERVED_SSA_NUM;
     LclSsaVarDsc* ssaVarDsc = nullptr;
 
@@ -4632,7 +4891,7 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate)
             if (!store->OperIs(GT_STORE_LCL_VAR))
             {
                 // This can only be the case for a struct in which the 'val' was a COMMA, so
-                // the assignment is sunk below it.
+                // the store is sunk below it.
                 store = store->gtEffectiveVal();
                 noway_assert(origStore->OperIs(GT_COMMA) && (origStore == val));
             }
@@ -4699,7 +4958,7 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate)
             /* Create a comma node for the CSE assignment */
             cse           = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, origStore, cseUse);
             cse->gtVNPair = cseUse->gtVNPair; // The comma's value is the same as 'val'
-            // as the assignment to the CSE LclVar
+            // as the store to the CSE LclVar
             // cannot add any new exceptions
         }
 
@@ -4922,12 +5181,10 @@ CSE_HeuristicCommon* Compiler::optGetCSEheuristic()
 
         if (JitConfig.JitRandomCSE() > 0)
         {
-            JITDUMP("Using Random CSE heuristic (JitRandomCSE)\n");
             useRandomHeuristic = true;
         }
         else if (compStressCompile(Compiler::STRESS_MAKE_CSE, MAX_STRESS_WEIGHT))
         {
-            JITDUMP("Using Random CSE heuristic (stress)\n");
             useRandomHeuristic = true;
         }
 
@@ -4951,12 +5208,24 @@ CSE_HeuristicCommon* Compiler::optGetCSEheuristic()
 
 #endif
 
+    // Parameterized (greedy) RL-based heuristic
+    //
+    if (optCSEheuristic == nullptr)
+    {
+        bool useGreedyHeuristic = (JitConfig.JitRLCSEGreedy() > 0);
+
+        if (useGreedyHeuristic)
+        {
+            optCSEheuristic = new (this, CMK_CSE) CSE_HeuristicParameterized(this);
+        }
+    }
+
     if (optCSEheuristic == nullptr)
     {
-        JITDUMP("Using standard CSE heuristic\n");
         optCSEheuristic = new (this, CMK_CSE) CSE_Heuristic(this);
     }
 
+    INDEBUG(optCSEheuristic->Announce());
     return optCSEheuristic;
 }
 
@@ -4979,6 +5248,7 @@ PhaseStatus Compiler::optOptimizeValnumCSEs()
     // Determine which heuristic to use...
     //
     CSE_HeuristicCommon* const heuristic = optGetCSEheuristic();
+    INDEBUG(heuristic->Announce());
 
     optValnumCSE_phase = true;
     optCSEweight       = -1.0f;
@@ -5121,7 +5391,7 @@ bool Compiler::optConfigDisableCSE2()
         {
             if (verbose)
             {
-                printf(" Disabled by jitNoCSE2 > totalCSEcount\n");
+                printf(" Disabled by jitNoCSE2 %d > totalCSEcount %d\n", jitNoCSE2, totalCSEcount);
             }
             return true;
         }
@@ -5167,6 +5437,56 @@ void Compiler::optCleanupCSEs()
     }
 }
 
+//---------------------------------------------------------------------------
+// optSharedConstantCSEEnabled: Returns `true` if shared constant CSE is enabled.
+//
+// Notes: see `optConstantCSEEnabled` for detecting if general constant CSE is enabled.
+//
+// static
+bool Compiler::optSharedConstantCSEEnabled()
+{
+    bool enableSharedConstCSE = false;
+    int  configValue          = JitConfig.JitConstCSE();
+
+    if (configValue == CONST_CSE_ENABLE_ALL)
+    {
+        enableSharedConstCSE = true;
+    }
+#if defined(TARGET_ARMARCH)
+    else if (configValue == CONST_CSE_ENABLE_ARM)
+    {
+        enableSharedConstCSE = true;
+    }
+#endif // TARGET_ARMARCH
+
+    return enableSharedConstCSE;
+}
+
+//---------------------------------------------------------------------------
+// optConstantCSEEnabled: Returns `true` if constant CSE is enabled.
+//
+// Notes: see `optSharedConstantCSEEnabled` for detecting if shared constant CSE is enabled.
+//
+// static
+bool Compiler::optConstantCSEEnabled()
+{
+    bool enableConstCSE = false;
+    int  configValue    = JitConfig.JitConstCSE();
+
+    if ((configValue == CONST_CSE_ENABLE_ALL) || (configValue == CONST_CSE_ENABLE_ALL_NO_SHARING))
+    {
+        enableConstCSE = true;
+    }
+#if defined(TARGET_ARMARCH)
+    else if ((configValue == CONST_CSE_ENABLE_ARM) || (configValue == CONST_CSE_ENABLE_ARM_NO_SHARING))
+    {
+        enableConstCSE = true;
+    }
+#endif
+
+    return enableConstCSE;
+}
+
 #ifdef DEBUG
 
 /*****************************************************************************
diff --git a/src/coreclr/jit/optcse.h b/src/coreclr/jit/optcse.h
index e79871b0783c..b8dd9fae685d 100644
--- a/src/coreclr/jit/optcse.h
+++ b/src/coreclr/jit/optcse.h
@@ -142,58 +142,101 @@ class CSE_HeuristicReplay : public CSE_HeuristicCommon
 #endif
 };
 
-// Reinforcement Learning CSE heuristic
-//
-// Uses a "linear" feature model with
-// softmax policy.
-//
-class CSE_HeuristicRL : public CSE_HeuristicCommon
+#endif // DEBUG
+
+// Parameterized Policy
+
+class CSE_HeuristicParameterized : public CSE_HeuristicCommon
 {
-private:
+protected:
     struct Choice
     {
-        Choice(CSEdsc* dsc, double preference) : m_dsc(dsc), m_preference(preference), m_softmax(0)
+        Choice(CSEdsc* dsc, double preference)
+            : m_dsc(dsc)
+            , m_preference(preference)
+            , m_softmax(0)
+            , m_performed(false)
         {
         }
+
         CSEdsc* m_dsc;
         double  m_preference;
         double  m_softmax;
+        bool    m_performed;
     };
 
     enum
     {
-        numParameters = 19,
+        numParameters = 25,
         booleanScale  = 5,
         maxSteps      = 65, // MAX_CSE_CNT + 1 (for stopping)
     };
 
+    static double           s_defaultParameters[numParameters];
     double                  m_parameters[numParameters];
-    double                  m_alpha;
-    double                  m_rewards[maxSteps];
-    CLRRandom               m_cseRNG;
-    bool                    m_updateParameters;
-    bool                    m_greedy;
-    bool                    m_verbose;
     unsigned                m_registerPressure;
     jitstd::vector<double>* m_localWeights;
+    bool                    m_verbose;
 
+public:
+    CSE_HeuristicParameterized(Compiler*);
+    void ConsiderCandidates();
+    bool ConsiderTree(GenTree* tree, bool isReturn);
     void CaptureLocalWeights();
-    void GetFeatures(CSEdsc* dsc, double* features);
+    void GreedyPolicy();
+
+    void   GetFeatures(CSEdsc* dsc, double* features);
     double Preference(CSEdsc* dsc);
-    void GetStoppingFeatures(double* features);
+    void   GetStoppingFeatures(double* features);
     double StoppingPreference();
+    void   BuildChoices(ArrayStack<Choice>& choices);
+
+    Choice& ChooseGreedy(ArrayStack<Choice>& choices, bool recompute);
+
+    virtual const char* Name() const
+    {
+        return "Parameterized CSE Heuristic";
+    }
+
+#ifdef DEBUG
     void DumpFeatures(CSEdsc* dsc, double* features);
-    Choice& ChooseSoftmax(ArrayStack<Choice>& choices);
-    Choice& ChooseGreedy(ArrayStack<Choice>& choices);
-    void BuildChoices(ArrayStack<Choice>& choices);
-    void Softmax(ArrayStack<Choice>& choices);
     void DumpChoices(ArrayStack<Choice>& choices, int higlight = -1);
     void DumpChoices(ArrayStack<Choice>& choices, CSEdsc* higlight);
-    void UpdateParameters();
-    void GreedyPolicy();
-    void SoftmaxPolicy();
-    void UpdateParametersStep(CSEdsc* dsc, ArrayStack<Choice>& choices, double reward, double* delta);
-    Choice* FindChoice(CSEdsc* dsc, ArrayStack<Choice>& choices);
+    void DumpMetrics();
+    void Announce();
+
+    // Likelihood of each choice made in the sequence
+    jitstd::vector<double>* m_likelihoods;
+    // Likelihood of each action from starting state
+    jitstd::vector<double>* m_baseLikelihoods;
+    // Features of each candidate
+    jitstd::vector<char*>* m_features;
+
+#endif
+};
+
+#ifdef DEBUG
+
+// Reinforcement Learning CSE heuristic
+//
+// Uses a "linear" feature model with
+// softmax policy.
+//
+class CSE_HeuristicRL : public CSE_HeuristicParameterized
+{
+private:
+    double    m_alpha;
+    double    m_rewards[maxSteps];
+    CLRRandom m_cseRNG;
+    bool      m_updateParameters;
+    bool      m_greedy;
+
+    Choice&     ChooseSoftmax(ArrayStack<Choice>& choices);
+    void        Softmax(ArrayStack<Choice>& choices);
+    void        SoftmaxPolicy();
+    void        UpdateParametersStep(CSEdsc* dsc, ArrayStack<Choice>& choices, double reward, double* delta);
+    void        UpdateParameters();
+    Choice*     FindChoice(CSEdsc* dsc, ArrayStack<Choice>& choices);
     const char* Name() const;
 
 public:
@@ -203,11 +246,6 @@ class CSE_HeuristicRL : public CSE_HeuristicCommon
 #ifdef DEBUG
     virtual void DumpMetrics();
     virtual void Announce();
-    // Likelihood of each choice made in the sequence
-    jitstd::vector<double>* m_likelihoods;
-    // Likelihood of each action from starting state
-    jitstd::vector<double>* m_baseLikelihoods;
-    jitstd::vector<char*>*  m_features;
 #endif
 };
 
diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp
index f9616636681b..1e5d5a00b107 100644
--- a/src/coreclr/jit/optimizebools.cpp
+++ b/src/coreclr/jit/optimizebools.cpp
@@ -46,7 +46,7 @@ class OptBoolsDsc
 private:
     BasicBlock* m_b1; // The first basic block with the BBJ_COND conditional jump type
     BasicBlock* m_b2; // The next basic block of m_b1. Either BBJ_COND or BBJ_RETURN type
-    BasicBlock* m_b3; // m_b1->bbTarget. Null if m_b2 is not a return block.
+    BasicBlock* m_b3; // m_b1's target block. Null if m_b2 is not a return block.
 
     Compiler* m_comp; // The pointer to the Compiler instance
 
@@ -74,10 +74,10 @@ class OptBoolsDsc
 
 private:
     Statement* optOptimizeBoolsChkBlkCond();
-    GenTree* optIsBoolComp(OptTestInfo* pOptTest);
-    bool optOptimizeBoolsChkTypeCostCond();
-    void optOptimizeBoolsUpdateTrees();
-    bool FindCompareChain(GenTree* condition, bool* isTestCondition);
+    GenTree*   optIsBoolComp(OptTestInfo* pOptTest);
+    bool       optOptimizeBoolsChkTypeCostCond();
+    void       optOptimizeBoolsUpdateTrees();
+    bool       FindCompareChain(GenTree* condition, bool* isTestCondition);
 };
 
 //-----------------------------------------------------------------------------
@@ -89,7 +89,7 @@ class OptBoolsDsc
 //  Notes:
 //      m_b1 and m_b2 are set on entry.
 //
-//      Case 1: if b1.bbTarget == b2.bbTarget, it transforms
+//      Case 1: if b1->TargetIs(b2->GetTarget()), it transforms
 //          B1 : brtrue(t1, Bx)
 //          B2 : brtrue(t2, Bx)
 //          B3 :
@@ -107,7 +107,7 @@ class OptBoolsDsc
 //              B3: GT_RETURN (BBJ_RETURN)
 //              B4: GT_RETURN (BBJ_RETURN)
 //
-//      Case 2: if B2->FalseTargetIs(B1.bbTarget), it transforms
+//      Case 2: if B2->FalseTargetIs(B1->GetTarget()), it transforms
 //          B1 : brtrue(t1, B3)
 //          B2 : brtrue(t2, Bx)
 //          B3 :
@@ -123,7 +123,7 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock()
 
     m_t3 = nullptr;
 
-    // Check if m_b1 and m_b2 have the same bbTarget
+    // Check if m_b1 and m_b2 have the same target
 
     if (m_b1->TrueTargetIs(m_b2->GetTrueTarget()))
     {
@@ -751,6 +751,8 @@ bool OptBoolsDsc::optOptimizeRangeTests()
     //
     BasicBlock* notInRangeBb = m_b1->GetTrueTarget();
     BasicBlock* inRangeBb;
+    weight_t    inRangeLikelihood = m_b1->GetFalseEdge()->getLikelihood();
+
     if (m_b2->TrueTargetIs(notInRangeBb))
     {
         // Shape 1: both conditions jump to NotInRange
@@ -764,6 +766,7 @@ bool OptBoolsDsc::optOptimizeRangeTests()
         // InRange:
         // ...
         inRangeBb = m_b2->GetFalseTarget();
+        inRangeLikelihood *= m_b2->GetFalseEdge()->getLikelihood();
     }
     else if (m_b2->FalseTargetIs(notInRangeBb))
     {
@@ -778,6 +781,7 @@ bool OptBoolsDsc::optOptimizeRangeTests()
         // NotInRange:
         // ...
         inRangeBb = m_b2->GetTrueTarget();
+        inRangeLikelihood *= m_b2->GetTrueEdge()->getLikelihood();
     }
     else
     {
@@ -808,22 +812,35 @@ bool OptBoolsDsc::optOptimizeRangeTests()
     }
 
     // Re-direct firstBlock to jump to inRangeBb
-    m_comp->fgAddRefPred(inRangeBb, m_b1);
+    FlowEdge* const newEdge      = m_comp->fgAddRefPred(inRangeBb, m_b1);
+    FlowEdge* const oldFalseEdge = m_b1->GetFalseEdge();
+    FlowEdge* const oldTrueEdge  = m_b1->GetTrueEdge();
+
     if (!cmp2IsReversed)
     {
-        m_b1->SetTrueTarget(inRangeBb);
-        m_b1->SetFalseTarget(notInRangeBb);
+        m_b1->SetFalseEdge(oldTrueEdge);
+        m_b1->SetTrueEdge(newEdge);
+        assert(m_b1->TrueTargetIs(inRangeBb));
+        assert(m_b1->FalseTargetIs(notInRangeBb));
+
+        newEdge->setLikelihood(inRangeLikelihood);
+        oldTrueEdge->setLikelihood(1.0 - inRangeLikelihood);
     }
     else
     {
-        m_b1->SetFalseTarget(inRangeBb);
+        m_b1->SetFalseEdge(newEdge);
+        assert(m_b1->TrueTargetIs(notInRangeBb));
+        assert(m_b1->FalseTargetIs(inRangeBb));
+
+        oldTrueEdge->setLikelihood(inRangeLikelihood);
+        newEdge->setLikelihood(1.0 - inRangeLikelihood);
     }
 
     // Remove the 2nd condition block as we no longer need it
-    m_comp->fgRemoveRefPred(m_b2, m_b1);
+    m_comp->fgRemoveRefPred(oldFalseEdge);
     m_comp->fgRemoveBlock(m_b2, true);
 
-    Statement* stmt = m_b1->lastStmt();
+    Statement* const stmt = m_b1->lastStmt();
     m_comp->gtSetStmtInfo(stmt);
     m_comp->fgSetStmtSeq(stmt);
     m_comp->gtUpdateStmtSideEffects(stmt);
@@ -1012,9 +1029,8 @@ bool OptBoolsDsc::optOptimizeCompareChainCondBlock()
     m_comp->fgSetStmtSeq(s2);
 
     // Update the flow.
-    m_comp->fgRemoveRefPred(m_b1->GetTrueTarget(), m_b1);
-    m_b1->SetKindAndTarget(BBJ_ALWAYS, m_b1->GetFalseTarget());
-    m_b1->SetFlags(BBF_NONE_QUIRK);
+    m_comp->fgRemoveRefPred(m_b1->GetTrueEdge());
+    m_b1->SetKindAndTargetEdge(BBJ_ALWAYS, m_b1->GetFalseEdge());
 
     // Fixup flags.
     m_b2->CopyFlags(m_b1, BBF_COPY_PROPAGATE);
@@ -1262,71 +1278,73 @@ void OptBoolsDsc::optOptimizeBoolsUpdateTrees()
         m_comp->fgSetStmtSeq(m_testInfo1.testStmt);
     }
 
-    if (!optReturnBlock)
+    /* Modify the target of the conditional jump and update bbRefs and bbPreds */
+
+    if (optReturnBlock)
     {
-        // Update edges if m_b1: BBJ_COND and m_b2: BBJ_COND
+        assert(m_b1->KindIs(BBJ_COND));
+        assert(m_b2->KindIs(BBJ_RETURN));
+        assert(m_b1->FalseTargetIs(m_b2));
+        assert(m_b3 != nullptr);
+        m_b1->SetKindAndTargetEdge(BBJ_RETURN);
+    }
+    else
+    {
+        // Modify b1, if necessary, so it has the same
+        // true target as b2.
+        //
+        FlowEdge* const origB1TrueEdge  = m_b1->GetTrueEdge();
+        FlowEdge* const origB2TrueEdge  = m_b2->GetTrueEdge();
+        FlowEdge* const origB2FalseEdge = m_b2->GetFalseEdge();
 
-        FlowEdge* edge1 = m_comp->fgGetPredForBlock(m_b1->GetTrueTarget(), m_b1);
-        FlowEdge* edge2;
+        weight_t const origB1TrueLikelihood = origB1TrueEdge->getLikelihood();
+        weight_t       newB1TrueLikelihood  = 0;
 
         if (m_sameTarget)
         {
-            edge2 = m_comp->fgGetPredForBlock(m_b2->GetTrueTarget(), m_b2);
+            // We originally reached B2's true target via
+            // B1 true OR B1 false B2 true.
+            //
+            newB1TrueLikelihood = origB1TrueLikelihood + (1.0 - origB1TrueLikelihood) * origB2TrueEdge->getLikelihood();
         }
         else
         {
-            edge2 = m_comp->fgGetPredForBlock(m_b2->GetFalseTarget(), m_b2);
-
-            m_comp->fgRemoveRefPred(m_b1->GetTrueTarget(), m_b1);
-
-            m_b1->SetTrueTarget(m_b2->GetTrueTarget());
-
-            m_comp->fgAddRefPred(m_b2->GetTrueTarget(), m_b1);
-        }
-
-        assert(edge1 != nullptr);
-        assert(edge2 != nullptr);
+            // We originally reached B2's true target via
+            // B1 false OR B1 true B2 false.
+            //
+            // We will now reach via B1 true.
+            // Modify flow for true side of B1
+            //
+            m_comp->fgRedirectTrueEdge(m_b1, m_b2->GetTrueTarget());
 
-        weight_t edgeSumMin = edge1->edgeWeightMin() + edge2->edgeWeightMin();
-        weight_t edgeSumMax = edge1->edgeWeightMax() + edge2->edgeWeightMax();
-        if ((edgeSumMax >= edge1->edgeWeightMax()) && (edgeSumMax >= edge2->edgeWeightMax()))
-        {
-            edge1->setEdgeWeights(edgeSumMin, edgeSumMax, m_b1->GetTrueTarget());
-        }
-        else
-        {
-            edge1->setEdgeWeights(BB_ZERO_WEIGHT, BB_MAX_WEIGHT, m_b1->GetTrueTarget());
+            newB1TrueLikelihood =
+                (1.0 - origB1TrueLikelihood) + origB1TrueLikelihood * origB2FalseEdge->getLikelihood();
         }
-    }
 
-    /* Modify the target of the conditional jump and update bbRefs and bbPreds */
+        // Fix B1 true edge likelihood and min/max weights
+        //
+        origB1TrueEdge->setLikelihood(newB1TrueLikelihood);
+        weight_t const newB1TrueWeight = m_b1->bbWeight * newB1TrueLikelihood;
+        origB1TrueEdge->setEdgeWeights(newB1TrueWeight, newB1TrueWeight, m_b1->GetTrueTarget());
 
-    if (optReturnBlock)
-    {
-        assert(m_b1->KindIs(BBJ_COND));
-        assert(m_b2->KindIs(BBJ_RETURN));
-        assert(m_b1->FalseTargetIs(m_b2));
-        assert(m_b3 != nullptr);
-        m_b1->SetKindAndTarget(BBJ_RETURN);
-    }
-    else
-    {
         assert(m_b1->KindIs(BBJ_COND));
         assert(m_b2->KindIs(BBJ_COND));
         assert(m_b1->TrueTargetIs(m_b2->GetTrueTarget()));
         assert(m_b1->FalseTargetIs(m_b2));
         assert(!m_b2->IsLast());
-    }
 
-    if (!optReturnBlock)
-    {
-        // Update bbRefs and bbPreds
+        // We now reach B2's false target via B1 false.
+        //
+        m_comp->fgReplacePred(origB2FalseEdge, m_b1);
+        m_comp->fgRemoveRefPred(origB2TrueEdge);
+        FlowEdge* const newB1FalseEdge = origB2FalseEdge;
+        m_b1->SetFalseEdge(newB1FalseEdge);
+
+        // Fix B1 false edge likelihood and min/max weights.
         //
-        // Replace pred 'm_b2' for 'm_b2->bbFalseTarget' with 'm_b1'
-        // Remove  pred 'm_b2' for 'm_b2->bbTrueTarget'
-        m_comp->fgReplacePred(m_b2->GetFalseTarget(), m_b2, m_b1);
-        m_comp->fgRemoveRefPred(m_b2->GetTrueTarget(), m_b2);
-        m_b1->SetFalseTarget(m_b2->GetFalseTarget());
+        newB1FalseEdge->setLikelihood(1.0 - newB1TrueLikelihood);
+        weight_t const newB1FalseWeight = m_b1->bbWeight * (1.0 - newB1TrueLikelihood);
+        newB1FalseEdge->setEdgeWeights(newB1FalseWeight, newB1FalseWeight, m_b1->GetTrueTarget());
     }
 
     // Get rid of the second block
@@ -1361,7 +1379,7 @@ void OptBoolsDsc::optOptimizeBoolsUpdateTrees()
 //  Notes:
 //      m_b1, m_b2 and m_b3 of OptBoolsDsc are set on entry.
 //
-//      if B1.bbTarget == b3, it transforms
+//      if B1->TargetIs(b3), it transforms
 //          B1 : brtrue(t1, B3)
 //          B2 : ret(t2)
 //          B3 : ret(0)
diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp
index 263de164c345..ebbd125c1c3c 100644
--- a/src/coreclr/jit/optimizer.cpp
+++ b/src/coreclr/jit/optimizer.cpp
@@ -21,13 +21,7 @@ void Compiler::optInit()
 {
     fgHasLoops = false;
 
-    optLoopsRequirePreHeaders = false;
-    optNumNaturalLoopsFound   = 0;
-
-#ifdef DEBUG
-    loopAlignCandidates = 0;
-    loopsAligned        = 0;
-#endif
+    optLoopsCanonical = false;
 
     /* Keep track of the number of calls and indirect calls made by this method */
     optCallCount         = 0;
@@ -40,16 +34,20 @@ void Compiler::optInit()
     optCSECandidateCount = 0;
     optCSEattempt        = 0;
     optCSEheuristic      = nullptr;
+    optCSEunmarks        = 0;
 }
 
-DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
+DataFlow::DataFlow(Compiler* pCompiler)
+    : m_pCompiler(pCompiler)
 {
 }
 
 //------------------------------------------------------------------------
 // optSetBlockWeights: adjust block weights, as follows:
-// 1. A block that is not reachable from the entry block is marked "run rarely".
-// 2. If we're not using profile weights, then any block with a non-zero weight
+// 1. Lexical block ranges where the bottom reaches the top are scaled as a loop.
+//    This is a more general definition of "loop" than natural loops.
+// 2. A block that is not reachable from the entry block is marked "run rarely".
+// 3. If we're not using profile weights, then any block with a non-zero weight
 //    that doesn't dominate all the return blocks has its weight dropped in half
 //    (but only if the first block *does* dominate all the returns).
 //
@@ -62,13 +60,29 @@ DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
 PhaseStatus Compiler::optSetBlockWeights()
 {
     noway_assert(opts.OptimizationEnabled());
-    assert(m_domTree != nullptr);
-    assert(fgReturnBlocksComputed);
+
+    assert(m_dfsTree != nullptr);
+    if (m_domTree == nullptr)
+    {
+        m_domTree = FlowGraphDominatorTree::Build(m_dfsTree);
+    }
+    if (m_reachabilitySets == nullptr)
+    {
+        m_reachabilitySets = BlockReachabilitySets::Build(m_dfsTree);
+    }
+
+    if (m_dfsTree->HasCycle())
+    {
+        optMarkLoopHeads();
+        optFindAndScaleGeneralLoopBlocks();
+    }
 
     bool       madeChanges                = false;
     bool       firstBBDominatesAllReturns = true;
     const bool usingProfileWeights        = fgIsUsingProfileWeights();
 
+    fgComputeReturnBlocks();
+
     // TODO-Quirk: Previously, this code ran on a dominator tree based only on
     // regular flow. This meant that all handlers were not considered to be
     // dominated by fgFirstBB. When those handlers could reach a return
@@ -233,6 +247,13 @@ void Compiler::optScaleLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
             continue;
         }
 
+        // Don't change the block weight if it's unreachable.
+        if (!m_reachabilitySets->GetDfsTree()->Contains(curBlk))
+        {
+            reportBlockWeight(curBlk, "; unchanged: unreachable");
+            continue;
+        }
+
         // For curBlk to be part of a loop that starts at begBlk, curBlk must be reachable from begBlk and
         // (since this is a loop) begBlk must likewise be reachable from curBlk.
 
@@ -483,14 +504,12 @@ bool Compiler::optExtractInitTestIncr(
         if (initStmt->GetRootNode()->OperIs(GT_JTRUE))
         {
             bool doGetPrev = true;
-#ifdef DEBUG
             if (opts.optRepeat)
             {
                 // Previous optimization passes may have inserted compiler-generated
                 // statements other than duplicated loop conditions.
                 doGetPrev = (initStmt->GetPrevStmt() != nullptr);
             }
-#endif // DEBUG
             if (doGetPrev)
             {
                 initStmt = initStmt->GetPrevStmt();
@@ -553,170 +572,178 @@ void Compiler::optCheckPreds()
 #endif // DEBUG
 
 //------------------------------------------------------------------------
-// optRedirectBlock: Replace the branch successors of a block based on a block map.
+// optSetMappedBlockTargets: Initialize the branch successors of a block based on a block map.
 //
-// Updates the successors of `blk`: if `blk2` is a branch successor of `blk`, and there is a mapping
-// for `blk2->blk3` in `redirectMap`, change `blk` so that `blk3` is this branch successor.
+// Updates the successors of `newBlk`, a copy of `blk`:
+// If `blk2` is a branch successor of `blk`, and there is a mapping
+// for `blk2->blk3` in `redirectMap`, make `blk3` a successor of `newBlk`.
+// Else, make `blk2` a successor of `newBlk`.
 //
 // Arguments:
-//     blk          - block to redirect
-//     redirectMap  - block->block map specifying how the `blk` target will be redirected.
-//     predOption   - specifies how to update the pred lists
+//     blk          - the original block, which doesn't need redirecting
+//     newBlk       - copy of blk, with uninitialized successors
+//     redirectMap  - block->block map specifying how to redirect the target of `blk`.
 //
 // Notes:
-//     Pred lists for successors of `blk` may be changed, depending on `predOption`.
+//     Initially, `newBlk` should not have any successors set.
+//     Upon returning, `newBlk` should have all of its successors initialized.
+//     `blk` must have its successors set upon entry; these won't be changed.
 //
-void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, RedirectBlockOption predOption)
+void Compiler::optSetMappedBlockTargets(BasicBlock* blk, BasicBlock* newBlk, BlockToBlockMap* redirectMap)
 {
-    const bool updatePreds = (predOption == RedirectBlockOption::UpdatePredLists);
-    const bool addPreds    = (predOption == RedirectBlockOption::AddToPredLists);
+    // Caller should not have initialized newBlk's target yet
+    assert(newBlk->KindIs(BBJ_ALWAYS));
+    assert(!newBlk->HasInitializedTarget());
 
-    BasicBlock* newJumpDest = nullptr;
+    BasicBlock* newTarget;
 
+    // Initialize the successors of "newBlk".
+    // For each successor, use "blockMap" to determine if the successor needs to be redirected.
     switch (blk->GetKind())
     {
-        case BBJ_THROW:
-        case BBJ_RETURN:
-        case BBJ_EHFILTERRET:
-        case BBJ_EHFAULTRET:
-        case BBJ_EHCATCHRET:
-            // These have no jump destination to update.
-            break;
-
-        case BBJ_CALLFINALLY:
-            if (addPreds && blk->bbFallsThrough())
-            {
-                fgAddRefPred(blk->Next(), blk);
-            }
-
-            FALLTHROUGH;
         case BBJ_ALWAYS:
-        case BBJ_LEAVE:
+        case BBJ_CALLFINALLY:
         case BBJ_CALLFINALLYRET:
-            // All of these have a single jump destination to update.
-            if (redirectMap->Lookup(blk->GetTarget(), &newJumpDest))
+        case BBJ_LEAVE:
+        {
+            FlowEdge* newEdge;
+
+            // Determine if newBlk should be redirected to a different target from blk's target
+            if (redirectMap->Lookup(blk->GetTarget(), &newTarget))
             {
-                if (updatePreds)
-                {
-                    fgRemoveRefPred(blk->GetTarget(), blk);
-                }
-                blk->SetTarget(newJumpDest);
-                if (updatePreds || addPreds)
-                {
-                    fgAddRefPred(newJumpDest, blk);
-                }
+                // newBlk needs to be redirected to a new target
+                newEdge = fgAddRefPred(newTarget, newBlk);
             }
-            else if (addPreds)
+            else
             {
-                fgAddRefPred(blk->GetTarget(), blk);
+                // newBlk uses the same target as blk
+                newEdge = fgAddRefPred(blk->GetTarget(), newBlk);
             }
+
+            newBlk->SetKindAndTargetEdge(blk->GetKind(), newEdge);
             break;
+        }
 
         case BBJ_COND:
-            // Update jump taken when condition is true
-            if (redirectMap->Lookup(blk->GetTrueTarget(), &newJumpDest))
+        {
+            BasicBlock* trueTarget;
+            BasicBlock* falseTarget;
+
+            // Determine if newBLk should be redirected to a different true target from blk's true target
+            if (redirectMap->Lookup(blk->GetTrueTarget(), &newTarget))
             {
-                if (updatePreds)
-                {
-                    fgRemoveRefPred(blk->GetTrueTarget(), blk);
-                }
-                blk->SetTrueTarget(newJumpDest);
-                if (updatePreds || addPreds)
-                {
-                    fgAddRefPred(newJumpDest, blk);
-                }
+                // newBlk needs to be redirected to a new true target
+                trueTarget = newTarget;
             }
-            else if (addPreds)
+            else
             {
-                fgAddRefPred(blk->GetTrueTarget(), blk);
+                // newBlk uses the same true target as blk
+                trueTarget = blk->GetTrueTarget();
             }
 
-            // Update jump taken when condition is false
-            if (redirectMap->Lookup(blk->GetFalseTarget(), &newJumpDest))
+            // Do the same lookup for the false target
+            if (redirectMap->Lookup(blk->GetFalseTarget(), &newTarget))
             {
-                if (updatePreds)
-                {
-                    fgRemoveRefPred(blk->GetFalseTarget(), blk);
-                }
-                blk->SetFalseTarget(newJumpDest);
-                if (updatePreds || addPreds)
-                {
-                    fgAddRefPred(newJumpDest, blk);
-                }
+                falseTarget = newTarget;
             }
-            else if (addPreds)
+            else
             {
-                fgAddRefPred(blk->GetFalseTarget(), blk);
+                falseTarget = blk->GetFalseTarget();
             }
+
+            FlowEdge* const oldTrueEdge  = blk->GetTrueEdge();
+            FlowEdge* const trueEdge     = fgAddRefPred(trueTarget, newBlk, oldTrueEdge);
+            FlowEdge* const oldFalseEdge = blk->GetFalseEdge();
+            FlowEdge* const falseEdge    = fgAddRefPred(falseTarget, newBlk, oldFalseEdge);
+            newBlk->SetCond(trueEdge, falseEdge);
             break;
+        }
 
         case BBJ_EHFINALLYRET:
         {
-            BBehfDesc*  ehfDesc = blk->GetEhfTargets();
-            BasicBlock* newSucc = nullptr;
-            for (unsigned i = 0; i < ehfDesc->bbeCount; i++)
+            BBehfDesc* currEhfDesc = blk->GetEhfTargets();
+            BBehfDesc* newEhfDesc  = new (this, CMK_BasicBlock) BBehfDesc;
+            newEhfDesc->bbeCount   = currEhfDesc->bbeCount;
+            newEhfDesc->bbeSuccs   = new (this, CMK_FlowEdge) FlowEdge*[newEhfDesc->bbeCount];
+
+            for (unsigned i = 0; i < newEhfDesc->bbeCount; i++)
             {
-                BasicBlock* const succ = ehfDesc->bbeSuccs[i];
-                if (redirectMap->Lookup(succ, &newSucc))
+                FlowEdge* const   inspiringEdge = currEhfDesc->bbeSuccs[i];
+                BasicBlock* const ehfTarget     = inspiringEdge->getDestinationBlock();
+                FlowEdge*         newEdge;
+
+                // Determine if newBlk should target ehfTarget, or be redirected
+                if (redirectMap->Lookup(ehfTarget, &newTarget))
                 {
-                    if (updatePreds)
-                    {
-                        fgRemoveRefPred(succ, blk);
-                    }
-                    if (updatePreds || addPreds)
-                    {
-                        fgAddRefPred(newSucc, blk);
-                    }
-                    ehfDesc->bbeSuccs[i] = newSucc;
+                    newEdge = fgAddRefPred(newTarget, newBlk, inspiringEdge);
                 }
-                else if (addPreds)
+                else
                 {
-                    fgAddRefPred(succ, blk);
+                    newEdge = fgAddRefPred(ehfTarget, newBlk, inspiringEdge);
                 }
+
+                newEhfDesc->bbeSuccs[i] = newEdge;
             }
+
+            newBlk->SetEhf(newEhfDesc);
+            break;
         }
-        break;
 
         case BBJ_SWITCH:
         {
-            bool redirected = false;
-            for (unsigned i = 0; i < blk->GetSwitchTargets()->bbsCount; i++)
+            BBswtDesc* currSwtDesc = blk->GetSwitchTargets();
+            BBswtDesc* newSwtDesc  = new (this, CMK_BasicBlock) BBswtDesc(currSwtDesc);
+            newSwtDesc->bbsDstTab  = new (this, CMK_FlowEdge) FlowEdge*[newSwtDesc->bbsCount];
+
+            for (unsigned i = 0; i < newSwtDesc->bbsCount; i++)
             {
-                BasicBlock* const switchDest = blk->GetSwitchTargets()->bbsDstTab[i];
-                if (redirectMap->Lookup(switchDest, &newJumpDest))
+                FlowEdge* const   inspiringEdge = currSwtDesc->bbsDstTab[i];
+                BasicBlock* const switchTarget  = inspiringEdge->getDestinationBlock();
+                FlowEdge*         newEdge;
+
+                // Determine if newBlk should target switchTarget, or be redirected
+                if (redirectMap->Lookup(switchTarget, &newTarget))
                 {
-                    if (updatePreds)
-                    {
-                        fgRemoveRefPred(switchDest, blk);
-                    }
-                    if (updatePreds || addPreds)
-                    {
-                        fgAddRefPred(newJumpDest, blk);
-                    }
-                    blk->GetSwitchTargets()->bbsDstTab[i] = newJumpDest;
-                    redirected                            = true;
+                    newEdge = fgAddRefPred(newTarget, newBlk);
                 }
-                else if (addPreds)
+                else
                 {
-                    fgAddRefPred(switchDest, blk);
+                    newEdge = fgAddRefPred(switchTarget, newBlk);
                 }
-            }
-            // If any redirections happened, invalidate the switch table map for the switch.
-            if (redirected)
-            {
-                // Don't create a new map just to try to remove an entry.
-                BlockToSwitchDescMap* switchMap = GetSwitchDescMap(/* createIfNull */ false);
-                if (switchMap != nullptr)
+
+                // Transfer likelihood... instead of doing this gradually
+                // for dup'd edges, we set it once, when we add the last dup.
+                //
+                if (newEdge->getDupCount() == inspiringEdge->getDupCount())
                 {
-                    switchMap->Remove(blk);
+                    newEdge->setLikelihood(inspiringEdge->getLikelihood());
                 }
+
+                newSwtDesc->bbsDstTab[i] = newEdge;
             }
+
+            newBlk->SetSwitch(newSwtDesc);
+            break;
+        }
+
+        case BBJ_EHCATCHRET:
+        case BBJ_EHFILTERRET:
+        {
+            // newBlk's jump target should not need to be redirected
+            assert(!redirectMap->Lookup(blk->GetTarget(), &newTarget));
+            FlowEdge* newEdge = fgAddRefPred(newBlk->GetTarget(), newBlk);
+            newBlk->SetKindAndTargetEdge(blk->GetKind(), newEdge);
+            break;
         }
-        break;
 
         default:
-            unreached();
+            // blk doesn't have a jump destination
+            assert(blk->NumSucc() == 0);
+            newBlk->SetKindAndTargetEdge(blk->GetKind());
+            break;
     }
+
+    assert(newBlk->KindIs(blk->GetKind()));
 }
 
 //-----------------------------------------------------------------------------
@@ -863,7 +890,7 @@ bool Compiler::optComputeLoopRep(int        constInit,
 
     switch (iterOperType)
     {
-// For small types, the iteration operator will narrow these values if big
+        // For small types, the iteration operator will narrow these values if big
 
 #define INIT_ITER_BY_TYPE(type)                                                                                        \
     constInitX = (type)constInit;                                                                                      \
@@ -882,7 +909,7 @@ bool Compiler::optComputeLoopRep(int        constInit,
             INIT_ITER_BY_TYPE(unsigned short);
             break;
 
-        // For the big types, 32 bit arithmetic is performed
+            // For the big types, 32 bit arithmetic is performed
 
         case TYP_INT:
             if (unsTest)
@@ -1306,6 +1333,8 @@ PhaseStatus Compiler::optUnrollLoops()
     {
         assert(anyIRchange);
 
+        Metrics.LoopsUnrolled += unrollCount;
+
 #ifdef DEBUG
         if (verbose)
         {
@@ -1319,6 +1348,13 @@ PhaseStatus Compiler::optUnrollLoops()
         fgDfsBlocksAndRemove();
         m_loops = FlowGraphNaturalLoops::Find(m_dfsTree);
 
+        if (optCanonicalizeLoops())
+        {
+            fgInvalidateDfsTree();
+            m_dfsTree = fgComputeDfs();
+            m_loops   = FlowGraphNaturalLoops::Find(m_dfsTree);
+        }
+
         fgRenumberBlocks();
 
         DBEXEC(verbose, fgDispBasicBlocks());
@@ -1657,7 +1693,6 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR)
 
     // The old loop body is unreachable now, but we will remove those
     // blocks after we finish unrolling.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     if (verbose)
@@ -1708,12 +1743,10 @@ void Compiler::optRedirectPrevUnrollIteration(FlowGraphNaturalLoop* loop, BasicB
             testCopyStmt->SetRootNode(sideEffList);
         }
 
-        fgRemoveRefPred(prevTestBlock->GetTrueTarget(), prevTestBlock);
-        fgRemoveRefPred(prevTestBlock->GetFalseTarget(), prevTestBlock);
-
         // Redirect exit edge from previous iteration to new entry.
-        prevTestBlock->SetKindAndTarget(BBJ_ALWAYS, target);
-        fgAddRefPred(target, prevTestBlock);
+        fgRedirectTrueEdge(prevTestBlock, target);
+        fgRemoveRefPred(prevTestBlock->GetFalseEdge());
+        prevTestBlock->SetKindAndTargetEdge(BBJ_ALWAYS, prevTestBlock->GetTrueEdge());
 
         JITDUMP("Redirecting previously created exiting " FMT_BB " -> " FMT_BB "\n", prevTestBlock->bbNum,
                 target->bbNum);
@@ -1762,7 +1795,9 @@ void Compiler::optReplaceScalarUsesWithConst(BasicBlock* block, unsigned lclNum,
         bool MadeChanges = false;
 
         ReplaceVisitor(Compiler* comp, unsigned lclNum, ssize_t cnsVal)
-            : GenTreeVisitor(comp), m_lclNum(lclNum), m_cnsVal(cnsVal)
+            : GenTreeVisitor(comp)
+            , m_lclNum(lclNum)
+            , m_cnsVal(cnsVal)
         {
         }
 
@@ -1808,7 +1843,8 @@ Compiler::OptInvertCountTreeInfoType Compiler::optInvertCountTreeInfo(GenTree* t
 
         Compiler::OptInvertCountTreeInfoType Result = {};
 
-        CountTreeInfoVisitor(Compiler* comp) : GenTreeVisitor(comp)
+        CountTreeInfoVisitor(Compiler* comp)
+            : GenTreeVisitor(comp)
         {
         }
 
@@ -1853,32 +1889,36 @@ Compiler::OptInvertCountTreeInfoType Compiler::optInvertCountTreeInfo(GenTree* t
 //
 //   Specifically, we're looking for the following case:
 //
+//  block:
 //          ...
 //          jmp test                // `block` argument
-//   loop:
+//    top:
 //          ...
 //          ...
 //   test:
 //          ..stmts..
 //          cond
-//          jtrue loop
+//          jtrue top
 //
 //   If we find this, and the condition is simple enough, we change
 //   the loop to the following:
 //
+//  block:
 //          ...
+//          jmp bNewCond
+//  bNewCond:
 //          ..stmts..               // duplicated cond block statements
 //          cond                    // duplicated cond
-//          jfalse done
+//          jfalse join
 //          // else fall-through
-//   loop:
+//    top:
 //          ...
 //          ...
 //   test:
 //          ..stmts..
 //          cond
-//          jtrue loop
-//   done:
+//          jtrue top
+//   join:
 //
 //  Makes no changes if the flow pattern match fails.
 //
@@ -1922,7 +1962,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
         return false;
     }
 
-    // Since bTest is a BBJ_COND it will have a bbFalseTarget
+    // Since bTest is a BBJ_COND it will have a false target
     //
     BasicBlock* const bJoin = bTest->GetFalseTarget();
     noway_assert(bJoin != nullptr);
@@ -1944,7 +1984,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
     }
 
     // It has to be a forward jump. Defer this check until after all the cheap checks
-    // are done, since it iterates forward in the block list looking for bbTarget.
+    // are done, since it iterates forward in the block list looking for block's target.
     //  TODO-CQ: Check if we can also optimize the backwards jump as well.
     //
     if (!fgIsForwardBranch(block, block->GetTarget()))
@@ -2134,10 +2174,8 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
     bool foundCondTree = false;
 
     // Create a new block after `block` to put the copied condition code.
-    BasicBlock* bNewCond = fgNewBBafter(BBJ_COND, block, /*extendRegion*/ true, bJoin);
-    block->SetKindAndTarget(BBJ_ALWAYS, bNewCond);
-    block->SetFlags(BBF_NONE_QUIRK);
-    assert(block->JumpsToNext());
+    //
+    BasicBlock* const bNewCond = fgNewBBafter(BBJ_COND, block, /*extendRegion*/ true);
 
     // Clone each statement in bTest and append to bNewCond.
     for (Statement* const stmt : bTest->Statements())
@@ -2196,12 +2234,26 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
 
     // Update pred info
     //
-    bNewCond->SetFalseTarget(bTop);
-    fgAddRefPred(bJoin, bNewCond);
-    fgAddRefPred(bTop, bNewCond);
+    // For now we set the likelihood of the newCond branch to match
+    // the likelihood of the test branch (though swapped, since we're
+    // currently reversing the condition). This may or may not match
+    // the block weight adjustments we're making. All this becomes
+    // easier to reconcile once we rely on edge likelihoods more and
+    // have synthesis running (so block weights ==> frequencies).
+    //
+    // Until then we won't worry that edges and blocks are potentially
+    // out of sync.
+    //
+    FlowEdge* const testTopEdge     = bTest->GetTrueEdge();
+    FlowEdge* const testJoinEdge    = bTest->GetFalseEdge();
+    FlowEdge* const newCondJoinEdge = fgAddRefPred(bJoin, bNewCond, testJoinEdge);
+    FlowEdge* const newCondTopEdge  = fgAddRefPred(bTop, bNewCond, testTopEdge);
 
-    fgAddRefPred(bNewCond, block);
-    fgRemoveRefPred(bTest, block);
+    bNewCond->SetTrueEdge(newCondJoinEdge);
+    bNewCond->SetFalseEdge(newCondTopEdge);
+
+    fgRedirectTargetEdge(block, bNewCond);
+    assert(block->JumpsToNext());
 
     // Move all predecessor edges that look like loop entry edges to point to the new cloned condition
     // block, not the existing condition block. The idea is that if we only move `block` to point to
@@ -2212,12 +2264,9 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
     // is maintained no matter which condition block we point to, but we'll lose optimization potential
     // (and create spaghetti code) if we get it wrong.
     //
-    BlockToBlockMap blockMap(getAllocator(CMK_LoopOpt));
-    bool            blockMapInitialized = false;
-
     unsigned const loopFirstNum  = bTop->bbNum;
     unsigned const loopBottomNum = bTest->bbNum;
-    for (BasicBlock* const predBlock : bTest->PredBlocks())
+    for (BasicBlock* const predBlock : bTest->PredBlocksEditing())
     {
         unsigned const bNum = predBlock->bbNum;
         if ((loopFirstNum <= bNum) && (bNum <= loopBottomNum))
@@ -2226,16 +2275,30 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
             continue;
         }
 
-        if (!blockMapInitialized)
-        {
-            blockMapInitialized = true;
-            blockMap.Set(bTest, bNewCond);
-        }
-
         // Redirect the predecessor to the new block.
         JITDUMP("Redirecting non-loop " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum,
                 bTest->bbNum, predBlock->bbNum, bNewCond->bbNum);
-        optRedirectBlock(predBlock, &blockMap, RedirectBlockOption::UpdatePredLists);
+
+        switch (predBlock->GetKind())
+        {
+            case BBJ_ALWAYS:
+            case BBJ_CALLFINALLY:
+            case BBJ_CALLFINALLYRET:
+            case BBJ_COND:
+            case BBJ_SWITCH:
+            case BBJ_EHFINALLYRET:
+                fgReplaceJumpTarget(predBlock, bTest, bNewCond);
+                break;
+
+            case BBJ_EHCATCHRET:
+            case BBJ_EHFILTERRET:
+                // These block types should not need redirecting
+                break;
+
+            default:
+                assert(!"Unexpected bbKind for predecessor block");
+                break;
+        }
     }
 
     // If we have profile data for all blocks and we know that we are cloning the
@@ -2268,8 +2331,8 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
         weight_t const testToNextWeight  = weightTop * testToNextLikelihood;
         weight_t const testToAfterWeight = weightTop * testToAfterLikelihood;
 
-        FlowEdge* const edgeTestToNext  = fgGetPredForBlock(bTop, bTest);
-        FlowEdge* const edgeTestToAfter = fgGetPredForBlock(bTest->GetFalseTarget(), bTest);
+        FlowEdge* const edgeTestToNext  = bTest->GetTrueEdge();
+        FlowEdge* const edgeTestToAfter = bTest->GetFalseEdge();
 
         JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (iterate loop)\n", bTest->bbNum, bTop->bbNum,
                 testToNextWeight);
@@ -2289,8 +2352,8 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
         weight_t const blockToNextWeight  = weightBlock * blockToNextLikelihood;
         weight_t const blockToAfterWeight = weightBlock * blockToAfterLikelihood;
 
-        FlowEdge* const edgeBlockToNext  = fgGetPredForBlock(bNewCond->GetFalseTarget(), bNewCond);
-        FlowEdge* const edgeBlockToAfter = fgGetPredForBlock(bNewCond->GetTrueTarget(), bNewCond);
+        FlowEdge* const edgeBlockToNext  = bNewCond->GetFalseEdge();
+        FlowEdge* const edgeBlockToAfter = bNewCond->GetTrueEdge();
 
         JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (enter loop)\n", bNewCond->bbNum,
                 bNewCond->GetFalseTarget()->bbNum, blockToNextWeight);
@@ -2301,18 +2364,21 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
         edgeBlockToAfter->setEdgeWeights(blockToAfterWeight, blockToAfterWeight, bNewCond->GetTrueTarget());
 
 #ifdef DEBUG
-        // If we're checkig profile data, see if profile for the two target blocks is consistent.
+        // If we're checking profile data, see if profile for the two target blocks is consistent.
         //
         if ((activePhaseChecks & PhaseChecks::CHECK_PROFILE) == PhaseChecks::CHECK_PROFILE)
         {
-            const ProfileChecks checks        = (ProfileChecks)JitConfig.JitProfileChecks();
-            const bool          nextProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetFalseTarget(), checks);
-            const bool          jumpProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetTrueTarget(), checks);
-
-            if (hasFlag(checks, ProfileChecks::RAISE_ASSERT))
+            if (JitConfig.JitProfileChecks() > 0)
             {
-                assert(nextProfileOk);
-                assert(jumpProfileOk);
+                const ProfileChecks checks        = (ProfileChecks)JitConfig.JitProfileChecks();
+                const bool          nextProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetFalseTarget(), checks);
+                const bool          jumpProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetTrueTarget(), checks);
+
+                if (hasFlag(checks, ProfileChecks::RAISE_ASSERT))
+                {
+                    assert(nextProfileOk);
+                    assert(jumpProfileOk);
+                }
             }
         }
 #endif // DEBUG
@@ -2443,6 +2509,40 @@ PhaseStatus Compiler::optOptimizeLayout()
     return PhaseStatus::MODIFIED_EVERYTHING;
 }
 
+//-----------------------------------------------------------------------------
+// optOptimizePostLayout: Optimize flow after block layout is finalized
+//
+// Returns:
+//   suitable phase status
+//
+PhaseStatus Compiler::optOptimizePostLayout()
+{
+    assert(opts.OptimizationEnabled());
+    PhaseStatus status = PhaseStatus::MODIFIED_NOTHING;
+
+    for (BasicBlock* const block : Blocks())
+    {
+        // Reverse conditions to enable fallthrough flow into BBJ_COND's false target
+        if (block->KindIs(BBJ_COND) && block->CanRemoveJumpToTarget(block->GetTrueTarget(), this))
+        {
+            GenTree* const test = block->lastNode();
+            assert(test->OperIsConditionalJump());
+            GenTree* const cond = gtReverseCond(test);
+            assert(cond == test); // Ensure `gtReverseCond` did not create a new node
+
+            FlowEdge* const oldTrueEdge  = block->GetTrueEdge();
+            FlowEdge* const oldFalseEdge = block->GetFalseEdge();
+            block->SetTrueEdge(oldFalseEdge);
+            block->SetFalseEdge(oldTrueEdge);
+
+            assert(block->CanRemoveJumpToTarget(block->GetFalseTarget(), this));
+            status = PhaseStatus::MODIFIED_EVERYTHING;
+        }
+    }
+
+    return status;
+}
+
 //------------------------------------------------------------------------
 // optMarkLoopHeads: Mark all potential loop heads as BBF_LOOP_HEAD. A potential loop head is a block
 // targeted by a lexical back edge, where the source of the back edge is reachable from the block.
@@ -2460,13 +2560,13 @@ void Compiler::optMarkLoopHeads()
     {
         printf("*************** In optMarkLoopHeads()\n");
     }
-
-    assert(m_reachabilitySets != nullptr);
     fgDebugCheckBBNumIncreasing();
 
     int loopHeadsMarked = 0;
 #endif
 
+    assert((m_dfsTree != nullptr) && (m_reachabilitySets != nullptr));
+
     bool hasLoops = false;
 
     for (BasicBlock* const block : Blocks())
@@ -2542,15 +2642,7 @@ void Compiler::optFindAndScaleGeneralLoopBlocks()
     // This code depends on block number ordering.
     INDEBUG(fgDebugCheckBBNumIncreasing());
 
-    assert(m_dfsTree != nullptr);
-    if (m_reachabilitySets == nullptr)
-    {
-        m_reachabilitySets = BlockReachabilitySets::Build(m_dfsTree);
-    }
-    if (m_domTree == nullptr)
-    {
-        m_domTree = FlowGraphDominatorTree::Build(m_dfsTree);
-    }
+    assert((m_dfsTree != nullptr) && (m_domTree != nullptr) && (m_reachabilitySets != nullptr));
 
     unsigned generalLoopCount = 0;
 
@@ -2626,7 +2718,7 @@ void Compiler::optFindAndScaleGeneralLoopBlocks()
 }
 
 //-----------------------------------------------------------------------------
-// optFindLoops: find loops in the function.
+// optFindLoopsPhase: find loops in the function.
 //
 // The JIT recognizes two types of loops in a function: natural loops and "general" (or "unnatural") loops.
 // Natural loops are those which get added to Compiler::m_loops. Most downstream optimizations require
@@ -2647,17 +2739,12 @@ PhaseStatus Compiler::optFindLoopsPhase()
     }
 #endif
 
-    optMarkLoopHeads();
+    fgRenumberBlocks();
 
     assert(m_dfsTree != nullptr);
     optFindLoops();
 
-    if (fgHasLoops)
-    {
-        optFindAndScaleGeneralLoopBlocks();
-    }
-
-    optNumNaturalLoopsFound = (unsigned)m_loops->NumLoops();
+    Metrics.LoopsFoundDuringOpts = (int)m_loops->NumLoops();
 
     return PhaseStatus::MODIFIED_EVERYTHING;
 }
@@ -2680,8 +2767,8 @@ void Compiler::optFindLoops()
 
     fgRenumberBlocks();
 
-    // Starting now, we require all loops to have pre-headers.
-    optLoopsRequirePreHeaders = true;
+    // Starting now we require all loops to be in canonical form.
+    optLoopsCanonical = true;
 
     // Leave a bread crumb for future phases like loop alignment about whether
     // looking for loops makes sense. We generally do not expect phases to
@@ -2709,11 +2796,28 @@ void Compiler::optFindLoops()
 bool Compiler::optCanonicalizeLoops()
 {
     bool changed = false;
+
     for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder())
     {
         changed |= optCreatePreheader(loop);
     }
 
+    // At this point we've created preheaders. That means we are working with
+    // stale loop and DFS data. However, we can do exit canonicalization even
+    // on the stale data; this relies on the fact that exiting blocks do not
+    // change as a result of creating preheaders. On the other hand the exit
+    // blocks themselves may have changed (previously it may have been another
+    // loop's header, now it might be its preheader instead). Exit
+    // canonicalization stil works even with this.
+    //
+    // The exit canonicalization needs to be done in post order (inner -> outer
+    // loops) so that inner exits that also exit outer loops have proper exit
+    // blocks created for each loop.
+    for (FlowGraphNaturalLoop* loop : m_loops->InPostOrder())
+    {
+        changed |= optCanonicalizeExits(loop);
+    }
+
     return changed;
 }
 
@@ -2820,8 +2924,40 @@ BasicBlock* Compiler::optFindLoopCompactionInsertionPoint(FlowGraphNaturalLoop*
     // out of the loop, and if possible find a spot that won't break up fall-through.
     BasicBlock* bottom         = loop->GetLexicallyBottomMostBlock();
     BasicBlock* insertionPoint = bottom;
-    while (insertionPoint->bbFallsThrough() && !insertionPoint->IsLast())
+    while (!insertionPoint->IsLast())
     {
+        switch (insertionPoint->GetKind())
+        {
+            case BBJ_ALWAYS:
+                if (!insertionPoint->JumpsToNext())
+                {
+                    // Found a branch that isn't to the next block, so we won't split up any fall-through.
+                    return insertionPoint;
+                }
+                break;
+
+            case BBJ_COND:
+                if (!insertionPoint->FalseTargetIs(insertionPoint->Next()))
+                {
+                    // Found a conditional branch that doesn't have a false branch to the next block,
+                    // so we won't split up any fall-through.
+                    return insertionPoint;
+                }
+                break;
+
+            case BBJ_CALLFINALLY:
+                if (!insertionPoint->isBBCallFinallyPair())
+                {
+                    // Found a retless BBJ_CALLFINALLY block, so we won't split up any fall-through.
+                    return insertionPoint;
+                }
+                break;
+
+            default:
+                // No fall-through to split up.
+                return insertionPoint;
+        }
+
         // Keep looking for a better insertion point if we can.
         BasicBlock* newInsertionPoint = optTryAdvanceLoopCompactionInsertionPoint(loop, insertionPoint, top, bottom);
         if (newInsertionPoint == nullptr)
@@ -2946,20 +3082,15 @@ bool Compiler::optCreatePreheader(FlowGraphNaturalLoop* loop)
         insertBefore = header;
     }
 
-    BasicBlock* preheader = fgNewBBbefore(BBJ_ALWAYS, insertBefore, false, header);
+    BasicBlock* preheader = fgNewBBbefore(BBJ_ALWAYS, insertBefore, false);
     preheader->SetFlags(BBF_INTERNAL);
-    fgSetEHRegionForNewPreheader(preheader);
-
-    if (preheader->NextIs(header))
-    {
-        preheader->SetFlags(BBF_NONE_QUIRK);
-    }
-
+    fgSetEHRegionForNewPreheaderOrExit(preheader);
     preheader->bbCodeOffs = insertBefore->bbCodeOffs;
 
     JITDUMP("Created new preheader " FMT_BB " for " FMT_LP "\n", preheader->bbNum, loop->GetIndex());
 
-    fgAddRefPred(header, preheader);
+    FlowEdge* const newEdge = fgAddRefPred(header, preheader);
+    preheader->SetTargetEdge(newEdge);
 
     for (FlowEdge* enterEdge : loop->EntryEdges())
     {
@@ -2970,137 +3101,278 @@ bool Compiler::optCreatePreheader(FlowGraphNaturalLoop* loop)
         fgReplaceJumpTarget(enterBlock, header, preheader);
     }
 
-    optSetPreheaderWeight(loop, preheader);
+    optSetWeightForPreheaderOrExit(loop, preheader);
 
     return true;
 }
 
 //-----------------------------------------------------------------------------
-// optSetPreheaderWeight: Set the weight of a newly created preheader, after it
-// has been added to the flowgraph.
+// optCanonicalizeExits: Canonicalize all regular exits of the loop so that
+// they have only loop predecessors.
 //
 // Parameters:
-//   loop      - The loop
-//   preheader - The new preheader block
+//   loop - The loop
+//
+// Returns:
+//   True if any flow graph modifications were made.
 //
-void Compiler::optSetPreheaderWeight(FlowGraphNaturalLoop* loop, BasicBlock* preheader)
+bool Compiler::optCanonicalizeExits(FlowGraphNaturalLoop* loop)
 {
-    if (loop->EntryEdges().size() == 0)
+    bool changed = false;
+
+    for (FlowEdge* edge : loop->ExitEdges())
+    {
+        // Find all blocks outside the loop from this exiting block. Those
+        // blocks are exits. Note that we may see preheaders created by
+        // previous canonicalization here, which are not part of the DFS tree
+        // or properly maintained in a parent loop. This also means the
+        // destination block of the exit edge may no longer be right, so we
+        // cannot use VisitRegularExitBlocks. The canonicalization here works
+        // despite this.
+        edge->getSourceBlock()->VisitRegularSuccs(this, [=, &changed](BasicBlock* succ) {
+            if (!loop->ContainsBlock(succ))
+            {
+                changed |= optCanonicalizeExit(loop, succ);
+            }
+
+            return BasicBlockVisit::Continue;
+        });
+    }
+
+    return changed;
+}
+
+//-----------------------------------------------------------------------------
+// optCanonicalizeExit: Canonicalize a single exit block to have only loop
+// predecessors.
+//
+// Parameters:
+//   loop - The loop
+//
+// Returns:
+//   True if any flow graph modifications were made.
+//
+bool Compiler::optCanonicalizeExit(FlowGraphNaturalLoop* loop, BasicBlock* exit)
+{
+    assert(!loop->ContainsBlock(exit));
+
+    if (bbIsHandlerBeg(exit))
     {
-        return;
+        return false;
     }
 
-    // The preheader is only considered to have profile weights when all the
-    // following conditions are true:
-    // - The loop header has a profile weight
-    // - All entering blocks have a profile weight
-    // - The successors of all entering blocks have a profile weight (so that
-    //   we can trust the computed taken/not taken ratio)
-    //
-    bool     hasProfWeight   = fgIsUsingProfileWeights();
-    weight_t preheaderWeight = BB_ZERO_WEIGHT;
+    bool allLoopPreds = true;
+    for (BasicBlock* pred : exit->PredBlocks())
+    {
+        if (!loop->ContainsBlock(pred))
+        {
+            allLoopPreds = false;
+            break;
+        }
+    }
 
-    for (FlowEdge* entryEdge : loop->EntryEdges())
+    if (allLoopPreds)
     {
-        BasicBlock* prevEntering = entryEdge->getSourceBlock();
+        // Already canonical
+        JITDUMP("All preds of exit " FMT_BB " of " FMT_LP " are already in the loop, no exit canonicalization needed\n",
+                exit->bbNum, loop->GetIndex());
+        return false;
+    }
 
-        hasProfWeight &= prevEntering->HasFlag(BBF_PROF_WEIGHT) != BBF_EMPTY;
+    BasicBlock* newExit;
 
-        if (!fgIsUsingProfileWeights() || !prevEntering->HasFlag(BBF_PROF_WEIGHT) ||
-            !loop->GetHeader()->HasFlag(BBF_PROF_WEIGHT) || prevEntering->KindIs(BBJ_ALWAYS))
+    JITDUMP("Canonicalize exit " FMT_BB " for " FMT_LP " to have only loop predecessors\n", exit->bbNum,
+            loop->GetIndex());
+
+    if (UsesCallFinallyThunks() && exit->KindIs(BBJ_CALLFINALLY))
+    {
+        // Branches to a BBJ_CALLFINALLY _must_ come from inside its associated
+        // try region, and when we have callfinally thunks the BBJ_CALLFINALLY
+        // is outside it. First try to see if the lexically bottom most block
+        // is part of the try; if so, inserting after that is a good choice.
+        BasicBlock* finallyBlock = exit->GetTarget();
+        assert(finallyBlock->hasHndIndex());
+        BasicBlock* bottom = loop->GetLexicallyBottomMostBlock();
+        if (bottom->hasTryIndex() && (bottom->getTryIndex() == finallyBlock->getHndIndex()) && !bottom->hasHndIndex())
         {
-            preheaderWeight += prevEntering->bbWeight / prevEntering->NumSucc(this);
-            continue;
+            newExit = fgNewBBafter(BBJ_ALWAYS, bottom, true);
         }
+        else
+        {
+            // Otherwise just do the heavy-handed thing and insert it anywhere in the right region.
+            newExit = fgNewBBinRegion(BBJ_ALWAYS, finallyBlock->bbHndIndex, 0, nullptr, /* putInFilter */ false,
+                                      /* runRarely */ false, /* insertAtEnd */ true);
+        }
+    }
+    else
+    {
+        newExit = fgNewBBbefore(BBJ_ALWAYS, exit, false);
+        fgSetEHRegionForNewPreheaderOrExit(newExit);
+    }
 
-        bool succsHaveProfileWeights = true;
-        bool useEdgeWeights          = fgHaveValidEdgeWeights;
+    newExit->SetFlags(BBF_INTERNAL);
 
-        weight_t loopEnterCount = 0;
-        weight_t loopSkipCount  = 0;
+    FlowEdge* const newEdge = fgAddRefPred(exit, newExit);
+    newExit->SetTargetEdge(newEdge);
 
-        if (useEdgeWeights)
+    newExit->bbCodeOffs = exit->bbCodeOffs;
+
+    for (BasicBlock* pred : exit->PredBlocksEditing())
+    {
+        if (loop->ContainsBlock(pred))
         {
-            prevEntering->VisitRegularSuccs(this, [&, preheader](BasicBlock* succ) {
-                FlowEdge* edge       = fgGetPredForBlock(succ, prevEntering);
-                weight_t  edgeWeight = (edge->edgeWeightMin() + edge->edgeWeightMax()) / 2.0;
+            fgReplaceJumpTarget(pred, exit, newExit);
+        }
+    }
 
-                if (succ == preheader)
-                {
-                    loopEnterCount += edgeWeight;
-                }
-                else
-                {
-                    succsHaveProfileWeights &= succ->hasProfileWeight();
-                    loopSkipCount += edgeWeight;
-                }
-                return BasicBlockVisit::Continue;
-            });
+    optSetWeightForPreheaderOrExit(loop, newExit);
 
-            // Watch out for cases where edge weights were not properly maintained
-            // so that it appears no profile flow enters the loop.
-            //
-            useEdgeWeights = !fgProfileWeightsConsistent(loopEnterCount, BB_ZERO_WEIGHT);
-        }
+    JITDUMP("Created new exit " FMT_BB " to replace " FMT_BB " exit for " FMT_LP "\n", newExit->bbNum, exit->bbNum,
+            loop->GetIndex());
+    return true;
+}
 
-        if (!useEdgeWeights)
-        {
-            loopEnterCount = 0;
-            loopSkipCount  = 0;
+//-----------------------------------------------------------------------------
+// optEstimateEdgeLikelihood: Given a block "from" that may transfer control to
+// "to", estimate the likelihood that this will happen taking profile into
+// account if available.
+//
+// Parameters:
+//   from        - From block
+//   to          - To block
+//   fromProfile - [out] Whether or not the estimate is based on profile data
+//
+// Returns:
+//   Estimated likelihood of the edge being taken.
+//
+weight_t Compiler::optEstimateEdgeLikelihood(BasicBlock* from, BasicBlock* to, bool* fromProfile)
+{
+    *fromProfile = (from->HasFlag(BBF_PROF_WEIGHT) != BBF_EMPTY) && (to->HasFlag(BBF_PROF_WEIGHT) != BBF_EMPTY);
+    if (!fgIsUsingProfileWeights() || !from->HasFlag(BBF_PROF_WEIGHT) || !to->HasFlag(BBF_PROF_WEIGHT) ||
+        from->KindIs(BBJ_ALWAYS))
+    {
+        return 1.0 / from->NumSucc(this);
+    }
 
-            prevEntering->VisitRegularSuccs(this, [&, preheader](BasicBlock* succ) {
-                if (succ == preheader)
-                {
-                    loopEnterCount += succ->bbWeight;
-                }
-                else
-                {
-                    succsHaveProfileWeights &= succ->hasProfileWeight();
-                    loopSkipCount += succ->bbWeight;
-                }
+    bool useEdgeWeights = fgHaveValidEdgeWeights;
 
-                return BasicBlockVisit::Continue;
-            });
-        }
+    weight_t takenCount    = 0;
+    weight_t notTakenCount = 0;
 
-        if (!succsHaveProfileWeights)
-        {
-            preheaderWeight += prevEntering->bbWeight / prevEntering->NumSucc(this);
-            hasProfWeight = false;
-            continue;
-        }
+    if (useEdgeWeights)
+    {
+        from->VisitRegularSuccs(this, [&, to](BasicBlock* succ) {
+            *fromProfile &= succ->hasProfileWeight();
+            FlowEdge* edge       = fgGetPredForBlock(succ, from);
+            weight_t  edgeWeight = (edge->edgeWeightMin() + edge->edgeWeightMax()) / 2.0;
 
-        weight_t loopTakenRatio = loopEnterCount / (loopEnterCount + loopSkipCount);
+            if (succ == to)
+            {
+                takenCount += edgeWeight;
+            }
+            else
+            {
+                notTakenCount += edgeWeight;
+            }
+            return BasicBlockVisit::Continue;
+        });
 
-        JITDUMP("%s edge weights; loopEnterCount " FMT_WT " loopSkipCount " FMT_WT " taken ratio " FMT_WT "\n",
-                fgHaveValidEdgeWeights ? (useEdgeWeights ? "valid" : "ignored") : "invalid", loopEnterCount,
-                loopSkipCount, loopTakenRatio);
+        // Watch out for cases where edge weights were not properly maintained
+        // so that it appears no profile flow goes to 'to'.
+        //
+        useEdgeWeights = !fgProfileWeightsConsistent(takenCount, BB_ZERO_WEIGHT);
+    }
 
-        weight_t enterContribution = prevEntering->bbWeight * loopTakenRatio;
-        preheaderWeight += enterContribution;
+    if (!useEdgeWeights)
+    {
+        takenCount    = 0;
+        notTakenCount = 0;
 
-        // Normalize prevEntering -> preheader edge
-        FlowEdge* const edgeToPreheader = fgGetPredForBlock(preheader, prevEntering);
-        assert(edgeToPreheader != nullptr);
-        edgeToPreheader->setEdgeWeights(enterContribution, enterContribution, preheader);
+        from->VisitRegularSuccs(this, [&, to](BasicBlock* succ) {
+            *fromProfile &= succ->hasProfileWeight();
+            if (succ == to)
+            {
+                takenCount += succ->bbWeight;
+            }
+            else
+            {
+                notTakenCount += succ->bbWeight;
+            }
+
+            return BasicBlockVisit::Continue;
+        });
+    }
+
+    if (!*fromProfile)
+    {
+        return 1.0 / from->NumSucc(this);
+    }
+
+    if (fgProfileWeightsConsistent(takenCount, BB_ZERO_WEIGHT))
+    {
+        return 0;
+    }
+
+    weight_t likelihood = takenCount / (takenCount + notTakenCount);
+    return likelihood;
+}
+
+//-----------------------------------------------------------------------------
+// optSetWeightForPreheaderOrExit: Set the weight of a newly created preheader
+// or exit, after it has been added to the flowgraph.
+//
+// Parameters:
+//   loop  - The loop
+//   block - The new preheader or exit block
+//
+void Compiler::optSetWeightForPreheaderOrExit(FlowGraphNaturalLoop* loop, BasicBlock* block)
+{
+    bool hasProfWeight = true;
+
+    assert(block->GetUniqueSucc() != nullptr);
+    // Inherit first estimate from the target target; optEstimateEdgeLikelihood
+    // may use it in its estimate if we do not have edge weights to estimate
+    // from (we also assume the edges into 'block' already inherited their edge
+    // weights from the previous edge).
+    block->inheritWeight(block->GetTarget());
+
+    weight_t newWeight = BB_ZERO_WEIGHT;
+    for (FlowEdge* edge : block->PredEdges())
+    {
+        BasicBlock* predBlock = edge->getSourceBlock();
+
+        bool     fromProfile = false;
+        weight_t likelihood  = optEstimateEdgeLikelihood(predBlock, block, &fromProfile);
+        hasProfWeight &= fromProfile;
+
+        weight_t contribution = predBlock->bbWeight * likelihood;
+        JITDUMP("  Estimated likelihood " FMT_BB " -> " FMT_BB " to be " FMT_WT " (contribution: " FMT_WT ")\n",
+                predBlock->bbNum, block->bbNum, likelihood, contribution);
+
+        newWeight += contribution;
+
+        // Normalize pred -> new block weight
+        edge->setEdgeWeights(contribution, contribution, block);
     }
 
-    preheader->bbWeight = preheaderWeight;
+    block->RemoveFlags(BBF_PROF_WEIGHT | BBF_RUN_RARELY);
+
+    block->bbWeight = newWeight;
     if (hasProfWeight)
     {
-        preheader->SetFlags(BBF_PROF_WEIGHT);
+        block->SetFlags(BBF_PROF_WEIGHT);
     }
 
-    if (preheaderWeight == BB_ZERO_WEIGHT)
+    if (newWeight == BB_ZERO_WEIGHT)
     {
-        preheader->SetFlags(BBF_RUN_RARELY);
+        block->SetFlags(BBF_RUN_RARELY);
         return;
     }
 
-    // Normalize preheader -> header weight
-    FlowEdge* const edgeFromPreheader = fgGetPredForBlock(loop->GetHeader(), preheader);
-    assert(edgeFromPreheader != nullptr);
-    edgeFromPreheader->setEdgeWeights(preheader->bbWeight, preheader->bbWeight, loop->GetHeader());
+    // Normalize block -> target weight
+    FlowEdge* const edgeFromBlock = block->GetTargetEdge();
+    assert(edgeFromBlock != nullptr);
+    edgeFromBlock->setEdgeWeights(block->bbWeight, block->bbWeight, block->GetTarget());
 }
 
 /*****************************************************************************
@@ -3143,7 +3415,6 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu
         switch (oper)
         {
             /* Constants can usually be narrowed by changing their value */
-            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifndef TARGET_64BIT
             __int64 lval;
@@ -3245,8 +3516,8 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu
 
                 return true;
 
-            /* Operands that are in memory can usually be narrowed
-               simply by changing their gtType */
+                /* Operands that are in memory can usually be narrowed
+                   simply by changing their gtType */
 
             case GT_LCL_VAR:
                 /* We only allow narrowing long -> int for a GT_LCL_VAR */
@@ -3504,7 +3775,8 @@ void Compiler::optRecordSsaUses(GenTree* tree, BasicBlock* block)
         };
 
         SsaRecordingVisitor(Compiler* compiler, BasicBlock* block)
-            : GenTreeVisitor<SsaRecordingVisitor>(compiler), m_block(block)
+            : GenTreeVisitor<SsaRecordingVisitor>(compiler)
+            , m_block(block)
         {
         }
 
@@ -4341,7 +4613,11 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop*    loop,
             const char* m_failReason;
 #endif
 
-            Value(GenTree* node) : m_node(node), m_hoistable(false), m_cctorDependent(false), m_invariant(false)
+            Value(GenTree* node)
+                : m_node(node)
+                , m_hoistable(false)
+                , m_cctorDependent(false)
+                , m_invariant(false)
             {
 #ifdef DEBUG
                 m_failReason = "unset";
@@ -4377,6 +4653,15 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop*    loop,
                 // hence this check is not present in optIsCSEcandidate().
                 return true;
             }
+            else if ((node->gtFlags & GTF_ORDER_SIDEEFF) != 0)
+            {
+                // If a node has an order side effect, we can't hoist it at all: we don't know what the order
+                // dependence actually is. For example, assertion prop might have determined a node can't throw
+                // an exception, and eliminated the GTF_EXCEPT flag, replacing it with GTF_ORDER_SIDEEFF. We
+                // can't hoist because we might then hoist above the expression that led assertion prop to make
+                // that decision. This can happen in JitOptRepeat, where hoisting can follow assertion prop.
+                return false;
+            }
 
             // Tree must be a suitable CSE candidate for us to be able to hoist it.
             return m_compiler->optIsCSEcandidate(node);
@@ -4532,9 +4817,9 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop*    loop,
                 // To be invariant the variable must be in SSA ...
                 bool isInvariant = lclVar->HasSsaName();
                 // and the SSA definition must be outside the loop we're hoisting from ...
-                isInvariant = isInvariant &&
-                              !m_loop->ContainsBlock(
-                                  m_compiler->lvaGetDesc(lclNum)->GetPerSsaData(lclVar->GetSsaNum())->GetBlock());
+                isInvariant =
+                    isInvariant && !m_loop->ContainsBlock(
+                                       m_compiler->lvaGetDesc(lclNum)->GetPerSsaData(lclVar->GetSsaNum())->GetBlock());
 
                 // and the VN of the tree is considered invariant as well.
                 //
@@ -4567,7 +4852,7 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop*    loop,
                 }
                 else if (!top.m_hoistable)
                 {
-                    top.m_failReason = "not handled by cse";
+                    top.m_failReason = "not handled by hoisting or CSE";
                 }
 #endif
 
@@ -4650,7 +4935,7 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop*    loop,
                     treeIsHoistable = IsNodeHoistable(tree);
                     if (!treeIsHoistable)
                     {
-                        INDEBUG(failReason = "not handled by cse";)
+                        INDEBUG(failReason = "not handled by hoisting or CSE";)
                     }
                 }
 
@@ -4948,6 +5233,21 @@ void Compiler::optHoistCandidate(GenTree*              tree,
         return;
     }
 
+#if defined(DEBUG)
+
+    // Punt if we've reached the hoisting limit.
+    int      limit   = JitConfig.JitHoistLimit();
+    unsigned current = m_totalHoistedExpressions; // this doesn't include the current candidate yet
+
+    if ((limit >= 0) && (current >= static_cast<unsigned>(limit)))
+    {
+        JITDUMP("   ... not hoisting in " FMT_LP ", hoist count %u >= JitHoistLimit %u\n", loop->GetIndex(), current,
+                static_cast<unsigned>(limit));
+        return;
+    }
+
+#endif // defined(DEBUG)
+
     // Expression can be hoisted
     optPerformHoistExpr(tree, treeBb, loop);
 
@@ -4977,6 +5277,8 @@ void Compiler::optHoistCandidate(GenTree*              tree,
 
     // Record the hoisted expression in hoistCtxt
     hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
+
+    Metrics.HoistedExpressions++;
 }
 
 bool Compiler::optVNIsLoopInvariant(ValueNum vn, FlowGraphNaturalLoop* loop, VNSet* loopVnInvariantCache)
@@ -5083,43 +5385,39 @@ bool Compiler::optVNIsLoopInvariant(ValueNum vn, FlowGraphNaturalLoop* loop, VNS
 }
 
 //------------------------------------------------------------------------------
-// fgSetEHRegionForNewPreheader: Set the EH region for a newly inserted
-// preheader.
-//
-// In which EH region should the header live?
+// fgSetEHRegionForNewPreheaderOrExit: Set the EH region for a newly inserted
+// preheader or exit block.
 //
-// The preheader block is expected to have been added immediately before a
-// block `next` in the loop that is also in the same EH region as the header.
-// This is usually the lexically first block of the loop, but may also be the
-// header itself.
+// In which EH region should the block live?
 //
-// If the `next` block is NOT the first block of a `try` region, the preheader
-// can simply extend the header block's EH region.
+// If the `next` block is NOT the first block of a `try` region, the new block
+// can simply extend the next block's EH region.
 //
 // If the `next` block IS the first block of a `try`, we find its parent region
 // and use that. For mutual-protect regions, we need to find the actual parent,
 // as the block stores the most "nested" mutual region. For non-mutual-protect
 // regions, due to EH canonicalization, we are guaranteed that no other EH
 // regions begin on the same block, so looking to just the parent is
-// sufficient. Note that we can't just extend the EH region of the header to
-// the preheader, because the header will still be the target of backward
-// branches from within the loop. If those backward branches come from outside
-// the `try` (say, only the top half of the loop is a `try` region), then we
-// can't branch to a non-first `try` region block (you always must enter the
-// `try` in the first block).
+// sufficient.
+// Note that we can't just extend the EH region of the next block to the new
+// block, because it may still be the target of other branches. If those
+// branches come from outside the `try` then we can't branch to a non-first
+// `try` region block (you always must enter the `try` in the first block). For
+// example, for the preheader we can have backedges that come from outside the
+// `try` (if, say, only the top half of the loop is a `try` region). For exits,
+// we could similarly have branches to the old exit block from outside the `try`.
 //
 // Note that hoisting any code out of a try region, for example, to a preheader
 // block in a different EH region, needs to ensure that no exceptions will be
-// thrown.
+// thrown. Similar considerations are required for exits.
 //
 // Arguments:
-//    preheader - the new preheader block, which has already been added to the
-//                block list before a block inside the loop that shares EH
-//                region with the header.
+//    block - the new block, which has already been added to the
+//            block list.
 //
-void Compiler::fgSetEHRegionForNewPreheader(BasicBlock* preheader)
+void Compiler::fgSetEHRegionForNewPreheaderOrExit(BasicBlock* block)
 {
-    BasicBlock* next = preheader->Next();
+    BasicBlock* next = block->Next();
 
     if (bbIsTryBeg(next))
     {
@@ -5129,15 +5427,15 @@ void Compiler::fgSetEHRegionForNewPreheader(BasicBlock* preheader)
         if (newTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
         {
             // No EH try index.
-            preheader->clearTryIndex();
+            block->clearTryIndex();
         }
         else
         {
-            preheader->setTryIndex(newTryIndex);
+            block->setTryIndex(newTryIndex);
         }
 
         // What handler region to use? Use the same handler region as `next`.
-        preheader->copyHndIndex(next);
+        block->copyHndIndex(next);
     }
     else
     {
@@ -5174,7 +5472,9 @@ PhaseStatus Compiler::fgCanonicalizeFirstBB()
     return PhaseStatus::MODIFIED_EVERYTHING;
 }
 
-LoopSideEffects::LoopSideEffects() : VarInOut(VarSetOps::UninitVal()), VarUseDef(VarSetOps::UninitVal())
+LoopSideEffects::LoopSideEffects()
+    : VarInOut(VarSetOps::UninitVal())
+    , VarUseDef(VarSetOps::UninitVal())
 {
     for (MemoryKind mk : allMemoryKinds())
     {
@@ -5447,7 +5747,6 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk, FlowGraphNatura
                 case GT_XCHG:
                 case GT_CMPXCHG:
                 case GT_MEMORYBARRIER:
-                case GT_STORE_DYN_BLK:
                 {
                     memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
                 }
@@ -5723,21 +6022,21 @@ typedef JitHashTable<unsigned, JitSmallPrimitiveKeyFuncs<unsigned>, unsigned> Lc
 // Notes:
 //    This phase iterates over basic blocks starting with the first basic block until there is no unique
 //    basic block successor or until it detects a loop. It keeps track of local nodes it encounters.
-//    When it gets to an assignment to a local variable or a local field, it checks whether the assignment
+//    When it gets to a store to a local variable or a local field, it checks whether the store
 //    is the first reference to the local (or to the parent of the local field), and, if so,
 //    it may do one of two optimizations:
 //      1. If the following conditions are true:
 //            the local is untracked,
-//            the rhs of the assignment is 0,
+//            the value to store is 0,
 //            the local is guaranteed to be fully initialized in the prolog,
 //         then the explicit zero initialization is removed.
 //      2. If the following conditions are true:
-//            the assignment is to a local (and not a field),
-//            the local is not lvLiveInOutOfHndlr or no exceptions can be thrown between the prolog and the assignment,
-//            either the local has no gc pointers or there are no gc-safe points between the prolog and the assignment,
+//            the store is to a local (and not a field),
+//            the local is not lvLiveInOutOfHndlr or no exceptions can be thrown between the prolog and the store,
+//            either the local has no gc pointers or there are no gc-safe points between the prolog and the store,
 //         then the local is marked with lvHasExplicitInit which tells the codegen not to insert zero initialization
 //         for this local in the prolog.
-
+//
 void Compiler::optRemoveRedundantZeroInits()
 {
 #ifdef DEBUG
@@ -5880,7 +6179,7 @@ void Compiler::optRemoveRedundantZeroInits()
                             break;
                         }
 
-                        // The local hasn't been referenced before this assignment.
+                        // The local hasn't been referenced before this store.
                         bool removedExplicitZeroInit = false;
                         bool isEntire                = !tree->IsPartialLclFld(this);
 
@@ -5902,7 +6201,7 @@ void Compiler::optRemoveRedundantZeroInits()
                                 {
                                     // We are guaranteed to have a zero initialization in the prolog or a
                                     // dominating explicit zero initialization and the local hasn't been redefined
-                                    // between the prolog and this explicit zero initialization so the assignment
+                                    // between the prolog and this explicit zero initialization so the store
                                     // can be safely removed.
                                     if (tree == stmt->GetRootNode())
                                     {
diff --git a/src/coreclr/jit/patchpoint.cpp b/src/coreclr/jit/patchpoint.cpp
index 27b94470962e..71622ecfc3d7 100644
--- a/src/coreclr/jit/patchpoint.cpp
+++ b/src/coreclr/jit/patchpoint.cpp
@@ -34,7 +34,9 @@ class PatchpointTransformer
     Compiler* compiler;
 
 public:
-    PatchpointTransformer(Compiler* compiler) : ppCounterLclNum(BAD_VAR_NUM), compiler(compiler)
+    PatchpointTransformer(Compiler* compiler)
+        : ppCounterLclNum(BAD_VAR_NUM)
+        , compiler(compiler)
     {
     }
 
@@ -101,13 +103,12 @@ class PatchpointTransformer
     // Arguments:
     //    jumpKind - jump kind for the new basic block
     //    insertAfter - basic block, after which compiler has to insert the new one.
-    //    jumpDest - jump target for the new basic block. Defaults to nullptr.
     //
     // Return Value:
     //    new basic block.
-    BasicBlock* CreateAndInsertBasicBlock(BBKinds jumpKind, BasicBlock* insertAfter, BasicBlock* jumpDest = nullptr)
+    BasicBlock* CreateAndInsertBasicBlock(BBKinds jumpKind, BasicBlock* insertAfter)
     {
-        BasicBlock* block = compiler->fgNewBBafter(jumpKind, insertAfter, true, jumpDest);
+        BasicBlock* block = compiler->fgNewBBafter(jumpKind, insertAfter, true);
         block->SetFlags(BBF_IMPORTED);
         return block;
     }
@@ -143,21 +144,21 @@ class PatchpointTransformer
 
         // Current block now becomes the test block
         BasicBlock* remainderBlock = compiler->fgSplitBlockAtBeginning(block);
-        BasicBlock* helperBlock    = CreateAndInsertBasicBlock(BBJ_ALWAYS, block, block->Next());
+        BasicBlock* helperBlock    = CreateAndInsertBasicBlock(BBJ_ALWAYS, block);
 
         // Update flow and flags
-        block->SetCond(remainderBlock, helperBlock);
         block->SetFlags(BBF_INTERNAL);
+        helperBlock->SetFlags(BBF_BACKWARD_JUMP);
 
-        helperBlock->SetFlags(BBF_BACKWARD_JUMP | BBF_NONE_QUIRK);
-
+        assert(block->TargetIs(remainderBlock));
         FlowEdge* const falseEdge = compiler->fgAddRefPred(helperBlock, block);
-        FlowEdge* const trueEdge  = compiler->fgGetPredForBlock(remainderBlock, block);
+        FlowEdge* const trueEdge  = block->GetTargetEdge();
         trueEdge->setLikelihood(HIGH_PROBABILITY / 100.0);
         falseEdge->setLikelihood((100 - HIGH_PROBABILITY) / 100.0);
+        block->SetCond(trueEdge, falseEdge);
 
         FlowEdge* const newEdge = compiler->fgAddRefPred(remainderBlock, helperBlock);
-        newEdge->setLikelihood(1.0);
+        helperBlock->SetTargetEdge(newEdge);
 
         // Update weights
         remainderBlock->inheritWeight(block);
@@ -238,7 +239,7 @@ class PatchpointTransformer
         }
 
         // Update flow
-        block->SetKindAndTarget(BBJ_THROW);
+        block->SetKindAndTargetEdge(BBJ_THROW);
 
         // Add helper call
         //
diff --git a/src/coreclr/jit/phase.cpp b/src/coreclr/jit/phase.cpp
index 717d0a7d270d..199167d7d0c5 100644
--- a/src/coreclr/jit/phase.cpp
+++ b/src/coreclr/jit/phase.cpp
@@ -80,7 +80,15 @@ void Phase::PrePhase()
         }
         else
         {
-            printf("\n*************** Starting PHASE %s\n", m_name);
+            if (comp->opts.optRepeatActive)
+            {
+                printf("\n*************** Starting PHASE %s (OptRepeat iteration %d of %d)\n", m_name,
+                       comp->opts.optRepeatIteration, comp->opts.optRepeatCount);
+            }
+            else
+            {
+                printf("\n*************** Starting PHASE %s\n", m_name);
+            }
         }
     }
 #endif // DEBUG
@@ -124,7 +132,15 @@ void Phase::PostPhase(PhaseStatus status)
         }
         else
         {
-            printf("\n*************** Finishing PHASE %s%s\n", m_name, statusMessage);
+            if (comp->opts.optRepeatActive)
+            {
+                printf("\n*************** Finishing PHASE %s%s (OptRepeat iteration %d of %d)\n", m_name, statusMessage,
+                       comp->opts.optRepeatIteration, comp->opts.optRepeatCount);
+            }
+            else
+            {
+                printf("\n*************** Finishing PHASE %s%s\n", m_name, statusMessage);
+            }
         }
 
         if (doPostPhase && doPostPhaseDumps)
@@ -171,10 +187,7 @@ void Phase::PostPhase(PhaseStatus status)
             comp->fgDebugCheckLinkedLocals();
         }
 
-        if (comp->m_dfsTree != nullptr)
-        {
-            comp->fgDebugCheckDfsTree();
-        }
+        comp->fgDebugCheckFlowGraphAnnotations();
     }
 #endif // DEBUG
 }
diff --git a/src/coreclr/jit/phase.h b/src/coreclr/jit/phase.h
index 6288d596729d..0f3d461c2b13 100644
--- a/src/coreclr/jit/phase.h
+++ b/src/coreclr/jit/phase.h
@@ -34,14 +34,17 @@ class Phase
     virtual void Run();
 
 protected:
-    Phase(Compiler* _compiler, Phases _phase) : comp(_compiler), m_name(nullptr), m_phase(_phase)
+    Phase(Compiler* _compiler, Phases _phase)
+        : comp(_compiler)
+        , m_name(nullptr)
+        , m_phase(_phase)
     {
         m_name = PhaseNames[_phase];
     }
 
     virtual void        PrePhase();
     virtual PhaseStatus DoPhase() = 0;
-    virtual void PostPhase(PhaseStatus status);
+    virtual void        PostPhase(PhaseStatus status);
 
     Compiler*   comp;
     const char* m_name;
@@ -54,7 +57,9 @@ template <typename A>
 class ActionPhase final : public Phase
 {
 public:
-    ActionPhase(Compiler* _compiler, Phases _phase, A _action) : Phase(_compiler, _phase), action(_action)
+    ActionPhase(Compiler* _compiler, Phases _phase, A _action)
+        : Phase(_compiler, _phase)
+        , action(_action)
     {
     }
 
@@ -84,7 +89,8 @@ class CompilerPhase final : public Phase
 {
 public:
     CompilerPhase(Compiler* _compiler, Phases _phase, void (Compiler::*_action)())
-        : Phase(_compiler, _phase), action(_action)
+        : Phase(_compiler, _phase)
+        , action(_action)
     {
     }
 
@@ -114,7 +120,8 @@ class CompilerPhaseWithStatus final : public Phase
 {
 public:
     CompilerPhaseWithStatus(Compiler* _compiler, Phases _phase, PhaseStatus (Compiler::*_action)())
-        : Phase(_compiler, _phase), action(_action)
+        : Phase(_compiler, _phase)
+        , action(_action)
     {
     }
 
diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp
index e11d0619905e..e02a5f0e06ba 100644
--- a/src/coreclr/jit/promotion.cpp
+++ b/src/coreclr/jit/promotion.cpp
@@ -80,7 +80,9 @@ struct Access
 #endif
 
     Access(unsigned offset, var_types accessType, ClassLayout* layout)
-        : Layout(layout), Offset(offset), AccessType(accessType)
+        : Layout(layout)
+        , Offset(offset)
+        , AccessType(accessType)
     {
     }
 
@@ -220,7 +222,8 @@ bool AggregateInfo::OverlappingReplacements(unsigned      offset,
 //   numLocals - Number of locals to support in the map
 //
 AggregateInfoMap::AggregateInfoMap(CompAllocator allocator, unsigned numLocals)
-    : m_aggregates(allocator), m_numLocals(numLocals)
+    : m_aggregates(allocator)
+    , m_numLocals(numLocals)
 {
     m_lclNumToAggregateIndex = new (allocator) unsigned[numLocals];
     for (unsigned i = 0; i < numLocals; i++)
@@ -277,7 +280,9 @@ struct PrimitiveAccess
     unsigned  Offset;
     var_types AccessType;
 
-    PrimitiveAccess(unsigned offset, var_types accessType) : Offset(offset), AccessType(accessType)
+    PrimitiveAccess(unsigned offset, var_types accessType)
+        : Offset(offset)
+        , AccessType(accessType)
     {
     }
 };
@@ -290,7 +295,8 @@ class LocalUses
 
 public:
     LocalUses(Compiler* comp)
-        : m_accesses(comp->getAllocator(CMK_Promotion)), m_inducedAccesses(comp->getAllocator(CMK_Promotion))
+        : m_accesses(comp->getAllocator(CMK_Promotion))
+        , m_inducedAccesses(comp->getAllocator(CMK_Promotion))
     {
     }
 
@@ -973,7 +979,7 @@ class LocalsUseVisitor : public GenTreeVisitor<LocalsUseVisitor>
         , m_prom(prom)
         , m_candidateStores(prom->m_compiler->getAllocator(CMK_Promotion))
     {
-        m_uses = new (prom->m_compiler, CMK_Promotion) LocalUses*[prom->m_compiler->lvaCount]{};
+        m_uses = new (prom->m_compiler, CMK_Promotion) LocalUses* [prom->m_compiler->lvaCount] {};
     }
 
     //------------------------------------------------------------------------
@@ -1205,6 +1211,8 @@ class LocalsUseVisitor : public GenTreeVisitor<LocalsUseVisitor>
             }
         }
 
+        m_compiler->Metrics.PhysicallyPromotedFields += totalNumPromotions;
+
         if (totalNumPromotions <= 0)
         {
             return false;
@@ -1219,13 +1227,8 @@ class LocalsUseVisitor : public GenTreeVisitor<LocalsUseVisitor>
             for (Replacement& rep : reps)
             {
 #ifdef DEBUG
-                char buf[32];
-                sprintf_s(buf, sizeof(buf), "V%02u.[%03u..%03u)", agg->LclNum, rep.Offset,
-                          rep.Offset + genTypeSize(rep.AccessType));
-                size_t len  = strlen(buf) + 1;
-                char*  bufp = new (m_compiler, CMK_DebugOnly) char[len];
-                strcpy_s(bufp, len, buf);
-                rep.Description = bufp;
+                rep.Description = m_compiler->printfAlloc("V%02u.[%03u..%03u)", agg->LclNum, rep.Offset,
+                                                          rep.Offset + genTypeSize(rep.AccessType));
 #endif
 
                 rep.LclNum     = m_compiler->lvaGrabTemp(false DEBUGARG(rep.Description));
@@ -2272,7 +2275,9 @@ void ReplaceVisitor::InsertPreStatementWriteBacks()
             DoPreOrder = true,
         };
 
-        Visitor(Compiler* comp, ReplaceVisitor* replacer) : GenTreeVisitor(comp), m_replacer(replacer)
+        Visitor(Compiler* comp, ReplaceVisitor* replacer)
+            : GenTreeVisitor(comp)
+            , m_replacer(replacer)
         {
         }
 
@@ -2719,8 +2724,8 @@ void ReplaceVisitor::WriteBackBeforeUse(GenTree** use, unsigned lcl, unsigned of
 
         GenTreeOp* comma = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(),
                                                      Promotion::CreateWriteBack(m_compiler, lcl, rep), *use);
-        *use = comma;
-        use  = &comma->gtOp2;
+        *use             = comma;
+        use              = &comma->gtOp2;
 
         ClearNeedsWriteBack(rep);
         m_madeChanges = true;
diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h
index c421b019bc8f..89097d78cd10 100644
--- a/src/coreclr/jit/promotion.h
+++ b/src/coreclr/jit/promotion.h
@@ -31,7 +31,9 @@ struct Replacement
     const char* Description = "";
 #endif
 
-    Replacement(unsigned offset, var_types accessType) : Offset(offset), AccessType(accessType)
+    Replacement(unsigned offset, var_types accessType)
+        : Offset(offset)
+        , AccessType(accessType)
     {
     }
 
@@ -55,7 +57,9 @@ class StructSegments
         {
         }
 
-        Segment(unsigned start, unsigned end) : Start(start), End(end)
+        Segment(unsigned start, unsigned end)
+            : Start(start)
+            , End(end)
         {
         }
 
@@ -69,7 +73,8 @@ class StructSegments
     jitstd::vector<Segment> m_segments;
 
 public:
-    explicit StructSegments(CompAllocator allocator) : m_segments(allocator)
+    explicit StructSegments(CompAllocator allocator)
+        : m_segments(allocator)
     {
     }
 
@@ -96,7 +101,10 @@ struct AggregateInfo
     // Max offset in the struct local of the unpromoted part.
     unsigned UnpromotedMax = 0;
 
-    AggregateInfo(CompAllocator alloc, unsigned lclNum) : Replacements(alloc), LclNum(lclNum), Unpromoted(alloc)
+    AggregateInfo(CompAllocator alloc, unsigned lclNum)
+        : Replacements(alloc)
+        , LclNum(lclNum)
+        , Unpromoted(alloc)
     {
     }
 
@@ -115,7 +123,7 @@ class AggregateInfoMap
 
 public:
     AggregateInfoMap(CompAllocator allocator, unsigned numLocals);
-    void Add(AggregateInfo* agg);
+    void           Add(AggregateInfo* agg);
     AggregateInfo* Lookup(unsigned lclNum);
 
     jitstd::vector<AggregateInfo*>::iterator begin()
@@ -146,10 +154,10 @@ class Promotion
 
     StructSegments SignificantSegments(ClassLayout* layout);
 
-    void ExplicitlyZeroInitReplacementLocals(unsigned                           lclNum,
-                                             const jitstd::vector<Replacement>& replacements,
-                                             Statement**                        prevStmt);
-    void InsertInitStatement(Statement** prevStmt, GenTree* tree);
+    void            ExplicitlyZeroInitReplacementLocals(unsigned                           lclNum,
+                                                        const jitstd::vector<Replacement>& replacements,
+                                                        Statement**                        prevStmt);
+    void            InsertInitStatement(Statement** prevStmt, GenTree* tree);
     static GenTree* CreateWriteBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement);
     static GenTree* CreateReadBack(Compiler* compiler, unsigned structLclNum, const Replacement& replacement);
 
@@ -198,11 +206,12 @@ class Promotion
 
     bool HaveCandidateLocals();
 
-    static bool IsCandidateForPhysicalPromotion(LclVarDsc* dsc);
+    static bool     IsCandidateForPhysicalPromotion(LclVarDsc* dsc);
     static GenTree* EffectiveUser(Compiler::GenTreeStack& ancestors);
 
 public:
-    explicit Promotion(Compiler* compiler) : m_compiler(compiler)
+    explicit Promotion(Compiler* compiler)
+        : m_compiler(compiler)
     {
     }
 
@@ -218,12 +227,15 @@ class StructDeaths
     friend class PromotionLiveness;
 
 private:
-    StructDeaths(BitVec deaths, AggregateInfo* agg) : m_deaths(deaths), m_aggregate(agg)
+    StructDeaths(BitVec deaths, AggregateInfo* agg)
+        : m_deaths(deaths)
+        , m_aggregate(agg)
     {
     }
 
 public:
-    StructDeaths() : m_deaths(BitVecOps::UninitVal())
+    StructDeaths()
+        : m_deaths(BitVecOps::UninitVal())
     {
     }
 
@@ -236,26 +248,28 @@ struct BasicBlockLiveness;
 // Class to compute and track liveness information pertaining promoted structs.
 class PromotionLiveness
 {
-    Compiler*           m_compiler;
-    AggregateInfoMap&   m_aggregates;
-    BitVecTraits*       m_bvTraits                = nullptr;
-    unsigned*           m_structLclToTrackedIndex = nullptr;
-    unsigned            m_numVars                 = 0;
-    BasicBlockLiveness* m_bbInfo                  = nullptr;
-    bool                m_hasPossibleBackEdge     = false;
-    BitVec              m_liveIn;
-    BitVec              m_ehLiveVars;
+    Compiler*                                               m_compiler;
+    AggregateInfoMap&                                       m_aggregates;
+    BitVecTraits*                                           m_bvTraits                = nullptr;
+    unsigned*                                               m_structLclToTrackedIndex = nullptr;
+    unsigned                                                m_numVars                 = 0;
+    BasicBlockLiveness*                                     m_bbInfo                  = nullptr;
+    bool                                                    m_hasPossibleBackEdge     = false;
+    BitVec                                                  m_liveIn;
+    BitVec                                                  m_ehLiveVars;
     JitHashTable<GenTree*, JitPtrKeyFuncs<GenTree>, BitVec> m_aggDeaths;
 
 public:
     PromotionLiveness(Compiler* compiler, AggregateInfoMap& aggregates)
-        : m_compiler(compiler), m_aggregates(aggregates), m_aggDeaths(compiler->getAllocator(CMK_Promotion))
+        : m_compiler(compiler)
+        , m_aggregates(aggregates)
+        , m_aggDeaths(compiler->getAllocator(CMK_Promotion))
     {
     }
 
-    void Run();
-    bool IsReplacementLiveIn(BasicBlock* bb, unsigned structLcl, unsigned replacement);
-    bool IsReplacementLiveOut(BasicBlock* bb, unsigned structLcl, unsigned replacement);
+    void         Run();
+    bool         IsReplacementLiveIn(BasicBlock* bb, unsigned structLcl, unsigned replacement);
+    bool         IsReplacementLiveOut(BasicBlock* bb, unsigned structLcl, unsigned replacement);
     StructDeaths GetDeathsForStructLocal(GenTreeLclVarCommon* use);
 
 private:
@@ -297,7 +311,10 @@ class ReplaceVisitor : public GenTreeVisitor<ReplaceVisitor>
     };
 
     ReplaceVisitor(Promotion* prom, AggregateInfoMap& aggregates, PromotionLiveness* liveness)
-        : GenTreeVisitor(prom->m_compiler), m_promotion(prom), m_aggregates(aggregates), m_liveness(liveness)
+        : GenTreeVisitor(prom->m_compiler)
+        , m_promotion(prom)
+        , m_aggregates(aggregates)
+        , m_liveness(liveness)
     {
     }
 
diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp
index 693e0ed30a2b..d4f71b998352 100644
--- a/src/coreclr/jit/promotiondecomposition.cpp
+++ b/src/coreclr/jit/promotiondecomposition.cpp
@@ -275,7 +275,9 @@ class DecompositionPlan
         var_types PrimitiveType;
 
         RemainderStrategy(int type, unsigned primitiveOffset = 0, var_types primitiveType = TYP_UNDEF)
-            : Type(type), PrimitiveOffset(primitiveOffset), PrimitiveType(primitiveType)
+            : Type(type)
+            , PrimitiveOffset(primitiveOffset)
+            , PrimitiveType(primitiveType)
         {
         }
     };
@@ -727,8 +729,8 @@ class DecompositionPlan
     //   remainderStrategy - The strategy we are using for the remainder
     //   dump              - Whether to JITDUMP decisions made
     //
-    bool CanSkipEntry(const Entry&             entry,
-                      const StructDeaths&      deaths,
+    bool CanSkipEntry(const Entry&                               entry,
+                      const StructDeaths&                        deaths,
                       const RemainderStrategy& remainderStrategy DEBUGARG(bool dump = false))
     {
         if (entry.ToReplacement != nullptr)
@@ -760,7 +762,7 @@ class DecompositionPlan
             // If the destination has replacements we still have usable
             // liveness information for the remainder. This case happens if the
             // source was also promoted.
-            if (m_dstInvolvesReplacements && deaths.IsRemainderDying())
+            if (m_dstInvolvesReplacements && !m_hasNonRemainderUseOfStructLocal && deaths.IsRemainderDying())
             {
 #ifdef DEBUG
                 if (dump)
diff --git a/src/coreclr/jit/rangecheck.cpp b/src/coreclr/jit/rangecheck.cpp
index 6ac1c1c01646..2328dba7d636 100644
--- a/src/coreclr/jit/rangecheck.cpp
+++ b/src/coreclr/jit/rangecheck.cpp
@@ -96,7 +96,7 @@ bool RangeCheck::BetweenBounds(Range& range, GenTree* upper, int arrSize)
 #ifdef DEBUG
     if (m_pCompiler->verbose)
     {
-        printf("%s BetweenBounds <%d, ", range.ToString(m_pCompiler->getAllocatorDebugOnly()), 0);
+        printf("%s BetweenBounds <%d, ", range.ToString(m_pCompiler), 0);
         Compiler::printTreeID(upper);
         printf(">\n");
     }
@@ -359,7 +359,7 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, Statement* stmt, GenTree*
         return;
     }
 
-    JITDUMP("Range value %s\n", range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+    JITDUMP("Range value %s\n", range.ToString(m_pCompiler));
     m_pSearchPath->RemoveAll();
     Widen(block, treeIndex, &range);
 
@@ -461,7 +461,9 @@ bool RangeCheck::IsMonotonicallyIncreasing(GenTree* expr, bool rejectNegativeCon
     }
 
     // Remove hashtable entry for expr when we exit the present scope.
-    auto                                         code = [this, expr] { m_pSearchPath->Remove(expr); };
+    auto code = [this, expr] {
+        m_pSearchPath->Remove(expr);
+    };
     jitstd::utility::scoped_code<decltype(code)> finally(code);
 
     if (m_pSearchPath->GetCount() > MAX_SEARCH_DEPTH)
@@ -484,7 +486,7 @@ bool RangeCheck::IsMonotonicallyIncreasing(GenTree* expr, bool rejectNegativeCon
             return true;
         }
     }
-    // If the rhs expr is local, then try to find the def of the local.
+    // If the expr is local, then try to find the def of the local.
     else if (expr->IsLocal())
     {
         LclSsaVarDsc* ssaDef = GetSsaDefStore(expr->AsLclVarCommon());
@@ -519,7 +521,7 @@ bool RangeCheck::IsMonotonicallyIncreasing(GenTree* expr, bool rejectNegativeCon
     return false;
 }
 
-// Given a lclvar use, try to find the lclvar's defining assignment and its containing block.
+// Given a lclvar use, try to find the lclvar's defining store and its containing block.
 LclSsaVarDsc* RangeCheck::GetSsaDefStore(GenTreeLclVarCommon* lclUse)
 {
     unsigned ssaNum = lclUse->GetSsaNum();
@@ -917,7 +919,7 @@ void RangeCheck::MergeEdgeAssertions(ValueNum normalLclVN, ASSERT_VALARG_TP asse
                 break;
         }
         JITDUMP("The range after edge merging:");
-        JITDUMP(pRange->ToString(m_pCompiler->getAllocatorDebugOnly()));
+        JITDUMP(pRange->ToString(m_pCompiler));
         JITDUMP("\n");
     }
 }
@@ -1007,7 +1009,7 @@ Range RangeCheck::ComputeRangeForBinOp(BasicBlock* block, GenTreeOp* binop, bool
         if (icon >= 0)
         {
             Range range(Limit(Limit::keConstant, 0), Limit(Limit::keConstant, icon));
-            JITDUMP("Limit range to %s\n", range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+            JITDUMP("Limit range to %s\n", range.ToString(m_pCompiler));
             return range;
         }
         // Generalized range computation not implemented for these operators
@@ -1068,32 +1070,28 @@ Range RangeCheck::ComputeRangeForBinOp(BasicBlock* block, GenTreeOp* binop, bool
     if (binop->OperIs(GT_ADD))
     {
         r = RangeOps::Add(op1Range, op2Range);
-        JITDUMP("BinOp add ranges %s %s = %s\n", op1Range.ToString(m_pCompiler->getAllocatorDebugOnly()),
-                op2Range.ToString(m_pCompiler->getAllocatorDebugOnly()),
-                r.ToString(m_pCompiler->getAllocatorDebugOnly()));
+        JITDUMP("BinOp add ranges %s %s = %s\n", op1Range.ToString(m_pCompiler), op2Range.ToString(m_pCompiler),
+                r.ToString(m_pCompiler));
     }
     else if (binop->OperIs(GT_MUL))
     {
         r = RangeOps::Multiply(op1Range, op2Range);
-        JITDUMP("BinOp multiply ranges %s %s = %s\n", op1Range.ToString(m_pCompiler->getAllocatorDebugOnly()),
-                op2Range.ToString(m_pCompiler->getAllocatorDebugOnly()),
-                r.ToString(m_pCompiler->getAllocatorDebugOnly()));
+        JITDUMP("BinOp multiply ranges %s %s = %s\n", op1Range.ToString(m_pCompiler), op2Range.ToString(m_pCompiler),
+                r.ToString(m_pCompiler));
     }
     else if (binop->OperIs(GT_LSH))
     {
         // help the next step a bit, convert the LSH rhs to a multiply
         Range convertedOp2Range = RangeOps::ConvertShiftToMultiply(op2Range);
         r                       = RangeOps::Multiply(op1Range, convertedOp2Range);
-        JITDUMP("BinOp multiply ranges %s %s = %s\n", op1Range.ToString(m_pCompiler->getAllocatorDebugOnly()),
-                convertedOp2Range.ToString(m_pCompiler->getAllocatorDebugOnly()),
-                r.ToString(m_pCompiler->getAllocatorDebugOnly()));
+        JITDUMP("BinOp multiply ranges %s %s = %s\n", op1Range.ToString(m_pCompiler),
+                convertedOp2Range.ToString(m_pCompiler), r.ToString(m_pCompiler));
     }
     else if (binop->OperIs(GT_RSH))
     {
         r = RangeOps::ShiftRight(op1Range, op2Range);
-        JITDUMP("Right shift range: %s >> %s = %s\n", op1Range.ToString(m_pCompiler->getAllocatorDebugOnly()),
-                op2Range.ToString(m_pCompiler->getAllocatorDebugOnly()),
-                r.ToString(m_pCompiler->getAllocatorDebugOnly()));
+        JITDUMP("Right shift range: %s >> %s = %s\n", op1Range.ToString(m_pCompiler), op2Range.ToString(m_pCompiler),
+                r.ToString(m_pCompiler));
     }
     return r;
 }
@@ -1127,7 +1125,7 @@ Range RangeCheck::GetRangeFromType(var_types type)
 // Compute the range for a local var definition.
 Range RangeCheck::ComputeRangeForLocalDef(BasicBlock*          block,
                                           GenTreeLclVarCommon* lcl,
-                                          bool monIncreasing DEBUGARG(int indent))
+                                          bool monIncreasing   DEBUGARG(int indent))
 {
     LclSsaVarDsc* ssaDef = GetSsaDefStore(lcl);
     if (ssaDef == nullptr)
@@ -1265,9 +1263,8 @@ bool RangeCheck::DoesBinOpOverflow(BasicBlock* block, GenTreeOp* binop)
         return true;
     }
 
-    JITDUMP("Checking bin op overflow %s %s %s\n", GenTree::OpName(binop->OperGet()),
-            op1Range->ToString(m_pCompiler->getAllocatorDebugOnly()),
-            op2Range->ToString(m_pCompiler->getAllocatorDebugOnly()));
+    JITDUMP("Checking bin op overflow %s %s %s\n", GenTree::OpName(binop->OperGet()), op1Range->ToString(m_pCompiler),
+            op2Range->ToString(m_pCompiler));
 
     if (binop->OperIs(GT_ADD))
     {
@@ -1482,16 +1479,15 @@ Range RangeCheck::ComputeRange(BasicBlock* block, GenTree* expr, bool monIncreas
             assert(!argRange.LowerLimit().IsUndef());
             assert(!argRange.UpperLimit().IsUndef());
             MergeAssertion(block, use.GetNode(), &argRange DEBUGARG(indent + 1));
-            JITDUMP("Merging ranges %s %s:", range.ToString(m_pCompiler->getAllocatorDebugOnly()),
-                    argRange.ToString(m_pCompiler->getAllocatorDebugOnly()));
+            JITDUMP("Merging ranges %s %s:", range.ToString(m_pCompiler), argRange.ToString(m_pCompiler));
             range = RangeOps::Merge(range, argRange, monIncreasing);
-            JITDUMP("%s\n", range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+            JITDUMP("%s\n", range.ToString(m_pCompiler));
         }
     }
     else if (varTypeIsSmall(expr))
     {
         range = GetRangeFromType(expr->TypeGet());
-        JITDUMP("%s\n", range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+        JITDUMP("%s\n", range.ToString(m_pCompiler));
     }
     else if (expr->OperIs(GT_COMMA))
     {
@@ -1546,7 +1542,7 @@ Range RangeCheck::GetRange(BasicBlock* block, GenTree* expr, bool monIncreasing
     {
         Indent(indent);
         JITDUMP("   %s Range [%06d] => %s\n", (pRange == nullptr) ? "Computed" : "Cached", Compiler::dspTreeID(expr),
-                range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+                range.ToString(m_pCompiler));
         Indent(indent);
         JITDUMP("}\n", expr);
     }
@@ -1572,7 +1568,10 @@ struct MapMethodDefsData
     BasicBlock* block;
     Statement*  stmt;
 
-    MapMethodDefsData(RangeCheck* rc, BasicBlock* block, Statement* stmt) : rc(rc), block(block), stmt(stmt)
+    MapMethodDefsData(RangeCheck* rc, BasicBlock* block, Statement* stmt)
+        : rc(rc)
+        , block(block)
+        , stmt(stmt)
     {
     }
 };
diff --git a/src/coreclr/jit/rangecheck.h b/src/coreclr/jit/rangecheck.h
index 8e63147c3122..9d7b06438717 100644
--- a/src/coreclr/jit/rangecheck.h
+++ b/src/coreclr/jit/rangecheck.h
@@ -83,20 +83,28 @@ struct Limit
         keUnknown,   // The limit could not be determined.
     };
 
-    Limit() : type(keUndef)
+    Limit()
+        : type(keUndef)
     {
     }
 
-    Limit(LimitType type) : type(type)
+    Limit(LimitType type)
+        : type(type)
     {
     }
 
-    Limit(LimitType type, int cns) : cns(cns), vn(ValueNumStore::NoVN), type(type)
+    Limit(LimitType type, int cns)
+        : cns(cns)
+        , vn(ValueNumStore::NoVN)
+        , type(type)
     {
         assert(type == keConstant);
     }
 
-    Limit(LimitType type, ValueNum vn, int cns) : cns(cns), vn(vn), type(type)
+    Limit(LimitType type, ValueNum vn, int cns)
+        : cns(cns)
+        , vn(vn)
+        , type(type)
     {
         assert(type == keBinOpArray);
     }
@@ -209,10 +217,8 @@ struct Limit
         return false;
     }
 #ifdef DEBUG
-    const char* ToString(CompAllocator alloc)
+    const char* ToString(Compiler* comp)
     {
-        unsigned size = 64;
-        char*    buf  = alloc.allocate<char>(size);
         switch (type)
         {
             case keUndef:
@@ -225,12 +231,10 @@ struct Limit
                 return "Dependent";
 
             case keBinOpArray:
-                sprintf_s(buf, size, FMT_VN " + %d", vn, cns);
-                return buf;
+                return comp->printfAlloc(FMT_VN " + %d", vn, cns);
 
             case keConstant:
-                sprintf_s(buf, size, "%d", cns);
-                return buf;
+                return comp->printfAlloc("%d", cns);
         }
         unreached();
     }
@@ -246,11 +250,15 @@ struct Range
     Limit uLimit;
     Limit lLimit;
 
-    Range(const Limit& limit) : uLimit(limit), lLimit(limit)
+    Range(const Limit& limit)
+        : uLimit(limit)
+        , lLimit(limit)
     {
     }
 
-    Range(const Limit& lLimit, const Limit& uLimit) : uLimit(uLimit), lLimit(lLimit)
+    Range(const Limit& lLimit, const Limit& uLimit)
+        : uLimit(uLimit)
+        , lLimit(lLimit)
     {
     }
 
@@ -265,12 +273,9 @@ struct Range
     }
 
 #ifdef DEBUG
-    char* ToString(CompAllocator alloc)
+    const char* ToString(Compiler* comp)
     {
-        size_t size = 64;
-        char*  buf  = alloc.allocate<char>(size);
-        sprintf_s(buf, size, "<%s, %s>", lLimit.ToString(alloc), uLimit.ToString(alloc));
-        return buf;
+        return comp->printfAlloc("<%s, %s>", lLimit.ToString(comp), uLimit.ToString(comp));
     }
 #endif
 };
@@ -593,7 +598,10 @@ class RangeCheck
         BasicBlock*          block;
         Statement*           stmt;
         GenTreeLclVarCommon* tree;
-        Location(BasicBlock* block, Statement* stmt, GenTreeLclVarCommon* tree) : block(block), stmt(stmt), tree(tree)
+        Location(BasicBlock* block, Statement* stmt, GenTreeLclVarCommon* tree)
+            : block(block)
+            , stmt(stmt)
+            , tree(tree)
         {
         }
 
@@ -683,7 +691,7 @@ class RangeCheck
     // Does the binary operation between the operands overflow? Check recursively.
     bool DoesBinOpOverflow(BasicBlock* block, GenTreeOp* binop);
 
-    // Does the phi operands involve an assignment that could overflow?
+    // Do the phi operands involve a definition that could overflow?
     bool DoesPhiOverflow(BasicBlock* block, GenTree* expr);
 
     // Find the def of the "expr" local and recurse on the arguments if any of them involve a
@@ -702,9 +710,7 @@ class RangeCheck
     // Is the binary operation increasing the value.
     bool IsBinOpMonotonicallyIncreasing(GenTreeOp* binop);
 
-    // Given an "expr" trace its rhs and their definitions to check if all the assignments
-    // are monotonically increasing.
-    //
+    // Given an expression trace its value to check if it is monotonically increasing.
     bool IsMonotonicallyIncreasing(GenTree* tree, bool rejectNegativeConst);
 
     // We allocate a budget to avoid walking long UD chains. When traversing each link in the UD
diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp
index 9f714e76dff5..fb1dc80b7af1 100644
--- a/src/coreclr/jit/rationalize.cpp
+++ b/src/coreclr/jit/rationalize.cpp
@@ -399,7 +399,8 @@ PhaseStatus Rationalizer::DoPhase()
         };
 
         RationalizeVisitor(Rationalizer& rationalizer)
-            : GenTreeVisitor<RationalizeVisitor>(rationalizer.comp), m_rationalizer(rationalizer)
+            : GenTreeVisitor<RationalizeVisitor>(rationalizer.comp)
+            , m_rationalizer(rationalizer)
         {
         }
 
diff --git a/src/coreclr/jit/rationalize.h b/src/coreclr/jit/rationalize.h
index 65264f829458..a8651b2e5b8c 100644
--- a/src/coreclr/jit/rationalize.h
+++ b/src/coreclr/jit/rationalize.h
@@ -55,7 +55,8 @@ class Rationalizer final : public Phase
     Compiler::fgWalkResult RewriteNode(GenTree** useEdge, Compiler::GenTreeStack& parents);
 };
 
-inline Rationalizer::Rationalizer(Compiler* _comp) : Phase(_comp, PHASE_RATIONALIZE)
+inline Rationalizer::Rationalizer(Compiler* _comp)
+    : Phase(_comp, PHASE_RATIONALIZE)
 {
 }
 
diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp
index 3ea6142de4cd..d4a678875848 100644
--- a/src/coreclr/jit/redundantbranchopts.cpp
+++ b/src/coreclr/jit/redundantbranchopts.cpp
@@ -24,7 +24,9 @@ PhaseStatus Compiler::optRedundantBranches()
     public:
         bool madeChanges;
 
-        OptRedundantBranchesDomTreeVisitor(Compiler* compiler) : DomTreeVisitor(compiler), madeChanges(false)
+        OptRedundantBranchesDomTreeVisitor(Compiler* compiler)
+            : DomTreeVisitor(compiler)
+            , madeChanges(false)
         {
         }
 
@@ -927,6 +929,7 @@ bool Compiler::optRedundantBranch(BasicBlock* const block)
     JITDUMP("\nRedundant branch opt in " FMT_BB ":\n", block->bbNum);
 
     fgMorphBlockStmt(block, stmt DEBUGARG(__FUNCTION__));
+    Metrics.RedundantBranchesEliminated++;
     return true;
 }
 
@@ -1025,8 +1028,8 @@ bool Compiler::optJumpThreadCheck(BasicBlock* const block, BasicBlock* const dom
     // Since flow is going to bypass block, make sure there
     // is nothing in block that can cause a side effect.
     //
-    // For non-PHI RBO, we neglect PHI assignments. This can leave SSA
-    // in an incorrect state but so far it has not yet caused problems.
+    // For non-PHI RBO, we neglect PHI stores. This can leave SSA in
+    // an incorrect state but so far it has not yet caused problems.
     //
     // For PHI-based RBO we need to be more cautious and insist that
     // any PHI is locally consumed, so that if we bypass the block we
@@ -1599,7 +1602,7 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti)
     // If this pred is in the set that will reuse block, do nothing.
     // Else revise pred to branch directly to the appropriate successor of block.
     //
-    for (BasicBlock* const predBlock : jti.m_block->PredBlocks())
+    for (BasicBlock* const predBlock : jti.m_block->PredBlocksEditing())
     {
         // If this was an ambiguous pred, skip.
         //
@@ -1683,6 +1686,7 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti)
 
     // We optimized.
     //
+    Metrics.JumpThreadingsPerformed++;
     fgModified = true;
     return true;
 }
@@ -1890,12 +1894,12 @@ bool Compiler::optRedundantRelop(BasicBlock* const block)
             break;
         }
 
-        GenTree* const prevTreeData = prevTree->AsLclVar()->Data();
+        GenTree* const prevTreeValue = prevTree->AsLclVar()->Data();
 
         // If prevTree has side effects, bail, unless it is in the immediately preceding statement.
         // We'll handle exceptional side effects with VNs below.
         //
-        if (((prevTree->gtFlags & (GTF_CALL | GTF_ORDER_SIDEEFF)) != 0) || ((prevTreeData->gtFlags & GTF_ASG) != 0))
+        if (((prevTree->gtFlags & (GTF_CALL | GTF_ORDER_SIDEEFF)) != 0) || ((prevTreeValue->gtFlags & GTF_ASG) != 0))
         {
             if (prevStmt->GetNextStmt() != stmt)
             {
@@ -1909,13 +1913,13 @@ bool Compiler::optRedundantRelop(BasicBlock* const block)
 
         // If we are seeing PHIs we have run out of interesting stmts.
         //
-        if (prevTreeData->OperIs(GT_PHI))
+        if (prevTreeValue->OperIs(GT_PHI))
         {
             JITDUMP(" -- prev tree is a phi\n");
             break;
         }
 
-        // Figure out what local is assigned here.
+        // Figure out what local is defined here.
         //
         const unsigned   prevTreeLclNum = prevTree->AsLclVarCommon()->GetLclNum();
         LclVarDsc* const prevTreeLclDsc = lvaGetDesc(prevTreeLclNum);
@@ -1942,7 +1946,7 @@ bool Compiler::optRedundantRelop(BasicBlock* const block)
         // If the normal liberal VN of RHS is the normal liberal VN of the current tree, or is "related",
         // consider forward sub.
         //
-        const ValueNum                  domCmpVN        = vnStore->VNNormalValue(prevTreeData->GetVN(VNK_Liberal));
+        const ValueNum                  domCmpVN        = vnStore->VNNormalValue(prevTreeValue->GetVN(VNK_Liberal));
         bool                            matched         = false;
         ValueNumStore::VN_RELATION_KIND vnRelationMatch = ValueNumStore::VN_RELATION_KIND::VRK_Same;
 
@@ -1965,11 +1969,11 @@ bool Compiler::optRedundantRelop(BasicBlock* const block)
 
         JITDUMP("  -- prev tree has relop with %s liberal VN\n", ValueNumStore::VNRelationString(vnRelationMatch));
 
-        // If the jump tree VN has exceptions, verify that the RHS tree has a superset.
+        // If the jump tree VN has exceptions, verify that the value tree has a superset.
         //
         if (treeExcVN != vnStore->VNForEmptyExcSet())
         {
-            const ValueNum prevTreeExcVN = vnStore->VNExceptionSet(prevTreeData->GetVN(VNK_Liberal));
+            const ValueNum prevTreeExcVN = vnStore->VNExceptionSet(prevTreeValue->GetVN(VNK_Liberal));
 
             if (!vnStore->VNExcIsSubset(prevTreeExcVN, treeExcVN))
             {
@@ -1978,14 +1982,14 @@ bool Compiler::optRedundantRelop(BasicBlock* const block)
             }
         }
 
-        // See if we can safely move a copy of prevTreeRHS later, to replace tree.
+        // See if we can safely move a copy of prevTreeValue later, to replace tree.
         // We can, if none of its lcls are killed.
         //
         bool interferes = false;
 
         for (unsigned int i = 0; i < definedLocalsCount; i++)
         {
-            if (gtTreeHasLocalRead(prevTreeData, definedLocals[i]))
+            if (gtTreeHasLocalRead(prevTreeValue, definedLocals[i]))
             {
                 JITDUMP(" -- prev tree ref to V%02u interferes\n", definedLocals[i]);
                 interferes = true;
@@ -1998,7 +2002,7 @@ bool Compiler::optRedundantRelop(BasicBlock* const block)
             break;
         }
 
-        if (gtMayHaveStoreInterference(prevTreeData, tree))
+        if (gtMayHaveStoreInterference(prevTreeValue, tree))
         {
             JITDUMP(" -- prev tree has an embedded store that interferes with [%06u]\n", dspTreeID(tree));
             break;
@@ -2006,7 +2010,7 @@ bool Compiler::optRedundantRelop(BasicBlock* const block)
 
         // Heuristic: only forward sub a relop
         //
-        if (!prevTreeData->OperIsCompare())
+        if (!prevTreeValue->OperIsCompare())
         {
             JITDUMP(" -- prev tree is not relop\n");
             continue;
@@ -2022,7 +2026,7 @@ bool Compiler::optRedundantRelop(BasicBlock* const block)
             continue;
         }
 
-        if ((prevTreeData->gtFlags & GTF_GLOB_REF) != 0)
+        if ((prevTreeValue->gtFlags & GTF_GLOB_REF) != 0)
         {
             bool hasExtraUses = false;
 
@@ -2049,7 +2053,7 @@ bool Compiler::optRedundantRelop(BasicBlock* const block)
         }
 
         JITDUMP(" -- prev tree is viable candidate for relop fwd sub!\n");
-        candidateTree       = prevTreeData;
+        candidateTree       = prevTreeValue;
         candidateStmt       = prevStmt;
         candidateVnRelation = vnRelationMatch;
     }
diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp
index fa2ef10764a5..ea33ea50cf41 100644
--- a/src/coreclr/jit/regalloc.cpp
+++ b/src/coreclr/jit/regalloc.cpp
@@ -104,23 +104,11 @@ regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc
 
     if (regState->rsIsFloat)
     {
-        noway_assert(inArgMask & RBM_FLTARG_REGS);
+        assert((inArgMask & RBM_FLTARG_REGS) != RBM_NONE);
     }
-    else //  regState is for the integer registers
+    else
     {
-        // This might be the fixed return buffer register argument (on ARM64)
-        // We check and allow inArgReg to be theFixedRetBuffReg
-        if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
-        {
-            // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
-            noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
-            // We should have recorded the variable number for the return buffer arg
-            noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
-        }
-        else // we have a regular arg
-        {
-            noway_assert(inArgMask & RBM_ARG_REGS);
-        }
+        assert((inArgMask & fullIntArgRegMask(info.compCallConv)) != RBM_NONE);
     }
 
     regState->rsCalleeRegArgMaskLiveIn |= inArgMask;
@@ -262,6 +250,16 @@ bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
     }
 #endif // TARGET_LOONGARCH64
 
+#ifdef TARGET_RISCV64
+    // TODO-RISCV64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog
+    // can handle non-frame pointer frames.
+    if (!result)
+    {
+        INDEBUG(reason = "Temporary RISCV64 force frame pointer");
+        result = true;
+    }
+#endif // TARGET_RISCV64
+
 #ifdef DEBUG
     if ((result == true) && (wbReason != nullptr))
     {
diff --git a/src/coreclr/jit/registerargconvention.h b/src/coreclr/jit/registerargconvention.h
index 858efdc9d22c..840f7adc4fce 100644
--- a/src/coreclr/jit/registerargconvention.h
+++ b/src/coreclr/jit/registerargconvention.h
@@ -29,18 +29,16 @@ struct InitVarDscInfo
     bool hasSplitParam;
 #endif // TARGET_ARM || TARGET_RISCV64
 
-#if FEATURE_FASTTAILCALL
-    // It is used to calculate argument stack size information in byte
+    // Bytes passed on the stack (including things like padding after structs)
     unsigned stackArgSize;
-#endif // FEATURE_FASTTAILCALL
 
 public:
     // set to initial values
     void Init(LclVarDsc* lvaTable, bool _hasRetBufArg, unsigned _maxIntRegArgNum, unsigned _maxFloatRegArgNum)
     {
         hasRetBufArg      = _hasRetBufArg;
-        varDsc            = &lvaTable[0]; // the first argument LclVar 0
-        varNum            = 0;            // the first argument varNum 0
+        varDsc            = lvaTable; // the first argument LclVar 0
+        varNum            = 0;        // the first argument varNum 0
         intRegArgNum      = 0;
         floatRegArgNum    = 0;
         maxIntRegArgNum   = _maxIntRegArgNum;
@@ -55,9 +53,7 @@ struct InitVarDscInfo
         hasSplitParam = false;
 #endif // TARGET_ARM || TARGET_RISCV64
 
-#if FEATURE_FASTTAILCALL
         stackArgSize = 0;
-#endif // FEATURE_FASTTAILCALL
     }
 
     // return ref to current register arg for this type
@@ -111,6 +107,12 @@ struct InitVarDscInfo
 
 #endif // TARGET_ARM
 
+    void nextParam()
+    {
+        varDsc++;
+        varNum++;
+    }
+
 private:
     // return max register arg for this type
     unsigned maxRegArgNum(var_types type)
diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp
index cc6e7bac12cb..151b42fdc8ef 100644
--- a/src/coreclr/jit/regset.cpp
+++ b/src/coreclr/jit/regset.cpp
@@ -117,6 +117,16 @@ void RegSet::rsClearRegsModified()
 #endif // DEBUG
 
     rsModifiedRegsMask = RBM_NONE;
+
+#ifdef SWIFT_SUPPORT
+    // If this method has a SwiftError* parameter, we will return SwiftError::Value in REG_SWIFT_ERROR,
+    // so don't treat it as callee-save.
+    if (m_rsCompiler->lvaSwiftErrorArg != BAD_VAR_NUM)
+    {
+        rsAllCalleeSavedMask &= ~RBM_SWIFT_ERROR;
+        rsIntCalleeSavedMask &= ~RBM_SWIFT_ERROR;
+    }
+#endif // SWIFT_SUPPORT
 }
 
 void RegSet::rsSetRegsModified(regMaskTP mask DEBUGARG(bool suppressDump))
@@ -235,7 +245,9 @@ void RegSet::SetMaskVars(regMaskTP newMaskVars)
 
 /*****************************************************************************/
 
-RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo) : m_rsCompiler(compiler), m_rsGCInfo(gcInfo)
+RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo)
+    : m_rsCompiler(compiler)
+    , m_rsGCInfo(gcInfo)
 {
     /* Initialize the spill logic */
 
@@ -258,6 +270,11 @@ RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo) : m_rsCompiler(compiler), m_r
     rsMaskPreSpillAlign  = RBM_NONE;
 #endif
 
+#ifdef SWIFT_SUPPORT
+    rsAllCalleeSavedMask = RBM_CALLEE_SAVED;
+    rsIntCalleeSavedMask = RBM_INT_CALLEE_SAVED;
+#endif // SWIFT_SUPPORT
+
 #ifdef DEBUG
     rsModifiedRegsMaskInitialized = false;
 #endif // DEBUG
@@ -431,9 +448,9 @@ void RegSet::rsSpillTree(regNumber reg, GenTree* tree, unsigned regIdx /* =0 */)
 
 #if defined(TARGET_X86)
 /*****************************************************************************
-*
-*  Spill the top of the FP x87 stack.
-*/
+ *
+ *  Spill the top of the FP x87 stack.
+ */
 void RegSet::rsSpillFPStack(GenTreeCall* call)
 {
     SpillDsc* spill;
@@ -901,7 +918,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 regNumber genRegArgNext(regNumber argReg)
 {
-    assert(isValidIntArgReg(argReg) || isValidFloatArgReg(argReg));
+    assert(isValidIntArgReg(argReg, CorInfoCallConvExtension::Managed) || isValidFloatArgReg(argReg));
 
     switch (argReg)
     {
diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h
index 73eb08aa943e..dae93baebad3 100644
--- a/src/coreclr/jit/regset.h
+++ b/src/coreclr/jit/regset.h
@@ -58,7 +58,7 @@ class RegSet
         TempDsc*  spillTemp; // the temp holding the spilled value
 
         static SpillDsc* alloc(Compiler* pComp, RegSet* regSet, var_types type);
-        static void freeDsc(RegSet* regSet, SpillDsc* spillDsc);
+        static void      freeDsc(RegSet* regSet, SpillDsc* spillDsc);
     };
 
     //-------------------------------------------------------------------------
@@ -74,6 +74,14 @@ class RegSet
     bool rsModifiedRegsMaskInitialized; // Has rsModifiedRegsMask been initialized? Guards against illegal use.
 #endif                                  // DEBUG
 
+#ifdef SWIFT_SUPPORT
+    regMaskTP rsAllCalleeSavedMask;
+    regMaskTP rsIntCalleeSavedMask;
+#else  // !SWIFT_SUPPORT
+    static constexpr regMaskTP rsAllCalleeSavedMask = RBM_CALLEE_SAVED;
+    static constexpr regMaskTP rsIntCalleeSavedMask = RBM_INT_CALLEE_SAVED;
+#endif // !SWIFT_SUPPORT
+
 public:
     regMaskTP rsGetModifiedRegsMask() const
     {
@@ -81,6 +89,32 @@ class RegSet
         return rsModifiedRegsMask;
     }
 
+    regMaskTP rsGetModifiedCalleeSavedRegsMask() const
+    {
+        assert(rsModifiedRegsMaskInitialized);
+        return (rsModifiedRegsMask & rsAllCalleeSavedMask);
+    }
+
+    regMaskTP rsGetModifiedIntCalleeSavedRegsMask() const
+    {
+        assert(rsModifiedRegsMaskInitialized);
+        return (rsModifiedRegsMask & rsIntCalleeSavedMask);
+    }
+
+#ifdef TARGET_AMD64
+    regMaskTP rsGetModifiedOsrIntCalleeSavedRegsMask() const
+    {
+        assert(rsModifiedRegsMaskInitialized);
+        return (rsModifiedRegsMask & (rsIntCalleeSavedMask | RBM_EBP));
+    }
+#endif // TARGET_AMD64
+
+    regMaskTP rsGetModifiedFltCalleeSavedRegsMask() const
+    {
+        assert(rsModifiedRegsMaskInitialized);
+        return (rsModifiedRegsMask & RBM_FLT_CALLEE_SAVED);
+    }
+
     void rsClearRegsModified();
 
     void rsSetRegsModified(regMaskTP mask DEBUGARG(bool suppressDump = false));
@@ -179,14 +213,14 @@ class RegSet
     };
 
     static var_types tmpNormalizeType(var_types type);
-    TempDsc* tmpGetTemp(var_types type); // get temp for the given type
-    void tmpRlsTemp(TempDsc* temp);
-    TempDsc* tmpFindNum(int temp, TEMP_USAGE_TYPE usageType = TEMP_USAGE_FREE) const;
+    TempDsc*         tmpGetTemp(var_types type); // get temp for the given type
+    void             tmpRlsTemp(TempDsc* temp);
+    TempDsc*         tmpFindNum(int temp, TEMP_USAGE_TYPE usageType = TEMP_USAGE_FREE) const;
 
     void     tmpEnd();
     TempDsc* tmpListBeg(TEMP_USAGE_TYPE usageType = TEMP_USAGE_FREE) const;
     TempDsc* tmpListNxt(TempDsc* curTemp, TEMP_USAGE_TYPE usageType = TEMP_USAGE_FREE) const;
-    void tmpDone();
+    void     tmpDone();
 
 #ifdef DEBUG
     bool tmpAllFree() const;
diff --git a/src/coreclr/jit/scev.cpp b/src/coreclr/jit/scev.cpp
new file mode 100644
index 000000000000..491ee4ab06f0
--- /dev/null
+++ b/src/coreclr/jit/scev.cpp
@@ -0,0 +1,1008 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// This file contains code to analyze how the value of induction variables
+// evolve (scalar evolution analysis), and to turn them into the SCEV IR
+// defined in scev.h. The analysis is inspired by "Michael Wolfe. 1992. Beyond
+// induction variables." and also by LLVM's scalar evolution analysis.
+//
+// The main idea of scalar evolution nalysis is to give a closed form
+// describing the value of tree nodes inside loops even when taking into
+// account that they are changing on each loop iteration. This is useful for
+// optimizations that want to reason about values of IR nodes inside loops,
+// such as IV widening or strength reduction.
+//
+// To represent the possibility of evolution the SCEV IR includes the concept
+// of an add recurrence <loop, start, step>, which describes a value that
+// starts at "start" and changes by adding "step" at each iteration. The IR
+// nodes that change in this way (or depend on something that changes in this
+// way) are generally called induction variables.
+//
+// An add recurrence arises only when a local exists in the loop that is
+// mutated in each iteration. Such a local will naturally end up with a phi
+// node in the loop header. These locals are called primary (or basic)
+// induction variables. The non-primary IVs (which always must depend on the
+// primary IVs) are sometimes called secondary IVs.
+//
+// The job of the analysis is to go from a tree node to a SCEV node that
+// describes its value (possibly taking its evolution into account). Note that
+// SCEV nodes are immutable and the values they represent are _not_
+// flow-dependent; that is, they don't exist at a specific location inside the
+// loop, even though some particular tree node gave rise to that SCEV node. The
+// analysis itself _is_ flow-dependent and guarantees that the Scev* returned
+// describes the value that corresponds to what the tree node computes at its
+// specific location. However, it would be perfectly legal for two trees at
+// different locations in the loop to analyze to the same SCEV node (even
+// potentially returning the same pointer). For example, in theory "i" and "j"
+// in the following loop would both be represented by the same add recurrence
+// <L, 0, 1>, and the analysis could even return the same Scev* for both of
+// them, even if it does not today:
+//
+//   int i = 0;
+//   while (true)
+//   {
+//     i++;
+//     ...
+//     int j = i - 1;
+//   }
+//
+// Actually materializing the value of a SCEV node back into tree IR is not
+// implemented yet, but generally would depend on the availability of tree
+// nodes that compute the dependent values at the point where the IR is to be
+// materialized.
+//
+// Besides the add recurrences the analysis itself is generally a
+// straightforward translation from JIT IR into the SCEV IR. Creating the add
+// recurrences requires paying attention to the structure of PHIs, and
+// disambiguating the values coming from outside the loop and the values coming
+// from the backedges.
+//
+
+#include "jitpch.h"
+
+//------------------------------------------------------------------------
+// GetConstantValue: If this SSA use refers to a constant, then fetch that
+// constant.
+//
+// Parameters:
+//   comp - Compiler instance
+//   cns  - [out] Constant value; only valid if this function returns true.
+//
+// Returns:
+//   True if this SSA use refers to a constant; otherwise false,
+//
+bool ScevLocal::GetConstantValue(Compiler* comp, int64_t* cns)
+{
+    LclVarDsc*           dsc     = comp->lvaGetDesc(LclNum);
+    LclSsaVarDsc*        ssaDsc  = dsc->GetPerSsaData(SsaNum);
+    GenTreeLclVarCommon* defNode = ssaDsc->GetDefNode();
+    if ((defNode != nullptr) && defNode->Data()->OperIs(GT_CNS_INT, GT_CNS_LNG))
+    {
+        *cns = defNode->Data()->AsIntConCommon()->IntegralValue();
+        return true;
+    }
+
+    return false;
+}
+
+//------------------------------------------------------------------------
+// Scev::GetConstantValue: If this SCEV is always a constant (i.e. either an
+// inline constant or an SSA use referring to a constant) then obtain that
+// constant.
+//
+// Parameters:
+//   comp - Compiler instance
+//   cns  - [out] Constant value; only valid if this function returns true.
+//
+// Returns:
+//   True if a constant could be extracted.
+//
+bool Scev::GetConstantValue(Compiler* comp, int64_t* cns)
+{
+    if (OperIs(ScevOper::Constant))
+    {
+        *cns = ((ScevConstant*)this)->Value;
+        return true;
+    }
+
+    if (OperIs(ScevOper::Local))
+    {
+        return ((ScevLocal*)this)->GetConstantValue(comp, cns);
+    }
+
+    return false;
+}
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// Dump: Print this scev node to stdout.
+//
+// Parameters:
+//   comp - Compiler instance
+//
+void Scev::Dump(Compiler* comp)
+{
+    switch (Oper)
+    {
+        case ScevOper::Constant:
+        {
+            ScevConstant* cns = (ScevConstant*)this;
+            printf("%zd", (ssize_t)cns->Value);
+            break;
+        }
+        case ScevOper::Local:
+        {
+            ScevLocal* invariantLocal = (ScevLocal*)this;
+            printf("V%02u.%u", invariantLocal->LclNum, invariantLocal->SsaNum);
+
+            int64_t cns;
+            if (invariantLocal->GetConstantValue(comp, &cns))
+            {
+                printf(" (%lld)", (long long)cns);
+            }
+            break;
+        }
+        case ScevOper::ZeroExtend:
+        case ScevOper::SignExtend:
+        {
+            ScevUnop* unop = (ScevUnop*)this;
+            printf("%cext<%d>(", unop->Oper == ScevOper::ZeroExtend ? 'z' : 's', genTypeSize(unop->Type) * 8);
+            unop->Op1->Dump(comp);
+            printf(")");
+            break;
+        }
+        case ScevOper::Add:
+        case ScevOper::Mul:
+        case ScevOper::Lsh:
+        {
+            ScevBinop* binop = (ScevBinop*)this;
+            printf("(");
+            binop->Op1->Dump(comp);
+            const char* op;
+            switch (binop->Oper)
+            {
+                case ScevOper::Add:
+                    op = "+";
+                    break;
+                case ScevOper::Mul:
+                    op = "*";
+                    break;
+                case ScevOper::Lsh:
+                    op = "<<";
+                    break;
+                default:
+                    unreached();
+            }
+            printf(" %s ", op);
+            binop->Op2->Dump(comp);
+            printf(")");
+            break;
+        }
+        case ScevOper::AddRec:
+        {
+            ScevAddRec* addRec = (ScevAddRec*)this;
+            printf("<" FMT_LP, addRec->Loop->GetIndex());
+            printf(", ");
+            addRec->Start->Dump(comp);
+            printf(", ");
+            addRec->Step->Dump(comp);
+            printf(">");
+            break;
+        }
+        default:
+            unreached();
+    }
+}
+#endif
+
+//------------------------------------------------------------------------
+// ScalarEvolutionContext: Construct an instance of a context to do scalar evolution in.
+//
+// Parameters:
+//   comp - Compiler instance
+//
+// Remarks:
+//   After construction the context should be reset for a new loop by calling
+//   ResetForLoop.
+//
+ScalarEvolutionContext::ScalarEvolutionContext(Compiler* comp)
+    : m_comp(comp)
+    , m_cache(comp->getAllocator(CMK_LoopIVOpts))
+    , m_ephemeralCache(comp->getAllocator(CMK_LoopIVOpts))
+{
+}
+
+//------------------------------------------------------------------------
+// ResetForLoop: Reset the internal cache in preparation of scalar
+// evolution analysis inside a new loop.
+//
+// Parameters:
+//   loop - The loop.
+//
+void ScalarEvolutionContext::ResetForLoop(FlowGraphNaturalLoop* loop)
+{
+    m_loop = loop;
+    m_cache.RemoveAll();
+}
+
+//------------------------------------------------------------------------
+// NewConstant: Create a SCEV node that represents a constant.
+//
+// Returns:
+//   The new node.
+//
+ScevConstant* ScalarEvolutionContext::NewConstant(var_types type, int64_t value)
+{
+    ScevConstant* constant = new (m_comp, CMK_LoopIVOpts) ScevConstant(type, value);
+    return constant;
+}
+
+//------------------------------------------------------------------------
+// NewLocal: Create a SCEV node that represents an invariant local (i.e. a
+// use of an SSA def from outside the loop).
+//
+// Parameters:
+//   lclNum - The local
+//   ssaNum - The SSA number of the def outside the loop that is being used.
+//
+// Returns:
+//   The new node.
+//
+ScevLocal* ScalarEvolutionContext::NewLocal(unsigned lclNum, unsigned ssaNum)
+{
+    var_types  type           = genActualType(m_comp->lvaGetDesc(lclNum));
+    ScevLocal* invariantLocal = new (m_comp, CMK_LoopIVOpts) ScevLocal(type, lclNum, ssaNum);
+    return invariantLocal;
+}
+
+//------------------------------------------------------------------------
+// NewExtension: Create a SCEV node that represents a zero or sign extension.
+//
+// Parameters:
+//   oper       - The operation (ScevOper::ZeroExtend or ScevOper::SignExtend)
+//   targetType - The target type of the extension
+//   op         - The operand being extended.
+//
+// Returns:
+//   The new node.
+//
+ScevUnop* ScalarEvolutionContext::NewExtension(ScevOper oper, var_types targetType, Scev* op)
+{
+    assert(op != nullptr);
+    ScevUnop* ext = new (m_comp, CMK_LoopIVOpts) ScevUnop(oper, targetType, op);
+    return ext;
+}
+
+//------------------------------------------------------------------------
+// NewBinop: Create a SCEV node that represents a binary operation.
+//
+// Parameters:
+//   oper - The operation
+//   op1  - First operand
+//   op2  - Second operand
+//
+// Returns:
+//   The new node.
+//
+ScevBinop* ScalarEvolutionContext::NewBinop(ScevOper oper, Scev* op1, Scev* op2)
+{
+    assert((op1 != nullptr) && (op2 != nullptr));
+    ScevBinop* binop = new (m_comp, CMK_LoopIVOpts) ScevBinop(oper, op1->Type, op1, op2);
+    return binop;
+}
+
+//------------------------------------------------------------------------
+// NewAddRec: Create a SCEV node that represents a new add recurrence.
+//
+// Parameters:
+//   loop  - The loop where this add recurrence is evolving
+//   start - Value of the recurrence at the first iteration
+//   step  - Step value of the recurrence
+//
+// Returns:
+//   The new node.
+//
+ScevAddRec* ScalarEvolutionContext::NewAddRec(Scev* start, Scev* step)
+{
+    assert((start != nullptr) && (step != nullptr));
+    ScevAddRec* addRec = new (m_comp, CMK_LoopIVOpts) ScevAddRec(start->Type, start, step DEBUGARG(m_loop));
+    return addRec;
+}
+
+//------------------------------------------------------------------------
+// CreateSimpleInvariantScev: Create a "simple invariant" SCEV node for a tree:
+// either an invariant local use or a constant.
+//
+// Parameters:
+//   tree - The tree
+//
+// Returns:
+//   SCEV node or nullptr if the tree is not a simple invariant.
+//
+Scev* ScalarEvolutionContext::CreateSimpleInvariantScev(GenTree* tree)
+{
+    if (tree->OperIs(GT_CNS_INT, GT_CNS_LNG))
+    {
+        return CreateScevForConstant(tree->AsIntConCommon());
+    }
+
+    if (tree->OperIs(GT_LCL_VAR) && tree->AsLclVarCommon()->HasSsaName())
+    {
+        LclVarDsc*    dsc    = m_comp->lvaGetDesc(tree->AsLclVarCommon());
+        LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum());
+
+        if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock()))
+        {
+            return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum());
+        }
+    }
+
+    return nullptr;
+}
+
+//------------------------------------------------------------------------
+// CreateScevForConstant: Given an integer constant, create a SCEV node for it.
+//
+// Parameters:
+//   tree - The integer constant
+//
+// Returns:
+//   SCEV node or nullptr if the integer constant is not representable (e.g. a handle).
+//
+Scev* ScalarEvolutionContext::CreateScevForConstant(GenTreeIntConCommon* tree)
+{
+    if (tree->IsIconHandle() || !tree->TypeIs(TYP_INT, TYP_LONG))
+    {
+        return nullptr;
+    }
+
+    return NewConstant(tree->TypeGet(), tree->AsIntConCommon()->IntegralValue());
+}
+
+//------------------------------------------------------------------------
+// AnalyzeNew: Analyze the specified tree in the specified block, without going
+// through the cache.
+//
+// Parameters:
+//   block - Block containing the tree
+//   tree  - Tree node
+//   depth - Current analysis depth
+//
+// Returns:
+//   SCEV node if the tree was analyzable; otherwise nullptr if the value is
+//   cannot be described.
+//
+Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int depth)
+{
+    switch (tree->OperGet())
+    {
+        case GT_CNS_INT:
+        case GT_CNS_LNG:
+        {
+            return CreateScevForConstant(tree->AsIntConCommon());
+        }
+        case GT_LCL_VAR:
+        case GT_PHI_ARG:
+        {
+            if (!tree->AsLclVarCommon()->HasSsaName())
+            {
+                return nullptr;
+            }
+
+            assert(m_comp->lvaInSsa(tree->AsLclVarCommon()->GetLclNum()));
+            LclVarDsc*    dsc    = m_comp->lvaGetDesc(tree->AsLclVarCommon());
+            LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum());
+
+            if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock()))
+            {
+                return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum());
+            }
+
+            if (ssaDsc->GetDefNode() == nullptr)
+            {
+                // GT_CALL retbuf def?
+                return nullptr;
+            }
+
+            if (ssaDsc->GetDefNode()->GetLclNum() != tree->AsLclVarCommon()->GetLclNum())
+            {
+                // Should be a def of the parent
+                assert(dsc->lvIsStructField && (ssaDsc->GetDefNode()->GetLclNum() == dsc->lvParentLcl));
+                return nullptr;
+            }
+
+            return Analyze(ssaDsc->GetBlock(), ssaDsc->GetDefNode(), depth + 1);
+        }
+        case GT_STORE_LCL_VAR:
+        {
+            GenTreeLclVarCommon* store = tree->AsLclVarCommon();
+            GenTree*             data  = store->Data();
+            if (!data->OperIs(GT_PHI))
+            {
+                return Analyze(block, data, depth + 1);
+            }
+
+            if (block != m_loop->GetHeader())
+            {
+                return nullptr;
+            }
+
+            // We have a phi def for the current loop. Look for a primary
+            // induction variable.
+            GenTreePhi*    phi         = data->AsPhi();
+            GenTreePhiArg* enterSsa    = nullptr;
+            GenTreePhiArg* backedgeSsa = nullptr;
+
+            for (GenTreePhi::Use& use : phi->Uses())
+            {
+                GenTreePhiArg*  phiArg = use.GetNode()->AsPhiArg();
+                GenTreePhiArg*& ssaArg = m_loop->ContainsBlock(phiArg->gtPredBB) ? backedgeSsa : enterSsa;
+                if ((ssaArg == nullptr) || (ssaArg->GetSsaNum() == phiArg->GetSsaNum()))
+                {
+                    ssaArg = phiArg;
+                }
+                else
+                {
+                    return nullptr;
+                }
+            }
+
+            if ((enterSsa == nullptr) || (backedgeSsa == nullptr))
+            {
+                return nullptr;
+            }
+
+            ScevLocal* enterScev = NewLocal(enterSsa->GetLclNum(), enterSsa->GetSsaNum());
+
+            LclVarDsc*    dsc    = m_comp->lvaGetDesc(store);
+            LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(backedgeSsa->GetSsaNum());
+
+            if (ssaDsc->GetDefNode() == nullptr)
+            {
+                // GT_CALL retbuf def
+                return nullptr;
+            }
+
+            if (ssaDsc->GetDefNode()->GetLclNum() != store->GetLclNum())
+            {
+                assert(dsc->lvIsStructField && ssaDsc->GetDefNode()->GetLclNum() == dsc->lvParentLcl);
+                return nullptr;
+            }
+
+            assert(ssaDsc->GetBlock() != nullptr);
+
+            // Try simple but most common case first, where we have a direct
+            // add recurrence like i = i + 1.
+            Scev* simpleAddRec = CreateSimpleAddRec(store, enterScev, ssaDsc->GetBlock(), ssaDsc->GetDefNode()->Data());
+            if (simpleAddRec != nullptr)
+            {
+                return simpleAddRec;
+            }
+
+            // Otherwise try a more powerful approach; we create a symbolic
+            // node representing the recurrence and then invoke the analysis
+            // recursively. This handles for example cases like
+            //
+            //   int i = start;
+            //   while (i < n)
+            //   {
+            //     int j = i + 1;
+            //     ...
+            //     i = j;
+            //   }
+            // => <L, start, 1>
+            //
+            // where we need to follow SSA defs. In this case the analysis will result in
+            // <symbolic node> + 1. The symbolic node represents a recurrence,
+            // so this corresponds to the infinite sequence [start, start + 1,
+            // start + 1 + 1, ...] which can be represented by <L, start, 1>.
+            //
+            // This approach also generalizes to handle chains of recurrences.
+            // For example:
+            //
+            //   int i = 0;
+            //   int j = 0;
+            //   while (i < n)
+            //   {
+            //     j++;
+            //     i += j;
+            //   }
+            // => <L, 0, <L, 1, 1>>
+            //
+            // Here `i` will analyze to <symbolic node> + <L, [initial value of j], 1>.
+            // Like before this corresponds to an infinite sequence
+            // [start, start + <L, [initial value of j], 1>, start + 2 * <L, [initial value of j], 1>, ...]
+            // which again can be represented as <L, start, <L, [initial value of j], 1>>.
+            //
+            // More generally, as long as we have only additions and only a
+            // single operand is the recurrence, we can represent it as an add
+            // recurrence. See MakeAddRecFromRecursiveScev for the details.
+            //
+            ScevConstant* symbolicAddRec = NewConstant(data->TypeGet(), 0xdeadbeef);
+            m_ephemeralCache.Emplace(store, symbolicAddRec);
+
+            Scev* result;
+            if (m_usingEphemeralCache)
+            {
+                result = Analyze(ssaDsc->GetBlock(), ssaDsc->GetDefNode()->Data(), depth + 1);
+            }
+            else
+            {
+                m_usingEphemeralCache = true;
+                result                = Analyze(ssaDsc->GetBlock(), ssaDsc->GetDefNode()->Data(), depth + 1);
+                m_usingEphemeralCache = false;
+                m_ephemeralCache.RemoveAll();
+            }
+
+            if (result == nullptr)
+            {
+                return nullptr;
+            }
+
+            return MakeAddRecFromRecursiveScev(enterScev, result, symbolicAddRec);
+        }
+        case GT_CAST:
+        {
+            GenTreeCast* cast = tree->AsCast();
+            if (cast->gtCastType != TYP_LONG)
+            {
+                return nullptr;
+            }
+
+            Scev* op = Analyze(block, cast->CastOp(), depth + 1);
+            if (op == nullptr)
+            {
+                return nullptr;
+            }
+
+            return NewExtension(cast->IsUnsigned() ? ScevOper::ZeroExtend : ScevOper::SignExtend, TYP_LONG, op);
+        }
+        case GT_ADD:
+        case GT_SUB:
+        case GT_MUL:
+        case GT_LSH:
+        {
+            Scev* op1 = Analyze(block, tree->gtGetOp1(), depth + 1);
+            if (op1 == nullptr)
+                return nullptr;
+
+            Scev* op2 = Analyze(block, tree->gtGetOp2(), depth + 1);
+            if (op2 == nullptr)
+                return nullptr;
+
+            ScevOper oper;
+            switch (tree->OperGet())
+            {
+                case GT_ADD:
+                    oper = ScevOper::Add;
+                    break;
+                case GT_SUB:
+                    oper = ScevOper::Add;
+                    op2  = NewBinop(ScevOper::Mul, op2, NewConstant(op2->Type, -1));
+                    break;
+                case GT_MUL:
+                    oper = ScevOper::Mul;
+                    break;
+                case GT_LSH:
+                    oper = ScevOper::Lsh;
+                    break;
+                default:
+                    unreached();
+            }
+
+            return NewBinop(oper, op1, op2);
+        }
+        case GT_COMMA:
+        {
+            return Analyze(block, tree->gtGetOp2(), depth + 1);
+        }
+        case GT_ARR_ADDR:
+        {
+            return Analyze(block, tree->AsArrAddr()->Addr(), depth + 1);
+        }
+        default:
+            return nullptr;
+    }
+}
+
+//------------------------------------------------------------------------
+// CreateSimpleAddRec: Create a "simple" add-recurrence. This handles the most
+// common patterns for primary induction variables where we see a store like
+// "i = i + 1".
+//
+// Parameters:
+//   headerStore  - Phi definition of the candidate primary induction variable
+//   enterScev    - SCEV describing start value of the primary induction variable
+//   stepDefBlock - Block containing the def of the step value
+//   stepDefData  - Value of the def of the step value
+//
+// Returns:
+//   SCEV node if this is a simple addrec shape. Otherwise nullptr.
+//
+Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStore,
+                                                 ScevLocal*           enterScev,
+                                                 BasicBlock*          stepDefBlock,
+                                                 GenTree*             stepDefData)
+{
+    if (!stepDefData->OperIs(GT_ADD))
+    {
+        return nullptr;
+    }
+
+    GenTree* stepTree;
+    GenTree* op1 = stepDefData->gtGetOp1();
+    GenTree* op2 = stepDefData->gtGetOp2();
+    if (op1->OperIs(GT_LCL_VAR) && (op1->AsLclVar()->GetLclNum() == headerStore->GetLclNum()) &&
+        (op1->AsLclVar()->GetSsaNum() == headerStore->GetSsaNum()))
+    {
+        stepTree = op2;
+    }
+    else if (op2->OperIs(GT_LCL_VAR) && (op2->AsLclVar()->GetLclNum() == headerStore->GetLclNum()) &&
+             (op2->AsLclVar()->GetSsaNum() == headerStore->GetSsaNum()))
+    {
+        stepTree = op1;
+    }
+    else
+    {
+        // Not a simple IV shape (i.e. more complex than "i = i + k")
+        return nullptr;
+    }
+
+    Scev* stepScev = CreateSimpleInvariantScev(stepTree);
+    if (stepScev == nullptr)
+    {
+        return nullptr;
+    }
+
+    return NewAddRec(enterScev, stepScev);
+}
+
+//------------------------------------------------------------------------
+// ExtractAddOperands: Extract all operands of potentially nested add
+// operations.
+//
+// Parameters:
+//   binop    - The binop representing an add
+//   operands - Array stack to add the operands to
+//
+void ScalarEvolutionContext::ExtractAddOperands(ScevBinop* binop, ArrayStack<Scev*>& operands)
+{
+    assert(binop->OperIs(ScevOper::Add));
+
+    if (binop->Op1->OperIs(ScevOper::Add))
+    {
+        ExtractAddOperands(static_cast<ScevBinop*>(binop->Op1), operands);
+    }
+    else
+    {
+        operands.Push(binop->Op1);
+    }
+
+    if (binop->Op2->OperIs(ScevOper::Add))
+    {
+        ExtractAddOperands(static_cast<ScevBinop*>(binop->Op2), operands);
+    }
+    else
+    {
+        operands.Push(binop->Op2);
+    }
+}
+
+//------------------------------------------------------------------------
+// MakeAddRecFromRecursiveScev: Given a recursive SCEV and a symbolic SCEV
+// whose appearances represent an occurrence of the full SCEV, create a
+// non-recursive add-rec from it.
+//
+// Parameters:
+//   startScev     - The start value of the addrec
+//   scev          - The scev
+//   recursiveScev - A symbolic node whose appearance represents the value of "scev"
+//
+// Returns:
+//   A non-recursive addrec, or nullptr if the recursive SCEV is not
+//   representable as an add recurrence.
+//
+Scev* ScalarEvolutionContext::MakeAddRecFromRecursiveScev(Scev* startScev, Scev* scev, Scev* recursiveScev)
+{
+    if (!scev->OperIs(ScevOper::Add))
+    {
+        return nullptr;
+    }
+
+    ArrayStack<Scev*> addOperands(m_comp->getAllocator(CMK_LoopIVOpts));
+    ExtractAddOperands(static_cast<ScevBinop*>(scev), addOperands);
+
+    assert(addOperands.Height() >= 2);
+
+    int numAppearances = 0;
+    for (int i = 0; i < addOperands.Height(); i++)
+    {
+        Scev* addOperand = addOperands.Bottom(i);
+        if (addOperand == recursiveScev)
+        {
+            numAppearances++;
+        }
+        else
+        {
+            ScevVisit result = addOperand->Visit([=](Scev* node) {
+                if (node == recursiveScev)
+                {
+                    return ScevVisit::Abort;
+                }
+
+                return ScevVisit::Continue;
+            });
+
+            if (result == ScevVisit::Abort)
+            {
+                // We do not handle nested occurrences. Some of these may be representable, some won't.
+                return nullptr;
+            }
+        }
+    }
+
+    if (numAppearances == 0)
+    {
+        // TODO-CQ: We currently cannot handle cases like
+        // i = arr.Length;
+        // j = i - 1;
+        // i = j;
+        // while (true) { ...; j = i - 1; i = j; }
+        //
+        // These cases can arise from loop structures like "for (int i =
+        // arr.Length; --i >= 0;)" when Roslyn emits a "sub; dup; stloc"
+        // sequence, and local prop + loop inversion converts the duplicated
+        // local into a fully fledged IV.
+        // In this case we see that i = <L, [i from outside loop], -1>, but for
+        // j we will see <L, [i from outside loop], -1> + (-1) in this function
+        // as the value coming around the backedge, and we cannot reconcile
+        // this.
+        //
+        return nullptr;
+    }
+
+    if (numAppearances > 1)
+    {
+        // Multiple occurrences -- cannot be represented as an addrec
+        // (corresponds to a geometric progression).
+        return nullptr;
+    }
+
+    Scev* step = nullptr;
+    for (int i = 0; i < addOperands.Height(); i++)
+    {
+        Scev* addOperand = addOperands.Bottom(i);
+        if (addOperand == recursiveScev)
+        {
+            continue;
+        }
+
+        if (step == nullptr)
+        {
+            step = addOperand;
+        }
+        else
+        {
+            step = NewBinop(ScevOper::Add, step, addOperand);
+        }
+    }
+
+    return NewAddRec(startScev, step);
+}
+
+//------------------------------------------------------------------------
+// Analyze: Analyze the specified tree in the specified block.
+//
+// Parameters:
+//   block - Block containing the tree
+//   tree  - Tree node
+//
+// Returns:
+//   SCEV node if the tree was analyzable; otherwise nullptr if the value is
+//   cannot be described.
+//
+Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree)
+{
+    return Analyze(block, tree, 0);
+}
+
+// Since the analysis follows SSA defs we have no upper bound on the potential
+// depth of the analysis performed. We put an artificial limit on this for two
+// reasons:
+// 1. The analysis is recursive, and we should not stack overflow regardless of
+// the input program.
+// 2. If we produced arbitrarily deep SCEV trees then all algorithms over their
+// structure would similarly be at risk of stack overflows if they were
+// recursive. However, these algorithms are generally much more elegant when
+// they make use of recursion.
+const int SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH = 64;
+
+//------------------------------------------------------------------------
+// Analyze: Analyze the specified tree in the specified block.
+//
+// Parameters:
+//   block - Block containing the tree
+//   tree  - Tree node
+//   depth - Current analysis depth
+//
+// Returns:
+//   SCEV node if the tree was analyzable; otherwise nullptr if the value is
+//   cannot be described.
+//
+Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree, int depth)
+{
+    Scev* result;
+    if (!m_cache.Lookup(tree, &result) && (!m_usingEphemeralCache || !m_ephemeralCache.Lookup(tree, &result)))
+    {
+        if (depth >= SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH)
+        {
+            return nullptr;
+        }
+
+        result = AnalyzeNew(block, tree, depth);
+
+        if (m_usingEphemeralCache)
+        {
+            m_ephemeralCache.Set(tree, result, ScalarEvolutionMap::Overwrite);
+        }
+        else
+        {
+            m_cache.Set(tree, result);
+        }
+    }
+
+    return result;
+}
+
+//------------------------------------------------------------------------
+// FoldBinop: Fold simple binops.
+//
+// Type parameters:
+//   T - Type that the binop is being evaluated in
+//
+// Parameters:
+//   oper - Binary operation
+//   op1  - First operand
+//   op2  - Second operand
+//
+// Returns:
+//   Folded value.
+//
+template <typename T>
+static T FoldBinop(ScevOper oper, T op1, T op2)
+{
+    switch (oper)
+    {
+        case ScevOper::Add:
+            return op1 + op2;
+        case ScevOper::Mul:
+            return op1 * op2;
+        case ScevOper::Lsh:
+            return op1 << op2;
+        default:
+            unreached();
+    }
+}
+
+//------------------------------------------------------------------------
+// Simplify: Try to simplify a SCEV node by folding and canonicalization.
+//
+// Parameters:
+//   scev - The node
+//
+// Returns:
+//   Simplified node.
+//
+// Remarks:
+//   Canonicalization is done for binops; constants are moved to the right and
+//   addrecs are moved to the left.
+//
+//   Simple unops/binops on constants are folded. Operands are distributed into
+//   add recs whenever possible.
+//
+Scev* ScalarEvolutionContext::Simplify(Scev* scev)
+{
+    switch (scev->Oper)
+    {
+        case ScevOper::Constant:
+        case ScevOper::Local:
+        {
+            return scev;
+        }
+        case ScevOper::ZeroExtend:
+        case ScevOper::SignExtend:
+        {
+            ScevUnop* unop = (ScevUnop*)scev;
+            assert(genTypeSize(unop->Type) >= genTypeSize(unop->Op1->Type));
+
+            Scev* op1 = Simplify(unop->Op1);
+
+            if (unop->Type == op1->Type)
+            {
+                return op1;
+            }
+
+            assert((unop->Type == TYP_LONG) && (op1->Type == TYP_INT));
+
+            if (op1->OperIs(ScevOper::Constant))
+            {
+                ScevConstant* cns = (ScevConstant*)op1;
+                return NewConstant(unop->Type, unop->OperIs(ScevOper::ZeroExtend) ? (uint64_t)(int32_t)cns->Value
+                                                                                  : (int64_t)(int32_t)cns->Value);
+            }
+
+            if (op1->OperIs(ScevOper::AddRec))
+            {
+                // TODO-Cleanup: This requires some proof that it is ok, but
+                // currently we do not rely on this.
+                return op1;
+            }
+
+            return (op1 == unop->Op1) ? unop : NewExtension(unop->Oper, unop->Type, op1);
+        }
+        case ScevOper::Add:
+        case ScevOper::Mul:
+        case ScevOper::Lsh:
+        {
+            ScevBinop* binop = (ScevBinop*)scev;
+            Scev*      op1   = Simplify(binop->Op1);
+            Scev*      op2   = Simplify(binop->Op2);
+
+            if (binop->OperIs(ScevOper::Add, ScevOper::Mul))
+            {
+                // Normalize addrecs to the left
+                if (op2->OperIs(ScevOper::AddRec) && !op1->OperIs(ScevOper::AddRec))
+                {
+                    std::swap(op1, op2);
+                }
+                // Normalize constants to the right
+                if (op1->OperIs(ScevOper::Constant) && !op2->OperIs(ScevOper::Constant))
+                {
+                    std::swap(op1, op2);
+                }
+            }
+
+            if (op1->OperIs(ScevOper::AddRec))
+            {
+                // <L, start, step> + x => <L, start + x, step>
+                // <L, start, step> * x => <L, start * x, step * x>
+                ScevAddRec* addRec   = (ScevAddRec*)op1;
+                Scev*       newStart = Simplify(NewBinop(binop->Oper, addRec->Start, op2));
+                Scev*       newStep  = scev->OperIs(ScevOper::Mul, ScevOper::Lsh)
+                                           ? Simplify(NewBinop(binop->Oper, addRec->Step, op2))
+                                           : addRec->Step;
+                return NewAddRec(newStart, newStep);
+            }
+
+            if (op1->OperIs(ScevOper::Constant) && op2->OperIs(ScevOper::Constant))
+            {
+                ScevConstant* cns1 = (ScevConstant*)op1;
+                ScevConstant* cns2 = (ScevConstant*)op2;
+                int64_t       newValue;
+                if (binop->TypeIs(TYP_INT))
+                {
+                    newValue = FoldBinop<int32_t>(binop->Oper, static_cast<int32_t>(cns1->Value),
+                                                  static_cast<int32_t>(cns2->Value));
+                }
+                else
+                {
+                    assert(binop->TypeIs(TYP_LONG));
+                    newValue = FoldBinop<int64_t>(binop->Oper, cns1->Value, cns2->Value);
+                }
+
+                return NewConstant(binop->Type, newValue);
+            }
+
+            return (op1 == binop->Op1) && (op2 == binop->Op2) ? binop : NewBinop(binop->Oper, op1, op2);
+        }
+        case ScevOper::AddRec:
+        {
+            ScevAddRec* addRec = (ScevAddRec*)scev;
+            Scev*       start  = Simplify(addRec->Start);
+            Scev*       step   = Simplify(addRec->Step);
+            return (start == addRec->Start) && (step == addRec->Step) ? addRec : NewAddRec(start, step);
+        }
+        default:
+            unreached();
+    }
+}
diff --git a/src/coreclr/jit/scev.h b/src/coreclr/jit/scev.h
new file mode 100644
index 000000000000..1aab39e3d3a5
--- /dev/null
+++ b/src/coreclr/jit/scev.h
@@ -0,0 +1,234 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#pragma once
+
+// This file contains the definition of the scalar evolution IR. This IR allows
+// representing the values of IR nodes inside loops in a closed form, taking
+// into account that they are changing on each loop iteration. The IR is based
+// around the following possible operations. At the core is ScevOper::AddRec,
+// which represents a value that evolves by an add recurrence. In dumps it is
+// described by <loop, start, step> where "loop" is the loop the value is
+// evolving in, "start" is the initial value and "step" is the step by which
+// the value evolves in every iteration.
+//
+// See scev.cpp for further documentation.
+//
+enum class ScevOper
+{
+    Constant,
+    Local,
+    ZeroExtend,
+    SignExtend,
+    Add,
+    Mul,
+    Lsh,
+    AddRec,
+};
+
+static bool ScevOperIs(ScevOper oper, ScevOper otherOper)
+{
+    return oper == otherOper;
+}
+
+template <typename... Args>
+static bool ScevOperIs(ScevOper oper, ScevOper operFirst, Args... operTail)
+{
+    return oper == operFirst || ScevOperIs(oper, operTail...);
+}
+
+enum class ScevVisit
+{
+    Abort,
+    Continue,
+};
+
+struct Scev
+{
+    const ScevOper  Oper;
+    const var_types Type;
+
+    Scev(ScevOper oper, var_types type)
+        : Oper(oper)
+        , Type(type)
+    {
+    }
+
+    template <typename... Args>
+    bool OperIs(Args... opers)
+    {
+        return ScevOperIs(Oper, opers...);
+    }
+
+    bool TypeIs(var_types type)
+    {
+        return Type == type;
+    }
+
+    bool GetConstantValue(Compiler* comp, int64_t* cns);
+
+#ifdef DEBUG
+    void Dump(Compiler* comp);
+#endif
+    template <typename TVisitor>
+    ScevVisit Visit(TVisitor visitor);
+};
+
+struct ScevConstant : Scev
+{
+    ScevConstant(var_types type, int64_t value)
+        : Scev(ScevOper::Constant, type)
+        , Value(value)
+    {
+    }
+
+    int64_t Value;
+};
+
+struct ScevLocal : Scev
+{
+    ScevLocal(var_types type, unsigned lclNum, unsigned ssaNum)
+        : Scev(ScevOper::Local, type)
+        , LclNum(lclNum)
+        , SsaNum(ssaNum)
+    {
+    }
+
+    const unsigned LclNum;
+    const unsigned SsaNum;
+
+    bool GetConstantValue(Compiler* comp, int64_t* cns);
+};
+
+struct ScevUnop : Scev
+{
+    ScevUnop(ScevOper oper, var_types type, Scev* op1)
+        : Scev(oper, type)
+        , Op1(op1)
+    {
+    }
+
+    Scev* const Op1;
+};
+
+struct ScevBinop : ScevUnop
+{
+    ScevBinop(ScevOper oper, var_types type, Scev* op1, Scev* op2)
+        : ScevUnop(oper, type, op1)
+        , Op2(op2)
+    {
+    }
+
+    Scev* const Op2;
+};
+
+// Represents a value that evolves by an add recurrence.
+// The value at iteration N is Start + N * Step.
+// "Start" and "Step" are guaranteed to be invariant in "Loop".
+struct ScevAddRec : Scev
+{
+    ScevAddRec(var_types type, Scev* start, Scev* step DEBUGARG(FlowGraphNaturalLoop* loop))
+        : Scev(ScevOper::AddRec, type)
+        , Start(start)
+        , Step(step) DEBUGARG(Loop(loop))
+    {
+    }
+
+    Scev* const Start;
+    Scev* const Step;
+    INDEBUG(FlowGraphNaturalLoop* const Loop);
+};
+
+//------------------------------------------------------------------------
+// Scev::Visit: Recursively visit all SCEV nodes in the SCEV tree.
+//
+// Parameters:
+//   visitor - Callback with signature Scev* -> ScevVisit.
+//
+// Returns:
+//   ScevVisit::Abort if "visitor" aborted, otherwise ScevVisit::Continue.
+//
+// Remarks:
+//   The visit is done in preorder.
+//
+template <typename TVisitor>
+ScevVisit Scev::Visit(TVisitor visitor)
+{
+    if (visitor(this) == ScevVisit::Abort)
+        return ScevVisit::Abort;
+
+    switch (Oper)
+    {
+        case ScevOper::Constant:
+        case ScevOper::Local:
+            break;
+        case ScevOper::ZeroExtend:
+        case ScevOper::SignExtend:
+            return static_cast<ScevUnop*>(this)->Op1->Visit(visitor);
+        case ScevOper::Add:
+        case ScevOper::Mul:
+        case ScevOper::Lsh:
+        {
+            ScevBinop* binop = static_cast<ScevBinop*>(this);
+            if (binop->Op1->Visit(visitor) == ScevVisit::Abort)
+                return ScevVisit::Abort;
+
+            return binop->Op2->Visit(visitor);
+        }
+        case ScevOper::AddRec:
+        {
+            ScevAddRec* addrec = static_cast<ScevAddRec*>(this);
+            if (addrec->Start->Visit(visitor) == ScevVisit::Abort)
+                return ScevVisit::Abort;
+
+            return addrec->Step->Visit(visitor);
+        }
+        default:
+            unreached();
+    }
+
+    return ScevVisit::Continue;
+}
+
+typedef JitHashTable<GenTree*, JitPtrKeyFuncs<GenTree>, Scev*> ScalarEvolutionMap;
+
+// Scalar evolution is analyzed in the context of a single loop, and are
+// computed on-demand by the use of the "Analyze" method on this class, which
+// also maintains a cache.
+class ScalarEvolutionContext
+{
+    Compiler*             m_comp;
+    FlowGraphNaturalLoop* m_loop = nullptr;
+    ScalarEvolutionMap    m_cache;
+
+    // During analysis of PHIs we insert a symbolic node representing the
+    // "recurrence"; we use this cache to be able to invalidate things that end
+    // up depending on the symbolic node quickly.
+    ScalarEvolutionMap m_ephemeralCache;
+    bool               m_usingEphemeralCache = false;
+
+    Scev* Analyze(BasicBlock* block, GenTree* tree, int depth);
+    Scev* AnalyzeNew(BasicBlock* block, GenTree* tree, int depth);
+    Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore,
+                             ScevLocal*           start,
+                             BasicBlock*          stepDefBlock,
+                             GenTree*             stepDefData);
+    Scev* MakeAddRecFromRecursiveScev(Scev* start, Scev* scev, Scev* recursiveScev);
+    Scev* CreateSimpleInvariantScev(GenTree* tree);
+    Scev* CreateScevForConstant(GenTreeIntConCommon* tree);
+    void  ExtractAddOperands(ScevBinop* add, ArrayStack<Scev*>& operands);
+
+public:
+    ScalarEvolutionContext(Compiler* comp);
+
+    void ResetForLoop(FlowGraphNaturalLoop* loop);
+
+    ScevConstant* NewConstant(var_types type, int64_t value);
+    ScevLocal*    NewLocal(unsigned lclNum, unsigned ssaNum);
+    ScevUnop*     NewExtension(ScevOper oper, var_types targetType, Scev* op);
+    ScevBinop*    NewBinop(ScevOper oper, Scev* op1, Scev* op2);
+    ScevAddRec*   NewAddRec(Scev* start, Scev* step);
+
+    Scev* Analyze(BasicBlock* block, GenTree* tree);
+    Scev* Simplify(Scev* scev);
+};
diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp
index 922f8052f2a0..94a844aabb59 100644
--- a/src/coreclr/jit/scopeinfo.cpp
+++ b/src/coreclr/jit/scopeinfo.cpp
@@ -41,7 +41,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  *    This should only be needed if some basic block are deleted/out of order,
  *    etc.
  *  Also,
- *  o At every assignment to a variable, siCheckVarScope() adds an open scope
+ *  o At every store to a variable, siCheckVarScope() adds an open scope
  *    for the variable being assigned to.
  *  o UpdateLifeVar() calls siUpdate() which closes scopes for variables which
  *    are not live anymore.
@@ -302,6 +302,9 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc(
         case TYP_LONG:
         case TYP_DOUBLE:
 #endif // TARGET_64BIT
+#if defined(FEATURE_MASKED_HW_INTRINSICS)
+        case TYP_MASK:
+#endif // FEATURE_MASKED_HW_INTRINSICS
 #if FEATURE_IMPLICIT_BYREFS
             // In the AMD64 ABI we are supposed to pass a struct by reference when its
             // size is not 1, 2, 4 or 8 bytes in size. During fgMorph, the compiler modifies
@@ -434,6 +437,9 @@ void CodeGenInterface::siVarLoc::siFillRegisterVarLoc(
         case TYP_SIMD32:
         case TYP_SIMD64:
 #endif // TARGET_XARCH
+#if defined(FEATURE_MASKED_HW_INTRINSICS)
+        case TYP_MASK:
+#endif // FEATURE_MASKED_HW_INTRINSICS
         {
             this->vlType = VLT_REG_FP;
 
@@ -785,11 +791,9 @@ void CodeGenInterface::VariableLiveKeeper::VariableLiveDescriptor::startLiveRang
     else
     {
         JITDUMP("Debug: New V%02u debug range: %s\n", m_varNum,
-                m_VariableLiveRanges->empty()
-                    ? "first"
-                    : siVarLoc::Equals(&varLocation, &(m_VariableLiveRanges->back().m_VarLocation))
-                          ? "new var or location"
-                          : "not adjacent");
+                m_VariableLiveRanges->empty()                                                   ? "first"
+                : siVarLoc::Equals(&varLocation, &(m_VariableLiveRanges->back().m_VarLocation)) ? "new var or location"
+                                                                                                : "not adjacent");
         // Creates new live range with invalid end
         m_VariableLiveRanges->emplace_back(varLocation, emitLocation(), emitLocation());
         m_VariableLiveRanges->back().m_StartEmitLocation.CaptureLocation(emit);
@@ -1446,12 +1450,10 @@ void CodeGen::siInit()
 
     assert(compiler->opts.compScopeInfo);
 
-#if defined(FEATURE_EH_FUNCLETS)
     if (compiler->info.compVarScopesCount > 0)
     {
         siInFuncletRegion = false;
     }
-#endif // FEATURE_EH_FUNCLETS
 
     siLastEndOffs = 0;
 
@@ -1479,7 +1481,6 @@ void CodeGen::siBeginBlock(BasicBlock* block)
         return;
     }
 
-#if defined(FEATURE_EH_FUNCLETS)
     if (siInFuncletRegion)
     {
         return;
@@ -1495,7 +1496,6 @@ void CodeGen::siBeginBlock(BasicBlock* block)
 
         return;
     }
-#endif // FEATURE_EH_FUNCLETS
 
 #ifdef DEBUG
     if (verbose)
@@ -1554,45 +1554,44 @@ void CodeGen::siOpenScopesForNonTrackedVars(const BasicBlock* block, unsigned in
         // Check if there are any scopes on the current block's start boundary.
         VarScopeDsc* varScope = nullptr;
 
-#if defined(FEATURE_EH_FUNCLETS)
-
-        // If we find a spot where the code offset isn't what we expect, because
-        // there is a gap, it might be because we've moved the funclets out of
-        // line. Catch up with the enter and exit scopes of the current block.
-        // Ignore the enter/exit scope changes of the missing scopes, which for
-        // funclets must be matched.
-        if (lastBlockILEndOffset != beginOffs)
+        if (compiler->UsesFunclets())
         {
-            assert(beginOffs > 0);
-            assert(lastBlockILEndOffset < beginOffs);
+            // If we find a spot where the code offset isn't what we expect, because
+            // there is a gap, it might be because we've moved the funclets out of
+            // line. Catch up with the enter and exit scopes of the current block.
+            // Ignore the enter/exit scope changes of the missing scopes, which for
+            // funclets must be matched.
+            if (lastBlockILEndOffset != beginOffs)
+            {
+                assert(beginOffs > 0);
+                assert(lastBlockILEndOffset < beginOffs);
 
-            JITDUMP("Scope info: found offset hole. lastOffs=%u, currOffs=%u\n", lastBlockILEndOffset, beginOffs);
+                JITDUMP("Scope info: found offset hole. lastOffs=%u, currOffs=%u\n", lastBlockILEndOffset, beginOffs);
 
-            // Skip enter scopes
-            while ((varScope = compiler->compGetNextEnterScope(beginOffs - 1, true)) != nullptr)
-            {
-                /* do nothing */
-                JITDUMP("Scope info: skipping enter scope, LVnum=%u\n", varScope->vsdLVnum);
-            }
+                // Skip enter scopes
+                while ((varScope = compiler->compGetNextEnterScope(beginOffs - 1, true)) != nullptr)
+                {
+                    /* do nothing */
+                    JITDUMP("Scope info: skipping enter scope, LVnum=%u\n", varScope->vsdLVnum);
+                }
 
-            // Skip exit scopes
-            while ((varScope = compiler->compGetNextExitScope(beginOffs - 1, true)) != nullptr)
-            {
-                /* do nothing */
-                JITDUMP("Scope info: skipping exit scope, LVnum=%u\n", varScope->vsdLVnum);
+                // Skip exit scopes
+                while ((varScope = compiler->compGetNextExitScope(beginOffs - 1, true)) != nullptr)
+                {
+                    /* do nothing */
+                    JITDUMP("Scope info: skipping exit scope, LVnum=%u\n", varScope->vsdLVnum);
+                }
             }
         }
-
-#else // !FEATURE_EH_FUNCLETS
-
-        if (lastBlockILEndOffset != beginOffs)
+        else
         {
-            assert(lastBlockILEndOffset < beginOffs);
-            return;
+            if (lastBlockILEndOffset != beginOffs)
+            {
+                assert(lastBlockILEndOffset < beginOffs);
+                return;
+            }
         }
 
-#endif // !FEATURE_EH_FUNCLETS
-
         while ((varScope = compiler->compGetNextEnterScope(beginOffs)) != nullptr)
         {
             LclVarDsc* lclVarDsc = compiler->lvaGetDesc(varScope->vsdVarNum);
@@ -1629,12 +1628,10 @@ void CodeGen::siEndBlock(BasicBlock* block)
 {
     assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
 
-#if defined(FEATURE_EH_FUNCLETS)
     if (siInFuncletRegion)
     {
         return;
     }
-#endif // FEATURE_EH_FUNCLETS
 
     unsigned endOffs = block->bbCodeOffsEnd;
 
@@ -1680,9 +1677,9 @@ NATIVE_OFFSET CodeGen::psiGetVarStackOffset(const LclVarDsc* lclVarDsc) const
 }
 
 /*============================================================================
-*           INTERFACE (public) Functions for PrologScopeInfo
-*============================================================================
-*/
+ *           INTERFACE (public) Functions for PrologScopeInfo
+ *============================================================================
+ */
 
 //------------------------------------------------------------------------
 // psiBegProlog: Initializes the PrologScopeInfo creating open psiScopes or
@@ -1722,8 +1719,6 @@ void CodeGen::psiBegProlog()
                     regNumber otherRegNum = REG_NA;
                     for (unsigned nCnt = 0; nCnt < structDesc.eightByteCount; nCnt++)
                     {
-                        var_types regType = TYP_UNDEF;
-
                         if (nCnt == 0)
                         {
                             regNum = lclVarDsc->GetArgReg();
@@ -1736,12 +1731,6 @@ void CodeGen::psiBegProlog()
                         {
                             assert(false && "Invalid eightbyte number.");
                         }
-
-                        regType = compiler->GetEightByteType(structDesc, nCnt);
-#ifdef DEBUG
-                        regType = compiler->mangleVarArgsType(regType);
-                        assert(genMapRegNumToRegArgNum((nCnt == 0 ? regNum : otherRegNum), regType) != (unsigned)-1);
-#endif // DEBUG
                     }
 
                     varLocation.storeVariableInRegisters(regNum, otherRegNum);
@@ -1790,7 +1779,6 @@ void CodeGen::psiBegProlog()
                     regType = lclVarDsc->GetHfaType();
                 }
 #endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-                assert(genMapRegNumToRegArgNum(lclVarDsc->GetArgReg(), regType) != (unsigned)-1);
 #endif // DEBUG
                 varLocation.storeVariableInRegisters(lclVarDsc->GetArgReg(), REG_NA);
             }
diff --git a/src/coreclr/jit/sideeffects.cpp b/src/coreclr/jit/sideeffects.cpp
index d2c1de6c749a..4a9b1899b24b 100644
--- a/src/coreclr/jit/sideeffects.cpp
+++ b/src/coreclr/jit/sideeffects.cpp
@@ -8,7 +8,10 @@
 
 #include "sideeffects.h"
 
-LclVarSet::LclVarSet() : m_bitVector(nullptr), m_hasAnyLcl(false), m_hasBitVector(false)
+LclVarSet::LclVarSet()
+    : m_bitVector(nullptr)
+    , m_hasAnyLcl(false)
+    , m_hasBitVector(false)
 {
 }
 
@@ -121,7 +124,10 @@ void LclVarSet::Clear()
 }
 
 AliasSet::AliasSet()
-    : m_lclVarReads(), m_lclVarWrites(), m_readsAddressableLocation(false), m_writesAddressableLocation(false)
+    : m_lclVarReads()
+    , m_lclVarWrites()
+    , m_readsAddressableLocation(false)
+    , m_writesAddressableLocation(false)
 {
 }
 
@@ -136,7 +142,11 @@ AliasSet::AliasSet()
 //    node - The node in question.
 //
 AliasSet::NodeInfo::NodeInfo(Compiler* compiler, GenTree* node)
-    : m_compiler(compiler), m_node(node), m_flags(0), m_lclNum(0), m_lclOffs(0)
+    : m_compiler(compiler)
+    , m_node(node)
+    , m_flags(0)
+    , m_lclNum(0)
+    , m_lclOffs(0)
 {
     if (node->IsCall())
     {
@@ -174,7 +184,7 @@ AliasSet::NodeInfo::NodeInfo(Compiler* compiler, GenTree* node)
 
     // Is the operation a write? If so, set `node` to the location that is being written to.
     bool isWrite = false;
-    if (node->OperIsStore() || node->OperIs(GT_STORE_DYN_BLK, GT_MEMORYBARRIER))
+    if (node->OperIsStore() || node->OperIs(GT_MEMORYBARRIER))
     {
         isWrite = true;
     }
@@ -444,7 +454,9 @@ void AliasSet::Clear()
     m_lclVarWrites.Clear();
 }
 
-SideEffectSet::SideEffectSet() : m_sideEffectFlags(0), m_aliasSet()
+SideEffectSet::SideEffectSet()
+    : m_sideEffectFlags(0)
+    , m_aliasSet()
 {
 }
 
@@ -460,7 +472,9 @@ SideEffectSet::SideEffectSet() : m_sideEffectFlags(0), m_aliasSet()
 //    compiler - The compiler context.
 //    node - The node to use for initialization.
 //
-SideEffectSet::SideEffectSet(Compiler* compiler, GenTree* node) : m_sideEffectFlags(0), m_aliasSet()
+SideEffectSet::SideEffectSet(Compiler* compiler, GenTree* node)
+    : m_sideEffectFlags(0)
+    , m_aliasSet()
 {
     AddNode(compiler, node);
 }
@@ -484,22 +498,17 @@ void SideEffectSet::AddNode(Compiler* compiler, GenTree* node)
 //    Returns true if the side effects in this set interfere with the
 //    given side effect flags and alias information.
 //
-//    Two side effect sets interfere under any of the following
-//    conditions:
+//    Two side effect sets interfere under any of the following conditions:
 //    - If the analysis is strict, and:
-//        - One set contains a compiler barrier and the other set contains a global reference, or
+//        - One set contains a compiler barrier and the other set contains a global reference or compiler barrier, or
 //        - Both sets produce an exception
 //    - Whether or not the analysis is strict:
-//        - One set produces an exception and the other set contains a
-//          write
-//        - One set's reads and writes interfere with the other set's
-//          reads and writes
+//        - One set produces an exception and the other set contains a write
+//        - One set's reads and writes interfere with the other set's reads and writes
 //
 // Arguments:
-//    otherSideEffectFlags - The side effect flags for the other side
-//                           effect set.
-//    otherAliasInfo - The alias information for the other side effect
-//                     set.
+//    otherSideEffectFlags - The side effect flags for the other side effect set.
+//    otherAliasInfo - The alias information for the other side effect set.
 //    strict - True if the analysis should be strict as described above.
 //
 template <typename TOtherAliasInfo>
@@ -514,12 +523,14 @@ bool SideEffectSet::InterferesWith(unsigned               otherSideEffectFlags,
     {
         // If either set contains a compiler barrier, and the other set contains a global reference,
         // the sets interfere.
-        if (((m_sideEffectFlags & GTF_ORDER_SIDEEFF) != 0) && ((otherSideEffectFlags & GTF_GLOB_REF) != 0))
+        if (((m_sideEffectFlags & GTF_ORDER_SIDEEFF) != 0) &&
+            ((otherSideEffectFlags & (GTF_GLOB_REF | GTF_ORDER_SIDEEFF)) != 0))
         {
             return true;
         }
 
-        if (((otherSideEffectFlags & GTF_ORDER_SIDEEFF) != 0) && ((m_sideEffectFlags & GTF_GLOB_REF) != 0))
+        if (((otherSideEffectFlags & GTF_ORDER_SIDEEFF) != 0) &&
+            ((m_sideEffectFlags & (GTF_GLOB_REF | GTF_ORDER_SIDEEFF)) != 0))
         {
             return true;
         }
diff --git a/src/coreclr/jit/sideeffects.h b/src/coreclr/jit/sideeffects.h
index d94622d9f0ca..0fef277532cf 100644
--- a/src/coreclr/jit/sideeffects.h
+++ b/src/coreclr/jit/sideeffects.h
@@ -13,7 +13,8 @@
 //
 class LclVarSet final
 {
-    union {
+    union
+    {
         hashBv*  m_bitVector;
         unsigned m_lclNum;
     };
diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp
index d69730ad520e..1157bf9e5bfc 100644
--- a/src/coreclr/jit/simd.cpp
+++ b/src/coreclr/jit/simd.cpp
@@ -456,7 +456,8 @@ GenTree* Compiler::impSIMDPopStack()
 {
     StackEntry se   = impPopStack();
     GenTree*   tree = se.val;
-    assert(varTypeIsSIMD(tree));
+
+    assert(varTypeIsSIMDOrMask(tree));
 
     // Handle calls that may return the struct via a return buffer.
     if (tree->OperIs(GT_CALL, GT_RET_EXPR))
@@ -549,8 +550,6 @@ bool areFieldAddressesTheSame(GenTreeFieldAddr* op1, GenTreeFieldAddr* op2)
 bool Compiler::areFieldsContiguous(GenTreeIndir* op1, GenTreeIndir* op2)
 {
     assert(op1->isIndir() && op2->isIndir());
-    // TODO-1stClassStructs: delete once IND<struct> nodes are no more.
-    assert(!op1->TypeIs(TYP_STRUCT) && !op2->TypeIs(TYP_STRUCT));
 
     var_types         op1Type      = op1->TypeGet();
     var_types         op2Type      = op2->TypeGet();
@@ -739,9 +738,9 @@ GenTree* Compiler::CreateAddressNodeForSimdHWIntrinsicCreate(GenTree* tree, var_
 }
 
 //-------------------------------------------------------------------------------
-// impMarkContiguousSIMDFieldStores: Try to identify if there are contiguous
-// assignments from SIMD field to memory. If there are, then mark the related
-// lclvar so that it won't be promoted.
+// impMarkContiguousSIMDFieldStores: Try to identify if there are contiguous stores
+// from SIMD field to memory. If there are, then mark the related lclvar so that it
+// won't be promoted.
 //
 // Arguments:
 //      stmt - GenTree*. Input statement node.
diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h
index 9e8781714dbc..3a5311aaaa79 100644
--- a/src/coreclr/jit/simd.h
+++ b/src/coreclr/jit/simd.h
@@ -6,7 +6,8 @@
 
 struct simd8_t
 {
-    union {
+    union
+    {
         float    f32[2];
         double   f64[1];
         int8_t   i8[8];
@@ -58,7 +59,8 @@ static_assert_no_msg(sizeof(simd8_t) == 8);
 #include <pshpack4.h>
 struct simd12_t
 {
-    union {
+    union
+    {
         float    f32[3];
         int8_t   i8[12];
         int16_t  i16[6];
@@ -116,7 +118,8 @@ static_assert_no_msg(sizeof(simd12_t) == 12);
 
 struct simd16_t
 {
-    union {
+    union
+    {
         float    f32[4];
         double   f64[2];
         int8_t   i8[16];
@@ -170,7 +173,8 @@ static_assert_no_msg(sizeof(simd16_t) == 16);
 #if defined(TARGET_XARCH)
 struct simd32_t
 {
-    union {
+    union
+    {
         float    f32[8];
         double   f64[4];
         int8_t   i8[32];
@@ -224,7 +228,8 @@ static_assert_no_msg(sizeof(simd32_t) == 32);
 
 struct simd64_t
 {
-    union {
+    union
+    {
         float    f32[16];
         double   f64[8];
         int8_t   i8[64];
@@ -277,6 +282,56 @@ struct simd64_t
 };
 static_assert_no_msg(sizeof(simd64_t) == 64);
 
+struct simdmask_t
+{
+    union
+    {
+        int8_t   i8[8];
+        int16_t  i16[4];
+        int32_t  i32[2];
+        int64_t  i64[1];
+        uint8_t  u8[8];
+        uint16_t u16[4];
+        uint32_t u32[2];
+        uint64_t u64[1];
+    };
+
+    bool operator==(const simdmask_t& other) const
+    {
+        return (u64[0] == other.u64[0]);
+    }
+
+    bool operator!=(const simdmask_t& other) const
+    {
+        return !(*this == other);
+    }
+
+    static simdmask_t AllBitsSet()
+    {
+        simdmask_t result;
+
+        result.u64[0] = 0xFFFFFFFFFFFFFFFF;
+
+        return result;
+    }
+
+    bool IsAllBitsSet() const
+    {
+        return *this == AllBitsSet();
+    }
+
+    bool IsZero() const
+    {
+        return *this == Zero();
+    }
+
+    static simdmask_t Zero()
+    {
+        return {};
+    }
+};
+static_assert_no_msg(sizeof(simdmask_t) == 8);
+
 typedef simd64_t simd_t;
 #else
 typedef simd16_t simd_t;
diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp
index f06b38736dda..9ffd3b7b011d 100644
--- a/src/coreclr/jit/simdashwintrinsic.cpp
+++ b/src/coreclr/jit/simdashwintrinsic.cpp
@@ -399,7 +399,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic        intrinsic,
         {
             argType = isInstanceMethod ? simdType
                                        : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
-            op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod);
+            op1     = getArgForHWIntrinsic(argType, argClass, isInstanceMethod);
 
             return gtNewSimdAsHWIntrinsicNode(retType, op1, hwIntrinsic, simdBaseJitType, simdSize);
         }
@@ -421,7 +421,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic        intrinsic,
 
             argType = isInstanceMethod ? simdType
                                        : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
-            op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod);
+            op1     = getArgForHWIntrinsic(argType, argClass, isInstanceMethod);
 
             return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, simdBaseJitType, simdSize);
         }
@@ -513,23 +513,44 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
     switch (intrinsic)
     {
 #if defined(TARGET_XARCH)
+
         case NI_VectorT_ConvertToDouble:
+        {
+            if (IsBaselineVector512IsaSupportedOpportunistically())
+            {
+                break;
+            }
+            return nullptr;
+        }
+
         case NI_VectorT_ConvertToInt64:
         case NI_VectorT_ConvertToUInt32:
         case NI_VectorT_ConvertToUInt64:
         {
-            // TODO-XARCH-CQ: These intrinsics should be accelerated
+            if (IsBaselineVector512IsaSupportedOpportunistically())
+            {
+                break;
+            }
+            return nullptr;
+        }
+
+        case NI_VectorT_ConvertToInt32:
+        {
+            if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
+            {
+                break;
+            }
             return nullptr;
         }
 
         case NI_VectorT_ConvertToSingle:
         {
-            if (simdBaseType == TYP_UINT)
+            if ((simdBaseType == TYP_INT) ||
+                (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically()))
             {
-                // TODO-XARCH-CQ: These intrinsics should be accelerated
-                return nullptr;
+                break;
             }
-            break;
+            return nullptr;
         }
 #endif // TARGET_XARCH
 
@@ -954,7 +975,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
 
             argType = isInstanceMethod ? simdType
                                        : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
-            op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod);
+            op1     = getArgForHWIntrinsic(argType, argClass, isInstanceMethod);
 
             switch (intrinsic)
             {
@@ -1154,50 +1175,95 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                 }
 
 #if defined(TARGET_XARCH)
+
+                case NI_VectorT_ConvertToInt64:
+                {
+                    assert(sig->numArgs == 1);
+                    assert(simdBaseType == TYP_DOUBLE);
+                    return gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize);
+                }
+
+                case NI_VectorT_ConvertToUInt32:
+                {
+                    assert(sig->numArgs == 1);
+                    assert(simdBaseType == TYP_FLOAT);
+                    return gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize);
+                }
+
+                case NI_VectorT_ConvertToUInt64:
+                {
+                    assert(sig->numArgs == 1);
+                    assert(simdBaseType == TYP_DOUBLE);
+                    return gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize);
+                }
+
                 case NI_VectorT_ConvertToInt32:
                 {
                     assert(simdBaseType == TYP_FLOAT);
-                    NamedIntrinsic convert;
+                    return gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_INT, simdBaseJitType, simdSize);
+                }
 
-                    switch (simdSize)
+                case NI_VectorT_ConvertToDouble:
+                {
+                    assert(sig->numArgs == 1);
+                    assert(varTypeIsLong(simdBaseType));
+                    NamedIntrinsic intrinsic = NI_Illegal;
+                    if (simdSize == 64)
                     {
-                        case 16:
-                            convert = NI_SSE2_ConvertToVector128Int32WithTruncation;
-                            break;
-                        case 32:
-                            convert = NI_AVX_ConvertToVector256Int32WithTruncation;
-                            break;
-                        case 64:
-                            convert = NI_AVX512F_ConvertToVector512Int32WithTruncation;
-                            break;
-                        default:
-                            unreached();
+                        intrinsic = NI_AVX512DQ_ConvertToVector512Double;
                     }
-
-                    return gtNewSimdHWIntrinsicNode(retType, op1, convert, simdBaseJitType, simdSize);
+                    else if (simdSize == 32)
+                    {
+                        intrinsic = NI_AVX512DQ_VL_ConvertToVector256Double;
+                    }
+                    else
+                    {
+                        assert(simdSize == 16);
+                        intrinsic = NI_AVX512DQ_VL_ConvertToVector128Double;
+                    }
+                    return gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
                 }
 
                 case NI_VectorT_ConvertToSingle:
                 {
-                    assert(simdBaseType == TYP_INT);
-                    NamedIntrinsic convert;
-
-                    switch (simdSize)
+                    assert(varTypeIsInt(simdBaseType));
+                    NamedIntrinsic intrinsic = NI_Illegal;
+                    if (simdBaseType == TYP_INT)
                     {
-                        case 16:
-                            convert = NI_SSE2_ConvertToVector128Single;
-                            break;
-                        case 32:
-                            convert = NI_AVX_ConvertToVector256Single;
-                            break;
-                        case 64:
-                            convert = NI_AVX512F_ConvertToVector512Single;
-                            break;
-                        default:
-                            unreached();
+                        switch (simdSize)
+                        {
+                            case 16:
+                                intrinsic = NI_SSE2_ConvertToVector128Single;
+                                break;
+                            case 32:
+                                intrinsic = NI_AVX_ConvertToVector256Single;
+                                break;
+                            case 64:
+                                intrinsic = NI_AVX512F_ConvertToVector512Single;
+                                break;
+                            default:
+                                unreached();
+                        }
                     }
-
-                    return gtNewSimdHWIntrinsicNode(retType, op1, convert, simdBaseJitType, simdSize);
+                    else if (simdBaseType == TYP_UINT)
+                    {
+                        switch (simdSize)
+                        {
+                            case 16:
+                                intrinsic = NI_AVX512F_VL_ConvertToVector128Single;
+                                break;
+                            case 32:
+                                intrinsic = NI_AVX512F_VL_ConvertToVector256Single;
+                                break;
+                            case 64:
+                                intrinsic = NI_AVX512F_ConvertToVector512Single;
+                                break;
+                            default:
+                                unreached();
+                        }
+                    }
+                    assert(intrinsic != NI_Illegal);
+                    return gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
                 }
 #elif defined(TARGET_ARM64)
                 case NI_VectorT_ConvertToDouble:
diff --git a/src/coreclr/jit/sm.cpp b/src/coreclr/jit/sm.cpp
index 5cd6e9879c78..5e9b97699b84 100644
--- a/src/coreclr/jit/sm.cpp
+++ b/src/coreclr/jit/sm.cpp
@@ -130,8 +130,8 @@ SM_STATE_ID CodeSeqSM::GetDestState(SM_STATE_ID srcState, SM_OPCODE opcode)
 
     if (cell->srcState != srcState)
     {
-        assert(cell->srcState == 0 ||
-               cell->srcState != srcState); // Either way means there is not outgoing edge from srcState.
+        assert(cell->srcState == 0 || cell->srcState != srcState); // Either way means there is not outgoing edge from
+                                                                   // srcState.
         return 0;
     }
     else
diff --git a/src/coreclr/jit/smallhash.h b/src/coreclr/jit/smallhash.h
index 54abf45dfb1e..227cad8cc918 100644
--- a/src/coreclr/jit/smallhash.h
+++ b/src/coreclr/jit/smallhash.h
@@ -338,7 +338,10 @@ class HashTableBase
 
 protected:
     HashTableBase(TAllocator alloc, Bucket* buckets, unsigned numBuckets)
-        : m_alloc(alloc), m_buckets(buckets), m_numBuckets(numBuckets), m_numFullBuckets(0)
+        : m_alloc(alloc)
+        , m_buckets(buckets)
+        , m_numBuckets(numBuckets)
+        , m_numFullBuckets(0)
     {
         if (numBuckets > 0)
         {
@@ -359,13 +362,15 @@ class HashTableBase
 
         Bucket* m_bucket;
 
-        KeyValuePair(Bucket* bucket) : m_bucket(bucket)
+        KeyValuePair(Bucket* bucket)
+            : m_bucket(bucket)
         {
             assert(m_bucket != nullptr);
         }
 
     public:
-        KeyValuePair() : m_bucket(nullptr)
+        KeyValuePair()
+            : m_bucket(nullptr)
         {
         }
 
@@ -392,7 +397,9 @@ class HashTableBase
         unsigned m_index;
 
         Iterator(Bucket* buckets, unsigned numBuckets, unsigned index)
-            : m_buckets(buckets), m_numBuckets(numBuckets), m_index(index)
+            : m_buckets(buckets)
+            , m_numBuckets(numBuckets)
+            , m_index(index)
         {
             assert((buckets != nullptr) || (numBuckets == 0));
             assert(index <= numBuckets);
@@ -405,7 +412,10 @@ class HashTableBase
         }
 
     public:
-        Iterator() : m_buckets(nullptr), m_numBuckets(0), m_index(0)
+        Iterator()
+            : m_buckets(nullptr)
+            , m_numBuckets(0)
+            , m_index(0)
         {
         }
 
@@ -636,7 +646,8 @@ class HashTable final : public HashTableBase<TKey, TValue, TKeyInfo, TAllocator>
     }
 
 public:
-    HashTable(TAllocator alloc) : TBase(alloc, nullptr, 0)
+    HashTable(TAllocator alloc)
+        : TBase(alloc, nullptr, 0)
     {
     }
 
@@ -670,7 +681,8 @@ class SmallHashTable final : public HashTableBase<TKey, TValue, TKeyInfo, TAlloc
     typename TBase::Bucket m_inlineBuckets[RoundedNumInlineBuckets];
 
 public:
-    SmallHashTable(TAllocator alloc) : TBase(alloc, m_inlineBuckets, RoundedNumInlineBuckets)
+    SmallHashTable(TAllocator alloc)
+        : TBase(alloc, m_inlineBuckets, RoundedNumInlineBuckets)
     {
     }
 };
diff --git a/src/coreclr/jit/smcommon.h b/src/coreclr/jit/smcommon.h
index a6de234b523e..d45f82ca87e2 100644
--- a/src/coreclr/jit/smcommon.h
+++ b/src/coreclr/jit/smcommon.h
@@ -44,6 +44,6 @@ struct SMState
 //
 
 #define MAX_CODE_SEQUENCE_LENGTH 7
-#define CODE_SEQUENCE_END ((SM_OPCODE)(SM_COUNT + 1))
+#define CODE_SEQUENCE_END        ((SM_OPCODE)(SM_COUNT + 1))
 
 #endif /* __sm_common_h__ */
diff --git a/src/coreclr/jit/smopenum.h b/src/coreclr/jit/smopenum.h
index dc1fd6a81033..c10140473171 100644
--- a/src/coreclr/jit/smopenum.h
+++ b/src/coreclr/jit/smopenum.h
@@ -4,7 +4,8 @@
 #ifndef __smopenum_h__
 #define __smopenum_h__
 
-typedef enum smopcode_t {
+typedef enum smopcode_t
+{
 #define SMOPDEF(smname, string) smname,
 #include "smopcode.def"
 #undef SMOPDEF
diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp
index 2250dfb9e9d1..8dadf691b0cb 100644
--- a/src/coreclr/jit/ssabuilder.cpp
+++ b/src/coreclr/jit/ssabuilder.cpp
@@ -1408,6 +1408,7 @@ void SsaBuilder::RenameVariables()
 {
     JITDUMP("*************** In SsaBuilder::RenameVariables()\n");
 
+    m_pCompiler->Metrics.VarsInSsa = 0;
     // The first thing we do is treat parameters and must-init variables as if they have a
     // virtual definition before entry -- they start out at SSA name 1.
     for (unsigned lclNum = 0; lclNum < m_pCompiler->lvaCount; lclNum++)
@@ -1417,6 +1418,8 @@ void SsaBuilder::RenameVariables()
             continue;
         }
 
+        m_pCompiler->Metrics.VarsInSsa++;
+
         LclVarDsc* varDsc = m_pCompiler->lvaGetDesc(lclNum);
         assert(varDsc->lvTracked);
 
@@ -1467,7 +1470,9 @@ void SsaBuilder::RenameVariables()
 
     public:
         SsaRenameDomTreeVisitor(Compiler* compiler, SsaBuilder* builder, SsaRenameState* renameStack)
-            : DomTreeVisitor(compiler), m_builder(builder), m_renameStack(renameStack)
+            : DomTreeVisitor(compiler)
+            , m_builder(builder)
+            , m_renameStack(renameStack)
         {
         }
 
diff --git a/src/coreclr/jit/ssabuilder.h b/src/coreclr/jit/ssabuilder.h
index f72ee884081c..470209e88611 100644
--- a/src/coreclr/jit/ssabuilder.h
+++ b/src/coreclr/jit/ssabuilder.h
@@ -61,7 +61,7 @@ class SsaBuilder
     // Rename all definitions and uses within a block.
     void BlockRenameVariables(BasicBlock* block);
     // Rename a local or memory definition generated by a local store/GT_CALL node.
-    void RenameDef(GenTree* defNode, BasicBlock* block);
+    void     RenameDef(GenTree* defNode, BasicBlock* block);
     unsigned RenamePushDef(GenTree* defNode, BasicBlock* block, unsigned lclNum, bool isFullDef);
     // Rename a use of a local variable.
     void RenameLclUse(GenTreeLclVarCommon* lclNode, BasicBlock* block);
diff --git a/src/coreclr/jit/ssarenamestate.cpp b/src/coreclr/jit/ssarenamestate.cpp
index 369c8f190359..257bb84d6616 100644
--- a/src/coreclr/jit/ssarenamestate.cpp
+++ b/src/coreclr/jit/ssarenamestate.cpp
@@ -13,7 +13,10 @@
 //    lvaCount - The number of local variables
 //
 SsaRenameState::SsaRenameState(CompAllocator alloc, unsigned lvaCount)
-    : m_alloc(alloc), m_lvaCount(lvaCount), m_stacks(nullptr), m_stackListTail(nullptr)
+    : m_alloc(alloc)
+    , m_lvaCount(lvaCount)
+    , m_stacks(nullptr)
+    , m_stackListTail(nullptr)
 {
 }
 
diff --git a/src/coreclr/jit/ssarenamestate.h b/src/coreclr/jit/ssarenamestate.h
index 37dc332746b5..a71fe33a370c 100644
--- a/src/coreclr/jit/ssarenamestate.h
+++ b/src/coreclr/jit/ssarenamestate.h
@@ -12,7 +12,8 @@ class SsaRenameState
         StackNode* m_top;
 
     public:
-        Stack() : m_top(nullptr)
+        Stack()
+            : m_top(nullptr)
         {
         }
 
@@ -47,7 +48,9 @@ class SsaRenameState
         unsigned m_ssaNum;
 
         StackNode(Stack* listPrev, BasicBlock* block, unsigned ssaNum)
-            : m_listPrev(listPrev), m_block(block), m_ssaNum(ssaNum)
+            : m_listPrev(listPrev)
+            , m_block(block)
+            , m_ssaNum(ssaNum)
         {
         }
     };
diff --git a/src/coreclr/jit/stacklevelsetter.cpp b/src/coreclr/jit/stacklevelsetter.cpp
index db97352d5e69..d25f2683ca30 100644
--- a/src/coreclr/jit/stacklevelsetter.cpp
+++ b/src/coreclr/jit/stacklevelsetter.cpp
@@ -287,7 +287,6 @@ void StackLevelSetter::SetThrowHelperBlock(SpecialCodeKind kind, BasicBlock* blo
         // or generate all required helpers after all stack alignment
         // has been added, and the stack level at each call to fgAddCodeRef()
         // is known, or can be recalculated.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 #if defined(UNIX_X86_ABI)
         framePointerRequired = true;
 #else  // !defined(UNIX_X86_ABI)
diff --git a/src/coreclr/jit/stacklevelsetter.h b/src/coreclr/jit/stacklevelsetter.h
index b3341c1f409c..45b7d13775af 100644
--- a/src/coreclr/jit/stacklevelsetter.h
+++ b/src/coreclr/jit/stacklevelsetter.h
@@ -22,8 +22,8 @@ class StackLevelSetter final : public Phase
     void SetThrowHelperBlock(SpecialCodeKind kind, BasicBlock* block);
 
     unsigned PopArgumentsFromCall(GenTreeCall* call);
-    void AddStackLevel(unsigned value);
-    void SubStackLevel(unsigned value);
+    void     AddStackLevel(unsigned value);
+    void     SubStackLevel(unsigned value);
 
     void CheckArgCnt();
     void CheckAdditionalArgs();
diff --git a/src/coreclr/jit/switchrecognition.cpp b/src/coreclr/jit/switchrecognition.cpp
index fa6abd0f23e8..747d04669934 100644
--- a/src/coreclr/jit/switchrecognition.cpp
+++ b/src/coreclr/jit/switchrecognition.cpp
@@ -9,7 +9,7 @@
 // We mainly rely on TryLowerSwitchToBitTest in these heuristics, but jump tables can be useful
 // even without conversion to a bitmap test.
 #define SWITCH_MAX_DISTANCE ((TARGET_POINTER_SIZE * BITS_PER_BYTE) - 1)
-#define SWITCH_MIN_TESTS 3
+#define SWITCH_MIN_TESTS    3
 
 //-----------------------------------------------------------------------------
 //  optSwitchRecognition: Optimize range check for `x == cns1 || x == cns2 || x == cns3 ...`
@@ -51,8 +51,8 @@ PhaseStatus Compiler::optSwitchRecognition()
 //
 // Arguments:
 //    block        - The block to check
-//    blockIfTrue  - [out] The block that will be jumped to if X == CNS
-//    blockIfFalse - [out] The block that will be jumped to if X != CNS
+//    trueEdge     - [out] The successor edge taken if X == CNS
+//    falseEdge    - [out] The successor edge taken if X != CNS
 //    isReversed   - [out] True if the condition is reversed (GT_NE)
 //    variableNode - [out] The variable node (X in the example above)
 //    cns          - [out] The constant value (CNS in the example above)
@@ -61,8 +61,8 @@ PhaseStatus Compiler::optSwitchRecognition()
 //    True if the block represents a constant test, false otherwise
 //
 bool IsConstantTestCondBlock(const BasicBlock* block,
-                             BasicBlock**      blockIfTrue,
-                             BasicBlock**      blockIfFalse,
+                             BasicBlock**      trueTarget,
+                             BasicBlock**      falseTarget,
                              bool*             isReversed,
                              GenTree**         variableNode = nullptr,
                              ssize_t*          cns          = nullptr)
@@ -94,9 +94,9 @@ bool IsConstantTestCondBlock(const BasicBlock* block,
                     return false;
                 }
 
-                *isReversed   = rootNode->gtGetOp1()->OperIs(GT_NE);
-                *blockIfTrue  = *isReversed ? block->GetFalseTarget() : block->GetTrueTarget();
-                *blockIfFalse = *isReversed ? block->GetTrueTarget() : block->GetFalseTarget();
+                *isReversed  = rootNode->gtGetOp1()->OperIs(GT_NE);
+                *trueTarget  = *isReversed ? block->GetFalseTarget() : block->GetTrueTarget();
+                *falseTarget = *isReversed ? block->GetTrueTarget() : block->GetFalseTarget();
 
                 if (block->FalseTargetIs(block) || block->TrueTargetIs(block))
                 {
@@ -141,14 +141,14 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
 
     GenTree*    variableNode = nullptr;
     ssize_t     cns          = 0;
-    BasicBlock* blockIfTrue  = nullptr;
-    BasicBlock* blockIfFalse = nullptr;
+    BasicBlock* trueTarget   = nullptr;
+    BasicBlock* falseTarget  = nullptr;
 
     // The algorithm is simple - we check that the given block is a constant test block
     // and then try to accumulate as many constant test blocks as possible. Once we hit
     // a block that doesn't match the pattern, we start processing the accumulated blocks.
     bool isReversed = false;
-    if (IsConstantTestCondBlock(firstBlock, &blockIfTrue, &blockIfFalse, &isReversed, &variableNode, &cns))
+    if (IsConstantTestCondBlock(firstBlock, &trueTarget, &falseTarget, &isReversed, &variableNode, &cns))
     {
         if (isReversed)
         {
@@ -161,65 +161,71 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
         // No more than SWITCH_MAX_TABLE_SIZE blocks are allowed (arbitrary limit in this context)
         int     testValueIndex                  = 0;
         ssize_t testValues[SWITCH_MAX_DISTANCE] = {};
-        testValues[testValueIndex++]            = cns;
+        testValues[testValueIndex]              = cns;
+        testValueIndex++;
 
-        const BasicBlock* prevBlock = firstBlock;
+        // Track likelihood of reaching the false block
+        //
+        weight_t          falseLikelihood = firstBlock->GetFalseEdge()->getLikelihood();
+        const BasicBlock* prevBlock       = firstBlock;
 
         // Now walk the next blocks and see if they are basically the same type of test
         for (const BasicBlock* currBb = firstBlock->Next(); currBb != nullptr; currBb = currBb->Next())
         {
             GenTree*    currVariableNode = nullptr;
             ssize_t     currCns          = 0;
-            BasicBlock* currBlockIfTrue  = nullptr;
-            BasicBlock* currBlockIfFalse = nullptr;
+            BasicBlock* currTrueTarget   = nullptr;
+            BasicBlock* currFalseTarget  = nullptr;
 
             if (!currBb->hasSingleStmt())
             {
                 // Only the first conditional block can have multiple statements.
                 // Stop searching and process what we already have.
-                return optSwitchConvert(firstBlock, testValueIndex, testValues, variableNode);
+                return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
             }
 
             // Inspect secondary blocks
-            if (IsConstantTestCondBlock(currBb, &currBlockIfTrue, &currBlockIfFalse, &isReversed, &currVariableNode,
+            if (IsConstantTestCondBlock(currBb, &currTrueTarget, &currFalseTarget, &isReversed, &currVariableNode,
                                         &currCns))
             {
-                if (currBlockIfTrue != blockIfTrue)
+                if (currTrueTarget != trueTarget)
                 {
                     // This blocks jumps to a different target, stop searching and process what we already have.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, variableNode);
+                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
                 if (!GenTree::Compare(currVariableNode, variableNode))
                 {
                     // A different variable node is used, stop searching and process what we already have.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, variableNode);
+                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
                 if (currBb->GetUniquePred(this) != prevBlock)
                 {
                     // Multiple preds in a secondary block, stop searching and process what we already have.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, variableNode);
+                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
                 if (!BasicBlock::sameEHRegion(prevBlock, currBb))
                 {
                     // Current block is in a different EH region, stop searching and process what we already have.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, variableNode);
+                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
                 // Ok we can work with that, add the test value to the list
                 testValues[testValueIndex++] = currCns;
+                falseLikelihood *= currBb->GetFalseEdge()->getLikelihood();
+
                 if (testValueIndex == SWITCH_MAX_DISTANCE)
                 {
                     // Too many suitable tests found - stop and process what we already have.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, variableNode);
+                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
                 if (isReversed)
                 {
                     // We only support reversed test (GT_NE) for the last block.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, variableNode);
+                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
                 prevBlock = currBb;
@@ -227,7 +233,7 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
             else
             {
                 // Current block is not a suitable test, stop searching and process what we already have.
-                return optSwitchConvert(firstBlock, testValueIndex, testValues, variableNode);
+                return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
             }
         }
     }
@@ -245,12 +251,14 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
 //    firstBlock - First conditional block in the chain
 //    testsCount - Number of conditional blocks in the chain
 //    testValues - Array of constants that are tested against the variable
+//    falseLikelihood - Likelihood of control flow reaching the false block
 //    nodeToTest - Variable node that is tested against the constants
 //
 // Return Value:
 //    True if the conversion was successful, false otherwise
 //
-bool Compiler::optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, GenTree* nodeToTest)
+bool Compiler::optSwitchConvert(
+    BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest)
 {
     assert(firstBlock->KindIs(BBJ_COND));
     assert(!varTypeIsSmall(nodeToTest));
@@ -319,6 +327,10 @@ bool Compiler::optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t*
     const bool  isTest       = IsConstantTestCondBlock(lastBlock, &blockIfTrue, &blockIfFalse, &isReversed);
     assert(isTest);
 
+    assert(firstBlock->TrueTargetIs(blockIfTrue));
+    FlowEdge* const trueEdge  = firstBlock->GetTrueEdge();
+    FlowEdge* const falseEdge = firstBlock->GetFalseEdge();
+
     // Convert firstBlock to a switch block
     firstBlock->SetSwitch(new (this, CMK_BasicBlock) BBswtDesc);
     firstBlock->bbCodeOffsEnd = lastBlock->bbCodeOffsEnd;
@@ -338,16 +350,17 @@ bool Compiler::optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t*
     gtUpdateStmtSideEffects(firstBlock->lastStmt());
 
     // Unlink and remove the whole chain of conditional blocks
-    BasicBlock* blockToRemove = firstBlock->Next();
-    fgRemoveRefPred(blockToRemove, firstBlock);
+    fgRemoveRefPred(falseEdge);
+    BasicBlock* blockToRemove = falseEdge->getDestinationBlock();
+    assert(firstBlock->NextIs(blockToRemove));
     while (!lastBlock->NextIs(blockToRemove))
     {
         blockToRemove = fgRemoveBlock(blockToRemove, true);
     }
 
-    const auto jumpCount = static_cast<unsigned>(maxValue - minValue + 1);
+    const unsigned jumpCount = static_cast<unsigned>(maxValue - minValue + 1);
     assert((jumpCount > 0) && (jumpCount <= SWITCH_MAX_DISTANCE + 1));
-    const auto jmpTab = new (this, CMK_BasicBlock) BasicBlock*[jumpCount + 1 /*default case*/];
+    FlowEdge** jmpTab = new (this, CMK_FlowEdge) FlowEdge*[jumpCount + 1 /*default case*/];
 
     // Quirk: lastBlock's false target may have diverged from bbNext. If the false target is behind firstBlock,
     // we may create a cycle in the BasicBlock list by setting firstBlock->bbNext to it.
@@ -361,16 +374,20 @@ bool Compiler::optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t*
         if (isReversed)
         {
             assert(lastBlock->FalseTargetIs(blockIfTrue));
-            fgRemoveRefPred(blockIfTrue, firstBlock);
-            blockIfTrue = fgNewBBafter(BBJ_ALWAYS, firstBlock, true, blockIfTrue);
-            fgAddRefPred(blockIfTrue->GetTarget(), blockIfTrue);
-            skipPredRemoval = true;
+            fgRemoveRefPred(trueEdge);
+            BasicBlock* targetBlock = blockIfTrue;
+            blockIfTrue             = fgNewBBafter(BBJ_ALWAYS, firstBlock, true);
+            FlowEdge* const newEdge = fgAddRefPred(targetBlock, blockIfTrue);
+            skipPredRemoval         = true;
+            blockIfTrue->SetTargetEdge(newEdge);
         }
         else
         {
             assert(lastBlock->FalseTargetIs(blockIfFalse));
-            blockIfFalse = fgNewBBafter(BBJ_ALWAYS, firstBlock, true, blockIfFalse);
-            fgAddRefPred(blockIfFalse->GetTarget(), blockIfFalse);
+            BasicBlock* targetBlock = blockIfFalse;
+            blockIfFalse            = fgNewBBafter(BBJ_ALWAYS, firstBlock, true);
+            FlowEdge* const newEdge = fgAddRefPred(targetBlock, blockIfFalse);
+            blockIfFalse->SetTargetEdge(newEdge);
         }
     }
 
@@ -395,21 +412,32 @@ bool Compiler::optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t*
     // Unlink blockIfTrue from firstBlock, we're going to link it again in the loop below.
     if (!skipPredRemoval)
     {
-        fgRemoveRefPred(blockIfTrue, firstBlock);
+        fgRemoveRefPred(trueEdge);
     }
 
+    FlowEdge* switchTrueEdge = nullptr;
+
     for (unsigned i = 0; i < jumpCount; i++)
     {
         // value exists in the testValues array (via bitVector) - 'true' case.
         const bool isTrue = (bitVector & static_cast<ssize_t>(1ULL << i)) != 0;
-        jmpTab[i]         = isTrue ? blockIfTrue : blockIfFalse;
 
-        fgAddRefPred(jmpTab[i], firstBlock);
+        FlowEdge* const newEdge = fgAddRefPred((isTrue ? blockIfTrue : blockIfFalse), firstBlock);
+        jmpTab[i]               = newEdge;
+
+        if ((switchTrueEdge == nullptr) && isTrue)
+        {
+            switchTrueEdge = newEdge;
+        }
     }
 
     // Link the 'default' case
-    jmpTab[jumpCount] = blockIfFalse;
-    fgAddRefPred(blockIfFalse, firstBlock);
+    FlowEdge* const switchDefaultEdge = fgAddRefPred(blockIfFalse, firstBlock);
+    jmpTab[jumpCount]                 = switchDefaultEdge;
+
+    // Fix likelihoods
+    switchDefaultEdge->setLikelihood(falseLikelihood);
+    switchTrueEdge->setLikelihood(1.0 - falseLikelihood);
 
     return true;
 }
diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h
index 1a81fe9a04f4..b4693baea829 100644
--- a/src/coreclr/jit/target.h
+++ b/src/coreclr/jit/target.h
@@ -72,19 +72,19 @@ inline bool compUnixX86Abi()
 // The following are intended to capture only those #defines that cannot be replaced
 // with static const members of Target
 #if defined(TARGET_AMD64)
-#define REGMASK_BITS 64
+#define REGMASK_BITS              64
 #define CSE_CONST_SHARED_LOW_BITS 16
 
 #elif defined(TARGET_X86)
-#define REGMASK_BITS 32
+#define REGMASK_BITS              32
 #define CSE_CONST_SHARED_LOW_BITS 16
 
 #elif defined(TARGET_ARM)
-#define REGMASK_BITS 64
+#define REGMASK_BITS              64
 #define CSE_CONST_SHARED_LOW_BITS 12
 
 #elif defined(TARGET_ARM64)
-#define REGMASK_BITS 64
+#define REGMASK_BITS              64
 #define CSE_CONST_SHARED_LOW_BITS 12
 
 #elif defined(TARGET_WASM)
@@ -92,11 +92,11 @@ inline bool compUnixX86Abi()
 #define CSE_CONST_SHARED_LOW_BITS 16
 
 #elif defined(TARGET_LOONGARCH64)
-#define REGMASK_BITS 64
+#define REGMASK_BITS              64
 #define CSE_CONST_SHARED_LOW_BITS 12
 
 #elif defined(TARGET_RISCV64)
-#define REGMASK_BITS 64
+#define REGMASK_BITS              64
 #define CSE_CONST_SHARED_LOW_BITS 12
 
 #else
@@ -118,7 +118,7 @@ inline bool compUnixX86Abi()
 enum _regNumber_enum : unsigned
 {
 #define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
-#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#define REGALIAS(alias, realname)       REG_##alias = REG_##realname,
 #include "register.h"
 
     REG_COUNT,
@@ -130,7 +130,7 @@ enum _regMask_enum : unsigned __int64
 {
     RBM_NONE = 0,
 #define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
-#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#define REGALIAS(alias, realname)       RBM_##alias = RBM_##realname,
 #include "register.h"
 };
 
@@ -139,7 +139,7 @@ enum _regMask_enum : unsigned __int64
 enum _regNumber_enum : unsigned
 {
 #define REGDEF(name, rnum, mask, xname, wname) REG_##name = rnum,
-#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#define REGALIAS(alias, realname)              REG_##alias = REG_##realname,
 #include "register.h"
 
     REG_COUNT,
@@ -151,7 +151,7 @@ enum _regMask_enum : unsigned __int64
 {
     RBM_NONE = 0,
 #define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask,
-#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#define REGALIAS(alias, realname)              RBM_##alias = RBM_##realname,
 #include "register.h"
 };
 
@@ -160,7 +160,7 @@ enum _regMask_enum : unsigned __int64
 enum _regNumber_enum : unsigned
 {
 #define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
-#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#define REGALIAS(alias, realname)       REG_##alias = REG_##realname,
 #include "register.h"
 
     REG_COUNT,
@@ -173,9 +173,8 @@ enum _regMask_enum : uint64_t
     RBM_NONE = 0,
 
 #define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
-#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#define REGALIAS(alias, realname)       RBM_##alias = RBM_##realname,
 #include "register.h"
-
 };
 
 #elif defined(TARGET_X86)
@@ -183,7 +182,7 @@ enum _regMask_enum : uint64_t
 enum _regNumber_enum : unsigned
 {
 #define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
-#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#define REGALIAS(alias, realname)       REG_##alias = REG_##realname,
 #include "register.h"
 
     REG_COUNT,
@@ -196,7 +195,7 @@ enum _regMask_enum : unsigned
     RBM_NONE = 0,
 
 #define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
-#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#define REGALIAS(alias, realname)       RBM_##alias = RBM_##realname,
 #include "register.h"
 };
 
@@ -240,7 +239,7 @@ enum _regMask_enum : unsigned
 #if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
 typedef unsigned __int64 regMaskTP;
 #else
-typedef unsigned       regMaskTP;
+typedef unsigned regMaskTP;
 #endif
 
 #if REGMASK_BITS == 8
@@ -267,9 +266,9 @@ typedef unsigned char   regNumberSmall;
 /*****************************************************************************/
 
 #ifdef DEBUG
-#define DSP_SRC_OPER_LEFT 0
+#define DSP_SRC_OPER_LEFT  0
 #define DSP_SRC_OPER_RIGHT 1
-#define DSP_DST_OPER_LEFT 1
+#define DSP_DST_OPER_LEFT  1
 #define DSP_DST_OPER_RIGHT 0
 #endif
 
@@ -429,7 +428,7 @@ inline bool genIsValidFloatReg(regNumber reg)
     return reg >= REG_FP_FIRST && reg <= REG_FP_LAST;
 }
 
-#if defined(TARGET_XARCH)
+#if defined(FEATURE_MASKED_HW_INTRINSICS)
 /*****************************************************************************
  * Return true if the register is a valid mask register
  */
@@ -437,7 +436,7 @@ inline bool genIsValidMaskReg(regNumber reg)
 {
     return reg >= REG_MASK_FIRST && reg <= REG_MASK_LAST;
 }
-#endif // TARGET_XARCH
+#endif // FEATURE_MASKED_HW_INTRINSICS
 
 #ifdef TARGET_ARM
 
@@ -455,10 +454,13 @@ inline bool genIsValidDoubleReg(regNumber reg)
 // hasFixedRetBuffReg:
 //     Returns true if our target architecture uses a fixed return buffer register
 //
-inline bool hasFixedRetBuffReg()
+inline bool hasFixedRetBuffReg(CorInfoCallConvExtension callConv)
 {
-#ifdef TARGET_ARM64
-    return true;
+#if defined(TARGET_ARM64)
+    // Windows does not use fixed ret buff arg for instance calls, but does otherwise.
+    return !TargetOS::IsWindows || !callConvIsInstanceMethodCallConv(callConv);
+#elif defined(TARGET_AMD64) && defined(SWIFT_SUPPORT)
+    return callConv == CorInfoCallConvExtension::Swift;
 #else
     return false;
 #endif
@@ -468,11 +470,14 @@ inline bool hasFixedRetBuffReg()
 // theFixedRetBuffReg:
 //     Returns the regNumber to use for the fixed return buffer
 //
-inline regNumber theFixedRetBuffReg()
+inline regNumber theFixedRetBuffReg(CorInfoCallConvExtension callConv)
 {
-    assert(hasFixedRetBuffReg()); // This predicate should be checked before calling this method
-#ifdef TARGET_ARM64
+    assert(hasFixedRetBuffReg(callConv)); // This predicate should be checked before calling this method
+#if defined(TARGET_ARM64)
     return REG_ARG_RET_BUFF;
+#elif defined(TARGET_AMD64) && defined(SWIFT_SUPPORT)
+    assert(callConv == CorInfoCallConvExtension::Swift);
+    return REG_SWIFT_ARG_RET_BUFF;
 #else
     return REG_NA;
 #endif
@@ -482,11 +487,14 @@ inline regNumber theFixedRetBuffReg()
 // theFixedRetBuffMask:
 //     Returns the regNumber to use for the fixed return buffer
 //
-inline regMaskTP theFixedRetBuffMask()
+inline regMaskTP theFixedRetBuffMask(CorInfoCallConvExtension callConv)
 {
-    assert(hasFixedRetBuffReg()); // This predicate should be checked before calling this method
-#ifdef TARGET_ARM64
+    assert(hasFixedRetBuffReg(callConv)); // This predicate should be checked before calling this method
+#if defined(TARGET_ARM64)
     return RBM_ARG_RET_BUFF;
+#elif defined(TARGET_AMD64) && defined(SWIFT_SUPPORT)
+    assert(callConv == CorInfoCallConvExtension::Swift);
+    return RBM_SWIFT_ARG_RET_BUFF;
 #else
     return 0;
 #endif
@@ -496,11 +504,14 @@ inline regMaskTP theFixedRetBuffMask()
 // theFixedRetBuffArgNum:
 //     Returns the argNum to use for the fixed return buffer
 //
-inline unsigned theFixedRetBuffArgNum()
+inline unsigned theFixedRetBuffArgNum(CorInfoCallConvExtension callConv)
 {
-    assert(hasFixedRetBuffReg()); // This predicate should be checked before calling this method
+    assert(hasFixedRetBuffReg(callConv)); // This predicate should be checked before calling this method
 #ifdef TARGET_ARM64
     return RET_BUFF_ARGNUM;
+#elif defined(TARGET_AMD64) && defined(SWIFT_SUPPORT)
+    assert(callConv == CorInfoCallConvExtension::Swift);
+    return SWIFT_RET_BUFF_ARGNUM;
 #else
     return BAD_VAR_NUM;
 #endif
@@ -511,16 +522,28 @@ inline unsigned theFixedRetBuffArgNum()
 //     Returns the full mask of all possible integer registers
 //     Note this includes the fixed return buffer register on Arm64
 //
-inline regMaskTP fullIntArgRegMask()
+inline regMaskTP fullIntArgRegMask(CorInfoCallConvExtension callConv)
 {
-    if (hasFixedRetBuffReg())
+    regMaskTP result = RBM_ARG_REGS;
+    if (hasFixedRetBuffReg(callConv))
     {
-        return RBM_ARG_REGS | theFixedRetBuffMask();
+        result |= theFixedRetBuffMask(callConv);
     }
-    else
+
+#ifdef SWIFT_SUPPORT
+    if (callConv == CorInfoCallConvExtension::Swift)
     {
-        return RBM_ARG_REGS;
+        result |= RBM_SWIFT_SELF;
+
+        // We don't pass any arguments in REG_SWIFT_ERROR, but as a quirk,
+        // we set the SwiftError* parameter to be passed in this register,
+        // and later ensure the parameter isn't given any registers/stack space
+        // to avoid interfering with other arguments.
+        result |= RBM_SWIFT_ERROR;
     }
+#endif
+
+    return result;
 }
 
 //-------------------------------------------------------------------------------------------
@@ -528,12 +551,12 @@ inline regMaskTP fullIntArgRegMask()
 //     Returns true if the register is a valid integer argument register
 //     Note this method also returns true on Arm64 when 'reg' is the RetBuff register
 //
-inline bool isValidIntArgReg(regNumber reg)
+inline bool isValidIntArgReg(regNumber reg, CorInfoCallConvExtension callConv)
 {
 #if defined(TARGET_WASM)
     return true;
 #else
-    return (genRegMask(reg) & fullIntArgRegMask()) != 0;
+    return (genRegMask(reg) & fullIntArgRegMask(callConv)) != 0;
 #endif
 }
 
@@ -788,8 +811,8 @@ typedef __int64          target_ssize_t;
 #define TARGET_SIGN_BIT (1ULL << 63)
 
 #else // !TARGET_64BIT
-typedef unsigned int   target_size_t;
-typedef int            target_ssize_t;
+typedef unsigned int target_size_t;
+typedef int          target_ssize_t;
 #define TARGET_SIGN_BIT (1ULL << 31)
 
 #endif // !TARGET_64BIT
diff --git a/src/coreclr/jit/targetamd64.cpp b/src/coreclr/jit/targetamd64.cpp
index 4ac48cb229fb..85b1ba6ef19a 100644
--- a/src/coreclr/jit/targetamd64.cpp
+++ b/src/coreclr/jit/targetamd64.cpp
@@ -30,4 +30,170 @@ const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3 };
 #endif // !UNIX_AMD64_ABI
 // clang-format on
 
+#ifdef UNIX_AMD64_ABI
+//-----------------------------------------------------------------------------
+// SysVX64Classifier:
+//   Construct a new instance of the SysV x64 ABI classifier.
+//
+// Parameters:
+//   info - Info about the method being classified.
+//
+SysVX64Classifier::SysVX64Classifier(const ClassifierInfo& info)
+    : m_intRegs(intArgRegs, ArrLen(intArgRegs))
+    , m_floatRegs(fltArgRegs, ArrLen(fltArgRegs))
+{
+}
+
+//-----------------------------------------------------------------------------
+// Classify:
+//   Classify a parameter for the SysV x64 ABI.
+//
+// Parameters:
+//   comp           - Compiler instance
+//   type           - The type of the parameter
+//   structLayout   - The layout of the struct. Expected to be non-null if
+//                    varTypeIsStruct(type) is true.
+//   wellKnownParam - Well known type of the parameter (if it may affect its ABI classification)
+//
+// Returns:
+//   Classification information for the parameter.
+//
+ABIPassingInformation SysVX64Classifier::Classify(Compiler*    comp,
+                                                  var_types    type,
+                                                  ClassLayout* structLayout,
+                                                  WellKnownArg wellKnownParam)
+{
+    bool                                                canEnreg = false;
+    SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+    if (varTypeIsStruct(type))
+    {
+        comp->eeGetSystemVAmd64PassStructInRegisterDescriptor(structLayout->GetClassHandle(), &structDesc);
+
+        if (structDesc.passedInRegisters)
+        {
+            unsigned intRegCount   = 0;
+            unsigned floatRegCount = 0;
+
+            for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+            {
+                if (structDesc.IsIntegralSlot(i))
+                {
+                    intRegCount++;
+                }
+                else if (structDesc.IsSseSlot(i))
+                {
+                    floatRegCount++;
+                }
+                else
+                {
+                    assert(!"Invalid eightbyte classification type.");
+                    break;
+                }
+            }
+
+            canEnreg = (intRegCount <= m_intRegs.Count()) && (floatRegCount <= m_floatRegs.Count());
+        }
+    }
+    else
+    {
+        unsigned availRegs = varTypeUsesFloatArgReg(type) ? m_floatRegs.Count() : m_intRegs.Count();
+        canEnreg           = availRegs > 0;
+    }
+
+    ABIPassingInformation info;
+    if (canEnreg)
+    {
+        if (varTypeIsStruct(type))
+        {
+            info.NumSegments = structDesc.eightByteCount;
+            info.Segments    = new (comp, CMK_ABI) ABIPassingSegment[structDesc.eightByteCount];
+
+            for (unsigned i = 0; i < structDesc.eightByteCount; i++)
+            {
+                regNumber reg = structDesc.IsIntegralSlot(i) ? m_intRegs.Dequeue() : m_floatRegs.Dequeue();
+                info.Segments[i] =
+                    ABIPassingSegment::InRegister(reg, structDesc.eightByteOffsets[i], structDesc.eightByteSizes[i]);
+            }
+        }
+        else
+        {
+            regNumber reg = varTypeUsesFloatArgReg(type) ? m_floatRegs.Dequeue() : m_intRegs.Dequeue();
+            info = ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(reg, 0, genTypeSize(type)));
+        }
+    }
+    else
+    {
+        assert((m_stackArgSize % TARGET_POINTER_SIZE) == 0);
+        unsigned size = type == TYP_STRUCT ? structLayout->GetSize() : genTypeSize(type);
+        info          = ABIPassingInformation::FromSegment(comp, ABIPassingSegment::OnStack(m_stackArgSize, 0, size));
+        m_stackArgSize += roundUp(size, TARGET_POINTER_SIZE);
+    }
+
+    return info;
+}
+
+#else // !UNIX_AMD64_ABI
+
+//-----------------------------------------------------------------------------
+// WinX64Classifier:
+//   Construct a new instance of the Windows x64 ABI classifier.
+//
+// Parameters:
+//   info - Info about the method being classified.
+//
+WinX64Classifier::WinX64Classifier(const ClassifierInfo& info)
+    : m_intRegs(intArgRegs, ArrLen(intArgRegs))
+    , m_floatRegs(fltArgRegs, ArrLen(fltArgRegs))
+{
+}
+
+//-----------------------------------------------------------------------------
+// Classify:
+//   Classify a parameter for the Windows x64 ABI.
+//
+// Parameters:
+//   comp           - Compiler instance
+//   type           - The type of the parameter
+//   structLayout   - The layout of the struct. Expected to be non-null if
+//                    varTypeIsStruct(type) is true.
+//   wellKnownParam - Well known type of the parameter (if it may affect its ABI classification)
+//
+// Returns:
+//   Classification information for the parameter.
+//
+ABIPassingInformation WinX64Classifier::Classify(Compiler*    comp,
+                                                 var_types    type,
+                                                 ClassLayout* structLayout,
+                                                 WellKnownArg wellKnownParam)
+{
+    // On windows-x64 ABI all parameters take exactly 1 stack slot (structs
+    // that do not fit are passed implicitly by reference). Passing a parameter
+    // in an int register also consumes the corresponding float register and
+    // vice versa.
+    assert(m_intRegs.Count() == m_floatRegs.Count());
+
+    unsigned typeSize = type == TYP_STRUCT ? structLayout->GetSize() : genTypeSize(type);
+    if ((typeSize > TARGET_POINTER_SIZE) || !isPow2(typeSize))
+    {
+        typeSize = TARGET_POINTER_SIZE; // Passed by implicit byref
+    }
+
+    ABIPassingSegment segment;
+    if (m_intRegs.Count() > 0)
+    {
+        regNumber reg = varTypeUsesFloatArgReg(type) ? m_floatRegs.Peek() : m_intRegs.Peek();
+        segment       = ABIPassingSegment::InRegister(reg, 0, typeSize);
+        m_intRegs.Dequeue();
+        m_floatRegs.Dequeue();
+    }
+    else
+    {
+        segment = ABIPassingSegment::OnStack(m_stackArgSize, 0, typeSize);
+        m_stackArgSize += TARGET_POINTER_SIZE;
+    }
+
+    return ABIPassingInformation::FromSegment(comp, segment);
+}
+#endif
+
 #endif // TARGET_AMD64
diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h
index 4abe71984b57..7e72da9cf2cc 100644
--- a/src/coreclr/jit/targetamd64.h
+++ b/src/coreclr/jit/targetamd64.h
@@ -32,7 +32,7 @@
   #define FEATURE_SET_FLAGS        0       // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
   #define MAX_PASS_SINGLEREG_BYTES      8  // Maximum size of a struct passed in a single register (double).
 #ifdef    UNIX_AMD64_ABI
-  #define FEATURE_IMPLICIT_BYREFS       0  // Support for struct parameters passed via pointers to shadow copies
+  #define FEATURE_IMPLICIT_BYREFS       1  // Support for struct parameters passed via pointers to shadow copies
   #define FEATURE_MULTIREG_ARGS_OR_RET  1  // Support for passing and/or returning single values in more than one register
   #define FEATURE_MULTIREG_ARGS         1  // Support for passing a single argument in more than one register
   #define FEATURE_MULTIREG_RET          1  // Support for returning a single value in more than one register
@@ -41,9 +41,9 @@
   #define MAX_PASS_MULTIREG_BYTES      32  // Maximum size of a struct that could be passed in more than one register (Max is two SIMD16s)
   #define MAX_RET_MULTIREG_BYTES       32  // Maximum size of a struct that could be returned in more than one register  (Max is two SIMD16s)
   #define MAX_ARG_REG_COUNT             2  // Maximum registers used to pass a single argument in multiple registers.
-  #define MAX_RET_REG_COUNT             2  // Maximum registers used to return a value.
+  #define MAX_RET_REG_COUNT             4  // Maximum registers used to return a value.
 
-  #define MAX_MULTIREG_COUNT            2  // Maximum number of registers defined by a single instruction (including calls).
+  #define MAX_MULTIREG_COUNT            4  // Maximum number of registers defined by a single instruction (including calls).
                                            // This is also the maximum number of registers for a MultiReg node.
 #else // !UNIX_AMD64_ABI
   #define WINDOWS_AMD64_ABI                // Uses the Windows ABI for AMD64
@@ -68,7 +68,6 @@
   #define EMIT_TRACK_STACK_DEPTH   1
   #define TARGET_POINTER_SIZE      8       // equal to sizeof(void*) and the managed pointer size in bytes for this target
   #define FEATURE_EH               1       // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
-  #define FEATURE_EH_CALLFINALLY_THUNKS 1  // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
 #ifdef    UNIX_AMD64_ABI
   #define ETW_EBP_FRAMED           1       // if 1 we cannot use EBP as a scratch register and must create EBP based frames for most methods
 #else // !UNIX_AMD64_ABI
@@ -206,11 +205,11 @@
   // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF.
   #define RBM_CALLEE_GCTRASH_WRITEBARRIER       RBM_CALLEE_TRASH_NOGC
 
-  // Registers killed by CORINFO_HELP_ASSIGN_BYREF.
-  #define RBM_CALLEE_TRASH_WRITEBARRIER_BYREF   (RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC)
-
   // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF.
-  #define RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF (RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI))
+  #define RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF (RBM_RAX | RBM_RCX)
+
+  // Registers killed by CORINFO_HELP_ASSIGN_BYREF.
+  #define RBM_CALLEE_TRASH_WRITEBARRIER_BYREF   (RBM_RSI | RBM_RDI | RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF)
 
   // We have two register classifications
   // * callee trash: aka     volatile or caller saved
@@ -511,12 +510,6 @@
   #define RBM_FLTARG_REGS         (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3)
 #endif // !UNIX_AMD64_ABI
 
-  // The registers trashed by profiler enter/leave/tailcall hook
-  // See vm\amd64\asmhelpers.asm for more details.
-  #define RBM_PROFILER_ENTER_TRASH          RBM_CALLEE_TRASH
-
-  #define RBM_PROFILER_TAILCALL_TRASH       RBM_PROFILER_LEAVE_TRASH
-
   // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper.
 #ifdef UNIX_AMD64_ABI
   // See vm\amd64\unixasmhelpers.S for more details.
@@ -525,11 +518,20 @@
   // The return registers could be any two from the set { RAX, RDX, XMM0, XMM1 }.
   // STOP_FOR_GC helper preserves all the 4 possible return registers.
   #define RBM_STOP_FOR_GC_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET | RBM_FLOATRET_1 | RBM_INTRET_1))
+
+  // The registers trashed by profiler enter/leave/tailcall hook
+  // See vm\amd64\asmhelpers.S for more details.
+  #define RBM_PROFILER_ENTER_TRASH  (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_FLTARG_REGS))
   #define RBM_PROFILER_LEAVE_TRASH  (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET | RBM_FLOATRET_1 | RBM_INTRET_1))
+  #define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH
+
 #else
   // See vm\amd64\asmhelpers.asm for more details.
   #define RBM_STOP_FOR_GC_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET))
+
+  #define RBM_PROFILER_ENTER_TRASH  RBM_CALLEE_TRASH
   #define RBM_PROFILER_LEAVE_TRASH  (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET))
+  #define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH
 #endif
 
   // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
@@ -563,4 +565,18 @@
   #define RBM_STACK_PROBE_HELPER_TRASH RBM_RAX
 #endif // !UNIX_AMD64_ABI
 
+#ifdef UNIX_AMD64_ABI
+  #define SWIFT_SUPPORT
+  #define REG_SWIFT_ERROR REG_R12
+  #define RBM_SWIFT_ERROR RBM_R12
+  #define REG_SWIFT_SELF  REG_R13
+  #define RBM_SWIFT_SELF  RBM_R13
+
+  #define REG_SWIFT_INTRET_ORDER REG_RAX,REG_RDX,REG_RCX,REG_R8
+  #define REG_SWIFT_FLOATRET_ORDER REG_XMM0,REG_XMM1,REG_XMM2,REG_XMM3
+  #define REG_SWIFT_ARG_RET_BUFF REG_RAX
+  #define RBM_SWIFT_ARG_RET_BUFF RBM_RAX
+  #define SWIFT_RET_BUFF_ARGNUM  MAX_REG_ARG
+#endif // UNIX_AMD64_ABI
+
 // clang-format on
diff --git a/src/coreclr/jit/targetarm.cpp b/src/coreclr/jit/targetarm.cpp
index 8e117bae8102..037578fa67b8 100644
--- a/src/coreclr/jit/targetarm.cpp
+++ b/src/coreclr/jit/targetarm.cpp
@@ -26,4 +26,186 @@ const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5,
 
 static_assert_no_msg(RBM_ALLDOUBLE == (RBM_ALLDOUBLE_HIGH >> 1));
 
+//-----------------------------------------------------------------------------
+// Arm32Classifier:
+//   Construct a new instance of the arm32 ABI classifier.
+//
+// Parameters:
+//   info - Info about the method being classified.
+//
+Arm32Classifier::Arm32Classifier(const ClassifierInfo& info)
+    : m_info(info)
+{
+}
+
+//-----------------------------------------------------------------------------
+// Classify:
+//   Classify a parameter for the arm32 ABI.
+//
+// Parameters:
+//   comp           - Compiler instance
+//   type           - The type of the parameter
+//   structLayout   - The layout of the struct. Expected to be non-null if
+//                    varTypeIsStruct(type) is true.
+//   wellKnownParam - Well known type of the parameter (if it may affect its ABI classification)
+//
+// Returns:
+//   Classification information for the parameter.
+//
+ABIPassingInformation Arm32Classifier::Classify(Compiler*    comp,
+                                                var_types    type,
+                                                ClassLayout* structLayout,
+                                                WellKnownArg wellKnownParam)
+{
+    if (!comp->opts.compUseSoftFP)
+    {
+        if (varTypeIsStruct(type))
+        {
+            var_types hfaType = comp->GetHfaType(structLayout->GetClassHandle());
+
+            if (hfaType != TYP_UNDEF)
+            {
+                unsigned slots = structLayout->GetSize() / genTypeSize(hfaType);
+                return ClassifyFloat(comp, hfaType, slots);
+            }
+        }
+
+        if (varTypeIsFloating(type))
+        {
+            return ClassifyFloat(comp, type, 1);
+        }
+    }
+
+    unsigned alignment = 4;
+    if ((type == TYP_LONG) || (type == TYP_DOUBLE) ||
+        ((type == TYP_STRUCT) &&
+         (comp->info.compCompHnd->getClassAlignmentRequirement(structLayout->GetClassHandle()) == 8)))
+    {
+        alignment    = 8;
+        m_nextIntReg = roundUp(m_nextIntReg, 2);
+    }
+
+    unsigned size        = type == TYP_STRUCT ? structLayout->GetSize() : genTypeSize(type);
+    unsigned alignedSize = roundUp(size, alignment);
+
+    unsigned numInRegs  = min(alignedSize / 4, 4 - m_nextIntReg);
+    bool     anyOnStack = numInRegs < (alignedSize / 4);
+
+    // If we already passed anything on stack (due to float args) then we
+    // cannot split an arg.
+    if ((numInRegs > 0) && anyOnStack && (m_stackArgSize != 0))
+    {
+        numInRegs = 0;
+    }
+
+    ABIPassingInformation info;
+    info.NumSegments = numInRegs + (anyOnStack ? 1 : 0);
+    info.Segments    = new (comp, CMK_ABI) ABIPassingSegment[info.NumSegments];
+
+    for (unsigned i = 0; i < numInRegs; i++)
+    {
+        unsigned endOffs = min((i + 1) * 4, size);
+        info.Segments[i] =
+            ABIPassingSegment::InRegister(static_cast<regNumber>(static_cast<unsigned>(REG_R0) + m_nextIntReg + i),
+                                          i * 4, endOffs - (i * 4));
+    }
+
+    m_nextIntReg += numInRegs;
+
+    if (anyOnStack)
+    {
+        m_stackArgSize           = roundUp(m_stackArgSize, alignment);
+        unsigned stackSize       = size - (numInRegs * 4);
+        info.Segments[numInRegs] = ABIPassingSegment::OnStack(m_stackArgSize, 0, stackSize);
+        m_stackArgSize += roundUp(stackSize, 4);
+
+        // As soon as any int arg goes on stack we cannot put anything else in
+        // int registers. This situation can happen if an arg would normally be
+        // split but wasn't because a float arg was already passed on stack.
+        m_nextIntReg = 4;
+    }
+
+    return info;
+}
+
+//-----------------------------------------------------------------------------
+// ClassifyFloat:
+//   Classify a parameter that uses float registers.
+//
+// Parameters:
+//   comp     - Compiler instance
+//   type     - The type of the parameter
+//   numElems - Number of elements for the parameter.
+//
+// Returns:
+//   Classification information for the parameter.
+//
+// Remarks:
+//   Float parameters can require multiple registers; the double registers are
+//   overlaid on top of the float registers so that d0 = s0, s1, d1 = s2, s3
+//   etc. This means that allocating a double register automatically makes the
+//   two corresponding float registers unavailable.
+//
+//   The ABI also supports HFAs that similarly require multiple registers for
+//   passing. When multiple registers are required for a single argument they
+//   must always be allocated into consecutive float registers. However,
+//   backfilling is allowed. For example, a signature like
+//   Foo(float x, double y, float z) allocates x in REG_F0 = s0, y in REG_F2 =
+//   d1, z in REG_F1 = s1.
+//
+ABIPassingInformation Arm32Classifier::ClassifyFloat(Compiler* comp, var_types type, unsigned numElems)
+{
+    assert((type == TYP_FLOAT) || (type == TYP_DOUBLE));
+
+    unsigned numConsecutive = type == TYP_FLOAT ? numElems : (numElems * 2);
+
+    // Find the first start index that has a consecutive run of
+    // 'numConsecutive' bits set.
+    unsigned startRegMask = m_floatRegs;
+    for (unsigned i = 1; i < numConsecutive; i++)
+    {
+        startRegMask &= m_floatRegs >> i;
+    }
+
+    // Doubles can only start at even indices.
+    if (type == TYP_DOUBLE)
+    {
+        startRegMask &= 0b0101010101010101;
+    }
+
+    if (startRegMask != 0)
+    {
+        unsigned startRegIndex = BitOperations::TrailingZeroCount(startRegMask);
+        unsigned usedRegsMask  = ((1 << numConsecutive) - 1) << startRegIndex;
+        // First consecutive run of numConsecutive bits start at startRegIndex
+        assert((m_floatRegs & usedRegsMask) == usedRegsMask);
+
+        m_floatRegs ^= usedRegsMask;
+        ABIPassingInformation info;
+        info.NumSegments        = numElems;
+        info.Segments           = new (comp, CMK_ABI) ABIPassingSegment[numElems];
+        unsigned numRegsPerElem = type == TYP_FLOAT ? 1 : 2;
+        for (unsigned i = 0; i < numElems; i++)
+        {
+            regNumber reg = static_cast<regNumber>(static_cast<unsigned>(REG_F0) + startRegIndex + i * numRegsPerElem);
+            info.Segments[i] = ABIPassingSegment::InRegister(reg, i * genTypeSize(type), genTypeSize(type));
+        }
+
+        return info;
+    }
+    else
+    {
+        // As soon as any float arg goes on stack no other float arg can go in a register.
+        m_floatRegs = 0;
+
+        m_stackArgSize = roundUp(m_stackArgSize, genTypeSize(type));
+        ABIPassingInformation info =
+            ABIPassingInformation::FromSegment(comp, ABIPassingSegment::OnStack(m_stackArgSize, 0,
+                                                                                numElems * genTypeSize(type)));
+        m_stackArgSize += numElems * genTypeSize(type);
+
+        return info;
+    }
+}
+
 #endif // TARGET_ARM
diff --git a/src/coreclr/jit/targetarm.h b/src/coreclr/jit/targetarm.h
index ac9d72cab31f..0f56ebe1ce98 100644
--- a/src/coreclr/jit/targetarm.h
+++ b/src/coreclr/jit/targetarm.h
@@ -40,7 +40,6 @@
                                            // need to track stack depth, but this is currently necessary to get GC information reported at call sites.
   #define TARGET_POINTER_SIZE      4       // equal to sizeof(void*) and the managed pointer size in bytes for this target
   #define FEATURE_EH               1       // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
-  #define FEATURE_EH_CALLFINALLY_THUNKS 1  // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
   #define ETW_EBP_FRAMED           1       // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods
   #define CSE_CONSTS               1       // Enable if we want to CSE constants
 
@@ -138,8 +137,8 @@
   // ARM write barrier ABI (see vm\arm\asmhelpers.asm, vm\arm\asmhelpers.S):
   // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier):
   //     On entry:
-  //       r0: the destination address (LHS of the assignment)
-  //       r1: the object reference (RHS of the assignment)
+  //       r0: the destination address of the store
+  //       r1: the object reference to be stored
   //     On exit:
   //       r0: trashed
   //       r3: trashed
diff --git a/src/coreclr/jit/targetarm64.cpp b/src/coreclr/jit/targetarm64.cpp
index dcec1db6c522..a0e4dfb5c3cf 100644
--- a/src/coreclr/jit/targetarm64.cpp
+++ b/src/coreclr/jit/targetarm64.cpp
@@ -24,4 +24,178 @@ const regNumber fltArgRegs [] = {REG_V0, REG_V1, REG_V2, REG_V3, REG_V4, REG_V5,
 const regMaskTP fltArgMasks[] = {RBM_V0, RBM_V1, RBM_V2, RBM_V3, RBM_V4, RBM_V5, RBM_V6, RBM_V7 };
 // clang-format on
 
+//-----------------------------------------------------------------------------
+// Arm64Classifier:
+//   Construct a new instance of the ARM64 ABI classifier.
+//
+// Parameters:
+//   info - Info about the method being classified.
+//
+Arm64Classifier::Arm64Classifier(const ClassifierInfo& info)
+    : m_info(info)
+    , m_intRegs(intArgRegs, ArrLen(intArgRegs))
+    , m_floatRegs(fltArgRegs, ArrLen(fltArgRegs))
+{
+}
+
+//-----------------------------------------------------------------------------
+// Classify:
+//   Classify a parameter for the ARM64 ABI.
+//
+// Parameters:
+//   comp           - Compiler instance
+//   type           - The type of the parameter
+//   structLayout   - The layout of the struct. Expected to be non-null if
+//                    varTypeIsStruct(type) is true.
+//   wellKnownParam - Well known type of the parameter (if it may affect its ABI classification)
+//
+// Returns:
+//   Classification information for the parameter.
+//
+ABIPassingInformation Arm64Classifier::Classify(Compiler*    comp,
+                                                var_types    type,
+                                                ClassLayout* structLayout,
+                                                WellKnownArg wellKnownParam)
+{
+    if ((wellKnownParam == WellKnownArg::RetBuffer) && hasFixedRetBuffReg(m_info.CallConv))
+    {
+        return ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(REG_ARG_RET_BUFF, 0,
+                                                                                      TARGET_POINTER_SIZE));
+    }
+
+    // First handle HFA/HVAs. These are allowed to be passed in more registers
+    // than other structures.
+    if (varTypeIsStruct(type) && !m_info.IsVarArgs)
+    {
+        var_types hfaType = comp->GetHfaType(structLayout->GetClassHandle());
+
+        if (hfaType != TYP_UNDEF)
+        {
+            unsigned              elemSize = genTypeSize(hfaType);
+            unsigned              slots    = structLayout->GetSize() / elemSize;
+            ABIPassingInformation info;
+            if (m_floatRegs.Count() >= slots)
+            {
+                info.NumSegments = slots;
+                info.Segments    = new (comp, CMK_ABI) ABIPassingSegment[slots];
+
+                for (unsigned i = 0; i < slots; i++)
+                {
+                    info.Segments[i] = ABIPassingSegment::InRegister(m_floatRegs.Dequeue(), i * elemSize, elemSize);
+                }
+            }
+            else
+            {
+                unsigned alignment =
+                    compAppleArm64Abi() ? min(elemSize, (unsigned)TARGET_POINTER_SIZE) : TARGET_POINTER_SIZE;
+                m_stackArgSize = roundUp(m_stackArgSize, alignment);
+                info           = ABIPassingInformation::FromSegment(comp, ABIPassingSegment::OnStack(m_stackArgSize, 0,
+                                                                                                     structLayout->GetSize()));
+                m_stackArgSize += roundUp(structLayout->GetSize(), alignment);
+                // After passing any float value on the stack, we should not enregister more float values.
+                m_floatRegs.Clear();
+            }
+
+            return info;
+        }
+    }
+
+    unsigned slots;
+    unsigned passedSize;
+    if (varTypeIsStruct(type))
+    {
+        unsigned size = structLayout->GetSize();
+        if (size > 16)
+        {
+            slots      = 1; // Passed by implicit byref
+            passedSize = TARGET_POINTER_SIZE;
+        }
+        else
+        {
+            slots      = (size + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE;
+            passedSize = size;
+        }
+    }
+    else
+    {
+        assert(genTypeSize(type) <= TARGET_POINTER_SIZE);
+        slots      = 1;
+        passedSize = genTypeSize(type);
+    }
+
+    assert((slots == 1) || (slots == 2));
+
+    ABIPassingInformation info;
+    if (m_info.IsVarArgs && (slots == 2) && (m_intRegs.Count() == 1))
+    {
+        // For varargs we split structs between register and stack in this
+        // case. Normally a struct that does not fit in registers will always
+        // be passed on stack.
+        assert(compFeatureArgSplit());
+        info.NumSegments = 2;
+        info.Segments    = new (comp, CMK_ABI) ABIPassingSegment[2];
+        info.Segments[0] = ABIPassingSegment::InRegister(m_intRegs.Dequeue(), 0, TARGET_POINTER_SIZE);
+        info.Segments[1] = ABIPassingSegment::OnStack(m_stackArgSize, TARGET_POINTER_SIZE,
+                                                      structLayout->GetSize() - TARGET_POINTER_SIZE);
+        m_stackArgSize += TARGET_POINTER_SIZE;
+    }
+    else
+    {
+        RegisterQueue* regs = &m_intRegs;
+
+        // In varargs methods (only supported on Windows) all parameters go in
+        // integer registers.
+        if (varTypeUsesFloatArgReg(type) && !m_info.IsVarArgs)
+        {
+            regs = &m_floatRegs;
+        }
+
+        if (regs->Count() >= slots)
+        {
+            info.NumSegments  = slots;
+            info.Segments     = new (comp, CMK_ABI) ABIPassingSegment[slots];
+            unsigned slotSize = varTypeIsStruct(type) ? TARGET_POINTER_SIZE : genTypeSize(type);
+            info.Segments[0]  = ABIPassingSegment::InRegister(regs->Dequeue(), 0, slotSize);
+            if (slots == 2)
+            {
+                assert(varTypeIsStruct(type));
+                unsigned tailSize = structLayout->GetSize() - slotSize;
+                info.Segments[1]  = ABIPassingSegment::InRegister(regs->Dequeue(), slotSize, tailSize);
+            }
+        }
+        else
+        {
+            unsigned alignment;
+            if (compAppleArm64Abi())
+            {
+                if (varTypeIsStruct(type))
+                {
+                    alignment = TARGET_POINTER_SIZE;
+                }
+                else
+                {
+                    alignment = genTypeSize(type);
+                }
+
+                m_stackArgSize = roundUp(m_stackArgSize, alignment);
+            }
+            else
+            {
+                alignment = TARGET_POINTER_SIZE;
+                assert((m_stackArgSize % TARGET_POINTER_SIZE) == 0);
+            }
+
+            info = ABIPassingInformation::FromSegment(comp, ABIPassingSegment::OnStack(m_stackArgSize, 0, passedSize));
+
+            m_stackArgSize += roundUp(passedSize, alignment);
+
+            // As soon as we pass something on stack we cannot go back and
+            // enregister something else.
+            regs->Clear();
+        }
+    }
+
+    return info;
+}
+
 #endif // TARGET_ARM64
diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h
index 3646ecb4407b..6d33d378bcd9 100644
--- a/src/coreclr/jit/targetarm64.h
+++ b/src/coreclr/jit/targetarm64.h
@@ -42,7 +42,6 @@
                                            // need to track stack depth, but this is currently necessary to get GC information reported at call sites.
   #define TARGET_POINTER_SIZE      8       // equal to sizeof(void*) and the managed pointer size in bytes for this target
   #define FEATURE_EH               1       // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
-  #define FEATURE_EH_CALLFINALLY_THUNKS 1  // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
   #define ETW_EBP_FRAMED           1       // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods
   #define CSE_CONSTS               1       // Enable if we want to CSE constants
 
@@ -56,6 +55,9 @@
   #define REG_PREDICATE_HIGH_FIRST REG_P8  // Similarly, some instructions can only use the second half of the predicate registers.
   #define REG_PREDICATE_HIGH_LAST  REG_P15
 
+  #define REG_MASK_FIRST           REG_PREDICATE_FIRST
+  #define REG_MASK_LAST            REG_PREDICATE_LAST
+
   static_assert_no_msg(REG_PREDICATE_HIGH_LAST == REG_PREDICATE_LAST);
 
   #define REGNUM_BITS              6       // number of bits in a REG_*
@@ -140,11 +142,19 @@
   #define REG_JUMP_THUNK_PARAM     REG_R12
   #define RBM_JUMP_THUNK_PARAM     RBM_R12
 
+  #define RBM_LOWMASK              (RBM_P0 | RBM_P1 | RBM_P2 | RBM_P3 | RBM_P4 | RBM_P5 | RBM_P6 | RBM_P7)
+  #define RBM_HIGHMASK             (RBM_P8 | RBM_P9 | RBM_P10 | RBM_P11 | RBM_P12 | RBM_P13 | RBM_P14 | RBM_P15)
+  #define RBM_ALLMASK              (RBM_LOWMASK | RBM_HIGHMASK)
+
+  // TODO-SVE: Fix when adding predicate register allocation
+  #define RBM_MSK_CALLEE_SAVED    (0)
+  #define RBM_MSK_CALLEE_TRASH    (0)
+
   // ARM64 write barrier ABI (see vm\arm64\asmhelpers.asm, vm\arm64\asmhelpers.S):
   // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier):
   //     On entry:
-  //       x14: the destination address (LHS of the assignment)
-  //       x15: the object reference (RHS of the assignment)
+  //       x14: the destination address of the store
+  //       x15: the object reference to be stored
   //     On exit:
   //       x12: trashed
   //       x14: incremented by 8
@@ -370,4 +380,12 @@
   #define REG_ZERO_INIT_FRAME_REG2 REG_R10
   #define REG_ZERO_INIT_FRAME_SIMD REG_V16
 
+  #define SWIFT_SUPPORT
+  #define REG_SWIFT_ERROR REG_R21
+  #define RBM_SWIFT_ERROR RBM_R21
+  #define REG_SWIFT_SELF  REG_R20
+  #define RBM_SWIFT_SELF  RBM_R20
+  #define REG_SWIFT_INTRET_ORDER REG_R0,REG_R1,REG_R2,REG_R3
+  #define REG_SWIFT_FLOATRET_ORDER REG_V0,REG_V1,REG_V2,REG_V3
+
 // clang-format on
diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h
index 736fd1406c30..d27bffa3aa69 100644
--- a/src/coreclr/jit/targetloongarch64.h
+++ b/src/coreclr/jit/targetloongarch64.h
@@ -47,8 +47,6 @@
                                            // need to track stack depth, but this is currently necessary to get GC information reported at call sites.
   #define TARGET_POINTER_SIZE      8       // equal to sizeof(void*) and the managed pointer size in bytes for this target
   #define FEATURE_EH               1       // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
-  #define FEATURE_EH_FUNCLETS      1
-  #define FEATURE_EH_CALLFINALLY_THUNKS 1  // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
   #define ETW_EBP_FRAMED           1       // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods
   #define CSE_CONSTS               1       // Enable if we want to CSE constants
 
@@ -132,8 +130,8 @@
   // LOONGARCH64 write barrier ABI (see vm/loongarch64/asmhelpers.S):
   // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier):
   //     On entry:
-  //       t6: the destination address (LHS of the assignment)
-  //       t7: the object reference (RHS of the assignment)
+  //       t6: the destination address of the store
+  //       t7: the object reference to be stored
   //     On exit:
   //       t0: trashed
   //       t1: trashed
diff --git a/src/coreclr/jit/targetriscv64.h b/src/coreclr/jit/targetriscv64.h
index 9cf0185a5693..33c1b0d49190 100644
--- a/src/coreclr/jit/targetriscv64.h
+++ b/src/coreclr/jit/targetriscv64.h
@@ -12,8 +12,6 @@
   #define ROUND_FLOAT              0       // Do not round intermed float expression results
   #define CPU_HAS_BYTE_REGS        0
 
-  #define CPBLK_UNROLL_LIMIT       64     // Upper bound to let the code generator to loop unroll CpBlk
-  #define INITBLK_UNROLL_LIMIT     64     // Upper bound to let the code generator to loop unroll InitBlk
 
 #ifdef FEATURE_SIMD
 #pragma error("SIMD Unimplemented yet RISCV64")
@@ -44,7 +42,6 @@
                                            // need to track stack depth, but this is currently necessary to get GC information reported at call sites.
   #define TARGET_POINTER_SIZE      8       // equal to sizeof(void*) and the managed pointer size in bytes for this target
   #define FEATURE_EH               1       // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
-  #define FEATURE_EH_CALLFINALLY_THUNKS 1  // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
   #define ETW_EBP_FRAMED           1       // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods
   #define CSE_CONSTS               1       // Enable if we want to CSE constants
 
diff --git a/src/coreclr/jit/targetwasm.h b/src/coreclr/jit/targetwasm.h
index eb0231b70e3a..fa99a91c1b7d 100644
--- a/src/coreclr/jit/targetwasm.h
+++ b/src/coreclr/jit/targetwasm.h
@@ -64,6 +64,9 @@
   #define STACK_ALIGN              16      // stack alignment requirement
   #define STACK_ALIGN_SHIFT        4       // Shift-right amount to convert size in bytes to size in STACK_ALIGN units == log2(STACK_ALIGN)
 
+  #define RBM_INT_CALLEE_SAVED     RBM_NONE
+  #define RBM_FLT_CALLEE_SAVED     RBM_NONE
+
   #define RBM_CALLEE_SAVED         RBM_R0
   #define RBM_CALLEE_TRASH         RBM_NONE
 
@@ -119,4 +122,5 @@
 
   #define RBM_ARG_REGS             RBM_R0
   #define RBM_FLTARG_REGS          RBM_F0
-// clang-format on
+
+  // clang-format on
diff --git a/src/coreclr/jit/targetx86.cpp b/src/coreclr/jit/targetx86.cpp
index d5ed8b0bbf60..5c2702d47288 100644
--- a/src/coreclr/jit/targetx86.cpp
+++ b/src/coreclr/jit/targetx86.cpp
@@ -21,4 +21,103 @@ const regNumber intArgRegs [] = {REG_ECX, REG_EDX};
 const regMaskTP intArgMasks[] = {RBM_ECX, RBM_EDX};
 // clang-format on
 
+//-----------------------------------------------------------------------------
+// X86Classifier:
+//   Construct a new instance of the x86 ABI classifier.
+//
+// Parameters:
+//   info - Info about the method being classified.
+//
+X86Classifier::X86Classifier(const ClassifierInfo& info)
+    : m_regs(nullptr, 0)
+{
+    switch (info.CallConv)
+    {
+        case CorInfoCallConvExtension::Thiscall:
+        {
+            static const regNumber thiscallRegs[] = {REG_ECX};
+            m_regs                                = RegisterQueue(thiscallRegs, ArrLen(thiscallRegs));
+            break;
+        }
+        case CorInfoCallConvExtension::C:
+        case CorInfoCallConvExtension::Stdcall:
+        case CorInfoCallConvExtension::CMemberFunction:
+        case CorInfoCallConvExtension::StdcallMemberFunction:
+        {
+            break;
+        }
+        default:
+        {
+            unsigned numRegs = ArrLen(intArgRegs);
+            if (info.IsVarArgs)
+            {
+                // In varargs methods we only enregister the this pointer or retbuff.
+                numRegs = info.HasThis || info.HasRetBuff ? 1 : 0;
+            }
+            m_regs = RegisterQueue(intArgRegs, numRegs);
+            break;
+        }
+    }
+}
+
+//-----------------------------------------------------------------------------
+// Classify:
+//   Classify a parameter for the x86 ABI.
+//
+// Parameters:
+//   comp           - Compiler instance
+//   type           - The type of the parameter
+//   structLayout   - The layout of the struct. Expected to be non-null if
+//                    varTypeIsStruct(type) is true.
+//   wellKnownParam - Well known type of the parameter (if it may affect its ABI classification)
+//
+// Returns:
+//   Classification information for the parameter.
+//
+ABIPassingInformation X86Classifier::Classify(Compiler*    comp,
+                                              var_types    type,
+                                              ClassLayout* structLayout,
+                                              WellKnownArg wellKnownParam)
+{
+    unsigned size     = type == TYP_STRUCT ? structLayout->GetSize() : genTypeSize(type);
+    unsigned numSlots = (size + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE;
+
+    bool canEnreg = false;
+    if (m_regs.Count() >= numSlots)
+    {
+        switch (type)
+        {
+            case TYP_BYTE:
+            case TYP_UBYTE:
+            case TYP_SHORT:
+            case TYP_USHORT:
+            case TYP_INT:
+            case TYP_REF:
+            case TYP_BYREF:
+                canEnreg = true;
+                break;
+            case TYP_STRUCT:
+                canEnreg = comp->isTrivialPointerSizedStruct(structLayout->GetClassHandle());
+                break;
+            default:
+                break;
+        }
+    }
+
+    ABIPassingSegment segment;
+    if (canEnreg)
+    {
+        assert(numSlots == 1);
+        segment = ABIPassingSegment::InRegister(m_regs.Dequeue(), 0, size);
+    }
+    else
+    {
+        assert((m_stackArgSize % TARGET_POINTER_SIZE) == 0);
+        segment = ABIPassingSegment::OnStack(m_stackArgSize, 0, size);
+        m_stackArgSize += roundUp(size, TARGET_POINTER_SIZE);
+    }
+
+    return ABIPassingInformation::FromSegment(comp, segment);
+}
+
 #endif // TARGET_X86
diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h
index 60b2f7793f43..dfeb96ae9e97 100644
--- a/src/coreclr/jit/targetx86.h
+++ b/src/coreclr/jit/targetx86.h
@@ -53,9 +53,9 @@
                                            // target
   #define FEATURE_EH               1       // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter,
                                            // filter-handler, fault) and directly execute 'finally' clauses.
-
-  #define FEATURE_EH_CALLFINALLY_THUNKS 0  // Generate call-to-finally code in "thunks" in the enclosing EH region,
-                                           // protected by "cloned finally" clauses.
+#if !defined(UNIX_X86_ABI)
+  #define FEATURE_EH_WINDOWS_X86   1       // Enable support for SEH regions
+#endif
   #define ETW_EBP_FRAMED           1       // if 1 we cannot use EBP as a scratch register and must create EBP based
                                            // frames for most methods
   #define CSE_CONSTS               1       // Enable if we want to CSE constants
@@ -227,7 +227,6 @@
   // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF.
   // Note that x86 normally emits an optimized (source-register-specific) write barrier, but can emit
   // a call to a "general" write barrier.
-  CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
   #define RBM_CALLEE_TRASH_WRITEBARRIER         (RBM_EAX | RBM_EDX)
diff --git a/src/coreclr/jit/tinyarray.h b/src/coreclr/jit/tinyarray.h
deleted file mode 100644
index 36cd462a7861..000000000000
--- a/src/coreclr/jit/tinyarray.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#ifndef TINYARRAY_H
-#define TINYARRAY_H
-
-/*****************************************************************************/
-
-// This is an array packed into some kind of integral data type
-// storagetype is the type (integral) which your array is going to be packed into
-// itemtype is the type of array elements
-// bits_per_element is size of the elements in bits
-template <class storageType, class itemType, int bits_per_element>
-class TinyArray
-{
-public:
-    // operator[] returns a 'ref' (usually a ref to the element type)
-    // This presents a problem if you wanted to implement something like a
-    // bitvector via this packed array, because you cannot make a ref to
-    // the element type.
-    //    The trick is you define something that acts like a ref (TinyArrayRef in this case)
-    // which for our purposes means you can assign to and from it and our chosen
-    // element type.
-    class TinyArrayRef
-    {
-    public:
-        // this is really the getter for the array.
-        operator itemType()
-        {
-            storageType mask  = ((1 << bits_per_element) - 1);
-            int         shift = bits_per_element * index;
-
-            itemType result = (itemType)((*data >> shift) & mask);
-            return result;
-        }
-
-        void operator=(const itemType b)
-        {
-            storageType mask = ((1 << bits_per_element) - 1);
-            assert(itemType(b & mask) == b);
-
-            mask <<= bits_per_element * index;
-
-            *data &= ~mask;
-            *data |= b << (bits_per_element * index);
-        }
-        friend class TinyArray;
-
-    protected:
-        TinyArrayRef(storageType* d, int idx) : data(d), index(idx)
-        {
-        }
-
-        storageType* data;
-        int          index;
-    };
-
-    storageType data;
-
-    void clear()
-    {
-        data = 0;
-    }
-
-    TinyArrayRef operator[](unsigned int n)
-    {
-        assert((n + 1) * bits_per_element <= sizeof(storageType) * 8);
-        return TinyArrayRef(&data, n);
-    }
-    // only use this for clearing it
-    void operator=(void* rhs)
-    {
-        assert(rhs == nullptr);
-        data = 0;
-    }
-};
-
-#endif // TINYARRAY_H
diff --git a/src/coreclr/jit/treelifeupdater.cpp b/src/coreclr/jit/treelifeupdater.cpp
index 536454d18c19..4fc5a283f4a7 100644
--- a/src/coreclr/jit/treelifeupdater.cpp
+++ b/src/coreclr/jit/treelifeupdater.cpp
@@ -349,7 +349,7 @@ void TreeLifeUpdater<ForCodeGen>::UpdateLifeBit(VARSET_TP& set, LclVarDsc* dsc,
 // can be dumped after potential updates.
 //
 template <bool ForCodeGen>
-void           TreeLifeUpdater<ForCodeGen>::StoreCurrentLifeForDump()
+void TreeLifeUpdater<ForCodeGen>::StoreCurrentLifeForDump()
 {
 #ifdef DEBUG
     if (compiler->verbose)
diff --git a/src/coreclr/jit/typelist.h b/src/coreclr/jit/typelist.h
index 8b8da6db011f..bf5acb5ee014 100644
--- a/src/coreclr/jit/typelist.h
+++ b/src/coreclr/jit/typelist.h
@@ -4,7 +4,7 @@
 #define GCS EA_GCREF
 #define BRS EA_BYREF
 #define EPS EA_PTRSIZE
-#define PS TARGET_POINTER_SIZE
+#define PS  TARGET_POINTER_SIZE
 #define PST (TARGET_POINTER_SIZE / sizeof(int))
 
 #ifdef TARGET_64BIT
@@ -63,8 +63,10 @@ DEF_TP(SIMD16   ,"simd16" , TYP_SIMD16,  16,16, 16,   4,16, VTR_FLOAT, available
 #if defined(TARGET_XARCH)
 DEF_TP(SIMD32   ,"simd32" , TYP_SIMD32,  32,32, 32,   8,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED,    RBM_FLT_CALLEE_TRASH,    VTF_S|VTF_VEC)
 DEF_TP(SIMD64   ,"simd64" , TYP_SIMD64,  64,64, 64,  16,16, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED,    RBM_FLT_CALLEE_TRASH,    VTF_S|VTF_VEC)
-DEF_TP(MASK     ,"mask"   , TYP_MASK,     8, 8,  8,   2, 8, VTR_MASK,  availableMaskRegs,   RBM_MSK_CALLEE_SAVED,    RBM_MSK_CALLEE_TRASH,    VTF_S)
 #endif // TARGET_XARCH
+#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
+DEF_TP(MASK     ,"mask"   , TYP_MASK,     8, 8,  8,   2, 8, VTR_MASK,  availableMaskRegs,   RBM_MSK_CALLEE_SAVED,    RBM_MSK_CALLEE_TRASH,    VTF_S)
+#endif // TARGET_XARCH || TARGET_ARM64
 #endif // FEATURE_SIMD
 
 DEF_TP(UNKNOWN ,"unknown" ,TYP_UNKNOWN,  0,  0,  0,   0, 0, VTR_INT,   availableIntRegs,    RBM_INT_CALLEE_SAVED,    RBM_INT_CALLEE_TRASH,    VTF_ANY)
diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp
index 4bf97a90d251..3926387d1b08 100644
--- a/src/coreclr/jit/unwind.cpp
+++ b/src/coreclr/jit/unwind.cpp
@@ -16,8 +16,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #endif
 
 #ifndef TARGET_WASM
-#if defined(FEATURE_EH_FUNCLETS)
-
 //------------------------------------------------------------------------
 // Compiler::unwindGetFuncLocations: Get the start/end emitter locations for this
 // function or funclet. If 'getHotSectionData' is true, get the start/end locations
@@ -54,6 +52,8 @@ void Compiler::unwindGetFuncLocations(FuncInfoDsc*             func,
                                       /* OUT */ emitLocation** ppStartLoc,
                                       /* OUT */ emitLocation** ppEndLoc)
 {
+    assert(UsesFunclets());
+
     if (func->funKind == FUNC_ROOT)
     {
         // Since all funclets are pulled out of line, the main code size is everything
@@ -129,13 +129,11 @@ void Compiler::unwindGetFuncLocations(FuncInfoDsc*             func,
             assert(func->funKind == FUNC_HANDLER);
             *ppStartLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(HBtab->ebdHndBeg));
             *ppEndLoc   = HBtab->ebdHndLast->IsLast() ? nullptr
-                                                    : new (this, CMK_UnwindInfo)
+                                                      : new (this, CMK_UnwindInfo)
                                                           emitLocation(ehEmitCookie(HBtab->ebdHndLast->Next()));
         }
     }
 }
-
-#endif // FEATURE_EH_FUNCLETS
 #endif // !TARGET_WASM
 
 #if defined(FEATURE_CFI_SUPPORT)
@@ -186,21 +184,22 @@ void Compiler::unwindBegPrologCFI()
 {
     assert(compGeneratingProlog);
 
-#if defined(FEATURE_EH_FUNCLETS)
-    FuncInfoDsc* func = funCurrentFunc();
+    if (UsesFunclets())
+    {
+        FuncInfoDsc* func = funCurrentFunc();
 
-    // There is only one prolog for a function/funclet, and it comes first. So now is
-    // a good time to initialize all the unwind data structures.
+        // There is only one prolog for a function/funclet, and it comes first. So now is
+        // a good time to initialize all the unwind data structures.
 
-    unwindGetFuncLocations(func, true, &func->startLoc, &func->endLoc);
+        unwindGetFuncLocations(func, true, &func->startLoc, &func->endLoc);
 
-    if (fgFirstColdBlock != nullptr)
-    {
-        unwindGetFuncLocations(func, false, &func->coldStartLoc, &func->coldEndLoc);
-    }
+        if (fgFirstColdBlock != nullptr)
+        {
+            unwindGetFuncLocations(func, false, &func->coldStartLoc, &func->coldEndLoc);
+        }
 
-    func->cfiCodes = new (getAllocator(CMK_UnwindInfo)) CFICodeVector(getAllocator());
-#endif // FEATURE_EH_FUNCLETS
+        func->cfiCodes = new (getAllocator(CMK_UnwindInfo)) CFICodeVector(getAllocator());
+    }
 }
 
 void Compiler::unwindPushPopMaskCFI(regMaskTP regMask, bool isFloat)
diff --git a/src/coreclr/jit/unwind.h b/src/coreclr/jit/unwind.h
index 4d1b540f0606..8b7fcaa5a103 100644
--- a/src/coreclr/jit/unwind.h
+++ b/src/coreclr/jit/unwind.h
@@ -21,46 +21,51 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #if defined(TARGET_ARM)
 const unsigned MAX_PROLOG_SIZE_BYTES = 44;
 const unsigned MAX_EPILOG_SIZE_BYTES = 44;
-#define UWC_END 0xFF // "end" unwind code
+#define UWC_END                    0xFF // "end" unwind code
 #define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 19)
-#define UW_MAX_CODE_WORDS_COUNT 15      // Max number that can be encoded in the "Code Words" field of the .pdata record
-#define UW_MAX_EPILOG_START_INDEX 0xFFU // Max number that can be encoded in the "Epilog Start Index" field
-                                        // of the .pdata record
+#define UW_MAX_CODE_WORDS_COUNT    15 // Max number that can be encoded in the "Code Words" field of the .pdata record
+#define UW_MAX_EPILOG_START_INDEX                                                                                      \
+    0xFFU // Max number that can be encoded in the "Epilog Start Index" field
+          // of the .pdata record
 #elif defined(TARGET_ARM64)
 const unsigned MAX_PROLOG_SIZE_BYTES = 100;
 const unsigned MAX_EPILOG_SIZE_BYTES = 100;
-#define UWC_END 0xE4   // "end" unwind code
-#define UWC_END_C 0xE5 // "end_c" unwind code
+#define UWC_END                    0xE4 // "end" unwind code
+#define UWC_END_C                  0xE5 // "end_c" unwind code
 #define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20)
-#define UW_MAX_CODE_WORDS_COUNT 31
-#define UW_MAX_EPILOG_START_INDEX 0x3FFU
+#define UW_MAX_CODE_WORDS_COUNT    31
+#define UW_MAX_EPILOG_START_INDEX  0x3FFU
 #elif defined(TARGET_LOONGARCH64)
 const unsigned MAX_PROLOG_SIZE_BYTES = 200;
 const unsigned MAX_EPILOG_SIZE_BYTES = 200;
-#define UWC_END 0xE4   // "end" unwind code
-#define UWC_END_C 0xE5 // "end_c" unwind code
+#define UWC_END                    0xE4 // "end" unwind code
+#define UWC_END_C                  0xE5 // "end_c" unwind code
 #define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20)
-#define UW_MAX_CODE_WORDS_COUNT 31
-#define UW_MAX_EPILOG_START_INDEX 0x3FFU
+#define UW_MAX_CODE_WORDS_COUNT    31
+#define UW_MAX_EPILOG_START_INDEX  0x3FFU
 #elif defined(TARGET_RISCV64)
 const unsigned MAX_PROLOG_SIZE_BYTES = 200;
 const unsigned MAX_EPILOG_SIZE_BYTES = 200;
-#define UWC_END 0xE4   // "end" unwind code
-#define UWC_END_C 0xE5 // "end_c" unwind code
+#define UWC_END                    0xE4 // "end" unwind code
+#define UWC_END_C                  0xE5 // "end_c" unwind code
 #define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20)
-#define UW_MAX_CODE_WORDS_COUNT 31
-#define UW_MAX_EPILOG_START_INDEX 0x3FFU
+#define UW_MAX_CODE_WORDS_COUNT    31
+#define UW_MAX_EPILOG_START_INDEX  0x3FFU
 
 #endif // TARGET_RISCV64
 
-#define UW_MAX_EPILOG_COUNT 31                 // Max number that can be encoded in the "Epilog count" field
-                                               // of the .pdata record
-#define UW_MAX_EXTENDED_CODE_WORDS_COUNT 0xFFU // Max number that can be encoded in the "Extended Code Words"
-                                               // field of the .pdata record
-#define UW_MAX_EXTENDED_EPILOG_COUNT 0xFFFFU   // Max number that can be encoded in the "Extended Epilog Count"
-                                               // field of the .pdata record
-#define UW_MAX_EPILOG_START_OFFSET 0x3FFFFU    // Max number that can be encoded in the "Epilog Start Offset"
-                                               // field of the .pdata record
+#define UW_MAX_EPILOG_COUNT                                                                                            \
+    31 // Max number that can be encoded in the "Epilog count" field
+       // of the .pdata record
+#define UW_MAX_EXTENDED_CODE_WORDS_COUNT                                                                               \
+    0xFFU // Max number that can be encoded in the "Extended Code Words"
+          // field of the .pdata record
+#define UW_MAX_EXTENDED_EPILOG_COUNT                                                                                   \
+    0xFFFFU // Max number that can be encoded in the "Extended Epilog Count"
+            // field of the .pdata record
+#define UW_MAX_EPILOG_START_OFFSET                                                                                     \
+    0x3FFFFU // Max number that can be encoded in the "Epilog Start Offset"
+             // field of the .pdata record
 
 //
 // Forward declaration of class defined in emit.h
@@ -85,7 +90,8 @@ class UnwindInfo;
 class UnwindBase
 {
 protected:
-    UnwindBase(Compiler* comp) : uwiComp(comp)
+    UnwindBase(Compiler* comp)
+        : uwiComp(comp)
     {
     }
 
@@ -107,9 +113,9 @@ class UnwindCodesBase
 public:
     // Add a single unwind code.
 
-    virtual void AddCode(BYTE b1) = 0;
-    virtual void AddCode(BYTE b1, BYTE b2) = 0;
-    virtual void AddCode(BYTE b1, BYTE b2, BYTE b3) = 0;
+    virtual void AddCode(BYTE b1)                            = 0;
+    virtual void AddCode(BYTE b1, BYTE b2)                   = 0;
+    virtual void AddCode(BYTE b1, BYTE b2, BYTE b3)          = 0;
     virtual void AddCode(BYTE b1, BYTE b2, BYTE b3, BYTE b4) = 0;
 
     // Get access to the unwind codes
@@ -139,7 +145,9 @@ class UnwindCodesBase
 // information for a function, including unwind info header, the prolog codes,
 // and any epilog codes.
 
-class UnwindPrologCodes : public UnwindBase, public UnwindCodesBase
+class UnwindPrologCodes
+    : public UnwindBase
+    , public UnwindCodesBase
 {
     // UPC_LOCAL_COUNT is the amount of memory local to this class. For ARM CoreLib, the maximum size is 34.
     // Here is a histogram of other interesting sizes:
@@ -303,7 +311,9 @@ class UnwindPrologCodes : public UnwindBase, public UnwindCodesBase
 // Epilog unwind codes arrive in the order they will be emitted. Store them as an array,
 // adding new ones to the end of the array.
 
-class UnwindEpilogCodes : public UnwindBase, public UnwindCodesBase
+class UnwindEpilogCodes
+    : public UnwindBase
+    , public UnwindCodesBase
 {
     // UEC_LOCAL_COUNT is the amount of memory local to this class. For ARM CoreLib, the maximum size is 6,
     // while 89% of epilogs fit in 4. So, set it to 4 to maintain array alignment and hit most cases.
diff --git a/src/coreclr/jit/unwindamd64.cpp b/src/coreclr/jit/unwindamd64.cpp
index 549c4e991056..e42a4368581f 100644
--- a/src/coreclr/jit/unwindamd64.cpp
+++ b/src/coreclr/jit/unwindamd64.cpp
@@ -199,7 +199,7 @@ void Compiler::unwindPushWindows(regNumber reg)
         // since it is pushed as a frame register.
         || (reg == REG_FPBASE)
 #endif // ETW_EBP_FRAMED
-            )
+    )
     {
         code->UnwindOp = UWOP_PUSH_NONVOL;
         code->OpInfo   = (BYTE)reg;
diff --git a/src/coreclr/jit/unwindarm64.cpp b/src/coreclr/jit/unwindarm64.cpp
index 0725eb41dfdb..f842737171c0 100644
--- a/src/coreclr/jit/unwindarm64.cpp
+++ b/src/coreclr/jit/unwindarm64.cpp
@@ -461,8 +461,8 @@ void Compiler::unwindSaveRegPairPreindexed(regNumber reg1, regNumber reg2, int o
 
         pu->AddCode(0x80 | (BYTE)z);
     }
-    else if ((reg1 == REG_R19) &&
-             (-256 <= offset)) // If the offset is between -512 and -256, we use the save_regp_x unwind code.
+    else if ((reg1 == REG_R19) && (-256 <= offset)) // If the offset is between -512 and -256, we use the save_regp_x
+                                                    // unwind code.
     {
         // save_r19r20_x: 001zzzzz: save <r19,r20> pair at [sp-#Z*8]!, pre-indexed offset >= -248
         // NOTE: I'm not sure why we allow Z==0 here; seems useless, and the calculation of offset is different from the
@@ -758,7 +758,7 @@ void DumpUnwindInfo(Compiler*         comp,
     // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end
     // to provide padding, and round down to get a multiple of 4 bytes in size.
     DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader;
-    DWORD dw;
+    DWORD            dw;
 
     dw = *pdw++;
 
diff --git a/src/coreclr/jit/unwindarmarch.cpp b/src/coreclr/jit/unwindarmarch.cpp
index 445b2581ca0a..51af7f24889d 100644
--- a/src/coreclr/jit/unwindarmarch.cpp
+++ b/src/coreclr/jit/unwindarmarch.cpp
@@ -243,9 +243,8 @@ void Compiler::unwindPushPopMaskInt(regMaskTP maskInt, bool useOpsize16)
     }
     else
     {
-        assert((maskInt &
-                ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 |
-                  RBM_R11 | RBM_R12 | RBM_LR)) == 0);
+        assert((maskInt & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 |
+                            RBM_R10 | RBM_R11 | RBM_R12 | RBM_LR)) == 0);
 
         bool shortFormat = false;
         BYTE val         = 0;
@@ -321,9 +320,8 @@ void Compiler::unwindPushPopMaskFloat(regMaskTP maskFloat)
 void Compiler::unwindPushMaskInt(regMaskTP maskInt)
 {
     // Only r0-r12 and lr are supported
-    assert((maskInt &
-            ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 |
-              RBM_R11 | RBM_R12 | RBM_LR)) == 0);
+    assert((maskInt & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 |
+                        RBM_R10 | RBM_R11 | RBM_R12 | RBM_LR)) == 0);
 
 #if defined(FEATURE_CFI_SUPPORT)
     if (generateCFIUnwindCodes())
@@ -364,9 +362,8 @@ void Compiler::unwindPopMaskInt(regMaskTP maskInt)
 #endif // FEATURE_CFI_SUPPORT
 
     // Only r0-r12 and lr and pc are supported (pc is mapped to lr when encoding)
-    assert((maskInt &
-            ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 |
-              RBM_R11 | RBM_R12 | RBM_LR | RBM_PC)) == 0);
+    assert((maskInt & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 |
+                        RBM_R10 | RBM_R11 | RBM_R12 | RBM_LR | RBM_PC)) == 0);
 
     bool useOpsize16 = ((maskInt & (RBM_LOW_REGS | RBM_PC)) == maskInt); // Can POP use the 16-bit encoding?
 
@@ -574,7 +571,6 @@ void Compiler::unwindReserveFunc(FuncInfoDsc* func)
     }
 #endif // DEBUG
 
-#ifdef FEATURE_EH_FUNCLETS
     // If hot/cold splitting occurred at fgFirstFuncletBB, then the main body is not split.
     const bool splitAtFirstFunclet = (funcHasColdSection && (fgFirstColdBlock == fgFirstFuncletBB));
 
@@ -582,7 +578,6 @@ void Compiler::unwindReserveFunc(FuncInfoDsc* func)
     {
         funcHasColdSection = false;
     }
-#endif // FEATURE_EH_FUNCLETS
 
 #if defined(FEATURE_CFI_SUPPORT)
     if (generateCFIUnwindCodes())
@@ -721,8 +716,8 @@ unsigned GetOpcodeSizeFromUnwindHeader(BYTE b1)
     };
 
     BYTE opsize = s_UnwindOpsize[b1];
-    assert(opsize == 2 ||
-           opsize == 4); // We shouldn't get a code with no opsize (the 0xFF end code is handled specially)
+    assert(opsize == 2 || opsize == 4); // We shouldn't get a code with no opsize (the 0xFF end code is handled
+                                        // specially)
     return opsize;
 }
 
@@ -850,7 +845,6 @@ void UnwindPrologCodes::SetFinalSize(int headerBytes, int epilogBytes)
                   &upcMem[upcCodeSlot], prologBytes);
 
         // Note that the three UWC_END padding bytes still exist at the end of the array.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
         // Zero out the epilog codes memory, to ensure we've copied the right bytes. Don't zero the padding bytes.
@@ -887,9 +881,9 @@ void UnwindPrologCodes::AppendEpilog(UnwindEpilogInfo* pEpi)
 
     int epiSize = pEpi->Size();
     memcpy_s(&upcMem[upcEpilogSlot], upcMemSize - upcEpilogSlot - 3, pEpi->GetCodes(),
-             epiSize); // -3 to avoid writing to the alignment padding
-    assert(pEpi->GetStartIndex() ==
-           upcEpilogSlot - upcCodeSlot); // Make sure we copied it where we expected to copy it.
+             epiSize);                                            // -3 to avoid writing to the alignment padding
+    assert(pEpi->GetStartIndex() == upcEpilogSlot - upcCodeSlot); // Make sure we copied it where we expected to copy
+                                                                  // it.
 
     upcEpilogSlot += epiSize;
     assert(upcEpilogSlot <= upcMemSize - 3);
@@ -1455,7 +1449,7 @@ void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength)
     }
 #endif
 
-// Compute the header
+    // Compute the header
 
 #if defined(TARGET_ARM)
     noway_assert((functionLength & 1) == 0);
@@ -1504,8 +1498,8 @@ void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength)
 
     // Start writing the header
 
-    noway_assert(headerFunctionLength <=
-                 0x3FFFFU); // We create fragments to prevent this from firing, so if it hits, we have an internal error
+    noway_assert(headerFunctionLength <= 0x3FFFFU); // We create fragments to prevent this from firing, so if it hits,
+                                                    // we have an internal error
 
     if ((headerEpilogCount > UW_MAX_EPILOG_COUNT) || (headerCodeWords > UW_MAX_CODE_WORDS_COUNT))
     {
@@ -1516,7 +1510,7 @@ void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength)
     DWORD header = headerFunctionLength | (headerVers << 18) | (headerXBit << 20) | (headerEBit << 21) |
                    (headerFBit << 22) | (headerEpilogCount << 23) | (headerCodeWords << 28);
 #elif defined(TARGET_ARM64)
-    DWORD header               = headerFunctionLength | (headerVers << 18) | (headerXBit << 20) | (headerEBit << 21) |
+    DWORD header = headerFunctionLength | (headerVers << 18) | (headerXBit << 20) | (headerEBit << 21) |
                    (headerEpilogCount << 22) | (headerCodeWords << 27);
 #endif // defined(TARGET_ARM64)
 
@@ -1912,7 +1906,6 @@ void UnwindInfo::Split()
     // the actual offsets of the splits since we haven't issued the instructions yet, so store
     // an emitter location instead of an offset, and "finalize" the offset in the unwindEmit() phase,
     // like we do for the function length and epilog offsets.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     if (uwiComp->verbose)
@@ -2203,7 +2196,7 @@ DWORD DumpRegSetRange(const char* const rtype, DWORD start, DWORD end, DWORD lr)
 DWORD DumpOpsize(DWORD padding, DWORD opsize)
 {
     if (padding > 100) // underflow?
-        padding   = 4;
+        padding = 4;
     DWORD printed = padding;
     for (; padding > 0; padding--)
         printf(" ");
@@ -2231,7 +2224,7 @@ void DumpUnwindInfo(Compiler*         comp,
     // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end
     // to provide padding, and round down to get a multiple of 4 bytes in size.
     DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader;
-    DWORD dw;
+    DWORD            dw;
 
     dw = *pdw++;
 
diff --git a/src/coreclr/jit/unwindloongarch64.cpp b/src/coreclr/jit/unwindloongarch64.cpp
index 3aa5fd668d40..e46d3ec60e07 100644
--- a/src/coreclr/jit/unwindloongarch64.cpp
+++ b/src/coreclr/jit/unwindloongarch64.cpp
@@ -516,7 +516,7 @@ void DumpUnwindInfo(Compiler*         comp,
     // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end
     // to provide padding, and round down to get a multiple of 4 bytes in size.
     DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader;
-    DWORD dw;
+    DWORD            dw;
 
     dw = *pdw++;
 
@@ -1112,7 +1112,6 @@ void UnwindPrologCodes::SetFinalSize(int headerBytes, int epilogBytes)
                   &upcMem[upcCodeSlot], prologBytes);
 
         // Note that the three UWC_END padding bytes still exist at the end of the array.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
         // Zero out the epilog codes memory, to ensure we've copied the right bytes. Don't zero the padding bytes.
@@ -1149,9 +1148,9 @@ void UnwindPrologCodes::AppendEpilog(UnwindEpilogInfo* pEpi)
 
     int epiSize = pEpi->Size();
     memcpy_s(&upcMem[upcEpilogSlot], upcMemSize - upcEpilogSlot - 3, pEpi->GetCodes(),
-             epiSize); // -3 to avoid writing to the alignment padding
-    assert(pEpi->GetStartIndex() ==
-           upcEpilogSlot - upcCodeSlot); // Make sure we copied it where we expected to copy it.
+             epiSize);                                            // -3 to avoid writing to the alignment padding
+    assert(pEpi->GetStartIndex() == upcEpilogSlot - upcCodeSlot); // Make sure we copied it where we expected to copy
+                                                                  // it.
 
     upcEpilogSlot += epiSize;
     assert(upcEpilogSlot <= upcMemSize - 3);
@@ -1772,8 +1771,8 @@ void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength)
 
     // Start writing the header
 
-    noway_assert(headerFunctionLength <=
-                 0x3FFFFU); // We create fragments to prevent this from firing, so if it hits, we have an internal error
+    noway_assert(headerFunctionLength <= 0x3FFFFU); // We create fragments to prevent this from firing, so if it hits,
+                                                    // we have an internal error
 
     if ((headerEpilogCount > UW_MAX_EPILOG_COUNT) || (headerCodeWords > UW_MAX_CODE_WORDS_COUNT))
     {
@@ -2139,7 +2138,6 @@ void UnwindInfo::Split()
     // the actual offsets of the splits since we haven't issued the instructions yet, so store
     // an emitter location instead of an offset, and "finalize" the offset in the unwindEmit() phase,
     // like we do for the function length and epilog offsets.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     if (uwiComp->verbose)
diff --git a/src/coreclr/jit/unwindriscv64.cpp b/src/coreclr/jit/unwindriscv64.cpp
index b78eb04c228e..05648c481744 100644
--- a/src/coreclr/jit/unwindriscv64.cpp
+++ b/src/coreclr/jit/unwindriscv64.cpp
@@ -327,7 +327,7 @@ void DumpUnwindInfo(Compiler*         comp,
     // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end
     // to provide padding, and round down to get a multiple of 4 bytes in size.
     DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader;
-    DWORD dw;
+    DWORD            dw;
 
     dw = *pdw++;
 
@@ -923,7 +923,6 @@ void UnwindPrologCodes::SetFinalSize(int headerBytes, int epilogBytes)
                   &upcMem[upcCodeSlot], prologBytes);
 
         // Note that the three UWC_END padding bytes still exist at the end of the array.
-        CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
         // Zero out the epilog codes memory, to ensure we've copied the right bytes. Don't zero the padding bytes.
@@ -1946,7 +1945,6 @@ void UnwindInfo::Split()
     // the actual offsets of the splits since we haven't issued the instructions yet, so store
     // an emitter location instead of an offset, and "finalize" the offset in the unwindEmit() phase,
     // like we do for the function length and epilog offsets.
-    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef DEBUG
     if (uwiComp->verbose)
diff --git a/src/coreclr/jit/unwindx86.cpp b/src/coreclr/jit/unwindx86.cpp
index 32d077429af6..40e720d40c33 100644
--- a/src/coreclr/jit/unwindx86.cpp
+++ b/src/coreclr/jit/unwindx86.cpp
@@ -70,16 +70,17 @@ void Compiler::unwindSaveReg(regNumber reg, unsigned offset)
 //
 void Compiler::unwindReserve()
 {
-#if defined(FEATURE_EH_FUNCLETS)
-    assert(!compGeneratingProlog);
-    assert(!compGeneratingEpilog);
-
-    assert(compFuncInfoCount > 0);
-    for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+    if (UsesFunclets())
     {
-        unwindReserveFunc(funGetFunc(funcIdx));
+        assert(!compGeneratingProlog);
+        assert(!compGeneratingEpilog);
+
+        assert(compFuncInfoCount > 0);
+        for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+        {
+            unwindReserveFunc(funGetFunc(funcIdx));
+        }
     }
-#endif
 }
 
 //------------------------------------------------------------------------
@@ -91,19 +92,19 @@ void Compiler::unwindReserve()
 //
 void Compiler::unwindEmit(void* pHotCode, void* pColdCode)
 {
-#if defined(FEATURE_EH_FUNCLETS)
-    assert(!compGeneratingProlog);
-    assert(!compGeneratingEpilog);
-
-    assert(compFuncInfoCount > 0);
-    for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+    if (UsesFunclets())
     {
-        unwindEmitFunc(funGetFunc(funcIdx), pHotCode, pColdCode);
+        assert(!compGeneratingProlog);
+        assert(!compGeneratingEpilog);
+
+        assert(compFuncInfoCount > 0);
+        for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+        {
+            unwindEmitFunc(funGetFunc(funcIdx), pHotCode, pColdCode);
+        }
     }
-#endif // FEATURE_EH_FUNCLETS
 }
 
-#if defined(FEATURE_EH_FUNCLETS)
 //------------------------------------------------------------------------
 // Compiler::unwindReserveFunc: Reserve the unwind information from the VM for a
 // given main function or funclet.
@@ -113,6 +114,7 @@ void Compiler::unwindEmit(void* pHotCode, void* pColdCode)
 //
 void Compiler::unwindReserveFunc(FuncInfoDsc* func)
 {
+    assert(UsesFunclets());
     unwindReserveFuncHelper(func, true);
 
     if (fgFirstColdBlock != nullptr)
@@ -280,5 +282,3 @@ void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pCo
     eeAllocUnwindInfo((BYTE*)pHotCode, (BYTE*)pColdCode, startOffset, endOffset, sizeof(UNWIND_INFO),
                       (BYTE*)&unwindInfo, (CorJitFuncKind)func->funKind);
 }
-
-#endif // FEATURE_EH_FUNCLETS
diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp
index ca735561346a..865049170fb9 100644
--- a/src/coreclr/jit/utils.cpp
+++ b/src/coreclr/jit/utils.cpp
@@ -323,7 +323,6 @@ const char* dspRegRange(regMaskTP regMask, size_t& minSiz, const char* sep, regN
                 minSiz -= strlen(sep) + strlen(nam);
 
                 // What kind of separator should we use for this range (if it is indeed going to be a range)?
-                CLANG_FORMAT_COMMENT_ANCHOR;
 
                 if (genIsValidIntReg(regNum))
                 {
@@ -355,7 +354,6 @@ const char* dspRegRange(regMaskTP regMask, size_t& minSiz, const char* sep, regN
                     }
 #elif defined(TARGET_X86) || defined(TARGET_WASM)
                     // No register ranges
-                    CLANG_FORMAT_COMMENT_ANCHOR;
 #elif defined(TARGET_LOONGARCH64)
                     if (REG_A0 <= regNum && regNum <= REG_T8)
                     {
@@ -1098,11 +1096,17 @@ void ConfigDoubleArray::Dump()
 
 #if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE || MEASURE_MEM_ALLOC
 
+void Counter::dump(FILE* output)
+{
+    fprintf(output, "%lld\n", (long long)Value);
+}
+
 /*****************************************************************************
  *  Histogram class.
  */
 
-Histogram::Histogram(const unsigned* const sizeTable) : m_sizeTable(sizeTable)
+Histogram::Histogram(const unsigned* const sizeTable)
+    : m_sizeTable(sizeTable)
 {
     unsigned sizeCount = 0;
     do
@@ -1861,7 +1865,8 @@ void HelperCallProperties::init()
 //
 // You must use ';' as a separator; whitespace no longer works
 
-AssemblyNamesList2::AssemblyNamesList2(const WCHAR* list, HostAllocator alloc) : m_alloc(alloc)
+AssemblyNamesList2::AssemblyNamesList2(const WCHAR* list, HostAllocator alloc)
+    : m_alloc(alloc)
 {
     WCHAR          prevChar   = '?';     // dummy
     LPWSTR         nameStart  = nullptr; // start of the name currently being processed. nullptr if no current name
@@ -1948,7 +1953,9 @@ bool AssemblyNamesList2::IsInList(const char* assemblyName)
 // MethodSet
 //=============================================================================
 
-MethodSet::MethodSet(const WCHAR* filename, HostAllocator alloc) : m_pInfos(nullptr), m_alloc(alloc)
+MethodSet::MethodSet(const WCHAR* filename, HostAllocator alloc)
+    : m_pInfos(nullptr)
+    , m_alloc(alloc)
 {
     FILE* methodSetFile = _wfopen(filename, W("r"));
     if (methodSetFile == nullptr)
@@ -2177,7 +2184,8 @@ double CachedCyclesPerSecond()
 }
 
 #ifdef FEATURE_JIT_METHOD_PERF
-CycleCount::CycleCount() : cps(CachedCyclesPerSecond())
+CycleCount::CycleCount()
+    : cps(CachedCyclesPerSecond())
 {
 }
 
@@ -2321,7 +2329,7 @@ unsigned __int64 FloatingPointUtils::convertDoubleToUInt64(double d)
 
     u64 = UINT64(INT64(d));
 #else
-    u64   = UINT64(d);
+    u64 = UINT64(d);
 #endif // TARGET_XARCH
 
     return u64;
@@ -2426,22 +2434,10 @@ double FloatingPointUtils::round(double x)
     // noting that we also need to copy back the original sign to
     // correctly handle -0.0
 
-    double temp = _copysign(IntegerBoundary, x);
-    return _copysign((x + temp) - temp, x);
+    double temp = copysign(IntegerBoundary, x);
+    return copysign((x + temp) - temp, x);
 }
 
-// Windows x86 and Windows ARM/ARM64 may not define _copysignf() but they do define _copysign().
-// We will redirect the macro to this other functions if the macro is not defined for the platform.
-// This has the side effect of a possible implicit upcasting for arguments passed in and an explicit
-// downcasting for the _copysign() call.
-#if (defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_ARM64)) && !defined(TARGET_UNIX)
-
-#if !defined(_copysignf)
-#define _copysignf (float)_copysign
-#endif
-
-#endif
-
 // Rounds a single-precision floating-point value to the nearest integer,
 // and rounds midpoint values to the nearest even number.
 float FloatingPointUtils::round(float x)
@@ -2483,8 +2479,8 @@ float FloatingPointUtils::round(float x)
     // noting that we also need to copy back the original sign to
     // correctly handle -0.0
 
-    float temp = _copysignf(IntegerBoundary, x);
-    return _copysignf((x + temp) - temp, x);
+    float temp = copysignf(IntegerBoundary, x);
+    return copysignf((x + temp) - temp, x);
 }
 
 bool FloatingPointUtils::isNormal(double x)
@@ -2611,6 +2607,38 @@ bool FloatingPointUtils::isAllBitsSet(double val)
     return bits == 0xFFFFFFFFFFFFFFFFULL;
 }
 
+//------------------------------------------------------------------------
+// isFinite: Determines whether the specified value is finite
+//
+// Arguments:
+//    val - value to check is not NaN or infinity
+//
+// Return Value:
+//    True if val is finite
+//
+
+bool FloatingPointUtils::isFinite(float val)
+{
+    UINT32 bits = *reinterpret_cast<UINT32*>(&val);
+    return (~bits & 0x7F800000U) != 0;
+}
+
+//------------------------------------------------------------------------
+// isFinite: Determines whether the specified value is finite
+//
+// Arguments:
+//    val - value to check is not NaN or infinity
+//
+// Return Value:
+//    True if val is finite
+//
+
+bool FloatingPointUtils::isFinite(double val)
+{
+    UINT64 bits = *reinterpret_cast<UINT64*>(&val);
+    return (~bits & 0x7FF0000000000000ULL) != 0;
+}
+
 //------------------------------------------------------------------------
 // isNegative: Determines whether the specified value is negative
 //
@@ -3229,6 +3257,32 @@ double FloatingPointUtils::normalize(double value)
 #endif
 }
 
+int FloatingPointUtils::ilogb(double value)
+{
+    if (value == 0.0)
+    {
+        return -2147483648;
+    }
+    else if (isNaN(value))
+    {
+        return 2147483647;
+    }
+    return ilogb(value);
+}
+
+int FloatingPointUtils::ilogb(float value)
+{
+    if (value == 0.0f)
+    {
+        return -2147483648;
+    }
+    else if (isNaN(value))
+    {
+        return 2147483647;
+    }
+    return ilogbf(value);
+}
+
 //------------------------------------------------------------------------
 // BitOperations::BitScanReverse: Search the mask data from most significant bit (MSB) to least significant bit
 // (LSB) for a set bit (1).
@@ -3412,6 +3466,11 @@ uint32_t BitOperations::Log2(uint64_t value)
 // Return Value:
 //    The population count (number of bits set) of value
 //
+#if defined(_MSC_VER)
+// Disable optimizations for PopCount to avoid the compiler from generating intrinsics
+// not supported on all platforms.
+#pragma optimize("", off)
+#endif // _MSC_VER
 uint32_t BitOperations::PopCount(uint32_t value)
 {
 #if defined(_MSC_VER)
@@ -3464,6 +3523,9 @@ uint32_t BitOperations::PopCount(uint64_t value)
     return static_cast<uint32_t>(result);
 #endif
 }
+#if defined(_MSC_VER)
+#pragma optimize("", on)
+#endif // _MSC_VER
 
 //------------------------------------------------------------------------
 // BitOperations::ReverseBits: Reverses the bits in an integer value
@@ -4013,7 +4075,7 @@ T GetSignedMagic(T denom, int* shift /*out*/)
     UT  t;
     T   result_magic;
 
-    absDenom = abs(denom);
+    absDenom = std::abs(denom);
     t        = two_nminus1 + (UT(denom) >> bits_minus_1);
     absNc    = t - 1 - (t % absDenom);        // absolute value of nc
     p        = bits_minus_1;                  // initialize p
@@ -4067,7 +4129,7 @@ int64_t GetSigned64Magic(int64_t d, int* shift /*out*/)
     return GetSignedMagic<int64_t>(d, shift);
 }
 #endif
-}
+} // namespace MagicDivide
 
 namespace CheckedOps
 {
@@ -4261,4 +4323,4 @@ bool CastFromDoubleOverflows(double fromValue, var_types toType)
             unreached();
     }
 }
-}
+} // namespace CheckedOps
diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h
index 766689fc49b6..6a0362bbbf06 100644
--- a/src/coreclr/jit/utils.h
+++ b/src/coreclr/jit/utils.h
@@ -16,7 +16,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #define _UTILS_H_
 
 #include "safemath.h"
-#include "clr_std/type_traits"
+#include <type_traits>
 #include "iallocator.h"
 #include "hostallocator.h"
 #include "cycletimer.h"
@@ -88,7 +88,9 @@ class IteratorPair
     TIterator m_end;
 
 public:
-    IteratorPair(TIterator begin, TIterator end) : m_begin(begin), m_end(end)
+    IteratorPair(TIterator begin, TIterator end)
+        : m_begin(begin)
+        , m_end(end)
     {
     }
 
@@ -116,7 +118,8 @@ struct ConstLog2
 {
     enum
     {
-        value = ConstLog2<val / 2, acc + 1>::value
+        value = ConstLog2 < val / 2,
+        acc + 1 > ::value
     };
 };
 
@@ -246,6 +249,12 @@ class ConfigMethodRange
 class ConfigIntArray
 {
 public:
+    ConfigIntArray()
+        : m_values(nullptr)
+        , m_length(0)
+    {
+    }
+
     // Ensure the string has been parsed.
     void EnsureInit(const WCHAR* str)
     {
@@ -266,7 +275,7 @@ class ConfigIntArray
     }
 
 private:
-    void Init(const WCHAR* str);
+    void     Init(const WCHAR* str);
     int*     m_values;
     unsigned m_length;
 };
@@ -276,6 +285,12 @@ class ConfigIntArray
 class ConfigDoubleArray
 {
 public:
+    ConfigDoubleArray()
+        : m_values(nullptr)
+        , m_length(0)
+    {
+    }
+
     // Ensure the string has been parsed.
     void EnsureInit(const WCHAR* str)
     {
@@ -296,7 +311,7 @@ class ConfigDoubleArray
     }
 
 private:
-    void Init(const WCHAR* str);
+    void     Init(const WCHAR* str);
     double*  m_values;
     unsigned m_length;
 };
@@ -396,7 +411,8 @@ template <typename T>
 class ScopedSetVariable
 {
 public:
-    ScopedSetVariable(T* pVariable, T value) : m_pVariable(pVariable)
+    ScopedSetVariable(T* pVariable, T value)
+        : m_pVariable(pVariable)
     {
         m_oldValue   = *m_pVariable;
         *m_pVariable = value;
@@ -434,7 +450,8 @@ class PhasedVar
 public:
     PhasedVar()
 #ifdef DEBUG
-        : m_initialized(false), m_writePhase(true)
+        : m_initialized(false)
+        , m_writePhase(true)
 #endif // DEBUG
     {
     }
@@ -696,7 +713,9 @@ class MethodSet
         MethodInfo* m_next;
 
         MethodInfo(char* methodName, int methodHash)
-            : m_MethodName(methodName), m_MethodHash(methodHash), m_next(nullptr)
+            : m_MethodName(methodName)
+            , m_MethodHash(methodHash)
+            , m_next(nullptr)
         {
         }
     };
@@ -778,8 +797,8 @@ unsigned CountDigits(double num, unsigned base = 10);
 #endif // DEBUG
 
 /*****************************************************************************
-* Floating point utility class
-*/
+ * Floating point utility class
+ */
 class FloatingPointUtils
 {
 public:
@@ -813,6 +832,10 @@ class FloatingPointUtils
 
     static bool isAllBitsSet(double val);
 
+    static bool isFinite(float val);
+
+    static bool isFinite(double val);
+
     static bool isNegative(float val);
 
     static bool isNegative(double val);
@@ -858,6 +881,10 @@ class FloatingPointUtils
     static float minimumNumber(float val1, float val2);
 
     static double normalize(double x);
+
+    static int ilogb(double x);
+
+    static int ilogb(float f);
 };
 
 class BitOperations
@@ -1003,7 +1030,7 @@ class CritSecObject
     CRITSEC_COOKIE m_pCs;
 
     // No copying or assignment allowed.
-    CritSecObject(const CritSecObject&) = delete;
+    CritSecObject(const CritSecObject&)            = delete;
     CritSecObject& operator=(const CritSecObject&) = delete;
 };
 
@@ -1013,7 +1040,8 @@ class CritSecObject
 class CritSecHolder
 {
 public:
-    CritSecHolder(CritSecObject& critSec) : m_CritSec(critSec)
+    CritSecHolder(CritSecObject& critSec)
+        : m_CritSec(critSec)
     {
         ClrEnterCriticalSection(m_CritSec.Val());
     }
@@ -1027,7 +1055,7 @@ class CritSecHolder
     CritSecObject& m_CritSec;
 
     // No copying or assignment allowed.
-    CritSecHolder(const CritSecHolder&) = delete;
+    CritSecHolder(const CritSecHolder&)            = delete;
     CritSecHolder& operator=(const CritSecHolder&) = delete;
 };
 
@@ -1043,7 +1071,7 @@ int32_t GetSigned32Magic(int32_t d, int* shift /*out*/);
 #ifdef TARGET_64BIT
 int64_t GetSigned64Magic(int64_t d, int* shift /*out*/);
 #endif
-}
+} // namespace MagicDivide
 
 //
 // Profiling helpers
@@ -1144,6 +1172,6 @@ bool CastFromIntOverflows(int32_t fromValue, var_types toType, bool fromUnsigned
 bool CastFromLongOverflows(int64_t fromValue, var_types toType, bool fromUnsigned);
 bool CastFromFloatOverflows(float fromValue, var_types toType);
 bool CastFromDoubleOverflows(double fromValue, var_types toType);
-}
+} // namespace CheckedOps
 
 #endif // _UTILS_H_
diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp
index 4774e4f0ddb0..97c3be213963 100644
--- a/src/coreclr/jit/valuenum.cpp
+++ b/src/coreclr/jit/valuenum.cpp
@@ -18,17 +18,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #include "valuenum.h"
 #include "ssaconfig.h"
 
-// Windows x86 and Windows ARM/ARM64 may not define _isnanf() but they do define _isnan().
-// We will redirect the macros to these other functions if the macro is not defined for the
-// platform. This has the side effect of a possible implicit upcasting for arguments passed.
-#if (defined(HOST_X86) || defined(HOST_ARM) || defined(HOST_ARM64)) && !defined(HOST_UNIX)
-
-#if !defined(_isnanf)
-#define _isnanf _isnan
-#endif
-
-#endif // (defined(HOST_X86) || defined(HOST_ARM) || defined(HOST_ARM64)) && !defined(HOST_UNIX)
-
 // We need to use target-specific NaN values when statically compute expressions.
 // Otherwise, cross crossgen (e.g. x86_arm) would have different binary outputs
 // from native crossgen (i.e. arm_arm) when the NaN got "embedded" into code.
@@ -56,9 +45,9 @@ struct FloatTraits
 #if defined(TARGET_XARCH)
         unsigned bits = 0xFFC00000u;
 #elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-        unsigned           bits = 0x7FC00000u;
+        unsigned bits = 0x7FC00000u;
 #elif defined(TARGET_WASM)
-        unsigned           bits = 0x7FC00000u;
+        unsigned bits = 0x7FC00000u;
 #else
 #error Unsupported or unset target architecture
 #endif
@@ -116,7 +105,7 @@ TFp FpAdd(TFp value1, TFp value2)
     // If [value1] is positive infinity and [value2] is negative infinity
     //   the result is NaN.
 
-    if (!_finite(value1) && !_finite(value2))
+    if (!FloatingPointUtils::isFinite(value1) && !FloatingPointUtils::isFinite(value2))
     {
         if (value1 < 0 && value2 > 0)
         {
@@ -152,7 +141,7 @@ TFp FpSub(TFp value1, TFp value2)
     // If [value1] is negative infinity and [value2] is negative infinity
     //   the result is NaN.
 
-    if (!_finite(value1) && !_finite(value2))
+    if (!FloatingPointUtils::isFinite(value1) && !FloatingPointUtils::isFinite(value2))
     {
         if (value1 > 0 && value2 > 0)
         {
@@ -190,11 +179,11 @@ TFp FpMul(TFp value1, TFp value2)
     // If [value1] is infinity and [value2] is zero
     //   the result is NaN.
 
-    if (value1 == 0 && !_finite(value2) && !_isnan(value2))
+    if (value1 == 0 && !FloatingPointUtils::isFinite(value2) && !FloatingPointUtils::isNaN(value2))
     {
         return TFpTraits::NaN();
     }
-    if (!_finite(value1) && !_isnan(value1) && value2 == 0)
+    if (!FloatingPointUtils::isFinite(value1) && !FloatingPointUtils::isNaN(value1) && value2 == 0)
     {
         return TFpTraits::NaN();
     }
@@ -228,7 +217,8 @@ TFp FpDiv(TFp dividend, TFp divisor)
     {
         return TFpTraits::NaN();
     }
-    else if (!_finite(dividend) && !_isnan(dividend) && !_finite(divisor) && !_isnan(divisor))
+    else if (!FloatingPointUtils::isFinite(dividend) && !FloatingPointUtils::isNaN(dividend) &&
+             !FloatingPointUtils::isFinite(divisor) && !FloatingPointUtils::isNaN(divisor))
     {
         return TFpTraits::NaN();
     }
@@ -247,11 +237,11 @@ TFp FpRem(TFp dividend, TFp divisor)
     // If [divisor] is infinity,
     //   the result is [dividend]
 
-    if (divisor == 0 || !_finite(dividend))
+    if (divisor == 0 || !FloatingPointUtils::isFinite(dividend))
     {
         return TFpTraits::NaN();
     }
-    else if (!_finite(divisor) && !_isnan(divisor))
+    else if (!FloatingPointUtils::isFinite(divisor) && !FloatingPointUtils::isNaN(divisor))
     {
         return dividend;
     }
@@ -451,6 +441,7 @@ ValueNumStore::ValueNumStore(Compiler* comp, CompAllocator alloc)
 #if defined(TARGET_XARCH)
     , m_simd32CnsMap(nullptr)
     , m_simd64CnsMap(nullptr)
+    , m_simdMaskCnsMap(nullptr)
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
     , m_VNFunc0Map(nullptr)
@@ -821,7 +812,7 @@ int ValueNumStore::EvalComparison<double>(VNFunc vnf, double v0, double v1)
     // Here we handle specialized double comparisons.
 
     // We must check for a NaN argument as they they need special handling
-    bool hasNanArg = (_isnan(v0) || _isnan(v1));
+    bool hasNanArg = (FloatingPointUtils::isNaN(v0) || FloatingPointUtils::isNaN(v1));
 
     if (vnf < VNF_Boundary)
     {
@@ -885,7 +876,7 @@ int ValueNumStore::EvalComparison<float>(VNFunc vnf, float v0, float v1)
     // Here we handle specialized float comparisons.
 
     // We must check for a NaN argument as they they need special handling
-    bool hasNanArg = (_isnanf(v0) || _isnanf(v1));
+    bool hasNanArg = (FloatingPointUtils::isNaN(v0) || FloatingPointUtils::isNaN(v1));
 
     if (vnf < VNF_Boundary)
     {
@@ -1660,7 +1651,11 @@ bool ValueNumStore::IsSharedStatic(ValueNum vn)
 }
 
 ValueNumStore::Chunk::Chunk(CompAllocator alloc, ValueNum* pNextBaseVN, var_types typ, ChunkExtraAttribs attribs)
-    : m_defs(nullptr), m_numUsed(0), m_baseVN(*pNextBaseVN), m_typ(typ), m_attribs(attribs)
+    : m_defs(nullptr)
+    , m_numUsed(0)
+    , m_baseVN(*pNextBaseVN)
+    , m_typ(typ)
+    , m_attribs(attribs)
 {
     // Allocate "m_defs" here, according to the typ/attribs pair.
     switch (attribs)
@@ -1720,6 +1715,12 @@ ValueNumStore::Chunk::Chunk(CompAllocator alloc, ValueNum* pNextBaseVN, var_type
                     m_defs = new (alloc) Alloc<TYP_SIMD64>::Type[ChunkSize];
                     break;
                 }
+
+                case TYP_MASK:
+                {
+                    m_defs = new (alloc) Alloc<TYP_MASK>::Type[ChunkSize];
+                    break;
+                }
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -1884,6 +1885,11 @@ ValueNum ValueNumStore::VNForSimd64Con(simd64_t cnsVal)
 {
     return VnForConst(cnsVal, GetSimd64CnsMap(), TYP_SIMD64);
 }
+
+ValueNum ValueNumStore::VNForSimdMaskCon(simdmask_t cnsVal)
+{
+    return VnForConst(cnsVal, GetSimdMaskCnsMap(), TYP_MASK);
+}
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -1985,6 +1991,11 @@ ValueNum ValueNumStore::VNForGenericCon(var_types typ, uint8_t* cnsVal)
             READ_VALUE(simd64_t);
             return VNForSimd64Con(val);
         }
+        case TYP_MASK:
+        {
+            READ_VALUE(simdmask_t);
+            return VNForSimdMaskCon(val);
+        }
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
         default:
@@ -2099,6 +2110,11 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ)
         {
             return VNForSimd64Con(simd64_t::Zero());
         }
+
+        case TYP_MASK:
+        {
+            return VNForSimdMaskCon(simdmask_t::Zero());
+        }
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -2189,6 +2205,11 @@ ValueNum ValueNumStore::VNAllBitsForType(var_types typ)
         {
             return VNForSimd64Con(simd64_t::AllBitsSet());
         }
+
+        case TYP_MASK:
+        {
+            return VNForSimdMaskCon(simdmask_t::AllBitsSet());
+        }
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -2310,6 +2331,13 @@ ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseT
             memcpy(&simd64Val, &simdVal, sizeof(simd64_t));
             return VNForSimd64Con(simd64Val);
         }
+
+        case TYP_MASK:
+        {
+            // '1' doesn't make sense for TYP_MASK?
+            // Or should it be AllBitsSet?
+            unreached();
+        }
 #endif // TARGET_XARCH
 
         default:
@@ -2478,6 +2506,87 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN)
     return resultVN;
 }
 
+//----------------------------------------------------------------------------------------
+// VNForCast: Returns VN associated with castclass/isinst
+//
+// Arguments:
+//    func     - Either VNF_CastClass or VNF_IsInstanceOf
+//    castToVN - VN of the "Cast to" argument
+//    objVN    - VN of the "Cast object" argument
+//
+// Return Value:
+//    ValueNum associated with castclass/isinst
+//
+ValueNum ValueNumStore::VNForCast(VNFunc func, ValueNum castToVN, ValueNum objVN)
+{
+    assert((func == VNF_CastClass) || (func == VNF_IsInstanceOf));
+
+    if (objVN == VNForNull())
+    {
+        // CastClass(cls, null)    -> null
+        // IsInstanceOf(cls, null) -> null
+        //
+        return VNForNull();
+    }
+
+    //
+    // Fold "CAST(IsInstanceOf(obj, cls), cls)" to "IsInstanceOf(obj, cls)"
+    // where CAST is either ISINST or CASTCLASS.
+    //
+    VNFuncApp funcApp;
+    if (GetVNFunc(objVN, &funcApp) && (funcApp.m_func == VNF_IsInstanceOf) && (funcApp.m_args[0] == castToVN))
+    {
+        // The outer cast is redundant, remove it and preserve its side effects
+        // We do ignoreRoot here because the actual cast node never throws any exceptions.
+        return objVN;
+    }
+
+    // Check if we can fold the cast based on the runtime types of the arguments.
+    //
+    if (IsVNTypeHandle(castToVN))
+    {
+        bool                 isExact;
+        bool                 isNonNull;
+        CORINFO_CLASS_HANDLE castFrom = GetObjectType(objVN, &isExact, &isNonNull);
+        CORINFO_CLASS_HANDLE castTo;
+        if ((castFrom != NO_CLASS_HANDLE) &&
+            EmbeddedHandleMapLookup(ConstantValue<ssize_t>(castToVN), (ssize_t*)&castTo))
+        {
+            TypeCompareState castResult = m_pComp->info.compCompHnd->compareTypesForCast(castFrom, castTo);
+            if (castResult == TypeCompareState::Must)
+            {
+                // IsInstanceOf/CastClass is guaranteed to succeed (we don't need to check for isExact here)
+                return objVN;
+            }
+
+            if ((castResult == TypeCompareState::MustNot) && isExact && (func == VNF_IsInstanceOf))
+            {
+                // IsInstanceOf is guaranteed to fail -> return null (we need to check for isExact here)
+                return VNForNull();
+            }
+        }
+    }
+
+    if (func == VNF_CastClass)
+    {
+        // CastClass(cls, obj) -> obj (may throw InvalidCastException)
+        //
+        ValueNum vnExcSet = VNExcSetSingleton(VNForFuncNoFolding(TYP_REF, VNF_InvalidCastExc, objVN, castToVN));
+        return VNWithExc(objVN, vnExcSet);
+    }
+
+    // IsInstanceOf(cls, obj) -> either obj or null - we don't know
+    //
+    assert(func == VNF_IsInstanceOf);
+    Chunk* const          c                 = GetAllocChunk(TYP_REF, CEA_Func2);
+    unsigned const        offsetWithinChunk = c->AllocVN();
+    VNDefFuncAppFlexible* fapp              = c->PointerToFuncApp(offsetWithinChunk, 2);
+    fapp->m_func                            = VNF_IsInstanceOf;
+    fapp->m_args[0]                         = castToVN;
+    fapp->m_args[1]                         = objVN;
+    return c->m_baseVN + offsetWithinChunk;
+}
+
 //----------------------------------------------------------------------------------------
 //  VNForFunc  - Returns the ValueNum associated with 'func'('arg0VN','arg1VN')
 //               There is a one-to-one relationship between the ValueNum
@@ -2541,12 +2650,9 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
     }
     else
     {
-        if (func == VNF_CastClass)
+        if ((func == VNF_CastClass) || (func == VNF_IsInstanceOf))
         {
-            // In terms of values, a castclass always returns its second argument, the object being cast.
-            // The operation may also throw an exception
-            ValueNum vnExcSet = VNExcSetSingleton(VNForFuncNoFolding(TYP_REF, VNF_InvalidCastExc, arg1VN, arg0VN));
-            resultVN          = VNWithExc(arg1VN, vnExcSet);
+            resultVN = VNForCast(func, arg0VN, arg1VN);
         }
         else
         {
@@ -2873,7 +2979,8 @@ typedef JitHashTable<ValueNum, JitSmallPrimitiveKeyFuncs<ValueNum>, bool> ValueN
 
 class SmallValueNumSet
 {
-    union {
+    union
+    {
         ValueNum     m_inlineElements[4];
         ValueNumSet* m_set;
     };
@@ -3318,7 +3425,7 @@ ValueNum ValueNumStore::VNForMapSelectWork(ValueNumKind      vnk,
                             {
                                 bool     usedRecursiveVN = false;
                                 ValueNum curResult       = VNForMapSelectWork(vnk, type, phiArgVN, index, pBudget,
-                                                                        &usedRecursiveVN, recMemoryDependencies);
+                                                                              &usedRecursiveVN, recMemoryDependencies);
 
                                 *pUsedRecursiveVN |= usedRecursiveVN;
                                 if (sameSelResult == ValueNumStore::RecursiveVN)
@@ -3351,8 +3458,9 @@ ValueNum ValueNumStore::VNForMapSelectWork(ValueNumKind      vnk,
                                 GetMapSelectWorkCache()->Set(fstruct, entry);
                             }
 
-                            recMemoryDependencies.ForEach(
-                                [this, &memoryDependencies](ValueNum vn) { memoryDependencies.Add(m_pComp, vn); });
+                            recMemoryDependencies.ForEach([this, &memoryDependencies](ValueNum vn) {
+                                memoryDependencies.Add(m_pComp, vn);
+                            });
 
                             return sameSelResult;
                         }
@@ -3387,7 +3495,9 @@ ValueNum ValueNumStore::VNForMapSelectWork(ValueNumKind      vnk,
         GetMapSelectWorkCache()->Set(fstruct, entry);
     }
 
-    recMemoryDependencies.ForEach([this, &memoryDependencies](ValueNum vn) { memoryDependencies.Add(m_pComp, vn); });
+    recMemoryDependencies.ForEach([this, &memoryDependencies](ValueNum vn) {
+        memoryDependencies.Add(m_pComp, vn);
+    });
 
     return entry.Result;
 }
@@ -3495,16 +3605,12 @@ ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, Valu
         case TYP_INT:
         {
             int resVal = EvalOp<int>(func, ConstantValue<int>(arg0VN));
-            // Unary op on a handle results in a handle.
-            return IsVNHandle(arg0VN) ? VNForHandle(ssize_t(resVal), GetFoldedArithOpResultHandleFlags(arg0VN))
-                                      : VNForIntCon(resVal);
+            return VNForIntCon(resVal);
         }
         case TYP_LONG:
         {
             INT64 resVal = EvalOp<INT64>(func, ConstantValue<INT64>(arg0VN));
-            // Unary op on a handle results in a handle.
-            return IsVNHandle(arg0VN) ? VNForHandle(ssize_t(resVal), GetFoldedArithOpResultHandleFlags(arg0VN))
-                                      : VNForLongCon(resVal);
+            return VNForLongCon(resVal);
         }
         case TYP_FLOAT:
         {
@@ -3677,7 +3783,7 @@ simd32_t ValueNumStore::GetConstantSimd32(ValueNum argVN)
     return ConstantValue<simd32_t>(argVN);
 }
 
-// Given a simd64 constant value number return its value as a simd32.
+// Given a simd64 constant value number return its value as a simd64.
 //
 simd64_t ValueNumStore::GetConstantSimd64(ValueNum argVN)
 {
@@ -3686,6 +3792,16 @@ simd64_t ValueNumStore::GetConstantSimd64(ValueNum argVN)
 
     return ConstantValue<simd64_t>(argVN);
 }
+
+// Given a simdmask constant value number return its value as a simdmask.
+//
+simdmask_t ValueNumStore::GetConstantSimdMask(ValueNum argVN)
+{
+    assert(IsVNConstant(argVN));
+    assert(TypeOfVN(argVN) == TYP_MASK);
+
+    return ConstantValue<simdmask_t>(argVN);
+}
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -3746,16 +3862,7 @@ ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, Valu
             {
                 assert(typ == TYP_INT);
                 int resultVal = EvalOp<int>(func, arg0Val, arg1Val);
-                // Bin op on a handle results in a handle.
-                ValueNum handleVN = IsVNHandle(arg0VN) ? arg0VN : IsVNHandle(arg1VN) ? arg1VN : NoVN;
-                if (handleVN != NoVN)
-                {
-                    result = VNForHandle(ssize_t(resultVal), GetFoldedArithOpResultHandleFlags(handleVN));
-                }
-                else
-                {
-                    result = VNForIntCon(resultVal);
-                }
+                result        = VNForIntCon(resultVal);
             }
         }
         else if (arg0VNtyp == TYP_LONG)
@@ -3771,17 +3878,8 @@ ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, Valu
             else
             {
                 assert(typ == TYP_LONG);
-                INT64    resultVal = EvalOp<INT64>(func, arg0Val, arg1Val);
-                ValueNum handleVN  = IsVNHandle(arg0VN) ? arg0VN : IsVNHandle(arg1VN) ? arg1VN : NoVN;
-
-                if (handleVN != NoVN)
-                {
-                    result = VNForHandle(ssize_t(resultVal), GetFoldedArithOpResultHandleFlags(handleVN));
-                }
-                else
-                {
-                    result = VNForLongCon(resultVal);
-                }
+                INT64 resultVal = EvalOp<INT64>(func, arg0Val, arg1Val);
+                result          = VNForLongCon(resultVal);
             }
         }
         else // both args are TYP_REF or both args are TYP_BYREF
@@ -4423,6 +4521,11 @@ bool ValueNumStore::VNEvalCanFoldBinaryFunc(var_types type, VNFunc func, ValueNu
             case GT_RSZ:
             case GT_ROL:
             case GT_ROR:
+                if (m_pComp->opts.compReloc && (IsVNHandle(arg0VN) || IsVNHandle(arg1VN)))
+                {
+                    return false;
+                }
+                break;
 
             case GT_EQ:
             case GT_NE:
@@ -4451,6 +4554,11 @@ bool ValueNumStore::VNEvalCanFoldBinaryFunc(var_types type, VNFunc func, ValueNu
             case VNF_ADD_UN_OVF:
             case VNF_SUB_UN_OVF:
             case VNF_MUL_UN_OVF:
+                if (m_pComp->opts.compReloc && (IsVNHandle(arg0VN) || IsVNHandle(arg1VN)))
+                {
+                    return false;
+                }
+                break;
 
             case VNF_Cast:
             case VNF_CastOvf:
@@ -5514,7 +5622,7 @@ ValueNum ValueNumStore::ExtendPtrVN(GenTree* opA, FieldSeq* fldSeq, ssize_t offs
     {
         fldSeq = m_pComp->GetFieldSeqStore()->Append(FieldSeqVNToFieldSeq(funcApp.m_args[1]), fldSeq);
         res    = VNForFunc(TYP_BYREF, VNF_PtrToStatic, funcApp.m_args[0], VNForFieldSeq(fldSeq),
-                        VNForIntPtrCon(ConstantValue<ssize_t>(funcApp.m_args[2]) + offset));
+                           VNForIntPtrCon(ConstantValue<ssize_t>(funcApp.m_args[2]) + offset));
     }
     else if (funcApp.m_func == VNF_PtrToArrElem)
     {
@@ -5557,7 +5665,6 @@ void Compiler::fgValueNumberLocalStore(GenTree*             storeNode,
 
     auto processDef = [=](unsigned defLclNum, unsigned defSsaNum, ssize_t defOffset, unsigned defSize,
                           ValueNumPair defValue) {
-
         LclVarDsc* defVarDsc = lvaGetDesc(defLclNum);
 
         if (defSsaNum != SsaConfig::RESERVED_SSA_NUM)
@@ -6073,39 +6180,6 @@ GenTreeFlags ValueNumStore::GetHandleFlags(ValueNum vn)
     return handleFlags;
 }
 
-GenTreeFlags ValueNumStore::GetFoldedArithOpResultHandleFlags(ValueNum vn)
-{
-    GenTreeFlags flags = GetHandleFlags(vn);
-    assert((flags & GTF_ICON_HDL_MASK) == flags);
-
-    switch (flags)
-    {
-        case GTF_ICON_SCOPE_HDL:
-        case GTF_ICON_CLASS_HDL:
-        case GTF_ICON_METHOD_HDL:
-        case GTF_ICON_FIELD_HDL:
-        case GTF_ICON_TOKEN_HDL:
-        case GTF_ICON_STR_HDL:
-        case GTF_ICON_OBJ_HDL:
-        case GTF_ICON_CONST_PTR:
-        case GTF_ICON_VARG_HDL:
-        case GTF_ICON_PINVKI_HDL:
-        case GTF_ICON_FTN_ADDR:
-        case GTF_ICON_CIDMID_HDL:
-        case GTF_ICON_TLS_HDL:
-        case GTF_ICON_STATIC_BOX_PTR:
-        case GTF_ICON_STATIC_ADDR_PTR:
-            return GTF_ICON_CONST_PTR;
-        case GTF_ICON_STATIC_HDL:
-        case GTF_ICON_GLOBAL_PTR:
-        case GTF_ICON_BBC_PTR:
-            return GTF_ICON_GLOBAL_PTR;
-        default:
-            assert(!"Unexpected handle type");
-            return flags;
-    }
-}
-
 bool ValueNumStore::IsVNHandle(ValueNum vn)
 {
     if (vn == NoVN)
@@ -8504,14 +8578,14 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN
                     case TYP_DOUBLE:
                     {
                         double arg0Val = GetConstantDouble(arg0VN);
-                        res            = ilogb(arg0Val);
+                        res            = FloatingPointUtils::ilogb(arg0Val);
                         break;
                     }
 
                     case TYP_FLOAT:
                     {
                         float arg0Val = GetConstantSingle(arg0VN);
-                        res           = ilogbf(arg0Val);
+                        res           = FloatingPointUtils::ilogb(arg0Val);
                         break;
                     }
 
@@ -8680,14 +8754,6 @@ ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, NamedIntrinsic gtMathF
                     break;
                 }
 
-                case NI_System_Math_FMod:
-                {
-                    assert(typ == TypeOfVN(arg1VN));
-                    double arg1Val = GetConstantDouble(arg1VN);
-                    res            = fmod(arg0Val, arg1Val);
-                    break;
-                }
-
                 case NI_System_Math_Pow:
                 {
                     assert(typ == TypeOfVN(arg1VN));
@@ -8785,14 +8851,6 @@ ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, NamedIntrinsic gtMathF
                     break;
                 }
 
-                case NI_System_Math_FMod:
-                {
-                    assert(typ == TypeOfVN(arg1VN));
-                    float arg1Val = GetConstantSingle(arg1VN);
-                    res           = fmodf(arg0Val, arg1Val);
-                    break;
-                }
-
                 case NI_System_Math_Max:
                 {
                     assert(typ == TypeOfVN(arg1VN));
@@ -8883,10 +8941,6 @@ ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, NamedIntrinsic gtMathF
                 vnf = VNF_Atan2;
                 break;
 
-            case NI_System_Math_FMod:
-                vnf = VNF_FMod;
-                break;
-
             case NI_System_Math_Max:
                 vnf = VNF_Max;
                 break;
@@ -9008,6 +9062,23 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr)
         ssize_t            val         = ConstantValue<ssize_t>(vn);
         const GenTreeFlags handleFlags = GetHandleFlags(vn);
         printf("Hnd const: 0x%p %s", dspPtr(val), GenTree::gtGetHandleKindString(handleFlags));
+        if (!comp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && !comp->opts.IsReadyToRun())
+        {
+            switch (handleFlags & GTF_ICON_HDL_MASK)
+            {
+                case GTF_ICON_CLASS_HDL:
+                    printf(" %s", comp->eeGetClassName((CORINFO_CLASS_HANDLE)val));
+                    break;
+                case GTF_ICON_METHOD_HDL:
+                    printf(" %s", comp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)val));
+                    break;
+                case GTF_ICON_FIELD_HDL:
+                    printf(" %s", comp->eeGetFieldName((CORINFO_FIELD_HANDLE)val, true));
+                    break;
+                default:
+                    break;
+            }
+        }
     }
     else if (IsVNConstant(vn))
     {
@@ -9130,6 +9201,13 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr)
                     cnsVal.u64[6], cnsVal.u64[7]);
                 break;
             }
+
+            case TYP_MASK:
+            {
+                simdmask_t cnsVal = GetConstantSimdMask(vn);
+                printf("SimdMaskCns[0x%08x, 0x%08x]", cnsVal.u32[0], cnsVal.u32[1]);
+                break;
+            }
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -9482,8 +9560,8 @@ const uint8_t ValueNumStore::s_vnfOpAttribs[VNF_COUNT] = {
 
 static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memory.
                                                  GT_NULLCHECK, GT_QMARK, GT_COLON, GT_LOCKADD, GT_XADD, GT_XCHG,
-                                                 GT_CMPXCHG, GT_LCLHEAP, GT_BOX, GT_XORR, GT_XAND, GT_STORE_DYN_BLK,
-                                                 GT_STORE_LCL_VAR, GT_STORE_LCL_FLD, GT_STOREIND, GT_STORE_BLK,
+                                                 GT_CMPXCHG, GT_LCLHEAP, GT_BOX, GT_XORR, GT_XAND, GT_STORE_LCL_VAR,
+                                                 GT_STORE_LCL_FLD, GT_STOREIND, GT_STORE_BLK,
                                                  // These need special semantics:
                                                  GT_COMMA, // == second argument (but with exception(s) from first).
                                                  GT_ARR_ADDR, GT_BOUNDS_CHECK,
@@ -9783,7 +9861,7 @@ class ValueNumberState
             return false;
         }
 
-        if (!predBlock->KindIs(BBJ_COND) || predBlock->TrueTargetIs(predBlock->GetFalseTarget()))
+        if (!predBlock->KindIs(BBJ_COND) || predBlock->TrueEdgeIs(predBlock->GetFalseEdge()))
         {
             return true;
         }
@@ -10570,6 +10648,15 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree)
             tree->gtVNPair.SetBoth(vnStore->VNForSimd64Con(simd64Val));
             break;
         }
+
+        case TYP_MASK:
+        {
+            simdmask_t simdmaskVal;
+            memcpy(&simdmaskVal, &tree->AsVecCon()->gtSimdVal, sizeof(simdmask_t));
+
+            tree->gtVNPair.SetBoth(vnStore->VNForSimdMaskCon(simdmaskVal));
+            break;
+        }
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -10667,21 +10754,21 @@ void Compiler::fgValueNumberStore(GenTree* store)
 {
     assert(store->OperIsStore());
 
-    GenTree* data = store->Data();
+    GenTree* value = store->Data();
 
     // Only normal values are to be stored in SSA defs, VN maps, etc.
-    ValueNumPair dataExcSet;
-    ValueNumPair dataVNPair;
-    vnStore->VNPUnpackExc(data->gtVNPair, &dataVNPair, &dataExcSet);
-    assert(dataVNPair.BothDefined());
+    ValueNumPair valueExcSet;
+    ValueNumPair valueVNPair;
+    vnStore->VNPUnpackExc(value->gtVNPair, &valueVNPair, &valueExcSet);
+    assert(valueVNPair.BothDefined());
 
-    // Is the type being stored different from the type computed by "data"?
-    if (data->TypeGet() != store->TypeGet())
+    // Is the type being stored different from the type computed by "value"?
+    if (value->TypeGet() != store->TypeGet())
     {
         if (store->OperIsInitBlkOp())
         {
             ValueNum initObjVN;
-            if (data->IsIntegralConst(0))
+            if (value->IsIntegralConst(0))
             {
                 initObjVN = vnStore->VNForZeroObj(store->GetLayout(this));
             }
@@ -10690,31 +10777,29 @@ void Compiler::fgValueNumberStore(GenTree* store)
                 initObjVN = vnStore->VNForExpr(compCurBB, TYP_STRUCT);
             }
 
-            dataVNPair.SetBoth(initObjVN);
+            valueVNPair.SetBoth(initObjVN);
         }
-        else if (data->TypeGet() == TYP_REF)
+        else if (value->TypeGet() == TYP_REF)
         {
-            // If we have an unsafe IL assignment of a TYP_REF to a non-ref (typically a TYP_BYREF)
+            // If we have an unsafe IL store of a TYP_REF to a non-ref (typically a TYP_BYREF)
             // then don't propagate this ValueNumber to the lhs, instead create a new unique VN.
-            dataVNPair.SetBoth(vnStore->VNForExpr(compCurBB, store->TypeGet()));
+            valueVNPair.SetBoth(vnStore->VNForExpr(compCurBB, store->TypeGet()));
         }
         else
         {
-            // This means that there is an implicit cast on the rhs value
-            // We will add a cast function to reflect the possible narrowing of the rhs value
-            dataVNPair = vnStore->VNPairForCast(dataVNPair, store->TypeGet(), data->TypeGet());
+            // This means that there is an implicit cast on the value.
+            // We will add a cast function to reflect its possible narrowing.
+            valueVNPair = vnStore->VNPairForCast(valueVNPair, store->TypeGet(), value->TypeGet());
         }
     }
 
-    // Now, record the new VN for an assignment (performing the indicated "state update").
-    // It's safe to use gtEffectiveVal here, because the non-last elements of a comma list on the
-    // LHS will come before the assignment in evaluation order.
+    // Now, record the new VN for the store (performing the indicated "state update").
     switch (store->OperGet())
     {
         case GT_STORE_LCL_VAR:
         {
             GenTreeLclVarCommon* lcl = store->AsLclVarCommon();
-            fgValueNumberLocalStore(store, lcl, 0, lvaLclExactSize(lcl->GetLclNum()), dataVNPair,
+            fgValueNumberLocalStore(store, lcl, 0, lvaLclExactSize(lcl->GetLclNum()), valueVNPair,
                                     /* normalize */ false);
         }
         break;
@@ -10722,7 +10807,7 @@ void Compiler::fgValueNumberStore(GenTree* store)
         case GT_STORE_LCL_FLD:
         {
             GenTreeLclFld* lclFld = store->AsLclFld();
-            fgValueNumberLocalStore(store, lclFld, lclFld->GetLclOffs(), lclFld->GetSize(), dataVNPair);
+            fgValueNumberLocalStore(store, lclFld, lclFld->GetLclOffs(), lclFld->GetSize(), valueVNPair);
         }
         break;
 
@@ -10752,16 +10837,16 @@ void Compiler::fgValueNumberStore(GenTree* store)
                 fldSeq   = vnStore->FieldSeqVNToFieldSeq(funcApp.m_args[1]);
                 offset   = vnStore->ConstantValue<ssize_t>(funcApp.m_args[2]);
 
-                fgValueNumberFieldStore(store, baseAddr, fldSeq, offset, storeSize, dataVNPair.GetLiberal());
+                fgValueNumberFieldStore(store, baseAddr, fldSeq, offset, storeSize, valueVNPair.GetLiberal());
             }
             else if (addrIsVNFunc && (funcApp.m_func == VNF_PtrToArrElem))
             {
-                fgValueNumberArrayElemStore(store, &funcApp, storeSize, dataVNPair.GetLiberal());
+                fgValueNumberArrayElemStore(store, &funcApp, storeSize, valueVNPair.GetLiberal());
             }
             else if (addr->IsFieldAddr(this, &baseAddr, &fldSeq, &offset))
             {
                 assert(fldSeq != nullptr);
-                fgValueNumberFieldStore(store, baseAddr, fldSeq, offset, storeSize, dataVNPair.GetLiberal());
+                fgValueNumberFieldStore(store, baseAddr, fldSeq, offset, storeSize, valueVNPair.GetLiberal());
             }
             else
             {
@@ -10772,8 +10857,8 @@ void Compiler::fgValueNumberStore(GenTree* store)
                 // at byref loads if the current ByrefExposed VN happens to be
                 // VNF_ByrefExposedStore with the same pointer VN, we could propagate the
                 // VN from the RHS to the VN for the load.  This would e.g. allow tracking
-                // values through assignments to out params.  For now, just model this
-                // as an opaque GcHeap/ByrefExposed mutation.
+                // values through stores to out params.  For now, just model this as an
+                // opaque GcHeap/ByrefExposed mutation.
                 fgMutateGcHeap(store DEBUGARG("assign-of-IND"));
             }
         }
@@ -10784,7 +10869,7 @@ void Compiler::fgValueNumberStore(GenTree* store)
     }
 
     // Stores produce no values, and as such are given the "Void" VN.
-    ValueNumPair storeExcSet = dataExcSet;
+    ValueNumPair storeExcSet = valueExcSet;
     if (store->OperIsIndir())
     {
         storeExcSet = vnStore->VNPUnionExcSet(store->AsIndir()->Addr()->gtVNPair, storeExcSet);
@@ -10843,7 +10928,11 @@ void Compiler::fgValueNumberSsaVarDef(GenTreeLclVarCommon* lcl)
 // Return Value:
 //    true if the given tree is a static field address
 //
-static bool GetStaticFieldSeqAndAddress(ValueNumStore* vnStore, GenTree* tree, ssize_t* byteOffset, FieldSeq** pFseq)
+/* static */
+bool Compiler::fgGetStaticFieldSeqAndAddress(ValueNumStore* vnStore,
+                                             GenTree*       tree,
+                                             ssize_t*       byteOffset,
+                                             FieldSeq**     pFseq)
 {
     VNFuncApp funcApp;
     if (vnStore->GetVNFunc(tree->gtVNPair.GetLiberal(), &funcApp) && (funcApp.m_func == VNF_PtrToStatic))
@@ -10858,7 +10947,6 @@ static bool GetStaticFieldSeqAndAddress(ValueNumStore* vnStore, GenTree* tree, s
             return true;
         }
     }
-    ssize_t val = 0;
 
     // Special cases for NativeAOT:
     //   ADD(ICON_STATIC, CNS_INT)                // nonGC-static base
@@ -10876,6 +10964,7 @@ static bool GetStaticFieldSeqAndAddress(ValueNumStore* vnStore, GenTree* tree, s
     }
 
     // Accumulate final offset
+    ssize_t val = 0;
     while (tree->OperIs(GT_ADD))
     {
         GenTree* op1   = tree->gtGetOp1();
@@ -10915,6 +11004,7 @@ static bool GetStaticFieldSeqAndAddress(ValueNumStore* vnStore, GenTree* tree, s
             return true;
         }
     }
+
     return false;
 }
 
@@ -10998,7 +11088,7 @@ bool Compiler::fgValueNumberConstLoad(GenTreeIndir* tree)
     const int             maxElementSize = sizeof(simd_t);
 
     if (!tree->TypeIs(TYP_BYREF, TYP_STRUCT) &&
-        GetStaticFieldSeqAndAddress(vnStore, tree->gtGetOp1(), &byteOffset, &fieldSeq))
+        fgGetStaticFieldSeqAndAddress(vnStore, tree->gtGetOp1(), &byteOffset, &fieldSeq))
     {
         CORINFO_FIELD_HANDLE fieldHandle = fieldSeq->GetFieldHandle();
         if ((fieldHandle != nullptr) && (size > 0) && (size <= maxElementSize) && ((size_t)byteOffset < INT_MAX))
@@ -11153,12 +11243,7 @@ void Compiler::fgValueNumberTree(GenTree* tree)
 
     if (GenTree::OperIsConst(oper))
     {
-        // If this is a struct assignment, with a constant rhs, (i,.e. an initBlk),
-        // it is not useful to value number the constant.
-        if (tree->TypeGet() != TYP_STRUCT)
-        {
-            fgValueNumberTreeConst(tree);
-        }
+        fgValueNumberTreeConst(tree);
     }
     else if (GenTree::OperIsLeaf(oper))
     {
@@ -11233,7 +11318,9 @@ void Compiler::fgValueNumberTree(GenTree* tree)
             break;
 
             case GT_CATCH_ARG:
+            case GT_SWIFT_ERROR:
                 // We know nothing about the value of a caught expression.
+                // We also know nothing about the error register's value post-Swift call.
                 tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
                 break;
 
@@ -11248,7 +11335,7 @@ void Compiler::fgValueNumberTree(GenTree* tree)
             case GT_NOP:
             case GT_JMP:   // Control flow
             case GT_LABEL: // Control flow
-#if !defined(FEATURE_EH_FUNCLETS)
+#if defined(FEATURE_EH_WINDOWS_X86)
             case GT_END_LFIN: // Control flow
 #endif
                 tree->gtVNPair = vnStore->VNPForVoid();
@@ -11395,12 +11482,7 @@ void Compiler::fgValueNumberTree(GenTree* tree)
                 unsigned  loadSize = tree->AsIndir()->Size();
                 VNFuncApp funcApp{VNF_COUNT};
 
-                // TODO-1stClassStructs: delete layout-less "IND(struct)" nodes and the "loadSize == 0" condition.
-                if (loadSize == 0)
-                {
-                    tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, loadType));
-                }
-                else if (fgValueNumberConstLoad(tree->AsIndir()))
+                if (fgValueNumberConstLoad(tree->AsIndir()))
                 {
                     // VN is assigned inside fgValueNumberConstLoad
                 }
@@ -11667,30 +11749,6 @@ void Compiler::fgValueNumberTree(GenTree* tree)
                 break;
 #endif // FEATURE_HW_INTRINSICS
 
-            case GT_STORE_DYN_BLK:
-            {
-                // Conservatively, mutate the heaps - we don't analyze these rare stores.
-                // Likewise, any locals possibly defined by them we mark as address-exposed.
-                fgMutateGcHeap(tree DEBUGARG("dynamic block store"));
-
-                GenTreeStoreDynBlk* store     = tree->AsStoreDynBlk();
-                ValueNumPair        vnpExcSet = ValueNumStore::VNPForEmptyExcSet();
-
-                // Propagate the exceptions...
-                vnpExcSet = vnStore->VNPUnionExcSet(store->Addr()->gtVNPair, vnpExcSet);
-                vnpExcSet = vnStore->VNPUnionExcSet(store->Data()->gtVNPair, vnpExcSet);
-                vnpExcSet = vnStore->VNPUnionExcSet(store->gtDynamicSize->gtVNPair, vnpExcSet);
-
-                // This is a store, it produces no value. Thus we use VNPForVoid().
-                store->gtVNPair = vnStore->VNPWithExc(vnStore->VNPForVoid(), vnpExcSet);
-
-                // Note that we are only adding the exception for the destination address.
-                // Currently, "Data()" is an explicit indirection in case this is a "cpblk".
-                assert(store->Data()->gtEffectiveVal()->OperIsIndir() || store->OperIsInitBlkOp());
-                fgValueNumberAddExceptionSetForIndirection(store, store->Addr());
-                break;
-            }
-
             case GT_CMPXCHG: // Specialop
             {
                 // For CMPXCHG and other intrinsics add an arbitrary side effect on GcHeap/ByrefExposed.
@@ -11968,9 +12026,6 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree)
         // There are some HWINTRINSICS operations that have zero args, i.e.  NI_Vector128_Zero
         if (opCount == 0)
         {
-            // Currently we don't have intrinsics with variable number of args with a parameter-less option.
-            assert(!isVariableNumArgs);
-
             if (encodeResultType)
             {
                 // There are zero arg HWINTRINSICS operations that encode the result type, i.e.  Vector128_AllBitSet
@@ -12138,8 +12193,8 @@ void Compiler::fgValueNumberCastTree(GenTree* tree)
 ValueNum ValueNumStore::VNForCast(ValueNum  srcVN,
                                   var_types castToType,
                                   var_types castFromType,
-                                  bool      srcIsUnsigned,    /* = false */
-                                  bool      hasOverflowCheck) /* = false */
+                                  bool      srcIsUnsigned, /* = false */
+                                  bool      hasOverflowCheck)   /* = false */
 {
 
     if ((castFromType == TYP_I_IMPL) && (castToType == TYP_BYREF) && IsVNHandle(srcVN))
@@ -12184,8 +12239,8 @@ ValueNum ValueNumStore::VNForCast(ValueNum  srcVN,
 ValueNumPair ValueNumStore::VNPairForCast(ValueNumPair srcVNPair,
                                           var_types    castToType,
                                           var_types    castFromType,
-                                          bool         srcIsUnsigned,    /* = false */
-                                          bool         hasOverflowCheck) /* = false */
+                                          bool         srcIsUnsigned, /* = false */
+                                          bool         hasOverflowCheck)      /* = false */
 {
     ValueNum srcLibVN = srcVNPair.GetLiberal();
     ValueNum srcConVN = srcVNPair.GetConservative();
@@ -13086,6 +13141,13 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call)
                     vnStore->VNPairForFunc(TYP_REF, VNF_OverflowExc, vnStore->VNPForVoid()));
                 break;
 
+            case CORINFO_HELP_CHKCASTINTERFACE:
+            case CORINFO_HELP_CHKCASTARRAY:
+            case CORINFO_HELP_CHKCASTCLASS:
+            case CORINFO_HELP_CHKCASTANY:
+                // InvalidCastExc for these is set in VNForCast
+                break;
+
             default:
                 // Setup vnpExc with the information that multiple different exceptions
                 // could be generated by this helper
@@ -13684,7 +13746,6 @@ void Compiler::fgDebugCheckExceptionSets()
 
             ValueNumPair operandsExcSet = vnStore->VNPForEmptyExcSet();
             tree->VisitOperands([&](GenTree* operand) -> GenTree::VisitResult {
-
                 CheckTree(operand, vnStore);
 
                 ValueNumPair operandVNP = operand->gtVNPair.BothDefined() ? operand->gtVNPair : vnStore->VNPForVoid();
@@ -13738,7 +13799,7 @@ void Compiler::JitTestCheckVN()
 
     // First we have to know which nodes in the tree are reachable.
     typedef JitHashTable<GenTree*, JitPtrKeyFuncs<GenTree>, int> NodeToIntMap;
-    NodeToIntMap* reachable = FindReachableNodesInNodeTestData();
+    NodeToIntMap*                                                reachable = FindReachableNodesInNodeTestData();
 
     LabelToVNMap* labelToVN = new (getAllocatorDebugOnly()) LabelToVNMap(getAllocatorDebugOnly());
     VNToLabelMap* vnToLabel = new (getAllocatorDebugOnly()) VNToLabelMap(getAllocatorDebugOnly());
@@ -13873,7 +13934,9 @@ void Compiler::vnPrint(ValueNum vn, unsigned level)
 #endif // DEBUG
 
 // Methods of ValueNumPair.
-ValueNumPair::ValueNumPair() : m_liberal(ValueNumStore::NoVN), m_conservative(ValueNumStore::NoVN)
+ValueNumPair::ValueNumPair()
+    : m_liberal(ValueNumStore::NoVN)
+    , m_conservative(ValueNumStore::NoVN)
 {
 }
 
@@ -13881,3 +13944,68 @@ bool ValueNumPair::BothDefined() const
 {
     return (m_liberal != ValueNumStore::NoVN) && (m_conservative != ValueNumStore::NoVN);
 }
+
+//--------------------------------------------------------------------------------
+// GetObjectType: Try to get a class handle (hopefully, exact) for given object via VN
+//
+// Arguments:
+//    vn         - Value number of the object
+//    pIsExact   - [out] set to true if the class handle is exact
+//    pIsNonNull - [out] set to true if the object is known to be non-null
+//
+// Return Value:
+//    Class handle for the object, or NO_CLASS_HANDLE if not available
+//
+CORINFO_CLASS_HANDLE ValueNumStore::GetObjectType(ValueNum vn, bool* pIsExact, bool* pIsNonNull)
+{
+    *pIsNonNull = false;
+    *pIsExact   = false;
+
+    if (TypeOfVN(vn) != TYP_REF)
+    {
+        // Not an object
+        return NO_CLASS_HANDLE;
+    }
+
+    if (IsVNObjHandle(vn))
+    {
+        // We know exact type for nongc objects, and they can never be null
+        *pIsNonNull   = true;
+        *pIsExact     = true;
+        size_t handle = CoercedConstantValue<size_t>(vn);
+        return m_pComp->info.compCompHnd->getObjectType((CORINFO_OBJECT_HANDLE)handle);
+    }
+
+    VNFuncApp funcApp;
+    if (!GetVNFunc(vn, &funcApp))
+    {
+        // We can't make any assumptions about the object
+        return NO_CLASS_HANDLE;
+    }
+
+    // CastClass/IsInstanceOf/JitNew all have the class handle as the first argument
+    const VNFunc func = funcApp.m_func;
+    if ((func == VNF_CastClass) || (func == VNF_IsInstanceOf) || (func == VNF_JitNew))
+    {
+        ssize_t  clsHandle;
+        ValueNum clsVN = funcApp.m_args[0];
+        if (IsVNTypeHandle(clsVN) && EmbeddedHandleMapLookup(ConstantValue<ssize_t>(clsVN), &clsHandle))
+        {
+            // JitNew returns an exact and non-null obj, castclass and isinst do not have this guarantee.
+            *pIsNonNull = func == VNF_JitNew;
+            *pIsExact   = func == VNF_JitNew;
+            return (CORINFO_CLASS_HANDLE)clsHandle;
+        }
+    }
+
+    // obj.GetType() is guaranteed to return a non-null RuntimeType object
+    if (func == VNF_ObjGetType)
+    {
+        *pIsNonNull = true;
+        // Let's not assume whether RuntimeType is exact or not here (it was not in the past for NAOT)
+        // Callers usually call isExact anyway.
+        return m_pComp->info.compCompHnd->getBuiltinClass(CLASSID_RUNTIME_TYPE);
+    }
+
+    return NO_CLASS_HANDLE;
+}
diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h
index 544bf5d41b8e..1f9171e13cef 100644
--- a/src/coreclr/jit/valuenum.h
+++ b/src/coreclr/jit/valuenum.h
@@ -205,13 +205,6 @@ struct VNFuncApp
     }
 };
 
-// An instance of this struct represents the decoded information of a SIMD type from a value number.
-struct VNSimdTypeInfo
-{
-    unsigned int m_simdSize;
-    CorInfoType  m_simdBaseJitType;
-};
-
 // We use a unique prefix character when printing value numbers in dumps:  i.e.  $1c0
 // This define is used with string concatenation to put this in printf format strings
 #define FMT_VN "$%x"
@@ -245,7 +238,8 @@ class ValueNumStore
     class VNMap : public JitHashTable<fromType, keyfuncs, ValueNum>
     {
     public:
-        VNMap(CompAllocator alloc) : JitHashTable<fromType, keyfuncs, ValueNum>(alloc)
+        VNMap(CompAllocator alloc)
+            : JitHashTable<fromType, keyfuncs, ValueNum>(alloc)
         {
         }
 
@@ -313,7 +307,7 @@ class ValueNumStore
                                                     bool            illegalAsVNFunc,
                                                     GenTreeOperKind kind);
     static constexpr uint8_t GetOpAttribsForFunc(int arity, bool commute, bool knownNonNull, bool sharedStatic);
-    static const uint8_t s_vnfOpAttribs[];
+    static const uint8_t     s_vnfOpAttribs[];
 
     // Returns "true" iff gtOper is a legal value number function.
     // (Requires InitValueNumStoreStatics to have been run.)
@@ -362,18 +356,19 @@ class ValueNumStore
 
 public:
     // Given an constant value number return its value.
-    int GetConstantInt32(ValueNum argVN);
-    INT64 GetConstantInt64(ValueNum argVN);
+    int    GetConstantInt32(ValueNum argVN);
+    INT64  GetConstantInt64(ValueNum argVN);
     double GetConstantDouble(ValueNum argVN);
-    float GetConstantSingle(ValueNum argVN);
+    float  GetConstantSingle(ValueNum argVN);
 
 #if defined(FEATURE_SIMD)
-    simd8_t GetConstantSimd8(ValueNum argVN);
+    simd8_t  GetConstantSimd8(ValueNum argVN);
     simd12_t GetConstantSimd12(ValueNum argVN);
     simd16_t GetConstantSimd16(ValueNum argVN);
 #if defined(TARGET_XARCH)
-    simd32_t GetConstantSimd32(ValueNum argVN);
-    simd64_t GetConstantSimd64(ValueNum argVN);
+    simd32_t   GetConstantSimd32(ValueNum argVN);
+    simd64_t   GetConstantSimd64(ValueNum argVN);
+    simdmask_t GetConstantSimdMask(ValueNum argVN);
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -403,8 +398,6 @@ class ValueNumStore
     // returns true iff vn is known to be a constant int32 that is > 0
     bool IsVNPositiveInt32Constant(ValueNum vn);
 
-    GenTreeFlags GetFoldedArithOpResultHandleFlags(ValueNum vn);
-
 public:
     // Validate that the new initializer for s_vnfOpAttribs matches the old code.
     static void ValidateValueNumStoreStatics();
@@ -459,6 +452,7 @@ class ValueNumStore
 #if defined(TARGET_XARCH)
     ValueNum VNForSimd32Con(simd32_t cnsVal);
     ValueNum VNForSimd64Con(simd64_t cnsVal);
+    ValueNum VNForSimdMaskCon(simdmask_t cnsVal);
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
     ValueNum VNForGenericCon(var_types typ, uint8_t* cnsVal);
@@ -511,6 +505,8 @@ class ValueNumStore
         return nullptr;
     }
 
+    CORINFO_CLASS_HANDLE GetObjectType(ValueNum vn, bool* pIsExact, bool* pIsNonNull);
+
     // And the single constant for an object reference type.
     static ValueNum VNForNull()
     {
@@ -565,7 +561,7 @@ class ValueNumStore
 
     // Create or return the existimg value number representing a singleton exception set
     // for the exception value "x".
-    ValueNum VNExcSetSingleton(ValueNum x);
+    ValueNum     VNExcSetSingleton(ValueNum x);
     ValueNumPair VNPExcSetSingleton(ValueNumPair x);
 
     // Returns true if the current pair of items are in ascending order and they are not duplicates.
@@ -688,6 +684,8 @@ class ValueNumStore
     // Skip all folding checks.
     ValueNum VNForFuncNoFolding(var_types typ, VNFunc func, ValueNum op1VNwx, ValueNum op2VNwx);
 
+    ValueNum VNForCast(VNFunc func, ValueNum castToVN, ValueNum objVN);
+
     ValueNum VNForMapSelect(ValueNumKind vnk, var_types type, ValueNum map, ValueNum index);
 
     ValueNum VNForMapPhysicalSelect(ValueNumKind vnk, var_types type, ValueNum map, unsigned offset, unsigned size);
@@ -817,7 +815,7 @@ class ValueNumStore
         return ValueNumPair(liberalFuncVN, conservativeFuncVN);
     }
 
-    ValueNum VNForExpr(BasicBlock* block, var_types type = TYP_UNKNOWN);
+    ValueNum     VNForExpr(BasicBlock* block, var_types type = TYP_UNKNOWN);
     ValueNumPair VNPairForExpr(BasicBlock* block, var_types type);
 
 // This controls extra tracing of the "evaluation" of "VNF_MapSelect" functions.
@@ -919,7 +917,10 @@ class ValueNumStore
         ValueNum vnIdx;
         ValueNum vnBound;
 
-        UnsignedCompareCheckedBoundInfo() : cmpOper(GT_NONE), vnIdx(NoVN), vnBound(NoVN)
+        UnsignedCompareCheckedBoundInfo()
+            : cmpOper(GT_NONE)
+            , vnIdx(NoVN)
+            , vnBound(NoVN)
         {
         }
     };
@@ -933,7 +934,12 @@ class ValueNumStore
         ValueNum arrOp;
         unsigned cmpOper;
         ValueNum cmpOp;
-        CompareCheckedBoundArithInfo() : vnBound(NoVN), arrOper(GT_NONE), arrOp(NoVN), cmpOper(GT_NONE), cmpOp(NoVN)
+        CompareCheckedBoundArithInfo()
+            : vnBound(NoVN)
+            , arrOper(GT_NONE)
+            , arrOp(NoVN)
+            , cmpOper(GT_NONE)
+            , cmpOp(NoVN)
         {
         }
 #ifdef DEBUG
@@ -961,7 +967,11 @@ class ValueNumStore
         ValueNum cmpOpVN;
         bool     isUnsigned;
 
-        ConstantBoundInfo() : constVal(0), cmpOper(GT_NONE), cmpOpVN(NoVN), isUnsigned(false)
+        ConstantBoundInfo()
+            : constVal(0)
+            , cmpOper(GT_NONE)
+            , cmpOpVN(NoVN)
+            , isUnsigned(false)
         {
         }
 
@@ -1102,7 +1112,8 @@ class ValueNumStore
 
 #ifdef _MSC_VER
 
-                assert(&typeid(T) == &typeid(size_t)); // We represent ref/byref constants as size_t's.
+                assert((&typeid(T) == &typeid(size_t)) ||
+                       (&typeid(T) == &typeid(ssize_t))); // We represent ref/byref constants as size_t/ssize_t
 
 #endif // _MSC_VER
 
@@ -1309,7 +1320,8 @@ class ValueNumStore
         VNFunc   m_func;
         ValueNum m_args[NumArgs];
 
-        VNDefFuncApp() : m_func(VNF_COUNT)
+        VNDefFuncApp()
+            : m_func(VNF_COUNT)
         {
             for (size_t i = 0; i < NumArgs; i++)
             {
@@ -1318,7 +1330,9 @@ class ValueNumStore
         }
 
         template <typename... VNs>
-        VNDefFuncApp(VNFunc func, VNs... vns) : m_func(func), m_args{vns...}
+        VNDefFuncApp(VNFunc func, VNs... vns)
+            : m_func(func)
+            , m_args{vns...}
         {
             static_assert_no_msg(NumArgs == sizeof...(VNs));
         }
@@ -1479,7 +1493,7 @@ class ValueNumStore
     static const int      SmallIntConstMin = -1;
     static const int      SmallIntConstMax = 10;
     static const unsigned SmallIntConstNum = SmallIntConstMax - SmallIntConstMin + 1;
-    static bool IsSmallIntConst(int i)
+    static bool           IsSmallIntConst(int i)
     {
         return SmallIntConstMin <= i && i <= SmallIntConstMax;
     }
@@ -1489,7 +1503,9 @@ class ValueNumStore
     {
         ValueNum      vn;
         ValueNumList* next;
-        ValueNumList(const ValueNum& v, ValueNumList* n = nullptr) : vn(v), next(n)
+        ValueNumList(const ValueNum& v, ValueNumList* n = nullptr)
+            : vn(v)
+            , next(n)
         {
         }
     };
@@ -1520,8 +1536,8 @@ class ValueNumStore
     }
 
     typedef VNMap<VNHandle, VNHandle> HandleToValueNumMap;
-    HandleToValueNumMap* m_handleMap;
-    HandleToValueNumMap* GetHandleMap()
+    HandleToValueNumMap*              m_handleMap;
+    HandleToValueNumMap*              GetHandleMap()
     {
         if (m_handleMap == nullptr)
         {
@@ -1531,10 +1547,10 @@ class ValueNumStore
     }
 
     typedef SmallHashTable<ssize_t, ssize_t> EmbeddedToCompileTimeHandleMap;
-    EmbeddedToCompileTimeHandleMap m_embeddedToCompileTimeHandleMap;
+    EmbeddedToCompileTimeHandleMap           m_embeddedToCompileTimeHandleMap;
 
     typedef SmallHashTable<ValueNum, FieldSeq*> FieldAddressToFieldSeqMap;
-    FieldAddressToFieldSeqMap m_fieldAddressToFieldSeqMap;
+    FieldAddressToFieldSeqMap                   m_fieldAddressToFieldSeqMap;
 
     struct LargePrimitiveKeyFuncsFloat : public JitLargePrimitiveKeyFuncs<float>
     {
@@ -1545,8 +1561,8 @@ class ValueNumStore
     };
 
     typedef VNMap<float, LargePrimitiveKeyFuncsFloat> FloatToValueNumMap;
-    FloatToValueNumMap* m_floatCnsMap;
-    FloatToValueNumMap* GetFloatCnsMap()
+    FloatToValueNumMap*                               m_floatCnsMap;
+    FloatToValueNumMap*                               GetFloatCnsMap()
     {
         if (m_floatCnsMap == nullptr)
         {
@@ -1565,8 +1581,8 @@ class ValueNumStore
     };
 
     typedef VNMap<double, LargePrimitiveKeyFuncsDouble> DoubleToValueNumMap;
-    DoubleToValueNumMap* m_doubleCnsMap;
-    DoubleToValueNumMap* GetDoubleCnsMap()
+    DoubleToValueNumMap*                                m_doubleCnsMap;
+    DoubleToValueNumMap*                                GetDoubleCnsMap()
     {
         if (m_doubleCnsMap == nullptr)
         {
@@ -1606,8 +1622,8 @@ class ValueNumStore
     };
 
     typedef VNMap<simd8_t, Simd8PrimitiveKeyFuncs> Simd8ToValueNumMap;
-    Simd8ToValueNumMap* m_simd8CnsMap;
-    Simd8ToValueNumMap* GetSimd8CnsMap()
+    Simd8ToValueNumMap*                            m_simd8CnsMap;
+    Simd8ToValueNumMap*                            GetSimd8CnsMap()
     {
         if (m_simd8CnsMap == nullptr)
         {
@@ -1636,8 +1652,8 @@ class ValueNumStore
     };
 
     typedef VNMap<simd12_t, Simd12PrimitiveKeyFuncs> Simd12ToValueNumMap;
-    Simd12ToValueNumMap* m_simd12CnsMap;
-    Simd12ToValueNumMap* GetSimd12CnsMap()
+    Simd12ToValueNumMap*                             m_simd12CnsMap;
+    Simd12ToValueNumMap*                             GetSimd12CnsMap()
     {
         if (m_simd12CnsMap == nullptr)
         {
@@ -1667,8 +1683,8 @@ class ValueNumStore
     };
 
     typedef VNMap<simd16_t, Simd16PrimitiveKeyFuncs> Simd16ToValueNumMap;
-    Simd16ToValueNumMap* m_simd16CnsMap;
-    Simd16ToValueNumMap* GetSimd16CnsMap()
+    Simd16ToValueNumMap*                             m_simd16CnsMap;
+    Simd16ToValueNumMap*                             GetSimd16CnsMap()
     {
         if (m_simd16CnsMap == nullptr)
         {
@@ -1703,8 +1719,8 @@ class ValueNumStore
     };
 
     typedef VNMap<simd32_t, Simd32PrimitiveKeyFuncs> Simd32ToValueNumMap;
-    Simd32ToValueNumMap* m_simd32CnsMap;
-    Simd32ToValueNumMap* GetSimd32CnsMap()
+    Simd32ToValueNumMap*                             m_simd32CnsMap;
+    Simd32ToValueNumMap*                             GetSimd32CnsMap()
     {
         if (m_simd32CnsMap == nullptr)
         {
@@ -1746,8 +1762,8 @@ class ValueNumStore
     };
 
     typedef VNMap<simd64_t, Simd64PrimitiveKeyFuncs> Simd64ToValueNumMap;
-    Simd64ToValueNumMap* m_simd64CnsMap;
-    Simd64ToValueNumMap* GetSimd64CnsMap()
+    Simd64ToValueNumMap*                             m_simd64CnsMap;
+    Simd64ToValueNumMap*                             GetSimd64CnsMap()
     {
         if (m_simd64CnsMap == nullptr)
         {
@@ -1755,6 +1771,35 @@ class ValueNumStore
         }
         return m_simd64CnsMap;
     }
+
+    struct SimdMaskPrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simdmask_t>
+    {
+        static bool Equals(simdmask_t x, simdmask_t y)
+        {
+            return x == y;
+        }
+
+        static unsigned GetHashCode(const simdmask_t val)
+        {
+            unsigned hash = 0;
+
+            hash = static_cast<unsigned>(hash ^ val.u32[0]);
+            hash = static_cast<unsigned>(hash ^ val.u32[1]);
+
+            return hash;
+        }
+    };
+
+    typedef VNMap<simdmask_t, SimdMaskPrimitiveKeyFuncs> SimdMaskToValueNumMap;
+    SimdMaskToValueNumMap*                               m_simdMaskCnsMap;
+    SimdMaskToValueNumMap*                               GetSimdMaskCnsMap()
+    {
+        if (m_simdMaskCnsMap == nullptr)
+        {
+            m_simdMaskCnsMap = new (m_alloc) SimdMaskToValueNumMap(m_alloc);
+        }
+        return m_simdMaskCnsMap;
+    }
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -1786,8 +1831,8 @@ class ValueNumStore
     }
 
     typedef VNMap<VNDefFuncApp<1>, VNDefFuncAppKeyFuncs<1>> VNFunc1ToValueNumMap;
-    VNFunc1ToValueNumMap* m_VNFunc1Map;
-    VNFunc1ToValueNumMap* GetVNFunc1Map()
+    VNFunc1ToValueNumMap*                                   m_VNFunc1Map;
+    VNFunc1ToValueNumMap*                                   GetVNFunc1Map()
     {
         if (m_VNFunc1Map == nullptr)
         {
@@ -1797,8 +1842,8 @@ class ValueNumStore
     }
 
     typedef VNMap<VNDefFuncApp<2>, VNDefFuncAppKeyFuncs<2>> VNFunc2ToValueNumMap;
-    VNFunc2ToValueNumMap* m_VNFunc2Map;
-    VNFunc2ToValueNumMap* GetVNFunc2Map()
+    VNFunc2ToValueNumMap*                                   m_VNFunc2Map;
+    VNFunc2ToValueNumMap*                                   GetVNFunc2Map()
     {
         if (m_VNFunc2Map == nullptr)
         {
@@ -1808,8 +1853,8 @@ class ValueNumStore
     }
 
     typedef VNMap<VNDefFuncApp<3>, VNDefFuncAppKeyFuncs<3>> VNFunc3ToValueNumMap;
-    VNFunc3ToValueNumMap* m_VNFunc3Map;
-    VNFunc3ToValueNumMap* GetVNFunc3Map()
+    VNFunc3ToValueNumMap*                                   m_VNFunc3Map;
+    VNFunc3ToValueNumMap*                                   GetVNFunc3Map()
     {
         if (m_VNFunc3Map == nullptr)
         {
@@ -1819,8 +1864,8 @@ class ValueNumStore
     }
 
     typedef VNMap<VNDefFuncApp<4>, VNDefFuncAppKeyFuncs<4>> VNFunc4ToValueNumMap;
-    VNFunc4ToValueNumMap* m_VNFunc4Map;
-    VNFunc4ToValueNumMap* GetVNFunc4Map()
+    VNFunc4ToValueNumMap*                                   m_VNFunc4Map;
+    VNFunc4ToValueNumMap*                                   GetVNFunc4Map()
     {
         if (m_VNFunc4Map == nullptr)
         {
@@ -1831,7 +1876,8 @@ class ValueNumStore
 
     class MapSelectWorkCacheEntry
     {
-        union {
+        union
+        {
             ValueNum* m_memoryDependencies;
             ValueNum  m_inlineMemoryDependencies[sizeof(ValueNum*) / sizeof(ValueNum)];
         };
@@ -1937,6 +1983,13 @@ struct ValueNumStore::VarTypConv<TYP_SIMD64>
     typedef simd64_t Type;
     typedef simd64_t Lang;
 };
+
+template <>
+struct ValueNumStore::VarTypConv<TYP_MASK>
+{
+    typedef simdmask_t Type;
+    typedef simdmask_t Lang;
+};
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
@@ -2013,6 +2066,13 @@ FORCEINLINE simd64_t ValueNumStore::SafeGetConstantValue<simd64_t>(Chunk* c, uns
     assert(c->m_typ == TYP_SIMD64);
     return reinterpret_cast<VarTypConv<TYP_SIMD64>::Lang*>(c->m_defs)[offset];
 }
+
+template <>
+FORCEINLINE simdmask_t ValueNumStore::SafeGetConstantValue<simdmask_t>(Chunk* c, unsigned offset)
+{
+    assert(c->m_typ == TYP_MASK);
+    return reinterpret_cast<VarTypConv<TYP_MASK>::Lang*>(c->m_defs)[offset];
+}
 #endif // TARGET_XARCH
 
 template <>
@@ -2085,6 +2145,20 @@ FORCEINLINE simd64_t ValueNumStore::ConstantValueInternal<simd64_t>(ValueNum vn
 
     return SafeGetConstantValue<simd64_t>(c, offset);
 }
+
+template <>
+FORCEINLINE simdmask_t ValueNumStore::ConstantValueInternal<simdmask_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+    Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+    assert(c->m_attribs == CEA_Const);
+
+    unsigned offset = ChunkOffset(vn);
+
+    assert(c->m_typ == TYP_MASK);
+    assert(!coerce);
+
+    return SafeGetConstantValue<simdmask_t>(c, offset);
+}
 #endif // TARGET_XARCH
 #endif // FEATURE_SIMD
 
diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h
index 72e43c4b8102..74dba754de9a 100644
--- a/src/coreclr/jit/valuenumfuncs.h
+++ b/src/coreclr/jit/valuenumfuncs.h
@@ -83,7 +83,6 @@ ValueNumFuncDef(Cos, 1, false, false, false, false)
 ValueNumFuncDef(Cosh, 1, false, false, false, false)
 ValueNumFuncDef(Exp, 1, false, false, false, false)
 ValueNumFuncDef(Floor, 1, false, false, false, false)
-ValueNumFuncDef(FMod, 2, false, false, false, false)
 ValueNumFuncDef(ILogB, 1, false, false, false, false)
 ValueNumFuncDef(Log, 1, false, false, false, false)
 ValueNumFuncDef(Log2, 1, false, false, false, false)
diff --git a/src/coreclr/jit/valuenumtype.h b/src/coreclr/jit/valuenumtype.h
index 2eb3254e3e18..e41db9726754 100644
--- a/src/coreclr/jit/valuenumtype.h
+++ b/src/coreclr/jit/valuenumtype.h
@@ -115,7 +115,9 @@ struct ValueNumPair
     // Initializes both elements to "NoVN".  Defined in ValueNum.cpp.
     ValueNumPair();
 
-    ValueNumPair(ValueNum lib, ValueNum cons) : m_liberal(lib), m_conservative(cons)
+    ValueNumPair(ValueNum lib, ValueNum cons)
+        : m_liberal(lib)
+        , m_conservative(cons)
     {
     }
 
diff --git a/src/coreclr/jit/varset.h b/src/coreclr/jit/varset.h
index 465ab146cbac..b9e4cab1a0c4 100644
--- a/src/coreclr/jit/varset.h
+++ b/src/coreclr/jit/varset.h
@@ -108,7 +108,7 @@ typedef BitSetOpsWithCounter<VARSET_TP,
                              VarSetOpsRaw::Iter>
     VarSetOps;
 #else
-typedef VarSetOpsRaw       VarSetOps;
+typedef VarSetOpsRaw VarSetOps;
 #endif
 
 #define ALLVARSET_REP BSShortLong
diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h
index 27dd5b332957..642ab1593603 100644
--- a/src/coreclr/jit/vartype.h
+++ b/src/coreclr/jit/vartype.h
@@ -85,13 +85,19 @@ inline bool varTypeIsSIMD(T vt)
 template <class T>
 inline bool varTypeIsMask(T vt)
 {
-#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
+#if defined(FEATURE_MASKED_HW_INTRINSICS)
     return (TypeGet(vt) == TYP_MASK);
-#else // FEATURE_SIMD
+#else // FEATURE_MASKED_HW_INTRINSICS
     return false;
 #endif
 }
 
+template <class T>
+inline bool varTypeIsSIMDOrMask(T vt)
+{
+    return varTypeIsSIMD(vt) || varTypeIsMask(vt);
+}
+
 template <class T>
 inline bool varTypeIsIntegral(T vt)
 {
@@ -219,7 +225,7 @@ inline bool varTypeIsIntOrI(T vt)
 #ifdef TARGET_64BIT
             || (TypeGet(vt) == TYP_I_IMPL)
 #endif // TARGET_64BIT
-                );
+    );
 }
 
 template <class T>
@@ -315,13 +321,13 @@ inline bool varTypeUsesFloatReg(T vt)
 template <class T>
 inline bool varTypeUsesMaskReg(T vt)
 {
-// The technically correct check is:
-//     return varTypeRegister[TypeGet(vt)] == VTR_MASK;
-//
-// However, we only have one type that uses VTR_MASK today
-// and so its quite a bit cheaper to just check that directly
+    // The technically correct check is:
+    //     return varTypeRegister[TypeGet(vt)] == VTR_MASK;
+    //
+    // However, we only have one type that uses VTR_MASK today
+    // and so its quite a bit cheaper to just check that directly
 
-#if defined(FEATURE_SIMD) && defined(TARGET_XARCH)
+#if defined(FEATURE_SIMD) && (defined(TARGET_XARCH) || defined(TARGET_ARM64))
     assert((TypeGet(vt) == TYP_MASK) || (varTypeRegister[TypeGet(vt)] != VTR_MASK));
     return TypeGet(vt) == TYP_MASK;
 #else
diff --git a/src/coreclr/md/ceefilegen/blobfetcher.cpp b/src/coreclr/md/ceefilegen/blobfetcher.cpp
index 7a110eeeeaf5..f08908147de7 100644
--- a/src/coreclr/md/ceefilegen/blobfetcher.cpp
+++ b/src/coreclr/md/ceefilegen/blobfetcher.cpp
@@ -211,7 +211,7 @@ char* CBlobFetcher::MakeNewBlock(unsigned len, unsigned align) {
     pChRet = m_pIndex[m_nIndexUsed].MakeNewBlock(len + pad, 0);
 
     // Did we run out of memory?
-    if (pChRet == NULL &&  m_pIndex[m_nIndexUsed].GetDataLen() == NULL)
+    if (pChRet == NULL &&  m_pIndex[m_nIndexUsed].GetDataLen() == 0)
         return NULL;
 
     if (pChRet == NULL) {
diff --git a/src/coreclr/md/ceefilegen/stdafx.h b/src/coreclr/md/ceefilegen/stdafx.h
index 36f42f95aa52..4026a47f1410 100644
--- a/src/coreclr/md/ceefilegen/stdafx.h
+++ b/src/coreclr/md/ceefilegen/stdafx.h
@@ -17,6 +17,7 @@
 #include <stdlib.h>		// for qsort
 #include <windows.h>
 #include <time.h>
+#include <algorithm>
 
 #include <corerror.h>
 #include <utilcode.h>
@@ -27,3 +28,6 @@
 
 #include "ceegen.h"
 #include "ceesectionstring.h"
+
+using std::min;
+using std::max;
diff --git a/src/coreclr/md/compiler/import.cpp b/src/coreclr/md/compiler/import.cpp
index 9c7d4c5a01b8..060d3261af6a 100644
--- a/src/coreclr/md/compiler/import.cpp
+++ b/src/coreclr/md/compiler/import.cpp
@@ -2172,7 +2172,7 @@ STDMETHODIMP RegMeta::GetUserString(          // S_OK or error.
         memcpy(
             wszString,
             userString.GetDataPointer(),
-            min(userString.GetSize(), cbStringSize));
+            min((ULONG)userString.GetSize(), cbStringSize));
         if (cbStringSize < userString.GetSize())
         {
             if ((wszString != NULL) && (cchStringSize > 0))
diff --git a/src/coreclr/md/compiler/stdafx.h b/src/coreclr/md/compiler/stdafx.h
index 56e29559cafe..b8ae250e008c 100644
--- a/src/coreclr/md/compiler/stdafx.h
+++ b/src/coreclr/md/compiler/stdafx.h
@@ -13,6 +13,7 @@
 
 #include <crtwrap.h>
 #include <winwrap.h>
+#include <algorithm>
 #include <utilcode.h>
 
 #include <cor.h>
@@ -25,4 +26,7 @@
 
 #include "utsem.h"
 
+using std::min;
+using std::max;
+
 #endif  // __STDAFX_H_
diff --git a/src/coreclr/md/enc/rwutil.cpp b/src/coreclr/md/enc/rwutil.cpp
index 69ad55f571c3..a828249fea6b 100644
--- a/src/coreclr/md/enc/rwutil.cpp
+++ b/src/coreclr/md/enc/rwutil.cpp
@@ -230,7 +230,7 @@ HRESULT HENUMInternal::EnumWithCount(
     }
 
     // we can only fill the minimum of what caller asked for or what we have left
-    cTokens = min ( (pEnum->u.m_ulEnd - pEnum->u.m_ulCur), cMax);
+    cTokens = min ( (ULONG)(pEnum->u.m_ulEnd - pEnum->u.m_ulCur), cMax);
 
     if (pEnum->m_EnumType == MDSimpleEnum)
     {
@@ -296,7 +296,7 @@ HRESULT HENUMInternal::EnumWithCount(
     _ASSERTE(! ((pEnum->u.m_ulEnd - pEnum->u.m_ulCur) % 2) );
 
     // we can only fill the minimum of what caller asked for or what we have left
-    cTokens = min ( (pEnum->u.m_ulEnd - pEnum->u.m_ulCur), cMax * 2);
+    cTokens = min ( (ULONG)(pEnum->u.m_ulEnd - pEnum->u.m_ulCur), cMax * 2);
 
     // get the embedded dynamic array
     TOKENLIST       *pdalist = (TOKENLIST *)&(pEnum->m_cursor);
diff --git a/src/coreclr/md/enc/stdafx.h b/src/coreclr/md/enc/stdafx.h
index e1b3962a14e6..10d1cf0f32d6 100644
--- a/src/coreclr/md/enc/stdafx.h
+++ b/src/coreclr/md/enc/stdafx.h
@@ -13,6 +13,7 @@
 
 #include <crtwrap.h>
 #include <winwrap.h>
+#include <algorithm>
 #include <utilcode.h>
 
 #include <cor.h>
@@ -26,4 +27,7 @@
 
 #include "utsem.h"
 
+using std::min;
+using std::max;
+
 #endif  // __STDAFX_H__
diff --git a/src/coreclr/md/runtime/stdafx.h b/src/coreclr/md/runtime/stdafx.h
index aca84b431773..957cbd7e006d 100644
--- a/src/coreclr/md/runtime/stdafx.h
+++ b/src/coreclr/md/runtime/stdafx.h
@@ -13,6 +13,7 @@
 
 #include <crtwrap.h>
 #include <winwrap.h>
+#include <algorithm>
 #include <utilcode.h>
 
 #include <cor.h>
diff --git a/src/coreclr/nativeaot/Bootstrap/main.cpp b/src/coreclr/nativeaot/Bootstrap/main.cpp
index 526dacb9554f..7e2d1951878f 100644
--- a/src/coreclr/nativeaot/Bootstrap/main.cpp
+++ b/src/coreclr/nativeaot/Bootstrap/main.cpp
@@ -103,48 +103,63 @@ extern "C" bool RhRegisterOSModule(void * pModule,
 
 extern "C" void* PalGetModuleHandleFromPointer(void* pointer);
 
+#if defined(HOST_X86) && defined(HOST_WINDOWS)
+#define STRINGIFY(s) #s
+#define MANAGED_RUNTIME_EXPORT_ALTNAME(_method) STRINGIFY(/alternatename:_##_method=_method)
+#define MANAGED_RUNTIME_EXPORT(_name) \
+    __pragma(comment (linker, MANAGED_RUNTIME_EXPORT_ALTNAME(_name))) \
+    extern "C" void __cdecl _name();
+#define MANAGED_RUNTIME_EXPORT_NAME(_name) _name
+#define CDECL __cdecl
+#else
 // The runtime assumes classlib exports have a managed calling convention.
 // For WASM, however, they are exported with the native calling convention
 // by default so we must explicitly use the managed entrypoint here.
 #ifdef HOST_WASM
-#define MANAGED_RUNTIME_EXPORT(name) name##_Managed
-#else
-#define MANAGED_RUNTIME_EXPORT(name) name
+#define MANAGED_RUNTIME_EXPORT(_name) \
+    extern "C" void _name##_Managed();
+#define MANAGED_RUNTIME_EXPORT_NAME(_name) _name##_Managed
+#else // !HOST_WASM
+#define MANAGED_RUNTIME_EXPORT(_name) \
+    extern "C" void _name();
+#define MANAGED_RUNTIME_EXPORT_NAME(_name) _name
+#endif // !HOST_WASM
+#define CDECL
 #endif
 
-extern "C" void MANAGED_RUNTIME_EXPORT(GetRuntimeException)();
-extern "C" void MANAGED_RUNTIME_EXPORT(RuntimeFailFast)();
-extern "C" void MANAGED_RUNTIME_EXPORT(AppendExceptionStackFrame)();
-extern "C" void MANAGED_RUNTIME_EXPORT(GetSystemArrayEEType)();
-extern "C" void MANAGED_RUNTIME_EXPORT(OnFirstChanceException)();
-extern "C" void MANAGED_RUNTIME_EXPORT(OnUnhandledException)();
-extern "C" void MANAGED_RUNTIME_EXPORT(IDynamicCastableIsInterfaceImplemented)();
-extern "C" void MANAGED_RUNTIME_EXPORT(IDynamicCastableGetInterfaceImplementation)();
+MANAGED_RUNTIME_EXPORT(GetRuntimeException)
+MANAGED_RUNTIME_EXPORT(RuntimeFailFast)
+MANAGED_RUNTIME_EXPORT(AppendExceptionStackFrame)
+MANAGED_RUNTIME_EXPORT(GetSystemArrayEEType)
+MANAGED_RUNTIME_EXPORT(OnFirstChanceException)
+MANAGED_RUNTIME_EXPORT(OnUnhandledException)
+MANAGED_RUNTIME_EXPORT(IDynamicCastableIsInterfaceImplemented)
+MANAGED_RUNTIME_EXPORT(IDynamicCastableGetInterfaceImplementation)
 #ifdef FEATURE_OBJCMARSHAL
-extern "C" void ObjectiveCMarshalTryGetTaggedMemory();
-extern "C" void ObjectiveCMarshalGetIsTrackedReferenceCallback();
-extern "C" void ObjectiveCMarshalGetOnEnteredFinalizerQueueCallback();
-extern "C" void ObjectiveCMarshalGetUnhandledExceptionPropagationHandler();
+MANAGED_RUNTIME_EXPORT(ObjectiveCMarshalTryGetTaggedMemory)
+MANAGED_RUNTIME_EXPORT(ObjectiveCMarshalGetIsTrackedReferenceCallback)
+MANAGED_RUNTIME_EXPORT(ObjectiveCMarshalGetOnEnteredFinalizerQueueCallback)
+MANAGED_RUNTIME_EXPORT(ObjectiveCMarshalGetUnhandledExceptionPropagationHandler)
 #endif
 
-typedef void(*pfn)();
+typedef void (CDECL *pfn)();
 
 static const pfn c_classlibFunctions[] = {
-    &MANAGED_RUNTIME_EXPORT(GetRuntimeException),
-    &MANAGED_RUNTIME_EXPORT(RuntimeFailFast),
+    &MANAGED_RUNTIME_EXPORT_NAME(GetRuntimeException),
+    &MANAGED_RUNTIME_EXPORT_NAME(RuntimeFailFast),
     nullptr, // &UnhandledExceptionHandler,
-    &MANAGED_RUNTIME_EXPORT(AppendExceptionStackFrame),
+    &MANAGED_RUNTIME_EXPORT_NAME(AppendExceptionStackFrame),
     nullptr, // &CheckStaticClassConstruction,
-    &MANAGED_RUNTIME_EXPORT(GetSystemArrayEEType),
-    &MANAGED_RUNTIME_EXPORT(OnFirstChanceException),
-    &MANAGED_RUNTIME_EXPORT(OnUnhandledException),
-    &MANAGED_RUNTIME_EXPORT(IDynamicCastableIsInterfaceImplemented),
-    &MANAGED_RUNTIME_EXPORT(IDynamicCastableGetInterfaceImplementation),
+    &MANAGED_RUNTIME_EXPORT_NAME(GetSystemArrayEEType),
+    &MANAGED_RUNTIME_EXPORT_NAME(OnFirstChanceException),
+    &MANAGED_RUNTIME_EXPORT_NAME(OnUnhandledException),
+    &MANAGED_RUNTIME_EXPORT_NAME(IDynamicCastableIsInterfaceImplemented),
+    &MANAGED_RUNTIME_EXPORT_NAME(IDynamicCastableGetInterfaceImplementation),
 #ifdef FEATURE_OBJCMARSHAL
-    &ObjectiveCMarshalTryGetTaggedMemory,
-    &ObjectiveCMarshalGetIsTrackedReferenceCallback,
-    &ObjectiveCMarshalGetOnEnteredFinalizerQueueCallback,
-    &ObjectiveCMarshalGetUnhandledExceptionPropagationHandler,
+    &MANAGED_RUNTIME_EXPORT_NAME(ObjectiveCMarshalTryGetTaggedMemory),
+    &MANAGED_RUNTIME_EXPORT_NAME(ObjectiveCMarshalGetIsTrackedReferenceCallback),
+    &MANAGED_RUNTIME_EXPORT_NAME(ObjectiveCMarshalGetOnEnteredFinalizerQueueCallback),
+    &MANAGED_RUNTIME_EXPORT_NAME(ObjectiveCMarshalGetUnhandledExceptionPropagationHandler),
 #else
     nullptr,
     nullptr,
@@ -241,7 +256,7 @@ int (*g_RuntimeInitializationCallback)() = nullptr;
 #ifndef NATIVEAOT_DLL
 
 #if defined(_WIN32)
-int __cdecl wmain(int argc, wchar_t* argv[])
+int CDECL wmain(int argc, wchar_t* argv[])
 #else
 int main(int argc, char* argv[])
 #endif
diff --git a/src/coreclr/nativeaot/BuildIntegration/Microsoft.DotNet.ILCompiler.SingleEntry.targets b/src/coreclr/nativeaot/BuildIntegration/Microsoft.DotNet.ILCompiler.SingleEntry.targets
index 0af386669aa5..6c1821455fe6 100644
--- a/src/coreclr/nativeaot/BuildIntegration/Microsoft.DotNet.ILCompiler.SingleEntry.targets
+++ b/src/coreclr/nativeaot/BuildIntegration/Microsoft.DotNet.ILCompiler.SingleEntry.targets
@@ -4,9 +4,12 @@
     <!-- Define the name of the runtime specific compiler package to import -->
     <_hostOS>$(NETCoreSdkPortableRuntimeIdentifier.SubString(0, $(NETCoreSdkPortableRuntimeIdentifier.LastIndexOf('-'))))</_hostOS>
 
+    <_targetsNonPortableSdkRid>false</_targetsNonPortableSdkRid>
+    <_targetsNonPortableSdkRid Condition="'$(RuntimeIdentifier)' == '$(NETCoreSdkRuntimeIdentifier)' and '$(RuntimeIdentifier)' != '$(NETCoreSdkPortableRuntimeIdentifier)'">true</_targetsNonPortableSdkRid>
+
     <_originalTargetOS>$(RuntimeIdentifier.SubString(0, $(RuntimeIdentifier.LastIndexOf('-'))))</_originalTargetOS>
-    <_indexOfPeriod>$(_originalTargetOS.IndexOf('.'))</_indexOfPeriod>
-    <_originalTargetOS Condition="'$(_indexOfPeriod)' &gt; -1">$(_originalTargetOS.SubString(0, $(_indexOfPeriod)))</_originalTargetOS>
+    <_originalTargetOS Condition="'$(_targetsNonPortableSdkRid)' == 'true'">$(NETCoreSdkPortableRuntimeIdentifier.SubString(0, $(NETCoreSdkPortableRuntimeIdentifier.LastIndexOf('-'))))</_originalTargetOS>
+    <_originalTargetOS Condition="$(_originalTargetOS.Contains('.'))">$(_originalTargetOS.SubString(0, $(_originalTargetOS.IndexOf('.'))))</_originalTargetOS>
     <_originalTargetOS Condition="$(_originalTargetOS.StartsWith('win'))">win</_originalTargetOS>
 
     <!-- On non-Windows, determine _hostArchitecture from NETCoreSdkPortableRuntimeIdentifier -->
@@ -18,8 +21,10 @@
     <_targetArchitecture>$(RuntimeIdentifier.SubString($([MSBuild]::Add($(RuntimeIdentifier.LastIndexOf('-')), 1))))</_targetArchitecture>
 
     <_hostPackageName>runtime.$(_hostOS)-$(_hostArchitecture).Microsoft.DotNet.ILCompiler</_hostPackageName>
+    <_hostPackageName Condition="'$(_targetsNonPortableSdkRid)' == 'true'">runtime.$(RuntimeIdentifier).Microsoft.DotNet.ILCompiler</_hostPackageName>
     <_hostPackageName Condition="'$(_targetArchitecture)' == 'wasm'">$(_hostPackageName).LLVM</_hostPackageName>
     <_targetPackageName>runtime.$(_originalTargetOS)-$(_targetArchitecture).Microsoft.DotNet.ILCompiler</_targetPackageName>
+    <_targetPackageName Condition="'$(_targetsNonPortableSdkRid)' == 'true'">runtime.$(RuntimeIdentifier).Microsoft.DotNet.ILCompiler</_targetPackageName>
     <_targetPackageName Condition="'$(_targetArchitecture)' == 'wasm'">$(_targetPackageName).LLVM</_targetPackageName>
 
     <!-- Treat linux-musl and linux-bionic etc. as linux -->
@@ -27,6 +32,10 @@
     <_linuxToken>linux-</_linuxToken>
     <_linuxLibcFlavor Condition="$(_targetOS.StartsWith($(_linuxToken)))">$(_targetOS.SubString($(_linuxToken.Length)))</_linuxLibcFlavor>
     <_targetOS Condition="$(_targetOS.StartsWith($(_linuxToken)))">linux</_targetOS>
+
+    <!-- linux-bionic on ARM uses armel (softfp) ABI -->
+    <_targetArchitectureWithAbi>$(_targetArchitecture)</_targetArchitectureWithAbi>
+    <_targetArchitectureWithAbi Condition="'$(_linuxLibcFlavor)' == 'bionic' and '$(_targetArchitecture)' == 'arm'">armel</_targetArchitectureWithAbi>
   </PropertyGroup>
 
   <PropertyGroup>
diff --git a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Publish.targets b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Publish.targets
index 0f18bda40982..0edd9c12335c 100644
--- a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Publish.targets
+++ b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Publish.targets
@@ -27,6 +27,7 @@
     <ItemGroup>
       <_ResolvedCopyLocalPublishAssets Remove="@(_AssembliesToSkipPublish)" />
       <_ResolvedCopyLocalPublishAssets Include="@(_LinkedResolvedAssemblies)" />
+      <_DebugSymbolsIntermediatePath Remove="@(_DebugSymbolsIntermediatePath)" />
     </ItemGroup>
 
     <ItemGroup>
@@ -57,6 +58,8 @@
 
     <Error Condition="'$(PublishTrimmed)' == 'false'" Text="PublishTrimmed is implied by native compilation and cannot be disabled." />
 
+    <Error Condition="'$(DynamicCodeSupport)' == 'true'" Text="DynamicCodeSupport property cannot be set to true with native compilation." />
+
     <!-- Fail with descriptive error message for common unsupported cases. -->
     <Error Condition="'$(DisableUnsupportedError)' != 'true' and '$(OS)' == 'Windows_NT' and '$(_targetOS)' != 'win'"
       Text="Cross-OS native compilation is not supported." />
@@ -64,11 +67,11 @@
     <Error Condition="'$(DisableUnsupportedError)' != 'true' and '$(OS)' != 'Windows_NT' and '$(_targetOS)' == 'win'"
       Text="Cross-OS native compilation is not supported." />
 
-    <Error Condition="'$(DisableUnsupportedError)' != 'true' and '$(_targetArchitecture)' != 'x64' and '$(_targetArchitecture)' != 'arm64' and '$(_targetArchitecture)' != 'arm'"
+    <Error Condition="'$(DisableUnsupportedError)' != 'true' and '$(_targetArchitecture)' != 'x64' and '$(_targetArchitecture)' != 'x86' and '$(_targetArchitecture)' != 'arm64' and '$(_targetArchitecture)' != 'arm'"
       Text="Native compilation does not support targeting $(RuntimeIdentifier) yet." />
 
-    <Error Condition="'$(DisableUnsupportedError)' != 'true' and !('$(_hostArchitecture)' == 'x64' or '$(_hostArchitecture)' == 'arm64' or '$(_hostArchitecture)' == 'arm')"
-      Text="Native compilation can run on x64, arm64 and arm hosts only." />
+    <Error Condition="'$(DisableUnsupportedError)' != 'true' and '$(_hostArchitecture)' != 'x64' and '$(_hostArchitecture)' != 'x86' and '$(_hostArchitecture)' != 'arm64' and '$(_hostArchitecture)' != 'arm'"
+      Text="Native compilation can run on x64, x86, arm64 and arm hosts only." />
 
     <Error Condition="'$(IlcHostPackagePath)' == '' and '$(RuntimePackagePath)' != '' and ('$(_hostArchitecture)' == 'x64' or '$(_hostArchitecture)' == 'arm64')
       and '$(_targetArchitecture)' != 'wasm'"
diff --git a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Unix.targets b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Unix.targets
index 1b38cbd3efa3..d82f02fd7f17 100644
--- a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Unix.targets
+++ b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Unix.targets
@@ -23,7 +23,7 @@ The .NET Foundation licenses this file to you under the MIT license.
     <LinkerFlavor Condition="'$(LinkerFlavor)' == '' and '$(_targetOS)' == 'freebsd'">lld</LinkerFlavor>
     <LinkerFlavor Condition="'$(LinkerFlavor)' == '' and '$(_linuxLibcFlavor)' == 'bionic'">lld</LinkerFlavor>
     <LinkerFlavor Condition="'$(LinkerFlavor)' == '' and '$(_targetOS)' == 'linux'">bfd</LinkerFlavor>
-    <IlcDefaultStackSize Condition="'$(_linuxLibcFlavor)' == 'musl'">1572864</IlcDefaultStackSize>
+    <IlcDefaultStackSize Condition="'$(IlcDefaultStackSize)' == '' and '$(_linuxLibcFlavor)' == 'musl'">1572864</IlcDefaultStackSize>
   </PropertyGroup>
 
   <Target Name="SetupOSSpecificProps" DependsOnTargets="$(IlcDynamicBuildPropertyDependencies)">
@@ -39,15 +39,17 @@ The .NET Foundation licenses this file to you under the MIT license.
       <CrossCompileArch Condition="$(CrossCompileRid.EndsWith('-x64'))">x86_64</CrossCompileArch>
       <CrossCompileArch Condition="$(CrossCompileRid.EndsWith('-arm64')) and '$(_IsApplePlatform)' != 'true'">aarch64</CrossCompileArch>
       <CrossCompileArch Condition="$(CrossCompileRid.EndsWith('-arm64')) and '$(_IsApplePlatform)' == 'true'">arm64</CrossCompileArch>
-      <CrossCompileArch Condition="$(CrossCompileRid.EndsWith('-arm'))">arm</CrossCompileArch>
+      <CrossCompileArch Condition="$(CrossCompileRid.EndsWith('-arm'))">armv7</CrossCompileArch>
 
       <CrossCompileAbi>gnu</CrossCompileAbi>
-      <CrossCompileAbi Condition="$(CrossCompileRid.EndsWith('-arm'))">gnueabihf</CrossCompileAbi>
+      <CrossCompileAbi Condition="$(CrossCompileRid.StartsWith('linux-bionic-'))">android21</CrossCompileAbi>
+      <CrossCompileAbi Condition="$(CrossCompileRid.StartsWith('linux-musl-')) or $(CrossCompileRid.StartsWith('alpine-'))">musl</CrossCompileAbi>
+      <CrossCompileAbi Condition="'$(CrossCompileRid)' == 'linux-arm'">gnueabihf</CrossCompileAbi>
+      <CrossCompileAbi Condition="'$(CrossCompileRid)' == 'linux-bionic-arm'">androideabi21</CrossCompileAbi>
+      <CrossCompileAbi Condition="'$(CrossCompileRid)' == 'linux-musl-arm'">musleabihf</CrossCompileAbi>
 
       <TargetTriple />
       <TargetTriple Condition="'$(CrossCompileArch)' != ''">$(CrossCompileArch)-linux-$(CrossCompileAbi)</TargetTriple>
-      <TargetTriple Condition="'$(CrossCompileArch)' != '' and ($(CrossCompileRid.StartsWith('linux-musl')) or $(CrossCompileRid.StartsWith('alpine')))">$(CrossCompileArch)-alpine-linux-musl</TargetTriple>
-      <TargetTriple Condition="'$(CrossCompileArch)' != '' and $(CrossCompileRid.StartsWith('linux-bionic'))">$(CrossCompileArch)-linux-android21</TargetTriple>
       <TargetTriple Condition="'$(CrossCompileArch)' != '' and ($(CrossCompileRid.StartsWith('freebsd')))">$(CrossCompileArch)-unknown-freebsd12</TargetTriple>
 
       <IlcRPath Condition="'$(IlcRPath)' == '' and '$(_IsApplePlatform)' != 'true'">$ORIGIN</IlcRPath>
@@ -57,6 +59,8 @@ The .NET Foundation licenses this file to you under the MIT license.
       <EventPipeName Condition="'$(EventSourceSupport)' == 'true'">libeventpipe-enabled</EventPipeName>
 
       <LinkStandardCPlusPlusLibrary Condition="'$(LinkStandardCPlusPlusLibrary)' == '' and '$(_IsiOSLikePlatform)' == 'true' and '$(InvariantGlobalization)' != 'true'">true</LinkStandardCPlusPlusLibrary>
+      <VxSortSupportName>libRuntime.VxsortEnabled</VxSortSupportName>
+      <VxSortSupportName Condition="'$(OptimizationPreference)' == 'Size' or '$(IlcDisableVxsort)' == 'true'">libRuntime.VxsortDisabled</VxSortSupportName>
       <StandaloneGCSupportName>libstandalonegc-disabled</StandaloneGCSupportName>
       <StandaloneGCSupportName Condition="'$(IlcStandaloneGCSupport)' == 'true'">libstandalonegc-enabled</StandaloneGCSupportName>
     </PropertyGroup>
@@ -83,6 +87,12 @@ The .NET Foundation licenses this file to you under the MIT license.
       <SharedLibraryInstallName Condition="'$(SharedLibraryInstallName)' == '' and '$(NativeLib)' == 'Shared'">@rpath/$(TargetName)$(NativeBinaryExt)</SharedLibraryInstallName>
     </PropertyGroup>
 
+    <PropertyGroup Condition="'$(_targetOS)' == 'osx'">
+      <AppleMinOSVersion Condition="'$(AppleMinOSVersion)' == ''">11.0</AppleMinOSVersion>
+      <TargetTriple Condition="'$(_targetArchitecture)' == 'x64'">x86_64-apple-macos$(AppleMinOSVersion)</TargetTriple>
+      <TargetTriple Condition="'$(_targetArchitecture)' == 'arm64'">arm64-apple-macos$(AppleMinOSVersion)</TargetTriple>
+    </PropertyGroup>
+
     <Error Condition="'$(_IsiOSLikePlatform)' == 'true' and ('$(_AppleSdkName)' == '' or '$(CrossCompileArch)' == '' or '$(_AppleTripleOS)' == ''  or '$(AppleMinOSVersion)' == '' or '$(_AppleTripleAbi)' == '')"
       Text="One of the required Apple SDK properties is empty and was not properly resolved: _AppleSdkName = '$(_AppleSdkName)' CrossCompileArch = '$(CrossCompileArch)' _AppleTripleOS = '$(_AppleTripleOS)' AppleMinOSVersion = '$(AppleMinOSVersion)' _AppleTripleAbi = '$(_AppleTripleAbi)'" />
 
@@ -109,6 +119,7 @@ The .NET Foundation licenses this file to you under the MIT license.
       <NativeLibrary Condition="'$(NativeLib)' != '' or '$(CustomNativeMain)' == 'true'" Include="$(IlcSdkPath)libbootstrapperdll.o" />
       <NativeLibrary Include="$(IlcSdkPath)$(FullRuntimeName).a" />
       <NativeLibrary Include="$(IlcSdkPath)$(EventPipeName)$(LibFileExt)" />
+      <NativeLibrary Condition="'$(_targetArchitecture)' == 'x64'" Include="$(IlcSdkPath)$(VxSortSupportName)$(LibFileExt)" />
       <NativeLibrary Include="$(IlcSdkPath)$(StandaloneGCSupportName)$(LibFileExt)" />
       <NativeLibrary Condition="'$(LinkStandardCPlusPlusLibrary)' != 'true' and '$(StaticICULinking)' != 'true'" Include="$(IlcSdkPath)libstdc++compat.a" />
     </ItemGroup>
@@ -183,8 +194,7 @@ The .NET Foundation licenses this file to you under the MIT license.
       <LinkerArg Include="@(NativeLibrary)" />
       <LinkerArg Include="--sysroot=&quot;$(SysRoot)&quot;" Condition="'$(SysRoot)' != '' and '$(_IsApplePlatform)' != 'true'" />
       <LinkerArg Include="-isysroot &quot;$(SysRoot)&quot;" Condition="'$(SysRoot)' != '' and '$(_IsApplePlatform)' == 'true'" />
-      <LinkerArg Include="--target=$(TargetTriple)" Condition="'$(_targetOS)' != 'osx' and '$(TargetTriple)' != ''" />
-      <LinkerArg Include="-arch $(CrossCompileArch)" Condition="'$(_IsApplePlatform)' == 'true' and '$(CrossCompileArch)' != ''" />
+      <LinkerArg Include="--target=$(TargetTriple)" Condition="'$(TargetTriple)' != ''" />
       <LinkerArg Include="-g" Condition="$(NativeDebugSymbols) == 'true'" />
       <LinkerArg Include="-Wl,--strip-debug" Condition="$(NativeDebugSymbols) != 'true' and '$(_IsApplePlatform)' != 'true'" />
       <LinkerArg Include="-Wl,-rpath,'$(IlcRPath)'" Condition="'$(StaticExecutable)' != 'true' and !$([MSBuild]::IsOSPlatform('Windows'))" />
@@ -223,6 +233,19 @@ The .NET Foundation licenses this file to you under the MIT license.
       <LinkerArg Include="-Wl,--eh-frame-hdr" Condition="'$(_IsApplePlatform)' != 'true'" />
     </ItemGroup>
 
+    <Exec Command="clang --version" Condition="'$(_IsApplePlatform)' == 'true'" IgnoreExitCode="true" StandardOutputImportance="Low" ConsoleToMSBuild="true">
+      <Output TaskParameter="ExitCode" PropertyName="_XcodeVersionStringExitCode" />
+      <Output TaskParameter="ConsoleOutput" PropertyName="_XcodeVersionString" />
+    </Exec>
+
+    <PropertyGroup Condition="('$(_XcodeVersionStringExitCode)' == '0' or '$(_XcodeVersionStringExitCode)' == '1') and '$(_XcodeVersionString)' != ''">
+      <_XcodeVersion>$([System.Text.RegularExpressions.Regex]::Match($(_XcodeVersionString), '[1-9]\d*'))</_XcodeVersion>
+    </PropertyGroup>
+
+    <ItemGroup Condition="'$(UseLdClassicXCodeLinker)' != 'false' and '$(_IsApplePlatform)' == 'true'">
+      <CustomLinkerArg Condition="'$(_XcodeVersion)' &gt;= '15'" Include="-ld_classic" />
+    </ItemGroup>
+
     <PropertyGroup>
       <_CommandProbe>command -v</_CommandProbe>
       <_CommandProbe Condition="$([MSBuild]::IsOSPlatform('Windows'))">where /Q</_CommandProbe>
diff --git a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets
index dfd432a77805..f9fb32ed669e 100644
--- a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets
+++ b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets
@@ -33,6 +33,7 @@ The .NET Foundation licenses this file to you under the MIT license.
     <LinkerSubsystem Condition="'$(OutputType)' == 'Exe' and '$(LinkerSubsystem)' == ''">CONSOLE</LinkerSubsystem>
     <EventPipeName>eventpipe-disabled</EventPipeName>
     <EventPipeName Condition="'$(EventSourceSupport)' == 'true'">eventpipe-enabled</EventPipeName>
+    <IlcDefaultStackSize Condition="'$(IlcDefaultStackSize)' == ''">1572864</IlcDefaultStackSize>
   </PropertyGroup>
 
   <!-- Ensure that runtime-specific paths have already been set -->
@@ -94,6 +95,8 @@ The .NET Foundation licenses this file to you under the MIT license.
       <LinkerArg Condition="'$(OutputType)' == 'WinExe' or '$(OutputType)' == 'Exe'" Include="/ENTRY:$(EntryPointSymbol) /NOEXP /NOIMPLIB" />
       <LinkerArg Include="/NATVIS:&quot;$(MSBuildThisFileDirectory)NativeAOT.natvis&quot;" />
       <LinkerArg Condition="'$(ControlFlowGuard)' == 'Guard'" Include="/guard:cf" />
+      <LinkerArg Condition="'$(_targetArchitecture)' == 'x86'" Include="/safeseh" />
+      <LinkerArg Condition="'$(OutputType)' == 'WinExe' or '$(OutputType)' == 'Exe'" Include="/STACK:$(IlcDefaultStackSize)" />
       <!-- Do not warn if someone declares UnmanagedCallersOnly with an entrypoint of 'DllGetClassObject' and similar -->
       <LinkerArg Include="/IGNORE:4104" />
     </ItemGroup>
diff --git a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets
index e430ca836167..dbe59baa3b94 100644
--- a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets
+++ b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets
@@ -117,6 +117,7 @@ The .NET Foundation licenses this file to you under the MIT license.
   <PropertyGroup Condition="'$(NativeCodeGen)' == 'llvm'">
     <!-- These LLVM options are public and supported. -->
     <WasmEnableJSBigIntIntegration Condition="'$(WasmEnableJSBigIntIntegration)' == ''">true</WasmEnableJSBigIntIntegration>
+    <WasmEnableNonTrappingFloatToIntConversions Condition="'$(WasmEnableNonTrappingFloatToIntConversions)' == ''">true</WasmEnableNonTrappingFloatToIntConversions>
     <!-- Most browsers support WASM EH, so we enable it by default. This is aligned with the upstream behavior. -->
     <WasmEnableExceptionHandling Condition="'$(WasmEnableExceptionHandling)' == '' and '$(_targetOS)' == 'browser'">true</WasmEnableExceptionHandling>
 
@@ -207,7 +208,7 @@ The .NET Foundation licenses this file to you under the MIT license.
 
   <Target Name="ComputeIlcCompileInputs" DependsOnTargets="$(IlcDynamicBuildPropertyDependencies)" BeforeTargets="Publish">
     <ItemGroup>
-      <ManagedBinary Condition="$(BuildingFrameworkLibrary) != 'true'" Include="$(IntermediateOutputPath)$(TargetName)$(TargetExt)" />
+      <ManagedBinary Condition="$(BuildingFrameworkLibrary) != 'true'" Include="@(IntermediateAssembly)" />
       <IlcCompileInput Include="@(ManagedBinary)" />
       <IlcReference Include="@(DefaultFrameworkAssemblies)" />
     </ItemGroup>
@@ -266,7 +267,7 @@ The .NET Foundation licenses this file to you under the MIT license.
       <IlcArg Include="@(MibcFile->'--mibc:%(Identity)')" />
       <IlcArg Condition="$(IlcGenerateMetadataLog) == 'true'" Include="--metadatalog:$(NativeIntermediateOutputPath)%(ManagedBinary.Filename).metadata.csv" />
       <IlcArg Condition="$(_targetOS) != ''" Include="--targetos:$(_targetOS)" />
-      <IlcArg Condition="$(_targetArchitecture) != ''" Include="--targetarch:$(_targetArchitecture)" />
+      <IlcArg Condition="$(_targetArchitectureWithAbi) != ''" Include="--targetarch:$(_targetArchitectureWithAbi)" />
       <IlcArg Condition="$(IlcMultiModule) == 'true'" Include="--multifile" />
       <IlcArg Condition="$(IlcMultiModule) != 'true' and '$(IlcDehydrate)' != 'false' and '$(ControlFlowGuard)' != 'Guard' and '$(_targetArchitecture)' != 'wasm'" Include="--dehydrate" />
       <IlcArg Condition="$(Optimize) == 'true'" Include="-O" />
@@ -400,12 +401,15 @@ The .NET Foundation licenses this file to you under the MIT license.
 
     <MakeDir Directories="$([System.IO.Path]::GetDirectoryName($(NativeBinary)))" />
 
+    <PropertyGroup>
+      <CompileWasmArgs>-c $(WasmOptimizationSetting)</CompileWasmArgs>
+      <CompileWasmArgs Condition="'$(NativeDebugSymbols)' == 'true'">$(CompileWasmArgs) -g3</CompileWasmArgs>
+      <CompileWasmArgs Condition="'$(WasmEnableNonTrappingFloatToIntConversions)' == 'true'">$(CompileWasmArgs) -mnontrapping-fptoint</CompileWasmArgs>
+      <CompileWasmArgs Condition="'$(IlcLlvmExceptionHandlingModel)' == 'wasm'">$(CompileWasmArgs) -fwasm-exceptions</CompileWasmArgs>
+    </PropertyGroup>
+
     <PropertyGroup Condition="'$(_targetOS)' == 'browser'">
-      <CompileWasmArgs>-c</CompileWasmArgs>
       <CompileWasmArgs Condition="'$(IlcLlvmExceptionHandlingModel)' == 'cpp'">$(CompileWasmArgs) -s DISABLE_EXCEPTION_CATCHING=0</CompileWasmArgs>
-      <CompileWasmArgs Condition="'$(IlcLlvmExceptionHandlingModel)' == 'wasm'">$(CompileWasmArgs) -fwasm-exceptions</CompileWasmArgs>
-      <CompileWasmArgs Condition="'$(NativeDebugSymbols)' == 'true'">$(CompileWasmArgs) -g3</CompileWasmArgs>
-      <CompileWasmArgs>$(CompileWasmArgs) $(WasmOptimizationSetting)</CompileWasmArgs>
 
       <ScriptExt Condition="'$(OS)' == 'Windows_NT'">.bat</ScriptExt>
       <WasmCompilerPath>&quot;$(EMSDK)/upstream/emscripten/emcc$(ScriptExt)&quot;</WasmCompilerPath>
@@ -413,9 +417,7 @@ The .NET Foundation licenses this file to you under the MIT license.
     </PropertyGroup>
 
    <PropertyGroup Condition="'$(_targetOS)' == 'wasi'">
-      <CompileWasmArgs>-fvisibility=default -mllvm -combiner-global-alias-analysis=false -mllvm -disable-lsr --sysroot=&quot;$(WASI_SDK_PATH)/share/wasi-sysroot&quot; -target $(IlcLlvmTarget) -c</CompileWasmArgs>
-      <CompileWasmArgs>$(CompileWasmArgs) $(WasmOptimizationSetting)</CompileWasmArgs>
-      <CompileWasmArgs Condition="'$(NativeDebugSymbols)' == 'true'">$(CompileWasmArgs) -g3</CompileWasmArgs>
+      <CompileWasmArgs>$(CompileWasmArgs) -fvisibility=default -mllvm -combiner-global-alias-analysis=false -mllvm -disable-lsr --sysroot=&quot;$(WASI_SDK_PATH)/share/wasi-sysroot&quot; -target $(IlcLlvmTarget)</CompileWasmArgs>
 
       <ExeExt Condition="'$(OS)' == 'Windows_NT'">.exe</ExeExt>
       <!-- using Emscripten's clang++ because of a crash in wasi-sdk's clang++ (https://github.com/WebAssembly/wasi-sdk/issues/326) -->
@@ -473,7 +475,6 @@ The .NET Foundation licenses this file to you under the MIT license.
     <PropertyGroup>
       <_IgnoreLinkerWarnings>false</_IgnoreLinkerWarnings>
       <_IgnoreLinkerWarnings Condition="'$(_IsApplePlatform)' == 'true'">true</_IgnoreLinkerWarnings>
-      <_StripFlag Condition="'$(_IsApplePlatform)' == 'true' and '$(IlcExportUnmanagedEntrypoints)' == 'true'">-x</_StripFlag> <!-- keep only global symbols -->
     </PropertyGroup>
 
     <!-- write linker script for lld (13+) to retain the __modules section -->
@@ -501,7 +502,7 @@ The .NET Foundation licenses this file to you under the MIT license.
     <Exec Condition="'$(StripSymbols)' == 'true' and '$(_IsApplePlatform)' == 'true' and '$(NativeLib)' != 'Static'"
       Command="
         dsymutil $(DsymUtilOptions) &quot;$(NativeBinary)&quot; &amp;&amp;
-        strip -no_code_signature_warning $(_StripFlag) &quot;$(NativeBinary)&quot;" />
+        strip -no_code_signature_warning -x &quot;$(NativeBinary)&quot;" />
   </Target>
 
   <!-- NativeAOT-LLVM: separate target to reduce conflicts -->
diff --git a/src/coreclr/nativeaot/BuildIntegration/WindowsAPIs.txt b/src/coreclr/nativeaot/BuildIntegration/WindowsAPIs.txt
index bc68e90ec767..d5a50a702a6d 100644
--- a/src/coreclr/nativeaot/BuildIntegration/WindowsAPIs.txt
+++ b/src/coreclr/nativeaot/BuildIntegration/WindowsAPIs.txt
@@ -86,6 +86,7 @@ advapi32!CredWriteDomainCredentialsW
 advapi32!CredWriteW
 advapi32!DeleteAce
 advapi32!DeleteService
+advapi32!DeregisterEventSource
 advapi32!DestroyPrivateObjectSecurity
 advapi32!DuplicateToken
 advapi32!DuplicateTokenEx
@@ -206,6 +207,8 @@ advapi32!RegFlushKey
 advapi32!RegGetKeySecurity
 advapi32!RegGetValueA
 advapi32!RegGetValueW
+advapi32!RegisterEventSourceA
+advapi32!RegisterEventSourceW
 advapi32!RegisterServiceCtrlHandlerA
 advapi32!RegisterServiceCtrlHandlerExA
 advapi32!RegisterServiceCtrlHandlerExW
@@ -239,6 +242,8 @@ advapi32!RegSetValueExA
 advapi32!RegSetValueExW
 advapi32!RegUnLoadKeyA
 advapi32!RegUnLoadKeyW
+advapi32!ReportEventA
+advapi32!ReportEventW
 advapi32!RevertToSelf
 advapi32!SetAclInformation
 advapi32!SetFileSecurityW
diff --git a/src/coreclr/nativeaot/BuildIntegration/findvcvarsall.bat b/src/coreclr/nativeaot/BuildIntegration/findvcvarsall.bat
index efee6316785f..70294486de45 100644
--- a/src/coreclr/nativeaot/BuildIntegration/findvcvarsall.bat
+++ b/src/coreclr/nativeaot/BuildIntegration/findvcvarsall.bat
@@ -33,6 +33,9 @@ IF /I "%~1"=="arm64" (
     SET vcEnvironment=x86_arm64
     IF /I "%procArch%"=="AMD64" SET vcEnvironment=amd64_arm64
 )
+IF /I "%~1"=="x86" (
+    IF /I "%procArch%"=="AMD64" SET vcEnvironment=amd64_x86
+)
 
 CALL "%vsBase%\vc\Auxiliary\Build\vcvarsall.bat" %vcEnvironment% > NUL
 
diff --git a/src/coreclr/nativeaot/CMakeLists.txt b/src/coreclr/nativeaot/CMakeLists.txt
index 99cf2acd49d3..fc5c72a92c5c 100644
--- a/src/coreclr/nativeaot/CMakeLists.txt
+++ b/src/coreclr/nativeaot/CMakeLists.txt
@@ -1,5 +1,6 @@
 if(WIN32)
   add_definitions(-DUNICODE=1)
+  add_compile_definitions(NOMINMAX)
 endif (WIN32)
 
 if(MSVC)
@@ -15,6 +16,7 @@ endif (MSVC)
 
 if(CLR_CMAKE_HOST_UNIX)
   add_compile_options(-fno-exceptions)    # Native AOT runtime doesn't use C++ exception handling
+  add_compile_options(-fno-asynchronous-unwind-tables)
   add_compile_options(-nostdlib)
 
   if(CLR_CMAKE_TARGET_APPLE)
diff --git a/src/coreclr/nativeaot/Common/src/Internal/Runtime/CompilerHelpers/StartupCodeHelpers.cs b/src/coreclr/nativeaot/Common/src/Internal/Runtime/CompilerHelpers/StartupCodeHelpers.cs
index 42e677737be9..6d9c043416f7 100644
--- a/src/coreclr/nativeaot/Common/src/Internal/Runtime/CompilerHelpers/StartupCodeHelpers.cs
+++ b/src/coreclr/nativeaot/Common/src/Internal/Runtime/CompilerHelpers/StartupCodeHelpers.cs
@@ -27,7 +27,7 @@ internal static partial class StartupCodeHelpers
         /// </summary>
         private static IntPtr s_moduleGCStaticsSpines;
 
-        [UnmanagedCallersOnly(EntryPoint = "InitializeModules", CallConvs = new Type[] { typeof(CallConvCdecl) })]
+        [UnmanagedCallersOnly(EntryPoint = "InitializeModules")]
         internal static unsafe void InitializeModules(IntPtr osModule, IntPtr* pModuleHeaders, int count, IntPtr* pClasslibFunctions, int nClasslibFunctions)
         {
             RuntimeImports.RhpRegisterOsModule(osModule);
@@ -206,11 +206,12 @@ private static unsafe object[] InitializeStatics(IntPtr gcStaticRegionStart, int
                 nint blockAddr = MethodTable.SupportsRelativePointers ? (nint)ReadRelPtr32(pBlock) : *pBlock;
                 if ((blockAddr & GCStaticRegionConstants.Uninitialized) == GCStaticRegionConstants.Uninitialized)
                 {
+#pragma warning disable CS8500 // takes address of managed type
                     object? obj = null;
                     RuntimeImports.RhAllocateNewObject(
                         new IntPtr(blockAddr & ~GCStaticRegionConstants.Mask),
                         (uint)GC_ALLOC_FLAGS.GC_ALLOC_PINNED_OBJECT_HEAP,
-                        Unsafe.AsPointer(ref obj));
+                        &obj);
                     if (obj == null)
                     {
                         RuntimeExceptionHelpers.FailFast("Failed allocating GC static bases");
@@ -232,7 +233,8 @@ private static unsafe object[] InitializeStatics(IntPtr gcStaticRegionStart, int
                     Unsafe.Add(ref rawSpineData, currentBase) = obj;
 
                     // Update the base pointer to point to the pinned object
-                    *pBlock = *(IntPtr*)Unsafe.AsPointer(ref obj);
+                    *pBlock = *(IntPtr*)&obj;
+#pragma warning restore CS8500
                 }
 
                 currentBase++;
@@ -290,12 +292,7 @@ private static unsafe void RehydrateData(IntPtr dehydratedData, int length)
                         {
                             // At the time of writing this, 90% of DehydratedDataCommand.Copy cases
                             // would fall into the above specialized cases. 10% fall back to memmove.
-                            memmove(pDest, pCurrent, (nuint)payload);
-
-                            // Not a DllImport - we don't need a GC transition since this is early startup
-                            [MethodImplAttribute(MethodImplOptions.InternalCall)]
-                            [RuntimeImport("*", "memmove")]
-                            static extern unsafe void* memmove(byte* dmem, byte* smem, nuint size);
+                            Unsafe.CopyBlock(pDest, pCurrent, (uint)payload);
                         }
 
                         pDest += payload;
diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CompilerServices/Unsafe.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CompilerServices/Unsafe.cs
index 136872edd4a4..89eedc1638a7 100644
--- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CompilerServices/Unsafe.cs
+++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CompilerServices/Unsafe.cs
@@ -119,5 +119,35 @@ public static T ReadUnaligned<T>(void* source)
         {
             throw new PlatformNotSupportedException();
         }
+
+        /// <summary>
+        /// Reads a value of type <typeparamref name="T"/> from the given location.
+        /// </summary>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static T ReadUnaligned<T>(ref readonly byte source)
+        {
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Copies bytes from the source address to the destination address.
+        /// </summary>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void CopyBlock(void* destination, void* source, uint byteCount)
+        {
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Copies bytes from the source address to the destination address.
+        /// </summary>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void CopyBlock(ref byte destination, ref readonly byte source, uint byteCount)
+        {
+            throw new PlatformNotSupportedException();
+        }
     }
 }
diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/ExceptionHandling.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/ExceptionHandling.cs
index c29e68d1c050..7ce239610a7a 100644
--- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/ExceptionHandling.cs
+++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/ExceptionHandling.cs
@@ -68,6 +68,14 @@ private struct EHEnum
             private IntPtr _dummy; // For alignment
         }
 
+        internal struct MethodRegionInfo
+        {
+            internal byte* _hotStartAddress;
+            internal nuint _hotSize;
+            internal byte* _coldStartAddress;
+            internal nuint _coldSize;
+        }
+
 #pragma warning disable IDE0060
         // This is a fail-fast function used by the runtime as a last resort that will terminate the process with
         // as little effort as possible. No guarantee is made about the semantics of this fail-fast.
@@ -763,7 +771,13 @@ private static void DispatchEx(scoped ref StackFrameIterator frameIter, ref ExIn
 
                 DebugScanCallFrame(exInfo._passNumber, frameIter.ControlPC, frameIter.SP);
 
-                UpdateStackTrace(exceptionObj, exInfo._frameIter.FramePointer, (IntPtr)frameIter.OriginalControlPC, frameIter.SP, ref isFirstRethrowFrame, ref prevFramePtr, ref isFirstFrame, ref exInfo);
+#if !NATIVEAOT
+                // Don't add frames at collided unwind
+                if (startIdx == MaxTryRegionIdx)
+#endif
+                {
+                    UpdateStackTrace(exceptionObj, exInfo._frameIter.FramePointer, (IntPtr)frameIter.OriginalControlPC, frameIter.SP, ref isFirstRethrowFrame, ref prevFramePtr, ref isFirstFrame, ref exInfo);
+                }
 
                 byte* pHandler;
                 if (FindFirstPassHandler(exceptionObj, startIdx, ref frameIter,
@@ -932,6 +946,20 @@ private static void DebugVerifyHandlingFrame(UIntPtr handlingFrameSP)
                 "Handling frame must have a valid stack frame pointer");
         }
 
+        // Caclulate the code offset from the start of the method as if the hot and cold regions were
+        // stored sequentially in memory.
+        private static uint CalculateCodeOffset(byte* pbControlPC, in MethodRegionInfo methodRegionInfo)
+        {
+            uint codeOffset = (uint)(pbControlPC - methodRegionInfo._hotStartAddress);
+            // If the PC is in the cold region, adjust the offset to be relative to the start of the method.
+            if ((methodRegionInfo._coldSize != 0) && (codeOffset >= methodRegionInfo._hotSize))
+            {
+                codeOffset = (uint)(methodRegionInfo._hotSize + (nuint)(pbControlPC - methodRegionInfo._coldStartAddress));
+            }
+
+            return codeOffset;
+        }
+
         private static void UpdateStackTrace(object exceptionObj, UIntPtr curFramePtr, IntPtr ip, UIntPtr sp,
             ref bool isFirstRethrowFrame, ref UIntPtr prevFramePtr, ref bool isFirstFrame, ref ExInfo exInfo)
         {
@@ -958,13 +986,13 @@ private static bool FindFirstPassHandler(object exception, uint idxStart,
             tryRegionIdx = MaxTryRegionIdx;
 
             EHEnum ehEnum;
-            byte* pbMethodStartAddress;
-            if (!InternalCalls.RhpEHEnumInitFromStackFrameIterator(ref frameIter, &pbMethodStartAddress, &ehEnum))
+            MethodRegionInfo methodRegionInfo;
+            if (!InternalCalls.RhpEHEnumInitFromStackFrameIterator(ref frameIter, out methodRegionInfo, &ehEnum))
                 return false;
 
             byte* pbControlPC = frameIter.ControlPC;
 
-            uint codeOffset = (uint)(pbControlPC - pbMethodStartAddress);
+            uint codeOffset = CalculateCodeOffset(pbControlPC, in methodRegionInfo);
 
             uint lastTryStart = 0, lastTryEnd = 0;
 
@@ -1084,27 +1112,21 @@ private static bool ShouldTypedClauseCatchThisException(object exception, Method
 
             return TypeCast.IsInstanceOfException(pClauseType, exception);
 #else
-            bool retry = false;
-            do
+            if (tryUnwrapException && exception is RuntimeWrappedException ex)
             {
-                MethodTable* mt = RuntimeHelpers.GetMethodTable(exception);
-                while (mt != null)
-                {
-                    if (pClauseType == mt)
-                    {
-                        return true;
-                    }
-
-                    mt = mt->ParentMethodTable;
-                }
+                exception = ex.WrappedException;
+            }
 
-                if (tryUnwrapException && exception is RuntimeWrappedException ex)
+            MethodTable* mt = RuntimeHelpers.GetMethodTable(exception);
+            while (mt != null)
+            {
+                if (pClauseType == mt)
                 {
-                    exception = ex.WrappedException;
-                    retry = true;
+                    return true;
                 }
+
+                mt = mt->ParentMethodTable;
             }
-            while (retry);
 
             return false;
 #endif
@@ -1117,13 +1139,14 @@ private static void InvokeSecondPass(ref ExInfo exInfo, uint idxStart)
         private static void InvokeSecondPass(ref ExInfo exInfo, uint idxStart, uint idxLimit)
         {
             EHEnum ehEnum;
-            byte* pbMethodStartAddress;
-            if (!InternalCalls.RhpEHEnumInitFromStackFrameIterator(ref exInfo._frameIter, &pbMethodStartAddress, &ehEnum))
+            MethodRegionInfo methodRegionInfo;
+
+            if (!InternalCalls.RhpEHEnumInitFromStackFrameIterator(ref exInfo._frameIter, out methodRegionInfo, &ehEnum))
                 return;
 
             byte* pbControlPC = exInfo._frameIter.ControlPC;
 
-            uint codeOffset = (uint)(pbControlPC - pbMethodStartAddress);
+            uint codeOffset = CalculateCodeOffset(pbControlPC, in methodRegionInfo);
 
             uint lastTryStart = 0, lastTryEnd = 0;
 
@@ -1196,7 +1219,7 @@ private static void InvokeSecondPass(ref ExInfo exInfo, uint idxStart, uint idxL
 
 #if NATIVEAOT
 #pragma warning disable IDE0060
-        [UnmanagedCallersOnly(EntryPoint = "RhpFailFastForPInvokeExceptionPreemp", CallConvs = new Type[] { typeof(CallConvCdecl) })]
+        [UnmanagedCallersOnly(EntryPoint = "RhpFailFastForPInvokeExceptionPreemp")]
         public static void RhpFailFastForPInvokeExceptionPreemp(IntPtr PInvokeCallsiteReturnAddr, void* pExceptionRecord, void* pContextRecord)
         {
             FailFastViaClasslib(RhFailFastReason.UnhandledExceptionFromPInvoke, null, PInvokeCallsiteReturnAddr);
diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs
index 518bf9a26e54..540c995176ce 100644
--- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs
+++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs
@@ -66,7 +66,6 @@ internal static void RhCollect(int generation, InternalGCCollectionMode mode, bo
         }
 
         [DllImport(Redhawk.BaseName)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         private static extern void RhpCollect(int generation, InternalGCCollectionMode mode, Interop.BOOL lowMemoryP);
 
         [RuntimeExport("RhGetGcTotalMemory")]
@@ -76,7 +75,6 @@ internal static long RhGetGcTotalMemory()
         }
 
         [DllImport(Redhawk.BaseName)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         private static extern long RhpGetGcTotalMemory();
 
         [RuntimeExport("RhStartNoGCRegion")]
@@ -154,16 +152,12 @@ internal static int RhEndNoGCRegion()
 
         [RuntimeImport(Redhawk.BaseName, "RhpAssignRef")]
         [MethodImpl(MethodImplOptions.InternalCall)]
-        internal static extern unsafe void RhpAssignRef(ref object address, object obj);
+        internal static extern unsafe void RhpAssignRef(ref object? address, object? obj);
 
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(Redhawk.BaseName, "RhpGcSafeZeroMemory")]
         internal static extern unsafe ref byte RhpGcSafeZeroMemory(ref byte dmem, nuint size);
 
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        [RuntimeImport(Redhawk.BaseName, "memmove")]
-        internal static extern unsafe void* memmove(byte* dmem, byte* smem, nuint size);
-
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(Redhawk.BaseName, "RhBulkMoveWithWriteBarrier")]
         internal static extern unsafe void RhBulkMoveWithWriteBarrier(ref byte dmem, ref byte smem, nuint size);
@@ -179,7 +173,7 @@ internal static int RhEndNoGCRegion()
 
         [RuntimeImport(Redhawk.BaseName, "RhpEHEnumInitFromStackFrameIterator")]
         [MethodImpl(MethodImplOptions.InternalCall)]
-        internal static extern unsafe bool RhpEHEnumInitFromStackFrameIterator(ref StackFrameIterator pFrameIter, byte** pMethodStartAddress, void* pEHEnum);
+        internal static extern unsafe bool RhpEHEnumInitFromStackFrameIterator(ref StackFrameIterator pFrameIter, out EH.MethodRegionInfo pMethodRegionInfo, void* pEHEnum);
 
         [RuntimeImport(Redhawk.BaseName, "RhpEHEnumNext")]
         [MethodImpl(MethodImplOptions.InternalCall)]
@@ -284,28 +278,23 @@ internal static extern unsafe IntPtr RhpCallPropagateExceptionCallback(
         // Block the current thread until at least one object needs to be finalized (returns true) or
         // memory is low (returns false and the finalizer thread should initiate a garbage collection).
         [DllImport(Redhawk.BaseName)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static extern uint RhpWaitForFinalizerRequest();
 
         // Indicate that the current round of finalizations is complete.
         [DllImport(Redhawk.BaseName)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static extern void RhpSignalFinalizationComplete(uint fCount);
 
         [DllImport(Redhawk.BaseName)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static extern ulong RhpGetTickCount64();
 
         // Enters a no GC region, possibly doing a blocking GC if there is not enough
         // memory available to satisfy the caller's request.
         [DllImport(Redhawk.BaseName)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static extern int RhpStartNoGCRegion(long totalSize, Interop.BOOL hasLohSize, long lohSize, Interop.BOOL disallowFullBlockingGC);
 
         // Exits a no GC region, possibly doing a GC to clean up the garbage that
         // the caller allocated.
         [DllImport(Redhawk.BaseName)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static extern int RhpEndNoGCRegion();
     }
 }
diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/RuntimeExports.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/RuntimeExports.cs
index 8c83e3cc3c83..6fa6c5460dbb 100644
--- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/RuntimeExports.cs
+++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/RuntimeExports.cs
@@ -75,6 +75,9 @@ public static unsafe object RhNewArray(MethodTable* pEEType, int length)
         [RuntimeExport("RhBox")]
         public static unsafe object RhBox(MethodTable* pEEType, ref byte data)
         {
+            // A null can be passed for boxing of a null ref.
+            _ = Unsafe.ReadUnaligned<byte>(ref data);
+
             ref byte dataAdjustedForNullable = ref data;
 
             // Can box non-ByRefLike value types only (which also implies no finalizers).
@@ -114,9 +117,7 @@ public static unsafe object RhBox(MethodTable* pEEType, ref byte data)
             }
             else
             {
-                fixed (byte* pFields = &result.GetRawData())
-                fixed (byte* pData = &dataAdjustedForNullable)
-                    InternalCalls.memmove(pFields, pData, pEEType->ValueTypeSize);
+                Unsafe.CopyBlock(ref result.GetRawData(), ref dataAdjustedForNullable, pEEType->ValueTypeSize);
             }
 
             return result;
@@ -271,9 +272,7 @@ public static unsafe void RhUnbox(object? obj, ref byte data, MethodTable* pUnbo
             else
             {
                 // Copy the boxed fields into the new location.
-                fixed (byte *pData = &data)
-                    fixed (byte* pFields = &fields)
-                        InternalCalls.memmove(pData, pFields, pEEType->ValueTypeSize);
+                Unsafe.CopyBlock(ref data, ref fields, pEEType->ValueTypeSize);
             }
         }
 
@@ -287,7 +286,6 @@ public static unsafe int RhGetCurrentThreadStackTrace(IntPtr[] outputBuffer)
 
 #pragma warning disable SYSLIB1054 // Use DllImport here instead of LibraryImport because this file is used by Test.CoreLib.
         [DllImport(Redhawk.BaseName)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         private static extern unsafe int RhpGetCurrentThreadStackTrace(IntPtr* pOutputBuffer, uint outputBufferLength, UIntPtr addressInCurrentFrame);
 #pragma warning restore SYSLIB1054
 
@@ -303,7 +301,7 @@ public static unsafe int RhGetCurrentThreadStackTrace(IntPtr[] outputBuffer)
         // NOTE: We don't want to allocate the array on behalf of the caller because we don't know which class
         // library's objects the caller understands (we support multiple class libraries with multiple root
         // System.Object types).
-        [UnmanagedCallersOnly(EntryPoint = "RhpCalculateStackTraceWorker", CallConvs = new Type[] { typeof(CallConvCdecl) })]
+        [UnmanagedCallersOnly(EntryPoint = "RhpCalculateStackTraceWorker")]
         private static unsafe int RhpCalculateStackTraceWorker(IntPtr* pOutputBuffer, uint outputBufferLength, UIntPtr addressInCurrentFrame)
         {
             uint nFrames = 0;
diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs
index 3dcf2bae02f2..0161e8c47c15 100644
--- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs
+++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/TypeCast.cs
@@ -22,6 +22,8 @@ namespace System.Runtime
     //
     /////////////////////////////////////////////////////////////////////////////////////////////////////
 
+    [StackTraceHidden]
+    [DebuggerStepThrough]
     [EagerStaticClassConstruction]
     internal static class TypeCast
     {
@@ -146,12 +148,17 @@ internal enum AssignmentVariation
                         interfaceMap++;
                         interfaceCount--;
                     } while (interfaceCount > 0);
+                }
 
-                extra:
-                    if (mt->IsIDynamicInterfaceCastable)
-                    {
-                        goto slowPath;
-                    }
+            extra:
+                // NOTE: this check is outside the `if (interfaceCount != 0)` check because
+                // we could have devirtualized and inlined all uses of IDynamicInterfaceCastable
+                // (and optimized the interface MethodTable away) and still have a type that
+                // is legitimately marked IDynamicInterfaceCastable (without having the MethodTable
+                // of IDynamicInterfaceCastable in the interface list).
+                if (mt->IsIDynamicInterfaceCastable)
+                {
+                    goto slowPath;
                 }
 
                 obj = null;
@@ -737,23 +744,69 @@ public static unsafe void CheckArrayStore(object array, object obj)
             throw array.GetMethodTable()->GetClasslibException(ExceptionIDs.ArrayTypeMismatch);
         }
 
-        internal struct ArrayElement
+        private static unsafe void ThrowIndexOutOfRangeException(object?[] array)
+        {
+            // Throw the index out of range exception defined by the classlib, using the input array's MethodTable*
+            // to find the correct classlib.
+            throw array.GetMethodTable()->GetClasslibException(ExceptionIDs.IndexOutOfRange);
+        }
+
+        private static unsafe void ThrowArrayMismatchException(object?[] array)
         {
-            public object Value;
+            // Throw the array type mismatch exception defined by the classlib, using the input array's MethodTable*
+            // to find the correct classlib.
+            throw array.GetMethodTable()->GetClasslibException(ExceptionIDs.ArrayTypeMismatch);
         }
 
         //
         // Array stelem/ldelema helpers with RyuJIT conventions
         //
+
+        [RuntimeExport("RhpLdelemaRef")]
+        public static unsafe ref object? LdelemaRef(object?[] array, nint index, MethodTable* elementType)
+        {
+            Debug.Assert(array is null || array.GetMethodTable()->IsArray, "first argument must be an array");
+
+#if INPLACE_RUNTIME
+            // This will throw NullReferenceException if obj is null.
+            if ((nuint)index >= (uint)array.Length)
+                ThrowIndexOutOfRangeException(array);
+
+            Debug.Assert(index >= 0);
+            ref object? element = ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index);
+#else
+            if (array is null)
+            {
+                throw elementType->GetClasslibException(ExceptionIDs.NullReference);
+            }
+            if ((nuint)index >= (uint)array.Length)
+            {
+                throw elementType->GetClasslibException(ExceptionIDs.IndexOutOfRange);
+            }
+            ref object rawData = ref Unsafe.As<byte, object>(ref Unsafe.As<RawArrayData>(array).Data);
+            ref object element = ref Unsafe.Add(ref rawData, index);
+#endif
+            MethodTable* arrayElemType = array.GetMethodTable()->RelatedParameterType;
+
+            if (elementType != arrayElemType)
+                ThrowArrayMismatchException(array);
+
+            return ref element;
+        }
+
         [RuntimeExport("RhpStelemRef")]
-        public static unsafe void StelemRef(Array array, nint index, object obj)
+        public static unsafe void StelemRef(object?[] array, nint index, object? obj)
         {
             // This is supported only on arrays
-            Debug.Assert(array.GetMethodTable()->IsArray, "first argument must be an array");
+            Debug.Assert(array is null || array.GetMethodTable()->IsArray, "first argument must be an array");
 
 #if INPLACE_RUNTIME
-            // this will throw appropriate exceptions if array is null or access is out of range.
-            ref object element = ref Unsafe.As<ArrayElement[]>(array)[index].Value;
+            // This will throw NullReferenceException if obj is null.
+            if ((nuint)index >= (uint)array.Length)
+                ThrowIndexOutOfRangeException(array);
+
+            Debug.Assert(index >= 0);
+            ref object? element = ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index);
 #else
             if (array is null)
             {
@@ -796,7 +849,7 @@ public static unsafe void StelemRef(Array array, nint index, object obj)
         }
 
         [MethodImpl(MethodImplOptions.NoInlining)]
-        private static unsafe void StelemRef_Helper(ref object element, MethodTable* elementType, object obj)
+        private static unsafe void StelemRef_Helper(ref object? element, MethodTable* elementType, object obj)
         {
             CastResult result = s_castCache.TryGet((nuint)obj.GetMethodTable() + (int)AssignmentVariation.BoxedSource, (nuint)elementType);
             if (result == CastResult.CanCast)
@@ -808,58 +861,17 @@ private static unsafe void StelemRef_Helper(ref object element, MethodTable* ele
             StelemRef_Helper_NoCacheLookup(ref element, elementType, obj);
         }
 
-        private static unsafe void StelemRef_Helper_NoCacheLookup(ref object element, MethodTable* elementType, object obj)
+        private static unsafe void StelemRef_Helper_NoCacheLookup(ref object? element, MethodTable* elementType, object obj)
         {
             object? castedObj = IsInstanceOfAny_NoCacheLookup(elementType, obj);
-            if (castedObj != null)
-            {
-                InternalCalls.RhpAssignRef(ref element, obj);
-                return;
-            }
-
-            // Throw the array type mismatch exception defined by the classlib, using the input array's
-            // MethodTable* to find the correct classlib.
-            throw elementType->GetClasslibException(ExceptionIDs.ArrayTypeMismatch);
-        }
-
-        [RuntimeExport("RhpLdelemaRef")]
-        public static unsafe ref object LdelemaRef(Array array, nint index, IntPtr elementType)
-        {
-            Debug.Assert(array is null || array.GetMethodTable()->IsArray, "first argument must be an array");
-
-#if INPLACE_RUNTIME
-            // this will throw appropriate exceptions if array is null or access is out of range.
-            ref object element = ref Unsafe.As<ArrayElement[]>(array)[index].Value;
-#else
-            if (array is null)
-            {
-                throw ((MethodTable*)elementType)->GetClasslibException(ExceptionIDs.NullReference);
-            }
-            if ((uint)index >= (uint)array.Length)
+            if (castedObj == null)
             {
-                throw ((MethodTable*)elementType)->GetClasslibException(ExceptionIDs.IndexOutOfRange);
+                // Throw the array type mismatch exception defined by the classlib, using the input array's
+                // MethodTable* to find the correct classlib.
+                throw elementType->GetClasslibException(ExceptionIDs.ArrayTypeMismatch);
             }
-            ref object rawData = ref Unsafe.As<byte, object>(ref Unsafe.As<RawArrayData>(array).Data);
-            ref object element = ref Unsafe.Add(ref rawData, index);
-#endif
 
-            MethodTable* elemType = (MethodTable*)elementType;
-            MethodTable* arrayElemType = array.GetMethodTable()->RelatedParameterType;
-
-            if (elemType == arrayElemType)
-            {
-                return ref element;
-            }
-
-            return ref ThrowArrayMismatchException(array);
-        }
-
-        // This weird structure is for parity with CoreCLR - allows potentially to be tailcalled
-        private static unsafe ref object ThrowArrayMismatchException(Array array)
-        {
-            // Throw the array type mismatch exception defined by the classlib, using the input array's MethodTable*
-            // to find the correct classlib.
-            throw array.GetMethodTable()->GetClasslibException(ExceptionIDs.ArrayTypeMismatch);
+            InternalCalls.RhpAssignRef(ref element, obj);
         }
 
         private static unsafe object IsInstanceOfArray(MethodTable* pTargetType, object obj)
diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/__Finalizer.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/__Finalizer.cs
index b88c8033eb8f..a55b7fc040d2 100644
--- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/__Finalizer.cs
+++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/__Finalizer.cs
@@ -16,7 +16,7 @@ namespace System.Runtime
     // We choose this name to avoid clashing with any future public class with the name Finalizer.
     internal static class __Finalizer
     {
-        [UnmanagedCallersOnly(EntryPoint = "ProcessFinalizers", CallConvs = new Type[] { typeof(CallConvCdecl) })]
+        [UnmanagedCallersOnly(EntryPoint = "ProcessFinalizers")]
         public static void ProcessFinalizers()
         {
 #if INPLACE_RUNTIME
diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt
index 26b2979bd951..0ead84294f75 100644
--- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt
+++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt
@@ -131,7 +131,6 @@ else()
   include_directories(unix)
 
   # sal.h, pshpack/poppack.h
-  add_definitions(-DPAL_STDCPP_COMPAT)
   include_directories(../../pal/inc/rt)
 
   include(CheckIncludeFiles)
@@ -194,7 +193,7 @@ if (CLR_CMAKE_TARGET_APPLE)
   )
 endif (CLR_CMAKE_TARGET_APPLE)
 
-if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
+if (CLR_CMAKE_TARGET_ARCH_AMD64)
   set(VXSORT_SOURCES
     ${GC_DIR}/vxsort/isa_detection.cpp
     ${GC_DIR}/vxsort/do_vxsort_avx2.cpp
@@ -210,7 +209,7 @@ if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
   set(DUMMY_VXSORT_SOURCES
     ${GC_DIR}/vxsort/dummy.cpp
   )
-endif (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
+endif (CLR_CMAKE_TARGET_ARCH_AMD64)
 
 if (CLR_CMAKE_TARGET_ARCH_WASM)
   list(REMOVE_ITEM COMMON_RUNTIME_SOURCES FinalizerHelpers.cpp)
@@ -225,12 +224,6 @@ if (CLR_CMAKE_TARGET_ARCH_WASM)
   )
 endif (CLR_CMAKE_TARGET_ARCH_WASM)
 
-if (NOT (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64))
-  list(APPEND RUNTIME_SOURCES_ARCH_ASM
-    ${ARCH_SOURCES_DIR}/Interlocked.${ASM_SUFFIX}
-  )
-endif (NOT (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64))
-
 list(APPEND RUNTIME_SOURCES_ARCH_ASM
   ${ARCH_SOURCES_DIR}/AllocFast.${ASM_SUFFIX}
   ${ARCH_SOURCES_DIR}/ExceptionHandling.${ASM_SUFFIX}
@@ -290,13 +283,18 @@ add_definitions(-D_LIB)
 # there is a problem with undefined symbols when this is set
 # add_definitions(-DSTRESS_HEAP)
 
-if(WIN32)
+if(CLR_CMAKE_TARGET_WIN32)
   set(FEATURE_ETW 1)
   add_definitions(-DFEATURE_ETW)
   add_definitions(-DFEATURE_SUSPEND_REDIRECTION)
-  add_definitions(-DFEATURE_SPECIAL_USER_MODE_APC)
+  if (CLR_CMAKE_TARGET_ARCH_AMD64)
+    add_definitions(-DFEATURE_SPECIAL_USER_MODE_APC)
+  endif()
+  if (CLR_CMAKE_TARGET_ARCH_I386)
+    add_compile_options($<$<COMPILE_LANGUAGE:ASM_MASM>:/safeseh>)
+  endif()
 else()
-  if(NOT CLR_CMAKE_TARGET_MACCATALYST AND NOT CLR_CMAKE_TARGET_IOS AND NOT CLR_CMAKE_TARGET_TVOS AND NOT CLR_CMAKE_TARGET_ARCH_WASM)
+  if(NOT CLR_CMAKE_TARGET_APPLE AND NOT CLR_CMAKE_TARGET_ARCH_WASM)
     add_definitions(-DFEATURE_READONLY_GS_COOKIE)
   endif()
   include(unix/configure.cmake)
diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.cpp b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.cpp
index eaf8bce80c23..2938ee709740 100644
--- a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.cpp
+++ b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.cpp
@@ -445,7 +445,7 @@ bool InitializeInterfaceDispatch()
     return true;
 }
 
-COOP_PINVOKE_HELPER(PCODE, RhpUpdateDispatchCellCache, (InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo))
+FCIMPL4(PCODE, RhpUpdateDispatchCellCache, InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo)
 {
     // Attempt to update the cache with this new mapping (if we have any cache at all, the initial state
     // is none).
@@ -515,8 +515,9 @@ COOP_PINVOKE_HELPER(PCODE, RhpUpdateDispatchCellCache, (InterfaceDispatchCell *
 
     return (PCODE)pTargetCode;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(PCODE, RhpSearchDispatchCellCache, (InterfaceDispatchCell * pCell, MethodTable* pInstanceType))
+FCIMPL2(PCODE, RhpSearchDispatchCellCache, InterfaceDispatchCell * pCell, MethodTable* pInstanceType)
 {
     // This function must be implemented in native code so that we do not take a GC while walking the cache
     InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache();
@@ -530,13 +531,15 @@ COOP_PINVOKE_HELPER(PCODE, RhpSearchDispatchCellCache, (InterfaceDispatchCell *
 
     return (PCODE)nullptr;
 }
+FCIMPLEND
 
 // Given a dispatch cell, get the type and slot associated with it. This function MUST be implemented
 // in cooperative native code, as the m_pCache field on the cell is unsafe to access from managed
 // code due to its use of the GC state as a lock, and as lifetime control
-COOP_PINVOKE_HELPER(void, RhpGetDispatchCellInfo, (InterfaceDispatchCell * pCell, DispatchCellInfo* pDispatchCellInfo))
+FCIMPL2(void, RhpGetDispatchCellInfo, InterfaceDispatchCell * pCell, DispatchCellInfo* pDispatchCellInfo)
 {
     *pDispatchCellInfo = pCell->GetDispatchCellInfo();
 }
+FCIMPLEND
 
 #endif // FEATURE_CACHED_INTERFACE_DISPATCH
diff --git a/src/coreclr/nativeaot/Runtime/CommonMacros.h b/src/coreclr/nativeaot/Runtime/CommonMacros.h
index b3de2c10e335..d864bb79b0bd 100644
--- a/src/coreclr/nativeaot/Runtime/CommonMacros.h
+++ b/src/coreclr/nativeaot/Runtime/CommonMacros.h
@@ -17,8 +17,8 @@
 #define STDCALL
 #endif
 
-#define NATIVEAOT_API
-#define REDHAWK_CALLCONV FASTCALL
+#define F_CALL_CONV FASTCALL
+#define QCALLTYPE
 
 #ifdef _MSC_VER
 
@@ -89,16 +89,6 @@ inline bool IS_ALIGNED(T* val, uintptr_t alignment);
 #define ZeroMemory(_dst, _size) memset((_dst), 0, (_size))
 #endif
 
-//-------------------------------------------------------------------------------------------------
-// min/max
-
-#ifndef min
-#define min(_a, _b) ((_a) < (_b) ? (_a) : (_b))
-#endif
-#ifndef max
-#define max(_a, _b) ((_a) < (_b) ? (_b) : (_a))
-#endif
-
 #endif // !DACCESS_COMPILE
 
 //-------------------------------------------------------------------------------------------------
@@ -177,14 +167,152 @@ typedef uint8_t CODE_LOCATION;
 // Define an unmanaged function called from managed code that needs to execute in co-operative GC mode. (There
 // should be very few of these, most such functions will be simply p/invoked).
 //
-#define COOP_PINVOKE_HELPER(_rettype, _method, _args) EXTERN_C NATIVEAOT_API _rettype REDHAWK_CALLCONV _method _args
-#ifdef HOST_X86
-// We have helpers that act like memcpy and memset from the CRT, so they need to be __cdecl.
-#define COOP_PINVOKE_CDECL_HELPER(_rettype, _method, _args) EXTERN_C NATIVEAOT_API _rettype __cdecl _method _args
+
+#define FCALL_METHOD_NAME(name, ...) name
+#define FCALL_METHOD_NAME_(tuple) FCALL_METHOD_NAME tuple
+
+#if defined(HOST_X86) && defined(HOST_WINDOWS)
+
+// x86 is special case. It supports multiple calling conventions (fastcall, stdcall, cdecl)
+// and mangles the method names according to the calling convention (eg. @fastcall@4, _stdcall@4,
+// _cdecl).
+//
+// The managed code uses its own calling convention that is different from the native call
+// conventions. It's similar to fastcall but pushes the arguments to stack in reverse order.
+// Additionally, for the sake of simplicity we don't decorate the symbol names.
+//
+// In order to bridge the managed calling convention we use two tricks:
+// - The FCIMPL and FCDECL macros reorder parameters for any method with 4 or more arguments.
+// - A linker comment is used to pass the "/alternatename:foo=@foo@4" switch to allow the
+//   symbols to be resolved to the fastcall decorated name.
+
+#define FCALL_ARGHELPER_NAME(_0, _1, _2, _3, _4, _5, NAME, ...) NAME
+#define FCALL_ARGHELPER_NAME_(tuple) FCALL_ARGHELPER_NAME tuple
+
+#define FCALL_ARGHELPER0(dummy) ()
+#define FCALL_ARGHELPER1(dummy, a) (a)
+#define FCALL_ARGHELPER2(dummy, a, b) (a, b)
+#define FCALL_ARGHELPER3(dummy, a, b, c) (a, b, c)
+#define FCALL_ARGHELPER4(dummy, a, b, c, d) (a, b, d, c)
+#define FCALL_ARGHELPER5(dummy, a, b, c, d, e) (a, b, e, d, c)
+
+#define FCALL_STRINGIFY(s) #s
+#define FCALL_XSTRINGIFY(s) FCALL_STRINGIFY(s)
+
+#define FCALL_METHOD_ARGS(...) FCALL_ARGHELPER_NAME_((__VA_ARGS__, FCALL_ARGHELPER5, FCALL_ARGHELPER4, FCALL_ARGHELPER3, FCALL_ARGHELPER2, FCALL_ARGHELPER1, FCALL_ARGHELPER0)) (__VA_ARGS__)
+#define FCALL_METHOD_ARGS_(tuple) FCALL_METHOD_ARGS tuple
+
+#define FCALL_ARGHELPER_STACKSIZE(...) FCALL_ARGHELPER_NAME_((__VA_ARGS__, 20, 16, 12, 8, 4, 0))
+#define FCALL_IMPL_ALTNAME(_method, _argSize) FCALL_XSTRINGIFY(/alternatename:_method=@_method@_argSize)
+#define FCALL_DECL_ALTNAME(_method, _argSize) FCALL_XSTRINGIFY(/alternatename:@_method@_argSize=_method)
+#define FCDECL_RENAME(_rettype, ...) \
+    _Pragma(FCALL_XSTRINGIFY(comment (linker, FCALL_DECL_ALTNAME(FCALL_METHOD_NAME_((__VA_ARGS__)), FCALL_ARGHELPER_STACKSIZE(__VA_ARGS__)))))
+#define FCIMPL_RENAME(_rettype, ...) \
+    _Pragma(FCALL_XSTRINGIFY(comment (linker, FCALL_IMPL_ALTNAME(FCALL_METHOD_NAME_((__VA_ARGS__)), FCALL_ARGHELPER_STACKSIZE(__VA_ARGS__)))))
+#define FCIMPL_RENAME_ARGSIZE(_rettype, _method, _argSize) \
+    _Pragma(FCALL_XSTRINGIFY(comment (linker, FCALL_XSTRINGIFY(/alternatename:_method=@_method##_FCall@_argSize))))
+
+#define FCIMPL1_F(_rettype, _method, a) \
+    FCIMPL_RENAME_ARGSIZE(_rettype, _method, 4) \
+    EXTERN_C _rettype F_CALL_CONV _method##_FCall (a) \
+    {
+#define FCIMPL1_D(_rettype, _method, a) \
+    FCIMPL_RENAME_ARGSIZE(_rettype, _method, 8) \
+    EXTERN_C _rettype F_CALL_CONV _method##_FCall (a) \
+    {
+#define FCIMPL1_L FCIMPL1_D
+#define FCIMPL2_FF(_rettype, _method, a, b) \
+    FCIMPL_RENAME_ARGSIZE(_rettype, _method, 8) \
+    EXTERN_C _rettype F_CALL_CONV _method##_FCall (b, a) \
+    {
+#define FCIMPL2_DD(_rettype, _method, a, b) \
+    FCIMPL_RENAME_ARGSIZE(_rettype, _method, 16) \
+    EXTERN_C _rettype F_CALL_CONV _method##_FCall (b, a) \
+    {
+#define FCIMPL2_FI(_rettype, _method, a, b) \
+    FCIMPL_RENAME_ARGSIZE(_rettype, _method, 8) \
+    EXTERN_C _rettype F_CALL_CONV _method##_FCall (a, b) \
+    {
+#define FCIMPL2_DI(_rettype, _method, a, b) \
+    FCIMPL_RENAME_ARGSIZE(_rettype, _method, 12) \
+    EXTERN_C _rettype F_CALL_CONV _method##_FCall (a, b) \
+    {
+#define FCIMPL3_FFF(_rettype, _method, a, b, c) \
+    FCIMPL_RENAME_ARGSIZE(_rettype, _method, 12) \
+    EXTERN_C _rettype F_CALL_CONV _method##_FCall (c, b, a) \
+    {
+#define FCIMPL3_DDD(_rettype, _method, a, b, c) \
+    FCIMPL_RENAME_ARGSIZE(_rettype, _method, 24) \
+    EXTERN_C _rettype F_CALL_CONV _method##_FCall (c, b, a) \
+    {
+#define FCIMPL3_ILL(_rettype, _method, a, b, c) \
+    FCIMPL_RENAME_ARGSIZE(_rettype, _method, 20) \
+    EXTERN_C _rettype F_CALL_CONV _method##_FCall (a, c, b) \
+    {
+
 #else
-#define COOP_PINVOKE_CDECL_HELPER COOP_PINVOKE_HELPER
+
+#define FCDECL_RENAME(_rettype, ...)
+#define FCIMPL_RENAME(_rettype, ...)
+
+#define FCALL_METHOD_ARGS(dummy, ...) (__VA_ARGS__)
+#define FCALL_METHOD_ARGS_(tuple) FCALL_METHOD_ARGS tuple
+
+#define FCIMPL1_F(_rettype, _method, a) \
+    EXTERN_C _rettype F_CALL_CONV _method (a) \
+    {
+#define FCIMPL1_D(_rettype, _method, a) \
+    EXTERN_C _rettype F_CALL_CONV _method (a) \
+    {
+#define FCIMPL1_L FCIMPL1_D
+#define FCIMPL2_FF(_rettype, _method, a, b) \
+    EXTERN_C _rettype F_CALL_CONV _method (a, b) \
+    {
+#define FCIMPL2_DD(_rettype, _method, a, b) \
+    EXTERN_C _rettype F_CALL_CONV _method (a, b) \
+    {
+#define FCIMPL2_FI(_rettype, _method, a, b) \
+    EXTERN_C _rettype F_CALL_CONV _method (a, b) \
+    {
+#define FCIMPL2_DI(_rettype, _method, a, b) \
+    EXTERN_C _rettype F_CALL_CONV _method (a, b) \
+    {
+#define FCIMPL3_FFF(_rettype, _method, a, b, c) \
+    EXTERN_C _rettype F_CALL_CONV _method (a, b, c) \
+    {
+#define FCIMPL3_DDD(_rettype, _method, a, b, c) \
+    EXTERN_C _rettype F_CALL_CONV _method (a, b, c) \
+    {
+#define FCIMPL3_ILL(_rettype, _method, a, b, c) \
+    EXTERN_C _rettype F_CALL_CONV _method (a, b, c) \
+    {
+
 #endif
 
+#define FCDECL_(_rettype, ...) \
+    FCDECL_RENAME(_rettype, __VA_ARGS__) \
+    EXTERN_C _rettype F_CALL_CONV FCALL_METHOD_NAME_((__VA_ARGS__)) FCALL_METHOD_ARGS_((__VA_ARGS__))
+#define FCDECL0(_rettype, _method) FCDECL_(_rettype, _method)
+#define FCDECL1(_rettype, _method, a) FCDECL_(_rettype, _method, a)
+#define FCDECL2(_rettype, _method, a, b) FCDECL_(_rettype, _method, a, b)
+#define FCDECL3(_rettype, _method, a, b, c) FCDECL_(_rettype, _method, a, b, c)
+#define FCDECL4(_rettype, _method, a, b, c, d) FCDECL_(_rettype, _method, a, b, c, d)
+#define FCDECL5(_rettype, _method, a, b, c, d, e) FCDECL_(_rettype, _method, a, b, c, d, e)
+
+#define FCIMPL_(_rettype, ...) \
+    FCIMPL_RENAME(_rettype, __VA_ARGS__) \
+    EXTERN_C _rettype F_CALL_CONV FCALL_METHOD_NAME_((__VA_ARGS__)) FCALL_METHOD_ARGS_((__VA_ARGS__)) \
+    {
+#define FCIMPL0(_rettype, _method) FCIMPL_(_rettype, _method)
+#define FCIMPL1(_rettype, _method, a) FCIMPL_(_rettype, _method, a)
+#define FCIMPL2(_rettype, _method, a, b) FCIMPL_(_rettype, _method, a, b)
+#define FCIMPL3(_rettype, _method, a, b, c) FCIMPL_(_rettype, _method, a, b, c)
+#define FCIMPL4(_rettype, _method, a, b, c, d) FCIMPL_(_rettype, _method, a, b, c, d)
+#define FCIMPL5(_rettype, _method, a, b, c, d, e) FCIMPL_(_rettype, _method, a, b, c, d, e)
+
+#define FCIMPLEND \
+    }
+
 typedef bool CLR_BOOL;
 
 #if defined(TARGET_X86) || defined(TARGET_AMD64)
diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp
index 28cb9e617f09..325128c4e01f 100644
--- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp
@@ -29,43 +29,60 @@
 #include "MethodTable.inl"
 #include "CommonMacros.inl"
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhpEHEnumInitFromStackFrameIterator, (
-    StackFrameIterator* pFrameIter, void ** pMethodStartAddressOut, EHEnum* pEHEnum))
+struct MethodRegionInfo
+{
+    void* hotStartAddress;
+    size_t hotSize;
+    void* coldStartAddress;
+    size_t coldSize;
+};
+
+FCIMPL3(FC_BOOL_RET, RhpEHEnumInitFromStackFrameIterator,
+    StackFrameIterator* pFrameIter, MethodRegionInfo* pMethodRegionInfoOut, EHEnum* pEHEnum)
 {
     ICodeManager * pCodeManager = pFrameIter->GetCodeManager();
     pEHEnum->m_pCodeManager = pCodeManager;
 
-    FC_RETURN_BOOL(pCodeManager->EHEnumInit(pFrameIter->GetMethodInfo(), pMethodStartAddressOut, &pEHEnum->m_state));
+    pMethodRegionInfoOut->hotSize = 0; // unknown
+    pMethodRegionInfoOut->coldStartAddress = nullptr;
+    pMethodRegionInfoOut->coldSize = 0;
+
+    FC_RETURN_BOOL(pCodeManager->EHEnumInit(pFrameIter->GetMethodInfo(), &pMethodRegionInfoOut->hotStartAddress, &pEHEnum->m_state));
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhpEHEnumNext, (EHEnum* pEHEnum, EHClause* pEHClause))
+FCIMPL2(FC_BOOL_RET, RhpEHEnumNext, EHEnum* pEHEnum, EHClause* pEHClause)
 {
     FC_RETURN_BOOL(pEHEnum->m_pCodeManager->EHEnumNext(&pEHEnum->m_state, pEHClause));
 }
+FCIMPLEND
 
 // Unmanaged helper to locate one of two classlib-provided functions that the runtime needs to
 // implement throwing of exceptions out of Rtm, and fail-fast. This may return NULL if the classlib
 // found via the provided address does not have the necessary exports.
-COOP_PINVOKE_HELPER(void *, RhpGetClasslibFunctionFromCodeAddress, (void * address, ClasslibFunctionId functionId))
+FCIMPL2(void *, RhpGetClasslibFunctionFromCodeAddress, void * address, ClasslibFunctionId functionId)
 {
     return GetRuntimeInstance()->GetClasslibFunctionFromCodeAddress(address, functionId);
 }
+FCIMPLEND
 
 // Unmanaged helper to locate one of two classlib-provided functions that the runtime needs to
 // implement throwing of exceptions out of Rtm, and fail-fast. This may return NULL if the classlib
 // found via the provided address does not have the necessary exports.
-COOP_PINVOKE_HELPER(void *, RhpGetClasslibFunctionFromEEType, (MethodTable * pEEType, ClasslibFunctionId functionId))
+FCIMPL2(void *, RhpGetClasslibFunctionFromEEType, MethodTable * pEEType, ClasslibFunctionId functionId)
 {
     return pEEType->GetTypeManagerPtr()->AsTypeManager()->GetClasslibFunction(functionId);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpValidateExInfoStack, ())
+FCIMPL0(void, RhpValidateExInfoStack)
 {
     Thread * pThisThread = ThreadStore::GetCurrentThread();
     pThisThread->ValidateExInfoStack();
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpClearThreadDoNotTriggerGC, ())
+FCIMPL0(void, RhpClearThreadDoNotTriggerGC)
 {
     Thread * pThisThread = ThreadStore::GetCurrentThread();
 
@@ -74,8 +91,9 @@ COOP_PINVOKE_HELPER(void, RhpClearThreadDoNotTriggerGC, ())
 
     pThisThread->ClearDoNotTriggerGc();
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpSetThreadDoNotTriggerGC, ())
+FCIMPL0(void, RhpSetThreadDoNotTriggerGC)
 {
     Thread * pThisThread = ThreadStore::GetCurrentThread();
 
@@ -84,13 +102,15 @@ COOP_PINVOKE_HELPER(void, RhpSetThreadDoNotTriggerGC, ())
 
     pThisThread->SetDoNotTriggerGc();
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int32_t, RhGetModuleFileName, (HANDLE moduleHandle, _Out_ const TCHAR** pModuleNameOut))
+FCIMPL2(int32_t, RhGetModuleFileName, HANDLE moduleHandle, _Out_ const TCHAR** pModuleNameOut)
 {
     return PalGetModuleFileName(pModuleNameOut, moduleHandle);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpCopyContextFromExInfo, (void * pOSContext, int32_t cbOSContext, PAL_LIMITED_CONTEXT * pPalContext))
+FCIMPL3(void, RhpCopyContextFromExInfo, void * pOSContext, int32_t cbOSContext, PAL_LIMITED_CONTEXT * pPalContext)
 {
     ASSERT((size_t)cbOSContext >= sizeof(CONTEXT));
     CONTEXT* pContext = (CONTEXT *)pOSContext;
@@ -173,6 +193,7 @@ COOP_PINVOKE_HELPER(void, RhpCopyContextFromExInfo, (void * pOSContext, int32_t
 #error Not Implemented for this architecture -- RhpCopyContextFromExInfo
 #endif
 }
+FCIMPLEND
 
 #if defined(HOST_AMD64) || defined(HOST_ARM) || defined(HOST_X86) || defined(HOST_ARM64)
 struct DISPATCHER_CONTEXT
@@ -189,10 +210,10 @@ struct EXCEPTION_REGISTRATION_RECORD
 };
 #endif // HOST_X86
 
-EXTERN_C void __cdecl RhpFailFastForPInvokeExceptionPreemp(intptr_t PInvokeCallsiteReturnAddr,
-                                                           void* pExceptionRecord, void* pContextRecord);
-EXTERN_C void REDHAWK_CALLCONV RhpFailFastForPInvokeExceptionCoop(intptr_t PInvokeCallsiteReturnAddr,
-                                                                  void* pExceptionRecord, void* pContextRecord);
+EXTERN_C void QCALLTYPE RhpFailFastForPInvokeExceptionPreemp(intptr_t PInvokeCallsiteReturnAddr,
+                                                             void* pExceptionRecord, void* pContextRecord);
+FCDECL3(void, RhpFailFastForPInvokeExceptionCoop, intptr_t PInvokeCallsiteReturnAddr,
+                                                  void* pExceptionRecord, void* pContextRecord);
 int32_t __stdcall RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs);
 
 EXTERN_C int32_t __stdcall RhpPInvokeExceptionGuard(PEXCEPTION_RECORD       pExceptionRecord,
@@ -246,32 +267,38 @@ EXTERN_C int32_t RhpPInvokeExceptionGuard()
 #endif
 
 #if defined(HOST_AMD64) || defined(HOST_ARM) || defined(HOST_X86) || defined(HOST_ARM64) || defined(HOST_WASM)
-EXTERN_C NATIVEAOT_API void REDHAWK_CALLCONV RhpThrowHwEx();
+FCDECL2(void, RhpThrowHwEx, int exceptionCode, TADDR faultingIP);
 #else
-COOP_PINVOKE_HELPER(void, RhpThrowHwEx, ())
+FCIMPL0(void, RhpThrowHwEx)
 {
     ASSERT_UNCONDITIONALLY("RhpThrowHwEx NYI for this architecture!");
 }
-COOP_PINVOKE_HELPER(void, RhpThrowEx, ())
+FCIMPLEND
+FCIMPL0(void, RhpThrowEx)
 {
     ASSERT_UNCONDITIONALLY("RhpThrowEx NYI for this architecture!");
 }
-COOP_PINVOKE_HELPER(void, RhpCallCatchFunclet, ())
+FCIMPLEND
+FCIMPL0(void, RhpCallCatchFunclet)
 {
     ASSERT_UNCONDITIONALLY("RhpCallCatchFunclet NYI for this architecture!");
 }
-COOP_PINVOKE_HELPER(void, RhpCallFinallyFunclet, ())
+FCIMPLEND
+FCIMPL0(void, RhpCallFinallyFunclet)
 {
     ASSERT_UNCONDITIONALLY("RhpCallFinallyFunclet NYI for this architecture!");
 }
-COOP_PINVOKE_HELPER(void, RhpCallFilterFunclet, ())
+FCIMPLEND
+FCIMPL0(void, RhpCallFilterFunclet)
 {
     ASSERT_UNCONDITIONALLY("RhpCallFilterFunclet NYI for this architecture!");
 }
-COOP_PINVOKE_HELPER(void, RhpRethrow, ())
+FCIMPLEND
+FCIMPL0(void, RhpRethrow)
 {
     ASSERT_UNCONDITIONALLY("RhpRethrow NYI for this architecture!");
 }
+FCIMPLEND
 
 EXTERN_C void* RhpCallCatchFunclet2 = NULL;
 EXTERN_C void* RhpCallFinallyFunclet2 = NULL;
@@ -282,14 +309,22 @@ EXTERN_C void* RhpRethrow2   = NULL;
 #endif
 
 EXTERN_C CODE_LOCATION RhpAssignRefAVLocation;
+#if defined(HOST_X86)
+EXTERN_C CODE_LOCATION RhpAssignRefEAXAVLocation;
+EXTERN_C CODE_LOCATION RhpAssignRefECXAVLocation;
+EXTERN_C CODE_LOCATION RhpAssignRefEBXAVLocation;
+EXTERN_C CODE_LOCATION RhpAssignRefESIAVLocation;
+EXTERN_C CODE_LOCATION RhpAssignRefEDIAVLocation;
+EXTERN_C CODE_LOCATION RhpAssignRefEBPAVLocation;
+#endif
 EXTERN_C CODE_LOCATION RhpCheckedAssignRefAVLocation;
-EXTERN_C CODE_LOCATION RhpCheckedLockCmpXchgAVLocation;
-EXTERN_C CODE_LOCATION RhpCheckedXchgAVLocation;
-#if !defined(HOST_AMD64) && !defined(HOST_ARM64)
-EXTERN_C CODE_LOCATION RhpLockCmpXchg8AVLocation;
-EXTERN_C CODE_LOCATION RhpLockCmpXchg16AVLocation;
-EXTERN_C CODE_LOCATION RhpLockCmpXchg32AVLocation;
-EXTERN_C CODE_LOCATION RhpLockCmpXchg64AVLocation;
+#if defined(HOST_X86)
+EXTERN_C CODE_LOCATION RhpCheckedAssignRefEAXAVLocation;
+EXTERN_C CODE_LOCATION RhpCheckedAssignRefECXAVLocation;
+EXTERN_C CODE_LOCATION RhpCheckedAssignRefEBXAVLocation;
+EXTERN_C CODE_LOCATION RhpCheckedAssignRefESIAVLocation;
+EXTERN_C CODE_LOCATION RhpCheckedAssignRefEDIAVLocation;
+EXTERN_C CODE_LOCATION RhpCheckedAssignRefEBPAVLocation;
 #endif
 EXTERN_C CODE_LOCATION RhpByRefAssignRefAVLocation1;
 
@@ -308,22 +343,26 @@ static bool InWriteBarrierHelper(uintptr_t faultingIP)
     static uintptr_t writeBarrierAVLocations[] =
     {
         (uintptr_t)&RhpAssignRefAVLocation,
+#if defined(HOST_X86)
+        (uintptr_t)&RhpAssignRefEAXAVLocation,
+        (uintptr_t)&RhpAssignRefECXAVLocation,
+        (uintptr_t)&RhpAssignRefEBXAVLocation,
+        (uintptr_t)&RhpAssignRefESIAVLocation,
+        (uintptr_t)&RhpAssignRefEDIAVLocation,
+        (uintptr_t)&RhpAssignRefEBPAVLocation,
+#endif
         (uintptr_t)&RhpCheckedAssignRefAVLocation,
-        (uintptr_t)&RhpCheckedLockCmpXchgAVLocation,
-        (uintptr_t)&RhpCheckedXchgAVLocation,
-#if !defined(HOST_AMD64) && !defined(HOST_ARM64)
-        (uintptr_t)&RhpLockCmpXchg8AVLocation,
-        (uintptr_t)&RhpLockCmpXchg16AVLocation,
-        (uintptr_t)&RhpLockCmpXchg32AVLocation,
-        (uintptr_t)&RhpLockCmpXchg64AVLocation,
+#if defined(HOST_X86)
+        (uintptr_t)&RhpCheckedAssignRefEAXAVLocation,
+        (uintptr_t)&RhpCheckedAssignRefECXAVLocation,
+        (uintptr_t)&RhpCheckedAssignRefEBXAVLocation,
+        (uintptr_t)&RhpCheckedAssignRefESIAVLocation,
+        (uintptr_t)&RhpCheckedAssignRefEDIAVLocation,
+        (uintptr_t)&RhpCheckedAssignRefEBPAVLocation,
 #endif
         (uintptr_t)&RhpByRefAssignRefAVLocation1,
 #if !defined(HOST_ARM64)
         (uintptr_t)&RhpByRefAssignRefAVLocation2,
-#endif
-#if defined(HOST_ARM64) && !defined(LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT)
-        (uintptr_t)&RhpCheckedLockCmpXchgAVLocation2,
-        (uintptr_t)&RhpCheckedXchgAVLocation2,
 #endif
     };
 
@@ -595,9 +634,10 @@ int32_t __stdcall RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs)
 
 #endif // TARGET_UNIX
 
-COOP_PINVOKE_HELPER(void, RhpFallbackFailFast, ())
+FCIMPL0(void, RhpFallbackFailFast)
 {
     RhFailFast();
 }
+FCIMPLEND
 
 #endif // !DACCESS_COMPILE
diff --git a/src/coreclr/nativeaot/Runtime/FinalizerHelpers.cpp b/src/coreclr/nativeaot/Runtime/FinalizerHelpers.cpp
index 24a456a5f5a7..7a4d8a853a8b 100644
--- a/src/coreclr/nativeaot/Runtime/FinalizerHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/FinalizerHelpers.cpp
@@ -27,7 +27,7 @@ GPTR_DECL(Thread, g_pFinalizerThread);
 CLREventStatic g_FinalizerEvent;
 CLREventStatic g_FinalizerDoneEvent;
 
-extern "C" void __cdecl ProcessFinalizers();
+EXTERN_C void QCALLTYPE ProcessFinalizers();
 
 // Unmanaged front-end to the finalizer thread. We require this because at the point the GC creates the
 // finalizer thread we can't run managed code. Instead this method waits
@@ -89,12 +89,12 @@ void RhEnableFinalization()
     g_FinalizerEvent.Set();
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhInitializeFinalizerThread()
+EXTERN_C void QCALLTYPE RhInitializeFinalizerThread()
 {
     g_FinalizerEvent.Set();
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhWaitForPendingFinalizers(UInt32_BOOL allowReentrantWait)
+EXTERN_C void QCALLTYPE RhWaitForPendingFinalizers(UInt32_BOOL allowReentrantWait)
 {
     // This must be called via p/invoke rather than RuntimeImport since it blocks and could starve the GC if
     // called in cooperative mode.
@@ -115,7 +115,7 @@ EXTERN_C NATIVEAOT_API void __cdecl RhWaitForPendingFinalizers(UInt32_BOOL allow
 
 // Block the current thread until at least one object needs to be finalized (returns true) or memory is low
 // (returns false and the finalizer thread should initiate a garbage collection).
-EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhpWaitForFinalizerRequest()
+EXTERN_C UInt32_BOOL QCALLTYPE RhpWaitForFinalizerRequest()
 {
     // We can wait for two events; finalization queue has been populated and low memory resource notification.
     // But if the latter is signalled we shouldn't wait on it again immediately -- if the garbage collection
@@ -182,7 +182,7 @@ EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhpWaitForFinalizerRequest()
 }
 
 // Indicate that the current round of finalizations is complete.
-EXTERN_C NATIVEAOT_API void __cdecl RhpSignalFinalizationComplete(uint32_t fcount)
+EXTERN_C void QCALLTYPE RhpSignalFinalizationComplete(uint32_t fcount)
 {
     FireEtwGCFinalizersEnd_V1(fcount, GetClrInstanceId());
     g_FinalizerDoneEvent.Set();
@@ -194,7 +194,7 @@ EXTERN_C NATIVEAOT_API void __cdecl RhpSignalFinalizationComplete(uint32_t fcoun
 //
 
 // Fetch next object which needs finalization or return null if we've reached the end of the list.
-COOP_PINVOKE_HELPER(OBJECTREF, RhpGetNextFinalizableObject, ())
+FCIMPL0(OBJECTREF, RhpGetNextFinalizableObject)
 {
     while (true)
     {
@@ -216,3 +216,4 @@ COOP_PINVOKE_HELPER(OBJECTREF, RhpGetNextFinalizableObject, ())
         return refNext;
     }
 }
+FCIMPLEND
diff --git a/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt
index e665a6c88ee1..f9b390e18d11 100644
--- a/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt
+++ b/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt
@@ -41,10 +41,10 @@ if(CLR_CMAKE_TARGET_WIN32)
   add_dependencies(standalonegc-enabled aot_etw_headers)
 endif()
 
-if (CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_TARGET_ARCH_AMD64)
+if (CLR_CMAKE_TARGET_ARCH_AMD64)
   add_library(Runtime.VxsortEnabled STATIC ${VXSORT_SOURCES})
   add_library(Runtime.VxsortDisabled STATIC ${DUMMY_VXSORT_SOURCES})
-endif (CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_TARGET_ARCH_AMD64)
+endif (CLR_CMAKE_TARGET_ARCH_AMD64)
 
 target_compile_definitions(Runtime.ServerGC PRIVATE -DFEATURE_SVR_GC)
 
@@ -116,13 +116,15 @@ install_static_library(Runtime.ServerGC aotsdk nativeaot)
 install_static_library(standalonegc-disabled aotsdk nativeaot)
 install_static_library(standalonegc-enabled aotsdk nativeaot)
 if (CLR_CMAKE_TARGET_WIN32)
-  if (CLR_CMAKE_TARGET_ARCH_AMD64)
-    install_static_library(Runtime.VxsortEnabled aotsdk nativeaot)
-    install_static_library(Runtime.VxsortDisabled aotsdk nativeaot)
-    install_static_library(Runtime.VxsortEnabled.GuardCF aotsdk nativeaot)
-  endif (CLR_CMAKE_TARGET_ARCH_AMD64)
   install_static_library(Runtime.ServerGC.GuardCF aotsdk nativeaot)
   add_dependencies(Runtime.ServerGC.GuardCF aot_eventing_headers)
   install_static_library(standalonegc-disabled.GuardCF aotsdk nativeaot)
   install_static_library(standalonegc-enabled.GuardCF aotsdk nativeaot)
 endif (CLR_CMAKE_TARGET_WIN32)
+if (CLR_CMAKE_TARGET_ARCH_AMD64)
+  install_static_library(Runtime.VxsortEnabled aotsdk nativeaot)
+  install_static_library(Runtime.VxsortDisabled aotsdk nativeaot)
+  if (CLR_CMAKE_TARGET_WIN32)
+    install_static_library(Runtime.VxsortEnabled.GuardCF aotsdk nativeaot)
+  endif (CLR_CMAKE_TARGET_WIN32)
+endif (CLR_CMAKE_TARGET_ARCH_AMD64)
\ No newline at end of file
diff --git a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp
index 2f2a088d5073..6fecd5ac0476 100644
--- a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp
@@ -95,7 +95,7 @@ void MethodTable::InitializeAsGcFreeType()
     m_uBaseSize = sizeof(Array) + SYNC_BLOCK_SKEW;
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhpCollect(uint32_t uGeneration, uint32_t uMode, UInt32_BOOL lowMemoryP)
+EXTERN_C void QCALLTYPE RhpCollect(uint32_t uGeneration, uint32_t uMode, UInt32_BOOL lowMemoryP)
 {
     // This must be called via p/invoke rather than RuntimeImport to make the stack crawlable.
 
@@ -110,7 +110,7 @@ EXTERN_C NATIVEAOT_API void __cdecl RhpCollect(uint32_t uGeneration, uint32_t uM
     pCurThread->EnablePreemptiveMode();
 }
 
-EXTERN_C NATIVEAOT_API int64_t __cdecl RhpGetGcTotalMemory()
+EXTERN_C int64_t QCALLTYPE RhpGetGcTotalMemory()
 {
     // This must be called via p/invoke rather than RuntimeImport to make the stack crawlable.
 
@@ -126,7 +126,7 @@ EXTERN_C NATIVEAOT_API int64_t __cdecl RhpGetGcTotalMemory()
     return ret;
 }
 
-EXTERN_C NATIVEAOT_API int32_t __cdecl RhpStartNoGCRegion(int64_t totalSize, UInt32_BOOL hasLohSize, int64_t lohSize, UInt32_BOOL disallowFullBlockingGC)
+EXTERN_C int32_t QCALLTYPE RhpStartNoGCRegion(int64_t totalSize, UInt32_BOOL hasLohSize, int64_t lohSize, UInt32_BOOL disallowFullBlockingGC)
 {
     Thread *pCurThread = ThreadStore::GetCurrentThread();
     ASSERT(!pCurThread->IsCurrentThreadInCooperativeMode());
@@ -141,132 +141,154 @@ EXTERN_C NATIVEAOT_API int32_t __cdecl RhpStartNoGCRegion(int64_t totalSize, UIn
     return result;
 }
 
-EXTERN_C NATIVEAOT_API int32_t __cdecl RhpEndNoGCRegion()
+EXTERN_C int32_t QCALLTYPE RhpEndNoGCRegion()
 {
     ASSERT(!ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode());
 
     return GCHeapUtilities::GetGCHeap()->EndNoGCRegion();
 }
 
-COOP_PINVOKE_HELPER(void, RhSuppressFinalize, (OBJECTREF refObj))
+FCIMPL1(void, RhSuppressFinalize, OBJECTREF refObj)
 {
     if (!refObj->GetMethodTable()->HasFinalizer())
         return;
     GCHeapUtilities::GetGCHeap()->SetFinalizationRun(refObj);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhReRegisterForFinalize, (OBJECTREF refObj))
+FCIMPL1(FC_BOOL_RET, RhReRegisterForFinalize, OBJECTREF refObj)
 {
     if (!refObj->GetMethodTable()->HasFinalizer())
         FC_RETURN_BOOL(true);
     FC_RETURN_BOOL(GCHeapUtilities::GetGCHeap()->RegisterForFinalization(-1, refObj));
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int32_t, RhGetMaxGcGeneration, ())
+FCIMPL0(int32_t, RhGetMaxGcGeneration)
 {
     return GCHeapUtilities::GetGCHeap()->GetMaxGeneration();
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int32_t, RhGetGcCollectionCount, (int32_t generation, CLR_BOOL getSpecialGCCount))
+FCIMPL2(int32_t, RhGetGcCollectionCount, int32_t generation, CLR_BOOL getSpecialGCCount)
 {
     return GCHeapUtilities::GetGCHeap()->CollectionCount(generation, getSpecialGCCount);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int32_t, RhGetGeneration, (OBJECTREF obj))
+FCIMPL1(int32_t, RhGetGeneration, OBJECTREF obj)
 {
     return GCHeapUtilities::GetGCHeap()->WhichGeneration(obj);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int64_t, RhGetGenerationSize, (int32_t gen))
+FCIMPL1(int64_t, RhGetGenerationSize, int32_t gen)
 {
     return (int64_t)(GCHeapUtilities::GetGCHeap()->GetLastGCGenerationSize(gen));
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int64_t, RhGetLastGCPercentTimeInGC, ())
+FCIMPL0(int64_t, RhGetLastGCPercentTimeInGC)
 {
     return GCHeapUtilities::GetGCHeap()->GetLastGCPercentTimeInGC();
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int32_t, RhGetGcLatencyMode, ())
+FCIMPL0(int32_t, RhGetGcLatencyMode)
 {
     return GCHeapUtilities::GetGCHeap()->GetGcLatencyMode();
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int32_t, RhSetGcLatencyMode, (int32_t newLatencyMode))
+FCIMPL1(int32_t, RhSetGcLatencyMode, int32_t newLatencyMode)
 {
     return GCHeapUtilities::GetGCHeap()->SetGcLatencyMode(newLatencyMode);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhIsPromoted, (OBJECTREF obj))
+FCIMPL1(FC_BOOL_RET, RhIsPromoted, OBJECTREF obj)
 {
     FC_RETURN_BOOL(GCHeapUtilities::GetGCHeap()->IsPromoted(obj));
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhIsServerGc, ())
+FCIMPL0(FC_BOOL_RET, RhIsServerGc)
 {
     FC_RETURN_BOOL(GCHeapUtilities::IsServerHeap());
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhRegisterGcCallout, (GcRestrictedCalloutKind eKind, void * pCallout))
+FCIMPL2(FC_BOOL_RET, RhRegisterGcCallout, GcRestrictedCalloutKind eKind, void * pCallout)
 {
     FC_RETURN_BOOL(RestrictedCallouts::RegisterGcCallout(eKind, pCallout));
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhUnregisterGcCallout, (GcRestrictedCalloutKind eKind, void * pCallout))
+FCIMPL2(void, RhUnregisterGcCallout, GcRestrictedCalloutKind eKind, void * pCallout)
 {
     RestrictedCallouts::UnregisterGcCallout(eKind, pCallout);
 }
+FCIMPLEND
 
 #ifdef FEATURE_OBJCMARSHAL
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhRegisterObjectiveCMarshalBeginEndCallback, (void * pCallback))
+FCIMPL1(FC_BOOL_RET, RhRegisterObjectiveCMarshalBeginEndCallback, void * pCallback)
 {
     FC_RETURN_BOOL(ObjCMarshalNative::RegisterBeginEndCallback(pCallback));
 }
+FCIMPLEND
 #endif
 
-COOP_PINVOKE_HELPER(int32_t, RhGetLohCompactionMode, ())
+FCIMPL0(int32_t, RhGetLohCompactionMode)
 {
     return GCHeapUtilities::GetGCHeap()->GetLOHCompactionMode();
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhSetLohCompactionMode, (int32_t newLohCompactionMode))
+FCIMPL1(void, RhSetLohCompactionMode, int32_t newLohCompactionMode)
 {
     GCHeapUtilities::GetGCHeap()->SetLOHCompactionMode(newLohCompactionMode);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int64_t, RhGetCurrentObjSize, ())
+FCIMPL0(int64_t, RhGetCurrentObjSize)
 {
     return GCHeapUtilities::GetGCHeap()->GetCurrentObjSize();
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int64_t, RhGetGCNow, ())
+FCIMPL0(int64_t, RhGetGCNow)
 {
     return GCHeapUtilities::GetGCHeap()->GetNow();
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int64_t, RhGetLastGCStartTime, (int32_t generation))
+FCIMPL1(int64_t, RhGetLastGCStartTime, int32_t generation)
 {
     return GCHeapUtilities::GetGCHeap()->GetLastGCStartTime(generation);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int64_t, RhGetLastGCDuration, (int32_t generation))
+FCIMPL1(int64_t, RhGetLastGCDuration, int32_t generation)
 {
     return GCHeapUtilities::GetGCHeap()->GetLastGCDuration(generation);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhRegisterForFullGCNotification, (int32_t maxGenerationThreshold, int32_t largeObjectHeapThreshold))
+FCIMPL2(FC_BOOL_RET, RhRegisterForFullGCNotification, int32_t maxGenerationThreshold, int32_t largeObjectHeapThreshold)
 {
     ASSERT(maxGenerationThreshold >= 1 && maxGenerationThreshold <= 99);
     ASSERT(largeObjectHeapThreshold >= 1 && largeObjectHeapThreshold <= 99);
     FC_RETURN_BOOL(GCHeapUtilities::GetGCHeap()->RegisterForFullGCNotification(maxGenerationThreshold, largeObjectHeapThreshold));
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhCancelFullGCNotification, ())
+FCIMPL0(FC_BOOL_RET, RhCancelFullGCNotification)
 {
     FC_RETURN_BOOL(GCHeapUtilities::GetGCHeap()->CancelFullGCNotification());
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int32_t, RhWaitForFullGCApproach, (int32_t millisecondsTimeout))
+FCIMPL1(int32_t, RhWaitForFullGCApproach, int32_t millisecondsTimeout)
 {
     ASSERT(millisecondsTimeout >= -1);
     ASSERT(ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode());
@@ -274,8 +296,9 @@ COOP_PINVOKE_HELPER(int32_t, RhWaitForFullGCApproach, (int32_t millisecondsTimeo
     int timeout = millisecondsTimeout == -1 ? INFINITE : millisecondsTimeout;
     return GCHeapUtilities::GetGCHeap()->WaitForFullGCApproach(millisecondsTimeout);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int32_t, RhWaitForFullGCComplete, (int32_t millisecondsTimeout))
+FCIMPL1(int32_t, RhWaitForFullGCComplete, int32_t millisecondsTimeout)
 {
     ASSERT(millisecondsTimeout >= -1);
     ASSERT(ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode());
@@ -283,22 +306,25 @@ COOP_PINVOKE_HELPER(int32_t, RhWaitForFullGCComplete, (int32_t millisecondsTimeo
     int timeout = millisecondsTimeout == -1 ? INFINITE : millisecondsTimeout;
     return GCHeapUtilities::GetGCHeap()->WaitForFullGCComplete(millisecondsTimeout);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int64_t, RhGetGCSegmentSize, ())
+FCIMPL0(int64_t, RhGetGCSegmentSize)
 {
     size_t first = GCHeapUtilities::GetGCHeap()->GetValidSegmentSize(true);
     size_t second = GCHeapUtilities::GetGCHeap()->GetValidSegmentSize(false);
 
     return (first > second) ? first : second;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int64_t, RhGetAllocatedBytesForCurrentThread, ())
+FCIMPL0(int64_t, RhGetAllocatedBytesForCurrentThread)
 {
     Thread *pThread = ThreadStore::GetCurrentThread();
     gc_alloc_context *ac = pThread->GetAllocContext();
     int64_t currentAllocated = ac->alloc_bytes + ac->alloc_bytes_uoh - (ac->alloc_limit - ac->alloc_ptr);
     return currentAllocated;
 }
+FCIMPLEND
 
 struct RH_GC_GENERATION_INFO
 {
@@ -334,7 +360,7 @@ struct RH_GH_MEMORY_INFO
     uint64_t pauseDuration1;
 };
 
-COOP_PINVOKE_HELPER(void, RhGetMemoryInfo, (RH_GH_MEMORY_INFO* pData, int kind))
+FCIMPL2(void, RhGetMemoryInfo, RH_GH_MEMORY_INFO* pData, int kind)
 {
     uint64_t* genInfoRaw = (uint64_t*)&(pData->generationInfo0);
     uint64_t* pauseInfoRaw = (uint64_t*)&(pData->pauseDuration0);
@@ -358,6 +384,7 @@ COOP_PINVOKE_HELPER(void, RhGetMemoryInfo, (RH_GH_MEMORY_INFO* pData, int kind))
         pauseInfoRaw,
         kind);
 }
+FCIMPLEND
 
 
 // The MethodTable is remembered in some slow-path allocation paths. This value is used in event tracing.
@@ -370,7 +397,7 @@ MethodTable* GetLastAllocEEType()
     return tls_pLastAllocationEEType;
 }
 
-COOP_PINVOKE_HELPER(int64_t, RhGetTotalAllocatedBytes, ())
+FCIMPL0(int64_t, RhGetTotalAllocatedBytes)
 {
     uint64_t allocated_bytes = GCHeapUtilities::GetGCHeap()->GetTotalAllocatedBytes() - Thread::GetDeadThreadsNonAllocBytes();
 
@@ -389,10 +416,9 @@ COOP_PINVOKE_HELPER(int64_t, RhGetTotalAllocatedBytes, ())
 
     return current_high;
 }
+FCIMPLEND
 
-using EnumerateConfigurationValuesCallback = void (*)(void* context, void* name, void* publicKey, GCConfigurationType type, int64_t data);
-
-EXTERN_C NATIVEAOT_API void __cdecl RhEnumerateConfigurationValues(void* configurationContext, EnumerateConfigurationValuesCallback callback)
+EXTERN_C void QCALLTYPE RhEnumerateConfigurationValues(void* configurationContext, ConfigurationValueFunc callback)
 {
     IGCHeap* pHeap = GCHeapUtilities::GetGCHeap();
     pHeap->EnumerateConfigurationValues(configurationContext, callback);
@@ -401,27 +427,28 @@ EXTERN_C NATIVEAOT_API void __cdecl RhEnumerateConfigurationValues(void* configu
 GCHeapHardLimitInfo g_gcHeapHardLimitInfo;
 bool g_gcHeapHardLimitInfoSpecified = false;
 
-EXTERN_C NATIVEAOT_API void __cdecl RhRefreshMemoryLimit(GCHeapHardLimitInfo heapHardLimitInfo)
+FCIMPL1(void, RhRefreshMemoryLimit, GCHeapHardLimitInfo heapHardLimitInfo)
 {
     IGCHeap* pHeap = GCHeapUtilities::GetGCHeap();
     g_gcHeapHardLimitInfo = heapHardLimitInfo;
     g_gcHeapHardLimitInfoSpecified = true;
     pHeap->RefreshMemoryLimit();
 }
+FCIMPLEND
 
-EXTERN_C NATIVEAOT_API uint64_t __cdecl RhGetGenerationBudget(int generation)
+EXTERN_C uint64_t QCALLTYPE RhGetGenerationBudget(int generation)
 {
     IGCHeap* pHeap = GCHeapUtilities::GetGCHeap();
     return pHeap->GetGenerationBudget(generation);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhEnableNoGCRegionCallback(NoGCRegionCallbackFinalizerWorkItem* pCallback, int64_t totalSize)
+EXTERN_C void QCALLTYPE RhEnableNoGCRegionCallback(NoGCRegionCallbackFinalizerWorkItem* pCallback, int64_t totalSize)
 {
     IGCHeap* pHeap = GCHeapUtilities::GetGCHeap();
     pHeap->EnableNoGCRegionCallback(pCallback, totalSize);
 }
 
-EXTERN_C NATIVEAOT_API int64_t __cdecl RhGetTotalAllocatedBytesPrecise()
+EXTERN_C int64_t QCALLTYPE RhGetTotalAllocatedBytesPrecise()
 {
     int64_t allocated;
 
@@ -449,6 +476,12 @@ static Object* GcAllocInternal(MethodTable* pEEType, uint32_t uFlags, uintptr_t
     ASSERT(!pThread->IsDoNotTriggerGcSet());
     ASSERT(pThread->IsCurrentThreadInCooperativeMode());
 
+    if (pEEType->ContainsPointers())
+    {
+        uFlags |= GC_ALLOC_CONTAINS_REF;
+        uFlags &= ~GC_ALLOC_ZEROING_OPTIONAL;
+    }
+
     size_t cbSize = pEEType->GetBaseSize();
 
     if (pEEType->HasComponentSize())
@@ -535,8 +568,7 @@ static Object* GcAllocInternal(MethodTable* pEEType, uint32_t uFlags, uintptr_t
 //  numElements     -  number of array elements
 //  pTransitionFrame-  transition frame to make stack crawlable
 // Returns a pointer to the object allocated or NULL on failure.
-
-COOP_PINVOKE_HELPER(void*, RhpGcAlloc, (MethodTable* pEEType, uint32_t uFlags, uintptr_t numElements, PInvokeTransitionFrame* pTransitionFrame))
+EXTERN_C void* F_CALL_CONV RhpGcAlloc(MethodTable* pEEType, uint32_t uFlags, uintptr_t numElements, PInvokeTransitionFrame* pTransitionFrame)
 {
     Thread* pThread = ThreadStore::GetCurrentThread();
 
@@ -571,7 +603,7 @@ COOP_PINVOKE_HELPER(void*, RhpGcAlloc, (MethodTable* pEEType, uint32_t uFlags, u
     return GcAllocInternal(pEEType, uFlags, numElements, pThread);
 }
 
-EXTERN_C NATIVEAOT_API void RhAllocateNewArray(MethodTable* pArrayEEType, uint32_t numElements, uint32_t flags, Array** pResult)
+EXTERN_C void QCALLTYPE RhAllocateNewArray(MethodTable* pArrayEEType, uint32_t numElements, uint32_t flags, Array** pResult)
 {
     Thread* pThread = ThreadStore::GetCurrentThread();
 
@@ -585,7 +617,7 @@ EXTERN_C NATIVEAOT_API void RhAllocateNewArray(MethodTable* pArrayEEType, uint32
     pThread->EnablePreemptiveMode();
 }
 
-EXTERN_C NATIVEAOT_API void RhAllocateNewObject(MethodTable* pEEType, uint32_t flags, Object** pResult)
+EXTERN_C void QCALLTYPE RhAllocateNewObject(MethodTable* pEEType, uint32_t flags, Object** pResult)
 {
     Thread* pThread = ThreadStore::GetCurrentThread();
 
@@ -599,12 +631,13 @@ EXTERN_C NATIVEAOT_API void RhAllocateNewObject(MethodTable* pEEType, uint32_t f
     pThread->EnablePreemptiveMode();
 }
 
-COOP_PINVOKE_HELPER(int64_t, RhGetTotalPauseDuration, ())
+FCIMPL0(int64_t, RhGetTotalPauseDuration)
 {
     return GCHeapUtilities::GetGCHeap()->GetTotalPauseDuration();
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhRegisterForGCReporting, (GCFrameRegistration* pRegistration))
+FCIMPL1(void, RhRegisterForGCReporting, GCFrameRegistration* pRegistration)
 {
     Thread* pThread = ThreadStore::GetCurrentThread();
 
@@ -613,8 +646,9 @@ COOP_PINVOKE_HELPER(void, RhRegisterForGCReporting, (GCFrameRegistration* pRegis
 
     pThread->PushGCFrameRegistration(pRegistration);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhUnregisterForGCReporting, (GCFrameRegistration* pRegistration))
+FCIMPL1(void, RhUnregisterForGCReporting, GCFrameRegistration* pRegistration)
 {
     Thread* pThread = pRegistration->m_pThread;
     if (pThread == NULL)
@@ -623,8 +657,9 @@ COOP_PINVOKE_HELPER(void, RhUnregisterForGCReporting, (GCFrameRegistration* pReg
     ASSERT(pThread == ThreadStore::GetCurrentThread());
     pThread->PopGCFrameRegistration(pRegistration);
 }
+FCIMPLEND
 
-EXTERN_C NATIVEAOT_API void* __cdecl RhRegisterFrozenSegment(void* pSection, size_t allocSize, size_t commitSize, size_t reservedSize)
+EXTERN_C void* QCALLTYPE RhRegisterFrozenSegment(void* pSection, size_t allocSize, size_t commitSize, size_t reservedSize)
 {
     ASSERT(allocSize <= commitSize);
     ASSERT(commitSize <= reservedSize);
@@ -644,25 +679,26 @@ EXTERN_C NATIVEAOT_API void* __cdecl RhRegisterFrozenSegment(void* pSection, siz
 #endif // FEATURE_BASICFREEZE
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhUpdateFrozenSegment(void* pSegmentHandle, uint8_t* allocated, uint8_t* committed)
+EXTERN_C void QCALLTYPE RhUpdateFrozenSegment(void* pSegmentHandle, uint8_t* allocated, uint8_t* committed)
 {
     ASSERT(allocated <= committed);
 
     GCHeapUtilities::GetGCHeap()->UpdateFrozenSegment((segment_handle)pSegmentHandle, allocated, committed);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhUnregisterFrozenSegment(void* pSegmentHandle)
+EXTERN_C void QCALLTYPE RhUnregisterFrozenSegment(void* pSegmentHandle)
 {
     GCHeapUtilities::GetGCHeap()->UnregisterFrozenSegment((segment_handle)pSegmentHandle);
 }
 
-COOP_PINVOKE_HELPER(uint32_t, RhGetGCDescSize, (MethodTable* pMT))
+FCIMPL1(uint32_t, RhGetGCDescSize, MethodTable* pMT)
 {
     if (!pMT->ContainsPointersOrCollectible())
         return 0;
 
     return (uint32_t)CGCDesc::GetCGCDescFromMT(pMT)->GetSize();
 }
+FCIMPLEND
 
 #ifdef FEATURE_GC_STRESS
 
@@ -671,7 +707,7 @@ EXTERN_C UInt32_BOOL g_fGcStressStarted;
 UInt32_BOOL g_fGcStressStarted = UInt32_FALSE; // UInt32_BOOL because asm code reads it
 
 // static
-EXTERN_C void RhpStressGc()
+EXTERN_C void F_CALL_CONV RhpStressGc()
 {
     // The GarbageCollect operation below may trash the last win32 error. We save the error here so that it can be
     // restored after the GC operation;
@@ -686,8 +722,9 @@ EXTERN_C void RhpStressGc()
     PalSetLastError(lastErrorOnEntry);
 }
 
-COOP_PINVOKE_HELPER(void, RhpInitializeGcStress, ())
+FCIMPL0(void, RhpInitializeGcStress)
 {
     g_fGcStressStarted = UInt32_TRUE;
 }
+FCIMPLEND
 #endif // FEATURE_GC_STRESS
diff --git a/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp b/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp
index 0f40aadbf059..0154bd1fde50 100644
--- a/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp
@@ -18,7 +18,7 @@
 // in a read on another thread getting incorrect data.
 // Unaligned memory at the beginning and remaining bytes at the end are written bytewise.
 // USAGE:  The caller is responsible for null-checking the reference.
-COOP_PINVOKE_CDECL_HELPER(void *, RhpGcSafeZeroMemory, (void * mem, size_t size))
+FCIMPL2(void *, RhpGcSafeZeroMemory, void * mem, size_t size)
 {
     // The caller must do the null-check because we cannot take an AV in the runtime and translate it to managed.
     ASSERT(mem != nullptr);
@@ -28,6 +28,7 @@ COOP_PINVOKE_CDECL_HELPER(void *, RhpGcSafeZeroMemory, (void * mem, size_t size)
     // memset returns the destination buffer
     return mem;
 }
+FCIMPLEND
 
 #if defined(TARGET_X86) || defined(TARGET_AMD64) 
     // 
@@ -41,7 +42,7 @@ COOP_PINVOKE_CDECL_HELPER(void *, RhpGcSafeZeroMemory, (void * mem, size_t size)
 // Move memory, in a way that is compatible with a move onto the heap, but
 // does not require the destination pointer to be on the heap.
 
-COOP_PINVOKE_HELPER(void, RhBulkMoveWithWriteBarrier, (uint8_t* pDest, uint8_t* pSrc, size_t cbDest))
+FCIMPL3(void, RhBulkMoveWithWriteBarrier, uint8_t* pDest, uint8_t* pSrc, size_t cbDest)
 {
     // It is possible that the bulk write is publishing object references accessible so far only
     // by the current thread to shared memory.
@@ -56,3 +57,4 @@ COOP_PINVOKE_HELPER(void, RhBulkMoveWithWriteBarrier, (uint8_t* pDest, uint8_t*
 
     InlinedBulkWriteBarrier(pDest, cbDest);
 }
+FCIMPLEND
diff --git a/src/coreclr/nativeaot/Runtime/HandleTableHelpers.cpp b/src/coreclr/nativeaot/Runtime/HandleTableHelpers.cpp
index 3f4b88e52715..23e985357d34 100644
--- a/src/coreclr/nativeaot/Runtime/HandleTableHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/HandleTableHelpers.cpp
@@ -15,49 +15,58 @@
 #include "gchandleutilities.h"
 
 
-COOP_PINVOKE_HELPER(OBJECTHANDLE, RhpHandleAlloc, (Object *pObject, int type))
+FCIMPL2(OBJECTHANDLE, RhpHandleAlloc, Object *pObject, int type)
 {
     return GCHandleUtilities::GetGCHandleManager()->GetGlobalHandleStore()->CreateHandleOfType(pObject, (HandleType)type);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(OBJECTHANDLE, RhpHandleAllocDependent, (Object *pPrimary, Object *pSecondary))
+FCIMPL2(OBJECTHANDLE, RhpHandleAllocDependent, Object *pPrimary, Object *pSecondary)
 {
     return GCHandleUtilities::GetGCHandleManager()->GetGlobalHandleStore()->CreateDependentHandle(pPrimary, pSecondary);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhHandleFree, (OBJECTHANDLE handle))
+FCIMPL1(void, RhHandleFree, OBJECTHANDLE handle)
 {
     GCHandleUtilities::GetGCHandleManager()->DestroyHandleOfUnknownType(handle);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(Object *, RhHandleGet, (OBJECTHANDLE handle))
+FCIMPL1(Object *, RhHandleGet, OBJECTHANDLE handle)
 {
     return ObjectFromHandle(handle);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(Object *, RhHandleGetDependent, (OBJECTHANDLE handle, Object **ppSecondary))
+FCIMPL2(Object *, RhHandleGetDependent, OBJECTHANDLE handle, Object **ppSecondary)
 {
     Object *pPrimary = ObjectFromHandle(handle);
     *ppSecondary = (pPrimary != NULL) ? GetDependentHandleSecondary(handle) : NULL;
     return pPrimary;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhHandleSetDependentSecondary, (OBJECTHANDLE handle, Object *pSecondary))
+FCIMPL2(void, RhHandleSetDependentSecondary, OBJECTHANDLE handle, Object *pSecondary)
 {
     SetDependentHandleSecondary(handle, pSecondary);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhHandleSet, (OBJECTHANDLE handle, Object *pObject))
+FCIMPL2(void, RhHandleSet, OBJECTHANDLE handle, Object *pObject)
 {
     GCHandleUtilities::GetGCHandleManager()->StoreObjectInHandle(handle, pObject);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhRegisterRefCountedHandleCallback, (void * pCallout, MethodTable * pTypeFilter))
+FCIMPL2(FC_BOOL_RET, RhRegisterRefCountedHandleCallback, void * pCallout, MethodTable * pTypeFilter)
 {
     FC_RETURN_BOOL(RestrictedCallouts::RegisterRefCountedHandleCallback(pCallout, pTypeFilter));
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhUnregisterRefCountedHandleCallback, (void * pCallout, MethodTable * pTypeFilter))
+FCIMPL2(void, RhUnregisterRefCountedHandleCallback, void * pCallout, MethodTable * pTypeFilter)
 {
     RestrictedCallouts::UnregisterRefCountedHandleCallback(pCallout, pTypeFilter);
 }
+FCIMPLEND
diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h
index 4c9957dd6062..dfc6e9efa915 100644
--- a/src/coreclr/nativeaot/Runtime/ICodeManager.h
+++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h
@@ -229,6 +229,11 @@ class ICodeManager
     virtual PTR_VOID GetFramePointer(MethodInfo *   pMethodInfo,
                                      REGDISPLAY *   pRegisterSet) PURE_VIRTUAL
 
+#ifdef TARGET_X86
+    virtual uintptr_t GetResumeSp(MethodInfo *   pMethodInfo,
+                                  REGDISPLAY *   pRegisterSet) PURE_VIRTUAL
+#endif
+
     virtual void EnumGcRefs(MethodInfo *    pMethodInfo,
                             PTR_VOID        safePointAddress,
                             REGDISPLAY *    pRegisterSet,
diff --git a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
index 33e0869c3ab7..6e3f4d73de8a 100644
--- a/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/MathHelpers.cpp
@@ -9,172 +9,342 @@
 // Floating point and 64-bit integer math helpers.
 //
 
-EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpDbl2ULng(double val)
-{
-    const double two63  = 2147483648.0 * 4294967296.0;
-    uint64_t ret;
-    if (val < two63)
-    {
-        ret = (int64_t)(val);
-    }
-    else
-    {
-        // subtract 0x8000000000000000, do the convert then add it back again
-        ret = (int64_t)(val - two63) + I64(0x8000000000000000);
-    }
-    return ret;
+FCIMPL1_D(uint64_t, RhpDbl2ULng, double val)
+{
+#if defined(HOST_X86) || defined(HOST_AMD64)
+    const double uint64_max_plus_1 = 4294967296.0 * 4294967296.0;
+    return (val > 0) ? ((val >= uint64_max_plus_1) ? UINT64_MAX : (uint64_t)val) : 0;
+#else
+    return (uint64_t)val;
+#endif
 }
+FCIMPLEND
 
-#undef min
-#undef max
-#include <cmath>
-
-EXTERN_C NATIVEAOT_API float REDHAWK_CALLCONV RhpFltRem(float dividend, float divisor)
-{
-    //
-    // From the ECMA standard:
-    //
-    // If [divisor] is zero or [dividend] is infinity
-    //   the result is NaN.
-    // If [divisor] is infinity,
-    //   the result is [dividend] (negated for -infinity***).
-    //
-    // ***"negated for -infinity" has been removed from the spec
-    //
-
-    if (divisor==0 || !std::isfinite(dividend))
-    {
-        return -nanf("");
-    }
-    else if (!std::isfinite(divisor) && !std::isnan(divisor))
-    {
-        return dividend;
-    }
-    // else...
-    return fmodf(dividend,divisor);
-}
-
-EXTERN_C NATIVEAOT_API double REDHAWK_CALLCONV RhpDblRem(double dividend, double divisor)
-{
-    //
-    // From the ECMA standard:
-    //
-    // If [divisor] is zero or [dividend] is infinity
-    //   the result is NaN.
-    // If [divisor] is infinity,
-    //   the result is [dividend] (negated for -infinity***).
-    //
-    // ***"negated for -infinity" has been removed from the spec
-    //
-    if (divisor==0 || !std::isfinite(dividend))
-    {
-        return -nan("");
-    }
-    else if (!std::isfinite(divisor) && !std::isnan(divisor))
-    {
-        return dividend;
-    }
-    // else...
-    return(fmod(dividend,divisor));
+FCIMPL1_D(int64_t, RhpDbl2Lng, double val)
+{
+#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM)
+    const double int64_min = -2147483648.0 * 4294967296.0;
+    const double int64_max = 2147483648.0 * 4294967296.0;
+    return (val != val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (int64_t)val;
+#else
+    return (int64_t)val;
+#endif
 }
+FCIMPLEND
 
-#ifdef HOST_ARM
-EXTERN_C NATIVEAOT_API int32_t REDHAWK_CALLCONV RhpIDiv(int32_t i, int32_t j)
+FCIMPL1_D(int32_t, RhpDbl2Int, double val)
 {
-    ASSERT(j && "Divide by zero!");
-    return i / j;
+#if defined(HOST_X86) || defined(HOST_AMD64)
+    const double int32_min = -2147483648.0;
+    const double int32_max_plus_1 = 2147483648.0;
+    return (val != val) ? 0 : (val <= int32_min) ? INT32_MIN : (val >= int32_max_plus_1) ? INT32_MAX : (int32_t)val;
+#else
+    return (int32_t)val;
+#endif
 }
+FCIMPLEND
 
-EXTERN_C NATIVEAOT_API uint32_t REDHAWK_CALLCONV RhpUDiv(uint32_t i, uint32_t j)
+FCIMPL1_D(uint32_t, RhpDbl2UInt, double val)
 {
-    ASSERT(j && "Divide by zero!");
-    return i / j;
+#if defined(HOST_X86) || defined(HOST_AMD64)
+    const double uint_max = 4294967295.0;
+    return (val > 0) ? ((val >= uint_max) ? UINT32_MAX : (uint32_t)val) : 0;
+#else
+    return (uint32_t)val;
+#endif
 }
+FCIMPLEND
 
-EXTERN_C NATIVEAOT_API int64_t REDHAWK_CALLCONV RhpLDiv(int64_t i, int64_t j)
+#ifndef HOST_64BIT
+EXTERN_C int64_t QCALLTYPE RhpLDiv(int64_t i, int64_t j)
 {
     ASSERT(j && "Divide by zero!");
     return i / j;
 }
 
-EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpULDiv(uint64_t i, uint64_t j)
+EXTERN_C uint64_t QCALLTYPE RhpULDiv(uint64_t i, uint64_t j)
 {
     ASSERT(j && "Divide by zero!");
     return i / j;
 }
 
-EXTERN_C NATIVEAOT_API int32_t REDHAWK_CALLCONV RhpIMod(int32_t i, int32_t j)
+EXTERN_C int64_t QCALLTYPE RhpLMod(int64_t i, int64_t j)
 {
     ASSERT(j && "Divide by zero!");
     return i % j;
 }
 
-EXTERN_C NATIVEAOT_API uint32_t REDHAWK_CALLCONV RhpUMod(uint32_t i, uint32_t j)
+EXTERN_C uint64_t QCALLTYPE RhpULMod(uint64_t i, uint64_t j)
 {
     ASSERT(j && "Divide by zero!");
     return i % j;
 }
 
-EXTERN_C NATIVEAOT_API int64_t REDHAWK_CALLCONV RhpLMod(int64_t i, int64_t j)
+FCIMPL1_L(double, RhpLng2Dbl, int64_t val)
 {
-    ASSERT(j && "Divide by zero!");
-    return i % j;
+    return (double)val;
 }
+FCIMPLEND
 
-EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpULMod(uint64_t i, uint64_t j)
+FCIMPL1_L(double, RhpULng2Dbl, uint64_t val)
 {
-    ASSERT(j && "Divide by zero!");
-    return i % j;
+    return (double)val;
 }
+FCIMPLEND
 
-EXTERN_C NATIVEAOT_API int64_t REDHAWK_CALLCONV RhpLMul(int64_t i, int64_t j)
-{
-    return i * j;
-}
+#endif
 
-EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpULMul(uint64_t i, uint64_t j)
-{
-    return i * j;
-}
-
-EXTERN_C NATIVEAOT_API uint64_t REDHAWK_CALLCONV RhpLRsz(uint64_t i, int32_t j)
+#ifdef HOST_ARM
+EXTERN_C int32_t F_CALL_CONV RhpIDiv(int32_t i, int32_t j)
 {
-    return i >> j;
+    ASSERT(j && "Divide by zero!");
+    return i / j;
 }
 
-EXTERN_C NATIVEAOT_API int64_t REDHAWK_CALLCONV RhpLRsh(int64_t i, int32_t j)
+EXTERN_C uint32_t F_CALL_CONV RhpUDiv(uint32_t i, uint32_t j)
 {
-    return i >> j;
+    ASSERT(j && "Divide by zero!");
+    return i / j;
 }
 
-EXTERN_C NATIVEAOT_API int64_t REDHAWK_CALLCONV RhpLLsh(int64_t i, int32_t j)
+EXTERN_C int32_t F_CALL_CONV RhpIMod(int32_t i, int32_t j)
 {
-    return i << j;
+    ASSERT(j && "Divide by zero!");
+    return i % j;
 }
 
-EXTERN_C NATIVEAOT_API int64_t REDHAWK_CALLCONV RhpDbl2Lng(double val)
+EXTERN_C uint32_t F_CALL_CONV RhpUMod(uint32_t i, uint32_t j)
 {
-    return (int64_t)val;
+    ASSERT(j && "Divide by zero!");
+    return i % j;
 }
 
-EXTERN_C NATIVEAOT_API int32_t REDHAWK_CALLCONV RhpDbl2Int(double val)
+EXTERN_C int64_t F_CALL_CONV RhpLMul(int64_t i, int64_t j)
 {
-    return (int32_t)val;
+    return i * j;
 }
 
-EXTERN_C NATIVEAOT_API uint32_t REDHAWK_CALLCONV RhpDbl2UInt(double val)
+EXTERN_C uint64_t F_CALL_CONV RhpLRsz(uint64_t i, int32_t j)
 {
-    return (uint32_t)val;
+    return i >> (j & 0x3f);
 }
 
-EXTERN_C NATIVEAOT_API double REDHAWK_CALLCONV RhpLng2Dbl(int64_t val)
+EXTERN_C int64_t F_CALL_CONV RhpLRsh(int64_t i, int32_t j)
 {
-    return (double)val;
+    return i >> (j & 0x3f);
 }
 
-EXTERN_C NATIVEAOT_API double REDHAWK_CALLCONV RhpULng2Dbl(uint64_t val)
+EXTERN_C int64_t F_CALL_CONV RhpLLsh(int64_t i, int32_t j)
 {
-    return (double)val;
+    return i << (j & 0x3f);
 }
 
 #endif // HOST_ARM
+
+#ifdef HOST_X86
+
+#undef min
+#undef max
+#include <cmath>
+
+FCIMPL1_D(double, acos, double x)
+    return std::acos(x);
+FCIMPLEND
+
+FCIMPL1_F(float, acosf, float x)
+    return std::acosf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, acosh, double x)
+    return std::acosh(x);
+FCIMPLEND
+
+FCIMPL1_F(float, acoshf, float x)
+    return std::acoshf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, asin, double x)
+    return std::asin(x);
+FCIMPLEND
+
+FCIMPL1_F(float, asinf, float x)
+    return std::asinf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, asinh, double x)
+    return std::asinh(x);
+FCIMPLEND
+
+FCIMPL1_F(float, asinhf, float x)
+    return std::asinhf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, atan, double x)
+    return std::atan(x);
+FCIMPLEND
+
+FCIMPL1_F(float, atanf, float x)
+    return std::atanf(x);
+FCIMPLEND
+
+FCIMPL2_DD(double, atan2, double x, double y)
+    return std::atan2(x, y);
+FCIMPLEND
+
+FCIMPL2_FF(float, atan2f, float x, float y)
+    return std::atan2f(x, y);
+FCIMPLEND
+
+FCIMPL1_D(double, atanh, double x)
+    return std::atanh(x);
+FCIMPLEND
+
+FCIMPL1_F(float, atanhf, float x)
+    return std::atanhf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, cbrt, double x)
+    return std::cbrt(x);
+FCIMPLEND
+
+FCIMPL1_F(float, cbrtf, float x)
+    return std::cbrtf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, ceil, double x)
+    return std::ceil(x);
+FCIMPLEND
+
+FCIMPL1_F(float, ceilf, float x)
+    return std::ceilf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, cos, double x)
+    return std::cos(x);
+FCIMPLEND
+
+FCIMPL1_F(float, cosf, float x)
+    return std::cosf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, cosh, double x)
+    return std::cosh(x);
+FCIMPLEND
+
+FCIMPL1_F(float, coshf, float x)
+    return std::coshf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, exp, double x)
+    return std::exp(x);
+FCIMPLEND
+
+FCIMPL1_F(float, expf, float x)
+    return std::expf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, floor, double x)
+    return std::floor(x);
+FCIMPLEND
+
+FCIMPL1_F(float, floorf, float x)
+    return std::floorf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, log, double x)
+    return std::log(x);
+FCIMPLEND
+
+FCIMPL1_F(float, logf, float x)
+    return std::logf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, log2, double x)
+    return std::log2(x);
+FCIMPLEND
+
+FCIMPL1_F(float, log2f, float x)
+    return std::log2f(x);
+FCIMPLEND
+
+FCIMPL1_D(double, log10, double x)
+    return std::log10(x);
+FCIMPLEND
+
+FCIMPL1_F(float, log10f, float x)
+    return std::log10f(x);
+FCIMPLEND
+
+FCIMPL2_DD(double, pow, double x, double y)
+    return std::pow(x, y);
+FCIMPLEND
+
+FCIMPL2_FF(float, powf, float x, float y)
+    return std::powf(x, y);
+FCIMPLEND
+
+FCIMPL1_D(double, sin, double x)
+    return std::sin(x);
+FCIMPLEND
+
+FCIMPL1_F(float, sinf, float x)
+    return std::sinf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, sinh, double x)
+    return std::sinh(x);
+FCIMPLEND
+
+FCIMPL1_F(float, sinhf, float x)
+    return std::sinhf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, sqrt, double x)
+    return std::sqrt(x);
+FCIMPLEND
+
+FCIMPL1_F(float, sqrtf, float x)
+    return std::sqrtf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, tan, double x)
+    return std::tan(x);
+FCIMPLEND
+
+FCIMPL1_F(float, tanf, float x)
+    return std::tanf(x);
+FCIMPLEND
+
+FCIMPL1_D(double, tanh, double x)
+    return std::tanh(x);
+FCIMPLEND
+
+FCIMPL1_F(float, tanhf, float x)
+    return std::tanhf(x);
+FCIMPLEND
+
+FCIMPL2_DD(double, fmod, double x, double y)
+    return std::fmod(x, y);
+FCIMPLEND
+
+FCIMPL2_FF(float, fmodf, float x, float y)
+    return std::fmodf(x, y);
+FCIMPLEND
+
+FCIMPL3_DDD(double, fma, double x, double y, double z)
+    return std::fma(x, y, z);
+FCIMPLEND
+
+FCIMPL3_FFF(float, fmaf, float x, float y, float z)
+    return std::fmaf(x, y, z);
+FCIMPLEND
+
+FCIMPL2_DI(double, modf, double x, double* intptr)
+    return std::modf(x, intptr);
+FCIMPLEND
+
+FCIMPL2_FI(float, modff, float x, float* intptr)
+    return std::modff(x, intptr);
+FCIMPLEND
+
+#endif
diff --git a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
index eacc90297a69..c5bbcc228427 100644
--- a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
@@ -37,13 +37,14 @@
 #include "RhConfig.h"
 #include <minipal/cpuid.h>
 
-COOP_PINVOKE_HELPER(void, RhDebugBreak, ())
+FCIMPL0(void, RhDebugBreak)
 {
     PalDebugBreak();
 }
+FCIMPLEND
 
 // Busy spin for the given number of iterations.
-EXTERN_C NATIVEAOT_API void __cdecl RhSpinWait(int32_t iterations)
+EXTERN_C void QCALLTYPE RhSpinWait(int32_t iterations)
 {
     ASSERT(iterations > 0);
 
@@ -56,7 +57,7 @@ EXTERN_C NATIVEAOT_API void __cdecl RhSpinWait(int32_t iterations)
 }
 
 // Yield the cpu to another thread ready to process, if one is available.
-EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhYield()
+EXTERN_C UInt32_BOOL QCALLTYPE RhYield()
 {
     // This must be called via p/invoke -- it's a wait operation and we don't want to block thread suspension on this.
     ASSERT_MSG(!ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode(),
@@ -65,7 +66,7 @@ EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhYield()
     return PalSwitchToThread();
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhFlushProcessWriteBuffers()
+EXTERN_C void QCALLTYPE RhFlushProcessWriteBuffers()
 {
     // This must be called via p/invoke -- it's a wait operation and we don't want to block thread suspension on this.
     ASSERT_MSG(!ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode(),
@@ -81,7 +82,7 @@ EXTERN_C NATIVEAOT_API void __cdecl RhFlushProcessWriteBuffers()
 // modules are available based on the return count. It is also possible to call this method without an array,
 // in which case just the module count is returned (note that it's still possible for the module count to
 // increase between calls to this method).
-COOP_PINVOKE_HELPER(uint32_t, RhGetLoadedOSModules, (Array * pResultArray))
+FCIMPL1(uint32_t, RhGetLoadedOSModules, Array * pResultArray)
 {
     // Note that we depend on the fact that this is a COOP helper to make writing into an unpinned array safe.
 
@@ -107,8 +108,9 @@ COOP_PINVOKE_HELPER(uint32_t, RhGetLoadedOSModules, (Array * pResultArray))
 
     return cModules;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(HANDLE, RhGetOSModuleFromPointer, (PTR_VOID pPointerVal))
+FCIMPL1(HANDLE, RhGetOSModuleFromPointer, PTR_VOID pPointerVal)
 {
     ICodeManager * pCodeManager = GetRuntimeInstance()->GetCodeManagerForAddress(pPointerVal);
 
@@ -117,8 +119,9 @@ COOP_PINVOKE_HELPER(HANDLE, RhGetOSModuleFromPointer, (PTR_VOID pPointerVal))
 
     return NULL;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhFindBlob, (TypeManagerHandle *pTypeManagerHandle, uint32_t blobId, uint8_t ** ppbBlob, uint32_t * pcbBlob))
+FCIMPL4(FC_BOOL_RET, RhFindBlob, TypeManagerHandle *pTypeManagerHandle, uint32_t blobId, uint8_t ** ppbBlob, uint32_t * pcbBlob)
 {
     TypeManagerHandle typeManagerHandle = *pTypeManagerHandle;
 
@@ -137,11 +140,13 @@ COOP_PINVOKE_HELPER(FC_BOOL_RET, RhFindBlob, (TypeManagerHandle *pTypeManagerHan
 
     FC_RETURN_BOOL(pBlob != NULL);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void *, RhGetTargetOfUnboxingAndInstantiatingStub, (void * pUnboxStub))
+FCIMPL1(void *, RhGetTargetOfUnboxingAndInstantiatingStub, void * pUnboxStub)
 {
     return GetRuntimeInstance()->GetTargetOfUnboxingAndInstantiatingStub(pUnboxStub);
 }
+FCIMPLEND
 
 #if TARGET_ARM
 //*****************************************************************************
@@ -193,7 +198,7 @@ inline int32_t GetThumb2BlRel24(uint16_t * p)
 
 // Given a pointer to code, find out if this points to an import stub
 // or unboxing stub, and if so, return the address that stub jumps to
-COOP_PINVOKE_HELPER(uint8_t *, RhGetCodeTarget, (uint8_t * pCodeOrg))
+FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg)
 {
     bool unboxingStub = false;
 
@@ -336,15 +341,16 @@ COOP_PINVOKE_HELPER(uint8_t *, RhGetCodeTarget, (uint8_t * pCodeOrg))
 
     return pCodeOrg;
 }
+FCIMPLEND
 
-EXTERN_C NATIVEAOT_API uint64_t __cdecl RhpGetTickCount64()
+EXTERN_C uint64_t QCALLTYPE RhpGetTickCount64()
 {
     return PalGetTickCount64();
 }
 
-EXTERN_C int32_t __cdecl RhpCalculateStackTraceWorker(void* pOutputBuffer, uint32_t outputBufferLength, void* pAddressInCurrentFrame);
+EXTERN_C int32_t QCALLTYPE RhpCalculateStackTraceWorker(void* pOutputBuffer, uint32_t outputBufferLength, void* pAddressInCurrentFrame);
 
-EXTERN_C NATIVEAOT_API int32_t __cdecl RhpGetCurrentThreadStackTrace(void* pOutputBuffer, uint32_t outputBufferLength, void* pAddressInCurrentFrame)
+EXTERN_C int32_t QCALLTYPE RhpGetCurrentThreadStackTrace(void* pOutputBuffer, uint32_t outputBufferLength, void* pAddressInCurrentFrame)
 {
     // This must be called via p/invoke rather than RuntimeImport to make the stack crawlable.
 
@@ -353,7 +359,7 @@ EXTERN_C NATIVEAOT_API int32_t __cdecl RhpGetCurrentThreadStackTrace(void* pOutp
     return RhpCalculateStackTraceWorker(pOutputBuffer, outputBufferLength, pAddressInCurrentFrame);
 }
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhCompareObjectContentsAndPadding, (Object* pObj1, Object* pObj2))
+FCIMPL2(FC_BOOL_RET, RhCompareObjectContentsAndPadding, Object* pObj1, Object* pObj2)
 {
     ASSERT(pObj1->GetMethodTable() == pObj2->GetMethodTable());
     ASSERT(pObj1->GetMethodTable()->IsValueType());
@@ -367,40 +373,58 @@ COOP_PINVOKE_HELPER(FC_BOOL_RET, RhCompareObjectContentsAndPadding, (Object* pOb
     // memcmp is ok in this COOP method as we are comparing structs which are typically small.
     FC_RETURN_BOOL(memcmp(pbFields1, pbFields2, cbFields) == 0);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void*, RhpGetModuleSection, (TypeManagerHandle *pModule, int32_t headerId, int32_t* length))
+FCIMPL3(void*, RhpGetModuleSection, TypeManagerHandle *pModule, int32_t headerId, int32_t* length)
 {
     return pModule->AsTypeManager()->GetModuleSection((ReadyToRunSectionType)headerId, length);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhGetCurrentThreadStackBounds, (PTR_VOID * ppStackLow, PTR_VOID * ppStackHigh))
+FCIMPL2(void, RhGetCurrentThreadStackBounds, PTR_VOID * ppStackLow, PTR_VOID * ppStackHigh)
 {
     ThreadStore::GetCurrentThread()->GetStackBounds(ppStackLow, ppStackHigh);
 }
+FCIMPLEND
 
 // Function to call when a thread is detached from the runtime
 ThreadExitCallback g_threadExitCallback;
 
-COOP_PINVOKE_HELPER(void, RhSetThreadExitCallback, (void * pCallback))
+FCIMPL1(void, RhSetThreadExitCallback, void * pCallback)
 {
     g_threadExitCallback = (ThreadExitCallback)pCallback;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int32_t, RhGetProcessCpuCount, ())
+FCIMPL0(int32_t, RhGetProcessCpuCount)
 {
     return PalGetProcessCpuCount();
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(uint32_t, RhGetKnobValues, (char *** pResultKeys, char *** pResultValues))
+FCIMPL2(uint32_t, RhGetKnobValues, char *** pResultKeys, char *** pResultValues)
 {
     *pResultKeys = g_pRhConfig->GetKnobNames();
     *pResultValues = g_pRhConfig->GetKnobValues();
     return g_pRhConfig->GetKnobCount();
 }
+FCIMPLEND
 
 #if defined(TARGET_X86) || defined(TARGET_AMD64)
-EXTERN_C NATIVEAOT_API void __cdecl RhCpuIdEx(int* cpuInfo, int functionId, int subFunctionId)
+EXTERN_C void QCALLTYPE RhCpuIdEx(int* cpuInfo, int functionId, int subFunctionId)
 {
     __cpuidex(cpuInfo, functionId, subFunctionId);
 }
 #endif
+
+FCIMPL3(int32_t, RhpLockCmpXchg32, int32_t * location, int32_t value, int32_t comparand)
+{
+    return PalInterlockedCompareExchange(location, value, comparand);
+}
+FCIMPLEND
+
+FCIMPL3_ILL(int64_t, RhpLockCmpXchg64, int64_t * location, int64_t value, int64_t comparand)
+{
+    return PalInterlockedCompareExchange64(location, value, comparand);
+}
+FCIMPLEND
diff --git a/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt
index bf24b0f05d9b..4eb224818192 100644
--- a/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt
+++ b/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt
@@ -1,6 +1,6 @@
 project(PortableRuntime)
 
-# Portable version of the runtime is designed to be used with CppCodeGen or WASM.
+# Portable version of the runtime is designed to be used with WASM.
 # It should be written in pure C/C++, with no assembly code.
 
 add_definitions(-DUSE_PORTABLE_HELPERS)
diff --git a/src/coreclr/nativeaot/Runtime/RestrictedCallouts.h b/src/coreclr/nativeaot/Runtime/RestrictedCallouts.h
index 40eaf8395bae..2c1a2e61e095 100644
--- a/src/coreclr/nativeaot/Runtime/RestrictedCallouts.h
+++ b/src/coreclr/nativeaot/Runtime/RestrictedCallouts.h
@@ -97,6 +97,6 @@ class RestrictedCallouts
     static CrstStatic s_sLock;
 
     // Prototypes for the callouts.
-    typedef void (REDHAWK_CALLCONV * GcRestrictedCallbackFunction)(uint32_t uiCondemnedGeneration);
-    typedef CLR_BOOL (REDHAWK_CALLCONV * HandleTableRestrictedCallbackFunction)(Object * pObject);
+    typedef void (F_CALL_CONV * GcRestrictedCallbackFunction)(uint32_t uiCondemnedGeneration);
+    typedef CLR_BOOL (* HandleTableRestrictedCallbackFunction)(Object * pObject);
 };
diff --git a/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp b/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
index fedc3989eba7..708356c2e3a9 100644
--- a/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
+++ b/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp
@@ -43,27 +43,30 @@ ThreadStore *   RuntimeInstance::GetThreadStore()
     return m_pThreadStore;
 }
 
-COOP_PINVOKE_HELPER(uint8_t *, RhGetCrashInfoBuffer, (int32_t* pcbMaxSize))
+FCIMPL1(uint8_t *, RhGetCrashInfoBuffer, int32_t* pcbMaxSize)
 {
     *pcbMaxSize = MAX_CRASHINFOBUFFER_SIZE;
     return g_CrashInfoBuffer;
 }
+FCIMPLEND
 
 #if TARGET_UNIX
 #include "PalCreateDump.h"
-COOP_PINVOKE_HELPER(void, RhCreateCrashDumpIfEnabled, (PEXCEPTION_RECORD pExceptionRecord, PCONTEXT pExContext))
+FCIMPL2(void, RhCreateCrashDumpIfEnabled, PEXCEPTION_RECORD pExceptionRecord, PCONTEXT pExContext)
 {
     PalCreateCrashDumpIfEnabled(pExceptionRecord, pExContext);
 }
+FCIMPLEND
 #endif
 
-COOP_PINVOKE_HELPER(uint8_t *, RhGetRuntimeVersion, (int32_t* pcbLength))
+FCIMPL1(uint8_t *, RhGetRuntimeVersion, int32_t* pcbLength)
 {
     *pcbLength = sizeof(CLR_PRODUCT_VERSION) - 1;           // don't include the terminating null
     return (uint8_t*)&CLR_PRODUCT_VERSION;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(uint8_t *, RhFindMethodStartAddress, (void * codeAddr))
+FCIMPL1(uint8_t *, RhFindMethodStartAddress, void * codeAddr)
 {
     uint8_t *startAddress = dac_cast<uint8_t *>(GetRuntimeInstance()->FindMethodStartAddress(dac_cast<PTR_VOID>(codeAddr)));
 #if TARGET_ARM
@@ -72,6 +75,7 @@ COOP_PINVOKE_HELPER(uint8_t *, RhFindMethodStartAddress, (void * codeAddr))
     return startAddress;
 #endif
 }
+FCIMPLEND
 
 PTR_uint8_t RuntimeInstance::FindMethodStartAddress(PTR_VOID ControlPC)
 {
@@ -269,15 +273,16 @@ bool RuntimeInstance::RegisterTypeManager(TypeManager * pTypeManager)
     return true;
 }
 
-COOP_PINVOKE_HELPER(TypeManagerHandle, RhpCreateTypeManager, (HANDLE osModule, void* pModuleHeader, PTR_PTR_VOID pClasslibFunctions, uint32_t nClasslibFunctions))
+FCIMPL4(TypeManagerHandle, RhpCreateTypeManager, HANDLE osModule, void* pModuleHeader, PTR_PTR_VOID pClasslibFunctions, uint32_t nClasslibFunctions)
 {
     TypeManager * typeManager = TypeManager::Create(osModule, pModuleHeader, pClasslibFunctions, nClasslibFunctions);
     GetRuntimeInstance()->RegisterTypeManager(typeManager);
 
     return TypeManagerHandle::Create(typeManager);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void*, RhpRegisterOsModule, (HANDLE hOsModule))
+FCIMPL1(void*, RhpRegisterOsModule, HANDLE hOsModule)
 {
     RuntimeInstance::OsModuleEntry * pEntry = new (nothrow) RuntimeInstance::OsModuleEntry();
     if (NULL == pEntry)
@@ -289,6 +294,7 @@ COOP_PINVOKE_HELPER(void*, RhpRegisterOsModule, (HANDLE hOsModule))
 
     return hOsModule; // Return non-null on success
 }
+FCIMPLEND
 
 RuntimeInstance::TypeManagerList& RuntimeInstance::GetTypeManagerList()
 {
@@ -344,9 +350,9 @@ bool RuntimeInstance::ShouldHijackCallsiteForGcStress(uintptr_t CallsiteIP)
 }
 
 #ifdef FEATURE_CACHED_INTERFACE_DISPATCH
-EXTERN_C void RhpInitialDynamicInterfaceDispatch();
+EXTERN_C void F_CALL_CONV RhpInitialDynamicInterfaceDispatch();
 
-COOP_PINVOKE_HELPER(void *, RhNewInterfaceDispatchCell, (MethodTable * pInterface, int32_t slotNumber))
+FCIMPL2(void *, RhNewInterfaceDispatchCell, MethodTable * pInterface, int32_t slotNumber)
 {
     InterfaceDispatchCell * pCell = new (nothrow) InterfaceDispatchCell[2];
     if (pCell == NULL)
@@ -364,6 +370,7 @@ COOP_PINVOKE_HELPER(void *, RhNewInterfaceDispatchCell, (MethodTable * pInterfac
 
     return pCell;
 }
+FCIMPLEND
 #endif // FEATURE_CACHED_INTERFACE_DISPATCH
 
 #endif // DACCESS_COMPILE
diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
index 55a32b3d7ce2..ce0cfab5b78b 100644
--- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
+++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
@@ -41,9 +41,6 @@ EXTERN_C CODE_LOCATION ReturnFromUniversalTransition;
 EXTERN_C CODE_LOCATION ReturnFromUniversalTransition_DebugStepTailCall;
 #endif
 
-#ifdef TARGET_X86
-EXTERN_C CODE_LOCATION RhpCallFunclet2;
-#endif
 EXTERN_C CODE_LOCATION RhpCallCatchFunclet2;
 EXTERN_C CODE_LOCATION RhpCallFinallyFunclet2;
 EXTERN_C CODE_LOCATION RhpCallFilterFunclet2;
@@ -781,20 +778,6 @@ void StackFrameIterator::UnwindFuncletInvokeThunk()
 
     PTR_uintptr_t SP;
 
-#ifdef TARGET_X86
-    // First, unwind RhpCallFunclet
-    SP = (PTR_uintptr_t)(m_RegDisplay.SP + 0x4);   // skip the saved assembly-routine-EBP
-    m_RegDisplay.SetIP(*SP++);
-    m_RegDisplay.SetSP((uintptr_t)dac_cast<TADDR>(SP));
-    SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
-
-    ASSERT(
-        EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2) ||
-        EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallFinallyFunclet2) ||
-        EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallFilterFunclet2)
-        );
-#endif
-
     bool isFilterInvoke = EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallFilterFunclet2);
 
 #if defined(UNIX_AMD64_ABI)
@@ -878,7 +861,7 @@ void StackFrameIterator::UnwindFuncletInvokeThunk()
     m_RegDisplay.pR15 = SP++;
 
 #elif defined(TARGET_X86)
-    SP = (PTR_uintptr_t)(m_RegDisplay.SP);
+    SP = (PTR_uintptr_t)(m_RegDisplay.SP + 0x4);   // skip the saved assembly-routine-EBP
 
     if (!isFilterInvoke)
     {
@@ -1696,6 +1679,13 @@ void StackFrameIterator::CalculateCurrentMethodState()
     m_effectiveSafePointAddress = m_ControlPC;
     m_FramePointer = GetCodeManager()->GetFramePointer(&m_methodInfo, &m_RegDisplay);
 
+#ifdef TARGET_X86
+    if (m_dwFlags & UpdateResumeSp)
+    {
+        m_RegDisplay.ResumeSP = GetCodeManager()->GetResumeSp(&m_methodInfo, &m_RegDisplay);
+    }
+#endif
+
     m_dwFlags |= MethodStateCalculated;
 }
 
@@ -1811,25 +1801,6 @@ StackFrameIterator::ReturnAddressCategory StackFrameIterator::CategorizeUnadjust
         return InThrowSiteThunk;
     }
 
-#ifdef TARGET_X86
-    if (EQUALS_RETURN_ADDRESS(returnAddress, RhpCallFunclet2))
-    {
-        PORTABILITY_ASSERT("CategorizeUnadjustedReturnAddress");
-#if 0
-        // See if it is a filter funclet based on the caller of RhpCallFunclet
-        PTR_uintptr_t SP = (PTR_uintptr_t)(m_RegDisplay.SP + 0x4);   // skip the saved assembly-routine-EBP
-        PTR_uintptr_t ControlPC = *SP++;
-        if (EQUALS_RETURN_ADDRESS(ControlPC, RhpCallFilterFunclet2))
-        {
-            return InFilterFuncletInvokeThunk;
-        }
-        else
-#endif
-        {
-            return InFuncletInvokeThunk;
-        }
-    }
-#else // TARGET_X86
     if (EQUALS_RETURN_ADDRESS(returnAddress, RhpCallCatchFunclet2) ||
         EQUALS_RETURN_ADDRESS(returnAddress, RhpCallFinallyFunclet2))
     {
@@ -1840,14 +1811,13 @@ StackFrameIterator::ReturnAddressCategory StackFrameIterator::CategorizeUnadjust
     {
         return InFilterFuncletInvokeThunk;
     }
-#endif // TARGET_X86
     return InManagedCode;
 #endif // defined(USE_PORTABLE_HELPERS)
 }
 
 #ifndef DACCESS_COMPILE
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhpSfiInit, (StackFrameIterator* pThis, PAL_LIMITED_CONTEXT* pStackwalkCtx, CLR_BOOL instructionFault, CLR_BOOL* pfIsExceptionIntercepted))
+bool StackFrameIterator::Init(PAL_LIMITED_CONTEXT* pStackwalkCtx, bool instructionFault)
 {
     Thread * pCurThread = ThreadStore::GetCurrentThread();
 
@@ -1859,41 +1829,38 @@ COOP_PINVOKE_HELPER(FC_BOOL_RET, RhpSfiInit, (StackFrameIterator* pThis, PAL_LIM
 
     // Passing NULL is a special-case to request a standard managed stack trace for the current thread.
     if (pStackwalkCtx == NULL)
-        pThis->InternalInitForStackTrace();
+        InternalInitForStackTrace();
     else
-        pThis->InternalInitForEH(pCurThread, pStackwalkCtx, instructionFault);
+        InternalInitForEH(pCurThread, pStackwalkCtx, instructionFault);
 
-    bool isValid = pThis->IsValid();
+    bool isValid = IsValid();
     if (isValid)
-        pThis->CalculateCurrentMethodState();
-
-    if (pfIsExceptionIntercepted)
-    {
-        *pfIsExceptionIntercepted = false;
-    }
+        CalculateCurrentMethodState();
 
-    FC_RETURN_BOOL(isValid);
+    return isValid;
 }
 
-COOP_PINVOKE_HELPER(FC_BOOL_RET, RhpSfiNext, (StackFrameIterator* pThis, uint32_t* puExCollideClauseIdx, CLR_BOOL* pfUnwoundReversePInvoke, CLR_BOOL* pfIsExceptionIntercepted))
+bool StackFrameIterator::Next(uint32_t* puExCollideClauseIdx, bool* pfUnwoundReversePInvoke)
 {
+    Thread * pCurThread = ThreadStore::GetCurrentThread();
+
     // The stackwalker is intolerant to hijacked threads, as it is largely expecting to be called from C++
     // where the hijack state of the thread is invariant.  Because we've exposed the iterator out to C#, we
     // need to unhijack every time we callback into C++ because the thread could have been hijacked during our
     // time executing C#.
-    ThreadStore::GetCurrentThread()->Unhijack();
+    pCurThread->Unhijack();
 
     const uint32_t MaxTryRegionIdx = 0xFFFFFFFF;
 
-    ExInfo * pCurExInfo = pThis->m_pNextExInfo;
-    pThis->Next();
-    bool isValid = pThis->IsValid();
+    ExInfo * pCurExInfo = m_pNextExInfo;
+    Next();
+    bool isValid = IsValid();
     if (isValid)
-        pThis->CalculateCurrentMethodState();
+        CalculateCurrentMethodState();
 
     if (puExCollideClauseIdx != NULL)
     {
-        if (pThis->m_dwFlags & StackFrameIterator::ExCollide)
+        if (m_dwFlags & StackFrameIterator::ExCollide)
         {
             ASSERT(pCurExInfo->m_idxCurClause != MaxTryRegionIdx);
             *puExCollideClauseIdx = pCurExInfo->m_idxCurClause;
@@ -1907,9 +1874,29 @@ COOP_PINVOKE_HELPER(FC_BOOL_RET, RhpSfiNext, (StackFrameIterator* pThis, uint32_
 
     if (pfUnwoundReversePInvoke != NULL)
     {
-        *pfUnwoundReversePInvoke = (pThis->m_dwFlags & StackFrameIterator::UnwoundReversePInvoke) != 0;
+        *pfUnwoundReversePInvoke = (m_dwFlags & StackFrameIterator::UnwoundReversePInvoke) != 0;
     }
 
+    return isValid;
+}
+
+FCIMPL4(FC_BOOL_RET, RhpSfiInit, StackFrameIterator* pThis, PAL_LIMITED_CONTEXT* pStackwalkCtx, CLR_BOOL instructionFault, CLR_BOOL* pfIsExceptionIntercepted)
+{
+    bool isValid = pThis->Init(pStackwalkCtx, instructionFault);
+
+    if (pfIsExceptionIntercepted)
+    {
+        *pfIsExceptionIntercepted = false;
+    }
+
+    FC_RETURN_BOOL(isValid);
+}
+FCIMPLEND
+
+FCIMPL4(FC_BOOL_RET, RhpSfiNext, StackFrameIterator* pThis, uint32_t* puExCollideClauseIdx, CLR_BOOL* pfUnwoundReversePInvoke, CLR_BOOL* pfIsExceptionIntercepted)
+{
+    bool isValid = pThis->Next(puExCollideClauseIdx, pfUnwoundReversePInvoke);
+
     if (pfIsExceptionIntercepted)
     {
         *pfIsExceptionIntercepted = false;
@@ -1917,5 +1904,6 @@ COOP_PINVOKE_HELPER(FC_BOOL_RET, RhpSfiNext, (StackFrameIterator* pThis, uint32_
 
     FC_RETURN_BOOL(isValid);
 }
+FCIMPLEND
 
 #endif // !DACCESS_COMPILE
diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h
index d102cda234d8..cf7f524de8db 100644
--- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h
+++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h
@@ -28,8 +28,6 @@ struct EHEnum
 };
 
 class StackFrameIterator;
-EXTERN_C FC_BOOL_RET FASTCALL RhpSfiInit(StackFrameIterator* pThis, PAL_LIMITED_CONTEXT* pStackwalkCtx, CLR_BOOL instructionFault, CLR_BOOL* pfIsExceptionIntercepted);
-EXTERN_C FC_BOOL_RET FASTCALL RhpSfiNext(StackFrameIterator* pThis, uint32_t* puExCollideClauseIdx, CLR_BOOL* pfUnwoundReversePInvoke, CLR_BOOL* pfIsExceptionIntercepted);
 
 struct PInvokeTransitionFrame;
 typedef DPTR(PInvokeTransitionFrame) PTR_PInvokeTransitionFrame;
@@ -38,8 +36,6 @@ typedef DPTR(PAL_LIMITED_CONTEXT) PTR_PAL_LIMITED_CONTEXT;
 class StackFrameIterator
 {
     friend class AsmOffsets;
-    friend FC_BOOL_RET FASTCALL RhpSfiInit(StackFrameIterator* pThis, PAL_LIMITED_CONTEXT* pStackwalkCtx, CLR_BOOL instructionFault, CLR_BOOL* pfIsExceptionIntercepted);
-    friend FC_BOOL_RET FASTCALL RhpSfiNext(StackFrameIterator* pThis, uint32_t* puExCollideClauseIdx, CLR_BOOL* pfUnwoundReversePInvoke, CLR_BOOL* pfIsExceptionIntercepted);
 
 public:
     StackFrameIterator() {}
@@ -69,6 +65,10 @@ class StackFrameIterator
     bool HasStackRangeToReportConservatively();
     void GetStackRangeToReportConservatively(PTR_OBJECTREF * ppLowerBound, PTR_OBJECTREF * ppUpperBound);
 
+    // Implementations of RhpSfiInit and RhpSfiNext called from managed code
+    bool             Init(PAL_LIMITED_CONTEXT* pStackwalkCtx, bool instructionFault);
+    bool             Next(uint32_t* puExCollideClauseIdx, bool* pfUnwoundReversePInvoke);
+
 private:
     // The invoke of a funclet is a bit special and requires an assembly thunk, but we don't want to break the
     // stackwalk due to this.  So this routine will unwind through the assembly thunks used to invoke funclets.
@@ -148,8 +148,11 @@ class StackFrameIterator
         // When encountering a reverse P/Invoke, unwind directly to the P/Invoke frame using the saved transition frame.
         SkipNativeFrames = 0x80,
 
+        // Set SP to an address that is valid for funclet resumption (x86 only)
+        UpdateResumeSp = 0x100,
+
         GcStackWalkFlags = (CollapseFunclets | RemapHardwareFaultsToSafePoint | SkipNativeFrames),
-        EHStackWalkFlags = ApplyReturnAddressAdjustment,
+        EHStackWalkFlags = (ApplyReturnAddressAdjustment | UpdateResumeSp),
         StackTraceStackWalkFlags = GcStackWalkFlags
     };
 
diff --git a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp
index 025a20ab9176..d22f30e19d9e 100644
--- a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp
+++ b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp
@@ -55,42 +55,48 @@ void EncodeThumb2Mov32(uint16_t * pCode, uint32_t value, uint8_t rDestination)
 }
 #endif
 
-COOP_PINVOKE_HELPER(int, RhpGetNumThunkBlocksPerMapping, ())
+FCIMPL0(int, RhpGetNumThunkBlocksPerMapping)
 {
     ASSERT_MSG((THUNKS_MAP_SIZE % OS_PAGE_SIZE) == 0, "Thunks map size should be in multiples of pages");
 
     return THUNKS_MAP_SIZE / OS_PAGE_SIZE;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int, RhpGetNumThunksPerBlock, ())
+FCIMPL0(int, RhpGetNumThunksPerBlock)
 {
     return min(
         OS_PAGE_SIZE / THUNK_SIZE,                              // Number of thunks that can fit in a page
         (OS_PAGE_SIZE - POINTER_SIZE) / (POINTER_SIZE * 2)      // Number of pointer pairs, minus the jump stub cell, that can fit in a page
     );
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int, RhpGetThunkSize, ())
+FCIMPL0(int, RhpGetThunkSize)
 {
     return THUNK_SIZE;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void*, RhpGetThunkDataBlockAddress, (void* pThunkStubAddress))
+FCIMPL1(void*, RhpGetThunkDataBlockAddress, void* pThunkStubAddress)
 {
     return (void*)(((uintptr_t)pThunkStubAddress & ~(OS_PAGE_SIZE - 1)) + THUNKS_MAP_SIZE);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void*, RhpGetThunkStubsBlockAddress, (void* pThunkDataAddress))
+FCIMPL1(void*, RhpGetThunkStubsBlockAddress, void* pThunkDataAddress)
 {
     return (void*)(((uintptr_t)pThunkDataAddress & ~(OS_PAGE_SIZE - 1)) - THUNKS_MAP_SIZE);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int, RhpGetThunkBlockSize, ())
+FCIMPL0(int, RhpGetThunkBlockSize)
 {
     return OS_PAGE_SIZE;
 }
+FCIMPLEND
 
-EXTERN_C NATIVEAOT_API void* __cdecl RhAllocateThunksMapping()
+EXTERN_C void* QCALLTYPE RhAllocateThunksMapping()
 {
 #ifdef WIN32
 
@@ -260,13 +266,13 @@ EXTERN_C NATIVEAOT_API void* __cdecl RhAllocateThunksMapping()
 extern "C" uintptr_t g_pThunkStubData;
 uintptr_t g_pThunkStubData = NULL;
 
-COOP_PINVOKE_HELPER(int, RhpGetThunkBlockCount, ());
-COOP_PINVOKE_HELPER(int, RhpGetNumThunkBlocksPerMapping, ());
-COOP_PINVOKE_HELPER(int, RhpGetThunkBlockSize, ());
-COOP_PINVOKE_HELPER(void*, RhpGetThunkDataBlockAddress, (void* addr));
-COOP_PINVOKE_HELPER(void*, RhpGetThunkStubsBlockAddress, (void* addr));
+FCDECL0(int, RhpGetThunkBlockCount);
+FCDECL0(int, RhpGetNumThunkBlocksPerMapping);
+FCDECL0(int, RhpGetThunkBlockSize);
+FCDECL1(void*, RhpGetThunkDataBlockAddress, void* addr);
+FCDECL1(void*, RhpGetThunkStubsBlockAddress, void* addr);
 
-EXTERN_C NATIVEAOT_API void* __cdecl RhAllocateThunksMapping()
+EXTERN_C void* QCALLTYPE RhAllocateThunksMapping()
 {
     static int nextThunkDataMapping = 0;
 
@@ -313,13 +319,13 @@ EXTERN_C NATIVEAOT_API void* __cdecl RhAllocateThunksMapping()
 
 #else // FEATURE_FIXED_POOL_THUNKS
 
-COOP_PINVOKE_HELPER(void*, RhpGetThunksBase, ());
-COOP_PINVOKE_HELPER(int, RhpGetNumThunkBlocksPerMapping, ());
-COOP_PINVOKE_HELPER(int, RhpGetNumThunksPerBlock, ());
-COOP_PINVOKE_HELPER(int, RhpGetThunkSize, ());
-COOP_PINVOKE_HELPER(int, RhpGetThunkBlockSize, ());
+FCDECL0(void*, RhpGetThunksBase);
+FCDECL0(int, RhpGetNumThunkBlocksPerMapping);
+FCDECL0(int, RhpGetNumThunksPerBlock);
+FCDECL0(int, RhpGetThunkSize);
+FCDECL0(int, RhpGetThunkBlockSize);
 
-EXTERN_C NATIVEAOT_API void* __cdecl RhAllocateThunksMapping()
+EXTERN_C void* QCALLTYPE RhAllocateThunksMapping()
 {
     static void* pThunksTemplateAddress = NULL;
 
diff --git a/src/coreclr/nativeaot/Runtime/amd64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/amd64/InteropThunksHelpers.S
index a53dd3c63624..ab7242b89e29 100644
--- a/src/coreclr/nativeaot/Runtime/amd64/InteropThunksHelpers.S
+++ b/src/coreclr/nativeaot/Runtime/amd64/InteropThunksHelpers.S
@@ -38,14 +38,3 @@ LEAF_ENTRY RhGetCommonStubAddress, _TEXT
     lea rax, [rip + C_FUNC(RhCommonStub)]
     ret
 LEAF_END RhGetCommonStubAddress, _TEXT
-
-
-#ifndef FEATURE_EMULATED_TLS
-LEAF_ENTRY RhGetCurrentThunkContext, _TEXT
-
-    INLINE_GET_TLS_VAR  tls_thunkData
-
-    mov    rax, qword ptr [rax]
-    ret
-LEAF_END RhGetCurrentThunkContext, _TEXT
-#endif //FEATURE_EMULATED_TLS
diff --git a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S
index cc740a9c0601..e55e682653b5 100644
--- a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S
+++ b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S
@@ -221,12 +221,8 @@ LEAF_END RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT
 // just one write barrier that assumes the input register is RSI.
 DEFINE_CHECKED_WRITE_BARRIER RSI, ESI
 
-// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
-// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation
-// - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
 LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
     mov             rax, rdx
-ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
     lock cmpxchg    [rdi], rsi
     jne             LOCAL_LABEL(RhpCheckedLockCmpXchg_NoBarrierRequired_RSI)
 
@@ -234,15 +230,11 @@ ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
 
 LEAF_END RhpCheckedLockCmpXchg, _TEXT
 
-// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
-// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedXchgAVLocation
-// - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
 LEAF_ENTRY RhpCheckedXchg, _TEXT
 
     // Setup rax with the new object for the exchange, that way it will automatically hold the correct result
     // afterwards and we can leave rdx unaltered ready for the GC write barrier below.
     mov             rax, rsi
-ALTERNATE_ENTRY RhpCheckedXchgAVLocation
     xchg            [rdi], rax
 
     DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, RSI
@@ -258,7 +250,10 @@ LEAF_END RhpCheckedXchg, _TEXT
 //
 // On exit:
 //      rdi, rsi are incremented by 8,
-//      rcx, r10, r11: trashed
+//      rcx, rax: trashed
+//
+// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+//       if you add more trashed registers.
 //
 // WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
 // - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1/2
@@ -280,16 +275,15 @@ ALTERNATE_ENTRY RhpByRefAssignRefAVLocation2
     UPDATE_GC_SHADOW BASENAME, rcx, rdi
 
 #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
-    mov     r11, [C_VAR(g_write_watch_table)]
-    cmp     r11, 0x0
+    cmp     qword ptr [C_VAR(g_write_watch_table)], 0x0
     je      LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable)
 
-    mov     r10, rdi
-    shr     r10, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift
-    add     r10, r11
-    cmp     byte ptr [r10], 0x0
+    mov     rax, rdi
+    shr     rax, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift
+    add     rax, [C_VAR(g_write_watch_table)]
+    cmp     byte ptr [rax], 0x0
     jne     LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable)
-    mov     byte ptr [r10], 0xFF
+    mov     byte ptr [rax], 0xFF
 #endif
 
 LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable):
@@ -309,12 +303,12 @@ LOCAL_LABEL(RhpByRefAssignRef_CheckCardTable):
     // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
     // the byte if it hasn't already been done since writes are expensive and impact scaling.
     shr     rcx, 0x0B
-    mov     r10, [C_VAR(g_card_table)]
-    cmp     byte ptr [rcx + r10], 0x0FF
+    mov     rax, [C_VAR(g_card_table)]
+    cmp     byte ptr [rcx + rax], 0x0FF
     je      LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired)
 
 // We get here if it's necessary to update the card table.
-    mov     byte ptr [rcx + r10], 0xFF
+    mov     byte ptr [rcx + rax], 0xFF
 
 #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
     // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already)
diff --git a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm
index 148aa7d1301b..302b9e0a8b1f 100644
--- a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm
+++ b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm
@@ -237,12 +237,8 @@ endm
 ;; just one write barrier that assumes the input register is RDX.
 DEFINE_CHECKED_WRITE_BARRIER RDX, EDX
 
-;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
-;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation
-;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
 LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
     mov             rax, r8
-ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
     lock cmpxchg    [rcx], rdx
     jne             RhpCheckedLockCmpXchg_NoBarrierRequired_RDX
 
@@ -250,15 +246,11 @@ ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
 
 LEAF_END RhpCheckedLockCmpXchg, _TEXT
 
-;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
-;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedXchgAVLocation
-;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
 LEAF_ENTRY RhpCheckedXchg, _TEXT
 
     ;; Setup rax with the new object for the exchange, that way it will automatically hold the correct result
     ;; afterwards and we can leave rdx unaltered ready for the GC write barrier below.
     mov             rax, rdx
-ALTERNATE_ENTRY RhpCheckedXchgAVLocation
     xchg            [rcx], rax
 
     DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, RDX
@@ -274,7 +266,10 @@ LEAF_END RhpCheckedXchg, _TEXT
 ;;
 ;; On exit:
 ;;      rdi, rsi are incremented by 8,
-;;      rcx, r10, r11: trashed
+;;      rcx, rax: trashed
+;;
+;; NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+;;       if you add more trashed registers.
 ;;
 ;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
 ;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1/2
@@ -296,16 +291,15 @@ ALTERNATE_ENTRY RhpByRefAssignRefAVLocation2
     UPDATE_GC_SHADOW BASENAME, rcx, rdi
 
 ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
-    mov     r11, [g_write_watch_table]
-    cmp     r11, 0
+    cmp     [g_write_watch_table], 0
     je      RhpByRefAssignRef_CheckCardTable
 
-    mov     r10, rdi
-    shr     r10, 0Ch ;; SoftwareWriteWatch::AddressToTableByteIndexShift
-    add     r10, r11
-    cmp     byte ptr [r10], 0
+    mov     rax, rdi
+    shr     rax, 0Ch ;; SoftwareWriteWatch::AddressToTableByteIndexShift
+    add     rax, [g_write_watch_table]
+    cmp     byte ptr [rax], 0
     jne     RhpByRefAssignRef_CheckCardTable
-    mov     byte ptr [r10], 0FFh
+    mov     byte ptr [rax], 0FFh
 endif
 
 RhpByRefAssignRef_CheckCardTable:
@@ -325,12 +319,12 @@ RhpByRefAssignRef_CheckCardTable:
     ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
     ;; the byte if it hasn't already been done since writes are expensive and impact scaling.
     shr     rcx, 0Bh
-    mov     r10, [g_card_table]
-    cmp     byte ptr [rcx + r10], 0FFh
+    mov     rax, [g_card_table]
+    cmp     byte ptr [rcx + rax], 0FFh
     je      RhpByRefAssignRef_NoBarrierRequired
 
 ;; We get here if it's necessary to update the card table.
-    mov     byte ptr [rcx + r10], 0FFh
+    mov     byte ptr [rcx + rax], 0FFh
 
 ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
     ;; Shift rcx by 0Ah more to get the card bundle byte (we shifted by 0Bh already)
diff --git a/src/coreclr/nativeaot/Runtime/arm/Interlocked.S b/src/coreclr/nativeaot/Runtime/arm/Interlocked.S
deleted file mode 100644
index 631731c7e3a3..000000000000
--- a/src/coreclr/nativeaot/Runtime/arm/Interlocked.S
+++ /dev/null
@@ -1,102 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-.syntax unified
-.thumb
-
-#include <AsmOffsets.inc>         // generated by the build from AsmOffsets.cpp
-#include <unixasmmacros.inc>
-
-// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular:
-// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg8AVLocation
-// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
-// r0 = destination address
-// r1 = value
-// r2 = comparand
-LEAF_ENTRY RhpLockCmpXchg8, _TEXT
-          dmb
-GLOBAL_LABEL RhpLockCmpXchg8AVLocation
-LOCAL_LABEL(CmpXchg8Retry):
-          ldrexb       r3, [r0]
-          cmp          r2, r3
-          bne          LOCAL_LABEL(CmpXchg8Exit)
-          strexb       r12, r1, [r0]
-          cmp          r12, #0
-          bne          LOCAL_LABEL(CmpXchg8Retry)
-LOCAL_LABEL(CmpXchg8Exit):
-          mov          r0, r3
-          dmb
-          bx           lr
-LEAF_END RhpLockCmpXchg8, _TEXT
-
-// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular:
-// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg16AVLocation
-// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
-// r0 = destination address
-// r1 = value
-// r2 = comparand
-LEAF_ENTRY RhpLockCmpXchg16, _TEXT
-          uxth         r2, r2
-          dmb
-GLOBAL_LABEL RhpLockCmpXchg16AVLocation
-LOCAL_LABEL(CmpXchg16Retry):
-          ldrexh       r3, [r0]
-          cmp          r2, r3
-          bne          LOCAL_LABEL(CmpXchg16Exit)
-          strexh       r12, r1, [r0]
-          cmp          r12, #0
-          bne          LOCAL_LABEL(CmpXchg16Retry)
-LOCAL_LABEL(CmpXchg16Exit):
-          sxth         r0, r3
-          dmb
-          bx           lr
-LEAF_END RhpLockCmpXchg16, _TEXT
-
-// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular:
-// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg32AVLocation
-// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
-// r0 = destination address
-// r1 = value
-// r2 = comparand
-LEAF_ENTRY RhpLockCmpXchg32, _TEXT
-          dmb
-GLOBAL_LABEL RhpLockCmpXchg32AVLocation
-LOCAL_LABEL(CmpXchg32Retry):
-          ldrex        r3, [r0]
-          cmp          r2, r3
-          bne          LOCAL_LABEL(CmpXchg32Exit)
-          strex        r12, r1, [r0]
-          cmp          r12, #0
-          bne          LOCAL_LABEL(CmpXchg32Retry)
-LOCAL_LABEL(CmpXchg32Exit):
-          mov          r0, r3
-          dmb
-          bx           lr
-LEAF_END RhpLockCmpXchg32, _TEXT
-
-// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular:
-// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg64AVLocation
-// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
-// r0      = destination address
-// {r2,r3} = value
-// sp[0+8] = comparand
-LEAF_ENTRY RhpLockCmpXchg64, _TEXT
-GLOBAL_LABEL RhpLockCmpXchg64AVLocation
-          ldr          r12, [r0]        // dummy read for null check
-          PROLOG_PUSH  "{r4-r6,lr}"
-          dmb
-          ldrd         r4, r5, [sp,#0x10]
-LOCAL_LABEL(CmpXchg64Retry):
-          ldrexd       r6, r1, [r0]
-          cmp          r6, r4
-          bne          LOCAL_LABEL(CmpXchg64Exit)
-          cmp          r1, r5
-          bne          LOCAL_LABEL(CmpXchg64Exit)
-          strexd       r12, r2, r3, [r0]
-          cmp          r12, #0
-          bne          LOCAL_LABEL(CmpXchg64Retry)
-LOCAL_LABEL(CmpXchg64Exit):
-          mov          r0, r6
-          dmb
-          EPILOG_POP   "{r4-r6,pc}"
-LEAF_END RhpLockCmpXchg64, _TEXT
diff --git a/src/coreclr/nativeaot/Runtime/arm/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/arm/InteropThunksHelpers.S
index eec5523d6750..dc2babe8b6ab 100644
--- a/src/coreclr/nativeaot/Runtime/arm/InteropThunksHelpers.S
+++ b/src/coreclr/nativeaot/Runtime/arm/InteropThunksHelpers.S
@@ -22,7 +22,11 @@ NESTED_ENTRY RhCommonStub, _TEXT, NoHandler
 
           mov          r4, r12
 
+#ifdef FEATURE_EMULATED_TLS
+          bl           C_FUNC(RhpGetThunkData)
+#else
           INLINE_GET_TLS_VAR  tls_thunkData
+#endif
 
           // r0 = base address of TLS data
           // r4 = address of context cell in thunk's data
@@ -46,15 +50,3 @@ LEAF_ENTRY RhGetCommonStubAddress, _TEXT
           bx            lr
 LEAF_END RhGetCommonStubAddress, _TEXT
 
-//
-// IntPtr RhGetCurrentThunkContext()
-//
-LEAF_ENTRY RhGetCurrentThunkContext, _TEXT
-
-          PROLOG_PUSH   "{r12, lr}"
-
-          INLINE_GET_TLS_VAR  tls_thunkData
-
-          ldr           r0, [r0]
-          EPILOG_POP    "{r12, pc}"
-LEAF_END RhGetCurrentThunkContext, _TEXT
diff --git a/src/coreclr/nativeaot/Runtime/arm/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/arm/WriteBarriers.S
index 3f7a10a85925..3bb862231a34 100644
--- a/src/coreclr/nativeaot/Runtime/arm/WriteBarriers.S
+++ b/src/coreclr/nativeaot/Runtime/arm/WriteBarriers.S
@@ -250,9 +250,6 @@ LEAF_END RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT
 // just one write barrier that assumes the input register is RSI.
 DEFINE_CHECKED_WRITE_BARRIER r1, r1
 
-// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
-// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation
-// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
 // r0 = destination address
 // r1 = value
 // r2 = comparand
@@ -261,7 +258,6 @@ LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
           // barrier must occur before the object reference update, so we have to do it unconditionally even
           // though the update may fail below.
           dmb
-GLOBAL_LABEL RhpCheckedLockCmpXchgAVLocation
 LOCAL_LABEL(RhpCheckedLockCmpXchgRetry):
           ldrex        r3, [r0]
           cmp          r2, r3
@@ -277,16 +273,12 @@ LOCAL_LABEL(RhpCheckedLockCmpXchgRetry):
           bx           lr
 LEAF_END RhpCheckedLockCmpXchg, _TEXT
 
-// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
-// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedXchgAVLocation
-// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
 // r0 = destination address
 // r1 = value
 LEAF_ENTRY RhpCheckedXchg, _TEXT
           // To implement our chosen memory model for ARM we insert a memory barrier at GC write barriers. This
           // barrier must occur before the object reference update.
           dmb
-GLOBAL_LABEL RhpCheckedXchgAVLocation
 LOCAL_LABEL(RhpCheckedXchgRetry):
           ldrex        r2, [r0]
           strex        r3, r1, [r0]
diff --git a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S
index 79ffed2b0521..966b052a2b9f 100644
--- a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S
+++ b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S
@@ -90,9 +90,11 @@ LOCAL_LABEL(RhpNewFast_RarePath):
         // Set the new objects MethodTable pointer on success.
         cbz         x0, LOCAL_LABEL(NewOutOfMemory)
 
+        .cfi_remember_state
         POP_COOP_PINVOKE_FRAME
         EPILOG_RETURN
 
+        .cfi_restore_state
 LOCAL_LABEL(NewOutOfMemory):
         // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
         // an out of memory exception that the caller of this allocator understands.
@@ -262,9 +264,11 @@ LOCAL_LABEL(RhpNewArray_Rare):
         // Set the new objects MethodTable pointer and length on success.
         cbz         x0, LOCAL_LABEL(ArrayOutOfMemory)
 
+        .cfi_remember_state
         POP_COOP_PINVOKE_FRAME
         EPILOG_RETURN
 
+        .cfi_restore_state
 LOCAL_LABEL(ArrayOutOfMemory):
         // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
         // an out of memory exception that the caller of this allocator understands.
diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S
index abe7555b7611..8075335ea0b2 100644
--- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S
+++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S
@@ -146,8 +146,11 @@ NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler
     ldr         x2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
     tbnz        x2, #PTFF_THREAD_ABORT_BIT, LOCAL_LABEL(ThrowThreadAbort)
 
+    .cfi_remember_state
     POP_PROBE_FRAME
     EPILOG_RETURN
+
+    .cfi_restore_state
 LOCAL_LABEL(ThrowThreadAbort):
     POP_PROBE_FRAME
     mov w0, #STATUS_REDHAWK_THREAD_ABORT
diff --git a/src/coreclr/nativeaot/Runtime/arm64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/arm64/InteropThunksHelpers.S
index f474c4fa4017..31d151d4890f 100644
--- a/src/coreclr/nativeaot/Runtime/arm64/InteropThunksHelpers.S
+++ b/src/coreclr/nativeaot/Runtime/arm64/InteropThunksHelpers.S
@@ -50,19 +50,3 @@ POINTER_SIZE = 0x08
         PREPARE_EXTERNAL_VAR RhCommonStub, x0
         ret
     LEAF_END RhGetCommonStubAddress, _TEXT
-
-
-#ifndef FEATURE_EMULATED_TLS
-    //
-    // IntPtr RhGetCurrentThunkContext()
-    //
-    LEAF_ENTRY RhGetCurrentThunkContext, _TEXT
-
-        INLINE_GET_TLS_VAR x1, C_FUNC(tls_thunkData)
-
-        ldr x0, [x1]
-
-        ret
-
-    LEAF_END RhGetCurrentThunkContext, _TEXT
-#endif //FEATURE_EMULATED_TLS
diff --git a/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S
index 835466c3b9e7..474509ea587f 100644
--- a/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S
+++ b/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S
@@ -192,6 +192,9 @@
 //   x15  : trashed
 //   x12, x17  : trashed
 //
+//   NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+//         if you add more trashed registers.
+//
 // WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
 // - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1
 // - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
@@ -299,7 +302,6 @@ LEAF_END RhpAssignRef, _TEXT
 #endif
 
         mov    x10, x2
-    ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
         casal  x10, x1, [x0]                  // exchange
         cmp    x2, x10
         bne    LOCAL_LABEL(CmpXchgNoUpdate)
@@ -308,7 +310,6 @@ LEAF_END RhpAssignRef, _TEXT
         b      LOCAL_LABEL(DoCardsCmpXchg)
 LOCAL_LABEL(CmpXchgRetry):
         // Check location value is what we expect.
-    ALTERNATE_ENTRY  RhpCheckedLockCmpXchgAVLocation2
         ldaxr   x10, [x0]
         cmp     x10, x2
         bne     LOCAL_LABEL(CmpXchgNoUpdate)
@@ -362,14 +363,12 @@ LOCAL_LABEL(NoBarrierCmpXchg):
         tbz    w16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, LOCAL_LABEL(ExchangeRetry)
 #endif
 
-    ALTERNATE_ENTRY  RhpCheckedXchgAVLocation
         swpal  x1, x10, [x0]                   // exchange
 
 #ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
         b      LOCAL_LABEL(DoCardsXchg)
 LOCAL_LABEL(ExchangeRetry):
         // Read the existing memory location.
-    ALTERNATE_ENTRY  RhpCheckedXchgAVLocation2
         ldaxr   x10,  [x0]
 
         // Attempt to update with the new value.
diff --git a/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.asm b/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.asm
index 26a8ef30387c..5ccccd2a301e 100644
--- a/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.asm
+++ b/src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.asm
@@ -206,6 +206,9 @@ INVALIDGCVALUE  EQU 0xCCCCCCCD
 ;;   x15  : trashed
 ;;   x12, x17  : trashed
 ;;
+;;   NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+;;         if you add more trashed registers.
+;;
 ;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
 ;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1
 ;; - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
@@ -283,10 +286,6 @@ NotInHeap
 ;; Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon
 ;; successful updates.
 
-;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
-;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation
-;; - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
-
 ;; RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand)
 ;;
 ;; Interlocked compare exchange on objectref.
@@ -308,7 +307,6 @@ NotInHeap
 #endif
 
         mov    x10, x2
-    ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
         casal  x10, x1, [x0]                  ;; exchange
         cmp    x2, x10
         bne    CmpXchgNoUpdate
@@ -317,7 +315,6 @@ NotInHeap
         b      DoCardsCmpXchg
 CmpXchgRetry
         ;; Check location value is what we expect.
-    ALTERNATE_ENTRY  RhpCheckedLockCmpXchgAVLocation2
         ldaxr   x10, [x0]
         cmp     x10, x2
         bne     CmpXchgNoUpdate
@@ -347,10 +344,6 @@ NoBarrierCmpXchg
 
     LEAF_END RhpCheckedLockCmpXchg
 
-;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
-;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation
-;; - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address
-
 ;; RhpCheckedXchg(Object** destination, Object* value)
 ;;
 ;; Interlocked exchange on objectref.
@@ -371,14 +364,12 @@ NoBarrierCmpXchg
         tbz    x16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, ExchangeRetry
 #endif
 
-    ALTERNATE_ENTRY  RhpCheckedXchgAVLocation
         swpal  x1, x10, [x0]                   ;; exchange
 
 #ifndef LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT
         b      DoCardsXchg
 ExchangeRetry
         ;; Read the existing memory location.
-    ALTERNATE_ENTRY  RhpCheckedXchgAVLocation2
         ldaxr   x10,  [x0]
 
         ;; Attempt to update with the new value.
diff --git a/src/coreclr/nativeaot/Runtime/disabledeventpipeinternal.cpp b/src/coreclr/nativeaot/Runtime/disabledeventpipeinternal.cpp
index 4f34808b1c56..6c48533b05c1 100644
--- a/src/coreclr/nativeaot/Runtime/disabledeventpipeinternal.cpp
+++ b/src/coreclr/nativeaot/Runtime/disabledeventpipeinternal.cpp
@@ -13,7 +13,7 @@ struct EventPipeEventInstanceData;
 
 struct EventPipeSessionInfo;
 
-EXTERN_C NATIVEAOT_API uint64_t __cdecl RhEventPipeInternal_Enable(
+EXTERN_C uint64_t QCALLTYPE EventPipeInternal_Enable(
     const WCHAR* outputFile,
     EventPipeSerializationFormat format,
     uint32_t circularBufferSizeInMB,
@@ -23,11 +23,11 @@ EXTERN_C NATIVEAOT_API uint64_t __cdecl RhEventPipeInternal_Enable(
     return 0;
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhEventPipeInternal_Disable(uint64_t sessionID)
+EXTERN_C void QCALLTYPE EventPipeInternal_Disable(uint64_t sessionID)
 {
 }
 
-EXTERN_C NATIVEAOT_API intptr_t __cdecl RhEventPipeInternal_CreateProvider(
+EXTERN_C intptr_t QCALLTYPE EventPipeInternal_CreateProvider(
     const WCHAR* providerName,
     EventPipeCallback pCallbackFunc,
     void* pCallbackContext)
@@ -35,7 +35,7 @@ EXTERN_C NATIVEAOT_API intptr_t __cdecl RhEventPipeInternal_CreateProvider(
     return 0;
 }
 
-EXTERN_C NATIVEAOT_API intptr_t __cdecl RhEventPipeInternal_DefineEvent(
+EXTERN_C intptr_t QCALLTYPE EventPipeInternal_DefineEvent(
     intptr_t provHandle,
     uint32_t eventID,
     int64_t keywords,
@@ -47,21 +47,21 @@ EXTERN_C NATIVEAOT_API intptr_t __cdecl RhEventPipeInternal_DefineEvent(
     return 0;
 }
 
-EXTERN_C NATIVEAOT_API intptr_t __cdecl RhEventPipeInternal_GetProvider(const WCHAR* providerName)
+EXTERN_C intptr_t QCALLTYPE EventPipeInternal_GetProvider(const WCHAR* providerName)
 {
     return 0;
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhEventPipeInternal_DeleteProvider(intptr_t provHandle)
+EXTERN_C void QCALLTYPE EventPipeInternal_DeleteProvider(intptr_t provHandle)
 {
 }
 
-EXTERN_C NATIVEAOT_API int __cdecl RhEventPipeInternal_EventActivityIdControl(uint32_t controlCode, GUID *pActivityId)
+EXTERN_C int QCALLTYPE EventPipeInternal_EventActivityIdControl(uint32_t controlCode, GUID *pActivityId)
 {
     return 0;
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhEventPipeInternal_WriteEventData(
+EXTERN_C void QCALLTYPE EventPipeInternal_WriteEventData(
     intptr_t eventHandle,
     EventData *pEventData,
     uint32_t eventDataCount,
@@ -70,22 +70,22 @@ EXTERN_C NATIVEAOT_API void __cdecl RhEventPipeInternal_WriteEventData(
 {
 }
 
-EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhEventPipeInternal_GetSessionInfo(uint64_t sessionID, EventPipeSessionInfo *pSessionInfo)
+EXTERN_C UInt32_BOOL QCALLTYPE EventPipeInternal_GetSessionInfo(uint64_t sessionID, EventPipeSessionInfo *pSessionInfo)
 {
     return FALSE;
 }
 
-EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhEventPipeInternal_GetNextEvent(uint64_t sessionID, EventPipeEventInstanceData *pInstance)
+EXTERN_C UInt32_BOOL QCALLTYPE EventPipeInternal_GetNextEvent(uint64_t sessionID, EventPipeEventInstanceData *pInstance)
 {
     return FALSE;
 }
 
-EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhEventPipeInternal_SignalSession(uint64_t sessionID)
+EXTERN_C UInt32_BOOL QCALLTYPE EventPipeInternal_SignalSession(uint64_t sessionID)
 {
     return FALSE;
 }
 
-EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhEventPipeInternal_WaitForSessionSignal(uint64_t sessionID, int32_t timeoutMs)
+EXTERN_C UInt32_BOOL QCALLTYPE EventPipeInternal_WaitForSessionSignal(uint64_t sessionID, int32_t timeoutMs)
 {
     return FALSE;
 }
diff --git a/src/coreclr/nativeaot/Runtime/disabledruntimeeventinternal.cpp b/src/coreclr/nativeaot/Runtime/disabledruntimeeventinternal.cpp
index 17a8010ec1cd..f590021e6ff9 100644
--- a/src/coreclr/nativeaot/Runtime/disabledruntimeeventinternal.cpp
+++ b/src/coreclr/nativeaot/Runtime/disabledruntimeeventinternal.cpp
@@ -9,43 +9,43 @@
 
 // We will do a no-op for events in the disabled EventPipe This is similar to the way eventpipe checks if the provider and an event is enabled before firting the event, and no-op otherwise.
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogContentionLockCreated(intptr_t LockID, intptr_t AssociatedObjectID, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogContentionLockCreated(intptr_t LockID, intptr_t AssociatedObjectID, uint16_t ClrInstanceID)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogContentionStart(uint8_t ContentionFlags, uint16_t ClrInstanceID, intptr_t LockID, intptr_t AssociatedObjectID, uint64_t LockOwnerThreadID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogContentionStart(uint8_t ContentionFlags, uint16_t ClrInstanceID, intptr_t LockID, intptr_t AssociatedObjectID, uint64_t LockOwnerThreadID)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogContentionStop(uint8_t ContentionFlags, uint16_t ClrInstanceID, double DurationNs)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogContentionStop(uint8_t ContentionFlags, uint16_t ClrInstanceID, double DurationNs)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkerThreadStart(uint32_t activeWorkerThreadCount, uint32_t retiredWorkerThreadCount, uint16_t clrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadStart(uint32_t activeWorkerThreadCount, uint32_t retiredWorkerThreadCount, uint16_t clrInstanceID)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkerThreadStop(uint32_t ActiveWorkerThreadCount, uint32_t RetiredWorkerThreadCount, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadStop(uint32_t ActiveWorkerThreadCount, uint32_t RetiredWorkerThreadCount, uint16_t ClrInstanceID)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkerThreadWait(uint32_t ActiveWorkerThreadCount, uint32_t RetiredWorkerThreadCount, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadWait(uint32_t ActiveWorkerThreadCount, uint32_t RetiredWorkerThreadCount, uint16_t ClrInstanceID)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolMinMaxThreads(uint16_t MinWorkerThreads, uint16_t MaxWorkerThreads, uint16_t MinIOCompletionThreads, uint16_t MaxIOCompletionThreads, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolMinMaxThreads(uint16_t MinWorkerThreads, uint16_t MaxWorkerThreads, uint16_t MinIOCompletionThreads, uint16_t MaxIOCompletionThreads, uint16_t ClrInstanceID)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentSample(double Throughput, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentSample(double Throughput, uint16_t ClrInstanceID)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentAdjustment(double AverageThroughput, uint32_t NewWorkerThreadCount, uint32_t Reason, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentAdjustment(double AverageThroughput, uint32_t NewWorkerThreadCount, uint32_t Reason, uint16_t ClrInstanceID)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentStats(
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentStats(
     double Duration,
     double Throughput,
     double ThreadPoolWorkerThreadWait,
@@ -60,7 +60,7 @@ EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorker
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolIOEnqueue(
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolIOEnqueue(
     void * NativeOverlapped,
     void * Overlapped,
     bool MultiDequeues,
@@ -68,27 +68,27 @@ EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolIOEnqu
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolIODequeue(void * NativeOverlapped, void * Overlapped, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolIODequeue(void * NativeOverlapped, void * Overlapped, uint16_t ClrInstanceID)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkingThreadCount(uint32_t Count, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkingThreadCount(uint32_t Count, uint16_t ClrInstanceID)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolIOPack(void * NativeOverlapped, void * Overlapped, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolIOPack(void * NativeOverlapped, void * Overlapped, uint16_t ClrInstanceID)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogExceptionThrown(const WCHAR* exceptionTypeName, const WCHAR* exceptionMessage, void* faultingIP, HRESULT hresult)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogExceptionThrown(const WCHAR* exceptionTypeName, const WCHAR* exceptionMessage, void* faultingIP, HRESULT hresult)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogWaitHandleWaitStart(uint8_t WaitSource, intptr_t AssociatedObjectID, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogWaitHandleWaitStart(uint8_t WaitSource, intptr_t AssociatedObjectID, uint16_t ClrInstanceID)
 {
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogWaitHandleWaitStop(uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogWaitHandleWaitStop(uint16_t ClrInstanceID)
 {
 }
 
diff --git a/src/coreclr/nativeaot/Runtime/eventpipe/gen-eventing-event-inc.lst b/src/coreclr/nativeaot/Runtime/eventpipe/gen-eventing-event-inc.lst
index 5abd0e386692..901af659ff84 100644
--- a/src/coreclr/nativeaot/Runtime/eventpipe/gen-eventing-event-inc.lst
+++ b/src/coreclr/nativeaot/Runtime/eventpipe/gen-eventing-event-inc.lst
@@ -46,6 +46,7 @@ GCBulkSurvivingObjectRanges
 GCCreateConcurrentThread_V1
 GCCreateSegment_V1
 GCDecision_V1
+GCDynamicEvent
 GCEnd_V1
 GCFinalizersBegin_V1
 GCFinalizersEnd_V1
diff --git a/src/coreclr/nativeaot/Runtime/eventpipeinternal.cpp b/src/coreclr/nativeaot/Runtime/eventpipeinternal.cpp
index 0e4f52a80b37..3febb68b73e7 100644
--- a/src/coreclr/nativeaot/Runtime/eventpipeinternal.cpp
+++ b/src/coreclr/nativeaot/Runtime/eventpipeinternal.cpp
@@ -43,7 +43,7 @@ struct EventPipeProviderConfigurationNative
     WCHAR *pFilterData;
 };
 
-EXTERN_C NATIVEAOT_API uint64_t __cdecl RhEventPipeInternal_Enable(
+EXTERN_C uint64_t QCALLTYPE EventPipeInternal_Enable(
     const WCHAR* outputFile,
     EventPipeSerializationFormat format,
     uint32_t circularBufferSizeInMB,
@@ -104,12 +104,12 @@ EXTERN_C NATIVEAOT_API uint64_t __cdecl RhEventPipeInternal_Enable(
     return result;
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhEventPipeInternal_Disable(uint64_t sessionID)
+EXTERN_C void QCALLTYPE EventPipeInternal_Disable(uint64_t sessionID)
 {
     ep_disable(sessionID);
 }
 
-EXTERN_C NATIVEAOT_API intptr_t __cdecl RhEventPipeInternal_CreateProvider(
+EXTERN_C intptr_t QCALLTYPE EventPipeInternal_CreateProvider(
     const WCHAR* providerName,
     EventPipeCallback pCallbackFunc,
     void* pCallbackContext)
@@ -120,7 +120,7 @@ EXTERN_C NATIVEAOT_API intptr_t __cdecl RhEventPipeInternal_CreateProvider(
     return reinterpret_cast<intptr_t>(pProvider);
 }
 
-EXTERN_C NATIVEAOT_API intptr_t __cdecl RhEventPipeInternal_DefineEvent(
+EXTERN_C intptr_t QCALLTYPE EventPipeInternal_DefineEvent(
     intptr_t provHandle,
     uint32_t eventID,
     int64_t keywords,
@@ -139,7 +139,7 @@ EXTERN_C NATIVEAOT_API intptr_t __cdecl RhEventPipeInternal_DefineEvent(
     return reinterpret_cast<intptr_t>(pEvent);
 }
 
-EXTERN_C NATIVEAOT_API intptr_t __cdecl RhEventPipeInternal_GetProvider(const WCHAR* providerName)
+EXTERN_C intptr_t QCALLTYPE EventPipeInternal_GetProvider(const WCHAR* providerName)
 {
     EventPipeProvider * provider = NULL;
     if (providerName)
@@ -152,7 +152,7 @@ EXTERN_C NATIVEAOT_API intptr_t __cdecl RhEventPipeInternal_GetProvider(const WC
     return reinterpret_cast<intptr_t>(provider);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhEventPipeInternal_DeleteProvider(intptr_t provHandle)
+EXTERN_C void QCALLTYPE EventPipeInternal_DeleteProvider(intptr_t provHandle)
 {
     if (provHandle != 0)
     {
@@ -172,7 +172,7 @@ enum class ActivityControlCode
     EVENT_ACTIVITY_CONTROL_CREATE_SET_ID = 5
 };
 
-EXTERN_C NATIVEAOT_API int __cdecl RhEventPipeInternal_EventActivityIdControl(uint32_t controlCode, GUID *pActivityId)
+EXTERN_C int QCALLTYPE EventPipeInternal_EventActivityIdControl(uint32_t controlCode, GUID *pActivityId)
 {
     int retVal = 0;
     ep_rt_thread_activity_id_handle_t activityIdHandle = ep_thread_get_activity_id_handle ();
@@ -224,7 +224,7 @@ EXTERN_C NATIVEAOT_API int __cdecl RhEventPipeInternal_EventActivityIdControl(ui
     return retVal;
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhEventPipeInternal_WriteEventData(
+EXTERN_C void QCALLTYPE EventPipeInternal_WriteEventData(
     intptr_t eventHandle,
     EventData *pEventData,
     uint32_t eventDataCount,
@@ -236,7 +236,7 @@ EXTERN_C NATIVEAOT_API void __cdecl RhEventPipeInternal_WriteEventData(
     ep_write_event_2(pEvent, pEventData, eventDataCount, reinterpret_cast<const uint8_t*>(pActivityId), reinterpret_cast<const uint8_t*>(pRelatedActivityId));
 }
 
-EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhEventPipeInternal_GetSessionInfo(uint64_t sessionID, EventPipeSessionInfo *pSessionInfo)
+EXTERN_C UInt32_BOOL QCALLTYPE EventPipeInternal_GetSessionInfo(uint64_t sessionID, EventPipeSessionInfo *pSessionInfo)
 {
     bool retVal = false;
     if (pSessionInfo != NULL)
@@ -253,7 +253,7 @@ EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhEventPipeInternal_GetSessionInfo(ui
     return retVal;
 }
 
-EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhEventPipeInternal_GetNextEvent(uint64_t sessionID, EventPipeEventInstanceData *pInstance)
+EXTERN_C UInt32_BOOL QCALLTYPE EventPipeInternal_GetNextEvent(uint64_t sessionID, EventPipeEventInstanceData *pInstance)
 {
     EventPipeEventInstance *pNextInstance = NULL;
     _ASSERTE(pInstance != NULL);
@@ -274,7 +274,7 @@ EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhEventPipeInternal_GetNextEvent(uint
     return pNextInstance != NULL;
 }
 
-EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhEventPipeInternal_SignalSession(uint64_t sessionID)
+EXTERN_C UInt32_BOOL QCALLTYPE EventPipeInternal_SignalSession(uint64_t sessionID)
 {
     EventPipeSession *const session = ep_get_session (sessionID);
     if (!session)
@@ -283,7 +283,7 @@ EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhEventPipeInternal_SignalSession(uin
     return ep_rt_wait_event_set (ep_session_get_wait_event (session));
 }
 
-EXTERN_C NATIVEAOT_API UInt32_BOOL __cdecl RhEventPipeInternal_WaitForSessionSignal(uint64_t sessionID, int32_t timeoutMs)
+EXTERN_C UInt32_BOOL QCALLTYPE EventPipeInternal_WaitForSessionSignal(uint64_t sessionID, int32_t timeoutMs)
 {
     EventPipeSession *const session = ep_get_session (sessionID);
     if (!session)
diff --git a/src/coreclr/nativeaot/Runtime/gcheaputilities.cpp b/src/coreclr/nativeaot/Runtime/gcheaputilities.cpp
index 42f7928cd9e2..2678b12c1aea 100644
--- a/src/coreclr/nativeaot/Runtime/gcheaputilities.cpp
+++ b/src/coreclr/nativeaot/Runtime/gcheaputilities.cpp
@@ -38,7 +38,7 @@ GPTR_IMPL(GcDacVars, g_gcDacGlobals);
 
 // GC entrypoints for the linked-in GC. These symbols are invoked
 // directly if we are not using a standalone GC.
-extern "C" HRESULT GC_Initialize(
+extern "C" HRESULT LOCALGC_CALLCONV GC_Initialize(
     /* In  */ IGCToCLR* clrToGC,
     /* Out */ IGCHeap** gcHeap,
     /* Out */ IGCHandleManager** gcHandleManager,
diff --git a/src/coreclr/nativeaot/Runtime/gctoclreventsink.cpp b/src/coreclr/nativeaot/Runtime/gctoclreventsink.cpp
index 8ece828ba53d..16ef56605304 100644
--- a/src/coreclr/nativeaot/Runtime/gctoclreventsink.cpp
+++ b/src/coreclr/nativeaot/Runtime/gctoclreventsink.cpp
@@ -11,17 +11,27 @@ void GCToCLREventSink::FireDynamicEvent(const char* eventName, void* payload, ui
 {
     LIMITED_METHOD_CONTRACT;
 
-#ifndef FEATURE_NATIVEAOT
     const size_t EventNameMaxSize = 255;
 
     WCHAR wideEventName[EventNameMaxSize];
-    if (MultiByteToWideChar(CP_ACP, 0, eventName, -1, wideEventName, EventNameMaxSize) == 0)
+    int i = 0;
+    while (true)
     {
-        return;
+        if (i == (EventNameMaxSize - 1))
+        {
+            wideEventName[i] = L'\0';
+            assert(false);
+            break;
+        }
+        wideEventName[i] = (WCHAR)eventName[i];
+        if (eventName[i] == '\0')
+        {
+            break;
+        }
+        i++;
     }
 
     FireEtwGCDynamicEvent(wideEventName, payloadSize, (const BYTE*)payload, GetClrInstanceId());
-#endif // !FEATURE_NATIVEAOT
 }
 
 void GCToCLREventSink::FireGCStart_V2(uint32_t count, uint32_t depth, uint32_t reason, uint32_t type)
diff --git a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc
index b7f6554993cd..896bf8e67dab 100644
--- a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc
+++ b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc
@@ -10,6 +10,9 @@ include AsmOffsets.inc      ; generated by the build from AsmOffsets.cpp
 FASTCALL_FUNC macro FuncName,cbArgs
     FuncNameReal EQU @&FuncName&@&cbArgs
     FuncNameReal proc public
+    FuncName label proc
+    PUBLIC FuncName
+
 endm
 
 FASTCALL_ENDFUNC macro
@@ -18,18 +21,8 @@ endm
 
 ALTERNATE_ENTRY macro Name
 
-decoratedName TEXTEQU @CatStr( _, Name ) )
-
-decoratedName label proc
-PUBLIC decoratedName
-        endm
-
-LABELED_RETURN_ADDRESS macro Name
-
-decoratedName TEXTEQU @CatStr( _, Name ) )
-
-decoratedName label proc
-PUBLIC decoratedName
+Name label proc
+PUBLIC Name
         endm
 
 __tls_array     equ 2Ch     ;; offsetof(TEB, ThreadLocalStoragePointer)
@@ -134,7 +127,7 @@ PTFF_SAVE_RAX           equ 00000100h   ;; RAX is saved if it contains a GC ref
 PTFF_SAVE_ALL_SCRATCH   equ 00000700h
 PTFF_RAX_IS_GCREF       equ 00010000h   ;; iff PTFF_SAVE_RAX: set -> eax is Object, clear -> eax is scalar
 PTFF_RAX_IS_BYREF       equ 00020000h   ;; iff PTFF_SAVE_RAX: set -> eax is ByRef, clear -> eax is Object or scalar
-PTFF_THREAD_ABORT       equ 00040000h   ;; indicates that ThreadAbortException should be thrown when returning from the transition
+PTFF_THREAD_ABORT       equ 00100000h   ;; indicates that ThreadAbortException should be thrown when returning from the transition
 
 ;; These must match the TrapThreadsFlags enum
 TrapThreadsFlags_None            equ 0
@@ -163,6 +156,11 @@ G_EPHEMERAL_HIGH                            equ _g_ephemeral_high
 G_CARD_TABLE                                equ _g_card_table
 RhpWaitForGC2                               equ @RhpWaitForGC2@4
 RhpTrapThreads                              equ _RhpTrapThreads
+RhpStressGc                                 equ @RhpStressGc@0
+RhpGcPoll2                                  equ @RhpGcPoll2@4
+RhHandleGet                                 equ @RhHandleGet@4
+RhpGcSafeZeroMemory                         equ @RhpGcSafeZeroMemory@8
+RhpGetNumThunkBlocksPerMapping              equ @RhpGetNumThunkBlocksPerMapping@0
 
 ifdef FEATURE_GC_STRESS
 THREAD__HIJACKFORGCSTRESS                   equ ?HijackForGcStress@Thread@@SGXPAUPAL_LIMITED_CONTEXT@@@Z
@@ -178,6 +176,17 @@ EXTERN RhExceptionHandling_FailedAllocation     : PROC
 EXTERN RhThrowHwEx                              : PROC
 EXTERN RhThrowEx                                : PROC
 EXTERN RhRethrow                                : PROC
+EXTERN RhpGcPoll2                               : PROC
+
+;; The following imports are not used in the assembly helpers. Due to the
+;; way the FCall mangling is handled in the C sources through linker directives
+;; (see FCIMPL macro definitions in CommonMacros.h), we need to add dummy
+;; references to some methods in few object files (HandleTableHelpers and
+;; GCMemoryHelpers, ThunkMapping) to get the linker to see the #pragma
+;; comment(linker, ...) directives embedded in those files.
+EXTERN RhHandleGet                              : PROC
+EXTERN RhpGcSafeZeroMemory                      : PROC
+EXTERN RhpGetNumThunkBlocksPerMapping           : PROC
 
 ifdef FEATURE_GC_STRESS
 EXTERN THREAD__HIJACKFORGCSTRESS                : PROC
diff --git a/src/coreclr/nativeaot/Runtime/i386/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/i386/AsmOffsetsCpu.h
index 7e19e77b7e88..326a8aa6ddf3 100644
--- a/src/coreclr/nativeaot/Runtime/i386/AsmOffsetsCpu.h
+++ b/src/coreclr/nativeaot/Runtime/i386/AsmOffsetsCpu.h
@@ -7,7 +7,7 @@
 //
 // NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix
 
-PLAT_ASM_SIZEOF(bc, ExInfo)
+PLAT_ASM_SIZEOF(c4, ExInfo)
 PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo)
 PLAT_ASM_OFFSET(4, ExInfo, m_pExContext)
 PLAT_ASM_OFFSET(8, ExInfo, m_exception)
@@ -15,7 +15,7 @@ PLAT_ASM_OFFSET(0c, ExInfo, m_kind)
 PLAT_ASM_OFFSET(0d, ExInfo, m_passNumber)
 PLAT_ASM_OFFSET(10, ExInfo, m_idxCurClause)
 PLAT_ASM_OFFSET(14, ExInfo, m_frameIter)
-PLAT_ASM_OFFSET(b8, ExInfo, m_notifyDebuggerSP)
+PLAT_ASM_OFFSET(c0, ExInfo, m_notifyDebuggerSP)
 
 PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP)
 PLAT_ASM_OFFSET(4, PInvokeTransitionFrame, m_FramePointer)
@@ -23,16 +23,15 @@ PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_pThread)
 PLAT_ASM_OFFSET(0c, PInvokeTransitionFrame, m_Flags)
 PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_PreservedRegs)
 
-PLAT_ASM_SIZEOF(a4, StackFrameIterator)
+PLAT_ASM_SIZEOF(ac, StackFrameIterator)
 PLAT_ASM_OFFSET(08, StackFrameIterator, m_FramePointer)
 PLAT_ASM_OFFSET(0c, StackFrameIterator, m_ControlPC)
 PLAT_ASM_OFFSET(10, StackFrameIterator, m_RegDisplay)
-PLAT_ASM_OFFSET(9c, StackFrameIterator, m_OriginalControlPC)
-PLAT_ASM_OFFSET(a0, StackFrameIterator, m_pPreviousTransitionFrame)
+PLAT_ASM_OFFSET(a4, StackFrameIterator, m_OriginalControlPC)
+PLAT_ASM_OFFSET(a8, StackFrameIterator, m_pPreviousTransitionFrame)
 
 PLAT_ASM_SIZEOF(1c, PAL_LIMITED_CONTEXT)
 PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, IP)
-
 PLAT_ASM_OFFSET(4, PAL_LIMITED_CONTEXT, Rsp)
 PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, Rbp)
 PLAT_ASM_OFFSET(0c, PAL_LIMITED_CONTEXT, Rdi)
@@ -40,10 +39,10 @@ PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, Rsi)
 PLAT_ASM_OFFSET(14, PAL_LIMITED_CONTEXT, Rax)
 PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, Rbx)
 
-PLAT_ASM_SIZEOF(24, REGDISPLAY)
-PLAT_ASM_OFFSET(1c, REGDISPLAY, SP)
-
+PLAT_ASM_SIZEOF(2c, REGDISPLAY)
 PLAT_ASM_OFFSET(0c, REGDISPLAY, pRbx)
 PLAT_ASM_OFFSET(10, REGDISPLAY, pRbp)
 PLAT_ASM_OFFSET(14, REGDISPLAY, pRsi)
 PLAT_ASM_OFFSET(18, REGDISPLAY, pRdi)
+PLAT_ASM_OFFSET(1c, REGDISPLAY, SP)
+PLAT_ASM_OFFSET(28, REGDISPLAY, ResumeSP)
diff --git a/src/coreclr/nativeaot/Runtime/i386/CallDescrWorker.S b/src/coreclr/nativeaot/Runtime/i386/CallDescrWorker.S
deleted file mode 100644
index 876f2dfbcb80..000000000000
--- a/src/coreclr/nativeaot/Runtime/i386/CallDescrWorker.S
+++ /dev/null
@@ -1,4 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-// TODO: Implement
diff --git a/src/coreclr/nativeaot/Runtime/i386/ExceptionHandling.asm b/src/coreclr/nativeaot/Runtime/i386/ExceptionHandling.asm
index 127c1b617b8f..4e823bbbd6ad 100644
--- a/src/coreclr/nativeaot/Runtime/i386/ExceptionHandling.asm
+++ b/src/coreclr/nativeaot/Runtime/i386/ExceptionHandling.asm
@@ -10,10 +10,18 @@
 
 include AsmMacros.inc
 
-RhpCallFunclet equ @RhpCallFunclet@0
-RhpThrowHwEx equ @RhpThrowHwEx@0
+;; input:   ECX: possible exception object
+;;          EDX: funclet IP
+;;          EAX: funclet EBP
+CALL_FUNCLET  macro  SUFFIX
+        push    ebp
+        mov     ebp, eax
+        mov     eax, ecx
+        call    edx
+ALTERNATE_ENTRY _RhpCall&SUFFIX&Funclet2
+        pop     ebp
+endm
 
-extern RhpCallFunclet : proc
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
@@ -25,7 +33,7 @@ extern RhpCallFunclet : proc
 ;; OUTPUT:
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-FASTCALL_FUNC  RhpThrowHwEx, 0
+FASTCALL_FUNC  RhpThrowHwEx, 8
 
         esp_offsetof_ExInfo     textequ %0
         esp_offsetof_Context    textequ %SIZEOF__ExInfo
@@ -74,7 +82,7 @@ FASTCALL_FUNC  RhpThrowHwEx, 0
         ;; edx contains the address of the ExInfo
         call    RhThrowHwEx
 
-ALTERNATE_ENTRY RhpThrowHwEx2
+ALTERNATE_ENTRY _RhpThrowHwEx2
 
         ;; no return
         int 3
@@ -90,7 +98,7 @@ FASTCALL_ENDFUNC
 ;; OUTPUT:
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-FASTCALL_FUNC  RhpThrowEx, 0
+FASTCALL_FUNC  RhpThrowEx, 4
 
         esp_offsetof_ExInfo     textequ %0
         esp_offsetof_Context    textequ %SIZEOF__ExInfo
@@ -151,7 +159,7 @@ FASTCALL_FUNC  RhpThrowEx, 0
         ;; edx contains the address of the ExInfo
         call    RhThrowEx
 
-ALTERNATE_ENTRY RhpThrowEx2
+ALTERNATE_ENTRY _RhpThrowEx2
 
         ;; no return
         int 3
@@ -171,7 +179,6 @@ FASTCALL_ENDFUNC
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 FASTCALL_FUNC  RhpRethrow, 0
 
-
         esp_offsetof_ExInfo     textequ %0
         esp_offsetof_Context    textequ %SIZEOF__ExInfo
 
@@ -266,13 +273,14 @@ endm
 ;;
 ;; INPUT:  ECX:         exception object
 ;;         EDX:         handler funclet address
-;;         [ESP + 4]:   REGDISPLAY*
-;;         [ESP + 8]:   ExInfo*
+;;         [ESP + 4]:   ExInfo*
+;;         [ESP + 8]:   REGDISPLAY*
+;;         (CLR calling convention switches the last two parameters!)
 ;;
 ;; OUTPUT:
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-FASTCALL_FUNC  RhpCallCatchFunclet, 0
+FASTCALL_FUNC  RhpCallCatchFunclet, 16
 
         FUNCLET_CALL_PROLOGUE 2
 
@@ -283,8 +291,8 @@ FASTCALL_FUNC  RhpCallCatchFunclet, 0
                                                                     ;; [esp + 10h]: ebx save
         esp_offsetof_PrevEBP                    textequ %14h        ;; [esp + 14h]: prev ebp
         esp_offsetof_RetAddr                    textequ %18h        ;; [esp + 18h]: return address
-        esp_offsetof_RegDisplay                 textequ %1ch        ;; [esp + 1Ch]: REGDISPLAY*
-        esp_offsetof_ExInfo                     textequ %20h        ;; [esp + 20h]: ExInfo*
+        esp_offsetof_RegDisplay                 textequ %20h        ;; [esp + 20h]: REGDISPLAY*
+        esp_offsetof_ExInfo                     textequ %1ch        ;; [esp + 1ch]: ExInfo*
 
         ;; Clear the DoNotTriggerGc state before calling out to our managed catch funclet.
         INLINE_GETTHREAD    eax, ebx        ;; eax <- Thread*, ebx is trashed
@@ -313,9 +321,7 @@ FASTCALL_FUNC  RhpCallCatchFunclet, 0
         ;; ECX still contains the exception object
         ;; EDX: funclet IP
         ;; EAX: funclet EBP
-        call        RhpCallFunclet
-
-ALTERNATE_ENTRY RhpCallCatchFunclet2
+        CALL_FUNCLET Catch
 
         ;; eax: resume IP
         mov         [esp + esp_offsetof_ResumeIP], eax              ;; save for later
@@ -328,7 +334,7 @@ ALTERNATE_ENTRY RhpCallCatchFunclet2
 
         mov         ecx, [esp + esp_offsetof_ExInfo]                ;; ecx <- current ExInfo *
         mov         eax, [esp + esp_offsetof_RegDisplay]            ;; eax <- REGDISPLAY*
-        mov         eax, [eax + OFFSETOF__REGDISPLAY__SP]           ;; eax <- resume SP value
+        mov         eax, [eax + OFFSETOF__REGDISPLAY__ResumeSP]     ;; eax <- resume SP value
 
     @@: mov         ecx, [ecx + OFFSETOF__ExInfo__m_pPrevExInfo]    ;; ecx <- next ExInfo
         cmp         ecx, 0
@@ -379,7 +385,7 @@ FASTCALL_ENDFUNC
 ;; OUTPUT:
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-FASTCALL_FUNC  RhpCallFinallyFunclet, 0
+FASTCALL_FUNC  RhpCallFinallyFunclet, 8
 
         FUNCLET_CALL_PROLOGUE 0
 
@@ -409,9 +415,7 @@ FASTCALL_FUNC  RhpCallFinallyFunclet, 0
         ;; ECX: not used
         ;; EDX: funclet IP
         ;; EAX: funclet EBP
-        call        RhpCallFunclet
-
-ALTERNATE_ENTRY RhpCallFinallyFunclet2
+        CALL_FUNCLET Finally
 
         pop         edx     ;; restore REGDISPLAY*
 
@@ -446,7 +450,7 @@ FASTCALL_ENDFUNC
 ;; OUTPUT:
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-FASTCALL_FUNC  RhpCallFilterFunclet, 0
+FASTCALL_FUNC  RhpCallFilterFunclet, 12
 
         FUNCLET_CALL_PROLOGUE 0
 
@@ -463,9 +467,7 @@ FASTCALL_FUNC  RhpCallFilterFunclet, 0
         ;; EAX contains the funclet EBP value
         mov         edx, [esp + 0]                  ;; reload filter funclet address
 
-        call        RhpCallFunclet
-
-ALTERNATE_ENTRY RhpCallFilterFunclet2
+        CALL_FUNCLET Filter
 
         ;; EAX contains the result of the filter execution
         mov         edx, [ebp + 8]
diff --git a/src/coreclr/nativeaot/Runtime/i386/GC.asm b/src/coreclr/nativeaot/Runtime/i386/GC.asm
deleted file mode 100644
index bf79142e9286..000000000000
--- a/src/coreclr/nativeaot/Runtime/i386/GC.asm
+++ /dev/null
@@ -1,15 +0,0 @@
-;; Licensed to the .NET Foundation under one or more agreements.
-;; The .NET Foundation licenses this file to you under the MIT license.
-
-;;
-;; Unmanaged helpers used by the managed System.GC class.
-;;
-
-    .586
-    .model  flat
-    option  casemap:none
-    .code
-
-include AsmMacros.inc
-
-        end
diff --git a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm
index b5876f059f6a..7e2715d3dd76 100644
--- a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm
+++ b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm
@@ -11,8 +11,6 @@
 include AsmMacros.inc
 
 DEFAULT_PROBE_SAVE_FLAGS        equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP
-PROBE_SAVE_FLAGS_EVERYTHING     equ DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH
-PROBE_SAVE_FLAGS_RAX_IS_GCREF   equ DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF
 
 ;;
 ;; The prolog for all GC suspension hijackes (normal and stress). Sets up an EBP frame,
@@ -25,7 +23,7 @@ PROBE_SAVE_FLAGS_RAX_IS_GCREF   equ DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + P
 ;;  EAX: not trashed or saved
 ;;  EBP: new EBP frame with correct return address
 ;;  ESP: points to saved scratch registers (ECX & EDX)
-;;  ECX: trashed
+;;  ECX: return value flags
 ;;  EDX: thread pointer
 ;;
 HijackFixupProlog macro
@@ -44,11 +42,15 @@ HijackFixupProlog macro
         mov         ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress]
         mov         [ebp + 4], ecx
 
+        ;; Fetch the return address flags
+        mov         ecx, [edx + OFFSETOF__Thread__m_uHijackedReturnValueFlags]
+
         ;;
         ;; Clear hijack state
         ;;
         mov         dword ptr [edx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0
         mov         dword ptr [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0
+        mov         dword ptr [edx + OFFSETOF__Thread__m_uHijackedReturnValueFlags], 0
 
 endm
 
@@ -136,7 +138,7 @@ PopProbeFrame macro
         pop         eax
 endm
 
-RhpThrowHwEx equ @RhpThrowHwEx@0
+RhpThrowHwEx equ @RhpThrowHwEx@8
 extern RhpThrowHwEx : proc
 
 ;;
@@ -179,6 +181,25 @@ Abort:
 
 RhpWaitForGC  endp
 
+RhpGcPoll  proc
+        cmp         [RhpTrapThreads], TrapThreadsFlags_None
+        jne         @F                  ; forward branch - predicted not taken
+        ret
+@@:
+        jmp         RhpGcPollRare
+
+RhpGcPoll  endp
+
+RhpGcPollRare  proc
+        push        ebp
+        mov         ebp, esp
+        PUSH_COOP_PINVOKE_FRAME ecx
+        call        RhpGcPoll2
+        POP_COOP_PINVOKE_FRAME
+        pop         ebp
+        ret
+RhpGcPollRare  endp
+
 ifdef FEATURE_GC_STRESS
 ;;
 ;; Set the Thread state and invoke RhpStressGC().
@@ -237,7 +258,7 @@ FASTCALL_FUNC RhpGcProbeHijack, 0
         HijackFixupEpilog
 
 WaitForGC:
-        mov         ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX
+        or          ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX
         jmp         RhpWaitForGC
 
 FASTCALL_ENDFUNC
@@ -246,7 +267,7 @@ ifdef FEATURE_GC_STRESS
 FASTCALL_FUNC RhpGcStressHijack, 0
 
         HijackFixupProlog
-        mov         ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX
+        or          ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX
         jmp         RhpGcStressProbe
 
 FASTCALL_ENDFUNC
diff --git a/src/coreclr/nativeaot/Runtime/i386/Interlocked.S b/src/coreclr/nativeaot/Runtime/i386/Interlocked.S
deleted file mode 100644
index 876f2dfbcb80..000000000000
--- a/src/coreclr/nativeaot/Runtime/i386/Interlocked.S
+++ /dev/null
@@ -1,4 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-// TODO: Implement
diff --git a/src/coreclr/nativeaot/Runtime/i386/Interlocked.asm b/src/coreclr/nativeaot/Runtime/i386/Interlocked.asm
deleted file mode 100644
index f9599b1b8666..000000000000
--- a/src/coreclr/nativeaot/Runtime/i386/Interlocked.asm
+++ /dev/null
@@ -1,3 +0,0 @@
-;; TODO: Implement
-
-end
diff --git a/src/coreclr/nativeaot/Runtime/i386/InteropThunksHelpers.asm b/src/coreclr/nativeaot/Runtime/i386/InteropThunksHelpers.asm
index f47ff5eb3c16..f786fa592987 100644
--- a/src/coreclr/nativeaot/Runtime/i386/InteropThunksHelpers.asm
+++ b/src/coreclr/nativeaot/Runtime/i386/InteropThunksHelpers.asm
@@ -7,6 +7,8 @@
 option  casemap:none
 .code
 
+include AsmMacros.inc
+
 ;; -----------------------------------------------------------------------------------------------------------
 ;; standard macros
 ;; -----------------------------------------------------------------------------------------------------------
@@ -66,7 +68,8 @@ LEAF_ENTRY RhCommonStub, _TEXT
         ;; store thunk address in thread static
         mov     edx, [eax]
         mov     eax, [eax + POINTER_SIZE]                          ;;   eax <- target slot data
-        mov     [ecx + OFFSET ThunkParamSlot], edx                 ;;   ThunkParamSlot <- context slot data
+        add     ecx, SECTIONREL ThunkParamSlot
+        mov     [ecx], edx                 ;;   ThunkParamSlot <- context slot data
 
         ;; restore the regs we used
         pop     edx
@@ -80,22 +83,23 @@ LEAF_END RhCommonStub, _TEXT
 ;;
 ;; IntPtr RhGetCommonStubAddress()
 ;;
-LEAF_ENTRY RhGetCommonStubAddress, _TEXT
+FASTCALL_FUNC RhGetCommonStubAddress, 0
         lea     eax, [RhCommonStub]
         ret
-LEAF_END RhGetCommonStubAddress, _TEXT
+FASTCALL_ENDFUNC
 
 
 ;;
 ;; IntPtr RhGetCurrentThunkContext()
 ;;
-LEAF_ENTRY RhGetCurrentThunkContext, _TEXT
+FASTCALL_FUNC RhGetCurrentThunkContext, 0
         mov     ecx, [__tls_index]
         mov     edx, fs:[__tls_array]
         mov     ecx, [edx + ecx * POINTER_SIZE]
-        mov     eax, [ecx + OFFSET ThunkParamSlot]                 ;;   eax <- ThunkParamSlot
+        add     ecx, SECTIONREL ThunkParamSlot
+        mov     eax, [ecx]                 ;;   eax <- ThunkParamSlot
         ret
-LEAF_END RhGetCurrentThunkContext, _TEXT
+FASTCALL_ENDFUNC
 
 
 end
diff --git a/src/coreclr/nativeaot/Runtime/i386/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/i386/MiscStubs.asm
index 7c1329d6f66b..ac4b9c511cef 100644
--- a/src/coreclr/nativeaot/Runtime/i386/MiscStubs.asm
+++ b/src/coreclr/nativeaot/Runtime/i386/MiscStubs.asm
@@ -17,7 +17,7 @@ include AsmMacros.inc
 ; NOTE: this helper will modify a value of esp and must establish the frame pointer.
 PROBE_STEP equ 1000h
 
-_RhpStackProbe PROC public
+RhpStackProbe PROC public
     ; On entry:
     ;   eax - the lowest address of the stack frame being allocated (i.e. [InitialSp - FrameSize])
     ;
@@ -37,6 +37,158 @@ ProbeLoop:
     pop     ebp
     ret
 
-_RhpStackProbe ENDP
+RhpStackProbe ENDP
+
+;; *********************************************************************/
+;; LLsh - long shift left
+;;
+;; Purpose:
+;;    Does a Long Shift Left (signed and unsigned are identical)
+;;    Shifts a long left any number of bits.
+;;
+;; Entry:
+;;    EDX:EAX - long value to be shifted
+;;        ECX - number of bits to shift by
+;;
+;; Exit:
+;;    EDX:EAX - shifted value
+;;
+;; NOTE: Adapted from JIT_LLsh in CoreCLR
+;;
+RhpLLsh PROC public
+    ;; Reduce shift amount mod 64
+    and     ecx, 63
+
+    cmp     ecx, 32
+    jae     LLshMORE32
+
+    ;; Handle shifts of between bits 0 and 31
+    shld    edx, eax, cl
+    shl     eax, cl
+    ret
+
+LLshMORE32:
+    ;; Handle shifts of between bits 32 and 63
+    ;; The x86 shift instructions only use the lower 5 bits.
+    mov     edx, eax
+    xor     eax, eax
+    shl     edx, cl
+    ret
+RhpLLsh ENDP
+
+;; *********************************************************************/
+;; LRsh - long shift right
+;;
+;; Purpose:
+;;    Does a signed Long Shift Right
+;;    Shifts a long right any number of bits.
+;;
+;; Entry:
+;;    EDX:EAX - long value to be shifted
+;;        ECX - number of bits to shift by
+;;
+;; Exit:
+;;    EDX:EAX - shifted value
+;;
+;; NOTE: Adapted from JIT_LRsh in CoreCLR
+;;
+RhpLRsh PROC public
+    ;; Reduce shift amount mod 64
+    and     ecx, 63
+
+    cmp     ecx, 32
+    jae     LRshMORE32
+
+    ;; Handle shifts of between bits 0 and 31
+    shrd    eax, edx, cl
+    sar     edx, cl
+    ret
+
+LRshMORE32:
+    ;; Handle shifts of between bits 32 and 63
+    ;; The x86 shift instructions only use the lower 5 bits.
+    mov     eax, edx
+    sar     edx, 31
+    sar     eax, cl
+    ret
+RhpLRsh ENDP
+
+;; *********************************************************************/
+;; LRsz
+;;
+;; Purpose:
+;;    Does a unsigned Long Shift Right
+;;    Shifts a long right any number of bits.
+;;
+;; Entry:
+;;    EDX:EAX - long value to be shifted
+;;        ECX - number of bits to shift by
+;;
+;; Exit:
+;;    EDX:EAX - shifted value
+;;
+;; NOTE: Adapted from JIT_LRsz in CoreCLR
+;;
+RhpLRsz PROC public
+    ;; Reduce shift amount mod 64
+    and     ecx, 63
+
+    cmp     ecx, 32
+    jae     LRszMORE32
+
+    ;; Handle shifts of between bits 0 and 31
+    shrd    eax, edx, cl
+    shr     edx, cl
+    ret
+
+LRszMORE32:
+    ;; Handle shifts of between bits 32 and 63
+    ;; The x86 shift instructions only use the lower 5 bits.
+    mov     eax, edx
+    xor     edx, edx
+    shr     eax, cl
+    ret
+RhpLRsz ENDP
+
+;; *********************************************************************/
+;; LMul
+;;
+;; Purpose:
+;;    Does a long multiply (same for signed/unsigned)
+;;
+;; Entry:
+;;    Parameters are passed on the stack:
+;;    1st pushed: multiplier (QWORD)
+;;    2nd pushed: multiplicand (QWORD)
+;;
+;; Exit:
+;;    EDX:EAX - product of multiplier and multiplicand
+;;
+;; NOTE: Adapted from JIT_LMul in CoreCLR
+;;
+RhpLMul PROC public
+    mov     eax, dword ptr [esp + 8]   ; AHI
+    mov     ecx, dword ptr [esp + 16]  ; BHI
+    or      ecx, eax                   ; test for both hiwords zero.
+    mov     ecx, dword ptr [esp + 12]  ; BLO
+    jnz     LMul_hard                  ; both are zero, just mult ALO and BLO
+
+    mov     eax, dword ptr [esp + 4]
+    mul     ecx
+    ret     16
+
+LMul_hard:
+    push    ebx
+    mul     ecx                        ; eax has AHI, ecx has BLO, so AHI * BLO
+    mov     ebx, eax                   ; save result
+    mov     eax, dword ptr [esp + 8]   ; ALO
+    mul     dword ptr [esp + 20]       ; ALO * BHI
+    add     ebx, eax                   ; ebx = ((ALO * BHI) + (AHI * BLO))
+    mov     eax, dword ptr [esp + 8]   ; ALO   ;ecx = BLO
+    mul     ecx                        ; so edx:eax = ALO*BLO
+    add     edx, ebx                   ; now edx has all the LO*HI stuff
+    pop     ebx
+    ret     16
+RhpLMul ENDP
 
 end
diff --git a/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm b/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm
index 90f0d083a842..7e03d12c5808 100644
--- a/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm
+++ b/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm
@@ -6,7 +6,34 @@
         option  casemap:none
         .code
 
-
 include AsmMacros.inc
 
+FASTCALL_FUNC RhpPInvoke, 4
+        INLINE_GETTHREAD eax, edx
+
+        mov         edx, [esp]                  ; edx <- return address
+        mov         dword ptr [ecx + OFFSETOF__PInvokeTransitionFrame__m_pThread], eax
+        mov         dword ptr [ecx + OFFSETOF__PInvokeTransitionFrame__m_FramePointer], ebp
+        mov         dword ptr [ecx + OFFSETOF__PInvokeTransitionFrame__m_RIP], edx
+
+        lea         edx, [esp + 4]              ; edx <- caller SP
+        mov         dword ptr [ecx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_SAVE_RSP
+        mov         dword ptr [ecx + OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs], edx
+
+        mov         dword ptr [eax + OFFSETOF__Thread__m_pTransitionFrame], ecx
+
+        ret
+FASTCALL_ENDFUNC
+
+FASTCALL_FUNC RhpPInvokeReturn, 4
+        mov         edx, [ecx + OFFSETOF__PInvokeTransitionFrame__m_pThread]
+        mov         dword ptr [edx + OFFSETOF__Thread__m_pTransitionFrame], 0
+        cmp         [RhpTrapThreads], TrapThreadsFlags_None
+        jne         @F                  ; forward branch - predicted not taken
+        ret
+@@:
+        ; passing transition frame pointer in rcx
+        jmp         RhpWaitForGC2
+FASTCALL_ENDFUNC
+
         end
diff --git a/src/coreclr/nativeaot/Runtime/i386/StubDispatch.asm b/src/coreclr/nativeaot/Runtime/i386/StubDispatch.asm
index b1101fac5377..86dd2807fbc8 100644
--- a/src/coreclr/nativeaot/Runtime/i386/StubDispatch.asm
+++ b/src/coreclr/nativeaot/Runtime/i386/StubDispatch.asm
@@ -30,17 +30,18 @@ endm
 ;; Macro that generates a stub consuming a cache with the given number of entries.
 DEFINE_INTERFACE_DISPATCH_STUB macro entries
 
-StubName textequ @CatStr( _RhpInterfaceDispatch, entries )
+StubName textequ @CatStr( _RhpInterfaceDispatch, entries, <@0> )
+StubAVLocation textequ @CatStr( _RhpInterfaceDispatchAVLocation, entries )
 
     StubName proc public
 
         ;; Check the instance here to catch null references. We're going to touch it again below (to cache
         ;; the MethodTable pointer), but that's after we've pushed ebx below, and taking an A/V there will
-        ;; mess up the stack trace for debugging. We also don't have a spare scratch register (eax holds
-        ;; the cache pointer and the push of ebx below is precisely so we can access a second register
-        ;; to hold the MethodTable pointer).
-        test    ecx, ecx
-        je      RhpInterfaceDispatchNullReference
+        ;; mess up the stack trace. We also don't have a spare scratch register (eax holds the cache pointer
+        ;; and the push of ebx below is precisely so we can access a second register to hold the MethodTable
+        ;; pointer).
+    ALTERNATE_ENTRY StubAVLocation
+        cmp     dword ptr [ecx], ecx
 
         ;; eax currently contains the indirection cell address. We need to update it to point to the cache
         ;; block instead.
@@ -94,17 +95,8 @@ RhpInterfaceDispatchSlow proc
         jmp         _RhpUniversalTransition_DebugStepTailCall@0
 RhpInterfaceDispatchSlow endp
 
-;; Out of line helper used when we try to interface dispatch on a null pointer. Sets up the stack so the
-;; debugger gives a reasonable stack trace.
-RhpInterfaceDispatchNullReference proc public
-        push    ebp
-        mov     ebp, esp
-        mov     ebx, [ecx]  ;; This should A/V
-        int     3
-RhpInterfaceDispatchNullReference endp
-
 ;; Stub dispatch routine for dispatch to a vtable slot
-_RhpVTableOffsetDispatch proc public
+_RhpVTableOffsetDispatch@0 proc public
         ;; eax currently contains the indirection cell address. We need to update it to point to the vtable offset (which is in the m_pCache field)
         mov     eax, [eax + OFFSETOF__InterfaceDispatchCell__m_pCache]
 
@@ -116,17 +108,20 @@ _RhpVTableOffsetDispatch proc public
 
         ;; tail-jump to the target
         jmp     eax
-_RhpVTableOffsetDispatch endp
+_RhpVTableOffsetDispatch@0 endp
 
 
 ;; Initial dispatch on an interface when we don't have a cache yet.
-_RhpInitialInterfaceDispatch proc public
-    ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
+FASTCALL_FUNC RhpInitialDynamicInterfaceDispatch, 0
+ALTERNATE_ENTRY _RhpInitialInterfaceDispatch
+        ;; Trigger an AV if we're dispatching on a null this.
+        ;; The exception handling infrastructure is aware of the fact that this is the first
+        ;; instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here
+        ;; to a NullReferenceException at the callsite.
+        cmp     dword ptr [ecx], ecx
 
         jmp RhpInterfaceDispatchSlow
-
-_RhpInitialInterfaceDispatch endp
-
+FASTCALL_ENDFUNC
 
 endif ;; FEATURE_CACHED_INTERFACE_DISPATCH
 
diff --git a/src/coreclr/nativeaot/Runtime/i386/ThunkPoolThunks.asm b/src/coreclr/nativeaot/Runtime/i386/ThunkPoolThunks.asm
deleted file mode 100644
index 9db31a0136bc..000000000000
--- a/src/coreclr/nativeaot/Runtime/i386/ThunkPoolThunks.asm
+++ /dev/null
@@ -1,297 +0,0 @@
-;; Licensed to the .NET Foundation under one or more agreements.
-;; The .NET Foundation licenses this file to you under the MIT license.
-
-.586
-.model  flat
-option  casemap:none
-.code
-
-include AsmMacros.inc
-
-;; -----------------------------------------------------------------------------------------------------------
-;; standard macros
-;; -----------------------------------------------------------------------------------------------------------
-LEAF_ENTRY macro Name, Section
-    Section segment para 'CODE'
-    public  Name
-    Name    proc
-endm
-
-NAMED_LEAF_ENTRY macro Name, Section, SectionAlias
-    Section segment para alias(SectionAlias) 'CODE'
-    public  Name
-    Name    proc
-endm
-
-LEAF_END macro Name, Section
-    Name    endp
-    Section ends
-endm
-
-NAMED_READONLY_DATA_SECTION macro Section, SectionAlias
-    Section segment para alias(SectionAlias) read 'DATA'
-    DD 0
-    Section ends
-endm
-
-NAMED_READWRITE_DATA_SECTION macro Section, SectionAlias
-    Section segment para alias(SectionAlias) read write 'DATA'
-    DD 0
-    Section ends
-endm
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;  STUBS & DATA SECTIONS  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-THUNK_CODESIZE                      equ 20h     ;; 5-byte call, 1 byte pop, 6-byte lea, 6-byte jmp, 14 bytes of padding
-THUNK_DATASIZE                      equ 08h     ;; 2 dwords
-
-THUNK_POOL_NUM_THUNKS_PER_PAGE      equ 078h    ;; 120 thunks per page
-
-PAGE_SIZE                           equ 01000h  ;; 4K
-POINTER_SIZE                        equ 04h
-
-
-GET_CURRENT_IP macro
-        ALIGN   10h                             ;; make sure we align to 16-byte boundary for CFG table
-        call    @F
-    @@: pop     eax
-endm
-
-LOAD_DATA_ADDRESS macro groupIndex, index
-        ;; start                            : eax points to current instruction of the current thunk
-        ;; set eax to beginning of data page : eax <- [eax - (size of the call instruction + (THUNK_CODESIZE * current thunk's index)) + PAGE_SIZE]
-        ;; fix offset of the data           : eax <- eax + (THUNK_DATASIZE * current thunk's index)
-        lea     eax,[eax - (5 + groupIndex * THUNK_CODESIZE * 10 + THUNK_CODESIZE * index) + PAGE_SIZE + (groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * index)]
-endm
-
-JUMP_TO_COMMON macro groupIndex, index
-        ;; start                                   : eax points to current thunk's data block
-        ;; re-point eax to beginning of data page   : eax <- [eax - (THUNK_DATASIZE * current thunk's index)]
-        ;; jump to the location pointed at by the last dword in the data page : jump [eax + PAGE_SIZE - POINTER_SIZE]
-        jmp     dword ptr[eax - (groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * index) + PAGE_SIZE - POINTER_SIZE]
-endm
-
-TenThunks macro groupIndex
-        ;; Each thunk will load the address of its corresponding data (from the page that immediately follows)
-        ;; and call a common stub. The address of the common stub is setup by the caller (last dword
-        ;; in the thunks data section) depending on the 'kind' of thunks needed (interop, fat function pointers, etc...)
-
-        ;; Each data block used by a thunk consists of two dword values:
-        ;;      - Context: some value given to the thunk as context (passed in eax). Example for fat-fptrs: context = generic dictionary
-        ;;      - Target : target code that the thunk eventually jumps to.
-
-        GET_CURRENT_IP
-        LOAD_DATA_ADDRESS groupIndex,0
-        JUMP_TO_COMMON    groupIndex,0
-
-        GET_CURRENT_IP
-        LOAD_DATA_ADDRESS groupIndex,1
-        JUMP_TO_COMMON    groupIndex,1
-
-        GET_CURRENT_IP
-        LOAD_DATA_ADDRESS groupIndex,2
-        JUMP_TO_COMMON    groupIndex,2
-
-        GET_CURRENT_IP
-        LOAD_DATA_ADDRESS groupIndex,3
-        JUMP_TO_COMMON    groupIndex,3
-
-        GET_CURRENT_IP
-        LOAD_DATA_ADDRESS groupIndex,4
-        JUMP_TO_COMMON    groupIndex,4
-
-        GET_CURRENT_IP
-        LOAD_DATA_ADDRESS groupIndex,5
-        JUMP_TO_COMMON    groupIndex,5
-
-        GET_CURRENT_IP
-        LOAD_DATA_ADDRESS groupIndex,6
-        JUMP_TO_COMMON    groupIndex,6
-
-        GET_CURRENT_IP
-        LOAD_DATA_ADDRESS groupIndex,7
-        JUMP_TO_COMMON    groupIndex,7
-
-        GET_CURRENT_IP
-        LOAD_DATA_ADDRESS groupIndex,8
-        JUMP_TO_COMMON    groupIndex,8
-
-        GET_CURRENT_IP
-        LOAD_DATA_ADDRESS groupIndex,9
-        JUMP_TO_COMMON    groupIndex,9
-endm
-
-THUNKS_PAGE_BLOCK macro
-        TenThunks 0
-        TenThunks 1
-        TenThunks 2
-        TenThunks 3
-        TenThunks 4
-        TenThunks 5
-        TenThunks 6
-        TenThunks 7
-        TenThunks 8
-        TenThunks 9
-        TenThunks 10
-        TenThunks 11
-endm
-
-;;
-;; The first thunks section should be 64K aligned because it can get
-;; mapped multiple  times in memory, and mapping works on allocation
-;; granularity boundaries (we don't want to map more than what we need)
-;;
-;; The easiest way to do so is by having the thunks section at the
-;; first 64K aligned virtual address in the binary. We provide a section
-;; layout file to the linker to tell it how to layout the thunks sections
-;; that we care about. (ndp\rh\src\runtime\DLLs\app\mrt100_app_sectionlayout.txt)
-;;
-;; The PE spec says images cannot have gaps between sections (other
-;; than what is required by the section alignment value in the header),
-;; therefore we need a couple of padding data sections (otherwise the
-;; OS will not load the image).
-;;
-
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment0, ".pad0"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment1, ".pad1"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment2, ".pad2"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment3, ".pad3"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment4, ".pad4"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment5, ".pad5"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment6, ".pad6"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment7, ".pad7"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment8, ".pad8"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment9, ".pad9"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment10, ".pad10"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment11, ".pad11"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment12, ".pad12"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment13, ".pad13"
-NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment14, ".pad14"
-
-;;
-;; Thunk Stubs
-;; NOTE: Keep number of blocks in sync with macro/constant named 'NUM_THUNK_BLOCKS' in:
-;;      - ndp\FxCore\src\System.Private.CoreLib\System\Runtime\InteropServices\ThunkPool.cs
-;;      - ndp\rh\src\tools\rhbind\zapimage.h
-;;
-NAMED_LEAF_ENTRY ThunkPool, TKS0, ".tks0"
-    THUNKS_PAGE_BLOCK
-LEAF_END ThunkPool, TKS0
-
-NAMED_READWRITE_DATA_SECTION ThunkData0, ".tkd0"
-
-NAMED_LEAF_ENTRY ThunkPool1, TKS1, ".tks1"
-    THUNKS_PAGE_BLOCK
-LEAF_END ThunkPool1, TKS1
-
-NAMED_READWRITE_DATA_SECTION ThunkData1, ".tkd1"
-
-NAMED_LEAF_ENTRY ThunkPool2, TKS2, ".tks2"
-    THUNKS_PAGE_BLOCK
-LEAF_END ThunkPool2, TKS2
-
-NAMED_READWRITE_DATA_SECTION ThunkData2, ".tkd2"
-
-NAMED_LEAF_ENTRY ThunkPool3, TKS3, ".tks3"
-    THUNKS_PAGE_BLOCK
-LEAF_END ThunkPool3, TKS3
-
-NAMED_READWRITE_DATA_SECTION ThunkData3, ".tkd3"
-
-NAMED_LEAF_ENTRY ThunkPool4, TKS4, ".tks4"
-    THUNKS_PAGE_BLOCK
-LEAF_END ThunkPool4, TKS4
-
-NAMED_READWRITE_DATA_SECTION ThunkData4, ".tkd4"
-
-NAMED_LEAF_ENTRY ThunkPool5, TKS5, ".tks5"
-    THUNKS_PAGE_BLOCK
-LEAF_END ThunkPool5, TKS5
-
-NAMED_READWRITE_DATA_SECTION ThunkData5, ".tkd5"
-
-NAMED_LEAF_ENTRY ThunkPool6, TKS6, ".tks6"
-    THUNKS_PAGE_BLOCK
-LEAF_END ThunkPool6, TKS6
-
-NAMED_READWRITE_DATA_SECTION ThunkData6, ".tkd6"
-
-NAMED_LEAF_ENTRY ThunkPool7, TKS7, ".tks7"
-    THUNKS_PAGE_BLOCK
-LEAF_END ThunkPool7, TKS7
-
-NAMED_READWRITE_DATA_SECTION ThunkData7, ".tkd7"
-
-
-;;
-;; IntPtr RhpGetThunksBase()
-;;
-FASTCALL_FUNC RhpGetThunksBase, 0
-        ;; Return the address of the first thunk pool to the caller (this is really the base address)
-        lea     eax, [ThunkPool]
-        ret
-FASTCALL_ENDFUNC
-
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; General Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;;
-;; int RhpGetNumThunksPerBlock()
-;;
-FASTCALL_FUNC RhpGetNumThunksPerBlock, 0
-        mov     eax, THUNK_POOL_NUM_THUNKS_PER_PAGE
-        ret
-FASTCALL_ENDFUNC
-
-;;
-;; int RhpGetThunkSize()
-;;
-FASTCALL_FUNC RhpGetThunkSize, 0
-        mov     eax, THUNK_CODESIZE
-        ret
-FASTCALL_ENDFUNC
-
-;;
-;; int RhpGetNumThunkBlocksPerMapping()
-;;
-FASTCALL_FUNC RhpGetNumThunkBlocksPerMapping, 0
-        mov     eax, 8
-        ret
-FASTCALL_ENDFUNC
-
-;;
-;; int RhpGetThunkBlockSize
-;;
-FASTCALL_FUNC RhpGetThunkBlockSize, 0
-        mov     eax, PAGE_SIZE * 2
-        ret
-FASTCALL_ENDFUNC
-
-;;
-;; IntPtr RhpGetThunkDataBlockAddress(IntPtr thunkStubAddress)
-;;
-FASTCALL_FUNC RhpGetThunkDataBlockAddress, 4
-        mov     eax, ecx
-        mov     ecx, PAGE_SIZE - 1
-        not     ecx
-        and     eax, ecx
-        add     eax, PAGE_SIZE
-        ret
-FASTCALL_ENDFUNC
-
-;;
-;; IntPtr RhpGetThunkStubsBlockAddress(IntPtr thunkDataAddress)
-;;
-FASTCALL_FUNC RhpGetThunkStubsBlockAddress, 4
-        mov     eax, ecx
-        mov     ecx, PAGE_SIZE - 1
-        not     ecx
-        and     eax, ecx
-        sub     eax, PAGE_SIZE
-        ret
-FASTCALL_ENDFUNC
-
-
-end
diff --git a/src/coreclr/nativeaot/Runtime/i386/UniversalTransition.asm b/src/coreclr/nativeaot/Runtime/i386/UniversalTransition.asm
index d20406686318..f5abc17e985d 100644
--- a/src/coreclr/nativeaot/Runtime/i386/UniversalTransition.asm
+++ b/src/coreclr/nativeaot/Runtime/i386/UniversalTransition.asm
@@ -62,7 +62,7 @@ FASTCALL_FUNC Rhp&FunctionName&_FAKE_ENTRY, 0
         mov         ebp, esp
         push eax
         push eax
-ALTERNATE_ENTRY Rhp&FunctionName&@0
+ALTERNATE_ENTRY _Rhp&FunctionName&@0
         push ecx
         push edx
 
@@ -74,7 +74,7 @@ ALTERNATE_ENTRY Rhp&FunctionName&@0
         lea  ecx, [ebp-10h]  ; Get pointer to edx value pushed above
         call eax
 
-ALTERNATE_ENTRY ReturnFrom&FunctionName
+ALTERNATE_ENTRY _ReturnFrom&FunctionName
 
         ; We cannot make the label public as that tricks DIA stackwalker into thinking
         ; it's the beginning of a method. For this reason we export an auxiliary variable
diff --git a/src/coreclr/nativeaot/Runtime/i386/WriteBarriers.asm b/src/coreclr/nativeaot/Runtime/i386/WriteBarriers.asm
index 246f42979006..133081bee831 100644
--- a/src/coreclr/nativeaot/Runtime/i386/WriteBarriers.asm
+++ b/src/coreclr/nativeaot/Runtime/i386/WriteBarriers.asm
@@ -99,15 +99,16 @@ DEFINE_WRITE_BARRIER macro DESTREG, REFREG
 ;; Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard
 ;; decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that
 ;; location is in one of the other general registers determined by the value of REFREG.
-FASTCALL_FUNC RhpAssignRef&REFREG&, 0
+FASTCALL_FUNC RhpAssignRef&REFREG&, 8
 
     ;; Export the canonical write barrier under unqualified name as well
     ifidni <REFREG>, <EDX>
-    @RhpAssignRef@0 label proc
-    PUBLIC @RhpAssignRef@0
-    ALTERNATE_ENTRY RhpAssignRefAVLocation
+    ALTERNATE_ENTRY RhpAssignRef
+    ALTERNATE_ENTRY _RhpAssignRefAVLocation
     endif
 
+    ALTERNATE_ENTRY _RhpAssignRef&REFREG&AVLocation
+
     ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here
     ;; and the card table update we may perform below.
     mov     dword ptr [DESTREG], REFREG
@@ -196,15 +197,16 @@ DEFINE_CHECKED_WRITE_BARRIER macro DESTREG, REFREG
 ;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
 ;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction
 ;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
-FASTCALL_FUNC RhpCheckedAssignRef&REFREG&, 0
+FASTCALL_FUNC RhpCheckedAssignRef&REFREG&, 8
 
     ;; Export the canonical write barrier under unqualified name as well
     ifidni <REFREG>, <EDX>
-    @RhpCheckedAssignRef@0 label proc
-    PUBLIC @RhpCheckedAssignRef@0
-    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+    ALTERNATE_ENTRY RhpCheckedAssignRef
+    ALTERNATE_ENTRY _RhpCheckedAssignRefAVLocation
     endif
 
+    ALTERNATE_ENTRY _RhpCheckedAssignRef&REFREG&AVLocation
+
     ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here
     ;; and the card table update we may perform below.
     mov     dword ptr [DESTREG], REFREG
@@ -235,32 +237,71 @@ DEFINE_CHECKED_WRITE_BARRIER EDX, ESI
 DEFINE_CHECKED_WRITE_BARRIER EDX, EDI
 DEFINE_CHECKED_WRITE_BARRIER EDX, EBP
 
-;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
-;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at @RhpCheckedLockCmpXchgAVLocation@0
-;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
-;; pass third argument in EAX
-FASTCALL_FUNC RhpCheckedLockCmpXchg
-ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
+FASTCALL_FUNC RhpCheckedLockCmpXchg, 12
+    mov             eax, [esp+4]
     lock cmpxchg    [ecx], edx
-    jne              RhpCheckedLockCmpXchg_NoBarrierRequired_ECX_EDX
+    jne             RhpCheckedLockCmpXchg_NoBarrierRequired_ECX_EDX
 
-    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, ECX, EDX, ret
+    DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, ECX, EDX, ret 4
 
 FASTCALL_ENDFUNC
 
-;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
-;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at @RhpCheckedXchgAVLocation@0
-;; - Function "UnwindSimpleHelperToCaller" assumes the stack contains just the pushed return address
-FASTCALL_FUNC RhpCheckedXchg, 0
+FASTCALL_FUNC RhpCheckedXchg, 8
 
     ;; Setup eax with the new object for the exchange, that way it will automatically hold the correct result
     ;; afterwards and we can leave edx unaltered ready for the GC write barrier below.
     mov             eax, edx
-ALTERNATE_ENTRY RhpCheckedXchgAVLocation
     xchg            [ecx], eax
 
     DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, ECX, EDX, ret
 
 FASTCALL_ENDFUNC
 
+;;
+;; RhpByRefAssignRef simulates movs instruction for object references.
+;;
+;; On entry:
+;;      edi: address of ref-field (assigned to)
+;;      esi: address of the data (source)
+;;
+;; On exit:
+;;      edi, esi are incremented by 4,
+;;      ecx: trashed
+;;
+FASTCALL_FUNC RhpByRefAssignRef, 8
+ALTERNATE_ENTRY _RhpByRefAssignRefAVLocation1
+    mov     ecx, [esi]
+ALTERNATE_ENTRY _RhpByRefAssignRefAVLocation2
+    mov     [edi], ecx
+
+    ;; Check whether the writes were even into the heap. If not there's no card update required.
+    cmp     edi, [G_LOWEST_ADDRESS]
+    jb      RhpByRefAssignRef_NoBarrierRequired
+    cmp     edi, [G_HIGHEST_ADDRESS]
+    jae     RhpByRefAssignRef_NoBarrierRequired
+
+    UPDATE_GC_SHADOW BASENAME, ecx, edi
+
+    ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it
+    ;; (since the object won't be collected or moved by an ephemeral collection).
+    cmp     ecx, [G_EPHEMERAL_LOW]
+    jb      RhpByRefAssignRef_NoBarrierRequired
+    cmp     ecx, [G_EPHEMERAL_HIGH]
+    jae     RhpByRefAssignRef_NoBarrierRequired
+
+    mov     ecx, edi
+    shr     ecx, 10
+    add     ecx, [G_CARD_TABLE]
+    cmp     byte ptr [ecx], 0FFh
+    je      RhpByRefAssignRef_NoBarrierRequired
+
+    mov     byte ptr [ecx], 0FFh
+
+RhpByRefAssignRef_NoBarrierRequired:
+    ;; Increment the pointers before leaving
+    add     esi,4
+    add     edi,4
+    ret
+FASTCALL_ENDFUNC
+
     end
diff --git a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h
index 750faccc8283..6a3b24a39448 100644
--- a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h
+++ b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h
@@ -12,7 +12,7 @@ struct ReadyToRunHeaderConstants
     static const uint32_t Signature = 0x00525452; // 'RTR'
 
     static const uint32_t CurrentMajorVersion = 9;
-    static const uint32_t CurrentMinorVersion = 1;
+    static const uint32_t CurrentMinorVersion = 2;
 };
 
 struct ReadyToRunHeader
diff --git a/src/coreclr/nativeaot/Runtime/inc/daccess.h b/src/coreclr/nativeaot/Runtime/inc/daccess.h
index 7c237cc5ed30..2dd7c772f4e0 100644
--- a/src/coreclr/nativeaot/Runtime/inc/daccess.h
+++ b/src/coreclr/nativeaot/Runtime/inc/daccess.h
@@ -1,7 +1,6 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-#include "type_traits.hpp"
 #include "CommonTypes.h"
 
 #include "../../inc/daccess.h"
diff --git a/src/coreclr/nativeaot/Runtime/inc/type_traits.hpp b/src/coreclr/nativeaot/Runtime/inc/type_traits.hpp
deleted file mode 100644
index 0bd237aa1bc2..000000000000
--- a/src/coreclr/nativeaot/Runtime/inc/type_traits.hpp
+++ /dev/null
@@ -1,311 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-// type_traits.hpp
-//
-// Type trait metaprogramming utilities.
-//
-
-#ifndef __TYPE_TRAITS_HPP__
-#define __TYPE_TRAITS_HPP__
-
-#include "CommonTypes.h"
-
-namespace type_traits
-{
-
-namespace imp
-{
-
-struct true_type { static const bool value = true; };
-struct false_type { static const bool value = false; };
-
-////////////////////////////////////////////////////////////////////////////////
-// Helper types Small and Big - guarantee that sizeof(Small) < sizeof(Big)
-//
-
-template <class T, class U>
-struct conversion_helper
-{
-    typedef char Small;
-    struct Big { char dummy[2]; };
-    static Big   Test(...);
-    static Small Test(U);
-    static T MakeT();
-};
-
-////////////////////////////////////////////////////////////////////////////////
-// class template conversion
-// Figures out the conversion relationships between two types
-// Invocations (T and U are types):
-// a) conversion<T, U>::exists
-// returns (at compile time) true if there is an implicit conversion from T
-// to U (example: Derived to Base)
-// b) conversion<T, U>::exists2Way
-// returns (at compile time) true if there are both conversions from T
-// to U and from U to T (example: int to char and back)
-// c) conversion<T, U>::sameType
-// returns (at compile time) true if T and U represent the same type
-//
-// NOTE: might not work if T and U are in a private inheritance hierarchy.
-//
-
-template <class T, class U>
-struct conversion
-{
-    typedef imp::conversion_helper<T, U> H;
-    static const bool exists = sizeof(typename H::Small) == sizeof((H::Test(H::MakeT())));
-    static const bool exists2Way = exists && conversion<U, T>::exists;
-    static const bool sameType = false;
-};
-
-template <class T>
-struct conversion<T, T>
-{
-    static const bool exists = true;
-    static const bool exists2Way = true;
-    static const bool sameType = true;
-};
-
-template <class T>
-struct conversion<void, T>
-{
-    static const bool exists = false;
-    static const bool exists2Way = false;
-    static const bool sameType = false;
-};
-
-template <class T>
-struct conversion<T, void>
-{
-    static const bool exists = false;
-    static const bool exists2Way = false;
-    static const bool sameType = false;
-};
-
-template <>
-struct conversion<void, void>
-{
-    static const bool exists = true;
-    static const bool exists2Way = true;
-    static const bool sameType = true;
-};
-
-template <bool>
-struct is_base_of_helper;
-
-template <>
-struct is_base_of_helper<true> : public true_type {} ;
-
-template <>
-struct is_base_of_helper<false> : public false_type {} ;
-
-}// imp
-
-////////////////////////////////////////////////////////////////////////////////
-// is_base_of::value is typedefed to be true if TDerived derives from TBase
-// and false otherwise.
-//
-//
-// NOTE: use TR1 type_traits::is_base_of when available.
-//
-#ifdef _MSC_VER
-
-template <typename TBase, typename TDerived>
-struct is_base_of : public imp::is_base_of_helper<__is_base_of( TBase, TDerived)> {};
-
-#else
-
-// Note that we need to compare pointer types here, since conversion of types by-value
-// just tells us whether or not an implicit conversion constructor exists. We handle
-// type parameters that are already pointers specially; see below.
-template <typename TBase, typename TDerived>
-struct is_base_of : public imp::is_base_of_helper<imp::conversion<TDerived *, TBase *>::exists> {};
-
-// Specialization to handle type parameters that are already pointers.
-template <typename TBase, typename TDerived>
-struct is_base_of<TBase *, TDerived *> : public imp::is_base_of_helper<imp::conversion<TDerived *, TBase *>::exists> {};
-
-// Specialization to handle invalid mixing of pointer types.
-template <typename TBase, typename TDerived>
-struct is_base_of<TBase *, TDerived> : public imp::false_type {};
-
-// Specialization to handle invalid mixing of pointer types.
-template <typename TBase, typename TDerived>
-struct is_base_of<TBase, TDerived *> : public imp::false_type {};
-
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-// Remove const qualifications, if any. Access using remove_const::type
-//
-template <typename T> struct remove_const { typedef T type; };
-template <typename T> struct remove_const<T const> { typedef T type; };
-
-////////////////////////////////////////////////////////////////////////////////
-// is_signed::value is true if T is a signed integral type, false otherwise.
-//
-template <typename T>
-struct is_signed { static const bool value = (static_cast<T>(-1) < 0); };
-
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// These are related to type traits, but they are more like asserts of type
-// traits in that the result is that either the compiler does or does not
-// produce an error.
-//
-namespace type_constraints
-{
-
-////////////////////////////////////////////////////////////////////////////////
-// derived_from will produce a compiler error if TDerived does not
-// derive from TBase.
-//
-// NOTE: use TR1 type_traits::is_base_of when available.
-//
-
-template<class TBase, class TDerived> struct is_base_of
-{
-    is_base_of()
-    {
-        static_assert((type_traits::is_base_of<TBase, TDerived>::value),
-                      "is_base_of() constraint violation: TDerived does not derive from TBase");
-    }
-};
-
-}; // namespace type_constraints
-
-namespace rh { namespace std
-{
-    // Import some select components of the STL
-
-    // TEMPLATE FUNCTION for_each
-    template<class _InIt, class _Fn1>
-    inline
-    _Fn1 for_each(_InIt _First, _InIt _Last, _Fn1 _Func)
-    {   // perform function for each element
-        for (; _First != _Last; ++_First)
-            _Func(*_First);
-        return (_Func);
-    }
-
-    template<class _InIt, class _Ty>
-    inline
-    _InIt find(_InIt _First, _InIt _Last, const _Ty& _Val)
-    {   // find first matching _Val
-        for (; _First != _Last; ++_First)
-            if (*_First == _Val)
-                break;
-        return (_First);
-    }
-
-    template<class _InIt, class _Pr>
-    inline
-    _InIt find_if(_InIt _First, _InIt _Last, _Pr _Pred)
-    {   // find first satisfying _Pred
-        for (; _First != _Last; ++_First)
-            if (_Pred(*_First))
-                break;
-        return (_First);
-    }
-
-    template<class _InIt, class _Ty>
-    inline
-    bool exists(_InIt _First, _InIt _Last, const _Ty& _Val)
-    {
-        return find(_First, _Last, _Val) != _Last;
-    }
-
-    template<class _InIt, class _Pr>
-    inline
-    bool exists_if(_InIt _First, _InIt _Last, _Pr _Pred)
-    {
-        return find_if(_First, _Last, _Pred) != _Last;
-    }
-
-    template<class _InIt, class _Ty>
-    inline
-    uintptr_t count(_InIt _First, _InIt _Last, const _Ty& _Val)
-    {
-        uintptr_t _Ret = 0;
-        for (; _First != _Last; _First++)
-            if (*_First == _Val)
-                ++_Ret;
-        return _Ret;
-    }
-
-    template<class _InIt, class _Pr>
-    inline
-    uintptr_t count_if(_InIt _First, _InIt _Last, _Pr _Pred)
-    {
-        uintptr_t _Ret = 0;
-        for (; _First != _Last; _First++)
-            if (_Pred(*_First))
-                ++_Ret;
-        return _Ret;
-    }
-
-    // Forward declaration, each collection requires specialization
-    template<class _FwdIt, class _Ty>
-    inline
-    _FwdIt remove(_FwdIt _First, _FwdIt _Last, const _Ty& _Val);
-} // namespace std
-} // namespace rh
-
-#if 0
-
-// -----------------------------------------------------------------
-// Holding place for unused-but-possibly-useful-in-the-future code.
-
-// -------------------------------------------------
-// This belongs in type_traits.hpp
-
-//
-// is_pointer::value is true if the type is a pointer, false otherwise
-//
-template <typename T> struct is_pointer : public false_type {};
-template <typename T> struct is_pointer<T *> : public true_type {};
-
-//
-// Remove pointer from type, if it has one. Use remove_pointer::type
-// Further specialized in daccess.h
-//
-template <typename T> struct remove_pointer { typedef T type; };
-template <typename T> struct remove_pointer<T *> { typedef T type; };
-
-// -------------------------------------------------
-// This belongs in daccess.h
-
-namespace type_traits
-{
-
-//
-// is_pointer::value is true if the type is a pointer, false otherwise
-// specialized from type_traits.hpp
-//
-template <typename T> struct is_pointer<typename __DPtr<T> > : public type_traits::true_type {};
-
-//
-// remove_pointer::type is T with one less pointer qualification, if it had one.
-// specialized from type_traits.hpp
-//
-template <typename T> struct remove_pointer<typename __DPtr<T> > { typedef T type; };
-
-} // type_traits
-
-namespace dac
-{
-
-//
-// is_dptr::value is true if T is a __DPtr, false otherwise.
-// This is a partial specialization case for the positive case.
-//
-//template <typename T> struct is_dptr<typename __DPtr<T> > : public type_traits::true_type {};
-
-}
-
-#endif
-
-#endif
-
diff --git a/src/coreclr/nativeaot/Runtime/interoplibinterface.h b/src/coreclr/nativeaot/Runtime/interoplibinterface.h
index da57618b3f78..fce04b81f902 100644
--- a/src/coreclr/nativeaot/Runtime/interoplibinterface.h
+++ b/src/coreclr/nativeaot/Runtime/interoplibinterface.h
@@ -6,11 +6,11 @@
 class ObjCMarshalNative
 {
 public:
-    using TryGetCallback = void*(REDHAWK_CALLCONV *)(void);
-    using TryGetTaggedMemoryCallback = CLR_BOOL(REDHAWK_CALLCONV *)(_In_ Object *, _Out_ void **);
-    using BeginEndCallback = void(REDHAWK_CALLCONV *)(void);
-    using IsReferencedCallback = int(REDHAWK_CALLCONV *)(_In_ void*);
-    using EnteredFinalizationCallback = void(REDHAWK_CALLCONV *)(_In_ void*);
+    using TryGetCallback = void*(F_CALL_CONV *)(void);
+    using TryGetTaggedMemoryCallback = CLR_BOOL(F_CALL_CONV *)(_In_ Object *, _Out_ void **);
+    using BeginEndCallback = void(F_CALL_CONV *)(void);
+    using IsReferencedCallback = int(F_CALL_CONV *)(_In_ void*);
+    using EnteredFinalizationCallback = void(F_CALL_CONV *)(_In_ void*);
 
 public: // Instance inspection
     static bool IsTrackedReference(_In_ Object * pObject, _Out_ bool* isReferenced);
diff --git a/src/coreclr/nativeaot/Runtime/interoplibinterface_objc.cpp b/src/coreclr/nativeaot/Runtime/interoplibinterface_objc.cpp
index 3272721a5eb8..5a1dfe10f96b 100644
--- a/src/coreclr/nativeaot/Runtime/interoplibinterface_objc.cpp
+++ b/src/coreclr/nativeaot/Runtime/interoplibinterface_objc.cpp
@@ -24,6 +24,7 @@
 #include "thread.h"
 #include "threadstore.h"
 #include "threadstore.inl"
+#include "thread.inl"
 
 #include "interoplibinterface.h"
 
diff --git a/src/coreclr/nativeaot/Runtime/portable.cpp b/src/coreclr/nativeaot/Runtime/portable.cpp
index 90a010a68f18..a80de75942bd 100644
--- a/src/coreclr/nativeaot/Runtime/portable.cpp
+++ b/src/coreclr/nativeaot/Runtime/portable.cpp
@@ -33,7 +33,7 @@
 #include "GCMemoryHelpers.inl"
 
 #if defined(USE_PORTABLE_HELPERS) && !defined(HOST_WASM)
-EXTERN_C NATIVEAOT_API void* REDHAWK_CALLCONV RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame);
+EXTERN_C void* F_CALL_CONV RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame);
 
 static Object* AllocateObject(MethodTable* pEEType, uint32_t uFlags, uintptr_t numElements)
 {
@@ -55,7 +55,7 @@ struct gc_alloc_context
 //
 // Allocations
 //
-COOP_PINVOKE_HELPER(Object *, RhpNewFast, (MethodTable* pEEType))
+FCIMPL1(Object *, RhpNewFast, MethodTable* pEEType)
 {
     ASSERT(!pEEType->HasFinalizer());
 
@@ -75,18 +75,20 @@ COOP_PINVOKE_HELPER(Object *, RhpNewFast, (MethodTable* pEEType))
 
     return AllocateObject(pEEType, 0, 0);
 }
+FCIMPLEND
 
 #define GC_ALLOC_FINALIZE    0x1 // TODO: Defined in gc.h
 #define GC_ALLOC_ALIGN8_BIAS 0x4 // TODO: Defined in gc.h
 #define GC_ALLOC_ALIGN8      0x8 // TODO: Defined in gc.h
 
-COOP_PINVOKE_HELPER(Object *, RhpNewFinalizable, (MethodTable* pEEType))
+FCIMPL1(Object *, RhpNewFinalizable, MethodTable* pEEType)
 {
     ASSERT(pEEType->HasFinalizer());
     return AllocateObject(pEEType, GC_ALLOC_FINALIZE, 0);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(Array *, RhpNewArray, (MethodTable * pArrayEEType, int numElements))
+FCIMPL2(Array *, RhpNewArray, MethodTable * pArrayEEType, int numElements)
 {
     Thread * pCurThread = ThreadStore::GetCurrentThread();
     gc_alloc_context * acontext = pCurThread->GetAllocContext();
@@ -122,24 +124,27 @@ COOP_PINVOKE_HELPER(Array *, RhpNewArray, (MethodTable * pArrayEEType, int numEl
 
     return (Array*)AllocateObject(pArrayEEType, 0, numElements);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(String *, RhNewString, (MethodTable * pArrayEEType, int numElements))
+FCIMPL2(String *, RhNewString, MethodTable * pArrayEEType, int numElements)
 {
     // TODO: Implement. We tail call to RhpNewArray for now since there's a bunch of TODOs in the places
     // that matter anyway.
     return (String*)RhpNewArray(pArrayEEType, numElements);
 }
+FCIMPLEND
 
 #if defined(FEATURE_64BIT_ALIGNMENT)
 GPTR_DECL(MethodTable, g_pFreeObjectEEType);
 
-COOP_PINVOKE_HELPER(Object *, RhpNewFinalizableAlign8, (MethodTable* pEEType))
+FCIMPL1(Object *, RhpNewFinalizableAlign8, MethodTable* pEEType)
 {
     return AllocateObject(pEEType, GC_ALLOC_FINALIZE | GC_ALLOC_ALIGN8, 0);
 }
+FCIMPLEND
 
 #ifndef HOST_64BIT
-COOP_PINVOKE_HELPER(Object*, RhpNewFastAlign8, (MethodTable* pEEType))
+FCIMPL1(Object*, RhpNewFastAlign8, MethodTable* pEEType)
 {
     ASSERT(!pEEType->HasFinalizer());
 
@@ -174,8 +179,9 @@ COOP_PINVOKE_HELPER(Object*, RhpNewFastAlign8, (MethodTable* pEEType))
 
     return AllocateObject(pEEType, GC_ALLOC_ALIGN8, 0);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(Object*, RhpNewFastMisalign, (MethodTable* pEEType))
+FCIMPL1(Object*, RhpNewFastMisalign, MethodTable* pEEType)
 {
     Thread* pCurThread = ThreadStore::GetCurrentThread();
     gc_alloc_context* acontext = pCurThread->GetAllocContext();
@@ -207,8 +213,9 @@ COOP_PINVOKE_HELPER(Object*, RhpNewFastMisalign, (MethodTable* pEEType))
 
     return AllocateObject(pEEType, GC_ALLOC_ALIGN8 | GC_ALLOC_ALIGN8_BIAS, 0);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(Array*, RhpNewArrayAlign8, (MethodTable* pArrayEEType, int numElements))
+FCIMPL2(Array*, RhpNewArrayAlign8, MethodTable* pArrayEEType, int numElements)
 {
     Thread* pCurThread = ThreadStore::GetCurrentThread();
     gc_alloc_context* acontext = pCurThread->GetAllocContext();
@@ -256,53 +263,63 @@ COOP_PINVOKE_HELPER(Array*, RhpNewArrayAlign8, (MethodTable* pArrayEEType, int n
 
     return (Array*)AllocateObject(pArrayEEType, GC_ALLOC_ALIGN8, numElements);
 }
+FCIMPLEND
 #endif // !HOST_64BIT
 #endif // FEATURE_64BIT_ALIGNMENT
 
-COOP_PINVOKE_HELPER(void, RhpInitialDynamicInterfaceDispatch, ())
+FCIMPL0(void, RhpInitialDynamicInterfaceDispatch)
 {
     ASSERT_UNCONDITIONALLY("NYI");
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch1, ())
+FCIMPL0(void, RhpInterfaceDispatch1)
 {
     ASSERT_UNCONDITIONALLY("NYI");
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch2, ())
+FCIMPL0(void, RhpInterfaceDispatch2)
 {
     ASSERT_UNCONDITIONALLY("NYI");
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch4, ())
+FCIMPL0(void, RhpInterfaceDispatch4)
 {
     ASSERT_UNCONDITIONALLY("NYI");
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch8, ())
+FCIMPL0(void, RhpInterfaceDispatch8)
 {
     ASSERT_UNCONDITIONALLY("NYI");
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch16, ())
+FCIMPL0(void, RhpInterfaceDispatch16)
 {
     ASSERT_UNCONDITIONALLY("NYI");
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch32, ())
+FCIMPL0(void, RhpInterfaceDispatch32)
 {
     ASSERT_UNCONDITIONALLY("NYI");
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch64, ())
+FCIMPL0(void, RhpInterfaceDispatch64)
 {
     ASSERT_UNCONDITIONALLY("NYI");
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpVTableOffsetDispatch, ())
+FCIMPL0(void, RhpVTableOffsetDispatch)
 {
     ASSERT_UNCONDITIONALLY("NYI");
 }
+FCIMPLEND
 
 // @TODO Implement UniversalTransition
 EXTERN_C void * ReturnFromUniversalTransition;
@@ -318,137 +335,122 @@ void * ReturnFromUniversalTransition_DebugStepTailCall;
 //
 // Return address hijacking
 //
-COOP_PINVOKE_HELPER(void, RhpGcStressHijack, ())
+FCIMPL0(void, RhpGcStressHijack)
 {
     ASSERT_UNCONDITIONALLY("NYI");
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpGcProbeHijack, ())
+FCIMPL0(void, RhpGcProbeHijack)
 {
     ASSERT_UNCONDITIONALLY("NYI");
 }
+FCIMPLEND
 
 #endif // defined(USE_PORTABLE_HELPERS) || defined(TARGET_UNIX)
 
 #if defined(USE_PORTABLE_HELPERS)
 
 #if !defined (HOST_ARM64)
-COOP_PINVOKE_HELPER(void, RhpAssignRef, (Object ** dst, Object * ref))
+FCIMPL2(void, RhpAssignRef, Object ** dst, Object * ref)
 {
     // @TODO: USE_PORTABLE_HELPERS - Null check
     *dst = ref;
     InlineWriteBarrier(dst, ref);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpCheckedAssignRef, (Object ** dst, Object * ref))
+FCIMPL2(void, RhpCheckedAssignRef, Object ** dst, Object * ref)
 {
     // @TODO: USE_PORTABLE_HELPERS - Null check
     *dst = ref;
     InlineCheckedWriteBarrier(dst, ref);
 }
+FCIMPLEND
 #endif
 
-COOP_PINVOKE_HELPER(Object *, RhpCheckedLockCmpXchg, (Object ** location, Object * value, Object * comparand))
+FCIMPL3(Object *, RhpCheckedLockCmpXchg, Object ** location, Object * value, Object * comparand)
 {
-    // @TODO: USE_PORTABLE_HELPERS - Null check
     Object * ret = (Object *)PalInterlockedCompareExchangePointer((void * volatile *)location, value, comparand);
     InlineCheckedWriteBarrier(location, value);
     return ret;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(Object *, RhpCheckedXchg, (Object ** location, Object * value))
+FCIMPL2(Object *, RhpCheckedXchg, Object ** location, Object * value)
 {
     // @TODO: USE_PORTABLE_HELPERS - Null check
     Object * ret = (Object *)PalInterlockedExchangePointer((void * volatile *)location, value);
     InlineCheckedWriteBarrier(location, value);
     return ret;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(uint8_t, RhpLockCmpXchg8, (uint8_t * location, uint8_t value, uint8_t comparand))
-{
-    ASSERT_UNCONDITIONALLY("NYI");
-    return 0;
-}
-
-COOP_PINVOKE_HELPER(int16_t, RhpLockCmpXchg16, (int16_t * location, int16_t value, int16_t comparand))
-{
-    ASSERT_UNCONDITIONALLY("NYI");
-    return 0;
-}
-
-COOP_PINVOKE_HELPER(int32_t, RhpLockCmpXchg32, (int32_t * location, int32_t value, int32_t comparand))
-{
-    // @TODO: USE_PORTABLE_HELPERS - Null check
-    return PalInterlockedCompareExchange(location, value, comparand);
-}
-
-COOP_PINVOKE_HELPER(int64_t, RhpLockCmpXchg64, (int64_t * location, int64_t value, int64_t comparand))
-{
-    // @TODO: USE_PORTABLE_HELPERS - Null check
-    return PalInterlockedCompareExchange64(location, value, comparand);
-}
-
-EXTERN_C NATIVEAOT_API void* __cdecl RhAllocateThunksMapping()
+FCIMPL0(void*, RhAllocateThunksMapping)
 {
     return NULL;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void *, RhpGetThunksBase, ())
+FCIMPL0(void *, RhpGetThunksBase)
 {
     return NULL;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int, RhpGetNumThunkBlocksPerMapping, ())
+FCIMPL0(int, RhpGetNumThunkBlocksPerMapping)
 {
     ASSERT_UNCONDITIONALLY("NYI");
     return 0;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int, RhpGetNumThunksPerBlock, ())
+FCIMPL0(int, RhpGetNumThunksPerBlock)
 {
     ASSERT_UNCONDITIONALLY("NYI");
     return 0;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int, RhpGetThunkSize, ())
+FCIMPL0(int, RhpGetThunkSize)
 {
     ASSERT_UNCONDITIONALLY("NYI");
     return 0;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void*, RhpGetThunkDataBlockAddress, (void* pThunkStubAddress))
+FCIMPL1(void*, RhpGetThunkDataBlockAddress, void* pThunkStubAddress)
 {
     ASSERT_UNCONDITIONALLY("NYI");
     return NULL;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void*, RhpGetThunkStubsBlockAddress, (void* pThunkDataAddress))
+FCIMPL1(void*, RhpGetThunkStubsBlockAddress, void* pThunkDataAddress)
 {
     ASSERT_UNCONDITIONALLY("NYI");
     return NULL;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(int, RhpGetThunkBlockSize, ())
+FCIMPL0(int, RhpGetThunkBlockSize)
 {
     ASSERT_UNCONDITIONALLY("NYI");
     return 0;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void *, RhGetCommonStubAddress, ())
-{
-    ASSERT_UNCONDITIONALLY("NYI");
-    return NULL;
-}
-
-COOP_PINVOKE_HELPER(void *, RhGetCurrentThunkContext, ())
+FCIMPL0(void *, RhGetCommonStubAddress)
 {
     ASSERT_UNCONDITIONALLY("NYI");
     return NULL;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpGcPoll, ())
+FCIMPL0(void, RhpGcPoll)
 {
     // TODO: implement
 }
+FCIMPLEND
 
 #endif
diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h
index b9c017521057..739a4eec2309 100644
--- a/src/coreclr/nativeaot/Runtime/regdisplay.h
+++ b/src/coreclr/nativeaot/Runtime/regdisplay.h
@@ -46,8 +46,43 @@ struct REGDISPLAY
 
     inline void SetIP(PCODE IP) { this->IP = IP; }
     inline void SetSP(uintptr_t SP) { this->SP = SP; }
+
+#ifdef TARGET_X86
+    TADDR PCTAddr;
+    // SP for use by catch funclet when resuming execution
+    uintptr_t ResumeSP;
+
+    inline unsigned long *GetEaxLocation() { return (unsigned long *)pRax; }
+    inline unsigned long *GetEcxLocation() { return (unsigned long *)pRcx; }
+    inline unsigned long *GetEdxLocation() { return (unsigned long *)pRdx; }
+    inline unsigned long *GetEbpLocation() { return (unsigned long *)pRbp; }
+    inline unsigned long *GetEbxLocation() { return (unsigned long *)pRbx; }
+    inline unsigned long *GetEsiLocation() { return (unsigned long *)pRsi; }
+    inline unsigned long *GetEdiLocation() { return (unsigned long *)pRdi; }
+
+    inline void SetEaxLocation(unsigned long *loc) { pRax = (PTR_uintptr_t)loc; }
+    inline void SetEcxLocation(unsigned long *loc) { pRcx = (PTR_uintptr_t)loc; }
+    inline void SetEdxLocation(unsigned long *loc) { pRdx = (PTR_uintptr_t)loc; }
+    inline void SetEbxLocation(unsigned long *loc) { pRbx = (PTR_uintptr_t)loc; }
+    inline void SetEsiLocation(unsigned long *loc) { pRsi = (PTR_uintptr_t)loc; }
+    inline void SetEdiLocation(unsigned long *loc) { pRdi = (PTR_uintptr_t)loc; }
+    inline void SetEbpLocation(unsigned long *loc) { pRbp = (PTR_uintptr_t)loc; }
+#endif
 };
 
+#ifdef TARGET_X86
+inline TADDR GetRegdisplayFP(REGDISPLAY *display)
+{
+    return (TADDR)*display->GetEbpLocation();
+}
+
+inline void SetRegdisplayPCTAddr(REGDISPLAY *display, TADDR addr)
+{
+    display->PCTAddr = addr;
+    display->SetIP(*PTR_PCODE(addr));
+}
+#endif
+
 #elif defined(TARGET_ARM)
 
 struct REGDISPLAY
diff --git a/src/coreclr/nativeaot/Runtime/rhassert.h b/src/coreclr/nativeaot/Runtime/rhassert.h
index ecf0297980ae..34403e216f5b 100644
--- a/src/coreclr/nativeaot/Runtime/rhassert.h
+++ b/src/coreclr/nativeaot/Runtime/rhassert.h
@@ -44,6 +44,10 @@ void Assert(const char * expr, const char * file, unsigned int line_num, const c
 #define _ASSERTE(_expr) ASSERT(_expr)
 #endif
 
+#ifndef _ASSERTE_ALL_BUILDS
+#define _ASSERTE_ALL_BUILDS(_expr) ASSERT(_expr)
+#endif
+
 #define PORTABILITY_ASSERT(message) \
     ASSERT_UNCONDITIONALLY(message); \
     ASSUME(0); \
diff --git a/src/coreclr/nativeaot/Runtime/runtimeeventinternal.cpp b/src/coreclr/nativeaot/Runtime/runtimeeventinternal.cpp
index 7b4afafd8671..5de8fa373c78 100644
--- a/src/coreclr/nativeaot/Runtime/runtimeeventinternal.cpp
+++ b/src/coreclr/nativeaot/Runtime/runtimeeventinternal.cpp
@@ -5,52 +5,52 @@
 
 #ifdef FEATURE_PERFTRACING
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogContentionLockCreated(intptr_t LockID, intptr_t AssociatedObjectID, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogContentionLockCreated(intptr_t LockID, intptr_t AssociatedObjectID, uint16_t ClrInstanceID)
 {
     FireEtwContentionLockCreated(reinterpret_cast<const void*>(LockID), reinterpret_cast<const void*>(AssociatedObjectID), ClrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogContentionStart(uint8_t ContentionFlags, uint16_t ClrInstanceID, intptr_t LockID, intptr_t AssociatedObjectID, uint64_t LockOwnerThreadID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogContentionStart(uint8_t ContentionFlags, uint16_t ClrInstanceID, intptr_t LockID, intptr_t AssociatedObjectID, uint64_t LockOwnerThreadID)
 {
     FireEtwContentionStart_V2((const unsigned char)(ContentionFlags), ClrInstanceID, reinterpret_cast<const void*>(LockID), reinterpret_cast<const void*>(AssociatedObjectID), LockOwnerThreadID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogContentionStop(uint8_t ContentionFlags, uint16_t ClrInstanceID, double DurationNs)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogContentionStop(uint8_t ContentionFlags, uint16_t ClrInstanceID, double DurationNs)
 {
     FireEtwContentionStop_V1((const unsigned char)(ContentionFlags), ClrInstanceID, DurationNs);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkerThreadStart(uint32_t activeWorkerThreadCount, uint32_t retiredWorkerThreadCount, uint16_t clrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadStart(uint32_t activeWorkerThreadCount, uint32_t retiredWorkerThreadCount, uint16_t clrInstanceID)
 {
     FireEtwThreadPoolWorkerThreadStart(activeWorkerThreadCount, retiredWorkerThreadCount, clrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkerThreadStop(uint32_t ActiveWorkerThreadCount, uint32_t RetiredWorkerThreadCount, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadStop(uint32_t ActiveWorkerThreadCount, uint32_t RetiredWorkerThreadCount, uint16_t ClrInstanceID)
 {
     FireEtwThreadPoolWorkerThreadStop(ActiveWorkerThreadCount, RetiredWorkerThreadCount, ClrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkerThreadWait(uint32_t ActiveWorkerThreadCount, uint32_t RetiredWorkerThreadCount, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadWait(uint32_t ActiveWorkerThreadCount, uint32_t RetiredWorkerThreadCount, uint16_t ClrInstanceID)
 {
     FireEtwThreadPoolWorkerThreadWait(ActiveWorkerThreadCount, RetiredWorkerThreadCount, ClrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolMinMaxThreads(uint16_t MinWorkerThreads, uint16_t MaxWorkerThreads, uint16_t MinIOCompletionThreads, uint16_t MaxIOCompletionThreads, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolMinMaxThreads(uint16_t MinWorkerThreads, uint16_t MaxWorkerThreads, uint16_t MinIOCompletionThreads, uint16_t MaxIOCompletionThreads, uint16_t ClrInstanceID)
 {
     FireEtwThreadPoolMinMaxThreads(MinWorkerThreads, MaxWorkerThreads, MinIOCompletionThreads, MaxIOCompletionThreads, ClrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentSample(double Throughput, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentSample(double Throughput, uint16_t ClrInstanceID)
 {
     FireEtwThreadPoolWorkerThreadAdjustmentSample(Throughput, ClrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentAdjustment(double AverageThroughput, uint32_t NewWorkerThreadCount, uint32_t Reason, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentAdjustment(double AverageThroughput, uint32_t NewWorkerThreadCount, uint32_t Reason, uint16_t ClrInstanceID)
 {
     FireEtwThreadPoolWorkerThreadAdjustmentAdjustment(AverageThroughput, NewWorkerThreadCount, Reason, ClrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentStats(
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentStats(
     double Duration,
     double Throughput,
     double ThreadPoolWorkerThreadWait,
@@ -66,7 +66,7 @@ EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorker
     FireEtwThreadPoolWorkerThreadAdjustmentStats(Duration, Throughput, ThreadPoolWorkerThreadWait, ThroughputWave, ThroughputErrorEstimate, AverageThroughputErrorEstimate, ThroughputRatio, Confidence, NewControlSetting, NewThreadWaveMagnitude, ClrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolIOEnqueue(
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolIOEnqueue(
     void * NativeOverlapped,
     void * Overlapped,
     BOOL MultiDequeues,
@@ -75,22 +75,22 @@ EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolIOEnqu
     FireEtwThreadPoolIOEnqueue(NativeOverlapped, Overlapped, MultiDequeues, ClrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolIODequeue(void * NativeOverlapped, void * Overlapped, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolIODequeue(void * NativeOverlapped, void * Overlapped, uint16_t ClrInstanceID)
 {
     FireEtwThreadPoolIODequeue(NativeOverlapped, Overlapped, ClrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolWorkingThreadCount(uint32_t Count, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkingThreadCount(uint32_t Count, uint16_t ClrInstanceID)
 {
     FireEtwThreadPoolWorkingThreadCount(Count, ClrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogThreadPoolIOPack(void * NativeOverlapped, void * Overlapped, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolIOPack(void * NativeOverlapped, void * Overlapped, uint16_t ClrInstanceID)
 {
     FireEtwThreadPoolIOPack(NativeOverlapped, Overlapped, ClrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogExceptionThrown(const WCHAR* exceptionTypeName, const WCHAR* exceptionMessage, void* faultingIP, HRESULT hresult)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogExceptionThrown(const WCHAR* exceptionTypeName, const WCHAR* exceptionMessage, void* faultingIP, HRESULT hresult)
 {
     FireEtwExceptionThrown_V1(exceptionTypeName,
         exceptionMessage,
@@ -100,12 +100,12 @@ EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogExceptionThrown(
         GetClrInstanceId());
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogWaitHandleWaitStart(uint8_t WaitSource, intptr_t AssociatedObjectID, uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogWaitHandleWaitStart(uint8_t WaitSource, intptr_t AssociatedObjectID, uint16_t ClrInstanceID)
 {
     FireEtwWaitHandleWaitStart(WaitSource, reinterpret_cast<const void*>(AssociatedObjectID), ClrInstanceID);
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl NativeRuntimeEventSource_LogWaitHandleWaitStop(uint16_t ClrInstanceID)
+EXTERN_C void QCALLTYPE NativeRuntimeEventSource_LogWaitHandleWaitStop(uint16_t ClrInstanceID)
 {
     FireEtwWaitHandleWaitStop(ClrInstanceID);
 }
diff --git a/src/coreclr/nativeaot/Runtime/slist.inl b/src/coreclr/nativeaot/Runtime/slist.inl
index bdfbb131f27b..a9fb59898754 100644
--- a/src/coreclr/nativeaot/Runtime/slist.inl
+++ b/src/coreclr/nativeaot/Runtime/slist.inl
@@ -8,6 +8,27 @@ MSVC_DISABLE_WARNING(4127)  // conditional expression is constant --
 //-------------------------------------------------------------------------------------------------
 namespace rh { namespace std
 {
+    template<class _InIt, class _Ty>
+    inline
+    uintptr_t count(_InIt _First, _InIt _Last, const _Ty& _Val)
+    {
+        uintptr_t _Ret = 0;
+        for (; _First != _Last; _First++)
+            if (*_First == _Val)
+                ++_Ret;
+        return _Ret;
+    }
+
+    template<class _InIt, class _Ty>
+    inline
+    _InIt find(_InIt _First, _InIt _Last, const _Ty& _Val)
+    {   // find first matching _Val
+        for (; _First != _Last; ++_First)
+            if (*_First == _Val)
+                break;
+        return (_First);
+    }
+
     // Specialize rh::std::find for SList iterators so that it will use _Traits::Equals.
     template<class _Tx, class _Traits, class _Ty>
     inline
diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp
index f1da550fc01c..4096c61f36ed 100644
--- a/src/coreclr/nativeaot/Runtime/startup.cpp
+++ b/src/coreclr/nativeaot/Runtime/startup.cpp
@@ -67,12 +67,7 @@ typedef size_t GSCookie;
 
 #ifdef FEATURE_READONLY_GS_COOKIE
 
-#ifdef __APPLE__
-#define READONLY_ATTR_ARGS section("__DATA,__const")
-#else
-#define READONLY_ATTR_ARGS section(".rodata")
-#endif
-#define READONLY_ATTR __attribute__((READONLY_ATTR_ARGS))
+#define READONLY_ATTR __attribute__((section(".rodata")))
 
 // const is so that it gets placed in the .text section (which is read-only)
 // volatile is so that accesses to it do not get optimized away because of the const
diff --git a/src/coreclr/nativeaot/Runtime/stressLog.cpp b/src/coreclr/nativeaot/Runtime/stressLog.cpp
index b99f48bed801..a04f2b8169fb 100644
--- a/src/coreclr/nativeaot/Runtime/stressLog.cpp
+++ b/src/coreclr/nativeaot/Runtime/stressLog.cpp
@@ -29,9 +29,7 @@
 #include "threadstore.h"
 #include "threadstore.inl"
 #include "thread.inl"
-
-template<typename T> inline T VolatileLoad(T const * pt) { return *(T volatile const *)pt; }
-template<typename T> inline void VolatileStore(T* pt, T val) { *(T volatile *)pt = val; }
+#include "volatile.h"
 
 #ifdef STRESS_LOG
 
diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp
index d3372cb4779e..6a17a48892f8 100644
--- a/src/coreclr/nativeaot/Runtime/thread.cpp
+++ b/src/coreclr/nativeaot/Runtime/thread.cpp
@@ -30,10 +30,6 @@
 
 #ifndef DACCESS_COMPILE
 
-EXTERN_C NATIVEAOT_API void* REDHAWK_CALLCONV RhpHandleAlloc(void* pObject, int type);
-EXTERN_C NATIVEAOT_API void REDHAWK_CALLCONV RhHandleSet(void* handle, void* pObject);
-EXTERN_C NATIVEAOT_API void REDHAWK_CALLCONV RhHandleFree(void* handle);
-
 extern int (*g_RuntimeInitializationCallback)();
 static Thread* g_RuntimeInitializingThread;
 
@@ -1084,10 +1080,11 @@ void Thread::ValidateExInfoPop(ExInfo * pExInfo, void * limitSP)
 #endif // _DEBUG
 }
 
-COOP_PINVOKE_HELPER(void, RhpValidateExInfoPop, (Thread * pThread, ExInfo * pExInfo, void * limitSP))
+FCIMPL3(void, RhpValidateExInfoPop, Thread * pThread, ExInfo * pExInfo, void * limitSP)
 {
     pThread->ValidateExInfoPop(pExInfo, limitSP);
 }
+FCIMPLEND
 
 void Thread::SetDoNotTriggerGc()
 {
@@ -1129,6 +1126,44 @@ void Thread::SetActivationPending(bool isPending)
     }
 }
 
+#ifdef TARGET_X86
+
+void Thread::SetPendingRedirect(PCODE eip)
+{
+    m_LastRedirectIP = eip;
+    m_SpinCount = 0;
+}
+
+bool Thread::CheckPendingRedirect(PCODE eip)
+{
+    if (eip == m_LastRedirectIP)
+    {
+        // We need to test for an infinite loop in assembly, as this will break the heuristic we
+        // are using.
+        const BYTE short_jmp = 0xeb;    // Machine code for a short jump.
+        const BYTE self = 0xfe;         // -2.  Short jumps are calculated as [ip]+2+[second_byte].
+
+        // If we find that we are in an infinite loop, we'll set the last redirected IP to 0 so that we will
+        // redirect the next time we attempt it.  Delaying one interation allows us to narrow the window of
+        // the race we are working around in this corner case.
+        BYTE *ip = (BYTE *)m_LastRedirectIP;
+        if (ip[0] == short_jmp && ip[1] == self)
+            m_LastRedirectIP = 0;
+
+        // We set a hard limit of 5 times we will spin on this to avoid any tricky race which we have not
+        // accounted for.
+        m_SpinCount++;
+        if (m_SpinCount >= 5)
+            m_LastRedirectIP = 0;
+
+        return true;
+    }
+
+    return false;
+}
+
+#endif // TARGET_X86
+
 #endif // !DACCESS_COMPILE
 
 void Thread::ValidateExInfoStack()
@@ -1151,7 +1186,7 @@ void Thread::ValidateExInfoStack()
 #ifndef DACCESS_COMPILE
 
 #ifndef TARGET_UNIX
-EXTERN_C NATIVEAOT_API uint32_t __cdecl RhCompatibleReentrantWaitAny(UInt32_BOOL alertable, uint32_t timeout, uint32_t count, HANDLE* pHandles)
+EXTERN_C uint32_t QCALLTYPE RhCompatibleReentrantWaitAny(UInt32_BOOL alertable, uint32_t timeout, uint32_t count, HANDLE* pHandles)
 {
     return PalCompatibleWaitAny(alertable, timeout, count, pHandles, /*allowReentrantWait:*/ TRUE);
 }
@@ -1226,22 +1261,24 @@ void Thread::SetThreadAbortException(Object *exception)
     m_threadAbortException = exception;
 }
 
-COOP_PINVOKE_HELPER(Object *, RhpGetThreadAbortException, ())
+FCIMPL0(Object *, RhpGetThreadAbortException)
 {
     Thread * pCurThread = ThreadStore::RawGetCurrentThread();
     return pCurThread->GetThreadAbortException();
 }
+FCIMPLEND
 
 Object** Thread::GetThreadStaticStorage()
 {
     return &m_pThreadLocalStatics;
 }
 
-COOP_PINVOKE_HELPER(Object**, RhGetThreadStaticStorage, ())
+FCIMPL0(Object**, RhGetThreadStaticStorage)
 {
     Thread* pCurrentThread = ThreadStore::RawGetCurrentThread();
     return pCurrentThread->GetThreadStaticStorage();
 }
+FCIMPLEND
 
 InlinedThreadStaticRoot* Thread::GetInlinedThreadStaticList()
 {
@@ -1257,14 +1294,15 @@ void Thread::RegisterInlinedThreadStaticRoot(InlinedThreadStaticRoot* newRoot, T
     newRoot->m_typeManager = typeManager;
 }
 
-COOP_PINVOKE_HELPER(void, RhRegisterInlinedThreadStaticRoot, (Object** root, TypeManager* typeManager))
+FCIMPL2(void, RhRegisterInlinedThreadStaticRoot, Object** root, TypeManager* typeManager)
 {
     Thread* pCurrentThread = ThreadStore::RawGetCurrentThread();
     pCurrentThread->RegisterInlinedThreadStaticRoot((InlinedThreadStaticRoot*)root, typeManager);
 }
+FCIMPLEND
 
 // This is function is used to quickly query a value that can uniquely identify a thread
-COOP_PINVOKE_HELPER(uint8_t*, RhCurrentNativeThreadId, ())
+FCIMPL0(uint8_t*, RhCurrentNativeThreadId)
 {
 #ifndef TARGET_UNIX
     return PalNtCurrentTeb();
@@ -1272,12 +1310,14 @@ COOP_PINVOKE_HELPER(uint8_t*, RhCurrentNativeThreadId, ())
     return (uint8_t*)ThreadStore::RawGetCurrentThread();
 #endif // TARGET_UNIX
 }
+FCIMPLEND
 
 // This function is used to get the OS thread identifier for the current thread.
-COOP_PINVOKE_HELPER(uint64_t, RhCurrentOSThreadId, ())
+FCIMPL0(uint64_t, RhCurrentOSThreadId)
 {
     return PalGetCurrentOSThreadId();
 }
+FCIMPLEND
 
 // Standard calling convention variant and actual implementation for RhpReversePInvokeAttachOrTrapThread
 EXTERN_C NOINLINE void FASTCALL RhpReversePInvokeAttachOrTrapThread2(ReversePInvokeFrame* pFrame)
@@ -1290,7 +1330,7 @@ EXTERN_C NOINLINE void FASTCALL RhpReversePInvokeAttachOrTrapThread2(ReversePInv
 // PInvoke
 //
 
-COOP_PINVOKE_HELPER(void, RhpReversePInvoke, (ReversePInvokeFrame * pFrame))
+FCIMPL1(void, RhpReversePInvoke, ReversePInvokeFrame * pFrame)
 {
     Thread * pCurThread = ThreadStore::RawGetCurrentThread();
     pFrame->m_savedThread = pCurThread;
@@ -1299,27 +1339,31 @@ COOP_PINVOKE_HELPER(void, RhpReversePInvoke, (ReversePInvokeFrame * pFrame))
 
     RhpReversePInvokeAttachOrTrapThread2(pFrame);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpReversePInvokeReturn, (ReversePInvokeFrame * pFrame))
+FCIMPL1(void, RhpReversePInvokeReturn, ReversePInvokeFrame * pFrame)
 {
     pFrame->m_savedThread->InlineReversePInvokeReturn(pFrame);
 }
+FCIMPLEND
 
 #ifdef USE_PORTABLE_HELPERS
 
 #ifndef HOST_WASM
-COOP_PINVOKE_HELPER(void, RhpPInvoke, (PInvokeTransitionFrame* pFrame))
+FCIMPL1(void, RhpPInvoke, PInvokeTransitionFrame* pFrame)
 {
     Thread * pCurThread = ThreadStore::RawGetCurrentThread();
     pCurThread->InlinePInvoke(pFrame);
 }
+FCIMPLEND
 #endif // !HOST_WASM
 
-COOP_PINVOKE_HELPER(void, RhpPInvokeReturn, (PInvokeTransitionFrame* pFrame))
+FCIMPL1(void, RhpPInvokeReturn, PInvokeTransitionFrame* pFrame)
 {
     //reenter cooperative mode
     pFrame->m_pThread->InlinePInvokeReturn(pFrame);
 }
+FCIMPLEND
 
 #endif //USE_PORTABLE_HELPERS
 
diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h
index 5b9704092651..b0b270c6bf70 100644
--- a/src/coreclr/nativeaot/Runtime/thread.h
+++ b/src/coreclr/nativeaot/Runtime/thread.h
@@ -99,6 +99,10 @@ struct ThreadBuffer
 #endif // FEATURE_HIJACK
     PTR_ExInfo              m_pExInfoStackHead;
     Object*                 m_threadAbortException;                 // ThreadAbortException instance -set only during thread abort
+#ifdef TARGET_X86
+    PCODE                   m_LastRedirectIP;
+    uint64_t                m_SpinCount;
+#endif
     Object*                 m_pThreadLocalStatics;
     InlinedThreadStaticRoot* m_pInlinedThreadLocalStatics;
     GCFrameRegistration*    m_pGCFrameRegistrations;
@@ -331,6 +335,11 @@ class Thread : private ThreadBuffer
 
     bool                IsActivationPending();
     void                SetActivationPending(bool isPending);
+
+#ifdef TARGET_X86
+    void                SetPendingRedirect(PCODE eip);
+    bool                CheckPendingRedirect(PCODE eip);
+#endif
 };
 
 #ifndef __GCENV_BASE_INCLUDED__
diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp
index d49358c793c3..63bb947e2baa 100644
--- a/src/coreclr/nativeaot/Runtime/threadstore.cpp
+++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp
@@ -232,7 +232,7 @@ void SpinWait(int iteration, int usecLimit)
     int64_t ticksPerSecond = PalQueryPerformanceFrequency();
     int64_t endTicks = startTicks + (usecLimit * ticksPerSecond) / 1000000;
 
-    int l = min((unsigned)iteration, 30);
+    int l = iteration >= 0 ? min(iteration, 30): 30;
     for (int i = 0; i < l; i++)
     {
         for (int j = 0; j < (1 << i); j++)
@@ -414,20 +414,22 @@ void ThreadStore::CancelThreadAbort(Thread* targetThread)
     ResumeAllThreads(/* waitForGCEvent = */ false);
 }
 
-COOP_PINVOKE_HELPER(void *, RhpGetCurrentThread, ())
+EXTERN_C void* QCALLTYPE RhpGetCurrentThread()
 {
     return ThreadStore::GetCurrentThread();
 }
 
-COOP_PINVOKE_HELPER(void, RhpInitiateThreadAbort, (void* thread, Object * threadAbortException, CLR_BOOL doRudeAbort))
+FCIMPL3(void, RhpInitiateThreadAbort, void* thread, Object * threadAbortException, CLR_BOOL doRudeAbort)
 {
     GetThreadStore()->InitiateThreadAbort((Thread*)thread, threadAbortException, doRudeAbort);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpCancelThreadAbort, (void* thread))
+FCIMPL1(void, RhpCancelThreadAbort, void* thread)
 {
     GetThreadStore()->CancelThreadAbort((Thread*)thread);
 }
+FCIMPLEND
 
 C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer));
 
@@ -481,7 +483,6 @@ GVAL_IMPL(uint32_t, SECTIONREL__tls_CurrentThread);
 //
 // This routine supports the !Thread debugger extension routine
 //
-typedef DPTR(TADDR) PTR_TADDR;
 // static
 PTR_Thread ThreadStore::GetThreadFromTEB(TADDR pTEB)
 {
diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h
index 51c3a8572718..983f17a36aba 100644
--- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h
+++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h
@@ -5,52 +5,86 @@
 
 #include <errno.h>
 
+FORCEINLINE void PalInterlockedOperationBarrier()
+{
+#if (defined(HOST_ARM64) && !defined(LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT) && !defined(__clang__)) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64)
+    // On arm64, most of the __sync* functions generate a code sequence like:
+    //   loop:
+    //     ldaxr (load acquire exclusive)
+    //     ...
+    //     stlxr (store release exclusive)
+    //     cbnz loop
+    //
+    // It is possible for a load following the code sequence above to be reordered to occur prior to the store above due to the
+    // release barrier, this is substantiated by https://github.com/dotnet/coreclr/pull/17508. Interlocked operations in the PAL
+    // require the load to occur after the store. This memory barrier should be used following a call to a __sync* function to
+    // prevent that reordering. Code generated for arm32 includes a 'dmb' after 'cbnz', so no issue there at the moment.
+    __sync_synchronize();
+#endif
+}
+
 FORCEINLINE int32_t PalInterlockedIncrement(_Inout_ int32_t volatile *pDst)
 {
-    return __sync_add_and_fetch(pDst, 1);
+    int32_t result = __sync_add_and_fetch(pDst, 1);
+    PalInterlockedOperationBarrier();
+    return result;
 }
 
 FORCEINLINE int32_t PalInterlockedDecrement(_Inout_ int32_t volatile *pDst)
 {
-    return __sync_sub_and_fetch(pDst, 1);
+    int32_t result = __sync_sub_and_fetch(pDst, 1);
+    PalInterlockedOperationBarrier();
+    return result;
 }
 
 FORCEINLINE uint32_t PalInterlockedOr(_Inout_ uint32_t volatile *pDst, uint32_t iValue)
 {
-    return __sync_or_and_fetch(pDst, iValue);
+    int32_t result = __sync_or_and_fetch(pDst, iValue);
+    PalInterlockedOperationBarrier();
+    return result;
 }
 
 FORCEINLINE uint32_t PalInterlockedAnd(_Inout_ uint32_t volatile *pDst, uint32_t iValue)
 {
-    return __sync_and_and_fetch(pDst, iValue);
+    int32_t result = __sync_and_and_fetch(pDst, iValue);
+    PalInterlockedOperationBarrier();
+    return result;
 }
 
 FORCEINLINE int32_t PalInterlockedExchange(_Inout_ int32_t volatile *pDst, int32_t iValue)
 {
 #ifdef __clang__
-    return __sync_swap(pDst, iValue);
+    int32_t result =__sync_swap(pDst, iValue);
 #else
-    return __atomic_exchange_n(pDst, iValue, __ATOMIC_ACQ_REL);
+    int32_t result =__atomic_exchange_n(pDst, iValue, __ATOMIC_ACQ_REL);
 #endif
+    PalInterlockedOperationBarrier();
+    return result;
 }
 
 FORCEINLINE int64_t PalInterlockedExchange64(_Inout_ int64_t volatile *pDst, int64_t iValue)
 {
 #ifdef __clang__
-    return __sync_swap(pDst, iValue);
+    int32_t result =__sync_swap(pDst, iValue);
 #else
-    return __atomic_exchange_n(pDst, iValue, __ATOMIC_ACQ_REL);
+    int32_t result =__atomic_exchange_n(pDst, iValue, __ATOMIC_ACQ_REL);
 #endif
+    PalInterlockedOperationBarrier();
+    return result;
 }
 
 FORCEINLINE int32_t PalInterlockedCompareExchange(_Inout_ int32_t volatile *pDst, int32_t iValue, int32_t iComparand)
 {
-    return __sync_val_compare_and_swap(pDst, iComparand, iValue);
+    int32_t result = __sync_val_compare_and_swap(pDst, iComparand, iValue);
+    PalInterlockedOperationBarrier();
+    return result;
 }
 
 FORCEINLINE int64_t PalInterlockedCompareExchange64(_Inout_ int64_t volatile *pDst, int64_t iValue, int64_t iComparand)
 {
-    return __sync_val_compare_and_swap(pDst, iComparand, iValue);
+    int64_t result = __sync_val_compare_and_swap(pDst, iComparand, iValue);
+    PalInterlockedOperationBarrier();
+    return result;
 }
 
 #if defined(HOST_AMD64) || defined(HOST_ARM64)
@@ -58,6 +92,7 @@ FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *p
 {
     __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0];
     __int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow);
+    PalInterlockedOperationBarrier();
     pComparandAndResult[0] = (int64_t)iResult; pComparandAndResult[1] = (int64_t)(iResult >> 64);
     return iComparand == iResult;
 }
diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp
index 502aa4c885f5..3d724f50829e 100644
--- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp
+++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp
@@ -487,12 +487,12 @@ EXTERN_C intptr_t* RhpGetThunkData()
 {
     return &tls_thunkData;
 }
+#endif //FEATURE_EMULATED_TLS
 
 EXTERN_C intptr_t RhGetCurrentThunkContext()
 {
     return tls_thunkData;
 }
-#endif //FEATURE_EMULATED_TLS
 
 // Register the thread with OS to be notified when thread is about to be destroyed
 // It fails fast if a different thread was already registered.
diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
index 48efa06bc8e6..6759662d5683 100644
--- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
+++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
@@ -212,14 +212,11 @@ void UnixNativeCodeManager::EnumGcRefs(MethodInfo *    pMethodInfo,
     ASSERT(((uintptr_t)codeOffset & 1) == 0);
 #endif
 
-    if (!isActiveStackFrame)
+    bool executionAborted = ((UnixNativeMethodInfo*)pMethodInfo)->executionAborted;
+
+    if (!isActiveStackFrame && !executionAborted)
     {
-        // If we are not in the active method, we are currently pointing
-        // to the return address. That may not be reachable after a call (if call does not return)
-        // or reachable via a jump and thus have a different live set.
-        // Therefore we simply adjust the offset to inside of call instruction.
-        // NOTE: The GcInfoDecoder depends on this; if you change it, you must
-        // revisit the GcInfoEncoder/Decoder
+        // the reasons for this adjustment are explained in EECodeManager::EnumGcRefs
         codeOffset--;
     }
 
@@ -230,7 +227,7 @@ void UnixNativeCodeManager::EnumGcRefs(MethodInfo *    pMethodInfo,
     );
 
     ICodeManagerFlags flags = (ICodeManagerFlags)0;
-    if (((UnixNativeMethodInfo*)pMethodInfo)->executionAborted)
+    if (executionAborted)
         flags = ICodeManagerFlags::ExecutionAborted;
 
     if (IsFilter(pMethodInfo))
@@ -378,7 +375,7 @@ bool UnixNativeCodeManager::IsUnwindable(PTR_VOID pvAddress)
     ASSERT(((uintptr_t)pvAddress & 1) == 0);
 #endif
 
-#if defined(TARGET_ARM64)
+#if defined(TARGET_ARM64) || defined(TARGET_ARM)
     MethodInfo methodInfo;
     FindMethodInfo(pvAddress, &methodInfo);
     pMethodInfo = &methodInfo;
@@ -393,74 +390,7 @@ bool UnixNativeCodeManager::IsUnwindable(PTR_VOID pvAddress)
 #endif
 }
 
-// checks for known prolog instructions generated by ILC and returns
-//  1 - in prolog
-//  0 - not in prolog,
-// -1 - unknown.
-int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddress)
-{
-#if defined(TARGET_ARM64)
-
-// post/pre
-
-
-// stp with signed offset
-// x010 1001 00xx xxxx xxxx xxxx xxxx xxxx
-#define STP_BITS1 0x29000000
-#define STP_MASK1 0x7FC00000
-
-// stp with pre/post/no offset
-// x010 100x x0xx xxxx xxxx xxxx xxxx xxxx
-#define STP_BITS2 0x28000000
-#define STP_MASK2 0x7E400000
-
-// add fp, sp, x
-// mov fp, sp
-// 1001 0001 0xxx xxxx xxxx xx11 1111 1101
-#define ADD_FP_SP_BITS 0x910003FD
-#define ADD_FP_SP_MASK 0xFF8003FF
-
-#define STP_RT2_RT_MASK  0x7C1F
-#define STP_RT2_RT_FP_LR 0x781D
-#define STP_RN_MASK      0x3E0
-#define STP_RN_SP        0x3E0
-#define STP_RN_FP        0x3A0
-
-    UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo;
-    ASSERT(pNativeMethodInfo != NULL);
-
-    uint32_t* start  = (uint32_t*)pNativeMethodInfo->pMethodStartAddress;
-    bool savedFpLr = false;
-    bool establishedFp = false;
-
-    for (uint32_t* pInstr = (uint32_t*)start; pInstr < pvAddress && !(savedFpLr && establishedFp); pInstr++)
-    {
-        uint32_t instr = *pInstr;
-
-        if (((instr & STP_MASK1) == STP_BITS1 || (instr & STP_MASK2) == STP_BITS2) &&
-            ((instr & STP_RN_MASK) == STP_RN_SP || (instr & STP_RN_MASK) == STP_RN_FP))
-        {
-            // SP/FP-relative store of pair of registers
-            savedFpLr |= (instr & STP_RT2_RT_MASK) == STP_RT2_RT_FP_LR;
-        }
-        else if ((instr & ADD_FP_SP_MASK) == ADD_FP_SP_BITS)
-        {
-            establishedFp = true;
-        }
-        else
-        {
-            // JIT generates other patterns into the prolog that we currently don't
-            // recognize (saving unpaired register, stack pointer adjustments). We
-            // don't need to recognize these patterns unless a compact unwinding code
-            // is generated for them in ILC.
-            // https://github.com/dotnet/runtime/issues/76371
-            return -1;
-        }
-    }
-
-    return savedFpLr && establishedFp ? 0 : 1;
-
-#elif defined(TARGET_ARM)
+#if defined(TARGET_ARM)
 
 // SUB<c> SP, SP, #<imm>
 // 1011 0000 1xxx xxxx
@@ -487,6 +417,11 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre
 #define SUB_W_SP_REG_BITS 0xEBAD0D00
 #define SUB_W_SP_REG_MASK 0xFFEF8F00
 
+// ADD{S}<c>.W FP, SP, #<const>
+// 1111 0x01 000x 1101 0xxx 1011 xxxx xxxx
+#define ADD_W_FP_SP_BITS 0xF10D0B00 
+#define ADD_W_FP_SP_MASK 0xFBEF8F00
+
 // PUSH<c> <registers>
 // 1011 010x xxxx xxxx
 #define PUSH_BITS 0xB400
@@ -535,7 +470,8 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre
 // MOV R9, SP
 #define MOV_R9_SP 0x46E9
 
-    uint16_t* pInstr = (uint16_t*)pvAddress;
+static bool IsArmPrologInstruction(uint16_t* pInstr)
+{
     uint32_t instr = *pInstr;
 
     if ((instr & SUB_SP_IMM_MASK) == SUB_SP_IMM_BITS ||
@@ -551,6 +487,7 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre
     if ((instr & SUB_W_SP_IMM_MASK) == SUB_W_SP_IMM_BITS ||
         (instr & SUBW_SP_IMM_MASK) == SUBW_SP_IMM_BITS ||
         (instr & SUB_W_SP_REG_MASK) == SUB_W_SP_REG_BITS ||
+        (instr & ADD_W_FP_SP_MASK) == ADD_W_FP_SP_BITS ||
         (instr & PUSH_W_MASK_T2) == PUSH_W_BITS_T2 ||
         (instr & PUSH_W_MASK_T3) == PUSH_W_BITS_T3 ||
         (instr & VPUSH_MASK_T1) == VPUSH_BITS_T1 ||
@@ -559,6 +496,114 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre
         return 1;
     }
 
+    return 0;
+}
+
+#endif
+
+// checks for known prolog instructions generated by ILC and returns
+//  1 - in prolog
+//  0 - not in prolog,
+// -1 - unknown.
+int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddress)
+{
+#if defined(TARGET_ARM64)
+
+// post/pre
+
+
+// stp with signed offset
+// x010 1001 00xx xxxx xxxx xxxx xxxx xxxx
+#define STP_BITS1 0x29000000
+#define STP_MASK1 0x7FC00000
+
+// stp with pre/post/no offset
+// x010 100x x0xx xxxx xxxx xxxx xxxx xxxx
+#define STP_BITS2 0x28000000
+#define STP_MASK2 0x7E400000
+
+// add fp, sp, x
+// mov fp, sp
+// 1001 0001 0xxx xxxx xxxx xx11 1111 1101
+#define ADD_FP_SP_BITS 0x910003FD
+#define ADD_FP_SP_MASK 0xFF8003FF
+
+#define STP_RT2_RT_MASK  0x7C1F
+#define STP_RT2_RT_FP_LR 0x781D
+#define STP_RN_MASK      0x3E0
+#define STP_RN_SP        0x3E0
+#define STP_RN_FP        0x3A0
+
+    UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo;
+    ASSERT(pNativeMethodInfo != NULL);
+
+    uint32_t* start  = (uint32_t*)pNativeMethodInfo->pMethodStartAddress;
+    bool savedFpLr = false;
+    bool establishedFp = false;
+
+    for (uint32_t* pInstr = (uint32_t*)start; pInstr < pvAddress && !(savedFpLr && establishedFp); pInstr++)
+    {
+        uint32_t instr = *pInstr;
+
+        if (((instr & STP_MASK1) == STP_BITS1 || (instr & STP_MASK2) == STP_BITS2) &&
+            ((instr & STP_RN_MASK) == STP_RN_SP || (instr & STP_RN_MASK) == STP_RN_FP))
+        {
+            // SP/FP-relative store of pair of registers
+            savedFpLr |= (instr & STP_RT2_RT_MASK) == STP_RT2_RT_FP_LR;
+        }
+        else if ((instr & ADD_FP_SP_MASK) == ADD_FP_SP_BITS)
+        {
+            establishedFp = true;
+        }
+        else
+        {
+            // JIT generates other patterns into the prolog that we currently don't
+            // recognize (saving unpaired register, stack pointer adjustments). We
+            // don't need to recognize these patterns unless a compact unwinding code
+            // is generated for them in ILC.
+            // https://github.com/dotnet/runtime/issues/76371
+            return -1;
+        }
+    }
+
+    return savedFpLr && establishedFp ? 0 : 1;
+
+#elif defined(TARGET_ARM)
+
+    UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo;
+    ASSERT(pNativeMethodInfo != NULL);
+
+    uint16_t* pInstr = (uint16_t*)pvAddress;
+
+    // First check if the current instruction is any of the recognized prolog
+    // instructions. That may be a false positive but it's not going to be a
+    // false negative (with the exception of localloc pattern below).
+    if (IsArmPrologInstruction(pInstr))
+    {
+        // Verify that everything in front of the instruction was also a prolog.
+        pInstr = (uint16_t*)pNativeMethodInfo->pMethodStartAddress;
+        while (pInstr < pvAddress)
+        {
+            if (!IsArmPrologInstruction(pInstr))
+            {
+                return 0;
+            }
+
+            uint16_t instr = *pInstr;
+            if (instr == MOV_R9_SP)
+            {
+                // The frame has been established, so anything that follows is
+                // not considered a prolog (ie. unwinding works).
+                return 0;
+            }
+
+            // Skip over to next instruction
+            pInstr += (instr & 0xE000) == 0xE000 && (instr & 0xF800) != 0xE000 ? 2 : 1;
+        }
+
+        return 1;
+    }
+
     // The localloc pattern generated by JIT looks like:
     //
     //    movw  r4, #frameSize
@@ -577,7 +622,7 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre
     // We can look ahead by couple of instructions and look for "mov sp, rXX".
     for (int c = 5; c >= 0; --c)
     {
-        instr = *pInstr;
+        uint16_t instr = *pInstr;
         if (instr == MOV_SP_R4)
         {
             return 1;
@@ -593,15 +638,7 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre
         }
 
         // Skip over to next instruction
-        if ((instr & 0xE000) == 0xE000 && (instr & 0xF800) != 0xE000)
-        {
-            // 32-but Thumb instruction
-            pInstr += 2;
-        }
-        else
-        {
-            pInstr++;
-        }
+        pInstr += (instr & 0xE000) == 0xE000 && (instr & 0xF800) != 0xE000 ? 2 : 1;
     }
 
     return 0;
@@ -937,28 +974,49 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho
 #define VPOP_BITS_T2 0xECBD0A00
 #define VPOP_MASK_T2 0xFFBF0F00
 
-    uint32_t instr = *(uint16_t*)pvAddress;
+    uint16_t *pInstr = (uint16_t *)pvAddress;
+    uint32_t instr;
 
-    if ((instr & ADD_SP_IMM_MASK) == ADD_SP_IMM_BITS ||
-        (instr & ADD_SP_REG_MASK) == ADD_SP_REG_BITS ||
-        (instr & POP_MASK) == POP_BITS ||
-        (instr & BX_LR_MASK) == BX_LR_BITS)
+    while (1)
     {
-        return -1;
-    }
+        instr = *pInstr;
 
-    instr <<= 16;
-    instr |= *((uint16_t*)pvAddress + 1);
-
-    if ((instr & ADD_W_SP_IMM_MASK) == ADD_W_SP_IMM_BITS ||
-        (instr & ADDW_SP_IMM_MASK) == ADDW_SP_IMM_BITS ||
-        (instr & ADD_W_SP_REG_MASK) == ADD_W_SP_REG_BITS ||
-        (instr & POP_W_MASK_T2) == POP_W_BITS_T2 ||
-        (instr & POP_W_MASK_T3) == POP_W_BITS_T3 ||
-        (instr & VPOP_MASK_T1) == VPOP_BITS_T1 ||
-        (instr & VPOP_MASK_T2) == VPOP_BITS_T2)
-    {
-        return -1;
+        if ((instr & 0xE000) == 0xE000 && (instr & 0xF800) != 0xE000)
+        {
+            // 32-bit instruction
+            instr <<= 16;
+            instr |= *(pInstr + 1);
+            pInstr += 2;
+        }
+        else
+        {
+            pInstr++;
+        }
+
+        // POP, VPOP and BX LR are definitely epilog
+        if ((instr & POP_MASK) == POP_BITS ||
+            (instr & BX_LR_MASK) == BX_LR_BITS ||
+            (instr & POP_W_MASK_T2) == POP_W_BITS_T2 ||
+            (instr & POP_W_MASK_T3) == POP_W_BITS_T3 ||
+            (instr & VPOP_MASK_T1) == VPOP_BITS_T1 ||
+            (instr & VPOP_MASK_T2) == VPOP_BITS_T2)
+        {
+            return -1;
+        }
+
+        // ADD SP, xxx may be part of epilog but in methods with frame
+        // pointer it can also appear in the body. Skip to the next
+        // instruction and check if it's still epilog.
+        if ((instr & ADD_SP_IMM_MASK) == ADD_SP_IMM_BITS ||
+            (instr & ADD_SP_REG_MASK) == ADD_SP_REG_BITS ||
+            (instr & ADD_W_SP_IMM_MASK) == ADD_W_SP_IMM_BITS ||
+            (instr & ADDW_SP_IMM_MASK) == ADDW_SP_IMM_BITS ||
+            (instr & ADD_W_SP_REG_MASK) == ADD_W_SP_REG_BITS)
+        {
+            continue;
+        }
+
+        return 0;
     }
 
 #endif
diff --git a/src/coreclr/nativeaot/Runtime/unix/cgroupcpu.cpp b/src/coreclr/nativeaot/Runtime/unix/cgroupcpu.cpp
index a448062b27fd..e2e4f75d22a5 100644
--- a/src/coreclr/nativeaot/Runtime/unix/cgroupcpu.cpp
+++ b/src/coreclr/nativeaot/Runtime/unix/cgroupcpu.cpp
@@ -38,7 +38,6 @@ Module Name:
 #include "cgroupcpu.h"
 
 #define CGROUP2_SUPER_MAGIC 0x63677270
-#define TMPFS_MAGIC 0x01021994
 
 #define BASE_TEN 10
 
@@ -102,12 +101,16 @@ class CGroup
         if (result != 0)
             return 0;
 
-        switch (stats.f_type)
+        if (stats.f_type == CGROUP2_SUPER_MAGIC)
         {
-            case TMPFS_MAGIC: return 1;
-            case CGROUP2_SUPER_MAGIC: return 2;
-            default:
-                return 0;
+            return 2;
+        }
+        else
+        {
+            // Assume that if /sys/fs/cgroup exists and the file system type is not cgroup2fs,
+            // it is cgroup v1. Typically the file system type is tmpfs, but other values have
+            // been seen in the wild.
+            return 1;
         }
 #endif
     }
diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc
index 9ec98a0881b8..68631819f7de 100644
--- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc
+++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc
@@ -77,13 +77,13 @@ C_FUNC(\Name):
 
 .macro PREPARE_EXTERNAL_VAR Name, HelperReg
         movw \HelperReg, #:lower16:C_FUNC(\Name) - (. + 12)
-        movt \HelperReg, #:upper16:C_FUNC(\Name) - (. + 12)
+        movt \HelperReg, #:upper16:C_FUNC(\Name) - (. + 8)
         add \HelperReg, pc
 .endm
 
 .macro PREPARE_EXTERNAL_VAR_INDIRECT Name, HelperReg
         movw \HelperReg, #:lower16:C_FUNC(\Name) - (. + 12)
-        movt \HelperReg, #:upper16:C_FUNC(\Name) - (. + 12)
+        movt \HelperReg, #:upper16:C_FUNC(\Name) - (. + 8)
         add \HelperReg, pc
         ldr \HelperReg, [\HelperReg]
 .endm
@@ -276,8 +276,12 @@ C_FUNC(\Name):
 .endm
 
 .macro INLINE_GETTHREAD
+#ifdef FEATURE_EMULATED_TLS
+        bl C_FUNC(RhpGetThread)
+#else
         // Inlined version of call C_FUNC(RhpGetThread)
         INLINE_GET_TLS_VAR tls_CurrentThread
+#endif
 .endm
 
 .macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2
diff --git a/src/coreclr/nativeaot/Runtime/wasm/AllocFast.cpp b/src/coreclr/nativeaot/Runtime/wasm/AllocFast.cpp
index 84f03fde41e7..51f4a3527bc8 100644
--- a/src/coreclr/nativeaot/Runtime/wasm/AllocFast.cpp
+++ b/src/coreclr/nativeaot/Runtime/wasm/AllocFast.cpp
@@ -67,7 +67,7 @@ struct gc_alloc_context
 //
 // Allocations
 //
-COOP_PINVOKE_HELPER(Object*, RhpNewFast, (void* pShadowStack, MethodTable* pEEType))
+FCIMPL2(Object*, RhpNewFast, void* pShadowStack, MethodTable* pEEType)
 {
     ASSERT(!pEEType->HasFinalizer());
 
@@ -87,14 +87,16 @@ COOP_PINVOKE_HELPER(Object*, RhpNewFast, (void* pShadowStack, MethodTable* pEETy
 
     return AllocateObject(pShadowStack, pEEType, 0, 0);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(Object*, RhpNewFinalizable, (void* pShadowStack, MethodTable* pEEType))
+FCIMPL2(Object*, RhpNewFinalizable, void* pShadowStack, MethodTable* pEEType)
 {
     ASSERT(pEEType->HasFinalizer());
     return AllocateObject(pShadowStack, pEEType, GC_ALLOC_FINALIZE, 0);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(Array*, RhpNewArray, (void* pShadowStack, MethodTable* pArrayEEType, int numElements))
+FCIMPL3(Array*, RhpNewArray, void* pShadowStack, MethodTable* pArrayEEType, int numElements)
 {
     Thread* pCurThread = ThreadStore::GetCurrentThread();
     gc_alloc_context* acontext = pCurThread->GetAllocContext();
@@ -131,23 +133,26 @@ COOP_PINVOKE_HELPER(Array*, RhpNewArray, (void* pShadowStack, MethodTable* pArra
 
     return (Array*)AllocateObject(pShadowStack, pArrayEEType, 0, numElements);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(String*, RhNewString, (MethodTable* pArrayEEType, int numElements))
+FCIMPL2(String*, RhNewString, MethodTable* pArrayEEType, int numElements)
 {
     // TODO: Implement. We call RhpNewArray for now since there's a bunch of TODOs in the places that matter anyway.
     void* pShadowStack = RhpGetShadowStackTop();
     return (String*)RhpNewArray(pShadowStack, pArrayEEType, numElements);
 }
+FCIMPLEND
 
 #if defined(FEATURE_64BIT_ALIGNMENT)
 GPTR_DECL(MethodTable, g_pFreeObjectEEType);
 
-COOP_PINVOKE_HELPER(Object*, RhpNewFinalizableAlign8, (void* pShadowStack, MethodTable* pEEType))
+FCIMPL2(Object*, RhpNewFinalizableAlign8, void* pShadowStack, MethodTable* pEEType)
 {
     return AllocateObject(pShadowStack, pEEType, GC_ALLOC_FINALIZE | GC_ALLOC_ALIGN8, 0);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(Object*, RhpNewFastAlign8, (void* pShadowStack, MethodTable* pEEType))
+FCIMPL2(Object*, RhpNewFastAlign8, void* pShadowStack, MethodTable* pEEType)
 {
     ASSERT(!pEEType->HasFinalizer());
 
@@ -182,8 +187,9 @@ COOP_PINVOKE_HELPER(Object*, RhpNewFastAlign8, (void* pShadowStack, MethodTable*
 
     return AllocateObject(pShadowStack, pEEType, GC_ALLOC_ALIGN8, 0);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(Object*, RhpNewFastMisalign, (void* pShadowStack, MethodTable* pEEType))
+FCIMPL2(Object*, RhpNewFastMisalign, void* pShadowStack, MethodTable* pEEType)
 {
     Thread* pCurThread = ThreadStore::GetCurrentThread();
     gc_alloc_context* acontext = pCurThread->GetAllocContext();
@@ -215,8 +221,9 @@ COOP_PINVOKE_HELPER(Object*, RhpNewFastMisalign, (void* pShadowStack, MethodTabl
 
     return AllocateObject(pShadowStack, pEEType, GC_ALLOC_ALIGN8 | GC_ALLOC_ALIGN8_BIAS, 0);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(Array*, RhpNewArrayAlign8, (void* pShadowStack, MethodTable* pArrayEEType, int numElements))
+FCIMPL3(Array*, RhpNewArrayAlign8, void* pShadowStack, MethodTable* pArrayEEType, int numElements)
 {
     Thread* pCurThread = ThreadStore::GetCurrentThread();
     gc_alloc_context* acontext = pCurThread->GetAllocContext();
@@ -265,4 +272,5 @@ COOP_PINVOKE_HELPER(Array*, RhpNewArrayAlign8, (void* pShadowStack, MethodTable*
 
     return (Array*)AllocateObject(pShadowStack, pArrayEEType, GC_ALLOC_ALIGN8, numElements);
 }
+FCIMPLEND
 #endif // FEATURE_64BIT_ALIGNMENT
diff --git a/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.Cpp.cpp b/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.Cpp.cpp
index 260a98ead7d4..899ed85ac77f 100644
--- a/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.Cpp.cpp
+++ b/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.Cpp.cpp
@@ -6,12 +6,14 @@
 
 extern "C" void __cxa_end_catch();
 
-COOP_PINVOKE_HELPER(void, RhpThrowNativeException, ())
+FCIMPL0(void, RhpThrowNativeException)
 {
     throw 0;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpReleaseNativeException, ())
+FCIMPL0(void, RhpReleaseNativeException)
 {
     __cxa_end_catch();
 }
+FCIMPLEND
diff --git a/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.Emulated.cpp b/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.Emulated.cpp
index 64df12c977c1..63c4f6c2ce04 100644
--- a/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.Emulated.cpp
+++ b/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.Emulated.cpp
@@ -6,12 +6,14 @@
 
 extern "C" thread_local int RhpExceptionThrown = 0;
 
-COOP_PINVOKE_HELPER(void, RhpThrowNativeException, ())
+FCIMPL0(void, RhpThrowNativeException)
 {
     RhpExceptionThrown = 1;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpReleaseNativeException, ())
+FCIMPL0(void, RhpReleaseNativeException)
 {
     ASSERT(RhpExceptionThrown == 0);
 }
+FCIMPLEND
diff --git a/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.Wasm.cpp b/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.Wasm.cpp
index defdacea966b..8f5db57135da 100644
--- a/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.Wasm.cpp
+++ b/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.Wasm.cpp
@@ -4,11 +4,13 @@
 #include "CommonTypes.h"
 #include "CommonMacros.h"
 
-COOP_PINVOKE_HELPER(void, RhpThrowNativeException, ())
+FCIMPL0(void, RhpThrowNativeException)
 {
     __builtin_wasm_throw(/* CPP_EXCEPTION_TAG */ 0, nullptr);
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpReleaseNativeException, ())
+FCIMPL0(void, RhpReleaseNativeException)
 {
 }
+FCIMPLEND
diff --git a/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.cpp b/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.cpp
index 467a5c6bae5b..183cff39278b 100644
--- a/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.cpp
+++ b/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.cpp
@@ -17,7 +17,7 @@ struct VirtualUnwindFrame
 //
 thread_local VirtualUnwindFrame* t_pLastVirtualUnwindFrame = nullptr;
 
-COOP_PINVOKE_HELPER(void, RhpPushVirtualUnwindFrame, (VirtualUnwindFrame* pFrame, void* pUnwindTable, size_t unwindIndex))
+FCIMPL3(void, RhpPushVirtualUnwindFrame, VirtualUnwindFrame* pFrame, void* pUnwindTable, size_t unwindIndex)
 {
     ASSERT(t_pLastVirtualUnwindFrame < pFrame);
     pFrame->Prev = t_pLastVirtualUnwindFrame;
@@ -26,19 +26,21 @@ COOP_PINVOKE_HELPER(void, RhpPushVirtualUnwindFrame, (VirtualUnwindFrame* pFrame
 
     t_pLastVirtualUnwindFrame = pFrame;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpPopVirtualUnwindFrame, ())
+FCIMPL0(void, RhpPopVirtualUnwindFrame)
 {
     ASSERT(t_pLastVirtualUnwindFrame != nullptr);
     t_pLastVirtualUnwindFrame = t_pLastVirtualUnwindFrame->Prev;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void*, RhpGetRawLastVirtualUnwindFrameRef, ())
+FCIMPL0(void*, RhpGetRawLastVirtualUnwindFrameRef)
 {
     return &t_pLastVirtualUnwindFrame;
 }
-
+FCIMPLEND
 // We do not use these helpers. TODO-LLVM: exclude them from the WASM build.
-COOP_PINVOKE_HELPER(void*, RhpCallCatchFunclet, (void*, void*, void*, void*)) { abort(); }
-COOP_PINVOKE_HELPER(bool, RhpCallFilterFunclet, (void*, void*, void*)) { abort(); }
-COOP_PINVOKE_HELPER(void, RhpCallFinallyFunclet, (void*, void*)) { abort(); }
+FCIMPL4(void*, RhpCallCatchFunclet, void*, void*, void*, void*) { abort(); } FCIMPLEND
+FCIMPL3(bool, RhpCallFilterFunclet, void*, void*, void*) { abort(); } FCIMPLEND
+FCIMPL2(void, RhpCallFinallyFunclet, void*, void*) { abort(); } FCIMPLEND
diff --git a/src/coreclr/nativeaot/Runtime/wasm/FinalizerHelpers.SingleThreaded.cpp b/src/coreclr/nativeaot/Runtime/wasm/FinalizerHelpers.SingleThreaded.cpp
index 00c1b58c9bcb..037c63ecc5fb 100644
--- a/src/coreclr/nativeaot/Runtime/wasm/FinalizerHelpers.SingleThreaded.cpp
+++ b/src/coreclr/nativeaot/Runtime/wasm/FinalizerHelpers.SingleThreaded.cpp
@@ -15,7 +15,7 @@
 #include "thread.inl"
 
 // Finalizer method implemented by the managed runtime.
-extern "C" __cdecl void RhpProcessFinalizersAndReturn();
+extern "C" void RhpProcessFinalizersAndReturn();
 
 static void ProcessFinalizersAndReturn()
 {
@@ -42,7 +42,7 @@ void RhEnableFinalization()
     // here as it will deadlock the GC.
 }
 
-EXTERN_C NATIVEAOT_API void __cdecl RhWaitForPendingFinalizers(UInt32_BOOL allowReentrantWait)
+EXTERN_C void QCALLTYPE RhWaitForPendingFinalizers(UInt32_BOOL allowReentrantWait)
 {
     // Must be called in preemptive mode as "ProcessFinalizersAndReturn" RPIs back into managed.
     ASSERT(!ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode());
@@ -51,7 +51,7 @@ EXTERN_C NATIVEAOT_API void __cdecl RhWaitForPendingFinalizers(UInt32_BOOL allow
 }
 
 // Fetch next object which needs finalization or return null if we've reached the end of the list.
-COOP_PINVOKE_HELPER(OBJECTREF, RhpGetNextFinalizableObject, ())
+FCIMPL0 (OBJECTREF, RhpGetNextFinalizableObject)
 {
     while (true)
     {
@@ -73,3 +73,4 @@ COOP_PINVOKE_HELPER(OBJECTREF, RhpGetNextFinalizableObject, ())
         return refNext;
     }
 }
+FCIMPLEND
diff --git a/src/coreclr/nativeaot/Runtime/wasm/GcStress.cpp b/src/coreclr/nativeaot/Runtime/wasm/GcStress.cpp
index b33a6d35b821..68fdf47267d3 100644
--- a/src/coreclr/nativeaot/Runtime/wasm/GcStress.cpp
+++ b/src/coreclr/nativeaot/Runtime/wasm/GcStress.cpp
@@ -14,7 +14,7 @@
 #include "threadstore.inl"
 #include "thread.inl"
 
-COOP_PINVOKE_HELPER(void*, RhpGcStressOnce, (void* obj, uint8_t* pFlag))
+FCIMPL2(void*, RhpGcStressOnce, void* obj, uint8_t* pFlag)
 {
     if (*pFlag)
     {
@@ -53,8 +53,9 @@ COOP_PINVOKE_HELPER(void*, RhpGcStressOnce, (void* obj, uint8_t* pFlag))
     PalSetLastError(lastErrorOnEntry);
     return obj;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(Object*, RhpCheckObj, (Object* obj))
+FCIMPL1(Object*, RhpCheckObj, Object* obj)
 {
     if (obj != nullptr)
     {
@@ -68,3 +69,4 @@ COOP_PINVOKE_HELPER(Object*, RhpCheckObj, (Object* obj))
 
     return obj;
 }
+FCIMPLEND
diff --git a/src/coreclr/nativeaot/Runtime/wasm/PInvoke.cpp b/src/coreclr/nativeaot/Runtime/wasm/PInvoke.cpp
index 5ada6305b79d..6a4ca498a1dc 100644
--- a/src/coreclr/nativeaot/Runtime/wasm/PInvoke.cpp
+++ b/src/coreclr/nativeaot/Runtime/wasm/PInvoke.cpp
@@ -27,7 +27,7 @@ void* GetShadowStackTop()
     return t_pShadowStackTop;
 }
 
-COOP_PINVOKE_HELPER(void*, RhpGetOrInitShadowStackTop, ())
+FCIMPL0(void*, RhpGetOrInitShadowStackTop)
 {
     void* pShadowStack = t_pShadowStackTop;
     if (pShadowStack == nullptr)
@@ -44,20 +44,24 @@ COOP_PINVOKE_HELPER(void*, RhpGetOrInitShadowStackTop, ())
 
     return pShadowStack;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void*, RhpGetShadowStackTop, ())
+FCIMPL0(void*, RhpGetShadowStackTop)
 {
     return t_pShadowStackTop;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpSetShadowStackTop, (void* pShadowStack))
+FCIMPL1(void, RhpSetShadowStackTop, void* pShadowStack)
 {
     t_pShadowStackTop = pShadowStack;
 }
+FCIMPLEND
 
-COOP_PINVOKE_HELPER(void, RhpPInvoke, (void* pShadowStack, PInvokeTransitionFrame* pFrame))
+FCIMPL2(void, RhpPInvoke, void* pShadowStack, PInvokeTransitionFrame* pFrame)
 {
     RhpSetShadowStackTop(pShadowStack);
     Thread* pCurThread = ThreadStore::RawGetCurrentThread();
     pCurThread->InlinePInvoke(pFrame);
 }
+FCIMPLEND
diff --git a/src/coreclr/nativeaot/Runtime/wasm/StubDispatch.cpp b/src/coreclr/nativeaot/Runtime/wasm/StubDispatch.cpp
index c9642f82e639..da33ddfca7d9 100644
--- a/src/coreclr/nativeaot/Runtime/wasm/StubDispatch.cpp
+++ b/src/coreclr/nativeaot/Runtime/wasm/StubDispatch.cpp
@@ -24,7 +24,7 @@
 // Cache miss case, call the runtime to resolve the target and update the cache.
 extern "C" PCODE RhpCidResolveWasm_Managed(void* pShadowStack, Object* pObject, void* pCell);
 
-COOP_PINVOKE_HELPER(PCODE, RhpResolveInterfaceDispatch, (void* pShadowStack, Object* pObject, InterfaceDispatchCell* pCell))
+FCIMPL3(PCODE, RhpResolveInterfaceDispatch, void* pShadowStack, Object* pObject, InterfaceDispatchCell* pCell)
 {
     ASSERT(pObject != nullptr);
     InterfaceDispatchCache* pCache = (InterfaceDispatchCache*)pCell->GetCache();
@@ -43,6 +43,7 @@ COOP_PINVOKE_HELPER(PCODE, RhpResolveInterfaceDispatch, (void* pShadowStack, Obj
 
     return RhpCidResolveWasm_Managed(pShadowStack, pObject, pCell);
 }
+FCIMPLEND
 
 extern "C" void* RhpInitialInterfaceDispatch(void*, Object*, InterfaceDispatchCell*) __attribute__((alias ("RhpResolveInterfaceDispatch")));
 extern "C" void* RhpInitialDynamicInterfaceDispatch(void*, Object*, InterfaceDispatchCell*) __attribute__((alias ("RhpResolveInterfaceDispatch")));
@@ -55,11 +56,13 @@ extern "C" void* RhpInterfaceDispatch32(void*, Object*, InterfaceDispatchCell*)
 extern "C" void* RhpInterfaceDispatch64(void*, Object*, InterfaceDispatchCell*) __attribute__((alias ("RhpResolveInterfaceDispatch")));
 
 // Stub dispatch routine for dispatch to a vtable slot.
-COOP_PINVOKE_HELPER(void*, RhpVTableOffsetDispatch, (void* pShadowStack, Object* pObject, InterfaceDispatchCell* pCell))
+FCIMPL3(void*, RhpVTableOffsetDispatch, void* pShadowStack, Object* pObject, InterfaceDispatchCell* pCell)
 {
     uintptr_t pVTable = reinterpret_cast<uintptr_t>(pObject->GetMethodTable());
     uintptr_t offset = pCell->m_pCache;
 
     return *(void**)(pVTable + offset);
 }
+FCIMPLEND
+
 #endif // FEATURE_CACHED_INTERFACE_DISPATCH
diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp
index 1215431a83e0..0f2aa4f73669 100644
--- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp
+++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp
@@ -19,6 +19,22 @@
 #define GCINFODECODER_NO_EE
 #include "gcinfodecoder.cpp"
 
+#ifdef TARGET_X86
+#define FEATURE_EH_FUNCLETS
+
+// Disable contracts
+#define LIMITED_METHOD_CONTRACT
+#define LIMITED_METHOD_DAC_CONTRACT
+#define CONTRACTL
+#define CONTRACTL_END
+#define NOTHROW
+#define GC_NOTRIGGER
+
+#include "../../inc/gcdecoder.cpp"
+#include "../../inc/gc_unwind_x86.h"
+#include "../../vm/gc_unwind_x86.inl"
+#endif
+
 #define UBF_FUNC_KIND_MASK      0x03
 #define UBF_FUNC_KIND_ROOT      0x00
 #define UBF_FUNC_KIND_HANDLER   0x01
@@ -167,7 +183,6 @@ static PTR_VOID GetUnwindDataBlob(TADDR moduleBase, PTR_RUNTIME_FUNCTION pRuntim
 #endif
 }
 
-
 CoffNativeCodeManager::CoffNativeCodeManager(TADDR moduleBase,
                                              PTR_VOID pvManagedCodeStartRange, uint32_t cbManagedCodeRange,
                                              PTR_RUNTIME_FUNCTION pRuntimeFunctionTable, uint32_t nRuntimeFunctionTable,
@@ -306,7 +321,7 @@ bool CoffNativeCodeManager::IsFilter(MethodInfo * pMethInfo)
 }
 
 PTR_VOID CoffNativeCodeManager::GetFramePointer(MethodInfo *   pMethInfo,
-                                         REGDISPLAY *   pRegisterSet)
+                                                REGDISPLAY *   pRegisterSet)
 {
     CoffNativeMethodInfo * pMethodInfo = (CoffNativeMethodInfo *)pMethInfo;
 
@@ -324,6 +339,39 @@ PTR_VOID CoffNativeCodeManager::GetFramePointer(MethodInfo *   pMethInfo,
     return NULL;
 }
 
+#ifdef TARGET_X86
+uintptr_t CoffNativeCodeManager::GetResumeSp(MethodInfo *   pMethodInfo,
+                                             REGDISPLAY *   pRegisterSet)
+{
+    PTR_uint8_t gcInfo;
+    uint32_t codeOffset = GetCodeOffset(pMethodInfo, (PTR_VOID)pRegisterSet->IP, &gcInfo);
+
+    hdrInfo infoBuf;
+    size_t infoSize = DecodeGCHdrInfo(GCInfoToken(gcInfo), codeOffset, &infoBuf);
+    PTR_CBYTE table = gcInfo + infoSize;
+
+    _ASSERTE(infoBuf.epilogOffs == hdrInfo::NOT_IN_EPILOG && infoBuf.prologOffs == hdrInfo::NOT_IN_PROLOG);
+
+    bool isESPFrame = !infoBuf.ebpFrame && !infoBuf.doubleAlign;
+
+    CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo;
+    if (pNativeMethodInfo->mainRuntimeFunction != pNativeMethodInfo->runtimeFunction)
+    {
+        // Treat funclet's frame as ESP frame
+        isESPFrame = true;
+    }
+
+    if (isESPFrame)
+    {
+        const uintptr_t curESP = pRegisterSet->SP;
+        return curESP + GetPushedArgSize(&infoBuf, table, codeOffset);
+    }
+
+    const uintptr_t curEBP = pRegisterSet->GetFP();
+    return GetOutermostBaseFP(curEBP, &infoBuf);
+}
+#endif // TARGET_X86
+
 uint32_t CoffNativeCodeManager::GetCodeOffset(MethodInfo* pMethodInfo, PTR_VOID address, /*out*/ PTR_uint8_t* gcInfo)
 {
     CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo;
@@ -351,7 +399,6 @@ uint32_t CoffNativeCodeManager::GetCodeOffset(MethodInfo* pMethodInfo, PTR_VOID
 
 bool CoffNativeCodeManager::IsSafePoint(PTR_VOID pvAddress)
 {
-#ifdef USE_GC_INFO_DECODER
     MethodInfo pMethodInfo;
     if (!FindMethodInfo(pvAddress, &pMethodInfo))
     {
@@ -361,6 +408,7 @@ bool CoffNativeCodeManager::IsSafePoint(PTR_VOID pvAddress)
     PTR_uint8_t gcInfo;
     uint32_t codeOffset = GetCodeOffset(&pMethodInfo, pvAddress, &gcInfo);
 
+#ifdef USE_GC_INFO_DECODER
     GcInfoDecoder decoder(
         GCInfoToken(gcInfo),
         GcInfoDecoderFlags(DECODE_INTERRUPTIBILITY),
@@ -369,9 +417,11 @@ bool CoffNativeCodeManager::IsSafePoint(PTR_VOID pvAddress)
 
     return decoder.IsInterruptible();
 #else
-    // x86 has custom GC info, see DecodeGCHdrInfo in eetwain.cpp
-    PORTABILITY_ASSERT("IsSafePoint");
-    RhFailFast();
+    // Extract the necessary information from the info block header
+    hdrInfo info;
+    DecodeGCHdrInfo(GCInfoToken(gcInfo), codeOffset, &info);
+
+    return info.interruptible && info.prologOffs == hdrInfo::NOT_IN_PROLOG && info.epilogOffs == hdrInfo::NOT_IN_EPILOG;
 #endif
 }
 
@@ -381,29 +431,13 @@ void CoffNativeCodeManager::EnumGcRefs(MethodInfo *    pMethodInfo,
                                        GCEnumContext * hCallback,
                                        bool            isActiveStackFrame)
 {
-#ifdef USE_GC_INFO_DECODER
     PTR_uint8_t gcInfo;
     uint32_t codeOffset = GetCodeOffset(pMethodInfo, safePointAddress, &gcInfo);
 
-    if (!isActiveStackFrame)
-    {
-        // If we are not in the active method, we are currently pointing
-        // to the return address. That may not be reachable after a call (if call does not return)
-        // or reachable via a jump and thus have a different live set.
-        // Therefore we simply adjust the offset to inside of call instruction.
-        // NOTE: The GcInfoDecoder depends on this; if you change it, you must
-        // revisit the GcInfoEncoder/Decoder
-        codeOffset--;
-    }
-
-    GcInfoDecoder decoder(
-        GCInfoToken(gcInfo),
-        GcInfoDecoderFlags(DECODE_GC_LIFETIMES | DECODE_SECURITY_OBJECT | DECODE_VARARG),
-        codeOffset
-        );
+    bool executionAborted = ((CoffNativeMethodInfo *)pMethodInfo)->executionAborted;
 
     ICodeManagerFlags flags = (ICodeManagerFlags)0;
-    if (((CoffNativeMethodInfo *)pMethodInfo)->executionAborted)
+    if (executionAborted)
         flags = ICodeManagerFlags::ExecutionAborted;
 
     if (IsFilter(pMethodInfo))
@@ -412,6 +446,19 @@ void CoffNativeCodeManager::EnumGcRefs(MethodInfo *    pMethodInfo,
     if (isActiveStackFrame)
         flags = (ICodeManagerFlags)(flags | ICodeManagerFlags::ActiveStackFrame);
 
+#ifdef USE_GC_INFO_DECODER
+    if (!isActiveStackFrame && !executionAborted)
+    {
+        // the reasons for this adjustment are explained in EECodeManager::EnumGcRefs
+        codeOffset--;
+    }
+
+    GcInfoDecoder decoder(
+        GCInfoToken(gcInfo),
+        GcInfoDecoderFlags(DECODE_GC_LIFETIMES | DECODE_SECURITY_OBJECT | DECODE_VARARG),
+        codeOffset
+        );
+
     if (!decoder.EnumerateLiveSlots(
         pRegisterSet,
         isActiveStackFrame /* reportScratchSlots */,
@@ -423,9 +470,22 @@ void CoffNativeCodeManager::EnumGcRefs(MethodInfo *    pMethodInfo,
         assert(false);
     }
 #else
-    // x86 has custom GC info, see EnumGcRefs in eetwain.cpp
-    PORTABILITY_ASSERT("EnumGcRefs");
-    RhFailFast();
+    size_t unwindDataBlobSize;
+    CoffNativeMethodInfo* pNativeMethodInfo = (CoffNativeMethodInfo *) pMethodInfo;
+    PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->runtimeFunction, &unwindDataBlobSize);
+    PTR_uint8_t p = dac_cast<PTR_uint8_t>(pUnwindDataBlob) + unwindDataBlobSize;
+    uint8_t unwindBlockFlags = *p++;
+
+    ::EnumGcRefsX86(pRegisterSet,
+                    (PTR_CBYTE)(m_moduleBase + pNativeMethodInfo->mainRuntimeFunction->BeginAddress),
+                    codeOffset,
+                    GCInfoToken(gcInfo),
+                    (PTR_CBYTE)(m_moduleBase + pNativeMethodInfo->runtimeFunction->BeginAddress),
+                    (unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT,
+                    (unwindBlockFlags & UBF_FUNC_KIND_MASK) == UBF_FUNC_KIND_FILTER,
+                    flags,
+                    hCallback->pCallback,
+                    hCallback);
 #endif
 
 }
@@ -474,8 +534,13 @@ uintptr_t CoffNativeCodeManager::GetConservativeUpperBoundForOutgoingArgs(Method
         // all outgoing arguments.
         upperBound = dac_cast<TADDR>(basePointer + slot);
 #else
-        PORTABILITY_ASSERT("GetConservativeUpperBoundForOutgoingArgs");
-        RhFailFast();
+        hdrInfo info;
+        DecodeGCHdrInfo(GCInfoToken(p), 0, &info);
+        assert(info.revPInvokeOffset != INVALID_REV_PINVOKE_OFFSET);
+        upperBound =
+            info.ebpFrame ?
+            dac_cast<TADDR>(pRegisterSet->GetFP()) - info.revPInvokeOffset :
+            dac_cast<TADDR>(pRegisterSet->GetSP()) + info.revPInvokeOffset;
 #endif
     }
     else
@@ -535,11 +600,25 @@ uintptr_t CoffNativeCodeManager::GetConservativeUpperBoundForOutgoingArgs(Method
                         NULL);
 
         upperBound = dac_cast<TADDR>(context.Sp);
-
 #else
-        PORTABILITY_ASSERT("GetConservativeUpperBoundForOutgoingArgs");
-        upperBound = NULL;
-        RhFailFast();
+        PTR_uint8_t gcInfo;
+        uint32_t codeOffset = GetCodeOffset(pMethodInfo, (PTR_VOID)pRegisterSet->IP, &gcInfo);
+
+        hdrInfo infoBuf;
+        size_t infoSize = DecodeGCHdrInfo(GCInfoToken(gcInfo), codeOffset, &infoBuf);
+        PTR_CBYTE table = gcInfo + infoSize;
+
+        REGDISPLAY registerSet = *pRegisterSet;
+
+        ::UnwindStackFrameX86(&registerSet,
+                              (PTR_CBYTE)(m_moduleBase + pNativeMethodInfo->mainRuntimeFunction->BeginAddress),
+                              codeOffset,
+                              &infoBuf,
+                              table,
+                              (PTR_CBYTE)(m_moduleBase + pNativeMethodInfo->runtimeFunction->BeginAddress),
+                              (unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT,
+                              true);
+        upperBound = dac_cast<TADDR>(registerSet.PCTAddr);
 #endif
     }
     return upperBound;
@@ -587,21 +666,46 @@ bool CoffNativeCodeManager::UnwindStackFrame(MethodInfo *    pMethodInfo,
         }
 
         *ppPreviousTransitionFrame = *(PInvokeTransitionFrame**)(basePointer + slot);
+#else
+        hdrInfo info;
+        DecodeGCHdrInfo(GCInfoToken(p), 0, &info);
+        assert(info.revPInvokeOffset != INVALID_REV_PINVOKE_OFFSET);
+        *ppPreviousTransitionFrame =
+            info.ebpFrame ?
+            *(PInvokeTransitionFrame**)(dac_cast<TADDR>(pRegisterSet->GetFP()) - info.revPInvokeOffset) :
+            *(PInvokeTransitionFrame**)(dac_cast<TADDR>(pRegisterSet->GetSP()) + info.revPInvokeOffset);
+#endif
 
         if ((flags & USFF_StopUnwindOnTransitionFrame) != 0)
         {
             return true;
         }
-#else
-        PORTABILITY_ASSERT("GetConservativeUpperBoundForOutgoingArgs");
-        RhFailFast();
-#endif
     }
     else
     {
         *ppPreviousTransitionFrame = NULL;
     }
 
+#if defined(TARGET_X86)
+    PTR_uint8_t gcInfo;
+    uint32_t codeOffset = GetCodeOffset(pMethodInfo, (PTR_VOID)pRegisterSet->IP, &gcInfo);
+
+    hdrInfo infoBuf;
+    size_t infoSize = DecodeGCHdrInfo(GCInfoToken(gcInfo), codeOffset, &infoBuf);
+    PTR_CBYTE table = gcInfo + infoSize;
+
+    if (!::UnwindStackFrameX86(pRegisterSet,
+                               (PTR_CBYTE)(m_moduleBase + pNativeMethodInfo->mainRuntimeFunction->BeginAddress),
+                               codeOffset,
+                               &infoBuf,
+                               table,
+                               (PTR_CBYTE)(m_moduleBase + pNativeMethodInfo->runtimeFunction->BeginAddress),
+                               (unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT,
+                               true))
+    {
+        return false;
+    }
+#else
     CONTEXT context;
     KNONVOLATILE_CONTEXT_POINTERS contextPointers;
 
@@ -635,10 +739,7 @@ bool CoffNativeCodeManager::UnwindStackFrame(MethodInfo *    pMethodInfo,
 
     FOR_EACH_NONVOLATILE_REGISTER(REGDISPLAY_TO_CONTEXT);
 
-#if defined(TARGET_X86)
-    PORTABILITY_ASSERT("CoffNativeCodeManager::UnwindStackFrame");
-#elif defined(TARGET_AMD64)
-
+#if defined(TARGET_AMD64)
     if (!(flags & USFF_GcUnwind))
     {
         memcpy(&context.Xmm6, pRegisterSet->Xmm, sizeof(pRegisterSet->Xmm));
@@ -696,7 +797,7 @@ bool CoffNativeCodeManager::UnwindStackFrame(MethodInfo *    pMethodInfo,
         for (int i = 8; i < 16; i++)
             pRegisterSet->D[i - 8] = context.V[i].Low;
     }
-#endif // defined(TARGET_X86)
+#endif
 
     FOR_EACH_NONVOLATILE_REGISTER(CONTEXT_TO_REGDISPLAY);
 
@@ -705,6 +806,8 @@ bool CoffNativeCodeManager::UnwindStackFrame(MethodInfo *    pMethodInfo,
 #undef REGDISPLAY_TO_CONTEXT
 #undef CONTEXT_TO_REGDISPLAY
 
+#endif // defined(TARGET_X86)
+
     return true;
 }
 
@@ -732,7 +835,6 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo *    pMethodIn
                                                 PTR_PTR_VOID *  ppvRetAddrLocation, // out
                                                 GCRefKind *     pRetValueKind)      // out
 {
-#ifdef USE_GC_INFO_DECODER
     CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo;
 
     size_t unwindDataBlobSize;
@@ -757,6 +859,7 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo *    pMethodIn
     if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0)
         p += sizeof(int32_t);
 
+#ifdef USE_GC_INFO_DECODER
     // Decode the GC info for the current method to determine its return type
     GcInfoDecoderFlags flags = DECODE_RETURN_KIND;
 #if defined(TARGET_ARM64)
@@ -845,8 +948,35 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo *    pMethodIn
     return false;
 #endif // defined(TARGET_AMD64)
 #else // defined(USE_GC_INFO_DECODER)
-    PORTABILITY_ASSERT("GetReturnAddressHijackInfo");
-    RhFailFast();
+    PTR_uint8_t gcInfo;
+    uint32_t codeOffset = GetCodeOffset(pMethodInfo, (PTR_VOID)pRegisterSet->IP, &gcInfo);
+    hdrInfo infoBuf;
+    size_t infoSize = DecodeGCHdrInfo(GCInfoToken(gcInfo), codeOffset, &infoBuf);
+
+    // TODO: Hijack with saving the return value in FP stack
+    if (infoBuf.returnKind == RT_Float)
+    {
+        return false;
+    }
+
+    REGDISPLAY registerSet = *pRegisterSet;
+
+    if (!::UnwindStackFrameX86(&registerSet,
+                               (PTR_CBYTE)(m_moduleBase + pNativeMethodInfo->mainRuntimeFunction->BeginAddress),
+                               codeOffset,
+                               &infoBuf,
+                               gcInfo + infoSize,
+                               (PTR_CBYTE)(m_moduleBase + pNativeMethodInfo->runtimeFunction->BeginAddress),
+                               (unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT,
+                               false))
+    {
+        return false;
+    }
+
+    *ppvRetAddrLocation = (PTR_PTR_VOID)registerSet.PCTAddr;
+    *pRetValueKind = GetGcRefKind(infoBuf.returnKind);
+
+    return true;
 #endif 
 }
 
diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h
index 6c56ee9c1ef9..c1dacbfd8f98 100644
--- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h
+++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h
@@ -65,6 +65,11 @@ class CoffNativeCodeManager : public ICodeManager
     PTR_VOID GetFramePointer(MethodInfo *   pMethodInfo,
                              REGDISPLAY *   pRegisterSet);
 
+#ifdef TARGET_X86
+    uintptr_t GetResumeSp(MethodInfo *   pMethodInfo,
+                          REGDISPLAY *   pRegisterSet);
+#endif
+
     uint32_t GetCodeOffset(MethodInfo * pMethodInfo, PTR_VOID address, /*out*/ PTR_uint8_t* gcInfo);
 
     bool IsSafePoint(PTR_VOID pvAddress);
diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp
index 06706ed1e8a4..0de949935d97 100644
--- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp
+++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp
@@ -28,6 +28,7 @@
 #include "gcconfig.h"
 
 #include "thread.h"
+#include "threadstore.h"
 
 #define REDHAWK_PALEXPORT extern "C"
 #define REDHAWK_PALAPI __stdcall
@@ -322,10 +323,120 @@ REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTR
     return CreateEventW(pEventAttributes, manualReset, initialState, pName);
 }
 
+#ifdef TARGET_X86
+
+#define EXCEPTION_HIJACK  0xe0434f4e    // 0xe0000000 | 'COM'+1
+
+PEXCEPTION_REGISTRATION_RECORD GetCurrentSEHRecord()
+{
+    return (PEXCEPTION_REGISTRATION_RECORD)__readfsdword(0);
+}
+
+VOID SetCurrentSEHRecord(EXCEPTION_REGISTRATION_RECORD *pSEH)
+{
+    __writefsdword(0, (DWORD)pSEH);
+}
+
+VOID PopSEHRecords(LPVOID pTargetSP)
+{
+    PEXCEPTION_REGISTRATION_RECORD currentContext = GetCurrentSEHRecord();
+    // The last record in the chain is EXCEPTION_CHAIN_END which is defined as maxiumum
+    // pointer value so it cannot satisfy the loop condition.
+    while (currentContext < pTargetSP)
+    {
+        currentContext = currentContext->Next;
+    }
+    SetCurrentSEHRecord(currentContext);
+}
+
+// This will check who caused the exception.  If it was caused by the redirect function,
+// the reason is to resume the thread back at the point it was redirected in the first
+// place.  If the exception was not caused by the function, then it was caused by the call
+// out to the I[GC|Debugger]ThreadControl client and we need to determine if it's an
+// exception that we can just eat and let the runtime resume the thread, or if it's an
+// uncatchable exception that we need to pass on to the runtime.
+int RtlRestoreContextFallbackExceptionFilter(PEXCEPTION_POINTERS pExcepPtrs, CONTEXT *pCtx, Thread *pThread)
+{
+    if (pExcepPtrs->ExceptionRecord->ExceptionCode == STATUS_STACK_OVERFLOW)
+    {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    // Get the thread handle
+    _ASSERTE(pExcepPtrs->ExceptionRecord->ExceptionCode == EXCEPTION_HIJACK);
+
+    // Copy everything in the saved context record into the EH context.
+    // Historically the EH context has enough space for every enabled context feature.
+    // That may not hold for the future features beyond AVX, but this codepath is
+    // supposed to be used only on OSes that do not have RtlRestoreContext.
+    CONTEXT* pTarget = pExcepPtrs->ContextRecord;
+    if (!CopyContext(pTarget, pCtx->ContextFlags, pCtx))
+    {
+        PalPrintFatalError("Could not set context record.\n");
+        RhFailFast();
+    }
+
+    DWORD espValue = pCtx->Esp;
+
+    // NOTE: Ugly, ugly workaround.
+    // We need to resume the thread into the managed code where it was redirected,
+    // and the corresponding ESP is below the current one.  But C++ expects that
+    // on an EXCEPTION_CONTINUE_EXECUTION that the ESP will be above where it has
+    // installed the SEH handler.  To solve this, we need to remove all handlers
+    // that reside above the resumed ESP, but we must leave the OS-installed
+    // handler at the top, so we grab the top SEH handler, call
+    // PopSEHRecords which will remove all SEH handlers above the target ESP and
+    // then link the OS handler back in with SetCurrentSEHRecord.
+
+    // Get the special OS handler and save it until PopSEHRecords is done
+    EXCEPTION_REGISTRATION_RECORD *pCurSEH = GetCurrentSEHRecord();
+
+    // Unlink all records above the target resume ESP
+    PopSEHRecords((LPVOID)(size_t)espValue);
+
+    // Link the special OS handler back in to the top
+    pCurSEH->Next = GetCurrentSEHRecord();
+
+    // Register the special OS handler as the top handler with the OS
+    SetCurrentSEHRecord(pCurSEH);
+
+    // Resume execution at point where thread was originally redirected
+    return EXCEPTION_CONTINUE_EXECUTION;
+}
+
+EXTERN_C VOID __cdecl RtlRestoreContextFallback(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord)
+{
+    Thread *pThread = ThreadStore::GetCurrentThread();
+
+    // A counter to avoid a nasty case where an
+    // up-stack filter throws another exception
+    // causing our filter to be run again for
+    // some unrelated exception.
+    int filter_count = 0;
+
+    __try
+    {
+        // Save the instruction pointer where we redirected last.  This does not race with the check
+        // against this variable because the GC will not attempt to redirect the thread until the
+        // instruction pointer of this thread is back in managed code.
+        pThread->SetPendingRedirect(ContextRecord->Eip);
+        RaiseException(EXCEPTION_HIJACK, 0, 0, NULL);
+    }
+    __except (++filter_count == 1
+            ? RtlRestoreContextFallbackExceptionFilter(GetExceptionInformation(), ContextRecord, pThread)
+            : EXCEPTION_CONTINUE_SEARCH)
+    {
+        _ASSERTE(!"Reached body of __except in RtlRestoreContextFallback");
+    }
+}
+
+#endif // TARGET_X86
+
 typedef BOOL(WINAPI* PINITIALIZECONTEXT2)(PVOID Buffer, DWORD ContextFlags, PCONTEXT* Context, PDWORD ContextLength, ULONG64 XStateCompactionMask);
 PINITIALIZECONTEXT2 pfnInitializeContext2 = NULL;
 
 #ifdef TARGET_X86
+EXTERN_C VOID __cdecl RtlRestoreContextFallback(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord);
 typedef VOID(__cdecl* PRTLRESTORECONTEXT)(PCONTEXT ContextRecord, struct _EXCEPTION_RECORD* ExceptionRecord);
 PRTLRESTORECONTEXT pfnRtlRestoreContext = NULL;
 
@@ -356,6 +467,11 @@ REDHAWK_PALEXPORT CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextB
     {
         HMODULE hm = GetModuleHandleW(_T("ntdll.dll"));
         pfnRtlRestoreContext = (PRTLRESTORECONTEXT)GetProcAddress(hm, "RtlRestoreContext");
+        if (pfnRtlRestoreContext == NULL)
+        {
+            // Fallback to the internal implementation if OS doesn't provide one.
+            pfnRtlRestoreContext = RtlRestoreContextFallback;
+        }
     }
 #endif //TARGET_X86
 
@@ -438,7 +554,12 @@ REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalSetThreadContext(HAND
 REDHAWK_PALEXPORT void REDHAWK_PALAPI PalRestoreContext(CONTEXT * pCtx)
 {
     __asan_handle_no_return();
+#ifdef TARGET_X86
+    _ASSERTE(pfnRtlRestoreContext != NULL);
+    pfnRtlRestoreContext(pCtx, NULL);
+#else
     RtlRestoreContext(pCtx, NULL);
+#endif //TARGET_X86
 }
 
 REDHAWK_PALIMPORT void REDHAWK_PALAPI PopulateControlSegmentRegisters(CONTEXT* pContext)
@@ -543,7 +664,7 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* p
         pThread->SetActivationPending(false);
 
         DWORD lastError = GetLastError();
-        if (lastError != ERROR_INVALID_PARAMETER)
+        if (lastError != ERROR_INVALID_PARAMETER && lastError != ERROR_NOT_SUPPORTED)
         {
             // An unexpected failure has happened. It is a concern.
             ASSERT_UNCONDITIONALLY("Failed to queue an APC for unusual reason.");
@@ -568,16 +689,41 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* p
 
     if (GetThreadContext(hThread, &win32ctx))
     {
+        bool isSafeToRedirect = true;
+
+#ifdef TARGET_X86
+        // Workaround around WOW64 problems. Only do this workaround if a) this is x86, and b) the OS does
+        // not support trap frame reporting.
+        if ((win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) == 0)
+        {
+            // This code fixes a race between GetThreadContext and NtContinue.  If we redirect managed code
+            // at the same place twice in a row, we run the risk of reading a bogus CONTEXT when we redirect
+            // the second time.  This leads to access violations on x86 machines.  To fix the problem, we
+            // never redirect at the same instruction pointer that we redirected at on the previous GC.
+            if (((Thread*)pThreadToHijack)->CheckPendingRedirect(win32ctx.Eip))
+            {
+                isSafeToRedirect = false;
+            }
+        }
+#else
+        // In some cases Windows will not set the CONTEXT_EXCEPTION_REPORTING flag if the thread is executing
+        // in kernel mode (i.e. in the middle of a syscall or exception handling). Therefore, we should treat
+        // the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that it is not safe to
+        // manipulate with the current state of the thread context.
+        isSafeToRedirect = (win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0;
+#endif
+
         // The CONTEXT_SERVICE_ACTIVE and CONTEXT_EXCEPTION_ACTIVE output flags indicate we suspended the thread
         // at a point where the kernel cannot guarantee a completely accurate context. We'll fail the request in
         // this case (which should force our caller to resume the thread and try again -- since this is a fairly
         // narrow window we're highly likely to succeed next time).
-        // Note: in some cases (x86 WOW64, ARM32 on ARM64) the OS will not set the CONTEXT_EXCEPTION_REPORTING flag
-        // if the thread is executing in kernel mode (i.e. in the middle of a syscall or exception handling).
-        // Therefore, we should treat the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that
-        // it is not safe to manipulate with the current state of the thread context.
         if ((win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0 &&
-            ((win32ctx.ContextFlags & (CONTEXT_SERVICE_ACTIVE | CONTEXT_EXCEPTION_ACTIVE)) == 0))
+            ((win32ctx.ContextFlags & (CONTEXT_SERVICE_ACTIVE | CONTEXT_EXCEPTION_ACTIVE)) != 0))
+        {
+            isSafeToRedirect = false;
+        }
+
+        if (isSafeToRedirect)
         {
             g_pHijackCallback(&win32ctx, pThreadToHijack);
         }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.xml b/src/coreclr/nativeaot/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.xml
index 60ca0245fda0..91859db1b0f8 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.xml
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.xml
@@ -1,8 +1,4 @@
 <linker>
-  <type fullname="System.Runtime.CompilerServices.RuntimeFeature">
-    <method signature="System.Boolean get_IsDynamicCodeCompiled()" body="stub" value="false" />
-    <method signature="System.Boolean get_IsDynamicCodeSupported()" body="stub" value="false" />
-  </type>
 
   <type fullname="System.Collections.Generic.Comparer`1" feature="System.Collections.Generic.DefaultComparers" featurevalue="false">
     <method signature="System.Boolean get_SupportsGenericIComparableInterfaces()" body="stub" />
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/Execution/MethodBaseInvoker.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/Execution/MethodBaseInvoker.cs
index 78fc4d99d279..9a33ed504cd4 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/Execution/MethodBaseInvoker.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Reflection/Core/Execution/MethodBaseInvoker.cs
@@ -57,7 +57,7 @@ protected static void ValidateThis(object thisObject, RuntimeTypeHandle declarin
                 throw new TargetException(SR.RFLCT_Targ_StatMethReqTarg);
 
             if (!RuntimeAugments.IsAssignable(thisObject, declaringTypeHandle))
-                throw new TargetException(SR.RFLCT_Targ_ITargMismatch);
+                throw new TargetException(SR.Format(SR.RFLCT_Targ_ITargMismatch_WithType, declaringTypeHandle.GetRuntimeTypeInfoForRuntimeTypeHandle(), thisObject.GetType()));
         }
     }
 }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs
index 06712a5fc06b..6459b346cf18 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/Augments/RuntimeAugments.cs
@@ -138,11 +138,11 @@ public static Delegate CreateDelegate(RuntimeTypeHandle typeHandleForDelegate, I
         }
 
         //
-        // Helper to extract the artifact that uniquely identifies a method in the runtime mapping tables.
+        // Helper to extract the artifact that identifies a reflectable delegate target in the runtime mapping tables.
         //
-        public static IntPtr GetDelegateLdFtnResult(Delegate d, out RuntimeTypeHandle typeOfFirstParameterIfInstanceDelegate, out bool isOpenResolver, out bool isInterpreterEntrypoint)
+        public static IntPtr GetDelegateLdFtnResult(Delegate d, out RuntimeTypeHandle typeOfFirstParameterIfInstanceDelegate, out bool isOpenResolver)
         {
-            return d.GetFunctionPointer(out typeOfFirstParameterIfInstanceDelegate, out isOpenResolver, out isInterpreterEntrypoint);
+            return d.GetDelegateLdFtnResult(out typeOfFirstParameterIfInstanceDelegate, out isOpenResolver);
         }
 
         // Low level method that returns the loaded modules as array. ReadOnlySpan returning overload
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/InteropHelpers.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/InteropHelpers.cs
index 93a44c07bc2c..7b3604360ca6 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/InteropHelpers.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/InteropHelpers.cs
@@ -360,24 +360,23 @@ internal static unsafe void FixupMethodCell(IntPtr hModule, MethodFixupCell* pCe
             if (charSetMangling == 0)
             {
                 // Look for the user-provided entry point name only
-                pTarget = Interop.Kernel32.GetProcAddress(hModule, methodName);
+                pTarget = GetProcAddressWithMangling(hModule, methodName, pCell);
             }
-            else
-            if (charSetMangling == CharSet.Ansi)
+            else if (charSetMangling == CharSet.Ansi)
             {
                 // For ANSI, look for the user-provided entry point name first.
                 // If that does not exist, try the charset suffix.
-                pTarget = Interop.Kernel32.GetProcAddress(hModule, methodName);
+                pTarget = GetProcAddressWithMangling(hModule, methodName, pCell);
                 if (pTarget == IntPtr.Zero)
-                    pTarget = GetProcAddressWithSuffix(hModule, methodName, (byte)'A');
+                    pTarget = GetProcAddressWithSuffix(hModule, methodName, (byte)'A', pCell);
             }
             else
             {
                 // For Unicode, look for the entry point name with the charset suffix first.
                 // The 'W' API takes precedence over the undecorated one.
-                pTarget = GetProcAddressWithSuffix(hModule, methodName, (byte)'W');
+                pTarget = GetProcAddressWithSuffix(hModule, methodName, (byte)'W', pCell);
                 if (pTarget == IntPtr.Zero)
-                    pTarget = Interop.Kernel32.GetProcAddress(hModule, methodName);
+                    pTarget = GetProcAddressWithMangling(hModule, methodName, pCell);
             }
 #else
             pTarget = Interop.Sys.GetProcAddress(hModule, methodName);
@@ -392,23 +391,43 @@ internal static unsafe void FixupMethodCell(IntPtr hModule, MethodFixupCell* pCe
         }
 
 #if TARGET_WINDOWS
-        private static unsafe IntPtr GetProcAddressWithSuffix(IntPtr hModule, byte* methodName, byte suffix)
+        private static unsafe IntPtr GetProcAddressWithMangling(IntPtr hModule, byte* methodName, MethodFixupCell* pCell)
+        {
+            IntPtr pMethod = Interop.Kernel32.GetProcAddress(hModule, methodName);
+#if TARGET_X86
+            if (pMethod == IntPtr.Zero && pCell->IsStdcall)
+            {
+                int nameLength = string.strlen(methodName);
+                // We need to add an extra bytes for the prefix, null terminator and stack size suffix:
+                // - 1 byte for '_' prefix
+                // - 1 byte for '@' suffix
+                // - up to 10 bytes for digits (maximum positive number representable by uint)
+                // - 1 byte for NULL termination character
+                byte* probedMethodName = stackalloc byte[nameLength + 13];
+                probedMethodName[0] = (byte)'_';
+                Unsafe.CopyBlock(probedMethodName + 1, methodName, (uint)nameLength);
+                probedMethodName[nameLength + 1] = (byte)'@';
+                pCell->SignatureBytes.TryFormat(new Span<byte>(probedMethodName + 2 + nameLength, 10), out int bytesWritten);
+                probedMethodName[nameLength + 2 + bytesWritten] = 0;
+                pMethod = Interop.Kernel32.GetProcAddress(hModule, probedMethodName);
+            }
+#else
+            _ = pCell;
+#endif
+            return pMethod;
+        }
+
+        private static unsafe IntPtr GetProcAddressWithSuffix(IntPtr hModule, byte* methodName, byte suffix, MethodFixupCell* pCell)
         {
             int nameLength = string.strlen(methodName);
 
             // We need to add an extra byte for the suffix, and an extra byte for the null terminator
             byte* probedMethodName = stackalloc byte[nameLength + 2];
-
-            for (int i = 0; i < nameLength; i++)
-            {
-                probedMethodName[i] = methodName[i];
-            }
-
-            probedMethodName[nameLength + 1] = 0;
-
+            Unsafe.CopyBlock(probedMethodName, methodName, (uint)nameLength);
             probedMethodName[nameLength] = suffix;
+            probedMethodName[nameLength + 1] = 0;
 
-            return Interop.Kernel32.GetProcAddress(hModule, probedMethodName);
+            return GetProcAddressWithMangling(hModule, probedMethodName, pCell);
         }
 #endif
 #endif
@@ -627,11 +646,16 @@ internal unsafe struct MethodFixupCell
             public IntPtr Target;
             public IntPtr MethodName;
             public ModuleFixupCell* Module;
-            private int Flags;
+            private uint Flags;
 
             public CharSet CharSetMangling => (CharSet)(Flags & MethodFixupCellFlagsConstants.CharSetMask);
+#if FEATURE_OBJCMARSHAL
             public bool IsObjectiveCMessageSend => (Flags & MethodFixupCellFlagsConstants.IsObjectiveCMessageSendMask) != 0;
-            public int ObjectiveCMessageSendFunction => (Flags & MethodFixupCellFlagsConstants.ObjectiveCMessageSendFunctionMask) >> MethodFixupCellFlagsConstants.ObjectiveCMessageSendFunctionShift;
+            public int ObjectiveCMessageSendFunction => (int)((Flags & MethodFixupCellFlagsConstants.ObjectiveCMessageSendFunctionMask) >> MethodFixupCellFlagsConstants.ObjectiveCMessageSendFunctionShift);
+#elif TARGET_WINDOWS && TARGET_X86
+            public bool IsStdcall => (Flags & MethodFixupCellFlagsConstants.IsStdcall) != 0;
+            public ushort SignatureBytes => (ushort)(Flags >> 16);
+#endif
         }
 
         internal unsafe struct CustomMarshallerKey : IEquatable<CustomMarshallerKey>
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs
index e6a27db5159e..7ce093cfa0f6 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MathHelpers.cs
@@ -2,19 +2,78 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Diagnostics;
 using System.Runtime;
 using System.Runtime.CompilerServices;
-
-using Internal.Runtime;
+using System.Runtime.InteropServices;
 
 namespace Internal.Runtime.CompilerHelpers
 {
     /// <summary>
-    /// Math helpers for generated code. The helpers marked with [RuntimeExport] and the type
-    /// itself need to be public because they constitute a public contract with the .NET Native toolchain.
+    /// Math helpers for generated code. The helpers here are referenced by the runtime.
     /// </summary>
-    internal static class MathHelpers
+    [StackTraceHidden]
+    internal static partial class MathHelpers
     {
+        private const double Int32MaxValueOffset = (double)int.MaxValue + 1;
+        private const double UInt32MaxValueOffset = (double)uint.MaxValue + 1;
+
+        [RuntimeExport("Dbl2IntOvf")]
+        public static int Dbl2IntOvf(double value)
+        {
+            // Note that this expression also works properly for val = NaN case
+            if (value is > -Int32MaxValueOffset - 1 and < Int32MaxValueOffset)
+            {
+                return (int)value;
+            }
+
+            ThrowHelper.ThrowOverflowException();
+            return 0;
+        }
+
+        [RuntimeExport("Dbl2UIntOvf")]
+        public static uint Dbl2UIntOvf(double value)
+        {
+            // Note that this expression also works properly for val = NaN case
+            if (value is > -1.0 and < UInt32MaxValueOffset)
+            {
+                return (uint)value;
+            }
+
+            ThrowHelper.ThrowOverflowException();
+            return 0;
+        }
+
+        [RuntimeExport("Dbl2LngOvf")]
+        public static long Dbl2LngOvf(double value)
+        {
+            const double two63 = Int32MaxValueOffset * UInt32MaxValueOffset;
+
+            // Note that this expression also works properly for val = NaN case
+            // We need to compare with the very next double to two63. 0x402 is epsilon to get us there.
+            if (value is > -two63 - 0x402 and < two63)
+            {
+                return (long)value;
+            }
+
+            ThrowHelper.ThrowOverflowException();
+            return 0;
+        }
+
+        [RuntimeExport("Dbl2ULngOvf")]
+        public static ulong Dbl2ULngOvf(double value)
+        {
+            const double two64 = UInt32MaxValueOffset * UInt32MaxValueOffset;
+            // Note that this expression also works properly for val = NaN case
+            if (value is > -1.0 and < two64)
+            {
+                return (ulong)value;
+            }
+
+            ThrowHelper.ThrowOverflowException();
+            return 0;
+        }
+
 #if !TARGET_64BIT
         //
         // 64-bit checked multiplication for 32-bit platforms
@@ -22,318 +81,232 @@ internal static class MathHelpers
 
         private const string RuntimeLibrary = "*";
 
-        // Helper to multiply two 32-bit uints
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static ulong Mul32x32To64(uint a, uint b)
-        {
-            return a * (ulong)b;
-        }
-
-        // Helper to get high 32-bit of 64-bit int
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static uint Hi32Bits(long a)
+        private static uint High32Bits(ulong a)
         {
             return (uint)(a >> 32);
         }
 
-        // Helper to get high 32-bit of 64-bit int
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static uint Hi32Bits(ulong a)
+        private static ulong BigMul(uint left, uint right)
         {
-            return (uint)(a >> 32);
+            return (ulong)left * right;
         }
 
         [RuntimeExport("LMulOvf")]
-        public static long LMulOvf(long i, long j)
+        public static long LMulOvf(long left, long right)
         {
-            long ret;
+#if DEBUG
+            long result = left * right;
+#endif
 
             // Remember the sign of the result
-            int sign = (int)(Hi32Bits(i) ^ Hi32Bits(j));
+            int sign = (int)(High32Bits((ulong)left) ^ High32Bits((ulong)right));
 
             // Convert to unsigned multiplication
-            if (i < 0) i = -i;
-            if (j < 0) j = -j;
+            if (left < 0)
+                left = -left;
+            if (right < 0)
+                right = -right;
 
             // Get the upper 32 bits of the numbers
-            uint val1High = Hi32Bits(i);
-            uint val2High = Hi32Bits(j);
+            uint val1High = High32Bits((ulong)left);
+            uint val2High = High32Bits((ulong)right);
 
             ulong valMid;
 
             if (val1High == 0)
             {
                 // Compute the 'middle' bits of the long multiplication
-                valMid = Mul32x32To64(val2High, (uint)i);
+                valMid = BigMul(val2High, (uint)left);
             }
             else
             {
                 if (val2High != 0)
-                    goto ThrowExcep;
+                    goto Overflow;
                 // Compute the 'middle' bits of the long multiplication
-                valMid = Mul32x32To64(val1High, (uint)j);
+                valMid = BigMul(val1High, (uint)right);
             }
 
             // See if any bits after bit 32 are set
-            if (Hi32Bits(valMid) != 0)
-                goto ThrowExcep;
+            if (High32Bits(valMid) != 0)
+                goto Overflow;
 
-            ret = (long)(Mul32x32To64((uint)i, (uint)j) + (valMid << 32));
+            long ret = (long)(BigMul((uint)left, (uint)right) + (valMid << 32));
 
             // check for overflow
-            if (Hi32Bits(ret) < (uint)valMid)
-                goto ThrowExcep;
+            if (High32Bits((ulong)ret) < (uint)valMid)
+                goto Overflow;
 
             if (sign >= 0)
             {
                 // have we spilled into the sign bit?
                 if (ret < 0)
-                    goto ThrowExcep;
+                    goto Overflow;
             }
             else
             {
                 ret = -ret;
                 // have we spilled into the sign bit?
                 if (ret > 0)
-                    goto ThrowExcep;
+                    goto Overflow;
             }
+
+#if DEBUG
+            Debug.Assert(ret == result, $"Multiply overflow got: {ret}, expected: {result}");
+#endif
             return ret;
 
-        ThrowExcep:
-            return ThrowLngOvf();
+        Overflow:
+            ThrowHelper.ThrowOverflowException();
+            return 0;
         }
 
         [RuntimeExport("ULMulOvf")]
-        public static ulong ULMulOvf(ulong i, ulong j)
+        public static ulong ULMulOvf(ulong left, ulong right)
         {
-            ulong ret;
-
             // Get the upper 32 bits of the numbers
-            uint val1High = Hi32Bits(i);
-            uint val2High = Hi32Bits(j);
+            uint val1High = High32Bits(left);
+            uint val2High = High32Bits(right);
 
             ulong valMid;
 
             if (val1High == 0)
             {
                 if (val2High == 0)
-                    return Mul32x32To64((uint)i, (uint)j);
+                    return (ulong)(uint)left * (uint)right;
                 // Compute the 'middle' bits of the long multiplication
-                valMid = Mul32x32To64(val2High, (uint)i);
+                valMid = BigMul(val2High, (uint)left);
             }
             else
             {
                 if (val2High != 0)
-                    goto ThrowExcep;
+                    goto Overflow;
                 // Compute the 'middle' bits of the long multiplication
-                valMid = Mul32x32To64(val1High, (uint)j);
+                valMid = BigMul(val1High, (uint)right);
             }
 
             // See if any bits after bit 32 are set
-            if (Hi32Bits(valMid) != 0)
-                goto ThrowExcep;
+            if (High32Bits(valMid) != 0)
+                goto Overflow;
 
-            ret = Mul32x32To64((uint)i, (uint)j) + (valMid << 32);
+            ulong ret = BigMul((uint)left, (uint)right) + (valMid << 32);
 
             // check for overflow
-            if (Hi32Bits(ret) < (uint)valMid)
-                goto ThrowExcep;
-            return ret;
-
-        ThrowExcep:
-            return ThrowULngOvf();
-        }
+            if (High32Bits(ret) < (uint)valMid)
+                goto Overflow;
 
-        [RuntimeImport(RuntimeLibrary, "RhpULMod")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        private static extern ulong RhpULMod(ulong i, ulong j);
-
-        public static ulong ULMod(ulong i, ulong j)
-        {
-            if (j == 0)
-                return ThrowULngDivByZero();
-            else
-                return RhpULMod(i, j);
-        }
-
-        [RuntimeImport(RuntimeLibrary, "RhpLMod")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        private static extern long RhpLMod(long i, long j);
-
-        public static long LMod(long i, long j)
-        {
-            if (j == 0)
-                return ThrowLngDivByZero();
-            else
-                return RhpLMod(i, j);
-        }
-
-        [RuntimeImport(RuntimeLibrary, "RhpULDiv")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        private static extern ulong RhpULDiv(ulong i, ulong j);
+            Debug.Assert(ret == left * right, $"Multiply overflow got: {ret}, expected: {left * right}");
+            return ret;
 
-        public static ulong ULDiv(ulong i, ulong j)
-        {
-            if (j == 0)
-                return ThrowULngDivByZero();
-            else
-                return RhpULDiv(i, j);
+        Overflow:
+            ThrowHelper.ThrowOverflowException();
+            return 0;
         }
 
-        [RuntimeImport(RuntimeLibrary, "RhpLDiv")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        private static extern long RhpLDiv(long i, long j);
+        [LibraryImport(RuntimeLibrary)]
+        [SuppressGCTransition]
+        private static partial ulong RhpULMod(ulong dividend, ulong divisor);
 
-        public static long LDiv(long i, long j)
+        public static ulong ULMod(ulong dividend, ulong divisor)
         {
-            if (j == 0)
-                return ThrowLngDivByZero();
-            else if (j == -1 && i == long.MinValue)
-                return ThrowLngArithExc();
-            else
-                return RhpLDiv(i, j);
-        }
+            if (divisor == 0)
+                ThrowHelper.ThrowDivideByZeroException();
 
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        private static long ThrowLngDivByZero()
-        {
-            throw new DivideByZeroException();
+            return RhpULMod(dividend, divisor);
         }
 
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        private static ulong ThrowULngDivByZero()
-        {
-            throw new DivideByZeroException();
-        }
+        [LibraryImport(RuntimeLibrary)]
+        [SuppressGCTransition]
+        private static partial long RhpLMod(long dividend, long divisor);
 
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        private static long ThrowLngArithExc()
-        {
-            throw new ArithmeticException();
-        }
-#endif // TARGET_64BIT
-
-        [RuntimeExport("Dbl2IntOvf")]
-        public static int Dbl2IntOvf(double val)
-        {
-            const double two31 = 2147483648.0;
-
-            // Note that this expression also works properly for val = NaN case
-            if (val > -two31 - 1 && val < two31)
-                return unchecked((int)val);
-
-            return ThrowIntOvf();
-        }
-
-        [RuntimeExport("Dbl2UIntOvf")]
-        public static uint Dbl2UIntOvf(double val)
+        public static long LMod(long dividend, long divisor)
         {
-            // Note that this expression also works properly for val = NaN case
-            if (val > -1.0 && val < 4294967296.0)
-                return unchecked((uint)val);
+            if (divisor == 0)
+                ThrowHelper.ThrowDivideByZeroException();
+            if (divisor == -1 && dividend == long.MinValue)
+                ThrowHelper.ThrowOverflowException();
 
-            return ThrowUIntOvf();
+            return RhpLMod(dividend, divisor);
         }
 
-        [RuntimeExport("Dbl2LngOvf")]
-        public static long Dbl2LngOvf(double val)
-        {
-            const double two63 = 2147483648.0 * 4294967296.0;
-
-            // Note that this expression also works properly for val = NaN case
-            // We need to compare with the very next double to two63. 0x402 is epsilon to get us there.
-            if (val > -two63 - 0x402 && val < two63)
-                return unchecked((long)val);
-
-            return ThrowLngOvf();
-        }
+        [LibraryImport(RuntimeLibrary)]
+        [SuppressGCTransition]
+        private static partial ulong RhpULDiv(ulong dividend, ulong divisor);
 
-        [RuntimeExport("Dbl2ULngOvf")]
-        public static ulong Dbl2ULngOvf(double val)
+        public static ulong ULDiv(ulong dividend, ulong divisor)
         {
-            const double two64 = 2.0 * 2147483648.0 * 4294967296.0;
-
-            // Note that this expression also works properly for val = NaN case
-            if (val > -1.0 && val < two64)
-                return unchecked((ulong)val);
+            if (divisor == 0)
+                ThrowHelper.ThrowDivideByZeroException();
 
-            return ThrowULngOvf();
+            return RhpULDiv(dividend, divisor);
         }
 
-        [RuntimeExport("Flt2IntOvf")]
-        public static int Flt2IntOvf(float val)
-        {
-            const double two31 = 2147483648.0;
-
-            // Note that this expression also works properly for val = NaN case
-            if (val > -two31 - 1 && val < two31)
-                return ((int)val);
+        [LibraryImport(RuntimeLibrary)]
+        [SuppressGCTransition]
+        private static partial long RhpLDiv(long dividend, long divisor);
 
-            return ThrowIntOvf();
-        }
-
-        [RuntimeExport("Flt2LngOvf")]
-        public static long Flt2LngOvf(float val)
+        public static long LDiv(long dividend, long divisor)
         {
-            const double two63 = 2147483648.0 * 4294967296.0;
+            if (divisor == 0)
+                ThrowHelper.ThrowDivideByZeroException();
+            if (divisor == -1 && dividend == long.MinValue)
+                ThrowHelper.ThrowOverflowException();
 
-            // Note that this expression also works properly for val = NaN case
-            // We need to compare with the very next double to two63. 0x402 is epsilon to get us there.
-            if (val > -two63 - 0x402 && val < two63)
-                return ((long)val);
-
-            return ThrowIntOvf();
+            return RhpLDiv(dividend, divisor);
         }
 
 #if TARGET_ARM || TARGET_WASM // TODO-LLVM: include TARGET_WASM at least until we copy over the implementations from IL to RyuJit
         [RuntimeImport(RuntimeLibrary, "RhpIDiv")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        private static extern int RhpIDiv(int i, int j);
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        private static extern int RhpIDiv(int dividend, int divisor);
 
-        public static int IDiv(int i, int j)
+        public static int IDiv(int dividend, int divisor)
         {
-            if (j == 0)
-                return ThrowIntDivByZero();
-            else if (j == -1 && i == int.MinValue)
-                return ThrowIntArithExc();
-            else
-                return RhpIDiv(i, j);
+            if (divisor == 0)
+                ThrowHelper.ThrowDivideByZeroException();
+            if (divisor == -1 && dividend == int.MinValue)
+                ThrowHelper.ThrowOverflowException();
+
+            return RhpIDiv(dividend, divisor);
         }
 
         [RuntimeImport(RuntimeLibrary, "RhpUDiv")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        private static extern uint RhpUDiv(uint i, uint j);
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        private static extern uint RhpUDiv(uint dividend, uint divisor);
 
-        public static long UDiv(uint i, uint j)
+        public static long UDiv(uint dividend, uint divisor)
         {
-            if (j == 0)
-                return ThrowUIntDivByZero();
-            else
-                return RhpUDiv(i, j);
+            if (divisor == 0)
+                ThrowHelper.ThrowDivideByZeroException();
+
+            return RhpUDiv(dividend, divisor);
         }
 
         [RuntimeImport(RuntimeLibrary, "RhpIMod")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        private static extern int RhpIMod(int i, int j);
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        private static extern int RhpIMod(int dividend, int divisor);
 
-        public static int IMod(int i, int j)
+        public static int IMod(int dividend, int divisor)
         {
-            if (j == 0)
-                return ThrowIntDivByZero();
-            else
-                return RhpIMod(i, j);
+            if (divisor == 0)
+                ThrowHelper.ThrowDivideByZeroException();
+            if (divisor == -1 && dividend == int.MinValue)
+                ThrowHelper.ThrowOverflowException();
+
+            return RhpIMod(dividend, divisor);
         }
 
         [RuntimeImport(RuntimeLibrary, "RhpUMod")]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        private static extern uint RhpUMod(uint i, uint j);
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        private static extern uint RhpUMod(uint dividend, uint divisor);
 
-        public static long UMod(uint i, uint j)
+        public static long UMod(uint dividend, uint divisor)
         {
-            if (j == 0)
+            if (divisor == 0)
                 return ThrowUIntDivByZero();
             else
-                return RhpUMod(i, j);
+                return RhpUMod(dividend, divisor);
         }
 #endif // TARGET_ARM || TARGET_WASM
 
@@ -385,5 +358,6 @@ private static int ThrowIntArithExc()
             throw new ArithmeticException();
         }
 #endif // TARGET_ARM || TARGET_WASM
+#endif // TARGET_64BIT
     }
 }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/FunctionPointerOps.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/FunctionPointerOps.cs
index a0f5312c9230..e77a106b252b 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/FunctionPointerOps.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerServices/FunctionPointerOps.cs
@@ -149,5 +149,16 @@ public static unsafe bool Compare(IntPtr functionPointerA, IntPtr functionPointe
 
             return pointerDefA->MethodFunctionPointer == pointerDefB->MethodFunctionPointer;
         }
+
+        public static unsafe int GetHashCode(IntPtr functionPointer)
+        {
+            if (!IsGenericMethodPointer(functionPointer))
+            {
+                return functionPointer.GetHashCode();
+            }
+
+            GenericMethodDescriptor* pointerDef = ConvertToGenericDescriptor(functionPointer);
+            return pointerDef->GetHashCode();
+        }
     }
 }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj b/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj
index 88aed06d3166..b68293c7da3e 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj
@@ -187,15 +187,12 @@
     <Compile Include="System\Diagnostics\StackTrace.NativeAot.cs" />
     <Compile Include="System\Diagnostics\StackTrace.NativeAot.Browser.cs" Condition="'$(TargetOS)' == 'browser'" />
     <Compile Include="System\Diagnostics\StackTrace.NativeAot.Wasi.cs" Condition="'$(TargetOS)' == 'wasi'" />
-    <Compile Include="System\Diagnostics\Eventing\EventPipe.NativeAot.cs" />
-    <Compile Include="System\Diagnostics\Eventing\NativeRuntimeEventSource.Threading.NativeSinks.NativeAot.cs" Condition="'$(FeaturePerfTracing)' == 'true'" />
     <Compile Include="System\Enum.NativeAot.cs" />
     <Compile Include="System\Environment.NativeAot.cs" />
     <Compile Include="System\GC.NativeAot.cs" />
     <Compile Include="System\Math.NativeAot.cs" />
     <Compile Include="System\MathF.NativeAot.cs" />
     <Compile Include="System\Object.NativeAot.cs" />
-    <Compile Include="System\Resources\ManifestBasedResourceGroveler.NativeAot.cs" />
     <Compile Include="System\RuntimeArgumentHandle.cs" />
     <Compile Include="System\RuntimeType.cs" />
     <Compile Include="System\Runtime\ControlledExecution.NativeAot.cs" />
@@ -264,12 +261,22 @@
     </Compile>
   </ItemGroup>
   <ItemGroup Condition="'$(TargetsWindows)'=='true'">
+    <Compile Include="System\EventReporter.cs" />
     <Compile Include="Internal\Runtime\FrozenObjectHeapManager.Windows.cs" />
     <Compile Include="System\Runtime\InteropServices\NativeLibrary.NativeAot.Windows.cs" />
     <Compile Include="System\Runtime\InteropServices\PInvokeMarshal.Windows.cs" />
     <Compile Include="$(CommonPath)\System\Runtime\InteropServices\BuiltInVariantExtensions.cs">
       <Link>System\Runtime\InteropServices\BuiltInVariantExtensions.cs</Link>
     </Compile>
+    <Compile Include="$(CommonPath)Interop\Windows\Advapi32\Interop.RegisterEventSource_IntPtr.cs">
+      <Link>Interop\Windows\Advapi32\Interop.RegisterEventSource_IntPtr.cs</Link>
+    </Compile>
+    <Compile Include="$(CommonPath)Interop\Windows\Advapi32\Interop.DeregisterEventSource.cs">
+      <Link>Interop\Windows\Advapi32\Interop.DeregisterEventSource.cs</Link>
+    </Compile>
+    <Compile Include="$(CommonPath)Interop\Windows\Advapi32\Interop.ReportEvent_IntPtr.cs">
+      <Link>Common\Interop\Windows\Advapi32\Interop.ReportEvent_IntPtr.cs</Link>
+    </Compile>
     <Compile Include="$(CommonPath)\Interop\Windows\Kernel32\Interop.IsDebuggerPresent.cs">
       <Link>Interop\Windows\Kernel32\Interop.IsDebuggerPresent.cs</Link>
     </Compile>
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs
index 64ba6597446a..61f70e212483 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs
@@ -494,7 +494,7 @@ private static unsafe void CopyImplValueTypeArrayNoInnerGcRefs(Array sourceArray
             // Copy scenario: ValueType-array to value-type array with no embedded gc-refs.
             nuint elementSize = sourceArray.ElementSize;
 
-            Buffer.Memmove(
+            SpanHelpers.Memmove(
                 ref Unsafe.AddByteOffset(ref MemoryMarshal.GetArrayDataReference(destinationArray), (nuint)destinationIndex * elementSize),
                 ref Unsafe.AddByteOffset(ref MemoryMarshal.GetArrayDataReference(sourceArray), (nuint)sourceIndex * elementSize),
                 elementSize * (nuint)length);
@@ -534,7 +534,7 @@ private static unsafe void CopyImplPrimitiveTypeWithWidening(Array sourceArray,
                 if (sourceElementType == destElementType)
                 {
                     // Multidim arrays and enum->int copies can still reach this path.
-                    Buffer.Memmove(ref *data, ref *srcData, (nuint)length * srcElementSize);
+                    SpanHelpers.Memmove(ref *data, ref *srcData, (nuint)length * srcElementSize);
                     return;
                 }
 
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Delegate.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Delegate.cs
index 1c565a045b82..bc8c517acb8e 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Delegate.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Delegate.cs
@@ -7,6 +7,7 @@
 using System.Reflection;
 using System.Runtime;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 using System.Runtime.Serialization;
 
 using Internal.Reflection.Augments;
@@ -40,10 +41,19 @@ protected Delegate([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.Al
 
         // New Delegate Implementation
 
-        private object m_firstParameter;
-        private object m_helperObject;
-        private nint m_extraFunctionPointerOrData;
-        private IntPtr m_functionPointer;
+        private object _firstParameter;
+        private object _helperObject;
+        private nint _extraFunctionPointerOrData;
+        private IntPtr _functionPointer;
+
+        // _helperObject may point to an array of delegates if this is a multicast delegate. We use this wrapper to distinguish between
+        // our own array of delegates and user provided Wrapper[]. As a added benefit, this wrapper also eliminates array co-variance
+        // overhead for our own array of delegates.
+        private struct Wrapper
+        {
+            public Wrapper(Delegate value) => Value = value;
+            public Delegate Value;
+        }
 
         // WARNING: These constants are also declared in System.Private.TypeLoader\Internal\Runtime\TypeLoader\CallConverterThunk.cs
         // Do not change their values without updating the values in the calling convention converter component
@@ -63,14 +73,8 @@ private protected virtual IntPtr GetThunk(int whichThunk)
         }
 
         /// <summary>
-        /// Used by various parts of the runtime as a replacement for Delegate.Method
-        ///
-        /// The Interop layer uses this to distinguish between different methods on a
-        /// single type, and to get the function pointer for delegates to static functions
-        ///
         /// The reflection apis use this api to figure out what MethodInfo is related
         /// to a delegate.
-        ///
         /// </summary>
         /// <param name="typeOfFirstParameterIfInstanceDelegate">
         ///   This value indicates which type an delegate's function pointer is associated with
@@ -79,57 +83,40 @@ private protected virtual IntPtr GetThunk(int whichThunk)
         /// <param name="isOpenResolver">
         ///   This value indicates if the returned pointer is an open resolver structure.
         /// </param>
-        /// <param name="isInterpreterEntrypoint">
-        ///   Delegate points to an object array thunk (the delegate wraps a Func&lt;object[], object&gt; delegate). This
-        ///   is typically a delegate pointing to the LINQ expression interpreter.
-        /// </param>
-        /// <returns></returns>
-        internal unsafe IntPtr GetFunctionPointer(out RuntimeTypeHandle typeOfFirstParameterIfInstanceDelegate, out bool isOpenResolver, out bool isInterpreterEntrypoint)
+        internal unsafe IntPtr GetDelegateLdFtnResult(out RuntimeTypeHandle typeOfFirstParameterIfInstanceDelegate, out bool isOpenResolver)
         {
             typeOfFirstParameterIfInstanceDelegate = default(RuntimeTypeHandle);
             isOpenResolver = false;
-            isInterpreterEntrypoint = false;
 
-            if (GetThunk(MulticastThunk) == m_functionPointer)
-            {
-                return IntPtr.Zero;
-            }
-            else if (GetThunk(ObjectArrayThunk) == m_functionPointer)
+            if (_extraFunctionPointerOrData != 0)
             {
-                isInterpreterEntrypoint = true;
-                return IntPtr.Zero;
-            }
-            else if (m_extraFunctionPointerOrData != 0)
-            {
-                if (GetThunk(OpenInstanceThunk) == m_functionPointer)
+                if (GetThunk(OpenInstanceThunk) == _functionPointer)
                 {
-                    typeOfFirstParameterIfInstanceDelegate = ((OpenMethodResolver*)m_extraFunctionPointerOrData)->DeclaringType;
+                    typeOfFirstParameterIfInstanceDelegate = ((OpenMethodResolver*)_extraFunctionPointerOrData)->DeclaringType;
                     isOpenResolver = true;
                 }
-                return m_extraFunctionPointerOrData;
+                return _extraFunctionPointerOrData;
             }
             else
             {
-                if (m_firstParameter != null)
-                    typeOfFirstParameterIfInstanceDelegate = new RuntimeTypeHandle(m_firstParameter.GetMethodTable());
-
-                // TODO! Implementation issue for generic invokes here ... we need another IntPtr for uniqueness.
+                if (_firstParameter != null)
+                    typeOfFirstParameterIfInstanceDelegate = new RuntimeTypeHandle(_firstParameter.GetMethodTable());
 
-                return m_functionPointer;
+                return _functionPointer;
             }
         }
 
-        // This function is known to the IL Transformer.
+        // This function is known to the compiler.
         private void InitializeClosedInstance(object firstParameter, IntPtr functionPointer)
         {
             if (firstParameter is null)
                 throw new ArgumentException(SR.Arg_DlgtNullInst);
 
-            m_functionPointer = functionPointer;
-            m_firstParameter = firstParameter;
+            _functionPointer = functionPointer;
+            _firstParameter = firstParameter;
         }
 
-        // This function is known to the IL Transformer.
+        // This function is known to the compiler.
         private void InitializeClosedInstanceSlow(object firstParameter, IntPtr functionPointer)
         {
             // This method is like InitializeClosedInstance, but it handles ALL cases. In particular, it handles generic method with fun function pointers.
@@ -139,15 +126,15 @@ private void InitializeClosedInstanceSlow(object firstParameter, IntPtr function
 
             if (!FunctionPointerOps.IsGenericMethodPointer(functionPointer))
             {
-                m_functionPointer = functionPointer;
-                m_firstParameter = firstParameter;
+                _functionPointer = functionPointer;
+                _firstParameter = firstParameter;
             }
             else
             {
-                m_firstParameter = this;
-                m_functionPointer = GetThunk(ClosedInstanceThunkOverGenericMethod);
-                m_extraFunctionPointerOrData = functionPointer;
-                m_helperObject = firstParameter;
+                _firstParameter = this;
+                _functionPointer = GetThunk(ClosedInstanceThunkOverGenericMethod);
+                _extraFunctionPointerOrData = functionPointer;
+                _helperObject = firstParameter;
             }
         }
 
@@ -166,27 +153,28 @@ private void InitializeClosedInstanceWithGVMResolution(object firstParameter, Ru
             }
             if (!FunctionPointerOps.IsGenericMethodPointer(functionResolution))
             {
-                m_functionPointer = functionResolution;
-                m_firstParameter = firstParameter;
+                _functionPointer = functionResolution;
+                _firstParameter = firstParameter;
             }
             else
             {
-                m_firstParameter = this;
-                m_functionPointer = GetThunk(ClosedInstanceThunkOverGenericMethod);
-                m_extraFunctionPointerOrData = functionResolution;
-                m_helperObject = firstParameter;
+                _firstParameter = this;
+                _functionPointer = GetThunk(ClosedInstanceThunkOverGenericMethod);
+                _extraFunctionPointerOrData = functionResolution;
+                _helperObject = firstParameter;
             }
 
             return;
         }
 
+        // This function is known to the compiler.
         private void InitializeClosedInstanceToInterface(object firstParameter, IntPtr dispatchCell)
         {
             if (firstParameter is null)
                 throw new NullReferenceException();
 
-            m_functionPointer = RuntimeImports.RhpResolveInterfaceMethod(firstParameter, dispatchCell);
-            m_firstParameter = firstParameter;
+            _functionPointer = RuntimeImports.RhpResolveInterfaceMethod(firstParameter, dispatchCell);
+            _firstParameter = firstParameter;
         }
 
         // This is used to implement MethodInfo.CreateDelegate() in a desktop-compatible way. Yes, the desktop really
@@ -195,61 +183,53 @@ private void InitializeClosedInstanceWithoutNullCheck(object firstParameter, Int
         {
             if (!FunctionPointerOps.IsGenericMethodPointer(functionPointer))
             {
-                m_functionPointer = functionPointer;
-                m_firstParameter = firstParameter;
+                _functionPointer = functionPointer;
+                _firstParameter = firstParameter;
             }
             else
             {
-                m_firstParameter = this;
-                m_functionPointer = GetThunk(ClosedInstanceThunkOverGenericMethod);
-                m_extraFunctionPointerOrData = functionPointer;
-                m_helperObject = firstParameter;
+                _firstParameter = this;
+                _functionPointer = GetThunk(ClosedInstanceThunkOverGenericMethod);
+                _extraFunctionPointerOrData = functionPointer;
+                _helperObject = firstParameter;
             }
         }
 
-        // This function is known to the compiler backend.
+        // This function is known to the compiler.
         private void InitializeClosedStaticThunk(object firstParameter, IntPtr functionPointer, IntPtr functionPointerThunk)
         {
-            m_extraFunctionPointerOrData = functionPointer;
-            m_helperObject = firstParameter;
-            m_functionPointer = functionPointerThunk;
-            m_firstParameter = this;
+            _extraFunctionPointerOrData = functionPointer;
+            _helperObject = firstParameter;
+            _functionPointer = functionPointerThunk;
+            _firstParameter = this;
         }
 
-        // This function is known to the compiler backend.
+        // This function is known to the compiler.
         private void InitializeOpenStaticThunk(object _ /*firstParameter*/, IntPtr functionPointer, IntPtr functionPointerThunk)
         {
             // This sort of delegate is invoked by calling the thunk function pointer with the arguments to the delegate + a reference to the delegate object itself.
-            m_firstParameter = this;
-            m_functionPointer = functionPointerThunk;
-            m_extraFunctionPointerOrData = functionPointer;
+            _firstParameter = this;
+            _functionPointer = functionPointerThunk;
+            _extraFunctionPointerOrData = functionPointer;
         }
 
         private void InitializeOpenInstanceThunkDynamic(IntPtr functionPointer, IntPtr functionPointerThunk)
         {
             // This sort of delegate is invoked by calling the thunk function pointer with the arguments to the delegate + a reference to the delegate object itself.
-            m_firstParameter = this;
-            m_functionPointer = functionPointerThunk;
-            m_extraFunctionPointerOrData = functionPointer;
+            _firstParameter = this;
+            _functionPointer = functionPointerThunk;
+            _extraFunctionPointerOrData = functionPointer;
         }
 
         // This function is only ever called by the open instance method thunk, and in that case,
-        // m_extraFunctionPointerOrData always points to an OpenMethodResolver
+        // _extraFunctionPointerOrData always points to an OpenMethodResolver
         [MethodImpl(MethodImplOptions.NoInlining)]
         private IntPtr GetActualTargetFunctionPointer(object thisObject)
         {
-            return OpenMethodResolver.ResolveMethod(m_extraFunctionPointerOrData, thisObject);
+            return OpenMethodResolver.ResolveMethod(_extraFunctionPointerOrData, thisObject);
         }
 
-        internal bool IsDynamicDelegate()
-        {
-            if (this.GetThunk(MulticastThunk) == IntPtr.Zero)
-            {
-                return true;
-            }
-
-            return false;
-        }
+        internal bool IsDynamicDelegate() => GetThunk(MulticastThunk) == IntPtr.Zero;
 
         [DebuggerGuidedStepThroughAttribute]
         protected virtual object? DynamicInvokeImpl(object?[]? args)
@@ -257,7 +237,7 @@ internal bool IsDynamicDelegate()
             if (IsDynamicDelegate())
             {
                 // DynamicDelegate case
-                object? result = ((Func<object?[]?, object?>)m_helperObject)(args);
+                object? result = ((Func<object?[]?, object?>)_helperObject)(args);
                 DebugAnnotations.PreviousCallContainsDebuggerStepInCode();
                 return result;
             }
@@ -265,7 +245,7 @@ internal bool IsDynamicDelegate()
             {
                 DynamicInvokeInfo dynamicInvokeInfo = ReflectionAugments.ReflectionCoreCallbacks.GetDelegateDynamicInvokeInfo(GetType());
 
-                object? result = dynamicInvokeInfo.Invoke(m_firstParameter, m_functionPointer,
+                object? result = dynamicInvokeInfo.Invoke(_firstParameter, _functionPointer,
                     args, binderBundle: null, wrapInTargetInvocationException: true);
                 DebugAnnotations.PreviousCallContainsDebuggerStepInCode();
                 return result;
@@ -274,35 +254,53 @@ internal bool IsDynamicDelegate()
 
         protected virtual MethodInfo GetMethodImpl()
         {
+            // Multi-cast delegates return the Method of the last delegate in the list
+            if (_helperObject is Wrapper[] invocationList)
+            {
+                int invocationCount = (int)_extraFunctionPointerOrData;
+                return invocationList[invocationCount - 1].Value.GetMethodImpl();
+            }
+
+            // Return the delegate Invoke method for marshalled function pointers and LINQ expressions
+            if ((_firstParameter is NativeFunctionPointerWrapper) || (_functionPointer == GetThunk(ObjectArrayThunk)))
+            {
+                return GetType().GetMethod("Invoke");
+            }
+
             return ReflectionAugments.ReflectionCoreCallbacks.GetDelegateMethod(this);
         }
 
-        public object Target
+        public object? Target
         {
             get
             {
                 // Multi-cast delegates return the Target of the last delegate in the list
-                if (m_functionPointer == GetThunk(MulticastThunk))
+                if (_helperObject is Wrapper[] invocationList)
                 {
-                    Delegate[] invocationList = (Delegate[])m_helperObject;
-                    int invocationCount = (int)m_extraFunctionPointerOrData;
-                    return invocationList[invocationCount - 1].Target;
+                    int invocationCount = (int)_extraFunctionPointerOrData;
+                    return invocationList[invocationCount - 1].Value.Target;
                 }
 
-                // Closed static delegates place a value in m_helperObject that they pass to the target method.
-                if (m_functionPointer == GetThunk(ClosedStaticThunk) ||
-                    m_functionPointer == GetThunk(ClosedInstanceThunkOverGenericMethod) ||
-                    m_functionPointer == GetThunk(ObjectArrayThunk))
-                    return m_helperObject;
+                // Closed static delegates place a value in _helperObject that they pass to the target method.
+                if (_functionPointer == GetThunk(ClosedStaticThunk) ||
+                    _functionPointer == GetThunk(ClosedInstanceThunkOverGenericMethod) ||
+                    _functionPointer == GetThunk(ObjectArrayThunk))
+                    return _helperObject;
+
+                // Other non-closed thunks can be identified as the _firstParameter field points at this.
+                if (object.ReferenceEquals(_firstParameter, this))
+                {
+                    return null;
+                }
 
-                // Other non-closed thunks can be identified as the m_firstParameter field points at this.
-                if (object.ReferenceEquals(m_firstParameter, this))
+                // NativeFunctionPointerWrapper used by marshalled function pointers is not returned as a public target
+                if (_firstParameter is NativeFunctionPointerWrapper)
                 {
                     return null;
                 }
 
-                // Closed instance delegates place a value in m_firstParameter, and we've ruled out all other types of delegates
-                return m_firstParameter;
+                // Closed instance delegates place a value in _firstParameter, and we've ruled out all other types of delegates
+                return _firstParameter;
             }
         }
 
@@ -319,13 +317,9 @@ public object Target
         // V1 api: Creates open delegates to static methods only, relaxed signature checking disallowed.
         public static Delegate CreateDelegate(Type type, [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)] Type target, string method, bool ignoreCase, bool throwOnBindFailure) => ReflectionAugments.ReflectionCoreCallbacks.CreateDelegate(type, target, method, ignoreCase, throwOnBindFailure);
 
-        internal bool IsOpenStatic
-        {
-            get
-            {
-                return GetThunk(OpenStaticThunk) == m_functionPointer;
-            }
-        }
+        internal IntPtr TryGetOpenStaticFunctionPointer() => (GetThunk(OpenStaticThunk) == _functionPointer) ? _extraFunctionPointerOrData : 0;
+
+        internal NativeFunctionPointerWrapper? TryGetNativeFunctionPointerWrapper() => _firstParameter as NativeFunctionPointerWrapper;
 
         internal static unsafe bool InternalEqualTypes(object a, object b)
         {
@@ -350,9 +344,9 @@ internal static unsafe Delegate CreateObjectArrayDelegate(Type t, Func<object?[]
                 throw new InvalidOperationException();
             }
 
-            del.m_helperObject = handler;
-            del.m_functionPointer = objArrayThunk;
-            del.m_firstParameter = del;
+            del._helperObject = handler;
+            del._functionPointer = objArrayThunk;
+            del._firstParameter = del;
             return del;
         }
 
@@ -397,44 +391,34 @@ internal static unsafe Delegate CreateDelegate(MethodTable* delegateEEType, IntP
             return del;
         }
 
-        private unsafe MulticastDelegate NewMulticastDelegate(Delegate[] invocationList, int invocationCount, bool thisIsMultiCastAlready = false)
+        private unsafe Delegate NewMulticastDelegate(Wrapper[] invocationList, int invocationCount, bool thisIsMultiCastAlready = false)
         {
-            // First, allocate a new multicast delegate just like this one, i.e. same type as the this object
-            MulticastDelegate result = (MulticastDelegate)RuntimeImports.RhNewObject(this.GetMethodTable());
+            // First, allocate a new delegate just like this one, i.e. same type as the this object
+            Delegate result = Unsafe.As<Delegate>(RuntimeImports.RhNewObject(this.GetMethodTable()));
 
             // Performance optimization - if this already points to a true multicast delegate,
-            // copy _methodPtr and _methodPtrAux fields rather than calling into the EE to get them
-            if (thisIsMultiCastAlready)
-            {
-                result.m_functionPointer = this.m_functionPointer;
-            }
-            else
-            {
-                result.m_functionPointer = GetThunk(MulticastThunk);
-            }
-            result.m_firstParameter = result;
-            result.m_helperObject = invocationList;
-            result.m_extraFunctionPointerOrData = (IntPtr)invocationCount;
+            // copy _functionPointer field rather than calling GetThunk to get it
+            result._functionPointer = thisIsMultiCastAlready ? _functionPointer : GetThunk(MulticastThunk);
+            result._firstParameter = result;
+            result._helperObject = invocationList;
+            result._extraFunctionPointerOrData = (IntPtr)invocationCount;
 
             return result;
         }
 
-        private static bool TrySetSlot(Delegate[] a, int index, Delegate o)
+        private static bool TrySetSlot(Wrapper[] a, int index, Delegate o)
         {
-            if (a[index] == null && System.Threading.Interlocked.CompareExchange<Delegate>(ref a[index], o, null) == null)
+            if (a[index].Value == null && System.Threading.Interlocked.CompareExchange(ref a[index].Value, o, null) == null)
                 return true;
 
             // The slot may be already set because we have added and removed the same method before.
             // Optimize this case, because it's cheaper than copying the array.
-            if (a[index] != null)
+            if (a[index].Value is Delegate dd)
             {
-                MulticastDelegate d = (MulticastDelegate)o;
-                MulticastDelegate dd = (MulticastDelegate)a[index];
-
-                if (object.ReferenceEquals(dd.m_firstParameter, d.m_firstParameter) &&
-                    object.ReferenceEquals(dd.m_helperObject, d.m_helperObject) &&
-                    dd.m_extraFunctionPointerOrData == d.m_extraFunctionPointerOrData &&
-                    dd.m_functionPointer == d.m_functionPointer)
+                if (object.ReferenceEquals(dd._firstParameter, o._firstParameter) &&
+                    object.ReferenceEquals(dd._helperObject, o._helperObject) &&
+                    dd._extraFunctionPointerOrData == o._extraFunctionPointerOrData &&
+                    dd._functionPointer == o._functionPointer)
                 {
                     return true;
                 }
@@ -446,35 +430,31 @@ private static bool TrySetSlot(Delegate[] a, int index, Delegate o)
         //  to form a new delegate.
         protected virtual Delegate CombineImpl(Delegate? d)
         {
-            if (d is null) // cast to object for a more efficient test
+            if (d is null)
                 return this;
 
             // Verify that the types are the same...
             if (!InternalEqualTypes(this, d))
                 throw new ArgumentException(SR.Arg_DlgtTypeMis);
 
-            if (IsDynamicDelegate() && d.IsDynamicDelegate())
-            {
+            if (IsDynamicDelegate())
                 throw new InvalidOperationException();
-            }
 
-            MulticastDelegate dFollow = (MulticastDelegate)d;
-            Delegate[]? resultList;
             int followCount = 1;
-            Delegate[]? followList = dFollow.m_helperObject as Delegate[];
+            Wrapper[]? followList = d._helperObject as Wrapper[];
             if (followList != null)
-                followCount = (int)dFollow.m_extraFunctionPointerOrData;
+                followCount = (int)d._extraFunctionPointerOrData;
 
             int resultCount;
-            Delegate[]? invocationList = m_helperObject as Delegate[];
-            if (invocationList == null)
+            Wrapper[]? resultList;
+            if (_helperObject is not Wrapper[] invocationList)
             {
                 resultCount = 1 + followCount;
-                resultList = new Delegate[resultCount];
-                resultList[0] = this;
+                resultList = new Wrapper[resultCount];
+                resultList[0] = new Wrapper(this);
                 if (followList == null)
                 {
-                    resultList[1] = dFollow;
+                    resultList[1] = new Wrapper(d);
                 }
                 else
                 {
@@ -485,7 +465,7 @@ protected virtual Delegate CombineImpl(Delegate? d)
             }
             else
             {
-                int invocationCount = (int)m_extraFunctionPointerOrData;
+                int invocationCount = (int)_extraFunctionPointerOrData;
                 resultCount = invocationCount + followCount;
                 resultList = null;
                 if (resultCount <= invocationList.Length)
@@ -493,14 +473,14 @@ protected virtual Delegate CombineImpl(Delegate? d)
                     resultList = invocationList;
                     if (followList == null)
                     {
-                        if (!TrySetSlot(resultList, invocationCount, dFollow))
+                        if (!TrySetSlot(resultList, invocationCount, d))
                             resultList = null;
                     }
                     else
                     {
                         for (int i = 0; i < followCount; i++)
                         {
-                            if (!TrySetSlot(resultList, invocationCount + i, followList[i]))
+                            if (!TrySetSlot(resultList, invocationCount + i, followList[i].Value))
                             {
                                 resultList = null;
                                 break;
@@ -515,14 +495,14 @@ protected virtual Delegate CombineImpl(Delegate? d)
                     while (allocCount < resultCount)
                         allocCount *= 2;
 
-                    resultList = new Delegate[allocCount];
+                    resultList = new Wrapper[allocCount];
 
                     for (int i = 0; i < invocationCount; i++)
                         resultList[i] = invocationList[i];
 
                     if (followList == null)
                     {
-                        resultList[invocationCount] = dFollow;
+                        resultList[invocationCount] = new Wrapper(d);
                     }
                     else
                     {
@@ -534,14 +514,13 @@ protected virtual Delegate CombineImpl(Delegate? d)
             }
         }
 
-        private Delegate[] DeleteFromInvocationList(Delegate[] invocationList, int invocationCount, int deleteIndex, int deleteCount)
+        private static Wrapper[] DeleteFromInvocationList(Wrapper[] invocationList, int invocationCount, int deleteIndex, int deleteCount)
         {
-            Delegate[] thisInvocationList = (Delegate[])m_helperObject;
-            int allocCount = thisInvocationList.Length;
+            int allocCount = invocationList.Length;
             while (allocCount / 2 >= invocationCount - deleteCount)
                 allocCount /= 2;
 
-            Delegate[] newInvocationList = new Delegate[allocCount];
+            Wrapper[] newInvocationList = new Wrapper[allocCount];
 
             for (int i = 0; i < deleteIndex; i++)
                 newInvocationList[i] = invocationList[i];
@@ -552,11 +531,11 @@ private Delegate[] DeleteFromInvocationList(Delegate[] invocationList, int invoc
             return newInvocationList;
         }
 
-        private static bool EqualInvocationLists(Delegate[] a, Delegate[] b, int start, int count)
+        private static bool EqualInvocationLists(Wrapper[] a, Wrapper[] b, int start, int count)
         {
             for (int i = 0; i < count; i++)
             {
-                if (!(a[start + i].Equals(b[i])))
+                if (!(a[start + i].Value.Equals(b[i].Value)))
                     return false;
             }
             return true;
@@ -572,34 +551,31 @@ private static bool EqualInvocationLists(Delegate[] a, Delegate[] b, int start,
             // There is a special case were we are removing using a delegate as
             //    the value we need to check for this case
             //
-            MulticastDelegate? v = d as MulticastDelegate;
-
-            if (v is null)
+            if (d is null)
                 return this;
-            if (v.m_helperObject as Delegate[] == null)
+            if (d._helperObject is not Wrapper[] dInvocationList)
             {
-                Delegate[]? invocationList = m_helperObject as Delegate[];
-                if (invocationList == null)
+                if (_helperObject is not Wrapper[] invocationList)
                 {
                     // they are both not real Multicast
-                    if (this.Equals(v))
+                    if (this.Equals(d))
                         return null;
                 }
                 else
                 {
-                    int invocationCount = (int)m_extraFunctionPointerOrData;
+                    int invocationCount = (int)_extraFunctionPointerOrData;
                     for (int i = invocationCount; --i >= 0;)
                     {
-                        if (v.Equals(invocationList[i]))
+                        if (d.Equals(invocationList[i].Value))
                         {
                             if (invocationCount == 2)
                             {
                                 // Special case - only one value left, either at the beginning or the end
-                                return invocationList[1 - i];
+                                return invocationList[1 - i].Value;
                             }
                             else
                             {
-                                Delegate[] list = DeleteFromInvocationList(invocationList, invocationCount, i, 1);
+                                Wrapper[] list = DeleteFromInvocationList(invocationList, invocationCount, i, 1);
                                 return NewMulticastDelegate(list, invocationCount - 1, true);
                             }
                         }
@@ -608,29 +584,28 @@ private static bool EqualInvocationLists(Delegate[] a, Delegate[] b, int start,
             }
             else
             {
-                Delegate[]? invocationList = m_helperObject as Delegate[];
-                if (invocationList != null)
+                if (_helperObject is Wrapper[] invocationList)
                 {
-                    int invocationCount = (int)m_extraFunctionPointerOrData;
-                    int vInvocationCount = (int)v.m_extraFunctionPointerOrData;
-                    for (int i = invocationCount - vInvocationCount; i >= 0; i--)
+                    int invocationCount = (int)_extraFunctionPointerOrData;
+                    int dInvocationCount = (int)d._extraFunctionPointerOrData;
+                    for (int i = invocationCount - dInvocationCount; i >= 0; i--)
                     {
-                        if (EqualInvocationLists(invocationList, v.m_helperObject as Delegate[], i, vInvocationCount))
+                        if (EqualInvocationLists(invocationList, dInvocationList, i, dInvocationCount))
                         {
-                            if (invocationCount - vInvocationCount == 0)
+                            if (invocationCount - dInvocationCount == 0)
                             {
                                 // Special case - no values left
                                 return null;
                             }
-                            else if (invocationCount - vInvocationCount == 1)
+                            else if (invocationCount - dInvocationCount == 1)
                             {
                                 // Special case - only one value left, either at the beginning or the end
-                                return invocationList[i != 0 ? 0 : invocationCount - 1];
+                                return invocationList[i != 0 ? 0 : invocationCount - 1].Value;
                             }
                             else
                             {
-                                Delegate[] list = DeleteFromInvocationList(invocationList, invocationCount, i, vInvocationCount);
-                                return NewMulticastDelegate(list, invocationCount - vInvocationCount, true);
+                                Wrapper[] list = DeleteFromInvocationList(invocationList, invocationCount, i, dInvocationCount);
+                                return NewMulticastDelegate(list, invocationCount - dInvocationCount, true);
                             }
                         }
                     }
@@ -642,41 +617,19 @@ private static bool EqualInvocationLists(Delegate[] a, Delegate[] b, int start,
 
         public virtual Delegate[] GetInvocationList()
         {
-            Delegate[] del;
-            Delegate[]? invocationList = m_helperObject as Delegate[];
-            if (invocationList == null)
-            {
-                del = new Delegate[1];
-                del[0] = this;
-            }
-            else
+            if (_helperObject is Wrapper[] invocationList)
             {
                 // Create an array of delegate copies and each
                 //    element into the array
-                int invocationCount = (int)m_extraFunctionPointerOrData;
-                del = new Delegate[invocationCount];
+                int invocationCount = (int)_extraFunctionPointerOrData;
 
+                var del = new Delegate[invocationCount];
                 for (int i = 0; i < del.Length; i++)
-                    del[i] = invocationList[i];
+                    del[i] = invocationList[i].Value;
+                return del;
             }
-            return del;
-        }
-
-        private bool InvocationListEquals(MulticastDelegate d)
-        {
-            Delegate[] invocationList = (Delegate[])m_helperObject;
-            if (d.m_extraFunctionPointerOrData != m_extraFunctionPointerOrData)
-                return false;
 
-            int invocationCount = (int)m_extraFunctionPointerOrData;
-            for (int i = 0; i < invocationCount; i++)
-            {
-                Delegate dd = invocationList[i];
-                Delegate[] dInvocationList = (Delegate[])d.m_helperObject;
-                if (!dd.Equals(dInvocationList[i]))
-                    return false;
-            }
-            return true;
+            return new Delegate[] { this };
         }
 
         public override bool Equals([NotNullWhen(true)] object? obj)
@@ -688,73 +641,92 @@ public override bool Equals([NotNullWhen(true)] object? obj)
             if (!InternalEqualTypes(this, obj))
                 return false;
 
-            // Since this is a MulticastDelegate and we know
-            // the types are the same, obj should also be a
-            // MulticastDelegate
-            Debug.Assert(obj is MulticastDelegate, "Shouldn't have failed here since we already checked the types are the same!");
-            var d = Unsafe.As<MulticastDelegate>(obj);
+            // Since this is a Delegate and we know the types are the same, obj should also be a Delegate
+            Debug.Assert(obj is Delegate, "Shouldn't have failed here since we already checked the types are the same!");
+            var d = Unsafe.As<Delegate>(obj);
 
-            // there are 2 kind of delegate kinds for comparison
-            // 1- Multicast (m_helperObject is Delegate[])
-            // 2- Single-cast delegate, which can be compared with a structural comparison
-
-            IntPtr multicastThunk = GetThunk(MulticastThunk);
-            if (m_functionPointer == multicastThunk)
-            {
-                return d.m_functionPointer == multicastThunk && InvocationListEquals(d);
-            }
-            else
+            if (_helperObject is Wrapper[] invocationList)
             {
-                if (!object.ReferenceEquals(m_helperObject, d.m_helperObject) ||
-                    (!FunctionPointerOps.Compare(m_extraFunctionPointerOrData, d.m_extraFunctionPointerOrData)) ||
-                    (!FunctionPointerOps.Compare(m_functionPointer, d.m_functionPointer)))
-                {
+                if (d._extraFunctionPointerOrData != _extraFunctionPointerOrData)
                     return false;
-                }
 
-                // Those delegate kinds with thunks put themselves into the m_firstParameter, so we can't
-                // blindly compare the m_firstParameter fields for equality.
-                if (object.ReferenceEquals(m_firstParameter, this))
+                if (d._helperObject is not Wrapper[] dInvocationList)
+                    return false;
+
+                int invocationCount = (int)_extraFunctionPointerOrData;
+                for (int i = 0; i < invocationCount; i++)
                 {
-                    return object.ReferenceEquals(d.m_firstParameter, d);
+                    if (!invocationList[i].Value.Equals(dInvocationList[i].Value))
+                        return false;
                 }
+                return true;
+            }
 
-                return object.ReferenceEquals(m_firstParameter, d.m_firstParameter);
+            if (_firstParameter is NativeFunctionPointerWrapper nativeFunctionPointerWrapper)
+            {
+                if (d._firstParameter is not NativeFunctionPointerWrapper dnativeFunctionPointerWrapper)
+                    return false;
+
+                return nativeFunctionPointerWrapper.NativeFunctionPointer == dnativeFunctionPointerWrapper.NativeFunctionPointer;
+            }
+
+            if (!object.ReferenceEquals(_helperObject, d._helperObject) ||
+                (!FunctionPointerOps.Compare(_extraFunctionPointerOrData, d._extraFunctionPointerOrData)) ||
+                (!FunctionPointerOps.Compare(_functionPointer, d._functionPointer)))
+            {
+                return false;
+            }
+
+            // Those delegate kinds with thunks put themselves into the _firstParameter, so we can't
+            // blindly compare the _firstParameter fields for equality.
+            if (object.ReferenceEquals(_firstParameter, this))
+            {
+                return object.ReferenceEquals(d._firstParameter, d);
             }
+
+            return object.ReferenceEquals(_firstParameter, d._firstParameter);
         }
 
         public override int GetHashCode()
         {
-            Delegate[]? invocationList = m_helperObject as Delegate[];
-            if (invocationList == null)
-            {
-                return base.GetHashCode();
-            }
-            else
+            if (_helperObject is Wrapper[] invocationList)
             {
-                int hash = 0;
-                for (int i = 0; i < (int)m_extraFunctionPointerOrData; i++)
+                int multiCastHash = 0;
+                for (int i = 0; i < (int)_extraFunctionPointerOrData; i++)
                 {
-                    hash = hash * 33 + invocationList[i].GetHashCode();
+                    multiCastHash = multiCastHash * 33 + invocationList[i].Value.GetHashCode();
                 }
+                return multiCastHash;
+            }
+
+            if (_firstParameter is NativeFunctionPointerWrapper nativeFunctionPointerWrapper)
+            {
+                return nativeFunctionPointerWrapper.NativeFunctionPointer.GetHashCode();
+            }
+
+            int hash = RuntimeHelpers.GetHashCode(_helperObject) +
+                7 * FunctionPointerOps.GetHashCode(_extraFunctionPointerOrData) +
+                13 * FunctionPointerOps.GetHashCode(_functionPointer);
 
-                return hash;
+            if (!object.ReferenceEquals(_firstParameter, this))
+            {
+                hash += 17 * RuntimeHelpers.GetHashCode(_firstParameter);
             }
+
+            return hash;
         }
 
-        public bool HasSingleTarget => !(m_helperObject is Delegate[]);
+        public bool HasSingleTarget => _helperObject is not Wrapper[];
 
         // Used by delegate invocation list enumerator
         internal Delegate? TryGetAt(int index)
         {
-            if (!(m_helperObject is Delegate[] invocationList))
+            if (_helperObject is Wrapper[] invocationList)
             {
-                return (index == 0) ? this : null;
-            }
-            else
-            {
-                return ((uint)index < (uint)m_extraFunctionPointerOrData) ? invocationList[index] : null;
+                return ((uint)index < (uint)_extraFunctionPointerOrData) ? invocationList[index].Value : null;
             }
+
+            return (index == 0) ? this : null;
         }
     }
 }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Diagnostics/Eventing/EventPipe.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Diagnostics/Eventing/EventPipe.NativeAot.cs
deleted file mode 100644
index 8098c630dc90..000000000000
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Diagnostics/Eventing/EventPipe.NativeAot.cs
+++ /dev/null
@@ -1,163 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Runtime;
-using System.Runtime.InteropServices;
-
-#if FEATURE_PERFTRACING
-
-namespace System.Diagnostics.Tracing
-{
-    //
-    // NOTE:
-    //
-    // The implementation below takes some manual marshaling actions to ensure arguments are in
-    // primitive form before they are passed through to the underlying RuntimeImports.Rh*
-    // function.
-    //
-    // These extra steps are necessary only if the RuntimeImports mechanism represents "raw"
-    // calls into the native runtime (as has been the case at least in the distant past).
-    //
-    // If the RuntimeImports mechanism automatically applies rich p/invoke marshaling to all of
-    // these calls, then all of the manual steps below are unnecessary and can be removed (by
-    // making the RuntimeImports.Rh* function signatures generally match the corresponding
-    // EventPipeInternal function signatures; in other words, by making the RuntimeImports.Rh*
-    // functions look like the QCalls in EventPipe.CoreCLR.cs).
-    //
-    internal static partial class EventPipeInternal
-    {
-        //
-        // These PInvokes are used by the configuration APIs to interact with EventPipe.
-        //
-        private static unsafe ulong Enable(
-            char* outputFile,
-            EventPipeSerializationFormat format,
-            uint circularBufferSizeInMB,
-            EventPipeProviderConfigurationNative* providers,
-            uint numProviders)
-        {
-            return RuntimeImports.RhEventPipeInternal_Enable(
-                outputFile,
-                (int)format,
-                circularBufferSizeInMB,
-                providers,
-                numProviders);
-        }
-
-        internal static void Disable(ulong sessionID)
-        {
-            RuntimeImports.RhEventPipeInternal_Disable(sessionID);
-        }
-
-        //
-        // These PInvokes are used by EventSource to interact with the EventPipe.
-        //
-
-//        private static extern unsafe IntPtr CreateProvider(string providerName, IntPtr callbackFunc, IntPtr callbackContext);
-
-        internal static unsafe IntPtr CreateProvider(string providerName,
-                    delegate* unmanaged<byte*, int, byte, long, long, Interop.Advapi32.EVENT_FILTER_DESCRIPTOR*, void*, void> callbackFunc,
-                    void* callbackContext)
-                    => CreateProvider(providerName, (IntPtr)callbackFunc, (IntPtr)callbackContext);
-        //internal static unsafe IntPtr CreateProvider(string providerName, IntPtr callbackFunc, IntPtr callbackContext);
-
-        internal static unsafe IntPtr CreateProvider(string providerName, IntPtr callbackFunc, IntPtr callbackContext)
-        {
-            fixed (char* pProviderName = providerName)
-            {
-                return RuntimeImports.RhEventPipeInternal_CreateProvider(
-                    pProviderName,
-                    callbackFunc,
-                    callbackContext);
-            }
-        }
-
-        internal static unsafe IntPtr DefineEvent(
-            IntPtr provHandle,
-            uint eventID,
-            long keywords,
-            uint eventVersion,
-            uint level,
-            void *pMetadata,
-            uint metadataLength)
-        {
-            return RuntimeImports.RhEventPipeInternal_DefineEvent(
-                provHandle,
-                eventID,
-                keywords,
-                eventVersion,
-                level,
-                pMetadata,
-                metadataLength);
-        }
-
-        internal static unsafe IntPtr GetProvider(string providerName)
-        {
-            fixed (char* pProviderName = providerName)
-            {
-                return RuntimeImports.RhEventPipeInternal_GetProvider(pProviderName);
-            }
-        }
-
-        internal static void DeleteProvider(IntPtr provHandle)
-        {
-            RuntimeImports.RhEventPipeInternal_DeleteProvider(provHandle);
-        }
-
-        internal static unsafe int EventActivityIdControl(uint controlCode, ref Guid activityId)
-        {
-            //
-            // Ensure that the address passed to native code is never on the managed heap, while still
-            // managing the supplied byref in an in/out manner.
-            //
-            Guid localActivityId = activityId;
-            try { return RuntimeImports.RhEventPipeInternal_EventActivityIdControl(controlCode, &localActivityId); }
-            finally { activityId = localActivityId; }
-        }
-
-        internal static unsafe void WriteEventData(
-            IntPtr eventHandle,
-            EventProvider.EventData* pEventData,
-            uint dataCount,
-            Guid* activityId,
-            Guid* relatedActivityId)
-        {
-            RuntimeImports.RhEventPipeInternal_WriteEventData(
-                eventHandle,
-                pEventData,
-                dataCount,
-                activityId,
-                relatedActivityId);
-        }
-
-        //
-        // These PInvokes are used as part of the EventPipeEventDispatcher.
-        //
-        internal static unsafe bool GetSessionInfo(ulong sessionID, EventPipeSessionInfo* pSessionInfo)
-        {
-            uint rawBool = RuntimeImports.RhEventPipeInternal_GetSessionInfo(sessionID, pSessionInfo);
-            return (rawBool != 0);
-        }
-
-        internal static unsafe bool GetNextEvent(ulong sessionID, EventPipeEventInstanceData* pInstance)
-        {
-            uint rawBool = RuntimeImports.RhEventPipeInternal_GetNextEvent(sessionID, pInstance);
-            return (rawBool != 0);
-        }
-
-        internal static bool SignalSession(ulong sessionID)
-        {
-            uint rawBool = RuntimeImports.RhEventPipeInternal_SignalSession(sessionID);
-            return (rawBool != 0);
-        }
-
-        internal static bool WaitForSessionSignal(ulong sessionID, int timeoutMs)
-        {
-            uint rawBool = RuntimeImports.RhEventPipeInternal_WaitForSessionSignal(sessionID, timeoutMs);
-            return (rawBool != 0);
-        }
-    }
-}
-
-#endif // FEATURE_PERFTRACING
-
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Diagnostics/Eventing/NativeRuntimeEventSource.Threading.NativeSinks.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Diagnostics/Eventing/NativeRuntimeEventSource.Threading.NativeSinks.NativeAot.cs
deleted file mode 100644
index 7f214ab0b2bb..000000000000
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Diagnostics/Eventing/NativeRuntimeEventSource.Threading.NativeSinks.NativeAot.cs
+++ /dev/null
@@ -1,152 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics.Tracing;
-using System.Runtime;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-using System.Threading;
-
-using Internal.Runtime;
-using Internal.Runtime.CompilerServices;
-
-namespace System.Diagnostics.Tracing
-{
-    // This is part of the NativeRuntimeEventsource, which is the managed version of the Microsoft-Windows-DotNETRuntime provider.
-    // It contains the runtime specific interop to native event sinks.
-    internal sealed partial class NativeRuntimeEventSource : EventSource
-    {
-        // We don't have these keywords defined from the genRuntimeEventSources.py, so we need to manually define them here.
-        public static partial class Keywords
-        {
-            public const EventKeywords ContentionKeyword = (EventKeywords)0x4000;
-            public const EventKeywords ThreadingKeyword = (EventKeywords)0x10000;
-            public const EventKeywords ThreadTransferKeyword = (EventKeywords)0x80000000;
-            public const EventKeywords WaitHandleKeyword = (EventKeywords)0x40000000000;
-        }
-
-        [NonEvent]
-        internal static void LogContentionLockCreated(nint LockID, nint AssociatedObjectID, ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogContentionLockCreated(LockID, AssociatedObjectID, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogContentionStart(ContentionFlagsMap ContentionFlags, ushort ClrInstanceID, nint LockID, nint AssociatedObjectID, ulong LockOwnerThreadID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogContentionStart((byte)ContentionFlags, ClrInstanceID, LockID, AssociatedObjectID, LockOwnerThreadID);
-        }
-
-        [NonEvent]
-        internal static void LogContentionStop(ContentionFlagsMap ContentionFlags, ushort ClrInstanceID, double DurationNs)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogContentionStop((byte)ContentionFlags, ClrInstanceID, DurationNs);
-        }
-
-        [NonEvent]
-        internal static void LogThreadPoolWorkerThreadStart(uint ActiveWorkerThreadCount, uint RetiredWorkerThreadCount, ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogThreadPoolWorkerThreadStart(ActiveWorkerThreadCount, RetiredWorkerThreadCount, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogThreadPoolWorkerThreadStop(uint ActiveWorkerThreadCount, uint RetiredWorkerThreadCount, ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogThreadPoolWorkerThreadStop(ActiveWorkerThreadCount, RetiredWorkerThreadCount, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogThreadPoolWorkerThreadWait(uint ActiveWorkerThreadCount, uint RetiredWorkerThreadCount, ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogThreadPoolWorkerThreadWait(ActiveWorkerThreadCount, RetiredWorkerThreadCount, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogThreadPoolMinMaxThreads(ushort MinWorkerThreads, ushort MaxWorkerThreads, ushort MinIOCompletionThreads, ushort MaxIOCompletionThreads, ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogThreadPoolMinMaxThreads(MinWorkerThreads, MaxWorkerThreads, MinIOCompletionThreads, MaxIOCompletionThreads, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogThreadPoolWorkerThreadAdjustmentSample(double Throughput, ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentSample(Throughput, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogThreadPoolWorkerThreadAdjustmentAdjustment(double AverageThroughput, uint NewWorkerThreadCount, ThreadAdjustmentReasonMap Reason, ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentAdjustment(AverageThroughput, NewWorkerThreadCount, (uint)Reason, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogThreadPoolWorkerThreadAdjustmentStats(
-            double Duration,
-            double Throughput,
-            double ThreadPoolWorkerThreadWait,
-            double ThroughputWave,
-            double ThroughputErrorEstimate,
-            double AverageThroughputErrorEstimate,
-            double ThroughputRatio,
-            double Confidence,
-            double NewControlSetting,
-            ushort NewThreadWaveMagnitude,
-            ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentStats(Duration, Throughput, ThreadPoolWorkerThreadWait, ThroughputWave,
-            ThroughputErrorEstimate, AverageThroughputErrorEstimate, ThroughputRatio, Confidence, NewControlSetting, NewThreadWaveMagnitude, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogThreadPoolIOEnqueue(
-            IntPtr NativeOverlapped,
-            IntPtr Overlapped,
-            [MarshalAs(UnmanagedType.Bool)] bool MultiDequeues,
-            ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogThreadPoolIOEnqueue(NativeOverlapped, Overlapped, MultiDequeues, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogThreadPoolIODequeue(
-            IntPtr NativeOverlapped,
-            IntPtr Overlapped,
-            ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogThreadPoolIODequeue(NativeOverlapped, Overlapped, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogThreadPoolWorkingThreadCount(
-            uint Count,
-            ushort ClrInstanceID
-        )
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogThreadPoolWorkingThreadCount(Count, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogThreadPoolIOPack(
-            IntPtr NativeOverlapped,
-            IntPtr Overlapped,
-            ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogThreadPoolIOPack(NativeOverlapped, Overlapped, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogWaitHandleWaitStart(
-            WaitHandleWaitSourceMap WaitSource,
-            IntPtr AssociatedObjectID,
-            ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogWaitHandleWaitStart((byte)WaitSource, AssociatedObjectID, ClrInstanceID);
-        }
-
-        [NonEvent]
-        internal static void LogWaitHandleWaitStop(ushort ClrInstanceID)
-        {
-            RuntimeImports.NativeRuntimeEventSource_LogWaitHandleWaitStop(ClrInstanceID);
-        }
-    }
-}
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/EventReporter.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/EventReporter.cs
new file mode 100644
index 000000000000..efb46025dbf5
--- /dev/null
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/EventReporter.cs
@@ -0,0 +1,190 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.IO;
+using System.Runtime;
+using System.Text;
+using System.Threading;
+
+namespace System
+{
+    internal class EventReporter
+    {
+        private readonly RhFailFastReason _eventType;
+        private readonly StringBuilder _description = new StringBuilder();
+        private bool _bufferFull;
+
+        public unsafe EventReporter(RhFailFastReason eventType)
+        {
+            _eventType = eventType;
+
+            string? processPath = Environment.ProcessPath;
+
+            _description.Append("Application: ");
+
+            // If we were able to get an app name.
+            if (processPath != null)
+            {
+                // If app name has a '\', consider the part after that; otherwise consider whole name.
+                _description.AppendLine(Path.GetFileName(processPath));
+            }
+            else
+            {
+                _description.AppendLine("unknown");
+            }
+
+            _description.Append("CoreCLR Version: ");
+
+            byte* utf8version = RuntimeImports.RhGetRuntimeVersion(out int cbLength);
+            _description.AppendLine(new string((sbyte*)utf8version));
+
+            switch (_eventType)
+            {
+                case RhFailFastReason.UnhandledException:
+                case RhFailFastReason.UnhandledExceptionFromPInvoke:
+                    _description.AppendLine("Description: The process was terminated due to an unhandled exception.");
+                    break;
+                case RhFailFastReason.EnvironmentFailFast:
+                case RhFailFastReason.AssertionFailure:
+                    _description.AppendLine("Description: The application requested process termination through System.Environment.FailFast.");
+                    break;
+                case RhFailFastReason.InternalError:
+                    _description.AppendLine("Description: The process was terminated due to an internal error in the .NET Runtime ");
+                    break;
+                default:
+                    Debug.Fail($"Unknown {nameof(RhFailFastReason)}");
+                    break;
+            }
+        }
+
+        public void AddDescription(string s)
+        {
+            Debug.Assert(_eventType is RhFailFastReason.UnhandledException
+                or RhFailFastReason.EnvironmentFailFast or RhFailFastReason.AssertionFailure
+                or RhFailFastReason.UnhandledExceptionFromPInvoke or RhFailFastReason.InternalError);
+            if (_eventType is RhFailFastReason.EnvironmentFailFast or RhFailFastReason.AssertionFailure)
+            {
+                _description.Append("Message: ");
+            }
+            else if (_eventType == RhFailFastReason.UnhandledException)
+            {
+                _description.Append("Exception Info: ");
+            }
+            _description.AppendLine(s);
+        }
+
+        public void BeginStackTrace()
+        {
+            Debug.Assert(_eventType is RhFailFastReason.UnhandledException
+                or RhFailFastReason.EnvironmentFailFast or RhFailFastReason.AssertionFailure
+                or RhFailFastReason.UnhandledExceptionFromPInvoke);
+            _description.AppendLine("Stack:");
+        }
+
+        public void AddStackTrace(string s)
+        {
+            // The (approx.) maximum size that EventLog appears to allow.
+            //
+            // An event entry comprises of string to be written and event header information.
+            // The total permissible length of the string and event header is 32K.
+            const int MAX_SIZE_EVENTLOG_ENTRY_STRING = 0x7C62; // decimal 31842
+
+            // Continue to append to the buffer until we are full
+            if (!_bufferFull)
+            {
+                _description.AppendLine(s);
+
+                // Truncate the buffer if we have exceeded the limit based upon the OS we are on
+                if (_description.Length > MAX_SIZE_EVENTLOG_ENTRY_STRING)
+                {
+                    // Load the truncation message
+                    string truncate = "\nThe remainder of the message was truncated.\n";
+
+                    int truncCount = truncate.Length;
+
+                    // Go back "truncCount" characters from the end of the string.
+                    int ext = MAX_SIZE_EVENTLOG_ENTRY_STRING - truncCount;
+
+                    // Now look for a "\n" from the last position we got
+                    for (; ext > 0 && _description[ext] != '\n'; ext--) ;
+
+                    // Truncate the string till our current position and append
+                    // the truncation message
+                    _description.Length = ext;
+
+                    _description.Append(truncate);
+
+                    // Set the flag that we are full - no point appending more stack details
+                    _bufferFull = true;
+                }
+            }
+        }
+
+        public void Report()
+        {
+            uint eventID;
+            switch (_eventType)
+            {
+                case RhFailFastReason.UnhandledException:
+                case RhFailFastReason.UnhandledExceptionFromPInvoke:
+                    eventID = 1026;
+                    break;
+                case RhFailFastReason.EnvironmentFailFast:
+                case RhFailFastReason.AssertionFailure:
+                    eventID = 1025;
+                    break;
+                case RhFailFastReason.InternalError:
+                    eventID = 1023;
+                    break;
+                default:
+                    Debug.Fail("Invalid event type");
+                    eventID = 1023;
+                    break;
+            }
+
+            if (_description.Length > 0)
+            {
+                ClrReportEvent(".NET Runtime",
+                       1 /* EVENTLOG_ERROR_TYPE */,
+                       0,
+                       eventID,
+                       _description.ToString()
+                       );
+            }
+        }
+
+        private static unsafe void ClrReportEvent(string eventSource, short type, ushort category, uint eventId, string message)
+        {
+            IntPtr handle = Interop.Advapi32.RegisterEventSource(
+                null, // uses local computer
+                eventSource);
+
+            if (handle == IntPtr.Zero)
+                return;
+
+            fixed (char* pMessage = message)
+            {
+                Interop.Advapi32.ReportEvent(handle, type, category, eventId, null, 1, 0, (nint)(&pMessage), null);
+            }
+
+            Interop.Advapi32.DeregisterEventSource(handle);
+        }
+
+        private static byte s_once;
+
+        public static bool ShouldLogInEventLog
+        {
+            get
+            {
+                if (Interop.Kernel32.IsDebuggerPresent())
+                    return false;
+
+                if (s_once == 1 || Interlocked.Exchange(ref s_once, 1) == 1)
+                    return false;
+
+                return true;
+            }
+        }
+    }
+}
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/GC.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/GC.NativeAot.cs
index 62ed9a722671..0ee16609157a 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/GC.NativeAot.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/GC.NativeAot.cs
@@ -695,7 +695,9 @@ public static unsafe IReadOnlyDictionary<string, object> GetConfigurationVariabl
                 Configurations = new Dictionary<string, object>()
             };
 
-            RuntimeImports.RhEnumerateConfigurationValues(Unsafe.AsPointer(ref context), &ConfigCallback);
+#pragma warning disable CS8500 // takes address of managed type
+            RuntimeImports.RhEnumerateConfigurationValues(&context, &ConfigCallback);
+#pragma warning restore CS8500
             return context.Configurations!;
         }
 
@@ -830,7 +832,9 @@ static T[] AllocateNewUninitializedArray(int length, bool pinned)
                     throw new OverflowException();
 
                 T[]? array = null;
-                RuntimeImports.RhAllocateNewArray(MethodTable.Of<T[]>(), (uint)length, (uint)flags, Unsafe.AsPointer(ref array));
+#pragma warning disable CS8500 // takes address of managed type
+                RuntimeImports.RhAllocateNewArray(MethodTable.Of<T[]>(), (uint)length, (uint)flags, &array);
+#pragma warning restore CS8500
                 if (array == null)
                     throw new OutOfMemoryException();
 
@@ -857,7 +861,9 @@ public static unsafe T[] AllocateArray<T>(int length, bool pinned = false)
                 throw new OverflowException();
 
             T[]? array = null;
-            RuntimeImports.RhAllocateNewArray(MethodTable.Of<T[]>(), (uint)length, (uint)flags, Unsafe.AsPointer(ref array));
+#pragma warning disable CS8500 // takes address of managed type
+            RuntimeImports.RhAllocateNewArray(MethodTable.Of<T[]>(), (uint)length, (uint)flags, &array);
+#pragma warning restore CS8500
             if (array == null)
                 throw new OutOfMemoryException();
 
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Math.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Math.NativeAot.cs
index 15b629dbee21..f5e6b88fbf27 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Math.NativeAot.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Math.NativeAot.cs
@@ -154,12 +154,6 @@ public static double Tanh(double value)
             return RuntimeImports.tanh(value);
         }
 
-        [Intrinsic]
-        private static double FMod(double x, double y)
-        {
-            return RuntimeImports.fmod(x, y);
-        }
-
         [Intrinsic]
         private static unsafe double ModF(double x, double* intptr)
         {
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/MathF.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/MathF.NativeAot.cs
index 2f42ad90e94c..3f430fba6314 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/MathF.NativeAot.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/MathF.NativeAot.cs
@@ -154,12 +154,6 @@ public static float Tanh(float x)
             return RuntimeImports.tanhf(x);
         }
 
-        [Intrinsic]
-        private static float FMod(float x, float y)
-        {
-            return RuntimeImports.fmodf(x, y);
-        }
-
         [Intrinsic]
         private static unsafe float ModF(float x, float* intptr)
         {
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs
index 727fbc9fbfdd..9f8dbe11a212 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs
@@ -41,7 +41,7 @@ protected internal unsafe object MemberwiseClone()
             if (this.GetMethodTable()->ContainsGCPointers)
                 Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount);
             else
-                Buffer.Memmove(ref dst, ref src, byteCount);
+                SpanHelpers.Memmove(ref dst, ref src, byteCount);
 
             return clone;
         }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/ConstructorInvoker.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/ConstructorInvoker.cs
index 57e446520b2e..52e2589b3e97 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/ConstructorInvoker.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/ConstructorInvoker.cs
@@ -53,7 +53,7 @@ public object Invoke(object? arg1)
                 ThrowForArgCountMismatch();
             }
 
-            object result = _methodBaseInvoker.CreateInstanceWithFewArgs(new Span<object?>(ref arg1, _parameterCount));
+            object result = _methodBaseInvoker.CreateInstanceWithFewArgs(new Span<object?>(ref arg1));
             DebugAnnotations.PreviousCallContainsDebuggerStepInCode();
             return result;
         }
@@ -67,9 +67,9 @@ public object Invoke(object? arg1, object? arg2)
             }
 
             StackAllocatedArguments argStorage = default;
-            argStorage._args.Set(0, arg1);
-            argStorage._args.Set(1, arg2);
-            object result = _methodBaseInvoker.CreateInstanceWithFewArgs(argStorage._args.AsSpan(_parameterCount));
+            argStorage._args[0] = arg1;
+            argStorage._args[1] = arg2;
+            object result = _methodBaseInvoker.CreateInstanceWithFewArgs(((Span<object?>)argStorage._args).Slice(0, 2));
             DebugAnnotations.PreviousCallContainsDebuggerStepInCode();
             return result;
         }
@@ -83,10 +83,10 @@ public object Invoke(object? arg1, object? arg2, object? arg3)
             }
 
             StackAllocatedArguments argStorage = default;
-            argStorage._args.Set(0, arg1);
-            argStorage._args.Set(1, arg2);
-            argStorage._args.Set(2, arg3);
-            object result = _methodBaseInvoker.CreateInstanceWithFewArgs(argStorage._args.AsSpan(_parameterCount));
+            argStorage._args[0] = arg1;
+            argStorage._args[1] = arg2;
+            argStorage._args[2] = arg3;
+            object result = _methodBaseInvoker.CreateInstanceWithFewArgs(((Span<object?>)argStorage._args).Slice(0, 3));
             DebugAnnotations.PreviousCallContainsDebuggerStepInCode();
             return result;
         }
@@ -100,11 +100,11 @@ public object Invoke(object? arg1, object? arg2, object? arg3, object? arg4)
             }
 
             StackAllocatedArguments argStorage = default;
-            argStorage._args.Set(0, arg1);
-            argStorage._args.Set(1, arg2);
-            argStorage._args.Set(2, arg3);
-            argStorage._args.Set(3, arg4);
-            object result = _methodBaseInvoker.CreateInstanceWithFewArgs(argStorage._args.AsSpan(_parameterCount));
+            argStorage._args[0] = arg1;
+            argStorage._args[1] = arg2;
+            argStorage._args[2] = arg3;
+            argStorage._args[3] = arg4;
+            object result = _methodBaseInvoker.CreateInstanceWithFewArgs(((Span<object?>)argStorage._args).Slice(0, 4));
             DebugAnnotations.PreviousCallContainsDebuggerStepInCode();
             return result;
         }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/DynamicInvokeInfo.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/DynamicInvokeInfo.cs
index 012e410885b8..6ddac7c3d3de 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/DynamicInvokeInfo.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/DynamicInvokeInfo.cs
@@ -497,10 +497,9 @@ private unsafe ref byte InvokeWithFewArguments(
             object?[] parameters, BinderBundle? binderBundle, bool wrapInTargetInvocationException)
         {
             Debug.Assert(_argumentCount <= MaxStackAllocArgCount);
-            int argCount = _argumentCount;
 
             StackAllocatedArguments argStorage = default;
-            Span<object?> copyOfParameters = argStorage._args.AsSpan(argCount);
+            Span<object?> copyOfParameters = ((Span<object?>)argStorage._args).Slice(0, _argumentCount);
             StackAllocatedByRefs byrefStorage = default;
 #pragma warning disable CS8500
             void* pByRefStorage = (ByReference*)&byrefStorage;
@@ -532,10 +531,9 @@ private unsafe ref byte InvokeWithFewArguments(
             IntPtr methodToCall, ref byte thisArg, ref byte ret, Span<object?> parameters)
         {
             Debug.Assert(_argumentCount <= MaxStackAllocArgCount);
-            int argCount = _argumentCount;
 
             StackAllocatedArguments argStorage = default;
-            Span<object?> copyOfParameters = argStorage._args.AsSpan(argCount);
+            Span<object?> copyOfParameters = ((Span<object?>)argStorage._args).Slice(0, _argumentCount);
             StackAllocatedByRefs byrefStorage = default;
 #pragma warning disable CS8500
             void* pByRefStorage = (ByReference*)&byrefStorage;
@@ -884,19 +882,6 @@ private unsafe object ReturnTransform(ref byte byref, bool wrapInTargetInvocatio
         internal struct ArgumentData<T>
         {
             private T _arg0;
-
-            [UnscopedRef]
-            public Span<T> AsSpan(int length)
-            {
-                Debug.Assert((uint)length <= MaxStackAllocArgCount);
-                return new Span<T>(ref _arg0, length);
-            }
-
-            public void Set(int index, T value)
-            {
-                Debug.Assert((uint)index < MaxStackAllocArgCount);
-                Unsafe.Add(ref _arg0, index) = value;
-            }
         }
 
         // Helper struct to avoid intermediate object[] allocation in calls to the native reflection stack.
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/MethodInvoker.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/MethodInvoker.cs
index 847914850f99..661f7f285352 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/MethodInvoker.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/MethodInvoker.cs
@@ -81,10 +81,10 @@ public static MethodInvoker Create(MethodBase method)
             }
 
             StackAllocatedArguments argStorage = default;
-            argStorage._args.Set(0, arg1);
-            argStorage._args.Set(1, arg2);
+            argStorage._args[0] = arg1;
+            argStorage._args[1] = arg2;
 
-            object? result = _methodBaseInvoker.InvokeDirectWithFewArgs(obj, argStorage._args.AsSpan(_parameterCount));
+            object? result = _methodBaseInvoker.InvokeDirectWithFewArgs(obj, ((Span<object?>)argStorage._args).Slice(0, 2));
             DebugAnnotations.PreviousCallContainsDebuggerStepInCode();
             return result;
         }
@@ -98,11 +98,11 @@ public static MethodInvoker Create(MethodBase method)
             }
 
             StackAllocatedArguments argStorage = default;
-            argStorage._args.Set(0, arg1);
-            argStorage._args.Set(1, arg2);
-            argStorage._args.Set(2, arg3);
+            argStorage._args[0] = arg1;
+            argStorage._args[1] = arg2;
+            argStorage._args[2] = arg3;
 
-            object? result = _methodBaseInvoker.InvokeDirectWithFewArgs(obj, argStorage._args.AsSpan(_parameterCount));
+            object? result = _methodBaseInvoker.InvokeDirectWithFewArgs(obj, ((Span<object?>)argStorage._args).Slice(0, 3));
             DebugAnnotations.PreviousCallContainsDebuggerStepInCode();
             return result;
         }
@@ -116,12 +116,12 @@ public static MethodInvoker Create(MethodBase method)
             }
 
             StackAllocatedArguments argStorage = default;
-            argStorage._args.Set(0, arg1);
-            argStorage._args.Set(1, arg2);
-            argStorage._args.Set(2, arg3);
-            argStorage._args.Set(3, arg4);
+            argStorage._args[0] = arg1;
+            argStorage._args[1] = arg2;
+            argStorage._args[2] = arg3;
+            argStorage._args[3] = arg4;
 
-            object? result = _methodBaseInvoker.InvokeDirectWithFewArgs(obj, argStorage._args.AsSpan(_parameterCount));
+            object? result = _methodBaseInvoker.InvokeDirectWithFewArgs(obj, ((Span<object?>)argStorage._args).Slice(0, 4));
             DebugAnnotations.PreviousCallContainsDebuggerStepInCode();
             return result;
         }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/CustomMethodInvoker.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/CustomMethodInvoker.cs
index fbf56e6e4603..f2c15681c288 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/CustomMethodInvoker.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/CustomMethodInvoker.cs
@@ -36,7 +36,7 @@ public CustomMethodInvoker(Type thisType, Type[] parameterTypes, InvokerOptions
             if (!(thisObject == null && 0 != (_options & InvokerOptions.AllowNullThis)))
                 ValidateThis(thisObject, _thisType.TypeHandle);
 
-            int argCount = (arguments != null) ? arguments.Length : 0;
+            int argCount = arguments.Length;
             if (argCount != _parameterTypes.Length)
                 throw new TargetParameterCountException();
 
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/RuntimeNamedMethodInfo.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/RuntimeNamedMethodInfo.cs
index 68554ad27168..a0a726a4607e 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/RuntimeNamedMethodInfo.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/MethodInfos/RuntimeNamedMethodInfo.cs
@@ -309,7 +309,7 @@ protected sealed override MethodBaseInvoker UncachedMethodInvoker
                 if (invoker != null)
                     return invoker;
 
-                return GetUncachedMethodInvoker(Array.Empty<RuntimeTypeInfo>(), this);
+                return GetUncachedMethodInvoker(GenericTypeParameters, this);
             }
         }
 
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/RuntimeAssembly.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/RuntimeAssembly.cs
index 24a10f91faa7..9587d6f2d67f 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/RuntimeAssembly.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/RuntimeAssembly.cs
@@ -1,10 +1,34 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Globalization;
+
+using Internal.Reflection.Augments;
+
 namespace System.Reflection
 {
     // Base class for runtime implemented Assembly
     public abstract class RuntimeAssembly : Assembly
     {
+        internal static Assembly? InternalGetSatelliteAssembly(Assembly mainAssembly, CultureInfo culture, Version? version, bool throwOnFileNotFound)
+        {
+            AssemblyName mainAssemblyAn = mainAssembly.GetName();
+            AssemblyName an = new AssemblyName();
+
+            an.CultureInfo = culture;
+            an.Name = mainAssemblyAn.Name + ".resources";
+            an.SetPublicKeyToken(mainAssemblyAn.GetPublicKeyToken());
+            an.Flags = mainAssemblyAn.Flags;
+            an.Version = version ?? mainAssemblyAn.Version;
+
+            Assembly? retAssembly = ReflectionAugments.ReflectionCoreCallbacks.Load(an, throwOnFileNotFound);
+
+            if (retAssembly == mainAssembly)
+            {
+                retAssembly = null;
+            }
+
+            return retAssembly;
+        }
     }
 }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.NativeAot.cs
deleted file mode 100644
index f16c1ba38923..000000000000
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.NativeAot.cs
+++ /dev/null
@@ -1,41 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Globalization;
-using System.IO;
-using System.Reflection;
-using System.Text;
-
-using Internal.Reflection.Augments;
-
-namespace System.Resources
-{
-    internal partial class ManifestBasedResourceGroveler
-    {
-        // Internal version of GetSatelliteAssembly that avoids throwing FileNotFoundException
-        private static Assembly? InternalGetSatelliteAssembly(Assembly mainAssembly,
-                                                             CultureInfo culture,
-                                                             Version? version)
-        {
-            AssemblyName mainAssemblyAn = mainAssembly.GetName();
-            AssemblyName an = new AssemblyName();
-
-            an.CultureInfo = culture;
-            an.Name = mainAssemblyAn.Name + ".resources";
-            an.SetPublicKeyToken(mainAssemblyAn.GetPublicKeyToken());
-            an.Flags = mainAssemblyAn.Flags;
-            an.Version = version ?? mainAssemblyAn.Version;
-
-            Assembly? retAssembly = ReflectionAugments.ReflectionCoreCallbacks.Load(an, false);
-
-            if (retAssembly == mainAssembly)
-            {
-                retAssembly = null;
-            }
-
-            return retAssembly;
-        }
-    }
-}
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeFeature.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeFeature.NativeAot.cs
index 2babd66c3597..d91f79ce16a2 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeFeature.NativeAot.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeFeature.NativeAot.cs
@@ -1,11 +1,16 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Diagnostics.CodeAnalysis;
+
 namespace System.Runtime.CompilerServices
 {
     public static partial class RuntimeFeature
     {
+        [FeatureSwitchDefinition("System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeSupported")]
         public static bool IsDynamicCodeSupported => false;
+
+        [FeatureGuard(typeof(RequiresDynamicCodeAttribute))]
         public static bool IsDynamicCodeCompiled => false;
     }
 }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.NativeAot.cs
index 262ddb9857b7..c790682ecfcb 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.NativeAot.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.NativeAot.cs
@@ -361,6 +361,44 @@ public static unsafe object GetUninitializedObject(
 
             return RuntimeImports.RhNewObject(mt);
         }
+
+        /// <summary>
+        /// Create a boxed object of the specified type from the data located at the target reference.
+        /// </summary>
+        /// <param name="target">The target data</param>
+        /// <param name="type">The type of box to create.</param>
+        /// <returns>A boxed object containing the specified data.</returns>
+        /// <exception cref="ArgumentNullException">The specified type handle is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException">The specified type cannot have a boxed instance of itself created.</exception>
+        /// <exception cref="NotSupportedException">The passed in type is a by-ref-like type.</exception>
+        public static unsafe object? Box(ref byte target, RuntimeTypeHandle type)
+        {
+            if (type.IsNull)
+                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.type);
+
+            MethodTable* mt = type.ToMethodTable();
+
+            if (mt->ElementType == EETypeElementType.Void || mt->IsGenericTypeDefinition || mt->IsByRef || mt->IsPointer || mt->IsFunctionPointer)
+                throw new ArgumentException(SR.Arg_TypeNotSupported);
+
+            if (mt->NumVtableSlots == 0)
+            {
+                // This is a type without a vtable or GCDesc. We must not allow creating an instance of it
+                throw ReflectionCoreExecution.ExecutionEnvironment.CreateMissingMetadataException(Type.GetTypeFromHandle(type));
+            }
+            // Paranoid check: not-meant-for-GC-heap types should be reliably identifiable by empty vtable.
+            Debug.Assert(!mt->ContainsGCPointers || RuntimeImports.RhGetGCDescSize(mt) != 0);
+
+            if (!mt->IsValueType)
+            {
+                return Unsafe.As<byte, object>(ref target);
+            }
+
+            if (mt->IsByRefLike)
+                throw new NotSupportedException(SR.NotSupported_ByRefLike);
+
+            return RuntimeImports.RhBox(mt, ref target);
+        }
     }
 
     // CLR arrays are laid out in memory as follows (multidimensional array bounds are optional):
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs
index a3ccfc5a8c43..490997c1da90 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs
@@ -93,7 +93,7 @@ internal static unsafe void PtrToStructureImpl(IntPtr ptr, object structure)
             {
                 nuint size = (nuint)RuntimeInteropData.GetStructUnsafeStructSize(structureTypeHandle);
 
-                Buffer.Memmove(ref structure.GetRawData(), ref *(byte*)ptr, size);
+                SpanHelpers.Memmove(ref structure.GetRawData(), ref *(byte*)ptr, size);
             }
         }
 
@@ -180,7 +180,7 @@ public static unsafe void StructureToPtr(object structure, IntPtr ptr, bool fDel
             {
                 nuint size = (nuint)RuntimeInteropData.GetStructUnsafeStructSize(structureTypeHandle);
 
-                Buffer.Memmove(ref *(byte*)ptr, ref structure.GetRawData(), size);
+                SpanHelpers.Memmove(ref *(byte*)ptr, ref structure.GetRawData(), size);
             }
         }
 
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeFunctionPointerWrapper.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeFunctionPointerWrapper.cs
index bb3ea2b5f10e..4571165530ab 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeFunctionPointerWrapper.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeFunctionPointerWrapper.cs
@@ -12,14 +12,9 @@ internal abstract class NativeFunctionPointerWrapper
     {
         public NativeFunctionPointerWrapper(IntPtr nativeFunctionPointer)
         {
-            m_nativeFunctionPointer = nativeFunctionPointer;
+            NativeFunctionPointer = nativeFunctionPointer;
         }
 
-        private IntPtr m_nativeFunctionPointer;
-
-        public IntPtr NativeFunctionPointer
-        {
-            get { return m_nativeFunctionPointer; }
-        }
+        public IntPtr NativeFunctionPointer { get; }
     }
 }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs
index bf7fa122af6c..0e1ad8d04865 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs
@@ -56,7 +56,7 @@ public static unsafe IntPtr GetFunctionPointerForDelegate(Delegate del)
                 throw new ArgumentException(SR.Argument_NeedNonGenericType, "delegate");
 #pragma warning restore CA2208
 
-            NativeFunctionPointerWrapper? fpWrapper = del.Target as NativeFunctionPointerWrapper;
+            NativeFunctionPointerWrapper? fpWrapper = del.TryGetNativeFunctionPointerWrapper();
             if (fpWrapper != null)
             {
                 //
@@ -104,64 +104,71 @@ internal unsafe struct ThunkContextData
             public IntPtr FunctionPtr;     // Function pointer for open static delegates
         }
 
-        internal sealed class PInvokeDelegateThunk
+        internal sealed unsafe class PInvokeDelegateThunk
         {
-            public IntPtr Thunk;        //  Thunk pointer
-            public IntPtr ContextData;  //  ThunkContextData pointer which will be stored in the context slot of the thunk
+            public readonly IntPtr Thunk;        //  Thunk pointer
+            public readonly IntPtr ContextData;  //  ThunkContextData pointer which will be stored in the context slot of the thunk
 
             public PInvokeDelegateThunk(Delegate del)
             {
-
                 Thunk = RuntimeAugments.AllocateThunk(s_thunkPoolHeap);
-                Debug.Assert(Thunk != IntPtr.Zero);
-
                 if (Thunk == IntPtr.Zero)
                 {
-                    // We've either run out of memory, or failed to allocate a new thunk due to some other bug. Now we should fail fast
-                    Environment.FailFast("Insufficient number of thunks.");
+                    throw new OutOfMemoryException();
                 }
-                else
-                {
-                    //
-                    // Allocate unmanaged memory for GCHandle of delegate and function pointer of open static delegate
-                    // We will store this pointer on the context slot of thunk data
-                    //
-                    unsafe
-                    {
-                        ContextData = (IntPtr)NativeMemory.Alloc((nuint)(2 * IntPtr.Size));
 
-                        ThunkContextData* thunkData = (ThunkContextData*)ContextData;
+                //
+                //  For open static delegates set target to ReverseOpenStaticDelegateStub which calls the static function pointer directly
+                //
+                IntPtr openStaticFunctionPointer = del.TryGetOpenStaticFunctionPointer();
+
+                //
+                // Allocate unmanaged memory for GCHandle of delegate and function pointer of open static delegate
+                // We will store this pointer on the context slot of thunk data
+                //
+                unsafe
+                {
+                    ContextData = (IntPtr)NativeMemory.AllocZeroed((nuint)(2 * IntPtr.Size));
 
-                        // allocate a weak GChandle for the delegate
-                        thunkData->Handle = GCHandle.Alloc(del, GCHandleType.Weak);
+                    ThunkContextData* thunkData = (ThunkContextData*)ContextData;
 
-                        // if it is an open static delegate get the function pointer
-                        if (del.IsOpenStatic)
-                            thunkData->FunctionPtr = del.GetFunctionPointer(out RuntimeTypeHandle _, out bool _, out bool _);
-                        else
-                            thunkData->FunctionPtr = default;
-                    }
+                    // allocate a weak GChandle for the delegate
+                    thunkData->Handle = GCHandle.Alloc(del, GCHandleType.WeakTrackResurrection);
+                    thunkData->FunctionPtr = openStaticFunctionPointer;
                 }
+
+                IntPtr pTarget = RuntimeInteropData.GetDelegateMarshallingStub(new RuntimeTypeHandle(del.GetMethodTable()), openStaticFunctionPointer != IntPtr.Zero);
+                Debug.Assert(pTarget != IntPtr.Zero);
+
+                RuntimeAugments.SetThunkData(s_thunkPoolHeap, Thunk, ContextData, pTarget);
             }
 
             ~PInvokeDelegateThunk()
             {
-                // Free the thunk
-                RuntimeAugments.FreeThunk(s_thunkPoolHeap, Thunk);
-                unsafe
+                if (ContextData != IntPtr.Zero)
                 {
-                    if (ContextData != IntPtr.Zero)
+                    // free the GCHandle
+                    GCHandle handle = ((ThunkContextData*)ContextData)->Handle;
+                    if (handle.IsAllocated)
                     {
-                        // free the GCHandle
-                        GCHandle handle = ((ThunkContextData*)ContextData)->Handle;
-                        if (handle.IsAllocated)
+                        // If the delegate is still alive, defer finalization.
+                        if (handle.Target != null)
                         {
-                            handle.Free();
+                            GC.ReRegisterForFinalize(this);
+                            return;
                         }
 
-                        // Free the allocated context data memory
-                        NativeMemory.Free((void*)ContextData);
+                        handle.Free();
                     }
+
+                    // Free the allocated context data memory
+                    NativeMemory.Free((void*)ContextData);
+                }
+
+                // Free the thunk
+                if (Thunk != IntPtr.Zero)
+                {
+                    RuntimeAugments.FreeThunk(s_thunkPoolHeap, Thunk);
                 }
             }
         }
@@ -179,19 +186,7 @@ private static unsafe PInvokeDelegateThunk AllocateThunk(Delegate del)
                 Debug.Assert(s_thunkPoolHeap != null);
             }
 
-            var delegateThunk = new PInvokeDelegateThunk(del);
-
-            //
-            //  For open static delegates set target to ReverseOpenStaticDelegateStub which calls the static function pointer directly
-            //
-            bool openStaticDelegate = del.IsOpenStatic;
-
-            IntPtr pTarget = RuntimeInteropData.GetDelegateMarshallingStub(new RuntimeTypeHandle(del.GetMethodTable()), openStaticDelegate);
-            Debug.Assert(pTarget != IntPtr.Zero);
-
-            RuntimeAugments.SetThunkData(s_thunkPoolHeap, delegateThunk.Thunk, delegateThunk.ContextData, pTarget);
-
-            return delegateThunk;
+            return new PInvokeDelegateThunk(del);
         }
 
         /// <summary>
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs
index 3f30983a74c3..17a0bca07e13 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs
@@ -42,11 +42,9 @@ internal static partial class RuntimeImports
 
         [LibraryImport(RuntimeLibrary)]
         [SuppressGCTransition]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static partial ulong RhpGetTickCount64();
 
         [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static partial IntPtr RhpGetCurrentThread();
 
         [MethodImpl(MethodImplOptions.InternalCall)]
@@ -166,15 +164,12 @@ internal static void RhWaitForPendingFinalizers(bool allowReentrantWait)
         internal static extern long RhGetLastGCDuration(int generation);
 
         [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static unsafe partial IntPtr RhRegisterFrozenSegment(void* pSegmentStart, nuint allocSize, nuint commitSize, nuint reservedSize);
 
         [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static unsafe partial void RhUpdateFrozenSegment(IntPtr seg, void* allocated, void* committed);
 
         [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static partial void RhUnregisterFrozenSegment(IntPtr pSegmentHandle);
 
         [MethodImpl(MethodImplOptions.InternalCall)]
@@ -221,7 +216,6 @@ internal enum GCConfigurationType
         }
 
         [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static unsafe partial void RhEnumerateConfigurationValues(void* configurationContext, delegate* unmanaged<void*, void*, void*, GCConfigurationType, long, void> callback);
 
         internal struct GCHeapHardLimitInfo
@@ -237,19 +231,15 @@ internal struct GCHeapHardLimitInfo
         }
 
         [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static partial int RhRefreshMemoryLimit(GCHeapHardLimitInfo heapHardLimitInfo);
 
         [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static unsafe partial int RhEnableNoGCRegionCallback(void* callback, long totalSize);
 
         [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static partial long RhGetGenerationBudget(int generation);
 
         [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static partial long RhGetTotalAllocatedBytesPrecise();
 
         [MethodImpl(MethodImplOptions.InternalCall)]
@@ -375,7 +365,6 @@ internal static IntPtr RhHandleAllocDependent(object primary, object secondary)
         internal static extern int RhpGetThunkBlockSize();
 
         [LibraryImport(RuntimeLibrary, EntryPoint = "RhAllocateThunksMapping")]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static partial IntPtr RhAllocateThunksMapping();
 
         //
@@ -427,22 +416,18 @@ internal static IntPtr RhHandleAllocDependent(object primary, object secondary)
         // Busy spin for the given number of iterations.
         [LibraryImport(RuntimeLibrary, EntryPoint = "RhSpinWait")]
         [SuppressGCTransition]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static partial void RhSpinWait(int iterations);
 
         // Call RhSpinWait with a GC transition
         [LibraryImport(RuntimeLibrary, EntryPoint = "RhSpinWait")]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static partial void RhLongSpinWait(int iterations);
 
         // Yield the cpu to another thread ready to process, if one is available.
         [LibraryImport(RuntimeLibrary, EntryPoint = "RhYield")]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         private static partial int _RhYield();
         internal static bool RhYield() { return (_RhYield() != 0); }
 
         [LibraryImport(RuntimeLibrary, EntryPoint = "RhFlushProcessWriteBuffers")]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static partial void RhFlushProcessWriteBuffers();
 
 #if !TARGET_UNIX
@@ -652,183 +637,14 @@ internal static IntPtr RhGetModuleSection(TypeManagerHandle module, ReadyToRunSe
         [RuntimeImport(RuntimeLibrary, "RhUnregisterForGCReporting")]
         internal static extern unsafe void RhUnregisterForGCReporting(GCFrameRegistration* pRegistration);
 
-
 #if FEATURE_PERFTRACING
-        //
-        // EventPipeInternal helpers.
-        //
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static unsafe partial ulong RhEventPipeInternal_Enable(
-            char* outputFile,
-            int format,
-            uint circularBufferSizeInMB,
-            void* providers,
-            uint numProviders);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void RhEventPipeInternal_Disable(ulong sessionID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static unsafe partial IntPtr RhEventPipeInternal_CreateProvider(char* providerName, IntPtr callbackFunc, IntPtr callbackContext);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static unsafe partial IntPtr RhEventPipeInternal_DefineEvent(
-            IntPtr provHandle,
-            uint eventID,
-            long keywords,
-            uint eventVersion,
-            uint level,
-            void *pMetadata,
-            uint metadataLength);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static unsafe partial IntPtr RhEventPipeInternal_GetProvider(char* providerName);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void RhEventPipeInternal_DeleteProvider(IntPtr provHandle);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static unsafe partial int RhEventPipeInternal_EventActivityIdControl(uint controlCode, Guid* activityId);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static unsafe partial void RhEventPipeInternal_WriteEventData(
-            IntPtr eventHandle,
-            void* pEventData,
-            uint dataCount,
-            Guid* activityId,
-            Guid* relatedActivityId);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static unsafe partial uint RhEventPipeInternal_GetSessionInfo(ulong sessionID, void* pSessionInfo);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static unsafe partial uint RhEventPipeInternal_GetNextEvent(ulong sessionID, void* pInstance);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial uint RhEventPipeInternal_SignalSession(ulong sessionID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial uint RhEventPipeInternal_WaitForSessionSignal(ulong sessionID, int timeoutMs);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogContentionLockCreated(nint LockID, nint AssociatedObjectID, ushort ClrInstanceID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogContentionStart(byte ContentionFlags, ushort ClrInstanceID, nint LockID, nint AssociatedObjectID, ulong LockOwnerThreadID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogContentionStop(byte ContentionFlags, ushort ClrInstanceID, double DurationNs);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogThreadPoolWorkerThreadStart(uint ActiveWorkerThreadCount, uint RetiredWorkerThreadCount, ushort ClrInstanceID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogThreadPoolWorkerThreadStop(uint ActiveWorkerThreadCount, uint RetiredWorkerThreadCount, ushort ClrInstanceID);
-
         [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogThreadPoolWorkerThreadWait(uint ActiveWorkerThreadCount, uint RetiredWorkerThreadCount, ushort ClrInstanceID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogThreadPoolMinMaxThreads(ushort MinWorkerThreads, ushort MaxWorkerThreads, ushort MinIOCompletionThreads, ushort MaxIOCompletionThreads, ushort ClrInstanceID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentSample(double Throughput, ushort ClrInstanceID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentAdjustment(double AverageThroughput, uint NewWorkerThreadCount, uint Reason, ushort ClrInstanceID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentStats(
-            double Duration,
-            double Throughput,
-            double ThreadPoolWorkerThreadWait,
-            double ThroughputWave,
-            double ThroughputErrorEstimate,
-            double AverageThroughputErrorEstimate,
-            double ThroughputRatio,
-            double Confidence,
-            double NewControlSetting,
-            ushort NewThreadWaveMagnitude,
-            ushort ClrInstanceID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogThreadPoolIOEnqueue(
-            IntPtr NativeOverlapped,
-            IntPtr Overlapped,
-            [MarshalAs(UnmanagedType.Bool)] bool MultiDequeues,
-            ushort ClrInstanceID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogThreadPoolIODequeue(
-            IntPtr NativeOverlapped,
-            IntPtr Overlapped,
-            ushort ClrInstanceID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogThreadPoolWorkingThreadCount(
-            uint Count,
-            ushort ClrInstanceID
-        );
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogThreadPoolIOPack(
-            IntPtr NativeOverlapped,
-            IntPtr Overlapped,
-            ushort ClrInstanceID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static unsafe partial void NativeRuntimeEventSource_LogExceptionThrown(char* exceptionTypeName, char* exceptionMessage, IntPtr faultingIP, long hresult);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogWaitHandleWaitStart(
-            byte WaitSource,
-            IntPtr AssociatedObjectID,
-            ushort ClrInstanceID);
-
-        [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
-        internal static partial void NativeRuntimeEventSource_LogWaitHandleWaitStop(ushort ClrInstanceID);
+        internal static unsafe partial void NativeRuntimeEventSource_LogExceptionThrown(char* exceptionTypeName, char* exceptionMessage, IntPtr faultingIP, int hresult);
 #endif // FEATURE_PERFTRACING
 
         //
         // Interlocked helpers
         //
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        [RuntimeImport(RuntimeLibrary, "RhpLockCmpXchg8")]
-        internal static extern byte InterlockedCompareExchange(ref byte location1, byte value, byte comparand);
-
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        [RuntimeImport(RuntimeLibrary, "RhpLockCmpXchg16")]
-        internal static extern short InterlockedCompareExchange(ref short location1, short value, short comparand);
-
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "RhpLockCmpXchg32")]
         internal static extern int InterlockedCompareExchange(ref int location1, int value, int comparand);
@@ -845,265 +661,216 @@ internal static partial void NativeRuntimeEventSource_LogWaitHandleWaitStart(
         [RuntimeImport(RuntimeLibrary, "RhpCheckedXchg")]
         internal static extern object InterlockedExchange([NotNullIfNotNull(nameof(value))] ref object? location1, object? value);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "acos")]
         internal static extern double acos(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "acosf")]
         internal static extern float acosf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "acosh")]
         internal static extern double acosh(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "acoshf")]
         internal static extern float acoshf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "asin")]
         internal static extern double asin(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "asinf")]
         internal static extern float asinf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "asinh")]
         internal static extern double asinh(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "asinhf")]
         internal static extern float asinhf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "atan")]
         internal static extern double atan(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "atanf")]
         internal static extern float atanf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "atan2")]
         internal static extern double atan2(double y, double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "atan2f")]
         internal static extern float atan2f(float y, float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "atanh")]
         internal static extern double atanh(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "atanhf")]
         internal static extern float atanhf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "cbrt")]
         internal static extern double cbrt(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "cbrtf")]
         internal static extern float cbrtf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "ceil")]
         internal static extern double ceil(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "ceilf")]
         internal static extern float ceilf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "cos")]
         internal static extern double cos(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "cosf")]
         internal static extern float cosf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "cosh")]
         internal static extern double cosh(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "coshf")]
         internal static extern float coshf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "exp")]
         internal static extern double exp(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "expf")]
         internal static extern float expf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "floor")]
         internal static extern double floor(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "floorf")]
         internal static extern float floorf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "log")]
         internal static extern double log(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "logf")]
         internal static extern float logf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "log2")]
         internal static extern double log2(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "log2f")]
         internal static extern float log2f(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "log10")]
         internal static extern double log10(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "log10f")]
         internal static extern float log10f(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "pow")]
         internal static extern double pow(double x, double y);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "powf")]
         internal static extern float powf(float x, float y);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "sin")]
         internal static extern double sin(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "sinf")]
         internal static extern float sinf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "sinh")]
         internal static extern double sinh(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "sinhf")]
         internal static extern float sinhf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "sqrt")]
         internal static extern double sqrt(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "sqrtf")]
         internal static extern float sqrtf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "tan")]
         internal static extern double tan(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "tanf")]
         internal static extern float tanf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "tanh")]
         internal static extern double tanh(double x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "tanhf")]
         internal static extern float tanhf(float x);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "fmod")]
         internal static extern double fmod(double x, double y);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "fmodf")]
         internal static extern float fmodf(float x, float y);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "fma")]
         internal static extern double fma(double x, double y, double z);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "fmaf")]
         internal static extern float fmaf(float x, float y, float z);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "modf")]
         internal static extern unsafe double modf(double x, double* intptr);
 
-        [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "modff")]
         internal static extern unsafe float modff(float x, float* intptr);
 
         [LibraryImport(RuntimeImports.RuntimeLibrary)]
+        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static unsafe partial void* memmove(byte* dmem, byte* smem, nuint size);
 
         [LibraryImport(RuntimeImports.RuntimeLibrary)]
+        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static unsafe partial void* memset(byte* mem, int value, nuint size);
 
 #if TARGET_X86 || TARGET_AMD64
         [LibraryImport(RuntimeLibrary)]
-        [UnmanagedCallConv(CallConvs = [typeof(CallConvCdecl)])]
         internal static unsafe partial void RhCpuIdEx(int* cpuInfo, int functionId, int subFunctionId);
 #endif
     }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/RuntimeExceptionHelpers.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/RuntimeExceptionHelpers.cs
index 427f75014ec5..0ed5f375cc27 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/RuntimeExceptionHelpers.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/RuntimeExceptionHelpers.cs
@@ -228,6 +228,27 @@ internal static unsafe void FailFast(string? message = null, Exception? exceptio
                         Internal.Console.Error.WriteLine();
                     }
 
+#if TARGET_WINDOWS
+                    if (EventReporter.ShouldLogInEventLog)
+                    {
+                        var reporter = new EventReporter(reason);
+                        if (exception != null && reason is not RhFailFastReason.AssertionFailure)
+                        {
+                            reporter.AddDescription($"{exception.GetType()}: {exception.Message}");
+                            reporter.AddStackTrace(exception.StackTrace);
+                        }
+                        else
+                        {
+                            if (message != null)
+                                reporter.AddDescription(message);
+                            reporter.BeginStackTrace();
+                            reporter.AddStackTrace(new StackTrace().ToString());
+                        }
+
+                        reporter.Report();
+                    }
+#endif
+
                     if (exception != null)
                     {
                         crashInfo.WriteException(exception);
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Condition.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Condition.cs
index 1c6cdadaf022..ba8aba61a0a1 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Condition.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Condition.cs
@@ -89,7 +89,9 @@ private unsafe void RemoveWaiter(Waiter waiter)
 
         public Condition(Lock @lock)
         {
+#pragma warning disable CS9216 // A value of type 'System.Threading.Lock' converted to a different type will use likely unintended monitor-based locking in 'lock' statement.
             ArgumentNullException.ThrowIfNull(@lock);
+#pragma warning restore CS9216
             _lock = @lock;
         }
 
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.Wasm.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.Wasm.cs
index 0c78d815ccb7..4a4f185a322e 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.Wasm.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.Wasm.cs
@@ -19,12 +19,6 @@ namespace System.Threading
     //
     public static partial class Interlocked
     {
-        [Intrinsic]
-        public static unsafe byte CompareExchange(ref byte location1, byte value, byte comparand) => CompareExchange(ref location1, value, comparand);
-
-        [Intrinsic]
-        public static unsafe short CompareExchange(ref short location1, short value, short comparand) => CompareExchange(ref location1, value, comparand);
-
         [Intrinsic]
         public static unsafe int CompareExchange(ref int location1, int value, int comparand) => CompareExchange(ref location1, value, comparand);
 
@@ -50,12 +44,6 @@ public static T CompareExchange<T>(ref T location1, T value, T comparand) where
             return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand);
         }
 
-        [Intrinsic]
-        public static byte Exchange(ref byte location1, byte value) => Exchange(ref location1, value);
-
-        [Intrinsic]
-        public static short Exchange(ref short location1, short value) => Exchange(ref location1, value);
-
         [Intrinsic]
         public static int Exchange(ref int location1, int value) => Exchange(ref location1, value);
 
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs
index 28de476b6eb5..82d6aad7a06e 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs
@@ -12,42 +12,27 @@ public static partial class Interlocked
 #if !TARGET_WASM
         #region CompareExchange
 
-        [Intrinsic]
-        public static byte CompareExchange(ref byte location1, byte value, byte comparand)
-        {
-#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
-            return CompareExchange(ref location1, value, comparand);
-#else
-            return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand);
-#endif
-        }
-
-        [Intrinsic]
-        public static short CompareExchange(ref short location1, short value, short comparand)
-        {
-#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
-            return CompareExchange(ref location1, value, comparand);
-#else
-            return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand);
-#endif
-        }
-
         [Intrinsic]
         public static int CompareExchange(ref int location1, int value, int comparand)
         {
 #if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64
-            return CompareExchange(ref location1, value, comparand);
+            return CompareExchange(ref location1, value, comparand); // Must expand intrinsic
 #else
+            if (Unsafe.IsNullRef(ref location1))
+                ThrowHelper.ThrowNullReferenceException();
             return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand);
 #endif
         }
 
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static long CompareExchange(ref long location1, long value, long comparand)
         {
 #if TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64
-            return CompareExchange(ref location1, value, comparand);
+            return CompareExchange(ref location1, value, comparand); // Must expand intrinsic
 #else
+            if (Unsafe.IsNullRef(ref location1))
+                ThrowHelper.ThrowNullReferenceException();
             return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand);
 #endif
         }
@@ -57,13 +42,16 @@ public static long CompareExchange(ref long location1, long value, long comparan
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static T CompareExchange<T>(ref T location1, T value, T comparand) where T : class?
         {
-            return Unsafe.As<T>(RuntimeImports.InterlockedCompareExchange(ref Unsafe.As<T, object?>(ref location1), value, comparand));
+            return Unsafe.As<T>(CompareExchange(ref Unsafe.As<T, object?>(ref location1), value, comparand));
         }
 
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [return: NotNullIfNotNull(nameof(location1))]
         public static object? CompareExchange(ref object? location1, object? value, object? comparand)
         {
+            if (Unsafe.IsNullRef(ref location1))
+                ThrowHelper.ThrowNullReferenceException();
             return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand);
         }
 
@@ -71,45 +59,11 @@ public static T CompareExchange<T>(ref T location1, T value, T comparand) where
 
         #region Exchange
 
-        [Intrinsic]
-        public static byte Exchange(ref byte location1, byte value)
-        {
-#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
-            return Exchange(ref location1, value);
-#else
-            byte oldValue;
-
-            do
-            {
-                oldValue = location1;
-            } while (CompareExchange(ref location1, value, oldValue) != oldValue);
-
-            return oldValue;
-#endif
-        }
-
-        [Intrinsic]
-        public static short Exchange(ref short location1, short value)
-        {
-#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
-            return Exchange(ref location1, value);
-#else
-            short oldValue;
-
-            do
-            {
-                oldValue = location1;
-            } while (CompareExchange(ref location1, value, oldValue) != oldValue);
-
-            return oldValue;
-#endif
-        }
-
         [Intrinsic]
         public static int Exchange(ref int location1, int value)
         {
 #if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64
-            return Exchange(ref location1, value);
+            return Exchange(ref location1, value); // Must expand intrinsic
 #else
             int oldValue;
 
@@ -126,7 +80,7 @@ public static int Exchange(ref int location1, int value)
         public static long Exchange(ref long location1, long value)
         {
 #if TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64
-            return Exchange(ref location1, value);
+            return Exchange(ref location1, value); // Must expand intrinsic
 #else
             long oldValue;
 
@@ -140,17 +94,22 @@ public static long Exchange(ref long location1, long value)
         }
 
         [Intrinsic]
-        [return: NotNullIfNotNull(nameof(location1))]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [return: NotNullIfNotNull(nameof(location1))]
         public static T Exchange<T>([NotNullIfNotNull(nameof(value))] ref T location1, T value) where T : class?
         {
+            if (Unsafe.IsNullRef(ref location1))
+                ThrowHelper.ThrowNullReferenceException();
             return Unsafe.As<T>(RuntimeImports.InterlockedExchange(ref Unsafe.As<T, object?>(ref location1), value));
         }
 
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [return: NotNullIfNotNull(nameof(location1))]
         public static object? Exchange([NotNullIfNotNull(nameof(value))] ref object? location1, object? value)
         {
+            if (Unsafe.IsNullRef(ref location1))
+                ThrowHelper.ThrowNullReferenceException();
             return RuntimeImports.InterlockedExchange(ref location1, value);
         }
 
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Lock.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Lock.NativeAot.cs
index 78fc77454019..7ac43d2257e7 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Lock.NativeAot.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Lock.NativeAot.cs
@@ -207,14 +207,18 @@ private static bool TryInitializeStatics()
         // Returns false until the static variable is lazy-initialized
         internal static bool IsSingleProcessor => s_isSingleProcessor;
 
-        // Used to transfer the state when inflating thin locks
-        internal void InitializeLocked(int managedThreadId, uint recursionCount)
+        // Used to transfer the state when inflating thin locks. The lock is considered unlocked if managedThreadId is zero, and
+        // locked otherwise.
+        internal void ResetForMonitor(int managedThreadId, uint recursionCount)
         {
             Debug.Assert(recursionCount == 0 || managedThreadId != 0);
+            Debug.Assert(!new State(this).UseTrivialWaits);
 
             _state = managedThreadId == 0 ? State.InitialStateValue : State.LockedStateValue;
             _owningThreadId = (uint)managedThreadId;
             _recursionCount = recursionCount;
+
+            Debug.Assert(!new State(this).UseTrivialWaits);
         }
 
         internal struct ThreadId
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Unix.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Unix.cs
index 5046b6230104..a0873fc273ff 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Unix.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Unix.cs
@@ -9,7 +9,7 @@ namespace System.Threading
     /// A LIFO semaphore.
     /// Waits on this semaphore are uninterruptible.
     /// </summary>
-    internal sealed partial class LowLevelLifoSemaphore : LowLevelLifoSemaphoreBase, IDisposable
+    internal sealed partial class LowLevelLifoSemaphore : IDisposable
     {
         private WaitSubsystem.WaitableObject _semaphore;
 
@@ -27,7 +27,7 @@ private bool WaitCore(int timeoutMs)
             return WaitSubsystem.Wait(_semaphore, timeoutMs, false, true) == WaitHandle.WaitSuccess;
         }
 
-        protected override void ReleaseCore(int count)
+        private void ReleaseCore(int count)
         {
             WaitSubsystem.ReleaseSemaphore(_semaphore, count);
         }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/SyncTable.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/SyncTable.cs
index c3a273d32739..c9ec4990f590 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/SyncTable.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/SyncTable.cs
@@ -140,7 +140,7 @@ public static unsafe int AssignEntry(object obj, int* pHeader)
 
                     // Found a free entry to assign
                     Debug.Assert(!entry.Owner.IsAllocated);
-                    Debug.Assert(entry.Lock == null);
+                    Debug.Assert(entry.Lock is null);
                     Debug.Assert(entry.HashCode == 0);
 
                     // Set up the new entry.  We should not fail after this point.
@@ -274,7 +274,7 @@ public static void MoveThinLockToNewEntry(int syncIndex, int threadId, uint recu
             Debug.Assert(s_lock.IsHeldByCurrentThread);
             Debug.Assert((0 < syncIndex) && (syncIndex < s_unusedEntryIndex));
 
-            s_entries[syncIndex].Lock.InitializeLocked(threadId, recursionLevel);
+            s_entries[syncIndex].Lock.ResetForMonitor(threadId, recursionLevel);
         }
 
         /// <summary>
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/TypedReference.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/TypedReference.cs
index 3b209cd489c4..56e8ef05dd1c 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/TypedReference.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/TypedReference.cs
@@ -50,23 +50,18 @@ internal static RuntimeTypeHandle RawTargetTypeToken(TypedReference value)
 
         public static unsafe object ToObject(TypedReference value)
         {
-            RuntimeTypeHandle typeHandle = value._typeHandle;
-            if (typeHandle.IsNull)
+            RuntimeTypeHandle handle = RawTargetTypeToken(value);
+
+            if (handle.IsNull)
                 ThrowHelper.ThrowArgumentException_ArgumentNull_TypedRefType();
 
-            MethodTable* eeType = typeHandle.ToMethodTable();
-            if (eeType->IsValueType)
-            {
-                return RuntimeImports.RhBox(eeType, ref value.Value);
-            }
-            else if (eeType->IsPointer || eeType->IsFunctionPointer)
+            MethodTable* mt = handle.ToMethodTable();
+            if (mt->IsPointer || mt->IsFunctionPointer)
             {
-                return RuntimeImports.RhBox(MethodTable.Of<UIntPtr>(), ref value.Value);
-            }
-            else
-            {
-                return Unsafe.As<byte, object>(ref value.Value);
+                handle = typeof(UIntPtr).TypeHandle;
             }
+
+            return RuntimeHelpers.Box(ref value.Value, handle);
         }
 
         public static void SetTypedReference(TypedReference target, object? value) { throw new NotSupportedException(); }
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/ValueType.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/ValueType.cs
index e8340e411915..968e97c425cf 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/ValueType.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/ValueType.cs
@@ -95,43 +95,28 @@ public override unsafe bool Equals([NotNullWhen(true)] object? obj)
 
         public override unsafe int GetHashCode()
         {
-            int hashCode = (int)this.GetMethodTable()->HashCode;
+            HashCode hashCode = default;
+            hashCode.Add((IntPtr)this.GetMethodTable());
 
-            hashCode ^= GetHashCodeImpl();
-
-            return hashCode;
-        }
-
-        private unsafe int GetHashCodeImpl()
-        {
             int numFields = __GetFieldHelper(GetNumFields, out _);
 
             if (numFields == UseFastHelper)
-                return FastGetValueTypeHashCodeHelper(this.GetMethodTable(), ref this.GetRawData());
+                hashCode.AddBytes(GetSpanForField(this.GetMethodTable(), ref this.GetRawData()));
+            else
+                RegularGetValueTypeHashCode(ref hashCode, ref this.GetRawData(), numFields);
 
-            return RegularGetValueTypeHashCode(ref this.GetRawData(), numFields);
+            return hashCode.ToHashCode();
         }
 
-        private static unsafe int FastGetValueTypeHashCodeHelper(MethodTable* type, ref byte data)
+        private static unsafe ReadOnlySpan<byte> GetSpanForField(MethodTable* type, ref byte data)
         {
             // Sanity check - if there are GC references, we should not be hashing bytes
             Debug.Assert(!type->ContainsGCPointers);
-
-            int size = (int)type->ValueTypeSize;
-            int hashCode = 0;
-
-            for (int i = 0; i < size / 4; i++)
-            {
-                hashCode ^= Unsafe.As<byte, int>(ref Unsafe.Add(ref data, i * 4));
-            }
-
-            return hashCode;
+            return new ReadOnlySpan<byte>(ref data, (int)type->ValueTypeSize);
         }
 
-        private unsafe int RegularGetValueTypeHashCode(ref byte data, int numFields)
+        private unsafe void RegularGetValueTypeHashCode(ref HashCode hashCode, ref byte data, int numFields)
         {
-            int hashCode = 0;
-
             // We only take the hashcode for the first non-null field. That's what the CLR does.
             for (int i = 0; i < numFields; i++)
             {
@@ -142,15 +127,15 @@ private unsafe int RegularGetValueTypeHashCode(ref byte data, int numFields)
 
                 if (fieldType->ElementType == EETypeElementType.Single)
                 {
-                    hashCode = Unsafe.As<byte, float>(ref fieldData).GetHashCode();
+                    hashCode.Add(Unsafe.As<byte, float>(ref fieldData));
                 }
                 else if (fieldType->ElementType == EETypeElementType.Double)
                 {
-                    hashCode = Unsafe.As<byte, double>(ref fieldData).GetHashCode();
+                    hashCode.Add(Unsafe.As<byte, double>(ref fieldData));
                 }
                 else if (fieldType->IsPrimitive)
                 {
-                    hashCode = FastGetValueTypeHashCodeHelper(fieldType, ref fieldData);
+                    hashCode.AddBytes(GetSpanForField(fieldType, ref fieldData));
                 }
                 else if (fieldType->IsValueType)
                 {
@@ -164,7 +149,7 @@ private unsafe int RegularGetValueTypeHashCode(ref byte data, int numFields)
                     var fieldValue = (ValueType)RuntimeImports.RhBox(fieldType, ref fieldData);
                     if (fieldValue != null)
                     {
-                        hashCode = fieldValue.GetHashCodeImpl();
+                        hashCode.Add(fieldValue);
                     }
                     else
                     {
@@ -177,7 +162,7 @@ private unsafe int RegularGetValueTypeHashCode(ref byte data, int numFields)
                     object fieldValue = Unsafe.As<byte, object>(ref fieldData);
                     if (fieldValue != null)
                     {
-                        hashCode = fieldValue.GetHashCode();
+                        hashCode.Add(fieldValue);
                     }
                     else
                     {
@@ -187,8 +172,6 @@ private unsafe int RegularGetValueTypeHashCode(ref byte data, int numFields)
                 }
                 break;
             }
-
-            return hashCode;
         }
     }
 }
diff --git a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/TypeLoader/ConstraintValidator.cs b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/TypeLoader/ConstraintValidator.cs
index ccb1a9279f0a..d347b6d0f4af 100644
--- a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/TypeLoader/ConstraintValidator.cs
+++ b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Execution/TypeLoader/ConstraintValidator.cs
@@ -13,9 +13,9 @@ internal static partial class ConstraintValidator
     {
         private static bool SatisfiesConstraints(this Type genericVariable, SigTypeContext typeContextOfConstraintDeclarer, Type typeArg)
         {
-            GenericParameterAttributes specialConstraints = genericVariable.GenericParameterAttributes & GenericParameterAttributes.SpecialConstraintMask;
+            GenericParameterAttributes attributes = genericVariable.GenericParameterAttributes;
 
-            if ((specialConstraints & GenericParameterAttributes.NotNullableValueTypeConstraint) != 0)
+            if ((attributes & GenericParameterAttributes.NotNullableValueTypeConstraint) != 0)
             {
                 if (!typeArg.IsValueType)
                 {
@@ -30,19 +30,19 @@ private static bool SatisfiesConstraints(this Type genericVariable, SigTypeConte
                 }
             }
 
-            if ((specialConstraints & GenericParameterAttributes.ReferenceTypeConstraint) != 0)
+            if ((attributes & GenericParameterAttributes.ReferenceTypeConstraint) != 0)
             {
                 if (typeArg.IsValueType)
                     return false;
             }
 
-            if ((specialConstraints & GenericParameterAttributes.DefaultConstructorConstraint) != 0)
+            if ((attributes & GenericParameterAttributes.DefaultConstructorConstraint) != 0)
             {
                 if (!typeArg.HasExplicitOrImplicitPublicDefaultConstructor())
                     return false;
             }
 
-            if (typeArg.IsByRefLike && (specialConstraints & (GenericParameterAttributes)0x20 /* GenericParameterAttributes.AcceptByRefLike */) == 0)
+            if (typeArg.IsByRefLike && (attributes & (GenericParameterAttributes)0x20 /* GenericParameterAttributes.AllowByRefLike */) == 0)
                 return false;
 
             // Now check general subtype constraints
diff --git a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Extensions/NonPortable/DelegateMethodInfoRetriever.cs b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Extensions/NonPortable/DelegateMethodInfoRetriever.cs
index 5ae75d1d55eb..5669203a6f39 100644
--- a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Extensions/NonPortable/DelegateMethodInfoRetriever.cs
+++ b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Internal/Reflection/Extensions/NonPortable/DelegateMethodInfoRetriever.cs
@@ -17,20 +17,7 @@ public static class DelegateMethodInfoRetriever
     {
         public static MethodInfo GetDelegateMethodInfo(Delegate del)
         {
-            Delegate[] invokeList = del.GetInvocationList();
-            del = invokeList[invokeList.Length - 1];
-            IntPtr originalLdFtnResult = RuntimeAugments.GetDelegateLdFtnResult(del, out RuntimeTypeHandle typeOfFirstParameterIfInstanceDelegate, out bool isOpenResolver, out bool isInterpreterEntrypoint);
-
-            if (isInterpreterEntrypoint)
-            {
-                // This is a special kind of delegate where the invoke method is "ObjectArrayThunk". Typically,
-                // this will be a delegate that points the LINQ Expression interpreter. We could manufacture
-                // a MethodInfo based on the delegate's Invoke signature, but let's just throw for now.
-                throw new NotSupportedException(SR.DelegateGetMethodInfo_ObjectArrayDelegate);
-            }
-
-            if (originalLdFtnResult == (IntPtr)0)
-                return null;
+            IntPtr originalLdFtnResult = RuntimeAugments.GetDelegateLdFtnResult(del, out RuntimeTypeHandle typeOfFirstParameterIfInstanceDelegate, out bool isOpenResolver);
 
             QMethodDefinition methodHandle = default(QMethodDefinition);
             RuntimeTypeHandle[] genericMethodTypeArgumentHandles = null;
@@ -79,11 +66,7 @@ public static MethodInfo GetDelegateMethodInfo(Delegate del)
                         throw new NotSupportedException(SR.Format(SR.DelegateGetMethodInfo_NoDynamic_WithDisplayString, methodDisplayString));
                 }
             }
-            MethodBase methodBase = ExecutionDomain.GetMethod(typeOfFirstParameterIfInstanceDelegate, methodHandle, genericMethodTypeArgumentHandles);
-            MethodInfo methodInfo = methodBase as MethodInfo;
-            if (methodInfo != null)
-                return methodInfo;
-            return null; // GetMethod() returned a ConstructorInfo.
+            return (MethodInfo)ExecutionDomain.GetMethod(typeOfFirstParameterIfInstanceDelegate, methodHandle, genericMethodTypeArgumentHandles);
         }
     }
 }
diff --git a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Resources/Strings.resx b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Resources/Strings.resx
index 80023f62d226..e5432845a520 100644
--- a/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Resources/Strings.resx
+++ b/src/coreclr/nativeaot/System.Private.Reflection.Execution/src/Resources/Strings.resx
@@ -204,9 +204,6 @@
   <data name="DelegateGetMethodInfo_NoInstantiation" xml:space="preserve">
     <value>Cannot retrieve a MethodInfo for this delegate because the necessary generic instantiation was not metadata-enabled.</value>
   </data>
-  <data name="DelegateGetMethodInfo_ObjectArrayDelegate" xml:space="preserve">
-    <value>Cannot retrieve a MethodInfo for this delegate because the delegate target is an interpreted LINQ expression.</value>
-  </data>
   <data name="Arg_InterfaceMapMustNotBeAbstract" xml:space="preserve">
     <value>Could not retrieve the mapping of the interface '{0}' on type '{1}' because the type implements the interface abstractly.</value>
   </data>
diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/EETypeCreator.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/EETypeCreator.cs
index 26137a0c61d5..411f41d32337 100644
--- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/EETypeCreator.cs
+++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/EETypeCreator.cs
@@ -104,25 +104,14 @@ public static int AlignUp(int val, int alignment)
             return result;
         }
 
-        public static unsafe void Memset(IntPtr destination, int length, byte value)
+        public static unsafe void* AllocateMemory(int cbBytes)
         {
-            byte* pbDest = (byte*)destination.ToPointer();
-            while (length > 0)
-            {
-                *pbDest = value;
-                pbDest++;
-                length--;
-            }
+            return NativeMemory.Alloc((nuint)cbBytes);
         }
 
-        public static unsafe IntPtr AllocateMemory(int cbBytes)
+        public static unsafe void FreeMemory(void* memoryPtrToFree)
         {
-            return (IntPtr)NativeMemory.Alloc((nuint)cbBytes);
-        }
-
-        public static unsafe void FreeMemory(IntPtr memoryPtrToFree)
-        {
-            NativeMemory.Free((void*)memoryPtrToFree);
+            NativeMemory.Free(memoryPtrToFree);
         }
     }
 
@@ -132,12 +121,12 @@ private static void CreateEETypeWorker(MethodTable* pTemplateEEType, uint hashCo
             int arity, TypeBuilderState state)
         {
             bool successful = false;
-            IntPtr eeTypePtrPlusGCDesc = IntPtr.Zero;
-            IntPtr writableDataPtr = IntPtr.Zero;
-            IntPtr gcStaticData = IntPtr.Zero;
-            IntPtr nonGcStaticData = IntPtr.Zero;
-            IntPtr genericComposition = IntPtr.Zero;
-            IntPtr threadStaticIndex = IntPtr.Zero;
+            void* eeTypePlusGCDesc = null;
+            void* writableData = null;
+            void* nonGcStaticData = null;
+            void* genericComposition = null;
+            void* threadStaticIndex = null;
+            nint gcStaticData = 0;
 
             try
             {
@@ -266,10 +255,10 @@ private static void CreateEETypeWorker(MethodTable* pTemplateEEType, uint hashCo
                     int cbGCDescAligned = MemoryHelpers.AlignUp(cbGCDesc, IntPtr.Size);
 
                     // Allocate enough space for the MethodTable + gcDescSize
-                    eeTypePtrPlusGCDesc = MemoryHelpers.AllocateMemory(cbGCDescAligned + cbEEType + cbOptionalFieldsSize);
+                    eeTypePlusGCDesc = MemoryHelpers.AllocateMemory(cbGCDescAligned + cbEEType + cbOptionalFieldsSize);
 
                     // Get the MethodTable pointer, and the template MethodTable pointer
-                    pEEType = (MethodTable*)(eeTypePtrPlusGCDesc + cbGCDescAligned);
+                    pEEType = (MethodTable*)((byte*)eeTypePlusGCDesc + cbGCDescAligned);
                     state.HalfBakedRuntimeTypeHandle = pEEType->ToRuntimeTypeHandle();
 
                     // Set basic MethodTable fields
@@ -319,9 +308,9 @@ private static void CreateEETypeWorker(MethodTable* pTemplateEEType, uint hashCo
                     *((void**)((byte*)pEEType + cbSealedVirtualSlotsTypeOffset)) = pTemplateEEType->GetSealedVirtualTable();
                 }
 
-                writableDataPtr = MemoryHelpers.AllocateMemory(WritableData.GetSize(IntPtr.Size));
-                MemoryHelpers.Memset(writableDataPtr, WritableData.GetSize(IntPtr.Size), 0);
-                pEEType->WritableData = (void*)writableDataPtr;
+                writableData = MemoryHelpers.AllocateMemory(WritableData.GetSize(IntPtr.Size));
+                NativeMemory.Clear(writableData, (nuint)WritableData.GetSize(IntPtr.Size));
+                pEEType->WritableData = writableData;
 
                 pEEType->DynamicTemplateType = pTemplateEEType;
 
@@ -340,13 +329,13 @@ private static void CreateEETypeWorker(MethodTable* pTemplateEEType, uint hashCo
                     if (arity > 1)
                     {
                         genericComposition = MemoryHelpers.AllocateMemory(MethodTable.GetGenericCompositionSize(arity));
-                        pEEType->SetGenericComposition(genericComposition);
+                        pEEType->SetGenericComposition((IntPtr)genericComposition);
                     }
 
                     if (allocatedNonGCDataSize > 0)
                     {
                         nonGcStaticData = MemoryHelpers.AllocateMemory(allocatedNonGCDataSize);
-                        MemoryHelpers.Memset(nonGcStaticData, allocatedNonGCDataSize, 0);
+                        NativeMemory.Clear(nonGcStaticData, (nuint)allocatedNonGCDataSize);
                         Debug.Assert(nonGCStaticDataOffset <= allocatedNonGCDataSize);
                         pEEType->DynamicNonGcStaticsData = (IntPtr)((byte*)nonGcStaticData + nonGCStaticDataOffset);
                     }
@@ -359,7 +348,7 @@ private static void CreateEETypeWorker(MethodTable* pTemplateEEType, uint hashCo
                     threadStaticIndex = MemoryHelpers.AllocateMemory(IntPtr.Size * 2);
                     *(IntPtr*)threadStaticIndex = pEEType->PointerToTypeManager;
                     *(((IntPtr*)threadStaticIndex) + 1) = (IntPtr)state.ThreadStaticOffset;
-                    pEEType->DynamicThreadStaticsIndex = threadStaticIndex;
+                    pEEType->DynamicThreadStaticsIndex = (IntPtr)threadStaticIndex;
                 }
 
                 if (state.GcDataSize != 0)
@@ -368,7 +357,7 @@ private static void CreateEETypeWorker(MethodTable* pTemplateEEType, uint hashCo
                     object obj = RuntimeAugments.RawNewObject(((MethodTable*)state.GcStaticDesc)->ToRuntimeTypeHandle());
                     gcStaticData = RuntimeAugments.RhHandleAlloc(obj, GCHandleType.Normal);
 
-                    pEEType->DynamicGcStaticsData = gcStaticData;
+                    pEEType->DynamicGcStaticsData = (IntPtr)gcStaticData;
                 }
 
                 if (state.Dictionary != null)
@@ -383,20 +372,16 @@ private static void CreateEETypeWorker(MethodTable* pTemplateEEType, uint hashCo
             {
                 if (!successful)
                 {
-                    if (eeTypePtrPlusGCDesc != IntPtr.Zero)
-                        MemoryHelpers.FreeMemory(eeTypePtrPlusGCDesc);
-                    if (state.HalfBakedDictionary != IntPtr.Zero)
-                        MemoryHelpers.FreeMemory(state.HalfBakedDictionary);
-                    if (gcStaticData != IntPtr.Zero)
+                    if (gcStaticData != 0)
                         RuntimeAugments.RhHandleFree(gcStaticData);
-                    if (genericComposition != IntPtr.Zero)
-                        MemoryHelpers.FreeMemory(genericComposition);
-                    if (nonGcStaticData != IntPtr.Zero)
-                        MemoryHelpers.FreeMemory(nonGcStaticData);
-                    if (writableDataPtr != IntPtr.Zero)
-                        MemoryHelpers.FreeMemory(writableDataPtr);
-                    if (threadStaticIndex != IntPtr.Zero)
-                        MemoryHelpers.FreeMemory(threadStaticIndex);
+
+                    MemoryHelpers.FreeMemory((void*)state.HalfBakedDictionary);
+
+                    MemoryHelpers.FreeMemory(threadStaticIndex);
+                    MemoryHelpers.FreeMemory(nonGcStaticData);
+                    MemoryHelpers.FreeMemory(genericComposition);
+                    MemoryHelpers.FreeMemory(writableData);
+                    MemoryHelpers.FreeMemory(eeTypePlusGCDesc);
                 }
             }
         }
diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/GenericDictionary.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/GenericDictionary.cs
index c660b63d0c31..7d2ead3b32ae 100644
--- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/GenericDictionary.cs
+++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/GenericDictionary.cs
@@ -10,10 +10,10 @@
 
 namespace Internal.Runtime.TypeLoader
 {
-    internal abstract class GenericDictionary
+    internal abstract unsafe class GenericDictionary
     {
         protected GenericDictionaryCell[] _cells;
-        protected IntPtr _addressOfFirstCellSlot;
+        protected void* _addressOfFirstCellSlot;
 
         public GenericDictionary(GenericDictionaryCell[] cells)
         {
@@ -23,9 +23,9 @@ public GenericDictionary(GenericDictionaryCell[] cells)
 
         public abstract IntPtr Allocate();
 
-        public unsafe void Finish(TypeBuilder typeBuilder)
+        public void Finish(TypeBuilder typeBuilder)
         {
-            Debug.Assert(_cells.Length == 0 || _addressOfFirstCellSlot != IntPtr.Zero);
+            Debug.Assert(_cells.Length == 0 || _addressOfFirstCellSlot != null);
 
             IntPtr* realCells = (IntPtr*)_addressOfFirstCellSlot;
             for (int i = 0; i < _cells.Length; i++)
@@ -41,9 +41,9 @@ public GenericTypeDictionary(GenericDictionaryCell[] cells)
             : base(cells)
         { }
 
-        public override IntPtr Allocate()
+        public override unsafe IntPtr Allocate()
         {
-            Debug.Assert(_addressOfFirstCellSlot == IntPtr.Zero);
+            Debug.Assert(_addressOfFirstCellSlot == null);
 
             if (_cells.Length > 0)
             {
@@ -51,7 +51,7 @@ public override IntPtr Allocate()
                 _addressOfFirstCellSlot = MemoryHelpers.AllocateMemory(checked((int)(_cells.Length * IntPtr.Size)));
             }
 
-            return _addressOfFirstCellSlot;
+            return (IntPtr)_addressOfFirstCellSlot;
         }
     }
 
@@ -63,20 +63,20 @@ public GenericMethodDictionary(GenericDictionaryCell[] cells)
 
         public override unsafe IntPtr Allocate()
         {
-            Debug.Assert(_addressOfFirstCellSlot == IntPtr.Zero);
+            Debug.Assert(_addressOfFirstCellSlot == null);
 
             // Method dictionaries start with a header containing the hash code, which is not part of the native layout.
             // The real first slot is located after the header.
             // Use checked typecast to int to ensure there aren't any overflows/truncations
-            IntPtr dictionaryWithHeader = MemoryHelpers.AllocateMemory(checked((int)((_cells.Length + 1) * IntPtr.Size)));
+            void* dictionaryWithHeader = MemoryHelpers.AllocateMemory(checked((int)((_cells.Length + 1) * IntPtr.Size)));
 
             // Put a magic hash code to indicate dynamically allocated method dictionary for
             // debugging purposes.
             *(int*)dictionaryWithHeader = 0xD1CC0DE; // DICCODE
 
-            _addressOfFirstCellSlot = IntPtr.Add(dictionaryWithHeader, IntPtr.Size);
+            _addressOfFirstCellSlot = (byte*)dictionaryWithHeader + IntPtr.Size;
 
-            return _addressOfFirstCellSlot;
+            return (IntPtr)_addressOfFirstCellSlot;
         }
     }
 }
diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeBuilder.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeBuilder.cs
index 0dbceb8be3c8..8ee004b11d42 100644
--- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeBuilder.cs
+++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeBuilder.cs
@@ -1080,7 +1080,7 @@ private unsafe IntPtr BuildGenericLookupTarget(TypeSystemContext typeSystemConte
 
             // The first is a pointer that points to the TypeManager indirection cell.
             // The second is the offset into the native layout info blob in that TypeManager, where the native signature is encoded.
-            IntPtr** lazySignature = (IntPtr**)signature.ToPointer();
+            IntPtr** lazySignature = (IntPtr**)signature;
             typeManager = new TypeManagerHandle(lazySignature[0][0]);
             offset = checked((uint)new IntPtr(lazySignature[1]).ToInt32());
             reader = TypeLoaderEnvironment.GetNativeLayoutInfoReader(typeManager);
diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.LdTokenResultLookup.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.LdTokenResultLookup.cs
index d5e82d10d94c..9ddec8a39d7a 100644
--- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.LdTokenResultLookup.cs
+++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeLoaderEnvironment.LdTokenResultLookup.cs
@@ -43,7 +43,7 @@ internal struct DynamicMethodHandleInfo
         #region String conversions
         private static unsafe string GetStringFromMemoryInNativeFormat(IntPtr pointerToDataStream)
         {
-            byte* dataStream = (byte*)pointerToDataStream.ToPointer();
+            byte* dataStream = (byte*)pointerToDataStream;
             uint stringLen = NativePrimitiveDecoder.DecodeUnsigned(ref dataStream);
             return Encoding.UTF8.GetString(dataStream, checked((int)stringLen));
         }
@@ -54,7 +54,7 @@ private static unsafe string GetStringFromMemoryInNativeFormat(IntPtr pointerToD
         /// </summary>
         /// <param name="str"></param>
         /// <returns></returns>
-        public IntPtr GetNativeFormatStringForString(string str)
+        public unsafe IntPtr GetNativeFormatStringForString(string str)
         {
             using (_typeLoaderLock.EnterScope())
             {
@@ -69,13 +69,13 @@ public IntPtr GetNativeFormatStringForString(string str)
                 foreach (byte b in utf8Bytes)
                     stringEncoder.WriteByte(b);
 
-                IntPtr allocatedNativeFormatString = MemoryHelpers.AllocateMemory(stringEncoder.Size);
+                void* allocatedNativeFormatString = MemoryHelpers.AllocateMemory(stringEncoder.Size);
                 unsafe
                 {
-                    stringEncoder.Save((byte*)allocatedNativeFormatString.ToPointer(), stringEncoder.Size);
+                    stringEncoder.Save((byte*)allocatedNativeFormatString, stringEncoder.Size);
                 }
-                _nativeFormatStrings.Add(str, allocatedNativeFormatString);
-                return allocatedNativeFormatString;
+                _nativeFormatStrings.Add(str, (IntPtr)allocatedNativeFormatString);
+                return (IntPtr)allocatedNativeFormatString;
             }
         }
 
@@ -197,16 +197,12 @@ public unsafe RuntimeFieldHandle GetRuntimeFieldHandleForComponents(RuntimeTypeH
             {
                 if (!_runtimeFieldHandles.TryGetValue(key, out runtimeFieldHandle))
                 {
-                    IntPtr runtimeFieldHandleValue = MemoryHelpers.AllocateMemory(sizeof(DynamicFieldHandleInfo));
-                    if (runtimeFieldHandleValue == IntPtr.Zero)
-                        throw new OutOfMemoryException();
-
-                    DynamicFieldHandleInfo* fieldData = (DynamicFieldHandleInfo*)runtimeFieldHandleValue.ToPointer();
+                    DynamicFieldHandleInfo* fieldData = (DynamicFieldHandleInfo*)MemoryHelpers.AllocateMemory(sizeof(DynamicFieldHandleInfo));
                     fieldData->DeclaringType = *(IntPtr*)&declaringTypeHandle;
                     fieldData->FieldName = fieldName;
 
                     // Special flag (lowest bit set) in the handle value to indicate it was dynamically allocated
-                    runtimeFieldHandleValue++;
+                    IntPtr runtimeFieldHandleValue = (IntPtr)fieldData + 1;
                     runtimeFieldHandle = *(RuntimeFieldHandle*)&runtimeFieldHandleValue;
 
                     _runtimeFieldHandles.Add(key, runtimeFieldHandle);
@@ -228,10 +224,9 @@ private unsafe bool TryGetDynamicRuntimeFieldHandleComponents(RuntimeFieldHandle
             IntPtr runtimeFieldHandleValue = *(IntPtr*)&runtimeFieldHandle;
 
             // Special flag in the handle value to indicate it was dynamically allocated
-            Debug.Assert((runtimeFieldHandleValue.ToInt64() & 0x1) == 0x1);
-            runtimeFieldHandleValue--;
+            Debug.Assert((runtimeFieldHandleValue & 0x1) == 0x1);
 
-            DynamicFieldHandleInfo* fieldData = (DynamicFieldHandleInfo*)runtimeFieldHandleValue.ToPointer();
+            DynamicFieldHandleInfo* fieldData = (DynamicFieldHandleInfo*)(runtimeFieldHandleValue - 1);
             declaringTypeHandle = *(RuntimeTypeHandle*)&(fieldData->DeclaringType);
 
             // FieldName points to the field name in NativeLayout format, so we parse it using a NativeParser
@@ -297,11 +292,8 @@ public unsafe RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTyp
                     int numGenericMethodArgs = genericMethodArgs == null ? 0 : genericMethodArgs.Length;
                     // Use checked arithmetics to ensure there aren't any overflows/truncations
                     sizeToAllocate = checked(sizeToAllocate + (numGenericMethodArgs > 0 ? sizeof(IntPtr) * (numGenericMethodArgs - 1) : 0));
-                    IntPtr runtimeMethodHandleValue = MemoryHelpers.AllocateMemory(sizeToAllocate);
-                    if (runtimeMethodHandleValue == IntPtr.Zero)
-                        throw new OutOfMemoryException();
 
-                    DynamicMethodHandleInfo* methodData = (DynamicMethodHandleInfo*)runtimeMethodHandleValue.ToPointer();
+                    DynamicMethodHandleInfo* methodData = (DynamicMethodHandleInfo*)MemoryHelpers.AllocateMemory(sizeToAllocate);
                     methodData->DeclaringType = *(IntPtr*)&declaringTypeHandle;
                     methodData->MethodName = methodName;
                     methodData->MethodSignature = methodSignature;
@@ -314,7 +306,7 @@ public unsafe RuntimeMethodHandle GetRuntimeMethodHandleForComponents(RuntimeTyp
                     }
 
                     // Special flag in the handle value to indicate it was dynamically allocated, and doesn't point into the InvokeMap blob
-                    runtimeMethodHandleValue++;
+                    IntPtr runtimeMethodHandleValue = (IntPtr)methodData + 1;
                     runtimeMethodHandle = *(RuntimeMethodHandle*)&runtimeMethodHandleValue;
 
                     _runtimeMethodHandles.Add(key, runtimeMethodHandle);
@@ -346,12 +338,12 @@ public bool TryGetRuntimeMethodHandleComponents(RuntimeMethodHandle runtimeMetho
         private unsafe bool TryGetDynamicRuntimeMethodHandleComponents(RuntimeMethodHandle runtimeMethodHandle, out RuntimeTypeHandle declaringTypeHandle, out MethodNameAndSignature nameAndSignature, out RuntimeTypeHandle[] genericMethodArgs)
         {
             IntPtr runtimeMethodHandleValue = *(IntPtr*)&runtimeMethodHandle;
-            Debug.Assert((runtimeMethodHandleValue.ToInt64() & 0x1) == 0x1);
 
             // Special flag in the handle value to indicate it was dynamically allocated, and doesn't point into the InvokeMap blob
-            runtimeMethodHandleValue--;
+            Debug.Assert((runtimeMethodHandleValue & 0x1) == 0x1);
+
+            DynamicMethodHandleInfo* methodData = (DynamicMethodHandleInfo*)(runtimeMethodHandleValue - 1);
 
-            DynamicMethodHandleInfo* methodData = (DynamicMethodHandleInfo*)runtimeMethodHandleValue.ToPointer();
             declaringTypeHandle = *(RuntimeTypeHandle*)&(methodData->DeclaringType);
             genericMethodArgs = null;
 
diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/TypeSystemContext.Runtime.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/TypeSystemContext.Runtime.cs
index 75252d276d49..464a370a5f7b 100644
--- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/TypeSystemContext.Runtime.cs
+++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/TypeSystem/TypeSystemContext.Runtime.cs
@@ -42,7 +42,7 @@ protected override RuntimeTypeHandle ConvertIntPtrToValue(IntPtr pointer)
             {
                 unsafe
                 {
-                    return ((MethodTable*)pointer.ToPointer())->ToRuntimeTypeHandle();
+                    return ((MethodTable*)pointer)->ToRuntimeTypeHandle();
                 }
             }
 
@@ -104,7 +104,7 @@ protected override RuntimeTypeHandle ConvertIntPtrToValue(IntPtr pointer)
             {
                 unsafe
                 {
-                    return ((MethodTable*)pointer.ToPointer())->ToRuntimeTypeHandle();
+                    return ((MethodTable*)pointer)->ToRuntimeTypeHandle();
                 }
             }
 
diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Diagnostics/DebuggerStepThroughAttribute.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Diagnostics/DebuggerStepThroughAttribute.cs
new file mode 100644
index 000000000000..732a27d5da64
--- /dev/null
+++ b/src/coreclr/nativeaot/Test.CoreLib/src/System/Diagnostics/DebuggerStepThroughAttribute.cs
@@ -0,0 +1,11 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Diagnostics
+{
+    [AttributeUsage(AttributeTargets.Class | AttributeTargets.Struct | AttributeTargets.Method | AttributeTargets.Constructor, Inherited = false)]
+    public sealed class DebuggerStepThroughAttribute : Attribute
+    {
+        public DebuggerStepThroughAttribute() { }
+    }
+}
diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Diagnostics/StackTraceHiddenAttribute.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Diagnostics/StackTraceHiddenAttribute.cs
new file mode 100644
index 000000000000..cc3efc2c154d
--- /dev/null
+++ b/src/coreclr/nativeaot/Test.CoreLib/src/System/Diagnostics/StackTraceHiddenAttribute.cs
@@ -0,0 +1,18 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Diagnostics
+{
+    /// <summary>
+    /// Types and Methods attributed with StackTraceHidden will be omitted from the stack trace text shown in StackTrace.ToString()
+    /// and Exception.StackTrace
+    /// </summary>
+    [AttributeUsage(AttributeTargets.Class | AttributeTargets.Method | AttributeTargets.Constructor | AttributeTargets.Struct, Inherited = false)]
+    public sealed class StackTraceHiddenAttribute : Attribute
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="StackTraceHiddenAttribute"/> class.
+        /// </summary>
+        public StackTraceHiddenAttribute() { }
+    }
+}
diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/CompilerServices/InlineArrayAttribute.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/CompilerServices/InlineArrayAttribute.cs
new file mode 100644
index 000000000000..f7d47791c7e3
--- /dev/null
+++ b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/CompilerServices/InlineArrayAttribute.cs
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Runtime.CompilerServices
+{
+    [AttributeUsage(AttributeTargets.Struct, AllowMultiple = false)]
+    public sealed class InlineArrayAttribute : Attribute
+    {
+        public InlineArrayAttribute(int length)
+        {
+            Length = length;
+        }
+
+        public int Length { get; }
+    }
+}
diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/InteropServices/MemoryMarshal.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/InteropServices/MemoryMarshal.cs
new file mode 100644
index 000000000000..fa8e7dce0a03
--- /dev/null
+++ b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/InteropServices/MemoryMarshal.cs
@@ -0,0 +1,14 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+
+namespace System.Runtime.InteropServices
+{
+    public static class MemoryMarshal
+    {
+        [Intrinsic]
+        public static ref T GetArrayDataReference<T>(T[] array) =>
+            ref GetArrayDataReference(array);
+    }
+}
diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs
index 9bfc3314b2d3..4751e40da3b2 100644
--- a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs
+++ b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs
@@ -40,13 +40,13 @@ internal static IntPtr RhHandleAlloc(object value, GCHandleType type)
             return h;
         }
 
-        [DllImport(RuntimeLibrary, CallingConvention = CallingConvention.Cdecl)]
+        [DllImport(RuntimeLibrary)]
         internal static extern unsafe IntPtr RhRegisterFrozenSegment(void* pSegmentStart, nuint allocSize, nuint commitSize, nuint reservedSize);
 
-        [DllImport(RuntimeLibrary, CallingConvention = CallingConvention.Cdecl)]
+        [DllImport(RuntimeLibrary)]
         internal static extern unsafe void RhUpdateFrozenSegment(IntPtr seg, void* allocated, void* committed);
 
-        [DllImport(RuntimeLibrary, CallingConvention = CallingConvention.Cdecl)]
+        [DllImport(RuntimeLibrary)]
         internal static extern void RhUnregisterFrozenSegment(IntPtr pSegmentHandle);
 
         [RuntimeImport(RuntimeLibrary, "RhpGetModuleSection")]
@@ -90,14 +90,6 @@ internal static IntPtr RhGetModuleSection(TypeManagerHandle module, ReadyToRunSe
         //
         // Interlocked helpers
         //
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        [RuntimeImport(RuntimeLibrary, "RhpLockCmpXchg8")]
-        internal static extern byte InterlockedCompareExchange(ref byte location1, byte value, byte comparand);
-
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        [RuntimeImport(RuntimeLibrary, "RhpLockCmpXchg16")]
-        internal static extern short InterlockedCompareExchange(ref short location1, short value, short comparand);
-
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "RhpLockCmpXchg32")]
         internal static extern int InterlockedCompareExchange(ref int location1, int value, int comparand);
diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs
index 12b3bb500d3a..9ac6aa5110ad 100644
--- a/src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs
+++ b/src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs
@@ -18,26 +18,6 @@ public static IntPtr CompareExchange(ref IntPtr location1, IntPtr value, IntPtr
 #endif
         }
 
-        [Intrinsic]
-        public static byte CompareExchange(ref byte location1, byte value, byte comparand)
-        {
-#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
-            return CompareExchange(ref location1, value, comparand);
-#else
-            return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand);
-#endif
-        }
-
-        [Intrinsic]
-        public static short CompareExchange(ref short location1, short value, short comparand)
-        {
-#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
-            return CompareExchange(ref location1, value, comparand);
-#else
-            return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand);
-#endif
-        }
-
         [Intrinsic]
         public static int CompareExchange(ref int location1, int value, int comparand)
         {
diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj b/src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj
index 3a0004179264..40eb0de46bab 100644
--- a/src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj
+++ b/src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj
@@ -225,10 +225,14 @@
     <Compile Include="Internal\Runtime\MethodTable.Runtime.cs" />
     <Compile Include="System\Runtime\CompilerServices\CastCache.cs" />
     <Compile Include="System\Runtime\CompilerServices\ClassConstructorRunner.cs" />
+    <Compile Include="System\Runtime\CompilerServices\InlineArrayAttribute.cs" />
     <Compile Include="System\Runtime\CompilerServices\StaticClassConstructionContext.cs" />
     <Compile Include="System\Runtime\InteropServices\InAttribute.cs" />
+    <Compile Include="System\Diagnostics\DebuggerStepThroughAttribute.cs" />
+    <Compile Include="System\Diagnostics\StackTraceHiddenAttribute.cs" />
     <Compile Include="System\Diagnostics\CodeAnalysis\DoesNotReturnIfAttribute.cs" />
     <Compile Include="System\Diagnostics\Debug.cs" />
+    <Compile Include="System\Runtime\InteropServices\MemoryMarshal.cs" />
     <Compile Include="System\Runtime\RuntimeImports.cs" />
     <Compile Include="System\Runtime\RuntimeHelpers.cs" />
     <Compile Include="System\Runtime\InitializeFinalizerThread.cs" />
diff --git a/src/coreclr/pal/CMakeLists.txt b/src/coreclr/pal/CMakeLists.txt
index 4509e9fc0f8b..9213941ba6da 100644
--- a/src/coreclr/pal/CMakeLists.txt
+++ b/src/coreclr/pal/CMakeLists.txt
@@ -7,7 +7,6 @@ include_directories(${COREPAL_SOURCE_DIR}/src)
 include_directories(${COREPAL_SOURCE_DIR}/../inc)
 
 add_compile_options(-fexceptions)
-add_definitions(-DUSE_STL)
 
 add_subdirectory(src)
 add_subdirectory(tests)
diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h
index 64eb19e9fd17..9a1d25515bb3 100644
--- a/src/coreclr/pal/inc/pal.h
+++ b/src/coreclr/pal/inc/pal.h
@@ -33,7 +33,6 @@ Module Name:
 #ifndef __PAL_H__
 #define __PAL_H__
 
-#ifdef PAL_STDCPP_COMPAT
 #include <float.h>
 #include <limits.h>
 #include <stddef.h>
@@ -42,12 +41,22 @@ Module Name:
 #include <stdarg.h>
 #include <stdint.h>
 #include <string.h>
+#include <math.h>
 #include <strings.h>
 #include <errno.h>
 #include <ctype.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
+#include <wctype.h>
+
+#ifdef __cplusplus
+extern "C++"
+{
+
+#include <new>
+
+}
 #endif
 
 #ifdef  __cplusplus
@@ -183,78 +192,6 @@ extern bool g_arm64_atomics_present;
 #endif // __has_cpp_attribute(fallthrough)
 #endif // FALLTHROUGH
 
-#ifndef PAL_STDCPP_COMPAT
-
-#if __GNUC__
-
-typedef __builtin_va_list va_list;
-
-/* We should consider if the va_arg definition here is actually necessary.
-   Could we use the standard va_arg definition? */
-
-#define va_start    __builtin_va_start
-#define va_arg      __builtin_va_arg
-
-#define va_copy     __builtin_va_copy
-#define va_end      __builtin_va_end
-
-#define VOID void
-
-#else // __GNUC__
-
-typedef char * va_list;
-
-#define _INTSIZEOF(n)   ( (sizeof(n) + sizeof(int) - 1) & ~(sizeof(int) - 1) )
-
-#if _MSC_VER >= 1400
-
-#ifdef  __cplusplus
-#define _ADDRESSOF(v)   ( &reinterpret_cast<const char &>(v) )
-#else
-#define _ADDRESSOF(v)   ( &(v) )
-#endif
-
-#define _crt_va_start(ap,v)  ( ap = (va_list)_ADDRESSOF(v) + _INTSIZEOF(v) )
-#define _crt_va_arg(ap,t)    ( *(t *)((ap += _INTSIZEOF(t)) - _INTSIZEOF(t)) )
-#define _crt_va_end(ap)      ( ap = (va_list)0 )
-
-#define va_start _crt_va_start
-#define va_arg _crt_va_arg
-#define va_end _crt_va_end
-
-#else  // _MSC_VER
-
-#define va_start(ap,v)    (ap = (va_list) (&(v)) + _INTSIZEOF(v))
-#define va_arg(ap,t)    ( *(t *)((ap += _INTSIZEOF(t)) - _INTSIZEOF(t)) )
-#define va_end(ap)
-
-#endif // _MSC_VER
-
-#define va_copy(dest,src) (dest = src)
-
-#endif // __GNUC__
-
-#define CHAR_BIT      8
-
-#define SCHAR_MIN   (-128)
-#define SCHAR_MAX     127
-#define UCHAR_MAX     0xff
-
-#define SHRT_MIN    (-32768)
-#define SHRT_MAX      32767
-#define USHRT_MAX     0xffff
-
-#define INT_MIN     (-2147483647 - 1)
-#define INT_MAX       2147483647
-#define UINT_MAX      0xffffffff
-
-// LONG_MIN, LONG_MAX, ULONG_MAX -- use INT32_MIN etc. instead.
-
-#define FLT_MAX 3.402823466e+38F
-#define DBL_MAX 1.7976931348623157e+308
-
-#endif // !PAL_STDCPP_COMPAT
-
 /******************* PAL-Specific Entrypoints *****************************/
 
 #define IsDebuggerPresent PAL_IsDebuggerPresent
@@ -264,44 +201,6 @@ BOOL
 PALAPI
 PAL_IsDebuggerPresent();
 
-/* minimum signed 64 bit value */
-#define _I64_MIN    (I64(-9223372036854775807) - 1)
-/* maximum signed 64 bit value */
-#define _I64_MAX      I64(9223372036854775807)
-/* maximum unsigned 64 bit value */
-#define _UI64_MAX     UI64(0xffffffffffffffff)
-
-#define _I8_MAX   SCHAR_MAX
-#define _I8_MIN   SCHAR_MIN
-#define _I16_MAX  SHRT_MAX
-#define _I16_MIN  SHRT_MIN
-#define _I32_MAX  INT_MAX
-#define _I32_MIN  INT_MIN
-#define _UI8_MAX  UCHAR_MAX
-#define _UI8_MIN  UCHAR_MIN
-#define _UI16_MAX USHRT_MAX
-#define _UI16_MIN USHRT_MIN
-#define _UI32_MAX UINT_MAX
-#define _UI32_MIN UINT_MIN
-
-#undef NULL
-
-#if defined(__cplusplus)
-#define NULL    0
-#else
-#define NULL    ((PVOID)0)
-#endif
-
-#if defined(PAL_STDCPP_COMPAT) && !defined(__cplusplus)
-#define nullptr NULL
-#endif // defined(PAL_STDCPP_COMPAT) && !defined(__cplusplus)
-
-#ifndef PAL_STDCPP_COMPAT
-
-typedef __int64 time_t;
-#define _TIME_T_DEFINED
-#endif // !PAL_STDCPP_COMPAT
-
 #define DLL_PROCESS_ATTACH 1
 #define DLL_THREAD_ATTACH  2
 #define DLL_THREAD_DETACH  3
@@ -3448,7 +3347,7 @@ BitScanReverse64(
 
 FORCEINLINE void PAL_InterlockedOperationBarrier()
 {
-#if defined(HOST_ARM64) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64)
+#if (defined(HOST_ARM64) && !defined(LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT) && !defined(__clang__)) || defined(HOST_LOONGARCH64) || defined(HOST_RISCV64)
     // On arm64, most of the __sync* functions generate a code sequence like:
     //   loop:
     //     ldaxr (load acquire exclusive)
@@ -3636,20 +3535,6 @@ Return Values
 The function returns the initial value pointed to by Target.
 
 --*/
-Define_InterlockMethod(
-    CHAR,
-    InterlockedExchange8(IN OUT CHAR volatile *Target, CHAR Value),
-    InterlockedExchange8(Target, Value),
-    __atomic_exchange_n(Target, Value, __ATOMIC_ACQ_REL)
-)
-
-Define_InterlockMethod(
-    SHORT,
-    InterlockedExchange16(IN OUT SHORT volatile *Target, SHORT Value),
-    InterlockedExchange16(Target, Value),
-    __atomic_exchange_n(Target, Value, __ATOMIC_ACQ_REL)
-)
-
 Define_InterlockMethod(
     LONG,
     InterlockedExchange(IN OUT LONG volatile *Target, LONG Value),
@@ -3708,26 +3593,6 @@ Return Values
 The return value is the initial value of the destination.
 
 --*/
-Define_InterlockMethod(
-    CHAR,
-    InterlockedCompareExchange8(IN OUT CHAR volatile *Destination, IN CHAR Exchange, IN CHAR Comperand),
-    InterlockedCompareExchange8(Destination, Exchange, Comperand),
-    __sync_val_compare_and_swap(
-        Destination, /* The pointer to a variable whose value is to be compared with. */
-        Comperand, /* The value to be compared */
-        Exchange /* The value to be stored */)
-)
-
-Define_InterlockMethod(
-    SHORT,
-    InterlockedCompareExchange16(IN OUT SHORT volatile *Destination, IN SHORT Exchange, IN SHORT Comperand),
-    InterlockedCompareExchange16(Destination, Exchange, Comperand),
-    __sync_val_compare_and_swap(
-        Destination, /* The pointer to a variable whose value is to be compared with. */
-        Comperand, /* The value to be compared */
-        Exchange /* The value to be stored */)
-)
-
 Define_InterlockMethod(
     LONG,
     InterlockedCompareExchange(IN OUT LONG volatile *Destination, IN LONG Exchange, IN LONG Comperand),
@@ -3976,68 +3841,6 @@ PAL_GetCurrentThreadAffinitySet(SIZE_T size, UINT_PTR* data);
 #endif //FEATURE_PAL_ANSI
 /******************* C Runtime Entrypoints *******************************/
 
-/* Some C runtime functions needs to be reimplemented by the PAL.
-   To avoid name collisions, those functions have been renamed using
-   defines */
-#ifndef PAL_STDCPP_COMPAT
-#define exit          PAL_exit
-#define realloc       PAL_realloc
-#define fopen         PAL_fopen
-#define fprintf       PAL_fprintf
-#define vfprintf      PAL_vfprintf
-#define rand          PAL_rand
-#define time          PAL_time
-#define getenv        PAL_getenv
-#define fgets         PAL_fgets
-#define qsort         PAL_qsort
-#define bsearch       PAL_bsearch
-#define ferror        PAL_ferror
-#define fread         PAL_fread
-#define fwrite        PAL_fwrite
-#define ftell         PAL_ftell
-#define fclose        PAL_fclose
-#define fflush        PAL_fflush
-#define fputs         PAL_fputs
-#define fseek         PAL_fseek
-#define fgetpos       PAL_fgetpos
-#define fsetpos       PAL_fsetpos
-#define setvbuf       PAL_setvbuf
-#define acos          PAL_acos
-#define asin          PAL_asin
-#define atan2         PAL_atan2
-#define exp           PAL_exp
-#define ilogb         PAL_ilogb
-#define log           PAL_log
-#define log10         PAL_log10
-#define pow           PAL_pow
-#define sincos        PAL_sincos
-#define acosf         PAL_acosf
-#define asinf         PAL_asinf
-#define atan2f        PAL_atan2f
-#define expf          PAL_expf
-#define ilogbf        PAL_ilogbf
-#define logf          PAL_logf
-#define log10f        PAL_log10f
-#define powf          PAL_powf
-#define sincosf       PAL_sincosf
-#define malloc        PAL_malloc
-#define free          PAL_free
-#define _open         PAL__open
-#define _pread        PAL__pread
-#define _close        PAL__close
-#define _flushall     PAL__flushall
-
-#ifdef HOST_AMD64
-#define _mm_getcsr    PAL__mm_getcsr
-#define _mm_setcsr    PAL__mm_setcsr
-#endif // HOST_AMD64
-
-// Forward declare functions that are in header files we can't include yet
-int printf(const char *, ...);
-int vprintf(const char *, va_list);
-
-#endif // !PAL_STDCPP_COMPAT
-
 #ifndef _CONST_RETURN
 #ifdef  __cplusplus
 #define _CONST_RETURN  const
@@ -4050,72 +3853,16 @@ int vprintf(const char *, va_list);
 /* For backwards compatibility */
 #define _WConst_return _CONST_RETURN
 
-#define EOF     (-1)
-
-typedef int errno_t;
-
-#if defined(__WINT_TYPE__)
-typedef __WINT_TYPE__ wint_t;
-#else
-typedef unsigned int wint_t;
-#endif
-
-#ifndef PAL_STDCPP_COMPAT
-PALIMPORT void * __cdecl memcpy(void *, const void *, size_t);
-PALIMPORT int    __cdecl memcmp(const void *, const void *, size_t);
-PALIMPORT void * __cdecl memset(void *, int, size_t);
-PALIMPORT void * __cdecl memmove(void *, const void *, size_t);
-PALIMPORT void * __cdecl memchr(const void *, int, size_t);
-PALIMPORT long long int __cdecl atoll(const char *) MATH_THROW_DECL;
-PALIMPORT size_t __cdecl strlen(const char *);
-PALIMPORT int __cdecl strcmp(const char*, const char *);
-PALIMPORT int __cdecl strncmp(const char*, const char *, size_t);
-PALIMPORT int __cdecl strncasecmp(const char *, const char *, size_t);
-PALIMPORT char * __cdecl strcat(char *, const char *);
-PALIMPORT char * __cdecl strncat(char *, const char *, size_t);
-PALIMPORT char * __cdecl strcpy(char *, const char *);
-PALIMPORT char * __cdecl strncpy(char *, const char *, size_t);
-PALIMPORT char * __cdecl strchr(const char *, int);
-PALIMPORT char * __cdecl strrchr(const char *, int);
-PALIMPORT char * __cdecl strpbrk(const char *, const char *);
-PALIMPORT char * __cdecl strstr(const char *, const char *);
-PALIMPORT char * __cdecl strtok_r(char *, const char *, char **);
-PALIMPORT char * __cdecl strdup(const char*);
-PALIMPORT int __cdecl atoi(const char *);
-PALIMPORT unsigned long __cdecl strtoul(const char *, char **, int);
-PALIMPORT ULONGLONG __cdecl strtoull(const char *, char **, int);
-PALIMPORT double __cdecl atof(const char *);
-PALIMPORT double __cdecl strtod(const char *, char **);
-PALIMPORT size_t strnlen(const char *, size_t);
-PALIMPORT int __cdecl isprint(int);
-PALIMPORT int __cdecl isspace(int);
-PALIMPORT int __cdecl isalpha(int);
-PALIMPORT int __cdecl isalnum(int);
-PALIMPORT int __cdecl isdigit(int);
-PALIMPORT int __cdecl isxdigit(int);
-PALIMPORT int __cdecl tolower(int);
-PALIMPORT int __cdecl toupper(int);
-PALIMPORT int __cdecl iswalpha(wint_t);
-PALIMPORT int __cdecl iswdigit(wint_t);
-PALIMPORT int __cdecl iswupper(wint_t);
-PALIMPORT int __cdecl iswprint(wint_t);
-PALIMPORT int __cdecl iswspace(wint_t);
-PALIMPORT int __cdecl iswxdigit(wint_t);
-PALIMPORT wint_t __cdecl towupper(wint_t);
-PALIMPORT wint_t __cdecl towlower(wint_t);
-PALIMPORT int remove(const char*);
-#endif // PAL_STDCPP_COMPAT
-
 /* _TRUNCATE */
 #if !defined(_TRUNCATE)
 #define _TRUNCATE ((size_t)-1)
 #endif
 
+// errno_t is only defined when the Secure CRT Extensions library is available (which no standard library that we build with implements anyway)
+typedef int errno_t;
+
 PALIMPORT DLLEXPORT errno_t __cdecl memcpy_s(void *, size_t, const void *, size_t) THROW_DECL;
 PALIMPORT errno_t __cdecl memmove_s(void *, size_t, const void *, size_t);
-PALIMPORT DLLEXPORT int __cdecl strcasecmp(const char *, const char *);
-PALIMPORT char * __cdecl _gcvt_s(char *, int, double, int);
-PALIMPORT int __cdecl __iscsym(int);
 PALIMPORT DLLEXPORT int __cdecl _wcsicmp(const WCHAR *, const WCHAR*);
 PALIMPORT int __cdecl _wcsnicmp(const WCHAR *, const WCHAR *, size_t);
 PALIMPORT DLLEXPORT int __cdecl _vsnprintf_s(char *, size_t, size_t, const char *, va_list);
@@ -4142,6 +3889,7 @@ PALIMPORT DLLEXPORT double __cdecl PAL_wcstod(const WCHAR *, WCHAR **);
 PALIMPORT errno_t __cdecl _wcslwr_s(WCHAR *, size_t sz);
 PALIMPORT DLLEXPORT errno_t __cdecl _i64tow_s(long long, WCHAR *, size_t, int);
 PALIMPORT int __cdecl _wtoi(const WCHAR *);
+PALIMPORT FILE * __cdecl _wfopen(const WCHAR *, const WCHAR *);
 
 inline int _stricmp(const char* a, const char* b)
 {
@@ -4158,6 +3906,10 @@ inline char* _strdup(const char* a)
     return strdup(a);
 }
 
+// Define the MSVC implementation of the alloca concept.
+// As this allocates on the current stack frame, use a macro instead of an inline function.
+#define _alloca(x) alloca(x)
+
 #ifdef __cplusplus
 extern "C++" {
 inline WCHAR *PAL_wcschr(WCHAR* S, WCHAR C)
@@ -4191,11 +3943,6 @@ unsigned int __cdecl _rotl(unsigned int value, int shift)
 }
 #endif // !__has_builtin(_rotl)
 
-// On 64 bit unix, make the long an int.
-#ifdef HOST_64BIT
-#define _lrotl _rotl
-#endif
-
 #if !__has_builtin(_rotr)
 
 /*++
@@ -4218,208 +3965,7 @@ unsigned int __cdecl _rotr(unsigned int value, int shift)
 
 #endif // !__has_builtin(_rotr)
 
-PALIMPORT int __cdecl abs(int);
-// clang complains if this is declared with __int64
-PALIMPORT long long __cdecl llabs(long long);
-#ifndef PAL_STDCPP_COMPAT
-
-PALIMPORT int __cdecl _finite(double);
-PALIMPORT int __cdecl _isnan(double);
-PALIMPORT double __cdecl _copysign(double, double);
-PALIMPORT double __cdecl acos(double);
-PALIMPORT double __cdecl acosh(double) MATH_THROW_DECL;
-PALIMPORT double __cdecl asin(double);
-PALIMPORT double __cdecl asinh(double) MATH_THROW_DECL;
-PALIMPORT double __cdecl atan(double) MATH_THROW_DECL;
-PALIMPORT double __cdecl atanh(double) MATH_THROW_DECL;
-PALIMPORT double __cdecl atan2(double, double);
-PALIMPORT double __cdecl cbrt(double) MATH_THROW_DECL;
-PALIMPORT double __cdecl ceil(double);
-PALIMPORT double __cdecl cos(double);
-PALIMPORT double __cdecl cosh(double);
-PALIMPORT double __cdecl exp(double);
-PALIMPORT double __cdecl fabs(double);
-PALIMPORT double __cdecl floor(double);
-PALIMPORT double __cdecl fmod(double, double);
-PALIMPORT double __cdecl fma(double, double, double) MATH_THROW_DECL;
-PALIMPORT int __cdecl ilogb(double);
-PALIMPORT double __cdecl log(double);
-PALIMPORT double __cdecl log2(double) MATH_THROW_DECL;
-PALIMPORT double __cdecl log10(double);
-PALIMPORT double __cdecl modf(double, double*);
-PALIMPORT double __cdecl pow(double, double);
-PALIMPORT double __cdecl sin(double);
-PALIMPORT void __cdecl sincos(double, double*, double*);
-PALIMPORT double __cdecl sinh(double);
-PALIMPORT double __cdecl sqrt(double);
-PALIMPORT double __cdecl tan(double);
-PALIMPORT double __cdecl tanh(double);
-PALIMPORT double __cdecl trunc(double);
-
-PALIMPORT int __cdecl _finitef(float);
-PALIMPORT int __cdecl _isnanf(float);
-PALIMPORT float __cdecl _copysignf(float, float);
-PALIMPORT float __cdecl acosf(float);
-PALIMPORT float __cdecl acoshf(float) MATH_THROW_DECL;
-PALIMPORT float __cdecl asinf(float);
-PALIMPORT float __cdecl asinhf(float) MATH_THROW_DECL;
-PALIMPORT float __cdecl atanf(float) MATH_THROW_DECL;
-PALIMPORT float __cdecl atanhf(float) MATH_THROW_DECL;
-PALIMPORT float __cdecl atan2f(float, float);
-PALIMPORT float __cdecl cbrtf(float) MATH_THROW_DECL;
-PALIMPORT float __cdecl ceilf(float);
-PALIMPORT float __cdecl cosf(float);
-PALIMPORT float __cdecl coshf(float);
-PALIMPORT float __cdecl expf(float);
-PALIMPORT float __cdecl fabsf(float);
-PALIMPORT float __cdecl floorf(float);
-PALIMPORT float __cdecl fmodf(float, float);
-PALIMPORT float __cdecl fmaf(float, float, float) MATH_THROW_DECL;
-PALIMPORT int __cdecl ilogbf(float);
-PALIMPORT float __cdecl logf(float);
-PALIMPORT float __cdecl log2f(float) MATH_THROW_DECL;
-PALIMPORT float __cdecl log10f(float);
-PALIMPORT float __cdecl modff(float, float*);
-PALIMPORT float __cdecl powf(float, float);
-PALIMPORT float __cdecl sinf(float);
-PALIMPORT void __cdecl sincosf(float, float*, float*);
-PALIMPORT float __cdecl sinhf(float);
-PALIMPORT float __cdecl sqrtf(float);
-PALIMPORT float __cdecl tanf(float);
-PALIMPORT float __cdecl tanhf(float);
-PALIMPORT float __cdecl truncf(float);
-#endif // !PAL_STDCPP_COMPAT
-
-#ifndef PAL_STDCPP_COMPAT
-
-#ifdef __cplusplus
-extern "C++" {
-
-inline __int64 abs(__int64 _X) {
-    return llabs(_X);
-}
-
-#ifdef __APPLE__
-inline __int64 abs(SSIZE_T _X) {
-    return llabs((__int64)_X);
-}
-#endif
-
-}
-#endif
-
-PALIMPORT DLLEXPORT void * __cdecl malloc(size_t);
-PALIMPORT DLLEXPORT void   __cdecl free(void *);
-PALIMPORT DLLEXPORT void * __cdecl realloc(void *, size_t);
-
-#if defined(_MSC_VER)
-#define alloca _alloca
-#else
-#define _alloca alloca
-#endif //_MSC_VER
-
-#define alloca  __builtin_alloca
-
-#define max(a, b) (((a) > (b)) ? (a) : (b))
-#define min(a, b) (((a) < (b)) ? (a) : (b))
-
-#endif // !PAL_STDCPP_COMPAT
-
-PALIMPORT PAL_NORETURN void __cdecl exit(int);
-
-#ifndef PAL_STDCPP_COMPAT
-
-PALIMPORT DLLEXPORT void __cdecl qsort(void *, size_t, size_t, int(__cdecl *)(const void *, const void *));
-PALIMPORT DLLEXPORT void * __cdecl bsearch(const void *, const void *, size_t, size_t,
-    int(__cdecl *)(const void *, const void *));
-
-PALIMPORT time_t __cdecl time(time_t *);
-
-#endif // !PAL_STDCPP_COMPAT
-
-PALIMPORT DLLEXPORT int __cdecl _open(const char *szPath, int nFlags, ...);
-PALIMPORT DLLEXPORT size_t __cdecl _pread(int fd, void *buf, size_t nbytes, ULONG64 offset);
-PALIMPORT DLLEXPORT int __cdecl _close(int);
-PALIMPORT DLLEXPORT int __cdecl _flushall();
-
-#ifdef PAL_STDCPP_COMPAT
-
-struct _PAL_FILE;
-typedef struct _PAL_FILE PAL_FILE;
-
-#else // PAL_STDCPP_COMPAT
-
-struct _FILE;
-typedef struct _FILE FILE;
-typedef struct _FILE PAL_FILE;
-
-#define SEEK_SET    0
-#define SEEK_CUR    1
-#define SEEK_END    2
-
-/* Locale categories */
-#define LC_ALL          0
-#define LC_COLLATE      1
-#define LC_CTYPE        2
-#define LC_MONETARY     3
-#define LC_NUMERIC      4
-#define LC_TIME         5
-
-#define _IOFBF  0       /* setvbuf should set fully buffered */
-#define _IOLBF  1       /* setvbuf should set line buffered */
-#define _IONBF  2       /* setvbuf should set unbuffered */
-
-#endif // PAL_STDCPP_COMPAT
-
-PALIMPORT int __cdecl PAL_fclose(PAL_FILE *);
-PALIMPORT DLLEXPORT int __cdecl PAL_fflush(PAL_FILE *);
-PALIMPORT size_t __cdecl PAL_fwrite(const void *, size_t, size_t, PAL_FILE *);
-PALIMPORT size_t __cdecl PAL_fread(void *, size_t, size_t, PAL_FILE *);
-PALIMPORT char * __cdecl PAL_fgets(char *, int, PAL_FILE *);
-PALIMPORT int __cdecl PAL_fputs(const char *, PAL_FILE *);
-PALIMPORT DLLEXPORT int __cdecl PAL_fprintf(PAL_FILE *, const char *, ...);
-PALIMPORT int __cdecl PAL_vfprintf(PAL_FILE *, const char *, va_list);
-PALIMPORT int __cdecl PAL_fseek(PAL_FILE *, LONG, int);
-PALIMPORT LONG __cdecl PAL_ftell(PAL_FILE *);
-PALIMPORT int __cdecl PAL_ferror(PAL_FILE *);
-PALIMPORT PAL_FILE * __cdecl PAL_fopen(const char *, const char *);
-PALIMPORT int __cdecl PAL_setvbuf(PAL_FILE *stream, char *, int, size_t);
-
-PALIMPORT PAL_FILE * __cdecl _wfopen(const WCHAR *, const WCHAR *);
-
-/* Maximum value that can be returned by the rand function. */
-
-#ifndef PAL_STDCPP_COMPAT
-#define RAND_MAX 0x7fff
-#endif // !PAL_STDCPP_COMPAT
-
-PALIMPORT int __cdecl rand(void);
-PALIMPORT void __cdecl srand(unsigned int);
-
-#ifdef _MSC_VER
-#define PAL_get_caller _MSC_VER
-#else
-#define PAL_get_caller 0
-#endif
-
-PALIMPORT DLLEXPORT PAL_FILE * __cdecl PAL_get_stdout(int caller);
-PALIMPORT PAL_FILE * __cdecl PAL_get_stdin(int caller);
-PALIMPORT DLLEXPORT PAL_FILE * __cdecl PAL_get_stderr(int caller);
-PALIMPORT DLLEXPORT int * __cdecl PAL_errno(int caller);
-
-#ifdef PAL_STDCPP_COMPAT
-#define PAL_stdout (PAL_get_stdout(PAL_get_caller))
-#define PAL_stdin  (PAL_get_stdin(PAL_get_caller))
-#define PAL_stderr (PAL_get_stderr(PAL_get_caller))
-#define PAL_errno   (*PAL_errno(PAL_get_caller))
-#else // PAL_STDCPP_COMPAT
-#define stdout (PAL_get_stdout(PAL_get_caller))
-#define stdin  (PAL_get_stdin(PAL_get_caller))
-#define stderr (PAL_get_stderr(PAL_get_caller))
-#define errno  (*PAL_errno(PAL_get_caller))
-#endif // PAL_STDCPP_COMPAT
-
-PALIMPORT DLLEXPORT char * __cdecl getenv(const char *);
+PALIMPORT DLLEXPORT char * __cdecl PAL_getenv(const char *);
 PALIMPORT DLLEXPORT int __cdecl _putenv(const char *);
 
 #define ERANGE          34
@@ -4450,15 +3996,7 @@ PALAPI
 PAL_GetCpuTickCount();
 #endif // PAL_PERF
 
-/******************* PAL functions for SIMD extensions *****************/
-
-PALIMPORT
-unsigned int _mm_getcsr(void);
-
-PALIMPORT
-void _mm_setcsr(unsigned int i);
-
-/******************* PAL functions for CPU capability detection *******/
+/******************* PAL functions for exceptions *******/
 
 #ifdef __cplusplus
 
diff --git a/src/coreclr/pal/inc/pal_assert.h b/src/coreclr/pal/inc/pal_assert.h
index 87af991d9abb..a4b49c1ddafc 100644
--- a/src/coreclr/pal/inc/pal_assert.h
+++ b/src/coreclr/pal/inc/pal_assert.h
@@ -35,7 +35,7 @@ extern "C" {
 #if defined(_DEBUG)
 #define _ASSERTE(e) do {                                        \
         if (!(e)) {                                             \
-            PAL_fprintf (PAL_get_stderr(PAL_get_caller),        \
+            fprintf (stderr,                                    \
                      "ASSERT FAILED\n"                          \
                      "\tExpression: %s\n"                       \
                      "\tLocation:   line %d in %s\n"            \
diff --git a/src/coreclr/pal/inc/pal_endian.h b/src/coreclr/pal/inc/pal_endian.h
index 43a8167562ee..ddfdeab47297 100644
--- a/src/coreclr/pal/inc/pal_endian.h
+++ b/src/coreclr/pal/inc/pal_endian.h
@@ -102,6 +102,11 @@ inline void SwapGuid(GUID *pGuid)
 #define ALIGN_ACCESS    ((1<<LOG2_PTRSIZE)-1)
 #endif
 
+#ifdef HOST_RISCV64
+#define LOG2_PTRSIZE	3
+#define ALIGN_ACCESS    ((1<<LOG2_PTRSIZE)-1)
+#endif
+
 #if defined(ALIGN_ACCESS) && !defined(_MSC_VER)
 #ifdef __cplusplus
 extern "C++" {
diff --git a/src/coreclr/pal/inc/pal_mstypes.h b/src/coreclr/pal/inc/pal_mstypes.h
index 1eee6b2bbbd2..d59103002d18 100644
--- a/src/coreclr/pal/inc/pal_mstypes.h
+++ b/src/coreclr/pal/inc/pal_mstypes.h
@@ -64,13 +64,15 @@ extern "C" {
 #define _cdecl
 #define CDECL
 
-// On ARM __fastcall is ignored and causes a compile error
-#if !defined(PAL_STDCPP_COMPAT) || defined(__arm__)
-#  undef __fastcall
-#  undef _fastcall
-#  define __fastcall
-#  define _fastcall
-#endif // !defined(PAL_STDCPP_COMPAT) || defined(__arm__)
+// Some platforms (such as FreeBSD) define the __fastcall macro
+// on all targets, even when using it will fail.
+// Undefine it here so we can use it on all platforms without error.
+#ifdef __fastcall
+#undef __fastcall
+#endif
+
+#define __fastcall
+#define _fastcall
 
 #endif  // !defined(__i386__)
 
@@ -101,7 +103,9 @@ extern "C" {
 #else
 
 #define PALIMPORT
+#ifndef DLLEXPORT
 #define DLLEXPORT __attribute__((visibility("default")))
+#endif
 #define PAL_NORETURN    __attribute__((noreturn))
 
 #endif
@@ -206,21 +210,6 @@ extern "C" {
 
 #endif // _MSC_VER
 
-#ifndef PAL_STDCPP_COMPAT
-// Defined in gnu's types.h. For non PAL_IMPLEMENTATION system
-// includes are not included, so we need to define them.
-#ifndef PAL_IMPLEMENTATION
-
-typedef __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-typedef __int32 int32_t;
-typedef unsigned __int32 uint32_t;
-typedef __int16 int16_t;
-typedef unsigned __int16 uint16_t;
-typedef __int8 int8_t;
-typedef unsigned __int8 uint8_t;
-
-#endif // PAL_IMPLEMENTATION
 
 #ifndef _MSC_VER
 
@@ -229,7 +218,6 @@ typedef long double LONG_DOUBLE;
 #endif
 
 #endif // _MSC_VER
-#endif // !PAL_STDCPP_COMPAT
 
 typedef void VOID;
 
@@ -565,49 +553,10 @@ static_assert(sizeof(SSIZE_T) == sizeof(void*), "SSIZE_T should be pointer sized
 #define SSIZE_T_MIN (ssize_t)I64(0x8000000000000000)
 #endif
 
-#ifndef PAL_STDCPP_COMPAT
-#ifdef HOST_64BIT
-typedef unsigned long size_t;
-typedef long ssize_t;
-typedef long ptrdiff_t;
-#else // !HOST_64BIT
-typedef unsigned int size_t;
-typedef int ptrdiff_t;
-#endif // !HOST_64BIT
-#endif // !PAL_STDCPP_COMPAT
-#define _SIZE_T_DEFINED
-
 typedef LONG_PTR LPARAM;
 
-#define _PTRDIFF_T_DEFINED
-#ifdef _MINGW_
-// We need to define _PTRDIFF_T to make sure ptrdiff_t doesn't get defined
-// again by system headers - but only for MinGW.
-#define _PTRDIFF_T
-#endif
-
 typedef char16_t WCHAR;
 
-#ifndef PAL_STDCPP_COMPAT
-
-#if defined(__linux__)
-#ifdef HOST_64BIT
-typedef long int intptr_t;
-typedef unsigned long int uintptr_t;
-#else // !HOST_64BIT
-typedef int intptr_t;
-typedef unsigned int uintptr_t;
-#endif // !HOST_64BIT
-#else
-typedef long int intptr_t;
-typedef unsigned long int uintptr_t;
-#endif
-
-#endif // PAL_STDCPP_COMPAT
-
-#define _INTPTR_T_DEFINED
-#define _UINTPTR_T_DEFINED
-
 typedef DWORD LCID;
 typedef PDWORD PLCID;
 typedef WORD LANGID;
diff --git a/src/coreclr/pal/inc/rt/cpp/assert.h b/src/coreclr/pal/inc/rt/cpp/assert.h
deleted file mode 100644
index 7493b151d6a0..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/assert.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File:  assert.h
-//
-// ===========================================================================
-// dummy assert.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/cstdlib b/src/coreclr/pal/inc/rt/cpp/cstdlib
deleted file mode 100644
index 1cfd40828a47..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/cstdlib
+++ /dev/null
@@ -1,13 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-//
-// <OWNER>clrosdev</OWNER>
-//
-// ===========================================================================
-// File: cstdlib
-//
-// ===========================================================================
-// dummy cstdlib for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/ctype.h b/src/coreclr/pal/inc/rt/cpp/ctype.h
deleted file mode 100644
index cb41fcd88e6e..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/ctype.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: ctype.h
-//
-// ===========================================================================
-// dummy ctype.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/emmintrin.h b/src/coreclr/pal/inc/rt/cpp/emmintrin.h
deleted file mode 100644
index f2e8e0c1fd66..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/emmintrin.h
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-// From llvm-3.9/clang-3.9.1 emmintrin.h:
-
-/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- *
- *===-----------------------------------------------------------------------===
- */
-
-#include "palrt.h"
-#ifdef __GNUC__
-#ifndef __EMMINTRIN_H
-#define __IMMINTRIN_H
-
-typedef long long __m128i __attribute__((__vector_size__(16)));
-
-typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
-typedef short __v8hi __attribute__((__vector_size__(16)));
-typedef char __v16qi __attribute__((__vector_size__(16)));
-
-
-/* Define the default attribute for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, NODEBUG_ATTRIBUTE))
-
-/// \brief Performs a bitwise OR of two 128-bit integer vectors.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the \c VPOR / POR instruction.
-///
-/// \param __a
-///    A 128-bit integer vector containing one of the source operands.
-/// \param __b
-///    A 128-bit integer vector containing one of the source operands.
-/// \returns A 128-bit integer vector containing the bitwise OR of the values
-///    in both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_or_si128(__m128i __a, __m128i __b)
-{
-  return (__m128i)((__v2du)__a | (__v2du)__b);
-}
-
-/// \brief Compares each of the corresponding 16-bit values of the 128-bit
-///    integer vectors for equality. Each comparison yields 0h for false, FFFFh
-///    for true.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the \c VPCMPEQW / PCMPEQW instruction.
-///
-/// \param __a
-///    A 128-bit integer vector.
-/// \param __b
-///    A 128-bit integer vector.
-/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_cmpeq_epi16(__m128i __a, __m128i __b)
-{
-  return (__m128i)((__v8hi)__a == (__v8hi)__b);
-}
-
-/// \brief Moves packed integer values from an unaligned 128-bit memory location
-///    to elements in a 128-bit integer vector.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the \c VMOVDQU / MOVDQU instruction.
-///
-/// \param __p
-///    A pointer to a memory location containing integer values.
-/// \returns A 128-bit integer vector containing the moved values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_loadu_si128(__m128i const *__p)
-{
-  struct __loadu_si128 {
-    __m128i __v;
-  } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_si128*)__p)->__v;
-}
-
-/// \brief Initializes all values in a 128-bit vector of [8 x i16] with the
-///    specified 16-bit value.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic is a utility function and does not correspond to a specific
-///    instruction.
-///
-/// \param __w
-///    A 16-bit value used to initialize the elements of the destination integer
-///    vector.
-/// \returns An initialized 128-bit vector of [8 x i16] with all elements
-///    containing the value provided in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_set1_epi16(short __w)
-{
-  return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
-}
-
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_movemask_epi8(__m128i __a)
-{
-  return __builtin_ia32_pmovmskb128((__v16qi)__a);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif /* __EMMINTRIN_H */
-#endif // __GNUC__
diff --git a/src/coreclr/pal/inc/rt/cpp/fcntl.h b/src/coreclr/pal/inc/rt/cpp/fcntl.h
deleted file mode 100644
index 556145a9f084..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/fcntl.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: fcntl.h
-//
-// ===========================================================================
-// dummy fcntl.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/float.h b/src/coreclr/pal/inc/rt/cpp/float.h
deleted file mode 100644
index a1dc803380e4..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/float.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: float.h
-//
-// ===========================================================================
-// dummy float.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/limits.h b/src/coreclr/pal/inc/rt/cpp/limits.h
deleted file mode 100644
index bd667f14eaf9..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/limits.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: limits.h
-//
-// ===========================================================================
-// dummy limits.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/malloc.h b/src/coreclr/pal/inc/rt/cpp/malloc.h
deleted file mode 100644
index 255a2c7f2fa2..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/malloc.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: malloc.h
-//
-// ===========================================================================
-// dummy malloc.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/math.h b/src/coreclr/pal/inc/rt/cpp/math.h
deleted file mode 100644
index e42c1852c139..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/math.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: math.h
-//
-// ===========================================================================
-// dummy math.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/memory.h b/src/coreclr/pal/inc/rt/cpp/memory.h
deleted file mode 100644
index bcc0d7d9c5d5..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/memory.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: memory.h
-//
-// ===========================================================================
-// dummy memory.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/stdarg.h b/src/coreclr/pal/inc/rt/cpp/stdarg.h
deleted file mode 100644
index 59d0d046d5f9..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/stdarg.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: stdarg.h
-//
-// ===========================================================================
-// dummy stdarg.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/stdbool.h b/src/coreclr/pal/inc/rt/cpp/stdbool.h
deleted file mode 100644
index b23533a2940d..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/stdbool.h
+++ /dev/null
@@ -1,4 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/stddef.h b/src/coreclr/pal/inc/rt/cpp/stddef.h
deleted file mode 100644
index b347dbf41497..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/stddef.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: stddef.h
-//
-// ===========================================================================
-// dummy stddef.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/stdint.h b/src/coreclr/pal/inc/rt/cpp/stdint.h
deleted file mode 100644
index b23533a2940d..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/stdint.h
+++ /dev/null
@@ -1,4 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/stdio.h b/src/coreclr/pal/inc/rt/cpp/stdio.h
deleted file mode 100644
index 33c1912bb2b7..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/stdio.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: stdio.h
-//
-// ===========================================================================
-// dummy stdio.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/stdlib.h b/src/coreclr/pal/inc/rt/cpp/stdlib.h
deleted file mode 100644
index d2d49357b88e..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/stdlib.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: stdlib.h
-//
-// ===========================================================================
-// dummy stdlib.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/string.h b/src/coreclr/pal/inc/rt/cpp/string.h
deleted file mode 100644
index b66d883338e1..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/string.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: string.h
-//
-// ===========================================================================
-// dummy string.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/time.h b/src/coreclr/pal/inc/rt/cpp/time.h
deleted file mode 100644
index 00c83f99d343..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/time.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: time.h
-//
-// ===========================================================================
-// dummy time.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/wchar.h b/src/coreclr/pal/inc/rt/cpp/wchar.h
deleted file mode 100644
index 5497d729e43b..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/wchar.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-//
-
-//
-// ===========================================================================
-// File: wchar.h
-//
-// ===========================================================================
-// dummy wchar.h for PAL
-
-#include "palrt.h"
diff --git a/src/coreclr/pal/inc/rt/cpp/xmmintrin.h b/src/coreclr/pal/inc/rt/cpp/xmmintrin.h
deleted file mode 100644
index 826d2d788676..000000000000
--- a/src/coreclr/pal/inc/rt/cpp/xmmintrin.h
+++ /dev/null
@@ -1,117 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-// From llvm-3.9/clang-3.9.1 xmmintrin.h:
-
-/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
-*
-* Permission is hereby granted, free of charge, to any person obtaining a copy
-* of this software and associated documentation files (the "Software"), to deal
-* in the Software without restriction, including without limitation the rights
-* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the Software is
-* furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-* THE SOFTWARE.
-*
-*===-----------------------------------------------------------------------===
-*/
-
-#ifdef __GNUC__
-
-typedef float __m128 __attribute__((__vector_size__(16)));
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, NODEBUG_ATTRIBUTE))
-
-/// \brief Loads a 128-bit floating-point vector of [4 x float] from an aligned
-///    memory location.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the \c VMOVAPS / MOVAPS instruction.
-///
-/// \param __p
-///    A pointer to a 128-bit memory location. The address of the memory
-///    location has to be 128-bit aligned.
-/// \returns A 128-bit vector of [4 x float] containing the loaded valus.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_load_ps(const float *__p)
-{
-    return *(__m128*)__p;
-}
-
-/// \brief Loads a 128-bit floating-point vector of [4 x float] from an
-///    unaligned memory location.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the \c VMOVUPS / MOVUPS instruction.
-///
-/// \param __p
-///    A pointer to a 128-bit memory location. The address of the memory
-///    location does not have to be aligned.
-/// \returns A 128-bit vector of [4 x float] containing the loaded values.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_loadu_ps(const float *__p)
-{
-    struct __loadu_ps
-    {
-        __m128 __v;
-    } __attribute__((__packed__, __may_alias__));
-    return ((struct __loadu_ps*)__p)->__v;
-}
-
-/// \brief Stores float values from a 128-bit vector of [4 x float] to an
-///    unaligned memory location.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the \c VMOVUPS / MOVUPS instruction.
-///
-/// \param __p
-///    A pointer to a 128-bit memory location. The address of the memory
-///    location does not have to be aligned.
-/// \param __a
-///    A 128-bit vector of [4 x float] containing the values to be stored.
-static __inline__ void __DEFAULT_FN_ATTRS
-_mm_storeu_ps(float *__p, __m128 __a)
-{
-    struct __storeu_ps
-    {
-        __m128 __v;
-    } __attribute__((__packed__, __may_alias__));
-    ((struct __storeu_ps*)__p)->__v = __a;
-}
-
-/// \brief Stores the lower 32 bits of a 128-bit vector of [4 x float] into
-///    four contiguous elements in an aligned memory location.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to \c VMOVAPS / MOVAPS + \c shuffling
-///    instruction.
-///
-/// \param __p
-///    A pointer to a 128-bit memory location.
-/// \param __a
-///    A 128-bit vector of [4 x float] whose lower 32 bits are stored to each
-///    of the four contiguous elements pointed by __p.
-static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps(float *__p, __m128 __a)
-{
-    *(__m128*)__p = __a;
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif // __GNUC__
diff --git a/src/coreclr/pal/inc/rt/palrt.h b/src/coreclr/pal/inc/rt/palrt.h
index 1e52727ccdba..18e25222c5db 100644
--- a/src/coreclr/pal/inc/rt/palrt.h
+++ b/src/coreclr/pal/inc/rt/palrt.h
@@ -135,18 +135,6 @@ typedef enum tagEFaultRepRetVal
 
 #include "pal.h"
 
-#ifndef PAL_STDCPP_COMPAT
-#ifdef __cplusplus
-#ifndef __PLACEMENT_NEW_INLINE
-#define __PLACEMENT_NEW_INLINE
-inline void *__cdecl operator new(size_t, void *_P)
-{
-    return (_P);
-}
-#endif // __PLACEMENT_NEW_INLINE
-#endif // __cplusplus
-#endif // !PAL_STDCPP_COMPAT
-
 #include <pal_assert.h>
 
 #define NTAPI       __cdecl
@@ -280,9 +268,7 @@ typedef union _ULARGE_INTEGER {
         DWORD HighPart;
 #endif
     }
-#ifndef PAL_STDCPP_COMPAT
     u
-#endif // PAL_STDCPP_COMPAT
      ;
     ULONGLONG QuadPart;
 } ULARGE_INTEGER, *PULARGE_INTEGER;
@@ -681,9 +667,9 @@ inline int __cdecl _vscprintf_unsafe(const char *_Format, va_list _ArgList)
     }
 }
 
-inline errno_t __cdecl _wfopen_unsafe(PAL_FILE * *ff, const WCHAR *fileName, const WCHAR *mode)
+inline errno_t __cdecl _wfopen_unsafe(FILE * *ff, const WCHAR *fileName, const WCHAR *mode)
 {
-    PAL_FILE *result = _wfopen(fileName, mode);
+    FILE *result = _wfopen(fileName, mode);
     if(result == 0) {
         return 1;
     } else {
@@ -692,9 +678,9 @@ inline errno_t __cdecl _wfopen_unsafe(PAL_FILE * *ff, const WCHAR *fileName, con
     }
 }
 
-inline errno_t __cdecl _fopen_unsafe(PAL_FILE * *ff, const char *fileName, const char *mode)
+inline errno_t __cdecl _fopen_unsafe(FILE * *ff, const char *fileName, const char *mode)
 {
-  PAL_FILE *result = PAL_fopen(fileName, mode);
+  FILE *result = fopen(fileName, mode);
   if(result == 0) {
     return 1;
   } else {
diff --git a/src/coreclr/pal/inc/rt/safecrt.h b/src/coreclr/pal/inc/rt/safecrt.h
index 12b5eceaad58..df31623d903c 100644
--- a/src/coreclr/pal/inc/rt/safecrt.h
+++ b/src/coreclr/pal/inc/rt/safecrt.h
@@ -86,15 +86,6 @@
 #endif
 #endif
 
-/* NULL */
-#if !defined(NULL)
-#if !defined(__cplusplus)
-#define NULL 0
-#else
-#define NULL ((void *)0)
-#endif
-#endif
-
 /* _W64 */
 #if !defined(_W64)
 #if !defined(__midl) && (defined(HOST_X86) || defined(_M_IX86)) && _MSC_VER >= 1300
@@ -104,16 +95,6 @@
 #endif
 #endif
 
-/* uintptr_t */
-#if !defined(_UINTPTR_T_DEFINED)
-#if defined(HOST_64BIT)
-typedef unsigned __int64    uintptr_t;
-#else
-typedef _W64 unsigned int   uintptr_t;
-#endif
-#define _UINTPTR_T_DEFINED
-#endif
-
 #ifdef __GNUC__
 #define SAFECRT_DEPRECATED __attribute__((deprecated))
 #else
@@ -1116,10 +1097,8 @@ errno_t __cdecl _wcsnset_s(WCHAR *_Dst, size_t _SizeInWords, WCHAR _Value, size_
 
 #endif
 
-#ifndef PAL_STDCPP_COMPAT
-
 /* wcsnlen */
-_SAFECRT__EXTERN_C
+extern
 size_t __cdecl wcsnlen(const WCHAR *inString, size_t inMaxSize);
 
 #if _SAFECRT_USE_INLINES || _SAFECRT_IMPL
@@ -1140,7 +1119,6 @@ size_t __cdecl wcsnlen(const WCHAR *inString, size_t inMaxSize)
 }
 
 #endif
-#endif // PAL_STDCPP_COMPAT
 
 /* _wmakepath_s */
 _SAFECRT__EXTERN_C
diff --git a/src/coreclr/pal/inc/rt/sal.h b/src/coreclr/pal/inc/rt/sal.h
index bec3352aa3f1..9d461e8050f5 100644
--- a/src/coreclr/pal/inc/rt/sal.h
+++ b/src/coreclr/pal/inc/rt/sal.h
@@ -2399,19 +2399,14 @@ extern "C" {
     #define _SA_SPECSTRIZE( x ) #x
 
     /*
-     __null p
      __notnull p
      __maybenull p
 
-     Annotates a pointer p. States that pointer p is null. Commonly used
-     in the negated form __notnull or the possibly null form __maybenull.
+     Annotates a pointer p. States that pointer p is never null or maybe null.
     */
 
-#ifndef PAL_STDCPP_COMPAT
-    #define __null                  _Null_impl_
     #define __notnull               _Notnull_impl_
     #define __maybenull             _Maybenull_impl_
-#endif // !PAL_STDCPP_COMPAT
 
     /*
      __readonly l
@@ -2598,11 +2593,8 @@ extern "C" {
 
 
 #else // ][
-#ifndef PAL_STDCPP_COMPAT
-    #define __null
     #define __notnull
     #define __deref
-#endif // !PAL_STDCPP_COMPAT
     #define __maybenull
     #define __readonly
     #define __notreadonly
diff --git a/src/coreclr/pal/inc/rt/specstrings.h b/src/coreclr/pal/inc/rt/specstrings.h
index 21a40d91a0dd..1cccb42e1554 100644
--- a/src/coreclr/pal/inc/rt/specstrings.h
+++ b/src/coreclr/pal/inc/rt/specstrings.h
@@ -309,11 +309,9 @@ __ANNOTATION(SAL_failureDefault(enum __SAL_failureKind));
 				            __byte_readableTo((expr) ? (size) : (size) * 2)
 #define __post_invalid                      _Post_ __notvalid
 /* integer related macros */
-#ifndef PAL_STDCPP_COMPAT
 #define __allocator                         __inner_allocator
 #define __deallocate(kind)                  _Pre_ __notnull __post_invalid
 #define __deallocate_opt(kind)              _Pre_ __maybenull __post_invalid
-#endif
 #define __bound                             __inner_bound
 #define __range(lb,ub)                      __inner_range(lb,ub)
 #define __in_bound                          _Pre_ __inner_bound
diff --git a/src/coreclr/pal/inc/rt/specstrings_strict.h b/src/coreclr/pal/inc/rt/specstrings_strict.h
index dadb49930ceb..d066f76b3c43 100644
--- a/src/coreclr/pal/inc/rt/specstrings_strict.h
+++ b/src/coreclr/pal/inc/rt/specstrings_strict.h
@@ -630,7 +630,6 @@
 #define __callback               __allowed(on_function)
 #define __format_string          __allowed(on_parameter_or_return)
 #define __blocksOn(resource)     __allowed(on_function)
-#define __fallthrough            __allowed(as_statement)
 #define __range(lb,ub)           __allowed(on_return)
 #define __in_range(lb,ub)        _SAL_VERSION_CHECK(__in_range)
 #define __out_range(lb,ub)       _SAL_VERSION_CHECK(__out_range)
@@ -656,7 +655,6 @@
 /***************************************************************************
 * Expert Macros
 ***************************************************************************/
-#define __null                  __allowed(on_typedecl)
 #define __notnull               __allowed(on_typedecl)
 #define __maybenull             __allowed(on_typedecl)
 #define __exceptthat            __allowed(on_typedecl)
diff --git a/src/coreclr/pal/inc/rt/specstrings_undef.h b/src/coreclr/pal/inc/rt/specstrings_undef.h
index b0e1848c5eb8..884ad919bc7b 100644
--- a/src/coreclr/pal/inc/rt/specstrings_undef.h
+++ b/src/coreclr/pal/inc/rt/specstrings_undef.h
@@ -5,10 +5,8 @@
 
 */
 
-#ifndef PAL_STDCPP_COMPAT
 #undef __in
 #undef __out
-#endif // !PAL_STDCPP_COMPAT
 
 #undef _At_
 #undef _Deref_out_
@@ -261,7 +259,6 @@
 #undef __encoded_array
 #undef __encoded_pointer
 #undef __exceptthat
-#undef __fallthrough
 #undef __field_bcount
 #undef __field_bcount_full
 #undef __field_bcount_full_opt
@@ -388,7 +385,6 @@
 #undef __notnull
 #undef __notreadonly
 #undef __notvalid
-#undef __null
 #undef __nullnullterminated
 #undef __nullterminated
 #undef __out_awcount
diff --git a/src/coreclr/pal/inc/strsafe.h b/src/coreclr/pal/inc/strsafe.h
index b69feb73c251..b833526e6177 100644
--- a/src/coreclr/pal/inc/strsafe.h
+++ b/src/coreclr/pal/inc/strsafe.h
@@ -27,15 +27,6 @@
 #include <string.h>     // for memset
 #include <stdarg.h>     // for va_start, etc.
 
-#ifndef _SIZE_T_DEFINED
-#ifdef  HOST_64BIT
-typedef unsigned __int64    size_t;
-#else
-typedef __w64 unsigned int  size_t;
-#endif  // !HOST_64BIT
-#define _SIZE_T_DEFINED
-#endif  // !_SIZE_T_DEFINED
-
 #ifndef SUCCEEDED
 #define SUCCEEDED(hr)  ((HRESULT)(hr) >= 0)
 #endif
diff --git a/src/coreclr/pal/inc/unixasmmacrosamd64.inc b/src/coreclr/pal/inc/unixasmmacrosamd64.inc
index bb1e70a27bef..bc6d770a5182 100644
--- a/src/coreclr/pal/inc/unixasmmacrosamd64.inc
+++ b/src/coreclr/pal/inc/unixasmmacrosamd64.inc
@@ -1,6 +1,8 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+#define C_VAR(Name) rip + C_FUNC(Name)
+
 .macro NESTED_ENTRY Name, Section, Handler
         LEAF_ENTRY \Name, \Section
         .ifnc \Handler, NoHandler
diff --git a/src/coreclr/pal/prebuilt/idl/sospriv_i.cpp b/src/coreclr/pal/prebuilt/idl/sospriv_i.cpp
index 141ec62612e4..f070ae5816a8 100644
--- a/src/coreclr/pal/prebuilt/idl/sospriv_i.cpp
+++ b/src/coreclr/pal/prebuilt/idl/sospriv_i.cpp
@@ -5,11 +5,9 @@
 /* link this file in with the server and any clients */
 
 
- /* File created by MIDL compiler version 8.01.0622 */
-/* at Mon Jan 18 19:14:07 2038
- */
+ /* File created by MIDL compiler version 8.01.0628 */
 /* Compiler settings for sospriv.idl:
-    Oicf, W1, Zp8, env=Win64 (32b run), target_arch=AMD64 8.01.0622 
+    Oicf, W1, Zp8, env=Win64 (32b run), target_arch=AMD64 8.01.0628 
     protocol : dce , ms_ext, c_ext, robust
     error checks: allocation ref bounds_check enum stub_data 
     VC __declspec() decoration level: 
@@ -120,6 +118,9 @@ MIDL_DEFINE_GUID(IID, IID_ISOSDacInterface12,0x1b93bacc,0x8ca4,0x432d,0x94,0x3a,
 
 MIDL_DEFINE_GUID(IID, IID_ISOSDacInterface13,0x3176a8ed,0x597b,0x4f54,0xa7,0x1f,0x83,0x69,0x5c,0x6a,0x8c,0x5e);
 
+
+MIDL_DEFINE_GUID(IID, IID_ISOSDacInterface14,0x9aa22aca,0x6dc6,0x4a0c,0xb4,0xe0,0x70,0xd2,0x41,0x6b,0x98,0x37);
+
 #undef MIDL_DEFINE_GUID
 
 #ifdef __cplusplus
diff --git a/src/coreclr/pal/prebuilt/inc/corerror.h b/src/coreclr/pal/prebuilt/inc/corerror.h
index 994648cb25ea..12d3490aaf09 100644
--- a/src/coreclr/pal/prebuilt/inc/corerror.h
+++ b/src/coreclr/pal/prebuilt/inc/corerror.h
@@ -214,6 +214,7 @@
 #define CORDIAGIPC_E_UNKNOWN_ERROR EMAKEHR(0x1387)
 #define CORPROF_E_SUSPENSION_IN_PROGRESS EMAKEHR(0x1388)
 #define CORPROF_E_NOT_GC_OBJECT EMAKEHR(0x1389)
+#define CORPROF_E_MODULE_IS_ENC EMAKEHR(0x138A)
 #define CORSEC_E_POLICY_EXCEPTION EMAKEHR(0x1416)
 #define CORSEC_E_MIN_GRANT_FAIL EMAKEHR(0x1417)
 #define CORSEC_E_NO_EXEC_PERM EMAKEHR(0x1418)
diff --git a/src/coreclr/pal/prebuilt/inc/sospriv.h b/src/coreclr/pal/prebuilt/inc/sospriv.h
index 4c86b39cb6f7..855696ef0ce4 100644
--- a/src/coreclr/pal/prebuilt/inc/sospriv.h
+++ b/src/coreclr/pal/prebuilt/inc/sospriv.h
@@ -205,6 +205,7 @@ typedef int VCSHeapType;
 typedef enum { TYPEDEFTOMETHODTABLE, TYPEREFTOMETHODTABLE } ModuleMapType;
 typedef enum {IndcellHeap, LookupHeap, ResolveHeap, DispatchHeap, CacheEntryHeap, VtableHeap} VCSHeapType;
 typedef enum {LoaderHeapKindNormal = 0, LoaderHeapKindExplicitControl = 1} LoaderHeapKind;
+typedef enum {MethodTableInitialized = 1, MethodTableInitializationFailed = 2} MethodTableInitializationFlags;
 typedef enum {FreeUnknownRegion = 0, FreeGlobalHugeRegion = 1, FreeGlobalRegion = 2, FreeRegion = 3, FreeSohSegment = 4, FreeUohSegment = 5 } FreeRegionKind;
 typedef void ( *MODULEMAPTRAVERSE )(
     UINT index,
@@ -3343,6 +3344,118 @@ EXTERN_C const IID IID_ISOSDacInterface13;
 #endif 	/* __ISOSDacInterface13_INTERFACE_DEFINED__ */
 
 
+#ifndef __ISOSDacInterface14_INTERFACE_DEFINED__
+#define __ISOSDacInterface14_INTERFACE_DEFINED__
+
+/* interface ISOSDacInterface14 */
+/* [uuid][local][object] */ 
+
+
+EXTERN_C const IID IID_ISOSDacInterface14;
+
+#if defined(__cplusplus) && !defined(CINTERFACE)
+    
+    MIDL_INTERFACE("9aa22aca-6dc6-4a0c-b4e0-70d2416b9837")
+    ISOSDacInterface14 : public IUnknown
+    {
+    public:
+        virtual HRESULT STDMETHODCALLTYPE GetStaticBaseAddress( 
+            CLRDATA_ADDRESS methodTable,
+            CLRDATA_ADDRESS *nonGCStaticsAddress,
+            CLRDATA_ADDRESS *GCStaticsAddress) = 0;
+        
+        virtual HRESULT STDMETHODCALLTYPE GetThreadStaticBaseAddress( 
+            CLRDATA_ADDRESS methodTable,
+            CLRDATA_ADDRESS thread,
+            CLRDATA_ADDRESS *nonGCStaticsAddress,
+            CLRDATA_ADDRESS *GCStaticsAddress) = 0;
+        
+        virtual HRESULT STDMETHODCALLTYPE GetMethodTableInitializationFlags( 
+            CLRDATA_ADDRESS methodTable,
+            MethodTableInitializationFlags *initializationStatus) = 0;
+        
+    };
+    
+    
+#else 	/* C style interface */
+
+    typedef struct ISOSDacInterface14Vtbl
+    {
+        BEGIN_INTERFACE
+        
+        HRESULT ( STDMETHODCALLTYPE *QueryInterface )( 
+            ISOSDacInterface14 * This,
+            /* [in] */ REFIID riid,
+            /* [annotation][iid_is][out] */ 
+            _COM_Outptr_  void **ppvObject);
+        
+        ULONG ( STDMETHODCALLTYPE *AddRef )( 
+            ISOSDacInterface14 * This);
+        
+        ULONG ( STDMETHODCALLTYPE *Release )( 
+            ISOSDacInterface14 * This);
+        
+        HRESULT ( STDMETHODCALLTYPE *GetStaticBaseAddress )( 
+            ISOSDacInterface14 * This,
+            CLRDATA_ADDRESS methodTable,
+            CLRDATA_ADDRESS *nonGCStaticsAddress,
+            CLRDATA_ADDRESS *GCStaticsAddress);
+        
+        HRESULT ( STDMETHODCALLTYPE *GetThreadStaticBaseAddress )( 
+            ISOSDacInterface14 * This,
+            CLRDATA_ADDRESS methodTable,
+            CLRDATA_ADDRESS thread,
+            CLRDATA_ADDRESS *nonGCStaticsAddress,
+            CLRDATA_ADDRESS *GCStaticsAddress);
+        
+        HRESULT ( STDMETHODCALLTYPE *GetMethodTableInitializationFlags )( 
+            ISOSDacInterface14 * This,
+            CLRDATA_ADDRESS methodTable,
+            MethodTableInitializationFlags *initializationStatus);
+        
+        END_INTERFACE
+    } ISOSDacInterface14Vtbl;
+
+    interface ISOSDacInterface14
+    {
+        CONST_VTBL struct ISOSDacInterface14Vtbl *lpVtbl;
+    };
+
+    
+
+#ifdef COBJMACROS
+
+
+#define ISOSDacInterface14_QueryInterface(This,riid,ppvObject)	\
+    ( (This)->lpVtbl -> QueryInterface(This,riid,ppvObject) ) 
+
+#define ISOSDacInterface14_AddRef(This)	\
+    ( (This)->lpVtbl -> AddRef(This) ) 
+
+#define ISOSDacInterface14_Release(This)	\
+    ( (This)->lpVtbl -> Release(This) ) 
+
+
+#define ISOSDacInterface14_GetStaticBaseAddress(This,methodTable,nonGCStaticsAddress,GCStaticsAddress)	\
+    ( (This)->lpVtbl -> GetStaticBaseAddress(This,methodTable,nonGCStaticsAddress,GCStaticsAddress) ) 
+
+#define ISOSDacInterface14_GetThreadStaticBaseAddress(This,methodTable,thread,nonGCStaticsAddress,GCStaticsAddress)	\
+    ( (This)->lpVtbl -> GetThreadStaticBaseAddress(This,methodTable,thread,nonGCStaticsAddress,GCStaticsAddress) ) 
+
+#define ISOSDacInterface14_GetMethodTableInitializationFlags(This,methodTable,initializationStatus)	\
+    ( (This)->lpVtbl -> GetMethodTableInitializationFlags(This,methodTable,initializationStatus) ) 
+
+#endif /* COBJMACROS */
+
+
+#endif 	/* C style interface */
+
+
+
+
+#endif 	/* __ISOSDacInterface14_INTERFACE_DEFINED__ */
+
+
 /* Additional Prototypes for ALL interfaces */
 
 /* end of Additional Prototypes */
@@ -3353,3 +3466,4 @@ EXTERN_C const IID IID_ISOSDacInterface13;
 
 #endif
 
+
diff --git a/src/coreclr/pal/src/CMakeLists.txt b/src/coreclr/pal/src/CMakeLists.txt
index 22b1e1f78bde..cb3693655dca 100644
--- a/src/coreclr/pal/src/CMakeLists.txt
+++ b/src/coreclr/pal/src/CMakeLists.txt
@@ -129,13 +129,6 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND (CLR_CMAKE_HOST_ARCH_AMD64 OR CLR_CM
 endif()
 
 set(SOURCES
-  cruntime/file.cpp
-  cruntime/filecrt.cpp
-  cruntime/malloc.cpp
-  cruntime/math.cpp
-  cruntime/misc.cpp
-  cruntime/printfcpp.cpp
-  cruntime/thread.cpp
   cruntime/wchar.cpp
   debug/debug.cpp
   exception/seh.cpp
diff --git a/src/coreclr/pal/src/arch/arm/context2.S b/src/coreclr/pal/src/arch/arm/context2.S
index 32983c196969..e292ca26fe2a 100644
--- a/src/coreclr/pal/src/arch/arm/context2.S
+++ b/src/coreclr/pal/src/arch/arm/context2.S
@@ -18,9 +18,8 @@
 
 #define CONTEXT_CONTROL 1 // Sp, Lr, Pc, Cpsr
 #define CONTEXT_INTEGER 2 // R0-R12
-#define CONTEXT_SEGMENTS 4 //
-#define CONTEXT_FLOATING_POINT 8
-#define CONTEXT_DEBUG_REGISTERS 16 //
+#define CONTEXT_FLOATING_POINT 4
+#define CONTEXT_DEBUG_REGISTERS 8 //
 
 #define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT)
 
diff --git a/src/coreclr/pal/src/config.h.in b/src/coreclr/pal/src/config.h.in
index b5ddd025f3eb..aba4249541b4 100644
--- a/src/coreclr/pal/src/config.h.in
+++ b/src/coreclr/pal/src/config.h.in
@@ -119,17 +119,6 @@
 #cmakedefine01 SEM_INIT_MODIFIES_ERRNO
 #cmakedefine01 HAVE_PROCFS_CTL
 #cmakedefine01 HAVE_PROCFS_STAT
-#cmakedefine01 HAVE_COMPATIBLE_ACOS
-#cmakedefine01 HAVE_COMPATIBLE_ASIN
-#cmakedefine01 HAVE_COMPATIBLE_POW
-#cmakedefine01 HAVE_VALID_NEGATIVE_INF_POW
-#cmakedefine01 HAVE_VALID_POSITIVE_INF_POW
-#cmakedefine01 HAVE_COMPATIBLE_ATAN2
-#cmakedefine01 HAVE_COMPATIBLE_EXP
-#cmakedefine01 HAVE_COMPATIBLE_ILOGB0
-#cmakedefine01 HAVE_COMPATIBLE_ILOGBNAN
-#cmakedefine01 HAVE_COMPATIBLE_LOG
-#cmakedefine01 HAVE_COMPATIBLE_LOG10
 #cmakedefine01 UNGETC_NOT_RETURN_EOF
 #cmakedefine01 HAS_POSIX_SEMAPHORES
 #cmakedefine01 FILE_OPS_CHECK_FERROR_OF_PREVIOUS_CALL
diff --git a/src/coreclr/pal/src/configure.cmake b/src/coreclr/pal/src/configure.cmake
index b8cdb3a4df41..ed730378074c 100644
--- a/src/coreclr/pal/src/configure.cmake
+++ b/src/coreclr/pal/src/configure.cmake
@@ -644,214 +644,6 @@ int main(void) {
   exit(0);
 }" HAVE_PROCFS_STAT)
 set(CMAKE_REQUIRED_LIBRARIES)
-set(CMAKE_REQUIRED_LIBRARIES m)
-check_cxx_source_runs("
-#include <math.h>
-#include <stdlib.h>
-
-int main(void) {
-  volatile double x = 10;
-  if (!isnan(acos(x))) {
-    exit(1);
-  }
-  exit(0);
-}" HAVE_COMPATIBLE_ACOS)
-set(CMAKE_REQUIRED_LIBRARIES)
-set(CMAKE_REQUIRED_LIBRARIES m)
-check_cxx_source_runs("
-#include <math.h>
-#include <stdlib.h>
-
-int main(void) {
-  volatile double arg = 10;
-  if (!isnan(asin(arg))) {
-    exit(1);
-  }
-  exit(0);
-}" HAVE_COMPATIBLE_ASIN)
-set(CMAKE_REQUIRED_LIBRARIES)
-set(CMAKE_REQUIRED_LIBRARIES m)
-check_cxx_source_runs("
-#include <math.h>
-#include <stdlib.h>
-
-int main(void) {
-  volatile double base = 1.0;
-  volatile double infinity = 1.0 / 0.0;
-  if (pow(base, infinity) != 1.0 || pow(base, -infinity) != 1.0) {
-    exit(1);
-  }
-  if (pow(-base, infinity) != 1.0 || pow(-base, -infinity) != 1.0) {
-    exit(1);
-  }
-
-  base = 0.0;
-  if (pow(base, infinity) != 0.0) {
-    exit(1);
-  }
-  if (pow(base, -infinity) != infinity) {
-    exit(1);
-  }
-
-  base = 1.1;
-  if (pow(-base, infinity) != infinity || pow(base, infinity) != infinity) {
-    exit(1);
-  }
-  if (pow(-base, -infinity) != 0.0 || pow(base, -infinity) != 0.0) {
-    exit(1);
-  }
-
-  base = 0.0;
-  volatile int iexp = 1;
-  if (pow(-base, -iexp) != -infinity) {
-    exit(1);
-  }
-  if (pow(base, -iexp) != infinity) {
-    exit(1);
-  }
-  exit(0);
-}" HAVE_COMPATIBLE_POW)
-set(CMAKE_REQUIRED_LIBRARIES)
-set(CMAKE_REQUIRED_LIBRARIES m)
-check_cxx_source_runs("
-#include <math.h>
-#include <stdlib.h>
-
-int main(int argc, char **argv) {
-  double result;
-  volatile double base = 3.2e-10;
-  volatile double exp = 1 - 5e14;
-
-  result = pow(-base, exp);
-  if (result != -1.0 / 0.0) {
-    exit(1);
-  }
-  exit(0);
-}" HAVE_VALID_NEGATIVE_INF_POW)
-set(CMAKE_REQUIRED_LIBRARIES)
-set(CMAKE_REQUIRED_LIBRARIES m)
-check_cxx_source_runs("
-#include <math.h>
-#include <stdlib.h>
-
-int main(int argc, char **argv) {
-    double result;
-    volatile double base = 3.5;
-    volatile double exp = 3e100;
-
-    result = pow(-base, exp);
-    if (result != 1.0 / 0.0) {
-        exit(1);
-    }
-    exit(0);
-}" HAVE_VALID_POSITIVE_INF_POW)
-set(CMAKE_REQUIRED_LIBRARIES)
-set(CMAKE_REQUIRED_LIBRARIES m)
-check_cxx_source_runs("
-#include <math.h>
-#include <stdlib.h>
-
-int main(void) {
-  double pi = 3.14159265358979323846;
-  double result;
-  volatile double y = 0.0;
-  volatile double x = 0.0;
-
-  result = atan2(y, -x);
-  if (fabs(pi - result) > 0.0000001) {
-    exit(1);
-  }
-
-  result = atan2(-y, -x);
-  if (fabs(-pi - result) > 0.0000001) {
-    exit(1);
-  }
-
-  result = atan2 (-y, x);
-  if (result != 0.0 || copysign (1.0, result) > 0) {
-    exit(1);
-  }
-
-  result = atan2 (y, x);
-  if (result != 0.0 || copysign (1.0, result) < 0) {
-    exit(1);
-  }
-
-  exit (0);
-}" HAVE_COMPATIBLE_ATAN2)
-set(CMAKE_REQUIRED_LIBRARIES)
-set(CMAKE_REQUIRED_LIBRARIES m)
-check_cxx_source_runs("
-#include <math.h>
-#include <stdlib.h>
-
-int main(void) {
-  double d = exp(1.0), e = M_E;
-
-  /* Used memcmp rather than == to test that the doubles are equal to
-   prevent gcc's optimizer from using its 80 bit internal long
-   doubles. If you use ==, then on BSD you get a false negative since
-   exp(1.0) == M_E to 64 bits, but not 80.
-  */
-
-  if (memcmp (&d, &e, sizeof (double)) == 0) {
-    exit(0);
-  }
-  exit(1);
-}" HAVE_COMPATIBLE_EXP)
-set(CMAKE_REQUIRED_LIBRARIES)
-set(CMAKE_REQUIRED_LIBRARIES m)
-check_cxx_source_runs("
-#include <math.h>
-#include <stdlib.h>
-
-int main(void) {
-  if (FP_ILOGB0 != -2147483648) {
-    exit(1);
-  }
-
-  exit(0);
-}" HAVE_COMPATIBLE_ILOGB0)
-set(CMAKE_REQUIRED_LIBRARIES)
-set(CMAKE_REQUIRED_LIBRARIES m)
-check_cxx_source_runs("
-#include <math.h>
-#include <stdlib.h>
-
-int main(void) {
-  if (FP_ILOGBNAN != 2147483647) {
-    exit(1);
-  }
-
-  exit(0);
-}" HAVE_COMPATIBLE_ILOGBNAN)
-set(CMAKE_REQUIRED_LIBRARIES)
-set(CMAKE_REQUIRED_LIBRARIES m)
-check_cxx_source_runs("
-#include <math.h>
-#include <stdlib.h>
-
-int main(void) {
-  volatile int arg = 10000;
-  if (!isnan(log(-arg))) {
-    exit(1);
-  }
-  exit(0);
-}" HAVE_COMPATIBLE_LOG)
-set(CMAKE_REQUIRED_LIBRARIES)
-set(CMAKE_REQUIRED_LIBRARIES m)
-check_cxx_source_runs("
-#include <math.h>
-#include <stdlib.h>
-
-int main(void) {
-  volatile int arg = 10000;
-  if (!isnan(log10(-arg))) {
-    exit(1);
-  }
-  exit(0);
-}" HAVE_COMPATIBLE_LOG10)
-set(CMAKE_REQUIRED_LIBRARIES)
 check_cxx_source_runs("
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/src/coreclr/pal/src/cruntime/file.cpp b/src/coreclr/pal/src/cruntime/file.cpp
deleted file mode 100644
index 57cf7faa4caf..000000000000
--- a/src/coreclr/pal/src/cruntime/file.cpp
+++ /dev/null
@@ -1,665 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    file.c
-
-Abstract:
-
-    Implementation of the file functions in the C runtime library that
-    are Windows specific.
-
-
-
---*/
-
-#include "pal/palinternal.h"
-#include "pal/dbgmsg.h"
-#include "pal/file.h"
-#include "pal/cruntime.h"
-
-#include "pal/thread.hpp"
-#include "pal/threadsusp.hpp"
-
-#include <unistd.h>
-#include <errno.h>
-#include <sys/stat.h>
-#include <pthread.h>
-
-#if FILE_OPS_CHECK_FERROR_OF_PREVIOUS_CALL
-    #define CLEARERR(f) clearerr((f)->bsdFilePtr)
-#else
-    #define CLEARERR(f)
-#endif
-
-SET_DEFAULT_DEBUG_CHANNEL(CRT);
-
-/* Global variables storing the std streams.*/
-PAL_FILE PAL_Stdout;
-PAL_FILE PAL_Stdin;
-PAL_FILE PAL_Stderr;
-
-/*++
-
-Function:
-
-    CRTInitStdStreams.
-
-    Initializes the standard streams.
-    Returns TRUE on success, FALSE otherwise.
---*/
-BOOL CRTInitStdStreams()
-{
-    /* stdout */
-    PAL_Stdout.bsdFilePtr = stdout;
-    PAL_Stdout.PALferrorCode = PAL_FILE_NOERROR;
-    PAL_Stdout.bTextMode = TRUE;
-
-    /* stdin */
-    PAL_Stdin.bsdFilePtr = stdin;
-    PAL_Stdin.PALferrorCode = PAL_FILE_NOERROR;
-    PAL_Stdin.bTextMode = TRUE;
-
-    /* stderr */
-    PAL_Stderr.bsdFilePtr = stderr;
-    PAL_Stderr.PALferrorCode = PAL_FILE_NOERROR;
-    PAL_Stderr.bTextMode = TRUE;
-    return TRUE;
-}
-
-/*++
-Function :
-
-    MapFileOpenModes
-
-    Maps Windows file open modes to Unix fopen modes and validates.
-
---*/
-static LPSTR MapFileOpenModes(LPSTR str , BOOL * bTextMode)
-{
-    LPSTR retval = NULL;
-    LPSTR temp = NULL;
-
-    if (NULL == bTextMode)
-    {
-        ASSERT("MapFileOpenModes called with a NULL parameter for bTextMode.\n");
-        return NULL;
-    }
-
-    *bTextMode = TRUE;
-
-    if (NULL == str)
-    {
-        ASSERT("MapFileOpenModes called with a NULL parameter for str.\n");
-        return NULL;
-    }
-
-    /* The PAL behaves differently for some Windows file open modes:
-
-    c, n, S, R, and T: these are all hints to the system that aren't supported
-    by the PAL. Since the user cannot depend on this behavior, it's safe to
-    simply ignore these modes.
-
-    D: specifies a file as temporary. This file is expected to be deleted when
-    the last file descriptor is closed. The PAL does not support this behavior
-    and asserts when this mode is used.
-
-    t: represents opening in text mode. Calls to fdopen on Unix don't accept
-    't' so it is silently stripped out. However, the PAL supports the mode by
-    having the PAL wrappers do the translation of CR-LF to LF and vice versa.
-
-    t vs. b: To get binary mode, you must explicitly use 'b'. If neither mode
-    is specified on Windows, the default mode is defined by the global
-    variable _fmode. The PAL simply defaults to text mode. After examining
-    CLR usage patterns, the PAL behavior seems acceptable. */
-
-    /* Check if the mode specifies deleting the temporary file
-    automatically when the last file descriptor is closed.
-    The PAL does not support this behavior. */
-    if (NULL != strchr(str,'D'))
-    {
-        ASSERT("The PAL doesn't support the 'D' flag for fopen.\n");
-        return NULL;
-    }
-
-    /* Check if the mode specifies opening in binary.
-    If so, set the bTextMode to false. */
-    if(NULL != strchr(str,'b'))
-    {
-        *bTextMode = FALSE;
-    }
-
-    retval = (LPSTR)PAL_malloc( ( strlen( str ) + 1 ) * sizeof( CHAR ) );
-    if (NULL == retval)
-    {
-        ERROR("Unable to allocate memory.\n");
-        return NULL;
-    }
-
-    temp = retval;
-    while ( *str )
-    {
-        if ( *str == 'r' || *str == 'w' || *str == 'a' )
-        {
-            *temp = *str;
-            temp++;
-            if ( ( ++str != NULL ) && *str == '+' )
-            {
-                *temp = *str;
-                temp++;
-                str++;
-            }
-        }
-        else
-        {
-            str++;
-        }
-    }
-    *temp = '\0';
-    return retval;
-}
-
-#if UNGETC_NOT_RETURN_EOF
-/*++
-Function :
-
-    WriteOnlyMode
-
-    Returns TRUE to if a file is opened in write-only mode,
-    Otherwise FALSE.
-
---*/
-static BOOL WriteOnlyMode(FILE* pFile)
-{
-    INT fd, flags;
-
-    if (pFile != NULL)
-    {
-        fd = fileno(pFile);
-        if ((flags = fcntl(fd, F_GETFL)) >= 0)
-        {
-            if ((flags & O_ACCMODE) == O_WRONLY)
-            {
-                return TRUE;
-            }
-        }
-    }
-    return FALSE;
-}
-#endif //UNGETC_NOT_RETURN_EOF
-
-/*++
-
-Function :
-    fopen
-
-see MSDN doc.
-
---*/
-PAL_FILE *
-__cdecl
-PAL_fopen(const char * fileName, const char * mode)
-{
-    PAL_FILE *f = NULL;
-    LPSTR supported = NULL;
-    LPSTR UnixFileName = NULL;
-    struct stat stat_data;
-    BOOL bTextMode = TRUE;
-
-    PERF_ENTRY(fopen);
-    ENTRY("fopen ( fileName=%p (%s) mode=%p (%s))\n", fileName, fileName, mode , mode );
-
-    _ASSERTE(fileName != NULL);
-    _ASSERTE(mode != NULL);
-
-    if ( *mode == 'r' || *mode == 'w' || *mode == 'a' )
-    {
-        supported = MapFileOpenModes( (char*)mode,&bTextMode);
-
-        if ( !supported )
-        {
-            goto done;
-        }
-
-        UnixFileName = strdup(fileName);
-        if (UnixFileName == NULL )
-        {
-            ERROR("strdup() failed\n");
-            SetLastError(ERROR_NOT_ENOUGH_MEMORY);
-            goto done;
-        }
-
-        /*I am not checking for the case where stat fails
-         *as fopen will handle the error more gracefully in case
-         *UnixFileName is invalid*/
-        if ((stat(UnixFileName, &stat_data) == 0 ) &&
-            ((stat_data.st_mode & S_IFMT) == S_IFDIR))
-        {
-            goto done;
-        }
-
-        f = (PAL_FILE*)PAL_malloc( sizeof( PAL_FILE ) );
-        if ( f )
-        {
-            f->bsdFilePtr =  (FILE*)fopen( UnixFileName, supported );
-            f->PALferrorCode = PAL_FILE_NOERROR;
-            f->bTextMode = bTextMode;
-            if ( !f->bsdFilePtr )
-            {
-                /* Failed */
-                PAL_free( f );
-                f = NULL;
-            }
-#if UNGETC_NOT_RETURN_EOF
-            else
-            {
-                f->bWriteOnlyMode = WriteOnlyMode(f->bsdFilePtr);
-            }
-#endif //UNGETC_NOT_RETURN_EOF
-        }
-        else
-        {
-            ERROR( "Unable to allocate memory to the PAL_FILE wrapper\n" );
-        }
-    }
-    else
-    {
-        ERROR( "The mode flags must start with either an a, w, or r.\n" );
-    }
-
-done:
-    PAL_free( supported );
-    supported = NULL;
-    PAL_free( UnixFileName );
-
-    LOGEXIT( "fopen returns FILE* %p\n", f );
-    PERF_EXIT(fopen);
-    return f;
-}
-
-/*++
-Function:
-  _wfopen
-
-see MSDN doc.
-
---*/
-PAL_FILE *
-__cdecl
-_wfopen(
-    const wchar_16 *fileName,
-    const wchar_16 *mode)
-{
-    CHAR mbFileName[ _MAX_PATH ];
-    CHAR mbMode[ 10 ];
-    PAL_FILE * filePtr = NULL;
-
-    PERF_ENTRY(_wfopen);
-    ENTRY("_wfopen(fileName:%p (%S), mode:%p (%S))\n", fileName, fileName, mode, mode);
-
-    _ASSERTE(fileName != NULL);
-    _ASSERTE(mode != NULL);
-
-    /* Convert the parameters to ASCII and defer to PAL_fopen */
-    if ( WideCharToMultiByte( CP_ACP, 0, fileName, -1, mbFileName,
-                              sizeof mbFileName, NULL, NULL ) != 0 )
-    {
-        if ( WideCharToMultiByte( CP_ACP, 0, mode, -1, mbMode,
-                                  sizeof mbMode, NULL, NULL ) != 0 )
-        {
-            filePtr = PAL_fopen(mbFileName, mbMode);
-        }
-        else
-        {
-            ERROR( "An error occurred while converting mode to ANSI.\n" );
-        }
-    }
-    else
-    {
-        ERROR( "An error occurred while converting"
-               " fileName to ANSI string.\n" );
-    }
-    LOGEXIT("_wfopen returning FILE* %p\n", filePtr);
-    PERF_EXIT(_wfopen);
-    return filePtr;
-}
-
-/*++
-Function
-    PAL_get_stdout.
-
-    Returns the stdout stream.
---*/
-PAL_FILE * __cdecl PAL_get_stdout(int caller)
-{
-    PERF_ENTRY(get_stdout);
-    ENTRY("PAL_get_stdout\n");
-    LOGEXIT("PAL_get_stdout returns PAL_FILE * %p\n", &PAL_Stdout );
-    PERF_EXIT(get_stdout);
-    return &PAL_Stdout;
-}
-
-/*++
-Function
-    PAL_get_stdin.
-
-    Returns the stdin stream.
---*/
-PAL_FILE * __cdecl PAL_get_stdin(int caller)
-{
-    PERF_ENTRY(get_stdin);
-    ENTRY("PAL_get_stdin\n");
-    LOGEXIT("PAL_get_stdin returns PAL_FILE * %p\n", &PAL_Stdin );
-    PERF_EXIT(get_stdin);
-    return &PAL_Stdin;
-}
-
-/*++
-Function
-    PAL_get_stderr.
-
-    Returns the stderr stream.
---*/
-PAL_FILE * __cdecl PAL_get_stderr(int caller)
-{
-    PERF_ENTRY(get_stderr);
-    ENTRY("PAL_get_stderr\n");
-    LOGEXIT("PAL_get_stderr returns PAL_FILE * %p\n", &PAL_Stderr );
-    PERF_EXIT(get_stderr);
-    return &PAL_Stderr;
-}
-
-/*++
-
-Function:
-
-    PAL_pread
-
-See msdn for more details.
---*/
-size_t __cdecl PAL__pread(int fd, void *buf, size_t nbytes, ULONG64 offset)
-{
-    return pread(fd, buf, nbytes, offset);
-}
-
-/*++
-
-Function:
-
-    _close
-
-See msdn for more details.
---*/
-int __cdecl PAL__close(int handle)
-{
-    INT nRetVal = 0;
-
-    PERF_ENTRY(_close);
-    ENTRY( "_close( handle=%d )\n", handle );
-
-    nRetVal = close( handle );
-
-    LOGEXIT( "_close returning %d.\n", nRetVal );
-    PERF_EXIT(_close);
-    return nRetVal;
-}
-
-int __cdecl PAL__flushall()
-{
-    return fflush(NULL);
-}
-
-int __cdecl PAL_getc(PAL_FILE *stream);
-
-/*++
-Function :
-
-    fread
-
-    See MSDN for more details.
---*/
-
-size_t
-__cdecl
-PAL_fread(void * buffer, size_t size, size_t count, PAL_FILE * f)
-{
-    size_t nReadBytes = 0;
-
-    PERF_ENTRY(fread);
-    ENTRY( "fread( buffer=%p, size=%d, count=%d, f=%p )\n",
-           buffer, size, count, f );
-
-    _ASSERTE(f != NULL);
-
-    CLEARERR(f);
-
-    if(f->bTextMode != TRUE)
-    {
-        nReadBytes = fread( buffer, size, count, f->bsdFilePtr );
-    }
-    else
-    {
-        size_t i=0;
-        if(size > 0)
-        {
-            size_t j=0;
-            LPSTR temp = (LPSTR)buffer;
-            int nChar = 0;
-            int nCount =0;
-
-            for(i=0; i< count; i++)
-            {
-                for(j=0; j< size; j++)
-                {
-                    if((nChar = PAL_getc(f)) == EOF)
-                    {
-                        nReadBytes = i;
-                        goto done;
-                    }
-                    else
-                    {
-                        temp[nCount++]= (char)nChar;
-                    }
-                }
-            }
-        }
-        nReadBytes = i;
-    }
-
-done:
-    LOGEXIT( "fread returning size_t %d\n", nReadBytes );
-    PERF_EXIT(fread);
-    return nReadBytes;
-}
-
-
-/*++
-Function :
-
-    ferror
-
-    See MSDN for more details.
---*/
-int
-_cdecl
-PAL_ferror(PAL_FILE * f)
-{
-    INT nErrorCode = PAL_FILE_NOERROR;
-
-    PERF_ENTRY(ferror);
-    ENTRY( "ferror( f=%p )\n", f );
-
-    _ASSERTE(f != NULL);
-
-    nErrorCode = ferror( f->bsdFilePtr );
-    if ( 0 == nErrorCode )
-    {
-        /* See if the PAL file error code is set. */
-        nErrorCode = f->PALferrorCode;
-    }
-
-    LOGEXIT( "ferror returns %d\n", nErrorCode );
-    PERF_EXIT(ferror);
-    return nErrorCode;
-}
-
-
-/*++
-Function :
-
-    fclose
-
-    See MSDN for more details.
---*/
-int
-_cdecl
-PAL_fclose(PAL_FILE * f)
-{
-    INT nRetVal = 0;
-
-    PERF_ENTRY(fclose);
-    ENTRY( "fclose( f=%p )\n", f );
-
-    _ASSERTE(f != NULL);
-
-    CLEARERR(f);
-
-    nRetVal = fclose( f->bsdFilePtr );
-    PAL_free( f );
-
-    LOGEXIT( "fclose returning %d\n", nRetVal );
-    PERF_EXIT(fclose);
-    return nRetVal;
-}
-
-/*++
-Function :
-
-    fputs
-
-    See MSDN for more details.
---*/
-int
-_cdecl
-PAL_fputs(const char * str,  PAL_FILE * f)
-{
-    INT nRetVal = 0;
-
-    PERF_ENTRY(fputs);
-    ENTRY( "fputs( %p (%s), %p )\n", str, str, f);
-
-    _ASSERTE(str != NULL);
-    _ASSERTE(f != NULL);
-
-    CLEARERR(f);
-
-    nRetVal = fputs( str, f->bsdFilePtr );
-
-    LOGEXIT( "fputs returning %d\n", nRetVal );
-    PERF_EXIT(fputs);
-    return nRetVal;
-}
-
-/*++
-Function :
-
-    ftell
-
-    See MSDN for more details.
---*/
-LONG
-_cdecl
-PAL_ftell(PAL_FILE * f)
-{
-    long lRetVal = 0;
-
-    PERF_ENTRY(ftell);
-    ENTRY( "ftell( %p )\n", f );
-
-    _ASSERTE(f != NULL);
-    lRetVal = ftell( f->bsdFilePtr );
-
-#ifdef HOST_64BIT
-    /* Windows does not set an error if the file pointer's position
-    is greater than _I32_MAX. It just returns -1. */
-    if (lRetVal > _I32_MAX)
-    {
-        lRetVal = -1;
-    }
-#endif
-
-    LOGEXIT( "ftell returning %ld\n", lRetVal );
-    PERF_EXIT(ftell);
-    /* This explicit cast to LONG is used to silence any potential warnings
-    due to implicitly casting the native long lRetVal to LONG when returning. */
-    return (LONG)lRetVal;
-}
-
-/*++
-Function :
-    getc
-
-    See MSDN for more details.
---*/
-int
-_cdecl
-PAL_getc(PAL_FILE * f)
-{
-    INT nRetVal = 0;
-    INT temp =0;
-
-    PERF_ENTRY(getc);
-    ENTRY( "getc( %p )\n", f );
-
-    _ASSERTE(f != NULL);
-
-    CLEARERR(f);
-
-    nRetVal = getc( f->bsdFilePtr );
-
-    if ( (f->bTextMode) && (nRetVal == '\r') )
-    {
-        if ((temp = getc( f->bsdFilePtr ))== '\n')
-        {
-            nRetVal ='\n';
-        }
-        else if (EOF == ungetc( temp, f->bsdFilePtr ))
-        {
-            ERROR("ungetc operation failed\n");
-        }
-    }
-
-    LOGEXIT( "getc returning %d\n", nRetVal );
-    PERF_EXIT(getc);
-    return nRetVal;
-}
-
-/*++
-Function :
-
-    setvbuf
-
-    See MSDN for more details.
---*/
-int
-_cdecl
-PAL_setvbuf(PAL_FILE *f, char *buf, int type, size_t size)
-{
-    INT nRetVal = 0;
-
-    PERF_ENTRY(setvbuf);
-    ENTRY( "setvbuf( %p, %p, %d, %ul )\n", f, buf, type, size);
-
-    _ASSERTE(f != NULL);
-
-    nRetVal = setvbuf(f->bsdFilePtr, buf, type, size);
-
-    LOGEXIT( "setvbuf returning %d\n", nRetVal );
-    PERF_EXIT(setvbuf);
-    return nRetVal;
-}
diff --git a/src/coreclr/pal/src/cruntime/filecrt.cpp b/src/coreclr/pal/src/cruntime/filecrt.cpp
deleted file mode 100644
index 5d2fe0e5d999..000000000000
--- a/src/coreclr/pal/src/cruntime/filecrt.cpp
+++ /dev/null
@@ -1,378 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-Module Name:
-
-    filecrt.cpp
-
-Abstract:
-
-    Implementation of the file functions in the C runtime library that
-    are Windows specific.
-
---*/
-
-#include "pal/thread.hpp"
-#include "pal/file.hpp"
-
-#include "pal/palinternal.h"
-#include "pal/dbgmsg.h"
-#include "pal/file.h"
-#include "pal/cruntime.h"
-
-#include <unistd.h>
-#include <errno.h>
-#include <sys/stat.h>
-
-#ifdef __APPLE__
-#include <sys/syscall.h>
-#endif // __APPLE__
-
-using namespace CorUnix;
-
-SET_DEFAULT_DEBUG_CHANNEL(CRT);
-
-/*++
-Function:
-    PAL_fflush
-
-See MSDN for more details.
---*/
-int
-_cdecl
-PAL_fflush( PAL_FILE *stream )
-{
-    int nRetVal = 0;
-
-    PERF_ENTRY(fflush);
-    ENTRY( "fflush( %p )\n", stream );
-
-    nRetVal = fflush(stream ? stream->bsdFilePtr : NULL);
-
-    LOGEXIT( "fflush returning %d\n", nRetVal );
-    PERF_EXIT(fflush);
-    return nRetVal;
-}
-
-
-/*++
-PAL__open
-
-Wrapper function for InternalOpen.
-
-Input parameters:
-
-szPath = pointer to a pathname of a file to be opened
-nFlags = arguments that control how the file should be accessed
-mode = file permission settings that are used only when a file is created
-
-Return value:
-    File descriptor on success, -1 on failure
---*/
-int
-__cdecl
-PAL__open(
-    const char *szPath,
-    int nFlags,
-    ...
-    )
-{
-    int nRet = -1;
-    int mode = 0;
-    va_list ap;
-
-    // If nFlags does not contain O_CREAT, the mode parameter will be ignored.
-    if (nFlags & O_CREAT)
-    {
-        va_start(ap, nFlags);
-        mode = va_arg(ap, int);
-        va_end(ap);
-    }
-
-    nRet = InternalOpen(szPath, nFlags, mode);
-    return nRet;
-}
-
-/*++
-InternalOpen
-
-Wrapper for open.
-
-Input parameters:
-
-szPath = pointer to a pathname of a file to be opened
-nFlags = arguments that control how the file should be accessed
-mode = file permission settings that are used only when a file is created
-
-Return value:
-    File descriptor on success, -1 on failure
---*/
-int
-CorUnix::InternalOpen(
-    const char *szPath,
-    int nFlags,
-    ...
-    )
-{
-    int nRet = -1;
-    int mode = 0;
-    va_list ap;
-
-    // If nFlags does not contain O_CREAT, the mode parameter will be ignored.
-    if (nFlags & O_CREAT)
-    {
-        va_start(ap, nFlags);
-        mode = va_arg(ap, int);
-        va_end(ap);
-    }
-
-    do
-    {
-#if OPEN64_IS_USED_INSTEAD_OF_OPEN
-        nRet = open64(szPath, nFlags, mode);
-#else
-        nRet = open(szPath, nFlags, mode);
-#endif
-    }
-    while ((nRet == -1) && (errno == EINTR));
-
-    return nRet;
-}
-
-
-/*++
-PAL_fgets
-
-Wrapper function for InternalFgets.
-
-Input parameters:
-
-sz = stores characters read from the given file stream
-nSize = number of characters to be read
-pf = stream to read characters from
-
-Return value:
-    Returns a pointer to the string storing the characters on success
-    and NULL on failure.
---*/
-char *
-__cdecl
-PAL_fgets(
-    char *sz,
-    int nSize,
-    PAL_FILE *pf
-    )
-{
-    char * szBuf;
-
-    PERF_ENTRY(fgets);
-    ENTRY( "fgets(sz=%p (%s) nSize=%d pf=%p)\n", sz, sz, nSize, pf);
-
-    if (pf != NULL)
-    {
-        szBuf = InternalFgets(sz, nSize, pf->bsdFilePtr, pf->bTextMode);
-    }
-    else
-    {
-        szBuf = NULL;
-    }
-
-    LOGEXIT("fgets() returns %p\n", szBuf);
-    PERF_EXIT(fgets);
-
-    return szBuf;
-}
-
-/*++
-InternalFgets
-
-Wrapper for fgets.
-
-Input parameters:
-
-sz = stores characters read from the given file stream
-nSize = number of characters to be read
-f = stream to read characters from
-fTextMode = flag that indicates if file contents are text or binary
-
-Return value:
-    Returns a pointer to the string storing the characters on success
-    and NULL on failure.
-
-Notes:
-In Unix systems, fgets() can return an error if it gets interrupted by a
-signal before reading anything, and errno is set to EINTR. When this
-happens, it is SOP to call fgets again.
---*/
-char *
-CorUnix::InternalFgets(
-    char *sz,
-    int nSize,
-    FILE *f,
-    bool fTextMode
-    )
-{
-    char *retval = NULL;
-
-    _ASSERTE(sz != NULL);
-    _ASSERTE(f != NULL);
-
-#if FILE_OPS_CHECK_FERROR_OF_PREVIOUS_CALL
-    clearerr(f);
-#endif
-
-    do
-    {
-        retval =  fgets(sz, nSize, f);
-        if (NULL==retval)
-        {
-            if (feof(f))
-            {
-                TRACE("Reached EOF\n");
-                break;
-            }
-            /* The man page suggests using ferror and feof to distinguish
-               between error and EOF, but feof and errno is sufficient.
-               Not all cases that set errno also flag ferror, so just
-               checking errno is the best solution. */
-            if (EINTR != errno)
-            {
-                WARN("got error; errno is %d (%s)\n",errno, strerror(errno));
-                break;
-            }
-            /* we ignored a EINTR error, reset the stream's error state */
-            clearerr(f);
-            TRACE("call got interrupted (EINTR), trying again\n");
-        }
-        if (fTextMode)
-        {
-            int len = strlen(sz);
-            if ((len>=2) && (sz[len-1]=='\n') && (sz[len-2]=='\r'))
-            {
-                sz[len-2]='\n';
-                sz[len-1]='\0';
-            }
-        }
-    } while(NULL == retval);
-
-    return retval;
-}
-
-/*++
-PAL_fwrite
-
-Wrapper function for InternalFwrite.
-
-Input parameters:
-
-pvBuffer = array of objects to write to the given file stream
-nSize = size of a object in bytes
-nCount = number of objects to write
-pf = stream to write characters to
-
-Return value:
-    Returns the number of objects written.
---*/
-size_t
-__cdecl
-PAL_fwrite(
-    const void *pvBuffer,
-    size_t nSize,
-    size_t nCount,
-    PAL_FILE *pf
-    )
-{
-    size_t nWrittenBytes = 0;
-
-    PERF_ENTRY(fwrite);
-    ENTRY( "fwrite( pvBuffer=%p, nSize=%d, nCount=%d, pf=%p )\n",
-           pvBuffer, nSize, nCount, pf);
-    _ASSERTE(pf != NULL);
-
-    nWrittenBytes = InternalFwrite(pvBuffer, nSize, nCount, pf->bsdFilePtr, &pf->PALferrorCode);
-
-    LOGEXIT( "fwrite returning size_t %d\n", nWrittenBytes );
-    PERF_EXIT(fwrite);
-    return nWrittenBytes;
-}
-
-/*++
-InternalFwrite
-
-Wrapper for fwrite.
-
-Input parameters:
-
-pvBuffer = array of objects to write to the given file stream
-nSize = size of a object in bytes
-nCount = number of objects to write
-f = stream to write characters to
-pnErrorCode = reference to a PAL_FILE's fwrite error code field
-
-Return value:
-    Returns the number of objects written.
---*/
-size_t
-CorUnix::InternalFwrite(
-    const void *pvBuffer,
-    size_t nSize,
-    size_t nCount,
-    FILE *f,
-    INT *pnErrorCode
-    )
-{
-    size_t nWrittenBytes = 0;
-    _ASSERTE(f != NULL);
-
-#if FILE_OPS_CHECK_FERROR_OF_PREVIOUS_CALL
-    clearerr(f);
-#endif
-
-    nWrittenBytes = fwrite(pvBuffer, nSize, nCount, f);
-
-    // Make sure no error occurred.
-    if ( nWrittenBytes < nCount )
-    {
-        // Set the FILE* error code
-        *pnErrorCode = PAL_FILE_ERROR;
-    }
-
-    return nWrittenBytes;
-}
-
-
-/*++
-PAL_fseek
-
-Wrapper function for fseek.
-
-Input parameters:
-
-pf = a given file stream
-lOffset = distance from position to set file-position indicator
-nWhence = method used to determine the file_position indicator location relative to lOffset
-
-Return value:
-    0 on success, -1 on failure.
---*/
-int
-_cdecl
-PAL_fseek(
-    PAL_FILE * pf,
-    LONG lOffset,
-    int nWhence
-    )
-{
-    int nRet = 0;
-
-    PERF_ENTRY(fseek);
-    ENTRY( "fseek( %p, %ld, %d )\n", pf, lOffset, nWhence );
-
-    nRet = fseek(pf ? pf->bsdFilePtr : NULL, lOffset, nWhence);
-
-    LOGEXIT("fseek returning %d\n", nRet);
-    PERF_EXIT(fseek);
-    return nRet;
-}
diff --git a/src/coreclr/pal/src/cruntime/malloc.cpp b/src/coreclr/pal/src/cruntime/malloc.cpp
deleted file mode 100644
index c4b3797e0b30..000000000000
--- a/src/coreclr/pal/src/cruntime/malloc.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    malloc.cpp
-
-Abstract:
-
-    Implementation of suspension safe memory allocation functions.
-
-Revision History:
-
-
-
---*/
-
-#include "pal/corunix.hpp"
-#include "pal/thread.hpp"
-#include "pal/malloc.hpp"
-#include "pal/dbgmsg.h"
-
-#include <string.h>
-
-SET_DEFAULT_DEBUG_CHANNEL(CRT);
-
-using namespace CorUnix;
-
-void *
-__cdecl
-PAL_realloc(
-    void* pvMemblock,
-    size_t szSize
-    )
-{
-    return InternalRealloc(pvMemblock, szSize);
-}
-
-void *
-CorUnix::InternalRealloc(
-    void* pvMemblock,
-    size_t szSize
-    )
-{
-    void *pvMem;
-
-    PERF_ENTRY(InternalRealloc);
-    ENTRY("realloc (memblock:%p size=%d)\n", pvMemblock, szSize);
-
-    if (szSize == 0)
-    {
-        // If pvMemblock is NULL, there's no reason to call free.
-        if (pvMemblock != NULL)
-        {
-            free(pvMemblock);
-        }
-        pvMem = NULL;
-    }
-    else
-    {
-        pvMem = realloc(pvMemblock, szSize);
-    }
-
-    LOGEXIT("realloc returns void * %p\n", pvMem);
-    PERF_EXIT(InternalRealloc);
-    return pvMem;
-}
-
-void
-__cdecl
-PAL_free(
-    void *pvMem
-    )
-{
-    free(pvMem);
-}
-
-void *
-__cdecl
-PAL_malloc(
-    size_t szSize
-    )
-{
-    return InternalMalloc(szSize);
-}
-
-void *
-CorUnix::InternalMalloc(
-    size_t szSize
-    )
-{
-    void *pvMem;
-
-    if (szSize == 0)
-    {
-        // malloc may return null for a requested size of zero bytes. Force a nonzero size to get a valid pointer.
-        szSize = 1;
-    }
-
-    pvMem = (void*)malloc(szSize);
-    return pvMem;
-}
diff --git a/src/coreclr/pal/src/cruntime/math.cpp b/src/coreclr/pal/src/cruntime/math.cpp
deleted file mode 100644
index 14243f43a79c..000000000000
--- a/src/coreclr/pal/src/cruntime/math.cpp
+++ /dev/null
@@ -1,859 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    math.cpp
-
-Abstract:
-
-    Implementation of math family functions.
-
-
-
---*/
-
-#include "pal/palinternal.h"
-#include "pal/dbgmsg.h"
-
-#include <math.h>
-
-#if HAVE_IEEEFP_H
-#include <ieeefp.h>
-#endif  // HAVE_IEEEFP_H
-
-#include <errno.h>
-
-#define PAL_NAN_DBL     sqrt(-1.0)
-#define PAL_POSINF_DBL -log(0.0)
-#define PAL_NEGINF_DBL  log(0.0)
-
-#define IS_DBL_NEGZERO(x)         (((*((INT64*)((void*)&x))) & I64(0xFFFFFFFFFFFFFFFF)) == I64(0x8000000000000000))
-
-#define PAL_NAN_FLT     sqrtf(-1.0f)
-#define PAL_POSINF_FLT -logf(0.0f)
-#define PAL_NEGINF_FLT  logf(0.0f)
-
-#define IS_FLT_NEGZERO(x)         (((*((INT32*)((void*)&x))) & 0xFFFFFFFF) == 0x80000000)
-
-SET_DEFAULT_DEBUG_CHANNEL(CRT);
-
-/*++
-Function:
-  _finite
-
-Determines whether given double-precision floating point value is finite.
-
-Return Value
-
-_finite returns a nonzero value (TRUE) if its argument x is not
-infinite, that is, if -INF < x < +INF. It returns 0 (FALSE) if the
-argument is infinite or a NaN.
-
-Parameter
-
-x  Double-precision floating-point value
-
---*/
-int __cdecl _finite(double x)
-{
-    int ret;
-    PERF_ENTRY(_finite);
-    ENTRY("_finite (x=%f)\n", x);
-
-    ret = isfinite(x);
-
-    LOGEXIT("_finite returns int %d\n", ret);
-    PERF_EXIT(_finite);
-    return ret;
-}
-
-/*++
-Function:
-  _isnan
-
-See MSDN doc
---*/
-int __cdecl _isnan(double x)
-{
-    int ret;
-    PERF_ENTRY(_isnan);
-    ENTRY("_isnan (x=%f)\n", x);
-
-    ret = isnan(x);
-
-    LOGEXIT("_isnan returns int %d\n", ret);
-    PERF_EXIT(_isnan);
-    return ret;
-}
-
-/*++
-Function:
-  _copysign
-
-See MSDN doc
---*/
-double __cdecl _copysign(double x, double y)
-{
-    double ret;
-    PERF_ENTRY(_copysign);
-    ENTRY("_copysign (x=%f, y=%f)\n", x, y);
-
-    ret = copysign(x, y);
-
-    LOGEXIT("_copysign returns double %f\n", ret);
-    PERF_EXIT(_copysign);
-    return ret;
-}
-
-/*++
-Function:
-    acos
-
-See MSDN.
---*/
-PALIMPORT double __cdecl PAL_acos(double x)
-{
-    double ret;
-    PERF_ENTRY(acos);
-    ENTRY("acos (x=%f)\n", x);
-
-#if !HAVE_COMPATIBLE_ACOS
-    errno = 0;
-#endif  // HAVE_COMPATIBLE_ACOS
-
-    ret = acos(x);
-
-#if !HAVE_COMPATIBLE_ACOS
-    if (errno == EDOM)
-    {
-        ret = PAL_NAN_DBL;  // NaN
-    }
-#endif  // HAVE_COMPATIBLE_ACOS
-
-    LOGEXIT("acos returns double %f\n", ret);
-    PERF_EXIT(acos);
-    return ret;
-}
-
-/*++
-Function:
-    asin
-
-See MSDN.
---*/
-PALIMPORT double __cdecl PAL_asin(double x)
-{
-    double ret;
-    PERF_ENTRY(asin);
-    ENTRY("asin (x=%f)\n", x);
-
-#if !HAVE_COMPATIBLE_ASIN
-    errno = 0;
-#endif  // HAVE_COMPATIBLE_ASIN
-
-    ret = asin(x);
-
-#if !HAVE_COMPATIBLE_ASIN
-    if (errno == EDOM)
-    {
-        ret = PAL_NAN_DBL;  // NaN
-    }
-#endif  // HAVE_COMPATIBLE_ASIN
-
-    LOGEXIT("asin returns double %f\n", ret);
-    PERF_EXIT(asin);
-    return ret;
-}
-
-/*++
-Function:
-    atan2
-
-See MSDN.
---*/
-PALIMPORT double __cdecl PAL_atan2(double y, double x)
-{
-    double ret;
-    PERF_ENTRY(atan2);
-    ENTRY("atan2 (y=%f, x=%f)\n", y, x);
-
-#if !HAVE_COMPATIBLE_ATAN2
-    errno = 0;
-#endif  // !HAVE_COMPATIBLE_ATAN2
-
-    ret = atan2(y, x);
-
-#if !HAVE_COMPATIBLE_ATAN2
-    if ((errno == EDOM) && (x == 0.0) && (y == 0.0))
-    {
-        const double sign_x = copysign(1.0, x);
-        const double sign_y = copysign(1.0, y);
-
-        if (sign_x > 0)
-        {
-            ret = copysign(0.0, sign_y);
-        }
-        else
-        {
-            ret = copysign(atan2(0.0, -1.0), sign_y);
-        }
-    }
-#endif  // !HAVE_COMPATIBLE_ATAN2
-
-    LOGEXIT("atan2 returns double %f\n", ret);
-    PERF_EXIT(atan2);
-    return ret;
-}
-
-/*++
-Function:
-    exp
-
-See MSDN.
---*/
-PALIMPORT double __cdecl PAL_exp(double x)
-{
-    double ret;
-    PERF_ENTRY(exp);
-    ENTRY("exp (x=%f)\n", x);
-
-#if !HAVE_COMPATIBLE_EXP
-    if (x == 1.0)
-    {
-        ret = M_E;
-    }
-    else
-    {
-#endif  // HAVE_COMPATIBLE_EXP
-
-    ret = exp(x);
-
-#if !HAVE_COMPATIBLE_EXP
-    }
-#endif // HAVE_COMPATIBLE_EXP
-
-    LOGEXIT("exp returns double %f\n", ret);
-    PERF_EXIT(exp);
-    return ret;
-}
-
-/*++
-Function:
-    ilogb
-
-See MSDN.
---*/
-PALIMPORT int __cdecl PAL_ilogb(double x)
-{
-    int ret;
-    PERF_ENTRY(ilogb);
-    ENTRY("ilogb (x=%f)\n", x);
-
-#if !HAVE_COMPATIBLE_ILOGB0
-    if (x == 0.0)
-    {
-        ret = -2147483648;
-    }
-    else
-#endif // !HAVE_COMPATIBLE_ILOGB0
-
-#if !HAVE_COMPATIBLE_ILOGBNAN
-    if (isnan(x))
-    {
-        ret = 2147483647;
-    }
-    else
-#endif // !HAVE_COMPATIBLE_ILOGBNAN
-
-    {
-        ret = ilogb(x);
-    }
-
-    LOGEXIT("ilogb returns int %d\n", ret);
-    PERF_EXIT(ilogb);
-    return ret;
-}
-
-/*++
-Function:
-    log
-
-See MSDN.
---*/
-PALIMPORT double __cdecl PAL_log(double x)
-{
-    double ret;
-    PERF_ENTRY(log);
-    ENTRY("log (x=%f)\n", x);
-
-#if !HAVE_COMPATIBLE_LOG
-    errno = 0;
-#endif  // !HAVE_COMPATIBLE_LOG
-
-    ret = log(x);
-
-#if !HAVE_COMPATIBLE_LOG
-    if ((errno == EDOM) && (x < 0))
-    {
-        ret = PAL_NAN_DBL;    // NaN
-    }
-#endif  // !HAVE_COMPATIBLE_LOG
-
-    LOGEXIT("log returns double %f\n", ret);
-    PERF_EXIT(log);
-    return ret;
-}
-
-/*++
-Function:
-    log10
-
-See MSDN.
---*/
-PALIMPORT double __cdecl PAL_log10(double x)
-{
-    double ret;
-    PERF_ENTRY(log10);
-    ENTRY("log10 (x=%f)\n", x);
-
-#if !HAVE_COMPATIBLE_LOG10
-    errno = 0;
-#endif  // !HAVE_COMPATIBLE_LOG10
-
-    ret = log10(x);
-
-#if !HAVE_COMPATIBLE_LOG10
-    if ((errno == EDOM) && (x < 0))
-    {
-        ret = PAL_NAN_DBL;    // NaN
-    }
-#endif  // !HAVE_COMPATIBLE_LOG10
-
-    LOGEXIT("log10 returns double %f\n", ret);
-    PERF_EXIT(log10);
-    return ret;
-}
-
-/*++
-Function:
-    pow
-
-See MSDN.
---*/
-PALIMPORT double __cdecl PAL_pow(double x, double y)
-{
-    double ret;
-    PERF_ENTRY(pow);
-    ENTRY("pow (x=%f, y=%f)\n", x, y);
-
-#if !HAVE_COMPATIBLE_POW
-    if ((y == PAL_POSINF_DBL) && !isnan(x))    // +Inf
-    {
-        if (x == 1.0)
-        {
-            ret = x;
-        }
-        else if (x == -1.0)
-        {
-            ret = 1.0;
-        }
-        else if ((x > -1.0) && (x < 1.0))
-        {
-            ret = 0.0;
-        }
-        else
-        {
-            ret = PAL_POSINF_DBL;    // +Inf
-        }
-    }
-    else if ((y == PAL_NEGINF_DBL) && !isnan(x))   // -Inf
-    {
-        if (x == 1.0)
-        {
-            ret = x;
-        }
-        else if (x == -1.0)
-        {
-            ret = 1.0;
-        }
-        else if ((x > -1.0) && (x < 1.0))
-        {
-            ret = PAL_POSINF_DBL;    // +Inf
-        }
-        else
-        {
-            ret = 0.0;
-        }
-    }
-    else if (IS_DBL_NEGZERO(x) && (y == -1.0))
-    {
-        ret = PAL_NEGINF_DBL;    // -Inf
-    }
-    else if ((x == 0.0) && (y < 0.0))
-    {
-        ret = PAL_POSINF_DBL;    // +Inf
-    }
-    else
-#endif  // !HAVE_COMPATIBLE_POW
-
-    ret = pow(x, y);
-
-#if !HAVE_VALID_NEGATIVE_INF_POW
-    if ((ret == PAL_POSINF_DBL) && (x < 0) && isfinite(x) && (ceil(y / 2) != floor(y / 2)))
-    {
-        ret = PAL_NEGINF_DBL;   // -Inf
-    }
-#endif  // !HAVE_VALID_NEGATIVE_INF_POW
-
-#if !HAVE_VALID_POSITIVE_INF_POW
-    /*
-    * The even/odd test in the if (this one and the one above) used to be ((long long) y % 2 == 0)
-    * on SPARC (long long) y for large y (>2**63) is always 0x7fffffff7fffffff, which
-    * is an odd number, so the test ((long long) y % 2 == 0) will always fail for
-    * large y. Since large double numbers are always even (e.g., the representation of
-    * 1E20+1 is the same as that of 1E20, the last .+1. is too insignificant to be part
-    * of the representation), this test will always return the wrong result for large y.
-    *
-    * The (ceil(y/2) == floor(y/2)) test is slower, but more robust.
-    */
-    if ((ret == PAL_NEGINF_DBL) && (x < 0) && isfinite(x) && (ceil(y / 2) == floor(y / 2)))
-    {
-        ret = PAL_POSINF_DBL;   // +Inf
-    }
-#endif  // !HAVE_VALID_POSITIVE_INF_POW
-
-    LOGEXIT("pow returns double %f\n", ret);
-    PERF_EXIT(pow);
-    return ret;
-}
-
-/*++
-Function:
-    sincos
-
-See MSDN.
---*/
-PALIMPORT void __cdecl PAL_sincos(double x, double* sin, double* cos)
-{
-    PERF_ENTRY(sincos);
-    ENTRY("sincos (x=%f)\n", x);
-
-#if defined(__APPLE__)
-    __sincos(x, sin, cos);
-#else
-    sincos(x, sin, cos);
-#endif // !__APPLE__
-
-    LOGEXIT("sincos returns (double %f, double %f)\n", *sin, *cos);
-    PERF_EXIT(sincos);
-}
-
-/*++
-Function:
-  _finitef
-
-Determines whether given single-precision floating point value is finite.
-
-Return Value
-
-_finitef returns a nonzero value (TRUE) if its argument x is not
-infinite, that is, if -INF < x < +INF. It returns 0 (FALSE) if the
-argument is infinite or a NaN.
-
-Parameter
-
-x  Single-precision floating-point value
-
---*/
-int __cdecl _finitef(float x)
-{
-    int ret;
-    PERF_ENTRY(_finitef);
-    ENTRY("_finitef (x=%f)\n", x);
-
-    ret = isfinite(x);
-
-    LOGEXIT("_finitef returns int %d\n", ret);
-    PERF_EXIT(_finitef);
-    return ret;
-}
-
-/*++
-Function:
-  _isnanf
-
-See MSDN doc
---*/
-int __cdecl _isnanf(float x)
-{
-    int ret;
-    PERF_ENTRY(_isnanf);
-    ENTRY("_isnanf (x=%f)\n", x);
-
-    ret = isnan(x);
-
-    LOGEXIT("_isnanf returns int %d\n", ret);
-    PERF_EXIT(_isnanf);
-    return ret;
-}
-
-/*++
-Function:
-  _copysignf
-
-See MSDN doc
---*/
-float __cdecl _copysignf(float x, float y)
-{
-    float ret;
-    PERF_ENTRY(_copysignf);
-    ENTRY("_copysignf (x=%f, y=%f)\n", x, y);
-
-    ret = copysign(x, y);
-
-    LOGEXIT("_copysignf returns float %f\n", ret);
-    PERF_EXIT(_copysignf);
-    return ret;
-}
-
-/*++
-Function:
-    acosf
-
-See MSDN.
---*/
-PALIMPORT float __cdecl PAL_acosf(float x)
-{
-    float ret;
-    PERF_ENTRY(acosf);
-    ENTRY("acosf (x=%f)\n", x);
-
-#if !HAVE_COMPATIBLE_ACOS
-    errno = 0;
-#endif  // HAVE_COMPATIBLE_ACOS
-
-    ret = acosf(x);
-
-#if !HAVE_COMPATIBLE_ACOS
-    if (errno == EDOM)
-    {
-        ret = PAL_NAN_FLT;  // NaN
-    }
-#endif  // HAVE_COMPATIBLE_ACOS
-
-    LOGEXIT("acosf returns float %f\n", ret);
-    PERF_EXIT(acosf);
-    return ret;
-}
-
-/*++
-Function:
-    asinf
-
-See MSDN.
---*/
-PALIMPORT float __cdecl PAL_asinf(float x)
-{
-    float ret;
-    PERF_ENTRY(asinf);
-    ENTRY("asinf (x=%f)\n", x);
-
-#if !HAVE_COMPATIBLE_ASIN
-    errno = 0;
-#endif  // HAVE_COMPATIBLE_ASIN
-
-    ret = asinf(x);
-
-#if !HAVE_COMPATIBLE_ASIN
-    if (errno == EDOM)
-    {
-        ret = PAL_NAN_FLT;  // NaN
-    }
-#endif  // HAVE_COMPATIBLE_ASIN
-
-    LOGEXIT("asinf returns float %f\n", ret);
-    PERF_EXIT(asinf);
-    return ret;
-}
-
-/*++
-Function:
-    atan2f
-
-See MSDN.
---*/
-PALIMPORT float __cdecl PAL_atan2f(float y, float x)
-{
-    float ret;
-    PERF_ENTRY(atan2f);
-    ENTRY("atan2f (y=%f, x=%f)\n", y, x);
-
-#if !HAVE_COMPATIBLE_ATAN2
-    errno = 0;
-#endif  // !HAVE_COMPATIBLE_ATAN2
-
-    ret = atan2f(y, x);
-
-#if !HAVE_COMPATIBLE_ATAN2
-    if ((errno == EDOM) && (x == 0.0f) && (y == 0.0f))
-    {
-        const float sign_x = copysign(1.0f, x);
-        const float sign_y = copysign(1.0f, y);
-
-        if (sign_x > 0)
-        {
-            ret = copysign(0.0f, sign_y);
-        }
-        else
-        {
-            ret = copysign(atan2f(0.0f, -1.0f), sign_y);
-        }
-    }
-#endif  // !HAVE_COMPATIBLE_ATAN2
-
-    LOGEXIT("atan2f returns float %f\n", ret);
-    PERF_EXIT(atan2f);
-    return ret;
-}
-
-/*++
-Function:
-    expf
-
-See MSDN.
---*/
-PALIMPORT float __cdecl PAL_expf(float x)
-{
-    float ret;
-    PERF_ENTRY(expf);
-    ENTRY("expf (x=%f)\n", x);
-
-#if !HAVE_COMPATIBLE_EXP
-    if (x == 1.0f)
-    {
-        ret = M_E;
-    }
-    else
-    {
-#endif  // HAVE_COMPATIBLE_EXP
-
-    ret = expf(x);
-
-#if !HAVE_COMPATIBLE_EXP
-    }
-#endif // HAVE_COMPATIBLE_EXP
-
-    LOGEXIT("expf returns float %f\n", ret);
-    PERF_EXIT(expf);
-    return ret;
-}
-
-/*++
-Function:
-    ilogbf
-
-See MSDN.
---*/
-PALIMPORT int __cdecl PAL_ilogbf(float x)
-{
-    int ret;
-    PERF_ENTRY(ilogbf);
-    ENTRY("ilogbf (x=%f)\n", x);
-
-#if !HAVE_COMPATIBLE_ILOGB0
-    if (x == 0.0f)
-    {
-        ret = -2147483648;
-    }
-    else
-#endif // !HAVE_COMPATIBLE_ILOGB0
-
-#if !HAVE_COMPATIBLE_ILOGBNAN
-    if (isnan(x))
-    {
-        ret = 2147483647;
-    }
-    else
-#endif // !HAVE_COMPATIBLE_ILOGBNAN
-
-    {
-        ret = ilogbf(x);
-    }
-
-    LOGEXIT("ilogbf returns int %d\n", ret);
-    PERF_EXIT(ilogbf);
-    return ret;
-}
-
-/*++
-Function:
-    logf
-
-See MSDN.
---*/
-PALIMPORT float __cdecl PAL_logf(float x)
-{
-    float ret;
-    PERF_ENTRY(logf);
-    ENTRY("logf (x=%f)\n", x);
-
-#if !HAVE_COMPATIBLE_LOG
-    errno = 0;
-#endif  // !HAVE_COMPATIBLE_LOG
-
-    ret = logf(x);
-
-#if !HAVE_COMPATIBLE_LOG
-    if ((errno == EDOM) && (x < 0))
-    {
-        ret = PAL_NAN_FLT;    // NaN
-    }
-#endif  // !HAVE_COMPATIBLE_LOG
-
-    LOGEXIT("logf returns float %f\n", ret);
-    PERF_EXIT(logf);
-    return ret;
-}
-
-/*++
-Function:
-    log10f
-
-See MSDN.
---*/
-PALIMPORT float __cdecl PAL_log10f(float x)
-{
-    float ret;
-    PERF_ENTRY(log10f);
-    ENTRY("log10f (x=%f)\n", x);
-
-#if !HAVE_COMPATIBLE_LOG10
-    errno = 0;
-#endif  // !HAVE_COMPATIBLE_LOG10
-
-    ret = log10f(x);
-
-#if !HAVE_COMPATIBLE_LOG10
-    if ((errno == EDOM) && (x < 0))
-    {
-        ret = PAL_NAN_FLT;    // NaN
-    }
-#endif  // !HAVE_COMPATIBLE_LOG10
-
-    LOGEXIT("log10f returns float %f\n", ret);
-    PERF_EXIT(log10f);
-    return ret;
-}
-
-/*++
-Function:
-    powf
-
-See MSDN.
---*/
-PALIMPORT float __cdecl PAL_powf(float x, float y)
-{
-    float ret;
-    PERF_ENTRY(powf);
-    ENTRY("powf (x=%f, y=%f)\n", x, y);
-
-#if !HAVE_COMPATIBLE_POW
-    if ((y == PAL_POSINF_FLT) && !isnan(x))    // +Inf
-    {
-        if (x == 1.0f)
-        {
-            ret = x;
-        }
-        else if (x == -1.0f)
-        {
-            ret = 1.0f;
-        }
-        else if ((x > -1.0f) && (x < 1.0f))
-        {
-            ret = 0.0f;
-        }
-        else
-        {
-            ret = PAL_POSINF_FLT;    // +Inf
-        }
-    }
-    else if ((y == PAL_NEGINF_FLT) && !isnan(x))   // -Inf
-    {
-        if (x == 1.0f)
-        {
-            ret = x;
-        }
-        else if (x == -1.0f)
-        {
-            ret = 1.0f;
-        }
-        else if ((x > -1.0f) && (x < 1.0f))
-        {
-            ret = PAL_POSINF_FLT;    // +Inf
-        }
-        else
-        {
-            ret = 0.0f;
-        }
-    }
-    else if (IS_FLT_NEGZERO(x) && (y == -1.0f))
-    {
-        ret = PAL_NEGINF_FLT;    // -Inf
-    }
-    else if ((x == 0.0f) && (y < 0.0f))
-    {
-        ret = PAL_POSINF_FLT;    // +Inf
-    }
-    else
-#endif  // !HAVE_COMPATIBLE_POW
-
-    ret = powf(x, y);
-
-#if !HAVE_VALID_NEGATIVE_INF_POW
-    if ((ret == PAL_POSINF_FLT) && (x < 0) && isfinite(x) && (ceilf(y / 2) != floorf(y / 2)))
-    {
-        ret = PAL_NEGINF_FLT;   // -Inf
-    }
-#endif  // !HAVE_VALID_NEGATIVE_INF_POW
-
-#if !HAVE_VALID_POSITIVE_INF_POW
-    /*
-    * The (ceil(y/2) == floor(y/2)) test is slower, but more robust for platforms where large y
-    * will return the wrong result for ((long) y % 2 == 0). See PAL_pow(double) above for more details.
-    */
-    if ((ret == PAL_NEGINF_FLT) && (x < 0) && isfinite(x) && (ceilf(y / 2) == floorf(y / 2)))
-    {
-        ret = PAL_POSINF_FLT;   // +Inf
-    }
-#endif  // !HAVE_VALID_POSITIVE_INF_POW
-
-    LOGEXIT("powf returns float %f\n", ret);
-    PERF_EXIT(powf);
-    return ret;
-}
-
-/*++
-Function:
-    sincosf
-
-See MSDN.
---*/
-PALIMPORT void __cdecl PAL_sincosf(float x, float* sin, float* cos)
-{
-    PERF_ENTRY(sincosf);
-    ENTRY("sincosf (x=%f)\n", x);
-
-#if defined(__APPLE__)
-    __sincosf(x, sin, cos);
-#else
-    sincosf(x, sin, cos);
-#endif // !__APPLE__
-
-    LOGEXIT("sincosf returns (float %f, float %f)\n", *sin, *cos);
-    PERF_EXIT(sincosf);
-}
diff --git a/src/coreclr/pal/src/cruntime/misc.cpp b/src/coreclr/pal/src/cruntime/misc.cpp
deleted file mode 100644
index 0820be8c2579..000000000000
--- a/src/coreclr/pal/src/cruntime/misc.cpp
+++ /dev/null
@@ -1,262 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    cruntime/misc.cpp
-
-Abstract:
-
-    Implementation of C runtime functions that don't fit anywhere else.
-
-
-
---*/
-
-#include "pal/thread.hpp"
-#include "pal/threadsusp.hpp"
-#include "pal/palinternal.h"
-#include "pal/dbgmsg.h"
-#include "pal/misc.h"
-
-#include <errno.h>
-/* <stdarg.h> needs to be included after "palinternal.h" to avoid name
-   collision for va_start and va_end */
-#include <stdarg.h>
-#include <time.h>
-#include <limits.h>
-
-#if defined(HOST_AMD64) || defined(_x86_)
-#include <xmmintrin.h>
-#endif // defined(HOST_AMD64) || defined(_x86_)
-#if defined(_DEBUG)
-#include <assert.h>
-#endif //defined(_DEBUG)
-
-SET_DEFAULT_DEBUG_CHANNEL(CRT);
-
-using namespace CorUnix;
-
-/*++
-Function:
-  _gcvt_s
-
-See MSDN doc.
---*/
-char *
-__cdecl
-_gcvt_s( char * buffer, int iSize, double value, int digits )
-{
-    PERF_ENTRY(_gcvt);
-    ENTRY( "_gcvt( value:%f digits=%d, buffer=%p )\n", value, digits, buffer );
-
-    if ( !buffer )
-    {
-        ERROR( "buffer was an invalid pointer.\n" );
-    }
-
-    switch ( digits )
-    {
-    case 7 :
-        /* Fall through */
-    case 8 :
-        /* Fall through */
-    case 15 :
-        /* Fall through */
-    case 17 :
-
-        sprintf_s( buffer, iSize, "%.*g", digits, value );
-        break;
-
-    default :
-        ASSERT( "Only the digits 7, 8, 15, and 17 are valid.\n" );
-        *buffer = '\0';
-    }
-
-    LOGEXIT( "_gcvt returns %p (%s)\n", buffer , buffer );
-    PERF_EXIT(_gcvt);
-    return buffer;
-}
-
-
-/*++
-Function :
-
-    __iscsym
-
-See MSDN for more details.
---*/
-int
-__cdecl
-__iscsym( int c )
-{
-    PERF_ENTRY(__iscsym);
-    ENTRY( "__iscsym( c=%d )\n", c );
-
-    if ( isalnum( c ) || c == '_'  )
-    {
-        LOGEXIT( "__iscsym returning 1\n" );
-        PERF_EXIT(__iscsym);
-        return 1;
-    }
-
-    LOGEXIT( "__iscsym returning 0\n" );
-    PERF_EXIT(__iscsym);
-    return 0;
-}
-
-
-/*++
-
-Function :
-
-    PAL_errno
-
-    Returns the address of the errno.
-
---*/
-int * __cdecl PAL_errno( int caller )
-{
-    int *retval;
-    PERF_ENTRY(errno);
-    ENTRY( "PAL_errno( void )\n" );
-    retval = (INT*)(&errno);
-    LOGEXIT("PAL_errno returns %p\n",retval);
-    PERF_EXIT(errno);
-    return retval;
-}
-
-
-/*++
-Function:
-
-   rand
-
-   The RAND_MAX value can vary by platform.
-
-See MSDN for more details.
---*/
-int
-__cdecl
-PAL_rand(void)
-{
-    int ret;
-    PERF_ENTRY(rand);
-    ENTRY("rand(void)\n");
-
-    ret = (rand() % (PAL_RAND_MAX + 1));
-
-    LOGEXIT("rand() returning %d\n", ret);
-    PERF_EXIT(rand);
-    return ret;
-}
-
-
-/*++
-Function:
-
-   time
-
-See MSDN for more details.
---*/
-PAL_time_t
-__cdecl
-PAL_time(PAL_time_t *tloc)
-{
-    time_t result;
-
-    PERF_ENTRY(time);
-    ENTRY( "time( tloc=%p )\n",tloc );
-
-    time_t t;
-    result = time(&t);
-    if (tloc != NULL)
-    {
-        *tloc = t;
-    }
-
-    LOGEXIT( "time returning %#lx\n",result );
-    PERF_EXIT(time);
-    return result;
-}
-
-PALIMPORT
-void __cdecl
-PAL_qsort(void *base, size_t nmemb, size_t size,
-          int (__cdecl *compar )(const void *, const void *))
-{
-    PERF_ENTRY(qsort);
-    ENTRY("qsort(base=%p, nmemb=%lu, size=%lu, compar=%p\n",
-          base,(unsigned long) nmemb,(unsigned long) size, compar);
-
-/* reset ENTRY nesting level back to zero, qsort will invoke app-defined
-   callbacks and we want their entry traces... */
-#if _ENABLE_DEBUG_MESSAGES_
-{
-    int old_level;
-    old_level = DBG_change_entrylevel(0);
-#endif /* _ENABLE_DEBUG_MESSAGES_ */
-
-    qsort(base,nmemb,size,compar);
-
-/* ...and set nesting level back to what it was */
-#if _ENABLE_DEBUG_MESSAGES_
-    DBG_change_entrylevel(old_level);
-}
-#endif /* _ENABLE_DEBUG_MESSAGES_ */
-
-    LOGEXIT("qsort returns\n");
-    PERF_EXIT(qsort);
-}
-
-PALIMPORT
-void * __cdecl
-PAL_bsearch(const void *key, const void *base, size_t nmemb, size_t size,
-            int (__cdecl *compar)(const void *, const void *))
-{
-    void *retval;
-
-    PERF_ENTRY(bsearch);
-    ENTRY("bsearch(key=%p, base=%p, nmemb=%lu, size=%lu, compar=%p\n",
-          key, base, (unsigned long) nmemb, (unsigned long) size, compar);
-
-/* reset ENTRY nesting level back to zero, bsearch will invoke app-defined
-   callbacks and we want their entry traces... */
-#if _ENABLE_DEBUG_MESSAGES_
-{
-    int old_level;
-    old_level = DBG_change_entrylevel(0);
-#endif /* _ENABLE_DEBUG_MESSAGES_ */
-
-    retval = bsearch(key,base,nmemb,size,compar);
-
-/* ...and set nesting level back to what it was */
-#if _ENABLE_DEBUG_MESSAGES_
-    DBG_change_entrylevel(old_level);
-}
-#endif /* _ENABLE_DEBUG_MESSAGES_ */
-
-    LOGEXIT("bsearch returns %p\n",retval);
-    PERF_EXIT(bsearch);
-    return retval;
-}
-
-#ifdef HOST_AMD64
-
-PALIMPORT
-unsigned int PAL__mm_getcsr(void)
-{
-    return _mm_getcsr();
-}
-
-PALIMPORT
-void PAL__mm_setcsr(unsigned int i)
-{
-    _mm_setcsr(i);
-}
-
-#endif // HOST_AMD64
diff --git a/src/coreclr/pal/src/cruntime/printfcpp.cpp b/src/coreclr/pal/src/cruntime/printfcpp.cpp
deleted file mode 100644
index b9c1c92b9bc8..000000000000
--- a/src/coreclr/pal/src/cruntime/printfcpp.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    printfcpp.cpp
-
-Abstract:
-
-    Implementation of suspension safe printf functions.
-
-Revision History:
-
-
-
---*/
-
-#include "pal/corunix.hpp"
-#include "pal/thread.hpp"
-#include "pal/malloc.hpp"
-#include "pal/file.hpp"
-#include "pal/printfcpp.hpp"
-#include "pal/palinternal.h"
-#include "pal/dbgmsg.h"
-#include "pal/cruntime.h"
-
-#include <errno.h>
-
-SET_DEFAULT_DEBUG_CHANNEL(CRT);
-
-extern "C"
-{
-
-// Forward declare functions that are in header files we can't include yet
-int vfprintf(FILE* stream, const char* format, va_list ap);
-
-/*++
-Function:
-  PAL_fprintf
-
-See MSDN doc.
---*/
-int
-__cdecl
-PAL_fprintf(PAL_FILE *stream,const char *format,...)
-{
-    LONG Length = 0;
-    va_list ap;
-
-    PERF_ENTRY(fprintf);
-    ENTRY("PAL_fprintf(stream=%p,format=%p (%s))\n",stream, format, format);
-
-    va_start(ap, format);
-    Length = vfprintf(stream->bsdFilePtr, format, ap);
-    va_end(ap);
-
-    LOGEXIT("PAL_fprintf returns int %d\n", Length);
-    PERF_EXIT(fprintf);
-    return Length;
-}
-
-/*******************************************************************************
-Function:
-  PAL_vfprintf
-
-Parameters:
-  stream
-    - out stream
-  Format
-    - format string
-  ap
-    - stdarg parameter list
-*******************************************************************************/
-
-int __cdecl PAL_vfprintf(PAL_FILE *stream, const char *format, va_list ap)
-{
-    return vfprintf(stream->bsdFilePtr, format, ap);
-}
-
-} // end extern "C"
diff --git a/src/coreclr/pal/src/cruntime/thread.cpp b/src/coreclr/pal/src/cruntime/thread.cpp
deleted file mode 100644
index 883c5d1b0019..000000000000
--- a/src/coreclr/pal/src/cruntime/thread.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    thread.c
-
-Abstract:
-
-    Implementation of the threads/process functions in the C runtime library
-    that are Windows specific.
-
-
-
---*/
-
-#include "pal/palinternal.h"
-#include "pal/dbgmsg.h"
-#include "pal/init.h"
-
-SET_DEFAULT_DEBUG_CHANNEL(CRT);
-
-void
-PAL_exit(int status)
-{
-    PERF_ENTRY(exit);
-    ENTRY ("exit(status=%d)\n", status);
-
-    /* should also clean up any resources allocated by pal/cruntime, if any */
-    ExitProcess(status);
-
-    LOGEXIT ("exit returns void");
-    PERF_EXIT(exit);
-}
diff --git a/src/coreclr/pal/src/cruntime/wchar.cpp b/src/coreclr/pal/src/cruntime/wchar.cpp
index 8635b303d578..88340538ebca 100644
--- a/src/coreclr/pal/src/cruntime/wchar.cpp
+++ b/src/coreclr/pal/src/cruntime/wchar.cpp
@@ -55,10 +55,10 @@ _wtoi(
               GetLastError());
         return -1;
     }
-    tempStr = (char *) PAL_malloc(len);
+    tempStr = (char *) malloc(len);
     if (!tempStr)
     {
-        ERROR("PAL_malloc failed\n");
+        ERROR("malloc failed\n");
         SetLastError(ERROR_NOT_ENOUGH_MEMORY);
         return -1;
     }
@@ -67,12 +67,12 @@ _wtoi(
     {
         ASSERT("WideCharToMultiByte failed.  Error is %d\n",
               GetLastError());
-        PAL_free(tempStr);
+        free(tempStr);
         return -1;
     }
     ret = atoi(tempStr);
 
-    PAL_free(tempStr);
+    free(tempStr);
     LOGEXIT("_wtoi returns int %d\n", ret);
     PERF_EXIT(_wtoi);
     return ret;
@@ -261,10 +261,10 @@ PAL_wcstoul(
         res = 0;
         goto PAL_wcstoulExit;
     }
-    s_nptr = (char *)PAL_malloc(size);
+    s_nptr = (char *)malloc(size);
     if (!s_nptr)
     {
-        ERROR("PAL_malloc failed\n");
+        ERROR("malloc failed\n");
         SetLastError(ERROR_NOT_ENOUGH_MEMORY);
         res = 0;
         goto PAL_wcstoulExit;
@@ -310,7 +310,7 @@ PAL_wcstoul(
     }
 
 PAL_wcstoulExit:
-    PAL_free(s_nptr);
+    free(s_nptr);
     LOGEXIT("wcstoul returning unsigned long %lu\n", res);
     PERF_EXIT(wcstoul);
 
@@ -351,10 +351,10 @@ PAL__wcstoui64(
         res = 0;
         goto PAL__wcstoui64Exit;
     }
-    s_nptr = (char *)PAL_malloc(size);
+    s_nptr = (char *)malloc(size);
     if (!s_nptr)
     {
-        ERROR("PAL_malloc failed\n");
+        ERROR("malloc failed\n");
         SetLastError(ERROR_NOT_ENOUGH_MEMORY);
         res = 0;
         goto PAL__wcstoui64Exit;
@@ -381,7 +381,7 @@ PAL__wcstoui64(
     }
 
 PAL__wcstoui64Exit:
-    PAL_free(s_nptr);
+    free(s_nptr);
     LOGEXIT("_wcstoui64 returning unsigned long long %llu\n", res);
     PERF_EXIT(_wcstoui64);
 
@@ -896,7 +896,7 @@ PAL_wcstod( const wchar_16 * nptr, wchar_16 **endptr )
     if ( lpEndOfExpression != lpStartOfExpression )
     {
         Length = lpEndOfExpression - lpStartOfExpression;
-        lpStringRep = (LPSTR)PAL_malloc( Length + 1);
+        lpStringRep = (LPSTR)malloc( Length + 1);
 
         if ( lpStringRep )
         {
@@ -939,8 +939,56 @@ PAL_wcstod( const wchar_16 * nptr, wchar_16 **endptr )
         *endptr = lpEndOfExpression;
     }
 
-    PAL_free( lpStringRep );
+    free( lpStringRep );
     LOGEXIT( "wcstod returning %f.\n", RetVal );
     PERF_EXIT(wcstod);
     return RetVal;
 }
+
+/*++
+Function:
+  _wfopen
+
+see MSDN doc.
+
+--*/
+extern "C"
+FILE *
+__cdecl
+_wfopen(
+    const wchar_16 *fileName,
+    const wchar_16 *mode)
+{
+    CHAR mbFileName[ _MAX_PATH ];
+    CHAR mbMode[ 10 ];
+    FILE * filePtr = NULL;
+
+    PERF_ENTRY(_wfopen);
+    ENTRY("_wfopen(fileName:%p (%S), mode:%p (%S))\n", fileName, fileName, mode, mode);
+
+    _ASSERTE(fileName != NULL);
+    _ASSERTE(mode != NULL);
+
+    /* Convert the parameters to ASCII and defer to PAL_fopen */
+    if ( WideCharToMultiByte( CP_ACP, 0, fileName, -1, mbFileName,
+                              sizeof mbFileName, NULL, NULL ) != 0 )
+    {
+        if ( WideCharToMultiByte( CP_ACP, 0, mode, -1, mbMode,
+                                  sizeof mbMode, NULL, NULL ) != 0 )
+        {
+            filePtr = fopen(mbFileName, mbMode);
+        }
+        else
+        {
+            ERROR( "An error occurred while converting mode to ANSI.\n" );
+        }
+    }
+    else
+    {
+        ERROR( "An error occurred while converting"
+               " fileName to ANSI string.\n" );
+    }
+    LOGEXIT("_wfopen returning FILE* %p\n", filePtr);
+    PERF_EXIT(_wfopen);
+    return filePtr;
+}
diff --git a/src/coreclr/pal/src/debug/debug.cpp b/src/coreclr/pal/src/debug/debug.cpp
index f0a504452c59..b38810864a58 100644
--- a/src/coreclr/pal/src/debug/debug.cpp
+++ b/src/coreclr/pal/src/debug/debug.cpp
@@ -40,6 +40,7 @@ SET_DEFAULT_DEBUG_CHANNEL(DEBUG); // some headers have code with asserts, so do
 
 #include <signal.h>
 #include <unistd.h>
+#include <fcntl.h>
 #if HAVE_PROCFS_CTL
 #include <unistd.h>
 #elif defined(HAVE_TTRACE) // HAVE_PROCFS_CTL
@@ -203,7 +204,7 @@ OutputDebugStringW(
     }
 
     /* strLen includes the null terminator */
-    if ((lpOutputStringA = (LPSTR) InternalMalloc((strLen * sizeof(CHAR)))) == NULL)
+    if ((lpOutputStringA = (LPSTR) malloc((strLen * sizeof(CHAR)))) == NULL)
     {
         ERROR("Insufficient memory available !\n");
         SetLastError(ERROR_NOT_ENOUGH_MEMORY);
diff --git a/src/coreclr/pal/src/eventprovider/dummyprovider/CMakeLists.txt b/src/coreclr/pal/src/eventprovider/dummyprovider/CMakeLists.txt
index e0105865f9ae..09986597b7c1 100644
--- a/src/coreclr/pal/src/eventprovider/dummyprovider/CMakeLists.txt
+++ b/src/coreclr/pal/src/eventprovider/dummyprovider/CMakeLists.txt
@@ -24,7 +24,6 @@ foreach(DUMMY_PROVIDER_FILE ${DUMMY_PROVIDER_OUTPUT})
   list(APPEND DUMMY_PROVIDER_SOURCES ${DUMMY_PROVIDER_FILE})
 endforeach()
 
-add_definitions(-DPAL_STDCPP_COMPAT=1)
 include_directories(${COREPAL_SOURCE_DIR}/inc/rt)
 include_directories(${CMAKE_CURRENT_BINARY_DIR}/dummy)
 
diff --git a/src/coreclr/pal/src/eventprovider/lttngprovider/CMakeLists.txt b/src/coreclr/pal/src/eventprovider/lttngprovider/CMakeLists.txt
index d116c0095ea5..40f65bf17114 100644
--- a/src/coreclr/pal/src/eventprovider/lttngprovider/CMakeLists.txt
+++ b/src/coreclr/pal/src/eventprovider/lttngprovider/CMakeLists.txt
@@ -30,7 +30,6 @@ foreach(LTTNG_PROVIDER_FILE ${LTTNG_PROVIDER_OUTPUT})
   endif()
 endforeach()
 
-add_definitions(-DPAL_STDCPP_COMPAT=1)
 include_directories(${COREPAL_SOURCE_DIR}/inc/rt)
 include_directories(${CMAKE_CURRENT_BINARY_DIR}/lttng)
 
diff --git a/src/coreclr/pal/src/exception/machexception.cpp b/src/coreclr/pal/src/exception/machexception.cpp
index 50db83248fe7..8f58cd4d627b 100644
--- a/src/coreclr/pal/src/exception/machexception.cpp
+++ b/src/coreclr/pal/src/exception/machexception.cpp
@@ -849,7 +849,7 @@ HijackFaultingThread(
     if (fIsStackOverflow)
     {
         // Allocate the minimal stack necessary for handling stack overflow
-        int stackOverflowStackSize = 7 * 4096;
+        int stackOverflowStackSize = 15 * 4096;
         // Align the size to virtual page size and add one virtual page as a stack guard
         stackOverflowStackSize = ALIGN_UP(stackOverflowStackSize, GetVirtualPageSize()) + GetVirtualPageSize();
         void* stackOverflowHandlerStack = mmap(NULL, stackOverflowStackSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
diff --git a/src/coreclr/pal/src/file/directory.cpp b/src/coreclr/pal/src/file/directory.cpp
index 4cd0600e5cf6..e06afd0b19e1 100644
--- a/src/coreclr/pal/src/file/directory.cpp
+++ b/src/coreclr/pal/src/file/directory.cpp
@@ -75,11 +75,11 @@ CreateDirectoryW(
         goto done;
     }
 
-    if (((mb_dir = (char *)PAL_malloc(mb_size)) == NULL) ||
+    if (((mb_dir = (char *)malloc(mb_size)) == NULL) ||
         (WideCharToMultiByte( CP_ACP, 0, lpPathName, -1, mb_dir, mb_size, NULL,
                               NULL) != mb_size))
     {
-        ASSERT("WideCharToMultiByte or PAL_malloc failure! LastError:%d errno:%d\n",
+        ASSERT("WideCharToMultiByte or malloc failure! LastError:%d errno:%d\n",
               GetLastError(), errno);
         dwLastError = ERROR_INTERNAL_ERROR;
         goto done;
@@ -93,7 +93,7 @@ CreateDirectoryW(
     }
     if (mb_dir != NULL)
     {
-        PAL_free(mb_dir);
+        free(mb_dir);
     }
     LOGEXIT("CreateDirectoryW returns BOOL %d\n", bRet);
     PERF_EXIT(CreateDirectoryW);
@@ -280,7 +280,7 @@ GetCurrentDirectoryA(PathCharString& lpBuffer)
 
     dwDirLen = strlen( current_dir );
     lpBuffer.Set(current_dir, dwDirLen);
-    PAL_free(current_dir);
+    free(current_dir);
 done:
 
     if ( dwLastError )
@@ -486,7 +486,7 @@ CreateDirectoryA(
     {
         SetLastError( dwLastError );
     }
-    PAL_free( unixPathName );
+    free( unixPathName );
     LOGEXIT("CreateDirectoryA returns BOOL %d\n", bRet);
     PERF_EXIT(CreateDirectoryA);
     return bRet;
diff --git a/src/coreclr/pal/src/file/file.cpp b/src/coreclr/pal/src/file/file.cpp
index a15be4dba741..1712be93f698 100644
--- a/src/coreclr/pal/src/file/file.cpp
+++ b/src/coreclr/pal/src/file/file.cpp
@@ -1529,6 +1529,52 @@ SetFileAttributesW(
     return bRet;
 }
 
+/*++
+InternalOpen
+
+Wrapper for open.
+
+Input parameters:
+
+szPath = pointer to a pathname of a file to be opened
+nFlags = arguments that control how the file should be accessed
+mode = file permission settings that are used only when a file is created
+
+Return value:
+    File descriptor on success, -1 on failure
+--*/
+int
+CorUnix::InternalOpen(
+    const char *szPath,
+    int nFlags,
+    ...
+    )
+{
+    int nRet = -1;
+    int mode = 0;
+    va_list ap;
+
+    // If nFlags does not contain O_CREAT, the mode parameter will be ignored.
+    if (nFlags & O_CREAT)
+    {
+        va_start(ap, nFlags);
+        mode = va_arg(ap, int);
+        va_end(ap);
+    }
+
+    do
+    {
+#if OPEN64_IS_USED_INSTEAD_OF_OPEN
+        nRet = open64(szPath, nFlags, mode);
+#else
+        nRet = open(szPath, nFlags, mode);
+#endif
+    }
+    while ((nRet == -1) && (errno == EINTR));
+
+    return nRet;
+}
+
 PAL_ERROR
 CorUnix::InternalWriteFile(
     CPalThread *pThread,
@@ -2835,7 +2881,7 @@ GetTempFileNameW(
         prefix_stringPS.CloseBuffer(prefix_size - 1);
     }
 
-    tempfile_name = (char*)InternalMalloc(MAX_LONGPATH);
+    tempfile_name = (char*)malloc(MAX_LONGPATH);
     if (tempfile_name == NULL)
     {
         pThread->SetLastError(ERROR_NOT_ENOUGH_MEMORY);
@@ -3547,42 +3593,3 @@ BOOL FILEInitStdHandles(void)
     pStdErr = INVALID_HANDLE_VALUE;
     return FALSE;
 }
-
-/*++
-FILECleanupStdHandles
-
-Remove all regions, locked by a file pointer, from shared memory
-
-(no parameters)
-
---*/
-void FILECleanupStdHandles(void)
-{
-    HANDLE stdin_handle;
-    HANDLE stdout_handle;
-    HANDLE stderr_handle;
-
-    TRACE("closing standard handles\n");
-    stdin_handle = pStdIn;
-    stdout_handle = pStdOut;
-    stderr_handle = pStdErr;
-
-    pStdIn = INVALID_HANDLE_VALUE;
-    pStdOut = INVALID_HANDLE_VALUE;
-    pStdErr = INVALID_HANDLE_VALUE;
-
-    if (stdin_handle != INVALID_HANDLE_VALUE)
-    {
-        CloseHandle(stdin_handle);
-    }
-
-    if (stdout_handle != INVALID_HANDLE_VALUE)
-    {
-        CloseHandle(stdout_handle);
-    }
-
-    if (stderr_handle != INVALID_HANDLE_VALUE)
-    {
-        CloseHandle(stderr_handle);
-    }
-}
diff --git a/src/coreclr/pal/src/file/find.cpp b/src/coreclr/pal/src/file/find.cpp
index b874885992f8..ead5c4335e76 100644
--- a/src/coreclr/pal/src/file/find.cpp
+++ b/src/coreclr/pal/src/file/find.cpp
@@ -138,7 +138,7 @@ FindFirstFileA(
         goto done;
     }
 
-    find_data = (find_obj *)InternalMalloc(sizeof(find_obj));
+    find_data = (find_obj *)malloc(sizeof(find_obj));
     if ( find_data == NULL )
     {
         ERROR("Unable to allocate memory for find_data\n");
diff --git a/src/coreclr/pal/src/handlemgr/handlemgr.cpp b/src/coreclr/pal/src/handlemgr/handlemgr.cpp
index 5dc198c7f5a3..09405f1ec514 100644
--- a/src/coreclr/pal/src/handlemgr/handlemgr.cpp
+++ b/src/coreclr/pal/src/handlemgr/handlemgr.cpp
@@ -51,7 +51,7 @@ CSimpleHandleManager::Initialize(
        field, with the head in the global 'm_hiFreeListStart'. */
     m_dwTableSize = m_dwTableGrowthRate;
 
-    m_rghteHandleTable = reinterpret_cast<HANDLE_TABLE_ENTRY*>(InternalMalloc((m_dwTableSize * sizeof(HANDLE_TABLE_ENTRY))));
+    m_rghteHandleTable = reinterpret_cast<HANDLE_TABLE_ENTRY*>(malloc((m_dwTableSize * sizeof(HANDLE_TABLE_ENTRY))));
     if(NULL == m_rghteHandleTable)
     {
         ERROR("Unable to create initial handle table array");
@@ -108,7 +108,7 @@ CSimpleHandleManager::AllocateHandle(
         }
 
         /* grow handle table */
-        rghteTempTable = reinterpret_cast<HANDLE_TABLE_ENTRY*>(InternalRealloc(
+        rghteTempTable = reinterpret_cast<HANDLE_TABLE_ENTRY*>(realloc(
             m_rghteHandleTable,
             (m_dwTableSize + m_dwTableGrowthRate) * sizeof(HANDLE_TABLE_ENTRY)));
 
diff --git a/src/coreclr/pal/src/include/pal/cruntime.h b/src/coreclr/pal/src/include/pal/cruntime.h
index a2ce2789c306..b09559565fd1 100644
--- a/src/coreclr/pal/src/include/pal/cruntime.h
+++ b/src/coreclr/pal/src/include/pal/cruntime.h
@@ -27,121 +27,6 @@ Module Name:
 #ifdef __cplusplus
 typedef char16_t wchar_16; // __wchar_16 (which is defined in palinternal.h) is defined as wchar_16_cpp.
 
-extern "C"
-{
-#endif // __cplusplus
-
-typedef enum
-{
-    PFF_NONE  =  0,
-    PFF_MINUS =  1,
-    PFF_POUND =  2,
-    PFF_ZERO  =  4,
-    PFF_SPACE =  8,
-    PFF_PLUS  = 16
-}PRINTF_FORMAT_FLAGS;
-
-typedef enum
-{
-    WIDTH_DEFAULT = -1,
-    WIDTH_STAR    = -2, /* e.g. "%*.10s"  */
-    WIDTH_INVALID = -3  /* e.g. "%*3.10s" */
-}WIDTH_FLAGS;
-
-typedef enum
-{
-    PRECISION_DEFAULT = -1,
-    PRECISION_STAR    = -2, /* e.g. "%10.*s"  */
-    PRECISION_DOT     = -3, /* e.g. "%10.s"   */
-    PRECISION_INVALID = -4  /* e.g. "%10.*3s" */
-}PRECISION_FLAGS;
-
-typedef enum
-{
-    PFF_PREFIX_DEFAULT  = -1,
-    PFF_PREFIX_SHORT    = 1,
-    PFF_PREFIX_LONG     = 2,
-    PFF_PREFIX_LONGLONG = 3,
-    PFF_PREFIX_LONG_W   = 4
-}PRINTF_PREFIXES;
-
-typedef enum
-{
-    PFF_TYPE_DEFAULT = -1,
-    PFF_TYPE_CHAR    = 1,
-    PFF_TYPE_STRING  = 2,
-    PFF_TYPE_WSTRING = 3,
-    PFF_TYPE_INT     = 4,
-    PFF_TYPE_P       = 5,
-    PFF_TYPE_N       = 6,
-    PFF_TYPE_FLOAT   = 7
-}PRINTF_TYPES;
-
-typedef enum
-{
-    SCANF_PREFIX_SHORT = 1,
-    SCANF_PREFIX_LONG = 2,
-    SCANF_PREFIX_LONGLONG = 3
-}SCANF_PREFIXES;
-
-typedef enum
-{
-    SCANF_TYPE_CHAR     = 1,
-    SCANF_TYPE_STRING   = 2,
-    SCANF_TYPE_INT      = 3,
-    SCANF_TYPE_N        = 4,
-    SCANF_TYPE_FLOAT    = 5,
-    SCANF_TYPE_BRACKETS = 6,
-    SCANF_TYPE_SPACE    = 7
-}SCANF_TYPES;
-
-/*++
-
-struct PAL_FILE.
-Used to mimic the behavior of windows.
-fwrite under windows can set the ferror flag,
-under BSD fwrite doesn't.
---*/
-struct _FILE
-{
-   FILE *   bsdFilePtr;     /* The BSD file to be passed to the
-                            functions needing it. */
-
-   INT      PALferrorCode;  /* The ferror code that fwrite sets,
-                            incase of error */
-
-   BOOL     bTextMode;     /* Boolean variable to denote that the
-                              fle is opened in text/binary mode*/
-#if UNGETC_NOT_RETURN_EOF
-   BOOL     bWriteOnlyMode;/* Boolean variable to denote that the
-                              fle is opened in write-only mode*/
-#endif //UNGETC_NOT_RETURN_EOF
-};
-
-enum CRT_ERROR_CODES
-{
-    PAL_FILE_NOERROR = 0,
-    PAL_FILE_ERROR
-};
-
-/* Global variables storing the std streams. Defined in cruntime/file.c. */
-extern PAL_FILE PAL_Stdout;
-extern PAL_FILE PAL_Stdin;
-extern PAL_FILE PAL_Stderr;
-
-/*++
-
-Functio:
-
-    CRTInitStdStreams.
-
-    Initializes the standard streams.
-    Returns TRUE on success, FALSE otherwise.
---*/
-BOOL CRTInitStdStreams( void );
-
-#ifdef __cplusplus
-}
 #endif // __cplusplus
 
 #endif /* _PAL_CRUNTIME_H_ */
diff --git a/src/coreclr/pal/src/include/pal/file.h b/src/coreclr/pal/src/include/pal/file.h
index 52054306cb3a..0ec765317d48 100644
--- a/src/coreclr/pal/src/include/pal/file.h
+++ b/src/coreclr/pal/src/include/pal/file.h
@@ -25,6 +25,7 @@ Revision History:
 #include "pal/stackstring.hpp"
 #include <sys/types.h>
 #include <dirent.h>
+#include <glob.h>
 
 #ifdef __cplusplus
 extern "C"
@@ -101,15 +102,6 @@ Return value:
 --*/
 BOOL FILEInitStdHandles(void);
 
-/*++
-FILECleanupStdHandles
-
-Close promary handles for stdin, stdout and stderr
-
-(no parameters, no return value)
---*/
-void FILECleanupStdHandles(void);
-
 /*++
 
 Function :
@@ -123,86 +115,6 @@ Windows behavoir.
 */
 void FILEGetProperNotFoundError( LPCSTR lpPath, LPDWORD lpErrorCode );
 
-/*++
-PAL_fflush
-
-Calls fflush
-
-Input parameters:
-
-PAL_FILE *stream = stream to be flushed.
-
-Return value:
-    0 is returned on success, otherwise EOF is returned.
---*/
-int _cdecl PAL_fflush( PAL_FILE *stream );
-
-/*++
-PAL_fgets
-
-Wrapper function for InternalFgets.
-
-Input parameters:
-
-sz = stores characters read from the given file stream
-nSize = number of characters to be read
-pf = stream to read characters from
-
-Return value:
-    Returns a pointer to the string storing the characters on success
-    and NULL on failure.
---*/
-char * __cdecl PAL_fgets(char *sz, int nSize, PAL_FILE *pf);
-
-/*++
-PAL_fwrite
-
-Wrapper function for InternalFwrite
-
-Input parameters:
-
-pvBuffer = array of objects to write to the given file stream
-nSize = size of a object in bytes
-nCount = number of objects to write
-pf = stream to write characters to
-
-Return value:
-    Returns the number of objects written.
---*/
-size_t __cdecl PAL_fwrite(const void *pvBuffer, size_t nSize, size_t nCount, PAL_FILE *pf);
-
-/*++
-PAL__open
-
-Wrapper function for InternalOpen.
-
-Input parameters:
-
-szPath = pointer to a pathname of a file to be opened
-nFlags = arguments that control how the file should be accessed
-mode = file permission settings that are used only when a file is created
-
-Return value:
-    File descriptor on success, -1 on failure
---*/
-int __cdecl PAL__open(const char *szPath, int nFlags, ...);
-
-/*++
-PAL_fseek
-
-Wrapper function for fseek
-
-Input parameters:
-
-pf = a given file stream
-lOffset = distance from position to set file-position indicator
-nWhence = method used to determine the file_position indicator location relative to lOffset
-
-Return value:
-    0 on success, -1 on failure.
---*/
-int _cdecl PAL_fseek(PAL_FILE *pf, LONG lOffset, int nWhence);
-
 #ifdef __cplusplus
 }
 #endif // __cplusplus
diff --git a/src/coreclr/pal/src/include/pal/file.hpp b/src/coreclr/pal/src/include/pal/file.hpp
index c35aa5cbeb86..9f162b70ff49 100644
--- a/src/coreclr/pal/src/include/pal/file.hpp
+++ b/src/coreclr/pal/src/include/pal/file.hpp
@@ -126,31 +126,6 @@ namespace CorUnix
         PathCharString& lpBuffer
         );
 
-    /*++
-    InternalFgets
-    Wraps fgets
-    --*/
-    char *
-    InternalFgets(
-        char *sz,
-        int nSize,
-        FILE *f,
-        bool fTextMode
-        );
-
-    /*++
-    InternalFwrite
-    Wraps fwrite
-    --*/
-    size_t
-    InternalFwrite(
-        const void *pvBuffer,
-        size_t nSize,
-        size_t nCount,
-        FILE *f,
-        INT *pnErrorCode
-        );
-
     /*++
     InternalOpen
     Wraps open
@@ -224,15 +199,6 @@ Return value:
 --*/
 BOOL FILEInitStdHandles(void);
 
-/*++
-FILECleanupStdHandles
-
-Close primary handles for stdin, stdout and stderr
-
-(no parameters, no return value)
---*/
-void FILECleanupStdHandles(void);
-
 /*++
 
 Function :
diff --git a/src/coreclr/pal/src/include/pal/malloc.hpp b/src/coreclr/pal/src/include/pal/malloc.hpp
index 4e7b96da0e22..65715fa9387a 100644
--- a/src/coreclr/pal/src/include/pal/malloc.hpp
+++ b/src/coreclr/pal/src/include/pal/malloc.hpp
@@ -26,96 +26,17 @@ Module Name:
 #include <stdlib.h>
 #include <new>
 
-extern "C"
-{
-    void *
-    __cdecl
-    PAL_realloc(
-        void* pvMemblock,
-        size_t szSize
-        );
-
-    void *
-    __cdecl
-    PAL_malloc(
-        size_t szSize
-        );
-
-    void
-    __cdecl
-    PAL_free(
-        void *pvMem
-        );
-}
-
 namespace CorUnix{
-
-    void *
-    InternalRealloc(
-        void *pvMemblock,
-        size_t szSize
-        );
-
-    void *
-    InternalMalloc(
-        size_t szSize
-        );
-
-    // Define common code for "new" style allocators below.
-#define INTERNAL_NEW_COMMON()                    \
-        T *pMem = (T*)InternalMalloc(sizeof(T)); \
-        if (pMem == NULL)                        \
-            return NULL;
-
-    // Define "new" style allocators (which allocate then call a constructor) for different numbers of
-    // constructor arguments. Added based on usage.
-
-    // Default constructor (0 args) case.
-    template<class T>
-    T* InternalNew()
+    // Define "new" style allocators (which allocate then call a constructor).
+    template<class T, class... Ts>
+    T* InternalNew(Ts... args)
     {
-        INTERNAL_NEW_COMMON();
-        return new (pMem) T();
-    }
+        T* pMem = (T*)malloc(sizeof(T));
 
-    // 1 arg case.
-    template<class T, class A1>
-    T* InternalNew(A1 arg1)
-    {
-        INTERNAL_NEW_COMMON();
-        return new (pMem) T(arg1);
-    }
-
-    // 2 args case.
-    template<class T, class A1, class A2>
-    T* InternalNew(A1 arg1, A2 arg2)
-    {
-        INTERNAL_NEW_COMMON();
-        return new (pMem) T(arg1, arg2);
-    }
-
-    // 3 args case.
-    template<class T, class A1, class A2, class A3>
-    T* InternalNew(A1 arg1, A2 arg2, A3 arg3)
-    {
-        INTERNAL_NEW_COMMON();
-        return new (pMem) T(arg1, arg2, arg3);
-    }
-
-    // 4 args case.
-    template<class T, class A1, class A2, class A3, class A4>
-    T* InternalNew(A1 arg1, A2 arg2, A3 arg3, A4 arg4)
-    {
-        INTERNAL_NEW_COMMON();
-        return new (pMem) T(arg1, arg2, arg3, arg4);
-    }
+        if (pMem == NULL)
+            return NULL;
 
-    // 5 args case.
-    template<class T, class A1, class A2, class A3, class A4, class A5>
-    T* InternalNew(A1 arg1, A2 arg2, A3 arg3, A4 arg4, A5 arg5)
-    {
-        INTERNAL_NEW_COMMON();
-        return new (pMem) T(arg1, arg2, arg3, arg4, arg5);
+        return new (pMem) T(args...);
     }
 
     template<class T> T* InternalNewArray(size_t cElements)
@@ -123,7 +44,7 @@ namespace CorUnix{
         size_t cbSize = (cElements * sizeof(T)) + sizeof(size_t);
         T *pMem;
 
-        pMem = (T*)InternalMalloc(cbSize);
+        pMem = (T*)malloc(cbSize);
 
         if (pMem == NULL)
             return NULL;
diff --git a/src/coreclr/pal/src/include/pal/misc.h b/src/coreclr/pal/src/include/pal/misc.h
index aa5b2b4852b6..ffa6448ed7d3 100644
--- a/src/coreclr/pal/src/include/pal/misc.h
+++ b/src/coreclr/pal/src/include/pal/misc.h
@@ -25,23 +25,6 @@ extern "C"
 {
 #endif // __cplusplus
 
-/*++
-Function :
-
-    PAL_rand
-
-    Calls rand and mitigates the difference between RAND_MAX
-    on Windows and FreeBSD.
---*/
-int __cdecl PAL_rand(void);
-
-/*++
-Function :
-
-    PAL_time
---*/
-PAL_time_t __cdecl PAL_time(PAL_time_t*);
-
 /*++
 Function :
     MsgBoxInitialize
diff --git a/src/coreclr/pal/src/include/pal/palinternal.h b/src/coreclr/pal/src/include/pal/palinternal.h
index 632d769bfcdf..3fa16f38cfbe 100644
--- a/src/coreclr/pal/src/include/pal/palinternal.h
+++ b/src/coreclr/pal/src/include/pal/palinternal.h
@@ -161,155 +161,6 @@ function_name() to call the system's implementation
 #include "pal_perf.h"
 #endif
 
-/* C runtime functions needed to be renamed to avoid duplicate definition
-   of those functions when including standard C header files */
-#define memcpy DUMMY_memcpy
-#define memcmp DUMMY_memcmp
-#define memset DUMMY_memset
-#define memmove DUMMY_memmove
-#define memchr DUMMY_memchr
-#define atoll DUMMY_atoll
-#define strlen DUMMY_strlen
-#define stricmp DUMMY_stricmp
-#define strstr DUMMY_strstr
-#define strcmp DUMMY_strcmp
-#define strcat DUMMY_strcat
-#define strncat DUMMY_strncat
-#define strcpy DUMMY_strcpy
-#define strncmp DUMMY_strncmp
-#define strncpy DUMMY_strncpy
-#define strchr DUMMY_strchr
-#define strrchr DUMMY_strrchr
-#define strpbrk DUMMY_strpbrk
-#define strtod DUMMY_strtod
-#define strtoul DUMMY_strtoul
-#define strtoull DUMMY_strtoull
-#define strnlen  DUMMY_strnlen
-#define strcasecmp DUMMY_strcasecmp
-#define strncasecmp DUMMY_strncasecmp
-#define strdup DUMMY_strdup
-#define strtok_r DUMMY_strtok_r
-#define tolower DUMMY_tolower
-#define toupper DUMMY_toupper
-#define isprint DUMMY_isprint
-#define isdigit DUMMY_isdigit
-#define iswalpha DUMMY_iswalpha
-#define iswdigit DUMMY_iswdigit
-#define iswupper DUMMY_iswupper
-#define towupper DUMMY_towupper
-#define towlower DUMMY_towlower
-#define iswprint DUMMY_iswprint
-#define iswspace DUMMY_iswspace
-#define iswxdigit DUMMY_iswxdigit
-#define wint_t DUMMY_wint_t
-#define srand DUMMY_srand
-#define atoi DUMMY_atoi
-#define atof DUMMY_atof
-#define size_t DUMMY_size_t
-#define time_t PAL_time_t
-#define va_list DUMMY_va_list
-#define abs DUMMY_abs
-#define llabs DUMMY_llabs
-#define ceil DUMMY_ceil
-#define cos DUMMY_cos
-#define cosh DUMMY_cosh
-#define fabs DUMMY_fabs
-#define floor DUMMY_floor
-#define fmod DUMMY_fmod
-#define modf DUMMY_modf
-#define sin DUMMY_sin
-#define sinh DUMMY_sinh
-#define sqrt DUMMY_sqrt
-#define tan DUMMY_tan
-#define tanh DUMMY_tanh
-#define trunc DUMMY_trunc
-#define ceilf DUMMY_ceilf
-#define cosf DUMMY_cosf
-#define coshf DUMMY_coshf
-#define fabsf DUMMY_fabsf
-#define floorf DUMMY_floorf
-#define fmodf DUMMY_fmodf
-#define modff DUMMY_modff
-#define sinf DUMMY_sinf
-#define sinhf DUMMY_sinhf
-#define sqrtf DUMMY_sqrtf
-#define tanf DUMMY_tanf
-#define tanhf DUMMY_tanhf
-#define truncf DUMMY_truncf
-#define remove DUMMY_remove
-#define printf DUMMY_printf
-#define vprintf DUMMY_vprintf
-
-/* RAND_MAX needed to be renamed to avoid duplicate definition when including
-   stdlib.h header files. PAL_RAND_MAX should have the same value as RAND_MAX
-   defined in pal.h  */
-#define PAL_RAND_MAX 0x7fff
-
-/* The standard headers define isspace and isxdigit as macros and functions,
-   To avoid redefinition problems, undefine those macros. */
-#ifdef isspace
-#undef isspace
-#endif
-#ifdef isxdigit
-#undef isxdigit
-#endif
-#ifdef isalpha
-#undef isalpha
-#endif
-#ifdef isalnum
-#undef isalnum
-#endif
-#define isspace DUMMY_isspace
-#define isxdigit DUMMY_isxdigit
-#define isalpha DUMMY_isalpha
-#define isalnum DUMMY_isalnum
-
-#ifdef stdin
-#undef stdin
-#endif
-#ifdef stdout
-#undef stdout
-#endif
-#ifdef stderr
-#undef stderr
-#endif
-
-#ifdef SCHAR_MIN
-#undef SCHAR_MIN
-#endif
-#ifdef SCHAR_MAX
-#undef SCHAR_MAX
-#endif
-#ifdef SHRT_MIN
-#undef SHRT_MIN
-#endif
-#ifdef SHRT_MAX
-#undef SHRT_MAX
-#endif
-#ifdef UCHAR_MAX
-#undef UCHAR_MAX
-#endif
-#ifdef USHRT_MAX
-#undef USHRT_MAX
-#endif
-#ifdef ULONG_MAX
-#undef ULONG_MAX
-#endif
-#ifdef LONG_MIN
-#undef LONG_MIN
-#endif
-#ifdef LONG_MAX
-#undef LONG_MAX
-#endif
-#ifdef RAND_MAX
-#undef RAND_MAX
-#endif
-#ifdef DBL_MAX
-#undef DBL_MAX
-#endif
-#ifdef FLT_MAX
-#undef FLT_MAX
-#endif
 #ifdef __record_type_class
 #undef __record_type_class
 #endif
@@ -317,23 +168,6 @@ function_name() to call the system's implementation
 #undef __real_type_class
 #endif
 
-// The standard headers define va_start and va_end as macros,
-// To avoid redefinition problems, undefine those macros.
-#ifdef va_start
-#undef va_start
-#endif
-#ifdef va_end
-#undef va_end
-#endif
-#ifdef va_copy
-#undef va_copy
-#endif
-
-#define ptrdiff_t PAL_ptrdiff_t
-#define intptr_t PAL_intptr_t
-#define uintptr_t PAL_uintptr_t
-#define timeval PAL_timeval
-#define FILE PAL_FILE
 
 #include "pal.h"
 #include "palprivate.h"
@@ -350,204 +184,6 @@ function_name() to call the system's implementation
 #undef _BitScanReverse64
 #endif
 
-/* pal.h defines alloca(3) as a compiler builtin.
-   Redefining it to native libc will result in undefined breakage because
-   a compiler is allowed to make assumptions about the stack and frame
-   pointers. */
-
-/* Undef all functions and types previously defined so those functions and
-   types could be mapped to the C runtime and socket implementation of the
-   native OS */
-#undef exit
-#undef memcpy
-#undef memcmp
-#undef memset
-#undef memmove
-#undef memchr
-#undef atoll
-#undef strlen
-#undef strnlen
-#undef wcsnlen
-#undef stricmp
-#undef strstr
-#undef strcmp
-#undef strcat
-#undef strncat
-#undef strcpy
-#undef strncmp
-#undef strncpy
-#undef strchr
-#undef strrchr
-#undef strpbrk
-#undef strtoul
-#undef strtoull
-#undef strcasecmp
-#undef strncasecmp
-#undef strdup
-#undef strtod
-#undef strtok_r
-#undef strdup
-#undef tolower
-#undef toupper
-#undef isprint
-#undef isdigit
-#undef isspace
-#undef iswdigit
-#undef iswxdigit
-#undef iswalpha
-#undef iswprint
-#undef isxdigit
-#undef isalpha
-#undef isalnum
-#undef iswalpha
-#undef iswdigit
-#undef iswupper
-#undef towupper
-#undef towlower
-#undef wint_t
-#undef atoi
-#undef atof
-#undef malloc
-#undef realloc
-#undef free
-#undef qsort
-#undef bsearch
-#undef time
-#undef FILE
-#undef fclose
-#undef fopen
-#undef fread
-#undef ferror
-#undef ftell
-#undef fflush
-#undef fwrite
-#undef fgets
-#undef fputs
-#undef fseek
-#undef fgetpos
-#undef fsetpos
-#undef getcwd
-#undef _flushall
-#undef setvbuf
-#undef unlink
-#undef size_t
-#undef time_t
-#undef va_list
-#undef va_start
-#undef va_end
-#undef va_copy
-#undef va_arg
-#undef stdin
-#undef stdout
-#undef stderr
-#undef abs
-#undef llabs
-#undef acos
-#undef acosh
-#undef asin
-#undef asinh
-#undef atan
-#undef atanh
-#undef atan2
-#undef cbrt
-#undef ceil
-#undef cos
-#undef cosh
-#undef exp
-#undef fabs
-#undef floor
-#undef fmod
-#undef fma
-#undef ilogb
-#undef log
-#undef log2
-#undef log10
-#undef modf
-#undef pow
-#undef sin
-#undef sincos
-#undef sinh
-#undef sqrt
-#undef tan
-#undef tanh
-#undef trunc
-#undef acosf
-#undef acoshf
-#undef asinf
-#undef asinhf
-#undef atanf
-#undef atanhf
-#undef atan2f
-#undef cbrtf
-#undef ceilf
-#undef cosf
-#undef coshf
-#undef expf
-#undef fabsf
-#undef floorf
-#undef fmodf
-#undef fmaf
-#undef ilogbf
-#undef logf
-#undef log2f
-#undef log10f
-#undef modff
-#undef powf
-#undef sinf
-#undef sincosf
-#undef sinhf
-#undef sqrtf
-#undef tanf
-#undef tanhf
-#undef truncf
-#undef rand
-#undef srand
-#undef errno
-#undef getenv
-#undef open
-#undef glob
-#undef remove
-#undef printf
-#undef vprintf
-#undef ptrdiff_t
-#undef intptr_t
-#undef uintptr_t
-#undef timeval
-
-#undef fprintf
-#undef vfprintf
-#undef iswupper
-#undef iswspace
-#undef towlower
-#undef towupper
-
-#ifdef HOST_AMD64
-#undef _mm_getcsr
-#undef _mm_setcsr
-#endif // HOST_AMD64
-
-#undef min
-#undef max
-
-#undef SCHAR_MIN
-#undef SCHAR_MAX
-#undef UCHAR_MAX
-#undef SHRT_MIN
-#undef SHRT_MAX
-#undef USHRT_MAX
-#undef LONG_MIN
-#undef LONG_MAX
-#undef ULONG_MAX
-#undef RAND_MAX
-#undef DBL_MAX
-#undef FLT_MAX
-#undef __record_type_class
-#undef __real_type_class
-
-#if HAVE_CHAR_BIT
-#undef CHAR_BIT
-#endif
-
 // We need a sigsetjmp prototype in pal.h for the SEH macros, but we
 // can't use the "real" prototype (because we don't want to define sigjmp_buf).
 // So we must rename the "real" sigsetjmp to avoid redefinition errors.
@@ -569,17 +205,6 @@ function_name() to call the system's implementation
 // https://gcc.gnu.org/ml/libstdc++/2016-01/msg00025.html
 #define _GLIBCXX_INCLUDE_NEXT_C_HEADERS 1
 
-#define _WITH_GETLINE
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <sys/types.h>
-#include <pwd.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <glob.h>
-
 #ifdef __APPLE__
 
 #undef GetCurrentThread
@@ -714,12 +339,6 @@ const char StackOverflowMessage[] = "Stack overflow.\n";
 
 #endif // __cplusplus
 
-#if __has_cpp_attribute(fallthrough)
-#define FALLTHROUGH [[fallthrough]]
-#else
-#define FALLTHROUGH
-#endif
-
 DWORD PALAPI GetCurrentSessionId();
 
 #endif /* _PAL_INTERNAL_H_ */
diff --git a/src/coreclr/pal/src/include/pal/printfcpp.hpp b/src/coreclr/pal/src/include/pal/printfcpp.hpp
deleted file mode 100644
index 44526cb8a4ea..000000000000
--- a/src/coreclr/pal/src/include/pal/printfcpp.hpp
+++ /dev/null
@@ -1,46 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    pal/printfcpp.hpp
-
-Abstract:
-    Declarations for suspension safe memory allocation functions
-
-
-
---*/
-
-#ifndef _PRINTFCPP_HPP
-#define _PRINTFCPP_HPP
-
-#ifdef __cplusplus
-#include "pal/threadinfo.hpp"
-#endif
-
-#include <stdarg.h>
-
-#ifdef __cplusplus
-typedef char16_t wchar_16; // __wchar_16_cpp (which is defined in palinternal.h) needs to be redefined to wchar_16.
-extern "C"
-{
-#endif // __cplusplus
-
-    int
-    __cdecl
-    PAL_vfprintf(
-        PAL_FILE *stream,
-        const char *format,
-        va_list ap);
-
-#ifdef __cplusplus
-}
-#endif // __cplusplus
-
-#endif // _PRINTFCPP_HPP
-
diff --git a/src/coreclr/pal/src/include/pal/process.h b/src/coreclr/pal/src/include/pal/process.h
index 5b0cd0739ed5..1c48093af219 100644
--- a/src/coreclr/pal/src/include/pal/process.h
+++ b/src/coreclr/pal/src/include/pal/process.h
@@ -155,7 +155,10 @@ BOOL PROCAbortInitialize();
 
   Does not return
 --*/
+#if !defined(HOST_ARM)  // PAL_NORETURN produces broken unwinding information for this method
+                        // making crash dumps impossible to analyze
 PAL_NORETURN
+#endif
 VOID PROCAbort(int signal = SIGABRT, siginfo_t* siginfo = nullptr);
 
 /*++
diff --git a/src/coreclr/pal/src/include/pal/stackstring.hpp b/src/coreclr/pal/src/include/pal/stackstring.hpp
index 4a27a15579c7..22e79a571502 100644
--- a/src/coreclr/pal/src/include/pal/stackstring.hpp
+++ b/src/coreclr/pal/src/include/pal/stackstring.hpp
@@ -21,7 +21,7 @@ class StackString
     void DeleteBuffer()
     {
         if (m_innerBuffer != m_buffer)
-            PAL_free(m_buffer);
+            free(m_buffer);
 
         m_buffer = NULL;
         return;
@@ -44,7 +44,7 @@ class StackString
             m_buffer = NULL;
         }
 
-        T * newBuffer = (T *)PAL_realloc(m_buffer, (count_allocated + 1) * sizeof(T));
+        T * newBuffer = (T *)realloc(m_buffer, (count_allocated + 1) * sizeof(T));
         if (NULL == newBuffer)
         {
             SetLastError(ERROR_NOT_ENOUGH_MEMORY);
diff --git a/src/coreclr/pal/src/include/pal/threadsusp.hpp b/src/coreclr/pal/src/include/pal/threadsusp.hpp
index c3e59df89d79..d9441372a5a2 100644
--- a/src/coreclr/pal/src/include/pal/threadsusp.hpp
+++ b/src/coreclr/pal/src/include/pal/threadsusp.hpp
@@ -32,7 +32,6 @@ Module Name:
 // instantiation time.
 #include "pal/threadinfo.hpp"
 #include "pal/thread.hpp"
-#include "pal/printfcpp.hpp"
 #include "pal/mutex.hpp"
 #include "pal/init.h"
 #if !HAVE_MACH_EXCEPTIONS
diff --git a/src/coreclr/pal/src/include/pal/utils.h b/src/coreclr/pal/src/include/pal/utils.h
index fdd5b3b965a1..980cdf56ab6c 100644
--- a/src/coreclr/pal/src/include/pal/utils.h
+++ b/src/coreclr/pal/src/include/pal/utils.h
@@ -194,7 +194,7 @@ class StringHolder
         StringHolder() : data(NULL) { }
         ~StringHolder()
         {
-            PAL_free( data);
+            free( data);
         }
 
         operator LPSTR () { return data;}
diff --git a/src/coreclr/pal/src/init/pal.cpp b/src/coreclr/pal/src/init/pal.cpp
index c88febfc2616..67fcbb92bd25 100644
--- a/src/coreclr/pal/src/init/pal.cpp
+++ b/src/coreclr/pal/src/init/pal.cpp
@@ -96,13 +96,6 @@ int CacheLineSize;
 
 using namespace CorUnix;
 
-//
-// $$TODO The C++ compiler doesn't like pal/cruntime.h so duplicate the
-// necessary prototype here
-//
-
-extern "C" BOOL CRTInitStdStreams(void);
-
 extern bool g_running_in_exe;
 
 #if defined(HOST_ARM64)
@@ -288,6 +281,14 @@ InitializeDefaultStackSize()
         }
     }
 
+#ifdef HOST_APPLE
+    // Match Windows stack size
+    if (g_defaultStackSize == 0)
+    {
+        g_defaultStackSize = 1536 * 1024;
+    }
+#endif
+
 #ifdef ENSURE_PRIMARY_STACK_SIZE
     if (g_defaultStackSize == 0)
     {
@@ -665,13 +666,6 @@ Initialize(
             }
         }
 
-        if (FALSE == CRTInitStdStreams())
-        {
-            ERROR("Unable to initialize CRT standard streams\n");
-            palError = ERROR_PALINIT_STD_STREAMS;
-            goto CLEANUP15;
-        }
-
         TRACE("First-time PAL initialization complete.\n");
         init_count++;
 
@@ -691,9 +685,6 @@ Initialize(
     }
     goto done;
 
-    /* No cleanup required for CRTInitStdStreams */
-CLEANUP15:
-    FILECleanupStdHandles();
 CLEANUP14:
     SEHCleanup();
 CLEANUP13:
@@ -1179,7 +1170,7 @@ static LPWSTR INIT_FormatCommandLine (int argc, const char * const *argv)
         length+=3;
         length+=strlen(argv[i])*2;
     }
-    command_line = reinterpret_cast<LPSTR>(InternalMalloc(length));
+    command_line = reinterpret_cast<LPSTR>(malloc(length != 0 ? length : 1));
 
     if(!command_line)
     {
@@ -1231,7 +1222,7 @@ static LPWSTR INIT_FormatCommandLine (int argc, const char * const *argv)
         return nullptr;
     }
 
-    retval = reinterpret_cast<LPWSTR>(InternalMalloc((sizeof(WCHAR)*i)));
+    retval = reinterpret_cast<LPWSTR>(malloc((sizeof(WCHAR)*i)));
     if(retval == nullptr)
     {
         ERROR("can't allocate memory for Unicode command line!\n");
@@ -1287,7 +1278,7 @@ static LPWSTR INIT_GetCurrentEXEPath()
         return nullptr;
     }
 
-    return_value = reinterpret_cast<LPWSTR>(InternalMalloc((return_size*sizeof(WCHAR))));
+    return_value = reinterpret_cast<LPWSTR>(malloc((return_size*sizeof(WCHAR))));
     if (nullptr == return_value)
     {
         ERROR("Not enough memory to create full path\n");
diff --git a/src/coreclr/pal/src/loader/module.cpp b/src/coreclr/pal/src/loader/module.cpp
index f0651d3bad58..0cda5045e01e 100644
--- a/src/coreclr/pal/src/loader/module.cpp
+++ b/src/coreclr/pal/src/loader/module.cpp
@@ -1548,7 +1548,7 @@ static MODSTRUCT *LOADAllocModule(NATIVE_LIBRARY_HANDLE dl_handle, LPCSTR name)
     LPWSTR wide_name;
 
     /* no match found : try to create a new module structure */
-    module = (MODSTRUCT *)InternalMalloc(sizeof(MODSTRUCT));
+    module = (MODSTRUCT *)malloc(sizeof(MODSTRUCT));
     if (nullptr == module)
     {
         ERROR("malloc() failed! errno is %d (%s)\n", errno, strerror(errno));
@@ -1805,11 +1805,11 @@ MODSTRUCT *LOADGetPalLibrary()
         if (g_szCoreCLRPath == nullptr)
         {
             size_t  cbszCoreCLRPath = strlen(info.dli_fname) + 1;
-            g_szCoreCLRPath = (char*) InternalMalloc(cbszCoreCLRPath);
+            g_szCoreCLRPath = (char*) malloc(cbszCoreCLRPath);
 
             if (g_szCoreCLRPath == nullptr)
             {
-                ERROR("LOADGetPalLibrary: InternalMalloc failed!");
+                ERROR("LOADGetPalLibrary: malloc failed!");
                 goto exit;
             }
 
diff --git a/src/coreclr/pal/src/locale/unicode.cpp b/src/coreclr/pal/src/locale/unicode.cpp
index 8bfa58608e59..862f98a6b212 100644
--- a/src/coreclr/pal/src/locale/unicode.cpp
+++ b/src/coreclr/pal/src/locale/unicode.cpp
@@ -109,9 +109,9 @@ BOOL GetUnicodeData(INT nUnicodeValue, UnicodeDataRec *pDataRec)
     return bRet;
 }
 
-wchar_16
+char16_t
 __cdecl
-PAL_ToUpperInvariant( wchar_16 c )
+PAL_ToUpperInvariant( char16_t c )
 {
     UnicodeDataRec dataRec;
 
@@ -140,9 +140,9 @@ PAL_ToUpperInvariant( wchar_16 c )
     }
 }
 
-wchar_16
+char16_t
 __cdecl
-PAL_ToLowerInvariant( wchar_16 c )
+PAL_ToLowerInvariant( char16_t c )
 {
     UnicodeDataRec dataRec;
 
diff --git a/src/coreclr/pal/src/map/map.cpp b/src/coreclr/pal/src/map/map.cpp
index 707284b58fad..4f8cb6190c6d 100644
--- a/src/coreclr/pal/src/map/map.cpp
+++ b/src/coreclr/pal/src/map/map.cpp
@@ -35,6 +35,7 @@ Module Name:
 #include <sys/types.h>
 #include <sys/mman.h>
 #include <unistd.h>
+#include <fcntl.h>
 #include <errno.h>
 
 #include "rt/ntimage.h"
@@ -1128,7 +1129,7 @@ CorUnix::InternalMapViewOfFile(
         // the global list.
         //
 
-        PMAPPED_VIEW_LIST pNewView = (PMAPPED_VIEW_LIST)InternalMalloc(sizeof(*pNewView));
+        PMAPPED_VIEW_LIST pNewView = (PMAPPED_VIEW_LIST)malloc(sizeof(*pNewView));
         if (NULL != pNewView)
         {
             pNewView->lpAddress = pvBaseAddress;
@@ -1832,7 +1833,7 @@ static PMAPPED_VIEW_LIST FindSharedMappingReplacement(
                 /* The new desired mapping is fully contained in the
                    one just found: we can reuse this one */
 
-                pNewView = (PMAPPED_VIEW_LIST)InternalMalloc(sizeof(MAPPED_VIEW_LIST));
+                pNewView = (PMAPPED_VIEW_LIST)malloc(sizeof(MAPPED_VIEW_LIST));
                 if (pNewView)
                 {
                     memcpy(pNewView, pView, sizeof(*pNewView));
@@ -1867,7 +1868,7 @@ static NativeMapHolder * NewNativeMapHolder(CPalThread *pThread, LPVOID address,
     }
 
     pThisMapHolder =
-        (NativeMapHolder *)InternalMalloc(sizeof(NativeMapHolder));
+        (NativeMapHolder *)malloc(sizeof(NativeMapHolder));
 
     if (pThisMapHolder)
     {
@@ -1933,7 +1934,7 @@ MAPRecordMapping(
 
     PAL_ERROR palError = NO_ERROR;
     PMAPPED_VIEW_LIST pNewView;
-    pNewView = (PMAPPED_VIEW_LIST)InternalMalloc(sizeof(*pNewView));
+    pNewView = (PMAPPED_VIEW_LIST)malloc(sizeof(*pNewView));
     if (NULL != pNewView)
     {
         pNewView->lpAddress = addr;
diff --git a/src/coreclr/pal/src/map/virtual.cpp b/src/coreclr/pal/src/map/virtual.cpp
index 364f3bba1f02..3145faac5f50 100644
--- a/src/coreclr/pal/src/map/virtual.cpp
+++ b/src/coreclr/pal/src/map/virtual.cpp
@@ -401,7 +401,7 @@ static BOOL VIRTUALStoreAllocationInfo(
         return FALSE;
     }
 
-    if (!(pNewEntry = (PCMI)InternalMalloc(sizeof(*pNewEntry))))
+    if (!(pNewEntry = (PCMI)malloc(sizeof(*pNewEntry))))
     {
         ERROR( "Unable to allocate memory for the structure.\n");
         return FALSE;
diff --git a/src/coreclr/pal/src/misc/cgroup.cpp b/src/coreclr/pal/src/misc/cgroup.cpp
index ee3c0ae58439..ecdbccf2ee66 100644
--- a/src/coreclr/pal/src/misc/cgroup.cpp
+++ b/src/coreclr/pal/src/misc/cgroup.cpp
@@ -28,7 +28,6 @@ SET_DEFAULT_DEBUG_CHANNEL(MISC);
 #endif
 
 #define CGROUP2_SUPER_MAGIC 0x63677270
-#define TMPFS_MAGIC 0x01021994
 
 #define BASE_TEN 10
 
@@ -55,7 +54,7 @@ class CGroup
 
     static void Cleanup()
     {
-        PAL_free(s_cpu_cgroup_path);
+        free(s_cpu_cgroup_path);
     }
 
     static bool GetCpuLimit(UINT *val)
@@ -94,12 +93,16 @@ class CGroup
         if (result != 0)
             return 0;
 
-        switch (stats.f_type)
+        if (stats.f_type == CGROUP2_SUPER_MAGIC)
         {
-            case TMPFS_MAGIC: return 1;
-            case CGROUP2_SUPER_MAGIC: return 2;
-            default:
-                return 0;
+            return 2;
+        }
+        else
+        {
+            // Assume that if /sys/fs/cgroup exists and the file system type is not cgroup2fs,
+            // it is cgroup v1. Typically the file system type is tmpfs, but other values have
+            // been seen in the wild.
+            return 1;
         }
 #endif
     }
@@ -126,7 +129,7 @@ class CGroup
 
         len = strlen(hierarchy_mount);
         len += strlen(cgroup_path_relative_to_mount);
-        cgroup_path = (char*)PAL_malloc(len+1);
+        cgroup_path = (char*)malloc(len+1);
         if (cgroup_path == nullptr)
            goto done;
 
@@ -157,8 +160,8 @@ class CGroup
         strcat_s(cgroup_path, len+1, cgroup_path_relative_to_mount + common_path_prefix_len);
 
     done:
-        PAL_free(hierarchy_root);
-        PAL_free(cgroup_path_relative_to_mount);
+        free(hierarchy_root);
+        free(cgroup_path_relative_to_mount);
         *pcgroup_path = cgroup_path;
         if (pcgroup_hierarchy_mount != nullptr)
         {
@@ -166,7 +169,7 @@ class CGroup
         }
         else
         {
-            PAL_free(hierarchy_mount);
+            free(hierarchy_mount);
         }
     }
 
@@ -187,14 +190,14 @@ class CGroup
         {
             if (filesystemType == nullptr || lineLen > maxLineLen)
             {
-                PAL_free(filesystemType);
+                free(filesystemType);
                 filesystemType = nullptr;
-                PAL_free(options);
+                free(options);
                 options = nullptr;
-                filesystemType = (char*)PAL_malloc(lineLen+1);
+                filesystemType = (char*)malloc(lineLen+1);
                 if (filesystemType == nullptr)
                     goto done;
-                options = (char*)PAL_malloc(lineLen+1);
+                options = (char*)malloc(lineLen+1);
                 if (options == nullptr)
                     goto done;
                 maxLineLen = lineLen;
@@ -227,10 +230,10 @@ class CGroup
                 }
                 if (isSubsystemMatch)
                 {
-                    mountpath = (char*)PAL_malloc(lineLen+1);
+                    mountpath = (char*)malloc(lineLen+1);
                     if (mountpath == nullptr)
                         goto done;
-                    mountroot = (char*)PAL_malloc(lineLen+1);
+                    mountroot = (char*)malloc(lineLen+1);
                     if (mountroot == nullptr)
                         goto done;
 
@@ -249,10 +252,10 @@ class CGroup
             }
         }
     done:
-        PAL_free(mountpath);
-        PAL_free(mountroot);
-        PAL_free(filesystemType);
-        PAL_free(options);
+        free(mountpath);
+        free(mountroot);
+        free(filesystemType);
+        free(options);
         free(line);
         if (mountinfofile)
             fclose(mountinfofile);
@@ -275,14 +278,14 @@ class CGroup
         {
             if (subsystem_list == nullptr || lineLen > maxLineLen)
             {
-                PAL_free(subsystem_list);
+                free(subsystem_list);
                 subsystem_list = nullptr;
-                PAL_free(cgroup_path);
+                free(cgroup_path);
                 cgroup_path = nullptr;
-                subsystem_list = (char*)PAL_malloc(lineLen+1);
+                subsystem_list = (char*)malloc(lineLen+1);
                 if (subsystem_list == nullptr)
                     goto done;
-                cgroup_path = (char*)PAL_malloc(lineLen+1);
+                cgroup_path = (char*)malloc(lineLen+1);
                 if (cgroup_path == nullptr)
                     goto done;
                 maxLineLen = lineLen;
@@ -332,10 +335,10 @@ class CGroup
             }
         }
     done:
-        PAL_free(subsystem_list);
+        free(subsystem_list);
         if (!result)
         {
-            PAL_free(cgroup_path);
+            free(cgroup_path);
             cgroup_path = nullptr;
         }
         free(line);
diff --git a/src/coreclr/pal/src/misc/environ.cpp b/src/coreclr/pal/src/misc/environ.cpp
index a31d6b177760..4980d213fa3b 100644
--- a/src/coreclr/pal/src/misc/environ.cpp
+++ b/src/coreclr/pal/src/misc/environ.cpp
@@ -183,7 +183,7 @@ GetEnvironmentVariableW(
         goto done;
     }
 
-    inBuff = (CHAR *)PAL_malloc(inBuffSize);
+    inBuff = (CHAR *)malloc(inBuffSize);
     if (inBuff == nullptr)
     {
         ERROR("malloc failed\n");
@@ -193,7 +193,7 @@ GetEnvironmentVariableW(
 
     if (nSize)
     {
-        outBuff = (CHAR *)PAL_malloc(nSize*2);
+        outBuff = (CHAR *)malloc(nSize*2);
         if (outBuff == nullptr)
         {
             ERROR("malloc failed\n");
@@ -243,8 +243,8 @@ GetEnvironmentVariableW(
     }
 
 done:
-    PAL_free(outBuff);
-    PAL_free(inBuff);
+    free(outBuff);
+    free(inBuff);
 
     LOGEXIT("GetEnvironmentVariableW returns DWORD 0x%x\n", size);
     PERF_EXIT(GetEnvironmentVariableW);
@@ -310,7 +310,7 @@ SetEnvironmentVariableW(
         goto done;
     }
 
-    name = (PCHAR)PAL_malloc(sizeof(CHAR)* nameSize);
+    name = (PCHAR)malloc(sizeof(CHAR)* nameSize);
     if (name == nullptr)
     {
         ERROR("malloc failed\n");
@@ -336,7 +336,7 @@ SetEnvironmentVariableW(
             goto done;
         }
 
-        value = (PCHAR)PAL_malloc(sizeof(CHAR)*valueSize);
+        value = (PCHAR)malloc(sizeof(CHAR)*valueSize);
 
         if (value == nullptr)
         {
@@ -356,8 +356,8 @@ SetEnvironmentVariableW(
 
     bRet = SetEnvironmentVariableA(name, value);
 done:
-    PAL_free(value);
-    PAL_free(name);
+    free(value);
+    free(name);
 
     LOGEXIT("SetEnvironmentVariableW returning BOOL %d\n", bRet);
     PERF_EXIT(SetEnvironmentVariableW);
@@ -414,7 +414,7 @@ GetEnvironmentStringsW(
         envNum += len;
     }
 
-    wenviron = (WCHAR *)PAL_malloc(sizeof(WCHAR)* (envNum + 1));
+    wenviron = (WCHAR *)malloc(sizeof(WCHAR)* (envNum + 1));
     if (wenviron == nullptr)
     {
         ERROR("malloc failed\n");
@@ -476,7 +476,7 @@ FreeEnvironmentStringsW(
 
     if (lpValue != nullptr)
     {
-        PAL_free(lpValue);
+        free(lpValue);
     }
 
     LOGEXIT("FreeEnvironmentStringW returning BOOL TRUE\n");
@@ -559,7 +559,7 @@ SetEnvironmentVariableA(
     {
         // All the conditions are met. Set the variable.
         int iLen = strlen(lpName) + strlen(lpValue) + 2;
-        LPSTR string = (LPSTR) PAL_malloc(iLen);
+        LPSTR string = (LPSTR) malloc(iLen);
         if (string == nullptr)
         {
             bRet = FALSE;
@@ -571,7 +571,7 @@ SetEnvironmentVariableA(
         sprintf_s(string, iLen, "%s=%s", lpName, lpValue);
         nResult = EnvironPutenv(string, FALSE) ? 0 : -1;
 
-        PAL_free(string);
+        free(string);
         string = nullptr;
 
         // If EnvironPutenv returns FALSE, it almost certainly failed to allocate memory.
diff --git a/src/coreclr/pal/src/misc/fmtmessage.cpp b/src/coreclr/pal/src/misc/fmtmessage.cpp
index bd9a649a3583..0598914b06cb 100644
--- a/src/coreclr/pal/src/misc/fmtmessage.cpp
+++ b/src/coreclr/pal/src/misc/fmtmessage.cpp
@@ -25,8 +25,6 @@ Revision History:
 #include "pal/module.h"
 #include "pal/misc.h"
 
-#include "pal/printfcpp.hpp"
-
 #include "errorstrings.h"
 
 #include <stdarg.h>
@@ -63,7 +61,7 @@ static LPWSTR FMTMSG_GetMessageString( DWORD dwErrCode )
         allocChars = MAX_ERROR_STRING_LENGTH + 1;
     }
 
-    LPWSTR lpRetVal = (LPWSTR)PAL_malloc(allocChars * sizeof(WCHAR));
+    LPWSTR lpRetVal = (LPWSTR)malloc(allocChars * sizeof(WCHAR));
 
     if (lpRetVal)
     {
@@ -142,7 +140,7 @@ static INT FMTMSG__watoi( LPWSTR str )
         UINT NumOfBytes = 0; \
         nSize *= 2; \
         NumOfBytes = nSize * sizeof( WCHAR ); \
-        lpTemp = static_cast<WCHAR *>( PAL_malloc( NumOfBytes ) ); \
+        lpTemp = static_cast<WCHAR *>( malloc( NumOfBytes ) ); \
         TRACE( "Growing the buffer.\n" );\
         \
         if ( !lpTemp ) \
@@ -329,7 +327,7 @@ FormatMessageW(
     }
 
     lpWorkingString = static_cast<WCHAR *>(
-        PAL_malloc( nSize * sizeof( WCHAR ) ) );
+        malloc( nSize * sizeof( WCHAR ) ) );
     if ( !lpWorkingString )
     {
         ERROR( "Unable to allocate memory for the working string.\n" );
diff --git a/src/coreclr/pal/src/misc/miscpalapi.cpp b/src/coreclr/pal/src/misc/miscpalapi.cpp
index 06129210ef9c..f0d32f0388e8 100644
--- a/src/coreclr/pal/src/misc/miscpalapi.cpp
+++ b/src/coreclr/pal/src/misc/miscpalapi.cpp
@@ -29,6 +29,7 @@ Revision History:
 
 #include <errno.h>
 #include <unistd.h>
+#include <fcntl.h>
 #include <time.h>
 #include <pthread.h>
 #include <dlfcn.h>
diff --git a/src/coreclr/pal/src/misc/perfjitdump.cpp b/src/coreclr/pal/src/misc/perfjitdump.cpp
index 50b0f2c6dadc..6223d533ac7f 100644
--- a/src/coreclr/pal/src/misc/perfjitdump.cpp
+++ b/src/coreclr/pal/src/misc/perfjitdump.cpp
@@ -2,15 +2,15 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // ===========================================================================
 
-#if defined(__linux__)
-#define JITDUMP_SUPPORTED
-#endif
-
 #include "pal/palinternal.h"
 #include "pal/dbgmsg.h"
 
 #include <cstddef>
 
+#if defined(__linux__) || defined(__APPLE__)
+#define JITDUMP_SUPPORTED
+#endif
+
 #ifdef JITDUMP_SUPPORTED
 
 #include <fcntl.h>
@@ -61,24 +61,11 @@ namespace
         JIT_CODE_LOAD = 0,
     };
 
-    uint64_t GetTimeStampNS()
+    static uint64_t GetTimeStampNS()
     {
-#if HAVE_CLOCK_MONOTONIC
-        struct timespec ts;
-        int result = clock_gettime(CLOCK_MONOTONIC, &ts);
-
-        if (result != 0)
-        {
-            ASSERT("clock_gettime(CLOCK_MONOTONIC) failed: %d\n", result);
-            return 0;
-        }
-        else
-        {
-            return  ts.tv_sec * 1000000000ULL + ts.tv_nsec;
-        }
-#else
-    #error "The PAL jitdump requires clock_gettime(CLOCK_MONOTONIC) to be supported."
-#endif
+        LARGE_INTEGER result;
+        QueryPerformanceCounter(&result);
+        return result.QuadPart;
     }
 
     struct FileHeader
@@ -115,7 +102,7 @@ namespace
     {
         JitCodeLoadRecord() :
             pid(getpid()),
-            tid(syscall(SYS_gettid))
+            tid((uint32_t)PlatformGetCurrentThreadId())
         {
             header.id = JIT_CODE_LOAD;
             header.timestamp = GetTimeStampNS();
@@ -170,6 +157,19 @@ struct PerfJitDumpState
     {
         int result = 0;
 
+        // On platforms where JITDUMP is used, the PAL QueryPerformanceFrequency
+        // returns tccSecondsToNanoSeconds, meaning QueryPerformanceCounter
+        // will return a direct nanosecond value. If this isn't true,
+        // then some other method will need to be used to implement GetTimeStampNS.
+        // Validate this is true once in Start here.
+        LARGE_INTEGER freq;
+        QueryPerformanceFrequency(&freq);
+        if (freq.QuadPart != tccSecondsToNanoSeconds)
+        {
+            _ASSERTE(!"QueryPerformanceFrequency does not return tccSecondsToNanoSeconds. Implement JITDUMP GetTimeStampNS directly for this platform.\n");
+            FatalError();
+        }
+
         // Write file header
         FileHeader header;
 
@@ -203,12 +203,18 @@ struct PerfJitDumpState
         if (result == -1)
             return FatalError();
 
+#if !defined(__APPLE__)
         // mmap jitdump file
-        // this is a marker for perf inject to find the jitdumpfile
+        // this is a marker for perf inject to find the jitdumpfile on linux.
+        // On OSX, samply and others hook open and mmap is not needed. It also fails on OSX,
+        // likely because of PROT_EXEC and hardened runtime
         mmapAddr = mmap(nullptr, sizeof(FileHeader), PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0);
 
         if (mmapAddr == MAP_FAILED)
             return FatalError();
+#else
+        mmapAddr = NULL;
+#endif
 
         enabled = true;
 
@@ -308,16 +314,13 @@ struct PerfJitDumpState
         {
             enabled = false;
 
-            if (result != 0)
-                return FatalError();
-
-            if (!enabled)
-                goto exit;
-
-            result = munmap(mmapAddr, sizeof(FileHeader));
+            if (mmapAddr != NULL)
+            {
+                result = munmap(mmapAddr, sizeof(FileHeader));
 
-            if (result == -1)
-                return FatalError();
+                if (result == -1)
+                    return FatalError();
+            }
 
             mmapAddr = MAP_FAILED;
 
@@ -333,7 +336,7 @@ struct PerfJitDumpState
 
             fd = -1;
         }
-exit:
+
         return 0;
     }
 };
diff --git a/src/coreclr/pal/src/misc/perftrace.cpp b/src/coreclr/pal/src/misc/perftrace.cpp
index 5dd0d38209d1..9419005099f4 100644
--- a/src/coreclr/pal/src/misc/perftrace.cpp
+++ b/src/coreclr/pal/src/misc/perftrace.cpp
@@ -92,12 +92,9 @@ typedef struct _pal_perf_program_info
     char    start_time[32]; /*  must be at least 26 characters */
 } pal_perf_program_info;
 
-typedef PAL_FILE PERF_FILE;
-#define PERF_FILEFN(x) PAL_ ## x
-
 static ULONGLONG PERFGetTicks();
 static double PERFComputeStandardDeviation(pal_perf_api_info *api);
-static void PERFPrintProgramHeaderInfo(PERF_FILE * hFile, BOOL completedExecution);
+static void PERFPrintProgramHeaderInfo(FILE * hFile, BOOL completedExecution);
 static BOOL PERFInitProgramInfo(LPWSTR command_line, LPWSTR exe_path);
 static BOOL PERFReadSetting( );
 static void PERFLogFileName(PathCharString * destFileString, const char *fileName, const char *suffix, int max_length);
@@ -212,7 +209,7 @@ PERFComputeStandardDeviation(pal_perf_api_info *api)
 
 static
 void
-PERFPrintProgramHeaderInfo(PERF_FILE * hFile, BOOL completedExecution)
+PERFPrintProgramHeaderInfo(FILE * hFile, BOOL completedExecution)
 {
     ULONGLONG etime = 0;
     ULONGLONG ttime = 0;
@@ -222,11 +219,11 @@ PERFPrintProgramHeaderInfo(PERF_FILE * hFile, BOOL completedExecution)
        ttime = program_info.total_duration;
        ptime = program_info.pal_duration;
     }
-    PERF_FILEFN(fprintf)(hFile,"#LOG\tversion=1.00\n");
+    fprintf(hFile,"#LOG\tversion=1.00\n");
 
-    PERF_FILEFN(fprintf)(hFile, "#MACHINE\thostname=%s\tcpu_clock_frequency=%g\n", program_info.hostname,
+    fprintf(hFile, "#MACHINE\thostname=%s\tcpu_clock_frequency=%g\n", program_info.hostname,
         program_info.cpu_clock_frequency);
-    PERF_FILEFN(fprintf)(hFile, "#PROCESS\tprocess_id=%d\ttotal_latency=" LLFORMAT "\tthread_times=" LLFORMAT "\tpal_time=" LLFORMAT "\texe_path=%s\tcommand_line=%s\tstart_time=%s",
+    fprintf(hFile, "#PROCESS\tprocess_id=%d\ttotal_latency=" LLFORMAT "\tthread_times=" LLFORMAT "\tpal_time=" LLFORMAT "\texe_path=%s\tcommand_line=%s\tstart_time=%s",
         program_info.process_id, etime, ttime, ptime,
         program_info.exe_path,program_info.command_line,program_info.start_time);
 }
@@ -324,7 +321,7 @@ PERFInitialize(LPWSTR command_line, LPWSTR exe_path)
 
     if( ret == TRUE )
     {
-        pal_function_map = (char*)PAL_malloc(PAL_API_NUMBER);
+        pal_function_map = (char*)malloc(PAL_API_NUMBER);
         if(pal_function_map != NULL)
         {
             bRead = PERFReadSetting( );  // we don't quit even we failed to read the file.
@@ -358,7 +355,7 @@ void PERFTerminate(  )
 
     PERFlushAllLogs();
     pthread_key_delete(PERF_tlsTableKey );
-    PAL_free(pal_function_map);
+    free(pal_function_map);
 }
 
 
@@ -379,21 +376,21 @@ BOOL PERFAllocThreadInfo(  )
         memory resources could be exhausted. If this ever becomes a problem, the memory allocated
         per thread should be freed when a thread exits. */
 
-    node = ( pal_thread_list_node * )PAL_malloc(sizeof(pal_thread_list_node));
+    node = ( pal_thread_list_node * )malloc(sizeof(pal_thread_list_node));
     if(node == NULL)
     {
         ret = FALSE;
         goto PERFAllocThreadInfoExit;
     }
 
-    local_info = (pal_perf_thread_info *)PAL_malloc(sizeof(pal_perf_thread_info));
+    local_info = (pal_perf_thread_info *)malloc(sizeof(pal_perf_thread_info));
     if (local_info == NULL)
     {
         ret = FALSE;
         goto PERFAllocThreadInfoExit;
     }
 
-    apiTable = (pal_perf_api_info *)PAL_malloc( PAL_API_NUMBER *  sizeof(pal_perf_api_info));
+    apiTable = (pal_perf_api_info *)malloc( PAL_API_NUMBER *  sizeof(pal_perf_api_info));
     if (apiTable == NULL)
     {
         ret = FALSE;
@@ -414,7 +411,7 @@ BOOL PERFAllocThreadInfo(  )
         apiTable[i].sum_of_square_duration = 0.0;
         if (pal_perf_histogram_size > 0)
         {
-            apiTable[i].histograms = (DWORD *)PAL_malloc(pal_perf_histogram_size*sizeof(DWORD));
+            apiTable[i].histograms = (DWORD *)malloc(pal_perf_histogram_size*sizeof(DWORD));
             if (apiTable[i].histograms == NULL)
             {
                 ret = FALSE;
@@ -428,7 +425,7 @@ BOOL PERFAllocThreadInfo(  )
         }
     }
 
-    log_buf = (char * )PAL_malloc( PAL_PERF_PROFILE_BUFFER_SIZE );
+    log_buf = (char * )malloc( PAL_PERF_PROFILE_BUFFER_SIZE );
 
     if(log_buf == NULL)
     {
@@ -457,11 +454,11 @@ BOOL PERFAllocThreadInfo(  )
     {
         if (node != NULL)
         {
-            PAL_free(node);
+            free(node);
         }
         if (local_info != NULL)
         {
-            PAL_free(local_info);
+            free(local_info);
         }
         if (apiTable != NULL)
         {
@@ -469,14 +466,14 @@ BOOL PERFAllocThreadInfo(  )
             {
                 if (apiTable[i].histograms != NULL)
                 {
-                    PAL_free(apiTable[i].histograms);
+                    free(apiTable[i].histograms);
                 }
             }
-            PAL_free(apiTable);
+            free(apiTable);
         }
         if (log_buf != NULL)
         {
-            PAL_free(log_buf);
+            free(log_buf);
         }
     }
     return ret;
@@ -557,26 +554,26 @@ PERFlushAllLogs( )
                 PERFUpdateProgramInfo(current->thread_info);
                 if (table1->histograms != NULL)
                 {
-                    PAL_free(table1->histograms);
+                    free(table1->histograms);
                 }
-                PAL_free(table1);
+                free(table1);
             }
             PERFFlushLog(current->thread_info, FALSE);
-            PAL_free(current->thread_info->pal_write_buf);
-            PAL_free(current->thread_info);
+            free(current->thread_info->pal_write_buf);
+            free(current->thread_info);
         }
-        PAL_free(current);
+        free(current);
     }
     PERFWriteCounters(table0);
     if (table0->histograms != NULL)
     {
-        PAL_free(table0->histograms);
+        free(table0->histograms);
     }
-    PAL_free(table0);
+    free(table0);
     PERFFlushLog(node->thread_info, FALSE);
-    PAL_free(node->thread_info->pal_write_buf);
-    PAL_free(node->thread_info);
-    PAL_free(node);
+    free(node->thread_info->pal_write_buf);
+    free(node->thread_info);
+    free(node);
 }
 
 static
@@ -619,11 +616,11 @@ PERFWriteCounters( pal_perf_api_info * table )
     off = table;
 
     PERFLogFileName(fileName, profile_summary_log_name, "_perf_summary.log");
-    hFile = PERF_FILEFN(fopen)(fileName, "a+");
+    hFile = fopen(fileName, "a+");
     if(hFile != NULL)
     {
         PERFPrintProgramHeaderInfo(hFile, TRUE);
-        PERF_FILEFN(fprintf)(hFile,"#api_name\tapi_id\tperf_entries\tperf_exits\tsum_of_latency\tmin_latency\tmax_latency\tstd_dev_latency\tsum_of_square_latency\n");
+        fprintf(hFile,"#api_name\tapi_id\tperf_entries\tperf_exits\tsum_of_latency\tmin_latency\tmax_latency\tstd_dev_latency\tsum_of_square_latency\n");
         for(i=0;i<PAL_API_NUMBER;i++)
         {
             double dev;
@@ -641,7 +638,7 @@ PERFWriteCounters( pal_perf_api_info * table )
 
             if (off->counter > 0 || !report_only_called_apis)
             {
-                PERF_FILEFN(fprintf)(hFile,"%s\t%d\t" LLFORMAT "\t" LLFORMAT "\t" LLFORMAT "\t" LLFORMAT "\t" LLFORMAT "\t%g\t%g\n",
+                fprintf(hFile,"%s\t%d\t" LLFORMAT "\t" LLFORMAT "\t" LLFORMAT "\t" LLFORMAT "\t" LLFORMAT "\t%g\t%g\n",
                     API_list[i], i, off->entries, off->counter,off->sum_duration,
                     min_duration, off->max_duration, dev, off->sum_of_square_duration);
             }
@@ -653,36 +650,36 @@ PERFWriteCounters( pal_perf_api_info * table )
     {
         return -1;
     }
-    PERF_FILEFN(fclose)(hFile);
+    fclose(hFile);
 
     if (pal_perf_histogram_size > 0)
     {
         off = table;
         PERFLogFileName(fileName, profile_summary_log_name, "_perf_summary.hist");
-        hFile = PERF_FILEFN(fopen)(fileName, "a+");
+        hFile = fopen(fileName, "a+");
 
         if (hFile != NULL)
         {
             DWORD j;
-            PERF_FILEFN(fprintf)(hFile,"#api_name\tapi_id");
+            fprintf(hFile,"#api_name\tapi_id");
             for (j = 0; j < pal_perf_histogram_size; j++)
             {
-                PERF_FILEFN(fprintf)(hFile, "\t%d", j*pal_perf_histogram_step);
+                fprintf(hFile, "\t%d", j*pal_perf_histogram_step);
             }
-            PERF_FILEFN(fprintf)(hFile, "\n");
+            fprintf(hFile, "\n");
 
             for(i = 0; i < PAL_API_NUMBER; i++)
             {
                 if (off->counter > 0)
                 {
-                    PERF_FILEFN(fprintf)(hFile,"%s\t%d", API_list[i], i);
+                    fprintf(hFile,"%s\t%d", API_list[i], i);
 
                     for (j = 0; j < pal_perf_histogram_size; j++)
                     {
-                        PERF_FILEFN(fprintf)(hFile, "\t%d", off->histograms[j]);
+                        fprintf(hFile, "\t%d", off->histograms[j]);
                     }
 
-                    PERF_FILEFN(fprintf)(hFile, "\n");
+                    fprintf(hFile, "\n");
                 }
 
                 off++;
@@ -692,7 +689,7 @@ PERFWriteCounters( pal_perf_api_info * table )
         {
             return -1;
         }
-        PERF_FILEFN(fclose)(hFile);
+        fclose(hFile);
     }
 
     return 0;
@@ -993,7 +990,7 @@ PERFFlushLog(pal_perf_thread_info * local_info, BOOL output_header)
 
     PERFLogFileName(fileName, profile_time_log_name, "_perf_time.log");
 
-    hFile = PERF_FILEFN(fopen)(fileName, "a+");
+    hFile = fopen(fileName, "a+");
 
     if(hFile)
     {
@@ -1003,7 +1000,7 @@ PERFFlushLog(pal_perf_thread_info * local_info, BOOL output_header)
         }
         if (local_info->buf_offset > 0)
         {
-            nWrittenBytes = PERF_FILEFN(fwrite)(local_info->pal_write_buf, local_info->buf_offset, 1, hFile);
+            nWrittenBytes = fwrite(local_info->pal_write_buf, local_info->buf_offset, 1, hFile);
             if (nWrittenBytes < 1)
             {
                 ERROR("fwrite() failed with errno == %d\n", errno);
@@ -1011,7 +1008,7 @@ PERFFlushLog(pal_perf_thread_info * local_info, BOOL output_header)
             }
             local_info->buf_offset = 0;
         }
-        PERF_FILEFN(fclose)(hFile);
+        fclose(hFile);
         ret = TRUE;
     }
 
diff --git a/src/coreclr/pal/src/misc/strutil.cpp b/src/coreclr/pal/src/misc/strutil.cpp
index ed29831232ca..e665e22b6529 100644
--- a/src/coreclr/pal/src/misc/strutil.cpp
+++ b/src/coreclr/pal/src/misc/strutil.cpp
@@ -53,7 +53,7 @@ CPalString::CopyString(
         _ASSERTE(psSource->GetMaxLength() > psSource->GetStringLength());
 
         WCHAR *pwsz = reinterpret_cast<WCHAR*>(
-            InternalMalloc(psSource->GetMaxLength() * sizeof(WCHAR))
+            malloc(psSource->GetMaxLength() * sizeof(WCHAR))
             );
 
         if (NULL != pwsz)
diff --git a/src/coreclr/pal/src/misc/utils.cpp b/src/coreclr/pal/src/misc/utils.cpp
index 0d96cc991305..261be25bcaba 100644
--- a/src/coreclr/pal/src/misc/utils.cpp
+++ b/src/coreclr/pal/src/misc/utils.cpp
@@ -190,7 +190,7 @@ LPSTR UTIL_WCToMB_Alloc(LPCWSTR lpWideCharStr, int cchWideChar)
     }
 
     /* allocate required buffer */
-    lpMultiByteStr = (LPSTR)PAL_malloc(length);
+    lpMultiByteStr = (LPSTR)malloc(length);
     if(NULL == lpMultiByteStr)
     {
         ERROR("malloc() failed! errno is %d (%s)\n", errno,strerror(errno));
@@ -204,7 +204,7 @@ LPSTR UTIL_WCToMB_Alloc(LPCWSTR lpWideCharStr, int cchWideChar)
     if(0 == length)
     {
         ASSERT("WCToMB error; GetLastError returns %#x\n", GetLastError());
-        PAL_free(lpMultiByteStr);
+        free(lpMultiByteStr);
         return NULL;
     }
     return lpMultiByteStr;
@@ -250,7 +250,7 @@ LPWSTR UTIL_MBToWC_Alloc(LPCSTR lpMultiByteStr, int cbMultiByte)
         return NULL;
     }
 
-    lpWideCharStr = (LPWSTR)PAL_malloc(fullsize);
+    lpWideCharStr = (LPWSTR)malloc(fullsize);
     if(NULL == lpWideCharStr)
     {
         ERROR("malloc() failed! errno is %d (%s)\n", errno,strerror(errno));
@@ -264,7 +264,7 @@ LPWSTR UTIL_MBToWC_Alloc(LPCSTR lpMultiByteStr, int cbMultiByte)
     if(0 >= length)
     {
         ASSERT("MCToMB error; GetLastError returns %#x\n", GetLastError());
-        PAL_free(lpWideCharStr);
+        free(lpWideCharStr);
         return NULL;
     }
     return lpWideCharStr;
diff --git a/src/coreclr/pal/src/objmgr/palobjbase.cpp b/src/coreclr/pal/src/objmgr/palobjbase.cpp
index dbfdf3b0c715..c39b5df7e268 100644
--- a/src/coreclr/pal/src/objmgr/palobjbase.cpp
+++ b/src/coreclr/pal/src/objmgr/palobjbase.cpp
@@ -58,7 +58,7 @@ CPalObjectBase::Initialize(
 
     if (0 != m_pot->GetImmutableDataSize())
     {
-        m_pvImmutableData = InternalMalloc(m_pot->GetImmutableDataSize());
+        m_pvImmutableData = malloc(m_pot->GetImmutableDataSize());
         if (NULL != m_pvImmutableData)
         {
             ZeroMemory(m_pvImmutableData, m_pot->GetImmutableDataSize());
@@ -80,7 +80,7 @@ CPalObjectBase::Initialize(
             goto InitializeExit;
         }
 
-        m_pvLocalData = InternalMalloc(m_pot->GetProcessLocalDataSize());
+        m_pvLocalData = malloc(m_pot->GetProcessLocalDataSize());
         if (NULL != m_pvLocalData)
         {
             ZeroMemory(m_pvLocalData, m_pot->GetProcessLocalDataSize());
diff --git a/src/coreclr/pal/src/objmgr/shmobject.cpp b/src/coreclr/pal/src/objmgr/shmobject.cpp
index 55b0e87c088a..282dd113da2e 100644
--- a/src/coreclr/pal/src/objmgr/shmobject.cpp
+++ b/src/coreclr/pal/src/objmgr/shmobject.cpp
@@ -119,7 +119,7 @@ CSharedMemoryObject::Initialize(
             // Allocate local memory to hold the shared data
             //
 
-            m_pvSharedData = InternalMalloc(m_pot->GetSharedDataSize());
+            m_pvSharedData = malloc(m_pot->GetSharedDataSize());
             if (NULL == m_pvSharedData)
             {
                 ERROR("Failure allocating m_pvSharedData (local copy)\n");
diff --git a/src/coreclr/pal/src/safecrt/input.inl b/src/coreclr/pal/src/safecrt/input.inl
index 9934eeb33f54..556fafa6f6b4 100644
--- a/src/coreclr/pal/src/safecrt/input.inl
+++ b/src/coreclr/pal/src/safecrt/input.inl
@@ -46,9 +46,9 @@
 
 #define _istspace(x)    isspace((unsigned char)x)
 
-#define _malloc_crt PAL_malloc
-#define _realloc_crt PAL_realloc
-#define _free_crt PAL_free
+#define _malloc_crt malloc
+#define _realloc_crt realloc
+#define _free_crt free
 
 #define _FASSIGN(flag, argument, number, dec_point, locale) _safecrt_fassign((flag), (argument), (number))
 #define _WFASSIGN(flag, argument, number, dec_point, locale) _safecrt_wfassign((flag), (argument), (number))
diff --git a/src/coreclr/pal/src/safecrt/vsprintf.cpp b/src/coreclr/pal/src/safecrt/vsprintf.cpp
index b8ff745f563c..360222d5dc67 100644
--- a/src/coreclr/pal/src/safecrt/vsprintf.cpp
+++ b/src/coreclr/pal/src/safecrt/vsprintf.cpp
@@ -95,7 +95,7 @@ DLLEXPORT int __cdecl _vsnprintf_s (
         retvalue = vsnprintf(string, sizeInBytes, format, ap);
         string[sizeInBytes - 1] = '\0';
         /* we allow truncation if count == _TRUNCATE */
-        if (retvalue > (int)sizeInBytes && count == _TRUNCATE)
+        if (retvalue >= (int)sizeInBytes && count == _TRUNCATE)
         {
             if (errno == ERANGE)
             {
diff --git a/src/coreclr/pal/src/sharedmemory/sharedmemory.cpp b/src/coreclr/pal/src/sharedmemory/sharedmemory.cpp
index ea5aae444dad..ba9447b889c3 100644
--- a/src/coreclr/pal/src/sharedmemory/sharedmemory.cpp
+++ b/src/coreclr/pal/src/sharedmemory/sharedmemory.cpp
@@ -139,7 +139,7 @@ const UINT64 SharedMemoryHelpers::InvalidSharedThreadId = static_cast<UINT64>(-1
 
 void *SharedMemoryHelpers::Alloc(SIZE_T byteCount)
 {
-    void *buffer = InternalMalloc(byteCount);
+    void *buffer = malloc(byteCount != 0 ? byteCount : 1);
     if (buffer == nullptr)
     {
         throw SharedMemoryException(static_cast<DWORD>(SharedMemoryError::OutOfMemory));
diff --git a/src/coreclr/pal/src/synchmgr/synchmanager.cpp b/src/coreclr/pal/src/synchmgr/synchmanager.cpp
index c34aa4e27fc8..c6b0b3db1cfd 100644
--- a/src/coreclr/pal/src/synchmgr/synchmanager.cpp
+++ b/src/coreclr/pal/src/synchmgr/synchmanager.cpp
@@ -28,6 +28,7 @@ SET_DEFAULT_DEBUG_CHANNEL(SYNC); // some headers have code with asserts, so do t
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <unistd.h>
+#include <fcntl.h>
 #include <limits.h>
 #include <sched.h>
 #include <signal.h>
diff --git a/src/coreclr/pal/src/synchmgr/synchmanager.hpp b/src/coreclr/pal/src/synchmgr/synchmanager.hpp
index 925b896e7e57..ce325f75ecc1 100644
--- a/src/coreclr/pal/src/synchmgr/synchmanager.hpp
+++ b/src/coreclr/pal/src/synchmgr/synchmanager.hpp
@@ -496,7 +496,7 @@ namespace CorUnix
     class CPalSynchronizationManager : public IPalSynchronizationManager
     {
         friend class CPalSynchMgrController;
-        template <class T> friend T *CorUnix::InternalNew();
+        template <class T, class... Ts> friend T *CorUnix::InternalNew(Ts... args);
 
     public:
         // types
diff --git a/src/coreclr/pal/src/thread/process.cpp b/src/coreclr/pal/src/thread/process.cpp
index b23eab001cca..033996645cb5 100644
--- a/src/coreclr/pal/src/thread/process.cpp
+++ b/src/coreclr/pal/src/thread/process.cpp
@@ -44,6 +44,7 @@ SET_DEFAULT_DEBUG_CHANNEL(PROCESS); // some headers have code with asserts, so d
 #endif  // HAVE_POLL
 
 #include <unistd.h>
+#include <fcntl.h>
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -84,6 +85,7 @@ SET_DEFAULT_DEBUG_CHANNEL(PROCESS); // some headers have code with asserts, so d
 
 #ifdef __APPLE__
 #include <libproc.h>
+#include <pwd.h>
 #include <sys/sysctl.h>
 #include <sys/posix_sem.h>
 #include <mach/task.h>
@@ -732,7 +734,7 @@ CorUnix::InternalCreateProcess(
             }
         }
         EnvironmentEntries++;
-        EnvironmentArray = (char **)InternalMalloc(EnvironmentEntries * sizeof(char *));
+        EnvironmentArray = (char **)malloc(EnvironmentEntries * sizeof(char *));
 
         EnvironmentEntries = 0;
         // Convert the environment block to array of strings
@@ -1271,7 +1273,7 @@ RaiseFailFastException(
     ENTRY("RaiseFailFastException");
 
     TerminateCurrentProcessNoExit(TRUE);
-    PROCAbort();
+    for (;;) PROCAbort();
 
     LOGEXIT("RaiseFailFastException");
     PERF_EXIT(RaiseFailFastException);
@@ -2026,7 +2028,7 @@ PROCNotifyProcessShutdownDestructor()
 char*
 PROCFormatInt(ULONG32 value)
 {
-    char* buffer = (char*)InternalMalloc(128);
+    char* buffer = (char*)malloc(128);
     if (buffer != nullptr)
     {
         if (sprintf_s(buffer, 128, "%d", value) == -1)
@@ -2048,7 +2050,7 @@ PROCFormatInt(ULONG32 value)
 char*
 PROCFormatInt64(ULONG64 value)
 {
-    char* buffer = (char*)InternalMalloc(128);
+    char* buffer = (char*)malloc(128);
     if (buffer != nullptr)
     {
         if (sprintf_s(buffer, 128, "%lld", value) == -1)
@@ -2087,7 +2089,7 @@ PROCBuildCreateDumpCommandLine(
     }
     const char* DumpGeneratorName = "createdump";
     int programLen = strlen(g_szCoreCLRPath) + strlen(DumpGeneratorName) + 1;
-    char* program = *pprogram = (char*)InternalMalloc(programLen);
+    char* program = *pprogram = (char*)malloc(programLen);
     if (program == nullptr)
     {
         return FALSE;
@@ -2541,7 +2543,9 @@ PROCCreateCrashDumpIfEnabled(int signal, siginfo_t* siginfo, bool serialize)
 
   Does not return
 --*/
+#if !defined(HOST_ARM)
 PAL_NORETURN
+#endif
 VOID
 PROCAbort(int signal, siginfo_t* siginfo)
 {
@@ -2831,7 +2835,7 @@ CorUnix::InitializeProcessCommandLine(
         size_t n = PAL_wcslen(lpwstrFullPath) + 1;
 
         size_t iLen = n;
-        initial_dir = reinterpret_cast<LPWSTR>(InternalMalloc(iLen*sizeof(WCHAR)));
+        initial_dir = reinterpret_cast<LPWSTR>(malloc(iLen*sizeof(WCHAR)));
         if (NULL == initial_dir)
         {
             ERROR("malloc() failed! (initial_dir) \n");
@@ -3758,7 +3762,7 @@ buildArgv(
     pThread = InternalGetCurrentThread();
     /* make sure to allocate enough space, up for the worst case scenario */
     int iLength = (iWlen + lpAppPath.GetCount() + 2);
-    lpAsciiCmdLine = (char *) InternalMalloc(iLength);
+    lpAsciiCmdLine = (char *) malloc(iLength);
 
     if (lpAsciiCmdLine == NULL)
     {
@@ -3938,7 +3942,7 @@ buildArgv(
 
     /* allocate lppargv according to the number of arguments
        in the command line */
-    lppArgv = (char **) InternalMalloc((((*pnArg)+1) * sizeof(char *)));
+    lppArgv = (char **) malloc((((*pnArg)+1) * sizeof(char *)));
 
     if (lppArgv == NULL)
     {
diff --git a/src/coreclr/pal/src/thread/thread.cpp b/src/coreclr/pal/src/thread/thread.cpp
index 9420a442c1f6..d388521da160 100644
--- a/src/coreclr/pal/src/thread/thread.cpp
+++ b/src/coreclr/pal/src/thread/thread.cpp
@@ -1564,7 +1564,7 @@ CorUnix::InternalSetThreadDescription(
         goto InternalSetThreadDescriptionExit;
     }
 
-    nameBuf = (char *)PAL_malloc(nameSize);
+    nameBuf = (char *)malloc(nameSize);
     if (nameBuf == NULL)
     {
         palError = ERROR_OUTOFMEMORY;
@@ -1616,7 +1616,7 @@ CorUnix::InternalSetThreadDescription(
     }
 
     if (NULL != nameBuf) {
-        PAL_free(nameBuf);
+        free(nameBuf);
     }
 
 #endif //defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__)
diff --git a/src/coreclr/pal/src/thread/threadsusp.cpp b/src/coreclr/pal/src/thread/threadsusp.cpp
index d2fae05c4256..867f46b3fa38 100644
--- a/src/coreclr/pal/src/thread/threadsusp.cpp
+++ b/src/coreclr/pal/src/thread/threadsusp.cpp
@@ -29,6 +29,7 @@ Revision History:
 
 #include <pthread.h>
 #include <unistd.h>
+#include <fcntl.h>
 #include <errno.h>
 #include <stddef.h>
 #include <sys/stat.h>
diff --git a/src/coreclr/pal/tests/palsuite/CMakeLists.txt b/src/coreclr/pal/tests/palsuite/CMakeLists.txt
index f878009c7b64..3d5dc9a74908 100644
--- a/src/coreclr/pal/tests/palsuite/CMakeLists.txt
+++ b/src/coreclr/pal/tests/palsuite/CMakeLists.txt
@@ -63,71 +63,8 @@ add_executable_clr(paltests
   #composite/threading/threadsuspension_switchthread/threadsuspension.cpp
   #composite/wfmo/main.cpp
   #composite/wfmo/mutex.cpp
-  c_runtime/abs/test1/abs.cpp
-  c_runtime/acos/test1/test1.cpp
-  c_runtime/acosf/test1/test1.cpp
-  c_runtime/acosh/test1/test1.cpp
-  c_runtime/acoshf/test1/test1.cpp
-  c_runtime/asin/test1/test1.cpp
-  c_runtime/asinf/test1/test1.cpp
-  c_runtime/asinh/test1/test1.cpp
-  c_runtime/asinhf/test1/test1.cpp
-  c_runtime/atan/test1/test1.cpp
-  c_runtime/atan2/test1/test1.cpp
-  c_runtime/atan2f/test1/test1.cpp
-  c_runtime/atanf/test1/test1.cpp
-  c_runtime/atanh/test1/test1.cpp
-  c_runtime/atanhf/test1/test1.cpp
   c_runtime/atof/test1/test1.cpp
   c_runtime/atoi/test1/test1.cpp
-  c_runtime/bsearch/test1/test1.cpp
-  c_runtime/bsearch/test2/test2.cpp
-  c_runtime/cbrt/test1/test1.cpp
-  c_runtime/cbrtf/test1/test1.cpp
-  c_runtime/ceil/test1/test1.cpp
-  c_runtime/ceilf/test1/test1.cpp
-  c_runtime/cos/test1/test1.cpp
-  c_runtime/cosf/test1/test1.cpp
-  c_runtime/cosh/test1/test1.cpp
-  c_runtime/coshf/test1/test1.cpp
-  c_runtime/errno/test1/test1.cpp
-  c_runtime/errno/test2/test2.cpp
-  c_runtime/exit/test1/test1.cpp
-  c_runtime/exit/test2/test2.cpp
-  c_runtime/exp/test1/test1.cpp
-  c_runtime/expf/test1/test1.cpp
-  c_runtime/fabs/test1/test1.cpp
-  c_runtime/fabsf/test1/test1.cpp
-  c_runtime/ferror/test1/test1.cpp
-  c_runtime/ferror/test2/test2.cpp
-  c_runtime/fflush/test1/test1.cpp
-  c_runtime/fgets/test1/test1.cpp
-  c_runtime/fgets/test2/test2.cpp
-  c_runtime/fgets/test3/test3.cpp
-  c_runtime/floor/test1/test1.cpp
-  c_runtime/floorf/test1/test1.cpp
-  c_runtime/fma/test1/test1.cpp
-  c_runtime/fmaf/test1/test1.cpp
-  c_runtime/fmod/test1/test1.cpp
-  c_runtime/fmodf/test1/test1.cpp
-  c_runtime/fopen/test1/test1.cpp
-  c_runtime/fopen/test2/test2.cpp
-  c_runtime/fopen/test3/test3.cpp
-  c_runtime/fopen/test4/test4.cpp
-  c_runtime/fopen/test5/test5.cpp
-  c_runtime/fopen/test6/test6.cpp
-  c_runtime/fopen/test7/test7.cpp
-  c_runtime/fputs/test1/test1.cpp
-  c_runtime/fputs/test2/test2.cpp
-  c_runtime/fread/test1/test1.cpp
-  c_runtime/fread/test2/test2.cpp
-  c_runtime/fread/test3/test3.cpp
-  c_runtime/free/test1/test1.cpp
-  c_runtime/fseek/test1/test1.cpp
-  c_runtime/ftell/test1/ftell.cpp
-  c_runtime/fwrite/test1/test1.cpp
-  c_runtime/ilogb/test1/test1.cpp
-  c_runtime/ilogbf/test1/test1.cpp
   c_runtime/isalnum/test1/test1.cpp
   c_runtime/isalpha/test1/test1.cpp
   c_runtime/isdigit/test1/test1.cpp
@@ -140,34 +77,10 @@ add_executable_clr(paltests
   c_runtime/iswupper/test1/test1.cpp
   c_runtime/isxdigit/test1/test1.cpp
   c_runtime/llabs/test1/test1.cpp
-  c_runtime/log/test1/test1.cpp
-  c_runtime/log10/test1/test1.cpp
-  c_runtime/log10f/test1/test1.cpp
-  c_runtime/log2/test1/test1.cpp
-  c_runtime/log2f/test1/test1.cpp
-  c_runtime/logf/test1/test1.cpp
-  c_runtime/malloc/test1/test1.cpp
-  c_runtime/malloc/test2/test2.cpp
   c_runtime/memchr/test1/test1.cpp
   c_runtime/memcmp/test1/test1.cpp
   c_runtime/memmove/test1/test1.cpp
   c_runtime/memset/test1/test1.cpp
-  c_runtime/modf/test1/test1.cpp
-  c_runtime/modff/test1/test1.cpp
-  c_runtime/pow/test1/test1.cpp
-  c_runtime/powf/test1/test1.cpp
-  c_runtime/qsort/test1/test1.cpp
-  c_runtime/qsort/test2/test2.cpp
-  c_runtime/rand_srand/test1/test1.cpp
-  c_runtime/realloc/test1/test1.cpp
-  c_runtime/sin/test1/test1.cpp
-  c_runtime/sincos/test1/test1.cpp
-  c_runtime/sincosf/test1/test1.cpp
-  c_runtime/sinf/test1/test1.cpp
-  c_runtime/sinh/test1/test1.cpp
-  c_runtime/sinhf/test1/test1.cpp
-  c_runtime/sqrt/test1/test1.cpp
-  c_runtime/sqrtf/test1/test1.cpp
   c_runtime/sscanf_s/test1/test1.cpp
   c_runtime/sscanf_s/test10/test10.cpp
   c_runtime/sscanf_s/test11/test11.cpp
@@ -196,11 +109,6 @@ add_executable_clr(paltests
   c_runtime/strpbrk/test1/test1.cpp
   c_runtime/strrchr/test1/test1.cpp
   c_runtime/strstr/test1/test1.cpp
-  c_runtime/tan/test1/test1.cpp
-  c_runtime/tanf/test1/test1.cpp
-  c_runtime/tanh/test1/test1.cpp
-  c_runtime/tanhf/test1/test1.cpp
-  c_runtime/time/test1/test1.cpp
   c_runtime/tolower/test1/test1.cpp
   c_runtime/toupper/test1/test1.cpp
   c_runtime/towlower/test1/test1.cpp
@@ -224,12 +132,8 @@ add_executable_clr(paltests
   c_runtime/wcstoul/test5/test5.cpp
   c_runtime/wcstoul/test6/test6.cpp
   c_runtime/_alloca/test1/test1.cpp
-  c_runtime/_finite/test1/test1.cpp
-  c_runtime/_finitef/test1/test1.cpp
   #c_runtime/_gcvt/test1/_gcvt.cpp
   #c_runtime/_gcvt/test2/test2.cpp
-  c_runtime/_isnan/test1/test1.cpp
-  c_runtime/_isnanf/test1/test1.cpp
   c_runtime/_itow/test1/test1.cpp
   c_runtime/_putenv/test1/test1.cpp
   c_runtime/_putenv/test2/test2.cpp
@@ -264,7 +168,6 @@ add_executable_clr(paltests
   c_runtime/_wfopen/test6/test6.cpp
   c_runtime/_wfopen/test7/test7.cpp
   c_runtime/_wtoi/test1/test1.cpp
-  c_runtime/__iscsym/test1/__iscsym.cpp
   #debug_api/DebugBreak/test1/test1.cpp
   debug_api/OutputDebugStringA/test1/helper.cpp
   debug_api/OutputDebugStringA/test1/test1.cpp
@@ -514,11 +417,7 @@ add_executable_clr(paltests
   miscellaneous/SetEnvironmentVariableW/test4/test4.cpp
   miscellaneous/SetLastError/test1/test.cpp
   miscellaneous/_i64tow/test1/test1.cpp
-  pal_specific/PAL_errno/test1/PAL_errno.cpp
 # pal_specific/PAL_GetUserTempDirectoryW/test1/PAL_GetUserTempDirectoryW.cpp
-  #pal_specific/PAL_get_stderr/test1/PAL_get_stderr.cpp
-  #pal_specific/PAL_get_stdin/test1/PAL_get_stdin.cpp
-  #pal_specific/PAL_get_stdout/test1/PAL_get_stdout.cpp
   pal_specific/PAL_Initialize_Terminate/test1/PAL_Initialize_Terminate.cpp
   pal_specific/PAL_Initialize_Terminate/test2/pal_initialize_twice.cpp
 #  pal_specific/PAL_RegisterLibraryW_UnregisterLibraryW/test1/PAL_RegisterLibraryW_UnregisterLibraryW.cpp
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/__iscsym/test1/__iscsym.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/__iscsym/test1/__iscsym.cpp
deleted file mode 100644
index 9244c5f0a32e..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/__iscsym/test1/__iscsym.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================
-**
-** Source: __iscsym.c
-**
-** Purpose: Positive test the __iscsym API.
-**          Call __iscsym to letter, digit and underscore
-**
-**
-**============================================================*/
-#include <palsuite.h>
-
-PALTEST(c_runtime___iscsym_test1_paltest_iscsym_test1, "c_runtime/__iscsym/test1/paltest_iscsym_test1")
-{
-    int err;
-    int index;
-    char non_letter_set[]=
-        {'~','`','!','@','#','$','%','^','&','*','(',')',')',
-            '-','+','=','|','\\',';',':','"','\'','<','>',
-            ',','.','?','/','\0'};
-    char errBuffer[200];
-
-    /*Initialize the PAL environment*/
-    err = PAL_Initialize(argc, argv);
-    if(0 != err)
-    {
-        return FAIL;
-    }
-  
-    /*To check if the parameter passed in is a character*/
-    for(index = 'a'; index <= 'z'; index++)
-    {
-        err = __iscsym(index);
-        if(0 == err)
-        {
-            Fail("\n__iscsym failed to recognize  a "
-                    "lower-case letter:%c!\n", index);
-        }
-    }
-    
-    /*To check if the parameter passed in is a character*/
-    for(index = 'A'; index <= 'Z'; index++)
-    {
-        err = __iscsym(index);
-        if(0 == err)
-        {
-            Fail("\n__iscsym failed to recognize an "
-                    "upper-case letter: %c!\n", index);
-        }
-    }
-
-    /*To check if the parameter passed in is a digit*/
-    for(index = '0'; index <= '9'; index++)
-    {
-        err = __iscsym(index);
-        if(0 == err)
-        {
-            Fail("\n__iscsym failed to recognize a digit %c!\n",
-                        index);
-        }
-    }
-
-    /*To check if the parameter passed in is a underscore*/
-    err = __iscsym('_');
-    if(0 == err)
-    {
-        Fail("\n__iscsym failed to recognize an underscore!\n");
-    }
-
-    memset(errBuffer, 0, 200);
-
-    for(index = 0; non_letter_set[index]; index++)
-    {
-        err = __iscsym(non_letter_set[index]);
-        if(0 != err)
-        {
-            strncat(errBuffer, &non_letter_set[index], 1);
-            strcat(errBuffer, ", ");
-        }
-    }
-
-    if(strlen(errBuffer) > 0)
-    {
-        Fail("\n__iscsym failed to identify the characters '%s' "
-             "as not letters, digits "
-             "or underscores\n", errBuffer);
-    }
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/_finite/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/_finite/test1/test1.cpp
deleted file mode 100644
index e21c9551b5c0..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/_finite/test1/test1.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Checks that _finite correctly classifies all types
-**          of floating point numbers (NaN, -Infinity, Infinity,
-**          finite nonzero, unnormalized, 0, and -0)
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-/*
-The IEEE double precision floating point standard looks like this:
-
-  S EEEEEEEEEEE FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
-  0 1        11 12                                                63
-
-S is the sign bit.  The E bits are the exponent, and the 52 F bits are
-the fraction.  These represent a value, V.
-
-If E=2047 and F is nonzero, then V=NaN ("Not a number")
-If E=2047 and F is zero and S is 1, then V=-Infinity
-If E=2047 and F is zero and S is 0, then V=Infinity
-If 0<E<2047 then V=(-1)^S * 2^(E-1023) * (1.F) where "1.F" is the binary
-   number created by prefixing F with a leading 1 and a binary point.
-If E=0 and F is nonzero, then V=(-1)^S * 2^(-1022) * (0.F) These are
-   "unnormalized" values.
-If E=0 and F is zero and S is 1, then V=-0
-If E=0 and F is zero and S is 0, then V=0
-
-*/
-
-#define TO_DOUBLE(x)    (*((double*)((void*)&x)))
-
-PALTEST(c_runtime__finite_test1_paltest_finite_test1, "c_runtime/_finite/test1/paltest_finite_test1")
-{
-    /*non-finite numbers*/
-    UINT64 lsnan =              UI64(0xffffffffffffffff);
-    UINT64 lqnan =              UI64(0x7fffffffffffffff);
-    UINT64 lneginf =            UI64(0xfff0000000000000);
-    UINT64 lposinf =            UI64(0x7ff0000000000000);
-
-    double snan =               TO_DOUBLE(lsnan);
-    double qnan =               TO_DOUBLE(lqnan);
-    double neginf =             TO_DOUBLE(lneginf);
-    double posinf =             TO_DOUBLE(lposinf);
-
-    /*finite numbers*/
-    UINT64 lnegunnormalized =   UI64(0x800fffffffffffff);
-    UINT64 lposunnormalized =   UI64(0x000fffffffffffff);
-    UINT64 lnegzero =           UI64(0x8000000000000000);
-
-    double negunnormalized =    TO_DOUBLE(lnegunnormalized);
-    double posunnormalized =    TO_DOUBLE(lposunnormalized);
-    double negzero =            TO_DOUBLE(lnegzero);
-
-    /*
-     * Initialize the PAL and return FAIL if this fails
-     */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    /*non-finite numbers*/
-    if (_finite(snan) || _finite(qnan))
-    {
-        Fail("_finite() found NAN to be finite.\n");
-    }
-
-    if (_finite(neginf))
-    {
-        Fail("_finite() found negative infinity to be finite.\n");
-    }
-
-    if (_finite(posinf))
-    {
-        Fail("_finite() found infinity to be finite.\n");
-    }
-
-    /*finite numbers*/
-    if (!_finite(negunnormalized))
-    {
-        Fail("_finite() found a negative unnormalized value to be infinite.\n");
-    }
-
-    if (!_finite(posunnormalized))
-    {
-        Fail("_finite() found an unnormalized value to be infinite.\n");
-    }
-
-    if (!_finite(negzero))
-    {
-        Fail("_finite() found negative zero to be infinite.\n");
-    }
-
-    if (!_finite(+0.0))
-    {
-        Fail("_finite() found zero to be infinite.\n");
-    }
-
-    if (!_finite(-123.456))
-    {
-        Fail("_finite() found %f to be infinite.\n", -123.456);
-    }
-
-    if (!_finite(+123.456))
-    {
-        Fail("_finite() found %f to be infinite.\n", +123.456);
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/_finitef/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/_finitef/test1/test1.cpp
deleted file mode 100644
index 5158fa9d8404..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/_finitef/test1/test1.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Checks that _finitef correctly classifies all types
-**          of floating point numbers (NaN, -Infinity, Infinity,
-**          finite nonzero, unnormalized, 0, and -0)
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-/*
-The IEEE single precision floating point standard looks like this:
-
-  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF
-  0 1      8 9                    31
-
-S is the sign bit.  The E bits are the exponent, and the 23 F bits are
-the fraction.  These represent a value, V.
-
-If E=255 and F is nonzero, then V=NaN ("Not a number")
-If E=255 and F is zero and S is 1, then V=-Infinity
-If E=255 and F is zero and S is 0, then V=Infinity
-If 0<E<255 then V=(-1)^S * 2^(E-1028) * (1.F) where "1.F" is the binary
-   number created by prefixing F with a leading 1 and a binary point.
-If E=0 and F is nonzero, then V=(-1)^S * 2^(-127) * (0.F) These are
-   "unnormalized" values.
-If E=0 and F is zero and S is 1, then V=-0
-If E=0 and F is zero and S is 0, then V=0
-
-*/
-
-#define TO_FLOAT(x)    (*((float*)((void*)&x)))
-
-PALTEST(c_runtime__finitef_test1_paltest_finitef_test1, "c_runtime/_finitef/test1/paltest_finitef_test1")
-{
-    /*non-finite numbers*/
-    UINT32 lsnan =              0xffffffffu;
-    UINT32 lqnan =              0x7fffffffu;
-    UINT32 lneginf =            0xff800000u;
-    UINT32 lposinf =            0x7f800000u;
-
-    float snan =               TO_FLOAT(lsnan);
-    float qnan =               TO_FLOAT(lqnan);
-    float neginf =             TO_FLOAT(lneginf);
-    float posinf =             TO_FLOAT(lposinf);
-
-    /*finite numbers*/
-    UINT32 lnegunnormalized =   0x807fffffu;
-    UINT32 lposunnormalized =   0x007fffffu;
-    UINT32 lnegzero =           0x80000000u;
-
-    float negunnormalized =    TO_FLOAT(lnegunnormalized);
-    float posunnormalized =    TO_FLOAT(lposunnormalized);
-    float negzero =            TO_FLOAT(lnegzero);
-
-    /*
-     * Initialize the PAL and return FAIL if this fails
-     */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    /*non-finite numbers*/
-    if (_finitef(snan) || _finitef(qnan))
-    {
-        Fail("_finitef() found NAN to be finite.\n");
-    }
-
-    if (_finitef(neginf))
-    {
-        Fail("_finitef() found negative infinity to be finite.\n");
-    }
-
-    if (_finitef(posinf))
-    {
-        Fail("_finitef() found infinity to be finite.\n");
-    }
-
-    /*finite numbers*/
-    if (!_finitef(negunnormalized))
-    {
-        Fail("_finitef() found a negative unnormalized value to be infinite.\n");
-    }
-
-    if (!_finitef(posunnormalized))
-    {
-        Fail("_finitef() found an unnormalized value to be infinite.\n");
-    }
-
-    if (!_finitef(negzero))
-    {
-        Fail("_finitef() found negative zero to be infinite.\n");
-    }
-
-    if (!_finitef(+0.0f))
-    {
-        Fail("_finitef() found zero to be infinite.\n");
-    }
-
-    if (!_finitef(-123.456f))
-    {
-        Fail("_finitef() found %f to be infinite.\n", -123.456f);
-    }
-
-    if (!_finitef(+123.456f))
-    {
-        Fail("_finitef() found %f to be infinite.\n", +123.456f);
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/_isnan/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/_isnan/test1/test1.cpp
deleted file mode 100644
index 98a96cc2474a..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/_isnan/test1/test1.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: 
-** Test _isnan with a number of trivial values, to ensure they indicated that
-** they are numbers.  Then try with Positive/Negative Infinite, which should
-** also be numbers.  Finally set the least and most significant bits of 
-** the fraction to positive and negative, at which point it should return
-** the true value. 
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-#define TO_DOUBLE(x)    (*((double*)((void*)&x)))
-#define TO_I64(x)       (*((INT64*)((void*)&x)))
-
-/*
- * NaN: any double with maximum exponent (0x7ff) and non-zero fraction
- */
-PALTEST(c_runtime__isnan_test1_paltest_isnan_test1, "c_runtime/_isnan/test1/paltest_isnan_test1")
-{
-    /*
-     * Initialize the PAL and return FAIL if this fails
-     */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    /*
-     * Try some trivial values
-     */
-    if (_isnan(0.0))
-    {
-        Fail("_isnan() incorrectly identified %f as NaN!\n", 0.0);
-    }
-
-    if (_isnan(1.23456))
-    {
-        Fail("_isnan() incorrectly identified %f as NaN!\n", 1.234567);
-    }
-
-    if (_isnan(42.0))
-    {
-        Fail("_isnan() incorrectly identified %f as NaN!\n", 42.0);
-    }
-
-    UINT64 lneginf =            UI64(0xfff0000000000000);
-    UINT64 lposinf =            UI64(0x7ff0000000000000);
-    
-    double neginf =             TO_DOUBLE(lneginf);
-    double posinf =             TO_DOUBLE(lposinf);
-
-    /*
-     * Try positive and negative infinity
-     */
-    if (_isnan(neginf))
-    {
-        Fail("_isnan() incorrectly identified negative infinity as NaN!\n");
-    }
-
-    if (_isnan(posinf))
-    {
-        Fail("_isnan() incorrectly identified infinity as NaN!\n");
-    }
-
-    /*
-     * Try setting the least significant bit of the fraction,
-     * positive and negative
-     */
-    UINT64 lsnan =              UI64(0xfff0000000000001);
-    double snan =               TO_DOUBLE(lsnan);
-    
-    if (!_isnan(snan))
-    {
-        Fail("_isnan() failed to identify %I64x as NaN!\n", lsnan);
-    }
-
-    UINT64 lqnan =              UI64(0x7ff0000000000001);
-    double qnan =               TO_DOUBLE(lqnan);
-    
-    if (!_isnan(qnan))
-    {
-        Fail("_isnan() failed to identify %I64x as NaN!\n", lqnan);
-    }
-
-    /*
-     * Try setting the most significant bit of the fraction,
-     * positive and negative
-     */
-    lsnan =                     UI64(0xfff8000000000000);
-    snan =                      TO_DOUBLE(lsnan);
-
-    if (!_isnan(snan))
-    {
-        Fail ("_isnan() failed to identify %I64x as NaN!\n", lsnan);
-    }
-
-    lqnan =                     UI64(0x7ff8000000000000);
-    qnan =                      TO_DOUBLE(lqnan);
-
-    if (!_isnan(qnan))
-    {
-        Fail ("_isnan() failed to identify %I64x as NaN!\n", lqnan);
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/_isnanf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/_isnanf/test1/test1.cpp
deleted file mode 100644
index b46b238342ad..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/_isnanf/test1/test1.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: 
-** Test _isnanf with a number of trivial values, to ensure they indicated that
-** they are numbers.  Then try with Positive/Negative Infinite, which should
-** also be numbers.  Finally set the least and most significant bits of 
-** the fraction to positive and negative, at which point it should return
-** the true value. 
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-#define TO_FLOAT(x)    (*((float*)((void*)&x)))
-#define TO_I32(x)      (*((INT32*)((void*)&x)))
-
-/*
- * NaN: any float with maximum exponent (0x7f8) and non-zero fraction
- */
-PALTEST(c_runtime__isnanf_test1_paltest_isnanf_test1, "c_runtime/_isnanf/test1/paltest_isnanf_test1")
-{
-    /*
-     * Initialize the PAL and return FAIL if this fails
-     */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    /*
-     * Try some trivial values
-     */
-    if (_isnanf(0.0f))
-    {
-        Fail("_isnanf() incorrectly identified %f as NaN!\n", 0.0f);
-    }
-
-    if (_isnanf(1.234567f))
-    {
-        Fail("_isnanf() incorrectly identified %f as NaN!\n", 1.234567f);
-    }
-
-    if (_isnanf(42.0f))
-    {
-        Fail("_isnanf() incorrectly identified %f as NaN!\n", 42.0f);
-    }
-
-    UINT32 lneginf =           0xff800000u;
-    UINT32 lposinf =           0x7f800000u;
-    
-    float neginf =             TO_FLOAT(lneginf);
-    float posinf =             TO_FLOAT(lposinf);
-
-    /*
-     * Try positive and negative infinity
-     */
-    if (_isnanf(neginf))
-    {
-        Fail("_isnanf() incorrectly identified negative infinity as NaN!\n");
-    }
-
-    if (_isnanf(posinf))
-    {
-        Fail("_isnanf() incorrectly identified infinity as NaN!\n");
-    }
-
-    /*
-     * Try setting the least significant bit of the fraction,
-     * positive and negative
-     */
-    UINT32 lsnan =             0xff800001u;
-    float snan =               TO_FLOAT(lsnan);
-    
-    if (!_isnanf(snan))
-    {
-        Fail("_isnanf() failed to identify %I32x as NaN!\n", lsnan);
-    }
-
-    UINT32 lqnan =             0x7f800001u;
-    float qnan =               TO_FLOAT(lqnan);
-    
-    if (!_isnanf(qnan))
-    {
-        Fail("_isnanf() failed to identify %I32x as NaN!\n", lqnan);
-    }
-
-    /*
-     * Try setting the most significant bit of the fraction,
-     * positive and negative
-     */
-    lsnan =                     0xffc00000u;
-    snan =                      TO_FLOAT(lsnan);
-
-    if (!_isnanf(snan))
-    {
-        Fail ("_isnanf() failed to identify %I32x as NaN!\n", lsnan);
-    }
-
-    lqnan =                     0x7fc00000u;
-    qnan =                      TO_FLOAT(lqnan);
-
-    if (!_isnanf(qnan))
-    {
-        Fail ("_isnanf() failed to identify %I32x as NaN!\n", lqnan);
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/_putenv/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/_putenv/test1/test1.cpp
index a7ebbe4fa618..6b9b6d94ee53 100644
--- a/src/coreclr/pal/tests/palsuite/c_runtime/_putenv/test1/test1.cpp
+++ b/src/coreclr/pal/tests/palsuite/c_runtime/_putenv/test1/test1.cpp
@@ -56,7 +56,7 @@ PALTEST(c_runtime__putenv_test1_paltest_putenv_test1, "c_runtime/_putenv/test1/p
          */
         if (TestCases[i].bValidString == TRUE)
         {       
-            variableValue = getenv(TestCases[i].varName);
+            variableValue = PAL_getenv(TestCases[i].varName);
         
             if (variableValue == NULL)
             { 
@@ -81,7 +81,7 @@ PALTEST(c_runtime__putenv_test1_paltest_putenv_test1, "c_runtime/_putenv/test1/p
              * Check to see that putenv fails for malformed _putenvString values
              */
         {
-            variableValue = getenv(TestCases[i].varName);
+            variableValue = PAL_getenv(TestCases[i].varName);
         
             if (variableValue != NULL)
             { 
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/_putenv/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/_putenv/test2/test2.cpp
index ee84e375c2e2..ef118e513260 100644
--- a/src/coreclr/pal/tests/palsuite/c_runtime/_putenv/test2/test2.cpp
+++ b/src/coreclr/pal/tests/palsuite/c_runtime/_putenv/test2/test2.cpp
@@ -35,7 +35,7 @@ PALTEST(c_runtime__putenv_test2_paltest_putenv_test2, "c_runtime/_putenv/test2/p
              "_putenv(%s)\n", _putenvString0);
     }
 
-    variableValue = getenv(variable);
+    variableValue = PAL_getenv(variable);
     
     if (variableValue == NULL)
     { 
@@ -60,7 +60,7 @@ PALTEST(c_runtime__putenv_test2_paltest_putenv_test2, "c_runtime/_putenv/test2/p
                      "_putenv(%s)\n", _putenvString1);
             }
 
-            variableValue = getenv(variable);
+            variableValue = PAL_getenv(variable);
 
             if (variableValue != NULL)
             { 
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/_putenv/test3/test3.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/_putenv/test3/test3.cpp
index ab1397193ce3..07380e1a5145 100644
--- a/src/coreclr/pal/tests/palsuite/c_runtime/_putenv/test3/test3.cpp
+++ b/src/coreclr/pal/tests/palsuite/c_runtime/_putenv/test3/test3.cpp
@@ -50,7 +50,7 @@ PALTEST(c_runtime__putenv_test3_paltest_putenv_test3, "c_runtime/_putenv/test3/p
        differing only by case, returns it's own value.
     */
 
-    result = getenv(FirstVarName);
+    result = PAL_getenv(FirstVarName);
     if(result == NULL)
     {
         Fail("ERROR: The result of getenv on a valid Environment Variable "
@@ -77,7 +77,7 @@ PALTEST(c_runtime__putenv_test3_paltest_putenv_test3, "c_runtime/_putenv/test3/p
     /* Verify that the environment variables
     */
 
-    result = getenv(FirstVarName);
+    result = PAL_getenv(FirstVarName);
     if(result == NULL)
     {
         Fail("ERROR: The result of getenv on a valid Environment Variable "
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/_vsnprintf_s/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/_vsnprintf_s/test1/test1.cpp
index fb5ab3a2d7af..62b725208769 100644
--- a/src/coreclr/pal/tests/palsuite/c_runtime/_vsnprintf_s/test1/test1.cpp
+++ b/src/coreclr/pal/tests/palsuite/c_runtime/_vsnprintf_s/test1/test1.cpp
@@ -49,6 +49,18 @@ PALTEST(c_runtime__vsnprintf_s_test1_paltest_vsnprintf_test1, "c_runtime/_vsnpri
         Fail("ERROR: expected %s (up to %d chars), got %s\n", checkstr, 8, buf);
     }
 
+    char buf8[8] = {0};
+
+    ret = Testvsnprintf(buf8, 8, "abcdefgh");
+    if (ret >= 0)
+    {
+        Fail("ERROR: expected negative return value, got %d", ret);
+    }
+    if (memcmp(buf8, "abcdefg\0", 8) != 0)
+    {
+        Fail("ERROR: Expected 7 chars + null terminator");
+    }
+
     PAL_Terminate();
     return PASS;
 }
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/abs/test1/abs.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/abs/test1/abs.cpp
deleted file mode 100644
index 9ad83d64e3a7..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/abs/test1/abs.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  abs.c (test 1)
-**
-** Purpose: Tests the PAL implementation of the abs function.
-**
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-struct TESTS 
-{
-    int nTest;
-    int nResult;
-};
-
-PALTEST(c_runtime_abs_test1_paltest_abs_test1, "c_runtime/abs/test1/paltest_abs_test1")
-{
-    int i = 0;
-    int nRc = 0;
-    struct TESTS testCase[] = 
-    {
-        {0, 0},
-        {1, 1},
-        {-1, 1}
-    };
-
-
-    if (0 != PAL_Initialize(argc,argv))
-    {
-        return FAIL;
-    }
-
-    for (i = 0; i < (sizeof(testCase)/sizeof(struct TESTS)); i++)
-    {
-        nRc = abs(testCase[i].nTest);
-        if (nRc != testCase[i].nResult)
-        {
-            Fail("abs: ERROR -> abs(%d) returned %d "
-                "when it was expected to return %d \n",
-                testCase[i].nTest,
-                nRc,
-                testCase[i].nResult);
-        }
-    }
-    
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/acos/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/acos/test1/test1.cpp
deleted file mode 100644
index 5de46458e505..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/acos/test1/test1.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that acos return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * acos_test1_validate
- *
- * test validation function
- */
-void __cdecl acos_test1_validate(double value, double expected, double variance)
-{
-    double result = acos(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("acos(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * acos_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl acos_test1_validate_isnan(double value)
-{
-    double result = acos(value);
-
-    if (!_isnan(result))
-    {
-        Fail("acos(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_acos_test1_paltest_acos_test1, "c_runtime/acos/test1/paltest_acos_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance */
-        { -1,                      3.1415926535897932,     PAL_EPSILON * 10 },      // expected:  pi
-        { -0.91173391478696510,    2.7182818284590452,     PAL_EPSILON * 10 },      // expected:  e
-        { -0.66820151019031295,    2.3025850929940457,     PAL_EPSILON * 10 },      // expected:  ln(10)
-        {  0,                      1.5707963267948966,     PAL_EPSILON * 10 },      // expected:  pi / 2
-        {  0.12775121753523991,    1.4426950408889634,     PAL_EPSILON * 10 },      // expected:  log2(e)
-        {  0.15594369476537447,    1.4142135623730950,     PAL_EPSILON * 10 },      // expected:  sqrt(2)
-        {  0.42812514788535792,    1.1283791670955126,     PAL_EPSILON * 10 },      // expected:  2 / sqrt(pi)
-        {  0.54030230586813972,    1,                      PAL_EPSILON * 10 },
-        {  0.70710678118654752,    0.78539816339744831,    PAL_EPSILON },           // expected:  pi / 4,         value:  1 / sqrt(2)
-        {  0.76024459707563015,    0.70710678118654752,    PAL_EPSILON },           // expected:  1 / sqrt(2)
-        {  0.76923890136397213,    0.69314718055994531,    PAL_EPSILON },           // expected:  ln(2)
-        {  0.80410982822879171,    0.63661977236758134,    PAL_EPSILON },           // expected:  2 / pi
-        {  0.90716712923909839,    0.43429448190325183,    PAL_EPSILON },           // expected:  log10(e)
-        {  0.94976571538163866,    0.31830988618379067,    PAL_EPSILON },           // expected:  1 / pi
-        {  1,                      0,                      PAL_EPSILON },
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        acos_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-    }
-    
-    acos_test1_validate_isnan(PAL_NEGINF);
-    acos_test1_validate_isnan(PAL_NAN);
-    acos_test1_validate_isnan(PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/acosf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/acosf/test1/test1.cpp
deleted file mode 100644
index 909d43cab78a..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/acosf/test1/test1.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that acosf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * acosf_test1_validate
- *
- * test validation function
- */
-void __cdecl acosf_test1_validate(float value, float expected, float variance)
-{
-    float result = acosf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("acosf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * acosf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl acosf_test1_validate_isnan(float value)
-{
-    float result = acosf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("acosf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_acosf_test1_paltest_acosf_test1, "c_runtime/acosf/test1/paltest_acosf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected         variance */
-        { -1,               3.14159265f,     PAL_EPSILON * 10 },      // expected:  pi
-        { -0.911733915f,    2.71828183f,     PAL_EPSILON * 10 },      // expected:  e
-        { -0.668201510f,    2.30258509f,     PAL_EPSILON * 10 },      // expected:  ln(10)
-        {  0,               1.57079633f,     PAL_EPSILON * 10 },      // expected:  pi / 2
-        {  0.127751218f,    1.44269504f,     PAL_EPSILON * 10 },      // expected:  logf2(e)
-        {  0.155943695f,    1.41421356f,     PAL_EPSILON * 10 },      // expected:  sqrtf(2)
-        {  0.428125148f,    1.12837917f,     PAL_EPSILON * 10 },      // expected:  2 / sqrtf(pi)
-        {  0.540302306f,    1,               PAL_EPSILON * 10 },
-        {  0.707106781f,    0.785398163f,    PAL_EPSILON },           // expected:  pi / 4,         value:  1 / sqrtf(2)
-        {  0.760244597f,    0.707106781f,    PAL_EPSILON },           // expected:  1 / sqrtf(2)
-        {  0.769238901f,    0.693147181f,    PAL_EPSILON },           // expected:  ln(2)
-        {  0.804109828f,    0.636619772f,    PAL_EPSILON },           // expected:  2 / pi
-        {  0.907167129f,    0.434294482f,    PAL_EPSILON },           // expected:  logf10f(e)
-        {  0.949765715f,    0.318309886f,    PAL_EPSILON },           // expected:  1 / pi
-        {  1,               0,               PAL_EPSILON },
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        acosf_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-    }
-    
-    acosf_test1_validate_isnan(PAL_NEGINF);
-    acosf_test1_validate_isnan(PAL_NAN);
-    acosf_test1_validate_isnan(PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/acosh/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/acosh/test1/test1.cpp
deleted file mode 100644
index 3d9368fcd94f..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/acosh/test1/test1.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that acosh return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * acosh_test1_validate
- *
- * test validation function
- */
-void __cdecl acosh_test1_validate(double value, double expected, double variance)
-{
-    double result = acosh(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("acosh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * acosh_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl acosh_test1_validate_isnan(double value)
-{
-    double result = acosh(value);
-
-    if (!_isnan(result))
-    {
-        Fail("acosh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_acosh_test1_paltest_acosh_test1, "c_runtime/acosh/test1/paltest_acosh_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance */
-        {  1,                      0,                      PAL_EPSILON },
-        {  1.0510897883672876,     0.31830988618379067,    PAL_EPSILON },        // expected:  1 / pi
-        {  1.0957974645564909,     0.43429448190325183,    PAL_EPSILON },        // expected:  log10(e)
-        {  1.2095794864199787,     0.63661977236758134,    PAL_EPSILON },        // expected:  2 / pi
-        {  1.25,                   0.69314718055994531,    PAL_EPSILON },        // expected:  ln(2)
-        {  1.2605918365213561,     0.70710678118654752,    PAL_EPSILON },        // expected:  1 / sqrt(2)
-        {  1.3246090892520058,     0.78539816339744831,    PAL_EPSILON },        // expected:  pi / 4
-        {  1.5430806348152438,     1,                      PAL_EPSILON * 10 },
-        {  1.7071001431069344,     1.1283791670955126,     PAL_EPSILON * 10 },   // expected:  2 / sqrt(pi)
-        {  2.1781835566085709,     1.4142135623730950,     PAL_EPSILON * 10 },   // expected:  sqrt(2)
-        {  2.2341880974508023,     1.4426950408889634,     PAL_EPSILON * 10 },   // expected:  log2(e)
-        {  2.5091784786580568,     1.5707963267948966,     PAL_EPSILON * 10 },   // expected:  pi / 2
-        {  5.05,                   2.3025850929940457,     PAL_EPSILON * 10 },   // expected:  ln(10)
-        {  7.6101251386622884,     2.7182818284590452,     PAL_EPSILON * 10 },   // expected:  e
-        {  11.591953275521521,     3.1415926535897932,     PAL_EPSILON * 10 },   // expected:  pi
-        {  PAL_POSINF,             PAL_POSINF,             0 },
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        acosh_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-    }
-
-    acosh_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/acoshf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/acoshf/test1/test1.cpp
deleted file mode 100644
index 78bab37dbf37..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/acoshf/test1/test1.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that acoshf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * acoshf_test1_validate
- *
- * test validation function
- */
-void __cdecl acoshf_test1_validate(float value, float expected, float variance)
-{
-    float result = acoshf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("acoshf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * acoshf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl acoshf_test1_validate_isnan(float value)
-{
-    float result = acoshf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("acoshf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_acoshf_test1_paltest_acoshf_test1, "c_runtime/acoshf/test1/paltest_acoshf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected         variance */
-        {  1,               0,               PAL_EPSILON },
-        {  1.05108979f,     0.318309886f,    PAL_EPSILON },        // expected:  1 / pi
-        {  1.09579746f,     0.434294482f,    PAL_EPSILON },        // expected:  log10f(e)
-        {  1.20957949f,     0.636619772f,    PAL_EPSILON },        // expected:  2 / pi
-        {  1.25f,           0.693147181f,    PAL_EPSILON },        // expected:  ln(2)
-        {  1.26059184f,     0.707106781f,    PAL_EPSILON },        // expected:  1 / sqrtf(2)
-        {  1.32460909f,     0.785398163f,    PAL_EPSILON },        // expected:  pi / 4
-        {  1.54308063f,     1,               PAL_EPSILON * 10 },
-        {  1.70710014f,     1.12837917f,     PAL_EPSILON * 10 },   // expected:  2 / sqrtf(pi)
-        {  2.17818356f,     1.41421356f,     PAL_EPSILON * 10 },   // expected:  sqrtf(2)
-        {  2.23418810f,     1.44269504f,     PAL_EPSILON * 10 },   // expected:  logf2(e)
-        {  2.50917848f,     1.57079633f,     PAL_EPSILON * 10 },   // expected:  pi / 2
-        {  5.05f,           2.30258509f,     PAL_EPSILON * 10 },   // expected:  ln(10)
-        {  7.61012514f,     2.71828183f,     PAL_EPSILON * 10 },   // expected:  e
-        {  11.5919533f,     3.14159265f,     PAL_EPSILON * 100 },  // expected:  pi
-        {  PAL_POSINF,      PAL_POSINF,      0 },
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        acoshf_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-    }
-
-    acoshf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/asin/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/asin/test1/test1.cpp
deleted file mode 100644
index b72599ad33d3..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/asin/test1/test1.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that asin return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * asin_test1_validate
- *
- * test validation function
- */
-void __cdecl asin_test1_validate(double value, double expected, double variance)
-{
-    double result = asin(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("asin(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * asin_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl asin_test1_validate_isnan(double value)
-{
-    double result = asin(value);
-
-    if (!_isnan(result))
-    {
-        Fail("asin(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * asin_test1_validate
- *
- * test validation function for values returning +INF
- */
-void __cdecl asin_test1_validate_isinf_positive(double value)
-{
-    double result = asin(value);
-
-    if (result != PAL_POSINF)
-    {
-        Fail("asin(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_POSINF);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_asin_test1_paltest_asin_test1, "c_runtime/asin/test1/paltest_asin_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance */
-        {  0,                      0,                      PAL_EPSILON },
-        {  0.31296179620778659,    0.31830988618379067,    PAL_EPSILON },           // expected:  1 / pi
-        {  0.41078129050290870,    0.42331082513074800,    PAL_EPSILON },           // expected:  pi - e
-        {  0.42077048331375735,    0.43429448190325183,    PAL_EPSILON },           // expected:  log10(e)
-        {  0.59448076852482208,    0.63661977236758134,    PAL_EPSILON },           // expected:  2 / pi
-        {  0.63896127631363480,    0.69314718055994531,    PAL_EPSILON },           // expected:  ln(2)
-        {  0.64963693908006244,    0.70710678118654752,    PAL_EPSILON },           // expected:  1 / sqrt(2)
-        {  0.70710678118654752,    0.78539816339744831,    PAL_EPSILON },           // expected:  pi / 4,         value: 1 / sqrt(2)
-        {  0.74398033695749319,    0.83900756059574755,    PAL_EPSILON },           // expected:  pi - ln(10)
-        {  0.84147098480789651,    1,                      PAL_EPSILON * 10 },
-        {  0.90371945743584630,    1.1283791670955126,     PAL_EPSILON * 10 },      // expected:  2 / sqrt(pi)
-        {  0.98776594599273553,    1.4142135623730950,     PAL_EPSILON * 10 },      // expected:  sqrt(2)
-        {  0.99180624439366372,    1.4426950408889634,     PAL_EPSILON * 10 },      // expected:  log2(e)
-        {  1,                      1.5707963267948966,     PAL_EPSILON * 10 },      // expected:  pi / 2
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        asin_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        asin_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-    
-    asin_test1_validate_isnan(PAL_NEGINF);
-    asin_test1_validate_isnan(PAL_NAN);
-    asin_test1_validate_isnan(PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/asinf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/asinf/test1/test1.cpp
deleted file mode 100644
index abf35c261851..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/asinf/test1/test1.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that asinf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * asinf_test1_validate
- *
- * test validation function
- */
-void __cdecl asinf_test1_validate(float value, float expected, float variance)
-{
-    float result = asinf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("asinf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * asinf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl asinf_test1_validate_isnan(float value)
-{
-    float result = asinf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("asinf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * asinf_test1_validate
- *
- * test validation function for values returning +INF
- */
-void __cdecl asinf_test1_validate_isinf_positive(float value)
-{
-    float result = asinf(value);
-
-    if (result != PAL_POSINF)
-    {
-        Fail("asinf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_POSINF);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_asinf_test1_paltest_asinf_test1, "c_runtime/asinf/test1/paltest_asinf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected         variance */
-        {  0,               0,               PAL_EPSILON },
-        {  0.312961796f,    0.318309886f,    PAL_EPSILON },           // expected:  1 / pi
-        {  0.410781291f,    0.423310825f,    PAL_EPSILON },           // expected:  pi - e
-        {  0.420770483f,    0.434294482f,    PAL_EPSILON },           // expected:  logf10f(e)
-        {  0.594480769f,    0.636619772f,    PAL_EPSILON },           // expected:  2 / pi
-        {  0.638961276f,    0.693147181f,    PAL_EPSILON },           // expected:  ln(2)
-        {  0.649636939f,    0.707106781f,    PAL_EPSILON },           // expected:  1 / sqrtf(2)
-        {  0.707106781f,    0.785398163f,    PAL_EPSILON },           // expected:  pi / 4,         value: 1 / sqrtf(2)
-        {  0.743980337f,    0.839007561f,    PAL_EPSILON },           // expected:  pi - ln(10)
-        {  0.841470985f,    1,               PAL_EPSILON * 10 },
-        {  0.903719457f,    1.12837917f,     PAL_EPSILON * 10 },      // expected:  2 / sqrtf(pi)
-        {  0.987765946f,    1.41421356f,     PAL_EPSILON * 10 },      // expected:  sqrtf(2)
-        {  0.991806244f,    1.44269504f,     PAL_EPSILON * 10 },      // expected:  logf2(e)
-        {  1,               1.57079633f,     PAL_EPSILON * 10 },      // expected:  pi / 2
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        asinf_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        asinf_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-    
-    asinf_test1_validate_isnan(PAL_NEGINF);
-    asinf_test1_validate_isnan(PAL_NAN);
-    asinf_test1_validate_isnan(PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/asinh/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/asinh/test1/test1.cpp
deleted file mode 100644
index a0ed7953d2ed..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/asinh/test1/test1.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that asinh return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * asinh_test1_validate
- *
- * test validation function
- */
-void __cdecl asinh_test1_validate(double value, double expected, double variance)
-{
-    double result = asinh(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("asinh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * asinh_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl asinh_test1_validate_isnan(double value)
-{
-    double result = asinh(value);
-
-    if (!_isnan(result))
-    {
-        Fail("asinh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * asinh_test1_validate
- *
- * test validation function for values returning +INF
- */
-void __cdecl asinh_test1_validate_isinf_positive(double value)
-{
-    double result = asinh(value);
-
-    if (result != PAL_POSINF)
-    {
-        Fail("asinh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_POSINF);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_asinh_test1_paltest_asinh_test1, "c_runtime/asinh/test1/paltest_asinh_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance */
-        {  0,                      0,                      PAL_EPSILON },
-        {  0.32371243907207108,    0.31830988618379067,    PAL_EPSILON },           // expected:  1 / pi
-        {  0.44807597941469025,    0.43429448190325183,    PAL_EPSILON },           // expected:  log10(e)
-        {  0.68050167815224332,    0.63661977236758134,    PAL_EPSILON },           // expected:  2 / pi
-        {  0.75,                   0.69314718055994531,    PAL_EPSILON },           // expected:  ln(2)
-        {  0.76752314512611633,    0.70710678118654752,    PAL_EPSILON },           // expected:  1 / sqrt(2)
-        {  0.86867096148600961,    0.78539816339744831,    PAL_EPSILON },           // expected:  pi / 4
-        {  1.1752011936438015,     1,                      PAL_EPSILON * 10 },
-        {  1.3835428792038633,     1.1283791670955126,     PAL_EPSILON * 10 },      // expected:  2 / sqrt(pi)
-        {  1.9350668221743567,     1.4142135623730950,     PAL_EPSILON * 10 },      // expected:  sqrt(2)
-        {  1.9978980091062796,     1.4426950408889634,     PAL_EPSILON * 10 },      // expected:  log2(e)
-        {  2.3012989023072949,     1.5707963267948966,     PAL_EPSILON * 10 },      // expected:  pi / 2
-        {  4.95,                   2.3025850929940457,     PAL_EPSILON * 10 },      // expected:  ln(10)
-        {  7.5441371028169758,     2.7182818284590452,     PAL_EPSILON * 10 },      // expected:  e
-        {  11.548739357257748,     3.1415926535897932,     PAL_EPSILON * 10 },      // expected:  pi
-        {  PAL_POSINF,             PAL_POSINF,             0 },
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        asinh_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        asinh_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-    
-    asinh_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/asinhf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/asinhf/test1/test1.cpp
deleted file mode 100644
index eb2af42b98c7..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/asinhf/test1/test1.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that asinhf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * asinhf_test1_validate
- *
- * test validation function
- */
-void __cdecl asinhf_test1_validate(float value, float expected, float variance)
-{
-    float result = asinhf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("asinhf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * asinhf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl asinhf_test1_validate_isnan(float value)
-{
-    float result = asinhf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("asinhf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * asinhf_test1_validate
- *
- * test validation function for values returning +INF
- */
-void __cdecl asinhf_test1_validate_isinf_positive(float value)
-{
-    float result = asinhf(value);
-
-    if (result != PAL_POSINF)
-    {
-        Fail("asinhf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_POSINF);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_asinhf_test1_paltest_asinhf_test1, "c_runtime/asinhf/test1/paltest_asinhf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected         variance */
-        {  0,               0,               PAL_EPSILON },
-        {  0.323712439f,    0.318309886f,    PAL_EPSILON },           // expected:  1 / pi
-        {  0.448075979f,    0.434294482f,    PAL_EPSILON },           // expected:  log10f(e)
-        {  0.680501678f,    0.636619772f,    PAL_EPSILON },           // expected:  2 / pi
-        {  0.75,            0.693147181f,    PAL_EPSILON },           // expected:  ln(2)
-        {  0.767523145f,    0.707106781f,    PAL_EPSILON },           // expected:  1 / sqrtf(2)
-        {  0.868670961f,    0.785398163f,    PAL_EPSILON },           // expected:  pi / 4
-        {  1.17520119f,     1,               PAL_EPSILON * 10 },
-        {  1.38354288f,     1.12837917f,     PAL_EPSILON * 10 },      // expected:  2 / sqrtf(pi)
-        {  1.93506682f,     1.41421356f,     PAL_EPSILON * 10 },      // expected:  sqrtf(2)
-        {  1.99789801f,     1.44269504f,     PAL_EPSILON * 10 },      // expected:  logf2(e)
-        {  2.30129890f,     1.57079633f,     PAL_EPSILON * 10 },      // expected:  pi / 2
-        {  4.95f,           2.30258509f,     PAL_EPSILON * 10 },      // expected:  ln(10)
-        {  7.54413710f,     2.71828183f,     PAL_EPSILON * 10 },      // expected:  e
-        {  11.5487394f,     3.14159265f,     PAL_EPSILON * 10 },      // expected:  pi
-        {  PAL_POSINF,      PAL_POSINF,      0 },
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        asinhf_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        asinhf_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-
-    asinhf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/atan/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/atan/test1/test1.cpp
deleted file mode 100644
index 0e65933bc894..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/atan/test1/test1.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that atan return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * atan_test1_validate
- *
- * test validation function
- */
-void __cdecl atan_test1_validate(double value, double expected, double variance)
-{
-    double result = atan(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("atan(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * atan_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl atan_test1_validate_isnan(double value)
-{
-    double result = atan(value);
-
-    if (!_isnan(result))
-    {
-        Fail("atan(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_atan_test1_paltest_atan_test1, "c_runtime/atan/test1/paltest_atan_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance */
-        {  0,                      0,                      PAL_EPSILON },
-        {  0.32951473309607836,    0.31830988618379067,    PAL_EPSILON },           // expected:  1 / pi
-        {  0.45054953406980750,    0.42331082513074800,    PAL_EPSILON },           // expected:  pi - e
-        {  0.46382906716062964,    0.43429448190325183,    PAL_EPSILON },           // expected:  log10(e)
-        {  0.73930295048660405,    0.63661977236758134,    PAL_EPSILON },           // expected:  2 / pi
-        {  0.83064087786078395,    0.69314718055994531,    PAL_EPSILON },           // expected:  ln(2)
-        {  0.85451043200960189,    0.70710678118654752,    PAL_EPSILON },           // expected:  1 / sqrt(2)
-        {  1,                      0.78539816339744831,    PAL_EPSILON },           // expected:  pi / 4
-        {  1.1134071468135374,     0.83900756059574755,    PAL_EPSILON },           // expected:  pi - ln(10)
-        {  1.5574077246549022,     1,                      PAL_EPSILON * 10 },
-        {  2.1108768356626451,     1.1283791670955126,     PAL_EPSILON * 10 },      // expected:  2 / sqrt(pi)
-        {  6.3341191670421916,     1.4142135623730950,     PAL_EPSILON * 10 },      // expected:  sqrt(2)
-        {  7.7635756709721848,     1.4426950408889634,     PAL_EPSILON * 10 },      // expected:  log2(e)
-        {  PAL_POSINF,             1.5707963267948966,     PAL_EPSILON * 10 },      // expected:  pi / 2
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        atan_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        atan_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-
-    atan_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/atan2/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/atan2/test1/test1.cpp
deleted file mode 100644
index 6298487994d5..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/atan2/test1/test1.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests that atan2 returns correct values for a subset of values.
-**          Tests with positive and negative values of x and y to ensure
-**          atan2 is returning results from the correct quadrant.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-struct test
-{
-    double y;         /* second component of the value to test the function with */
-    double x;         /* first component of the value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * atan2_test1_validate
- *
- * test validation function
- */
-void __cdecl atan2_test1_validate(double y, double x, double expected, double variance)
-{
-    double result = atan2(y, x);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("atan2(%g, %g) returned %20.17g when it should have returned %20.17g",
-             y, x, result, expected);
-    }
-}
-
-/**
- * atan2_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl atan2_test1_validate_isnan(double y, double x)
-{
-    double result = atan2(y, x);
-
-    if (!_isnan(result))
-    {
-        Fail("atan2(%g, %g) returned %20.17g when it should have returned %20.17g",
-             y, x, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_atan2_test1_paltest_atan2_test1, "c_runtime/atan2/test1/paltest_atan2_test1")
-{
-    struct test tests[] = 
-    {
-        /* y                       x                       expected                variance */
-        {  0,                      PAL_POSINF,             0,                      PAL_EPSILON },
-        {  0,                      0,                      0,                      PAL_EPSILON },
-        {  0.31296179620778659,    0.94976571538163866,    0.31830988618379067,    PAL_EPSILON },           // expected:  1 / pi
-        {  0.42077048331375735,    0.90716712923909839,    0.43429448190325183,    PAL_EPSILON },           // expected:  log10(e)
-        {  0.59448076852482208,    0.80410982822879171,    0.63661977236758134,    PAL_EPSILON },           // expected:  2 / pi
-        {  0.63896127631363480,    0.76923890136397213,    0.69314718055994531,    PAL_EPSILON },           // expected:  ln(2)
-        {  0.64963693908006244,    0.76024459707563015,    0.70710678118654752,    PAL_EPSILON },           // expected:  1 / sqrt(2)
-        {  0.70710678118654752,    0.70710678118654752,    0.78539816339744831,    PAL_EPSILON },           // expected:  pi / 4,         value:  1 / sqrt(2)
-        {  1,                      1,                      0.78539816339744831,    PAL_EPSILON },           // expected:  pi / 4
-        {  PAL_POSINF,             PAL_POSINF,             0.78539816339744831,    PAL_EPSILON },           // expected:  pi / 4
-        {  0.84147098480789651,    0.54030230586813972,    1,                      PAL_EPSILON * 10 },
-        {  0.90371945743584630,    0.42812514788535792,    1.1283791670955126,     PAL_EPSILON * 10 },      // expected:  2 / sqrt(pi)
-        {  0.98776594599273553,    0.15594369476537447,    1.4142135623730950,     PAL_EPSILON * 10 },      // expected:  sqrt(2)
-        {  0.99180624439366372,    0.12775121753523991,    1.4426950408889634,     PAL_EPSILON * 10 },      // expected:  log2(e)
-        {  1,                      0,                      1.5707963267948966,     PAL_EPSILON * 10 },      // expected:  pi / 2
-        {  PAL_POSINF,             0,                      1.5707963267948966,     PAL_EPSILON * 10 },      // expected:  pi / 2
-        {  PAL_POSINF,             1,                      1.5707963267948966,     PAL_EPSILON * 10 },      // expected:  pi / 2
-        {  0.74398033695749319,   -0.66820151019031295,    2.3025850929940457,     PAL_EPSILON * 10 },      // expected:  ln(10)
-        {  0.41078129050290870,   -0.91173391478696510,    2.7182818284590452,     PAL_EPSILON * 10 },      // expected:  e
-        {  0,                     -1,                      3.1415926535897932,     PAL_EPSILON * 10 },      // expected:  pi
-        {  1,                      PAL_POSINF,             0,                      PAL_EPSILON },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        const double pi = 3.1415926535897932;
-        
-        atan2_test1_validate( tests[i].y,  tests[i].x,  tests[i].expected,      tests[i].variance);
-        atan2_test1_validate(-tests[i].y,  tests[i].x, -tests[i].expected,      tests[i].variance);
-        atan2_test1_validate( tests[i].y, -tests[i].x,  pi - tests[i].expected, tests[i].variance);
-        atan2_test1_validate(-tests[i].y, -tests[i].x,  tests[i].expected - pi, tests[i].variance);
-    }
-    
-    atan2_test1_validate_isnan(PAL_NEGINF, PAL_NAN);
-    atan2_test1_validate_isnan(PAL_NAN,    PAL_NEGINF);
-    atan2_test1_validate_isnan(PAL_NAN,    PAL_POSINF);
-    atan2_test1_validate_isnan(PAL_POSINF, PAL_NAN);
-    
-    atan2_test1_validate_isnan(PAL_NAN, -1);
-    atan2_test1_validate_isnan(PAL_NAN, -0.0);
-    atan2_test1_validate_isnan(PAL_NAN,  0);
-    atan2_test1_validate_isnan(PAL_NAN,  1);
-    
-    atan2_test1_validate_isnan(-1,   PAL_NAN);
-    atan2_test1_validate_isnan(-0.0, PAL_NAN);
-    atan2_test1_validate_isnan( 0,   PAL_NAN);
-    atan2_test1_validate_isnan( 1,   PAL_NAN);
-    
-    atan2_test1_validate_isnan(PAL_NAN, PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/atan2f/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/atan2f/test1/test1.cpp
deleted file mode 100644
index f5915b3a0e1c..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/atan2f/test1/test1.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests that atan2f returns correct values for a subset of values.
-**          Tests with positive and negative values of x and y to ensure
-**          atan2f is returning results from the correct quadrant.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-struct test
-{
-    float y;         /* second component of the value to test the function with */
-    float x;         /* first component of the value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * atan2f_test1_validate
- *
- * test validation function
- */
-void __cdecl atan2f_test1_validate(float y, float x, float expected, float variance)
-{
-    float result = atan2f(y, x);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("atan2f(%g, %g) returned %10.9g when it should have returned %10.9g",
-             y, x, result, expected);
-    }
-}
-
-/**
- * atan2f_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl atan2f_test1_validate_isnan(float y, float x)
-{
-    float result = atan2f(y, x);
-
-    if (!_isnanf(result))
-    {
-        Fail("atan2f(%g, %g) returned %10.9g when it should have returned %10.9g",
-             y, x, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_atan2f_test1_paltest_atan2f_test1, "c_runtime/atan2f/test1/paltest_atan2f_test1")
-{
-    struct test tests[] = 
-    {
-        /* y                x                expected         variance */
-        {  0,               PAL_POSINF,      0,               PAL_EPSILON },
-        {  0,               0,               0,               PAL_EPSILON },
-        {  0.312961796f,    0.949765715f,    0.318309886f,    PAL_EPSILON },           // expected:  1 / pi
-        {  0.420770483f,    0.907167129f,    0.434294482f,    PAL_EPSILON },           // expected:  logf10f(e)
-        {  0.594480769f,    0.804109828f,    0.636619772f,    PAL_EPSILON },           // expected:  2 / pi
-        {  0.638961276f,    0.769238901f,    0.693147181f,    PAL_EPSILON },           // expected:  ln(2)
-        {  0.649636939f,    0.760244597f,    0.707106781f,    PAL_EPSILON },           // expected:  1 / sqrtf(2)
-        {  0.707106781f,    0.707106781f,    0.785398163f,    PAL_EPSILON },           // expected:  pi / 4,         value:  1 / sqrtf(2)
-        {  1,               1,               0.785398163f,    PAL_EPSILON },           // expected:  pi / 4
-        {  PAL_POSINF,      PAL_POSINF,      0.785398163f,    PAL_EPSILON },           // expected:  pi / 4
-        {  0.841470985f,    0.540302306f,    1,               PAL_EPSILON * 10 },
-        {  0.903719457f,    0.428125148f,    1.12837917f,     PAL_EPSILON * 10 },      // expected:  2 / sqrtf(pi)
-        {  0.987765946f,    0.155943695f,    1.41421356f,     PAL_EPSILON * 10 },      // expected:  sqrtf(2)
-        {  0.991806244f,    0.127751218f,    1.44269504f,     PAL_EPSILON * 10 },      // expected:  logf2(e)
-        {  1,               0,               1.57079633f,     PAL_EPSILON * 10 },      // expected:  pi / 2
-        {  PAL_POSINF,      0,               1.57079633f,     PAL_EPSILON * 10 },      // expected:  pi / 2
-        {  PAL_POSINF,      1,               1.57079633f,     PAL_EPSILON * 10 },      // expected:  pi / 2
-        {  0.743980337f,   -0.668201510f,    2.30258509f,     PAL_EPSILON * 10 },      // expected:  ln(10)
-        {  0.410781291f,   -0.911733915f,    2.71828183f,     PAL_EPSILON * 10 },      // expected:  e
-        {  0,              -1,               3.14159265f,     PAL_EPSILON * 10 },      // expected:  pi
-        {  1,               PAL_POSINF,      0,               PAL_EPSILON },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        const float pi = 3.14159265f;
-        
-        atan2f_test1_validate( tests[i].y,  tests[i].x,  tests[i].expected,      tests[i].variance);
-        atan2f_test1_validate(-tests[i].y,  tests[i].x, -tests[i].expected,      tests[i].variance);
-        atan2f_test1_validate( tests[i].y, -tests[i].x,  pi - tests[i].expected, tests[i].variance);
-        atan2f_test1_validate(-tests[i].y, -tests[i].x,  tests[i].expected - pi, tests[i].variance);
-    }
-    
-    atan2f_test1_validate_isnan(PAL_NEGINF, PAL_NAN);
-    atan2f_test1_validate_isnan(PAL_NAN,    PAL_NEGINF);
-    atan2f_test1_validate_isnan(PAL_NAN,    PAL_POSINF);
-    atan2f_test1_validate_isnan(PAL_POSINF, PAL_NAN);
-    
-    atan2f_test1_validate_isnan(PAL_NAN, -1);
-    atan2f_test1_validate_isnan(PAL_NAN, -0.0f);
-    atan2f_test1_validate_isnan(PAL_NAN,  0);
-    atan2f_test1_validate_isnan(PAL_NAN,  1);
-    
-    atan2f_test1_validate_isnan(-1,   PAL_NAN);
-    atan2f_test1_validate_isnan(-0.0f, PAL_NAN);
-    atan2f_test1_validate_isnan( 0,   PAL_NAN);
-    atan2f_test1_validate_isnan( 1,   PAL_NAN);
-    
-    atan2f_test1_validate_isnan(PAL_NAN, PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/atanf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/atanf/test1/test1.cpp
deleted file mode 100644
index 518775f1f9c3..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/atanf/test1/test1.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that atanf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * atanf_test1_validate
- *
- * test validation function
- */
-void __cdecl atanf_test1_validate(float value, float expected, float variance)
-{
-    float result = atanf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("atanf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * atanf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl atanf_test1_validate_isnan(float value)
-{
-    float result = atanf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("atanf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_atanf_test1_paltest_atanf_test1, "c_runtime/atanf/test1/paltest_atanf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected         variance */
-        {  0,               0,               PAL_EPSILON },
-        {  0.329514733f,    0.318309886f,    PAL_EPSILON },           // expected:  1 / pi
-        {  0.450549534f,    0.423310825f,    PAL_EPSILON },           // expected:  pi - e
-        {  0.463829067f,    0.434294482f,    PAL_EPSILON },           // expected:  logf10f(e)
-        {  0.739302950f,    0.636619772f,    PAL_EPSILON },           // expected:  2 / pi
-        {  0.830640878f,    0.693147181f,    PAL_EPSILON },           // expected:  ln(2)
-        {  0.854510432f,    0.707106781f,    PAL_EPSILON },           // expected:  1 / sqrtf(2)
-        {  1,               0.785398163f,    PAL_EPSILON },           // expected:  pi / 4
-        {  1.11340715f,     0.839007561f,    PAL_EPSILON },           // expected:  pi - ln(10)
-        {  1.55740772f,     1,               PAL_EPSILON * 10 },
-        {  2.11087684f,     1.12837917f,     PAL_EPSILON * 10 },      // expected:  2 / sqrtf(pi)
-        {  6.33411917f,     1.41421356f,     PAL_EPSILON * 10 },      // expected:  sqrtf(2)
-        {  7.76357567f,     1.44269504f,     PAL_EPSILON * 10 },      // expected:  logf2(e)
-        {  PAL_POSINF,      1.57079633f,     PAL_EPSILON * 10 },      // expected:  pi / 2
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        atanf_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        atanf_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-
-    atanf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/atanh/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/atanh/test1/test1.cpp
deleted file mode 100644
index 24a042826e09..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/atanh/test1/test1.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that atanh return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * atanh_test1_validate
- *
- * test validation function
- */
-void __cdecl atanh_test1_validate(double value, double expected, double variance)
-{
-    double result = atanh(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("atanh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * atanh_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl atanh_test1_validate_isnan(double value)
-{
-    double result = atanh(value);
-
-    if (!_isnan(result))
-    {
-        Fail("atanh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_atanh_test1_paltest_atanh_test1, "c_runtime/atanh/test1/paltest_atanh_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance */
-        {  0,                      0,                      PAL_EPSILON },
-        {  0.30797791269089433,    0.31830988618379067,    PAL_EPSILON },       // expected:  1 / pi
-        {  0.40890401183401433,    0.43429448190325183,    PAL_EPSILON },       // expected:  log10(e)
-        {  0.56259360033158334,    0.63661977236758134,    PAL_EPSILON },       // expected:  2 / pi
-        {  0.6,                    0.69314718055994531,    PAL_EPSILON },       // expected:  ln(2)
-        {  0.60885936501391381,    0.70710678118654752,    PAL_EPSILON },       // expected:  1 / sqrt(2)
-        {  0.65579420263267244,    0.78539816339744831,    PAL_EPSILON },       // expected:  pi / 4
-        {  0.76159415595576489,    1,                      PAL_EPSILON * 10 },
-        {  0.81046380599898809,    1.1283791670955126,     PAL_EPSILON * 10 },  // expected:  2 / sqrt(pi)
-        {  0.88838556158566054,    1.4142135623730950,     PAL_EPSILON * 10 },  // expected:  sqrt(2)
-        {  0.89423894585503855,    1.4426950408889634,     PAL_EPSILON * 10 },  // expected:  log2(e)
-        {  0.91715233566727435,    1.5707963267948966,     PAL_EPSILON * 10 },  // expected:  pi / 2
-        {  0.98019801980198020,    2.3025850929940457,     PAL_EPSILON * 10 },  // expected:  ln(10)
-        {  0.99132891580059984,    2.7182818284590452,     PAL_EPSILON * 10 },  // expected:  e
-        {  0.99627207622074994,    3.1415926535897932,     PAL_EPSILON * 10 },  // expected:  pi
-        {  1,                      PAL_POSINF,             PAL_EPSILON * 10 }
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        atanh_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        atanh_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-
-    atanh_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/atanhf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/atanhf/test1/test1.cpp
deleted file mode 100644
index d8d184f905fb..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/atanhf/test1/test1.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that atanhf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * atanhf_test1_validate
- *
- * test validation function
- */
-void __cdecl atanhf_test1_validate(float value, float expected, float variance)
-{
-    float result = atanhf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("atanhf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * atanhf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl atanhf_test1_validate_isnan(float value)
-{
-    float result = atanhf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("atanhf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_atanhf_test1_paltest_atanhf_test1, "c_runtime/atanhf/test1/paltest_atanhf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected         variance */
-        {  0,               0,               PAL_EPSILON },
-        {  0.307977913f,    0.318309886f,    PAL_EPSILON },       // expected:  1 / pi
-        {  0.408904012f,    0.434294482f,    PAL_EPSILON },       // expected:  log10f(e)
-        {  0.562593600f,    0.636619772f,    PAL_EPSILON },       // expected:  2 / pi
-        {  0.6f,            0.693147181f,    PAL_EPSILON },       // expected:  ln(2)
-        {  0.608859365f,    0.707106781f,    PAL_EPSILON },       // expected:  1 / sqrtf(2)
-        {  0.655794203f,    0.785398163f,    PAL_EPSILON },       // expected:  pi / 4
-        {  0.761594156f,    1,               PAL_EPSILON * 10 },
-        {  0.810463806f,    1.12837917f,     PAL_EPSILON * 10 },  // expected:  2 / sqrtf(pi)
-        {  0.888385562f,    1.41421356f,     PAL_EPSILON * 10 },  // expected:  sqrtf(2)
-        {  0.894238946f,    1.44269504f,     PAL_EPSILON * 10 },  // expected:  logf2(e)
-        {  0.917152336f,    1.57079633f,     PAL_EPSILON * 10 },  // expected:  pi / 2
-        {  0.980198020f,    2.30258509f,     PAL_EPSILON * 10 },  // expected:  ln(10)
-        {  0.991328916f,    2.71828183f,     PAL_EPSILON * 10 },  // expected:  e
-        {  0.996272076f,    3.14159265f,     PAL_EPSILON * 10 },  // expected:  pi
-        {  1,               PAL_POSINF,      PAL_EPSILON * 10 }
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        atanhf_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        atanhf_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-
-    atanhf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/bsearch/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/bsearch/test1/test1.cpp
deleted file mode 100644
index eacb660dee09..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/bsearch/test1/test1.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Calls bsearch to find a character in a sorted buffer, and
-**          verifies that the correct position is returned.
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-int __cdecl charcmp_bsearch_test1(const void *pa, const void *pb)
-{
-    return memcmp(pa, pb, 1);
-}
-
-PALTEST(c_runtime_bsearch_test1_paltest_bsearch_test1, "c_runtime/bsearch/test1/paltest_bsearch_test1")
-{
-
-    const char array[] = "abcdefghij";
-    char * found=NULL;
-
-    /*
-     *  Initialize the PAL and return FAIL if this fails
-     */
-    if (0 != (PAL_Initialize(argc, argv)))
-    {
-        return FAIL;
-    }
-
-    found = (char *)bsearch(&"d", array, sizeof(array) - 1, (sizeof(char))
-                            , charcmp_bsearch_test1);
-    if (found != array + 3)
-    {
-        Fail ("bsearch was unable to find a specified character in a "
-                "sorted list.\n");
-    }
-    PAL_Terminate();
-    return PASS;
-}
-
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/bsearch/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/bsearch/test2/test2.cpp
deleted file mode 100644
index a916e61362ee..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/bsearch/test2/test2.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Calls bsearch to find a character in a sorted buffer, 
-**          that does not exist.
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-int __cdecl charcmp_bsearch_test2(const void *pa, const void *pb)
-{
-    return *(const char *)pa - *(const char *)pb;
-}
-
-PALTEST(c_runtime_bsearch_test2_paltest_bsearch_test2, "c_runtime/bsearch/test2/paltest_bsearch_test2")
-{
-
-    const char array[] = "abcefghij";
-    const char missing[] = "0dz";
-    char * found=NULL;
-    const char * candidate = missing;
-
-    /*
-     *  Initialize the PAL and return FAIL if this fails
-     */
-    if (0 != (PAL_Initialize(argc, argv)))
-    {
-        return FAIL;
-    }
-
-    while (*candidate) {
-        found = (char *)bsearch(candidate, array, sizeof(array) - 1,
-                                (sizeof(char)), charcmp_bsearch_test2);
-        if (found != NULL)
-        {
-            Fail ("ERROR: bsearch was able to find a specified character '%c' "
-                  "in a sorted list '%s' as '%c' "
-                  "even though the character is not in the list.\n",
-                  *candidate, array, *found);
-        }
-
-	candidate++;
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
-
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/cbrt/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/cbrt/test1/test1.cpp
deleted file mode 100644
index 4cc57c27e491..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/cbrt/test1/test1.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose:  Call the cbrt function on a positive value, a positive value
-** with a decimal and on the maximum possible double value.
-**
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * cbrt_test1_validate
- *
- * test validation function
- */
-void __cdecl cbrt_test1_validate(double value, double expected, double variance)
-{
-    double result = cbrt(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("cbrt(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * cbrt_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl cbrt_test1_validate_isnan(double value)
-{
-    double result = cbrt(value);
-
-    if (!_isnan(result))
-    {
-        Fail("cbrt(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-PALTEST(c_runtime_cbrt_test1_paltest_cbrt_test1, "c_runtime/cbrt/test1/paltest_cbrt_test1")
-{
-    struct test tests[] =
-    {
-        /* value                   expected                variance */
-        {  0.31830988618379067,    0.68278406325529568,    PAL_EPSILON },       // value:  1 / pi
-        {  0.43429448190325183,    0.75728863133090766,    PAL_EPSILON },       // value:  log10(e)
-        {  0.63661977236758134,    0.86025401382809963,    PAL_EPSILON },       // value:  2 / pi
-        {  0.69314718055994531,    0.88499704450051772,    PAL_EPSILON },       // value:  ln(2)
-        {  0.70710678118654752,    0.89089871814033930,    PAL_EPSILON },       // value:  1 / sqrt(2)
-        {  0.78539816339744831,    0.92263507432201421,    PAL_EPSILON },       // value:  pi / 4
-        {  1,                      1,                      PAL_EPSILON * 10 },
-        {  1.1283791670955126,     1.0410821966965807,     PAL_EPSILON * 10 },  // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     1.1224620483093730,     PAL_EPSILON * 10 },  // value:  sqrt(2)
-        {  1.4426950408889634,     1.1299472763373901,     PAL_EPSILON * 10 },  // value:  log2(e)
-        {  1.5707963267948966,     1.1624473515096265,     PAL_EPSILON * 10 },  // value:  pi / 2
-        {  2.3025850929940457,     1.3205004784536852,     PAL_EPSILON * 10 },  // value:  ln(10)
-        {  2.7182818284590452,     1.3956124250860895,     PAL_EPSILON * 10 },  // value:  e
-        {  3.1415926535897932,     1.4645918875615233,     PAL_EPSILON * 10 },  // value:  pi
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    cbrt_test1_validate(-0.0, -0.0, PAL_EPSILON);
-    cbrt_test1_validate( 0.0,  0.0, PAL_EPSILON);
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        cbrt_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-        cbrt_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-
-    cbrt_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/cbrtf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/cbrtf/test1/test1.cpp
deleted file mode 100644
index 8879a11ed77c..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/cbrtf/test1/test1.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose:  Call the cbrtf function on a positive value, a positive value
-** with a decimal and on the maximum possible float value.
-**
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * cbrtf_test1_validate
- *
- * test validation function
- */
-void __cdecl cbrtf_test1_validate(float value, float expected, float variance)
-{
-    float result = cbrtf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("cbrtf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * cbrtf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl cbrtf_test1_validate_isnan(float value)
-{
-    float result = cbrtf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("cbrtf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-PALTEST(c_runtime_cbrtf_test1_paltest_cbrtf_test1, "c_runtime/cbrtf/test1/paltest_cbrtf_test1")
-{
-    struct test tests[] =
-    {
-        /* value            expected         variance */
-        {  0.318309886f,    0.682784063f,    PAL_EPSILON },       // value:  1 / pi
-        {  0.434294482f,    0.757288631f,    PAL_EPSILON },       // value:  log10f(e)
-        {  0.636619772f,    0.860254014f,    PAL_EPSILON },       // value:  2 / pi
-        {  0.693147181f,    0.884997045f,    PAL_EPSILON },       // value:  ln(2)
-        {  0.707106781f,    0.890898718f,    PAL_EPSILON },       // value:  1 / sqrtf(2)
-        {  0.785398163f,    0.922635074f,    PAL_EPSILON },       // value:  pi / 4
-        {  1,               1,               PAL_EPSILON * 10 },
-        {  1.12837917f,     1.04108220f,     PAL_EPSILON * 10 },  // value:  2 / sqrtf(pi)
-        {  1.41421356f,     1.12246205f,     PAL_EPSILON * 10 },  // value:  sqrtf(2)
-        {  1.44269504f,     1.12994728f,     PAL_EPSILON * 10 },  // value:  logf2(e)
-        {  1.57079633f,     1.16244735f,     PAL_EPSILON * 10 },  // value:  pi / 2
-        {  2.30258509f,     1.32050048f,     PAL_EPSILON * 10 },  // value:  ln(10)
-        {  2.71828183f,     1.39561243f,     PAL_EPSILON * 10 },  // value:  e
-        {  3.14159265f,     1.46459189f,     PAL_EPSILON * 10 },  // value:  pi
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    cbrtf_test1_validate(-0.0f, -0.0f, PAL_EPSILON);
-    cbrtf_test1_validate( 0.0f,  0.0f, PAL_EPSILON);
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        cbrtf_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-        cbrtf_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-
-    cbrtf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/ceil/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/ceil/test1/test1.cpp
deleted file mode 100644
index 3f1d71e625c7..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/ceil/test1/test1.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests ceil with simple positive and negative values.  Also tests 
-**          extreme cases like extremely small values and positive and 
-**          negative infinity.  Makes sure that calling ceil on NaN returns 
-**          NaN
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * ceil_test1_validate
- *
- * test validation function
- */
-void __cdecl ceil_test1_validate(double value, double expected, double variance)
-{
-    double result = ceil(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("ceil(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * ceil_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl ceil_test1_validate_isnan(double value)
-{
-    double result = ceil(value);
-
-    if (!_isnan(result))
-    {
-        Fail("ceil(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_ceil_test1_paltest_ceil_test1, "c_runtime/ceil/test1/paltest_ceil_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected           variance */
-        {  0.31830988618379067,    1,                 PAL_EPSILON * 10 },     // value:  1 / pi
-        {  0.43429448190325183,    1,                 PAL_EPSILON * 10 },     // value:  log10(e)
-        {  0.63661977236758134,    1,                 PAL_EPSILON * 10 },     // value:  2 / pi
-        {  0.69314718055994531,    1,                 PAL_EPSILON * 10 },     // value:  ln(2)
-        {  0.70710678118654752,    1,                 PAL_EPSILON * 10 },     // value:  1 / sqrt(2)
-        {  0.78539816339744831,    1,                 PAL_EPSILON * 10 },     // value:  pi / 4
-        {  1.1283791670955126,     2,                 PAL_EPSILON * 10 },     // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     2,                 PAL_EPSILON * 10 },     // value:  sqrt(2)
-        {  1.4426950408889634,     2,                 PAL_EPSILON * 10 },     // value:  log2(e)
-        {  1.5707963267948966,     2,                 PAL_EPSILON * 10 },     // value:  pi / 2
-        {  2.3025850929940457,     3,                 PAL_EPSILON * 10 },     // value:  ln(10)
-        {  2.7182818284590452,     3,                 PAL_EPSILON * 10 },     // value:  e
-        {  3.1415926535897932,     4,                 PAL_EPSILON * 10 },     // value:  pi
-        {  PAL_POSINF,             PAL_POSINF,        0 }
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-    
-    ceil_test1_validate( 0,    0, PAL_EPSILON);
-    ceil_test1_validate(-0.0,  0, PAL_EPSILON);
-    
-    ceil_test1_validate( 1,    1, PAL_EPSILON * 10);
-    ceil_test1_validate(-1.0, -1, PAL_EPSILON * 10);
-    
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        ceil_test1_validate( tests[i].value, tests[i].expected,     tests[i].variance);
-        ceil_test1_validate(-tests[i].value, 1 - tests[i].expected, tests[i].variance);
-    }
-    
-    ceil_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/ceilf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/ceilf/test1/test1.cpp
deleted file mode 100644
index cf237ea8eb95..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/ceilf/test1/test1.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests ceilf with simple positive and negative values.  Also tests 
-**          extreme cases like extremely small values and positive and 
-**          negative infinity.  Makes sure that calling ceilf on NaN returns 
-**          NaN
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * ceilf_test1_validate
- *
- * test validation function
- */
-void __cdecl ceilf_test1_validate(float value, float expected, float variance)
-{
-    float result = ceilf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("ceilf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * ceilf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl ceilf_test1_validate_isnan(float value)
-{
-    float result = ceilf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("ceilf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_ceilf_test1_paltest_ceilf_test1, "c_runtime/ceilf/test1/paltest_ceilf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected       variance */
-        {  0.318309886f,    1,             PAL_EPSILON * 10 },     // value:  1 / pi
-        {  0.434294482f,    1,             PAL_EPSILON * 10 },     // value:  log10f(e)
-        {  0.636619772f,    1,             PAL_EPSILON * 10 },     // value:  2 / pi
-        {  0.693147181f,    1,             PAL_EPSILON * 10 },     // value:  ln(2)
-        {  0.707106781f,    1,             PAL_EPSILON * 10 },     // value:  1 / sqrtf(2)
-        {  0.785398163f,    1,             PAL_EPSILON * 10 },     // value:  pi / 4
-        {  1.12837917f,     2,             PAL_EPSILON * 10 },     // value:  2 / sqrtf(pi)
-        {  1.41421356f,     2,             PAL_EPSILON * 10 },     // value:  sqrtf(2)
-        {  1.44269504f,     2,             PAL_EPSILON * 10 },     // value:  logf2(e)
-        {  1.57079633f,     2,             PAL_EPSILON * 10 },     // value:  pi / 2
-        {  2.30258509f,     3,             PAL_EPSILON * 10 },     // value:  ln(10)
-        {  2.71828183f,     3,             PAL_EPSILON * 10 },     // value:  e
-        {  3.14159265f,     4,             PAL_EPSILON * 10 },     // value:  pi
-        {  PAL_POSINF,      PAL_POSINF,    0 }
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-    
-    ceilf_test1_validate( 0,    0, PAL_EPSILON);
-    ceilf_test1_validate(-0.0f,  0, PAL_EPSILON);
-    
-    ceilf_test1_validate( 1,    1, PAL_EPSILON * 10);
-    ceilf_test1_validate(-1.0f, -1, PAL_EPSILON * 10);
-    
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        ceilf_test1_validate( tests[i].value, tests[i].expected,     tests[i].variance);
-        ceilf_test1_validate(-tests[i].value, 1 - tests[i].expected, tests[i].variance);
-    }
-    
-    ceilf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/cos/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/cos/test1/test1.cpp
deleted file mode 100644
index d574895731d5..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/cos/test1/test1.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that cos return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * cos_test1_validate
- *
- * test validation function
- */
-void __cdecl cos_test1_validate(double value, double expected, double variance)
-{
-    double result = cos(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("cos(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * cos_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl cos_test1_validate_isnan(double value)
-{
-    double result = cos(value);
-
-    if (!_isnan(result))
-    {
-        Fail("cos(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_cos_test1_paltest_cos_test1, "c_runtime/cos/test1/paltest_cos_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance */
-        {  0,                      1,                      PAL_EPSILON * 10 },
-        {  0.31830988618379067,    0.94976571538163866,    PAL_EPSILON },       // value:  1 / pi
-        {  0.43429448190325183,    0.90716712923909839,    PAL_EPSILON },       // value:  log10(e)
-        {  0.63661977236758134,    0.80410982822879171,    PAL_EPSILON },       // value:  2 / pi
-        {  0.69314718055994531,    0.76923890136397213,    PAL_EPSILON },       // value:  ln(2)
-        {  0.70710678118654752,    0.76024459707563015,    PAL_EPSILON },       // value:  1 / sqrt(2)
-        {  0.78539816339744831,    0.70710678118654752,    PAL_EPSILON },       // value:  pi / 4,         expected:  1 / sqrt(2)
-        {  1,                      0.54030230586813972,    PAL_EPSILON },
-        {  1.1283791670955126,     0.42812514788535792,    PAL_EPSILON },       // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     0.15594369476537447,    PAL_EPSILON },       // value:  sqrt(2)
-        {  1.4426950408889634,     0.12775121753523991,    PAL_EPSILON },       // value:  log2(e)
-        {  1.5707963267948966,     0,                      PAL_EPSILON },       // value:  pi / 2
-        {  2.3025850929940457,    -0.66820151019031295,    PAL_EPSILON },       // value:  ln(10)
-        {  2.7182818284590452,    -0.91173391478696510,    PAL_EPSILON },       // value:  e
-        {  3.1415926535897932,    -1,                      PAL_EPSILON * 10 },  // value:  pi
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        cos_test1_validate( tests[i].value, tests[i].expected, tests[i].variance);
-        cos_test1_validate(-tests[i].value, tests[i].expected, tests[i].variance);
-    }
-    
-    cos_test1_validate_isnan(PAL_NEGINF);
-    cos_test1_validate_isnan(PAL_NAN);
-    cos_test1_validate_isnan(PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/cosf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/cosf/test1/test1.cpp
deleted file mode 100644
index ed3a4512b7fe..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/cosf/test1/test1.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that cosf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * cosf_test1_validate
- *
- * test validation function
- */
-void __cdecl cosf_test1_validate(float value, float expected, float variance)
-{
-    float result = cosf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("cosf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * cosf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl cosf_test1_validate_isnan(float value)
-{
-    float result = cosf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("cosf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_cosf_test1_paltest_cosf_test1, "c_runtime/cosf/test1/paltest_cosf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected         variance */
-        {  0,               1,               PAL_EPSILON * 10 },
-        {  0.318309886f,    0.949765715f,    PAL_EPSILON },       // value:  1 / pi
-        {  0.434294482f,    0.907167129f,    PAL_EPSILON },       // value:  log10f(e)
-        {  0.636619772f,    0.804109828f,    PAL_EPSILON },       // value:  2 / pi
-        {  0.693147181f,    0.769238901f,    PAL_EPSILON },       // value:  ln(2)
-        {  0.707106781f,    0.760244597f,    PAL_EPSILON },       // value:  1 / sqrtf(2)
-        {  0.785398163f,    0.707106781f,    PAL_EPSILON },       // value:  pi / 4,         expected:  1 / sqrtf(2)
-        {  1,               0.540302306f,    PAL_EPSILON },
-        {  1.12837917f,     0.428125148f,    PAL_EPSILON },       // value:  2 / sqrtf(pi)
-        {  1.41421356f,     0.155943695f,    PAL_EPSILON },       // value:  sqrtf(2)
-        {  1.44269504f,     0.127751218f,    PAL_EPSILON },       // value:  logf2(e)
-        {  1.57079633f,     0,               PAL_EPSILON },       // value:  pi / 2
-        {  2.30258509f,    -0.668201510f,    PAL_EPSILON },       // value:  ln(10)
-        {  2.71828183f,    -0.911733918f,    PAL_EPSILON },       // value:  e
-        {  3.14159265f,    -1,               PAL_EPSILON * 10 },  // value:  pi
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        cosf_test1_validate( tests[i].value, tests[i].expected, tests[i].variance);
-        cosf_test1_validate(-tests[i].value, tests[i].expected, tests[i].variance);
-    }
-    
-    cosf_test1_validate_isnan(PAL_NEGINF);
-    cosf_test1_validate_isnan(PAL_NAN);
-    cosf_test1_validate_isnan(PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/cosh/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/cosh/test1/test1.cpp
deleted file mode 100644
index 78518b78e0e4..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/cosh/test1/test1.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that cosh return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;      /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * cosh_test1_validate
- *
- * test validation function
- */
-void __cdecl cosh_test1_validate(double value, double expected, double variance)
-{
-    double result = cosh(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("cosh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * cosh_test1_validate
- *
- * test validation function for values returning PAL_NAN
- */
-void __cdecl cosh_test1_validate_isnan(double value)
-{
-    double result = cosh(value);
-
-    if (!_isnan(result))
-    {
-        Fail("cosh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_cosh_test1_paltest_cosh_test1, "c_runtime/cosh/test1/paltest_cosh_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected               variance */
-        {  0,                      1,                     PAL_EPSILON * 10 },
-        {  0.31830988618379067,    1.0510897883672876,    PAL_EPSILON * 10 },   // value:  1 / pi
-        {  0.43429448190325183,    1.0957974645564909,    PAL_EPSILON * 10 },   // value:  log10(e)
-        {  0.63661977236758134,    1.2095794864199787,    PAL_EPSILON * 10 },   // value:  2 / pi
-        {  0.69314718055994531,    1.25,                  PAL_EPSILON * 10 },   // value:  ln(2)
-        {  0.70710678118654752,    1.2605918365213561,    PAL_EPSILON * 10 },   // value:  1 / sqrt(2)
-        {  0.78539816339744831,    1.3246090892520058,    PAL_EPSILON * 10 },   // value:  pi / 4
-        {  1,                      1.5430806348152438,    PAL_EPSILON * 10 },
-        {  1.1283791670955126,     1.7071001431069344,    PAL_EPSILON * 10 },   // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     2.1781835566085709,    PAL_EPSILON * 10 },   // value:  sqrt(2)
-        {  1.4426950408889634,     2.2341880974508023,    PAL_EPSILON * 10 },   // value:  log2(e)
-        {  1.5707963267948966,     2.5091784786580568,    PAL_EPSILON * 10 },   // value:  pi / 2
-        {  2.3025850929940457,     5.05,                  PAL_EPSILON * 10 },   // value:  ln(10)
-        {  2.7182818284590452,     7.6101251386622884,    PAL_EPSILON * 10 },   // value:  e
-        {  3.1415926535897932,     11.591953275521521,    PAL_EPSILON * 100 },  // value:  pi
-        {  PAL_POSINF,             PAL_POSINF,            0 },
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        cosh_test1_validate( tests[i].value, tests[i].expected, tests[i].variance);
-        cosh_test1_validate(-tests[i].value, tests[i].expected, tests[i].variance);
-    }
-    
-    cosh_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/coshf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/coshf/test1/test1.cpp
deleted file mode 100644
index 27ba18c080f4..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/coshf/test1/test1.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that coshf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;      /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * coshf_test1_validate
- *
- * test validation function
- */
-void __cdecl coshf_test1_validate(float value, float expected, float variance)
-{
-    float result = coshf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("coshf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * coshf_test1_validate
- *
- * test validation function for values returning PAL_NAN
- */
-void __cdecl coshf_test1_validate_isnan(float value)
-{
-    float result = coshf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("coshf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_coshf_test1_paltest_coshf_test1, "c_runtime/coshf/test1/paltest_coshf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected        variance */
-        {  0,               1,              PAL_EPSILON * 10 },
-        {  0.318309886f,    1.05108979f,    PAL_EPSILON * 10 },   // value:  1 / pi
-        {  0.434294482f,    1.09579746f,    PAL_EPSILON * 10 },   // value:  log10f(e)
-        {  0.636619772f,    1.20957949f,    PAL_EPSILON * 10 },   // value:  2 / pi
-        {  0.693147181f,    1.25f,          PAL_EPSILON * 10 },   // value:  ln(2)
-        {  0.707106781f,    1.26059184f,    PAL_EPSILON * 10 },   // value:  1 / sqrtf(2)
-        {  0.785398163f,    1.32460909f,    PAL_EPSILON * 10 },   // value:  pi / 4
-        {  1,               1.54308063f,    PAL_EPSILON * 10 },
-        {  1.12837917f,     1.70710014f,    PAL_EPSILON * 10 },   // value:  2 / sqrtf(pi)
-        {  1.41421356f,     2.17818356f,    PAL_EPSILON * 10 },   // value:  sqrtf(2)
-        {  1.44269504f,     2.23418810f,    PAL_EPSILON * 10 },   // value:  logf2(e)
-        {  1.57079633f,     2.50917848f,    PAL_EPSILON * 10 },   // value:  pi / 2
-        {  2.30258509f,     5.05f,          PAL_EPSILON * 10 },   // value:  ln(10)
-        {  2.71828183f,     7.61012514f,    PAL_EPSILON * 10 },   // value:  e
-        {  3.14159265f,     11.5919533f,    PAL_EPSILON * 100 },  // value:  pi
-        {  PAL_POSINF,      PAL_POSINF,     0 },
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        coshf_test1_validate( tests[i].value, tests[i].expected, tests[i].variance);
-        coshf_test1_validate(-tests[i].value, tests[i].expected, tests[i].variance);
-    }
-    
-    coshf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/errno/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/errno/test1/test1.cpp
deleted file mode 100644
index 7daab481ef10..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/errno/test1/test1.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Test that errno begins as 0, and sets to ERANGE when that 
-** error is forced with wcstoul.  
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_errno_test1_paltest_errno_test1, "c_runtime/errno/test1/paltest_errno_test1")
-{
-    WCHAR overstr[] = {'4','2','9','4','9','6','7','2','9','6',0};
-    WCHAR *end;
-    
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-    
-    /* 
-       The only value that must be supported is
-       ERANGE, in the event that wcstoul() fails due to overflow. 
-    */ 
-    
-    wcstoul(overstr, &end, 10);
-    
-    if (errno != ERANGE)
-    {
-        Fail("ERROR: wcstoul did not set errno to ERANGE.  Instead "
-             "the value of errno is %d\n", errno);
-    }
-
-        
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/errno/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/errno/test2/test2.cpp
deleted file mode 100644
index 7a0e27007f71..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/errno/test2/test2.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test2.c
-**
-** Purpose: Test that errno is 'per-thread' as noted in the documentation. 
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-/* 
-   This thread function just checks that errno is initially 0 and then sets
-   it to a new value before returning.
-*/
-DWORD PALAPI ThreadFunc_errno_test2( LPVOID lpParam ) 
-{ 
-       
-    if(errno != 0) 
-    {
-        *((DWORD*)lpParam) = 1;
-    }
-
-    errno = 20;
-
-    return 0; 
-} 
-
-
-PALTEST(c_runtime_errno_test2_paltest_errno_test2, "c_runtime/errno/test2/paltest_errno_test2")
-{
-    DWORD dwThreadId, dwThrdParam = 0; 
-    HANDLE hThread; 
-    
-    
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-    
-    /* Set errno to a value within this thread */
-
-    errno = 50;
-    
-    hThread = CreateThread(NULL, 0, ThreadFunc_errno_test2, &dwThrdParam, 0, &dwThreadId);
-    
-    if (hThread == NULL) 
-    {
-        Fail("ERROR: CreateThread failed to create a thread.  "
-             "GetLastError() returned %d.\n",GetLastError());
-    }
-    
-    WaitForSingleObject(hThread, INFINITE);
- 
-    /* This checks the result of calling the thread */
-    if(dwThrdParam)
-    {
-        Fail("ERROR: errno was not set to 0 in the new thread.  Each "
-             "thread should have its own value for errno.\n");
-    }
-    
-    /* Check to make sure errno is still set to 50 */
-    if(errno != 50)
-    {
-        Fail("ERROR: errno should be 50 in the main thread, even though "
-             "it was set to 20 in another thread.  Currently it is %d.\n",
-             errno);
-    }
-    
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/exit/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/exit/test1/test1.cpp
deleted file mode 100644
index 2bb42e3563c4..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/exit/test1/test1.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Calls exit, and verifies that it actually stops program execution.
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_exit_test1_paltest_exit_test1, "c_runtime/exit/test1/paltest_exit_test1")
-{
-    /*
-     *  Initialize the PAL and return FAIL if this fails
-     */
-    if (0 != (PAL_Initialize(argc, argv)))
-    {
-        return FAIL;
-    }
-
-    /*should return 0*/
-    exit(0);
-
-    Fail ("Exit didn't actually stop execution.\n");
-
-    return FAIL;
-}
-
-
-
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/exit/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/exit/test2/test2.cpp
deleted file mode 100644
index 6125b3c38899..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/exit/test2/test2.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test2.c
-**
-** Purpose: Calls exit on fail, and verifies that it actually
-**          stops program execution and return 1.
-
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_exit_test2_paltest_exit_test2, "c_runtime/exit/test2/paltest_exit_test2")
-{
-    /*
-     *  Initialize the PAL and return FAIL if this fails
-     */
-    if (0 != (PAL_Initialize(argc, argv)))
-    {
-        return FAIL;
-    }
-
-    /*should return 1*/
-    exit(1);
-
-}
-
-
-
-
-
-
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/exp/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/exp/test1/test1.cpp
deleted file mode 100644
index b3c8bb9307e0..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/exp/test1/test1.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests exp with a normal set of values.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * exp_test1_validate
- *
- * test validation function
- */
-void __cdecl exp_test1_validate(double value, double expected, double variance)
-{
-    double result = exp(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("exp(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * exp_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl exp_test1_validate_isnan(double value)
-{
-    double result = exp(value);
-
-    if (!_isnan(result))
-    {
-        Fail("exp(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_exp_test1_paltest_exp_test1, "c_runtime/exp/test1/paltest_exp_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                 variance */
-        { PAL_NEGINF,              0,                       PAL_EPSILON },
-        { -3.1415926535897932,     0.043213918263772250,    PAL_EPSILON / 10 },   // value: -(pi)
-        { -2.7182818284590452,     0.065988035845312537,    PAL_EPSILON / 10 },   // value: -(e)
-        { -2.3025850929940457,     0.1,                     PAL_EPSILON },        // value: -(ln(10))
-        { -1.5707963267948966,     0.20787957635076191,     PAL_EPSILON },        // value: -(pi / 2)
-        { -1.4426950408889634,     0.23629008834452270,     PAL_EPSILON },        // value: -(log2(e))
-        { -1.4142135623730950,     0.24311673443421421,     PAL_EPSILON },        // value: -(sqrt(2))
-        { -1.1283791670955126,     0.32355726390307110,     PAL_EPSILON },        // value: -(2 / sqrt(pi))
-        { -1,                      0.36787944117144232,     PAL_EPSILON },        // value: -(1)
-        { -0.78539816339744831,    0.45593812776599624,     PAL_EPSILON },        // value: -(pi / 4)
-        { -0.70710678118654752,    0.49306869139523979,     PAL_EPSILON },        // value: -(1 / sqrt(2))
-        { -0.69314718055994531,    0.5,                     PAL_EPSILON },        // value: -(ln(2))
-        { -0.63661977236758134,    0.52907780826773535,     PAL_EPSILON },        // value: -(2 / pi)
-        { -0.43429448190325183,    0.64772148514180065,     PAL_EPSILON },        // value: -(log10(e))
-        { -0.31830988618379067,    0.72737734929521647,     PAL_EPSILON },        // value: -(1 / pi)
-        {  0,                      1,                       PAL_EPSILON * 10 },
-        {  0.31830988618379067,    1.3748022274393586,      PAL_EPSILON * 10 },   // value:  1 / pi
-        {  0.43429448190325183,    1.5438734439711811,      PAL_EPSILON * 10 },   // value:  log10(e)
-        {  0.63661977236758134,    1.8900811645722220,      PAL_EPSILON * 10 },   // value:  2 / pi
-        {  0.69314718055994531,    2,                       PAL_EPSILON * 10 },   // value:  ln(2)
-        {  0.70710678118654752,    2.0281149816474725,      PAL_EPSILON * 10 },   // value:  1 / sqrt(2)
-        {  0.78539816339744831,    2.1932800507380155,      PAL_EPSILON * 10 },   // value:  pi / 4
-        {  1,                      2.7182818284590452,      PAL_EPSILON * 10 },   //                           expected: e
-        {  1.1283791670955126,     3.0906430223107976,      PAL_EPSILON * 10 },   // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     4.1132503787829275,      PAL_EPSILON * 10 },   // value:  sqrt(2)
-        {  1.4426950408889634,     4.2320861065570819,      PAL_EPSILON * 10 },   // value:  log2(e)
-        {  1.5707963267948966,     4.8104773809653517,      PAL_EPSILON * 10 },   // value:  pi / 2
-        {  2.3025850929940457,     10,                      PAL_EPSILON * 100 },  // value:  ln(10)
-        {  2.7182818284590452,     15.154262241479264,      PAL_EPSILON * 100 },  // value:  e
-        {  3.1415926535897932,     23.140692632779269,      PAL_EPSILON * 100 },  // value:  pi
-        {  PAL_POSINF,             PAL_POSINF,              0 },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        exp_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-    }
-
-    exp_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/expf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/expf/test1/test1.cpp
deleted file mode 100644
index 72d417ff180a..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/expf/test1/test1.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests expf with a normal set of values.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * expf_test1_validate
- *
- * test validation function
- */
-void __cdecl expf_test1_validate(float value, float expected, float variance)
-{
-    float result = expf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("expf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * expf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl expf_test1_validate_isnan(float value)
-{
-    float result = expf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("expf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_expf_test1_paltest_expf_test1, "c_runtime/expf/test1/paltest_expf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected          variance */
-        { PAL_NEGINF,       0,                PAL_EPSILON },
-        { -3.14159265f,     0.0432139183f,    PAL_EPSILON / 10 },   // value: -(pi)
-        { -2.71828183f,     0.0659880358f,    PAL_EPSILON / 10 },   // value: -(e)
-        { -2.30258509f,     0.1f,             PAL_EPSILON },        // value: -(ln(10))
-        { -1.57079633f,     0.207879576f,     PAL_EPSILON },        // value: -(pi / 2)
-        { -1.44269504f,     0.236290088f,     PAL_EPSILON },        // value: -(logf2(e))
-        { -1.41421356f,     0.243116734f,     PAL_EPSILON },        // value: -(sqrtf(2))
-        { -1.12837917f,     0.323557264f,     PAL_EPSILON },        // value: -(2 / sqrtf(pi))
-        { -1,               0.367879441f,     PAL_EPSILON },        // value: -(1)
-        { -0.785398163f,    0.455938128f,     PAL_EPSILON },        // value: -(pi / 4)
-        { -0.707106781f,    0.493068691f,     PAL_EPSILON },        // value: -(1 / sqrtf(2))
-        { -0.693147181f,    0.5f,             PAL_EPSILON },        // value: -(ln(2))
-        { -0.636619772f,    0.529077808f,     PAL_EPSILON },        // value: -(2 / pi)
-        { -0.434294482f,    0.647721485f,     PAL_EPSILON },        // value: -(log10f(e))
-        { -0.318309886f,    0.727377349f,     PAL_EPSILON },        // value: -(1 / pi)
-        {  0,               1,                PAL_EPSILON * 10 },
-        {  0.318309886f,    1.37480223f,      PAL_EPSILON * 10 },   // value:  1 / pi
-        {  0.434294482f,    1.54387344f,      PAL_EPSILON * 10 },   // value:  log10f(e)
-        {  0.636619772f,    1.89008116f,      PAL_EPSILON * 10 },   // value:  2 / pi
-        {  0.693147181f,    2,                PAL_EPSILON * 10 },   // value:  ln(2)
-        {  0.707106781f,    2.02811498f,      PAL_EPSILON * 10 },   // value:  1 / sqrtf(2)
-        {  0.785398163f,    2.19328005f,      PAL_EPSILON * 10 },   // value:  pi / 4
-        {  1,               2.71828183f,      PAL_EPSILON * 10 },   //                           expected: e
-        {  1.12837917f,     3.09064302f,      PAL_EPSILON * 10 },   // value:  2 / sqrtf(pi)
-        {  1.41421356f,     4.11325038f,      PAL_EPSILON * 10 },   // value:  sqrtf(2)
-        {  1.44269504f,     4.23208611f,      PAL_EPSILON * 10 },   // value:  logf2(e)
-        {  1.57079633f,     4.81047738f,      PAL_EPSILON * 10 },   // value:  pi / 2
-        {  2.30258509f,     10,               PAL_EPSILON * 100 },  // value:  ln(10)
-        {  2.71828183f,     15.1542622f,      PAL_EPSILON * 100 },  // value:  e
-        {  3.14159265f,     23.1406926f,      PAL_EPSILON * 100 },  // value:  pi
-        {  PAL_POSINF,      PAL_POSINF,       0 },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        expf_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-    }
-
-    expf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fabs/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fabs/test1/test1.cpp
deleted file mode 100644
index f3997dea0191..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fabs/test1/test1.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that fabs return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * fabs_test1_validate
- *
- * test validation function
- */
-void __cdecl fabs_test1_validate(double value, double expected, double variance)
-{
-    double result = fabs(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("fabs(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * fabs_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl fabs_test1_validate_isnan(double value)
-{
-    double result = fabs(value);
-
-    if (!_isnan(result))
-    {
-        Fail("fabs(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_fabs_test1_paltest_fabs_test1, "c_runtime/fabs/test1/paltest_fabs_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance */
-        { PAL_NEGINF,              PAL_POSINF,             0 },
-        { -3.1415926535897932,     3.1415926535897932,     PAL_EPSILON * 10 },  // value: -(pi)              expected:  pi
-        { -2.7182818284590452,     2.7182818284590452,     PAL_EPSILON * 10 },  // value: -(e)               expected:  e
-        { -2.3025850929940457,     2.3025850929940457,     PAL_EPSILON * 10 },  // value: -(ln(10))          expected:  ln(10)
-        { -1.5707963267948966,     1.5707963267948966,     PAL_EPSILON * 10 },  // value: -(pi / 2)          expected:  pi / 2
-        { -1.4426950408889634,     1.4426950408889634,     PAL_EPSILON * 10 },  // value: -(log2(e))         expected:  log2(e)
-        { -1.4142135623730950,     1.4142135623730950,     PAL_EPSILON * 10 },  // value: -(sqrt(2))         expected:  sqrt(2)
-        { -1.1283791670955126,     1.1283791670955126,     PAL_EPSILON * 10 },  // value: -(2 / sqrt(pi))    expected:  2 / sqrt(pi)
-        { -1,                      1,                      PAL_EPSILON * 10 },
-        { -0.78539816339744831,    0.78539816339744831,    PAL_EPSILON },       // value: -(pi / 4)          expected:  pi / 4
-        { -0.70710678118654752,    0.70710678118654752,    PAL_EPSILON },       // value: -(1 / sqrt(2))     expected:  1 / sqrt(2)
-        { -0.69314718055994531,    0.69314718055994531,    PAL_EPSILON },       // value: -(ln(2))           expected:  ln(2)
-        { -0.63661977236758134,    0.63661977236758134,    PAL_EPSILON },       // value: -(2 / pi)          expected:  2 / pi
-        { -0.43429448190325183,    0.43429448190325183,    PAL_EPSILON },       // value: -(log10(e))        expected:  log10(e)
-        { -0.31830988618379067,    0.31830988618379067,    PAL_EPSILON },       // value: -(1 / pi)          expected:  1 / pi
-        { -0.0,                    0,                      PAL_EPSILON },
-    };
-
-
-    // PAL initialization
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        fabs_test1_validate( tests[i].value, tests[i].expected, tests[i].variance);
-        fabs_test1_validate(-tests[i].value, tests[i].expected, tests[i].variance);
-    }
-    
-    fabs_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fabsf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fabsf/test1/test1.cpp
deleted file mode 100644
index 68481ca97cef..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fabsf/test1/test1.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that fabsf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-                    
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * fabsf_test1_validate
- *
- * test validation function
- */
-void __cdecl fabsf_test1_validate(float value, float expected, float variance)
-{
-    float result = fabsf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("fabsf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * fabsf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl fabsf_test1_validate_isnan(float value)
-{
-    float result = fabsf(value);
-
-    if (!_isnan(result))
-    {
-        Fail("fabsf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_fabsf_test1_paltest_fabsf_test1, "c_runtime/fabsf/test1/paltest_fabsf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected           variance */
-        { PAL_NEGINF,       PAL_POSINF,        0 },
-        { -3.14159265f,     3.14159265f,       PAL_EPSILON * 10 },    // value: -(pi)              expected:  pi
-        { -2.71828183f,     2.71828183f,       PAL_EPSILON * 10 },    // value: -(e)               expected:  e
-        { -2.30258509f,     2.30258509f,       PAL_EPSILON * 10 },    // value: -(ln(10))          expected:  ln(10)
-        { -1.57079633f,     1.57079633f,       PAL_EPSILON * 10 },    // value: -(pi / 2)          expected:  pi / 2
-        { -1.44269504f,     1.44269504f,       PAL_EPSILON * 10 },    // value: -(log2(e))         expected:  log2(e)
-        { -1.41421356f,     1.41421356f,       PAL_EPSILON * 10 },    // value: -(sqrt(2))         expected:  sqrt(2)
-        { -1.12837917f,     1.12837917f,       PAL_EPSILON * 10 },    // value: -(2 / sqrt(pi))    expected:  2 / sqrt(pi)
-        { -1,               1,                 PAL_EPSILON * 10 },
-        { -0.785398163f,    0.785398163f,      PAL_EPSILON },         // value: -(pi / 4)          expected:  pi / 4
-        { -0.707106781f,    0.707106781f,      PAL_EPSILON },         // value: -(1 / sqrt(2))     expected:  1 / sqrt(2)
-        { -0.693147181f,    0.693147181f,      PAL_EPSILON },         // value: -(ln(2))           expected:  ln(2)
-        { -0.636619772f,    0.636619772f,      PAL_EPSILON },         // value: -(2 / pi)          expected:  2 / pi
-        { -0.434294482f,    0.434294482f,      PAL_EPSILON },         // value: -(log10(e))        expected:  log10(e)
-        { -0.318309886f,    0.318309886f,      PAL_EPSILON },         // value: -(1 / pi)          expected:  1 / pi
-        { -0.0f,            0,                 PAL_EPSILON },
-    };
-
-
-    // PAL initialization
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        fabsf_test1_validate( tests[i].value, tests[i].expected, tests[i].variance);
-        fabsf_test1_validate(-tests[i].value, tests[i].expected, tests[i].variance);
-    }
-    
-    fabsf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/ferror/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/ferror/test1/test1.cpp
deleted file mode 100644
index 5863d51d0c09..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/ferror/test1/test1.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests the PAL implementation of the ferror function.
-**
-** Depends:
-**     fopen
-**     fread
-**     fclose
-**     
-**
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_ferror_test1_paltest_ferror_test1, "c_runtime/ferror/test1/paltest_ferror_test1")
-{
-    const char filename[] = "testfile";
-    char buffer[128];
-    FILE * fp = NULL;
-    int result;
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-  
-    /* Open a file in READ mode */
-
-    if((fp = fopen(filename, "r")) == NULL)
-    {
-        Fail("Unable to open a file for reading.  Is the file "
-               "in the directory?  It should be.");
-    }
-
-    /* Read 10 characters from the file.  The file has 15 
-       characters in it.
-    */
-  
-    if((result = fread(buffer,1,10,fp)) == 0)
-    {
-        Fail("ERROR: Zero characters read from the file.  It should have "
-               "read 10 character in from a 15 character file.");
-    }
-  
-    if(ferror(fp) != 0)
-    {
-        Fail("ERROR:  ferror returned a value not equal to 0. The read "
-               "operation shouldn't have caused an error, and ferror should "
-               "return 0 still.");
-    }
-  
-    /* 
-       Close the open file and end the test.
-    */
-
-    if(fclose(fp) != 0)
-    {
-        Fail("ERROR: fclose failed when trying to close a file pointer. "
-               "This test depends on fclose working properly.");
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
-   
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/ferror/test1/testfile b/src/coreclr/pal/tests/palsuite/c_runtime/ferror/test1/testfile
deleted file mode 100644
index 273c1a9ffdc2..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/ferror/test1/testfile
+++ /dev/null
@@ -1 +0,0 @@
-This is a test.
\ No newline at end of file
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/ferror/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/ferror/test2/test2.cpp
deleted file mode 100644
index 3ac444b0ec35..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/ferror/test2/test2.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test2.c
-**
-** Purpose: Open a read-only file and attempt to write some data to it.
-** Check to ensure that an ferror occurs.
-**
-** Depends:
-**     fopen
-**     fwrite
-**     fclose
-**     
-**
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_ferror_test2_paltest_ferror_test2, "c_runtime/ferror/test2/paltest_ferror_test2")
-{
-    const char filename[] = "testfile";
-    FILE * fp = NULL;
-    int result;
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-    /* Open a file in READONLY mode */
-  
-    if((fp = fopen(filename, "r")) == NULL)
-    {
-        Fail("Unable to open a file for reading.");
-    }
-
-    /* Attempt to write 14 characters to the file. */
-  
-    if((result = fwrite("This is a test",1,14,fp)) != 0)
-    {
-        Fail("ERROR: %d characters written.  0 characters should "
-             "have been written, since this file is read-only.", result);
-    }
-  
-    if(ferror(fp) == 0)
-    {
-        Fail("ERROR:  ferror should have generated an error when "
-             "write was called on a read-only file.  But, it "
-             "retured 0, indicating no error.\n");
-    }
-  
-    /* Close the file. */
-
-    if(fclose(fp) != 0)
-    {
-        Fail("ERROR: fclose failed when trying to close a file pointer. "
-	     "This test depends on fclose working properly.");
-    }
-
-   
-    PAL_Terminate();
-    return PASS;
-}
-   
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/ferror/test2/testfile b/src/coreclr/pal/tests/palsuite/c_runtime/ferror/test2/testfile
deleted file mode 100644
index 0135842a03e1..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/ferror/test2/testfile
+++ /dev/null
@@ -1 +0,0 @@
-This is a test file.  This needs to be kept in CVS.
\ No newline at end of file
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fflush/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fflush/test1/test1.cpp
deleted file mode 100644
index 716a151b5f8f..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fflush/test1/test1.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests to see that fflush is working properly.  Flushes a couple
-** buffers and checks the return value.  Can't figure out a way to test
-** and ensure it is really dropping the buffers, since the system
-** does this automatically most of the time ...
-**
-**
-**==========================================================================*/
-
-/* This function is really tough to test.  Right now it just tests 
-   a bunch of return values.  No solid way to ensure that it is really
-   flushing a buffer or not -- might have to be a manual test someday.
-*/
-
-#include <palsuite.h>
-
-
-PALTEST(c_runtime_fflush_test1_paltest_fflush_test1, "c_runtime/fflush/test1/paltest_fflush_test1")
-{
-    
-    int TheReturn;
-    FILE* TheFile; 
-    FILE* AnotherFile = NULL;
-  
-    PAL_Initialize(argc,argv);
-     
-    TheFile = fopen("theFile","w+");
-
-    if(TheFile == NULL) 
-    {
-        Fail("ERROR: fopen failed.  Test depends on this function.");
-    }
-    
-    TheReturn = fwrite("foo",3,3,TheFile);
-    
-    if(TheReturn != 3) 
-    {
-        Fail("ERROR: fwrite failed.  Test depends on this function.");
-    }
-  
-    /* Test to see that FlushFileBuffers returns a success value */
-    TheReturn = fflush(TheFile);
-
-    if(TheReturn != 0) 
-    {
-        Fail("ERROR: The fflush function returned non-zero, which "
-               "indicates failure, when trying to flush a buffer.");
-    }
-
-    /* Test to see that FlushFileBuffers returns a success value */
-    TheReturn = fflush(NULL);
-
-    if(TheReturn != 0) 
-    {
-        Fail("ERROR: The fflush function returned non-zero, which "
-               "indicates failure, when trying to flush all buffers.");
-    }
-
-    /* Test to see that FlushFileBuffers returns a success value */
-    TheReturn = fflush(AnotherFile);
-
-    if(TheReturn != 0) 
-    {
-        Fail("ERROR: The fflush function returned non-zero, which "
-               "indicates failure, when trying to flush a stream not "
-               "associated with a file.");
-    }
-  
-    PAL_Terminate();
-    return PASS;
-}
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fgets/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fgets/test1/test1.cpp
deleted file mode 100644
index e16c9e2ad9d0..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fgets/test1/test1.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Writes a simple file and calls fgets() to get a string shorter
-**          than the first line of the file.  Verifies that the correct
-**          string is returned.
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_fgets_test1_paltest_fgets_test1, "c_runtime/fgets/test1/paltest_fgets_test1")
-{
-    const char outBuf1[] = "This is a test.\n";
-    const char outBuf2[] = "This is too.";
-    char inBuf[sizeof(outBuf1) + sizeof(outBuf2)];
-    const char filename[] = "testfile.tmp";
-    const int offset = 5;  /* value chosen arbitrarily */
-    int actualLen;
-    int expectedLen;
-    FILE * fp;
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-  
-    /*write the file that we will use to test */
-    fp = fopen(filename, "w");
-    if (fp == NULL)
-    {
-        Fail("Unable to open file for write.\n");
-    }
-  
-    fwrite(outBuf1, sizeof(outBuf1[0]), sizeof(outBuf1), fp);
-    fwrite(outBuf2, sizeof(outBuf2[0]), sizeof(outBuf2), fp);
-
-    if (fclose(fp) != 0) 
-    {
-        Fail("Error closing a file opened for write.\n");
-    }
-
-
-    /*now read back the entire first string*/
-    fp = fopen(filename, "r"); 
-    if (fp == NULL)
-    {
-        Fail("Unable to open file for read.\n");
-    }
-
-    /*note: +1 because strlen() returns the length of a string _not_
-      including the NULL, while fgets() returns a string of specified
-      maximum length _including_ the NULL.*/
-    if (fgets(inBuf, strlen(outBuf1) - offset + 1, fp) != inBuf)
-    {
-        Fail("Error reading from file using fgets.\n");
-    }
-
-
-    expectedLen = strlen(outBuf1) - offset;
-    actualLen = strlen(inBuf);
-
-    if (actualLen < expectedLen)
-    {
-        Fail("fgets() was asked to read a one-line string and given the "
-             "length of the string as a parameter.  The string it has "
-             "read is too short.\n");
-    }
-    if (actualLen > expectedLen)
-    {
-        Fail("fgets() was asked to read a one-line string and given the "
-             "length of the string as a parameter.  The string it has "
-             "read is too long.\n");
-    }
-    if (memcmp(inBuf, outBuf1, actualLen) != 0)
-    {
-        /*We didn't read back exactly outBuf1*/
-        Fail("fgets() was asked to read a one-line string, and given the "
-             "length of the string as an parameter.  It has returned a "
-             "string of the correct length, but the contents are not "
-             "correct.\n");
-    }
-    
-    if (fclose(fp) != 0)
-    {
-        Fail("Error closing file after using fgets().\n");
-    }
-
-
-    PAL_Terminate();
-    return PASS;
-
-}
-
-  
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fgets/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fgets/test2/test2.cpp
deleted file mode 100644
index 7f21d0758175..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fgets/test2/test2.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test2.c
-**
-** Purpose: Calls fgets to read a full line from a file.  A maximum length
-**          parameter greater than the length of the line is passed.
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_fgets_test2_paltest_fgets_test2, "c_runtime/fgets/test2/paltest_fgets_test2")
-{
-    const char outBuf1[] = "This is a test.\n";  
-    const char outBuf2[] = "This is too.";
-
-    char inBuf[sizeof(outBuf1) + sizeof(outBuf2)];
-    const char filename[] = "testfile.tmp";
-    const int offset = 5;  /*value chosen arbitrarily*/
-    int expectedLen;
-    int actualLen;
-
-    FILE * fp;
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-
-    /*write the file that we will use to test */
-    fp = fopen(filename, "w"); 
-    if (fp == NULL)
-    {
-        Fail("Unable to open file for write.\n");
-    }
-  
-    fwrite(outBuf1, sizeof(outBuf1[0]), sizeof(outBuf1), fp);
-    fwrite(outBuf2, sizeof(outBuf2[0]), sizeof(outBuf2), fp);
-
-    if (fclose(fp) != 0) 
-    {
-        Fail("error closing stream opened for write.\n");
-    }
-
-    /*Read until the first linebreak*/
-    fp = fopen(filename, "r");
-    if (fp == NULL)
-    {
-        Fail("Unable to open file for read.\n");
-    }
-  
-  
-    if (fgets(inBuf, sizeof(outBuf1) + offset , fp) != inBuf)
-    {
-        Fail("Error reading from file using fgets.\n");
-    }
-
-    /*note: -1 because strlen returns the length of a string _not_
-      including the NULL, while fgets returns a string of specified
-      maximum length _including_ the NULL.*/
-    expectedLen = strlen(outBuf1);  
-    actualLen = strlen(inBuf);
-    if (actualLen > expectedLen)
-    {
-        Fail("fgets() was asked to read the first line of a file, but did "
-             "not stop at the end of the line.\n");
-    }
-    else if (actualLen < expectedLen)
-    {
-        Fail("fgets() was asked to read the first line of a file, but did "
-             "not read the entire line.\n");
-    }
-    else if (memcmp(inBuf, outBuf1, actualLen) != 0)
-    {
-        /*We didn't read back exactly outBuf1*/
-        Fail("fgets() was asked to read the first line of a file.  It "
-             "has read back a string of the correct length, but the"
-             " contents are not correct.\n");
-    }
-    
-    if (fclose(fp) != 0)
-    {
-        Fail("Error closing file after using fgets().\n");
-    }
-
-    PAL_Terminate();
-    return PASS;
-
-}
-
-  
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fgets/test3/test3.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fgets/test3/test3.cpp
deleted file mode 100644
index f46b179de1dd..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fgets/test3/test3.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test3.c
-**
-** Purpose: Tries to read from an empty file using fgets(), to verify
-**          handling of EOF condition.
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_fgets_test3_paltest_fgets_test3, "c_runtime/fgets/test3/paltest_fgets_test3")
-{
-    char inBuf[10];
-    const char filename[] = "testfile.tmp";
-
-    FILE * fp;
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-
-    /*write the empty file that we will use to test */
-    fp = fopen(filename, "w");
-    if (fp == NULL)
-    {
-        Fail("Unable to open file for write.\n");
-    }
-
-    /*Don't write anything*/
-  
-    if (fclose(fp) != 0) 
-    {
-        Fail("Error closing stream opened for write.\n");
-    }
-
-
-    /*Open the file and try to read.*/
-    fp = fopen(filename, "r");
-    if (fp == NULL)
-    {
-        Fail("Unable to open file for read.\n");
-    }
-
-  
-    if (fgets(inBuf, sizeof(inBuf) , fp) != NULL)
-    {
-        /*NULL could also mean an error condition, but since the PAL
-          doesn't supply feof or ferror, we can't distinguish between
-          the two.*/
-        Fail("fgets doesn't handle EOF properly.  When asked to read from "
-             "an empty file, it didn't return NULL as it should have.\n");
-    }
-
-    if (fclose(fp) != 0)
-    {
-        Fail("Error closing an empty file after trying to use fgets().\n");
-    }
-    PAL_Terminate();
-    return PASS;
-
-}
-
-  
-
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/floor/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/floor/test1/test1.cpp
deleted file mode 100644
index 549a2d83a0de..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/floor/test1/test1.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests floor with simple positive and negative values.  Also tests 
-**          extreme cases like extremely small values and positive and 
-**          negative infinity.  Makes sure that calling floor on NaN returns 
-**          NaN
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * floor_test1_validate
- *
- * test validation function
- */
-void __cdecl floor_test1_validate(double value, double expected, double variance)
-{
-    double result = floor(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("floor(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * floor_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl floor_test1_validate_isnan(double value)
-{
-    double result = floor(value);
-
-    if (!_isnan(result))
-    {
-        Fail("floor(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_floor_test1_paltest_floor_test1, "c_runtime/floor/test1/paltest_floor_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected           variance */
-        {  0.31830988618379067,    0,                 PAL_EPSILON },        // value:  1 / pi
-        {  0.43429448190325183,    0,                 PAL_EPSILON },        // value:  log10(e)
-        {  0.63661977236758134,    0,                 PAL_EPSILON },        // value:  2 / pi
-        {  0.69314718055994531,    0,                 PAL_EPSILON },        // value:  ln(2)
-        {  0.70710678118654752,    0,                 PAL_EPSILON },        // value:  1 / sqrt(2)
-        {  0.78539816339744831,    0,                 PAL_EPSILON },        // value:  pi / 4
-        {  1.1283791670955126,     1,                 PAL_EPSILON * 10 },   // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     1,                 PAL_EPSILON * 10 },   // value:  sqrt(2)
-        {  1.4426950408889634,     1,                 PAL_EPSILON * 10 },   // value:  log2(e)
-        {  1.5707963267948966,     1,                 PAL_EPSILON * 10 },   // value:  pi / 2
-        {  2.3025850929940457,     2,                 PAL_EPSILON * 10 },   // value:  ln(10)
-        {  2.7182818284590452,     2,                 PAL_EPSILON * 10 },   // value:  e
-        {  3.1415926535897932,     3,                 PAL_EPSILON * 10 },   // value:  pi
-        {  PAL_POSINF,             PAL_POSINF,        0 }
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-    
-    floor_test1_validate( 0,    0, PAL_EPSILON);
-    floor_test1_validate(-0.0,  0, PAL_EPSILON);
-    
-    floor_test1_validate( 1,    1, PAL_EPSILON * 10);
-    floor_test1_validate(-1.0, -1, PAL_EPSILON * 10);
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        floor_test1_validate( tests[i].value,  tests[i].expected,       tests[i].variance);
-        floor_test1_validate(-tests[i].value, -(tests[i].expected + 1), tests[i].variance);
-    }
-    
-    floor_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/floorf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/floorf/test1/test1.cpp
deleted file mode 100644
index b80320c7de31..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/floorf/test1/test1.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests floorf with simple positive and negative values.  Also tests 
-**          extreme cases like extremely small values and positive and 
-**          negative infinity.  Makes sure that calling floorf on NaN returns 
-**          NaN
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * floorf_test1_validate
- *
- * test validation function
- */
-void __cdecl floorf_test1_validate(float value, float expected, float variance)
-{
-    float result = floorf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("floorf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * floorf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl floorf_test1_validate_isnan(float value)
-{
-    float result = floorf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("floorf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_floorf_test1_paltest_floorf_test1, "c_runtime/floorf/test1/paltest_floorf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected       variance */
-        {  0.318309886f,    0,             PAL_EPSILON },        // value:  1 / pi
-        {  0.434294482f,    0,             PAL_EPSILON },        // value:  log10f(e)
-        {  0.636619772f,    0,             PAL_EPSILON },        // value:  2 / pi
-        {  0.693147181f,    0,             PAL_EPSILON },        // value:  ln(2)
-        {  0.707106781f,    0,             PAL_EPSILON },        // value:  1 / sqrtf(2)
-        {  0.785398163f,    0,             PAL_EPSILON },        // value:  pi / 4
-        {  1.12837917f,     1,             PAL_EPSILON * 10 },   // value:  2 / sqrtf(pi)
-        {  1.41421356f,     1,             PAL_EPSILON * 10 },   // value:  sqrtf(2)
-        {  1.44269504f,     1,             PAL_EPSILON * 10 },   // value:  logf2(e)
-        {  1.57079633f,     1,             PAL_EPSILON * 10 },   // value:  pi / 2
-        {  2.30258509f,     2,             PAL_EPSILON * 10 },   // value:  ln(10)
-        {  2.71828183f,     2,             PAL_EPSILON * 10 },   // value:  e
-        {  3.14159265f,     3,             PAL_EPSILON * 10 },   // value:  pi
-        {  PAL_POSINF,      PAL_POSINF,    0 }
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-    
-    floorf_test1_validate( 0,    0, PAL_EPSILON);
-    floorf_test1_validate(-0.0f, 0, PAL_EPSILON);
-    
-    floorf_test1_validate( 1,     1, PAL_EPSILON * 10);
-    floorf_test1_validate(-1.0f, -1, PAL_EPSILON * 10);
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        floorf_test1_validate( tests[i].value,  tests[i].expected,       tests[i].variance);
-        floorf_test1_validate(-tests[i].value, -(tests[i].expected + 1), tests[i].variance);
-    }
-    
-    floorf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fma/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fma/test1/test1.cpp
deleted file mode 100644
index a82adc4b6caa..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fma/test1/test1.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests that fma returns correct values for a subset of values.
-**          Tests with positive and negative values of x, y, and z to ensure
-**          fmaf is returning correct results.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double x;         /* first component of the value to test the function with */
-    double y;         /* second component of the value to test the function with */
-    double z;         /* third component of the value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * fma_test1_validate
- *
- * test validation function
- */
-void __cdecl fma_test1_validate(double x, double y, double z, double expected, double variance)
-{
-    double result = fma(x, y, z);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("fma(%g, %g, %g) returned %20.17g when it should have returned %20.17g",
-             x, y, z, result, expected);
-    }
-}
-
-/**
- * fma_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl fma_test1_validate_isnan(double x, double y, double z)
-{
-    double result = fma(x, y, z);
-
-    if (!_isnan(result))
-    {
-        Fail("fma(%g, %g, %g) returned %20.17g when it should have returned %20.17g",
-             x, y, z, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_fma_test1_paltest_fma_test1, "c_runtime/fma/test1/paltest_fma_test1")
-{
-    struct test tests[] = 
-    {
-        /* x                       y                       z                       expected                   variance */
-        {  PAL_NEGINF,             PAL_NEGINF,             PAL_NEGINF,             PAL_NEGINF,                0 },
-        { -1e308,                  2,                      1e308,                 -1e308,                     0 },
-        {  1e308,                  2,                     -1e308,                  1e308,                     0 },
-        {  PAL_POSINF,             PAL_POSINF,             PAL_POSINF,             PAL_POSINF,                0 },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        fma_test1_validate(tests[i].x, tests[i].y, tests[i].z, tests[i].expected, tests[i].variance);
-    }
-
-    // Returns NaN if x or y is infinite, the other is zero, and z is NaN
-    fma_test1_validate_isnan(PAL_NEGINF, 0, PAL_NAN);
-    fma_test1_validate_isnan(PAL_POSINF, 0, PAL_NAN);
-    fma_test1_validate_isnan(0, PAL_NEGINF, PAL_NAN);
-    fma_test1_validate_isnan(0, PAL_POSINF, PAL_NAN);
-
-    // Returns NaN if x or y is infinite, the other is zero, and z is not-NaN
-    fma_test1_validate_isnan(PAL_POSINF, 0, PAL_NEGINF);
-    fma_test1_validate_isnan(PAL_NEGINF, 0, PAL_NEGINF);
-    fma_test1_validate_isnan(0, PAL_POSINF, PAL_NEGINF);
-    fma_test1_validate_isnan(0, PAL_NEGINF, PAL_NEGINF);
-    
-    fma_test1_validate_isnan(PAL_POSINF, 0, 0);
-    fma_test1_validate_isnan(PAL_NEGINF, 0, 0);
-    fma_test1_validate_isnan(0, PAL_POSINF, 0);
-    fma_test1_validate_isnan(0, PAL_NEGINF, 0);
-
-    fma_test1_validate_isnan(PAL_POSINF, 0, PAL_POSINF);
-    fma_test1_validate_isnan(PAL_NEGINF, 0, PAL_POSINF);
-    fma_test1_validate_isnan(0, PAL_POSINF, PAL_POSINF);
-    fma_test1_validate_isnan(0, PAL_NEGINF, PAL_POSINF);
-
-    // Returns NaN if (x * y) is infinite, and z is an infinite of the opposite sign
-    fma_test1_validate_isnan(PAL_POSINF, PAL_POSINF, PAL_NEGINF);
-    fma_test1_validate_isnan(PAL_NEGINF, PAL_NEGINF, PAL_NEGINF);
-    fma_test1_validate_isnan(PAL_POSINF, PAL_NEGINF, PAL_POSINF);
-    fma_test1_validate_isnan(PAL_NEGINF, PAL_POSINF, PAL_POSINF);
-
-    fma_test1_validate_isnan(PAL_POSINF, 1, PAL_NEGINF);
-    fma_test1_validate_isnan(PAL_NEGINF, 1, PAL_POSINF);
-    fma_test1_validate_isnan(1, PAL_POSINF, PAL_NEGINF);
-    fma_test1_validate_isnan(1, PAL_NEGINF, PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fmaf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fmaf/test1/test1.cpp
deleted file mode 100644
index 10015a585118..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fmaf/test1/test1.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests that fmaf returns correct values for a subset of values.
-**          Tests with positive and negative values of x, y, and z to ensure
-**          fmaf is returning correct results.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float x;         /* first component of the value to test the function with */
-    float y;         /* second component of the value to test the function with */
-    float z;         /* third component of the value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * fmaf_test1_validate
- *
- * test validation function
- */
-void __cdecl fmaf_test1_validate(float x, float y, float z, float expected, float variance)
-{
-    float result = fmaf(x, y, z);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("fmaf(%g, %g, %g) returned %10.9g when it should have returned %10.9g",
-             x, y, z, result, expected);
-    }
-}
-
-/**
- * fmaf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl fmaf_test1_validate_isnan(float x, float y, float z)
-{
-    float result = fmaf(x, y, z);
-
-    if (!_isnanf(result))
-    {
-        Fail("fmaf(%g, %g, %g) returned %10.9g when it should have returned %10.9g",
-             x, y, z, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_fmaf_test1_paltest_fmaf_test1, "c_runtime/fmaf/test1/paltest_fmaf_test1")
-{
-    struct test tests[] = 
-    {
-        /* x                y                z                expected            variance */
-        {  PAL_NEGINF,      PAL_NEGINF,      PAL_NEGINF,      PAL_NEGINF,         0 },
-        { -1e38,            2,               1e38,           -1e38,               0 },
-        {  1e38,            2,              -1e38,            1e38,               0 },
-        {  PAL_POSINF,      PAL_POSINF,      PAL_POSINF,      PAL_POSINF,         0 },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        fmaf_test1_validate(tests[i].x, tests[i].y, tests[i].z, tests[i].expected, tests[i].variance);
-    }
-
-    // Returns NaN if x or y is infinite, the other is zero, and z is NaN
-    fmaf_test1_validate_isnan(PAL_NEGINF, 0, PAL_NAN);
-    fmaf_test1_validate_isnan(PAL_POSINF, 0, PAL_NAN);
-    fmaf_test1_validate_isnan(0, PAL_NEGINF, PAL_NAN);
-    fmaf_test1_validate_isnan(0, PAL_POSINF, PAL_NAN);
-
-    // Returns NaN if x or y is infinite, the other is zero, and z is not-NaN
-    fmaf_test1_validate_isnan(PAL_POSINF, 0, PAL_NEGINF);
-    fmaf_test1_validate_isnan(PAL_NEGINF, 0, PAL_NEGINF);
-    fmaf_test1_validate_isnan(0, PAL_POSINF, PAL_NEGINF);
-    fmaf_test1_validate_isnan(0, PAL_NEGINF, PAL_NEGINF);
-    
-    fmaf_test1_validate_isnan(PAL_POSINF, 0, 0);
-    fmaf_test1_validate_isnan(PAL_NEGINF, 0, 0);
-    fmaf_test1_validate_isnan(0, PAL_POSINF, 0);
-    fmaf_test1_validate_isnan(0, PAL_NEGINF, 0);
-
-    fmaf_test1_validate_isnan(PAL_POSINF, 0, PAL_POSINF);
-    fmaf_test1_validate_isnan(PAL_NEGINF, 0, PAL_POSINF);
-    fmaf_test1_validate_isnan(0, PAL_POSINF, PAL_POSINF);
-    fmaf_test1_validate_isnan(0, PAL_NEGINF, PAL_POSINF);
-
-    // Returns NaN if (x * y) is infinite, and z is an infinite of the opposite sign
-    fmaf_test1_validate_isnan(PAL_POSINF, PAL_POSINF, PAL_NEGINF);
-    fmaf_test1_validate_isnan(PAL_NEGINF, PAL_NEGINF, PAL_NEGINF);
-    fmaf_test1_validate_isnan(PAL_POSINF, PAL_NEGINF, PAL_POSINF);
-    fmaf_test1_validate_isnan(PAL_NEGINF, PAL_POSINF, PAL_POSINF);
-
-    fmaf_test1_validate_isnan(PAL_POSINF, 1, PAL_NEGINF);
-    fmaf_test1_validate_isnan(PAL_NEGINF, 1, PAL_POSINF);
-    fmaf_test1_validate_isnan(1, PAL_POSINF, PAL_NEGINF);
-    fmaf_test1_validate_isnan(1, PAL_NEGINF, PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fmod/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fmod/test1/test1.cpp
deleted file mode 100644
index d02b09d0e0d0..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fmod/test1/test1.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that fmod return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double numerator;    /* second component of the value to test the function with */
-    double denominator;  /* first component of the value to test the function with */
-    double expected;     /* expected result */
-    double variance;     /* maximum delta between the expected and actual result */
-};
-
-/**
- * fmod_test1_validate
- *
- * test validation function
- */
-void __cdecl fmod_test1_validate(double numerator, double denominator, double expected, double variance)
-{
-    double result = fmod(numerator, denominator);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("fmod(%g, %g) returned %20.17g when it should have returned %20.17g",
-             numerator, denominator, result, expected);
-    }
-}
-
-/**
- * fmod_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl fmod_test1_validate_isnan(double numerator, double denominator)
-{
-    double result = fmod(numerator, denominator);
-
-    if (!_isnan(result))
-    {
-        Fail("fmod(%g, %g) returned %20.17g when it should have returned %20.17g",
-             numerator, denominator, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_fmod_test1_paltest_fmod_test1, "c_runtime/fmod/test1/paltest_fmod_test1")
-{
-    struct test tests[] = 
-    {
-        /* numerator               denominator             expected                 variance */
-        {  0,                      PAL_POSINF,             0,                       PAL_EPSILON },
-        {  0.31296179620778659,    0.94976571538163866,    0.31296179620778658,     PAL_EPSILON },
-        {  0.42077048331375735,    0.90716712923909839,    0.42077048331375733,     PAL_EPSILON },
-        {  0.59448076852482208,    0.80410982822879171,    0.59448076852482212,     PAL_EPSILON },
-        {  0.63896127631363480,    0.76923890136397213,    0.63896127631363475,     PAL_EPSILON },
-        {  0.64963693908006244,    0.76024459707563015,    0.64963693908006248,     PAL_EPSILON },
-        {  0.70710678118654752,    0.70710678118654752,    0,                       PAL_EPSILON },
-        {  1,                      1,                      0,                       PAL_EPSILON },
-        {  0.84147098480789651,    0.54030230586813972,    0.30116867893975674,     PAL_EPSILON },
-        {  0.90371945743584630,    0.42812514788535792,    0.047469161665130377,    PAL_EPSILON / 10 },
-        {  0.98776594599273553,    0.15594369476537447,    0.052103777400488605,    PAL_EPSILON / 10 },
-        {  0.99180624439366372,    0.12775121753523991,    0.097547721646984359,    PAL_EPSILON / 10 },
-        {  0.74398033695749319,   -0.66820151019031295,    0.075778826767180285,    PAL_EPSILON / 10 },
-        {  0.41078129050290870,   -0.91173391478696510,    0.41078129050290868,     PAL_EPSILON },
-        {  0,                     -1,                      0,                       PAL_EPSILON },
-        {  1,                      PAL_POSINF,             1,                       PAL_EPSILON * 10 },
-    };
-
-
-    // PAL initialization
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        fmod_test1_validate( tests[i].numerator,  tests[i].denominator,  tests[i].expected, tests[i].variance);
-        fmod_test1_validate(-tests[i].numerator,  tests[i].denominator, -tests[i].expected, tests[i].variance);
-        fmod_test1_validate( tests[i].numerator, -tests[i].denominator,  tests[i].expected, tests[i].variance);
-        fmod_test1_validate(-tests[i].numerator, -tests[i].denominator, -tests[i].expected, tests[i].variance);
-    }
-
-    fmod_test1_validate_isnan( 0,    0);
-    fmod_test1_validate_isnan(-0.0,  0);
-    fmod_test1_validate_isnan( 0,   -0.0);
-    fmod_test1_validate_isnan(-0.0, -0.0);
-    
-    fmod_test1_validate_isnan( 1,    0);
-    fmod_test1_validate_isnan(-1.0,  0);
-    fmod_test1_validate_isnan( 1,   -0.0);
-    fmod_test1_validate_isnan(-1.0, -0.0);
-    
-    fmod_test1_validate_isnan(PAL_POSINF,  PAL_POSINF);
-    fmod_test1_validate_isnan(PAL_NEGINF,  PAL_POSINF);
-    fmod_test1_validate_isnan(PAL_POSINF, PAL_NEGINF);
-    fmod_test1_validate_isnan(PAL_NEGINF, PAL_NEGINF);
-    
-    fmod_test1_validate_isnan(PAL_POSINF,  0);
-    fmod_test1_validate_isnan(PAL_NEGINF,  0);
-    fmod_test1_validate_isnan(PAL_POSINF, -0.0);
-    fmod_test1_validate_isnan(PAL_NEGINF, -0.0);
-    
-    fmod_test1_validate_isnan(PAL_POSINF,  1);
-    fmod_test1_validate_isnan(PAL_NEGINF,  1);
-    fmod_test1_validate_isnan(PAL_POSINF, -1.0);
-    fmod_test1_validate_isnan(PAL_NEGINF, -1.0);
-    
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fmodf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fmodf/test1/test1.cpp
deleted file mode 100644
index b73f7824f166..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fmodf/test1/test1.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that fmodf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabsf
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float numerator;    /* second component of the value to test the function with */
-    float denominator;  /* first component of the value to test the function with */
-    float expected;     /* expected result */
-    float variance;     /* maximum delta between the expected and actual result */
-};
-
-/**
- * fmodf_test1_validate
- *
- * test validation function
- */
-void __cdecl fmodf_test1_validate(float numerator, float denominator, float expected, float variance)
-{
-    float result = fmodf(numerator, denominator);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("fmodf(%g, %g) returned %10.9g when it should have returned %10.9g",
-             numerator, denominator, result, expected);
-    }
-}
-
-/**
- * fmodf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl fmodf_test1_validate_isnan(float numerator, float denominator)
-{
-    float result = fmodf(numerator, denominator);
-
-    if (!_isnan(result))
-    {
-        Fail("fmodf(%g, %g) returned %10.9g when it should have returned %10.9g",
-             numerator, denominator, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_fmodf_test1_paltest_fmodf_test1, "c_runtime/fmodf/test1/paltest_fmodf_test1")
-{
-    struct test tests[] = 
-    {
-        /* numerator        denominator        expected          variance */
-        {  0,               PAL_POSINF,        0,                PAL_EPSILON },
-        {  0.312961796f,    0.949765715f,      0.312961796f,     PAL_EPSILON },
-        {  0.420770483f,    0.907167129f,      0.420770483f,     PAL_EPSILON },
-        {  0.594480769f,    0.804109828f,      0.594480769f,     PAL_EPSILON },
-        {  0.638961276f,    0.769238901f,      0.638961276f,     PAL_EPSILON },
-        {  0.649636939f,    0.760244597f,      0.649636939f,     PAL_EPSILON },
-        {  0.707106781f,    0.707106781f,      0,                PAL_EPSILON },
-        {  1,               1,                 0,                PAL_EPSILON },
-        {  0.841470985f,    0.540302306f,      0.301168679f,     PAL_EPSILON },
-        {  0.903719457f,    0.428125148f,      0.0474691617f,    PAL_EPSILON / 10 },
-        {  0.987765946f,    0.155943695f,      0.0521037774f,    PAL_EPSILON / 10 },
-        {  0.991806244f,    0.127751218f,      0.0975477216f,    PAL_EPSILON / 10 },
-        {  0.743980337f,   -0.668201510f,      0.0757788268f,    PAL_EPSILON / 10 },
-        {  0.410781291f,   -0.911733915f,      0.410781291f,     PAL_EPSILON },
-        {  0,              -1,                 0,                PAL_EPSILON },
-        {  1,               PAL_POSINF,        1,                PAL_EPSILON * 10 },
-    };
-
-
-    // PAL initialization
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        fmodf_test1_validate( tests[i].numerator,  tests[i].denominator,  tests[i].expected, tests[i].variance);
-        fmodf_test1_validate(-tests[i].numerator,  tests[i].denominator, -tests[i].expected, tests[i].variance);
-        fmodf_test1_validate( tests[i].numerator, -tests[i].denominator,  tests[i].expected, tests[i].variance);
-        fmodf_test1_validate(-tests[i].numerator, -tests[i].denominator, -tests[i].expected, tests[i].variance);
-    }
-
-    fmodf_test1_validate_isnan( 0,     0);
-    fmodf_test1_validate_isnan(-0.0f,  0);
-    fmodf_test1_validate_isnan( 0,    -0.0f);
-    fmodf_test1_validate_isnan(-0.0f, -0.0f);
-    
-    fmodf_test1_validate_isnan( 1,     0);
-    fmodf_test1_validate_isnan(-1,     0);
-    fmodf_test1_validate_isnan( 1,    -0.0f);
-    fmodf_test1_validate_isnan(-1,    -0.0f);
-    
-    fmodf_test1_validate_isnan(PAL_POSINF, PAL_POSINF);
-    fmodf_test1_validate_isnan(PAL_NEGINF, PAL_POSINF);
-    fmodf_test1_validate_isnan(PAL_POSINF, PAL_NEGINF);
-    fmodf_test1_validate_isnan(PAL_NEGINF, PAL_NEGINF);
-    
-    fmodf_test1_validate_isnan(PAL_POSINF,  0);
-    fmodf_test1_validate_isnan(PAL_NEGINF,  0);
-    fmodf_test1_validate_isnan(PAL_POSINF, -0.0f);
-    fmodf_test1_validate_isnan(PAL_NEGINF, -0.0f);
-    
-    fmodf_test1_validate_isnan(PAL_POSINF,  1);
-    fmodf_test1_validate_isnan(PAL_NEGINF,  1);
-    fmodf_test1_validate_isnan(PAL_POSINF, -1);
-    fmodf_test1_validate_isnan(PAL_NEGINF, -1);
-    
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test1/test1.cpp
deleted file mode 100644
index abbc328d5946..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test1/test1.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests the PAL implementation of the fopen function. 
-**          This test simply attempts to open a number of files
-**          with different modes.  It checks to ensure a valid
-**          file pointer is returned.  It doesn't do any checking
-**          to ensure the mode is really what it claims.  And checks
-**          for a NULL pointer when attempts to open a directory.
-**  
-
-**
-**===================================================================*/
-                                 
-#include <palsuite.h>
-
-struct testCase
-{
-    int CorrectResult;
-    char mode[20];
-};
-
-PALTEST(c_runtime_fopen_test1_paltest_fopen_test1, "c_runtime/fopen/test1/paltest_fopen_test1")
-{
-  
-    FILE *fp;
-    char name[128];
-    int i;
-
-    struct testCase testCases[] = 
-        {
-            {0,  "r"},  {1, "w"},   {1,  "a"},
-            {0,  "r+"}, {1,  "w+"}, {1,  "a+"},
-            {1,  "wt"}, {1,  "wb"}, {1,  "wS"},
-            {1,  "w+c"}, {1,  "w+n"}, {1, "wR"},
-            {1,  "wT"}, {0, "tw"} 
-        };
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-  
-  
-    for(i = 0; i < sizeof(testCases) / sizeof(struct testCase); i++)
-    {
-        strcpy(name,"testfiles");
-        strcat(name,testCases[i].mode);
-      
-        fp = fopen(name,testCases[i].mode);
-      
-        if ((fp == 0 && testCases[i].CorrectResult != 0)  ||
-            (testCases[i].CorrectResult == 0 && fp != 0) )
-        {
-            Fail("ERROR: fopen returned incorrectly "
-                   "opening a file in %s mode.  Perhaps it opened a "
-                   "read only file which didn't exist and returned a correct "
-                   "pointer?",testCases[i].mode);
-        }    
-
-        memset(name, '\0', 128);
-        
-    }      
-
-    /* When attempt to open a directory fopen should returned NULL */
-    if ( fopen(".", "r") != NULL)
-    {
-        Fail("ERROR: fopen returned non-NULL when trying to open a directory"
-             " the returned value was %d\n", fp);
-    }
-  
-    PAL_Terminate();
-    return PASS;
-}
-   
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test2/test2.cpp
deleted file mode 100644
index b588c41fd23e..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test2/test2.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test2.c
-**
-** Purpose: Tests the PAL implementation of the fopen function. 
-**          Test to ensure that you can write to a 'w' mode file.
-**          And that you can't read from a 'w' mode file.
-**
-** Depends:
-**      fprintf
-**      fseek
-**      fgets
-**  
-
-**
-**===================================================================*/
-                                                                         
-#include <palsuite.h>
-
-PALTEST(c_runtime_fopen_test2_paltest_fopen_test2, "c_runtime/fopen/test2/paltest_fopen_test2")
-{
-  
-    FILE *fp;
-    char buffer[128];
-  
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-  
-    if( (fp = fopen( "testfile", "w" )) == NULL )
-    {
-        Fail( "ERROR: The file failed to open with 'w' mode.\n" );
-    }  
-
-    /* Test that you can write */
-    if(fprintf(fp,"%s","some text") <= 0) 
-    {
-        Fail("ERROR: Attempted to WRITE to a file opened with 'w' mode "
-               "but fprintf failed.  Either fopen or fprintf have problems.");
-    }
-
-    if(fseek(fp, 0, SEEK_SET)) 
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.");
-    }
-
-    /* Test that you can't read */
-    if(fgets(buffer,10,fp) != NULL)
-    {
-        Fail("ERROR: Tried to READ from a file with only 'w' mode set. "
-               "This should fail, but fgets didn't return NULL.  Either "
-               "fgets or fopen is broken.");
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
-   
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test3/test3.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test3/test3.cpp
deleted file mode 100644
index 3be80f161099..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test3/test3.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test3.c
-**
-** Purpose: Tests the PAL implementation of the fopen function. 
-**          Test to ensure that you can write to a 'w+' mode file.
-**          And that you can read from a 'w+' mode file.
-**
-** Depends:
-**      fprintf
-**      fseek
-**      fgets
-**  
-
-**
-**===================================================================*/
-                                                                         
-#include <palsuite.h>
-
-PALTEST(c_runtime_fopen_test3_paltest_fopen_test3, "c_runtime/fopen/test3/paltest_fopen_test3")
-{
-  
-    FILE *fp;
-    char buffer[128];
-  
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-  
-    /* Open a file with 'w+' mode */
-    if( (fp = fopen( "testfile", "w+" )) == NULL )
-    {
-        Fail( "ERROR: The file failed to open with 'w+' mode.\n" );
-    }  
-  
-    /* Write some text to the file */
-    if(fprintf(fp,"%s","some text") <= 0) 
-    {
-        Fail("ERROR: Attempted to WRITE to a file opened with 'w+' mode "
-               "but fprintf failed.  Either fopen or fprintf have problems.");
-    }
-
-    if(fseek(fp, 0, SEEK_SET)) 
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.");
-    }
-  
-    /* Attempt to read from the 'w+' only file, should pass */
-    if(fgets(buffer,10,fp) == NULL)
-    {
-        Fail("ERROR: Tried to READ from a file with 'w+' mode set. "
-               "This should succeed, but fgets returned NULL.  Either fgets "
-               "or fopen is broken.");
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
-   
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test4/test4.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test4/test4.cpp
deleted file mode 100644
index 24f8bfa11e79..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test4/test4.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test4.c
-**
-** Purpose: Tests the PAL implementation of the fopen function. 
-**          Test to ensure that you can't write to a 'r' mode file.
-**          And that you can read from a 'r' mode file.
-**
-** Depends:
-**      fprintf
-**      fclose
-**      fgets
-**  
-
-**
-**===================================================================*/
-                                                                         
-#include <palsuite.h>
-
-PALTEST(c_runtime_fopen_test4_paltest_fopen_test4, "c_runtime/fopen/test4/paltest_fopen_test4")
-{
-  
-    FILE *fp;
-    char buffer[128];
-  
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-  
-    /* Open a file with 'w' mode */
-    if( (fp = fopen( "testfile", "w" )) == NULL )
-    {
-        Fail( "ERROR: The file failed to open with 'w' mode.\n" );
-    }  
-  
-    /* Write some text to the file */
-    if(fprintf(fp,"%s","some text") <= 0) 
-    {
-        Fail("ERROR: Attempted to WRITE to a file opened with 'w' mode "
-               "but fprintf failed.  Either fopen or fprintf have problems.");
-    }
-
-    if(fclose(fp))
-    {
-        Fail("ERROR: Attempted to close a file, but fclose failed. "
-               "This test depends upon it.");
-    }
-
-    /* Open a file with 'r' mode */
-    if( (fp = fopen( "testfile", "r" )) == NULL )
-    {
-        Fail( "ERROR: The file failed to open with 'r' mode.\n" );
-    } 
-  
-    /* Attempt to read from the 'r' only file, should pass */
-    if(fgets(buffer,10,fp) == NULL)
-    {
-        Fail("ERROR: Tried to READ from a file with 'r' mode set. "
-               "This should succeed, but fgets returned NULL.  Either fgets "
-               "or fopen is broken.");
-    }
-
-    /* Write some text to the file */
-    if(fprintf(fp,"%s","some text") > 0) 
-    {
-        Fail("ERROR: Attempted to WRITE to a file opened with 'r' mode "
-               "but fprintf succeeded  It should have failed.  "
-               "Either fopen or fprintf have problems.");
-    }
-
-
-    PAL_Terminate();
-    return PASS;
-}
-   
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test5/test5.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test5/test5.cpp
deleted file mode 100644
index 04205aca8335..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test5/test5.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test5.c
-**
-** Purpose: Tests the PAL implementation of the fopen function. 
-**          Test to ensure that you can write to a 'r+' mode file.
-**          And that you can read from a 'r+' mode file.
-**
-** Depends:
-**      fprintf
-**      fclose
-**      fgets
-**      fseek
-**  
-
-**
-**===================================================================*/
-                                     
-#include <palsuite.h>
-
-PALTEST(c_runtime_fopen_test5_paltest_fopen_test5, "c_runtime/fopen/test5/paltest_fopen_test5")
-{
-  
-  FILE *fp;
-  char buffer[128];
-  
-  if (PAL_Initialize(argc, argv))
-  {
-    return FAIL;
-  }
-
-  
-  /* Open a file with 'w' mode */
-  if( (fp = fopen( "testfile", "w" )) == NULL )
-    {
-      Fail( "ERROR: The file failed to open with 'w' mode.\n" );
-    }  
-  
-  if(fclose(fp))
-    {
-      Fail("ERROR: Attempted to close a file, but fclose failed. "
-         "This test depends upon it.");
-    }
-
-  if( (fp = fopen( "testfile", "r+" )) == NULL )
-    {
-      Fail( "ERROR: The file failed to open with 'r+' mode.\n" );
-    } 
-  
-  /* Write some text to the file */
-  if(fprintf(fp,"%s","some text") <= 0) 
-    {
-      Fail("ERROR: Attempted to WRITE to a file opened with 'r+' mode "
-         "but fprintf failed.  Either fopen or fprintf have problems.");
-    }
-  
-   if(fseek(fp, 0, SEEK_SET)) 
-    {
-      Fail("ERROR: fseek failed, and this test depends on it.");
-    }
-  
-  /* Attempt to read from the 'r+' only file, should pass */
-  if(fgets(buffer,10,fp) == NULL)
-    {
-      Fail("ERROR: Tried to READ from a file with 'r+' mode set. "
-         "This should succeed, but fgets returned NULL.  Either fgets "
-         "or fopen is broken.");
-    }  
-
-  PAL_Terminate();
-  return PASS;
-}
-   
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test6/test6.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test6/test6.cpp
deleted file mode 100644
index 13f00cc9300b..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test6/test6.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test6.c
-**
-** Purpose: Tests the PAL implementation of the fopen function. 
-**          Test to ensure that you can write to an 'a' mode file.
-**          And that you can't read from a 'a' mode file. Also ensure
-**          that you can use fseek and still write to the end of a file.
-**
-** Depends:
-**      fprintf
-**      fgets
-**      fseek
-**      fclose
-**  
-
-**
-**===================================================================*/
-                                                                         
-#include <palsuite.h>
-
-PALTEST(c_runtime_fopen_test6_paltest_fopen_test6, "c_runtime/fopen/test6/paltest_fopen_test6")
-{
-  
-    FILE *fp;
-    char buffer[128];
-  
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-  
-    /* Open a file with 'a+' mode */
-    if( (fp = fopen( "testfile", "a" )) == NULL )
-    {
-        Fail( "ERROR: The file failed to open with 'a' mode.\n" );
-    }  
-    
-    /* Write some text to the file */
-    if(fprintf(fp,"%s","some text") <= 0) 
-    {
-        Fail("ERROR: Attempted to WRITE to a file opened with 'a' mode "
-               "but fprintf failed.  Either fopen or fprintf have problems.");
-    }
-  
-    if(fseek(fp, 0, SEEK_SET)) 
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.");
-    }
-  
-    /* Attempt to read from the 'a' only file, should fail */
-    if(fgets(buffer,10,fp) != NULL)
-    {
-        Fail("ERROR: Tried to READ from a file with 'a' mode set. "
-               "This should fail, but fgets returned success.  Either fgets "
-               "or fopen is broken.");
-    }
-
-    
-    /* Attempt to write to a file after using 'a' and fseek */
-    fp = fopen("testfile2", "a");
-    if(fp == NULL)
-    {
-        Fail("ERROR: The file failed to be created with 'a' mode.\n");
-    }
-
-    /* write text to the file initially */
-    if(fprintf(fp,"%s","abcd") <= 0)
-    {
-        Fail("ERROR: Attempted to WRITE to a file opened with 'a' mode "
-            "but fprintf failed.  Either fopen or fprintf have problems.\n");
-    }
-
-    /* set the pointer to the front of the file */
-    if(fseek(fp, 0, SEEK_SET))
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.\n");
-    }
-
-    /* using 'a' should still write to the end of the file, not the front */
-    if(fputs("efgh",fp) < 0)
-    {
-        Fail("ERROR: Attempted to WRITE to a file opened with 'a' mode "
-            "but fputs failed.\n");
-    }
-
-    /* set the pointer to the front of the file */
-    if(fseek(fp, 0, SEEK_SET))
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.\n");
-    }
-
-    /* a file with 'a' mode can only write, so close the file before reading */
-    if(fclose(fp))
-    {
-        Fail("ERROR: fclose failed when it should have succeeded.\n");
-    }
-
-    /* open the file again to read */
-    fp = fopen("testfile2","r");
-    if(fp == NULL)
-    {
-        Fail("ERROR: fopen failed to open the file using 'r' mode");
-    }
-
-    /* Attempt to read from the 'a' only file, should succeed */
-    if(fgets(buffer,10,fp) == NULL)
-    {
-        Fail("ERROR: Tried to READ from a file with 'a' mode set. "
-               "This should pass, but fgets returned failure.  Either fgets "
-               "or fopen is broken.\n");
-    }
-
-    /* Compare what was read and what should have been in the file */
-    if(memcmp(buffer,"abcdefgh",8))
-    {
-        Fail("ERROR: The string read should have equaled 'abcdefgh' "
-            "but instead it is %s\n", buffer);
-    }
-
-
-    PAL_Terminate();
-    return PASS;
-}
-   
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test7/test7.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test7/test7.cpp
deleted file mode 100644
index 0cf274d8c6b7..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fopen/test7/test7.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test7.c
-**
-** Purpose: Tests the PAL implementation of the fopen function. 
-**          Test to ensure that you can write to an 'a+' mode file.
-**          And that you can read from a 'a+' mode file.  Also ensure
-**          that you can use fseek and still write to the end of a file.
-**
-** Depends:
-**      fprintf
-**      fgets
-**      fseek
-**      fclose
-**  
-
-**
-**===================================================================*/
-                                                                         
-#include <palsuite.h>
-
-PALTEST(c_runtime_fopen_test7_paltest_fopen_test7, "c_runtime/fopen/test7/paltest_fopen_test7")
-{
-  
-    FILE *fp;
-    char buffer[128];
-  
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-  
-    /* Open a file with 'a+' mode */
-    if( (fp = fopen( "testfile", "a+" )) == NULL )
-    {
-        Fail( "ERROR: The file failed to open with 'a+' mode.\n" );
-    }  
-    
-    /* Write some text to the file */
-    if(fprintf(fp,"%s","some text") <= 0) 
-    {
-        Fail("ERROR: Attempted to WRITE to a file opened with 'a+' mode "
-               "but fprintf failed.  Either fopen or fprintf have problems.\n");
-    }
-  
-    if(fseek(fp, 0, SEEK_SET)) 
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.\n");
-    }
-  
-    /* Attempt to read from the 'a+' only file, should succeed */
-    if(fgets(buffer,10,fp) == NULL)
-    {
-        Fail("ERROR: Tried to READ from a file with 'a+' mode set. "
-               "This should pass, but fgets returned failure.  Either fgets "
-               "or fopen is broken.\n");
-    }
-
-     
-    /* Attempt to write to a file after using 'a+' and fseek */
-    fp = fopen("testfile2", "a+");
-    if(fp == NULL)
-    {
-        Fail("ERROR: The file failed to be created with 'a+' mode.\n");
-    }
-
-    /* write text to the file initially */
-    if(fprintf(fp,"%s","abcd") <= 0)
-    {
-        Fail("ERROR: Attempted to WRITE to a file opened with 'a+' mode "
-            "but fprintf failed.  Either fopen or fprintf have problems.\n");
-    }
-
-    /* set the pointer to the front of the file */
-    if(fseek(fp, 0, SEEK_SET))
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.\n");
-    }
-
-    /* using 'a+' should still write to the end of the file, not the front */
-    if(fputs("efgh",fp) < 0)
-    {
-        Fail("ERROR: Attempted to WRITE to a file opened with 'a+' mode "
-            "but fputs failed.\n");
-    }
-
-    /* set the pointer to the front of the file */
-    if(fseek(fp, 0, SEEK_SET))
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.\n");
-    }
-
-    /* Attempt to read from the 'a+' only file, should succeed */
-    if(fgets(buffer,10,fp) == NULL)
-    {
-        Fail("ERROR: Tried to READ from a file with 'a+' mode set. "
-               "This should pass, but fgets returned failure.  Either fgets "
-               "or fopen is broken.\n");
-    }
-
-    /* Compare what was read and what should have been in the file */
-    if(memcmp(buffer,"abcdefgh",8))
-    {
-        Fail("ERROR: The string read should have equaled 'abcdefgh' "
-            "but instead it is %s\n", buffer);
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
-   
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fputs/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fputs/test1/test1.cpp
deleted file mode 100644
index c746daff9e6d..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fputs/test1/test1.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose:  Call fputs twice and write two strings to a file.  Then
-** call fread on the file and check that the data which was written is what
-** we expect it to be.
-**  
-
-**
-**===================================================================*/
-
-                                 
-#include <palsuite.h>
-
-
-PALTEST(c_runtime_fputs_test1_paltest_fputs_test1, "c_runtime/fputs/test1/paltest_fputs_test1")
-{
-    
-    FILE* TheFile;
-    char* StringOne = "FooBar";
-    char* StringTwo = "BarFoo";
-    char* CompleteString = "FooBarBarFoo";
-    char ReadBuffer[64];
-    int ret;
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-    /* Open the file that we'll be working with */
-
-    TheFile = fopen("TestFile", "w+");
-  
-    if(TheFile == NULL)
-    {
-        Fail("ERROR: fopen failed to open the file 'TestFile' in read/write "
-             "mode.\n");
-    }
-
-    /* Call fputs twice to write two strings to the file stream */
-    
-    if(fputs(StringOne, TheFile) < 0)
-    {
-        Fail("ERROR: fputs returned a negative value when attempting to "
-             "put the string '%s' to the file.\n",StringOne);
-    }
-    
-    if(fputs(StringTwo, TheFile) < 0)
-    {
-        Fail("ERROR: fputs returned a negative value when attempting to "
-             "put the string '%s' to the file.\n",StringTwo);
-    }
-    
-    /* Flush the buffers */
-    if(fflush(TheFile) != 0)
-    {
-        Fail("ERROR: fflush failed to properly flush the buffers.\n");
-    }
-
-    /* Now read from the file to ensure the data was written correctly. 
-       Note: We read more than what was written to make sure nothing extra
-       was written.
-    */
-
-    if(fseek(TheFile, 0, SEEK_SET) != 0)
-    {
-        Fail("ERROR: fseek failed to set the file pointer back to the start "
-             "of the file.\n");
-    }
-
-
-    if((ret = fread(ReadBuffer, 1, 20, TheFile)) != 12)
-    {
-        Fail("ERROR: fread should have returned that it read in 12 characters "
-             "from the file, but instead it returned %d.\n", ret);
-    }
-
-    ReadBuffer[ret] = '\0';
-
-    if(strcmp(ReadBuffer, CompleteString) != 0)
-    {
-        Fail("ERROR: The data read back from the file is not exactly the same "
-             "as the data that was written by fputs.  The file contains '%s' "
-             "instead of '%s'.\n",ReadBuffer, CompleteString);
-    }
-
-    if(fclose(TheFile) != 0)
-    {
-        Fail("ERROR: fclose failed to close the file stream.\n");
-    }
-     
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fputs/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fputs/test2/test2.cpp
deleted file mode 100644
index aa8d36a5115b..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fputs/test2/test2.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test2.c
-**
-** Purpose:  Check to see that fputs fails and returns EOF when called on
-**           a closed file stream and a read-only file stream.
-**  
-
-**
-**===================================================================*/
-                                 
-#include <palsuite.h>
-
-PALTEST(c_runtime_fputs_test2_paltest_fputs_test2, "c_runtime/fputs/test2/paltest_fputs_test2")
-{
-    
-    FILE* TheFile;
-    char* StringOne = "FooBar";
-    int ret;
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-    /* Create a file with read/write access */
-
-    TheFile = fopen("TestFile", "w+");
-  
-    if(TheFile == NULL)
-    {
-        Fail("ERROR: fopen failed to open the file 'TestFile' in read/write "
-             "mode.\n");
-    }
-
-    /* Then close that file we just opened */
-
-    if(fclose(TheFile) != 0)
-    {
-        Fail("ERROR: fclose failed to close the file.\n");
-    }
-
-    /* Check that calling fputs on this closed file stream fails. */
-
-    if((ret = fputs(StringOne, TheFile)) >= 0)
-    {
-        Fail("ERROR: fputs should have failed to write to a closed "
-             "file stream, but it didn't return a negative value.\n");
-    }
-
-    if(ret != EOF)
-    {
-        Fail("ERROR: fputs should have returned EOF on an error, but instead "
-              "returned %d.\n",ret);
-    }
-
-    /* Open a file as Readonly */
-
-    TheFile = fopen("TestFile", "r");
-  
-    if(TheFile == NULL)
-    {
-        Fail("ERROR: fopen failed to open the file 'TestFile' in read/write "
-             "mode.\n");
-    }
-
-    /* Check that fputs fails when trying to write to a read-only stream */
-
-    if((ret = fputs(StringOne, TheFile)) >= 0)
-    {
-        Fail("ERROR: fputs should have failed to write to a read-only "
-             "file stream, but it didn't return a negative value.\n");
-    }
-    
-    if(ret != EOF)
-    {
-        Fail("ERROR: fputs should have returned EOF when writing to a "
-             "read-only filestream, but instead  "
-             "returned %d.\n",ret);
-    }
-    
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fread/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fread/test1/test1.cpp
deleted file mode 100644
index 3cfe9fa444a5..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fread/test1/test1.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests the PAL implementation of the fread function.
-**          Open a file in READ mode, and then try to read all
-**          the characters, more than all the characters,
-**          0 characters and 0 sized characters and check that
-**          the return values are correct.
-**
-** Depends:
-**     fopen
-**     fseek
-**     fclose
-**
-**
-**===================================================================*/
-
-/* Note: testfile should exist in the directory with 15 characters 
-   in it ... something got lost if it isn't here.
-*/
-
-/* Note: Under win32, fread() crashes when passed NULL.  The test to ensure that
-   it returns 0 has been removed to reflect this.
-*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_fread_test1_paltest_fread_test1, "c_runtime/fread/test1/paltest_fread_test1")
-{
-    const char filename[] = "testfile";
-    char buffer[128];
-    FILE * fp = NULL;
-    int result;
-
-    if (0 != PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-  
-    /* Open a file in READ mode */
-
-    if((fp = fopen(filename, "r")) == NULL)
-    {
-        Fail("Unable to open a file for reading.  Is the file "
-               "in the directory?  It should be.");
-    }
-
-    /* Read 15 characters from the file.  The file has exactly this many 
-       in it.
-    */
-    if((result = fread(buffer,1,15,fp)) == 0)
-    {
-        Fail("ERROR: Zero characters read from the file.  It should have "
-               "15 characters in it.");
-    }
-
-    if(result != 15)
-    {
-        Fail("ERROR: The fread function should have returned that it read "
-               "in 15 characters from the file.  But it indicates having "
-               "read %i characters.",result);
-    }
-
-    /* Go back to the start of the file */
-
-    if(fseek(fp, 0, SEEK_SET)) 
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.");
-    }
-
-    /* Attempt to read 17 characters, the return should still be 15 */
-
-    if((result = fread(buffer,1,17,fp)) == 0)
-    {
-        Fail("ERROR: Zero characters read from the file.  It should have "
-               "15 characters in it.  Though, it attempted to read 17.");
-    }  
-  
-    if(result != 15)
-    {
-        Fail("ERROR: The fread function should have returned that it read "
-               "in 15 characters from the file.  "
-               "But it indicates having read  %i characters.",result);
-    }
-  
-    /* Back to the start of the file */
-  
-    if(fseek(fp, 0, SEEK_SET)) 
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.");
-    }
-
-    /* Read 0 characters and ensure the function returns 0 */
-
-    if((result = fread(buffer,1,0,fp)) != 0)
-    {
-        Fail("ERROR: The return value should be 0, as we attempted to "
-               "read 0 characters.");
-    } 
-
-    /* Read characters of 0 size and ensure the return value is 0 */
-
-    if((result = fread(buffer,0,5,fp)) != 0)
-    {
-        Fail("ERROR: The return value should be 0, as we attempted to "
-               "read 0 sized data.");
-    } 
-
-    /* Close the file */
-
-    if(fclose(fp))
-    {
-        Fail("ERROR: fclose failed.  Test depends on it.");
-    }
-
-    /* Read 5 characters of 1 size from a closed file pointer 
-       and ensure the return value is 0 
-    */
-
-    if((result = fread(buffer,1,5,fp)) != 0)
-    {
-        Fail("ERROR: The return value should be 0, as we attempted to "
-               "read data from a closed file pointer.");
-    } 
-
-    PAL_Terminate();
-    return PASS;
-}
-   
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fread/test1/testfile b/src/coreclr/pal/tests/palsuite/c_runtime/fread/test1/testfile
deleted file mode 100644
index 273c1a9ffdc2..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fread/test1/testfile
+++ /dev/null
@@ -1 +0,0 @@
-This is a test.
\ No newline at end of file
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fread/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fread/test2/test2.cpp
deleted file mode 100644
index de372204eb66..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fread/test2/test2.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test2.c
-**
-** Purpose: Tests the PAL implementation of the fread function.
-**          Open a file in READ mode, and then try to read all
-**          the characters, more than all the characters,
-**          0 characters and 0 sized characters and check that
-**          the strings read in are correct.
-**
-** Depends:
-**     fopen
-**     fseek
-**     fclose
-**     strcmp
-**     memset
-**
-**
-**===================================================================*/
-
-/* Note: testfile should exist in the directory with 15 characters 
-   in it ... something got lost if it isn't here.
-*/
-
-/* Note: The behaviour in win32 is to crash if a NULL pointer is passed to
-   fread, so the test to check that it returns 0 has been removed.
-*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_fread_test2_paltest_fread_test2, "c_runtime/fread/test2/paltest_fread_test2")
-{
-    const char filename[] = "testfile";
-    char buffer[128];
-    FILE * fp = NULL;
-
-    if (0 != PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-    /* Open a file in READ mode */
-
-    if((fp = fopen(filename, "r")) == NULL)
-    {
-        Fail("Unable to open a file for reading.  Is the file "
-               "in the directory?  It should be.");
-    }
-
-    /* Read 15 characters from the file.  The file has exactly this many 
-       in it. Then check to see that the data read in is correct.
-       Note: The 'testfile' should have "This is a test." written in it.
-    */
-    memset(buffer,'\0',128);
-    fread(buffer,1,15,fp);
-
-    if(strcmp(buffer,"This is a test.") != 0)
-    {
-	Fail("ERROR: The data read in should have been "
-	     "'This is a test.' but, the buffer contains '%s'.",
-	     buffer);
-    }
-    
-    /* Go back to the start of the file */
-
-    if(fseek(fp, 0, SEEK_SET)) 
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.");
-    }
-
-    /* Attempt to read 17 characters. The same 15 characters should
-       be in the buffer.
-    */
-    
-    memset(buffer,'\0',128);
-    fread(buffer,1,17,fp);
-
-    if(strcmp(buffer,"This is a test.") != 0)
-    {
-	Fail("ERROR: The data read in should have been "
-	     "'This is a test.' but, the buffer contains '%s'.",
-	     buffer);
-    }
-      
-    /* Back to the start of the file */
-  
-    if(fseek(fp, 0, SEEK_SET)) 
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.");
-    }
-
-    /* Read 0 characters and ensure the buffer is empty */
-    
-    memset(buffer,'\0',128);
-    fread(buffer,1,0,fp);
-     
-    if(strcmp(buffer,"\0") != 0)
-    {
-	Fail("ERROR: The data read in should have been "
-	     "NULL but, the buffer contains '%s'.",
-	     buffer);
-    }
-   
-    /* Read characters of 0 size and ensure the buffer is empty */
-    
-    memset(buffer,'\0',128);
-    fread(buffer,0,5,fp);
-   
-    if(strcmp(buffer,"\0") != 0)
-    {
-	Fail("ERROR: The data read in should have been "
-	     "NULL but, the buffer contains '%s'.",
-	     buffer);
-    }
-   
-    /* Close the file */
-
-    if(fclose(fp))
-    {
-        Fail("ERROR: fclose failed.  Test depends on it.");
-    }
-
-    /* Read 5 characters of 1 size from a closed file pointer 
-       and ensure the buffer is empty 
-    */
-    memset(buffer,'\0',128);
-    fread(buffer,1,5,fp);
-    if(strcmp(buffer,"\0") != 0)
-    {
-	Fail("ERROR: The data read in should have been "
-	     "NULL but, the buffer contains '%s'.",
-	     buffer);
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
-   
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fread/test2/testfile b/src/coreclr/pal/tests/palsuite/c_runtime/fread/test2/testfile
deleted file mode 100644
index 273c1a9ffdc2..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fread/test2/testfile
+++ /dev/null
@@ -1 +0,0 @@
-This is a test.
\ No newline at end of file
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fread/test3/test3.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fread/test3/test3.cpp
deleted file mode 100644
index 75ae9c6ad1b3..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fread/test3/test3.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test3.c
-**
-** Purpose: Tests the PAL implementation of the fread function.
-**          Open a file in READ mode, then try to read from the file with
-**          different 'size' params.  Check to ensure the return values and
-**          the text in the buffer is correct.
-**
-** Depends:
-**     fopen
-**     fseek
-**     strcmp
-**     memset
-**
-**
-**===================================================================*/
-
-/* Note: testfile should exist in the directory with 15 characters 
-   in it ... something got lost if it isn't here.
-*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_fread_test3_paltest_fread_test3, "c_runtime/fread/test3/paltest_fread_test3")
-{
-    const char filename[] = "testfile";
-    char buffer[128];
-    FILE * fp = NULL;
-    int result;
-
-    if (0 != PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-    /* Open a file in READ mode */
-
-    if((fp = fopen(filename, "r")) == NULL)
-    {
-        Fail("Unable to open a file for reading.  Is the file "
-             "in the directory?  It should be.");
-    }
-
-    memset(buffer,'x',128);
-
-    /* Put the null one character past the end of the text that was read
-       in, to ensure that it wasn't reading in 0
-    */ 
-
-    buffer[16] = '\0';
-
-    /* Attempt to read in 5 bytes at a time.  This should return 3 and 
-       contain the full string in the buffer.
-    */
-    
-    if((result = fread(buffer,5,3,fp)) != 3)
-    {
-        Fail("ERROR: Attempted to read in data of size 5.  The file has "
-             "15 bytes in it so 3 items should have been read.  But the value "
-             "returned was %d.",result);
-    }
-
-    if(strcmp(buffer, "This is a test.x") != 0)
-    {
-        Fail("ERROR: The buffer should have contained the text "
-             "'This is a test.x' but instead contained '%s'.",buffer);
-    }
-
-    memset(buffer,'x',128);
-
-    if(fseek(fp, 0, SEEK_SET)) 
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.");
-    }    
-
-    buffer[16] = '\0';
-
-    /* Attempt to read in 6 bytes at a time. The return should be 2.  The
-       full string should still be in the buffer.
-    */
-    
-    if((result = fread(buffer,6,3,fp)) != 2)
-    {
-        Fail("ERROR: Attempted to read in data of size 6.  The file has "
-             "15 bytes in it, so 2 items should have been read.  But the "
-             "value returned was %d.",result);
-    }
-
-    if(strcmp(buffer, "This is a test.x") != 0)
-    {
-        Fail("ERROR: The buffer should have contained the text "
-             "'This is a test.x' but instead contained '%s'.",buffer);
-    }
-
-    memset(buffer,'x',128);
-
-    buffer[7] = '\0';
-
-    if(fseek(fp, 0, SEEK_SET)) 
-    {
-        Fail("ERROR: fseek failed, and this test depends on it.");
-    }
-
-    /* Attempt to read in 6 bytes at a time but only one item max. 
-       The return should be 1.  The first 6 characters should be in the
-       buffer.
-    */
-    
-    if((result = fread(buffer,6,1,fp)) != 1)
-    {
-        Fail("ERROR: Attempted to read in data of size 6 with a max count "
-             "of 1. Thus, one item should have been read, but the "
-             "value returned was %d.",result);
-    }
-
-    if(strcmp(buffer, "This ix") != 0)
-    {
-        Fail("ERROR: The buffer should have contained the text "
-             "'This ix.' but instead contained '%s'.",buffer);
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fread/test3/testfile b/src/coreclr/pal/tests/palsuite/c_runtime/fread/test3/testfile
deleted file mode 100644
index 273c1a9ffdc2..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fread/test3/testfile
+++ /dev/null
@@ -1 +0,0 @@
-This is a test.
\ No newline at end of file
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/free/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/free/test1/test1.cpp
deleted file mode 100644
index dc8d13158862..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/free/test1/test1.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Repeatedly allocates and frees a chunk of memory, to verify
-**          that free is really returning memory to the heap
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_free_test1_paltest_free_test1, "c_runtime/free/test1/paltest_free_test1")
-{
-
-    char *testA;
-
-    long i;
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-
-    /* check that free really returns memory to the heap. */
-    for(i=1; i<1000000; i++)
-    {
-        testA = (char *)malloc(1000*sizeof(char));
-        if (testA==NULL)
-        {
-            Fail("Either free is failing to return memory to the heap, or"
-                 " the system is running out of memory for some other "
-                 "reason.\n");
-        }
-        free(testA);
-    }
-
-    free(NULL); /*should do nothing*/
-    PAL_Terminate();
-    return PASS;
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fseek/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fseek/test1/test1.cpp
deleted file mode 100644
index 7fdfc2fef2fe..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fseek/test1/test1.cpp
+++ /dev/null
@@ -1,192 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Call fseek to move a file pointer to the start of a file,
-**          a position offset from the start, a position offset from the
-**          current position, and a position offset from the end of the
-**          file.  Check that the file pointer is at the correct position
-**          after each seek.
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-const char filename[] = "testfile.txt";
-
-static BOOL Cleanup(HANDLE hFile)
-{
-    BOOL result= TRUE;
-
-    if (fclose((PAL_FILE*)hFile))
-    {
-        Trace("fseek: ERROR -> Unable to close file \"%s\".\n", 
-            filename);
-        result= FALSE;
-    }
-    if (!DeleteFileA(filename))
-    {
-        result= FALSE;
-        Trace("fseek: ERROR -> Unable to delete file \"%s\". ", 
-            "GetLastError returned %u.\n", 
-            filename,
-            GetLastError());
-    }
-    return result;
-}
-
-PALTEST(c_runtime_fseek_test1_paltest_fseek_test1, "c_runtime/fseek/test1/paltest_fseek_test1")
-{
-    char outBuf[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-    char inBuf[20];
-    FILE * fp;
-    int size = ( sizeof(outBuf)/sizeof(char) ) - 1;
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-
-    /*create the file*/
-    fp = fopen(filename, "w");
-    if (fp == NULL)
-    {
-        Fail("Unable to open a file for write.\n");
-    }
-    if(fprintf(fp, outBuf) !=  size)
-    {
-        Trace("Unable to write to %s.\n", filename);
-        Cleanup(fp);
-        Fail("");
-    }
-
-    if (fclose(fp) != 0)
-    {
-        Trace("Unable to close newly written file.\n");
-        if (!DeleteFileA(filename))
-        {
-            Trace("fseek: ERROR -> Unable to delete file \"%s\". ", 
-                "GetLastError returned %u.\n", 
-                filename,
-                GetLastError());
-        }
-        Fail("");
-    }
-
-    fp = fopen(filename, "r");
-    if (fp == NULL)
-    {
-        if (!DeleteFileA(filename))
-        {
-            Trace("_putw: ERROR -> Unable to delete file \"%s\". ", 
-                "GetLastError returned %u.\n", 
-                filename,
-                GetLastError());
-        }
-        Fail("Unable to open a file for read.\n");
-    }
-
-    /*seek to the start*/
-    if (fseek(fp, 0, SEEK_SET) != 0)
-    {
-        Cleanup(fp);
-        Fail("fseek failed when seeking the start of a file.\n");
-    }
-    if (fgets(inBuf, 11, fp) != inBuf)
-    {
-        Cleanup(fp);
-        Fail("Unable to read from file after using fseek to move to the start.\n");
-    }
-    if (strncmp(inBuf, outBuf, 10) != 0)
-    {
-        Cleanup(fp);
-        Fail("fseek was asked to seek the start of a file," 
-             "but didn't get there.\n");
-    }
-
-    /*Seek with an offset from the start*/
-
-    if (fseek(fp, 10, SEEK_SET) != 0)
-    {
-        Cleanup(fp);
-        Fail("fseek failed when called with SEEK_SET and a positive offset.\n");
-    }
-  
-    if (fgets(inBuf, 6, fp) != inBuf)
-    {
-        Cleanup(fp);
-        Fail("fgets failed after feek was called with SEEK_SET" 
-             "and a positive offset.\n");
-    }
-
-
-    if (strncmp(inBuf, "ABCDE", 5) != 0)
-    {
-        Cleanup(fp);
-        Fail("fseek did not move to the correct position when passed SEEK_SET"
-             " and a positive offset.\n");
-    }
-
-    /*now move backwards and read the same string*/
-    if (fseek(fp, -5, SEEK_CUR) != 0)
-    {
-        Cleanup(fp);
-        Fail("fseek failed when passed SEEK_CUR and a negative offset.\n");
-    }
-
-    if (fgets(inBuf, 6, fp) != inBuf)
-    {
-        Cleanup(fp);
-        Fail("fgets failed after fseek was called with SEEK_CUR and a " 
-             "negative offset.\n");
-    }
-
-    if (strncmp(inBuf, "ABCDE", 5) != 0)
-    {
-        Cleanup(fp);
-        Fail("fseek did not move to the correct position when called with"
-             " SEEK_CUR and a negative offset.\n");
-    }
-
-    /*Try seeking relative to the end of the file.*/
-    if (fseek(fp, -10, SEEK_END) != 0)
-    {
-        Cleanup(fp);
-        Fail("fseek failed when called with SEEK_END and a negative"
-             " offset.\n");
-    }
-    if (fgets(inBuf, 2, fp) != inBuf)
-    {
-        Cleanup(fp);
-        Fail("fgets failed after fseek was called with SEEK_END and a "
-             "negative offset\n");
-    }
-
-    if (strncmp(inBuf, "Q", 1) != 0)
-    {
-        Cleanup(fp);
-        Fail("fseek did not move to the correct position when called with "
-             "SEEK_END and a negative offset.\n");
-    }
-
-
-    /*close the file*/
-    if(!Cleanup(fp))
-    {
-        Fail("");
-    } 
-
-    PAL_Terminate();
-    return PASS;
-}
-
-
-
-
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/ftell/test1/ftell.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/ftell/test1/ftell.cpp
deleted file mode 100644
index 1c60fab20791..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/ftell/test1/ftell.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  ftell.c (test 1)
-**
-** Purpose: Tests the PAL implementation of the ftell function.
-**
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-FILE* pFile;
-struct TESTS 
-{
-    long lDist;
-    int nFrom;
-    long lPosition;
-};
-
-
-/*************************************************
-**
-** Validate 
-**
-** Purpose:
-**      Tests whether the move was successful. If
-**      it passes, it returns TRUE. If it fails
-**      it outputs some error messages and returns
-**      FALSE.
-**
-*************************************************/
-BOOL Validate(long lExpected)
-{
-    long lPos = -2;
-
-    if (((lPos = ftell(pFile)) == -1) || (lPos != lExpected))
-    {
-        Trace("ftell: ERROR -> ftell returned %ld when expecting %ld.\n", 
-            lPos,
-            lExpected);
-        if (fclose(pFile) != 0)
-        {
-            Trace("ftell: ERROR -> fclose failed to close the file.\n");
-        }
-        return FALSE;
-    }
-    return TRUE;
-}
-
-
-/*************************************************
-**
-** MovePointer
-**
-** Purpose:
-**      Accepts the distance to move and the
-**      distance and calls fseek to move the file
-**      pointer. If the fseek fails, error messages
-**      are displayed and FALSE is returned. TRUE
-**      is returned on a successful fseek.
-**
-*************************************************/
-BOOL MovePointer(long lDist, int nFrom)
-{
-    /* move the file pointer*/
-    if (fseek(pFile, lDist, nFrom) != 0)
-    {
-        Trace("ftell: ERROR -> fseek failed to move the file pointer "
-            "%l characters.\n",
-            lDist);
-        if (fclose(pFile) != 0)
-        {
-            Trace("ftell: ERROR -> fclose failed to close the file.\n");
-        }
-        return FALSE;
-    }
-    return TRUE;
-}
-
-
-
-PALTEST(c_runtime_ftell_test1_paltest_ftell_test1, "c_runtime/ftell/test1/paltest_ftell_test1")
-{
-    const char szFileName[] = {"testfile.txt"};
-    long lPos = -1;
-    int i;
-    char szTempBuffer[256];
-    struct TESTS testCase[] = 
-    {
-        {0, SEEK_SET, 0},
-        {10, SEEK_CUR, 10},
-        {-5, SEEK_CUR, 5},
-        {-2, SEEK_END, 50}
-    };
-       
-
-
-    if (0 != PAL_Initialize(argc,argv))
-    {
-        return FAIL;
-    }
-
-    memset(szTempBuffer, 0, 256);
-
-
-    /* open the test file */
-    pFile = fopen(szFileName, "r");
-    if (pFile == NULL)
-    {
-        Fail("ftell: ERROR -> fopen failed to open the file \"%s\".\n");
-    }
-
-    /* loop through the test cases */
-    for (i = 0; i < (sizeof(testCase)/sizeof(struct TESTS)); i++)
-    {
-        if (MovePointer(testCase[i].lDist, testCase[i].nFrom) != TRUE)
-        {
-            Fail("");
-        }
-        else if (Validate(testCase[i].lPosition) != TRUE)
-        {
-            Fail("");
-        }
-    }
-
-    if (fclose(pFile) != 0)
-    {
-        Fail("ftell: ERROR -> fclose failed to close the file.\n");
-    }
-
-    /* lets just see if we can find out where we are in a closed stream... */
-    if ((lPos = ftell(pFile)) != -1)
-    {
-        Fail("ftell: ERROR -> ftell returned a valid position (%ld) on a "
-            "closed file handle\n", 
-            lPos);
-    }
-    
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/ftell/test1/testfile.txt b/src/coreclr/pal/tests/palsuite/c_runtime/ftell/test1/testfile.txt
deleted file mode 100644
index dd0fe15fe101..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/ftell/test1/testfile.txt
+++ /dev/null
@@ -1 +0,0 @@
-The quick brown fox jumped over the lazy dog's back.
\ No newline at end of file
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/fwrite/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/fwrite/test1/test1.cpp
deleted file mode 100644
index 5b74faa8eac9..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/fwrite/test1/test1.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Write a short string to a file and check that it was written
-**          properly.
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_fwrite_test1_paltest_fwrite_test1, "c_runtime/fwrite/test1/paltest_fwrite_test1")
-{
-    const char filename[] = "testfile.tmp";
-    const char outBuffer[] = "This is a test.";
-    char inBuffer[sizeof(outBuffer) + 10];
-    int itemsExpected;
-    int itemsWritten;
-    FILE * fp = NULL;
-  
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-    if((fp = fopen(filename, "w")) == NULL)
-    {
-        Fail("Unable to open a file for write.\n");
-    }
-
-    itemsExpected = sizeof(outBuffer);
-    itemsWritten = fwrite(outBuffer, 
-                          sizeof(outBuffer[0]), 
-                          sizeof(outBuffer), 
-                          fp);  
-    
-    if (itemsWritten == 0)
-    {
-        if(fclose(fp) != 0)
-        {
-            Fail("fwrite: Error occurred during the closing of a file.\n");
-        }
-
-        Fail("fwrite() couldn't write to a stream at all\n");
-    }
-    else if (itemsWritten != itemsExpected) 
-    {
-        if(fclose(fp) != 0)
-        {
-            Fail("fwrite: Error occurred during the closing of a file.\n");
-        }
-
-        Fail("fwrite() produced errors writing to a stream.\n");
-    }
-      
-    if(fclose(fp) != 0)
-    {
-        Fail("fwrite: Error occurred during the closing of a file.\n");
-    }
-
-    /* open the file to verify what was written to the file */
-    if ((fp = fopen(filename, "r")) == NULL)
-    {
-        Fail("Couldn't open newly written file for read.\n");
-    }
-
-    if (fgets(inBuffer, sizeof(inBuffer), fp) == NULL)
-    {
-        if(fclose(fp) != 0)
-        {
-            Fail("fwrite: Error occurred during the closing of a file.\n");
-        }
-
-        Fail("We wrote something to a file using fwrite() and got errors"
-             " when we tried to read it back using fgets().  Either "
-             "fwrite() or fgets() is broken.\n");
-    }
-
-    if (strcmp(inBuffer, outBuffer) != 0)
-    {
-        if(fclose(fp) != 0)
-        {
-            Fail("fwrite: Error occurred during the closing of a file.\n");
-        }
-
-        Fail("fwrite() (or fgets()) is broken.  The string read back from"
-             " the file does not match the string written.\n");
-    }
-
-    if(fclose(fp) != 0)
-    {
-        Fail("fwrite: Error occurred during the closing of a file.\n");
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
-   
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/ilogb/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/ilogb/test1/test1.cpp
deleted file mode 100644
index 9fbd270d5257..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/ilogb/test1/test1.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests that ilogb returns correct values.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    int expected;     /* expected result */
-};
-
-/**
- * ilogb_test1_validate
- *
- * test validation function
- */
-void __cdecl ilogb_test1_validate(double value, int expected)
-{
-    int result = ilogb(value);
-
-    if (result != expected)
-    {
-        Fail("ilogb(%g) returned %d when it should have returned %d",
-             value, result, expected);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_ilogb_test1_paltest_ilogb_test1, "c_runtime/ilogb/test1/paltest_ilogb_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                       expected */
-        {  PAL_NEGINF,                 2147483647 },
-        {  0,                         -2147483648 },
-        {  PAL_POSINF,                 2147483647 },
-        {  0.11331473229676087,       -4          },   // expected: -(pi)
-        {  0.15195522325791297,       -3          },   // expected: -(e)
-        {  0.20269956628651730,       -3          },   // expected: -(ln(10))
-        {  0.33662253682241906,       -2          },   // expected: -(pi / 2)
-        {  0.36787944117144232,       -2          },   // expected: -(log2(e))
-        {  0.37521422724648177,       -2          },   // expected: -(sqrt(2))
-        {  0.45742934732229695,       -2          },   // expected: -(2 / sqrt(pi))
-        {  0.5,                       -1          },   // expected: -(1)
-        {  0.58019181037172444,       -1          },   // expected: -(pi / 4)
-        {  0.61254732653606592,       -1          },   // expected: -(1 / sqrt(2))
-        {  0.61850313780157598,       -1          },   // expected: -(ln(2))
-        {  0.64321824193300488,       -1          },   // expected: -(2 / pi)
-        {  0.74005557395545179,       -1          },   // expected: -(log10(e))
-        {  0.80200887896145195,       -1          },   // expected: -(1 / pi)
-        {  1,                          0          },
-        {  1.2468689889006383,         0          },   // expected:  1 / pi
-        {  1.3512498725672678,         0          },   // expected:  log10(e)
-        {  1.5546822754821001,         0          },   // expected:  2 / pi
-        {  1.6168066722416747,         0          },   // expected:  ln(2)
-        {  1.6325269194381528,         0          },   // expected:  1 / sqrt(2)
-        {  1.7235679341273495,         0          },   // expected:  pi / 4
-        {  2,                          1          },
-        {  2.1861299583286618,         1          },   // expected:  2 / sqrt(pi)
-        {  2.6651441426902252,         1          },   // expected:  sqrt(2)
-        {  2.7182818284590452,         1          },   // expected:  log2(e)             value: e
-        {  2.9706864235520193,         1          },   // expected:  pi / 2
-        {  4.9334096679145963,         2          },   // expected:  ln(10)
-        {  6.5808859910179210,         2          },   // expected:  e
-        {  8.8249778270762876,         3          },   // expected:  pi
-        {  PAL_NAN,                    2147483647 },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        ilogb_test1_validate(tests[i].value, tests[i].expected);
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/ilogbf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/ilogbf/test1/test1.cpp
deleted file mode 100644
index c5022056c97e..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/ilogbf/test1/test1.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests that ilogbf returns correct values.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    int expected;    /* expected result */
-};
-
-/**
- * ilogbf_test1_validate
- *
- * test validation function
- */
-void __cdecl ilogbf_test1_validate(float value, int expected)
-{
-    int result = ilogbf(value);
-
-    if (result != expected)
-    {
-        Fail("ilogbf(%g) returned %d when it should have returned %d",
-             value, result, expected);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_ilogbf_test1_paltest_ilogbf_test1, "c_runtime/ilogbf/test1/paltest_ilogbf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                expected */
-        {  PAL_NEGINF,          2147483647 },
-        {  0,                  -2147483648 },
-        {  PAL_POSINF,          2147483647 },
-        {  0.113314732f,       -4          },   // expected: -(pi)
-        {  0.151955223f,       -3          },   // expected: -(e)
-        {  0.202699566f,       -3          },   // expected: -(ln(10))
-        {  0.336622537f,       -2          },   // expected: -(pi / 2)
-        {  0.367879441f,       -2          },   // expected: -(log2(e))
-        {  0.375214227f,       -2          },   // expected: -(sqrt(2))
-        {  0.457429347f,       -2          },   // expected: -(2 / sqrt(pi))
-        {  0.5f,               -1          },   // expected: -(1)
-        {  0.580191810f,       -1          },   // expected: -(pi / 4)
-        {  0.612547327f,       -1          },   // expected: -(1 / sqrt(2))
-        {  0.618503138f,       -1          },   // expected: -(ln(2))
-        {  0.643218242f,       -1          },   // expected: -(2 / pi)
-        {  0.740055574f,       -1          },   // expected: -(log10(e))
-        {  0.802008879f,       -1          },   // expected: -(1 / pi)
-        {  1,                   0          },
-        {  1.24686899f,         0          },   // expected:  1 / pi
-        {  1.35124987f,         0          },   // expected:  log10(e)
-        {  1.55468228f,         0          },   // expected:  2 / pi
-        {  1.61680667f,         0          },   // expected:  ln(2)
-        {  1.63252692f,         0          },   // expected:  1 / sqrt(2)
-        {  1.72356793f,         0          },   // expected:  pi / 4
-        {  2,                   1          },
-        {  2.18612996f,         1          },   // expected:  2 / sqrt(pi)
-        {  2.66514414f,         1          },   // expected:  sqrt(2)
-        {  2.71828183f,         1          },   // expected:  log2(e)             value: e
-        {  2.97068642f,         1          },   // expected:  pi / 2
-        {  4.93340967f,         2          },   // expected:  ln(10)
-        {  6.58088599f,         2          },   // expected:  e
-        {  8.82497783f,         3          },   // expected:  pi
-        {  PAL_NAN,             2147483647 },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        ilogbf_test1_validate(tests[i].value, tests[i].expected);
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/log/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/log/test1/test1.cpp
deleted file mode 100644
index a780558a049b..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/log/test1/test1.cpp
+++ /dev/null
@@ -1,139 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests log with a normal set of values.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * log_test1_validate
- *
- * test validation function
- */
-void __cdecl log_test1_validate(double value, double expected, double variance)
-{
-    double result = log(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("log(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * log_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl log_test1_validate_isnan(double value)
-{
-    double result = log(value);
-
-    if (!_isnan(result))
-    {
-        Fail("log(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_log_test1_paltest_log_test1, "c_runtime/log/test1/paltest_log_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                       expected               variance */
-        {  0,                          PAL_NEGINF,            0 },
-        {  0.043213918263772250,      -3.1415926535897932,    PAL_EPSILON * 10 },   // expected: -(pi)
-        {  0.065988035845312537,      -2.7182818284590452,    PAL_EPSILON * 10 },   // expected: -(e)
-        {  0.1,                       -2.3025850929940457,    PAL_EPSILON * 10 },   // expected: -(ln(10))
-        {  0.20787957635076191,       -1.5707963267948966,    PAL_EPSILON * 10 },   // expected: -(pi / 2)
-        {  0.23629008834452270,       -1.4426950408889634,    PAL_EPSILON * 10 },   // expected: -(log2(e))
-        {  0.24311673443421421,       -1.4142135623730950,    PAL_EPSILON * 10 },   // expected: -(sqrt(2))
-        {  0.32355726390307110,       -1.1283791670955126,    PAL_EPSILON * 10 },   // expected: -(2 / sqrt(pi))
-        {  0.36787944117144232,       -1,                     PAL_EPSILON * 10 },   // expected: -(1)
-        {  0.45593812776599624,       -0.78539816339744831,   PAL_EPSILON },        // expected: -(pi / 4)
-        {  0.49306869139523979,       -0.70710678118654752,   PAL_EPSILON },        // expected: -(1 / sqrt(2))
-        {  0.5,                       -0.69314718055994531,   PAL_EPSILON },        // expected: -(ln(2))
-        {  0.52907780826773535,       -0.63661977236758134,   PAL_EPSILON },        // expected: -(2 / pi)
-        {  0.64772148514180065,       -0.43429448190325183,   PAL_EPSILON },        // expected: -(log10(e))
-        {  0.72737734929521647,       -0.31830988618379067,   PAL_EPSILON },        // expected: -(1 / pi)
-        {  1,                          0,                     PAL_EPSILON },
-        {  1.3748022274393586,         0.31830988618379067,   PAL_EPSILON },        // expected:  1 / pi
-        {  1.5438734439711811,         0.43429448190325183,   PAL_EPSILON },        // expected:  log10(e)
-        {  1.8900811645722220,         0.63661977236758134,   PAL_EPSILON },        // expected:  2 / pi
-        {  2,                          0.69314718055994531,   PAL_EPSILON },        // expected:  ln(2)
-        {  2.0281149816474725,         0.70710678118654752,   PAL_EPSILON },        // expected:  1 / sqrt(2)
-        {  2.1932800507380155,         0.78539816339744831,   PAL_EPSILON },        // expected:  pi / 4
-        {  2.7182818284590452,         1,                     PAL_EPSILON * 10 },   //                               value: e
-        {  3.0906430223107976,         1.1283791670955126,    PAL_EPSILON * 10 },   // expected:  2 / sqrt(pi)
-        {  4.1132503787829275,         1.4142135623730950,    PAL_EPSILON * 10 },   // expected:  sqrt(2)
-        {  4.2320861065570819,         1.4426950408889634,    PAL_EPSILON * 10 },   // expected:  log2(e)
-        {  4.8104773809653517,         1.5707963267948966,    PAL_EPSILON * 10 },   // expected:  pi / 2
-        {  10,                         2.3025850929940457,    PAL_EPSILON * 10 },   // expected:  ln(10)
-        {  15.154262241479264,         2.7182818284590452,    PAL_EPSILON * 10 },   // expected:  e
-        {  23.140692632779269,         3.1415926535897932,    PAL_EPSILON * 10 },   // expected:  pi
-        {  PAL_POSINF,                 PAL_POSINF,            0 },
-    };
-
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        log_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-    }
-    
-    log_test1_validate_isnan(PAL_NEGINF);
-    log_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/log10/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/log10/test1/test1.cpp
deleted file mode 100644
index 26d13508cc4b..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/log10/test1/test1.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that log10 returns correct values.
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**               _isnan
-** 
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * log10_test1_validate
- *
- * test validation function
- */
-void __cdecl log10_test1_validate(double value, double expected, double variance)
-{
-    double result = log10(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("log10(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * log10_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl log10_test1_validate_isnan(double value)
-{
-    double result = log10(value);
-
-    if (!_isnan(result))
-    {
-        Fail("log10(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_log10_test1_paltest_log10_test1, "c_runtime/log10/test1/paltest_log10_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                       expected               variance */
-        {  0,                          PAL_NEGINF,            0 },
-        {  0.00072178415907472774,    -3.1415926535897932,    PAL_EPSILON * 10 },   // expected: -(pi)
-        {  0.0019130141022243176,     -2.7182818284590452,    PAL_EPSILON * 10 },   // expected: -(e)
-        {  0.0049821282964407206,     -2.3025850929940457,    PAL_EPSILON * 10 },   // expected: -(ln(10))
-        {  0.026866041001136132,      -1.5707963267948966,    PAL_EPSILON * 10 },   // expected: -(pi / 2)
-        {  0.036083192820787210,      -1.4426950408889634,    PAL_EPSILON * 10 },   // expected: -(log2(e))
-        {  0.038528884700322026,      -1.4142135623730950,    PAL_EPSILON * 10 },   // expected: -(sqrt(2))
-        {  0.074408205860642723,      -1.1283791670955126,    PAL_EPSILON * 10 },   // expected: -(2 / sqrt(pi))
-        {  0.1,                       -1,                     PAL_EPSILON * 10 },   // expected: -(1)
-        {  0.16390863613957665,       -0.78539816339744831,   PAL_EPSILON },        // expected: -(pi / 4)
-        {  0.19628775993505562,       -0.70710678118654752,   PAL_EPSILON },        // expected: -(1 / sqrt(2))
-        {  0.20269956628651730,       -0.69314718055994531,   PAL_EPSILON },        // expected: -(ln(2))
-        {  0.23087676451600055,       -0.63661977236758134,   PAL_EPSILON },        // expected: -(2 / pi)
-        {  0.36787944117144232,       -0.43429448190325183,   PAL_EPSILON },        // expected: -(log10(e))
-        {  0.48049637305186868,       -0.31830988618379067,   PAL_EPSILON },        // expected: -(1 / pi)
-        {  1,                          0,                     PAL_EPSILON },
-        {  2.0811811619898573,         0.31830988618379067,   PAL_EPSILON },        // expected:  1 / pi
-        {  2.7182818284590452,         0.43429448190325183,   PAL_EPSILON },        // expected:  log10(e)           value: e
-        {  4.3313150290214525,         0.63661977236758134,   PAL_EPSILON },        // expected:  2 / pi
-        {  4.9334096679145963,         0.69314718055994531,   PAL_EPSILON },        // expected:  ln(2)
-        {  5.0945611704512962,         0.70710678118654752,   PAL_EPSILON },        // expected:  1 / sqrt(2)
-        {  6.1009598002416937,         0.78539816339744831,   PAL_EPSILON },        // expected:  pi / 4
-        {  10,                         1,                     PAL_EPSILON * 10 },
-        {  13.439377934644400,         1.1283791670955126,    PAL_EPSILON * 10 },   // expected:  2 / sqrt(pi)
-        {  25.954553519470081,         1.4142135623730950,    PAL_EPSILON * 10 },   // expected:  sqrt(2)
-        {  27.713733786437790,         1.4426950408889634,    PAL_EPSILON * 10 },   // expected:  log2(e)
-        {  37.221710484165167,         1.5707963267948966,    PAL_EPSILON * 10 },   // expected:  pi / 2
-        {  200.71743249053009,         2.3025850929940457,    PAL_EPSILON * 10 },   // expected:  ln(10)
-        {  522.73529967043665,         2.7182818284590452,    PAL_EPSILON * 10 },   // expected:  e
-        {  1385.4557313670111,         3.1415926535897932,    PAL_EPSILON * 10 },   // expected:  pi
-        {  PAL_POSINF,                 PAL_POSINF,            0 },
-    };
-
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        log10_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-    }
-    
-    log10_test1_validate_isnan(PAL_NEGINF);
-    log10_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/log10f/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/log10f/test1/test1.cpp
deleted file mode 100644
index a503ebaab744..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/log10f/test1/test1.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that log10f returns correct values.
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**               _isnanf
-** 
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * log10f_test1_validate
- *
- * test validation function
- */
-void __cdecl log10f_test1_validate(float value, float expected, float variance)
-{
-    float result = log10f(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("log10f(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * log10f_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl log10f_test1_validate_isnan(float value)
-{
-    float result = log10f(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("log10f(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_log10f_test1_paltest_log10f_test1, "c_runtime/log10f/test1/paltest_log10f_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                expected        variance */
-        {  0,                   PAL_NEGINF,     0 },
-        {  0.000721784159f,    -3.14159265f,    PAL_EPSILON * 10 },   // expected: -(pi)
-        {  0.00191301410f,     -2.71828183f,    PAL_EPSILON * 10 },   // expected: -(e)
-        {  0.00498212830f,     -2.30258509f,    PAL_EPSILON * 10 },   // expected: -(ln(10))
-        {  0.0268660410f,      -1.57079633f,    PAL_EPSILON * 10 },   // expected: -(pi / 2)
-        {  0.0360831928f,      -1.44269504f,    PAL_EPSILON * 10 },   // expected: -(logf2(e))
-        {  0.0385288847f,      -1.41421356f,    PAL_EPSILON * 10 },   // expected: -(sqrtf(2))
-        {  0.0744082059f,      -1.12837917f,    PAL_EPSILON * 10 },   // expected: -(2 / sqrtf(pi))
-        {  0.1f,               -1,              PAL_EPSILON * 10 },   // expected: -(1)
-        {  0.163908636f,       -0.785398163f,   PAL_EPSILON },        // expected: -(pi / 4)
-        {  0.196287760f,       -0.707106781f,   PAL_EPSILON },        // expected: -(1 / sqrtf(2))
-        {  0.202699566f,       -0.693147181f,   PAL_EPSILON },        // expected: -(ln(2))
-        {  0.230876765f,       -0.636619772f,   PAL_EPSILON },        // expected: -(2 / pi)
-        {  0.367879441f,       -0.434294482f,   PAL_EPSILON },        // expected: -(log10f(e))
-        {  0.480496373f,       -0.318309886f,   PAL_EPSILON },        // expected: -(1 / pi)
-        {  1,                   0,              PAL_EPSILON },
-        {  2.08118116f,         0.318309886f,   PAL_EPSILON },        // expected:  1 / pi
-        {  2.71828183f,         0.434294482f,   PAL_EPSILON },        // expected:  log10f(e)           value: e
-        {  4.33131503f,         0.636619772f,   PAL_EPSILON },        // expected:  2 / pi
-        {  4.93340967f,         0.693147181f,   PAL_EPSILON },        // expected:  ln(2)
-        {  5.09456117f,         0.707106781f,   PAL_EPSILON },        // expected:  1 / sqrtf(2)
-        {  6.10095980f,         0.785398163f,   PAL_EPSILON },        // expected:  pi / 4
-        {  10,                  1,              PAL_EPSILON * 10 },
-        {  13.4393779f,         1.12837917f,    PAL_EPSILON * 10 },   // expected:  2 / sqrtf(pi)
-        {  25.9545535f,         1.41421356f,    PAL_EPSILON * 10 },   // expected:  sqrtf(2)
-        {  27.7137338f,         1.44269504f,    PAL_EPSILON * 10 },   // expected:  logf2(e)
-        {  37.2217105f,         1.57079633f,    PAL_EPSILON * 10 },   // expected:  pi / 2
-        {  200.717432f,         2.30258509f,    PAL_EPSILON * 10 },   // expected:  ln(10)
-        {  522.735300f,         2.71828183f,    PAL_EPSILON * 10 },   // expected:  e
-        {  1385.45573f,         3.14159265f,    PAL_EPSILON * 10 },   // expected:  pi
-        {  PAL_POSINF,          PAL_POSINF,     0 },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        log10f_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-    }
-    
-    log10f_test1_validate_isnan(PAL_NEGINF);
-    log10f_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/log2/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/log2/test1/test1.cpp
deleted file mode 100644
index 8ca12f4dd54c..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/log2/test1/test1.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests that log2 returns correct values.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * log2_test1_validate
- *
- * test validation function
- */
-void __cdecl log2_test1_validate(double value, double expected, double variance)
-{
-    double result = log2(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("log2(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * log2_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl log2_test1_validate_isnan(double value)
-{
-    double result = log2(value);
-
-    if (!_isnan(result))
-    {
-        Fail("log2(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_log2_test1_paltest_log2_test1, "c_runtime/log2/test1/paltest_log2_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                       expected               variance */
-        {  0,                          PAL_NEGINF,            0 },
-        {  0.11331473229676087,       -3.1415926535897932,    PAL_EPSILON * 10 },   // expected: -(pi)
-        {  0.15195522325791297,       -2.7182818284590452,    PAL_EPSILON * 10 },   // expected: -(e)
-        {  0.20269956628651730,       -2.3025850929940457,    PAL_EPSILON * 10 },   // expected: -(ln(10))
-        {  0.33662253682241906,       -1.5707963267948966,    PAL_EPSILON * 10 },   // expected: -(pi / 2)
-        {  0.36787944117144232,       -1.4426950408889634,    PAL_EPSILON * 10 },   // expected: -(log2(e))
-        {  0.37521422724648177,       -1.4142135623730950,    PAL_EPSILON * 10 },   // expected: -(sqrt(2))
-        {  0.45742934732229695,       -1.1283791670955126,    PAL_EPSILON * 10 },   // expected: -(2 / sqrt(pi))
-        {  0.5,                       -1,                     PAL_EPSILON * 10 },   // expected: -(1)
-        {  0.58019181037172444,       -0.78539816339744831,   PAL_EPSILON },        // expected: -(pi / 4)
-        {  0.61254732653606592,       -0.70710678118654752,   PAL_EPSILON },        // expected: -(1 / sqrt(2))
-        {  0.61850313780157598,       -0.69314718055994531,   PAL_EPSILON },        // expected: -(ln(2))
-        {  0.64321824193300488,       -0.63661977236758134,   PAL_EPSILON },        // expected: -(2 / pi)
-        {  0.74005557395545179,       -0.43429448190325183,   PAL_EPSILON },        // expected: -(log10(e))
-        {  0.80200887896145195,       -0.31830988618379067,   PAL_EPSILON },        // expected: -(1 / pi)
-        {  1,                          0,                     PAL_EPSILON },
-        {  1.2468689889006383,         0.31830988618379067,   PAL_EPSILON },        // expected:  1 / pi
-        {  1.3512498725672678,         0.43429448190325183,   PAL_EPSILON },        // expected:  log10(e)
-        {  1.5546822754821001,         0.63661977236758134,   PAL_EPSILON },        // expected:  2 / pi
-        {  1.6168066722416747,         0.69314718055994531,   PAL_EPSILON },        // expected:  ln(2)
-        {  1.6325269194381528,         0.70710678118654752,   PAL_EPSILON },        // expected:  1 / sqrt(2)
-        {  1.7235679341273495,         0.78539816339744831,   PAL_EPSILON },        // expected:  pi / 4
-        {  2,                          1,                     PAL_EPSILON * 10 },
-        {  2.1861299583286618,         1.1283791670955126,    PAL_EPSILON * 10 },   // expected:  2 / sqrt(pi)
-        {  2.6651441426902252,         1.4142135623730950,    PAL_EPSILON * 10 },   // expected:  sqrt(2)
-        {  2.7182818284590452,         1.4426950408889634,    PAL_EPSILON * 10 },   // expected:  log2(e)             value: e
-        {  2.9706864235520193,         1.5707963267948966,    PAL_EPSILON * 10 },   // expected:  pi / 2
-        {  4.9334096679145963,         2.3025850929940457,    PAL_EPSILON * 10 },   // expected:  ln(10)
-        {  6.5808859910179210,         2.7182818284590452,    PAL_EPSILON * 10 },   // expected:  e
-        {  8.8249778270762876,         3.1415926535897932,    PAL_EPSILON * 10 },   // expected:  pi
-        {  PAL_POSINF,                 PAL_POSINF,            0 },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        log2_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-    }
-
-    log2_test1_validate_isnan(PAL_NEGINF);
-    log2_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/log2f/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/log2f/test1/test1.cpp
deleted file mode 100644
index 29de3c76de79..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/log2f/test1/test1.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests that log2f returns correct values.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * log2f_test1_validate
- *
- * test validation function
- */
-void __cdecl log2f_test1_validate(float value, float expected, float variance)
-{
-    float result = log2f(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("log2f(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * log2f_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl log2f_test1_validate_isnan(float value)
-{
-    float result = log2f(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("log2f(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_log2f_test1_paltest_log2f_test1, "c_runtime/log2f/test1/paltest_log2f_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                expected        variance */
-        {  0,                   PAL_NEGINF,     0 },
-        {  0.113314732f,       -3.14159265f,    PAL_EPSILON * 10 },   // expected: -(pi)
-        {  0.151955223f,       -2.71828183f,    PAL_EPSILON * 10 },   // expected: -(e)
-        {  0.202699566f,       -2.30258509f,    PAL_EPSILON * 10 },   // expected: -(ln(10))
-        {  0.336622537f,       -1.57079633f,    PAL_EPSILON * 10 },   // expected: -(pi / 2)
-        {  0.367879441f,       -1.44269504f,    PAL_EPSILON * 10 },   // expected: -(logf2(e))
-        {  0.375214227f,       -1.41421356f,    PAL_EPSILON * 10 },   // expected: -(sqrtf(2))
-        {  0.457429347f,       -1.12837917f,    PAL_EPSILON * 10 },   // expected: -(2 / sqrtf(pi))
-        {  0.5f,               -1,              PAL_EPSILON * 10 },   // expected: -(1)
-        {  0.580191810f,       -0.785398163f,   PAL_EPSILON },        // expected: -(pi / 4)
-        {  0.612547327f,       -0.707106781f,   PAL_EPSILON },        // expected: -(1 / sqrtf(2))
-        {  0.618503138f,       -0.693147181f,   PAL_EPSILON },        // expected: -(ln(2))
-        {  0.643218242f,       -0.636619772f,   PAL_EPSILON },        // expected: -(2 / pi)
-        {  0.740055574f,       -0.434294482f,   PAL_EPSILON },        // expected: -(log10f(e))
-        {  0.802008879f,       -0.318309886f,   PAL_EPSILON },        // expected: -(1 / pi)
-        {  1,                   0,              PAL_EPSILON },
-        {  1.24686899f,         0.318309886f,   PAL_EPSILON },        // expected:  1 / pi
-        {  1.35124987f,         0.434294482f,   PAL_EPSILON },        // expected:  log10f(e)           value: e
-        {  1.55468228f,         0.636619772f,   PAL_EPSILON },        // expected:  2 / pi
-        {  1.61680667f,         0.693147181f,   PAL_EPSILON },        // expected:  ln(2)
-        {  1.63252692f,         0.707106781f,   PAL_EPSILON },        // expected:  1 / sqrtf(2)
-        {  1.72356793f,         0.785398163f,   PAL_EPSILON },        // expected:  pi / 4
-        {  2,                   1,              PAL_EPSILON * 10 },
-        {  2.18612996f,         1.12837917f,    PAL_EPSILON * 10 },   // expected:  2 / sqrtf(pi)
-        {  2.66514414f,         1.41421356f,    PAL_EPSILON * 10 },   // expected:  sqrtf(2)
-        {  2.71828183f,         1.44269504f,    PAL_EPSILON * 10 },   // expected:  logf2(e)
-        {  2.97068642f,         1.57079633f,    PAL_EPSILON * 10 },   // expected:  pi / 2
-        {  4.93340967f,         2.30258509f,    PAL_EPSILON * 10 },   // expected:  ln(10)
-        {  6.58088599f,         2.71828183f,    PAL_EPSILON * 10 },   // expected:  e
-        {  8.82497783f,         3.14159265f,    PAL_EPSILON * 10 },   // expected:  pi
-        {  PAL_POSINF,          PAL_POSINF,     0 },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        log2f_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-    }
-
-    log2f_test1_validate_isnan(PAL_NEGINF);
-    log2f_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/logf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/logf/test1/test1.cpp
deleted file mode 100644
index 7980ae1b28e9..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/logf/test1/test1.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests logf with a normal set of values.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * logf_test1_validate
- *
- * test validation function
- */
-void __cdecl logf_test1_validate(float value, float expected, float variance)
-{
-    float result = logf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("logf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * logf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl logf_test1_validate_isnan(float value)
-{
-    float result = logf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("logf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_logf_test1_paltest_logf_test1, "c_runtime/logf/test1/paltest_logf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value              expected        variance */
-        {  0,                 PAL_NEGINF,     0 },
-        {  0.0432139183f,    -3.14159265f,    PAL_EPSILON * 10 },   // expected: -(pi)
-        {  0.0659880358f,    -2.71828183f,    PAL_EPSILON * 10 },   // expected: -(e)
-        {  0.1f,             -2.30258509f,    PAL_EPSILON * 10 },   // expected: -(ln(10))
-        {  0.207879576f,     -1.57079633f,    PAL_EPSILON * 10 },   // expected: -(pi / 2)
-        {  0.236290088f,     -1.44269504f,    PAL_EPSILON * 10 },   // expected: -(logf2(e))
-        {  0.243116734f,     -1.41421356f,    PAL_EPSILON * 10 },   // expected: -(sqrtf(2))
-        {  0.323557264f,     -1.12837917f,    PAL_EPSILON * 10 },   // expected: -(2 / sqrtf(pi))
-        {  0.367879441f,     -1,              PAL_EPSILON * 10 },   // expected: -(1)
-        {  0.455938128f,     -0.785398163f,   PAL_EPSILON },        // expected: -(pi / 4)
-        {  0.493068691f,     -0.707106781f,   PAL_EPSILON },        // expected: -(1 / sqrtf(2))
-        {  0.5f,             -0.693147181f,   PAL_EPSILON },        // expected: -(ln(2))
-        {  0.529077808f,     -0.636619772f,   PAL_EPSILON },        // expected: -(2 / pi)
-        {  0.647721485f,     -0.434294482f,   PAL_EPSILON },        // expected: -(log10f(e))
-        {  0.727377349f,     -0.318309886f,   PAL_EPSILON },        // expected: -(1 / pi)
-        {  1,                 0,              PAL_EPSILON },
-        {  1.37480223f,       0.318309886f,   PAL_EPSILON },        // expected:  1 / pi
-        {  1.54387344f,       0.434294482f,   PAL_EPSILON },        // expected:  log10f(e)
-        {  1.89008116f,       0.636619772f,   PAL_EPSILON },        // expected:  2 / pi
-        {  2,                 0.693147181f,   PAL_EPSILON },        // expected:  ln(2)
-        {  2.02811498f,       0.707106781f,   PAL_EPSILON },        // expected:  1 / sqrtf(2)
-        {  2.19328005f,       0.785398163f,   PAL_EPSILON },        // expected:  pi / 4
-        {  2.71828183f,       1,              PAL_EPSILON * 10 },   //                               value: e
-        {  3.09064302f,       1.12837917f,    PAL_EPSILON * 10 },   // expected:  2 / sqrtf(pi)
-        {  4.11325038f,       1.41421356f,    PAL_EPSILON * 10 },   // expected:  sqrtf(2)
-        {  4.23208611f,       1.44269504f,    PAL_EPSILON * 10 },   // expected:  logf2(e)
-        {  4.81047738f,       1.57079633f,    PAL_EPSILON * 10 },   // expected:  pi / 2
-        {  10,                2.30258509f,    PAL_EPSILON * 10 },   // expected:  ln(10)
-        {  15.1542622f,       2.71828183f,    PAL_EPSILON * 10 },   // expected:  e
-        {  23.1406926f,       3.14159265f,    PAL_EPSILON * 10 },   // expected:  pi
-        {  PAL_POSINF,        PAL_POSINF,     0 },
-    };
-
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        logf_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-    }
-    
-    logf_test1_validate_isnan(PAL_NEGINF);
-    logf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/malloc/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/malloc/test1/test1.cpp
deleted file mode 100644
index 067791fe866d..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/malloc/test1/test1.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Test that malloc returns usable memory
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-
-PALTEST(c_runtime_malloc_test1_paltest_malloc_test1, "c_runtime/malloc/test1/paltest_malloc_test1")
-{
-
-    char *testA;
-    int i;
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-    /* check that malloc really gives us addressable memory */
-    testA = (char *)malloc(20 * sizeof(char));
-    if (testA == NULL)
-    {
-        Fail("Call to malloc failed.\n");
-    }
-    for (i = 0; i < 20; i++)
-    {
-        testA[i] = 'a';
-    }
-    for (i = 0; i < 20; i++)
-    {
-        if (testA[i] != 'a')
-        {
-            Fail("The memory doesn't seem to be properly allocated.\n");
-        }
-    }
-    free(testA);
-
-    PAL_Terminate();
-
-    return PASS;
-}
-
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/malloc/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/malloc/test2/test2.cpp
deleted file mode 100644
index 9f94f1050d6a..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/malloc/test2/test2.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test2.c
-**
-** Purpose: Test that malloc(0) returns non-zero value
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-
-PALTEST(c_runtime_malloc_test2_paltest_malloc_test2, "c_runtime/malloc/test2/paltest_malloc_test2")
-{
-
-    char *testA;
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-    /* check that malloc(0) returns non-zero value */
-    testA = (char *)malloc(0);
-    if (testA == NULL)
-    {
-        Fail("Call to malloc(0) failed.\n");
-    }
-
-    free(testA);
-
-    PAL_Terminate();
-
-    return PASS;
-}
-
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/modf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/modf/test1/test1.cpp
deleted file mode 100644
index 726e524d0a4e..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/modf/test1/test1.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c (modf)
-**
-** Purpose: Test to ensure that modf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;             /* value to test the function with */
-    double expected;          /* expected result */   
-    double variance;          /* maximum delta between the expected and actual result */
-    double expected_intpart;  /* expected result */
-    double variance_intpart;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * modf_test1_validate
- *
- * test validation function
- */
-void __cdecl modf_test1_validate(double value, double expected, double variance, double expected_intpart, double variance_intpart)
-{
-    double result_intpart;
-    double result = modf(value, &result_intpart);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-    double delta_intpart = fabs(result_intpart - expected_intpart);
-
-    if ((delta > variance) || (delta_intpart > variance_intpart))
-    {
-        Fail("modf(%g) returned %20.17g with an intpart of %20.17g when it should have returned %20.17g with an intpart of %20.17g",
-             value, result, result_intpart, expected, expected_intpart);
-    }
-}
-
-/**
- * modf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl modf_test1_validate_isnan(double value)
-{
-    double result_intpart;
-    double result = modf(value, &result_intpart);
-
-    if (!_isnan(result) || !_isnan(result_intpart))
-    {
-        Fail("modf(%g) returned %20.17g with an intpart of %20.17g when it should have returned %20.17g with an intpart of %20.17g",
-             value, result, result_intpart, PAL_NAN, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_modf_test1_paltest_modf_test1, "c_runtime/modf/test1/paltest_modf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance         expected_intpart     variance_intpart */
-        {  0,                      0,                      PAL_EPSILON,         0,                   PAL_EPSILON },
-        {  0.31830988618379067,    0.31830988618379067,    PAL_EPSILON,         0,                   PAL_EPSILON },       // value:  1 / pi
-        {  0.43429448190325183,    0.43429448190325183,    PAL_EPSILON,         0,                   PAL_EPSILON },       // value:  log10(e)
-        {  0.63661977236758134,    0.63661977236758134,    PAL_EPSILON,         0,                   PAL_EPSILON },       // value:  2 / pi
-        {  0.69314718055994531,    0.69314718055994531,    PAL_EPSILON,         0,                   PAL_EPSILON },       // value:  ln(2)
-        {  0.70710678118654752,    0.70710678118654752,    PAL_EPSILON,         0,                   PAL_EPSILON },       // value:  1 / sqrt(2)
-        {  0.78539816339744831,    0.78539816339744831,    PAL_EPSILON,         0,                   PAL_EPSILON },       // value:  pi / 4
-        {  1,                      0,                      PAL_EPSILON,         1,                   PAL_EPSILON * 10 },
-        {  1.1283791670955126,     0.1283791670955126,     PAL_EPSILON,         1,                   PAL_EPSILON * 10 },  // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     0.4142135623730950,     PAL_EPSILON,         1,                   PAL_EPSILON * 10 },  // value:  sqrt(2)
-        {  1.4426950408889634,     0.4426950408889634,     PAL_EPSILON,         1,                   PAL_EPSILON * 10 },  // value:  log2(e)
-        {  1.5707963267948966,     0.5707963267948966,     PAL_EPSILON,         1,                   PAL_EPSILON * 10 },  // value:  pi / 2
-        {  2.3025850929940457,     0.3025850929940457,     PAL_EPSILON,         2,                   PAL_EPSILON * 10 },  // value:  ln(10)
-        {  2.7182818284590452,     0.7182818284590452,     PAL_EPSILON,         2,                   PAL_EPSILON * 10 },  // value:  e
-        {  3.1415926535897932,     0.1415926535897932,     PAL_EPSILON,         3,                   PAL_EPSILON * 10 },  // value:  pi
-        {  PAL_POSINF,             0,                      PAL_EPSILON,         PAL_POSINF,          0 }
-        
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        modf_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance,  tests[i].expected_intpart, tests[i].variance_intpart);
-        modf_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance, -tests[i].expected_intpart, tests[i].variance_intpart);
-    }
-
-    modf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/modff/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/modff/test1/test1.cpp
deleted file mode 100644
index 898566b27398..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/modff/test1/test1.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c (modf)
-**
-** Purpose: Test to ensure that modf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;             /* value to test the function with */
-    float expected;          /* expected result */   
-    float variance;          /* maximum delta between the expected and actual result */
-    float expected_intpart;  /* expected result */
-    float variance_intpart;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * modff_test1_validate
- *
- * test validation function
- */
-void __cdecl modff_test1_validate(float value, float expected, float variance, float expected_intpart, float variance_intpart)
-{
-    float result_intpart;
-    float result = modff(value, &result_intpart);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-    float delta_intpart = fabsf(result_intpart - expected_intpart);
-
-    if ((delta > variance) || (delta_intpart > variance_intpart))
-    {
-        Fail("modff(%g) returned %10.9g with an intpart of %10.9g when it should have returned %10.9g with an intpart of %10.9g",
-             value, result, result_intpart, expected, expected_intpart);
-    }
-}
-
-/**
- * modff_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl modff_test1_validate_isnan(float value)
-{
-    float result_intpart;
-    float result = modff(value, &result_intpart);
-
-    if (!_isnan(result) || !_isnan(result_intpart))
-    {
-        Fail("modff(%g) returned %10.9g with an intpart of %10.9g when it should have returned %10.9g with an intpart of %10.9g",
-             value, result, result_intpart, PAL_NAN, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_modff_test1_paltest_modff_test1, "c_runtime/modff/test1/paltest_modff_test1")
-{
-    struct test tests[] = 
-    {
-        /* value              expected         variance    expected_intpart    variance_intpart */
-        {  0,                 0,               PAL_EPSILON,    0,                  PAL_EPSILON },
-        {  0.318309886f,      0.318309886f,    PAL_EPSILON,    0,                  PAL_EPSILON },       // value:  1 / pi
-        {  0.434294482f,      0.434294482f,    PAL_EPSILON,    0,                  PAL_EPSILON },       // value:  log10(e)
-        {  0.636619772f,      0.636619772f,    PAL_EPSILON,    0,                  PAL_EPSILON },       // value:  2 / pi
-        {  0.693147181f,      0.693147181f,    PAL_EPSILON,    0,                  PAL_EPSILON },       // value:  ln(2)
-        {  0.707106781f,      0.707106781f,    PAL_EPSILON,    0,                  PAL_EPSILON },       // value:  1 / sqrt(2)
-        {  0.785398163f,      0.785398163f,    PAL_EPSILON,    0,                  PAL_EPSILON },       // value:  pi / 4
-        {  1,                 0,               PAL_EPSILON,    1,                  PAL_EPSILON * 10 },
-        {  1.12837917f,       0.128379167f,    PAL_EPSILON,    1,                  PAL_EPSILON * 10 },  // value:  2 / sqrt(pi)
-        {  1.41421356f,       0.414213562f,    PAL_EPSILON,    1,                  PAL_EPSILON * 10 },  // value:  sqrt(2)
-        {  1.44269504f,       0.442695041f,    PAL_EPSILON,    1,                  PAL_EPSILON * 10 },  // value:  log2(e)
-        {  1.57079633f,       0.570796327f,    PAL_EPSILON,    1,                  PAL_EPSILON * 10 },  // value:  pi / 2
-        {  2.30258509f,       0.302585093f,    PAL_EPSILON,    2,                  PAL_EPSILON * 10 },  // value:  ln(10)
-        {  2.71828183f,       0.718281828f,    PAL_EPSILON,    2,                  PAL_EPSILON * 10 },  // value:  e
-        {  3.14159265f,       0.141592654f,    PAL_EPSILON,    3,                  PAL_EPSILON * 10 },  // value:  pi
-        {  PAL_POSINF,        0,               PAL_EPSILON,    PAL_POSINF,         0 }
-        
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        modff_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance,  tests[i].expected_intpart, tests[i].variance_intpart);
-        modff_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance, -tests[i].expected_intpart, tests[i].variance_intpart);
-    }
-
-    modff_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/pow/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/pow/test1/test1.cpp
deleted file mode 100644
index 22f482868b3d..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/pow/test1/test1.cpp
+++ /dev/null
@@ -1,229 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests that atan2 returns correct values for a subset of values.
-**          Tests with positive and negative values of x and y to ensure
-**          atan2 is returning results from the correct quadrant.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double x;         /* first component of the value to test the function with */
-    double y;         /* second component of the value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * pow_test1_validate
- *
- * test validation function
- */
-void __cdecl pow_test1_validate(double x, double y, double expected, double variance)
-{
-    double result = pow(x, y);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("pow(%g, %g) returned %20.17g when it should have returned %20.17g",
-             x, y, result, expected);
-    }
-}
-
-/**
- * pow_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl pow_test1_validate_isnan(double x, double y)
-{
-    double result = pow(x, y);
-
-    if (!_isnan(result))
-    {
-        Fail("pow(%g, %g) returned %20.17g when it should have returned %20.17g",
-             x, y, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_pow_test1_paltest_pow_test1, "c_runtime/pow/test1/paltest_pow_test1")
-{
-    struct test tests[] = 
-    {
-        /* x                       y                       expected                   variance */
-        {  PAL_NEGINF,             PAL_NEGINF,             0,                         PAL_EPSILON },
-        {  PAL_NEGINF,             PAL_POSINF,             PAL_POSINF,                0 },
-
-        { -10,                     PAL_NEGINF,             0,                         PAL_EPSILON },
-        { -10,                    -1,                     -0.1,                       PAL_EPSILON },
-        { -10,                     0,                      1,                         PAL_EPSILON * 10 },
-        { -10,                     1,                     -10,                        PAL_EPSILON * 100 },
-        { -10,                     PAL_POSINF,             PAL_POSINF,                0 },
-
-        { -2.7182818284590452,     PAL_NEGINF,             0,                         PAL_EPSILON },          // x: -(e)
-        { -2.7182818284590452,    -1,                     -0.36787944117144232,       PAL_EPSILON },          // x: -(e)
-        { -2.7182818284590452,     0,                      1,                         PAL_EPSILON * 10 },     // x: -(e)
-        { -2.7182818284590452,     1,                     -2.7182818284590452,        PAL_EPSILON * 10 },     // x: -(e)                       expected: e
-        { -2.7182818284590452,     PAL_POSINF,             PAL_POSINF,                0 },                    // x: -(e)
-
-        { -1.0,                    PAL_NEGINF,             1.0,                       PAL_EPSILON * 10 },
-        { -1.0,                    PAL_POSINF,             1.0,                       PAL_EPSILON * 10 },
-
-        { -0.0,                    PAL_NEGINF,             PAL_POSINF,                0 },
-        { -0.0,                   -1,                      PAL_NEGINF,                0 },
-        { -0.0,                   -0.0,                    1,                         PAL_EPSILON * 10 },
-        { -0.0,                    0,                      1,                         PAL_EPSILON * 10 },
-        { -0.0,                    1,                     -0.0,                       PAL_EPSILON },
-        { -0.0,                    PAL_POSINF,             0,                         PAL_EPSILON },
-
-        {  PAL_NAN,               -0.0,                    1.0,                       PAL_EPSILON * 10 },
-        {  PAL_NAN,                0,                      1.0,                       PAL_EPSILON * 10 },
-
-        {  0.0,                    PAL_NEGINF,             PAL_POSINF,                0 },
-        {  0.0,                   -1,                      PAL_POSINF,                0 },
-        {  0,                     -0.0,                    1,                         PAL_EPSILON * 10 },
-        {  0,                      0,                      1,                         PAL_EPSILON * 10 },
-        {  0.0,                    1,                      0,                         PAL_EPSILON },
-        {  0.0,                    PAL_POSINF,             0,                         PAL_EPSILON },
-
-        {  1,                      PAL_NEGINF,             1,                         PAL_EPSILON * 10 },
-        {  1,                      PAL_POSINF,             1,                         PAL_EPSILON * 10 },
-
-        {  2.7182818284590452,     PAL_NEGINF,             0,                         PAL_EPSILON },
-        {  2.7182818284590452,    -3.1415926535897932,     0.043213918263772250,      PAL_EPSILON / 10 },     // x: e     y: -(pi)
-        {  2.7182818284590452,    -2.7182818284590452,     0.065988035845312537,      PAL_EPSILON / 10 },     // x: e     y: -(e)
-        {  2.7182818284590452,    -2.3025850929940457,     0.1,                       PAL_EPSILON },          // x: e     y: -(ln(10))
-        {  2.7182818284590452,    -1.5707963267948966,     0.20787957635076191,       PAL_EPSILON },          // x: e     y: -(pi / 2)
-        {  2.7182818284590452,    -1.4426950408889634,     0.23629008834452270,       PAL_EPSILON },          // x: e     y: -(log2(e))
-        {  2.7182818284590452,    -1.4142135623730950,     0.24311673443421421,       PAL_EPSILON },          // x: e     y: -(sqrt(2))
-        {  2.7182818284590452,    -1.1283791670955126,     0.32355726390307110,       PAL_EPSILON },          // x: e     y: -(2 / sqrt(pi))
-        {  2.7182818284590452,    -1,                      0.36787944117144232,       PAL_EPSILON },          // x: e     y: -(1)
-        {  2.7182818284590452,    -0.78539816339744831,    0.45593812776599624,       PAL_EPSILON },          // x: e     y: -(pi / 4)
-        {  2.7182818284590452,    -0.70710678118654752,    0.49306869139523979,       PAL_EPSILON },          // x: e     y: -(1 / sqrt(2))
-        {  2.7182818284590452,    -0.69314718055994531,    0.5,                       PAL_EPSILON },          // x: e     y: -(ln(2))
-        {  2.7182818284590452,    -0.63661977236758134,    0.52907780826773535,       PAL_EPSILON },          // x: e     y: -(2 / pi)
-        {  2.7182818284590452,    -0.43429448190325183,    0.64772148514180065,       PAL_EPSILON },          // x: e     y: -(log10(e))
-        {  2.7182818284590452,    -0.31830988618379067,    0.72737734929521647,       PAL_EPSILON },          // x: e     y: -(1 / pi)
-        {  2.7182818284590452,     0,                      1,                         PAL_EPSILON * 10 },     // x: e
-        {  2.7182818284590452,     0.31830988618379067,    1.3748022274393586,        PAL_EPSILON * 10 },     // x: e     y:  1 / pi
-        {  2.7182818284590452,     0.43429448190325183,    1.5438734439711811,        PAL_EPSILON * 10 },     // x: e     y:  log10(e)
-        {  2.7182818284590452,     0.63661977236758134,    1.8900811645722220,        PAL_EPSILON * 10 },     // x: e     y:  2 / pi
-        {  2.7182818284590452,     0.69314718055994531,    2,                         PAL_EPSILON * 10 },     // x: e     y:  ln(2)
-        {  2.7182818284590452,     0.70710678118654752,    2.0281149816474725,        PAL_EPSILON * 10 },     // x: e     y:  1 / sqrt(2)
-        {  2.7182818284590452,     0.78539816339744831,    2.1932800507380155,        PAL_EPSILON * 10 },     // x: e     y:  pi / 4
-        {  2.7182818284590452,     1,                      2.7182818284590452,        PAL_EPSILON * 10 },     // x: e                         expected: e
-        {  2.7182818284590452,     1.1283791670955126,     3.0906430223107976,        PAL_EPSILON * 10 },     // x: e     y:  2 / sqrt(pi)
-        {  2.7182818284590452,     1.4142135623730950,     4.1132503787829275,        PAL_EPSILON * 10 },     // x: e     y:  sqrt(2)
-        {  2.7182818284590452,     1.4426950408889634,     4.2320861065570819,        PAL_EPSILON * 10 },     // x: e     y:  log2(e)
-        {  2.7182818284590452,     1.5707963267948966,     4.8104773809653517,        PAL_EPSILON * 10 },     // x: e     y:  pi / 2
-        {  2.7182818284590452,     2.3025850929940457,     10,                        PAL_EPSILON * 100 },    // x: e     y:  ln(10)
-        {  2.7182818284590452,     2.7182818284590452,     15.154262241479264,        PAL_EPSILON * 100 },    // x: e     y:  e
-        {  2.7182818284590452,     3.1415926535897932,     23.140692632779269,        PAL_EPSILON * 100 },    // x: e     y:  pi
-        {  2.7182818284590452,     PAL_POSINF,             PAL_POSINF,                0 },                    // x: e
-        
-        {  10,                     PAL_NEGINF,             0,                         0 },
-        {  10,                    -3.1415926535897932,     0.00072178415907472774,    PAL_EPSILON / 1000 },   //          y: -(pi)
-        {  10,                    -2.7182818284590452,     0.0019130141022243176,     PAL_EPSILON / 100 },    //          y: -(e)
-        {  10,                    -2.3025850929940457,     0.0049821282964407206,     PAL_EPSILON / 100 },    //          y: -(ln(10))
-        {  10,                    -1.5707963267948966,     0.026866041001136132,      PAL_EPSILON / 10 },     //          y: -(pi / 2)
-        {  10,                    -1.4426950408889634,     0.036083192820787210,      PAL_EPSILON / 10 },     //          y: -(log2(e))
-        {  10,                    -1.4142135623730950,     0.038528884700322026,      PAL_EPSILON / 10 },     //          y: -(sqrt(2))
-        {  10,                    -1.1283791670955126,     0.074408205860642723,      PAL_EPSILON / 10 },     //          y: -(2 / sqrt(pi))
-        {  10,                    -1,                      0.1,                       PAL_EPSILON },          //          y: -(1)
-        {  10,                    -0.78539816339744831,    0.16390863613957665,       PAL_EPSILON },          //          y: -(pi / 4)
-        {  10,                    -0.70710678118654752,    0.19628775993505562,       PAL_EPSILON },          //          y: -(1 / sqrt(2))
-        {  10,                    -0.69314718055994531,    0.20269956628651730,       PAL_EPSILON },          //          y: -(ln(2))
-        {  10,                    -0.63661977236758134,    0.23087676451600055,       PAL_EPSILON },          //          y: -(2 / pi)
-        {  10,                    -0.43429448190325183,    0.36787944117144232,       PAL_EPSILON },          //          y: -(log10(e))
-        {  10,                    -0.31830988618379067,    0.48049637305186868,       PAL_EPSILON },          //          y: -(1 / pi)
-        {  10,                     0,                      1,                         PAL_EPSILON * 10 },
-        {  10,                     0.31830988618379067,    2.0811811619898573,        PAL_EPSILON * 10 },     //          y:  1 / pi
-        {  10,                     0.43429448190325183,    2.7182818284590452,        PAL_EPSILON * 10 },     //          y:  log10(e)           expected: e
-        {  10,                     0.63661977236758134,    4.3313150290214525,        PAL_EPSILON * 10 },     //          y:  2 / pi
-        {  10,                     0.69314718055994531,    4.9334096679145963,        PAL_EPSILON * 10 },     //          y:  ln(2)
-        {  10,                     0.70710678118654752,    5.0945611704512962,        PAL_EPSILON * 10 },     //          y:  1 / sqrt(2)
-        {  10,                     0.78539816339744831,    6.1009598002416937,        PAL_EPSILON * 10 },     //          y:  pi / 4
-        {  10,                     1,                      10,                        PAL_EPSILON * 100 },
-        {  10,                     1.1283791670955126,     13.439377934644400,        PAL_EPSILON * 100 },    //          y:  2 / sqrt(pi)
-        {  10,                     1.4142135623730950,     25.954553519470081,        PAL_EPSILON * 100 },    //          y:  sqrt(2)
-        {  10,                     1.4426950408889634,     27.713733786437790,        PAL_EPSILON * 100 },    //          y:  log2(e)
-        {  10,                     1.5707963267948966,     37.221710484165167,        PAL_EPSILON * 100 },    //          y:  pi / 2
-        {  10,                     2.3025850929940457,     200.71743249053009,        PAL_EPSILON * 1000 },   //          y:  ln(10)
-        {  10,                     2.7182818284590452,     522.73529967043665,        PAL_EPSILON * 1000 },   //          y:  e
-        {  10,                     3.1415926535897932,     1385.4557313670111,        PAL_EPSILON * 10000 },  //          y:  pi
-        {  10,                     PAL_POSINF,             PAL_POSINF,                0 },
-        
-        {  PAL_POSINF,             PAL_NEGINF,             0,                         PAL_EPSILON },
-        {  PAL_POSINF,             PAL_POSINF,             PAL_POSINF,                0 },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        pow_test1_validate(tests[i].x, tests[i].y, tests[i].expected, tests[i].variance);
-    }
-
-    pow_test1_validate_isnan(-10, -1.5707963267948966);                                                                 //          y: -(pi / 2)
-    pow_test1_validate_isnan(-10, -0.78539816339744828);                                                                //          y: -(pi / 4)
-    pow_test1_validate_isnan(-10,  0.78539816339744828);                                                                //          y:   pi / 4
-    pow_test1_validate_isnan(-10,  1.5707963267948966);                                                                 //          y:   pi / 2
-    
-    pow_test1_validate_isnan(-2.7182818284590452, -1.5707963267948966);                                                 // x: -(e)  y: -(pi / 2)
-    pow_test1_validate_isnan(-2.7182818284590452, -0.78539816339744828);                                                // x: -(e)  y: -(pi / 4)
-    pow_test1_validate_isnan(-2.7182818284590452,  0.78539816339744828);                                                // x: -(e)  y:   pi / 4
-    pow_test1_validate_isnan(-2.7182818284590452,  1.5707963267948966);                                                 // x: -(e)  y:   pi / 2
-
-    pow_test1_validate_isnan(PAL_NEGINF, PAL_NAN);
-    pow_test1_validate_isnan(PAL_NAN,    PAL_NEGINF);
-    
-    pow_test1_validate_isnan(PAL_POSINF, PAL_NAN);
-    pow_test1_validate_isnan(PAL_NAN,    PAL_POSINF);
-    
-    pow_test1_validate_isnan(PAL_NAN, PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/powf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/powf/test1/test1.cpp
deleted file mode 100644
index 5e704f2d59b6..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/powf/test1/test1.cpp
+++ /dev/null
@@ -1,228 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Tests that atan2f returns correct values for a subset of values.
-**          Tests with positive and negative values of x and y to ensure
-**          atan2f is returning results from the correct quadrant.
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float x;         /* first component of the value to test the function with */
-    float y;         /* second component of the value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * powf_test1_validate
- *
- * test validation function
- */
-void __cdecl powf_test1_validate(float x, float y, float expected, float variance)
-{
-    float result = powf(x, y);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("powf(%g, %g) returned %10.9g when it should have returned %10.9g",
-             x, y, result, expected);
-    }
-}
-
-/**
- * powf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl powf_test1_validate_isnan(float x, float y)
-{
-    float result = powf(x, y);
-
-    if (!_isnanf(result))
-    {
-        Fail("powf(%g, %g) returned %10.9g when it should have returned %10.9g",
-             x, y, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_powf_test1_paltest_powf_test1, "c_runtime/powf/test1/paltest_powf_test1")
-{
-    struct test tests[] = 
-    {
-        /* x                y                expected            variance */
-        {  PAL_NEGINF,      PAL_NEGINF,      0,                  PAL_EPSILON },
-        {  PAL_NEGINF,      PAL_POSINF,      PAL_POSINF,         0 },
-
-        { -10,              PAL_NEGINF,      0,                  PAL_EPSILON },
-        { -10,             -1,              -0.1f,               PAL_EPSILON },
-        { -10,              0,               1,                  PAL_EPSILON * 10 },
-        { -10,              1,              -10,                 PAL_EPSILON * 100 },
-        { -10,              PAL_POSINF,      PAL_POSINF,         0 },
-
-        { -2.71828183f,     PAL_NEGINF,      0,                  PAL_EPSILON },          // x: -(e)
-        { -2.71828183f,    -1,              -0.367879441f,       PAL_EPSILON },          // x: -(e)
-        { -2.71828183f,     0,               1,                  PAL_EPSILON * 10 },     // x: -(e)
-        { -2.71828183f,     1,              -2.71828183f,        PAL_EPSILON * 10 },     // x: -(e)                       expected: e
-        { -2.71828183f,     PAL_POSINF,      PAL_POSINF,         0 },                    // x: -(e)
-
-        { -1.0,             PAL_NEGINF,      1.0,                PAL_EPSILON * 10 },
-        { -1.0,             PAL_POSINF,      1.0,                PAL_EPSILON * 10 },
-
-        { -0.0,             PAL_NEGINF,      PAL_POSINF,         0 },
-        { -0.0,            -1,               PAL_NEGINF,         0 },
-        { -0.0f,           -0.0f,            1,                  PAL_EPSILON * 10 },
-        { -0.0f,            0,               1,                  PAL_EPSILON * 10 },
-        { -0.0,             1,              -0.0,                PAL_EPSILON },
-        { -0.0,             PAL_POSINF,      0,                  PAL_EPSILON },
-
-        {  PAL_NAN,        -0.0,             1.0,                PAL_EPSILON * 10 },
-        {  PAL_NAN,         0,               1.0,                PAL_EPSILON * 10 },
-
-        {  0.0,             PAL_NEGINF,      PAL_POSINF,         0 },
-        {  0.0,            -1,               PAL_POSINF,         0 },
-        {  0,              -0.0f,            1,                  PAL_EPSILON * 10 },
-        {  0,               0,               1,                  PAL_EPSILON * 10 },
-        {  0.0,             1,               0,                  PAL_EPSILON },
-        {  0.0,             PAL_POSINF,      0,                  PAL_EPSILON },
-
-        {  1,               PAL_NEGINF,      1,                  PAL_EPSILON * 10 },
-        {  1,               PAL_POSINF,      1,                  PAL_EPSILON * 10 },
-
-        {  2.71828183f,     PAL_NEGINF,      0,                  PAL_EPSILON },
-        {  2.71828183f,    -3.14159265f,     0.0432139183f,      PAL_EPSILON / 10 },     // x: e     y: -(pi)
-        {  2.71828183f,    -2.71828183f,     0.0659880358f,      PAL_EPSILON / 10 },     // x: e     y: -(e)
-        {  2.71828183f,    -2.30258509f,     0.1f,               PAL_EPSILON },          // x: e     y: -(ln(10))
-        {  2.71828183f,    -1.57079633f,     0.207879576f,       PAL_EPSILON },          // x: e     y: -(pi / 2)
-        {  2.71828183f,    -1.44269504f,     0.236290088f,       PAL_EPSILON },          // x: e     y: -(logf2(e))
-        {  2.71828183f,    -1.41421356f,     0.243116734f,       PAL_EPSILON },          // x: e     y: -(sqrtf(2))
-        {  2.71828183f,    -1.12837917f,     0.323557264f,       PAL_EPSILON },          // x: e     y: -(2 / sqrtf(pi))
-        {  2.71828183f,    -1,               0.367879441f,       PAL_EPSILON },          // x: e     y: -(1)
-        {  2.71828183f,    -0.785398163f,    0.455938128f,       PAL_EPSILON },          // x: e     y: -(pi / 4)
-        {  2.71828183f,    -0.707106781f,    0.493068691f,       PAL_EPSILON },          // x: e     y: -(1 / sqrtf(2))
-        {  2.71828183f,    -0.693147181f,    0.5f,               PAL_EPSILON },          // x: e     y: -(ln(2))
-        {  2.71828183f,    -0.636619772f,    0.529077808f,       PAL_EPSILON },          // x: e     y: -(2 / pi)
-        {  2.71828183f,    -0.434294482f,    0.647721485f,       PAL_EPSILON },          // x: e     y: -(log10f(e))
-        {  2.71828183f,    -0.318309886f,    0.727377349f,       PAL_EPSILON },          // x: e     y: -(1 / pi)
-        {  2.71828183f,     0,               1,                  PAL_EPSILON * 10 },     // x: e
-        {  2.71828183f,     0.318309886f,    1.37480223f,        PAL_EPSILON * 10 },     // x: e     y:  1 / pi
-        {  2.71828183f,     0.434294482f,    1.54387344f,        PAL_EPSILON * 10 },     // x: e     y:  log10f(e)
-        {  2.71828183f,     0.636619772f,    1.89008116f,        PAL_EPSILON * 10 },     // x: e     y:  2 / pi
-        {  2.71828183f,     0.693147181f,    2,                  PAL_EPSILON * 10 },     // x: e     y:  ln(2)
-        {  2.71828183f,     0.707106781f,    2.02811498f,        PAL_EPSILON * 10 },     // x: e     y:  1 / sqrtf(2)
-        {  2.71828183f,     0.785398163f,    2.19328005f,        PAL_EPSILON * 10 },     // x: e     y:  pi / 4
-        {  2.71828183f,     1,               2.71828183f,        PAL_EPSILON * 10 },     // x: e                         expected: e
-        {  2.71828183f,     1.12837917f,     3.09064302f,        PAL_EPSILON * 10 },     // x: e     y:  2 / sqrtf(pi)
-        {  2.71828183f,     1.41421356f,     4.11325038f,        PAL_EPSILON * 10 },     // x: e     y:  sqrtf(2)
-        {  2.71828183f,     1.44269504f,     4.23208611f,        PAL_EPSILON * 10 },     // x: e     y:  logf2(e)
-        {  2.71828183f,     1.57079633f,     4.81047738f,        PAL_EPSILON * 10 },     // x: e     y:  pi / 2
-        {  2.71828183f,     2.30258509f,     10,                 PAL_EPSILON * 100 },    // x: e     y:  ln(10)
-        {  2.71828183f,     2.71828183f,     15.1542622f,        PAL_EPSILON * 100 },    // x: e     y:  e
-        {  2.71828183f,     3.14159265f,     23.1406926f,        PAL_EPSILON * 100 },    // x: e     y:  pi
-        {  2.71828183f,     PAL_POSINF,      PAL_POSINF,         0 },                    // x: e
-        
-        {  10,              PAL_NEGINF,      0,                  0 },
-        {  10,             -3.14159265f,     0.000721784159f,    PAL_EPSILON / 1000 },   //          y: -(pi)
-        {  10,             -2.71828183f,     0.00191301410f,     PAL_EPSILON / 100 },    //          y: -(e)
-        {  10,             -2.30258509f,     0.00498212830f,     PAL_EPSILON / 100 },    //          y: -(ln(10))
-        {  10,             -1.57079633f,     0.0268660410f,      PAL_EPSILON / 10 },     //          y: -(pi / 2)
-        {  10,             -1.44269504f,     0.0360831928f,      PAL_EPSILON / 10 },     //          y: -(logf2(e))
-        {  10,             -1.41421356f,     0.0385288847f,      PAL_EPSILON / 10 },     //          y: -(sqrtf(2))
-        {  10,             -1.12837917f,     0.0744082059f,      PAL_EPSILON / 10 },     //          y: -(2 / sqrtf(pi))
-        {  10,             -1,               0.1f,               PAL_EPSILON },          //          y: -(1)
-        {  10,             -0.785398163f,    0.163908636f,       PAL_EPSILON },          //          y: -(pi / 4)
-        {  10,             -0.707106781f,    0.196287760f,       PAL_EPSILON },          //          y: -(1 / sqrtf(2))
-        {  10,             -0.693147181f,    0.202699566f,       PAL_EPSILON },          //          y: -(ln(2))
-        {  10,             -0.636619772f,    0.230876765f,       PAL_EPSILON },          //          y: -(2 / pi)
-        {  10,             -0.434294482f,    0.367879441f,       PAL_EPSILON },          //          y: -(log10f(e))
-        {  10,             -0.318309886f,    0.480496373f,       PAL_EPSILON },          //          y: -(1 / pi)
-        {  10,              0,               1,                  PAL_EPSILON * 10 },
-        {  10,              0.318309886f,    2.08118116f,        PAL_EPSILON * 10 },     //          y:  1 / pi
-        {  10,              0.434294482f,    2.71828183f,        PAL_EPSILON * 10 },     //          y:  log10f(e)           expected: e
-        {  10,              0.636619772f,    4.33131503f,        PAL_EPSILON * 10 },     //          y:  2 / pi
-        {  10,              0.693147181f,    4.93340967f,        PAL_EPSILON * 10 },     //          y:  ln(2)
-        {  10,              0.707106781f,    5.09456117f,        PAL_EPSILON * 10 },     //          y:  1 / sqrtf(2)
-        {  10,              0.785398163f,    6.10095980f,        PAL_EPSILON * 10 },     //          y:  pi / 4
-        {  10,              1,               10,                 PAL_EPSILON * 100 },
-        {  10,              1.12837917f,     13.4393779f,        PAL_EPSILON * 100 },    //          y:  2 / sqrtf(pi)
-        {  10,              1.41421356f,     25.9545535f,        PAL_EPSILON * 100 },    //          y:  sqrtf(2)
-        {  10,              1.44269504f,     27.7137338f,        PAL_EPSILON * 100 },    //          y:  logf2(e)
-        {  10,              1.57079633f,     37.2217105f,        PAL_EPSILON * 100 },    //          y:  pi / 2
-        {  10,              2.30258509f,     200.717432f,        PAL_EPSILON * 1000 },   //          y:  ln(10)
-        {  10,              2.71828183f,     522.735300f,        PAL_EPSILON * 1000 },   //          y:  e
-        {  10,              3.14159265f,     1385.45573f,        PAL_EPSILON * 10000 },  //          y:  pi
-        {  10,              PAL_POSINF,      PAL_POSINF,         0 },
-        
-        {  PAL_POSINF,      PAL_NEGINF,      0,                  PAL_EPSILON },
-        {  PAL_POSINF,      PAL_POSINF,      PAL_POSINF,         0 },
-    };
-
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        powf_test1_validate(tests[i].x, tests[i].y, tests[i].expected, tests[i].variance);
-    }
-
-    powf_test1_validate_isnan(-10, -1.57079633f);                                                   //          y: -(pi / 2)
-    powf_test1_validate_isnan(-10, -0.785398163f);                                                  //          y: -(pi / 4)
-    powf_test1_validate_isnan(-10,  0.785398163f);                                                  //          y:   pi / 4
-    powf_test1_validate_isnan(-10,  1.57079633f);                                                   //          y:   pi / 2
-    
-    powf_test1_validate_isnan(-2.71828183f, -1.57079633f);                                          // x: -(e)  y: -(pi / 2)
-    powf_test1_validate_isnan(-2.71828183f, -0.785398163f);                                         // x: -(e)  y: -(pi / 4)
-    powf_test1_validate_isnan(-2.71828183f,  0.785398163f);                                         // x: -(e)  y:   pi / 4
-    powf_test1_validate_isnan(-2.71828183f,  1.57079633f);                                          // x: -(e)  y:   pi / 2
-
-    powf_test1_validate_isnan(PAL_NEGINF, PAL_NAN);
-    powf_test1_validate_isnan(PAL_NAN,    PAL_NEGINF);
-    
-    powf_test1_validate_isnan(PAL_POSINF, PAL_NAN);
-    powf_test1_validate_isnan(PAL_NAN,    PAL_POSINF);
-    
-    powf_test1_validate_isnan(PAL_NAN, PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/qsort/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/qsort/test1/test1.cpp
deleted file mode 100644
index 57b288a809ea..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/qsort/test1/test1.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Calls qsort to sort a buffer, and verifies that it has done
-**          the job correctly.
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-int __cdecl charcmp_qsort_test1(const void *pa, const void *pb)
-{
-    return memcmp(pa, pb, 1);
-}
-
-PALTEST(c_runtime_qsort_test1_paltest_qsort_test1, "c_runtime/qsort/test1/paltest_qsort_test1")
-{
-    char before[] = "cgaiehdbjf";
-    const char after[] = "abcdefghij";
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-
-    qsort(before, sizeof(before) - 1, sizeof(char), charcmp_qsort_test1);
-  
-    if (memcmp(before, after, sizeof(before)) != 0)
-    {
-        Fail("qsort did not correctly sort an array of characters.\n");
-    }
-
-    PAL_Terminate();
-    return PASS;
-
-}
-
-
-
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/qsort/test2/test2.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/qsort/test2/test2.cpp
deleted file mode 100644
index 20d76c5677e3..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/qsort/test2/test2.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test2.c
-**
-** Purpose: Calls qsort to sort a buffer, and verifies that it has done
-**          the job correctly.
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-int __cdecl twocharcmp_qsort_test2(const void *pa, const void *pb)
-{
-    return memcmp(pa, pb, 2);
-}
-
-PALTEST(c_runtime_qsort_test2_paltest_qsort_test2, "c_runtime/qsort/test2/paltest_qsort_test2")
-{
-    char before[] = "ccggaaiieehhddbbjjff";
-    const char after[] = "aabbccddeeffgghhiijj";
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-
-    qsort(before, (sizeof(before) - 1) / 2, 2 * sizeof(char), twocharcmp_qsort_test2);
-  
-    if (memcmp(before, after, sizeof(before)) != 0)
-    {
-        Fail("qsort did not correctly sort an array of 2-character "
-             "buffers.\n");
-    }
-
-    PAL_Terminate();
-    return PASS;
-
-}
-
-
-
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/rand_srand/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/rand_srand/test1/test1.cpp
deleted file mode 100644
index cd752c39f539..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/rand_srand/test1/test1.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that srand provide random
-**          number to rand. Also make sure that rand result from a
-**          srand with seed 1 and no call to srand are the same.
-**
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**				 Fail
-**               srand()
-**
-
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-
-PALTEST(c_runtime_rand_srand_test1_paltest_rand_srand_test1, "c_runtime/rand_srand/test1/paltest_rand_srand_test1")
-{
-    int RandNumber[10];
-    int TempRandNumber;
-    int i;
-    int SRAND_SEED;
-    int SRAND_REINIT  =  1;
-
-    /*
-     * Initialize the PAL and return FAILURE if this fails
-     */
-
-    if (PAL_Initialize(argc, argv))
-    {
-       return FAIL;
-    }
-
-    SRAND_SEED = time(NULL);
-
-    /* does not initialize srand and call rand. */
-    for (i=0; i<10; i++)
-    {
-        /* keep the value in an array            */
-        RandNumber[i]=rand();
-        if (RandNumber[i] < 0 || RandNumber[i] > RAND_MAX)
-        {
-            Fail("1) ERROR: random generated an invalid value: %d", RandNumber[i]);
-        }
-    }
-
-
-    /*   initialize random generator            */
-    srand(SRAND_SEED);
-
-
-    /* choose 10 numbers with a different seed.
-       the numbers should be different than
-       those the previously generated one       */
-    for(i = 0; i < 10; i++)
-    {
-        TempRandNumber=rand();
-        if (TempRandNumber < 0 || TempRandNumber > RAND_MAX)
-        {
-            Fail("2) ERROR: random generated an invalid value: %d", TempRandNumber);
-        }
-    }
-
-
-
-    /* renitialize the srand with 1 */
-    srand(SRAND_REINIT);
-
-
-
-    /* choose 10 numbers with seed 1,
-       the number should be the same as those we kept in the array. */
-    for( i = 0;   i < 10;i++ )
-    {
-        /* pick the random number*/
-        TempRandNumber=rand();
-        /* test if it is the same number generated in the first sequences*/
-        if(RandNumber[i]!=TempRandNumber)
-        {
-            Fail ("ERROR: rand should return the same value when srand "
-                  "is initialized with 1 or not initialized at all");
-        }
-        if (TempRandNumber < 0 || TempRandNumber > RAND_MAX)
-        {
-            Fail("3) ERROR: random generated an invalid value: %d", TempRandNumber);
-        }
-    }
-
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/realloc/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/realloc/test1/test1.cpp
deleted file mode 100644
index edd075da23f7..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/realloc/test1/test1.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Uses realloc to allocate and realloate memory, checking
-**          that memory contents are copied when the memory is reallocated.
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_realloc_test1_paltest_realloc_test1, "c_runtime/realloc/test1/paltest_realloc_test1")
-{
-    char *testA;
-    const int len1 = 10;
-    const char str1[] = "aaaaaaaaaa";
-
-    const int len2 = 20;
-    const char str2[] = "bbbbbbbbbbbbbbbbbbbb";
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-    /* this should work like malloc */
-    testA = (char *)realloc(NULL, len1*sizeof(char));  
-    memcpy(testA, str1, len1);
-    if (testA == NULL)
-    {
-        Fail("We ran out of memory (unlikely), or realloc is broken.\n");
-    }
-
-    if (memcmp(testA, str1, len1) != 0)
-    { 
-        Fail("realloc doesn't properly allocate new memory.\n");
-    }
-  
-    testA = (char *)realloc(testA, len2*sizeof(char));  
-    if (memcmp(testA, str1, len1) != 0)
-    { 
-        Fail("realloc doesn't move the contents of the original memory "
-             "block to the newly allocated block.\n");
-    }
-
-    memcpy(testA, str2, len2);
-    if (memcmp(testA, str2, len2) != 0)
-    {
-        Fail("Couldn't write to memory allocated by realloc.\n");
-    }
-
-    /* free the buffer */
-    testA = (char*)realloc(testA, 0);
-    if (testA != NULL)
-    {
-        Fail("Realloc didn't return NULL when called with a length "
-             "of zero.\n");
-    }
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/sin/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/sin/test1/test1.cpp
deleted file mode 100644
index bcfeb4baa051..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/sin/test1/test1.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that sin return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * sin_test1_validate
- *
- * test validation function
- */
-void __cdecl sin_test1_validate(double value, double expected, double variance)
-{
-    double result = sin(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("sin(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * sin_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl sin_test1_validate_isnan(double value)
-{
-    double result = sin(value);
-
-    if (!_isnan(result))
-    {
-        Fail("sin(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_sin_test1_paltest_sin_test1, "c_runtime/sin/test1/paltest_sin_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance */
-        {  0,                      0,                      PAL_EPSILON },
-        {  0.31830988618379067,    0.31296179620778659,    PAL_EPSILON },       // value:  1 / pi
-        {  0.43429448190325183,    0.42077048331375735,    PAL_EPSILON },       // value:  log10(e)
-        {  0.63661977236758134,    0.59448076852482208,    PAL_EPSILON },       // value:  2 / pi
-        {  0.69314718055994531,    0.63896127631363480,    PAL_EPSILON },       // value:  ln(2)
-        {  0.70710678118654752,    0.64963693908006244,    PAL_EPSILON },       // value:  1 / sqrt(2)
-        {  0.78539816339744831,    0.70710678118654752,    PAL_EPSILON },       // value:  pi / 4,         expected: 1 / sqrt(2)
-        {  1,                      0.84147098480789651,    PAL_EPSILON },
-        {  1.1283791670955126,     0.90371945743584630,    PAL_EPSILON },       // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     0.98776594599273553,    PAL_EPSILON },       // value:  sqrt(2)
-        {  1.4426950408889634,     0.99180624439366372,    PAL_EPSILON },       // value:  log2(e)
-        {  1.5707963267948966,     1,                      PAL_EPSILON * 10 },  // value:  pi / 2
-        {  2.3025850929940457,     0.74398033695749319,    PAL_EPSILON },       // value:  ln(10)
-        {  2.7182818284590452,     0.41078129050290870,    PAL_EPSILON },       // value:  e
-        {  3.1415926535897932,     0,                      PAL_EPSILON },       // value:  pi
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        sin_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        sin_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-    
-    sin_test1_validate_isnan(PAL_NEGINF);
-    sin_test1_validate_isnan(PAL_NAN);
-    sin_test1_validate_isnan(PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/sincos/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/sincos/test1/test1.cpp
deleted file mode 100644
index e2734652a3e7..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/sincos/test1/test1.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that sincos return the correct values
-**
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;         /* value to test the function with */
-    double expected_sin;  /* expected sin result */
-    double expected_cos;  /* expected cos result */
-    double variance_sin;  /* maximum delta between the expected and actual sin result */
-    double variance_cos;  /* maximum delta between the expected and actual cos result */
-};
-
-/**
- * sincos_test1_validate
- *
- * test validation function
- */
-void __cdecl sincos_test1_validate(double value, double expected_sin, double expected_cos, double variance_sin, double variance_cos)
-{
-    double result_sin, result_cos;
-    sincos(value, &result_sin, &result_cos);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta_sin = fabs(result_sin - expected_sin);
-    double delta_cos = fabs(result_cos - expected_cos);
-
-    if ((delta_sin > variance_sin) || (delta_cos > variance_cos))
-    {
-        Fail("sincos(%g) returned (%20.17g, %20.17g) when it should have returned (%20.17g, %20.17g)",
-             value, result_sin, result_cos, expected_sin, expected_cos);
-    }
-}
-
-/**
- * sincos_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl sincos_test1_validate_isnan(double value)
-{
-    double result_sin, result_cos;
-    sincos(value, &result_sin, &result_cos);
-
-    if (!_isnan(result_sin) || !_isnan(result_cos))
-    {
-        Fail("sincos(%g) returned (%20.17g, %20.17g) when it should have returned (%20.17g, %20.17g)",
-             value, result_sin, result_cos, PAL_NAN, PAL_NAN);
-    }
-}
-
-/**
- * main
- *
- * executable entry point
- */
-PALTEST(c_runtime_sincos_test1_paltest_sincos_test1, "c_runtime/sincos/test1/paltest_sincos_test1")
-{
-    struct test tests[] =
-    {
-        /* value                   expected_sin             expected_cos            variance_sin         variance_cos */
-        {  0,                      0,                       1,                      PAL_EPSILON,         PAL_EPSILON * 10 },
-        {  0.31830988618379067,    0.31296179620778659,     0.94976571538163866,    PAL_EPSILON,         PAL_EPSILON },       // value:  1 / pi
-        {  0.43429448190325183,    0.42077048331375735,     0.90716712923909839,    PAL_EPSILON,         PAL_EPSILON },       // value:  log10(e)
-        {  0.63661977236758134,    0.59448076852482208,     0.80410982822879171,    PAL_EPSILON,         PAL_EPSILON },       // value:  2 / pi
-        {  0.69314718055994531,    0.63896127631363480,     0.76923890136397213,    PAL_EPSILON,         PAL_EPSILON },       // value:  ln(2)
-        {  0.70710678118654752,    0.64963693908006244,     0.76024459707563015,    PAL_EPSILON,         PAL_EPSILON },       // value:  1 / sqrt(2)
-        {  0.78539816339744831,    0.70710678118654752,     0.70710678118654752,    PAL_EPSILON,         PAL_EPSILON },       // value:  pi / 4,         expected_sin: 1 / sqrtf(2),    expected_cos:  1
-        {  1,                      0.84147098480789651,     0.54030230586813972,    PAL_EPSILON,         PAL_EPSILON },
-        {  1.1283791670955126,     0.90371945743584630,     0.42812514788535792,    PAL_EPSILON,         PAL_EPSILON },       // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     0.98776594599273553,     0.15594369476537447,    PAL_EPSILON,         PAL_EPSILON },       // value:  sqrt(2)
-        {  1.4426950408889634,     0.99180624439366372,     0.12775121753523991,    PAL_EPSILON,         PAL_EPSILON },       // value:  log2(e)
-        {  1.5707963267948966,     1,                       0,                      PAL_EPSILON * 10,    PAL_EPSILON },       // value:  pi / 2
-        {  2.3025850929940457,     0.74398033695749319,    -0.66820151019031295,    PAL_EPSILON,         PAL_EPSILON },       // value:  ln(10)
-        {  2.7182818284590452,     0.41078129050290870,    -0.91173391478696510,    PAL_EPSILON,         PAL_EPSILON },       // value:  e
-        {  3.1415926535897932,     0,                      -1,                      PAL_EPSILON,         PAL_EPSILON * 10 },  // value:  pi
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        sincos_test1_validate( tests[i].value,  tests[i].expected_sin, tests[i].expected_cos, tests[i].variance_sin, tests[i].variance_cos);
-        sincos_test1_validate(-tests[i].value, -tests[i].expected_sin, tests[i].expected_cos, tests[i].variance_sin, tests[i].variance_cos);
-    }
-
-    sincos_test1_validate_isnan(PAL_NEGINF);
-    sincos_test1_validate_isnan(PAL_NAN);
-    sincos_test1_validate_isnan(PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/sincosf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/sincosf/test1/test1.cpp
deleted file mode 100644
index 7332309e3d48..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/sincosf/test1/test1.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that sincosf return the correct values
-**
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;         /* value to test the function with */
-    float expected_sin;  /* expected sin result */
-    float expected_cos;  /* expected cos result */
-    float variance_sin;  /* maximum delta between the expected and actual sin result */
-    float variance_cos;  /* maximum delta between the expected and actual cos result */
-};
-
-/**
- * sincosf_test1_validate
- *
- * test validation function
- */
-void __cdecl sincosf_test1_validate(float value, float expected_sin, float expected_cos, float variance_sin, float variance_cos)
-{
-    float result_sin, result_cos;
-    sincosf(value, &result_sin, &result_cos);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta_sin = fabsf(result_sin - expected_sin);
-    float delta_cos = fabsf(result_cos - expected_cos);
-
-    if ((delta_sin > variance_sin) || (delta_cos > variance_cos))
-    {
-        Fail("sincosf(%g) returned (%10.9g, %10.9g) when it should have returned (%10.9g, %10.9g)",
-             value, result_sin, result_cos, expected_sin, expected_cos);
-    }
-}
-
-/**
- * sincosf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl sincosf_test1_validate_isnan(float value)
-{
-    float result_sin, result_cos;
-    sincosf(value, &result_sin, &result_cos);
-
-    if (!_isnanf(result_sin) || !_isnanf(result_cos))
-    {
-        Fail("sincosf(%g) returned (%10.9g, %10.9g) when it should have returned (%10.9g, %10.9g)",
-             value, result_sin, result_cos, PAL_NAN, PAL_NAN);
-    }
-}
-
-/**
- * main
- *
- * executable entry point
- */
-PALTEST(c_runtime_sincosf_test1_paltest_sincosf_test1, "c_runtime/sincosf/test1/paltest_sincosf_test1")
-{
-    struct test tests[] =
-    {
-        /* value            expected_sin      expected_cos     variance_sin         variance_cos */
-        {  0,               0,                1,               PAL_EPSILON,         PAL_EPSILON * 10 },
-        {  0.318309886f,    0.312961796f,     0.949765715f,    PAL_EPSILON,         PAL_EPSILON },       // value:  1 / pi
-        {  0.434294482f,    0.420770483f,     0.907167129f,    PAL_EPSILON,         PAL_EPSILON },       // value:  log10f(e)
-        {  0.636619772f,    0.594480769f,     0.804109828f,    PAL_EPSILON,         PAL_EPSILON },       // value:  2 / pi
-        {  0.693147181f,    0.638961276f,     0.769238901f,    PAL_EPSILON,         PAL_EPSILON },       // value:  ln(2)
-        {  0.707106781f,    0.649636939f,     0.760244597f,    PAL_EPSILON,         PAL_EPSILON },       // value:  1 / sqrtf(2)
-        {  0.785398163f,    0.707106781f,     0.707106781f,    PAL_EPSILON,         PAL_EPSILON },       // value:  pi / 4,         expected_sin: 1 / sqrtf(2),    expected_cos:  1
-        {  1,               0.841470985f,     0.540302306f,    PAL_EPSILON,         PAL_EPSILON },
-        {  1.12837917f,     0.903719457f,     0.428125148f,    PAL_EPSILON,         PAL_EPSILON },       // value:  2 / sqrtf(pi)
-        {  1.41421356f,     0.987765946f,     0.155943695f,    PAL_EPSILON,         PAL_EPSILON },       // value:  sqrtf(2)
-        {  1.44269504f,     0.991806244f,     0.127751218f,    PAL_EPSILON,         PAL_EPSILON },       // value:  logf2(e)
-        {  1.57079633f,     1,                0,               PAL_EPSILON * 10,    PAL_EPSILON },       // value:  pi / 2
-        {  2.30258509f,     0.743980337f,    -0.668201510f,    PAL_EPSILON,         PAL_EPSILON },       // value:  ln(10)
-        {  2.71828183f,     0.410781291f,    -0.911733918f,    PAL_EPSILON,         PAL_EPSILON },       // value:  e
-        {  3.14159265f,     0,               -1,               PAL_EPSILON,         PAL_EPSILON * 10 },  // value:  pi
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        sincosf_test1_validate( tests[i].value,  tests[i].expected_sin, tests[i].expected_cos, tests[i].variance_sin, tests[i].variance_cos);
-        sincosf_test1_validate(-tests[i].value, -tests[i].expected_sin, tests[i].expected_cos, tests[i].variance_sin, tests[i].variance_cos);
-    }
-
-    sincosf_test1_validate_isnan(PAL_NEGINF);
-    sincosf_test1_validate_isnan(PAL_NAN);
-    sincosf_test1_validate_isnan(PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/sinf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/sinf/test1/test1.cpp
deleted file mode 100644
index 1481843fed47..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/sinf/test1/test1.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that sinf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * sinf_test1_validate
- *
- * test validation function
- */
-void __cdecl sinf_test1_validate(float value, float expected, float variance)
-{
-    float result = sinf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("sinf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * sinf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl sinf_test1_validate_isnan(float value)
-{
-    float result = sinf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("sinf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_sinf_test1_paltest_sinf_test1, "c_runtime/sinf/test1/paltest_sinf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected         variance */
-        {  0,               0,               PAL_EPSILON },
-        {  0.318309886f,    0.312961796f,    PAL_EPSILON },       // value:  1 / pi
-        {  0.434294482f,    0.420770483f,    PAL_EPSILON },       // value:  log10f(e)
-        {  0.636619772f,    0.594480769f,    PAL_EPSILON },       // value:  2 / pi
-        {  0.693147181f,    0.638961276f,    PAL_EPSILON },       // value:  ln(2)
-        {  0.707106781f,    0.649636939f,    PAL_EPSILON },       // value:  1 / sqrtf(2)
-        {  0.785398163f,    0.707106781f,    PAL_EPSILON },       // value:  pi / 4,         expected: 1 / sqrtf(2)
-        {  1,               0.841470985f,    PAL_EPSILON },
-        {  1.12837917f,     0.903719457f,    PAL_EPSILON },       // value:  2 / sqrtf(pi)
-        {  1.41421356f,     0.987765946f,    PAL_EPSILON },       // value:  sqrtf(2)
-        {  1.44269504f,     0.991806244f,    PAL_EPSILON },       // value:  logf2(e)
-        {  1.57079633f,     1,               PAL_EPSILON * 10 },  // value:  pi / 2
-        {  2.30258509f,     0.743980337f,    PAL_EPSILON },       // value:  ln(10)
-        {  2.71828183f,     0.410781291f,    PAL_EPSILON },       // value:  e
-        {  3.14159265f,     0,               PAL_EPSILON },       // value:  pi
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        sinf_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        sinf_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-    
-    sinf_test1_validate_isnan(PAL_NEGINF);
-    sinf_test1_validate_isnan(PAL_NAN);
-    sinf_test1_validate_isnan(PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/sinh/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/sinh/test1/test1.cpp
deleted file mode 100644
index 2879bf16aea9..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/sinh/test1/test1.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that sinh return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * sinh_test1_validate
- *
- * test validation function
- */
-void __cdecl sinh_test1_validate(double value, double expected, double variance)
-{
-    double result = sinh(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("sinh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * sinh_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl sinh_test1_validate_isnan(double value)
-{
-    double result = sinh(value);
-
-    if (!_isnan(result))
-    {
-        Fail("sinh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_sinh_test1_paltest_sinh_test1, "c_runtime/sinh/test1/paltest_sinh_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance */
-        {  0,                      0,                      PAL_EPSILON },
-        {  0.31830988618379067,    0.32371243907207108,    PAL_EPSILON },           // value:  1 / pi
-        {  0.43429448190325183,    0.44807597941469025,    PAL_EPSILON },           // value:  log10(e)
-        {  0.63661977236758134,    0.68050167815224332,    PAL_EPSILON },           // value:  2 / pi
-        {  0.69314718055994531,    0.75,                   PAL_EPSILON },           // value:  ln(2)
-        {  0.70710678118654752,    0.76752314512611633,    PAL_EPSILON },           // value:  1 / sqrt(2)
-        {  0.78539816339744831,    0.86867096148600961,    PAL_EPSILON },           // value:  pi / 4
-        {  1,                      1.1752011936438015,     PAL_EPSILON * 10 },
-        {  1.1283791670955126,     1.3835428792038633,     PAL_EPSILON * 10 },      // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     1.9350668221743567,     PAL_EPSILON * 10 },      // value:  sqrt(2)
-        {  1.4426950408889634,     1.9978980091062796,     PAL_EPSILON * 10 },      // value:  log2(e)
-        {  1.5707963267948966,     2.3012989023072949,     PAL_EPSILON * 10 },      // value:  pi / 2
-        {  2.3025850929940457,     4.95,                   PAL_EPSILON * 10 },      // value:  ln(10)
-        {  2.7182818284590452,     7.5441371028169758,     PAL_EPSILON * 10 },      // value:  e
-        {  3.1415926535897932,     11.548739357257748,     PAL_EPSILON * 100 },     // value:  pi
-        {  PAL_POSINF,             PAL_POSINF,             0 },
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        sinh_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        sinh_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-    
-    sinh_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/sinhf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/sinhf/test1/test1.cpp
deleted file mode 100644
index a67ab96a4101..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/sinhf/test1/test1.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that sinhf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * sinhf_test1_validate
- *
- * test validation function
- */
-void __cdecl sinhf_test1_validate(float value, float expected, float variance)
-{
-    float result = sinhf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("sinhf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * sinhf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl sinhf_test1_validate_isnan(float value)
-{
-    float result = sinhf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("sinhf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_sinhf_test1_paltest_sinhf_test1, "c_runtime/sinhf/test1/paltest_sinhf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected         variance */
-        {  0,               0,               PAL_EPSILON },
-        {  0.318309886f,    0.323712439f,    PAL_EPSILON },           // value:  1 / pi
-        {  0.434294482f,    0.448075979f,    PAL_EPSILON },           // value:  log10f(e)
-        {  0.636619772f,    0.680501678f,    PAL_EPSILON },           // value:  2 / pi
-        {  0.693147181f,    0.75,            PAL_EPSILON },           // value:  ln(2)
-        {  0.707106781f,    0.767523145f,    PAL_EPSILON },           // value:  1 / sqrtf(2)
-        {  0.785398163f,    0.868670961f,    PAL_EPSILON },           // value:  pi / 4
-        {  1,               1.17520119f,     PAL_EPSILON * 10 },
-        {  1.12837917f,     1.38354288f,     PAL_EPSILON * 10 },      // value:  2 / sqrtf(pi)
-        {  1.41421356f,     1.93506682f,     PAL_EPSILON * 10 },      // value:  sqrtf(2)
-        {  1.44269504f,     1.99789801f,     PAL_EPSILON * 10 },      // value:  logf2(e)
-        {  1.57079633f,     2.30129890f,     PAL_EPSILON * 10 },      // value:  pi / 2
-        {  2.30258509f,     4.95f,           PAL_EPSILON * 10 },      // value:  ln(10)
-        {  2.71828183f,     7.54413710f,     PAL_EPSILON * 10 },      // value:  e
-        {  3.14159265f,     11.5487394f,     PAL_EPSILON * 100 },     // value:  pi
-        {  PAL_POSINF,      PAL_POSINF,      0 },
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        sinhf_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        sinhf_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-    
-    sinhf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/sqrt/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/sqrt/test1/test1.cpp
deleted file mode 100644
index f61e51111f49..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/sqrt/test1/test1.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose:  Call the sqrt function on a positive value, a positive value
-** with a decimal and on the maximum possible double value.
-**
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * sqrt_test1_validate
- *
- * test validation function
- */
-void __cdecl sqrt_test1_validate(double value, double expected, double variance)
-{
-    double result = sqrt(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("sqrt(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * sqrt_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl sqrt_test1_validate_isnan(double value)
-{
-    double result = sqrt(value);
-
-    if (!_isnan(result))
-    {
-        Fail("sqrt(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-PALTEST(c_runtime_sqrt_test1_paltest_sqrt_test1, "c_runtime/sqrt/test1/paltest_sqrt_test1")
-{
-    struct test tests[] =
-    {
-        /* value                   expected                variance */
-        {  0.31830988618379067,    0.56418958354775629,    PAL_EPSILON },       // value:  1 / pi
-        {  0.43429448190325183,    0.65901022898226081,    PAL_EPSILON },       // value:  log10(e)
-        {  0.63661977236758134,    0.79788456080286536,    PAL_EPSILON },       // value:  2 / pi
-        {  0.69314718055994531,    0.83255461115769776,    PAL_EPSILON },       // value:  ln(2)
-        {  0.70710678118654752,    0.84089641525371454,    PAL_EPSILON },       // value:  1 / sqrt(2)
-        {  0.78539816339744831,    0.88622692545275801,    PAL_EPSILON },       // value:  pi / 4
-        {  1,                      1,                      PAL_EPSILON * 10 },
-        {  1.1283791670955126,     1.0622519320271969,     PAL_EPSILON * 10 },  // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     1.1892071150027211,     PAL_EPSILON * 10 },  // value:  sqrt(2)
-        {  1.4426950408889634,     1.2011224087864498,     PAL_EPSILON * 10 },  // value:  log2(e)
-        {  1.5707963267948966,     1.2533141373155003,     PAL_EPSILON * 10 },  // value:  pi / 2
-        {  2.3025850929940457,     1.5174271293851464,     PAL_EPSILON * 10 },  // value:  ln(10)
-        {  2.7182818284590452,     1.6487212707001281,     PAL_EPSILON * 10 },  // value:  e
-        {  3.1415926535897932,     1.7724538509055160,     PAL_EPSILON * 10 },  // value:  pi
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    sqrt_test1_validate(-0.0, -0.0, PAL_EPSILON);
-    sqrt_test1_validate( 0.0,  0.0, PAL_EPSILON);
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        sqrt_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-        sqrt_test1_validate_isnan(-tests[i].value);
-    }
-
-    sqrt_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/sqrtf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/sqrtf/test1/test1.cpp
deleted file mode 100644
index 821ae42750cf..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/sqrtf/test1/test1.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=====================================================================
-**
-** Source:  test1.c
-**
-** Purpose:  Call the sqrtf function on a positive value, a positive value
-** with a decimal and on the maximum possible float value.
-**
-**
-**===================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * sqrtf_test1_validate
- *
- * test validation function
- */
-void __cdecl sqrtf_test1_validate(float value, float expected, float variance)
-{
-    float result = sqrtf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("sqrtf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * sqrtf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl sqrtf_test1_validate_isnan(float value)
-{
-    float result = sqrtf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("sqrtf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-PALTEST(c_runtime_sqrtf_test1_paltest_sqrtf_test1, "c_runtime/sqrtf/test1/paltest_sqrtf_test1")
-{
-    struct test tests[] =
-    {
-        /* value            expected         variance */
-        {  0.318309886f,    0.564189584f,    PAL_EPSILON },       // value:  1 / pi
-        {  0.434294482f,    0.659010229f,    PAL_EPSILON },       // value:  log10f(e)
-        {  0.636619772f,    0.797884561f,    PAL_EPSILON },       // value:  2 / pi
-        {  0.693147181f,    0.832554611f,    PAL_EPSILON },       // value:  ln(2)
-        {  0.707106781f,    0.840896415f,    PAL_EPSILON },       // value:  1 / sqrtf(2)
-        {  0.785398163f,    0.886226925f,    PAL_EPSILON },       // value:  pi / 4
-        {  1,               1,               PAL_EPSILON * 10 },
-        {  1.12837917f,     1.06225193f,     PAL_EPSILON * 10 },  // value:  2 / sqrtf(pi)
-        {  1.41421356f,     1.18920712f,     PAL_EPSILON * 10 },  // value:  sqrtf(2)
-        {  1.44269504f,     1.20112241f,     PAL_EPSILON * 10 },  // value:  logf2(e)
-        {  1.57079633f,     1.25331414f,     PAL_EPSILON * 10 },  // value:  pi / 2
-        {  2.30258509f,     1.51742713f,     PAL_EPSILON * 10 },  // value:  ln(10)
-        {  2.71828183f,     1.64872127f,     PAL_EPSILON * 10 },  // value:  e
-        {  3.14159265f,     1.77245385F,     PAL_EPSILON * 10 },  // value:  pi
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    sqrtf_test1_validate(-0.0f, -0.0f, PAL_EPSILON);
-    sqrtf_test1_validate( 0.0f,  0.0f, PAL_EPSILON);
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        sqrtf_test1_validate(tests[i].value, tests[i].expected, tests[i].variance);
-        sqrtf_test1_validate_isnan(-tests[i].value);
-    }
-
-    sqrtf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/tan/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/tan/test1/test1.cpp
deleted file mode 100644
index 11f9123354ea..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/tan/test1/test1.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that tan return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * tan_test1_validate
- *
- * test validation function
- */
-void __cdecl tan_test1_validate(double value, double expected, double variance)
-{
-    double result = tan(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("tan(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * tan_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl tan_test1_validate_isnan(double value)
-{
-    double result = tan(value);
-
-    if (!_isnan(result))
-    {
-        Fail("tan(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_tan_test1_paltest_tan_test1, "c_runtime/tan/test1/paltest_tan_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance */
-        {  0,                      0,                      PAL_EPSILON },
-        {  0.31830988618379067,    0.32951473309607836,    PAL_EPSILON },       // value:  1 / pi
-        {  0.43429448190325183,    0.46382906716062964,    PAL_EPSILON },       // value:  log10(e)
-        {  0.63661977236758134,    0.73930295048660405,    PAL_EPSILON },       // value:  2 / pi
-        {  0.69314718055994531,    0.83064087786078395,    PAL_EPSILON },       // value:  ln(2)
-        {  0.70710678118654752,    0.85451043200960189,    PAL_EPSILON },       // value:  1 / sqrt(2)
-        {  0.78539816339744831,    1,                      PAL_EPSILON * 10 },  // value:  pi / 4
-        {  1,                      1.5574077246549022,     PAL_EPSILON * 10 },
-        {  1.1283791670955126,     2.1108768356626451,     PAL_EPSILON * 10 },  // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     6.3341191670421916,     PAL_EPSILON * 10 },  // value:  sqrt(2)
-        {  1.4426950408889634,     7.7635756709721848,     PAL_EPSILON * 10 },  // value:  log2(e)
-    // SEE BELOW -- {  1.5707963267948966,     PAL_POSINF,             0 },                 // value:  pi / 2
-        {  2.3025850929940457,    -1.1134071468135374,     PAL_EPSILON * 10 },  // value:  ln(10)
-        {  2.7182818284590452,    -0.45054953406980750,    PAL_EPSILON },       // value:  e
-        {  3.1415926535897932,     0,                      PAL_EPSILON },       // value:  pi
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        tan_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        tan_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-    
-    // -- SPECIAL CASE --
-    // Normally, tan(pi / 2) would return PAL_POSINF (atan2(PAL_POSINF) does return (pi / 2)).
-    // However, it seems instead (on all supported systems), we get a different number entirely.
-    tan_test1_validate( 1.5707963267948966,  16331239353195370.0, 0);
-    tan_test1_validate(-1.5707963267948966, -16331239353195370.0, 0);
-    
-    tan_test1_validate_isnan(PAL_NEGINF);
-    tan_test1_validate_isnan(PAL_NAN);
-    tan_test1_validate_isnan(PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/tanf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/tanf/test1/test1.cpp
deleted file mode 100644
index 2f7358cc2774..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/tanf/test1/test1.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that tanf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * tanf_test1_validate
- *
- * test validation function
- */
-void __cdecl tanf_test1_validate(float value, float expected, float variance)
-{
-    float result = tanf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("tanf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * tanf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl tanf_test1_validate_isnan(float value)
-{
-    float result = tanf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("tanf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_tanf_test1_paltest_tanf_test1, "c_runtime/tanf/test1/paltest_tanf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected         variance */
-        {  0,               0,               PAL_EPSILON },
-        {  0.318309886f,    0.329514733f,    PAL_EPSILON },       // value:  1 / pi
-        {  0.434294482f,    0.463829067f,    PAL_EPSILON },       // value:  log10f(e)
-        {  0.636619772f,    0.739302950f,    PAL_EPSILON },       // value:  2 / pi
-        {  0.693147181f,    0.830640878f,    PAL_EPSILON },       // value:  ln(2)
-        {  0.707106781f,    0.854510432f,    PAL_EPSILON },       // value:  1 / sqrtf(2)
-        {  0.785398163f,    1,               PAL_EPSILON * 10 },  // value:  pi / 4
-        {  1,               1.55740772f,     PAL_EPSILON * 10 },
-        {  1.12837917f,     2.11087684f,     PAL_EPSILON * 10 },  // value:  2 / sqrtf(pi)
-        {  1.41421356f,     6.33411917f,     PAL_EPSILON * 10 },  // value:  sqrtf(2)
-        {  1.44269504f,     7.76357567f,     PAL_EPSILON * 10 },  // value:  logf2(e)
-    // SEE BELOW -- {  1.57079633f,     PAL_POSINF,      0 },                 // value:  pi / 2
-        {  2.30258509f,    -1.11340715f,     PAL_EPSILON * 10 },  // value:  ln(10)
-        {  2.71828183f,    -0.450549534f,    PAL_EPSILON },       // value:  e
-        {  3.14159265f,     0,               PAL_EPSILON },       // value:  pi
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        tanf_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        tanf_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-    
-    // -- SPECIAL CASE --
-    // Normally, tanf(pi / 2) would return PAL_POSINF (atan2f(PAL_POSINF) does return (pi / 2)).
-    // However, it seems instead (on all supported systems), we get a different number entirely.
-    tanf_test1_validate( 1.57079633f, -22877332.0, PAL_EPSILON * 100000000);
-    tanf_test1_validate(-1.57079633f,  22877332.0, PAL_EPSILON * 100000000);
-    
-    tanf_test1_validate_isnan(PAL_NEGINF);
-    tanf_test1_validate_isnan(PAL_NAN);
-    tanf_test1_validate_isnan(PAL_POSINF);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/tanh/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/tanh/test1/test1.cpp
deleted file mode 100644
index a2df3885803c..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/tanh/test1/test1.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that tanh return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary64 (double) has a machine epsilon of 2^-52 (approx. 2.22e-16). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-50 (approx. 8.88e-16) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (15-17 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxxxxxxxxxx will use
-// PAL_EPSILON for the variance, while an expected result in the format of 0.0xxxxxxxxxxxxxxxxx
-// will use PAL_EPSILON / 10 and and expected result in the format of x.xxxxxxxxxxxxxxxx will
-// use PAL_EPSILON * 10.
-#define PAL_EPSILON 8.8817841970012523e-16
-
-#define PAL_NAN     sqrt(-1.0)
-#define PAL_POSINF -log(0.0)
-#define PAL_NEGINF  log(0.0)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    double value;     /* value to test the function with */
-    double expected;  /* expected result */
-    double variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * tanh_test1_validate
- *
- * test validation function
- */
-void __cdecl tanh_test1_validate(double value, double expected, double variance)
-{
-    double result = tanh(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    double delta = fabs(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("tanh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, expected);
-    }
-}
-
-/**
- * tanh_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl tanh_test1_validate_isnan(double value)
-{
-    double result = tanh(value);
-
-    if (!_isnan(result))
-    {
-        Fail("tanh(%g) returned %20.17g when it should have returned %20.17g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_tanh_test1_paltest_tanh_test1, "c_runtime/tanh/test1/paltest_tanh_test1")
-{
-    struct test tests[] = 
-    {
-        /* value                   expected                variance */
-        {  0,                      0,                      PAL_EPSILON },
-        {  0.31830988618379067,    0.30797791269089433,    PAL_EPSILON },       // value:  1 / pi
-        {  0.43429448190325183,    0.40890401183401433,    PAL_EPSILON },       // value:  log10(e)
-        {  0.63661977236758134,    0.56259360033158334,    PAL_EPSILON },       // value:  2 / pi
-        {  0.69314718055994531,    0.6,                    PAL_EPSILON },       // value:  ln(2)
-        {  0.70710678118654752,    0.60885936501391381,    PAL_EPSILON },       // value:  1 / sqrt(2)
-        {  0.78539816339744831,    0.65579420263267244,    PAL_EPSILON },       // value:  pi / 4
-        {  1,                      0.76159415595576489,    PAL_EPSILON },
-        {  1.1283791670955126,     0.81046380599898809,    PAL_EPSILON },       // value:  2 / sqrt(pi)
-        {  1.4142135623730950,     0.88838556158566054,    PAL_EPSILON },       // value:  sqrt(2)
-        {  1.4426950408889634,     0.89423894585503855,    PAL_EPSILON },       // value:  log2(e)
-        {  1.5707963267948966,     0.91715233566727435,    PAL_EPSILON },       // value:  pi / 2
-        {  2.3025850929940457,     0.98019801980198020,    PAL_EPSILON },       // value:  ln(10)
-        {  2.7182818284590452,     0.99132891580059984,    PAL_EPSILON },       // value:  e
-        {  3.1415926535897932,     0.99627207622074994,    PAL_EPSILON },       // value:  pi
-        {  PAL_POSINF,             1,                      PAL_EPSILON * 10 }
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        tanh_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        tanh_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-    
-    tanh_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/tanhf/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/tanhf/test1/test1.cpp
deleted file mode 100644
index 646e743fa418..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/tanhf/test1/test1.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================================
-**
-** Source: test1.c
-**
-** Purpose: Test to ensure that tanhf return the correct values
-** 
-** Dependencies: PAL_Initialize
-**               PAL_Terminate
-**               Fail
-**               fabs
-**
-**===========================================================================*/
-
-#include <palsuite.h>
-
-// binary32 (float) has a machine epsilon of 2^-23 (approx. 1.19e-07). However, this 
-// is slightly too accurate when writing tests meant to run against libm implementations
-// for various platforms. 2^-21 (approx. 4.76e-07) seems to be as accurate as we can get.
-//
-// The tests themselves will take PAL_EPSILON and adjust it according to the expected result
-// so that the delta used for comparison will compare the most significant digits and ignore
-// any digits that are outside the double precision range (6-9 digits).
-
-// For example, a test with an expect result in the format of 0.xxxxxxxxx will use PAL_EPSILON
-// for the variance, while an expected result in the format of 0.0xxxxxxxxx will use
-// PAL_EPSILON / 10 and and expected result in the format of x.xxxxxx will use PAL_EPSILON * 10.
-#define PAL_EPSILON 4.76837158e-07
-
-#define PAL_NAN     sqrtf(-1.0f)
-#define PAL_POSINF -logf(0.0f)
-#define PAL_NEGINF  logf(0.0f)
-
-/**
- * Helper test structure
- */
-struct test
-{
-    float value;     /* value to test the function with */
-    float expected;  /* expected result */
-    float variance;  /* maximum delta between the expected and actual result */
-};
-
-/**
- * tanhf_test1_validate
- *
- * test validation function
- */
-void __cdecl tanhf_test1_validate(float value, float expected, float variance)
-{
-    float result = tanhf(value);
-
-    /*
-     * The test is valid when the difference between result
-     * and expected is less than or equal to variance
-     */
-    float delta = fabsf(result - expected);
-
-    if (delta > variance)
-    {
-        Fail("tanhf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, expected);
-    }
-}
-
-/**
- * tanhf_test1_validate
- *
- * test validation function for values returning NaN
- */
-void __cdecl tanhf_test1_validate_isnan(float value)
-{
-    float result = tanhf(value);
-
-    if (!_isnanf(result))
-    {
-        Fail("tanhf(%g) returned %10.9g when it should have returned %10.9g",
-             value, result, PAL_NAN);
-    }
-}
-
-/**
- * main
- * 
- * executable entry point
- */
-PALTEST(c_runtime_tanhf_test1_paltest_tanhf_test1, "c_runtime/tanhf/test1/paltest_tanhf_test1")
-{
-    struct test tests[] = 
-    {
-        /* value            expected         variance */
-        {  0,               0,               PAL_EPSILON },
-        {  0.318309886f,    0.307977913f,    PAL_EPSILON },       // value:  1 / pi
-        {  0.434294482f,    0.408904012f,    PAL_EPSILON },       // value:  log10f(e)
-        {  0.636619772f,    0.562593600f,    PAL_EPSILON },       // value:  2 / pi
-        {  0.693147181f,    0.6f,            PAL_EPSILON },       // value:  ln(2)
-        {  0.707106781f,    0.608859365f,    PAL_EPSILON },       // value:  1 / sqrtf(2)
-        {  0.785398163f,    0.655794203f,    PAL_EPSILON },       // value:  pi / 4
-        {  1,               0.761594156f,    PAL_EPSILON },
-        {  1.12837917f,     0.810463806f,    PAL_EPSILON },       // value:  2 / sqrtf(pi)
-        {  1.41421356f,     0.888385562f,    PAL_EPSILON },       // value:  sqrtf(2)
-        {  1.44269504f,     0.894238946f,    PAL_EPSILON },       // value:  logf2(e)
-        {  1.57079633f,     0.917152336f,    PAL_EPSILON },       // value:  pi / 2
-        {  2.30258509f,     0.980198020f,    PAL_EPSILON },       // value:  ln(10)
-        {  2.71828183f,     0.991328916f,    PAL_EPSILON },       // value:  e
-        {  3.14159265f,     0.996272076f,    PAL_EPSILON },       // value:  pi
-        {  PAL_POSINF,      1,               PAL_EPSILON * 10 }
-    };
-
-    /* PAL initialization */
-    if (PAL_Initialize(argc, argv) != 0)
-    {
-        return FAIL;
-    }
-
-    for (int i = 0; i < (sizeof(tests) / sizeof(struct test)); i++)
-    {
-        tanhf_test1_validate( tests[i].value,  tests[i].expected, tests[i].variance);
-        tanhf_test1_validate(-tests[i].value, -tests[i].expected, tests[i].variance);
-    }
-    
-    tanhf_test1_validate_isnan(PAL_NAN);
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/time/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/time/test1/test1.cpp
deleted file mode 100644
index 72d905be0a51..000000000000
--- a/src/coreclr/pal/tests/palsuite/c_runtime/time/test1/test1.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*============================================================================
-**
-** Source:  test1.c
-**
-** Purpose: Calls the time function and verifies that the time returned
-**          is at least a positive value.
-**
-**
-**==========================================================================*/
-
-#include <palsuite.h>
-
-PALTEST(c_runtime_time_test1_paltest_time_test1, "c_runtime/time/test1/paltest_time_test1")
-{
-    time_t t = 0;
-
-    if (PAL_Initialize(argc, argv))
-    {
-        return FAIL;
-    }
-
-
-    time(&t);
-    /*I was going to test that the time returned didn't exceed some
-      reasonable value, but decided not to, for fear of creating my own
-      little Y2K-style disaster.*/
-
-    if (t <= 0)
-    {
-        Fail("time() function doesn't return a time.\n");
-    }
-    t = 0;
-    t = time(NULL);  
-    if (t <= 0)
-    {
-        Fail("time() function doesn't return a time.\n");
-    }
-    PAL_Terminate();
-    return PASS;
-}
-
-
-
-
-
-
-
diff --git a/src/coreclr/pal/tests/palsuite/c_runtime/wcstoul/test5/test5.cpp b/src/coreclr/pal/tests/palsuite/c_runtime/wcstoul/test5/test5.cpp
index 2ffab4b9de05..428a5f24caa6 100644
--- a/src/coreclr/pal/tests/palsuite/c_runtime/wcstoul/test5/test5.cpp
+++ b/src/coreclr/pal/tests/palsuite/c_runtime/wcstoul/test5/test5.cpp
@@ -32,9 +32,9 @@ PALTEST(c_runtime_wcstoul_test5_paltest_wcstoul_test5, "c_runtime/wcstoul/test5/
     errno = 0;
     l = wcstoul(overstr, &end, 10);
 
-    if (l != _UI32_MAX)
+    if (l != UINT32_MAX)
     {
-        Fail("ERROR: Expected wcstoul to return %u, got %u\n", _UI32_MAX, l);
+        Fail("ERROR: Expected wcstoul to return %u, got %u\n", UINT32_MAX, l);
     }
     if (end != overstr + 10)
     {
@@ -49,9 +49,9 @@ PALTEST(c_runtime_wcstoul_test5_paltest_wcstoul_test5, "c_runtime/wcstoul/test5/
     errno = 0;
     l = wcstoul(understr, &end, 10);
 
-    if (l != _UI32_MAX)
+    if (l != UINT32_MAX)
     {
-        Fail("ERROR: Expected wcstoul to return %u, got %u\n", _UI32_MAX, l);
+        Fail("ERROR: Expected wcstoul to return %u, got %u\n", UINT32_MAX, l);
     }
     if (end != understr + 2)
     {
diff --git a/src/coreclr/pal/tests/palsuite/common/palsuite.h b/src/coreclr/pal/tests/palsuite/common/palsuite.h
index c275b3812a25..9494daed71be 100644
--- a/src/coreclr/pal/tests/palsuite/common/palsuite.h
+++ b/src/coreclr/pal/tests/palsuite/common/palsuite.h
@@ -25,6 +25,7 @@ typedef unsigned short char16_t;
 #include <palprivate.h>
 #include <minipal/utils.h>
 #include <minipal/types.h>
+#include <errno.h>
 
 #define PALTEST(testfunc, testname) \
  int __cdecl testfunc(int argc, char* argv[]); \
diff --git a/src/coreclr/pal/tests/palsuite/compilableTests.txt b/src/coreclr/pal/tests/palsuite/compilableTests.txt
index 1b1d07245236..4d865fc63417 100644
--- a/src/coreclr/pal/tests/palsuite/compilableTests.txt
+++ b/src/coreclr/pal/tests/palsuite/compilableTests.txt
@@ -1,22 +1,5 @@
-c_runtime/abs/test1/paltest_abs_test1
-c_runtime/acos/test1/paltest_acos_test1
-c_runtime/acosf/test1/paltest_acosf_test1
-c_runtime/acosh/test1/paltest_acosh_test1
-c_runtime/acoshf/test1/paltest_acoshf_test1
-c_runtime/asin/test1/paltest_asin_test1
-c_runtime/asinf/test1/paltest_asinf_test1
-c_runtime/asinh/test1/paltest_asinh_test1
-c_runtime/asinhf/test1/paltest_asinhf_test1
-c_runtime/atan/test1/paltest_atan_test1
-c_runtime/atan2/test1/paltest_atan2_test1
-c_runtime/atan2f/test1/paltest_atan2f_test1
-c_runtime/atanf/test1/paltest_atanf_test1
-c_runtime/atanh/test1/paltest_atanh_test1
-c_runtime/atanhf/test1/paltest_atanhf_test1
 c_runtime/atof/test1/paltest_atof_test1
 c_runtime/atoi/test1/paltest_atoi_test1
-c_runtime/bsearch/test1/paltest_bsearch_test1
-c_runtime/bsearch/test2/paltest_bsearch_test2
 c_runtime/cbrt/test1/paltest_cbrt_test1
 c_runtime/cbrtf/test1/paltest_cbrtf_test1
 c_runtime/ceil/test1/paltest_ceil_test1
@@ -27,42 +10,6 @@ c_runtime/cosh/test1/paltest_cosh_test1
 c_runtime/coshf/test1/paltest_coshf_test1
 c_runtime/errno/test1/paltest_errno_test1
 c_runtime/errno/test2/paltest_errno_test2
-c_runtime/exit/test1/paltest_exit_test1
-c_runtime/exit/test2/paltest_exit_test2
-c_runtime/exp/test1/paltest_exp_test1
-c_runtime/expf/test1/paltest_expf_test1
-c_runtime/fabs/test1/paltest_fabs_test1
-c_runtime/fabsf/test1/paltest_fabsf_test1
-c_runtime/ferror/test1/paltest_ferror_test1
-c_runtime/ferror/test2/paltest_ferror_test2
-c_runtime/fflush/test1/paltest_fflush_test1
-c_runtime/fgets/test1/paltest_fgets_test1
-c_runtime/fgets/test2/paltest_fgets_test2
-c_runtime/fgets/test3/paltest_fgets_test3
-c_runtime/floor/test1/paltest_floor_test1
-c_runtime/floorf/test1/paltest_floorf_test1
-c_runtime/fma/test1/paltest_fma_test1
-c_runtime/fmaf/test1/paltest_fmaf_test1
-c_runtime/fmod/test1/paltest_fmod_test1
-c_runtime/fmodf/test1/paltest_fmodf_test1
-c_runtime/fopen/test1/paltest_fopen_test1
-c_runtime/fopen/test2/paltest_fopen_test2
-c_runtime/fopen/test3/paltest_fopen_test3
-c_runtime/fopen/test4/paltest_fopen_test4
-c_runtime/fopen/test5/paltest_fopen_test5
-c_runtime/fopen/test6/paltest_fopen_test6
-c_runtime/fopen/test7/paltest_fopen_test7
-c_runtime/fputs/test1/paltest_fputs_test1
-c_runtime/fputs/test2/paltest_fputs_test2
-c_runtime/fread/test1/paltest_fread_test1
-c_runtime/fread/test2/paltest_fread_test2
-c_runtime/fread/test3/paltest_fread_test3
-c_runtime/free/test1/paltest_free_test1
-c_runtime/fseek/test1/paltest_fseek_test1
-c_runtime/ftell/test1/paltest_ftell_test1
-c_runtime/fwrite/test1/paltest_fwrite_test1
-c_runtime/ilogb/test1/paltest_ilogb_test1
-c_runtime/ilogbf/test1/paltest_ilogbf_test1
 c_runtime/isalnum/test1/paltest_isalnum_test1
 c_runtime/isalpha/test1/paltest_isalpha_test1
 c_runtime/isdigit/test1/paltest_isdigit_test1
@@ -74,34 +21,10 @@ c_runtime/iswspace/test1/paltest_iswspace_test1
 c_runtime/iswupper/test1/paltest_iswupper_test1
 c_runtime/isxdigit/test1/paltest_isxdigit_test1
 c_runtime/llabs/test1/paltest_llabs_test1
-c_runtime/log/test1/paltest_log_test1
-c_runtime/log10/test1/paltest_log10_test1
-c_runtime/log10f/test1/paltest_log10f_test1
-c_runtime/log2/test1/paltest_log2_test1
-c_runtime/log2f/test1/paltest_log2f_test1
-c_runtime/logf/test1/paltest_logf_test1
-c_runtime/malloc/test1/paltest_malloc_test1
-c_runtime/malloc/test2/paltest_malloc_test2
 c_runtime/memchr/test1/paltest_memchr_test1
 c_runtime/memcmp/test1/paltest_memcmp_test1
 c_runtime/memmove/test1/paltest_memmove_test1
 c_runtime/memset/test1/paltest_memset_test1
-c_runtime/modf/test1/paltest_modf_test1
-c_runtime/modff/test1/paltest_modff_test1
-c_runtime/pow/test1/paltest_pow_test1
-c_runtime/powf/test1/paltest_powf_test1
-c_runtime/qsort/test1/paltest_qsort_test1
-c_runtime/qsort/test2/paltest_qsort_test2
-c_runtime/rand_srand/test1/paltest_rand_srand_test1
-c_runtime/realloc/test1/paltest_realloc_test1
-c_runtime/sin/test1/paltest_sin_test1
-c_runtime/sincos/test1/paltest_sincos_test1
-c_runtime/sincosf/test1/paltest_sincosf_test1
-c_runtime/sinf/test1/paltest_sinf_test1
-c_runtime/sinh/test1/paltest_sinh_test1
-c_runtime/sinhf/test1/paltest_sinhf_test1
-c_runtime/sqrt/test1/paltest_sqrt_test1
-c_runtime/sqrtf/test1/paltest_sqrtf_test1
 c_runtime/sscanf_s/test1/paltest_sscanf_test1
 c_runtime/sscanf_s/test10/paltest_sscanf_test10
 c_runtime/sscanf_s/test11/paltest_sscanf_test11
@@ -130,11 +53,6 @@ c_runtime/strncpy/test1/paltest_strncpy_test1
 c_runtime/strpbrk/test1/paltest_strpbrk_test1
 c_runtime/strrchr/test1/paltest_strrchr_test1
 c_runtime/strstr/test1/paltest_strstr_test1
-c_runtime/tan/test1/paltest_tan_test1
-c_runtime/tanf/test1/paltest_tanf_test1
-c_runtime/tanh/test1/paltest_tanh_test1
-c_runtime/tanhf/test1/paltest_tanhf_test1
-c_runtime/time/test1/paltest_time_test1
 c_runtime/tolower/test1/paltest_tolower_test1
 c_runtime/toupper/test1/paltest_toupper_test1
 c_runtime/towlower/test1/paltest_towlower_test1
@@ -158,10 +76,6 @@ c_runtime/wcstoul/test4/paltest_wcstoul_test4
 c_runtime/wcstoul/test5/paltest_wcstoul_test5
 c_runtime/wcstoul/test6/paltest_wcstoul_test6
 c_runtime/_alloca/test1/paltest_alloca_test1
-c_runtime/_finite/test1/paltest_finite_test1
-c_runtime/_finitef/test1/paltest_finitef_test1
-c_runtime/_isnan/test1/paltest_isnan_test1
-c_runtime/_isnanf/test1/paltest_isnanf_test1
 c_runtime/_itow/test1/paltest_itow_test1
 c_runtime/_putenv/test1/paltest_putenv_test1
 c_runtime/_putenv/test2/paltest_putenv_test2
@@ -196,7 +110,6 @@ c_runtime/_wfopen/test5/paltest_wfopen_test5
 c_runtime/_wfopen/test6/paltest_wfopen_test6
 c_runtime/_wfopen/test7/paltest_wfopen_test7
 c_runtime/_wtoi/test1/paltest_wtoi_test1
-c_runtime/__iscsym/test1/paltest_iscsym_test1
 debug_api/OutputDebugStringA/test1/paltest_outputdebugstringa_test1
 debug_api/OutputDebugStringW/test1/paltest_outputdebugstringw_test1
 exception_handling/RaiseException/test1/paltest_raiseexception_test1
@@ -409,7 +322,6 @@ miscellaneous/SetEnvironmentVariableW/test3/paltest_setenvironmentvariablew_test
 miscellaneous/SetEnvironmentVariableW/test4/paltest_setenvironmentvariablew_test4
 miscellaneous/SetLastError/test1/paltest_setlasterror_test1
 miscellaneous/_i64tow/test1/paltest_i64tow_test1
-pal_specific/PAL_errno/test1/paltest_pal_errno_test1
 pal_specific/PAL_GetUserTempDirectoryW/test1/paltest_pal_getusertempdirectoryw_test1
 pal_specific/PAL_Initialize_Terminate/test1/paltest_pal_initialize_terminate_test1
 pal_specific/PAL_Initialize_Terminate/test2/paltest_pal_initialize_terminate_test2
diff --git a/src/coreclr/pal/tests/palsuite/debug_api/OutputDebugStringA/test1/test1.cpp b/src/coreclr/pal/tests/palsuite/debug_api/OutputDebugStringA/test1/test1.cpp
index 98f0a1b95b0a..a8f55d7f9c04 100644
--- a/src/coreclr/pal/tests/palsuite/debug_api/OutputDebugStringA/test1/test1.cpp
+++ b/src/coreclr/pal/tests/palsuite/debug_api/OutputDebugStringA/test1/test1.cpp
@@ -44,13 +44,10 @@ PALTEST(debug_api_OutputDebugStringA_test1_paltest_outputdebugstringa_test1, "de
                         FALSE, 0, NULL, NULL, &si, &pi))
     {
         DWORD dwError = GetLastError();
-        free(name);
         Fail("ERROR: CreateProcess failed to load executable 'helper'.  "
              "GetLastError() returned %d.\n", dwError);
     }
 
-    free(name);
-
     /* This is the main loop.  It exits when the process which is being
        debugged is finished executing.
     */
diff --git a/src/coreclr/pal/tests/palsuite/manual-unautomatable.dat b/src/coreclr/pal/tests/palsuite/manual-unautomatable.dat
index c7a2a3913e7b..78e1831fbc6a 100644
--- a/src/coreclr/pal/tests/palsuite/manual-unautomatable.dat
+++ b/src/coreclr/pal/tests/palsuite/manual-unautomatable.dat
@@ -1,9 +1,6 @@
 # Licensed to the .NET Foundation under one or more agreements.
 # The .NET Foundation licenses this file to you under the MIT license.
 
-#This test is negative and will exit with exit(1).
-#Therefore, the harness would record it as a failure
-c_runtime/exit/test2,1
 # A successful DebugBreak test run dumps core or throws up an ASSERT
 # dialog box (or...) and returns an exit code != 0
 debug_api/debugbreak/test1,1
diff --git a/src/coreclr/pal/tests/palsuite/pal_specific/PAL_errno/test1/PAL_errno.cpp b/src/coreclr/pal/tests/palsuite/pal_specific/PAL_errno/test1/PAL_errno.cpp
deleted file mode 100644
index 29f2da53b055..000000000000
--- a/src/coreclr/pal/tests/palsuite/pal_specific/PAL_errno/test1/PAL_errno.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================
-**
-** Source: pal_errno.c
-**
-** Purpose: Positive test the PAL_errno API.
-**          call PAL_errno to retrieve the pointer to 
-**          the per-thread errno value.
-**
-**
-**============================================================*/
-#include <palsuite.h>
-
-PALTEST(pal_specific_PAL_errno_test1_paltest_pal_errno_test1, "pal_specific/PAL_errno/test1/paltest_pal_errno_test1")
-{
-    int err;
-    FILE *pFile = NULL;
-
-    /*Initialize the PAL environment*/
-    err = PAL_Initialize(argc, argv);
-    if( 0 != err)
-    {
-        return FAIL;
-    }
-    
-    /*Try to open a not-exist file to read to generate an error*/
-    pFile = fopen( "no_exist_file_name", "r" );
-    
-    if( NULL != pFile )
-    {
-        Trace("\nFailed to call fopen to open a not exist for reading, "
-                "an error is expected, but no error occurred\n");
-
-        if( EOF == fclose( pFile ) )
-        {
-            Trace("\nFailed to call fclose to close a file stream\n");
-        }
-        Fail( "Test failed! fopen() Should not have worked!" );
-    }
-
-    /*retrieve the per-thread error value pointer*/
-    if( 2 != errno )
-    {
-        Fail("\nFailed to call PAL_errno API, this value is not correct."
-             " The correct value is ENOENT[2] ( No such file or directory.).\n");
-    }
-    
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/pal_specific/PAL_get_stderr/test1/PAL_get_stderr.cpp b/src/coreclr/pal/tests/palsuite/pal_specific/PAL_get_stderr/test1/PAL_get_stderr.cpp
deleted file mode 100644
index 2377505158c6..000000000000
--- a/src/coreclr/pal/tests/palsuite/pal_specific/PAL_get_stderr/test1/PAL_get_stderr.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================
-**
-** Source: pal_get_stderr.c
-**
-** Purpose: Positive test the PAL_get_stderr API.
-**          Call PAL_get_stderr to retrieve the PAL standard error
-**          output stream pointer.
-**          This test case should be run manually and automatically.
-**          
-
-**
-**============================================================*/
-#include <palsuite.h>
-
-PALTEST(pal_specific_PAL_get_stderr_test1_paltest_pal_get_stderr_test1, "pal_specific/PAL_get_stderr/test1/paltest_pal_get_stderr_test1")
-{
-    int err;
-    FILE *pPAL_stderr = NULL;  
-    const char *pMsg = "\nThis is a PAL_get_stderr test message, "
-                    "not an error message!\n";
-
-    /*Initialize the PAL environment*/
-    err = PAL_Initialize(argc, argv);
-    if(0 != err)
-    {
-        return FAIL;
-    }
-
-    /*retrieve the PAL standard error output stream pointer*/
-    pPAL_stderr = PAL_get_stderr();  
-
-    if(NULL == pPAL_stderr)
-    {
-        Fail("\nFailed to call PAL_get_stderr API, error code = %u\n",
-                GetLastError());
-    }    
-    
-    /*output a test message through PAL standard error stream*/    
-    err = fputs(pMsg, pPAL_stderr);
-    if(EOF == err)
-    {
-        Fail("\nFailed to call fputs to output message to PAL stdandard "
-                "error stream, error code=%u\n", GetLastError());
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/pal_specific/PAL_get_stdin/test1/PAL_get_stdin.cpp b/src/coreclr/pal/tests/palsuite/pal_specific/PAL_get_stdin/test1/PAL_get_stdin.cpp
deleted file mode 100644
index 91d8a3a9a650..000000000000
--- a/src/coreclr/pal/tests/palsuite/pal_specific/PAL_get_stdin/test1/PAL_get_stdin.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================
-**
-** Source: pal_get_stdin.c
-**
-** Purpose: Positive test the PAL_get_stdout API.
-**          Call PAL_get_stdin to retrieve the PAL standard input
-**          stream pointer.
-**          This test case should be run manually.
-**          
-
-**
-**============================================================*/
-#include <palsuite.h>
-
-PALTEST(pal_specific_PAL_get_stdin_test1_paltest_pal_get_stdin_test1, "pal_specific/PAL_get_stdin/test1/paltest_pal_get_stdin_test1")
-{
-    int err;
-    FILE *pPAL_stdin = NULL;  
-    char Buffer[256];
-    
-    /*Initialize the PAL environment*/
-    err = PAL_Initialize(argc, argv);
-    if(0 != err)
-    {
-        return FAIL;
-    }
-
-    /*retrieve the PAL standard input stream pointer*/
-    pPAL_stdin = PAL_get_stdin();  
-    if(NULL == pPAL_stdin)
-    {
-        Fail("\nFailed to call PAL_get_stdin API to retrieve the "
-                "PAL standard input stream pointer, "
-                "error code = %u\n", GetLastError());
-    }    
-
-    /*zero the buffer*/
-    memset(Buffer, 0, 256);
-
-    printf("\nPlease input some words: (less than 255 characters)\n");     
-
-    /*further test the input stream*/
-    /*read message from the PAL standard input stream*/
-    if(NULL == fgets(Buffer, 255, pPAL_stdin))
-    {
-        Fail( "Failed to call fgets to get a string from PAL standard "
-            "input stream, error code=%u\n", GetLastError());
-    }    
-    else
-    {
-        if(1 == strlen(Buffer) && Buffer[0] == '\n')
-        {
-            printf("\nEmpty input!\n");
-        }
-        else
-        {
-            printf("\nYour input words are:\n%s\n", Buffer);
-        }
-    }
-
-    
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/pal_specific/PAL_get_stdout/test1/PAL_get_stdout.cpp b/src/coreclr/pal/tests/palsuite/pal_specific/PAL_get_stdout/test1/PAL_get_stdout.cpp
deleted file mode 100644
index d891e9ac40ab..000000000000
--- a/src/coreclr/pal/tests/palsuite/pal_specific/PAL_get_stdout/test1/PAL_get_stdout.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*=============================================================
-**
-** Source: pal_get_stdout.c
-**
-** Purpose: Positive test the PAL_get_stdout API.
-**          Call PAL_get_stdout to retrieve the PAL standard output
-**          stream pointer.
-**          This test case should be run manually and automatically.
-**          
-
-**
-**============================================================*/
-#include <palsuite.h>
-
-PALTEST(pal_specific_PAL_get_stdout_test1_paltest_pal_get_stdout_test1, "pal_specific/PAL_get_stdout/test1/paltest_pal_get_stdout_test1")
-{
-    int err;
-    FILE *pPAL_stdout = NULL;  
-    const char *pMsg = "\nThis is a PAL_get_stdout test output message, "
-                    "not an error message!\n";
-
-    /*Initialize the PAL environment*/
-    err = PAL_Initialize(argc, argv);
-    if(0 != err)
-    {
-        return FAIL;
-    }
-
-    /*retrieve the PAL output stream pointer*/
-    pPAL_stdout = PAL_get_stdout();    
-    if(NULL == pPAL_stdout)
-    {
-        Fail("\nFailed to call PAL_get_stdout API to retrieve the "
-            "standard PAL output stream pointer, error code=%u\n",
-            GetLastError());
-    }
-
-    /*output a test message through PAL standard output stream*/    
-    err = fputs(pMsg, pPAL_stdout);
-    if(EOF == err)
-    {
-        Fail("\nFailed to call fputs to output message to PAL stdandard "
-                "output stream, error code=%u\n", GetLastError());
-    }
-
-    PAL_Terminate();
-    return PASS;
-}
diff --git a/src/coreclr/pal/tests/palsuite/paltestlist.txt b/src/coreclr/pal/tests/palsuite/paltestlist.txt
index eff6efb16290..e141789e71f0 100644
--- a/src/coreclr/pal/tests/palsuite/paltestlist.txt
+++ b/src/coreclr/pal/tests/palsuite/paltestlist.txt
@@ -1,60 +1,5 @@
-c_runtime/abs/test1/paltest_abs_test1
-c_runtime/acos/test1/paltest_acos_test1
-c_runtime/acosf/test1/paltest_acosf_test1
-c_runtime/acosh/test1/paltest_acosh_test1
-c_runtime/acoshf/test1/paltest_acoshf_test1
-c_runtime/asin/test1/paltest_asin_test1
-c_runtime/asinf/test1/paltest_asinf_test1
-c_runtime/asinh/test1/paltest_asinh_test1
-c_runtime/asinhf/test1/paltest_asinhf_test1
-c_runtime/atan/test1/paltest_atan_test1
-c_runtime/atan2/test1/paltest_atan2_test1
-c_runtime/atan2f/test1/paltest_atan2f_test1
-c_runtime/atanf/test1/paltest_atanf_test1
-c_runtime/atanh/test1/paltest_atanh_test1
-c_runtime/atanhf/test1/paltest_atanhf_test1
 c_runtime/atof/test1/paltest_atof_test1
 c_runtime/atoi/test1/paltest_atoi_test1
-c_runtime/bsearch/test1/paltest_bsearch_test1
-c_runtime/bsearch/test2/paltest_bsearch_test2
-c_runtime/cbrt/test1/paltest_cbrt_test1
-c_runtime/cbrtf/test1/paltest_cbrtf_test1
-c_runtime/ceil/test1/paltest_ceil_test1
-c_runtime/ceilf/test1/paltest_ceilf_test1
-c_runtime/cos/test1/paltest_cos_test1
-c_runtime/cosf/test1/paltest_cosf_test1
-c_runtime/cosh/test1/paltest_cosh_test1
-c_runtime/coshf/test1/paltest_coshf_test1
-c_runtime/errno/test1/paltest_errno_test1
-c_runtime/errno/test2/paltest_errno_test2
-c_runtime/exit/test1/paltest_exit_test1
-c_runtime/exp/test1/paltest_exp_test1
-c_runtime/expf/test1/paltest_expf_test1
-c_runtime/fabs/test1/paltest_fabs_test1
-c_runtime/fabsf/test1/paltest_fabsf_test1
-c_runtime/fflush/test1/paltest_fflush_test1
-c_runtime/fgets/test1/paltest_fgets_test1
-c_runtime/fgets/test2/paltest_fgets_test2
-c_runtime/fgets/test3/paltest_fgets_test3
-c_runtime/floor/test1/paltest_floor_test1
-c_runtime/floorf/test1/paltest_floorf_test1
-c_runtime/fma/test1/paltest_fma_test1
-c_runtime/fmaf/test1/paltest_fmaf_test1
-c_runtime/fmod/test1/paltest_fmod_test1
-c_runtime/fmodf/test1/paltest_fmodf_test1
-c_runtime/fopen/test1/paltest_fopen_test1
-c_runtime/fopen/test2/paltest_fopen_test2
-c_runtime/fopen/test3/paltest_fopen_test3
-c_runtime/fopen/test4/paltest_fopen_test4
-c_runtime/fopen/test5/paltest_fopen_test5
-c_runtime/fopen/test6/paltest_fopen_test6
-c_runtime/fopen/test7/paltest_fopen_test7
-c_runtime/fputs/test1/paltest_fputs_test1
-c_runtime/free/test1/paltest_free_test1
-c_runtime/fseek/test1/paltest_fseek_test1
-c_runtime/fwrite/test1/paltest_fwrite_test1
-c_runtime/ilogb/test1/paltest_ilogb_test1
-c_runtime/ilogbf/test1/paltest_ilogbf_test1
 c_runtime/isalnum/test1/paltest_isalnum_test1
 c_runtime/isalpha/test1/paltest_isalpha_test1
 c_runtime/isdigit/test1/paltest_isdigit_test1
@@ -65,35 +10,10 @@ c_runtime/iswdigit/test1/paltest_iswdigit_test1
 c_runtime/iswspace/test1/paltest_iswspace_test1
 c_runtime/iswupper/test1/paltest_iswupper_test1
 c_runtime/isxdigit/test1/paltest_isxdigit_test1
-c_runtime/llabs/test1/paltest_llabs_test1
-c_runtime/log/test1/paltest_log_test1
-c_runtime/log2/test1/paltest_log2_test1
-c_runtime/log2f/test1/paltest_log2f_test1
-c_runtime/log10/test1/paltest_log10_test1
-c_runtime/log10f/test1/paltest_log10f_test1
-c_runtime/logf/test1/paltest_logf_test1
-c_runtime/malloc/test1/paltest_malloc_test1
-c_runtime/malloc/test2/paltest_malloc_test2
 c_runtime/memchr/test1/paltest_memchr_test1
 c_runtime/memcmp/test1/paltest_memcmp_test1
 c_runtime/memmove/test1/paltest_memmove_test1
 c_runtime/memset/test1/paltest_memset_test1
-c_runtime/modf/test1/paltest_modf_test1
-c_runtime/modff/test1/paltest_modff_test1
-c_runtime/pow/test1/paltest_pow_test1
-c_runtime/powf/test1/paltest_powf_test1
-c_runtime/qsort/test1/paltest_qsort_test1
-c_runtime/qsort/test2/paltest_qsort_test2
-c_runtime/rand_srand/test1/paltest_rand_srand_test1
-c_runtime/realloc/test1/paltest_realloc_test1
-c_runtime/sin/test1/paltest_sin_test1
-c_runtime/sincos/test1/paltest_sincos_test1
-c_runtime/sincosf/test1/paltest_sincosf_test1
-c_runtime/sinf/test1/paltest_sinf_test1
-c_runtime/sinh/test1/paltest_sinh_test1
-c_runtime/sinhf/test1/paltest_sinhf_test1
-c_runtime/sqrt/test1/paltest_sqrt_test1
-c_runtime/sqrtf/test1/paltest_sqrtf_test1
 c_runtime/sscanf_s/test1/paltest_sscanf_test1
 c_runtime/sscanf_s/test10/paltest_sscanf_test10
 c_runtime/sscanf_s/test11/paltest_sscanf_test11
@@ -122,11 +42,6 @@ c_runtime/strncpy/test1/paltest_strncpy_test1
 c_runtime/strpbrk/test1/paltest_strpbrk_test1
 c_runtime/strrchr/test1/paltest_strrchr_test1
 c_runtime/strstr/test1/paltest_strstr_test1
-c_runtime/tan/test1/paltest_tan_test1
-c_runtime/tanf/test1/paltest_tanf_test1
-c_runtime/tanh/test1/paltest_tanh_test1
-c_runtime/tanhf/test1/paltest_tanhf_test1
-c_runtime/time/test1/paltest_time_test1
 c_runtime/tolower/test1/paltest_tolower_test1
 c_runtime/toupper/test1/paltest_toupper_test1
 c_runtime/towlower/test1/paltest_towlower_test1
@@ -150,10 +65,6 @@ c_runtime/wcstoul/test4/paltest_wcstoul_test4
 c_runtime/wcstoul/test5/paltest_wcstoul_test5
 c_runtime/wcstoul/test6/paltest_wcstoul_test6
 c_runtime/_alloca/test1/paltest_alloca_test1
-c_runtime/_finite/test1/paltest_finite_test1
-c_runtime/_finitef/test1/paltest_finitef_test1
-c_runtime/_isnan/test1/paltest_isnan_test1
-c_runtime/_isnanf/test1/paltest_isnanf_test1
 c_runtime/_itow/test1/paltest_itow_test1
 c_runtime/_putenv/test1/paltest_putenv_test1
 c_runtime/_putenv/test2/paltest_putenv_test2
@@ -188,7 +99,6 @@ c_runtime/_wfopen/test5/paltest_wfopen_test5
 c_runtime/_wfopen/test6/paltest_wfopen_test6
 c_runtime/_wfopen/test7/paltest_wfopen_test7
 c_runtime/_wtoi/test1/paltest_wtoi_test1
-c_runtime/__iscsym/test1/paltest_iscsym_test1
 debug_api/OutputDebugStringW/test1/paltest_outputdebugstringw_test1
 exception_handling/RaiseException/test1/paltest_raiseexception_test1
 exception_handling/RaiseException/test2/paltest_raiseexception_test2
@@ -363,7 +273,6 @@ miscellaneous/SetEnvironmentVariableW/test3/paltest_setenvironmentvariablew_test
 miscellaneous/SetEnvironmentVariableW/test4/paltest_setenvironmentvariablew_test4
 miscellaneous/SetLastError/test1/paltest_setlasterror_test1
 miscellaneous/_i64tow/test1/paltest_i64tow_test1
-pal_specific/PAL_errno/test1/paltest_pal_errno_test1
 pal_specific/PAL_Initialize_Terminate/test1/paltest_pal_initialize_terminate_test1
 pal_specific/PAL_Initialize_Terminate/test2/paltest_pal_initialize_terminate_test2
 samples/test1/paltest_samples_test1
diff --git a/src/coreclr/pal/tests/palsuite/paltestlist_to_be_reviewed.txt b/src/coreclr/pal/tests/palsuite/paltestlist_to_be_reviewed.txt
index 2dbbd64e9f2b..bc5589f4323c 100644
--- a/src/coreclr/pal/tests/palsuite/paltestlist_to_be_reviewed.txt
+++ b/src/coreclr/pal/tests/palsuite/paltestlist_to_be_reviewed.txt
@@ -1,7 +1,6 @@
 This is a list of failing PAL tests that need to be reviewed because.
 They should either be fixed or deleted if they are no longer applicable.
 
-c_runtime/exit/test2/paltest_exit_test2
 c_runtime/ferror/test1/paltest_ferror_test1
 c_runtime/ferror/test2/paltest_ferror_test2
 c_runtime/fputs/test2/paltest_fputs_test2
diff --git a/src/coreclr/pal/tests/palsuite/runpaltests.sh b/src/coreclr/pal/tests/palsuite/runpaltests.sh
index c10930e2acc3..39c492212bc8 100755
--- a/src/coreclr/pal/tests/palsuite/runpaltests.sh
+++ b/src/coreclr/pal/tests/palsuite/runpaltests.sh
@@ -9,7 +9,7 @@ then
   echo "runpaltests.sh <path to root build directory of the pal tests>  [<path to test output folder>] [<path to temp folder for PAL tests>]"
   echo
   echo "For example:"
-  echo "runpaltests.sh /projectk/build/debug"
+  echo "runpaltests.sh artifacts/bin/coreclr/linux.x64.Debug/paltests/"
   echo
   exit 1
 fi
diff --git a/src/coreclr/pal/tests/palsuite/tests-manual.dat b/src/coreclr/pal/tests/palsuite/tests-manual.dat
index b87a39486af1..3f32f49df043 100644
--- a/src/coreclr/pal/tests/palsuite/tests-manual.dat
+++ b/src/coreclr/pal/tests/palsuite/tests-manual.dat
@@ -1,7 +1,6 @@
 # Licensed to the .NET Foundation under one or more agreements.
 # The .NET Foundation licenses this file to you under the MIT license.
 
-c_runtime/exit/test2,1
 pal_specific/pal_get_stderr/test1,1
 pal_specific/pal_get_stdin/test1,1
 pal_specific/pal_get_stdout/test1,1
diff --git a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test8/test8.cpp b/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test8/test8.cpp
index 24f22afa456c..8081b69109a9 100644
--- a/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test8/test8.cpp
+++ b/src/coreclr/pal/tests/palsuite/threading/CriticalSectionFunctions/test8/test8.cpp
@@ -12,6 +12,7 @@
 **
 **===================================================================*/
 #include <palsuite.h>
+#include <time.h>
 
 #define MAX_THREAD_COUNT       128
 #define DEFAULT_THREAD_COUNT   10
diff --git a/src/coreclr/pal/tests/palsuite/threading/WaitForMultipleObjectsEx/test6/test6.cpp b/src/coreclr/pal/tests/palsuite/threading/WaitForMultipleObjectsEx/test6/test6.cpp
index 80ecbaa2016c..6ac838bd24b0 100644
--- a/src/coreclr/pal/tests/palsuite/threading/WaitForMultipleObjectsEx/test6/test6.cpp
+++ b/src/coreclr/pal/tests/palsuite/threading/WaitForMultipleObjectsEx/test6/test6.cpp
@@ -12,6 +12,7 @@
 **=========================================================*/
 
 #include <palsuite.h>
+#include <time.h>
 
 #define MAX_COUNT 10000
 #define MAX_THREADS 256
diff --git a/src/coreclr/palrt/memorystream.cpp b/src/coreclr/palrt/memorystream.cpp
index 0ed06547f3bf..91a5ca8b2d31 100644
--- a/src/coreclr/palrt/memorystream.cpp
+++ b/src/coreclr/palrt/memorystream.cpp
@@ -23,6 +23,10 @@ Revision History:
 #include "common.h"
 
 #include "objidl.h"
+#include <algorithm>
+
+using std::min;
+using std::max;
 
 class MemoryStream : public IStream
 {
@@ -44,7 +48,7 @@ class MemoryStream : public IStream
             n = min(2 * n, n + n / 4 + 0x100000);
 
             // don't allocate tiny chunks
-            n = max(n, 0x100);
+            n = max(n, (ULONG)0x100);
 
             // compare with the hard limit
             nNewData = max(n, nNewData);
diff --git a/src/coreclr/runtime-prereqs.proj b/src/coreclr/runtime-prereqs.proj
index b1d1cf8b041d..6bbe50f7d550 100644
--- a/src/coreclr/runtime-prereqs.proj
+++ b/src/coreclr/runtime-prereqs.proj
@@ -13,6 +13,10 @@
   <Import Project="$(RepositoryEngineeringDir)versioning.targets" />
   <Import Project="$(RepositoryEngineeringDir)nativepgo.targets" />
 
+  <ItemGroup>
+    <ProjectReference Include="$(RepoRoot)\src\native\managed\compile-native.proj" ReferenceOutputAssembly="false"/>
+  </ItemGroup>
+
   <Target Name="BuildPrereqs" BeforeTargets="Build" DependsOnTargets="GenerateRuntimeVersionFile;GenerateNativeSourcelinkFile;OutputPgoPathForCI" />
   <Import Project="Sdk.targets" Sdk="Microsoft.Build.NoTargets" />
   <!--
diff --git a/src/coreclr/runtime.proj b/src/coreclr/runtime.proj
index f6ea75679e90..600a3bc6d130 100644
--- a/src/coreclr/runtime.proj
+++ b/src/coreclr/runtime.proj
@@ -1,7 +1,6 @@
 <Project Sdk="Microsoft.Build.NoTargets">
 
   <PropertyGroup>
-    <NativeBuildPartitionPropertiesToRemove>ClrFullNativeBuild;ClrRuntimeSubset;ClrJitSubset;ClrPalTestsSubset;ClrAllJitsSubset;ClrILToolsSubset;ClrNativeAotSubset;ClrSpmiSubset;ClrCrossComponentsSubset;ClrDebugSubset;HostArchitecture;PgoInstrument;NativeOptimizationDataSupported;CMakeArgs</NativeBuildPartitionPropertiesToRemove>
     <_IcuDir Condition="'$(PkgMicrosoft_NETCore_Runtime_ICU_Transport)' != ''">$(PkgMicrosoft_NETCore_Runtime_ICU_Transport)/runtimes/$(TargetOS)-$(TargetArchitecture)$(_RuntimeVariant)/native</_IcuDir>
 
     <_BuildNativeTargetOS>$(TargetOS)</_BuildNativeTargetOS>
diff --git a/src/coreclr/scripts/emitUnitTests.sh b/src/coreclr/scripts/emitUnitTests.sh
index 9738dc5c50e9..0d08f5bf8a49 100755
--- a/src/coreclr/scripts/emitUnitTests.sh
+++ b/src/coreclr/scripts/emitUnitTests.sh
@@ -109,6 +109,7 @@ cut -f 3- -d ' ' $output_dir/capstone_output.txt \
 
 if [ -n "$verbose" ]; then
     egrep "$verbose" $output_dir/clr_instrs.txt
+    egrep "$verbose" $output_dir/capstone_output.txt
 else
     (head -n 5; tail -n 5) < $output_dir/clr_instrs.txt
 fi
diff --git a/src/coreclr/scripts/genDummyProvider.py b/src/coreclr/scripts/genDummyProvider.py
index ccf421421ca0..90ec297b0bf0 100644
--- a/src/coreclr/scripts/genDummyProvider.py
+++ b/src/coreclr/scripts/genDummyProvider.py
@@ -124,8 +124,6 @@ def generateDummyFiles(etwmanifest, out_dirname, runtimeFlavor, extern, dryRun):
 #include "pal_mstypes.h"
 #include "pal_error.h"
 #include "pal.h"
-#define PAL_free free
-#define PAL_realloc realloc
 #include "pal/stackstring.hpp"
 #endif
 
diff --git a/src/coreclr/scripts/genLttngProvider.py b/src/coreclr/scripts/genLttngProvider.py
index 70affdbbfe01..fc93f240315c 100644
--- a/src/coreclr/scripts/genLttngProvider.py
+++ b/src/coreclr/scripts/genLttngProvider.py
@@ -88,7 +88,7 @@
         "win:Binary"        :"const BYTE"
         }
 
-monoLttngDataTypeMapping ={
+portableLttngDataTypeMapping ={
         #constructed types
         "win:null"          :" ",
         "win:Int64"         :"const int64_t",
@@ -113,8 +113,8 @@
 def getLttngDataTypeMapping(runtimeFlavor):
     if runtimeFlavor.coreclr:
         return coreCLRLttngDataTypeMapping
-    elif runtimeFlavor.mono:
-        return monoLttngDataTypeMapping
+    else:
+        return portableLttngDataTypeMapping
 
 ctfDataTypeMapping ={
         #constructed types
@@ -580,8 +580,6 @@ def generateLttngFiles(etwmanifest, eventprovider_directory, runtimeFlavor, dryR
 #include "pal_mstypes.h"
 #include "pal_error.h"
 #include "pal.h"
-#define PAL_free free
-#define PAL_realloc realloc
 #include "pal/stackstring.hpp"
 """)
                 lttngimpl_file.write("#include \"" + lttngevntheadershortname + "\"\n\n")
diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py
index baca765929ce..99ea6e65e52b 100644
--- a/src/coreclr/scripts/superpmi.py
+++ b/src/coreclr/scripts/superpmi.py
@@ -22,6 +22,7 @@
 import datetime
 import locale
 import logging
+import math
 import os
 import multiprocessing
 import platform
@@ -226,6 +227,10 @@
 Compile only specified method contexts, e.g., `-compile 20,25`. This is passed directly to the superpmi.exe `-compile` argument. See `superpmi.exe -?` for full documentation about allowed formats.
 """
 
+produce_repro_help = """\
+If passed, produce the *.mc repro files.
+"""
+
 # Start of parser object creation.
 
 parser = argparse.ArgumentParser(description=description)
@@ -296,7 +301,7 @@ def add_core_root_arguments(parser, build_type_default, build_type_help):
 collect_parser.add_argument("-output_mch_path", help="Location to place the final MCH file. Default is a constructed file name in the current directory.")
 collect_parser.add_argument("--merge_mch_files", action="store_true", help="Merge multiple MCH files. Use the -mch_files flag to pass a list of MCH files to merge.")
 collect_parser.add_argument("-mch_files", metavar="MCH_FILE", nargs='+', help="Pass a sequence of MCH files which will be merged. Required by --merge_mch_files.")
-collect_parser.add_argument("--use_zapdisable", action="store_true", help="Sets DOTNET_ZapDisable=1 and DOTNET_ReadyToRun=0 when doing collection to cause NGEN/ReadyToRun images to not be used, and thus causes JIT compilation and SuperPMI collection of these methods.")
+collect_parser.add_argument("--disable_r2r", action="store_true", help="Sets DOTNET_ReadyToRun=0 when doing collection to cause ReadyToRun images to not be used, and thus causes JIT compilation and SuperPMI collection of these methods.")
 collect_parser.add_argument("--tiered_compilation", action="store_true", help="Sets DOTNET_TieredCompilation=1 when doing collections.")
 collect_parser.add_argument("--tiered_pgo", action="store_true", help="Sets DOTNET_TieredCompilation=1 and DOTNET_TieredPGO=1 when doing collections.")
 collect_parser.add_argument("--ci", action="store_true", help="Special collection mode for handling zero-sized files in Azure DevOps + Helix pipelines collections.")
@@ -322,6 +327,7 @@ def add_core_root_arguments(parser, build_type_default, build_type_help):
 replay_common_parser.add_argument("-jit_ee_version", help=jit_ee_version_help)
 replay_common_parser.add_argument("-private_store", action="append", help=private_store_help)
 replay_common_parser.add_argument("-compile", "-c", help=compile_help)
+replay_common_parser.add_argument("--produce_repro", action="store_true", help=produce_repro_help)
 
 # subparser for replay
 replay_parser = subparsers.add_parser("replay", description=replay_description, parents=[core_root_parser, target_parser, superpmi_common_parser, replay_common_parser])
@@ -505,9 +511,10 @@ def create_artifacts_base_name(coreclr_args, mch_file):
     return artifacts_base_name
 
 def read_csv(path):
-    with open(path) as csv_file:
+    with open(path, encoding="utf-8") as csv_file:
         reader = csv.DictReader(csv_file)
-        return list(reader)
+        for row in reader:
+            yield row
 
 def decode_clrjit_build_string(clrjit_path):
     """ Obtain information about the compiler that was used to compile the clrjit at the specified path.
@@ -520,7 +527,7 @@ def decode_clrjit_build_string(clrjit_path):
     with open(clrjit_path, "rb") as fh:
         contents = fh.read()
 
-    match = re.search(b'RyuJIT built by ([^\0]+?) targeting ([^\0]+?)-([^\0]+?)(| \(with native PGO\)| \(without native PGO\)|)\0', contents)
+    match = re.search(b'RyuJIT built by ([^\0]+?) targeting ([^\0]+?)-([^\0]+?)(| \\(with native PGO\\)| \\(without native PGO\\)|)\0', contents)
     if match is None:
         return None
 
@@ -613,7 +620,7 @@ def run_to_completion(self, async_callback, *extra_args):
                 loop = asyncio.new_event_loop()
 
             asyncio.set_event_loop(loop)
-            
+
         loop.run_until_complete(self.__run_to_completion__(async_callback, *extra_args))
         os.environ.clear()
         os.environ.update(reset_env)
@@ -841,8 +848,7 @@ def __collect_mc_files__(self):
             else:
                 dotnet_env["TieredCompilation"] = "0"
 
-            if self.coreclr_args.use_zapdisable:
-                dotnet_env["ZapDisable"] = "1"
+            if self.coreclr_args.disable_r2r:
                 dotnet_env["ReadyToRun"] = "0"
 
             logging.debug("Starting collection.")
@@ -1499,6 +1505,7 @@ def save_repro_mc_files(temp_location, coreclr_args, artifacts_base_name, repro_
     """ For commands that use the superpmi "-r" option to create "repro" .mc files, copy these to a
         location where they are saved (and not in a "temp" directory) for easy use by the user.
     """
+
     # If there are any .mc files, drop them into artifacts/repro/<host_os>.<arch>.<build_type>/*.mc
     mc_files = [os.path.join(temp_location, item) for item in os.listdir(temp_location) if item.endswith(".mc")]
     if len(mc_files) > 0:
@@ -1523,7 +1530,7 @@ def save_repro_mc_files(temp_location, coreclr_args, artifacts_base_name, repro_
 
 
 def parse_replay_asserts(mch_file, replay_output):
-    """ Parse output from failed replay, looking for asserts and correlating them to provide the best
+    r""" Parse output from failed replay, looking for asserts and correlating them to provide the best
         repro scenarios.
 
         Look for lines like:
@@ -1642,10 +1649,14 @@ def replay(self):
             repro_flags = []
 
             common_flags = [
-                "-v", "ewi",  # display errors, warnings, missing, jit info
-                "-r", os.path.join(temp_location, "repro") # Repro name prefix, create .mc repro files
+                "-v", "ewi"  # display errors, warnings, missing, jit info
             ]
 
+            if self.coreclr_args.produce_repro:
+                common_flags += [
+                    "-r", os.path.join(temp_location, "repro") # Repro name prefix, create .mc repro files
+                ]
+
             if self.coreclr_args.altjit:
                 repro_flags += [
                     "-jitoption", "force", "AltJit=*",
@@ -1709,8 +1720,8 @@ def replay(self):
                 command = [self.superpmi_path] + flags + [self.jit_path, mch_file]
                 (return_code, replay_output) = run_and_log_return_output(command)
 
-                details = read_csv(details_info_file)
-                print_superpmi_result(return_code, self.coreclr_args, self.aggregate_replay_metrics(details), None)
+                replay_metrics = self.aggregate_replay_metrics(details_info_file)
+                print_superpmi_result(return_code, self.coreclr_args, replay_metrics, None)
 
                 if return_code != 0:
                     # Don't report as replay failure missing data (return code 3).
@@ -1751,8 +1762,8 @@ def replay(self):
 
         return result
 
-    def aggregate_replay_metrics(self, details):
-        """ Given the CSV details file output by SPMI for a replay aggregate the
+    def aggregate_replay_metrics(self, details_file):
+        """ Given a path to a CSV details file output by SPMI for a replay aggregate the
         successes, misses and failures
 
         Returns:
@@ -1762,7 +1773,7 @@ def aggregate_replay_metrics(self, details):
         num_successes = 0
         num_misses = 0
         num_failures = 0
-        for row in details:
+        for row in read_csv(details_file):
             result = row["Result"]
             if result == "Success":
                 num_successes += 1
@@ -1781,7 +1792,7 @@ def aggregate_replay_metrics(self, details):
 def html_color(color, text):
     return "<span style=\"color:{}\">{}</span>".format(color, text)
 
-def calculate_improvements_regressions(base_diff_sizes):
+def calculate_size_improvements_regressions(base_diff_sizes):
     num_improvements = sum(1 for (base_size, diff_size) in base_diff_sizes if diff_size < base_size)
     num_regressions = sum(1 for (base_size, diff_size) in base_diff_sizes if diff_size > base_size)
     num_same = sum(1 for (base_size, diff_size) in base_diff_sizes if diff_size == base_size)
@@ -1791,6 +1802,36 @@ def calculate_improvements_regressions(base_diff_sizes):
 
     return (num_improvements, num_regressions, num_same, byte_improvements, byte_regressions)
 
+def calculate_perfscore_improvements_regressions(base_diff_perfscores):
+    num_ps_improvements = 0
+    num_ps_regressions = 0
+    num_ps_same = 0
+    sum_log_ps_improvements = 0
+    sum_log_ps_regressions = 0
+    for (base_ps, diff_ps) in base_diff_perfscores:
+        base_ps = max(base_ps, 1.0)
+        diff_ps = max(diff_ps, 1.0)
+        log_relative_perfscore = math.log(diff_ps / base_ps)
+
+        if abs(diff_ps - base_ps) < 0.01:
+            num_ps_same += 1
+        elif diff_ps < base_ps:
+            num_ps_improvements += 1
+            sum_log_ps_improvements += log_relative_perfscore
+        elif base_ps < diff_ps:
+            num_ps_regressions += 1
+            sum_log_ps_regressions += log_relative_perfscore
+
+    ps_improvements = 0.0
+    if num_ps_improvements > 0:
+        ps_improvements = math.exp(sum_log_ps_improvements / num_ps_improvements) - 1
+
+    ps_regressions = 0.0
+    if num_ps_regressions > 0:
+        ps_regressions = math.exp(sum_log_ps_regressions / num_ps_regressions) - 1
+
+    return (num_ps_improvements, num_ps_regressions, num_ps_same, ps_improvements, ps_regressions)
+
 def format_delta(base, diff):
     plus_if_positive = "+" if diff >= base else ""
     text = "{}{:,d}".format(plus_if_positive, diff - base)
@@ -1807,10 +1848,10 @@ def compute_pct(base, diff):
     else:
         return 0.0
 
-def format_pct(pct):
+def format_pct(pct, num_decimals = 2):
     plus_if_positive = "+" if pct > 0 else ""
 
-    text = "{}{:.2f}%".format(plus_if_positive, pct)
+    text = "{}{:.{prec}f}%".format(plus_if_positive, pct, prec=num_decimals)
     if pct != 0:
         color = "red" if pct > 0 else "green"
         return html_color(color, text)
@@ -1826,11 +1867,11 @@ def write_jit_options(coreclr_args, write_fh):
     Args:
         coreclr_args: args class instance
         write_fh: file to output to
-    
+
     """
     base_options = []
     diff_options = []
-    
+
     if coreclr_args.jitoption:
         base_options += coreclr_args.jitoption
         diff_options += coreclr_args.jitoption
@@ -1860,19 +1901,26 @@ def __enter__(self):
     def __exit__(self, *args):
         self.write_fh.write("\n\n</div></details>\n")
 
-def aggregate_diff_metrics(details):
-    """ Given the CSV details file output by SPMI for a diff aggregate the metrics.
+def aggregate_diff_metrics(details_file):
+    """ Given the path to a CSV details file output by SPMI for a diff aggregate the metrics.
     """
 
     base_minopts = {"Successful compiles": 0, "Missing compiles": 0, "Failing compiles": 0,
-                    "Contexts with diffs": 0, "Diffed code bytes": 0, "Diff executed instructions": 0,
-                    "Diffed contexts": 0}
+                    "Contexts with diffs": 0, "Diffed code bytes": 0,
+                    "Diffed PerfScore" : 0.0, "Relative PerfScore Geomean": 0.0,
+                    "Diff executed instructions": 0, "Diffed contexts": 0}
     base_fullopts = base_minopts.copy()
 
     diff_minopts = base_minopts.copy()
     diff_fullopts = base_minopts.copy()
 
-    for row in details:
+    # Project out these fields for the saved diffs, to use for further
+    # processing. Saving everything into memory is costly on memory when there
+    # are a large number of diffs.
+    diffs_fields = ["Context", "Context size", "Base size", "Diff size", "Base PerfScore", "Diff PerfScore"]
+    diffs = []
+
+    for row in read_csv(details_file):
         base_result = row["Base result"]
 
         if row["MinOpts"] == "True":
@@ -1910,12 +1958,22 @@ def aggregate_diff_metrics(details):
             base_dict["Diff executed instructions"] += base_insts
             diff_dict["Diff executed instructions"] += diff_insts
 
+            base_perfscore = float(row["Base PerfScore"])
+            diff_perfscore = float(row["Diff PerfScore"])
+            base_dict["Diffed PerfScore"] += base_perfscore
+            diff_dict["Diffed PerfScore"] += diff_perfscore
+
+            log_relative_perfscore = math.log(max(diff_perfscore, 1.0) / max(base_perfscore, 1.0))
+            base_dict["Relative PerfScore Geomean"] += log_relative_perfscore
+            diff_dict["Relative PerfScore Geomean"] += log_relative_perfscore
+
             base_dict["Diffed contexts"] += 1
             diff_dict["Diffed contexts"] += 1
 
             if row["Has diff"] == "True":
                 base_dict["Contexts with diffs"] += 1
                 diff_dict["Contexts with diffs"] += 1
+                diffs.append({key: row[key] for key in diffs_fields})
 
     base_overall = base_minopts.copy()
     for k in base_overall.keys():
@@ -1925,8 +1983,21 @@ def aggregate_diff_metrics(details):
     for k in diff_overall.keys():
         diff_overall[k] += diff_fullopts[k]
 
+    for d in [base_overall, base_minopts, base_fullopts, diff_overall, diff_minopts, diff_fullopts]:
+        sum_of_logs = d["Relative PerfScore Geomean"]
+        if d["Diffed contexts"] > 0:
+            d["Relative PerfScore Geomean"] = math.exp(sum_of_logs / d["Diffed contexts"])
+        else:
+            d["Relative PerfScore Geomean"] = 1
+
+        if d["Contexts with diffs"] > 0:
+            d["Relative PerfScore Geomean (Diffs)"] = math.exp(sum_of_logs / d["Contexts with diffs"])
+        else:
+            d["Relative PerfScore Geomean (Diffs)"] = 1
+
     return ({"Overall": base_overall, "MinOpts": base_minopts, "FullOpts": base_fullopts},
-            {"Overall": diff_overall, "MinOpts": diff_minopts, "FullOpts": diff_fullopts})
+            {"Overall": diff_overall, "MinOpts": diff_minopts, "FullOpts": diff_fullopts},
+            diffs)
 
 
 class SuperPMIReplayAsmDiffs:
@@ -2090,8 +2161,12 @@ def replay_with_asm_diffs(self):
                     "-v", "ewi",  # display errors, warnings, missing, jit info
                     "-f", fail_mcl_file,  # Failing mc List
                     "-details", detailed_info_file,  # Detailed information about each context
-                    "-r", os.path.join(temp_location, "repro"),  # Repro name prefix, create .mc repro files
                 ]
+                if self.coreclr_args.produce_repro:
+                    flags += [
+                        "-r", os.path.join(temp_location, "repro") # Repro name prefix, create .mc repro files
+                    ]
+
                 flags += altjit_asm_diffs_flags
                 flags += base_option_flags
                 flags += diff_option_flags
@@ -2128,8 +2203,7 @@ def replay_with_asm_diffs(self):
                     command = [self.superpmi_path] + flags + [self.base_jit_path, self.diff_jit_path, mch_file]
                     return_code = run_and_log(command)
 
-                details = read_csv(detailed_info_file)
-                (base_metrics, diff_metrics) = aggregate_diff_metrics(details)
+                (base_metrics, diff_metrics, diffs) = aggregate_diff_metrics(detailed_info_file)
 
                 print_superpmi_result(return_code, self.coreclr_args, base_metrics, diff_metrics)
                 artifacts_base_name = create_artifacts_base_name(self.coreclr_args, mch_file)
@@ -2149,7 +2223,6 @@ def replay_with_asm_diffs(self):
                             repro_base_command_line = "{} {} {}".format(self.superpmi_path, " ".join(altjit_asm_diffs_flags), self.diff_jit_path)
                             save_repro_mc_files(temp_location, self.coreclr_args, artifacts_base_name, repro_base_command_line)
 
-                diffs = [r for r in details if r["Has diff"] == "True"]
                 if any(diffs):
                     files_with_asm_diffs.append(mch_file)
 
@@ -2266,6 +2339,22 @@ def create_exception():
                         logging.info("Total bytes of diff: {}".format(diff_bytes))
                         delta_bytes = diff_bytes - base_bytes
                         logging.info("Total bytes of delta: {} ({:.2%} of base)".format(delta_bytes, delta_bytes / base_bytes))
+                        logging.info("")
+
+                        base_perfscore = base_metrics["Overall"]["Diffed PerfScore"]
+                        diff_perfscore = diff_metrics["Overall"]["Diffed PerfScore"]
+                        logging.info("Total PerfScore of base: {}".format(base_perfscore))
+                        logging.info("Total PerfScore of diff: {}".format(diff_perfscore))
+                        if base_perfscore != 0:
+                            delta_perfscore = diff_perfscore - base_perfscore
+                            logging.info("Total PerfScore of delta: {} ({:.2%} of base)".format(delta_perfscore, delta_perfscore / base_perfscore))
+                        logging.info("")
+
+                        relative_perfscore_geomean = diff_metrics["Overall"]["Relative PerfScore Geomean"]
+                        logging.info("Relative PerfScore Geomean: {:.4%}".format(relative_perfscore_geomean - 1))
+                        relative_perfscore_geomean_diffs = diff_metrics["Overall"]["Relative PerfScore Geomean (Diffs)"]
+                        logging.info("Relative PerfScore Geomean (Diffs): {:.4%}".format(relative_perfscore_geomean_diffs - 1))
+                        logging.info("")
 
                     try:
                         current_text_diff = text_differences.get_nowait()
@@ -2290,6 +2379,10 @@ def create_exception():
                                 if self.coreclr_args.metrics:
                                     for metric in self.coreclr_args.metrics:
                                         command += [ "--metrics", metric ]
+
+                                    if self.coreclr_args.metrics == ["PerfScore"]:
+                                        command += [ "--override-total-base-metric", str(base_perfscore), "--override-total-diff-metric", str(diff_perfscore) ]
+
                                 elif base_bytes is not None and diff_bytes is not None:
                                     command += [ "--override-total-base-metric", str(base_bytes), "--override-total-diff-metric", str(diff_bytes) ]
 
@@ -2331,17 +2424,26 @@ def create_exception():
                     # If we are not specifying custom metrics then print a summary here, otherwise leave the summarization up to jit-analyze.
                     if self.coreclr_args.metrics is None:
                         base_diff_sizes = [(int(r["Base size"]), int(r["Diff size"])) for r in diffs]
-
-                        (num_improvements, num_regressions, num_same, byte_improvements, byte_regressions) = calculate_improvements_regressions(base_diff_sizes)
-
-                        logging.info("{:,d} contexts with diffs ({:,d} improvements, {:,d} regressions, {:,d} same size)".format(
-                            len(diffs),
-                            num_improvements,
-                            num_regressions,
-                            num_same,
+                        (num_size_improvements, num_size_regressions, num_size_same, byte_improvements, byte_regressions) = calculate_size_improvements_regressions(base_diff_sizes)
+
+                        num_diffs_str = "{:,d} contexts with diffs".format(len(diffs))
+                        logging.info("{} ({:,d} size improvements, {:,d} size regressions, {:,d} same size)".format(
+                            num_diffs_str,
+                            num_size_improvements,
+                            num_size_regressions,
+                            num_size_same,
                             byte_improvements,
                             byte_regressions))
 
+                        base_diff_perfscores = [(float(r["Base PerfScore"]), float(r["Diff PerfScore"])) for r in diffs]
+                        (num_ps_improvements, num_ps_regressions, num_ps_same, ps_improvements, ps_regressions) = calculate_perfscore_improvements_regressions(base_diff_perfscores)
+
+                        logging.info("{} ({:,d} PerfScore improvements, {:,d} PerfScore regressions, {:,d} same PerfScore)".format(
+                            ' ' * len(num_diffs_str),
+                            num_ps_improvements,
+                            num_ps_regressions,
+                            num_ps_same))
+
                         if byte_improvements > 0 and byte_regressions > 0:
                             logging.info("  -{:,d}/+{:,d} bytes".format(byte_improvements, byte_regressions))
                         elif byte_improvements > 0:
@@ -2349,6 +2451,13 @@ def create_exception():
                         elif byte_regressions > 0:
                             logging.info("  +{:,d} bytes".format(byte_regressions))
 
+                        if num_ps_improvements > 0 and num_ps_regressions > 0:
+                            logging.info("  {:.2f}%/+{:.2f}% PerfScore".format(ps_improvements * 100, ps_regressions * 100))
+                        elif num_ps_improvements > 0:
+                            logging.info("  {:.2f}% PerfScore".format(ps_improvements * 100))
+                        elif num_ps_regressions > 0:
+                            logging.info("  +{:.2f}% PerfScore".format(ps_regressions * 100))
+
                         logging.info("")
                         logging.info("")
 
@@ -2493,19 +2602,20 @@ def write_pivot_section(row):
                 sum_diff = sum(diff_metrics[row]["Diffed code bytes"] for (_, _, diff_metrics, _, _, _) in asm_diffs)
 
                 with DetailsSection(write_fh, "{} ({} bytes)".format(row, format_delta(sum_base, sum_diff))):
-                    write_fh.write("|Collection|Base size (bytes)|Diff size (bytes)|\n")
-                    write_fh.write("|---|--:|--:|\n")
+                    write_fh.write("|Collection|Base size (bytes)|Diff size (bytes)|PerfScore in Diffs\n")
+                    write_fh.write("|---|--:|--:|--:|\n")
                     for (mch_file, base_metrics, diff_metrics, _, _, _) in asm_diffs:
                         # Exclude this particular row?
                         if not has_diffs(diff_metrics[row]):
                             continue
 
-                        write_fh.write("|{}|{:,d}|{}|\n".format(
+                        write_fh.write("|{}|{:,d}|{}|{}|\n".format(
                             mch_file,
                             base_metrics[row]["Diffed code bytes"],
                             format_delta(
                                 base_metrics[row]["Diffed code bytes"],
-                                diff_metrics[row]["Diffed code bytes"])))
+                                diff_metrics[row]["Diffed code bytes"]),
+                            format_pct(diff_metrics[row]["Relative PerfScore Geomean (Diffs)"] * 100 - 100)))
 
             write_top_context_section()
             write_pivot_section("Overall")
@@ -2523,13 +2633,13 @@ def write_pivot_section(row):
             # Next write a detailed section
             with DetailsSection(write_fh, "Details"):
                 if any_diffs:
-                    write_fh.write("#### Improvements/regressions per collection\n\n")
+                    write_fh.write("#### Size improvements/regressions per collection\n\n")
                     write_fh.write("|Collection|Contexts with diffs|Improvements|Regressions|Same size|Improvements (bytes)|Regressions (bytes)|\n")
                     write_fh.write("|---|--:|--:|--:|--:|--:|--:|\n")
 
                     def write_row(name, diffs):
                         base_diff_sizes = [(int(r["Base size"]), int(r["Diff size"])) for r in diffs]
-                        (num_improvements, num_regressions, num_same, byte_improvements, byte_regressions) = calculate_improvements_regressions(base_diff_sizes)
+                        (num_improvements, num_regressions, num_same, byte_improvements, byte_regressions) = calculate_size_improvements_regressions(base_diff_sizes)
                         write_fh.write("|{}|{:,d}|{}|{}|{}|{}|{}|\n".format(
                             name,
                             len(diffs),
@@ -2539,13 +2649,34 @@ def write_row(name, diffs):
                             html_color("green", "-{:,d}".format(byte_improvements)),
                             html_color("red", "+{:,d}".format(byte_regressions))))
 
-                    for (mch_file, _, _, diffs, _, _) in asm_diffs:
+                    for (mch_file, _, diff_metrics, diffs, _, _) in asm_diffs:
                         write_row(mch_file, diffs)
 
                     if len(asm_diffs) > 1:
                         write_row("", [r for (_, _, _, diffs, _, _) in asm_diffs for r in diffs])
 
                     write_fh.write("\n---\n\n")
+                    write_fh.write("#### PerfScore improvements/regressions per collection\n\n")
+                    write_fh.write("|Collection|Contexts with diffs|Improvements|Regressions|Same PerfScore|Improvements (PerfScore)|Regressions (PerfScore)|PerfScore Overall in FullOpts|\n")
+                    write_fh.write("|---|--:|--:|--:|--:|--:|--:|--:|\n")
+
+                    def write_ps_row(name, diffs, perfscore_geomean):
+                        base_diff_perfscores = [(float(r["Base PerfScore"]), float(r["Diff PerfScore"])) for r in diffs]
+                        (num_improvements, num_regressions, num_same, ps_improvements, ps_regressions) = calculate_perfscore_improvements_regressions(base_diff_perfscores)
+                        write_fh.write("|{}|{:,d}|{}|{}|{}|{}|{}|{}|\n".format(
+                            name,
+                            len(diffs),
+                            html_color("green", "{:,d}".format(num_improvements)),
+                            html_color("red", "{:,d}".format(num_regressions)),
+                            html_color("blue", "{:,d}".format(num_same)),
+                            format_pct(ps_improvements * 100),
+                            format_pct(ps_regressions * 100),
+                            format_pct(perfscore_geomean * 100, 4)))
+
+                    for (mch_file, _, diff_metrics, diffs, _, _) in asm_diffs:
+                        write_ps_row(mch_file, diffs, diff_metrics["FullOpts"]["Relative PerfScore Geomean"] - 1)
+
+                    write_fh.write("\n---\n\n")
 
                 write_fh.write("#### Context information\n\n")
                 write_fh.write("|Collection|Diffed contexts|MinOpts|FullOpts|Missed, base|Missed, diff|\n")
@@ -2668,7 +2799,7 @@ def pick_contexts_to_disassemble(self, diffs):
 
         # If there are non-default metrics then we need to disassemble
         # everything so that jit-analyze can handle those.
-        if self.coreclr_args.metrics is not None:
+        if self.coreclr_args.metrics is not None and self.coreclr_args.metrics != ["PerfScore"]:
             contexts = diffs
             examples = []
         else:
@@ -2686,22 +2817,29 @@ def display_subset(message, subset):
             smallest_contexts = sorted(diffs, key=lambda r: int(r["Context size"]))[:20]
             display_subset("Smallest {} contexts with binary differences:", smallest_contexts)
 
-            # Order by byte-wise improvement, largest improvements first
-            by_diff_size = sorted(diffs, key=lambda r: int(r["Diff size"]) - int(r["Base size"]))
-            # 20 top improvements, byte-wise
-            top_improvements_bytes = by_diff_size[:20]
-            # 20 top regressions, byte-wise
-            top_regressions_bytes = by_diff_size[-20:]
+            if self.coreclr_args.metrics is None:
+                base_metric_name = "Base size"
+                diff_metric_name = "Diff size"
+            else:
+                base_metric_name = "Base PerfScore"
+                diff_metric_name = "Diff PerfScore"
+
+            # Order by improvement, largest improvements first
+            by_diff = sorted(diffs, key=lambda r: float(r[diff_metric_name]) - float(r[base_metric_name]))
+            # 20 top improvements
+            top_improvements = by_diff[:20]
+            # 20 top regressions
+            top_regressions = by_diff[-20:]
 
-            display_subset("Top {} improvements, byte-wise:", top_improvements_bytes)
-            display_subset("Top {} regressions, byte-wise:", top_regressions_bytes)
+            display_subset("Top {} improvements:", top_improvements)
+            display_subset("Top {} regressions:", top_regressions)
 
             # Order by percentage-wise size improvement, largest improvements first
             def diff_pct(r):
-                base = int(r["Base size"])
+                base = float(r[base_metric_name])
                 if base == 0:
                     return 0
-                diff = int(r["Diff size"])
+                diff = float(r[diff_metric_name])
                 return (diff - base) / base
 
             by_diff_size_pct = sorted(diffs, key=diff_pct)
@@ -2723,7 +2861,7 @@ def diff_pct(r):
 
             example_improvements = by_diff_size_pct_examples[:3]
             example_regressions = by_diff_size_pct_examples[3:][-3:]
-            contexts = smallest_contexts + top_improvements_bytes + top_regressions_bytes + top_improvements_pct + top_regressions_pct + smallest_zero_size_contexts + example_improvements + example_regressions
+            contexts = smallest_contexts + top_improvements + top_regressions + top_improvements_pct + top_regressions_pct + smallest_zero_size_contexts + example_improvements + example_regressions
             examples = example_improvements + example_regressions
 
         final_contexts_indices = list(set(int(r["Context"]) for r in contexts))
@@ -2872,8 +3010,7 @@ def replay_with_throughput_diff(self):
                     command_string = " ".join(command)
                     logging.debug("'%s': Error return code: %s", command_string, return_code)
 
-                details = read_csv(detailed_info_file)
-                (base_metrics, diff_metrics) = aggregate_diff_metrics(details)
+                (base_metrics, diff_metrics, _) = aggregate_diff_metrics(detailed_info_file)
 
                 if base_metrics is not None and diff_metrics is not None:
                     base_instructions = base_metrics["Overall"]["Diff executed instructions"]
@@ -3540,7 +3677,7 @@ def filter_local_path(path):
 
 
 def process_mch_files_arg(coreclr_args):
-    """ Process the -mch_files argument. If the argument is not specified, then download files
+    r""" Process the -mch_files argument. If the argument is not specified, then download files
         from Azure Storage and any specified private MCH stores.
 
         Any files on UNC (i.e., "\\server\share" paths on Windows) or Azure Storage stores,
@@ -4360,6 +4497,11 @@ def verify_replay_common_args():
                             lambda unused: True,
                             "Method context not valid")
 
+        coreclr_args.verify(args,
+                            "produce_repro",
+                            lambda unused: True,
+                            "Unable to set produce_repro")
+
         coreclr_args.verify(args,
                             "private_store",
                             lambda item: True,
@@ -4451,6 +4593,11 @@ def verify_base_diff_args():
                             lambda unused: True,
                             "Method context not valid")
 
+        coreclr_args.verify(args,
+                            "produce_repro",  # The replay code checks this, so make sure it's set
+                            lambda unused: True,
+                            "Unable to set produce_repro")
+
         coreclr_args.verify(args,
                             "collection_command",
                             lambda unused: True,
@@ -4551,9 +4698,9 @@ def verify_base_diff_args():
                             "Unable to set clean.")
 
         coreclr_args.verify(args,
-                            "use_zapdisable",
+                            "disable_r2r",
                             lambda unused: True,
-                            "Unable to set use_zapdisable")
+                            "Unable to set disable_r2r")
 
         coreclr_args.verify(args,
                             "tiered_compilation",
diff --git a/src/coreclr/tools/Common/Compiler/CompilerTypeSystemContext.Validation.cs b/src/coreclr/tools/Common/Compiler/CompilerTypeSystemContext.Validation.cs
index 9326fe5978d6..6c0b5f9fe2e6 100644
--- a/src/coreclr/tools/Common/Compiler/CompilerTypeSystemContext.Validation.cs
+++ b/src/coreclr/tools/Common/Compiler/CompilerTypeSystemContext.Validation.cs
@@ -326,6 +326,12 @@ private static TypeDesc EnsureLoadableTypeUncached(TypeDesc type)
                     return type;
                 }
 
+                // Make sure instantiation length matches the expectation
+                if (type.Instantiation.Length != type.GetTypeDefinition().Instantiation.Length)
+                {
+                    ThrowHelper.ThrowTypeLoadException(ExceptionStringID.ClassLoadGeneral, type);
+                }
+
                 // We need to be able to load interfaces
                 foreach (var intf in type.RuntimeInterfaces)
                 {
@@ -343,12 +349,6 @@ private static TypeDesc EnsureLoadableTypeUncached(TypeDesc type)
                 defType.ComputeInstanceLayout(InstanceLayoutKind.TypeAndFields);
                 defType.ComputeStaticFieldLayout(StaticLayoutKind.StaticRegionSizesAndFields);
 
-                // Make sure instantiation length matches the expectation
-                if (defType.Instantiation.Length != defType.GetTypeDefinition().Instantiation.Length)
-                {
-                    ThrowHelper.ThrowTypeLoadException(ExceptionStringID.ClassLoadGeneral, type);
-                }
-
                 foreach (TypeDesc typeArg in defType.Instantiation)
                 {
                     // ByRefs, pointers, function pointers, and System.Void are never valid instantiation arguments
diff --git a/src/coreclr/tools/Common/Compiler/Dataflow/DynamicallyAccessedMembersBinder.cs b/src/coreclr/tools/Common/Compiler/Dataflow/DynamicallyAccessedMembersBinder.cs
index 6780cb561928..3b317fbd01cd 100644
--- a/src/coreclr/tools/Common/Compiler/Dataflow/DynamicallyAccessedMembersBinder.cs
+++ b/src/coreclr/tools/Common/Compiler/Dataflow/DynamicallyAccessedMembersBinder.cs
@@ -121,6 +121,12 @@ public static IEnumerable<TypeSystemEntity> GetDynamicallyAccessedMembers(this T
                 foreach (var e in typeDefinition.GetEventsOnTypeHierarchy(filter: null, bindingFlags: BindingFlags.Public | declaredOnlyFlags))
                     yield return e;
             }
+
+            if (memberTypes.HasFlag(DynamicallyAccessedMemberTypes.Interfaces))
+            {
+                foreach (DefType iface in typeDefinition.GetAllInterfaceImplementations(declaredOnly))
+                    yield return iface;
+            }
         }
 
         public static IEnumerable<MethodDesc> GetConstructorsOnType(this TypeDesc type, Func<MethodDesc, bool> filter, BindingFlags? bindingFlags = null)
diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs
index 3d2441891fbf..7585ca75fa8b 100644
--- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs
+++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs
@@ -487,8 +487,11 @@ public static unsafe void WriteValue(RelocType relocType, void* location, long v
                 case RelocType.IMAGE_REL_TPOFF:
                 case RelocType.IMAGE_REL_SYMBOL_SIZE:
                 case RelocType.IMAGE_REL_FILE_ABSOLUTE:
+                case RelocType.IMAGE_REL_AARCH64_TLSDESC_CALL:
                     *(int*)location = (int)value;
                     break;
+                case RelocType.IMAGE_REL_AARCH64_TLSDESC_LD64_LO12:
+                    break;
                 case RelocType.IMAGE_REL_BASED_DIR64:
                     *(long*)location = value;
                     break;
@@ -503,9 +506,11 @@ public static unsafe void WriteValue(RelocType relocType, void* location, long v
                     PutArm64Rel28((uint*)location, value);
                     break;
                 case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21:
+                case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21:
                     PutArm64Rel21((uint*)location, (int)value);
                     break;
                 case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A:
+                case RelocType.IMAGE_REL_AARCH64_TLSDESC_ADD_LO12:
                     PutArm64Rel12((uint*)location, (int)value);
                     break;
                 case RelocType.IMAGE_REL_BASED_LOONGARCH64_PC:
diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM/ARMEmitter.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM/ARMEmitter.cs
index 8562927f8387..0b5db9e65bf0 100644
--- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM/ARMEmitter.cs
+++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM/ARMEmitter.cs
@@ -121,9 +121,41 @@ public void EmitLDR(Register destination, Register source)
         // ldr.w reg, [reg, #offset]
         // reg range: [0..PC]
         // offset range: [-255..4095]
+        //
+        // for offset >= 4096 we do an expansion into:
+        // add.w destination, source, #const
+        // ldr.w destination, [destination, #offset]
         public void EmitLDR(Register destination, Register source, int offset)
         {
             Debug.Assert(IsValidReg(destination) && IsValidReg(source));
+
+            if (offset >= 0x1000)
+            {
+                uint constVal = (uint)offset & ~0xfffu;
+                uint mask32 = 0xff;
+                uint imm8 = 0;
+                int encode = 31; // 11111
+
+                do
+                {
+                    mask32 <<= 1;
+                    if ((constVal & ~mask32) == 0)
+                    {
+                        imm8 = (constVal & mask32) >> (32 - encode);
+                        break;
+                    }
+                    encode--;
+                } while (encode >= 8);
+
+                Debug.Assert(encode >= 8);
+                Debug.Assert((imm8 & 0x80) > 0);
+                Builder.EmitShort((short)(0xF100 + (byte)source + (((byte)encode & 0x10) << 6)));
+                Builder.EmitShort((short)((((byte)encode & 0xE) << 11) + ((byte)destination << 8) + (((byte)encode & 1) << 7) + (imm8 & 0x7f)));
+
+                offset = (int)(offset & 0xfffu);
+                source = destination;
+            }
+
             Debug.Assert(offset >= -255 && offset <= 4095);
             if (offset >= 0)
             {
diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_X86/X86Emitter.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_X86/X86Emitter.cs
index e386e9a34da6..8601f35180e4 100644
--- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_X86/X86Emitter.cs
+++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_X86/X86Emitter.cs
@@ -17,6 +17,37 @@ public X86Emitter(NodeFactory factory, bool relocsOnly)
         public ObjectDataBuilder Builder;
         public TargetRegisterMap TargetRegister;
 
+        public void EmitMOV(ref AddrMode addrMode, Register reg)
+        {
+            Debug.Assert(addrMode.Size != AddrModeSize.Int8 && addrMode.Size != AddrModeSize.Int16);
+            EmitIndirInstruction(0x89, (byte)reg, ref addrMode);
+        }
+
+        public void EmitMOV(Register reg, ref AddrMode addrMode)
+        {
+            Debug.Assert(addrMode.Size != AddrModeSize.Int8 && addrMode.Size != AddrModeSize.Int16);
+            EmitIndirInstruction(0x8B, (byte)reg, ref addrMode);
+        }
+
+        public void EmitMOV(ref AddrMode addrMode, ISymbolNode symbol)
+        {
+            if (symbol.RepresentsIndirectionCell)
+            {
+                throw new NotImplementedException();
+            }
+            else
+            {
+                EmitIndirInstruction(0xC7, (byte)0, ref addrMode);
+                Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_HIGHLOW);
+            }
+        }
+
+        public void EmitLEA(Register reg, ref AddrMode addrMode)
+        {
+            Debug.Assert(addrMode.Size != AddrModeSize.Int8 && addrMode.Size != AddrModeSize.Int16);
+            EmitIndirInstruction(0x8D, (byte)reg, ref addrMode);
+        }
+
         public void EmitCMP(ref AddrMode addrMode, sbyte immediate)
         {
             if (addrMode.Size == AddrModeSize.Int16)
@@ -48,18 +79,55 @@ public void EmitJMP(ISymbolNode symbol)
             }
         }
 
+        public void EmitJE(ISymbolNode symbol)
+        {
+            if (symbol.RepresentsIndirectionCell)
+            {
+                throw new NotImplementedException();
+            }
+            else
+            {
+                Builder.EmitByte(0x0f);
+                Builder.EmitByte(0x84);
+                Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_REL32);
+            }
+        }
+
         public void EmitXOR(Register register1, Register register2)
         {
             Builder.EmitByte(0x33);
             Builder.EmitByte((byte)(0xC0 | ((byte)register1 << 3) | (byte)register2));
         }
 
+        public void EmitZeroReg(Register reg)
+        {
+            EmitXOR(reg, reg);
+        }
+
+        public void EmitPOP(Register reg)
+        {
+            Builder.EmitByte((byte)(0x58 + (byte)reg));
+        }
+
+        public void EmitStackDup()
+        {
+            // PUSH [ESP]
+            Builder.EmitByte(0xff);
+            Builder.EmitByte(0x34);
+            Builder.EmitByte(0x24);
+        }
+
         public void EmitPUSH(sbyte imm8)
         {
             Builder.EmitByte(0x6A);
             Builder.EmitByte(unchecked((byte)imm8));
         }
 
+        public void EmitPUSH(Register reg)
+        {
+            Builder.EmitByte((byte)(0x50 + (byte)reg));
+        }
+
         public void EmitPUSH(ISymbolNode node)
         {
             if (node.RepresentsIndirectionCell)
@@ -103,6 +171,11 @@ public void EmitINT3()
             Builder.EmitByte(0xCC);
         }
 
+        public void EmitJmpToAddrMode(ref AddrMode addrMode)
+        {
+            EmitIndirInstruction(0xFF, 0x4, ref addrMode);
+        }
+
         public void EmitRET()
         {
             Builder.EmitByte(0xC3);
diff --git a/src/coreclr/tools/Common/Compiler/DevirtualizationManager.cs b/src/coreclr/tools/Common/Compiler/DevirtualizationManager.cs
index 6222ec6d649c..5574a9f8aa10 100644
--- a/src/coreclr/tools/Common/Compiler/DevirtualizationManager.cs
+++ b/src/coreclr/tools/Common/Compiler/DevirtualizationManager.cs
@@ -203,13 +203,17 @@ protected virtual MethodDesc ResolveVirtualMethod(MethodDesc declMethod, DefType
 
 #if !READYTORUN
         /// <summary>
-        /// Gets a value indicating whether it might be possible to obtain a constructed type data structure for the given type.
+        /// Gets a value indicating whether it might be possible to obtain a constructed type data structure for the given type
+        /// in this compilation (i.e. is it possible to reference a constructed MethodTable symbol for this).
         /// </summary>
-        /// <remarks>
-        /// This is a bit of a hack, but devirtualization manager has a global view of all allocated types
-        /// so it can answer this question.
-        /// </remarks>
-        public virtual bool CanConstructType(TypeDesc type) => true;
+        public virtual bool CanReferenceConstructedMethodTable(TypeDesc type) => true;
+
+        /// <summary>
+        /// Gets a value indicating whether a (potentially canonically-equlivalent) constructed MethodTable could
+        /// exist. This is similar to <see cref="CanReferenceConstructedMethodTable"/>, but will return true
+        /// for List&lt;__Canon&gt; if a constructed MethodTable for List&lt;object&gt; exists.
+        /// </summary>
+        public virtual bool CanReferenceConstructedTypeOrCanonicalFormOfType(TypeDesc type) => true;
 
         public virtual TypeDesc[] GetImplementingClasses(TypeDesc type) => null;
 #endif
diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
index c30736fcb601..98d06568878d 100644
--- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
+++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
@@ -16,7 +16,22 @@ public static partial class HardwareIntrinsicHelpers
         /// </summary>
         public static bool IsHardwareIntrinsic(MethodDesc method)
         {
-            return !string.IsNullOrEmpty(InstructionSetSupport.GetHardwareIntrinsicId(method.Context.Target.Architecture, method.OwningType));
+            // Matches logic in
+            // https://github.com/dotnet/runtime/blob/5c40bb5636b939fb548492fdeb9d501b599ac5f5/src/coreclr/vm/methodtablebuilder.cpp#L1491-L1512
+            TypeDesc owningType = method.OwningType;
+            if (owningType.IsIntrinsic && !owningType.HasInstantiation)
+            {
+                var owningMdType = (MetadataType)owningType;
+                string ns = owningMdType.ContainingType?.Namespace ?? owningMdType.Namespace;
+                return method.Context.Target.Architecture switch
+                {
+                    TargetArchitecture.ARM64 => ns == "System.Runtime.Intrinsics.Arm",
+                    TargetArchitecture.X64 or TargetArchitecture.X86 => ns == "System.Runtime.Intrinsics.X86",
+                    _ => false,
+                };
+            }
+
+            return false;
         }
 
         public static void AddRuntimeRequiredIsaFlagsToBuilder(InstructionSetSupportBuilder builder, int flags)
@@ -69,6 +84,9 @@ private static class XArchIntrinsicConstants
             public const int VectorT128 = 0x4000000;
             public const int VectorT256 = 0x8000000;
             public const int VectorT512 = 0x10000000;
+            public const int Avx10v1 = 0x20000000;
+            public const int Avx10v1_v256 = 0x40000000;
+            public const int Avx10v1_v512 = unchecked((int)0x80000000);
 
             public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
             {
@@ -124,6 +142,12 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
                     builder.AddSupportedInstructionSet("avx512vbmi_vl");
                 if ((flags & Serialize) != 0)
                     builder.AddSupportedInstructionSet("serialize");
+                if ((flags & Avx10v1) != 0)
+                    builder.AddSupportedInstructionSet("avx10v1");
+                if ((flags & Avx10v1_v256) != 0)
+                    builder.AddSupportedInstructionSet("avx10v1_v256");
+                if ((flags & Avx10v1_v512) != 0)
+                    builder.AddSupportedInstructionSet("avx10v1_v512");
             }
 
             public static int FromInstructionSet(InstructionSet instructionSet)
@@ -187,6 +211,12 @@ public static int FromInstructionSet(InstructionSet instructionSet)
                     InstructionSet.X64_AVX512VBMI_VL_X64 => Avx512Vbmi_vl,
                     InstructionSet.X64_X86Serialize => Serialize,
                     InstructionSet.X64_X86Serialize_X64 => Serialize,
+                    InstructionSet.X64_AVX10v1 => Avx10v1,
+                    InstructionSet.X64_AVX10v1_X64 => Avx10v1,
+                    InstructionSet.X64_AVX10v1_V256 => Avx10v1_v256,
+                    InstructionSet.X64_AVX10v1_V256_X64 => Avx10v1_v256,
+                    InstructionSet.X64_AVX10v1_V512 => Avx10v1_v512,
+                    InstructionSet.X64_AVX10v1_V512_X64 => Avx10v1_v512,
 
                     // Baseline ISAs - they're always available
                     InstructionSet.X64_SSE => 0,
diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs
index 72d2427e2a86..fde027a32622 100644
--- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs
+++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs
@@ -10,6 +10,12 @@
 
 namespace ILCompiler
 {
+    [Flags]
+    public enum InstructionSetSupportFlags
+    {
+        Vector512Throttling = 0x1,
+    }
+
     public class InstructionSetSupport
     {
         private readonly TargetArchitecture _targetArchitecture;
@@ -17,19 +23,21 @@ public class InstructionSetSupport
         private readonly InstructionSetFlags _supportedInstructionSets;
         private readonly InstructionSetFlags _unsupportedInstructionSets;
         private readonly InstructionSetFlags _nonSpecifiableInstructionSets;
+        private readonly InstructionSetSupportFlags _flags;
 
         public InstructionSetSupport(InstructionSetFlags supportedInstructionSets, InstructionSetFlags unsupportedInstructionSets, TargetArchitecture architecture) :
             this(supportedInstructionSets, unsupportedInstructionSets, supportedInstructionSets, default(InstructionSetFlags), architecture)
         {
         }
 
-        public InstructionSetSupport(InstructionSetFlags supportedInstructionSets, InstructionSetFlags unsupportedInstructionSets, InstructionSetFlags optimisticInstructionSets, InstructionSetFlags nonSpecifiableInstructionSets, TargetArchitecture architecture)
+        public InstructionSetSupport(InstructionSetFlags supportedInstructionSets, InstructionSetFlags unsupportedInstructionSets, InstructionSetFlags optimisticInstructionSets, InstructionSetFlags nonSpecifiableInstructionSets, TargetArchitecture architecture, InstructionSetSupportFlags flags = 0)
         {
             _supportedInstructionSets = supportedInstructionSets;
             _unsupportedInstructionSets = unsupportedInstructionSets;
             _optimisticInstructionSets = optimisticInstructionSets;
             _targetArchitecture = architecture;
             _nonSpecifiableInstructionSets = nonSpecifiableInstructionSets;
+            _flags = flags;
         }
 
         public bool IsInstructionSetSupported(InstructionSet instructionSet)
@@ -54,6 +62,8 @@ public bool IsInstructionSetExplicitlyUnsupported(InstructionSet instructionSet)
 
         public TargetArchitecture Architecture => _targetArchitecture;
 
+        public InstructionSetSupportFlags Flags => _flags;
+
         public static string GetHardwareIntrinsicId(TargetArchitecture architecture, TypeDesc potentialTypeDesc)
         {
             if (!potentialTypeDesc.IsIntrinsic || !(potentialTypeDesc is MetadataType potentialType))
@@ -70,8 +80,6 @@ public static string GetHardwareIntrinsicId(TargetArchitecture architecture, Typ
             }
             else if (architecture == TargetArchitecture.X86)
             {
-                if (potentialType.Name == "X64")
-                    potentialType = (MetadataType)potentialType.ContainingType;
                 if (potentialType.Name == "VL")
                     potentialType = (MetadataType)potentialType.ContainingType;
                 if (potentialType.Namespace != "System.Runtime.Intrinsics.X86")
@@ -99,7 +107,7 @@ public static string GetHardwareIntrinsicId(TargetArchitecture architecture, Typ
             }
             else
             {
-                throw new InternalCompilerErrorException("Unknown architecture");
+                throw new InternalCompilerErrorException($"Unknown architecture '{architecture}'");
             }
 
             return potentialType.Name;
@@ -173,7 +181,7 @@ public class InstructionSetSupportBuilder
         private static Dictionary<TargetArchitecture, Dictionary<string, InstructionSet>> ComputeInstructionSetSupport()
         {
             var supportMatrix = new Dictionary<TargetArchitecture, Dictionary<string, InstructionSet>>();
-            foreach (TargetArchitecture arch in Enum.GetValues(typeof(TargetArchitecture)))
+            foreach (TargetArchitecture arch in Enum.GetValues<TargetArchitecture>())
             {
                 supportMatrix[arch] = ComputeInstructSetSupportForArch(arch);
             }
@@ -184,7 +192,7 @@ private static Dictionary<TargetArchitecture, Dictionary<string, InstructionSet>
         private static Dictionary<TargetArchitecture, InstructionSetFlags> ComputeNonSpecifiableInstructionSetSupport()
         {
             var matrix = new Dictionary<TargetArchitecture, InstructionSetFlags>();
-            foreach (TargetArchitecture arch in Enum.GetValues(typeof(TargetArchitecture)))
+            foreach (TargetArchitecture arch in Enum.GetValues<TargetArchitecture>())
             {
                 matrix[arch] = ComputeNonSpecifiableInstructionSetSupportForArch(arch);
             }
diff --git a/src/coreclr/tools/Common/Compiler/NativeAotNameMangler.cs b/src/coreclr/tools/Common/Compiler/NativeAotNameMangler.cs
index 48c0903f5e68..844cf58f20bd 100644
--- a/src/coreclr/tools/Common/Compiler/NativeAotNameMangler.cs
+++ b/src/coreclr/tools/Common/Compiler/NativeAotNameMangler.cs
@@ -108,7 +108,7 @@ private string SanitizeNameWithHash(string literal)
                     hash = SHA256.HashData(GetBytesFromString(literal));
                 }
 
-                mangledName += "_" + BitConverter.ToString(hash).Replace("-", "");
+                mangledName += "_" + Convert.ToHexString(hash);
             }
 
             return mangledName;
diff --git a/src/coreclr/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs b/src/coreclr/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs
index 33d894f75893..bb0a5bc71a62 100644
--- a/src/coreclr/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs
+++ b/src/coreclr/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs
@@ -52,12 +52,19 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp
                     // to the same alignment as __m128, which is supported by the ABI.
                     alignment = new LayoutInt(8);
                 }
-                else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64 || defType.Context.Target.Architecture == TargetArchitecture.RiscV64)
+                else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64)
                 {
                     // The Procedure Call Standard for ARM 64-bit (with SVE support) defaults to
                     // 16-byte alignment for __m256.
                     alignment = new LayoutInt(16);
                 }
+                else if (defType.Context.Target.Architecture == TargetArchitecture.RiscV64)
+                {
+                    // TODO-RISCV64: Update alignment to proper value when we implement RISC-V intrinsic.
+                    // RISC-V Vector Extenstion Intrinsic Document
+                    // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/vector_type_infos.adoc
+                    alignment = new LayoutInt(16);
+                }
                 else
                 {
                     alignment = new LayoutInt(32);
@@ -73,12 +80,19 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp
                     // to the same alignment as __m128, which is supported by the ABI.
                     alignment = new LayoutInt(8);
                 }
-                else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64 || defType.Context.Target.Architecture == TargetArchitecture.RiscV64)
+                else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64)
                 {
                     // The Procedure Call Standard for ARM 64-bit (with SVE support) defaults to
                     // 16-byte alignment for __m256.
                     alignment = new LayoutInt(16);
                 }
+                else if (defType.Context.Target.Architecture == TargetArchitecture.RiscV64)
+                {
+                    // TODO-RISCV64: Update alignment to proper value when we implement RISC-V intrinsic.
+                    // RISC-V Vector Extenstion Intrinsic Document
+                    // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/vector_type_infos.adoc
+                    alignment = new LayoutInt(16);
+                }
                 else
                 {
                     alignment = new LayoutInt(64);
diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs
index e3236467f931..718053f13d0c 100644
--- a/src/coreclr/tools/Common/InstructionSetHelpers.cs
+++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs
@@ -4,6 +4,7 @@
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
 
 using ILCompiler;
 
@@ -19,6 +20,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
             string mustNotBeMessage, string invalidImplicationMessage, Logger logger, bool optimizingForSize = false)
         {
             InstructionSetSupportBuilder instructionSetSupportBuilder = new(targetArchitecture);
+            InstructionSetSupportFlags flags = 0;
 
             // Ready to run images are built with certain instruction set baselines
             if ((targetArchitecture == TargetArchitecture.X86) || (targetArchitecture == TargetArchitecture.X64))
@@ -63,6 +65,52 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
                 }
                 HardwareIntrinsicHelpers.AddRuntimeRequiredIsaFlagsToBuilder(instructionSetSupportBuilder, cpuFeatures);
 
+                if (targetArchitecture is TargetArchitecture.X64 or TargetArchitecture.X86)
+                {
+                    // Some architectures can experience frequency throttling when executing
+                    // 512-bit width instructions. To account for this we set the
+                    // default preferred vector width to 256-bits in some scenarios.
+                    (int Eax, int Ebx, int Ecx, int Edx) cpuidInfo = X86Base.CpuId(0, 0);
+                    bool isGenuineIntel = (cpuidInfo.Ebx == 0x756E6547) && // Genu
+                                          (cpuidInfo.Edx == 0x49656E69) && // ineI
+                                          (cpuidInfo.Ecx == 0x6C65746E);   // ntel
+                    if (isGenuineIntel)
+                    {
+                        cpuidInfo = X86Base.CpuId(1, 0);
+                        Debug.Assert((cpuidInfo.Edx & (1 << 15)) != 0); // CMOV
+                        int model = (cpuidInfo.Eax >> 4) & 0xF;
+                        int family = (cpuidInfo.Eax >> 8) & 0xF;
+                        int extendedModel = (cpuidInfo.Eax >> 16) & 0xF;
+
+                        if (family == 0x06)
+                        {
+                            if (extendedModel == 0x05)
+                            {
+                                if (model == 0x05)
+                                {
+                                    // * Skylake (Server)
+                                    // * Cascade Lake
+                                    // * Cooper Lake
+
+                                    flags |= InstructionSetSupportFlags.Vector512Throttling;
+                                }
+                            }
+                            else if (extendedModel == 0x06)
+                            {
+                                if (model == 0x06)
+                                {
+                                    // * Cannon Lake
+
+                                    flags |= InstructionSetSupportFlags.Vector512Throttling;
+                                }
+                            }
+                        }
+                    }
+
+                    if ((flags & InstructionSetSupportFlags.Vector512Throttling) != 0 && logger.IsVerbose)
+                        logger.LogMessage("Vector512 is throttled");
+                }
+
                 if (logger.IsVerbose)
                     logger.LogMessage($"The 'native' instruction set expanded to {instructionSetSupportBuilder}");
             }
@@ -198,7 +246,8 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
                 unsupportedInstructionSet,
                 optimisticInstructionSet,
                 InstructionSetSupportBuilder.GetNonSpecifiableInstructionSetsForArch(targetArchitecture),
-                targetArchitecture);
+                targetArchitecture,
+                flags);
         }
     }
 }
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
index 01071442f962..6fc5d9542e16 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
@@ -16,7 +16,7 @@ internal struct ReadyToRunHeaderConstants
         public const uint Signature = 0x00525452; // 'RTR'
 
         public const ushort CurrentMajorVersion = 9;
-        public const ushort CurrentMinorVersion = 1;
+        public const ushort CurrentMinorVersion = 2;
     }
 #if READYTORUN
 #pragma warning disable 0169
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
index 6ef8efa3e0b8..c6b11a70fff0 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
@@ -57,7 +57,6 @@ public enum ReadyToRunMethodSigFlags : byte
     [Flags]
     public enum ReadyToRunFieldSigFlags : byte
     {
-        READYTORUN_FIELD_SIG_IndexInsteadOfToken = 0x08,
         READYTORUN_FIELD_SIG_MemberRefToken = 0x10,
         READYTORUN_FIELD_SIG_OwnerType = 0x40,
     }
@@ -239,7 +238,9 @@ public enum ReadyToRunHelper
         Stelem_Ref                  = 0x38,
         Ldelema_Ref                 = 0x39,
 
-        MemSet                      = 0x40,
+        MemZero                     = 0x3E,
+        MemSet                      = 0x3F,
+        NativeMemSet                = 0x40,
         MemCpy                      = 0x41,
 
         // P/Invoke support
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs
index 284131d1641a..0e2fec09e19d 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs
@@ -55,6 +55,9 @@ public enum ReadyToRunInstructionSet
         VectorT512=41,
         Rcpc2=42,
         Sve=43,
+        Avx10v1=44,
+        Avx10v1_V256=45,
+        Avx10v1_V512=46,
 
     }
 }
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
index 7000b482ec60..533b017e2bae 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
@@ -118,6 +118,12 @@ public static class ReadyToRunInstructionSetHelper
                             case InstructionSet.X64_AVX512VBMI_X64: return ReadyToRunInstructionSet.Avx512Vbmi;
                             case InstructionSet.X64_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
                             case InstructionSet.X64_AVX512VBMI_VL_X64: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
+                            case InstructionSet.X64_AVX10v1: return ReadyToRunInstructionSet.Avx10v1;
+                            case InstructionSet.X64_AVX10v1_X64: return ReadyToRunInstructionSet.Avx10v1;
+                            case InstructionSet.X64_AVX10v1_V256: return ReadyToRunInstructionSet.Avx10v1_V256;
+                            case InstructionSet.X64_AVX10v1_V256_X64: return ReadyToRunInstructionSet.Avx10v1_V256;
+                            case InstructionSet.X64_AVX10v1_V512: return ReadyToRunInstructionSet.Avx10v1_V512;
+                            case InstructionSet.X64_AVX10v1_V512_X64: return ReadyToRunInstructionSet.Avx10v1_V512;
                             case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128;
                             case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256;
                             case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512;
@@ -191,6 +197,12 @@ public static class ReadyToRunInstructionSetHelper
                             case InstructionSet.X86_AVX512VBMI_X64: return null;
                             case InstructionSet.X86_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
                             case InstructionSet.X86_AVX512VBMI_VL_X64: return null;
+                            case InstructionSet.X86_AVX10v1: return ReadyToRunInstructionSet.Avx10v1;
+                            case InstructionSet.X86_AVX10v1_X64: return null;
+                            case InstructionSet.X86_AVX10v1_V256: return ReadyToRunInstructionSet.Avx10v1_V256;
+                            case InstructionSet.X86_AVX10v1_V256_X64: return null;
+                            case InstructionSet.X86_AVX10v1_V512: return ReadyToRunInstructionSet.Avx10v1_V512;
+                            case InstructionSet.X86_AVX10v1_V512_X64: return null;
                             case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128;
                             case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256;
                             case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512;
diff --git a/src/coreclr/tools/Common/Internal/Runtime/RuntimeConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/RuntimeConstants.cs
index 1c4a195a21d3..8ce74a03072b 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/RuntimeConstants.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/RuntimeConstants.cs
@@ -73,5 +73,8 @@ internal static class MethodFixupCellFlagsConstants
         public const int IsObjectiveCMessageSendMask = 0x8;
         public const int ObjectiveCMessageSendFunctionMask = 0x70;
         public const int ObjectiveCMessageSendFunctionShift = 4;
+        // Uses the same bit as IsObjectiveCMessageSendMask since we never have
+        // TARGET_X86 and FEATURE_OBJCMARSHAL used at the same time.
+        public const int IsStdcall = 0x8;
     }
 }
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
index 25c35d9c611d..a4a6cb925814 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
@@ -215,7 +215,10 @@ which is the right helper to use to allocate an object of a given type. */
         CORINFO_HELP_INIT_PINVOKE_FRAME,   // initialize an inlined PInvoke Frame for the JIT-compiler
 
         CORINFO_HELP_MEMSET,                // Init block of memory
+        CORINFO_HELP_MEMZERO,               // Init block of memory with zeroes
         CORINFO_HELP_MEMCPY,                // Copy block of memory
+        CORINFO_HELP_NATIVE_MEMSET,         // Init block of memory using native memset (not safe for pDst being null,
+                                            // not safe for unbounded size, does not trigger GC)
 
         CORINFO_HELP_RUNTIMEHANDLE_METHOD,  // determine a type/field/method handle at run-time
         CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG, // determine a type/field/method handle at run-time, with IBC logging
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
index 22444a14efbe..f4f381a6df65 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
@@ -352,9 +352,11 @@ private CompilationResult CompileMethodInternal(IMethodNode methodCodeNodeNeedin
             IntPtr exception;
             IntPtr nativeEntry;
             uint codeSize;
+#pragma warning disable CS8500 // takes address of managed type
             var result = JitCompileMethod(out exception,
-                    _jit, (IntPtr)Unsafe.AsPointer(ref _this), _unmanagedCallbacks,
+                    _jit, (IntPtr)(&_this), _unmanagedCallbacks,
                     ref methodInfo, (uint)CorJitFlag.CORJIT_FLAG_CALL_GETJITFLAGS, out nativeEntry, out codeSize);
+#pragma warning restore CS8500
             if (exception != IntPtr.Zero)
             {
                 if (_lastException != null)
@@ -3158,6 +3160,12 @@ private void reportRichMappings(InlineTreeNode* inlineTree, uint numInlineTree,
             Marshal.FreeHGlobal((IntPtr)mappings);
         }
 
+#pragma warning disable CA1822 // Mark members as static
+        private void reportMetadata(byte* key, void* value, nuint length)
+#pragma warning restore CA1822 // Mark members as static
+        {
+        }
+
 #pragma warning disable CA1822 // Mark members as static
         private void* allocateArray(UIntPtr cBytes)
 #pragma warning restore CA1822 // Mark members as static
@@ -3279,7 +3287,7 @@ private void getEEInfo(ref CORINFO_EE_INFO pEEInfoOut)
 
             pEEInfoOut.inlinedCallFrameInfo.size = (uint)SizeOfPInvokeTransitionFrame;
 
-            pEEInfoOut.offsetOfDelegateInstance = (uint)pointerSize;            // Delegate::m_firstParameter
+            pEEInfoOut.offsetOfDelegateInstance = (uint)pointerSize;            // Delegate::_firstParameter
             pEEInfoOut.offsetOfDelegateFirstTarget = OffsetOfDelegateFirstTarget;
 
             pEEInfoOut.sizeOfReversePInvokeFrame = (uint)SizeOfReversePInvokeTransitionFrame;
@@ -3433,6 +3441,11 @@ private bool getSystemVAmd64PassStructInRegisterDescriptor(CORINFO_CLASS_STRUCT_
             return true;
         }
 
+        private void getSwiftLowering(CORINFO_CLASS_STRUCT_* structHnd, ref CORINFO_SWIFT_LOWERING lowering)
+        {
+            lowering = SwiftPhysicalLowering.LowerTypeForSwiftSignature(HandleToObject(structHnd));
+        }
+
         private uint getLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_STRUCT_* cls)
         {
             TypeDesc typeDesc = HandleToObject(cls);
@@ -3891,6 +3904,11 @@ private static RelocType GetRelocType(TargetArchitecture targetArchitecture, ush
                     const ushort IMAGE_REL_ARM64_BRANCH26 = 3;
                     const ushort IMAGE_REL_ARM64_PAGEBASE_REL21 = 4;
                     const ushort IMAGE_REL_ARM64_PAGEOFFSET_12A = 6;
+                    const ushort IMAGE_REL_ARM64_TLSDESC_ADR_PAGE21 = 0x107;
+                    const ushort IMAGE_REL_ARM64_TLSDESC_LD64_LO12 = 0x108;
+                    const ushort IMAGE_REL_ARM64_TLSDESC_ADD_LO12 = 0x109;
+                    const ushort IMAGE_REL_ARM64_TLSDESC_CALL = 0x10A;
+
 
                     switch (fRelocType)
                     {
@@ -3900,6 +3918,14 @@ private static RelocType GetRelocType(TargetArchitecture targetArchitecture, ush
                             return RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21;
                         case IMAGE_REL_ARM64_PAGEOFFSET_12A:
                             return RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A;
+                        case IMAGE_REL_ARM64_TLSDESC_ADR_PAGE21:
+                            return RelocType.IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21;
+                        case IMAGE_REL_ARM64_TLSDESC_ADD_LO12:
+                            return RelocType.IMAGE_REL_AARCH64_TLSDESC_ADD_LO12;
+                        case IMAGE_REL_ARM64_TLSDESC_LD64_LO12:
+                            return RelocType.IMAGE_REL_AARCH64_TLSDESC_LD64_LO12;
+                        case IMAGE_REL_ARM64_TLSDESC_CALL:
+                            return RelocType.IMAGE_REL_AARCH64_TLSDESC_CALL;
                         default:
                             Debug.Fail("Invalid RelocType: " + fRelocType);
                             return 0;
@@ -4011,8 +4037,10 @@ private ushort getRelocTypeHint(void* target)
                 case TargetArchitecture.X64:
                     return (ushort)RelocType.IMAGE_REL_BASED_REL32;
 
+#if READYTORUN
                 case TargetArchitecture.ARM:
                     return (ushort)RelocType.IMAGE_REL_BASED_THUMB_BRANCH24;
+#endif
 
                 default:
                     return ushort.MaxValue;
@@ -4115,6 +4143,9 @@ private uint getJitFlags(ref CORJIT_FLAGS flags, uint sizeInBytes)
                 case TargetArchitecture.X86:
                     Debug.Assert(InstructionSet.X86_SSE2 == InstructionSet.X64_SSE2);
                     Debug.Assert(_compilation.InstructionSetSupport.IsInstructionSetSupported(InstructionSet.X86_SSE2));
+
+                    if ((_compilation.InstructionSetSupport.Flags & InstructionSetSupportFlags.Vector512Throttling) != 0)
+                        flags.Set(CorJitFlag.CORJIT_FLAG_VECTOR512_THROTTLING);
                     break;
 
                 case TargetArchitecture.ARM64:
@@ -4269,7 +4300,7 @@ private HRESULT getPgoInstrumentationResults(CORINFO_METHOD_STRUCT_* ftnHnd, ref
 #pragma warning disable SA1001, SA1113, SA1115 // Commas should be spaced correctly
                     ComputeJitPgoInstrumentationSchema(ObjectToHandle, pgoResultsSchemas, out var nativeSchemas, _cachedMemoryStream
 #if !READYTORUN
-                        , _compilation.CanConstructType
+                        , _compilation.CanReferenceConstructedMethodTable
 #endif
                         );
 #pragma warning restore SA1001, SA1113, SA1115 // Commas should be spaced correctly
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs
index 13844628bb86..09ea54f989fd 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs
@@ -291,12 +291,12 @@ private static byte _resolveVirtualMethod(IntPtr thisHandle, IntPtr* ppException
         }
 
         [UnmanagedCallersOnly]
-        private static void _expandRawHandleIntrinsic(IntPtr thisHandle, IntPtr* ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_GENERICHANDLE_RESULT* pResult)
+        private static void _expandRawHandleIntrinsic(IntPtr thisHandle, IntPtr* ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_METHOD_STRUCT_* callerHandle, CORINFO_GENERICHANDLE_RESULT* pResult)
         {
             var _this = GetThis(thisHandle);
             try
             {
-                _this.expandRawHandleIntrinsic(ref *pResolvedToken, ref *pResult);
+                _this.expandRawHandleIntrinsic(ref *pResolvedToken, callerHandle, ref *pResult);
             }
             catch (Exception ex)
             {
@@ -1033,12 +1033,12 @@ private static byte _getStringChar(IntPtr thisHandle, IntPtr* ppException, CORIN
         }
 
         [UnmanagedCallersOnly]
-        private static byte _getReadyToRunHelper(IntPtr thisHandle, IntPtr* ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_LOOKUP_KIND* pGenericLookupKind, CorInfoHelpFunc id, CORINFO_CONST_LOOKUP* pLookup)
+        private static byte _getReadyToRunHelper(IntPtr thisHandle, IntPtr* ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_LOOKUP_KIND* pGenericLookupKind, CorInfoHelpFunc id, CORINFO_METHOD_STRUCT_* callerHandle, CORINFO_CONST_LOOKUP* pLookup)
         {
             var _this = GetThis(thisHandle);
             try
             {
-                return _this.getReadyToRunHelper(ref *pResolvedToken, ref *pGenericLookupKind, id, ref *pLookup) ? (byte)1 : (byte)0;
+                return _this.getReadyToRunHelper(ref *pResolvedToken, ref *pGenericLookupKind, id, callerHandle, ref *pLookup) ? (byte)1 : (byte)0;
             }
             catch (Exception ex)
             {
@@ -1048,12 +1048,12 @@ private static byte _getReadyToRunHelper(IntPtr thisHandle, IntPtr* ppException,
         }
 
         [UnmanagedCallersOnly]
-        private static void _getReadyToRunDelegateCtorHelper(IntPtr thisHandle, IntPtr* ppException, CORINFO_RESOLVED_TOKEN* pTargetMethod, mdToken targetConstraint, CORINFO_CLASS_STRUCT_* delegateType, CORINFO_LOOKUP* pLookup)
+        private static void _getReadyToRunDelegateCtorHelper(IntPtr thisHandle, IntPtr* ppException, CORINFO_RESOLVED_TOKEN* pTargetMethod, mdToken targetConstraint, CORINFO_CLASS_STRUCT_* delegateType, CORINFO_METHOD_STRUCT_* callerHandle, CORINFO_LOOKUP* pLookup)
         {
             var _this = GetThis(thisHandle);
             try
             {
-                _this.getReadyToRunDelegateCtorHelper(ref *pTargetMethod, targetConstraint, delegateType, ref *pLookup);
+                _this.getReadyToRunDelegateCtorHelper(ref *pTargetMethod, targetConstraint, delegateType, callerHandle, ref *pLookup);
             }
             catch (Exception ex)
             {
@@ -1547,6 +1547,20 @@ private static void _reportRichMappings(IntPtr thisHandle, IntPtr* ppException,
             }
         }
 
+        [UnmanagedCallersOnly]
+        private static void _reportMetadata(IntPtr thisHandle, IntPtr* ppException, byte* key, void* value, UIntPtr length)
+        {
+            var _this = GetThis(thisHandle);
+            try
+            {
+                _this.reportMetadata(key, value, length);
+            }
+            catch (Exception ex)
+            {
+                *ppException = _this.AllocException(ex);
+            }
+        }
+
         [UnmanagedCallersOnly]
         private static void* _allocateArray(IntPtr thisHandle, IntPtr* ppException, UIntPtr cBytes)
         {
@@ -1785,6 +1799,20 @@ private static byte _getSystemVAmd64PassStructInRegisterDescriptor(IntPtr thisHa
             }
         }
 
+        [UnmanagedCallersOnly]
+        private static void _getSwiftLowering(IntPtr thisHandle, IntPtr* ppException, CORINFO_CLASS_STRUCT_* structHnd, CORINFO_SWIFT_LOWERING* pLowering)
+        {
+            var _this = GetThis(thisHandle);
+            try
+            {
+                _this.getSwiftLowering(structHnd, ref *pLowering);
+            }
+            catch (Exception ex)
+            {
+                *ppException = _this.AllocException(ex);
+            }
+        }
+
         [UnmanagedCallersOnly]
         private static uint _getLoongArch64PassStructInRegisterFlags(IntPtr thisHandle, IntPtr* ppException, CORINFO_CLASS_STRUCT_* structHnd)
         {
@@ -1979,12 +2007,12 @@ private static CorInfoHelpFunc _getLazyStringLiteralHelper(IntPtr thisHandle, In
         }
 
         [UnmanagedCallersOnly]
-        private static void _embedGenericHandle(IntPtr thisHandle, IntPtr* ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, byte fEmbedParent, CORINFO_GENERICHANDLE_RESULT* pResult)
+        private static void _embedGenericHandle(IntPtr thisHandle, IntPtr* ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, byte fEmbedParent, CORINFO_METHOD_STRUCT_* callerHandle, CORINFO_GENERICHANDLE_RESULT* pResult)
         {
             var _this = GetThis(thisHandle);
             try
             {
-                _this.embedGenericHandle(ref *pResolvedToken, fEmbedParent != 0, ref *pResult);
+                _this.embedGenericHandle(ref *pResolvedToken, fEmbedParent != 0, callerHandle, ref *pResult);
             }
             catch (Exception ex)
             {
@@ -2551,7 +2579,7 @@ private static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_
 
         private static IntPtr GetUnmanagedCallbacks()
         {
-            void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 172);
+            void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 174);
 
             callbacks[0] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, byte>)&_isIntrinsic;
             callbacks[1] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, byte>)&_notifyMethodInfoUsage;
@@ -2572,7 +2600,7 @@ private static IntPtr GetUnmanagedCallbacks()
             callbacks[16] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, bool*, CORINFO_METHOD_STRUCT_*>)&_getUnboxedEntry;
             callbacks[17] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, CORINFO_CLASS_STRUCT_*>)&_getDefaultComparerClass;
             callbacks[18] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, CORINFO_CLASS_STRUCT_*>)&_getDefaultEqualityComparerClass;
-            callbacks[19] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, CORINFO_GENERICHANDLE_RESULT*, void>)&_expandRawHandleIntrinsic;
+            callbacks[19] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, CORINFO_METHOD_STRUCT_*, CORINFO_GENERICHANDLE_RESULT*, void>)&_expandRawHandleIntrinsic;
             callbacks[20] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, byte>)&_isIntrinsicType;
             callbacks[21] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_SIG_INFO*, bool*, CorInfoCallConvExtension>)&_getUnmanagedCallConv;
             callbacks[22] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_SIG_INFO*, byte>)&_pInvokeMarshalingRequired;
@@ -2622,8 +2650,8 @@ private static IntPtr GetUnmanagedCallbacks()
             callbacks[66] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_OBJECT_STRUCT_*, byte>)&_isObjectImmutable;
             callbacks[67] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_OBJECT_STRUCT_*, int, ushort*, byte>)&_getStringChar;
             callbacks[68] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_OBJECT_STRUCT_*, CORINFO_CLASS_STRUCT_*>)&_getObjectType;
-            callbacks[69] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, CORINFO_LOOKUP_KIND*, CorInfoHelpFunc, CORINFO_CONST_LOOKUP*, byte>)&_getReadyToRunHelper;
-            callbacks[70] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, mdToken, CORINFO_CLASS_STRUCT_*, CORINFO_LOOKUP*, void>)&_getReadyToRunDelegateCtorHelper;
+            callbacks[69] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, CORINFO_LOOKUP_KIND*, CorInfoHelpFunc, CORINFO_METHOD_STRUCT_*, CORINFO_CONST_LOOKUP*, byte>)&_getReadyToRunHelper;
+            callbacks[70] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, mdToken, CORINFO_CLASS_STRUCT_*, CORINFO_METHOD_STRUCT_*, CORINFO_LOOKUP*, void>)&_getReadyToRunDelegateCtorHelper;
             callbacks[71] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_FIELD_STRUCT_*, CORINFO_METHOD_STRUCT_*, CORINFO_CONTEXT_STRUCT*, CorInfoInitClassResult>)&_initClass;
             callbacks[72] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, void>)&_classMustBeLoadedBeforeCodeIsRun;
             callbacks[73] = (delegate* unmanaged<IntPtr, IntPtr*, CorInfoClassId, CORINFO_CLASS_STRUCT_*>)&_getBuiltinClass;
@@ -2657,74 +2685,76 @@ private static IntPtr GetUnmanagedCallbacks()
             callbacks[101] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, uint*, ILVarInfo**, bool*, void>)&_getVars;
             callbacks[102] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, uint, NativeVarInfo*, void>)&_setVars;
             callbacks[103] = (delegate* unmanaged<IntPtr, IntPtr*, InlineTreeNode*, uint, RichOffsetMapping*, uint, void>)&_reportRichMappings;
-            callbacks[104] = (delegate* unmanaged<IntPtr, IntPtr*, UIntPtr, void*>)&_allocateArray;
-            callbacks[105] = (delegate* unmanaged<IntPtr, IntPtr*, void*, void>)&_freeArray;
-            callbacks[106] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_ARG_LIST_STRUCT_*, CORINFO_ARG_LIST_STRUCT_*>)&_getArgNext;
-            callbacks[107] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_SIG_INFO*, CORINFO_ARG_LIST_STRUCT_*, CORINFO_CLASS_STRUCT_**, CorInfoTypeWithMod>)&_getArgType;
-            callbacks[108] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, int, CORINFO_CLASS_STRUCT_**, int>)&_getExactClasses;
-            callbacks[109] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_SIG_INFO*, CORINFO_ARG_LIST_STRUCT_*, CORINFO_CLASS_STRUCT_*>)&_getArgClass;
-            callbacks[110] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, CorInfoHFAElemType>)&_getHFAType;
-            callbacks[111] = (delegate* unmanaged<IntPtr, IntPtr*, void*, void*, byte>)&_runWithErrorTrap;
-            callbacks[112] = (delegate* unmanaged<IntPtr, IntPtr*, void*, void*, byte>)&_runWithSPMIErrorTrap;
-            callbacks[113] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_EE_INFO*, void>)&_getEEInfo;
-            callbacks[114] = (delegate* unmanaged<IntPtr, IntPtr*, char*>)&_getJitTimeLogFilename;
-            callbacks[115] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, mdToken>)&_getMethodDefFromMethod;
-            callbacks[116] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, byte*, UIntPtr, UIntPtr*, UIntPtr>)&_printMethodName;
-            callbacks[117] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, byte**, byte**, byte**, byte*>)&_getMethodNameFromMetadata;
-            callbacks[118] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, uint>)&_getMethodHash;
-            callbacks[119] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR*, byte>)&_getSystemVAmd64PassStructInRegisterDescriptor;
-            callbacks[120] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, uint>)&_getLoongArch64PassStructInRegisterFlags;
-            callbacks[121] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, uint>)&_getRISCV64PassStructInRegisterFlags;
-            callbacks[122] = (delegate* unmanaged<IntPtr, IntPtr*, void**, uint>)&_getThreadTLSIndex;
-            callbacks[123] = (delegate* unmanaged<IntPtr, IntPtr*, void**, int*>)&_getAddrOfCaptureThreadGlobal;
-            callbacks[124] = (delegate* unmanaged<IntPtr, IntPtr*, CorInfoHelpFunc, void**, void*>)&_getHelperFtn;
-            callbacks[125] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_CONST_LOOKUP*, CORINFO_ACCESS_FLAGS, void>)&_getFunctionEntryPoint;
-            callbacks[126] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, byte, CORINFO_CONST_LOOKUP*, void>)&_getFunctionFixedEntryPoint;
-            callbacks[127] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, void**, void*>)&_getMethodSync;
-            callbacks[128] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_MODULE_STRUCT_*, CorInfoHelpFunc>)&_getLazyStringLiteralHelper;
-            callbacks[129] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_MODULE_STRUCT_*, void**, CORINFO_MODULE_STRUCT_*>)&_embedModuleHandle;
-            callbacks[130] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, void**, CORINFO_CLASS_STRUCT_*>)&_embedClassHandle;
-            callbacks[131] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, void**, CORINFO_METHOD_STRUCT_*>)&_embedMethodHandle;
-            callbacks[132] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_FIELD_STRUCT_*, void**, CORINFO_FIELD_STRUCT_*>)&_embedFieldHandle;
-            callbacks[133] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, byte, CORINFO_GENERICHANDLE_RESULT*, void>)&_embedGenericHandle;
-            callbacks[134] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_LOOKUP_KIND*, void>)&_getLocationOfThisType;
-            callbacks[135] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_CONST_LOOKUP*, void>)&_getAddressOfPInvokeTarget;
-            callbacks[136] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_SIG_INFO*, void**, void*>)&_GetCookieForPInvokeCalliSig;
-            callbacks[137] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_SIG_INFO*, byte>)&_canGetCookieForPInvokeCalliSig;
-            callbacks[138] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_JUST_MY_CODE_HANDLE_**, CORINFO_JUST_MY_CODE_HANDLE_*>)&_getJustMyCodeHandle;
-            callbacks[139] = (delegate* unmanaged<IntPtr, IntPtr*, bool*, void**, bool*, void>)&_GetProfilingHandle;
-            callbacks[140] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, CORINFO_RESOLVED_TOKEN*, CORINFO_METHOD_STRUCT_*, CORINFO_CALLINFO_FLAGS, CORINFO_CALL_INFO*, void>)&_getCallInfo;
-            callbacks[141] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, void**, uint>)&_getClassDomainID;
-            callbacks[142] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_FIELD_STRUCT_*, byte*, int, int, byte, byte>)&_getStaticFieldContent;
-            callbacks[143] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_OBJECT_STRUCT_*, byte*, int, int, byte>)&_getObjectContent;
-            callbacks[144] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_FIELD_STRUCT_*, byte*, CORINFO_CLASS_STRUCT_*>)&_getStaticFieldCurrentClass;
-            callbacks[145] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_SIG_INFO*, void**, IntPtr>)&_getVarArgsHandle;
-            callbacks[146] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_SIG_INFO*, byte>)&_canGetVarArgsHandle;
-            callbacks[147] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_MODULE_STRUCT_*, mdToken, void**, InfoAccessType>)&_constructStringLiteral;
-            callbacks[148] = (delegate* unmanaged<IntPtr, IntPtr*, void**, InfoAccessType>)&_emptyStringLiteral;
-            callbacks[149] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_FIELD_STRUCT_*, void**, uint>)&_getFieldThreadLocalStoreID;
-            callbacks[150] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_CLASS_STRUCT_*, CORINFO_METHOD_STRUCT_*, DelegateCtorArgs*, CORINFO_METHOD_STRUCT_*>)&_GetDelegateCtor;
-            callbacks[151] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, void>)&_MethodCompileComplete;
-            callbacks[152] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, CORINFO_SIG_INFO*, CORINFO_GET_TAILCALL_HELPERS_FLAGS, CORINFO_TAILCALL_HELPERS*, byte>)&_getTailCallHelpers;
-            callbacks[153] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, byte, byte>)&_convertPInvokeCalliToCall;
-            callbacks[154] = (delegate* unmanaged<IntPtr, IntPtr*, InstructionSet, byte, byte>)&_notifyInstructionSetUsage;
-            callbacks[155] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CONST_LOOKUP*, void>)&_updateEntryPointForTailCall;
-            callbacks[156] = (delegate* unmanaged<IntPtr, IntPtr*, AllocMemArgs*, void>)&_allocMem;
-            callbacks[157] = (delegate* unmanaged<IntPtr, IntPtr*, byte, byte, uint, void>)&_reserveUnwindInfo;
-            callbacks[158] = (delegate* unmanaged<IntPtr, IntPtr*, byte*, byte*, uint, uint, uint, byte*, CorJitFuncKind, void>)&_allocUnwindInfo;
-            callbacks[159] = (delegate* unmanaged<IntPtr, IntPtr*, UIntPtr, void*>)&_allocGCInfo;
-            callbacks[160] = (delegate* unmanaged<IntPtr, IntPtr*, uint, void>)&_setEHcount;
-            callbacks[161] = (delegate* unmanaged<IntPtr, IntPtr*, uint, CORINFO_EH_CLAUSE*, void>)&_setEHinfo;
-            callbacks[162] = (delegate* unmanaged<IntPtr, IntPtr*, uint, byte*, IntPtr, byte>)&_logMsg;
-            callbacks[163] = (delegate* unmanaged<IntPtr, IntPtr*, byte*, int, byte*, int>)&_doAssert;
-            callbacks[164] = (delegate* unmanaged<IntPtr, IntPtr*, CorJitResult, void>)&_reportFatalError;
-            callbacks[165] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, PgoInstrumentationSchema**, uint*, byte**, PgoSource*, HRESULT>)&_getPgoInstrumentationResults;
-            callbacks[166] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, PgoInstrumentationSchema*, uint, byte**, HRESULT>)&_allocPgoInstrumentationBySchema;
-            callbacks[167] = (delegate* unmanaged<IntPtr, IntPtr*, uint, CORINFO_SIG_INFO*, CORINFO_METHOD_STRUCT_*, void>)&_recordCallSite;
-            callbacks[168] = (delegate* unmanaged<IntPtr, IntPtr*, void*, void*, void*, ushort, int, void>)&_recordRelocation;
-            callbacks[169] = (delegate* unmanaged<IntPtr, IntPtr*, void*, ushort>)&_getRelocTypeHint;
-            callbacks[170] = (delegate* unmanaged<IntPtr, IntPtr*, uint>)&_getExpectedTargetArchitecture;
-            callbacks[171] = (delegate* unmanaged<IntPtr, IntPtr*, CORJIT_FLAGS*, uint, uint>)&_getJitFlags;
+            callbacks[104] = (delegate* unmanaged<IntPtr, IntPtr*, byte*, void*, UIntPtr, void>)&_reportMetadata;
+            callbacks[105] = (delegate* unmanaged<IntPtr, IntPtr*, UIntPtr, void*>)&_allocateArray;
+            callbacks[106] = (delegate* unmanaged<IntPtr, IntPtr*, void*, void>)&_freeArray;
+            callbacks[107] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_ARG_LIST_STRUCT_*, CORINFO_ARG_LIST_STRUCT_*>)&_getArgNext;
+            callbacks[108] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_SIG_INFO*, CORINFO_ARG_LIST_STRUCT_*, CORINFO_CLASS_STRUCT_**, CorInfoTypeWithMod>)&_getArgType;
+            callbacks[109] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, int, CORINFO_CLASS_STRUCT_**, int>)&_getExactClasses;
+            callbacks[110] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_SIG_INFO*, CORINFO_ARG_LIST_STRUCT_*, CORINFO_CLASS_STRUCT_*>)&_getArgClass;
+            callbacks[111] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, CorInfoHFAElemType>)&_getHFAType;
+            callbacks[112] = (delegate* unmanaged<IntPtr, IntPtr*, void*, void*, byte>)&_runWithErrorTrap;
+            callbacks[113] = (delegate* unmanaged<IntPtr, IntPtr*, void*, void*, byte>)&_runWithSPMIErrorTrap;
+            callbacks[114] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_EE_INFO*, void>)&_getEEInfo;
+            callbacks[115] = (delegate* unmanaged<IntPtr, IntPtr*, char*>)&_getJitTimeLogFilename;
+            callbacks[116] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, mdToken>)&_getMethodDefFromMethod;
+            callbacks[117] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, byte*, UIntPtr, UIntPtr*, UIntPtr>)&_printMethodName;
+            callbacks[118] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, byte**, byte**, byte**, byte*>)&_getMethodNameFromMetadata;
+            callbacks[119] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, uint>)&_getMethodHash;
+            callbacks[120] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR*, byte>)&_getSystemVAmd64PassStructInRegisterDescriptor;
+            callbacks[121] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, CORINFO_SWIFT_LOWERING*, void>)&_getSwiftLowering;
+            callbacks[122] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, uint>)&_getLoongArch64PassStructInRegisterFlags;
+            callbacks[123] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, uint>)&_getRISCV64PassStructInRegisterFlags;
+            callbacks[124] = (delegate* unmanaged<IntPtr, IntPtr*, void**, uint>)&_getThreadTLSIndex;
+            callbacks[125] = (delegate* unmanaged<IntPtr, IntPtr*, void**, int*>)&_getAddrOfCaptureThreadGlobal;
+            callbacks[126] = (delegate* unmanaged<IntPtr, IntPtr*, CorInfoHelpFunc, void**, void*>)&_getHelperFtn;
+            callbacks[127] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_CONST_LOOKUP*, CORINFO_ACCESS_FLAGS, void>)&_getFunctionEntryPoint;
+            callbacks[128] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, byte, CORINFO_CONST_LOOKUP*, void>)&_getFunctionFixedEntryPoint;
+            callbacks[129] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, void**, void*>)&_getMethodSync;
+            callbacks[130] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_MODULE_STRUCT_*, CorInfoHelpFunc>)&_getLazyStringLiteralHelper;
+            callbacks[131] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_MODULE_STRUCT_*, void**, CORINFO_MODULE_STRUCT_*>)&_embedModuleHandle;
+            callbacks[132] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, void**, CORINFO_CLASS_STRUCT_*>)&_embedClassHandle;
+            callbacks[133] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, void**, CORINFO_METHOD_STRUCT_*>)&_embedMethodHandle;
+            callbacks[134] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_FIELD_STRUCT_*, void**, CORINFO_FIELD_STRUCT_*>)&_embedFieldHandle;
+            callbacks[135] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, byte, CORINFO_METHOD_STRUCT_*, CORINFO_GENERICHANDLE_RESULT*, void>)&_embedGenericHandle;
+            callbacks[136] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_LOOKUP_KIND*, void>)&_getLocationOfThisType;
+            callbacks[137] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_CONST_LOOKUP*, void>)&_getAddressOfPInvokeTarget;
+            callbacks[138] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_SIG_INFO*, void**, void*>)&_GetCookieForPInvokeCalliSig;
+            callbacks[139] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_SIG_INFO*, byte>)&_canGetCookieForPInvokeCalliSig;
+            callbacks[140] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_JUST_MY_CODE_HANDLE_**, CORINFO_JUST_MY_CODE_HANDLE_*>)&_getJustMyCodeHandle;
+            callbacks[141] = (delegate* unmanaged<IntPtr, IntPtr*, bool*, void**, bool*, void>)&_GetProfilingHandle;
+            callbacks[142] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, CORINFO_RESOLVED_TOKEN*, CORINFO_METHOD_STRUCT_*, CORINFO_CALLINFO_FLAGS, CORINFO_CALL_INFO*, void>)&_getCallInfo;
+            callbacks[143] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CLASS_STRUCT_*, void**, uint>)&_getClassDomainID;
+            callbacks[144] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_FIELD_STRUCT_*, byte*, int, int, byte, byte>)&_getStaticFieldContent;
+            callbacks[145] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_OBJECT_STRUCT_*, byte*, int, int, byte>)&_getObjectContent;
+            callbacks[146] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_FIELD_STRUCT_*, byte*, CORINFO_CLASS_STRUCT_*>)&_getStaticFieldCurrentClass;
+            callbacks[147] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_SIG_INFO*, void**, IntPtr>)&_getVarArgsHandle;
+            callbacks[148] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_SIG_INFO*, byte>)&_canGetVarArgsHandle;
+            callbacks[149] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_MODULE_STRUCT_*, mdToken, void**, InfoAccessType>)&_constructStringLiteral;
+            callbacks[150] = (delegate* unmanaged<IntPtr, IntPtr*, void**, InfoAccessType>)&_emptyStringLiteral;
+            callbacks[151] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_FIELD_STRUCT_*, void**, uint>)&_getFieldThreadLocalStoreID;
+            callbacks[152] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_CLASS_STRUCT_*, CORINFO_METHOD_STRUCT_*, DelegateCtorArgs*, CORINFO_METHOD_STRUCT_*>)&_GetDelegateCtor;
+            callbacks[153] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, void>)&_MethodCompileComplete;
+            callbacks[154] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, CORINFO_SIG_INFO*, CORINFO_GET_TAILCALL_HELPERS_FLAGS, CORINFO_TAILCALL_HELPERS*, byte>)&_getTailCallHelpers;
+            callbacks[155] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_RESOLVED_TOKEN*, byte, byte>)&_convertPInvokeCalliToCall;
+            callbacks[156] = (delegate* unmanaged<IntPtr, IntPtr*, InstructionSet, byte, byte>)&_notifyInstructionSetUsage;
+            callbacks[157] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_CONST_LOOKUP*, void>)&_updateEntryPointForTailCall;
+            callbacks[158] = (delegate* unmanaged<IntPtr, IntPtr*, AllocMemArgs*, void>)&_allocMem;
+            callbacks[159] = (delegate* unmanaged<IntPtr, IntPtr*, byte, byte, uint, void>)&_reserveUnwindInfo;
+            callbacks[160] = (delegate* unmanaged<IntPtr, IntPtr*, byte*, byte*, uint, uint, uint, byte*, CorJitFuncKind, void>)&_allocUnwindInfo;
+            callbacks[161] = (delegate* unmanaged<IntPtr, IntPtr*, UIntPtr, void*>)&_allocGCInfo;
+            callbacks[162] = (delegate* unmanaged<IntPtr, IntPtr*, uint, void>)&_setEHcount;
+            callbacks[163] = (delegate* unmanaged<IntPtr, IntPtr*, uint, CORINFO_EH_CLAUSE*, void>)&_setEHinfo;
+            callbacks[164] = (delegate* unmanaged<IntPtr, IntPtr*, uint, byte*, IntPtr, byte>)&_logMsg;
+            callbacks[165] = (delegate* unmanaged<IntPtr, IntPtr*, byte*, int, byte*, int>)&_doAssert;
+            callbacks[166] = (delegate* unmanaged<IntPtr, IntPtr*, CorJitResult, void>)&_reportFatalError;
+            callbacks[167] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, PgoInstrumentationSchema**, uint*, byte**, PgoSource*, HRESULT>)&_getPgoInstrumentationResults;
+            callbacks[168] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, PgoInstrumentationSchema*, uint, byte**, HRESULT>)&_allocPgoInstrumentationBySchema;
+            callbacks[169] = (delegate* unmanaged<IntPtr, IntPtr*, uint, CORINFO_SIG_INFO*, CORINFO_METHOD_STRUCT_*, void>)&_recordCallSite;
+            callbacks[170] = (delegate* unmanaged<IntPtr, IntPtr*, void*, void*, void*, ushort, int, void>)&_recordRelocation;
+            callbacks[171] = (delegate* unmanaged<IntPtr, IntPtr*, void*, ushort>)&_getRelocTypeHint;
+            callbacks[172] = (delegate* unmanaged<IntPtr, IntPtr*, uint>)&_getExpectedTargetArchitecture;
+            callbacks[173] = (delegate* unmanaged<IntPtr, IntPtr*, CORJIT_FLAGS*, uint, uint>)&_getJitFlags;
 
             return (IntPtr)callbacks;
         }
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
index a5398c85ca07..270d4834a9c2 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
@@ -73,6 +73,9 @@ public enum InstructionSet
         X64_AVX512DQ_VL = InstructionSet_X64.AVX512DQ_VL,
         X64_AVX512VBMI = InstructionSet_X64.AVX512VBMI,
         X64_AVX512VBMI_VL = InstructionSet_X64.AVX512VBMI_VL,
+        X64_AVX10v1 = InstructionSet_X64.AVX10v1,
+        X64_AVX10v1_V256 = InstructionSet_X64.AVX10v1_V256,
+        X64_AVX10v1_V512 = InstructionSet_X64.AVX10v1_V512,
         X64_VectorT128 = InstructionSet_X64.VectorT128,
         X64_VectorT256 = InstructionSet_X64.VectorT256,
         X64_VectorT512 = InstructionSet_X64.VectorT512,
@@ -105,6 +108,9 @@ public enum InstructionSet
         X64_AVX512DQ_VL_X64 = InstructionSet_X64.AVX512DQ_VL_X64,
         X64_AVX512VBMI_X64 = InstructionSet_X64.AVX512VBMI_X64,
         X64_AVX512VBMI_VL_X64 = InstructionSet_X64.AVX512VBMI_VL_X64,
+        X64_AVX10v1_X64 = InstructionSet_X64.AVX10v1_X64,
+        X64_AVX10v1_V256_X64 = InstructionSet_X64.AVX10v1_V256_X64,
+        X64_AVX10v1_V512_X64 = InstructionSet_X64.AVX10v1_V512_X64,
         X86_X86Base = InstructionSet_X86.X86Base,
         X86_SSE = InstructionSet_X86.SSE,
         X86_SSE2 = InstructionSet_X86.SSE2,
@@ -137,6 +143,9 @@ public enum InstructionSet
         X86_AVX512DQ_VL = InstructionSet_X86.AVX512DQ_VL,
         X86_AVX512VBMI = InstructionSet_X86.AVX512VBMI,
         X86_AVX512VBMI_VL = InstructionSet_X86.AVX512VBMI_VL,
+        X86_AVX10v1 = InstructionSet_X86.AVX10v1,
+        X86_AVX10v1_V256 = InstructionSet_X86.AVX10v1_V256,
+        X86_AVX10v1_V512 = InstructionSet_X86.AVX10v1_V512,
         X86_VectorT128 = InstructionSet_X86.VectorT128,
         X86_VectorT256 = InstructionSet_X86.VectorT256,
         X86_VectorT512 = InstructionSet_X86.VectorT512,
@@ -169,6 +178,9 @@ public enum InstructionSet
         X86_AVX512DQ_VL_X64 = InstructionSet_X86.AVX512DQ_VL_X64,
         X86_AVX512VBMI_X64 = InstructionSet_X86.AVX512VBMI_X64,
         X86_AVX512VBMI_VL_X64 = InstructionSet_X86.AVX512VBMI_VL_X64,
+        X86_AVX10v1_X64 = InstructionSet_X86.AVX10v1_X64,
+        X86_AVX10v1_V256_X64 = InstructionSet_X86.AVX10v1_V256_X64,
+        X86_AVX10v1_V512_X64 = InstructionSet_X86.AVX10v1_V512_X64,
     }
     public enum InstructionSet_ARM64
     {
@@ -237,38 +249,44 @@ public enum InstructionSet_X64
         AVX512DQ_VL = 30,
         AVX512VBMI = 31,
         AVX512VBMI_VL = 32,
-        VectorT128 = 33,
-        VectorT256 = 34,
-        VectorT512 = 35,
-        X86Base_X64 = 36,
-        SSE_X64 = 37,
-        SSE2_X64 = 38,
-        SSE3_X64 = 39,
-        SSSE3_X64 = 40,
-        SSE41_X64 = 41,
-        SSE42_X64 = 42,
-        AVX_X64 = 43,
-        AVX2_X64 = 44,
-        AES_X64 = 45,
-        BMI1_X64 = 46,
-        BMI2_X64 = 47,
-        FMA_X64 = 48,
-        LZCNT_X64 = 49,
-        PCLMULQDQ_X64 = 50,
-        POPCNT_X64 = 51,
-        AVXVNNI_X64 = 52,
-        MOVBE_X64 = 53,
-        X86Serialize_X64 = 54,
-        AVX512F_X64 = 55,
-        AVX512F_VL_X64 = 56,
-        AVX512BW_X64 = 57,
-        AVX512BW_VL_X64 = 58,
-        AVX512CD_X64 = 59,
-        AVX512CD_VL_X64 = 60,
-        AVX512DQ_X64 = 61,
-        AVX512DQ_VL_X64 = 62,
-        AVX512VBMI_X64 = 63,
-        AVX512VBMI_VL_X64 = 64,
+        AVX10v1 = 33,
+        AVX10v1_V256 = 34,
+        AVX10v1_V512 = 35,
+        VectorT128 = 36,
+        VectorT256 = 37,
+        VectorT512 = 38,
+        X86Base_X64 = 39,
+        SSE_X64 = 40,
+        SSE2_X64 = 41,
+        SSE3_X64 = 42,
+        SSSE3_X64 = 43,
+        SSE41_X64 = 44,
+        SSE42_X64 = 45,
+        AVX_X64 = 46,
+        AVX2_X64 = 47,
+        AES_X64 = 48,
+        BMI1_X64 = 49,
+        BMI2_X64 = 50,
+        FMA_X64 = 51,
+        LZCNT_X64 = 52,
+        PCLMULQDQ_X64 = 53,
+        POPCNT_X64 = 54,
+        AVXVNNI_X64 = 55,
+        MOVBE_X64 = 56,
+        X86Serialize_X64 = 57,
+        AVX512F_X64 = 58,
+        AVX512F_VL_X64 = 59,
+        AVX512BW_X64 = 60,
+        AVX512BW_VL_X64 = 61,
+        AVX512CD_X64 = 62,
+        AVX512CD_VL_X64 = 63,
+        AVX512DQ_X64 = 64,
+        AVX512DQ_VL_X64 = 65,
+        AVX512VBMI_X64 = 66,
+        AVX512VBMI_VL_X64 = 67,
+        AVX10v1_X64 = 68,
+        AVX10v1_V256_X64 = 69,
+        AVX10v1_V512_X64 = 70,
     }
 
     public enum InstructionSet_X86
@@ -307,38 +325,44 @@ public enum InstructionSet_X86
         AVX512DQ_VL = 30,
         AVX512VBMI = 31,
         AVX512VBMI_VL = 32,
-        VectorT128 = 33,
-        VectorT256 = 34,
-        VectorT512 = 35,
-        X86Base_X64 = 36,
-        SSE_X64 = 37,
-        SSE2_X64 = 38,
-        SSE3_X64 = 39,
-        SSSE3_X64 = 40,
-        SSE41_X64 = 41,
-        SSE42_X64 = 42,
-        AVX_X64 = 43,
-        AVX2_X64 = 44,
-        AES_X64 = 45,
-        BMI1_X64 = 46,
-        BMI2_X64 = 47,
-        FMA_X64 = 48,
-        LZCNT_X64 = 49,
-        PCLMULQDQ_X64 = 50,
-        POPCNT_X64 = 51,
-        AVXVNNI_X64 = 52,
-        MOVBE_X64 = 53,
-        X86Serialize_X64 = 54,
-        AVX512F_X64 = 55,
-        AVX512F_VL_X64 = 56,
-        AVX512BW_X64 = 57,
-        AVX512BW_VL_X64 = 58,
-        AVX512CD_X64 = 59,
-        AVX512CD_VL_X64 = 60,
-        AVX512DQ_X64 = 61,
-        AVX512DQ_VL_X64 = 62,
-        AVX512VBMI_X64 = 63,
-        AVX512VBMI_VL_X64 = 64,
+        AVX10v1 = 33,
+        AVX10v1_V256 = 34,
+        AVX10v1_V512 = 35,
+        VectorT128 = 36,
+        VectorT256 = 37,
+        VectorT512 = 38,
+        X86Base_X64 = 39,
+        SSE_X64 = 40,
+        SSE2_X64 = 41,
+        SSE3_X64 = 42,
+        SSSE3_X64 = 43,
+        SSE41_X64 = 44,
+        SSE42_X64 = 45,
+        AVX_X64 = 46,
+        AVX2_X64 = 47,
+        AES_X64 = 48,
+        BMI1_X64 = 49,
+        BMI2_X64 = 50,
+        FMA_X64 = 51,
+        LZCNT_X64 = 52,
+        PCLMULQDQ_X64 = 53,
+        POPCNT_X64 = 54,
+        AVXVNNI_X64 = 55,
+        MOVBE_X64 = 56,
+        X86Serialize_X64 = 57,
+        AVX512F_X64 = 58,
+        AVX512F_VL_X64 = 59,
+        AVX512BW_X64 = 60,
+        AVX512BW_VL_X64 = 61,
+        AVX512CD_X64 = 62,
+        AVX512CD_VL_X64 = 63,
+        AVX512DQ_X64 = 64,
+        AVX512DQ_VL_X64 = 65,
+        AVX512VBMI_X64 = 66,
+        AVX512VBMI_VL_X64 = 67,
+        AVX10v1_X64 = 68,
+        AVX10v1_V256_X64 = 69,
+        AVX10v1_V512_X64 = 70,
     }
 
     public unsafe struct InstructionSetFlags : IEnumerable<InstructionSet>
@@ -674,6 +698,18 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V256_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V256_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE))
                         resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
@@ -746,6 +782,34 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1))
+                        resultflags.AddInstructionSet(InstructionSet.X64_FMA);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT128))
                         resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT256))
@@ -833,6 +897,34 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1))
+                        resultflags.AddInstructionSet(InstructionSet.X86_FMA);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT128))
                         resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT256))
@@ -969,6 +1061,12 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V256_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
                         resultflags.AddInstructionSet(InstructionSet.X64_SSE);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE))
@@ -1041,6 +1139,34 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_FMA))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
                         resultflags.AddInstructionSet(InstructionSet.X64_VectorT128);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
@@ -1128,6 +1254,34 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_FMA))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V256))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
                         resultflags.AddInstructionSet(InstructionSet.X86_VectorT128);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
@@ -1248,6 +1402,9 @@ public static IEnumerable<InstructionSetInfo> ArchitectureToValidInstructionSets
                     yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X64_AVX512DQ_VL, true);
                     yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X64_AVX512VBMI, true);
                     yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X64_AVX512VBMI_VL, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X64_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v1_v256", "Avx10v1_V256", InstructionSet.X64_AVX10v1_V256, true);
+                    yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X64_AVX10v1_V512, true);
                     yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X64_VectorT128, true);
                     yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X64_VectorT256, true);
                     yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X64_VectorT512, true);
@@ -1286,6 +1443,9 @@ public static IEnumerable<InstructionSetInfo> ArchitectureToValidInstructionSets
                     yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X86_AVX512DQ_VL, true);
                     yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X86_AVX512VBMI, true);
                     yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X86_AVX512VBMI_VL, true);
+                    yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X86_AVX10v1, true);
+                    yield return new InstructionSetInfo("avx10v1_v256", "Avx10v1_V256", InstructionSet.X86_AVX10v1_V256, true);
+                    yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X86_AVX10v1_V512, true);
                     yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X86_VectorT128, true);
                     yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X86_VectorT256, true);
                     yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X86_VectorT512, true);
@@ -1378,6 +1538,12 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture)
                         AddInstructionSet(InstructionSet.X64_AVX512VBMI_X64);
                     if (HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL))
                         AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL_X64);
+                    if (HasInstructionSet(InstructionSet.X64_AVX10v1))
+                        AddInstructionSet(InstructionSet.X64_AVX10v1_X64);
+                    if (HasInstructionSet(InstructionSet.X64_AVX10v1_V256))
+                        AddInstructionSet(InstructionSet.X64_AVX10v1_V256_X64);
+                    if (HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
+                        AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64);
                     break;
 
                 case TargetArchitecture.X86:
@@ -1432,6 +1598,9 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc
                     AddInstructionSet(InstructionSet.X64_AVX512DQ_VL_X64);
                     AddInstructionSet(InstructionSet.X64_AVX512VBMI_X64);
                     AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL_X64);
+                    AddInstructionSet(InstructionSet.X64_AVX10v1_X64);
+                    AddInstructionSet(InstructionSet.X64_AVX10v1_V256_X64);
+                    AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64);
                     break;
 
                 case TargetArchitecture.X86:
@@ -1464,6 +1633,9 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc
                     AddInstructionSet(InstructionSet.X86_AVX512DQ_VL_X64);
                     AddInstructionSet(InstructionSet.X86_AVX512VBMI_X64);
                     AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL_X64);
+                    AddInstructionSet(InstructionSet.X86_AVX10v1_X64);
+                    AddInstructionSet(InstructionSet.X86_AVX10v1_V256_X64);
+                    AddInstructionSet(InstructionSet.X86_AVX10v1_V512_X64);
                     break;
             }
         }
@@ -1741,6 +1913,24 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                         else
                         { return InstructionSet.X64_AVX512VBMI; }
 
+                    case "Avx10v1":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_AVX10v1_X64; }
+                        else
+                        { return InstructionSet.X64_AVX10v1; }
+
+                    case "Avx10v1_V256":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_AVX10v1_V256_X64; }
+                        else
+                        { return InstructionSet.X64_AVX10v1_V256; }
+
+                    case "Avx10v1_V512":
+                        if (nestedTypeName == "X64")
+                        { return InstructionSet.X64_AVX10v1_V512_X64; }
+                        else
+                        { return InstructionSet.X64_AVX10v1_V512; }
+
                     case "VectorT128":
                         { return InstructionSet.X64_VectorT128; }
 
@@ -1844,6 +2034,15 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                         else
                         { return InstructionSet.X86_AVX512VBMI; }
 
+                    case "Avx10v1":
+                        { return InstructionSet.X86_AVX10v1; }
+
+                    case "Avx10v1_V256":
+                        { return InstructionSet.X86_AVX10v1_V256; }
+
+                    case "Avx10v1_V512":
+                        { return InstructionSet.X86_AVX10v1_V512; }
+
                     case "VectorT128":
                         { return InstructionSet.X86_VectorT128; }
 
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
index 07ab857727f6..2276c5cdc9a7 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
@@ -3,7 +3,10 @@
 
 using System;
 using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+using System.Text;
 using Internal.Pgo;
 
 namespace Internal.JitInterface
@@ -1493,4 +1496,58 @@ public unsafe struct CORINFO_TYPE_LAYOUT_NODE
         private byte _hasSignificantPadding;
         public bool hasSignificantPadding { get => _hasSignificantPadding != 0; set => _hasSignificantPadding = value ? (byte)1 : (byte)0; }
     }
+
+    public struct CORINFO_SWIFT_LOWERING
+    {
+        private byte _byReference;
+        public bool byReference { get => _byReference != 0; set => _byReference = value ? (byte)1 : (byte)0; }
+
+        [InlineArray(4)]
+        private struct SwiftLoweredTypes
+        {
+            public CorInfoType type;
+        }
+
+        [InlineArray(4)]
+        private struct LoweredOffsets
+        {
+            public uint offset;
+        }
+
+        private SwiftLoweredTypes _loweredElements;
+
+        [UnscopedRef]
+        public Span<CorInfoType> LoweredElements => _loweredElements;
+
+        private LoweredOffsets _offsets;
+
+        [UnscopedRef]
+        public Span<uint> Offsets => _offsets;
+
+        public nint numLoweredElements;
+
+        // Override for a better unit test experience
+        public override string ToString()
+        {
+            if (byReference)
+            {
+                return "byReference";
+            }
+
+            var stringBuilder = new StringBuilder();
+            stringBuilder.Append('{');
+            for (int i = 0; i < numLoweredElements; i++)
+            {
+                if (i != 0)
+                {
+                    stringBuilder.Append(", ");
+                }
+                stringBuilder.Append(LoweredElements[i]);
+                stringBuilder.Append(": ");
+                stringBuilder.Append(Offsets[i]);
+            }
+            stringBuilder.Append('}');
+            return stringBuilder.ToString();
+        }
+    }
 }
diff --git a/src/coreclr/tools/Common/JitInterface/JitConfigProvider.cs b/src/coreclr/tools/Common/JitInterface/JitConfigProvider.cs
index 5acb83597cc0..fd20788ea811 100644
--- a/src/coreclr/tools/Common/JitInterface/JitConfigProvider.cs
+++ b/src/coreclr/tools/Common/JitInterface/JitConfigProvider.cs
@@ -130,8 +130,6 @@ public string GetStringConfigValue(string name)
 
         private static string GetTargetSpec(TargetDetails target)
         {
-            // Use the same Jir for browser and wasi
-            string targetOSComponent = (target.OperatingSystem == TargetOS.Windows ? "win" : "unix");
             string targetArchComponent = target.Architecture switch
             {
                 TargetArchitecture.X86 => "x86",
@@ -145,10 +143,15 @@ private static string GetTargetSpec(TargetDetails target)
                 _ => throw new NotImplementedException(target.Architecture.ToString())
             };
 
-            if (target.IsWasm || (target.Architecture == TargetArchitecture.ARM64) || (target.Architecture == TargetArchitecture.ARM))
+            string targetOSComponent;
+            if (target.Architecture is TargetArchitecture.ARM64 or TargetArchitecture.ARM || target.IsWasm)
             {
                 targetOSComponent = "universal";
             }
+            else
+            {
+                targetOSComponent = target.OperatingSystem == TargetOS.Windows ? "win" : "unix";
+            }
 
             return targetOSComponent + '_' + targetArchComponent + "_" + RuntimeInformation.ProcessArchitecture.ToString().ToLowerInvariant();
         }
diff --git a/src/coreclr/tools/Common/JitInterface/LoongArch64PassStructInRegister.cs b/src/coreclr/tools/Common/JitInterface/LoongArch64PassStructInRegister.cs
index 8592386dbb7c..b0c193d09500 100644
--- a/src/coreclr/tools/Common/JitInterface/LoongArch64PassStructInRegister.cs
+++ b/src/coreclr/tools/Common/JitInterface/LoongArch64PassStructInRegister.cs
@@ -55,11 +55,7 @@ public static uint GetLoongArch64PassStructInRegisterFlags(TypeDesc typeDesc)
             int fieldIndex = 0;
             foreach (FieldDesc field in typeDesc.GetFields())
             {
-                if (fieldIndex > 1)
-                {
-                    return (uint)StructFloatFieldInfoFlags.STRUCT_NO_FLOAT_FIELD;
-                }
-                else if (field.IsStatic)
+                if (field.IsStatic)
                 {
                     continue;
                 }
@@ -162,6 +158,11 @@ public static uint GetLoongArch64PassStructInRegisterFlags(TypeDesc typeDesc)
 
                     default:
                     {
+                        if ((numIntroducedFields == 2) && (field.FieldType.Category == TypeFlags.Class))
+                        {
+                            return (uint)StructFloatFieldInfoFlags.STRUCT_NO_FLOAT_FIELD;
+                        }
+
                         if (field.FieldType.GetElementSize().AsInt == 8)
                         {
                             if (numIntroducedFields > 1)
diff --git a/src/coreclr/tools/Common/JitInterface/SwiftPhysicalLowering.cs b/src/coreclr/tools/Common/JitInterface/SwiftPhysicalLowering.cs
new file mode 100644
index 000000000000..f5d8afc33d31
--- /dev/null
+++ b/src/coreclr/tools/Common/JitInterface/SwiftPhysicalLowering.cs
@@ -0,0 +1,247 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+using Internal.TypeSystem;
+
+namespace Internal.JitInterface
+{
+    public static class SwiftPhysicalLowering
+    {
+        private enum LoweredType
+        {
+            Empty,
+            Opaque,
+            Int64,
+            Float,
+            Double,
+        }
+
+        private sealed class LoweringVisitor(int pointerSize) : FieldLayoutIntervalCalculator<LoweredType>(pointerSize)
+        {
+            protected override LoweredType EmptyIntervalData => LoweredType.Empty;
+
+            protected override bool IntervalsHaveCompatibleTags(LoweredType existingTag, LoweredType nextTag)
+            {
+                // Adjacent ranges mapped to opaque or empty can be combined.
+                if (existingTag is LoweredType.Empty
+                    && nextTag is LoweredType.Empty)
+                {
+                    return true;
+                }
+
+                if (existingTag is LoweredType.Opaque
+                    && nextTag is LoweredType.Opaque)
+                {
+                    return true;
+                }
+
+                return false;
+            }
+
+            protected override FieldLayoutInterval CombineIntervals(FieldLayoutInterval firstInterval, FieldLayoutInterval nextInterval)
+            {
+                FieldLayoutInterval resultInterval = firstInterval;
+                resultInterval.EndSentinel = nextInterval.EndSentinel;
+                if (resultInterval.Tag != nextInterval.Tag)
+                {
+                    resultInterval.Tag = LoweredType.Opaque;
+                }
+                return resultInterval;
+            }
+
+            protected override LoweredType GetIntervalDataForType(int offset, TypeDesc fieldType)
+            {
+                // Comments here are from the Swift Calling Convention document:
+                // In all of these examples, the maximum voluntary integer size is 4
+                // (`i32`) unless otherwise specified.
+
+                // If any range is mapped as a non-empty, non-opaque type, but its start
+                // offset is not a multiple of its natural alignment, remap it as opaque.
+                // For these purposes, the natural alignment of an integer type is the
+                // minimum of its size and the maximum voluntary integer size; the
+                // natural alignment of any other type is its C ABI type.
+                //
+                // TODO: What about 8-byte integers aligned at 4-byte boundaries?
+                // Can this even be done in Swift?
+                if (fieldType is MetadataType mdType && offset % mdType.InstanceFieldAlignment.AsInt != 0)
+                {
+                    return LoweredType.Opaque;
+                }
+
+                if (fieldType.Category is TypeFlags.Single)
+                {
+                    return LoweredType.Float;
+                }
+
+                if (fieldType.Category is TypeFlags.Double)
+                {
+                    return LoweredType.Double;
+                }
+
+                if (fieldType.Category is TypeFlags.UInt64 or TypeFlags.Int64)
+                {
+                    return LoweredType.Int64;
+                }
+
+                Debug.Assert(PointerSize == 8, "Swift interop is only supported on 64-bit platforms.");
+
+                if (fieldType.Category is TypeFlags.IntPtr or TypeFlags.UIntPtr or TypeFlags.Pointer or TypeFlags.FunctionPointer)
+                {
+                    return LoweredType.Int64;
+                }
+
+                Debug.Assert(fieldType.IsPrimitiveNumeric);
+
+                // If any range is mapped as an integer type that is not larger than the
+                // maximum voluntary size, remap it as opaque. Combine adjacent opaque
+                // ranges.
+                return LoweredType.Opaque;
+            }
+
+            protected override bool NeedsRecursiveLayout(int offset, TypeDesc fieldType) => fieldType.IsValueType && !fieldType.IsPrimitiveNumeric;
+
+            private List<FieldLayoutInterval> CreateConsolidatedIntervals()
+            {
+                // First, let's make a list of exclusively non-empty intervals.
+                List<FieldLayoutInterval> consolidatedIntervals = new(Intervals.Count);
+                foreach (var interval in Intervals)
+                {
+                    if (interval.Tag != LoweredType.Empty)
+                    {
+                        consolidatedIntervals.Add(interval);
+                    }
+                }
+
+                // Now, we'll look for adjacent opaque intervals and combine them.
+                for (int i = 0; i < consolidatedIntervals.Count - 1; i++)
+                {
+                    // Only merge sequential opaque intervals that are within the same PointerSize-sized chunk.
+                    if (consolidatedIntervals[i].Tag == LoweredType.Opaque
+                        && consolidatedIntervals[i + 1].Tag == LoweredType.Opaque
+                        && (consolidatedIntervals[i].EndSentinel - 1) / PointerSize == consolidatedIntervals[i + 1].Start / PointerSize)
+                    {
+                        consolidatedIntervals[i] = CombineIntervals(consolidatedIntervals[i], consolidatedIntervals[i + 1]);
+                        consolidatedIntervals.RemoveAt(i + 1);
+                        i--;
+                    }
+                }
+
+                return consolidatedIntervals;
+            }
+
+            public List<(CorInfoType, int)> GetLoweredTypeSequence()
+            {
+                List<(CorInfoType, int)> loweredTypes = new();
+                foreach (var interval in CreateConsolidatedIntervals())
+                {
+                    // Empty intervals at this point don't need to be represented in the lowered type sequence.
+                    // We want to skip over them.
+                    if (interval.Tag == LoweredType.Empty)
+                        continue;
+
+                    if (interval.Tag == LoweredType.Float)
+                    {
+                        loweredTypes.Add((CorInfoType.CORINFO_TYPE_FLOAT, interval.Start));
+                    }
+
+                    if (interval.Tag == LoweredType.Double)
+                    {
+                        loweredTypes.Add((CorInfoType.CORINFO_TYPE_DOUBLE, interval.Start));
+                    }
+
+                    if (interval.Tag == LoweredType.Int64)
+                    {
+                        loweredTypes.Add((CorInfoType.CORINFO_TYPE_LONG, interval.Start));
+                    }
+
+                    if (interval.Tag is LoweredType.Opaque)
+                    {
+                        // We need to split the opaque ranges into integer parameters.
+                        // As part of this splitting, we must ensure that we don't introduce alignment padding.
+                        // This lowering algorithm should produce a lowered type sequence that would have the same padding for
+                        // a naturally-aligned struct with the lowered fields as the original type has.
+                        // This algorithm intends to split the opaque range into the least number of lowered elements that covers the entire range.
+                        // The lowered range is allowed to extend past the end of the opaque range (including past the end of the struct),
+                        // but not into the next non-empty interval.
+                        // However, due to the properties of the lowering (the only non-8 byte elements of the lowering are 4-byte floats),
+                        // we'll never encounter a scenario where we need would need to account for a correctly-aligned
+                        // opaque range of > 4 bytes that we must not pad to 8 bytes.
+
+
+                        // While we need to fill more than 4 bytes and the sequence is currently 8-byte aligned, we'll split into 8-byte integers.
+                        // While we need to fill more than 2 bytes but less than 4 and the sequence is 4-byte aligned, we'll use a 4-byte integer to represent the rest of the parameters.
+                        // While we need to fill more than 1 bytes and the sequence is 2-byte aligned, we'll use a 2-byte integer to represent the rest of the parameters.
+                        // While we need to fill at least 1 byte, we'll use a 1-byte integer to represent the rest of the parameters.
+                        int opaqueIntervalStart = interval.Start;
+                        int remainingIntervalSize = interval.Size;
+                        while (remainingIntervalSize > 0)
+                        {
+                            if (remainingIntervalSize > 4 && opaqueIntervalStart == opaqueIntervalStart.AlignUp(8))
+                            {
+                                loweredTypes.Add((CorInfoType.CORINFO_TYPE_LONG, opaqueIntervalStart));
+                                opaqueIntervalStart += 8;
+                                remainingIntervalSize -= 8;
+                            }
+                            else if (remainingIntervalSize > 2 && opaqueIntervalStart == opaqueIntervalStart.AlignUp(4))
+                            {
+                                loweredTypes.Add((CorInfoType.CORINFO_TYPE_INT, opaqueIntervalStart));
+                                opaqueIntervalStart += 4;
+                                remainingIntervalSize -= 4;
+                            }
+                            else if (remainingIntervalSize > 1 && opaqueIntervalStart == opaqueIntervalStart.AlignUp(2))
+                            {
+                                loweredTypes.Add((CorInfoType.CORINFO_TYPE_SHORT, opaqueIntervalStart));
+                                opaqueIntervalStart += 2;
+                                remainingIntervalSize -= 2;
+                            }
+                            else
+                            {
+                                loweredTypes.Add((CorInfoType.CORINFO_TYPE_BYTE, opaqueIntervalStart));
+                                opaqueIntervalStart++;
+                                remainingIntervalSize--;
+                            }
+                        }
+                    }
+                }
+                return loweredTypes;
+            }
+        }
+
+        public static CORINFO_SWIFT_LOWERING LowerTypeForSwiftSignature(TypeDesc type)
+        {
+            if (!type.IsValueType || type is DefType { ContainsGCPointers: true })
+            {
+                Debug.Fail("Non-unmanaged types should not be passed directly to a Swift function.");
+                return new() { byReference = true };
+            }
+
+            LoweringVisitor visitor = new(type.Context.Target.PointerSize);
+            visitor.AddFields(type, addTrailingEmptyInterval: false);
+
+            List<(CorInfoType type, int offset)> loweredTypes = visitor.GetLoweredTypeSequence();
+
+            // If a type has a primitive sequence with more than 4 elements, Swift passes it by reference.
+            if (loweredTypes.Count > 4)
+            {
+                return new() { byReference = true };
+            }
+
+            CORINFO_SWIFT_LOWERING lowering = new()
+            {
+                byReference = false,
+                numLoweredElements = loweredTypes.Count
+            };
+
+            for (int i = 0; i < loweredTypes.Count; i++)
+            {
+                lowering.LoweredElements[i] = loweredTypes[i].type;
+                lowering.Offsets[i] = (uint)loweredTypes[i].offset;
+            }
+
+            return lowering;
+        }
+    }
+}
diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
index 7b6e15b87bd8..5f1953f71e91 100644
--- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
+++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
@@ -57,6 +57,9 @@ instructionset     ,X86   ,Avx512DQ             ,        ,35 ,AVX512DQ
 instructionset     ,X86   ,Avx512DQ_VL          ,        ,36 ,AVX512DQ_VL           ,avx512dq_vl
 instructionset     ,X86   ,Avx512Vbmi           ,        ,37 ,AVX512VBMI            ,avx512vbmi
 instructionset     ,X86   ,Avx512Vbmi_VL        ,        ,38 ,AVX512VBMI_VL         ,avx512vbmi_vl
+instructionset     ,X86   ,Avx10v1              ,        ,44 ,AVX10v1               ,avx10v1
+instructionset     ,X86   ,Avx10v1_V256         ,        ,45 ,AVX10v1_V256          ,avx10v1_v256
+instructionset     ,X86   ,Avx10v1_V512         ,        ,46 ,AVX10v1_V512          ,avx10v1_v512
 instructionset     ,X86   ,VectorT128           ,        ,39 ,VectorT128            ,vectort128
 instructionset     ,X86   ,VectorT256           ,        ,40 ,VectorT256            ,vectort256
 instructionset     ,X86   ,VectorT512           ,        ,41 ,VectorT512            ,vectort512
@@ -90,6 +93,9 @@ instructionset64bit,X86   ,AVX512DQ
 instructionset64bit,X86   ,AVX512DQ_VL
 instructionset64bit,X86   ,AVX512VBMI
 instructionset64bit,X86   ,AVX512VBMI_VL
+instructionset64bit,X86   ,AVX10v1
+instructionset64bit,X86   ,AVX10v1_V256
+instructionset64bit,X86   ,AVX10v1_V512
 
 vectorinstructionset,X86  ,Vector128
 vectorinstructionset,X86  ,Vector256
@@ -131,6 +137,20 @@ implication        ,X86   ,AVX512DQ_VL          ,AVX512F_VL
 implication        ,X86   ,AVX512VBMI           ,AVX512BW
 implication        ,X86   ,AVX512VBMI_VL        ,AVX512VBMI
 implication        ,X86   ,AVX512VBMI_VL        ,AVX512BW_VL
+implication        ,X86   ,AVX10v1              ,AVX2
+implication        ,X86   ,AVX10v1              ,FMA
+implication        ,X86   ,AVX10v1_V256         ,AVX10v1
+implication        ,X86   ,AVX10v1_V512         ,AVX10v1_V256
+implication        ,X86   ,AVX10v1_V512         ,AVX512F
+implication        ,X86   ,AVX10v1_V512         ,AVX512F_VL
+implication        ,X86   ,AVX10v1_V512         ,AVX512CD
+implication        ,X86   ,AVX10v1_V512         ,AVX512CD_VL
+implication        ,X86   ,AVX10v1_V512         ,AVX512BW
+implication        ,X86   ,AVX10v1_V512         ,AVX512BW_VL
+implication        ,X86   ,AVX10v1_V512         ,AVX512DQ
+implication        ,X86   ,AVX10v1_V512         ,AVX512DQ_VL
+implication        ,X86   ,AVX10v1_V512         ,AVX512VBMI
+implication        ,X86   ,AVX10v1_V512         ,AVX512VBMI_VL
 implication        ,X86   ,VectorT128           ,SSE2
 implication        ,X86   ,VectorT256           ,AVX2
 implication        ,X86   ,VectorT512           ,AVX512F
diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt
index 3407e4e3a87c..8d6ecabaa77c 100644
--- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt
+++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt
@@ -83,6 +83,7 @@ CORINFO_RESOLVED_TOKEN*,ref CORINFO_RESOLVED_TOKEN
 CORINFO_RESOLVED_TOKEN_PTR,CORINFO_RESOLVED_TOKEN*,CORINFO_RESOLVED_TOKEN*,CORINFO_RESOLVED_TOKEN*
 CORINFO_EE_INFO*,ref CORINFO_EE_INFO
 CORINFO_TAILCALL_HELPERS*,ref CORINFO_TAILCALL_HELPERS
+CORINFO_SWIFT_LOWERING*,ref CORINFO_SWIFT_LOWERING
 CORINFO_GENERICHANDLE_RESULT*,ref CORINFO_GENERICHANDLE_RESULT
 CORINFO_METHOD_INFO*,CORINFO_METHOD_INFO*
 CORINFO_FIELD_INFO*,CORINFO_FIELD_INFO*
@@ -181,7 +182,7 @@ FUNCTIONS
     CORINFO_METHOD_HANDLE getUnboxedEntry(CORINFO_METHOD_HANDLE ftn, bool* requiresInstMethodTableArg);
     CORINFO_CLASS_HANDLE getDefaultComparerClass(CORINFO_CLASS_HANDLE elemType);
     CORINFO_CLASS_HANDLE getDefaultEqualityComparerClass(CORINFO_CLASS_HANDLE elemType);
-    void expandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN *        pResolvedToken, CORINFO_GENERICHANDLE_RESULT *  pResult);
+    void expandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN * pResolvedToken, CORINFO_METHOD_HANDLE callerHandle, CORINFO_GENERICHANDLE_RESULT * pResult);
     bool isIntrinsicType(           CORINFO_CLASS_HANDLE        classHnd );
     CorInfoCallConvExtension getUnmanagedCallConv(            CORINFO_METHOD_HANDLE       method,            CORINFO_SIG_INFO*           callSiteSig, bool* pSuppressGCTransition);
     bool pInvokeMarshalingRequired(        CORINFO_METHOD_HANDLE       method,        CORINFO_SIG_INFO*           callSiteSig        );
@@ -231,8 +232,8 @@ FUNCTIONS
     bool isObjectImmutable(CORINFO_OBJECT_HANDLE objPtr)
     bool getStringChar(CORINFO_OBJECT_HANDLE strObj, int index, uint16_t* value);
     CORINFO_CLASS_HANDLE getObjectType(CORINFO_OBJECT_HANDLE objPtr)
-    bool getReadyToRunHelper(CORINFO_RESOLVED_TOKEN * pResolvedToken, CORINFO_LOOKUP_KIND * pGenericLookupKind, CorInfoHelpFunc id, CORINFO_CONST_LOOKUP *pLookup)
-    void getReadyToRunDelegateCtorHelper(CORINFO_RESOLVED_TOKEN * pTargetMethod, mdToken targetConstraint, CORINFO_CLASS_HANDLE delegateType, CORINFO_LOOKUP *pLookup)
+    bool getReadyToRunHelper(CORINFO_RESOLVED_TOKEN * pResolvedToken, CORINFO_LOOKUP_KIND * pGenericLookupKind, CorInfoHelpFunc id, CORINFO_METHOD_HANDLE callerHandle, CORINFO_CONST_LOOKUP *pLookup)
+    void getReadyToRunDelegateCtorHelper(CORINFO_RESOLVED_TOKEN * pTargetMethod, mdToken targetConstraint, CORINFO_CLASS_HANDLE delegateType, CORINFO_METHOD_HANDLE callerHandle, CORINFO_LOOKUP *pLookup)
     CorInfoInitClassResult initClass(CORINFO_FIELD_HANDLE field, CORINFO_METHOD_HANDLE method, CORINFO_CONTEXT_HANDLE context)
     void classMustBeLoadedBeforeCodeIsRun(CORINFO_CLASS_HANDLE cls)
     CORINFO_CLASS_HANDLE getBuiltinClass(CorInfoClassId classId)
@@ -266,6 +267,7 @@ FUNCTIONS
     void getVars(CORINFO_METHOD_HANDLE ftn, uint32_t* cVars, ICorDebugInfo::ILVarInfo** vars, bool* extendOthers)
     void setVars(CORINFO_METHOD_HANDLE ftn, uint32_t cVars, ICorDebugInfo::NativeVarInfo* vars)
     void reportRichMappings(ICorDebugInfo::InlineTreeNode* inlineTreeNodes, uint32_t numInlineTreeNodes, ICorDebugInfo::RichOffsetMapping* mappings, uint32_t numMappings)
+    void reportMetadata(const char* key, const void* value, size_t length)
     void*allocateArray(size_t cBytes);
     void freeArray(void*array);
     CORINFO_ARG_LIST_HANDLE getArgNext(CORINFO_ARG_LIST_HANDLE args);
@@ -282,6 +284,7 @@ FUNCTIONS
     const char* getMethodNameFromMetadata(CORINFO_METHOD_HANDLE ftn, const char **className, const char **namespaceName, const char **enclosingClassName);
     unsigned getMethodHash(CORINFO_METHOD_HANDLE       ftn);
     bool getSystemVAmd64PassStructInRegisterDescriptor(CORINFO_CLASS_HANDLE  structHnd, SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr);
+    void getSwiftLowering(CORINFO_CLASS_HANDLE structHnd, CORINFO_SWIFT_LOWERING* pLowering);
     uint32_t getLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd);
     uint32_t getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd);
     uint32_t getThreadTLSIndex(void                  **ppIndirection);
@@ -295,7 +298,7 @@ FUNCTIONS
     CORINFO_CLASS_HANDLE embedClassHandle(CORINFO_CLASS_HANDLE    handle, void                  **ppIndirection);
     CORINFO_METHOD_HANDLE embedMethodHandle(CORINFO_METHOD_HANDLE   handle, void                  **ppIndirection);
     CORINFO_FIELD_HANDLE embedFieldHandle(CORINFO_FIELD_HANDLE    handle, void                  **ppIndirection);
-    void embedGenericHandle(CORINFO_RESOLVED_TOKEN *        pResolvedToken, bool fEmbedParent, CORINFO_GENERICHANDLE_RESULT *  pResult);
+    void embedGenericHandle(CORINFO_RESOLVED_TOKEN *        pResolvedToken, bool fEmbedParent, CORINFO_METHOD_HANDLE   callerHandle, CORINFO_GENERICHANDLE_RESULT *  pResult);
     void getLocationOfThisType(CORINFO_METHOD_HANDLE context, CORINFO_LOOKUP_KIND* pLookupKind);
     void getAddressOfPInvokeTarget(CORINFO_METHOD_HANDLE   method, CORINFO_CONST_LOOKUP *pLookup);
     void* GetCookieForPInvokeCalliSig(CORINFO_SIG_INFO* szMetaSig, void           ** ppIndirection);
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/ArrayType.cs b/src/coreclr/tools/Common/TypeSystem/Common/ArrayType.cs
index 4bd5f2b49b32..768f5f7eaa4f 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/ArrayType.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/ArrayType.cs
@@ -260,9 +260,18 @@ public override MethodSignature Signature
                         case ArrayMethodKind.AddressWithHiddenArg:
                             {
                                 var parameters = new TypeDesc[_owningType.Rank + 1];
-                                parameters[0] = Context.GetPointerType(Context.GetWellKnownType(WellKnownType.Void));
-                                for (int i = 0; i < _owningType.Rank; i++)
-                                    parameters[i + 1] = _owningType.Context.GetWellKnownType(WellKnownType.Int32);
+                                if (Context.Target.Architecture == TargetArchitecture.X86)
+                                {
+                                    for (int i = 0; i < _owningType.Rank; i++)
+                                        parameters[i] = _owningType.Context.GetWellKnownType(WellKnownType.Int32);
+                                    parameters[_owningType.Rank] = Context.GetPointerType(Context.GetWellKnownType(WellKnownType.Void));
+                                }
+                                else
+                                {
+                                    parameters[0] = Context.GetPointerType(Context.GetWellKnownType(WellKnownType.Void));
+                                    for (int i = 0; i < _owningType.Rank; i++)
+                                        parameters[i + 1] = _owningType.Context.GetWellKnownType(WellKnownType.Int32);
+                                }
                                 _signature = new MethodSignature(0, 0, _owningType.ElementType.MakeByRefType(), parameters, MethodSignature.EmbeddedSignatureMismatchPermittedFlag);
                             }
                             break;
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/ConstructedTypeRewritingHelpers.cs b/src/coreclr/tools/Common/TypeSystem/Common/ConstructedTypeRewritingHelpers.cs
index 41e119bae681..3019606023a1 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/ConstructedTypeRewritingHelpers.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/ConstructedTypeRewritingHelpers.cs
@@ -78,7 +78,6 @@ public static TypeDesc ReplaceTypesInConstructionOfType(this TypeDesc type, Type
             if (type.HasInstantiation)
             {
                 TypeDesc[] newInstantiation = null;
-                Debug.Assert(type is InstantiatedType);
                 int instantiationIndex = 0;
                 for (; instantiationIndex < type.Instantiation.Length; instantiationIndex++)
                 {
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/ExplicitLayoutValidator.cs b/src/coreclr/tools/Common/TypeSystem/Common/ExplicitLayoutValidator.cs
index 8e2eda382f46..b176d0a1c506 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/ExplicitLayoutValidator.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/ExplicitLayoutValidator.cs
@@ -7,7 +7,7 @@
 
 namespace Internal.TypeSystem
 {
-    public struct ExplicitLayoutValidator
+    public static class ExplicitLayoutValidator
     {
         private enum FieldLayoutTag : byte
         {
@@ -17,291 +17,92 @@ private enum FieldLayoutTag : byte
             ByRef,
         }
 
-        private struct FieldLayoutInterval : IComparable<FieldLayoutInterval>
+        private sealed class Validator(MetadataType type) : FieldLayoutIntervalCalculator<FieldLayoutTag>(type.Context.Target.PointerSize)
         {
-            public FieldLayoutInterval(int start, int size, FieldLayoutTag tag)
-            {
-                Start = start;
-                Size = size;
-                Tag = tag;
-            }
+            // We want to mark empty intervals as having NonORef data.
+            protected override FieldLayoutTag EmptyIntervalData => FieldLayoutTag.NonORef;
 
-            public int Start;
-            public int Size;
+            protected override bool IntervalsHaveCompatibleTags(FieldLayoutTag existingTag, FieldLayoutTag nextTag) => existingTag == nextTag;
 
-            public int EndSentinel
+            protected override FieldLayoutInterval CombineIntervals(FieldLayoutInterval firstInterval, FieldLayoutInterval nextInterval)
             {
-                get
-                {
-                    return Start + Size;
-                }
-                set
-                {
-                    Size = value - Start;
-                    Debug.Assert(Size >= 0);
-                }
-            }
-
-            public FieldLayoutTag Tag;
-
-            public int CompareTo(FieldLayoutInterval other)
-            {
-                return Start.CompareTo(other.Start);
-            }
-        }
-
-        private readonly int _pointerSize;
-
-        // Represent field layout bits as a series of intervals to prevent pathological bad behavior
-        // involving excessively large explicit layout structures.
-        private readonly List<FieldLayoutInterval> _fieldLayout;
-
-        private readonly MetadataType _typeBeingValidated;
+                if (!IntervalsHaveCompatibleTags(firstInterval.Tag, nextInterval.Tag))
+                    ThrowFieldLayoutError(nextInterval.Start);
 
-        private ExplicitLayoutValidator(MetadataType type)
-        {
-            _typeBeingValidated = type;
-            _pointerSize = type.Context.Target.PointerSize;
-            _fieldLayout = new List<FieldLayoutInterval>();
-        }
-
-        public static void Validate(MetadataType type, ComputedInstanceFieldLayout layout)
-        {
-            ExplicitLayoutValidator validator = new ExplicitLayoutValidator(type);
-
-            foreach (FieldAndOffset fieldAndOffset in layout.Offsets)
-            {
-                validator.AddToFieldLayout(fieldAndOffset.Offset.AsInt, fieldAndOffset.Field.FieldType);
+                firstInterval.EndSentinel = nextInterval.EndSentinel;
+                return firstInterval;
             }
-        }
 
-        private void AddToFieldLayout(int offset, TypeDesc fieldType)
-        {
-            if (fieldType.IsGCPointer)
+            protected override FieldLayoutTag GetIntervalDataForType(int offset, TypeDesc fieldType)
             {
-                if (offset % _pointerSize != 0)
+                if (fieldType.IsGCPointer)
                 {
-                    // Misaligned ORef
-                    ThrowFieldLayoutError(offset);
-                }
-                SetFieldLayout(offset, _pointerSize, FieldLayoutTag.ORef);
-            }
-            else if (fieldType.IsPointer || fieldType.IsFunctionPointer)
-            {
-                SetFieldLayout(offset, _pointerSize, FieldLayoutTag.NonORef);
-            }
-            else if (fieldType.IsValueType)
-            {
-                MetadataType mdType = (MetadataType)fieldType;
-                int fieldSize = mdType.InstanceByteCountUnaligned.AsInt;
-                if (!mdType.ContainsGCPointers && !mdType.IsByRefLike)
-                {
-                    // Plain value type, mark the entire range as NonORef
-                    SetFieldLayout(offset, fieldSize, FieldLayoutTag.NonORef);
-                }
-                else
-                {
-                    if (offset % _pointerSize != 0)
+                    if (offset % PointerSize != 0)
                     {
-                        // Misaligned struct with GC pointers or ByRef
+                        // Misaligned ORef
                         ThrowFieldLayoutError(offset);
                     }
-
-                    List<FieldLayoutInterval> fieldRefMap = new();
-                    MarkByRefAndORefLocations(mdType, fieldRefMap, offset: 0);
-
-                    // Merge in fieldRefMap from structure specifying not attributed intervals as NonORef
-                    int lastGCRegionReportedEnd = 0;
-
-                    foreach (var gcRegion in fieldRefMap)
-                    {
-                        SetFieldLayout(offset + lastGCRegionReportedEnd, gcRegion.Start - lastGCRegionReportedEnd, FieldLayoutTag.NonORef);
-                        Debug.Assert(gcRegion.Tag == FieldLayoutTag.ORef || gcRegion.Tag == FieldLayoutTag.ByRef);
-                        SetFieldLayout(offset + gcRegion.Start, gcRegion.Size, gcRegion.Tag);
-                        lastGCRegionReportedEnd = gcRegion.EndSentinel;
-                    }
-
-                    if (fieldRefMap.Count > 0)
-                    {
-                        int trailingRegionStart = fieldRefMap[fieldRefMap.Count - 1].EndSentinel;
-                        int trailingRegionSize = fieldSize - trailingRegionStart;
-                        SetFieldLayout(offset + trailingRegionStart, trailingRegionSize, FieldLayoutTag.NonORef);
-                    }
+                    return FieldLayoutTag.ORef;
                 }
-            }
-            else if (fieldType.IsByRef)
-            {
-                if (offset % _pointerSize != 0)
-                {
-                    // Misaligned pointer field
-                    ThrowFieldLayoutError(offset);
-                }
-                SetFieldLayout(offset, _pointerSize, FieldLayoutTag.ByRef);
-            }
-            else
-            {
-                Debug.Assert(false, fieldType.ToString());
-            }
-        }
-
-        private void MarkByRefAndORefLocations(MetadataType type, List<FieldLayoutInterval> refMap, int offset)
-        {
-            // Recurse into struct fields
-            foreach (FieldDesc field in type.GetFields())
-            {
-                if (!field.IsStatic)
+                else if (fieldType.IsByRef)
                 {
-                    int fieldOffset = offset + field.Offset.AsInt;
-                    if (field.FieldType.IsGCPointer)
+                    if (offset % PointerSize != 0)
                     {
-                        SetFieldLayout(refMap, offset, _pointerSize, FieldLayoutTag.ORef);
-                    }
-                    else if (field.FieldType.IsByRef)
-                    {
-                        SetFieldLayout(refMap, offset, _pointerSize, FieldLayoutTag.ByRef);
-                    }
-                    else if (field.FieldType.IsValueType)
-                    {
-                        MetadataType mdFieldType = (MetadataType)field.FieldType;
-                        if (mdFieldType.ContainsGCPointers || mdFieldType.IsByRefLike)
-                        {
-                            MarkByRefAndORefLocations(mdFieldType, refMap, fieldOffset);
-                        }
+                        // Misaligned ByRef
+                        ThrowFieldLayoutError(offset);
                     }
+                    return FieldLayoutTag.ByRef;
                 }
-            }
-        }
-
-        private void SetFieldLayout(List<FieldLayoutInterval> fieldLayoutInterval, int offset, int count, FieldLayoutTag tag)
-        {
-            if (count == 0)
-                return;
-
-            var newInterval = new FieldLayoutInterval(offset, count, tag);
-
-            int binarySearchIndex = fieldLayoutInterval.BinarySearch(newInterval);
-
-            if (binarySearchIndex >= 0)
-            {
-                var existingInterval = fieldLayoutInterval[binarySearchIndex];
-
-                // Exact match found for start of interval.
-                if (tag != existingInterval.Tag)
+                else if (fieldType.IsPointer || fieldType.IsFunctionPointer)
                 {
-                    ThrowFieldLayoutError(offset);
+                    return FieldLayoutTag.NonORef;
                 }
-
-                if (existingInterval.Size >= count)
+                else if (fieldType.IsValueType)
                 {
-                    // Existing interval is big enough.
-                }
-                else
-                {
-                    // Expand existing interval, and then check to see if that's valid.
-                    existingInterval.Size = count;
-                    fieldLayoutInterval[binarySearchIndex] = existingInterval;
-
-                    ValidateAndMergeIntervalWithFollowingIntervals(fieldLayoutInterval, binarySearchIndex);
-                }
-            }
-            else
-            {
-                // No exact start match found.
-
-                int newIntervalLocation = ~binarySearchIndex;
-
-                // Check for previous interval overlaps cases
-                if (newIntervalLocation > 0)
-                {
-                    var previousInterval = fieldLayoutInterval[newIntervalLocation - 1];
-                    bool tagMatches = previousInterval.Tag == tag;
-
-                    if (previousInterval.EndSentinel > offset)
-                    {
-                        // Previous interval overlaps.
-                        if (!tagMatches)
-                        {
-                            ThrowFieldLayoutError(offset);
-                        }
-                    }
-
-                    if (previousInterval.EndSentinel > offset || (tagMatches && previousInterval.EndSentinel == offset))
-                    {
-                        // Previous interval overlaps, or exactly matches up with new interval and tag matches. Instead
-                        // of expanding interval set, simply expand the previous interval.
-                        previousInterval.EndSentinel = newInterval.EndSentinel;
-
-                        fieldLayoutInterval[newIntervalLocation - 1] = previousInterval;
-                        newIntervalLocation--;
-                    }
-                    else
+                    MetadataType mdType = (MetadataType)fieldType;
+                    if (!mdType.ContainsGCPointers && !mdType.IsByRefLike)
                     {
-                        fieldLayoutInterval.Insert(newIntervalLocation, newInterval);
+                        // Plain value type, mark the entire range as NonORef
+                        return FieldLayoutTag.NonORef;
                     }
+                    Debug.Fail("We should recurse on value types with GC pointers or ByRefLike types");
+                    return FieldLayoutTag.Empty;
                 }
                 else
                 {
-                    // New interval added at start
-                    fieldLayoutInterval.Insert(newIntervalLocation, newInterval);
+                    return FieldLayoutTag.Empty;
                 }
-
-                ValidateAndMergeIntervalWithFollowingIntervals(fieldLayoutInterval, newIntervalLocation);
             }
-        }
 
-        private void ValidateAndMergeIntervalWithFollowingIntervals(List<FieldLayoutInterval> fieldLayoutInterval, int intervalIndex)
-        {
-            while (true)
+            protected override bool NeedsRecursiveLayout(int offset, TypeDesc fieldType)
             {
-                if (intervalIndex + 1 == fieldLayoutInterval.Count)
+                if (!fieldType.IsValueType || !((MetadataType)fieldType).ContainsGCPointers && !fieldType.IsByRefLike)
                 {
-                    // existing interval is last interval. Expansion always succeeds
-                    break;
+                    return false;
                 }
-                else
-                {
-                    var nextInterval = fieldLayoutInterval[intervalIndex + 1];
-                    var expandedInterval = fieldLayoutInterval[intervalIndex];
-                    var tag = expandedInterval.Tag;
 
-                    if (nextInterval.Start > expandedInterval.EndSentinel)
-                    {
-                        // Next interval does not contact existing interval. Expansion succeeded
-                        break;
-                    }
-
-                    if ((nextInterval.Start == expandedInterval.EndSentinel) && nextInterval.Tag != tag)
-                    {
-                        // Next interval starts just after existing interval, but does not match tag. Expansion succeeded
-                        break;
-                    }
-
-                    Debug.Assert(nextInterval.Start <= expandedInterval.EndSentinel);
-                    // Next interval overlaps with expanded interval.
-
-                    if (nextInterval.Tag != tag)
-                    {
-                        ThrowFieldLayoutError(nextInterval.Start);
-                    }
-
-                    // Expand existing interval to cover region of next interval
-                    expandedInterval.EndSentinel = nextInterval.EndSentinel;
-                    fieldLayoutInterval[intervalIndex] = expandedInterval;
-
-                    // Remove next interval
-                    fieldLayoutInterval.RemoveAt(intervalIndex + 1);
+                if (offset % PointerSize != 0)
+                {
+                    // Misaligned struct with GC pointers or ByRef
+                    ThrowFieldLayoutError(offset);
                 }
+
+                return true;
             }
-        }
 
-        private void SetFieldLayout(int offset, int count, FieldLayoutTag tag)
-        {
-            SetFieldLayout(_fieldLayout, offset, count, tag);
+            private void ThrowFieldLayoutError(int offset)
+            {
+                ThrowHelper.ThrowTypeLoadException(ExceptionStringID.ClassLoadExplicitLayout, type, offset.ToStringInvariant());
+            }
         }
 
-        private void ThrowFieldLayoutError(int offset)
+        public static void Validate(MetadataType type, ComputedInstanceFieldLayout layout)
         {
-            ThrowHelper.ThrowTypeLoadException(ExceptionStringID.ClassLoadExplicitLayout, _typeBeingValidated, offset.ToStringInvariant());
+            Validator validator = new(type);
+            foreach (FieldAndOffset fieldAndOffset in layout.Offsets)
+            {
+                validator.AddToFieldLayout(fieldAndOffset.Offset.AsInt, fieldAndOffset.Field.FieldType);
+            }
         }
     }
 }
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutIntervalCalculator.cs b/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutIntervalCalculator.cs
new file mode 100644
index 000000000000..9f909eccc358
--- /dev/null
+++ b/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutIntervalCalculator.cs
@@ -0,0 +1,277 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace Internal.TypeSystem
+{
+    public abstract class FieldLayoutIntervalCalculator<TIntervalTag>
+    {
+        protected struct FieldLayoutInterval : IComparable<FieldLayoutInterval>
+        {
+            public FieldLayoutInterval(int start, int size, TIntervalTag tag)
+            {
+                Start = start;
+                Size = size;
+                Tag = tag;
+            }
+
+            public int Start;
+            public int Size;
+
+            public int EndSentinel
+            {
+                get
+                {
+                    return Start + Size;
+                }
+                set
+                {
+                    Size = value - Start;
+                    Debug.Assert(Size >= 0);
+                }
+            }
+
+            public TIntervalTag Tag;
+
+            public int CompareTo(FieldLayoutInterval other)
+            {
+                return Start.CompareTo(other.Start);
+            }
+        }
+
+        protected int PointerSize { get; }
+
+        // Represent field layout bits as a series of intervals to prevent pathological bad behavior
+        // involving excessively large explicit layout structures.
+        private readonly List<FieldLayoutInterval> _fieldLayout = new();
+
+        protected IReadOnlyList<FieldLayoutInterval> Intervals => _fieldLayout;
+
+        public FieldLayoutIntervalCalculator(int pointerSize)
+        {
+            PointerSize = pointerSize;
+        }
+
+        protected abstract bool NeedsRecursiveLayout(int offset, TypeDesc fieldType);
+
+        protected abstract TIntervalTag GetIntervalDataForType(int offset, TypeDesc fieldType);
+
+        protected abstract TIntervalTag EmptyIntervalData { get; }
+
+        private int GetFieldSize(TypeDesc fieldType)
+        {
+            if (fieldType.IsGCPointer || fieldType.IsPointer || fieldType.IsFunctionPointer || fieldType.IsByRef)
+            {
+                return PointerSize;
+            }
+            else if (fieldType.IsValueType)
+            {
+                return ((MetadataType)fieldType).InstanceByteCountUnaligned.AsInt;
+            }
+            else
+            {
+                Debug.Assert(false, fieldType.ToString());
+                return fieldType.GetElementSize().AsInt;
+            }
+        }
+
+        public void AddFields(TypeDesc type, bool addTrailingEmptyInterval = true) => AddToFieldLayout(0, type, addTrailingEmptyInterval: false);
+
+        public void AddToFieldLayout(int offset, TypeDesc fieldType) => AddToFieldLayout(offset, fieldType, addTrailingEmptyInterval: true);
+
+        public void AddToFieldLayout(int offset, TypeDesc fieldType, bool addTrailingEmptyInterval)
+        {
+            if (NeedsRecursiveLayout(offset, fieldType))
+            {
+                if (fieldType is MetadataType { IsInlineArray: true } mdType)
+                {
+                    fieldType = new TypeWithRepeatedFields(mdType);
+                }
+
+                List<FieldLayoutInterval> nestedIntervals = new List<FieldLayoutInterval>();
+                foreach (FieldDesc field in fieldType.GetFields())
+                {
+                    if (field.IsStatic)
+                        continue;
+
+                    int fieldOffset = field.Offset.AsInt;
+                    AddToFieldLayout(nestedIntervals, fieldOffset, field.FieldType);
+                }
+
+                // Merge in the intervals from structure, filling in the gaps with empty intervals.
+                int lastGCRegionReportedEnd = 0;
+
+                foreach (var interval in nestedIntervals)
+                {
+                    SetFieldLayout(offset + lastGCRegionReportedEnd, interval.Start - lastGCRegionReportedEnd, EmptyIntervalData);
+                    SetFieldLayout(offset + interval.Start, interval.Size, interval.Tag);
+                    lastGCRegionReportedEnd = interval.EndSentinel;
+                }
+
+                if (addTrailingEmptyInterval && nestedIntervals.Count > 0)
+                {
+                    int trailingRegionStart = nestedIntervals[nestedIntervals.Count - 1].EndSentinel;
+                    int trailingRegionSize = GetFieldSize(fieldType) - trailingRegionStart;
+                    SetFieldLayout(offset + trailingRegionStart, trailingRegionSize, EmptyIntervalData);
+                }
+            }
+            else
+            {
+                SetFieldLayout(offset, GetFieldSize(fieldType), GetIntervalDataForType(offset, fieldType));
+            }
+        }
+
+        private void AddToFieldLayout(List<FieldLayoutInterval> fieldLayout, int offset, TypeDesc fieldType)
+        {
+            if (NeedsRecursiveLayout(offset, fieldType))
+            {
+                if (fieldType is MetadataType { IsInlineArray: true } mdType)
+                {
+                    fieldType = new TypeWithRepeatedFields(mdType);
+                }
+
+                foreach (FieldDesc field in fieldType.GetFields())
+                {
+                    int fieldOffset = offset + field.Offset.AsInt;
+                    AddToFieldLayout(fieldLayout, fieldOffset, field.FieldType);
+                }
+            }
+            else
+            {
+                SetIntervalData(fieldLayout, offset, GetFieldSize(fieldType), GetIntervalDataForType(offset, fieldType));
+            }
+        }
+
+
+        protected abstract bool IntervalsHaveCompatibleTags(TIntervalTag existingTag, TIntervalTag nextTag);
+
+        /// <summary>
+        /// Combine two bordering or overlapping intervals into a single interval.
+        /// </summary>
+        /// <param name="firstInterval">The interval that starts earlier of the two intervals.</param>
+        /// <param name="nextInterval">The interval that ends later of the two intervals.</param>
+        /// <returns>A new interval that represents the combined range.</returns>
+        protected abstract FieldLayoutInterval CombineIntervals(FieldLayoutInterval firstInterval, FieldLayoutInterval nextInterval);
+
+        private void SetIntervalData(List<FieldLayoutInterval> fieldLayoutInterval, int offset, int count, TIntervalTag tag)
+        {
+            if (count == 0)
+                return;
+
+            var newInterval = new FieldLayoutInterval(offset, count, tag);
+
+            int binarySearchIndex = fieldLayoutInterval.BinarySearch(newInterval);
+
+            int updatedIntervalIndex;
+
+            if (binarySearchIndex >= 0)
+            {
+                var existingInterval = fieldLayoutInterval[binarySearchIndex];
+
+                fieldLayoutInterval[binarySearchIndex] = CombineIntervals(existingInterval, newInterval);
+                updatedIntervalIndex = binarySearchIndex;
+            }
+            else
+            {
+                // No exact start match found.
+
+                int newIntervalLocation = ~binarySearchIndex;
+
+                // Check for previous interval overlaps cases
+                if (newIntervalLocation > 0)
+                {
+                    var previousInterval = fieldLayoutInterval[newIntervalLocation - 1];
+
+                    if (previousInterval.EndSentinel > offset)
+                    {
+                        fieldLayoutInterval[newIntervalLocation - 1] = CombineIntervals(previousInterval, newInterval);
+                        newIntervalLocation--;
+                    }
+                    else
+                    {
+                        fieldLayoutInterval.Insert(newIntervalLocation, newInterval);
+                    }
+                }
+                else
+                {
+                    // New interval added at start
+                    fieldLayoutInterval.Insert(newIntervalLocation, newInterval);
+                }
+
+                updatedIntervalIndex = newIntervalLocation;
+            }
+            MergeIntervalWithNeighboringIntervals(fieldLayoutInterval, updatedIntervalIndex);
+        }
+
+        private void MergeIntervalWithNeighboringIntervals(List<FieldLayoutInterval> fieldLayoutInterval, int intervalIndex)
+        {
+            // Merge this interval first with the following intervals
+            while (true)
+            {
+                if (intervalIndex + 1 == fieldLayoutInterval.Count)
+                {
+                    // existing interval is last interval. Expansion always succeeds
+                    break;
+                }
+
+                var nextInterval = fieldLayoutInterval[intervalIndex + 1];
+                var expandedInterval = fieldLayoutInterval[intervalIndex];
+
+                if (nextInterval.Start > expandedInterval.EndSentinel)
+                {
+                    // Next interval does not contact existing interval. Expansion succeeded
+                    break;
+                }
+
+                if ((nextInterval.Start == expandedInterval.EndSentinel) && !IntervalsHaveCompatibleTags(expandedInterval.Tag, nextInterval.Tag))
+                {
+                    // Next interval starts just after existing interval, but does not match tag. Expansion succeeded
+                    break;
+                }
+
+                Debug.Assert(nextInterval.Start <= expandedInterval.EndSentinel);
+                // Next interval overlaps with expanded interval.
+                fieldLayoutInterval[intervalIndex] = CombineIntervals(expandedInterval, nextInterval);
+                fieldLayoutInterval.RemoveAt(intervalIndex + 1);
+            }
+
+            // Now merge with preceeding intervals
+            while (true)
+            {
+                if (intervalIndex == 0)
+                {
+                    // expanded interval is first interval. Expansion always succeeds
+                    break;
+                }
+
+                var previousInterval = fieldLayoutInterval[intervalIndex - 1];
+                var expandedInterval = fieldLayoutInterval[intervalIndex];
+
+                if (previousInterval.EndSentinel < expandedInterval.Start)
+                {
+                    // Previous interval does not contact expanded interval. Expansion succeeded
+                    break;
+                }
+
+                if ((previousInterval.EndSentinel == expandedInterval.Start) && !IntervalsHaveCompatibleTags(previousInterval.Tag, expandedInterval.Tag))
+                {
+                    // Expanded interval starts just after previous interval, but does not match tag. Expansion succeeded
+                    break;
+                }
+
+                Debug.Assert(previousInterval.EndSentinel <= expandedInterval.Start);
+                // Previous interval overlaps with expanded interval.
+                fieldLayoutInterval[intervalIndex] = CombineIntervals(previousInterval, expandedInterval);
+                fieldLayoutInterval.RemoveAt(--intervalIndex);
+            }
+        }
+
+        private void SetFieldLayout(int offset, int count, TIntervalTag tag)
+        {
+            SetIntervalData(_fieldLayout, offset, count, tag);
+        }
+    }
+}
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/GenericParameterDesc.cs b/src/coreclr/tools/Common/TypeSystem/Common/GenericParameterDesc.cs
index e1bf789b1123..c361733737d4 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/GenericParameterDesc.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/GenericParameterDesc.cs
@@ -61,7 +61,7 @@ public enum GenericConstraints
         /// <summary>
         /// A type is permitted to be ByRefLike.
         /// </summary>
-        AcceptByRefLike = 0x20,
+        AllowByRefLike = 0x20,
     }
 
     public abstract partial class GenericParameterDesc : TypeDesc
@@ -159,13 +159,13 @@ public bool HasDefaultConstructorConstraint
         }
 
         /// <summary>
-        /// Does this generic parameter have the AcceptByRefLike flag
+        /// Does this generic parameter have the AllowByRefLike flag
         /// </summary>
-        public bool HasAcceptByRefLikeConstraint
+        public bool HasAllowByRefLikeConstraint
         {
             get
             {
-                return (Constraints & GenericConstraints.AcceptByRefLike) != 0;
+                return (Constraints & GenericConstraints.AllowByRefLike) != 0;
             }
         }
 
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/ImpliedRepeatedFieldDesc.Sorting.cs b/src/coreclr/tools/Common/TypeSystem/Common/ImpliedRepeatedFieldDesc.Sorting.cs
new file mode 100644
index 000000000000..ef3b77aecbec
--- /dev/null
+++ b/src/coreclr/tools/Common/TypeSystem/Common/ImpliedRepeatedFieldDesc.Sorting.cs
@@ -0,0 +1,24 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace Internal.TypeSystem
+{
+    public sealed partial class ImpliedRepeatedFieldDesc : FieldDesc
+    {
+        protected internal override int CompareToImpl(FieldDesc other, TypeSystemComparer comparer)
+        {
+            var impliedRepeatedFieldDesc = (ImpliedRepeatedFieldDesc)other;
+
+            int result = comparer.Compare(_underlyingFieldDesc, impliedRepeatedFieldDesc._underlyingFieldDesc);
+
+            if (result != 0)
+            {
+                return result;
+            }
+
+            return FieldIndex.CompareTo(impliedRepeatedFieldDesc.FieldIndex);
+        }
+
+        protected internal override int ClassCode => 31666958;
+    }
+}
diff --git a/src/coreclr/tools/Common/Compiler/ImpliedRepeatedFieldDesc.cs b/src/coreclr/tools/Common/TypeSystem/Common/ImpliedRepeatedFieldDesc.cs
similarity index 73%
rename from src/coreclr/tools/Common/Compiler/ImpliedRepeatedFieldDesc.cs
rename to src/coreclr/tools/Common/TypeSystem/Common/ImpliedRepeatedFieldDesc.cs
index a4f6a4ab55b3..93829636916d 100644
--- a/src/coreclr/tools/Common/Compiler/ImpliedRepeatedFieldDesc.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/ImpliedRepeatedFieldDesc.cs
@@ -1,11 +1,9 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using Internal.TypeSystem;
-
-namespace ILCompiler
+namespace Internal.TypeSystem
 {
-    public sealed class ImpliedRepeatedFieldDesc : FieldDesc
+    public sealed partial class ImpliedRepeatedFieldDesc : FieldDesc
     {
         private readonly FieldDesc _underlyingFieldDesc;
 
@@ -36,26 +34,10 @@ public ImpliedRepeatedFieldDesc(DefType owningType, FieldDesc underlyingFieldDes
 
         public int FieldIndex { get; }
 
-        protected override int ClassCode => 31666958;
-
         public override EmbeddedSignatureData[] GetEmbeddedSignatureData() => _underlyingFieldDesc.GetEmbeddedSignatureData();
 
         public override bool HasCustomAttribute(string attributeNamespace, string attributeName) => _underlyingFieldDesc.HasCustomAttribute(attributeNamespace, attributeName);
 
-        protected override int CompareToImpl(FieldDesc other, TypeSystemComparer comparer)
-        {
-            var impliedRepeatedFieldDesc = (ImpliedRepeatedFieldDesc)other;
-
-            int result = comparer.Compare(_underlyingFieldDesc, impliedRepeatedFieldDesc._underlyingFieldDesc);
-
-            if (result != 0)
-            {
-                return result;
-            }
-
-            return FieldIndex.CompareTo(impliedRepeatedFieldDesc.FieldIndex);
-        }
-
         public override MarshalAsDescriptor GetMarshalAsDescriptor() => _underlyingFieldDesc.GetMarshalAsDescriptor();
 
         public override string Name => $"{_underlyingFieldDesc.Name}[{FieldIndex}]";
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/TargetDetails.cs b/src/coreclr/tools/Common/TypeSystem/Common/TargetDetails.cs
index 7aa340c27b77..077486502c30 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/TargetDetails.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/TargetDetails.cs
@@ -38,14 +38,6 @@ public enum TargetAbi
         /// model for armel execution model
         /// </summary>
         NativeAotArmel,
-        /// <summary>
-        /// Jit runtime ABI
-        /// </summary>
-        Jit,
-        /// <summary>
-        /// Cross-platform portable C++ codegen
-        /// </summary>
-        CppCodegen,
     }
 
     /// <summary>
@@ -103,7 +95,7 @@ public bool SupportsRelativePointers
         {
             get
             {
-                return (Abi != TargetAbi.CppCodegen) && (Architecture != TargetArchitecture.Wasm32);
+                return Architecture != TargetArchitecture.Wasm32;
             }
         }
 
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/TypeSystemConstraintsHelpers.cs b/src/coreclr/tools/Common/TypeSystem/Common/TypeSystemConstraintsHelpers.cs
index acd8e291cebd..97bceb5b1b83 100644
--- a/src/coreclr/tools/Common/TypeSystem/Common/TypeSystemConstraintsHelpers.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/TypeSystemConstraintsHelpers.cs
@@ -50,7 +50,7 @@ private static bool VerifyGenericParamConstraint(InstantiationContext genericPar
             }
 
             // Check for ByRefLike support
-            if (instantiationParam.IsByRefLike && (constraints & GenericConstraints.AcceptByRefLike) == 0)
+            if (instantiationParam.IsByRefLike && (constraints & GenericConstraints.AllowByRefLike) == 0)
                 return false;
 
             var instantiatedConstraints = default(ArrayBuilder<TypeDesc>);
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/TypeWithRepeatedFields.Diagnostic.cs b/src/coreclr/tools/Common/TypeSystem/Common/TypeWithRepeatedFields.Diagnostic.cs
new file mode 100644
index 000000000000..7dc033e16292
--- /dev/null
+++ b/src/coreclr/tools/Common/TypeSystem/Common/TypeWithRepeatedFields.Diagnostic.cs
@@ -0,0 +1,21 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Threading;
+
+namespace Internal.TypeSystem
+{
+    /// <summary>
+    /// This type represents a type that has one field in metadata,
+    /// but has that field repeated at runtime to represent an array of elements inline.
+    /// </summary>
+    public sealed partial class TypeWithRepeatedFields : MetadataType
+    {
+        public override string DiagnosticName => MetadataType.DiagnosticName;
+
+        public override string DiagnosticNamespace => MetadataType.DiagnosticNamespace;
+    }
+}
diff --git a/src/coreclr/tools/Common/TypeSystem/Common/TypeWithRepeatedFields.Sorting.cs b/src/coreclr/tools/Common/TypeSystem/Common/TypeWithRepeatedFields.Sorting.cs
new file mode 100644
index 000000000000..6d9fc17daff3
--- /dev/null
+++ b/src/coreclr/tools/Common/TypeSystem/Common/TypeWithRepeatedFields.Sorting.cs
@@ -0,0 +1,20 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Threading;
+
+namespace Internal.TypeSystem
+{
+    /// <summary>
+    /// This type represents a type that has one field in metadata,
+    /// but has that field repeated at runtime to represent an array of elements inline.
+    /// </summary>
+    public sealed partial class TypeWithRepeatedFields : MetadataType
+    {
+        protected internal override int CompareToImpl(TypeDesc other, TypeSystemComparer comparer) => comparer.Compare(MetadataType, ((TypeWithRepeatedFields)other).MetadataType);
+        protected internal override int ClassCode => 779393465;
+    }
+}
diff --git a/src/coreclr/tools/Common/Compiler/TypeWithRepeatedFields.cs b/src/coreclr/tools/Common/TypeSystem/Common/TypeWithRepeatedFields.cs
similarity index 72%
rename from src/coreclr/tools/Common/Compiler/TypeWithRepeatedFields.cs
rename to src/coreclr/tools/Common/TypeSystem/Common/TypeWithRepeatedFields.cs
index 48382f075ea0..158925c84e64 100644
--- a/src/coreclr/tools/Common/Compiler/TypeWithRepeatedFields.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/TypeWithRepeatedFields.cs
@@ -5,9 +5,8 @@
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Threading;
-using Internal.TypeSystem;
 
-namespace ILCompiler
+namespace Internal.TypeSystem
 {
     /// <summary>
     /// This type represents a type that has one field in metadata,
@@ -90,49 +89,8 @@ public override IEnumerable<FieldDesc> GetFields()
         public override MethodImplRecord[] FindMethodsImplWithMatchingDeclName(string name) => MetadataType.FindMethodsImplWithMatchingDeclName(name);
         public override int GetHashCode() => MetadataType.GetHashCode();
         protected override MethodImplRecord[] ComputeVirtualMethodImplsForType() => Array.Empty<MethodImplRecord>();
-        protected override int CompareToImpl(TypeDesc other, TypeSystemComparer comparer) => comparer.Compare(MetadataType, ((TypeWithRepeatedFields)other).MetadataType);
 
-        protected override TypeFlags ComputeTypeFlags(TypeFlags mask)
-        {
-            TypeFlags flags = 0;
-
-            if ((mask & TypeFlags.CategoryMask) != 0)
-            {
-                flags |= MetadataType.Category;
-            }
-
-            if ((mask & TypeFlags.HasGenericVarianceComputed) != 0)
-            {
-                flags |= TypeFlags.HasGenericVarianceComputed;
-
-                if (MetadataType.HasVariance)
-                    flags |= TypeFlags.HasGenericVariance;
-            }
-
-            if ((mask & TypeFlags.HasFinalizerComputed) != 0)
-            {
-                flags |= TypeFlags.HasFinalizerComputed;
-
-                if (MetadataType.HasFinalizer)
-                    flags |= TypeFlags.HasFinalizer;
-            }
-
-            if ((mask & TypeFlags.AttributeCacheComputed) != 0)
-            {
-                flags |= TypeFlags.AttributeCacheComputed;
-
-                if (MetadataType.IsByRefLike)
-                    flags |= TypeFlags.IsByRefLike;
-
-                if (MetadataType.IsInlineArray)
-                    flags |= TypeFlags.IsInlineArray;
-
-                if (MetadataType.IsIntrinsic)
-                    flags |= TypeFlags.IsIntrinsic;
-            }
-
-            return flags;
-        }
+        protected override TypeFlags ComputeTypeFlags(TypeFlags mask) => MetadataType.GetTypeFlags(mask);
 
         public override string Namespace => MetadataType.Namespace;
 
@@ -160,14 +118,8 @@ protected override TypeFlags ComputeTypeFlags(TypeFlags mask)
 
         public override PInvokeStringFormat PInvokeStringFormat => MetadataType.PInvokeStringFormat;
 
-        public override string DiagnosticName => MetadataType.DiagnosticName;
-
-        public override string DiagnosticNamespace => MetadataType.DiagnosticNamespace;
-
         public override TypeSystemContext Context => MetadataType.Context;
 
-        protected override int ClassCode => 779393465;
-
         public override IEnumerable<MethodDesc> GetMethods() => MethodDesc.EmptyMethods;
     }
 }
diff --git a/src/coreclr/tools/Common/Compiler/TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs b/src/coreclr/tools/Common/TypeSystem/Common/TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs
similarity index 99%
rename from src/coreclr/tools/Common/Compiler/TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs
rename to src/coreclr/tools/Common/TypeSystem/Common/TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs
index 93da0f28a448..b9537b9af8ba 100644
--- a/src/coreclr/tools/Common/Compiler/TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Common/TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs
@@ -3,7 +3,7 @@
 
 using Internal.TypeSystem;
 
-namespace ILCompiler
+namespace Internal.TypeSystem
 {
     /// <summary>
     /// Represents an algorithm that computes field layout for intrinsic vector types (Vector64/Vector128/Vector256).
diff --git a/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaGenericParameter.cs b/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaGenericParameter.cs
index ac6cd4307ca7..8ffa09bc12eb 100644
--- a/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaGenericParameter.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Ecma/EcmaGenericParameter.cs
@@ -113,7 +113,7 @@ public override GenericConstraints Constraints
             {
                 Debug.Assert((int)GenericConstraints.DefaultConstructorConstraint == (int)GenericParameterAttributes.DefaultConstructorConstraint);
                 GenericParameter parameter = _module.MetadataReader.GetGenericParameter(_handle);
-                const GenericParameterAttributes mask = GenericParameterAttributes.SpecialConstraintMask | (GenericParameterAttributes)GenericConstraints.AcceptByRefLike;
+                const GenericParameterAttributes mask = GenericParameterAttributes.SpecialConstraintMask | (GenericParameterAttributes)GenericConstraints.AllowByRefLike;
                 return (GenericConstraints)(parameter.Attributes & mask);
             }
         }
diff --git a/src/coreclr/tools/Common/TypeSystem/IL/ILReader.cs b/src/coreclr/tools/Common/TypeSystem/IL/ILReader.cs
index d0022a5bad78..3e2b5cd7aa02 100644
--- a/src/coreclr/tools/Common/TypeSystem/IL/ILReader.cs
+++ b/src/coreclr/tools/Common/TypeSystem/IL/ILReader.cs
@@ -1,42 +1,27 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System;
+using System.Buffers.Binary;
+
 using Internal.TypeSystem;
 
 using Debug = System.Diagnostics.Debug;
 
 namespace Internal.IL
 {
-    internal struct ILReader
+    internal ref struct ILReader
     {
         private int _currentOffset;
-        private readonly byte[] _ilBytes;
+        private readonly ReadOnlySpan<byte> _ilBytes;
 
-        public int Offset
-        {
-            get
-            {
-                return _currentOffset;
-            }
-        }
+        public readonly int Offset => _currentOffset;
 
-        public int Size
-        {
-            get
-            {
-                return _ilBytes.Length;
-            }
-        }
+        public readonly int Size => _ilBytes.Length;
 
-        public bool HasNext
-        {
-            get
-            {
-                return _currentOffset < _ilBytes.Length;
-            }
-        }
+        public readonly bool HasNext => _currentOffset < _ilBytes.Length;
 
-        public ILReader(byte[] ilBytes, int currentOffset = 0)
+        public ILReader(ReadOnlySpan<byte> ilBytes, int currentOffset = 0)
         {
             _ilBytes = ilBytes;
             _currentOffset = currentOffset;
@@ -56,22 +41,20 @@ public byte ReadILByte()
 
         public ushort ReadILUInt16()
         {
-            if (_currentOffset + 2 > _ilBytes.Length)
+            if (!BinaryPrimitives.TryReadUInt16LittleEndian(_ilBytes.Slice(_currentOffset), out ushort value))
                 ThrowHelper.ThrowInvalidProgramException();
 
-            ushort val = (ushort)(_ilBytes[_currentOffset] + (_ilBytes[_currentOffset + 1] << 8));
-            _currentOffset += 2;
-            return val;
+            _currentOffset += sizeof(ushort);
+            return value;
         }
 
         public uint ReadILUInt32()
         {
-            if (_currentOffset + 4 > _ilBytes.Length)
+            if (!BinaryPrimitives.TryReadUInt32LittleEndian(_ilBytes.Slice(_currentOffset), out uint value))
                 ThrowHelper.ThrowInvalidProgramException();
 
-            uint val = (uint)(_ilBytes[_currentOffset] + (_ilBytes[_currentOffset + 1] << 8) + (_ilBytes[_currentOffset + 2] << 16) + (_ilBytes[_currentOffset + 3] << 24));
-            _currentOffset += 4;
-            return val;
+            _currentOffset += sizeof(uint);
+            return value;
         }
 
         public int ReadILToken()
@@ -81,21 +64,29 @@ public int ReadILToken()
 
         public ulong ReadILUInt64()
         {
-            ulong value = ReadILUInt32();
-            value |= (((ulong)ReadILUInt32()) << 32);
+            if (!BinaryPrimitives.TryReadUInt64LittleEndian(_ilBytes.Slice(_currentOffset), out ulong value))
+                ThrowHelper.ThrowInvalidProgramException();
+
+            _currentOffset += sizeof(ulong);
             return value;
         }
 
-        public unsafe float ReadILFloat()
+        public float ReadILFloat()
         {
-            uint value = ReadILUInt32();
-            return *(float*)(&value);
+            if (!BinaryPrimitives.TryReadSingleLittleEndian(_ilBytes.Slice(_currentOffset), out float value))
+                ThrowHelper.ThrowInvalidProgramException();
+
+            _currentOffset += sizeof(float);
+            return value;
         }
 
         public unsafe double ReadILDouble()
         {
-            ulong value = ReadILUInt64();
-            return *(double*)(&value);
+            if (!BinaryPrimitives.TryReadDoubleLittleEndian(_ilBytes.Slice(_currentOffset), out double value))
+                ThrowHelper.ThrowInvalidProgramException();
+
+            _currentOffset += sizeof(double);
+            return value;
         }
 
         public ILOpcode ReadILOpcode()
diff --git a/src/coreclr/tools/Common/TypeSystem/IL/ILStackHelper.cs b/src/coreclr/tools/Common/TypeSystem/IL/ILStackHelper.cs
index 1204f1b6655f..19d5ccb0b5aa 100644
--- a/src/coreclr/tools/Common/TypeSystem/IL/ILStackHelper.cs
+++ b/src/coreclr/tools/Common/TypeSystem/IL/ILStackHelper.cs
@@ -25,16 +25,16 @@ public static void CheckStackBalance(this MethodIL methodIL)
         public static int ComputeMaxStack(this MethodIL methodIL)
         {
             const int StackHeightNotSet = int.MinValue;
+            const int StackAllocThreshold = 256 / sizeof(int);
 
-            byte[] ilbytes = methodIL.GetILBytes();
-            int currentOffset = 0;
+            var ilReader = new ILReader(methodIL.GetILBytes());
             int stackHeight = 0;
             int maxStack = 0;
 
-            // TODO: Use Span<T> for this and stackalloc the array if reasonably sized
-            int[] stackHeights = new int[ilbytes.Length];
-            for (int i = 0; i < stackHeights.Length; i++)
-                stackHeights[i] = StackHeightNotSet;
+            Span<int> stackHeights = ilReader.Size <= StackAllocThreshold ?
+                stackalloc int[StackAllocThreshold].Slice(0, ilReader.Size) : new int[ilReader.Size];
+
+            stackHeights.Fill(StackHeightNotSet);
 
             // Catch and filter clauses have a known non-zero stack height.
             foreach (ILExceptionRegion region in methodIL.GetExceptionRegions())
@@ -50,11 +50,10 @@ public static int ComputeMaxStack(this MethodIL methodIL)
                 }
             }
 
-            while (currentOffset < ilbytes.Length)
+            while (ilReader.HasNext)
             {
-                ILOpcode opcode = (ILOpcode)ilbytes[currentOffset];
-                if (opcode == ILOpcode.prefix1)
-                    opcode = 0x100 + (ILOpcode)ilbytes[currentOffset + 1];
+                int currentOffset = ilReader.Offset;
+                ILOpcode opcode = ilReader.ReadILOpcode();
 
                 // The stack height could be unknown if the previous instruction
                 // was an unconditional control transfer.
@@ -73,7 +72,7 @@ public static int ComputeMaxStack(this MethodIL methodIL)
                     || stackHeights[currentOffset] == stackHeight);
                 stackHeights[currentOffset] = stackHeight;
 
-                bool isVariableSize = false;
+                bool hasReadOperand = false;
                 switch (opcode)
                 {
                     case ILOpcode.arglist:
@@ -168,6 +167,7 @@ public static int ComputeMaxStack(this MethodIL methodIL)
                     case ILOpcode.stloc_2:
                     case ILOpcode.stloc_3:
                     case ILOpcode.stloc_s:
+                    case ILOpcode.switch_:
                         Debug.Assert(stackHeight > 0);
                         stackHeight -= 1;
                         break;
@@ -191,43 +191,6 @@ public static int ComputeMaxStack(this MethodIL methodIL)
                     case ILOpcode.blt:
                     case ILOpcode.blt_un:
                     case ILOpcode.bne_un:
-                        {
-                            int target = currentOffset + ReadInt32(ilbytes, currentOffset + 1) + 5;
-
-                            int adjustment;
-                            bool isConditional;
-                            if (opcode == ILOpcode.br || opcode == ILOpcode.leave)
-                            {
-                                isConditional = false;
-                                adjustment = 0;
-                            }
-                            else if (opcode == ILOpcode.brfalse || opcode == ILOpcode.brtrue)
-                            {
-                                isConditional = true;
-                                adjustment = 1;
-                            }
-                            else
-                            {
-                                isConditional = true;
-                                adjustment = 2;
-                            }
-
-                            Debug.Assert(stackHeight >= adjustment);
-                            stackHeight -= adjustment;
-
-                            Debug.Assert(stackHeights[target] == StackHeightNotSet
-                                || stackHeights[target] == stackHeight);
-
-                            // Forward branch carries information about stack height at a future
-                            // offset. We need to remember it.
-                            if (target > currentOffset)
-                                stackHeights[target] = stackHeight;
-
-                            if (!isConditional)
-                                stackHeight = StackHeightNotSet;
-                        }
-                        break;
-
                     case ILOpcode.br_s:
                     case ILOpcode.leave_s:
                     case ILOpcode.brfalse_s:
@@ -243,16 +206,17 @@ public static int ComputeMaxStack(this MethodIL methodIL)
                     case ILOpcode.blt_un_s:
                     case ILOpcode.bne_un_s:
                         {
-                            int target = currentOffset + (sbyte)ilbytes[currentOffset + 1] + 2;
+                            int target = ilReader.ReadBranchDestination(opcode);
+                            hasReadOperand = true;
 
                             int adjustment;
                             bool isConditional;
-                            if (opcode == ILOpcode.br_s || opcode == ILOpcode.leave_s)
+                            if (opcode is ILOpcode.br or ILOpcode.br_s or ILOpcode.leave or ILOpcode.leave_s)
                             {
                                 isConditional = false;
                                 adjustment = 0;
                             }
-                            else if (opcode == ILOpcode.brfalse_s || opcode == ILOpcode.brtrue_s)
+                            else if (opcode is ILOpcode.brfalse or ILOpcode.brfalse_s or ILOpcode.brtrue_s or ILOpcode.brtrue)
                             {
                                 isConditional = true;
                                 adjustment = 1;
@@ -284,11 +248,11 @@ public static int ComputeMaxStack(this MethodIL methodIL)
                     case ILOpcode.callvirt:
                     case ILOpcode.newobj:
                         {
-                            int token = ReadILToken(ilbytes, currentOffset + 1);
-                            object obj = methodIL.GetObject(token);
-                            MethodSignature sig = obj is MethodSignature ?
-                                (MethodSignature)obj :
-                                ((MethodDesc)obj).Signature;
+                            object obj = methodIL.GetObject(ilReader.ReadILToken());
+                            hasReadOperand = true;
+
+                            MethodSignature sig = obj is MethodSignature methodSignature ?
+                                methodSignature : ((MethodDesc)obj).Signature;
                             int adjustment = sig.Length;
                             if (opcode == ILOpcode.newobj)
                             {
@@ -436,38 +400,18 @@ public static int ComputeMaxStack(this MethodIL methodIL)
                         Debug.Assert(stackHeight > 0);
                         break;
 
-                    case ILOpcode.switch_:
-                        Debug.Assert(stackHeight > 0);
-                        isVariableSize = true;
-                        stackHeight -= 1;
-                        currentOffset += 1 + (ReadInt32(ilbytes, currentOffset + 1) * 4) + 4;
-                        break;
-
                     default:
                         Debug.Fail("Unknown instruction");
                         break;
                 }
 
-                if (!isVariableSize)
-                    currentOffset += opcode.GetSize();
+                if (!hasReadOperand)
+                    ilReader.Skip(opcode);
 
                 maxStack = Math.Max(maxStack, stackHeight);
             }
 
             return maxStack;
         }
-
-        private static int ReadInt32(byte[] ilBytes, int offset)
-        {
-            return ilBytes[offset]
-                + (ilBytes[offset + 1] << 8)
-                + (ilBytes[offset + 2] << 16)
-                + (ilBytes[offset + 3] << 24);
-        }
-
-        private static int ReadILToken(byte[] ilBytes, int offset)
-        {
-            return ReadInt32(ilBytes, offset);
-        }
     }
 }
diff --git a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/ArrayMethodILEmitter.cs b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/ArrayMethodILEmitter.cs
index b90abeeb79ab..69aec3abb713 100644
--- a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/ArrayMethodILEmitter.cs
+++ b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/ArrayMethodILEmitter.cs
@@ -81,7 +81,9 @@ private void EmitILForAccessor()
 
             int pointerSize = context.Target.PointerSize;
 
-            int argStartOffset = _method.Kind == ArrayMethodKind.AddressWithHiddenArg ? 2 : 1;
+            bool isX86 = context.Target.Architecture == TargetArchitecture.X86;
+            int argStartOffset = !isX86 && _method.Kind == ArrayMethodKind.AddressWithHiddenArg ? 2 : 1;
+            int hiddenArg = !isX86 ? 1 : 1 + _rank;
 
             var rangeExceptionLabel = _emitter.NewCodeLabel();
             ILCodeLabel typeMismatchExceptionLabel = null;
@@ -112,7 +114,7 @@ private void EmitILForAccessor()
                     // As per ECMA-335 III.2.3, the prefix suppresses the type check.
                     // if (hiddenArg == IntPtr.Zero)
                     //     goto TypeCheckPassed;
-                    codeStream.EmitLdArg(1);
+                    codeStream.EmitLdArg(hiddenArg);
                     codeStream.Emit(ILOpcode.brfalse, typeCheckPassedLabel);
 
                     // MethodTable* actualElementType = this.m_pEEType->RelatedParameterType; // ArrayElementType
@@ -122,7 +124,7 @@ private void EmitILForAccessor()
                         _emitter.NewToken(eetypeType.GetKnownMethod("get_RelatedParameterType", null)));
 
                     // MethodTable* expectedElementType = hiddenArg->RelatedParameterType; // ArrayElementType
-                    codeStream.EmitLdArg(1);
+                    codeStream.EmitLdArg(hiddenArg);
                     codeStream.Emit(ILOpcode.call,
                         _emitter.NewToken(eetypeType.GetKnownMethod("get_RelatedParameterType", null)));
 
diff --git a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/DelegateMethodILEmitter.cs b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/DelegateMethodILEmitter.cs
index 905d40fd2828..baa2ff1f354b 100644
--- a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/DelegateMethodILEmitter.cs
+++ b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/DelegateMethodILEmitter.cs
@@ -46,8 +46,8 @@ public static MethodIL EmitIL(MethodDesc method)
 
                 ILEmitter emit = new ILEmitter();
                 TypeDesc delegateType = context.GetWellKnownType(WellKnownType.MulticastDelegate).BaseType;
-                FieldDesc firstParameterField = delegateType.GetKnownField("m_firstParameter");
-                FieldDesc functionPointerField = delegateType.GetKnownField("m_functionPointer");
+                FieldDesc firstParameterField = delegateType.GetKnownField("_firstParameter");
+                FieldDesc functionPointerField = delegateType.GetKnownField("_functionPointer");
                 ILCodeStream codeStream = emit.NewCodeStream();
 
                 codeStream.EmitLdArg(0);
diff --git a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/DelegateThunks.cs b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/DelegateThunks.cs
index 99834d7cb236..3845514eb4db 100644
--- a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/DelegateThunks.cs
+++ b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/DelegateThunks.cs
@@ -63,7 +63,7 @@ protected FieldDesc ExtraFunctionPointerOrDataField
         {
             get
             {
-                return SystemDelegateType.GetKnownField("m_extraFunctionPointerOrData");
+                return SystemDelegateType.GetKnownField("_extraFunctionPointerOrData");
             }
         }
 
@@ -71,7 +71,7 @@ protected FieldDesc HelperObjectField
         {
             get
             {
-                return SystemDelegateType.GetKnownField("m_helperObject");
+                return SystemDelegateType.GetKnownField("_helperObject");
             }
         }
 
@@ -79,7 +79,7 @@ protected FieldDesc FirstParameterField
         {
             get
             {
-                return SystemDelegateType.GetKnownField("m_firstParameter");
+                return SystemDelegateType.GetKnownField("_firstParameter");
             }
         }
 
@@ -87,7 +87,7 @@ protected FieldDesc FunctionPointerField
         {
             get
             {
-                return SystemDelegateType.GetKnownField("m_functionPointer");
+                return SystemDelegateType.GetKnownField("_functionPointer");
             }
         }
 
@@ -304,7 +304,8 @@ public override MethodIL EmitIL()
             ILEmitter emitter = new ILEmitter();
             ILCodeStream codeStream = emitter.NewCodeStream();
 
-            ArrayType invocationListArrayType = SystemDelegateType.MakeArrayType();
+            TypeDesc delegateWrapperType = ((MetadataType)SystemDelegateType).GetKnownNestedType("Wrapper");
+            ArrayType invocationListArrayType = delegateWrapperType.MakeArrayType();
 
             ILLocalVariable delegateArrayLocal = emitter.NewLocal(invocationListArrayType);
             ILLocalVariable invocationCountLocal = emitter.NewLocal(Context.GetWellKnownType(WellKnownType.Int32));
@@ -318,11 +319,11 @@ public override MethodIL EmitIL()
             }
 
             // Fill in delegateArrayLocal
-            // Delegate[] delegateArrayLocal = (Delegate[])this.m_helperObject
+            // Delegate.Wrapper[] delegateArrayLocal = (Delegate.Wrapper[])this._helperObject
 
             // ldarg.0 (this pointer)
-            // ldfld Delegate.HelperObjectField
-            // castclass Delegate[]
+            // ldfld Delegate._helperObject
+            // castclass Delegate.Wrapper[]
             // stloc delegateArrayLocal
             codeStream.EmitLdArg(0);
             codeStream.Emit(ILOpcode.ldfld, emitter.NewToken(HelperObjectField));
@@ -330,9 +331,10 @@ public override MethodIL EmitIL()
             codeStream.EmitStLoc(delegateArrayLocal);
 
             // Fill in invocationCountLocal
-            // int invocationCountLocal = this.m_extraFunctionPointerOrData
+            // int invocationCountLocal = this._extraFunctionPointerOrData
+
             // ldarg.0 (this pointer)
-            // ldfld Delegate.m_extraFunctionPointerOrData
+            // ldfld Delegate._extraFunctionPointerOrData
             // stloc invocationCountLocal
             codeStream.EmitLdArg(0);
             codeStream.Emit(ILOpcode.ldfld, emitter.NewToken(ExtraFunctionPointerOrDataField));
@@ -352,25 +354,27 @@ public override MethodIL EmitIL()
 
             // Implement as do/while loop. We only have this stub in play if we're in the multicast situation
             // Find the delegate to call
-            // Delegate = delegateToCallLocal = delegateArrayLocal[iteratorLocal];
+            // Delegate = delegateToCallLocal = delegateArrayLocal[iteratorLocal].Value;
 
             // ldloc delegateArrayLocal
             // ldloc iteratorLocal
-            // ldelem System.Delegate
+            // ldelema Delegate.Wrapper
+            // ldfld Delegate.Wrapper.Value
             // stloc delegateToCallLocal
             codeStream.EmitLdLoc(delegateArrayLocal);
             codeStream.EmitLdLoc(iteratorLocal);
-            codeStream.Emit(ILOpcode.ldelem, emitter.NewToken(SystemDelegateType));
+            codeStream.Emit(ILOpcode.ldelema, emitter.NewToken(delegateWrapperType));
+            codeStream.Emit(ILOpcode.ldfld, emitter.NewToken(delegateWrapperType.GetKnownField("Value")));
             codeStream.EmitStLoc(delegateToCallLocal);
 
             // Call the delegate
             // returnValueLocal = delegateToCallLocal(...);
 
             // ldloc delegateToCallLocal
-            // ldfld System.Delegate.m_firstParameter
+            // ldfld Delegate._firstParameter
             // ldarg 1, n
             // ldloc delegateToCallLocal
-            // ldfld System.Delegate.m_functionPointer
+            // ldfld Delegate._functionPointer
             // calli returnValueType thiscall (all the params)
             // IF there is a return value
             // stloc returnValueLocal
@@ -501,7 +505,7 @@ public override MethodIL EmitIL()
             // args[1] = param1;
             //  ...
             // try {
-            //      ret = ((Func<object[], object>)dlg.m_helperObject)(args);
+            //      ret = ((Func<object[], object>)dlg._helperObject)(args);
             // } finally {
             //      param0 = (T0)args[0];   // only generated for each byref argument
             // }
diff --git a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/ILEmitter.cs b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/ILEmitter.cs
index b6f31e130142..4a4723d2b7af 100644
--- a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/ILEmitter.cs
+++ b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/ILEmitter.cs
@@ -686,27 +686,27 @@ private ILToken NewToken(object value, int tokenType)
 
         public ILToken NewToken(TypeDesc value)
         {
-            return NewToken(value, 0x01000000);
+            return NewToken(value, 0x01000000); // mdtTypeRef
         }
 
         public ILToken NewToken(MethodDesc value)
         {
-            return NewToken(value, 0x0a000000);
+            return NewToken(value, 0x0a000000); // mdtMemberRef
         }
 
         public ILToken NewToken(FieldDesc value)
         {
-            return NewToken(value, 0x0a000000);
+            return NewToken(value, 0x0a000000); // mdtMemberRef
         }
 
         public ILToken NewToken(string value)
         {
-            return NewToken(value, 0x70000000);
+            return NewToken(value, 0x70000000); // mdtString
         }
 
         public ILToken NewToken(MethodSignature value)
         {
-            return NewToken(value, 0x11000000);
+            return NewToken(value, 0x11000000); // mdtSignature
         }
 
         public ILLocalVariable NewLocal(TypeDesc localType, bool isPinned = false)
diff --git a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/PInvokeILEmitter.cs b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/PInvokeILEmitter.cs
index e9d7adc4ad0e..4621ebc0e465 100644
--- a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/PInvokeILEmitter.cs
+++ b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/PInvokeILEmitter.cs
@@ -300,17 +300,17 @@ private void EmitPInvokeCall(PInvokeILCodeStreams ilCodeStreams)
 
             if (!_pInvokeILEmitterConfiguration.GenerateDirectCall(_targetMethod, out _))
             {
+                MethodSignature nativeSig = new MethodSignature(
+                    MethodSignatureFlags.Static | MethodSignatureFlags.UnmanagedCallingConvention, 0, nativeReturnType, nativeParameterTypes,
+                    _targetMethod.GetPInvokeMethodCallingConventions().EncodeAsEmbeddedSignatureData(context));
+
                 MetadataType lazyHelperType = context.GetHelperType("InteropHelpers");
-                FieldDesc lazyDispatchCell = _interopStateManager.GetPInvokeLazyFixupField(_targetMethod);
+                FieldDesc lazyDispatchCell = _interopStateManager.GetPInvokeLazyFixupField(_targetMethod, nativeSig);
 
                 fnptrLoadStream.Emit(ILOpcode.ldsflda, emitter.NewToken(lazyDispatchCell));
                 fnptrLoadStream.Emit(ILOpcode.call, emitter.NewToken(lazyHelperType
                     .GetKnownMethod("ResolvePInvoke", null)));
 
-                MethodSignature nativeSig = new MethodSignature(
-                    MethodSignatureFlags.Static | MethodSignatureFlags.UnmanagedCallingConvention, 0, nativeReturnType, nativeParameterTypes,
-                    _targetMethod.GetPInvokeMethodCallingConventions().EncodeAsEmbeddedSignatureData(context));
-
                 ILLocalVariable vNativeFunctionPointer = emitter.NewLocal(context
                     .GetWellKnownType(WellKnownType.IntPtr));
 
diff --git a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/PInvokeLazyFixupField.cs b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/PInvokeLazyFixupField.cs
index b74dc75b46de..b14891312fa4 100644
--- a/src/coreclr/tools/Common/TypeSystem/IL/Stubs/PInvokeLazyFixupField.cs
+++ b/src/coreclr/tools/Common/TypeSystem/IL/Stubs/PInvokeLazyFixupField.cs
@@ -16,12 +16,14 @@ public sealed partial class PInvokeLazyFixupField : FieldDesc
     {
         private readonly DefType _owningType;
         private readonly MethodDesc _targetMethod;
+        private readonly MethodSignature _nativeSignature;
 
-        public PInvokeLazyFixupField(DefType owningType, MethodDesc targetMethod)
+        public PInvokeLazyFixupField(DefType owningType, MethodDesc targetMethod, MethodSignature nativeSignature)
         {
             Debug.Assert(targetMethod.IsPInvoke);
             _owningType = owningType;
             _targetMethod = targetMethod;
+            _nativeSignature = nativeSignature;
         }
 
         public MethodDesc TargetMethod
@@ -32,6 +34,14 @@ public MethodDesc TargetMethod
             }
         }
 
+        public MethodSignature NativeSignature
+        {
+            get
+            {
+                return _nativeSignature;
+            }
+        }
+
         public PInvokeMetadata PInvokeMetadata
         {
             get
diff --git a/src/coreclr/tools/Common/TypeSystem/IL/UnsafeAccessors.cs b/src/coreclr/tools/Common/TypeSystem/IL/UnsafeAccessors.cs
index 6338f725be22..e8e97f2eb319 100644
--- a/src/coreclr/tools/Common/TypeSystem/IL/UnsafeAccessors.cs
+++ b/src/coreclr/tools/Common/TypeSystem/IL/UnsafeAccessors.cs
@@ -29,12 +29,6 @@ public static MethodIL TryGetIL(EcmaMethod method)
                 return GenerateAccessorBadImageFailure(method);
             }
 
-            // Block generic support early
-            if (method.HasInstantiation || method.OwningType.HasInstantiation)
-            {
-                return GenerateAccessorBadImageFailure(method);
-            }
-
             if (!TryParseUnsafeAccessorAttribute(method, decodedAttribute.Value, out UnsafeAccessorKind kind, out string name))
             {
                 return GenerateAccessorBadImageFailure(method);
@@ -54,7 +48,7 @@ public static MethodIL TryGetIL(EcmaMethod method)
                 firstArgType = sig[0];
             }
 
-            bool isAmbiguous = false;
+            SetTargetResult result;
 
             // Using the kind type, perform the following:
             //  1) Validate the basic type information from the signature.
@@ -77,9 +71,10 @@ public static MethodIL TryGetIL(EcmaMethod method)
                     }
 
                     const string ctorName = ".ctor";
-                    if (!TrySetTargetMethod(ref context, ctorName, out isAmbiguous))
+                    result = TrySetTargetMethod(ref context, ctorName);
+                    if (result is not SetTargetResult.Success)
                     {
-                        return GenerateAccessorSpecificFailure(ref context, ctorName, isAmbiguous);
+                        return GenerateAccessorSpecificFailure(ref context, ctorName, result);
                     }
                     break;
                 case UnsafeAccessorKind.Method:
@@ -105,9 +100,10 @@ public static MethodIL TryGetIL(EcmaMethod method)
                     }
 
                     context.IsTargetStatic = kind == UnsafeAccessorKind.StaticMethod;
-                    if (!TrySetTargetMethod(ref context, name, out isAmbiguous))
+                    result = TrySetTargetMethod(ref context, name);
+                    if (result is not SetTargetResult.Success)
                     {
-                        return GenerateAccessorSpecificFailure(ref context, name, isAmbiguous);
+                        return GenerateAccessorSpecificFailure(ref context, name, result);
                     }
                     break;
 
@@ -136,9 +132,10 @@ public static MethodIL TryGetIL(EcmaMethod method)
                     }
 
                     context.IsTargetStatic = kind == UnsafeAccessorKind.StaticField;
-                    if (!TrySetTargetField(ref context, name, ((ParameterizedType)retType).GetParameterType()))
+                    result = TrySetTargetField(ref context, name, ((ParameterizedType)retType).GetParameterType());
+                    if (result is not SetTargetResult.Success)
                     {
-                        return GenerateAccessorSpecificFailure(ref context, name, isAmbiguous);
+                        return GenerateAccessorSpecificFailure(ref context, name, result);
                     }
                     break;
 
@@ -232,6 +229,12 @@ private static bool ValidateTargetType(TypeDesc targetTypeMaybe, out TypeDesc va
                 targetType = null;
             }
 
+            // We do not support signature variables as a target (for example, VAR and MVAR).
+            if (targetType is SignatureVariable)
+            {
+                targetType = null;
+            }
+
             validated = targetType;
             return validated != null;
         }
@@ -366,7 +369,45 @@ private static bool DoesMethodMatchUnsafeAccessorDeclaration(ref GenerationConte
             return true;
         }
 
-        private static bool TrySetTargetMethod(ref GenerationContext context, string name, out bool isAmbiguous, bool ignoreCustomModifiers = true)
+        private static bool VerifyDeclarationSatisfiesTargetConstraints(MethodDesc declaration, TypeDesc targetType, MethodDesc targetMethod)
+        {
+            Debug.Assert(declaration != null);
+            Debug.Assert(targetType != null);
+            Debug.Assert(targetMethod != null);
+
+            if (targetType.HasInstantiation)
+            {
+                Instantiation declClassInst = declaration.OwningType.Instantiation;
+                var instType = targetType.Context.GetInstantiatedType((MetadataType)targetType.GetTypeDefinition(), declClassInst);
+                if (!instType.CheckConstraints())
+                {
+                    return false;
+                }
+
+                targetMethod = instType.FindMethodOnExactTypeWithMatchingTypicalMethod(targetMethod);
+            }
+
+            if (targetMethod.HasInstantiation)
+            {
+                Instantiation declMethodInst = declaration.Instantiation;
+                var instMethod = targetType.Context.GetInstantiatedMethod(targetMethod, declMethodInst);
+                if (!instMethod.CheckConstraints())
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        private enum SetTargetResult
+        {
+            Success,
+            Missing,
+            Ambiguous,
+            Invalid,
+        }
+
+        private static SetTargetResult TrySetTargetMethod(ref GenerationContext context, string name, bool ignoreCustomModifiers = true)
         {
             TypeDesc targetType = context.TargetType;
 
@@ -399,23 +440,39 @@ private static bool TrySetTargetMethod(ref GenerationContext context, string nam
                         // We have detected ambiguity when ignoring custom modifiers.
                         // Start over, but look for a match requiring custom modifiers
                         // to match precisely.
-                        if (TrySetTargetMethod(ref context, name, out isAmbiguous, ignoreCustomModifiers: false))
-                            return true;
+                        if (SetTargetResult.Success == TrySetTargetMethod(ref context, name, ignoreCustomModifiers: false))
+                            return SetTargetResult.Success;
                     }
-
-                    isAmbiguous = true;
-                    return false;
+                    return SetTargetResult.Ambiguous;
                 }
 
                 targetMaybe = md;
             }
 
-            isAmbiguous = false;
+            if (targetMaybe != null)
+            {
+                if (!VerifyDeclarationSatisfiesTargetConstraints(context.Declaration, targetType, targetMaybe))
+                {
+                    return SetTargetResult.Invalid;
+                }
+
+                if (targetMaybe.HasInstantiation)
+                {
+                    TypeDesc[] methodInstantiation = new TypeDesc[targetMaybe.Instantiation.Length];
+                    for (int i = 0; i < methodInstantiation.Length; ++i)
+                    {
+                        methodInstantiation[i] = targetMaybe.Context.GetSignatureVariable(i, true);
+                    }
+                    targetMaybe = targetMaybe.Context.GetInstantiatedMethod(targetMaybe, new Instantiation(methodInstantiation));
+                }
+                Debug.Assert(targetMaybe is not null);
+            }
+
             context.TargetMethod = targetMaybe;
-            return context.TargetMethod != null;
+            return context.TargetMethod != null ? SetTargetResult.Success : SetTargetResult.Missing;
         }
 
-        private static bool TrySetTargetField(ref GenerationContext context, string name, TypeDesc fieldType)
+        private static SetTargetResult TrySetTargetField(ref GenerationContext context, string name, TypeDesc fieldType)
         {
             TypeDesc targetType = context.TargetType;
 
@@ -431,10 +488,10 @@ private static bool TrySetTargetField(ref GenerationContext context, string name
                     && fieldType == fd.FieldType)
                 {
                     context.TargetField = fd;
-                    return true;
+                    return SetTargetResult.Success;
                 }
             }
-            return false;
+            return SetTargetResult.Missing;
         }
 
         private static MethodIL GenerateAccessor(ref GenerationContext context)
@@ -486,7 +543,7 @@ private static MethodIL GenerateAccessor(ref GenerationContext context)
             return emit.Link(context.Declaration);
         }
 
-        private static MethodIL GenerateAccessorSpecificFailure(ref GenerationContext context, string name, bool ambiguous)
+        private static MethodIL GenerateAccessorSpecificFailure(ref GenerationContext context, string name, SetTargetResult result)
         {
             ILEmitter emit = new ILEmitter();
             ILCodeStream codeStream = emit.NewCodeStream();
@@ -496,14 +553,19 @@ private static MethodIL GenerateAccessorSpecificFailure(ref GenerationContext co
 
             MethodDesc thrower;
             TypeSystemContext typeSysContext = context.Declaration.Context;
-            if (ambiguous)
+            if (result is SetTargetResult.Ambiguous)
             {
                 codeStream.EmitLdc((int)ExceptionStringID.AmbiguousMatchUnsafeAccessor);
                 thrower = typeSysContext.GetHelperEntryPoint("ThrowHelpers", "ThrowAmbiguousMatchException");
             }
+            else if (result is SetTargetResult.Invalid)
+            {
+                codeStream.EmitLdc((int)ExceptionStringID.InvalidProgramDefault);
+                thrower = typeSysContext.GetHelperEntryPoint("ThrowHelpers", "ThrowInvalidProgramException");
+            }
             else
             {
-
+                Debug.Assert(result is SetTargetResult.Missing);
                 ExceptionStringID id;
                 if (context.Kind == UnsafeAccessorKind.Field || context.Kind == UnsafeAccessorKind.StaticField)
                 {
diff --git a/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs b/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs
index 7b0cf61350ab..565a890b2de7 100644
--- a/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs
@@ -949,7 +949,7 @@ internal static bool ShouldCheckForPendingException(TargetDetails target, PInvok
                 && metadata.Name.StartsWith(ObjectiveCMsgSend);
         }
 
-        internal static int? GetObjectiveCMessageSendFunction(TargetDetails target, string pinvokeModule, string pinvokeFunction)
+        internal static uint? GetObjectiveCMessageSendFunction(TargetDetails target, string pinvokeModule, string pinvokeFunction)
         {
             if (!target.IsApplePlatform || pinvokeModule != ObjectiveCLibrary)
                 return null;
@@ -957,11 +957,11 @@ internal static bool ShouldCheckForPendingException(TargetDetails target, PInvok
 #pragma warning disable CA1416
             return pinvokeFunction switch
             {
-                "objc_msgSend" => (int)ObjectiveCMarshal.MessageSendFunction.MsgSend,
-                "objc_msgSend_fpret" => (int)ObjectiveCMarshal.MessageSendFunction.MsgSendFpret,
-                "objc_msgSend_stret" => (int)ObjectiveCMarshal.MessageSendFunction.MsgSendStret,
-                "objc_msgSendSuper" => (int)ObjectiveCMarshal.MessageSendFunction.MsgSendSuper,
-                "objc_msgSendSuper_stret" => (int)ObjectiveCMarshal.MessageSendFunction.MsgSendSuperStret,
+                "objc_msgSend" => (uint)ObjectiveCMarshal.MessageSendFunction.MsgSend,
+                "objc_msgSend_fpret" => (uint)ObjectiveCMarshal.MessageSendFunction.MsgSendFpret,
+                "objc_msgSend_stret" => (uint)ObjectiveCMarshal.MessageSendFunction.MsgSendStret,
+                "objc_msgSendSuper" => (uint)ObjectiveCMarshal.MessageSendFunction.MsgSendSuper,
+                "objc_msgSendSuper_stret" => (uint)ObjectiveCMarshal.MessageSendFunction.MsgSendSuperStret,
                 _ => null,
             };
 #pragma warning restore CA1416
diff --git a/src/coreclr/tools/Common/TypeSystem/Interop/IL/Marshaller.cs b/src/coreclr/tools/Common/TypeSystem/Interop/IL/Marshaller.cs
index 7f67f9230301..4160711b6a36 100644
--- a/src/coreclr/tools/Common/TypeSystem/Interop/IL/Marshaller.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Interop/IL/Marshaller.cs
@@ -908,13 +908,13 @@ protected override void EmitMarshalArgumentManagedToNative()
             {
                 ILCodeStream marshallingCodeStream = _ilCodeStreams.MarshallingCodeStream;
                 ILEmitter emitter = _ilCodeStreams.Emitter;
-                ILLocalVariable native = emitter.NewLocal(Context.GetWellKnownType(WellKnownType.IntPtr));
+                ILLocalVariable native = emitter.NewLocal(Context.GetPointerType(ManagedParameterType));
 
                 ILLocalVariable vPinnedByRef = emitter.NewLocal(ManagedParameterType, true);
                 marshallingCodeStream.EmitLdArg(Index - 1);
                 marshallingCodeStream.EmitStLoc(vPinnedByRef);
                 marshallingCodeStream.EmitLdLoc(vPinnedByRef);
-                marshallingCodeStream.Emit(ILOpcode.conv_i);
+                marshallingCodeStream.Emit(ILOpcode.conv_u);
                 marshallingCodeStream.EmitStLoc(native);
                 _ilCodeStreams.CallsiteSetupCodeStream.EmitLdLoc(native);
             }
@@ -1078,6 +1078,8 @@ protected virtual void EmitElementCount(ILCodeStream codeStream, MarshalDirectio
 
                     if (sizeConst.HasValue)
                         codeStream.Emit(ILOpcode.add);
+
+                    codeStream.Emit(ILOpcode.conv_ovf_i4);
                 }
 
                 if (!sizeConst.HasValue && !sizeParamIndex.HasValue)
diff --git a/src/coreclr/tools/Common/TypeSystem/Interop/InteropStateManager.cs b/src/coreclr/tools/Common/TypeSystem/Interop/InteropStateManager.cs
index f8a3ea396406..7147f65cbb68 100644
--- a/src/coreclr/tools/Common/TypeSystem/Interop/InteropStateManager.cs
+++ b/src/coreclr/tools/Common/TypeSystem/Interop/InteropStateManager.cs
@@ -180,9 +180,9 @@ public TypeDesc GetInlineArrayType(InlineArrayCandidate candidate)
             return _inlineArrayHashtable.GetOrCreateValue(candidate);
         }
 
-        public FieldDesc GetPInvokeLazyFixupField(MethodDesc method)
+        public FieldDesc GetPInvokeLazyFixupField(MethodDesc method, MethodSignature nativeSig)
         {
-            return _pInvokeLazyFixupFieldHashtable.GetOrCreateValue(method);
+            return _pInvokeLazyFixupFieldHashtable.GetOrCreateValue(new (method, nativeSig));
         }
 
         public MethodDesc GetPInvokeCalliStub(MethodSignature signature, ModuleDesc moduleContext)
@@ -446,11 +446,19 @@ public PInvokeDelegateWrapperHashtable(InteropStateManager interopStateManager,
             }
         }
 
-        private sealed class PInvokeLazyFixupFieldHashtable : LockFreeReaderHashtable<MethodDesc, PInvokeLazyFixupField>
+        private readonly struct PInvokeLazyFixupFieldKey
         {
-            protected override int GetKeyHashCode(MethodDesc key)
+            public readonly MethodDesc Method;
+            public readonly MethodSignature NativeSignature;
+            public PInvokeLazyFixupFieldKey(MethodDesc method, MethodSignature nativeSignature)
+                => (Method, NativeSignature) = (method, nativeSignature);
+        }
+
+        private sealed class PInvokeLazyFixupFieldHashtable : LockFreeReaderHashtable<PInvokeLazyFixupFieldKey, PInvokeLazyFixupField>
+        {
+            protected override int GetKeyHashCode(PInvokeLazyFixupFieldKey key)
             {
-                return key.GetHashCode();
+                return key.Method.GetHashCode();
             }
 
             protected override int GetValueHashCode(PInvokeLazyFixupField value)
@@ -458,9 +466,9 @@ protected override int GetValueHashCode(PInvokeLazyFixupField value)
                 return value.TargetMethod.GetHashCode();
             }
 
-            protected override bool CompareKeyToValue(MethodDesc key, PInvokeLazyFixupField value)
+            protected override bool CompareKeyToValue(PInvokeLazyFixupFieldKey key, PInvokeLazyFixupField value)
             {
-                return key == value.TargetMethod;
+                return key.Method == value.TargetMethod;
             }
 
             protected override bool CompareValueToValue(PInvokeLazyFixupField value1, PInvokeLazyFixupField value2)
@@ -468,9 +476,9 @@ protected override bool CompareValueToValue(PInvokeLazyFixupField value1, PInvok
                 return value1.TargetMethod == value2.TargetMethod;
             }
 
-            protected override PInvokeLazyFixupField CreateValueFromKey(MethodDesc key)
+            protected override PInvokeLazyFixupField CreateValueFromKey(PInvokeLazyFixupFieldKey key)
             {
-                return new PInvokeLazyFixupField(_owningType, key);
+                return new PInvokeLazyFixupField(_owningType, key.Method, key.NativeSignature);
             }
 
             private readonly DefType _owningType;
diff --git a/src/coreclr/tools/ILVerification/ILVerification.projitems b/src/coreclr/tools/ILVerification/ILVerification.projitems
index 58c2204c7951..81ce9f65d113 100644
--- a/src/coreclr/tools/ILVerification/ILVerification.projitems
+++ b/src/coreclr/tools/ILVerification/ILVerification.projitems
@@ -111,6 +111,9 @@
     <Compile Include="$(ToolsCommonPath)TypeSystem\Common\FieldLayoutAlgorithm.cs">
       <Link>TypeSystem\Common\FieldLayoutAlgorithm.cs</Link>
     </Compile>
+    <Compile Include="$(ToolsCommonPath)TypeSystem\Common\ImpliedRepeatedFieldDesc.cs">
+      <Link>TypeSystem\Common\ImpliedRepeatedFieldDesc.cs</Link>
+    </Compile>
     <Compile Include="$(ToolsCommonPath)TypeSystem\Common\IModuleResolver.cs">
       <Link>TypeSystem\Common\IModuleResolver.cs</Link>
     </Compile>
@@ -150,6 +153,9 @@
     <Compile Include="$(ToolsCommonPath)TypeSystem\Common\ExplicitLayoutValidator.cs">
       <Link>TypeSystem\Common\ExplicitLayoutValidator.cs</Link>
     </Compile>
+    <Compile Include="$(ToolsCommonPath)TypeSystem\Common\FieldLayoutIntervalCalculator.cs">
+      <Link>TypeSystem\Common\FieldLayoutIntervalCalculator.cs</Link>
+    </Compile>
     <Compile Include="$(ToolsCommonPath)TypeSystem\Common\MetadataRuntimeInterfacesAlgorithm.cs">
       <Link>TypeSystem\Common\MetadataRuntimeInterfacesAlgorithm.cs</Link>
     </Compile>
@@ -249,6 +255,12 @@
     <Compile Include="$(ToolsCommonPath)TypeSystem\Common\ThrowHelper.cs">
       <Link>TypeSystem\Common\ThrowHelper.cs</Link>
     </Compile>
+    <Compile Include="$(ToolsCommonPath)TypeSystem\Common\TypeWithRepeatedFields.cs">
+      <Link>TypeSystem\Common\TypeWithRepeatedFields.cs</Link>
+    </Compile>
+    <Compile Include="$(ToolsCommonPath)TypeSystem\Common\TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs">
+      <Link>TypeSystem\Common\TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs</Link>
+    </Compile>
     <Compile Include="$(ToolsCommonPath)TypeSystem\Common\Utilities\ExceptionTypeNameFormatter.cs">
       <Link>TypeSystem\Common\Utilities\ExceptionTypeNameFormatter.cs</Link>
     </Compile>
diff --git a/src/coreclr/tools/ILVerify/README.md b/src/coreclr/tools/ILVerify/README.md
index 3c838b4faa48..66c09870d258 100644
--- a/src/coreclr/tools/ILVerify/README.md
+++ b/src/coreclr/tools/ILVerify/README.md
@@ -17,7 +17,7 @@ dotnet tool install --global dotnet-ilverify
 Example of use:
 
 ```
-C:\test>dotnet ilverify hello.dll -r "c:\Program Files\dotnet\shared\Microsoft.NETCore.App\2.1.12\*.dll"
+C:\test>ilverify hello.dll -r "c:\Program Files\dotnet\shared\Microsoft.NETCore.App\2.1.12\*.dll"
 All Classes and Methods in C:\test\hello.dll Verified.
 ```
 
diff --git a/src/coreclr/tools/StressLogAnalyzer/StressLogDump.cpp b/src/coreclr/tools/StressLogAnalyzer/StressLogDump.cpp
index f3be8b881221..eadc27e2557c 100644
--- a/src/coreclr/tools/StressLogAnalyzer/StressLogDump.cpp
+++ b/src/coreclr/tools/StressLogAnalyzer/StressLogDump.cpp
@@ -24,6 +24,10 @@ class MapViewHolder
 #include "../../../inc/stresslog.h"
 #include "StressMsgReader.h"
 
+#ifdef HOST_WINDOWS
+#include <malloc.h>
+#endif
+
 
 void GcHistClear();
 void GcHistAddLog(LPCSTR msg, StressMsgReader stressMsg);
@@ -243,7 +247,7 @@ void formatOutput(struct IDebugDataSpaces* memCallBack, ___in FILE* file, __inou
                                     fprintf (file, " (%s", Symbol);
                                     if (Displacement)
                                     {
-                                        fprintf (file, "+%#llx", Displacement);
+                                        fprintf (file, "+%#llx", (unsigned long long)Displacement);
                                     }
                                     fprintf (file, ")");
                                 }
@@ -528,7 +532,7 @@ HRESULT StressLog::Dump(ULONG64 outProcLog, const char* fileName, struct IDebugD
             latestLog->readPtr = NULL;
             if (!bDoGcHist)
             {
-                fprintf(file, "------------ Last message from thread %llx -----------\n", latestLog->threadId);
+                fprintf(file, "------------ Last message from thread %llx -----------\n", (unsigned long long)latestLog->threadId);
             }
         }
 
diff --git a/src/coreclr/tools/StressLogAnalyzer/StressLogPlugin.cpp b/src/coreclr/tools/StressLogAnalyzer/StressLogPlugin.cpp
index dc4a63c0e8db..20d0f1b6b229 100644
--- a/src/coreclr/tools/StressLogAnalyzer/StressLogPlugin.cpp
+++ b/src/coreclr/tools/StressLogAnalyzer/StressLogPlugin.cpp
@@ -6,6 +6,7 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <math.h>
+#include <algorithm>
 
 #ifndef INFINITY
 #define INFINITY 1e300 // Practically good enough - not sure why we miss this in our Linux build.
@@ -42,6 +43,9 @@ bool IsInCantAllocStressLogRegion()
 #include "../../../inc/stresslog.h"
 #include "StressMsgReader.h"
 
+using std::min;
+using std::max;
+
 size_t StressLog::writing_base_address;
 size_t StressLog::reading_base_address;
 
@@ -1323,7 +1327,7 @@ int ProcessStressLog(void* baseAddress, int argc, char* argv[])
     double latestTime = FindLatestTime(hdr);
     if (s_timeFilterStart < 0)
     {
-        s_timeFilterStart = max(latestTime + s_timeFilterStart, 0);
+        s_timeFilterStart = max(latestTime + s_timeFilterStart, 0.0);
         s_timeFilterEnd = latestTime;
     }
     for (ThreadStressLog* tsl = StressLog::TranslateMemoryMappedPointer(hdr->logs.t); tsl != nullptr; tsl = StressLog::TranslateMemoryMappedPointer(tsl->next))
@@ -1346,7 +1350,7 @@ int ProcessStressLog(void* baseAddress, int argc, char* argv[])
     SYSTEM_INFO systemInfo;
     GetSystemInfo(&systemInfo);
 
-    DWORD threadCount = min(systemInfo.dwNumberOfProcessors, MAXIMUM_WAIT_OBJECTS);
+    DWORD threadCount = min(systemInfo.dwNumberOfProcessors, (DWORD)MAXIMUM_WAIT_OBJECTS);
     HANDLE threadHandle[64];
     for (DWORD i = 0; i < threadCount; i++)
     {
@@ -1361,7 +1365,7 @@ int ProcessStressLog(void* baseAddress, int argc, char* argv[])
 
     // the interlocked increment may have increased s_msgCount beyond MAX_MESSAGE_COUNT -
     // make sure we don't go beyond the end of the buffer
-    s_msgCount = min(s_msgCount, MAX_MESSAGE_COUNT);
+    s_msgCount = min<LONG64>((LONG64)s_msgCount, MAX_MESSAGE_COUNT);
 
     if (s_gcFilterStart != 0)
     {
diff --git a/src/coreclr/tools/StressLogAnalyzer/util.h b/src/coreclr/tools/StressLogAnalyzer/util.h
index 6999676c2e7c..752509277a77 100644
--- a/src/coreclr/tools/StressLogAnalyzer/util.h
+++ b/src/coreclr/tools/StressLogAnalyzer/util.h
@@ -12,7 +12,6 @@ typedef void* CRITSEC_COOKIE;
 
 #define STRESS_LOG_ANALYZER
 
-#include <malloc.h>
 #include "staticcontract.h"
 
 // This macro is used to standardize the wide character string literals between UNIX and Windows.
diff --git a/src/coreclr/tools/SuperFileCheck/SuperFileCheck.csproj b/src/coreclr/tools/SuperFileCheck/SuperFileCheck.csproj
index a3cf4fe6be2f..908da8fe32c3 100644
--- a/src/coreclr/tools/SuperFileCheck/SuperFileCheck.csproj
+++ b/src/coreclr/tools/SuperFileCheck/SuperFileCheck.csproj
@@ -40,7 +40,7 @@
 
   <ItemGroup>
     <PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="$(MicrosoftCodeAnalysisCSharpVersion)" />
-    <PackageDownload Condition="'$(DotNetBuildFromSource)' != 'true'" Include="runtime.$(JITToolsRid).Microsoft.NETCore.Runtime.JIT.Tools">
+    <PackageDownload Condition="'$(DotNetBuildSourceOnly)' != 'true'" Include="runtime.$(JITToolsRid).Microsoft.NETCore.Runtime.JIT.Tools">
       <Version>[$(JITToolsVersion)]</Version>
     </PackageDownload>
   </ItemGroup>
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/ILCompiler.Compiler.Tests.Assets/SwiftTypes.cs b/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/ILCompiler.Compiler.Tests.Assets/SwiftTypes.cs
new file mode 100644
index 000000000000..c83a084c7ce0
--- /dev/null
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/ILCompiler.Compiler.Tests.Assets/SwiftTypes.cs
@@ -0,0 +1,164 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace ILCompiler.Compiler.Tests.Assets.SwiftTypes;
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Double, ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Int16)]
+public struct I64_D_I8_I32_UI16
+{
+    public long i64;
+    public double d;
+    public sbyte i8;
+    public int i32;
+    public ushort ui16;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Double, ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Int8)]
+public struct I64_D_I8_I32_UI8
+{
+    public long i64;
+    public double d;
+    public sbyte i8;
+    public int i32;
+    public byte u8;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Int16)]
+public struct F5_S1_S0
+{
+    public short F0;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Int16, ExpectedLoweringAttribute.Lowered.Int64)]
+public struct F5_S1
+{
+    public ulong F0;
+    public long F1;
+    public F5_S1_S0 F2;
+    public long F3;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Int32)]
+[StructLayout(LayoutKind.Sequential, Size = 3)]
+public struct F5_S2_S0
+{
+    public short F0;
+    public byte F1;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Int32, ExpectedLoweringAttribute.Lowered.Int64)]
+public struct F5_S2
+{
+    public ulong F0;
+    public long F1;
+    public F5_S2_S0 F2;
+    public long F3;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Int64)]
+[StructLayout(LayoutKind.Sequential, Size = 5)]
+public struct ThreeByteStruct_SByte_Byte
+{
+    public F5_S2_S0 F0;
+    public sbyte F1;
+    public byte F2;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Double, ExpectedLoweringAttribute.Lowered.Float)]
+[StructLayout(LayoutKind.Sequential, Size = 12)]
+public struct F2087_S0_S0
+{
+    public double F0;
+    public float F1;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Double, ExpectedLoweringAttribute.Lowered.Float, ExpectedLoweringAttribute.Lowered.Int32, ExpectedLoweringAttribute.Lowered.Int8)]
+[StructLayout(LayoutKind.Sequential, Size = 17)]
+public struct F2087_S0
+{
+    public F2087_S0_S0 F0;
+    public int F1;
+    public sbyte F2;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Float, ExpectedLoweringAttribute.Lowered.Int32, ExpectedLoweringAttribute.Lowered.Int16)]
+public struct F114_S0
+{
+    public float F0;
+    public ushort F1;
+    public short F2;
+    public ushort F3;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Double, ExpectedLoweringAttribute.Lowered.Float, ExpectedLoweringAttribute.Lowered.Int32, ExpectedLoweringAttribute.Lowered.Int32)]
+[StructLayout(LayoutKind.Sequential, Size = 20)]
+struct F352_S0
+{
+    public double F0;
+    public float F1;
+    public uint F2;
+    public int F3;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Int64)]
+[InlineArray(4)]
+public struct InlineArray4Longs
+{
+    private long l;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Float, ExpectedLoweringAttribute.Lowered.Int32, ExpectedLoweringAttribute.Lowered.Int64)]
+public struct UnalignedLargeOpaque
+{
+    public float F0;
+    public short F1;
+    public short F2;
+    public int F3;
+    public int F4;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Int16, ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Int64)]
+[StructLayout(LayoutKind.Sequential, Size = 21)]
+public struct PointerSizeOpaqueBlocks
+{
+    public short F0;
+    public nint F1;
+    public int F2;
+    public byte F3;
+}
+
+public struct PointerSizeOpaqueBlocksNonNaturalAlignment_S0
+{
+    public byte F0;
+    public nint F1;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Int16, ExpectedLoweringAttribute.Lowered.Int8, ExpectedLoweringAttribute.Lowered.Int64, ExpectedLoweringAttribute.Lowered.Int64, Offsets = [0x0, 0x8, 0x10, 0x18])]
+[StructLayout(LayoutKind.Sequential, Size = 21)]
+public struct PointerSizeOpaqueBlocksNonNaturalAlignment
+{
+    public short F0;
+    public PointerSizeOpaqueBlocksNonNaturalAlignment_S0 F1;
+    public int F2;
+    public byte F3;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Int64)]
+public struct F128_S_S0
+{
+    public sbyte F0;
+    public short F1;
+    public int F2;
+}
+
+[ExpectedLowering(ExpectedLoweringAttribute.Lowered.Float, ExpectedLoweringAttribute.Lowered.Int32, ExpectedLoweringAttribute.Lowered.Int64)]
+public struct F128_S
+{
+    public float F0;
+    public F128_S_S0 F1;
+    public uint F2;
+}
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/ILCompiler.Compiler.Tests.Assets/SwiftTypesSupport.cs b/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/ILCompiler.Compiler.Tests.Assets/SwiftTypesSupport.cs
new file mode 100644
index 000000000000..d37109b4bd7d
--- /dev/null
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/ILCompiler.Compiler.Tests.Assets/SwiftTypesSupport.cs
@@ -0,0 +1,43 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.InteropServices;
+
+namespace ILCompiler.Compiler.Tests.Assets.SwiftTypes;
+
+[AttributeUsage(AttributeTargets.Struct)]
+public sealed class ExpectedLoweringAttribute : Attribute
+{
+    public ExpectedLoweringAttribute()
+    {
+    }
+
+    public ExpectedLoweringAttribute(Lowered expectedLowering1)
+    {
+    }
+
+    public ExpectedLoweringAttribute(Lowered expectedLowering1, Lowered expectedLowering2)
+    {
+    }
+
+    public ExpectedLoweringAttribute(Lowered expectedLowering1, Lowered expectedLowering2, Lowered expectedLowering3)
+    {
+    }
+
+    public ExpectedLoweringAttribute(Lowered expectedLowering1, Lowered expectedLowering2, Lowered expectedLowering3, Lowered expectedLowering4)
+    {
+    }
+
+    public enum Lowered
+    {
+        Float,
+        Double,
+        Int8,
+        Int16,
+        Int32,
+        Int64
+    }
+
+    public int[] Offsets { get; set; }
+}
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/ILCompiler.Compiler.Tests.csproj b/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/ILCompiler.Compiler.Tests.csproj
index 4512bbfd88e7..69e9d87637f9 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/ILCompiler.Compiler.Tests.csproj
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/ILCompiler.Compiler.Tests.csproj
@@ -1,4 +1,4 @@
-﻿<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
     <AssemblyName>ILCompiler.Compiler.Tests</AssemblyName>
     <TargetFramework>$(NetCoreAppToolCurrent)</TargetFramework>
@@ -42,5 +42,6 @@
   <ItemGroup>
     <Compile Include="DependencyGraphTests.cs" />
     <Compile Include="DevirtualizationTests.cs" />
+    <Compile Include="SwiftLoweringTests.cs" />
   </ItemGroup>
 </Project>
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/SwiftLoweringTests.cs b/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/SwiftLoweringTests.cs
new file mode 100644
index 000000000000..1487f1b5e388
--- /dev/null
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler.Tests/SwiftLoweringTests.cs
@@ -0,0 +1,137 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Internal.IL;
+using Internal.JitInterface;
+using Internal.TypeSystem;
+using Internal.TypeSystem.Ecma;
+using Xunit;
+using System.Reflection.Metadata;
+
+namespace ILCompiler.Compiler.Tests
+{
+    public class SwiftLoweringTests
+    {
+        // Keep in sync with ExpectedLoweringAttribute in SwiftTypesSupport.cs
+        enum ExpectedLowering
+        {
+            Float,
+            Double,
+            Int8,
+            Int16,
+            Int32,
+            Int64
+        }
+
+        public static IEnumerable<object[]> DiscoverSwiftTypes()
+        {
+            var target = new TargetDetails(TargetArchitecture.X64, TargetOS.Windows, TargetAbi.NativeAot);
+            var context  = new CompilerTypeSystemContext(target, SharedGenericsMode.CanonicalReferenceTypes, DelegateFeature.All);
+
+            context.InputFilePaths = new Dictionary<string, string> {
+                { "Test.CoreLib", @"Test.CoreLib.dll" },
+                { "ILCompiler.Compiler.Tests.Assets", @"ILCompiler.Compiler.Tests.Assets.dll" },
+                };
+            context.ReferenceFilePaths = new Dictionary<string, string>();
+
+            context.SetSystemModule(context.GetModuleForSimpleName("Test.CoreLib"));
+            var testModule = context.GetModuleForSimpleName("ILCompiler.Compiler.Tests.Assets");
+            foreach (var type in testModule.GetAllTypes())
+            {
+                if (type is EcmaType { Namespace: "ILCompiler.Compiler.Tests.Assets.SwiftTypes", IsValueType: true } ecmaType
+                    && ecmaType.GetDecodedCustomAttribute("ILCompiler.Compiler.Tests.Assets.SwiftTypes", "ExpectedLoweringAttribute") is { } expectedLoweringAttribute)
+                {
+                    // By default, we assume that our lowered representation is meant to be naturally aligned.
+                    // For types that are not naturally aligned, the test can specify the offsets.
+                    int[]? offsets = null;
+                    if (expectedLoweringAttribute.NamedArguments.FirstOrDefault(a => a.Name == "Offsets").Value is ImmutableArray<CustomAttributeTypedArgument<TypeDesc>> offsetsArgument)
+                    {
+                        offsets = new int[offsetsArgument.Length];
+                        for (int i = 0; i < offsetsArgument.Length; i++)
+                        {
+                            offsets[i] = (int)offsetsArgument[i].Value;
+                        }
+                    }
+
+                    CORINFO_SWIFT_LOWERING expected;
+                    if (expectedLoweringAttribute.FixedArguments.Length == 0)
+                    {
+                        expected = new CORINFO_SWIFT_LOWERING { byReference = true };
+                    }
+                    else
+                    {
+                        expected = new CORINFO_SWIFT_LOWERING
+                        {
+                            numLoweredElements = expectedLoweringAttribute.FixedArguments.Length,
+                        };
+                        int naturalOffset = 0;
+                        for (int i = 0; i < expectedLoweringAttribute.FixedArguments.Length; i++)
+                        {
+                            ExpectedLowering lowering = (ExpectedLowering)(int)expectedLoweringAttribute.FixedArguments[i].Value;
+                            expected.LoweredElements[i] = GetCorType(lowering);
+                            if (offsets is not null)
+                            {
+                                expected.Offsets[i] = (uint)offsets[i];
+                            }
+                            else
+                            {
+                                // For all types that we lower to, alignment == size
+                                int size = GetSize(lowering);
+                                if (size > 1)
+                                {
+                                    expected.Offsets[i] = (uint)naturalOffset.AlignUp(size);
+                                }
+                                naturalOffset += size;
+                            }
+                        }
+                    }
+                    yield return new object[] { type.Name, type, expected };
+                }
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(DiscoverSwiftTypes))]
+        public void VerifyLowering(string typeName, EcmaType type, CORINFO_SWIFT_LOWERING expectedLowering)
+        {
+            _ = typeName;
+
+            Assert.Equal(expectedLowering, SwiftPhysicalLowering.LowerTypeForSwiftSignature(type));
+        }
+
+        private static int GetSize(ExpectedLowering expectedLowering)
+        {
+            return expectedLowering switch
+            {
+                ExpectedLowering.Float => 4,
+                ExpectedLowering.Double => 8,
+                ExpectedLowering.Int8 => 1,
+                ExpectedLowering.Int16 => 2,
+                ExpectedLowering.Int32 => 4,
+                ExpectedLowering.Int64 => 8,
+                _ => throw new ArgumentOutOfRangeException(nameof(expectedLowering))
+            };
+        }
+
+        private static CorInfoType GetCorType(ExpectedLowering expectedLowering)
+        {
+            return expectedLowering switch
+            {
+                ExpectedLowering.Float => CorInfoType.CORINFO_TYPE_FLOAT,
+                ExpectedLowering.Double => CorInfoType.CORINFO_TYPE_DOUBLE,
+                ExpectedLowering.Int8 => CorInfoType.CORINFO_TYPE_BYTE,
+                ExpectedLowering.Int16 => CorInfoType.CORINFO_TYPE_SHORT,
+                ExpectedLowering.Int32 => CorInfoType.CORINFO_TYPE_INT,
+                ExpectedLowering.Int64 => CorInfoType.CORINFO_TYPE_LONG,
+                _ => throw new ArgumentOutOfRangeException(nameof(expectedLowering))
+            };
+        }
+    }
+}
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/AnalysisBasedMetadataManager.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/AnalysisBasedMetadataManager.cs
index 8c24c7addbb0..7b9f19c8e8be 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/AnalysisBasedMetadataManager.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/AnalysisBasedMetadataManager.cs
@@ -261,6 +261,11 @@ public bool GeneratesMetadata(EcmaModule module, ExportedTypeHandle exportedType
                 return GeneratesMetadata(targetType);
             }
 
+            public bool GeneratesInterfaceImpl(MetadataType typeDef, MetadataType interfaceImpl)
+            {
+                return _parent.IsInterfaceUsed(interfaceImpl.GetTypeDefinition());
+            }
+
             public bool IsBlocked(MetadataType typeDef)
             {
                 return _blockingPolicy.IsBlocked(typeDef);
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Compilation.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Compilation.cs
index 19bb65062862..31c868a77f00 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Compilation.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Compilation.cs
@@ -103,9 +103,14 @@ public bool CanInline(MethodDesc caller, MethodDesc callee)
             return _inliningPolicy.CanInline(caller, callee);
         }
 
-        public bool CanConstructType(TypeDesc type)
+        public bool CanReferenceConstructedMethodTable(TypeDesc type)
         {
-            return NodeFactory.DevirtualizationManager.CanConstructType(type);
+            return NodeFactory.DevirtualizationManager.CanReferenceConstructedMethodTable(type);
+        }
+
+        public bool CanReferenceConstructedTypeOrCanonicalFormOfType(TypeDesc type)
+        {
+            return NodeFactory.DevirtualizationManager.CanReferenceConstructedTypeOrCanonicalFormOfType(type);
         }
 
         public DelegateCreationInfo GetDelegateCtor(TypeDesc delegateType, MethodDesc target, TypeDesc constrainedType, bool followVirtualDispatch)
@@ -133,7 +138,7 @@ public virtual ISymbolNode GetFieldRvaData(FieldDesc field)
             }
             else if (field is ExternSymbolMappedField externField)
             {
-                return NodeFactory.ExternSymbol(externField.SymbolName);
+                return NodeFactory.ExternVariable(externField.SymbolName);
             }
             else
             {
@@ -205,9 +210,9 @@ public MethodDesc ExpandIntrinsicForCallsite(MethodDesc intrinsicMethod, MethodD
             return intrinsicMethod;
         }
 
-        public bool HasFixedSlotVTable(TypeDesc type)
+        public bool NeedsSlotUseTracking(TypeDesc type)
         {
-            return NodeFactory.VTable(type).HasFixedSlots;
+            return !NodeFactory.VTable(type).HasKnownVirtualMethodUse;
         }
 
         public bool IsEffectivelySealed(TypeDesc type)
@@ -261,7 +266,7 @@ public bool NeedsRuntimeLookup(ReadyToRunHelperId lookupKind, object targetOfLoo
 
         public ReadyToRunHelperId GetLdTokenHelperForType(TypeDesc type)
         {
-            bool canConstructPerWholeProgramAnalysis = NodeFactory.DevirtualizationManager.CanConstructType(type);
+            bool canConstructPerWholeProgramAnalysis = NodeFactory.DevirtualizationManager.CanReferenceConstructedMethodTable(type);
             bool creationAllowed = ConstructedEETypeNode.CreationAllowed(type);
             return (canConstructPerWholeProgramAnalysis && creationAllowed)
                 ? ReadyToRunHelperId.TypeHandle
@@ -383,43 +388,37 @@ public GenericDictionaryLookup ComputeGenericLookup(MethodDesc contextMethod, Re
                 lookupKind = ReadyToRunHelperId.TypeHandle;
             }
 
-            // Can we do a fixed lookup? Start by checking if we can get to the dictionary.
-            // Context source having a vtable with fixed slots is a prerequisite.
-            if (contextSource == GenericContextSource.MethodParameter
-                || HasFixedSlotVTable(contextMethod.OwningType))
+            DictionaryLayoutNode dictionaryLayout;
+            if (contextSource == GenericContextSource.MethodParameter)
+                dictionaryLayout = _nodeFactory.GenericDictionaryLayout(contextMethod);
+            else
+                dictionaryLayout = _nodeFactory.GenericDictionaryLayout(contextMethod.OwningType);
+
+            // If the dictionary layout has fixed slots, we can compute the lookup now. Otherwise defer to helper.
+            if (dictionaryLayout.HasFixedSlots)
             {
-                DictionaryLayoutNode dictionaryLayout;
-                if (contextSource == GenericContextSource.MethodParameter)
-                    dictionaryLayout = _nodeFactory.GenericDictionaryLayout(contextMethod);
-                else
-                    dictionaryLayout = _nodeFactory.GenericDictionaryLayout(contextMethod.OwningType);
+                int pointerSize = _nodeFactory.Target.PointerSize;
 
-                // If the dictionary layout has fixed slots, we can compute the lookup now. Otherwise defer to helper.
-                if (dictionaryLayout.HasFixedSlots)
+                GenericLookupResult lookup = ReadyToRunGenericHelperNode.GetLookupSignature(_nodeFactory, lookupKind, targetOfLookup);
+                if (dictionaryLayout.TryGetSlotForEntry(lookup, out int dictionarySlot))
                 {
-                    int pointerSize = _nodeFactory.Target.PointerSize;
+                    int dictionaryOffset = dictionarySlot * pointerSize;
 
-                    GenericLookupResult lookup = ReadyToRunGenericHelperNode.GetLookupSignature(_nodeFactory, lookupKind, targetOfLookup);
-                    if (dictionaryLayout.TryGetSlotForEntry(lookup, out int dictionarySlot))
+                    if (contextSource == GenericContextSource.MethodParameter)
                     {
-                        int dictionaryOffset = dictionarySlot * pointerSize;
-
-                        if (contextSource == GenericContextSource.MethodParameter)
-                        {
-                            return GenericDictionaryLookup.CreateFixedLookup(contextSource, dictionaryOffset);
-                        }
-                        else
-                        {
-                            int vtableSlot = VirtualMethodSlotHelper.GetGenericDictionarySlot(_nodeFactory, contextMethod.OwningType);
-                            int vtableOffset = EETypeNode.GetVTableOffset(pointerSize) + vtableSlot * pointerSize;
-                            return GenericDictionaryLookup.CreateFixedLookup(contextSource, vtableOffset, dictionaryOffset);
-                        }
+                        return GenericDictionaryLookup.CreateFixedLookup(contextSource, dictionaryOffset);
                     }
                     else
                     {
-                        return GenericDictionaryLookup.CreateNullLookup(contextSource);
+                        int vtableSlot = VirtualMethodSlotHelper.GetGenericDictionarySlot(_nodeFactory, contextMethod.OwningType);
+                        int vtableOffset = EETypeNode.GetVTableOffset(pointerSize) + vtableSlot * pointerSize;
+                        return GenericDictionaryLookup.CreateFixedLookup(contextSource, vtableOffset, dictionaryOffset);
                     }
                 }
+                else
+                {
+                    return GenericDictionaryLookup.CreateNullLookup(contextSource);
+                }
             }
 
             // Fixed lookup not possible - use helper.
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilationBuilder.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilationBuilder.Aot.cs
index 8d596f87e43f..dae5bc317ae4 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilationBuilder.Aot.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilationBuilder.Aot.cs
@@ -132,7 +132,7 @@ public CompilationBuilder UseDwarf5(bool value)
         protected PreinitializationManager GetPreinitializationManager()
         {
             if (_preinitializationManager == null)
-                return new PreinitializationManager(_context, _compilationGroup, GetILProvider(), new TypePreinit.DisabledPreinitializationPolicy(), new StaticReadOnlyFieldPolicy());
+                return new PreinitializationManager(_context, _compilationGroup, GetILProvider(), new TypePreinit.DisabledPreinitializationPolicy(), new StaticReadOnlyFieldPolicy(), null);
             return _preinitializationManager;
         }
 
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.BoxedTypes.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.BoxedTypes.cs
index 9236e10d2c35..85e9f484f44b 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.BoxedTypes.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.BoxedTypes.cs
@@ -442,21 +442,34 @@ public override MethodIL EmitIL()
                 ILEmitter emit = new ILEmitter();
                 ILCodeStream codeStream = emit.NewCodeStream();
 
+                bool isX86 = Context.Target.Architecture == TargetArchitecture.X86;
+
                 FieldDesc eeTypeField = Context.GetWellKnownType(WellKnownType.Object).GetKnownField("m_pEEType");
 
                 // Load ByRef to the field with the value of the boxed valuetype
                 codeStream.EmitLdArg(0);
                 codeStream.Emit(ILOpcode.ldflda, emit.NewToken(Context.SystemModule.GetKnownType("System.Runtime.CompilerServices", "RawData").GetField("Data")));
 
+                if (isX86)
+                {
+                    for (int i = 0; i < _targetMethod.Signature.Length; i++)
+                    {
+                        codeStream.EmitLdArg(i + 1);
+                    }
+                }
+
                 // Load the MethodTable of the boxed valuetype (this is the hidden generic context parameter expected
                 // by the (canonical) instance method, but normally not part of the signature in IL).
                 codeStream.EmitLdArg(0);
                 codeStream.Emit(ILOpcode.ldfld, emit.NewToken(eeTypeField));
 
                 // Load rest of the arguments
-                for (int i = 0; i < _targetMethod.Signature.Length; i++)
+                if (!isX86)
                 {
-                    codeStream.EmitLdArg(i + 1);
+                    for (int i = 0; i < _targetMethod.Signature.Length; i++)
+                    {
+                        codeStream.EmitLdArg(i + 1);
+                    }
                 }
 
                 // Call an instance method on the target valuetype that has a fake instantiation parameter
@@ -608,9 +621,18 @@ public override MethodSignature Signature
 
                         // Shared instance methods on generic valuetypes have a hidden parameter with the generic context.
                         // We add it to the signature so that we can refer to it from IL.
-                        parameters[0] = Context.GetWellKnownType(WellKnownType.Void).MakePointerType();
-                        for (int i = 0; i < _methodRepresented.Signature.Length; i++)
-                            parameters[i + 1] = _methodRepresented.Signature[i];
+                        if (Context.Target.Architecture == TargetArchitecture.X86)
+                        {
+                            for (int i = 0; i < _methodRepresented.Signature.Length; i++)
+                                parameters[i] = _methodRepresented.Signature[i];
+                            parameters[_methodRepresented.Signature.Length] = Context.GetWellKnownType(WellKnownType.Void).MakePointerType();
+                        }
+                        else
+                        {
+                            parameters[0] = Context.GetWellKnownType(WellKnownType.Void).MakePointerType();
+                            for (int i = 0; i < _methodRepresented.Signature.Length; i++)
+                                parameters[i + 1] = _methodRepresented.Signature[i];
+                        }
 
                         _signature = new MethodSignature(_methodRepresented.Signature.Flags,
                             _methodRepresented.Signature.GenericParameterCount,
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.InterfaceThunks.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.InterfaceThunks.cs
index 747d2f91848c..51bf6a6888e2 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.InterfaceThunks.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/CompilerTypeSystemContext.InterfaceThunks.cs
@@ -216,9 +216,19 @@ public override MethodIL EmitIL()
                 MethodDesc getOrdinalInterfaceMethod = Context.GetHelperEntryPoint("SharedCodeHelpers", "GetOrdinalInterface");
                 MethodDesc getCurrentContext = Context.GetHelperEntryPoint("SharedCodeHelpers", "GetCurrentSharedThunkContext");
 
+                bool isX86 = Context.Target.Architecture == TargetArchitecture.X86;
+
                 // Load "this"
                 codeStream.EmitLdArg(0);
 
+                if (isX86)
+                {
+                    for (int i = 0; i < _targetMethod.Signature.Length; i++)
+                    {
+                        codeStream.EmitLdArg(i + 1);
+                    }
+                }
+
                 // Load the instantiating argument.
                 if (_interfaceIndex == UseContextFromRuntime)
                 {
@@ -232,10 +242,13 @@ public override MethodIL EmitIL()
                     codeStream.Emit(ILOpcode.call, emit.NewToken(getOrdinalInterfaceMethod));
                 }
 
-                // Load rest of the arguments
-                for (int i = 0; i < _targetMethod.Signature.Length; i++)
+                if (!isX86)
                 {
-                    codeStream.EmitLdArg(i + 1);
+                    // Load rest of the arguments
+                    for (int i = 0; i < _targetMethod.Signature.Length; i++)
+                    {
+                        codeStream.EmitLdArg(i + 1);
+                    }
                 }
 
                 // Call an instance method on the target interface that has a fake instantiation parameter
@@ -292,9 +305,18 @@ public override MethodSignature Signature
 
                         // Shared instance methods on generic interfaces have a hidden parameter with the generic context.
                         // We add it to the signature so that we can refer to it from IL.
-                        parameters[0] = Context.GetWellKnownType(WellKnownType.Void).MakePointerType();
-                        for (int i = 0; i < _methodRepresented.Signature.Length; i++)
-                            parameters[i + 1] = _methodRepresented.Signature[i];
+                        if (Context.Target.Architecture == TargetArchitecture.X86)
+                        {
+                            for (int i = 0; i < _methodRepresented.Signature.Length; i++)
+                                parameters[i] = _methodRepresented.Signature[i];
+                            parameters[_methodRepresented.Signature.Length] = Context.GetWellKnownType(WellKnownType.Void).MakePointerType();
+                        }
+                        else
+                        {
+                            parameters[0] = Context.GetWellKnownType(WellKnownType.Void).MakePointerType();
+                            for (int i = 0; i < _methodRepresented.Signature.Length; i++)
+                                parameters[i + 1] = _methodRepresented.Signature[i];
+                        }
 
                         _signature = new MethodSignature(_methodRepresented.Signature.Flags,
                             _methodRepresented.Signature.GenericParameterCount,
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/MethodBodyScanner.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/MethodBodyScanner.cs
index 13a55993c829..87764b54a3d7 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/MethodBodyScanner.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/MethodBodyScanner.cs
@@ -188,8 +188,7 @@ public int MoveNext(int offset)
                     _foundEndOfPrevBlock = false;
                 }
 
-                var reader = new ILReader(_methodBody.GetILBytes());
-                reader.Seek(offset);
+                var reader = new ILReader(_methodBody.GetILBytes(), offset);
                 ILOpcode opcode = reader.ReadILOpcode();
                 if (opcode.IsControlFlowInstruction())
                 {
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/ReflectionMarker.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/ReflectionMarker.cs
index c2132ce7aa6d..279418b2fac3 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/ReflectionMarker.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/ReflectionMarker.cs
@@ -30,6 +30,7 @@ public class ReflectionMarker
         public NodeFactory Factory { get; }
         public FlowAnnotations Annotations { get; }
         public DependencyList Dependencies { get => _dependencies; }
+        public List<INodeWithRuntimeDeterminedDependencies> RuntimeDeterminedDependencies { get; } = new List<INodeWithRuntimeDeterminedDependencies>();
 
         internal enum AccessKind
         {
@@ -78,13 +79,13 @@ internal void MarkTypeSystemEntity(in MessageOrigin origin, TypeSystemEntity ent
                     MarkEvent(origin, @event, reason, accessKind);
                     break;
                     // case InterfaceImplementation
-                    //  Nothing to do currently as Native AOT will preserve all interfaces on a preserved type
+                    //  This is handled in the MetadataType case above
             }
         }
 
         internal bool TryResolveTypeNameAndMark(string typeName, in DiagnosticContext diagnosticContext, bool needsAssemblyName, string reason, [NotNullWhen(true)] out TypeDesc? type)
         {
-            ModuleDesc? callingModule = ((diagnosticContext.Origin.MemberDefinition as MethodDesc)?.OwningType as MetadataType)?.Module;
+            ModuleDesc? callingModule = (diagnosticContext.Origin.MemberDefinition.GetOwningType() as MetadataType)?.Module;
 
             List<ModuleDesc> referencedModules = new();
             TypeDesc foundType = System.Reflection.TypeNameParser.ResolveType(typeName, callingModule, diagnosticContext.Origin.MemberDefinition!.Context,
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/ReflectionMethodBodyScanner.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/ReflectionMethodBodyScanner.cs
index d3e41cd36eaf..1340d8669ca0 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/ReflectionMethodBodyScanner.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/Dataflow/ReflectionMethodBodyScanner.cs
@@ -2,11 +2,14 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Collections.Generic;
 using System.Collections.Immutable;
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Linq;
 using System.Reflection.Metadata;
+using ILCompiler.DependencyAnalysis;
+using ILCompiler.DependencyAnalysisFramework;
 using ILCompiler.Logging;
 using ILLink.Shared;
 using ILLink.Shared.TrimAnalysis;
@@ -121,12 +124,13 @@ protected override void Scan(MethodIL methodBody, ref InterproceduralState inter
             }
         }
 
-        public static DependencyList ScanAndProcessReturnValue(NodeFactory factory, FlowAnnotations annotations, Logger logger, MethodIL methodBody)
+        public static DependencyList ScanAndProcessReturnValue(NodeFactory factory, FlowAnnotations annotations, Logger logger, MethodIL methodBody, out List<INodeWithRuntimeDeterminedDependencies> runtimeDependencies)
         {
             var scanner = new ReflectionMethodBodyScanner(factory, annotations, logger, new MessageOrigin(methodBody.OwningMethod));
 
             scanner.InterproceduralScan(methodBody);
 
+            runtimeDependencies = scanner._reflectionMarker.RuntimeDeterminedDependencies;
             return scanner._reflectionMarker.Dependencies;
         }
 
@@ -357,8 +361,6 @@ public static bool HandleCall(
                 case IntrinsicId.Type_GetConstructor:
                 case IntrinsicId.MethodBase_GetMethodFromHandle:
                 case IntrinsicId.MethodBase_get_MethodHandle:
-                case IntrinsicId.Type_MakeGenericType:
-                case IntrinsicId.MethodInfo_MakeGenericMethod:
                 case IntrinsicId.Expression_Call:
                 case IntrinsicId.Expression_New:
                 case IntrinsicId.Type_GetType:
@@ -371,18 +373,124 @@ public static bool HandleCall(
                 case IntrinsicId.AppDomain_CreateInstanceFromAndUnwrap:
                 case IntrinsicId.Assembly_CreateInstance:
                     {
-                        bool result = handleCallAction.Invoke(calledMethod, instanceValue, argumentValues, intrinsicId, out methodReturnValue);
+                        return handleCallAction.Invoke(calledMethod, instanceValue, argumentValues, intrinsicId, out methodReturnValue);
+                    }
+
+            case IntrinsicId.Type_MakeGenericType:
+                    {
+                        bool triggersWarning = false;
 
-                        // Special case some intrinsics for AOT handling (on top of the trimming handling done in the HandleCallAction)
-                        switch (intrinsicId)
+                        if (!instanceValue.IsEmpty() && !argumentValues[0].IsEmpty())
                         {
-                            case IntrinsicId.Type_MakeGenericType:
-                            case IntrinsicId.MethodInfo_MakeGenericMethod:
-                                CheckAndReportRequires(diagnosticContext, calledMethod, DiagnosticUtilities.RequiresDynamicCodeAttribute);
-                                break;
+                            foreach (var value in instanceValue.AsEnumerable())
+                            {
+                                if (value is SystemTypeValue typeValue)
+                                {
+                                    TypeDesc typeInstantiated = typeValue.RepresentedType.Type;
+                                    if (!typeInstantiated.IsGenericDefinition)
+                                    {
+                                        // Nothing to do, will fail at runtime
+                                    }
+                                    else if (TryGetMakeGenericInstantiation(callingMethodDefinition, argumentValues[0], out Instantiation inst, out bool isExact))
+                                    {
+                                        if (inst.Length == typeInstantiated.Instantiation.Length)
+                                        {
+                                            typeInstantiated = ((MetadataType)typeInstantiated).MakeInstantiatedType(inst);
+
+                                            if (isExact)
+                                            {
+                                                reflectionMarker.MarkType(diagnosticContext.Origin, typeInstantiated, "MakeGenericType");
+                                            }
+                                            else
+                                            {
+                                                reflectionMarker.RuntimeDeterminedDependencies.Add(new MakeGenericTypeSite(typeInstantiated));
+                                            }
+                                        }
+                                    }
+                                    else
+                                    {
+                                        triggersWarning = true;
+                                    }
+
+                                }
+                                else if (value == NullValue.Instance)
+                                {
+                                    // Nothing to do
+                                }
+                                else
+                                {
+                                    // We don't know what type the `MakeGenericType` was called on
+                                    triggersWarning = true;
+                                }
+                            }
                         }
 
-                        return result;
+                        if (triggersWarning)
+                        {
+                            CheckAndReportRequires(diagnosticContext, calledMethod, DiagnosticUtilities.RequiresDynamicCodeAttribute);
+                        }
+
+                        // This intrinsic is relevant to both trimming and AOT - call into trimming logic as well.
+                        return handleCallAction.Invoke(calledMethod, instanceValue, argumentValues, intrinsicId, out methodReturnValue);
+                    }
+
+                case IntrinsicId.MethodInfo_MakeGenericMethod:
+                    {
+                        bool triggersWarning = false;
+
+                        if (!instanceValue.IsEmpty())
+                        {
+                            foreach (var methodValue in instanceValue.AsEnumerable())
+                            {
+                                if (methodValue is SystemReflectionMethodBaseValue methodBaseValue)
+                                {
+                                    MethodDesc methodInstantiated = methodBaseValue.RepresentedMethod.Method;
+                                    if (!methodInstantiated.IsGenericMethodDefinition)
+                                    {
+                                        // Nothing to do, will fail at runtime
+                                    }
+                                    else if (!methodInstantiated.OwningType.IsGenericDefinition
+                                        && TryGetMakeGenericInstantiation(callingMethodDefinition, argumentValues[0], out Instantiation inst, out bool isExact))
+                                    {
+                                        if (inst.Length == methodInstantiated.Instantiation.Length)
+                                        {
+                                            methodInstantiated = methodInstantiated.MakeInstantiatedMethod(inst);
+
+                                            if (isExact)
+                                            {
+                                                reflectionMarker.MarkMethod(diagnosticContext.Origin, methodInstantiated, "MakeGenericMethod");
+                                            }
+                                            else
+                                            {
+                                                reflectionMarker.RuntimeDeterminedDependencies.Add(new MakeGenericMethodSite(methodInstantiated));
+                                            }
+                                        }
+                                    }
+                                    else
+                                    {
+                                        // If the owning type is a generic definition, we can't help much.
+                                        triggersWarning = true;
+                                    }
+                                }
+                                else if (methodValue == NullValue.Instance)
+                                {
+                                    // Nothing to do
+                                }
+                                else
+                                {
+                                    // We don't know what method the `MakeGenericMethod` was called on
+                                    triggersWarning = true;
+                                }
+                            }
+                        }
+
+                        if (triggersWarning)
+                        {
+                            CheckAndReportRequires(diagnosticContext, calledMethod, DiagnosticUtilities.RequiresDynamicCodeAttribute);
+                        }
+
+                        // This intrinsic is relevant to both trimming and AOT - call into trimming logic as well.
+                        return handleCallAction.Invoke(calledMethod, instanceValue, argumentValues, intrinsicId, out methodReturnValue);
                     }
 
                 case IntrinsicId.None:
@@ -419,6 +527,19 @@ public static bool HandleCall(
                     }
                     break;
 
+                //
+                // System.Array
+                //
+                // CreateInstance (Type, Int32)
+                //
+                case IntrinsicId.Array_CreateInstance:
+                    {
+                        // We could try to analyze if the type is known, but for now making sure this works for canonical arrays is enough.
+                        TypeDesc canonArrayType = reflectionMarker.Factory.TypeSystemContext.CanonType.MakeArrayType();
+                        reflectionMarker.Dependencies.Add(reflectionMarker.Factory.NativeLayout.TemplateTypeLayout(canonArrayType), "Array.CreateInstance was called");
+                        goto case IntrinsicId.None;
+                    }
+
                 //
                 // System.Enum
                 //
@@ -686,6 +807,105 @@ void AddReturnValue(MultiValue value)
             }
         }
 
+        private static bool TryGetMakeGenericInstantiation(
+            MethodDesc contextMethod,
+            in MultiValue genericParametersArray,
+            out Instantiation inst,
+            out bool isExact)
+        {
+            // We support calling MakeGeneric APIs with a very concrete instantiation array.
+            // Only the form of `new Type[] { typeof(Foo), typeof(T), typeof(Foo<T>) }` is supported.
+
+            inst = default;
+            isExact = true;
+            Debug.Assert(contextMethod.GetTypicalMethodDefinition() == contextMethod);
+
+            var typesValue = genericParametersArray.AsSingleValue();
+            if (typesValue is NullValue)
+            {
+                // This will fail at runtime but no warning needed
+                inst = Instantiation.Empty;
+                return true;
+            }
+
+            // Is this an array we model?
+            if (typesValue is not ArrayValue array)
+            {
+                return false;
+            }
+
+            int? size = array.Size.AsConstInt();
+            if (size == null)
+            {
+                return false;
+            }
+
+            TypeDesc[]? sigInst = null;
+            TypeDesc[]? defInst = null;
+
+            ArrayBuilder<TypeDesc> result = default;
+            for (int i = 0; i < size.Value; i++)
+            {
+                // Go over each element of the array. If the value is unknown, bail.
+                if (!array.TryGetValueByIndex(i, out MultiValue value))
+                {
+                    return false;
+                }
+
+                var singleValue = value.AsSingleValue();
+
+                TypeDesc? type = singleValue switch
+                {
+                    SystemTypeValue systemType => systemType.RepresentedType.Type,
+                    GenericParameterValue genericParamType => genericParamType.GenericParameter.GenericParameter,
+                    NullableSystemTypeValue nullableSystemType => nullableSystemType.NullableType.Type,
+                    _ => null
+                };
+
+                if (type is null)
+                {
+                    return false;
+                }
+
+                // type is now some type.
+                // Because dataflow analysis oddly operates on method bodies instantiated over
+                // generic parameters (as opposed to instantiated over signature variables)
+                // We need to swap generic parameters (T, U,...) for signature variables (!0, !!1,...).
+                // We need to do this for both generic parameters of the owning type, and generic
+                // parameters of the owning method.
+                if (type.ContainsSignatureVariables(treatGenericParameterLikeSignatureVariable: true))
+                {
+                    if (sigInst == null)
+                    {
+                        TypeDesc contextType = contextMethod.OwningType;
+                        sigInst = new TypeDesc[contextType.Instantiation.Length + contextMethod.Instantiation.Length];
+                        defInst = new TypeDesc[contextType.Instantiation.Length + contextMethod.Instantiation.Length];
+                        TypeSystemContext context = type.Context;
+                        for (int j = 0; j < contextType.Instantiation.Length; j++)
+                        {
+                            sigInst[j] = context.GetSignatureVariable(j, method: false);
+                            defInst[j] = contextType.Instantiation[j];
+                        }
+                        for (int j = 0; j < contextMethod.Instantiation.Length; j++)
+                        {
+                            sigInst[j + contextType.Instantiation.Length] = context.GetSignatureVariable(j, method: true);
+                            defInst[j + contextType.Instantiation.Length] = contextMethod.Instantiation[j];
+                        }
+                    }
+
+                    isExact = false;
+
+                    // defInst is [T, U, V], sigInst is `[!0, !!0, !!1]`.
+                    type = type.ReplaceTypesInConstructionOfType(defInst, sigInst);
+                }
+
+                result.Add(type);
+            }
+
+            inst = new Instantiation(result.ToArray());
+            return true;
+        }
+
         private static bool IsAotUnsafeDelegate(TypeDesc parameterType)
         {
             TypeSystemContext context = parameterType.Context;
@@ -846,5 +1066,33 @@ private static bool IsPInvokeDangerous(MethodDesc calledMethod, out bool comDang
 
             return aotUnsafeDelegate || comDangerousMethod;
         }
+
+        private sealed class MakeGenericMethodSite : INodeWithRuntimeDeterminedDependencies
+        {
+            private readonly MethodDesc _method;
+
+            public MakeGenericMethodSite(MethodDesc method) => _method = method;
+
+            public IEnumerable<DependencyNodeCore<NodeFactory>.DependencyListEntry> InstantiateDependencies(NodeFactory factory, Instantiation typeInstantiation, Instantiation methodInstantiation)
+            {
+                var list = new DependencyList();
+                RootingHelpers.TryGetDependenciesForReflectedMethod(ref list, factory, _method.InstantiateSignature(typeInstantiation, methodInstantiation), "MakeGenericMethod");
+                return list;
+            }
+        }
+
+        private sealed class MakeGenericTypeSite : INodeWithRuntimeDeterminedDependencies
+        {
+            private readonly TypeDesc _type;
+
+            public MakeGenericTypeSite(TypeDesc type) => _type = type;
+
+            public IEnumerable<DependencyNodeCore<NodeFactory>.DependencyListEntry> InstantiateDependencies(NodeFactory factory, Instantiation typeInstantiation, Instantiation methodInstantiation)
+            {
+                var list = new DependencyList();
+                RootingHelpers.TryGetDependenciesForReflectedType(ref list, factory, _type.InstantiateSignature(typeInstantiation, methodInstantiation), "MakeGenericType");
+                return list;
+            }
+        }
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ArrayOfFrozenObjectsNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ArrayOfFrozenObjectsNode.cs
index bb77597f53f0..485ff593583b 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ArrayOfFrozenObjectsNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ArrayOfFrozenObjectsNode.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Diagnostics;
 
 using Internal.Text;
 using Internal.TypeSystem;
@@ -33,6 +34,7 @@ protected override ObjectData GetDehydratableData(NodeFactory factory, bool relo
             builder.AddSymbol(this);
             foreach (FrozenObjectNode node in factory.MetadataManager.GetFrozenObjects())
             {
+                Debug.Assert(node is not FrozenObjectNode frozenObj || !frozenObj.ObjectType.RequiresAlign8());
                 AlignNextObject(ref builder, factory);
 
                 node.InitializeOffsetFromBeginningOfArray(builder.CountBytes);
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/CanonicalEETypeNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/CanonicalEETypeNode.cs
index 38381c4a5788..615d07e932be 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/CanonicalEETypeNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/CanonicalEETypeNode.cs
@@ -24,7 +24,7 @@ public CanonicalEETypeNode(NodeFactory factory, TypeDesc type) : base(factory, t
             Debug.Assert(!type.IsCanonicalDefinitionType(CanonicalFormKind.Any));
             Debug.Assert(type.IsCanonicalSubtype(CanonicalFormKind.Any));
             Debug.Assert(type == type.ConvertToCanonForm(CanonicalFormKind.Specific));
-            Debug.Assert(!type.IsMdArray || factory.Target.Abi == TargetAbi.CppCodegen);
+            Debug.Assert(!type.IsMdArray);
         }
 
         public override bool StaticDependenciesAreComputed => true;
@@ -96,8 +96,12 @@ protected override void OutputGCDesc(ref ObjectDataBuilder builder)
 
         protected override void OutputInterfaceMap(NodeFactory factory, ref ObjectDataBuilder objData)
         {
-            for (int i = 0; i < _type.RuntimeInterfaces.Length; i++)
+            foreach (DefType intface in _type.RuntimeInterfaces)
             {
+                // If the interface was optimized away, skip it
+                if (!factory.InterfaceUse(intface.GetTypeDefinition()).Marked)
+                    continue;
+
                 // Interface omitted for canonical instantiations (constructed at runtime for dynamic types from the native layout info)
                 objData.EmitZeroPointer();
             }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/DataflowAnalyzedMethodNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/DataflowAnalyzedMethodNode.cs
index 1b7366524593..6a6e2e0b9fb6 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/DataflowAnalyzedMethodNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/DataflowAnalyzedMethodNode.cs
@@ -19,6 +19,7 @@ namespace ILCompiler.DependencyAnalysis
     public class DataflowAnalyzedMethodNode : DependencyNodeCore<NodeFactory>
     {
         private readonly MethodIL _methodIL;
+        private List<INodeWithRuntimeDeterminedDependencies> _runtimeDependencies;
 
         public DataflowAnalyzedMethodNode(MethodIL methodIL)
         {
@@ -32,26 +33,51 @@ public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFacto
             var mdManager = (UsageBasedMetadataManager)factory.MetadataManager;
             try
             {
-                return Dataflow.ReflectionMethodBodyScanner.ScanAndProcessReturnValue(factory, mdManager.FlowAnnotations, mdManager.Logger, _methodIL);
+                return Dataflow.ReflectionMethodBodyScanner.ScanAndProcessReturnValue(factory, mdManager.FlowAnnotations, mdManager.Logger, _methodIL, out _runtimeDependencies);
             }
             catch (TypeSystemException)
             {
                 // Something wrong with the input - missing references, etc.
                 // The method body likely won't compile either, so we don't care.
+                _runtimeDependencies = new List<INodeWithRuntimeDeterminedDependencies>();
                 return Array.Empty<DependencyListEntry>();
             }
         }
 
+        public override IEnumerable<CombinedDependencyListEntry> SearchDynamicDependencies(List<DependencyNodeCore<NodeFactory>> markedNodes, int firstNode, NodeFactory factory)
+        {
+            MethodDesc analyzedMethod = _methodIL.OwningMethod;
+
+            // Look for any generic specialization of this method. If any are found, specialize any dataflow dependencies.
+            for (int i = firstNode; i < markedNodes.Count; i++)
+            {
+                if (markedNodes[i] is not IMethodBodyNode methodBody)
+                    continue;
+
+                MethodDesc method = methodBody.Method;
+                if (method.GetTypicalMethodDefinition() != analyzedMethod)
+                    continue;
+
+                // Instantiate all runtime dependencies for the found generic specialization
+                foreach (var n in _runtimeDependencies)
+                {
+                    foreach (var d in n.InstantiateDependencies(factory, method.OwningType.Instantiation, method.Instantiation))
+                    {
+                        yield return new CombinedDependencyListEntry(d.Node, null, d.Reason);
+                    }
+                }
+            }
+        }
+
         protected override string GetName(NodeFactory factory)
         {
             return "Dataflow analysis for " + _methodIL.OwningMethod.ToString();
         }
 
         public override bool InterestingForDynamicDependencyAnalysis => false;
-        public override bool HasDynamicDependencies => false;
+        public override bool HasDynamicDependencies => _runtimeDependencies.Count > 0;
         public override bool HasConditionalStaticDependencies => false;
         public override bool StaticDependenciesAreComputed => true;
         public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDependencies(NodeFactory context) => null;
-        public override IEnumerable<CombinedDependencyListEntry> SearchDynamicDependencies(List<DependencyNodeCore<NodeFactory>> markedNodes, int firstNode, NodeFactory context) => null;
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/EETypeNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/EETypeNode.cs
index 8149c2c7886d..f0a837fdfc21 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/EETypeNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/EETypeNode.cs
@@ -299,6 +299,12 @@ public sealed override bool HasConditionalStaticDependencies
                     return true;
                 }
 
+                // If the type implements at least one interface, calls against that interface could result in this type's
+                // implementation being used.
+                // It might also be necessary for casting purposes.
+                if (_type.RuntimeInterfaces.Length > 0)
+                    return true;
+
                 if (!EmitVirtualSlots)
                     return false;
 
@@ -328,11 +334,6 @@ public sealed override bool HasConditionalStaticDependencies
                     currentType = currentType.BaseType;
                 }
 
-                // If the type implements at least one interface, calls against that interface could result in this type's
-                // implementation being used.
-                if (_type.RuntimeInterfaces.Length > 0)
-                    return true;
-
                 return _hasConditionalDependenciesFromMetadataManager;
             }
         }
@@ -367,13 +368,25 @@ public sealed override IEnumerable<CombinedDependencyListEntry> GetConditionalSt
                     "Information about static bases for type with template"));
             }
 
+            if (!_type.IsGenericDefinition && !_type.IsCanonicalSubtype(CanonicalFormKind.Any))
+            {
+                foreach (DefType iface in _type.RuntimeInterfaces)
+                {
+                    var ifaceDefinition = (DefType)iface.GetTypeDefinition();
+                    result.Add(new CombinedDependencyListEntry(
+                        GetInterfaceTypeNode(factory, iface),
+                        factory.InterfaceUse(ifaceDefinition),
+                        "Interface definition was visible"));
+                }
+            }
+
             if (!EmitVirtualSlots)
                 return result;
 
             DefType defType = _type.GetClosestDefType();
 
             // If we're producing a full vtable, none of the dependencies are conditional.
-            if (!factory.VTable(defType).HasFixedSlots)
+            if (!factory.VTable(defType).HasKnownVirtualMethodUse)
             {
                 bool isNonInterfaceAbstractType = !defType.IsInterface && ((MetadataType)defType).IsAbstract;
 
@@ -526,6 +539,14 @@ public sealed override IEnumerable<CombinedDependencyListEntry> GetConditionalSt
                                 {
                                     // Canonical instance default methods need to go through a thunk that adds the right generic context
                                     defaultIntfMethod = factory.TypeSystemContext.GetDefaultInterfaceMethodImplementationThunk(defaultIntfMethod, defType.ConvertToCanonForm(CanonicalFormKind.Specific), providingInterfaceDefinitionType);
+
+                                    // The above thunk will index into interface list to find the right context. Make sure to keep all interfaces prior to this one
+                                    for (int i = 0; i < interfaceIndex; i++)
+                                    {
+                                        result.Add(new CombinedDependencyListEntry(
+                                            factory.InterfaceUse(defTypeRuntimeInterfaces[i].GetTypeDefinition()),
+                                            factory.VirtualMethodUse(interfaceMethod), "Interface with shared default methods folows this"));
+                                    }
                                 }
                                 result.Add(new CombinedDependencyListEntry(factory.MethodEntrypoint(defaultIntfMethod), factory.VirtualMethodUse(interfaceMethod), "Interface method"));
 
@@ -580,6 +601,9 @@ protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFact
             // emitting it.
             dependencies.Add(new DependencyListEntry(_optionalFieldsNode, "Optional fields"));
 
+            if (_type.IsInterface)
+                dependencies.Add(factory.InterfaceUse(_type.GetTypeDefinition()), "Interface is used");
+
             if (EmitVirtualSlots)
             {
                 if (!_type.IsArrayTypeWithoutGenericInterfaces())
@@ -690,7 +714,11 @@ protected override ObjectData GetDehydratableData(NodeFactory factory, bool relo
 
             // Emit interface map
             SlotCounter interfaceSlotCounter = SlotCounter.BeginCounting(ref /* readonly */ objData);
-            OutputInterfaceMap(factory, ref objData);
+
+            if (!relocsOnly)
+            {
+                OutputInterfaceMap(factory, ref objData);
+            }
 
             // Update slot count
             int numberOfInterfaceSlots = interfaceSlotCounter.CountSlots(ref /* readonly */ objData);
@@ -1007,7 +1035,7 @@ private void OutputVirtualSlots(NodeFactory factory, ref ObjectDataBuilder objDa
 
             // It's only okay to touch the actual list of slots if we're in the final emission phase
             // or the vtable is not built lazily.
-            if (relocsOnly && !declVTable.HasFixedSlots)
+            if (relocsOnly && !declVTable.HasKnownVirtualMethodUse)
                 return;
 
             // Interface types don't place anything else in their physical vtable.
@@ -1035,13 +1063,19 @@ private void OutputVirtualSlots(NodeFactory factory, ref ObjectDataBuilder objDa
                 // No generic virtual methods can appear in the vtable!
                 Debug.Assert(!declMethod.HasInstantiation);
 
-                MethodDesc implMethod = implType.GetClosestDefType().FindVirtualFunctionTargetMethodOnObjectType(declMethod);
-
                 // Final NewSlot methods cannot be overridden, and therefore can be placed in the sealed-vtable to reduce the size of the vtable
                 // of this type and any type that inherits from it.
                 if (declMethod.CanMethodBeInSealedVTable(factory) && !declType.IsArrayTypeWithoutGenericInterfaces())
                     continue;
 
+                if (!declVTable.IsSlotUsed(declMethod))
+                {
+                    objData.EmitZeroPointer();
+                    continue;
+                }
+
+                MethodDesc implMethod = implType.GetClosestDefType().FindVirtualFunctionTargetMethodOnObjectType(declMethod);
+
                 bool shouldEmitImpl = !implMethod.IsAbstract;
 
                 // We do a size optimization that removes support for built-in ValueType Equals/GetHashCode
@@ -1090,7 +1124,13 @@ protected virtual void OutputInterfaceMap(NodeFactory factory, ref ObjectDataBui
         {
             foreach (var itf in _type.RuntimeInterfaces)
             {
-                objData.EmitPointerReloc(GetInterfaceTypeNode(factory, itf));
+                IEETypeNode interfaceTypeNode = GetInterfaceTypeNode(factory, itf);
+
+                // Only emit interfaces that were not optimized away.
+                if (interfaceTypeNode.Marked)
+                {
+                    objData.EmitPointerReloc(interfaceTypeNode);
+                }
             }
         }
 
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GCStaticEETypeNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GCStaticEETypeNode.cs
index addb715ec677..f8217164be84 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GCStaticEETypeNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GCStaticEETypeNode.cs
@@ -64,7 +64,6 @@ protected override ObjectData GetDehydratableData(NodeFactory factory, bool relo
             dataBuilder.AddSymbol(this);
 
             // +1 for SyncBlock (static size already includes MethodTable)
-            Debug.Assert(factory.Target.Abi == TargetAbi.NativeAot || factory.Target.Abi == TargetAbi.CppCodegen);
             int totalSize = (_gcMap.Size + 1) * _target.PointerSize;
 
             // We only need to check for containsPointers because ThreadStatics are always allocated
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericLookupResult.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericLookupResult.cs
index 0cf2a6bb44e4..17f717d49bb5 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericLookupResult.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/GenericLookupResult.cs
@@ -11,42 +11,6 @@
 
 namespace ILCompiler.DependencyAnalysis
 {
-    // Represents a generic lookup within a canonical method body.
-    // TODO: unify with NativeFormat.FixupSignatureKind
-    public enum LookupResultType
-    {
-        Invalid,
-        MethodTable,             // a type
-        UnwrapNullable,     // a type (The type T described by a type spec that is generic over Nullable<T>)
-        NonGcStatic,        // the non-gc statics of a type
-        GcStatic,           // the gc statics of a type
-        Method,             // a method
-        InterfaceDispatchCell,  // the dispatch cell for calling an interface method
-        MethodDictionary,   // a dictionary for calling a generic method
-        UnboxingStub,       // the unboxing stub for a method
-        ArrayType,          // an array of type
-        DefaultCtor,        // default ctor of a type
-        AllocObject,        // the allocator of a type
-        GvmVtableOffset,    // vtable offset of a generic virtual method
-        ProfileCounter,     // profiling counter cell
-        MethodLdToken,      // a ldtoken result for a method
-        FieldLdToken,       // a ldtoken result for a field
-        Field,              // a field descriptor
-        IsInst,             // isinst helper
-        CastClass,          // castclass helper
-        AllocArray,         // the array allocator of a type
-        TypeSize,           // size of the type
-        FieldOffset,        // field offset
-        CallingConvention_NoInstParam,      // CallingConventionConverterThunk NO_INSTANTIATING_PARAM
-        CallingConvention_HasInstParam,     // CallingConventionConverterThunk HAS_INSTANTIATING_PARAM
-        CallingConvention_MaybeInstParam,   // CallingConventionConverterThunk MAYBE_INSTANTIATING_PARAM
-        VtableOffset,       // Offset of a virtual method into the type's vtable
-        Constrained,        // ConstrainedCallDesc
-        ConstrainedDirect,  // Direct ConstrainedCallDesc
-        Integer,            // Integer
-        UnboxingMethod,     // UnboxingMethod
-    }
-
     public struct GenericLookupResultContext
     {
         private readonly TypeSystemEntity _canonicalOwner;
@@ -873,7 +837,7 @@ public override IEnumerable<DependencyNodeCore<NodeFactory>> NonRelocDependencie
 
             // If we're producing a full vtable for the type, we don't need to report virtual method use.
             // We also don't report virtual method use for generic virtual methods - tracking those is orthogonal.
-            if (!factory.VTable(canonMethod.OwningType).HasFixedSlots && !canonMethod.HasInstantiation)
+            if (!factory.VTable(canonMethod.OwningType).HasKnownVirtualMethodUse && !canonMethod.HasInstantiation)
             {
                 // Report the method as virtually used so that types that could be used here at runtime
                 // have the appropriate implementations generated.
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ILScanNodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ILScanNodeFactory.cs
index 2b2a0313a397..ab5dbd0e8508 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ILScanNodeFactory.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ILScanNodeFactory.cs
@@ -36,7 +36,7 @@ protected override IMethodNode CreateMethodEntrypointNode(MethodDesc method)
                 }
                 else if (method.HasCustomAttribute("System.Runtime", "RuntimeImportAttribute"))
                 {
-                    return new RuntimeImportMethodNode(method);
+                    return new RuntimeImportMethodNode(method, NameMangler);
                 }
             }
 
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchCellNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchCellNode.cs
index aa3883a2db50..5d22535ce5e7 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchCellNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchCellNode.cs
@@ -54,7 +54,7 @@ public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFacto
         {
             DependencyList result = new DependencyList();
 
-            if (!factory.VTable(_targetMethod.OwningType).HasFixedSlots)
+            if (!factory.VTable(_targetMethod.OwningType).HasKnownVirtualMethodUse)
             {
                 result.Add(factory.VirtualMethodUse(_targetMethod), "Interface method use");
             }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchMapNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchMapNode.cs
index 94cde954dc01..45cc7ca69680 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchMapNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceDispatchMapNode.cs
@@ -96,18 +96,8 @@ public static bool MightHaveInterfaceDispatchMap(TypeDesc type, NodeFactory fact
                     null :
                     (InstantiatedType)declType.GetTypeDefinition().RuntimeInterfaces[interfaceIndex];
 
-                IEnumerable<MethodDesc> slots;
-
-                // If the vtable has fixed slots, we can query it directly.
-                // If it's a lazily built vtable, we might not be able to query slots
-                // just yet, so approximate by looking at all methods.
                 VTableSliceNode vtableSlice = factory.VTable(interfaceType);
-                if (vtableSlice.HasFixedSlots)
-                    slots = vtableSlice.Slots;
-                else
-                    slots = interfaceType.GetAllVirtualMethods();
-
-                foreach (MethodDesc slotMethod in slots)
+                foreach (MethodDesc slotMethod in vtableSlice.Slots)
                 {
                     MethodDesc declMethod = slotMethod;
 
@@ -164,6 +154,8 @@ private void EmitDispatchMap(ref ObjectDataBuilder builder, NodeFactory factory)
             bool needsEntriesForInstanceInterfaceMethodImpls = !isInterface
                     || ((MetadataType)declType).IsDynamicInterfaceCastableImplementation();
 
+            int entryIndex = 0;
+
             // Resolve all the interfaces, but only emit non-static and non-default implementations
             for (int interfaceIndex = 0; interfaceIndex < declTypeRuntimeInterfaces.Length; interfaceIndex++)
             {
@@ -171,12 +163,19 @@ private void EmitDispatchMap(ref ObjectDataBuilder builder, NodeFactory factory)
                 var definitionInterfaceType = declTypeDefinitionRuntimeInterfaces[interfaceIndex];
                 Debug.Assert(interfaceType.IsInterface);
 
-                IReadOnlyList<MethodDesc> virtualSlots = factory.VTable(interfaceType).Slots;
+                if (!factory.InterfaceUse(interfaceType.GetTypeDefinition()).Marked)
+                    continue;
+
+                VTableSliceNode interfaceVTable = factory.VTable(interfaceType);
+                IReadOnlyList<MethodDesc> virtualSlots = interfaceVTable.Slots;
 
                 for (int interfaceMethodSlot = 0; interfaceMethodSlot < virtualSlots.Count; interfaceMethodSlot++)
                 {
                     MethodDesc declMethod = virtualSlots[interfaceMethodSlot];
 
+                    if (!interfaceVTable.IsSlotUsed(declMethod))
+                        continue;
+
                     if (!declMethod.Signature.IsStatic && !needsEntriesForInstanceInterfaceMethodImpls)
                         continue;
 
@@ -210,11 +209,11 @@ private void EmitDispatchMap(ref ObjectDataBuilder builder, NodeFactory factory)
                             int genericContext = targetMethod.GetCanonMethodTarget(CanonicalFormKind.Specific).RequiresInstArg()
                                 ? StaticVirtualMethodContextSource.ContextFromThisClass
                                 : StaticVirtualMethodContextSource.None;
-                            staticImplementations.Add((interfaceIndex, emittedInterfaceSlot, emittedImplSlot, genericContext));
+                            staticImplementations.Add((entryIndex, emittedInterfaceSlot, emittedImplSlot, genericContext));
                         }
                         else
                         {
-                            builder.EmitShort((short)checked((ushort)interfaceIndex));
+                            builder.EmitShort((short)checked((ushort)entryIndex));
                             builder.EmitShort((short)checked((ushort)emittedInterfaceSlot));
                             builder.EmitShort((short)checked((ushort)emittedImplSlot));
                             entryCount++;
@@ -271,7 +270,7 @@ private void EmitDispatchMap(ref ObjectDataBuilder builder, NodeFactory factory)
                                     }
                                 }
                                 staticDefaultImplementations.Add((
-                                    interfaceIndex,
+                                    entryIndex,
                                     emittedInterfaceSlot,
                                     implSlot.Value,
                                     genericContext));
@@ -279,13 +278,15 @@ private void EmitDispatchMap(ref ObjectDataBuilder builder, NodeFactory factory)
                             else
                             {
                                 defaultImplementations.Add((
-                                    interfaceIndex,
+                                    entryIndex,
                                     emittedInterfaceSlot,
                                     implSlot.Value));
                             }
                         }
                     }
                 }
+
+                entryIndex++;
             }
 
             // Now emit the default implementations
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceUseNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceUseNode.cs
new file mode 100644
index 000000000000..9ef45bb71184
--- /dev/null
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/InterfaceUseNode.cs
@@ -0,0 +1,40 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+
+using ILCompiler.DependencyAnalysisFramework;
+
+using Internal.TypeSystem;
+
+using Debug = System.Diagnostics.Debug;
+
+namespace ILCompiler.DependencyAnalysis
+{
+    /// <summary>
+    /// Tracks uses of interface in IL sense: at the level of type definitions.
+    /// Trim warning suppressions within the framework prevent us from optimizing
+    /// at a smaller granularity (e.g. canonical forms or concrete instantiations).
+    /// </summary>
+    internal sealed class InterfaceUseNode : DependencyNodeCore<NodeFactory>
+    {
+        public TypeDesc Type { get; }
+
+        public InterfaceUseNode(TypeDesc type)
+        {
+            Debug.Assert(type.IsTypeDefinition);
+            Debug.Assert(type.IsInterface);
+            Type = type;
+        }
+
+        protected override string GetName(NodeFactory factory) => $"Interface use: {Type}";
+
+        public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFactory factory) => null;
+        public override bool InterestingForDynamicDependencyAnalysis => false;
+        public override bool HasDynamicDependencies => false;
+        public override bool HasConditionalStaticDependencies => false;
+        public override bool StaticDependenciesAreComputed => true;
+        public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDependencies(NodeFactory factory) => null;
+        public override IEnumerable<CombinedDependencyListEntry> SearchDynamicDependencies(List<DependencyNodeCore<NodeFactory>> markedNodes, int firstNode, NodeFactory context) => null;
+    }
+}
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutVertexNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutVertexNode.cs
index 1248906a4fba..60c6158de41a 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutVertexNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NativeLayoutVertexNode.cs
@@ -909,6 +909,15 @@ public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFacto
                 }
             }
 
+            foreach (GenericParameterDesc genericParam in _method.GetTypicalMethodDefinition().Instantiation)
+            {
+                foreach (TypeDesc typeConstraint in genericParam.TypeConstraints)
+                {
+                    if (typeConstraint.IsInterface)
+                        yield return new DependencyListEntry(context.InterfaceUse(typeConstraint.GetTypeDefinition()), "Used as constraint");
+                }
+            }
+
             yield return new DependencyListEntry(context.GenericDictionaryLayout(_method), "Dictionary layout");
         }
 
@@ -1026,6 +1035,15 @@ public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFacto
                 yield return new DependencyListEntry(context.MethodEntrypoint(_type.GetStaticConstructor().GetCanonMethodTarget(CanonicalFormKind.Specific)), "cctor for template");
             }
 
+            foreach (GenericParameterDesc genericParam in _type.GetTypeDefinition().Instantiation)
+            {
+                foreach (TypeDesc typeConstraint in genericParam.TypeConstraints)
+                {
+                    if (typeConstraint.IsInterface)
+                        yield return new DependencyListEntry(context.InterfaceUse(typeConstraint.GetTypeDefinition()), "Used as constraint");
+                }
+            }
+
             if (!_isUniversalCanon)
             {
                 DefType closestCanonDefType = (DefType)_type.GetClosestDefType().ConvertToCanonForm(CanonicalFormKind.Specific);
@@ -1387,7 +1405,7 @@ private static void ProcessVTableEntriesForCallingConventionSignatureGeneration(
                         break;
 
                     case VTableEntriesToProcess.AllOnTypesThatShouldProduceFullVTables:
-                        if (factory.VTable(declType).HasFixedSlots)
+                        if (factory.VTable(declType).HasKnownVirtualMethodUse)
                         {
                             vtableEntriesToProcess = factory.VTable(declType).Slots;
                         }
@@ -1398,7 +1416,7 @@ private static void ProcessVTableEntriesForCallingConventionSignatureGeneration(
                         break;
 
                     case VTableEntriesToProcess.AllOnTypesThatProducePartialVTables:
-                        if (factory.VTable(declType).HasFixedSlots)
+                        if (factory.VTable(declType).HasKnownVirtualMethodUse)
                         {
                             vtableEntriesToProcess = Array.Empty<MethodDesc>();
                         }
@@ -1621,7 +1639,7 @@ public sealed override IEnumerable<DependencyListEntry> GetStaticDependencies(No
             if (method.IsRuntimeDeterminedExactMethod)
                 method = method.GetCanonMethodTarget(CanonicalFormKind.Specific);
 
-            if (!factory.VTable(method.OwningType).HasFixedSlots)
+            if (!factory.VTable(method.OwningType).HasKnownVirtualMethodUse)
             {
                 yield return new DependencyListEntry(factory.VirtualMethodUse(method), "Slot number");
             }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NecessaryCanonicalEETypeNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NecessaryCanonicalEETypeNode.cs
index b3a020e7d67e..1746741ecdcc 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NecessaryCanonicalEETypeNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NecessaryCanonicalEETypeNode.cs
@@ -18,13 +18,17 @@ public NecessaryCanonicalEETypeNode(NodeFactory factory, TypeDesc type) : base(f
             Debug.Assert(!type.IsCanonicalDefinitionType(CanonicalFormKind.Any));
             Debug.Assert(type.IsCanonicalSubtype(CanonicalFormKind.Any));
             Debug.Assert(type == type.ConvertToCanonForm(CanonicalFormKind.Specific));
-            Debug.Assert(!type.IsMdArray || factory.Target.Abi == TargetAbi.CppCodegen);
+            Debug.Assert(!type.IsMdArray);
         }
 
         protected override void OutputInterfaceMap(NodeFactory factory, ref ObjectDataBuilder objData)
         {
-            for (int i = 0; i < _type.RuntimeInterfaces.Length; i++)
+            foreach (DefType intface in _type.RuntimeInterfaces)
             {
+                // If the interface was optimized away, skip it
+                if (!factory.InterfaceUse(intface.GetTypeDefinition()).Marked)
+                    continue;
+
                 // Interface omitted for canonical instantiations (constructed at runtime for dynamic types from the native layout info)
                 objData.EmitZeroPointer();
             }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs
index 0028ab9b188e..4f7b48f33030 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs
@@ -361,11 +361,16 @@ private void CreateNodeCaches()
             {
                 // We don't need to track virtual method uses for types that have a vtable with a known layout.
                 // It's a waste of CPU time and memory.
-                Debug.Assert(!VTable(method.OwningType).HasFixedSlots);
+                Debug.Assert(method.OwningType.IsGenericDefinition || !VTable(method.OwningType).HasKnownVirtualMethodUse);
 
                 return new VariantInterfaceMethodUseNode(method);
             });
 
+            _interfaceUses = new NodeCache<TypeDesc, InterfaceUseNode>((TypeDesc type) =>
+            {
+                return new InterfaceUseNode(type);
+            });
+
             _readyToRunHelpers = new NodeCache<ReadyToRunHelperKey, ISymbolNode>(CreateReadyToRunHelperNode);
 
             _genericReadyToRunHelpersFromDict = new NodeCache<ReadyToRunGenericHelperKey, ISymbolNode>(CreateGenericLookupFromDictionaryNode);
@@ -821,6 +826,12 @@ public ISortableSymbolNode ExternSymbol(string name)
             return _externSymbols.GetOrAdd(name);
         }
 
+        public ISortableSymbolNode ExternVariable(string name)
+        {
+            string mangledName = NameMangler.NodeMangler.ExternVariable(name);
+            return _externSymbols.GetOrAdd(mangledName);
+        }
+
         private NodeCache<string, ExternSymbolNode> _externIndirectSymbols;
 
         public ISortableSymbolNode ExternIndirectSymbol(string name)
@@ -1148,7 +1159,7 @@ protected override VirtualMethodUseNode CreateValueFromKey(MethodDesc key)
             {
                 // We don't need to track virtual method uses for types that have a vtable with a known layout.
                 // It's a waste of CPU time and memory.
-                Debug.Assert(!_factory.VTable(key.OwningType).HasFixedSlots);
+                Debug.Assert(!_factory.VTable(key.OwningType).HasKnownVirtualMethodUse);
                 return new VirtualMethodUseNode(key);
             }
             protected override int GetKeyHashCode(MethodDesc key) => key.GetHashCode();
@@ -1169,6 +1180,13 @@ public DependencyNodeCore<NodeFactory> VariantInterfaceMethodUse(MethodDesc decl
             return _variantMethods.GetOrAdd(decl);
         }
 
+        private NodeCache<TypeDesc, InterfaceUseNode> _interfaceUses;
+
+        public DependencyNodeCore<NodeFactory> InterfaceUse(TypeDesc type)
+        {
+            return _interfaceUses.GetOrAdd(type);
+        }
+
         private NodeCache<ReadyToRunHelperKey, ISymbolNode> _readyToRunHelpers;
 
         public ISymbolNode ReadyToRunHelper(ReadyToRunHelperId id, object target)
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/PInvokeMethodFixupNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/PInvokeMethodFixupNode.cs
index 5a477f9fd84f..866c5d0341bf 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/PInvokeMethodFixupNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/PInvokeMethodFixupNode.cs
@@ -77,25 +77,33 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
             // Module fixup cell
             builder.EmitPointerReloc(factory.PInvokeModuleFixup(_pInvokeMethodData.ModuleData));
 
-            int flags = 0;
+            uint flags = 0;
 
-            int charsetFlags = (int)_pInvokeMethodData.CharSetMangling;
+            uint charsetFlags = (uint)_pInvokeMethodData.CharSetMangling;
             Debug.Assert((charsetFlags & MethodFixupCellFlagsConstants.CharSetMask) == charsetFlags);
             charsetFlags &= MethodFixupCellFlagsConstants.CharSetMask;
             flags |= charsetFlags;
 
-            int? objcFunction = MarshalHelpers.GetObjectiveCMessageSendFunction(factory.Target, _pInvokeMethodData.ModuleData.ModuleName, _pInvokeMethodData.EntryPointName);
+            uint? objcFunction = MarshalHelpers.GetObjectiveCMessageSendFunction(factory.Target, _pInvokeMethodData.ModuleData.ModuleName, _pInvokeMethodData.EntryPointName);
             if (objcFunction.HasValue)
             {
                 flags |= MethodFixupCellFlagsConstants.IsObjectiveCMessageSendMask;
 
-                int objcFunctionFlags = objcFunction.Value << MethodFixupCellFlagsConstants.ObjectiveCMessageSendFunctionShift;
+                uint objcFunctionFlags = objcFunction.Value << MethodFixupCellFlagsConstants.ObjectiveCMessageSendFunctionShift;
                 Debug.Assert((objcFunctionFlags & MethodFixupCellFlagsConstants.ObjectiveCMessageSendFunctionMask) == objcFunctionFlags);
                 objcFunctionFlags &= MethodFixupCellFlagsConstants.ObjectiveCMessageSendFunctionMask;
                 flags |= objcFunctionFlags;
             }
+            else if (factory.Target.IsWindows && factory.Target.Architecture == TargetArchitecture.X86)
+            {
+                if (_pInvokeMethodData.SignatureBytes >= 0)
+                {
+                    flags |= MethodFixupCellFlagsConstants.IsStdcall;
+                    flags |= ((uint)_pInvokeMethodData.SignatureBytes << 16);
+                }
+            }
 
-            builder.EmitInt(flags);
+            builder.EmitUInt(flags);
 
             return builder.ToObjectData();
         }
@@ -113,16 +121,19 @@ public override int CompareToImpl(ISortableNode other, CompilerComparer comparer
         public readonly PInvokeModuleData ModuleData;
         public readonly string EntryPointName;
         public readonly CharSet CharSetMangling;
+        public readonly int SignatureBytes;
 
         public PInvokeMethodData(PInvokeLazyFixupField pInvokeLazyFixupField)
         {
             PInvokeMetadata metadata = pInvokeLazyFixupField.PInvokeMetadata;
             ModuleDesc declaringModule = ((MetadataType)pInvokeLazyFixupField.TargetMethod.OwningType).Module;
+            TargetDetails target = declaringModule.Context.Target;
+            EcmaMethod method = pInvokeLazyFixupField.TargetMethod as EcmaMethod;
 
             CustomAttributeValue<TypeDesc>? decodedAttr = null;
 
             // Look for DefaultDllImportSearchPath on the method
-            if (pInvokeLazyFixupField.TargetMethod is EcmaMethod method)
+            if (method is not null)
             {
                 decodedAttr = method.GetDecodedCustomAttribute("System.Runtime.InteropServices", "DefaultDllImportSearchPathsAttribute");
             }
@@ -153,7 +164,7 @@ public PInvokeMethodData(PInvokeLazyFixupField pInvokeLazyFixupField)
             EntryPointName = metadata.Name;
 
             CharSet charSetMangling = default;
-            if (declaringModule.Context.Target.IsWindows && !metadata.Flags.ExactSpelling)
+            if (target.IsWindows && !metadata.Flags.ExactSpelling)
             {
                 // Mirror CharSet normalization from Marshaller.CreateMarshaller
                 bool isAnsi = metadata.Flags.CharSet switch
@@ -167,13 +178,26 @@ public PInvokeMethodData(PInvokeLazyFixupField pInvokeLazyFixupField)
                 charSetMangling = isAnsi ? CharSet.Ansi : CharSet.Unicode;
             }
             CharSetMangling = charSetMangling;
+
+            int signatureBytes = -1;
+            if (target.IsWindows && target.Architecture == TargetArchitecture.X86 && method is not null &&
+                (method.GetPInvokeMethodCallingConventions() & UnmanagedCallingConventions.CallingConventionMask) == UnmanagedCallingConventions.Stdcall)
+            {
+                signatureBytes = 0;
+                foreach (var p in pInvokeLazyFixupField.NativeSignature)
+                {
+                    signatureBytes += AlignmentHelper.AlignUp(p.GetElementSize().AsInt, target.PointerSize);
+                }
+            }
+            SignatureBytes = signatureBytes;
         }
 
         public bool Equals(PInvokeMethodData other)
         {
             return ModuleData.Equals(other.ModuleData) &&
                 EntryPointName == other.EntryPointName &&
-                CharSetMangling == other.CharSetMangling;
+                CharSetMangling == other.CharSetMangling &&
+                SignatureBytes == other.SignatureBytes;
         }
 
         public override bool Equals(object obj)
@@ -196,7 +220,11 @@ public int CompareTo(PInvokeMethodData other, CompilerComparer comparer)
             if (moduleCompare != 0)
                 return moduleCompare;
 
-            return CharSetMangling.CompareTo(other.CharSetMangling);
+            var charsetCompare = CharSetMangling.CompareTo(other.CharSetMangling);
+            if (charsetCompare != 0)
+                return charsetCompare;
+
+            return SignatureBytes.CompareTo(other.SignatureBytes);
         }
 
         public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
@@ -209,6 +237,10 @@ public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
                 sb.Append("__");
                 sb.Append(CharSetMangling.ToString());
             }
+            if (SignatureBytes >= 0)
+            {
+                sb.Append($"@{SignatureBytes}");
+            }
         }
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReadyToRunGenericHelperNode.cs
index 90d0c1b9e540..8ba2e0876318 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReadyToRunGenericHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReadyToRunGenericHelperNode.cs
@@ -39,6 +39,9 @@ public bool HandlesInvalidEntries(NodeFactory factory)
 
         public ReadyToRunGenericHelperNode(NodeFactory factory, ReadyToRunHelperId helperId, object target, TypeSystemEntity dictionaryOwner)
         {
+            Debug.Assert(
+                (dictionaryOwner is TypeDesc type && type.HasInstantiation)
+                || (dictionaryOwner is MethodDesc method && method.HasInstantiation));
             _id = helperId;
             _dictionaryOwner = dictionaryOwner;
             _target = target;
@@ -151,7 +154,7 @@ public IEnumerable<DependencyListEntry> InstantiateDependencies(NodeFactory fact
                         if (createInfo.NeedsVirtualMethodUseTracking)
                         {
                             MethodDesc instantiatedTargetMethod = createInfo.TargetMethod.GetNonRuntimeDeterminedMethodFromRuntimeDeterminedMethodViaSubstitution(typeInstantiation, methodInstantiation);
-                            if (!factory.VTable(instantiatedTargetMethod.OwningType).HasFixedSlots)
+                            if (!factory.VTable(instantiatedTargetMethod.OwningType).HasKnownVirtualMethodUse)
                             {
                                 result.Add(
                                     new DependencyListEntry(
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReadyToRunHelperNode.cs
index f3a4410dcbab..ab9ccafa2db9 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReadyToRunHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReadyToRunHelperNode.cs
@@ -43,7 +43,7 @@ public enum ReadyToRunHelperId
         ConstrainedDirectCall,
     }
 
-    public partial class ReadyToRunHelperNode : AssemblyStubNode, INodeWithDebugInfo
+    public partial class ReadyToRunHelperNode : AssemblyStubNode
     {
         private readonly ReadyToRunHelperId _id;
         private readonly object _target;
@@ -64,7 +64,6 @@ public ReadyToRunHelperNode(ReadyToRunHelperId id, object target)
                         defType.ComputeStaticFieldLayout(StaticLayoutKind.StaticRegionSizesAndFields);
                     }
                     break;
-                case ReadyToRunHelperId.VirtualCall:
                 case ReadyToRunHelperId.ResolveVirtualFunction:
                     {
                         // Make sure we aren't trying to callvirt Object.Finalize
@@ -92,9 +91,6 @@ public override void AppendMangledName(NameMangler nameMangler, Utf8StringBuilde
         {
             switch (_id)
             {
-                case ReadyToRunHelperId.VirtualCall:
-                    sb.Append("__VirtualCall_").Append(nameMangler.GetMangledMethodName((MethodDesc)_target));
-                    break;
                 case ReadyToRunHelperId.GetNonGCStaticBase:
                     sb.Append("__GetNonGCStaticBase_").Append(nameMangler.GetMangledTypeName((TypeDesc)_target));
                     break;
@@ -122,7 +118,7 @@ public override void AppendMangledName(NameMangler nameMangler, Utf8StringBuilde
 
         protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFactory factory)
         {
-            if (_id == ReadyToRunHelperId.VirtualCall || _id == ReadyToRunHelperId.ResolveVirtualFunction)
+            if (_id == ReadyToRunHelperId.ResolveVirtualFunction)
             {
                 var targetMethod = (MethodDesc)_target;
 
@@ -131,7 +127,7 @@ protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFact
 #if !SUPPORT_JIT
                 factory.MetadataManager.GetDependenciesDueToVirtualMethodReflectability(ref dependencyList, factory, targetMethod);
 
-                if (!factory.VTable(targetMethod.OwningType).HasFixedSlots)
+                if (!factory.VTable(targetMethod.OwningType).HasKnownVirtualMethodUse)
 
                 {
                     dependencyList.Add(factory.VirtualMethodUse((MethodDesc)_target), "ReadyToRun Virtual Method Call");
@@ -152,7 +148,7 @@ protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFact
 #if !SUPPORT_JIT
                     factory.MetadataManager.GetDependenciesDueToVirtualMethodReflectability(ref dependencyList, factory, targetMethod);
 
-                    if (!factory.VTable(info.TargetMethod.OwningType).HasFixedSlots)
+                    if (!factory.VTable(info.TargetMethod.OwningType).HasKnownVirtualMethodUse)
                     {
                         dependencyList ??= new DependencyList();
                         dependencyList.Add(factory.VirtualMethodUse(info.TargetMethod), "ReadyToRun Delegate to virtual method");
@@ -176,39 +172,6 @@ public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDep
             return dependencyList;
         }
 
-        IEnumerable<NativeSequencePoint> INodeWithDebugInfo.GetNativeSequencePoints()
-        {
-            if (_id == ReadyToRunHelperId.VirtualCall)
-            {
-                // Generate debug information that lets debuggers step into the virtual calls.
-                // We generate a step into sequence point at the point where the helper jumps to
-                // the target of the virtual call.
-                TargetDetails target = ((MethodDesc)_target).Context.Target;
-                int debuggerStepInOffset = -1;
-                switch (target.Architecture)
-                {
-                    case TargetArchitecture.X64:
-                        debuggerStepInOffset = 3;
-                        break;
-                }
-                if (debuggerStepInOffset != -1)
-                {
-                    return new NativeSequencePoint[]
-                    {
-                        new NativeSequencePoint(0, string.Empty, WellKnownLineNumber.DebuggerStepThrough),
-                        new NativeSequencePoint(debuggerStepInOffset, string.Empty, WellKnownLineNumber.DebuggerStepIn)
-                    };
-                }
-            }
-
-            return Array.Empty<NativeSequencePoint>();
-        }
-
-        IEnumerable<DebugVarInfoMetadata> INodeWithDebugInfo.GetDebugVars()
-        {
-            return Array.Empty<DebugVarInfoMetadata>();
-        }
-
 #if !SUPPORT_JIT
         public override int ClassCode => -911637948;
 
@@ -224,7 +187,6 @@ public override int CompareToImpl(ISortableNode other, CompilerComparer comparer
                 case ReadyToRunHelperId.GetGCStaticBase:
                 case ReadyToRunHelperId.GetThreadStaticBase:
                     return comparer.Compare((TypeDesc)_target, (TypeDesc)((ReadyToRunHelperNode)other)._target);
-                case ReadyToRunHelperId.VirtualCall:
                 case ReadyToRunHelperId.ResolveVirtualFunction:
                     return comparer.Compare((MethodDesc)_target, (MethodDesc)((ReadyToRunHelperNode)other)._target);
                 case ReadyToRunHelperId.DelegateCtor:
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectedMethodNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectedMethodNode.cs
index 79c291cdcf33..6fae955a5e5c 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectedMethodNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectedMethodNode.cs
@@ -68,7 +68,7 @@ public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFacto
                     }
                     else
                     {
-                        if (ReflectionVirtualInvokeMapNode.NeedsVirtualInvokeInfo(factory, slotDefiningMethod) && !factory.VTable(slotDefiningMethod.OwningType).HasFixedSlots)
+                        if (ReflectionVirtualInvokeMapNode.NeedsVirtualInvokeInfo(factory, slotDefiningMethod) && !factory.VTable(slotDefiningMethod.OwningType).HasKnownVirtualMethodUse)
                             dependencies.Add(factory.VirtualMethodUse(slotDefiningMethod), "Virtually callable reflectable method");
                     }
                 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionFieldMapNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionFieldMapNode.cs
index 8ea831f1efdb..c657327d5478 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionFieldMapNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionFieldMapNode.cs
@@ -59,10 +59,6 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                 if (field.IsLiteral || field.HasRva)
                     continue;
 
-                // CppCodegen: implement thread statics
-                if (factory.Target.Abi == TargetAbi.CppCodegen && field.IsThreadStatic)
-                    continue;
-
                 FieldTableFlags flags;
                 if (field.IsStatic)
                 {
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionVirtualInvokeMapNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionVirtualInvokeMapNode.cs
index b2f8e973c167..9c49be270bdd 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionVirtualInvokeMapNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ReflectionVirtualInvokeMapNode.cs
@@ -95,7 +95,7 @@ public static void GetVirtualInvokeMapDependencies(ref DependencyList dependenci
                 if (!method.HasInstantiation)
                 {
                     MethodDesc slotDefiningMethod = MetadataVirtualMethodAlgorithm.FindSlotDefiningMethodForVirtualMethod(method);
-                    if (!factory.VTable(slotDefiningMethod.OwningType).HasFixedSlots)
+                    if (!factory.VTable(slotDefiningMethod.OwningType).HasKnownVirtualMethodUse)
                     {
                         dependencies.Add(factory.VirtualMethodUse(slotDefiningMethod), "Reflection virtual invoke method");
                     }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/RuntimeImportMethodNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/RuntimeImportMethodNode.cs
index 014e501d18c3..6ccf0d25d43a 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/RuntimeImportMethodNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/RuntimeImportMethodNode.cs
@@ -13,8 +13,8 @@ public class RuntimeImportMethodNode : ExternSymbolNode, IMethodNode, ISymbolDef
     {
         private MethodDesc _method;
 
-        public RuntimeImportMethodNode(MethodDesc method)
-            : base(((EcmaMethod)method).GetRuntimeImportName())
+        public RuntimeImportMethodNode(MethodDesc method, NameMangler nameMangler)
+            : base(nameMangler.NodeMangler.ExternMethod(((EcmaMethod)method).GetRuntimeImportName(), method))
         {
             _method = method;
         }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ScannedMethodNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ScannedMethodNode.cs
index b4391164b99d..c72491f13072 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ScannedMethodNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/ScannedMethodNode.cs
@@ -82,7 +82,7 @@ public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDep
         }
 
         public override IEnumerable<CombinedDependencyListEntry> SearchDynamicDependencies(List<DependencyNodeCore<NodeFactory>> markedNodes, int firstNode, NodeFactory factory) => null;
-        public override bool InterestingForDynamicDependencyAnalysis => false;
+        public override bool InterestingForDynamicDependencyAnalysis => _method.HasInstantiation || _method.OwningType.HasInstantiation;
         public override bool HasDynamicDependencies => false;
         public override bool HasConditionalStaticDependencies => CodeBasedDependencyAlgorithm.HasConditionalDependenciesDueToMethodCodePresence(_method);
 
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/SealedVTableNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/SealedVTableNode.cs
index 85c9e40eec05..69f02c2406cf 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/SealedVTableNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/SealedVTableNode.cs
@@ -14,6 +14,7 @@ public class SealedVTableNode : ObjectNode, ISymbolDefinitionNode
     {
         private readonly TypeDesc _type;
         private List<SealedVTableEntry> _sealedVTableEntries;
+        private DependencyList _nonRelocationDependencies;
 
         public SealedVTableNode(TypeDesc type)
         {
@@ -100,10 +101,11 @@ public bool BuildSealedVTableSlots(NodeFactory factory, bool relocsOnly)
                 return true;
 
             DefType declType = _type.GetClosestDefType();
+            VTableSliceNode declTypeVTable = factory.VTable(declType);
 
             // It's only okay to touch the actual list of slots if we're in the final emission phase
             // or the vtable is not built lazily.
-            if (relocsOnly && !factory.VTable(declType).HasFixedSlots)
+            if (relocsOnly && !declTypeVTable.HasKnownVirtualMethodUse)
                 return false;
 
             _sealedVTableEntries = new List<SealedVTableEntry>();
@@ -116,17 +118,23 @@ public bool BuildSealedVTableSlots(NodeFactory factory, bool relocsOnly)
             bool needsEntriesForInstanceInterfaceMethodImpls = !isInterface
                     || ((MetadataType)declType).IsDynamicInterfaceCastableImplementation();
 
-            IReadOnlyList<MethodDesc> virtualSlots = factory.VTable(declType).Slots;
+            IReadOnlyList<MethodDesc> virtualSlots = declTypeVTable.Slots;
 
             for (int i = 0; i < virtualSlots.Count; i++)
             {
+                if (!declTypeVTable.IsSlotUsed(virtualSlots[i]))
+                    continue;
+
                 if (!virtualSlots[i].Signature.IsStatic && !needsEntriesForInstanceInterfaceMethodImpls)
                     continue;
 
                 MethodDesc implMethod = declType.FindVirtualFunctionTargetMethodOnObjectType(virtualSlots[i]);
 
                 if (implMethod.CanMethodBeInSealedVTable(factory))
-                    _sealedVTableEntries.Add(SealedVTableEntry.FromVirtualMethod(implMethod));
+                {
+                    IMethodNode node = factory.MethodEntrypoint(implMethod.GetCanonMethodTarget(CanonicalFormKind.Specific), unboxingStub: !implMethod.Signature.IsStatic && declType.IsValueType);
+                    _sealedVTableEntries.Add(SealedVTableEntry.FromVirtualMethod(implMethod, node));
+                }
             }
 
             TypeDesc declTypeDefinition = declType.GetTypeDefinition();
@@ -142,12 +150,16 @@ public bool BuildSealedVTableSlots(NodeFactory factory, bool relocsOnly)
                 var interfaceType = declTypeRuntimeInterfaces[interfaceIndex];
                 var definitionInterfaceType = declTypeDefinitionRuntimeInterfaces[interfaceIndex];
 
-                virtualSlots = factory.VTable(interfaceType).Slots;
+                VTableSliceNode interfaceVTable = factory.VTable(interfaceType);
+                virtualSlots = interfaceVTable.Slots;
 
                 for (int interfaceMethodSlot = 0; interfaceMethodSlot < virtualSlots.Count; interfaceMethodSlot++)
                 {
                     MethodDesc declMethod = virtualSlots[interfaceMethodSlot];
 
+                    if (!interfaceVTable.IsSlotUsed(declMethod))
+                        continue;
+
                     if (!declMethod.Signature.IsStatic && !needsEntriesForInstanceInterfaceMethodImpls)
                         continue;
 
@@ -173,7 +185,10 @@ public bool BuildSealedVTableSlots(NodeFactory factory, bool relocsOnly)
                                 targetMethod = factory.TypeSystemContext.GetMethodForInstantiatedType(implMethod.GetTypicalMethodDefinition(), (InstantiatedType)implType);
 
                             if (targetMethod.CanMethodBeInSealedVTable(factory) || implMethod.Signature.IsStatic)
-                                _sealedVTableEntries.Add(SealedVTableEntry.FromVirtualMethod(targetMethod));
+                            {
+                                IMethodNode node = factory.MethodEntrypoint(targetMethod.GetCanonMethodTarget(CanonicalFormKind.Specific), unboxingStub: !targetMethod.Signature.IsStatic && declType.IsValueType);
+                                _sealedVTableEntries.Add(SealedVTableEntry.FromVirtualMethod(targetMethod, node));
+                            }
                         }
                     }
                     else
@@ -185,7 +200,24 @@ public bool BuildSealedVTableSlots(NodeFactory factory, bool relocsOnly)
                         {
                             DefType providingInterfaceDefinitionType = (DefType)implMethod.OwningType;
                             implMethod = implMethod.InstantiateSignature(declType.Instantiation, Instantiation.Empty);
-                            _sealedVTableEntries.Add(SealedVTableEntry.FromDefaultInterfaceMethod(implMethod, providingInterfaceDefinitionType));
+
+                            MethodDesc canonImplMethod = implMethod.GetCanonMethodTarget(CanonicalFormKind.Specific);
+                            if (canonImplMethod.IsCanonicalMethod(CanonicalFormKind.Any) && !canonImplMethod.Signature.IsStatic)
+                            {
+                                // Canonical instance default interface methods need to go through a thunk that acquires the generic context from `this`.
+                                // Static methods have their generic context passed explicitly.
+                                canonImplMethod = factory.TypeSystemContext.GetDefaultInterfaceMethodImplementationThunk(canonImplMethod, declType.ConvertToCanonForm(CanonicalFormKind.Specific), providingInterfaceDefinitionType);
+
+                                // The above thunk will index into interface list to find the right context. Make sure to keep all interfaces prior to this one
+                                for (int i = 0; i < interfaceIndex; i++)
+                                {
+                                    _nonRelocationDependencies ??= new DependencyList();
+                                    _nonRelocationDependencies.Add(factory.InterfaceUse(declTypeRuntimeInterfaces[i].GetTypeDefinition()), "Interface with shared default methods folows this");
+                                }
+                            }
+                            IMethodNode node = factory.MethodEntrypoint(canonImplMethod, unboxingStub: implMethod.OwningType.IsValueType && !implMethod.Signature.IsStatic);
+
+                            _sealedVTableEntries.Add(SealedVTableEntry.FromDefaultInterfaceMethod(implMethod, providingInterfaceDefinitionType, node));
                         }
                     }
                 }
@@ -196,7 +228,9 @@ public bool BuildSealedVTableSlots(NodeFactory factory, bool relocsOnly)
 
         protected override DependencyList ComputeNonRelocationBasedDependencies(NodeFactory factory)
         {
-            var result = new DependencyList();
+            BuildSealedVTableSlots(factory, relocsOnly: true);
+
+            var result = new DependencyList(_nonRelocationDependencies ?? []);
 
             // When building the sealed vtable, we consult the vtable layout of these types
             TypeDesc declType = _type.GetClosestDefType();
@@ -216,11 +250,9 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly)
 
             if (BuildSealedVTableSlots(factory, relocsOnly))
             {
-                DefType defType = _type.GetClosestDefType();
-
                 for (int i = 0; i < _sealedVTableEntries.Count; i++)
                 {
-                    IMethodNode relocTarget = _sealedVTableEntries[i].GetTarget(factory, defType);
+                    IMethodNode relocTarget = _sealedVTableEntries[i].Target;
 
                     if (factory.Target.SupportsRelativePointers)
                         objData.EmitReloc(relocTarget, RelocType.IMAGE_REL_BASED_RELPTR32);
@@ -242,31 +274,19 @@ private readonly struct SealedVTableEntry
         {
             private readonly MethodDesc _method;
             private readonly DefType _interfaceDefinition;
+            public readonly IMethodNode Target;
 
-            private SealedVTableEntry(MethodDesc method, DefType interfaceDefinition)
+            private SealedVTableEntry(MethodDesc method, DefType interfaceDefinition, IMethodNode target)
             {
                 Debug.Assert(interfaceDefinition == null || method.GetTypicalMethodDefinition().OwningType == interfaceDefinition.GetTypeDefinition());
-                (_method, _interfaceDefinition) = (method, interfaceDefinition);
+                (_method, _interfaceDefinition, Target) = (method, interfaceDefinition, target);
             }
 
-            public static SealedVTableEntry FromVirtualMethod(MethodDesc method)
-                => new SealedVTableEntry(method, null);
+            public static SealedVTableEntry FromVirtualMethod(MethodDesc method, IMethodNode target)
+                => new SealedVTableEntry(method, null, target);
 
-            public static SealedVTableEntry FromDefaultInterfaceMethod(MethodDesc method, DefType interfaceOnDefinition)
-                => new SealedVTableEntry(method, interfaceOnDefinition);
-
-            public IMethodNode GetTarget(NodeFactory factory, TypeDesc implementingClass)
-            {
-                bool isStaticVirtualMethod = _method.Signature.IsStatic;
-                MethodDesc implMethod = _method.GetCanonMethodTarget(CanonicalFormKind.Specific);
-                if (_interfaceDefinition != null && !isStaticVirtualMethod && implMethod.IsCanonicalMethod(CanonicalFormKind.Any))
-                {
-                    // Canonical instance default interface methods need to go through a thunk that acquires the generic context from `this`.
-                    // Static methods have their generic context passed explicitly.
-                    implMethod = factory.TypeSystemContext.GetDefaultInterfaceMethodImplementationThunk(implMethod, implementingClass.ConvertToCanonForm(CanonicalFormKind.Specific), _interfaceDefinition);
-                }
-                return factory.MethodEntrypoint(implMethod, unboxingStub: !isStaticVirtualMethod && _method.OwningType.IsValueType);
-            }
+            public static SealedVTableEntry FromDefaultInterfaceMethod(MethodDesc method, DefType interfaceOnDefinition, IMethodNode target)
+                => new SealedVTableEntry(method, interfaceOnDefinition, target);
 
             public bool Matches(MethodDesc method)
             {
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunHelperNode.cs
index d6ecb47a38d6..b88fbd54e5b5 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunHelperNode.cs
@@ -17,29 +17,6 @@ protected override void EmitCode(NodeFactory factory, ref ARMEmitter encoder, bo
         {
             switch (Id)
             {
-                case ReadyToRunHelperId.VirtualCall:
-                    {
-                        MethodDesc targetMethod = (MethodDesc)Target;
-
-                        Debug.Assert(!targetMethod.OwningType.IsInterface);
-                        Debug.Assert(!targetMethod.CanMethodBeInSealedVTable(factory));
-
-                        int pointerSize = factory.Target.PointerSize;
-
-                        int slot = 0;
-                        if (!relocsOnly)
-                        {
-                            slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, targetMethod, targetMethod.OwningType);
-                            Debug.Assert(slot != -1);
-                        }
-
-                        encoder.EmitLDR(encoder.TargetRegister.InterproceduralScratch, encoder.TargetRegister.Arg0, 0);
-                        encoder.EmitLDR(encoder.TargetRegister.InterproceduralScratch, encoder.TargetRegister.InterproceduralScratch,
-                                        EETypeNode.GetVTableOffset(pointerSize) + (slot * pointerSize));
-                        encoder.EmitJMP(encoder.TargetRegister.InterproceduralScratch);
-                    }
-                    break;
-
                 case ReadyToRunHelperId.GetNonGCStaticBase:
                     {
                         MetadataType target = (MetadataType)Target;
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs
index 6d94afdc3ee4..5ab16c1814ce 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs
@@ -18,29 +18,6 @@ protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder,
         {
             switch (Id)
             {
-                case ReadyToRunHelperId.VirtualCall:
-                    {
-                        MethodDesc targetMethod = (MethodDesc)Target;
-
-                        Debug.Assert(!targetMethod.OwningType.IsInterface);
-                        Debug.Assert(!targetMethod.CanMethodBeInSealedVTable(factory));
-
-                        int pointerSize = factory.Target.PointerSize;
-
-                        int slot = 0;
-                        if (!relocsOnly)
-                        {
-                            slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, targetMethod, targetMethod.OwningType);
-                            Debug.Assert(slot != -1);
-                        }
-
-                        encoder.EmitLDR(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.Arg0, 0);
-                        encoder.EmitLDR(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.IntraProcedureCallScratch1,
-                                        EETypeNode.GetVTableOffset(pointerSize) + (slot * pointerSize));
-                        encoder.EmitJMP(encoder.TargetRegister.IntraProcedureCallScratch1);
-                    }
-                    break;
-
                 case ReadyToRunHelperId.GetNonGCStaticBase:
                     {
                         MetadataType target = (MetadataType)Target;
@@ -74,26 +51,7 @@ protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder,
                         ISortableSymbolNode index = factory.TypeThreadStaticIndex(target);
                         if (index is TypeThreadStaticIndexNode ti && ti.IsInlined)
                         {
-                            if (!factory.PreinitializationManager.HasLazyStaticConstructor(target))
-                            {
-                                EmitInlineTLSAccess(factory, ref encoder);
-                            }
-                            else
-                            {
-                                // First arg: unused address of the TypeManager
-                                // encoder.EmitMOV(encoder.TargetRegister.Arg0, (ushort)0);
-
-                                // Second arg: ~0 (index of inlined storage)
-                                encoder.EmitMVN(encoder.TargetRegister.Arg1, 0);
-
-                                encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target));
-                                encoder.EmitSUB(encoder.TargetRegister.Arg2, NonGCStaticsNode.GetClassConstructorContextSize(factory.Target));
-                                encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2);
-                                encoder.EmitCMP(encoder.TargetRegister.Arg3, 0);
-
-                                encoder.EmitJNE(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase));
-                                EmitInlineTLSAccess(factory, ref encoder);
-                            }
+                            throw new NotImplementedException();
                         }
                         else
                         {
@@ -226,81 +184,5 @@ protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder,
                     throw new NotImplementedException();
             }
         }
-
-        // emits code that results in ThreadStaticBase referenced in X0.
-        // may trash volatile registers. (there are calls to the slow helper and possibly to the platform's TLS support)
-        private static void EmitInlineTLSAccess(NodeFactory factory, ref ARM64Emitter encoder)
-        {
-            ISymbolNode getInlinedThreadStaticBaseSlow = factory.HelperEntrypoint(HelperEntrypoint.GetInlinedThreadStaticBaseSlow);
-            ISymbolNode tlsRoot = factory.TlsRoot;
-            // IsSingleFileCompilation is not enough to guarantee that we can use "Initial Executable" optimizations.
-            // we need a special compiler flag analogous to /GA. Just assume "false" for now.
-            // bool isInitialExecutable = factory.CompilationModuleGroup.IsSingleFileCompilation;
-            bool isInitialExecutable = false;
-
-            if (factory.Target.OperatingSystem == TargetOS.Linux)
-            {
-                if (isInitialExecutable)
-                {
-                    // mrs  x0, tpidr_el0
-                    encoder.Builder.EmitUInt(0xd53bd040);
-
-                    // add  x0, x0, #:tprel_hi12:tlsRoot, lsl #12
-                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_HI12);
-                    encoder.Builder.EmitUInt(0x91400000);
-
-                    // add  x1, x0, #:tprel_lo12_nc:tlsRoot, lsl #0
-                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_AARCH64_TLSLE_ADD_TPREL_LO12_NC);
-                    encoder.Builder.EmitUInt(0x91000001);
-                }
-                else
-                {
-                    // stp     x29, x30, [sp, -16]!
-                    encoder.Builder.EmitUInt(0xa9bf7bfd);
-                    // mov     x29, sp
-                    encoder.Builder.EmitUInt(0x910003fd);
-
-                    // mrs     x1, tpidr_el0
-                    encoder.Builder.EmitUInt(0xd53bd041);
-
-                    // adrp    x0, :tlsdesc:tlsRoot
-                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21);
-                    encoder.Builder.EmitUInt(0x90000000);
-
-                    // ldr     x2, [x0, #:tlsdesc_lo12:tlsRoot]
-                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_AARCH64_TLSDESC_LD64_LO12);
-                    encoder.Builder.EmitUInt(0xf9400002);
-
-                    // add     x0, x0, :tlsdesc_lo12:tlsRoot
-                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_AARCH64_TLSDESC_ADD_LO12);
-                    encoder.Builder.EmitUInt(0x91000000);
-
-                    // blr     :tlsdesc_call:tlsRoot:x2
-                    encoder.Builder.EmitReloc(tlsRoot, RelocType.IMAGE_REL_AARCH64_TLSDESC_CALL);
-                    encoder.Builder.EmitUInt(0xd63f0040);
-
-                    // add     x1, x1, x0
-                    encoder.Builder.EmitUInt(0x8b000021);
-
-                    // ldp     x29, x30, [sp], 16
-                    encoder.Builder.EmitUInt(0xa8c17bfd);
-                }
-
-                encoder.EmitLDR(Register.X0, Register.X1);
-
-                // here we have:
-                // X1: addr, X0: storage
-                // if the storage is already allocated, just return, otherwise do slow path.
-
-                encoder.EmitCMP(Register.X0, 0);
-                encoder.EmitRETIfNotEqual();
-                encoder.EmitMOV(Register.X0, Register.X1);
-                encoder.EmitJMP(getInlinedThreadStaticBaseSlow);
-            }
-            else
-            {
-                throw new NotImplementedException();
-            }
-        }
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs
index 2b54f9ccdcc6..db4d1855e20f 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs
@@ -18,29 +18,6 @@ protected override void EmitCode(NodeFactory factory, ref LoongArch64Emitter enc
         {
             switch (Id)
             {
-                case ReadyToRunHelperId.VirtualCall:
-                    {
-                        MethodDesc targetMethod = (MethodDesc)Target;
-
-                        Debug.Assert(!targetMethod.OwningType.IsInterface);
-                        Debug.Assert(!targetMethod.CanMethodBeInSealedVTable(factory));
-
-                        int pointerSize = factory.Target.PointerSize;
-
-                        int slot = 0;
-                        if (!relocsOnly)
-                        {
-                            slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, targetMethod, targetMethod.OwningType);
-                            Debug.Assert(slot != -1);
-                        }
-
-                        encoder.EmitLD(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.Arg0, 0);
-                        encoder.EmitLD(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.IntraProcedureCallScratch1,
-                                        EETypeNode.GetVTableOffset(pointerSize) + (slot * pointerSize));
-                        encoder.EmitJMP(encoder.TargetRegister.IntraProcedureCallScratch1);
-                    }
-                    break;
-
                 case ReadyToRunHelperId.GetNonGCStaticBase:
                     {
                         MetadataType target = (MetadataType)Target;
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs
index 1d430fb1f96f..620878463e3e 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs
@@ -18,29 +18,6 @@ protected override void EmitCode(NodeFactory factory, ref RiscV64Emitter encoder
         {
             switch (Id)
             {
-                case ReadyToRunHelperId.VirtualCall:
-                    {
-                        MethodDesc targetMethod = (MethodDesc)Target;
-
-                        Debug.Assert(!targetMethod.OwningType.IsInterface);
-                        Debug.Assert(!targetMethod.CanMethodBeInSealedVTable(factory));
-
-                        int pointerSize = factory.Target.PointerSize;
-
-                        int slot = 0;
-                        if (!relocsOnly)
-                        {
-                            slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, targetMethod, targetMethod.OwningType);
-                            Debug.Assert(slot != -1);
-                        }
-
-                        encoder.EmitLD(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.Arg0, 0);
-                        encoder.EmitLD(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.IntraProcedureCallScratch1,
-                                        EETypeNode.GetVTableOffset(pointerSize) + (slot * pointerSize));
-                        encoder.EmitJMP(encoder.TargetRegister.IntraProcedureCallScratch1);
-                    }
-                    break;
-
                 case ReadyToRunHelperId.GetNonGCStaticBase:
                     {
                         MetadataType target = (MetadataType)Target;
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs
index 66265e9d71bb..4b1b2d247f44 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunHelperNode.cs
@@ -18,31 +18,6 @@ protected override void EmitCode(NodeFactory factory, ref X64Emitter encoder, bo
         {
             switch (Id)
             {
-                case ReadyToRunHelperId.VirtualCall:
-                    {
-                        MethodDesc targetMethod = (MethodDesc)Target;
-
-                        Debug.Assert(!targetMethod.OwningType.IsInterface);
-                        Debug.Assert(!targetMethod.CanMethodBeInSealedVTable(factory));
-
-                        AddrMode loadFromThisPtr = new AddrMode(encoder.TargetRegister.Arg0, null, 0, 0, AddrModeSize.Int64);
-                        encoder.EmitMOV(encoder.TargetRegister.Result, ref loadFromThisPtr);
-
-                        int pointerSize = factory.Target.PointerSize;
-
-                        int slot = 0;
-                        if (!relocsOnly)
-                        {
-                            slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, targetMethod, targetMethod.OwningType);
-                            Debug.Assert(slot != -1);
-                        }
-                        Debug.Assert(((NativeSequencePoint[])((INodeWithDebugInfo)this).GetNativeSequencePoints())[1].NativeOffset == encoder.Builder.CountBytes);
-
-                        AddrMode jmpAddrMode = new AddrMode(encoder.TargetRegister.Result, null, EETypeNode.GetVTableOffset(pointerSize) + (slot * pointerSize), 0, AddrModeSize.Int64);
-                        encoder.EmitJmpToAddrMode(ref jmpAddrMode);
-                    }
-                    break;
-
                 case ReadyToRunHelperId.GetNonGCStaticBase:
                     {
                         MetadataType target = (MetadataType)Target;
@@ -74,26 +49,7 @@ protected override void EmitCode(NodeFactory factory, ref X64Emitter encoder, bo
                         ISortableSymbolNode index = factory.TypeThreadStaticIndex(target);
                         if (index is TypeThreadStaticIndexNode ti && ti.IsInlined)
                         {
-                            if (!factory.PreinitializationManager.HasLazyStaticConstructor(target))
-                            {
-                                EmitInlineTLSAccess(factory, ref encoder);
-                            }
-                            else
-                            {
-                                // First arg: unused address of the TypeManager
-                                // encoder.EmitMOV(encoder.TargetRegister.Arg0, 0);
-
-                                // Second arg: -1 (index of inlined storage)
-                                encoder.EmitMOV(encoder.TargetRegister.Arg1, -1);
-
-                                encoder.EmitLEAQ(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target), -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target));
-
-                                AddrMode initialized = new AddrMode(encoder.TargetRegister.Arg2, null, 0, 0, AddrModeSize.Int64);
-                                encoder.EmitCMP(ref initialized, 0);
-                                encoder.EmitJNE(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase));
-
-                                EmitInlineTLSAccess(factory, ref encoder);
-                            }
+                            throw new NotImplementedException();
                         }
                         else
                         {
@@ -225,16 +181,5 @@ protected override void EmitCode(NodeFactory factory, ref X64Emitter encoder, bo
                     throw new NotImplementedException();
             }
         }
-
-        // emits code that results in ThreadStaticBase referenced in RAX.
-        // may trash volatile registers. (there are calls to the slow helper and possibly to platform's TLS support)
-        private static void EmitInlineTLSAccess(NodeFactory factory, ref X64Emitter encoder)
-        {
-            // For factory.Target.IsApplePlatform
-            // movq _\Var @TLVP(% rip), % rdi
-            // callq * (% rdi)
-
-            throw new NotImplementedException();
-        }
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X86/X86ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X86/X86ReadyToRunGenericHelperNode.cs
index 079a38cc7460..e61f9c5b088c 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X86/X86ReadyToRunGenericHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X86/X86ReadyToRunGenericHelperNode.cs
@@ -1,15 +1,254 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System;
+
 using ILCompiler.DependencyAnalysis.X86;
 
+using Internal.TypeSystem;
+
+using Debug = System.Diagnostics.Debug;
+
 namespace ILCompiler.DependencyAnalysis
 {
     public partial class ReadyToRunGenericHelperNode
     {
+        protected void EmitDictionaryLookup(NodeFactory factory, ref X86Emitter encoder, Register context, Register result, GenericLookupResult lookup, bool relocsOnly)
+        {
+            // INVARIANT: must not trash context register
+
+            // Find the generic dictionary slot
+            int dictionarySlot = 0;
+            if (!relocsOnly)
+            {
+                // The concrete slot won't be known until we're emitting data - don't ask for it in relocsOnly.
+                if (!factory.GenericDictionaryLayout(_dictionaryOwner).TryGetSlotForEntry(lookup, out dictionarySlot))
+                {
+                    encoder.EmitZeroReg(result);
+                    return;
+                }
+            }
+
+            // Load the generic dictionary cell
+            AddrMode loadEntry = new AddrMode(
+                context, null, dictionarySlot * factory.Target.PointerSize, 0, AddrModeSize.Int32);
+            encoder.EmitMOV(result, ref loadEntry);
+
+            // If there's any invalid entries, we need to test for them
+            //
+            // Skip this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries
+            // flag can change over the course of compilation and the bad slot helper dependency
+            // should be reported by someone else - the system should not rely on it coming from here.
+            if (!relocsOnly && _hasInvalidEntries)
+            {
+                AddrMode resultAddr = new AddrMode(Register.RegDirect | result, null, 0, 0, AddrModeSize.Int32);
+                encoder.EmitCMP(ref resultAddr, 0);
+                encoder.EmitJE(GetBadSlotHelper(factory));
+            }
+        }
+
         protected sealed override void EmitCode(NodeFactory factory, ref X86Emitter encoder, bool relocsOnly)
         {
-            encoder.EmitINT3();
+            // First load the generic context into the context register.
+            EmitLoadGenericContext(factory, ref encoder, relocsOnly);
+
+            switch (_id)
+            {
+                case ReadyToRunHelperId.GetNonGCStaticBase:
+                    {
+                        if (!TriggersLazyStaticConstructor(factory))
+                        {
+                            EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Result, _lookupSignature, relocsOnly);
+                            encoder.EmitRET();
+                        }
+                        else
+                        {
+                            EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg0, _lookupSignature, relocsOnly);
+                            encoder.EmitMOV(encoder.TargetRegister.Result, encoder.TargetRegister.Arg0);
+
+                            // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region.
+                            int cctorContextSize = NonGCStaticsNode.GetClassConstructorContextSize(factory.Target);
+                            AddrMode initialized = new AddrMode(encoder.TargetRegister.Arg0, null, -cctorContextSize, 0, AddrModeSize.Int32);
+                            encoder.EmitCMP(ref initialized, 0);
+                            encoder.EmitRETIfEqual();
+
+                            AddrMode loadCctor = new AddrMode(encoder.TargetRegister.Arg0, null, -cctorContextSize, 0, AddrModeSize.Int32);
+                            encoder.EmitLEA(encoder.TargetRegister.Arg0, ref loadCctor);
+                            encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result);
+                            encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnNonGCStaticBase));
+                        }
+                    }
+                    break;
+
+                case ReadyToRunHelperId.GetGCStaticBase:
+                    {
+                        MetadataType target = (MetadataType)_target;
+
+                        EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Result, _lookupSignature, relocsOnly);
+
+                        AddrMode loadFromResult = new AddrMode(encoder.TargetRegister.Result, null, 0, 0, AddrModeSize.Int32);
+                        encoder.EmitMOV(encoder.TargetRegister.Result, ref loadFromResult);
+
+                        if (!TriggersLazyStaticConstructor(factory))
+                        {
+                            encoder.EmitRET();
+                        }
+                        else
+                        {
+                            // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region.
+                            GenericLookupResult nonGcRegionLookup = factory.GenericLookup.TypeNonGCStaticBase(target);
+                            EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg0, nonGcRegionLookup, relocsOnly);
+
+                            int cctorContextSize = NonGCStaticsNode.GetClassConstructorContextSize(factory.Target);
+                            AddrMode initialized = new AddrMode(encoder.TargetRegister.Arg0, null, -cctorContextSize, 0, AddrModeSize.Int32);
+                            encoder.EmitCMP(ref initialized, 0);
+                            encoder.EmitRETIfEqual();
+
+                            encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result);
+                            AddrMode loadCctor = new AddrMode(encoder.TargetRegister.Arg0, null, -cctorContextSize, 0, AddrModeSize.Int32);
+                            encoder.EmitLEA(encoder.TargetRegister.Arg0, ref loadCctor);
+
+                            encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnGCStaticBase));
+                        }
+                    }
+                    break;
+
+                case ReadyToRunHelperId.GetThreadStaticBase:
+                    {
+                        MetadataType target = (MetadataType)_target;
+
+                        // Look up the index cell
+                        EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg1, _lookupSignature, relocsOnly);
+
+                        ISymbolNode helperEntrypoint;
+                        if (TriggersLazyStaticConstructor(factory))
+                        {
+                            // There is a lazy class constructor. We need the non-GC static base because that's where the
+                            // class constructor context lives.
+                            GenericLookupResult nonGcRegionLookup = factory.GenericLookup.TypeNonGCStaticBase(target);
+                            EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Result, nonGcRegionLookup, relocsOnly);
+                            int cctorContextSize = NonGCStaticsNode.GetClassConstructorContextSize(factory.Target);
+                            AddrMode loadCctor = new AddrMode(encoder.TargetRegister.Result, null, -cctorContextSize, 0, AddrModeSize.Int32);
+                            encoder.EmitLEA(encoder.TargetRegister.Result, ref loadCctor);
+
+                            AddrMode storeAtEspPlus4 = new AddrMode(Register.ESP, null, 4, 0, AddrModeSize.Int32);
+                            encoder.EmitStackDup();
+                            encoder.EmitMOV(ref storeAtEspPlus4, encoder.TargetRegister.Result);
+
+                            helperEntrypoint = factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase);
+                        }
+                        else
+                        {
+                            helperEntrypoint = factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType);
+                        }
+
+                        // First arg: address of the TypeManager slot that provides the helper with
+                        // information about module index and the type manager instance (which is used
+                        // for initialization on first access).
+                        AddrMode loadFromArg1 = new AddrMode(encoder.TargetRegister.Arg1, null, 0, 0, AddrModeSize.Int32);
+                        encoder.EmitMOV(encoder.TargetRegister.Arg0, ref loadFromArg1);
+
+                        // Second arg: index of the type in the ThreadStatic section of the modules
+                        AddrMode loadFromArg1AndDelta = new AddrMode(encoder.TargetRegister.Arg1, null, factory.Target.PointerSize, 0, AddrModeSize.Int32);
+                        encoder.EmitMOV(encoder.TargetRegister.Arg1, ref loadFromArg1AndDelta);
+
+                        encoder.EmitJMP(helperEntrypoint);
+                    }
+                    break;
+
+                case ReadyToRunHelperId.DelegateCtor:
+                    {
+                        // This is a weird helper. Codegen populated Arg0 and Arg1 with the values that the constructor
+                        // method expects. Codegen also passed us the generic context on stack.
+                        // We now need to load the delegate target method on the stack (using a dictionary lookup)
+                        // and the optional 4th parameter, and call the ctor.
+
+                        var target = (DelegateCreationInfo)_target;
+
+                        // EmitLoadGenericContext loaded the context from stack into Result
+                        EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Result, encoder.TargetRegister.Result, _lookupSignature, relocsOnly);
+
+                        AddrMode storeAtEspPlus4 = new AddrMode(Register.ESP, null, 4, 0, AddrModeSize.Int32);
+                        if (target.Thunk != null)
+                        {
+                            Debug.Assert(target.Constructor.Method.Signature.Length == 3);
+                            AddrMode storeAtEspPlus8 = new AddrMode(Register.ESP, null, 8, 0, AddrModeSize.Int32);
+                            encoder.EmitStackDup();
+                            encoder.EmitMOV(ref storeAtEspPlus8, encoder.TargetRegister.Result);
+                            encoder.EmitMOV(encoder.TargetRegister.Result, target.Thunk);
+                            encoder.EmitMOV(ref storeAtEspPlus4, encoder.TargetRegister.Result);
+                        }
+                        else
+                        {
+                            Debug.Assert(target.Constructor.Method.Signature.Length == 2);
+                            encoder.EmitMOV(ref storeAtEspPlus4, encoder.TargetRegister.Result);
+                        }
+
+                        encoder.EmitJMP(target.Constructor);
+                    }
+                    break;
+
+                // These are all simple: just get the thing from the dictionary and we're done
+                case ReadyToRunHelperId.TypeHandle:
+                case ReadyToRunHelperId.MethodHandle:
+                case ReadyToRunHelperId.FieldHandle:
+                case ReadyToRunHelperId.MethodDictionary:
+                case ReadyToRunHelperId.MethodEntry:
+                case ReadyToRunHelperId.VirtualDispatchCell:
+                case ReadyToRunHelperId.DefaultConstructor:
+                case ReadyToRunHelperId.ObjectAllocator:
+                case ReadyToRunHelperId.TypeHandleForCasting:
+                case ReadyToRunHelperId.ConstrainedDirectCall:
+                    {
+                        EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Result, _lookupSignature, relocsOnly);
+                        encoder.EmitRET();
+                    }
+                    break;
+                default:
+                    throw new NotImplementedException();
+            }
+        }
+
+        protected virtual void EmitLoadGenericContext(NodeFactory factory, ref X86Emitter encoder, bool relocsOnly)
+        {
+            // Assume generic context is already loaded in the context register.
+            if (Id == ReadyToRunHelperId.DelegateCtor)
+            {
+                AddrMode loadAtEspPlus4 = new AddrMode(Register.ESP, null, 4, 0, AddrModeSize.Int32);
+                encoder.EmitMOV(encoder.TargetRegister.Result, ref loadAtEspPlus4);
+            }
+        }
+    }
+
+    public partial class ReadyToRunGenericLookupFromTypeNode
+    {
+        protected override void EmitLoadGenericContext(NodeFactory factory, ref X86Emitter encoder, bool relocsOnly)
+        {
+            // We start with context register pointing to the MethodTable
+            Register contextRegister = encoder.TargetRegister.Arg0;
+
+            // Locate the VTable slot that points to the dictionary
+            int vtableSlot = 0;
+            if (!relocsOnly)
+            {
+                // The concrete slot won't be known until we're emitting data - don't ask for it in relocsOnly.
+                vtableSlot = VirtualMethodSlotHelper.GetGenericDictionarySlot(factory, (TypeDesc)_dictionaryOwner);
+            }
+
+            int pointerSize = factory.Target.PointerSize;
+            int slotOffset = EETypeNode.GetVTableOffset(pointerSize) + (vtableSlot * pointerSize);
+
+            // DelegateCtor is special, the context is on stack
+            if (Id == ReadyToRunHelperId.DelegateCtor)
+            {
+                AddrMode loadAtEspPlus4 = new AddrMode(Register.ESP, null, 4, 0, AddrModeSize.Int32);
+                encoder.EmitMOV(encoder.TargetRegister.Result, ref loadAtEspPlus4);
+                contextRegister = encoder.TargetRegister.Result;
+            }
+
+            // Load the dictionary pointer from the VTable
+            AddrMode loadDictionary = new AddrMode(contextRegister, null, slotOffset, 0, AddrModeSize.Int32);
+            encoder.EmitMOV(contextRegister, ref loadDictionary);
         }
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X86/X86ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X86/X86ReadyToRunHelperNode.cs
index 255ec2f0b016..239bfd338d9c 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X86/X86ReadyToRunHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X86/X86ReadyToRunHelperNode.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Diagnostics;
 
 using ILCompiler.DependencyAnalysis.X86;
 
@@ -18,12 +19,6 @@ protected override void EmitCode(NodeFactory factory, ref X86Emitter encoder, bo
         {
             switch (Id)
             {
-                case ReadyToRunHelperId.VirtualCall:
-                    {
-                        encoder.EmitINT3();
-                    }
-                    break;
-
                 case ReadyToRunHelperId.GetNonGCStaticBase:
                     {
                         MetadataType target = (MetadataType)Target;
@@ -51,25 +46,146 @@ protected override void EmitCode(NodeFactory factory, ref X86Emitter encoder, bo
 
                 case ReadyToRunHelperId.GetThreadStaticBase:
                     {
-                        encoder.EmitINT3();
+                        MetadataType target = (MetadataType)Target;
+                        ISortableSymbolNode index = factory.TypeThreadStaticIndex(target);
+                        if (index is TypeThreadStaticIndexNode ti && ti.IsInlined)
+                        {
+                            throw new NotImplementedException();
+                        }
+                        else
+                        {
+                            encoder.EmitMOV(encoder.TargetRegister.Result, index);
+
+                            // First arg: address of the TypeManager slot that provides the helper with
+                            // information about module index and the type manager instance (which is used
+                            // for initialization on first access).
+                            AddrMode loadFromEax = new AddrMode(encoder.TargetRegister.Result, null, 0, 0, AddrModeSize.Int32);
+                            encoder.EmitMOV(encoder.TargetRegister.Arg0, ref loadFromEax);
+
+                            // Second arg: index of the type in the ThreadStatic section of the modules
+                            AddrMode loadFromEaxAndDelta = new AddrMode(encoder.TargetRegister.Result, null, factory.Target.PointerSize, 0, AddrModeSize.Int32);
+                            encoder.EmitMOV(encoder.TargetRegister.Arg1, ref loadFromEaxAndDelta);
+
+                            ISymbolNode helper = factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType);
+                            if (!factory.PreinitializationManager.HasLazyStaticConstructor(target))
+                            {
+                                encoder.EmitJMP(helper);
+                            }
+                            else
+                            {
+                                encoder.EmitMOV(encoder.TargetRegister.Result, factory.TypeNonGCStaticsSymbol(target), -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target));
+
+                                AddrMode initialized = new AddrMode(encoder.TargetRegister.Result, null, 0, 0, AddrModeSize.Int32);
+                                encoder.EmitCMP(ref initialized, 0);
+                                encoder.EmitJE(helper);
+
+                                // Add extra parameter and tail call
+                                encoder.EmitStackDup();
+                                AddrMode storeAtEspPlus4 = new AddrMode(Register.ESP, null, 4, 0, AddrModeSize.Int32);
+                                encoder.EmitMOV(ref storeAtEspPlus4, encoder.TargetRegister.Result);
+
+                                encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase));
+                            }
+                        }
                     }
                     break;
 
                 case ReadyToRunHelperId.GetGCStaticBase:
                     {
-                        encoder.EmitINT3();
+                        MetadataType target = (MetadataType)Target;
+                        bool hasLazyStaticConstructor = factory.PreinitializationManager.HasLazyStaticConstructor(target);
+                        encoder.EmitMOV(encoder.TargetRegister.Result, factory.TypeGCStaticsSymbol(target));
+                        AddrMode loadFromEax = new AddrMode(encoder.TargetRegister.Result, null, 0, 0, AddrModeSize.Int32);
+                        encoder.EmitMOV(encoder.TargetRegister.Result, ref loadFromEax);
+
+                        if (!hasLazyStaticConstructor)
+                        {
+                            encoder.EmitRET();
+                        }
+                        else
+                        {
+                            // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region.
+                            encoder.EmitMOV(encoder.TargetRegister.Arg0, factory.TypeNonGCStaticsSymbol(target), -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target));
+
+                            AddrMode initialized = new AddrMode(encoder.TargetRegister.Arg0, null, 0, 0, AddrModeSize.Int32);
+                            encoder.EmitCMP(ref initialized, 0);
+                            encoder.EmitRETIfEqual();
+
+                            encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result);
+                            encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnGCStaticBase));
+                        }
                     }
                     break;
 
                 case ReadyToRunHelperId.DelegateCtor:
                     {
-                        encoder.EmitINT3();
+                        DelegateCreationInfo target = (DelegateCreationInfo)Target;
+
+                        encoder.EmitStackDup();
+
+                        if (target.TargetNeedsVTableLookup)
+                        {
+                            Debug.Assert(!target.TargetMethod.CanMethodBeInSealedVTable(factory));
+
+                            AddrMode loadFromThisPtr = new AddrMode(encoder.TargetRegister.Arg1, null, 0, 0, AddrModeSize.Int32);
+                            encoder.EmitMOV(encoder.TargetRegister.Result, ref loadFromThisPtr);
+
+                            int slot = 0;
+                            if (!relocsOnly)
+                                slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, target.TargetMethod, target.TargetMethod.OwningType);
+
+                            Debug.Assert(slot != -1);
+                            AddrMode loadFromSlot = new AddrMode(encoder.TargetRegister.Result, null, EETypeNode.GetVTableOffset(factory.Target.PointerSize) + (slot * factory.Target.PointerSize), 0, AddrModeSize.Int32);
+                            encoder.EmitMOV(encoder.TargetRegister.Result, ref loadFromSlot);
+                        }
+                        else
+                        {
+                            encoder.EmitMOV(encoder.TargetRegister.Result, target.GetTargetNode(factory));
+                        }
+
+                        AddrMode storeAtEspPlus4 = new AddrMode(Register.ESP, null, 4, 0, AddrModeSize.Int32);
+                        if (target.Thunk != null)
+                        {
+                            Debug.Assert(target.Constructor.Method.Signature.Length == 3);
+                            AddrMode storeAtEspPlus8 = new AddrMode(Register.ESP, null, 8, 0, AddrModeSize.Int32);
+                            encoder.EmitStackDup();
+                            encoder.EmitMOV(ref storeAtEspPlus8, encoder.TargetRegister.Result);
+                            encoder.EmitMOV(ref storeAtEspPlus4, target.Thunk);
+                        }
+                        else
+                        {
+                            Debug.Assert(target.Constructor.Method.Signature.Length == 2);
+                            encoder.EmitMOV(ref storeAtEspPlus4, encoder.TargetRegister.Result);
+                        }
+
+                        encoder.EmitJMP(target.Constructor);
                     }
                     break;
 
                 case ReadyToRunHelperId.ResolveVirtualFunction:
                     {
-                        encoder.EmitINT3();
+                        MethodDesc targetMethod = (MethodDesc)Target;
+                        if (targetMethod.OwningType.IsInterface)
+                        {
+                            encoder.EmitMOV(encoder.TargetRegister.Arg1, factory.InterfaceDispatchCell(targetMethod));
+                            encoder.EmitJMP(factory.ExternSymbol("RhpResolveInterfaceMethod"));
+                        }
+                        else
+                        {
+                            if (relocsOnly)
+                                break;
+
+                            AddrMode loadFromThisPtr = new AddrMode(encoder.TargetRegister.Arg0, null, 0, 0, AddrModeSize.Int32);
+                            encoder.EmitMOV(encoder.TargetRegister.Result, ref loadFromThisPtr);
+
+                            Debug.Assert(!targetMethod.CanMethodBeInSealedVTable(factory));
+
+                            int slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, targetMethod, targetMethod.OwningType);
+                            Debug.Assert(slot != -1);
+                            AddrMode loadFromSlot = new AddrMode(encoder.TargetRegister.Result, null, EETypeNode.GetVTableOffset(factory.Target.PointerSize) + (slot * factory.Target.PointerSize), 0, AddrModeSize.Int32);
+                            encoder.EmitMOV(encoder.TargetRegister.Result, ref loadFromSlot);
+                            encoder.EmitRET();
+                        }
                     }
                     break;
 
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeMetadataNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeMetadataNode.cs
index b76ec72538a2..87a2c829c026 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeMetadataNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/TypeMetadataNode.cs
@@ -49,10 +49,6 @@ public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFacto
             if (baseType != null)
                 GetMetadataDependencies(ref dependencies, factory, baseType, "Base type of a reflectable type");
 
-            // TODO-SIZE: if we start trimming interface lists, we can probably trim here
-            foreach (DefType interfaceType in _type.ExplicitlyImplementedInterfaces)
-                GetMetadataDependencies(ref dependencies, factory, interfaceType, "Interface of a reflectable type");
-
             var mdManager = (UsageBasedMetadataManager)factory.MetadataManager;
 
             if (_type.IsEnum)
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VTableSliceNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VTableSliceNode.cs
index 8a398b7809e2..9050cf8c097f 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VTableSliceNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VTableSliceNode.cs
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System;
 using System.Collections.Generic;
 
 using Internal.TypeSystem;
@@ -21,20 +22,54 @@ public abstract class VTableSliceNode : DependencyNodeCore<NodeFactory>
         public VTableSliceNode(TypeDesc type)
         {
             Debug.Assert(!type.IsArray, "Wanted to call GetClosestDefType?");
+            Debug.Assert(!type.IsGenericDefinition);
             _type = type;
         }
 
+        protected static MethodDesc[] ComputeSlots(TypeDesc type)
+        {
+            var slots = default(ArrayBuilder<MethodDesc>);
+
+            bool isObjectType = type.IsObject;
+            DefType defType = type.GetClosestDefType();
+
+            IEnumerable<MethodDesc> allSlots = type.IsInterface ?
+                type.GetAllVirtualMethods() : defType.EnumAllVirtualSlots();
+
+            foreach (var method in allSlots)
+            {
+                // GVMs are not emitted in the type's vtable.
+                if (method.HasInstantiation)
+                    continue;
+
+                // Finalizers are called via a field on the MethodTable, not through the VTable
+                if (isObjectType && method.Name == "Finalize")
+                    continue;
+
+                // Current type doesn't define this slot.
+                if (method.OwningType != defType)
+                    continue;
+
+                slots.Add(method);
+            }
+
+            return slots.ToArray();
+        }
+
         public abstract IReadOnlyList<MethodDesc> Slots
         {
             get;
         }
 
+        public abstract bool IsSlotUsed(MethodDesc slot);
+
         public TypeDesc Type => _type;
 
         /// <summary>
-        /// Gets a value indicating whether the slots are assigned at the beginning of the compilation.
+        /// Gets a value indicating whether <see cref="VirtualMethodUseNode"> is needed to track virtual method uses
+        /// in this vtable slice.
         /// </summary>
-        public abstract bool HasFixedSlots
+        public abstract bool HasKnownVirtualMethodUse
         {
             get;
         }
@@ -47,10 +82,14 @@ public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFacto
         {
             if (_type.HasBaseType)
             {
-                return new[] { new DependencyListEntry(factory.VTable(_type.BaseType), "Base type VTable") };
+                yield return new DependencyListEntry(factory.VTable(_type.BaseType), "Base type VTable");
             }
 
-            return null;
+            TypeDesc canonType = _type.ConvertToCanonForm(CanonicalFormKind.Specific);
+            if (_type != canonType)
+            {
+                yield return new DependencyListEntry(factory.VTable(canonType), "Canonical type VTable");
+            }
         }
 
         public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDependencies(NodeFactory factory) => null;
@@ -66,9 +105,9 @@ public override IEnumerable<DependencyListEntry> GetStaticDependencies(NodeFacto
     /// </summary>
     internal class PrecomputedVTableSliceNode : VTableSliceNode
     {
-        private readonly IReadOnlyList<MethodDesc> _slots;
+        private readonly MethodDesc[] _slots;
 
-        public PrecomputedVTableSliceNode(TypeDesc type, IReadOnlyList<MethodDesc> slots)
+        public PrecomputedVTableSliceNode(TypeDesc type, MethodDesc[] slots)
             : base(type)
         {
             _slots = slots;
@@ -82,7 +121,13 @@ public override IReadOnlyList<MethodDesc> Slots
             }
         }
 
-        public override bool HasFixedSlots
+        public override bool IsSlotUsed(MethodDesc slot)
+        {
+            Debug.Assert(Array.IndexOf(_slots, slot) != -1);
+            return true;
+        }
+
+        public override bool HasKnownVirtualMethodUse
         {
             get
             {
@@ -101,36 +146,6 @@ public EagerlyBuiltVTableSliceNode(TypeDesc type)
             : base(type, ComputeSlots(type))
         {
         }
-
-        private static MethodDesc[] ComputeSlots(TypeDesc type)
-        {
-            var slots = default(ArrayBuilder<MethodDesc>);
-
-            bool isObjectType = type.IsObject;
-            DefType defType = type.GetClosestDefType();
-
-            IEnumerable<MethodDesc> allSlots = type.IsInterface ?
-                type.GetAllVirtualMethods() : defType.EnumAllVirtualSlots();
-
-            foreach (var method in allSlots)
-            {
-                // GVMs are not emitted in the type's vtable.
-                if (method.HasInstantiation)
-                    continue;
-
-                // Finalizers are called via a field on the MethodTable, not through the VTable
-                if (isObjectType && method.Name == "Finalize")
-                    continue;
-
-                // Current type doesn't define this slot.
-                if (method.OwningType != defType)
-                    continue;
-
-                slots.Add(method);
-            }
-
-            return slots.ToArray();
-        }
     }
 
     /// <summary>
@@ -139,43 +154,37 @@ private static MethodDesc[] ComputeSlots(TypeDesc type)
     /// </summary>
     internal sealed class LazilyBuiltVTableSliceNode : VTableSliceNode
     {
-        private HashSet<MethodDesc> _usedMethods = new HashSet<MethodDesc>();
-        private MethodDesc[] _slots;
+        private readonly HashSet<MethodDesc> _usedMethods = new HashSet<MethodDesc>();
+        private readonly MethodDesc[] _slots;
+#if DEBUG
+        private bool _isLocked;
+#endif
 
-        public LazilyBuiltVTableSliceNode(TypeDesc type)
+        public LazilyBuiltVTableSliceNode(TypeDesc type, MethodDesc[] slots = null)
             : base(type)
         {
+            _slots = slots ?? ComputeSlots(type);
         }
 
         public override IReadOnlyList<MethodDesc> Slots
         {
             get
             {
-                if (_slots == null)
-                {
-                    // Sort the lazily populated slots in metadata order (the order in which they show up
-                    // in GetAllMethods()).
-                    // This ensures that Foo<string> and Foo<object> will end up with the same vtable
-                    // no matter the order in which VirtualMethodUse nodes populated it.
-                    ArrayBuilder<MethodDesc> slotsBuilder = default(ArrayBuilder<MethodDesc>);
-                    DefType defType = _type.GetClosestDefType();
-                    foreach (var method in defType.GetAllMethods())
-                    {
-                        if (_usedMethods.Contains(method))
-                            slotsBuilder.Add(method);
-                    }
-                    Debug.Assert(_usedMethods.Count == slotsBuilder.Count);
-                    _slots = slotsBuilder.ToArray();
-
-                    // Null out used methods so that we AV if someone tries to add now.
-                    _usedMethods = null;
-                }
-
                 return _slots;
             }
         }
 
-        public override bool HasFixedSlots
+        public override bool IsSlotUsed(MethodDesc slot)
+        {
+            Debug.Assert(Array.IndexOf(_slots, slot) != -1);
+#if DEBUG
+            _isLocked = true;
+#endif
+
+            return _usedMethods.Contains(slot);
+        }
+
+        public override bool HasKnownVirtualMethodUse
         {
             get
             {
@@ -188,8 +197,11 @@ public void AddEntry(MethodDesc virtualMethod)
             // GVMs are not emitted in the type's vtable.
             Debug.Assert(!virtualMethod.HasInstantiation);
             Debug.Assert(virtualMethod.IsVirtual);
-            Debug.Assert(_slots == null && _usedMethods != null);
             Debug.Assert(virtualMethod.OwningType == _type);
+            Debug.Assert(Array.IndexOf(_slots, virtualMethod) != -1);
+#if DEBUG
+            Debug.Assert(!_isLocked);
+#endif
 
             // Finalizers are called via a field on the MethodTable, not through the VTable
             if (_type.IsObject && virtualMethod.Name == "Finalize")
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VirtualMethodUseNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VirtualMethodUseNode.cs
index 2a637c46a77d..ca716e795ccb 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VirtualMethodUseNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/VirtualMethodUseNode.cs
@@ -85,7 +85,7 @@ public override IEnumerable<CombinedDependencyListEntry> GetConditionalStaticDep
             DefType universalCanonicalOwningType = (DefType)_decl.OwningType.ConvertToCanonForm(CanonicalFormKind.Universal);
             Debug.Assert(universalCanonicalOwningType.IsCanonicalSubtype(CanonicalFormKind.Universal));
 
-            if (!factory.VTable(universalCanonicalOwningType).HasFixedSlots)
+            if (!factory.VTable(universalCanonicalOwningType).HasKnownVirtualMethodUse)
             {
                 // This code ensures that in cases where we don't structurally force all universal canonical instantiations
                 // to have full vtables, that we ensure that all vtables are equivalently shaped between universal and non-universal types
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ILScanner.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ILScanner.cs
index 630de4a74706..97820c425b33 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ILScanner.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ILScanner.cs
@@ -264,7 +264,7 @@ public ReadOnlyFieldPolicy GetReadOnlyFieldPolicy()
 
         private sealed class ScannedVTableProvider : VTableSliceProvider
         {
-            private Dictionary<TypeDesc, IReadOnlyList<MethodDesc>> _vtableSlices = new Dictionary<TypeDesc, IReadOnlyList<MethodDesc>>();
+            private readonly Dictionary<TypeDesc, MethodDesc[]> _vtableSlices = new Dictionary<TypeDesc, MethodDesc[]>();
 
             public ScannedVTableProvider(ImmutableArray<DependencyNodeCore<NodeFactory>> markedNodes)
             {
@@ -273,7 +273,16 @@ public ScannedVTableProvider(ImmutableArray<DependencyNodeCore<NodeFactory>> mar
                     var vtableSliceNode = node as VTableSliceNode;
                     if (vtableSliceNode != null)
                     {
-                        _vtableSlices.Add(vtableSliceNode.Type, vtableSliceNode.Slots);
+                        ArrayBuilder<MethodDesc> usedSlots = default;
+
+                        for (int i = 0; i < vtableSliceNode.Slots.Count; i++)
+                        {
+                            MethodDesc slot = vtableSliceNode.Slots[i];
+                            if (vtableSliceNode.IsSlotUsed(slot))
+                                usedSlots.Add(slot);
+                        }
+
+                        _vtableSlices.Add(vtableSliceNode.Type, usedSlots.ToArray());
                     }
                 }
             }
@@ -284,7 +293,7 @@ internal override VTableSliceNode GetSlice(TypeDesc type)
                 // https://github.com/dotnet/corert/issues/3873
                 if (type.GetTypeDefinition() is Internal.TypeSystem.Ecma.EcmaType)
                 {
-                    if (!_vtableSlices.TryGetValue(type, out IReadOnlyList<MethodDesc> slots))
+                    if (!_vtableSlices.TryGetValue(type, out MethodDesc[] slots))
                     {
                         // If we couldn't find the vtable slice information for this type, it's because the scanner
                         // didn't correctly predict what will be needed.
@@ -297,7 +306,7 @@ internal override VTableSliceNode GetSlice(TypeDesc type)
                         string typeName = ExceptionTypeNameFormatter.Instance.FormatName(type);
                         throw new ScannerFailedException($"VTable of type '{typeName}' not computed by the IL scanner.");
                     }
-                    return new PrecomputedVTableSliceNode(type, slots);
+                    return new LazilyBuiltVTableSliceNode(type, slots);
                 }
                 else
                     return new LazilyBuiltVTableSliceNode(type);
@@ -413,7 +422,8 @@ public override DictionaryLayoutNode GetLayout(TypeSystemEntity methodOrType)
 
         private sealed class ScannedDevirtualizationManager : DevirtualizationManager
         {
-            private HashSet<TypeDesc> _constructedTypes = new HashSet<TypeDesc>();
+            private HashSet<TypeDesc> _constructedMethodTables = new HashSet<TypeDesc>();
+            private HashSet<TypeDesc> _canonConstructedMethodTables = new HashSet<TypeDesc>();
             private HashSet<TypeDesc> _canonConstructedTypes = new HashSet<TypeDesc>();
             private HashSet<TypeDesc> _unsealedTypes = new HashSet<TypeDesc>();
             private Dictionary<TypeDesc, HashSet<TypeDesc>> _implementators = new();
@@ -442,7 +452,12 @@ public ScannedDevirtualizationManager(NodeFactory factory, ImmutableArray<Depend
 
                     if (type != null)
                     {
-                        _constructedTypes.Add(type);
+                        _constructedMethodTables.Add(type);
+                        TypeDesc canonForm = type.ConvertToCanonForm(CanonicalFormKind.Specific);
+                        if (canonForm != type)
+                        {
+                            _canonConstructedMethodTables.Add(canonForm);
+                        }
 
                         if (type.IsInterface)
                         {
@@ -687,7 +702,11 @@ protected override MethodDesc ResolveVirtualMethod(MethodDesc declMethod, DefTyp
                 return result;
             }
 
-            public override bool CanConstructType(TypeDesc type) => _constructedTypes.Contains(type);
+            public override bool CanReferenceConstructedMethodTable(TypeDesc type)
+                => _constructedMethodTables.Contains(type);
+
+            public override bool CanReferenceConstructedTypeOrCanonicalFormOfType(TypeDesc type)
+                => _constructedMethodTables.Contains(type) || _canonConstructedMethodTables.Contains(type);
 
             public override TypeDesc[] GetImplementingClasses(TypeDesc type)
             {
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ILStreamReader.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ILStreamReader.cs
deleted file mode 100644
index 8238d72a3f8a..000000000000
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ILStreamReader.cs
+++ /dev/null
@@ -1,181 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-
-using Internal.TypeSystem;
-using Internal.IL;
-
-namespace Internal.Compiler
-{
-    /// <summary>
-    /// IL Opcode reader in external reader style where the reading is done by trying to read
-    /// various opcodes, and the reader can indicate success or failure of reading a particular opcode
-    ///
-    /// Used by logic which is designed to encode information in il structure, but not used
-    /// to support general compilation of IL.
-    /// </summary>
-    public struct ILStreamReader
-    {
-        private ILReader _reader;
-        private readonly MethodIL _methodIL;
-
-        public ILStreamReader(MethodIL methodIL)
-        {
-            _methodIL = methodIL;
-            _reader = new ILReader(methodIL.GetILBytes());
-        }
-
-        public bool HasNextInstruction
-        {
-            get
-            {
-                return _reader.HasNext;
-            }
-        }
-
-        public bool TryReadLdtoken(out int token)
-        {
-            if (_reader.PeekILOpcode() != ILOpcode.ldtoken)
-            {
-                token = 0;
-                return false;
-            }
-
-            _reader.ReadILOpcode();
-            token = _reader.ReadILToken();
-            return true;
-        }
-
-        public int ReadLdtoken()
-        {
-            int result;
-            if (!TryReadLdtoken(out result))
-                throw new BadImageFormatException();
-
-            return result;
-        }
-
-        public bool TryReadLdtokenAsTypeSystemEntity(out TypeSystemEntity entity)
-        {
-            int token;
-            bool tokenResolved;
-            try
-            {
-                tokenResolved = TryReadLdtoken(out token);
-                entity = tokenResolved ? (TypeSystemEntity)_methodIL.GetObject(token) : null;
-            }
-            catch (TypeSystemException)
-            {
-                tokenResolved = false;
-                entity = null;
-            }
-
-            return tokenResolved;
-        }
-
-        public TypeSystemEntity ReadLdtokenAsTypeSystemEntity()
-        {
-            TypeSystemEntity result;
-            if (!TryReadLdtokenAsTypeSystemEntity(out result))
-                throw new BadImageFormatException();
-
-            return result;
-        }
-
-        public bool TryReadLdcI4(out int value)
-        {
-            ILOpcode opcode = _reader.PeekILOpcode();
-
-            if (opcode == ILOpcode.ldc_i4) // ldc.i4
-            {
-                _reader.ReadILOpcode();
-                value = unchecked((int)_reader.ReadILUInt32());
-                return true;
-            }
-
-            if ((opcode >= ILOpcode.ldc_i4_m1) && (opcode <= ILOpcode.ldc_i4_8)) // ldc.m1 to ldc.i4.8
-            {
-                _reader.ReadILOpcode();
-                value = -1 + ((int)opcode) - 0x15;
-                return true;
-            }
-
-            if (opcode == ILOpcode.ldc_i4_s) // ldc.i4.s
-            {
-                _reader.ReadILOpcode();
-
-                value = (int)unchecked((sbyte)_reader.ReadILByte());
-                return true;
-            }
-            value = 0;
-            return false;
-        }
-
-        public int ReadLdcI4()
-        {
-            int result;
-            if (!TryReadLdcI4(out result))
-                throw new BadImageFormatException();
-
-            return result;
-        }
-
-        public bool TryReadRet()
-        {
-            ILOpcode opcode = _reader.PeekILOpcode();
-            if (opcode == ILOpcode.ret)
-            {
-                _reader.ReadILOpcode();
-                return true;
-            }
-            return false;
-        }
-
-        public void ReadRet()
-        {
-            if (!TryReadRet())
-                throw new BadImageFormatException();
-        }
-
-        public bool TryReadPop()
-        {
-            ILOpcode opcode = _reader.PeekILOpcode();
-            if (opcode == ILOpcode.pop)
-            {
-                _reader.ReadILOpcode();
-                return true;
-            }
-            return false;
-        }
-
-        public void ReadPop()
-        {
-            if (!TryReadPop())
-                throw new BadImageFormatException();
-        }
-
-        public bool TryReadLdstr(out string ldstrString)
-        {
-            if (_reader.PeekILOpcode() != ILOpcode.ldstr)
-            {
-                ldstrString = null;
-                return false;
-            }
-
-            _reader.ReadILOpcode();
-            int token = _reader.ReadILToken();
-            ldstrString = (string)_methodIL.GetObject(token);
-            return true;
-        }
-
-        public string ReadLdstr()
-        {
-            string result;
-            if (!TryReadLdstr(out result))
-                throw new BadImageFormatException();
-
-            return result;
-        }
-    }
-}
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
index f51a5c130651..4b6f2f4914a4 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
@@ -83,27 +83,32 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id,
                     mangledName = "RhpAssignRefECX";
                     break;
                 case ReadyToRunHelper.WriteBarrier_EDI:
-                    mangledName = "RhpAssignRefECX";
+                    mangledName = "RhpAssignRefEDI";
                     break;
                 case ReadyToRunHelper.WriteBarrier_ESI:
-                    mangledName = "RhpAssignRefECX";
+                    mangledName = "RhpAssignRefESI";
+                    break;
+                case ReadyToRunHelper.WriteBarrier_EBP:
+                    mangledName = "RhpAssignRefEBP";
                     break;
                 case ReadyToRunHelper.CheckedWriteBarrier_EAX:
                     mangledName = "RhpCheckedAssignRefEAX";
                     break;
                 case ReadyToRunHelper.CheckedWriteBarrier_EBX:
-                    mangledName = "RhpCheckedAssignRefEAX";
+                    mangledName = "RhpCheckedAssignRefEBX";
                     break;
                 case ReadyToRunHelper.CheckedWriteBarrier_ECX:
                     mangledName = "RhpCheckedAssignRefECX";
                     break;
                 case ReadyToRunHelper.CheckedWriteBarrier_EDI:
-                    mangledName = "RhpCheckedAssignRefECX";
+                    mangledName = "RhpCheckedAssignRefEDI";
                     break;
                 case ReadyToRunHelper.CheckedWriteBarrier_ESI:
-                    mangledName = "RhpCheckedAssignRefECX";
+                    mangledName = "RhpCheckedAssignRefESI";
+                    break;
+                case ReadyToRunHelper.CheckedWriteBarrier_EBP:
+                    mangledName = "RhpCheckedAssignRefEBP";
                     break;
-
                 case ReadyToRunHelper.Box:
                 case ReadyToRunHelper.Box_Nullable:
                     mangledName = "RhBox";
@@ -137,10 +142,16 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id,
                     break;
 
                 case ReadyToRunHelper.MemCpy:
-                    mangledName = "memcpy"; // TODO: Null reference handling
+                    mangledName = "RhSpanHelpers_MemCopy";
                     break;
                 case ReadyToRunHelper.MemSet:
-                    mangledName = "memset"; // TODO: Null reference handling
+                    mangledName = "RhSpanHelpers_MemSet";
+                    break;
+                case ReadyToRunHelper.MemZero:
+                    mangledName = "RhSpanHelpers_MemZero";
+                    break;
+                case ReadyToRunHelper.NativeMemSet:
+                    mangledName = "memset";
                     break;
 
                 case ReadyToRunHelper.GetRuntimeTypeHandle:
@@ -190,10 +201,10 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id,
                     break;
 
                 case ReadyToRunHelper.DblRem:
-                    mangledName = "RhpDblRem";
+                    mangledName = "fmod";
                     break;
                 case ReadyToRunHelper.FltRem:
-                    mangledName = "RhpFltRem";
+                    mangledName = "fmodf";
                     break;
 
                 case ReadyToRunHelper.LMul:
@@ -339,13 +350,5 @@ public static string GetNewObjectHelperForType(TypeDesc type)
 
             return "RhpNewFast";
         }
-
-        public static string GetNewArrayHelperForType(TypeDesc type)
-        {
-            if (type.RequiresAlign8())
-                return "RhpNewArrayAlign8";
-
-            return "RhpNewArray";
-        }
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MetadataManager.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MetadataManager.cs
index 18a1af0e9840..b758f89e9478 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MetadataManager.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/MetadataManager.cs
@@ -79,6 +79,7 @@ private readonly SortedSet<TypeGVMEntriesNode> _typeGVMEntries
         private readonly SortedSet<TypeDesc> _typeTemplates = new SortedSet<TypeDesc>(TypeSystemComparer.Instance);
         private readonly SortedSet<MetadataType> _typesWithGenericStaticBaseInfo = new SortedSet<MetadataType>(TypeSystemComparer.Instance);
         private readonly SortedSet<MethodDesc> _genericMethodHashtableEntries = new SortedSet<MethodDesc>(TypeSystemComparer.Instance);
+        private readonly HashSet<TypeDesc> _usedInterfaces = new HashSet<TypeDesc>();
 
         private List<(DehydratableObjectNode Node, ObjectNode.ObjectData Data)> _dehydratableData = new List<(DehydratableObjectNode Node, ObjectNode.ObjectData data)>();
 
@@ -356,6 +357,11 @@ protected virtual void Graph_NewMarkedNode(DependencyNodeCore<NodeFactory> obj)
             {
                 _genericMethodHashtableEntries.Add(genericMethodsHashtableEntryNode.Method);
             }
+
+            if (obj is InterfaceUseNode interfaceUse)
+            {
+                _usedInterfaces.Add(interfaceUse.Type);
+            }
         }
 
         protected virtual bool AllMethodsCanBeReflectable => false;
@@ -1023,6 +1029,12 @@ public IEnumerable<MethodDesc> GetGenericMethodHashtableEntries()
             return _genericMethodHashtableEntries;
         }
 
+        public bool IsInterfaceUsed(TypeDesc type)
+        {
+            Debug.Assert(type.IsTypeDefinition);
+            return _usedInterfaces.Contains(type);
+        }
+
         internal IEnumerable<IMethodBodyNode> GetCompiledMethodBodies()
         {
             return _methodBodiesGenerated;
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/NodeMangler.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/NodeMangler.cs
index 72110ed895d5..bcf71ddab35b 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/NodeMangler.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/NodeMangler.cs
@@ -26,5 +26,7 @@ public abstract class NodeMangler
         public abstract string ThreadStaticsIndex(TypeDesc type);
         public abstract string TypeGenericDictionary(TypeDesc type);
         public abstract string MethodGenericDictionary(MethodDesc method);
+        public abstract string ExternMethod(string unmangledName, MethodDesc method);
+        public abstract string ExternVariable(string unmangledName);
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CoffObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CoffObjectWriter.cs
index 304c9f6b5cca..f319eebfe6ce 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CoffObjectWriter.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CoffObjectWriter.cs
@@ -92,7 +92,7 @@ private protected override void CreateSection(ObjectNodeSection section, string
                 Name =
                     section == ObjectNodeSection.TLSSection ? ".tls$" :
                     section == ObjectNodeSection.HydrationTargetSection ? "hydrated" :
-                    (section.Name.StartsWith(".") ? section.Name : "." + section.Name),
+                    (section.Name.StartsWith('.') ? section.Name : "." + section.Name),
                 SectionCharacteristics = section.Type switch
                 {
                     SectionType.ReadOnly =>
@@ -213,6 +213,8 @@ private protected override void EmitSymbolTable(
             IDictionary<string, SymbolDefinition> definedSymbols,
             SortedSet<string> undefinedSymbols)
         {
+            Feat00Flags feat00Flags = _machine is Machine.I386 ? Feat00Flags.SafeSEH : 0;
+
             foreach (var (symbolName, symbolDefinition) in definedSymbols)
             {
                 if (_symbolNameToIndex.TryGetValue(symbolName, out uint symbolIndex))
@@ -253,13 +255,18 @@ private protected override void EmitSymbolTable(
                     gfidsSectionWriter.WriteLittleEndian<uint>(_symbolNameToIndex[symbolName]);
                 }
 
+                feat00Flags |= Feat00Flags.ControlFlowGuard;
+            }
+
+            if (feat00Flags != 0)
+            {
                 // Emit the feat.00 symbol that controls various linker behaviors
                 _symbols.Add(new CoffSymbol
                 {
                     Name = "@feat.00",
                     StorageClass = CoffSymbolClass.IMAGE_SYM_CLASS_STATIC,
                     SectionIndex = uint.MaxValue, // IMAGE_SYM_ABSOLUTE
-                    Value = 0x800, // cfGuardCF flags this object as control flow guard aware
+                    Value = (uint)feat00Flags,
                 });
             }
         }
@@ -446,8 +453,8 @@ private protected override void EmitUnwindInfo(
                     // Emit RUNTIME_FUNCTION
                     pdataSectionWriter.EmitAlignment(4);
                     pdataSectionWriter.EmitSymbolReference(IMAGE_REL_BASED_ADDR32NB, currentSymbolName, start);
-                    // Only x64 has the End symbol
-                    if (_machine == Machine.Amd64)
+                    // Only x86/x64 has the End symbol
+                    if (_machine is Machine.I386 or Machine.Amd64)
                     {
                         pdataSectionWriter.EmitSymbolReference(IMAGE_REL_BASED_ADDR32NB, currentSymbolName, end);
                     }
@@ -628,6 +635,35 @@ private protected override void EmitDebugFunctionInfo(
             }
         }
 
+        private protected override void EmitDebugThunkInfo(
+            string methodName,
+            SymbolDefinition methodSymbol,
+            INodeWithDebugInfo debugNode)
+        {
+            if (!debugNode.GetNativeSequencePoints().Any())
+                return;
+
+            CodeViewSymbolsBuilder debugSymbolsBuilder;
+
+            if (ShouldShareSymbol((ObjectNode)debugNode))
+            {
+                // If the method is emitted in COMDAT section then we need to create an
+                // associated COMDAT section for the debugging symbols.
+                var sectionWriter = GetOrCreateSection(DebugSymbolSection, methodName, null);
+                debugSymbolsBuilder = new CodeViewSymbolsBuilder(_nodeFactory.Target.Architecture, sectionWriter);
+            }
+            else
+            {
+                debugSymbolsBuilder = _debugSymbolsBuilder;
+            }
+
+            debugSymbolsBuilder.EmitLineInfo(
+                _debugFileTableBuilder,
+                methodName,
+                methodSymbol.Size,
+                debugNode.GetNativeSequencePoints());
+        }
+
         private protected override void EmitDebugSections(IDictionary<string, SymbolDefinition> definedSymbols)
         {
             _debugSymbolsBuilder.WriteUserDefinedTypes(_debugTypesBuilder.UserDefinedTypes);
@@ -1089,5 +1125,14 @@ public static uint CalculateChecksum(Stream stream)
                 return crc;
             }
         }
+
+        private enum Feat00Flags : uint
+        {
+            SafeSEH = 1,
+            StackGuard = 0x100,
+            SoftwareDevelopmentLifecycle = 0x200,
+            ControlFlowGuard = 0x800,
+            ExceptionContinuationMetadata = 0x4000,
+        }
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfInfo.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfInfo.cs
index b1408e3eae3b..fa02122c9a87 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfInfo.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfInfo.cs
@@ -510,7 +510,6 @@ private static void DumpVarLocation(DwarfExpressionBuilder e, VarLoc loc)
                     break;
                 case VarLocType.VLT_STK:
                 case VarLocType.VLT_STK2:
-                case VarLocType.VLT_FPSTK:
                 case VarLocType.VLT_STK_BYREF:
                     e.OpBReg(loc.B, loc.C);
                     if (loc.LocationType == VarLocType.VLT_STK_BYREF)
@@ -531,11 +530,16 @@ private static void DumpVarLocation(DwarfExpressionBuilder e, VarLoc loc)
                     e.OpPiece();
                     break;
                 case VarLocType.VLT_STK_REG:
-                    e.OpBReg(loc.C, loc.D);
+                    e.OpBReg(loc.B, loc.C);
                     e.OpPiece();
-                    e.OpReg(loc.B);
+                    e.OpReg(loc.D);
                     e.OpPiece();
                     break;
+                case VarLocType.VLT_FPSTK:
+                    // On ARM32 this is used to encode actual registers. This may be wrong for x86
+                    // which we don't support anyway.
+                    e.OpReg(loc.B);
+                    break;
                 default:
                     // Unsupported
                     Debug.Assert(loc.LocationType != VarLocType.VLT_FIXED_VA);
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfInfoWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfInfoWriter.cs
index cda75df2c8e1..18b2c34a4846 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfInfoWriter.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfInfoWriter.cs
@@ -3,6 +3,7 @@
 
 using System;
 using System.Buffers;
+using System.Buffers.Binary;
 using System.Collections.Generic;
 using System.Diagnostics;
 using ILCompiler.DependencyAnalysis;
@@ -106,7 +107,7 @@ public void WriteStringReference(string value)
         public void WriteInfoAbsReference(long offset)
         {
             Debug.Assert(offset < uint.MaxValue);
-            _infoSectionWriter.EmitSymbolReference(RelocType.IMAGE_REL_BASED_HIGHLOW, ".debug_info", offset);
+            WriteUInt32((uint)offset);
         }
 
         public void WriteInfoReference(uint typeIndex)
@@ -122,7 +123,7 @@ public void WriteInfoReference(uint typeIndex)
             }
             else
             {
-                _infoSectionWriter.EmitSymbolReference(RelocType.IMAGE_REL_BASED_HIGHLOW, ".debug_info", offset);
+                WriteUInt32(offset);
             }
         }
 
@@ -195,17 +196,10 @@ public void Dispose()
             // Debug.Assert(_dieStack.Count == 0);
 
             // Flush late bound forward references
-            int streamOffset = (int)_infoSectionWriter.Position;
             foreach (var lateBoundReference in _lateBoundReferences)
             {
                 uint offset = _builder.ResolveOffset(lateBoundReference.TypeIndex);
-
-                _infoSectionWriter.EmitRelocation(
-                    - streamOffset + lateBoundReference.Position,
-                    lateBoundReference.Data,
-                    RelocType.IMAGE_REL_BASED_HIGHLOW,
-                    ".debug_info",
-                    (int)offset);
+                BinaryPrimitives.WriteUInt32LittleEndian(lateBoundReference.Data, offset);
             }
 
             // Write abbreviation section
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Eabi/EabiNative.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Eabi/EabiNative.cs
index dafd0ef1cec0..82aab2a23d29 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Eabi/EabiNative.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Eabi/EabiNative.cs
@@ -31,6 +31,7 @@ internal static class EabiNative
         public const uint Tag_ABI_align_needed = 24;
         public const uint Tag_ABI_align_preserved = 25;
         public const uint Tag_ABI_enum_size = 26;
+        public const uint Tag_ABI_VFP_args = 28;
         public const uint Tag_ABI_optimization_goals = 30;
         public const uint Tag_CPU_unaligned_access = 34;
         public const uint Tag_ABI_FP_16bit_format = 38;
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs
index 7ccc9a42b53d..503985d92934 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs
@@ -35,6 +35,7 @@ internal sealed class ElfObjectWriter : UnixObjectWriter
     {
         private readonly bool _useInlineRelocationAddends;
         private readonly ushort _machine;
+        private readonly bool _useSoftFPAbi;
         private readonly List<ElfSectionDefinition> _sections = new();
         private readonly List<ElfSymbol> _symbols = new();
         private uint _localSymbolCount;
@@ -47,6 +48,7 @@ internal sealed class ElfObjectWriter : UnixObjectWriter
         private static readonly ObjectNodeSection ArmUnwindIndexSection = new ObjectNodeSection(".ARM.exidx", SectionType.UnwindData);
         private static readonly ObjectNodeSection ArmUnwindTableSection = new ObjectNodeSection(".ARM.extab", SectionType.ReadOnly);
         private static readonly ObjectNodeSection ArmAttributesSection = new ObjectNodeSection(".ARM.attributes", SectionType.ReadOnly);
+        private static readonly ObjectNodeSection ArmTextThunkSection = new ObjectNodeSection(".text.thunks", SectionType.Executable);
 
         public ElfObjectWriter(NodeFactory factory, ObjectWritingOptions options)
             : base(factory, options)
@@ -60,6 +62,7 @@ public ElfObjectWriter(NodeFactory factory, ObjectWritingOptions options)
                 _ => throw new NotSupportedException("Unsupported architecture")
             };
             _useInlineRelocationAddends = _machine is EM_386 or EM_ARM;
+            _useSoftFPAbi = _machine is EM_ARM && factory.Target.Abi == TargetAbi.NativeAotArmel;
 
             // By convention the symbol table starts with empty symbol
             _symbols.Add(new ElfSymbol {});
@@ -262,6 +265,10 @@ private protected override void EmitSymbolTable(
                 });
             }
 
+            bool useArmThunks = _machine is EM_ARM && undefinedSymbols.Count > 0;
+            int thunkSectionIndex = useArmThunks ? GetOrCreateSection(ArmTextThunkSection).SectionIndex : 0;
+            int thunkSymbolsIndex = 0;
+
             foreach (string externSymbol in undefinedSymbols)
             {
                 if (!_symbolNameToIndex.ContainsKey(externSymbol))
@@ -271,6 +278,20 @@ private protected override void EmitSymbolTable(
                         Name = externSymbol,
                         Info = (STB_GLOBAL << 4),
                     });
+
+                    if (useArmThunks)
+                    {
+                        sortedSymbols.Add(new ElfSymbol
+                        {
+                            Name = $"{externSymbol}$thunk",
+                            Value = (ulong)((thunkSymbolsIndex * 4) | 1u),
+                            Size = 4u,
+                            Section = _sections[thunkSectionIndex],
+                            Info = (byte)(STT_FUNC | (STB_GLOBAL << 4)),
+                            Other = STV_HIDDEN,
+                        });
+                        thunkSymbolsIndex++;
+                    }
                 }
             }
 
@@ -284,6 +305,35 @@ private protected override void EmitSymbolTable(
                 symbolIndex++;
             }
 
+            if (useArmThunks)
+            {
+                // For ARM32 we use MOVW/MOVT for branch relocations. That cannot be done for
+                // external code since the MOVW/MOVT relocations are static only and equivalents
+                // don't exist for the dynamic linker. In order to circumvent this issue we
+                // create thunks in the .text.thunk section that contain BL.W jumps to the external
+                // code. Our MOVW/MOVT relocations are then redirected to point to those thunks.
+                SectionWriter thunkSectionWriter = GetOrCreateSection(ArmTextThunkSection);
+                Span<byte> relocationEntry = stackalloc byte[8];
+                var relocationStream = new MemoryStream(8 * undefinedSymbols.Count);
+                _sections[thunkSectionWriter.SectionIndex].RelocationStream = relocationStream;
+                foreach (string externSymbol in undefinedSymbols)
+                {
+                    if (_symbolNameToIndex.TryGetValue($"{externSymbol}$thunk", out uint thunkSymbolIndex))
+                    {
+                        // Write the relocation to external symbol for the thunk
+                        BinaryPrimitives.WriteUInt32LittleEndian(relocationEntry, (uint)thunkSectionWriter.Position);
+                        BinaryPrimitives.WriteUInt32LittleEndian(relocationEntry.Slice(4), ((uint)_symbolNameToIndex[externSymbol] << 8) | R_ARM_THM_JUMP24);
+                        relocationStream.Write(relocationEntry);
+                        // Write the thunk code:
+                        // B.W <offset>
+                        thunkSectionWriter.WriteLittleEndian<uint>(0xBFFEF7FF);
+                        // Redirect the extern symbol to our thunk to make our relocations point
+                        // to the thunk
+                        _symbolNameToIndex[externSymbol] = thunkSymbolIndex;
+                    }
+                }
+            }
+
             // Update group sections links
             foreach ((string comdatName, ElfSectionDefinition groupSection) in _comdatNameToElfSection)
             {
@@ -465,6 +515,7 @@ private protected override void EmitSectionsAndLayout()
                 attributesBuilder.WriteAttribute(Tag_ABI_FP_number_model, 3); // IEEE 754
                 attributesBuilder.WriteAttribute(Tag_ABI_align_needed, 1); // 8-byte
                 attributesBuilder.WriteAttribute(Tag_ABI_align_preserved, 1); // 8-byte
+                attributesBuilder.WriteAttribute(Tag_ABI_VFP_args, _useSoftFPAbi ? 0ul : 1ul); // FP parameters passes in VFP registers
                 attributesBuilder.WriteAttribute(Tag_CPU_unaligned_access, 0); // None
                 attributesBuilder.EndSection();
             }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ObjectWriter.cs
index 0de7f8214e50..086cd56bd3b0 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ObjectWriter.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ObjectWriter.cs
@@ -338,6 +338,13 @@ private protected abstract void EmitDebugFunctionInfo(
             INodeWithDebugInfo debugNode,
             bool hasSequencePoints);
 
+        private protected virtual void EmitDebugThunkInfo(
+            string methodName,
+            SymbolDefinition methodSymbol,
+            INodeWithDebugInfo debugNode)
+        {
+        }
+
         private protected abstract void EmitDebugSections(IDictionary<string, SymbolDefinition> definedSymbols);
 
         private void EmitObject(string objectFilePath, IReadOnlyCollection<DependencyNode> nodes, IObjectDumper dumper, Logger logger)
@@ -490,15 +497,21 @@ private void EmitObject(string objectFilePath, IReadOnlyCollection<DependencyNod
                         _userDefinedTypeDescriptor.GetTypeIndex(methodTable.Type, needsCompleteType: true);
                     }
 
-                    if (node is INodeWithDebugInfo debugNode and ISymbolDefinitionNode symbolDefinitionNode and IMethodNode methodNode)
+                    if (node is INodeWithDebugInfo debugNode and ISymbolDefinitionNode symbolDefinitionNode)
                     {
-                        bool hasSequencePoints = debugNode.GetNativeSequencePoints().Any();
-                        uint methodTypeIndex = hasSequencePoints ? _userDefinedTypeDescriptor.GetMethodFunctionIdTypeIndex(methodNode.Method) : 0;
                         string methodName = GetMangledName(symbolDefinitionNode);
-
                         if (_definedSymbols.TryGetValue(methodName, out var methodSymbol))
                         {
-                            EmitDebugFunctionInfo(methodTypeIndex, methodName, methodSymbol, debugNode, hasSequencePoints);
+                            if (node is IMethodNode methodNode)
+                            {
+                                bool hasSequencePoints = debugNode.GetNativeSequencePoints().Any();
+                                uint methodTypeIndex = hasSequencePoints ? _userDefinedTypeDescriptor.GetMethodFunctionIdTypeIndex(methodNode.Method) : 0;
+                                EmitDebugFunctionInfo(methodTypeIndex, methodName, methodSymbol, debugNode, hasSequencePoints);
+                            }
+                            else
+                            {
+                                EmitDebugThunkInfo(methodName, methodSymbol, debugNode);
+                            }
                         }
                     }
                 }
@@ -526,21 +539,13 @@ private void EmitObject(string objectFilePath, IReadOnlyCollection<DependencyNod
 
         public static void EmitObject(string objectFilePath, IReadOnlyCollection<DependencyNode> nodes, NodeFactory factory, ObjectWritingOptions options, IObjectDumper dumper, Logger logger)
         {
-            var stopwatch = new Stopwatch();
-            stopwatch.Start();
+            var stopwatch = Stopwatch.StartNew();
 
-            if (Environment.GetEnvironmentVariable("DOTNET_USE_LLVM_OBJWRITER") == "1")
-            {
-                LegacyObjectWriter.EmitObject(objectFilePath, nodes, factory, options, dumper, logger);
-            }
-            else
-            {
-                ObjectWriter objectWriter =
-                    factory.Target.IsApplePlatform ? new MachObjectWriter(factory, options) :
-                    factory.Target.OperatingSystem == TargetOS.Windows ? new CoffObjectWriter(factory, options) :
-                    new ElfObjectWriter(factory, options);
-                objectWriter.EmitObject(objectFilePath, nodes, dumper, logger);
-            }
+            ObjectWriter objectWriter =
+                factory.Target.IsApplePlatform ? new MachObjectWriter(factory, options) :
+                factory.Target.OperatingSystem == TargetOS.Windows ? new CoffObjectWriter(factory, options) :
+                new ElfObjectWriter(factory, options);
+            objectWriter.EmitObject(objectFilePath, nodes, dumper, logger);
 
             stopwatch.Stop();
             if (logger.IsVerbose)
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/PreinitializationManager.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/PreinitializationManager.cs
index 59aa50936046..58b3f1f0dd65 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/PreinitializationManager.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/PreinitializationManager.cs
@@ -2,9 +2,12 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Diagnostics;
+
 using Internal.IL;
 using Internal.TypeSystem;
 
+using FlowAnnotations = ILLink.Shared.TrimAnalysis.FlowAnnotations;
+
 namespace ILCompiler
 {
     /// <summary>
@@ -14,10 +17,10 @@ public class PreinitializationManager
     {
         private readonly bool _supportsLazyCctors;
 
-        public PreinitializationManager(TypeSystemContext context, CompilationModuleGroup compilationGroup, ILProvider ilprovider, TypePreinit.TypePreinitializationPolicy policy, ReadOnlyFieldPolicy readOnlyPolicy)
+        public PreinitializationManager(TypeSystemContext context, CompilationModuleGroup compilationGroup, ILProvider ilprovider, TypePreinit.TypePreinitializationPolicy policy, ReadOnlyFieldPolicy readOnlyPolicy, FlowAnnotations flowAnnotations)
         {
             _supportsLazyCctors = context.SystemModule.GetType("System.Runtime.CompilerServices", "ClassConstructorRunner", throwIfNotFound: false) != null;
-            _preinitHashTable = new PreinitializationInfoHashtable(compilationGroup, ilprovider, policy, readOnlyPolicy);
+            _preinitHashTable = new PreinitializationInfoHashtable(compilationGroup, ilprovider, policy, readOnlyPolicy, flowAnnotations);
         }
 
         /// <summary>
@@ -137,13 +140,15 @@ private sealed class PreinitializationInfoHashtable : LockFreeReaderHashtable<Me
             private readonly ILProvider _ilProvider;
             internal readonly TypePreinit.TypePreinitializationPolicy _policy;
             private readonly ReadOnlyFieldPolicy _readOnlyPolicy;
+            private readonly FlowAnnotations _flowAnnotations;
 
-            public PreinitializationInfoHashtable(CompilationModuleGroup compilationGroup, ILProvider ilProvider, TypePreinit.TypePreinitializationPolicy policy, ReadOnlyFieldPolicy readOnlyPolicy)
+            public PreinitializationInfoHashtable(CompilationModuleGroup compilationGroup, ILProvider ilProvider, TypePreinit.TypePreinitializationPolicy policy, ReadOnlyFieldPolicy readOnlyPolicy, FlowAnnotations flowAnnotations)
             {
                 _compilationGroup = compilationGroup;
                 _ilProvider = ilProvider;
                 _policy = policy;
                 _readOnlyPolicy = readOnlyPolicy;
+                _flowAnnotations = flowAnnotations;
             }
 
             protected override bool CompareKeyToValue(MetadataType key, TypePreinit.PreinitializationInfo value) => key == value.Type;
@@ -153,7 +158,7 @@ public PreinitializationInfoHashtable(CompilationModuleGroup compilationGroup, I
 
             protected override TypePreinit.PreinitializationInfo CreateValueFromKey(MetadataType key)
             {
-                var info = TypePreinit.ScanType(_compilationGroup, _ilProvider, _policy, _readOnlyPolicy, key);
+                var info = TypePreinit.ScanType(_compilationGroup, _ilProvider, _policy, _readOnlyPolicy, _flowAnnotations, key);
 
                 // We either successfully preinitialized or
                 // the type doesn't have a canonical form or
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/RootingHelpers.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/RootingHelpers.cs
index ffd286cce759..ce222b1334e8 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/RootingHelpers.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/RootingHelpers.cs
@@ -227,6 +227,16 @@ public static bool TryGetDependenciesForReflectedField(ref DependencyList depend
                     ((MetadataType)owningType).MakeInstantiatedType(inst));
             }
 
+            try
+            {
+                // Make sure we're not putting something into the graph that will crash later.
+                factory.TypeSystemContext.EnsureLoadableType(field.FieldType);
+            }
+            catch (TypeSystemException)
+            {
+                return false;
+            }
+
             dependencies.Add(factory.ReflectedField(field), reason);
 
             return true;
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/RootingServiceProvider.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/RootingServiceProvider.cs
index f6b562421ff8..fc13ed9a298e 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/RootingServiceProvider.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/RootingServiceProvider.cs
@@ -30,7 +30,10 @@ public void AddCompilationRoot(MethodDesc method, string reason, string exportNa
             _rootAdder(methodEntryPoint, reason);
 
             if (exportName != null)
+            {
+                exportName = _factory.NameMangler.NodeMangler.ExternMethod(exportName, method);
                 _factory.NodeAliases.Add(methodEntryPoint, exportName);
+            }
 
             if (canonMethod != method && method.HasInstantiation)
                 _rootAdder(_factory.MethodGenericDictionary(method), reason);
@@ -133,6 +136,7 @@ public void RootReadOnlyDataBlob(byte[] data, int alignment, string reason, stri
         {
             var blob = _factory.ReadOnlyDataBlob("__readonlydata_" + exportName, data, alignment);
             _rootAdder(blob, reason);
+            exportName = _factory.NameMangler.NodeMangler.ExternVariable(exportName);
             _factory.NodeAliases.Add(blob, exportName);
         }
 
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/RuntimeConfigurationRootProvider.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/RuntimeConfigurationRootProvider.cs
index d75b780021dd..8c16f9685f83 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/RuntimeConfigurationRootProvider.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/RuntimeConfigurationRootProvider.cs
@@ -51,7 +51,7 @@ public RuntimeConfigurationBlobNode(string blobName, IReadOnlyCollection<string>
 
             public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
             {
-                sb.Append(_blobName);
+                sb.Append(nameMangler.NodeMangler.ExternVariable(_blobName));
             }
 
             public override ObjectNodeSection GetSection(NodeFactory factory) =>
@@ -72,7 +72,7 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                 int valueIndex = 0;
                 foreach (string line in _runtimeOptions)
                 {
-                    int indexOfEquals = line.IndexOf("=");
+                    int indexOfEquals = line.IndexOf('=');
                     if (indexOfEquals > 0)
                     {
                         string key = line.Substring(0, indexOfEquals);
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/SubstitutedILProvider.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/SubstitutedILProvider.cs
index d2d614f761c0..09c54b96c5e8 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/SubstitutedILProvider.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/SubstitutedILProvider.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Buffers.Binary;
 using System.Collections.Generic;
 using System.Reflection.Metadata;
 using System.Reflection.Metadata.Ecma335;
@@ -106,9 +107,12 @@ public MethodIL GetMethodILWithInlinedSubstitutions(MethodIL method)
             // The "seek backwards to find what feeds the comparison" only works for a couple known instructions
             // (load constant, call). It can't e.g. skip over arguments to the call.
             //
-            // Last step is a sweep - we replace the tail of all unreachable blocks with "br $-2"
-            // and nop out the rest. If the basic block is smaller than 2 bytes, we don't touch it.
-            // We also eliminate any EH records that correspond to the stubbed out basic block.
+            // We then do a pass to compute the offsets of instructions in a new instruction stream, where
+            // the unreachable basic blocks are replaced with an infinite loop (`br $-2`).
+            // Because of this rewriting, all the offsets will shift.
+            //
+            // Last step is the actual rewriting: we copy instructions from the source basic block, remapping
+            // offsets for jumps, and replacing parts that are unreachable with the infinite loop.
             //
             // We also attempt to rewrite calls to SR.SomeResourceString accessors with string
             // literals looked up from the managed resources.
@@ -224,9 +228,7 @@ public MethodIL GetMethodILWithInlinedSubstitutions(MethodIL method)
             //
             // We also do another round of basic block marking to mark beginning of visible basic blocks
             // after dead branch elimination. This allows us to limit the number of potential small basic blocks
-            // that are not interesting (because no code jumps to them anymore), but could prevent us from
-            // finishing the process. Unreachable basic blocks smaller than 2 bytes abort the substitution
-            // inlining process because we can't neutralize them (turn them into an infinite loop).
+            // that are not interesting (because no code jumps to them anymore).
             offsetsToVisit.Push(0);
             while (offsetsToVisit.TryPop(out int offset))
             {
@@ -270,7 +272,7 @@ public MethodIL GetMethodILWithInlinedSubstitutions(MethodIL method)
                             offsetsToVisit.Push(ehRegion.HandlerOffset);
 
                             // RyuJIT is going to look at this basic block even though it's unreachable.
-                            // Consider it visible so that we replace the tail with an endless loop.
+                            // Consider it visible so that we replace it with an endless loop.
                             int handlerEnd = ehRegion.HandlerOffset + ehRegion.HandlerLength;
                             if (handlerEnd < flags.Length)
                                 flags[handlerEnd] |= OpcodeFlags.VisibleBasicBlockStart;
@@ -398,7 +400,7 @@ public MethodIL GetMethodILWithInlinedSubstitutions(MethodIL method)
                 }
             }
 
-            // Now sweep unreachable basic blocks by replacing them with nops
+            // Now sweep unreachable basic blocks
             bool hasUnmarkedInstruction = false;
             foreach (var flag in flags)
             {
@@ -412,54 +414,168 @@ public MethodIL GetMethodILWithInlinedSubstitutions(MethodIL method)
             if (!hasUnmarkedInstruction && !hasGetResourceStringCall)
                 return method;
 
-            byte[] newBody = (byte[])methodBytes.Clone();
-            int position = 0;
-            while (position < newBody.Length)
-            {
-                Debug.Assert((flags[position] & OpcodeFlags.InstructionStart) != 0);
-                Debug.Assert((flags[position] & OpcodeFlags.VisibleBasicBlockStart) != 0);
+            // Maps instruction offsets in original method body to offsets in rewritten method body.
+            var offsetMap = new int[methodBytes.Length];
+#if DEBUG
+            Array.Fill(offsetMap, -1);
+#endif
+            int srcPos = 0;
+            int dstPos = 0;
 
-                bool erase = (flags[position] & OpcodeFlags.Mark) == 0;
+            // Compute a map from original instruction offset to new instruction offsets
+            while (srcPos < flags.Length)
+            {
+                bool marked = (flags[srcPos] & OpcodeFlags.Mark) != 0;
 
-                int basicBlockStart = position;
+                // Go over all bytes in a single visible basic block
+                int lastInstructionPos = srcPos;
                 do
                 {
-                    if (erase)
-                        newBody[position] = (byte)ILOpCode.Nop;
-                    position++;
-                } while (position < newBody.Length && (flags[position] & OpcodeFlags.VisibleBasicBlockStart) == 0);
-
-                // If we had to nop out this basic block, we need to neutralize it by appending
-                // an infinite loop ("br $-2").
-                // We append instead of prepend because RyuJIT's importer has trouble with junk unreachable bytes.
-                if (erase)
+                    if ((flags[srcPos] & OpcodeFlags.InstructionStart) != 0)
+                    {
+                        lastInstructionPos = srcPos;
+                        offsetMap[srcPos] = dstPos;
+                    }
+
+                    if (marked)
+                    {
+                        dstPos++;
+                    }
+
+                    srcPos++;
+                } while (srcPos < flags.Length && (flags[srcPos] & OpcodeFlags.VisibleBasicBlockStart) == 0);
+
+                if (marked)
+                {
+                    // This was a marked visible basic block. If it ended in a short-form branch instruction,
+                    // we need to do a size adjustment because the rewritten code doesn't use short-form
+                    // instructions.
+                    var reader = new ILReader(methodBytes, lastInstructionPos);
+                    if (reader.HasNext)
+                    {
+                        ILOpcode opcode = reader.ReadILOpcode();
+                        int adjustment = opcode switch
+                        {
+                            >= ILOpcode.br_s and <= ILOpcode.blt_un_s => 3,
+                            ILOpcode.leave_s => 3,
+                            _ => 0,
+                        };
+                        dstPos += adjustment;
+                    }
+                }
+                else
+                {
+                    // This is a dead visible basic block. We're going to emit `br_s $-2`: reserve 2 bytes.
+                    dstPos += 2;
+                }
+            }
+
+            // Now generate the new body
+            var newBody = new byte[dstPos];
+            srcPos = 0;
+            dstPos = 0;
+            while (srcPos < flags.Length)
+            {
+                Debug.Assert((flags[srcPos] & OpcodeFlags.InstructionStart) != 0);
+                Debug.Assert((flags[srcPos] & OpcodeFlags.VisibleBasicBlockStart) != 0);
+
+                bool marked = (flags[srcPos] & OpcodeFlags.Mark) != 0;
+
+                if (!marked)
+                {
+                    // Dead basic block: emit endless loop and skip the rest of the original code.
+                    newBody[dstPos++] = (byte)ILOpCode.Br_s;
+                    newBody[dstPos++] = unchecked((byte)-2);
+
+                    do
+                    {
+                        srcPos++;
+                    }
+                    while (srcPos < flags.Length && (flags[srcPos] & OpcodeFlags.VisibleBasicBlockStart) == 0);
+                }
+                else
                 {
-                    if (position - basicBlockStart < 2)
+                    // Live basic block: copy the original bytes
+                    int lastInstructionPos = srcPos;
+                    do
                     {
-                        // We cannot neutralize the basic block, so better leave the method alone.
-                        // The control would fall through to the next basic block.
-                        return method;
+                        if ((flags[srcPos] & OpcodeFlags.InstructionStart) != 0)
+                            lastInstructionPos = srcPos;
+
+                        newBody[dstPos++] = methodBytes[srcPos++];
                     }
+                    while (srcPos < flags.Length && (flags[srcPos] & OpcodeFlags.VisibleBasicBlockStart) == 0);
 
-                    newBody[position - 2] = (byte)ILOpCode.Br_s;
-                    newBody[position - 1] = unchecked((byte)-2);
+                    // If the basic block ended in a branch, we need to map the target offset to new offset.
+                    // We'll also rewrite short-form instructions to their long forms.
+                    var reader = new ILReader(methodBytes, lastInstructionPos);
+                    if (reader.HasNext)
+                    {
+                        ILOpcode opcode = reader.ReadILOpcode();
+
+                        if (opcode == ILOpcode.switch_)
+                        {
+                            uint count = reader.ReadILUInt32();
+                            int srcJmpBase = reader.Offset + (int)(4 * count);
+                            int dstJmpBase = dstPos;
+                            dstPos -= (int)(sizeof(uint) * count);
+                            for (uint i = 0; i < count; i++)
+                            {
+                                int dest = offsetMap[(int)reader.ReadILUInt32() + srcJmpBase];
+                                BinaryPrimitives.WriteUInt32LittleEndian(new Span<byte>(newBody, dstPos, sizeof(uint)), (uint)(dest - dstJmpBase));
+                                dstPos += sizeof(uint);
+                            }
+                        }
+                        else if (opcode is >= ILOpcode.br_s and <= ILOpcode.blt_un || opcode is ILOpcode.leave or ILOpcode.leave_s)
+                        {
+                            int dest = offsetMap[reader.ReadBranchDestination(opcode)];
+
+                            if (opcode is >= ILOpcode.br_s and <= ILOpcode.blt_un_s || opcode == ILOpcode.leave_s)
+                            {
+                                dstPos -= 2;
+                                if (opcode == ILOpcode.leave_s)
+                                    opcode = ILOpcode.leave;
+                                else
+                                    opcode += ILOpcode.br - ILOpcode.br_s;
+                            }
+                            else
+                            {
+                                dstPos -= 5;
+                            }
+
+                            newBody[dstPos++] = (byte)opcode;
+                            BinaryPrimitives.WriteUInt32LittleEndian(new Span<byte>(newBody, dstPos, sizeof(uint)), (uint)(dest - (dstPos + sizeof(uint))));
+                            dstPos += sizeof(uint);
+                        }
+                    }
                 }
             }
 
             // EH regions with unmarked handlers belong to unmarked basic blocks
             // Need to eliminate them because they're not usable.
+            // The rest need to have their offsets and lengths remapped.
             ArrayBuilder<ILExceptionRegion> newEHRegions = default(ArrayBuilder<ILExceptionRegion>);
             foreach (ILExceptionRegion ehRegion in ehRegions)
             {
                 if ((flags[ehRegion.HandlerOffset] & OpcodeFlags.Mark) != 0)
                 {
-                    newEHRegions.Add(ehRegion);
+                    int tryOffset = offsetMap[ehRegion.TryOffset];
+                    int handlerOffset = offsetMap[ehRegion.HandlerOffset];
+
+                    var newEhRegion = new ILExceptionRegion(
+                        ehRegion.Kind,
+                        tryOffset,
+                        offsetMap[ehRegion.TryOffset + ehRegion.TryLength] - tryOffset,
+                        handlerOffset,
+                        offsetMap[ehRegion.HandlerOffset + ehRegion.HandlerLength] - handlerOffset,
+                        ehRegion.ClassToken,
+                        ehRegion.Kind == ILExceptionRegionKind.Filter ? offsetMap[ehRegion.FilterOffset] : -1);
+
+                    newEHRegions.Add(newEhRegion);
                 }
             }
 
-            // Existing debug information might not match new instruction boundaries (plus there's little point
-            // in generating debug information for NOPs) - generate new debug information by filtering
-            // out the sequence points associated with nopped out instructions.
+            // Remap debug information as well.
             MethodDebugInformation debugInfo = method.GetDebugInfo();
             IEnumerable<ILSequencePoint> oldSequencePoints = debugInfo?.GetSequencePoints();
             if (oldSequencePoints != null)
@@ -469,7 +585,9 @@ public MethodIL GetMethodILWithInlinedSubstitutions(MethodIL method)
                 {
                     if (sequencePoint.Offset < flags.Length && (flags[sequencePoint.Offset] & OpcodeFlags.Mark) != 0)
                     {
-                        sequencePoints.Add(sequencePoint);
+                        ILSequencePoint newSequencePoint = new ILSequencePoint(
+                            offsetMap[sequencePoint.Offset], sequencePoint.Document, sequencePoint.LineNumber);
+                        sequencePoints.Add(newSequencePoint);
                     }
                 }
 
@@ -486,11 +604,13 @@ public MethodIL GetMethodILWithInlinedSubstitutions(MethodIL method)
                 // of a MethodIL and we're making a new one here. It just has to be unique to the MethodIL.
                 int tokenRid = ecmaMethodIL.Module.MetadataReader.GetHeapSize(HeapIndex.UserString);
 
-                for (int offset = 0; offset < flags.Length; offset++)
+                for (int srcOffset = 0; srcOffset < flags.Length; srcOffset++)
                 {
-                    if ((flags[offset] & OpcodeFlags.GetResourceStringCall) == 0)
+                    if ((flags[srcOffset] & OpcodeFlags.GetResourceStringCall) == 0)
                         continue;
 
+                    int offset = offsetMap[srcOffset];
+
                     Debug.Assert(newBody[offset] == (byte)ILOpcode.call);
                     var getter = (EcmaMethod)method.GetObject(new ILReader(newBody, offset + 1).ReadILToken());
 
@@ -759,12 +879,16 @@ private static bool TryExpandTypeEquality(MethodIL methodIL, byte[] body, Opcode
                 return false;
 
             TypeDesc type2 = ReadLdToken(ref reader, methodIL, flags);
-            if (type1 == null)
+            if (type2 == null)
                 return false;
 
             if (!ReadGetTypeFromHandle(ref reader, methodIL, flags))
                 return false;
 
+            // No value in making this work for definitions
+            if (type1.IsGenericDefinition || type2.IsGenericDefinition)
+                return false;
+
             // Dataflow runs on top of uninstantiated IL and we can't answer some questions there.
             // Unfortunately this means dataflow will still see code that the rest of the system
             // might have optimized away. It should not be a problem in practice.
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/SubstitutionProvider.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/SubstitutionProvider.cs
index 71c6008ba4f5..27fc44064781 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/SubstitutionProvider.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/SubstitutionProvider.cs
@@ -4,6 +4,7 @@
 using System;
 using System.Collections.Generic;
 using System.IO;
+using System.Reflection;
 using System.Reflection.Metadata;
 using System.Reflection.PortableExecutable;
 using System.Resources;
@@ -33,11 +34,80 @@ public BodySubstitution GetSubstitution(MethodDesc method)
                 AssemblyFeatureInfo info = _hashtable.GetOrCreateValue(ecmaMethod.Module);
                 if (info.BodySubstitutions != null && info.BodySubstitutions.TryGetValue(ecmaMethod, out BodySubstitution result))
                     return result;
+
+                if (TryGetFeatureCheckValue(ecmaMethod, out bool value))
+                    return BodySubstitution.Create(value ? 1 : 0);
             }
 
             return null;
         }
 
+        private bool TryGetFeatureCheckValue(EcmaMethod method, out bool value)
+        {
+            value = false;
+
+            if (!method.Signature.IsStatic)
+                return false;
+
+            if (!method.Signature.ReturnType.IsWellKnownType(WellKnownType.Boolean))
+                return false;
+
+            if (FindProperty(method) is not PropertyPseudoDesc property)
+                return false;
+
+            if (property.SetMethod != null)
+                return false;
+
+            foreach (var featureSwitchDefinitionAttribute in property.GetDecodedCustomAttributes("System.Diagnostics.CodeAnalysis", "FeatureSwitchDefinitionAttribute"))
+            {
+                if (featureSwitchDefinitionAttribute.FixedArguments is not [CustomAttributeTypedArgument<TypeDesc> { Value: string switchName }])
+                    continue;
+
+                // If there's a FeatureSwitchDefinition, don't continue looking for FeatureGuard.
+                // We don't want to infer feature switch settings from FeatureGuard.
+                return _hashtable._switchValues.TryGetValue(switchName, out value);
+            }
+
+            foreach (var featureGuardAttribute in property.GetDecodedCustomAttributes("System.Diagnostics.CodeAnalysis", "FeatureGuardAttribute"))
+            {
+                if (featureGuardAttribute.FixedArguments is not [CustomAttributeTypedArgument<TypeDesc> { Value: EcmaType featureType }])
+                    continue;
+
+                if (featureType.Namespace == "System.Diagnostics.CodeAnalysis")
+                {
+                    switch (featureType.Name)
+                    {
+                        case "RequiresAssemblyFilesAttribute":
+                        case "RequiresUnreferencedCodeAttribute":
+                        case "RequiresDynamicCodeAttribute":
+                            return true;
+                    }
+                }
+            }
+
+            return false;
+
+            static PropertyPseudoDesc FindProperty(EcmaMethod method)
+            {
+                if ((method.Attributes & MethodAttributes.SpecialName) == 0)
+                    return null;
+
+                if (method.OwningType is not EcmaType declaringType)
+                    return null;
+
+                var reader = declaringType.MetadataReader;
+                foreach (PropertyDefinitionHandle propertyHandle in reader.GetTypeDefinition(declaringType.Handle).GetProperties())
+                {
+                    PropertyDefinition propertyDef = reader.GetPropertyDefinition(propertyHandle);
+                    var property = new PropertyPseudoDesc(declaringType, propertyHandle);
+                    if (property.GetMethod == method)
+                        return property;
+                }
+
+                return null;
+            }
+        }
+
         public object GetSubstitution(FieldDesc field)
         {
             if (field.GetTypicalFieldDefinition() is EcmaField ecmaField)
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/TypePreinit.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/TypePreinit.cs
index f8a8f2c24477..2f94045c153a 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/TypePreinit.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/TypePreinit.cs
@@ -12,6 +12,7 @@
 using Internal.TypeSystem;
 
 using CombinedDependencyList = System.Collections.Generic.List<ILCompiler.DependencyAnalysisFramework.DependencyNodeCore<ILCompiler.DependencyAnalysis.NodeFactory>.CombinedDependencyListEntry>;
+using FlowAnnotations = ILLink.Shared.TrimAnalysis.FlowAnnotations;
 
 namespace ILCompiler
 {
@@ -36,17 +37,19 @@ public class TypePreinit
         private readonly ILProvider _ilProvider;
         private readonly TypePreinitializationPolicy _policy;
         private readonly ReadOnlyFieldPolicy _readOnlyPolicy;
+        private readonly FlowAnnotations _flowAnnotations;
         private readonly Dictionary<FieldDesc, Value> _fieldValues = new Dictionary<FieldDesc, Value>();
         private readonly Dictionary<string, StringInstance> _internedStrings = new Dictionary<string, StringInstance>();
         private readonly Dictionary<TypeDesc, RuntimeTypeValue> _internedTypes = new Dictionary<TypeDesc, RuntimeTypeValue>();
 
-        private TypePreinit(MetadataType owningType, CompilationModuleGroup compilationGroup, ILProvider ilProvider, TypePreinitializationPolicy policy, ReadOnlyFieldPolicy readOnlyPolicy)
+        private TypePreinit(MetadataType owningType, CompilationModuleGroup compilationGroup, ILProvider ilProvider, TypePreinitializationPolicy policy, ReadOnlyFieldPolicy readOnlyPolicy, FlowAnnotations flowAnnotations)
         {
             _type = owningType;
             _compilationGroup = compilationGroup;
             _ilProvider = ilProvider;
             _policy = policy;
             _readOnlyPolicy = readOnlyPolicy;
+            _flowAnnotations = flowAnnotations;
 
             // Zero initialize all fields we model.
             foreach (var field in owningType.GetFields())
@@ -58,7 +61,7 @@ private TypePreinit(MetadataType owningType, CompilationModuleGroup compilationG
             }
         }
 
-        public static PreinitializationInfo ScanType(CompilationModuleGroup compilationGroup, ILProvider ilProvider, TypePreinitializationPolicy policy, ReadOnlyFieldPolicy readOnlyPolicy, MetadataType type)
+        public static PreinitializationInfo ScanType(CompilationModuleGroup compilationGroup, ILProvider ilProvider, TypePreinitializationPolicy policy, ReadOnlyFieldPolicy readOnlyPolicy, FlowAnnotations flowAnnotations, MetadataType type)
         {
             Debug.Assert(type.HasStaticConstructor);
             Debug.Assert(!type.IsGenericDefinition);
@@ -85,7 +88,7 @@ public static PreinitializationInfo ScanType(CompilationModuleGroup compilationG
             Status status;
             try
             {
-                preinit = new TypePreinit(type, compilationGroup, ilProvider, policy, readOnlyPolicy);
+                preinit = new TypePreinit(type, compilationGroup, ilProvider, policy, readOnlyPolicy, flowAnnotations);
                 int instructions = 0;
                 status = preinit.TryScanMethod(type.GetStaticConstructor(), null, null, ref instructions, out _);
             }
@@ -244,6 +247,11 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack<Method
                                 return Status.Fail(methodIL.OwningMethod, opcode, "Array out of bounds");
                             }
 
+                            if (elementType.RequiresAlign8())
+                            {
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Align8");
+                            }
+
                             AllocationSite allocSite = new AllocationSite(_type, instructionCounter);
                             stack.Push(new ArrayInstance(elementType.MakeArrayType(), elementCount, allocSite));
                         }
@@ -313,6 +321,11 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack<Method
                                 return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported static");
                             }
 
+                            if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(field))
+                            {
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
+                            }
+
                             if (_fieldValues[field] is IAssignableValue assignableField)
                             {
                                 if (!assignableField.TryAssign(stack.PopIntoLocation(field.FieldType)))
@@ -353,7 +366,7 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack<Method
                                 && field.OwningType.HasStaticConstructor
                                 && _policy.CanPreinitialize(field.OwningType))
                             {
-                                TypePreinit nestedPreinit = new TypePreinit((MetadataType)field.OwningType, _compilationGroup, _ilProvider, _policy, _readOnlyPolicy);
+                                TypePreinit nestedPreinit = new TypePreinit((MetadataType)field.OwningType, _compilationGroup, _ilProvider, _policy, _readOnlyPolicy, _flowAnnotations);
                                 recursionProtect ??= new Stack<MethodDesc>();
                                 recursionProtect.Push(methodIL.OwningMethod);
 
@@ -412,6 +425,11 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack<Method
                                 return Status.Fail(methodIL.OwningMethod, opcode, "Unsupported static");
                             }
 
+                            if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(field))
+                            {
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
+                            }
+
                             Value fieldValue = _fieldValues[field];
                             if (fieldValue == null || !fieldValue.TryCreateByRef(out Value byRefValue))
                             {
@@ -454,6 +472,11 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack<Method
                                 return Status.Fail(methodIL.OwningMethod, opcode, "Static constructor");
                             }
 
+                            if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(method))
+                            {
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
+                            }
+
                             Value[] methodParams = new Value[numParams];
                             for (int i = numParams - 1; i >= 0; i--)
                             {
@@ -516,6 +539,16 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack<Method
                                 return Status.Fail(methodIL.OwningMethod, opcode, "Finalizable class");
                             }
 
+                            if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(ctor))
+                            {
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
+                            }
+
+                            if (owningType.RequiresAlign8())
+                            {
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Align8");
+                            }
+
                             Value[] ctorParameters = new Value[ctorSig.Length + 1];
                             for (int i = ctorSig.Length - 1; i >= 0; i--)
                             {
@@ -714,6 +747,11 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack<Method
                                 return Status.Fail(methodIL.OwningMethod, opcode, "Byref field");
                             }
 
+                            if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(field))
+                            {
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
+                            }
+
                             if (instance.Value is not IHasInstanceFields settableInstance
                                 || !settableInstance.TrySetField(field, value))
                             {
@@ -755,6 +793,11 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack<Method
                                 return Status.Fail(methodIL.OwningMethod, opcode);
                             }
 
+                            if (_flowAnnotations.RequiresDataflowAnalysisDueToSignature(field))
+                            {
+                                return Status.Fail(methodIL.OwningMethod, opcode, "Needs dataflow analysis");
+                            }
+
                             StackEntry instance = stack.Pop();
 
                             var loadableInstance = instance.Value as IHasInstanceFields;
@@ -1592,6 +1635,9 @@ private Status TryScanMethod(MethodIL methodIL, Value[] parameters, Stack<Method
                                 if (type.IsNullable)
                                     return Status.Fail(methodIL.OwningMethod, opcode);
 
+                                if (type.RequiresAlign8())
+                                    return Status.Fail(methodIL.OwningMethod, opcode, "Align8");
+
                                 Value value = stack.PopIntoLocation(type);
                                 AllocationSite allocSite = new AllocationSite(_type, instructionCounter);
                                 if (!ObjectInstance.TryBox((DefType)type, value, allocSite, out ObjectInstance boxedResult))
@@ -2761,16 +2807,16 @@ public void WriteContent(ref ObjectDataBuilder builder, ISymbolNode thisNode, No
                 {
                     Debug.Assert(creationInfo.Constructor.Method.Name == "InitializeOpenStaticThunk");
 
-                    // m_firstParameter
+                    // _firstParameter
                     builder.EmitPointerReloc(thisNode);
 
-                    // m_helperObject
+                    // _helperObject
                     builder.EmitZeroPointer();
 
-                    // m_extraFunctionPointerOrData
+                    // _extraFunctionPointerOrData
                     builder.EmitPointerReloc(creationInfo.GetTargetNode(factory));
 
-                    // m_functionPointer
+                    // _functionPointer
                     Debug.Assert(creationInfo.Thunk != null);
                     builder.EmitPointerReloc(creationInfo.Thunk);
                 }
@@ -2778,16 +2824,16 @@ public void WriteContent(ref ObjectDataBuilder builder, ISymbolNode thisNode, No
                 {
                     Debug.Assert(creationInfo.Constructor.Method.Name == "InitializeClosedInstance");
 
-                    // m_firstParameter
+                    // _firstParameter
                     _firstParameter.WriteFieldData(ref builder, factory);
 
-                    // m_helperObject
+                    // _helperObject
                     builder.EmitZeroPointer();
 
-                    // m_extraFunctionPointerOrData
+                    // _extraFunctionPointerOrData
                     builder.EmitZeroPointer();
 
-                    // m_functionPointer
+                    // _functionPointer
                     builder.EmitPointerReloc(factory.CanonicalEntrypoint(_methodPointed));
                 }
             }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs
index 242759b0c914..5ef33807b0f4 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs
@@ -57,5 +57,15 @@ public sealed override string MethodGenericDictionary(MethodDesc method)
         {
             return GenericDictionaryNamePrefix + NameMangler.GetMangledMethodName(method);
         }
+
+        public sealed override string ExternMethod(string unmangledName, MethodDesc method)
+        {
+            return unmangledName;
+        }
+
+        public sealed override string ExternVariable(string unmangledName)
+        {
+            return unmangledName;
+        }
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UsageBasedMetadataManager.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UsageBasedMetadataManager.cs
index 2b823f424651..f5033e521a66 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UsageBasedMetadataManager.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UsageBasedMetadataManager.cs
@@ -1041,6 +1041,11 @@ public bool GeneratesMetadata(EcmaModule module, ExportedTypeHandle exportedType
                 return GeneratesMetadata(targetType) || !_factory.CompilationModuleGroup.ContainsType(targetType);
             }
 
+            public bool GeneratesInterfaceImpl(MetadataType typeDef, MetadataType interfaceImpl)
+            {
+                return _factory.MetadataManager.IsInterfaceUsed(interfaceImpl.GetTypeDefinition());
+            }
+
             public bool IsBlocked(MetadataType typeDef)
             {
                 return _blockingPolicy.IsBlocked(typeDef);
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs
index ee4117679432..95d6d53c55ce 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs
@@ -11,11 +11,18 @@ namespace ILCompiler
     //
     public class WindowsNodeMangler : NodeMangler
     {
+        private TargetDetails _target;
+
         public const string NonGCStaticMemberName = "__NONGCSTATICS";
         public const string GCStaticMemberName = "__GCSTATICS";
         public const string ThreadStaticMemberName = "__THREADSTATICS";
         public const string ThreadStaticIndexName = "__THREADSTATICINDEX";
 
+        public WindowsNodeMangler(TargetDetails target)
+        {
+            _target = target;
+        }
+
         // Mangled name of boxed version of a type
         public sealed override string MangledBoxedTypeName(TypeDesc type)
         {
@@ -68,9 +75,59 @@ public sealed override string TypeGenericDictionary(TypeDesc type)
             return GenericDictionaryNamePrefix + NameMangler.GetMangledTypeName(type);
         }
 
-        public override string MethodGenericDictionary(MethodDesc method)
+        public sealed override string MethodGenericDictionary(MethodDesc method)
         {
             return GenericDictionaryNamePrefix + NameMangler.GetMangledMethodName(method);
         }
+
+        public sealed override string ExternMethod(string unmangledName, MethodDesc method)
+        {
+            if (_target.Architecture != TargetArchitecture.X86)
+            {
+                return unmangledName;
+            }
+
+            UnmanagedCallingConventions callConv;
+            if (method.IsPInvoke)
+            {
+                callConv = method.GetPInvokeMethodCallingConventions() & UnmanagedCallingConventions.CallingConventionMask;
+            }
+            else if (method.IsUnmanagedCallersOnly)
+            {
+                if (method is not Internal.TypeSystem.Ecma.EcmaMethod)
+                    callConv = method.Signature.GetStandaloneMethodSignatureCallingConventions();
+                else
+                    callConv = method.GetUnmanagedCallersOnlyMethodCallingConventions() & UnmanagedCallingConventions.CallingConventionMask;
+            }
+            else
+            {
+                Debug.Assert(method is Internal.TypeSystem.Ecma.EcmaMethod ecmaMethod && (ecmaMethod.GetRuntimeImportName() != null || ecmaMethod.GetRuntimeExportName() != null));
+                return unmangledName;
+            }
+
+            int signatureBytes = 0;
+            foreach (var p in method.Signature)
+            {
+                signatureBytes += AlignmentHelper.AlignUp(p.GetElementSize().AsInt, _target.PointerSize);
+            }
+
+            return callConv switch
+            {
+                UnmanagedCallingConventions.Stdcall => $"_{unmangledName}@{signatureBytes}",
+                UnmanagedCallingConventions.Fastcall => $"@{unmangledName}@{signatureBytes}",
+                UnmanagedCallingConventions.Cdecl => $"_{unmangledName}",
+                _ => throw new System.NotImplementedException()
+            };
+        }
+
+        public sealed override string ExternVariable(string unmangledName)
+        {
+            if (_target.Architecture != TargetArchitecture.X86)
+            {
+                return unmangledName;
+            }
+
+            return $"_{unmangledName}";
+        }
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/XmlObjectDumper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/XmlObjectDumper.cs
index 2b5fe4f608b4..12043e18a42e 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/XmlObjectDumper.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/XmlObjectDumper.cs
@@ -70,7 +70,7 @@ protected override void DumpObjectNode(NodeFactory nodeFactory, ObjectNode node,
 
         private string HashData(byte[] data)
         {
-            return BitConverter.ToString(_sha256.ComputeHash(data)).Replace("-", "").ToLowerInvariant();
+            return Convert.ToHexStringLower(_sha256.ComputeHash(data));
         }
 
         internal override void End()
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs
index 56901e348db9..98a674d4d721 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs
@@ -737,11 +737,7 @@ private void ImportCall(ILOpcode opcode, int token)
                     _dependencies.Add(_factory.InterfaceDispatchCell(method), reason);
                 }
             }
-            else if (_compilation.HasFixedSlotVTable(method.OwningType))
-            {
-                // No dependencies: virtual call through the vtable
-            }
-            else
+            else if (_compilation.NeedsSlotUseTracking(method.OwningType))
             {
                 MethodDesc slotDefiningMethod = targetMethod.IsNewSlot ?
                         targetMethod : MetadataVirtualMethodAlgorithm.FindSlotDefiningMethodForVirtualMethod(targetMethod);
@@ -1254,7 +1250,7 @@ private void ImportBinaryOperation(ILOpcode opcode)
                     break;
                 case ILOpcode.mul_ovf:
                 case ILOpcode.mul_ovf_un:
-                    if (_compilation.TypeSystemContext.Target.Architecture == TargetArchitecture.ARM)
+                    if (_compilation.TypeSystemContext.Target.PointerSize == 4)
                     {
                         _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.LMulOfv), "_lmulovf");
                         _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.ULMulOvf), "_ulmulovf");
@@ -1279,6 +1275,10 @@ private void ImportBinaryOperation(ILOpcode opcode)
                             _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Overflow), "_divovf");
                         }
                         _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.ThrowDivZero), "_divbyzero");
+                        if (opcode == ILOpcode.div)
+                        {
+                            _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Overflow), "_ovf");
+                        }
                     }
                     break;
                 case ILOpcode.rem:
@@ -1298,7 +1298,14 @@ private void ImportBinaryOperation(ILOpcode opcode)
                             _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Overflow), "_removf");
                         }
                         _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.ThrowDivZero), "_divbyzero");
+                        if (opcode == ILOpcode.rem)
+                        {
+                            _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.Overflow), "_ovf");
+                        }
                     }
+
+                    _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.DblRem), "rem");
+                    _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.FltRem), "rem");
                     break;
             }
         }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj b/src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj
index 0ad0d464acab..8ccb841bb25c 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/ILCompiler.Compiler.csproj
@@ -4,6 +4,7 @@
     <AssemblyName>ILCompiler.Compiler</AssemblyName>
     <TargetFramework>$(NetCoreAppToolCurrent)</TargetFramework>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <NoWarn>$(NoWarn);CA1866;CA1867</NoWarn>
     <EnableDefaultCompileItems>false</EnableDefaultCompileItems>
     <Platforms>x64;x86</Platforms>
     <PlatformTarget>AnyCPU</PlatformTarget>
@@ -15,7 +16,7 @@
     <GenerateDependencyFile>false</GenerateDependencyFile>
     <Configurations>Debug;Release;Checked</Configurations>
 
-    <NoWarn Condition="'$(DotNetBuildFromSource)' == 'true'">$(NoWarn);CS8524</NoWarn>
+    <NoWarn Condition="'$(DotNetBuildSourceOnly)' == 'true'">$(NoWarn);CS8524</NoWarn>
     <NoWarn>$(NoWarn);CA1859</NoWarn>
   </PropertyGroup>
 
@@ -24,16 +25,13 @@
     <ProjectReference Include="..\ILCompiler.MetadataTransform\ILCompiler.MetadataTransform.csproj" />
     <ProjectReference Include="..\ILCompiler.TypeSystem\ILCompiler.TypeSystem.csproj" />
 
-    <PackageReference Condition="'$(DotNetBuildFromSource)' != 'true'" Include="StaticCs" Version="$(StaticCsVersion)">
+    <PackageReference Condition="'$(DotNetBuildSourceOnly)' != 'true'" Include="StaticCs" Version="$(StaticCsVersion)">
       <PrivateAssets>all</PrivateAssets>
       <ExcludeAssets>contentfiles</ExcludeAssets> <!-- We include our own copy of the ClosedAttribute to work in source build -->
     </PackageReference>
   </ItemGroup>
 
   <ItemGroup>
-    <Compile Include="..\..\Common\Compiler\ImpliedRepeatedFieldDesc.cs" Link="Compiler\ImpliedRepeatedFieldDesc.cs" />
-    <Compile Include="..\..\Common\Compiler\TypeWithRepeatedFields.cs" Link="Compiler\TypeWithRepeatedFields.cs" />
-    <Compile Include="..\..\Common\Compiler\TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs" Link="Compiler\TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs" />
     <Compile Include="..\..\Common\Compiler\Win32Resources\ResourceData.cs" Link="Compiler\Win32Resources\ResourceData.cs" />
     <Compile Include="..\..\Common\Compiler\Win32Resources\ResourceData.Reader.cs" Link="Compiler\Win32Resources\ResourceData.Reader.cs" />
     <Compile Include="..\..\Common\Compiler\Win32Resources\ResourceData.ResourcesDataModel.cs" Link="Compiler\Win32Resources\ResourceData.ResourcesDataModel.cs" />
@@ -425,6 +423,7 @@
     <Compile Include="Compiler\DependencyAnalysis\GenericVirtualMethodImplNode.cs" />
     <Compile Include="Compiler\DependencyAnalysis\InlineableStringsResourceNode.cs" />
     <Compile Include="Compiler\DependencyAnalysis\InterfaceDispatchCellSectionNode.cs" />
+    <Compile Include="Compiler\DependencyAnalysis\InterfaceUseNode.cs" />
     <Compile Include="Compiler\DependencyAnalysis\MethodExceptionHandlingInfoNode.cs" />
     <Compile Include="Compiler\DependencyAnalysis\ModuleInitializerListNode.cs" />
     <Compile Include="Compiler\DependencyAnalysis\NotReadOnlyFieldNode.cs" />
@@ -484,7 +483,6 @@
     <Compile Include="Compiler\DependencyAnalysis\ShadowConcreteUnboxingThunkNode.cs" />
     <Compile Include="Compiler\ILScanner.cs" />
     <Compile Include="Compiler\ILScannerBuilder.cs" />
-    <Compile Include="Compiler\ILStreamReader.cs" />
     <Compile Include="Compiler\LibraryInitializers.cs" />
     <Compile Include="Compiler\Compilation.cs" />
     <Compile Include="Compiler\CompilerMetadataFieldLayoutAlgorithm.cs" />
@@ -619,7 +617,6 @@
     <Compile Include="Compiler\ObjectWriter\Eabi\EabiAttributesBuilder.cs" />
     <Compile Include="Compiler\ObjectWriter\Eabi\EabiNative.cs" />
     <Compile Include="Compiler\ObjectWriter\Eabi\EabiUnwindConverter.cs" />
-    <Compile Include="Compiler\ObjectWriter\LegacyObjectWriter.cs" />
     <Compile Include="Compiler\ObjectWriter\ObjectWriter.cs" />
     <Compile Include="Compiler\ObjectWriter\ObjectWritingOptions.cs" />
     <Compile Include="Compiler\ObjectWriter\MachNative.cs" />
diff --git a/src/coreclr/tools/aot/ILCompiler.Diagnostics/PerfMapWriter.cs b/src/coreclr/tools/aot/ILCompiler.Diagnostics/PerfMapWriter.cs
index 6f1d3113c315..98941b37c185 100644
--- a/src/coreclr/tools/aot/ILCompiler.Diagnostics/PerfMapWriter.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Diagnostics/PerfMapWriter.cs
@@ -135,6 +135,7 @@ private static PerfmapTokensForTarget TranslateTargetDetailsToPerfmapConstants(T
                 TargetArchitecture.ARM64 => PerfMapArchitectureToken.ARM64,
                 TargetArchitecture.X64 => PerfMapArchitectureToken.X64,
                 TargetArchitecture.X86 => PerfMapArchitectureToken.X86,
+                TargetArchitecture.RiscV64 => PerfMapArchitectureToken.RiscV64,
                 _ => throw new NotImplementedException(details.Architecture.ToString())
             };
 
diff --git a/src/coreclr/tools/aot/ILCompiler.Diagnostics/ReadyToRunDiagnosticsConstants.cs b/src/coreclr/tools/aot/ILCompiler.Diagnostics/ReadyToRunDiagnosticsConstants.cs
index fea26f9db028..395b26f151ee 100644
--- a/src/coreclr/tools/aot/ILCompiler.Diagnostics/ReadyToRunDiagnosticsConstants.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Diagnostics/ReadyToRunDiagnosticsConstants.cs
@@ -19,6 +19,7 @@ public enum PerfMapArchitectureToken : uint
     ARM64 = 2,
     X64 = 3,
     X86 = 4,
+    RiscV64 = 5,
 }
 
 public enum PerfMapOSToken : uint
diff --git a/src/coreclr/tools/aot/ILCompiler.LLVM/Compiler/DependencyAnalysis/LLVMCodegenNodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.LLVM/Compiler/DependencyAnalysis/LLVMCodegenNodeFactory.cs
index 7dc63d856281..9488b35a6ab5 100644
--- a/src/coreclr/tools/aot/ILCompiler.LLVM/Compiler/DependencyAnalysis/LLVMCodegenNodeFactory.cs
+++ b/src/coreclr/tools/aot/ILCompiler.LLVM/Compiler/DependencyAnalysis/LLVMCodegenNodeFactory.cs
@@ -98,7 +98,7 @@ protected override IMethodNode CreateMethodEntrypointNode(MethodDesc method)
                         return methodNode;
                     }
 
-                    return new RuntimeImportMethodNode(method);
+                    return new RuntimeImportMethodNode(method, NameMangler);
                 }
             }
             if (CompilationModuleGroup.ContainsMethodBody(method, false))
diff --git a/src/coreclr/tools/aot/ILCompiler.LLVM/Compiler/DependencyAnalysis/LLVMMethodCodeNode.cs b/src/coreclr/tools/aot/ILCompiler.LLVM/Compiler/DependencyAnalysis/LLVMMethodCodeNode.cs
index c542ff54a973..a58fc20b04cb 100644
--- a/src/coreclr/tools/aot/ILCompiler.LLVM/Compiler/DependencyAnalysis/LLVMMethodCodeNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.LLVM/Compiler/DependencyAnalysis/LLVMMethodCodeNode.cs
@@ -38,7 +38,7 @@ public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
         public int Offset => 0;
         public bool RepresentsIndirectionCell => false;
 
-        public override bool InterestingForDynamicDependencyAnalysis => false;
+        public override bool InterestingForDynamicDependencyAnalysis => _method.HasInstantiation || _method.OwningType.HasInstantiation;
         public override bool HasDynamicDependencies => false;
         public override IEnumerable<CombinedDependencyListEntry> SearchDynamicDependencies(List<DependencyNodeCore<NodeFactory>> markedNodes, int firstNode, NodeFactory factory) => null;
 
diff --git a/src/coreclr/tools/aot/ILCompiler.LLVM/Compiler/LLVMNodeMangler.cs b/src/coreclr/tools/aot/ILCompiler.LLVM/Compiler/LLVMNodeMangler.cs
index ba9dc4b9db96..75c50d747d24 100644
--- a/src/coreclr/tools/aot/ILCompiler.LLVM/Compiler/LLVMNodeMangler.cs
+++ b/src/coreclr/tools/aot/ILCompiler.LLVM/Compiler/LLVMNodeMangler.cs
@@ -52,5 +52,9 @@ public sealed override string MethodGenericDictionary(MethodDesc method)
         {
             return GenericDictionaryNamePrefix + NameMangler.GetMangledMethodName(method);
         }
+
+        public override string ExternMethod(string unmangledName, MethodDesc method) => unmangledName;
+
+        public override string ExternVariable(string unmangledName) => unmangledName;
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/IMetadataPolicy.cs b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/IMetadataPolicy.cs
index 3aa61454e57f..408a3be6ded5 100644
--- a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/IMetadataPolicy.cs
+++ b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/IMetadataPolicy.cs
@@ -48,6 +48,11 @@ public interface IMetadataPolicy
         /// </summary>
         bool GeneratesMetadata(Cts.Ecma.EcmaModule module, Ecma.ExportedTypeHandle exportedType);
 
+        /// <summary>
+        /// Returns true if InterfaceImpl should be generated for this type.
+        /// </summary>
+        bool GeneratesInterfaceImpl(Cts.MetadataType typeDef, Cts.MetadataType interfaceImpl);
+
         /// <summary>
         /// Returns true if a type should be blocked from generating any metadata.
         /// Blocked interfaces are skipped from interface lists, and custom attributes referring to
diff --git a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/Transform.Type.cs b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/Transform.Type.cs
index fc3ea4922c81..4ab76e79005d 100644
--- a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/Transform.Type.cs
+++ b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/ILCompiler/Metadata/Transform.Type.cs
@@ -287,6 +287,9 @@ private void InitializeTypeDef(Cts.MetadataType entity, TypeDefinition record)
                 {
                     if (IsBlocked(interfaceType))
                         continue;
+                    if (!_policy.GeneratesInterfaceImpl(entity, (Cts.MetadataType)interfaceType))
+                        continue;
+
                     record.Interfaces.Add(HandleType(interfaceType));
                 }
             }
diff --git a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/Internal/Metadata/NativeFormat/Writer/NativeMetadataWriter.cs b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/Internal/Metadata/NativeFormat/Writer/NativeMetadataWriter.cs
index 53b3af8c5e40..b05c1c6ed3a7 100644
--- a/src/coreclr/tools/aot/ILCompiler.MetadataTransform/Internal/Metadata/NativeFormat/Writer/NativeMetadataWriter.cs
+++ b/src/coreclr/tools/aot/ILCompiler.MetadataTransform/Internal/Metadata/NativeFormat/Writer/NativeMetadataWriter.cs
@@ -373,7 +373,7 @@ public void Write(Stream stream)
 
                     // 3rd, the name, Quote the string if not already quoted
                     string asString = rec.ToString(false);
-                    bool alreadyQuoted = asString.StartsWith("\"") && asString.EndsWith("\"");
+                    bool alreadyQuoted = asString.StartsWith('\"') && asString.EndsWith('\"');
                     if (!alreadyQuoted)
                     {
                         LogWriter.Write("\"");
@@ -875,7 +875,7 @@ public partial class MethodSemantics
     {
         public override string ToString()
         {
-            string str = Enum.GetName(typeof(MethodSemanticsAttributes), Attributes);
+            string str = Enum.GetName(Attributes);
             return str + " : " + Method.ToString();
         }
     }
@@ -903,7 +903,7 @@ public partial class PropertySignature
     {
         public override string ToString()
         {
-            return string.Join(" ", Enum.GetName(typeof(CallingConventions), CallingConvention),
+            return string.Join(" ", Enum.GetName(CallingConvention),
                 Type.ToString()) + "(" + ToString(Parameters) + ")";
         }
     }
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeGenericInfoMapNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeGenericInfoMapNode.cs
index e6b4762433df..8905c2aa31d7 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeGenericInfoMapNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeGenericInfoMapNode.cs
@@ -59,7 +59,7 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
                     if ((genericParameter.Attributes & GenericParameterAttributes.VarianceMask) != GenericParameterAttributes.None)
                         hasVariance = true;
 
-                    if ((genericParameter.Attributes & (GenericParameterAttributes.SpecialConstraintMask | (GenericParameterAttributes)GenericConstraints.AcceptByRefLike)) != default(GenericParameterAttributes) ||
+                    if ((genericParameter.Attributes & (GenericParameterAttributes.SpecialConstraintMask | (GenericParameterAttributes)GenericConstraints.AllowByRefLike)) != default(GenericParameterAttributes) ||
                         (genericParameter.GetConstraints().Count > 0))
                     {
                         hasConstraints = true;
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeValidationChecker.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeValidationChecker.cs
index 88882e12f1f5..a257dbbe967f 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeValidationChecker.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeValidationChecker.cs
@@ -107,7 +107,7 @@ Task<bool> ValidateTypeWorkerHelper(TypeDesc typeToCheckForSkipValidation)
             // The runtime has a number of checks in the type loader which it will skip running if the SkipValidation flag is set
             // This function attempts to document all of them, and implement *some* of them.
 
-            // This function performs a portion of the validation skipping that has been found to have some importance, or to serve as 
+            // This function performs a portion of the validation skipping that has been found to have some importance, or to serve as
             // In addition, there are comments about all validation skipping activities that the runtime will perform.
             try
             {
@@ -488,8 +488,9 @@ static bool CompareGenericParameterConstraint(MethodDesc declMethod, GenericPara
                     if (!parameterOfDecl.HasReferenceTypeConstraint)
                         return false;
 
-                if (parameterOfDecl.HasAcceptByRefLikeConstraint)
-                    if (!parameterOfImpl.HasAcceptByRefLikeConstraint)
+                // Constraints that 'allow' must check the impl first
+                if (parameterOfImpl.HasAllowByRefLikeConstraint)
+                    if (!parameterOfDecl.HasAllowByRefLikeConstraint)
                         return false;
 
                 HashSet<TypeDesc> constraintsOnDecl = new HashSet<TypeDesc>();
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IBC/IBCDataReader.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IBC/IBCDataReader.cs
index fffa41f08168..1893a39680c6 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IBC/IBCDataReader.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IBC/IBCDataReader.cs
@@ -39,7 +39,7 @@ uint ReadTokenWithMemory(ref uint lastToken)
                 return reader.ReadUInt32();
             }
 
-            uint current = reader.Read7BitEncodedUInt();
+            uint current = (uint)reader.Read7BitEncodedInt();
             byte highByte = (byte)(current >> 24);
             uint result;
 
@@ -116,7 +116,7 @@ private int ReadSmallInt()
 
         private uint ReadSmallUInt()
         {
-            return minified ? reader.Read7BitEncodedUInt() : reader.ReadUInt32();
+            return minified ? (uint)reader.Read7BitEncodedInt() : reader.ReadUInt32();
         }
         #endregion
 
@@ -327,7 +327,7 @@ IBC.MethodData ReadMethod()
 
             if (minified)
             {
-                uint firstBlockHitCount = reader.Read7BitEncodedUInt();
+                uint firstBlockHitCount = (uint)reader.Read7BitEncodedInt();
                 result.BasicBlocks.Add(new IBC.BasicBlockData { ILOffset = 0, ExecutionCount = firstBlockHitCount });
             }
 
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IBC/MIbcProfileParser.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IBC/MIbcProfileParser.cs
index 28b327ace002..dca94d90a3fe 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IBC/MIbcProfileParser.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IBC/MIbcProfileParser.cs
@@ -353,7 +353,7 @@ private enum MibcGroupParseState
         ///
         /// This format is designed to be extensible to hold more data as we add new per method profile data without breaking existing parsers.
         /// </summary>
-        private static IEnumerable<MethodProfileData> ReadMIbcGroup(EcmaMethod method)
+        private static List<MethodProfileData> ReadMIbcGroup(EcmaMethod method)
         {
             EcmaMethodIL ilBody = EcmaMethodIL.Create(method);
             MetadataLoaderForPgoData metadataLoader = new MetadataLoaderForPgoData(ilBody);
@@ -369,6 +369,7 @@ private static IEnumerable<MethodProfileData> ReadMIbcGroup(EcmaMethod method)
             Dictionary<MethodDesc, int> weights = null;
             List<long> instrumentationDataLongs = null;
             PgoSchemaElem[] pgoSchemaData = null;
+            var methodProfileData = new List<MethodProfileData>();
 
             while (ilReader.HasNext)
             {
@@ -552,8 +553,7 @@ private static IEnumerable<MethodProfileData> ReadMIbcGroup(EcmaMethod method)
                             if (methodInProgress != null)
                             {
                                 // If the method being loaded didn't have meaningful input, skip
-                                MethodProfileData mibcData = new MethodProfileData((MethodDesc)methodInProgress, MethodProfilingDataFlags.ReadMethodCode, exclusiveWeight, weights, 0xFFFFFFFF, pgoSchemaData);
-                                yield return mibcData;
+                                methodProfileData.Add(new MethodProfileData((MethodDesc)methodInProgress, MethodProfilingDataFlags.ReadMethodCode, exclusiveWeight, weights, 0xFFFFFFFF, pgoSchemaData));
                             }
                             state = MibcGroupParseState.LookingForNextMethod;
                             exclusiveWeight = 0;
@@ -608,6 +608,8 @@ private static IEnumerable<MethodProfileData> ReadMIbcGroup(EcmaMethod method)
                     }
                 }
             }
+
+            return methodProfileData;
         }
 
         /// <summary>
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IBC/ReaderExtensions.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IBC/ReaderExtensions.cs
index 711fbf9906a2..86639b89e72d 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IBC/ReaderExtensions.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/IBC/ReaderExtensions.cs
@@ -39,36 +39,6 @@ public static string ReadEncodedString(this BinaryReader reader, int length)
 
             return new string(characters, 0, length - 1);
         }
-
-        // BinaryReader.Read7BitEncodedInt is protected internal, so its
-        // implementation is duplicated here.
-        public static int Read7BitEncodedInt(this BinaryReader reader)
-        {
-            int result = 0;
-            int shift = 0;
-            byte current;
-            do
-            {
-                current = reader.ReadByte();
-
-                result |= (current & 0x7f) << shift;
-
-                shift += 7;
-            }
-            while ((shift <= 28) && ((current & 0x80) != 0));
-
-            if ((current & 0x80) != 0)
-            {
-                throw new InvalidDataException("Improperly encoded integer");
-            }
-
-            return result;
-        }
-
-        public static uint Read7BitEncodedUInt(this BinaryReader reader)
-        {
-            return (uint)reader.Read7BitEncodedInt();
-        }
     }
 
     public static class WriterExtensions
@@ -92,23 +62,5 @@ public static void WriteEncodedString(this BinaryWriter writer, string s)
 
             writer.Write((short)'\0');
         }
-
-        // BinaryWriter.Write7BitEncodedInt is protected internal, so its
-        // implementation is duplicated here.
-        public static void Write7BitEncodedInt(this BinaryWriter writer, uint i)
-        {
-            while (i > 0x7f)
-            {
-                writer.Write((byte)(i | 0x80));
-                i >>= 7;
-            }
-
-            writer.Write((byte)i);
-        }
-
-        public static void Write7BitEncodedInt(this BinaryWriter writer, int i)
-        {
-            writer.Write7BitEncodedInt((uint)i);
-        }
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj
index 9db169fb6dae..04fb0838bee7 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj
@@ -16,7 +16,7 @@
     <GenerateDependencyFile>false</GenerateDependencyFile>
     <Configurations>Debug;Release;Checked</Configurations>
     <RunAnalyzers>false</RunAnalyzers>
-    <NoWarn Condition="'$(DotNetBuildFromSource)' == 'true'">$(NoWarn);CS8524</NoWarn>
+    <NoWarn Condition="'$(DotNetBuildSourceOnly)' == 'true'">$(NoWarn);CS8524</NoWarn>
   </PropertyGroup>
 
   <ItemGroup>
@@ -24,7 +24,7 @@
     <ProjectReference Include="..\ILCompiler.Diagnostics\ILCompiler.Diagnostics.csproj" />
     <ProjectReference Include="..\ILCompiler.TypeSystem\ILCompiler.TypeSystem.csproj" />
 
-    <PackageReference Condition="'$(DotNetBuildFromSource)' != 'true'" Include="StaticCs" Version="$(StaticCsVersion)">
+    <PackageReference Condition="'$(DotNetBuildSourceOnly)' != 'true'" Include="StaticCs" Version="$(StaticCsVersion)">
       <PrivateAssets>all</PrivateAssets>
       <ExcludeAssets>contentfiles</ExcludeAssets> <!-- We include our own copy of the ClosedAttribute to work in source build -->
     </PackageReference>
@@ -33,9 +33,6 @@
   <Import Project="..\..\Common\Compiler\GenericCycleDetection\GenericCycleDetection.projitems" />
 
   <ItemGroup>
-    <Compile Include="..\..\Common\Compiler\ImpliedRepeatedFieldDesc.cs" Link="Compiler\ImpliedRepeatedFieldDesc.cs" />
-    <Compile Include="..\..\Common\Compiler\TypeWithRepeatedFields.cs" Link="Compiler\TypeWithRepeatedFields.cs" />
-    <Compile Include="..\..\Common\Compiler\TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs" Link="Compiler\TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs" />
     <Compile Include="..\..\Common\Internal\Runtime\CorConstants.cs" Link="Common\CorConstants.cs" />
     <Compile Include="..\..\Common\Internal\Runtime\ReadyToRunConstants.cs" Link="Common\ReadyToRunConstants.cs" />
     <Compile Include="..\..\Common\Internal\Runtime\ReadyToRunInstructionSet.cs" Link="Common\ReadyToRunInstructionSet.cs" />
@@ -138,6 +135,7 @@
     <Compile Include="..\..\Common\JitInterface\SystemVStructClassificator.cs" Link="JitInterface\SystemVStructClassificator.cs" />
     <Compile Include="..\..\Common\JitInterface\LoongArch64PassStructInRegister.cs" Link="JitInterface\LoongArch64PassStructInRegister.cs" />
     <Compile Include="..\..\Common\JitInterface\RISCV64PassStructInRegister.cs" Link="JitInterface\RISCV64PassStructInRegister.cs" />
+    <Compile Include="..\..\Common\JitInterface\SwiftPhysicalLowering.cs" Link="JitInterface\SwiftPhysicalLowering.cs" />
     <Compile Include="..\..\Common\TypeSystem\Interop\InteropTypes.cs" Link="Interop\InteropTypes.cs" />
     <Compile Include="..\..\Common\TypeSystem\Interop\UnmanagedCallingConventions.cs" Link="Interop\UnmanagedCallingConventions.cs" />
     <Compile Include="..\ILCompiler.Reflection.ReadyToRun\PEReaderExtensions.cs" Link="Reflection\PEReaderExtensions.cs" />
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
index f7527c96dd9d..e1b1b359c087 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
@@ -456,7 +456,7 @@ unsafe partial class CorInfoImpl
     {
         private const CORINFO_RUNTIME_ABI TargetABI = CORINFO_RUNTIME_ABI.CORINFO_CORECLR_ABI;
 
-        private uint OffsetOfDelegateFirstTarget => (uint)(3 * PointerSize); // Delegate::m_functionPointer
+        private uint OffsetOfDelegateFirstTarget => (uint)(3 * PointerSize); // Delegate._methodPtr
 
         private readonly ReadyToRunCodegenCompilation _compilation;
         private MethodWithGCInfo _methodCodeNode;
@@ -827,7 +827,7 @@ public void CompileMethod(MethodWithGCInfo methodCodeNodeNeedingCode, Logger log
             }
         }
 
-        private bool getReadyToRunHelper(ref CORINFO_RESOLVED_TOKEN pResolvedToken, ref CORINFO_LOOKUP_KIND pGenericLookupKind, CorInfoHelpFunc id, ref CORINFO_CONST_LOOKUP pLookup)
+        private bool getReadyToRunHelper(ref CORINFO_RESOLVED_TOKEN pResolvedToken, ref CORINFO_LOOKUP_KIND pGenericLookupKind, CorInfoHelpFunc id, CORINFO_METHOD_STRUCT_* callerHandle, ref CORINFO_CONST_LOOKUP pLookup)
         {
             switch (id)
             {
@@ -913,7 +913,7 @@ private bool getReadyToRunHelper(ref CORINFO_RESOLVED_TOKEN pResolvedToken, ref
                             helperArg = new FieldWithToken(fieldDesc, HandleToModuleToken(ref pResolvedToken));
                         }
 
-                        GenericContext methodContext = new GenericContext(entityFromContext(pResolvedToken.tokenContext));
+                        var methodContext = new GenericContext(HandleToObject(callerHandle));
                         ISymbolNode helper = _compilation.SymbolNodeFactory.GenericLookupHelper(
                             pGenericLookupKind.runtimeLookupKind,
                             helperId,
@@ -928,7 +928,7 @@ private bool getReadyToRunHelper(ref CORINFO_RESOLVED_TOKEN pResolvedToken, ref
             return true;
         }
 
-        private void getReadyToRunDelegateCtorHelper(ref CORINFO_RESOLVED_TOKEN pTargetMethod, mdToken targetConstraint, CORINFO_CLASS_STRUCT_* delegateType, ref CORINFO_LOOKUP pLookup)
+        private void getReadyToRunDelegateCtorHelper(ref CORINFO_RESOLVED_TOKEN pTargetMethod, mdToken targetConstraint, CORINFO_CLASS_STRUCT_* delegateType, CORINFO_METHOD_STRUCT_* callerHandle, ref CORINFO_LOOKUP pLookup)
         {
 #if DEBUG
             // In debug, write some bogus data to the struct to ensure we have filled everything
@@ -958,6 +958,7 @@ private void getReadyToRunDelegateCtorHelper(ref CORINFO_RESOLVED_TOKEN pTargetM
                 unboxing: false,
                 context: typeOrMethodContext);
 
+            // runtime lookup is not needed, callerHandle is unused
             pLookup.lookupKind.needsRuntimeLookup = false;
             pLookup.constLookup = CreateConstLookupToSymbol(_compilation.SymbolNodeFactory.DelegateCtor(delegateTypeDesc, targetMethod));
         }
@@ -1028,9 +1029,15 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum)
                 case CorInfoHelpFunc.CORINFO_HELP_MEMSET:
                     id = ReadyToRunHelper.MemSet;
                     break;
+                case CorInfoHelpFunc.CORINFO_HELP_MEMZERO:
+                    id = ReadyToRunHelper.MemZero;
+                    break;
                 case CorInfoHelpFunc.CORINFO_HELP_MEMCPY:
                     id = ReadyToRunHelper.MemCpy;
                     break;
+                case CorInfoHelpFunc.CORINFO_HELP_NATIVE_MEMSET:
+                    id = ReadyToRunHelper.NativeMemSet;
+                    break;
 
                 case CorInfoHelpFunc.CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD:
                     id = ReadyToRunHelper.GetRuntimeMethodHandle;
@@ -2179,7 +2186,7 @@ private void ceeInfoGetCallInfo(
                             useInstantiatingStub = true;
                         }
 
-                        ComputeRuntimeLookupForSharedGenericToken(entryKind, ref pResolvedToken, pConstrainedResolvedToken, originalMethod, ref pResult->codePointerOrStubLookup);
+                        ComputeRuntimeLookupForSharedGenericToken(entryKind, ref pResolvedToken, pConstrainedResolvedToken, originalMethod, HandleToObject(callerHandle), ref pResult->codePointerOrStubLookup);
                     }
                 }
                 else
@@ -2269,7 +2276,7 @@ private void ceeInfoGetCallInfo(
 
                 if (pResult->exactContextNeedsRuntimeLookup)
                 {
-                    ComputeRuntimeLookupForSharedGenericToken(DictionaryEntryKind.DispatchStubAddrSlot, ref pResolvedToken, null, originalMethod, ref pResult->codePointerOrStubLookup);
+                    ComputeRuntimeLookupForSharedGenericToken(DictionaryEntryKind.DispatchStubAddrSlot, ref pResolvedToken, null, originalMethod, HandleToObject(callerHandle), ref pResult->codePointerOrStubLookup);
                 }
                 else
                 {
@@ -2552,8 +2559,11 @@ private void ComputeRuntimeLookupForSharedGenericToken(
             ref CORINFO_RESOLVED_TOKEN pResolvedToken,
             CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken,
             MethodDesc templateMethod,
+            MethodDesc callerHandle,
             ref CORINFO_LOOKUP pResultLookup)
         {
+            Debug.Assert(callerHandle != null);
+
             pResultLookup.lookupKind.needsRuntimeLookup = true;
             pResultLookup.lookupKind.runtimeLookupFlags = 0;
 
@@ -2567,15 +2577,7 @@ private void ComputeRuntimeLookupForSharedGenericToken(
             pResult.indirections = CORINFO.USEHELPER;
             pResult.sizeOffset = CORINFO.CORINFO_NO_SIZE_CHECK;
 
-            // Runtime lookups in inlined contexts are not supported by the runtime for now
-            if (pResolvedToken.tokenContext != contextFromMethodBeingCompiled())
-            {
-                pResultLookup.lookupKind.runtimeLookupKind = CORINFO_RUNTIME_LOOKUP_KIND.CORINFO_LOOKUP_NOT_SUPPORTED;
-                return;
-            }
-
-            MethodDesc contextMethod = methodFromContext(pResolvedToken.tokenContext);
-            TypeDesc contextType = typeFromContext(pResolvedToken.tokenContext);
+            MethodDesc contextMethod = callerHandle;
 
             // There is a pathological case where invalid IL refereces __Canon type directly, but there is no dictionary availabled to store the lookup.
             if (!contextMethod.IsSharedByGenericInstantiations)
@@ -2637,7 +2639,7 @@ private void ComputeRuntimeLookupForSharedGenericToken(
             // different way that is more version resilient... plus we can't have pointers to existing MTs/MDs in the sigs)
         }
 
-        private void ceeInfoEmbedGenericHandle(ref CORINFO_RESOLVED_TOKEN pResolvedToken, bool fEmbedParent, ref CORINFO_GENERICHANDLE_RESULT pResult)
+        private void ceeInfoEmbedGenericHandle(ref CORINFO_RESOLVED_TOKEN pResolvedToken, bool fEmbedParent, CORINFO_METHOD_STRUCT_* callerHandle, ref CORINFO_GENERICHANDLE_RESULT pResult)
         {
 #if DEBUG
             // In debug, write some bogus data to the struct to ensure we have filled everything
@@ -2721,7 +2723,7 @@ private void ceeInfoEmbedGenericHandle(ref CORINFO_RESOLVED_TOKEN pResolvedToken
                         throw new NotImplementedException(pResult.handleType.ToString());
                 }
 
-                ComputeRuntimeLookupForSharedGenericToken(entryKind, ref pResolvedToken, pConstrainedResolvedToken: null, templateMethod, ref pResult.lookup);
+                ComputeRuntimeLookupForSharedGenericToken(entryKind, ref pResolvedToken, pConstrainedResolvedToken: null, templateMethod, HandleToObject(callerHandle), ref pResult.lookup);
             }
             else
             {
@@ -2746,9 +2748,9 @@ private void ceeInfoEmbedGenericHandle(ref CORINFO_RESOLVED_TOKEN pResolvedToken
             return null;
         }
 
-        private void embedGenericHandle(ref CORINFO_RESOLVED_TOKEN pResolvedToken, bool fEmbedParent, ref CORINFO_GENERICHANDLE_RESULT pResult)
+        private void embedGenericHandle(ref CORINFO_RESOLVED_TOKEN pResolvedToken, bool fEmbedParent, CORINFO_METHOD_STRUCT_* callerHandle, ref CORINFO_GENERICHANDLE_RESULT pResult)
         {
-            ceeInfoEmbedGenericHandle(ref pResolvedToken, fEmbedParent, ref pResult);
+            ceeInfoEmbedGenericHandle(ref pResolvedToken, fEmbedParent, callerHandle, ref pResult);
 
             Debug.Assert(pResult.compileTimeHandle != null);
 
@@ -2951,7 +2953,7 @@ private void getGSCookie(IntPtr* pCookieVal, IntPtr** ppCookieVal)
 
         private void getMethodVTableOffset(CORINFO_METHOD_STRUCT_* method, ref uint offsetOfIndirection, ref uint offsetAfterIndirection, ref bool isRelative)
         { throw new NotImplementedException("getMethodVTableOffset"); }
-        private void expandRawHandleIntrinsic(ref CORINFO_RESOLVED_TOKEN pResolvedToken, ref CORINFO_GENERICHANDLE_RESULT pResult)
+        private void expandRawHandleIntrinsic(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_METHOD_STRUCT_* callerHandle, ref CORINFO_GENERICHANDLE_RESULT pResult)
         { throw new NotImplementedException("expandRawHandleIntrinsic"); }
 
         private void* getMethodSync(CORINFO_METHOD_STRUCT_* ftn, ref void* ppIndirection)
@@ -3278,7 +3280,7 @@ private void updateEntryPointForTailCall(ref CORINFO_CONST_LOOKUP entryPoint)
         private int getExactClasses(CORINFO_CLASS_STRUCT_* baseType, int maxExactClasses, CORINFO_CLASS_STRUCT_** exactClsRet)
         {
             // Not implemented for R2R yet
-            return 0;
+            return -1;
         }
 
         private bool getStaticFieldContent(CORINFO_FIELD_STRUCT_* fieldHandle, byte* buffer, int bufferSize, int valueOffset, bool ignoreMovableObjects)
diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcInfo.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcInfo.cs
index cd578333425b..d01c636ef3b3 100644
--- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcInfo.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcInfo.cs
@@ -267,6 +267,14 @@ public override string ToString()
                 }
                 sb.AppendLine($"    Has Tailcalls: {_wantsReportOnlyLeaf}");
             }
+            else if (_machine == Machine.RiscV64)
+            {
+                if (StackBaseRegister != 0xffffffff)
+                {
+                    sb.AppendLine($"    StackBaseRegister: {(RiscV64.Registers)StackBaseRegister}");
+                }
+                sb.AppendLine($"    Has Tailcalls: {_wantsReportOnlyLeaf}");
+            }
 
             sb.AppendLine($"    Size of parameter area: 0x{SizeOfStackOutgoingAndScratchArea:X}");
             if (SizeOfEditAndContinuePreservedArea != 0xffffffff)
diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcSlotTable.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcSlotTable.cs
index 8355a1a0976d..d5ac7048589c 100644
--- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcSlotTable.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcSlotTable.cs
@@ -71,6 +71,9 @@ private static string GetRegisterName(int registerNumber, Machine machine)
                     case Machine.LoongArch64:
                         return ((LoongArch64.Registers)registerNumber).ToString();
 
+                    case Machine.RiscV64:
+                        return ((RiscV64.Registers)registerNumber).ToString();
+
                     default:
                         throw new NotImplementedException(machine.ToString());
                 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcTransition.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcTransition.cs
index ecea8cc1dfa6..68e4385e1992 100644
--- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcTransition.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/GcTransition.cs
@@ -69,6 +69,10 @@ public string GetSlotState(GcSlotTable slotTable, Machine machine)
                         regType = typeof(LoongArch64.Registers);
                         break;
 
+                    case Machine.RiscV64:
+                        regType = typeof(RiscV64.Registers);
+                        break;
+
                     default:
                         throw new NotImplementedException();
                 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/DebugInfo.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/DebugInfo.cs
index 03110197b587..f570dffdca50 100644
--- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/DebugInfo.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/DebugInfo.cs
@@ -74,6 +74,8 @@ public static string GetPlatformSpecificRegister(Machine machine, int regnum)
                     return ((Arm64.Registers)regnum).ToString();
                 case Machine.LoongArch64:
                     return ((LoongArch64.Registers)regnum).ToString();
+                case Machine.RiscV64:
+                    return ((RiscV64.Registers)regnum).ToString();
                 default:
                     throw new NotImplementedException($"No implementation for machine type {machine}.");
             }
diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunMethod.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunMethod.cs
index 37857ee0876d..790c7142475b 100644
--- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunMethod.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunMethod.cs
@@ -229,6 +229,10 @@ private int GetSize()
             {
                 return (int)loongarch64Info.FunctionLength;
             }
+            else if (UnwindInfo is RiscV64.UnwindInfo riscv64Info)
+            {
+                return (int)riscv64Info.FunctionLength;
+            }
             else if (Method.GcInfo != null)
             {
                 return Method.GcInfo.CodeLength;
@@ -612,6 +616,10 @@ private void ParseRuntimeFunctions(bool partial)
                 {
                     unwindInfo = new LoongArch64.UnwindInfo(_readyToRunReader.Image, unwindOffset);
                 }
+                else if (_readyToRunReader.Machine == Machine.RiscV64)
+                {
+                    unwindInfo = new RiscV64.UnwindInfo(_readyToRunReader.Image, unwindOffset);
+                }
 
                 if (i == 0 && unwindInfo != null)
                 {
diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs
index 8d325f467d60..0eae2f10cb8f 100644
--- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs
@@ -1685,10 +1685,18 @@ private void ParseHelper(StringBuilder builder)
                     builder.Append("MEM_SET");
                     break;
 
+                case ReadyToRunHelper.MemZero:
+                    builder.Append("MEM_ZERO");
+                    break;
+
                 case ReadyToRunHelper.MemCpy:
                     builder.Append("MEM_CPY");
                     break;
 
+                case ReadyToRunHelper.NativeMemSet:
+                    builder.Append("NATIVE_MEM_SET");
+                    break;
+
                 // PInvoke helpers
                 case ReadyToRunHelper.PInvokeBegin:
                     builder.Append("PINVOKE_BEGIN");
diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/RiscV64/Registers.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/RiscV64/Registers.cs
new file mode 100644
index 000000000000..d88ca03071b7
--- /dev/null
+++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/RiscV64/Registers.cs
@@ -0,0 +1,41 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace ILCompiler.Reflection.ReadyToRun.RiscV64
+{
+    public enum Registers
+    {
+        Zero,
+        Ra,
+        Sp,
+        Gp,
+        Tp,
+        T0,
+        T1,
+        T2,
+        Fp,
+        S1,
+        A0,
+        A1,
+        A2,
+        A3,
+        A4,
+        A5,
+        A6,
+        A7,
+        S2,
+        S3,
+        S4,
+        S5,
+        S6,
+        S7,
+        S8,
+        S9,
+        S10,
+        S11,
+        T3,
+        T4,
+        T5,
+        T6,
+    }
+}
diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/RiscV64/UnwindInfo.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/RiscV64/UnwindInfo.cs
new file mode 100644
index 000000000000..34411efe8e2c
--- /dev/null
+++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/RiscV64/UnwindInfo.cs
@@ -0,0 +1,161 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Text;
+
+namespace ILCompiler.Reflection.ReadyToRun.RiscV64
+{
+    public class Epilog
+    {
+        public int Index { get; set; }
+
+        public uint EpilogStartOffset { get; set; }
+        public uint Res { get; set; }
+        public uint Condition { get; set; }
+        public uint EpilogStartIndex { get; set; }
+        public uint EpilogStartOffsetFromMainFunctionBegin { get; set; }
+
+        public Epilog() { }
+
+        public Epilog(int index, int dw, uint startOffset)
+        {
+            Index = index;
+
+            EpilogStartOffset = UnwindInfo.ExtractBits(dw, 0, 18);
+            Res = UnwindInfo.ExtractBits(dw, 18, 4);
+            Condition = UnwindInfo.ExtractBits(dw, 20, 4);
+            EpilogStartIndex = UnwindInfo.ExtractBits(dw, 22, 10);
+
+            // Note that epilogStartOffset for a funclet is the offset from the beginning
+            // of the current funclet, not the offset from the beginning of the main function.
+            // To help find it when looking through JitDump output, also show the offset from
+            // the beginning of the main function.
+            EpilogStartOffsetFromMainFunctionBegin = EpilogStartOffset * 4 + startOffset;
+        }
+
+        public override string ToString()
+        {
+            StringBuilder sb = new StringBuilder();
+            sb.AppendLine($"        Epilog Start Offset: 0x{EpilogStartOffset:X5} Actual offset = 0x{EpilogStartOffset * 4:X5} Offset from main function begin = 0x{EpilogStartOffsetFromMainFunctionBegin:X6}");
+            sb.AppendLine($"        Condition: {Condition} (0x{Condition:X})" + ((Condition == 0xE) ? " (always)" : ""));
+            sb.Append($"        Epilog Start Index: {EpilogStartIndex} (0x{EpilogStartIndex:X})");
+            return sb.ToString();
+        }
+    }
+
+    public class UnwindCode
+    {
+        public int Index { get; set; }
+
+        public UnwindCode() { }
+
+        public UnwindCode(int index)
+        {
+            Index = index;
+
+        }
+    }
+
+    /// <summary>
+    /// based on <a href="https://github.com/dotnet/runtime/src/coreclr/jit/unwindriscv64.cpp">src/jit/unwindriscv64.cpp</a> DumpUnwindInfo
+    /// </summary>
+    public class UnwindInfo : BaseUnwindInfo
+    {
+        public uint CodeWords { get; set; }
+        public uint EpilogCount { get; set; }
+        public uint EBit { get; set; }
+        public uint XBit { get; set; }
+        public uint Vers { get; set; }
+        public uint FunctionLength { get; set; }
+
+        public uint ExtendedCodeWords { get; set; }
+        public uint ExtendedEpilogCount { get; set; }
+
+        public Epilog[] Epilogs { get; set; }
+
+        public UnwindInfo() { }
+
+        public UnwindInfo(byte[] image, int offset)
+        {
+            uint startOffset = (uint)offset;
+
+            int dw = NativeReader.ReadInt32(image, ref offset);
+            CodeWords = ExtractBits(dw, 27, 5);
+            EpilogCount = ExtractBits(dw, 22, 5);
+            EBit = ExtractBits(dw, 21, 1);
+            XBit = ExtractBits(dw, 20, 1);
+            Vers = ExtractBits(dw, 18, 2);
+            FunctionLength = ExtractBits(dw, 0, 18) * 4;
+
+            if (CodeWords == 0 && EpilogCount == 0)
+            {
+                // We have an extension word specifying a larger number of Code Words or Epilog Counts
+                // than can be specified in the header word.
+                dw = NativeReader.ReadInt32(image, ref offset);
+                ExtendedCodeWords = ExtractBits(dw, 16, 8);
+                ExtendedEpilogCount = ExtractBits(dw, 0, 16);
+            }
+
+            bool[] epilogStartAt = new bool[1024]; // One byte per possible epilog start index; initialized to false
+
+            if (EBit == 0)
+            {
+                Epilogs = new Epilog[EpilogCount];
+                if (EpilogCount != 0)
+                {
+                    for (int scope = 0; scope < EpilogCount; scope++)
+                    {
+                        dw = NativeReader.ReadInt32(image, ref offset);
+                        Epilogs[scope] = new Epilog(scope, dw, startOffset);
+                        epilogStartAt[Epilogs[scope].EpilogStartIndex] = true; // an epilog starts at this offset in the unwind codes
+                    }
+                }
+            }
+            else
+            {
+                Epilogs = new Epilog[0];
+                epilogStartAt[EpilogCount] = true; // the one and only epilog starts its unwind codes at this offset
+            }
+
+            Size = offset - (int)startOffset + (int)CodeWords * 4;
+            int alignmentPad = ((Size + sizeof(int) - 1) & ~(sizeof(int) - 1)) - Size;
+            Size += (alignmentPad + sizeof(uint));
+        }
+
+        public override string ToString()
+        {
+            StringBuilder sb = new StringBuilder();
+            sb.AppendLine($"    CodeWords: {CodeWords}");
+            sb.AppendLine($"    EpilogCount: {EpilogCount}");
+            sb.AppendLine($"    EBit: {EBit}");
+            sb.AppendLine($"    XBit: {XBit}");
+            sb.AppendLine($"    Vers: {Vers}");
+            sb.AppendLine($"    FunctionLength: {FunctionLength}");
+            if (CodeWords == 0 && EpilogCount == 0)
+            {
+                sb.AppendLine("    ---- Extension word ----");
+                sb.AppendLine($"    Extended Code Words: {CodeWords}");
+                sb.AppendLine($"    Extended Epilog Count: {EpilogCount}");
+            }
+            if (EpilogCount == 0)
+            {
+                sb.AppendLine("    No epilogs");
+            }
+            else
+            {
+                for (int i = 0; i < Epilogs.Length; i++)
+                {
+                    sb.AppendLine("        -------------------------");
+                    sb.AppendLine(Epilogs[i].ToString());
+                    sb.AppendLine("        -------------------------");
+                }
+            }
+            return sb.ToString();
+        }
+
+        internal static uint ExtractBits(int dw, int start, int length)
+        {
+            return (uint)((dw >> start) & ((1 << length) - 1));
+        }
+    }
+}
diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs
index ce717b1428c1..f8a4d654b313 100644
--- a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/MethodCodeNode.cs
@@ -77,6 +77,8 @@ public override ObjectNodeSection GetSection(NodeFactory factory)
 
         public override bool StaticDependenciesAreComputed => _methodCode != null;
 
+        public override bool InterestingForDynamicDependencyAnalysis => _method.HasInstantiation || _method.OwningType.HasInstantiation;
+
         public virtual void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
         {
             sb.Append(nameMangler.GetMangledMethodName(_method));
diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/RyuJitNodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/RyuJitNodeFactory.cs
index beed49149f6d..f304b920fddc 100644
--- a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/RyuJitNodeFactory.cs
+++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/DependencyAnalysis/RyuJitNodeFactory.cs
@@ -34,7 +34,7 @@ protected override IMethodNode CreateMethodEntrypointNode(MethodDesc method)
                 }
                 else if (method.HasCustomAttribute("System.Runtime", "RuntimeImportAttribute"))
                 {
-                    return new RuntimeImportMethodNode(method);
+                    return new RuntimeImportMethodNode(method, NameMangler);
                 }
             }
 
diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilation.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilation.cs
index 704ecb558d38..9f31fe5bf982 100644
--- a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilation.cs
+++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilation.cs
@@ -72,7 +72,7 @@ public override IEETypeNode NecessaryTypeSymbolIfPossible(TypeDesc type)
             // information proving that it isn't, give RyuJIT the constructed symbol even
             // though we just need the unconstructed one.
             // https://github.com/dotnet/runtimelab/issues/1128
-            bool canPotentiallyConstruct = NodeFactory.DevirtualizationManager.CanConstructType(type);
+            bool canPotentiallyConstruct = NodeFactory.DevirtualizationManager.CanReferenceConstructedMethodTable(type);
             if (canPotentiallyConstruct)
                 return _nodeFactory.MaximallyConstructableType(type);
 
@@ -81,7 +81,7 @@ public override IEETypeNode NecessaryTypeSymbolIfPossible(TypeDesc type)
 
         public FrozenRuntimeTypeNode NecessaryRuntimeTypeIfPossible(TypeDesc type)
         {
-            bool canPotentiallyConstruct = NodeFactory.DevirtualizationManager.CanConstructType(type);
+            bool canPotentiallyConstruct = NodeFactory.DevirtualizationManager.CanReferenceConstructedMethodTable(type);
             if (canPotentiallyConstruct)
                 return _nodeFactory.SerializedMaximallyConstructableRuntimeTypeObject(type);
 
diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilationBuilder.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilationBuilder.cs
index b63acb15ad5a..17fc5a3ca630 100644
--- a/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilationBuilder.cs
+++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/Compiler/RyuJitCompilationBuilder.cs
@@ -28,7 +28,7 @@ protected RyuJitCompilationBuilder(CompilerTypeSystemContext context, Compilatio
 
         public RyuJitCompilationBuilder(CompilerTypeSystemContext context, CompilationModuleGroup group)
             : base(context, group,
-                  new NativeAotNameMangler(context.Target.IsWindows ? (NodeMangler)new WindowsNodeMangler() : (NodeMangler)new UnixNodeMangler()))
+                  new NativeAotNameMangler(context.Target.IsWindows ? (NodeMangler)new WindowsNodeMangler(context.Target) : (NodeMangler)new UnixNodeMangler()))
         {
         }
 
diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/ILCompiler.RyuJit.csproj b/src/coreclr/tools/aot/ILCompiler.RyuJit/ILCompiler.RyuJit.csproj
index 8805d2e37bb7..355e37ee17c8 100644
--- a/src/coreclr/tools/aot/ILCompiler.RyuJit/ILCompiler.RyuJit.csproj
+++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/ILCompiler.RyuJit.csproj
@@ -91,13 +91,15 @@
     <Compile Include="..\..\Common\JitInterface\RISCV64PassStructInRegister.cs">
       <Link>JitInterface\RISCV64PassStructInRegister.cs</Link>
     </Compile>
+    <Compile Include="..\..\Common\JitInterface\SwiftPhysicalLowering.cs">
+      <Link>JitInterface\SwiftPhysicalLowering.cs</Link>
+    </Compile>
     <Compile Include="..\..\Common\Pgo\TypeSystemEntityOrUnknown.cs">
       <Link>Pgo\TypeSystemEntityOrUnknown.cs</Link>
     </Compile>
 
     <Compile Include="..\ILCompiler.ReadyToRun\IBC\IBCDataModel.cs" Link="Pgo\IBCDataModel.cs" />
     <Compile Include="..\ILCompiler.ReadyToRun\IBC\IBCProfileData.cs" Link="Pgo\IBCProfileData.cs" />
-    <Compile Include="..\ILCompiler.ReadyToRun\IBC\ReaderExtensions.cs" Link="Pgo\ReaderExtensions.cs" />
     <Compile Include="..\ILCompiler.ReadyToRun\IBC\MIbcProfileParser.cs" Link="Pgo\MIbcProfileParser.cs" />
 
     <Compile Include="..\ILCompiler.ReadyToRun\Compiler\ProfileData.cs" Link="Pgo\ProfileData.cs" />
diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
index ffcd5b436662..777fe0f74955 100644
--- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
+++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
@@ -28,7 +28,7 @@ internal unsafe partial class CorInfoImpl
     {
         private const CORINFO_RUNTIME_ABI TargetABI = CORINFO_RUNTIME_ABI.CORINFO_NATIVEAOT_ABI;
 
-        private uint OffsetOfDelegateFirstTarget => (uint)(4 * PointerSize); // Delegate::m_functionPointer
+        private uint OffsetOfDelegateFirstTarget => (uint)(4 * PointerSize); // Delegate._functionPointer
         private int SizeOfReversePInvokeTransitionFrame => 2 * PointerSize;
 
         private RyuJitCompilation _compilation;
@@ -239,27 +239,30 @@ private static CORINFO_RUNTIME_LOOKUP_KIND GetLookupKindFromContextSource(Generi
             }
         }
 
-        private void ComputeLookup(ref CORINFO_RESOLVED_TOKEN pResolvedToken, object entity, ReadyToRunHelperId helperId, ref CORINFO_LOOKUP lookup)
+        private void ComputeLookup(ref CORINFO_RESOLVED_TOKEN pResolvedToken, object entity, ReadyToRunHelperId helperId, MethodDesc callerHandle, ref CORINFO_LOOKUP lookup)
         {
+            Debug.Assert(callerHandle != null);
+
             if (_compilation.NeedsRuntimeLookup(helperId, entity))
             {
                 lookup.lookupKind.needsRuntimeLookup = true;
                 lookup.runtimeLookup.signature = null;
 
-                // Do not bother computing the runtime lookup if we are inlining. The JIT is going
-                // to abort the inlining attempt anyway.
-                if (pResolvedToken.tokenContext != contextFromMethodBeingCompiled())
-                {
-                    lookup.lookupKind.runtimeLookupKind = CORINFO_RUNTIME_LOOKUP_KIND.CORINFO_LOOKUP_NOT_SUPPORTED;
-                    return;
-                }
-
-                MethodDesc contextMethod = methodFromContext(pResolvedToken.tokenContext);
-
-                GenericDictionaryLookup genericLookup = _compilation.ComputeGenericLookup(contextMethod, helperId, entity);
+                GenericDictionaryLookup genericLookup = _compilation.ComputeGenericLookup(callerHandle, helperId, entity);
 
                 if (genericLookup.UseHelper)
                 {
+                    // If this is from a different context and we need a ReadyToRun helper, abort.
+                    // The ReadyToRun helpers need to be able to declare the dependencies and we can't
+                    // currently do it for an inline. This is not a big issue because ReadyToRun helpers
+                    // in optimized code only happen in special build configurations (such as
+                    // `-O --noscan` or multimodule build).
+                    if (pResolvedToken.tokenContext != contextFromMethodBeingCompiled())
+                    {
+                        lookup.lookupKind.runtimeLookupKind = CORINFO_RUNTIME_LOOKUP_KIND.CORINFO_LOOKUP_NOT_SUPPORTED;
+                        return;
+                    }
+
                     lookup.runtimeLookup.indirections = CORINFO.USEHELPER;
                     lookup.lookupKind.runtimeLookupFlags = (ushort)genericLookup.HelperId;
                     lookup.lookupKind.runtimeLookupArgs = (void*)ObjectToHandle(genericLookup.HelperObject);
@@ -303,7 +306,7 @@ private void ComputeLookup(ref CORINFO_RESOLVED_TOKEN pResolvedToken, object ent
             }
         }
 
-        private bool getReadyToRunHelper(ref CORINFO_RESOLVED_TOKEN pResolvedToken, ref CORINFO_LOOKUP_KIND pGenericLookupKind, CorInfoHelpFunc id, ref CORINFO_CONST_LOOKUP pLookup)
+        private bool getReadyToRunHelper(ref CORINFO_RESOLVED_TOKEN pResolvedToken, ref CORINFO_LOOKUP_KIND pGenericLookupKind, CorInfoHelpFunc id, CORINFO_METHOD_STRUCT_* callerHandle, ref CORINFO_CONST_LOOKUP pLookup)
         {
             switch (id)
             {
@@ -330,11 +333,11 @@ private bool getReadyToRunHelper(ref CORINFO_RESOLVED_TOKEN pResolvedToken, ref
                         Debug.Assert(pResolvedToken.token == 0 && pResolvedToken.tokenScope == null);
                         Debug.Assert(pGenericLookupKind.needsRuntimeLookup);
 
-                        DefType typeToInitialize = (DefType)MethodBeingCompiled.OwningType;
+                        DefType typeToInitialize = (DefType)HandleToObject(callerHandle).OwningType;
                         Debug.Assert(typeToInitialize.IsCanonicalSubtype(CanonicalFormKind.Any));
 
                         DefType helperArg = typeToInitialize.ConvertToSharedRuntimeDeterminedForm();
-                        ISymbolNode helper = GetGenericLookupHelper(pGenericLookupKind.runtimeLookupKind, ReadyToRunHelperId.GetNonGCStaticBase, helperArg);
+                        ISymbolNode helper = GetGenericLookupHelper(pGenericLookupKind.runtimeLookupKind, ReadyToRunHelperId.GetNonGCStaticBase, HandleToObject(callerHandle), helperArg);
                         pLookup = CreateConstLookupToSymbol(helper);
                     }
                     break;
@@ -344,7 +347,7 @@ private bool getReadyToRunHelper(ref CORINFO_RESOLVED_TOKEN pResolvedToken, ref
 
                         ReadyToRunHelperId helperId = (ReadyToRunHelperId)pGenericLookupKind.runtimeLookupFlags;
                         object helperArg = HandleToObject(pGenericLookupKind.runtimeLookupArgs);
-                        ISymbolNode helper = GetGenericLookupHelper(pGenericLookupKind.runtimeLookupKind, helperId, helperArg);
+                        ISymbolNode helper = GetGenericLookupHelper(pGenericLookupKind.runtimeLookupKind, helperId, HandleToObject(callerHandle), helperArg);
                         pLookup = CreateConstLookupToSymbol(helper);
                     }
                     break;
@@ -354,7 +357,7 @@ private bool getReadyToRunHelper(ref CORINFO_RESOLVED_TOKEN pResolvedToken, ref
             return true;
         }
 
-        private void getReadyToRunDelegateCtorHelper(ref CORINFO_RESOLVED_TOKEN pTargetMethod, mdToken targetConstraint, CORINFO_CLASS_STRUCT_* delegateType, ref CORINFO_LOOKUP pLookup)
+        private void getReadyToRunDelegateCtorHelper(ref CORINFO_RESOLVED_TOKEN pTargetMethod, mdToken targetConstraint, CORINFO_CLASS_STRUCT_* delegateType, CORINFO_METHOD_STRUCT_* callerHandle, ref CORINFO_LOOKUP pLookup)
         {
 #if DEBUG
             // In debug, write some bogus data to the struct to ensure we have filled everything
@@ -433,11 +436,7 @@ private void getReadyToRunDelegateCtorHelper(ref CORINFO_RESOLVED_TOKEN pTargetM
             {
                 pLookup.lookupKind.needsRuntimeLookup = true;
 
-                MethodDesc contextMethod = methodFromContext(pTargetMethod.tokenContext);
-
-                // We should not be inlining these. RyuJIT should have aborted inlining already.
-                Debug.Assert(contextMethod == MethodBeingCompiled);
-
+                MethodDesc contextMethod = HandleToObject(callerHandle);
                 pLookup.lookupKind.runtimeLookupKind = GetGenericRuntimeLookupKind(contextMethod);
                 pLookup.lookupKind.runtimeLookupFlags = (ushort)ReadyToRunHelperId.DelegateCtor;
                 pLookup.lookupKind.runtimeLookupArgs = (void*)ObjectToHandle(delegateInfo);
@@ -546,9 +545,15 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum)
                 case CorInfoHelpFunc.CORINFO_HELP_MEMSET:
                     id = ReadyToRunHelper.MemSet;
                     break;
+                case CorInfoHelpFunc.CORINFO_HELP_MEMZERO:
+                    id = ReadyToRunHelper.MemZero;
+                    break;
                 case CorInfoHelpFunc.CORINFO_HELP_MEMCPY:
                     id = ReadyToRunHelper.MemCpy;
                     break;
+                case CorInfoHelpFunc.CORINFO_HELP_NATIVE_MEMSET:
+                    id = ReadyToRunHelper.NativeMemSet;
+                    break;
 
                 case CorInfoHelpFunc.CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE:
                     id = ReadyToRunHelper.GetRuntimeType;
@@ -989,9 +994,6 @@ private ObjectNode.ObjectData EncodeEHInfo()
                                 RelocType.IMAGE_REL_BASED_ABSOLUTE :
                                 RelocType.IMAGE_REL_BASED_RELPTR32;
 
-                            if (_compilation.NodeFactory.Target.Abi == TargetAbi.Jit)
-                                rel = RelocType.IMAGE_REL_BASED_REL32;
-
                             builder.EmitReloc(typeSymbol, rel);
                         }
                         break;
@@ -1091,16 +1093,16 @@ private void SetDebugInformation(IMethodNode methodCodeNodeNeedingCode, MethodIL
             _debugInfo = _compilation.GetDebugInfo(methodIL);
         }
 
-        private ISymbolNode GetGenericLookupHelper(CORINFO_RUNTIME_LOOKUP_KIND runtimeLookupKind, ReadyToRunHelperId helperId, object helperArgument)
+        private ISymbolNode GetGenericLookupHelper(CORINFO_RUNTIME_LOOKUP_KIND runtimeLookupKind, ReadyToRunHelperId helperId, MethodDesc callerHandle, object helperArgument)
         {
             if (runtimeLookupKind == CORINFO_RUNTIME_LOOKUP_KIND.CORINFO_LOOKUP_THISOBJ
                 || runtimeLookupKind == CORINFO_RUNTIME_LOOKUP_KIND.CORINFO_LOOKUP_CLASSPARAM)
             {
-                return _compilation.NodeFactory.ReadyToRunHelperFromTypeLookup(helperId, helperArgument, MethodBeingCompiled.OwningType);
+                return _compilation.NodeFactory.ReadyToRunHelperFromTypeLookup(helperId, helperArgument, callerHandle.OwningType);
             }
 
             Debug.Assert(runtimeLookupKind == CORINFO_RUNTIME_LOOKUP_KIND.CORINFO_LOOKUP_METHODPARAM);
-            return _compilation.NodeFactory.ReadyToRunHelperFromDictionaryLookup(helperId, helperArgument, MethodBeingCompiled);
+            return _compilation.NodeFactory.ReadyToRunHelperFromDictionaryLookup(helperId, helperArgument, callerHandle);
         }
 
         private CorInfoHelpFunc getCastingHelper(ref CORINFO_RESOLVED_TOKEN pResolvedToken, bool fThrowing)
@@ -1452,6 +1454,7 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO
                 ComputeLookup(ref pResolvedToken,
                     targetOfLookup,
                     ReadyToRunHelperId.MethodEntry,
+                    HandleToObject(callerHandle),
                     ref pResult->codePointerOrStubLookup);
 
                 targetIsFatFunctionPointer = true;
@@ -1473,20 +1476,9 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO
                     pResult->codePointerOrStubLookup.lookupKind.needsRuntimeLookup = true;
                     pResult->codePointerOrStubLookup.lookupKind.runtimeLookupFlags = 0;
                     pResult->codePointerOrStubLookup.runtimeLookup.indirections = CORINFO.USEHELPER;
-
-                    // Do not bother computing the runtime lookup if we are inlining. The JIT is going
-                    // to abort the inlining attempt anyway.
-                    if (pResolvedToken.tokenContext == contextFromMethodBeingCompiled())
-                    {
-                        MethodDesc contextMethod = methodFromContext(pResolvedToken.tokenContext);
-                        pResult->codePointerOrStubLookup.lookupKind.runtimeLookupKind = GetGenericRuntimeLookupKind(contextMethod);
-                        pResult->codePointerOrStubLookup.lookupKind.runtimeLookupFlags = (ushort)ReadyToRunHelperId.MethodEntry;
-                        pResult->codePointerOrStubLookup.lookupKind.runtimeLookupArgs = (void*)ObjectToHandle(GetRuntimeDeterminedObjectForToken(ref pResolvedToken));
-                    }
-                    else
-                    {
-                        pResult->codePointerOrStubLookup.lookupKind.runtimeLookupKind = CORINFO_RUNTIME_LOOKUP_KIND.CORINFO_LOOKUP_NOT_SUPPORTED;
-                    }
+                    pResult->codePointerOrStubLookup.lookupKind.runtimeLookupKind = GetGenericRuntimeLookupKind(HandleToObject(callerHandle));
+                    pResult->codePointerOrStubLookup.lookupKind.runtimeLookupFlags = (ushort)ReadyToRunHelperId.MethodEntry;
+                    pResult->codePointerOrStubLookup.lookupKind.runtimeLookupArgs = (void*)ObjectToHandle(GetRuntimeDeterminedObjectForToken(ref pResolvedToken));
                 }
                 else
                 {
@@ -1603,6 +1595,7 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO
                 ComputeLookup(ref pResolvedToken,
                     constrainedCallInfo,
                     constrainedHelperId,
+                    HandleToObject(callerHandle),
                     ref pResult->codePointerOrStubLookup);
 
                 targetIsFatFunctionPointer = true;
@@ -1625,6 +1618,7 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO
                 ComputeLookup(ref pResolvedToken,
                     targetOfLookup,
                     ReadyToRunHelperId.MethodHandle,
+                    HandleToObject(callerHandle),
                     ref pResult->codePointerOrStubLookup);
 
                 // RyuJIT will assert if we report CORINFO_CALLCONV_PARAMTYPE for a result of a ldvirtftn
@@ -1642,6 +1636,7 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO
                     ComputeLookup(ref pResolvedToken,
                         GetRuntimeDeterminedObjectForToken(ref pResolvedToken),
                         ReadyToRunHelperId.VirtualDispatchCell,
+                        HandleToObject(callerHandle),
                         ref pResult->codePointerOrStubLookup);
                     Debug.Assert(pResult->codePointerOrStubLookup.lookupKind.needsRuntimeLookup);
                 }
@@ -1661,31 +1656,14 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO
 
                 pResult->nullInstanceCheck = false;
             }
-            else if ((flags & CORINFO_CALLINFO_FLAGS.CORINFO_CALLINFO_LDFTN) == 0
-                // Canonically-equivalent types have the same vtable layout. Check the canonical form.
-                // We don't want to accidentally ask about Foo<object, __Canon> that may or may not
-                // be available to ask vtable questions about.
-                // This can happen in inlining that the scanner didn't expect.
-                && _compilation.HasFixedSlotVTable(targetMethod.OwningType.ConvertToCanonForm(CanonicalFormKind.Specific)))
+            else if ((flags & CORINFO_CALLINFO_FLAGS.CORINFO_CALLINFO_LDFTN) == 0)
             {
                 pResult->kind = CORINFO_CALL_KIND.CORINFO_VIRTUALCALL_VTABLE;
                 pResult->nullInstanceCheck = true;
             }
             else
             {
-                ReadyToRunHelperId helperId;
-                if ((flags & CORINFO_CALLINFO_FLAGS.CORINFO_CALLINFO_LDFTN) != 0)
-                {
-                    pResult->kind = CORINFO_CALL_KIND.CORINFO_VIRTUALCALL_LDVIRTFTN;
-                    helperId = ReadyToRunHelperId.ResolveVirtualFunction;
-                }
-                else
-                {
-                    // CORINFO_CALL_CODE_POINTER tells the JIT that this is indirect
-                    // call that should not be inlined.
-                    pResult->kind = CORINFO_CALL_KIND.CORINFO_CALL_CODE_POINTER;
-                    helperId = ReadyToRunHelperId.VirtualCall;
-                }
+                pResult->kind = CORINFO_CALL_KIND.CORINFO_VIRTUALCALL_LDVIRTFTN;
 
                 // If this is a non-interface call, we actually don't need a runtime lookup to find the target.
                 // We don't even need to keep track of the runtime-determined method being called because the system ensures
@@ -1696,7 +1674,6 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO
                     // We need JitInterface changes to fully support this.
                     // If this is LDVIRTFTN of an interface method that is part of a verifiable delegate creation sequence,
                     // RyuJIT is not going to use this value.
-                    Debug.Assert(helperId == ReadyToRunHelperId.ResolveVirtualFunction);
                     pResult->exactContextNeedsRuntimeLookup = false;
                     pResult->codePointerOrStubLookup.constLookup = CreateConstLookupToSymbol(_compilation.NodeFactory.ExternSymbol("NYI_LDVIRTFTN"));
                 }
@@ -1712,7 +1689,7 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO
 
                     pResult->codePointerOrStubLookup.constLookup =
                         CreateConstLookupToSymbol(
-                            _compilation.NodeFactory.ReadyToRunHelper(helperId, slotDefiningMethod));
+                            _compilation.NodeFactory.ReadyToRunHelper(ReadyToRunHelperId.ResolveVirtualFunction, slotDefiningMethod));
                 }
 
                 // The current NativeAOT ReadyToRun helpers do not handle null thisptr - ask the JIT to emit explicit null checks
@@ -1759,7 +1736,7 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO
             }
         }
 
-        private void embedGenericHandle(ref CORINFO_RESOLVED_TOKEN pResolvedToken, bool fEmbedParent, ref CORINFO_GENERICHANDLE_RESULT pResult)
+        private void embedGenericHandle(ref CORINFO_RESOLVED_TOKEN pResolvedToken, bool fEmbedParent, CORINFO_METHOD_STRUCT_* callerHandle, ref CORINFO_GENERICHANDLE_RESULT pResult)
         {
 #if DEBUG
             // In debug, write some bogus data to the struct to ensure we have filled everything
@@ -1850,7 +1827,7 @@ private void embedGenericHandle(ref CORINFO_RESOLVED_TOKEN pResolvedToken, bool
 
             Debug.Assert(pResult.compileTimeHandle != null);
 
-            ComputeLookup(ref pResolvedToken, target, helperId, ref pResult.lookup);
+            ComputeLookup(ref pResolvedToken, target, helperId, HandleToObject(callerHandle), ref pResult.lookup);
         }
 
         private CORINFO_METHOD_STRUCT_* embedMethodHandle(CORINFO_METHOD_STRUCT_* handle, ref void* ppIndirection)
@@ -1886,16 +1863,20 @@ private void getMethodVTableOffset(CORINFO_METHOD_STRUCT_* method, ref uint offs
             // Canonically-equivalent types have the same slots, so ask for Foo<__Canon, __Canon>.
             methodDesc = methodDesc.GetCanonMethodTarget(CanonicalFormKind.Specific);
 
-            int slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(_compilation.NodeFactory, methodDesc, methodDesc.OwningType);
+            TypeDesc owningType = methodDesc.OwningType;
+            int slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(_compilation.NodeFactory, methodDesc, owningType);
             if (slot == -1)
             {
                 throw new InvalidOperationException(methodDesc.ToString());
             }
 
+            if (_compilation.NeedsSlotUseTracking(owningType))
+                (_additionalDependencies ??= new ILCompiler.DependencyAnalysisFramework.DependencyNodeCore<NodeFactory>.DependencyList()).Add(_compilation.NodeFactory.VirtualMethodUse(methodDesc), "Virtual method call");
+
             offsetAfterIndirection = (uint)(EETypeNode.GetVTableOffset(pointerSize) + slot * pointerSize);
         }
 
-        private void expandRawHandleIntrinsic(ref CORINFO_RESOLVED_TOKEN pResolvedToken, ref CORINFO_GENERICHANDLE_RESULT pResult)
+        private void expandRawHandleIntrinsic(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_METHOD_STRUCT_* callerHandle, ref CORINFO_GENERICHANDLE_RESULT pResult)
         {
             // Resolved token as a potentially RuntimeDetermined object.
             MethodDesc method = (MethodDesc)GetRuntimeDeterminedObjectForToken(ref pResolvedToken);
@@ -1905,15 +1886,15 @@ private void expandRawHandleIntrinsic(ref CORINFO_RESOLVED_TOKEN pResolvedToken,
             switch (method.Name)
             {
                 case "Of":
-                    ComputeLookup(ref pResolvedToken, method.Instantiation[0], ReadyToRunHelperId.TypeHandle, ref pResult.lookup);
+                    ComputeLookup(ref pResolvedToken, method.Instantiation[0], ReadyToRunHelperId.TypeHandle, HandleToObject(callerHandle), ref pResult.lookup);
                     pResult.handleType = CorInfoGenericHandleType.CORINFO_HANDLETYPE_CLASS;
                     break;
                 case "DefaultConstructorOf":
-                    ComputeLookup(ref pResolvedToken, method.Instantiation[0], ReadyToRunHelperId.DefaultConstructor, ref pResult.lookup);
+                    ComputeLookup(ref pResolvedToken, method.Instantiation[0], ReadyToRunHelperId.DefaultConstructor, HandleToObject(callerHandle), ref pResult.lookup);
                     pResult.handleType = CorInfoGenericHandleType.CORINFO_HANDLETYPE_METHOD;
                     break;
                 case "AllocatorOf":
-                    ComputeLookup(ref pResolvedToken, method.Instantiation[0], ReadyToRunHelperId.ObjectAllocator, ref pResult.lookup);
+                    ComputeLookup(ref pResolvedToken, method.Instantiation[0], ReadyToRunHelperId.ObjectAllocator, HandleToObject(callerHandle), ref pResult.lookup);
                     pResult.handleType = CorInfoGenericHandleType.CORINFO_HANDLETYPE_UNKNOWN;
                     break;
                 default:
@@ -1964,6 +1945,7 @@ private void getAddressOfPInvokeTarget(CORINFO_METHOD_STRUCT_* method, ref CORIN
             MethodDesc md = HandleToObject(method);
 
             string externName = _compilation.PInvokeILProvider.GetDirectCallExternName(md);
+            externName = _compilation.NodeFactory.NameMangler.NodeMangler.ExternMethod(externName, md);
 
             pLookup = CreateConstLookupToSymbol(_compilation.NodeFactory.ExternSymbol(externName));
         }
@@ -1972,7 +1954,7 @@ private void getGSCookie(IntPtr* pCookieVal, IntPtr** ppCookieVal)
         {
             if (ppCookieVal != null)
             {
-                *ppCookieVal = (IntPtr*)ObjectToHandle(_compilation.NodeFactory.ExternSymbol("__security_cookie"));
+                *ppCookieVal = (IntPtr*)ObjectToHandle(_compilation.NodeFactory.ExternVariable("__security_cookie"));
                 *pCookieVal = IntPtr.Zero;
             }
             else
@@ -2117,7 +2099,7 @@ private void updateEntryPointForTailCall(ref CORINFO_CONST_LOOKUP entryPoint)
         private int* getAddrOfCaptureThreadGlobal(ref void* ppIndirection)
         {
             ppIndirection = null;
-            return (int*)ObjectToHandle(_compilation.NodeFactory.ExternSymbol("RhpTrapThreads"));
+            return (int*)ObjectToHandle(_compilation.NodeFactory.ExternVariable("RhpTrapThreads"));
         }
 
         private void getFieldInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_METHOD_STRUCT_* callerHandle, CORINFO_ACCESS_FLAGS flags, CORINFO_FIELD_INFO* pResult)
@@ -2171,46 +2153,41 @@ private void getFieldInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_MET
                     fieldAccessor = CORINFO_FIELD_ACCESSOR.CORINFO_FIELD_STATIC_READYTORUN_HELPER;
                     pResult->helper = CorInfoHelpFunc.CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE;
 
-                    // Don't try to compute the runtime lookup if we're inlining. The JIT is going to abort the inlining
-                    // attempt anyway.
-                    if (pResolvedToken.tokenContext == contextFromMethodBeingCompiled())
-                    {
-                        MethodDesc contextMethod = methodFromContext(pResolvedToken.tokenContext);
+                    MethodDesc contextMethod = HandleToObject(callerHandle);
 
-                        FieldDesc runtimeDeterminedField = (FieldDesc)GetRuntimeDeterminedObjectForToken(ref pResolvedToken);
+                    FieldDesc runtimeDeterminedField = (FieldDesc)GetRuntimeDeterminedObjectForToken(ref pResolvedToken);
 
-                        ReadyToRunHelperId helperId;
+                    ReadyToRunHelperId helperId;
 
-                        // Find out what kind of base do we need to look up.
-                        if (field.IsThreadStatic)
-                        {
-                            helperId = ReadyToRunHelperId.GetThreadStaticBase;
-                        }
-                        else if (field.HasGCStaticBase)
-                        {
-                            helperId = ReadyToRunHelperId.GetGCStaticBase;
-                        }
-                        else
-                        {
-                            helperId = ReadyToRunHelperId.GetNonGCStaticBase;
-                        }
-
-                        // What generic context do we look up the base from.
-                        ISymbolNode helper;
-                        if (contextMethod.AcquiresInstMethodTableFromThis() || contextMethod.RequiresInstMethodTableArg())
-                        {
-                            helper = _compilation.NodeFactory.ReadyToRunHelperFromTypeLookup(
-                                helperId, runtimeDeterminedField.OwningType, contextMethod.OwningType);
-                        }
-                        else
-                        {
-                            Debug.Assert(contextMethod.RequiresInstMethodDescArg());
-                            helper = _compilation.NodeFactory.ReadyToRunHelperFromDictionaryLookup(
-                                helperId, runtimeDeterminedField.OwningType, contextMethod);
-                        }
+                    // Find out what kind of base do we need to look up.
+                    if (field.IsThreadStatic)
+                    {
+                        helperId = ReadyToRunHelperId.GetThreadStaticBase;
+                    }
+                    else if (field.HasGCStaticBase)
+                    {
+                        helperId = ReadyToRunHelperId.GetGCStaticBase;
+                    }
+                    else
+                    {
+                        helperId = ReadyToRunHelperId.GetNonGCStaticBase;
+                    }
 
-                        pResult->fieldLookup = CreateConstLookupToSymbol(helper);
+                    // What generic context do we look up the base from.
+                    ISymbolNode helper;
+                    if (contextMethod.AcquiresInstMethodTableFromThis() || contextMethod.RequiresInstMethodTableArg())
+                    {
+                        helper = _compilation.NodeFactory.ReadyToRunHelperFromTypeLookup(
+                            helperId, runtimeDeterminedField.OwningType, contextMethod.OwningType);
                     }
+                    else
+                    {
+                        Debug.Assert(contextMethod.RequiresInstMethodDescArg());
+                        helper = _compilation.NodeFactory.ReadyToRunHelperFromDictionaryLookup(
+                            helperId, runtimeDeterminedField.OwningType, contextMethod);
+                    }
+
+                    pResult->fieldLookup = CreateConstLookupToSymbol(helper);
                 }
                 else
                 {
@@ -2227,7 +2204,10 @@ private void getFieldInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_MET
                     }
                     else if (field.IsThreadStatic)
                     {
-                        if ((MethodBeingCompiled.Context.Target.IsWindows || MethodBeingCompiled.Context.Target.OperatingSystem == TargetOS.Linux) && MethodBeingCompiled.Context.Target.Architecture == TargetArchitecture.X64)
+                        var target = MethodBeingCompiled.Context.Target;
+                        if ((target.IsWindows && target.Architecture is TargetArchitecture.X64) ||
+                            ((target.OperatingSystem == TargetOS.Linux) &&
+                            (target.Architecture is TargetArchitecture.X64 or TargetArchitecture.ARM64)))
                         {
                             ISortableSymbolNode index = _compilation.NodeFactory.TypeThreadStaticIndex((MetadataType)field.OwningType);
                             if (index is TypeThreadStaticIndexNode ti)
@@ -2301,14 +2281,42 @@ private void getFieldInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_MET
             //       and STS::AccessCheck::CanAccess.
         }
 
+        private bool CanNeverHaveInstanceOfSubclassOf(TypeDesc type)
+        {
+            // Don't try to optimize nullable
+            if (type.IsNullable)
+                return false;
+
+            // We don't track unconstructable types very well and they are rare anyway
+            if (!ConstructedEETypeNode.CreationAllowed(type))
+                return false;
+
+            TypeDesc canonType = type.ConvertToCanonForm(CanonicalFormKind.Specific);
+
+            // If we don't have a constructed MethodTable for the exact type or for its template,
+            // this type or any of its subclasses can never be instantiated.
+            return !_compilation.CanReferenceConstructedTypeOrCanonicalFormOfType(type)
+                && (type == canonType || !_compilation.CanReferenceConstructedMethodTable(canonType));
+        }
+
         private int getExactClasses(CORINFO_CLASS_STRUCT_* baseType, int maxExactClasses, CORINFO_CLASS_STRUCT_** exactClsRet)
         {
             MetadataType type = HandleToObject(baseType) as MetadataType;
             if (type == null)
+            {
+                return -1;
+            }
+
+            if (CanNeverHaveInstanceOfSubclassOf(type))
             {
                 return 0;
             }
 
+            if (maxExactClasses == 0)
+            {
+                return -1;
+            }
+
             // type is already sealed, return it
             if (_compilation.IsEffectivelySealed(type))
             {
@@ -2319,7 +2327,7 @@ private int getExactClasses(CORINFO_CLASS_STRUCT_* baseType, int maxExactClasses
             TypeDesc[] implClasses = _compilation.GetImplementingClasses(type);
             if (implClasses == null || implClasses.Length > maxExactClasses)
             {
-                return 0;
+                return -1;
             }
 
             int index = 0;
@@ -2361,12 +2369,16 @@ private bool getStaticFieldContent(CORINFO_FIELD_STRUCT_* fieldHandle, byte* buf
 
                     if (value == null)
                     {
-                        Debug.Assert(valueOffset == 0);
-                        Debug.Assert(bufferSize == targetPtrSize);
-
-                        // Write "null" to buffer
-                        new Span<byte>(buffer, targetPtrSize).Clear();
-                        return true;
+                        if ((valueOffset == 0) && (bufferSize == targetPtrSize))
+                        {
+                            // Write "null" to buffer
+                            new Span<byte>(buffer, targetPtrSize).Clear();
+                            return true;
+                        }
+                        else
+                        {
+                            return false;
+                        }
                     }
 
                     if (value.GetRawData(_compilation.NodeFactory, out object data))
@@ -2382,13 +2394,14 @@ private bool getStaticFieldContent(CORINFO_FIELD_STRUCT_* fieldHandle, byte* buf
                                 return false;
 
                             case FrozenObjectNode:
-                                Debug.Assert(valueOffset == 0);
-                                Debug.Assert(bufferSize == targetPtrSize);
-
-                                // save handle's value to buffer
-                                nint handle = ObjectToHandle(data);
-                                new Span<byte>(&handle, targetPtrSize).CopyTo(new Span<byte>(buffer, targetPtrSize));
-                                return true;
+                                if ((valueOffset == 0) && (bufferSize == targetPtrSize))
+                                {
+                                    // save handle's value to buffer
+                                    nint handle = ObjectToHandle(data);
+                                    new Span<byte>(&handle, targetPtrSize).CopyTo(new Span<byte>(buffer, targetPtrSize));
+                                    return true;
+                                }
+                                return false;
                         }
                     }
                 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/ILCompiler.Trimming.Tests.csproj b/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/ILCompiler.Trimming.Tests.csproj
index 508575ebce3b..935d178a29ef 100644
--- a/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/ILCompiler.Trimming.Tests.csproj
+++ b/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/ILCompiler.Trimming.Tests.csproj
@@ -15,11 +15,8 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="FluentAssertions" Version="$(FluentAssertionsVersion)" />
-
     <PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="$(MicrosoftCodeAnalysisCSharpVersion)" />
     <PackageReference Include="Microsoft.DotNet.Cecil" Version="$(MicrosoftDotNetCecilVersion)" />
-    <PackageReference Include="System.Reflection.MetadataLoadContext" Version="$(SystemReflectionMetadataLoadContextVersion)" />
 
     <ProjectReference Include="$(ToolsProjectRoot)illink/test/Mono.Linker.Tests.Cases/**/*.csproj" />
     <ProjectReference Include="$(ToolsProjectRoot)illink/test/Mono.Linker.Tests.Cases.Expectations/Mono.Linker.Tests.Cases.Expectations.csproj" />
diff --git a/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCases/TestDatabase.cs b/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCases/TestDatabase.cs
index 488a283b043d..f742d409d9b7 100644
--- a/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCases/TestDatabase.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCases/TestDatabase.cs
@@ -69,6 +69,11 @@ public static IEnumerable<object[]> SingleFile ()
 			return TestNamesBySuiteName ();
 		}
 
+		public static IEnumerable<object[]> Substitutions ()
+		{
+			return TestNamesBySuiteName ();
+		}
+
 		public static IEnumerable<object[]> TopLevelStatements ()
 		{
 			return TestNamesBySuiteName ();
diff --git a/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCases/TestSuites.cs b/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCases/TestSuites.cs
index 63751a6233d5..d4156edab60c 100644
--- a/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCases/TestSuites.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCases/TestSuites.cs
@@ -103,6 +103,20 @@ public void SingleFile (string t)
 			Run (t);
 		}
 
+		[Theory]
+		[MemberData (nameof (TestDatabase.Substitutions), MemberType = typeof (TestDatabase))]
+		public void Substitutions (string t)
+		{
+			switch (t) {
+			case "FeatureGuardSubstitutions":
+				Run (t);
+				break;
+			default:
+				// Skip the rest for now
+				break;
+			}
+		}
+
 		[Theory]
 		[MemberData (nameof (TestDatabase.TopLevelStatements), MemberType = typeof (TestDatabase))]
 		public void TopLevelStatements (string t)
diff --git a/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCasesRunner/AssemblyChecker.cs b/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCasesRunner/AssemblyChecker.cs
index 8dd1e4cb1367..c312bddba0af 100644
--- a/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCasesRunner/AssemblyChecker.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCasesRunner/AssemblyChecker.cs
@@ -6,7 +6,6 @@
 using System.IO;
 using System.Linq;
 using System.Text;
-using FluentAssertions;
 using ILCompiler;
 using Internal.TypeSystem;
 using Internal.TypeSystem.Ecma;
diff --git a/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCasesRunner/ResultChecker.cs b/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCasesRunner/ResultChecker.cs
index d0ac4085e83c..770bc1e97dfa 100644
--- a/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCasesRunner/ResultChecker.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Trimming.Tests/TestCasesRunner/ResultChecker.cs
@@ -8,7 +8,6 @@
 using System.IO;
 using System.Linq;
 using System.Text.RegularExpressions;
-using FluentAssertions;
 using ILCompiler.Logging;
 using Internal.TypeSystem;
 using Mono.Cecil;
diff --git a/src/coreclr/tools/aot/ILCompiler.TypeSystem.Tests/CoreTestAssembly/Platform.cs b/src/coreclr/tools/aot/ILCompiler.TypeSystem.Tests/CoreTestAssembly/Platform.cs
index fda0f02f5630..a2c1a1e06ad6 100644
--- a/src/coreclr/tools/aot/ILCompiler.TypeSystem.Tests/CoreTestAssembly/Platform.cs
+++ b/src/coreclr/tools/aot/ILCompiler.TypeSystem.Tests/CoreTestAssembly/Platform.cs
@@ -244,6 +244,7 @@ public class CallConvSuppressGCTransition { }
     public static class RuntimeFeature
     {
         public const string ByRefFields = nameof(ByRefFields);
+        public const string ByRefLikeGenerics = nameof(ByRefLikeGenerics);
         public const string UnmanagedSignatureCallingConvention = nameof(UnmanagedSignatureCallingConvention);
         public const string VirtualStaticsInInterfaces = nameof(VirtualStaticsInInterfaces);
     }
diff --git a/src/coreclr/tools/aot/ILCompiler.TypeSystem/ILCompiler.TypeSystem.csproj b/src/coreclr/tools/aot/ILCompiler.TypeSystem/ILCompiler.TypeSystem.csproj
index 268e2e6b1248..c46d5fecbfb7 100644
--- a/src/coreclr/tools/aot/ILCompiler.TypeSystem/ILCompiler.TypeSystem.csproj
+++ b/src/coreclr/tools/aot/ILCompiler.TypeSystem/ILCompiler.TypeSystem.csproj
@@ -5,11 +5,12 @@
     <AssemblyName>ILCompiler.TypeSystem</AssemblyName>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <TargetFramework>$(NetCoreAppToolCurrent)</TargetFramework>
+    <NoWarn>$(NoWarn);CA1866</NoWarn>
     <EnableDefaultCompileItems>false</EnableDefaultCompileItems>
     <Platforms>x64;x86</Platforms>
     <PlatformTarget>AnyCPU</PlatformTarget>
     <AppendTargetFrameworkToOutputPath>false</AppendTargetFrameworkToOutputPath>
-    <EnableDiaSymReaderUse Condition="'$(DotNetBuildFromSource)' != 'true'">true</EnableDiaSymReaderUse>
+    <EnableDiaSymReaderUse Condition="'$(DotNetBuildSourceOnly)' != 'true'">true</EnableDiaSymReaderUse>
     <DefineConstants Condition="'$(EnableDiaSymReaderUse)' != 'true'">$(DefineConstants);DISABLE_UNMANAGED_PDB_SYMBOLS</DefineConstants>
 
     <!-- We're binplacing these into an existing publish layout so that F5 build in VS updates
@@ -134,6 +135,9 @@
     <Compile Include="..\..\Common\TypeSystem\Common\ExplicitLayoutValidator.cs">
       <Link>TypeSystem\Common\ExplicitLayoutValidator.cs</Link>
     </Compile>
+    <Compile Include="..\..\Common\TypeSystem\Common\FieldLayoutIntervalCalculator.cs">
+      <Link>TypeSystem\Common\FieldLayoutIntervalCalculator.cs</Link>
+    </Compile>
     <Compile Include="..\..\Common\TypeSystem\Common\FunctionPointerType.cs">
       <Link>TypeSystem\Common\FunctionPointerType.cs</Link>
     </Compile>
@@ -173,6 +177,18 @@
     <Compile Include="..\..\Common\TypeSystem\Common\ThrowHelper.Common.cs">
       <Link>TypeSystem\Common\ThrowHelper.Common.cs</Link>
     </Compile>
+    <Compile Include="..\..\Common\TypeSystem\Common\TypeWithRepeatedFields.cs">
+      <Link>TypeSystem\Common\TypeWithRepeatedFields.cs</Link>
+    </Compile>
+    <Compile Include="..\..\Common\TypeSystem\Common\TypeWithRepeatedFields.Sorting.cs">
+      <Link>TypeSystem\Common\TypeWithRepeatedFields.Sorting.cs</Link>
+    </Compile>
+    <Compile Include="..\..\Common\TypeSystem\Common\TypeWithRepeatedFields.Diagnostic.cs">
+      <Link>TypeSystem\Common\TypeWithRepeatedFields.Diagnostic.cs</Link>
+    </Compile>
+    <Compile Include="..\..\Common\TypeSystem\Common\TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs">
+      <Link>TypeSystem\Common\TypeWithRepeatedFieldsFieldLayoutAlgorithm.cs</Link>
+    </Compile>
     <Compile Include="..\..\Common\TypeSystem\Common\UniversalCanonLayoutAlgorithm.cs">
       <Link>TypeSystem\Common\UniversalCanonLayoutAlgorithm.cs</Link>
     </Compile>
@@ -233,6 +249,12 @@
     <Compile Include="..\..\Common\TypeSystem\Common\FieldLayoutAlgorithm.cs">
       <Link>TypeSystem\Common\FieldLayoutAlgorithm.cs</Link>
     </Compile>
+    <Compile Include="..\..\Common\TypeSystem\Common\ImpliedRepeatedFieldDesc.cs">
+      <Link>TypeSystem\Common\ImpliedRepeatedFieldDesc.cs</Link>
+    </Compile>
+    <Compile Include="..\..\Common\TypeSystem\Common\ImpliedRepeatedFieldDesc.Sorting.cs">
+      <Link>TypeSystem\Common\ImpliedRepeatedFieldDesc.Sorting.cs</Link>
+    </Compile>
     <Compile Include="..\..\Common\TypeSystem\Common\InstantiatedMethod.cs">
       <Link>TypeSystem\Common\InstantiatedMethod.cs</Link>
     </Compile>
@@ -693,7 +715,7 @@
   </ItemGroup>
 
   <ItemGroup>
-    <PackageReference Include="Microsoft.CodeAnalysis.BannedApiAnalyzers" Version="$(MicrosoftCodeAnalysisBannedApiAnalyzersVersion)" Condition="'$(DotNetBuildFromSource)' != 'true'">
+    <PackageReference Include="Microsoft.CodeAnalysis.BannedApiAnalyzers" Version="$(MicrosoftCodeAnalysisBannedApiAnalyzersVersion)" Condition="'$(DotNetBuildSourceOnly)' != 'true'">
       <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
       <PrivateAssets>all</PrivateAssets>
     </PackageReference>
diff --git a/src/coreclr/tools/aot/ILCompiler/ILCompiler.csproj b/src/coreclr/tools/aot/ILCompiler/ILCompiler.csproj
index 903a7b7ce61a..751be7cb589b 100644
--- a/src/coreclr/tools/aot/ILCompiler/ILCompiler.csproj
+++ b/src/coreclr/tools/aot/ILCompiler/ILCompiler.csproj
@@ -10,10 +10,9 @@
   <!-- BEGIN: Workaround for https://github.com/dotnet/runtime/issues/67742 -->
   <PropertyGroup Condition="'$(BuildingInsideVisualStudio)' != 'true' and '$(IlcInnerLoop)' != 'true'">
     <PublishDir>$(RuntimeBinDir)ilc-published/</PublishDir>
-    <!-- Can't use NativeAOT in source build yet https://github.com/dotnet/runtime/issues/66859 -->
-    <NativeAotSupported Condition="'$(DotNetBuildFromSource)' == 'true'">false</NativeAotSupported>
     <NativeAotSupported Condition="$(OutputRID.StartsWith('tizen')) == 'true'">false</NativeAotSupported>
     <NativeAotSupported Condition="$(OutputRID.EndsWith('-arm')) == 'true'">false</NativeAotSupported>
+    <NativeAotSupported Condition="$(OutputRID.EndsWith('-x86')) == 'true'">false</NativeAotSupported>
     <!-- Disable native AOT on FreeBSD when cross building from Linux. -->
     <NativeAotSupported Condition="'$(TargetOS)' == 'freebsd' and '$(CrossBuild)' == 'true'">false</NativeAotSupported>
     <PublishAot Condition="'$(NativeAotSupported)' == 'true'">true</PublishAot>
@@ -22,16 +21,11 @@
     <PublishSingleFile Condition="'$(NativeAotSupported)' != 'true'">true</PublishSingleFile>
     <PublishTrimmed Condition="'$(NativeAotSupported)' != 'true'">true</PublishTrimmed>
     <SuppressGenerateILCompilerExplicitPackageReferenceWarning>true</SuppressGenerateILCompilerExplicitPackageReferenceWarning>
-    <!-- Compute host package name (taken from Microsoft.DotNet.ILCompiler.SingleEntry.targets) -->
-    <_hostOS>$(NETCoreSdkPortableRuntimeIdentifier.SubString(0, $(NETCoreSdkPortableRuntimeIdentifier.LastIndexOf('-'))))</_hostOS>
-    <_hostArchitecture Condition="'$(OS)' != 'Windows_NT'">$(NETCoreSdkPortableRuntimeIdentifier.SubString($([MSBuild]::Add($(NETCoreSdkPortableRuntimeIdentifier.LastIndexOf('-')), 1))))</_hostArchitecture>
-    <_hostArchitecture Condition="'$(OS)' == 'Windows_NT'">$([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture.ToString().ToLowerInvariant)</_hostArchitecture>
-    <_hostPackageName>runtime.$(_hostOS)-$(_hostArchitecture).Microsoft.DotNet.ILCompiler</_hostPackageName>
   </PropertyGroup>
 
   <ItemGroup Condition="'$(NativeAotSupported)' == 'true'">
     <PackageReference Include="Microsoft.DotNet.ILCompiler" Version="$(MicrosoftDotNetILCompilerVersion)" />
-    <PackageReference Include="$(_hostPackageName)" Version="$(MicrosoftDotNetILCompilerVersion)" />
+    <PackageReference Include="runtime.$(ToolsRID).Microsoft.DotNet.ILCompiler" Version="$(MicrosoftDotNetILCompilerVersion)" />
   </ItemGroup>
 
   <!-- Needed for the amd64 -> amd64 musl cross-build to pass the target flag. -->
@@ -72,6 +66,19 @@
       <Output TaskParameter="ConsoleOutput" PropertyName="_CC_LDFLAGS" />
     </Exec>
 
+    <Exec Command="xcodebuild -version" Condition="'$(_IsApplePlatform)' == 'true'" IgnoreExitCode="true" StandardOutputImportance="Low" ConsoleToMSBuild="true">
+      <Output TaskParameter="ExitCode" PropertyName="_XcodeVersionStringExitCode" />
+      <Output TaskParameter="ConsoleOutput" PropertyName="_XcodeVersionString" />
+    </Exec>
+
+    <PropertyGroup Condition="('$(_XcodeVersionStringExitCode)' == '0' or '$(_XcodeVersionStringExitCode)' == '1') and '$(_XcodeVersionString)' != ''">
+      <_XcodeVersion>$([System.Text.RegularExpressions.Regex]::Match($(_XcodeVersionString), '[1-9]\d*'))</_XcodeVersion>
+    </PropertyGroup>
+
+    <ItemGroup Condition="'$(NativeAotSupported)' == 'true' and '$(_IsApplePlatform)' == 'true'">
+      <CustomLinkerArg Condition="'$(_XcodeVersion)' &gt;= '15'" Include="-ld_classic" />
+    </ItemGroup>
+
     <PropertyGroup>
       <CppLinker>$(_CC_LDFLAGS.SubString(0, $(_CC_LDFLAGS.IndexOf(';'))))</CppLinker>
       <_LDFLAGS>$(_CC_LDFLAGS.SubString($([MSBuild]::Add($(_CC_LDFLAGS.IndexOf(';')), 1))))</_LDFLAGS>
@@ -80,7 +87,7 @@
   </Target>
 
   <ItemGroup Condition="'$(NativeAotSupported)' == 'true'">
-    <CustomLinkerArg Condition="'$(CrossBuild)' == 'true' and '$(_hostArchitecture)' == '$(_targetArchitecture)' and '$(_hostOS)' != 'windows' and '$(_IsApplePlatform)' != 'true'" Include="--gcc-toolchain=$(ROOTFS_DIR)/usr" />
+    <CustomLinkerArg Condition="'$(CrossBuild)' == 'true' and '$(BuildArchitecture)' == '$(_targetArchitecture)' and '$(HostOS)' != 'windows' and '$(_IsApplePlatform)' != 'true'" Include="--gcc-toolchain=$(ROOTFS_DIR)/usr" />
   </ItemGroup>
 
   <Target Name="PublishCompiler"
diff --git a/src/coreclr/tools/aot/ILCompiler/ILCompiler.props b/src/coreclr/tools/aot/ILCompiler/ILCompiler.props
index 75930bae7b61..4786697996e1 100644
--- a/src/coreclr/tools/aot/ILCompiler/ILCompiler.props
+++ b/src/coreclr/tools/aot/ILCompiler/ILCompiler.props
@@ -23,51 +23,13 @@
   </PropertyGroup>
 
   <PropertyGroup>
-    <!-- Massage the RuntimeIdentifier into an ObjWriter package RID that we can download -->
-    <_objWriterRidPlatformIndex>$(RuntimeIdentifier.LastIndexOf('-'))</_objWriterRidPlatformIndex>
-    <ObjWriterRidWithoutPlatform>$(RuntimeIdentifier.Substring(0, $(_objWriterRidPlatformIndex)))</ObjWriterRidWithoutPlatform>
-    <ObjWriterRidPlatform>$(RuntimeIdentifier.Substring($(_objWriterRidPlatformIndex)).TrimStart('-'))</ObjWriterRidPlatform>
-
-    <!-- If it's not win/osx/freebsd/linux-musl, it's a non-portable Linux. Treat as Linux. -->
-    <ObjWriterRidWithoutPlatform Condition="'$(ObjWriterRidWithoutPlatform)' != 'win' and '$(ObjWriterRidWithoutPlatform)' != 'osx' and '$(ObjWriterRidWithoutPlatform)' != 'linux-musl' and '$(ObjWriterRidWithoutPlatform)' != 'freebsd'">linux</ObjWriterRidWithoutPlatform>
-
-    <ObjWriterRid Condition="'$(ObjWriterRid)' == ''">$(ObjWriterRidWithoutPlatform)-$(ObjWriterRidPlatform)</ObjWriterRid>
-
-    <ObjWriterVersion Condition="'$(ObjWriterVersion)' == '' and '$(ObjWriterRid)' == 'linux-arm64'">$(runtimelinuxarm64MicrosoftNETCoreRuntimeObjWriterVersion)</ObjWriterVersion>
-    <ObjWriterVersion Condition="'$(ObjWriterVersion)' == '' and '$(ObjWriterRid)' == 'linux-x64'">$(runtimelinuxx64MicrosoftNETCoreRuntimeObjWriterVersion)</ObjWriterVersion>
-    <ObjWriterVersion Condition="'$(ObjWriterVersion)' == '' and '$(ObjWriterRid)' == 'linux-musl-arm64'">$(runtimelinuxmuslarm64MicrosoftNETCoreRuntimeObjWriterVersion)</ObjWriterVersion>
-    <ObjWriterVersion Condition="'$(ObjWriterVersion)' == '' and '$(ObjWriterRid)' == 'linux-musl-x64'">$(runtimelinuxmuslx64MicrosoftNETCoreRuntimeObjWriterVersion)</ObjWriterVersion>
-    <ObjWriterVersion Condition="'$(ObjWriterVersion)' == '' and '$(ObjWriterRid)' == 'win-arm64'">$(runtimewinarm64MicrosoftNETCoreRuntimeObjWriterVersion)</ObjWriterVersion>
-    <ObjWriterVersion Condition="'$(ObjWriterVersion)' == '' and '$(ObjWriterRid)' == 'win-x64'">$(runtimewinx64MicrosoftNETCoreRuntimeObjWriterVersion)</ObjWriterVersion>
-    <ObjWriterVersion Condition="'$(ObjWriterVersion)' == '' and '$(ObjWriterRid)' == 'osx-arm64'">$(runtimeosxarm64MicrosoftNETCoreRuntimeObjWriterVersion)</ObjWriterVersion>
-    <ObjWriterVersion Condition="'$(ObjWriterVersion)' == '' and '$(ObjWriterRid)' == 'osx-x64'">$(runtimeosxx64MicrosoftNETCoreRuntimeObjWriterVersion)</ObjWriterVersion>
-    <ObjWriterVersion Condition="'$(ObjWriterVersion)' == '' and '$(ObjWriterRid)' == 'freebsd-x64'">$(runtimefreebsdx64MicrosoftNETCoreRuntimeObjWriterVersion)</ObjWriterVersion>
-    <ObjWriterVersion Condition="'$(ObjWriterVersion)' == '' and '$(ObjWriterRid)' == 'freebsd-arm64'">$(runtimefreebsdarm64MicrosoftNETCoreRuntimeObjWriterVersion)</ObjWriterVersion>
-
     <!-- CoreDisTools are used in debugging visualizers. -->
     <IncludeCoreDisTools Condition="'$(Configuration)' != 'Release' and '$(CrossHostArch)' == ''">true</IncludeCoreDisTools>
-    <!-- source-build doesn't use ObjWriter for the ILCompiler.  the end-user will end up pulling Microsoft-built bits for NativeAOT anyway.  -->
-    <IncludeObjWriter Condition="'$(DotNetBuildFromSource)' != 'true' and '$(ObjWriterVersion)' != ''">true</IncludeObjWriter>
   </PropertyGroup>
 
-  <Import Project="$(RepositoryEngineeringDir)coredistools.targets" Condition="'$(DotNetBuildFromSource)' != 'true' and '$(IncludeCoreDisTools)' == 'true'" />
+  <Import Project="$(RepositoryEngineeringDir)coredistools.targets" Condition="'$(DotNetBuildSourceOnly)' != 'true' and '$(IncludeCoreDisTools)' == 'true'" />
 
   <ItemGroup>
-    <PackageReference Include="runtime.$(ObjWriterRid).Microsoft.NETCore.Runtime.ObjWriter" Condition="'$(IncludeObjWriter)' == 'true'">
-      <Version>$(ObjWriterVersion)</Version>
-    </PackageReference>
-
-    <!-- Workaround until the SDK does this, see https://github.com/dotnet/sdk/issues/24799 -->
-    <PackageReference Include="NETStandard.Library">
-      <Version>$(NetStandardLibraryVersion)</Version>
-    </PackageReference>
-
-    <Content Include="$(NuGetPackageRoot)runtime.$(ObjWriterRid).microsoft.netcore.runtime.objwriter\$(ObjWriterVersion)\runtimes\$(ObjWriterRid)\native\$(LibPrefix)objwriter$(LibSuffix)" Condition="'$(IncludeObjWriter)' == 'true'">
-      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-      <Visible>false</Visible>
-      <Pack>false</Pack>
-    </Content>
-
     <Content Include="$(CoreDisToolsLibrary)" Condition="Exists('$(CoreDisToolsLibrary)') and '$(IncludeCoreDisTools)' == 'true'">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
diff --git a/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs b/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs
index 917f1b612942..f38811fd93d2 100644
--- a/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs
+++ b/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs
@@ -148,7 +148,7 @@ internal sealed class ILCompilerRootCommand : CliRootCommand
         public CliOption<bool> RootDefaultAssemblies { get; } =
             new("--defaultrooting") { Description = "Root assemblies that are not marked [IsTrimmable]" };
         public CliOption<TargetArchitecture> TargetArchitecture { get; } =
-            new("--targetarch") { CustomParser = result => Helpers.GetTargetArchitecture(result.Tokens.Count > 0 ? result.Tokens[0].Value : null), DefaultValueFactory = result => Helpers.GetTargetArchitecture(result.Tokens.Count > 0 ? result.Tokens[0].Value : null), Description = "Target architecture for cross compilation", HelpName = "arg" };
+            new("--targetarch") { CustomParser = MakeTargetArchitecture, DefaultValueFactory = MakeTargetArchitecture, Description = "Target architecture for cross compilation", HelpName = "arg" };
         public CliOption<TargetOS> TargetOS { get; } =
             new("--targetos") { CustomParser = result => Helpers.GetTargetOS(result.Tokens.Count > 0 ? result.Tokens[0].Value : null), DefaultValueFactory = result => Helpers.GetTargetOS(result.Tokens.Count > 0 ? result.Tokens[0].Value : null), Description = "Target OS for cross compilation", HelpName = "arg" };
         public CliOption<string> JitPath { get; } =
@@ -170,6 +170,7 @@ internal sealed class ILCompilerRootCommand : CliRootCommand
 
         public OptimizationMode OptimizationMode { get; private set; }
         public ParseResult Result;
+        public static bool IsArmel { get; private set; }
 
         public ILCompilerRootCommand(string[] args) : base(".NET Native IL Compiler")
         {
@@ -373,6 +374,18 @@ public static IEnumerable<Func<HelpContext, bool>> GetExtendedHelp(HelpContext _
             };
         }
 
+        private static TargetArchitecture MakeTargetArchitecture(ArgumentResult result)
+        {
+            string firstToken = result.Tokens.Count > 0 ? result.Tokens[0].Value : null;
+            if (firstToken != null && firstToken.Equals("armel", StringComparison.OrdinalIgnoreCase))
+            {
+                IsArmel = true;
+                return Internal.TypeSystem.TargetArchitecture.ARM;
+            }
+
+            return Helpers.GetTargetArchitecture(firstToken);
+        }
+
         private static int MakeParallelism(ArgumentResult result)
         {
             if (result.Tokens.Count > 0)
diff --git a/src/coreclr/tools/aot/ILCompiler/Program.cs b/src/coreclr/tools/aot/ILCompiler/Program.cs
index bd9d12b708f3..ff51ce727e58 100644
--- a/src/coreclr/tools/aot/ILCompiler/Program.cs
+++ b/src/coreclr/tools/aot/ILCompiler/Program.cs
@@ -112,7 +112,7 @@ public int Run()
             SharedGenericsMode genericsMode = SharedGenericsMode.CanonicalReferenceTypes;
 
             var simdVectorLength = instructionSetSupport.GetVectorTSimdVector();
-            var targetAbi = TargetAbi.NativeAot;
+            var targetAbi = ILCompilerRootCommand.IsArmel ? TargetAbi.NativeAotArmel : TargetAbi.NativeAot;
             var targetDetails = new TargetDetails(targetArchitecture, targetOS, targetAbi, simdVectorLength);
             CompilerTypeSystemContext typeSystemContext =
                 new CompilerTypeSystemContext(targetDetails, genericsMode, supportsReflection ? DelegateFeature.All : 0,
@@ -464,7 +464,7 @@ public int Run()
             TypePreinit.TypePreinitializationPolicy preinitPolicy = preinitStatics ?
                 new TypePreinit.TypeLoaderAwarePreinitializationPolicy() : new TypePreinit.DisabledPreinitializationPolicy();
 
-            var preinitManager = new PreinitializationManager(typeSystemContext, compilationGroup, ilProvider, preinitPolicy, new StaticReadOnlyFieldPolicy());
+            var preinitManager = new PreinitializationManager(typeSystemContext, compilationGroup, ilProvider, preinitPolicy, new StaticReadOnlyFieldPolicy(), flowAnnotations);
             builder
                 .UseILProvider(ilProvider)
                 .UsePreinitializationManager(preinitManager);
@@ -543,7 +543,7 @@ void RunScanner()
                 {
                     var readOnlyFieldPolicy = scanResults.GetReadOnlyFieldPolicy();
                     preinitManager = new PreinitializationManager(typeSystemContext, compilationGroup, ilProvider, scanResults.GetPreinitializationPolicy(),
-                        readOnlyFieldPolicy);
+                        readOnlyFieldPolicy, flowAnnotations);
                     builder.UsePreinitializationManager(preinitManager)
                         .UseReadOnlyFieldPolicy(readOnlyFieldPolicy);
                 }
diff --git a/src/coreclr/tools/aot/ILCompiler/repro/repro.csproj b/src/coreclr/tools/aot/ILCompiler/repro/repro.csproj
index 09e2a5bec4c8..841272436060 100644
--- a/src/coreclr/tools/aot/ILCompiler/repro/repro.csproj
+++ b/src/coreclr/tools/aot/ILCompiler/repro/repro.csproj
@@ -6,7 +6,9 @@
     <PlatformTarget>AnyCPU</PlatformTarget>
     <AppendTargetFrameworkToOutputPath>false</AppendTargetFrameworkToOutputPath>
     <AppendRuntimeIdentifierToOutputPath>false</AppendRuntimeIdentifierToOutputPath>
-    <RuntimeIdentifiers>linux-x64;win-x64;osx-x64;freebsd-x64;freebsd-arm64</RuntimeIdentifiers>
+    <RuntimeIdentifiers>linux-x64;win-x64;osx-x64</RuntimeIdentifiers>
+    <!-- FreeBSD runtime/apphost packs aren't built in the official build so only reference the RIDs when targetting FreeBSD -->
+    <RuntimeIdentifiers Condition="'$(TargetOS)' == 'freebsd'">$(RuntimeIdentifiers);freebsd-x64;freebsd-arm64</RuntimeIdentifiers>
     <Configurations>Debug;Release;Checked</Configurations>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <RunAnalyzers>false</RunAnalyzers>
@@ -18,6 +20,7 @@
 
     <ItemGroup>
       <ReproResponseLines Include="$(OutputPath)$(AssemblyName)$(TargetExt)" />
+      <ReproResponseLines Include="--targetarch:$(Platform)" />
       <ReproResponseLines Include="-o:$(OutputPath)$(AssemblyName).obj" />
       <ReproResponseLines Include="-r:$(RuntimeBinDir)aotsdk\*.dll" />
       <ReproResponseLines Include="-r:$(MicrosoftNetCoreAppRuntimePackRidLibTfmDir)*.dll" />
diff --git a/src/coreclr/tools/aot/ILCompiler/reproNative/reproNative.vcxproj b/src/coreclr/tools/aot/ILCompiler/reproNative/reproNative.vcxproj
index d6b3471a2838..5a94961e0492 100644
--- a/src/coreclr/tools/aot/ILCompiler/reproNative/reproNative.vcxproj
+++ b/src/coreclr/tools/aot/ILCompiler/reproNative/reproNative.vcxproj
@@ -1,5 +1,13 @@
 <Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Checked|Win32">
+      <Configuration>Checked</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|x64">
       <Configuration>Debug</Configuration>
       <Platform>x64</Platform>
@@ -8,6 +16,10 @@
       <Configuration>Checked</Configuration>
       <Platform>x64</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|x64">
       <Configuration>Release</Configuration>
       <Platform>x64</Platform>
@@ -30,6 +42,12 @@
     <PlatformToolset>v143</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Checked|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -37,6 +55,13 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Checked|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -44,31 +69,59 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v143</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Checked|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Checked|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>false</LinkIncremental>
     <GenerateManifest>false</GenerateManifest>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <GenerateManifest>false</GenerateManifest>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Checked|x64'">
     <LinkIncremental>false</LinkIncremental>
     <GenerateManifest>false</GenerateManifest>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Checked|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <GenerateManifest>false</GenerateManifest>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
     <GenerateManifest>false</GenerateManifest>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <GenerateManifest>false</GenerateManifest>
+  </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
@@ -87,6 +140,25 @@
       <AdditionalDependencies>$(ArtifactsRoot)bin\repro\x64\Debug\repro.obj;$(Win32SDKLibs);%(AdditionalDependencies);$(ArtifactsRoot)bin\coreclr\windows.x64.Debug\aotsdk\Runtime.WorkstationGC.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Debug\aotsdk\System.Globalization.Native.Aot.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Debug\aotsdk\System.IO.Compression.Native.Aot.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Debug\aotsdk\eventpipe-disabled.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Debug\aotsdk\Runtime.VxsortDisabled.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Debug\aotsdk\standalonegc-disabled.lib</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_LIB;HOST_X86;HOST_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>$(CoreClrSourceRoot)gc;$(CoreClrSourceRoot)gc\env</AdditionalIncludeDirectories>
+      <DisableSpecificWarnings>4477</DisableSpecificWarnings>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <MinimalRebuild>false</MinimalRebuild>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <CallingConvention>StdCall</CallingConvention>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>$(ArtifactsRoot)bin\repro\x86\Debug\repro.obj;$(Win32SDKLibs);%(AdditionalDependencies);$(ArtifactsRoot)bin\coreclr\windows.x86.Debug\aotsdk\Runtime.WorkstationGC.lib;$(ArtifactsRoot)bin\coreclr\windows.x86.Debug\aotsdk\System.Globalization.Native.Aot.lib;$(ArtifactsRoot)bin\coreclr\windows.x86.Debug\aotsdk\System.IO.Compression.Native.Aot.lib;$(ArtifactsRoot)bin\coreclr\windows.x86.Debug\aotsdk\eventpipe-disabled.lib;$(ArtifactsRoot)bin\coreclr\windows.x86.Debug\aotsdk\standalonegc-disabled.lib</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Checked|x64'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
@@ -107,6 +179,27 @@
       <AdditionalDependencies>$(ArtifactsRoot)bin\repro\x64\Checked\repro.obj;$(Win32SDKLibs);%(AdditionalDependencies);$(ArtifactsRoot)bin\coreclr\windows.x64.Checked\aotsdk\Runtime.WorkstationGC.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Checked\aotsdk\System.Globalization.Native.Aot.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Checked\aotsdk\System.IO.Compression.Native.Aot.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Checked\aotsdk\eventpipe-disabled.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Checked\aotsdk\Runtime.VxsortDisabled.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Checked\aotsdk\standalonegc-disabled.lib</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Checked|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_LIB;HOST_X86;HOST_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>$(CoreClrSourceRoot)gc;$(CoreClrSourceRoot)gc\env</AdditionalIncludeDirectories>
+      <DisableSpecificWarnings>4477</DisableSpecificWarnings>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <CallingConvention>StdCall</CallingConvention>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>$(ArtifactsRoot)bin\repro\x86\Checked\repro.obj;$(Win32SDKLibs);%(AdditionalDependencies);$(ArtifactsRoot)bin\coreclr\windows.x86.Checked\aotsdk\Runtime.WorkstationGC.lib;$(ArtifactsRoot)bin\coreclr\windows.x86.Checked\aotsdk\System.Globalization.Native.Aot.lib;$(ArtifactsRoot)bin\coreclr\windows.x86.Checked\aotsdk\System.IO.Compression.Native.Aot.lib;$(ArtifactsRoot)bin\coreclr\windows.x86.Checked\aotsdk\eventpipe-disabled.lib;$(ArtifactsRoot)bin\coreclr\windows.x86.Checked\aotsdk\standalonegc-disabled.lib</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
@@ -127,6 +220,27 @@
       <AdditionalDependencies>$(ArtifactsRoot)bin\repro\x64\Release\repro.obj;$(Win32SDKLibs);%(AdditionalDependencies);$(ArtifactsRoot)bin\coreclr\windows.x64.Release\aotsdk\Runtime.WorkstationGC.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Release\aotsdk\System.Globalization.Native.Aot.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Release\aotsdk\System.IO.Compression.Native.Aot.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Release\aotsdk\eventpipe-disabled.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Release\aotsdk\Runtime.VxsortDisabled.lib;$(ArtifactsRoot)bin\coreclr\windows.x64.Release\aotsdk\standalonegc-disabled.lib</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_LIB;HOST_X86;HOST_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>$(CoreClrSourceRoot)gc;$(CoreClrSourceRoot)gc\env</AdditionalIncludeDirectories>
+      <DisableSpecificWarnings>4477</DisableSpecificWarnings>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <CallingConvention>StdCall</CallingConvention>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>$(ArtifactsRoot)bin\repro\x86\Release\repro.obj;$(Win32SDKLibs);%(AdditionalDependencies);$(ArtifactsRoot)bin\coreclr\windows.x86.Release\aotsdk\Runtime.WorkstationGC.lib;$(ArtifactsRoot)bin\coreclr\windows.x86.Release\aotsdk\System.Globalization.Native.Aot.lib;$(ArtifactsRoot)bin\coreclr\windows.x86.Release\aotsdk\System.IO.Compression.Native.Aot.lib;$(ArtifactsRoot)bin\coreclr\windows.x86.Release\aotsdk\eventpipe-disabled.lib;$(ArtifactsRoot)bin\coreclr\windows.x86.Release\aotsdk\standalonegc-disabled.lib</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="$(NativeAotSourceRoot)\Bootstrap\main.cpp" />
     <None Include="..\repro\Program.cs" />
diff --git a/src/coreclr/tools/aot/crossgen2/crossgen2_publish.csproj b/src/coreclr/tools/aot/crossgen2/crossgen2_publish.csproj
index 984ca91f13a7..657ac23590a0 100644
--- a/src/coreclr/tools/aot/crossgen2/crossgen2_publish.csproj
+++ b/src/coreclr/tools/aot/crossgen2/crossgen2_publish.csproj
@@ -9,6 +9,7 @@
   <PropertyGroup>
     <NativeAotSupported Condition="$(OutputRID.StartsWith('tizen')) == 'true'">false</NativeAotSupported>
     <NativeAotSupported Condition="$(OutputRID.EndsWith('-arm')) == 'true'">false</NativeAotSupported>
+    <NativeAotSupported Condition="$(OutputRID.EndsWith('-x86')) == 'true'">false</NativeAotSupported>
     <!-- Publish crossgen2 as a single-file app on native-OS builds. Cross-OS NativeAOT compilation is not supported yet -->
     <NativeAotSupported Condition="'$(CrossBuild)' == 'true' and '$(TargetOS)' != '$(HostOS)'">false</NativeAotSupported>
     <PublishTrimmed>true</PublishTrimmed>
diff --git a/src/coreclr/tools/aot/ilc.sln b/src/coreclr/tools/aot/ilc.sln
index 2b2d3b8009aa..cee0d50969cb 100644
--- a/src/coreclr/tools/aot/ilc.sln
+++ b/src/coreclr/tools/aot/ilc.sln
@@ -18,22 +18,24 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.LLVM", "ILCompil
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "repro", "ILCompiler\repro\repro.csproj", "{CBDE0470-E0C9-4693-9A11-ACC117522F3F}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clrjit_universal_wasm32_x64", "..\..\..\..\artifacts\obj\coreclr\windows.x64.Debug\ide\jit\clrjit_universal_wasm32_x64.vcxproj", "{D24E2045-B535-3592-911C-9A43AF824048}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clrjit_universal_wasm32_x64", "..\..\..\..\artifacts\obj\coreclr\windows.x64.Debug\ide\jit\clrjit_universal_wasm32_x64.vcxproj", "{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clrjit_win_x64_x64", "..\..\..\..\artifacts\obj\coreclr\windows.x64.Debug\ide\jit\clrjit_win_x64_x64.vcxproj", "{8A189216-D635-3531-861A-C34679A4E081}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clrjit_win_x64_x64", "..\..\..\..\artifacts\obj\coreclr\windows.x64.Debug\ide\jit\clrjit_win_x64_x64.vcxproj", "{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}"
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.Compiler.Tests", "ILCompiler.Compiler.Tests\ILCompiler.Compiler.Tests.csproj", "{24CBA9C6-EDBA-47D6-A0B5-04417BDE5FE3}"
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.TypeSystem.Tests", "ILCompiler.TypeSystem.Tests\ILCompiler.TypeSystem.Tests.csproj", "{740CDFF4-B8EC-4A37-951B-C9FE9980EF2A}"
 EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Mono.Linker.Tests", "Mono.Linker.Tests\Mono.Linker.Tests.csproj", "{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}"
-EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Mono.Linker.Tests.Cases", "..\..\..\tools\illink\test\Mono.Linker.Tests.Cases\Mono.Linker.Tests.Cases.csproj", "{9DA153BF-51C4-4AD7-A355-9F9528843DC7}"
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Mono.Linker.Tests.Cases.Expectations", "..\..\..\tools\illink\test\Mono.Linker.Tests.Cases.Expectations\Mono.Linker.Tests.Cases.Expectations.csproj", "{219E0AC3-CDBF-4104-B324-85915DD16E25}"
 EndProject
 Project("{D954291E-2A0B-460D-934E-DC6B0785DB48}") = "ILLink.Shared", "..\..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.shproj", "{FF598E93-8E9E-4091-9F50-61A7572663AE}"
 EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILCompiler.Trimming.Tests", "ILCompiler.Trimming.Tests\ILCompiler.Trimming.Tests.csproj", "{C331F49A-B2BA-46A4-975B-E922AA43FB6F}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ILLink.RoslynAnalyzer", "..\..\..\tools\illink\src\ILLink.RoslynAnalyzer\ILLink.RoslynAnalyzer.csproj", "{B49A6D20-EF0F-48EE-A686-0E615D6640B0}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Checked|Any CPU = Checked|Any CPU
@@ -218,54 +220,54 @@ Global
 		{CBDE0470-E0C9-4693-9A11-ACC117522F3F}.RelWithDebInfo|x64.Build.0 = Release|x64
 		{CBDE0470-E0C9-4693-9A11-ACC117522F3F}.RelWithDebInfo|x86.ActiveCfg = Release|x86
 		{CBDE0470-E0C9-4693-9A11-ACC117522F3F}.RelWithDebInfo|x86.Build.0 = Release|x86
-		{D24E2045-B535-3592-911C-9A43AF824048}.Checked|Any CPU.ActiveCfg = Checked|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Checked|Any CPU.Build.0 = Checked|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Checked|x64.ActiveCfg = Checked|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Checked|x64.Build.0 = Checked|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Checked|x86.ActiveCfg = Checked|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Checked|x86.Build.0 = Checked|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Debug|Any CPU.ActiveCfg = Debug|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Debug|Any CPU.Build.0 = Debug|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Debug|x64.ActiveCfg = Debug|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Debug|x64.Build.0 = Debug|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Debug|x86.ActiveCfg = Debug|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Debug|x86.Build.0 = Debug|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Release|Any CPU.ActiveCfg = Release|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Release|Any CPU.Build.0 = Release|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Release|x64.ActiveCfg = Release|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Release|x64.Build.0 = Release|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Release|x86.ActiveCfg = Release|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.Release|x86.Build.0 = Release|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.RelWithDebInfo|Any CPU.ActiveCfg = RelWithDebInfo|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.RelWithDebInfo|Any CPU.Build.0 = RelWithDebInfo|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.RelWithDebInfo|x86.ActiveCfg = RelWithDebInfo|x64
-		{D24E2045-B535-3592-911C-9A43AF824048}.RelWithDebInfo|x86.Build.0 = RelWithDebInfo|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Checked|Any CPU.ActiveCfg = Checked|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Checked|Any CPU.Build.0 = Checked|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Checked|x64.ActiveCfg = Checked|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Checked|x64.Build.0 = Checked|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Checked|x86.ActiveCfg = Checked|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Checked|x86.Build.0 = Checked|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Debug|Any CPU.ActiveCfg = Debug|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Debug|Any CPU.Build.0 = Debug|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Debug|x64.ActiveCfg = Debug|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Debug|x64.Build.0 = Debug|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Debug|x86.ActiveCfg = Debug|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Debug|x86.Build.0 = Debug|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Release|Any CPU.ActiveCfg = Release|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Release|Any CPU.Build.0 = Release|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Release|x64.ActiveCfg = Release|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Release|x64.Build.0 = Release|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Release|x86.ActiveCfg = Release|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.Release|x86.Build.0 = Release|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.RelWithDebInfo|Any CPU.ActiveCfg = RelWithDebInfo|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.RelWithDebInfo|Any CPU.Build.0 = RelWithDebInfo|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.RelWithDebInfo|x86.ActiveCfg = RelWithDebInfo|x64
-		{8A189216-D635-3531-861A-C34679A4E081}.RelWithDebInfo|x86.Build.0 = RelWithDebInfo|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Checked|Any CPU.ActiveCfg = Checked|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Checked|Any CPU.Build.0 = Checked|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Checked|x64.ActiveCfg = Checked|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Checked|x64.Build.0 = Checked|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Checked|x86.ActiveCfg = Checked|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Checked|x86.Build.0 = Checked|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Debug|Any CPU.ActiveCfg = Debug|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Debug|Any CPU.Build.0 = Debug|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Debug|x64.ActiveCfg = Debug|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Debug|x64.Build.0 = Debug|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Debug|x86.ActiveCfg = Debug|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Debug|x86.Build.0 = Debug|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Release|Any CPU.ActiveCfg = Release|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Release|Any CPU.Build.0 = Release|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Release|x64.ActiveCfg = Release|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Release|x64.Build.0 = Release|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Release|x86.ActiveCfg = Release|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.Release|x86.Build.0 = Release|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.RelWithDebInfo|Any CPU.ActiveCfg = RelWithDebInfo|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.RelWithDebInfo|Any CPU.Build.0 = RelWithDebInfo|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.RelWithDebInfo|x86.ActiveCfg = RelWithDebInfo|x64
+		{5B480F70-D8EF-31EF-9BFD-AFA0D1014B22}.RelWithDebInfo|x86.Build.0 = RelWithDebInfo|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Checked|Any CPU.ActiveCfg = Checked|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Checked|Any CPU.Build.0 = Checked|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Checked|x64.ActiveCfg = Checked|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Checked|x64.Build.0 = Checked|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Checked|x86.ActiveCfg = Checked|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Checked|x86.Build.0 = Checked|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Debug|Any CPU.ActiveCfg = Debug|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Debug|Any CPU.Build.0 = Debug|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Debug|x64.ActiveCfg = Debug|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Debug|x64.Build.0 = Debug|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Debug|x86.ActiveCfg = Debug|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Debug|x86.Build.0 = Debug|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Release|Any CPU.ActiveCfg = Release|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Release|Any CPU.Build.0 = Release|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Release|x64.ActiveCfg = Release|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Release|x64.Build.0 = Release|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Release|x86.ActiveCfg = Release|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.Release|x86.Build.0 = Release|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.RelWithDebInfo|Any CPU.ActiveCfg = RelWithDebInfo|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.RelWithDebInfo|Any CPU.Build.0 = RelWithDebInfo|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.RelWithDebInfo|x86.ActiveCfg = RelWithDebInfo|x64
+		{AE4FFE05-5F31-31CF-84DC-4DD333FF3D22}.RelWithDebInfo|x86.Build.0 = RelWithDebInfo|x64
 		{24CBA9C6-EDBA-47D6-A0B5-04417BDE5FE3}.Checked|Any CPU.ActiveCfg = Checked|x64
 		{24CBA9C6-EDBA-47D6-A0B5-04417BDE5FE3}.Checked|Any CPU.Build.0 = Checked|x64
 		{24CBA9C6-EDBA-47D6-A0B5-04417BDE5FE3}.Checked|x64.ActiveCfg = Checked|x64
@@ -314,30 +316,6 @@ Global
 		{740CDFF4-B8EC-4A37-951B-C9FE9980EF2A}.RelWithDebInfo|x64.Build.0 = Release|x64
 		{740CDFF4-B8EC-4A37-951B-C9FE9980EF2A}.RelWithDebInfo|x86.ActiveCfg = Release|Any CPU
 		{740CDFF4-B8EC-4A37-951B-C9FE9980EF2A}.RelWithDebInfo|x86.Build.0 = Release|Any CPU
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Checked|Any CPU.ActiveCfg = Checked|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Checked|Any CPU.Build.0 = Checked|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Checked|x64.ActiveCfg = Checked|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Checked|x64.Build.0 = Checked|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Checked|x86.ActiveCfg = Checked|x86
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Checked|x86.Build.0 = Checked|x86
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Debug|Any CPU.ActiveCfg = Debug|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Debug|Any CPU.Build.0 = Debug|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Debug|x64.ActiveCfg = Debug|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Debug|x64.Build.0 = Debug|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Debug|x86.ActiveCfg = Debug|x86
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Debug|x86.Build.0 = Debug|x86
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Release|Any CPU.ActiveCfg = Release|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Release|Any CPU.Build.0 = Release|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Release|x64.ActiveCfg = Release|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Release|x64.Build.0 = Release|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Release|x86.ActiveCfg = Release|x86
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.Release|x86.Build.0 = Release|x86
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.RelWithDebInfo|Any CPU.ActiveCfg = Release|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.RelWithDebInfo|Any CPU.Build.0 = Release|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.RelWithDebInfo|x64.ActiveCfg = Release|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.RelWithDebInfo|x64.Build.0 = Release|x64
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.RelWithDebInfo|x86.ActiveCfg = Release|x86
-		{4CF2ECD3-A1C3-4A28-AB08-A61C53114143}.RelWithDebInfo|x86.Build.0 = Release|x86
 		{9DA153BF-51C4-4AD7-A355-9F9528843DC7}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
 		{9DA153BF-51C4-4AD7-A355-9F9528843DC7}.Checked|Any CPU.Build.0 = Debug|Any CPU
 		{9DA153BF-51C4-4AD7-A355-9F9528843DC7}.Checked|x64.ActiveCfg = Debug|Any CPU
@@ -386,6 +364,54 @@ Global
 		{219E0AC3-CDBF-4104-B324-85915DD16E25}.RelWithDebInfo|x64.Build.0 = Release|Any CPU
 		{219E0AC3-CDBF-4104-B324-85915DD16E25}.RelWithDebInfo|x86.ActiveCfg = Release|Any CPU
 		{219E0AC3-CDBF-4104-B324-85915DD16E25}.RelWithDebInfo|x86.Build.0 = Release|Any CPU
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Checked|Any CPU.ActiveCfg = Checked|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Checked|Any CPU.Build.0 = Checked|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Checked|x64.ActiveCfg = Checked|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Checked|x64.Build.0 = Checked|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Checked|x86.ActiveCfg = Checked|x86
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Checked|x86.Build.0 = Checked|x86
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Debug|Any CPU.ActiveCfg = Debug|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Debug|Any CPU.Build.0 = Debug|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Debug|x64.ActiveCfg = Debug|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Debug|x64.Build.0 = Debug|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Debug|x86.ActiveCfg = Debug|x86
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Debug|x86.Build.0 = Debug|x86
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Release|Any CPU.ActiveCfg = Release|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Release|Any CPU.Build.0 = Release|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Release|x64.ActiveCfg = Release|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Release|x64.Build.0 = Release|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Release|x86.ActiveCfg = Release|x86
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.Release|x86.Build.0 = Release|x86
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.RelWithDebInfo|Any CPU.ActiveCfg = Release|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.RelWithDebInfo|Any CPU.Build.0 = Release|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.RelWithDebInfo|x64.ActiveCfg = Release|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.RelWithDebInfo|x64.Build.0 = Release|x64
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.RelWithDebInfo|x86.ActiveCfg = Release|x86
+		{C331F49A-B2BA-46A4-975B-E922AA43FB6F}.RelWithDebInfo|x86.Build.0 = Release|x86
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Checked|Any CPU.Build.0 = Debug|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Checked|x64.Build.0 = Debug|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Checked|x86.Build.0 = Debug|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Debug|x64.ActiveCfg = Debug|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Debug|x64.Build.0 = Debug|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Debug|x86.Build.0 = Debug|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Release|Any CPU.Build.0 = Release|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Release|x64.ActiveCfg = Release|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Release|x64.Build.0 = Release|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Release|x86.ActiveCfg = Release|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.Release|x86.Build.0 = Release|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.RelWithDebInfo|x64.Build.0 = Release|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.RelWithDebInfo|x86.ActiveCfg = Release|Any CPU
+		{B49A6D20-EF0F-48EE-A686-0E615D6640B0}.RelWithDebInfo|x86.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
@@ -394,6 +420,8 @@ Global
 		SolutionGuid = {A484CF9D-B203-427F-9D15-A5BBC6013421}
 	EndGlobalSection
 	GlobalSection(SharedMSBuildProjectFiles) = preSolution
+		..\..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{b49a6d20-ef0f-48ee-a686-0e615d6640b0}*SharedItemsImports = 5
+		..\..\..\tools\illink\test\Trimming.Tests.Shared\Trimming.Tests.Shared.projitems*{c331f49a-b2ba-46a4-975b-e922aa43fb6f}*SharedItemsImports = 5
 		..\..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{ff598e93-8e9e-4091-9f50-61a7572663ae}*SharedItemsImports = 13
 		..\..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{ffbd9619-de6f-4a98-8732-8a14ec3c1a18}*SharedItemsImports = 5
 	EndGlobalSection
diff --git a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h
index 5d659488b292..754394f16ee9 100644
--- a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h
+++ b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h
@@ -30,7 +30,7 @@ struct JitInterfaceCallbacks
     CORINFO_METHOD_HANDLE (* getUnboxedEntry)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE ftn, bool* requiresInstMethodTableArg);
     CORINFO_CLASS_HANDLE (* getDefaultComparerClass)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE elemType);
     CORINFO_CLASS_HANDLE (* getDefaultEqualityComparerClass)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE elemType);
-    void (* expandRawHandleIntrinsic)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_GENERICHANDLE_RESULT* pResult);
+    void (* expandRawHandleIntrinsic)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_METHOD_HANDLE callerHandle, CORINFO_GENERICHANDLE_RESULT* pResult);
     bool (* isIntrinsicType)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE classHnd);
     CorInfoCallConvExtension (* getUnmanagedCallConv)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* callSiteSig, bool* pSuppressGCTransition);
     bool (* pInvokeMarshalingRequired)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* callSiteSig);
@@ -80,8 +80,8 @@ struct JitInterfaceCallbacks
     bool (* isObjectImmutable)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_OBJECT_HANDLE objPtr);
     bool (* getStringChar)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_OBJECT_HANDLE strObj, int index, uint16_t* value);
     CORINFO_CLASS_HANDLE (* getObjectType)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_OBJECT_HANDLE objPtr);
-    bool (* getReadyToRunHelper)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_LOOKUP_KIND* pGenericLookupKind, CorInfoHelpFunc id, CORINFO_CONST_LOOKUP* pLookup);
-    void (* getReadyToRunDelegateCtorHelper)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* pTargetMethod, unsigned int targetConstraint, CORINFO_CLASS_HANDLE delegateType, CORINFO_LOOKUP* pLookup);
+    bool (* getReadyToRunHelper)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_LOOKUP_KIND* pGenericLookupKind, CorInfoHelpFunc id, CORINFO_METHOD_HANDLE callerHandle, CORINFO_CONST_LOOKUP* pLookup);
+    void (* getReadyToRunDelegateCtorHelper)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* pTargetMethod, unsigned int targetConstraint, CORINFO_CLASS_HANDLE delegateType, CORINFO_METHOD_HANDLE callerHandle, CORINFO_LOOKUP* pLookup);
     CorInfoInitClassResult (* initClass)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_FIELD_HANDLE field, CORINFO_METHOD_HANDLE method, CORINFO_CONTEXT_HANDLE context);
     void (* classMustBeLoadedBeforeCodeIsRun)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE cls);
     CORINFO_CLASS_HANDLE (* getBuiltinClass)(void * thisHandle, CorInfoExceptionClass** ppException, CorInfoClassId classId);
@@ -115,6 +115,7 @@ struct JitInterfaceCallbacks
     void (* getVars)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE ftn, uint32_t* cVars, ICorDebugInfo::ILVarInfo** vars, bool* extendOthers);
     void (* setVars)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE ftn, uint32_t cVars, ICorDebugInfo::NativeVarInfo* vars);
     void (* reportRichMappings)(void * thisHandle, CorInfoExceptionClass** ppException, ICorDebugInfo::InlineTreeNode* inlineTreeNodes, uint32_t numInlineTreeNodes, ICorDebugInfo::RichOffsetMapping* mappings, uint32_t numMappings);
+    void (* reportMetadata)(void * thisHandle, CorInfoExceptionClass** ppException, const char* key, const void* value, size_t length);
     void* (* allocateArray)(void * thisHandle, CorInfoExceptionClass** ppException, size_t cBytes);
     void (* freeArray)(void * thisHandle, CorInfoExceptionClass** ppException, void* array);
     CORINFO_ARG_LIST_HANDLE (* getArgNext)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_ARG_LIST_HANDLE args);
@@ -131,6 +132,7 @@ struct JitInterfaceCallbacks
     const char* (* getMethodNameFromMetadata)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE ftn, const char** className, const char** namespaceName, const char** enclosingClassName);
     unsigned (* getMethodHash)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE ftn);
     bool (* getSystemVAmd64PassStructInRegisterDescriptor)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE structHnd, SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr);
+    void (* getSwiftLowering)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE structHnd, CORINFO_SWIFT_LOWERING* pLowering);
     uint32_t (* getLoongArch64PassStructInRegisterFlags)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE structHnd);
     uint32_t (* getRISCV64PassStructInRegisterFlags)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE structHnd);
     uint32_t (* getThreadTLSIndex)(void * thisHandle, CorInfoExceptionClass** ppException, void** ppIndirection);
@@ -144,7 +146,7 @@ struct JitInterfaceCallbacks
     CORINFO_CLASS_HANDLE (* embedClassHandle)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CLASS_HANDLE handle, void** ppIndirection);
     CORINFO_METHOD_HANDLE (* embedMethodHandle)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE handle, void** ppIndirection);
     CORINFO_FIELD_HANDLE (* embedFieldHandle)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_FIELD_HANDLE handle, void** ppIndirection);
-    void (* embedGenericHandle)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, bool fEmbedParent, CORINFO_GENERICHANDLE_RESULT* pResult);
+    void (* embedGenericHandle)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, bool fEmbedParent, CORINFO_METHOD_HANDLE callerHandle, CORINFO_GENERICHANDLE_RESULT* pResult);
     void (* getLocationOfThisType)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE context, CORINFO_LOOKUP_KIND* pLookupKind);
     void (* getAddressOfPInvokeTarget)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE method, CORINFO_CONST_LOOKUP* pLookup);
     void* (* GetCookieForPInvokeCalliSig)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_SIG_INFO* szMetaSig, void** ppIndirection);
@@ -388,10 +390,11 @@ class JitInterfaceWrapper : public ICorJitInfo
 
     virtual void expandRawHandleIntrinsic(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     CorInfoExceptionClass* pException = nullptr;
-    _callbacks->expandRawHandleIntrinsic(_thisHandle, &pException, pResolvedToken, pResult);
+    _callbacks->expandRawHandleIntrinsic(_thisHandle, &pException, pResolvedToken, callerHandle, pResult);
     if (pException != nullptr) throw pException;
 }
 
@@ -873,10 +876,11 @@ class JitInterfaceWrapper : public ICorJitInfo
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
           CORINFO_LOOKUP_KIND* pGenericLookupKind,
           CorInfoHelpFunc id,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_CONST_LOOKUP* pLookup)
 {
     CorInfoExceptionClass* pException = nullptr;
-    bool temp = _callbacks->getReadyToRunHelper(_thisHandle, &pException, pResolvedToken, pGenericLookupKind, id, pLookup);
+    bool temp = _callbacks->getReadyToRunHelper(_thisHandle, &pException, pResolvedToken, pGenericLookupKind, id, callerHandle, pLookup);
     if (pException != nullptr) throw pException;
     return temp;
 }
@@ -885,10 +889,11 @@ class JitInterfaceWrapper : public ICorJitInfo
           CORINFO_RESOLVED_TOKEN* pTargetMethod,
           unsigned int targetConstraint,
           CORINFO_CLASS_HANDLE delegateType,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_LOOKUP* pLookup)
 {
     CorInfoExceptionClass* pException = nullptr;
-    _callbacks->getReadyToRunDelegateCtorHelper(_thisHandle, &pException, pTargetMethod, targetConstraint, delegateType, pLookup);
+    _callbacks->getReadyToRunDelegateCtorHelper(_thisHandle, &pException, pTargetMethod, targetConstraint, delegateType, callerHandle, pLookup);
     if (pException != nullptr) throw pException;
 }
 
@@ -1214,6 +1219,16 @@ class JitInterfaceWrapper : public ICorJitInfo
     if (pException != nullptr) throw pException;
 }
 
+    virtual void reportMetadata(
+          const char* key,
+          const void* value,
+          size_t length)
+{
+    CorInfoExceptionClass* pException = nullptr;
+    _callbacks->reportMetadata(_thisHandle, &pException, key, value, length);
+    if (pException != nullptr) throw pException;
+}
+
     virtual void* allocateArray(
           size_t cBytes)
 {
@@ -1357,6 +1372,15 @@ class JitInterfaceWrapper : public ICorJitInfo
     return temp;
 }
 
+    virtual void getSwiftLowering(
+          CORINFO_CLASS_HANDLE structHnd,
+          CORINFO_SWIFT_LOWERING* pLowering)
+{
+    CorInfoExceptionClass* pException = nullptr;
+    _callbacks->getSwiftLowering(_thisHandle, &pException, structHnd, pLowering);
+    if (pException != nullptr) throw pException;
+}
+
     virtual uint32_t getLoongArch64PassStructInRegisterFlags(
           CORINFO_CLASS_HANDLE structHnd)
 {
@@ -1485,10 +1509,11 @@ class JitInterfaceWrapper : public ICorJitInfo
     virtual void embedGenericHandle(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
           bool fEmbedParent,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     CorInfoExceptionClass* pException = nullptr;
-    _callbacks->embedGenericHandle(_thisHandle, &pException, pResolvedToken, fEmbedParent, pResult);
+    _callbacks->embedGenericHandle(_thisHandle, &pException, pResolvedToken, fEmbedParent, callerHandle, pResult);
     if (pException != nullptr) throw pException;
 }
 
diff --git a/src/coreclr/tools/metainfo/mdinfo.cpp b/src/coreclr/tools/metainfo/mdinfo.cpp
index 579a5362f96d..84d7f8d50f26 100644
--- a/src/coreclr/tools/metainfo/mdinfo.cpp
+++ b/src/coreclr/tools/metainfo/mdinfo.cpp
@@ -6,6 +6,7 @@
 #include <objbase.h>
 #include <crtdbg.h>
 #include <assert.h>
+#include <algorithm>
 
 #include <corpriv.h>
 #include <cor.h>
@@ -3772,7 +3773,7 @@ int MDInfo::DumpHex(
         ++nLines;
 
         // Calculate spacing.
-        nPrint = min(cbData, nLine);
+        nPrint = std::min(cbData, nLine);
         nSpace = nLine - nPrint;
 
             // dump in hex.
diff --git a/src/coreclr/tools/r2rdump/CoreDisTools.cs b/src/coreclr/tools/r2rdump/CoreDisTools.cs
index 6763c94859a6..32e88ae8d39d 100644
--- a/src/coreclr/tools/r2rdump/CoreDisTools.cs
+++ b/src/coreclr/tools/r2rdump/CoreDisTools.cs
@@ -29,9 +29,6 @@ public enum TargetArch
         [DllImport(_dll, CallingConvention = CallingConvention.Cdecl)]
         public static extern IntPtr InitBufferedDisasm(TargetArch Target);
 
-        [DllImport(_dll, CallingConvention = CallingConvention.Cdecl)]
-        public static extern void DumpCodeBlock(IntPtr Disasm, IntPtr Address, IntPtr Bytes, IntPtr Size);
-
         [DllImport(_dll, CallingConvention = CallingConvention.Cdecl)]
         public static extern int DumpInstruction(IntPtr Disasm, IntPtr Address, IntPtr Bytes, IntPtr Size);
 
@@ -236,6 +233,11 @@ public int GetInstruction(RuntimeFunction rtf, int imageOffset, int rtfOffset, o
             }
 
             int instrSize = CoreDisTools.GetInstruction(_disasm, rtf, imageOffset, rtfOffset, _reader.Image, out instruction);
+            if (instrSize == 0)
+            {
+                instruction = "Decode failure, aborting disassembly" + Environment.NewLine;
+                return rtf.Size - rtfOffset;
+            }
 
             // CoreDisTools dumps instructions in the following format:
             //
diff --git a/src/coreclr/tools/r2rdump/R2RDump.csproj b/src/coreclr/tools/r2rdump/R2RDump.csproj
index 103d8160d0b5..9459b4d87693 100644
--- a/src/coreclr/tools/r2rdump/R2RDump.csproj
+++ b/src/coreclr/tools/r2rdump/R2RDump.csproj
@@ -15,7 +15,7 @@
     <RunAnalyzers>false</RunAnalyzers>
   </PropertyGroup>
 
-  <Import Project="$(RepositoryEngineeringDir)coredistools.targets" Condition="'$(DotNetBuildFromSource)' != 'true'" />
+  <Import Project="$(RepositoryEngineeringDir)coredistools.targets" Condition="'$(DotNetBuildSourceOnly)' != 'true'" />
   <Import Project="$(RepositoryEngineeringDir)DiaSymReaderNative.targets" />
 
   <ItemGroup>
@@ -28,10 +28,6 @@
     </PackageReference>
     <ProjectReference Include="..\aot\ILCompiler.Diagnostics\ILCompiler.Diagnostics.csproj" />
     <ProjectReference Include="..\aot\ILCompiler.Reflection.ReadyToRun\ILCompiler.Reflection.ReadyToRun.csproj" />
-    <!-- Workaround until the SDK does this, see https://github.com/dotnet/sdk/issues/24799 -->
-    <PackageReference Include="NETStandard.Library">
-      <Version>$(NetStandardLibraryVersion)</Version>
-    </PackageReference>
     <Content Include="$(CoreDisToolsLibrary)" Condition="Exists('$(CoreDisToolsLibrary)')">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h
index 3ee4190e8890..104a7ae1fabd 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h
@@ -423,9 +423,16 @@ struct Agnostic_CheckMethodModifier
 struct Agnostic_EmbedGenericHandle
 {
     Agnostic_CORINFO_RESOLVED_TOKEN ResolvedToken;
+    DWORDLONG                       hCallerHandle;
     DWORD                           fEmbedParent;
 };
 
+struct Agnostic_ExpandRawHandleIntrinsic
+{
+    Agnostic_CORINFO_RESOLVED_TOKENin ResolvedToken;
+    DWORDLONG                         hCallerHandle;
+};
+
 struct Agnostic_CORINFO_GENERICHANDLE_RESULT
 {
     Agnostic_CORINFO_LOOKUP lookup;
@@ -614,6 +621,14 @@ struct Agnostic_GetSystemVAmd64PassStructInRegisterDescriptor
     DWORD result;
 };
 
+struct Agnostic_GetSwiftLowering
+{
+    DWORD byReference;
+    DWORD loweredElements[MAX_SWIFT_LOWERED_ELEMENTS];
+    DWORD offsets[MAX_SWIFT_LOWERED_ELEMENTS];
+    DWORD numLoweredElements;
+};
+
 struct Agnostic_ResolveVirtualMethodKey
 {
     DWORDLONG                       virtualMethod;
@@ -679,6 +694,7 @@ struct GetReadyToRunHelper_TOKENin
     Agnostic_CORINFO_RESOLVED_TOKEN ResolvedToken;
     Agnostic_CORINFO_LOOKUP_KIND    GenericLookupKind;
     DWORD                           id;
+    DWORDLONG                       callerHandle;
 };
 
 struct GetReadyToRunHelper_TOKENout
@@ -692,6 +708,7 @@ struct GetReadyToRunDelegateCtorHelper_TOKENIn
     Agnostic_CORINFO_RESOLVED_TOKEN TargetMethod;
     mdToken                         targetConstraint;
     DWORDLONG                       delegateType;
+    DWORDLONG                       callerHandle;
 };
 
 struct Agnostic_RecordRelocation
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp
index 3c6653c41a1c..74040dc5aa3b 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp
@@ -32,7 +32,13 @@ CompileResult::CompileResult()
     allocGCInfoDets.retval = nullptr;
     allocGCInfoDets.size   = 0;
 
+    MethodFullName = nullptr;
+    TieringName = nullptr;
     memoryTracker = nullptr;
+
+#define JITMETADATAINFO(name, type, flags)
+#define JITMETADATAMETRIC(name, type, flags) name = 0;
+#include "jitmetadatalist.h"
 }
 
 CompileResult::~CompileResult()
@@ -685,6 +691,18 @@ const char* relocationTypeToString(uint16_t fRelocType)
         // From corinfo.h
         case IMAGE_REL_BASED_REL32:
             return "rel32";
+        case IMAGE_REL_SECREL:
+            return "secrel";
+        case IMAGE_REL_TLSGD:
+            return "tlsgd";
+        case IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21:
+            return "tlsdesc_high21";
+        case IMAGE_REL_AARCH64_TLSDESC_LD64_LO12:
+            return "tlsdesc_lo12";
+        case IMAGE_REL_AARCH64_TLSDESC_ADD_LO12:
+            return "tlsdesc_add_lo12";
+        case IMAGE_REL_AARCH64_TLSDESC_CALL:
+            return "tlsdesc_call";
         case IMAGE_REL_BASED_THUMB_BRANCH24:
             return "thumb_branch24";
         default:
@@ -845,6 +863,7 @@ void CompileResult::applyRelocs(RelocContext* rc, unsigned char* block1, ULONG b
                 break;
 
                 case IMAGE_REL_ARM64_PAGEBASE_REL21: // ADRP 21 bit PC-relative page address
+                case IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21: // ADRP 21 bit for TLSDesc
                 {
                     if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
                     {
@@ -869,6 +888,16 @@ void CompileResult::applyRelocs(RelocContext* rc, unsigned char* block1, ULONG b
                 }
                 break;
 
+                case IMAGE_REL_AARCH64_TLSDESC_LD64_LO12:
+                case IMAGE_REL_AARCH64_TLSDESC_ADD_LO12: // TLSDESC ADD for corresponding ADRP
+                case IMAGE_REL_AARCH64_TLSDESC_CALL:
+                {
+                    // These are patched later by linker during actual execution
+                    // and do not need relocation.
+                    wasRelocHandled = true;
+                }
+                break;
+
                 default:
                     break;
             }
@@ -896,13 +925,19 @@ void CompileResult::applyRelocs(RelocContext* rc, unsigned char* block1, ULONG b
 
                 wasRelocHandled = true;
             }
+            else if (relocType == IMAGE_REL_TLSGD)
+            {
+                // These are patched later by linker during actual execution
+                // and do not need relocation.
+                wasRelocHandled = true;
+            }
         }
 
         if (wasRelocHandled)
             continue;
 
         // Now do all-platform relocations.
-        if (tmp.fRelocType == IMAGE_REL_BASED_REL32)
+        if ((tmp.fRelocType == IMAGE_REL_BASED_REL32) || (tmp.fRelocType == IMAGE_REL_SECREL))
         {
             DWORDLONG fixupLocation = tmp.location;
 
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h
index b7be4dcd8927..72415f1d38f5 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h
@@ -121,6 +121,8 @@ class CompileResult
     void dmpSetVars(DWORD key, const Agnostic_SetVars& value);
     bool repSetVars(CORINFO_METHOD_HANDLE* ftn, ULONG32* cVars, ICorDebugInfo::NativeVarInfo** vars);
 
+    void recMetadata(const char* key, const void* value);
+
     void recSetPatchpointInfo(PatchpointInfo* patchpointInfo);
     void dmpSetPatchpointInfo(DWORD key, const Agnostic_SetPatchpointInfo& value);
     bool repSetPatchpointInfo(PatchpointInfo** patchpointInfo);
@@ -215,6 +217,15 @@ class CompileResult
 #define DENSELWM(map, value) DenseLightWeightMap<value>* map;
 #include "crlwmlist.h"
 
+#define JITMETADATAINFO(name, type, flags)
+#define JITMETADATAMETRIC(name, type, flags) type name;
+#include "jitmetadatalist.h"
+
+    // Reported method full name from JIT (not available with release JIT)
+    const char* MethodFullName;
+    // Reported compilation tier from JIT
+    const char* TieringName;
+
     // not persisted to disk.
 public:
     LightWeightMap<DWORDLONG, DWORD>* CallTargetTypes;
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/jitmetadatalist.h b/src/coreclr/tools/superpmi/superpmi-shared/jitmetadatalist.h
new file mode 100644
index 000000000000..f43f4300d73a
--- /dev/null
+++ b/src/coreclr/tools/superpmi/superpmi-shared/jitmetadatalist.h
@@ -0,0 +1 @@
+#include "../../../jit/jitmetadatalist.h"
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h
index a44faac3cee2..c3445291eeb9 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h
@@ -125,6 +125,7 @@ LWM(GetExpectedTargetArchitecture, DWORD, DWORD)
 LWM(GetSharedCCtorHelper, DWORDLONG, DWORD)
 LWM(GetStringConfigValue, DWORD, DWORD)
 LWM(GetSystemVAmd64PassStructInRegisterDescriptor, DWORDLONG, Agnostic_GetSystemVAmd64PassStructInRegisterDescriptor)
+LWM(GetSwiftLowering, DWORDLONG, Agnostic_GetSwiftLowering)
 LWM(GetLoongArch64PassStructInRegisterFlags, DWORDLONG, DWORD)
 LWM(GetRISCV64PassStructInRegisterFlags, DWORDLONG, DWORD)
 LWM(GetTailCallHelpers, Agnostic_GetTailCallHelpers, Agnostic_CORINFO_TAILCALL_HELPERS)
@@ -146,7 +147,7 @@ LWM(InitClass, Agnostic_InitClass, DWORD)
 LWM(IsDelegateCreationAllowed, DLDL, DWORD)
 LWM(IsFieldStatic, DWORDLONG, DWORD)
 LWM(GetArrayOrStringLength, DWORDLONG, DWORD)
-LWM(ExpandRawHandleIntrinsic, Agnostic_CORINFO_RESOLVED_TOKENin, Agnostic_CORINFO_GENERICHANDLE_RESULT)
+LWM(ExpandRawHandleIntrinsic, Agnostic_ExpandRawHandleIntrinsic, Agnostic_CORINFO_GENERICHANDLE_RESULT)
 LWM(IsIntrinsicType, DWORDLONG, DWORD)
 LWM(IsSDArray, DWORDLONG, DWORD)
 LWM(GetStringLiteral, DLDDD, DD)
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
index c72b8b1eec1f..7b0ddc8ded5a 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
@@ -1670,14 +1670,15 @@ void MethodContext::repGetCallInfoFromMethodHandle(CORINFO_METHOD_HANDLE methodH
     LogException(EXCEPTIONCODE_MC, "Didn't find key %016" PRIX64 ".", methodHandle);
 }
 
-void MethodContext::recExpandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_GENERICHANDLE_RESULT* pResult)
+void MethodContext::recExpandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_METHOD_HANDLE callerHandle, CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     if (ExpandRawHandleIntrinsic == nullptr)
-        ExpandRawHandleIntrinsic = new LightWeightMap<Agnostic_CORINFO_RESOLVED_TOKENin, Agnostic_CORINFO_GENERICHANDLE_RESULT>;
+        ExpandRawHandleIntrinsic = new LightWeightMap<Agnostic_ExpandRawHandleIntrinsic, Agnostic_CORINFO_GENERICHANDLE_RESULT>;
 
-    Agnostic_CORINFO_RESOLVED_TOKENin key;
+    Agnostic_ExpandRawHandleIntrinsic key;
     ZeroMemory(&key, sizeof(key)); // Zero key including any struct padding
-    key = SpmiRecordsHelper::CreateAgnostic_CORINFO_RESOLVED_TOKENin(pResolvedToken);
+    key.ResolvedToken = SpmiRecordsHelper::CreateAgnostic_CORINFO_RESOLVED_TOKENin(pResolvedToken);
+    key.hCallerHandle = CastHandle(callerHandle);
 
     Agnostic_CORINFO_GENERICHANDLE_RESULT value;
     value.lookup            = SpmiRecordsHelper::StoreAgnostic_CORINFO_LOOKUP(&pResult->lookup);
@@ -1687,19 +1688,21 @@ void MethodContext::recExpandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN* pResolve
     ExpandRawHandleIntrinsic->Add(key, value);
     DEBUG_REC(dmpExpandRawHandleIntrinsic(key, value));
 }
-void MethodContext::dmpExpandRawHandleIntrinsic(const Agnostic_CORINFO_RESOLVED_TOKENin& key, const Agnostic_CORINFO_GENERICHANDLE_RESULT& result)
+void MethodContext::dmpExpandRawHandleIntrinsic(const Agnostic_ExpandRawHandleIntrinsic& key, const Agnostic_CORINFO_GENERICHANDLE_RESULT& result)
 {
     printf("ExpandRawHandleIntrinsic key: %s, value %s cth-%016" PRIx64 " ht-%u",
-        SpmiDumpHelper::DumpAgnostic_CORINFO_RESOLVED_TOKENin(key).c_str(),
+        SpmiDumpHelper::DumpAgnostic_CORINFO_RESOLVED_TOKENin(key.ResolvedToken).c_str(),
         SpmiDumpHelper::DumpAgnostic_CORINFO_LOOKUP(result.lookup).c_str(),
         result.compileTimeHandle,
         result.handleType);
 }
-void MethodContext::repExpandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_GENERICHANDLE_RESULT* pResult)
+void MethodContext::repExpandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_METHOD_HANDLE callerHandle, CORINFO_GENERICHANDLE_RESULT* pResult)
 {
-    Agnostic_CORINFO_RESOLVED_TOKENin key;
+    Agnostic_ExpandRawHandleIntrinsic key;
     ZeroMemory(&key, sizeof(key)); // Zero key including any struct padding
-    key = SpmiRecordsHelper::CreateAgnostic_CORINFO_RESOLVED_TOKENin(pResolvedToken);
+
+    key.ResolvedToken = SpmiRecordsHelper::CreateAgnostic_CORINFO_RESOLVED_TOKENin(pResolvedToken);
+    key.hCallerHandle = CastHandle(callerHandle);
 
     Agnostic_CORINFO_GENERICHANDLE_RESULT value =
         LookupByKeyOrMiss(ExpandRawHandleIntrinsic, key, ": key %x", pResolvedToken->token);
@@ -2273,6 +2276,7 @@ CORINFO_CLASS_HANDLE MethodContext::repGetObjectType(CORINFO_OBJECT_HANDLE objPt
 void MethodContext::recGetReadyToRunHelper(CORINFO_RESOLVED_TOKEN* pResolvedToken,
                                            CORINFO_LOOKUP_KIND*    pGenericLookupKind,
                                            CorInfoHelpFunc         id,
+                                           CORINFO_METHOD_HANDLE   callerHandle,
                                            CORINFO_CONST_LOOKUP*   pLookup,
                                            bool                    result)
 {
@@ -2284,6 +2288,7 @@ void MethodContext::recGetReadyToRunHelper(CORINFO_RESOLVED_TOKEN* pResolvedToke
     key.ResolvedToken = SpmiRecordsHelper::StoreAgnostic_CORINFO_RESOLVED_TOKEN(pResolvedToken, GetReadyToRunHelper);
     key.GenericLookupKind = SpmiRecordsHelper::CreateAgnostic_CORINFO_LOOKUP_KIND(pGenericLookupKind);
     key.id                = (DWORD)id;
+    key.callerHandle      = CastHandle(callerHandle);
     GetReadyToRunHelper_TOKENout value;
     value.Lookup = SpmiRecordsHelper::StoreAgnostic_CORINFO_CONST_LOOKUP(pLookup);
     value.result = result;
@@ -2304,6 +2309,7 @@ void MethodContext::dmpGetReadyToRunHelper(GetReadyToRunHelper_TOKENin key, GetR
 bool MethodContext::repGetReadyToRunHelper(CORINFO_RESOLVED_TOKEN* pResolvedToken,
                                            CORINFO_LOOKUP_KIND*    pGenericLookupKind,
                                            CorInfoHelpFunc         id,
+                                           CORINFO_METHOD_HANDLE   callerHandle,
                                            CORINFO_CONST_LOOKUP*   pLookup)
 {
     AssertMapExistsNoMessage(GetReadyToRunHelper);
@@ -2313,6 +2319,7 @@ bool MethodContext::repGetReadyToRunHelper(CORINFO_RESOLVED_TOKEN* pResolvedToke
     key.ResolvedToken     = SpmiRecordsHelper::RestoreAgnostic_CORINFO_RESOLVED_TOKEN(pResolvedToken, GetReadyToRunHelper);
     key.GenericLookupKind = SpmiRecordsHelper::CreateAgnostic_CORINFO_LOOKUP_KIND(pGenericLookupKind);
     key.id                = (DWORD)id;
+    key.callerHandle      = CastHandle(callerHandle);
 
     GetReadyToRunHelper_TOKENout value = LookupByKeyOrMissNoMessage(GetReadyToRunHelper, key);
 
@@ -2325,6 +2332,7 @@ bool MethodContext::repGetReadyToRunHelper(CORINFO_RESOLVED_TOKEN* pResolvedToke
 void MethodContext::recGetReadyToRunDelegateCtorHelper(CORINFO_RESOLVED_TOKEN* pTargetMethod,
                                                        mdToken                 targetConstraint,
                                                        CORINFO_CLASS_HANDLE    delegateType,
+                                                       CORINFO_METHOD_HANDLE   callerHandle,
                                                        CORINFO_LOOKUP*         pLookup)
 {
     if (GetReadyToRunDelegateCtorHelper == nullptr)
@@ -2337,6 +2345,7 @@ void MethodContext::recGetReadyToRunDelegateCtorHelper(CORINFO_RESOLVED_TOKEN* p
         SpmiRecordsHelper::StoreAgnostic_CORINFO_RESOLVED_TOKEN(pTargetMethod, GetReadyToRunDelegateCtorHelper);
     key.targetConstraint          = targetConstraint;
     key.delegateType              = CastHandle(delegateType);
+    key.callerHandle              = CastHandle(callerHandle);
     Agnostic_CORINFO_LOOKUP value = SpmiRecordsHelper::StoreAgnostic_CORINFO_LOOKUP(pLookup);
     GetReadyToRunDelegateCtorHelper->Add(key, value);
     DEBUG_REC(dmpGetReadyToRunDelegateCtorHelper(key, value));
@@ -2353,6 +2362,7 @@ void MethodContext::dmpGetReadyToRunDelegateCtorHelper(GetReadyToRunDelegateCtor
 void MethodContext::repGetReadyToRunDelegateCtorHelper(CORINFO_RESOLVED_TOKEN* pTargetMethod,
                                                        mdToken                 targetConstraint,
                                                        CORINFO_CLASS_HANDLE    delegateType,
+                                                       CORINFO_METHOD_HANDLE   callerHandle,
                                                        CORINFO_LOOKUP*         pLookup)
 {
     AssertMapExistsNoMessage(GetReadyToRunDelegateCtorHelper);
@@ -2363,6 +2373,7 @@ void MethodContext::repGetReadyToRunDelegateCtorHelper(CORINFO_RESOLVED_TOKEN* p
         SpmiRecordsHelper::RestoreAgnostic_CORINFO_RESOLVED_TOKEN(pTargetMethod, GetReadyToRunDelegateCtorHelper);
     key.targetConstraint = targetConstraint;
     key.delegateType     = CastHandle(delegateType);
+    key.callerHandle     = CastHandle(callerHandle);
 
     Agnostic_CORINFO_LOOKUP value = LookupByKeyOrMissNoMessage(GetReadyToRunDelegateCtorHelper, key);
 
@@ -2776,15 +2787,15 @@ void MethodContext::recGetExactClasses(CORINFO_CLASS_HANDLE baseType, int maxExa
     key.A = CastHandle(baseType);
     key.B = maxExactClasses;
 
-    Assert(result >= 0);
+    int numResults = result < 0 ? 0 : result;
 
-    DWORDLONG* exactClassesAgnostic = new DWORDLONG[result];
-    for (int i = 0; i < result; i++)
+    DWORDLONG* exactClassesAgnostic = new DWORDLONG[numResults];
+    for (int i = 0; i < numResults; i++)
         exactClassesAgnostic[i] = CastHandle(exactClsRet[i]);
 
     Agnostic_GetExactClassesResult value;
     value.numClasses = result;
-    value.classes = GetExactClasses->AddBuffer((unsigned char*)exactClassesAgnostic, (unsigned int)(result * sizeof(DWORDLONG)));
+    value.classes = GetExactClasses->AddBuffer((unsigned char*)exactClassesAgnostic, (unsigned int)(numResults * sizeof(DWORDLONG)));
 
     delete[] exactClassesAgnostic;
 
@@ -3101,6 +3112,7 @@ CorInfoHelpFunc MethodContext::repGetNewHelper(CORINFO_CLASS_HANDLE  classHandle
 
 void MethodContext::recEmbedGenericHandle(CORINFO_RESOLVED_TOKEN*       pResolvedToken,
                                           bool                          fEmbedParent,
+                                          CORINFO_METHOD_HANDLE         callerHandle,
                                           CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     if (EmbedGenericHandle == nullptr)
@@ -3110,6 +3122,7 @@ void MethodContext::recEmbedGenericHandle(CORINFO_RESOLVED_TOKEN*       pResolve
     ZeroMemory(&key, sizeof(key)); // Zero key including any struct padding
     key.ResolvedToken = SpmiRecordsHelper::StoreAgnostic_CORINFO_RESOLVED_TOKEN(pResolvedToken, EmbedGenericHandle);
     key.fEmbedParent  = (DWORD)fEmbedParent;
+    key.hCallerHandle = CastHandle(callerHandle);
 
     Agnostic_CORINFO_GENERICHANDLE_RESULT value;
     value.lookup            = SpmiRecordsHelper::StoreAgnostic_CORINFO_LOOKUP(&pResult->lookup);
@@ -3131,6 +3144,7 @@ void MethodContext::dmpEmbedGenericHandle(const Agnostic_EmbedGenericHandle&
 }
 void MethodContext::repEmbedGenericHandle(CORINFO_RESOLVED_TOKEN*       pResolvedToken,
                                           bool                          fEmbedParent,
+                                          CORINFO_METHOD_HANDLE         callerHandle,
                                           CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     AssertMapExistsNoMessage(EmbedGenericHandle);
@@ -3139,6 +3153,7 @@ void MethodContext::repEmbedGenericHandle(CORINFO_RESOLVED_TOKEN*       pResolve
     ZeroMemory(&key, sizeof(key)); // Zero key including any struct padding
     key.ResolvedToken = SpmiRecordsHelper::RestoreAgnostic_CORINFO_RESOLVED_TOKEN(pResolvedToken, EmbedGenericHandle);
     key.fEmbedParent  = (DWORD)fEmbedParent;
+    key.hCallerHandle = CastHandle(callerHandle);
 
     Agnostic_CORINFO_GENERICHANDLE_RESULT value = LookupByKeyOrMissNoMessage(EmbedGenericHandle, key);
 
@@ -6220,6 +6235,56 @@ bool MethodContext::repGetSystemVAmd64PassStructInRegisterDescriptor(
     return value.result ? true : false;
 }
 
+void MethodContext::recGetSwiftLowering(CORINFO_CLASS_HANDLE structHnd, CORINFO_SWIFT_LOWERING* pLowering)
+{
+    if (GetSwiftLowering == nullptr)
+        GetSwiftLowering = new LightWeightMap<DWORDLONG, Agnostic_GetSwiftLowering>();
+
+    DWORDLONG key = CastHandle(structHnd);
+
+    Agnostic_GetSwiftLowering value;
+    ZeroMemory(&value, sizeof(value));
+    value.byReference = pLowering->byReference ? 1 : 0;
+    if (!pLowering->byReference)
+    {
+        value.numLoweredElements = static_cast<DWORD>(pLowering->numLoweredElements);
+        for (size_t i = 0; i < pLowering->numLoweredElements; i++)
+        {
+            value.loweredElements[i] = static_cast<DWORD>(pLowering->loweredElements[i]);
+            value.offsets[i] = pLowering->offsets[i];
+        }
+    }
+
+    GetSwiftLowering->Add(key, value);
+    DEBUG_REC(dmpGetSwiftLowering(key, value));
+}
+void MethodContext::dmpGetSwiftLowering(
+    DWORDLONG key, const Agnostic_GetSwiftLowering& value)
+{
+    printf("GetSwiftLowering key structHnd-%016" PRIX64 ", value byReference-%u numLoweredElements-%u", key,
+        value.byReference, value.numLoweredElements);
+    for (size_t i = 0; i < value.numLoweredElements; i++)
+    {
+        printf(" [%zu] %u", i, value.loweredElements[i]);
+    }
+}
+void MethodContext::repGetSwiftLowering(CORINFO_CLASS_HANDLE structHnd, CORINFO_SWIFT_LOWERING* pLowering)
+{
+    DWORDLONG key = CastHandle(structHnd);
+    Agnostic_GetSwiftLowering value = LookupByKeyOrMiss(GetSwiftLowering, key, ": key %016" PRIX64 "", key);
+
+    DEBUG_REP(dmpGetSwiftLowering(key, value));
+
+    pLowering->byReference = value.byReference != 0;
+    pLowering->numLoweredElements = value.numLoweredElements;
+
+    for (size_t i = 0; i < pLowering->numLoweredElements; i++)
+    {
+        pLowering->loweredElements[i] = static_cast<CorInfoType>(value.loweredElements[i]);
+        pLowering->offsets[i] = value.offsets[i];
+    }
+}
+
 void MethodContext::recGetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd, DWORD value)
 {
     if (GetLoongArch64PassStructInRegisterFlags == nullptr)
@@ -6567,7 +6632,7 @@ size_t MethodContext::repPrint(
     size_t bytesWritten = 0;
     if ((buffer != nullptr) && (bufferSize > 0))
     {
-        bytesWritten = min(bufferSize - 1, res.stringBufferSize);
+        bytesWritten = min(bufferSize - 1, (size_t)res.stringBufferSize);
         if (bytesWritten > 0)
         {
             // The "full buffer" check above ensures this given that
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
index 3344552ca060..cf9c235ae987 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
@@ -272,9 +272,9 @@ class MethodContext
     void dmpIsSDArray(DWORDLONG key, DWORD value);
     bool repIsSDArray(CORINFO_CLASS_HANDLE cls);
 
-    void recExpandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_GENERICHANDLE_RESULT* pResult);
-    void dmpExpandRawHandleIntrinsic(const Agnostic_CORINFO_RESOLVED_TOKENin& key, const Agnostic_CORINFO_GENERICHANDLE_RESULT& result);
-    void repExpandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_GENERICHANDLE_RESULT* pResult);
+    void recExpandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_METHOD_HANDLE callerHandle, CORINFO_GENERICHANDLE_RESULT* pResult);
+    void dmpExpandRawHandleIntrinsic(const Agnostic_ExpandRawHandleIntrinsic& key, const Agnostic_CORINFO_GENERICHANDLE_RESULT& result);
+    void repExpandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_METHOD_HANDLE callerHandle, CORINFO_GENERICHANDLE_RESULT* pResult);
 
     void recIsIntrinsicType(CORINFO_CLASS_HANDLE cls, bool result);
     void dmpIsIntrinsicType(DWORDLONG key, DWORD value);
@@ -315,23 +315,27 @@ class MethodContext
     void recGetReadyToRunHelper(CORINFO_RESOLVED_TOKEN* pResolvedToken,
                                 CORINFO_LOOKUP_KIND*    pGenericLookupKind,
                                 CorInfoHelpFunc         id,
+                                CORINFO_METHOD_HANDLE   callerHandle,
                                 CORINFO_CONST_LOOKUP*   pLookup,
                                 bool                    result);
     void dmpGetReadyToRunHelper(GetReadyToRunHelper_TOKENin key, GetReadyToRunHelper_TOKENout value);
     bool repGetReadyToRunHelper(CORINFO_RESOLVED_TOKEN* pResolvedToken,
                                 CORINFO_LOOKUP_KIND*    pGenericLookupKind,
                                 CorInfoHelpFunc         id,
+                                CORINFO_METHOD_HANDLE   callerHandle,
                                 CORINFO_CONST_LOOKUP*   pLookup);
 
     void recGetReadyToRunDelegateCtorHelper(CORINFO_RESOLVED_TOKEN* pTargetMethod,
                                             mdToken                 targetConstraint,
                                             CORINFO_CLASS_HANDLE    delegateType,
+                                            CORINFO_METHOD_HANDLE   callerHandle,
                                             CORINFO_LOOKUP*         pLookup);
     void dmpGetReadyToRunDelegateCtorHelper(GetReadyToRunDelegateCtorHelper_TOKENIn key,
                                             Agnostic_CORINFO_LOOKUP                 pLookup);
     void repGetReadyToRunDelegateCtorHelper(CORINFO_RESOLVED_TOKEN* pTargetMethod,
                                             mdToken                 targetConstraint,
                                             CORINFO_CLASS_HANDLE    delegateType,
+                                            CORINFO_METHOD_HANDLE   callerHandle,
                                             CORINFO_LOOKUP*         pLookup);
 
     void recGetHelperFtn(CorInfoHelpFunc ftnNum, void** ppIndirection, void* result);
@@ -423,11 +427,13 @@ class MethodContext
 
     void recEmbedGenericHandle(CORINFO_RESOLVED_TOKEN*       pResolvedToken,
                                bool                          fEmbedParent,
+                               CORINFO_METHOD_HANDLE         callerHandle,
                                CORINFO_GENERICHANDLE_RESULT* pResult);
     void dmpEmbedGenericHandle(const Agnostic_EmbedGenericHandle&           key,
                                const Agnostic_CORINFO_GENERICHANDLE_RESULT& value);
     void repEmbedGenericHandle(CORINFO_RESOLVED_TOKEN*       pResolvedToken,
                                bool                          fEmbedParent,
+                               CORINFO_METHOD_HANDLE         callerHandle,
                                CORINFO_GENERICHANDLE_RESULT* pResult);
 
     void recGetEHinfo(CORINFO_METHOD_HANDLE ftn, unsigned EHnumber, CORINFO_EH_CLAUSE* clause);
@@ -757,6 +763,10 @@ class MethodContext
     bool repGetSystemVAmd64PassStructInRegisterDescriptor(
         CORINFO_CLASS_HANDLE structHnd, SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr);
 
+    void recGetSwiftLowering(CORINFO_CLASS_HANDLE structHnd, CORINFO_SWIFT_LOWERING* pLowering);
+    void dmpGetSwiftLowering(DWORDLONG key, const Agnostic_GetSwiftLowering& value);
+    void repGetSwiftLowering(CORINFO_CLASS_HANDLE structHnd, CORINFO_SWIFT_LOWERING* pLowering);
+
     void recGetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd, DWORD value);
     void dmpGetLoongArch64PassStructInRegisterFlags(DWORDLONG key, DWORD value);
     DWORD repGetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd);
@@ -886,6 +896,13 @@ class MethodContext
 
     bool WasEnvironmentChanged(const Environment& prevEnv);
 
+    void Reset()
+    {
+        delete cr;
+        FreeTempAllocations();
+        cr = new CompileResult();
+    }
+
     CompileResult* cr;
     CompileResult* originalCR;
     int            index;
@@ -1147,6 +1164,7 @@ enum mcPackets
     Packet_HaveSameMethodDefinition = 213,
     Packet_NotifyMethodInfoUsage = 214,
     Packet_IsExactType = 215,
+    Packet_GetSwiftLowering = 216,
 };
 
 void SetDebugDumpVariables();
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontextreader.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontextreader.cpp
index 2937661c044d..14f1d37dde61 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontextreader.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontextreader.cpp
@@ -619,3 +619,12 @@ bool MethodContextReader::IsMethodExcluded(MethodContext* mc)
     }
     return false;
 }
+
+void MethodContextReader::Reset(const int* newIndexes, int newIndexCount)
+{
+    Indexes = newIndexes;
+    IndexCount = newIndexCount;
+    curIndexPos = 0;
+    curMCIndex = 0;
+    curTOCIndex = 0;
+}
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontextreader.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontextreader.h
index c46f40a36003..b3c77e1637ef 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontextreader.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontextreader.h
@@ -149,6 +149,9 @@ class MethodContextReader
 
     // Return should this method context be excluded from the replay or not.
     bool IsMethodExcluded(MethodContext* mc);
+
+    // Reset for reading a new sequence of method indices
+    void Reset(const int* newIndexes, int newIndexCount);
 };
 #pragma pack(pop)
 
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp b/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp
index 725f52cbcc74..4a2d55c1dbb2 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp
@@ -93,7 +93,7 @@ void SpmiDumpHelper::FormatHandleArray(char*& pbuf, int& sizeOfBuffer, const Den
     sizeOfBuffer -= cch;
 
     const unsigned int maxHandleArrayDisplayElems = 5; // Don't display more than this.
-    const unsigned int handleArrayDisplayElems = min(maxHandleArrayDisplayElems, count);
+    const unsigned int handleArrayDisplayElems = min(maxHandleArrayDisplayElems, (unsigned int)count);
 
     bool first = true;
     for (DWORD i = startIndex; i < startIndex + handleArrayDisplayElems; i++)
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.h b/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.h
index 4dc1f28991a7..b989fb50d1c4 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.h
@@ -110,8 +110,8 @@ inline std::string SpmiDumpHelper::DumpPSig(
             pbuf += cch;
             sizeOfBuffer -= cch;
 
-            const unsigned int maxSigDisplayBytes = 25; // Don't display more than this.
-            const unsigned int sigDisplayBytes = min(maxSigDisplayBytes, cbSig);
+            const size_t maxSigDisplayBytes = 25; // Don't display more than this.
+            const size_t sigDisplayBytes = min(maxSigDisplayBytes, (size_t)cbSig);
 
             // TODO: display character representation of the types?
 
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/standardpch.h b/src/coreclr/tools/superpmi/superpmi-shared/standardpch.h
index 8c511b45e91b..9b926556fdbb 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/standardpch.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/standardpch.h
@@ -50,7 +50,6 @@
 #include <string.h>
 #include <stdlib.h>
 #include <stddef.h>
-#include <malloc.h>
 #include <assert.h>
 #include <wchar.h>
 #include <specstrings.h>
@@ -59,21 +58,11 @@
 #include <ctype.h>
 #include <stdarg.h>
 
-// Getting STL to work with PAL is difficult, so reimplement STL functionality to not require it.
-#ifdef TARGET_UNIX
-#include "clr_std/utility"
-#include "clr_std/string"
-#include "clr_std/algorithm"
-#include "clr_std/vector"
-#else // !TARGET_UNIX
-#ifndef USE_STL
-#define USE_STL
-#endif // USE_STL
 #include <utility>
 #include <string>
 #include <algorithm>
 #include <vector>
-#endif // !TARGET_UNIX
+
 
 #ifdef USE_MSVCDIS
 #define DISLIB
@@ -128,6 +117,9 @@ static inline void __debugbreak()
 }
 #endif
 
+using std::min;
+using std::max;
+
 #include <minipal/utils.h>
 
 #endif // STANDARDPCH_H
diff --git a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp
index 47c50535e450..d53002a42914 100644
--- a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp
@@ -259,11 +259,12 @@ CORINFO_CLASS_HANDLE interceptor_ICJI::getDefaultEqualityComparerClass(CORINFO_C
 }
 
 void interceptor_ICJI::expandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN*       pResolvedToken,
+                                                CORINFO_METHOD_HANDLE         callerHandle,
                                                 CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     mc->cr->AddCall("expandRawHandleIntrinsic");
-    original_ICorJitInfo->expandRawHandleIntrinsic(pResolvedToken, pResult);
-    mc->recExpandRawHandleIntrinsic(pResolvedToken, pResult);
+    original_ICorJitInfo->expandRawHandleIntrinsic(pResolvedToken, callerHandle, pResult);
+    mc->recExpandRawHandleIntrinsic(pResolvedToken, callerHandle, pResult);
 }
 
 // Is the given type in System.Private.Corelib and marked with IntrinsicAttribute?
@@ -780,22 +781,24 @@ CORINFO_CLASS_HANDLE interceptor_ICJI::getObjectType(CORINFO_OBJECT_HANDLE typeO
 bool interceptor_ICJI::getReadyToRunHelper(CORINFO_RESOLVED_TOKEN* pResolvedToken,
                                            CORINFO_LOOKUP_KIND*    pGenericLookupKind,
                                            CorInfoHelpFunc         id,
+                                           CORINFO_METHOD_HANDLE   callerHandle,
                                            CORINFO_CONST_LOOKUP*   pLookup)
 {
     mc->cr->AddCall("getReadyToRunHelper");
-    bool result = original_ICorJitInfo->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, pLookup);
-    mc->recGetReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, pLookup, result);
+    bool result = original_ICorJitInfo->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, callerHandle, pLookup);
+    mc->recGetReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, callerHandle, pLookup, result);
     return result;
 }
 
 void interceptor_ICJI::getReadyToRunDelegateCtorHelper(CORINFO_RESOLVED_TOKEN* pTargetMethod,
                                                        mdToken                 targetConstraint,
                                                        CORINFO_CLASS_HANDLE    delegateType,
+                                                       CORINFO_METHOD_HANDLE   callerHandle,
                                                        CORINFO_LOOKUP*         pLookup)
 {
     mc->cr->AddCall("getReadyToRunDelegateCtorHelper");
-    original_ICorJitInfo->getReadyToRunDelegateCtorHelper(pTargetMethod, targetConstraint, delegateType, pLookup);
-    mc->recGetReadyToRunDelegateCtorHelper(pTargetMethod, targetConstraint, delegateType, pLookup);
+    original_ICorJitInfo->getReadyToRunDelegateCtorHelper(pTargetMethod, targetConstraint, delegateType, callerHandle, pLookup);
+    mc->recGetReadyToRunDelegateCtorHelper(pTargetMethod, targetConstraint, delegateType, callerHandle, pLookup);
 }
 
 // This function tries to initialize the class (run the class constructor).
@@ -1193,6 +1196,12 @@ void interceptor_ICJI::reportRichMappings(ICorDebugInfo::InlineTreeNode*    inli
     original_ICorJitInfo->reportRichMappings(inlineTreeNodes, numInlineTreeNodes, mappings, numMappings);
 }
 
+void interceptor_ICJI::reportMetadata(const char* key, const void* value, size_t length)
+{
+    mc->cr->AddCall("reportMetadata");
+    original_ICorJitInfo->reportMetadata(key, value, length);
+}
+
 /*-------------------------- Misc ---------------------------------------*/
 // Used to allocate memory that needs to handed to the EE.
 // For eg, use this to allocated memory for reporting debug info,
@@ -1385,6 +1394,13 @@ bool interceptor_ICJI::getSystemVAmd64PassStructInRegisterDescriptor(
     return result;
 }
 
+void interceptor_ICJI::getSwiftLowering(CORINFO_CLASS_HANDLE structHnd, CORINFO_SWIFT_LOWERING* pLowering)
+{
+    mc->cr->AddCall("getSwiftLowering");
+    original_ICorJitInfo->getSwiftLowering(structHnd, pLowering);
+    mc->recGetSwiftLowering(structHnd, pLowering);
+}
+
 uint32_t interceptor_ICJI::getLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd)
 {
     mc->cr->AddCall("getLoongArch64PassStructInRegisterFlags");
@@ -1517,11 +1533,12 @@ CORINFO_FIELD_HANDLE interceptor_ICJI::embedFieldHandle(CORINFO_FIELD_HANDLE han
 void interceptor_ICJI::embedGenericHandle(CORINFO_RESOLVED_TOKEN* pResolvedToken,
                                           bool fEmbedParent, // TRUE - embeds parent type handle of the field/method
                                                              // handle
+                                          CORINFO_METHOD_HANDLE callerHandle,
                                           CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     mc->cr->AddCall("embedGenericHandle");
-    original_ICorJitInfo->embedGenericHandle(pResolvedToken, fEmbedParent, pResult);
-    mc->recEmbedGenericHandle(pResolvedToken, fEmbedParent, pResult);
+    original_ICorJitInfo->embedGenericHandle(pResolvedToken, fEmbedParent, callerHandle, pResult);
+    mc->recEmbedGenericHandle(pResolvedToken, fEmbedParent, callerHandle, pResult);
 }
 
 // Return information used to locate the exact enclosing type of the current method.
diff --git a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp
index 0f69dfae4a08..ef1b277e8053 100644
--- a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp
@@ -171,10 +171,11 @@ CORINFO_CLASS_HANDLE interceptor_ICJI::getDefaultEqualityComparerClass(
 
 void interceptor_ICJI::expandRawHandleIntrinsic(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     mcs->AddCall("expandRawHandleIntrinsic");
-    original_ICorJitInfo->expandRawHandleIntrinsic(pResolvedToken, pResult);
+    original_ICorJitInfo->expandRawHandleIntrinsic(pResolvedToken, callerHandle, pResult);
 }
 
 bool interceptor_ICJI::isIntrinsicType(
@@ -564,20 +565,22 @@ bool interceptor_ICJI::getReadyToRunHelper(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
           CORINFO_LOOKUP_KIND* pGenericLookupKind,
           CorInfoHelpFunc id,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_CONST_LOOKUP* pLookup)
 {
     mcs->AddCall("getReadyToRunHelper");
-    return original_ICorJitInfo->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, pLookup);
+    return original_ICorJitInfo->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, callerHandle, pLookup);
 }
 
 void interceptor_ICJI::getReadyToRunDelegateCtorHelper(
           CORINFO_RESOLVED_TOKEN* pTargetMethod,
           mdToken targetConstraint,
           CORINFO_CLASS_HANDLE delegateType,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_LOOKUP* pLookup)
 {
     mcs->AddCall("getReadyToRunDelegateCtorHelper");
-    original_ICorJitInfo->getReadyToRunDelegateCtorHelper(pTargetMethod, targetConstraint, delegateType, pLookup);
+    original_ICorJitInfo->getReadyToRunDelegateCtorHelper(pTargetMethod, targetConstraint, delegateType, callerHandle, pLookup);
 }
 
 CorInfoInitClassResult interceptor_ICJI::initClass(
@@ -845,6 +848,15 @@ void interceptor_ICJI::reportRichMappings(
     original_ICorJitInfo->reportRichMappings(inlineTreeNodes, numInlineTreeNodes, mappings, numMappings);
 }
 
+void interceptor_ICJI::reportMetadata(
+          const char* key,
+          const void* value,
+          size_t length)
+{
+    mcs->AddCall("reportMetadata");
+    original_ICorJitInfo->reportMetadata(key, value, length);
+}
+
 void* interceptor_ICJI::allocateArray(
           size_t cBytes)
 {
@@ -970,6 +982,14 @@ bool interceptor_ICJI::getSystemVAmd64PassStructInRegisterDescriptor(
     return original_ICorJitInfo->getSystemVAmd64PassStructInRegisterDescriptor(structHnd, structPassInRegDescPtr);
 }
 
+void interceptor_ICJI::getSwiftLowering(
+          CORINFO_CLASS_HANDLE structHnd,
+          CORINFO_SWIFT_LOWERING* pLowering)
+{
+    mcs->AddCall("getSwiftLowering");
+    original_ICorJitInfo->getSwiftLowering(structHnd, pLowering);
+}
+
 uint32_t interceptor_ICJI::getLoongArch64PassStructInRegisterFlags(
           CORINFO_CLASS_HANDLE structHnd)
 {
@@ -1074,10 +1094,11 @@ CORINFO_FIELD_HANDLE interceptor_ICJI::embedFieldHandle(
 void interceptor_ICJI::embedGenericHandle(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
           bool fEmbedParent,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     mcs->AddCall("embedGenericHandle");
-    original_ICorJitInfo->embedGenericHandle(pResolvedToken, fEmbedParent, pResult);
+    original_ICorJitInfo->embedGenericHandle(pResolvedToken, fEmbedParent, callerHandle, pResult);
 }
 
 void interceptor_ICJI::getLocationOfThisType(
diff --git a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp
index 02bef7b549ac..55aef6512734 100644
--- a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp
@@ -152,9 +152,10 @@ CORINFO_CLASS_HANDLE interceptor_ICJI::getDefaultEqualityComparerClass(
 
 void interceptor_ICJI::expandRawHandleIntrinsic(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_GENERICHANDLE_RESULT* pResult)
 {
-    original_ICorJitInfo->expandRawHandleIntrinsic(pResolvedToken, pResult);
+    original_ICorJitInfo->expandRawHandleIntrinsic(pResolvedToken, callerHandle, pResult);
 }
 
 bool interceptor_ICJI::isIntrinsicType(
@@ -495,18 +496,20 @@ bool interceptor_ICJI::getReadyToRunHelper(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
           CORINFO_LOOKUP_KIND* pGenericLookupKind,
           CorInfoHelpFunc id,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_CONST_LOOKUP* pLookup)
 {
-    return original_ICorJitInfo->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, pLookup);
+    return original_ICorJitInfo->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, callerHandle, pLookup);
 }
 
 void interceptor_ICJI::getReadyToRunDelegateCtorHelper(
           CORINFO_RESOLVED_TOKEN* pTargetMethod,
           mdToken targetConstraint,
           CORINFO_CLASS_HANDLE delegateType,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_LOOKUP* pLookup)
 {
-    original_ICorJitInfo->getReadyToRunDelegateCtorHelper(pTargetMethod, targetConstraint, delegateType, pLookup);
+    original_ICorJitInfo->getReadyToRunDelegateCtorHelper(pTargetMethod, targetConstraint, delegateType, callerHandle, pLookup);
 }
 
 CorInfoInitClassResult interceptor_ICJI::initClass(
@@ -741,6 +744,14 @@ void interceptor_ICJI::reportRichMappings(
     original_ICorJitInfo->reportRichMappings(inlineTreeNodes, numInlineTreeNodes, mappings, numMappings);
 }
 
+void interceptor_ICJI::reportMetadata(
+          const char* key,
+          const void* value,
+          size_t length)
+{
+    original_ICorJitInfo->reportMetadata(key, value, length);
+}
+
 void* interceptor_ICJI::allocateArray(
           size_t cBytes)
 {
@@ -850,6 +861,13 @@ bool interceptor_ICJI::getSystemVAmd64PassStructInRegisterDescriptor(
     return original_ICorJitInfo->getSystemVAmd64PassStructInRegisterDescriptor(structHnd, structPassInRegDescPtr);
 }
 
+void interceptor_ICJI::getSwiftLowering(
+          CORINFO_CLASS_HANDLE structHnd,
+          CORINFO_SWIFT_LOWERING* pLowering)
+{
+    original_ICorJitInfo->getSwiftLowering(structHnd, pLowering);
+}
+
 uint32_t interceptor_ICJI::getLoongArch64PassStructInRegisterFlags(
           CORINFO_CLASS_HANDLE structHnd)
 {
@@ -941,9 +959,10 @@ CORINFO_FIELD_HANDLE interceptor_ICJI::embedFieldHandle(
 void interceptor_ICJI::embedGenericHandle(
           CORINFO_RESOLVED_TOKEN* pResolvedToken,
           bool fEmbedParent,
+          CORINFO_METHOD_HANDLE callerHandle,
           CORINFO_GENERICHANDLE_RESULT* pResult)
 {
-    original_ICorJitInfo->embedGenericHandle(pResolvedToken, fEmbedParent, pResult);
+    original_ICorJitInfo->embedGenericHandle(pResolvedToken, fEmbedParent, callerHandle, pResult);
 }
 
 void interceptor_ICJI::getLocationOfThisType(
diff --git a/src/coreclr/tools/superpmi/superpmi/CMakeLists.txt b/src/coreclr/tools/superpmi/superpmi/CMakeLists.txt
index 5dd42eb5631b..ccb766f0d078 100644
--- a/src/coreclr/tools/superpmi/superpmi/CMakeLists.txt
+++ b/src/coreclr/tools/superpmi/superpmi/CMakeLists.txt
@@ -33,6 +33,7 @@ set(SUPERPMI_SOURCES
     methodstatsemitter.cpp
     neardiffer.cpp
     parallelsuperpmi.cpp
+    streamingsuperpmi.cpp
     superpmi.cpp
     fileio.cpp
     jithost.cpp
diff --git a/src/coreclr/tools/superpmi/superpmi/commandline.cpp b/src/coreclr/tools/superpmi/superpmi/commandline.cpp
index 6635995f65ba..ddd42e3a2e09 100644
--- a/src/coreclr/tools/superpmi/superpmi/commandline.cpp
+++ b/src/coreclr/tools/superpmi/superpmi/commandline.cpp
@@ -110,6 +110,11 @@ void CommandLine::DumpHelp(const char* program)
     printf("     If 'workerCount' is not specified, the number of workers used is\n");
     printf("     the number of processors on the machine.\n");
     printf("\n");
+    printf(" -streaming filename\n");
+    printf("     Streaming mode. Read and execute work requests from indicated file (can be 'stdin').\n");
+    printf("     Each line is a method context number and additional force jit options for that method.\n");
+    printf("     Blank line or EOF terminates\n");
+    printf("\n");
     printf(" -failureLimit <limit>\n");
     printf("     For a positive 'limit' number, replay and asm diffs will exit if it sees more than 'limit' failures.\n");
     printf("     Otherwise, all methods will be compiled.\n");
@@ -170,7 +175,7 @@ void CommandLine::DumpHelp(const char* program)
     printf("     ; if there are any failures, record their MC numbers in the file fail.mcl\n");
 }
 
-static bool ParseJitOption(const char* optionString, WCHAR** key, WCHAR** value)
+bool CommandLine::ParseJitOption(const char* optionString, WCHAR** key, WCHAR** value)
 {
     char tempKey[1024];
 
@@ -468,6 +473,17 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o)
                 }
                 o->hash = argv[i];
             }
+            else if ((_strnicmp(&argv[i][1], "streaming", argLen) == 0))
+            {
+                if (++i >= argc)
+                {
+                    LogError("'-streaming' must be followed by a file name or 'stdin'.");
+                    DumpHelp(argv[0]);
+                    return false;
+                }
+
+                o->streamFile = argv[i];
+            }
             else if ((_strnicmp(&argv[i][1], "parallel", argLen) == 0))
             {
                 o->parallel = true;
@@ -677,6 +693,21 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o)
         }
     }
 
+    if (o->streamFile != nullptr)
+    {
+        if (o->parallel)
+        {
+            LogError("streaming mode and parallel mode are incompatible.");
+            return false;
+        }
+
+        if (o->nameOfJit2 != nullptr)
+        {
+            LogError("streaming mode and diff mode are incompatible.");
+            return false;
+        }
+    }
+
     SPMI_TARGET_ARCHITECTURE defaultSpmiTargetArchitecture = GetSpmiTargetArchitecture();
     SetSuperPmiTargetArchitecture(o->targetArchitecture);
 
diff --git a/src/coreclr/tools/superpmi/superpmi/commandline.h b/src/coreclr/tools/superpmi/superpmi/commandline.h
index c9a9bcb46a79..6c31264eadcc 100644
--- a/src/coreclr/tools/superpmi/superpmi/commandline.h
+++ b/src/coreclr/tools/superpmi/superpmi/commandline.h
@@ -25,6 +25,7 @@ class CommandLine
         bool  ignoreStoredConfig = false;
         bool  applyDiff = false;
         bool  parallel = false;        // User specified to use /parallel mode.
+        char* streamFile = nullptr;
 #if !defined(USE_MSVCDIS) && defined(USE_COREDISTOOLS)
         bool  useCoreDisTools = true; // Use CoreDisTools library instead of Msvcdis
 #else
@@ -58,6 +59,8 @@ class CommandLine
                              LightWeightMap<DWORD, DWORD>** pJitOptions,
                              LightWeightMap<DWORD, DWORD>** pForceJitOptions);
 
+    static bool ParseJitOption(const char* optionString, WCHAR** key, WCHAR** value);
+
 private:
     static void DumpHelp(const char* program);
 };
diff --git a/src/coreclr/tools/superpmi/superpmi/fileio.cpp b/src/coreclr/tools/superpmi/superpmi/fileio.cpp
index ed16485a038e..e26723de6e3e 100644
--- a/src/coreclr/tools/superpmi/superpmi/fileio.cpp
+++ b/src/coreclr/tools/superpmi/superpmi/fileio.cpp
@@ -27,10 +27,7 @@ bool FileWriter::Printf(const char* fmt, ...)
         }
         else
         {
-            DWORD numWritten;
-            bool result =
-                WriteFile(m_file.Get(), pBuffer, static_cast<DWORD>(printed), &numWritten, nullptr) &&
-                (numWritten == static_cast<DWORD>(printed));
+            bool result = Print(pBuffer, static_cast<size_t>(printed));
 
             if (pBuffer != stackBuffer)
                 delete[] pBuffer;
@@ -41,6 +38,75 @@ bool FileWriter::Printf(const char* fmt, ...)
     }
 }
 
+bool FileWriter::Print(const char* value, size_t numChars)
+{
+    DWORD numWritten;
+    bool result =
+        WriteFile(m_file.Get(), value, static_cast<DWORD>(numChars), &numWritten, nullptr) &&
+        (numWritten == static_cast<DWORD>(numChars));
+    return result;
+}
+
+bool FileWriter::Print(const char* value)
+{
+    return Print(value, strlen(value));
+}
+
+bool FileWriter::Print(int value)
+{
+    return Printf("%d", value);
+}
+
+bool FileWriter::Print(int64_t value)
+{
+    return Printf("%lld", value);
+}
+
+bool FileWriter::Print(double value)
+{
+    return Printf("%f", value);
+}
+
+bool FileWriter::PrintQuotedCsvField(const char* value)
+{
+    size_t numQuotes = 0;
+    for (const char* p = value; *p != '\0'; p++)
+    {
+        if (*p == '"')
+        {
+            numQuotes++;
+        }
+    }
+
+    if (numQuotes == 0)
+    {
+        return Printf("\"%s\"", value);
+    }
+    else
+    {
+        size_t len = 2 + strlen(value) + numQuotes;
+        char* buffer = new char[len];
+
+        size_t index = 0;
+        buffer[index++] = '"';
+        for (const char* p = value; *p != '\0'; p++)
+        {
+            if (*p == '"')
+            {
+                buffer[index++] = '"';
+            }
+            buffer[index++] = *p;
+        }
+
+        buffer[index++] = '"';
+        assert(index == len);
+
+        bool result = Print(buffer, len);
+        delete[] buffer;
+        return result;
+    }
+}
+
 bool FileWriter::CreateNew(const char* path, FileWriter* fw)
 {
     FileHandle handle(CreateFile(path, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr));
diff --git a/src/coreclr/tools/superpmi/superpmi/fileio.h b/src/coreclr/tools/superpmi/superpmi/fileio.h
index a88e74d6ee00..4a1434f97259 100644
--- a/src/coreclr/tools/superpmi/superpmi/fileio.h
+++ b/src/coreclr/tools/superpmi/superpmi/fileio.h
@@ -93,6 +93,12 @@ class FileWriter
     {
     }
 
+    bool Print(const char* value, size_t numChars);
+    bool Print(const char* value);
+    bool Print(int value);
+    bool Print(int64_t value);
+    bool Print(double value);
+    bool PrintQuotedCsvField(const char* value);
     bool Printf(const char* fmt, ...);
 
     static bool CreateNew(const char* path, FileWriter* fw);
diff --git a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp
index 15c17173abdd..f23c8e12f866 100644
--- a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp
+++ b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp
@@ -221,10 +221,10 @@ CORINFO_CLASS_HANDLE MyICJI::getDefaultEqualityComparerClass(CORINFO_CLASS_HANDL
     return result;
 }
 
-void MyICJI::expandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_GENERICHANDLE_RESULT* pResult)
+void MyICJI::expandRawHandleIntrinsic(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_METHOD_HANDLE callerHandle, CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     jitInstance->mc->cr->AddCall("expandRawHandleIntrinsic");
-    jitInstance->mc->repExpandRawHandleIntrinsic(pResolvedToken, pResult);
+    jitInstance->mc->repExpandRawHandleIntrinsic(pResolvedToken, callerHandle, pResult);
 }
 
 // Is the given type in System.Private.Corelib and marked with IntrinsicAttribute?
@@ -653,19 +653,21 @@ CORINFO_CLASS_HANDLE MyICJI::getObjectType(CORINFO_OBJECT_HANDLE objPtr)
 bool MyICJI::getReadyToRunHelper(CORINFO_RESOLVED_TOKEN* pResolvedToken,
                                  CORINFO_LOOKUP_KIND*    pGenericLookupKind,
                                  CorInfoHelpFunc         id,
+                                 CORINFO_METHOD_HANDLE   callerHandle,
                                  CORINFO_CONST_LOOKUP*   pLookup)
 {
     jitInstance->mc->cr->AddCall("getReadyToRunHelper");
-    return jitInstance->mc->repGetReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, pLookup);
+    return jitInstance->mc->repGetReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, callerHandle, pLookup);
 }
 
 void MyICJI::getReadyToRunDelegateCtorHelper(CORINFO_RESOLVED_TOKEN* pTargetMethod,
                                              mdToken                 targetConstraint,
                                              CORINFO_CLASS_HANDLE    delegateType,
+                                             CORINFO_METHOD_HANDLE   callerHandle,
                                              CORINFO_LOOKUP*         pLookup)
 {
     jitInstance->mc->cr->AddCall("getReadyToRunDelegateCtorHelper");
-    jitInstance->mc->repGetReadyToRunDelegateCtorHelper(pTargetMethod, targetConstraint, delegateType, pLookup);
+    jitInstance->mc->repGetReadyToRunDelegateCtorHelper(pTargetMethod, targetConstraint, delegateType, callerHandle, pLookup);
 }
 
 // This function tries to initialize the class (run the class constructor).
@@ -1027,6 +1029,37 @@ void MyICJI::reportRichMappings(
     freeArray(mappings);
 }
 
+void MyICJI::reportMetadata(const char* key, const void* value, size_t length)
+{
+    jitInstance->mc->cr->AddCall("reportMetadata");
+
+    if (strcmp(key, "MethodFullName") == 0)
+    {
+        char* buf = static_cast<char*>(jitInstance->mc->cr->allocateMemory(length + 1));
+        memcpy(buf, value, length + 1);
+        jitInstance->mc->cr->MethodFullName = buf;
+        return;
+    }
+
+    if (strcmp(key, "TieringName") == 0)
+    {
+        char* buf = static_cast<char*>(jitInstance->mc->cr->allocateMemory(length + 1));
+        memcpy(buf, value, length + 1);
+        jitInstance->mc->cr->TieringName = buf;
+        return;
+    }
+
+#define JITMETADATAINFO(name, type, flags)
+#define JITMETADATAMETRIC(name, type, flags) \
+    if ((strcmp(key, #name) == 0) && (length == sizeof(type)))   \
+    {                                                            \
+        memcpy(&jitInstance->mc->cr->name, value, sizeof(type)); \
+        return;                                                  \
+    }
+
+#include "jitmetadatalist.h"
+}
+
 /*-------------------------- Misc ---------------------------------------*/
 
 // Used to allocate memory that needs to handed to the EE.
@@ -1194,6 +1227,12 @@ bool MyICJI::getSystemVAmd64PassStructInRegisterDescriptor(
     return jitInstance->mc->repGetSystemVAmd64PassStructInRegisterDescriptor(structHnd, structPassInRegDescPtr);
 }
 
+void MyICJI::getSwiftLowering(CORINFO_CLASS_HANDLE structHnd, CORINFO_SWIFT_LOWERING* pLowering)
+{
+    jitInstance->mc->cr->AddCall("getSwiftLowering");
+    jitInstance->mc->repGetSwiftLowering(structHnd, pLowering);
+}
+
 uint32_t MyICJI::getLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE structHnd)
 {
     jitInstance->mc->cr->AddCall("getLoongArch64PassStructInRegisterFlags");
@@ -1301,10 +1340,11 @@ CORINFO_FIELD_HANDLE MyICJI::embedFieldHandle(CORINFO_FIELD_HANDLE handle, void*
 //
 void MyICJI::embedGenericHandle(CORINFO_RESOLVED_TOKEN* pResolvedToken,
                                 bool fEmbedParent, // TRUE - embeds parent type handle of the field/method handle
+                                CORINFO_METHOD_HANDLE callerHandle,
                                 CORINFO_GENERICHANDLE_RESULT* pResult)
 {
     jitInstance->mc->cr->AddCall("embedGenericHandle");
-    jitInstance->mc->repEmbedGenericHandle(pResolvedToken, fEmbedParent, pResult);
+    jitInstance->mc->repEmbedGenericHandle(pResolvedToken, fEmbedParent, callerHandle, pResult);
 }
 
 // Return information used to locate the exact enclosing type of the current method.
diff --git a/src/coreclr/tools/superpmi/superpmi/jitinstance.cpp b/src/coreclr/tools/superpmi/superpmi/jitinstance.cpp
index d13e0c75800c..8eb0e69e8b2e 100644
--- a/src/coreclr/tools/superpmi/superpmi/jitinstance.cpp
+++ b/src/coreclr/tools/superpmi/superpmi/jitinstance.cpp
@@ -459,6 +459,7 @@ ReplayResults JitInstance::CompileMethod(MethodContext* MethodToCompile, int mcI
     }
 
     mc->cr->secondsToCompile = stj.GetSeconds();
+    param.results.CompileResults = mc->cr;
 
     UINT64 insCountAfter = 0;
     Instrumentor_GetInsCount(&insCountAfter);
@@ -641,3 +642,8 @@ const MethodContext::Environment& JitInstance::getEnvironment()
 {
     return environment;
 }
+
+void JitInstance::updateForceOptions(LightWeightMap<DWORD, DWORD>* newForceOptions)
+{
+    forceOptions = newForceOptions;
+}
diff --git a/src/coreclr/tools/superpmi/superpmi/jitinstance.h b/src/coreclr/tools/superpmi/superpmi/jitinstance.h
index 42f1f4ade7c4..b13fe46d641f 100644
--- a/src/coreclr/tools/superpmi/superpmi/jitinstance.h
+++ b/src/coreclr/tools/superpmi/superpmi/jitinstance.h
@@ -22,6 +22,7 @@ struct ReplayResults
     bool IsMinOpts = false;
     uint32_t NumCodeBytes = 0;
     uint64_t NumExecutedInstructions = 0;
+    CompileResult* CompileResults = nullptr;
 };
 
 class JitInstance
@@ -81,6 +82,8 @@ class JitInstance
     void* allocateLongLivedArray(size_t size);
     void freeArray(void* array);
     void freeLongLivedArray(void* array);
+
+    void updateForceOptions(LightWeightMap<DWORD, DWORD>* newForceOptions);
 };
 
 #endif
diff --git a/src/coreclr/tools/superpmi/superpmi/streamingsuperpmi.cpp b/src/coreclr/tools/superpmi/superpmi/streamingsuperpmi.cpp
new file mode 100644
index 000000000000..4da3f0f0561f
--- /dev/null
+++ b/src/coreclr/tools/superpmi/superpmi/streamingsuperpmi.cpp
@@ -0,0 +1,318 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "standardpch.h"
+#include "superpmi.h"
+#include "jitinstance.h"
+#include "simpletimer.h"
+#include "mclist.h"
+#include "lightweightmap.h"
+#include "commandline.h"
+#include "errorhandling.h"
+#include "methodcontext.h"
+#include "methodcontextreader.h"
+#include "spmiutil.h"
+#include "fileio.h"
+#include "commandline.h"
+
+#if defined(_WIN32)
+#define strtok_r strtok_s
+#endif
+
+static bool AddJitOption(LightWeightMap<DWORD,DWORD>* map, char* newOption)
+{
+    WCHAR* key;
+    WCHAR* value;
+
+    if (!CommandLine::ParseJitOption(newOption, &key, &value))
+    {
+        return false;
+    }
+
+    DWORD keyIndex =
+        (DWORD)map->AddBuffer((unsigned char*)key, sizeof(WCHAR) * ((unsigned int)u16_strlen(key) + 1));
+    DWORD valueIndex =
+        (DWORD)map->AddBuffer((unsigned char*)value, sizeof(WCHAR) * ((unsigned int)u16_strlen(value) + 1));
+    map->Add(keyIndex, valueIndex);
+
+    delete[] key;
+    delete[] value;
+    
+    return true;
+}
+
+struct CacheEntry
+{
+    MethodContext* mc;
+    int age;
+};
+
+static MethodContext* getMethodContext(int index, MethodContextReader* reader)
+{
+    enum { CACHE_SIZE = 100 };
+    static CacheEntry cache[CACHE_SIZE] = {};
+    static int count = 0;
+    static int age = 0;
+    int i = 0;
+
+    // Search the cache
+    //
+    for (; i < count; i++)
+    {
+        if (cache[i].mc->index == index)
+        {
+            break;
+        }
+    }
+
+    if (i == count)
+    {
+        // Method not found in cache
+        //
+        LogDebug("[streaming] loading MC %i from file", index);
+        if (i == CACHE_SIZE)
+        {
+            // Cache is full, evict oldest entry
+            //
+            int oldestAge = age;
+            int oldestEntry = -1;
+            for (int j = 0; j < CACHE_SIZE; j++)
+            {
+                if (cache[j].age < oldestAge)
+                {
+                    oldestEntry = j;
+                    oldestAge = cache[j].age;
+                }
+            }
+
+            LogDebug("[streaming] evicting MC %i from cache", cache[oldestEntry].mc->index);
+            delete cache[oldestEntry].mc;
+            cache[oldestEntry].mc = nullptr;
+            i = oldestEntry;
+        }
+        else
+        {
+            count++;
+        }
+
+        reader->Reset(&index, 1);
+        MethodContextBuffer mcb = reader->GetNextMethodContext();
+
+        if (mcb.Error())
+        {
+            return nullptr;
+        }
+
+        MethodContext* mc = nullptr;
+        if (!MethodContext::Initialize(index, mcb.buff, mcb.size, &mc))
+        {
+            return nullptr;
+        }
+
+        cache[i].mc = mc;
+    }
+    else
+    {
+        LogDebug("[streaming] found MC %i in cache", index);
+    }
+
+    // Move to front...
+    //
+    if (i != 0)
+    {
+        CacheEntry temp = cache[0];
+        cache[0] = cache[i];
+        cache[i] = temp;
+    }
+
+    cache[0].age = age++;
+    return cache[0].mc;
+}
+
+int doStreamingSuperPMI(CommandLine::Options& o)
+{
+    HRESULT     hr = E_FAIL;
+    SimpleTimer st;
+    st.Start();
+
+    FILE* streamFile = nullptr;
+    if (_stricmp(o.streamFile, "stdin") == 0)
+    {
+        streamFile = stdin;
+    }
+    else 
+    {
+        streamFile = fopen(o.streamFile, "r");
+    }
+
+    if (streamFile == nullptr)
+    {
+        LogError("Failed to open file '%s'. GetLastError()=%u", o.streamFile, GetLastError());
+        return 1;
+    }
+
+    // Just one worker for now... all method selection done via stream file
+    //
+    o.workerCount = 1;
+    o.indexes = nullptr;
+    o.indexCount = -1;
+    o.hash = nullptr;
+    o.offset = -1;
+    o.increment = -1;
+
+    // The method context reader handles skipping any unrequested method contexts
+    // Used in conjunction with an MCI file, it does a lot less work...
+    MethodContextReader* reader =
+        new MethodContextReader(o.nameOfInputMethodContextFile, o.indexes, o.indexCount, o.hash, o.offset, o.increment);
+    if (!reader->isValid())
+    {
+        return (int)SpmiResult::GeneralFailure;
+    }
+
+    JitInstance* jit = nullptr;
+
+    enum { BUFFER_SIZE = 2048 };
+
+    char line[BUFFER_SIZE];
+    const char* const seps = "!";
+    char *next = nullptr;
+
+    // Syntax is dddd { ! <jit-option>=value }*
+    // Likes starting with '#' are ignored
+    //
+    while (fgets(line, BUFFER_SIZE, streamFile) != nullptr)
+    {
+        for (int i = 0; i < BUFFER_SIZE; i++)
+        {
+            if (line[i] == '\n' || line[i] == '\r')
+            {
+                line[i]= 0;
+                break;
+            }
+        }
+        line[BUFFER_SIZE - 1] = '0';
+
+        LogDebug("[streaming] Request: '%s'", line);
+
+        if (line[0] == '#')
+        {
+            continue;
+        }
+
+        if (strncmp(line, "quit", 4) == 0)
+        {
+            LogDebug("[streaming] Quitting");
+            break;
+        }
+
+        char* tok = strtok_r(line, seps, &next);
+        const int index = atoi(tok);
+
+        if (index == 0)
+        {
+            LogDebug("[streaming] Stopping");
+            break;
+        }
+
+        LogDebug("[streaming] Method %d", index);
+
+        LightWeightMap<DWORD,DWORD>* baseForceJitOptions = o.forceJitOptions;
+        LightWeightMap<DWORD,DWORD>* forceJitOptions = nullptr;
+        bool skip = false;
+
+        while ((tok = strtok_r(nullptr, seps, &next)))
+        {
+            if (forceJitOptions == nullptr)
+            {
+                if (baseForceJitOptions == nullptr)
+                {
+                    forceJitOptions = new LightWeightMap<DWORD, DWORD>();
+                }
+                else
+                {
+                    forceJitOptions = new LightWeightMap<DWORD, DWORD>(*baseForceJitOptions);
+                }
+            }
+
+            bool added = AddJitOption(forceJitOptions, tok);
+
+            if (!added)
+            {
+                LogInfo("[streaming] unable to parse option '%s'", tok);
+                skip = true;
+                break;
+            }
+        }
+
+        if (skip)
+        {
+            continue;
+        }
+
+        LogDebug("[streaming] Launching...");
+        MethodContext* const mc = getMethodContext(index, reader);
+
+        if (mc == nullptr)
+        {
+            return (int)SpmiResult::GeneralFailure;
+        }
+
+        if (mc->index != index)
+        {
+            LogDebug("MC cache lookup failure, wanted index %d, got index %d\n", index, mc->index);
+            return (int)SpmiResult::GeneralFailure;
+        }
+
+        if (jit == nullptr)
+        {    
+            LogDebug("[streaming] loading jit %s", o.nameOfJit);
+            SimpleTimer stInitJit;
+            jit = JitInstance::InitJit(o.nameOfJit, o.breakOnAssert, &stInitJit, mc, forceJitOptions, o.jitOptions);
+            
+            if (jit == nullptr)
+            {
+                // InitJit already printed a failure message
+                return (int)SpmiResult::JitFailedToInit;
+            }
+        }
+        else
+        {
+            jit->updateForceOptions(forceJitOptions);
+            jit->resetConfig(mc);
+        }
+
+        LogDebug("[streaming] invoking jit");
+        fflush(stdout);
+
+        bool collectThroughput = false;
+        ReplayResults res = jit->CompileMethod(mc, reader->GetMethodContextIndex(), collectThroughput);
+
+        if (res.Result == ReplayResult::Success)
+        {
+            if (Logger::IsLogLevelEnabled(LOGLEVEL_DEBUG))
+            {
+                mc->cr->dumpToConsole(); // Dump the compile results if doing debug logging
+            }
+        }
+        else if (res.Result == ReplayResult::Error)
+        {
+            LogDebug("[streaming] jit compilation failed");
+
+            LogError("Method %d of size %d failed to load and compile correctly%s (%s).",
+                reader->GetMethodContextIndex(), mc->methodSize,
+                (o.nameOfJit2 == nullptr) ? "" : " by JIT1", o.nameOfJit);
+        }
+
+        // Protocol with clients is for them to read stdout. Let them know we're done.
+        //
+        printf("[streaming] Done. Status=%d\n", (int) res.Result);
+        fflush(stdout);
+
+        // Cleanup
+        //
+        delete forceJitOptions;
+        mc->Reset();
+    }
+
+    return (int)SpmiResult::Success;
+}
diff --git a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp
index 0410b14846db..f0b4b76fcc1c 100644
--- a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp
+++ b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp
@@ -21,6 +21,7 @@
 #include "fileio.h"
 
 extern int doParallelSuperPMI(CommandLine::Options& o);
+extern int doStreamingSuperPMI(CommandLine::Options& o);
 
 // NOTE: these output status strings are parsed by parallelsuperpmi.cpp::ProcessChildStdOut().
 // There must be a single, fixed prefix common to all strings, to ease the determination of when
@@ -136,42 +137,81 @@ static const char* ResultToString(ReplayResult result)
     }
 }
 
-static bool PrintDiffsCsvHeader(FileWriter& fw)
+static void PrintDiffsCsvHeader(FileWriter& fw)
 {
-    return fw.Printf("Context,Context size,Base result,Diff result,MinOpts,Has diff,Base size,Diff size,Base instructions,Diff instructions\n");
+    fw.Print("Context,Context size,Method full name,Tier name,Base result,Diff result,MinOpts,Has diff,Base size,Diff size,Base instructions,Diff instructions");
+
+#define JITMETADATAINFO(name, type, flags)
+#define JITMETADATAMETRIC(name, type, flags) fw.Print(",Base " #name ",Diff " #name);
+
+#include "jitmetadatalist.h"
+
+    fw.Print("\n");
 }
 
-static bool PrintDiffsCsvRow(
+static void PrintDiffsCsvRow(
     FileWriter& fw,
     int context, uint32_t contextSize,
     const ReplayResults& baseRes,
     const ReplayResults& diffRes,
     bool hasDiff)
 {
-    return fw.Printf("%d,%u,%s,%s,%s,%s,%u,%u,%lld,%lld\n",
-        context, contextSize,
+    fw.Printf("%d,%u,", context, contextSize);
+    fw.PrintQuotedCsvField(baseRes.CompileResults->MethodFullName == nullptr ? "" : baseRes.CompileResults->MethodFullName);
+    fw.Printf(
+        ",%s,%s,%s,%s,%s,%u,%u,%lld,%lld",
+        baseRes.CompileResults->TieringName == nullptr ? "" : baseRes.CompileResults->TieringName,
         ResultToString(baseRes.Result), ResultToString(diffRes.Result),
         baseRes.IsMinOpts ? "True" : "False",
         hasDiff ? "True" : "False",
         baseRes.NumCodeBytes, diffRes.NumCodeBytes,
         baseRes.NumExecutedInstructions, diffRes.NumExecutedInstructions);
+
+#define JITMETADATAINFO(name, type, flags)
+#define JITMETADATAMETRIC(name, type, flags) \
+    fw.Print(",");                           \
+    fw.Print(baseRes.CompileResults->name);  \
+    fw.Print(",");                           \
+    fw.Print(diffRes.CompileResults->name);
+
+#include "jitmetadatalist.h"
+
+    fw.Print("\n");
 }
 
-static bool PrintReplayCsvHeader(FileWriter& fw)
+static void PrintReplayCsvHeader(FileWriter& fw)
 {
-    return fw.Printf("Context,Context size,Result,MinOpts,Size,Instructions\n");
+    fw.Printf("Context,Context size,Method full name,Tier name,Result,MinOpts,Size,Instructions");
+
+#define JITMETADATAINFO(name, type, flags)
+#define JITMETADATAMETRIC(name, type, flags) fw.Print("," #name);
+
+#include "jitmetadatalist.h"
+
+    fw.Print("\n");
 }
 
-static bool PrintReplayCsvRow(
+static void PrintReplayCsvRow(
     FileWriter& fw,
     int context, uint32_t contextSize,
     const ReplayResults& res)
 {
-    return fw.Printf("%d,%u,%s,%s,%u,%lld\n",
-        context, contextSize,
+    fw.Printf("%d,%u,", context, contextSize);
+    fw.PrintQuotedCsvField(res.CompileResults->MethodFullName == nullptr ? "" : res.CompileResults->MethodFullName);
+    fw.Printf(",%s,%s,%s,%u,%lld",
+        res.CompileResults->TieringName == nullptr ? "" : res.CompileResults->TieringName,
         ResultToString(res.Result),
         res.IsMinOpts ? "True" : "False",
         res.NumCodeBytes, res.NumExecutedInstructions);
+
+#define JITMETADATAINFO(name, type, flags)
+#define JITMETADATAMETRIC(name, type, flags) \
+    fw.Print(",");                           \
+    fw.Print(res.CompileResults->name);
+
+#include "jitmetadatalist.h"
+
+    fw.Print("\n");
 }
 
 // Run superpmi. The return value is as follows:
@@ -230,6 +270,11 @@ int __cdecl main(int argc, char* argv[])
         return doParallelSuperPMI(o);
     }
 
+    if (o.streamFile != nullptr)
+    {
+        return doStreamingSuperPMI(o);
+    }
+
     SetBreakOnException(o.breakOnException);
 
     if (o.methodStatsTypes != NULL &&
diff --git a/src/coreclr/unwinder/i386/unwinder.cpp b/src/coreclr/unwinder/i386/unwinder.cpp
index 5ee1193763a2..d8e7e7355681 100644
--- a/src/coreclr/unwinder/i386/unwinder.cpp
+++ b/src/coreclr/unwinder/i386/unwinder.cpp
@@ -21,15 +21,23 @@ BOOL OOPStackUnwinderX86::Unwind(T_CONTEXT* pContextRecord, T_KNONVOLATILE_CONTE
         rd.pCurrentContextPointers = pContextPointers;
     }
 
-    CodeManState codeManState;
-    codeManState.dwIsSet = 0;
-
     DWORD ControlPc = pContextRecord->Eip;
 
     EECodeInfo codeInfo;
     codeInfo.Init((PCODE) ControlPc);
 
-    if (!UnwindStackFrame(&rd, &codeInfo, UpdateAllRegs, &codeManState, NULL))
+    GCInfoToken gcInfoToken = codeInfo.GetGCInfoToken();
+    hdrInfo hdrInfoBody;
+    DWORD hdrInfoSize = (DWORD)DecodeGCHdrInfo(gcInfoToken, codeInfo.GetRelOffset(), &hdrInfoBody);
+
+    if (!UnwindStackFrameX86(&rd,
+                             PTR_CBYTE(codeInfo.GetSavedMethodCode()),
+                             codeInfo.GetRelOffset(),
+                             &hdrInfoBody,
+                             dac_cast<PTR_CBYTE>(gcInfoToken.Info) + hdrInfoSize,
+                             PTR_CBYTE(codeInfo.GetJitManager()->GetFuncletStartAddress(&codeInfo)),
+                             codeInfo.IsFunclet(),
+                             true))
     {
         return FALSE;
     }
@@ -185,10 +193,7 @@ BOOL DacUnwindStackFrame(T_CONTEXT* pContextRecord, T_KNONVOLATILE_CONTEXT_POINT
 //     language specific exception handler is returned. Otherwise, NULL is
 //     returned.
 //
-EXTERN_C
-NTSYSAPI
 PEXCEPTION_ROUTINE
-NTAPI
 RtlVirtualUnwind (
     _In_ DWORD HandlerType,
     _In_ DWORD ImageBase,
diff --git a/src/coreclr/unwinder/loongarch64/unwinder.cpp b/src/coreclr/unwinder/loongarch64/unwinder.cpp
index 93cc2a7aee65..2a263f0516ac 100644
--- a/src/coreclr/unwinder/loongarch64/unwinder.cpp
+++ b/src/coreclr/unwinder/loongarch64/unwinder.cpp
@@ -106,7 +106,9 @@ do {
     if (ARGUMENT_PRESENT(Params)) {                                                   \
         PT_KNONVOLATILE_CONTEXT_POINTERS ContextPointers = (Params)->ContextPointers; \
         if (ARGUMENT_PRESENT(ContextPointers)) {                                      \
-            if (RegisterNumber ==  22)                                                \
+            if (RegisterNumber == 1)                                                  \
+                ContextPointers->Ra = (PDWORD64)Address;                              \
+            else if (RegisterNumber == 22)                                            \
                 ContextPointers->Fp = (PDWORD64)Address;                              \
             else if (RegisterNumber >=  23 && RegisterNumber <= 31) {                 \
                 (&ContextPointers->S0)[RegisterNumber - 23] = (PDWORD64)Address;      \
diff --git a/src/coreclr/unwinder/riscv64/unwinder.cpp b/src/coreclr/unwinder/riscv64/unwinder.cpp
index 508d31b5b667..9de9104b4a47 100644
--- a/src/coreclr/unwinder/riscv64/unwinder.cpp
+++ b/src/coreclr/unwinder/riscv64/unwinder.cpp
@@ -106,7 +106,9 @@ do {
     if (ARGUMENT_PRESENT(Params)) {                                                   \
         PT_KNONVOLATILE_CONTEXT_POINTERS ContextPointers = (Params)->ContextPointers; \
         if (ARGUMENT_PRESENT(ContextPointers)) {                                      \
-            if (RegisterNumber == 8)                                                  \
+            if (RegisterNumber == 1)                                                  \
+                ContextPointers->Ra = (PDWORD64)Address;                              \
+            else if (RegisterNumber == 8)                                             \
                 ContextPointers->Fp = (PDWORD64)Address;                              \
             else if (RegisterNumber == 9)                                             \
                 ContextPointers->S1 = (PDWORD64)Address;                              \
diff --git a/src/coreclr/utilcode/check.cpp b/src/coreclr/utilcode/check.cpp
index 30296b0ebc06..617f0c3dd983 100644
--- a/src/coreclr/utilcode/check.cpp
+++ b/src/coreclr/utilcode/check.cpp
@@ -63,7 +63,6 @@ SPECIALIZED_VIOLATION(GCViolation);
 SPECIALIZED_VIOLATION(ModeViolation);
 SPECIALIZED_VIOLATION(FaultViolation);
 SPECIALIZED_VIOLATION(FaultNotFatal);
-SPECIALIZED_VIOLATION(HostViolation);
 SPECIALIZED_VIOLATION(TakesLockViolation);
 SPECIALIZED_VIOLATION(LoadsTypeViolation);
 
diff --git a/src/coreclr/utilcode/clrconfig.cpp b/src/coreclr/utilcode/clrconfig.cpp
index 8ea705a917e8..b531018eb08a 100644
--- a/src/coreclr/utilcode/clrconfig.cpp
+++ b/src/coreclr/utilcode/clrconfig.cpp
@@ -201,7 +201,11 @@ namespace
                 // Validate the cache and no-cache logic result in the same answer
                 SString nameToConvert(name);
 
+#ifdef HOST_WINDOWS
                 CLRConfigNoCache nonCache = CLRConfigNoCache::Get(nameToConvert.GetUTF8(), noPrefix);
+#else
+                CLRConfigNoCache nonCache = CLRConfigNoCache::Get(nameToConvert.GetUTF8(), noPrefix, &PAL_getenv);
+#endif
                 LPCSTR valueNoCache = nonCache.AsString();
 
                 _ASSERTE(SString::_stricmp(valueNoCache, temp.GetUTF8()) == 0);
diff --git a/src/coreclr/utilcode/clrhost_nodependencies.cpp b/src/coreclr/utilcode/clrhost_nodependencies.cpp
index b385474b6dc0..7aceae763c43 100644
--- a/src/coreclr/utilcode/clrhost_nodependencies.cpp
+++ b/src/coreclr/utilcode/clrhost_nodependencies.cpp
@@ -246,6 +246,11 @@ FORCEINLINE void* ClrMalloc(size_t size)
 
     p = HeapAlloc(hHeap, 0, size);
 #else
+    if (size == 0)
+    {
+        // Allocate at least one byte.
+        size = 1;
+    }
     p = malloc(size);
 #endif
 
diff --git a/src/coreclr/utilcode/collections.cpp b/src/coreclr/utilcode/collections.cpp
index ed5271fde77f..1896d702b5e7 100644
--- a/src/coreclr/utilcode/collections.cpp
+++ b/src/coreclr/utilcode/collections.cpp
@@ -268,6 +268,12 @@ BYTE *CHashTable::FindNextEntry(        // The next entry, or0 for end of list.
         if (psSrch->iNext != UINT32_MAX)
         {
             psEntry = EntryPtr(psSrch->iNext);
+#if DACCESS_COMPILE
+            // If there is a simple infinite loop in the linked list
+            // If more complex forms of infinite loops are present, this code may need to be adjusted to handle an arbitrary cycle.
+            if (psEntry->iNext == psSrch->iNext)
+                return NULL;
+#endif
             psSrch->iNext = psEntry->iNext;
             return ((BYTE *) psEntry);
         }
diff --git a/src/coreclr/utilcode/debug.cpp b/src/coreclr/utilcode/debug.cpp
index 363ff6599bab..cc49e9bcfedc 100644
--- a/src/coreclr/utilcode/debug.cpp
+++ b/src/coreclr/utilcode/debug.cpp
@@ -20,9 +20,8 @@
 
 #include "log.h"
 
-extern "C" _CRTIMP int __cdecl _flushall(void);
-
 #ifdef HOST_WINDOWS
+extern "C" _CRTIMP int __cdecl _flushall(void);
 void CreateCrashDumpIfEnabled(bool stackoverflow = false);
 #endif
 
@@ -51,7 +50,11 @@ static void DECLSPEC_NORETURN FailFastOnAssert()
     WRAPPER_NO_CONTRACT; // If we're calling this, we're well past caring about contract consistency!
 
     FlushLogging(); // make certain we get the last part of the log
+#ifdef HOST_WINDOWS
     _flushall();
+#else
+    fflush(NULL);
+#endif
 
     ShutdownLogging();
 #ifdef HOST_WINDOWS
@@ -254,7 +257,7 @@ bool _DbgBreakCheck(
     if (formattedMessages)
     {
         OutputDebugStringUtf8(formatBuffer);
-        fprintf(stderr, formatBuffer);
+        fprintf(stderr, "%s", formatBuffer);
     }
     else
     {
@@ -408,12 +411,6 @@ VOID DbgAssertDialog(const char *szFile, int iLine, const char *szExpr)
 
     SUPPRESS_ALLOCATION_ASSERTS_IN_THIS_SCOPE;
 
-    // Raising the assert dialog can cause us to re-enter the host when allocating
-    // memory for the string.  Since this is debug-only code, we can safely skip
-    // violation asserts here, particularly since they can also cause infinite
-    // recursion.
-    PERMANENT_CONTRACT_VIOLATION(HostViolation, ReasonDebugOnly);
-
     dbgForceToMemory = &szFile;     //make certain these args are available in the debugger
     dbgForceToMemory = &iLine;
     dbgForceToMemory = &szExpr;
diff --git a/src/coreclr/utilcode/loaderheap.cpp b/src/coreclr/utilcode/loaderheap.cpp
index 72d0d1a6f6f7..985df665be6f 100644
--- a/src/coreclr/utilcode/loaderheap.cpp
+++ b/src/coreclr/utilcode/loaderheap.cpp
@@ -1154,7 +1154,7 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit)
         }
 
         // Figure out how much to reserve
-        dwSizeToReserve = max(dwSizeToCommit, m_dwReserveBlockSize);
+        dwSizeToReserve = max<size_t>(dwSizeToCommit, m_dwReserveBlockSize);
 
         // Round to VIRTUAL_ALLOC_RESERVE_GRANULARITY
         dwSizeToReserve = ALIGN_UP(dwSizeToReserve, VIRTUAL_ALLOC_RESERVE_GRANULARITY);
diff --git a/src/coreclr/utilcode/md5.cpp b/src/coreclr/utilcode/md5.cpp
index 7297114f21fe..cc86a48bedc2 100644
--- a/src/coreclr/utilcode/md5.cpp
+++ b/src/coreclr/utilcode/md5.cpp
@@ -141,7 +141,7 @@ void MD5::GetHashValue(MD5HASHDATA* phash)
     //
     // but our compiler has an intrinsic!
 
-    #if (defined(HOST_X86) || defined(HOST_ARM)) && defined(TARGET_UNIX)
+    #if (defined(HOST_X86) || defined(HOST_ARM) || !defined(__clang__)) && defined(TARGET_UNIX)
     #define ROL(x, n)        (((x) << (n)) | ((x) >> (32-(n))))
     #define ROTATE_LEFT(x,n) (x) = ROL(x,n)
     #else
diff --git a/src/coreclr/utilcode/sstring_com.cpp b/src/coreclr/utilcode/sstring_com.cpp
index f788b2204125..b674556741fc 100644
--- a/src/coreclr/utilcode/sstring_com.cpp
+++ b/src/coreclr/utilcode/sstring_com.cpp
@@ -22,13 +22,7 @@ BOOL SString::LoadResource(CCompRC::ResourceCategory eCategory, int resourceID)
 
 HRESULT SString::LoadResourceAndReturnHR(CCompRC::ResourceCategory eCategory, int resourceID)
 {
-    WRAPPER_NO_CONTRACT;
-    return LoadResourceAndReturnHR(NULL, eCategory,resourceID);
-}
-
-HRESULT SString::LoadResourceAndReturnHR(CCompRC* pResourceDLL, CCompRC::ResourceCategory eCategory, int resourceID)
-{
-    CONTRACT(BOOL)
+    CONTRACT(HRESULT)
     {
         INSTANCE_CHECK;
         NOTHROW;
@@ -38,11 +32,7 @@ HRESULT SString::LoadResourceAndReturnHR(CCompRC* pResourceDLL, CCompRC::Resourc
     HRESULT hr = E_FAIL;
 
 #ifndef FEATURE_UTILCODE_NO_DEPENDENCIES
-    if (pResourceDLL == NULL)
-    {
-        pResourceDLL = CCompRC::GetDefaultResourceDll();
-    }
-
+    CCompRC* pResourceDLL = CCompRC::GetDefaultResourceDll();
     if (pResourceDLL != NULL)
     {
 
diff --git a/src/coreclr/utilcode/stdafx.h b/src/coreclr/utilcode/stdafx.h
index 18b820306f77..78e984057581 100644
--- a/src/coreclr/utilcode/stdafx.h
+++ b/src/coreclr/utilcode/stdafx.h
@@ -12,6 +12,9 @@
 #include <switches.h>
 #include <crtwrap.h>
 #include <dn-u16.h>
+#include <algorithm>
+using std::min;
+using std::max;
 
 #define IN_WINFIX_CPP
 
diff --git a/src/coreclr/utilcode/stgpool.cpp b/src/coreclr/utilcode/stgpool.cpp
index f04f6e9e7b3b..e7aebc55d6ca 100644
--- a/src/coreclr/utilcode/stgpool.cpp
+++ b/src/coreclr/utilcode/stgpool.cpp
@@ -1938,7 +1938,7 @@ CInMemoryStream::CopyTo(
 
     _ASSERTE(cb.QuadPart <= UINT32_MAX);
     ULONG       cbTotal = min(static_cast<ULONG>(cb.QuadPart), m_cbSize - m_cbCurrent);
-    ULONG       cbRead=min(1024, cbTotal);
+    ULONG       cbRead=min((ULONG)1024, cbTotal);
     CQuickBytes rBuf;
     void        *pBuf = rBuf.AllocNoThrow(cbRead);
     if (pBuf == 0)
@@ -2061,7 +2061,7 @@ CGrowableStream::CGrowableStream(float multiplicativeGrowthRate, DWORD additiveG
     m_multiplicativeGrowthRate = min(max(1.0F, multiplicativeGrowthRate), 2.0F);
 
     _ASSERTE(additiveGrowthRate >= 1);
-    m_additiveGrowthRate = max(1, additiveGrowthRate);
+    m_additiveGrowthRate = max((DWORD)1, additiveGrowthRate);
 } // CGrowableStream::CGrowableStream
 
 #ifndef DACCESS_COMPILE
@@ -2115,7 +2115,7 @@ HRESULT CGrowableStream::EnsureCapacity(DWORD newLogicalSize)
             multSize = (DWORD)multSizeF;
         }
 
-        DWORD newBufferSize = max(max(newLogicalSize, multSize), addSize.Value());
+        DWORD newBufferSize = max(max(newLogicalSize, multSize), (DWORD)addSize.Value());
 
         char *tmp = new (nothrow) char[newBufferSize];
         if(tmp == NULL)
diff --git a/src/coreclr/utilcode/stresslog.cpp b/src/coreclr/utilcode/stresslog.cpp
index 90ad5900473e..37abeb2cb92f 100644
--- a/src/coreclr/utilcode/stresslog.cpp
+++ b/src/coreclr/utilcode/stresslog.cpp
@@ -227,7 +227,7 @@ void StressLog::Initialize(unsigned facilities, unsigned level, unsigned maxByte
         // in this case, interpret the number as GB
         maxBytesPerThread *= (1024 * 1024 * 1024);
     }
-    theLog.MaxSizePerThread = (unsigned)min(maxBytesPerThread,0xffffffff);
+    theLog.MaxSizePerThread = (unsigned)min(maxBytesPerThread,(size_t)0xffffffff);
 
     size_t maxBytesTotal = maxBytesTotalArg;
     if (maxBytesTotal < STRESSLOG_CHUNK_SIZE * 256)
@@ -235,7 +235,7 @@ void StressLog::Initialize(unsigned facilities, unsigned level, unsigned maxByte
         // in this case, interpret the number as GB
         maxBytesTotal *= (1024 * 1024 * 1024);
     }
-    theLog.MaxSizeTotal = (unsigned)min(maxBytesTotal, 0xffffffff);
+    theLog.MaxSizeTotal = (unsigned)min(maxBytesTotal, (size_t)0xffffffff);
     theLog.totalChunk = 0;
     theLog.facilitiesToLog = facilities | LF_ALWAYS;
     theLog.levelToLog = level;
diff --git a/src/coreclr/utilcode/util.cpp b/src/coreclr/utilcode/util.cpp
index 13668d244135..1819f38e0a43 100644
--- a/src/coreclr/utilcode/util.cpp
+++ b/src/coreclr/utilcode/util.cpp
@@ -816,7 +816,7 @@ DWORD LCM(DWORD u, DWORD v)
         DWORD currentProcsInGroup = 0;
         for (WORD i = 0; i < m_nGroups; i++)
         {
-            currentProcsInGroup = max(currentProcsInGroup, m_CPUGroupInfoArray[i].nr_active);
+            currentProcsInGroup = max(currentProcsInGroup, (DWORD)m_CPUGroupInfoArray[i].nr_active);
         }
         *max_procs_per_group = currentProcsInGroup;
         return true;
diff --git a/src/coreclr/utilcode/utsem.cpp b/src/coreclr/utilcode/utsem.cpp
index 94c1636dbe6e..e8e786cc3af9 100644
--- a/src/coreclr/utilcode/utsem.cpp
+++ b/src/coreclr/utilcode/utsem.cpp
@@ -84,7 +84,7 @@ SpinConstants g_SpinConstants = {
 
 inline void InitializeSpinConstants_NoHost()
 {
-    g_SpinConstants.dwMaximumDuration = max(2, g_SystemInfo.dwNumberOfProcessors) * 20000;
+    g_SpinConstants.dwMaximumDuration = max((DWORD)2, g_SystemInfo.dwNumberOfProcessors) * 20000;
 }
 
 #else //!SELF_NO_HOST
diff --git a/src/coreclr/vm/.vscode/c_cpp_properties.json b/src/coreclr/vm/.vscode/c_cpp_properties.json
index a1c7e61d179b..4192e236e354 100644
--- a/src/coreclr/vm/.vscode/c_cpp_properties.json
+++ b/src/coreclr/vm/.vscode/c_cpp_properties.json
@@ -31,6 +31,7 @@
                 "_UNICODE",
                 "_WIN32",
                 "_WIN32_WINNT=0x0602",
+                "NOMINMAX",
                 "HOST_64BIT",
                 "AMD64",
                 "HOST_64BIT=1",
@@ -71,7 +72,6 @@
                 "FEATURE_USE_ASM_GC_WRITE_BARRIERS",
                 "FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP",
                 "FEATURE_UTF8STRING=1",
-                "FEATURE_WIN32_REGISTRY",
                 "TARGET_WINDOWS=1",
                 "PROFILING_SUPPORTED_DATA",
                 "UNICODE",
diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt
index eb00b7c6a675..345d5ac35f00 100644
--- a/src/coreclr/vm/CMakeLists.txt
+++ b/src/coreclr/vm/CMakeLists.txt
@@ -300,7 +300,6 @@ set(VM_SOURCES_WKS
     comdependenthandle.cpp
     comdynamic.cpp
     commodule.cpp
-    compatibilityswitch.cpp
     comsynchronizable.cpp
     comthreadpool.cpp
     comutilnative.cpp
@@ -359,7 +358,6 @@ set(VM_SOURCES_WKS
     reflectclasswriter.cpp
     reflectioninvocation.cpp
     runtimehandles.cpp
-    safehandle.cpp
     simplerwlock.cpp
     stackingallocator.cpp
     stringliteralmap.cpp
@@ -402,7 +400,6 @@ set(VM_HEADERS_WKS
     comdependenthandle.h
     comdynamic.h
     commodule.h
-    compatibilityswitch.h
     comsynchronizable.h
     comthreadpool.h
     comutilnative.h
@@ -490,21 +487,6 @@ set(GC_SOURCES_WKS
     ../gc/softwarewritewatch.cpp
     ../gc/handletablecache.cpp)
 
-if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
-  set ( GC_SOURCES_WKS
-    ${GC_SOURCES_WKS}
-    ../gc/vxsort/isa_detection.cpp
-    ../gc/vxsort/do_vxsort_avx2.cpp
-    ../gc/vxsort/do_vxsort_avx512.cpp
-    ../gc/vxsort/machine_traits.avx2.cpp
-    ../gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.cpp
-    ../gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp
-    ../gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp
-    ../gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp
-    ../gc/vxsort/smallsort/avx2_load_mask_tables.cpp
-)
-endif (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
-
 set(GC_HEADERS_WKS
     ${GC_HEADERS_DAC_AND_WKS_COMMON}
     ../gc/gceventstatus.h
@@ -636,7 +618,6 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64)
         ${ARCH_SOURCES_DIR}/AsmHelpers.asm
         ${ARCH_SOURCES_DIR}/CallDescrWorkerAMD64.asm
         ${ARCH_SOURCES_DIR}/ComCallPreStub.asm
-        ${ARCH_SOURCES_DIR}/CrtHelpers.asm
         ${ARCH_SOURCES_DIR}/GenericComCallStubs.asm
         ${ARCH_SOURCES_DIR}/GenericComPlusCallStubs.asm
         ${ARCH_SOURCES_DIR}/getstate.asm
@@ -676,7 +657,6 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
     set(VM_SOURCES_WKS_ARCH_ASM
         ${ARCH_SOURCES_DIR}/AsmHelpers.asm
         ${ARCH_SOURCES_DIR}/CallDescrWorkerARM64.asm
-        ${ARCH_SOURCES_DIR}/CrtHelpers.asm
         ${ARCH_SOURCES_DIR}/patchedcode.asm
         ${ARCH_SOURCES_DIR}/PInvokeStubs.asm
         ${ARCH_SOURCES_DIR}/thunktemplates.asm
@@ -693,7 +673,6 @@ else(CLR_CMAKE_TARGET_WIN32)
         set(VM_SOURCES_WKS_ARCH_ASM
             ${ARCH_SOURCES_DIR}/asmhelpers.S
             ${ARCH_SOURCES_DIR}/calldescrworkeramd64.S
-            ${ARCH_SOURCES_DIR}/crthelpers.S
             ${ARCH_SOURCES_DIR}/externalmethodfixupthunk.S
             ${ARCH_SOURCES_DIR}/getstate.S
             ${ARCH_SOURCES_DIR}/jithelpers_fast.S
@@ -723,7 +702,6 @@ else(CLR_CMAKE_TARGET_WIN32)
     elseif(CLR_CMAKE_TARGET_ARCH_ARM)
         set(VM_SOURCES_WKS_ARCH_ASM
             ${ARCH_SOURCES_DIR}/asmhelpers.S
-            ${ARCH_SOURCES_DIR}/crthelpers.S
             ${ARCH_SOURCES_DIR}/ehhelpers.S
             ${ARCH_SOURCES_DIR}/patchedcode.S
             ${ARCH_SOURCES_DIR}/pinvokestubs.S
@@ -733,7 +711,6 @@ else(CLR_CMAKE_TARGET_WIN32)
         set(VM_SOURCES_WKS_ARCH_ASM
             ${ARCH_SOURCES_DIR}/asmhelpers.S
             ${ARCH_SOURCES_DIR}/calldescrworkerarm64.S
-            ${ARCH_SOURCES_DIR}/crthelpers.S
             ${ARCH_SOURCES_DIR}/patchedcode.S
             ${ARCH_SOURCES_DIR}/pinvokestubs.S
             ${ARCH_SOURCES_DIR}/thunktemplates.S
@@ -742,7 +719,6 @@ else(CLR_CMAKE_TARGET_WIN32)
         set(VM_SOURCES_WKS_ARCH_ASM
             ${ARCH_SOURCES_DIR}/asmhelpers.S
             ${ARCH_SOURCES_DIR}/calldescrworkerloongarch64.S
-            ${ARCH_SOURCES_DIR}/crthelpers.S
             ${ARCH_SOURCES_DIR}/pinvokestubs.S
             ${ARCH_SOURCES_DIR}/thunktemplates.S
         )
@@ -750,7 +726,6 @@ else(CLR_CMAKE_TARGET_WIN32)
         set(VM_SOURCES_WKS_ARCH_ASM
             ${ARCH_SOURCES_DIR}/asmhelpers.S
             ${ARCH_SOURCES_DIR}/calldescrworkerriscv64.S
-            ${ARCH_SOURCES_DIR}/crthelpers.S
             ${ARCH_SOURCES_DIR}/pinvokestubs.S
             ${ARCH_SOURCES_DIR}/thunktemplates.S
         )
diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm
index aa1c443cf56f..3fb2ca02e15a 100644
--- a/src/coreclr/vm/amd64/AsmHelpers.asm
+++ b/src/coreclr/vm/amd64/AsmHelpers.asm
@@ -634,18 +634,6 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT
 NESTED_END ProfileTailcallNaked, _TEXT
 
 
-; EXTERN_C void moveOWord(LPVOID* src, LPVOID* target);
-; <NOTE>
-; MOVDQA is not an atomic operation.  You need to call this function in a crst.
-; </NOTE>
-LEAF_ENTRY moveOWord, _TEXT
-        movdqa      xmm0, [rcx]
-        movdqa      [rdx], xmm0
-
-        ret
-LEAF_END moveOWord, _TEXT
-
-
 extern JIT_InternalThrowFromHelper:proc
 
 LEAF_ENTRY SinglecastDelegateInvokeStub, _TEXT
diff --git a/src/coreclr/vm/amd64/CrtHelpers.asm b/src/coreclr/vm/amd64/CrtHelpers.asm
deleted file mode 100644
index 09f48fa5879b..000000000000
--- a/src/coreclr/vm/amd64/CrtHelpers.asm
+++ /dev/null
@@ -1,79 +0,0 @@
-; Licensed to the .NET Foundation under one or more agreements.
-; The .NET Foundation licenses this file to you under the MIT license.
-
-include AsmMacros.inc
-
-extern memset:proc
-extern memmove:proc
-
-; JIT_MemSet/JIT_MemCpy
-;
-; It is IMPORTANT that the exception handling code is able to find these guys
-; on the stack, but on windows platforms we can just defer to the platform
-; implementation.
-;
-
-; void JIT_MemSet(void* dest, int c, size_t count)
-;
-; Purpose:
-;    Sets the first "count" bytes of the block of memory pointed byte
-;    "dest" to the specified value (interpreted as an unsigned char).
-;
-; Entry:
-;    RCX: void* dest    - Pointer to the block of memory to fill.
-;    RDX: int c         - Value to be set.
-;    R8:  size_t count  - Number of bytes to be set to the value.
-;
-; Exit:
-;
-; Uses:
-;
-; Exceptions:
-;
-LEAF_ENTRY JIT_MemSet, _TEXT
-        test    r8, r8                  ; check if count is zero
-        jz      Exit_MemSet             ; if zero, no bytes to set
-
-        cmp     byte ptr [rcx], 0       ; check dest for null
-
-        jmp     memset                  ; forward to the CRT implementation
-
-Exit_MemSet:
-        ret
-
-LEAF_END_MARKED JIT_MemSet, _TEXT
-
-; void JIT_MemCpy(void* dest, const void* src, size_t count)
-;
-; Purpose:
-;    Copies the values of "count" bytes from the location pointed to
-;    by "src" to the memory block pointed by "dest".
-;
-; Entry:
-;    RCX: void* dest             - Pointer to the destination array where content is to be copied.
-;    RDX: const void* src        - Pointer to the source of the data to be copied.
-;    R8:  size_t count           - Number of bytes to copy.
-;
-; Exit:
-;
-; Uses:
-;
-; Exceptions:
-;
-LEAF_ENTRY JIT_MemCpy, _TEXT
-        test    r8, r8                  ; check if count is zero
-        jz      Exit_MemCpy             ; if zero, no bytes to copy
-
-        cmp     byte ptr [rcx], 0       ; check dest for null
-        cmp     byte ptr [rdx], 0       ; check src for null
-
-        ; Use memmove to handle overlapping buffers for better
-        ; compatibility with .NET Framework. Needing to handle
-        ; overlapping buffers in cpblk is undefined by the spec.
-        jmp     memmove                 ; forward to the CRT implementation
-
-Exit_MemCpy:
-        ret
-
-LEAF_END_MARKED JIT_MemCpy, _TEXT
-		end
diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm
index 0f1b71b5ee93..70291c8307dc 100644
--- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm
+++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm
@@ -56,9 +56,14 @@ extern JIT_InternalThrow:proc
 ;   RDI - address of ref-field (assigned to)
 ;   RSI - address of the data  (source)
 ;   RCX is trashed
-;   RAX is trashed when FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP is defined
+;   RAX is trashed
+;
+;   NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+;         if you add more trashed registers.
+;
 ; Exit:
 ;   RDI, RSI are incremented by SIZEOF(LPVOID)
+;
 LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
         mov     rcx, [rsi]
 
@@ -151,8 +156,6 @@ endif
         cmp     rcx, [g_ephemeral_high]
         jnb     Exit
 
-        ; do the following checks only if we are allowed to trash rax
-        ; otherwise we don't have enough registers
 ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
         mov     rax, rcx
 
diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h
index e12f3e1eafd2..47cca560d7bb 100644
--- a/src/coreclr/vm/amd64/asmconstants.h
+++ b/src/coreclr/vm/amd64/asmconstants.h
@@ -108,21 +108,21 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__ComPlusCallInfo__m_pILStub
 
 #endif // FEATURE_COMINTEROP
 
-#define               OFFSETOF__Thread__m_fPreemptiveGCDisabled     0x0C
+#define               OFFSETOF__Thread__m_fPreemptiveGCDisabled     0x04
 ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_fPreemptiveGCDisabled
                     == offsetof(Thread, m_fPreemptiveGCDisabled));
 #define Thread_m_fPreemptiveGCDisabled OFFSETOF__Thread__m_fPreemptiveGCDisabled
 
-#define               OFFSETOF__Thread__m_pFrame                    0x10
+#define               OFFSETOF__Thread__m_pFrame                    0x08
 ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pFrame
                     == offsetof(Thread, m_pFrame));
 #define Thread_m_pFrame OFFSETOF__Thread__m_pFrame
 
 
-#define               OFFSET__Thread__m_alloc_context__alloc_ptr 0x58
+#define               OFFSET__Thread__m_alloc_context__alloc_ptr 0x50
 ASMCONSTANTS_C_ASSERT(OFFSET__Thread__m_alloc_context__alloc_ptr == offsetof(Thread, m_alloc_context) + offsetof(gc_alloc_context, alloc_ptr));
 
-#define               OFFSET__Thread__m_alloc_context__alloc_limit 0x60
+#define               OFFSET__Thread__m_alloc_context__alloc_limit 0x58
 ASMCONSTANTS_C_ASSERT(OFFSET__Thread__m_alloc_context__alloc_limit == offsetof(Thread, m_alloc_context) + offsetof(gc_alloc_context, alloc_limit));
 
 #define               OFFSETOF__gc_alloc_context__alloc_ptr 0x0
diff --git a/src/coreclr/vm/amd64/cgenamd64.cpp b/src/coreclr/vm/amd64/cgenamd64.cpp
index 26c228102606..8e136612c8c8 100644
--- a/src/coreclr/vm/amd64/cgenamd64.cpp
+++ b/src/coreclr/vm/amd64/cgenamd64.cpp
@@ -58,10 +58,18 @@ void ClearRegDisplayArgumentAndScratchRegisters(REGDISPLAY * pRD)
     pContextPointers->R11 = NULL;
 }
 
-void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     LIMITED_METHOD_CONTRACT;
 
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+        _ASSERTE(pRD->pCurrentContext->Rip == GetReturnAddress());
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 
@@ -76,7 +84,7 @@ void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    TransitionFrame::UpdateRegDisplay(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP));
 }
 
-void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACTL
     {
@@ -85,7 +93,6 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 #ifdef PROFILING_SUPPORTED
         PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this));
 #endif
-        HOST_NOCALLS;
         MODE_ANY;
         SUPPORTS_DAC;
     }
@@ -97,6 +104,13 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
         return;
     }
 
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 
@@ -117,7 +131,7 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    InlinedCallFrame::UpdateRegDisplay(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP));
 }
 
-void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACTL
     {
@@ -129,6 +143,14 @@ void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     }
     CONTRACTL_END;
 
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+        _ASSERTE(pRD->pCurrentContext->Rip == m_MachState.m_Rip);
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 
@@ -183,7 +205,7 @@ void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 
 #endif // TARGET_UNIX
 
-#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContextPointers->regname = m_MachState.m_Ptrs.p##regname;
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContextPointers->regname = (DWORD64 *)(TADDR *)m_MachState.m_Ptrs.p##regname;
     ENUM_CALLEE_SAVED_REGISTERS();
 #undef CALLEE_SAVED_REGISTER
 
@@ -196,7 +218,7 @@ void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     ClearRegDisplayArgumentAndScratchRegisters(pRD);
 }
 
-void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     LIMITED_METHOD_DAC_CONTRACT;
 
@@ -233,7 +255,7 @@ TADDR ResumableFrame::GetReturnAddressPtr()
     return dac_cast<TADDR>(m_Regs) + offsetof(CONTEXT, Rip);
 }
 
-void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
@@ -273,7 +295,7 @@ void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 }
 
 // The HijackFrame has to know the registers that are pushed by OnHijackTripThread
-void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACTL {
         NOTHROW;
diff --git a/src/coreclr/vm/amd64/cgencpu.h b/src/coreclr/vm/amd64/cgencpu.h
index 5dcaf12dfb81..5c6b3cf3c4b2 100644
--- a/src/coreclr/vm/amd64/cgencpu.h
+++ b/src/coreclr/vm/amd64/cgencpu.h
@@ -48,9 +48,6 @@ EXTERN_C void FastCallFinalizeWorker(Object *obj, PCODE funcPtr);
 #define SIZEOF_LOAD_AND_JUMP_THUNK              22   // # bytes to mov r10, X; jmp Z
 #define SIZEOF_LOAD2_AND_JUMP_THUNK             32   // # bytes to mov r10, X; mov r11, Y; jmp Z
 
-// Also in CorCompile.h, FnTableAccess.h
-#define USE_INDIRECT_CODEHEADER                 // use CodeHeader, RealCodeHeader construct
-
 #define HAS_NDIRECT_IMPORT_PRECODE              1
 #define HAS_FIXUP_PRECODE                       1
 
@@ -188,6 +185,9 @@ struct REGDISPLAY;
 
 #define NUM_CALLEE_SAVED_REGISTERS 6
 
+// No floating point callee saved registers on Unix AMD64
+#define ENUM_FP_CALLEE_SAVED_REGISTERS()
+
 #else // UNIX_AMD64_ABI
 
 #define ENUM_ARGUMENT_REGISTERS() \
@@ -212,6 +212,18 @@ struct REGDISPLAY;
 
 #define NUM_CALLEE_SAVED_REGISTERS 8
 
+#define ENUM_FP_CALLEE_SAVED_REGISTERS() \
+    CALLEE_SAVED_REGISTER(Xmm6) \
+    CALLEE_SAVED_REGISTER(Xmm7) \
+    CALLEE_SAVED_REGISTER(Xmm8) \
+    CALLEE_SAVED_REGISTER(Xmm9) \
+    CALLEE_SAVED_REGISTER(Xmm10) \
+    CALLEE_SAVED_REGISTER(Xmm11) \
+    CALLEE_SAVED_REGISTER(Xmm12) \
+    CALLEE_SAVED_REGISTER(Xmm13) \
+    CALLEE_SAVED_REGISTER(Xmm14) \
+    CALLEE_SAVED_REGISTER(Xmm15)
+
 #endif // UNIX_AMD64_ABI
 
 typedef DPTR(struct ArgumentRegisters) PTR_ArgumentRegisters;
@@ -429,7 +441,13 @@ inline void SetSSP(CONTEXT *context, DWORD64 ssp)
 }
 #endif // !DACCESS_COMPILE
 
-#define SetFP(context, ebp)
+inline void SetFP(CONTEXT *context, TADDR rbp)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    context->Rbp = (DWORD64)rbp;
+}
+
 inline TADDR GetFP(const CONTEXT * context)
 {
     LIMITED_METHOD_CONTRACT;
diff --git a/src/coreclr/vm/amd64/crthelpers.S b/src/coreclr/vm/amd64/crthelpers.S
deleted file mode 100644
index 82219e574092..000000000000
--- a/src/coreclr/vm/amd64/crthelpers.S
+++ /dev/null
@@ -1,74 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-.intel_syntax noprefix
-#include "unixasmmacros.inc"
-#include "asmconstants.h"
-
-// JIT_MemSet/JIT_MemCpy
-//
-// It is IMPORTANT that the exception handling code is able to find these guys
-// on the stack, but on non-windows platforms we can just defer to the platform
-// implementation.
-//
-
-// void JIT_MemSet(void* dest, int c, size_t count)
-//
-// Purpose:
-//    Sets the first "count" bytes of the block of memory pointed byte
-//    "dest" to the specified value (interpreted as an unsigned char).
-//
-// Entry:
-//    RDI: void* dest    - Pointer to the block of memory to fill.
-//    RSI: int c         - Value to be set.
-//    RDX: size_t count  - Number of bytes to be set to the value.
-//
-// Exit:
-//
-// Uses:
-//
-// Exceptions:
-//
-LEAF_ENTRY JIT_MemSet, _TEXT
-        test    rdx, rdx                // check if count is zero
-        jz      Exit_MemSet             // if zero, no bytes to set
-
-        cmp     byte ptr [rdi], 0       // check dest for null
-
-        jmp     C_PLTFUNC(memset)       // forward to the CRT implementation
-
-Exit_MemSet:
-        ret
-
-LEAF_END_MARKED JIT_MemSet, _TEXT
-
-// void JIT_MemCpy(void* dest, const void* src, size_t count)
-//
-// Purpose:
-//    Copies the values of "count" bytes from the location pointed to
-//    by "src" to the memory block pointed by "dest".
-//
-// Entry:
-//    RDI: void* dest             - Pointer to the destination array where content is to be copied.
-//    RSI: const void* src        - Pointer to the source of the data to be copied.
-//    RDX:  size_t count           - Number of bytes to copy.
-//
-// Exit:
-//
-// Uses:
-//
-// Exceptions:
-//
-LEAF_ENTRY JIT_MemCpy, _TEXT
-        test    rdx, rdx                // check if count is zero
-        jz      Exit_MemCpy             // if zero, no bytes to set
-
-        cmp     byte ptr [rdi], 0       // check dest for null
-        cmp     byte ptr [rsi], 0       // check src for null
-
-        jmp     C_PLTFUNC(memcpy)      // forward to the CRT implementation
-
-Exit_MemCpy:
-        ret
-
-LEAF_END_MARKED JIT_MemCpy, _TEXT
diff --git a/src/coreclr/vm/amd64/gmsamd64.cpp b/src/coreclr/vm/amd64/gmsamd64.cpp
index bc1079c14ace..8af5247d07c3 100644
--- a/src/coreclr/vm/amd64/gmsamd64.cpp
+++ b/src/coreclr/vm/amd64/gmsamd64.cpp
@@ -11,8 +11,7 @@
 void LazyMachState::unwindLazyState(LazyMachState* baseState,
                                     MachState* unwoundState,
                                     DWORD threadId,
-                                    int funCallDepth /* = 1 */,
-                                    HostCallPreference hostCallPreference /* = (HostCallPreference)(-1) */)
+                                    int funCallDepth /* = 1 */)
 {
     CONTRACTL
     {
@@ -83,20 +82,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
         {
             // Determine  whether given IP resides in JITted code. (It returns nonzero in that case.)
             // Use it now to see if we've unwound to managed code yet.
-            BOOL fFailedReaderLock = FALSE;
-            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc, hostCallPreference, &fFailedReaderLock);
-            if (fFailedReaderLock)
-            {
-                // We don't know if we would have been able to find a JIT
-                // manager, because we couldn't enter the reader lock without
-                // yielding (and our caller doesn't want us to yield).  So abort
-                // now.
-
-                // Invalidate the lazyState we're returning, so the caller knows
-                // we aborted before we could fully unwind
-                unwoundState->_pRetAddr = NULL;
-                return;
-            }
+            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc);
 
             if (fIsManagedCode)
                 break;
@@ -136,7 +122,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
 
 #else  // !DACCESS_COMPILE
 
-#define CALLEE_SAVED_REGISTER(regname) unwoundState->m_Ptrs.p##regname = PTR_ULONG64(nonVolRegPtrs.regname);
+#define CALLEE_SAVED_REGISTER(regname) unwoundState->m_Ptrs.p##regname = PTR_TADDR(nonVolRegPtrs.regname);
     ENUM_CALLEE_SAVED_REGISTERS();
 #undef CALLEE_SAVED_REGISTER
 
diff --git a/src/coreclr/vm/amd64/gmscpu.h b/src/coreclr/vm/amd64/gmscpu.h
index 9836f03253cf..4154fd995d1c 100644
--- a/src/coreclr/vm/amd64/gmscpu.h
+++ b/src/coreclr/vm/amd64/gmscpu.h
@@ -101,8 +101,7 @@ struct LazyMachState : public MachState
     static void unwindLazyState(LazyMachState* baseState,
                                 MachState* lazyState,
                                 DWORD threadId,
-                                int funCallDepth = 1,
-                                HostCallPreference hostCallPreference = AllowHostCalls);
+                                int funCallDepth = 1);
 
     friend class HelperMethodFrame;
     friend class CheckAsmOffsets;
diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S
index 3a2d803a1460..96195353db29 100644
--- a/src/coreclr/vm/amd64/jithelpers_fast.S
+++ b/src/coreclr/vm/amd64/jithelpers_fast.S
@@ -10,19 +10,15 @@
 // Entry:
 //   RDI - address of ref-field (assigned to)
 //   RSI - address of the data  (source)
-//
-//   Note: RyuJIT assumes that all volatile registers can be trashed by
-//   the CORINFO_HELP_ASSIGN_BYREF helper (i.e. JIT_ByRefWriteBarrier)
-//   except RDI and RSI. This helper uses and defines RDI and RSI, so
-//   they remain as live GC refs or byrefs, and are not killed.
-//
-//
 //   RCX is trashed
 //   RAX is trashed
-//   R10 is trashed
-//   R11 is trashed on Debug build
+//
+//   NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+//         if you add more trashed registers.
+//
 // Exit:
 //   RDI, RSI are incremented by SIZEOF(LPVOID)
+//
 LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
         mov     rcx, [rsi]
 
@@ -36,31 +32,31 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
         // but if it isn't then it will just return.
         //
         // See if this is in GCHeap
-        PREPARE_EXTERNAL_VAR g_lowest_address, rax
-        cmp     rdi, [rax]
+        cmp     rdi, [C_VAR(g_lowest_address)]
         jb      LOCAL_LABEL(NotInHeap_ByRefWriteBarrier)
-        PREPARE_EXTERNAL_VAR g_highest_address, rax
-        cmp     rdi, [rax]
+        cmp     rdi, [C_VAR(g_highest_address)]
         jnb     LOCAL_LABEL(NotInHeap_ByRefWriteBarrier)
 
 #ifdef WRITE_BARRIER_CHECK
+        // we can only trash rcx in this function so in _DEBUG we need to save
+        // some scratch registers.
+        push    r10
+        push    r11
+        push    rax
+
         // **ALSO update the shadow GC heap if that is enabled**
         // Do not perform the work if g_GCShadow is 0
-        PREPARE_EXTERNAL_VAR g_GCShadow, rax
-        cmp     qword ptr [rax], 0
+        cmp     qword ptr [C_VAR(g_GCShadow)], 0
         je      LOCAL_LABEL(NoShadow_ByRefWriteBarrier)
 
         // If we end up outside of the heap don't corrupt random memory
         mov     r10, rdi
-        PREPARE_EXTERNAL_VAR g_lowest_address, rax
-        sub     r10, [rax]
+        sub     r10, [C_VAR(g_lowest_address)]
         jb      LOCAL_LABEL(NoShadow_ByRefWriteBarrier)
 
         // Check that our adjusted destination is somewhere in the shadow gc
-        PREPARE_EXTERNAL_VAR g_GCShadow, rax
-        add     r10, [rax]
-        PREPARE_EXTERNAL_VAR g_GCShadowEnd, rax
-        cmp     r10, [rax]
+        add     r10, [C_VAR(g_GCShadow)]
+        cmp     r10, [C_VAR(g_GCShadowEnd)]
         jnb     LOCAL_LABEL(NoShadow_ByRefWriteBarrier)
 
         // Write ref into real GC
@@ -91,63 +87,57 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
     // Additionally we know for sure that we are inside the heap and therefore don't
     // need to replicate the above checks.
     LOCAL_LABEL(DoneShadow_ByRefWriteBarrier):
+        pop     rax
+        pop     r11
+        pop     r10
 #endif
 
 #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
         // Update the write watch table if necessary
-        PREPARE_EXTERNAL_VAR g_sw_ww_enabled_for_gc_heap, rax
-        cmp     byte ptr [rax], 0x0
+        cmp     byte ptr [C_VAR(g_sw_ww_enabled_for_gc_heap)], 0x0
         je      LOCAL_LABEL(CheckCardTable_ByRefWriteBarrier)
         mov     rax, rdi
         shr     rax, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift
-        PREPARE_EXTERNAL_VAR g_sw_ww_table, r10
-        add     rax, qword ptr [r10]
+        add     rax, qword ptr [C_VAR(g_sw_ww_table)]
         cmp     byte ptr [rax], 0x0
         jne     LOCAL_LABEL(CheckCardTable_ByRefWriteBarrier)
         mov     byte ptr [rax], 0xFF
 #endif
 
-    LOCAL_LABEL(CheckCardTable_ByRefWriteBarrier):
         // See if we can just quick out
-        PREPARE_EXTERNAL_VAR g_ephemeral_low, rax
-        cmp     rcx, [rax]
+    LOCAL_LABEL(CheckCardTable_ByRefWriteBarrier):
+        cmp     rcx, [C_VAR(g_ephemeral_low)]
         jb      LOCAL_LABEL(Exit_ByRefWriteBarrier)
-        PREPARE_EXTERNAL_VAR g_ephemeral_high, rax
-        cmp     rcx, [rax]
+        cmp     rcx, [C_VAR(g_ephemeral_high)]
         jnb     LOCAL_LABEL(Exit_ByRefWriteBarrier)
 
         mov     rax, rcx
 
-        PREPARE_EXTERNAL_VAR g_region_shr, rcx
-        mov     cl, [rcx]
+        mov     cl, [C_VAR(g_region_shr)]
         test    cl, cl
         je      LOCAL_LABEL(SkipCheck_ByRefWriteBarrier)
 
         // check if the source is in gen 2 - then it's not an ephemeral pointer
         shr     rax, cl
-        PREPARE_EXTERNAL_VAR g_region_to_generation_table, r10
-        mov     r10, [r10]
-        cmp     byte ptr [rax + r10], 0x82
+        add     rax, [C_VAR(g_region_to_generation_table)]
+        cmp     byte ptr [rax], 0x82
         je      LOCAL_LABEL(Exit_ByRefWriteBarrier)
 
         // check if the destination happens to be in gen 0
         mov     rax, rdi
         shr     rax, cl
-        cmp     byte ptr [rax + r10], 0
+        add     rax, [C_VAR(g_region_to_generation_table)]
+        cmp     byte ptr [rax], 0
         je      LOCAL_LABEL(Exit_ByRefWriteBarrier)
     LOCAL_LABEL(SkipCheck_ByRefWriteBarrier):
 
-        PREPARE_EXTERNAL_VAR g_card_table, r10
-        mov     r10, [r10]
-
-        PREPARE_EXTERNAL_VAR g_region_use_bitwise_write_barrier, rax
-        cmp     byte ptr [rax], 0
+        cmp     byte ptr [C_VAR(g_region_use_bitwise_write_barrier)], 0
         je      LOCAL_LABEL(CheckCardTableByte_ByRefWriteBarrier)
 
         // compute card table bit
-        mov     ecx, edi
+        mov     rcx, rdi
         mov     al, 1
-        shr     ecx, 8
+        shr     rcx, 8
         and     cl, 7
         shl     al, cl
 
@@ -159,48 +149,51 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
         // Check if we need to update the card table
         // Calc pCardByte
         shr     rcx, 0xB
+        add     rcx, [C_VAR(g_card_table)]
+
         // Check if this card table bit is already set
-        test    byte ptr [rcx + r10], al
+        test    byte ptr [rcx], al
         je      LOCAL_LABEL(SetCardTableBit_ByRefWriteBarrier)
         REPRET
 
     LOCAL_LABEL(SetCardTableBit_ByRefWriteBarrier):
-        lock or byte ptr [rcx + r10], al
-
+        lock or byte ptr [rcx], al
         jmp     LOCAL_LABEL(CheckCardBundle_ByRefWriteBarrier)
+LOCAL_LABEL(CheckCardTableByte_ByRefWriteBarrier):
 
-    LOCAL_LABEL(CheckCardTableByte_ByRefWriteBarrier):
         // move current rdi value into rcx and then increment the pointers
         mov     rcx, rdi
         add     rsi, 0x8
         add     rdi, 0x8
 
+        // Check if we need to update the card table
+        // Calc pCardByte
         shr     rcx, 0xB
-        cmp     byte ptr [rcx + r10], 0xFF
-        jne     LOCAL_LABEL(SetCardTableByte_ByRefWriteBarrier)
+        add     rcx, [C_VAR(g_card_table)]
+
+        // Check if this card is dirty
+        cmp     byte ptr [rcx], 0xFF
+        jne     LOCAL_LABEL(UpdateCardTable_ByRefWriteBarrier)
         REPRET
-    LOCAL_LABEL(SetCardTableByte_ByRefWriteBarrier):
-        mov     byte ptr [rcx + r10], 0xFF
+
+    LOCAL_LABEL(UpdateCardTable_ByRefWriteBarrier):
+        mov     byte ptr [rcx], 0xFF
 
     LOCAL_LABEL(CheckCardBundle_ByRefWriteBarrier):
 
 #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
-        // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already)
-        shr     rcx, 0x0A
-
-        PREPARE_EXTERNAL_VAR g_card_bundle_table, rax
-        add     rcx, [rax]
-
-        // Check if this bundle byte is dirty
+        // check if we need to update the card bundle table
+        // restore destination address from rdi - rdi has been incremented by 8 already
+        lea     rcx, [rdi-8]
+        shr     rcx, 0x15
+        add     rcx, [C_VAR(g_card_bundle_table)]
         cmp     byte ptr [rcx], 0xFF
-
-        jne     LOCAL_LABEL(UpdateCardBundle_ByRefWriteBarrier)
+        jne     LOCAL_LABEL(UpdateCardBundleTable_ByRefWriteBarrier)
         REPRET
 
-    LOCAL_LABEL(UpdateCardBundle_ByRefWriteBarrier):
+    LOCAL_LABEL(UpdateCardBundleTable_ByRefWriteBarrier):
         mov     byte ptr [rcx], 0xFF
 #endif
-
         ret
 
     .balign 16
diff --git a/src/coreclr/vm/amd64/theprestubamd64.S b/src/coreclr/vm/amd64/theprestubamd64.S
index dd02f70780e2..8d601c0ab9f2 100644
--- a/src/coreclr/vm/amd64/theprestubamd64.S
+++ b/src/coreclr/vm/amd64/theprestubamd64.S
@@ -6,7 +6,8 @@
 #include "asmconstants.h"
 
 NESTED_ENTRY ThePreStub, _TEXT, NoHandler
-        PROLOG_WITH_TRANSITION_BLOCK 0, 0, 0, 0, 0
+        PROLOG_WITH_TRANSITION_BLOCK 8, 0, 0, 0, 0
+        mov             [rsp], rax // Return buffer in Swift calling convention
 
         //
         // call PreStubWorker
@@ -14,9 +15,11 @@ NESTED_ENTRY ThePreStub, _TEXT, NoHandler
         lea             rdi, [rsp + __PWTB_TransitionBlock]     // pTransitionBlock*
         mov             rsi, METHODDESC_REGISTER
         call            C_FUNC(PreStubWorker)
+        mov             r10, rax
 
+        mov             rax, [rsp]
         EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
-        TAILJMP_RAX
+        jmp             r10
 
 NESTED_END ThePreStub, _TEXT
 
diff --git a/src/coreclr/vm/amd64/unixasmhelpers.S b/src/coreclr/vm/amd64/unixasmhelpers.S
index 4711ee9857f2..77fe1384dcd9 100644
--- a/src/coreclr/vm/amd64/unixasmhelpers.S
+++ b/src/coreclr/vm/amd64/unixasmhelpers.S
@@ -71,14 +71,16 @@ NESTED_ENTRY NDirectImportThunk, _TEXT, NoHandler
         //
         // Save integer parameter registers.
         // Make sure to preserve r11 as well as it is used to pass the stack argument size from JIT
+        // Make sure to preserve rax as well as it is used for the return buffer for Swift calls
         //
         PUSH_ARGUMENT_REGISTERS
         push_register r11
+        push_register rax
 
         //
-        // Allocate space for XMM parameter registers
+        // Allocate space for XMM parameter registers and alignment
         //
-        alloc_stack     0x80
+        alloc_stack     0x88
 
         SAVE_FLOAT_ARGUMENT_REGISTERS 0
 
@@ -89,34 +91,26 @@ NESTED_ENTRY NDirectImportThunk, _TEXT, NoHandler
         //
         mov             rdi, METHODDESC_REGISTER
         call            C_FUNC(NDirectImportWorker)
+        mov             r10, rax
 
         RESTORE_FLOAT_ARGUMENT_REGISTERS 0
 
         //
-        // epilogue, rax contains the native target address
+        // epilogue, r10 contains the native target address
         //
-        free_stack      0x80
+        free_stack      0x88
 
         //
-        // Restore integer parameter registers and r11
+        // Restore integer parameter registers, r11 and rax
         //
+        pop_register rax
         pop_register r11
         POP_ARGUMENT_REGISTERS
 
-    TAILJMP_RAX
+        jmp r10
+    
 NESTED_END NDirectImportThunk, _TEXT
 
-// EXTERN_C void moveOWord(LPVOID* src, LPVOID* target);
-// <NOTE>
-// MOVDQA is not an atomic operation.  You need to call this function in a crst.
-// </NOTE>
-LEAF_ENTRY moveOWord, _TEXT
-        movdqu          xmm0, xmmword ptr [rdi]
-        movdqu          xmmword ptr [rsi], xmm0
-
-        ret
-LEAF_END moveOWord, _TEXT
-
 //------------------------------------------------
 // JIT_RareDisableHelper
 //
diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp
index feafd1f8abad..07f24361dccc 100644
--- a/src/coreclr/vm/appdomain.cpp
+++ b/src/coreclr/vm/appdomain.cpp
@@ -313,7 +313,7 @@ OBJECTREF* PinnedHeapHandleTable::AllocateHandles(DWORD nRequested)
     // Retrieve the remaining number of handles in the bucket.
     DWORD numRemainingHandlesInBucket = (m_pHead != NULL) ? m_pHead->GetNumRemainingHandles() : 0;
     PTRARRAYREF pinnedHandleArrayObj = NULL;
-    DWORD nextBucketSize = min(m_NextBucketSize * 2, MAX_BUCKETSIZE);
+    DWORD nextBucketSize = min<DWORD>(m_NextBucketSize * 2, MAX_BUCKETSIZE);
 
     // create a new block if this request doesn't fit in the current block
     if (nRequested > numRemainingHandlesInBucket)
@@ -1352,7 +1352,7 @@ void SystemDomain::LoadBaseSystemClasses()
         // further loading of nonprimitive types may need casting support.
         // initialize cast cache here.
         CastCache::Initialize();
-        ECall::PopulateManagedCastHelpers();
+        ECall::PopulateManagedHelpers();
 
         // used by IsImplicitInterfaceOfSZArray
         CoreLibBinder::GetClass(CLASS__IENUMERABLEGENERIC);
@@ -1414,9 +1414,6 @@ void SystemDomain::LoadBaseSystemClasses()
         g_profControlBlock.fBaseSystemClassesLoaded = TRUE;
     #endif // PROFILING_SUPPORTED
 
-        // Perform any once-only SafeHandle initialization.
-        SafeHandle::Init();
-
     #if defined(_DEBUG)
         g_CoreLib.Check();
         g_CoreLib.CheckExtended();
@@ -2608,16 +2605,20 @@ void AppDomain::LoadDomainAssembly(DomainAssembly *pFile,
 
 #ifndef DACCESS_COMPILE
 
-FileLoadLevel AppDomain::GetThreadFileLoadLevel()
+thread_local LoadLevelLimiter* LoadLevelLimiter::t_currentLoadLevelLimiter = nullptr;
+
+namespace
 {
-    WRAPPER_NO_CONTRACT;
-    if (GetThread()->GetLoadLevelLimiter() == NULL)
-        return FILE_ACTIVE;
-    else
-        return (FileLoadLevel)(GetThread()->GetLoadLevelLimiter()->GetLoadLevel()-1);
+    FileLoadLevel GetCurrentFileLoadLevel()
+    {
+        WRAPPER_NO_CONTRACT;
+        if (LoadLevelLimiter::GetCurrent() == NULL)
+            return FILE_ACTIVE;
+        else
+            return (FileLoadLevel)(LoadLevelLimiter::GetCurrent()->GetLoadLevel()-1);
+    }
 }
 
-
 Assembly *AppDomain::LoadAssembly(AssemblySpec* pIdentity,
                                   PEAssembly * pPEAssembly,
                                   FileLoadLevel targetLevel)
@@ -2710,7 +2711,7 @@ DomainAssembly *AppDomain::LoadDomainAssemblyInternal(AssemblySpec* pIdentity,
         PRECONDITION(CheckPointer(pPEAssembly));
         PRECONDITION(::GetAppDomain()==this);
         POSTCONDITION(CheckPointer(RETVAL));
-        POSTCONDITION(RETVAL->GetLoadLevel() >= GetThreadFileLoadLevel()
+        POSTCONDITION(RETVAL->GetLoadLevel() >= GetCurrentFileLoadLevel()
                       || RETVAL->GetLoadLevel() >= targetLevel);
         POSTCONDITION(RETVAL->CheckNoError(targetLevel));
         INJECT_FAULT(COMPlusThrowOM(););
@@ -2817,7 +2818,7 @@ DomainAssembly *AppDomain::LoadDomainAssembly(FileLoadLock *pLock, FileLoadLevel
         STANDARD_VM_CHECK;
         PRECONDITION(CheckPointer(pLock));
         PRECONDITION(AppDomain::GetCurrentDomain() == this);
-        POSTCONDITION(RETVAL->GetLoadLevel() >= GetThreadFileLoadLevel()
+        POSTCONDITION(RETVAL->GetLoadLevel() >= GetCurrentFileLoadLevel()
                       || RETVAL->GetLoadLevel() >= targetLevel);
         POSTCONDITION(RETVAL->CheckNoError(targetLevel));
     }
@@ -4176,7 +4177,7 @@ void DomainLocalModule::EnsureDynamicClassIndex(DWORD dwID)
         return;
     }
 
-    SIZE_T aDynamicEntries = max(16, oldDynamicEntries);
+    SIZE_T aDynamicEntries = max<SIZE_T>(16, oldDynamicEntries);
     while (aDynamicEntries <= dwID)
     {
         aDynamicEntries *= 2;
diff --git a/src/coreclr/vm/appdomain.hpp b/src/coreclr/vm/appdomain.hpp
index 5924e4f4b9fb..1c400154c760 100644
--- a/src/coreclr/vm/appdomain.hpp
+++ b/src/coreclr/vm/appdomain.hpp
@@ -43,7 +43,6 @@ class StringLiteralMap;
 class FrozenObjectHeapManager;
 class MngStdInterfacesInfo;
 class DomainAssembly;
-class LoadLevelLimiter;
 class TypeEquivalenceHashTable;
 
 #ifdef FEATURE_COMINTEROP
@@ -815,23 +814,19 @@ typedef FileLoadLock::Holder FileLoadLockHolder;
     typedef ListLockBase<NativeCodeVersion> JitListLock;
     typedef ListLockEntryBase<NativeCodeVersion> JitListLockEntry;
 
-
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning (disable: 4324) //sometimes 64bit compilers complain about alignment
-#endif
-class LoadLevelLimiter
+class LoadLevelLimiter final
 {
-    FileLoadLevel                   m_currentLevel;
+    static thread_local LoadLevelLimiter* t_currentLoadLevelLimiter;
+    FileLoadLevel m_currentLevel;
     LoadLevelLimiter* m_previousLimit;
-    BOOL m_bActive;
+    bool m_bActive;
 
 public:
 
     LoadLevelLimiter()
       : m_currentLevel(FILE_ACTIVE),
-      m_previousLimit(NULL),
-      m_bActive(FALSE)
+      m_previousLimit(nullptr),
+      m_bActive(false)
     {
         LIMITED_METHOD_CONTRACT;
     }
@@ -839,11 +834,11 @@ class LoadLevelLimiter
     void Activate()
     {
         WRAPPER_NO_CONTRACT;
-        m_previousLimit= GetThread()->GetLoadLevelLimiter();
-        if(m_previousLimit)
-            m_currentLevel=m_previousLimit->GetLoadLevel();
-        GetThread()->SetLoadLevelLimiter(this);
-        m_bActive=TRUE;
+        m_previousLimit = t_currentLoadLevelLimiter;
+        if (m_previousLimit)
+            m_currentLevel = m_previousLimit->GetLoadLevel();
+        t_currentLoadLevelLimiter = this;
+        m_bActive = true;
     }
 
     void Deactivate()
@@ -851,8 +846,8 @@ class LoadLevelLimiter
         WRAPPER_NO_CONTRACT;
         if (m_bActive)
         {
-            GetThread()->SetLoadLevelLimiter(m_previousLimit);
-            m_bActive=FALSE;
+            t_currentLoadLevelLimiter = m_previousLimit;
+            m_bActive = false;
         }
     }
 
@@ -882,10 +877,13 @@ class LoadLevelLimiter
         LIMITED_METHOD_CONTRACT;
         m_currentLevel = level;
     }
+
+    static LoadLevelLimiter* GetCurrent()
+    {
+        LIMITED_METHOD_CONTRACT;
+        return t_currentLoadLevelLimiter;
+    }
 };
-#ifdef _MSC_VER
-#pragma warning (pop) //4324
-#endif
 
 #define OVERRIDE_LOAD_LEVEL_LIMIT(newLimit)                    \
     LoadLevelLimiter __newLimit;                                                    \
@@ -1798,7 +1796,6 @@ class AppDomain : public BaseDomain
     CHECK CheckLoading(DomainAssembly *pFile, FileLoadLevel level);
 
     BOOL IsLoading(DomainAssembly *pFile, FileLoadLevel level);
-    static FileLoadLevel GetThreadFileLoadLevel();
 
     void LoadDomainAssembly(DomainAssembly *pFile,
                         FileLoadLevel targetLevel);
diff --git a/src/coreclr/vm/arm/asmconstants.h b/src/coreclr/vm/arm/asmconstants.h
index 16931168e3ce..5c92427008bb 100644
--- a/src/coreclr/vm/arm/asmconstants.h
+++ b/src/coreclr/vm/arm/asmconstants.h
@@ -141,11 +141,11 @@ ASMCONSTANTS_C_ASSERT(UnmanagedToManagedFrame__m_pvDatum == offsetof(UnmanagedTo
 
 #endif // FEATURE_COMINTEROP
 
-#define               Thread__m_fPreemptiveGCDisabled   0x08
+#define               Thread__m_fPreemptiveGCDisabled   0x04
 ASMCONSTANTS_C_ASSERT(Thread__m_fPreemptiveGCDisabled == offsetof(Thread, m_fPreemptiveGCDisabled));
 #define Thread_m_fPreemptiveGCDisabled Thread__m_fPreemptiveGCDisabled
 
-#define               Thread__m_pFrame                  0x0C
+#define               Thread__m_pFrame                  0x08
 ASMCONSTANTS_C_ASSERT(Thread__m_pFrame == offsetof(Thread, m_pFrame));
 #define Thread_m_pFrame Thread__m_pFrame
 
diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S
index 91e18dc81faf..27a44b62c119 100644
--- a/src/coreclr/vm/arm/asmhelpers.S
+++ b/src/coreclr/vm/arm/asmhelpers.S
@@ -127,7 +127,8 @@ LOCAL_LABEL(LReturnDone):
         EPILOG_STACK_RESTORE_OFFSET   r7, #8
         EPILOG_POP              "{r4,r5,r7,pc}"
 
-PATCH_LABEL CallDescrWorkerInternalReturnAddressOffset
+CallDescrWorkerInternalReturnAddressOffset:
+    .global CallDescrWorkerInternalReturnAddressOffset
     .word LOCAL_LABEL(CallDescrWorkerInternalReturnAddress) - C_FUNC(CallDescrWorkerInternal)
 
         NESTED_END CallDescrWorkerInternal,_TEXT
diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h
index f60822ccaa87..d800551a5170 100644
--- a/src/coreclr/vm/arm/cgencpu.h
+++ b/src/coreclr/vm/arm/cgencpu.h
@@ -21,6 +21,27 @@
 #define RESOLVE_STUB_THIRD_WORD 0xb460
 #define LOOKUP_STUB_FIRST_WORD 0xf8df
 
+#define ENUM_CALLEE_SAVED_REGISTERS() \
+    CALLEE_SAVED_REGISTER(R4) \
+    CALLEE_SAVED_REGISTER(R5) \
+    CALLEE_SAVED_REGISTER(R6) \
+    CALLEE_SAVED_REGISTER(R7) \
+    CALLEE_SAVED_REGISTER(R8) \
+    CALLEE_SAVED_REGISTER(R9) \
+    CALLEE_SAVED_REGISTER(R10) \
+    CALLEE_SAVED_REGISTER(R11) \
+    CALLEE_SAVED_REGISTER(Lr)
+
+#define ENUM_FP_CALLEE_SAVED_REGISTERS() \
+    CALLEE_SAVED_REGISTER(D[8]) \
+    CALLEE_SAVED_REGISTER(D[9]) \
+    CALLEE_SAVED_REGISTER(D[10]) \
+    CALLEE_SAVED_REGISTER(D[11]) \
+    CALLEE_SAVED_REGISTER(D[12]) \
+    CALLEE_SAVED_REGISTER(D[13]) \
+    CALLEE_SAVED_REGISTER(D[14]) \
+    CALLEE_SAVED_REGISTER(D[15])
+
 class MethodDesc;
 class FramedMethodFrame;
 class Module;
@@ -54,9 +75,6 @@ EXTERN_C void checkStack(void);
 
 #define HAS_NDIRECT_IMPORT_PRECODE              1
 
-#define USE_INDIRECT_CODEHEADER
-
-
 EXTERN_C void getFPReturn(int fpSize, INT64 *pRetVal);
 EXTERN_C void setFPReturn(int fpSize, INT64 retVal);
 
diff --git a/src/coreclr/vm/arm/crthelpers.S b/src/coreclr/vm/arm/crthelpers.S
deleted file mode 100644
index db0ed192c4d6..000000000000
--- a/src/coreclr/vm/arm/crthelpers.S
+++ /dev/null
@@ -1,51 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-// ***********************************************************************
-// File: crthelpers.S
-//
-// ***********************************************************************
-
-#include "unixasmmacros.inc"
-#include "asmconstants.h"
-
-.syntax unified
-.thumb
-
-// JIT_MemSet/JIT_MemCpy
-//
-// It is IMPORANT that the exception handling code is able to find these guys
-// on the stack, but to keep them from being tailcalled by VC++ we need to turn
-// off optimization and it ends up being a wasteful implementation.
-//
-// Hence these assembly helpers.
-//
-//EXTERN_C void __stdcall JIT_MemSet(void* _dest, int c, size_t count)
-LEAF_ENTRY JIT_MemSet, _TEXT
-
-        cmp r2, #0
-        it eq
-        bxeq lr
-
-        ldrb r3, [r0]
-
-        b C_PLTFUNC(memset)
-
-LEAF_END_MARKED JIT_MemSet, _TEXT
-
-
-//EXTERN_C void __stdcall JIT_MemCpy(void* _dest, const void *_src, size_t count)
-LEAF_ENTRY JIT_MemCpy, _TEXT
-//
-
-        cmp r2, #0
-        it eq
-        bxeq lr
-
-        ldrb r3, [r0]
-        ldrb r3, [r1]
-
-        b C_PLTFUNC(memcpy)
-
-LEAF_END_MARKED JIT_MemCpy, _TEXT
-
diff --git a/src/coreclr/vm/arm/gmscpu.h b/src/coreclr/vm/arm/gmscpu.h
index faa93a2279b3..42641889ad9e 100644
--- a/src/coreclr/vm/arm/gmscpu.h
+++ b/src/coreclr/vm/arm/gmscpu.h
@@ -81,8 +81,7 @@ struct LazyMachState : public MachState {
     static void unwindLazyState(LazyMachState* baseState,
                                 MachState* lazyState,
                                 DWORD threadId,
-                                int funCallDepth = 1,
-                                HostCallPreference hostCallPreference = AllowHostCalls);
+                                int funCallDepth = 1);
 
     friend class HelperMethodFrame;
     friend class CheckAsmOffsets;
diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp
index 771d24409670..5f8e3bf445e7 100644
--- a/src/coreclr/vm/arm/stubs.cpp
+++ b/src/coreclr/vm/arm/stubs.cpp
@@ -509,8 +509,7 @@ void FlushWriteBarrierInstructionCache()
 void LazyMachState::unwindLazyState(LazyMachState* baseState,
                                     MachState* unwoundstate,
                                     DWORD threadId,
-                                    int funCallDepth,
-                                    HostCallPreference hostCallPreference)
+                                    int funCallDepth)
 {
     T_CONTEXT                         ctx;
     T_KNONVOLATILE_CONTEXT_POINTERS   nonVolRegPtrs;
@@ -575,20 +574,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
         {
             // Determine  whether given IP resides in JITted code. (It returns nonzero in that case.)
             // Use it now to see if we've unwound to managed code yet.
-            BOOL fFailedReaderLock = FALSE;
-            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc, hostCallPreference, &fFailedReaderLock);
-            if (fFailedReaderLock)
-            {
-                // We don't know if we would have been able to find a JIT
-                // manager, because we couldn't enter the reader lock without
-                // yielding (and our caller doesn't want us to yield).  So abort
-                // now.
-
-                // Invalidate the lazyState we're returning, so the caller knows
-                // we aborted before we could fully unwind
-                unwoundstate->_isValid = false;
-                return;
-            }
+            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc);
 
             if (fIsManagedCode)
                 break;
@@ -629,7 +615,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
     unwoundstate->_isValid = true;
 }
 
-void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACTL
     {
@@ -640,6 +626,14 @@ void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     }
     CONTRACTL_END;
 
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+        _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress());
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 
@@ -1500,8 +1494,16 @@ void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegis
     pRD->pCurrentContextPointers->Lr = NULL;
 }
 
-void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+        _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress());
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 
@@ -1529,7 +1531,7 @@ void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    TransitionFrame::UpdateRegDisplay(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP));
 }
 
-void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     LIMITED_METHOD_DAC_CONTRACT;
 
@@ -1555,7 +1557,7 @@ void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 }
 
-void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
@@ -1567,7 +1569,6 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 #ifdef PROFILING_SUPPORTED
         PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this));
 #endif
-        HOST_NOCALLS;
         MODE_ANY;
         SUPPORTS_DAC;
     }
@@ -1581,6 +1582,13 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
         return;
     }
 
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+    }
+#endif // DACCESS_COMPILE
+
     // reset pContext; it's only valid for active (top-most) frame
     pRD->pContext = NULL;
 
@@ -1596,7 +1604,7 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 
     // Update the frame pointer in the current context.
     pRD->pCurrentContext->R11 = m_pCalleeSavedFP;
-    pRD->pCurrentContextPointers->R11 = &m_pCalleeSavedFP;
+    pRD->pCurrentContextPointers->R11 = (DWORD *)&m_pCalleeSavedFP;
 
     // This is necessary to unwind methods with alloca. This needs to stay
     // in sync with definition of REG_SAVED_LOCALLOC_SP in the JIT.
@@ -1613,7 +1621,7 @@ TADDR ResumableFrame::GetReturnAddressPtr(void)
     return dac_cast<TADDR>(m_Regs) + offsetof(T_CONTEXT, Pc);
 }
 
-void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
@@ -1649,7 +1657,7 @@ void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 }
 
-void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
      CONTRACTL {
          NOTHROW;
diff --git a/src/coreclr/vm/arm/thunktemplates.S b/src/coreclr/vm/arm/thunktemplates.S
index 0686bb2ed4b7..8744c8ebb632 100644
--- a/src/coreclr/vm/arm/thunktemplates.S
+++ b/src/coreclr/vm/arm/thunktemplates.S
@@ -11,7 +11,7 @@
 
 PAGE_SIZE = 4096
 
-#define DATA_SLOT(stub, field) stub##Code + PAGE_SIZE + stub##Data__##field
+#define DATA_SLOT(stub, field) . - (. - stub##Code) + PAGE_SIZE + stub##Data__##field
 
     LEAF_ENTRY StubPrecodeCode
         ldr r12, DATA_SLOT(StubPrecode, MethodDesc)
diff --git a/src/coreclr/vm/arm64/PInvokeStubs.asm b/src/coreclr/vm/arm64/PInvokeStubs.asm
index d7b432240d2d..37699e790b24 100644
--- a/src/coreclr/vm/arm64/PInvokeStubs.asm
+++ b/src/coreclr/vm/arm64/PInvokeStubs.asm
@@ -188,8 +188,6 @@ RarePath
 
         LEAF_END
 
-        INLINE_GETTHREAD_CONSTANT_POOL
-
 ; ------------------------------------------------------------------
 ; VarargPInvokeStub & VarargPInvokeGenILStub
 ;
diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h
index b52c164e1ab7..262fa6860df7 100644
--- a/src/coreclr/vm/arm64/asmconstants.h
+++ b/src/coreclr/vm/arm64/asmconstants.h
@@ -33,8 +33,8 @@
 #define DynamicHelperFrameFlags_ObjectArg   1
 #define DynamicHelperFrameFlags_ObjectArg2  2
 
-#define               Thread__m_fPreemptiveGCDisabled   0x0C
-#define               Thread__m_pFrame                  0x10
+#define               Thread__m_fPreemptiveGCDisabled   0x04
+#define               Thread__m_pFrame                  0x08
 
 ASMCONSTANTS_C_ASSERT(Thread__m_fPreemptiveGCDisabled == offsetof(Thread, m_fPreemptiveGCDisabled));
 ASMCONSTANTS_C_ASSERT(Thread__m_pFrame == offsetof(Thread, m_pFrame));
@@ -96,7 +96,11 @@ ASMCONSTANTS_C_ASSERT(MachState__isValid == offsetof(MachState, _isValid))
 #define LazyMachState_captureX19_X29 MachState__captureX19_X29
 ASMCONSTANTS_C_ASSERT(LazyMachState_captureX19_X29 == offsetof(LazyMachState, captureX19_X29))
 
+#ifdef __APPLE__
+#define LazyMachState_captureSp     (MachState__isValid+8+88) // padding for alignment
+#else // __APPLE__
 #define LazyMachState_captureSp     (MachState__isValid+8) // padding for alignment
+#endif // __APPLE
 ASMCONSTANTS_C_ASSERT(LazyMachState_captureSp == offsetof(LazyMachState, captureSp))
 
 #define LazyMachState_captureIp     (LazyMachState_captureSp+8)
diff --git a/src/coreclr/vm/arm64/asmmacros.h b/src/coreclr/vm/arm64/asmmacros.h
index 4a0cb5dde49a..8dd56dc59045 100644
--- a/src/coreclr/vm/arm64/asmmacros.h
+++ b/src/coreclr/vm/arm64/asmmacros.h
@@ -313,18 +313,17 @@ __RedirectionFuncName SETS "|?RedirectedHandledJITCaseFor":CC:"$reason":CC:"@Thr
 
         MEND
 
-;-----------------------------------------------------------------------------
-; Macro to get a pointer to the Thread* object for the currently executing thread
-;
-__tls_array     equ 0x58    ;; offsetof(TEB, ThreadLocalStoragePointer)
+;; -----------------------------------------------------------------------------
+;;
+;; Macro to get a pointer to a threadlocal symbol for the currently executing thread
+;;
 
-    EXTERN _tls_index
-
-    GBLS __SECTIONREL_gCurrentThreadInfo
-__SECTIONREL_gCurrentThreadInfo SETS "SECTIONREL_gCurrentThreadInfo"
+__tls_array     equ 0x58    ;; offsetof(TEB, ThreadLocalStoragePointer)
 
     MACRO
-        INLINE_GETTHREAD $destReg, $trashReg
+        INLINE_GET_TLS_VAR $destReg, $trashReg, $variable
+
+        EXTERN _tls_index
 
         ;; The following macro variables are just some assembler magic to get the name of the 32-bit version
         ;; of $trashReg. It does it by string manipulation. Replaces something like x3 with w3.
@@ -332,33 +331,27 @@ __SECTIONREL_gCurrentThreadInfo SETS "SECTIONREL_gCurrentThreadInfo"
 TrashRegister32Bit SETS "$trashReg"
 TrashRegister32Bit SETS "w":CC:("$TrashRegister32Bit":RIGHT:((:LEN:TrashRegister32Bit) - 1))
 
-        ldr         $trashReg, =_tls_index
-        ldr         $TrashRegister32Bit, [$trashReg]
+        adrp        $destReg, _tls_index
+        ldr         $TrashRegister32Bit, [$destReg, _tls_index]
         ldr         $destReg, [xpr, #__tls_array]
-        ldr         $destReg, [$destReg, $trashReg lsl #3]
-        ldr         $trashReg, =$__SECTIONREL_gCurrentThreadInfo
-        ldr         $trashReg, [$trashReg]
-        ldr         $destReg, [$destReg, $trashReg]        ; return gCurrentThreadInfo.m_pThread
+        ldr         $destReg, [$destReg, $TrashRegister32Bit uxtw #3]
+        add         $destReg, $destReg, #0, lsl #0xC
+        RELOC       0xA, $variable                          ;; IMAGE_REL_ARM64_SECREL_HIGH12A
+        add         $destReg, $destReg, #0, lsl #0
+        RELOC       0x9, $variable                          ;; IMAGE_REL_ARM64_SECREL_LOW12A
     MEND
 
-;-----------------------------------------------------------------------------
-; INLINE_GETTHREAD_CONSTANT_POOL macro has to be used after the last function in the .asm file that used
-; INLINE_GETTHREAD. Optionally, it can be also used after any function that used INLINE_GETTHREAD
-; to improve density, or to reduce distance between the constant pool and its use.
-;
+;; -----------------------------------------------------------------------------
+;;
+;; Macro to get a pointer to the Thread* object for the currently executing thread
+;;
     SETALIAS gCurrentThreadInfo, ?gCurrentThreadInfo@@3UThreadLocalInfo@@A
 
     MACRO
-        INLINE_GETTHREAD_CONSTANT_POOL
+        INLINE_GETTHREAD $destReg, $trashReg
 
         EXTERN $gCurrentThreadInfo
 
-    ;; Section relocs are 32 bits. Using an extra DCD initialized to zero for 8-byte alignment.
-$__SECTIONREL_gCurrentThreadInfo
-        DCD $gCurrentThreadInfo
-        RELOC 8, $gCurrentThreadInfo    ;; SECREL
-        DCD 0
-
-__SECTIONREL_gCurrentThreadInfo SETS "$__SECTIONREL_gCurrentThreadInfo":CC:"_"
-
+        INLINE_GET_TLS_VAR $destReg, $trashReg, $gCurrentThreadInfo
+        ldr $destReg, [$destReg]                            ;; return gCurrentThreadInfo.m_pThread
     MEND
diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h
index 3ec3d6ea3b1a..700221aa8696 100644
--- a/src/coreclr/vm/arm64/cgencpu.h
+++ b/src/coreclr/vm/arm64/cgencpu.h
@@ -17,6 +17,28 @@
 #define USE_REDIRECT_FOR_GCSTRESS
 #endif // TARGET_UNIX
 
+#define ENUM_CALLEE_SAVED_REGISTERS() \
+    CALLEE_SAVED_REGISTER(Fp) \
+    CALLEE_SAVED_REGISTER(Lr) \
+    CALLEE_SAVED_REGISTER(X19) \
+    CALLEE_SAVED_REGISTER(X20) \
+    CALLEE_SAVED_REGISTER(X21) \
+    CALLEE_SAVED_REGISTER(X22) \
+    CALLEE_SAVED_REGISTER(X23) \
+    CALLEE_SAVED_REGISTER(X24) \
+    CALLEE_SAVED_REGISTER(X25) \
+    CALLEE_SAVED_REGISTER(X26)
+
+#define ENUM_FP_CALLEE_SAVED_REGISTERS() \
+    CALLEE_SAVED_REGISTER(V[8].Low) \
+    CALLEE_SAVED_REGISTER(V[9].Low) \
+    CALLEE_SAVED_REGISTER(V[10].Low) \
+    CALLEE_SAVED_REGISTER(V[11].Low) \
+    CALLEE_SAVED_REGISTER(V[12].Low) \
+    CALLEE_SAVED_REGISTER(V[13].Low) \
+    CALLEE_SAVED_REGISTER(V[14].Low) \
+    CALLEE_SAVED_REGISTER(V[15].Low)
+
 EXTERN_C void getFPReturn(int fpSize, INT64 *pRetVal);
 EXTERN_C void setFPReturn(int fpSize, INT64 retVal);
 
@@ -38,8 +60,6 @@ extern PCODE GetPreStubEntryPoint();
 
 #define HAS_NDIRECT_IMPORT_PRECODE              1
 
-#define USE_INDIRECT_CODEHEADER
-
 #define HAS_FIXUP_PRECODE                       1
 
 // ThisPtrRetBufPrecode one is necessary for closed delegates over static methods with return buffer
diff --git a/src/coreclr/vm/arm64/crthelpers.S b/src/coreclr/vm/arm64/crthelpers.S
deleted file mode 100644
index e123fc82808d..000000000000
--- a/src/coreclr/vm/arm64/crthelpers.S
+++ /dev/null
@@ -1,33 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#include "unixasmmacros.inc"
-
-// JIT_MemSet/JIT_MemCpy
-//
-// It is IMPORTANT that the exception handling code is able to find these guys
-// on the stack, but on non-windows platforms we can just defer to the platform
-// implementation.
-//
-LEAF_ENTRY JIT_MemSet, _TEXT
-    cbz x2, LOCAL_LABEL(JIT_MemSet_ret)
-
-    ldrb wzr, [x0]
-
-    b C_PLTFUNC(memset)
-
-LOCAL_LABEL(JIT_MemSet_ret):
-    ret         lr
-LEAF_END_MARKED JIT_MemSet, _TEXT
-
-LEAF_ENTRY JIT_MemCpy, _TEXT
-    cbz x2, LOCAL_LABEL(JIT_MemCpy_ret)
-
-    ldrb wzr, [x0]
-    ldrb wzr, [x1]
-
-    b C_PLTFUNC(memcpy)
-
-LOCAL_LABEL(JIT_MemCpy_ret):
-    ret         lr
-LEAF_END_MARKED JIT_MemCpy, _TEXT
diff --git a/src/coreclr/vm/arm64/crthelpers.asm b/src/coreclr/vm/arm64/crthelpers.asm
deleted file mode 100644
index d4d13351365c..000000000000
--- a/src/coreclr/vm/arm64/crthelpers.asm
+++ /dev/null
@@ -1,81 +0,0 @@
-; Licensed to the .NET Foundation under one or more agreements.
-; The .NET Foundation licenses this file to you under the MIT license.
-
-#include "ksarm64.h"
-#include "asmconstants.h"
-#include "asmmacros.h"
-
-    IMPORT memset
-    IMPORT memmove
-
-; JIT_MemSet/JIT_MemCpy
-;
-; It is IMPORTANT that the exception handling code is able to find these guys
-; on the stack, but on windows platforms we can just defer to the platform
-; implementation.
-;
-
-; void JIT_MemSet(void* dest, int c, size_t count)
-;
-; Purpose:
-;    Sets the first "count" bytes of the block of memory pointed byte
-;    "dest" to the specified value (interpreted as an unsigned char).
-;
-; Entry:
-;    RCX: void* dest    - Pointer to the block of memory to fill.
-;    RDX: int c         - Value to be set.
-;    R8:  size_t count  - Number of bytes to be set to the value.
-;
-; Exit:
-;
-; Uses:
-;
-; Exceptions:
-;
-
-    TEXTAREA
-    
-    LEAF_ENTRY JIT_MemSet
-        cbz x2, JIT_MemSet_ret  ; check if count is zero, no bytes to set
-
-        ldrb wzr, [x0]          ; check dest for null
-
-        b     memset            ; forward to the CRT implementation
-
-JIT_MemSet_ret
-        ret         lr
-    
-    LEAF_END_MARKED JIT_MemSet
-
-; void JIT_MemCpy(void* dest, const void* src, size_t count)
-;
-; Purpose:
-;    Copies the values of "count" bytes from the location pointed to
-;    by "src" to the memory block pointed by "dest".
-;
-; Entry:
-;    RCX: void* dest             - Pointer to the destination array where content is to be copied.
-;    RDX: const void* src        - Pointer to the source of the data to be copied.
-;    R8:  size_t count           - Number of bytes to copy.
-;
-; Exit:
-;
-; Uses:
-;
-; Exceptions:
-;
-    LEAF_ENTRY JIT_MemCpy
-        cbz x2, JIT_MemCpy_ret  ; check if count is zero, no bytes to set
-
-        ldrb wzr, [x0]          ; check dest for null
-        ldrb wzr, [x1]          ; check src for null
-
-        b     memmove           ; forward to the CRT implementation
-
-JIT_MemCpy_ret
-    ret         lr
-
-    LEAF_END_MARKED JIT_MemCpy
-
-; Must be at very end of file
-    END
diff --git a/src/coreclr/vm/arm64/gmscpu.h b/src/coreclr/vm/arm64/gmscpu.h
index 887a41b4f07c..f33230702afc 100644
--- a/src/coreclr/vm/arm64/gmscpu.h
+++ b/src/coreclr/vm/arm64/gmscpu.h
@@ -25,6 +25,11 @@ struct MachState {
     TADDR          _pc; // program counter after the function returns
     TADDR          _sp; // stack pointer after the function returns
     BOOL           _isValid;
+#ifdef __APPLE__
+    // libunwind on macOS doesn't support context pointers and we cannot modify the captureX19_X29,
+    // so we store the unwound values in a separate array.
+    ULONG64        unwoundX19_X29[NUM_NONVOLATILE_CONTEXT_POINTERS]; // preserved registers
+#endif // __APPLE__
 
     BOOL   isValid()    { LIMITED_METHOD_DAC_CONTRACT; return _isValid; }
     TADDR  GetRetAddr() { LIMITED_METHOD_DAC_CONTRACT; return _pc; }
@@ -39,8 +44,7 @@ struct LazyMachState : public MachState{
     static void unwindLazyState(LazyMachState* baseState,
                                 MachState* lazyState,
                                 DWORD threadId,
-                                int funCallDepth = 1,
-                                HostCallPreference hostCallPreference = AllowHostCalls);
+                                int funCallDepth = 1);
 };
 
 inline void LazyMachState::setLazyStateFromUnwind(MachState* copy)
@@ -55,6 +59,10 @@ inline void LazyMachState::setLazyStateFromUnwind(MachState* copy)
     _sp = copy->_sp;
     _pc = copy->_pc;
 
+#ifdef __APPLE__
+    memcpy(unwoundX19_X29, copy->unwoundX19_X29, sizeof(unwoundX19_X29));
+#endif // __APPLE__
+
     // Capture* has already been set, so there is no need to touch it
 
     // loop over the nonvolatile context pointers and make
@@ -80,7 +88,6 @@ inline void LazyMachState::setLazyStateFromUnwind(MachState* copy)
         }
 
         *pDst++ = valueSrc;
-        captureX19_X29[i] = copy->captureX19_X29[i];
     }
 
 
diff --git a/src/coreclr/vm/arm64/patchedcode.S b/src/coreclr/vm/arm64/patchedcode.S
index 2c1199be69a7..ae8d07ab1f94 100644
--- a/src/coreclr/vm/arm64/patchedcode.S
+++ b/src/coreclr/vm/arm64/patchedcode.S
@@ -42,6 +42,9 @@ LEAF_END JIT_PatchedCodeStart, _TEXT
 //   x15  : trashed
 //   x17  : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
 //
+//   NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+//         if you add more trashed registers.
+//
 WRITE_BARRIER_ENTRY JIT_ByRefWriteBarrier
 
     ldr  x15, [x13], 8
diff --git a/src/coreclr/vm/arm64/patchedcode.asm b/src/coreclr/vm/arm64/patchedcode.asm
index bd4f57cc6810..4bb8aa196818 100644
--- a/src/coreclr/vm/arm64/patchedcode.asm
+++ b/src/coreclr/vm/arm64/patchedcode.asm
@@ -75,6 +75,9 @@ wbs_GCShadowEnd
 ;   x14  : incremented by 8
 ;   x15  : trashed
 ;   x17  : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+;
+;   NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+;         if you add more trashed registers.
 ;
     WRITE_BARRIER_ENTRY JIT_ByRefWriteBarrier
 
diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp
index cc5dbf7d66b8..623938dfba61 100644
--- a/src/coreclr/vm/arm64/stubs.cpp
+++ b/src/coreclr/vm/arm64/stubs.cpp
@@ -277,8 +277,7 @@ void ClearRegDisplayArgumentAndScratchRegisters(REGDISPLAY * pRD)
 void LazyMachState::unwindLazyState(LazyMachState* baseState,
                                     MachState* unwoundstate,
                                     DWORD threadId,
-                                    int funCallDepth,
-                                    HostCallPreference hostCallPreference)
+                                    int funCallDepth)
 {
     T_CONTEXT context;
     T_KNONVOLATILE_CONTEXT_POINTERS nonVolContextPtrs;
@@ -357,20 +356,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
         {
             // Determine  whether given IP resides in JITted code. (It returns nonzero in that case.)
             // Use it now to see if we've unwound to managed code yet.
-            BOOL fFailedReaderLock = FALSE;
-            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc, hostCallPreference, &fFailedReaderLock);
-            if (fFailedReaderLock)
-            {
-                // We don't know if we would have been able to find a JIT
-                // manager, because we couldn't enter the reader lock without
-                // yielding (and our caller doesn't want us to yield).  So abort
-                // now.
-
-                // Invalidate the lazyState we're returning, so the caller knows
-                // we aborted before we could fully unwind
-                unwoundstate->_isValid = false;
-                return;
-            }
+            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc);
 
             if (fIsManagedCode)
                 break;
@@ -378,19 +364,19 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
         }
     } while (true);
 
-#ifdef TARGET_UNIX
-    unwoundstate->captureX19_X29[0] = context.X19;
-    unwoundstate->captureX19_X29[1] = context.X20;
-    unwoundstate->captureX19_X29[2] = context.X21;
-    unwoundstate->captureX19_X29[3] = context.X22;
-    unwoundstate->captureX19_X29[4] = context.X23;
-    unwoundstate->captureX19_X29[5] = context.X24;
-    unwoundstate->captureX19_X29[6] = context.X25;
-    unwoundstate->captureX19_X29[7] = context.X26;
-    unwoundstate->captureX19_X29[8] = context.X27;
-    unwoundstate->captureX19_X29[9] = context.X28;
-    unwoundstate->captureX19_X29[10] = context.Fp;
-#endif
+#ifdef __APPLE__
+    unwoundstate->unwoundX19_X29[0] = context.X19;
+    unwoundstate->unwoundX19_X29[1] = context.X20;
+    unwoundstate->unwoundX19_X29[2] = context.X21;
+    unwoundstate->unwoundX19_X29[3] = context.X22;
+    unwoundstate->unwoundX19_X29[4] = context.X23;
+    unwoundstate->unwoundX19_X29[5] = context.X24;
+    unwoundstate->unwoundX19_X29[6] = context.X25;
+    unwoundstate->unwoundX19_X29[7] = context.X26;
+    unwoundstate->unwoundX19_X29[8] = context.X27;
+    unwoundstate->unwoundX19_X29[9] = context.X28;
+    unwoundstate->unwoundX19_X29[10] = context.Fp;
+#endif // __APPLE__
 
 #ifdef DACCESS_COMPILE
     // For DAC builds, we update the registers directly since we dont have context pointers
@@ -426,7 +412,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
     unwoundstate->_isValid = TRUE;
 }
 
-void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACTL
     {
@@ -437,6 +423,14 @@ void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     }
     CONTRACTL_END;
 
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+        _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress());
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 
@@ -497,20 +491,20 @@ void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     pRD->pCurrentContext->Pc = pRD->ControlPC;
     pRD->pCurrentContext->Sp = pRD->SP;
 
-#ifdef TARGET_UNIX
-    pRD->pCurrentContext->X19 = m_MachState.ptrX19_X29[0] ? *m_MachState.ptrX19_X29[0] : m_MachState.captureX19_X29[0];
-    pRD->pCurrentContext->X20 = m_MachState.ptrX19_X29[1] ? *m_MachState.ptrX19_X29[1] : m_MachState.captureX19_X29[1];
-    pRD->pCurrentContext->X21 = m_MachState.ptrX19_X29[2] ? *m_MachState.ptrX19_X29[2] : m_MachState.captureX19_X29[2];
-    pRD->pCurrentContext->X22 = m_MachState.ptrX19_X29[3] ? *m_MachState.ptrX19_X29[3] : m_MachState.captureX19_X29[3];
-    pRD->pCurrentContext->X23 = m_MachState.ptrX19_X29[4] ? *m_MachState.ptrX19_X29[4] : m_MachState.captureX19_X29[4];
-    pRD->pCurrentContext->X24 = m_MachState.ptrX19_X29[5] ? *m_MachState.ptrX19_X29[5] : m_MachState.captureX19_X29[5];
-    pRD->pCurrentContext->X25 = m_MachState.ptrX19_X29[6] ? *m_MachState.ptrX19_X29[6] : m_MachState.captureX19_X29[6];
-    pRD->pCurrentContext->X26 = m_MachState.ptrX19_X29[7] ? *m_MachState.ptrX19_X29[7] : m_MachState.captureX19_X29[7];
-    pRD->pCurrentContext->X27 = m_MachState.ptrX19_X29[8] ? *m_MachState.ptrX19_X29[8] : m_MachState.captureX19_X29[8];
-    pRD->pCurrentContext->X28 = m_MachState.ptrX19_X29[9] ? *m_MachState.ptrX19_X29[9] : m_MachState.captureX19_X29[9];
-    pRD->pCurrentContext->Fp = m_MachState.ptrX19_X29[10] ? *m_MachState.ptrX19_X29[10] : m_MachState.captureX19_X29[10];
+#ifdef __APPLE__
+    pRD->pCurrentContext->X19 = (DWORD64)(m_MachState.unwoundX19_X29[0]);
+    pRD->pCurrentContext->X20 = (DWORD64)(m_MachState.unwoundX19_X29[1]);
+    pRD->pCurrentContext->X21 = (DWORD64)(m_MachState.unwoundX19_X29[2]);
+    pRD->pCurrentContext->X22 = (DWORD64)(m_MachState.unwoundX19_X29[3]);
+    pRD->pCurrentContext->X23 = (DWORD64)(m_MachState.unwoundX19_X29[4]);
+    pRD->pCurrentContext->X24 = (DWORD64)(m_MachState.unwoundX19_X29[5]);
+    pRD->pCurrentContext->X25 = (DWORD64)(m_MachState.unwoundX19_X29[6]);
+    pRD->pCurrentContext->X26 = (DWORD64)(m_MachState.unwoundX19_X29[7]);
+    pRD->pCurrentContext->X27 = (DWORD64)(m_MachState.unwoundX19_X29[8]);
+    pRD->pCurrentContext->X28 = (DWORD64)(m_MachState.unwoundX19_X29[9]);
+    pRD->pCurrentContext->Fp = (DWORD64)(m_MachState.unwoundX19_X29[10]);
     pRD->pCurrentContext->Lr = NULL; // Unwind again to get Caller's PC
-#else // TARGET_UNIX
+#else // __APPLE__
     pRD->pCurrentContext->X19 = *m_MachState.ptrX19_X29[0];
     pRD->pCurrentContext->X20 = *m_MachState.ptrX19_X29[1];
     pRD->pCurrentContext->X21 = *m_MachState.ptrX19_X29[2];
@@ -523,7 +517,7 @@ void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     pRD->pCurrentContext->X28 = *m_MachState.ptrX19_X29[9];
     pRD->pCurrentContext->Fp  = *m_MachState.ptrX19_X29[10];
     pRD->pCurrentContext->Lr = NULL; // Unwind again to get Caller's PC
-#endif
+#endif // __APPLE__
 
 #if !defined(DACCESS_COMPILE)
     pRD->pCurrentContextPointers->X19 = m_MachState.ptrX19_X29[0];
@@ -601,8 +595,16 @@ void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegis
 }
 
 
-void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+        _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress());
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 
@@ -626,7 +628,7 @@ void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 
 
 
-void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     LIMITED_METHOD_DAC_CONTRACT;
 
@@ -659,7 +661,7 @@ void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    FaultingExceptionFrame::UpdateRegDisplay(pc:%p, sp:%p)\n", pRD->ControlPC, pRD->SP));
 }
 
-void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
@@ -668,7 +670,6 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 #ifdef PROFILING_SUPPORTED
         PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this));
 #endif
-        HOST_NOCALLS;
         MODE_ANY;
         SUPPORTS_DAC;
     }
@@ -680,6 +681,13 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
         return;
     }
 
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;
 
@@ -708,7 +716,7 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 
 
     // Update the frame pointer in the current context.
-    pRD->pCurrentContextPointers->Fp = &m_pCalleeSavedFP;
+    pRD->pCurrentContextPointers->Fp = (DWORD64 *)&m_pCalleeSavedFP;
 
     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    InlinedCallFrame::UpdateRegDisplay(pc:%p, sp:%p)\n", pRD->ControlPC, pRD->SP));
 
@@ -722,7 +730,7 @@ TADDR ResumableFrame::GetReturnAddressPtr(void)
     return dac_cast<TADDR>(m_Regs) + offsetof(T_CONTEXT, Pc);
 }
 
-void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
@@ -762,7 +770,7 @@ void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     RETURN;
 }
 
-void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     LIMITED_METHOD_CONTRACT;
 
diff --git a/src/coreclr/vm/callconvbuilder.cpp b/src/coreclr/vm/callconvbuilder.cpp
index 20f95f122241..7303cd960fa1 100644
--- a/src/coreclr/vm/callconvbuilder.cpp
+++ b/src/coreclr/vm/callconvbuilder.cpp
@@ -298,15 +298,12 @@ namespace
     {
         STANDARD_VM_CONTRACT;
 
-        TypeHandle type;
-        MethodDesc* pMD;
-        FieldDesc* pFD;
+        ResolvedToken resolved{};
+        pResolver->ResolveToken(token, &resolved);
 
-        pResolver->ResolveToken(token, &type, &pMD, &pFD);
+        _ASSERTE(!resolved.TypeHandle.IsNull());
 
-        _ASSERTE(!type.IsNull());
-
-        *nameOut = type.GetMethodTable()->GetFullyQualifiedNameInfo(namespaceOut);
+        *nameOut = resolved.TypeHandle.GetMethodTable()->GetFullyQualifiedNameInfo(namespaceOut);
 
         return S_OK;
     }
@@ -456,7 +453,7 @@ HRESULT CallConv::TryGetCallingConventionFromUnmanagedCallConv(
 
     InlineFactory<SArray<CaValue>, 4> caValueArrayFactory;
     DomainAssembly* domainAssembly = pMD->GetLoaderModule()->GetDomainAssembly();
-    IfFailThrow(Attribute::ParseAttributeArgumentValues(
+    IfFailThrow(Attribute::ParseArgumentValues(
         pData,
         cData,
         &caValueArrayFactory,
@@ -531,7 +528,7 @@ bool CallConv::TryGetCallingConventionFromUnmanagedCallersOnly(_In_ MethodDesc*
 
     InlineFactory<SArray<CaValue>, 4> caValueArrayFactory;
     DomainAssembly* domainAssembly = pMD->GetLoaderModule()->GetDomainAssembly();
-    IfFailThrow(Attribute::ParseAttributeArgumentValues(
+    IfFailThrow(Attribute::ParseArgumentValues(
         pData,
         cData,
         &caValueArrayFactory,
diff --git a/src/coreclr/vm/callcounting.cpp b/src/coreclr/vm/callcounting.cpp
index c464949f7aee..a6577fd42de3 100644
--- a/src/coreclr/vm/callcounting.cpp
+++ b/src/coreclr/vm/callcounting.cpp
@@ -664,7 +664,7 @@ bool CallCountingManager::SetCodeEntryPoint(
             // direct calls in codegen and they need to be promoted earlier than their callers.
             if (methodDesc->GetMethodTable() == g_pCastHelpers)
             {
-                callCountThreshold = max(1, (CallCount)(callCountThreshold / 2));
+                callCountThreshold = max<CallCount>(1, (CallCount)(callCountThreshold / 2));
             }
 
             NewHolder<CallCountingInfo> callCountingInfoHolder = new CallCountingInfo(activeCodeVersion, callCountThreshold);
diff --git a/src/coreclr/vm/callhelpers.cpp b/src/coreclr/vm/callhelpers.cpp
index 055fbbd50958..bc426b9c40b1 100644
--- a/src/coreclr/vm/callhelpers.cpp
+++ b/src/coreclr/vm/callhelpers.cpp
@@ -545,9 +545,7 @@ void MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments, ARG_SLOT *
     if (transitionToPreemptive)
     {
         GCPreemp transitionIfILStub(transitionToPreemptive);
-        DWORD* pLastError = &GetThread()->m_dwLastErrorInterp;
         CallDescrWorkerInternal(&callDescrData);
-        *pLastError = GetLastError();
     }
     else
 #endif // FEATURE_INTERPRETER
diff --git a/src/coreclr/vm/castcache.cpp b/src/coreclr/vm/castcache.cpp
index 1e59f7862d72..27105f3d25ef 100644
--- a/src/coreclr/vm/castcache.cpp
+++ b/src/coreclr/vm/castcache.cpp
@@ -12,6 +12,7 @@
 BASEARRAYREF* CastCache::s_pTableRef = NULL;
 OBJECTHANDLE CastCache::s_sentinelTable = NULL;
 DWORD CastCache::s_lastFlushSize     = INITIAL_CACHE_SIZE;
+const DWORD CastCache::INITIAL_CACHE_SIZE;
 
 BASEARRAYREF CastCache::CreateCastCache(DWORD size)
 {
diff --git a/src/coreclr/vm/ceeload.cpp b/src/coreclr/vm/ceeload.cpp
index 0796e59a15c2..0696635e9027 100644
--- a/src/coreclr/vm/ceeload.cpp
+++ b/src/coreclr/vm/ceeload.cpp
@@ -747,11 +747,6 @@ void Module::Destruct()
         delete m_debuggerSpecificData.m_pDynamicILBlobTable;
     }
 
-    if (m_debuggerSpecificData.m_pTemporaryILBlobTable)
-    {
-        delete m_debuggerSpecificData.m_pTemporaryILBlobTable;
-    }
-
     if (m_debuggerSpecificData.m_pILOffsetMappingTable)
     {
         for (ILOffsetMappingTable::Iterator pCurElem = m_debuggerSpecificData.m_pILOffsetMappingTable->Begin(),
@@ -951,26 +946,26 @@ void Module::BuildStaticsOffsets(AllocMemTracker *pamTracker)
                     case ELEMENT_TYPE_I2:
                     case ELEMENT_TYPE_U2:
                     case ELEMENT_TYPE_CHAR:
-                        dwAlignment[kk] =  max(2, dwAlignment[kk]);
+                        dwAlignment[kk] =  max<DWORD>(2, dwAlignment[kk]);
                         dwClassNonGCBytes[kk] += 2;
                         break;
                     case ELEMENT_TYPE_I4:
                     case ELEMENT_TYPE_U4:
                     case ELEMENT_TYPE_R4:
-                        dwAlignment[kk] =  max(4, dwAlignment[kk]);
+                        dwAlignment[kk] =  max<DWORD>(4, dwAlignment[kk]);
                         dwClassNonGCBytes[kk] += 4;
                         break;
                     case ELEMENT_TYPE_FNPTR:
                     case ELEMENT_TYPE_PTR:
                     case ELEMENT_TYPE_I:
                     case ELEMENT_TYPE_U:
-                        dwAlignment[kk] =  max((1 << LOG2_PTRSIZE), dwAlignment[kk]);
+                        dwAlignment[kk] =  max<DWORD>((1 << LOG2_PTRSIZE), dwAlignment[kk]);
                         dwClassNonGCBytes[kk] += (1 << LOG2_PTRSIZE);
                         break;
                     case ELEMENT_TYPE_I8:
                     case ELEMENT_TYPE_U8:
                     case ELEMENT_TYPE_R8:
-                        dwAlignment[kk] =  max(8, dwAlignment[kk]);
+                        dwAlignment[kk] =  max<DWORD>(8, dwAlignment[kk]);
                         dwClassNonGCBytes[kk] += 8;
                         break;
                     case ELEMENT_TYPE_VAR:
@@ -994,7 +989,7 @@ void Module::BuildStaticsOffsets(AllocMemTracker *pamTracker)
                         {
                             // We'll have to be pessimistic here
                             dwClassNonGCBytes[kk] += MAX_PRIMITIVE_FIELD_SIZE;
-                            dwAlignment[kk] = max(MAX_PRIMITIVE_FIELD_SIZE, dwAlignment[kk]);
+                            dwAlignment[kk] = max<DWORD>(MAX_PRIMITIVE_FIELD_SIZE, dwAlignment[kk]);
 
                             dwClassGCHandles[kk]  += 1;
                             break;
@@ -1233,13 +1228,10 @@ void Module::InitializeDynamicILCrst()
 //     Input:
 //         token        method token
 //         blobAddress  address of the start of the IL blob address, including the header
-//         fTemporaryOverride
-//                      is this a permanent override that should go in the
-//                      DynamicILBlobTable, or a temporary one?
 //     Output: not explicit, but if the pair was not already in the table it will be added.
 //             Does not add duplicate tokens to the table.
 
-void Module::SetDynamicIL(mdToken token, TADDR blobAddress, BOOL fTemporaryOverride)
+void Module::SetDynamicIL(mdToken token, TADDR blobAddress)
 {
     DynamicILBlobEntry entry = {mdToken(token), TADDR(blobAddress)};
 
@@ -1252,16 +1244,12 @@ void Module::SetDynamicIL(mdToken token, TADDR blobAddress, BOOL fTemporaryOverr
 
     CrstHolder ch(m_debuggerSpecificData.m_pDynamicILCrst);
 
-    // Figure out which table to fill in
-    PTR_DynamicILBlobTable &table(fTemporaryOverride ? m_debuggerSpecificData.m_pTemporaryILBlobTable
-                                                     : m_debuggerSpecificData.m_pDynamicILBlobTable);
-
     // Lazily allocate the hash table.
-    if (table == NULL)
+    if (m_debuggerSpecificData.m_pDynamicILBlobTable == NULL)
     {
-        table = PTR_DynamicILBlobTable(new DynamicILBlobTable);
+        m_debuggerSpecificData.m_pDynamicILBlobTable = PTR_DynamicILBlobTable(new DynamicILBlobTable);
     }
-    table->AddOrReplace(entry);
+    m_debuggerSpecificData.m_pDynamicILBlobTable->AddOrReplace(entry);
 }
 
 #endif // !DACCESS_COMPILE
@@ -1273,7 +1261,7 @@ void Module::SetDynamicIL(mdToken token, TADDR blobAddress, BOOL fTemporaryOverr
 //         fAllowTemporary also check the temporary overrides
 // Return Value: starting (target) address of the IL blob corresponding to the input token
 
-TADDR Module::GetDynamicIL(mdToken token, BOOL fAllowTemporary)
+TADDR Module::GetDynamicIL(mdToken token)
 {
     SUPPORTS_DAC;
 
@@ -1288,20 +1276,9 @@ TADDR Module::GetDynamicIL(mdToken token, BOOL fAllowTemporary)
     CrstHolder ch(m_debuggerSpecificData.m_pDynamicILCrst);
 #endif
 
-    // Both hash tables are lazily allocated, so if they're NULL
+    // The hash table is lazily allocated, so if it is NULL
     // then we have no IL blobs
 
-    if (fAllowTemporary && m_debuggerSpecificData.m_pTemporaryILBlobTable != NULL)
-    {
-        DynamicILBlobEntry entry = m_debuggerSpecificData.m_pTemporaryILBlobTable->Lookup(token);
-
-        // Only return a value if the lookup succeeded
-        if (!DynamicILBlobTraits::IsNull(entry))
-        {
-            return entry.m_il;
-        }
-    }
-
     if (m_debuggerSpecificData.m_pDynamicILBlobTable == NULL)
     {
         return TADDR(NULL);
@@ -1555,7 +1532,7 @@ DWORD Module::AllocateDynamicEntry(MethodTable *pMT)
 
     if (newId >= m_maxDynamicEntries)
     {
-        SIZE_T maxDynamicEntries = max(16, m_maxDynamicEntries);
+        SIZE_T maxDynamicEntries = max<SIZE_T>(16, m_maxDynamicEntries);
         while (maxDynamicEntries <= newId)
         {
             maxDynamicEntries *= 2;
@@ -3402,24 +3379,6 @@ MethodDesc *Module::FindMethod(mdToken pMethod)
     RETURN pMDRet;
 }
 
-//
-// GetPropertyInfoForMethodDef wraps the metadata function of the same name.
-//
-
-HRESULT Module::GetPropertyInfoForMethodDef(mdMethodDef md, mdProperty *ppd, LPCSTR *pName, ULONG *pSemantic)
-{
-    CONTRACTL
-    {
-        INSTANCE_CHECK;
-        NOTHROW;
-        GC_NOTRIGGER;
-        MODE_ANY;
-    }
-    CONTRACTL_END;
-
-    return GetMDImport()->GetPropertyInfoForMethodDef(md, ppd, pName, pSemantic);
-}
-
 // Return true if this module has any live (jitted) JMC functions.
 // If a module has no jitted JMC functions, then it's as if it's a
 // non-user module.
@@ -4677,22 +4636,178 @@ PTR_VOID ReflectionModule::GetRvaField(RVA field) // virtual
 // VASigCookies
 // ===========================================================================
 
+static bool TypeSignatureContainsGenericVariables(SigParser& sp);
+static bool MethodSignatureContainsGenericVariables(SigParser& sp);
+
+static bool TypeSignatureContainsGenericVariables(SigParser& sp)
+{
+    STANDARD_VM_CONTRACT;
+
+    CorElementType et = ELEMENT_TYPE_END;
+    IfFailThrow(sp.GetElemType(&et));
+
+    if (CorIsPrimitiveType(et))
+        return false;
+
+    switch (et)
+    {
+        case ELEMENT_TYPE_OBJECT:
+        case ELEMENT_TYPE_STRING:
+        case ELEMENT_TYPE_TYPEDBYREF:
+            return false;
+
+        case ELEMENT_TYPE_BYREF:
+        case ELEMENT_TYPE_PTR:
+        case ELEMENT_TYPE_SZARRAY:
+            return TypeSignatureContainsGenericVariables(sp);
+
+        case ELEMENT_TYPE_VALUETYPE:
+        case ELEMENT_TYPE_CLASS:
+            IfFailThrow(sp.GetToken(NULL)); // Skip RID
+            return false;
+
+        case ELEMENT_TYPE_FNPTR:
+            return MethodSignatureContainsGenericVariables(sp);
+
+        case ELEMENT_TYPE_ARRAY:
+            {
+                if (TypeSignatureContainsGenericVariables(sp))
+                    return true;
+
+                uint32_t rank;
+                IfFailThrow(sp.GetData(&rank)); // Get rank
+                if (rank)
+                {
+                    uint32_t nsizes;
+                    IfFailThrow(sp.GetData(&nsizes)); // Get # of sizes
+                    while (nsizes--)
+                    {
+                        IfFailThrow(sp.GetData(NULL)); // Skip size
+                    }
+
+                    uint32_t nlbounds;
+                    IfFailThrow(sp.GetData(&nlbounds)); // Get # of lower bounds
+                    while (nlbounds--)
+                    {
+                        IfFailThrow(sp.GetData(NULL)); // Skip lower bounds
+                    }
+                }
+            }
+            return false;
+
+        case ELEMENT_TYPE_GENERICINST:
+            {
+                if (TypeSignatureContainsGenericVariables(sp))
+                    return true;
+
+                uint32_t argCnt;
+                IfFailThrow(sp.GetData(&argCnt)); // Get number of parameters
+                while (argCnt--)
+                {
+                    if (TypeSignatureContainsGenericVariables(sp))
+                        return true;
+                }
+            }
+            return false;
+
+        case ELEMENT_TYPE_INTERNAL:
+            IfFailThrow(sp.GetPointer(NULL));
+            return false;
+
+        case ELEMENT_TYPE_VAR:
+        case ELEMENT_TYPE_MVAR:
+            return true;
+
+        default:
+            // Return conservative answer for unhandled elements
+            _ASSERTE(!"Unexpected element type.");
+            return true;
+    }
+}
+
+static bool MethodSignatureContainsGenericVariables(SigParser& sp)
+{
+    STANDARD_VM_CONTRACT;
+
+    uint32_t callConv = 0;
+    IfFailThrow(sp.GetCallingConvInfo(&callConv));
+
+    if (callConv & IMAGE_CEE_CS_CALLCONV_GENERIC)
+    {
+        // Generic signatures should never show up here, return conservative answer.
+        _ASSERTE(!"Unexpected generic signature.");
+        return true;
+    }
+
+    uint32_t numArgs = 0;
+    IfFailThrow(sp.GetData(&numArgs));
+
+    // iterate over the return type and parameters
+    for (uint32_t i = 0; i <= numArgs; i++)
+    {
+        if (TypeSignatureContainsGenericVariables(sp))
+            return true;
+    }
+
+    return false;
+}
+
 //==========================================================================
 // Enregisters a VASig.
 //==========================================================================
-VASigCookie *Module::GetVASigCookie(Signature vaSignature)
+VASigCookie *Module::GetVASigCookie(Signature vaSignature, const SigTypeContext* typeContext)
 {
     CONTRACT(VASigCookie*)
     {
         INSTANCE_CHECK;
-        THROWS;
-        GC_TRIGGERS;
-        MODE_ANY;
+        STANDARD_VM_CHECK;
         POSTCONDITION(CheckPointer(RETVAL));
         INJECT_FAULT(COMPlusThrowOM());
     }
     CONTRACT_END;
 
+    SigTypeContext emptyContext;
+
+    Module* pLoaderModule = this;
+    if (!typeContext->IsEmpty())
+    {
+        // Strip the generic context if it is not actually used by the signature. It is nececessary for both:
+        // - Performance: allow more sharing of vasig cookies
+        // - Functionality: built-in runtime marshalling is disallowed for generic signatures
+        SigParser sigParser = vaSignature.CreateSigParser();
+        if (MethodSignatureContainsGenericVariables(sigParser))
+        {
+            pLoaderModule = ClassLoader::ComputeLoaderModuleWorker(this, mdTokenNil, typeContext->m_classInst, typeContext->m_methodInst);
+        }
+        else
+        {
+            typeContext = &emptyContext;
+        }
+    }
+    else
+    {
+#ifdef _DEBUG
+        // The method signature should not contain any generic variables if the generic context is not provided.
+        SigParser sigParser = vaSignature.CreateSigParser();
+        _ASSERTE(!MethodSignatureContainsGenericVariables(sigParser));
+#endif
+    }
+
+    VASigCookie *pCookie = GetVASigCookieWorker(this, pLoaderModule, vaSignature, typeContext);
+
+    RETURN pCookie;
+}
+
+VASigCookie *Module::GetVASigCookieWorker(Module* pDefiningModule, Module* pLoaderModule, Signature vaSignature, const SigTypeContext* typeContext)
+{
+    CONTRACT(VASigCookie*)
+    {
+        STANDARD_VM_CHECK;
+        POSTCONDITION(CheckPointer(RETVAL));
+        INJECT_FAULT(COMPlusThrowOM());
+    }
+    CONTRACT_END;
+    
     VASigCookieBlock *pBlock;
     VASigCookie      *pCookie;
 
@@ -4700,39 +4815,70 @@ VASigCookie *Module::GetVASigCookie(Signature vaSignature)
 
     // First, see if we already enregistered this sig.
     // Note that we're outside the lock here, so be a bit careful with our logic
-    for (pBlock = m_pVASigCookieBlock; pBlock != NULL; pBlock = pBlock->m_Next)
+    for (pBlock = pLoaderModule->m_pVASigCookieBlock; pBlock != NULL; pBlock = pBlock->m_Next)
     {
         for (UINT i = 0; i < pBlock->m_numcookies; i++)
         {
             if (pBlock->m_cookies[i].signature.GetRawSig() == vaSignature.GetRawSig())
             {
-                pCookie = &(pBlock->m_cookies[i]);
-                break;
+                _ASSERTE(pBlock->m_cookies[i].classInst.GetNumArgs() == typeContext->m_classInst.GetNumArgs());
+                _ASSERTE(pBlock->m_cookies[i].methodInst.GetNumArgs() == typeContext->m_methodInst.GetNumArgs());
+
+                bool instMatch = true;
+
+                for (DWORD j = 0; j < pBlock->m_cookies[i].classInst.GetNumArgs(); j++)
+                {
+                    if (pBlock->m_cookies[i].classInst[j] != typeContext->m_classInst[j])
+                    {
+                        instMatch = false;
+                        break;
+                    }
+                }
+
+                if (instMatch)
+                {
+                    for (DWORD j = 0; j < pBlock->m_cookies[i].methodInst.GetNumArgs(); j++)
+                    {
+                        if (pBlock->m_cookies[i].methodInst[j] != typeContext->m_methodInst[j])
+                        {
+                            instMatch = false;
+                            break;
+                        }
+                    }
+                }
+
+                if (instMatch)
+                {
+                    pCookie = &(pBlock->m_cookies[i]);
+                    break;
+                }
             }
         }
     }
-
+    
     if (!pCookie)
     {
         // If not, time to make a new one.
 
         // Compute the size of args first, outside of the lock.
 
-        // @TODO GENERICS: We may be calling a varargs method from a
-        // generic type/method. Using an empty context will make such a
-        // case cause an unexpected exception. To make this work,
-        // we need to create a specialized signature for every instantiation
-        SigTypeContext typeContext;
-
-        MetaSig metasig(vaSignature, this, &typeContext);
+        MetaSig metasig(vaSignature, pDefiningModule, typeContext);
         ArgIterator argit(&metasig);
 
         // Upper estimate of the vararg size
         DWORD sizeOfArgs = argit.SizeOfArgStack();
 
+        // Prepare instantiation
+        LoaderAllocator  *pLoaderAllocator = pLoaderModule->GetLoaderAllocator();
+
+        DWORD classInstCount = typeContext->m_classInst.GetNumArgs();
+        DWORD methodInstCount = typeContext->m_methodInst.GetNumArgs();
+        pLoaderAllocator->EnsureInstantiation(pDefiningModule, typeContext->m_classInst);
+        pLoaderAllocator->EnsureInstantiation(pDefiningModule, typeContext->m_methodInst);
+
         // enable gc before taking lock
         {
-            CrstHolder ch(&m_Crst);
+            CrstHolder ch(&pLoaderModule->m_Crst);
 
             // Note that we were possibly racing to create the cookie, and another thread
             // may have already created it.  We could put another check
@@ -4740,32 +4886,57 @@ VASigCookie *Module::GetVASigCookie(Signature vaSignature)
             // occasional duplicate cookie instead.
 
             // Is the first block in the list full?
-            if (m_pVASigCookieBlock && m_pVASigCookieBlock->m_numcookies
+            if (pLoaderModule->m_pVASigCookieBlock && pLoaderModule->m_pVASigCookieBlock->m_numcookies
                 < VASigCookieBlock::kVASigCookieBlockSize)
             {
                 // Nope, reserve a new slot in the existing block.
-                pCookie = &(m_pVASigCookieBlock->m_cookies[m_pVASigCookieBlock->m_numcookies]);
+                pCookie = &(pLoaderModule->m_pVASigCookieBlock->m_cookies[pLoaderModule->m_pVASigCookieBlock->m_numcookies]);
             }
             else
             {
                 // Yes, create a new block.
                 VASigCookieBlock *pNewBlock = new VASigCookieBlock();
 
-                pNewBlock->m_Next = m_pVASigCookieBlock;
+                pNewBlock->m_Next = pLoaderModule->m_pVASigCookieBlock;
                 pNewBlock->m_numcookies = 0;
-                m_pVASigCookieBlock = pNewBlock;
+                pLoaderModule->m_pVASigCookieBlock = pNewBlock;
                 pCookie = &(pNewBlock->m_cookies[0]);
             }
 
             // Now, fill in the new cookie (assuming we had enough memory to create one.)
-            pCookie->pModule = this;
+            pCookie->pModule = pDefiningModule;
             pCookie->pNDirectILStub = NULL;
             pCookie->sizeOfArgs = sizeOfArgs;
             pCookie->signature = vaSignature;
+            pCookie->pLoaderModule = pLoaderModule;
+
+            AllocMemTracker amt;
+        
+            if (classInstCount != 0)
+            {
+                TypeHandle* pClassInst = (TypeHandle*)(void*)amt.Track(pLoaderAllocator->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(classInstCount) * S_SIZE_T(sizeof(TypeHandle))));
+                for (DWORD i = 0; i < classInstCount; i++)
+                {
+                    pClassInst[i] = typeContext->m_classInst[i];
+                }
+                pCookie->classInst = Instantiation(pClassInst, classInstCount);
+            }
+
+            if (methodInstCount != 0)
+            {
+                TypeHandle* pMethodInst = (TypeHandle*)(void*)amt.Track(pLoaderAllocator->GetHighFrequencyHeap()->AllocMem(S_SIZE_T(methodInstCount) * S_SIZE_T(sizeof(TypeHandle))));
+                for (DWORD i = 0; i < methodInstCount; i++)
+                {
+                    pMethodInst[i] = typeContext->m_methodInst[i];
+                }
+                pCookie->methodInst = Instantiation(pMethodInst, methodInstCount);
+            }
+        
+            amt.SuppressRelease();
 
             // Finally, now that it's safe for asynchronous readers to see it,
             // update the count.
-            m_pVASigCookieBlock->m_numcookies++;
+            pLoaderModule->m_pVASigCookieBlock->m_numcookies++;
         }
     }
 
diff --git a/src/coreclr/vm/ceeload.h b/src/coreclr/vm/ceeload.h
index 18335c5a5f01..f85f4eab8d31 100644
--- a/src/coreclr/vm/ceeload.h
+++ b/src/coreclr/vm/ceeload.h
@@ -338,7 +338,10 @@ struct VASigCookie
     unsigned        sizeOfArgs;             // size of argument list
     Volatile<PCODE> pNDirectILStub;         // will be use if target is NDirect (tag == 0)
     PTR_Module      pModule;
+    PTR_Module      pLoaderModule;
     Signature       signature;
+    Instantiation   classInst;
+    Instantiation   methodInst;
 };
 
 //
@@ -1321,8 +1324,6 @@ class Module : public ModuleBase
     MethodDesc *FindMethodThrowing(mdToken pMethod);
     MethodDesc *FindMethod(mdToken pMethod);
 
-    HRESULT GetPropertyInfoForMethodDef(mdMethodDef md, mdProperty *ppd, LPCSTR *pName, ULONG *pSemantic);
-
 public:
 
     // Debugger stuff
@@ -1360,7 +1361,9 @@ class Module : public ModuleBase
     void NotifyEtwLoadFinished(HRESULT hr);
 
     // Enregisters a VASig.
-    VASigCookie *GetVASigCookie(Signature vaSignature);
+    VASigCookie *GetVASigCookie(Signature vaSignature, const SigTypeContext* typeContext);
+private:
+    static VASigCookie *GetVASigCookieWorker(Module* pDefiningModule, Module* pLoaderModule, Signature vaSignature, const SigTypeContext* typeContext);
 
 public:
 #ifndef DACCESS_COMPILE
@@ -1473,8 +1476,8 @@ class Module : public ModuleBase
     void   StartUnload();
 
 public:
-    void SetDynamicIL(mdToken token, TADDR blobAddress, BOOL fTemporaryOverride);
-    TADDR GetDynamicIL(mdToken token, BOOL fAllowTemporary);
+    void SetDynamicIL(mdToken token, TADDR blobAddress);
+    TADDR GetDynamicIL(mdToken token);
 
     // store and retrieve the instrumented IL offset mapping for a particular method
 #if !defined(DACCESS_COMPILE)
@@ -1669,10 +1672,6 @@ class Module : public ModuleBase
                                                 // this map *always* overrides the Metadata RVA
         PTR_DynamicILBlobTable   m_pDynamicILBlobTable;
 
-                                                // maps tokens for to their corresponding overridden IL blobs
-                                                // this map conditionally overrides the Metadata RVA and the DynamicILBlobTable
-        PTR_DynamicILBlobTable   m_pTemporaryILBlobTable;
-
         // hash table storing any profiler-provided instrumented IL offset mapping
         PTR_ILOffsetMappingTable m_pILOffsetMappingTable;
 
diff --git a/src/coreclr/vm/ceeload.inl b/src/coreclr/vm/ceeload.inl
index ff446d4ec799..18d7557d6e85 100644
--- a/src/coreclr/vm/ceeload.inl
+++ b/src/coreclr/vm/ceeload.inl
@@ -64,7 +64,7 @@ inline
 void LookupMap<SIZE_T>::SetValueAt(PTR_TADDR pValue, SIZE_T value, TADDR flags)
 {
     WRAPPER_NO_CONTRACT;
-    VolatileStore(pValue, value | flags);
+    VolatileStore(pValue, dac_cast<TADDR>(value | flags));
 }
 #endif // DACCESS_COMPILE
 
diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp
index 813616e529c8..1c29942942c0 100644
--- a/src/coreclr/vm/ceemain.cpp
+++ b/src/coreclr/vm/ceemain.cpp
@@ -869,8 +869,6 @@ void EEStartupHelper()
         // Set up the sync block
         SyncBlockCache::Start();
 
-        StackwalkCache::Init();
-
         // This isn't done as part of InitializeGarbageCollector() above because it
         // requires write barriers to have been set up on x86, which happens as part
         // of InitJITHelpers1.
@@ -935,7 +933,7 @@ void EEStartupHelper()
         // retrieve configured max size for the mini-metadata buffer (defaults to 64KB)
         g_MiniMetaDataBuffMaxSize = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_MiniMdBufferCapacity);
         // align up to GetOsPageSize(), with a maximum of 1 MB
-        g_MiniMetaDataBuffMaxSize = (DWORD) min(ALIGN_UP(g_MiniMetaDataBuffMaxSize, GetOsPageSize()), 1024 * 1024);
+        g_MiniMetaDataBuffMaxSize = (DWORD) min(ALIGN_UP(g_MiniMetaDataBuffMaxSize, GetOsPageSize()), (DWORD)(1024 * 1024));
         // allocate the buffer. this is never touched while the process is running, so it doesn't
         // contribute to the process' working set. it is needed only as a "shadow" for a mini-metadata
         // buffer that will be set up and reported / updated in the Watson process (the
@@ -1331,12 +1329,6 @@ void STDMETHODCALLTYPE EEShutDownHelper(BOOL fIsDllUnloading)
             {
                 g_fEEShutDown |= ShutDown_Phase2;
 
-                // Shutdown finalizer before we suspend all background threads. Otherwise we
-                // never get to finalize anything.
-
-                // No longer process exceptions
-                g_fNoExceptions = true;
-
                 // <TODO>@TODO: This does things which shouldn't occur in part 2.  Namely,
                 // calling managed dll main callbacks (AppDomain::SignalProcessDetach), and
                 // RemoveAppDomainFromIPC.
@@ -1353,32 +1345,9 @@ void STDMETHODCALLTYPE EEShutDownHelper(BOOL fIsDllUnloading)
                 TerminateDebugger();
 #endif // DEBUGGING_SUPPORTED
 
-                StubManager::TerminateStubManagers();
-
-#ifdef FEATURE_INTERPRETER
-                Interpreter::Terminate();
-#endif // FEATURE_INTERPRETER
-
                 //@TODO: find the right place for this
                 VirtualCallStubManager::UninitStatic();
 
-                // Unregister our vectored exception and continue handlers from the OS.
-                // This will ensure that if any other DLL unload (after ours) has an exception,
-                // we wont attempt to process that exception (which could lead to various
-                // issues including AV in the runtime).
-                //
-                // This should be done:
-                //
-                // 1) As the last action during the shutdown so that any unexpected AVs
-                //    in the runtime during shutdown do result in FailFast in VEH.
-                //
-                // 2) Only when the runtime is processing DLL_PROCESS_DETACH.
-                CLRRemoveVectoredHandlers();
-
-#if USE_DISASSEMBLER
-                Disassembler::StaticClose();
-#endif // USE_DISASSEMBLER
-
                 WriteJitHelperCountToSTRESSLOG();
 
                 STRESS_LOG0(LF_STARTUP, LL_INFO10, "EEShutdown shutting down logging");
@@ -1702,10 +1671,6 @@ BOOL STDMETHODCALLTYPE EEDllMain( // TRUE on success, FALSE on error.
                     LOG((LF_STARTUP, INFO3, "EEShutDown invoked from EEDllMain"));
                     EEShutDown(TRUE); // shut down EE if it was started up
                 }
-                else
-                {
-                    CLRRemoveVectoredHandlers();
-                }
                 break;
             }
         }
diff --git a/src/coreclr/vm/cgensys.h b/src/coreclr/vm/cgensys.h
index 1396d7558c29..a3accc91a997 100644
--- a/src/coreclr/vm/cgensys.h
+++ b/src/coreclr/vm/cgensys.h
@@ -84,8 +84,6 @@ BOOL GetAnyThunkTarget (T_CONTEXT *pctx, TADDR *pTarget, TADDR *pTargetMethodDes
 
 #endif // DACCESS_COMPILE
 
-
-
 //
 // ResetProcessorStateHolder saves/restores processor state around calls to
 // CoreLib during exception handling.
diff --git a/src/coreclr/vm/classhash.cpp b/src/coreclr/vm/classhash.cpp
index 5d2be11c9b32..1a2af02a0574 100644
--- a/src/coreclr/vm/classhash.cpp
+++ b/src/coreclr/vm/classhash.cpp
@@ -234,7 +234,7 @@ VOID EEClassHashTable::ConstructKeyFromData(PTR_EEClassHashEntry pEntry, // IN
 #endif
 
         // If IsCaseInsensitiveTable() is true for the hash table, strings passed to the ConstructKeyCallback instance
-        // will be dynamically allocated. This is to prevent wasting bytes in the Loader Heap. Thusly, it is important 
+        // will be dynamically allocated. This is to prevent wasting bytes in the Loader Heap. Thusly, it is important
         // to note that in this case, the lifetime of Key is bounded by the lifetime of the single call to UseKeys, and
         // will be freed when that function returns.
 
@@ -452,7 +452,7 @@ EEClassHashTable *EEClassHashTable::MakeCaseInsensitiveTable(Module *pModule, Al
 
     // Allocate the table and verify that we actually got one.
     EEClassHashTable * pCaseInsTable = EEClassHashTable::Create(pModule,
-                                                                max(BaseGetElementCount() / 2, 11),
+                                                                max(BaseGetElementCount() / 2, (DWORD)11),
                                                                 this,
                                                                 pamTracker);
 
diff --git a/src/coreclr/vm/classlayoutinfo.cpp b/src/coreclr/vm/classlayoutinfo.cpp
index 9dd6fb4881b2..8336f8906603 100644
--- a/src/coreclr/vm/classlayoutinfo.cpp
+++ b/src/coreclr/vm/classlayoutinfo.cpp
@@ -136,7 +136,7 @@ namespace
     )
     {
         UINT32 cbCurOffset = parentSize;
-        BYTE LargestAlignmentRequirement = max(1, min(packingSize, parentAlignmentRequirement));
+        BYTE LargestAlignmentRequirement = max<BYTE>(1, min(packingSize, parentAlignmentRequirement));
 
         // Start with the size inherited from the parent (if any).
         uint32_t calcTotalSize = parentSize;
@@ -198,7 +198,7 @@ namespace
                 COMPlusThrowOM();
 
             // size must be large enough to accommodate layout. If not, we use the layout size instead.
-            calcTotalSize = max(classSize, calcTotalSize);
+            calcTotalSize = max((uint32_t)classSize, calcTotalSize);
         }
         else
         {
@@ -1023,7 +1023,6 @@ EEClassNativeLayoutInfo* EEClassNativeLayoutInfo::CollectNativeLayoutFieldMetada
     {
         // The intrinsic Vector<T> type has a special size. Copy the native size and alignment
         // from the managed size and alignment.
-        // Crossgen scenarios block Vector<T> from even being loaded, so only do this check when not in crossgen.
         if (pMT->HasSameTypeDefAs(CoreLibBinder::GetClass(CLASS__VECTORT)))
         {
             pNativeLayoutInfo->m_size = pEEClassLayoutInfo->GetManagedSize();
diff --git a/src/coreclr/vm/clrtocomcall.cpp b/src/coreclr/vm/clrtocomcall.cpp
index c604a6c8a901..d47445f8f64f 100644
--- a/src/coreclr/vm/clrtocomcall.cpp
+++ b/src/coreclr/vm/clrtocomcall.cpp
@@ -505,7 +505,7 @@ UINT32 CLRToCOMLateBoundWorker(
     ULONG uSemantic;
 
     // See if there is property information for this member.
-    hr = pItfMT->GetModule()->GetPropertyInfoForMethodDef(pItfMD->GetMemberDef(), &propToken, &strMemberName, &uSemantic);
+    hr = pItfMT->GetMDImport()->GetPropertyInfoForMethodDef(pItfMD->GetMemberDef(), &propToken, &strMemberName, &uSemantic);
     if (hr != S_OK)
     {
         // Non-property method
diff --git a/src/coreclr/vm/clsload.cpp b/src/coreclr/vm/clsload.cpp
index 51dbb6c139aa..32d69142890b 100644
--- a/src/coreclr/vm/clsload.cpp
+++ b/src/coreclr/vm/clsload.cpp
@@ -2751,7 +2751,7 @@ TypeHandle ClassLoader::CreateTypeHandleForTypeKey(const TypeKey* pKey, AllocMem
             // no parameterized type allowed on a reference
             if (paramType.GetInternalCorElementType() == ELEMENT_TYPE_BYREF)
             {
-                ThrowTypeLoadException(pKey, IDS_CLASSLOAD_GENERAL);
+                ThrowTypeLoadException(pKey, (kind == ELEMENT_TYPE_BYREF) ? IDS_CLASSLOAD_BYREF_OF_BYREF : IDS_CLASSLOAD_POINTER_OF_BYREF);
             }
 
             // We do allow parameterized types of ByRefLike types. Languages may restrict them to produce safe or verifiable code,
@@ -3065,7 +3065,7 @@ TypeHandle ClassLoader::LoadTypeHandleForTypeKeyNoLock(const TypeKey *pTypeKey,
 //
 class PendingTypeLoadHolder
 {
-    Thread * m_pThread;
+    static thread_local PendingTypeLoadHolder * t_pCurrent;
     PendingTypeLoadTable::Entry * m_pEntry;
     PendingTypeLoadHolder * m_pPrevious;
 
@@ -3074,26 +3074,25 @@ class PendingTypeLoadHolder
     {
         LIMITED_METHOD_CONTRACT;
 
-        m_pThread = GetThread();
         m_pEntry = pEntry;
 
-        m_pPrevious = m_pThread->GetPendingTypeLoad();
-        m_pThread->SetPendingTypeLoad(this);
+        m_pPrevious = t_pCurrent;
+        t_pCurrent = this;
     }
 
     ~PendingTypeLoadHolder()
     {
         LIMITED_METHOD_CONTRACT;
 
-        _ASSERTE(m_pThread->GetPendingTypeLoad() == this);
-        m_pThread->SetPendingTypeLoad(m_pPrevious);
+        _ASSERTE(t_pCurrent == this);
+        t_pCurrent = m_pPrevious;
     }
 
     static bool CheckForDeadLockOnCurrentThread(PendingTypeLoadTable::Entry * pEntry)
     {
         LIMITED_METHOD_CONTRACT;
 
-        PendingTypeLoadHolder * pCurrent = GetThread()->GetPendingTypeLoad();
+        PendingTypeLoadHolder * pCurrent = t_pCurrent;
 
         while (pCurrent != NULL)
         {
@@ -3107,6 +3106,8 @@ class PendingTypeLoadHolder
     }
 };
 
+thread_local PendingTypeLoadHolder * PendingTypeLoadHolder::t_pCurrent = NULL;
+
 //---------------------------------------------------------------------------------------
 //
 TypeHandle
diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp
index 52917161fb00..228c65f00412 100644
--- a/src/coreclr/vm/codeman.cpp
+++ b/src/coreclr/vm/codeman.cpp
@@ -645,11 +645,10 @@ BOOL EEJitManager::CodeHeapIterator::Next()
 // writer lock and check for any readers. If there are any, the WriterLockHolder functions
 // release the writer and yield to wait for the readers to be done.
 
-ExecutionManager::ReaderLockHolder::ReaderLockHolder(HostCallPreference hostCallPreference /*=AllowHostCalls*/)
+ExecutionManager::ReaderLockHolder::ReaderLockHolder()
 {
     CONTRACTL {
         NOTHROW;
-        if (hostCallPreference == AllowHostCalls) { HOST_CALLS; } else { HOST_NOCALLS; }
         GC_NOTRIGGER;
         CAN_TAKE_LOCK;
     } CONTRACTL_END;
@@ -662,15 +661,6 @@ ExecutionManager::ReaderLockHolder::ReaderLockHolder(HostCallPreference hostCall
 
     if (VolatileLoad(&m_dwWriterLock) != 0)
     {
-        if (hostCallPreference != AllowHostCalls)
-        {
-            // Rats, writer lock is held. Gotta bail. Since the reader count was already
-            // incremented, we're technically still blocking writers at the moment. But
-            // the holder who called us is about to call DecrementReader in its
-            // destructor and unblock writers.
-            return;
-        }
-
         YIELD_WHILE ((VolatileLoad(&m_dwWriterLock) != 0));
     }
 }
@@ -765,7 +755,7 @@ ExecutionManager::WriterLockHolder::~WriterLockHolder()
 // If it is, we will assume the locked data is in an inconsistent
 // state and throw. We never actually take the lock.
 // Note: Throws
-ExecutionManager::ReaderLockHolder::ReaderLockHolder(HostCallPreference hostCallPreference /*=AllowHostCalls*/)
+ExecutionManager::ReaderLockHolder::ReaderLockHolder()
 {
     SUPPORTS_DAC;
 
@@ -947,7 +937,6 @@ ExecutionManager::ScanFlag ExecutionManager::GetScanFlags()
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     } CONTRACTL_END;
 
@@ -1297,9 +1286,6 @@ void EEJitManager::SetCpuInfo()
         CPUCompileFlags.Set(InstructionSet_VectorT512);
     }
 
-    // TODO-XArch: Add support for 512-bit Vector<T>
-    _ASSERTE(!CPUCompileFlags.IsSet(InstructionSet_VectorT512));
-
     if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic))
     {
         CPUCompileFlags.Set(InstructionSet_X86Base);
@@ -1448,6 +1434,26 @@ void EEJitManager::SetCpuInfo()
     {
         CPUCompileFlags.Set(InstructionSet_X86Serialize);
     }
+
+    // As Avx10v1_V512 could imply Avx10v1_V256 and Avx10v1, and Avx10v1_V256 could imply Avx10v1
+    // then the flag check here can be conducted for only once, and let 
+    // `EnusreValidInstructionSetSupport` to handle the illegal combination.
+    // To ensure `EnusreValidInstructionSetSupport` handle the dependency correctly, the implication
+    // defined in InstructionSetDesc.txt should be explicit, no transitive implication should be assumed.
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx10v1) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v1))
+    {
+        CPUCompileFlags.Set(InstructionSet_AVX10v1);
+    }
+
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx10v1_V256) != 0))
+    {
+        CPUCompileFlags.Set(InstructionSet_AVX10v1_V256);
+    }
+
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx10v1_V512) != 0))
+    {
+        CPUCompileFlags.Set(InstructionSet_AVX10v1_V512);
+    }
 #elif defined(TARGET_ARM64)
 
 #if !defined(TARGET_WINDOWS)
@@ -2296,7 +2302,7 @@ VOID EEJitManager::EnsureJumpStubReserve(BYTE * pImageBase, SIZE_T imageSize, SI
     int allocMode = 0;
 
     // Try to reserve at least 16MB at a time
-    SIZE_T allocChunk = max(ALIGN_UP(reserveSize, VIRTUAL_ALLOC_RESERVE_GRANULARITY), 16*1024*1024);
+    SIZE_T allocChunk = max<SIZE_T>(ALIGN_UP(reserveSize, VIRTUAL_ALLOC_RESERVE_GRANULARITY), 16*1024*1024);
 
     while (reserveSize > 0)
     {
@@ -2795,9 +2801,7 @@ void* EEJitManager::allocCodeRaw(CodeHeapRequestInfo *pInfo,
 
 void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveForJumpStubs, CorJitAllocMemFlag flag, CodeHeader** ppCodeHeader, CodeHeader** ppCodeHeaderRW,
                              size_t* pAllocatedSize, HeapList** ppCodeHeap
-#ifdef USE_INDIRECT_CODEHEADER
                            , BYTE** ppRealHeader
-#endif
 #ifdef FEATURE_EH_FUNCLETS
                            , UINT nUnwindInfos
 #endif
@@ -2816,11 +2820,11 @@ void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveFo
 
     if ((flag & CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN) != 0)
     {
-        alignment = max(alignment, 32);
+        alignment = max(alignment, 32u);
     }
     else if ((flag & CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN) != 0)
     {
-        alignment = max(alignment, 16);
+        alignment = max(alignment, 16u);
     }
 
 #if defined(TARGET_X86)
@@ -2828,7 +2832,7 @@ void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveFo
     // the JIT can in turn 8-byte align the loop entry headers.
     else if ((g_pConfig->GenOptimizeType() != OPT_SIZE))
     {
-        alignment = max(alignment, 8);
+        alignment = max(alignment, 8u);
     }
 #endif
 
@@ -2850,8 +2854,11 @@ void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveFo
 #endif
     requestInfo.setReserveForJumpStubs(reserveForJumpStubs);
 
-#if defined(USE_INDIRECT_CODEHEADER)
+#ifdef FEATURE_EH_FUNCLETS
     SIZE_T realHeaderSize = offsetof(RealCodeHeader, unwindInfos[0]) + (sizeof(T_RUNTIME_FUNCTION) * nUnwindInfos);
+#else
+    SIZE_T realHeaderSize = sizeof(RealCodeHeader);
+#endif
 
     // if this is a LCG method then we will be allocating the RealCodeHeader
     // following the code so that the code block can be removed easily by
@@ -2861,7 +2868,6 @@ void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveFo
         totalSize = ALIGN_UP(totalSize, sizeof(void*)) + realHeaderSize;
         static_assert_no_msg(CODE_SIZE_ALIGN >= sizeof(void*));
     }
-#endif  // USE_INDIRECT_CODEHEADER
 
     // Scope the lock
     {
@@ -2891,7 +2897,6 @@ void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveFo
             pCodeHdrRW = pCodeHdr;
         }
 
-#ifdef USE_INDIRECT_CODEHEADER
         if (requestInfo.IsDynamicDomain())
         {
             // Set the real code header to the writeable mapping so that we can set its members via the CodeHeader methods below
@@ -2905,7 +2910,6 @@ void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveFo
             BYTE* pRealHeader = (BYTE*)(void*)pMD->GetLoaderAllocator()->GetLowFrequencyHeap()->AllocMem(S_SIZE_T(realHeaderSize));
             pCodeHdrRW->SetRealCodeHeader(pRealHeader);
         }
-#endif
 
         pCodeHdrRW->SetDebugInfo(NULL);
         pCodeHdrRW->SetEHInfo(NULL);
@@ -2915,7 +2919,6 @@ void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveFo
         pCodeHdrRW->SetNumberOfUnwindInfos(nUnwindInfos);
 #endif
 
-#ifdef USE_INDIRECT_CODEHEADER
         if (requestInfo.IsDynamicDomain())
         {
             *ppRealHeader = (BYTE*)pCode + ALIGN_UP(blockSize, sizeof(void*));
@@ -2924,7 +2927,6 @@ void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveFo
         {
             *ppRealHeader = NULL;
         }
-#endif // USE_INDIRECT_CODEHEADER
     }
 
     *ppCodeHeader = pCodeHdr;
@@ -3204,7 +3206,7 @@ JumpStubBlockHeader *  EEJitManager::allocJumpStubBlock(MethodDesc* pMD, DWORD n
         CrstHolder ch(&m_CodeHeapCritSec);
 
         mem = (TADDR) allocCodeRaw(&requestInfo, sizeof(CodeHeader), blockSize, CODE_SIZE_ALIGN, &pCodeHeap);
-        if (mem == NULL)
+        if (mem == (TADDR)0)
         {
             _ASSERTE(!throwOnOutOfMemoryWithinRange);
             RETURN(NULL);
@@ -3284,7 +3286,6 @@ GCInfoToken EEJitManager::GetGCInfoToken(const METHODTOKEN& MethodToken)
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     } CONTRACTL_END;
 
@@ -3754,7 +3755,7 @@ static CodeHeader * GetCodeHeaderFromDebugInfoRequest(const DebugInfoRequest & r
     } CONTRACTL_END;
 
     TADDR address = (TADDR) request.GetStartAddress();
-    _ASSERTE(address != NULL);
+    _ASSERTE(address != (TADDR)0);
 
     CodeHeader * pHeader = dac_cast<PTR_CodeHeader>(address & ~3) - 1;
     _ASSERTE(pHeader != NULL);
@@ -3861,9 +3862,7 @@ void CodeHeader::EnumMemoryRegions(CLRDataEnumMemoryFlags flags, IJitManager* pJ
 
     DAC_ENUM_DTHIS();
 
-#ifdef USE_INDIRECT_CODEHEADER
     this->pRealCodeHeader.EnumMem();
-#endif // USE_INDIRECT_CODEHEADER
 
 #ifdef FEATURE_ON_STACK_REPLACEMENT
     BOOL hasFlagByte = TRUE;
@@ -3926,7 +3925,7 @@ BOOL EEJitManager::JitCodeToMethodInfo(
         return FALSE;
 
     TADDR start = dac_cast<PTR_EEJitManager>(pRangeSection->_pjit)->FindMethodCode(pRangeSection, currentPC);
-    if (start == NULL)
+    if (start == (TADDR)0)
         return FALSE;
 
     CodeHeader * pCHdr = PTR_CodeHeader(start - sizeof(CodeHeader));
@@ -3971,7 +3970,7 @@ StubCodeBlockKind EEJitManager::GetStubCodeBlockKind(RangeSection * pRangeSectio
     }
 
     TADDR start = dac_cast<PTR_EEJitManager>(pRangeSection->_pjit)->FindMethodCode(pRangeSection, currentPC);
-    if (start == NULL)
+    if (start == (TADDR)0)
         return STUB_CODE_BLOCK_NOCODE;
     CodeHeader * pCHdr = PTR_CodeHeader(start - sizeof(CodeHeader));
     return pCHdr->IsStubCodeBlock() ? pCHdr->GetStubCodeBlockKind() : STUB_CODE_BLOCK_MANAGED;
@@ -4426,7 +4425,7 @@ ExecutionManager::FindCodeRange(PCODE currentPC, ScanFlag scanFlag)
         SUPPORTS_DAC;
     } CONTRACTL_END;
 
-    if (currentPC == NULL)
+    if (currentPC == (PCODE)NULL)
         return NULL;
 
     if (scanFlag == ScanReaderLock)
@@ -4464,7 +4463,7 @@ ExecutionManager::FindCodeRangeWithLock(PCODE currentPC)
 PCODE ExecutionManager::GetCodeStartAddress(PCODE currentPC)
 {
     WRAPPER_NO_CONTRACT;
-    _ASSERTE(currentPC != NULL);
+    _ASSERTE(currentPC != (PCODE)NULL);
 
     EECodeInfo codeInfo(currentPC);
     if (!codeInfo.IsValid())
@@ -4512,7 +4511,7 @@ BOOL ExecutionManager::IsManagedCode(PCODE currentPC)
         GC_NOTRIGGER;
     } CONTRACTL_END;
 
-    if (currentPC == NULL)
+    if (currentPC == (PCODE)NULL)
         return FALSE;
 
     if (GetScanFlags() == ScanReaderLock)
@@ -4545,35 +4544,6 @@ BOOL ExecutionManager::IsManagedCodeWithLock(PCODE currentPC)
     return result;
 }
 
-//**************************************************************************
-BOOL ExecutionManager::IsManagedCode(PCODE currentPC, HostCallPreference hostCallPreference /*=AllowHostCalls*/, BOOL *pfFailedReaderLock /*=NULL*/)
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-    } CONTRACTL_END;
-
-#ifdef DACCESS_COMPILE
-    return IsManagedCode(currentPC);
-#else
-    if (hostCallPreference == AllowHostCalls)
-    {
-        return IsManagedCode(currentPC);
-    }
-
-    ReaderLockHolder rlh(hostCallPreference);
-    if (!rlh.Acquired())
-    {
-        _ASSERTE(pfFailedReaderLock != NULL);
-        *pfFailedReaderLock = TRUE;
-        return FALSE;
-    }
-
-    RangeSectionLockState lockState = RangeSectionLockState::ReaderLocked;
-    return IsManagedCodeWorker(currentPC, &lockState);
-#endif
-}
-
 //**************************************************************************
 // Assumes that the ExecutionManager reader/writer lock is taken or that
 // it is safe not to take it.
@@ -4598,7 +4568,7 @@ BOOL ExecutionManager::IsManagedCodeWorker(PCODE currentPC, RangeSectionLockStat
         // but on we could also be in a stub, so we check for that
         // as well and we don't consider stub to be real managed code.
         TADDR start = dac_cast<PTR_EEJitManager>(pRS->_pjit)->FindMethodCode(pRS, currentPC);
-        if (start == NULL)
+        if (start == (TADDR)0)
             return FALSE;
         CodeHeader * pCHdr = PTR_CodeHeader(start - sizeof(CodeHeader));
         if (!pCHdr->IsStubCodeBlock())
@@ -4686,7 +4656,6 @@ RangeSection* ExecutionManager::GetRangeSection(TADDR addr, RangeSectionLockStat
 {
     CONTRACTL {
         NOTHROW;
-        HOST_NOCALLS;
         GC_NOTRIGGER;
         SUPPORTS_DAC;
     } CONTRACTL_END;
@@ -4702,7 +4671,6 @@ PTR_Module ExecutionManager::FindReadyToRunModule(TADDR currentData)
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
-        STATIC_CONTRACT_HOST_CALLS;
         SUPPORTS_DAC;
     }
     CONTRACTL_END;
@@ -4776,7 +4744,6 @@ void ExecutionManager::AddCodeRange(TADDR          pStartRange,
     CONTRACTL {
         THROWS;
         GC_NOTRIGGER;
-        HOST_CALLS;
         PRECONDITION(pStartRange < pEndRange);
         PRECONDITION(CheckPointer(pJit));
         PRECONDITION(CheckPointer(pModule));
@@ -4800,7 +4767,6 @@ void ExecutionManager::AddCodeRange(TADDR          pStartRange,
     CONTRACTL {
         THROWS;
         GC_NOTRIGGER;
-        HOST_CALLS;
         PRECONDITION(pStartRange < pEndRange);
         PRECONDITION(CheckPointer(pJit));
         PRECONDITION(CheckPointer(pHp));
@@ -4825,7 +4791,6 @@ void ExecutionManager::AddCodeRange(TADDR          pStartRange,
     CONTRACTL {
         THROWS;
         GC_NOTRIGGER;
-        HOST_CALLS;
         PRECONDITION(pStartRange < pEndRange);
         PRECONDITION(CheckPointer(pJit));
         PRECONDITION(CheckPointer(pRangeList));
@@ -4908,8 +4873,6 @@ void RangeSection::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
 
 void ExecutionManager::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
 {
-    STATIC_CONTRACT_HOST_CALLS;
-
     ReaderLockHolder rlh;
 
     //
@@ -4938,14 +4901,6 @@ void ExecutionManager::Unload(LoaderAllocator *pLoaderAllocator)
     // a size of 0 is a signal to Nirvana to flush the entire cache
     FlushInstructionCache(GetCurrentProcess(),0,0);
 
-    /* StackwalkCacheEntry::EIP is an address into code. Since we are
-    unloading the code, we need to invalidate the cache. Otherwise,
-    its possible that another appdomain might generate code at the very
-    same address, and we might incorrectly think that the old
-    StackwalkCacheEntry corresponds to it. So flush the cache.
-    */
-    StackwalkCache::Invalidate(pLoaderAllocator);
-
     JumpStubCache * pJumpStubCache = (JumpStubCache *) pLoaderAllocator->m_pJumpStubCache;
     if (pJumpStubCache != NULL)
     {
@@ -5013,7 +4968,7 @@ PCODE ExecutionManager::jumpStub(MethodDesc* pMD, PCODE target,
         POSTCONDITION((RETVAL != NULL) || !throwOnOutOfMemoryWithinRange);
     } CONTRACT_END;
 
-    PCODE jumpStub = NULL;
+    PCODE jumpStub = (PCODE)NULL;
 
     if (pLoaderAllocator == NULL)
     {
@@ -5063,7 +5018,7 @@ PCODE ExecutionManager::jumpStub(MethodDesc* pMD, PCODE target,
     {
         jumpStub = i->m_jumpStub;
 
-        _ASSERTE(jumpStub != NULL);
+        _ASSERTE(jumpStub != (PCODE)NULL);
 
         // Is the matching entry with the requested range?
         if (((TADDR)loAddr <= jumpStub) && (jumpStub <= (TADDR)hiAddr))
@@ -5075,10 +5030,10 @@ PCODE ExecutionManager::jumpStub(MethodDesc* pMD, PCODE target,
     // If we get here we need to create a new jump stub
     // add or change the jump stub table to point at the new one
     jumpStub = getNextJumpStub(pMD, target, loAddr, hiAddr, pLoaderAllocator, throwOnOutOfMemoryWithinRange); // this statement can throw
-    if (jumpStub == NULL)
+    if (jumpStub == (PCODE)NULL)
     {
         _ASSERTE(!throwOnOutOfMemoryWithinRange);
-        RETURN(NULL);
+        RETURN((PCODE)NULL);
     }
 
     _ASSERTE(((TADDR)loAddr <= jumpStub) && (jumpStub <= (TADDR)hiAddr));
@@ -5178,7 +5133,7 @@ PCODE ExecutionManager::getNextJumpStub(MethodDesc* pMD, PCODE target,
     if (curBlock == NULL)
     {
         _ASSERTE(!throwOnOutOfMemoryWithinRange);
-        RETURN(NULL);
+        RETURN((PCODE)NULL);
     }
 
     curBlockWriterHolder.AssignExecutableWriterHolder(curBlock, sizeof(JumpStubBlockHeader) + ((size_t) (curBlock->m_used + 1) * BACK_TO_BACK_JUMP_ALLOCATE_SIZE));
@@ -5584,7 +5539,6 @@ ReadyToRunInfo * ReadyToRunJitManager::JitTokenToReadyToRunInfo(const METHODTOKE
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     } CONTRACTL_END;
 
@@ -5596,7 +5550,6 @@ UINT32 ReadyToRunJitManager::JitTokenToGCInfoVersion(const METHODTOKEN& MethodTo
     CONTRACTL{
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     } CONTRACTL_END;
 
@@ -5610,7 +5563,6 @@ PTR_RUNTIME_FUNCTION ReadyToRunJitManager::JitTokenToRuntimeFunction(const METHO
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     } CONTRACTL_END;
 
@@ -5622,7 +5574,6 @@ TADDR ReadyToRunJitManager::JitTokenToStartAddress(const METHODTOKEN& MethodToke
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     } CONTRACTL_END;
 
@@ -5635,7 +5586,6 @@ GCInfoToken ReadyToRunJitManager::GetGCInfoToken(const METHODTOKEN& MethodToken)
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     } CONTRACTL_END;
 
@@ -6031,7 +5981,6 @@ BOOL ReadyToRunJitManager::IsFunclet(EECodeInfo* pCodeInfo)
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     } CONTRACTL_END;
 
@@ -6115,7 +6064,6 @@ void ReadyToRunJitManager::JitTokenToMethodRegionInfo(const METHODTOKEN& MethodT
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
         PRECONDITION(methodRegionInfo != NULL);
     } CONTRACTL_END;
diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h
index 6fe87885da11..3319d3e0c300 100644
--- a/src/coreclr/vm/codeman.h
+++ b/src/coreclr/vm/codeman.h
@@ -69,10 +69,6 @@ Module Name:
 #include "gcinfo.h"
 #include "eexcp.h"
 
-#if defined(FEATURE_EH_FUNCLETS) && !defined(USE_INDIRECT_CODEHEADER)
-#error "FEATURE_EH_FUNCLETS requires USE_INDIRECT_CODEHEADER"
-#endif // FEATURE_EH_FUNCLETS && !USE_INDIRECT_CODEHEADER
-
 class MethodDesc;
 class ICorJitCompiler;
 class IJitManager;
@@ -125,20 +121,10 @@ enum StubCodeBlockKind : int
 // Today CodeHeader is used by the EEJitManager.
 // The GCInfo version is always current GCINFO_VERSION in this header.
 
-#ifdef USE_INDIRECT_CODEHEADER
 typedef DPTR(struct _hpRealCodeHdr) PTR_RealCodeHeader;
 typedef DPTR(struct _hpCodeHdr) PTR_CodeHeader;
 
-#else // USE_INDIRECT_CODEHEADER
-typedef DPTR(struct _hpCodeHdr) PTR_CodeHeader;
-
-#endif // USE_INDIRECT_CODEHEADER
-
-#ifdef USE_INDIRECT_CODEHEADER
 typedef struct _hpRealCodeHdr
-#else // USE_INDIRECT_CODEHEADER
-typedef struct _hpCodeHdr
-#endif // USE_INDIRECT_CODEHEADER
 {
 public:
     PTR_BYTE            phdrDebugInfo;
@@ -160,95 +146,9 @@ typedef struct _hpCodeHdr
 #endif // FEATURE_EH_FUNCLETS
 
 public:
-#ifndef USE_INDIRECT_CODEHEADER
-    //
-    // Note: that the JITted code follows immediately after the MethodDesc*
-    //
-    PTR_BYTE                GetDebugInfo()
-    {
-        SUPPORTS_DAC;
-
-        return phdrDebugInfo;
-    }
-    PTR_EE_ILEXCEPTION      GetEHInfo()
-    {
-        return phdrJitEHInfo;
-    }
-    PTR_BYTE                GetGCInfo()
-    {
-        SUPPORTS_DAC;
-        return phdrJitGCInfo;
-    }
-    PTR_MethodDesc          GetMethodDesc()
-    {
-        SUPPORTS_DAC;
-        return phdrMDesc;
-    }
-#if defined(FEATURE_GDBJIT)
-    VOID*                GetCalledMethods()
-    {
-        SUPPORTS_DAC;
-        return pCalledMethods;
-    }
-#endif
-    TADDR                   GetCodeStartAddress()
-    {
-        SUPPORTS_DAC;
-        return dac_cast<TADDR>(dac_cast<PTR_CodeHeader>(this) + 1);
-    }
-    StubCodeBlockKind       GetStubCodeBlockKind()
-    {
-        SUPPORTS_DAC;
-        return (StubCodeBlockKind)dac_cast<TADDR>(phdrMDesc);
-    }
-    BOOL                    IsStubCodeBlock()
-    {
-        SUPPORTS_DAC;
-        // Note that it is important for this comparison to be unsigned
-        return dac_cast<TADDR>(phdrMDesc) <= (TADDR)STUB_CODE_BLOCK_LAST;
-    }
-
-    void SetDebugInfo(PTR_BYTE pDI)
-    {
-        phdrDebugInfo = pDI;
-    }
-    void SetEHInfo(PTR_EE_ILEXCEPTION pEH)
-    {
-        phdrJitEHInfo = pEH;
-    }
-    void SetGCInfo(PTR_BYTE pGC)
-    {
-        phdrJitGCInfo = pGC;
-    }
-    void SetMethodDesc(PTR_MethodDesc pMD)
-    {
-        phdrMDesc = pMD;
-    }
-#if defined(FEATURE_GDBJIT)
-    void SetCalledMethods(VOID* pCM)
-    {
-        pCalledMethods = pCM;
-    }
-#endif
-    void SetStubCodeBlockKind(StubCodeBlockKind kind)
-    {
-        phdrMDesc = (PTR_MethodDesc)kind;
-    }
-#endif // !USE_INDIRECT_CODEHEADER
-
 // if we're using the indirect codeheaders then all enumeration is done by the code header
-#ifndef USE_INDIRECT_CODEHEADER
-#ifdef DACCESS_COMPILE
-    void EnumMemoryRegions(CLRDataEnumMemoryFlags flags, IJitManager* pJitMan);
-#endif  // DACCESS_COMPILE
-#endif  // USE_INDIRECT_CODEHEADER
-#ifdef USE_INDIRECT_CODEHEADER
 } RealCodeHeader;
-#else // USE_INDIRECT_CODEHEADER
-} CodeHeader;
-#endif // USE_INDIRECT_CODEHEADER
 
-#ifdef USE_INDIRECT_CODEHEADER
 typedef struct _hpCodeHdr
 {
     PTR_RealCodeHeader   pRealCodeHeader;
@@ -355,7 +255,6 @@ typedef struct _hpCodeHdr
 #endif  // DACCESS_COMPILE
 
 } CodeHeader;
-#endif // USE_INDIRECT_CODEHEADER
 
 
 //-----------------------------------------------------------------------------
@@ -1459,7 +1358,12 @@ class RangeSectionMap
                         
                         // This level is completely empty. Free it, and then null out the pointer to it.
                         pointerToLevelData->Uninstall();
+#if defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfree-nonheap-object" // The compiler can't tell that this pointer always comes from a malloc call.
                         free((void*)rawData);
+#pragma GCC diagnostic pop
+#endif
                     }
                 }
 
@@ -1871,9 +1775,7 @@ class EEJitManager : public IJitManager
 
     void                allocCode(MethodDesc* pFD, size_t blockSize, size_t reserveForJumpStubs, CorJitAllocMemFlag flag, CodeHeader** ppCodeHeader, CodeHeader** ppCodeHeaderRW,
                                   size_t* pAllocatedSize, HeapList** ppCodeHeap
-#ifdef USE_INDIRECT_CODEHEADER
                                 , BYTE** ppRealHeader
-#endif
 #ifdef FEATURE_EH_FUNCLETS
                                 , UINT nUnwindInfos
 #endif
@@ -2103,9 +2005,6 @@ class ExecutionManager
     // Returns whether currentPC is in managed code. Returns false for jump stubs on WIN64.
     static BOOL IsManagedCode(PCODE currentPC);
 
-    // Special version with profiler hook
-    static BOOL IsManagedCode(PCODE currentPC, HostCallPreference hostCallPreference, BOOL *pfFailedReaderLock);
-
     // Returns true if currentPC is ready to run codegen
     static BOOL IsReadyToRunCode(PCODE currentPC);
 
@@ -2136,7 +2035,7 @@ class ExecutionManager
     class ReaderLockHolder
     {
     public:
-        ReaderLockHolder(HostCallPreference hostCallPreference = AllowHostCalls);
+        ReaderLockHolder();
         ~ReaderLockHolder();
 
         BOOL Acquired();
@@ -2369,7 +2268,6 @@ inline TADDR EEJitManager::JitTokenToStartAddress(const METHODTOKEN& MethodToken
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     } CONTRACTL_END;
 
@@ -2383,7 +2281,6 @@ inline void EEJitManager::JitTokenToMethodRegionInfo(const METHODTOKEN& MethodTo
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
         PRECONDITION(methodRegionInfo != NULL);
     } CONTRACTL_END;
@@ -2619,21 +2516,20 @@ class EECodeInfo
     PTR_RUNTIME_FUNCTION GetFunctionEntry();
     BOOL        IsFunclet()     { WRAPPER_NO_CONTRACT; return GetJitManager()->IsFunclet(this); }
     EECodeInfo  GetMainFunctionInfo();
-    ULONG               GetFixedStackSize();
-
-#if defined(TARGET_AMD64)
-    BOOL        HasFrameRegister();
-#endif // TARGET_AMD64
+#endif // FEATURE_EH_FUNCLETS
 
-#else // FEATURE_EH_FUNCLETS
+#if defined(TARGET_X86)
     ULONG       GetFixedStackSize()
     {
         WRAPPER_NO_CONTRACT;
         return GetCodeManager()->GetFrameSize(GetGCInfoToken());
     }
-#endif // FEATURE_EH_FUNCLETS
+#endif // TARGET_X86
 
 #if defined(TARGET_AMD64)
+    BOOL        HasFrameRegister();
+    ULONG       GetFixedStackSize();
+
     void         GetOffsetsFromUnwindInfo(ULONG* pRSPOffset, ULONG* pRBPOffset);
     ULONG        GetFrameOffsetFromUnwindInfo();
 #if defined(_DEBUG) && defined(HAVE_GCCOVER)
diff --git a/src/coreclr/vm/codeversion.cpp b/src/coreclr/vm/codeversion.cpp
index cf1aaaea6788..741fe0390bb5 100644
--- a/src/coreclr/vm/codeversion.cpp
+++ b/src/coreclr/vm/codeversion.cpp
@@ -920,7 +920,7 @@ PTR_COR_ILMETHOD ILCodeVersion::GetIL() const
         PTR_MethodDesc pMethodDesc = dac_cast<PTR_MethodDesc>(pModule->LookupMethodDef(GetMethodDef()));
         if (pMethodDesc != NULL)
         {
-            pIL = dac_cast<PTR_COR_ILMETHOD>(pMethodDesc->GetILHeader(TRUE));
+            pIL = dac_cast<PTR_COR_ILMETHOD>(pMethodDesc->GetILHeader());
         }
     }
 
diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp
index 7e7a4de4b02a..ef4021039a66 100644
--- a/src/coreclr/vm/comdelegate.cpp
+++ b/src/coreclr/vm/comdelegate.cpp
@@ -2019,7 +2019,7 @@ void COMDelegate::ThrowIfInvalidUnmanagedCallersOnlyUsage(MethodDesc* pMD)
 
     // Arguments - Scenarios involving UnmanagedCallersOnly are handled during the jit.
     bool unmanagedCallersOnlyRequiresMarshalling = false;
-    if (NDirect::MarshalingRequired(pMD, NULL, NULL, unmanagedCallersOnlyRequiresMarshalling))
+    if (NDirect::MarshalingRequired(pMD, NULL, NULL, NULL, unmanagedCallersOnlyRequiresMarshalling))
         EX_THROW(EEResourceException, (kInvalidProgramException, W("InvalidProgram_NonBlittableTypes")));
 }
 
diff --git a/src/coreclr/vm/comdynamic.cpp b/src/coreclr/vm/comdynamic.cpp
index 4cdecbb43a73..f0d7e5697b45 100644
--- a/src/coreclr/vm/comdynamic.cpp
+++ b/src/coreclr/vm/comdynamic.cpp
@@ -460,7 +460,7 @@ extern "C" void QCALLTYPE TypeBuilder_SetMethodIL(QCall::ModuleHandle pModule,
         // add the starting address of the il blob to the il blob hash table
         // we need to find this information from out of process for debugger inspection
         // APIs so we have to store this information where we can get it later
-        pModule->SetDynamicIL(mdToken(tk), TADDR(startBuf), FALSE);
+        pModule->SetDynamicIL(mdToken(tk), TADDR(startBuf));
 
         DWORD       dwImplFlags;
 
diff --git a/src/coreclr/vm/common.h b/src/coreclr/vm/common.h
index f0edc0f15cd0..8b8ff9e842b3 100644
--- a/src/coreclr/vm/common.h
+++ b/src/coreclr/vm/common.h
@@ -58,7 +58,7 @@
 #include <stdint.h>
 #include <stddef.h>
 #include <winwrap.h>
-
+#include <algorithm>
 
 #include <windef.h>
 #include <winnt.h>
@@ -66,13 +66,16 @@
 #include <wchar.h>
 #include <objbase.h>
 #include <float.h>
-#include <math.h>
+#include <cmath>
 #include <time.h>
 #include <limits.h>
 #include <assert.h>
 
 #include <olectl.h>
 
+using std::max;
+using std::min;
+
 #ifdef _MSC_VER
 //non inline intrinsics are faster
 #pragma function(memcpy,memcmp,strcmp,strcpy,strlen,strcat)
diff --git a/src/coreclr/vm/commtmemberinfomap.cpp b/src/coreclr/vm/commtmemberinfomap.cpp
index 8bc185e9d81a..7b2626a24c76 100644
--- a/src/coreclr/vm/commtmemberinfomap.cpp
+++ b/src/coreclr/vm/commtmemberinfomap.cpp
@@ -689,7 +689,7 @@ void ComMTMemberInfoMap::GetMethodPropsForMeth(
     rProps[ix].bFunction2Getter = FALSE;
 
     // See if there is property information for this member.
-    hr = pMeth->GetModule()->GetPropertyInfoForMethodDef(pMeth->GetMemberDef(), &pd, &pPropName, &uSemantic);
+    hr = pMeth->GetMDImport()->GetPropertyInfoForMethodDef(pMeth->GetMemberDef(), &pd, &pPropName, &uSemantic);
     IfFailThrow(hr);
 
     if (hr == S_OK)
diff --git a/src/coreclr/vm/compatibilityswitch.cpp b/src/coreclr/vm/compatibilityswitch.cpp
deleted file mode 100644
index aedddbc7cbbf..000000000000
--- a/src/coreclr/vm/compatibilityswitch.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-
-#include "common.h"
-#include "clrconfig.h"
-#include "compatibilityswitch.h"
-
-FCIMPL1(StringObject*, CompatibilitySwitch::GetValue, StringObject* switchNameUNSAFE) {
-    CONTRACTL {
-        FCALL_CHECK;
-    }
-    CONTRACTL_END;
-
-    if (!switchNameUNSAFE)
-        FCThrowRes(kArgumentNullException, W("Arg_InvalidSwitchName"));
-
-    STRINGREF name = (STRINGREF) switchNameUNSAFE;
-    VALIDATEOBJECTREF(name);
-
-    STRINGREF refName = NULL;
-
-    HELPER_METHOD_FRAME_BEGIN_RET_1(name);
-    CLRConfig::ConfigStringInfo info;
-    info.name = name->GetBuffer();
-    info.options = CLRConfig::LookupOptions::Default;
-    LPWSTR strVal = CLRConfig::GetConfigValue(info);
-    refName = StringObject::NewString(strVal);
-    HELPER_METHOD_FRAME_END();
-
-    return (StringObject*)OBJECTREFToObject(refName);
-}
-FCIMPLEND
diff --git a/src/coreclr/vm/compatibilityswitch.h b/src/coreclr/vm/compatibilityswitch.h
deleted file mode 100644
index bd291ee3e442..000000000000
--- a/src/coreclr/vm/compatibilityswitch.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-
-
-#ifndef _COMPATIBILITYSWITCH_H_
-#define _COMPATIBILITYSWITCH_H_
-
-#include "object.h"
-#include "typehandle.h"
-#include "fcall.h"
-#include "field.h"
-#include "typectxt.h"
-
-class CompatibilitySwitch
-{
-public:
-    static FCDECL1(StringObject*, GetValue, StringObject *switchNameUNSAFE);
-};
-
-
-#endif
-
diff --git a/src/coreclr/vm/comsynchronizable.h b/src/coreclr/vm/comsynchronizable.h
index 15610b0b3a24..4d14461602d5 100644
--- a/src/coreclr/vm/comsynchronizable.h
+++ b/src/coreclr/vm/comsynchronizable.h
@@ -59,7 +59,7 @@ friend class ThreadBaseObject;
     static FCDECL1(void,    Initialize,        ThreadBaseObject* pThisUNSAFE);
     static FCDECL1(FC_BOOL_RET, GetIsBackground,  ThreadBaseObject* pThisUNSAFE);
     static FCDECL1(INT32,   GetThreadState,    ThreadBaseObject* pThisUNSAFE);
-    static FCDECL1(INT32,   GetThreadContext,  ThreadBaseObject* pThisUNSAFE);
+
 #ifdef FEATURE_COMINTEROP_APARTMENT_SUPPORT
     static FCDECL1(INT32,   GetApartmentState, ThreadBaseObject* pThis);
     static FCDECL2(INT32,   SetApartmentState, ThreadBaseObject* pThisUNSAFE, INT32 iState);
diff --git a/src/coreclr/vm/comutilnative.cpp b/src/coreclr/vm/comutilnative.cpp
index 027c4ae8903a..a3c9d0a848cd 100644
--- a/src/coreclr/vm/comutilnative.cpp
+++ b/src/coreclr/vm/comutilnative.cpp
@@ -1463,22 +1463,6 @@ NOINLINE void GCInterface::GarbageCollectModeAny(int generation)
 
 #include <optsmallperfcritical.h>
 
-FCIMPL2(FC_UINT8_RET,COMInterlocked::Exchange8, UINT8 *location, UINT8 value)
-{
-    FCALL_CONTRACT;
-
-    return (UINT8)InterlockedExchange8((CHAR *) location, (CHAR)value);
-}
-FCIMPLEND
-
-FCIMPL2(FC_INT16_RET,COMInterlocked::Exchange16, INT16 *location, INT16 value)
-{
-    FCALL_CONTRACT;
-
-    return InterlockedExchange16((SHORT *) location, value);
-}
-FCIMPLEND
-
 FCIMPL2(INT32,COMInterlocked::Exchange32, INT32 *location, INT32 value)
 {
     FCALL_CONTRACT;
@@ -1495,22 +1479,6 @@ FCIMPL2_IV(INT64,COMInterlocked::Exchange64, INT64 *location, INT64 value)
 }
 FCIMPLEND
 
-FCIMPL3(FC_UINT8_RET, COMInterlocked::CompareExchange8, UINT8* location, UINT8 value, UINT8 comparand)
-{
-    FCALL_CONTRACT;
-
-    return (UINT8)InterlockedCompareExchange8((CHAR*)location, (CHAR)value, (CHAR)comparand);
-}
-FCIMPLEND
-
-FCIMPL3(FC_INT16_RET, COMInterlocked::CompareExchange16, INT16* location, INT16 value, INT16 comparand)
-{
-    FCALL_CONTRACT;
-
-    return InterlockedCompareExchange16((SHORT*)location, value, comparand);
-}
-FCIMPLEND
-
 FCIMPL3(INT32, COMInterlocked::CompareExchange32, INT32* location, INT32 value, INT32 comparand)
 {
     FCALL_CONTRACT;
@@ -1681,226 +1649,136 @@ BOOL CanCompareBitsOrUseFastGetHashCode(MethodTable* mt)
     return canCompareBitsOrUseFastGetHashCode;
 }
 
-NOINLINE static FC_BOOL_RET CanCompareBitsHelper(MethodTable* mt, OBJECTREF objRef)
+extern "C" BOOL QCALLTYPE MethodTable_CanCompareBitsOrUseFastGetHashCode(MethodTable * mt)
 {
-    FC_INNER_PROLOG(ValueTypeHelper::CanCompareBits);
-
-    _ASSERTE(mt != NULL);
-    _ASSERTE(objRef != NULL);
+    QCALL_CONTRACT;
 
     BOOL ret = FALSE;
 
-    HELPER_METHOD_FRAME_BEGIN_RET_ATTRIB_1(Frame::FRAME_ATTR_EXACT_DEPTH|Frame::FRAME_ATTR_CAPTURE_DEPTH_2, objRef);
+    BEGIN_QCALL;
 
     ret = CanCompareBitsOrUseFastGetHashCode(mt);
 
-    HELPER_METHOD_FRAME_END();
-    FC_INNER_EPILOG();
-
-    FC_RETURN_BOOL(ret);
-}
-
-// Return true if the valuetype does not contain pointer, is tightly packed,
-// does not have floating point number field and does not override Equals method.
-FCIMPL1(FC_BOOL_RET, ValueTypeHelper::CanCompareBits, Object* obj)
-{
-    FCALL_CONTRACT;
-
-    _ASSERTE(obj != NULL);
-    MethodTable* mt = obj->GetMethodTable();
-
-    if (mt->HasCheckedCanCompareBitsOrUseFastGetHashCode())
-    {
-        FC_RETURN_BOOL(mt->CanCompareBitsOrUseFastGetHashCode());
-    }
-
-    OBJECTREF objRef(obj);
+    END_QCALL;
 
-    FC_INNER_RETURN(FC_BOOL_RET, CanCompareBitsHelper(mt, objRef));
+    return ret;
 }
-FCIMPLEND
 
-static INT32 FastGetValueTypeHashCodeHelper(MethodTable *mt, void *pObjRef)
+enum ValueTypeHashCodeStrategy
 {
-    CONTRACTL
-    {
-        NOTHROW;
-        GC_NOTRIGGER;
-        MODE_COOPERATIVE;
-    } CONTRACTL_END;
+    None,
+    ReferenceField,
+    DoubleField,
+    SingleField,
+    FastGetHashCode,
+    ValueTypeOverride,
+};
 
-    INT32 hashCode = 0;
-    INT32 *pObj = (INT32*)pObjRef;
-
-    // this is a struct with no refs and no "strange" offsets, just go through the obj and xor the bits
-    INT32 size = mt->GetNumInstanceFieldBytes();
-    for (INT32 i = 0; i < (INT32)(size / sizeof(INT32)); i++)
-        hashCode ^= *pObj++;
-
-    return hashCode;
-}
-
-static INT32 RegularGetValueTypeHashCode(MethodTable *mt, void *pObjRef)
+static ValueTypeHashCodeStrategy GetHashCodeStrategy(MethodTable* mt, QCall::ObjectHandleOnStack objHandle, UINT32* fieldOffset, UINT32* fieldSize, MethodTable** fieldMTOut)
 {
     CONTRACTL
     {
         THROWS;
         GC_TRIGGERS;
-        MODE_COOPERATIVE;
+        MODE_PREEMPTIVE;
     } CONTRACTL_END;
 
-    INT32 hashCode = 0;
+    // Should be handled by caller
+    _ASSERTE(!mt->CanCompareBitsOrUseFastGetHashCode());
 
-    GCPROTECT_BEGININTERIOR(pObjRef);
+    ValueTypeHashCodeStrategy ret = ValueTypeHashCodeStrategy::None;
 
-    BOOL canUseFastGetHashCodeHelper = FALSE;
-    if (mt->HasCheckedCanCompareBitsOrUseFastGetHashCode())
-    {
-        canUseFastGetHashCodeHelper = mt->CanCompareBitsOrUseFastGetHashCode();
-    }
-    else
-    {
-        canUseFastGetHashCodeHelper = CanCompareBitsOrUseFastGetHashCode(mt);
-    }
+    // Grab the first non-null field and return its hash code or 'it' as hash code
+    ApproxFieldDescIterator fdIterator(mt, ApproxFieldDescIterator::INSTANCE_FIELDS);
 
-    // While we should not get here directly from ValueTypeHelper::GetHashCode, if we recurse we need to
-    // be able to handle getting the hashcode for an embedded structure whose hashcode is computed by the fast path.
-    if (canUseFastGetHashCodeHelper)
-    {
-        hashCode = FastGetValueTypeHashCodeHelper(mt, pObjRef);
-    }
-    else
+    FieldDesc *field;
+    while ((field = fdIterator.Next()) != NULL)
     {
-        // it's looking ugly so we'll use the old behavior in managed code. Grab the first non-null
-        // field and return its hash code or 'it' as hash code
-        // <TODO> Note that the old behavior has already been broken for value types
-        //              that is qualified for CanUseFastGetHashCodeHelper. So maybe we should
-        //              change the implementation here to use all fields instead of just the 1st one.
-        // </TODO>
-        //
-        // <TODO> check this approximation - we may be losing exact type information </TODO>
-        ApproxFieldDescIterator fdIterator(mt, ApproxFieldDescIterator::INSTANCE_FIELDS);
-
-        FieldDesc *field;
-        while ((field = fdIterator.Next()) != NULL)
+        _ASSERTE(!field->IsRVA());
+        if (field->IsObjRef())
         {
-            _ASSERTE(!field->IsRVA());
-            if (field->IsObjRef())
+            GCX_COOP();
+            // if we get an object reference we get the hash code out of that
+            if (*(Object**)((BYTE *)objHandle.Get()->UnBox() + *fieldOffset + field->GetOffsetUnsafe()) != NULL)
             {
-                // if we get an object reference we get the hash code out of that
-                if (*(Object**)((BYTE *)pObjRef + field->GetOffsetUnsafe()) != NULL)
-                {
-                    PREPARE_SIMPLE_VIRTUAL_CALLSITE(METHOD__OBJECT__GET_HASH_CODE, (*(Object**)((BYTE *)pObjRef + field->GetOffsetUnsafe())));
-                    DECLARE_ARGHOLDER_ARRAY(args, 1);
-                    args[ARGNUM_0] = PTR_TO_ARGHOLDER(*(Object**)((BYTE *)pObjRef + field->GetOffsetUnsafe()));
-                    CALL_MANAGED_METHOD(hashCode, INT32, args);
-                }
-                else
-                {
-                    // null object reference, try next
-                    continue;
-                }
+                *fieldOffset += field->GetOffsetUnsafe();
+                ret = ValueTypeHashCodeStrategy::ReferenceField;
             }
             else
             {
-                CorElementType fieldType = field->GetFieldType();
-                if (fieldType == ELEMENT_TYPE_R8)
-                {
-                    PREPARE_NONVIRTUAL_CALLSITE(METHOD__DOUBLE__GET_HASH_CODE);
-                    DECLARE_ARGHOLDER_ARRAY(args, 1);
-                    args[ARGNUM_0] = PTR_TO_ARGHOLDER(((BYTE *)pObjRef + field->GetOffsetUnsafe()));
-                    CALL_MANAGED_METHOD(hashCode, INT32, args);
-                }
-                else if (fieldType == ELEMENT_TYPE_R4)
+                // null object reference, try next
+                continue;
+            }
+        }
+        else
+        {
+            CorElementType fieldType = field->GetFieldType();
+            if (fieldType == ELEMENT_TYPE_R8)
+            {
+                *fieldOffset += field->GetOffsetUnsafe();
+                ret = ValueTypeHashCodeStrategy::DoubleField;
+            }
+            else if (fieldType == ELEMENT_TYPE_R4)
+            {
+                *fieldOffset += field->GetOffsetUnsafe();
+                ret = ValueTypeHashCodeStrategy::SingleField;
+            }
+            else if (fieldType != ELEMENT_TYPE_VALUETYPE)
+            {
+                *fieldOffset += field->GetOffsetUnsafe();
+                *fieldSize = field->LoadSize();
+                ret = ValueTypeHashCodeStrategy::FastGetHashCode;
+            }
+            else
+            {
+                // got another value type. Get the type
+                TypeHandle fieldTH = field->GetFieldTypeHandleThrowing();
+                _ASSERTE(!fieldTH.IsNull());
+                MethodTable* fieldMT = fieldTH.GetMethodTable();
+                if (CanCompareBitsOrUseFastGetHashCode(fieldMT))
                 {
-                    PREPARE_NONVIRTUAL_CALLSITE(METHOD__SINGLE__GET_HASH_CODE);
-                    DECLARE_ARGHOLDER_ARRAY(args, 1);
-                    args[ARGNUM_0] = PTR_TO_ARGHOLDER(((BYTE *)pObjRef + field->GetOffsetUnsafe()));
-                    CALL_MANAGED_METHOD(hashCode, INT32, args);
+                    *fieldOffset += field->GetOffsetUnsafe();
+                    *fieldSize = field->LoadSize();
+                    ret = ValueTypeHashCodeStrategy::FastGetHashCode;
                 }
-                else if (fieldType != ELEMENT_TYPE_VALUETYPE)
+                else if (HasOverriddenMethod(fieldMT,
+                                             CoreLibBinder::GetClass(CLASS__VALUE_TYPE),
+                                             CoreLibBinder::GetMethod(METHOD__VALUE_TYPE__GET_HASH_CODE)->GetSlot()))
                 {
-                    UINT fieldSize = field->LoadSize();
-                    INT32 *pValue = (INT32*)((BYTE *)pObjRef + field->GetOffsetUnsafe());
-                    for (INT32 j = 0; j < (INT32)(fieldSize / sizeof(INT32)); j++)
-                        hashCode ^= *pValue++;
+                    *fieldOffset += field->GetOffsetUnsafe();
+                    *fieldMTOut = fieldMT;
+                    ret = ValueTypeHashCodeStrategy::ValueTypeOverride;
                 }
                 else
                 {
-                    // got another value type. Get the type
-                    TypeHandle fieldTH = field->GetFieldTypeHandleThrowing();
-                    _ASSERTE(!fieldTH.IsNull());
-                    hashCode = RegularGetValueTypeHashCode(fieldTH.GetMethodTable(), (BYTE *)pObjRef + field->GetOffsetUnsafe());
+                    *fieldOffset += field->GetOffsetUnsafe();
+                    ret = GetHashCodeStrategy(fieldMT, objHandle, fieldOffset, fieldSize, fieldMTOut);
                 }
             }
-            break;
         }
+        break;
     }
 
-    GCPROTECT_END();
-
-    return hashCode;
+    return ret;
 }
 
-// The default implementation of GetHashCode() for all value types.
-// Note that this implementation reveals the value of the fields.
-// So if the value type contains any sensitive information it should
-// implement its own GetHashCode().
-FCIMPL1(INT32, ValueTypeHelper::GetHashCode, Object* objUNSAFE)
+extern "C" INT32 QCALLTYPE ValueType_GetHashCodeStrategy(MethodTable* mt, QCall::ObjectHandleOnStack objHandle, UINT32* fieldOffset, UINT32* fieldSize, MethodTable** fieldMT)
 {
-    FCALL_CONTRACT;
-
-    if (objUNSAFE == NULL)
-        FCThrow(kNullReferenceException);
-
-    OBJECTREF obj = ObjectToOBJECTREF(objUNSAFE);
-    VALIDATEOBJECTREF(obj);
+    QCALL_CONTRACT;
 
-    INT32 hashCode = 0;
-    MethodTable *pMT = objUNSAFE->GetMethodTable();
+    ValueTypeHashCodeStrategy ret = ValueTypeHashCodeStrategy::None;
+    *fieldOffset = 0;
+    *fieldSize = 0;
+    *fieldMT = NULL;
 
-    // We don't want to expose the method table pointer in the hash code
-    // Let's use the typeID instead.
-    UINT32 typeID = pMT->LookupTypeID();
-    if (typeID == TypeIDProvider::INVALID_TYPE_ID)
-    {
-        // If the typeID has yet to be generated, fall back to GetTypeID
-        // This only needs to be done once per MethodTable
-        HELPER_METHOD_FRAME_BEGIN_RET_1(obj);
-        typeID = pMT->GetTypeID();
-        HELPER_METHOD_FRAME_END();
-    }
-
-    // To get less colliding and more evenly distributed hash codes,
-    // we munge the class index with two big prime numbers
-    hashCode = typeID * 711650207 + 2506965631U;
+    BEGIN_QCALL;
 
-    BOOL canUseFastGetHashCodeHelper = FALSE;
-    if (pMT->HasCheckedCanCompareBitsOrUseFastGetHashCode())
-    {
-        canUseFastGetHashCodeHelper = pMT->CanCompareBitsOrUseFastGetHashCode();
-    }
-    else
-    {
-        HELPER_METHOD_FRAME_BEGIN_RET_1(obj);
-        canUseFastGetHashCodeHelper = CanCompareBitsOrUseFastGetHashCode(pMT);
-        HELPER_METHOD_FRAME_END();
-    }
+    ret = GetHashCodeStrategy(mt, objHandle, fieldOffset, fieldSize, fieldMT);
 
-    if (canUseFastGetHashCodeHelper)
-    {
-        hashCode ^= FastGetValueTypeHashCodeHelper(pMT, obj->UnBox());
-    }
-    else
-    {
-        HELPER_METHOD_FRAME_BEGIN_RET_1(obj);
-        hashCode ^= RegularGetValueTypeHashCode(pMT, obj->UnBox());
-        HELPER_METHOD_FRAME_END();
-    }
+    END_QCALL;
 
-    return hashCode;
+    return ret;
 }
-FCIMPLEND
 
 FCIMPL1(UINT32, MethodTableNative::GetNumInstanceFieldBytes, MethodTable* mt)
 {
diff --git a/src/coreclr/vm/comutilnative.h b/src/coreclr/vm/comutilnative.h
index 80d35da7b721..3e64207564c8 100644
--- a/src/coreclr/vm/comutilnative.h
+++ b/src/coreclr/vm/comutilnative.h
@@ -229,12 +229,8 @@ extern "C" uint64_t QCALLTYPE GCInterface_GetGenerationBudget(int generation);
 class COMInterlocked
 {
 public:
-        static FCDECL2(FC_UINT8_RET, Exchange8, UINT8 *location, UINT8 value);
-        static FCDECL2(FC_INT16_RET, Exchange16, INT16 *location, INT16 value);
         static FCDECL2(INT32, Exchange32, INT32 *location, INT32 value);
         static FCDECL2_IV(INT64, Exchange64, INT64 *location, INT64 value);
-        static FCDECL3(FC_UINT8_RET, CompareExchange8, UINT8* location, UINT8 value, UINT8 comparand);
-        static FCDECL3(FC_INT16_RET, CompareExchange16, INT16* location, INT16 value, INT16 comparand);
         static FCDECL3(INT32, CompareExchange32, INT32* location, INT32 value, INT32 comparand);
         static FCDECL3_IVV(INT64, CompareExchange64, INT64* location, INT64 value, INT64 comparand);
         static FCDECL2(LPVOID, ExchangeObject, LPVOID* location, LPVOID value);
@@ -245,18 +241,14 @@ class COMInterlocked
 
 extern "C" void QCALLTYPE Interlocked_MemoryBarrierProcessWide();
 
-class ValueTypeHelper {
-public:
-    static FCDECL1(FC_BOOL_RET, CanCompareBits, Object* obj);
-    static FCDECL1(INT32, GetHashCode, Object* objRef);
-};
-
 class MethodTableNative {
 public:
     static FCDECL1(UINT32, GetNumInstanceFieldBytes, MethodTable* mt);
 };
 
 extern "C" BOOL QCALLTYPE MethodTable_AreTypesEquivalent(MethodTable* mta, MethodTable* mtb);
+extern "C" BOOL QCALLTYPE MethodTable_CanCompareBitsOrUseFastGetHashCode(MethodTable* mt);
+extern "C" INT32 QCALLTYPE ValueType_GetHashCodeStrategy(MethodTable* mt, QCall::ObjectHandleOnStack objHandle, UINT32* fieldOffset, UINT32* fieldSize, MethodTable** fieldMT);
 
 class StreamNative {
 public:
diff --git a/src/coreclr/vm/corelib.cpp b/src/coreclr/vm/corelib.cpp
index c4b0b43450e5..c0a217593adc 100644
--- a/src/coreclr/vm/corelib.cpp
+++ b/src/coreclr/vm/corelib.cpp
@@ -29,7 +29,6 @@
 #include "floatdouble.h"
 #include "floatsingle.h"
 #include "comdatetime.h"
-#include "compatibilityswitch.h"
 #include "debugdebugger.h"
 #include "assemblynative.hpp"
 #include "comthreadpool.h"
diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h
index ffc2badf3e80..c14fc7a69ecb 100644
--- a/src/coreclr/vm/corelib.h
+++ b/src/coreclr/vm/corelib.h
@@ -188,9 +188,6 @@ DEFINE_METHOD(LICENSE_INTEROP_PROXY, SAVEKEYINCURRENTCONTEXT, SaveKeyInCurrentCo
 #endif // FEATURE_COMINTEROP
 END_ILLINK_FEATURE_SWITCH()
 
-DEFINE_CLASS_U(Interop,                CriticalHandle,             CriticalHandle)
-DEFINE_FIELD_U(handle,                     CriticalHandle,     m_handle)
-DEFINE_FIELD_U(_isClosed,                  CriticalHandle,     m_isClosed)
 DEFINE_CLASS(CRITICAL_HANDLE,       Interop,                CriticalHandle)
 DEFINE_FIELD(CRITICAL_HANDLE,       HANDLE,                 handle)
 DEFINE_METHOD(CRITICAL_HANDLE,      RELEASE_HANDLE,         ReleaseHandle,              IM_RetBool)
@@ -262,6 +259,12 @@ DEFINE_METHOD(DELEGATE,             GET_INVOKE_METHOD,      GetInvokeMethod,
 DEFINE_CLASS(INT128,               System,                 Int128)
 DEFINE_CLASS(UINT128,              System,                 UInt128)
 
+DEFINE_CLASS(MATH,                  System,                 Math)
+DEFINE_METHOD(MATH,                 ROUND,                  Round,                      SM_Dbl_RetDbl)
+
+DEFINE_CLASS(MATHF,                 System,                 MathF)
+DEFINE_METHOD(MATHF,                ROUND,                  Round,                      SM_Flt_RetFlt)
+
 DEFINE_CLASS(DYNAMICMETHOD,         ReflectionEmit,         DynamicMethod)
 
 DEFINE_CLASS(DYNAMICRESOLVER,       ReflectionEmit,         DynamicResolver)
@@ -337,13 +340,13 @@ DEFINE_FIELD(RT_TYPE_HANDLE,        M_TYPE,                 m_type)
 DEFINE_CLASS(TYPE_NAME_PARSER,      Reflection,             TypeNameParser)
 DEFINE_METHOD(TYPE_NAME_PARSER,     GET_TYPE_HELPER,        GetTypeHelper,              SM_Type_CharPtr_RuntimeAssembly_Bool_Bool_RetRuntimeType)
 
-DEFINE_CLASS_U(Reflection,             RtFieldInfo,         NoClass)
-DEFINE_FIELD_U(m_fieldHandle,              ReflectFieldObject, m_pFD)
+DEFINE_CLASS_U(Reflection,          RtFieldInfo,            NoClass)
+DEFINE_FIELD_U(m_fieldHandle,       ReflectFieldObject,     m_pFD)
 DEFINE_CLASS(RT_FIELD_INFO,         Reflection,             RtFieldInfo)
 DEFINE_FIELD(RT_FIELD_INFO,         HANDLE,                 m_fieldHandle)
 
-DEFINE_CLASS_U(System,                 RuntimeFieldInfoStub,       ReflectFieldObject)
-DEFINE_FIELD_U(m_fieldHandle,              ReflectFieldObject, m_pFD)
+DEFINE_CLASS_U(System,              RuntimeFieldInfoStub,   ReflectFieldObject)
+DEFINE_FIELD_U(m_fieldHandle,       ReflectFieldObject,     m_pFD)
 DEFINE_CLASS(STUBFIELDINFO,         System,                 RuntimeFieldInfoStub)
 #if FOR_ILLINK
 DEFINE_METHOD(STUBFIELDINFO,        CTOR,                   .ctor,                      IM_RetVoid)
@@ -569,12 +572,6 @@ DEFINE_METHOD(OBJECT,               GET_TYPE,               GetType,
 DEFINE_METHOD(OBJECT,               GET_HASH_CODE,          GetHashCode,                IM_RetInt)
 DEFINE_METHOD(OBJECT,               EQUALS,                 Equals,                     IM_Obj_RetBool)
 
-// DEFINE_CLASS(DOUBLE,                System,                 Double)
-DEFINE_METHOD(DOUBLE,               GET_HASH_CODE,          GetHashCode, IM_RetInt)
-
-// DEFINE_CLASS(SINGLE,                System,                 Single)
-DEFINE_METHOD(SINGLE,               GET_HASH_CODE,          GetHashCode, IM_RetInt)
-
 DEFINE_CLASS(__CANON,              System,                 __Canon)
 
 BEGIN_ILLINK_FEATURE_SWITCH(System.Runtime.InteropServices.BuiltInComInterop.IsSupported, true, true)
@@ -639,6 +636,11 @@ DEFINE_METHOD(RUNTIME_HELPERS,      ALLOC_TAILCALL_ARG_BUFFER, AllocTailCallArgB
 DEFINE_METHOD(RUNTIME_HELPERS,      GET_TAILCALL_INFO,      GetTailCallInfo, NoSig)
 DEFINE_METHOD(RUNTIME_HELPERS,      DISPATCH_TAILCALLS,     DispatchTailCalls,          NoSig)
 
+DEFINE_CLASS(SPAN_HELPERS,          System,                 SpanHelpers)
+DEFINE_METHOD(SPAN_HELPERS,         MEMSET,                 Fill, SM_RefByte_Byte_UIntPtr_RetVoid)
+DEFINE_METHOD(SPAN_HELPERS,         MEMZERO,                ClearWithoutReferences, SM_RefByte_UIntPtr_RetVoid)
+DEFINE_METHOD(SPAN_HELPERS,         MEMCOPY,                Memmove, SM_RefByte_RefByte_UIntPtr_RetVoid)
+
 DEFINE_CLASS(UNSAFE,                CompilerServices,       Unsafe)
 DEFINE_METHOD(UNSAFE,               AS_POINTER,             AsPointer, NoSig)
 DEFINE_METHOD(UNSAFE,               BYREF_IS_NULL,          IsNullRef, NoSig)
@@ -726,12 +728,6 @@ DEFINE_CLASS(CALLCONV_SUPPRESSGCTRANSITION,  CompilerServices,       CallConvSup
 DEFINE_CLASS(CALLCONV_MEMBERFUNCTION,        CompilerServices,       CallConvMemberFunction)
 DEFINE_CLASS(CALLCONV_SWIFT,                 CompilerServices,       CallConvSwift)
 
-DEFINE_CLASS_U(Interop,                SafeHandle,         SafeHandle)
-DEFINE_FIELD_U(_ctorStackTrace,            SafeHandle,            m_ctorStackTrace)
-DEFINE_FIELD_U(handle,                     SafeHandle,            m_handle)
-DEFINE_FIELD_U(_state,                     SafeHandle,            m_state)
-DEFINE_FIELD_U(_ownsHandle,                SafeHandle,            m_ownsHandle)
-DEFINE_FIELD_U(_fullyInitialized,          SafeHandle,            m_fullyInitialized)
 DEFINE_CLASS(SAFE_HANDLE,         Interop,                SafeHandle)
 DEFINE_FIELD(SAFE_HANDLE,           HANDLE,                 handle)
 DEFINE_METHOD(SAFE_HANDLE,          GET_IS_INVALID,         get_IsInvalid,              IM_RetBool)
@@ -890,9 +886,6 @@ DEFINE_FIELD_U(_taggedHandle,       WeakReferenceObject,    m_taggedHandle)
 DEFINE_CLASS(WEAKREFERENCE,         System,                 WeakReference)
 DEFINE_CLASS(WEAKREFERENCEGENERIC,  System,                 WeakReference`1)
 
-DEFINE_CLASS_U(Threading,           WaitHandle,             WaitHandleBase)
-DEFINE_FIELD_U(_waitHandle,         WaitHandleBase,         m_safeHandle)
-
 DEFINE_CLASS(DEBUGGER,              Diagnostics,            Debugger)
 DEFINE_METHOD(DEBUGGER,             BREAK,                  Break,                  SM_RetVoid)
 
@@ -1041,6 +1034,21 @@ DEFINE_METHOD(HANDLE_MARSHALER,          CONVERT_SAFEHANDLE_TO_NATIVE,ConvertSaf
 DEFINE_METHOD(HANDLE_MARSHALER,          THROW_SAFEHANDLE_FIELD_CHANGED, ThrowSafeHandleFieldChanged, SM_RetVoid)
 DEFINE_METHOD(HANDLE_MARSHALER,          THROW_CRITICALHANDLE_FIELD_CHANGED, ThrowCriticalHandleFieldChanged, SM_RetVoid)
 
+#ifdef TARGET_WINDOWS
+#ifdef TARGET_X86
+DEFINE_CLASS(COPY_CONSTRUCTOR_CHAIN, StubHelpers,                 CopyConstructorChain)
+DEFINE_METHOD(COPY_CONSTRUCTOR_CHAIN, EXECUTE_CURRENT_COPIES_AND_GET_TARGET, ExecuteCurrentCopiesAndGetTarget, SM_PtrVoid_RetPtrVoid)
+DEFINE_METHOD(COPY_CONSTRUCTOR_CHAIN, INSTALL,                    Install,               IM_PtrVoid_RetVoid)
+DEFINE_METHOD(COPY_CONSTRUCTOR_CHAIN, ADD,                        Add,                   IM_PtrCopyConstructorCookie_RetVoid)
+
+DEFINE_CLASS(COPY_CONSTRUCTOR_COOKIE, StubHelpers, CopyConstructorCookie)
+DEFINE_FIELD(COPY_CONSTRUCTOR_COOKIE, SOURCE, m_source)
+DEFINE_FIELD(COPY_CONSTRUCTOR_COOKIE, DESTINATION_OFFSET, m_destinationOffset)
+DEFINE_FIELD(COPY_CONSTRUCTOR_COOKIE, COPY_CONSTRUCTOR, m_copyConstructor)
+DEFINE_FIELD(COPY_CONSTRUCTOR_COOKIE, DESTRUCTOR, m_destructor)
+#endif // TARGET_X86
+#endif // TARGET_WINDOWS
+
 DEFINE_CLASS(COMVARIANT,            Marshalling,            ComVariant)
 
 DEFINE_CLASS(SZARRAYHELPER,         System,                        SZArrayHelper)
@@ -1152,8 +1160,8 @@ DEFINE_METHOD(CASTHELPERS, CHKCASTINTERFACE, ChkCastInterface,            SM_Ptr
 DEFINE_METHOD(CASTHELPERS, CHKCASTCLASS,     ChkCastClass,                SM_PtrVoid_Obj_RetObj)
 DEFINE_METHOD(CASTHELPERS, CHKCASTCLASSSPECIAL, ChkCastClassSpecial,      SM_PtrVoid_Obj_RetObj)
 DEFINE_METHOD(CASTHELPERS, UNBOX,            Unbox,                       SM_PtrVoid_Obj_RetRefByte)
-DEFINE_METHOD(CASTHELPERS, STELEMREF,        StelemRef,                   SM_Array_IntPtr_Obj_RetVoid)
-DEFINE_METHOD(CASTHELPERS, LDELEMAREF,       LdelemaRef,                  SM_Array_IntPtr_PtrVoid_RetRefObj)
+DEFINE_METHOD(CASTHELPERS, STELEMREF,        StelemRef,                   SM_ArrObject_IntPtr_Obj_RetVoid)
+DEFINE_METHOD(CASTHELPERS, LDELEMAREF,       LdelemaRef,                  SM_ArrObject_IntPtr_PtrVoid_RetRefObj)
 
 #ifdef FEATURE_EH_FUNCLETS
 DEFINE_CLASS(EH, Runtime, EH)
diff --git a/src/coreclr/vm/crst.h b/src/coreclr/vm/crst.h
index 92438cf1d66a..5928812dc49f 100644
--- a/src/coreclr/vm/crst.h
+++ b/src/coreclr/vm/crst.h
@@ -103,7 +103,6 @@ extern DWORD g_fEEShutDown;
 #endif
 // Total count of Crst lock  of the type (Shutdown) that are currently in use
 extern Volatile<LONG> g_ShutdownCrstUsageCount;
-extern Volatile<LONG> g_fForbidEnterEE;
 
 // The CRST.
 class CrstBase
diff --git a/src/coreclr/vm/customattribute.cpp b/src/coreclr/vm/customattribute.cpp
index b86ca79db928..efa355602c90 100644
--- a/src/coreclr/vm/customattribute.cpp
+++ b/src/coreclr/vm/customattribute.cpp
@@ -326,8 +326,7 @@ static HRESULT ParseCaNamedArgs(
     return hr;
 }
 
-/*static*/
-HRESULT Attribute::ParseAttributeArgumentValues(
+HRESULT Attribute::ParseArgumentValues(
     void* pCa,
     INT32 cCa,
     CaValueArrayFactory* pCaValueArrayFactory,
@@ -349,340 +348,47 @@ HRESULT Attribute::ParseAttributeArgumentValues(
     return hr;
 }
 
-FCIMPL6(LPVOID, COMCustomAttribute::CreateCaObject, ReflectModuleBaseObject* pAttributedModuleUNSAFE, ReflectClassBaseObject* pCaTypeUNSAFE, ReflectMethodObject *pMethodUNSAFE, BYTE** ppBlob, BYTE* pEndBlob, INT32* pcNamedArgs)
+// retrieve the string size in a CA blob. Advance the blob pointer to point to
+// the beginning of the string immediately following the size
+static int GetStringSize(BYTE **pBlob, const BYTE *endBlob)
 {
-    FCALL_CONTRACT;
-
-    struct
-    {
-        REFLECTCLASSBASEREF refCaType;
-        OBJECTREF ca;
-        REFLECTMETHODREF refCtor;
-        REFLECTMODULEBASEREF refAttributedModule;
-    } gc;
-    gc.refCaType = (REFLECTCLASSBASEREF)ObjectToOBJECTREF(pCaTypeUNSAFE);
-    TypeHandle th = gc.refCaType->GetType();
-
-    gc.ca = NULL;
-    gc.refCtor = (REFLECTMETHODREF)ObjectToOBJECTREF(pMethodUNSAFE);
-    gc.refAttributedModule = (REFLECTMODULEBASEREF)ObjectToOBJECTREF(pAttributedModuleUNSAFE);
-
-    if(gc.refAttributedModule == NULL)
-        FCThrowRes(kArgumentNullException, W("Arg_InvalidHandle"));
-
-    MethodDesc* pCtorMD = gc.refCtor->GetMethod();
-
-    HELPER_METHOD_FRAME_BEGIN_RET_PROTECT(gc);
+    CONTRACTL
     {
-        MethodDescCallSite ctorCallSite(pCtorMD, th);
-        MetaSig* pSig = ctorCallSite.GetMetaSig();
-        BYTE* pBlob = *ppBlob;
-
-        // get the number of arguments and allocate an array for the args
-        ARG_SLOT *args = NULL;
-        UINT cArgs = pSig->NumFixedArgs() + 1; // make room for the this pointer
-        UINT i = 1; // used to flag that we actually get the right number of arg from the blob
-
-        args = (ARG_SLOT*)_alloca(cArgs * sizeof(ARG_SLOT));
-        memset((void*)args, 0, cArgs * sizeof(ARG_SLOT));
-
-        OBJECTREF *argToProtect = (OBJECTREF*)_alloca(cArgs * sizeof(OBJECTREF));
-        memset((void*)argToProtect, 0, cArgs * sizeof(OBJECTREF));
-
-        // load the this pointer
-        argToProtect[0] = gc.refCaType->GetType().GetMethodTable()->Allocate(); // this is the value to return after the ctor invocation
-
-        if (pBlob)
-        {
-            if (pBlob < pEndBlob)
-            {
-                if (pBlob + 2 > pEndBlob)
-                {
-                    COMPlusThrow(kCustomAttributeFormatException);
-                }
-                INT16 prolog = GET_UNALIGNED_VAL16(pBlob);
-                if (prolog != 1)
-                    COMPlusThrow(kCustomAttributeFormatException);
-                pBlob += 2;
-            }
-
-            if (cArgs > 1)
-            {
-                GCPROTECT_ARRAY_BEGIN(*argToProtect, cArgs);
-                {
-                    // loop through the args
-                    for (i = 1; i < cArgs; i++) {
-                        CorElementType type = pSig->NextArg();
-                        if (type == ELEMENT_TYPE_END)
-                            break;
-                        BOOL bObjectCreated = FALSE;
-                        TypeHandle th = pSig->GetLastTypeHandleThrowing();
-                        if (th.IsArray())
-                            // get the array element
-                            th = th.GetArrayElementTypeHandle();
-                        ARG_SLOT data = GetDataFromBlob(pCtorMD->GetAssembly(), (CorSerializationType)type, th, &pBlob, pEndBlob, gc.refAttributedModule->GetModule(), &bObjectCreated);
-                        if (bObjectCreated)
-                            argToProtect[i] = ArgSlotToObj(data);
-                        else
-                            args[i] = data;
-                    }
-                }
-                GCPROTECT_END();
-
-                // We have borrowed the signature from MethodDescCallSite. We have to put it back into the initial position
-                // because of that's where MethodDescCallSite expects to find it below.
-                pSig->Reset();
-
-                for (i = 1; i < cArgs; i++)
-                {
-                    if (argToProtect[i] != NULL)
-                    {
-                        _ASSERTE(args[i] == NULL);
-                        args[i] = ObjToArgSlot(argToProtect[i]);
-                    }
-                }
-            }
-        }
-        args[0] = ObjToArgSlot(argToProtect[0]);
-
-        if (i != cArgs)
-            COMPlusThrow(kCustomAttributeFormatException);
-
-        // check if there are any named properties to invoke,
-        // if so set the by ref int passed in to point
-        // to the blob position where name properties start
-        *pcNamedArgs = 0;
-
-        if (pBlob && pBlob != pEndBlob)
-        {
-            if (pBlob + 2 > pEndBlob)
-                COMPlusThrow(kCustomAttributeFormatException);
-
-            *pcNamedArgs = GET_UNALIGNED_VAL16(pBlob);
-
-            pBlob += 2;
-        }
-
-        *ppBlob = pBlob;
-
-        if (*pcNamedArgs == 0 && pBlob != pEndBlob)
-            COMPlusThrow(kCustomAttributeFormatException);
-
-        // make the invocation to the ctor
-        gc.ca = ArgSlotToObj(args[0]);
-        if (pCtorMD->GetMethodTable()->IsValueType())
-            args[0] = PtrToArgSlot(OBJECTREFToObject(gc.ca)->UnBox());
-
-        ctorCallSite.CallWithValueTypes(args);
+        MODE_COOPERATIVE;
+        THROWS;
     }
-    HELPER_METHOD_FRAME_END();
-
-    return OBJECTREFToObject(gc.ca);
-}
-FCIMPLEND
-
-FCIMPL5(VOID, COMCustomAttribute::ParseAttributeUsageAttribute, PVOID pData, ULONG cData, ULONG* pTargets, CLR_BOOL* pInherited, CLR_BOOL* pAllowMultiple)
-{
-    FCALL_CONTRACT;
-
-    int inherited = 0;
-    int allowMultiple = 1;
-
-    {
-        CustomAttributeParser ca(pData, cData);
-
-        CaArg args[1];
-        args[0].InitEnum(SERIALIZATION_TYPE_I4, 0);
-        if (FAILED(::ParseKnownCaArgs(ca, args, ARRAY_SIZE(args))))
-        {
-            HELPER_METHOD_FRAME_BEGIN_0();
-            COMPlusThrow(kCustomAttributeFormatException);
-            HELPER_METHOD_FRAME_END();
-        }
-
-        *pTargets = args[0].val.u4;
-
-        CaNamedArg namedArgs[2];
-        CaType namedArgTypes[2];
-        namedArgTypes[inherited].Init(SERIALIZATION_TYPE_BOOLEAN);
-        namedArgTypes[allowMultiple].Init(SERIALIZATION_TYPE_BOOLEAN);
-        namedArgs[inherited].Init("Inherited", SERIALIZATION_TYPE_PROPERTY, namedArgTypes[inherited], TRUE);
-        namedArgs[allowMultiple].Init("AllowMultiple", SERIALIZATION_TYPE_PROPERTY, namedArgTypes[allowMultiple], FALSE);
-        if (FAILED(::ParseKnownCaNamedArgs(ca, namedArgs, ARRAY_SIZE(namedArgs))))
-        {
-            HELPER_METHOD_FRAME_BEGIN_0();
-            COMPlusThrow(kCustomAttributeFormatException);
-            HELPER_METHOD_FRAME_END();
-        }
+    CONTRACTL_END;
 
-        *pInherited = namedArgs[inherited].val.boolean == TRUE;
-        *pAllowMultiple = namedArgs[allowMultiple].val.boolean == TRUE;
+    if (*pBlob >= endBlob )
+    {   // No buffer at all, or buffer overrun
+        COMPlusThrow(kCustomAttributeFormatException);
     }
-}
-FCIMPLEND
-
-
-FCIMPL7(void, COMCustomAttribute::GetPropertyOrFieldData, ReflectModuleBaseObject *pModuleUNSAFE, BYTE** ppBlobStart, BYTE* pBlobEnd, STRINGREF* pName, CLR_BOOL* pbIsProperty, OBJECTREF* pType, OBJECTREF* value)
-{
-    FCALL_CONTRACT;
-
-    BYTE* pBlob = *ppBlobStart;
-    *pType = NULL;
 
-    REFLECTMODULEBASEREF refModule = (REFLECTMODULEBASEREF)ObjectToOBJECTREF(pModuleUNSAFE);
-
-    if(refModule == NULL)
-        FCThrowResVoid(kArgumentNullException, W("Arg_InvalidHandle"));
-
-    Module *pModule = refModule->GetModule();
+    if (**pBlob == 0xFF)
+    {   // Special case null string.
+        ++(*pBlob);
+        return -1;
+    }
 
-    HELPER_METHOD_FRAME_BEGIN_1(refModule);
+    ULONG ulSize;
+    if (FAILED(CPackedLen::SafeGetData((BYTE const *)*pBlob, (BYTE const *)endBlob, (ULONG *)&ulSize, (BYTE const **)pBlob)))
     {
-        Assembly *pCtorAssembly = NULL;
-
-        MethodTable *pMTValue = NULL;
-        CorSerializationType arrayType = SERIALIZATION_TYPE_BOOLEAN;
-        BOOL bObjectCreated = FALSE;
-        TypeHandle nullTH;
-
-        if (pBlob + 2 > pBlobEnd)
-            COMPlusThrow(kCustomAttributeFormatException);
-
-        // get whether it is a field or a property
-        CorSerializationType propOrField = (CorSerializationType)*pBlob;
-        pBlob++;
-        if (propOrField == SERIALIZATION_TYPE_FIELD)
-            *pbIsProperty = FALSE;
-        else if (propOrField == SERIALIZATION_TYPE_PROPERTY)
-            *pbIsProperty = TRUE;
-        else
-            COMPlusThrow(kCustomAttributeFormatException);
-
-        // get the type of the field
-        CorSerializationType fieldType = (CorSerializationType)*pBlob;
-        pBlob++;
-        if (fieldType == SERIALIZATION_TYPE_SZARRAY)
-        {
-            arrayType = (CorSerializationType)*pBlob;
-
-            if (pBlob + 1 > pBlobEnd)
-                COMPlusThrow(kCustomAttributeFormatException);
-
-            pBlob++;
-        }
-        if (fieldType == SERIALIZATION_TYPE_ENUM || arrayType == SERIALIZATION_TYPE_ENUM)
-        {
-            // get the enum type
-            ReflectClassBaseObject *pEnum =
-                (ReflectClassBaseObject*)OBJECTREFToObject(ArgSlotToObj(GetDataFromBlob(
-                    pCtorAssembly, SERIALIZATION_TYPE_TYPE, nullTH, &pBlob, pBlobEnd, pModule, &bObjectCreated)));
-
-            if (pEnum == NULL)
-                COMPlusThrow(kCustomAttributeFormatException);
-
-            _ASSERTE(bObjectCreated);
-
-            TypeHandle th = pEnum->GetType();
-            _ASSERTE(th.IsEnum());
-
-            pMTValue = th.AsMethodTable();
-            if (fieldType == SERIALIZATION_TYPE_ENUM)
-                // load the enum type to pass it back
-                *pType = th.GetManagedClassObject();
-            else
-                nullTH = th;
-        }
-
-        // get the string representing the field/property name
-        *pName = ArgSlotToString(GetDataFromBlob(
-            pCtorAssembly, SERIALIZATION_TYPE_STRING, nullTH, &pBlob, pBlobEnd, pModule, &bObjectCreated));
-        _ASSERTE(bObjectCreated || *pName == NULL);
-
-        // create the object and return it
-        switch (fieldType)
-        {
-            case SERIALIZATION_TYPE_TAGGED_OBJECT:
-                *pType = g_pObjectClass->GetManagedClassObject();
-                FALLTHROUGH;
-            case SERIALIZATION_TYPE_TYPE:
-            case SERIALIZATION_TYPE_STRING:
-                *value = ArgSlotToObj(GetDataFromBlob(
-                    pCtorAssembly, fieldType, nullTH, &pBlob, pBlobEnd, pModule, &bObjectCreated));
-                _ASSERTE(bObjectCreated || *value == NULL);
-
-                if (*value == NULL)
-                {
-                    // load the proper type so that code in managed knows which property to load
-                    if (fieldType == SERIALIZATION_TYPE_STRING)
-                        *pType = CoreLibBinder::GetElementType(ELEMENT_TYPE_STRING)->GetManagedClassObject();
-                    else if (fieldType == SERIALIZATION_TYPE_TYPE)
-                        *pType = CoreLibBinder::GetClass(CLASS__TYPE)->GetManagedClassObject();
-                }
-                break;
-            case SERIALIZATION_TYPE_SZARRAY:
-            {
-                *value = NULL;
-                int arraySize = (int)GetDataFromBlob(pCtorAssembly, SERIALIZATION_TYPE_I4, nullTH, &pBlob, pBlobEnd, pModule, &bObjectCreated);
-
-                if (arraySize != -1)
-                {
-                    _ASSERTE(!bObjectCreated);
-                    if (arrayType == SERIALIZATION_TYPE_STRING)
-                        nullTH = TypeHandle(CoreLibBinder::GetElementType(ELEMENT_TYPE_STRING));
-                    else if (arrayType == SERIALIZATION_TYPE_TYPE)
-                        nullTH = TypeHandle(CoreLibBinder::GetClass(CLASS__TYPE));
-                    else if (arrayType == SERIALIZATION_TYPE_TAGGED_OBJECT)
-                        nullTH = TypeHandle(g_pObjectClass);
-                    ReadArray(pCtorAssembly, arrayType, arraySize, nullTH, &pBlob, pBlobEnd, pModule, (BASEARRAYREF*)value);
-                }
-                if (*value == NULL)
-                {
-                    TypeHandle arrayTH;
-                    switch (arrayType)
-                    {
-                        case SERIALIZATION_TYPE_STRING:
-                            arrayTH = TypeHandle(CoreLibBinder::GetElementType(ELEMENT_TYPE_STRING));
-                            break;
-                        case SERIALIZATION_TYPE_TYPE:
-                            arrayTH = TypeHandle(CoreLibBinder::GetClass(CLASS__TYPE));
-                            break;
-                        case SERIALIZATION_TYPE_TAGGED_OBJECT:
-                            arrayTH = TypeHandle(g_pObjectClass);
-                            break;
-                        default:
-                            if (SERIALIZATION_TYPE_BOOLEAN <= arrayType && arrayType <= SERIALIZATION_TYPE_R8)
-                                arrayTH = TypeHandle(CoreLibBinder::GetElementType((CorElementType)arrayType));
-                    }
-                    if (!arrayTH.IsNull())
-                    {
-                        arrayTH = ClassLoader::LoadArrayTypeThrowing(arrayTH);
-                        *pType = arrayTH.GetManagedClassObject();
-                    }
-                }
-                break;
-            }
-            default:
-                if (SERIALIZATION_TYPE_BOOLEAN <= fieldType && fieldType <= SERIALIZATION_TYPE_R8)
-                    pMTValue = CoreLibBinder::GetElementType((CorElementType)fieldType);
-                else if(fieldType == SERIALIZATION_TYPE_ENUM)
-                    fieldType = (CorSerializationType)pMTValue->GetInternalCorElementType();
-                else
-                    COMPlusThrow(kCustomAttributeFormatException);
-
-                ARG_SLOT val = GetDataFromBlob(pCtorAssembly, fieldType, nullTH, &pBlob, pBlobEnd, pModule, &bObjectCreated);
-                _ASSERTE(!bObjectCreated);
-
-                *value = pMTValue->Box((void*)ArgSlotEndiannessFixup(&val, pMTValue->GetNumInstanceFieldBytes()));
-        }
-
-        *ppBlobStart = pBlob;
+        COMPlusThrow(kCustomAttributeFormatException);
     }
-    HELPER_METHOD_FRAME_END();
+
+    return (int)ulSize;
 }
-FCIMPLEND
 
-/*static*/
-TypeHandle COMCustomAttribute::GetTypeHandleFromBlob(Assembly *pCtorAssembly,
+// Forward declaration
+static ARG_SLOT GetDataFromBlob(Assembly *pCtorAssembly,
+                      CorSerializationType type,
+                      TypeHandle th,
+                      BYTE **pBlob,
+                      const BYTE *endBlob,
+                      Module *pModule,
+                      BOOL *bObjectCreated);
+
+static TypeHandle GetTypeHandleFromBlob(Assembly *pCtorAssembly,
                                     CorSerializationType objType,
                                     BYTE **pBlob,
                                     const BYTE *endBlob,
@@ -690,6 +396,7 @@ TypeHandle COMCustomAttribute::GetTypeHandleFromBlob(Assembly *pCtorAssembly,
 {
     CONTRACTL
     {
+        MODE_COOPERATIVE;
         THROWS;
     }
     CONTRACTL_END;
@@ -776,50 +483,25 @@ TypeHandle COMCustomAttribute::GetTypeHandleFromBlob(Assembly *pCtorAssembly,
     return RtnTypeHnd;
 }
 
-// retrieve the string size in a CA blob. Advance the blob pointer to point to
-// the beginning of the string immediately following the size
-/*static*/
-int COMCustomAttribute::GetStringSize(BYTE **pBlob, const BYTE *endBlob)
+// copy the values of an array of integers from a CA blob
+// (i.e., always stored in little-endian, and needs not be aligned).
+// Returns TRUE on success, FALSE if the blob was not big enough.
+// Advances *pBlob by the amount copied.
+template<typename T>
+static bool CopyArrayVAL(BASEARRAYREF pArray, int nElements, BYTE **pBlob, const BYTE *endBlob)
 {
     CONTRACTL
     {
-        THROWS;
+        MODE_COOPERATIVE;
+        NOTHROW;
     }
     CONTRACTL_END;
 
-    if (*pBlob >= endBlob )
-    {   // No buffer at all, or buffer overrun
-        COMPlusThrow(kCustomAttributeFormatException);
-    }
-
-    if (**pBlob == 0xFF)
-    {   // Special case null string.
-        ++(*pBlob);
-        return -1;
-    }
-
-    ULONG ulSize;
-    if (FAILED(CPackedLen::SafeGetData((BYTE const *)*pBlob, (BYTE const *)endBlob, (ULONG *)&ulSize, (BYTE const **)pBlob)))
-    {
-        COMPlusThrow(kCustomAttributeFormatException);
-    }
-
-    return (int)ulSize;
-}
-
-// copy the values of an array of integers from a CA blob
-// (i.e., always stored in little-endian, and needs not be aligned).
-// Returns TRUE on success, FALSE if the blob was not big enough.
-// Advances *pBlob by the amount copied.
-/*static*/
-template < typename T >
-BOOL COMCustomAttribute::CopyArrayVAL(BASEARRAYREF pArray, int nElements, BYTE **pBlob, const BYTE *endBlob)
-{
     int sizeData;   // = size * 2; with integer overflow check
     if (!ClrSafeInt<int>::multiply(nElements, sizeof(T), sizeData))
-        return FALSE;
+        return false;
     if (sizeData > endBlob - *pBlob)     // integer overflow check
-        return FALSE;
+        return false;
 #if BIGENDIAN
     T *ptDest = reinterpret_cast<T *>(pArray->GetDataPtr());
     for (int iElement = 0; iElement < nElements; iElement++)
@@ -837,34 +519,36 @@ BOOL COMCustomAttribute::CopyArrayVAL(BASEARRAYREF pArray, int nElements, BYTE *
     memcpyNoGCRefs(pArray->GetDataPtr(), *pBlob, sizeData);
 #endif // BIGENDIAN
     *pBlob += sizeData;
-    return TRUE;
+    return true;
 }
 
 // read the whole array as a chunk
-/*static*/
-void COMCustomAttribute::ReadArray(Assembly *pCtorAssembly,
+static BASEARRAYREF ReadArray(Assembly *pCtorAssembly,
                CorSerializationType arrayType,
                int size,
                TypeHandle th,
                BYTE **pBlob,
                const BYTE *endBlob,
-               Module *pModule,
-               BASEARRAYREF *pArray)
+               Module *pModule)
 {
     CONTRACTL
     {
+        MODE_COOPERATIVE;
         THROWS;
     }
     CONTRACTL_END;
 
     ARG_SLOT element = 0;
 
+    BASEARRAYREF array = NULL;
+    GCPROTECT_BEGIN(array);
+
     switch ((DWORD)arrayType) {
     case SERIALIZATION_TYPE_BOOLEAN:
     case SERIALIZATION_TYPE_I1:
     case SERIALIZATION_TYPE_U1:
-        *pArray = (BASEARRAYREF)AllocatePrimitiveArray((CorElementType)arrayType, size);
-        if (!CopyArrayVAL<BYTE>(*pArray, size, pBlob, endBlob))
+        array = (BASEARRAYREF)AllocatePrimitiveArray((CorElementType)arrayType, size);
+        if (!CopyArrayVAL<BYTE>(array, size, pBlob, endBlob))
             goto badBlob;
         break;
 
@@ -872,8 +556,8 @@ void COMCustomAttribute::ReadArray(Assembly *pCtorAssembly,
     case SERIALIZATION_TYPE_I2:
     case SERIALIZATION_TYPE_U2:
     {
-        *pArray = (BASEARRAYREF)AllocatePrimitiveArray((CorElementType)arrayType, size);
-        if (!CopyArrayVAL<UINT16>(*pArray, size, pBlob, endBlob))
+        array = (BASEARRAYREF)AllocatePrimitiveArray((CorElementType)arrayType, size);
+        if (!CopyArrayVAL<UINT16>(array, size, pBlob, endBlob))
             goto badBlob;
         break;
     }
@@ -881,8 +565,8 @@ void COMCustomAttribute::ReadArray(Assembly *pCtorAssembly,
     case SERIALIZATION_TYPE_U4:
     case SERIALIZATION_TYPE_R4:
     {
-        *pArray = (BASEARRAYREF)AllocatePrimitiveArray((CorElementType)arrayType, size);
-        if (!CopyArrayVAL<UINT32>(*pArray, size, pBlob, endBlob))
+        array = (BASEARRAYREF)AllocatePrimitiveArray((CorElementType)arrayType, size);
+        if (!CopyArrayVAL<UINT32>(array, size, pBlob, endBlob))
             goto badBlob;
         break;
     }
@@ -890,8 +574,8 @@ void COMCustomAttribute::ReadArray(Assembly *pCtorAssembly,
     case SERIALIZATION_TYPE_U8:
     case SERIALIZATION_TYPE_R8:
     {
-        *pArray = (BASEARRAYREF)AllocatePrimitiveArray((CorElementType)arrayType, size);
-        if (!CopyArrayVAL<UINT64>(*pArray, size, pBlob, endBlob))
+        array = (BASEARRAYREF)AllocatePrimitiveArray((CorElementType)arrayType, size);
+        if (!CopyArrayVAL<UINT64>(array, size, pBlob, endBlob))
             goto badBlob;
         break;
     }
@@ -907,14 +591,14 @@ void COMCustomAttribute::ReadArray(Assembly *pCtorAssembly,
         if (th.IsNull())
             goto badBlob;
 
-        *pArray = (BASEARRAYREF)AllocateObjectArray(size, th);
+        array = (BASEARRAYREF)AllocateObjectArray(size, th);
         if (arrayType == SERIALIZATION_TYPE_SZARRAY)
             // switch the th to be the proper one
             th = th.GetArrayElementTypeHandle();
         for (int i = 0; i < size; i++) {
             element = GetDataFromBlob(pCtorAssembly, arrayType, th, pBlob, endBlob, pModule, &isObject);
             _ASSERTE(isObject || element == NULL);
-            ((PTRARRAYREF)(*pArray))->SetAt(i, ArgSlotToObj(element));
+            ((PTRARRAYREF)(array))->SetAt(i, ArgSlotToObj(element));
         }
         break;
     }
@@ -931,21 +615,21 @@ void COMCustomAttribute::ReadArray(Assembly *pCtorAssembly,
         TypeHandle arrayHandle = ClassLoader::LoadArrayTypeThrowing(th);
         if (arrayHandle.IsNull())
             goto badBlob;
-        *pArray = (BASEARRAYREF)AllocateSzArray(arrayHandle, bounds);
+        array = (BASEARRAYREF)AllocateSzArray(arrayHandle, bounds);
         BOOL fSuccess;
         switch (elementSize)
         {
         case 1:
-            fSuccess = CopyArrayVAL<BYTE>(*pArray, size, pBlob, endBlob);
+            fSuccess = CopyArrayVAL<BYTE>(array, size, pBlob, endBlob);
             break;
         case 2:
-            fSuccess = CopyArrayVAL<UINT16>(*pArray, size, pBlob, endBlob);
+            fSuccess = CopyArrayVAL<UINT16>(array, size, pBlob, endBlob);
             break;
         case 4:
-            fSuccess = CopyArrayVAL<UINT32>(*pArray, size, pBlob, endBlob);
+            fSuccess = CopyArrayVAL<UINT32>(array, size, pBlob, endBlob);
             break;
         case 8:
-            fSuccess = CopyArrayVAL<UINT64>(*pArray, size, pBlob, endBlob);
+            fSuccess = CopyArrayVAL<UINT64>(array, size, pBlob, endBlob);
             break;
         default:
             fSuccess = FALSE;
@@ -960,11 +644,12 @@ void COMCustomAttribute::ReadArray(Assembly *pCtorAssembly,
         COMPlusThrow(kCustomAttributeFormatException);
     }
 
+    GCPROTECT_END();
+    return array;
 }
 
 // get data out of the blob according to a CorElementType
-/*static*/
-ARG_SLOT COMCustomAttribute::GetDataFromBlob(Assembly *pCtorAssembly,
+static ARG_SLOT GetDataFromBlob(Assembly *pCtorAssembly,
                       CorSerializationType type,
                       TypeHandle th,
                       BYTE **pBlob,
@@ -974,6 +659,7 @@ ARG_SLOT COMCustomAttribute::GetDataFromBlob(Assembly *pCtorAssembly,
 {
     CONTRACTL
     {
+        MODE_COOPERATIVE;
         THROWS;
     }
     CONTRACTL_END;
@@ -1162,11 +848,7 @@ ARG_SLOT COMCustomAttribute::GetDataFromBlob(Assembly *pCtorAssembly,
             else
                 arrayType = (CorSerializationType)th.GetInternalCorElementType();
 
-            BASEARRAYREF array = NULL;
-            GCPROTECT_BEGIN(array);
-            ReadArray(pCtorAssembly, arrayType, size, th, pBlob, endBlob, pModule, &array);
-            retValue = ObjToArgSlot(array);
-            GCPROTECT_END();
+            retValue = ObjToArgSlot(ReadArray(pCtorAssembly, arrayType, size, th, pBlob, endBlob, pModule));
         }
         *bObjectCreated = TRUE;
         break;
@@ -1180,3 +862,317 @@ ARG_SLOT COMCustomAttribute::GetDataFromBlob(Assembly *pCtorAssembly,
 
     return retValue;
 }
+
+extern "C" BOOL QCALLTYPE CustomAttribute_ParseAttributeUsageAttribute(
+    PVOID pData,
+    ULONG cData,
+    ULONG* pTargets,
+    BOOL* pAllowMultiple,
+    BOOL* pInherited)
+{
+    QCALL_CONTRACT_NO_GC_TRANSITION;
+
+    CustomAttributeParser ca(pData, cData);
+    CaArg args[1];
+    args[0].InitEnum(SERIALIZATION_TYPE_I4, 0);
+    if (FAILED(::ParseKnownCaArgs(ca, args, ARRAY_SIZE(args))))
+        return FALSE;
+    *pTargets = args[0].val.u4;
+
+    // Define index values.
+    const int allowMultiple = 0;
+    const int inherited = 1;
+
+    CaNamedArg namedArgs[2];
+    CaType namedArgTypes[2];
+    namedArgTypes[allowMultiple].Init(SERIALIZATION_TYPE_BOOLEAN);
+    namedArgTypes[inherited].Init(SERIALIZATION_TYPE_BOOLEAN);
+    namedArgs[allowMultiple].Init("AllowMultiple", SERIALIZATION_TYPE_PROPERTY, namedArgTypes[allowMultiple], FALSE);
+    namedArgs[inherited].Init("Inherited", SERIALIZATION_TYPE_PROPERTY, namedArgTypes[inherited], TRUE);
+    if (FAILED(::ParseKnownCaNamedArgs(ca, namedArgs, ARRAY_SIZE(namedArgs))))
+        return FALSE;
+
+    *pAllowMultiple = namedArgs[allowMultiple].val.boolean ? TRUE : FALSE;
+    *pInherited = namedArgs[inherited].val.boolean ? TRUE : FALSE;
+    return TRUE;
+}
+
+extern "C" void QCALLTYPE CustomAttribute_CreateCustomAttributeInstance(
+    QCall::ModuleHandle pModule,
+    QCall::ObjectHandleOnStack pCaType,
+    QCall::ObjectHandleOnStack pMethod,
+    BYTE** ppBlob,
+    BYTE* pEndBlob,
+    INT32* pcNamedArgs,
+    QCall::ObjectHandleOnStack result)
+{
+    QCALL_CONTRACT;
+
+    BEGIN_QCALL;
+
+    GCX_COOP();
+
+    MethodDesc* pCtorMD = ((REFLECTMETHODREF)pMethod.Get())->GetMethod();
+    TypeHandle th = ((REFLECTCLASSBASEREF)pCaType.Get())->GetType();
+
+    MethodDescCallSite ctorCallSite(pCtorMD, th);
+    MetaSig* pSig = ctorCallSite.GetMetaSig();
+    BYTE* pBlob = *ppBlob;
+
+    // get the number of arguments and allocate an array for the args
+    ARG_SLOT *args = NULL;
+    UINT cArgs = pSig->NumFixedArgs() + 1; // make room for the this pointer
+    UINT i = 1; // used to flag that we actually get the right number of arg from the blob
+
+    args = (ARG_SLOT*)_alloca(cArgs * sizeof(ARG_SLOT));
+    memset((void*)args, 0, cArgs * sizeof(ARG_SLOT));
+
+    OBJECTREF *argToProtect = (OBJECTREF*)_alloca(cArgs * sizeof(OBJECTREF));
+    memset((void*)argToProtect, 0, cArgs * sizeof(OBJECTREF));
+
+    // load the this pointer
+    argToProtect[0] = th.GetMethodTable()->Allocate(); // this is the value to return after the ctor invocation
+
+    if (pBlob)
+    {
+        if (pBlob < pEndBlob)
+        {
+            if (pBlob + 2 > pEndBlob)
+            {
+                COMPlusThrow(kCustomAttributeFormatException);
+            }
+            INT16 prolog = GET_UNALIGNED_VAL16(pBlob);
+            if (prolog != 1)
+                COMPlusThrow(kCustomAttributeFormatException);
+            pBlob += 2;
+        }
+
+        if (cArgs > 1)
+        {
+            GCPROTECT_ARRAY_BEGIN(*argToProtect, cArgs);
+            {
+                // loop through the args
+                for (i = 1; i < cArgs; i++) {
+                    CorElementType type = pSig->NextArg();
+                    if (type == ELEMENT_TYPE_END)
+                        break;
+                    BOOL bObjectCreated = FALSE;
+                    TypeHandle th = pSig->GetLastTypeHandleThrowing();
+                    if (th.IsArray())
+                        // get the array element
+                        th = th.GetArrayElementTypeHandle();
+                    ARG_SLOT data = GetDataFromBlob(pCtorMD->GetAssembly(), (CorSerializationType)type, th, &pBlob, pEndBlob, pModule, &bObjectCreated);
+                    if (bObjectCreated)
+                        argToProtect[i] = ArgSlotToObj(data);
+                    else
+                        args[i] = data;
+                }
+            }
+            GCPROTECT_END();
+
+            // We have borrowed the signature from MethodDescCallSite. We have to put it back into the initial position
+            // because of that's where MethodDescCallSite expects to find it below.
+            pSig->Reset();
+
+            for (i = 1; i < cArgs; i++)
+            {
+                if (argToProtect[i] != NULL)
+                {
+                    _ASSERTE(args[i] == NULL);
+                    args[i] = ObjToArgSlot(argToProtect[i]);
+                }
+            }
+        }
+    }
+    args[0] = ObjToArgSlot(argToProtect[0]);
+
+    if (i != cArgs)
+        COMPlusThrow(kCustomAttributeFormatException);
+
+    // check if there are any named properties to invoke,
+    // if so set the by ref int passed in to point
+    // to the blob position where name properties start
+    *pcNamedArgs = 0;
+
+    if (pBlob && pBlob != pEndBlob)
+    {
+        if (pBlob + 2 > pEndBlob)
+            COMPlusThrow(kCustomAttributeFormatException);
+
+        *pcNamedArgs = GET_UNALIGNED_VAL16(pBlob);
+
+        pBlob += 2;
+    }
+
+    *ppBlob = pBlob;
+
+    if (*pcNamedArgs == 0 && pBlob != pEndBlob)
+        COMPlusThrow(kCustomAttributeFormatException);
+
+    // make the invocation to the ctor
+    result.Set(ArgSlotToObj(args[0]));
+    if (pCtorMD->GetMethodTable()->IsValueType())
+        args[0] = PtrToArgSlot(OBJECTREFToObject(result.Get())->UnBox());
+
+    ctorCallSite.CallWithValueTypes(args);
+
+    END_QCALL;
+}
+
+extern "C" void QCALLTYPE CustomAttribute_CreatePropertyOrFieldData(
+    QCall::ModuleHandle pModule,
+    BYTE** ppBlobStart,
+    BYTE* pBlobEnd,
+    QCall::StringHandleOnStack pName,
+    BOOL* pbIsProperty,
+    QCall::ObjectHandleOnStack pType,
+    QCall::ObjectHandleOnStack pValue)
+{
+    QCALL_CONTRACT;
+
+    BEGIN_QCALL;
+
+    BYTE* pBlob = *ppBlobStart;
+
+    GCX_COOP();
+
+    Assembly *pCtorAssembly = NULL;
+
+    MethodTable *pMTValue = NULL;
+    CorSerializationType arrayType = SERIALIZATION_TYPE_BOOLEAN;
+    BOOL bObjectCreated = FALSE;
+    TypeHandle nullTH;
+
+    if (pBlob + 2 > pBlobEnd)
+        COMPlusThrow(kCustomAttributeFormatException);
+
+    // get whether it is a field or a property
+    CorSerializationType propOrField = (CorSerializationType)*pBlob;
+    pBlob++;
+    if (propOrField == SERIALIZATION_TYPE_FIELD)
+        *pbIsProperty = FALSE;
+    else if (propOrField == SERIALIZATION_TYPE_PROPERTY)
+        *pbIsProperty = TRUE;
+    else
+        COMPlusThrow(kCustomAttributeFormatException);
+
+    // get the type of the field
+    CorSerializationType fieldType = (CorSerializationType)*pBlob;
+    pBlob++;
+    if (fieldType == SERIALIZATION_TYPE_SZARRAY)
+    {
+        arrayType = (CorSerializationType)*pBlob;
+
+        if (pBlob + 1 > pBlobEnd)
+            COMPlusThrow(kCustomAttributeFormatException);
+
+        pBlob++;
+    }
+    if (fieldType == SERIALIZATION_TYPE_ENUM || arrayType == SERIALIZATION_TYPE_ENUM)
+    {
+        // get the enum type
+        ReflectClassBaseObject *pEnum =
+            (ReflectClassBaseObject*)OBJECTREFToObject(ArgSlotToObj(GetDataFromBlob(
+                pCtorAssembly, SERIALIZATION_TYPE_TYPE, nullTH, &pBlob, pBlobEnd, pModule, &bObjectCreated)));
+
+        if (pEnum == NULL)
+            COMPlusThrow(kCustomAttributeFormatException);
+
+        _ASSERTE(bObjectCreated);
+
+        TypeHandle th = pEnum->GetType();
+        _ASSERTE(th.IsEnum());
+
+        pMTValue = th.AsMethodTable();
+        if (fieldType == SERIALIZATION_TYPE_ENUM)
+            // load the enum type to pass it back
+            pType.Set(th.GetManagedClassObject());
+        else
+            nullTH = th;
+    }
+
+    // get the string representing the field/property name
+    pName.Set(ArgSlotToString(GetDataFromBlob(
+        pCtorAssembly, SERIALIZATION_TYPE_STRING, nullTH, &pBlob, pBlobEnd, pModule, &bObjectCreated)));
+    _ASSERTE(bObjectCreated || pName.Get() == NULL);
+
+    // create the object and return it
+    switch (fieldType)
+    {
+        case SERIALIZATION_TYPE_TAGGED_OBJECT:
+            pType.Set(g_pObjectClass->GetManagedClassObject());
+            FALLTHROUGH;
+        case SERIALIZATION_TYPE_TYPE:
+        case SERIALIZATION_TYPE_STRING:
+            pValue.Set(ArgSlotToObj(GetDataFromBlob(
+                pCtorAssembly, fieldType, nullTH, &pBlob, pBlobEnd, pModule, &bObjectCreated)));
+            _ASSERTE(bObjectCreated || pValue.Get() == NULL);
+
+            if (pValue.Get() == NULL)
+            {
+                // load the proper type so that code in managed knows which property to load
+                if (fieldType == SERIALIZATION_TYPE_STRING)
+                    pType.Set(CoreLibBinder::GetElementType(ELEMENT_TYPE_STRING)->GetManagedClassObject());
+                else if (fieldType == SERIALIZATION_TYPE_TYPE)
+                    pType.Set(CoreLibBinder::GetClass(CLASS__TYPE)->GetManagedClassObject());
+            }
+            break;
+        case SERIALIZATION_TYPE_SZARRAY:
+        {
+            pValue.Set(NULL);
+            int arraySize = (int)GetDataFromBlob(pCtorAssembly, SERIALIZATION_TYPE_I4, nullTH, &pBlob, pBlobEnd, pModule, &bObjectCreated);
+
+            if (arraySize != -1)
+            {
+                _ASSERTE(!bObjectCreated);
+                if (arrayType == SERIALIZATION_TYPE_STRING)
+                    nullTH = TypeHandle(CoreLibBinder::GetElementType(ELEMENT_TYPE_STRING));
+                else if (arrayType == SERIALIZATION_TYPE_TYPE)
+                    nullTH = TypeHandle(CoreLibBinder::GetClass(CLASS__TYPE));
+                else if (arrayType == SERIALIZATION_TYPE_TAGGED_OBJECT)
+                    nullTH = TypeHandle(g_pObjectClass);
+                pValue.Set(ReadArray(pCtorAssembly, arrayType, arraySize, nullTH, &pBlob, pBlobEnd, pModule));
+            }
+            if (pValue.Get() == NULL)
+            {
+                TypeHandle arrayTH;
+                switch (arrayType)
+                {
+                    case SERIALIZATION_TYPE_STRING:
+                        arrayTH = TypeHandle(CoreLibBinder::GetElementType(ELEMENT_TYPE_STRING));
+                        break;
+                    case SERIALIZATION_TYPE_TYPE:
+                        arrayTH = TypeHandle(CoreLibBinder::GetClass(CLASS__TYPE));
+                        break;
+                    case SERIALIZATION_TYPE_TAGGED_OBJECT:
+                        arrayTH = TypeHandle(g_pObjectClass);
+                        break;
+                    default:
+                        if (SERIALIZATION_TYPE_BOOLEAN <= arrayType && arrayType <= SERIALIZATION_TYPE_R8)
+                            arrayTH = TypeHandle(CoreLibBinder::GetElementType((CorElementType)arrayType));
+                }
+                if (!arrayTH.IsNull())
+                {
+                    arrayTH = ClassLoader::LoadArrayTypeThrowing(arrayTH);
+                    pType.Set(arrayTH.GetManagedClassObject());
+                }
+            }
+            break;
+        }
+        default:
+            if (SERIALIZATION_TYPE_BOOLEAN <= fieldType && fieldType <= SERIALIZATION_TYPE_R8)
+                pMTValue = CoreLibBinder::GetElementType((CorElementType)fieldType);
+            else if(fieldType == SERIALIZATION_TYPE_ENUM)
+                fieldType = (CorSerializationType)pMTValue->GetInternalCorElementType();
+            else
+                COMPlusThrow(kCustomAttributeFormatException);
+
+            ARG_SLOT val = GetDataFromBlob(pCtorAssembly, fieldType, nullTH, &pBlob, pBlobEnd, pModule, &bObjectCreated);
+            _ASSERTE(!bObjectCreated);
+
+            pValue.Set(pMTValue->Box((void*)ArgSlotEndiannessFixup(&val, pMTValue->GetNumInstanceFieldBytes())));
+    }
+
+    *ppBlobStart = pBlob;
+    END_QCALL;
+}
diff --git a/src/coreclr/vm/customattribute.h b/src/coreclr/vm/customattribute.h
index 14ce07c785d5..5ad802fc7fc0 100644
--- a/src/coreclr/vm/customattribute.h
+++ b/src/coreclr/vm/customattribute.h
@@ -8,12 +8,11 @@
 #include "fcall.h"
 #include "../md/compiler/custattr.h"
 
-typedef Factory< SArray<CaValue> > CaValueArrayFactory;
+using CaValueArrayFactory = Factory<SArray<CaValue>>;
 
-class Attribute
+namespace Attribute
 {
-public:
-    static HRESULT ParseAttributeArgumentValues(
+    HRESULT ParseArgumentValues(
         void* pCa,
         INT32 cCa,
         CaValueArrayFactory* pCaValueArrayFactory,
@@ -22,56 +21,31 @@ class Attribute
         CaNamedArg* pCaNamedArgs,
         COUNT_T cNamedArgs,
         DomainAssembly* pDomainAssembly);
-};
-
-class COMCustomAttribute
-{
-public:
-
-    // custom attributes utility functions
-    static FCDECL5(VOID, ParseAttributeUsageAttribute, PVOID pData, ULONG cData, ULONG* pTargets, CLR_BOOL* pInherited, CLR_BOOL* pAllowMultiple);
-    static FCDECL6(LPVOID, CreateCaObject, ReflectModuleBaseObject* pAttributedModuleUNSAFE, ReflectClassBaseObject* pCaTypeUNSAFE, ReflectMethodObject *pMethodUNSAFE, BYTE** ppBlob, BYTE* pEndBlob, INT32* pcNamedArgs);
-    static FCDECL7(void, GetPropertyOrFieldData, ReflectModuleBaseObject *pModuleUNSAFE, BYTE** ppBlobStart, BYTE* pBlobEnd, STRINGREF* pName, CLR_BOOL* pbIsProperty, OBJECTREF* pType, OBJECTREF* value);
-
-private:
-
-    static TypeHandle GetTypeHandleFromBlob(
-        Assembly *pCtorAssembly,
-        CorSerializationType objType,
-        BYTE **pBlob,
-        const BYTE *endBlob,
-        Module *pModule);
-
-    static ARG_SLOT GetDataFromBlob(
-        Assembly *pCtorAssembly,
-        CorSerializationType type,
-        TypeHandle th,
-        BYTE **pBlob,
-        const BYTE *endBlob,
-        Module *pModule,
-        BOOL *bObjectCreated);
-
-    static void ReadArray(
-        Assembly *pCtorAssembly,
-        CorSerializationType arrayType,
-        int size,
-        TypeHandle th,
-        BYTE **pBlob,
-        const BYTE *endBlob,
-        Module *pModule,
-        BASEARRAYREF *pArray);
-
-    static int GetStringSize(
-        BYTE **pBlob,
-        const BYTE *endBlob);
-
-    template < typename T >
-    static BOOL CopyArrayVAL(
-        BASEARRAYREF pArray,
-        int nElements,
-        BYTE **pBlob,
-        const BYTE *endBlob);
-};
-
-#endif
-
+}
+
+extern "C" BOOL QCALLTYPE CustomAttribute_ParseAttributeUsageAttribute(
+    PVOID pData,
+    ULONG cData,
+    ULONG* pTargets,
+    BOOL* pAllowMultiple,
+    BOOL* pInherited);
+
+extern "C" void QCALLTYPE CustomAttribute_CreateCustomAttributeInstance(
+    QCall::ModuleHandle pModule,
+    QCall::ObjectHandleOnStack pCaType,
+    QCall::ObjectHandleOnStack pMethod,
+    BYTE** ppBlob,
+    BYTE* pEndBlob,
+    INT32* pcNamedArgs,
+    QCall::ObjectHandleOnStack result);
+
+extern "C" void QCALLTYPE CustomAttribute_CreatePropertyOrFieldData(
+    QCall::ModuleHandle pModule,
+    BYTE** ppBlobStart,
+    BYTE* pBlobEnd,
+    QCall::StringHandleOnStack pName,
+    BOOL* pbIsProperty,
+    QCall::ObjectHandleOnStack pType,
+    QCall::ObjectHandleOnStack value);
+
+#endif // _CUSTOMATTRIBUTE_H_
diff --git a/src/coreclr/vm/dacenumerablehash.inl b/src/coreclr/vm/dacenumerablehash.inl
index a6083e26fda4..93d63116e0f4 100644
--- a/src/coreclr/vm/dacenumerablehash.inl
+++ b/src/coreclr/vm/dacenumerablehash.inl
@@ -7,7 +7,7 @@
 // See DacEnumerableHash.h for a more detailed description.
 //
 
-#include "clr_std/type_traits"
+#include <type_traits>
 
 // Our implementation embeds entry data supplied by the hash sub-class into a larger entry structure
 // containing DacEnumerableHash metadata. We often end up returning pointers to the inner entry to sub-class code and
diff --git a/src/coreclr/vm/debugdebugger.cpp b/src/coreclr/vm/debugdebugger.cpp
index fc6e7a5019e7..a6bdd075afc4 100644
--- a/src/coreclr/vm/debugdebugger.cpp
+++ b/src/coreclr/vm/debugdebugger.cpp
@@ -33,82 +33,13 @@
 #define IMAGE_DEBUG_TYPE_EMBEDDED_PORTABLE_PDB  17
 
 #ifndef DACCESS_COMPILE
-//----------------------------------------------------------------------------
-//
-// FindMostRecentUserCodeOnStack - find out the most recent user managed code on stack
-//
-//
-// Arguments:
-//    pContext - [optional] pointer to the context to be restored the user code's context if found
-//
-// Return Value:
-//    The most recent user managed code or NULL if not found.
-//
-// Note:
-//    It is a heuristic approach to get the address of the user managed code that calls into
-//    BCL like System.Diagnostics.Debugger.Break assuming that we can find the original user
-//    code caller with stack walking.
-//
-//    DoWatsonForUserBreak has the address returned from the helper frame that points to an
-//    internal BCL helpful function doing permission check.  From bucketing perspetive it is
-//    more preferable to report the user managed code that invokes Debugger.Break instead.
-//
-//    User managed code is managed code in non-system assembly.   Currently, only CoreLib
-//    is marked as system assembly.
-//
-//----------------------------------------------------------------------------
-UINT_PTR FindMostRecentUserCodeOnStack(void)
-{
-    CONTRACTL
-    {
-        NOTHROW;
-        GC_NOTRIGGER;
-        CAN_TAKE_LOCK;
-    }
-    CONTRACTL_END;
-
-    Thread * pThread = GetThread();
-    UINT_PTR address = NULL;
-
-    CONTEXT ctx;
-    REGDISPLAY rd;
-    SetUpRegdisplayForStackWalk(pThread, &ctx, &rd);
 
-    StackFrameIterator frameIter;
-    frameIter.Init(pThread, pThread->GetFrame(), &rd, FUNCTIONSONLY | LIGHTUNWIND);
-
-    while (frameIter.IsValid())
-    {
-        MethodDesc * pMD = frameIter.m_crawl.GetFunction();
-
-        // Is it not a system assembly?  User manged user will not be in system assembly.
-        if ((pMD != NULL) && (!pMD->GetAssembly()->IsSystem()))
-        {
-            CrawlFrame * pCF = &(frameIter.m_crawl);
-            address = (UINT_PTR)GetControlPC(pCF->GetRegisterSet());
-            break;
-        }
-
-        if (frameIter.Next() != SWA_CONTINUE)
-        {
-            break;
-        }
-    }
-
-    return address;
-}
-
-
-// This does a user break, triggered by System.Diagnostics.Debugger.Break, or the IL opcode for break.
 //
 // Notes:
 //    If a managed debugger is attached, this should send the managed UserBreak event.
 //    Else if a native debugger is attached, this should send a native break event (kernel32!DebugBreak)
 //    Else, this should invoke Watson.
 //
-// Historical trivia:
-// - In whidbey, this would still invoke Watson if a native-only debugger is attached.
-// - In arrowhead, the managed debugging pipeline switched to be built on the native pipeline.
 FCIMPL0(void, DebugDebugger::Break)
 {
     FCALL_CONTRACT;
@@ -917,7 +848,7 @@ void DebugStackTrace::GetStackFramesHelper(Frame *pStartFrame,
         pData->TargetThread->GetInternal() == GetThread())
     {
         // Null target thread specifies current thread.
-        GetThread()->StackWalkFrames(GetStackFramesCallback, pData, FUNCTIONSONLY, pStartFrame);
+        GetThread()->StackWalkFrames(GetStackFramesCallback, pData, FUNCTIONSONLY | QUICKUNWIND, pStartFrame);
     }
     else
     {
@@ -1152,9 +1083,18 @@ void DebugStackTrace::GetStackFramesFromException(OBJECTREF * e,
                 // to spot.
                 DWORD dwNativeOffset;
 
-                if (cur.ip)
+                UINT_PTR ip = cur.ip;
+#if defined(DACCESS_COMPILE) && defined(TARGET_AMD64)
+                // Compensate for a bug in the old EH that for a frame that faulted
+                // has the ip pointing to an address before the faulting instruction
+                if (g_isNewExceptionHandlingEnabled && (i == 0) && ((cur.flags & STEF_IP_ADJUSTED) == 0))
+                {
+                    ip -= 1;
+                }
+#endif // DACCESS_COMPILE && TARGET_AMD64
+                if (ip)
                 {
-                    EECodeInfo codeInfo(cur.ip);
+                    EECodeInfo codeInfo(ip);
                     dwNativeOffset = codeInfo.GetRelOffset();
                 }
                 else
@@ -1165,7 +1105,7 @@ void DebugStackTrace::GetStackFramesFromException(OBJECTREF * e,
                 pData->pElements[i].InitPass1(
                     dwNativeOffset,
                     pMD,
-                    (PCODE)cur.ip,
+                    (PCODE)ip,
                     cur.flags);
 #ifndef DACCESS_COMPILE
                 pData->pElements[i].InitPass2();
@@ -1245,7 +1185,7 @@ void DebugStackTrace::DebugStackTraceElement::InitPass2()
         bRes = g_pDebugInterface->GetILOffsetFromNative(
             pFunc,
             (LPCBYTE)this->ip,
-            fAdjustOffset ? this->dwOffset - 1 : this->dwOffset,
+            fAdjustOffset ? this->dwOffset - STACKWALK_CONTROLPC_ADJUST_OFFSET : this->dwOffset,
             &this->dwILOffset);
     }
 
diff --git a/src/coreclr/vm/dispatchinfo.cpp b/src/coreclr/vm/dispatchinfo.cpp
index eb0c83f7a6ce..8b769c71bcc0 100644
--- a/src/coreclr/vm/dispatchinfo.cpp
+++ b/src/coreclr/vm/dispatchinfo.cpp
@@ -2578,10 +2578,9 @@ bool DispatchInfo::IsPropertyAccessorVisible(bool fIsSetter, OBJECTREF* pMemberI
 
         // Check to see if the new method is a property accessor.
         mdToken tkMember = mdTokenNil;
-        MethodTable *pDeclaringMT = pMDForProperty->GetMethodTable();
-        if (pMDForProperty->GetModule()->GetPropertyInfoForMethodDef(pMDForProperty->GetMemberDef(), &tkMember, NULL, NULL) == S_OK)
+        if (pMDForProperty->GetMDImport()->GetPropertyInfoForMethodDef(pMDForProperty->GetMemberDef(), &tkMember, NULL, NULL) == S_OK)
         {
-            if (IsMemberVisibleFromCom(pDeclaringMT, tkMember, pMDForProperty->GetMemberDef()))
+            if (IsMemberVisibleFromCom(pMDForProperty->GetMethodTable(), tkMember, pMDForProperty->GetMemberDef()))
                 return true;
         }
     }
diff --git a/src/coreclr/vm/dllimport.cpp b/src/coreclr/vm/dllimport.cpp
index f3f1c29d5326..208db4b2ea2d 100644
--- a/src/coreclr/vm/dllimport.cpp
+++ b/src/coreclr/vm/dllimport.cpp
@@ -113,7 +113,7 @@ StubSigDesc::StubSigDesc(MethodDesc *pMD)
     INDEBUG(InitDebugNames());
 }
 
-StubSigDesc::StubSigDesc(MethodDesc* pMD, const Signature& sig, Module* pModule)
+StubSigDesc::StubSigDesc(MethodDesc* pMD, const Signature& sig, Module* pModule, Module* pLoaderModule)
 {
     CONTRACTL
     {
@@ -135,13 +135,13 @@ StubSigDesc::StubSigDesc(MethodDesc* pMD, const Signature& sig, Module* pModule)
         m_tkMethodDef = pMD->GetMemberDef();
         SigTypeContext::InitTypeContext(pMD, &m_typeContext);
         m_pMetadataModule = pMD->GetModule();
-        m_pLoaderModule = pMD->GetLoaderModule();   // Used for ILStubCache selection and MethodTable creation.
+        m_pLoaderModule = pLoaderModule == NULL ? pMD->GetLoaderModule() : pLoaderModule;   // Used for ILStubCache selection and MethodTable creation.
     }
     else
     {
         m_tkMethodDef = mdMethodDefNil;
         m_pMetadataModule = m_pModule;
-        m_pLoaderModule = m_pModule;
+        m_pLoaderModule = pLoaderModule == NULL ? m_pModule : pLoaderModule;
     }
 
     INDEBUG(InitDebugNames());
@@ -545,6 +545,13 @@ class ILStubState : public StubState
                 pcsExceptionHandler->EmitINITOBJ(m_slIL.GetDispatchCodeStream()->GetToken(returnTypeHnd));
             }
             break;
+        case ELEMENT_TYPE_PTR:
+            pcsExceptionHandler->EmitPOP();
+            pcsExceptionHandler->EmitLDC(0);
+            pcsExceptionHandler->EmitCONV_U();
+            _ASSERTE(retvalLocalNum != (DWORD)-1);
+            pcsExceptionHandler->EmitSTLOC(retvalLocalNum);
+            break;
         case ELEMENT_TYPE_BOOLEAN:
         case ELEMENT_TYPE_CHAR:
         case ELEMENT_TYPE_I1:
@@ -1623,6 +1630,10 @@ NDirectStubLinker::NDirectStubLinker(
         m_pcsSetup->EmitSTLOC(m_dwTargetInterfacePointerLocalNum);
     }
 #endif // FEATURE_COMINTEROP
+
+#if defined(TARGET_X86) && defined(FEATURE_IJW)
+    m_dwCopyCtorChainLocalNum = (DWORD)-1;
+#endif // defined(TARGET_X86) && defined(FEATURE_IJW)
 }
 
 void NDirectStubLinker::SetCallingConvention(CorInfoCallConvExtension unmngCallConv, BOOL fIsVarArg)
@@ -1835,6 +1846,23 @@ DWORD NDirectStubLinker::GetReturnValueLocalNum()
     return m_dwRetValLocalNum;
 }
 
+#if defined(TARGET_X86) && defined(FEATURE_IJW)
+DWORD NDirectStubLinker::GetCopyCtorChainLocalNum()
+{
+    STANDARD_VM_CONTRACT;
+
+    if (m_dwCopyCtorChainLocalNum == (DWORD)-1)
+    {
+        // The local is created and initialized lazily when first asked.
+        m_dwCopyCtorChainLocalNum = NewLocal(CoreLibBinder::GetClass(CLASS__COPY_CONSTRUCTOR_CHAIN));
+        m_pcsSetup->EmitLDLOCA(m_dwCopyCtorChainLocalNum);
+        m_pcsSetup->EmitINITOBJ(m_pcsSetup->GetToken(CoreLibBinder::GetClass(CLASS__COPY_CONSTRUCTOR_CHAIN)));
+    }
+
+    return m_dwCopyCtorChainLocalNum;
+}
+#endif // defined(TARGET_X86) && defined(FEATURE_IJW)
+
 BOOL NDirectStubLinker::IsCleanupNeeded()
 {
     LIMITED_METHOD_CONTRACT;
@@ -2064,6 +2092,10 @@ void NDirectStubLinker::End(DWORD dwStubFlags)
     }
 }
 
+#if defined(TARGET_X86) && defined(TARGET_WINDOWS)
+EXTERN_C void STDCALL CopyConstructorCallStub(void);
+#endif // defined(TARGET_X86) && defined(TARGET_WINDOWS)
+
 void NDirectStubLinker::DoNDirect(ILCodeStream *pcsEmit, DWORD dwStubFlags, MethodDesc * pStubMD)
 {
     STANDARD_VM_CONTRACT;
@@ -2147,6 +2179,21 @@ void NDirectStubLinker::DoNDirect(ILCodeStream *pcsEmit, DWORD dwStubFlags, Meth
         }
     }
 
+#if defined(TARGET_X86) && defined(FEATURE_IJW)
+    if (m_dwCopyCtorChainLocalNum != (DWORD)-1)
+    {
+        // If we have a copy constructor chain local, we need to call the copy constructor stub
+        // to ensure that the chain is called correctly.
+        // Let's install the stub chain here and redirect the call to the stub.
+        DWORD targetLoc = NewLocal(ELEMENT_TYPE_I);
+        pcsEmit->EmitSTLOC(targetLoc);
+        pcsEmit->EmitLDLOCA(m_dwCopyCtorChainLocalNum);
+        pcsEmit->EmitLDLOC(targetLoc);
+        pcsEmit->EmitCALL(METHOD__COPY_CONSTRUCTOR_CHAIN__INSTALL, 2, 0);
+        pcsEmit->EmitLDC((DWORD_PTR)&CopyConstructorCallStub);
+    }
+#endif // defined(TARGET_X86) && defined(FEATURE_IJW)
+
     // For managed-to-native calls, the rest of the work is done by the JIT. It will
     // erect InlinedCallFrame, flip GC mode, and use the specified calling convention
     // to call the target. For native-to-managed calls, this is an ordinary managed
@@ -2820,6 +2867,7 @@ static LPBYTE FollowIndirect(LPBYTE pTarget)
 }
 #endif // !TARGET_UNIX
 
+#ifdef FEATURE_IJW
 BOOL HeuristicDoesThisLookLikeAGetLastErrorCall(LPBYTE pTarget)
 {
     CONTRACTL
@@ -2830,7 +2878,6 @@ BOOL HeuristicDoesThisLookLikeAGetLastErrorCall(LPBYTE pTarget)
     }
     CONTRACTL_END;
 
-#if !defined(TARGET_UNIX)
     static LPBYTE pGetLastError = NULL;
     if (!pGetLastError)
     {
@@ -2865,18 +2912,10 @@ BOOL HeuristicDoesThisLookLikeAGetLastErrorCall(LPBYTE pTarget)
         // jmp [xxxx] - could be an import thunk
         return pTarget2 == pGetLastError;
     }
-#endif // !TARGET_UNIX
 
     return FALSE;
 }
-
-DWORD STDMETHODCALLTYPE FalseGetLastError()
-{
-    WRAPPER_NO_CONTRACT;
-
-    return GetThread()->m_dwLastError;
-}
-
+#endif // FEATURE_IJW
 
 CorInfoCallConvExtension GetDefaultCallConv(BOOL bIsVarArg)
 {
@@ -3173,6 +3212,7 @@ BOOL NDirect::MarshalingRequired(
     _In_opt_ MethodDesc* pMD,
     _In_opt_ PCCOR_SIGNATURE pSig,
     _In_opt_ Module* pModule,
+    _In_opt_ SigTypeContext* pTypeContext,
     _In_ bool unmanagedCallersOnlyRequiresMarshalling)
 {
     CONTRACTL
@@ -3253,8 +3293,6 @@ BOOL NDirect::MarshalingRequired(
     mdParamDef *pParamTokenArray = (mdParamDef *)_alloca(numArgs * sizeof(mdParamDef));
     IMDInternalImport *pMDImport = pModule->GetMDImport();
 
-    SigTypeContext emptyTypeContext;
-
     mdMethodDef methodToken = mdMethodDefNil;
     if (pMD != NULL)
     {
@@ -3314,7 +3352,7 @@ BOOL NDirect::MarshalingRequired(
             case ELEMENT_TYPE_VALUETYPE:
             case ELEMENT_TYPE_GENERICINST:
             {
-                TypeHandle hndArgType = arg.GetTypeHandleThrowing(pModule, &emptyTypeContext);
+                TypeHandle hndArgType = arg.GetTypeHandleThrowing(pModule, pTypeContext);
                 bool isValidGeneric = IsValidForGenericMarshalling(hndArgType.GetMethodTable(), false, runtimeMarshallingEnabled);
                 if(!hndArgType.IsValueType() ||  !isValidGeneric)
                     return true;
@@ -4185,8 +4223,10 @@ namespace
                                         pHashParams,
                                         pParams->m_dwStubFlags,
                                         pParams->m_pModule,
+                                        pParams->m_pLoaderModule,
                                         pParams->m_sig.GetRawSig(),
                                         pParams->m_sig.GetRawSigLen(),
+                                        pParams->m_pTypeContext,
                                         pamTracker,
                                         bILStubCreator,
                                         pLastMD);
@@ -4256,8 +4296,7 @@ static void CreateNDirectStubAccessMetadata(
     {
         if (unmgdCallConv == CorInfoCallConvExtension::Managed ||
             unmgdCallConv == CorInfoCallConvExtension::Fastcall ||
-            unmgdCallConv == CorInfoCallConvExtension::FastcallMemberFunction ||
-            unmgdCallConv == CorInfoCallConvExtension::Swift)
+            unmgdCallConv == CorInfoCallConvExtension::FastcallMemberFunction)
         {
             COMPlusThrow(kTypeLoadException, IDS_INVALID_PINVOKE_CALLCONV);
         }
@@ -5034,6 +5073,21 @@ namespace
                                 }
                                 else
                                 {
+                                    if (!pSigDesc->m_typeContext.IsEmpty())
+                                    {
+                                        // For generic calli, we only support blittable types
+                                        if (SF_IsCALLIStub(dwStubFlags)
+                                            && NDirect::MarshalingRequired(NULL, pStubMD->GetSig(), pSigDesc->m_pModule, &pSigDesc->m_typeContext))
+                                        {
+                                            COMPlusThrow(kMarshalDirectiveException, IDS_EE_BADMARSHAL_GENERICS_RESTRICTION);
+                                        }
+                                        // We don't want to support generic varargs, so block it
+                                        else if (SF_IsVarArgStub(dwStubFlags))
+                                        {
+                                            COMPlusThrow(kNotSupportedException, BFA_GENCODE_NOT_BE_VARARG);
+                                        }
+                                    }
+
                                     CreateNDirectStubWorker(pss,
                                                             pSigDesc,
                                                             nlType,
@@ -6020,7 +6074,7 @@ PCODE GetILStubForCalli(VASigCookie *pVASigCookie, MethodDesc *pMD)
             }
         }
 
-        LoaderHeap *pHeap = pVASigCookie->pModule->GetLoaderAllocator()->GetHighFrequencyHeap();
+        LoaderHeap *pHeap = pVASigCookie->pLoaderModule->GetLoaderAllocator()->GetHighFrequencyHeap();
         PCOR_SIGNATURE new_sig = (PCOR_SIGNATURE)(void *)pHeap->AllocMem(S_SIZE_T(signature.GetRawSigLen()));
         CopyMemory(new_sig, signature.GetRawSig(), signature.GetRawSigLen());
 
@@ -6058,7 +6112,8 @@ PCODE GetILStubForCalli(VASigCookie *pVASigCookie, MethodDesc *pMD)
         nlType  = nltAnsi;
     }
 
-    StubSigDesc sigDesc(pMD, signature, pVASigCookie->pModule);
+    StubSigDesc sigDesc(pMD, signature, pVASigCookie->pModule, pVASigCookie->pLoaderModule);
+    sigDesc.InitTypeContext(pVASigCookie->classInst, pVASigCookie->methodInst);
 
     MethodDesc* pStubMD = NDirect::CreateCLRToNativeILStub(&sigDesc,
                                     nlType,
@@ -6078,5 +6133,21 @@ PCODE GetILStubForCalli(VASigCookie *pVASigCookie, MethodDesc *pMD)
     RETURN pVASigCookie->pNDirectILStub;
 }
 
+#if defined(TARGET_X86) && defined(FEATURE_IJW)
+// Copy constructor support for C++/CLI
+EXTERN_C void* STDCALL CallCopyConstructorsWorker(void* esp)
+{
+    STATIC_CONTRACT_THROWS;
+    STATIC_CONTRACT_GC_TRIGGERS;
+    STATIC_CONTRACT_MODE_PREEMPTIVE; // we've already switched to preemptive
+
+    using ExecuteCallback = void*(STDMETHODCALLTYPE*)(void*);
+
+    MethodDesc* pMD = CoreLibBinder::GetMethod(METHOD__COPY_CONSTRUCTOR_CHAIN__EXECUTE_CURRENT_COPIES_AND_GET_TARGET);
+    ExecuteCallback pExecute = (ExecuteCallback)pMD->GetMultiCallableAddrOfCode();
+
+    return pExecute(esp);
+}
+#endif // defined(TARGET_X86) && defined(FEATURE_IJW)
 
 #endif // #ifndef DACCESS_COMPILE
diff --git a/src/coreclr/vm/dllimport.h b/src/coreclr/vm/dllimport.h
index 256b95079933..9a7b1575bff0 100644
--- a/src/coreclr/vm/dllimport.h
+++ b/src/coreclr/vm/dllimport.h
@@ -16,9 +16,9 @@ struct StubSigDesc
 {
 public:
     StubSigDesc(MethodDesc* pMD);
-    StubSigDesc(MethodDesc*  pMD, const Signature& sig, Module* m_pModule);
-    StubSigDesc(MethodTable* pMT, const Signature& sig, Module* m_pModule);
-    StubSigDesc(const Signature& sig, Module* m_pModule);
+    StubSigDesc(MethodDesc*  pMD, const Signature& sig, Module* pModule, Module* pLoaderModule = NULL);
+    StubSigDesc(MethodTable* pMT, const Signature& sig, Module* pModule);
+    StubSigDesc(const Signature& sig, Module* pModule);
 
     MethodDesc        *m_pMD;
     MethodTable       *m_pMT;
@@ -56,6 +56,17 @@ struct StubSigDesc
         }
     }
 #endif // _DEBUG
+
+#ifndef DACCESS_COMPILE
+    void InitTypeContext(Instantiation classInst, Instantiation methodInst)
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(m_typeContext.IsEmpty());
+
+        m_typeContext = SigTypeContext(classInst, methodInst);
+    }
+#endif
 };
 
 //=======================================================================
@@ -92,6 +103,7 @@ class NDirect
         _In_opt_ MethodDesc* pMD,
         _In_opt_ PCCOR_SIGNATURE pSig = NULL,
         _In_opt_ Module* pModule = NULL,
+        _In_opt_ SigTypeContext* pTypeContext = NULL,
         _In_ bool unmanagedCallersOnlyRequiresMarshalling = true);
 
     static void PopulateNDirectMethodDesc(_Inout_ NDirectMethodDesc* pNMD);
@@ -484,6 +496,9 @@ class NDirectStubLinker : public ILStubLinker
     DWORD   GetCleanupWorkListLocalNum();
     DWORD   GetThreadLocalNum();
     DWORD   GetReturnValueLocalNum();
+#if defined(TARGET_X86) && defined(FEATURE_IJW)
+    DWORD   GetCopyCtorChainLocalNum();
+#endif // defined(TARGET_X86) && defined(FEATURE_IJW)
     void    SetCleanupNeeded();
     void    SetExceptionCleanupNeeded();
     BOOL    IsCleanupWorkListSetup();
@@ -553,6 +568,10 @@ class NDirectStubLinker : public ILStubLinker
     DWORD               m_dwTargetEntryPointLocalNum;
 #endif // FEATURE_COMINTEROP
 
+#if defined(TARGET_X86) && defined(FEATURE_IJW)
+    DWORD               m_dwCopyCtorChainLocalNum;
+#endif // defined(TARGET_X86) && defined(FEATURE_IJW)
+
     BOOL                m_fHasCleanupCode;
     BOOL                m_fHasExceptionCleanupCode;
     BOOL                m_fCleanupWorkListIsSetup;
diff --git a/src/coreclr/vm/dllimportcallback.h b/src/coreclr/vm/dllimportcallback.h
index fb2214a8c18d..ac2f2e93cdfd 100644
--- a/src/coreclr/vm/dllimportcallback.h
+++ b/src/coreclr/vm/dllimportcallback.h
@@ -185,7 +185,7 @@ class UMEntryThunk
         uMThunkMarshInfoWriterHolder.GetRW()->RunTimeInit();
 
         // Ensure that we have either the managed target or the delegate.
-        if (m_pObjectHandle == NULL && m_pManagedTarget == NULL)
+        if (m_pObjectHandle == NULL && m_pManagedTarget == (TADDR)0)
             m_pManagedTarget = m_pMD->GetMultiCallableAddrOfCode();
 
         m_code.Encode(&pUMEntryThunkRX->m_code, (BYTE*)m_pUMThunkMarshInfo->GetExecStubEntryPoint(), pUMEntryThunkRX);
@@ -223,7 +223,7 @@ class UMEntryThunk
         }
         else
         {
-            if (m_pManagedTarget != NULL)
+            if (m_pManagedTarget != (TADDR)0)
             {
                 RETURN m_pManagedTarget;
             }
diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp
index bd5bebcce50f..beeb0cd64b2a 100644
--- a/src/coreclr/vm/dynamicmethod.cpp
+++ b/src/coreclr/vm/dynamicmethod.cpp
@@ -515,7 +515,7 @@ HostCodeHeap::TrackAllocation* HostCodeHeap::AllocFromFreeList(size_t header, si
 
                 // The space left is not big enough for a new block, let's just
                 // update the TrackAllocation record for the current block
-                if (pCurrent->size - realSize < max(HOST_CODEHEAP_SIZE_ALIGN, sizeof(TrackAllocation)))
+                if (pCurrent->size - realSize < max<size_t>(HOST_CODEHEAP_SIZE_ALIGN, sizeof(TrackAllocation)))
                 {
                     LOG((LF_BCL, LL_INFO100, "Level2 - CodeHeap [0x%p] - Item removed %p, size 0x%X\n", this, pCurrent, pCurrent->size));
                     // remove current
@@ -1325,7 +1325,7 @@ void LCGMethodResolver::AddToUsedIndCellList(BYTE * indcell)
 
 }
 
-void LCGMethodResolver::ResolveToken(mdToken token, TypeHandle * pTH, MethodDesc ** ppMD, FieldDesc ** ppFD)
+void LCGMethodResolver::ResolveToken(mdToken token, ResolvedToken* resolvedToken)
 {
     STANDARD_VM_CONTRACT;
 
@@ -1335,24 +1335,35 @@ void LCGMethodResolver::ResolveToken(mdToken token, TypeHandle * pTH, MethodDesc
 
     DECLARE_ARGHOLDER_ARRAY(args, 5);
 
+    TypeHandle handle;
+    MethodDesc* pMD = NULL;
+    FieldDesc* pFD = NULL;
     args[ARGNUM_0] = OBJECTREF_TO_ARGHOLDER(ObjectFromHandle(m_managedResolver));
     args[ARGNUM_1] = DWORD_TO_ARGHOLDER(token);
-    args[ARGNUM_2] = pTH;
-    args[ARGNUM_3] = ppMD;
-    args[ARGNUM_4] = ppFD;
+    args[ARGNUM_2] = &handle;
+    args[ARGNUM_3] = &pMD;
+    args[ARGNUM_4] = &pFD;
 
     CALL_MANAGED_METHOD_NORET(args);
 
-    _ASSERTE(*ppMD == NULL || *ppFD == NULL);
+    _ASSERTE(pMD == NULL || pFD == NULL);
 
-    if (pTH->IsNull())
+    if (handle.IsNull())
     {
-        if (*ppMD != NULL) *pTH = (*ppMD)->GetMethodTable();
-        else
-        if (*ppFD != NULL) *pTH = (*ppFD)->GetEnclosingMethodTable();
+        if (pMD != NULL)
+        {
+            handle = pMD->GetMethodTable();
+        }
+        else if (pFD != NULL)
+        {
+            handle = pFD->GetEnclosingMethodTable();
+        }
     }
 
-    _ASSERTE(!pTH->IsNull());
+    _ASSERTE(!handle.IsNull());
+    resolvedToken->TypeHandle = handle;
+    resolvedToken->Method = pMD;
+    resolvedToken->Field = pFD;
 }
 
 //---------------------------------------------------------------------------------------
diff --git a/src/coreclr/vm/dynamicmethod.h b/src/coreclr/vm/dynamicmethod.h
index ddbe3c795cfe..a26a24100611 100644
--- a/src/coreclr/vm/dynamicmethod.h
+++ b/src/coreclr/vm/dynamicmethod.h
@@ -37,6 +37,15 @@ class ChunkAllocator
     void Delete();
 };
 
+struct ResolvedToken final
+{
+    TypeHandle TypeHandle;
+    SigPointer TypeSignature;
+    SigPointer MethodSignature;
+    MethodDesc* Method;
+    FieldDesc* Field;
+};
+
 //---------------------------------------------------------------------------------------
 //
 class DynamicResolver
@@ -90,7 +99,7 @@ class DynamicResolver
     virtual OBJECTHANDLE ConstructStringLiteral(mdToken metaTok) = 0;
     virtual BOOL IsValidStringRef(mdToken metaTok) = 0;
     virtual STRINGREF GetStringLiteral(mdToken metaTok) = 0;
-    virtual void ResolveToken(mdToken token, TypeHandle * pTH, MethodDesc ** ppMD, FieldDesc ** ppFD) = 0;
+    virtual void ResolveToken(mdToken token, ResolvedToken* resolvedToken) = 0;
     virtual SigPointer ResolveSignature(mdToken token) = 0;
     virtual SigPointer ResolveSignatureForVarArg(mdToken token) = 0;
     virtual void GetEHInfo(unsigned EHnumber, CORINFO_EH_CLAUSE* clause) = 0;
@@ -141,7 +150,7 @@ class LCGMethodResolver : public DynamicResolver
 
     OBJECTHANDLE ConstructStringLiteral(mdToken metaTok);
     BOOL IsValidStringRef(mdToken metaTok);
-    void ResolveToken(mdToken token, TypeHandle * pTH, MethodDesc ** ppMD, FieldDesc ** ppFD);
+    void ResolveToken(mdToken token, ResolvedToken* resolvedToken);
     SigPointer ResolveSignature(mdToken token);
     SigPointer ResolveSignatureForVarArg(mdToken token);
     void GetEHInfo(unsigned EHnumber, CORINFO_EH_CLAUSE* clause);
diff --git a/src/coreclr/vm/ecall.cpp b/src/coreclr/vm/ecall.cpp
index 37ac50d124f6..35a5d36eae4d 100644
--- a/src/coreclr/vm/ecall.cpp
+++ b/src/coreclr/vm/ecall.cpp
@@ -96,7 +96,7 @@ void ECall::PopulateManagedStringConstructors()
     INDEBUG(fInitialized = true);
 }
 
-void ECall::PopulateManagedCastHelpers()
+void ECall::PopulateManagedHelpers()
 {
 
     STANDARD_VM_CONTRACT;
@@ -144,6 +144,26 @@ void ECall::PopulateManagedCastHelpers()
     pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__CASTHELPERS__LDELEMAREF));
     pDest = pMD->GetMultiCallableAddrOfCode();
     SetJitHelperFunction(CORINFO_HELP_LDELEMA_REF, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__SPAN_HELPERS__MEMSET));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_MEMSET, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__SPAN_HELPERS__MEMZERO));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_MEMZERO, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__SPAN_HELPERS__MEMCOPY));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_MEMCPY, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATH__ROUND));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_DBLROUND, pDest);
+
+    pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__MATHF__ROUND));
+    pDest = pMD->GetMultiCallableAddrOfCode();
+    SetJitHelperFunction(CORINFO_HELP_FLTROUND, pDest);
 }
 
 static CrstStatic gFCallLock;
@@ -596,7 +616,6 @@ MethodDesc* ECall::MapTargetBackToMethod(PCODE pTarg, PCODE * ppAdjustedEntryPoi
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     }
     CONTRACTL_END;
diff --git a/src/coreclr/vm/ecall.h b/src/coreclr/vm/ecall.h
index bc9d63ae4671..792eea633e8f 100644
--- a/src/coreclr/vm/ecall.h
+++ b/src/coreclr/vm/ecall.h
@@ -94,7 +94,7 @@ class ECall
 
         static void PopulateManagedStringConstructors();
 
-        static void PopulateManagedCastHelpers();
+        static void PopulateManagedHelpers();
 
 #ifdef DACCESS_COMPILE
         // Enumerates all gFCallMethods for minidumps.
diff --git a/src/coreclr/vm/ecalllist.h b/src/coreclr/vm/ecalllist.h
index 1ff1218e4294..fa4f8cb48568 100644
--- a/src/coreclr/vm/ecalllist.h
+++ b/src/coreclr/vm/ecalllist.h
@@ -76,11 +76,6 @@ FCFuncStart(gStringFuncs)
     FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_PtrSByt_Int_Int_Encoding_RetVoid, ECall::CtorSBytePtrStartLengthEncodingManaged)
 FCFuncEnd()
 
-FCFuncStart(gValueTypeFuncs)
-    FCFuncElement("CanCompareBits", ValueTypeHelper::CanCompareBits)
-    FCFuncElement("GetHashCode", ValueTypeHelper::GetHashCode)
-FCFuncEnd()
-
 FCFuncStart(gDiagnosticsDebugger)
     FCFuncElement("BreakInternal", DebugDebugger::Break)
     FCFuncElement("get_IsAttached", DebugDebugger::IsDebuggerAttached)
@@ -98,10 +93,6 @@ FCFuncStart(gEnvironmentFuncs)
     FCFuncElement("get_TickCount64", SystemNative::GetTickCount64)
     FCFuncElement("set_ExitCode", SystemNative::SetExitCode)
     FCFuncElement("get_ExitCode", SystemNative::GetExitCode)
-
-    FCFuncElementSig("FailFast", &gsig_SM_Str_RetVoid, SystemNative::FailFast)
-    FCFuncElementSig("FailFast", &gsig_SM_Str_Exception_RetVoid, SystemNative::FailFastWithException)
-    FCFuncElementSig("FailFast", &gsig_SM_Str_Exception_Str_RetVoid, SystemNative::FailFastWithExceptionAndSource)
 FCFuncEnd()
 
 FCFuncStart(gExceptionFuncs)
@@ -205,7 +196,6 @@ FCFuncStart(gSignatureNative)
 FCFuncEnd()
 
 FCFuncStart(gRuntimeMethodHandle)
-    FCFuncElement("_GetCurrentMethod", RuntimeMethodHandle::GetCurrentMethod)
     FCFuncElement("InvokeMethod", RuntimeMethodHandle::InvokeMethod)
     FCFuncElement("ReboxFromNullable", RuntimeMethodHandle::ReboxFromNullable)
     FCFuncElement("ReboxToNullable", RuntimeMethodHandle::ReboxToNullable)
@@ -242,6 +232,9 @@ FCFuncStart(gCOMFieldHandleNewFuncs)
     FCFuncElement("GetStaticFieldForGenericType", RuntimeFieldHandle::GetStaticFieldForGenericType)
     FCFuncElement("AcquiresContextFromThis", RuntimeFieldHandle::AcquiresContextFromThis)
     FCFuncElement("GetLoaderAllocator", RuntimeFieldHandle::GetLoaderAllocator)
+    FCFuncElement("IsFastPathSupported", RuntimeFieldHandle::IsFastPathSupported)
+    FCFuncElement("GetInstanceFieldOffset", RuntimeFieldHandle::GetInstanceFieldOffset)
+    FCFuncElement("GetStaticFieldAddress", RuntimeFieldHandle::GetStaticFieldAddress)
 FCFuncEnd()
 
 FCFuncStart(gCOMModuleHandleFuncs)
@@ -251,16 +244,6 @@ FCFuncStart(gCOMModuleHandleFuncs)
     FCFuncElement("GetMDStreamVersion", ModuleHandle::GetMDStreamVersion)
 FCFuncEnd()
 
-FCFuncStart(gCOMCustomAttributeFuncs)
-    FCFuncElement("_ParseAttributeUsageAttribute", COMCustomAttribute::ParseAttributeUsageAttribute)
-    FCFuncElement("_CreateCaObject", COMCustomAttribute::CreateCaObject)
-    FCFuncElement("_GetPropertyOrFieldData",  COMCustomAttribute::GetPropertyOrFieldData)
-FCFuncEnd()
-
-FCFuncStart(gCompatibilitySwitchFuncs)
-    FCFuncElement("GetValueInternal", CompatibilitySwitch::GetValue)
-FCFuncEnd()
-
 FCFuncStart(gRuntimeAssemblyFuncs)
     FCFuncElement("FCallIsDynamic", AssemblyNative::IsDynamic)
     FCFuncElement("GetManifestModule", AssemblyHandle::GetManifestModule)
@@ -295,7 +278,6 @@ FCFuncStart(gMathFuncs)
     FCFuncElement("Cosh", COMDouble::Cosh)
     FCFuncElement("Exp", COMDouble::Exp)
     FCFuncElement("Floor", COMDouble::Floor)
-    FCFuncElement("FMod", COMDouble::FMod)
     FCFuncElement("FusedMultiplyAdd", COMDouble::FusedMultiplyAdd)
     FCFuncElement("Log", COMDouble::Log)
     FCFuncElement("Log2", COMDouble::Log2)
@@ -324,7 +306,6 @@ FCFuncStart(gMathFFuncs)
     FCFuncElement("Cosh", COMSingle::Cosh)
     FCFuncElement("Exp", COMSingle::Exp)
     FCFuncElement("Floor", COMSingle::Floor)
-    FCFuncElement("FMod", COMSingle::FMod)
     FCFuncElement("FusedMultiplyAdd", COMSingle::FusedMultiplyAdd)
     FCFuncElement("Log", COMSingle::Log)
     FCFuncElement("Log2", COMSingle::Log2)
@@ -439,13 +420,9 @@ FCFuncStart(gInteropMarshalFuncs)
 FCFuncEnd()
 
 FCFuncStart(gInterlockedFuncs)
-    FCFuncElement("Exchange8", COMInterlocked::Exchange8)
-    FCFuncElement("Exchange16", COMInterlocked::Exchange16)
     FCFuncElement("Exchange32", COMInterlocked::Exchange32)
     FCFuncElement("Exchange64", COMInterlocked::Exchange64)
     FCFuncElement("ExchangeObject", COMInterlocked::ExchangeObject)
-    FCFuncElement("CompareExchange8", COMInterlocked::CompareExchange8)
-    FCFuncElement("CompareExchange16", COMInterlocked::CompareExchange16)
     FCFuncElement("CompareExchange32", COMInterlocked::CompareExchange32)
     FCFuncElement("CompareExchange64", COMInterlocked::CompareExchange64)
     FCFuncElement("CompareExchangeObject", COMInterlocked::CompareExchangeObject)
@@ -473,8 +450,7 @@ FCFuncStart(gRuntimeHelpers)
     FCFuncElement("PrepareDelegate", ReflectionInvocation::PrepareDelegate)
     FCFuncElement("GetHashCode", ObjectNative::GetHashCode)
     FCFuncElement("TryGetHashCode", ObjectNative::TryGetHashCode)
-    FCFuncElement("Equals", ObjectNative::Equals)
-    FCFuncElement("AllocateUninitializedClone", ObjectNative::AllocateUninitializedClone)
+    FCFuncElement("ContentEquals", ObjectNative::ContentEquals)
     FCFuncElement("EnsureSufficientExecutionStack", ReflectionInvocation::EnsureSufficientExecutionStack)
     FCFuncElement("TryEnsureSufficientExecutionStack", ReflectionInvocation::TryEnsureSufficientExecutionStack)
     FCFuncElement("AllocTailCallArgBuffer", TailCallHelp::AllocTailCallArgBuffer)
@@ -572,8 +548,6 @@ FCClassElement("AssemblyLoadContext", "System.Runtime.Loader", gAssemblyLoadCont
 FCClassElement("Buffer", "System", gBufferFuncs)
 FCClassElement("CastHelpers", "System.Runtime.CompilerServices", gCastHelpers)
 FCClassElement("ComAwareWeakReference", "System", gComAwareWeakReferenceFuncs)
-FCClassElement("CompatibilitySwitch", "System.Runtime.Versioning", gCompatibilitySwitchFuncs)
-FCClassElement("CustomAttribute", "System.Reflection", gCOMCustomAttributeFuncs)
 FCClassElement("Debugger", "System.Diagnostics", gDiagnosticsDebugger)
 FCClassElement("Delegate", "System", gDelegateFuncs)
 FCClassElement("DependentHandle", "System.Runtime", gDependentHandleFuncs)
@@ -616,7 +590,6 @@ FCClassElement("Thread", "System.Threading", gThreadFuncs)
 FCClassElement("ThreadPool", "System.Threading", gThreadPoolFuncs)
 FCClassElement("Type", "System", gSystem_Type)
 FCClassElement("TypedReference", "System", gTypedReferenceFuncs)
-FCClassElement("ValueType", "System", gValueTypeFuncs)
 #ifdef FEATURE_COMINTEROP
 FCClassElement("Variant", "System", gVariantFuncs)
 #endif
diff --git a/src/coreclr/vm/eeconfig.cpp b/src/coreclr/vm/eeconfig.cpp
index bcbe20a72397..29017c18d3c7 100644
--- a/src/coreclr/vm/eeconfig.cpp
+++ b/src/coreclr/vm/eeconfig.cpp
@@ -113,10 +113,8 @@ HRESULT EEConfig::Init()
     fJitEnableOptionalRelocs = false;
     fPInvokeRestoreEsp = (DWORD)-1;
 
-    fNgenBindOptimizeNonGac = false;
     fStressLog = false;
     fForceEnc = false;
-    fProbeForStackOverflow = true;
 
     INDEBUG(fStressLog = true;)
 
@@ -179,12 +177,6 @@ HRESULT EEConfig::Init()
     DoubleArrayToLargeObjectHeapThreshold = 1000;
 #endif
 
-#if defined(TARGET_X86) || defined(TARGET_AMD64)
-    dwDisableStackwalkCache = 0;
-#else // TARGET_X86
-    dwDisableStackwalkCache = 1;
-#endif // TARGET_X86
-
 #ifdef _DEBUG
     // interop logging
     m_TraceWrapper = 0;
@@ -482,9 +474,6 @@ HRESULT EEConfig::sync()
     DoubleArrayToLargeObjectHeapThreshold = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_DoubleArrayToLargeObjectHeap, DoubleArrayToLargeObjectHeapThreshold);
 #endif
 
-    dwDisableStackwalkCache = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_DisableStackwalkCache, dwDisableStackwalkCache);
-
-
 #ifdef _DEBUG
     IfFailRet (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_BreakOnClassLoad, (LPWSTR*) &pszBreakOnClassLoad));
     pszBreakOnClassLoad = NarrowWideChar((LPWSTR)pszBreakOnClassLoad);
diff --git a/src/coreclr/vm/eeconfig.h b/src/coreclr/vm/eeconfig.h
index ea28adf7efed..54af05d20b16 100644
--- a/src/coreclr/vm/eeconfig.h
+++ b/src/coreclr/vm/eeconfig.h
@@ -309,12 +309,6 @@ class EEConfig
     unsigned int  GetDoubleArrayToLargeObjectHeapThreshold() const { LIMITED_METHOD_CONTRACT; return DoubleArrayToLargeObjectHeapThreshold; }
 #endif
 
-    inline bool ProbeForStackOverflow() const
-    {
-        LIMITED_METHOD_CONTRACT;
-        return fProbeForStackOverflow;
-    }
-
 #ifdef TEST_DATA_CONSISTENCY
     // get the value of fTestDataConsistency, which controls whether we test that we can correctly detect
     // held locks in DAC builds. This is determined by an environment variable.
@@ -422,10 +416,6 @@ class EEConfig
     // Loader
     bool    ExcludeReadyToRun(LPCUTF8 assemblyName) const;
 
-    bool    NgenBindOptimizeNonGac()        const { LIMITED_METHOD_CONTRACT; return fNgenBindOptimizeNonGac; }
-
-    DWORD   DisableStackwalkCache()         const {LIMITED_METHOD_CONTRACT;  return dwDisableStackwalkCache; }
-
     bool    StressLog()                     const { LIMITED_METHOD_CONTRACT; return fStressLog; }
     bool    ForceEnc()                      const { LIMITED_METHOD_CONTRACT; return fForceEnc; }
     bool    DebugAssembliesModifiable()     const { LIMITED_METHOD_CONTRACT; return fDebugAssembliesModifiable; }
@@ -594,15 +584,9 @@ class EEConfig
     // Assemblies which cannot use Ready to Run images.
     AssemblyNamesList * pReadyToRunExcludeList;
 
-    bool fNgenBindOptimizeNonGac;
-
     bool fStressLog;
     bool fForceEnc;
     bool fDebugAssembliesModifiable;
-    bool fProbeForStackOverflow;
-
-    // Stackwalk optimization flag
-    DWORD dwDisableStackwalkCache;
 
 #ifdef _DEBUG
     // interop logging
diff --git a/src/coreclr/vm/eecontract.cpp b/src/coreclr/vm/eecontract.cpp
index 6a8ca22eba48..87d30daf7c46 100644
--- a/src/coreclr/vm/eecontract.cpp
+++ b/src/coreclr/vm/eecontract.cpp
@@ -159,44 +159,5 @@ void EEContract::DoChecks(UINT testmask, _In_z_ const char *szFunction, _In_z_ c
         default:
             UNREACHABLE();
     }
-
-    // Host Triggers check
-    switch (testmask & HOST_Mask)
-    {
-        case HOST_Calls:
-            {
-                if (!m_pClrDebugState->IsHostCaller())
-                {
-                    if (!( (HostViolation|BadDebugState) & m_pClrDebugState->ViolationMask()))
-                    {
-                        // Avoid infinite recursion by temporarily allowing HOST_CALLS
-                        // violations so that we don't get contract asserts in anything
-                        // called downstream of CONTRACT_ASSERT. If we unwind out of
-                        // here, our dtor will reset our state to what it was on entry.
-                        CONTRACT_VIOLATION(HostViolation);
-                        CONTRACT_ASSERT("HOST_CALLS  encountered in a HOST_NOCALLS scope",
-                                        Contract::HOST_NoCalls,
-                                        Contract::HOST_Mask,
-                                        m_contractStackRecord.m_szFunction,
-                                        m_contractStackRecord.m_szFile,
-                                        m_contractStackRecord.m_lineNum
-                                        );
-                    }
-                }
-            }
-            break;
-
-        case HOST_NoCalls:
-           //  m_pClrDebugState->ViolationMaskReset( HostViolation );
-            m_pClrDebugState->ResetHostCaller();
-            break;
-
-        case HOST_Disabled:
-            // Nothing
-            break;
-
-        default:
-            UNREACHABLE();
-    }
 }
 #endif // ENABLE_CONTRACTS
diff --git a/src/coreclr/vm/eecontract.h b/src/coreclr/vm/eecontract.h
index 4df0c6f8fcaf..290100775f7a 100644
--- a/src/coreclr/vm/eecontract.h
+++ b/src/coreclr/vm/eecontract.h
@@ -52,9 +52,6 @@ class EEContract : public BaseContract
 #define GC_TRIGGERS          do { STATIC_CONTRACT_GC_TRIGGERS; REQUEST_TEST(Contract::GC_Triggers,   Contract::GC_Disabled); } while(0)
 #define GC_NOTRIGGER         do { STATIC_CONTRACT_GC_NOTRIGGER; REQUEST_TEST(Contract::GC_NoTrigger,  Contract::GC_Disabled); } while(0)
 
-#define HOST_NOCALLS         do { STATIC_CONTRACT_HOST_NOCALLS; REQUEST_TEST(Contract::HOST_NoCalls, Contract::HOST_Disabled); } while(0)
-#define HOST_CALLS           do {  STATIC_CONTRACT_HOST_CALLS; REQUEST_TEST(Contract::HOST_Calls, Contract::HOST_Disabled); } while(0)
-
 #else   // ENABLE_CONTRACTS_IMPL
 
 #define MODE_COOPERATIVE
@@ -62,8 +59,6 @@ class EEContract : public BaseContract
 #define MODE_ANY
 #define GC_TRIGGERS
 #define GC_NOTRIGGER
-#define HOST_NOCALLS
-#define HOST_CALLS
 
 #endif  // ENABLE_CONTRACTS_IMPL
 
diff --git a/src/coreclr/vm/eehash.inl b/src/coreclr/vm/eehash.inl
index 9fc5406e7eb8..f8e4ecb6064a 100644
--- a/src/coreclr/vm/eehash.inl
+++ b/src/coreclr/vm/eehash.inl
@@ -346,7 +346,7 @@ BOOL EEHashTableBase<KeyType, Helper, bDefaultCopyIsDeep>::DeleteValue(KeyType p
     _ASSERTE (OwnLock());
 
     Thread *pThread = GetThreadNULLOk();
-    GCX_MAYBE_COOP_NO_THREAD_BROKEN(pThread ? !(pThread->m_StateNC & Thread::TSNC_UnsafeSkipEnterCooperative) : FALSE);
+    GCX_MAYBE_COOP_NO_THREAD_BROKEN(pThread != NULL);
 
     _ASSERTE(m_pVolatileBucketTable->m_dwNumBuckets != 0);
 
@@ -850,7 +850,7 @@ BOOL EEHashTableBase<KeyType, Helper, bDefaultCopyIsDeep>::
     _ASSERTE_IMPL(OwnLock());
 
     Thread *pThread = GetThreadNULLOk();
-    GCX_MAYBE_COOP_NO_THREAD_BROKEN(pThread ? !(pThread->m_StateNC & Thread::TSNC_UnsafeSkipEnterCooperative) : FALSE);
+    GCX_MAYBE_COOP_NO_THREAD_BROKEN(pThread != NULL);
 
     _ASSERTE(pIter->m_pTable == (void *) this);
 
diff --git a/src/coreclr/vm/eepolicy.cpp b/src/coreclr/vm/eepolicy.cpp
index c88d6206a06a..657fdd075016 100644
--- a/src/coreclr/vm/eepolicy.cpp
+++ b/src/coreclr/vm/eepolicy.cpp
@@ -41,8 +41,6 @@ void SafeExitProcess(UINT exitCode, ShutdownCompleteAction sca = SCA_ExitProcess
     // other DLLs call Release() on us in their detach [dangerous!], etc.
     GCX_PREEMP_NO_DTOR();
 
-    InterlockedExchange((LONG*)&g_fForbidEnterEE, TRUE);
-
     // Note that for free and retail builds StressLog must also be enabled
     if (g_pConfig && g_pConfig->StressLog())
     {
@@ -59,13 +57,6 @@ void SafeExitProcess(UINT exitCode, ShutdownCompleteAction sca = SCA_ExitProcess
         }
     }
 
-    // Turn off exception processing, because if some other random DLL has a
-    //  fault in DLL_PROCESS_DETACH, we could get called for exception handling.
-    //  Since we've turned off part of the runtime, we can't, for instance,
-    //  properly execute the GC that handling an exception might trigger.
-    g_fNoExceptions = true;
-    LOG((LF_EH, LL_INFO10, "SafeExitProcess: turning off exceptions\n"));
-
     if (sca == SCA_TerminateProcessWhenShutdownComplete)
     {
         // disabled because if we fault in this code path we will trigger our Watson code
diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp
index b0886fcadebe..4cdfc0852a6a 100644
--- a/src/coreclr/vm/eetwain.cpp
+++ b/src/coreclr/vm/eetwain.cpp
@@ -9,8 +9,6 @@
 #include "dbginterface.h"
 #include "gcenv.h"
 
-#define RETURN_ADDR_OFFS        1       // in DWORDS
-
 #ifdef USE_GC_INFO_DECODER
 #include "gcinfodecoder.h"
 #endif
@@ -19,50 +17,7 @@
 #include "gccover.h"
 #endif // HAVE_GCCOVER
 
-#include "argdestination.h"
-
-#define X86_INSTR_TEST_ESP_SIB          0x24
-#define X86_INSTR_PUSH_0                0x6A    // push 00, entire instruction is 0x6A00
-#define X86_INSTR_PUSH_IMM              0x68    // push NNNN,
-#define X86_INSTR_W_PUSH_IND_IMM        0x35FF  // push [NNNN]
-#define X86_INSTR_CALL_REL32            0xE8    // call rel32
-#define X86_INSTR_W_CALL_IND_IMM        0x15FF  // call [addr32]
-#define X86_INSTR_NOP                   0x90    // nop
-#define X86_INSTR_NOP2                  0x9090  // 2-byte nop
-#define X86_INSTR_NOP3_1                0x9090  // 1st word of 3-byte nop
-#define X86_INSTR_NOP3_3                0x90    // 3rd byte of 3-byte nop
-#define X86_INSTR_NOP4                  0x90909090 // 4-byte nop
-#define X86_INSTR_NOP5_1                0x90909090 // 1st dword of 5-byte nop
-#define X86_INSTR_NOP5_5                0x90    // 5th byte of 5-byte nop
-#define X86_INSTR_INT3                  0xCC    // int3
-#define X86_INSTR_HLT                   0xF4    // hlt
-#define X86_INSTR_PUSH_EAX              0x50    // push eax
-#define X86_INSTR_PUSH_EBP              0x55    // push ebp
-#define X86_INSTR_W_MOV_EBP_ESP         0xEC8B  // mov ebp, esp
-#define X86_INSTR_POP_ECX               0x59    // pop ecx
-#define X86_INSTR_RET                   0xC2    // ret imm16
-#define X86_INSTR_RETN                  0xC3    // ret
-#define X86_INSTR_XOR                   0x33    // xor
-#define X86_INSTR_w_TEST_ESP_EAX        0x0485  // test [esp], eax
-#define X86_INSTR_w_TEST_ESP_DWORD_OFFSET_EAX   0x8485      // test [esp-dwOffset], eax
-#define X86_INSTR_w_LEA_ESP_EBP_BYTE_OFFSET     0x658d      // lea esp, [ebp-bOffset]
-#define X86_INSTR_w_LEA_ESP_EBP_DWORD_OFFSET    0xa58d      // lea esp, [ebp-dwOffset]
-#define X86_INSTR_w_LEA_EAX_ESP_BYTE_OFFSET     0x448d      // lea eax, [esp-bOffset]
-#define X86_INSTR_w_LEA_EAX_ESP_DWORD_OFFSET    0x848d      // lea eax, [esp-dwOffset]
-#define X86_INSTR_JMP_NEAR_REL32        0xE9    // near jmp rel32
-#define X86_INSTR_w_JMP_FAR_IND_IMM     0x25FF  // far jmp [addr32]
-
-#ifndef USE_GC_INFO_DECODER
-
-
-#ifdef  _DEBUG
-// For dumping of verbose info.
-#ifndef DACCESS_COMPILE
-static  bool  trFixContext          = false;
-#endif
-static  bool  trEnumGCRefs          = false;
-static  bool  dspPtr                = false; // prints the live ptrs as reported
-#endif
+#ifdef TARGET_X86
 
 // NOTE: enabling compiler optimizations, even for debug builds.
 // Comment this out in order to be able to fully debug methods here.
@@ -70,5104 +25,1348 @@ static  bool  dspPtr                = false; // prints the live ptrs as reported
 #pragma optimize("tg", on)
 #endif
 
-__forceinline unsigned decodeUnsigned(PTR_CBYTE& src)
-{
-    LIMITED_METHOD_CONTRACT;
-    SUPPORTS_DAC;
-
-#ifdef DACCESS_COMPILE
-    PTR_CBYTE begin = src;
-#endif
-
-    BYTE     byte  = *src++;
-    unsigned value = byte & 0x7f;
-    while (byte & 0x80)
-    {
-#ifdef DACCESS_COMPILE
-        // In DAC builds, the target data may be corrupt.  Rather than return incorrect data
-        // and risk wasting time in a potentially long loop, we want to fail early and gracefully.
-        // The data is encoded with 7 value-bits per byte, and so we may need to read a maximum
-        // of 5 bytes (7*5=35) to read a full 32-bit integer.
-        if ((src - begin) > 5)
-        {
-            DacError(CORDBG_E_TARGET_INCONSISTENT);
-        }
-#endif
-
-        byte    = *src++;
-        value <<= 7;
-        value  += byte & 0x7f;
-    }
-    return value;
-}
-
-__forceinline int decodeSigned(PTR_CBYTE& src)
-{
-    LIMITED_METHOD_CONTRACT;
-    SUPPORTS_DAC;
-
-#ifdef DACCESS_COMPILE
-    PTR_CBYTE begin = src;
-#endif
+void promoteVarArgs(PTR_BYTE argsStart, PTR_VASigCookie varArgSig, GCCONTEXT* ctx);
 
-    BYTE     byte  = *src++;
-    BYTE     first = byte;
-    int      value = byte & 0x3f;
-    while (byte & 0x80)
-    {
-#ifdef DACCESS_COMPILE
-        // In DAC builds, the target data may be corrupt.  Rather than return incorrect data
-        // and risk wasting time in a potentially long loop, we want to fail early and gracefully.
-        // The data is encoded with 7 value-bits per byte, and so we may need to read a maximum
-        // of 5 bytes (7*5=35) to read a full 32-bit integer.
-        if ((src - begin) > 5)
-        {
-            DacError(CORDBG_E_TARGET_INCONSISTENT);
-        }
-#endif
+#include "gc_unwind_x86.inl"
 
-        byte = *src++;
-        value <<= 7;
-        value += byte & 0x7f;
-    }
-    if (first & 0x40)
-        value = -value;
-    return value;
-}
+#endif // TARGET_X86
 
-// Fast versions of the above, with one iteration of the loop unrolled
-#define fastDecodeUnsigned(src) (((*(src) & 0x80) == 0) ? (unsigned) (*(src)++) : decodeUnsigned((src)))
-#define fastDecodeSigned(src)   (((*(src) & 0xC0) == 0) ? (unsigned) (*(src)++) : decodeSigned((src)))
+#include "argdestination.h"
 
-// Fast skipping past encoded integers
 #ifndef DACCESS_COMPILE
-#define fastSkipUnsigned(src) { while ((*(src)++) & 0x80) { } }
-#define fastSkipSigned(src)   { while ((*(src)++) & 0x80) { } }
-#else
-// In DAC builds we want to trade-off a little perf in the common case for reliaiblity against corrupt data.
-#define fastSkipUnsigned(src) (decodeUnsigned(src))
-#define fastSkipSigned(src) (decodeSigned(src))
-#endif
-
+#ifndef FEATURE_EH_FUNCLETS
 
 /*****************************************************************************
  *
- *  Decodes the X86 GcInfo header and returns the decoded information
- *  in the hdrInfo struct.
- *  curOffset is the code offset within the active method used in the
- *  computation of PrologOffs/EpilogOffs.
- *  Returns the size of the header (number of bytes decoded).
+ *  Setup context to enter an exception handler (a 'catch' block).
+ *  This is the last chance for the runtime support to do fixups in
+ *  the context before execution continues inside a filter, catch handler,
+ *  or finally.
  */
-size_t DecodeGCHdrInfo(GCInfoToken gcInfoToken,
-                       unsigned    curOffset,
-                       hdrInfo   * infoPtr)
+void EECodeManager::FixContext( ContextType     ctxType,
+                                EHContext      *ctx,
+                                EECodeInfo     *pCodeInfo,
+                                DWORD           dwRelOffset,
+                                DWORD           nestingLevel,
+                                OBJECTREF       thrownObject,
+                                CodeManState   *pState,
+                                size_t       ** ppShadowSP,
+                                size_t       ** ppEndRegion)
 {
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
-        SUPPORTS_DAC;
     } CONTRACTL_END;
 
-    PTR_CBYTE table = (PTR_CBYTE) gcInfoToken.Info;
-#if VERIFY_GC_TABLES
-    _ASSERTE(*castto(table, unsigned short *)++ == 0xFEEF);
-#endif
-
-    infoPtr->methodSize = fastDecodeUnsigned(table);
-
-    _ASSERTE(curOffset >= 0);
-    _ASSERTE(curOffset <= infoPtr->methodSize);
-
-    /* Decode the InfoHdr */
+    _ASSERTE((ctxType == FINALLY_CONTEXT) == (thrownObject == NULL));
 
-    InfoHdr header;
-    table = decodeHeader(table, gcInfoToken.Version, &header);
+    _ASSERTE(sizeof(CodeManStateBuf) <= sizeof(pState->stateBuf));
+    CodeManStateBuf * stateBuf = (CodeManStateBuf*)pState->stateBuf;
 
-    BOOL hasArgTabOffset = FALSE;
-    if (header.untrackedCnt == HAS_UNTRACKED)
-    {
-        hasArgTabOffset = TRUE;
-        header.untrackedCnt = fastDecodeUnsigned(table);
-    }
+    /* Extract the necessary information from the info block header */
 
-    if (header.varPtrTableSize == HAS_VARPTR)
-    {
-        hasArgTabOffset = TRUE;
-        header.varPtrTableSize = fastDecodeUnsigned(table);
-    }
+    stateBuf->hdrInfoSize = (DWORD)DecodeGCHdrInfo(pCodeInfo->GetGCInfoToken(),
+                                       dwRelOffset,
+                                       &stateBuf->hdrInfoBody);
+    pState->dwIsSet = 1;
 
-    if (header.gsCookieOffset == HAS_GS_COOKIE_OFFSET)
-    {
-        header.gsCookieOffset = fastDecodeUnsigned(table);
+#ifdef  _DEBUG
+    if (trFixContext) {
+        printf("FixContext [%s][%s] for %s.%s: ",
+               stateBuf->hdrInfoBody.ebpFrame?"ebp":"   ",
+               stateBuf->hdrInfoBody.interruptible?"int":"   ",
+               "UnknownClass","UnknownMethod");
+        fflush(stdout);
     }
+#endif
 
-    if (header.syncStartOffset == HAS_SYNC_OFFSET)
-    {
-        header.syncStartOffset = decodeUnsigned(table);
-        header.syncEndOffset = decodeUnsigned(table);
-
-        _ASSERTE(header.syncStartOffset != INVALID_SYNC_OFFSET && header.syncEndOffset != INVALID_SYNC_OFFSET);
-        _ASSERTE(header.syncStartOffset < header.syncEndOffset);
-    }
+    /* make sure that we have an ebp stack frame */
 
-    if (header.revPInvokeOffset == HAS_REV_PINVOKE_FRAME_OFFSET)
-    {
-        header.revPInvokeOffset = fastDecodeUnsigned(table);
-    }
+    _ASSERTE(stateBuf->hdrInfoBody.ebpFrame);
+    _ASSERTE(stateBuf->hdrInfoBody.handlers); // <TODO>@TODO : This will always be set. Remove it</TODO>
 
-    /* Some sanity checks on header */
+    TADDR      baseSP;
+    GetHandlerFrameInfo(&stateBuf->hdrInfoBody, ctx->Ebp,
+                                ctxType == FILTER_CONTEXT ? ctx->Esp : IGNORE_VAL,
+                                ctxType == FILTER_CONTEXT ? (DWORD) IGNORE_VAL : nestingLevel,
+                                &baseSP,
+                                &nestingLevel);
 
-    _ASSERTE( header.prologSize +
-           (size_t)(header.epilogCount*header.epilogSize) <= infoPtr->methodSize);
-    _ASSERTE( header.epilogCount == 1 || !header.epilogAtEnd);
+    _ASSERTE((size_t)ctx->Ebp >= baseSP);
+    _ASSERTE(baseSP >= (size_t)ctx->Esp);
 
-    _ASSERTE( header.untrackedCnt <= header.argCount+header.frameSize);
+    ctx->Esp = (DWORD)baseSP;
 
-    _ASSERTE( header.ebpSaved || !(header.ebpFrame || header.doubleAlign));
-    _ASSERTE(!header.ebpFrame || !header.doubleAlign  );
-    _ASSERTE( header.ebpFrame || !header.security     );
-    _ASSERTE( header.ebpFrame || !header.handlers     );
-    _ASSERTE( header.ebpFrame || !header.localloc     );
-    _ASSERTE( header.ebpFrame || !header.editNcontinue);  // <TODO> : Esp frames NYI for EnC</TODO>
+    // EE will write Esp to **pShadowSP before jumping to handler
 
-    /* Initialize the infoPtr struct */
+    PTR_TADDR pBaseSPslots =
+        GetFirstBaseSPslotPtr(ctx->Ebp, &stateBuf->hdrInfoBody);
+    *ppShadowSP = (size_t *)&pBaseSPslots[-(int) nestingLevel   ];
+                   pBaseSPslots[-(int)(nestingLevel+1)] = 0; // Zero out the next slot
 
-    infoPtr->argSize         = header.argCount * 4;
-    infoPtr->ebpFrame        = header.ebpFrame;
-    infoPtr->interruptible   = header.interruptible;
-    infoPtr->returnKind      = (ReturnKind) header.returnKind;
+    // EE will write the end offset of the filter
+    if (ctxType == FILTER_CONTEXT)
+        *ppEndRegion = (size_t *)pBaseSPslots + 1;
 
-    infoPtr->prologSize      = header.prologSize;
-    infoPtr->epilogSize      = header.epilogSize;
-    infoPtr->epilogCnt       = header.epilogCount;
-    infoPtr->epilogEnd       = header.epilogAtEnd;
+    /*  This is just a simple assignment of throwObject to ctx->Eax,
+        just pretend the cast goo isn't there.
+     */
 
-    infoPtr->untrackedCnt    = header.untrackedCnt;
-    infoPtr->varPtrTableSize = header.varPtrTableSize;
-    infoPtr->gsCookieOffset  = header.gsCookieOffset;
+    *((OBJECTREF*)&(ctx->Eax)) = thrownObject;
+}
 
-    infoPtr->syncStartOffset = header.syncStartOffset;
-    infoPtr->syncEndOffset   = header.syncEndOffset;
-    infoPtr->revPInvokeOffset = header.revPInvokeOffset;
+#endif // !FEATURE_EH_FUNCLETS
 
-    infoPtr->doubleAlign     = header.doubleAlign;
-    infoPtr->handlers        = header.handlers;
-    infoPtr->localloc        = header.localloc;
-    infoPtr->editNcontinue   = header.editNcontinue;
-    infoPtr->varargs         = header.varargs;
-    infoPtr->profCallbacks   = header.profCallbacks;
-    infoPtr->genericsContext = header.genericsContext;
-    infoPtr->genericsContextIsMethodDesc = header.genericsContextIsMethodDesc;
-    infoPtr->isSpeculativeStackWalk = false;
 
-    /* Are we within the prolog of the method? */
 
-    if  (curOffset < infoPtr->prologSize)
-    {
-        infoPtr->prologOffs = curOffset;
-    }
-    else
-    {
-        infoPtr->prologOffs = hdrInfo::NOT_IN_PROLOG;
-    }
 
-    /* Assume we're not in the epilog of the method */
 
-    infoPtr->epilogOffs = hdrInfo::NOT_IN_EPILOG;
+/*****************************************************************************/
 
-    /* Are we within an epilog of the method? */
+bool        VarIsInReg(ICorDebugInfo::VarLoc varLoc)
+{
+    LIMITED_METHOD_CONTRACT;
 
-    if  (infoPtr->epilogCnt)
+    switch(varLoc.vlType)
     {
-        unsigned epilogStart;
-
-        if  (infoPtr->epilogCnt > 1 || !infoPtr->epilogEnd)
-        {
-#if VERIFY_GC_TABLES
-            _ASSERTE(*castto(table, unsigned short *)++ == 0xFACE);
-#endif
-            epilogStart = 0;
-            for (unsigned i = 0; i < infoPtr->epilogCnt; i++)
-            {
-                epilogStart += fastDecodeUnsigned(table);
-                if  (curOffset > epilogStart &&
-                     curOffset < epilogStart + infoPtr->epilogSize)
-                {
-                    infoPtr->epilogOffs = curOffset - epilogStart;
-                }
-            }
-        }
-        else
-        {
-            epilogStart = infoPtr->methodSize - infoPtr->epilogSize;
-
-            if  (curOffset > epilogStart &&
-                 curOffset < epilogStart + infoPtr->epilogSize)
-            {
-                infoPtr->epilogOffs = curOffset - epilogStart;
-            }
-        }
+    case ICorDebugInfo::VLT_REG:
+    case ICorDebugInfo::VLT_REG_REG:
+    case ICorDebugInfo::VLT_REG_STK:
+        return true;
 
-        infoPtr->syncEpilogStart = epilogStart;
+    default:
+        return false;
     }
+}
 
-    unsigned argTabOffset = INVALID_ARGTAB_OFFSET;
-    if (hasArgTabOffset)
-    {
-        argTabOffset = fastDecodeUnsigned(table);
-    }
-    infoPtr->argTabOffset    = argTabOffset;
+#ifdef FEATURE_REMAP_FUNCTION
+/*****************************************************************************
+ *  Last chance for the runtime support to do fixups in the context
+ *  before execution continues inside an EnC updated function.
+ *  It also adjusts ESP and munges on the stack. So the caller has to make
+ *  sure that this stack region is not needed (by doing a localloc).
+ *  Also, if this returns EnC_FAIL, we should not have munged the
+ *  context ie. transcated commit
+ *  The plan of attack is:
+ *  1) Error checking up front.  If we get through here, everything
+ *      else should work
+ *  2) Get all the info about current variables, registers, etc
+ *  3) zero out the stack frame - this'll initialize _all_ variables
+ *  4) Put the variables from step 3 into their new locations.
+ *
+ *  Note that while we use the ShuffleVariablesGet/Set methods, they don't
+ *  have any info/logic that's internal to the runtime: another codemanger
+ *  could easily duplicate what they do, which is why we're calling into them.
+ */
 
-    size_t frameDwordCount = header.frameSize;
+HRESULT EECodeManager::FixContextForEnC(PCONTEXT         pCtx,
+                                        EECodeInfo *     pOldCodeInfo,
+                   const ICorDebugInfo::NativeVarInfo *  oldMethodVars,
+                                        SIZE_T           oldMethodVarsCount,
+                                        EECodeInfo *     pNewCodeInfo,
+                   const ICorDebugInfo::NativeVarInfo *  newMethodVars,
+                                        SIZE_T           newMethodVarsCount)
+{
+    CONTRACTL {
+        DISABLED(NOTHROW);
+        DISABLED(GC_NOTRIGGER);
+    } CONTRACTL_END;
 
-    /* Set the rawStackSize to the number of bytes that it bumps ESP */
+    HRESULT hr = S_OK;
 
-    infoPtr->rawStkSize = (UINT)(frameDwordCount * sizeof(size_t));
+     // Grab a copy of the context before the EnC update.
+    T_CONTEXT oldCtx = *pCtx;
 
-    /* Calculate the callee saves regMask and adjust stackSize to */
-    /* include the callee saves register spills                   */
+#if defined(TARGET_X86)
 
-    unsigned savedRegs = RM_NONE;
-    unsigned savedRegsCount = 0;
+    /* Extract the necessary information from the info block header */
 
-    if  (header.ediSaved)
-    {
-        savedRegsCount++;
-        savedRegs |= RM_EDI;
-    }
-    if  (header.esiSaved)
-    {
-        savedRegsCount++;
-        savedRegs |= RM_ESI;
-    }
-    if  (header.ebxSaved)
-    {
-        savedRegsCount++;
-        savedRegs |= RM_EBX;
-    }
-    if  (header.ebpSaved)
-    {
-        savedRegsCount++;
-        savedRegs |= RM_EBP;
-    }
+    hdrInfo  oldInfo, newInfo;
 
-    infoPtr->savedRegMask = (RegMask)savedRegs;
+    DecodeGCHdrInfo(pOldCodeInfo->GetGCInfoToken(),
+                       pOldCodeInfo->GetRelOffset(),
+                       &oldInfo);
 
-    infoPtr->savedRegsCountExclFP = savedRegsCount;
-    if (header.ebpFrame || header.doubleAlign)
-    {
-        _ASSERTE(header.ebpSaved);
-        infoPtr->savedRegsCountExclFP = savedRegsCount - 1;
-    }
+    DecodeGCHdrInfo(pNewCodeInfo->GetGCInfoToken(),
+                       pNewCodeInfo->GetRelOffset(),
+                       &newInfo);
 
-    frameDwordCount += savedRegsCount;
+    //1) Error checking up front.  If we get through here, everything
+    //     else should work
 
-    infoPtr->stackSize  =  (UINT)(frameDwordCount * sizeof(size_t));
+    if (!oldInfo.editNcontinue || !newInfo.editNcontinue) {
+        LOG((LF_ENC, LL_INFO100, "**Error** EECM::FixContextForEnC EnC_INFOLESS_METHOD\n"));
+        return CORDBG_E_ENC_INFOLESS_METHOD;
+    }
 
-    _ASSERTE(infoPtr->gsCookieOffset == INVALID_GS_COOKIE_OFFSET ||
-             (infoPtr->gsCookieOffset < infoPtr->stackSize) &&
-             ((header.gsCookieOffset % sizeof(void*)) == 0));
+    if (!oldInfo.ebpFrame || !newInfo.ebpFrame) {
+        LOG((LF_ENC, LL_INFO100, "**Error** EECM::FixContextForEnC Esp frames NYI\n"));
+        return E_FAIL; // Esp frames NYI
+    }
 
-    return  table - PTR_CBYTE(gcInfoToken.Info);
-}
+    if (pCtx->Esp != pCtx->Ebp - oldInfo.stackSize + sizeof(DWORD)) {
+        LOG((LF_ENC, LL_INFO100, "**Error** EECM::FixContextForEnC stack should be empty\n"));
+        return E_FAIL; // stack should be empty - <TODO> @TODO : Barring localloc</TODO>
+    }
 
-/*****************************************************************************/
+    if (oldInfo.handlers)
+    {
+        bool      hasInnerFilter;
+        TADDR     baseSP;
+        FrameType frameType = GetHandlerFrameInfo(&oldInfo, pCtx->Ebp,
+                                                  pCtx->Esp, IGNORE_VAL,
+                                                  &baseSP, NULL, &hasInnerFilter);
+        _ASSERTE(frameType != FR_INVALID);
+        _ASSERTE(!hasInnerFilter); // FixContextForEnC() is called for bottommost funclet
 
-// We do a "pop eax; jmp eax" to return from a fault or finally handler
-const size_t END_FIN_POP_STACK = sizeof(TADDR);
+        // If the method is in a fuclet, and if the framesize grows, we are in trouble.
 
-inline
-size_t GetLocallocSPOffset(hdrInfo * info)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
+        if (frameType != FR_NORMAL)
+        {
+           /* <TODO> @TODO : What if the new method offset is in a fuclet,
+              and the old is not, or the nesting level changed, etc </TODO> */
 
-    _ASSERTE(info->localloc && info->ebpFrame);
+            if (oldInfo.stackSize != newInfo.stackSize) {
+                LOG((LF_ENC, LL_INFO100, "**Error** EECM::FixContextForEnC stack size mismatch\n"));
+                return CORDBG_E_ENC_IN_FUNCLET;
+            }
+        }
+    }
 
-    unsigned position = info->savedRegsCountExclFP +
-                        1;
-    return position * sizeof(TADDR);
-}
+    /* @TODO: Check if we have grown out of space for locals, in the face of localloc */
+    _ASSERTE(!oldInfo.localloc && !newInfo.localloc);
 
-inline
-size_t GetParamTypeArgOffset(hdrInfo * info)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
+    // @TODO: If nesting level grows above the MAX_EnC_HANDLER_NESTING_LEVEL,
+    // we should return EnC_NESTED_HANLDERS
+    _ASSERTE(oldInfo.handlers && newInfo.handlers);
 
-    _ASSERTE((info->genericsContext || info->handlers) && info->ebpFrame);
+    LOG((LF_ENC, LL_INFO100, "EECM::FixContextForEnC: Checks out\n"));
 
-    unsigned position = info->savedRegsCountExclFP +
-                        info->localloc +
-                        1;  // For CORINFO_GENERICS_CTXT_FROM_PARAMTYPEARG
-    return position * sizeof(TADDR);
-}
+#elif defined(TARGET_AMD64) || defined(TARGET_ARM64)
 
-inline size_t GetStartShadowSPSlotsOffset(hdrInfo * info)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-    _ASSERTE(info->handlers && info->ebpFrame);
-
-    return GetParamTypeArgOffset(info) +
-           sizeof(TADDR); // Slot for end-of-last-executed-filter
-}
-
-/*****************************************************************************
- *  Returns the start of the hidden slots for the shadowSP for functions
- *  with exception handlers. There is one slot per nesting level starting
- *  near Ebp and is zero-terminated after the active slots.
- */
-
-inline
-PTR_TADDR GetFirstBaseSPslotPtr(TADDR ebp, hdrInfo * info)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-    _ASSERTE(info->handlers && info->ebpFrame);
-
-    size_t offsetFromEBP = GetStartShadowSPSlotsOffset(info)
-                        + sizeof(TADDR); // to get to the *start* of the next slot
-
-    return PTR_TADDR(ebp - offsetFromEBP);
-}
-
-inline size_t GetEndShadowSPSlotsOffset(hdrInfo * info, unsigned maxHandlerNestingLevel)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-    _ASSERTE(info->handlers && info->ebpFrame);
-
-    unsigned numberOfShadowSPSlots = maxHandlerNestingLevel +
-                                     1 + // For zero-termination
-                                     1; // For a filter (which can be active at the same time as a catch/finally handler
-
-    return GetStartShadowSPSlotsOffset(info) +
-           (numberOfShadowSPSlots * sizeof(TADDR));
-}
-
-/*****************************************************************************
- *    returns the base frame pointer corresponding to the target nesting level.
- */
-
-inline
-TADDR GetOutermostBaseFP(TADDR ebp, hdrInfo * info)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-    // we are not taking into account double alignment.  We are
-    // safe because the jit currently bails on double alignment if there
-    // are handles or localalloc
-    _ASSERTE(!info->doubleAlign);
-    if (info->localloc)
-    {
-        // If the function uses localloc we will fetch the ESP from the localloc
-        // slot.
-        PTR_TADDR pLocalloc = PTR_TADDR(ebp - GetLocallocSPOffset(info));
-
-        return (*pLocalloc);
-    }
-    else
-    {
-        // Default, go back all the method's local stack size
-        return ebp - info->stackSize + sizeof(int);
-    }
-}
-
-/*****************************************************************************
- *
- *  For functions with handlers, checks if it is currently in a handler.
- *  Either of unwindESP or unwindLevel will specify the target nesting level.
- *  If unwindLevel is specified, info about the funclet at that nesting level
- *    will be returned. (Use if you are interested in a specific nesting level.)
- *  If unwindESP is specified, info for nesting level invoked before the stack
- *   reached unwindESP will be returned. (Use if you have a specific ESP value
- *   during stack walking.)
- *
- *  *pBaseSP is set to the base SP (base of the stack on entry to
- *    the current funclet) corresponding to the target nesting level.
- *  *pNestLevel is set to the nesting level of the target nesting level (useful
- *    if unwindESP!=IGNORE_VAL
- *  *pHasInnerFilter will be set to true (only when unwindESP!=IGNORE_VAL) if a filter
- *    is currently active, but the target nesting level is an outer nesting level.
- *  *pHadInnerFilter - was the last use of the frame to execute a filter.
- *    This mainly affects GC lifetime reporting.
- */
-
-enum FrameType
-{
-    FR_NORMAL,              // Normal method frame - no exceptions currently active
-    FR_FILTER,              // Frame-let of a filter
-    FR_HANDLER,             // Frame-let of a callable catch/fault/finally
-
-    FR_INVALID,             // Invalid frame (for speculative stackwalks)
-};
-
-enum { IGNORE_VAL = -1 };
-
-FrameType   GetHandlerFrameInfo(hdrInfo   * info,
-                                TADDR       frameEBP,
-                                TADDR       unwindESP,
-                                DWORD       unwindLevel,
-                                TADDR     * pBaseSP = NULL,         /* OUT */
-                                DWORD     * pNestLevel = NULL,      /* OUT */
-                                bool      * pHasInnerFilter = NULL, /* OUT */
-                                bool      * pHadInnerFilter = NULL) /* OUT */
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-        HOST_NOCALLS;
-        SUPPORTS_DAC;
-    } CONTRACTL_END;
-
-    _ASSERTE(info->ebpFrame && info->handlers);
-    // One and only one of them should be IGNORE_VAL
-    _ASSERTE((unwindESP == (TADDR) IGNORE_VAL) !=
-           (unwindLevel == (DWORD) IGNORE_VAL));
-    _ASSERTE(pHasInnerFilter == NULL || unwindESP != (TADDR) IGNORE_VAL);
-
-    // Many of the conditions that we'd like to assert cannot be asserted in the case that we're
-    // in the middle of a stackwalk seeded by a profiler, since such seeds can't be trusted
-    // (profilers are external, untrusted sources).  So during profiler walks, we test the condition
-    // and throw an exception if it's not met.  Otherwise, we just assert the condition.
-    #define FAIL_IF_SPECULATIVE_WALK(condition)         \
-        if (info->isSpeculativeStackWalk)               \
-        {                                               \
-            if (!(condition))                           \
-            {                                           \
-                return FR_INVALID;                      \
-            }                                           \
-        }                                               \
-        else                                            \
-        {                                               \
-            _ASSERTE(condition);                        \
-        }
-
-    PTR_TADDR pFirstBaseSPslot = GetFirstBaseSPslotPtr(frameEBP, info);
-    TADDR  baseSP            = GetOutermostBaseFP(frameEBP, info);
-    bool    nonLocalHandlers = false; // Are the funclets invoked by EE (instead of managed code itself)
-    bool    hasInnerFilter   = false;
-    bool    hadInnerFilter   = false;
-
-    /* Get the last non-zero slot >= unwindESP, or lvl<unwindLevel.
-       Also do some sanity checks */
-
-    // The shadow slots contain the SP of the nested EH clauses currently active on the stack.
-    // The slots grow towards lower address on the stack and is terminted by a NULL entry.
-    // Since each subsequent slot contains the SP of a more nested EH clause, the contents of the slots are
-    // expected to be in decreasing order.
-    size_t lvl = 0;
-#ifndef FEATURE_EH_FUNCLETS
-    PTR_TADDR pSlot;
-    for(lvl = 0, pSlot = pFirstBaseSPslot;
-        *pSlot && lvl < unwindLevel;
-        pSlot--, lvl++)
-    {
-        // Filters cant have inner funclets
-        FAIL_IF_SPECULATIVE_WALK(!(baseSP & ICodeManager::SHADOW_SP_IN_FILTER));
-
-        TADDR curSlotVal = *pSlot;
-
-        // The shadowSPs have to be less unless the stack has been unwound.
-        FAIL_IF_SPECULATIVE_WALK(baseSP >  curSlotVal ||
-               (baseSP == curSlotVal && pSlot == pFirstBaseSPslot));
-
-        if (curSlotVal == LCL_FINALLY_MARK)
-        {
-            // Locally called finally
-            baseSP -= sizeof(TADDR);
-        }
-        else
-        {
-            // Is this a funclet we unwound before (can only happen with filters) ?
-            // If unwindESP is specified, normally we expect it to be the last entry in the shadow slot array.
-            // Or, if there is a filter, we expect unwindESP to be the second last entry.  However, this may
-            // not be the case in DAC builds.  For example, the user can use .cxr in an EH clause to set a
-            // CONTEXT captured in the try clause.  In this case, unwindESP will be the ESP of the parent
-            // function, but the shadow slot array will contain the SP of the EH clause, which is closer to
-            // the leaf than the parent method.
-
-            if (unwindESP != (TADDR) IGNORE_VAL &&
-                unwindESP > END_FIN_POP_STACK +
-                (curSlotVal & ~ICodeManager::SHADOW_SP_BITS))
-            {
-                // In non-DAC builds, the only time unwindESP is closer to the root than entries in the shadow
-                // slot array is when the last entry in the array is for a filter.  Also, filters can't have
-                // nested handlers.
-                if ((pSlot[0] & ICodeManager::SHADOW_SP_IN_FILTER) &&
-                    (pSlot[-1] == 0) &&
-                    !(baseSP & ICodeManager::SHADOW_SP_IN_FILTER))
-                {
-                    if (pSlot[0] & ICodeManager::SHADOW_SP_FILTER_DONE)
-                        hadInnerFilter = true;
-                    else
-                        hasInnerFilter = true;
-                    break;
-                }
-                else
-                {
-#if defined(DACCESS_COMPILE)
-                    // In DAC builds, this could happen.  We just need to bail out of this loop early.
-                    break;
-#else  // !DACCESS_COMPILE
-                    // In non-DAC builds, this is an error.
-                    FAIL_IF_SPECULATIVE_WALK(FALSE);
-#endif // DACCESS_COMPILE
-                }
-            }
-
-            nonLocalHandlers = true;
-            baseSP = curSlotVal;
-        }
-    }
-#endif // FEATURE_EH_FUNCLETS
-
-    if (unwindESP != (TADDR) IGNORE_VAL)
-    {
-        FAIL_IF_SPECULATIVE_WALK(baseSP >= unwindESP ||
-               baseSP == unwindESP - sizeof(TADDR));  // About to locally call a finally
-
-        if (baseSP < unwindESP)                       // About to locally call a finally
-            baseSP = unwindESP;
-    }
-    else
-    {
-        FAIL_IF_SPECULATIVE_WALK(lvl == unwindLevel); // unwindLevel must be currently active on stack
-    }
-
-    if (pBaseSP)
-        *pBaseSP = baseSP & ~ICodeManager::SHADOW_SP_BITS;
-
-    if (pNestLevel)
-    {
-        *pNestLevel = (DWORD)lvl;
-    }
-
-    if (pHasInnerFilter)
-        *pHasInnerFilter = hasInnerFilter;
-
-    if (pHadInnerFilter)
-        *pHadInnerFilter = hadInnerFilter;
-
-    if (baseSP & ICodeManager::SHADOW_SP_IN_FILTER)
-    {
-        FAIL_IF_SPECULATIVE_WALK(!hasInnerFilter); // nested filters not allowed
-        return FR_FILTER;
-    }
-    else if (nonLocalHandlers)
-    {
-        return FR_HANDLER;
-    }
-    else
-    {
-        return FR_NORMAL;
-    }
-
-    #undef FAIL_IF_SPECULATIVE_WALK
-}
-
-// Returns the number of bytes at the beginning of the stack frame that shouldn't be
-// modified by an EnC.  This is everything except the space for locals and temporaries.
-inline size_t GetSizeOfFrameHeaderForEnC(hdrInfo * info)
-{
-    WRAPPER_NO_CONTRACT;
-
-    // See comment above Compiler::lvaAssignFrameOffsets() in src\jit\il\lclVars.cpp
-    // for frame layout
-
-    // EnC supports increasing the maximum handler nesting level by always
-    // assuming that the max is MAX_EnC_HANDLER_NESTING_LEVEL. Methods with
-    // a higher max cannot be updated by EnC
-
-    // Take the offset (from EBP) of the last slot of the header, plus one for the EBP slot itself
-    // to get the total size of the header.
-    return sizeof(TADDR) +
-            GetEndShadowSPSlotsOffset(info, MAX_EnC_HANDLER_NESTING_LEVEL);
-}
-#endif // !USE_GC_INFO_DECODER
-
-#ifndef DACCESS_COMPILE
-#ifndef FEATURE_EH_FUNCLETS
-
-/*****************************************************************************
- *
- *  Setup context to enter an exception handler (a 'catch' block).
- *  This is the last chance for the runtime support to do fixups in
- *  the context before execution continues inside a filter, catch handler,
- *  or finally.
- */
-void EECodeManager::FixContext( ContextType     ctxType,
-                                EHContext      *ctx,
-                                EECodeInfo     *pCodeInfo,
-                                DWORD           dwRelOffset,
-                                DWORD           nestingLevel,
-                                OBJECTREF       thrownObject,
-                                CodeManState   *pState,
-                                size_t       ** ppShadowSP,
-                                size_t       ** ppEndRegion)
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-    } CONTRACTL_END;
-
-    _ASSERTE((ctxType == FINALLY_CONTEXT) == (thrownObject == NULL));
-
-    _ASSERTE(sizeof(CodeManStateBuf) <= sizeof(pState->stateBuf));
-    CodeManStateBuf * stateBuf = (CodeManStateBuf*)pState->stateBuf;
-
-    /* Extract the necessary information from the info block header */
-
-    stateBuf->hdrInfoSize = (DWORD)DecodeGCHdrInfo(pCodeInfo->GetGCInfoToken(),
-                                       dwRelOffset,
-                                       &stateBuf->hdrInfoBody);
-    pState->dwIsSet = 1;
-
-#ifdef  _DEBUG
-    if (trFixContext) {
-        printf("FixContext [%s][%s] for %s.%s: ",
-               stateBuf->hdrInfoBody.ebpFrame?"ebp":"   ",
-               stateBuf->hdrInfoBody.interruptible?"int":"   ",
-               "UnknownClass","UnknownMethod");
-        fflush(stdout);
-    }
-#endif
-
-    /* make sure that we have an ebp stack frame */
-
-    _ASSERTE(stateBuf->hdrInfoBody.ebpFrame);
-    _ASSERTE(stateBuf->hdrInfoBody.handlers); // <TODO>@TODO : This will always be set. Remove it</TODO>
-
-    TADDR      baseSP;
-    GetHandlerFrameInfo(&stateBuf->hdrInfoBody, ctx->Ebp,
-                                ctxType == FILTER_CONTEXT ? ctx->Esp : IGNORE_VAL,
-                                ctxType == FILTER_CONTEXT ? (DWORD) IGNORE_VAL : nestingLevel,
-                                &baseSP,
-                                &nestingLevel);
-
-    _ASSERTE((size_t)ctx->Ebp >= baseSP);
-    _ASSERTE(baseSP >= (size_t)ctx->Esp);
-
-    ctx->Esp = (DWORD)baseSP;
-
-    // EE will write Esp to **pShadowSP before jumping to handler
-
-    PTR_TADDR pBaseSPslots =
-        GetFirstBaseSPslotPtr(ctx->Ebp, &stateBuf->hdrInfoBody);
-    *ppShadowSP = (size_t *)&pBaseSPslots[-(int) nestingLevel   ];
-                   pBaseSPslots[-(int)(nestingLevel+1)] = 0; // Zero out the next slot
-
-    // EE will write the end offset of the filter
-    if (ctxType == FILTER_CONTEXT)
-        *ppEndRegion = (size_t *)pBaseSPslots + 1;
-
-    /*  This is just a simple assignment of throwObject to ctx->Eax,
-        just pretend the cast goo isn't there.
-     */
-
-    *((OBJECTREF*)&(ctx->Eax)) = thrownObject;
-}
-
-#endif // !FEATURE_EH_FUNCLETS
-
-
-
-
-
-/*****************************************************************************/
-
-bool        VarIsInReg(ICorDebugInfo::VarLoc varLoc)
-{
-    LIMITED_METHOD_CONTRACT;
-
-    switch(varLoc.vlType)
-    {
-    case ICorDebugInfo::VLT_REG:
-    case ICorDebugInfo::VLT_REG_REG:
-    case ICorDebugInfo::VLT_REG_STK:
-        return true;
-
-    default:
-        return false;
-    }
-}
-
-#ifdef FEATURE_REMAP_FUNCTION
-/*****************************************************************************
- *  Last chance for the runtime support to do fixups in the context
- *  before execution continues inside an EnC updated function.
- *  It also adjusts ESP and munges on the stack. So the caller has to make
- *  sure that this stack region is not needed (by doing a localloc).
- *  Also, if this returns EnC_FAIL, we should not have munged the
- *  context ie. transcated commit
- *  The plan of attack is:
- *  1) Error checking up front.  If we get through here, everything
- *      else should work
- *  2) Get all the info about current variables, registers, etc
- *  3) zero out the stack frame - this'll initialize _all_ variables
- *  4) Put the variables from step 3 into their new locations.
- *
- *  Note that while we use the ShuffleVariablesGet/Set methods, they don't
- *  have any info/logic that's internal to the runtime: another codemanger
- *  could easily duplicate what they do, which is why we're calling into them.
- */
-
-HRESULT EECodeManager::FixContextForEnC(PCONTEXT         pCtx,
-                                        EECodeInfo *     pOldCodeInfo,
-                   const ICorDebugInfo::NativeVarInfo *  oldMethodVars,
-                                        SIZE_T           oldMethodVarsCount,
-                                        EECodeInfo *     pNewCodeInfo,
-                   const ICorDebugInfo::NativeVarInfo *  newMethodVars,
-                                        SIZE_T           newMethodVarsCount)
-{
-    CONTRACTL {
-        DISABLED(NOTHROW);
-        DISABLED(GC_NOTRIGGER);
-    } CONTRACTL_END;
-
-    HRESULT hr = S_OK;
-
-     // Grab a copy of the context before the EnC update.
-    T_CONTEXT oldCtx = *pCtx;
-
-#if defined(TARGET_X86)
-
-    /* Extract the necessary information from the info block header */
-
-    hdrInfo  oldInfo, newInfo;
-
-    DecodeGCHdrInfo(pOldCodeInfo->GetGCInfoToken(),
-                       pOldCodeInfo->GetRelOffset(),
-                       &oldInfo);
-
-    DecodeGCHdrInfo(pNewCodeInfo->GetGCInfoToken(),
-                       pNewCodeInfo->GetRelOffset(),
-                       &newInfo);
-
-    //1) Error checking up front.  If we get through here, everything
-    //     else should work
-
-    if (!oldInfo.editNcontinue || !newInfo.editNcontinue) {
-        LOG((LF_ENC, LL_INFO100, "**Error** EECM::FixContextForEnC EnC_INFOLESS_METHOD\n"));
-        return CORDBG_E_ENC_INFOLESS_METHOD;
-    }
-
-    if (!oldInfo.ebpFrame || !newInfo.ebpFrame) {
-        LOG((LF_ENC, LL_INFO100, "**Error** EECM::FixContextForEnC Esp frames NYI\n"));
-        return E_FAIL; // Esp frames NYI
-    }
-
-    if (pCtx->Esp != pCtx->Ebp - oldInfo.stackSize + sizeof(DWORD)) {
-        LOG((LF_ENC, LL_INFO100, "**Error** EECM::FixContextForEnC stack should be empty\n"));
-        return E_FAIL; // stack should be empty - <TODO> @TODO : Barring localloc</TODO>
-    }
-
-    if (oldInfo.handlers)
-    {
-        bool      hasInnerFilter;
-        TADDR     baseSP;
-        FrameType frameType = GetHandlerFrameInfo(&oldInfo, pCtx->Ebp,
-                                                  pCtx->Esp, IGNORE_VAL,
-                                                  &baseSP, NULL, &hasInnerFilter);
-        _ASSERTE(frameType != FR_INVALID);
-        _ASSERTE(!hasInnerFilter); // FixContextForEnC() is called for bottommost funclet
-
-        // If the method is in a fuclet, and if the framesize grows, we are in trouble.
-
-        if (frameType != FR_NORMAL)
-        {
-           /* <TODO> @TODO : What if the new method offset is in a fuclet,
-              and the old is not, or the nesting level changed, etc </TODO> */
-
-            if (oldInfo.stackSize != newInfo.stackSize) {
-                LOG((LF_ENC, LL_INFO100, "**Error** EECM::FixContextForEnC stack size mismatch\n"));
-                return CORDBG_E_ENC_IN_FUNCLET;
-            }
-        }
-    }
-
-    /* @TODO: Check if we have grown out of space for locals, in the face of localloc */
-    _ASSERTE(!oldInfo.localloc && !newInfo.localloc);
-
-    // @TODO: If nesting level grows above the MAX_EnC_HANDLER_NESTING_LEVEL,
-    // we should return EnC_NESTED_HANLDERS
-    _ASSERTE(oldInfo.handlers && newInfo.handlers);
-
-    LOG((LF_ENC, LL_INFO100, "EECM::FixContextForEnC: Checks out\n"));
-
-#elif defined(TARGET_AMD64) || defined(TARGET_ARM64)
-
-    // Strategy for zeroing out the frame on x64:
-    //
-    // The stack frame looks like this (stack grows up)
-    //
-    // =======================================
-    //             <--- RSP == RBP (invariant: localalloc disallowed before remap)
-    // Arguments for next call (if there is one)
-    // PSPSym (optional)
-    // JIT temporaries (if any)
-    // Security object (if any)
-    // Local variables (if any)
-    // ---------------------------------------
-    // Frame header (stuff we must preserve, such as bool for synchronized
-    // methods, saved FP, saved callee-preserved registers, etc.)
-    // Return address (also included in frame header)
-    // ---------------------------------------
-    // Arguments for this frame (that's getting remapped).  Will naturally be preserved
-    // since fixed-frame size doesn't include this.
-    // =======================================
-    //
-    // Goal: Zero out everything AFTER (above) frame header.
-    //
-    // How do we find this stuff?
-    //
-    // EECodeInfo::GetFixedStackSize() gives us the full size from the top ("Arguments
-    // for next call") all the way down to and including Return Address.
-    //
-    // GetSizeOfEditAndContinuePreservedArea() gives us the size in bytes of the
-    // frame header at the bottom.
-    //
-    // So we start at RSP, and zero out:
-    //     GetFixedStackSize() - GetSizeOfEditAndContinuePreservedArea() bytes.
-    //
-    // We'll need to restore PSPSym; location gotten from GCInfo.
-    // We'll need to copy security object; location gotten from GCInfo.
-    //
-    // On ARM64 the JIT generates a slightly different frame and we do not have
-    // the invariant FP == SP, since the FP needs to point at the saved fp/lr
-    // pair for ETW stack walks. The frame there looks something like:
-    // =======================================
-    // Arguments for next call (if there is one)     <- SP
-    // JIT temporaries
-    // Locals
-    // PSPSym
-    // ---------------------------------------    ^ zeroed area
-    // MonitorAcquired (for synchronized methods)
-    // Saved FP                                      <- FP
-    // Saved LR
-    // ---------------------------------------    ^ preserved area
-    // Arguments
-    //
-    // The JIT reports the size of the "preserved" area, which includes
-    // MonitorAcquired when it is present. It could also include other local
-    // values that need to be preserved across EnC transitions, but no explicit
-    // treatment of these is necessary here beyond preserving the values in
-    // this region.
-
-    // GCInfo for old method
-    GcInfoDecoder oldGcDecoder(
-        pOldCodeInfo->GetGCInfoToken(),
-        GcInfoDecoderFlags(DECODE_SECURITY_OBJECT | DECODE_PSP_SYM | DECODE_EDIT_AND_CONTINUE),
-        0       // Instruction offset (not needed)
-        );
-
-    // GCInfo for new method
-    GcInfoDecoder newGcDecoder(
-        pNewCodeInfo->GetGCInfoToken(),
-        GcInfoDecoderFlags(DECODE_SECURITY_OBJECT | DECODE_PSP_SYM | DECODE_EDIT_AND_CONTINUE),
-        0       // Instruction offset (not needed)
-        );
-
-    UINT32 oldSizeOfPreservedArea = oldGcDecoder.GetSizeOfEditAndContinuePreservedArea();
-    UINT32 newSizeOfPreservedArea = newGcDecoder.GetSizeOfEditAndContinuePreservedArea();
-
-    LOG((LF_CORDB, LL_INFO100, "EECM::FixContextForEnC: Got old and new EnC preserved area sizes of %u and %u\n", oldSizeOfPreservedArea, newSizeOfPreservedArea));
-    // This ensures the JIT generated EnC compliant code.
-    if ((oldSizeOfPreservedArea == NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA) ||
-        (newSizeOfPreservedArea == NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA))
-    {
-        _ASSERTE(!"FixContextForEnC called on a non-EnC-compliant method frame");
-        return CORDBG_E_ENC_INFOLESS_METHOD;
-    }
-
-    TADDR oldStackBase = GetSP(&oldCtx);
-
-    LOG((LF_CORDB, LL_INFO100, "EECM::FixContextForEnC: Old SP=%p, FP=%p\n", (void*)oldStackBase, (void*)GetFP(&oldCtx)));
-
-#if defined(TARGET_AMD64)
-    // Note: we cannot assert anything about the relationship between oldFixedStackSize
-    // and newFixedStackSize.  It's possible the edited frame grows (new locals) or
-    // shrinks (less temporaries).
-    DWORD oldFixedStackSize = pOldCodeInfo->GetFixedStackSize();
-    DWORD newFixedStackSize = pNewCodeInfo->GetFixedStackSize();
-
-    // This verifies no localallocs were used in the old method.
-    // JIT is required to emit frame register for EnC-compliant code
-    _ASSERTE(pOldCodeInfo->HasFrameRegister());
-    _ASSERTE(pNewCodeInfo->HasFrameRegister());
-
-#elif defined(TARGET_ARM64)
-    DWORD oldFixedStackSize = oldGcDecoder.GetSizeOfEditAndContinueFixedStackFrame();
-    DWORD newFixedStackSize = newGcDecoder.GetSizeOfEditAndContinueFixedStackFrame();
-#else
-    PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform.");
-#endif
-
-    LOG((LF_CORDB, LL_INFO100, "EECM::FixContextForEnC: Old and new fixed stack sizes are %u and %u\n", oldFixedStackSize, newFixedStackSize));
-
-#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
-    // win-x64: SP == FP before localloc
-    if (oldStackBase != GetFP(&oldCtx))
-    {
-        return E_FAIL;
-    }
-#else
-    // All other 64-bit targets use frame chaining with the FP stored right below the
-    // return address (LR is always pushed on arm64). FP + 16 == SP + oldFixedStackSize
-    // gives the caller's SP before stack alloc.
-    if (GetFP(&oldCtx) + 16 != oldStackBase + oldFixedStackSize)
-    {
-        return E_FAIL;
-    }
-#endif
-
-    // EnC remap inside handlers is not supported
-    if (pOldCodeInfo->IsFunclet() || pNewCodeInfo->IsFunclet())
-        return CORDBG_E_ENC_IN_FUNCLET;
-
-    if (oldSizeOfPreservedArea != newSizeOfPreservedArea)
-    {
-        _ASSERTE(!"FixContextForEnC called with method whose frame header size changed from old to new version.");
-        return E_FAIL;
-    }
-
-    TADDR callerSP = oldStackBase + oldFixedStackSize;
-
-#ifdef _DEBUG
-    // If the old method has a PSPSym, then its value should == initial-SP (i.e.
-    // oldStackBase) for x64 and callerSP for arm64
-    INT32 nOldPspSymStackSlot = oldGcDecoder.GetPSPSymStackSlot();
-    if (nOldPspSymStackSlot != NO_PSP_SYM)
-    {
-#if defined(TARGET_AMD64)
-        TADDR oldPSP = *PTR_TADDR(oldStackBase + nOldPspSymStackSlot);
-        _ASSERTE(oldPSP == oldStackBase);
-#else
-        TADDR oldPSP = *PTR_TADDR(callerSP + nOldPspSymStackSlot);
-        _ASSERTE(oldPSP == callerSP);
-#endif
-    }
-#endif // _DEBUG
-
-#else
-    PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform.");
-#endif
-
-    // 2) Get all the info about current variables, registers, etc
-
-    const ICorDebugInfo::NativeVarInfo *  pOldVar;
-
-    // sorted by varNumber
-    ICorDebugInfo::NativeVarInfo * oldMethodVarsSorted = NULL;
-    ICorDebugInfo::NativeVarInfo * oldMethodVarsSortedBase = NULL;
-    ICorDebugInfo::NativeVarInfo *newMethodVarsSorted = NULL;
-    ICorDebugInfo::NativeVarInfo *newMethodVarsSortedBase = NULL;
-
-    SIZE_T *rgVal1 = NULL;
-    SIZE_T *rgVal2 = NULL;
-
-    {
-        SIZE_T local;
-
-        // We'll need to sort the old native var info by variable number, since the
-        // order of them isn't necc. the same.  We'll use the number as the key.
-        // We will assume we may have hidden arguments (which have negative values as the index)
-
-        unsigned oldNumVars = unsigned(-ICorDebugInfo::UNKNOWN_ILNUM);
-        for (pOldVar = oldMethodVars, local = 0;
-             local < oldMethodVarsCount;
-             local++, pOldVar++)
-        {
-            DWORD varNumber = pOldVar->varNumber;
-            if (signed(varNumber) >= 0)
-            {
-                // This is an explicit (not special) var, so add its varNumber + 1 to our
-                // max count ("+1" because varNumber is zero-based).
-                oldNumVars = max(oldNumVars, unsigned(-ICorDebugInfo::UNKNOWN_ILNUM) + varNumber + 1);
-            }
-        }
-
-        oldMethodVarsSortedBase = new (nothrow) ICorDebugInfo::NativeVarInfo[oldNumVars];
-        if (!oldMethodVarsSortedBase)
-        {
-            hr = E_FAIL;
-            goto ErrExit;
-        }
-        oldMethodVarsSorted = oldMethodVarsSortedBase + (-ICorDebugInfo::UNKNOWN_ILNUM);
-
-        memset((void *)oldMethodVarsSortedBase, 0, oldNumVars * sizeof(ICorDebugInfo::NativeVarInfo));
-
-        for (local = 0; local < oldNumVars;local++)
-             oldMethodVarsSortedBase[local].loc.vlType = ICorDebugInfo::VLT_INVALID;
-
-        BYTE **rgVCs = NULL;
-        DWORD oldMethodOffset = pOldCodeInfo->GetRelOffset();
-
-        for (pOldVar = oldMethodVars, local = 0;
-             local < oldMethodVarsCount;
-             local++, pOldVar++)
-        {
-            DWORD varNumber = pOldVar->varNumber;
-
-            _ASSERTE(varNumber + unsigned(-ICorDebugInfo::UNKNOWN_ILNUM) < oldNumVars);
-
-            // Only care about old local variables alive at oldMethodOffset
-            if (pOldVar->startOffset <= oldMethodOffset &&
-                pOldVar->endOffset   >  oldMethodOffset)
-            {
-                // Indexing should be performed with a signed value - could be negative.
-                oldMethodVarsSorted[(int32_t)varNumber] = *pOldVar;
-            }
-        }
-
-        // 3) Next sort the new var info by varNumber.  We want to do this here, since
-        // we're allocating memory (which may fail) - do this before going to step 2
-
-        // First, count the new vars the same way we did the old vars above.
-
-        const ICorDebugInfo::NativeVarInfo * pNewVar;
-
-        unsigned newNumVars = unsigned(-ICorDebugInfo::UNKNOWN_ILNUM);
-        for (pNewVar = newMethodVars, local = 0;
-             local < newMethodVarsCount;
-             local++, pNewVar++)
-        {
-            DWORD varNumber = pNewVar->varNumber;
-            if (signed(varNumber) >= 0)
-            {
-                // This is an explicit (not special) var, so add its varNumber + 1 to our
-                // max count ("+1" because varNumber is zero-based).
-                newNumVars = max(newNumVars, unsigned(-ICorDebugInfo::UNKNOWN_ILNUM) + varNumber + 1);
-            }
-        }
-
-        // sorted by varNumber
-        newMethodVarsSortedBase = new (nothrow) ICorDebugInfo::NativeVarInfo[newNumVars];
-        if (!newMethodVarsSortedBase)
-        {
-            hr = E_FAIL;
-            goto ErrExit;
-        }
-        newMethodVarsSorted = newMethodVarsSortedBase + (-ICorDebugInfo::UNKNOWN_ILNUM);
-
-        memset(newMethodVarsSortedBase, 0, newNumVars * sizeof(ICorDebugInfo::NativeVarInfo));
-        for (local = 0; local < newNumVars;local++)
-             newMethodVarsSortedBase[local].loc.vlType = ICorDebugInfo::VLT_INVALID;
-
-        DWORD newMethodOffset = pNewCodeInfo->GetRelOffset();
-
-        for (pNewVar = newMethodVars, local = 0;
-             local < newMethodVarsCount;
-             local++, pNewVar++)
-        {
-            DWORD varNumber = pNewVar->varNumber;
-
-            _ASSERTE(varNumber + unsigned(-ICorDebugInfo::UNKNOWN_ILNUM) < newNumVars);
-
-            // Only care about new local variables alive at newMethodOffset
-            if (pNewVar->startOffset <= newMethodOffset &&
-                pNewVar->endOffset   >  newMethodOffset)
-            {
-                // Indexing should be performed with a signed valued - could be negative.
-                newMethodVarsSorted[(int32_t)varNumber] = *pNewVar;
-            }
-        }
-
-        _ASSERTE(newNumVars >= oldNumVars ||
-                 !"Not allowed to reduce the number of locals between versions!");
-
-        LOG((LF_ENC, LL_INFO100, "EECM::FixContextForEnC: gathered info!\n"));
-
-        rgVal1 = new (nothrow) SIZE_T[newNumVars];
-        if (rgVal1 == NULL)
-        {
-            hr = E_FAIL;
-            goto ErrExit;
-        }
-
-        rgVal2 = new (nothrow) SIZE_T[newNumVars];
-        if (rgVal2 == NULL)
-        {
-            hr = E_FAIL;
-            goto ErrExit;
-        }
-
-        // 4) Next we'll zero them out, so any variables that aren't in scope
-        // in the old method, but are in scope in the new, will have the
-        // default, zero, value.
-
-        memset(rgVal1, 0, sizeof(SIZE_T) * newNumVars);
-        memset(rgVal2, 0, sizeof(SIZE_T) * newNumVars);
-
-        unsigned varsToGet = (oldNumVars > newNumVars)
-                ? newNumVars
-                : oldNumVars;
-
-         //  2) Get all the info about current variables, registers, etc.
-
-        hr = g_pDebugInterface->GetVariablesFromOffset(pOldCodeInfo->GetMethodDesc(),
-                                                       varsToGet,
-                                                       oldMethodVarsSortedBase,
-                                                       oldMethodOffset,
-                                                       &oldCtx,
-                                                       rgVal1,
-                                                       rgVal2,
-                                                       newNumVars,
-                                                       &rgVCs);
-        if (FAILED(hr))
-        {
-            goto ErrExit;
-        }
-
-
-        LOG((LF_ENC, LL_INFO100, "EECM::FixContextForEnC: got vars!\n"));
-
-        /*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*
-         *  IMPORTANT : Once we start munging on the context, we cannot return
-         *  EnC_FAIL, as this should be a transacted commit,
-         **=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
-
-#if defined(TARGET_X86)
-        // Zero out all  the registers as some may hold new variables.
-        pCtx->Eax = pCtx->Ecx = pCtx->Edx = pCtx->Ebx = pCtx->Esi = pCtx->Edi = 0;
-
-        // 3) zero out the stack frame - this'll initialize _all_ variables
-
-        /*-------------------------------------------------------------------------
-         * Adjust the stack height
-         */
-        pCtx->Esp -= (newInfo.stackSize - oldInfo.stackSize);
-
-        // Zero-init the local and tempory section of new stack frame being careful to avoid
-        // touching anything in the frame header.
-        // This is necessary to ensure that any JIT temporaries in the old version can't be mistaken
-        // for ObjRefs now.
-        size_t frameHeaderSize = GetSizeOfFrameHeaderForEnC( &newInfo );
-        _ASSERTE( frameHeaderSize <= oldInfo.stackSize );
-        _ASSERTE( GetSizeOfFrameHeaderForEnC( &oldInfo ) == frameHeaderSize );
-
-#elif defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
-
-        // Next few statements zero out all registers that may end up holding new variables.
-
-        // volatile int registers (JIT may use these to enregister variables)
-        pCtx->Rax = pCtx->Rcx = pCtx->Rdx = pCtx->R8 = pCtx->R9 = pCtx->R10 = pCtx->R11 = 0;
-
-        // volatile float registers
-        pCtx->Xmm1.High = pCtx->Xmm1.Low = 0;
-        pCtx->Xmm2.High = pCtx->Xmm2.Low = 0;
-        pCtx->Xmm3.High = pCtx->Xmm3.Low = 0;
-        pCtx->Xmm4.High = pCtx->Xmm4.Low = 0;
-        pCtx->Xmm5.High = pCtx->Xmm5.Low = 0;
-
-        // 3) zero out the stack frame - this'll initialize _all_ variables
-
-        /*-------------------------------------------------------------------------
-        * Adjust the stack height
-        */
-
-        TADDR newStackBase = callerSP - newFixedStackSize;
-
-        SetSP(pCtx, newStackBase);
-
-        // We want to zero-out everything pushed after the frame header. This way we'll zero
-        // out locals (both old & new) and temporaries. This is necessary to ensure that any
-        // JIT temporaries in the old version can't be mistaken for ObjRefs now. (I am told
-        // this last point is less of an issue on x64 as it is on x86, but zeroing out the
-        // temporaries is still the cleanest, most robust way to go.)
-        size_t frameHeaderSize = newSizeOfPreservedArea;
-        _ASSERTE(frameHeaderSize <= oldFixedStackSize);
-        _ASSERTE(frameHeaderSize <= newFixedStackSize);
-
-        // For EnC-compliant x64 code, FP == SP.  Since SP changed above, update FP now
-        pCtx->Rbp = newStackBase;
-
-#else
-#if defined(TARGET_ARM64)
-        // Zero out volatile part of stack frame
-        // x0-x17
-        memset(&pCtx->X[0], 0, sizeof(pCtx->X[0]) * 18);
-        // v0-v7
-        memset(&pCtx->V[0], 0, sizeof(pCtx->V[0]) * 8);
-        // v16-v31
-        memset(&pCtx->V[16], 0, sizeof(pCtx->V[0]) * 16);
-#elif defined(TARGET_AMD64)
-        // SysV ABI
-        pCtx->Rax = pCtx->Rdi = pCtx->Rsi = pCtx->Rdx = pCtx->Rcx = pCtx->R8 = pCtx->R9 = 0;
-
-        // volatile float registers
-        memset(&pCtx->Xmm0, 0, sizeof(pCtx->Xmm0) * 16);
-#else
-        PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform.");
-#endif
-
-        TADDR newStackBase = callerSP - newFixedStackSize;
-
-        SetSP(pCtx, newStackBase);
-
-        size_t frameHeaderSize = newSizeOfPreservedArea;
-        _ASSERTE(frameHeaderSize <= oldFixedStackSize);
-        _ASSERTE(frameHeaderSize <= newFixedStackSize);
-
-        // EnC prolog saves only FP (and LR on arm64), and FP points to saved FP for frame chaining.
-        // These should already be set up from previous version.
-        _ASSERTE(GetFP(pCtx) == callerSP - 16);
-#endif
-
-        // Perform some debug-only sanity checks on stack variables.  Some checks are
-        // performed differently between X86/AMD64.
-
-#ifdef _DEBUG
-        for( unsigned i = 0; i < newNumVars; i++ )
-        {
-            // Make sure that stack variables existing in both old and new methods did not
-            // move.  This matters if the address of a local is used in the remapped method.
-            // For example:
-            //
-            //    static unsafe void Main(string[] args)
-            //    {
-            //        int x;
-            //        int* p = &x;
-            //                 <- Edit made here - cannot move address of x
-            //        *p = 5;
-            //    }
-            //
-            if ((i + unsigned(-ICorDebugInfo::UNKNOWN_ILNUM) < oldNumVars) &&  // Does variable exist in old method?
-                 (oldMethodVarsSorted[i].loc.vlType == ICorDebugInfo::VLT_STK) &&   // Is the variable on the stack?
-                 (newMethodVarsSorted[i].loc.vlType == ICorDebugInfo::VLT_STK))
-            {
-                SIZE_T * pOldVarStackLocation = NativeVarStackAddr(oldMethodVarsSorted[i].loc, &oldCtx);
-                SIZE_T * pNewVarStackLocation = NativeVarStackAddr(newMethodVarsSorted[i].loc, pCtx);
-                _ASSERTE(pOldVarStackLocation == pNewVarStackLocation);
-            }
-
-            // Sanity-check that the range we're clearing contains all of the stack variables
-
-#if defined(TARGET_X86)
-            const ICorDebugInfo::VarLoc &varLoc = newMethodVarsSortedBase[i].loc;
-            if( varLoc.vlType == ICorDebugInfo::VLT_STK )
-            {
-                // This is an EBP frame, all stack variables should be EBP relative
-                _ASSERTE( varLoc.vlStk.vlsBaseReg == ICorDebugInfo::REGNUM_EBP );
-                // Generic special args may show up as locals with positive offset from EBP, so skip them
-                if( varLoc.vlStk.vlsOffset <= 0 )
-                {
-                    // Normal locals must occur after the header on the stack
-                    _ASSERTE( unsigned(-varLoc.vlStk.vlsOffset) >= frameHeaderSize );
-                    // Value must occur before the top of the stack
-                    _ASSERTE( unsigned(-varLoc.vlStk.vlsOffset) < newInfo.stackSize );
-                }
-
-                // Ideally we'd like to verify that the stack locals (if any) start at exactly the end
-                // of the header.  However, we can't easily determine the size of value classes here,
-                // and so (since the stack grows towards 0) can't easily determine where the end of
-                // the local lies.
-            }
-#elif defined(TARGET_AMD64) || defined(TARGET_ARM64)
-            switch(newMethodVarsSortedBase[i].loc.vlType)
-            {
-            default:
-                // No validation here for non-stack locals
-                break;
-
-            case ICorDebugInfo::VLT_STK_BYREF:
-                {
-                    // For byrefs, verify that the ptr will be zeroed out
-
-                    SIZE_T regOffs = GetRegOffsInCONTEXT(newMethodVarsSortedBase[i].loc.vlStk.vlsBaseReg);
-                    TADDR baseReg = *(TADDR *)(regOffs + (BYTE*)pCtx);
-                    TADDR addrOfPtr = baseReg + newMethodVarsSortedBase[i].loc.vlStk.vlsOffset;
-
-                    _ASSERTE(
-                        // The ref must exist in the portion we'll zero-out
-                        (
-                            (newStackBase <= addrOfPtr) &&
-                            (addrOfPtr < newStackBase + (newFixedStackSize - frameHeaderSize))
-                        ) ||
-                        // OR in the caller's frame (for parameters)
-                        (addrOfPtr >= newStackBase + newFixedStackSize));
-
-                    // Deliberately fall through, so that we also verify that the value that the ptr
-                    // points to will be zeroed out
-                    // ...
-                }
-                __fallthrough;
-
-            case ICorDebugInfo::VLT_STK:
-            case ICorDebugInfo::VLT_STK2:
-            case ICorDebugInfo::VLT_REG_STK:
-            case ICorDebugInfo::VLT_STK_REG:
-                SIZE_T * pVarStackLocation = NativeVarStackAddr(newMethodVarsSortedBase[i].loc, pCtx);
-                _ASSERTE (pVarStackLocation != NULL);
-                _ASSERTE(
-                    // The value must exist in the portion we'll zero-out
-                    (
-                        (newStackBase <= (TADDR) pVarStackLocation) &&
-                        ((TADDR) pVarStackLocation < newStackBase + (newFixedStackSize - frameHeaderSize))
-                    ) ||
-                    // OR in the caller's frame (for parameters)
-                    ((TADDR) pVarStackLocation >= newStackBase + newFixedStackSize));
-                break;
-            }
-#else   // !X86, !X64, !ARM64
-            PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform.");
-#endif
-        }
-
-#endif // _DEBUG
-
-        // Clear the local and temporary stack space
-
-#if defined(TARGET_X86)
-        memset((void*)(size_t)(pCtx->Esp), 0, newInfo.stackSize - frameHeaderSize );
-#elif defined(TARGET_AMD64) || defined(TARGET_ARM64)
-        memset((void*)newStackBase, 0, newFixedStackSize - frameHeaderSize);
-
-        // Restore PSPSym for the new function. Its value should be set to our new FP. But
-        // first, we gotta find PSPSym's location on the stack
-        INT32 nNewPspSymStackSlot = newGcDecoder.GetPSPSymStackSlot();
-        if (nNewPspSymStackSlot != NO_PSP_SYM)
-        {
-#if defined(TARGET_AMD64)
-            *PTR_TADDR(newStackBase + nNewPspSymStackSlot) = newStackBase;
-#elif defined(TARGET_ARM64)
-            *PTR_TADDR(callerSP + nNewPspSymStackSlot) = callerSP;
-#else
-            PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform.");
-#endif
-        }
-#else   // !X86, !X64, !ARM64
-        PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform.");
-#endif
-
-        // 4) Put the variables from step 3 into their new locations.
-
-        LOG((LF_ENC, LL_INFO100, "EECM::FixContextForEnC: set vars!\n"));
-
-        // Move the old variables into their new places.
-
-        hr = g_pDebugInterface->SetVariablesAtOffset(pNewCodeInfo->GetMethodDesc(),
-                                                     newNumVars,
-                                                     newMethodVarsSortedBase,
-                                                     newMethodOffset,
-                                                     pCtx, // place them into the new context
-                                                     rgVal1,
-                                                     rgVal2,
-                                                     rgVCs);
-
-        /*-----------------------------------------------------------------------*/
-    }
-ErrExit:
-    if (oldMethodVarsSortedBase)
-        delete[] oldMethodVarsSortedBase;
-    if (newMethodVarsSortedBase)
-        delete[] newMethodVarsSortedBase;
-    if (rgVal1 != NULL)
-        delete[] rgVal1;
-    if (rgVal2 != NULL)
-        delete[] rgVal2;
-
-    LOG((LF_ENC, LL_INFO100, "EECM::FixContextForEnC: exiting!\n"));
-
-    return hr;
-}
-#endif // !FEATURE_METADATA_UPDATER
-
-#endif // #ifndef DACCESS_COMPILE
-
-#ifdef USE_GC_INFO_DECODER
-/*****************************************************************************
- *
- *  Is the function currently at a "GC safe point" ?
- */
-bool EECodeManager::IsGcSafe( EECodeInfo     *pCodeInfo,
-                              DWORD           dwRelOffset)
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-    } CONTRACTL_END;
-
-    GCInfoToken gcInfoToken = pCodeInfo->GetGCInfoToken();
-
-    GcInfoDecoder gcInfoDecoder(
-            gcInfoToken,
-            DECODE_INTERRUPTIBILITY,
-            dwRelOffset
-            );
-
-    return gcInfoDecoder.IsInterruptible();
-}
-
-#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-bool EECodeManager::HasTailCalls( EECodeInfo     *pCodeInfo)
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-    } CONTRACTL_END;
-
-    GCInfoToken gcInfoToken = pCodeInfo->GetGCInfoToken();
-
-    GcInfoDecoder gcInfoDecoder(
-            gcInfoToken,
-            DECODE_HAS_TAILCALLS,
-            0
-            );
-
-    return gcInfoDecoder.HasTailCalls();
-}
-#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
-
-#if defined(TARGET_AMD64) && defined(_DEBUG)
-
-struct FindEndOfLastInterruptibleRegionState
-{
-    unsigned curOffset;
-    unsigned endOffset;
-    unsigned lastRangeOffset;
-};
-
-bool FindEndOfLastInterruptibleRegionCB (
-        UINT32 startOffset,
-        UINT32 stopOffset,
-        LPVOID hCallback)
-{
-    FindEndOfLastInterruptibleRegionState *pState = (FindEndOfLastInterruptibleRegionState*)hCallback;
-
-    //
-    // If the current range doesn't overlap the given range, keep searching.
-    //
-    if (   startOffset >= pState->endOffset
-        || stopOffset < pState->curOffset)
-    {
-        return false;
-    }
-
-    //
-    // If the range overlaps the end, then the last point is the end.
-    //
-    if (   stopOffset > pState->endOffset
-        /*&& startOffset < pState->endOffset*/)
-    {
-        // The ranges should be sorted in increasing order.
-        CONSISTENCY_CHECK(startOffset >= pState->lastRangeOffset);
-
-        pState->lastRangeOffset = pState->endOffset;
-        return true;
-    }
-
-    //
-    // See if the end of this range is the closet to the end that we've found
-    // so far.
-    //
-    if (stopOffset > pState->lastRangeOffset)
-        pState->lastRangeOffset = stopOffset;
-
-    return false;
-}
-
-/*
-    Locates the end of the last interruptible region in the given code range.
-    Returns 0 if the entire range is uninterruptible.  Returns the end point
-    if the entire range is interruptible.
-*/
-unsigned EECodeManager::FindEndOfLastInterruptibleRegion(unsigned curOffset,
-                                                         unsigned endOffset,
-                                                         GCInfoToken gcInfoToken)
-{
-#ifndef DACCESS_COMPILE
-    GcInfoDecoder gcInfoDecoder(
-            gcInfoToken,
-            DECODE_FOR_RANGES_CALLBACK
-            );
-
-    FindEndOfLastInterruptibleRegionState state;
-    state.curOffset = curOffset;
-    state.endOffset = endOffset;
-    state.lastRangeOffset = 0;
-
-    gcInfoDecoder.EnumerateInterruptibleRanges(&FindEndOfLastInterruptibleRegionCB, &state);
-
-    return state.lastRangeOffset;
-#else
-    DacNotImpl();
-    return NULL;
-#endif // #ifndef DACCESS_COMPILE
-}
-
-#endif // TARGET_AMD64 && _DEBUG
-
-
-#else // !USE_GC_INFO_DECODER
-
-/*****************************************************************************
- *
- *  Is the function currently at a "GC safe point" ?
- */
-bool EECodeManager::IsGcSafe( EECodeInfo     *pCodeInfo,
-                              DWORD           dwRelOffset)
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-        SUPPORTS_DAC;
-    } CONTRACTL_END;
-
-    hdrInfo         info;
-    BYTE    *       table;
-
-    /* Extract the necessary information from the info block header */
-
-    table = (BYTE *)DecodeGCHdrInfo(pCodeInfo->GetGCInfoToken(),
-                                       dwRelOffset,
-                                       &info);
-
-    /* workaround: prevent interruption within prolog/epilog */
-
-    if  (info.prologOffs != hdrInfo::NOT_IN_PROLOG || info.epilogOffs != hdrInfo::NOT_IN_EPILOG)
-        return false;
-
-#if VERIFY_GC_TABLES
-    _ASSERTE(*castto(table, unsigned short *)++ == 0xBEEF);
-#endif
-
-    return (info.interruptible);
-}
-
-
-/*****************************************************************************/
-static
-PTR_CBYTE skipToArgReg(const hdrInfo& info, PTR_CBYTE table)
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-        SUPPORTS_DAC;
-    } CONTRACTL_END;
-
-#ifdef _DEBUG
-    PTR_CBYTE tableStart = table;
-#else
-    if (info.argTabOffset != INVALID_ARGTAB_OFFSET)
-    {
-        return table + info.argTabOffset;
-    }
-#endif
-
-    unsigned count;
-
-#if VERIFY_GC_TABLES
-    _ASSERTE(*castto(table, unsigned short *)++ == 0xBEEF);
-#endif
-
-    /* Skip over the untracked frame variable table */
-
-    count = info.untrackedCnt;
-    while (count-- > 0) {
-        fastSkipSigned(table);
-    }
-
-#if VERIFY_GC_TABLES
-    _ASSERTE(*castto(table, unsigned short *)++ == 0xCAFE);
-#endif
-
-    /* Skip over the frame variable lifetime table */
-
-    count = info.varPtrTableSize;
-    while (count-- > 0) {
-        fastSkipUnsigned(table); fastSkipUnsigned(table); fastSkipUnsigned(table);
-    }
-
-#if VERIFY_GC_TABLES
-    _ASSERTE(*castto(table, unsigned short *) == 0xBABE);
-#endif
-
-#ifdef _DEBUG
-    if (info.argTabOffset != INVALID_ARGTAB_OFFSET)
-    {
-        CONSISTENCY_CHECK_MSGF((info.argTabOffset == (unsigned) (table - tableStart)),
-          ("table = %p, tableStart = %p, info.argTabOffset = %d", table, tableStart, info.argTabOffset));
-    }
-#endif
-
-    return table;
-}
-
-/*****************************************************************************/
-
-#define regNumToMask(regNum) RegMask(1<<(regNum))
-
-/*****************************************************************************
- Helper for scanArgRegTable() and scanArgRegTableI() for regMasks
- */
-
-void *      getCalleeSavedReg(PREGDISPLAY pContext, regNum reg)
-{
-    LIMITED_METHOD_CONTRACT;
-    SUPPORTS_DAC;
-
-    switch (reg)
-    {
-        case REGI_EBP: return pContext->GetEbpLocation();
-        case REGI_EBX: return pContext->GetEbxLocation();
-        case REGI_ESI: return pContext->GetEsiLocation();
-        case REGI_EDI: return pContext->GetEdiLocation();
-
-        default: _ASSERTE(!"bad info.thisPtrResult"); return NULL;
-    }
-}
-
-/*****************************************************************************
- These functions converts the bits in the GC encoding to RegMask
- */
-
-inline
-RegMask     convertCalleeSavedRegsMask(unsigned inMask) // EBP,EBX,ESI,EDI
-{
-    LIMITED_METHOD_CONTRACT;
-    SUPPORTS_DAC;
-
-    _ASSERTE((inMask & 0x0F) == inMask);
-
-    unsigned outMask = RM_NONE;
-    if (inMask & 0x1) outMask |= RM_EDI;
-    if (inMask & 0x2) outMask |= RM_ESI;
-    if (inMask & 0x4) outMask |= RM_EBX;
-    if (inMask & 0x8) outMask |= RM_EBP;
-
-    return (RegMask) outMask;
-}
-
-inline
-RegMask     convertAllRegsMask(unsigned inMask) // EAX,ECX,EDX,EBX, EBP,ESI,EDI
-{
-    LIMITED_METHOD_CONTRACT;
-    SUPPORTS_DAC;
-
-    _ASSERTE((inMask & 0xEF) == inMask);
-
-    unsigned outMask = RM_NONE;
-    if (inMask & 0x01) outMask |= RM_EAX;
-    if (inMask & 0x02) outMask |= RM_ECX;
-    if (inMask & 0x04) outMask |= RM_EDX;
-    if (inMask & 0x08) outMask |= RM_EBX;
-    if (inMask & 0x20) outMask |= RM_EBP;
-    if (inMask & 0x40) outMask |= RM_ESI;
-    if (inMask & 0x80) outMask |= RM_EDI;
-
-    return (RegMask)outMask;
-}
-
-/*****************************************************************************
- * scan the register argument table for the not fully interruptible case.
-   this function is called to find all live objects (pushed arguments)
-   and to get the stack base for EBP-less methods.
-
-   NOTE: If info->argTabResult is NULL, info->argHnumResult indicates
-         how many bits in argMask are valid
-         If info->argTabResult is non-NULL, then the argMask field does
-         not fit in 32-bits and the value in argMask meaningless.
-         Instead argHnum specifies the number of (variable-length) elements
-         in the array, and argTabBytes specifies the total byte size of the
-         array. [ Note this is an extremely rare case ]
- */
-
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable:21000) // Suppress PREFast warning about overly large function
-#endif
-static
-unsigned scanArgRegTable(PTR_CBYTE    table,
-                         unsigned     curOffs,
-                         hdrInfo    * info)
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-        SUPPORTS_DAC;
-    } CONTRACTL_END;
-
-    regNum    thisPtrReg    = REGI_NA;
-#ifdef _DEBUG
-    bool      isCall        = false;
-#endif
-    unsigned  regMask       = 0;    // EBP,EBX,ESI,EDI
-    unsigned  argMask       = 0;
-    unsigned  argHnum       = 0;
-    PTR_CBYTE argTab        = 0;
-    unsigned  argTabBytes   = 0;
-    unsigned  stackDepth    = 0;
-
-    unsigned  iregMask      = 0;    // EBP,EBX,ESI,EDI
-    unsigned  iargMask      = 0;
-    unsigned  iptrMask      = 0;
-
-#if VERIFY_GC_TABLES
-    _ASSERTE(*castto(table, unsigned short *)++ == 0xBABE);
-#endif
-
-    unsigned scanOffs = 0;
-
-    _ASSERTE(scanOffs <= info->methodSize);
-
-    if (info->ebpFrame) {
-  /*
-      Encoding table for methods with an EBP frame and
-                         that are not fully interruptible
-
-      The encoding used is as follows:
-
-      this pointer encodings:
-
-         01000000          this pointer in EBX
-         00100000          this pointer in ESI
-         00010000          this pointer in EDI
-
-      tiny encoding:
-
-         0bsdDDDD
-                           requires code delta     < 16 (4-bits)
-                           requires pushed argmask == 0
-
-           where    DDDD   is code delta
-                       b   indicates that register EBX is a live pointer
-                       s   indicates that register ESI is a live pointer
-                       d   indicates that register EDI is a live pointer
-
-      small encoding:
-
-         1DDDDDDD bsdAAAAA
-
-                           requires code delta     < 120 (7-bits)
-                           requires pushed argmask <  64 (5-bits)
-
-           where DDDDDDD   is code delta
-                   AAAAA   is the pushed args mask
-                       b   indicates that register EBX is a live pointer
-                       s   indicates that register ESI is a live pointer
-                       d   indicates that register EDI is a live pointer
-
-      medium encoding
-
-         0xFD aaaaaaaa AAAAdddd bseDDDDD
-
-                           requires code delta     <    0x1000000000  (9-bits)
-                           requires pushed argmask < 0x1000000000000 (12-bits)
-
-           where    DDDDD  is the upper 5-bits of the code delta
-                     dddd  is the low   4-bits of the code delta
-                     AAAA  is the upper 4-bits of the pushed arg mask
-                 aaaaaaaa  is the low   8-bits of the pushed arg mask
-                        b  indicates that register EBX is a live pointer
-                        s  indicates that register ESI is a live pointer
-                        e  indicates that register EDI is a live pointer
-
-      medium encoding with interior pointers
-
-         0xF9 DDDDDDDD bsdAAAAAA iiiIIIII
-
-                           requires code delta     < (8-bits)
-                           requires pushed argmask < (5-bits)
-
-           where  DDDDDDD  is the code delta
-                        b  indicates that register EBX is a live pointer
-                        s  indicates that register ESI is a live pointer
-                        d  indicates that register EDI is a live pointer
-                    AAAAA  is the pushed arg mask
-                      iii  indicates that EBX,EDI,ESI are interior pointers
-                    IIIII  indicates that bits is the arg mask are interior
-                           pointers
-
-      large encoding
-
-         0xFE [0BSD0bsd][32-bit code delta][32-bit argMask]
-
-                        b  indicates that register EBX is a live pointer
-                        s  indicates that register ESI is a live pointer
-                        d  indicates that register EDI is a live pointer
-                        B  indicates that register EBX is an interior pointer
-                        S  indicates that register ESI is an interior pointer
-                        D  indicates that register EDI is an interior pointer
-                           requires pushed  argmask < 32-bits
-
-      large encoding  with interior pointers
-
-         0xFA [0BSD0bsd][32-bit code delta][32-bit argMask][32-bit interior pointer mask]
-
-
-                        b  indicates that register EBX is a live pointer
-                        s  indicates that register ESI is a live pointer
-                        d  indicates that register EDI is a live pointer
-                        B  indicates that register EBX is an interior pointer
-                        S  indicates that register ESI is an interior pointer
-                        D  indicates that register EDI is an interior pointer
-                           requires pushed  argmask < 32-bits
-                           requires pushed iArgmask < 32-bits
-
-      huge encoding        This is the only encoding that supports
-                           a pushed argmask which is greater than
-                           32-bits.
-
-         0xFB [0BSD0bsd][32-bit code delta]
-              [32-bit table count][32-bit table size]
-              [pushed ptr offsets table...]
-
-                       b   indicates that register EBX is a live pointer
-                       s   indicates that register ESI is a live pointer
-                       d   indicates that register EDI is a live pointer
-                       B   indicates that register EBX is an interior pointer
-                       S   indicates that register ESI is an interior pointer
-                       D   indicates that register EDI is an interior pointer
-                       the list count is the number of entries in the list
-                       the list size gives the byte-length of the list
-                       the offsets in the list are variable-length
-  */
-        while (scanOffs < curOffs)
-        {
-            iregMask = 0;
-            iargMask = 0;
-            argTab = NULL;
-#ifdef _DEBUG
-            isCall = true;
-#endif
-
-            /* Get the next byte and check for a 'special' entry */
-
-            unsigned encType = *table++;
-#if defined(DACCESS_COMPILE)
-            // In this scenario, it is invalid to have a zero byte in the GC info encoding (refer to the
-            // comments above). At least one bit has to be set.  For example, a byte can represent which
-            // register is the "this" pointer, and this byte has to be 0x10, 0x20, or 0x40.  Having a zero
-            // byte indicates there is most likely some sort of DAC error, and it may lead to problems such as
-            // infinite loops.  So we bail out early instead.
-            if (encType == 0)
-            {
-                DacError(CORDBG_E_TARGET_INCONSISTENT);
-                UNREACHABLE();
-            }
-#endif // DACCESS_COMPILE
-
-            switch (encType)
-            {
-                unsigned    val, nxt;
-
-            default:
-
-                /* A tiny or small call entry */
-                val = encType;
-                if ((val & 0x80) == 0x00) {
-                    if (val & 0x0F) {
-                        /* A tiny call entry */
-                        scanOffs += (val & 0x0F);
-                        regMask   = (val & 0x70) >> 4;
-                        argMask   = 0;
-                        argHnum   = 0;
-                    }
-                    else {
-                        /* This pointer liveness encoding */
-                        regMask   = (val & 0x70) >> 4;
-                        if (regMask == 0x1)
-                            thisPtrReg = REGI_EDI;
-                        else if (regMask == 0x2)
-                            thisPtrReg = REGI_ESI;
-                        else if (regMask == 0x4)
-                            thisPtrReg = REGI_EBX;
-                        else
-                           _ASSERTE(!"illegal encoding for 'this' pointer liveness");
-                    }
-                }
-                else {
-                    /* A small call entry */
-                    scanOffs += (val & 0x7F);
-                    val       = *table++;
-                    regMask   = val >> 5;
-                    argMask   = val & 0x1F;
-                    argHnum   = 5;
-                }
-                break;
-
-            case 0xFD:  // medium encoding
-
-                argMask   = *table++;
-                val       = *table++;
-                argMask  |= ((val & 0xF0) << 4);
-                argHnum   = 12;
-                nxt       = *table++;
-                scanOffs += (val & 0x0F) + ((nxt & 0x1F) << 4);
-                regMask   = nxt >> 5;                   // EBX,ESI,EDI
-
-                break;
-
-            case 0xF9:  // medium encoding with interior pointers
-
-                scanOffs   += *table++;
-                val         = *table++;
-                argMask     = val & 0x1F;
-                argHnum     = 5;
-                regMask     = val >> 5;
-                val         = *table++;
-                iargMask    = val & 0x1F;
-                iregMask    = val >> 5;
-
-                break;
-
-            case 0xFE:  // large encoding
-            case 0xFA:  // large encoding with interior pointers
-
-                val         = *table++;
-                regMask     = val & 0x7;
-                iregMask    = val >> 4;
-                scanOffs   += *dac_cast<PTR_DWORD>(table);  table += sizeof(DWORD);
-                argMask     = *dac_cast<PTR_DWORD>(table);  table += sizeof(DWORD);
-                argHnum     = 31;
-                if (encType == 0xFA) // read iargMask
-                {
-                    iargMask = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
-                }
-                break;
-
-            case 0xFB:  // huge encoding        This is the only partially interruptible
-                        //                      encoding that supports a pushed ArgMask
-                        //                      which is greater than 32-bits.
-                        //                      The ArgMask is encoded using the argTab
-                val         = *table++;
-                regMask     = val & 0x7;
-                iregMask    = val >> 4;
-                scanOffs   += *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
-                argHnum     = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
-                argTabBytes = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
-                argTab      = table;                       table += argTabBytes;
-
-                argMask     = 0;
-                break;
-
-            case 0xFF:
-                scanOffs = curOffs + 1;
-                break;
-
-            } // end case
-
-            // iregMask & iargMask are subsets of regMask & argMask respectively
-
-            _ASSERTE((iregMask & regMask) == iregMask);
-            _ASSERTE((iargMask & argMask) == iargMask);
-
-        } // end while
-
-    }
-    else {
-
-/*
- *    Encoding table for methods with an ESP frame and are not fully interruptible
- *    This encoding does not support a pushed ArgMask greater than 32
- *
- *               The encoding used is as follows:
- *
- *  push     000DDDDD                     ESP push one item with 5-bit delta
- *  push     00100000 [pushCount]         ESP push multiple items
- *  reserved 0011xxxx
- *  skip     01000000 [Delta]             Skip Delta, arbitrary sized delta
- *  skip     0100DDDD                     Skip small Delta, for call (DDDD != 0)
- *  pop      01CCDDDD                     ESP pop  CC items with 4-bit delta (CC != 00)
- *  call     1PPPPPPP                     Call Pattern, P=[0..79]
- *  call     1101pbsd DDCCCMMM            Call RegMask=pbsd,ArgCnt=CCC,
- *                                        ArgMask=MMM Delta=commonDelta[DD]
- *  call     1110pbsd [ArgCnt] [ArgMask]  Call ArgCnt,RegMask=pbsd,[32-bit ArgMask]
- *  call     11111000 [PBSDpbsd][32-bit delta][32-bit ArgCnt]
- *                    [32-bit PndCnt][32-bit PndSize][PndOffs...]
- *  iptr     11110000 [IPtrMask]          Arbitrary 32-bit Interior Pointer Mask
- *  thisptr  111101RR                     This pointer is in Register RR
- *                                        00=EDI,01=ESI,10=EBX,11=EBP
- *  reserved 111100xx                     xx  != 00
- *  reserved 111110xx                     xx  != 00
- *  reserved 11111xxx                     xxx != 000 && xxx != 111(EOT)
- *
- *   The value 11111111 [0xFF] indicates the end of the table.
- *
- *  An offset (at which stack-walking is performed) without an explicit encoding
- *  is assumed to be a trivial call-site (no GC registers, stack empty before and
- *  after) to avoid having to encode all trivial calls.
- *
- * Note on the encoding used for interior pointers
- *
- *   The iptr encoding must immediately precede a call encoding.  It is used to
- *   transform a normal GC pointer addresses into an interior pointers for GC purposes.
- *   The mask supplied to the iptr encoding is read from the least signicant bit
- *   to the most signicant bit. (i.e the lowest bit is read first)
- *
- *   p   indicates that register EBP is a live pointer
- *   b   indicates that register EBX is a live pointer
- *   s   indicates that register ESI is a live pointer
- *   d   indicates that register EDI is a live pointer
- *   P   indicates that register EBP is an interior pointer
- *   B   indicates that register EBX is an interior pointer
- *   S   indicates that register ESI is an interior pointer
- *   D   indicates that register EDI is an interior pointer
- *
- *   As an example the following sequence indicates that EDI.ESI and the 2nd pushed pointer
- *   in ArgMask are really interior pointers.  The pointer in ESI in a normal pointer:
- *
- *   iptr 11110000 00010011           => read Interior Ptr, Interior Ptr, Normal Ptr, Normal Ptr, Interior Ptr
- *   call 11010011 DDCCC011 RRRR=1011 => read EDI is a GC-pointer, ESI is a GC-pointer. EBP is a GC-pointer
- *                           MMM=0011 => read two GC-pointers arguments on the stack (nested call)
- *
- *   Since the call instruction mentions 5 GC-pointers we list them in the required order:
- *   EDI, ESI, EBP, 1st-pushed pointer, 2nd-pushed pointer
- *
- *   And we apply the Interior Pointer mask mmmm=10011 to the above five ordered GC-pointers
- *   we learn that EDI and ESI are interior GC-pointers and that the second push arg is an
- *   interior GC-pointer.
- */
-
-#if defined(DACCESS_COMPILE)
-        DWORD cbZeroBytes = 0;
-#endif // DACCESS_COMPILE
-
-        while (scanOffs <= curOffs)
-        {
-            unsigned callArgCnt;
-            unsigned skip;
-            unsigned newRegMask, inewRegMask;
-            unsigned newArgMask, inewArgMask;
-            unsigned oldScanOffs = scanOffs;
-
-            if (iptrMask)
-            {
-                // We found this iptrMask in the previous iteration.
-                // This iteration must be for a call. Set these variables
-                // so that they are available at the end of the loop
-
-                inewRegMask   = iptrMask & 0x0F; // EBP,EBX,ESI,EDI
-                inewArgMask   = iptrMask >> 4;
-
-                iptrMask      = 0;
-            }
-            else
-            {
-                // Zero out any stale values.
-
-                inewRegMask = 0;
-                inewArgMask = 0;
-            }
-
-            /* Get the next byte and decode it */
-
-            unsigned val = *table++;
-#if defined(DACCESS_COMPILE)
-            // In this scenario, a 0 means that there is a push at the current offset.  For a struct with
-            // two double fields, the JIT may use two movq instructions to push the struct onto the stack, and
-            // the JIT will encode 4 pushes at the same code offset.  This means that we can have up to 4
-            // consecutive bytes of 0 without changing the code offset.  Having more than 4 consecutive bytes
-            // of zero indicates that there is most likely some sort of DAC error, and it may lead to problems
-            // such as infinite loops.  So we bail out early instead.
-            if (val == 0)
-            {
-                cbZeroBytes += 1;
-                if (cbZeroBytes > 4)
-                {
-                    DacError(CORDBG_E_TARGET_INCONSISTENT);
-                    UNREACHABLE();
-                }
-            }
-            else
-            {
-                cbZeroBytes = 0;
-            }
-#endif // DACCESS_COMPILE
-
-#ifdef _DEBUG
-            if (scanOffs != curOffs)
-                isCall = false;
-#endif
-
-            /* Check pushes, pops, and skips */
-
-            if  (!(val & 0x80)) {
-
-                //  iptrMask can immediately precede only calls
-
-                _ASSERTE(inewRegMask == 0);
-                _ASSERTE(inewArgMask == 0);
-
-                if (!(val & 0x40)) {
-
-                    unsigned pushCount;
-
-                    if (!(val & 0x20))
-                    {
-                        //
-                        // push    000DDDDD                 ESP push one item, 5-bit delta
-                        //
-                        pushCount   = 1;
-                        scanOffs   += val & 0x1f;
-                    }
-                    else
-                    {
-                        //
-                        // push    00100000 [pushCount]     ESP push multiple items
-                        //
-                        _ASSERTE(val == 0x20);
-                        pushCount = fastDecodeUnsigned(table);
-                    }
-
-                    if (scanOffs > curOffs)
-                    {
-                        scanOffs = oldScanOffs;
-                        goto FINISHED;
-                    }
-
-                    stackDepth +=  pushCount;
-                }
-                else if ((val & 0x3f) != 0) {
-                    //
-                    //  pop     01CCDDDD         pop CC items, 4-bit delta
-                    //
-                    scanOffs   +=  val & 0x0f;
-                    if (scanOffs > curOffs)
-                    {
-                        scanOffs = oldScanOffs;
-                        goto FINISHED;
-                    }
-                    stackDepth -= (val & 0x30) >> 4;
-
-                } else if (scanOffs < curOffs) {
-                    //
-                    // skip    01000000 [Delta]  Skip arbitrary sized delta
-                    //
-                    skip = fastDecodeUnsigned(table);
-                    scanOffs += skip;
-                }
-                else // don't process a skip if we are already at curOffs
-                    goto FINISHED;
-
-                /* reset regs and args state since we advance past last call site */
-
-                 regMask    = 0;
-                iregMask    = 0;
-                 argMask    = 0;
-                iargMask    = 0;
-                argHnum     = 0;
-
-            }
-            else /* It must be a call, thisptr, or iptr */
-            {
-                switch ((val & 0x70) >> 4) {
-                default:    // case 0-4, 1000xxxx through 1100xxxx
-                    //
-                    // call    1PPPPPPP          Call Pattern, P=[0..79]
-                    //
-                    decodeCallPattern((val & 0x7f), &callArgCnt,
-                                      &newRegMask, &newArgMask, &skip);
-                    // If we've already reached curOffs and the skip amount
-                    // is non-zero then we are done
-                    if ((scanOffs == curOffs) && (skip > 0))
-                        goto FINISHED;
-                    // otherwise process this call pattern
-                    scanOffs   += skip;
-                    if (scanOffs > curOffs)
-                        goto FINISHED;
-#ifdef _DEBUG
-                    isCall      = true;
-#endif
-                    regMask     = newRegMask;
-                    argMask     = newArgMask;   argTab = NULL;
-                    iregMask    = inewRegMask;
-                    iargMask    = inewArgMask;
-                    stackDepth -= callArgCnt;
-                    argHnum     = 2;             // argMask is known to be <= 3
-                    break;
-
-                  case 5:
-                    //
-                    // call    1101RRRR DDCCCMMM  Call RegMask=RRRR,ArgCnt=CCC,
-                    //                        ArgMask=MMM Delta=commonDelta[DD]
-                    //
-                    newRegMask  = val & 0xf;    // EBP,EBX,ESI,EDI
-                    val         = *table++;     // read next byte
-                    skip        = callCommonDelta[val>>6];
-                    // If we've already reached curOffs and the skip amount
-                    // is non-zero then we are done
-                    if ((scanOffs == curOffs) && (skip > 0))
-                        goto FINISHED;
-                    // otherwise process this call encoding
-                    scanOffs   += skip;
-                    if (scanOffs > curOffs)
-                        goto FINISHED;
-#ifdef _DEBUG
-                    isCall      = true;
-#endif
-                    regMask     = newRegMask;
-                    iregMask    = inewRegMask;
-                    callArgCnt  = (val >> 3) & 0x7;
-                    stackDepth -= callArgCnt;
-                    argMask     = (val & 0x7);  argTab = NULL;
-                    iargMask    = inewArgMask;
-                    argHnum     = 3;
-                    break;
-
-                  case 6:
-                    //
-                    // call    1110RRRR [ArgCnt] [ArgMask]
-                    //                          Call ArgCnt,RegMask=RRR,ArgMask
-                    //
-#ifdef _DEBUG
-                    isCall      = true;
-#endif
-                    regMask     = val & 0xf;    // EBP,EBX,ESI,EDI
-                    iregMask    = inewRegMask;
-                    callArgCnt  = fastDecodeUnsigned(table);
-                    stackDepth -= callArgCnt;
-                    argMask     = fastDecodeUnsigned(table);  argTab = NULL;
-                    iargMask    = inewArgMask;
-                    argHnum     = sizeof(argMask) * 8;  // The size of argMask in bits
-                    break;
-
-                  case 7:
-                    switch (val & 0x0C)
-                    {
-                      case 0x00:
-                        //
-                        // 0xF0   iptr     11110000   [IPtrMask] Arbitrary Interior Pointer Mask
-                        //
-                        iptrMask = fastDecodeUnsigned(table);
-                        break;
-
-                      case 0x04:
-                        //
-                        // 0xF4   thisptr  111101RR   This pointer is in Register RR
-                        //                            00=EDI,01=ESI,10=EBX,11=EBP
-                        //
-                        {
-                            static const regNum calleeSavedRegs[] =
-                                { REGI_EDI, REGI_ESI, REGI_EBX, REGI_EBP };
-                            thisPtrReg = calleeSavedRegs[val&0x3];
-                        }
-                        break;
-
-                      case 0x08:
-                        //
-                        // 0xF8   call     11111000   [PBSDpbsd][32-bit delta][32-bit ArgCnt]
-                        //                            [32-bit PndCnt][32-bit PndSize][PndOffs...]
-                        //
-                        val         = *table++;
-                        skip        = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
-// [VSUQFE 4670]
-                        // If we've already reached curOffs and the skip amount
-                        // is non-zero then we are done
-                        if ((scanOffs == curOffs) && (skip > 0))
-                            goto FINISHED;
-// [VSUQFE 4670]
-                        scanOffs   += skip;
-                        if (scanOffs > curOffs)
-                            goto FINISHED;
-#ifdef _DEBUG
-                        isCall      = true;
-#endif
-                        regMask     = val & 0xF;
-                        iregMask    = val >> 4;
-                        callArgCnt  = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
-                        stackDepth -= callArgCnt;
-                        argHnum     = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
-                        argTabBytes = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
-                        argTab      = table;
-                        table      += argTabBytes;
-                        break;
-
-                      case 0x0C:
-                        //
-                        // 0xFF   end      11111111   End of table marker
-                        //
-                        _ASSERTE(val==0xff);
-                        goto FINISHED;
-
-                      default:
-                        _ASSERTE(!"reserved GC encoding");
-                        break;
-                    }
-                    break;
-
-                } // end switch
-
-            } // end else (!(val & 0x80))
-
-            // iregMask & iargMask are subsets of regMask & argMask respectively
-
-            _ASSERTE((iregMask & regMask) == iregMask);
-            _ASSERTE((iargMask & argMask) == iargMask);
-
-        } // end while
-
-    } // end else ebp-less frame
-
-FINISHED:
-
-    // iregMask & iargMask are subsets of regMask & argMask respectively
-
-    _ASSERTE((iregMask & regMask) == iregMask);
-    _ASSERTE((iargMask & argMask) == iargMask);
-
-    if (scanOffs != curOffs)
-    {
-        /* must have been a boring call */
-        info->regMaskResult  = RM_NONE;
-        info->argMaskResult  = ptrArgTP(0);
-        info->iregMaskResult = RM_NONE;
-        info->iargMaskResult = ptrArgTP(0);
-        info->argHnumResult  = 0;
-        info->argTabResult   = NULL;
-        info->argTabBytes    = 0;
-    }
-    else
-    {
-        info->regMaskResult  = convertCalleeSavedRegsMask(regMask);
-        info->argMaskResult  = ptrArgTP(argMask);
-        info->argHnumResult  = argHnum;
-        info->iregMaskResult = convertCalleeSavedRegsMask(iregMask);
-        info->iargMaskResult = ptrArgTP(iargMask);
-        info->argTabResult   = argTab;
-        info->argTabBytes    = argTabBytes;
-    }
-
-#ifdef _DEBUG
-    if (scanOffs != curOffs) {
-        isCall = false;
-    }
-    _ASSERTE(thisPtrReg == REGI_NA || (!isCall || (regNumToMask(thisPtrReg) & info->regMaskResult)));
-#endif
-    info->thisPtrResult  = thisPtrReg;
-
-    _ASSERTE(int(stackDepth) < INT_MAX); // check that it did not underflow
-    return (stackDepth * sizeof(unsigned));
-}
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif
-
-
-/*****************************************************************************
- * scan the register argument table for the fully interruptible case.
-   this function is called to find all live objects (pushed arguments)
-   and to get the stack base for fully interruptible methods.
-   Returns size of things pushed on the stack for ESP frames
-
-   Arguments:
-      table       - The pointer table
-      curOffsRegs - The current code offset that should be used for reporting registers
-      curOffsArgs - The current code offset that should be used for reporting args
-      info        - Incoming arg used to determine if there's a frame, and to save results
- */
-
-static
-unsigned scanArgRegTableI(PTR_CBYTE    table,
-                          unsigned     curOffsRegs,
-                          unsigned     curOffsArgs,
-                          hdrInfo   *  info)
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-        SUPPORTS_DAC;
-    } CONTRACTL_END;
-
-    regNum thisPtrReg = REGI_NA;
-    unsigned  ptrRegs    = 0;    // The mask of registers that contain pointers
-    unsigned iptrRegs    = 0;    // The subset of ptrRegs that are interior pointers
-    unsigned  ptrOffs    = 0;    // The code offset of the table entry we are currently looking at
-    unsigned  argCnt     = 0;    // The number of args that have been pushed
-
-    ptrArgTP  ptrArgs(0);        // The mask of stack values that contain pointers.
-    ptrArgTP iptrArgs(0);        // The subset of ptrArgs that are interior pointers.
-    ptrArgTP  argHigh(0);        // The current mask position that corresponds to the top of the stack.
-
-    bool      isThis     = false;
-    bool      iptr       = false;
-
-    // The comment before the call to scanArgRegTableI in EnumGCRefs
-    // describes why curOffsRegs can be smaller than curOffsArgs.
-    _ASSERTE(curOffsRegs <= curOffsArgs);
-
-#if VERIFY_GC_TABLES
-    _ASSERTE(*castto(table, unsigned short *)++ == 0xBABE);
-#endif
-
-    bool      hasPartialArgInfo;
-
-#ifndef UNIX_X86_ABI
-    hasPartialArgInfo = info->ebpFrame;
-#else
-    // For x86/Linux, interruptible code always has full arg info
-    //
-    // This should be aligned with emitFullArgInfo setting at
-    // emitter::emitEndCodeGen (in JIT)
-    hasPartialArgInfo = false;
-#endif
-
-  /*
-      Encoding table for methods that are fully interruptible
-
-      The encoding used is as follows:
-
-          ptr reg dead        00RRRDDD    [RRR != 100]
-          ptr reg live        01RRRDDD    [RRR != 100]
-
-      non-ptr arg push        10110DDD                    [SSS == 110]
-          ptr arg push        10SSSDDD                    [SSS != 110] && [SSS != 111]
-          ptr arg pop         11CCCDDD    [CCC != 000] && [CCC != 110] && [CCC != 111]
-      little delta skip       11000DDD    [CCC == 000]
-      bigger delta skip       11110BBB                    [CCC == 110]
-
-      The values used in the encodings are as follows:
-
-        DDD                 code offset delta from previous entry (0-7)
-        BBB                 bigger delta 000=8,001=16,010=24,...,111=64
-        RRR                 register number (EAX=000,ECX=001,EDX=010,EBX=011,
-                              EBP=101,ESI=110,EDI=111), ESP=100 is reserved
-        SSS                 argument offset from base of stack. This is
-                              redundant for frameless methods as we can
-                              infer it from the previous pushes+pops. However,
-                              for EBP-methods, we only report GC pushes, and
-                              so we need SSS
-        CCC                 argument count being popped (includes only ptrs for EBP methods)
-
-      The following are the 'large' versions:
-
-        large delta skip        10111000 [0xB8] , encodeUnsigned(delta)
-
-        large     ptr arg push  11111000 [0xF8] , encodeUnsigned(pushCount)
-        large non-ptr arg push  11111001 [0xF9] , encodeUnsigned(pushCount)
-        large     ptr arg pop   11111100 [0xFC] , encodeUnsigned(popCount)
-        large         arg dead  11111101 [0xFD] , encodeUnsigned(popCount) for caller-pop args.
-                                                    Any GC args go dead after the call,
-                                                    but are still sitting on the stack
-
-        this pointer prefix     10111100 [0xBC]   the next encoding is a ptr live
-                                                    or a ptr arg push
-                                                    and contains the this pointer
-
-        interior or by-ref      10111111 [0xBF]   the next encoding is a ptr live
-             pointer prefix                         or a ptr arg push
-                                                    and contains an interior
-                                                    or by-ref pointer
-
-
-        The value 11111111 [0xFF] indicates the end of the table.
-  */
-
-#if defined(DACCESS_COMPILE)
-    bool fLastByteIsZero = false;
-#endif // DACCESS_COMPILE
-
-    /* Have we reached the instruction we're looking for? */
-
-    while (ptrOffs <= curOffsArgs)
-    {
-        unsigned    val;
-
-        int         isPop;
-        unsigned    argOfs;
-
-        unsigned    regMask;
-
-        // iptrRegs & iptrArgs are subsets of ptrRegs & ptrArgs respectively
-
-        _ASSERTE((iptrRegs & ptrRegs) == iptrRegs);
-        _ASSERTE((iptrArgs & ptrArgs) == iptrArgs);
-
-        /* Now find the next 'life' transition */
-
-        val = *table++;
-#if defined(DACCESS_COMPILE)
-        // In this scenario, a zero byte means that EAX is going dead at the current offset.  Since EAX
-        // can't go dead more than once at any given offset, it's invalid to have two consecutive bytes
-        // of zero.  If this were to happen, then it means that there is most likely some sort of DAC
-        // error, and it may lead to problems such as infinite loops.  So we bail out early instead.
-        if ((val == 0) && fLastByteIsZero)
-        {
-            DacError(CORDBG_E_TARGET_INCONSISTENT);
-            UNREACHABLE();
-        }
-        fLastByteIsZero = (val == 0);
-#endif // DACCESS_COMPILE
-
-        if  (!(val & 0x80))
-        {
-            /* A small 'regPtr' encoding */
-
-            regNum       reg;
-
-            ptrOffs += (val     ) & 0x7;
-            if (ptrOffs > curOffsArgs) {
-                iptr = isThis = false;
-                goto REPORT_REFS;
-            }
-            else if (ptrOffs > curOffsRegs) {
-                iptr = isThis = false;
-                continue;
-            }
-
-            reg     = (regNum)((val >> 3) & 0x7);
-            regMask = 1 << reg;         // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
-
-#if 0
-            printf("regMask = %04X -> %04X\n", ptrRegs,
-                       (val & 0x40) ? (ptrRegs |  regMask)
-                                    : (ptrRegs & ~regMask));
-#endif
-
-            /* The register is becoming live/dead here */
-
-            if  (val & 0x40)
-            {
-                /* Becomes Live */
-                _ASSERTE((ptrRegs  &  regMask) == 0);
-
-                ptrRegs |=  regMask;
-
-                if  (isThis)
-                {
-                    thisPtrReg = reg;
-                }
-                if  (iptr)
-                {
-                    iptrRegs |= regMask;
-                }
-            }
-            else
-            {
-                /* Becomes Dead */
-                _ASSERTE((ptrRegs  &  regMask) != 0);
-
-                ptrRegs &= ~regMask;
-
-                if  (reg == thisPtrReg)
-                {
-                    thisPtrReg = REGI_NA;
-                }
-                if  (iptrRegs & regMask)
-                {
-                    iptrRegs &= ~regMask;
-                }
-            }
-            iptr = isThis = false;
-            continue;
-        }
-
-        /* This is probably an argument push/pop */
-
-        argOfs = (val & 0x38) >> 3;
-
-        /* 6 [110] and 7 [111] are reserved for other encodings */
-        if  (argOfs < 6)
-        {
-
-            /* A small argument encoding */
-
-            ptrOffs += (val & 0x07);
-            if (ptrOffs > curOffsArgs) {
-                iptr = isThis = false;
-                goto REPORT_REFS;
-            }
-            isPop    = (val & 0x40);
-
-        ARG:
-
-            if  (isPop)
-            {
-                if (argOfs == 0)
-                    continue;           // little skip encoding
-
-                /* We remove (pop) the top 'argOfs' entries */
-
-                _ASSERTE(argOfs || argOfs <= argCnt);
-
-                /* adjust # of arguments */
-
-                argCnt -= argOfs;
-                _ASSERTE(argCnt < MAX_PTRARG_OFS);
-
-//              printf("[%04X] popping %u args: mask = %04X\n", ptrOffs, argOfs, (int)ptrArgs);
-
-                do
-                {
-                    _ASSERTE(!isZero(argHigh));
-
-                    /* Do we have an argument bit that's on? */
-
-                    if  (intersect(ptrArgs, argHigh))
-                    {
-                        /* Turn off the bit */
-
-                        setDiff(ptrArgs, argHigh);
-                        setDiff(iptrArgs, argHigh);
-
-                        /* We've removed one more argument bit */
-
-                        argOfs--;
-                    }
-                    else if (hasPartialArgInfo)
-                        argCnt--;
-                    else /* full arg info && not a ref */
-                        argOfs--;
-
-                    /* Continue with the next lower bit */
-
-                    argHigh >>= 1;
-                }
-                while (argOfs);
-
-                _ASSERTE(!hasPartialArgInfo    ||
-                         isZero(argHigh)       ||
-                        (argHigh == CONSTRUCT_ptrArgTP(1, (argCnt-1))));
-
-                if (hasPartialArgInfo)
-                {
-                    // We always leave argHigh pointing to the next ptr arg.
-                    // So, while argHigh is non-zero, and not a ptrArg, we shift right (and subtract
-                    // one arg from our argCnt) until it is a ptrArg.
-                    while (!intersect(argHigh, ptrArgs) && (!isZero(argHigh)))
-                    {
-                        argHigh >>= 1;
-                        argCnt--;
-                    }
-                }
-
-            }
-            else
-            {
-                /* Add a new ptr arg entry at stack offset 'argOfs' */
-
-                if  (argOfs >= MAX_PTRARG_OFS)
-                {
-                     _ASSERTE_ALL_BUILDS(!"scanArgRegTableI: args pushed 'too deep'");
-                }
-                else
-                {
-                    /* Full arg info reports all pushes, and thus
-                       argOffs has to be consistent with argCnt */
-
-                    _ASSERTE(hasPartialArgInfo || argCnt == argOfs);
-
-                    /* store arg count */
-
-                    argCnt  = argOfs + 1;
-                    _ASSERTE((argCnt < MAX_PTRARG_OFS));
-
-                    /* Compute the appropriate argument offset bit */
-
-                    ptrArgTP argMask = CONSTRUCT_ptrArgTP(1, argOfs);
-
-//                  printf("push arg at offset %02u --> mask = %04X\n", argOfs, (int)argMask);
-
-                    /* We should never push twice at the same offset */
-
-                    _ASSERTE(!intersect( ptrArgs, argMask));
-                    _ASSERTE(!intersect(iptrArgs, argMask));
-
-                    /* We should never push within the current highest offset */
-
-                    // _ASSERTE(argHigh < argMask);
-
-                    /* This is now the highest bit we've set */
-
-                    argHigh = argMask;
-
-                    /* Set the appropriate bit in the argument mask */
-
-                    ptrArgs |= argMask;
-
-                    if (iptr)
-                        iptrArgs |= argMask;
-                }
-
-                iptr = isThis = false;
-            }
-            continue;
-        }
-        else if (argOfs == 6)
-        {
-            if (val & 0x40) {
-                /* Bigger delta  000=8,001=16,010=24,...,111=64 */
-                ptrOffs += (((val & 0x07) + 1) << 3);
-            }
-            else {
-                /* non-ptr arg push */
-                _ASSERTE(!hasPartialArgInfo);
-                ptrOffs += (val & 0x07);
-                if (ptrOffs > curOffsArgs) {
-                    iptr = isThis = false;
-                    goto REPORT_REFS;
-                }
-                argHigh = CONSTRUCT_ptrArgTP(1, argCnt);
-                argCnt++;
-                _ASSERTE(argCnt < MAX_PTRARG_OFS);
-            }
-            continue;
-        }
-
-        /* argOfs was 7 [111] which is reserved for the larger encodings */
-
-        _ASSERTE(argOfs==7);
-
-        switch (val)
-        {
-        case 0xFF:
-            iptr = isThis = false;
-            goto REPORT_REFS;   // the method might loop !!!
-
-        case 0xB8:
-            val = fastDecodeUnsigned(table);
-            ptrOffs += val;
-            continue;
-
-        case 0xBC:
-            isThis = true;
-            break;
-
-        case 0xBF:
-            iptr = true;
-            break;
-
-        case 0xF8:
-        case 0xFC:
-            isPop  = val & 0x04;
-            argOfs = fastDecodeUnsigned(table);
-            goto ARG;
-
-        case 0xFD: {
-            argOfs  = fastDecodeUnsigned(table);
-            _ASSERTE(argOfs && argOfs <= argCnt);
-
-            // Kill the top "argOfs" pointers.
-
-            ptrArgTP argMask;
-            for(argMask = CONSTRUCT_ptrArgTP(1, argCnt); (argOfs != 0); argMask >>= 1)
-            {
-                _ASSERTE(!isZero(argMask) && !isZero(ptrArgs)); // there should be remaining pointers
-
-                if (intersect(ptrArgs, argMask))
-                {
-                    setDiff(ptrArgs, argMask);
-                    setDiff(iptrArgs, argMask);
-                    argOfs--;
-                }
-            }
-
-            // For partial arg info, need to find the next highest pointer for argHigh
-
-            if (hasPartialArgInfo)
-            {
-                for(argHigh = ptrArgTP(0); !isZero(argMask); argMask >>= 1)
-                {
-                    if (intersect(ptrArgs, argMask)) {
-                        argHigh = argMask;
-                        break;
-                    }
-                }
-            }
-            } break;
-
-        case 0xF9:
-            argOfs = fastDecodeUnsigned(table);
-            argCnt  += argOfs;
-            break;
-
-        default:
-            _ASSERTE(!"Unexpected special code %04X");
-        }
-    }
-
-    /* Report all live pointer registers */
-REPORT_REFS:
-
-    _ASSERTE((iptrRegs & ptrRegs) == iptrRegs); // iptrRegs is a subset of ptrRegs
-    _ASSERTE((iptrArgs & ptrArgs) == iptrArgs); // iptrArgs is a subset of ptrArgs
-
-    /* Save the current live register, argument set, and argCnt */
-
-    info->regMaskResult  = convertAllRegsMask(ptrRegs);
-    info->argMaskResult  = ptrArgs;
-    info->argHnumResult  = 0;
-    info->iregMaskResult = convertAllRegsMask(iptrRegs);
-    info->iargMaskResult = iptrArgs;
-
-    info->thisPtrResult  = thisPtrReg;
-    _ASSERTE(thisPtrReg == REGI_NA || (regNumToMask(thisPtrReg) & info->regMaskResult));
-
-    if (hasPartialArgInfo)
-    {
-        return 0;
-    }
-    else
-    {
-        _ASSERTE(int(argCnt) < INT_MAX); // check that it did not underflow
-        return (argCnt * sizeof(unsigned));
-    }
-}
-
-/*****************************************************************************/
-
-unsigned GetPushedArgSize(hdrInfo * info, PTR_CBYTE table, DWORD curOffs)
-{
-    SUPPORTS_DAC;
-
-    unsigned sz;
-
-    if  (info->interruptible)
-    {
-        sz = scanArgRegTableI(skipToArgReg(*info, table),
-                              curOffs,
-                              curOffs,
-                              info);
-    }
-    else
-    {
-        sz = scanArgRegTable(skipToArgReg(*info, table),
-                             curOffs,
-                             info);
-    }
-
-    return sz;
-}
-
-/*****************************************************************************/
-
-inline
-void    TRASH_CALLEE_UNSAVED_REGS(PREGDISPLAY pContext)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-#ifdef _DEBUG
-    /* This is not completely correct as we lose the current value, but
-       it should not really be useful to anyone. */
-    static DWORD s_badData = 0xDEADBEEF;
-    pContext->SetEaxLocation(&s_badData);
-    pContext->SetEcxLocation(&s_badData);
-    pContext->SetEdxLocation(&s_badData);
-#endif //_DEBUG
-}
-
-/*****************************************************************************
- *  Sizes of certain i386 instructions which are used in the prolog/epilog
- */
-
-// Can we use sign-extended byte to encode the imm value, or do we need a dword
-#define CAN_COMPRESS(val)       ((INT8)(val) == (INT32)(val))
-
-#define SZ_ADD_REG(val)         ( 2 +  (CAN_COMPRESS(val) ? 1 : 4))
-#define SZ_AND_REG(val)         SZ_ADD_REG(val)
-#define SZ_POP_REG              1
-#define SZ_LEA(offset)          SZ_ADD_REG(offset)
-#define SZ_MOV_REG_REG          2
-
-bool IsMarkerInstr(BYTE val)
-{
-    SUPPORTS_DAC;
-
-#ifdef _DEBUG
-    if (val == X86_INSTR_INT3)
-    {
-        return true;
-    }
-#ifdef HAVE_GCCOVER
-    else // GcCover might have stomped on the instruction
-    {
-        if (GCStress<cfg_any>::IsEnabled())
-        {
-            if (IsGcCoverageInterruptInstructionVal(val))
-            {
-                return true;
-            }
-        }
-    }
-#endif // HAVE_GCCOVER
-#endif // _DEBUG
-
-    return false;
-}
-
-/* Check if the given instruction opcode is the one we expect.
-   This is a "necessary" but not "sufficient" check as it ignores the check
-   if the instruction is one of our special markers (for debugging and GcStress) */
-
-bool CheckInstrByte(BYTE val, BYTE expectedValue)
-{
-    SUPPORTS_DAC;
-    return ((val == expectedValue) || IsMarkerInstr(val));
-}
-
-/* Similar to CheckInstrByte(). Use this to check a masked opcode (ignoring
-   optional bits in the opcode encoding).
-   valPattern is the masked out value.
-   expectedPattern is the mask value we expect.
-   val is the actual instruction opcode
- */
-bool CheckInstrBytePattern(BYTE valPattern, BYTE expectedPattern, BYTE val)
-{
-    SUPPORTS_DAC;
-
-    _ASSERTE((valPattern & val) == valPattern);
-
-    return ((valPattern == expectedPattern) || IsMarkerInstr(val));
-}
-
-/* Similar to CheckInstrByte() */
-
-bool CheckInstrWord(WORD val, WORD expectedValue)
-{
-    LIMITED_METHOD_CONTRACT;
-    SUPPORTS_DAC;
-
-    return ((val == expectedValue) || IsMarkerInstr(val & 0xFF));
-}
-
-// Use this to check if the instruction at offset "walkOffset" has already
-// been executed
-// "actualHaltOffset" is the offset when the code was suspended
-// It is assumed that there is linear control flow from offset 0 to "actualHaltOffset".
-//
-// This has been factored out just so that the intent of the comparison
-// is clear (compared to the opposite intent)
-
-bool InstructionAlreadyExecuted(unsigned walkOffset, unsigned actualHaltOffset)
-{
-    SUPPORTS_DAC;
-    return (walkOffset < actualHaltOffset);
-}
-
-// skips past a "arith REG, IMM"
-inline unsigned SKIP_ARITH_REG(int val, PTR_CBYTE base, unsigned offset)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-    unsigned delta = 0;
-    if (val != 0)
-    {
-#ifdef _DEBUG
-        // Confirm that arith instruction is at the correct place
-        _ASSERTE(CheckInstrBytePattern(base[offset  ] & 0xFD, 0x81, base[offset]) &&
-                 CheckInstrBytePattern(base[offset+1] & 0xC0, 0xC0, base[offset+1]));
-        // only use DWORD form if needed
-        _ASSERTE(((base[offset] & 2) != 0) == CAN_COMPRESS(val) ||
-                 IsMarkerInstr(base[offset]));
-#endif
-        delta = 2 + (CAN_COMPRESS(val) ? 1 : 4);
-    }
-    return(offset + delta);
-}
-
-inline unsigned SKIP_PUSH_REG(PTR_CBYTE base, unsigned offset)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-    // Confirm it is a push instruction
-    _ASSERTE(CheckInstrBytePattern(base[offset] & 0xF8, 0x50, base[offset]));
-    return(offset + 1);
-}
-
-inline unsigned SKIP_POP_REG(PTR_CBYTE base, unsigned offset)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-    // Confirm it is a pop instruction
-    _ASSERTE(CheckInstrBytePattern(base[offset] & 0xF8, 0x58, base[offset]));
-    return(offset + 1);
-}
-
-inline unsigned SKIP_MOV_REG_REG(PTR_CBYTE base, unsigned offset)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-    // Confirm it is a move instruction
-    // Note that only the first byte may have been stomped on by IsMarkerInstr()
-    // So we can check the second byte directly
-    _ASSERTE(CheckInstrBytePattern(base[offset] & 0xFD, 0x89, base[offset]) &&
-             (base[offset+1] & 0xC0) == 0xC0);
-    return(offset + 2);
-}
-
-inline unsigned SKIP_LEA_ESP_EBP(int val, PTR_CBYTE base, unsigned offset)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-#ifdef _DEBUG
-    // Confirm it is the right instruction
-    // Note that only the first byte may have been stomped on by IsMarkerInstr()
-    // So we can check the second byte directly
-    WORD wOpcode = *(PTR_WORD)base;
-    _ASSERTE((CheckInstrWord(wOpcode, X86_INSTR_w_LEA_ESP_EBP_BYTE_OFFSET) &&
-              (val == *(PTR_SBYTE)(base+2)) &&
-              CAN_COMPRESS(val)) ||
-             (CheckInstrWord(wOpcode, X86_INSTR_w_LEA_ESP_EBP_DWORD_OFFSET) &&
-              (val == *(PTR_INT32)(base+2)) &&
-              !CAN_COMPRESS(val)));
-#endif
-
-    unsigned delta = 2 + (CAN_COMPRESS(val) ? 1 : 4);
-    return(offset + delta);
-}
-
-inline unsigned SKIP_LEA_EAX_ESP(int val, PTR_CBYTE base, unsigned offset)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-#ifdef _DEBUG
-    WORD wOpcode = *(PTR_WORD)(base + offset);
-    if (CheckInstrWord(wOpcode, X86_INSTR_w_LEA_EAX_ESP_BYTE_OFFSET))
-    {
-        _ASSERTE(val == *(PTR_SBYTE)(base + offset + 3));
-        _ASSERTE(CAN_COMPRESS(val));
-    }
-    else
-    {
-        _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_w_LEA_EAX_ESP_DWORD_OFFSET));
-        _ASSERTE(val == *(PTR_INT32)(base + offset + 3));
-        _ASSERTE(!CAN_COMPRESS(val));
-    }
-#endif
-
-    unsigned delta = 3 + (CAN_COMPRESS(-val) ? 1 : 4);
-    return(offset + delta);
-}
-
-inline unsigned SKIP_HELPER_CALL(PTR_CBYTE base, unsigned offset)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-    unsigned delta;
-
-    if (CheckInstrByte(base[offset], X86_INSTR_CALL_REL32))
-    {
-        delta = 5;
-    }
-    else
-    {
-#ifdef _DEBUG
-        WORD wOpcode = *(PTR_WORD)(base+offset);
-        _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_W_CALL_IND_IMM));
-#endif
-        delta = 6;
-    }
-
-    return(offset+delta);
-}
-
-unsigned SKIP_ALLOC_FRAME(int size, PTR_CBYTE base, unsigned offset)
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-        SUPPORTS_DAC;
-    } CONTRACTL_END;
-
-    _ASSERTE(size != 0);
-
-    if (size == sizeof(void*))
-    {
-        // JIT emits "push eax" instead of "sub esp,4"
-        return SKIP_PUSH_REG(base, offset);
-    }
-
-    const int STACK_PROBE_PAGE_SIZE_BYTES = 4096;
-    const int STACK_PROBE_BOUNDARY_THRESHOLD_BYTES = 1024;
-
-    int lastProbedLocToFinalSp = size;
-
-    if (size < STACK_PROBE_PAGE_SIZE_BYTES)
-    {
-        // sub esp, size
-        offset = SKIP_ARITH_REG(size, base, offset);
-    }
-    else
-    {
-        WORD wOpcode = *(PTR_WORD)(base + offset);
-
-        if (CheckInstrWord(wOpcode, X86_INSTR_w_TEST_ESP_DWORD_OFFSET_EAX))
-        {
-            // In .NET 5.0 and earlier for frames that have size smaller than 0x3000 bytes
-            // JIT emits one or two 'test eax, [esp-dwOffset]' instructions before adjusting the stack pointer.
-            _ASSERTE(size < 0x3000);
-
-            // test eax, [esp-0x1000]
-            offset += 7;
-            lastProbedLocToFinalSp -= 0x1000;
-
-            if (size >= 0x2000)
-            {
-#ifdef _DEBUG
-                wOpcode = *(PTR_WORD)(base + offset);
-                _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_w_TEST_ESP_DWORD_OFFSET_EAX));
-#endif
-                //test eax, [esp-0x2000]
-                offset += 7;
-                lastProbedLocToFinalSp -= 0x1000;
-            }
-
-            // sub esp, size
-            offset = SKIP_ARITH_REG(size, base, offset);
-        }
-        else
-        {
-            bool pushedStubParam = false;
-
-            if (CheckInstrByte(base[offset], X86_INSTR_PUSH_EAX))
-            {
-                // push eax
-                offset = SKIP_PUSH_REG(base, offset);
-                pushedStubParam = true;
-            }
-
-            if (CheckInstrByte(base[offset], X86_INSTR_XOR))
-            {
-                // In .NET Core 3.1 and earlier for frames that have size greater than or equal to 0x3000 bytes
-                // JIT emits the following loop.
-                _ASSERTE(size >= 0x3000);
-
-                offset += 2;
-                //      xor eax, eax                2
-                //      [nop]                       0-3
-                // loop:
-                //      test [esp + eax], eax       3
-                //      sub eax, 0x1000             5
-                //      cmp eax, -size              5
-                //      jge loop                    2
-
-                // R2R images that support ReJIT may have extra nops we need to skip over.
-                while (offset < 5)
-                {
-                    if (CheckInstrByte(base[offset], X86_INSTR_NOP))
-                    {
-                        offset++;
-                    }
-                    else
-                    {
-                        break;
-                    }
-                }
-
-                offset += 15;
-
-                if (pushedStubParam)
-                {
-                    // pop eax
-                    offset = SKIP_POP_REG(base, offset);
-                }
-
-                // sub esp, size
-                return SKIP_ARITH_REG(size, base, offset);
-            }
-            else
-            {
-                // In .NET 5.0 and later JIT emits a call to JIT_StackProbe helper.
-
-                if (pushedStubParam)
-                {
-                    // lea eax, [esp-size+4]
-                    offset = SKIP_LEA_EAX_ESP(-size + 4, base, offset);
-                    // call JIT_StackProbe
-                    offset = SKIP_HELPER_CALL(base, offset);
-                    // pop eax
-                    offset = SKIP_POP_REG(base, offset);
-                    // sub esp, size
-                    return SKIP_ARITH_REG(size, base, offset);
-                }
-                else
-                {
-                    // lea eax, [esp-size]
-                    offset = SKIP_LEA_EAX_ESP(-size, base, offset);
-                    // call JIT_StackProbe
-                    offset = SKIP_HELPER_CALL(base, offset);
-                    // mov esp, eax
-                    return SKIP_MOV_REG_REG(base, offset);
-                }
-            }
-        }
-    }
-
-    if (lastProbedLocToFinalSp + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > STACK_PROBE_PAGE_SIZE_BYTES)
-    {
-#ifdef _DEBUG
-        WORD wOpcode = *(PTR_WORD)(base + offset);
-        _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_w_TEST_ESP_EAX));
-#endif
-        // test [esp], eax
-        offset += 3;
-    }
-
-    return offset;
-}
-
-#endif // !USE_GC_INFO_DECODER
-
-
-#if defined(FEATURE_EH_FUNCLETS)
-
-void EECodeManager::EnsureCallerContextIsValid( PREGDISPLAY  pRD, StackwalkCacheEntry* pCacheEntry, EECodeInfo * pCodeInfo /*= NULL*/ )
-{
-    CONTRACTL
-    {
-        NOTHROW;
-        GC_NOTRIGGER;
-        SUPPORTS_DAC;
-    }
-    CONTRACTL_END;
-
-    if( !pRD->IsCallerContextValid )
-    {
-#if !defined(DACCESS_COMPILE) && defined(HAS_QUICKUNWIND)
-        if (pCacheEntry != NULL)
-        {
-            // lightened schema: take stack unwind info from stackwalk cache
-            QuickUnwindStackFrame(pRD, pCacheEntry, EnsureCallerStackFrameIsValid);
-        }
-        else
-#endif // !DACCESS_COMPILE
-        {
-            // We need to make a copy here (instead of switching the pointers), in order to preserve the current context
-            *(pRD->pCallerContext) = *(pRD->pCurrentContext);
-            *(pRD->pCallerContextPointers) = *(pRD->pCurrentContextPointers);
-
-            Thread::VirtualUnwindCallFrame(pRD->pCallerContext, pRD->pCallerContextPointers, pCodeInfo);
-        }
-
-        pRD->IsCallerContextValid = TRUE;
-    }
-
-    _ASSERTE( pRD->IsCallerContextValid );
-}
-
-size_t EECodeManager::GetCallerSp( PREGDISPLAY  pRD )
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-        SUPPORTS_DAC;
-    } CONTRACTL_END;
-
-    // Don't add usage of this field.  This is only temporary.
-    // See ExceptionTracker::InitializeCrawlFrame() for more information.
-    if (!pRD->IsCallerSPValid)
-    {
-        EnsureCallerContextIsValid(pRD, NULL);
-    }
-
-    return GetSP(pRD->pCallerContext);
-}
-
-#endif // FEATURE_EH_FUNCLETS
-
-#ifdef HAS_QUICKUNWIND
-/*
-  *  Light unwind the current stack frame, using provided cache entry.
-  *  pPC, Esp and pEbp of pContext are updated.
-  */
-
-// static
-void EECodeManager::QuickUnwindStackFrame(PREGDISPLAY pRD, StackwalkCacheEntry *pCacheEntry, QuickUnwindFlag flag)
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-    } CONTRACTL_END;
-
-    _ASSERTE(pCacheEntry);
-    _ASSERTE(GetControlPC(pRD) == (PCODE)(pCacheEntry->IP));
-
-#if defined(TARGET_X86)
-    _ASSERTE(flag == UnwindCurrentStackFrame);
-
-    _ASSERTE(!pCacheEntry->fUseEbp || pCacheEntry->fUseEbpAsFrameReg);
-
-    if (pCacheEntry->fUseEbpAsFrameReg)
-    {
-        _ASSERTE(pCacheEntry->fUseEbp);
-        TADDR curEBP = GetRegdisplayFP(pRD);
-
-        // EBP frame, update ESP through EBP, since ESPOffset may vary
-        pRD->SetEbpLocation(PTR_DWORD(curEBP));
-        pRD->SP = curEBP + sizeof(void*);
-    }
-    else
-    {
-        _ASSERTE(!pCacheEntry->fUseEbp);
-        // ESP frame, update up to retAddr using ESPOffset
-        pRD->SP += pCacheEntry->ESPOffset;
-    }
-    pRD->PCTAddr  = (TADDR)pRD->SP;
-    pRD->ControlPC = *PTR_PCODE(pRD->PCTAddr);
-    pRD->SP     += sizeof(void*) + pCacheEntry->argSize;
-
-#elif defined(TARGET_AMD64)
-    if (pRD->IsCallerContextValid)
-    {
-        pRD->pCurrentContext->Rbp = pRD->pCallerContext->Rbp;
-        pRD->pCurrentContext->Rsp = pRD->pCallerContext->Rsp;
-        pRD->pCurrentContext->Rip = pRD->pCallerContext->Rip;
-    }
-    else
-    {
-        PCONTEXT pSourceCtx = NULL;
-        PCONTEXT pTargetCtx = NULL;
-        if (flag == UnwindCurrentStackFrame)
-        {
-            pTargetCtx = pRD->pCurrentContext;
-            pSourceCtx = pRD->pCurrentContext;
-        }
-        else
-        {
-            pTargetCtx = pRD->pCallerContext;
-            pSourceCtx = pRD->pCurrentContext;
-        }
-
-        // Unwind RBP.  The offset is relative to the current sp.
-        if (pCacheEntry->RBPOffset == 0)
-        {
-            pTargetCtx->Rbp = pSourceCtx->Rbp;
-        }
-        else
-        {
-            pTargetCtx->Rbp = *(UINT_PTR*)(pSourceCtx->Rsp + pCacheEntry->RBPOffset);
-        }
-
-        // Adjust the sp.  From this pointer onwards pCurrentContext->Rsp is the caller sp.
-        pTargetCtx->Rsp = pSourceCtx->Rsp + pCacheEntry->RSPOffset;
-
-        // Retrieve the return address.
-        pTargetCtx->Rip = *(UINT_PTR*)((pTargetCtx->Rsp) - sizeof(UINT_PTR));
-    }
-
-    if (flag == UnwindCurrentStackFrame)
-    {
-        SyncRegDisplayToCurrentContext(pRD);
-        pRD->IsCallerContextValid = FALSE;
-        pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
-    }
-
-#else  // !TARGET_X86 && !TARGET_AMD64
-    PORTABILITY_ASSERT("EECodeManager::QuickUnwindStackFrame is not implemented on this platform.");
-#endif // !TARGET_X86 && !TARGET_AMD64
-}
-#endif // HAS_QUICKUNWIND
-
-/*****************************************************************************/
-#ifdef TARGET_X86 // UnwindStackFrame
-/*****************************************************************************/
-
-const RegMask CALLEE_SAVED_REGISTERS_MASK[] =
-{
-    RM_EDI, // first register to be pushed
-    RM_ESI,
-    RM_EBX,
-    RM_EBP  // last register to be pushed
-};
-
-static void SetLocation(PREGDISPLAY pRD, int ind, PDWORD loc)
-{
-#ifdef FEATURE_EH_FUNCLETS
-    static const SIZE_T OFFSET_OF_CALLEE_SAVED_REGISTERS[] =
-    {
-        offsetof(T_KNONVOLATILE_CONTEXT_POINTERS, Edi), // first register to be pushed
-        offsetof(T_KNONVOLATILE_CONTEXT_POINTERS, Esi),
-        offsetof(T_KNONVOLATILE_CONTEXT_POINTERS, Ebx),
-        offsetof(T_KNONVOLATILE_CONTEXT_POINTERS, Ebp), // last register to be pushed
-    };
-
-    SIZE_T offsetOfRegPtr = OFFSET_OF_CALLEE_SAVED_REGISTERS[ind];
-    *(LPVOID*)(PBYTE(pRD->pCurrentContextPointers) + offsetOfRegPtr) = loc;
-#else
-    static const SIZE_T OFFSET_OF_CALLEE_SAVED_REGISTERS[] =
-    {
-        offsetof(REGDISPLAY, pEdi), // first register to be pushed
-        offsetof(REGDISPLAY, pEsi),
-        offsetof(REGDISPLAY, pEbx),
-        offsetof(REGDISPLAY, pEbp), // last register to be pushed
-    };
-
-    SIZE_T offsetOfRegPtr = OFFSET_OF_CALLEE_SAVED_REGISTERS[ind];
-    *(LPVOID*)(PBYTE(pRD) + offsetOfRegPtr) = loc;
-#endif
-}
-
-/*****************************************************************************/
-
-void UnwindEspFrameEpilog(
-        PREGDISPLAY pContext,
-        hdrInfo * info,
-        PTR_CBYTE epilogBase,
-        unsigned flags)
-{
-    LIMITED_METHOD_CONTRACT;
-    SUPPORTS_DAC;
-
-    _ASSERTE(info->epilogOffs != hdrInfo::NOT_IN_EPILOG);
-    _ASSERTE(!info->ebpFrame && !info->doubleAlign);
-    _ASSERTE(info->epilogOffs > 0);
-
-    int offset = 0;
-    unsigned ESP = pContext->SP;
-
-    if (info->rawStkSize)
-    {
-        if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
-        {
-            /* We have NOT executed the "ADD ESP, FrameSize",
-               so manually adjust stack pointer */
-            ESP += info->rawStkSize;
-        }
-
-        // We have already popped off the frame (excluding the callee-saved registers)
-
-        if (epilogBase[0] == X86_INSTR_POP_ECX)
-        {
-            // We may use "POP ecx" for doing "ADD ESP, 4",
-            // or we may not (in the case of JMP epilogs)
-            _ASSERTE(info->rawStkSize == sizeof(void*));
-            offset = SKIP_POP_REG(epilogBase, offset);
-        }
-        else
-        {
-            // "add esp, rawStkSize"
-            offset = SKIP_ARITH_REG(info->rawStkSize, epilogBase, offset);
-        }
-    }
-
-    /* Remaining callee-saved regs are at ESP. Need to update
-       regsMask as well to exclude registers which have already been popped. */
-
-    const RegMask regsMask = info->savedRegMask;
-
-    /* Increment "offset" in steps to see which callee-saved
-       registers have already been popped */
-
-    for (unsigned i = ARRAY_SIZE(CALLEE_SAVED_REGISTERS_MASK); i > 0; i--)
-    {
-        RegMask regMask = CALLEE_SAVED_REGISTERS_MASK[i - 1];
-
-        if (!(regMask & regsMask))
-            continue;
-
-        if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
-        {
-            /* We have NOT yet popped off the register.
-               Get the value from the stack if needed */
-            if ((flags & UpdateAllRegs) || (regMask == RM_EBP))
-            {
-                SetLocation(pContext, i - 1, PTR_DWORD((TADDR)ESP));
-            }
-
-            /* Adjust ESP */
-            ESP += sizeof(void*);
-        }
-
-        offset = SKIP_POP_REG(epilogBase, offset);
-    }
-
-    //CEE_JMP generates an epilog similar to a normal CEE_RET epilog except for the last instruction
-    _ASSERTE(CheckInstrBytePattern(epilogBase[offset] & X86_INSTR_RET, X86_INSTR_RET, epilogBase[offset]) //ret
-        || CheckInstrBytePattern(epilogBase[offset], X86_INSTR_JMP_NEAR_REL32, epilogBase[offset]) //jmp ret32
-        || CheckInstrWord(*PTR_WORD(epilogBase + offset), X86_INSTR_w_JMP_FAR_IND_IMM)); //jmp [addr32]
-
-    /* Finally we can set pPC */
-    pContext->PCTAddr = (TADDR)ESP;
-    pContext->ControlPC = *PTR_PCODE(pContext->PCTAddr);
-
-    pContext->SP = ESP;
-}
-
-/*****************************************************************************/
-
-void UnwindEbpDoubleAlignFrameEpilog(
-        PREGDISPLAY pContext,
-        hdrInfo * info,
-        PTR_CBYTE epilogBase,
-        unsigned flags)
-{
-    LIMITED_METHOD_CONTRACT;
-    SUPPORTS_DAC;
-
-    _ASSERTE(info->epilogOffs != hdrInfo::NOT_IN_EPILOG);
-    _ASSERTE(info->ebpFrame || info->doubleAlign);
-
-    _ASSERTE(info->argSize < 0x10000); // "ret" only has a 2 byte operand
-
-   /* See how many instructions we have executed in the
-      epilog to determine which callee-saved registers
-      have already been popped */
-    int offset = 0;
-
-    unsigned ESP = pContext->SP;
-
-    bool needMovEspEbp = false;
-
-    if (info->doubleAlign)
-    {
-        // add esp, rawStkSize
-
-        if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
-            ESP += info->rawStkSize;
-        _ASSERTE(info->rawStkSize != 0);
-        offset = SKIP_ARITH_REG(info->rawStkSize, epilogBase, offset);
-
-        // We also need "mov esp, ebp" after popping the callee-saved registers
-        needMovEspEbp = true;
-    }
-    else
-    {
-        bool needLea = false;
-
-        if (info->localloc)
-        {
-            // ESP may be variable if a localloc was actually executed. We will reset it.
-            //    lea esp, [ebp-calleeSavedRegs]
-
-            needLea = true;
-        }
-        else if (info->savedRegsCountExclFP == 0)
-        {
-            // We will just generate "mov esp, ebp" and be done with it.
-
-            if (info->rawStkSize != 0)
-            {
-                needMovEspEbp = true;
-            }
-        }
-        else if  (info->rawStkSize == 0)
-        {
-            // do nothing before popping the callee-saved registers
-        }
-        else if (info->rawStkSize == sizeof(void*))
-        {
-            // "pop ecx" will make ESP point to the callee-saved registers
-            if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
-                ESP += sizeof(void*);
-            offset = SKIP_POP_REG(epilogBase, offset);
-        }
-        else
-        {
-            // We need to make ESP point to the callee-saved registers
-            //    lea esp, [ebp-calleeSavedRegs]
-
-            needLea = true;
-        }
-
-        if (needLea)
-        {
-            // lea esp, [ebp-calleeSavedRegs]
-
-            unsigned calleeSavedRegsSize = info->savedRegsCountExclFP * sizeof(void*);
-
-            if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
-                ESP = GetRegdisplayFP(pContext) - calleeSavedRegsSize;
-
-            offset = SKIP_LEA_ESP_EBP(-int(calleeSavedRegsSize), epilogBase, offset);
-        }
-    }
-
-    for (unsigned i = STRING_LENGTH(CALLEE_SAVED_REGISTERS_MASK); i > 0; i--)
-    {
-        RegMask regMask = CALLEE_SAVED_REGISTERS_MASK[i - 1];
-        _ASSERTE(regMask != RM_EBP);
-
-        if ((info->savedRegMask & regMask) == 0)
-            continue;
-
-        if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
-        {
-            if (flags & UpdateAllRegs)
-            {
-                SetLocation(pContext, i - 1, PTR_DWORD((TADDR)ESP));
-            }
-            ESP += sizeof(void*);
-        }
+    // Strategy for zeroing out the frame on x64:
+    //
+    // The stack frame looks like this (stack grows up)
+    //
+    // =======================================
+    //             <--- RSP == RBP (invariant: localalloc disallowed before remap)
+    // Arguments for next call (if there is one)
+    // PSPSym (optional)
+    // JIT temporaries (if any)
+    // Security object (if any)
+    // Local variables (if any)
+    // ---------------------------------------
+    // Frame header (stuff we must preserve, such as bool for synchronized
+    // methods, saved FP, saved callee-preserved registers, etc.)
+    // Return address (also included in frame header)
+    // ---------------------------------------
+    // Arguments for this frame (that's getting remapped).  Will naturally be preserved
+    // since fixed-frame size doesn't include this.
+    // =======================================
+    //
+    // Goal: Zero out everything AFTER (above) frame header.
+    //
+    // How do we find this stuff?
+    //
+    // EECodeInfo::GetFixedStackSize() gives us the full size from the top ("Arguments
+    // for next call") all the way down to and including Return Address.
+    //
+    // GetSizeOfEditAndContinuePreservedArea() gives us the size in bytes of the
+    // frame header at the bottom.
+    //
+    // So we start at RSP, and zero out:
+    //     GetFixedStackSize() - GetSizeOfEditAndContinuePreservedArea() bytes.
+    //
+    // We'll need to restore PSPSym; location gotten from GCInfo.
+    // We'll need to copy security object; location gotten from GCInfo.
+    //
+    // On ARM64 the JIT generates a slightly different frame and we do not have
+    // the invariant FP == SP, since the FP needs to point at the saved fp/lr
+    // pair for ETW stack walks. The frame there looks something like:
+    // =======================================
+    // Arguments for next call (if there is one)     <- SP
+    // JIT temporaries
+    // Locals
+    // PSPSym
+    // ---------------------------------------    ^ zeroed area
+    // MonitorAcquired (for synchronized methods)
+    // Saved FP                                      <- FP
+    // Saved LR
+    // ---------------------------------------    ^ preserved area
+    // Arguments
+    //
+    // The JIT reports the size of the "preserved" area, which includes
+    // MonitorAcquired when it is present. It could also include other local
+    // values that need to be preserved across EnC transitions, but no explicit
+    // treatment of these is necessary here beyond preserving the values in
+    // this region.
 
-        offset = SKIP_POP_REG(epilogBase, offset);
-    }
+    // GCInfo for old method
+    GcInfoDecoder oldGcDecoder(
+        pOldCodeInfo->GetGCInfoToken(),
+        GcInfoDecoderFlags(DECODE_SECURITY_OBJECT | DECODE_PSP_SYM | DECODE_EDIT_AND_CONTINUE),
+        0       // Instruction offset (not needed)
+        );
 
-    if (needMovEspEbp)
-    {
-        if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
-            ESP = GetRegdisplayFP(pContext);
+    // GCInfo for new method
+    GcInfoDecoder newGcDecoder(
+        pNewCodeInfo->GetGCInfoToken(),
+        GcInfoDecoderFlags(DECODE_SECURITY_OBJECT | DECODE_PSP_SYM | DECODE_EDIT_AND_CONTINUE),
+        0       // Instruction offset (not needed)
+        );
 
-        offset = SKIP_MOV_REG_REG(epilogBase, offset);
-    }
+    UINT32 oldSizeOfPreservedArea = oldGcDecoder.GetSizeOfEditAndContinuePreservedArea();
+    UINT32 newSizeOfPreservedArea = newGcDecoder.GetSizeOfEditAndContinuePreservedArea();
 
-    // Have we executed the pop EBP?
-    if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
+    LOG((LF_CORDB, LL_INFO100, "EECM::FixContextForEnC: Got old and new EnC preserved area sizes of %u and %u\n", oldSizeOfPreservedArea, newSizeOfPreservedArea));
+    // This ensures the JIT generated EnC compliant code.
+    if ((oldSizeOfPreservedArea == NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA) ||
+        (newSizeOfPreservedArea == NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA))
     {
-        pContext->SetEbpLocation(PTR_DWORD(TADDR(ESP)));
-        ESP += sizeof(void*);
+        _ASSERTE(!"FixContextForEnC called on a non-EnC-compliant method frame");
+        return CORDBG_E_ENC_INFOLESS_METHOD;
     }
-    offset = SKIP_POP_REG(epilogBase, offset);
 
-    pContext->PCTAddr = (TADDR)ESP;
-    pContext->ControlPC = *PTR_PCODE(pContext->PCTAddr);
-
-    pContext->SP = ESP;
-}
+    TADDR oldStackBase = GetSP(&oldCtx);
 
-inline SIZE_T GetStackParameterSize(hdrInfo * info)
-{
-    SUPPORTS_DAC;
-    return (info->varargs ? 0 : info->argSize); // Note varargs is caller-popped
-}
+    LOG((LF_CORDB, LL_INFO100, "EECM::FixContextForEnC: Old SP=%p, FP=%p\n", (void*)oldStackBase, (void*)GetFP(&oldCtx)));
 
-//****************************************************************************
-// This is the value ESP is incremented by on doing a "return"
+#if defined(TARGET_AMD64)
+    // Note: we cannot assert anything about the relationship between oldFixedStackSize
+    // and newFixedStackSize.  It's possible the edited frame grows (new locals) or
+    // shrinks (less temporaries).
+    DWORD oldFixedStackSize = pOldCodeInfo->GetFixedStackSize();
+    DWORD newFixedStackSize = pNewCodeInfo->GetFixedStackSize();
 
-inline SIZE_T ESPIncrOnReturn(hdrInfo * info)
-{
-    SUPPORTS_DAC;
-    return sizeof(void *) + // pop off the return address
-           GetStackParameterSize(info);
-}
+    // This verifies no localallocs were used in the old method.
+    // JIT is required to emit frame register for EnC-compliant code
+    _ASSERTE(pOldCodeInfo->HasFrameRegister());
+    _ASSERTE(pNewCodeInfo->HasFrameRegister());
 
-/*****************************************************************************/
+#elif defined(TARGET_ARM64)
+    DWORD oldFixedStackSize = oldGcDecoder.GetSizeOfEditAndContinueFixedStackFrame();
+    DWORD newFixedStackSize = newGcDecoder.GetSizeOfEditAndContinueFixedStackFrame();
+#else
+    PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform.");
+#endif
 
-void UnwindEpilog(
-        PREGDISPLAY pContext,
-        hdrInfo * info,
-        PTR_CBYTE epilogBase,
-        unsigned flags)
-{
-    LIMITED_METHOD_CONTRACT;
-    SUPPORTS_DAC;
-    _ASSERTE(info->epilogOffs != hdrInfo::NOT_IN_EPILOG);
-    // _ASSERTE(flags & ActiveStackFrame); // <TODO> Wont work for thread death</TODO>
-    _ASSERTE(info->epilogOffs > 0);
+    LOG((LF_CORDB, LL_INFO100, "EECM::FixContextForEnC: Old and new fixed stack sizes are %u and %u\n", oldFixedStackSize, newFixedStackSize));
 
-    if  (info->ebpFrame || info->doubleAlign)
-    {
-        UnwindEbpDoubleAlignFrameEpilog(pContext, info, epilogBase, flags);
-    }
-    else
+#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
+    // win-x64: SP == FP before localloc
+    if (oldStackBase != GetFP(&oldCtx))
     {
-        UnwindEspFrameEpilog(pContext, info, epilogBase, flags);
+        return E_FAIL;
     }
-
-#ifdef _DEBUG
-    if (flags & UpdateAllRegs)
-        TRASH_CALLEE_UNSAVED_REGS(pContext);
-#endif
-
-    /* Now adjust stack pointer */
-
-    pContext->SP += ESPIncrOnReturn(info);
-}
-
-/*****************************************************************************/
-
-void UnwindEspFrameProlog(
-        PREGDISPLAY pContext,
-        hdrInfo * info,
-        PTR_CBYTE methodStart,
-        unsigned flags)
-{
-    LIMITED_METHOD_CONTRACT;
-    SUPPORTS_DAC;
-
-    /* we are in the middle of the prolog */
-    _ASSERTE(info->prologOffs != hdrInfo::NOT_IN_PROLOG);
-    _ASSERTE(!info->ebpFrame && !info->doubleAlign);
-
-    unsigned offset = 0;
-
-#ifdef _DEBUG
-    // If the first two instructions are 'nop, int3', then  we will
-    // assume that is from a JitHalt operation and skip past it
-    if (methodStart[0] == X86_INSTR_NOP && methodStart[1] == X86_INSTR_INT3)
+#else
+    // All other 64-bit targets use frame chaining with the FP stored right below the
+    // return address (LR is always pushed on arm64). FP + 16 == SP + oldFixedStackSize
+    // gives the caller's SP before stack alloc.
+    if (GetFP(&oldCtx) + 16 != oldStackBase + oldFixedStackSize)
     {
-        offset += 2;
+        return E_FAIL;
     }
 #endif
 
-    const DWORD curOffs = info->prologOffs;
-    unsigned ESP = pContext->SP;
-
-    // Find out how many callee-saved regs have already been pushed
-
-    unsigned regsMask = RM_NONE;
-    PTR_DWORD savedRegPtr = PTR_DWORD((TADDR)ESP);
-
-    for (unsigned i = 0; i < ARRAY_SIZE(CALLEE_SAVED_REGISTERS_MASK); i++)
-    {
-        RegMask regMask = CALLEE_SAVED_REGISTERS_MASK[i];
-
-        if (!(info->savedRegMask & regMask))
-            continue;
-
-        if (InstructionAlreadyExecuted(offset, curOffs))
-        {
-            ESP += sizeof(void*);
-            regsMask    |= regMask;
-        }
-
-        offset = SKIP_PUSH_REG(methodStart, offset);
-    }
+    // EnC remap inside handlers is not supported
+    if (pOldCodeInfo->IsFunclet() || pNewCodeInfo->IsFunclet())
+        return CORDBG_E_ENC_IN_FUNCLET;
 
-    if (info->rawStkSize)
+    if (oldSizeOfPreservedArea != newSizeOfPreservedArea)
     {
-        offset = SKIP_ALLOC_FRAME(info->rawStkSize, methodStart, offset);
-
-        // Note that this assumes that only the last instruction in SKIP_ALLOC_FRAME
-        // actually updates ESP
-        if (InstructionAlreadyExecuted(offset, curOffs + 1))
-        {
-            savedRegPtr += (info->rawStkSize / sizeof(DWORD));
-            ESP += info->rawStkSize;
-        }
+        _ASSERTE(!"FixContextForEnC called with method whose frame header size changed from old to new version.");
+        return E_FAIL;
     }
 
-    //
-    // Stack probe checks here
-    //
-
-    // Poison the value, we don't set it properly at the end of the prolog
-    INDEBUG(offset = 0xCCCCCCCC);
-
-
-    // Always restore EBP
-    if (regsMask & RM_EBP)
-        pContext->SetEbpLocation(savedRegPtr++);
+    TADDR callerSP = oldStackBase + oldFixedStackSize;
 
-    if (flags & UpdateAllRegs)
+#ifdef _DEBUG
+    // If the old method has a PSPSym, then its value should == initial-SP (i.e.
+    // oldStackBase) for x64 and callerSP for arm64
+    INT32 nOldPspSymStackSlot = oldGcDecoder.GetPSPSymStackSlot();
+    if (nOldPspSymStackSlot != NO_PSP_SYM)
     {
-        if (regsMask & RM_EBX)
-            pContext->SetEbxLocation(savedRegPtr++);
-        if (regsMask & RM_ESI)
-            pContext->SetEsiLocation(savedRegPtr++);
-        if (regsMask & RM_EDI)
-            pContext->SetEdiLocation(savedRegPtr++);
-
-        TRASH_CALLEE_UNSAVED_REGS(pContext);
+#if defined(TARGET_AMD64)
+        TADDR oldPSP = *PTR_TADDR(oldStackBase + nOldPspSymStackSlot);
+        _ASSERTE(oldPSP == oldStackBase);
+#else
+        TADDR oldPSP = *PTR_TADDR(callerSP + nOldPspSymStackSlot);
+        _ASSERTE(oldPSP == callerSP);
+#endif
     }
+#endif // _DEBUG
 
-#if 0
-// NOTE:
-// THIS IS ONLY TRUE IF PROLOGSIZE DOES NOT INCLUDE REG-VAR INITIALIZATION !!!!
-//
-    /* there is (potentially) only one additional
-       instruction in the prolog, (push ebp)
-       but if we would have been passed that instruction,
-       info->prologOffs would be hdrInfo::NOT_IN_PROLOG!
-    */
-    _ASSERTE(offset == info->prologOffs);
+#else
+    PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform.");
 #endif
 
-    pContext->SP = ESP;
-}
+    // 2) Get all the info about current variables, registers, etc
 
-/*****************************************************************************/
+    const ICorDebugInfo::NativeVarInfo *  pOldVar;
 
-void UnwindEspFrame(
-        PREGDISPLAY pContext,
-        hdrInfo * info,
-        PTR_CBYTE table,
-        PTR_CBYTE methodStart,
-        DWORD curOffs,
-        unsigned flags)
-{
-    LIMITED_METHOD_CONTRACT;
-    SUPPORTS_DAC;
+    // sorted by varNumber
+    ICorDebugInfo::NativeVarInfo * oldMethodVarsSorted = NULL;
+    ICorDebugInfo::NativeVarInfo * oldMethodVarsSortedBase = NULL;
+    ICorDebugInfo::NativeVarInfo *newMethodVarsSorted = NULL;
+    ICorDebugInfo::NativeVarInfo *newMethodVarsSortedBase = NULL;
 
-    _ASSERTE(!info->ebpFrame && !info->doubleAlign);
-    _ASSERTE(info->epilogOffs == hdrInfo::NOT_IN_EPILOG);
+    SIZE_T *rgVal1 = NULL;
+    SIZE_T *rgVal2 = NULL;
 
-    unsigned ESP = pContext->SP;
+    {
+        SIZE_T local;
 
+        // We'll need to sort the old native var info by variable number, since the
+        // order of them isn't necc. the same.  We'll use the number as the key.
+        // We will assume we may have hidden arguments (which have negative values as the index)
 
-    if (info->prologOffs != hdrInfo::NOT_IN_PROLOG)
-    {
-        if (info->prologOffs != 0) // Do nothing for the very start of the method
+        unsigned oldNumVars = unsigned(-ICorDebugInfo::UNKNOWN_ILNUM);
+        for (pOldVar = oldMethodVars, local = 0;
+             local < oldMethodVarsCount;
+             local++, pOldVar++)
         {
-            UnwindEspFrameProlog(pContext, info, methodStart, flags);
-            ESP = pContext->SP;
+            DWORD varNumber = pOldVar->varNumber;
+            if (signed(varNumber) >= 0)
+            {
+                // This is an explicit (not special) var, so add its varNumber + 1 to our
+                // max count ("+1" because varNumber is zero-based).
+                oldNumVars = max(oldNumVars, (unsigned)(unsigned(-ICorDebugInfo::UNKNOWN_ILNUM) + varNumber + 1));
+            }
         }
-    }
-    else
-    {
-        /* we are past the prolog, ESP has been set above */
-
-        // Are there any arguments pushed on the stack?
 
-        ESP += GetPushedArgSize(info, table, curOffs);
-
-        ESP += info->rawStkSize;
-
-        const RegMask regsMask = info->savedRegMask;
-
-        for (unsigned i = ARRAY_SIZE(CALLEE_SAVED_REGISTERS_MASK); i > 0; i--)
+        oldMethodVarsSortedBase = new (nothrow) ICorDebugInfo::NativeVarInfo[oldNumVars];
+        if (!oldMethodVarsSortedBase)
         {
-            RegMask regMask = CALLEE_SAVED_REGISTERS_MASK[i - 1];
-
-            if ((regMask & regsMask) == 0)
-                continue;
-
-            SetLocation(pContext, i - 1, PTR_DWORD((TADDR)ESP));
-
-            ESP += sizeof(unsigned);
+            hr = E_FAIL;
+            goto ErrExit;
         }
-    }
-
-    /* we can now set the (address of the) return address */
-
-    pContext->PCTAddr = (TADDR)ESP;
-    pContext->ControlPC = *PTR_PCODE(pContext->PCTAddr);
-
-    /* Now adjust stack pointer */
-
-    pContext->SP = ESP + ESPIncrOnReturn(info);
-}
-
-
-/*****************************************************************************/
-
-void UnwindEbpDoubleAlignFrameProlog(
-        PREGDISPLAY pContext,
-        hdrInfo * info,
-        PTR_CBYTE methodStart,
-        unsigned flags)
-{
-    LIMITED_METHOD_DAC_CONTRACT;
-
-    _ASSERTE(info->prologOffs != hdrInfo::NOT_IN_PROLOG);
-    _ASSERTE(info->ebpFrame || info->doubleAlign);
-
-    DWORD offset = 0;
-
-#ifdef _DEBUG
-    // If the first two instructions are 'nop, int3', then  we will
-    // assume that is from a JitHalt operation and skip past it
-    if (methodStart[0] == X86_INSTR_NOP && methodStart[1] == X86_INSTR_INT3)
-    {
-        offset += 2;
-    }
-#endif
-
-    /* Check for the case where EBP has not been updated yet. */
-
-    const DWORD curOffs = info->prologOffs;
-
-    // If we have still not excecuted "push ebp; mov ebp, esp", then we need to
-    // report the frame relative to ESP
-
-    if (!InstructionAlreadyExecuted(offset + 1, curOffs))
-    {
-        _ASSERTE(CheckInstrByte(methodStart [offset], X86_INSTR_PUSH_EBP) ||
-                 CheckInstrWord(*PTR_WORD(methodStart + offset), X86_INSTR_W_MOV_EBP_ESP) ||
-                 CheckInstrByte(methodStart [offset], X86_INSTR_JMP_NEAR_REL32));   // a rejit jmp-stamp
-
-        /* If we're past the "push ebp", adjust ESP to pop EBP off */
-
-        if  (curOffs == (offset + 1))
-            pContext->SP += sizeof(TADDR);
-
-        /* Stack pointer points to return address */
-
-        pContext->PCTAddr = (TADDR)pContext->SP;
-        pContext->ControlPC = *PTR_PCODE(pContext->PCTAddr);
-
-        /* EBP and callee-saved registers still have the correct value */
-
-        return;
-    }
+        oldMethodVarsSorted = oldMethodVarsSortedBase + (-ICorDebugInfo::UNKNOWN_ILNUM);
 
-    // We are atleast after the "push ebp; mov ebp, esp"
+        memset((void *)oldMethodVarsSortedBase, 0, oldNumVars * sizeof(ICorDebugInfo::NativeVarInfo));
 
-    offset = SKIP_MOV_REG_REG(methodStart,
-                SKIP_PUSH_REG(methodStart, offset));
+        for (local = 0; local < oldNumVars;local++)
+             oldMethodVarsSortedBase[local].loc.vlType = ICorDebugInfo::VLT_INVALID;
 
-    /* At this point, EBP has been set up. The caller's ESP and the return value
-       can be determined using EBP. Since we are still in the prolog,
-       we need to know our exact location to determine the callee-saved registers */
+        BYTE **rgVCs = NULL;
+        DWORD oldMethodOffset = pOldCodeInfo->GetRelOffset();
 
-    const unsigned curEBP = GetRegdisplayFP(pContext);
+        for (pOldVar = oldMethodVars, local = 0;
+             local < oldMethodVarsCount;
+             local++, pOldVar++)
+        {
+            DWORD varNumber = pOldVar->varNumber;
 
-    if (flags & UpdateAllRegs)
-    {
-        PTR_DWORD pSavedRegs = PTR_DWORD((TADDR)curEBP);
+            _ASSERTE(varNumber + unsigned(-ICorDebugInfo::UNKNOWN_ILNUM) < oldNumVars);
 
-        /* make sure that we align ESP just like the method's prolog did */
-        if  (info->doubleAlign)
-        {
-            // "and esp,-8"
-            offset = SKIP_ARITH_REG(-8, methodStart, offset);
-            if (curEBP & 0x04)
+            // Only care about old local variables alive at oldMethodOffset
+            if (pOldVar->startOffset <= oldMethodOffset &&
+                pOldVar->endOffset   >  oldMethodOffset)
             {
-                pSavedRegs--;
-#ifdef _DEBUG
-                if (dspPtr) printf("EnumRef: dblalign ebp: %08X\n", curEBP);
-#endif
+                // Indexing should be performed with a signed value - could be negative.
+                oldMethodVarsSorted[(int32_t)varNumber] = *pOldVar;
             }
         }
 
-        /* Increment "offset" in steps to see which callee-saved
-           registers have been pushed already */
+        // 3) Next sort the new var info by varNumber.  We want to do this here, since
+        // we're allocating memory (which may fail) - do this before going to step 2
 
-        for (unsigned i = 0; i < STRING_LENGTH(CALLEE_SAVED_REGISTERS_MASK); i++)
-        {
-            RegMask regMask = CALLEE_SAVED_REGISTERS_MASK[i];
-            _ASSERTE(regMask != RM_EBP);
+        // First, count the new vars the same way we did the old vars above.
 
-            if ((info->savedRegMask & regMask) == 0)
-                continue;
+        const ICorDebugInfo::NativeVarInfo * pNewVar;
 
-            if (InstructionAlreadyExecuted(offset, curOffs))
+        unsigned newNumVars = unsigned(-ICorDebugInfo::UNKNOWN_ILNUM);
+        for (pNewVar = newMethodVars, local = 0;
+             local < newMethodVarsCount;
+             local++, pNewVar++)
+        {
+            DWORD varNumber = pNewVar->varNumber;
+            if (signed(varNumber) >= 0)
             {
-                SetLocation(pContext, i, PTR_DWORD(--pSavedRegs));
+                // This is an explicit (not special) var, so add its varNumber + 1 to our
+                // max count ("+1" because varNumber is zero-based).
+                newNumVars = max(newNumVars, (unsigned)(unsigned(-ICorDebugInfo::UNKNOWN_ILNUM) + varNumber + 1));
             }
-
-            // "push reg"
-            offset = SKIP_PUSH_REG(methodStart, offset) ;
         }
 
-        TRASH_CALLEE_UNSAVED_REGS(pContext);
-    }
-
-    /* The caller's saved EBP is pointed to by our EBP */
-
-    pContext->SetEbpLocation(PTR_DWORD((TADDR)curEBP));
-    pContext->SP = DWORD((TADDR)(curEBP + sizeof(void *)));
-
-    /* Stack pointer points to return address */
-
-    pContext->PCTAddr = (TADDR)pContext->SP;
-    pContext->ControlPC = *PTR_PCODE(pContext->PCTAddr);
-}
-
-/*****************************************************************************/
-
-bool UnwindEbpDoubleAlignFrame(
-        PREGDISPLAY     pContext,
-        EECodeInfo     *pCodeInfo,
-        hdrInfo        *info,
-        PTR_CBYTE       table,
-        PTR_CBYTE       methodStart,
-        DWORD           curOffs,
-        unsigned        flags,
-        StackwalkCacheUnwindInfo  *pUnwindInfo) // out-only, perf improvement
-{
-    LIMITED_METHOD_CONTRACT;
-    SUPPORTS_DAC;
-
-    _ASSERTE(info->ebpFrame || info->doubleAlign);
-
-    const unsigned curESP = pContext->SP;
-    const unsigned curEBP = GetRegdisplayFP(pContext);
+        // sorted by varNumber
+        newMethodVarsSortedBase = new (nothrow) ICorDebugInfo::NativeVarInfo[newNumVars];
+        if (!newMethodVarsSortedBase)
+        {
+            hr = E_FAIL;
+            goto ErrExit;
+        }
+        newMethodVarsSorted = newMethodVarsSortedBase + (-ICorDebugInfo::UNKNOWN_ILNUM);
 
-    /* First check if we are in a filter (which is obviously after the prolog) */
+        memset(newMethodVarsSortedBase, 0, newNumVars * sizeof(ICorDebugInfo::NativeVarInfo));
+        for (local = 0; local < newNumVars;local++)
+             newMethodVarsSortedBase[local].loc.vlType = ICorDebugInfo::VLT_INVALID;
 
-    if (info->handlers && info->prologOffs == hdrInfo::NOT_IN_PROLOG)
-    {
-        TADDR baseSP;
+        DWORD newMethodOffset = pNewCodeInfo->GetRelOffset();
 
-#ifdef FEATURE_EH_FUNCLETS
-        // Funclets' frame pointers(EBP) are always restored so they can access to main function's local variables.
-        // Therefore the value of EBP is invalid for unwinder so we should use ESP instead.
-        // TODO If funclet frame layout is changed from CodeGen::genFuncletProlog() and genFuncletEpilog(),
-        //      we need to change here accordingly. It is likely to have changes when introducing PSPSym.
-        // TODO Currently we assume that ESP of funclet frames is always fixed but actually it could change.
-        if (pCodeInfo->IsFunclet())
+        for (pNewVar = newMethodVars, local = 0;
+             local < newMethodVarsCount;
+             local++, pNewVar++)
         {
-            baseSP = curESP;
-            // Set baseSP as initial SP
-            baseSP += GetPushedArgSize(info, table, curOffs);
-
-            // 16-byte stack alignment padding (allocated in genFuncletProlog)
-            // Current funclet frame layout (see CodeGen::genFuncletProlog() and genFuncletEpilog()):
-            //   prolog: sub esp, 12
-            //   epilog: add esp, 12
-            //           ret
-            // SP alignment padding should be added for all instructions except the first one and the last one.
-            // Epilog may not exist (unreachable), so we need to check the instruction code.
-            const TADDR funcletStart = pCodeInfo->GetJitManager()->GetFuncletStartAddress(pCodeInfo);
-            if (funcletStart != pCodeInfo->GetCodeAddress() && methodStart[pCodeInfo->GetRelOffset()] != X86_INSTR_RETN)
-                baseSP += 12;
-
-            pContext->PCTAddr = baseSP;
-            pContext->ControlPC = *PTR_PCODE(pContext->PCTAddr);
-
-            pContext->SP = (DWORD)(baseSP + sizeof(TADDR));
+            DWORD varNumber = pNewVar->varNumber;
 
-            return true;
+            _ASSERTE(varNumber + unsigned(-ICorDebugInfo::UNKNOWN_ILNUM) < newNumVars);
+
+            // Only care about new local variables alive at newMethodOffset
+            if (pNewVar->startOffset <= newMethodOffset &&
+                pNewVar->endOffset   >  newMethodOffset)
+            {
+                // Indexing should be performed with a signed valued - could be negative.
+                newMethodVarsSorted[(int32_t)varNumber] = *pNewVar;
+            }
         }
-#else // FEATURE_EH_FUNCLETS
 
-        FrameType frameType = GetHandlerFrameInfo(info, curEBP,
-                                                  curESP, (DWORD) IGNORE_VAL,
-                                                  &baseSP);
+        _ASSERTE(newNumVars >= oldNumVars ||
+                 !"Not allowed to reduce the number of locals between versions!");
 
-        /* If we are in a filter, we only need to unwind the funclet stack.
-           For catches/finallies, the normal handling will
-           cause the frame to be unwound all the way up to ebp skipping
-           other frames above it. This is OK, as those frames will be
-           dead. Also, the EE will detect that this has happened and it
-           will handle any EE frames correctly.
-         */
+        LOG((LF_ENC, LL_INFO100, "EECM::FixContextForEnC: gathered info!\n"));
 
-        if (frameType == FR_INVALID)
+        rgVal1 = new (nothrow) SIZE_T[newNumVars];
+        if (rgVal1 == NULL)
         {
-            return false;
+            hr = E_FAIL;
+            goto ErrExit;
         }
 
-        if (frameType == FR_FILTER)
+        rgVal2 = new (nothrow) SIZE_T[newNumVars];
+        if (rgVal2 == NULL)
         {
-            pContext->PCTAddr = baseSP;
-            pContext->ControlPC = *PTR_PCODE(pContext->PCTAddr);
-
-            pContext->SP = (DWORD)(baseSP + sizeof(TADDR));
-
-         // pContext->pEbp = same as before;
-
-#ifdef _DEBUG
-            /* The filter has to be called by the VM. So we dont need to
-               update callee-saved registers.
-             */
+            hr = E_FAIL;
+            goto ErrExit;
+        }
 
-            if (flags & UpdateAllRegs)
-            {
-                static DWORD s_badData = 0xDEADBEEF;
+        // 4) Next we'll zero them out, so any variables that aren't in scope
+        // in the old method, but are in scope in the new, will have the
+        // default, zero, value.
 
-                pContext->SetEaxLocation(&s_badData);
-                pContext->SetEcxLocation(&s_badData);
-                pContext->SetEdxLocation(&s_badData);
+        memset(rgVal1, 0, sizeof(SIZE_T) * newNumVars);
+        memset(rgVal2, 0, sizeof(SIZE_T) * newNumVars);
 
-                pContext->SetEbxLocation(&s_badData);
-                pContext->SetEsiLocation(&s_badData);
-                pContext->SetEdiLocation(&s_badData);
-            }
-#endif
+        unsigned varsToGet = (oldNumVars > newNumVars)
+                ? newNumVars
+                : oldNumVars;
 
-            if (pUnwindInfo)
-            {
-                // The filter funclet is like an ESP-framed-method.
-                pUnwindInfo->fUseEbp = FALSE;
-                pUnwindInfo->fUseEbpAsFrameReg = FALSE;
-            }
+         //  2) Get all the info about current variables, registers, etc.
 
-            return true;
+        hr = g_pDebugInterface->GetVariablesFromOffset(pOldCodeInfo->GetMethodDesc(),
+                                                       varsToGet,
+                                                       oldMethodVarsSortedBase,
+                                                       oldMethodOffset,
+                                                       &oldCtx,
+                                                       rgVal1,
+                                                       rgVal2,
+                                                       newNumVars,
+                                                       &rgVCs);
+        if (FAILED(hr))
+        {
+            goto ErrExit;
         }
-#endif // !FEATURE_EH_FUNCLETS
-    }
-
-    //
-    // Prolog of an EBP method
-    //
-
-    if (info->prologOffs != hdrInfo::NOT_IN_PROLOG)
-    {
-        UnwindEbpDoubleAlignFrameProlog(pContext, info, methodStart, flags);
 
-        /* Now adjust stack pointer. */
 
-        pContext->SP += ESPIncrOnReturn(info);
-        return true;
-    }
+        LOG((LF_ENC, LL_INFO100, "EECM::FixContextForEnC: got vars!\n"));
 
-    if (flags & UpdateAllRegs)
-    {
-        // Get to the first callee-saved register
-        PTR_DWORD pSavedRegs = PTR_DWORD((TADDR)curEBP);
+        /*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*
+         *  IMPORTANT : Once we start munging on the context, we cannot return
+         *  EnC_FAIL, as this should be a transacted commit,
+         **=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
 
-        if (info->doubleAlign && (curEBP & 0x04))
-            pSavedRegs--;
+#if defined(TARGET_X86)
+        // Zero out all  the registers as some may hold new variables.
+        pCtx->Eax = pCtx->Ecx = pCtx->Edx = pCtx->Ebx = pCtx->Esi = pCtx->Edi = 0;
 
-        for (unsigned i = 0; i < STRING_LENGTH(CALLEE_SAVED_REGISTERS_MASK); i++)
-        {
-            RegMask regMask = CALLEE_SAVED_REGISTERS_MASK[i];
-            if ((info->savedRegMask & regMask) == 0)
-                continue;
+        // 3) zero out the stack frame - this'll initialize _all_ variables
 
-            SetLocation(pContext, i, --pSavedRegs);
-        }
-    }
+        /*-------------------------------------------------------------------------
+         * Adjust the stack height
+         */
+        pCtx->Esp -= (newInfo.stackSize - oldInfo.stackSize);
 
-    /* The caller's ESP will be equal to EBP + retAddrSize + argSize. */
+        // Zero-init the local and tempory section of new stack frame being careful to avoid
+        // touching anything in the frame header.
+        // This is necessary to ensure that any JIT temporaries in the old version can't be mistaken
+        // for ObjRefs now.
+        size_t frameHeaderSize = GetSizeOfFrameHeaderForEnC( &newInfo );
+        _ASSERTE( frameHeaderSize <= oldInfo.stackSize );
+        _ASSERTE( GetSizeOfFrameHeaderForEnC( &oldInfo ) == frameHeaderSize );
 
-    pContext->SP = (DWORD)(curEBP + sizeof(curEBP) + ESPIncrOnReturn(info));
+#elif defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
 
-    /* The caller's saved EIP is right after our EBP */
+        // Next few statements zero out all registers that may end up holding new variables.
 
-    pContext->PCTAddr = (TADDR)curEBP + RETURN_ADDR_OFFS * sizeof(TADDR);
-    pContext->ControlPC = *PTR_PCODE(pContext->PCTAddr);
+        // volatile int registers (JIT may use these to enregister variables)
+        pCtx->Rax = pCtx->Rcx = pCtx->Rdx = pCtx->R8 = pCtx->R9 = pCtx->R10 = pCtx->R11 = 0;
 
-    /* The caller's saved EBP is pointed to by our EBP */
+        // volatile float registers
+        pCtx->Xmm1.High = pCtx->Xmm1.Low = 0;
+        pCtx->Xmm2.High = pCtx->Xmm2.Low = 0;
+        pCtx->Xmm3.High = pCtx->Xmm3.Low = 0;
+        pCtx->Xmm4.High = pCtx->Xmm4.Low = 0;
+        pCtx->Xmm5.High = pCtx->Xmm5.Low = 0;
 
-    pContext->SetEbpLocation(PTR_DWORD((TADDR)curEBP));
-    return true;
-}
+        // 3) zero out the stack frame - this'll initialize _all_ variables
 
-bool UnwindStackFrame(PREGDISPLAY     pContext,
-                      EECodeInfo     *pCodeInfo,
-                      unsigned        flags,
-                      CodeManState   *pState,
-                      StackwalkCacheUnwindInfo  *pUnwindInfo /* out-only, perf improvement */)
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_NOTRIGGER;
-        HOST_NOCALLS;
-        SUPPORTS_DAC;
-    } CONTRACTL_END;
+        /*-------------------------------------------------------------------------
+        * Adjust the stack height
+        */
 
-    // Address where the method has been interrupted
-    PCODE       breakPC = pContext->ControlPC;
-    _ASSERTE(PCODEToPINSTR(breakPC) == pCodeInfo->GetCodeAddress());
+        TADDR newStackBase = callerSP - newFixedStackSize;
 
-    PTR_CBYTE methodStart = PTR_CBYTE(pCodeInfo->GetSavedMethodCode());
+        SetSP(pCtx, newStackBase);
 
-    GCInfoToken gcInfoToken = pCodeInfo->GetGCInfoToken();
-    PTR_VOID    methodInfoPtr = gcInfoToken.Info;
-    DWORD       curOffs = pCodeInfo->GetRelOffset();
+        // We want to zero-out everything pushed after the frame header. This way we'll zero
+        // out locals (both old & new) and temporaries. This is necessary to ensure that any
+        // JIT temporaries in the old version can't be mistaken for ObjRefs now. (I am told
+        // this last point is less of an issue on x64 as it is on x86, but zeroing out the
+        // temporaries is still the cleanest, most robust way to go.)
+        size_t frameHeaderSize = newSizeOfPreservedArea;
+        _ASSERTE(frameHeaderSize <= oldFixedStackSize);
+        _ASSERTE(frameHeaderSize <= newFixedStackSize);
 
-    _ASSERTE(sizeof(CodeManStateBuf) <= sizeof(pState->stateBuf));
-    CodeManStateBuf * stateBuf = (CodeManStateBuf*)pState->stateBuf;
+        // For EnC-compliant x64 code, FP == SP.  Since SP changed above, update FP now
+        pCtx->Rbp = newStackBase;
 
-    if (pState->dwIsSet == 0)
-    {
-        /* Extract the necessary information from the info block header */
+#else
+#if defined(TARGET_ARM64)
+        // Zero out volatile part of stack frame
+        // x0-x17
+        memset(&pCtx->X[0], 0, sizeof(pCtx->X[0]) * 18);
+        // v0-v7
+        memset(&pCtx->V[0], 0, sizeof(pCtx->V[0]) * 8);
+        // v16-v31
+        memset(&pCtx->V[16], 0, sizeof(pCtx->V[0]) * 16);
+#elif defined(TARGET_AMD64)
+        // SysV ABI
+        pCtx->Rax = pCtx->Rdi = pCtx->Rsi = pCtx->Rdx = pCtx->Rcx = pCtx->R8 = pCtx->R9 = 0;
 
-        stateBuf->hdrInfoSize = (DWORD)DecodeGCHdrInfo(gcInfoToken,
-                                                          curOffs,
-                                                          &stateBuf->hdrInfoBody);
-    }
+        // volatile float registers
+        memset(&pCtx->Xmm0, 0, sizeof(pCtx->Xmm0) * 16);
+#else
+        PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform.");
+#endif
 
-    PTR_CBYTE table = dac_cast<PTR_CBYTE>(methodInfoPtr) + stateBuf->hdrInfoSize;
+        TADDR newStackBase = callerSP - newFixedStackSize;
 
-    hdrInfo * info = &stateBuf->hdrInfoBody;
+        SetSP(pCtx, newStackBase);
 
-    info->isSpeculativeStackWalk = ((flags & SpeculativeStackwalk) != 0);
+        size_t frameHeaderSize = newSizeOfPreservedArea;
+        _ASSERTE(frameHeaderSize <= oldFixedStackSize);
+        _ASSERTE(frameHeaderSize <= newFixedStackSize);
 
-    if (pUnwindInfo != NULL)
-    {
-        pUnwindInfo->fUseEbpAsFrameReg = info->ebpFrame;
-        pUnwindInfo->fUseEbp = ((info->savedRegMask & RM_EBP) != 0);
-    }
+        // EnC prolog saves only FP (and LR on arm64), and FP points to saved FP for frame chaining.
+        // These should already be set up from previous version.
+        _ASSERTE(GetFP(pCtx) == callerSP - 16);
+#endif
 
-    if  (info->epilogOffs != hdrInfo::NOT_IN_EPILOG)
-    {
-        /*---------------------------------------------------------------------
-         *  First, handle the epilog
-         */
+        // Perform some debug-only sanity checks on stack variables.  Some checks are
+        // performed differently between X86/AMD64.
 
-        PTR_CBYTE epilogBase = methodStart + (curOffs - info->epilogOffs);
-        UnwindEpilog(pContext, info, epilogBase, flags);
-    }
-    else if (!info->ebpFrame && !info->doubleAlign)
-    {
-        /*---------------------------------------------------------------------
-         *  Now handle ESP frames
-         */
+#ifdef _DEBUG
+        for( unsigned i = 0; i < newNumVars; i++ )
+        {
+            // Make sure that stack variables existing in both old and new methods did not
+            // move.  This matters if the address of a local is used in the remapped method.
+            // For example:
+            //
+            //    static unsafe void Main(string[] args)
+            //    {
+            //        int x;
+            //        int* p = &x;
+            //                 <- Edit made here - cannot move address of x
+            //        *p = 5;
+            //    }
+            //
+            if ((i + unsigned(-ICorDebugInfo::UNKNOWN_ILNUM) < oldNumVars) &&  // Does variable exist in old method?
+                 (oldMethodVarsSorted[i].loc.vlType == ICorDebugInfo::VLT_STK) &&   // Is the variable on the stack?
+                 (newMethodVarsSorted[i].loc.vlType == ICorDebugInfo::VLT_STK))
+            {
+                SIZE_T * pOldVarStackLocation = NativeVarStackAddr(oldMethodVarsSorted[i].loc, &oldCtx);
+                SIZE_T * pNewVarStackLocation = NativeVarStackAddr(newMethodVarsSorted[i].loc, pCtx);
+                _ASSERTE(pOldVarStackLocation == pNewVarStackLocation);
+            }
 
-        UnwindEspFrame(pContext, info, table, methodStart, curOffs, flags);
-        return true;
-    }
-    else
-    {
-        /*---------------------------------------------------------------------
-         *  Now we know that have an EBP frame
-         */
+            // Sanity-check that the range we're clearing contains all of the stack variables
 
-        if (!UnwindEbpDoubleAlignFrame(pContext, pCodeInfo, info, table, methodStart, curOffs, flags, pUnwindInfo))
-            return false;
-    }
+#if defined(TARGET_X86)
+            const ICorDebugInfo::VarLoc &varLoc = newMethodVarsSortedBase[i].loc;
+            if( varLoc.vlType == ICorDebugInfo::VLT_STK )
+            {
+                // This is an EBP frame, all stack variables should be EBP relative
+                _ASSERTE( varLoc.vlStk.vlsBaseReg == ICorDebugInfo::REGNUM_EBP );
+                // Generic special args may show up as locals with positive offset from EBP, so skip them
+                if( varLoc.vlStk.vlsOffset <= 0 )
+                {
+                    // Normal locals must occur after the header on the stack
+                    _ASSERTE( unsigned(-varLoc.vlStk.vlsOffset) >= frameHeaderSize );
+                    // Value must occur before the top of the stack
+                    _ASSERTE( unsigned(-varLoc.vlStk.vlsOffset) < newInfo.stackSize );
+                }
 
-    // TODO [DAVBR]: For the full fix for VsWhidbey 450273, all the below
-    // may be uncommented once isLegalManagedCodeCaller works properly
-    // with non-return address inputs, and with non-DEBUG builds
-    /*
-    // Ensure isLegalManagedCodeCaller succeeds for speculative stackwalks.
-    // (We just assert this below for non-speculative stackwalks.)
-    //
-    FAIL_IF_SPECULATIVE_WALK(isLegalManagedCodeCaller(GetControlPC(pContext)));
-    */
+                // Ideally we'd like to verify that the stack locals (if any) start at exactly the end
+                // of the header.  However, we can't easily determine the size of value classes here,
+                // and so (since the stack grows towards 0) can't easily determine where the end of
+                // the local lies.
+            }
+#elif defined(TARGET_AMD64) || defined(TARGET_ARM64)
+            switch(newMethodVarsSortedBase[i].loc.vlType)
+            {
+            default:
+                // No validation here for non-stack locals
+                break;
 
-    return true;
-}
+            case ICorDebugInfo::VLT_STK_BYREF:
+                {
+                    // For byrefs, verify that the ptr will be zeroed out
 
-#endif // TARGET_X86
+                    SIZE_T regOffs = GetRegOffsInCONTEXT(newMethodVarsSortedBase[i].loc.vlStk.vlsBaseReg);
+                    TADDR baseReg = *(TADDR *)(regOffs + (BYTE*)pCtx);
+                    TADDR addrOfPtr = baseReg + newMethodVarsSortedBase[i].loc.vlStk.vlsOffset;
 
-#ifdef FEATURE_EH_FUNCLETS
-#ifdef TARGET_X86
-size_t EECodeManager::GetResumeSp( PCONTEXT  pContext )
-{
-    PCODE currentPc = PCODE(pContext->Eip);
+                    _ASSERTE(
+                        // The ref must exist in the portion we'll zero-out
+                        (
+                            (newStackBase <= addrOfPtr) &&
+                            (addrOfPtr < newStackBase + (newFixedStackSize - frameHeaderSize))
+                        ) ||
+                        // OR in the caller's frame (for parameters)
+                        (addrOfPtr >= newStackBase + newFixedStackSize));
 
-    _ASSERTE(ExecutionManager::IsManagedCode(currentPc));
+                    // Deliberately fall through, so that we also verify that the value that the ptr
+                    // points to will be zeroed out
+                    // ...
+                }
+                __fallthrough;
 
-    EECodeInfo codeInfo(currentPc);
+            case ICorDebugInfo::VLT_STK:
+            case ICorDebugInfo::VLT_STK2:
+            case ICorDebugInfo::VLT_REG_STK:
+            case ICorDebugInfo::VLT_STK_REG:
+                SIZE_T * pVarStackLocation = NativeVarStackAddr(newMethodVarsSortedBase[i].loc, pCtx);
+                _ASSERTE (pVarStackLocation != NULL);
+                _ASSERTE(
+                    // The value must exist in the portion we'll zero-out
+                    (
+                        (newStackBase <= (TADDR) pVarStackLocation) &&
+                        ((TADDR) pVarStackLocation < newStackBase + (newFixedStackSize - frameHeaderSize))
+                    ) ||
+                    // OR in the caller's frame (for parameters)
+                    ((TADDR) pVarStackLocation >= newStackBase + newFixedStackSize));
+                break;
+            }
+#else   // !X86, !X64, !ARM64
+            PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform.");
+#endif
+        }
 
-    PTR_CBYTE methodStart = PTR_CBYTE(codeInfo.GetSavedMethodCode());
+#endif // _DEBUG
 
-    GCInfoToken gcInfoToken = codeInfo.GetGCInfoToken();
-    PTR_VOID    methodInfoPtr = gcInfoToken.Info;
-    DWORD       curOffs = codeInfo.GetRelOffset();
+        // Clear the local and temporary stack space
 
-    CodeManStateBuf stateBuf;
+#if defined(TARGET_X86)
+        memset((void*)(size_t)(pCtx->Esp), 0, newInfo.stackSize - frameHeaderSize );
+#elif defined(TARGET_AMD64) || defined(TARGET_ARM64)
+        memset((void*)newStackBase, 0, newFixedStackSize - frameHeaderSize);
 
-    stateBuf.hdrInfoSize = (DWORD)DecodeGCHdrInfo(gcInfoToken,
-                                                  curOffs,
-                                                  &stateBuf.hdrInfoBody);
+        // Restore PSPSym for the new function. Its value should be set to our new FP. But
+        // first, we gotta find PSPSym's location on the stack
+        INT32 nNewPspSymStackSlot = newGcDecoder.GetPSPSymStackSlot();
+        if (nNewPspSymStackSlot != NO_PSP_SYM)
+        {
+#if defined(TARGET_AMD64)
+            *PTR_TADDR(newStackBase + nNewPspSymStackSlot) = newStackBase;
+#elif defined(TARGET_ARM64)
+            *PTR_TADDR(callerSP + nNewPspSymStackSlot) = callerSP;
+#else
+            PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform.");
+#endif
+        }
+#else   // !X86, !X64, !ARM64
+        PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform.");
+#endif
 
-    PTR_CBYTE table = dac_cast<PTR_CBYTE>(methodInfoPtr) + stateBuf.hdrInfoSize;
+        // 4) Put the variables from step 3 into their new locations.
 
-    hdrInfo *info = &stateBuf.hdrInfoBody;
+        LOG((LF_ENC, LL_INFO100, "EECM::FixContextForEnC: set vars!\n"));
 
-    _ASSERTE(info->epilogOffs == hdrInfo::NOT_IN_EPILOG && info->prologOffs == hdrInfo::NOT_IN_PROLOG);
+        // Move the old variables into their new places.
 
-    bool isESPFrame = !info->ebpFrame && !info->doubleAlign;
+        hr = g_pDebugInterface->SetVariablesAtOffset(pNewCodeInfo->GetMethodDesc(),
+                                                     newNumVars,
+                                                     newMethodVarsSortedBase,
+                                                     newMethodOffset,
+                                                     pCtx, // place them into the new context
+                                                     rgVal1,
+                                                     rgVal2,
+                                                     rgVCs);
 
-    if (codeInfo.IsFunclet())
-    {
-        // Treat funclet's frame as ESP frame
-        isESPFrame = true;
+        /*-----------------------------------------------------------------------*/
     }
+ErrExit:
+    if (oldMethodVarsSortedBase)
+        delete[] oldMethodVarsSortedBase;
+    if (newMethodVarsSortedBase)
+        delete[] newMethodVarsSortedBase;
+    if (rgVal1 != NULL)
+        delete[] rgVal1;
+    if (rgVal2 != NULL)
+        delete[] rgVal2;
 
-    if (isESPFrame)
-    {
-        const size_t curESP = (size_t)(pContext->Esp);
-        return curESP + GetPushedArgSize(info, table, curOffs);
-    }
+    LOG((LF_ENC, LL_INFO100, "EECM::FixContextForEnC: exiting!\n"));
 
-    const size_t curEBP = (size_t)(pContext->Ebp);
-    return GetOutermostBaseFP(curEBP, info);
+    return hr;
 }
-#endif // TARGET_X86
-#endif // FEATURE_EH_FUNCLETS
+#endif // !FEATURE_METADATA_UPDATER
 
-#ifndef FEATURE_EH_FUNCLETS
+#endif // #ifndef DACCESS_COMPILE
 
+#ifdef USE_GC_INFO_DECODER
 /*****************************************************************************
  *
- *  Unwind the current stack frame, i.e. update the virtual register
- *  set in pContext. This will be similar to the state after the function
- *  returns back to caller (IP points to after the call, Frame and Stack
- *  pointer has been reset, callee-saved registers restored (if UpdateAllRegs),
- *  callee-unsaved registers are trashed.
- *  Returns success of operation.
+ *  Is the function currently at a "GC safe point" ?
  */
-
-bool EECodeManager::UnwindStackFrame(PREGDISPLAY     pContext,
-                                     EECodeInfo     *pCodeInfo,
-                                     unsigned        flags,
-                                     CodeManState   *pState,
-                                     StackwalkCacheUnwindInfo  *pUnwindInfo /* out-only, perf improvement */)
+bool EECodeManager::IsGcSafe( EECodeInfo     *pCodeInfo,
+                              DWORD           dwRelOffset)
 {
-#ifdef TARGET_X86
-    return ::UnwindStackFrame(pContext, pCodeInfo, flags, pState, pUnwindInfo);
-#else // TARGET_X86
-    PORTABILITY_ASSERT("EECodeManager::UnwindStackFrame");
-    return false;
-#endif // _TARGET_???_
-}
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+    } CONTRACTL_END;
 
-/*****************************************************************************/
-#else // !FEATURE_EH_FUNCLETS
-/*****************************************************************************/
+    GCInfoToken gcInfoToken = pCodeInfo->GetGCInfoToken();
 
-bool EECodeManager::UnwindStackFrame(PREGDISPLAY     pContext,
-                                     EECodeInfo     *pCodeInfo,
-                                     unsigned        flags,
-                                     CodeManState   *pState,
-                                     StackwalkCacheUnwindInfo  *pUnwindInfo /* out-only, perf improvement */)
+    GcInfoDecoder gcInfoDecoder(
+            gcInfoToken,
+            DECODE_INTERRUPTIBILITY,
+            dwRelOffset
+            );
+
+    return gcInfoDecoder.IsInterruptible();
+}
+
+#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
+bool EECodeManager::HasTailCalls( EECodeInfo     *pCodeInfo)
 {
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
     } CONTRACTL_END;
 
-#if defined(TARGET_AMD64)
-    // To avoid unnecessary computation, we only crack the unwind info if pUnwindInfo is not NULL, which only happens
-    // if the LIGHTUNWIND flag is passed to StackWalkFramesEx().
-    if (pUnwindInfo != NULL)
-    {
-        pCodeInfo->GetOffsetsFromUnwindInfo(&(pUnwindInfo->RSPOffsetFromUnwindInfo),
-                                            &(pUnwindInfo->RBPOffset));
-    }
-#endif // TARGET_AMD64
+    GCInfoToken gcInfoToken = pCodeInfo->GetGCInfoToken();
 
-    _ASSERTE(pCodeInfo != NULL);
-    Thread::VirtualUnwindCallFrame(pContext, pCodeInfo);
-    return true;
+    GcInfoDecoder gcInfoDecoder(
+            gcInfoToken,
+            DECODE_HAS_TAILCALLS,
+            0
+            );
+
+    return gcInfoDecoder.HasTailCalls();
 }
+#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
 
-/*****************************************************************************/
-#endif // FEATURE_EH_FUNCLETS
+#if defined(TARGET_AMD64) && defined(_DEBUG)
 
-/*****************************************************************************/
+struct FindEndOfLastInterruptibleRegionState
+{
+    unsigned curOffset;
+    unsigned endOffset;
+    unsigned lastRangeOffset;
+};
 
-/* report args in 'msig' to the GC.
-   'argsStart' is start of the stack-based arguments
-   'varArgSig' describes the arguments
-   'ctx' has the GC reporting info
-*/
-void promoteVarArgs(PTR_BYTE argsStart, PTR_VASigCookie varArgSig, GCCONTEXT* ctx)
+bool FindEndOfLastInterruptibleRegionCB (
+        UINT32 startOffset,
+        UINT32 stopOffset,
+        LPVOID hCallback)
 {
-    WRAPPER_NO_CONTRACT;
+    FindEndOfLastInterruptibleRegionState *pState = (FindEndOfLastInterruptibleRegionState*)hCallback;
 
-    //Note: no instantiations needed for varargs
-    MetaSig msig(varArgSig->signature,
-                 varArgSig->pModule,
-                 NULL);
+    //
+    // If the current range doesn't overlap the given range, keep searching.
+    //
+    if (   startOffset >= pState->endOffset
+        || stopOffset < pState->curOffset)
+    {
+        return false;
+    }
 
-    PTR_BYTE pFrameBase = argsStart - TransitionBlock::GetOffsetOfArgs();
+    //
+    // If the range overlaps the end, then the last point is the end.
+    //
+    if (   stopOffset > pState->endOffset
+        /*&& startOffset < pState->endOffset*/)
+    {
+        // The ranges should be sorted in increasing order.
+        CONSISTENCY_CHECK(startOffset >= pState->lastRangeOffset);
 
-    ArgIterator argit(&msig);
+        pState->lastRangeOffset = pState->endOffset;
+        return true;
+    }
 
-#ifdef TARGET_X86
-    // For the X86 target the JIT does not report any of the fixed args for a varargs method
-    // So we report the fixed args via the promoteArgs call below
-    bool skipFixedArgs = false;
-#else
-    // For other platforms the JITs do report the fixed args of a varargs method
-    // So we must tell promoteArgs to skip to the end of the fixed args
-    bool skipFixedArgs = true;
-#endif
+    //
+    // See if the end of this range is the closet to the end that we've found
+    // so far.
+    //
+    if (stopOffset > pState->lastRangeOffset)
+        pState->lastRangeOffset = stopOffset;
 
-    bool inVarArgs = false;
+    return false;
+}
 
-    int argOffset;
-    while ((argOffset = argit.GetNextOffset()) != TransitionBlock::InvalidOffset)
-    {
-        if (msig.GetArgProps().AtSentinel())
-            inVarArgs = true;
+/*
+    Locates the end of the last interruptible region in the given code range.
+    Returns 0 if the entire range is uninterruptible.  Returns the end point
+    if the entire range is interruptible.
+*/
+unsigned EECodeManager::FindEndOfLastInterruptibleRegion(unsigned curOffset,
+                                                         unsigned endOffset,
+                                                         GCInfoToken gcInfoToken)
+{
+#ifndef DACCESS_COMPILE
+    GcInfoDecoder gcInfoDecoder(
+            gcInfoToken,
+            DECODE_FOR_RANGES_CALLBACK
+            );
 
-        // if skipFixedArgs is false we report all arguments
-        //  otherwise we just report the varargs.
-        if (!skipFixedArgs || inVarArgs)
-        {
-            ArgDestination argDest(pFrameBase, argOffset, argit.GetArgLocDescForStructInRegs());
-            msig.GcScanRoots(&argDest, ctx->f, ctx->sc);
-        }
-    }
-}
+    FindEndOfLastInterruptibleRegionState state;
+    state.curOffset = curOffset;
+    state.endOffset = endOffset;
+    state.lastRangeOffset = 0;
 
-#ifndef DACCESS_COMPILE
-FCIMPL1(void, GCReporting::Register, GCFrame* frame)
-{
-    FCALL_CONTRACT;
+    gcInfoDecoder.EnumerateInterruptibleRanges(&FindEndOfLastInterruptibleRegionCB, &state);
 
-    // Construct a GCFrame.
-    _ASSERTE(frame != NULL);
-    frame->Push(GetThread());
+    return state.lastRangeOffset;
+#else
+    DacNotImpl();
+    return NULL;
+#endif // #ifndef DACCESS_COMPILE
 }
-FCIMPLEND
 
-FCIMPL1(void, GCReporting::Unregister, GCFrame* frame)
-{
-    FCALL_CONTRACT;
+#endif // TARGET_AMD64 && _DEBUG
 
-    // Destroy the GCFrame.
-    _ASSERTE(frame != NULL);
-    frame->Remove();
-}
-FCIMPLEND
-#endif // !DACCESS_COMPILE
 
-#ifndef USE_GC_INFO_DECODER
+#else // !USE_GC_INFO_DECODER
 
 /*****************************************************************************
  *
- *  Enumerate all live object references in that function using
- *  the virtual register set.
- *  Returns success of operation.
+ *  Is the function currently at a "GC safe point" ?
  */
-
-bool EECodeManager::EnumGcRefs( PREGDISPLAY     pContext,
-                                EECodeInfo     *pCodeInfo,
-                                unsigned        flags,
-                                GCEnumCallback  pCallBack,
-                                LPVOID          hCallBack,
-                                DWORD           relOffsetOverride)
+bool EECodeManager::IsGcSafe( EECodeInfo     *pCodeInfo,
+                              DWORD           dwRelOffset)
 {
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
+        SUPPORTS_DAC;
     } CONTRACTL_END;
 
-#ifdef FEATURE_EH_FUNCLETS
-    if (flags & ParentOfFuncletStackFrame)
-    {
-        LOG((LF_GCROOTS, LL_INFO100000, "Not reporting this frame because it was already reported via another funclet.\n"));
-        return true;
-    }
-#endif // FEATURE_EH_FUNCLETS
+    hdrInfo         info;
+    BYTE    *       table;
 
-    GCInfoToken gcInfoToken = pCodeInfo->GetGCInfoToken();
-    unsigned  curOffs = pCodeInfo->GetRelOffset();
+    /* Extract the necessary information from the info block header */
 
-    unsigned  EBP     = GetRegdisplayFP(pContext);
-    unsigned  ESP     = pContext->SP;
+    table = (BYTE *)DecodeGCHdrInfo(pCodeInfo->GetGCInfoToken(),
+                                       dwRelOffset,
+                                       &info);
 
-    unsigned  ptrOffs;
+    /* workaround: prevent interruption within prolog/epilog */
 
-    unsigned  count;
+    if  (info.prologOffs != hdrInfo::NOT_IN_PROLOG || info.epilogOffs != hdrInfo::NOT_IN_EPILOG)
+        return false;
 
-    hdrInfo   info;
-    PTR_CBYTE table = PTR_CBYTE(gcInfoToken.Info);
-#if 0
-    printf("EECodeManager::EnumGcRefs - EIP = %08x ESP = %08x  offset = %x  GC Info is at %08x\n", *pContext->pPC, ESP, curOffs, table);
+#if VERIFY_GC_TABLES
+    _ASSERTE(*castto(table, unsigned short *)++ == 0xBEEF);
 #endif
 
+    return (info.interruptible);
+}
 
-    /* Extract the necessary information from the info block header */
+#endif // !USE_GC_INFO_DECODER
 
-    table += DecodeGCHdrInfo(gcInfoToken,
-                             curOffs,
-                             &info);
 
-    _ASSERTE( curOffs <= info.methodSize);
+#if defined(FEATURE_EH_FUNCLETS)
 
-#ifdef  _DEBUG
-//    if ((gcInfoToken.Info == (void*)0x37760d0) && (curOffs == 0x264))
-//        __asm int 3;
-
-    if (trEnumGCRefs) {
-        static unsigned lastESP = 0;
-        unsigned        diffESP = ESP - lastESP;
-        if (diffESP > 0xFFFF) {
-            printf("------------------------------------------------------\n");
-        }
-        lastESP = ESP;
-        printf("EnumGCRefs [%s][%s] at %s.%s + 0x%03X:\n",
-               info.ebpFrame?"ebp":"   ",
-               info.interruptible?"int":"   ",
-               "UnknownClass","UnknownMethod", curOffs);
-        fflush(stdout);
+void EECodeManager::EnsureCallerContextIsValid( PREGDISPLAY  pRD, EECodeInfo * pCodeInfo /*= NULL*/, unsigned flags /*= 0*/)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        SUPPORTS_DAC;
     }
-#endif
-
-    /* Are we in the prolog or epilog of the method? */
+    CONTRACTL_END;
 
-    if (info.prologOffs != hdrInfo::NOT_IN_PROLOG ||
-        info.epilogOffs != hdrInfo::NOT_IN_EPILOG)
+    if( !pRD->IsCallerContextValid )
     {
-
-#if !DUMP_PTR_REFS
-        // Under normal circumstances the system will not suspend a thread
-        // if it is in the prolog or epilog of the function.   However ThreadAbort
-        // exception or stack overflows can cause EH to happen in a prolog.
-        // Once in the handler, a GC can happen, so we can get to this code path.
-        // However since we are tearing down this frame, we don't need to report
-        // anything and we can simply return.
-
-        _ASSERTE(flags & ExecutionAborted);
+        if ((flags & LightUnwind) && (pCodeInfo != NULL))
+        {
+#if !defined(DACCESS_COMPILE) && defined(HAS_LIGHTUNWIND)
+            LightUnwindStackFrame(pRD, pCodeInfo, EnsureCallerStackFrameIsValid);
+#else
+            // We need to make a copy here (instead of switching the pointers), in order to preserve the current context
+            *(pRD->pCallerContext) = *(pRD->pCurrentContext);
+            // Skip updating context registers for light unwind
+            Thread::VirtualUnwindCallFrame(pRD->pCallerContext, NULL, pCodeInfo);
 #endif
-        return true;
-    }
-
-#ifdef _DEBUG
-#define CHK_AND_REPORT_REG(reg, doIt, iptr, regName)                    \
-        if  (doIt)                                                      \
-        {                                                               \
-            if (dspPtr)                                                 \
-                printf("    Live pointer register %s: ", #regName);     \
-                pCallBack(hCallBack,                                    \
-                          (OBJECTREF*)(pContext->Get##regName##Location()), \
-                          (iptr ? GC_CALL_INTERIOR : 0)                 \
-                          | CHECK_APP_DOMAIN                            \
-                          DAC_ARG(DacSlotLocation(reg, 0, false)));     \
         }
-#else // !_DEBUG
-#define CHK_AND_REPORT_REG(reg, doIt, iptr, regName)                    \
-        if  (doIt)                                                      \
-                pCallBack(hCallBack,                                    \
-                          (OBJECTREF*)(pContext->Get##regName##Location()), \
-                          (iptr ? GC_CALL_INTERIOR : 0)                 \
-                          | CHECK_APP_DOMAIN                            \
-                          DAC_ARG(DacSlotLocation(reg, 0, false)));
+        else
+        {
+            // We need to make a copy here (instead of switching the pointers), in order to preserve the current context
+            *(pRD->pCallerContext) = *(pRD->pCurrentContext);
+            *(pRD->pCallerContextPointers) = *(pRD->pCurrentContextPointers);
+            Thread::VirtualUnwindCallFrame(pRD->pCallerContext, pRD->pCallerContextPointers, pCodeInfo);
+        }
 
-#endif // _DEBUG
+        pRD->IsCallerContextValid = TRUE;
+    }
 
-    /* What kind of a frame is this ? */
+    _ASSERTE( pRD->IsCallerContextValid );
+}
 
-    FrameType   frameType = FR_NORMAL;
-    TADDR       baseSP = 0;
+size_t EECodeManager::GetCallerSp( PREGDISPLAY  pRD )
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        SUPPORTS_DAC;
+    } CONTRACTL_END;
 
-    if (info.handlers)
+    // Don't add usage of this field.  This is only temporary.
+    // See ExceptionTracker::InitializeCrawlFrame() for more information.
+    if (!pRD->IsCallerSPValid)
     {
-        _ASSERTE(info.ebpFrame);
-
-        bool    hasInnerFilter, hadInnerFilter;
-        frameType = GetHandlerFrameInfo(&info, EBP,
-                                        ESP, (DWORD) IGNORE_VAL,
-                                        &baseSP, NULL,
-                                        &hasInnerFilter, &hadInnerFilter);
-        _ASSERTE(frameType != FR_INVALID);
-
-        /* If this is the parent frame of a filter which is currently
-           executing, then the filter would have enumerated the frame using
-           the filter PC.
-         */
+        EnsureCallerContextIsValid(pRD, NULL);
+    }
 
-        if (hasInnerFilter)
-            return true;
+    return GetSP(pRD->pCallerContext);
+}
 
-        /* If are in a try and we had a filter execute, we may have reported
-           GC refs from the filter (and not using the try's offset). So
-           we had better use the filter's end offset, as the try is
-           effectively dead and its GC ref's would be stale */
+#endif // FEATURE_EH_FUNCLETS
 
-        if (hadInnerFilter)
-        {
-            PTR_TADDR pFirstBaseSPslot = GetFirstBaseSPslotPtr(EBP, &info);
-            curOffs = (unsigned)pFirstBaseSPslot[1] - 1;
-            _ASSERTE(curOffs < info.methodSize);
+#ifdef HAS_LIGHTUNWIND
+/*
+  *  Light unwind the current stack frame, using provided cache entry.
+  *  pPC, Esp and pEbp of pContext are updated.
+  */
 
-            /* Extract the necessary information from the info block header */
+// static
+void EECodeManager::LightUnwindStackFrame(PREGDISPLAY pRD, EECodeInfo* pCodeInfo, LightUnwindFlag flag)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+    } CONTRACTL_END;
 
-            table = PTR_CBYTE(gcInfoToken.Info);
+#ifdef TARGET_AMD64
+    ULONG RBPOffset, RSPOffset;
+    pCodeInfo->GetOffsetsFromUnwindInfo(&RSPOffset, &RBPOffset);
 
-            table += DecodeGCHdrInfo(gcInfoToken,
-                                     curOffs,
-                                     &info);
-        }
+    if (pRD->IsCallerContextValid)
+    {
+        pRD->pCurrentContext->Rbp = pRD->pCallerContext->Rbp;
+        pRD->pCurrentContext->Rsp = pRD->pCallerContext->Rsp;
+        pRD->pCurrentContext->Rip = pRD->pCallerContext->Rip;
     }
-
-    bool        willContinueExecution = !(flags & ExecutionAborted);
-    unsigned    pushedSize = 0;
-
-    /* if we have been interrupted we don't have to report registers/arguments
-     * because we are about to lose this context anyway.
-     * Alas, if we are in a ebp-less method we have to parse the table
-     * in order to adjust ESP.
-     *
-     * Note that we report "this" for all methods, even if
-     * noncontinuable, because of the off chance they may be
-     * synchronized and we have to release the monitor on unwind. This
-     * could conceivably be optimized, but it turns out to be more
-     * expensive to check whether we're synchronized (which involves
-     * consulting metadata) than to just report "this" all the time in
-     * our most important scenarios.
-     */
-
-    if  (info.interruptible)
+    else
     {
-        unsigned curOffsRegs = curOffs;
-
-        // Don't decrement curOffsRegs when it is 0, as it is an unsigned and will wrap to MAX_UINT
-        //
-        if (curOffsRegs > 0)
+        PCONTEXT pSourceCtx = NULL;
+        PCONTEXT pTargetCtx = NULL;
+        if (flag == UnwindCurrentStackFrame)
         {
-            // If we are not on the active stack frame, we need to report gc registers
-            // that are live before the call. The reason is that the liveness of gc registers
-            // may change across a call to a method that does not return. In this case the instruction
-            // after the call may be a jump target and a register that didn't have a live gc pointer
-            // before the call may have a live gc pointer after the jump. To make sure we report the
-            // registers that have live gc pointers before the call we subtract 1 from curOffs.
-            if ((flags & ActiveStackFrame) == 0)
-            {
-                // We are not the top most stack frame (i.e. the ActiveStackFrame)
-                curOffsRegs--;   // decrement curOffsRegs
-            }
+            pTargetCtx = pRD->pCurrentContext;
+            pSourceCtx = pRD->pCurrentContext;
         }
-
-        pushedSize = scanArgRegTableI(skipToArgReg(info, table), curOffsRegs, curOffs, &info);
-
-        RegMask   regs  = info.regMaskResult;
-        RegMask  iregs  = info.iregMaskResult;
-        ptrArgTP  args  = info.argMaskResult;
-        ptrArgTP iargs  = info.iargMaskResult;
-
-        _ASSERTE((isZero(args) || pushedSize != 0) || info.ebpFrame);
-        _ASSERTE((args & iargs) == iargs);
-        // Only synchronized methods and generic code that accesses
-        // the type context via "this" need to report "this".
-        // If its reported for other methods, its probably
-        // done incorrectly. So flag such cases.
-        _ASSERTE(info.thisPtrResult == REGI_NA ||
-                 pCodeInfo->GetMethodDesc()->IsSynchronized() ||
-                 pCodeInfo->GetMethodDesc()->AcquiresInstMethodTableFromThis());
-
-            /* now report registers and arguments if we are not interrupted */
-
-        if  (willContinueExecution)
+        else
         {
+            pTargetCtx = pRD->pCallerContext;
+            pSourceCtx = pRD->pCurrentContext;
+        }
 
-            /* Propagate unsafed registers only in "current" method */
-            /* If this is not the active method, then the callee wil
-             * trash these registers, and so we wont need to report them */
-
-            if (flags & ActiveStackFrame)
-            {
-                CHK_AND_REPORT_REG(REGI_EAX, regs & RM_EAX, iregs & RM_EAX, Eax);
-                CHK_AND_REPORT_REG(REGI_ECX, regs & RM_ECX, iregs & RM_ECX, Ecx);
-                CHK_AND_REPORT_REG(REGI_EDX, regs & RM_EDX, iregs & RM_EDX, Edx);
-            }
-
-            CHK_AND_REPORT_REG(REGI_EBX, regs & RM_EBX, iregs & RM_EBX, Ebx);
-            CHK_AND_REPORT_REG(REGI_EBP, regs & RM_EBP, iregs & RM_EBP, Ebp);
-            CHK_AND_REPORT_REG(REGI_ESI, regs & RM_ESI, iregs & RM_ESI, Esi);
-            CHK_AND_REPORT_REG(REGI_EDI, regs & RM_EDI, iregs & RM_EDI, Edi);
-            _ASSERTE(!(regs & RM_ESP));
-
-            /* Report any pending pointer arguments */
-
-            DWORD * pPendingArgFirst;       // points **AT** first parameter
-            if (!info.ebpFrame)
-            {
-                // -sizeof(void*) because we want to point *AT* first parameter
-                pPendingArgFirst = (DWORD *)(size_t)(ESP + pushedSize - sizeof(void*));
-            }
-            else
-            {
-                _ASSERTE(willContinueExecution);
-
-                if (info.handlers)
-                {
-                    // -sizeof(void*) because we want to point *AT* first parameter
-                    pPendingArgFirst = (DWORD *)(size_t)(baseSP - sizeof(void*));
-                }
-                else if (info.localloc)
-                {
-                    baseSP = *(DWORD *)(size_t)(EBP - GetLocallocSPOffset(&info));
-                    // -sizeof(void*) because we want to point *AT* first parameter
-                    pPendingArgFirst = (DWORD *)(size_t) (baseSP - sizeof(void*));
-                }
-                else
-                {
-                    // Note that 'info.stackSize includes the size for pushing EBP, but EBP is pushed
-                    // BEFORE EBP is set from ESP, thus (EBP - info.stackSize) actually points past
-                    // the frame by one DWORD, and thus points *AT* the first parameter
-
-                    pPendingArgFirst = (DWORD *)(size_t)(EBP - info.stackSize);
-                }
-            }
-
-            if  (!isZero(args))
-            {
-                unsigned   i = 0;
-                ptrArgTP   b(1);
-                for (; !isZero(args) && (i < MAX_PTRARG_OFS); i += 1, b <<= 1)
-                {
-                    if  (intersect(args,b))
-                    {
-                        unsigned    argAddr = (unsigned)(size_t)(pPendingArgFirst - i);
-                        bool        iptr    = false;
-
-                        setDiff(args, b);
-                        if (intersect(iargs,b))
-                        {
-                            setDiff(iargs, b);
-                            iptr   = true;
-                        }
-
-#ifdef _DEBUG
-                        if (dspPtr)
-                        {
-                            printf("    Pushed ptr arg  [E");
-                            if  (info.ebpFrame)
-                                printf("BP-%02XH]: ", EBP - argAddr);
-                            else
-                                printf("SP+%02XH]: ", argAddr - ESP);
-                        }
-#endif
-                        _ASSERTE(true == GC_CALL_INTERIOR);
-                        pCallBack(hCallBack, (OBJECTREF *)(size_t)argAddr, (int)iptr | CHECK_APP_DOMAIN
-                                  DAC_ARG(DacSlotLocation(info.ebpFrame ? REGI_EBP : REGI_ESP,
-                                                          info.ebpFrame ? EBP - argAddr : argAddr - ESP,
-                                                          true)));
-                    }
-                }
-            }
+        // Unwind RBP.  The offset is relative to the current sp.
+        if (RBPOffset == 0)
+        {
+            pTargetCtx->Rbp = pSourceCtx->Rbp;
         }
         else
         {
-            // Is "this" enregistered. If so, report it as we might need to
-            // release the monitor for synchronized methods.
-            // Else, it is on the stack and will be reported below.
-
-            if (info.thisPtrResult != REGI_NA)
-            {
-                // Synchronized methods and methods satisfying
-                // MethodDesc::AcquiresInstMethodTableFromThis (i.e. those
-                // where "this" is reported in thisPtrResult) are
-                // not supported on value types.
-                _ASSERTE((regNumToMask(info.thisPtrResult) & info.iregMaskResult)== 0);
-
-                void * thisReg = getCalleeSavedReg(pContext, info.thisPtrResult);
-                pCallBack(hCallBack, (OBJECTREF *)thisReg, CHECK_APP_DOMAIN
-                          DAC_ARG(DacSlotLocation(info.thisPtrResult, 0, false)));
-            }
+            pTargetCtx->Rbp = *(UINT_PTR*)(pSourceCtx->Rsp + RBPOffset);
         }
-    }
-    else /* not interruptible */
-    {
-        pushedSize = scanArgRegTable(skipToArgReg(info, table), curOffs, &info);
-
-        RegMask    regMask = info.regMaskResult;
-        RegMask   iregMask = info.iregMaskResult;
-        ptrArgTP   argMask = info.argMaskResult;
-        ptrArgTP  iargMask = info.iargMaskResult;
-        unsigned   argHnum = info.argHnumResult;
-        PTR_CBYTE   argTab = info.argTabResult;
-
-        // Only synchronized methods and generic code that accesses
-        // the type context via "this" need to report "this".
-        // If its reported for other methods, its probably
-        // done incorrectly. So flag such cases.
-        _ASSERTE(info.thisPtrResult == REGI_NA ||
-                 pCodeInfo->GetMethodDesc()->IsSynchronized()   ||
-                 pCodeInfo->GetMethodDesc()->AcquiresInstMethodTableFromThis());
-
-
-        /* now report registers and arguments if we are not interrupted */
-
-        if  (willContinueExecution)
-        {
-
-            /* Report all live pointer registers */
-
-            CHK_AND_REPORT_REG(REGI_EDI, regMask & RM_EDI, iregMask & RM_EDI, Edi);
-            CHK_AND_REPORT_REG(REGI_ESI, regMask & RM_ESI, iregMask & RM_ESI, Esi);
-            CHK_AND_REPORT_REG(REGI_EBX, regMask & RM_EBX, iregMask & RM_EBX, Ebx);
-            CHK_AND_REPORT_REG(REGI_EBP, regMask & RM_EBP, iregMask & RM_EBP, Ebp);
-
-            /* Esp cant be reported */
-            _ASSERTE(!(regMask & RM_ESP));
-            /* No callee-trashed registers */
-            _ASSERTE(!(regMask & RM_CALLEE_TRASHED));
-            /* EBP can't be reported unless we have an EBP-less frame */
-            _ASSERTE(!(regMask & RM_EBP) || !(info.ebpFrame));
-
-            /* Report any pending pointer arguments */
-
-            if (argTab != 0)
-            {
-                unsigned    lowBits, stkOffs, argAddr, val;
 
-                // argMask does not fit in 32-bits
-                // thus arguments are reported via a table
-                // Both of these are very rare cases
-
-                do
-                {
-                    val = fastDecodeUnsigned(argTab);
-
-                    lowBits = val &  OFFSET_MASK;
-                    stkOffs = val & ~OFFSET_MASK;
-                    _ASSERTE((lowBits == 0) || (lowBits == byref_OFFSET_FLAG));
-
-                    argAddr = ESP + stkOffs;
-#ifdef _DEBUG
-                    if (dspPtr)
-                        printf("    Pushed %sptr arg at [ESP+%02XH]",
-                               lowBits ? "iptr " : "", stkOffs);
-#endif
-                    _ASSERTE(byref_OFFSET_FLAG == GC_CALL_INTERIOR);
-                    pCallBack(hCallBack, (OBJECTREF *)(size_t)argAddr, lowBits | CHECK_APP_DOMAIN
-                              DAC_ARG(DacSlotLocation(REGI_ESP, stkOffs, true)));
-                }
-                while(--argHnum);
-
-                _ASSERTE(info.argTabResult + info.argTabBytes == argTab);
-            }
-            else
-            {
-                unsigned argAddr = ESP;
-
-                while (!isZero(argMask))
-                {
-                    _ASSERTE(argHnum-- > 0);
-
-                    if  (toUnsigned(argMask) & 1)
-                    {
-                        bool     iptr    = false;
-
-                        if (toUnsigned(iargMask) & 1)
-                            iptr = true;
-#ifdef _DEBUG
-                        if (dspPtr)
-                            printf("    Pushed ptr arg at [ESP+%02XH]",
-                                   argAddr - ESP);
-#endif
-                        _ASSERTE(true == GC_CALL_INTERIOR);
-                        pCallBack(hCallBack, (OBJECTREF *)(size_t)argAddr, (int)iptr | CHECK_APP_DOMAIN
-                                  DAC_ARG(DacSlotLocation(REGI_ESP, argAddr - ESP, true)));
-                    }
-
-                    argMask >>= 1;
-                    iargMask >>= 1;
-                    argAddr  += 4;
-                }
+        // Adjust the sp.  From this pointer onwards pCurrentContext->Rsp is the caller sp.
+        pTargetCtx->Rsp = pSourceCtx->Rsp + RSPOffset;
 
-            }
+        // Retrieve the return address.
+        pTargetCtx->Rip = *(UINT_PTR*)((pTargetCtx->Rsp) - sizeof(UINT_PTR));
+    }
 
-        }
-        else
-        {
-            // Is "this" enregistered. If so, report it as we will need to
-            // release the monitor. Else, it is on the stack and will be
-            // reported below.
+    if (flag == UnwindCurrentStackFrame)
+    {
+        SyncRegDisplayToCurrentContext(pRD);
+        pRD->IsCallerContextValid = FALSE;
+        pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
+    }
+#else
+    PORTABILITY_ASSERT("EECodeManager::LightUnwindStackFrame is not implemented on this platform.");
+#endif
+}
+#endif // HAS_LIGHTUNWIND
 
-            // For partially interruptible code, info.thisPtrResult will be
-            // the last known location of "this". So the compiler needs to
-            // generate information which is correct at every point in the code,
-            // not just at call sites.
+#ifdef FEATURE_EH_FUNCLETS
+#ifdef TARGET_X86
+size_t EECodeManager::GetResumeSp( PCONTEXT  pContext )
+{
+    PCODE currentPc = PCODE(pContext->Eip);
 
-            if (info.thisPtrResult != REGI_NA)
-            {
-                // Synchronized methods on value types are not supported
-                _ASSERTE((regNumToMask(info.thisPtrResult) & info.iregMaskResult)== 0);
+    _ASSERTE(ExecutionManager::IsManagedCode(currentPc));
 
-                void * thisReg = getCalleeSavedReg(pContext, info.thisPtrResult);
-                pCallBack(hCallBack, (OBJECTREF *)thisReg, CHECK_APP_DOMAIN
-                          DAC_ARG(DacSlotLocation(info.thisPtrResult, 0, false)));
-            }
-        }
+    EECodeInfo codeInfo(currentPc);
 
-    } //info.interruptible
+    PTR_CBYTE methodStart = PTR_CBYTE(codeInfo.GetSavedMethodCode());
 
-    /* compute the argument base (reference point) */
+    GCInfoToken gcInfoToken = codeInfo.GetGCInfoToken();
+    PTR_VOID    methodInfoPtr = gcInfoToken.Info;
+    DWORD       curOffs = codeInfo.GetRelOffset();
 
-    unsigned    argBase;
+    CodeManStateBuf stateBuf;
 
-    if (info.ebpFrame)
-        argBase = EBP;
-    else
-        argBase = ESP + pushedSize;
+    stateBuf.hdrInfoSize = (DWORD)DecodeGCHdrInfo(gcInfoToken,
+                                                  curOffs,
+                                                  &stateBuf.hdrInfoBody);
 
-#if VERIFY_GC_TABLES
-    _ASSERTE(*castto(table, unsigned short *)++ == 0xBEEF);
-#endif
+    PTR_CBYTE table = dac_cast<PTR_CBYTE>(methodInfoPtr) + stateBuf.hdrInfoSize;
 
-    unsigned ptrAddr;
-    unsigned lowBits;
+    hdrInfo *info = &stateBuf.hdrInfoBody;
 
+    _ASSERTE(info->epilogOffs == hdrInfo::NOT_IN_EPILOG && info->prologOffs == hdrInfo::NOT_IN_PROLOG);
 
-    /* Process the untracked frame variable table */
+    bool isESPFrame = !info->ebpFrame && !info->doubleAlign;
 
-#if defined(FEATURE_EH_FUNCLETS)   // funclets
-    // Filters are the only funclet that run during the 1st pass, and must have
-    // both the leaf and the parent frame reported.  In order to avoid double
-    // reporting of the untracked variables, do not report them for the filter.
-    if (!pCodeInfo->GetJitManager()->IsFilterFunclet(pCodeInfo))
-#endif // FEATURE_EH_FUNCLETS
+    if (codeInfo.IsFunclet())
     {
-        count = info.untrackedCnt;
-        int lastStkOffs = 0;
-        while (count-- > 0)
-        {
-            int stkOffs = fastDecodeSigned(table);
-            stkOffs = lastStkOffs - stkOffs;
-            lastStkOffs = stkOffs;
-
-            _ASSERTE(0 == ~OFFSET_MASK % sizeof(void*));
-
-            lowBits  =   OFFSET_MASK & stkOffs;
-            stkOffs &=  ~OFFSET_MASK;
-
-            ptrAddr = argBase + stkOffs;
-            if (info.doubleAlign && stkOffs >= int(info.stackSize - sizeof(void*))) {
-                // We encode the arguments as if they were ESP based variables even though they aren't
-                // If this frame would have ben an ESP based frame,   This fake frame is one DWORD
-                // smaller than the real frame because it did not push EBP but the real frame did.
-                // Thus to get the correct EBP relative offset we have to adjust by info.stackSize-sizeof(void*)
-                ptrAddr = EBP + (stkOffs-(info.stackSize - sizeof(void*)));
-            }
-
-#ifdef  _DEBUG
-            if (dspPtr)
-            {
-                printf("    Untracked %s%s local at [E",
-                            (lowBits & pinned_OFFSET_FLAG) ? "pinned " : "",
-                            (lowBits & byref_OFFSET_FLAG)  ? "byref"   : "");
+        // Treat funclet's frame as ESP frame
+        isESPFrame = true;
+    }
 
-                int   dspOffs = ptrAddr;
-                char  frameType;
+    if (isESPFrame)
+    {
+        const size_t curESP = (size_t)(pContext->Esp);
+        return curESP + GetPushedArgSize(info, table, curOffs);
+    }
 
-                if (info.ebpFrame) {
-                    dspOffs   -= EBP;
-                    frameType  = 'B';
-                }
-                else {
-                    dspOffs   -= ESP;
-                    frameType  = 'S';
-                }
+    const size_t curEBP = (size_t)(pContext->Ebp);
+    return GetOutermostBaseFP(curEBP, info);
+}
+#endif // TARGET_X86
+#endif // FEATURE_EH_FUNCLETS
 
-                if (dspOffs < 0)
-                    printf("%cP-%02XH]: ", frameType, -dspOffs);
-                else
-                    printf("%cP+%02XH]: ", frameType, +dspOffs);
-            }
-#endif
+#ifndef FEATURE_EH_FUNCLETS
 
-            _ASSERTE((pinned_OFFSET_FLAG == GC_CALL_PINNED) &&
-                   (byref_OFFSET_FLAG  == GC_CALL_INTERIOR));
-            pCallBack(hCallBack, (OBJECTREF*)(size_t)ptrAddr, lowBits | CHECK_APP_DOMAIN
-                      DAC_ARG(DacSlotLocation(info.ebpFrame ? REGI_EBP : REGI_ESP,
-                                              info.ebpFrame ? EBP - ptrAddr : ptrAddr - ESP,
-                                              true)));
-        }
+/*****************************************************************************
+ *
+ *  Unwind the current stack frame, i.e. update the virtual register
+ *  set in pContext. This will be similar to the state after the function
+ *  returns back to caller (IP points to after the call, Frame and Stack
+ *  pointer has been reset, callee-saved registers restored (if UpdateAllRegs),
+ *  callee-unsaved registers are trashed.
+ *  Returns success of operation.
+ */
 
-    }
+bool EECodeManager::UnwindStackFrame(PREGDISPLAY     pContext,
+                                     EECodeInfo     *pCodeInfo,
+                                     unsigned        flags,
+                                     CodeManState   *pState)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        SUPPORTS_DAC;
+    } CONTRACTL_END;
 
-#if VERIFY_GC_TABLES
-    _ASSERTE(*castto(table, unsigned short *)++ == 0xCAFE);
-#endif
+#ifdef TARGET_X86
+    bool updateAllRegs = flags & UpdateAllRegs;
 
-    /* Process the frame variable lifetime table */
-    count = info.varPtrTableSize;
+    // Address where the method has been interrupted
+    PCODE       breakPC = pContext->ControlPC;
+    _ASSERTE(PCODEToPINSTR(breakPC) == pCodeInfo->GetCodeAddress());
 
-    /* If we are not in the active method, we are currently pointing
-     * to the return address; at the return address stack variables
-     * can become dead if the call the last instruction of a try block
-     * and the return address is the jump around the catch block. Therefore
-     * we simply assume an offset inside of call instruction.
-     */
+    GCInfoToken gcInfoToken = pCodeInfo->GetGCInfoToken();
+    PTR_VOID    methodInfoPtr = gcInfoToken.Info;
+    DWORD       curOffs = pCodeInfo->GetRelOffset();
 
-    unsigned newCurOffs;
+    _ASSERTE(sizeof(CodeManStateBuf) <= sizeof(pState->stateBuf));
+    CodeManStateBuf * stateBuf = (CodeManStateBuf*)pState->stateBuf;
 
-    if (willContinueExecution)
-    {
-        newCurOffs = (flags & ActiveStackFrame) ?  curOffs    // after "call"
-                                                :  curOffs-1; // inside "call"
-    }
-    else
+    if (pState->dwIsSet == 0)
     {
-        /* However if ExecutionAborted, then this must be one of the
-         * ExceptionFrames. Handle accordingly
-         */
-        _ASSERTE(!(flags & AbortingCall) || !(flags & ActiveStackFrame));
+        /* Extract the necessary information from the info block header */
 
-        newCurOffs = (flags & AbortingCall) ? curOffs-1 // inside "call"
-                                            : curOffs;  // at faulting instr, or start of "try"
+        stateBuf->hdrInfoSize = (DWORD)DecodeGCHdrInfo(gcInfoToken,
+                                                          curOffs,
+                                                          &stateBuf->hdrInfoBody);
     }
 
-    ptrOffs    = 0;
-
-    while (count-- > 0)
-    {
-        int       stkOffs;
-        unsigned  begOffs;
-        unsigned  endOffs;
+    PTR_CBYTE table = dac_cast<PTR_CBYTE>(methodInfoPtr) + stateBuf->hdrInfoSize;
 
-        stkOffs = fastDecodeUnsigned(table);
-        begOffs  = ptrOffs + fastDecodeUnsigned(table);
-        endOffs  = begOffs + fastDecodeUnsigned(table);
+    hdrInfo * info = &stateBuf->hdrInfoBody;
 
-        _ASSERTE(0 == ~OFFSET_MASK % sizeof(void*));
+    info->isSpeculativeStackWalk = ((flags & SpeculativeStackwalk) != 0);
 
-        lowBits  =   OFFSET_MASK & stkOffs;
-        stkOffs &=  ~OFFSET_MASK;
+    return UnwindStackFrameX86(pContext,
+                               PTR_CBYTE(pCodeInfo->GetSavedMethodCode()),
+                               curOffs,
+                               info,
+                               table,
+                               IN_EH_FUNCLETS_COMMA(PTR_CBYTE(pCodeInfo->GetJitManager()->GetFuncletStartAddress(pCodeInfo)))
+                               IN_EH_FUNCLETS_COMMA(pCodeInfo->IsFunclet())
+                               updateAllRegs);
+#else // TARGET_X86
+    PORTABILITY_ASSERT("EECodeManager::UnwindStackFrame");
+    return false;
+#endif // _TARGET_???_
+}
 
-        if (info.ebpFrame) {
-            stkOffs = -stkOffs;
-            _ASSERTE(stkOffs < 0);
-        }
-        else {
-            _ASSERTE(stkOffs >= 0);
-        }
+/*****************************************************************************/
+#else // !FEATURE_EH_FUNCLETS
+/*****************************************************************************/
 
-        ptrAddr = argBase + stkOffs;
+bool EECodeManager::UnwindStackFrame(PREGDISPLAY     pContext,
+                                     EECodeInfo     *pCodeInfo,
+                                     unsigned        flags,
+                                     CodeManState   *pState)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+    } CONTRACTL_END;
 
-        /* Is this variable live right now? */
+    _ASSERTE(pCodeInfo != NULL);
 
-        if (newCurOffs >= begOffs)
-        {
-            if (newCurOffs <  endOffs)
-            {
-#ifdef  _DEBUG
-                if (dspPtr) {
-                    printf("    Frame %s%s local at [E",
-                           (lowBits & byref_OFFSET_FLAG) ? "byref "   : "",
-#ifndef FEATURE_EH_FUNCLETS
-                           (lowBits & this_OFFSET_FLAG)  ? "this-ptr" : "");
-#else
-                           (lowBits & pinned_OFFSET_FLAG)  ? "pinned" : "");
+#ifdef HAS_LIGHTUNWIND
+    if (flags & LightUnwind)
+    {
+        LightUnwindStackFrame(pContext, pCodeInfo, UnwindCurrentStackFrame);
+        return true;
+    }
 #endif
 
+    Thread::VirtualUnwindCallFrame(pContext, pCodeInfo);
+    return true;
+}
 
-                    int  dspOffs = ptrAddr;
-                    char frameType;
-
-                    if (info.ebpFrame) {
-                        dspOffs   -= EBP;
-                        frameType  = 'B';
-                    }
-                    else {
-                        dspOffs   -= ESP;
-                        frameType  = 'S';
-                    }
+/*****************************************************************************/
+#endif // FEATURE_EH_FUNCLETS
 
-                    if (dspOffs < 0)
-                        printf("%cP-%02XH]: ", frameType, -dspOffs);
-                    else
-                        printf("%cP+%02XH]: ", frameType, +dspOffs);
-                }
-#endif
+/*****************************************************************************/
 
-                unsigned flags = CHECK_APP_DOMAIN;
-#ifndef FEATURE_EH_FUNCLETS
-                // First  Bit : byref
-                // Second Bit : this
-                // The second bit means `this` not `pinned`. So we ignore it.
-                flags |= lowBits & byref_OFFSET_FLAG;
-#else
-                // First  Bit : byref
-                // Second Bit : pinned
-                // Both bits are valid
-                flags |= lowBits;
-#endif
+/* report args in 'msig' to the GC.
+   'argsStart' is start of the stack-based arguments
+   'varArgSig' describes the arguments
+   'ctx' has the GC reporting info
+*/
+void promoteVarArgs(PTR_BYTE argsStart, PTR_VASigCookie varArgSig, GCCONTEXT* ctx)
+{
+    WRAPPER_NO_CONTRACT;
 
-                _ASSERTE(byref_OFFSET_FLAG == GC_CALL_INTERIOR);
-                pCallBack(hCallBack, (OBJECTREF*)(size_t)ptrAddr, flags
-                          DAC_ARG(DacSlotLocation(info.ebpFrame ? REGI_EBP : REGI_ESP,
-                                          info.ebpFrame ? EBP - ptrAddr : ptrAddr - ESP,
-                                          true)));
-            }
-        }
-        // exit loop early if start of live range is beyond PC, as ranges are sorted by lower bound
-        else break;
+    SigTypeContext typeContext(varArgSig->classInst, varArgSig->methodInst);
+    MetaSig msig(varArgSig->signature,
+                 varArgSig->pModule,
+                 &typeContext);
 
-        ptrOffs  = begOffs;
-    }
+    PTR_BYTE pFrameBase = argsStart - TransitionBlock::GetOffsetOfArgs();
 
+    ArgIterator argit(&msig);
 
-#if VERIFY_GC_TABLES
-    _ASSERTE(*castto(table, unsigned short *)++ == 0xBABE);
+#ifdef TARGET_X86
+    // For the X86 target the JIT does not report any of the fixed args for a varargs method
+    // So we report the fixed args via the promoteArgs call below
+    bool skipFixedArgs = false;
+#else
+    // For other platforms the JITs do report the fixed args of a varargs method
+    // So we must tell promoteArgs to skip to the end of the fixed args
+    bool skipFixedArgs = true;
 #endif
 
-#ifdef FEATURE_EH_FUNCLETS   // funclets
-    //
-    // If we're in a funclet, we do not want to report the incoming varargs.  This is
-    // taken care of by the parent method and the funclet should access those arguments
-    // by way of the parent method's stack frame.
-    //
-    if(pCodeInfo->IsFunclet())
+    bool inVarArgs = false;
+
+    int argOffset;
+    while ((argOffset = argit.GetNextOffset()) != TransitionBlock::InvalidOffset)
     {
-        return true;
+        if (msig.GetArgProps().AtSentinel())
+            inVarArgs = true;
+
+        // if skipFixedArgs is false we report all arguments
+        //  otherwise we just report the varargs.
+        if (!skipFixedArgs || inVarArgs)
+        {
+            ArgDestination argDest(pFrameBase, argOffset, argit.GetArgLocDescForStructInRegs());
+            msig.GcScanRoots(&argDest, ctx->f, ctx->sc);
+        }
     }
-#endif // FEATURE_EH_FUNCLETS
+}
 
-    /* Are we a varargs function, if so we have to report all args
-       except 'this' (note that the GC tables created by the x86 jit
-       do not contain ANY arguments except 'this' (even if they
-       were statically declared */
+#ifndef DACCESS_COMPILE
+FCIMPL1(void, GCReporting::Register, GCFrame* frame)
+{
+    FCALL_CONTRACT;
 
-    if (info.varargs) {
-        LOG((LF_GCINFO, LL_INFO100, "Reporting incoming vararg GC refs\n"));
+    // Construct a GCFrame.
+    _ASSERTE(frame != NULL);
+    frame->Push(GetThread());
+}
+FCIMPLEND
 
-        PTR_BYTE argsStart;
+FCIMPL1(void, GCReporting::Unregister, GCFrame* frame)
+{
+    FCALL_CONTRACT;
 
-        if (info.ebpFrame || info.doubleAlign)
-            argsStart = PTR_BYTE((size_t)EBP) + 2* sizeof(void*);                 // pushed EBP and retAddr
-        else
-            argsStart = PTR_BYTE((size_t)argBase) + info.stackSize + sizeof(void*);   // ESP + locals + retAddr
+    // Destroy the GCFrame.
+    _ASSERTE(frame != NULL);
+    frame->Remove();
+}
+FCIMPLEND
+#endif // !DACCESS_COMPILE
 
-#if defined(_DEBUG) && !defined(DACCESS_COMPILE)
-        // Note that I really want to say hCallBack is a GCCONTEXT, but this is pretty close
-        extern void GcEnumObject(LPVOID pData, OBJECTREF *pObj, uint32_t flags);
-        _ASSERTE((void*) GcEnumObject == pCallBack);
-#endif
-        GCCONTEXT   *pCtx = (GCCONTEXT *) hCallBack;
+#ifndef USE_GC_INFO_DECODER
 
-        // For varargs, look up the signature using the varArgSig token passed on the stack
-        PTR_VASigCookie varArgSig = *PTR_PTR_VASigCookie(argsStart);
+/*****************************************************************************
+ *
+ *  Enumerate all live object references in that function using
+ *  the virtual register set.
+ *  Returns success of operation.
+ */
+
+bool EECodeManager::EnumGcRefs( PREGDISPLAY     pContext,
+                                EECodeInfo     *pCodeInfo,
+                                unsigned        flags,
+                                GCEnumCallback  pCallBack,
+                                LPVOID          hCallBack,
+                                DWORD           relOffsetOverride)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+    } CONTRACTL_END;
+
+    PTR_CBYTE methodStart = PTR_CBYTE(pCodeInfo->GetSavedMethodCode());
+    unsigned  curOffs = pCodeInfo->GetRelOffset();
+    GCInfoToken gcInfoToken = pCodeInfo->GetGCInfoToken();
 
-        promoteVarArgs(argsStart, varArgSig, pCtx);
+    if (relOffsetOverride != NO_OVERRIDE_OFFSET)
+    {
+        curOffs = relOffsetOverride;
     }
 
-    return true;
+    return ::EnumGcRefsX86(pContext,
+                           methodStart,
+                           curOffs,
+                           gcInfoToken,
+                           IN_EH_FUNCLETS_COMMA(PTR_CBYTE(pCodeInfo->GetJitManager()->GetFuncletStartAddress(pCodeInfo)))
+                           IN_EH_FUNCLETS_COMMA(pCodeInfo->IsFunclet())
+                           IN_EH_FUNCLETS_COMMA(pCodeInfo->GetJitManager()->IsFilterFunclet(pCodeInfo))
+                           flags,
+                           pCallBack,
+                           hCallBack);
 }
 
 #else // !USE_GC_INFO_DECODER
@@ -5266,17 +1465,15 @@ bool EECodeManager::EnumGcRefs( PREGDISPLAY     pRD,
     }
     else
     {
-        /* However if ExecutionAborted, then this must be one of the
-         * ExceptionFrames. Handle accordingly
-         */
-        _ASSERTE(!(flags & AbortingCall) || !(flags & ActiveStackFrame));
+        // Since we are aborting execution, we are either in a frame that actually faulted or in a throwing call.
+        // * We do not need to adjust in a leaf
+        // * A throwing call will have unreachable <brk> after it, thus GC info is the same as before the call.
+        // 
+        // Either way we do not need to adjust.
 
-        if (flags & AbortingCall)
-        {
-            curOffs--;
-            LOG((LF_GCINFO, LL_INFO1000, "Adjusted GC reporting offset due to flags ExecutionAborted && AbortingCall. Now reporting GC refs for %s at offset %04x.\n",
-                methodName, curOffs));
-        }
+        // NOTE: only fully interruptible methods may need to report anything here as without
+        //       exception handling all current local variables are already unreachable.
+        //       EnumerateLiveSlots will shortcircuit the partially interruptible case just a bit later.
     }
 
     // Check if we have been given an override value for relOffset
@@ -5450,7 +1647,6 @@ OBJECTREF EECodeManager::GetInstance( PREGDISPLAY    pContext,
     hdrInfo     info;
     unsigned    stackDepth;
     TADDR       taArgBase;
-    unsigned    count;
 
     /* Extract the necessary information from the info block header */
 
@@ -5509,7 +1705,7 @@ OBJECTREF EECodeManager::GetInstance( PREGDISPLAY    pContext,
     /* The 'this' pointer can never be located in the untracked table */
     /* as we only allow pinned and byrefs in the untracked table      */
 
-    count = info.untrackedCnt;
+    unsigned count = info.untrackedCnt;
     while (count-- > 0)
     {
         fastSkipSigned(table);
@@ -5754,8 +1950,6 @@ void * EECodeManager::GetGSCookieAddr(PREGDISPLAY     pContext,
         GC_NOTRIGGER;
     } CONTRACTL_END;
 
-    _ASSERTE(sizeof(CodeManStateBuf) <= sizeof(pState->stateBuf));
-
     GCInfoToken    gcInfoToken = pCodeInfo->GetGCInfoToken();
     unsigned       relOffset = pCodeInfo->GetRelOffset();
 
@@ -5767,6 +1961,8 @@ void * EECodeManager::GetGSCookieAddr(PREGDISPLAY     pContext,
 #endif
 
 #ifndef USE_GC_INFO_DECODER
+    _ASSERTE(sizeof(CodeManStateBuf) <= sizeof(pState->stateBuf));
+
     CodeManStateBuf * stateBuf = (CodeManStateBuf*)pState->stateBuf;
 
     /* Extract the necessary information from the info block header */
diff --git a/src/coreclr/vm/encee.cpp b/src/coreclr/vm/encee.cpp
index 3f29161cb4be..9b6c487c11a3 100644
--- a/src/coreclr/vm/encee.cpp
+++ b/src/coreclr/vm/encee.cpp
@@ -231,7 +231,7 @@ HRESULT EditAndContinueModule::ApplyEditAndContinue(
                     IfFailGo(E_INVALIDARG);
                 }
 
-                SetDynamicIL(token, (TADDR)(pLocalILMemory + dwMethodRVA), FALSE);
+                SetDynamicIL(token, (TADDR)(pLocalILMemory + dwMethodRVA));
 
                 // use module to resolve to method
                 MethodDesc *pMethod;
diff --git a/src/coreclr/vm/eventing/eventpipe/ds-rt-coreclr.h b/src/coreclr/vm/eventing/eventpipe/ds-rt-coreclr.h
index e6036353d8b9..fb6c0c3feeda 100644
--- a/src/coreclr/vm/eventing/eventpipe/ds-rt-coreclr.h
+++ b/src/coreclr/vm/eventing/eventpipe/ds-rt-coreclr.h
@@ -391,7 +391,11 @@ ds_rt_server_log_pause_message (void)
 	STATIC_CONTRACT_NOTHROW;
 
 	const char diagPortsName[] = "DiagnosticPorts";
-	CLRConfigNoCache diagPorts = CLRConfigNoCache::Get(diagPortsName);
+#ifdef HOST_WINDOWS
+    CLRConfigNoCache diagPorts = CLRConfigNoCache::Get(diagPortsName);
+#else
+    CLRConfigNoCache diagPorts = CLRConfigNoCache::Get(diagPortsName, /* noPrefix */ false, &PAL_getenv);
+#endif
 	LPCSTR ports = nullptr;
 	if (diagPorts.IsSet())
 	{
diff --git a/src/coreclr/vm/eventing/eventpipe/ep-rt-coreclr.cpp b/src/coreclr/vm/eventing/eventpipe/ep-rt-coreclr.cpp
index 913ef57fce50..8542b2d0a904 100644
--- a/src/coreclr/vm/eventing/eventpipe/ep-rt-coreclr.cpp
+++ b/src/coreclr/vm/eventing/eventpipe/ep-rt-coreclr.cpp
@@ -74,10 +74,6 @@ ep_rt_coreclr_walk_managed_stack_for_thread (
 	EP_ASSERT (thread != NULL);
 	EP_ASSERT (stack_contents != NULL);
 
-	// Calling into StackWalkFrames in preemptive mode violates the host contract,
-	// but this contract is not used on CoreCLR.
-	CONTRACT_VIOLATION (HostViolation);
-
 	// Before we call into StackWalkFrames we need to mark GC_ON_TRANSITIONS as FALSE
 	// because under GCStress runs (GCStress=0x3), a GC will be triggered for every transition,
 	// which will cause the GC to try to walk the stack while we are in the middle of walking the stack.
diff --git a/src/coreclr/vm/eventreporter.cpp b/src/coreclr/vm/eventreporter.cpp
index 2065ed5ea320..614c95bfbcce 100644
--- a/src/coreclr/vm/eventreporter.cpp
+++ b/src/coreclr/vm/eventreporter.cpp
@@ -287,7 +287,7 @@ void EventReporter::AddStackTrace(SString& s)
         COUNT_T curSize = m_Description.GetCount();
 
         // Truncate the buffer if we have exceeded the limit based upon the OS we are on
-        DWORD dwMaxSizeLimit = MAX_SIZE_EVENTLOG_ENTRY_STRING_WINVISTA;
+        DWORD dwMaxSizeLimit = MAX_SIZE_EVENTLOG_ENTRY_STRING;
         if (curSize >= dwMaxSizeLimit)
         {
             // Load the truncation message
diff --git a/src/coreclr/vm/eventreporter.h b/src/coreclr/vm/eventreporter.h
index 9b600e76cd4a..dced671f6db9 100644
--- a/src/coreclr/vm/eventreporter.h
+++ b/src/coreclr/vm/eventreporter.h
@@ -15,18 +15,11 @@
 #include "contract.h"
 #include "sstring.h"
 
-// Maximum size for a string in event log entry
-#define MAX_SIZE_EVENTLOG_ENTRY_STRING 0x8000 // decimal 32768
-
-// The (approx.) maximum size that Vista appears to allow. Post discussion with the OS event log team,
-// it has been identified that Vista has taken a breaking change in ReportEventW API implementation
-// without getting it publicly documented.
+// The (approx.) maximum size that EventLog appears to allow.
 //
-// An event entry comprises of string to be written and event header information. Prior to Vista,
-// 32K length strings were allowed and event header size was over it. Vista onwards, the total
-// permissible length of the string and event header became 32K, resulting in strings becoming
-// shorter in length. Hence, the change in size.
-#define MAX_SIZE_EVENTLOG_ENTRY_STRING_WINVISTA 0x7C62 // decimal 31842
+// An event entry comprises of string to be written and event header information.
+// The total permissible length of the string and event header is 32K.
+#define MAX_SIZE_EVENTLOG_ENTRY_STRING 0x7C62 // decimal 31842
 
 class EventReporter
 {
diff --git a/src/coreclr/vm/eventtrace.cpp b/src/coreclr/vm/eventtrace.cpp
index 89939e3d6679..1bac59f998fd 100644
--- a/src/coreclr/vm/eventtrace.cpp
+++ b/src/coreclr/vm/eventtrace.cpp
@@ -264,11 +264,6 @@ ETW::SamplingLog::EtwStackWalkStatus ETW::SamplingLog::GetCurrentThreadsCallStac
     }
     CONTRACTL_END;
 
-    // The stack walk performed below can cause allocations (thus entering the host). But
-    // this is acceptable, since we're not supporting the use of SQL/F1 profiling and
-    // full-blown ETW CLR stacks (which would be redundant).
-    PERMANENT_CONTRACT_VIOLATION(HostViolation, ReasonUnsupportedForSQLF1Profiling);
-
     m_FrameCount = 0;
     ETW::SamplingLog::EtwStackWalkStatus stackwalkStatus = SaveCurrentStack();
 
diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp
index 245909f7a72f..7437a7988ada 100644
--- a/src/coreclr/vm/excep.cpp
+++ b/src/coreclr/vm/excep.cpp
@@ -2889,7 +2889,7 @@ VOID DECLSPEC_NORETURN RealCOMPlusThrow(Object *exceptionObj)
     CONTRACTL_END;
 
     OBJECTREF throwable = ObjectToOBJECTREF(exceptionObj);
-    RealCOMPlusThrow(throwable, FALSE);
+    RealCOMPlusThrowWorker(throwable, FALSE);
 }
 #endif // USE_CHECKED_OBJECTREFS
 
@@ -3383,7 +3383,7 @@ BOOL StackTraceInfo::AppendElement(BOOL bAllowAllocMem, UINT_PTR currentIP, UINT
         }
         else if (!pCf->HasFaulted() && pStackTraceElem->ip != 0)
         {
-            pStackTraceElem->ip -= 1;
+            pStackTraceElem->ip -= STACKWALK_CONTROLPC_ADJUST_OFFSET;
             pStackTraceElem->flags |= STEF_IP_ADJUSTED;
         }
 
@@ -4585,13 +4585,6 @@ LONG InternalUnhandledExceptionFilter_Worker(
     STRESS_LOG2(LF_EH, LL_INFO10, "In InternalUnhandledExceptionFilter_Worker, Exception = %x, sp = %p\n",
                                     pExceptionInfo->ExceptionRecord->ExceptionCode, GetCurrentSP());
 
-    // If we can't enter the EE, done.
-    if (g_fForbidEnterEE)
-    {
-        LOG((LF_EH, LL_INFO100, "InternalUnhandledExceptionFilter_Worker: g_fForbidEnterEE is TRUE\n"));
-        return EXCEPTION_CONTINUE_SEARCH;
-    }
-
     // We don't do anything when this is called from an unmanaged thread.
     Thread *pThread = GetThreadNULLOk();
 
@@ -4623,18 +4616,6 @@ LONG InternalUnhandledExceptionFilter_Worker(
     }
 #endif
 
-    // This shouldn't be possible, but MSVC re-installs us... for now, just bail if this happens.
-    if (g_fNoExceptions)
-    {
-        return EXCEPTION_CONTINUE_SEARCH;
-    }
-
-    // Are we looking at a stack overflow here?
-    if ((pThread !=  NULL) && !pThread->DetermineIfGuardPagePresent())
-    {
-        g_fForbidEnterEE = true;
-    }
-
 #ifdef DEBUGGING_SUPPORTED
 
     // Mark that this exception has gone unhandled. At the moment only the debugger will
@@ -5533,8 +5514,6 @@ static LONG ThreadBaseExceptionFilter_Worker(PEXCEPTION_POINTERS pExceptionInfo,
     ThreadBaseExceptionFilterParam *pParam = (ThreadBaseExceptionFilterParam *) pvParam;
     UnhandledExceptionLocation location = pParam->location;
 
-    _ASSERTE(!g_fNoExceptions);
-
     Thread* pThread = GetThread();
 
 #ifdef _DEBUG
@@ -6292,9 +6271,6 @@ EXTERN_C void JIT_StackProbe_End();
 #ifdef FEATURE_EH_FUNCLETS
 
 #ifndef TARGET_X86
-EXTERN_C void JIT_MemSet_End();
-EXTERN_C void JIT_MemCpy_End();
-
 EXTERN_C void JIT_WriteBarrier_End();
 EXTERN_C void JIT_CheckedWriteBarrier_End();
 EXTERN_C void JIT_ByRefWriteBarrier_End();
@@ -6345,9 +6321,6 @@ bool IsIPInMarkedJitHelper(UINT_PTR uControlPc)
     if (GetEEFuncEntryPoint(name) <= uControlPc && uControlPc < GetEEFuncEntryPoint(name##_End)) return true;
 
 #ifndef TARGET_X86
-    CHECK_RANGE(JIT_MemSet)
-    CHECK_RANGE(JIT_MemCpy)
-
     CHECK_RANGE(JIT_WriteBarrier)
     CHECK_RANGE(JIT_CheckedWriteBarrier)
     CHECK_RANGE(JIT_ByRefWriteBarrier)
@@ -6563,7 +6536,6 @@ void HandleManagedFaultNew(EXCEPTION_RECORD* pExceptionRecord, CONTEXT* pContext
 #if defined(FEATURE_EH_FUNCLETS)
     *frame->GetGSCookiePtr() = GetProcessGSCookie();
 #endif // FEATURE_EH_FUNCLETS
-    pContext->ContextFlags |= CONTEXT_EXCEPTION_ACTIVE;
     frame->InitAndLink(pContext);
 
     Thread *pThread = GetThread();
@@ -6744,14 +6716,6 @@ VEH_ACTION WINAPI CLRVectoredExceptionHandlerPhase3(PEXCEPTION_POINTERS pExcepti
 
 VEH_ACTION WINAPI CLRVectoredExceptionHandler(PEXCEPTION_POINTERS pExceptionInfo)
 {
-    // It is not safe to execute code inside VM after we shutdown EE.  One example is DisablePreemptiveGC
-    // will block forever.
-    if (g_fForbidEnterEE)
-    {
-        return VEH_CONTINUE_SEARCH;
-    }
-
-
     //
     // DO NOT USE CONTRACTS HERE AS THIS ROUTINE MAY NEVER RETURN.  You can use
     // static contracts, but currently this is all WRAPPER_NO_CONTRACT.
@@ -7103,12 +7067,14 @@ VEH_ACTION WINAPI CLRVectoredExceptionHandlerPhase3(PEXCEPTION_POINTERS pExcepti
                 //
                 // On 64-bit, some additional work is required..
 #ifdef FEATURE_EH_FUNCLETS
+                pContext->ContextFlags &= ~CONTEXT_EXCEPTION_ACTIVE;
                 return VEH_EXECUTE_HANDLE_MANAGED_EXCEPTION;
 #endif // defined(FEATURE_EH_FUNCLETS)
             }
             else if (AdjustContextForVirtualStub(pExceptionRecord, pContext))
             {
 #ifdef FEATURE_EH_FUNCLETS
+                pContext->ContextFlags &= ~CONTEXT_EXCEPTION_ACTIVE;
                 return VEH_EXECUTE_HANDLE_MANAGED_EXCEPTION;
 #endif
             }
@@ -7439,11 +7405,9 @@ LONG WINAPI CLRVectoredExceptionHandlerShim(PEXCEPTION_POINTERS pExceptionInfo)
     // WARNING WARNING WARNING WARNING WARNING WARNING WARNING
     //
 
-    // If exceptions (or runtime) have been disabled, then simply return.
-    if (g_fForbidEnterEE || g_fNoExceptions)
-    {
-        return EXCEPTION_CONTINUE_SEARCH;
-    }
+#ifdef FEATURE_EH_FUNCLETS
+    pExceptionInfo->ContextRecord->ContextFlags |= CONTEXT_EXCEPTION_ACTIVE;
+#endif // FEATURE_EH_FUNCLETS
 
     // WARNING
     //
@@ -7689,35 +7653,6 @@ void CLRAddVectoredHandlers(void)
 #endif // !TARGET_UNIX
 }
 
-// This function removes the vectored exception and continue handler registration
-// from the OS.
-void CLRRemoveVectoredHandlers(void)
-{
-    CONTRACTL
-    {
-        NOTHROW;
-        GC_NOTRIGGER;
-        MODE_ANY;
-    }
-    CONTRACTL_END;
-#ifndef TARGET_UNIX
-
-    // Unregister the vectored exception handler if one is registered (and we can).
-    if (g_hVectoredExceptionHandler != NULL)
-    {
-        // Unregister the vectored exception handler
-        if (RemoveVectoredExceptionHandler(g_hVectoredExceptionHandler) == FALSE)
-        {
-            LOG((LF_EH, LL_INFO100, "CLRRemoveVectoredHandlers: RemoveVectoredExceptionHandler() failed.\n"));
-        }
-        else
-        {
-            LOG((LF_EH, LL_INFO100, "CLRRemoveVectoredHandlers: RemoveVectoredExceptionHandler() succeeded.\n"));
-        }
-    }
-#endif // !TARGET_UNIX
-}
-
 //
 // This does the work of the Unwind and Continue Hanlder inside the catch clause of that handler. The stack has not
 // been unwound when this is called. Keep that in mind when deciding where to put new code :)
@@ -7756,6 +7691,42 @@ void UnwindAndContinueRethrowHelperInsideCatch(Frame* pEntryFrame, Exception* pE
 #endif
 }
 
+#ifdef FEATURE_EH_FUNCLETS
+//
+// This function continues exception interception unwind after it crossed native frames using
+// standard EH / SEH.
+//
+VOID DECLSPEC_NORETURN ContinueExceptionInterceptionUnwind()
+{
+    STATIC_CONTRACT_THROWS;
+    STATIC_CONTRACT_GC_TRIGGERS;
+    STATIC_CONTRACT_MODE_ANY;
+
+    GCX_COOP();
+
+    Thread *pThread = GetThread();
+    ThreadExceptionState* pExState = pThread->GetExceptionState();
+    UINT_PTR uInterceptStackFrame  = 0;
+
+    pExState->GetDebuggerState()->GetDebuggerInterceptInfo(NULL, NULL,
+                                                        (PBYTE*)&uInterceptStackFrame,
+                                                        NULL, NULL);
+
+    PREPARE_NONVIRTUAL_CALLSITE(METHOD__EH__UNWIND_AND_INTERCEPT);
+    DECLARE_ARGHOLDER_ARRAY(args, 2);
+    args[ARGNUM_0] = PTR_TO_ARGHOLDER((ExInfo*)pExState->GetCurrentExceptionTracker());
+    args[ARGNUM_1] = PTR_TO_ARGHOLDER(uInterceptStackFrame);
+    pThread->IncPreventAbort();
+
+    //Ex.RhUnwindAndIntercept(throwable, &exInfo)
+    CRITICAL_CALLSITE;
+    CALL_MANAGED_METHOD_NORET(args)
+
+    UNREACHABLE();
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
 //
 // This does the work of the Unwind and Continue Hanlder after the catch clause of that handler. The stack has been
 // unwound by the time this is called. Keep that in mind when deciding where to put new code :)
@@ -7779,7 +7750,18 @@ VOID DECLSPEC_NORETURN UnwindAndContinueRethrowHelperAfterCatch(Frame* pEntryFra
 #ifdef FEATURE_EH_FUNCLETS
     if (g_isNewExceptionHandlingEnabled && !nativeRethrow)
     {
-        DispatchManagedException(orThrowable);
+        Thread *pThread = GetThread();
+        ThreadExceptionState* pExState = pThread->GetExceptionState();
+        ExInfo *pPrevExInfo = (ExInfo*)pExState->GetCurrentExceptionTracker();
+        if (pPrevExInfo != NULL && pPrevExInfo->m_DebuggerExState.GetDebuggerInterceptContext() != NULL)
+        {
+            ContinueExceptionInterceptionUnwind();
+            UNREACHABLE();
+        }
+        else
+        {
+            DispatchManagedException(orThrowable, /* preserveStackTrace */ false);
+        }
     }
     else
 #endif // FEATURE_EH_FUNCLETS
@@ -8848,11 +8830,9 @@ BOOL IsThrowableThreadAbortException(OBJECTREF oThrowable)
 #if defined(FEATURE_EH_FUNCLETS)
 PTR_ExceptionTrackerBase GetEHTrackerForPreallocatedException(OBJECTREF oPreAllocThrowable,
                                                           PTR_ExceptionTrackerBase pStartingEHTracker)
-#elif TARGET_X86
+#else
 PTR_ExInfo GetEHTrackerForPreallocatedException(OBJECTREF oPreAllocThrowable,
                                                 PTR_ExInfo pStartingEHTracker)
-#else
-#error Unsupported platform
 #endif
 {
     CONTRACTL
@@ -8870,11 +8850,9 @@ PTR_ExInfo GetEHTrackerForPreallocatedException(OBJECTREF oPreAllocThrowable,
     // Get the reference to the current exception tracker
 #if defined(FEATURE_EH_FUNCLETS)
     PTR_ExceptionTrackerBase pEHTracker = (pStartingEHTracker != NULL) ? pStartingEHTracker : GetThread()->GetExceptionState()->GetCurrentExceptionTracker();
-#elif TARGET_X86
+#else
     PTR_ExInfo pEHTracker = (pStartingEHTracker != NULL) ? pStartingEHTracker : GetThread()->GetExceptionState()->GetCurrentExceptionTracker();
-#else // !(HOST_64BIT || TARGET_X86)
-#error Unsupported platform
-#endif // HOST_64BIT
+#endif
 
     BOOL fFoundTracker = FALSE;
 
@@ -8949,13 +8927,10 @@ PTR_EHWatsonBucketTracker GetWatsonBucketTrackerForPreallocatedException(OBJECTR
  #if defined(FEATURE_EH_FUNCLETS)
         PTR_ExceptionTrackerBase pEHTracker = NULL;
         PTR_ExceptionTrackerBase pPreviousEHTracker = NULL;
-
-#elif TARGET_X86
+#else
         PTR_ExInfo pEHTracker = NULL;
         PTR_ExInfo pPreviousEHTracker = NULL;
-#else // !(HOST_64BIT || TARGET_X86)
-#error Unsupported platform
-#endif // HOST_64BIT
+#endif
 
         if (fStartSearchFromPreviousTracker)
         {
diff --git a/src/coreclr/vm/excep.h b/src/coreclr/vm/excep.h
index 9acd2b945c40..b84f0ef93e34 100644
--- a/src/coreclr/vm/excep.h
+++ b/src/coreclr/vm/excep.h
@@ -107,7 +107,6 @@ struct EE_ILEXCEPTION_CLAUSE;
 
 void InitializeExceptionHandling();
 void CLRAddVectoredHandlers(void);
-void CLRRemoveVectoredHandlers(void);
 void TerminateExceptionHandling();
 
 // Prototypes
@@ -518,11 +517,11 @@ EXCEPTION_HANDLER_DECL(COMPlusFrameHandlerRevCom);
 #endif // FEATURE_COMINTEROP
 
 // Pop off any SEH handlers we have registered below pTargetSP
-VOID __cdecl PopSEHRecords(LPVOID pTargetSP);
+VOID PopSEHRecords(LPVOID pTargetSP);
 
-#if defined(TARGET_X86) && defined(DEBUGGING_SUPPORTED)
+#ifdef DEBUGGING_SUPPORTED
 VOID UnwindExceptionTrackerAndResumeInInterceptionFrame(ExInfo* pExInfo, EHContext* context);
-#endif // TARGET_X86 && DEBUGGING_SUPPORTED
+#endif // DEBUGGING_SUPPORTED
 
 BOOL PopNestedExceptionRecords(LPVOID pTargetSP, BOOL bCheckForUnknownHandlers = FALSE);
 VOID PopNestedExceptionRecords(LPVOID pTargetSP, T_CONTEXT *pCtx, void *pSEH);
@@ -846,6 +845,10 @@ void ResetThreadAbortState(PTR_Thread pThread, CrawlFrame *pCf, StackFrame sfCur
 
 X86_ONLY(EXCEPTION_REGISTRATION_RECORD* GetNextCOMPlusSEHRecord(EXCEPTION_REGISTRATION_RECORD* pRec);)
 
+#ifdef FEATURE_EH_FUNCLETS
+VOID DECLSPEC_NORETURN ContinueExceptionInterceptionUnwind();
+#endif // FEATURE_EH_FUNCLETS
+
 #endif // !DACCESS_COMPILE
 
 #endif // __excep_h__
diff --git a/src/coreclr/vm/exceptionhandling.cpp b/src/coreclr/vm/exceptionhandling.cpp
index 4d28ed16a5e7..e4901303f186 100644
--- a/src/coreclr/vm/exceptionhandling.cpp
+++ b/src/coreclr/vm/exceptionhandling.cpp
@@ -19,6 +19,7 @@
 #include "corinfo.h"
 #include "exceptionhandlingqcalls.h"
 #include "exinfo.h"
+#include "configuration.h"
 
 #if defined(TARGET_X86)
 #define USE_CURRENT_CONTEXT_IN_FILTER
@@ -236,7 +237,7 @@ void InitializeExceptionHandling()
     // Initialize the lock used for synchronizing access to the stacktrace in the exception object
     g_StackTraceArrayLock.Init(LOCK_TYPE_DEFAULT, TRUE);
 
-    g_isNewExceptionHandlingEnabled = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableNewExceptionHandling) != 0;
+    g_isNewExceptionHandlingEnabled = Configuration::GetKnobBooleanValue(W("System.Runtime.LegacyExceptionHandling"), CLRConfig::EXTERNAL_LegacyExceptionHandling ) == 0;
 
 #ifdef TARGET_UNIX
     // Register handler of hardware exceptions like null reference in PAL
@@ -866,6 +867,24 @@ UINT_PTR ExceptionTracker::FinishSecondPass(
 
 void CleanUpForSecondPass(Thread* pThread, bool fIsSO, LPVOID MemoryStackFpForFrameChain, LPVOID MemoryStackFp);
 
+static void PopExplicitFrames(Thread *pThread, void *targetSp)
+{
+    Frame* pFrame = pThread->GetFrame();
+    while (pFrame < targetSp)
+    {
+        pFrame->ExceptionUnwind();
+        pFrame->Pop(pThread);
+        pFrame = pThread->GetFrame();
+    }
+
+    GCFrame* pGCFrame = pThread->GetGCFrame();
+    while (pGCFrame && pGCFrame < targetSp)
+    {
+        pGCFrame->Pop();
+        pGCFrame = pThread->GetGCFrame();
+    }
+}
+
 EXTERN_C EXCEPTION_DISPOSITION
 ProcessCLRExceptionNew(IN     PEXCEPTION_RECORD   pExceptionRecord,
                     IN     PVOID               pEstablisherFrame,
@@ -881,6 +900,19 @@ ProcessCLRExceptionNew(IN     PEXCEPTION_RECORD   pExceptionRecord,
     STATIC_CONTRACT_GC_TRIGGERS;
     STATIC_CONTRACT_THROWS;
 
+    Thread* pThread         = GetThread();
+
+    if (pThread->HasThreadStateNC(Thread::TSNC_ProcessedUnhandledException))
+    {
+        if ((pExceptionRecord->ExceptionFlags & EXCEPTION_UNWINDING))
+        {
+            GCX_COOP();
+            PopExplicitFrames(pThread, (void*)GetSP(pContextRecord));
+            ExInfo::PopExInfos(pThread, (void*)GetSP(pContextRecord));
+        }
+        return ExceptionContinueSearch;
+    }
+
 #ifndef HOST_UNIX
     if (!(pExceptionRecord->ExceptionFlags & EXCEPTION_UNWINDING))
     {
@@ -890,7 +922,6 @@ ProcessCLRExceptionNew(IN     PEXCEPTION_RECORD   pExceptionRecord,
             EEPOLICY_HANDLE_FATAL_ERROR(pExceptionRecord->ExceptionCode);
         }
 
-        Thread* pThread         = GetThread();
         ClrUnwindEx(pExceptionRecord,
                     (UINT_PTR)pThread,
                     INVALID_RESUME_ADDRESS,
@@ -899,15 +930,18 @@ ProcessCLRExceptionNew(IN     PEXCEPTION_RECORD   pExceptionRecord,
     else
     {
         GCX_COOP();
-        FrameWithCookie<FaultingExceptionFrame> frameWithCookie;
-        FaultingExceptionFrame *frame = &frameWithCookie;
-    #if defined(FEATURE_EH_FUNCLETS)
-        *frame->GetGSCookiePtr() = GetProcessGSCookie();
-    #endif // FEATURE_EH_FUNCLETS
-        frame->InitAndLink(pContextRecord);
-
-        OBJECTREF oref = ExceptionTracker::CreateThrowable(pExceptionRecord, FALSE);
-        DispatchManagedException(oref);
+        ThreadExceptionState* pExState = pThread->GetExceptionState();
+        ExInfo *pPrevExInfo = (ExInfo*)pExState->GetCurrentExceptionTracker();
+        if (pPrevExInfo != NULL && pPrevExInfo->m_DebuggerExState.GetDebuggerInterceptContext() != NULL)
+        {
+            ContinueExceptionInterceptionUnwind();
+            UNREACHABLE();
+        }
+        else
+        {
+            OBJECTREF oref = ExceptionTracker::CreateThrowable(pExceptionRecord, FALSE);
+            DispatchManagedException(oref, pContextRecord, /* preserveStackTrace */ false);
+        }
     }
 #endif // !HOST_UNIX
     EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, _T("SEH exception leaked into managed code"));
@@ -931,8 +965,7 @@ ProcessCLRException(IN     PEXCEPTION_RECORD   pExceptionRecord,
 
     if (g_isNewExceptionHandlingEnabled)
     {
-        ProcessCLRExceptionNew(pExceptionRecord, pEstablisherFrame, pContextRecord, pDispatcherContext);
-        UNREACHABLE();
+        return ProcessCLRExceptionNew(pExceptionRecord, pEstablisherFrame, pContextRecord, pDispatcherContext);
     }
 
     // We must preserve this so that GCStress=4 eh processing doesnt kill last error.
@@ -3567,6 +3600,7 @@ void ExceptionTracker::PopTrackerIfEscaping(
     }
     CONTRACTL_END;
 
+    _ASSERTE(!g_isNewExceptionHandlingEnabled);
     Thread*                 pThread  = GetThread();
     ThreadExceptionState*   pExState = pThread->GetExceptionState();
     ExceptionTracker*       pTracker = (ExceptionTracker*)pExState->m_pCurrentTracker;
@@ -3615,6 +3649,11 @@ void ExceptionTracker::PopTrackers(
     }
     CONTRACTL_END;
 
+    if (g_isNewExceptionHandlingEnabled)
+    {
+        return;
+    }
+
     Thread*             pThread     = GetThreadNULLOk();
     ExceptionTracker*   pTracker    = (pThread ? (ExceptionTracker*)pThread->GetExceptionState()->m_pCurrentTracker : NULL);
 
@@ -4271,9 +4310,12 @@ EXCEPTION_DISPOSITION ClrDebuggerDoUnwindAndIntercept(X86_FIRST_ARG(EXCEPTION_RE
     {
         GCX_COOP();
 
+        ExInfo* pExInfo = (ExInfo*)pExState->GetCurrentExceptionTracker();
+        _ASSERTE(pExInfo != NULL);
+
         PREPARE_NONVIRTUAL_CALLSITE(METHOD__EH__UNWIND_AND_INTERCEPT);
         DECLARE_ARGHOLDER_ARRAY(args, 2);
-        args[ARGNUM_0] = PTR_TO_ARGHOLDER(pExState->GetCurrentExceptionTracker());
+        args[ARGNUM_0] = PTR_TO_ARGHOLDER(pExInfo);
         args[ARGNUM_1] = PTR_TO_ARGHOLDER(uInterceptStackFrame);
         pThread->IncPreventAbort();
 
@@ -4930,9 +4972,13 @@ VOID DECLSPEC_NORETURN DispatchManagedException(PAL_SEHException& ex, bool isHar
 {
     if (g_isNewExceptionHandlingEnabled)
     {
+        if (!isHardwareException)
+        {
+            RtlCaptureContext(ex.GetContextRecord());
+        }
         GCX_COOP();
         OBJECTREF throwable = ExceptionTracker::CreateThrowable(ex.GetExceptionRecord(), FALSE);
-        DispatchManagedException(throwable);
+        DispatchManagedException(throwable, ex.GetContextRecord());
     }
 
     do
@@ -5406,7 +5452,7 @@ BOOL HandleHardwareException(PAL_SEHException* ex)
     if (ex->GetExceptionRecord()->ExceptionCode != STATUS_BREAKPOINT && ex->GetExceptionRecord()->ExceptionCode != STATUS_SINGLE_STEP)
     {
         // A hardware exception is handled only if it happened in a jitted code or
-        // in one of the JIT helper functions (JIT_MemSet, ...)
+        // in one of the JIT helper functions
         PCODE controlPc = GetIP(ex->GetContextRecord());
         if (ExecutionManager::IsManagedCode(controlPc) && IsGcMarker(ex->GetContextRecord(), ex->GetExceptionRecord()))
         {
@@ -5544,7 +5590,24 @@ BOOL HandleHardwareException(PAL_SEHException* ex)
 
 #endif // TARGET_UNIX
 
-VOID DECLSPEC_NORETURN DispatchManagedException(OBJECTREF throwable, bool preserveStackTrace)
+void FirstChanceExceptionNotification()
+{
+#ifndef TARGET_UNIX
+    if (IsDebuggerPresent())
+    {
+        PAL_TRY(VOID *, unused, NULL)
+        {
+            RaiseException(EXCEPTION_COMPLUS, 0, 0, NULL);
+        }
+        PAL_EXCEPT(EXCEPTION_EXECUTE_HANDLER)
+        {
+        }
+        PAL_ENDTRY;
+    }
+#endif // TARGET_UNIX
+}
+
+VOID DECLSPEC_NORETURN DispatchManagedException(OBJECTREF throwable, CONTEXT* pExceptionContext, bool preserveStackTrace)
 {
     STATIC_CONTRACT_THROWS;
     STATIC_CONTRACT_GC_TRIGGERS;
@@ -5554,13 +5617,15 @@ VOID DECLSPEC_NORETURN DispatchManagedException(OBJECTREF throwable, bool preser
 
    _ASSERTE(IsException(throwable->GetMethodTable()));
 
+    Thread *pThread = GetThread();
+
     if (preserveStackTrace)
     {
+        pThread->IncPreventAbort();
         ExceptionPreserveStackTrace(throwable);
+        pThread->DecPreventAbort();
     }
 
-    Thread *pThread = GetThread();
-
     ULONG_PTR hr = GetHRFromThrowable(throwable);
 
     EXCEPTION_RECORD exceptionRecord;
@@ -5570,10 +5635,7 @@ VOID DECLSPEC_NORETURN DispatchManagedException(OBJECTREF throwable, bool preser
     exceptionRecord.NumberParameters = MarkAsThrownByUs(exceptionRecord.ExceptionInformation, hr);
     exceptionRecord.ExceptionRecord = NULL;
 
-    CONTEXT exceptionContext;
-    RtlCaptureContext(&exceptionContext);
-
-    ExInfo exInfo(pThread, &exceptionRecord, &exceptionContext, ExKind::Throw);
+    ExInfo exInfo(pThread, &exceptionRecord, pExceptionContext, ExKind::Throw);
 
     if (pThread->IsAbortInitiated () && IsExceptionOfType(kThreadAbortException,&throwable))
     {
@@ -5593,6 +5655,8 @@ VOID DECLSPEC_NORETURN DispatchManagedException(OBJECTREF throwable, bool preser
     args[ARGNUM_0] = OBJECTREF_TO_ARGHOLDER(throwable);
     args[ARGNUM_1] = PTR_TO_ARGHOLDER(&exInfo);
 
+    FirstChanceExceptionNotification();
+
     pThread->IncPreventAbort();
 
     //Ex.RhThrowEx(throwable, &exInfo)
@@ -5605,6 +5669,19 @@ VOID DECLSPEC_NORETURN DispatchManagedException(OBJECTREF throwable, bool preser
     UNREACHABLE();
 }
 
+VOID DECLSPEC_NORETURN DispatchManagedException(OBJECTREF throwable, bool preserveStackTrace)
+{
+    STATIC_CONTRACT_THROWS;
+    STATIC_CONTRACT_GC_TRIGGERS;
+    STATIC_CONTRACT_MODE_COOPERATIVE;
+
+    CONTEXT exceptionContext;
+    RtlCaptureContext(&exceptionContext);
+
+    DispatchManagedException(throwable, &exceptionContext, preserveStackTrace);
+    UNREACHABLE();
+}
+
 VOID DECLSPEC_NORETURN DispatchManagedException(RuntimeExceptionKind reKind)
 {
     STATIC_CONTRACT_THROWS;
@@ -6076,7 +6153,7 @@ void CleanUpForSecondPass(Thread* pThread, bool fIsSO, LPVOID MemoryStackFpForFr
     // Instead, we rely on the END_SO_TOLERANT_CODE macro to call ClearExceptionStateAfterSO().  Of course,
     // we may leak in the UMThunkStubCommon() case where we don't have this macro lower on the stack
     // (stack grows up).
-    if (!fIsSO)
+    if (!fIsSO && !g_isNewExceptionHandlingEnabled)
     {
         ExceptionTracker::PopTrackerIfEscaping(MemoryStackFp);
     }
@@ -6566,6 +6643,13 @@ bool ExceptionTracker::IsInStackRegionUnwoundBySpecifiedException(CrawlFrame * p
 
     // Remember that sfLowerBound and sfUpperBound are in the "OS format".
     // Refer to the comment for CallerStackFrame for more information.
+
+    if (g_isNewExceptionHandlingEnabled)
+    {
+        // The new exception handling sets the ranges always to the SP of the unwound frame
+        return (sfLowerBound < csfToCheck) && (csfToCheck <= sfUpperBound);
+    }
+
 #ifndef STACK_RANGE_BOUNDS_ARE_CALLER_SP
     if ((sfLowerBound < csfToCheck) && (csfToCheck <= sfUpperBound))
 #else // !STACK_RANGE_BOUNDS_ARE_CALLER_SP
@@ -7488,53 +7572,54 @@ void MarkInlinedCallFrameAsEHHelperCall(Frame* pFrame)
     pInlinedCallFrame->m_Datum = (PTR_NDirectMethodDesc)((TADDR)pInlinedCallFrame->m_Datum | (TADDR)InlinedCallFrameMarker::ExceptionHandlingHelper);
 }
 
+static TADDR GetSpForDiagnosticReporting(REGDISPLAY *pRD)
+{
+#ifdef ESTABLISHER_FRAME_ADDRESS_IS_CALLER_SP
+    return CallerStackFrame::FromRegDisplay(pRD).SP;
+#else
+    return GetSP(pRD->pCurrentContext);
+#endif
+}
+
 extern "C" void QCALLTYPE AppendExceptionStackFrame(QCall::ObjectHandleOnStack exceptionObj, SIZE_T ip, SIZE_T sp, int flags, ExInfo *pExInfo)
 {
     QCALL_CONTRACT;
     
     BEGIN_QCALL;
-    GCX_COOP();
 
     Thread* pThread = GET_THREAD();
-    Frame* pFrame = pThread->GetFrame();
-    MarkInlinedCallFrameAsFuncletCall(pFrame);
 
-    bool canAllocateMemory = !(exceptionObj.Get() == CLRException::GetPreallocatedOutOfMemoryException()) &&
-                             !(exceptionObj.Get() == CLRException::GetPreallocatedStackOverflowException());
+    {
+        GCX_COOP();
+
+        Frame* pFrame = pThread->GetFrame();
+        MarkInlinedCallFrameAsEHHelperCall(pFrame);
 
-    MethodDesc *pMD = pExInfo->m_frameIter.m_crawl.GetFunction();
+        bool canAllocateMemory = !(exceptionObj.Get() == CLRException::GetPreallocatedOutOfMemoryException()) &&
+                                !(exceptionObj.Get() == CLRException::GetPreallocatedStackOverflowException());
+
+        MethodDesc *pMD = pExInfo->m_frameIter.m_crawl.GetFunction();
 #if _DEBUG
-    EECodeInfo codeInfo(ip);
-    _ASSERTE(codeInfo.IsValid());
-    _ASSERTE(pMD == codeInfo.GetMethodDesc());
+        EECodeInfo codeInfo(ip);
+        _ASSERTE(codeInfo.IsValid());
+        _ASSERTE(pMD == codeInfo.GetMethodDesc());
 #endif // _DEBUG
 
-    pExInfo->m_StackTraceInfo.AppendElement(canAllocateMemory, ip, sp, pMD, &pExInfo->m_frameIter.m_crawl);
-    pExInfo->m_StackTraceInfo.SaveStackTrace(canAllocateMemory, pExInfo->m_hThrowable, /*bReplaceStack*/FALSE, /*bSkipLastElement*/FALSE);
-    if (!pExInfo->DeliveredFirstChanceNotification())
-    {
-        ExceptionNotifications::DeliverFirstChanceNotification();
+        pExInfo->m_StackTraceInfo.AppendElement(canAllocateMemory, ip, sp, pMD, &pExInfo->m_frameIter.m_crawl);
+        pExInfo->m_StackTraceInfo.SaveStackTrace(canAllocateMemory, pExInfo->m_hThrowable, /*bReplaceStack*/FALSE, /*bSkipLastElement*/FALSE);
     }
 
-    END_QCALL;
-}
+    // Notify the debugger that we are on the first pass for a managed exception.
+    // Note that this callback is made for every managed frame.
+    TADDR spForDebugger = GetSpForDiagnosticReporting(pExInfo->m_frameIter.m_crawl.GetRegisterSet());
+    EEToDebuggerExceptionInterfaceWrapper::FirstChanceManagedException(pThread, ip, spForDebugger);
 
-static void PopExplicitFrames(Thread *pThread, void *targetSp)
-{
-    Frame* pFrame = pThread->GetFrame();
-    while (pFrame < targetSp)
+    if (!pExInfo->DeliveredFirstChanceNotification())
     {
-        pFrame->ExceptionUnwind();
-        pFrame->Pop(pThread);
-        pFrame = pThread->GetFrame();
+        ExceptionNotifications::DeliverFirstChanceNotification();
     }
 
-    GCFrame* pGCFrame = pThread->GetGCFrame();
-    while (pGCFrame && pGCFrame < targetSp)
-    {
-        pGCFrame->Pop();
-        pGCFrame = pThread->GetGCFrame();
-    }
+    END_QCALL;
 }
 
 UINT_PTR GetEstablisherFrame(REGDISPLAY* pvRegDisplay, ExInfo* exInfo)
@@ -7563,15 +7648,6 @@ UINT_PTR GetEstablisherFrame(REGDISPLAY* pvRegDisplay, ExInfo* exInfo)
 #endif        
 }
 
-static TADDR GetSpForDiagnosticReporting(REGDISPLAY *pRD)
-{
-#ifdef ESTABLISHER_FRAME_ADDRESS_IS_CALLER_SP
-    return CallerStackFrame::FromRegDisplay(pRD).SP;
-#else
-    return GetSP(pRD->pCurrentContext);
-#endif
-}
-
 extern "C" void * QCALLTYPE CallCatchFunclet(QCall::ObjectHandleOnStack exceptionObj, BYTE* pHandlerIP, REGDISPLAY* pvRegDisplay, ExInfo* exInfo)
 {
     QCALL_CONTRACT;
@@ -7637,6 +7713,7 @@ extern "C" void * QCALLTYPE CallCatchFunclet(QCall::ObjectHandleOnStack exceptio
     BOOL fIntercepted = pThread->GetExceptionState()->GetFlags()->DebuggerInterceptInfo();
     if (fIntercepted)
     {
+        _ASSERTE(pHandlerIP == NULL);
         // retrieve the interception information
         MethodDesc *pInterceptMD = NULL;
         StackFrame sfInterceptStackFrame;
@@ -7644,18 +7721,24 @@ extern "C" void * QCALLTYPE CallCatchFunclet(QCall::ObjectHandleOnStack exceptio
         ULONG_PTR   ulRelOffset;
 
         pThread->GetExceptionState()->GetDebuggerState()->GetDebuggerInterceptInfo(&pInterceptMD, NULL, (PBYTE*)&(sfInterceptStackFrame.SP), &ulRelOffset, NULL);
+        if (sfInterceptStackFrame.SP == GetSP(pvRegDisplay->pCurrentContext))
+        {
+            PCODE pStartAddress = pInterceptMD->GetNativeCode();
 
-        PCODE pStartAddress = pInterceptMD->GetNativeCode();
+            EECodeInfo codeInfo(pStartAddress);
+            _ASSERTE(codeInfo.IsValid());
 
-        EECodeInfo codeInfo(pStartAddress);
-        _ASSERTE(codeInfo.IsValid());
-
-        // Note that the value returned for ulRelOffset is actually the offset,
-        // so we need to adjust it to get the actual IP.
-        _ASSERTE(FitsIn<DWORD>(ulRelOffset));
-        uResumePC = codeInfo.GetJitManager()->GetCodeAddressForRelOffset(codeInfo.GetMethodToken(), static_cast<DWORD>(ulRelOffset));
+            // Note that the value returned for ulRelOffset is actually the offset,
+            // so we need to adjust it to get the actual IP.
+            _ASSERTE(FitsIn<DWORD>(ulRelOffset));
+            uResumePC = codeInfo.GetJitManager()->GetCodeAddressForRelOffset(codeInfo.GetMethodToken(), static_cast<DWORD>(ulRelOffset));
 
-        SetIP(pvRegDisplay->pCurrentContext, uResumePC);
+            SetIP(pvRegDisplay->pCurrentContext, uResumePC);
+        }
+        else
+        {
+            fIntercepted = FALSE;
+        }
     }
 #endif // DEBUGGING_SUPPORTED
 
@@ -7771,14 +7854,16 @@ extern "C" void QCALLTYPE ResumeAtInterceptionLocation(REGDISPLAY* pvRegDisplay)
     MarkInlinedCallFrameAsFuncletCall(pFrame);
     
     UINT_PTR targetSp = GetSP(pvRegDisplay->pCurrentContext);
+    ExInfo *pExInfo = (PTR_ExInfo)pThread->GetExceptionState()->GetCurrentExceptionTracker();
+
+    pExInfo->m_ScannedStackRange.ExtendUpperBound(targetSp);
+
     PopExplicitFrames(pThread, (void*)targetSp);
 
     // This must be done before we pop the ExInfos.
     BOOL fIntercepted = pThread->GetExceptionState()->GetFlags()->DebuggerInterceptInfo();
     _ASSERTE(fIntercepted);
 
-    ExInfo::PopExInfos(pThread, (void*)targetSp);
-
     // retrieve the interception information
     MethodDesc *pInterceptMD = NULL;
     StackFrame sfInterceptStackFrame;
@@ -7787,6 +7872,8 @@ extern "C" void QCALLTYPE ResumeAtInterceptionLocation(REGDISPLAY* pvRegDisplay)
 
     pThread->GetExceptionState()->GetDebuggerState()->GetDebuggerInterceptInfo(&pInterceptMD, NULL, (PBYTE*)&(sfInterceptStackFrame.SP), &ulRelOffset, NULL);
 
+    ExInfo::PopExInfos(pThread, (void*)targetSp);
+
     PCODE pStartAddress = pInterceptMD->GetNativeCode();
 
     EECodeInfo codeInfo(pStartAddress);
@@ -7909,7 +7996,7 @@ struct ExtendedEHClauseEnumerator : EH_CLAUSE_ENUMERATOR
     unsigned EHCount;
 };
 
-extern "C" BOOL QCALLTYPE EHEnumInitFromStackFrameIterator(StackFrameIterator *pFrameIter, BYTE** pMethodStartAddress, EH_CLAUSE_ENUMERATOR * pEHEnum)
+extern "C" BOOL QCALLTYPE EHEnumInitFromStackFrameIterator(StackFrameIterator *pFrameIter, IJitManager::MethodRegionInfo* pMethodRegionInfo, EH_CLAUSE_ENUMERATOR * pEHEnum)
 {
     QCALL_CONTRACT;
 
@@ -7923,7 +8010,7 @@ extern "C" BOOL QCALLTYPE EHEnumInitFromStackFrameIterator(StackFrameIterator *p
 
     IJitManager* pJitMan = pFrameIter->m_crawl.GetJitManager();
     const METHODTOKEN& MethToken = pFrameIter->m_crawl.GetMethodToken();
-    *pMethodStartAddress = (BYTE*)pJitMan->JitTokenToStartAddress(MethToken);
+    pJitMan->JitTokenToMethodRegionInfo(MethToken, pMethodRegionInfo);
     pExtendedEHEnum->EHCount = pJitMan->InitializeEHEnumeration(MethToken, pEHEnum);
 
     END_QCALL;
@@ -8022,7 +8109,7 @@ static BOOL CheckExceptionInterception(StackFrameIterator* pStackFrameIterator,
             reinterpret_cast<PBYTE *>(&(sfInterceptStackFrame.SP)),
             NULL, NULL);
 
-        TADDR spForDebugger = GetSpForDiagnosticReporting(pStackFrameIterator->m_crawl.GetRegisterSet());
+        TADDR spForDebugger = GetRegdisplaySP(pStackFrameIterator->m_crawl.GetRegisterSet());
 
         if ((pExInfo->m_passNumber == 1) ||
             ((pInterceptMD == pMD) && (sfInterceptStackFrame == spForDebugger)))
@@ -8041,7 +8128,6 @@ static void NotifyExceptionPassStarted(StackFrameIterator *pThis, Thread *pThrea
         GCX_COOP();
         pThread->SafeSetThrowables(pExInfo->m_exception);
         EEToProfilerExceptionInterfaceWrapper::ExceptionThrown(pThread);
-        UpdatePerformanceMetrics(&pThis->m_crawl, false, ((uint8_t)pExInfo->m_kind & (uint8_t)ExKind::RethrowFlag) == 0);
     }
     else // pExInfo->m_passNumber == 2
     {
@@ -8110,10 +8196,6 @@ static void NotifyFunctionEnter(StackFrameIterator *pThis, Thread *pThread, ExIn
             EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionLeave(pExInfo->m_pMDToReportFunctionLeave);
         }
         EEToProfilerExceptionInterfaceWrapper::ExceptionSearchFunctionEnter(pMD);
-        // Notify the debugger that we are on the first pass for a managed exception.
-        // Note that this callback is made for every managed frame.
-        TADDR spForDebugger = GetSpForDiagnosticReporting(pThis->m_crawl.GetRegisterSet());
-        EEToDebuggerExceptionInterfaceWrapper::FirstChanceManagedException(pThread, GetControlPC(pThis->m_crawl.GetRegisterSet()), spForDebugger);
     }
     else
     {
@@ -8144,28 +8226,21 @@ extern "C" bool QCALLTYPE SfiInit(StackFrameIterator* pThis, CONTEXT* pStackwalk
     // just clear the thread state.
     pThread->ResetThrowControlForThread();
 
-    // Skip the SfiInit pinvoke frame
-    pFrame = pThread->GetFrame()->PtrNextFrame();
+    pFrame = pExInfo->m_pInitialFrame;
 
     NotifyExceptionPassStarted(pThis, pThread, pExInfo);
 
-    if (pFrame == FRAME_TOP)
-    {
-        // There are no managed frames on the stack, fail fast and report unhandled exception
-        LONG disposition = InternalUnhandledExceptionFilter_Worker((EXCEPTION_POINTERS *)&pExInfo->m_ptrs);
-#ifdef HOST_WINDOWS
-        CreateCrashDumpIfEnabled(/* fSOException */ FALSE);
-        RaiseFailFastException(pExInfo->m_ptrs.ExceptionRecord, pExInfo->m_ptrs.ContextRecord, 0);
-#else
-        CrashDumpAndTerminateProcess(pExInfo->m_ExceptionCode);
-#endif
-    }
-
     REGDISPLAY* pRD = &pExInfo->m_regDisplay;
     pThread->FillRegDisplay(pRD, pStackwalkCtx);
 
     new (pThis) StackFrameIterator();
-    result = pThis->Init(pThread, pFrame, pRD, THREAD_EXECUTING_MANAGED_CODE) != FALSE;
+    result = pThis->Init(pThread, pFrame, pRD, THREAD_EXECUTING_MANAGED_CODE | UNWIND_FLOATS) != FALSE;
+
+    if (result && (pExInfo->m_passNumber == 1))
+    {
+        GCX_COOP();
+        UpdatePerformanceMetrics(&pThis->m_crawl, false, ((uint8_t)pExInfo->m_kind & (uint8_t)ExKind::RethrowFlag) == 0);
+    }
 
     // Walk the stack until it finds the first managed method
     while (result && pThis->GetFrameState() != StackFrameIterator::SFITER_FRAMELESS_METHOD)
@@ -8184,6 +8259,7 @@ extern "C" bool QCALLTYPE SfiInit(StackFrameIterator* pThis, CONTEXT* pStackwalk
                         !(pExInfo->m_exception == CLRException::GetPreallocatedStackOverflowException());
 
                     pExInfo->m_StackTraceInfo.AppendElement(canAllocateMemory, NULL, GetRegdisplaySP(pExInfo->m_frameIter.m_crawl.GetRegisterSet()), pMD, &pExInfo->m_frameIter.m_crawl);
+                    pExInfo->m_StackTraceInfo.SaveStackTrace(canAllocateMemory, pExInfo->m_hThrowable, /*bReplaceStack*/FALSE, /*bSkipLastElement*/FALSE);
 
 #if defined(DEBUGGING_SUPPORTED)
                     if (NotifyDebuggerOfStub(pThread, pFrame))
@@ -8197,6 +8273,22 @@ extern "C" bool QCALLTYPE SfiInit(StackFrameIterator* pThis, CONTEXT* pStackwalk
                 }
             }
         }
+        else // pass number 2
+        {
+            if (pThis->GetFrameState() == StackFrameIterator::SFITER_SKIPPED_FRAME_FUNCTION)
+            {
+                // Update context pointers using the skipped frame. This is needed when exception handling continues
+                // from ProcessCLRExceptionNew, since the RtlUnwind doesn't maintain context pointers.
+                // We explicitly don't do that for inlined frames as it would modify the PC/SP to point to
+                // a slightly different location in the managed code calling the pinvoke and the inlined
+                // call frame doesn't update the context pointers anyways.
+                Frame *pSkippedFrame = pThis->m_crawl.GetFrame();
+                if (pSkippedFrame->NeedsUpdateRegDisplay() && (pSkippedFrame->GetVTablePtr() != InlinedCallFrame::GetMethodFrameVPtr()))
+                {
+                    pSkippedFrame->UpdateRegDisplay(pThis->m_crawl.GetRegisterSet());
+                }
+            }
+        }
         StackWalkAction retVal = pThis->Next();
         result = (retVal != SWA_FAILED);
     }
@@ -8214,7 +8306,7 @@ extern "C" bool QCALLTYPE SfiInit(StackFrameIterator* pThis, CONTEXT* pStackwalk
     if (result)
     {
         TADDR controlPC = pThis->m_crawl.GetRegisterSet()->ControlPC;
-        if (!pThis->m_crawl.HasFaulted())
+        if (!pThis->m_crawl.HasFaulted() && !pThis->m_crawl.IsIPadjusted())
         {
             controlPC -= STACKWALK_CONTROLPC_ADJUST_OFFSET;
         }
@@ -8223,6 +8315,18 @@ extern "C" bool QCALLTYPE SfiInit(StackFrameIterator* pThis, CONTEXT* pStackwalk
 
         *pfIsExceptionIntercepted = CheckExceptionInterception(pThis, pExInfo);
     }
+    else
+    {
+        // There are no managed frames on the stack, fail fast and report unhandled exception
+        LONG disposition = InternalUnhandledExceptionFilter_Worker((EXCEPTION_POINTERS *)&pExInfo->m_ptrs);
+#ifdef HOST_WINDOWS
+        CreateCrashDumpIfEnabled(/* fSOException */ FALSE);
+        GetThread()->SetThreadStateNC(Thread::TSNC_ProcessedUnhandledException);
+        RaiseException(pExInfo->m_ExceptionCode, EXCEPTION_NONCONTINUABLE_EXCEPTION, pExInfo->m_ptrs.ExceptionRecord->NumberParameters, pExInfo->m_ptrs.ExceptionRecord->ExceptionInformation);
+#else
+        CrashDumpAndTerminateProcess(pExInfo->m_ExceptionCode);
+#endif
+    }
 
     return result;
 }
@@ -8337,7 +8441,8 @@ extern "C" bool QCALLTYPE SfiNext(StackFrameIterator* pThis, uint* uExCollideCla
                 else
                 {
 #ifdef HOST_WINDOWS
-                    RaiseFailFastException(pTopExInfo->m_ptrs.ExceptionRecord, pTopExInfo->m_ptrs.ContextRecord, 0);
+                    GetThread()->SetThreadStateNC(Thread::TSNC_ProcessedUnhandledException);
+                    RaiseException(pTopExInfo->m_ExceptionCode, EXCEPTION_NONCONTINUABLE_EXCEPTION, pTopExInfo->m_ptrs.ExceptionRecord->NumberParameters, pTopExInfo->m_ptrs.ExceptionRecord->ExceptionInformation);
 #else
                     CrashDumpAndTerminateProcess(pTopExInfo->m_ExceptionCode);
 #endif
@@ -8405,14 +8510,9 @@ extern "C" bool QCALLTYPE SfiNext(StackFrameIterator* pThis, uint* uExCollideCla
                             isCollided = true;
                             pExInfo->m_kind = (ExKind)((uint8_t)pExInfo->m_kind | (uint8_t)ExKind::SupersededFlag);
 
-                            // Unwind until we hit the frame of the prevExInfo
+                            // Unwind to the frame of the prevExInfo
                             ExInfo* pPrevExInfo = pThis->GetNextExInfo();
-                            do
-                            {
-                                retVal = MoveToNextNonSkippedFrame(pThis);
-                            }
-                            while ((retVal == SWA_CONTINUE) && pThis->m_crawl.GetRegisterSet()->SP != pPrevExInfo->m_regDisplay.SP);
-                            _ASSERTE(retVal != SWA_FAILED);
+                            pThis->SkipTo(&pPrevExInfo->m_frameIter);
 
                             pThis->ResetNextExInfoForSP(pThis->m_crawl.GetRegisterSet()->SP);
                         }
@@ -8429,6 +8529,8 @@ extern "C" bool QCALLTYPE SfiNext(StackFrameIterator* pThis, uint* uExCollideCla
                                              !(pTopExInfo->m_exception == CLRException::GetPreallocatedStackOverflowException());
 
                     pTopExInfo->m_StackTraceInfo.AppendElement(canAllocateMemory, NULL, GetRegdisplaySP(pTopExInfo->m_frameIter.m_crawl.GetRegisterSet()), pMD, &pTopExInfo->m_frameIter.m_crawl);
+                    pTopExInfo->m_StackTraceInfo.SaveStackTrace(canAllocateMemory, pTopExInfo->m_hThrowable, /*bReplaceStack*/FALSE, /*bSkipLastElement*/FALSE);
+
 #if defined(DEBUGGING_SUPPORTED)
                     if (NotifyDebuggerOfStub(pThread, pFrame))
                     {
@@ -8457,7 +8559,7 @@ Exit:;
     if (retVal != SWA_FAILED)
     {
         TADDR controlPC = pThis->m_crawl.GetRegisterSet()->ControlPC;
-        if (!pThis->m_crawl.HasFaulted())
+        if (!pThis->m_crawl.HasFaulted() && !pThis->m_crawl.IsIPadjusted())
         {
             controlPC -= STACKWALK_CONTROLPC_ADJUST_OFFSET;
         }
diff --git a/src/coreclr/vm/exceptionhandling.h b/src/coreclr/vm/exceptionhandling.h
index 74818b9485b6..7be99adfd202 100644
--- a/src/coreclr/vm/exceptionhandling.h
+++ b/src/coreclr/vm/exceptionhandling.h
@@ -22,6 +22,7 @@ ProcessCLRException(IN     PEXCEPTION_RECORD     pExceptionRecord,
                     IN OUT PT_CONTEXT            pContextRecord,
                     IN OUT PT_DISPATCHER_CONTEXT pDispatcherContext);
 
+VOID DECLSPEC_NORETURN DispatchManagedException(OBJECTREF throwable, CONTEXT *pExceptionContext, bool preserveStackTrace = true);
 VOID DECLSPEC_NORETURN DispatchManagedException(OBJECTREF throwable, bool preserveStackTrace = true);
 VOID DECLSPEC_NORETURN DispatchManagedException(RuntimeExceptionKind reKind);
 
@@ -50,8 +51,13 @@ typedef DPTR(ExInfo) PTR_ExInfo;
 // InlinedCallFrame::m_Datum field for details).
 enum class InlinedCallFrameMarker
 {
+#ifdef HOST_64BIT
     ExceptionHandlingHelper = 2,
     SecondPassFuncletCaller = 4,
+#else // HOST_64BIT
+    ExceptionHandlingHelper = 1,
+    SecondPassFuncletCaller = 2,
+#endif // HOST_64BIT
     Mask = ExceptionHandlingHelper | SecondPassFuncletCaller
 };
 
diff --git a/src/coreclr/vm/exceptionhandlingqcalls.h b/src/coreclr/vm/exceptionhandlingqcalls.h
index 7747c14f531d..7054080cef3c 100644
--- a/src/coreclr/vm/exceptionhandlingqcalls.h
+++ b/src/coreclr/vm/exceptionhandlingqcalls.h
@@ -17,7 +17,7 @@ extern "C" void QCALLTYPE CallFinallyFunclet(BYTE* pHandlerIP, REGDISPLAY* pvReg
 extern "C" BOOL QCALLTYPE CallFilterFunclet(QCall::ObjectHandleOnStack exceptionObj, BYTE* pFilterP, REGDISPLAY* pvRegDisplay);
 extern "C" void QCALLTYPE ResumeAtInterceptionLocation(REGDISPLAY* pvRegDisplay);
 extern "C" void QCALLTYPE AppendExceptionStackFrame(QCall::ObjectHandleOnStack exceptionObj, SIZE_T ip, SIZE_T sp, int flags, ExInfo *pExInfo);
-extern "C" BOOL QCALLTYPE EHEnumInitFromStackFrameIterator(StackFrameIterator *pFrameIter, BYTE** pMethodStartAddress, EH_CLAUSE_ENUMERATOR * pEHEnum);
+extern "C" BOOL QCALLTYPE EHEnumInitFromStackFrameIterator(StackFrameIterator *pFrameIter, IJitManager::MethodRegionInfo *pMethodRegionInfo, EH_CLAUSE_ENUMERATOR * pEHEnum);
 extern "C" BOOL QCALLTYPE EHEnumNext(EH_CLAUSE_ENUMERATOR* pEHEnum, RhEHClause* pEHClause);
 extern "C" bool QCALLTYPE SfiInit(StackFrameIterator* pThis, CONTEXT* pStackwalkCtx, bool instructionFault, bool* pIsExceptionIntercepted);
 extern "C" bool QCALLTYPE SfiNext(StackFrameIterator* pThis, unsigned int* uExCollideClauseIdx, bool* fUnwoundReversePInvoke, bool* pIsExceptionIntercepted);
diff --git a/src/coreclr/vm/exinfo.cpp b/src/coreclr/vm/exinfo.cpp
index 741bae687d81..8731a20ba585 100644
--- a/src/coreclr/vm/exinfo.cpp
+++ b/src/coreclr/vm/exinfo.cpp
@@ -121,10 +121,10 @@ void ExInfo::Init()
 
     m_pTopMostHandlerDuringSO = NULL;
 
-#if defined(TARGET_X86) && defined(DEBUGGING_SUPPORTED)
+#ifdef DEBUGGING_SUPPORTED
     m_InterceptionContext.Init();
     m_ValidInterceptionContext = FALSE;
-#endif //TARGET_X86 && DEBUGGING_SUPPORTED
+#endif // DEBUGGING_SUPPORTED
 }
 
 ExInfo::ExInfo()
@@ -327,11 +327,24 @@ ExInfo::ExInfo(Thread *pThread, EXCEPTION_RECORD *pExceptionRecord, CONTEXT *pEx
 #endif // HOST_UNIX
     m_CurrentClause({}),
     m_pMDToReportFunctionLeave(NULL),
-    m_exContext({})
+    m_lastReportedFunclet({0, 0, 0})
 {
     m_StackTraceInfo.AllocateStackTrace();
     pThread->GetExceptionState()->m_pCurrentTracker = this;
-    m_exContext.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
+    m_pInitialFrame = pThread->GetFrame();
+    if (exceptionKind == ExKind::HardwareFault)
+    {
+        // Hardware exception handling needs to start on the FaultingExceptionFrame, so we are
+        // passing in a context with zeroed out IP and SP.
+        SetIP(&m_exContext, 0);
+        SetSP(&m_exContext, 0);
+        m_exContext.ContextFlags = CONTEXT_FULL;
+    }
+    else
+    {
+        memcpy(&m_exContext, pExceptionContext, sizeof(CONTEXT));
+        m_exContext.ContextFlags = m_exContext.ContextFlags & (CONTEXT_FULL | CONTEXT_EXCEPTION_ACTIVE);
+    }
 }
 
 #if defined(TARGET_UNIX)
@@ -372,8 +385,36 @@ void ExInfo::ReleaseResources()
 void ExInfo::PopExInfos(Thread *pThread, void *targetSp)
 {
     ExInfo *pExInfo = (PTR_ExInfo)pThread->GetExceptionState()->GetCurrentExceptionTracker();
+#if defined(DEBUGGING_SUPPORTED)
+    DWORD_PTR dwInterceptStackFrame = 0;
+
+    // This method may be called on an unmanaged thread, in which case no interception can be done.
+    if (pExInfo)
+    {
+        ThreadExceptionState* pExState = pThread->GetExceptionState();
+
+        // If the exception is intercepted, then pop trackers according to the stack frame at which
+        // the exception is intercepted.  We must retrieve the frame pointer before we start popping trackers.
+        if (pExState->GetFlags()->DebuggerInterceptInfo())
+        {
+            pExState->GetDebuggerState()->GetDebuggerInterceptInfo(NULL, NULL, (PBYTE*)&dwInterceptStackFrame,
+                                                                   NULL, NULL);
+        }
+    }
+#endif // DEBUGGING_SUPPORTED
+
     while (pExInfo && pExInfo < (void*)targetSp)
     {
+#if defined(DEBUGGING_SUPPORTED)
+        if (g_pDebugInterface != NULL)
+        {
+            if (pExInfo->m_ScannedStackRange.GetUpperBound().SP < dwInterceptStackFrame)
+            {
+                g_pDebugInterface->DeleteInterceptContext(pExInfo->m_DebuggerExState.GetDebuggerInterceptContext());
+            }
+        }
+#endif // DEBUGGING_SUPPORTED
+
         pExInfo->ReleaseResources();
         pExInfo = (PTR_ExInfo)pExInfo->m_pPrevNestedInfo;
     }
diff --git a/src/coreclr/vm/exinfo.h b/src/coreclr/vm/exinfo.h
index ce612621e869..fc9f134dfeb4 100644
--- a/src/coreclr/vm/exinfo.h
+++ b/src/coreclr/vm/exinfo.h
@@ -136,7 +136,7 @@ class ExInfo
     EHClauseInfo        m_EHClauseInfo;
     ExceptionFlags      m_ExceptionFlags;
 
-#if defined(TARGET_X86) && defined(DEBUGGING_SUPPORTED)
+#ifdef DEBUGGING_SUPPORTED
     EHContext           m_InterceptionContext;
     BOOL                m_ValidInterceptionContext;
 #endif
@@ -155,9 +155,7 @@ class ExInfo
     ExInfo& operator=(const ExInfo &from);
 };
 
-#if defined(TARGET_X86)
 PTR_ExInfo GetEHTrackerForPreallocatedException(OBJECTREF oPreAllocThrowable, PTR_ExInfo pStartingEHTracker);
-#endif // TARGET_X86
 
 #else // !FEATURE_EH_FUNCLETS
 
@@ -198,6 +196,13 @@ enum class ExKind : uint8_t
 
 struct PAL_SEHException;
 
+struct LastReportedFuncletInfo
+{
+    PCODE IP;
+    TADDR FP;
+    uint32_t Flags;
+};
+
 struct ExInfo : public ExceptionTrackerBase
 {
     ExInfo(Thread *pThread, EXCEPTION_RECORD *pExceptionRecord, CONTEXT *pExceptionContext, ExKind exceptionKind);
@@ -269,6 +274,10 @@ struct ExInfo : public ExceptionTrackerBase
     // CONTEXT and REGDISPLAY used by the StackFrameIterator for stack walking
     CONTEXT        m_exContext;
     REGDISPLAY     m_regDisplay;
+    // Initial explicit frame for stack walking
+    Frame         *m_pInitialFrame;
+    // Info on the last reported funclet used to report references in the parent frame
+    LastReportedFuncletInfo m_lastReportedFunclet;
 
 #if defined(TARGET_UNIX)
     void TakeExceptionPointersOwnership(PAL_SEHException* ex);
diff --git a/src/coreclr/vm/field.cpp b/src/coreclr/vm/field.cpp
index b2973d8b4c66..c2eab291cc42 100644
--- a/src/coreclr/vm/field.cpp
+++ b/src/coreclr/vm/field.cpp
@@ -180,8 +180,8 @@ void* FieldDesc::GetStaticAddress(void *base)
 
     void* ret = GetStaticAddressHandle(base);       // Get the handle
 
-        // For value classes, the handle points at an OBJECTREF
-        // which holds the boxed value class, so dereference and unbox.
+    // For value classes, the handle points at an OBJECTREF
+    // which holds the boxed value class, so dereference and unbox.
     if (GetFieldType() == ELEMENT_TYPE_VALUETYPE && !IsRVA())
     {
         OBJECTREF obj = ObjectToOBJECTREF(*(Object**) ret);
@@ -211,11 +211,10 @@ MethodTable * FieldDesc::GetExactDeclaringType(MethodTable * ownerOrSubType)
 
 #endif // #ifndef DACCESS_COMPILE
 
-    // static value classes are actually stored in their boxed form.
-    // this means that their address moves.
+// Static value classes are actually stored in their boxed form.
+// This means that their address moves.
 PTR_VOID FieldDesc::GetStaticAddressHandle(PTR_VOID base)
 {
-
     CONTRACTL
     {
         INSTANCE_CHECK;
@@ -255,7 +254,6 @@ PTR_VOID FieldDesc::GetStaticAddressHandle(PTR_VOID base)
     }
 #endif // FEATURE_METADATA_UPDATER
 
-
     if (IsRVA())
     {
         Module* pModule = GetModule();
@@ -270,12 +268,10 @@ PTR_VOID FieldDesc::GetStaticAddressHandle(PTR_VOID base)
 
     PTR_VOID ret = PTR_VOID(dac_cast<PTR_BYTE>(base) + GetOffset());
 
-
     return ret;
 }
 
 
-
 // These routines encapsulate the operation of getting and setting
 // fields.
 void    FieldDesc::GetInstanceField(OBJECTREF o, VOID * pOutVal)
diff --git a/src/coreclr/vm/field.h b/src/coreclr/vm/field.h
index e2324787feba..c37fa4244dad 100644
--- a/src/coreclr/vm/field.h
+++ b/src/coreclr/vm/field.h
@@ -285,6 +285,14 @@ class FieldDesc
         SetOffset(FIELD_OFFSET_NEW_ENC);
     }
 
+    BOOL IsCollectible()    
+    {
+        LIMITED_METHOD_DAC_CONTRACT;
+
+        LoaderAllocator *pLoaderAllocator = GetApproxEnclosingMethodTable()->GetLoaderAllocator();
+        return pLoaderAllocator->IsCollectible();
+    }
+
     // Was this field added by EnC?
     // If this is true, then this object is an instance of EnCFieldDesc
     BOOL IsEnCNew()
@@ -518,7 +526,7 @@ class FieldDesc
         }
     }
 
-    VOID    CheckRunClassInitThrowing()
+    void CheckRunClassInitThrowing()
     {
         CONTRACTL
         {
diff --git a/src/coreclr/vm/fieldmarshaler.cpp b/src/coreclr/vm/fieldmarshaler.cpp
index 57fc5b82bad1..0d29374cecde 100644
--- a/src/coreclr/vm/fieldmarshaler.cpp
+++ b/src/coreclr/vm/fieldmarshaler.cpp
@@ -189,6 +189,9 @@ VOID ParseNativeType(Module*                     pModule,
         case MarshalInfo::MARSHAL_TYPE_FIXED_WSTR:
             *pNFD = NativeFieldDescriptor(pFD, CoreLibBinder::GetClass(CLASS__UINT16), pargs->fs.fixedStringLength);
             break;
+        case MarshalInfo::MARSHAL_TYPE_POINTER:
+            *pNFD = NativeFieldDescriptor(pFD, NativeFieldCategory::INTEGER, sizeof(void*), sizeof(void*));
+            break;
         case MarshalInfo::MARSHAL_TYPE_UNKNOWN:
         default:
             *pNFD = NativeFieldDescriptor(pFD);
diff --git a/src/coreclr/vm/frames.cpp b/src/coreclr/vm/frames.cpp
index 90c21e54aa81..cfd8eb11a7a9 100644
--- a/src/coreclr/vm/frames.cpp
+++ b/src/coreclr/vm/frames.cpp
@@ -464,6 +464,22 @@ void Frame::PopIfChained()
 }
 #endif // TARGET_UNIX && !DACCESS_COMPILE
 
+#if !defined(TARGET_X86) || defined(TARGET_UNIX)
+/* static */
+void Frame::UpdateFloatingPointRegisters(const PREGDISPLAY pRD)
+{
+    _ASSERTE(!ExecutionManager::IsManagedCode(::GetIP(pRD->pCurrentContext)));
+    while (!ExecutionManager::IsManagedCode(::GetIP(pRD->pCurrentContext)))
+    {
+#ifdef TARGET_UNIX
+        PAL_VirtualUnwind(pRD->pCurrentContext, NULL);
+#else
+        Thread::VirtualUnwindCallFrame(pRD);
+#endif
+    }
+}
+#endif // !TARGET_X86 || TARGET_UNIX
+
 //-----------------------------------------------------------------------
 #endif // #ifndef DACCESS_COMPILE
 //---------------------------------------------------------------
@@ -1348,10 +1364,10 @@ void TransitionFrame::PromoteCallerStack(promote_func* fn, ScanContext* sc)
     {
         VASigCookie *varArgSig = GetVASigCookie();
 
-        //Note: no instantiations needed for varargs
+        SigTypeContext typeContext(varArgSig->classInst, varArgSig->methodInst);
         MetaSig msig(varArgSig->signature,
                      varArgSig->pModule,
-                     NULL);
+                     &typeContext);
         PromoteCallerStackHelper (fn, sc, pFunction, &msig);
     }
 }
@@ -1482,10 +1498,10 @@ void TransitionFrame::PromoteCallerStackUsingGCRefMap(promote_func* fn, ScanCont
             {
                 VASigCookie *varArgSig = dac_cast<PTR_VASigCookie>(*ppObj);
 
-                //Note: no instantiations needed for varargs
+                SigTypeContext typeContext(varArgSig->classInst, varArgSig->methodInst);
                 MetaSig msig(varArgSig->signature,
                                 varArgSig->pModule,
-                                NULL);
+                                &typeContext);
                 PromoteCallerStackHelper (fn, sc, NULL, &msig);
             }
             break;
@@ -1509,10 +1525,10 @@ void PInvokeCalliFrame::PromoteCallerStack(promote_func* fn, ScanContext* sc)
         return;
     }
 
-    // no instantiations needed for varargs
+    SigTypeContext typeContext(varArgSig->classInst, varArgSig->methodInst);
     MetaSig msig(varArgSig->signature,
                  varArgSig->pModule,
-                 NULL);
+                 &typeContext);
     PromoteCallerStackHelper(fn, sc, NULL, &msig);
 }
 
@@ -1795,27 +1811,18 @@ MethodDesc* HelperMethodFrame::GetFunction()
 //             This is used when the HelperMethodFrame is first created.
 //         * false: complete any initialization that was left to do, if any.
 //      * unwindState - [out] DAC builds use this to return the unwound machine state.
-//      * hostCallPreference - (See code:HelperMethodFrame::HostCallPreference.)
 //
 // Return Value:
 //     Normally, the function always returns TRUE meaning the initialization succeeded.
 //
-//     However, if hostCallPreference is NoHostCalls, AND if a callee (like
-//     LazyMachState::unwindLazyState) needed to acquire a JIT reader lock and was unable
-//     to do so (lest it re-enter the host), then InsureInit will abort and return FALSE.
-//     So any callers that specify hostCallPreference = NoHostCalls (which is not the
-//     default), should check for FALSE return, and refuse to use the HMF in that case.
-//     Currently only asynchronous calls made by profilers use that code path.
 //
 
 BOOL HelperMethodFrame::InsureInit(bool initialInit,
-                                    MachState * unwindState,
-                                    HostCallPreference hostCallPreference /* = AllowHostCalls */)
+                                    MachState * unwindState)
 {
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        if ((hostCallPreference == AllowHostCalls) && !m_MachState.isValid()) { HOST_CALLS; } else { HOST_NOCALLS; }
         SUPPORTS_DAC;
     } CONTRACTL_END;
 
@@ -1856,8 +1863,7 @@ BOOL HelperMethodFrame::InsureInit(bool initialInit,
             lazy,
             &unwound,
             threadId,
-            0,
-            hostCallPreference);
+            0);
 
 #if !defined(DACCESS_COMPILE)
         if (!unwound.isValid())
@@ -1874,7 +1880,6 @@ BOOL HelperMethodFrame::InsureInit(bool initialInit,
             // will commonly return an unwound state with _pRetAddr==NULL (which counts
             // as an "invalid" MachState). So have DAC builds deliberately fall through
             // rather than aborting when unwound is invalid.
-            _ASSERTE(hostCallPreference == NoHostCalls);
             return FALSE;
         }
 #endif // !defined(DACCESS_COMPILE)
diff --git a/src/coreclr/vm/frames.h b/src/coreclr/vm/frames.h
index 907cc2e0e3eb..ea7eb1e19413 100644
--- a/src/coreclr/vm/frames.h
+++ b/src/coreclr/vm/frames.h
@@ -512,7 +512,7 @@ class Frame : public FrameBase
     // UpdateRegDisplay is generally used to fill out the REGDISPLAY parameter, some
     // overrides (e.g., code:ResumableFrame::UpdateRegDisplay) will actually READ what
     // you pass in. So be sure to pass in a valid or zeroed out REGDISPLAY.
-    virtual void UpdateRegDisplay(const PREGDISPLAY)
+    virtual void UpdateRegDisplay(const PREGDISPLAY, bool updateFloats = false)
     {
         LIMITED_METHOD_DAC_CONTRACT;
         return;
@@ -754,6 +754,12 @@ class Frame : public FrameBase
         LIMITED_METHOD_CONTRACT;
     }
 
+#ifndef DACCESS_COMPILE
+#if !defined(TARGET_X86) || defined(TARGET_UNIX)
+    static void UpdateFloatingPointRegisters(const PREGDISPLAY pRD);
+#endif // !TARGET_X86 || TARGET_UNIX
+#endif // DACCESS_COMPILE
+
 #if defined(TARGET_UNIX) && !defined(DACCESS_COMPILE)
     virtual ~Frame() { LIMITED_METHOD_CONTRACT; }
 
@@ -795,7 +801,7 @@ class ResumableFrame : public Frame
         return TRUE;
     }
 
-    virtual void UpdateRegDisplay(const PREGDISPLAY pRD);
+    virtual void UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats = false);
 
     virtual unsigned GetFrameAttribs() {
         LIMITED_METHOD_DAC_CONTRACT;
@@ -1000,7 +1006,7 @@ class TransitionFrame : public Frame
         return TRUE;
     }
 
-    virtual void UpdateRegDisplay(const PREGDISPLAY);
+    virtual void UpdateRegDisplay(const PREGDISPLAY, bool updateFloats = false);
 #ifdef TARGET_X86
     void UpdateRegDisplayHelper(const PREGDISPLAY, UINT cbStackPop);
 #endif
@@ -1080,7 +1086,11 @@ class FaultingExceptionFrame : public Frame
     unsigned GetFrameAttribs()
     {
         LIMITED_METHOD_DAC_CONTRACT;
+#ifdef FEATURE_EH_FUNCLETS
+        return FRAME_ATTR_EXCEPTION | (!!(m_ctx.ContextFlags & CONTEXT_EXCEPTION_ACTIVE) ? FRAME_ATTR_FAULTED : 0);
+#else
         return FRAME_ATTR_EXCEPTION | FRAME_ATTR_FAULTED;
+#endif        
     }
 
 #ifndef FEATURE_EH_FUNCLETS
@@ -1114,7 +1124,7 @@ class FaultingExceptionFrame : public Frame
         return TRUE;
     }
 
-    virtual void UpdateRegDisplay(const PREGDISPLAY);
+    virtual void UpdateRegDisplay(const PREGDISPLAY, bool updateFloats = false);
 
     // Keep as last entry in class
     DEFINE_VTABLE_GETTER_AND_DTOR(FaultingExceptionFrame)
@@ -1176,7 +1186,7 @@ class FuncEvalFrame : public Frame
         return TRUE;
     }
 
-    virtual void UpdateRegDisplay(const PREGDISPLAY);
+    virtual void UpdateRegDisplay(const PREGDISPLAY, bool updateFloats = false);
 
     virtual DebuggerEval * GetDebuggerEval();
 
@@ -1263,7 +1273,7 @@ class HelperMethodFrame : public Frame
         return TRUE;
     }
 
-    virtual void UpdateRegDisplay(const PREGDISPLAY);
+    virtual void UpdateRegDisplay(const PREGDISPLAY, bool updateFloats = false);
 
     virtual Interception GetInterception()
     {
@@ -1327,7 +1337,7 @@ class HelperMethodFrame : public Frame
     }
 #endif // DACCESS_COMPILE
 
-    BOOL InsureInit(bool initialInit, struct MachState* unwindState, HostCallPreference hostCallPreference = AllowHostCalls);
+    BOOL InsureInit(bool initialInit, struct MachState* unwindState);
 
     LazyMachState * MachineState() {
         LIMITED_METHOD_CONTRACT;
@@ -2037,7 +2047,7 @@ class PInvokeCalliFrame : public FramedMethodFrame
     }
 
 #ifdef TARGET_X86
-    virtual void UpdateRegDisplay(const PREGDISPLAY);
+    virtual void UpdateRegDisplay(const PREGDISPLAY, bool updateFloats = false);
 #endif // TARGET_X86
 
     BOOL TraceFrame(Thread *thread, BOOL fromPatch,
@@ -2081,7 +2091,7 @@ class HijackFrame : public Frame
         return TRUE;
     }
 
-    virtual void UpdateRegDisplay(const PREGDISPLAY);
+    virtual void UpdateRegDisplay(const PREGDISPLAY, bool updateFloats = false);
     virtual void GcScanRoots(promote_func *fn, ScanContext* sc);
 
     // HijackFrames are created by trip functions. See OnHijackTripThread()
@@ -2176,7 +2186,7 @@ class StubDispatchFrame : public FramedMethodFrame
     PTR_BYTE GetGCRefMap();
 
 #ifdef TARGET_X86
-    virtual void UpdateRegDisplay(const PREGDISPLAY pRD);
+    virtual void UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats = false);
     virtual PCODE GetReturnAddress();
 #endif // TARGET_X86
 
@@ -2319,7 +2329,7 @@ class ExternalMethodFrame : public FramedMethodFrame
     Interception GetInterception();
 
 #ifdef TARGET_X86
-    virtual void UpdateRegDisplay(const PREGDISPLAY pRD);
+    virtual void UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats = false);
 #endif
 
     // Keep as last entry in class
@@ -2341,7 +2351,7 @@ class DynamicHelperFrame : public FramedMethodFrame
     virtual void GcScanRoots(promote_func *fn, ScanContext* sc);
 
 #ifdef TARGET_X86
-    virtual void UpdateRegDisplay(const PREGDISPLAY pRD);
+    virtual void UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats = false);
 #endif
 
     virtual ETransitionType GetTransitionType()
@@ -2800,7 +2810,8 @@ class InlinedCallFrame : public Frame
     {
         WRAPPER_NO_CONTRACT;
         if (FrameHasActiveCall(this) && HasFunction())
-            return PTR_MethodDesc(m_Datum);
+            // Mask off marker bits
+            return PTR_MethodDesc((dac_cast<TADDR>(m_Datum) & ~(sizeof(TADDR) - 1)));
         else
             return NULL;
     }
@@ -2811,7 +2822,7 @@ class InlinedCallFrame : public Frame
 
 #ifdef HOST_64BIT
         // See code:GenericPInvokeCalliHelper
-        return ((m_Datum != NULL) && !(dac_cast<TADDR>(m_Datum) & 0x3));
+        return ((m_Datum != NULL) && !(dac_cast<TADDR>(m_Datum) & 0x1));
 #else // HOST_64BIT
         return ((dac_cast<TADDR>(m_Datum) & ~0xffff) != 0);
 #endif // HOST_64BIT
@@ -2868,7 +2879,7 @@ class InlinedCallFrame : public Frame
 #endif // defined(TARGET_X86) || defined(TARGET_ARM)
     }
 
-    virtual void UpdateRegDisplay(const PREGDISPLAY);
+    virtual void UpdateRegDisplay(const PREGDISPLAY, bool updateFloats = false);
 
     // m_Datum contains MethodDesc ptr or
     // - on 64 bit host: CALLI target address (if lowest bit is set)
@@ -3034,7 +3045,7 @@ class TailCallFrame : public Frame
         return TRUE;
     }
 
-    virtual void UpdateRegDisplay(const PREGDISPLAY pRD);
+    virtual void UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats = false);
 
 private:
     // Keep as last entry in class
diff --git a/src/coreclr/vm/frozenobjectheap.cpp b/src/coreclr/vm/frozenobjectheap.cpp
index 50ecb1bec528..b5f0913be345 100644
--- a/src/coreclr/vm/frozenobjectheap.cpp
+++ b/src/coreclr/vm/frozenobjectheap.cpp
@@ -47,6 +47,7 @@ Object* FrozenObjectHeapManager::TryAllocateObject(PTR_MethodTable type, size_t
 
             _ASSERT(type != nullptr);
             _ASSERT(FOH_COMMIT_SIZE >= MIN_OBJECT_SIZE);
+            _ASSERT(!type->Collectible());
 
             // Currently we don't support frozen objects with special alignment requirements
             // TODO: We should also give up on arrays of doubles on 32-bit platforms.
diff --git a/src/coreclr/vm/gc_unwind_x86.inl b/src/coreclr/vm/gc_unwind_x86.inl
new file mode 100644
index 000000000000..5b308911bc0b
--- /dev/null
+++ b/src/coreclr/vm/gc_unwind_x86.inl
@@ -0,0 +1,3829 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// This file is shared between CoreCLR and NativeAOT. Some of the differences are handled
+// with the FEATURE_NATIVEAOT and FEATURE_EH_FUNCLETS defines. There are three main methods
+// that are used by both runtimes - DecodeGCHdrInfo, UnwindStackFrameX86, and EnumGcRefsX86.
+
+#define RETURN_ADDR_OFFS        1       // in DWORDS
+
+#define X86_INSTR_TEST_ESP_SIB          0x24
+#define X86_INSTR_PUSH_0                0x6A    // push 00, entire instruction is 0x6A00
+#define X86_INSTR_PUSH_IMM              0x68    // push NNNN,
+#define X86_INSTR_W_PUSH_IND_IMM        0x35FF  // push [NNNN]
+#define X86_INSTR_CALL_REL32            0xE8    // call rel32
+#define X86_INSTR_W_CALL_IND_IMM        0x15FF  // call [addr32]
+#define X86_INSTR_NOP                   0x90    // nop
+#define X86_INSTR_NOP2                  0x9090  // 2-byte nop
+#define X86_INSTR_NOP3_1                0x9090  // 1st word of 3-byte nop
+#define X86_INSTR_NOP3_3                0x90    // 3rd byte of 3-byte nop
+#define X86_INSTR_NOP4                  0x90909090 // 4-byte nop
+#define X86_INSTR_NOP5_1                0x90909090 // 1st dword of 5-byte nop
+#define X86_INSTR_NOP5_5                0x90    // 5th byte of 5-byte nop
+#define X86_INSTR_INT3                  0xCC    // int3
+#define X86_INSTR_HLT                   0xF4    // hlt
+#define X86_INSTR_PUSH_EAX              0x50    // push eax
+#define X86_INSTR_PUSH_EBP              0x55    // push ebp
+#define X86_INSTR_W_MOV_EBP_ESP         0xEC8B  // mov ebp, esp
+#define X86_INSTR_POP_ECX               0x59    // pop ecx
+#define X86_INSTR_RET                   0xC2    // ret imm16
+#define X86_INSTR_RETN                  0xC3    // ret
+#define X86_INSTR_XOR                   0x33    // xor
+#define X86_INSTR_w_TEST_ESP_EAX        0x0485  // test [esp], eax
+#define X86_INSTR_w_TEST_ESP_DWORD_OFFSET_EAX   0x8485      // test [esp-dwOffset], eax
+#define X86_INSTR_w_LEA_ESP_EBP_BYTE_OFFSET     0x658d      // lea esp, [ebp-bOffset]
+#define X86_INSTR_w_LEA_ESP_EBP_DWORD_OFFSET    0xa58d      // lea esp, [ebp-dwOffset]
+#define X86_INSTR_w_LEA_EAX_ESP_BYTE_OFFSET     0x448d      // lea eax, [esp-bOffset]
+#define X86_INSTR_w_LEA_EAX_ESP_DWORD_OFFSET    0x848d      // lea eax, [esp-dwOffset]
+#define X86_INSTR_JMP_NEAR_REL32        0xE9    // near jmp rel32
+#define X86_INSTR_w_JMP_FAR_IND_IMM     0x25FF  // far jmp [addr32]
+
+#ifdef  _DEBUG
+// For dumping of verbose info.
+#ifndef DACCESS_COMPILE
+static  bool  trFixContext          = false;
+#endif
+static  bool  trEnumGCRefs          = false;
+static  bool  dspPtr                = false; // prints the live ptrs as reported
+#endif
+
+__forceinline unsigned decodeUnsigned(PTR_CBYTE& src)
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+
+#ifdef DACCESS_COMPILE
+    PTR_CBYTE begin = src;
+#endif
+
+    BYTE     byte  = *src++;
+    unsigned value = byte & 0x7f;
+    while (byte & 0x80)
+    {
+#ifdef DACCESS_COMPILE
+        // In DAC builds, the target data may be corrupt.  Rather than return incorrect data
+        // and risk wasting time in a potentially long loop, we want to fail early and gracefully.
+        // The data is encoded with 7 value-bits per byte, and so we may need to read a maximum
+        // of 5 bytes (7*5=35) to read a full 32-bit integer.
+        if ((src - begin) > 5)
+        {
+            DacError(CORDBG_E_TARGET_INCONSISTENT);
+        }
+#endif
+
+        byte    = *src++;
+        value <<= 7;
+        value  += byte & 0x7f;
+    }
+    return value;
+}
+
+__forceinline int decodeSigned(PTR_CBYTE& src)
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+
+#ifdef DACCESS_COMPILE
+    PTR_CBYTE begin = src;
+#endif
+
+    BYTE     byte  = *src++;
+    BYTE     first = byte;
+    int      value = byte & 0x3f;
+    while (byte & 0x80)
+    {
+#ifdef DACCESS_COMPILE
+        // In DAC builds, the target data may be corrupt.  Rather than return incorrect data
+        // and risk wasting time in a potentially long loop, we want to fail early and gracefully.
+        // The data is encoded with 7 value-bits per byte, and so we may need to read a maximum
+        // of 5 bytes (7*5=35) to read a full 32-bit integer.
+        if ((src - begin) > 5)
+        {
+            DacError(CORDBG_E_TARGET_INCONSISTENT);
+        }
+#endif
+
+        byte = *src++;
+        value <<= 7;
+        value += byte & 0x7f;
+    }
+    if (first & 0x40)
+        value = -value;
+    return value;
+}
+
+// Fast versions of the above, with one iteration of the loop unrolled
+#define fastDecodeUnsigned(src) (((*(src) & 0x80) == 0) ? (unsigned) (*(src)++) : decodeUnsigned((src)))
+#define fastDecodeSigned(src)   (((*(src) & 0xC0) == 0) ? (unsigned) (*(src)++) : decodeSigned((src)))
+
+// Fast skipping past encoded integers
+#ifndef DACCESS_COMPILE
+#define fastSkipUnsigned(src) { while ((*(src)++) & 0x80) { } }
+#define fastSkipSigned(src)   { while ((*(src)++) & 0x80) { } }
+#else
+// In DAC builds we want to trade-off a little perf in the common case for reliaiblity against corrupt data.
+#define fastSkipUnsigned(src) (decodeUnsigned(src))
+#define fastSkipSigned(src) (decodeSigned(src))
+#endif
+
+
+/*****************************************************************************
+ *
+ *  Decodes the X86 GcInfo header and returns the decoded information
+ *  in the hdrInfo struct.
+ *  curOffset is the code offset within the active method used in the
+ *  computation of PrologOffs/EpilogOffs.
+ *  Returns the size of the header (number of bytes decoded).
+ */
+size_t DecodeGCHdrInfo(GCInfoToken gcInfoToken,
+                       unsigned    curOffset,
+                       hdrInfo   * infoPtr)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        SUPPORTS_DAC;
+    } CONTRACTL_END;
+
+    PTR_CBYTE table = (PTR_CBYTE) gcInfoToken.Info;
+#if VERIFY_GC_TABLES
+    _ASSERTE(*castto(table, unsigned short *)++ == 0xFEEF);
+#endif
+
+    infoPtr->methodSize = fastDecodeUnsigned(table);
+
+    _ASSERTE(curOffset >= 0);
+    _ASSERTE(curOffset <= infoPtr->methodSize);
+
+    /* Decode the InfoHdr */
+
+    InfoHdr header;
+    table = decodeHeader(table, gcInfoToken.Version, &header);
+
+    BOOL hasArgTabOffset = FALSE;
+    if (header.untrackedCnt == HAS_UNTRACKED)
+    {
+        hasArgTabOffset = TRUE;
+        header.untrackedCnt = fastDecodeUnsigned(table);
+    }
+
+    if (header.varPtrTableSize == HAS_VARPTR)
+    {
+        hasArgTabOffset = TRUE;
+        header.varPtrTableSize = fastDecodeUnsigned(table);
+    }
+
+    if (header.gsCookieOffset == HAS_GS_COOKIE_OFFSET)
+    {
+        header.gsCookieOffset = fastDecodeUnsigned(table);
+    }
+
+    if (header.syncStartOffset == HAS_SYNC_OFFSET)
+    {
+        header.syncStartOffset = decodeUnsigned(table);
+        header.syncEndOffset = decodeUnsigned(table);
+
+        _ASSERTE(header.syncStartOffset != INVALID_SYNC_OFFSET && header.syncEndOffset != INVALID_SYNC_OFFSET);
+        _ASSERTE(header.syncStartOffset < header.syncEndOffset);
+    }
+
+    if (header.revPInvokeOffset == HAS_REV_PINVOKE_FRAME_OFFSET)
+    {
+        header.revPInvokeOffset = fastDecodeUnsigned(table);
+    }
+
+    /* Some sanity checks on header */
+
+    _ASSERTE( header.prologSize +
+           (size_t)(header.epilogCount*header.epilogSize) <= infoPtr->methodSize);
+    _ASSERTE( header.epilogCount == 1 || !header.epilogAtEnd);
+
+    _ASSERTE( header.untrackedCnt <= header.argCount+header.frameSize);
+
+    _ASSERTE( header.ebpSaved || !(header.ebpFrame || header.doubleAlign));
+    _ASSERTE(!header.ebpFrame || !header.doubleAlign  );
+    _ASSERTE( header.ebpFrame || !header.security     );
+    _ASSERTE( header.ebpFrame || !header.handlers     );
+    _ASSERTE( header.ebpFrame || !header.localloc     );
+    _ASSERTE( header.ebpFrame || !header.editNcontinue);  // <TODO> : Esp frames NYI for EnC</TODO>
+
+    /* Initialize the infoPtr struct */
+
+    infoPtr->argSize         = header.argCount * 4;
+    infoPtr->ebpFrame        = header.ebpFrame;
+    infoPtr->interruptible   = header.interruptible;
+    infoPtr->returnKind      = (ReturnKind) header.returnKind;
+
+    infoPtr->prologSize      = header.prologSize;
+    infoPtr->epilogSize      = header.epilogSize;
+    infoPtr->epilogCnt       = header.epilogCount;
+    infoPtr->epilogEnd       = header.epilogAtEnd;
+
+    infoPtr->untrackedCnt    = header.untrackedCnt;
+    infoPtr->varPtrTableSize = header.varPtrTableSize;
+    infoPtr->gsCookieOffset  = header.gsCookieOffset;
+
+    infoPtr->syncStartOffset = header.syncStartOffset;
+    infoPtr->syncEndOffset   = header.syncEndOffset;
+    infoPtr->revPInvokeOffset = header.revPInvokeOffset;
+
+    infoPtr->doubleAlign     = header.doubleAlign;
+    infoPtr->handlers        = header.handlers;
+    infoPtr->localloc        = header.localloc;
+    infoPtr->editNcontinue   = header.editNcontinue;
+    infoPtr->varargs         = header.varargs;
+    infoPtr->profCallbacks   = header.profCallbacks;
+    infoPtr->genericsContext = header.genericsContext;
+    infoPtr->genericsContextIsMethodDesc = header.genericsContextIsMethodDesc;
+    infoPtr->isSpeculativeStackWalk = false;
+
+    /* Are we within the prolog of the method? */
+
+    if  (curOffset < infoPtr->prologSize)
+    {
+        infoPtr->prologOffs = curOffset;
+    }
+    else
+    {
+        infoPtr->prologOffs = hdrInfo::NOT_IN_PROLOG;
+    }
+
+    /* Assume we're not in the epilog of the method */
+
+    infoPtr->epilogOffs = hdrInfo::NOT_IN_EPILOG;
+
+    /* Are we within an epilog of the method? */
+
+    if  (infoPtr->epilogCnt)
+    {
+        unsigned epilogStart;
+
+        if  (infoPtr->epilogCnt > 1 || !infoPtr->epilogEnd)
+        {
+#if VERIFY_GC_TABLES
+            _ASSERTE(*castto(table, unsigned short *)++ == 0xFACE);
+#endif
+            epilogStart = 0;
+            for (unsigned i = 0; i < infoPtr->epilogCnt; i++)
+            {
+                epilogStart += fastDecodeUnsigned(table);
+                if  (curOffset > epilogStart &&
+                     curOffset < epilogStart + infoPtr->epilogSize)
+                {
+                    infoPtr->epilogOffs = curOffset - epilogStart;
+                }
+            }
+        }
+        else
+        {
+            epilogStart = infoPtr->methodSize - infoPtr->epilogSize;
+
+            if  (curOffset > epilogStart &&
+                 curOffset < epilogStart + infoPtr->epilogSize)
+            {
+                infoPtr->epilogOffs = curOffset - epilogStart;
+            }
+        }
+
+        infoPtr->syncEpilogStart = epilogStart;
+    }
+
+    unsigned argTabOffset = INVALID_ARGTAB_OFFSET;
+    if (hasArgTabOffset)
+    {
+        argTabOffset = fastDecodeUnsigned(table);
+    }
+    infoPtr->argTabOffset    = argTabOffset;
+
+    size_t frameDwordCount = header.frameSize;
+
+    /* Set the rawStackSize to the number of bytes that it bumps ESP */
+
+    infoPtr->rawStkSize = (UINT)(frameDwordCount * sizeof(size_t));
+
+    /* Calculate the callee saves regMask and adjust stackSize to */
+    /* include the callee saves register spills                   */
+
+    unsigned savedRegs = RM_NONE;
+    unsigned savedRegsCount = 0;
+
+    if  (header.ediSaved)
+    {
+        savedRegsCount++;
+        savedRegs |= RM_EDI;
+    }
+    if  (header.esiSaved)
+    {
+        savedRegsCount++;
+        savedRegs |= RM_ESI;
+    }
+    if  (header.ebxSaved)
+    {
+        savedRegsCount++;
+        savedRegs |= RM_EBX;
+    }
+    if  (header.ebpSaved)
+    {
+        savedRegsCount++;
+        savedRegs |= RM_EBP;
+    }
+
+    infoPtr->savedRegMask = (RegMask)savedRegs;
+
+    infoPtr->savedRegsCountExclFP = savedRegsCount;
+    if (header.ebpFrame || header.doubleAlign)
+    {
+        _ASSERTE(header.ebpSaved);
+        infoPtr->savedRegsCountExclFP = savedRegsCount - 1;
+    }
+
+    frameDwordCount += savedRegsCount;
+
+    infoPtr->stackSize  =  (UINT)(frameDwordCount * sizeof(size_t));
+
+    _ASSERTE(infoPtr->gsCookieOffset == INVALID_GS_COOKIE_OFFSET ||
+             (infoPtr->gsCookieOffset < infoPtr->stackSize) &&
+             ((header.gsCookieOffset % sizeof(void*)) == 0));
+
+    return  table - PTR_CBYTE(gcInfoToken.Info);
+}
+
+/*****************************************************************************/
+
+// We do a "pop eax; jmp eax" to return from a fault or finally handler
+const size_t END_FIN_POP_STACK = sizeof(TADDR);
+
+inline
+size_t GetLocallocSPOffset(hdrInfo * info)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    _ASSERTE(info->localloc && info->ebpFrame);
+
+    unsigned position = info->savedRegsCountExclFP +
+                        1;
+    return position * sizeof(TADDR);
+}
+
+inline
+size_t GetParamTypeArgOffset(hdrInfo * info)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    _ASSERTE((info->genericsContext || info->handlers) && info->ebpFrame);
+
+    unsigned position = info->savedRegsCountExclFP +
+                        info->localloc +
+                        1;  // For CORINFO_GENERICS_CTXT_FROM_PARAMTYPEARG
+    return position * sizeof(TADDR);
+}
+
+inline size_t GetStartShadowSPSlotsOffset(hdrInfo * info)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    _ASSERTE(info->handlers && info->ebpFrame);
+
+    return GetParamTypeArgOffset(info) +
+           sizeof(TADDR); // Slot for end-of-last-executed-filter
+}
+
+/*****************************************************************************
+ *  Returns the start of the hidden slots for the shadowSP for functions
+ *  with exception handlers. There is one slot per nesting level starting
+ *  near Ebp and is zero-terminated after the active slots.
+ */
+
+inline
+PTR_TADDR GetFirstBaseSPslotPtr(TADDR ebp, hdrInfo * info)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    _ASSERTE(info->handlers && info->ebpFrame);
+
+    size_t offsetFromEBP = GetStartShadowSPSlotsOffset(info)
+                        + sizeof(TADDR); // to get to the *start* of the next slot
+
+    return PTR_TADDR(ebp - offsetFromEBP);
+}
+
+inline size_t GetEndShadowSPSlotsOffset(hdrInfo * info, unsigned maxHandlerNestingLevel)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    _ASSERTE(info->handlers && info->ebpFrame);
+
+    unsigned numberOfShadowSPSlots = maxHandlerNestingLevel +
+                                     1 + // For zero-termination
+                                     1; // For a filter (which can be active at the same time as a catch/finally handler
+
+    return GetStartShadowSPSlotsOffset(info) +
+           (numberOfShadowSPSlots * sizeof(TADDR));
+}
+
+/*****************************************************************************
+ *    returns the base frame pointer corresponding to the target nesting level.
+ */
+
+inline
+TADDR GetOutermostBaseFP(TADDR ebp, hdrInfo * info)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    // we are not taking into account double alignment.  We are
+    // safe because the jit currently bails on double alignment if there
+    // are handles or localalloc
+    _ASSERTE(!info->doubleAlign);
+    if (info->localloc)
+    {
+        // If the function uses localloc we will fetch the ESP from the localloc
+        // slot.
+        PTR_TADDR pLocalloc = PTR_TADDR(ebp - GetLocallocSPOffset(info));
+
+        return (*pLocalloc);
+    }
+    else
+    {
+        // Default, go back all the method's local stack size
+        return ebp - info->stackSize + sizeof(int);
+    }
+}
+
+#ifndef FEATURE_NATIVEAOT
+/*****************************************************************************
+ *
+ *  For functions with handlers, checks if it is currently in a handler.
+ *  Either of unwindESP or unwindLevel will specify the target nesting level.
+ *  If unwindLevel is specified, info about the funclet at that nesting level
+ *    will be returned. (Use if you are interested in a specific nesting level.)
+ *  If unwindESP is specified, info for nesting level invoked before the stack
+ *   reached unwindESP will be returned. (Use if you have a specific ESP value
+ *   during stack walking.)
+ *
+ *  *pBaseSP is set to the base SP (base of the stack on entry to
+ *    the current funclet) corresponding to the target nesting level.
+ *  *pNestLevel is set to the nesting level of the target nesting level (useful
+ *    if unwindESP!=IGNORE_VAL
+ *  *pHasInnerFilter will be set to true (only when unwindESP!=IGNORE_VAL) if a filter
+ *    is currently active, but the target nesting level is an outer nesting level.
+ *  *pHadInnerFilter - was the last use of the frame to execute a filter.
+ *    This mainly affects GC lifetime reporting.
+ */
+
+enum FrameType
+{
+    FR_NORMAL,              // Normal method frame - no exceptions currently active
+    FR_FILTER,              // Frame-let of a filter
+    FR_HANDLER,             // Frame-let of a callable catch/fault/finally
+
+    FR_INVALID,             // Invalid frame (for speculative stackwalks)
+};
+
+enum { IGNORE_VAL = -1 };
+
+FrameType   GetHandlerFrameInfo(hdrInfo   * info,
+                                TADDR       frameEBP,
+                                TADDR       unwindESP,
+                                DWORD       unwindLevel,
+                                TADDR     * pBaseSP = NULL,         /* OUT */
+                                DWORD     * pNestLevel = NULL,      /* OUT */
+                                bool      * pHasInnerFilter = NULL, /* OUT */
+                                bool      * pHadInnerFilter = NULL) /* OUT */
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        SUPPORTS_DAC;
+    } CONTRACTL_END;
+
+    _ASSERTE(info->ebpFrame && info->handlers);
+    // One and only one of them should be IGNORE_VAL
+    _ASSERTE((unwindESP == (TADDR) IGNORE_VAL) !=
+           (unwindLevel == (DWORD) IGNORE_VAL));
+    _ASSERTE(pHasInnerFilter == NULL || unwindESP != (TADDR) IGNORE_VAL);
+
+    // Many of the conditions that we'd like to assert cannot be asserted in the case that we're
+    // in the middle of a stackwalk seeded by a profiler, since such seeds can't be trusted
+    // (profilers are external, untrusted sources).  So during profiler walks, we test the condition
+    // and throw an exception if it's not met.  Otherwise, we just assert the condition.
+    #define FAIL_IF_SPECULATIVE_WALK(condition)         \
+        if (info->isSpeculativeStackWalk)               \
+        {                                               \
+            if (!(condition))                           \
+            {                                           \
+                return FR_INVALID;                      \
+            }                                           \
+        }                                               \
+        else                                            \
+        {                                               \
+            _ASSERTE(condition);                        \
+        }
+
+    PTR_TADDR pFirstBaseSPslot = GetFirstBaseSPslotPtr(frameEBP, info);
+    TADDR  baseSP            = GetOutermostBaseFP(frameEBP, info);
+    bool    nonLocalHandlers = false; // Are the funclets invoked by EE (instead of managed code itself)
+    bool    hasInnerFilter   = false;
+    bool    hadInnerFilter   = false;
+
+    /* Get the last non-zero slot >= unwindESP, or lvl<unwindLevel.
+       Also do some sanity checks */
+
+    // The shadow slots contain the SP of the nested EH clauses currently active on the stack.
+    // The slots grow towards lower address on the stack and is terminted by a NULL entry.
+    // Since each subsequent slot contains the SP of a more nested EH clause, the contents of the slots are
+    // expected to be in decreasing order.
+    size_t lvl = 0;
+#ifndef FEATURE_EH_FUNCLETS
+    PTR_TADDR pSlot;
+    for(lvl = 0, pSlot = pFirstBaseSPslot;
+        *pSlot && lvl < unwindLevel;
+        pSlot--, lvl++)
+    {
+        // Filters cant have inner funclets
+        FAIL_IF_SPECULATIVE_WALK(!(baseSP & ICodeManager::SHADOW_SP_IN_FILTER));
+
+        TADDR curSlotVal = *pSlot;
+
+        // The shadowSPs have to be less unless the stack has been unwound.
+        FAIL_IF_SPECULATIVE_WALK(baseSP >  curSlotVal ||
+               (baseSP == curSlotVal && pSlot == pFirstBaseSPslot));
+
+        if (curSlotVal == LCL_FINALLY_MARK)
+        {
+            // Locally called finally
+            baseSP -= sizeof(TADDR);
+        }
+        else
+        {
+            // Is this a funclet we unwound before (can only happen with filters) ?
+            // If unwindESP is specified, normally we expect it to be the last entry in the shadow slot array.
+            // Or, if there is a filter, we expect unwindESP to be the second last entry.  However, this may
+            // not be the case in DAC builds.  For example, the user can use .cxr in an EH clause to set a
+            // CONTEXT captured in the try clause.  In this case, unwindESP will be the ESP of the parent
+            // function, but the shadow slot array will contain the SP of the EH clause, which is closer to
+            // the leaf than the parent method.
+
+            if (unwindESP != (TADDR) IGNORE_VAL &&
+                unwindESP > END_FIN_POP_STACK +
+                (curSlotVal & ~ICodeManager::SHADOW_SP_BITS))
+            {
+                // In non-DAC builds, the only time unwindESP is closer to the root than entries in the shadow
+                // slot array is when the last entry in the array is for a filter.  Also, filters can't have
+                // nested handlers.
+                if ((pSlot[0] & ICodeManager::SHADOW_SP_IN_FILTER) &&
+                    (pSlot[-1] == 0) &&
+                    !(baseSP & ICodeManager::SHADOW_SP_IN_FILTER))
+                {
+                    if (pSlot[0] & ICodeManager::SHADOW_SP_FILTER_DONE)
+                        hadInnerFilter = true;
+                    else
+                        hasInnerFilter = true;
+                    break;
+                }
+                else
+                {
+#if defined(DACCESS_COMPILE)
+                    // In DAC builds, this could happen.  We just need to bail out of this loop early.
+                    break;
+#else  // !DACCESS_COMPILE
+                    // In non-DAC builds, this is an error.
+                    FAIL_IF_SPECULATIVE_WALK(FALSE);
+#endif // DACCESS_COMPILE
+                }
+            }
+
+            nonLocalHandlers = true;
+            baseSP = curSlotVal;
+        }
+    }
+#endif // FEATURE_EH_FUNCLETS
+
+    if (unwindESP != (TADDR) IGNORE_VAL)
+    {
+        FAIL_IF_SPECULATIVE_WALK(baseSP >= unwindESP ||
+               baseSP == unwindESP - sizeof(TADDR));  // About to locally call a finally
+
+        if (baseSP < unwindESP)                       // About to locally call a finally
+            baseSP = unwindESP;
+    }
+    else
+    {
+        FAIL_IF_SPECULATIVE_WALK(lvl == unwindLevel); // unwindLevel must be currently active on stack
+    }
+
+    if (pBaseSP)
+        *pBaseSP = baseSP & ~ICodeManager::SHADOW_SP_BITS;
+
+    if (pNestLevel)
+    {
+        *pNestLevel = (DWORD)lvl;
+    }
+
+    if (pHasInnerFilter)
+        *pHasInnerFilter = hasInnerFilter;
+
+    if (pHadInnerFilter)
+        *pHadInnerFilter = hadInnerFilter;
+
+    if (baseSP & ICodeManager::SHADOW_SP_IN_FILTER)
+    {
+        FAIL_IF_SPECULATIVE_WALK(!hasInnerFilter); // nested filters not allowed
+        return FR_FILTER;
+    }
+    else if (nonLocalHandlers)
+    {
+        return FR_HANDLER;
+    }
+    else
+    {
+        return FR_NORMAL;
+    }
+
+    #undef FAIL_IF_SPECULATIVE_WALK
+}
+
+// Returns the number of bytes at the beginning of the stack frame that shouldn't be
+// modified by an EnC.  This is everything except the space for locals and temporaries.
+inline size_t GetSizeOfFrameHeaderForEnC(hdrInfo * info)
+{
+    WRAPPER_NO_CONTRACT;
+
+    // See comment above Compiler::lvaAssignFrameOffsets() in src\jit\il\lclVars.cpp
+    // for frame layout
+
+    // EnC supports increasing the maximum handler nesting level by always
+    // assuming that the max is MAX_EnC_HANDLER_NESTING_LEVEL. Methods with
+    // a higher max cannot be updated by EnC
+
+    // Take the offset (from EBP) of the last slot of the header, plus one for the EBP slot itself
+    // to get the total size of the header.
+    return sizeof(TADDR) +
+            GetEndShadowSPSlotsOffset(info, MAX_EnC_HANDLER_NESTING_LEVEL);
+}
+#endif // FEATURE_NATIVEAOT
+
+/*****************************************************************************/
+static
+PTR_CBYTE skipToArgReg(const hdrInfo& info, PTR_CBYTE table)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        SUPPORTS_DAC;
+    } CONTRACTL_END;
+
+#ifdef _DEBUG
+    PTR_CBYTE tableStart = table;
+#else
+    if (info.argTabOffset != INVALID_ARGTAB_OFFSET)
+    {
+        return table + info.argTabOffset;
+    }
+#endif
+
+    unsigned count;
+
+#if VERIFY_GC_TABLES
+    _ASSERTE(*castto(table, unsigned short *)++ == 0xBEEF);
+#endif
+
+    /* Skip over the untracked frame variable table */
+
+    count = info.untrackedCnt;
+    while (count-- > 0) {
+        fastSkipSigned(table);
+    }
+
+#if VERIFY_GC_TABLES
+    _ASSERTE(*castto(table, unsigned short *)++ == 0xCAFE);
+#endif
+
+    /* Skip over the frame variable lifetime table */
+
+    count = info.varPtrTableSize;
+    while (count-- > 0) {
+        fastSkipUnsigned(table); fastSkipUnsigned(table); fastSkipUnsigned(table);
+    }
+
+#if VERIFY_GC_TABLES
+    _ASSERTE(*castto(table, unsigned short *) == 0xBABE);
+#endif
+
+#if defined(_DEBUG) && defined(CONSISTENCY_CHECK_MSGF)
+    if (info.argTabOffset != INVALID_ARGTAB_OFFSET)
+    {
+        CONSISTENCY_CHECK_MSGF((info.argTabOffset == (unsigned) (table - tableStart)),
+          ("table = %p, tableStart = %p, info.argTabOffset = %d", table, tableStart, info.argTabOffset));
+    }
+#endif
+
+    return table;
+}
+
+/*****************************************************************************/
+
+#define regNumToMask(regNum) RegMask(1<<(regNum))
+
+/*****************************************************************************
+ Helper for scanArgRegTable() and scanArgRegTableI() for regMasks
+ */
+
+void *      getCalleeSavedReg(PREGDISPLAY pContext, regNum reg)
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+
+    switch (reg)
+    {
+        case REGI_EBP: return pContext->GetEbpLocation();
+        case REGI_EBX: return pContext->GetEbxLocation();
+        case REGI_ESI: return pContext->GetEsiLocation();
+        case REGI_EDI: return pContext->GetEdiLocation();
+
+        default: _ASSERTE(!"bad info.thisPtrResult"); return NULL;
+    }
+}
+
+/*****************************************************************************
+ These functions converts the bits in the GC encoding to RegMask
+ */
+
+inline
+RegMask     convertCalleeSavedRegsMask(unsigned inMask) // EBP,EBX,ESI,EDI
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+
+    _ASSERTE((inMask & 0x0F) == inMask);
+
+    unsigned outMask = RM_NONE;
+    if (inMask & 0x1) outMask |= RM_EDI;
+    if (inMask & 0x2) outMask |= RM_ESI;
+    if (inMask & 0x4) outMask |= RM_EBX;
+    if (inMask & 0x8) outMask |= RM_EBP;
+
+    return (RegMask) outMask;
+}
+
+inline
+RegMask     convertAllRegsMask(unsigned inMask) // EAX,ECX,EDX,EBX, EBP,ESI,EDI
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+
+    _ASSERTE((inMask & 0xEF) == inMask);
+
+    unsigned outMask = RM_NONE;
+    if (inMask & 0x01) outMask |= RM_EAX;
+    if (inMask & 0x02) outMask |= RM_ECX;
+    if (inMask & 0x04) outMask |= RM_EDX;
+    if (inMask & 0x08) outMask |= RM_EBX;
+    if (inMask & 0x20) outMask |= RM_EBP;
+    if (inMask & 0x40) outMask |= RM_ESI;
+    if (inMask & 0x80) outMask |= RM_EDI;
+
+    return (RegMask)outMask;
+}
+
+/*****************************************************************************
+ * scan the register argument table for the not fully interruptible case.
+   this function is called to find all live objects (pushed arguments)
+   and to get the stack base for EBP-less methods.
+
+   NOTE: If info->argTabResult is NULL, info->argHnumResult indicates
+         how many bits in argMask are valid
+         If info->argTabResult is non-NULL, then the argMask field does
+         not fit in 32-bits and the value in argMask meaningless.
+         Instead argHnum specifies the number of (variable-length) elements
+         in the array, and argTabBytes specifies the total byte size of the
+         array. [ Note this is an extremely rare case ]
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable:21000) // Suppress PREFast warning about overly large function
+#endif
+static
+unsigned scanArgRegTable(PTR_CBYTE    table,
+                         unsigned     curOffs,
+                         hdrInfo    * info)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        SUPPORTS_DAC;
+    } CONTRACTL_END;
+
+    regNum    thisPtrReg    = REGI_NA;
+#ifdef _DEBUG
+    bool      isCall        = false;
+#endif
+    unsigned  regMask       = 0;    // EBP,EBX,ESI,EDI
+    unsigned  argMask       = 0;
+    unsigned  argHnum       = 0;
+    PTR_CBYTE argTab        = 0;
+    unsigned  argTabBytes   = 0;
+    unsigned  stackDepth    = 0;
+
+    unsigned  iregMask      = 0;    // EBP,EBX,ESI,EDI
+    unsigned  iargMask      = 0;
+    unsigned  iptrMask      = 0;
+
+#if VERIFY_GC_TABLES
+    _ASSERTE(*castto(table, unsigned short *)++ == 0xBABE);
+#endif
+
+    unsigned scanOffs = 0;
+
+    _ASSERTE(scanOffs <= info->methodSize);
+
+    if (info->ebpFrame) {
+  /*
+      Encoding table for methods with an EBP frame and
+                         that are not fully interruptible
+
+      The encoding used is as follows:
+
+      this pointer encodings:
+
+         01000000          this pointer in EBX
+         00100000          this pointer in ESI
+         00010000          this pointer in EDI
+
+      tiny encoding:
+
+         0bsdDDDD
+                           requires code delta     < 16 (4-bits)
+                           requires pushed argmask == 0
+
+           where    DDDD   is code delta
+                       b   indicates that register EBX is a live pointer
+                       s   indicates that register ESI is a live pointer
+                       d   indicates that register EDI is a live pointer
+
+      small encoding:
+
+         1DDDDDDD bsdAAAAA
+
+                           requires code delta     < 120 (7-bits)
+                           requires pushed argmask <  64 (5-bits)
+
+           where DDDDDDD   is code delta
+                   AAAAA   is the pushed args mask
+                       b   indicates that register EBX is a live pointer
+                       s   indicates that register ESI is a live pointer
+                       d   indicates that register EDI is a live pointer
+
+      medium encoding
+
+         0xFD aaaaaaaa AAAAdddd bseDDDDD
+
+                           requires code delta     <    0x1000000000  (9-bits)
+                           requires pushed argmask < 0x1000000000000 (12-bits)
+
+           where    DDDDD  is the upper 5-bits of the code delta
+                     dddd  is the low   4-bits of the code delta
+                     AAAA  is the upper 4-bits of the pushed arg mask
+                 aaaaaaaa  is the low   8-bits of the pushed arg mask
+                        b  indicates that register EBX is a live pointer
+                        s  indicates that register ESI is a live pointer
+                        e  indicates that register EDI is a live pointer
+
+      medium encoding with interior pointers
+
+         0xF9 DDDDDDDD bsdAAAAAA iiiIIIII
+
+                           requires code delta     < (8-bits)
+                           requires pushed argmask < (5-bits)
+
+           where  DDDDDDD  is the code delta
+                        b  indicates that register EBX is a live pointer
+                        s  indicates that register ESI is a live pointer
+                        d  indicates that register EDI is a live pointer
+                    AAAAA  is the pushed arg mask
+                      iii  indicates that EBX,EDI,ESI are interior pointers
+                    IIIII  indicates that bits is the arg mask are interior
+                           pointers
+
+      large encoding
+
+         0xFE [0BSD0bsd][32-bit code delta][32-bit argMask]
+
+                        b  indicates that register EBX is a live pointer
+                        s  indicates that register ESI is a live pointer
+                        d  indicates that register EDI is a live pointer
+                        B  indicates that register EBX is an interior pointer
+                        S  indicates that register ESI is an interior pointer
+                        D  indicates that register EDI is an interior pointer
+                           requires pushed  argmask < 32-bits
+
+      large encoding  with interior pointers
+
+         0xFA [0BSD0bsd][32-bit code delta][32-bit argMask][32-bit interior pointer mask]
+
+
+                        b  indicates that register EBX is a live pointer
+                        s  indicates that register ESI is a live pointer
+                        d  indicates that register EDI is a live pointer
+                        B  indicates that register EBX is an interior pointer
+                        S  indicates that register ESI is an interior pointer
+                        D  indicates that register EDI is an interior pointer
+                           requires pushed  argmask < 32-bits
+                           requires pushed iArgmask < 32-bits
+
+      huge encoding        This is the only encoding that supports
+                           a pushed argmask which is greater than
+                           32-bits.
+
+         0xFB [0BSD0bsd][32-bit code delta]
+              [32-bit table count][32-bit table size]
+              [pushed ptr offsets table...]
+
+                       b   indicates that register EBX is a live pointer
+                       s   indicates that register ESI is a live pointer
+                       d   indicates that register EDI is a live pointer
+                       B   indicates that register EBX is an interior pointer
+                       S   indicates that register ESI is an interior pointer
+                       D   indicates that register EDI is an interior pointer
+                       the list count is the number of entries in the list
+                       the list size gives the byte-length of the list
+                       the offsets in the list are variable-length
+  */
+        while (scanOffs < curOffs)
+        {
+            iregMask = 0;
+            iargMask = 0;
+            argTab = NULL;
+#ifdef _DEBUG
+            isCall = true;
+#endif
+
+            /* Get the next byte and check for a 'special' entry */
+
+            unsigned encType = *table++;
+#if defined(DACCESS_COMPILE)
+            // In this scenario, it is invalid to have a zero byte in the GC info encoding (refer to the
+            // comments above). At least one bit has to be set.  For example, a byte can represent which
+            // register is the "this" pointer, and this byte has to be 0x10, 0x20, or 0x40.  Having a zero
+            // byte indicates there is most likely some sort of DAC error, and it may lead to problems such as
+            // infinite loops.  So we bail out early instead.
+            if (encType == 0)
+            {
+                DacError(CORDBG_E_TARGET_INCONSISTENT);
+                UNREACHABLE();
+            }
+#endif // DACCESS_COMPILE
+
+            switch (encType)
+            {
+                unsigned    val, nxt;
+
+            default:
+
+                /* A tiny or small call entry */
+                val = encType;
+                if ((val & 0x80) == 0x00) {
+                    if (val & 0x0F) {
+                        /* A tiny call entry */
+                        scanOffs += (val & 0x0F);
+                        regMask   = (val & 0x70) >> 4;
+                        argMask   = 0;
+                        argHnum   = 0;
+                    }
+                    else {
+                        /* This pointer liveness encoding */
+                        regMask   = (val & 0x70) >> 4;
+                        if (regMask == 0x1)
+                            thisPtrReg = REGI_EDI;
+                        else if (regMask == 0x2)
+                            thisPtrReg = REGI_ESI;
+                        else if (regMask == 0x4)
+                            thisPtrReg = REGI_EBX;
+                        else
+                           _ASSERTE(!"illegal encoding for 'this' pointer liveness");
+                    }
+                }
+                else {
+                    /* A small call entry */
+                    scanOffs += (val & 0x7F);
+                    val       = *table++;
+                    regMask   = val >> 5;
+                    argMask   = val & 0x1F;
+                    argHnum   = 5;
+                }
+                break;
+
+            case 0xFD:  // medium encoding
+
+                argMask   = *table++;
+                val       = *table++;
+                argMask  |= ((val & 0xF0) << 4);
+                argHnum   = 12;
+                nxt       = *table++;
+                scanOffs += (val & 0x0F) + ((nxt & 0x1F) << 4);
+                regMask   = nxt >> 5;                   // EBX,ESI,EDI
+
+                break;
+
+            case 0xF9:  // medium encoding with interior pointers
+
+                scanOffs   += *table++;
+                val         = *table++;
+                argMask     = val & 0x1F;
+                argHnum     = 5;
+                regMask     = val >> 5;
+                val         = *table++;
+                iargMask    = val & 0x1F;
+                iregMask    = val >> 5;
+
+                break;
+
+            case 0xFE:  // large encoding
+            case 0xFA:  // large encoding with interior pointers
+
+                val         = *table++;
+                regMask     = val & 0x7;
+                iregMask    = val >> 4;
+                scanOffs   += *dac_cast<PTR_DWORD>(table);  table += sizeof(DWORD);
+                argMask     = *dac_cast<PTR_DWORD>(table);  table += sizeof(DWORD);
+                argHnum     = 31;
+                if (encType == 0xFA) // read iargMask
+                {
+                    iargMask = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
+                }
+                break;
+
+            case 0xFB:  // huge encoding        This is the only partially interruptible
+                        //                      encoding that supports a pushed ArgMask
+                        //                      which is greater than 32-bits.
+                        //                      The ArgMask is encoded using the argTab
+                val         = *table++;
+                regMask     = val & 0x7;
+                iregMask    = val >> 4;
+                scanOffs   += *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
+                argHnum     = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
+                argTabBytes = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
+                argTab      = table;                       table += argTabBytes;
+
+                argMask     = 0;
+                break;
+
+            case 0xFF:
+                scanOffs = curOffs + 1;
+                break;
+
+            } // end case
+
+            // iregMask & iargMask are subsets of regMask & argMask respectively
+
+            _ASSERTE((iregMask & regMask) == iregMask);
+            _ASSERTE((iargMask & argMask) == iargMask);
+
+        } // end while
+
+    }
+    else {
+
+/*
+ *    Encoding table for methods with an ESP frame and are not fully interruptible
+ *    This encoding does not support a pushed ArgMask greater than 32
+ *
+ *               The encoding used is as follows:
+ *
+ *  push     000DDDDD                     ESP push one item with 5-bit delta
+ *  push     00100000 [pushCount]         ESP push multiple items
+ *  reserved 0011xxxx
+ *  skip     01000000 [Delta]             Skip Delta, arbitrary sized delta
+ *  skip     0100DDDD                     Skip small Delta, for call (DDDD != 0)
+ *  pop      01CCDDDD                     ESP pop  CC items with 4-bit delta (CC != 00)
+ *  call     1PPPPPPP                     Call Pattern, P=[0..79]
+ *  call     1101pbsd DDCCCMMM            Call RegMask=pbsd,ArgCnt=CCC,
+ *                                        ArgMask=MMM Delta=commonDelta[DD]
+ *  call     1110pbsd [ArgCnt] [ArgMask]  Call ArgCnt,RegMask=pbsd,[32-bit ArgMask]
+ *  call     11111000 [PBSDpbsd][32-bit delta][32-bit ArgCnt]
+ *                    [32-bit PndCnt][32-bit PndSize][PndOffs...]
+ *  iptr     11110000 [IPtrMask]          Arbitrary 32-bit Interior Pointer Mask
+ *  thisptr  111101RR                     This pointer is in Register RR
+ *                                        00=EDI,01=ESI,10=EBX,11=EBP
+ *  reserved 111100xx                     xx  != 00
+ *  reserved 111110xx                     xx  != 00
+ *  reserved 11111xxx                     xxx != 000 && xxx != 111(EOT)
+ *
+ *   The value 11111111 [0xFF] indicates the end of the table.
+ *
+ *  An offset (at which stack-walking is performed) without an explicit encoding
+ *  is assumed to be a trivial call-site (no GC registers, stack empty before and
+ *  after) to avoid having to encode all trivial calls.
+ *
+ * Note on the encoding used for interior pointers
+ *
+ *   The iptr encoding must immediately precede a call encoding.  It is used to
+ *   transform a normal GC pointer addresses into an interior pointers for GC purposes.
+ *   The mask supplied to the iptr encoding is read from the least signicant bit
+ *   to the most signicant bit. (i.e the lowest bit is read first)
+ *
+ *   p   indicates that register EBP is a live pointer
+ *   b   indicates that register EBX is a live pointer
+ *   s   indicates that register ESI is a live pointer
+ *   d   indicates that register EDI is a live pointer
+ *   P   indicates that register EBP is an interior pointer
+ *   B   indicates that register EBX is an interior pointer
+ *   S   indicates that register ESI is an interior pointer
+ *   D   indicates that register EDI is an interior pointer
+ *
+ *   As an example the following sequence indicates that EDI.ESI and the 2nd pushed pointer
+ *   in ArgMask are really interior pointers.  The pointer in ESI in a normal pointer:
+ *
+ *   iptr 11110000 00010011           => read Interior Ptr, Interior Ptr, Normal Ptr, Normal Ptr, Interior Ptr
+ *   call 11010011 DDCCC011 RRRR=1011 => read EDI is a GC-pointer, ESI is a GC-pointer. EBP is a GC-pointer
+ *                           MMM=0011 => read two GC-pointers arguments on the stack (nested call)
+ *
+ *   Since the call instruction mentions 5 GC-pointers we list them in the required order:
+ *   EDI, ESI, EBP, 1st-pushed pointer, 2nd-pushed pointer
+ *
+ *   And we apply the Interior Pointer mask mmmm=10011 to the above five ordered GC-pointers
+ *   we learn that EDI and ESI are interior GC-pointers and that the second push arg is an
+ *   interior GC-pointer.
+ */
+
+#if defined(DACCESS_COMPILE)
+        DWORD cbZeroBytes = 0;
+#endif // DACCESS_COMPILE
+
+        while (scanOffs <= curOffs)
+        {
+            unsigned callArgCnt;
+            unsigned skip;
+            unsigned newRegMask, inewRegMask;
+            unsigned newArgMask, inewArgMask;
+            unsigned oldScanOffs = scanOffs;
+
+            if (iptrMask)
+            {
+                // We found this iptrMask in the previous iteration.
+                // This iteration must be for a call. Set these variables
+                // so that they are available at the end of the loop
+
+                inewRegMask   = iptrMask & 0x0F; // EBP,EBX,ESI,EDI
+                inewArgMask   = iptrMask >> 4;
+
+                iptrMask      = 0;
+            }
+            else
+            {
+                // Zero out any stale values.
+
+                inewRegMask = 0;
+                inewArgMask = 0;
+            }
+
+            /* Get the next byte and decode it */
+
+            unsigned val = *table++;
+#if defined(DACCESS_COMPILE)
+            // In this scenario, a 0 means that there is a push at the current offset.  For a struct with
+            // two double fields, the JIT may use two movq instructions to push the struct onto the stack, and
+            // the JIT will encode 4 pushes at the same code offset.  This means that we can have up to 4
+            // consecutive bytes of 0 without changing the code offset.  Having more than 4 consecutive bytes
+            // of zero indicates that there is most likely some sort of DAC error, and it may lead to problems
+            // such as infinite loops.  So we bail out early instead.
+            if (val == 0)
+            {
+                cbZeroBytes += 1;
+                if (cbZeroBytes > 4)
+                {
+                    DacError(CORDBG_E_TARGET_INCONSISTENT);
+                    UNREACHABLE();
+                }
+            }
+            else
+            {
+                cbZeroBytes = 0;
+            }
+#endif // DACCESS_COMPILE
+
+#ifdef _DEBUG
+            if (scanOffs != curOffs)
+                isCall = false;
+#endif
+
+            /* Check pushes, pops, and skips */
+
+            if  (!(val & 0x80)) {
+
+                //  iptrMask can immediately precede only calls
+
+                _ASSERTE(inewRegMask == 0);
+                _ASSERTE(inewArgMask == 0);
+
+                if (!(val & 0x40)) {
+
+                    unsigned pushCount;
+
+                    if (!(val & 0x20))
+                    {
+                        //
+                        // push    000DDDDD                 ESP push one item, 5-bit delta
+                        //
+                        pushCount   = 1;
+                        scanOffs   += val & 0x1f;
+                    }
+                    else
+                    {
+                        //
+                        // push    00100000 [pushCount]     ESP push multiple items
+                        //
+                        _ASSERTE(val == 0x20);
+                        pushCount = fastDecodeUnsigned(table);
+                    }
+
+                    if (scanOffs > curOffs)
+                    {
+                        scanOffs = oldScanOffs;
+                        goto FINISHED;
+                    }
+
+                    stackDepth +=  pushCount;
+                }
+                else if ((val & 0x3f) != 0) {
+                    //
+                    //  pop     01CCDDDD         pop CC items, 4-bit delta
+                    //
+                    scanOffs   +=  val & 0x0f;
+                    if (scanOffs > curOffs)
+                    {
+                        scanOffs = oldScanOffs;
+                        goto FINISHED;
+                    }
+                    stackDepth -= (val & 0x30) >> 4;
+
+                } else if (scanOffs < curOffs) {
+                    //
+                    // skip    01000000 [Delta]  Skip arbitrary sized delta
+                    //
+                    skip = fastDecodeUnsigned(table);
+                    scanOffs += skip;
+                }
+                else // don't process a skip if we are already at curOffs
+                    goto FINISHED;
+
+                /* reset regs and args state since we advance past last call site */
+
+                 regMask    = 0;
+                iregMask    = 0;
+                 argMask    = 0;
+                iargMask    = 0;
+                argHnum     = 0;
+
+            }
+            else /* It must be a call, thisptr, or iptr */
+            {
+                switch ((val & 0x70) >> 4) {
+                default:    // case 0-4, 1000xxxx through 1100xxxx
+                    //
+                    // call    1PPPPPPP          Call Pattern, P=[0..79]
+                    //
+                    decodeCallPattern((val & 0x7f), &callArgCnt,
+                                      &newRegMask, &newArgMask, &skip);
+                    // If we've already reached curOffs and the skip amount
+                    // is non-zero then we are done
+                    if ((scanOffs == curOffs) && (skip > 0))
+                        goto FINISHED;
+                    // otherwise process this call pattern
+                    scanOffs   += skip;
+                    if (scanOffs > curOffs)
+                        goto FINISHED;
+#ifdef _DEBUG
+                    isCall      = true;
+#endif
+                    regMask     = newRegMask;
+                    argMask     = newArgMask;   argTab = NULL;
+                    iregMask    = inewRegMask;
+                    iargMask    = inewArgMask;
+                    stackDepth -= callArgCnt;
+                    argHnum     = 2;             // argMask is known to be <= 3
+                    break;
+
+                  case 5:
+                    //
+                    // call    1101RRRR DDCCCMMM  Call RegMask=RRRR,ArgCnt=CCC,
+                    //                        ArgMask=MMM Delta=commonDelta[DD]
+                    //
+                    newRegMask  = val & 0xf;    // EBP,EBX,ESI,EDI
+                    val         = *table++;     // read next byte
+                    skip        = callCommonDelta[val>>6];
+                    // If we've already reached curOffs and the skip amount
+                    // is non-zero then we are done
+                    if ((scanOffs == curOffs) && (skip > 0))
+                        goto FINISHED;
+                    // otherwise process this call encoding
+                    scanOffs   += skip;
+                    if (scanOffs > curOffs)
+                        goto FINISHED;
+#ifdef _DEBUG
+                    isCall      = true;
+#endif
+                    regMask     = newRegMask;
+                    iregMask    = inewRegMask;
+                    callArgCnt  = (val >> 3) & 0x7;
+                    stackDepth -= callArgCnt;
+                    argMask     = (val & 0x7);  argTab = NULL;
+                    iargMask    = inewArgMask;
+                    argHnum     = 3;
+                    break;
+
+                  case 6:
+                    //
+                    // call    1110RRRR [ArgCnt] [ArgMask]
+                    //                          Call ArgCnt,RegMask=RRR,ArgMask
+                    //
+#ifdef _DEBUG
+                    isCall      = true;
+#endif
+                    regMask     = val & 0xf;    // EBP,EBX,ESI,EDI
+                    iregMask    = inewRegMask;
+                    callArgCnt  = fastDecodeUnsigned(table);
+                    stackDepth -= callArgCnt;
+                    argMask     = fastDecodeUnsigned(table);  argTab = NULL;
+                    iargMask    = inewArgMask;
+                    argHnum     = sizeof(argMask) * 8;  // The size of argMask in bits
+                    break;
+
+                  case 7:
+                    switch (val & 0x0C)
+                    {
+                      case 0x00:
+                        //
+                        // 0xF0   iptr     11110000   [IPtrMask] Arbitrary Interior Pointer Mask
+                        //
+                        iptrMask = fastDecodeUnsigned(table);
+                        break;
+
+                      case 0x04:
+                        //
+                        // 0xF4   thisptr  111101RR   This pointer is in Register RR
+                        //                            00=EDI,01=ESI,10=EBX,11=EBP
+                        //
+                        {
+                            static const regNum calleeSavedRegs[] =
+                                { REGI_EDI, REGI_ESI, REGI_EBX, REGI_EBP };
+                            thisPtrReg = calleeSavedRegs[val&0x3];
+                        }
+                        break;
+
+                      case 0x08:
+                        //
+                        // 0xF8   call     11111000   [PBSDpbsd][32-bit delta][32-bit ArgCnt]
+                        //                            [32-bit PndCnt][32-bit PndSize][PndOffs...]
+                        //
+                        val         = *table++;
+                        skip        = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
+// [VSUQFE 4670]
+                        // If we've already reached curOffs and the skip amount
+                        // is non-zero then we are done
+                        if ((scanOffs == curOffs) && (skip > 0))
+                            goto FINISHED;
+// [VSUQFE 4670]
+                        scanOffs   += skip;
+                        if (scanOffs > curOffs)
+                            goto FINISHED;
+#ifdef _DEBUG
+                        isCall      = true;
+#endif
+                        regMask     = val & 0xF;
+                        iregMask    = val >> 4;
+                        callArgCnt  = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
+                        stackDepth -= callArgCnt;
+                        argHnum     = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
+                        argTabBytes = *dac_cast<PTR_DWORD>(table); table += sizeof(DWORD);
+                        argTab      = table;
+                        table      += argTabBytes;
+                        break;
+
+                      case 0x0C:
+                        //
+                        // 0xFF   end      11111111   End of table marker
+                        //
+                        _ASSERTE(val==0xff);
+                        goto FINISHED;
+
+                      default:
+                        _ASSERTE(!"reserved GC encoding");
+                        break;
+                    }
+                    break;
+
+                } // end switch
+
+            } // end else (!(val & 0x80))
+
+            // iregMask & iargMask are subsets of regMask & argMask respectively
+
+            _ASSERTE((iregMask & regMask) == iregMask);
+            _ASSERTE((iargMask & argMask) == iargMask);
+
+        } // end while
+
+    } // end else ebp-less frame
+
+FINISHED:
+
+    // iregMask & iargMask are subsets of regMask & argMask respectively
+
+    _ASSERTE((iregMask & regMask) == iregMask);
+    _ASSERTE((iargMask & argMask) == iargMask);
+
+    if (scanOffs != curOffs)
+    {
+        /* must have been a boring call */
+        info->regMaskResult  = RM_NONE;
+        info->argMaskResult  = ptrArgTP(0);
+        info->iregMaskResult = RM_NONE;
+        info->iargMaskResult = ptrArgTP(0);
+        info->argHnumResult  = 0;
+        info->argTabResult   = NULL;
+        info->argTabBytes    = 0;
+    }
+    else
+    {
+        info->regMaskResult  = convertCalleeSavedRegsMask(regMask);
+        info->argMaskResult  = ptrArgTP(argMask);
+        info->argHnumResult  = argHnum;
+        info->iregMaskResult = convertCalleeSavedRegsMask(iregMask);
+        info->iargMaskResult = ptrArgTP(iargMask);
+        info->argTabResult   = argTab;
+        info->argTabBytes    = argTabBytes;
+    }
+
+#ifdef _DEBUG
+    if (scanOffs != curOffs) {
+        isCall = false;
+    }
+    _ASSERTE(thisPtrReg == REGI_NA || (!isCall || (regNumToMask(thisPtrReg) & info->regMaskResult)));
+#endif
+    info->thisPtrResult  = thisPtrReg;
+
+    _ASSERTE(int(stackDepth) < INT_MAX); // check that it did not underflow
+    return (stackDepth * sizeof(unsigned));
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+
+/*****************************************************************************
+ * scan the register argument table for the fully interruptible case.
+   this function is called to find all live objects (pushed arguments)
+   and to get the stack base for fully interruptible methods.
+   Returns size of things pushed on the stack for ESP frames
+
+   Arguments:
+      table       - The pointer table
+      curOffsRegs - The current code offset that should be used for reporting registers
+      curOffsArgs - The current code offset that should be used for reporting args
+      info        - Incoming arg used to determine if there's a frame, and to save results
+ */
+
+static
+unsigned scanArgRegTableI(PTR_CBYTE    table,
+                          unsigned     curOffsRegs,
+                          unsigned     curOffsArgs,
+                          hdrInfo   *  info)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        SUPPORTS_DAC;
+    } CONTRACTL_END;
+
+    regNum thisPtrReg = REGI_NA;
+    unsigned  ptrRegs    = 0;    // The mask of registers that contain pointers
+    unsigned iptrRegs    = 0;    // The subset of ptrRegs that are interior pointers
+    unsigned  ptrOffs    = 0;    // The code offset of the table entry we are currently looking at
+    unsigned  argCnt     = 0;    // The number of args that have been pushed
+
+    ptrArgTP  ptrArgs(0);        // The mask of stack values that contain pointers.
+    ptrArgTP iptrArgs(0);        // The subset of ptrArgs that are interior pointers.
+    ptrArgTP  argHigh(0);        // The current mask position that corresponds to the top of the stack.
+
+    bool      isThis     = false;
+    bool      iptr       = false;
+
+    // The comment before the call to scanArgRegTableI in EnumGCRefs
+    // describes why curOffsRegs can be smaller than curOffsArgs.
+    _ASSERTE(curOffsRegs <= curOffsArgs);
+
+#if VERIFY_GC_TABLES
+    _ASSERTE(*castto(table, unsigned short *)++ == 0xBABE);
+#endif
+
+    bool      hasPartialArgInfo;
+
+#ifndef UNIX_X86_ABI
+    hasPartialArgInfo = info->ebpFrame;
+#else
+    // For x86/Linux, interruptible code always has full arg info
+    //
+    // This should be aligned with emitFullArgInfo setting at
+    // emitter::emitEndCodeGen (in JIT)
+    hasPartialArgInfo = false;
+#endif
+
+  /*
+      Encoding table for methods that are fully interruptible
+
+      The encoding used is as follows:
+
+          ptr reg dead        00RRRDDD    [RRR != 100]
+          ptr reg live        01RRRDDD    [RRR != 100]
+
+      non-ptr arg push        10110DDD                    [SSS == 110]
+          ptr arg push        10SSSDDD                    [SSS != 110] && [SSS != 111]
+          ptr arg pop         11CCCDDD    [CCC != 000] && [CCC != 110] && [CCC != 111]
+      little delta skip       11000DDD    [CCC == 000]
+      bigger delta skip       11110BBB                    [CCC == 110]
+
+      The values used in the encodings are as follows:
+
+        DDD                 code offset delta from previous entry (0-7)
+        BBB                 bigger delta 000=8,001=16,010=24,...,111=64
+        RRR                 register number (EAX=000,ECX=001,EDX=010,EBX=011,
+                              EBP=101,ESI=110,EDI=111), ESP=100 is reserved
+        SSS                 argument offset from base of stack. This is
+                              redundant for frameless methods as we can
+                              infer it from the previous pushes+pops. However,
+                              for EBP-methods, we only report GC pushes, and
+                              so we need SSS
+        CCC                 argument count being popped (includes only ptrs for EBP methods)
+
+      The following are the 'large' versions:
+
+        large delta skip        10111000 [0xB8] , encodeUnsigned(delta)
+
+        large     ptr arg push  11111000 [0xF8] , encodeUnsigned(pushCount)
+        large non-ptr arg push  11111001 [0xF9] , encodeUnsigned(pushCount)
+        large     ptr arg pop   11111100 [0xFC] , encodeUnsigned(popCount)
+        large         arg dead  11111101 [0xFD] , encodeUnsigned(popCount) for caller-pop args.
+                                                    Any GC args go dead after the call,
+                                                    but are still sitting on the stack
+
+        this pointer prefix     10111100 [0xBC]   the next encoding is a ptr live
+                                                    or a ptr arg push
+                                                    and contains the this pointer
+
+        interior or by-ref      10111111 [0xBF]   the next encoding is a ptr live
+             pointer prefix                         or a ptr arg push
+                                                    and contains an interior
+                                                    or by-ref pointer
+
+
+        The value 11111111 [0xFF] indicates the end of the table.
+  */
+
+#if defined(DACCESS_COMPILE)
+    bool fLastByteIsZero = false;
+#endif // DACCESS_COMPILE
+
+    /* Have we reached the instruction we're looking for? */
+
+    while (ptrOffs <= curOffsArgs)
+    {
+        unsigned    val;
+
+        int         isPop;
+        unsigned    argOfs;
+
+        unsigned    regMask;
+
+        // iptrRegs & iptrArgs are subsets of ptrRegs & ptrArgs respectively
+
+        _ASSERTE((iptrRegs & ptrRegs) == iptrRegs);
+        _ASSERTE((iptrArgs & ptrArgs) == iptrArgs);
+
+        /* Now find the next 'life' transition */
+
+        val = *table++;
+#if defined(DACCESS_COMPILE)
+        // In this scenario, a zero byte means that EAX is going dead at the current offset.  Since EAX
+        // can't go dead more than once at any given offset, it's invalid to have two consecutive bytes
+        // of zero.  If this were to happen, then it means that there is most likely some sort of DAC
+        // error, and it may lead to problems such as infinite loops.  So we bail out early instead.
+        if ((val == 0) && fLastByteIsZero)
+        {
+            DacError(CORDBG_E_TARGET_INCONSISTENT);
+            UNREACHABLE();
+        }
+        fLastByteIsZero = (val == 0);
+#endif // DACCESS_COMPILE
+
+        if  (!(val & 0x80))
+        {
+            /* A small 'regPtr' encoding */
+
+            regNum       reg;
+
+            ptrOffs += (val     ) & 0x7;
+            if (ptrOffs > curOffsArgs) {
+                iptr = isThis = false;
+                goto REPORT_REFS;
+            }
+            else if (ptrOffs > curOffsRegs) {
+                iptr = isThis = false;
+                continue;
+            }
+
+            reg     = (regNum)((val >> 3) & 0x7);
+            regMask = 1 << reg;         // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
+
+#if 0
+            printf("regMask = %04X -> %04X\n", ptrRegs,
+                       (val & 0x40) ? (ptrRegs |  regMask)
+                                    : (ptrRegs & ~regMask));
+#endif
+
+            /* The register is becoming live/dead here */
+
+            if  (val & 0x40)
+            {
+                /* Becomes Live */
+                _ASSERTE((ptrRegs  &  regMask) == 0);
+
+                ptrRegs |=  regMask;
+
+                if  (isThis)
+                {
+                    thisPtrReg = reg;
+                }
+                if  (iptr)
+                {
+                    iptrRegs |= regMask;
+                }
+            }
+            else
+            {
+                /* Becomes Dead */
+                _ASSERTE((ptrRegs  &  regMask) != 0);
+
+                ptrRegs &= ~regMask;
+
+                if  (reg == thisPtrReg)
+                {
+                    thisPtrReg = REGI_NA;
+                }
+                if  (iptrRegs & regMask)
+                {
+                    iptrRegs &= ~regMask;
+                }
+            }
+            iptr = isThis = false;
+            continue;
+        }
+
+        /* This is probably an argument push/pop */
+
+        argOfs = (val & 0x38) >> 3;
+
+        /* 6 [110] and 7 [111] are reserved for other encodings */
+        if  (argOfs < 6)
+        {
+
+            /* A small argument encoding */
+
+            ptrOffs += (val & 0x07);
+            if (ptrOffs > curOffsArgs) {
+                iptr = isThis = false;
+                goto REPORT_REFS;
+            }
+            isPop    = (val & 0x40);
+
+        ARG:
+
+            if  (isPop)
+            {
+                if (argOfs == 0)
+                    continue;           // little skip encoding
+
+                /* We remove (pop) the top 'argOfs' entries */
+
+                _ASSERTE(argOfs || argOfs <= argCnt);
+
+                /* adjust # of arguments */
+
+                argCnt -= argOfs;
+                _ASSERTE(argCnt < MAX_PTRARG_OFS);
+
+//              printf("[%04X] popping %u args: mask = %04X\n", ptrOffs, argOfs, (int)ptrArgs);
+
+                do
+                {
+                    _ASSERTE(!isZero(argHigh));
+
+                    /* Do we have an argument bit that's on? */
+
+                    if  (intersect(ptrArgs, argHigh))
+                    {
+                        /* Turn off the bit */
+
+                        setDiff(ptrArgs, argHigh);
+                        setDiff(iptrArgs, argHigh);
+
+                        /* We've removed one more argument bit */
+
+                        argOfs--;
+                    }
+                    else if (hasPartialArgInfo)
+                        argCnt--;
+                    else /* full arg info && not a ref */
+                        argOfs--;
+
+                    /* Continue with the next lower bit */
+
+                    argHigh >>= 1;
+                }
+                while (argOfs);
+
+                _ASSERTE(!hasPartialArgInfo    ||
+                         isZero(argHigh)       ||
+                        (argHigh == CONSTRUCT_ptrArgTP(1, (argCnt-1))));
+
+                if (hasPartialArgInfo)
+                {
+                    // We always leave argHigh pointing to the next ptr arg.
+                    // So, while argHigh is non-zero, and not a ptrArg, we shift right (and subtract
+                    // one arg from our argCnt) until it is a ptrArg.
+                    while (!intersect(argHigh, ptrArgs) && (!isZero(argHigh)))
+                    {
+                        argHigh >>= 1;
+                        argCnt--;
+                    }
+                }
+
+            }
+            else
+            {
+                /* Add a new ptr arg entry at stack offset 'argOfs' */
+
+                if  (argOfs >= MAX_PTRARG_OFS)
+                {
+                    _ASSERTE_ALL_BUILDS(!"scanArgRegTableI: args pushed 'too deep'");
+                }
+                else
+                {
+                    /* Full arg info reports all pushes, and thus
+                       argOffs has to be consistent with argCnt */
+
+                    _ASSERTE(hasPartialArgInfo || argCnt == argOfs);
+
+                    /* store arg count */
+
+                    argCnt  = argOfs + 1;
+                    _ASSERTE((argCnt < MAX_PTRARG_OFS));
+
+                    /* Compute the appropriate argument offset bit */
+
+                    ptrArgTP argMask = CONSTRUCT_ptrArgTP(1, argOfs);
+
+//                  printf("push arg at offset %02u --> mask = %04X\n", argOfs, (int)argMask);
+
+                    /* We should never push twice at the same offset */
+
+                    _ASSERTE(!intersect( ptrArgs, argMask));
+                    _ASSERTE(!intersect(iptrArgs, argMask));
+
+                    /* We should never push within the current highest offset */
+
+                    // _ASSERTE(argHigh < argMask);
+
+                    /* This is now the highest bit we've set */
+
+                    argHigh = argMask;
+
+                    /* Set the appropriate bit in the argument mask */
+
+                    ptrArgs |= argMask;
+
+                    if (iptr)
+                        iptrArgs |= argMask;
+                }
+
+                iptr = isThis = false;
+            }
+            continue;
+        }
+        else if (argOfs == 6)
+        {
+            if (val & 0x40) {
+                /* Bigger delta  000=8,001=16,010=24,...,111=64 */
+                ptrOffs += (((val & 0x07) + 1) << 3);
+            }
+            else {
+                /* non-ptr arg push */
+                _ASSERTE(!hasPartialArgInfo);
+                ptrOffs += (val & 0x07);
+                if (ptrOffs > curOffsArgs) {
+                    iptr = isThis = false;
+                    goto REPORT_REFS;
+                }
+                argHigh = CONSTRUCT_ptrArgTP(1, argCnt);
+                argCnt++;
+                _ASSERTE(argCnt < MAX_PTRARG_OFS);
+            }
+            continue;
+        }
+
+        /* argOfs was 7 [111] which is reserved for the larger encodings */
+
+        _ASSERTE(argOfs==7);
+
+        switch (val)
+        {
+        case 0xFF:
+            iptr = isThis = false;
+            goto REPORT_REFS;   // the method might loop !!!
+
+        case 0xB8:
+            val = fastDecodeUnsigned(table);
+            ptrOffs += val;
+            continue;
+
+        case 0xBC:
+            isThis = true;
+            break;
+
+        case 0xBF:
+            iptr = true;
+            break;
+
+        case 0xF8:
+        case 0xFC:
+            isPop  = val & 0x04;
+            argOfs = fastDecodeUnsigned(table);
+            goto ARG;
+
+        case 0xFD: {
+            argOfs  = fastDecodeUnsigned(table);
+            _ASSERTE(argOfs && argOfs <= argCnt);
+
+            // Kill the top "argOfs" pointers.
+
+            ptrArgTP argMask;
+            for(argMask = CONSTRUCT_ptrArgTP(1, argCnt); (argOfs != 0); argMask >>= 1)
+            {
+                _ASSERTE(!isZero(argMask) && !isZero(ptrArgs)); // there should be remaining pointers
+
+                if (intersect(ptrArgs, argMask))
+                {
+                    setDiff(ptrArgs, argMask);
+                    setDiff(iptrArgs, argMask);
+                    argOfs--;
+                }
+            }
+
+            // For partial arg info, need to find the next highest pointer for argHigh
+
+            if (hasPartialArgInfo)
+            {
+                for(argHigh = ptrArgTP(0); !isZero(argMask); argMask >>= 1)
+                {
+                    if (intersect(ptrArgs, argMask)) {
+                        argHigh = argMask;
+                        break;
+                    }
+                }
+            }
+            } break;
+
+        case 0xF9:
+            argOfs = fastDecodeUnsigned(table);
+            argCnt  += argOfs;
+            break;
+
+        default:
+            _ASSERTE(!"Unexpected special code %04X");
+        }
+    }
+
+    /* Report all live pointer registers */
+REPORT_REFS:
+
+    _ASSERTE((iptrRegs & ptrRegs) == iptrRegs); // iptrRegs is a subset of ptrRegs
+    _ASSERTE((iptrArgs & ptrArgs) == iptrArgs); // iptrArgs is a subset of ptrArgs
+
+    /* Save the current live register, argument set, and argCnt */
+
+    info->regMaskResult  = convertAllRegsMask(ptrRegs);
+    info->argMaskResult  = ptrArgs;
+    info->argHnumResult  = 0;
+    info->iregMaskResult = convertAllRegsMask(iptrRegs);
+    info->iargMaskResult = iptrArgs;
+
+    info->thisPtrResult  = thisPtrReg;
+    _ASSERTE(thisPtrReg == REGI_NA || (regNumToMask(thisPtrReg) & info->regMaskResult));
+
+    if (hasPartialArgInfo)
+    {
+        return 0;
+    }
+    else
+    {
+        _ASSERTE(int(argCnt) < INT_MAX); // check that it did not underflow
+        return (argCnt * sizeof(unsigned));
+    }
+}
+
+/*****************************************************************************/
+
+unsigned GetPushedArgSize(hdrInfo * info, PTR_CBYTE table, DWORD curOffs)
+{
+    SUPPORTS_DAC;
+
+    unsigned sz;
+
+    if  (info->interruptible)
+    {
+        sz = scanArgRegTableI(skipToArgReg(*info, table),
+                              curOffs,
+                              curOffs,
+                              info);
+    }
+    else
+    {
+        sz = scanArgRegTable(skipToArgReg(*info, table),
+                             curOffs,
+                             info);
+    }
+
+    return sz;
+}
+
+/*****************************************************************************/
+
+inline
+void    TRASH_CALLEE_UNSAVED_REGS(PREGDISPLAY pContext)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+#ifdef _DEBUG
+    /* This is not completely correct as we lose the current value, but
+       it should not really be useful to anyone. */
+    static DWORD s_badData = 0xDEADBEEF;
+    pContext->SetEaxLocation(&s_badData);
+    pContext->SetEcxLocation(&s_badData);
+    pContext->SetEdxLocation(&s_badData);
+#endif //_DEBUG
+}
+
+/*****************************************************************************
+ *  Sizes of certain i386 instructions which are used in the prolog/epilog
+ */
+
+// Can we use sign-extended byte to encode the imm value, or do we need a dword
+#define CAN_COMPRESS(val)       ((INT8)(val) == (INT32)(val))
+
+#define SZ_ADD_REG(val)         ( 2 +  (CAN_COMPRESS(val) ? 1 : 4))
+#define SZ_AND_REG(val)         SZ_ADD_REG(val)
+#define SZ_POP_REG              1
+#define SZ_LEA(offset)          SZ_ADD_REG(offset)
+#define SZ_MOV_REG_REG          2
+
+bool IsMarkerInstr(BYTE val)
+{
+    SUPPORTS_DAC;
+
+#ifdef _DEBUG
+    if (val == X86_INSTR_INT3)
+    {
+        return true;
+    }
+#ifdef HAVE_GCCOVER
+    else // GcCover might have stomped on the instruction
+    {
+        if (GCStress<cfg_any>::IsEnabled())
+        {
+            if (IsGcCoverageInterruptInstructionVal(val))
+            {
+                return true;
+            }
+        }
+    }
+#endif // HAVE_GCCOVER
+#endif // _DEBUG
+
+    return false;
+}
+
+/* Check if the given instruction opcode is the one we expect.
+   This is a "necessary" but not "sufficient" check as it ignores the check
+   if the instruction is one of our special markers (for debugging and GcStress) */
+
+bool CheckInstrByte(BYTE val, BYTE expectedValue)
+{
+    SUPPORTS_DAC;
+    return ((val == expectedValue) || IsMarkerInstr(val));
+}
+
+/* Similar to CheckInstrByte(). Use this to check a masked opcode (ignoring
+   optional bits in the opcode encoding).
+   valPattern is the masked out value.
+   expectedPattern is the mask value we expect.
+   val is the actual instruction opcode
+ */
+bool CheckInstrBytePattern(BYTE valPattern, BYTE expectedPattern, BYTE val)
+{
+    SUPPORTS_DAC;
+
+    _ASSERTE((valPattern & val) == valPattern);
+
+    return ((valPattern == expectedPattern) || IsMarkerInstr(val));
+}
+
+/* Similar to CheckInstrByte() */
+
+bool CheckInstrWord(WORD val, WORD expectedValue)
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+
+    return ((val == expectedValue) || IsMarkerInstr(val & 0xFF));
+}
+
+// Use this to check if the instruction at offset "walkOffset" has already
+// been executed
+// "actualHaltOffset" is the offset when the code was suspended
+// It is assumed that there is linear control flow from offset 0 to "actualHaltOffset".
+//
+// This has been factored out just so that the intent of the comparison
+// is clear (compared to the opposite intent)
+
+bool InstructionAlreadyExecuted(unsigned walkOffset, unsigned actualHaltOffset)
+{
+    SUPPORTS_DAC;
+    return (walkOffset < actualHaltOffset);
+}
+
+// skips past a "arith REG, IMM"
+inline unsigned SKIP_ARITH_REG(int val, PTR_CBYTE base, unsigned offset)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    unsigned delta = 0;
+    if (val != 0)
+    {
+#ifdef _DEBUG
+        // Confirm that arith instruction is at the correct place
+        _ASSERTE(CheckInstrBytePattern(base[offset  ] & 0xFD, 0x81, base[offset]) &&
+                 CheckInstrBytePattern(base[offset+1] & 0xC0, 0xC0, base[offset+1]));
+        // only use DWORD form if needed
+        _ASSERTE(((base[offset] & 2) != 0) == CAN_COMPRESS(val) ||
+                 IsMarkerInstr(base[offset]));
+#endif
+        delta = 2 + (CAN_COMPRESS(val) ? 1 : 4);
+    }
+    return(offset + delta);
+}
+
+inline unsigned SKIP_PUSH_REG(PTR_CBYTE base, unsigned offset)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    // Confirm it is a push instruction
+    _ASSERTE(CheckInstrBytePattern(base[offset] & 0xF8, 0x50, base[offset]));
+    return(offset + 1);
+}
+
+inline unsigned SKIP_POP_REG(PTR_CBYTE base, unsigned offset)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    // Confirm it is a pop instruction
+    _ASSERTE(CheckInstrBytePattern(base[offset] & 0xF8, 0x58, base[offset]));
+    return(offset + 1);
+}
+
+inline unsigned SKIP_MOV_REG_REG(PTR_CBYTE base, unsigned offset)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    // Confirm it is a move instruction
+    // Note that only the first byte may have been stomped on by IsMarkerInstr()
+    // So we can check the second byte directly
+    _ASSERTE(CheckInstrBytePattern(base[offset] & 0xFD, 0x89, base[offset]) &&
+             (base[offset+1] & 0xC0) == 0xC0);
+    return(offset + 2);
+}
+
+inline unsigned SKIP_LEA_ESP_EBP(int val, PTR_CBYTE base, unsigned offset)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+#ifdef _DEBUG
+    // Confirm it is the right instruction
+    // Note that only the first byte may have been stomped on by IsMarkerInstr()
+    // So we can check the second byte directly
+    WORD wOpcode = *(PTR_WORD)base;
+    _ASSERTE((CheckInstrWord(wOpcode, X86_INSTR_w_LEA_ESP_EBP_BYTE_OFFSET) &&
+              (val == *(PTR_SBYTE)(base+2)) &&
+              CAN_COMPRESS(val)) ||
+             (CheckInstrWord(wOpcode, X86_INSTR_w_LEA_ESP_EBP_DWORD_OFFSET) &&
+              (val == *(PTR_INT32)(base+2)) &&
+              !CAN_COMPRESS(val)));
+#endif
+
+    unsigned delta = 2 + (CAN_COMPRESS(val) ? 1 : 4);
+    return(offset + delta);
+}
+
+inline unsigned SKIP_LEA_EAX_ESP(int val, PTR_CBYTE base, unsigned offset)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+#ifdef _DEBUG
+    WORD wOpcode = *(PTR_WORD)(base + offset);
+    if (CheckInstrWord(wOpcode, X86_INSTR_w_LEA_EAX_ESP_BYTE_OFFSET))
+    {
+        _ASSERTE(val == *(PTR_SBYTE)(base + offset + 3));
+        _ASSERTE(CAN_COMPRESS(val));
+    }
+    else
+    {
+        _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_w_LEA_EAX_ESP_DWORD_OFFSET));
+        _ASSERTE(val == *(PTR_INT32)(base + offset + 3));
+        _ASSERTE(!CAN_COMPRESS(val));
+    }
+#endif
+
+    unsigned delta = 3 + (CAN_COMPRESS(-val) ? 1 : 4);
+    return(offset + delta);
+}
+
+inline unsigned SKIP_HELPER_CALL(PTR_CBYTE base, unsigned offset)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    unsigned delta;
+
+    if (CheckInstrByte(base[offset], X86_INSTR_CALL_REL32))
+    {
+        delta = 5;
+    }
+    else
+    {
+#ifdef _DEBUG
+        WORD wOpcode = *(PTR_WORD)(base+offset);
+        _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_W_CALL_IND_IMM));
+#endif
+        delta = 6;
+    }
+
+    return(offset+delta);
+}
+
+unsigned SKIP_ALLOC_FRAME(int size, PTR_CBYTE base, unsigned offset)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        SUPPORTS_DAC;
+    } CONTRACTL_END;
+
+    _ASSERTE(size != 0);
+
+    if (size == sizeof(void*))
+    {
+        // JIT emits "push eax" instead of "sub esp,4"
+        return SKIP_PUSH_REG(base, offset);
+    }
+
+    const int STACK_PROBE_PAGE_SIZE_BYTES = 4096;
+    const int STACK_PROBE_BOUNDARY_THRESHOLD_BYTES = 1024;
+
+    int lastProbedLocToFinalSp = size;
+
+    if (size < STACK_PROBE_PAGE_SIZE_BYTES)
+    {
+        // sub esp, size
+        offset = SKIP_ARITH_REG(size, base, offset);
+    }
+    else
+    {
+        WORD wOpcode = *(PTR_WORD)(base + offset);
+
+        if (CheckInstrWord(wOpcode, X86_INSTR_w_TEST_ESP_DWORD_OFFSET_EAX))
+        {
+            // In .NET 5.0 and earlier for frames that have size smaller than 0x3000 bytes
+            // JIT emits one or two 'test eax, [esp-dwOffset]' instructions before adjusting the stack pointer.
+            _ASSERTE(size < 0x3000);
+
+            // test eax, [esp-0x1000]
+            offset += 7;
+            lastProbedLocToFinalSp -= 0x1000;
+
+            if (size >= 0x2000)
+            {
+#ifdef _DEBUG
+                wOpcode = *(PTR_WORD)(base + offset);
+                _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_w_TEST_ESP_DWORD_OFFSET_EAX));
+#endif
+                //test eax, [esp-0x2000]
+                offset += 7;
+                lastProbedLocToFinalSp -= 0x1000;
+            }
+
+            // sub esp, size
+            offset = SKIP_ARITH_REG(size, base, offset);
+        }
+        else
+        {
+            bool pushedStubParam = false;
+
+            if (CheckInstrByte(base[offset], X86_INSTR_PUSH_EAX))
+            {
+                // push eax
+                offset = SKIP_PUSH_REG(base, offset);
+                pushedStubParam = true;
+            }
+
+            if (CheckInstrByte(base[offset], X86_INSTR_XOR))
+            {
+                // In .NET Core 3.1 and earlier for frames that have size greater than or equal to 0x3000 bytes
+                // JIT emits the following loop.
+                _ASSERTE(size >= 0x3000);
+
+                offset += 2;
+                //      xor eax, eax                2
+                //      [nop]                       0-3
+                // loop:
+                //      test [esp + eax], eax       3
+                //      sub eax, 0x1000             5
+                //      cmp eax, -size              5
+                //      jge loop                    2
+
+                // R2R images that support ReJIT may have extra nops we need to skip over.
+                while (offset < 5)
+                {
+                    if (CheckInstrByte(base[offset], X86_INSTR_NOP))
+                    {
+                        offset++;
+                    }
+                    else
+                    {
+                        break;
+                    }
+                }
+
+                offset += 15;
+
+                if (pushedStubParam)
+                {
+                    // pop eax
+                    offset = SKIP_POP_REG(base, offset);
+                }
+
+                // sub esp, size
+                return SKIP_ARITH_REG(size, base, offset);
+            }
+            else
+            {
+                // In .NET 5.0 and later JIT emits a call to JIT_StackProbe helper.
+
+                if (pushedStubParam)
+                {
+                    // lea eax, [esp-size+4]
+                    offset = SKIP_LEA_EAX_ESP(-size + 4, base, offset);
+                    // call JIT_StackProbe
+                    offset = SKIP_HELPER_CALL(base, offset);
+                    // pop eax
+                    offset = SKIP_POP_REG(base, offset);
+                    // sub esp, size
+                    return SKIP_ARITH_REG(size, base, offset);
+                }
+                else
+                {
+                    // lea eax, [esp-size]
+                    offset = SKIP_LEA_EAX_ESP(-size, base, offset);
+                    // call JIT_StackProbe
+                    offset = SKIP_HELPER_CALL(base, offset);
+                    // mov esp, eax
+                    return SKIP_MOV_REG_REG(base, offset);
+                }
+            }
+        }
+    }
+
+    if (lastProbedLocToFinalSp + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > STACK_PROBE_PAGE_SIZE_BYTES)
+    {
+#ifdef _DEBUG
+        WORD wOpcode = *(PTR_WORD)(base + offset);
+        _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_w_TEST_ESP_EAX));
+#endif
+        // test [esp], eax
+        offset += 3;
+    }
+
+    return offset;
+}
+
+/*****************************************************************************/
+
+const RegMask CALLEE_SAVED_REGISTERS_MASK[] =
+{
+    RM_EDI, // first register to be pushed
+    RM_ESI,
+    RM_EBX,
+    RM_EBP  // last register to be pushed
+};
+
+static void SetLocation(PREGDISPLAY pRD, int ind, PDWORD loc)
+{
+#if defined(FEATURE_NATIVEAOT)
+    static const SIZE_T OFFSET_OF_CALLEE_SAVED_REGISTERS[] =
+    {
+        offsetof(REGDISPLAY, pRdi), // first register to be pushed
+        offsetof(REGDISPLAY, pRsi),
+        offsetof(REGDISPLAY, pRbx),
+        offsetof(REGDISPLAY, pRbp), // last register to be pushed
+    };
+
+    SIZE_T offsetOfRegPtr = OFFSET_OF_CALLEE_SAVED_REGISTERS[ind];
+    *(LPVOID*)(PBYTE(pRD) + offsetOfRegPtr) = loc;
+#elif defined(FEATURE_EH_FUNCLETS)
+    static const SIZE_T OFFSET_OF_CALLEE_SAVED_REGISTERS[] =
+    {
+        offsetof(T_KNONVOLATILE_CONTEXT_POINTERS, Edi), // first register to be pushed
+        offsetof(T_KNONVOLATILE_CONTEXT_POINTERS, Esi),
+        offsetof(T_KNONVOLATILE_CONTEXT_POINTERS, Ebx),
+        offsetof(T_KNONVOLATILE_CONTEXT_POINTERS, Ebp), // last register to be pushed
+    };
+
+    SIZE_T offsetOfRegPtr = OFFSET_OF_CALLEE_SAVED_REGISTERS[ind];
+    *(LPVOID*)(PBYTE(pRD->pCurrentContextPointers) + offsetOfRegPtr) = loc;
+#else
+    static const SIZE_T OFFSET_OF_CALLEE_SAVED_REGISTERS[] =
+    {
+        offsetof(REGDISPLAY, pEdi), // first register to be pushed
+        offsetof(REGDISPLAY, pEsi),
+        offsetof(REGDISPLAY, pEbx),
+        offsetof(REGDISPLAY, pEbp), // last register to be pushed
+    };
+
+    SIZE_T offsetOfRegPtr = OFFSET_OF_CALLEE_SAVED_REGISTERS[ind];
+    *(LPVOID*)(PBYTE(pRD) + offsetOfRegPtr) = loc;
+#endif
+}
+
+/*****************************************************************************/
+
+void UnwindEspFrameEpilog(
+        PREGDISPLAY pContext,
+        hdrInfo * info,
+        PTR_CBYTE epilogBase,
+        bool updateAllRegs)
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+
+    _ASSERTE(info->epilogOffs != hdrInfo::NOT_IN_EPILOG);
+    _ASSERTE(!info->ebpFrame && !info->doubleAlign);
+    _ASSERTE(info->epilogOffs > 0);
+
+    int offset = 0;
+    unsigned ESP = pContext->SP;
+
+    if (info->rawStkSize)
+    {
+        if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
+        {
+            /* We have NOT executed the "ADD ESP, FrameSize",
+               so manually adjust stack pointer */
+            ESP += info->rawStkSize;
+        }
+
+        // We have already popped off the frame (excluding the callee-saved registers)
+
+        if (epilogBase[0] == X86_INSTR_POP_ECX)
+        {
+            // We may use "POP ecx" for doing "ADD ESP, 4",
+            // or we may not (in the case of JMP epilogs)
+            _ASSERTE(info->rawStkSize == sizeof(void*));
+            offset = SKIP_POP_REG(epilogBase, offset);
+        }
+        else
+        {
+            // "add esp, rawStkSize"
+            offset = SKIP_ARITH_REG(info->rawStkSize, epilogBase, offset);
+        }
+    }
+
+    /* Remaining callee-saved regs are at ESP. Need to update
+       regsMask as well to exclude registers which have already been popped. */
+
+    const RegMask regsMask = info->savedRegMask;
+
+    /* Increment "offset" in steps to see which callee-saved
+       registers have already been popped */
+
+    for (unsigned i = ARRAY_SIZE(CALLEE_SAVED_REGISTERS_MASK); i > 0; i--)
+    {
+        RegMask regMask = CALLEE_SAVED_REGISTERS_MASK[i - 1];
+
+        if (!(regMask & regsMask))
+            continue;
+
+        if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
+        {
+            /* We have NOT yet popped off the register.
+               Get the value from the stack if needed */
+            if (updateAllRegs || (regMask == RM_EBP))
+            {
+                SetLocation(pContext, i - 1, PTR_DWORD((TADDR)ESP));
+            }
+
+            /* Adjust ESP */
+            ESP += sizeof(void*);
+        }
+
+        offset = SKIP_POP_REG(epilogBase, offset);
+    }
+
+    //CEE_JMP generates an epilog similar to a normal CEE_RET epilog except for the last instruction
+    _ASSERTE(CheckInstrBytePattern(epilogBase[offset] & X86_INSTR_RET, X86_INSTR_RET, epilogBase[offset]) //ret
+        || CheckInstrBytePattern(epilogBase[offset], X86_INSTR_JMP_NEAR_REL32, epilogBase[offset]) //jmp ret32
+        || CheckInstrWord(*PTR_WORD(epilogBase + offset), X86_INSTR_w_JMP_FAR_IND_IMM)); //jmp [addr32]
+
+    /* Finally we can set pPC */
+    SetRegdisplayPCTAddr(pContext, (TADDR)ESP);
+
+    pContext->SP = ESP;
+}
+
+/*****************************************************************************/
+
+void UnwindEbpDoubleAlignFrameEpilog(
+        PREGDISPLAY pContext,
+        hdrInfo * info,
+        PTR_CBYTE epilogBase,
+        bool updateAllRegs)
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+
+    _ASSERTE(info->epilogOffs != hdrInfo::NOT_IN_EPILOG);
+    _ASSERTE(info->ebpFrame || info->doubleAlign);
+
+    _ASSERTE(info->argSize < 0x10000); // "ret" only has a 2 byte operand
+
+   /* See how many instructions we have executed in the
+      epilog to determine which callee-saved registers
+      have already been popped */
+    int offset = 0;
+
+    unsigned ESP = pContext->SP;
+
+    bool needMovEspEbp = false;
+
+    if (info->doubleAlign)
+    {
+        // add esp, rawStkSize
+
+        if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
+            ESP += info->rawStkSize;
+        _ASSERTE(info->rawStkSize != 0);
+        offset = SKIP_ARITH_REG(info->rawStkSize, epilogBase, offset);
+
+        // We also need "mov esp, ebp" after popping the callee-saved registers
+        needMovEspEbp = true;
+    }
+    else
+    {
+        bool needLea = false;
+
+        if (info->localloc)
+        {
+            // ESP may be variable if a localloc was actually executed. We will reset it.
+            //    lea esp, [ebp-calleeSavedRegs]
+
+            needLea = true;
+        }
+        else if (info->savedRegsCountExclFP == 0)
+        {
+            // We will just generate "mov esp, ebp" and be done with it.
+
+            if (info->rawStkSize != 0)
+            {
+                needMovEspEbp = true;
+            }
+        }
+        else if  (info->rawStkSize == 0)
+        {
+            // do nothing before popping the callee-saved registers
+        }
+        else if (info->rawStkSize == sizeof(void*))
+        {
+            // "pop ecx" will make ESP point to the callee-saved registers
+            if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
+                ESP += sizeof(void*);
+            offset = SKIP_POP_REG(epilogBase, offset);
+        }
+        else
+        {
+            // We need to make ESP point to the callee-saved registers
+            //    lea esp, [ebp-calleeSavedRegs]
+
+            needLea = true;
+        }
+
+        if (needLea)
+        {
+            // lea esp, [ebp-calleeSavedRegs]
+
+            unsigned calleeSavedRegsSize = info->savedRegsCountExclFP * sizeof(void*);
+
+            if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
+                ESP = GetRegdisplayFP(pContext) - calleeSavedRegsSize;
+
+            offset = SKIP_LEA_ESP_EBP(-int(calleeSavedRegsSize), epilogBase, offset);
+        }
+    }
+
+    for (unsigned i = STRING_LENGTH(CALLEE_SAVED_REGISTERS_MASK); i > 0; i--)
+    {
+        RegMask regMask = CALLEE_SAVED_REGISTERS_MASK[i - 1];
+        _ASSERTE(regMask != RM_EBP);
+
+        if ((info->savedRegMask & regMask) == 0)
+            continue;
+
+        if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
+        {
+            if (updateAllRegs)
+            {
+                SetLocation(pContext, i - 1, PTR_DWORD((TADDR)ESP));
+            }
+            ESP += sizeof(void*);
+        }
+
+        offset = SKIP_POP_REG(epilogBase, offset);
+    }
+
+    if (needMovEspEbp)
+    {
+        if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
+            ESP = GetRegdisplayFP(pContext);
+
+        offset = SKIP_MOV_REG_REG(epilogBase, offset);
+    }
+
+    // Have we executed the pop EBP?
+    if (!InstructionAlreadyExecuted(offset, info->epilogOffs))
+    {
+        pContext->SetEbpLocation(PTR_DWORD(TADDR(ESP)));
+        ESP += sizeof(void*);
+    }
+    offset = SKIP_POP_REG(epilogBase, offset);
+
+    SetRegdisplayPCTAddr(pContext, (TADDR)ESP);
+
+    pContext->SP = ESP;
+}
+
+inline SIZE_T GetStackParameterSize(hdrInfo * info)
+{
+    SUPPORTS_DAC;
+    return (info->varargs ? 0 : info->argSize); // Note varargs is caller-popped
+}
+
+//****************************************************************************
+// This is the value ESP is incremented by on doing a "return"
+
+inline SIZE_T ESPIncrOnReturn(hdrInfo * info)
+{
+    SUPPORTS_DAC;
+    return sizeof(void *) + // pop off the return address
+           GetStackParameterSize(info);
+}
+
+/*****************************************************************************/
+
+void UnwindEpilog(
+        PREGDISPLAY pContext,
+        hdrInfo * info,
+        PTR_CBYTE epilogBase,
+        bool updateAllRegs)
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+    _ASSERTE(info->epilogOffs != hdrInfo::NOT_IN_EPILOG);
+    // _ASSERTE(flags & ActiveStackFrame); // <TODO> Wont work for thread death</TODO>
+    _ASSERTE(info->epilogOffs > 0);
+
+    if  (info->ebpFrame || info->doubleAlign)
+    {
+        UnwindEbpDoubleAlignFrameEpilog(pContext, info, epilogBase, updateAllRegs);
+    }
+    else
+    {
+        UnwindEspFrameEpilog(pContext, info, epilogBase, updateAllRegs);
+    }
+
+#ifdef _DEBUG
+    if (updateAllRegs)
+        TRASH_CALLEE_UNSAVED_REGS(pContext);
+#endif
+
+    /* Now adjust stack pointer */
+
+    pContext->SP += ESPIncrOnReturn(info);
+}
+
+/*****************************************************************************/
+
+void UnwindEspFrameProlog(
+        PREGDISPLAY pContext,
+        hdrInfo * info,
+        PTR_CBYTE methodStart,
+        bool updateAllRegs)
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+
+    /* we are in the middle of the prolog */
+    _ASSERTE(info->prologOffs != hdrInfo::NOT_IN_PROLOG);
+    _ASSERTE(!info->ebpFrame && !info->doubleAlign);
+
+    unsigned offset = 0;
+
+#ifdef _DEBUG
+    // If the first two instructions are 'nop, int3', then  we will
+    // assume that is from a JitHalt operation and skip past it
+    if (methodStart[0] == X86_INSTR_NOP && methodStart[1] == X86_INSTR_INT3)
+    {
+        offset += 2;
+    }
+#endif
+
+    const DWORD curOffs = info->prologOffs;
+    unsigned ESP = pContext->SP;
+
+    // Find out how many callee-saved regs have already been pushed
+
+    unsigned regsMask = RM_NONE;
+    PTR_DWORD savedRegPtr = PTR_DWORD((TADDR)ESP);
+
+    for (unsigned i = 0; i < ARRAY_SIZE(CALLEE_SAVED_REGISTERS_MASK); i++)
+    {
+        RegMask regMask = CALLEE_SAVED_REGISTERS_MASK[i];
+
+        if (!(info->savedRegMask & regMask))
+            continue;
+
+        if (InstructionAlreadyExecuted(offset, curOffs))
+        {
+            ESP += sizeof(void*);
+            regsMask    |= regMask;
+        }
+
+        offset = SKIP_PUSH_REG(methodStart, offset);
+    }
+
+    if (info->rawStkSize)
+    {
+        offset = SKIP_ALLOC_FRAME(info->rawStkSize, methodStart, offset);
+
+        // Note that this assumes that only the last instruction in SKIP_ALLOC_FRAME
+        // actually updates ESP
+        if (InstructionAlreadyExecuted(offset, curOffs + 1))
+        {
+            savedRegPtr += (info->rawStkSize / sizeof(DWORD));
+            ESP += info->rawStkSize;
+        }
+    }
+
+    //
+    // Stack probe checks here
+    //
+
+    // Poison the value, we don't set it properly at the end of the prolog
+#ifdef _DEBUG
+    offset = 0xCCCCCCCC;
+#endif
+
+    // Always restore EBP
+    if (regsMask & RM_EBP)
+        pContext->SetEbpLocation(savedRegPtr++);
+
+    if (updateAllRegs)
+    {
+        if (regsMask & RM_EBX)
+            pContext->SetEbxLocation(savedRegPtr++);
+        if (regsMask & RM_ESI)
+            pContext->SetEsiLocation(savedRegPtr++);
+        if (regsMask & RM_EDI)
+            pContext->SetEdiLocation(savedRegPtr++);
+
+        TRASH_CALLEE_UNSAVED_REGS(pContext);
+    }
+
+#if 0
+// NOTE:
+// THIS IS ONLY TRUE IF PROLOGSIZE DOES NOT INCLUDE REG-VAR INITIALIZATION !!!!
+//
+    /* there is (potentially) only one additional
+       instruction in the prolog, (push ebp)
+       but if we would have been passed that instruction,
+       info->prologOffs would be hdrInfo::NOT_IN_PROLOG!
+    */
+    _ASSERTE(offset == info->prologOffs);
+#endif
+
+    pContext->SP = ESP;
+}
+
+/*****************************************************************************/
+
+void UnwindEspFrame(
+        PREGDISPLAY pContext,
+        hdrInfo * info,
+        PTR_CBYTE table,
+        PTR_CBYTE methodStart,
+        DWORD curOffs,
+        unsigned flags)
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+
+    _ASSERTE(!info->ebpFrame && !info->doubleAlign);
+    _ASSERTE(info->epilogOffs == hdrInfo::NOT_IN_EPILOG);
+
+    unsigned ESP = pContext->SP;
+
+
+    if (info->prologOffs != hdrInfo::NOT_IN_PROLOG)
+    {
+        if (info->prologOffs != 0) // Do nothing for the very start of the method
+        {
+            UnwindEspFrameProlog(pContext, info, methodStart, flags);
+            ESP = pContext->SP;
+        }
+    }
+    else
+    {
+        /* we are past the prolog, ESP has been set above */
+
+        // Are there any arguments pushed on the stack?
+
+        ESP += GetPushedArgSize(info, table, curOffs);
+
+        ESP += info->rawStkSize;
+
+        const RegMask regsMask = info->savedRegMask;
+
+        for (unsigned i = ARRAY_SIZE(CALLEE_SAVED_REGISTERS_MASK); i > 0; i--)
+        {
+            RegMask regMask = CALLEE_SAVED_REGISTERS_MASK[i - 1];
+
+            if ((regMask & regsMask) == 0)
+                continue;
+
+            SetLocation(pContext, i - 1, PTR_DWORD((TADDR)ESP));
+
+            ESP += sizeof(unsigned);
+        }
+    }
+
+    /* we can now set the (address of the) return address */
+
+    SetRegdisplayPCTAddr(pContext, (TADDR)ESP);
+
+    /* Now adjust stack pointer */
+
+    pContext->SP = ESP + ESPIncrOnReturn(info);
+}
+
+
+/*****************************************************************************/
+
+void UnwindEbpDoubleAlignFrameProlog(
+        PREGDISPLAY pContext,
+        hdrInfo * info,
+        PTR_CBYTE methodStart,
+        bool updateAllRegs)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    _ASSERTE(info->prologOffs != hdrInfo::NOT_IN_PROLOG);
+    _ASSERTE(info->ebpFrame || info->doubleAlign);
+
+    DWORD offset = 0;
+
+#ifdef _DEBUG
+    // If the first two instructions are 'nop, int3', then  we will
+    // assume that is from a JitHalt operation and skip past it
+    if (methodStart[0] == X86_INSTR_NOP && methodStart[1] == X86_INSTR_INT3)
+    {
+        offset += 2;
+    }
+#endif
+
+    /* Check for the case where EBP has not been updated yet. */
+
+    const DWORD curOffs = info->prologOffs;
+
+    // If we have still not excecuted "push ebp; mov ebp, esp", then we need to
+    // report the frame relative to ESP
+
+    if (!InstructionAlreadyExecuted(offset + 1, curOffs))
+    {
+        _ASSERTE(CheckInstrByte(methodStart [offset], X86_INSTR_PUSH_EBP) ||
+                 CheckInstrWord(*PTR_WORD(methodStart + offset), X86_INSTR_W_MOV_EBP_ESP) ||
+                 CheckInstrByte(methodStart [offset], X86_INSTR_JMP_NEAR_REL32));   // a rejit jmp-stamp
+
+        /* If we're past the "push ebp", adjust ESP to pop EBP off */
+
+        if  (curOffs == (offset + 1))
+            pContext->SP += sizeof(TADDR);
+
+        /* Stack pointer points to return address */
+
+        SetRegdisplayPCTAddr(pContext, (TADDR)pContext->SP);
+
+        /* EBP and callee-saved registers still have the correct value */
+
+        return;
+    }
+
+    // We are atleast after the "push ebp; mov ebp, esp"
+
+    offset = SKIP_MOV_REG_REG(methodStart,
+                SKIP_PUSH_REG(methodStart, offset));
+
+    /* At this point, EBP has been set up. The caller's ESP and the return value
+       can be determined using EBP. Since we are still in the prolog,
+       we need to know our exact location to determine the callee-saved registers */
+
+    const unsigned curEBP = GetRegdisplayFP(pContext);
+
+    if (updateAllRegs)
+    {
+        PTR_DWORD pSavedRegs = PTR_DWORD((TADDR)curEBP);
+
+        /* make sure that we align ESP just like the method's prolog did */
+        if  (info->doubleAlign)
+        {
+            // "and esp,-8"
+            offset = SKIP_ARITH_REG(-8, methodStart, offset);
+            if (curEBP & 0x04)
+            {
+                pSavedRegs--;
+#ifdef _DEBUG
+                if (dspPtr) printf("EnumRef: dblalign ebp: %08X\n", curEBP);
+#endif
+            }
+        }
+
+        /* Increment "offset" in steps to see which callee-saved
+           registers have been pushed already */
+
+        for (unsigned i = 0; i < STRING_LENGTH(CALLEE_SAVED_REGISTERS_MASK); i++)
+        {
+            RegMask regMask = CALLEE_SAVED_REGISTERS_MASK[i];
+            _ASSERTE(regMask != RM_EBP);
+
+            if ((info->savedRegMask & regMask) == 0)
+                continue;
+
+            if (InstructionAlreadyExecuted(offset, curOffs))
+            {
+                SetLocation(pContext, i, PTR_DWORD(--pSavedRegs));
+            }
+
+            // "push reg"
+            offset = SKIP_PUSH_REG(methodStart, offset) ;
+        }
+
+        TRASH_CALLEE_UNSAVED_REGS(pContext);
+    }
+
+    /* The caller's saved EBP is pointed to by our EBP */
+
+    pContext->SetEbpLocation(PTR_DWORD((TADDR)curEBP));
+    pContext->SP = DWORD((TADDR)(curEBP + sizeof(void *)));
+
+    /* Stack pointer points to return address */
+
+    SetRegdisplayPCTAddr(pContext, (TADDR)pContext->SP);
+}
+
+/*****************************************************************************/
+
+bool UnwindEbpDoubleAlignFrame(
+        PREGDISPLAY     pContext,
+        hdrInfo        *info,
+        PTR_CBYTE       table,
+        PTR_CBYTE       methodStart,
+        DWORD           curOffs,
+        IN_EH_FUNCLETS_COMMA(PTR_CBYTE       funcletStart)
+        IN_EH_FUNCLETS_COMMA(bool            isFunclet)
+        bool            updateAllRegs)
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+
+    _ASSERTE(info->ebpFrame || info->doubleAlign);
+
+    const unsigned curESP = pContext->SP;
+    const unsigned curEBP = GetRegdisplayFP(pContext);
+
+    /* First check if we are in a filter (which is obviously after the prolog) */
+
+    if (info->handlers && info->prologOffs == hdrInfo::NOT_IN_PROLOG)
+    {
+        TADDR baseSP;
+
+#ifdef FEATURE_EH_FUNCLETS
+        // Funclets' frame pointers(EBP) are always restored so they can access to main function's local variables.
+        // Therefore the value of EBP is invalid for unwinder so we should use ESP instead.
+        // TODO If funclet frame layout is changed from CodeGen::genFuncletProlog() and genFuncletEpilog(),
+        //      we need to change here accordingly. It is likely to have changes when introducing PSPSym.
+        // TODO Currently we assume that ESP of funclet frames is always fixed but actually it could change.
+        if (isFunclet)
+        {
+            baseSP = curESP;
+            // Set baseSP as initial SP
+            baseSP += GetPushedArgSize(info, table, curOffs);
+
+#ifdef UNIX_X86_ABI
+            // 16-byte stack alignment padding (allocated in genFuncletProlog)
+            // Current funclet frame layout (see CodeGen::genFuncletProlog() and genFuncletEpilog()):
+            //   prolog: sub esp, 12
+            //   epilog: add esp, 12
+            //           ret
+            // SP alignment padding should be added for all instructions except the first one and the last one.
+            // Epilog may not exist (unreachable), so we need to check the instruction code.
+            if (funcletStart != methodStart + curOffs && methodStart[curOffs] != X86_INSTR_RETN)
+                baseSP += 12;
+#endif
+
+            SetRegdisplayPCTAddr(pContext, (TADDR)baseSP);
+
+            pContext->SP = (DWORD)(baseSP + sizeof(TADDR));
+
+            return true;
+        }
+#else // FEATURE_EH_FUNCLETS
+
+        FrameType frameType = GetHandlerFrameInfo(info, curEBP,
+                                                  curESP, (DWORD) IGNORE_VAL,
+                                                  &baseSP);
+
+        /* If we are in a filter, we only need to unwind the funclet stack.
+           For catches/finallies, the normal handling will
+           cause the frame to be unwound all the way up to ebp skipping
+           other frames above it. This is OK, as those frames will be
+           dead. Also, the EE will detect that this has happened and it
+           will handle any EE frames correctly.
+         */
+
+        if (frameType == FR_INVALID)
+        {
+            return false;
+        }
+
+        if (frameType == FR_FILTER)
+        {
+            SetRegdisplayPCTAddr(pContext, (TADDR)baseSP);
+
+            pContext->SP = (DWORD)(baseSP + sizeof(TADDR));
+
+         // pContext->pEbp = same as before;
+
+#ifdef _DEBUG
+            /* The filter has to be called by the VM. So we dont need to
+               update callee-saved registers.
+             */
+
+            if (updateAllRegs)
+            {
+                static DWORD s_badData = 0xDEADBEEF;
+
+                pContext->SetEaxLocation(&s_badData);
+                pContext->SetEcxLocation(&s_badData);
+                pContext->SetEdxLocation(&s_badData);
+
+                pContext->SetEbxLocation(&s_badData);
+                pContext->SetEsiLocation(&s_badData);
+                pContext->SetEdiLocation(&s_badData);
+            }
+#endif
+
+            return true;
+        }
+#endif // !FEATURE_EH_FUNCLETS
+    }
+
+    //
+    // Prolog of an EBP method
+    //
+
+    if (info->prologOffs != hdrInfo::NOT_IN_PROLOG)
+    {
+        UnwindEbpDoubleAlignFrameProlog(pContext, info, methodStart, updateAllRegs);
+
+        /* Now adjust stack pointer. */
+
+        pContext->SP += ESPIncrOnReturn(info);
+        return true;
+    }
+
+    if (updateAllRegs)
+    {
+        // Get to the first callee-saved register
+        PTR_DWORD pSavedRegs = PTR_DWORD((TADDR)curEBP);
+
+        if (info->doubleAlign && (curEBP & 0x04))
+            pSavedRegs--;
+
+        for (unsigned i = 0; i < STRING_LENGTH(CALLEE_SAVED_REGISTERS_MASK); i++)
+        {
+            RegMask regMask = CALLEE_SAVED_REGISTERS_MASK[i];
+            if ((info->savedRegMask & regMask) == 0)
+                continue;
+
+            SetLocation(pContext, i, --pSavedRegs);
+        }
+    }
+
+    /* The caller's ESP will be equal to EBP + retAddrSize + argSize. */
+
+    pContext->SP = (DWORD)(curEBP + sizeof(curEBP) + ESPIncrOnReturn(info));
+
+    /* The caller's saved EIP is right after our EBP */
+
+    SetRegdisplayPCTAddr(pContext, (TADDR)curEBP + RETURN_ADDR_OFFS * sizeof(TADDR));
+
+    /* The caller's saved EBP is pointed to by our EBP */
+
+    pContext->SetEbpLocation(PTR_DWORD((TADDR)curEBP));
+    return true;
+}
+
+bool UnwindStackFrameX86(PREGDISPLAY     pContext,
+                         PTR_CBYTE       methodStart,
+                         DWORD           curOffs,
+                         hdrInfo *       info,
+                         PTR_CBYTE       table,
+                         IN_EH_FUNCLETS_COMMA(PTR_CBYTE       funcletStart)
+                         IN_EH_FUNCLETS_COMMA(bool            isFunclet)
+                         bool            updateAllRegs)
+{
+    if  (info->epilogOffs != hdrInfo::NOT_IN_EPILOG)
+    {
+        /*---------------------------------------------------------------------
+         *  First, handle the epilog
+         */
+
+        PTR_CBYTE epilogBase = methodStart + (curOffs - info->epilogOffs);
+        UnwindEpilog(pContext, info, epilogBase, updateAllRegs);
+    }
+    else if (!info->ebpFrame && !info->doubleAlign)
+    {
+        /*---------------------------------------------------------------------
+         *  Now handle ESP frames
+         */
+
+        UnwindEspFrame(pContext, info, table, methodStart, curOffs, updateAllRegs);
+        return true;
+    }
+    else
+    {
+        /*---------------------------------------------------------------------
+         *  Now we know that have an EBP frame
+         */
+
+        if (!UnwindEbpDoubleAlignFrame(pContext,
+                                       info,
+                                       table,
+                                       methodStart,
+                                       curOffs,
+                                       IN_EH_FUNCLETS_COMMA(funcletStart)
+                                       IN_EH_FUNCLETS_COMMA(isFunclet)
+                                       updateAllRegs))
+            return false;
+    }
+
+    // TODO [DAVBR]: For the full fix for VsWhidbey 450273, all the below
+    // may be uncommented once isLegalManagedCodeCaller works properly
+    // with non-return address inputs, and with non-DEBUG builds
+    /*
+    // Ensure isLegalManagedCodeCaller succeeds for speculative stackwalks.
+    // (We just assert this below for non-speculative stackwalks.)
+    //
+    FAIL_IF_SPECULATIVE_WALK(isLegalManagedCodeCaller(GetControlPC(pContext)));
+    */
+
+    return true;
+}
+
+bool EnumGcRefsX86(PREGDISPLAY     pContext,
+                   PTR_CBYTE       methodStart,
+                   DWORD           curOffs,
+                   GCInfoToken     gcInfoToken,
+                   IN_EH_FUNCLETS_COMMA(PTR_CBYTE       funcletStart)
+                   IN_EH_FUNCLETS_COMMA(bool            isFunclet)
+                   IN_EH_FUNCLETS_COMMA(bool            isFilterFunclet)
+                   unsigned        flags,
+                   GCEnumCallback  pCallBack,
+                   LPVOID          hCallBack)
+{
+#ifdef FEATURE_EH_FUNCLETS
+    if (flags & ParentOfFuncletStackFrame)
+    {
+        LOG((LF_GCROOTS, LL_INFO100000, "Not reporting this frame because it was already reported via another funclet.\n"));
+        return true;
+    }
+#endif // FEATURE_EH_FUNCLETS
+
+    unsigned  EBP     = GetRegdisplayFP(pContext);
+    unsigned  ESP     = pContext->SP;
+
+    unsigned  ptrOffs;
+
+    unsigned  count;
+
+    hdrInfo   info;
+    PTR_CBYTE table = PTR_CBYTE(gcInfoToken.Info);
+#if 0
+    printf("EECodeManager::EnumGcRefs - EIP = %08x ESP = %08x  offset = %x  GC Info is at %08x\n", *pContext->pPC, ESP, curOffs, table);
+#endif
+
+
+    /* Extract the necessary information from the info block header */
+
+    table += DecodeGCHdrInfo(gcInfoToken,
+                             curOffs,
+                             &info);
+
+    _ASSERTE( curOffs <= info.methodSize);
+
+#ifdef  _DEBUG
+//    if ((gcInfoToken.Info == (void*)0x37760d0) && (curOffs == 0x264))
+//        __asm int 3;
+
+    if (trEnumGCRefs) {
+        static unsigned lastESP = 0;
+        unsigned        diffESP = ESP - lastESP;
+        if (diffESP > 0xFFFF) {
+            printf("------------------------------------------------------\n");
+        }
+        lastESP = ESP;
+        printf("EnumGCRefs [%s][%s] at %s.%s + 0x%03X:\n",
+               info.ebpFrame?"ebp":"   ",
+               info.interruptible?"int":"   ",
+               "UnknownClass","UnknownMethod", curOffs);
+        fflush(stdout);
+    }
+#endif
+
+    /* Are we in the prolog or epilog of the method? */
+
+    if (info.prologOffs != hdrInfo::NOT_IN_PROLOG ||
+        info.epilogOffs != hdrInfo::NOT_IN_EPILOG)
+    {
+
+#if !DUMP_PTR_REFS
+        // Under normal circumstances the system will not suspend a thread
+        // if it is in the prolog or epilog of the function.   However ThreadAbort
+        // exception or stack overflows can cause EH to happen in a prolog.
+        // Once in the handler, a GC can happen, so we can get to this code path.
+        // However since we are tearing down this frame, we don't need to report
+        // anything and we can simply return.
+
+        _ASSERTE(flags & ExecutionAborted);
+#endif
+        return true;
+    }
+
+#ifdef _DEBUG
+#define CHK_AND_REPORT_REG(reg, doIt, iptr, regName)                    \
+        if  (doIt)                                                      \
+        {                                                               \
+            if (dspPtr)                                                 \
+                printf("    Live pointer register %s: ", #regName);     \
+                pCallBack(hCallBack,                                    \
+                          (OBJECTREF*)(pContext->Get##regName##Location()), \
+                          (iptr ? GC_CALL_INTERIOR : 0)                 \
+                          | CHECK_APP_DOMAIN                            \
+                          DAC_ARG(DacSlotLocation(reg, 0, false)));     \
+        }
+#else // !_DEBUG
+#define CHK_AND_REPORT_REG(reg, doIt, iptr, regName)                    \
+        if  (doIt)                                                      \
+                pCallBack(hCallBack,                                    \
+                          (OBJECTREF*)(pContext->Get##regName##Location()), \
+                          (iptr ? GC_CALL_INTERIOR : 0)                 \
+                          | CHECK_APP_DOMAIN                            \
+                          DAC_ARG(DacSlotLocation(reg, 0, false)));
+
+#endif // _DEBUG
+
+    /* What kind of a frame is this ? */
+
+#ifndef FEATURE_EH_FUNCLETS
+    FrameType   frameType = FR_NORMAL;
+    TADDR       baseSP = 0;
+
+    if (info.handlers)
+    {
+        _ASSERTE(info.ebpFrame);
+
+        bool    hasInnerFilter, hadInnerFilter;
+        frameType = GetHandlerFrameInfo(&info, EBP,
+                                        ESP, (DWORD) IGNORE_VAL,
+                                        &baseSP, NULL,
+                                        &hasInnerFilter, &hadInnerFilter);
+        _ASSERTE(frameType != FR_INVALID);
+
+        /* If this is the parent frame of a filter which is currently
+           executing, then the filter would have enumerated the frame using
+           the filter PC.
+         */
+
+        if (hasInnerFilter)
+            return true;
+
+        /* If are in a try and we had a filter execute, we may have reported
+           GC refs from the filter (and not using the try's offset). So
+           we had better use the filter's end offset, as the try is
+           effectively dead and its GC ref's would be stale */
+
+        if (hadInnerFilter)
+        {
+            PTR_TADDR pFirstBaseSPslot = GetFirstBaseSPslotPtr(EBP, &info);
+            curOffs = (unsigned)pFirstBaseSPslot[1] - 1;
+            _ASSERTE(curOffs < info.methodSize);
+
+            /* Extract the necessary information from the info block header */
+
+            table = PTR_CBYTE(gcInfoToken.Info);
+
+            table += DecodeGCHdrInfo(gcInfoToken,
+                                     curOffs,
+                                     &info);
+        }
+    }
+#endif
+
+    bool        willContinueExecution = !(flags & ExecutionAborted);
+    unsigned    pushedSize = 0;
+
+    /* if we have been interrupted we don't have to report registers/arguments
+     * because we are about to lose this context anyway.
+     * Alas, if we are in a ebp-less method we have to parse the table
+     * in order to adjust ESP.
+     *
+     * Note that we report "this" for all methods, even if
+     * noncontinuable, because of the off chance they may be
+     * synchronized and we have to release the monitor on unwind. This
+     * could conceivably be optimized, but it turns out to be more
+     * expensive to check whether we're synchronized (which involves
+     * consulting metadata) than to just report "this" all the time in
+     * our most important scenarios.
+     */
+
+    if  (info.interruptible)
+    {
+        unsigned curOffsRegs = curOffs;
+
+        // Don't decrement curOffsRegs when it is 0, as it is an unsigned and will wrap to MAX_UINT
+        //
+        if (curOffsRegs > 0)
+        {
+            // If we are not on the active stack frame, we need to report gc registers
+            // that are live before the call. The reason is that the liveness of gc registers
+            // may change across a call to a method that does not return. In this case the instruction
+            // after the call may be a jump target and a register that didn't have a live gc pointer
+            // before the call may have a live gc pointer after the jump. To make sure we report the
+            // registers that have live gc pointers before the call we subtract 1 from curOffs.
+            if ((flags & ActiveStackFrame) == 0)
+            {
+                // We are not the top most stack frame (i.e. the ActiveStackFrame)
+                curOffsRegs--;   // decrement curOffsRegs
+            }
+        }
+
+        pushedSize = scanArgRegTableI(skipToArgReg(info, table), curOffsRegs, curOffs, &info);
+
+        RegMask   regs  = info.regMaskResult;
+        RegMask  iregs  = info.iregMaskResult;
+        ptrArgTP  args  = info.argMaskResult;
+        ptrArgTP iargs  = info.iargMaskResult;
+
+        _ASSERTE((isZero(args) || pushedSize != 0) || info.ebpFrame);
+        _ASSERTE((args & iargs) == iargs);
+
+        /* now report registers and arguments if we are not interrupted */
+
+        if  (willContinueExecution)
+        {
+
+            /* Propagate unsafed registers only in "current" method */
+            /* If this is not the active method, then the callee wil
+             * trash these registers, and so we wont need to report them */
+
+            if (flags & ActiveStackFrame)
+            {
+                CHK_AND_REPORT_REG(REGI_EAX, regs & RM_EAX, iregs & RM_EAX, Eax);
+                CHK_AND_REPORT_REG(REGI_ECX, regs & RM_ECX, iregs & RM_ECX, Ecx);
+                CHK_AND_REPORT_REG(REGI_EDX, regs & RM_EDX, iregs & RM_EDX, Edx);
+            }
+
+            CHK_AND_REPORT_REG(REGI_EBX, regs & RM_EBX, iregs & RM_EBX, Ebx);
+            CHK_AND_REPORT_REG(REGI_EBP, regs & RM_EBP, iregs & RM_EBP, Ebp);
+            CHK_AND_REPORT_REG(REGI_ESI, regs & RM_ESI, iregs & RM_ESI, Esi);
+            CHK_AND_REPORT_REG(REGI_EDI, regs & RM_EDI, iregs & RM_EDI, Edi);
+            _ASSERTE(!(regs & RM_ESP));
+
+            /* Report any pending pointer arguments */
+
+            DWORD * pPendingArgFirst;       // points **AT** first parameter
+            if (!info.ebpFrame)
+            {
+                // -sizeof(void*) because we want to point *AT* first parameter
+                pPendingArgFirst = (DWORD *)(size_t)(ESP + pushedSize - sizeof(void*));
+            }
+            else
+            {
+                _ASSERTE(willContinueExecution);
+
+#ifdef FEATURE_EH_FUNCLETS
+                // Funclets' frame pointers(EBP) are always restored so they can access to main function's local variables.
+                // Therefore the value of EBP is invalid for unwinder so we should use ESP instead.
+                // See UnwindStackFrame for details.
+                if (isFunclet)
+                {
+                    TADDR baseSP = ESP;
+                    // Set baseSP as initial SP
+                    baseSP += GetPushedArgSize(&info, table, curOffs);
+
+#ifdef UNIX_X86_ABI
+                    // 16-byte stack alignment padding (allocated in genFuncletProlog)
+                    // Current funclet frame layout (see CodeGen::genFuncletProlog() and genFuncletEpilog()):
+                    //   prolog: sub esp, 12
+                    //   epilog: add esp, 12
+                    //           ret
+                    // SP alignment padding should be added for all instructions except the first one and the last one.
+                    // Epilog may not exist (unreachable), so we need to check the instruction code.
+                    if (funcletStart != methodStart + curOffs && methodStart[curOffs] != X86_INSTR_RETN)
+                        baseSP += 12;
+#endif
+
+                    // -sizeof(void*) because we want to point *AT* first parameter
+                    pPendingArgFirst = (DWORD *)(size_t)(baseSP - sizeof(void*));
+                }
+#else // FEATURE_EH_FUNCLETS
+                if (info.handlers)
+                {
+                    // -sizeof(void*) because we want to point *AT* first parameter
+                    pPendingArgFirst = (DWORD *)(size_t)(baseSP - sizeof(void*));
+                }
+#endif
+                else if (info.localloc)
+                {
+                    TADDR locallocBaseSP = *(DWORD *)(size_t)(EBP - GetLocallocSPOffset(&info));
+                    // -sizeof(void*) because we want to point *AT* first parameter
+                    pPendingArgFirst = (DWORD *)(size_t) (locallocBaseSP - sizeof(void*));
+                }
+                else
+                {
+                    // Note that 'info.stackSize includes the size for pushing EBP, but EBP is pushed
+                    // BEFORE EBP is set from ESP, thus (EBP - info.stackSize) actually points past
+                    // the frame by one DWORD, and thus points *AT* the first parameter
+
+                    pPendingArgFirst = (DWORD *)(size_t)(EBP - info.stackSize);
+                }
+            }
+
+            if  (!isZero(args))
+            {
+                unsigned   i = 0;
+                ptrArgTP   b(1);
+                for (; !isZero(args) && (i < MAX_PTRARG_OFS); i += 1, b <<= 1)
+                {
+                    if  (intersect(args,b))
+                    {
+                        unsigned    argAddr = (unsigned)(size_t)(pPendingArgFirst - i);
+                        bool        iptr    = false;
+
+                        setDiff(args, b);
+                        if (intersect(iargs,b))
+                        {
+                            setDiff(iargs, b);
+                            iptr   = true;
+                        }
+
+#ifdef _DEBUG
+                        if (dspPtr)
+                        {
+                            printf("    Pushed ptr arg  [E");
+                            if  (info.ebpFrame)
+                                printf("BP-%02XH]: ", EBP - argAddr);
+                            else
+                                printf("SP+%02XH]: ", argAddr - ESP);
+                        }
+#endif
+                        _ASSERTE(true == GC_CALL_INTERIOR);
+                        pCallBack(hCallBack, (OBJECTREF *)(size_t)argAddr, (int)iptr | CHECK_APP_DOMAIN
+                                  DAC_ARG(DacSlotLocation(info.ebpFrame ? REGI_EBP : REGI_ESP,
+                                                          info.ebpFrame ? EBP - argAddr : argAddr - ESP,
+                                                          true)));
+                    }
+                }
+            }
+        }
+        else
+        {
+            // Is "this" enregistered. If so, report it as we might need to
+            // release the monitor for synchronized methods.
+            // Else, it is on the stack and will be reported below.
+
+            if (info.thisPtrResult != REGI_NA)
+            {
+                // Synchronized methods and methods satisfying
+                // MethodDesc::AcquiresInstMethodTableFromThis (i.e. those
+                // where "this" is reported in thisPtrResult) are
+                // not supported on value types.
+                _ASSERTE((regNumToMask(info.thisPtrResult) & info.iregMaskResult)== 0);
+
+                void * thisReg = getCalleeSavedReg(pContext, info.thisPtrResult);
+                pCallBack(hCallBack, (OBJECTREF *)thisReg, CHECK_APP_DOMAIN
+                          DAC_ARG(DacSlotLocation(info.thisPtrResult, 0, false)));
+            }
+        }
+    }
+    else /* not interruptible */
+    {
+        pushedSize = scanArgRegTable(skipToArgReg(info, table), curOffs, &info);
+
+        RegMask    regMask = info.regMaskResult;
+        RegMask   iregMask = info.iregMaskResult;
+        ptrArgTP   argMask = info.argMaskResult;
+        ptrArgTP  iargMask = info.iargMaskResult;
+        unsigned   argHnum = info.argHnumResult;
+        PTR_CBYTE   argTab = info.argTabResult;
+
+        /* now report registers and arguments if we are not interrupted */
+
+        if  (willContinueExecution)
+        {
+
+            /* Report all live pointer registers */
+
+            CHK_AND_REPORT_REG(REGI_EDI, regMask & RM_EDI, iregMask & RM_EDI, Edi);
+            CHK_AND_REPORT_REG(REGI_ESI, regMask & RM_ESI, iregMask & RM_ESI, Esi);
+            CHK_AND_REPORT_REG(REGI_EBX, regMask & RM_EBX, iregMask & RM_EBX, Ebx);
+            CHK_AND_REPORT_REG(REGI_EBP, regMask & RM_EBP, iregMask & RM_EBP, Ebp);
+
+            /* Esp cant be reported */
+            _ASSERTE(!(regMask & RM_ESP));
+            /* No callee-trashed registers */
+            _ASSERTE(!(regMask & RM_CALLEE_TRASHED));
+            /* EBP can't be reported unless we have an EBP-less frame */
+            _ASSERTE(!(regMask & RM_EBP) || !(info.ebpFrame));
+
+            /* Report any pending pointer arguments */
+
+            if (argTab != 0)
+            {
+                unsigned    lowBits, stkOffs, argAddr, val;
+
+                // argMask does not fit in 32-bits
+                // thus arguments are reported via a table
+                // Both of these are very rare cases
+
+                do
+                {
+                    val = fastDecodeUnsigned(argTab);
+
+                    lowBits = val &  OFFSET_MASK;
+                    stkOffs = val & ~OFFSET_MASK;
+                    _ASSERTE((lowBits == 0) || (lowBits == byref_OFFSET_FLAG));
+
+                    argAddr = ESP + stkOffs;
+#ifdef _DEBUG
+                    if (dspPtr)
+                        printf("    Pushed %sptr arg at [ESP+%02XH]",
+                               lowBits ? "iptr " : "", stkOffs);
+#endif
+                    _ASSERTE(byref_OFFSET_FLAG == GC_CALL_INTERIOR);
+                    pCallBack(hCallBack, (OBJECTREF *)(size_t)argAddr, lowBits | CHECK_APP_DOMAIN
+                              DAC_ARG(DacSlotLocation(REGI_ESP, stkOffs, true)));
+                }
+                while(--argHnum);
+
+                _ASSERTE(info.argTabResult + info.argTabBytes == argTab);
+            }
+            else
+            {
+                unsigned argAddr = ESP;
+
+                while (!isZero(argMask))
+                {
+                    _ASSERTE(argHnum-- > 0);
+
+                    if  (toUnsigned(argMask) & 1)
+                    {
+                        bool     iptr    = false;
+
+                        if (toUnsigned(iargMask) & 1)
+                            iptr = true;
+#ifdef _DEBUG
+                        if (dspPtr)
+                            printf("    Pushed ptr arg at [ESP+%02XH]",
+                                   argAddr - ESP);
+#endif
+                        _ASSERTE(true == GC_CALL_INTERIOR);
+                        pCallBack(hCallBack, (OBJECTREF *)(size_t)argAddr, (int)iptr | CHECK_APP_DOMAIN
+                                  DAC_ARG(DacSlotLocation(REGI_ESP, argAddr - ESP, true)));
+                    }
+
+                    argMask >>= 1;
+                    iargMask >>= 1;
+                    argAddr  += 4;
+                }
+
+            }
+
+        }
+        else
+        {
+            // Is "this" enregistered. If so, report it as we will need to
+            // release the monitor. Else, it is on the stack and will be
+            // reported below.
+
+            // For partially interruptible code, info.thisPtrResult will be
+            // the last known location of "this". So the compiler needs to
+            // generate information which is correct at every point in the code,
+            // not just at call sites.
+
+            if (info.thisPtrResult != REGI_NA)
+            {
+                // Synchronized methods on value types are not supported
+                _ASSERTE((regNumToMask(info.thisPtrResult) & info.iregMaskResult)== 0);
+
+                void * thisReg = getCalleeSavedReg(pContext, info.thisPtrResult);
+                pCallBack(hCallBack, (OBJECTREF *)thisReg, CHECK_APP_DOMAIN
+                          DAC_ARG(DacSlotLocation(info.thisPtrResult, 0, false)));
+            }
+        }
+
+    } //info.interruptible
+
+    /* compute the argument base (reference point) */
+
+    unsigned    argBase;
+
+    if (info.ebpFrame)
+        argBase = EBP;
+    else
+        argBase = ESP + pushedSize;
+
+#if VERIFY_GC_TABLES
+    _ASSERTE(*castto(table, unsigned short *)++ == 0xBEEF);
+#endif
+
+    unsigned ptrAddr;
+    unsigned lowBits;
+
+
+    /* Process the untracked frame variable table */
+
+#if defined(FEATURE_EH_FUNCLETS)   // funclets
+    // Filters are the only funclet that run during the 1st pass, and must have
+    // both the leaf and the parent frame reported.  In order to avoid double
+    // reporting of the untracked variables, do not report them for the filter.
+    if (!isFilterFunclet)
+#endif // FEATURE_EH_FUNCLETS
+    {
+        count = info.untrackedCnt;
+        int lastStkOffs = 0;
+        while (count-- > 0)
+        {
+            int stkOffs = fastDecodeSigned(table);
+            stkOffs = lastStkOffs - stkOffs;
+            lastStkOffs = stkOffs;
+
+            _ASSERTE(0 == ~OFFSET_MASK % sizeof(void*));
+
+            lowBits  =   OFFSET_MASK & stkOffs;
+            stkOffs &=  ~OFFSET_MASK;
+
+            ptrAddr = argBase + stkOffs;
+            if (info.doubleAlign && stkOffs >= int(info.stackSize - sizeof(void*))) {
+                // We encode the arguments as if they were ESP based variables even though they aren't
+                // If this frame would have ben an ESP based frame,   This fake frame is one DWORD
+                // smaller than the real frame because it did not push EBP but the real frame did.
+                // Thus to get the correct EBP relative offset we have to adjust by info.stackSize-sizeof(void*)
+                ptrAddr = EBP + (stkOffs-(info.stackSize - sizeof(void*)));
+            }
+
+#ifdef  _DEBUG
+            if (dspPtr)
+            {
+                printf("    Untracked %s%s local at [E",
+                            (lowBits & pinned_OFFSET_FLAG) ? "pinned " : "",
+                            (lowBits & byref_OFFSET_FLAG)  ? "byref"   : "");
+
+                int   dspOffs = ptrAddr;
+                char  frameType;
+
+                if (info.ebpFrame) {
+                    dspOffs   -= EBP;
+                    frameType  = 'B';
+                }
+                else {
+                    dspOffs   -= ESP;
+                    frameType  = 'S';
+                }
+
+                if (dspOffs < 0)
+                    printf("%cP-%02XH]: ", frameType, -dspOffs);
+                else
+                    printf("%cP+%02XH]: ", frameType, +dspOffs);
+            }
+#endif
+
+            _ASSERTE((pinned_OFFSET_FLAG == GC_CALL_PINNED) &&
+                   (byref_OFFSET_FLAG  == GC_CALL_INTERIOR));
+            pCallBack(hCallBack, (OBJECTREF*)(size_t)ptrAddr, lowBits | CHECK_APP_DOMAIN
+                      DAC_ARG(DacSlotLocation(info.ebpFrame ? REGI_EBP : REGI_ESP,
+                                              info.ebpFrame ? EBP - ptrAddr : ptrAddr - ESP,
+                                              true)));
+        }
+
+    }
+
+#if VERIFY_GC_TABLES
+    _ASSERTE(*castto(table, unsigned short *)++ == 0xCAFE);
+#endif
+
+    /* Process the frame variable lifetime table */
+    count = info.varPtrTableSize;
+
+    /* If we are not in the active method, we are currently pointing
+     * to the return address; at the return address stack variables
+     * can become dead if the call the last instruction of a try block
+     * and the return address is the jump around the catch block. Therefore
+     * we simply assume an offset inside of call instruction.
+     */
+
+    unsigned newCurOffs;
+
+    if (willContinueExecution)
+    {
+        newCurOffs = (flags & ActiveStackFrame) ?  curOffs    // after "call"
+                                                :  curOffs-1; // inside "call"
+    }
+    else
+    {
+        newCurOffs = curOffs;
+    }
+
+    ptrOffs    = 0;
+
+    while (count-- > 0)
+    {
+        int       stkOffs;
+        unsigned  begOffs;
+        unsigned  endOffs;
+
+        stkOffs = fastDecodeUnsigned(table);
+        begOffs  = ptrOffs + fastDecodeUnsigned(table);
+        endOffs  = begOffs + fastDecodeUnsigned(table);
+
+        _ASSERTE(0 == ~OFFSET_MASK % sizeof(void*));
+
+        lowBits  =   OFFSET_MASK & stkOffs;
+        stkOffs &=  ~OFFSET_MASK;
+
+        if (info.ebpFrame) {
+            stkOffs = -stkOffs;
+            _ASSERTE(stkOffs < 0);
+        }
+        else {
+            _ASSERTE(stkOffs >= 0);
+        }
+
+        ptrAddr = argBase + stkOffs;
+
+        /* Is this variable live right now? */
+
+        if (newCurOffs >= begOffs)
+        {
+            if (newCurOffs <  endOffs)
+            {
+#ifdef  _DEBUG
+                if (dspPtr) {
+                    printf("    Frame %s%s local at [E",
+                           (lowBits & byref_OFFSET_FLAG) ? "byref "   : "",
+#ifndef FEATURE_EH_FUNCLETS
+                           (lowBits & this_OFFSET_FLAG)  ? "this-ptr" : "");
+#else
+                           (lowBits & pinned_OFFSET_FLAG)  ? "pinned" : "");
+#endif
+
+
+                    int  dspOffs = ptrAddr;
+                    char frameType;
+
+                    if (info.ebpFrame) {
+                        dspOffs   -= EBP;
+                        frameType  = 'B';
+                    }
+                    else {
+                        dspOffs   -= ESP;
+                        frameType  = 'S';
+                    }
+
+                    if (dspOffs < 0)
+                        printf("%cP-%02XH]: ", frameType, -dspOffs);
+                    else
+                        printf("%cP+%02XH]: ", frameType, +dspOffs);
+                }
+#endif
+
+                unsigned flags = CHECK_APP_DOMAIN;
+#ifndef FEATURE_EH_FUNCLETS
+                // First  Bit : byref
+                // Second Bit : this
+                // The second bit means `this` not `pinned`. So we ignore it.
+                flags |= lowBits & byref_OFFSET_FLAG;
+#else
+                // First  Bit : byref
+                // Second Bit : pinned
+                // Both bits are valid
+                flags |= lowBits;
+#endif
+
+                _ASSERTE(byref_OFFSET_FLAG == GC_CALL_INTERIOR);
+                pCallBack(hCallBack, (OBJECTREF*)(size_t)ptrAddr, flags
+                          DAC_ARG(DacSlotLocation(info.ebpFrame ? REGI_EBP : REGI_ESP,
+                                          info.ebpFrame ? EBP - ptrAddr : ptrAddr - ESP,
+                                          true)));
+            }
+        }
+        // exit loop early if start of live range is beyond PC, as ranges are sorted by lower bound
+        else break;
+
+        ptrOffs  = begOffs;
+    }
+
+
+#if VERIFY_GC_TABLES
+    _ASSERTE(*castto(table, unsigned short *)++ == 0xBABE);
+#endif
+
+#ifdef FEATURE_EH_FUNCLETS   // funclets
+    //
+    // If we're in a funclet, we do not want to report the incoming varargs.  This is
+    // taken care of by the parent method and the funclet should access those arguments
+    // by way of the parent method's stack frame.
+    //
+    if (isFunclet)
+    {
+        return true;
+    }
+#endif // FEATURE_EH_FUNCLETS
+
+    /* Are we a varargs function, if so we have to report all args
+       except 'this' (note that the GC tables created by the x86 jit
+       do not contain ANY arguments except 'this' (even if they
+       were statically declared */
+
+    if (info.varargs) {
+#ifdef FEATURE_NATIVEAOT
+        PORTABILITY_ASSERT("EnumGCRefs: VarArgs");
+#else
+        LOG((LF_GCINFO, LL_INFO100, "Reporting incoming vararg GC refs\n"));
+
+        PTR_BYTE argsStart;
+
+        if (info.ebpFrame || info.doubleAlign)
+            argsStart = PTR_BYTE((size_t)EBP) + 2* sizeof(void*);                 // pushed EBP and retAddr
+        else
+            argsStart = PTR_BYTE((size_t)argBase) + info.stackSize + sizeof(void*);   // ESP + locals + retAddr
+
+#if defined(_DEBUG) && !defined(DACCESS_COMPILE)
+        // Note that I really want to say hCallBack is a GCCONTEXT, but this is pretty close
+        extern void GcEnumObject(LPVOID pData, OBJECTREF *pObj, uint32_t flags);
+        _ASSERTE((void*) GcEnumObject == pCallBack);
+#endif
+        GCCONTEXT   *pCtx = (GCCONTEXT *) hCallBack;
+
+        // For varargs, look up the signature using the varArgSig token passed on the stack
+        PTR_VASigCookie varArgSig = *PTR_PTR_VASigCookie(argsStart);
+
+        promoteVarArgs(argsStart, varArgSig, pCtx);
+#endif
+    }
+
+    return true;
+}
diff --git a/src/coreclr/vm/gccover.cpp b/src/coreclr/vm/gccover.cpp
index 278e6a84a91f..44ccfd4c46be 100644
--- a/src/coreclr/vm/gccover.cpp
+++ b/src/coreclr/vm/gccover.cpp
@@ -1635,7 +1635,7 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion)
 
         // unwind out of the prolog or epilog
         gcCover->codeMan->UnwindStackFrame(&regDisp,
-                &codeInfo, UpdateAllRegs, &codeManState, NULL);
+                &codeInfo, UpdateAllRegs, &codeManState);
 
         // Note we always doing the unwind, since that at does some checking (that we
         // unwind to a valid return address), but we only do the precise checking when
diff --git a/src/coreclr/vm/gcenv.ee.common.cpp b/src/coreclr/vm/gcenv.ee.common.cpp
index 79921a1b69b6..20c27d209f6e 100644
--- a/src/coreclr/vm/gcenv.ee.common.cpp
+++ b/src/coreclr/vm/gcenv.ee.common.cpp
@@ -3,6 +3,7 @@
 
 #include "common.h"
 #include "gcenv.h"
+#include <exinfo.h>
 
 #if defined(FEATURE_EH_FUNCLETS)
 
@@ -220,8 +221,54 @@ StackWalkAction GcStackCrawlCallBack(CrawlFrame* pCF, VOID* pData)
     // We may have unwound this crawlFrame and thus, shouldn't report the invalid
     // references it may contain.
     fReportGCReferences = pCF->ShouldCrawlframeReportGCReferences();
-#endif // defined(FEATURE_EH_FUNCLETS)
 
+    Thread *pThread = pCF->GetThread();
+    ExInfo *pExInfo = (ExInfo *)pThread->GetExceptionState()->GetCurrentExceptionTracker();
+
+    if (pCF->ShouldSaveFuncletInfo())
+    {
+        STRESS_LOG3(LF_GCROOTS, LL_INFO1000, "Saving info on funclet at SP: %p, PC: %p, FP: %p\n",
+            GetRegdisplaySP(pCF->GetRegisterSet()), GetControlPC(pCF->GetRegisterSet()), GetFP(pCF->GetRegisterSet()->pCurrentContext));
+
+        _ASSERTE(pExInfo);
+        REGDISPLAY *pRD = pCF->GetRegisterSet();
+        pExInfo->m_lastReportedFunclet.IP = GetControlPC(pRD);
+        pExInfo->m_lastReportedFunclet.FP = GetFP(pRD->pCurrentContext);
+        pExInfo->m_lastReportedFunclet.Flags = pCF->GetCodeManagerFlags();
+    }
+
+    if (pCF->ShouldParentToFuncletReportSavedFuncletSlots())
+    {
+        STRESS_LOG4(LF_GCROOTS, LL_INFO1000, "Reporting slots in funclet parent frame method at SP: %p, PC: %p using original FP: %p, PC: %p\n",
+            GetRegdisplaySP(pCF->GetRegisterSet()), GetControlPC(pCF->GetRegisterSet()), pExInfo->m_lastReportedFunclet.FP, pExInfo->m_lastReportedFunclet.IP);
+
+        _ASSERTE(!pCF->ShouldParentToFuncletUseUnwindTargetLocationForGCReporting());
+        _ASSERTE(pExInfo);
+
+        ICodeManager * pCM = pCF->GetCodeManager();
+        _ASSERTE(pCM != NULL);
+
+        CONTEXT context = {};
+        REGDISPLAY partialRD;
+        SetIP(&context, pExInfo->m_lastReportedFunclet.IP);
+        SetFP(&context, pExInfo->m_lastReportedFunclet.FP);
+        SetSP(&context, 0);
+
+        context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
+        FillRegDisplay(&partialRD, &context);
+
+        EECodeInfo codeInfo(pExInfo->m_lastReportedFunclet.IP);
+        _ASSERTE(codeInfo.IsValid());
+
+        pCM->EnumGcRefs(&partialRD,
+                        &codeInfo,
+                        pExInfo->m_lastReportedFunclet.Flags | ReportFPBasedSlotsOnly,
+                        GcEnumObject,
+                        pData,
+                        NO_OVERRIDE_OFFSET);
+    }
+    else
+#endif // defined(FEATURE_EH_FUNCLETS)
     if (fReportGCReferences)
     {
         if (pCF->IsFrameless())
@@ -297,7 +344,11 @@ StackWalkAction GcStackCrawlCallBack(CrawlFrame* pCF, VOID* pData)
             pFrame->GcScanRoots( gcctx->f, gcctx->sc);
         }
     }
-
+    else
+    {
+        STRESS_LOG2(LF_GCROOTS, LL_INFO1000, "Skipping GC scanning in frame method at SP: %p, PC: %p\n",
+            GetRegdisplaySP(pCF->GetRegisterSet()), GetControlPC(pCF->GetRegisterSet()));
+    }
 
     // If we're executing a LCG dynamic method then we must promote the associated resolver to ensure it
     // doesn't get collected and yank the method code out from under us).
diff --git a/src/coreclr/vm/i386/asmconstants.h b/src/coreclr/vm/i386/asmconstants.h
index 7de14a6c0631..edafbdf72ae7 100644
--- a/src/coreclr/vm/i386/asmconstants.h
+++ b/src/coreclr/vm/i386/asmconstants.h
@@ -174,13 +174,13 @@ ASMCONSTANTS_C_ASSERT(CORINFO_ArgumentException_ASM == CORINFO_ArgumentException
 
 
 
-#define Thread_m_State      0x04
+#define Thread_m_State      0x00
 ASMCONSTANTS_C_ASSERT(Thread_m_State == offsetof(Thread, m_State))
 
-#define Thread_m_fPreemptiveGCDisabled     0x08
+#define Thread_m_fPreemptiveGCDisabled     0x04
 ASMCONSTANTS_C_ASSERT(Thread_m_fPreemptiveGCDisabled == offsetof(Thread, m_fPreemptiveGCDisabled))
 
-#define Thread_m_pFrame     0x0C
+#define Thread_m_pFrame     0x08
 ASMCONSTANTS_C_ASSERT(Thread_m_pFrame == offsetof(Thread, m_pFrame))
 
 
@@ -231,8 +231,6 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__FrameHandlerExRecord__m_pEntryFrame == offsetof(
 #define STACK_OVERWRITE_BARRIER_VALUE 0xabcdefab
 #endif
 
-#define SIZEOF_FrameHandlerExRecordWithBarrier 0x5c
-ASMCONSTANTS_C_ASSERT(SIZEOF_FrameHandlerExRecordWithBarrier == sizeof(FrameHandlerExRecordWithBarrier))
 #endif
 
 
diff --git a/src/coreclr/vm/i386/asmhelpers.asm b/src/coreclr/vm/i386/asmhelpers.asm
index e03ffa9544f2..1d02fc48f8d8 100644
--- a/src/coreclr/vm/i386/asmhelpers.asm
+++ b/src/coreclr/vm/i386/asmhelpers.asm
@@ -41,6 +41,7 @@ EXTERN _NDirectImportWorker@4:PROC
 
 EXTERN _VarargPInvokeStubWorker@12:PROC
 EXTERN _GenericPInvokeCalliStubWorker@12:PROC
+EXTERN _CallCopyConstructorsWorker@4:PROC
 
 EXTERN _PreStubWorker@8:PROC
 EXTERN _TheUMEntryPrestubWorker@4:PROC
@@ -1062,6 +1063,29 @@ GoCallCalliWorker:
 
 _GenericPInvokeCalliHelper@0 endp
 
+;==========================================================================
+; This is small stub whose purpose is to record current stack pointer and
+; call CallCopyConstructorsWorker to invoke copy constructors and destructors
+; as appropriate. This stub operates on arguments already pushed to the
+; stack by JITted IL stub and must not create a new frame, i.e. it must tail
+; call to the target for it to see the arguments that copy ctors have been
+; called on.
+;
+_CopyConstructorCallStub@0 proc public
+    ; there may be an argument in ecx - save it
+    push    ecx
+    
+    ; push pointer to arguments
+    lea     edx, [esp + 8]
+    push    edx
+    
+    call    _CallCopyConstructorsWorker@4
+
+    ; restore ecx and tail call to the target
+    pop     ecx
+    jmp     eax
+_CopyConstructorCallStub@0 endp
+
 ifdef FEATURE_COMINTEROP
 
 ;==========================================================================
diff --git a/src/coreclr/vm/i386/cgencpu.h b/src/coreclr/vm/i386/cgencpu.h
index 655ad8c7a239..e99b8f542b59 100644
--- a/src/coreclr/vm/i386/cgencpu.h
+++ b/src/coreclr/vm/i386/cgencpu.h
@@ -51,10 +51,6 @@ EXTERN_C void SinglecastDelegateInvokeStub();
 #define JUMP_ALLOCATE_SIZE                      8   // # bytes to allocate for a jump instruction
 #define BACK_TO_BACK_JUMP_ALLOCATE_SIZE         8   // # bytes to allocate for a back to back jump instruction
 
-#ifdef FEATURE_EH_FUNCLETS
-#define USE_INDIRECT_CODEHEADER
-#endif // FEATURE_EH_FUNCLETS
-
 #define HAS_COMPACT_ENTRYPOINTS                 1
 
 // Needed for PInvoke inlining in ngened images
@@ -103,6 +99,9 @@ inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unu
     CALLEE_SAVED_REGISTER(Ebx) \
     CALLEE_SAVED_REGISTER(Ebp)
 
+// There are no FP callee saved registers on x86
+#define ENUM_FP_CALLEE_SAVED_REGISTERS()
+
 typedef DPTR(struct CalleeSavedRegisters) PTR_CalleeSavedRegisters;
 struct CalleeSavedRegisters {
 #define CALLEE_SAVED_REGISTER(regname) INT32 regname;
diff --git a/src/coreclr/vm/i386/cgenx86.cpp b/src/coreclr/vm/i386/cgenx86.cpp
index 496c7c3f3436..108bc66a99b1 100644
--- a/src/coreclr/vm/i386/cgenx86.cpp
+++ b/src/coreclr/vm/i386/cgenx86.cpp
@@ -139,14 +139,13 @@ void EHContext::UpdateFrame(PREGDISPLAY regs)
 }
 #endif // FEATURE_EH_FUNCLETS
 
-void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     }
     CONTRACT_END;
@@ -170,7 +169,6 @@ void TransitionFrame::UpdateRegDisplayHelper(const PREGDISPLAY pRD, UINT cbStack
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     }
     CONTRACT_END;
@@ -211,14 +209,13 @@ void TransitionFrame::UpdateRegDisplayHelper(const PREGDISPLAY pRD, UINT cbStack
     RETURN;
 }
 
-void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
-        HOST_NOCALLS;
         PRECONDITION(m_MachState.isValid());               // InsureInit has been called
         SUPPORTS_DAC;
     }
@@ -391,14 +388,13 @@ EXTERN_C MachState* STDCALL HelperMethodFrameConfirmState(HelperMethodFrame* fra
 }
 #endif
 
-void ExternalMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void ExternalMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     }
     CONTRACT_END;
@@ -411,14 +407,13 @@ void ExternalMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 }
 
 
-void StubDispatchFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void StubDispatchFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     }
     CONTRACT_END;
@@ -468,14 +463,13 @@ PCODE StubDispatchFrame::GetReturnAddress()
     return retAddress;
 }
 
-void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     }
     CONTRACT_END;
@@ -521,7 +515,7 @@ void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     RETURN;
 }
 
-void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
@@ -533,7 +527,6 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 #ifdef PROFILING_SUPPORTED
         PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this));
 #endif
-        HOST_NOCALLS;
         MODE_ANY;
         SUPPORTS_DAC;
     }
@@ -615,14 +608,13 @@ TADDR ResumableFrame::GetReturnAddressPtr()
     return dac_cast<TADDR>(m_Regs) + offsetof(CONTEXT, Eip);
 }
 
-void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     }
     CONTRACT_END;
@@ -696,12 +688,11 @@ void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 
 // The HijackFrame has to know the registers that are pushed by OnHijackTripThread
 //  -> HijackFrame::UpdateRegDisplay should restore all the registers pushed by OnHijackTripThread
-void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     }
     CONTRACTL_END;
@@ -753,20 +744,19 @@ void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 
 #endif  // FEATURE_HIJACK
 
-void PInvokeCalliFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void PInvokeCalliFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     }
     CONTRACT_END;
 
     VASigCookie *pVASigCookie = GetVASigCookie();
-    UpdateRegDisplayHelper(pRD, pVASigCookie->sizeOfArgs+sizeof(int));
+    UpdateRegDisplayHelper(pRD, pVASigCookie->sizeOfArgs);
 
     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    PInvokeCalliFrame::UpdateRegDisplay(ip:%p, sp:%p)\n", pRD->ControlPC, pRD->SP));
 
@@ -774,14 +764,13 @@ void PInvokeCalliFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 }
 
 #ifndef UNIX_X86_ABI
-void TailCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void TailCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     }
     CONTRACT_END;
@@ -822,7 +811,7 @@ void TailCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 #endif // !UNIX_X86_ABI
 
 #ifdef FEATURE_READYTORUN
-void DynamicHelperFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void DynamicHelperFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     WRAPPER_NO_CONTRACT;
     UpdateRegDisplayHelper(pRD, 0);
diff --git a/src/coreclr/vm/i386/excepx86.cpp b/src/coreclr/vm/i386/excepx86.cpp
index f52766d7f51e..13590fb23708 100644
--- a/src/coreclr/vm/i386/excepx86.cpp
+++ b/src/coreclr/vm/i386/excepx86.cpp
@@ -1571,9 +1571,6 @@ EXCEPTION_HANDLER_IMPL(COMPlusFrameHandler)
 
     _ASSERTE((pContext == NULL) || ((pContext->ContextFlags & CONTEXT_CONTROL) == CONTEXT_CONTROL));
 
-    if (g_fNoExceptions)
-        return ExceptionContinueSearch; // No EH during EE shutdown.
-
     // Check if the exception represents a GCStress Marker. If it does,
     // we shouldnt record its entry in the TLS as such exceptions are
     // continuable and can confuse the VM to treat them as CSE,
@@ -1849,39 +1846,7 @@ PEXCEPTION_REGISTRATION_RECORD GetCurrentSEHRecord()
 {
     WRAPPER_NO_CONTRACT;
 
-    LPVOID fs0 = (LPVOID)__readfsdword(0);
-
-#if 0  // This walk is too expensive considering we hit it every time we a CONTRACT(NOTHROW)
-#ifdef _DEBUG
-    EXCEPTION_REGISTRATION_RECORD *pEHR = (EXCEPTION_REGISTRATION_RECORD *)fs0;
-    LPVOID spVal;
-    __asm {
-        mov spVal, esp
-    }
-
-    // check that all the eh frames are all greater than the current stack value. If not, the
-    // stack has been updated somehow w/o unwinding the SEH chain.
-
-    // LOG((LF_EH, LL_INFO1000000, "ER Chain:\n"));
-    while (pEHR != NULL && pEHR != EXCEPTION_CHAIN_END) {
-        // LOG((LF_EH, LL_INFO1000000, "\tp: prev:p handler:%x\n", pEHR, pEHR->Next, pEHR->Handler));
-        if (pEHR < spVal) {
-            if (gLastResumedExceptionFunc != 0)
-                _ASSERTE(!"Stack is greater than start of SEH chain - possible missing leave in handler. See gLastResumedExceptionHandler & gLastResumedExceptionFunc for info");
-            else
-                _ASSERTE(!"Stack is greater than start of SEH chain (FS:0)");
-        }
-        if (pEHR->Handler == (void *)-1)
-            _ASSERTE(!"Handler value has been corrupted");
-
-            _ASSERTE(pEHR < pEHR->Next);
-
-        pEHR = pEHR->Next;
-    }
-#endif
-#endif // 0
-
-    return (EXCEPTION_REGISTRATION_RECORD*) fs0;
+    return (PEXCEPTION_REGISTRATION_RECORD)__readfsdword(0);
 }
 
 PEXCEPTION_REGISTRATION_RECORD GetFirstCOMPlusSEHRecord(Thread *pThread) {
@@ -1917,29 +1882,23 @@ PEXCEPTION_REGISTRATION_RECORD GetPrevSEHRecord(EXCEPTION_REGISTRATION_RECORD *n
 VOID SetCurrentSEHRecord(EXCEPTION_REGISTRATION_RECORD *pSEH)
 {
     WRAPPER_NO_CONTRACT;
-    *GetThread()->GetExceptionListPtr() = pSEH;
+
+    __writefsdword(0, (DWORD)pSEH);
 }
 
-// Note that this logic is copied below, in PopSEHRecords
-__declspec(naked)
-VOID __cdecl PopSEHRecords(LPVOID pTargetSP)
+VOID PopSEHRecords(LPVOID pTargetSP)
 {
-    // No CONTRACT possible on naked functions
     STATIC_CONTRACT_NOTHROW;
     STATIC_CONTRACT_GC_NOTRIGGER;
 
-    __asm{
-        mov     ecx, [esp+4]        ;; ecx <- pTargetSP
-        mov     eax, fs:[0]         ;; get current SEH record
-  poploop:
-        cmp     eax, ecx
-        jge     done
-        mov     eax, [eax]          ;; get next SEH record
-        jmp     poploop
-  done:
-        mov     fs:[0], eax
-        retn
+    PEXCEPTION_REGISTRATION_RECORD currentContext = GetCurrentSEHRecord();
+    // The last record in the chain is EXCEPTION_CHAIN_END which is defined as maxiumum
+    // pointer value so it cannot satisfy the loop condition.
+    while (currentContext < pTargetSP)
+    {
+        currentContext = currentContext->Next;
     }
+    SetCurrentSEHRecord(currentContext);
 }
 
 //
@@ -2970,8 +2929,7 @@ void ResumeAtJitEH(CrawlFrame* pCf,
         bool unwindSuccess = pCf->GetCodeManager()->UnwindStackFrame(pCf->GetRegisterSet(),
                                                                      pCf->GetCodeInfo(),
                                                                      pCf->GetCodeManagerFlags(),
-                                                                     pCf->GetCodeManState(),
-                                                                     NULL /* StackwalkCacheUnwindInfo* */);
+                                                                     pCf->GetCodeManState());
         _ASSERTE(unwindSuccess);
 
         if (((TADDR)pThread->m_pFrame < pCf->GetRegisterSet()->SP))
diff --git a/src/coreclr/vm/i386/gmscpu.h b/src/coreclr/vm/i386/gmscpu.h
index 5b3fa0ba881a..9dd91f42caf1 100644
--- a/src/coreclr/vm/i386/gmscpu.h
+++ b/src/coreclr/vm/i386/gmscpu.h
@@ -98,8 +98,7 @@ struct LazyMachState : public MachState {
     static void unwindLazyState(LazyMachState* baseState,
                                 MachState* lazyState,
                                 DWORD threadId,
-                                int funCallDepth = 1,
-                                HostCallPreference hostCallPreference = AllowHostCalls);
+                                int funCallDepth = 1);
 
     friend class HelperMethodFrame;
     friend class CheckAsmOffsets;
diff --git a/src/coreclr/vm/i386/gmsx86.cpp b/src/coreclr/vm/i386/gmsx86.cpp
index 7fd4d28e4305..ca3b45b5b4ea 100644
--- a/src/coreclr/vm/i386/gmsx86.cpp
+++ b/src/coreclr/vm/i386/gmsx86.cpp
@@ -362,8 +362,7 @@ static bool shouldEnterCall(PTR_BYTE ip) {
 void LazyMachState::unwindLazyState(LazyMachState* baseState,
                                     MachState* lazyState,
                                     DWORD threadId,
-                                    int funCallDepth /* = 1 */,
-                                    HostCallPreference hostCallPreference /* = (HostCallPreference)(-1) */)
+                                    int funCallDepth /* = 1 */)
 {
     CONTRACTL {
         NOTHROW;
@@ -1099,20 +1098,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
                 {
                     // Determine  whether given IP resides in JITted code. (It returns nonzero in that case.)
                     // Use it now to see if we've unwound to managed code yet.
-                    BOOL fFailedReaderLock = FALSE;
-                    BOOL fIsManagedCode = ExecutionManager::IsManagedCode(*lazyState->pRetAddr(), hostCallPreference, &fFailedReaderLock);
-                    if (fFailedReaderLock)
-                    {
-                        // We don't know if we would have been able to find a JIT
-                        // manager, because we couldn't enter the reader lock without
-                        // yielding (and our caller doesn't want us to yield).  So abort
-                        // now.
-
-                        // Invalidate the lazyState we're returning, so the caller knows
-                        // we aborted before we could fully unwind
-                        lazyState->_pRetAddr = NULL;
-                        return;
-                    }
+                    BOOL fIsManagedCode = ExecutionManager::IsManagedCode(*lazyState->pRetAddr());
 
                     if (fIsManagedCode)
                         goto done;
@@ -1285,8 +1271,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
 void LazyMachState::unwindLazyState(LazyMachState* baseState,
                                     MachState* lazyState,
                                     DWORD threadId,
-                                    int funCallDepth /* = 1 */,
-                                    HostCallPreference hostCallPreference /* = (HostCallPreference)(-1) */)
+                                    int funCallDepth /* = 1 */)
 {
     CONTRACTL {
         NOTHROW;
@@ -1347,20 +1332,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
         {
             // Determine  whether given IP resides in JITted code. (It returns nonzero in that case.)
             // Use it now to see if we've unwound to managed code yet.
-            BOOL fFailedReaderLock = FALSE;
-            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc, hostCallPreference, &fFailedReaderLock);
-            if (fFailedReaderLock)
-            {
-                // We don't know if we would have been able to find a JIT
-                // manager, because we couldn't enter the reader lock without
-                // yielding (and our caller doesn't want us to yield).  So abort
-                // now.
-
-                // Invalidate the lazyState we're returning, so the caller knows
-                // we aborted before we could fully unwind
-                lazyState->_pRetAddr = NULL;
-                return;
-            }
+            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc);
 
             if (fIsManagedCode)
                 break;
diff --git a/src/coreclr/vm/i386/jithelp.S b/src/coreclr/vm/i386/jithelp.S
index c1da6f4dcb80..d02752520278 100644
--- a/src/coreclr/vm/i386/jithelp.S
+++ b/src/coreclr/vm/i386/jithelp.S
@@ -551,87 +551,6 @@ LOCAL_LABEL(LRszMORE32):
     ret
 LEAF_END JIT_LRsz, _TEXT
 
-// *********************************************************************/
-//  JIT_Dbl2LngP4x87
-//
-// Purpose:
-//   converts a double to a long truncating toward zero (C semantics)
-//
-// uses stdcall calling conventions
-//
-//   This code is faster on a P4 than the Dbl2Lng code above, but is
-//   slower on a PIII.  Hence we choose this code when on a P4 or above.
-//
-LEAF_ENTRY JIT_Dbl2LngP4x87, _TEXT
-    // get some local space
-    sub 	esp, 8
-
-    #define arg1 [esp + 0x0C]
-    fld     QWORD PTR arg1          // fetch arg
-    fnstcw  WORD PTR arg1           // store FPCW
-    movzx   eax, WORD PTR arg1      // zero extend - wide
-    or      ah, 0x0C                // turn on OE and DE flags
-    mov     DWORD PTR [esp], eax    // store new FPCW bits
-    fldcw   WORD PTR  [esp]         // reload FPCW with new bits
-    fistp   QWORD PTR [esp]         // convert
-
-    // reload FP result
-    mov	    eax, DWORD PTR [esp]
-    mov	    edx, DWORD PTR [esp + 4]
-
-    // reload original FPCW value
-    fldcw   WORD PTR arg1
-    #undef arg1
-
-    // restore stack
-    add     esp, 8
-
-    ret
-LEAF_END JIT_Dbl2LngP4x87, _TEXT
-
-// *********************************************************************/
-//  JIT_Dbl2LngSSE3
-//
-// Purpose:
-//    converts a double to a long truncating toward zero (C semantics)
-//
-//  uses stdcall calling conventions
-//
-//    This code is faster than the above P4 x87 code for Intel processors
-//    equal or later than Core2 and Atom that have SSE3 support
-//
-LEAF_ENTRY JIT_Dbl2LngSSE3, _TEXT
-    // get some local space
-    sub     esp, 8
-
-    fld     QWORD PTR [esp + 0x0C]   // fetch arg
-    fisttp  QWORD PTR [esp]         // convert
-    mov     eax, DWORD PTR [esp]    // reload FP result
-    mov     edx, DWORD PTR [esp + 4]
-
-    // restore stack
-    add     esp, 8
-
-    ret
-LEAF_END JIT_Dbl2LngSSE3, _TEXT
-
-// *********************************************************************/
-// JIT_Dbl2IntSSE2
-//
-// Purpose:
-//  converts a double to a long truncating toward zero (C semantics)
-//
-// uses stdcall calling conventions
-//
-// This code is even faster than the P4 x87 code for Dbl2LongP4x87,
-// but only returns a 32 bit value (only good for int).
-//
-LEAF_ENTRY JIT_Dbl2IntSSE2, _TEXT
-    movsd     xmm0, [esp + 4]
-    cvttsd2si eax, xmm0
-    ret
-LEAF_END JIT_Dbl2IntSSE2, _TEXT
-
 // *********************************************************************/
 // JIT_StackProbe
 //
diff --git a/src/coreclr/vm/i386/jithelp.asm b/src/coreclr/vm/i386/jithelp.asm
index 5f6890b8312e..c2011190abc3 100644
--- a/src/coreclr/vm/i386/jithelp.asm
+++ b/src/coreclr/vm/i386/jithelp.asm
@@ -36,11 +36,6 @@ JIT_LLsh                        TEXTEQU <_JIT_LLsh@0>
 JIT_LRsh                        TEXTEQU <_JIT_LRsh@0>
 JIT_LRsz                        TEXTEQU <_JIT_LRsz@0>
 JIT_LMul                        TEXTEQU <@JIT_LMul@16>
-JIT_Dbl2LngOvf                  TEXTEQU <@JIT_Dbl2LngOvf@8>
-JIT_Dbl2Lng                     TEXTEQU <@JIT_Dbl2Lng@8>
-JIT_Dbl2IntSSE2                 TEXTEQU <@JIT_Dbl2IntSSE2@8>
-JIT_Dbl2LngP4x87                TEXTEQU <@JIT_Dbl2LngP4x87@8>
-JIT_Dbl2LngSSE3	                TEXTEQU <@JIT_Dbl2LngSSE3@8>
 JIT_InternalThrowFromHelper     TEXTEQU <@JIT_InternalThrowFromHelper@4>
 JIT_WriteBarrierReg_PreGrow     TEXTEQU <_JIT_WriteBarrierReg_PreGrow@0>
 JIT_WriteBarrierReg_PostGrow    TEXTEQU <_JIT_WriteBarrierReg_PostGrow@0>
@@ -75,7 +70,9 @@ EXTERN  g_GCShadowEnd:DWORD
 INVALIDGCVALUE equ 0CCCCCCCDh
 endif
 
+ifndef FEATURE_EH_FUNCLETS
 EXTERN _COMPlusEndCatch@20:PROC
+endif
 
 .686P
 .XMM
@@ -635,182 +632,6 @@ LMul_hard:
 
 JIT_LMul ENDP
 
-;*********************************************************************/
-; JIT_Dbl2LngOvf
-
-;Purpose:
-;   converts a double to a long truncating toward zero (C semantics)
-;   with check for overflow
-;
-;       uses stdcall calling conventions
-;
-PUBLIC JIT_Dbl2LngOvf
-JIT_Dbl2LngOvf PROC
-        fnclex
-        fld     qword ptr [esp+4]
-        push    ecx
-        push    ecx
-        fstp    qword ptr [esp]
-        call    JIT_Dbl2Lng
-        mov     ecx,eax
-        fnstsw  ax
-        test    ax,01h
-        jnz     Dbl2LngOvf_throw
-        mov     eax,ecx
-        ret     8
-
-Dbl2LngOvf_throw:
-        mov     ECX, CORINFO_OverflowException_ASM
-        call    JIT_InternalThrowFromHelper
-        ret     8
-JIT_Dbl2LngOvf ENDP
-
-;*********************************************************************/
-; JIT_Dbl2Lng
-
-;Purpose:
-;   converts a double to a long truncating toward zero (C semantics)
-;
-;       uses stdcall calling conventions
-;
-;   note that changing the rounding mode is very expensive.  This
-;   routine basiclly does the truncation semantics without changing
-;   the rounding mode, resulting in a win.
-;
-PUBLIC JIT_Dbl2Lng
-JIT_Dbl2Lng PROC
-        fld qword ptr[ESP+4]            ; fetch arg
-        lea ecx,[esp-8]
-        sub esp,16                      ; allocate frame
-        and ecx,-8                      ; align pointer on boundary of 8
-        fld st(0)                       ; duplciate top of stack
-        fistp qword ptr[ecx]            ; leave arg on stack, also save in temp
-        fild qword ptr[ecx]             ; arg, round(arg) now on stack
-        mov edx,[ecx+4]                 ; high dword of integer
-        mov eax,[ecx]                   ; low dword of integer
-        test eax,eax
-        je integer_QNaN_or_zero
-
-arg_is_not_integer_QNaN:
-        fsubp st(1),st                  ; TOS=d-round(d),
-                                        ; { st(1)=st(1)-st & pop ST }
-        test edx,edx                    ; what's sign of integer
-        jns positive
-                                        ; number is negative
-                                        ; dead cycle
-                                        ; dead cycle
-        fstp dword ptr[ecx]             ; result of subtraction
-        mov ecx,[ecx]                   ; dword of difference(single precision)
-        add esp,16
-        xor ecx,80000000h
-        add ecx,7fffffffh               ; if difference>0 then increment integer
-        adc eax,0                       ; inc eax (add CARRY flag)
-        adc edx,0                       ; propagate carry flag to upper bits
-        ret 8
-
-positive:
-        fstp dword ptr[ecx]             ;17-18 ; result of subtraction
-        mov ecx,[ecx]                   ; dword of difference (single precision)
-        add esp,16
-        add ecx,7fffffffh               ; if difference<0 then decrement integer
-        sbb eax,0                       ; dec eax (subtract CARRY flag)
-        sbb edx,0                       ; propagate carry flag to upper bits
-        ret 8
-
-integer_QNaN_or_zero:
-        test edx,7fffffffh
-        jnz arg_is_not_integer_QNaN
-        fstp st(0)                      ;; pop round(arg)
-        fstp st(0)                      ;; arg
-        add esp,16
-        ret 8
-JIT_Dbl2Lng ENDP
-
-;*********************************************************************/
-; JIT_Dbl2LngP4x87
-
-;Purpose:
-;   converts a double to a long truncating toward zero (C semantics)
-;
-;	uses stdcall calling conventions
-;
-;   This code is faster on a P4 than the Dbl2Lng code above, but is
-;   slower on a PIII.  Hence we choose this code when on a P4 or above.
-;
-PUBLIC JIT_Dbl2LngP4x87
-JIT_Dbl2LngP4x87 PROC
-arg1	equ	<[esp+0Ch]>
-
-    sub 	esp, 8                  ; get some local space
-
-    fld	qword ptr arg1              ; fetch arg
-    fnstcw  word ptr arg1           ; store FPCW
-    movzx   eax, word ptr arg1      ; zero extend - wide
-    or	ah, 0Ch                     ; turn on OE and DE flags
-    mov	dword ptr [esp], eax        ; store new FPCW bits
-    fldcw   word ptr  [esp]         ; reload FPCW with new bits
-    fistp   qword ptr [esp]         ; convert
-    mov	eax, dword ptr [esp]        ; reload FP result
-    mov	edx, dword ptr [esp+4]      ;
-    fldcw   word ptr arg1           ; reload original FPCW value
-
-    add esp, 8                      ; restore stack
-
-    ret	8
-JIT_Dbl2LngP4x87 ENDP
-
-;*********************************************************************/
-; JIT_Dbl2LngSSE3
-
-;Purpose:
-;   converts a double to a long truncating toward zero (C semantics)
-;
-;	uses stdcall calling conventions
-;
-;   This code is faster than the above P4 x87 code for Intel processors
-;   equal or later than Core2 and Atom that have SSE3 support
-;
-.686P
-.XMM
-PUBLIC JIT_Dbl2LngSSE3
-JIT_Dbl2LngSSE3 PROC
-arg1	equ	<[esp+0Ch]>
-
-    sub esp, 8                      ; get some local space
-
-    fld qword ptr arg1              ; fetch arg
-    fisttp qword ptr [esp]          ; convert
-    mov eax, dword ptr [esp]        ; reload FP result
-    mov edx, dword ptr [esp+4]
-
-    add esp, 8                      ; restore stack
-
-    ret	8
-JIT_Dbl2LngSSE3 ENDP
-.586
-
-;*********************************************************************/
-; JIT_Dbl2IntSSE2
-
-;Purpose:
-;   converts a double to a long truncating toward zero (C semantics)
-;
-;	uses stdcall calling conventions
-;
-;   This code is even faster than the P4 x87 code for Dbl2LongP4x87,
-;   but only returns a 32 bit value (only good for int).
-;
-.686P
-.XMM
-PUBLIC JIT_Dbl2IntSSE2
-JIT_Dbl2IntSSE2 PROC
-	$movsd	xmm0, [esp+4]
-	cvttsd2si eax, xmm0
-	ret 8
-JIT_Dbl2IntSSE2 ENDP
-.586
-
-
 ;*********************************************************************/
 ; This is the small write barrier thunk we use when we know the
 ; ephemeral generation is higher in memory than older generations.
@@ -1212,39 +1033,6 @@ JIT_TailCallVSDLeave:
 
 JIT_TailCall ENDP
 
-
-;------------------------------------------------------------------------------
-
-; HCIMPL2_VV(float, JIT_FltRem, float dividend, float divisor)
-@JIT_FltRem@8 proc public
-        fld  dword ptr [esp+4]          ; divisor
-        fld  dword ptr [esp+8]          ; dividend
-fremloop:
-        fprem
-        fstsw   ax
-        fwait
-        sahf
-        jp      fremloop        ; Continue while the FPU status bit C2 is set
-        fxch    ; swap, so divisor is on top and result is in st(1)
-        fstp    ST(0)           ; Pop the divisor from the FP stack
-        retn    8               ; Return value is in st(0)
-@JIT_FltRem@8 endp
-
-; HCIMPL2_VV(float, JIT_DblRem, float dividend, float divisor)
-@JIT_DblRem@16 proc public
-        fld  qword ptr [esp+4]          ; divisor
-        fld  qword ptr [esp+12]         ; dividend
-fremloopd:
-        fprem
-        fstsw   ax
-        fwait
-        sahf
-        jp      fremloopd       ; Continue while the FPU status bit C2 is set
-        fxch    ; swap, so divisor is on top and result is in st(1)
-        fstp    ST(0)           ; Pop the divisor from the FP stack
-        retn    16              ; Return value is in st(0)
-@JIT_DblRem@16 endp
-
 ;------------------------------------------------------------------------------
 
 ; PatchedCodeStart and PatchedCodeEnd are used to determine bounds of patched code.
@@ -1298,6 +1086,7 @@ ret
 _JIT_PatchedCodeEnd@0 endp
 
 
+ifndef FEATURE_EH_FUNCLETS
 ; Note that the debugger skips this entirely when doing SetIP,
 ; since COMPlusCheckForAbort should always return 0.  Excep.cpp:LeaveCatch
 ; asserts that to be true.  If this ends up doing more work, then the
@@ -1325,6 +1114,7 @@ JIT_EndCatch PROC stdcall public
     jmp     edx         ; eip = new eip
 
 JIT_EndCatch ENDP
+endif
 
 ; The following helper will access ("probe") a word on each page of the stack
 ; starting with the page right beneath esp down to the one pointed to by eax.
diff --git a/src/coreclr/vm/i386/jitinterfacex86.cpp b/src/coreclr/vm/i386/jitinterfacex86.cpp
index 08360e9ff0c0..bfc7c0abc674 100644
--- a/src/coreclr/vm/i386/jitinterfacex86.cpp
+++ b/src/coreclr/vm/i386/jitinterfacex86.cpp
@@ -96,26 +96,6 @@ extern "C" void STDCALL WriteBarrierAssert(BYTE* ptr, Object* obj)
 
 #endif // _DEBUG
 
-#ifndef TARGET_UNIX
-
-HCIMPL1_V(INT32, JIT_Dbl2IntOvf, double val)
-{
-    FCALL_CONTRACT;
-
-    INT64 ret = HCCALL1_V(JIT_Dbl2Lng, val);
-
-    if (ret != (INT32) ret)
-        goto THROW;
-
-    return (INT32) ret;
-
-THROW:
-    FCThrow(kOverflowException);
-}
-HCIMPLEND
-#endif // TARGET_UNIX
-
-
 FCDECL1(Object*, JIT_New, CORINFO_CLASS_HANDLE typeHnd_);
 
 
@@ -961,32 +941,6 @@ void InitJITHelpers1()
     JIT_TrialAlloc::Flags flags = GCHeapUtilities::UseThreadAllocationContexts() ?
         JIT_TrialAlloc::MP_ALLOCATOR : JIT_TrialAlloc::NORMAL;
 
-    // Get CPU features and check for SSE2 support.
-    // This code should eventually probably be moved into codeman.cpp,
-    // where we set the cpu feature flags for the JIT based on CPU type and features.
-    int cpuFeatures[4];
-    __cpuid(cpuFeatures, 1);
-
-    DWORD dwCPUFeaturesECX = cpuFeatures[2];
-    DWORD dwCPUFeaturesEDX = cpuFeatures[3];
-
-    //  If bit 26 (SSE2) is set, then we can use the SSE2 flavors
-    //  and faster x87 implementation for the P4 of Dbl2Lng.
-    if (dwCPUFeaturesEDX & (1<<26))
-    {
-        SetJitHelperFunction(CORINFO_HELP_DBL2INT, JIT_Dbl2IntSSE2);
-        if (dwCPUFeaturesECX & 1)  // check SSE3
-        {
-            SetJitHelperFunction(CORINFO_HELP_DBL2UINT, JIT_Dbl2LngSSE3);
-            SetJitHelperFunction(CORINFO_HELP_DBL2LNG, JIT_Dbl2LngSSE3);
-	}
-        else
-        {
-            SetJitHelperFunction(CORINFO_HELP_DBL2UINT, JIT_Dbl2LngP4x87);   // SSE2 only for signed
-            SetJitHelperFunction(CORINFO_HELP_DBL2LNG, JIT_Dbl2LngP4x87);
-        }
-    }
-
     if (!(TrackAllocationsEnabled()
         || LoggingOn(LF_GCALLOC, LL_INFO10)
 #ifdef _DEBUG
diff --git a/src/coreclr/vm/ilmarshalers.cpp b/src/coreclr/vm/ilmarshalers.cpp
index 59bb7c9ebed2..b0fcef12d6b8 100644
--- a/src/coreclr/vm/ilmarshalers.cpp
+++ b/src/coreclr/vm/ilmarshalers.cpp
@@ -3394,6 +3394,7 @@ ILCriticalHandleMarshaler::ReturnOverride(
     return OVERRIDDEN;
 } // ILCriticalHandleMarshaler::ReturnOverride
 
+#if defined(FEATURE_IJW)
 MarshalerOverrideStatus ILBlittableValueClassWithCopyCtorMarshaler::ArgumentOverride(NDirectStubLinker* psl,
                                                 BOOL               byref,
                                                 BOOL               fin,
@@ -3459,6 +3460,36 @@ MarshalerOverrideStatus ILBlittableValueClassWithCopyCtorMarshaler::ArgumentOver
 #ifdef TARGET_X86
         pslIL->SetStubTargetArgType(&locDesc);              // native type is the value type
         pslILDispatch->EmitLDLOC(dwNewValueTypeLocal);      // we load the local directly
+
+        // Record this argument's stack slot in the copy constructor chain so we can correctly invoke the copy constructor.
+        DWORD ctorCookie = pslIL->NewLocal(CoreLibBinder::GetClass(CLASS__COPY_CONSTRUCTOR_COOKIE));
+        pslIL->EmitLDLOCA(ctorCookie);
+        pslIL->EmitINITOBJ(pslIL->GetToken(CoreLibBinder::GetClass(CLASS__COPY_CONSTRUCTOR_COOKIE)));
+        pslIL->EmitLDLOCA(ctorCookie);
+        pslIL->EmitLDLOCA(dwNewValueTypeLocal);
+        pslIL->EmitSTFLD(pslIL->GetToken(CoreLibBinder::GetField(FIELD__COPY_CONSTRUCTOR_COOKIE__SOURCE)));
+        pslIL->EmitLDLOCA(ctorCookie);
+        pslIL->EmitLDC(nativeStackOffset);
+        pslIL->EmitSTFLD(pslIL->GetToken(CoreLibBinder::GetField(FIELD__COPY_CONSTRUCTOR_COOKIE__DESTINATION_OFFSET)));
+
+        if (pargs->mm.m_pCopyCtor)
+        {
+            pslIL->EmitLDLOCA(ctorCookie);
+            pslIL->EmitLDFTN(pslIL->GetToken(pargs->mm.m_pCopyCtor));
+            pslIL->EmitSTFLD(pslIL->GetToken(CoreLibBinder::GetField(FIELD__COPY_CONSTRUCTOR_COOKIE__COPY_CONSTRUCTOR)));
+        }
+
+        if (pargs->mm.m_pDtor)
+        {
+            pslIL->EmitLDLOCA(ctorCookie);
+            pslIL->EmitLDFTN(pslIL->GetToken(pargs->mm.m_pDtor));
+            pslIL->EmitSTFLD(pslIL->GetToken(CoreLibBinder::GetField(FIELD__COPY_CONSTRUCTOR_COOKIE__DESTRUCTOR)));
+        }
+
+        pslIL->EmitLDLOCA(psl->GetCopyCtorChainLocalNum());
+        pslIL->EmitLDLOCA(ctorCookie);
+        pslIL->EmitCALL(METHOD__COPY_CONSTRUCTOR_CHAIN__ADD, 2, 0);
+
 #else
         pslIL->SetStubTargetArgType(ELEMENT_TYPE_I);        // native type is a pointer
         EmitLoadNativeLocalAddrForByRefDispatch(pslILDispatch, dwNewValueTypeLocal);
@@ -3477,12 +3508,10 @@ MarshalerOverrideStatus ILBlittableValueClassWithCopyCtorMarshaler::ArgumentOver
 
         DWORD       dwNewValueTypeLocal;
         dwNewValueTypeLocal = pslIL->NewLocal(locDesc);
-        pslILDispatch->EmitLDARG(argidx);
-        pslILDispatch->EmitSTLOC(dwNewValueTypeLocal);
-        pslILDispatch->EmitLDLOCA(dwNewValueTypeLocal);
+        pslILDispatch->EmitLDARGA(argidx);
 #else
         LocalDesc   locDesc(pargs->mm.m_pMT);
-        locDesc.MakeCopyConstructedPointer();
+        locDesc.MakePointer();
 
         pslIL->SetStubTargetArgType(&locDesc);
         pslILDispatch->EmitLDARG(argidx);
@@ -3491,6 +3520,7 @@ MarshalerOverrideStatus ILBlittableValueClassWithCopyCtorMarshaler::ArgumentOver
         return OVERRIDDEN;
     }
 }
+#endif // defined(FEATURE_IJW)
 
 LocalDesc ILArgIteratorMarshaler::GetNativeType()
 {
diff --git a/src/coreclr/vm/ilmarshalers.h b/src/coreclr/vm/ilmarshalers.h
index 02589d44994f..11c3983fc654 100644
--- a/src/coreclr/vm/ilmarshalers.h
+++ b/src/coreclr/vm/ilmarshalers.h
@@ -365,7 +365,7 @@ class ILMarshaler
 
         // Convert the loaded local containing a native address
         // into a non-GC type for the byref case.
-        pslILEmit->EmitCONV_I();
+        pslILEmit->EmitCONV_U();
     }
 
     void EmitLoadManagedValue(ILCodeStream* pslILEmit)
@@ -399,7 +399,7 @@ class ILMarshaler
 
         // Convert the loaded value containing a native address
         // into a non-GC type for the byref case.
-        pslILEmit->EmitCONV_I();
+        pslILEmit->EmitCONV_U();
     }
 
     void EmitStoreManagedValue(ILCodeStream* pslILEmit)
@@ -1814,6 +1814,40 @@ class ILBlittableValueClassMarshaler : public ILCopyMarshalerBase
     }
 };
 
+class ILPointerMarshaler final : public ILCopyMarshalerBase
+{
+public:
+    enum
+    {
+        c_fInOnly               = TRUE,
+        c_nativeSize            = TARGET_POINTER_SIZE,
+    };
+protected:
+    LocalDesc GetManagedType() override
+    {
+        LIMITED_METHOD_CONTRACT;
+        LocalDesc native(m_pargs->m_pMT);
+        native.MakePointer();
+        return native;
+    }
+
+    LocalDesc GetNativeType() override
+    {
+        LIMITED_METHOD_CONTRACT;
+        LocalDesc native(m_pargs->m_pMT);
+        native.MakePointer();
+        return native;
+    }
+
+    virtual void EmitReInitNative(ILCodeStream* pslILEmit) override
+    {
+        STANDARD_VM_CONTRACT;
+
+        pslILEmit->EmitLDC(0);
+        pslILEmit->EmitCONV_U();
+        EmitStoreNativeValue(pslILEmit);
+    }
+};
 
 class ILDelegateMarshaler : public ILMarshaler
 {
@@ -2889,6 +2923,7 @@ class ILBlittableLayoutClassMarshaler : public ILMarshaler
     void EmitConvertContentsNativeToCLR(ILCodeStream* pslILEmit) override;
 };
 
+#if defined(FEATURE_IJW)
 class ILBlittableValueClassWithCopyCtorMarshaler : public ILMarshaler
 {
 public:
@@ -2922,6 +2957,7 @@ class ILBlittableValueClassWithCopyCtorMarshaler : public ILMarshaler
 
 
 };
+#endif // defined(TARGET_WINDOWS)
 
 class ILArgIteratorMarshaler : public ILMarshaler
 {
diff --git a/src/coreclr/vm/ilstubcache.cpp b/src/coreclr/vm/ilstubcache.cpp
index d0f55495c829..1d8d14456a1f 100644
--- a/src/coreclr/vm/ilstubcache.cpp
+++ b/src/coreclr/vm/ilstubcache.cpp
@@ -500,8 +500,10 @@ MethodDesc* ILStubCache::GetStubMethodDesc(
     ILStubHashBlob* pHashBlob,
     DWORD dwStubFlags,
     Module* pSigModule,
+    Module* pSigLoaderModule,
     PCCOR_SIGNATURE pSig,
     DWORD cbSig,
+    SigTypeContext* pTypeContext,
     AllocMemTracker* pamTracker,
     bool& bILStubCreator,
     MethodDesc *pLastMD)
@@ -538,22 +540,23 @@ MethodDesc* ILStubCache::GetStubMethodDesc(
         // Couldn't find it, let's make a new one.
         //
 
-        Module *pContainingModule = pSigModule;
-        if (pTargetMD != NULL)
+        if (pSigLoaderModule == NULL)
         {
-            // loader module may be different from signature module for generic targets
-            pContainingModule = pTargetMD->GetLoaderModule();
+            pSigLoaderModule = (pTargetMD != NULL) ? pTargetMD->GetLoaderModule() : pSigModule;
         }
 
-        MethodTable *pStubMT = GetOrCreateStubMethodTable(pContainingModule);
-
         SigTypeContext typeContext;
-        if (pTargetMD != NULL)
+        if (pTypeContext == NULL)
         {
-            SigTypeContext::InitTypeContext(pTargetMD, &typeContext);
+            if (pTargetMD != NULL)
+            {
+                SigTypeContext::InitTypeContext(pTargetMD, &typeContext);
+            }
+            pTypeContext = &typeContext;
         }
 
-        pMD = ILStubCache::CreateNewMethodDesc(m_pAllocator->GetHighFrequencyHeap(), pStubMT, dwStubFlags, pSigModule, pSig, cbSig, &typeContext, pamTracker);
+        MethodTable *pStubMT = GetOrCreateStubMethodTable(pSigLoaderModule);
+        pMD = ILStubCache::CreateNewMethodDesc(m_pAllocator->GetHighFrequencyHeap(), pStubMT, dwStubFlags, pSigModule, pSig, cbSig, pTypeContext, pamTracker);
 
         if (SF_IsSharedStub(dwStubFlags))
         {
diff --git a/src/coreclr/vm/ilstubcache.h b/src/coreclr/vm/ilstubcache.h
index 6324bad28eeb..c53fd7a1878c 100644
--- a/src/coreclr/vm/ilstubcache.h
+++ b/src/coreclr/vm/ilstubcache.h
@@ -53,8 +53,10 @@ class ILStubCache final
         ILStubHashBlob* pHashBlob,
         DWORD dwStubFlags,      // bitmask of NDirectStubFlags
         Module* pSigModule,
+        Module* pSigLoaderModule,
         PCCOR_SIGNATURE pSig,
         DWORD cbSig,
+        SigTypeContext* pTypeContext,
         AllocMemTracker* pamTracker,
         bool& bILStubCreator,
         MethodDesc* pLastMD);
diff --git a/src/coreclr/vm/ilstubresolver.cpp b/src/coreclr/vm/ilstubresolver.cpp
index c24be260c692..1efb9c2975e1 100644
--- a/src/coreclr/vm/ilstubresolver.cpp
+++ b/src/coreclr/vm/ilstubresolver.cpp
@@ -133,13 +133,10 @@ STRINGREF ILStubResolver::GetStringLiteral(mdToken metaTok)
     return NULL;
 }
 
-void ILStubResolver::ResolveToken(mdToken token, TypeHandle * pTH, MethodDesc ** ppMD, FieldDesc ** ppFD)
+void ILStubResolver::ResolveToken(mdToken token, ResolvedToken* resolvedToken)
 {
     STANDARD_VM_CONTRACT;
-
-    *pTH = NULL;
-    *ppMD = NULL;
-    *ppFD = NULL;
+    _ASSERTE(resolvedToken != NULL);
 
     switch (TypeFromToken(token))
     {
@@ -147,8 +144,8 @@ void ILStubResolver::ResolveToken(mdToken token, TypeHandle * pTH, MethodDesc **
         {
             MethodDesc* pMD = m_pCompileTimeState->m_tokenLookupMap.LookupMethodDef(token);
             _ASSERTE(pMD);
-            *ppMD = pMD;
-            *pTH = TypeHandle(pMD->GetMethodTable());
+            resolvedToken->Method = pMD;
+            resolvedToken->TypeHandle = TypeHandle(pMD->GetMethodTable());
         }
         break;
 
@@ -156,7 +153,7 @@ void ILStubResolver::ResolveToken(mdToken token, TypeHandle * pTH, MethodDesc **
         {
             TypeHandle typeHnd = m_pCompileTimeState->m_tokenLookupMap.LookupTypeDef(token);
             _ASSERTE(!typeHnd.IsNull());
-            *pTH = typeHnd;
+            resolvedToken->TypeHandle = typeHnd;
         }
         break;
 
@@ -164,10 +161,59 @@ void ILStubResolver::ResolveToken(mdToken token, TypeHandle * pTH, MethodDesc **
         {
             FieldDesc* pFD = m_pCompileTimeState->m_tokenLookupMap.LookupFieldDef(token);
             _ASSERTE(pFD);
-            *ppFD = pFD;
-            *pTH = TypeHandle(pFD->GetEnclosingMethodTable());
+            resolvedToken->Field = pFD;
+            resolvedToken->TypeHandle = TypeHandle(pFD->GetEnclosingMethodTable());
+        }
+        break;
+
+#if !defined(DACCESS_COMPILE)
+    case mdtMemberRef:
+        {
+            TokenLookupMap::MemberRefEntry entry = m_pCompileTimeState->m_tokenLookupMap.LookupMemberRef(token);
+            if (entry.Type == mdtFieldDef)
+            {
+                _ASSERTE(entry.Entry.Field != NULL);
+
+                if (entry.ClassSignatureToken != mdTokenNil)
+                    resolvedToken->TypeSignature = m_pCompileTimeState->m_tokenLookupMap.LookupSig(entry.ClassSignatureToken);
+
+                resolvedToken->Field = entry.Entry.Field;
+                resolvedToken->TypeHandle = TypeHandle(entry.Entry.Field->GetApproxEnclosingMethodTable());
+            }
+            else
+            {
+                _ASSERTE(entry.Type == mdtMethodDef);
+                _ASSERTE(entry.Entry.Method != NULL);
+
+                if (entry.ClassSignatureToken != mdTokenNil)
+                    resolvedToken->TypeSignature = m_pCompileTimeState->m_tokenLookupMap.LookupSig(entry.ClassSignatureToken);
+
+                resolvedToken->Method = entry.Entry.Method;
+                MethodTable* pMT = entry.Entry.Method->GetMethodTable();
+                _ASSERTE(!pMT->ContainsGenericVariables());
+                resolvedToken->TypeHandle = TypeHandle(pMT);
+            }
+        }
+        break;
+
+    case mdtMethodSpec:
+        {
+            TokenLookupMap::MethodSpecEntry entry = m_pCompileTimeState->m_tokenLookupMap.LookupMethodSpec(token);
+            _ASSERTE(entry.Method != NULL);
+
+            if (entry.ClassSignatureToken != mdTokenNil)
+                resolvedToken->TypeSignature = m_pCompileTimeState->m_tokenLookupMap.LookupSig(entry.ClassSignatureToken);
+
+            if (entry.MethodSignatureToken != mdTokenNil)
+                resolvedToken->MethodSignature = m_pCompileTimeState->m_tokenLookupMap.LookupSig(entry.MethodSignatureToken);
+
+            resolvedToken->Method = entry.Method;
+            MethodTable* pMT = entry.Method->GetMethodTable();
+            _ASSERTE(!pMT->ContainsGenericVariables());
+            resolvedToken->TypeHandle = TypeHandle(pMT);
         }
         break;
+#endif // !defined(DACCESS_COMPILE)
 
     default:
         UNREACHABLE_MSG("unexpected metadata token type");
diff --git a/src/coreclr/vm/ilstubresolver.h b/src/coreclr/vm/ilstubresolver.h
index 82a1217d79c7..ea823e7f7738 100644
--- a/src/coreclr/vm/ilstubresolver.h
+++ b/src/coreclr/vm/ilstubresolver.h
@@ -35,7 +35,7 @@ class ILStubResolver : DynamicResolver
     OBJECTHANDLE ConstructStringLiteral(mdToken metaTok);
     BOOL IsValidStringRef(mdToken metaTok);
     STRINGREF GetStringLiteral(mdToken metaTok);
-    void ResolveToken(mdToken token, TypeHandle * pTH, MethodDesc ** ppMD, FieldDesc ** ppFD);
+    void ResolveToken(mdToken token, ResolvedToken* resolvedToken);
     SigPointer ResolveSignature(mdToken token);
     SigPointer ResolveSignatureForVarArg(mdToken token);
     void GetEHInfo(unsigned EHnumber, CORINFO_EH_CLAUSE* clause);
diff --git a/src/coreclr/vm/interoputil.cpp b/src/coreclr/vm/interoputil.cpp
index d9e5d4375962..7a91cd41f7c1 100644
--- a/src/coreclr/vm/interoputil.cpp
+++ b/src/coreclr/vm/interoputil.cpp
@@ -2558,7 +2558,7 @@ BOOL IsMethodVisibleFromCom(MethodDesc *pMD)
     mdMethodDef md = pMD->GetMemberDef();
 
     // See if there is property information for this member.
-    hr = pMD->GetModule()->GetPropertyInfoForMethodDef(md, &pd, &pPropName, &uSemantic);
+    hr = pMD->GetMDImport()->GetPropertyInfoForMethodDef(md, &pd, &pPropName, &uSemantic);
     IfFailThrow(hr);
 
     if (hr == S_OK)
diff --git a/src/coreclr/vm/interpreter.cpp b/src/coreclr/vm/interpreter.cpp
index 878fd29a064f..2edb5948d72e 100644
--- a/src/coreclr/vm/interpreter.cpp
+++ b/src/coreclr/vm/interpreter.cpp
@@ -1846,7 +1846,7 @@ void Interpreter::JitMethodIfAppropriate(InterpreterMethodInfo* interpMethInfo,
             else
             {
                 COR_ILMETHOD_DECODER::DecoderStatus status;
-                pDecoder = new COR_ILMETHOD_DECODER(md->GetILHeader(TRUE),
+                pDecoder = new COR_ILMETHOD_DECODER(md->GetILHeader(),
                                                     md->GetMDImport(),
                                                     &status);
             }
@@ -6506,7 +6506,7 @@ void Interpreter::CkFinite()
         break;
     }
 
-    if (!_finite(val))
+    if (!isfinite(val))
         ThrowSysArithException();
 }
 
@@ -6828,6 +6828,16 @@ void Interpreter::SetILInstrCategories()
 }
 #endif // INTERP_ILINSTR_PROFILE
 
+#ifndef TARGET_WINDOWS
+namespace
+{
+    bool isnan(float val)
+    {
+        UINT32 bits = *reinterpret_cast<UINT32*>(&val);
+        return (bits & 0x7FFFFFFFU) > 0x7F800000U;
+    }
+}
+#endif
 
 template<int op>
 void Interpreter::CompareOp()
@@ -7135,7 +7145,7 @@ INT32 Interpreter::CompareOpRes(unsigned op1idx)
                 else if (op == CO_GT_UN)
                 {
                     // Check for NAN's here: if either is a NAN, they're unordered, so this comparison returns true.
-                    if (_isnan(val1) || _isnan(val2)) res = 1;
+                    if (isnan(val1) || isnan(val2)) res = 1;
                     else if (val1 > val2) res = 1;
                 }
                 else if (op == CO_LT)
@@ -7146,7 +7156,7 @@ INT32 Interpreter::CompareOpRes(unsigned op1idx)
                 {
                     _ASSERTE(op == CO_LT_UN);
                     // Check for NAN's here: if either is a NAN, they're unordered, so this comparison returns true.
-                    if (_isnan(val1) || _isnan(val2)) res = 1;
+                    if (isnan(val1) || isnan(val2)) res = 1;
                     else if (val1 < val2) res = 1;
                 }
             }
@@ -7177,7 +7187,7 @@ INT32 Interpreter::CompareOpRes(unsigned op1idx)
                 else if (op == CO_GT_UN)
                 {
                     // Check for NAN's here: if either is a NAN, they're unordered, so this comparison returns true.
-                    if (_isnan(val1) || _isnan(val2)) res = 1;
+                    if (isnan(val1) || isnan(val2)) res = 1;
                     else if (val1 > val2) res = 1;
                 }
                 else if (op == CO_LT)
@@ -7188,7 +7198,7 @@ INT32 Interpreter::CompareOpRes(unsigned op1idx)
                 {
                     _ASSERTE(op == CO_LT_UN);
                     // Check for NAN's here: if either is a NAN, they're unordered, so this comparison returns true.
-                    if (_isnan(val1) || _isnan(val2)) res = 1;
+                    if (isnan(val1) || isnan(val2)) res = 1;
                     else if (val1 < val2) res = 1;
                 }
             }
@@ -9261,19 +9271,6 @@ void Interpreter::DoCallWork(bool virtualCall, void* thisArg, CORINFO_RESOLVED_T
             break;
         }
 
-        // Plus some other calls that we're going to treat "like" intrinsics...
-        if (methToCall == CoreLibBinder::GetMethod(METHOD__STUBHELPERS__SET_LAST_ERROR))
-        {
-            // If we're interpreting a method that calls "SetLastError", it's very likely that the call(i) whose
-            // error we're trying to capture was performed with MethodDescCallSite machinery that itself trashes
-            // the last error.  We solve this by saving the last error in a special interpreter-specific field of
-            // "Thread" in that case, and essentially implement SetLastError here, taking that field as the
-            // source for the last error.
-            Thread* thrd = GetThread();
-            thrd->m_dwLastError = thrd->m_dwLastErrorInterp;
-            didIntrinsic = true;
-        }
-
         // TODO: The following check for hardware intrinsics is not a production-level
         //       solution and may produce incorrect results.
         static ConfigDWORD s_InterpreterHWIntrinsicsIsSupportedFalse;
diff --git a/src/coreclr/vm/interpreter.h b/src/coreclr/vm/interpreter.h
index 86a0a36efe45..7f7eed175dea 100644
--- a/src/coreclr/vm/interpreter.h
+++ b/src/coreclr/vm/interpreter.h
@@ -13,7 +13,7 @@
 #include "crst.h"
 #include "callhelpers.h"
 #include "codeversion.h"
-#include "clr_std/type_traits"
+#include <type_traits>
 
 typedef SSIZE_T NativeInt;
 typedef SIZE_T NativeUInt;
diff --git a/src/coreclr/vm/invokeutil.cpp b/src/coreclr/vm/invokeutil.cpp
index eb8462ed16f2..7d0c8f80becd 100644
--- a/src/coreclr/vm/invokeutil.cpp
+++ b/src/coreclr/vm/invokeutil.cpp
@@ -741,14 +741,14 @@ void InvokeUtil::ValidateObjectTarget(FieldDesc *pField, TypeHandle enclosingTyp
 
 // SetValidField
 // Given an target object, a value object and a field this method will set the field
-//  on the target object.  The field must be validate before calling this.
+//  on the target object.  The field must be validated before calling this.
 void InvokeUtil::SetValidField(CorElementType fldType,
                                TypeHandle fldTH,
                                FieldDesc *pField,
                                OBJECTREF *target,
                                OBJECTREF *valueObj,
                                TypeHandle declaringType,
-                               CLR_BOOL *pDomainInitialized) {
+                               CLR_BOOL *pIsClassInitialized) {
     CONTRACTL {
         THROWS;
         GC_TRIGGERS;
@@ -786,19 +786,18 @@ void InvokeUtil::SetValidField(CorElementType fldType,
         pDeclMT = pField->GetModule()->GetGlobalMethodTable();
     }
 
-    if (*pDomainInitialized == FALSE)
+    if (*pIsClassInitialized == FALSE)
     {
         EX_TRY
         {
             pDeclMT->EnsureInstanceActive();
             pDeclMT->CheckRunClassInitThrowing();
-
-            *pDomainInitialized = TRUE;
+            *pIsClassInitialized = pDeclMT->IsClassInited();            
         }
         EX_CATCH_THROWABLE(&Throwable);
     }
 #ifdef _DEBUG
-    else if (*pDomainInitialized == TRUE && !declaringType.IsNull())
+    else if (*pIsClassInitialized == TRUE && !declaringType.IsNull())
        CONSISTENCY_CHECK(declaringType.GetMethodTable()->CheckActivated());
 #endif
 
@@ -973,9 +972,7 @@ void InvokeUtil::SetValidField(CorElementType fldType,
 
 // GetFieldValue
 // This method will return an ARG_SLOT containing the value of the field.
-// GetFieldValue
-// This method will return an ARG_SLOT containing the value of the field.
-OBJECTREF InvokeUtil::GetFieldValue(FieldDesc* pField, TypeHandle fieldType, OBJECTREF* target, TypeHandle declaringType, CLR_BOOL *pDomainInitialized) {
+OBJECTREF InvokeUtil::GetFieldValue(FieldDesc* pField, TypeHandle fieldType, OBJECTREF* target, TypeHandle declaringType, CLR_BOOL *pIsClassInitialized) {
     CONTRACTL {
         THROWS;
         GC_TRIGGERS;
@@ -999,7 +996,7 @@ OBJECTREF InvokeUtil::GetFieldValue(FieldDesc* pField, TypeHandle fieldType, OBJ
     {
         pDeclMT = declaringType.GetMethodTable();
 
-        // We don't allow getting the field just so we don't have more specical
+        // We don't allow getting the field just so we don't have more special
         // cases than we need to.  Then we need at least the throw check to ensure
         // we don't allow data corruption.
         if (Nullable::IsNullableType(pDeclMT))
@@ -1013,23 +1010,21 @@ OBJECTREF InvokeUtil::GetFieldValue(FieldDesc* pField, TypeHandle fieldType, OBJ
         pDeclMT = pField->GetModule()->GetGlobalMethodTable();
     }
 
-    if (*pDomainInitialized == FALSE)
+    if (*pIsClassInitialized == FALSE)
     {
         EX_TRY
         {
             pDeclMT->EnsureInstanceActive();
             pDeclMT->CheckRunClassInitThrowing();
-
-            *pDomainInitialized = TRUE;
+            *pIsClassInitialized = pDeclMT->IsClassInited();
         }
         EX_CATCH_THROWABLE(&Throwable);
     }
 #ifdef _DEBUG
-    else if (*pDomainInitialized == TRUE && !declaringType.IsNull())
+    else if (*pIsClassInitialized == TRUE && !declaringType.IsNull())
        CONSISTENCY_CHECK(declaringType.GetMethodTable()->CheckActivated());
 #endif
 
-
     if(Throwable != NULL)
     {
         GCPROTECT_BEGIN(Throwable);
@@ -1084,7 +1079,7 @@ OBJECTREF InvokeUtil::GetFieldValue(FieldDesc* pField, TypeHandle fieldType, OBJ
 
     case ELEMENT_TYPE_VALUETYPE:
     {
-        // Value classes require createing a boxed version of the field and then
+        // Value classes require creating a boxed version of the field and then
         //  copying from the source...
         // Allocate an object to return...
         _ASSERTE(!fieldType.IsTypeDesc());
diff --git a/src/coreclr/vm/invokeutil.h b/src/coreclr/vm/invokeutil.h
index 0bd1577c7a19..b288c475aae0 100644
--- a/src/coreclr/vm/invokeutil.h
+++ b/src/coreclr/vm/invokeutil.h
@@ -138,9 +138,9 @@ class InvokeUtil
     // SetValidField
     // Given an target object, a value object and a field this method will set the field
     //  on the target object.  The field must be validate before calling this.
-    static void SetValidField(CorElementType fldType, TypeHandle fldTH, FieldDesc* pField, OBJECTREF* target, OBJECTREF* value, TypeHandle declaringType, CLR_BOOL *pDomainInitialized);
+    static void SetValidField(CorElementType fldType, TypeHandle fldTH, FieldDesc* pField, OBJECTREF* target, OBJECTREF* value, TypeHandle declaringType, CLR_BOOL *pIsClassInitialized);
 
-    static OBJECTREF GetFieldValue(FieldDesc* pField, TypeHandle fieldType, OBJECTREF* target, TypeHandle declaringType, CLR_BOOL *pDomainInitialized);
+    static OBJECTREF GetFieldValue(FieldDesc* pField, TypeHandle fieldType, OBJECTREF* target, TypeHandle declaringType, CLR_BOOL *pIsClassInitialized);
 
     // ValidateObjectTarget
     // This method will validate the Object/Target relationship
diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index 0d1ec278e7bc..3aae4a155fc7 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -58,6 +58,9 @@
 
 #include "exinfo.h"
 
+using std::isfinite;
+using std::isnan;
+
 //========================================================================
 //
 // This file contains implementation of all JIT helpers. The helpers are
@@ -264,7 +267,7 @@ HCIMPL2(INT32, JIT_Div, INT32 dividend, INT32 divisor)
         }
         else if (divisor == -1)
         {
-            if (dividend == _I32_MIN)
+            if (dividend == INT32_MIN)
             {
                 ehKind = kOverflowException;
                 goto ThrowExcep;
@@ -296,7 +299,7 @@ HCIMPL2(INT32, JIT_Mod, INT32 dividend, INT32 divisor)
         }
         else if (divisor == -1)
         {
-            if (dividend == _I32_MIN)
+            if (dividend == INT32_MIN)
             {
                 ehKind = kOverflowException;
                 goto ThrowExcep;
@@ -488,256 +491,155 @@ HCIMPLEND
 #include <optsmallperfcritical.h>
 
 /*********************************************************************/
-//
-HCIMPL1_V(double, JIT_ULng2Dbl, UINT64 val)
+HCIMPL1_V(double, JIT_ULng2Dbl, uint64_t val)
 {
     FCALL_CONTRACT;
-
-    double conv = (double) ((INT64) val);
-    if (conv < 0)
-        conv += (4294967296.0 * 4294967296.0);  // add 2^64
-    _ASSERTE(conv >= 0);
-    return(conv);
+    return (double)val;
 }
 HCIMPLEND
 
 /*********************************************************************/
-// needed for ARM and RyuJIT-x86
-HCIMPL1_V(double, JIT_Lng2Dbl, INT64 val)
+HCIMPL1_V(double, JIT_Lng2Dbl, int64_t val)
 {
     FCALL_CONTRACT;
-    return double(val);
+    return (double)val;
 }
 HCIMPLEND
 
-//--------------------------------------------------------------------------
-template <class ftype>
-ftype modftype(ftype value, ftype *iptr);
-template <> float modftype(float value, float *iptr) { return modff(value, iptr); }
-template <> double modftype(double value, double *iptr) { return modf(value, iptr); }
-
-// round to nearest, round to even if tied
-template <class ftype>
-ftype BankersRound(ftype value)
-{
-    if (value < 0.0) return -BankersRound <ftype> (-value);
-
-    ftype integerPart;
-    modftype( value, &integerPart );
-
-    // if decimal part is exactly .5
-    if ((value -(integerPart +0.5)) == 0.0)
-    {
-        // round to even
-        if (fmod(ftype(integerPart), ftype(2.0)) == 0.0)
-            return integerPart;
-
-        // Else return the nearest even integer
-        return (ftype)_copysign(ceil(fabs(value+0.5)),
-                         value);
-    }
-
-    // Otherwise round to closest
-    return (ftype)_copysign(floor(fabs(value)+0.5),
-                     value);
-}
-
-
 /*********************************************************************/
-// round double to nearest int (as double)
-HCIMPL1_V(double, JIT_DoubleRound, double val)
+HCIMPL1_V(int64_t, JIT_Dbl2Lng, double val)
 {
     FCALL_CONTRACT;
-    return BankersRound(val);
-}
-HCIMPLEND
 
-/*********************************************************************/
-// round float to nearest int (as float)
-HCIMPL1_V(float, JIT_FloatRound, float val)
-{
-    FCALL_CONTRACT;
-    return BankersRound(val);
+#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_ARM)
+    const double int64_min = -2147483648.0 * 4294967296.0;
+    const double int64_max = 2147483648.0 * 4294967296.0;
+    return (val != val) ? 0 : (val <= int64_min) ? INT64_MIN : (val >= int64_max) ? INT64_MAX : (int64_t)val;
+#else
+    return (int64_t)val;
+#endif
 }
 HCIMPLEND
 
 /*********************************************************************/
-// Call fast Dbl2Lng conversion - used by functions below
-FORCEINLINE INT64 FastDbl2Lng(double val)
+HCIMPL1_V(uint32_t, JIT_Dbl2UIntOvf, double val)
 {
-#ifdef TARGET_X86
-    FCALL_CONTRACT;
-    return HCCALL1_V(JIT_Dbl2Lng, val);
-#else
     FCALL_CONTRACT;
-    return((__int64) val);
-#endif
+
+    // Note that this expression also works properly for val = NaN case
+    if (val > -1.0 && val < 4294967296.0)
+        return (uint32_t)val;
+
+    FCThrow(kOverflowException);
 }
+HCIMPLEND
 
 /*********************************************************************/
-HCIMPL1_V(UINT32, JIT_Dbl2UIntOvf, double val)
+HCIMPL1_V(int, JIT_Dbl2IntOvf, double val)
 {
     FCALL_CONTRACT;
 
-        // Note that this expression also works properly for val = NaN case
-    if (val > -1.0 && val < 4294967296.0)
-        return((UINT32)FastDbl2Lng(val));
+    const double two31 = 2147483648.0;
+    // Note that this expression also works properly for val = NaN case
+    if (val > -two31 - 1 && val < two31)
+        return (int32_t)val;
 
     FCThrow(kOverflowException);
 }
 HCIMPLEND
 
 /*********************************************************************/
-HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val)
+HCIMPL1_V(int64_t, JIT_Dbl2LngOvf, double val)
 {
     FCALL_CONTRACT;
 
     const double two63  = 2147483648.0 * 4294967296.0;
-    UINT64 ret;
-    if (val < two63) {
-        ret = FastDbl2Lng(val);
-    }
-    else {
-        // subtract 0x8000000000000000, do the convert then add it back again
-        ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000);
-    }
-    return ret;
+
+    // Note that this expression also works properly for val = NaN case
+    // We need to compare with the very next double to two63. 0x402 is epsilon to get us there.
+    if (val > -two63 - 0x402 && val < two63)
+        return (int64_t)val;
+
+    FCThrow(kOverflowException);
 }
 HCIMPLEND
 
 /*********************************************************************/
-HCIMPL1_V(UINT64, JIT_Dbl2ULngOvf, double val)
+HCIMPL1_V(uint64_t, JIT_Dbl2ULngOvf, double val)
 {
     FCALL_CONTRACT;
 
     const double two64  = 4294967296.0 * 4294967296.0;
-        // Note that this expression also works properly for val = NaN case
-    if (val > -1.0 && val < two64) {
-        const double two63  = 2147483648.0 * 4294967296.0;
-        UINT64 ret;
-        if (val < two63) {
-            ret = FastDbl2Lng(val);
-        }
-        else {
-            // subtract 0x8000000000000000, do the convert then add it back again
-            ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000);
-        }
-#ifdef _DEBUG
-        // since no overflow can occur, the value always has to be within 1
-        double roundTripVal = HCCALL1_V(JIT_ULng2Dbl, ret);
-        _ASSERTE(val - 1.0 <= roundTripVal && roundTripVal <= val + 1.0);
-#endif // _DEBUG
-        return ret;
-    }
+    // Note that this expression also works properly for val = NaN case
+    if (val > -1.0 && val < two64)
+        return (uint64_t)val;
 
     FCThrow(kOverflowException);
 }
 HCIMPLEND
 
-
-#if !defined(TARGET_X86) || defined(TARGET_UNIX)
-
-HCIMPL1_V(INT64, JIT_Dbl2Lng, double val)
+HCIMPL1_V(uint32_t, JIT_Dbl2UInt, double val)
 {
     FCALL_CONTRACT;
 
-    return((INT64)val);
+#if defined(TARGET_X86) || defined(TARGET_AMD64)
+    const double uint_max = 4294967295.0;
+    // Note that this expression also works properly for val = NaN case
+    return (val >= 0) ? ((val >= uint_max) ? UINT32_MAX : (uint32_t)val) : 0;
+#else
+    return (uint32_t)val;
+#endif
 }
 HCIMPLEND
 
-HCIMPL1_V(int, JIT_Dbl2IntOvf, double val)
+/*********************************************************************/
+HCIMPL1_V(int32_t, JIT_Dbl2Int, double val)
 {
     FCALL_CONTRACT;
 
-    const double two31 = 2147483648.0;
-
-        // Note that this expression also works properly for val = NaN case
-    if (val > -two31 - 1 && val < two31)
-        return((INT32)val);
-
-    FCThrow(kOverflowException);
+#if defined(TARGET_X86) || defined(TARGET_AMD64)
+    const double int32_min = -2147483648.0;
+    const double int32_max_plus_1 = 2147483648.0;
+    return (val != val) ? 0 : (val <= int32_min) ? INT32_MIN : (val >= int32_max_plus_1) ? INT32_MAX : (int32_t)val;
+#else
+    return (int32_t)val;
+#endif
 }
 HCIMPLEND
 
-HCIMPL1_V(INT64, JIT_Dbl2LngOvf, double val)
+/*********************************************************************/
+HCIMPL1_V(uint64_t, JIT_Dbl2ULng, double val)
 {
     FCALL_CONTRACT;
 
-    const double two63  = 2147483648.0 * 4294967296.0;
-
+#if defined(TARGET_X86) || defined(TARGET_AMD64)
+    const double uint64_max_plus_1 = 4294967296.0 * 4294967296.0;
     // Note that this expression also works properly for val = NaN case
-    // We need to compare with the very next double to two63. 0x402 is epsilon to get us there.
-    if (val > -two63 - 0x402 && val < two63)
-        return((INT64)val);
-
-    FCThrow(kOverflowException);
+    return (val >= 0) ? ((val >= uint64_max_plus_1) ? UINT64_MAX : (uint64_t)val) : 0;
+#else
+    return (uint64_t)val;
+#endif
 }
 HCIMPLEND
 
+/*********************************************************************/
 HCIMPL2_VV(float, JIT_FltRem, float dividend, float divisor)
 {
     FCALL_CONTRACT;
 
-    //
-    // From the ECMA standard:
-    //
-    // If [divisor] is zero or [dividend] is infinity
-    //   the result is NaN.
-    // If [divisor] is infinity,
-    //   the result is [dividend] (negated for -infinity***).
-    //
-    // ***"negated for -infinity" has been removed from the spec
-    //
-
-    if (divisor==0 || !_finite(dividend))
-    {
-        UINT32 NaN = CLR_NAN_32;
-        return *(float *)(&NaN);
-    }
-    else if (!_finite(divisor) && !_isnan(divisor))
-    {
-        return dividend;
-    }
-    // else...
-#if 0
-    // COMPILER BUG WITH FMODF() + /Oi, USE FMOD() INSTEAD
-    return fmodf(dividend,divisor);
-#else
-    return (float)fmod((double)dividend,(double)divisor);
-#endif
+    return fmodf(dividend, divisor);
 }
 HCIMPLEND
 
+/*********************************************************************/
 HCIMPL2_VV(double, JIT_DblRem, double dividend, double divisor)
 {
     FCALL_CONTRACT;
 
-    //
-    // From the ECMA standard:
-    //
-    // If [divisor] is zero or [dividend] is infinity
-    //   the result is NaN.
-    // If [divisor] is infinity,
-    //   the result is [dividend] (negated for -infinity***).
-    //
-    // ***"negated for -infinity" has been removed from the spec
-    //
-    if (divisor==0 || !_finite(dividend))
-    {
-        UINT64 NaN = CLR_NAN_64;
-        return *(double *)(&NaN);
-    }
-    else if (!_finite(divisor) && !_isnan(divisor))
-    {
-        return dividend;
-    }
-    // else...
-    return(fmod(dividend,divisor));
+    return fmod(dividend, divisor);
 }
 HCIMPLEND
 
-#endif // !TARGET_X86 || TARGET_UNIX
-
 #include <optdefault.h>
 
 
@@ -1280,7 +1182,7 @@ NOINLINE HCIMPL1(void, JIT_InitClass_Framed, MethodTable* pMT)
     // already have initialized the Global Class <Module>
     CONSISTENCY_CHECK(!pMT->IsGlobalClass());
 
-    pMT->CheckRestore();
+    _ASSERTE(pMT->IsFullyLoaded());
     pMT->CheckRunClassInitThrowing();
 
     HELPER_METHOD_FRAME_END();
@@ -1333,7 +1235,7 @@ HCIMPL2(void, JIT_InitInstantiatedClass, CORINFO_CLASS_HANDLE typeHnd_, CORINFO_
         pMT = pTemplateMT;
     }
 
-    pMT->CheckRestore();
+    _ASSERTE(pMT->IsFullyLoaded());
     pMT->EnsureInstanceActive();
     pMT->CheckRunClassInitThrowing();
     HELPER_METHOD_FRAME_END();
@@ -1585,7 +1487,7 @@ NOINLINE HCIMPL1(void*, JIT_GetGenericsGCStaticBase_Framed, MethodTable *pMT)
 
     HELPER_METHOD_FRAME_BEGIN_RET_0();
 
-    pMT->CheckRestore();
+    _ASSERTE(pMT->IsFullyLoaded());
 
     pMT->CheckRunClassInitThrowing();
 
@@ -1646,7 +1548,7 @@ NOINLINE HCIMPL1(void*, JIT_GetGenericsNonGCStaticBase_Framed, MethodTable *pMT)
 
     HELPER_METHOD_FRAME_BEGIN_RET_0();
 
-    pMT->CheckRestore();
+    _ASSERTE(pMT->IsFullyLoaded());
 
     // If pMT refers to a method table that requires some initialization work,
     // then pMT cannot to a method table that is shared by generic instantiations,
@@ -1726,9 +1628,7 @@ HCIMPL1(void*, JIT_GetNonGCThreadStaticBase_Helper, MethodTable * pMT)
 
     HELPER_METHOD_FRAME_BEGIN_RET_0();
 
-    // For generics, we need to call CheckRestore() for some reason
-    if (pMT->HasGenericsStaticsInfo())
-        pMT->CheckRestore();
+    _ASSERTE(pMT->IsFullyLoaded());
 
     // Get the TLM
     ThreadLocalModule * pThreadLocalModule = ThreadStatics::GetTLM(pMT);
@@ -1758,9 +1658,7 @@ HCIMPL1(void*, JIT_GetGCThreadStaticBase_Helper, MethodTable * pMT)
 
     HELPER_METHOD_FRAME_BEGIN_RET_0();
 
-    // For generics, we need to call CheckRestore() for some reason
-    if (pMT->HasGenericsStaticsInfo())
-        pMT->CheckRestore();
+    _ASSERTE(pMT->IsFullyLoaded());
 
     // Get the TLM
     ThreadLocalModule * pThreadLocalModule = ThreadStatics::GetTLM(pMT);
@@ -2860,7 +2758,7 @@ HCIMPL3(Object*, JIT_NewMDArr, CORINFO_CLASS_HANDLE classHnd, unsigned dwNumArgs
     HELPER_METHOD_FRAME_BEGIN_RET_1(ret);    // Set up a frame
 
     TypeHandle typeHnd(classHnd);
-    typeHnd.CheckRestore();
+    _ASSERTE(typeHnd.IsFullyLoaded());
     _ASSERTE(typeHnd.GetMethodTable()->IsArray());
 
     ret = AllocateArrayEx(typeHnd, pArgList, dwNumArgs);
@@ -2923,7 +2821,7 @@ HCIMPL2(Object*, JIT_Box, CORINFO_CLASS_HANDLE type, void* unboxedData)
 
     MethodTable *pMT = clsHnd.AsMethodTable();
 
-    pMT->CheckRestore();
+    _ASSERTE(pMT->IsFullyLoaded());
 
     _ASSERTE (pMT->IsValueType() && !pMT->IsByRefLike());
 
@@ -3570,6 +3468,14 @@ NOINLINE HCIMPL3(CORINFO_MethodPtr, JIT_VirtualFunctionPointer_Framed, Object *
 }
 HCIMPLEND
 
+HCIMPL3(void, Jit_NativeMemSet, void* pDest, int value, size_t length)
+{
+    _ASSERTE(pDest != nullptr);
+    FCALL_CONTRACT;
+    memset(pDest, value, length);
+}
+HCIMPLEND
+
 HCIMPL1(Object*, JIT_GetRuntimeFieldStub, CORINFO_FIELD_HANDLE field)
 {
     FCALL_CONTRACT;
@@ -4263,7 +4169,10 @@ void RethrowNew()
 
     ExInfo *pActiveExInfo = (ExInfo*)pThread->GetExceptionState()->GetCurrentExceptionTracker();
 
-    ExInfo exInfo(pThread, pActiveExInfo->m_ptrs.ExceptionRecord, pActiveExInfo->m_ptrs.ContextRecord, ExKind::None);
+    CONTEXT exceptionContext;
+    RtlCaptureContext(&exceptionContext);
+
+    ExInfo exInfo(pThread, pActiveExInfo->m_ptrs.ExceptionRecord, &exceptionContext, ExKind::None);
 
     GCPROTECT_BEGIN(exInfo.m_exception);
     PREPARE_NONVIRTUAL_CALLSITE(METHOD__EH__RH_RETHROW);
diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp
index f2f7d229d546..0889e131f037 100644
--- a/src/coreclr/vm/jitinterface.cpp
+++ b/src/coreclr/vm/jitinterface.cpp
@@ -156,15 +156,13 @@ inline CORINFO_MODULE_HANDLE GetScopeHandle(MethodDesc* method)
 //This is common refactored code from within several of the access check functions.
 static BOOL ModifyCheckForDynamicMethod(DynamicResolver *pResolver,
                                  TypeHandle *pOwnerTypeForSecurity,
-                                 AccessCheckOptions::AccessCheckType *pAccessCheckType,
-                                 DynamicResolver** ppAccessContext)
+                                 AccessCheckOptions::AccessCheckType *pAccessCheckType)
 {
     CONTRACTL {
         STANDARD_VM_CHECK;
         PRECONDITION(CheckPointer(pResolver));
         PRECONDITION(CheckPointer(pOwnerTypeForSecurity));
         PRECONDITION(CheckPointer(pAccessCheckType));
-        PRECONDITION(CheckPointer(ppAccessContext));
         PRECONDITION(*pAccessCheckType == AccessCheckOptions::kNormalAccessibilityChecks);
     } CONTRACTL_END;
 
@@ -702,7 +700,7 @@ size_t CEEInfo::printObjectDescription (
     const UTF8* utf8data = stackStr.GetUTF8();
     if (bufferSize > 0)
     {
-        bytesWritten = min(bufferSize - 1, stackStr.GetCount());
+        bytesWritten = min<size_t>(bufferSize - 1, stackStr.GetCount());
         memcpy((BYTE*)buffer, (BYTE*)utf8data, bytesWritten);
 
         // Always null-terminate
@@ -883,7 +881,18 @@ void CEEInfo::resolveToken(/* IN, OUT */ CORINFO_RESOLVED_TOKEN * pResolvedToken
 
     if (IsDynamicScope(pResolvedToken->tokenScope))
     {
-        GetDynamicResolver(pResolvedToken->tokenScope)->ResolveToken(pResolvedToken->token, &th, &pMD, &pFD);
+        ResolvedToken resolved{};
+        GetDynamicResolver(pResolvedToken->tokenScope)->ResolveToken(pResolvedToken->token, &resolved);
+
+        th = resolved.TypeHandle;
+        pMD = resolved.Method;
+        pFD = resolved.Field;
+
+        // Record supplied signatures.
+        if (!resolved.TypeSignature.IsNull())
+            resolved.TypeSignature.GetSignature(&pResolvedToken->pTypeSpec, &pResolvedToken->cbTypeSpec);
+        if (!resolved.MethodSignature.IsNull())
+            resolved.MethodSignature.GetSignature(&pResolvedToken->pMethodSpec, &pResolvedToken->cbMethodSpec);
 
         //
         // Check that we got the expected handles and fill in missing data if necessary
@@ -893,18 +902,10 @@ void CEEInfo::resolveToken(/* IN, OUT */ CORINFO_RESOLVED_TOKEN * pResolvedToken
 
         if (pMD != NULL)
         {
-            if ((tkType != mdtMethodDef) && (tkType != mdtMemberRef))
+            if ((tkType != mdtMethodDef) && (tkType != mdtMemberRef) && (tkType != mdtMethodSpec))
                 ThrowBadTokenException(pResolvedToken);
             if ((tokenType & CORINFO_TOKENKIND_Method) == 0)
                 ThrowBadTokenException(pResolvedToken);
-            if (th.IsNull())
-                th = pMD->GetMethodTable();
-
-            // "PermitUninstDefOrRef" check
-            if ((tokenType != CORINFO_TOKENKIND_Ldtoken) && pMD->ContainsGenericVariables())
-            {
-                COMPlusThrow(kInvalidProgramException);
-            }
 
             // if this is a BoxedEntryPointStub get the UnboxedEntryPoint one
             if (pMD->IsUnboxingStub())
@@ -924,8 +925,6 @@ void CEEInfo::resolveToken(/* IN, OUT */ CORINFO_RESOLVED_TOKEN * pResolvedToken
                 ThrowBadTokenException(pResolvedToken);
             if ((tokenType & CORINFO_TOKENKIND_Field) == 0)
                 ThrowBadTokenException(pResolvedToken);
-            if (th.IsNull())
-                th = pFD->GetApproxEnclosingMethodTable();
 
             if (pFD->IsStatic() && (tokenType != CORINFO_TOKENKIND_Ldtoken))
             {
@@ -959,7 +958,7 @@ void CEEInfo::resolveToken(/* IN, OUT */ CORINFO_RESOLVED_TOKEN * pResolvedToken
     else
     {
         mdToken metaTOK = pResolvedToken->token;
-        Module * pModule = (Module *)pResolvedToken->tokenScope;
+        Module * pModule = GetModule(pResolvedToken->tokenScope);
 
         switch (TypeFromToken(metaTOK))
         {
@@ -1580,6 +1579,8 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
                 // Optimization is disabled for linux/x86
 #elif defined(TARGET_LINUX_MUSL) && defined(TARGET_ARM64)
                 // Optimization is disabled for linux musl arm64
+#elif defined(TARGET_FREEBSD) && defined(TARGET_ARM64)
+                // Optimization is disabled for FreeBSD/arm64
 #else
                 bool optimizeThreadStaticAccess = true;
 #if !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_AMD64)
@@ -1703,7 +1704,9 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
             SigTypeContext::InitTypeContext(pCallerForSecurity, &typeContext);
 
             SigPointer sigptr(pResolvedToken->pTypeSpec, pResolvedToken->cbTypeSpec);
-            fieldTypeForSecurity = sigptr.GetTypeHandleThrowing((Module *)pResolvedToken->tokenScope, &typeContext);
+
+            Module* targetModule = GetModule(pResolvedToken->tokenScope);
+            fieldTypeForSecurity = sigptr.GetTypeHandleThrowing(targetModule, &typeContext);
 
             // typeHnd can be a variable type
             if (fieldTypeForSecurity.GetMethodTable() == NULL)
@@ -1715,15 +1718,13 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
         BOOL doAccessCheck = TRUE;
         AccessCheckOptions::AccessCheckType accessCheckType = AccessCheckOptions::kNormalAccessibilityChecks;
 
-        DynamicResolver * pAccessContext = NULL;
-
         //More in code:CEEInfo::getCallInfo, but the short version is that the caller and callee Descs do
         //not completely describe the type.
         TypeHandle callerTypeForSecurity = TypeHandle(pCallerForSecurity->GetMethodTable());
         if (IsDynamicScope(pResolvedToken->tokenScope))
         {
             doAccessCheck = ModifyCheckForDynamicMethod(GetDynamicResolver(pResolvedToken->tokenScope), &callerTypeForSecurity,
-                &accessCheckType, &pAccessContext);
+                &accessCheckType);
         }
 
         //Now for some link time checks.
@@ -1735,7 +1736,7 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
         {
             //Well, let's check some visibility at least.
             AccessCheckOptions accessCheckOptions(accessCheckType,
-                pAccessContext,
+                NULL,
                 FALSE,
                 pField);
 
@@ -1849,22 +1850,19 @@ CEEInfo::findCallSiteSig(
         {
             _ASSERTE(TypeFromToken(sigMethTok) == mdtMethodDef);
 
-            TypeHandle classHandle;
-            MethodDesc * pMD = NULL;
-            FieldDesc * pFD = NULL;
-
             // in this case a method is asked for its sig. Resolve the method token and get the sig
-            pResolver->ResolveToken(sigMethTok, &classHandle, &pMD, &pFD);
-            if (pMD == NULL)
+            ResolvedToken resolved{};
+            pResolver->ResolveToken(sigMethTok, &resolved);
+            if (resolved.Method == NULL)
                 COMPlusThrow(kInvalidProgramException);
 
             PCCOR_SIGNATURE pSig = NULL;
             DWORD           cbSig;
-            pMD->GetSig(&pSig, &cbSig);
+            resolved.Method->GetSig(&pSig, &cbSig);
             sig = SigPointer(pSig, cbSig);
 
-            context = MAKE_METHODCONTEXT(pMD);
-            scopeHnd = GetScopeHandle(pMD->GetModule());
+            context = MAKE_METHODCONTEXT(resolved.Method);
+            scopeHnd = GetScopeHandle(resolved.Method->GetModule());
         }
 
         sig.GetSignature(&pSig, &cbSig);
@@ -2625,6 +2623,37 @@ bool CEEInfo::getSystemVAmd64PassStructInRegisterDescriptor(
 #endif // !defined(UNIX_AMD64_ABI_ITF)
 }
 
+void CEEInfo::getSwiftLowering(CORINFO_CLASS_HANDLE structHnd, CORINFO_SWIFT_LOWERING* pLowering)
+{
+    CONTRACTL{
+        THROWS;
+        GC_TRIGGERS;
+        MODE_PREEMPTIVE;
+    } CONTRACTL_END;
+
+    JIT_TO_EE_TRANSITION();
+
+    TypeHandle th(structHnd);
+
+    bool useNativeLayout = false;
+    MethodTable* methodTablePtr = nullptr;
+    if (!th.IsTypeDesc())
+    {
+        methodTablePtr = th.AsMethodTable();
+    }
+    else
+    {
+        _ASSERTE(th.IsNativeValueType());
+
+        useNativeLayout = true;
+        methodTablePtr = th.AsNativeValueType();
+    }
+
+    methodTablePtr->GetNativeSwiftPhysicalLowering(pLowering, useNativeLayout);
+
+    EE_TO_JIT_TRANSITION();
+}
+
 /*********************************************************************/
 unsigned CEEInfo::getClassNumInstanceFields (CORINFO_CLASS_HANDLE clsHnd)
 {
@@ -2790,6 +2819,7 @@ void CEEInfo::MethodCompileComplete(CORINFO_METHOD_HANDLE methHnd)
 void CEEInfo::embedGenericHandle(
             CORINFO_RESOLVED_TOKEN * pResolvedToken,
             bool                     fEmbedParent,
+            CORINFO_METHOD_HANDLE    callerHandle,
             CORINFO_GENERICHANDLE_RESULT *pResult)
 {
     CONTRACTL {
@@ -2892,6 +2922,7 @@ void CEEInfo::embedGenericHandle(
                                                   pResolvedToken,
                                                   NULL,
                                                   pTemplateMD,
+                                                  GetMethod(callerHandle),
                                                   &pResult->lookup);
     }
     else
@@ -2989,7 +3020,7 @@ MethodDesc * CEEInfo::GetMethodForSecurity(CORINFO_METHOD_HANDLE callerHandle)
         return m_pMethodForSecurity_Value;
     }
 
-    MethodDesc * pCallerMethod = (MethodDesc *)callerHandle;
+    MethodDesc * pCallerMethod = GetMethod(callerHandle);
 
     //If the caller is generic, load the open type and then load the field again,  This allows us to
     //differentiate between BadGeneric<T> containing a memberRef for a field of type InaccessibleClass and
@@ -3062,6 +3093,7 @@ void CEEInfo::ComputeRuntimeLookupForSharedGenericToken(DictionaryEntryKind entr
                                                         CORINFO_RESOLVED_TOKEN * pResolvedToken,
                                                         CORINFO_RESOLVED_TOKEN * pConstrainedResolvedToken,
                                                         MethodDesc * pTemplateMD /* for method-based slots */,
+                                                        MethodDesc * pCallerMD,
                                                         CORINFO_LOOKUP *pResultLookup)
 {
     CONTRACTL{
@@ -3069,6 +3101,8 @@ void CEEInfo::ComputeRuntimeLookupForSharedGenericToken(DictionaryEntryKind entr
         PRECONDITION(CheckPointer(pResultLookup));
     } CONTRACTL_END;
 
+    _ASSERT(pCallerMD != nullptr);
+
     pResultLookup->lookupKind.needsRuntimeLookup = true;
     pResultLookup->lookupKind.runtimeLookupFlags = 0;
 
@@ -3084,16 +3118,8 @@ void CEEInfo::ComputeRuntimeLookupForSharedGenericToken(DictionaryEntryKind entr
     // Unless we decide otherwise, just do the lookup via a helper function
     pResult->indirections = CORINFO_USEHELPER;
 
-    // Runtime lookups in inlined contexts are not supported by the runtime for now
-    if (pResolvedToken->tokenContext != METHOD_BEING_COMPILED_CONTEXT())
-    {
-        pResultLookup->lookupKind.runtimeLookupKind = CORINFO_LOOKUP_NOT_SUPPORTED;
-        return;
-    }
-
-    MethodDesc* pContextMD = GetMethodFromContext(pResolvedToken->tokenContext);
+    MethodDesc* pContextMD = pCallerMD;
     MethodTable* pContextMT = pContextMD->GetMethodTable();
-    bool isStaticVirtual = (pConstrainedResolvedToken != nullptr && pContextMD != nullptr && pContextMD->IsStatic());
 
     // There is a pathological case where invalid IL refereces __Canon type directly, but there is no dictionary availabled to store the lookup.
     if (!pContextMD->IsSharedByGenericInstantiations())
@@ -3248,7 +3274,7 @@ void CEEInfo::ComputeRuntimeLookupForSharedGenericToken(DictionaryEntryKind entr
         sigBuilder.AppendData(pContextMT->GetNumDicts() - 1);
     }
 
-    Module * pModule = (Module *)pResolvedToken->tokenScope;
+    Module * pModule = GetModule(pResolvedToken->tokenScope);
 
     switch (entryKind)
     {
@@ -4929,7 +4955,6 @@ CorInfoIsAccessAllowedResult CEEInfo::canAccessClass(
 
     BOOL doAccessCheck = TRUE;
     AccessCheckOptions::AccessCheckType accessCheckType = AccessCheckOptions::kNormalAccessibilityChecks;
-    DynamicResolver * pAccessContext = NULL;
 
     //All access checks must be done on the open instantiation.
     MethodDesc * pCallerForSecurity = GetMethodForSecurity(callerHandle);
@@ -4942,7 +4967,7 @@ CorInfoIsAccessAllowedResult CEEInfo::canAccessClass(
         SigTypeContext::InitTypeContext(pCallerForSecurity, &typeContext);
 
         SigPointer sigptr(pResolvedToken->pTypeSpec, pResolvedToken->cbTypeSpec);
-        pCalleeForSecurity = sigptr.GetTypeHandleThrowing((Module *)pResolvedToken->tokenScope, &typeContext);
+        pCalleeForSecurity = sigptr.GetTypeHandleThrowing(GetModule(pResolvedToken->tokenScope), &typeContext);
     }
 
     while (pCalleeForSecurity.HasTypeParam())
@@ -4953,8 +4978,7 @@ CorInfoIsAccessAllowedResult CEEInfo::canAccessClass(
     if (IsDynamicScope(pResolvedToken->tokenScope))
     {
         doAccessCheck = ModifyCheckForDynamicMethod(GetDynamicResolver(pResolvedToken->tokenScope),
-                                                    &callerTypeForSecurity, &accessCheckType,
-                                                    &pAccessContext);
+                                                    &callerTypeForSecurity, &accessCheckType);
     }
 
     //Since this is a check against a TypeHandle, there are some things we can stick in a TypeHandle that
@@ -4969,7 +4993,7 @@ CorInfoIsAccessAllowedResult CEEInfo::canAccessClass(
     if (doAccessCheck)
     {
         AccessCheckOptions accessCheckOptions(accessCheckType,
-                                              pAccessContext,
+                                              NULL,
                                               FALSE /*throw on error*/,
                                               pCalleeForSecurity.GetMethodTable());
 
@@ -5256,19 +5280,6 @@ void CEEInfo::getCallInfo(
         {
             pResult->exactContextNeedsRuntimeLookup = TRUE;
         }
-
-        // Use main method as the context as long as the methods are called on the same type
-        if (pResult->exactContextNeedsRuntimeLookup &&
-            pResolvedToken->tokenContext == METHOD_BEING_COMPILED_CONTEXT() &&
-            constrainedType.IsNull() &&
-            exactType == m_pMethodBeingCompiled->GetMethodTable() &&
-            ((pResolvedToken->cbTypeSpec  == 0) || IsTypeSpecForTypicalInstantiation(SigPointer(pResolvedToken->pTypeSpec, pResolvedToken->cbTypeSpec))))
-        {
-            // The typespec signature should be only missing for dynamic methods
-            _ASSERTE((pResolvedToken->cbTypeSpec != 0) || m_pMethodBeingCompiled->IsDynamicMethod());
-
-            pResult->contextHandle = METHOD_BEING_COMPILED_CONTEXT();
-        }
     }
 
     //
@@ -5420,6 +5431,7 @@ void CEEInfo::getCallInfo(
                                                         pResolvedToken,
                                                         pConstrainedResolvedToken,
                                                         pMD,
+                                                        GetMethod(callerHandle),
                                                         &pResult->codePointerLookup);
         }
         else
@@ -5471,6 +5483,7 @@ void CEEInfo::getCallInfo(
                                                         pResolvedToken,
                                                         pConstrainedResolvedToken,
                                                         pMD,
+                                                        GetMethod(callerHandle),
                                                         &pResult->stubLookup);
         }
         else
@@ -5512,7 +5525,7 @@ void CEEInfo::getCallInfo(
     pResult->hMethod = CORINFO_METHOD_HANDLE(pTargetMD);
 
     pResult->accessAllowed = CORINFO_ACCESS_ALLOWED;
-    MethodDesc* callerMethod = (MethodDesc*)callerHandle;
+    MethodDesc* callerMethod = GetMethod(callerHandle);
     if ((flags & CORINFO_CALLINFO_SECURITYCHECKS)
         && RequiresAccessCheck(pResolvedToken->tokenScope))
     {
@@ -5541,7 +5554,7 @@ void CEEInfo::getCallInfo(
             if (pResolvedToken->pTypeSpec != NULL)
             {
                 SigPointer sigptr(pResolvedToken->pTypeSpec, pResolvedToken->cbTypeSpec);
-                calleeTypeForSecurity = sigptr.GetTypeHandleThrowing((Module *)pResolvedToken->tokenScope, &typeContext);
+                calleeTypeForSecurity = sigptr.GetTypeHandleThrowing(GetModule(pResolvedToken->tokenScope), &typeContext);
 
                 // typeHnd can be a variable type
                 if (calleeTypeForSecurity.GetMethodTable() == NULL)
@@ -5568,7 +5581,7 @@ void CEEInfo::getCallInfo(
                 IfFailThrow(sp.GetByte(&etype));
 
                 // Load the generic method instantiation
-                THROW_BAD_FORMAT_MAYBE(etype == (BYTE)IMAGE_CEE_CS_CALLCONV_GENERICINST, 0, (Module *)pResolvedToken->tokenScope);
+                THROW_BAD_FORMAT_MAYBE(etype == (BYTE)IMAGE_CEE_CS_CALLCONV_GENERICINST, 0, GetModule(pResolvedToken->tokenScope));
 
                 IfFailThrow(sp.GetData(&nGenericMethodArgs));
 
@@ -5582,7 +5595,7 @@ void CEEInfo::getCallInfo(
 
                 for (uint32_t i = 0; i < nGenericMethodArgs; i++)
                 {
-                    genericMethodArgs[i] = sp.GetTypeHandleThrowing((Module *)pResolvedToken->tokenScope, &typeContext);
+                    genericMethodArgs[i] = sp.GetTypeHandleThrowing(GetModule(pResolvedToken->tokenScope), &typeContext);
                     _ASSERTE (!genericMethodArgs[i].IsNull());
                     IfFailThrow(sp.SkipExactlyOne());
                 }
@@ -5602,14 +5615,13 @@ void CEEInfo::getCallInfo(
         BOOL doAccessCheck = TRUE;
         BOOL canAccessMethod = TRUE;
         AccessCheckOptions::AccessCheckType accessCheckType = AccessCheckOptions::kNormalAccessibilityChecks;
-        DynamicResolver * pAccessContext = NULL;
 
         callerTypeForSecurity = TypeHandle(pCallerForSecurity->GetMethodTable());
         if (pCallerForSecurity->IsDynamicMethod())
         {
             doAccessCheck = ModifyCheckForDynamicMethod(pCallerForSecurity->AsDynamicMethodDesc()->GetResolver(),
                                                         &callerTypeForSecurity,
-                                                        &accessCheckType, &pAccessContext);
+                                                        &accessCheckType);
         }
 
         pResult->accessAllowed = CORINFO_ACCESS_ALLOWED;
@@ -5617,7 +5629,7 @@ void CEEInfo::getCallInfo(
         if (doAccessCheck)
         {
             AccessCheckOptions accessCheckOptions(accessCheckType,
-                                                  pAccessContext,
+                                                  NULL,
                                                   FALSE,
                                                   pCalleeForSecurity);
 
@@ -6286,6 +6298,7 @@ bool CEEInfo::getReadyToRunHelper(
         CORINFO_RESOLVED_TOKEN *        pResolvedToken,
         CORINFO_LOOKUP_KIND *           pGenericLookupKind,
         CorInfoHelpFunc                 id,
+        CORINFO_METHOD_HANDLE           callerHandle,
         CORINFO_CONST_LOOKUP *          pLookup
         )
 {
@@ -6298,7 +6311,8 @@ void CEEInfo::getReadyToRunDelegateCtorHelper(
         CORINFO_RESOLVED_TOKEN * pTargetMethod,
         mdToken                  targetConstraint,
         CORINFO_CLASS_HANDLE     delegateType,
-        CORINFO_LOOKUP *   pLookup
+        CORINFO_METHOD_HANDLE    callerHandle,
+        CORINFO_LOOKUP *         pLookup
         )
 {
     LIMITED_METHOD_CONTRACT;
@@ -6377,7 +6391,11 @@ CORINFO_VARARGS_HANDLE CEEInfo::getVarArgsHandle(CORINFO_SIG_INFO *sig,
 
     Module* module = GetModule(sig->scope);
 
-    result = CORINFO_VARARGS_HANDLE(module->GetVASigCookie(Signature(sig->pSig, sig->cbSig)));
+    Instantiation classInst = Instantiation((TypeHandle*) sig->sigInst.classInst, sig->sigInst.classInstCount);
+    Instantiation methodInst = Instantiation((TypeHandle*) sig->sigInst.methInst, sig->sigInst.methInstCount);
+    SigTypeContext typeContext = SigTypeContext(classInst, methodInst);
+
+    result = CORINFO_VARARGS_HANDLE(module->GetVASigCookie(Signature(sig->pSig, sig->cbSig), &typeContext));
 
     EE_TO_JIT_TRANSITION();
 
@@ -7578,7 +7596,7 @@ bool getILIntrinsicImplementationForActivator(MethodDesc* ftn,
 
     // Replace the body with implementation that just returns "default"
     MethodDesc* createDefaultInstance = CoreLibBinder::GetMethod(METHOD__ACTIVATOR__CREATE_DEFAULT_INSTANCE_OF_T);
-    COR_ILMETHOD_DECODER header(createDefaultInstance->GetILHeader(FALSE), createDefaultInstance->GetMDImport(), NULL);
+    COR_ILMETHOD_DECODER header(createDefaultInstance->GetILHeader(), createDefaultInstance->GetMDImport(), NULL);
     getMethodInfoILMethodHeaderHelper(&header, methInfo);
     *pSig = SigPointer(header.LocalVarSig, header.cbLocalVarSig);
 
@@ -7861,7 +7879,7 @@ CEEInfo::getMethodInfo(
     }
     else if (!ftn->IsWrapperStub() && ftn->HasILHeader())
     {
-        COR_ILMETHOD_DECODER header(ftn->GetILHeader(TRUE), ftn->GetMDImport(), NULL);
+        COR_ILMETHOD_DECODER header(ftn->GetILHeader(), ftn->GetMDImport(), NULL);
         cxt.Header = &header;
         getMethodInfoHelper(cxt, methInfo, context);
         result = true;
@@ -8241,17 +8259,32 @@ void CEEInfo::reportInliningDecision (CORINFO_METHOD_HANDLE inlinerHnd,
 
         if (CORProfilerEnableRejit())
         {
-            // If ReJIT is enabled, there is a chance that a race happened where the profiler
-            // requested a ReJIT on a method, but before the ReJIT occurred an inlining happened.
-            // If we end up reporting an inlining on a method with non-default IL it means the race
-            // happened and we need to manually request ReJIT for it since it was missed.
-            CodeVersionManager* pCodeVersionManager = pCallee->GetCodeVersionManager();
-            CodeVersionManager::LockHolder codeVersioningLockHolder;
-            ILCodeVersion ilVersion = pCodeVersionManager->GetActiveILCodeVersion(pCallee);
-            if (ilVersion.GetRejitState() != ILCodeVersion::kStateActive || !ilVersion.HasDefaultIL())
+            ModuleID modId = 0;
+            mdMethodDef methodDef = mdMethodDefNil;
+            BOOL shouldCallReJIT = FALSE;
+
+            {
+                // If ReJIT is enabled, there is a chance that a race happened where the profiler
+                // requested a ReJIT on a method, but before the ReJIT occurred an inlining happened.
+                // If we end up reporting an inlining on a method with non-default IL it means the race
+                // happened and we need to manually request ReJIT for it since it was missed.
+                CodeVersionManager* pCodeVersionManager = pCallee->GetCodeVersionManager();
+                CodeVersionManager::LockHolder codeVersioningLockHolder;
+                ILCodeVersion ilVersion = pCodeVersionManager->GetActiveILCodeVersion(pCallee);
+                if (ilVersion.GetRejitState() != ILCodeVersion::kStateActive || !ilVersion.HasDefaultIL())
+                {
+                    shouldCallReJIT = TRUE;
+                    modId = reinterpret_cast<ModuleID>(pCaller->GetModule());
+                    methodDef = pCaller->GetMemberDef();
+                    // Do Not call RequestReJIT inside this scope, calling RequestReJIT while holding the CodeVersionManager lock
+                    // will cause deadlocks with other threads calling RequestReJIT since it tries to obtain the CodeVersionManager lock
+                }
+            }
+
+            if (shouldCallReJIT)
             {
-                ModuleID modId = reinterpret_cast<ModuleID>(pCaller->GetModule());
-                mdMethodDef methodDef = pCaller->GetMemberDef();
+                _ASSERTE(modId != 0);
+                _ASSERTE(methodDef != mdMethodDefNil);
                 ReJitManager::RequestReJIT(1, &modId, &methodDef, static_cast<COR_PRF_REJIT_FLAGS>(0));
             }
         }
@@ -8531,7 +8564,7 @@ void CEEInfo::getEHinfo(
     }
     else
     {
-        COR_ILMETHOD_DECODER header(ftn->GetILHeader(TRUE), ftn->GetMDImport(), NULL);
+        COR_ILMETHOD_DECODER header(ftn->GetILHeader(), ftn->GetMDImport(), NULL);
         getEHinfoHelper(ftnHnd, EHnumber, clause, &header);
     }
 
@@ -8912,6 +8945,7 @@ CORINFO_METHOD_HANDLE CEEInfo::getUnboxedEntry(
 /*********************************************************************/
 void CEEInfo::expandRawHandleIntrinsic(
     CORINFO_RESOLVED_TOKEN *        pResolvedToken,
+    CORINFO_METHOD_HANDLE           callerHandle,
     CORINFO_GENERICHANDLE_RESULT *  pResult)
 {
     LIMITED_METHOD_CONTRACT;
@@ -9622,8 +9656,6 @@ int CEEInfo::getExactClasses (
         MODE_ANY;
     } CONTRACTL_END;
 
-    int exactClassesCount = 0;
-
     JIT_TO_EE_TRANSITION();
 
     // This function is currently implemented only on NativeAOT
@@ -9631,7 +9663,7 @@ int CEEInfo::getExactClasses (
 
     EE_TO_JIT_TRANSITION();
 
-    return exactClassesCount;
+    return -1;
 }
 
 /*********************************************************************/
@@ -9853,10 +9885,13 @@ bool CEEInfo::pInvokeMarshalingRequired(CORINFO_METHOD_HANDLE method, CORINFO_SI
     if (method == NULL)
     {
         // check the call site signature
+        SigTypeContext typeContext;
+        GetTypeContext(&callSiteSig->sigInst, &typeContext);
         result = NDirect::MarshalingRequired(
                     NULL,
                     callSiteSig->pSig,
-                    GetModule(callSiteSig->scope));
+                    GetModule(callSiteSig->scope),
+                    &typeContext);
     }
     else
     {
@@ -10642,7 +10677,8 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc    ftnNum,         /* IN  */
         {
             _ASSERTE(ppIndirection != NULL);
             *ppIndirection = &hlpDynamicFuncTable[dynamicFtnNum].pfnHelper;
-            return NULL;
+            result = NULL;
+            goto exit;
         }
 #endif
 
@@ -10651,7 +10687,8 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc    ftnNum,         /* IN  */
         LPVOID finalTierAddr = hlpFinalTierAddrTable[dynamicFtnNum];
         if (finalTierAddr != NULL)
         {
-            return finalTierAddr;
+            result = finalTierAddr;
+            goto exit;
         }
 
         if (dynamicFtnNum == DYNAMIC_CORINFO_HELP_ISINSTANCEOFINTERFACE ||
@@ -10665,7 +10702,10 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc    ftnNum,         /* IN  */
             dynamicFtnNum == DYNAMIC_CORINFO_HELP_CHKCASTCLASS_SPECIAL ||
             dynamicFtnNum == DYNAMIC_CORINFO_HELP_UNBOX ||
             dynamicFtnNum == DYNAMIC_CORINFO_HELP_ARRADDR_ST ||
-            dynamicFtnNum == DYNAMIC_CORINFO_HELP_LDELEMA_REF)
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_LDELEMA_REF ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMSET ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMZERO ||
+            dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMCPY)
         {
             Precode* pPrecode = Precode::GetPrecodeFromEntryPoint((PCODE)hlpDynamicFuncTable[dynamicFtnNum].pfnHelper);
             _ASSERTE(pPrecode->GetType() == PRECODE_FIXUP);
@@ -10674,7 +10714,11 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc    ftnNum,         /* IN  */
             // so we no longer need to use indirections and can emit a direct call instead.
             //
             // Avoid taking the lock for foreground jit compilations
-            if (!GetAppDomain()->GetTieredCompilationManager()->IsTieringDelayActive())
+            //
+            // JitEnableOptionalRelocs being false means we should avoid non-deterministic
+            // optimizations that can randomly change codegen.
+            if (!GetAppDomain()->GetTieredCompilationManager()->IsTieringDelayActive() &&
+                g_pConfig->JitEnableOptionalRelocs())
             {
                 MethodDesc* helperMD = pPrecode->GetMethodDesc();
                 _ASSERT(helperMD != nullptr);
@@ -10695,13 +10739,15 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc    ftnNum,         /* IN  */
                     {
                         // Cache it for future uses to avoid taking the lock again.
                         hlpFinalTierAddrTable[dynamicFtnNum] = finalTierAddr;
-                        return finalTierAddr;
+                        result = finalTierAddr;
+                        goto exit;
                     }
                 }
             }
 
             *ppIndirection = ((FixupPrecode*)pPrecode)->GetTargetSlot();
-            return NULL;
+            result = NULL;
+            goto exit;
         }
 
         pfnHelper = hlpDynamicFuncTable[dynamicFtnNum].pfnHelper;
@@ -10715,8 +10761,8 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc    ftnNum,         /* IN  */
 
     result = (LPVOID)GetEEFuncEntryPoint(pfnHelper);
 
+exit: ;
     EE_TO_JIT_TRANSITION_LEAF();
-
     return result;
 }
 
@@ -10809,14 +10855,12 @@ void CEEJitInfo::WriteCodeBytes()
 {
     LIMITED_METHOD_CONTRACT;
 
-#ifdef USE_INDIRECT_CODEHEADER
     if (m_pRealCodeHeader != NULL)
     {
         // Restore the read only version of the real code header
         m_CodeHeaderRW->SetRealCodeHeader(m_pRealCodeHeader);
         m_pRealCodeHeader = NULL;
     }
-#endif // USE_INDIRECT_CODEHEADER
 
     if (m_CodeHeaderRW != m_CodeHeader)
     {
@@ -10939,6 +10983,22 @@ void CEEJitInfo::reportRichMappings(
     EE_TO_JIT_TRANSITION();
 }
 
+void CEEJitInfo::reportMetadata(
+        const char* key,
+        const void* value,
+        size_t length)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_PREEMPTIVE;
+    } CONTRACTL_END;
+
+    JIT_TO_EE_TRANSITION_LEAF();
+
+    EE_TO_JIT_TRANSITION_LEAF();
+}
+
 void CEEJitInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo)
 {
     CONTRACTL {
@@ -11392,7 +11452,7 @@ void CEEJitInfo::recordRelocation(void * location,
 
                     // Keep track of conservative estimate of how much memory may be needed by jump stubs. We will use it to reserve extra memory
                     // on retry to increase chances that the retry succeeds.
-                    m_reserveForJumpStubs = max(0x400, m_reserveForJumpStubs + 0x10);
+                    m_reserveForJumpStubs = max((size_t)0x400, m_reserveForJumpStubs + 0x10);
                 }
             }
 
@@ -11451,7 +11511,7 @@ void CEEJitInfo::recordRelocation(void * location,
 
                 // Keep track of conservative estimate of how much memory may be needed by jump stubs. We will use it to reserve extra memory
                 // on retry to increase chances that the retry succeeds.
-                m_reserveForJumpStubs = max(0x400, m_reserveForJumpStubs + 2*BACK_TO_BACK_JUMP_ALLOCATE_SIZE);
+                m_reserveForJumpStubs = max((size_t)0x400, m_reserveForJumpStubs + 2*BACK_TO_BACK_JUMP_ALLOCATE_SIZE);
 
                 if (jumpStubAddr == 0)
                 {
@@ -11719,13 +11779,13 @@ bool CEEInfo::getStaticFieldContent(CORINFO_FIELD_HANDLE fieldHnd, uint8_t* buff
     {
         if (field->IsObjRef())
         {
-            GCX_COOP();
-
-            _ASSERT(!field->IsRVA());
-            _ASSERT(valueOffset == 0); // there is no point in returning a chunk of a gc handle
-            _ASSERT((UINT)bufferSize == field->GetSize());
+            // there is no point in returning a chunk of a gc handle
+            if ((valueOffset == 0) && (sizeof(CORINFO_OBJECT_HANDLE) <= (UINT)bufferSize) && !field->IsRVA())
+            {
+                GCX_COOP();
 
-            result = getStaticObjRefContent(field->GetStaticOBJECTREF(), buffer, ignoreMovableObjects);
+                result = getStaticObjRefContent(field->GetStaticOBJECTREF(), buffer, ignoreMovableObjects);
+            }
         }
         else
         {
@@ -12185,9 +12245,7 @@ void CEEJitInfo::allocMem (AllocMemArgs *pArgs)
     }
 
     m_jitManager->allocCode(m_pMethodBeingCompiled, totalSize.Value(), GetReserveForJumpStubs(), pArgs->flag, &m_CodeHeader, &m_CodeHeaderRW, &m_codeWriteBufferSize, &m_pCodeHeap
-#ifdef USE_INDIRECT_CODEHEADER
                           , &m_pRealCodeHeader
-#endif
 #ifdef FEATURE_EH_FUNCLETS
                           , m_totalUnwindInfos
 #endif
@@ -12332,10 +12390,11 @@ void CEEJitInfo::setEHinfo (
         ((pEHClause->Flags & COR_ILEXCEPTION_CLAUSE_FILTER) == 0) &&
         (clause->ClassToken != NULL))
     {
-        MethodDesc * pMD; FieldDesc * pFD;
-        m_pMethodBeingCompiled->AsDynamicMethodDesc()->GetResolver()->ResolveToken(clause->ClassToken, (TypeHandle *)&pEHClause->TypeHandle, &pMD, &pFD);
+        ResolvedToken resolved{};
+        m_pMethodBeingCompiled->AsDynamicMethodDesc()->GetResolver()->ResolveToken(clause->ClassToken, &resolved);
+        pEHClause->TypeHandle = (void*)resolved.TypeHandle.AsPtr();
         SetHasCachedTypeHandle(pEHClause);
-        LOG((LF_EH, LL_INFO1000000, "  CachedTypeHandle: 0x%08lx  ->  0x%08lx\n",        clause->ClassToken,    pEHClause->TypeHandle));
+        LOG((LF_EH, LL_INFO1000000, "  CachedTypeHandle: 0x%08x  ->  %p\n",        clause->ClassToken,    pEHClause->TypeHandle));
     }
 
     EE_TO_JIT_TRANSITION();
@@ -12939,18 +12998,17 @@ PCODE UnsafeJitFunction(PrepareCodeConfig* config,
         //and its return type.
         AccessCheckOptions::AccessCheckType accessCheckType = AccessCheckOptions::kNormalAccessibilityChecks;
         TypeHandle ownerTypeForSecurity = TypeHandle(pMethodForSecurity->GetMethodTable());
-        DynamicResolver *pAccessContext = NULL;
         BOOL doAccessCheck = TRUE;
         if (pMethodForSecurity->IsDynamicMethod())
         {
             doAccessCheck = ModifyCheckForDynamicMethod(pMethodForSecurity->AsDynamicMethodDesc()->GetResolver(),
                                                         &ownerTypeForSecurity,
-                                                        &accessCheckType, &pAccessContext);
+                                                        &accessCheckType);
         }
         if (doAccessCheck)
         {
             AccessCheckOptions accessCheckOptions(accessCheckType,
-                                                  pAccessContext,
+                                                  NULL,
                                                   TRUE /*Throw on error*/,
                                                   pMethodForSecurity);
 
@@ -13494,7 +13552,8 @@ BOOL LoadDynamicInfoEntry(Module *currentModule,
         }
         {
         VarArgs:
-            result = (size_t) CORINFO_VARARGS_HANDLE(currentModule->GetVASigCookie(Signature(pSig, cSig)));
+            SigTypeContext typeContext = SigTypeContext();
+            result = (size_t) CORINFO_VARARGS_HANDLE(currentModule->GetVASigCookie(Signature(pSig, cSig), &typeContext));
         }
         break;
 
@@ -14404,6 +14463,12 @@ void CEEInfo::reportRichMappings(
     UNREACHABLE();      // only called on derived class.
 }
 
+void CEEInfo::reportMetadata(const char* key, const void* value, size_t length)
+{
+    LIMITED_METHOD_CONTRACT;
+    UNREACHABLE();      // only called on derived class.
+}
+
 void CEEInfo::setPatchpointInfo(PatchpointInfo* patchpointInfo)
 {
     LIMITED_METHOD_CONTRACT;
@@ -14503,7 +14568,6 @@ TADDR EECodeInfo::GetSavedMethodCode()
         // be used during GC.
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     } CONTRACTL_END;
 #ifndef HOST_64BIT
@@ -14531,7 +14595,6 @@ TADDR EECodeInfo::GetStartAddress()
     CONTRACTL {
         NOTHROW;
         GC_NOTRIGGER;
-        HOST_NOCALLS;
         SUPPORTS_DAC;
     } CONTRACTL_END;
 
diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h
index 1708a05df5e3..7429352a47de 100644
--- a/src/coreclr/vm/jitinterface.h
+++ b/src/coreclr/vm/jitinterface.h
@@ -325,17 +325,6 @@ EXTERN_C FCDECL2(Object*, JIT_NewArr1OBJ_MP_InlineGetThread, CORINFO_CLASS_HANDL
 
 EXTERN_C FCDECL2_VV(INT64, JIT_LMul, INT64 val1, INT64 val2);
 
-EXTERN_C FCDECL1_V(INT64, JIT_Dbl2Lng, double val);
-EXTERN_C FCDECL1_V(INT64, JIT_Dbl2IntSSE2, double val);
-EXTERN_C FCDECL1_V(INT64, JIT_Dbl2LngP4x87, double val);
-EXTERN_C FCDECL1_V(INT64, JIT_Dbl2LngSSE3, double val);
-EXTERN_C FCDECL1_V(INT64, JIT_Dbl2LngOvf, double val);
-
-EXTERN_C FCDECL1_V(INT32, JIT_Dbl2IntOvf, double val);
-
-EXTERN_C FCDECL2_VV(float, JIT_FltRem, float dividend, float divisor);
-EXTERN_C FCDECL2_VV(double, JIT_DblRem, double dividend, double divisor);
-
 #ifndef HOST_64BIT
 #ifdef TARGET_X86
 // JIThelp.asm
@@ -400,9 +389,6 @@ extern "C"
 
 #endif // TARGET_AMD64 || TARGET_ARM
 
-    void STDCALL JIT_MemSet(void *dest, int c, SIZE_T count);
-    void STDCALL JIT_MemCpy(void *dest, const void *src, SIZE_T count);
-
     void STDMETHODCALLTYPE JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle);
 #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
     void STDCALL JIT_StackProbe();
@@ -561,6 +547,7 @@ class CEEInfo : public ICorJitInfo
                                                    CORINFO_RESOLVED_TOKEN * pResolvedToken,
                                                    CORINFO_RESOLVED_TOKEN * pConstrainedResolvedToken /* for ConstrainedMethodEntrySlot */,
                                                    MethodDesc * pTemplateMD /* for method-based slots */,
+                                                   MethodDesc * pCallerMD,
                                                    CORINFO_LOOKUP *pResultLookup);
 
 #if defined(FEATURE_GDBJIT)
@@ -690,9 +677,7 @@ class CEEJitInfo : public CEEInfo
         m_CodeHeaderRW = NULL;
 
         m_codeWriteBufferSize = 0;
-#ifdef USE_INDIRECT_CODEHEADER
         m_pRealCodeHeader = NULL;
-#endif
         m_pCodeHeap = NULL;
 
         if (m_pOffsetMapping != NULL)
@@ -803,9 +788,7 @@ class CEEJitInfo : public CEEInfo
           m_CodeHeader(NULL),
           m_CodeHeaderRW(NULL),
           m_codeWriteBufferSize(0),
-#ifdef USE_INDIRECT_CODEHEADER
           m_pRealCodeHeader(NULL),
-#endif
           m_pCodeHeap(NULL),
           m_ILHeader(header),
 #ifdef FEATURE_EH_FUNCLETS
@@ -897,6 +880,8 @@ class CEEJitInfo : public CEEInfo
         ICorDebugInfo::RichOffsetMapping* mappings,
         uint32_t                          numMappings) override final;
 
+    void reportMetadata(const char* key, const void* value, size_t length) override final;
+
     void* getHelperFtn(CorInfoHelpFunc    ftnNum,                         /* IN  */
                        void **            ppIndirection) override final;  /* OUT */
     static PCODE getHelperFtnStatic(CorInfoHelpFunc ftnNum);
@@ -951,9 +936,7 @@ protected :
     CodeHeader*             m_CodeHeader;   // descriptor for JITTED code - read/execute address
     CodeHeader*             m_CodeHeaderRW; // descriptor for JITTED code - code write scratch buffer address
     size_t                  m_codeWriteBufferSize;
-#ifdef USE_INDIRECT_CODEHEADER
     BYTE*                   m_pRealCodeHeader;
-#endif
     HeapList*               m_pCodeHeap;
     COR_ILMETHOD_DECODER *  m_ILHeader;     // the code header as exist in the file
 #ifdef FEATURE_EH_FUNCLETS
diff --git a/src/coreclr/vm/loongarch64/asmconstants.h b/src/coreclr/vm/loongarch64/asmconstants.h
index e12d0040a74d..6828eec7505e 100644
--- a/src/coreclr/vm/loongarch64/asmconstants.h
+++ b/src/coreclr/vm/loongarch64/asmconstants.h
@@ -34,8 +34,8 @@
 #define DynamicHelperFrameFlags_ObjectArg   1
 #define DynamicHelperFrameFlags_ObjectArg2  2
 
-#define               Thread__m_fPreemptiveGCDisabled   0x0C
-#define               Thread__m_pFrame                  0x10
+#define               Thread__m_fPreemptiveGCDisabled   0x04
+#define               Thread__m_pFrame                  0x08
 
 ASMCONSTANTS_C_ASSERT(Thread__m_fPreemptiveGCDisabled == offsetof(Thread, m_fPreemptiveGCDisabled));
 ASMCONSTANTS_C_ASSERT(Thread__m_pFrame == offsetof(Thread, m_pFrame));
diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S
index 56fb0c0b9247..3f1f7b827e6d 100644
--- a/src/coreclr/vm/loongarch64/asmhelpers.S
+++ b/src/coreclr/vm/loongarch64/asmhelpers.S
@@ -822,13 +822,13 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler
     PROLOG_SAVE_REG_PAIR   25, 26, 32
     PROLOG_SAVE_REG_PAIR   27, 28, 48
     PROLOG_SAVE_REG_PAIR   29, 30, 64
-    PROLOG_SAVE_REG 31, 80
+    PROLOG_SAVE_REG_PAIR   31, 2, 80
 
     // save any integral return value(s)
     st.d  $a0, $sp, 96
     st.d  $a1, $sp, 104
 
-    // save any FP/HFA return value(s)
+    // save any FP return value(s)
     fst.d  $f0, $sp, 112
     fst.d  $f1, $sp, 120
 
@@ -841,15 +841,15 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler
     ld.d  $a0, $sp, 96
     ld.d  $a1, $sp, 104
 
-    // restore any FP/HFA return value(s)
-    fst.d  $f0, $sp, 112
-    fst.d  $f1, $sp, 120
+    // restore any FP return value(s)
+    fld.d  $f0, $sp, 112
+    fld.d  $f1, $sp, 120
 
     EPILOG_RESTORE_REG_PAIR   23, 24, 16
     EPILOG_RESTORE_REG_PAIR   25, 26, 32
     EPILOG_RESTORE_REG_PAIR   27, 28, 48
     EPILOG_RESTORE_REG_PAIR   29, 30, 64
-    EPILOG_RESTORE_REG  31, 80
+    EPILOG_RESTORE_REG_PAIR   31, 2,  80
     //                              $fp,$ra
     EPILOG_RESTORE_REG_PAIR_INDEXED  22, 1, 0x80
     EPILOG_RETURN
diff --git a/src/coreclr/vm/loongarch64/cgencpu.h b/src/coreclr/vm/loongarch64/cgencpu.h
index 6979fa11a967..129d3a35589d 100644
--- a/src/coreclr/vm/loongarch64/cgencpu.h
+++ b/src/coreclr/vm/loongarch64/cgencpu.h
@@ -15,6 +15,30 @@
 #define USE_REDIRECT_FOR_GCSTRESS
 #endif // TARGET_UNIX
 
+#define ENUM_CALLEE_SAVED_REGISTERS() \
+    CALLEE_SAVED_REGISTER(Fp) \
+    CALLEE_SAVED_REGISTER(Ra) \
+    CALLEE_SAVED_REGISTER(S0) \
+    CALLEE_SAVED_REGISTER(S1) \
+    CALLEE_SAVED_REGISTER(S2) \
+    CALLEE_SAVED_REGISTER(S3) \
+    CALLEE_SAVED_REGISTER(S4) \
+    CALLEE_SAVED_REGISTER(S5) \
+    CALLEE_SAVED_REGISTER(S6) \
+    CALLEE_SAVED_REGISTER(S7) \
+    CALLEE_SAVED_REGISTER(S8) \
+    CALLEE_SAVED_REGISTER(Tp)
+
+#define ENUM_FP_CALLEE_SAVED_REGISTERS() \
+    CALLEE_SAVED_REGISTER(F[24]) \
+    CALLEE_SAVED_REGISTER(F[25]) \
+    CALLEE_SAVED_REGISTER(F[26]) \
+    CALLEE_SAVED_REGISTER(F[27]) \
+    CALLEE_SAVED_REGISTER(F[28]) \
+    CALLEE_SAVED_REGISTER(F[29]) \
+    CALLEE_SAVED_REGISTER(F[30]) \
+    CALLEE_SAVED_REGISTER(F[31])
+
 EXTERN_C void getFPReturn(int fpSize, INT64 *pRetVal);
 EXTERN_C void setFPReturn(int fpSize, INT64 retVal);
 
@@ -36,8 +60,6 @@ extern PCODE GetPreStubEntryPoint();
 
 #define HAS_NDIRECT_IMPORT_PRECODE              1
 
-#define USE_INDIRECT_CODEHEADER
-
 #define HAS_FIXUP_PRECODE                       1
 #define HAS_FIXUP_PRECODE_CHUNKS                1
 
@@ -59,7 +81,7 @@ extern PCODE GetPreStubEntryPoint();
 
 // Given a return address retrieved during stackwalk,
 // this is the offset by which it should be decremented to arrive at the callsite.
-#define STACKWALK_CONTROLPC_ADJUST_OFFSET 8
+#define STACKWALK_CONTROLPC_ADJUST_OFFSET 4
 
 //**********************************************************************
 // Parameter size
@@ -448,11 +470,18 @@ struct DECLSPEC_ALIGN(16) UMEntryThunkCode
 
 struct HijackArgs
 {
+    DWORD64 Fp; // frame pointer
+    union
+    {
+        DWORD64 Ra;
+        size_t ReturnAddress;
+    };
+    DWORD64 S0, S1, S2, S3, S4, S5, S6, S7, S8, Tp;
     union
     {
         struct {
-             DWORD64 V0;
-             DWORD64 V1;
+             DWORD64 A0;
+             DWORD64 A1;
          };
         size_t ReturnValue[2];
     };
@@ -464,13 +493,6 @@ struct HijackArgs
          };
         size_t FPReturnValue[2];
     };
-    DWORD64 S0, S1, S2, S3, S4, S5, S6, S7, S8, Tp;
-    DWORD64 Fp; // frame pointer
-    union
-    {
-        DWORD64 Ra;
-        size_t ReturnAddress;
-    };
 };
 
 EXTERN_C VOID STDCALL PrecodeFixupThunk();
diff --git a/src/coreclr/vm/loongarch64/crthelpers.S b/src/coreclr/vm/loongarch64/crthelpers.S
deleted file mode 100644
index 88fd21938fda..000000000000
--- a/src/coreclr/vm/loongarch64/crthelpers.S
+++ /dev/null
@@ -1,37 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#include "unixasmmacros.inc"
-
-// JIT_MemSet/JIT_MemCpy
-//
-// It is IMPORTANT that the exception handling code is able to find these guys
-// on the stack, but on non-windows platforms we can just defer to the platform
-// implementation.
-//
-LEAF_ENTRY JIT_MemSet, _TEXT
-    beq $a2, $zero, LOCAL_LABEL(JIT_MemSet_ret)
-
-    ld.b  $zero, $a0, 0     //Is this really needed ?
-
-    b  memset
-
-LOCAL_LABEL(JIT_MemSet_ret):
-    jirl  $r0, $ra, 0
-
-////NOTO: Here must use LEAF_END_MARKED!  not LEAF_END !!!
-LEAF_END_MARKED JIT_MemSet, _TEXT
-
-LEAF_ENTRY JIT_MemCpy, _TEXT
-    beq $a2, $zero, LOCAL_LABEL(JIT_MemCpy_ret)
-
-    ld.b  $zero, $a0, 0
-    ld.b  $zero, $a1, 0    //Is this really needed ?
-
-    b  memcpy
-
-LOCAL_LABEL(JIT_MemCpy_ret):
-    jirl  $r0, $ra, 0
-
-////NOTO: Here must use LEAF_END_MARKED!  not LEAF_END !!!
-LEAF_END_MARKED JIT_MemCpy, _TEXT
diff --git a/src/coreclr/vm/loongarch64/gmscpu.h b/src/coreclr/vm/loongarch64/gmscpu.h
index 01420a815716..3576c3301a8b 100644
--- a/src/coreclr/vm/loongarch64/gmscpu.h
+++ b/src/coreclr/vm/loongarch64/gmscpu.h
@@ -39,8 +39,7 @@ struct LazyMachState : public MachState{
     static void unwindLazyState(LazyMachState* baseState,
                                 MachState* lazyState,
                                 DWORD threadId,
-                                int funCallDepth = 1,
-                                HostCallPreference hostCallPreference = AllowHostCalls);
+                                int funCallDepth = 1);
 };
 
 inline void LazyMachState::setLazyStateFromUnwind(MachState* copy)
diff --git a/src/coreclr/vm/loongarch64/stubs.cpp b/src/coreclr/vm/loongarch64/stubs.cpp
index 5fe3599d0dc5..56581498f003 100644
--- a/src/coreclr/vm/loongarch64/stubs.cpp
+++ b/src/coreclr/vm/loongarch64/stubs.cpp
@@ -301,8 +301,7 @@ void ClearRegDisplayArgumentAndScratchRegisters(REGDISPLAY * pRD)
 void LazyMachState::unwindLazyState(LazyMachState* baseState,
                                     MachState* unwoundstate,
                                     DWORD threadId,
-                                    int funCallDepth,
-                                    HostCallPreference hostCallPreference)
+                                    int funCallDepth)
 {
     T_CONTEXT context;
     T_KNONVOLATILE_CONTEXT_POINTERS nonVolContextPtrs;
@@ -381,20 +380,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
         {
             // Determine  whether given IP resides in JITted code. (It returns nonzero in that case.)
             // Use it now to see if we've unwound to managed code yet.
-            BOOL fFailedReaderLock = FALSE;
-            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc, hostCallPreference, &fFailedReaderLock);
-            if (fFailedReaderLock)
-            {
-                // We don't know if we would have been able to find a JIT
-                // manager, because we couldn't enter the reader lock without
-                // yielding (and our caller doesn't want us to yield).  So abort
-                // now.
-
-                // Invalidate the lazyState we're returning, so the caller knows
-                // we aborted before we could fully unwind
-                unwoundstate->_isValid = false;
-                return;
-            }
+            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc);
 
             if (fIsManagedCode)
                 break;
@@ -450,7 +436,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
     unwoundstate->_isValid = TRUE;
 }
 
-void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACTL
     {
@@ -461,6 +447,14 @@ void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     }
     CONTRACTL_END;
 
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+        _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress());
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 
@@ -620,8 +614,16 @@ void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegis
     pContextPointers->Ra  = (PDWORD64)&pCalleeSaved->ra;
 }
 
-void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+        _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress());
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 
@@ -643,7 +645,7 @@ void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    TransitionFrame::UpdateRegDisplay(pc:%p, sp:%p)\n", pRD->ControlPC, pRD->SP));
 }
 
-void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     LIMITED_METHOD_DAC_CONTRACT;
 
@@ -676,7 +678,7 @@ void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    FaultingExceptionFrame::UpdateRegDisplay(pc:%p, sp:%p)\n", pRD->ControlPC, pRD->SP));
 }
 
-void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
@@ -685,7 +687,6 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 #ifdef PROFILING_SUPPORTED
         PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this));
 #endif
-        HOST_NOCALLS;
         MODE_ANY;
         SUPPORTS_DAC;
     }
@@ -697,6 +698,13 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
         return;
     }
 
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;
 
@@ -739,7 +747,7 @@ TADDR ResumableFrame::GetReturnAddressPtr(void)
     return dac_cast<TADDR>(m_Regs) + offsetof(T_CONTEXT, Pc);
 }
 
-void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
@@ -796,7 +804,7 @@ void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     RETURN;
 }
 
-void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     LIMITED_METHOD_CONTRACT;
 
@@ -1905,7 +1913,7 @@ PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator,
             }
         }
 
-        _ASSERTE(indirectionsDataSize == dataOffset);
+        _ASSERTE((indirectionsDataSize ? indirectionsDataSize : codeSize) == dataOffset);
 
         // No null test required
         if (!pLookup->testForNull)
diff --git a/src/coreclr/vm/marshalnative.cpp b/src/coreclr/vm/marshalnative.cpp
index e137f51d7a3e..40fc30849c3b 100644
--- a/src/coreclr/vm/marshalnative.cpp
+++ b/src/coreclr/vm/marshalnative.cpp
@@ -34,6 +34,7 @@
 #include "comdelegate.h"
 #include "typestring.h"
 #include "appdomain.inl"
+#include "stubhelpers.h"
 
 #ifdef FEATURE_COMINTEROP
 #include "comcallablewrapper.h"
@@ -283,27 +284,47 @@ extern "C" IsInCooperativeGCMode_fn QCALLTYPE MarshalNative_GetIsInCooperativeGC
 #endif
 
 /************************************************************************
- * Marshal.GetLastPInvokeError
+ * Support for the last PInvoke error
  */
+static thread_local int t_lastPInvokeError;
+
 FCIMPL0(int, MarshalNative::GetLastPInvokeError)
 {
     FCALL_CONTRACT;
 
-    return (UINT32)(GetThread()->m_dwLastError);
+    return t_lastPInvokeError;
 }
 FCIMPLEND
 
-/************************************************************************
- * Marshal.SetLastPInvokeError
- */
 FCIMPL1(void, MarshalNative::SetLastPInvokeError, int error)
 {
     FCALL_CONTRACT;
 
-    GetThread()->m_dwLastError = (DWORD)error;
+    t_lastPInvokeError = error;
+}
+FCIMPLEND
+
+FCIMPL0(void, StubHelpers::SetLastError)
+{
+    // Make sure this is the first thing we do after returning from the target, as almost everything can cause the last error to get trashed
+    DWORD lastError = ::GetLastError();
+
+    FCALL_CONTRACT;
+
+    t_lastPInvokeError = lastError;
 }
 FCIMPLEND
 
+#ifdef FEATURE_IJW
+// GetLastError override for C++/CLI
+DWORD STDMETHODCALLTYPE FalseGetLastError()
+{
+    WRAPPER_NO_CONTRACT;
+
+    return t_lastPInvokeError;
+}
+#endif // FEATURE_IJW
+
 /************************************************************************
  * Support for the GCHandle class.
  */
diff --git a/src/coreclr/vm/metasig.h b/src/coreclr/vm/metasig.h
index 45cb5700db52..7ab9d6015e03 100644
--- a/src/coreclr/vm/metasig.h
+++ b/src/coreclr/vm/metasig.h
@@ -237,6 +237,9 @@ DEFINE_METASIG(SM(PtrSByt_RetInt, P(B), i))
 DEFINE_METASIG(SM(IntPtr_RetIntPtr, I, I))
 DEFINE_METASIG(SM(UIntPtr_RetIntPtr, U, I))
 DEFINE_METASIG(SM(PtrByte_PtrByte_Int_RetVoid, P(b) P(b) i, v))
+DEFINE_METASIG(SM(RefByte_RefByte_UIntPtr_RetVoid, r(b) r(b) U, v))
+DEFINE_METASIG(SM(RefByte_Byte_UIntPtr_RetVoid, r(b) b U, v))
+DEFINE_METASIG(SM(RefByte_UIntPtr_RetVoid, r(b) U, v))
 DEFINE_METASIG(SM(PtrVoid_Byte_UInt_RetVoid, P(v) b K, v))
 DEFINE_METASIG(SM(RefObj_IntPtr_RetVoid, r(j) I, v))
 DEFINE_METASIG(SM(RefObj_RefIntPtr_RetVoid, r(j) r(I), v))
@@ -585,6 +588,13 @@ DEFINE_METASIG_T(SM(RefCleanupWorkListElement_RetVoid, r(C(CLEANUP_WORK_LIST_ELE
 DEFINE_METASIG_T(SM(RefCleanupWorkListElement_SafeHandle_RetIntPtr, r(C(CLEANUP_WORK_LIST_ELEMENT)) C(SAFE_HANDLE), I))
 DEFINE_METASIG_T(SM(RefCleanupWorkListElement_Obj_RetVoid, r(C(CLEANUP_WORK_LIST_ELEMENT)) j, v))
 
+DEFINE_METASIG(SM(PtrVoid_RetPtrVoid, P(v), P(v)))
+DEFINE_METASIG(IM(PtrVoid_RetVoid, P(v), v))
+#if defined(TARGET_X86) && defined(TARGET_WINDOWS)
+DEFINE_METASIG_T(IM(PtrCopyConstructorCookie_RetVoid, P(g(COPY_CONSTRUCTOR_COOKIE)), v))
+#endif // defined(TARGET_X86) && defined(TARGET_WINDOWS)
+
+
 #ifdef FEATURE_ICASTABLE
 DEFINE_METASIG_T(SM(ICastable_RtType_RefException_RetBool, C(ICASTABLE) C(CLASS) r(C(EXCEPTION)), F))
 DEFINE_METASIG_T(SM(ICastable_RtType_RetRtType, C(ICASTABLE) C(CLASS), C(CLASS)))
@@ -605,8 +615,8 @@ DEFINE_METASIG(GM(RetT, IMAGE_CEE_CS_CALLCONV_DEFAULT, 1, _, M(0)))
 DEFINE_METASIG_T(SM(Array_Int_Array_Int_Int_RetVoid, C(ARRAY) i C(ARRAY) i i, v))
 DEFINE_METASIG_T(SM(Array_Int_Obj_RetVoid, C(ARRAY) i j, v))
 DEFINE_METASIG_T(SM(Array_Int_PtrVoid_RetRefObj, C(ARRAY) i P(v), r(j)))
-DEFINE_METASIG_T(SM(Array_IntPtr_Obj_RetVoid, C(ARRAY) I j, v))
-DEFINE_METASIG_T(SM(Array_IntPtr_PtrVoid_RetRefObj, C(ARRAY) I P(v), r(j)))
+DEFINE_METASIG(SM(ArrObject_IntPtr_Obj_RetVoid, a(j) I j, v))
+DEFINE_METASIG(SM(ArrObject_IntPtr_PtrVoid_RetRefObj, a(j) I P(v), r(j)))
 
 DEFINE_METASIG(SM(Obj_IntPtr_Bool_RetVoid, j I F, v))
 DEFINE_METASIG(SM(IntPtr_Obj_RetVoid, I j, v))
diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp
index de3a1c3d4e35..66296c4e06e1 100644
--- a/src/coreclr/vm/method.cpp
+++ b/src/coreclr/vm/method.cpp
@@ -89,18 +89,6 @@ const BYTE MethodDesc::s_ClassificationSizeTable[] = {
     METHOD_DESC_SIZES(sizeof(NonVtableSlot) + sizeof(NativeCodeSlot)),
     METHOD_DESC_SIZES(sizeof(MethodImpl) + sizeof(NativeCodeSlot)),
     METHOD_DESC_SIZES(sizeof(NonVtableSlot) + sizeof(MethodImpl) + sizeof(NativeCodeSlot)),
-
-#ifdef FEATURE_COMINTEROP
-    METHOD_DESC_SIZES(sizeof(ComPlusCallInfo)),
-    METHOD_DESC_SIZES(sizeof(NonVtableSlot) + sizeof(ComPlusCallInfo)),
-    METHOD_DESC_SIZES(sizeof(MethodImpl) + sizeof(ComPlusCallInfo)),
-    METHOD_DESC_SIZES(sizeof(NonVtableSlot) + sizeof(MethodImpl) + sizeof(ComPlusCallInfo)),
-
-    METHOD_DESC_SIZES(sizeof(NativeCodeSlot) + sizeof(ComPlusCallInfo)),
-    METHOD_DESC_SIZES(sizeof(NonVtableSlot) + sizeof(NativeCodeSlot) + sizeof(ComPlusCallInfo)),
-    METHOD_DESC_SIZES(sizeof(MethodImpl) + sizeof(NativeCodeSlot) + sizeof(ComPlusCallInfo)),
-    METHOD_DESC_SIZES(sizeof(NonVtableSlot) + sizeof(MethodImpl) + sizeof(NativeCodeSlot) + sizeof(ComPlusCallInfo))
-#endif
 };
 
 #ifndef FEATURE_COMINTEROP
@@ -1045,7 +1033,7 @@ BOOL MethodDesc::IsVarArg()
 }
 
 //*******************************************************************************
-COR_ILMETHOD* MethodDesc::GetILHeader(BOOL fAllowOverrides /*=FALSE*/)
+COR_ILMETHOD* MethodDesc::GetILHeader()
 {
     CONTRACTL
     {
@@ -1058,9 +1046,8 @@ COR_ILMETHOD* MethodDesc::GetILHeader(BOOL fAllowOverrides /*=FALSE*/)
 
     Module *pModule = GetModule();
 
-    // Always pickup 'permanent' overrides like reflection emit, EnC, etc.
-    // but only grab temporary overrides (like profiler rewrites) if asked to
-    TADDR pIL = pModule->GetDynamicIL(GetMemberDef(), fAllowOverrides);
+    // Always pickup overrides like reflection emit, EnC, etc.
+    TADDR pIL = pModule->GetDynamicIL(GetMemberDef());
 
     if (pIL == NULL)
     {
@@ -3576,8 +3563,10 @@ void NDirectMethodDesc::InitEarlyBoundNDirectTarget()
     const void *target = GetModule()->GetInternalPInvokeTarget(GetRVA());
     _ASSERTE(target != 0);
 
+#ifdef FEATURE_IJW
     if (HeuristicDoesThisLookLikeAGetLastErrorCall((LPBYTE)target))
         target = (BYTE*)FalseGetLastError;
+#endif
 
     // As long as we've set the NDirect target field we don't need to backpatch the import thunk glue.
     // All NDirect calls all through the NDirect target, so if it's updated, then we won't go into
diff --git a/src/coreclr/vm/method.hpp b/src/coreclr/vm/method.hpp
index 735e74f4815a..f7f98b9f7331 100644
--- a/src/coreclr/vm/method.hpp
+++ b/src/coreclr/vm/method.hpp
@@ -785,7 +785,7 @@ class MethodDesc
         return IsIL() && !IsUnboxingStub() && GetRVA();
     }
 
-    COR_ILMETHOD* GetILHeader(BOOL fAllowOverrides = FALSE);
+    COR_ILMETHOD* GetILHeader();
 
     BOOL HasStoredSig()
     {
diff --git a/src/coreclr/vm/methodtable.cpp b/src/coreclr/vm/methodtable.cpp
index 41307b3d1a8f..67903433833b 100644
--- a/src/coreclr/vm/methodtable.cpp
+++ b/src/coreclr/vm/methodtable.cpp
@@ -366,7 +366,6 @@ BOOL MethodTable::ValidateWithPossibleAV()
         (pEEClass && (pEEClass->GetMethodTableWithPossibleAV()->GetClassWithPossibleAV() == pEEClass))));
 }
 
-#ifndef DACCESS_COMPILE
 
 //==========================================================================================
 BOOL  MethodTable::IsClassInited()
@@ -379,7 +378,7 @@ BOOL  MethodTable::IsClassInited()
     if (IsSharedByGenericInstantiations())
         return FALSE;
 
-    DomainLocalModule *pLocalModule = GetDomainLocalModule();
+    PTR_DomainLocalModule pLocalModule = GetDomainLocalModule();
 
     _ASSERTE(pLocalModule != NULL);
 
@@ -391,12 +390,13 @@ BOOL  MethodTable::IsInitError()
 {
     WRAPPER_NO_CONTRACT;
 
-    DomainLocalModule *pLocalModule = GetDomainLocalModule();
+    PTR_DomainLocalModule pLocalModule = GetDomainLocalModule();
     _ASSERTE(pLocalModule != NULL);
 
     return pLocalModule->IsClassInitError(this);
 }
 
+#ifndef DACCESS_COMPILE
 //==========================================================================================
 // mark the class as having its .cctor run
 void MethodTable::SetClassInited()
@@ -472,6 +472,17 @@ WORD MethodTable::GetNumMethods()
     return GetClass()->GetNumMethods();
 }
 
+PTR_MethodTable MethodTable::GetTypicalMethodTable()
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+    if (IsArray())
+        return (PTR_MethodTable)this;
+
+    PTR_MethodTable methodTableMaybe = GetModule()->LookupTypeDef(GetCl()).AsMethodTable();
+    _ASSERTE(methodTableMaybe->IsTypicalTypeDefinition());
+    return methodTableMaybe;
+}
+
 //==========================================================================================
 BOOL MethodTable::HasSameTypeDefAs(MethodTable *pMT)
 {
@@ -2798,24 +2809,17 @@ void  MethodTable::AssignClassifiedEightByteTypes(SystemVStructRegisterPassingHe
 
 #endif // defined(UNIX_AMD64_ABI_ITF)
 
-#if defined(TARGET_LOONGARCH64)
-
-bool MethodTable::IsLoongArch64OnlyOneField(MethodTable * pMT)
+#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
+bool MethodTable::IsOnlyOneField(MethodTable * pMT)
 {
     TypeHandle th(pMT);
 
-    bool useNativeLayout      = false;
-    bool ret                  = false;
-    MethodTable* pMethodTable = nullptr;
+    bool ret = false;
 
     if (!th.IsTypeDesc())
     {
-        pMethodTable = th.AsMethodTable();
-        if (pMethodTable->HasLayout())
-        {
-            useNativeLayout = true;
-        }
-        else if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
+        MethodTable* pMethodTable = th.AsMethodTable();
+        if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
         {
             DWORD numIntroducedFields = pMethodTable->GetNumIntroducedInstanceFields();
 
@@ -2825,6 +2829,19 @@ bool MethodTable::IsLoongArch64OnlyOneField(MethodTable * pMT)
 
                 CorElementType fieldType = pFieldStart[0].GetFieldType();
 
+                // InlineArray types and fixed buffer types have implied repeated fields.
+                // Checking if a type is an InlineArray type is cheap, so we'll do that first.
+                bool hasImpliedRepeatedFields = HasImpliedRepeatedFields(pMethodTable);
+
+                if (hasImpliedRepeatedFields)
+                {
+                    numIntroducedFields = pMethodTable->GetNumInstanceFieldBytes() / pFieldStart->GetSize();
+                    if (numIntroducedFields != 1)
+                    {
+                        goto _End_arg;
+                    }
+                }
+
                 if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
                 {
                     ret = true;
@@ -2834,24 +2851,16 @@ bool MethodTable::IsLoongArch64OnlyOneField(MethodTable * pMT)
                     pMethodTable  = pFieldStart->GetApproxFieldTypeHandleThrowing().GetMethodTable();
                     if (pMethodTable->GetNumIntroducedInstanceFields() == 1)
                     {
-                        ret = IsLoongArch64OnlyOneField(pMethodTable);
+                        ret = IsOnlyOneField(pMethodTable);
                     }
                 }
             }
-            goto _End_arg;
         }
     }
     else
     {
-        _ASSERTE(th.IsNativeValueType());
-
-        useNativeLayout = true;
-        pMethodTable = th.AsNativeValueType();
-    }
-    _ASSERTE(pMethodTable != nullptr);
+        MethodTable* pMethodTable = th.AsNativeValueType();
 
-    if (useNativeLayout)
-    {
         if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
         {
             DWORD numIntroducedFields = pMethodTable->GetNativeLayoutInfo()->GetNumFields();
@@ -2887,7 +2896,7 @@ bool MethodTable::IsLoongArch64OnlyOneField(MethodTable * pMT)
                     if (nfc == NativeFieldCategory::NESTED)
                     {
                         pMethodTable = pNativeFieldDescs->GetNestedNativeMethodTable();
-                        ret = IsLoongArch64OnlyOneField(pMethodTable);
+                        ret = IsOnlyOneField(pMethodTable);
                     }
                     else if (nfc != NativeFieldCategory::ILLEGAL)
                     {
@@ -2905,23 +2914,19 @@ bool MethodTable::IsLoongArch64OnlyOneField(MethodTable * pMT)
 
     return ret;
 }
+#endif
 
+#if defined(TARGET_LOONGARCH64)
 int MethodTable::GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
 {
     TypeHandle th(cls);
 
-    bool useNativeLayout           = false;
     int size = STRUCT_NO_FLOAT_FIELD;
-    MethodTable* pMethodTable      = nullptr;
 
     if (!th.IsTypeDesc())
     {
-        pMethodTable = th.AsMethodTable();
-        if (pMethodTable->HasLayout())
-        {
-            useNativeLayout = true;
-        }
-        else if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
+        MethodTable* pMethodTable = th.AsMethodTable();
+        if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
         {
             DWORD numIntroducedFields = pMethodTable->GetNumIntroducedInstanceFields();
 
@@ -2931,6 +2936,44 @@ int MethodTable::GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cl
 
                 CorElementType fieldType = pFieldStart[0].GetFieldType();
 
+                // InlineArray types and fixed buffer types have implied repeated fields.
+                // Checking if a type is an InlineArray type is cheap, so we'll do that first.
+                bool hasImpliedRepeatedFields = HasImpliedRepeatedFields(pMethodTable);
+
+                if (hasImpliedRepeatedFields)
+                {
+                    numIntroducedFields = pMethodTable->GetNumInstanceFieldBytes() / pFieldStart->GetSize();
+                    if (numIntroducedFields > 2)
+                    {
+                        goto _End_arg;
+                    }
+
+                    if (fieldType == ELEMENT_TYPE_R4)
+                    {
+                        if (numIntroducedFields == 1)
+                        {
+                            size = STRUCT_FLOAT_FIELD_ONLY_ONE;
+                        }
+                        else if (numIntroducedFields == 2)
+                        {
+                            size = STRUCT_FLOAT_FIELD_ONLY_TWO;
+                        }
+                        goto _End_arg;
+                    }
+                    else if (fieldType == ELEMENT_TYPE_R8)
+                    {
+                        if (numIntroducedFields == 1)
+                        {
+                            size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8;
+                        }
+                        else if (numIntroducedFields == 2)
+                        {
+                            size = STRUCT_FIELD_TWO_DOUBLES;
+                        }
+                        goto _End_arg;
+                    }
+                }
+
                 if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
                 {
                     if (fieldType == ELEMENT_TYPE_R4)
@@ -2968,6 +3011,11 @@ int MethodTable::GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cl
                     goto _End_arg;
                 }
 
+                if (pFieldFirst->GetSize() > pFieldSecond->GetOffset())
+                {
+                    goto _End_arg;
+                }
+
                 CorElementType fieldType = pFieldFirst[0].GetFieldType();
                 if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
                 {
@@ -2988,7 +3036,7 @@ int MethodTable::GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cl
                 else if (fieldType == ELEMENT_TYPE_VALUETYPE)
                 {
                     pMethodTable  = pFieldFirst->GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    if (IsLoongArch64OnlyOneField(pMethodTable))
+                    if (IsOnlyOneField(pMethodTable))
                     {
                         size = GetLoongArch64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
                         if ((size & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
@@ -3044,7 +3092,7 @@ int MethodTable::GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cl
                 else if (fieldType == ELEMENT_TYPE_VALUETYPE)
                 {
                     pMethodTable  = pFieldSecond[0].GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    if (IsLoongArch64OnlyOneField(pMethodTable))
+                    if (IsOnlyOneField(pMethodTable))
                     {
                         int size2 = GetLoongArch64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
                         if ((size2 & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
@@ -3085,21 +3133,11 @@ int MethodTable::GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cl
                     size |= STRUCT_SECOND_FIELD_SIZE_IS8;
                 }
             }
-
-            goto _End_arg;
         }
     }
     else
     {
-        _ASSERTE(th.IsNativeValueType());
-
-        useNativeLayout = true;
-        pMethodTable = th.AsNativeValueType();
-    }
-    _ASSERTE(pMethodTable != nullptr);
-
-    if (useNativeLayout)
-    {
+        MethodTable* pMethodTable = th.AsNativeValueType();
         if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
         {
             DWORD numIntroducedFields = pMethodTable->GetNativeLayoutInfo()->GetNumFields();
@@ -3251,7 +3289,7 @@ int MethodTable::GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cl
 
                         MethodTable* pMethodTable2 = pNativeFieldDescs->GetNestedNativeMethodTable();
 
-                        if (!IsLoongArch64OnlyOneField(pMethodTable2))
+                        if (!IsOnlyOneField(pMethodTable2))
                         {
                             size = STRUCT_NO_FLOAT_FIELD;
                             goto _End_arg;
@@ -3346,7 +3384,7 @@ int MethodTable::GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cl
 
                         MethodTable* pMethodTable2 = pNativeFieldDescs[1].GetNestedNativeMethodTable();
 
-                        if (!IsLoongArch64OnlyOneField(pMethodTable2))
+                        if (!IsOnlyOneField(pMethodTable2))
                         {
                             size = STRUCT_NO_FLOAT_FIELD;
                             goto _End_arg;
@@ -3415,23 +3453,16 @@ int MethodTable::GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cl
 #endif
 
 #if defined(TARGET_RISCV64)
-
-bool MethodTable::IsRiscV64OnlyOneField(MethodTable * pMT)
+int MethodTable::GetRiscV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
 {
-    TypeHandle th(pMT);
+    TypeHandle th(cls);
 
-    bool useNativeLayout      = false;
-    bool ret                  = false;
-    MethodTable* pMethodTable = nullptr;
+    int size = STRUCT_NO_FLOAT_FIELD;
 
     if (!th.IsTypeDesc())
     {
-        pMethodTable = th.AsMethodTable();
-        if (pMethodTable->HasLayout())
-        {
-            useNativeLayout = true;
-        }
-        else if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
+        MethodTable* pMethodTable = th.AsMethodTable();
+        if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
         {
             DWORD numIntroducedFields = pMethodTable->GetNumIntroducedInstanceFields();
 
@@ -3441,44 +3472,6 @@ bool MethodTable::IsRiscV64OnlyOneField(MethodTable * pMT)
 
                 CorElementType fieldType = pFieldStart[0].GetFieldType();
 
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    ret = true;
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    pMethodTable  = pFieldStart->GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    if (pMethodTable->GetNumIntroducedInstanceFields() == 1)
-                    {
-                        ret = IsRiscV64OnlyOneField(pMethodTable);
-                    }
-                }
-            }
-            goto _End_arg;
-        }
-    }
-    else
-    {
-        _ASSERTE(th.IsNativeValueType());
-
-        useNativeLayout = true;
-        pMethodTable = th.AsNativeValueType();
-    }
-    _ASSERTE(pMethodTable != nullptr);
-
-    if (useNativeLayout)
-    {
-        if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
-        {
-            DWORD numIntroducedFields = pMethodTable->GetNativeLayoutInfo()->GetNumFields();
-            FieldDesc *pFieldStart = nullptr;
-
-            if (numIntroducedFields == 1)
-            {
-                pFieldStart = pMethodTable->GetApproxFieldDescListRaw();
-
-                CorElementType fieldType = pFieldStart->GetFieldType();
-
                 // InlineArray types and fixed buffer types have implied repeated fields.
                 // Checking if a type is an InlineArray type is cheap, so we'll do that first.
                 bool hasImpliedRepeatedFields = HasImpliedRepeatedFields(pMethodTable);
@@ -3486,66 +3479,36 @@ bool MethodTable::IsRiscV64OnlyOneField(MethodTable * pMT)
                 if (hasImpliedRepeatedFields)
                 {
                     numIntroducedFields = pMethodTable->GetNumInstanceFieldBytes() / pFieldStart->GetSize();
-                    if (numIntroducedFields != 1)
+                    if (numIntroducedFields > 2)
                     {
                         goto _End_arg;
                     }
-                }
 
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    ret = true;
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors();
-                    NativeFieldCategory nfc = pNativeFieldDescs->GetCategory();
-                    if (nfc == NativeFieldCategory::NESTED)
+                    if (fieldType == ELEMENT_TYPE_R4)
                     {
-                        pMethodTable = pNativeFieldDescs->GetNestedNativeMethodTable();
-                        ret = IsRiscV64OnlyOneField(pMethodTable);
+                        if (numIntroducedFields == 1)
+                        {
+                            size = STRUCT_FLOAT_FIELD_ONLY_ONE;
+                        }
+                        else if (numIntroducedFields == 2)
+                        {
+                            size = STRUCT_FLOAT_FIELD_ONLY_TWO;
+                        }
+                        goto _End_arg;
                     }
-                    else if (nfc != NativeFieldCategory::ILLEGAL)
+                    else if (fieldType == ELEMENT_TYPE_R8)
                     {
-                        ret = true;
+                        if (numIntroducedFields == 1)
+                        {
+                            size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8;
+                        }
+                        else if (numIntroducedFields == 2)
+                        {
+                            size = STRUCT_FIELD_TWO_DOUBLES;
+                        }
+                        goto _End_arg;
                     }
                 }
-            }
-            else
-            {
-                ret = false;
-            }
-        }
-    }
-_End_arg:
-
-    return ret;
-}
-
-int MethodTable::GetRiscV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
-{
-    TypeHandle th(cls);
-
-    bool useNativeLayout           = false;
-    int size = STRUCT_NO_FLOAT_FIELD;
-    MethodTable* pMethodTable      = nullptr;
-
-    if (!th.IsTypeDesc())
-    {
-        pMethodTable = th.AsMethodTable();
-        if (pMethodTable->HasLayout())
-        {
-            useNativeLayout = true;
-        }
-        else if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
-        {
-            DWORD numIntroducedFields = pMethodTable->GetNumIntroducedInstanceFields();
-
-            if (numIntroducedFields == 1)
-            {
-                FieldDesc *pFieldStart = pMethodTable->GetApproxFieldDescListRaw();
-
-                CorElementType fieldType = pFieldStart[0].GetFieldType();
 
                 if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
                 {
@@ -3584,6 +3547,11 @@ int MethodTable::GetRiscV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
                     goto _End_arg;
                 }
 
+                if (pFieldFirst->GetSize() > pFieldSecond->GetOffset())
+                {
+                    goto _End_arg;
+                }
+
                 CorElementType fieldType = pFieldFirst[0].GetFieldType();
                 if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
                 {
@@ -3604,7 +3572,7 @@ int MethodTable::GetRiscV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
                 else if (fieldType == ELEMENT_TYPE_VALUETYPE)
                 {
                     pMethodTable  = pFieldFirst->GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    if (IsRiscV64OnlyOneField(pMethodTable))
+                    if (IsOnlyOneField(pMethodTable))
                     {
                         size = GetRiscV64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
                         if ((size & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
@@ -3660,7 +3628,7 @@ int MethodTable::GetRiscV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
                 else if (fieldType == ELEMENT_TYPE_VALUETYPE)
                 {
                     pMethodTable  = pFieldSecond[0].GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    if (IsRiscV64OnlyOneField(pMethodTable))
+                    if (IsOnlyOneField(pMethodTable))
                     {
                         int size2 = GetRiscV64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
                         if ((size2 & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
@@ -3701,21 +3669,12 @@ int MethodTable::GetRiscV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
                     size |= STRUCT_SECOND_FIELD_SIZE_IS8;
                 }
             }
-
-            goto _End_arg;
         }
     }
     else
     {
-        _ASSERTE(th.IsNativeValueType());
+        MethodTable* pMethodTable = th.AsNativeValueType();
 
-        useNativeLayout = true;
-        pMethodTable = th.AsNativeValueType();
-    }
-    _ASSERTE(pMethodTable != nullptr);
-
-    if (useNativeLayout)
-    {
         if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
         {
             DWORD numIntroducedFields = pMethodTable->GetNativeLayoutInfo()->GetNumFields();
@@ -3867,7 +3826,7 @@ int MethodTable::GetRiscV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
 
                         MethodTable* pMethodTable2 = pNativeFieldDescs->GetNestedNativeMethodTable();
 
-                        if (!IsRiscV64OnlyOneField(pMethodTable2))
+                        if (!IsOnlyOneField(pMethodTable2))
                         {
                             size = STRUCT_NO_FLOAT_FIELD;
                             goto _End_arg;
@@ -3963,7 +3922,7 @@ int MethodTable::GetRiscV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
 
                         MethodTable* pMethodTable2 = pNativeFieldDescs[1].GetNestedNativeMethodTable();
 
-                        if (!IsRiscV64OnlyOneField(pMethodTable2))
+                        if (!IsOnlyOneField(pMethodTable2))
                         {
                             size = STRUCT_NO_FLOAT_FIELD;
                             goto _End_arg;
@@ -4026,6 +3985,400 @@ int MethodTable::GetRiscV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
 }
 #endif
 
+#if !defined(DACCESS_COMPILE)
+namespace
+{
+    // Underlying type specified so we can use memset in the algorithm below
+    // to set a range of values to a particular tag.
+    enum class SwiftPhysicalLoweringTag : uint8_t
+    {
+        Empty,
+        Opaque,
+        Int64,
+        Float,
+        Double
+    };
+
+    uint32_t GetAlignment(SwiftPhysicalLoweringTag tag)
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        switch (tag)
+        {
+            case SwiftPhysicalLoweringTag::Int64:
+                return 8;
+            case SwiftPhysicalLoweringTag::Float:
+                return 4;
+            case SwiftPhysicalLoweringTag::Double:
+                return 8;
+            default:
+                return 1;
+        }
+    }
+
+    void SetLoweringRange(CQuickArray<SwiftPhysicalLoweringTag>& intervals, uint32_t start, uint32_t size, SwiftPhysicalLoweringTag tag)
+    {
+        STANDARD_VM_CONTRACT;
+
+        bool forceOpaque = false;
+
+        if (!IS_ALIGNED(start, GetAlignment(tag)))
+        {
+            // If the start of the range is not aligned, we need to force the entire range to be opaque.
+            forceOpaque = true;
+        }
+
+        // Check if any of the range is non-empty.
+        // If so, we need to force this range to be opaque
+        // and extend the range mark the existing tag's range as opaque.
+        for (uint32_t i = 0; i < size; i++)
+        {
+            SwiftPhysicalLoweringTag currentTag = intervals[start + i];
+            if (currentTag != SwiftPhysicalLoweringTag::Empty
+                && currentTag != tag)
+            {
+                forceOpaque = true;
+
+                // Extend out start to the beginning of the existing tag's range
+                // and extend size to the end of the existing tag's range (if non-opaque/empty).
+                start = (uint32_t)ALIGN_DOWN(start, GetAlignment(currentTag));
+                size = (uint32_t)ALIGN_UP(size + start, GetAlignment(currentTag)) - start;
+                break;
+            }
+        }
+
+        if (forceOpaque)
+        {
+            tag = SwiftPhysicalLoweringTag::Opaque;
+        }
+
+        memset(&intervals[start], (uint8_t)tag, sizeof(SwiftPhysicalLoweringTag) * size);
+    }
+
+    void GetNativeSwiftPhysicalLowering(CQuickArray<SwiftPhysicalLoweringTag>& intervals, PTR_MethodTable pMT, uint32_t offset = 0);
+    void GetNativeSwiftPhysicalLoweringForInlineArray(CQuickArray<SwiftPhysicalLoweringTag>& intervals, PTR_MethodTable pMT, uint32_t offset = 0);
+
+    void GetNativeSwiftPhysicalLoweringForField(CQuickArray<SwiftPhysicalLoweringTag>& intervals, FieldDesc* pFieldDesc, uint32_t offset = 0)
+    {
+        STANDARD_VM_CONTRACT;
+
+        PTR_MethodTable fieldType = pFieldDesc->GetFieldTypeHandleThrowing().GetMethodTable();
+        CorElementType corType = fieldType->GetVerifierCorElementType();
+
+        if (corType == ELEMENT_TYPE_VALUETYPE)
+        {
+            if (fieldType->GetClass()->IsInlineArray())
+            {
+                GetNativeSwiftPhysicalLoweringForInlineArray(intervals, fieldType, offset);
+            }
+            else
+            {
+                GetNativeSwiftPhysicalLowering(intervals, fieldType, offset);
+            }
+        }
+        else if (corType == ELEMENT_TYPE_R4)
+        {
+            SetLoweringRange(intervals, offset, 4, SwiftPhysicalLoweringTag::Float);
+        }
+        else if (corType == ELEMENT_TYPE_R8)
+        {
+            SetLoweringRange(intervals, offset, 8, SwiftPhysicalLoweringTag::Double);
+        }
+        else if (corType == ELEMENT_TYPE_I8 || corType == ELEMENT_TYPE_U8)
+        {
+            SetLoweringRange(intervals, offset, 8, SwiftPhysicalLoweringTag::Int64);
+        }
+        else
+        {
+            SetLoweringRange(intervals, offset, fieldType->GetNumInstanceFieldBytes(), SwiftPhysicalLoweringTag::Opaque);
+        }
+    }
+
+    void GetNativeSwiftPhysicalLoweringForInlineArray(CQuickArray<SwiftPhysicalLoweringTag>& intervals, PTR_MethodTable pMT, uint32_t offset)
+    {
+        STANDARD_VM_CONTRACT;
+        _ASSERTE(pMT->GetClass()->IsInlineArray());
+        FieldDesc* pElementField = pMT->GetApproxFieldDescListRaw();
+
+        // If the type is an inline array, we need to calculate the size based on the number of elements.
+        const void* pVal;                  // The custom value.
+        ULONG       cbVal;                 // Size of the custom value.
+        HRESULT hr = pMT->GetCustomAttribute(
+            WellKnownAttribute::InlineArrayAttribute,
+            &pVal, &cbVal);
+
+        _ASSERTE(hr == S_OK);
+        if (hr != S_OK)
+        {
+            ThrowHR(hr);
+        }
+
+        // Validity of the InlineArray attribute is checked at type-load time,
+        // so we only assert here as we should have already checked this and failed
+        // type load if this condition is false.
+        _ASSERTE(cbVal >= (sizeof(INT32) + 2));
+        if (cbVal <= (sizeof(INT32) + 2))
+        {
+            return;
+        }
+
+        INT32 repeat = GET_UNALIGNED_VAL32((byte*)pVal + 2);
+
+        // Use the one FieldDesc to calculate the Swift intervals
+        // Use FieldDescs to calculate the Swift intervals
+        PTR_FieldDesc pFieldDesc = pMT->GetApproxFieldDescListRaw();
+        for (int32_t i = 0; i < repeat; i++)
+        {
+            GetNativeSwiftPhysicalLoweringForField(intervals, pFieldDesc, offset + pFieldDesc->GetOffset() + pFieldDesc->GetSize() * i);
+        }
+    }
+
+    void GetNativeSwiftPhysicalLowering(CQuickArray<SwiftPhysicalLoweringTag>& intervals, PTR_MethodTable pMT, uint32_t offset)
+    {
+        STANDARD_VM_CONTRACT;
+        // Use FieldDescs to calculate the Swift intervals
+        PTR_FieldDesc pFieldDescList = pMT->GetApproxFieldDescListRaw();
+        for (uint32_t i = 0; i < pMT->GetNumIntroducedInstanceFields(); i++)
+        {
+            PTR_FieldDesc pFieldDesc = pFieldDescList + i;
+            GetNativeSwiftPhysicalLoweringForField(intervals, pFieldDesc, offset + pFieldDesc->GetOffset());
+        }
+    }
+
+    void GetNativeSwiftPhysicalLowering(CQuickArray<SwiftPhysicalLoweringTag>& intervals, EEClassNativeLayoutInfo const* pNativeLayoutInfo, uint32_t offset = 0)
+    {
+        STANDARD_VM_CONTRACT;
+        // Use NativeLayout to calculate the Swift intervals
+        NativeFieldDescriptor const* pNativeFieldDescs = pNativeLayoutInfo->GetNativeFieldDescriptors();
+        for (uint32_t i = 0; i < pNativeLayoutInfo->GetNumFields(); i++)
+        {
+            NativeFieldDescriptor const& nfd = pNativeFieldDescs[i];
+            if (nfd.GetCategory() == NativeFieldCategory::NESTED)
+            {
+                PTR_MethodTable fieldType = nfd.GetNestedNativeMethodTable();
+                for (uint32_t i = 0; i < nfd.GetNumElements(); i++)
+                {
+                    if (fieldType->IsBlittable())
+                    {
+                        GetNativeSwiftPhysicalLowering(intervals, fieldType, offset + nfd.GetExternalOffset() + fieldType->GetNativeSize() * i);
+                    }
+                    else
+                    {
+                        GetNativeSwiftPhysicalLowering(intervals, fieldType->GetNativeLayoutInfo(), offset + nfd.GetExternalOffset() + fieldType->GetNativeSize() * i);
+                    }
+                }
+            }
+            else if (nfd.GetCategory() == NativeFieldCategory::FLOAT)
+            {
+                _ASSERTE(nfd.NativeSize() == 4 || nfd.NativeSize() == 8);
+                SetLoweringRange(intervals, offset + nfd.GetExternalOffset(), nfd.NativeSize(), nfd.NativeSize() == 4 ? SwiftPhysicalLoweringTag::Float : SwiftPhysicalLoweringTag::Double);
+            }
+            else if (nfd.GetCategory() == NativeFieldCategory::INTEGER && nfd.NativeSize() == 8)
+            {
+                SetLoweringRange(intervals, offset + nfd.GetExternalOffset(), nfd.NativeSize(), SwiftPhysicalLoweringTag::Int64);
+            }
+            else
+            {
+                SetLoweringRange(intervals, offset + nfd.GetExternalOffset(), nfd.NativeSize(), SwiftPhysicalLoweringTag::Opaque);
+            }
+        }
+    }
+}
+
+void MethodTable::GetNativeSwiftPhysicalLowering(CORINFO_SWIFT_LOWERING* pSwiftLowering, bool useNativeLayout)
+{
+    STANDARD_VM_CONTRACT;
+
+    // We'll build the intervals by scanning the fields byte-by-byte and then calculate the lowering intervals
+    // from that information.
+    CQuickArray<SwiftPhysicalLoweringTag> loweredBytes;
+    loweredBytes.AllocThrows(GetNumInstanceFieldBytes());
+    memset(loweredBytes.Ptr(), (uint8_t)SwiftPhysicalLoweringTag::Empty, sizeof(SwiftPhysicalLoweringTag) * loweredBytes.Size());
+
+    if (useNativeLayout && !IsBlittable())
+    {
+        // Use NativeLayout to calculate the layout
+        ::GetNativeSwiftPhysicalLowering(loweredBytes, GetNativeLayoutInfo());
+    }
+    else if (GetClass()->IsInlineArray())
+    {
+        // Use InlineArray to calculate the layout
+        ::GetNativeSwiftPhysicalLoweringForInlineArray(loweredBytes, PTR_MethodTable(this));
+    }
+    else
+    {
+        ::GetNativeSwiftPhysicalLowering(loweredBytes, PTR_MethodTable(this));
+    }
+
+    struct SwiftLoweringInterval
+    {
+        uint32_t offset;
+        uint32_t size;
+        SwiftPhysicalLoweringTag tag;
+    };
+
+    // Build intervals from the byte sequences
+    CQuickArrayList<SwiftLoweringInterval> intervals;
+    for (uint32_t i = 0; i < loweredBytes.Size(); ++i)
+    {
+        // Don't create an interval for empty bytes
+        if (loweredBytes[i] == SwiftPhysicalLoweringTag::Empty)
+        {
+            continue;
+        }
+
+        bool startNewInterval =
+            // We're at the start of the type
+            i == 0
+            // We're starting a new float (as we're aligned)
+            || (IS_ALIGNED(i, 4) && loweredBytes[i] == SwiftPhysicalLoweringTag::Float)
+            // We're starting a new double or int64_t (as we're aligned)
+            || (IS_ALIGNED(i, 8) && (loweredBytes[i] == SwiftPhysicalLoweringTag::Double || loweredBytes[i] == SwiftPhysicalLoweringTag::Int64))
+            // We've changed interval types
+            || loweredBytes[i] != loweredBytes[i - 1];
+
+        if (startNewInterval)
+        {
+            SwiftLoweringInterval interval;
+            interval.offset = i;
+            interval.size = 1;
+            interval.tag = loweredBytes[i];
+            intervals.Push(interval);
+        }
+        else
+        {
+            intervals[intervals.Size() - 1].size++;
+        }
+    }
+
+    // Merge opaque intervals that are in the same pointer-sized block.
+    CQuickArrayList<SwiftLoweringInterval> mergedIntervals;
+
+    for (uint32_t i = 0; i < intervals.Size(); ++i)
+    {
+        SwiftLoweringInterval interval = intervals[i];
+
+        if (i != 0 && interval.tag == SwiftPhysicalLoweringTag::Opaque)
+        {
+            // Merge two opaque intervals when the previous interval ends in the same pointer-sized block
+            SwiftLoweringInterval prevInterval = intervals[i - 1];
+            if (prevInterval.tag == SwiftPhysicalLoweringTag::Opaque &&
+                (prevInterval.offset + prevInterval.size) / TARGET_POINTER_SIZE == interval.offset / TARGET_POINTER_SIZE)
+            {
+                SwiftLoweringInterval& lastInterval = mergedIntervals[mergedIntervals.Size() - 1];
+                lastInterval.size = interval.offset + interval.size - lastInterval.offset;
+                continue;
+            }
+        }
+
+        // Otherwise keep all intervals
+        mergedIntervals.Push(interval);
+    }
+
+    // Now we have the intervals, we can calculate the lowering.
+    CorInfoType loweredTypes[MAX_SWIFT_LOWERED_ELEMENTS];
+    uint32_t offsets[MAX_SWIFT_LOWERED_ELEMENTS];
+    uint32_t numLoweredTypes = 0;
+
+    for (uint32_t i = 0; i < mergedIntervals.Size(); i++)
+    {
+        SwiftLoweringInterval interval = mergedIntervals[i];
+
+        if (numLoweredTypes == ARRAY_SIZE(loweredTypes))
+        {
+            // If we have more than four intervals, this type is passed by-reference in Swift.
+            pSwiftLowering->byReference = true;
+            return;
+        }
+
+        offsets[numLoweredTypes] = interval.offset;
+
+        switch (interval.tag)
+        {
+            case SwiftPhysicalLoweringTag::Empty:
+                _ASSERTE(!"Empty intervals should have been dropped during interval construction");
+                break;
+
+            case SwiftPhysicalLoweringTag::Int64:
+                loweredTypes[numLoweredTypes++] = CORINFO_TYPE_LONG;
+                break;
+            case SwiftPhysicalLoweringTag::Float:
+                loweredTypes[numLoweredTypes++] = CORINFO_TYPE_FLOAT;
+                break;
+            case SwiftPhysicalLoweringTag::Double:
+                loweredTypes[numLoweredTypes++] = CORINFO_TYPE_DOUBLE;
+                break;
+            case SwiftPhysicalLoweringTag::Opaque:
+            {
+                // We need to split the opaque ranges into integer parameters.
+                // As part of this splitting, we must ensure that we don't introduce alignment padding.
+                // This lowering algorithm should produce a lowered type sequence that would have the same padding for
+                // a naturally-aligned struct with the lowered fields as the original type has.
+                // This algorithm intends to split the opaque range into the least number of lowered elements that covers the entire range.
+                // The lowered range is allowed to extend past the end of the opaque range (including past the end of the struct),
+                // but not into the next non-empty interval.
+                // However, due to the properties of the lowering (the only non-8 byte elements of the lowering are 4-byte floats),
+                // we'll never encounter a scneario where we need would need to account for a correctly-aligned
+                // opaque range of > 4 bytes that we must not pad to 8 bytes.
+
+
+                // As long as we need to fill more than 4 bytes and the sequence is currently 8-byte aligned, we'll split into 8-byte integers.
+                // If we have more than 2 bytes but less than 4 and the sequence is 4-byte aligned, we'll use a 4-byte integer to represent the rest of the parameters.
+                // If we have 2 bytes and the sequence is 2-byte aligned, we'll use a 2-byte integer to represent the rest of the parameters.
+                // If we have 1 byte, we'll use a 1-byte integer to represent the rest of the parameters.
+                uint32_t opaqueIntervalStart = interval.offset;
+                // The remaining size here may become negative, so use a signed type.
+                int32_t remainingIntervalSize = static_cast<int32_t>(interval.size);
+                while (remainingIntervalSize > 0)
+                {
+                    if (numLoweredTypes == ARRAY_SIZE(loweredTypes))
+                    {
+                        // If we have more than four intervals and we still need to add another interval, this type is passed by-reference in Swift.
+                        pSwiftLowering->byReference = true;
+                        return;
+                    }
+
+                    offsets[numLoweredTypes] = opaqueIntervalStart;
+
+                    if (remainingIntervalSize > 4 && IS_ALIGNED(opaqueIntervalStart, 8))
+                    {
+                        loweredTypes[numLoweredTypes] = CORINFO_TYPE_LONG;
+                        opaqueIntervalStart += 8;
+                        remainingIntervalSize -= 8;
+                    }
+                    else if (remainingIntervalSize > 2 && IS_ALIGNED(opaqueIntervalStart, 4))
+                    {
+                        loweredTypes[numLoweredTypes] = CORINFO_TYPE_INT;
+                        opaqueIntervalStart += 4;
+                        remainingIntervalSize -= 4;
+                    }
+                    else if (remainingIntervalSize > 1 && IS_ALIGNED(opaqueIntervalStart, 2))
+                    {
+                        loweredTypes[numLoweredTypes] = CORINFO_TYPE_SHORT;
+                        opaqueIntervalStart += 2;
+                        remainingIntervalSize -= 2;
+                    }
+                    else
+                    {
+                        loweredTypes[numLoweredTypes] = CORINFO_TYPE_BYTE;
+                        opaqueIntervalStart += 1;
+                        remainingIntervalSize -= 1;
+                    }
+
+                    numLoweredTypes++;
+                }
+            }
+        }
+    }
+
+    memcpy(pSwiftLowering->loweredElements, loweredTypes, numLoweredTypes * sizeof(CorInfoType));
+    memcpy(pSwiftLowering->offsets, offsets, numLoweredTypes * sizeof(uint32_t));
+    pSwiftLowering->numLoweredElements = numLoweredTypes;
+    pSwiftLowering->byReference = false;
+}
+
+#endif // !DACCESS_COMPILE
+
 #if !defined(DACCESS_COMPILE)
 //==========================================================================================
 void MethodTable::AllocateRegularStaticBoxes()
@@ -4610,7 +4963,11 @@ OBJECTREF MethodTable::FastBox(void** data)
     if (IsNullable())
         return Nullable::Box(*data, this);
 
-    OBJECTREF ref = Allocate();
+    // MethodTable::Allocate() triggers cctors, so to avoid that we
+    // allocate directly without triggering cctors - boxing should not trigger cctors.
+    EnsureInstanceActive();
+    OBJECTREF ref = AllocateObject(this);
+
     CopyValueClass(ref->UnBox(), *data, this);
     return ref;
 }
@@ -9229,7 +9586,7 @@ int MethodTable::GetFieldAlignmentRequirement()
     {
         return GetClass()->GetOverriddenFieldAlignmentRequirement();
     }
-    return min(GetNumInstanceFieldBytes(), TARGET_POINTER_SIZE);
+    return min((int)GetNumInstanceFieldBytes(), TARGET_POINTER_SIZE);
 }
 
 UINT32 MethodTable::GetNativeSize()
diff --git a/src/coreclr/vm/methodtable.h b/src/coreclr/vm/methodtable.h
index 4984d010a71b..703508e2126b 100644
--- a/src/coreclr/vm/methodtable.h
+++ b/src/coreclr/vm/methodtable.h
@@ -824,15 +824,14 @@ class MethodTable
     // during object construction.
     void CheckRunClassInitAsIfConstructingThrowing();
 
+#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
+    static bool IsOnlyOneField(MethodTable * pMT);
 #if defined(TARGET_LOONGARCH64)
-    static bool IsLoongArch64OnlyOneField(MethodTable * pMT);
     static int GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE clh);
-#endif
-
-#if defined(TARGET_RISCV64)
-    static bool IsRiscV64OnlyOneField(MethodTable * pMT);
+#elif defined(TARGET_RISCV64)
     static int GetRiscV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE clh);
 #endif
+#endif
 
 #if defined(UNIX_AMD64_ABI_ITF)
     // Builds the internal data structures and classifies struct eightbytes for Amd System V calling convention.
@@ -840,6 +839,10 @@ class MethodTable
     bool ClassifyEightBytesWithNativeLayout(SystemVStructRegisterPassingHelperPtr helperPtr, unsigned int nestingLevel, unsigned int startOffsetOfStruct, EEClassNativeLayoutInfo const* nativeLayoutInfo);
 #endif // defined(UNIX_AMD64_ABI_ITF)
 
+#if !defined(DACCESS_COMPILE)
+    void GetNativeSwiftPhysicalLowering(CORINFO_SWIFT_LOWERING* pSwiftLowering, bool useNativeLayout);
+#endif
+
     // Copy m_dwFlags from another method table
     void CopyFlags(MethodTable * pOldMT)
     {
@@ -856,11 +859,10 @@ class MethodTable
     // mark the class as having its cctor run.
 #ifndef DACCESS_COMPILE
     void SetClassInited();
-    BOOL  IsClassInited();
-
-    BOOL IsInitError();
     void SetClassInitError();
 #endif
+    BOOL IsClassInited();
+    BOOL IsInitError();
 
     inline BOOL IsGlobalClass()
     {
@@ -1179,6 +1181,8 @@ class MethodTable
         return !HasInstantiation() || IsGenericTypeDefinition();
     }
 
+    PTR_MethodTable GetTypicalMethodTable();
+
     BOOL HasSameTypeDefAs(MethodTable *pMT);
 
     //-------------------------------------------------------------------
@@ -2228,9 +2232,9 @@ class MethodTable
     DWORD  GetOffsetOfFirstStaticHandle();
     DWORD  GetOffsetOfFirstStaticMT();
 
-#ifndef DACCESS_COMPILE
     inline PTR_BYTE GetNonGCStaticsBasePointer();
     inline PTR_BYTE GetGCStaticsBasePointer();
+#ifndef DACCESS_COMPILE
     inline PTR_BYTE GetNonGCThreadStaticsBasePointer();
     inline PTR_BYTE GetGCThreadStaticsBasePointer();
     inline PTR_BYTE GetGCThreadStaticsBaseHandle();
diff --git a/src/coreclr/vm/methodtable.inl b/src/coreclr/vm/methodtable.inl
index 1667912ed501..b2816fccc2c0 100644
--- a/src/coreclr/vm/methodtable.inl
+++ b/src/coreclr/vm/methodtable.inl
@@ -1078,8 +1078,6 @@ inline DWORD MethodTable::GetOptionalMembersSize()
     return GetEndOffsetOfOptionalMembers() - GetStartOffsetOfOptionalMembers();
 }
 
-#ifndef DACCESS_COMPILE
-
 //==========================================================================================
 inline PTR_BYTE MethodTable::GetNonGCStaticsBasePointer()
 {
@@ -1094,6 +1092,8 @@ inline PTR_BYTE MethodTable::GetGCStaticsBasePointer()
     return GetDomainLocalModule()->GetGCStaticsBasePointer(this);
 }
 
+#ifndef DACCESS_COMPILE
+
 //==========================================================================================
 inline PTR_BYTE MethodTable::GetNonGCThreadStaticsBasePointer()
 {
diff --git a/src/coreclr/vm/methodtablebuilder.cpp b/src/coreclr/vm/methodtablebuilder.cpp
index b6edb7fee7e5..69d2a105ecd8 100644
--- a/src/coreclr/vm/methodtablebuilder.cpp
+++ b/src/coreclr/vm/methodtablebuilder.cpp
@@ -2783,7 +2783,7 @@ MethodTableBuilder::EnumerateClassMethods()
             }
         }
 
-        bool hasGenericMethodArgsComputed;
+        bool hasGenericMethodArgsComputed = false;
         bool hasGenericMethodArgs = this->GetModule()->m_pMethodIsGenericMap->IsGeneric(tok, &hasGenericMethodArgsComputed);
         if (!hasGenericMethodArgsComputed)
         {
@@ -5988,17 +5988,17 @@ MethodTableBuilder::bmtMethodHandle MethodTableBuilder::FindDeclMethodOnClassInH
         }
     }
 
+    if (pDeclType == NULL)
+    {   // Method's type is not a parent.
+        BuildMethodTableThrowException(IDS_CLASSLOAD_MI_DECLARATIONNOTFOUND, it.Token());
+    }
+
     // Instead of using the Substitution chain that reaches back to the type being loaded, instead
     // use a substitution chain that points back to the open type associated with the memberref of the declsig.
     Substitution emptySubstitution;
     Substitution* pDeclTypeSubstitution = &emptySubstitution;
     DWORD lengthOfSubstitutionChainHandled = pDeclType->GetSubstitution().GetLength();
 
-    if (pDeclType == NULL)
-    {   // Method's type is not a parent.
-        BuildMethodTableThrowException(IDS_CLASSLOAD_MI_DECLARATIONNOTFOUND, it.Token());
-    }
-
     // 3. Find the matching method.
     bmtRTType *pCurDeclType = pDeclType;
     do
@@ -8419,7 +8419,7 @@ VOID    MethodTableBuilder::PlaceInstanceFields(MethodTable ** pByValueClassCach
             else
 #endif // FEATURE_64BIT_ALIGNMENT
             if (dwNumInstanceFieldBytes > TARGET_POINTER_SIZE) {
-                minAlign = containsGCPointers ? TARGET_POINTER_SIZE : (unsigned)largestAlignmentRequirement;
+                minAlign = (unsigned)(containsGCPointers ? TARGET_POINTER_SIZE : largestAlignmentRequirement);
             }
             else {
                 minAlign = 1;
@@ -8427,7 +8427,7 @@ VOID    MethodTableBuilder::PlaceInstanceFields(MethodTable ** pByValueClassCach
                     minAlign *= 2;
             }
 
-            if (minAlign != min(dwNumInstanceFieldBytes, TARGET_POINTER_SIZE))
+            if (minAlign != min(dwNumInstanceFieldBytes, (DWORD)TARGET_POINTER_SIZE))
             {
                 EnsureOptionalFieldsAreAllocated(GetHalfBakedClass(), m_pAllocMemTracker, GetLoaderAllocator()->GetLowFrequencyHeap());
                 GetHalfBakedClass()->GetOptionalFields()->m_requiredFieldAlignment = (BYTE)minAlign;
@@ -10051,6 +10051,11 @@ void MethodTableBuilder::CheckForSystemTypes()
                     // The Procedure Call Standard for ARM 64-bit (with SVE support) defaults to
                     // 16-byte alignment for __m256.
 
+                    pLayout->m_ManagedLargestAlignmentRequirementOfAllMembers = 16;
+    #elif defined(TARGET_RISCV64)
+                    // TODO-RISCV64: Update alignment to proper value when we implement RISC-V intrinsic.
+                    // RISC-V Vector Extenstion Intrinsic Document
+                    // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/vector_type_infos.adoc
                     pLayout->m_ManagedLargestAlignmentRequirementOfAllMembers = 16;
     #else
                     pLayout->m_ManagedLargestAlignmentRequirementOfAllMembers = 32; // sizeof(__m256)
@@ -10068,6 +10073,12 @@ void MethodTableBuilder::CheckForSystemTypes()
                     // 16-byte alignment for __m256.
 
                     pLayout->m_ManagedLargestAlignmentRequirementOfAllMembers = 16;
+
+    #elif defined(TARGET_RISCV64)
+                    // TODO-RISCV64: Update alignment to proper value when we implement RISC-V intrinsic.
+                    // RISC-V Vector Extenstion Intrinsic Document
+                    // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/vector_type_infos.adoc
+                    pLayout->m_ManagedLargestAlignmentRequirementOfAllMembers = 16;
     #else
                     pLayout->m_ManagedLargestAlignmentRequirementOfAllMembers = 64; // sizeof(__m512)
     #endif // TARGET_ARM elif TARGET_ARM64
diff --git a/src/coreclr/vm/mlinfo.cpp b/src/coreclr/vm/mlinfo.cpp
index 4c63edb027f3..f3401ae2c8fd 100644
--- a/src/coreclr/vm/mlinfo.cpp
+++ b/src/coreclr/vm/mlinfo.cpp
@@ -1144,18 +1144,26 @@ namespace
                 return MarshalInfo::MARSHAL_TYPE_GENERIC_8;
     #ifdef TARGET_64BIT
             case ELEMENT_TYPE_U:
-            case ELEMENT_TYPE_PTR:
             case ELEMENT_TYPE_FNPTR:
             case ELEMENT_TYPE_I:
                 return MarshalInfo::MARSHAL_TYPE_GENERIC_8;
     #else
             case ELEMENT_TYPE_U:
                 return MarshalInfo::MARSHAL_TYPE_GENERIC_U4;
-            case ELEMENT_TYPE_PTR:
             case ELEMENT_TYPE_FNPTR:
             case ELEMENT_TYPE_I:
                 return MarshalInfo::MARSHAL_TYPE_GENERIC_4;
     #endif
+            case ELEMENT_TYPE_PTR:
+            {
+                BYTE ptrByte;
+                sig.SkipCustomModifiers();
+                sig.GetByte(&ptrByte);
+                _ASSERTE(ptrByte == ELEMENT_TYPE_PTR);
+                TypeHandle sigTH = sig.GetTypeHandleThrowing(pModule, pTypeContext);
+                *pMTOut = sigTH.GetMethodTable();
+                return MarshalInfo::MARSHAL_TYPE_POINTER;
+            }
             case ELEMENT_TYPE_R4:
                 return MarshalInfo::MARSHAL_TYPE_FLOAT;
             case ELEMENT_TYPE_R8:
@@ -1693,17 +1701,23 @@ MarshalInfo::MarshalInfo(Module* pModule,
             break;
 
         case ELEMENT_TYPE_PTR:
+        {
             if (nativeType != NATIVE_TYPE_DEFAULT)
             {
                 m_resID = IDS_EE_BADMARSHAL_PTR;
                 IfFailGoto(E_FAIL, lFail);
             }
-#ifdef TARGET_64BIT
-            m_type = MARSHAL_TYPE_GENERIC_8;
-#else
-            m_type = MARSHAL_TYPE_GENERIC_4;
-#endif
+
+            SigPointer sigtmp = sig;
+            BYTE ptrByte;
+            sigtmp.SkipCustomModifiers();
+            sigtmp.GetByte(&ptrByte);
+            _ASSERTE(ptrByte == ELEMENT_TYPE_PTR);
+            TypeHandle sigTH = sigtmp.GetTypeHandleThrowing(pModule, pTypeContext);
+            m_args.m_pMT = sigTH.GetMethodTable();
+            m_type = MARSHAL_TYPE_POINTER;
             break;
+        }
 
         case ELEMENT_TYPE_FNPTR:
             if (!(nativeType == NATIVE_TYPE_FUNC || nativeType == NATIVE_TYPE_DEFAULT))
@@ -2369,6 +2383,7 @@ MarshalInfo::MarshalInfo(Module* pModule,
                     {
                         if (fNeedsCopyCtor && !IsFieldScenario()) // We don't support automatically discovering copy constructors for fields.
                         {
+#if defined(FEATURE_IJW)
                             MethodDesc *pCopyCtor;
                             MethodDesc *pDtor;
                             FindCopyCtor(pModule, m_pMT, &pCopyCtor);
@@ -2378,6 +2393,10 @@ MarshalInfo::MarshalInfo(Module* pModule,
                             m_args.mm.m_pCopyCtor = pCopyCtor;
                             m_args.mm.m_pDtor = pDtor;
                             m_type = MARSHAL_TYPE_BLITTABLEVALUECLASSWITHCOPYCTOR;
+#else // !defined(FEATURE_IJW)
+                            m_resID = IDS_EE_BADMARSHAL_BADMANAGED;
+                            IfFailGoto(E_FAIL, lFail);
+#endif // defined(FEATURE_IJW)
                         }
                         else
                         {
@@ -3107,7 +3126,9 @@ bool MarshalInfo::IsValueClass(MarshalType mtype)
     {
     case MARSHAL_TYPE_BLITTABLEVALUECLASS:
     case MARSHAL_TYPE_VALUECLASS:
+#if defined(FEATURE_IJW)
     case MARSHAL_TYPE_BLITTABLEVALUECLASSWITHCOPYCTOR:
+#endif // defined(FEATURE_IJW)
         return true;
 
     default:
@@ -3591,7 +3612,9 @@ DispParamMarshaler *MarshalInfo::GenerateDispParamMarshaler()
         case MARSHAL_TYPE_BLITTABLEVALUECLASS:
         case MARSHAL_TYPE_BLITTABLEPTR:
         case MARSHAL_TYPE_LAYOUTCLASSPTR:
+#if defined(FEATURE_IJW)
         case MARSHAL_TYPE_BLITTABLEVALUECLASSWITHCOPYCTOR:
+#endif // defined(FEATURE_IJW)
             pDispParamMarshaler = new DispParamRecordMarshaler(m_pMT);
             break;
 
diff --git a/src/coreclr/vm/mtypes.h b/src/coreclr/vm/mtypes.h
index 0cbc86c1a85f..628ded5f4af9 100644
--- a/src/coreclr/vm/mtypes.h
+++ b/src/coreclr/vm/mtypes.h
@@ -79,7 +79,10 @@ DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_VALUECLASS,                      ValueClassMa
 
 DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_REFERENCECUSTOMMARSHALER,        ReferenceCustomMarshaler)
 DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_ARGITERATOR,                     ArgIteratorMarshaler)
+
+#if defined(FEATURE_IJW)
 DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_BLITTABLEVALUECLASSWITHCOPYCTOR, BlittableValueClassWithCopyCtorMarshaler)
+#endif // defined(FEATURE_IJW)
 
 #ifdef FEATURE_COMINTEROP
 DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_OBJECT,                          ObjectMarshaler)
@@ -103,4 +106,6 @@ DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_FIXED_CSTR,                      FixedCSTRMar
 DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_BLITTABLE_LAYOUTCLASS,           BlittableLayoutClassMarshaler)
 DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_LAYOUTCLASS,                     LayoutClassMarshaler)
 
+DEFINE_MARSHALER_TYPE(MARSHAL_TYPE_POINTER,                         PointerMarshaler)
+
 #undef DEFINE_MARSHALER_TYPE
diff --git a/src/coreclr/vm/nativeeventsource.cpp b/src/coreclr/vm/nativeeventsource.cpp
index b4bca0355e6c..a8a160eaf620 100644
--- a/src/coreclr/vm/nativeeventsource.cpp
+++ b/src/coreclr/vm/nativeeventsource.cpp
@@ -34,6 +34,22 @@ extern "C" BOOL QCALLTYPE IsEventSourceLoggingEnabled()
     return retVal;
 }
 
+extern "C" LPWSTR QCALLTYPE EventSource_GetClrConfig(LPCWSTR configName)
+{
+    QCALL_CONTRACT;
+
+    LPWSTR ret = NULL;
+
+    BEGIN_QCALL;
+    CLRConfig::ConfigStringInfo info;
+    info.name = configName;
+    info.options = CLRConfig::LookupOptions::Default;
+    ret = CLRConfig::GetConfigValue(info);
+    END_QCALL;
+
+    return ret;
+}
+
 #endif //defined(FEATURE_EVENTSOURCE_XPLAT)
 
 #ifdef FEATURE_PERFTRACING
@@ -44,7 +60,7 @@ extern "C" BOOL QCALLTYPE IsEventSourceLoggingEnabled()
 // change genRuntimeEventSources.py script to not emit the body that throws NotImplementedException for the event that
 // want to be fired from managed code.
 // See https://github.com/dotnet/runtime/pull/47829 for an example of how to do this.
-extern "C" void QCALLTYPE LogThreadPoolWorkerThreadStart(_In_z_ uint activeWorkerThreadCount, _In_z_ uint retiredWorkerThreadCount, _In_z_ short clrInstanceID)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadStart(_In_z_ uint activeWorkerThreadCount, _In_z_ uint retiredWorkerThreadCount, _In_z_ short clrInstanceID)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
@@ -54,7 +70,7 @@ extern "C" void QCALLTYPE LogThreadPoolWorkerThreadStart(_In_z_ uint activeWorke
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogThreadPoolWorkerThreadStop(_In_z_ uint activeWorkerThreadCount, _In_z_ uint retiredWorkerThreadCount, _In_z_ short clrInstanceID)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadStop(_In_z_ uint activeWorkerThreadCount, _In_z_ uint retiredWorkerThreadCount, _In_z_ short clrInstanceID)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
@@ -64,7 +80,7 @@ extern "C" void QCALLTYPE LogThreadPoolWorkerThreadStop(_In_z_ uint activeWorker
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogThreadPoolWorkerThreadWait(_In_z_ uint activeWorkerThreadCount, _In_z_ uint retiredWorkerThreadCount, _In_z_ short clrInstanceID)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadWait(_In_z_ uint activeWorkerThreadCount, _In_z_ uint retiredWorkerThreadCount, _In_z_ short clrInstanceID)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
@@ -74,7 +90,7 @@ extern "C" void QCALLTYPE LogThreadPoolWorkerThreadWait(_In_z_ uint activeWorker
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogThreadPoolMinMaxThreads(_In_z_ short minWorkerThreads, _In_z_ short maxWorkerThreads, _In_z_ short minIOCompletionThreads, _In_z_ short maxIOCompletionThreads, _In_z_ short clrInstanceID)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolMinMaxThreads(_In_z_ short minWorkerThreads, _In_z_ short maxWorkerThreads, _In_z_ short minIOCompletionThreads, _In_z_ short maxIOCompletionThreads, _In_z_ short clrInstanceID)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
@@ -84,7 +100,7 @@ extern "C" void QCALLTYPE LogThreadPoolMinMaxThreads(_In_z_ short minWorkerThrea
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogThreadPoolWorkerThreadAdjustmentSample(_In_z_ double throughput, _In_z_ short clrInstanceID)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentSample(_In_z_ double throughput, _In_z_ short clrInstanceID)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
@@ -94,7 +110,7 @@ extern "C" void QCALLTYPE LogThreadPoolWorkerThreadAdjustmentSample(_In_z_ doubl
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogThreadPoolWorkerThreadAdjustmentAdjustment(_In_z_ double averageThroughput, _In_z_ uint newWorkerThreadCount, _In_z_ uint reason, _In_z_ short clrInstanceID)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentAdjustment(_In_z_ double averageThroughput, _In_z_ uint newWorkerThreadCount, _In_z_ uint reason, _In_z_ short clrInstanceID)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
@@ -104,7 +120,7 @@ extern "C" void QCALLTYPE LogThreadPoolWorkerThreadAdjustmentAdjustment(_In_z_ d
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogThreadPoolWorkerThreadAdjustmentStats(_In_z_ double duration, _In_z_ double throughput, _In_z_ double threadWave, _In_z_ double throughputWave, _In_z_ double throughputErrorEstimate, _In_z_ double AverageThroughputErrorEstimate, _In_z_ double ThroughputRatio, _In_z_ double confidence, _In_z_ double newControlSetting, _In_z_ short newThreadWaveMagnitude, _In_z_ short ClrInstanceID)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentStats(_In_z_ double duration, _In_z_ double throughput, _In_z_ double threadWave, _In_z_ double throughputWave, _In_z_ double throughputErrorEstimate, _In_z_ double AverageThroughputErrorEstimate, _In_z_ double ThroughputRatio, _In_z_ double confidence, _In_z_ double newControlSetting, _In_z_ short newThreadWaveMagnitude, _In_z_ short ClrInstanceID)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
@@ -114,7 +130,7 @@ extern "C" void QCALLTYPE LogThreadPoolWorkerThreadAdjustmentStats(_In_z_ double
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogThreadPoolIOEnqueue(_In_z_ void* nativeOverlapped, _In_z_ void* overlapped, _In_z_ bool multiDequeues, _In_z_ short ClrInstanceID)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolIOEnqueue(_In_z_ void* nativeOverlapped, _In_z_ void* overlapped, _In_z_ bool multiDequeues, _In_z_ short ClrInstanceID)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
@@ -124,7 +140,7 @@ extern "C" void QCALLTYPE LogThreadPoolIOEnqueue(_In_z_ void* nativeOverlapped,
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogThreadPoolIODequeue(_In_z_ void* nativeOverlapped, _In_z_ void* overlapped, _In_z_ short ClrInstanceID)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolIODequeue(_In_z_ void* nativeOverlapped, _In_z_ void* overlapped, _In_z_ short ClrInstanceID)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
@@ -134,7 +150,7 @@ extern "C" void QCALLTYPE LogThreadPoolIODequeue(_In_z_ void* nativeOverlapped,
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogThreadPoolWorkingThreadCount(_In_z_ uint count, _In_z_ short ClrInstanceID)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkingThreadCount(_In_z_ uint count, _In_z_ short ClrInstanceID)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
@@ -144,7 +160,7 @@ extern "C" void QCALLTYPE LogThreadPoolWorkingThreadCount(_In_z_ uint count, _In
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogThreadPoolIOPack(_In_z_ void* nativeOverlapped, _In_z_ void* overlapped, _In_z_ short ClrInstanceID)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolIOPack(_In_z_ void* nativeOverlapped, _In_z_ void* overlapped, _In_z_ short ClrInstanceID)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
@@ -154,7 +170,7 @@ extern "C" void QCALLTYPE LogThreadPoolIOPack(_In_z_ void* nativeOverlapped, _In
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogContentionLockCreated(void* LockID, void* AssociatedObjectID, uint16_t ClrInstanceID)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogContentionLockCreated(void* LockID, void* AssociatedObjectID, uint16_t ClrInstanceID)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
@@ -164,7 +180,7 @@ extern "C" void QCALLTYPE LogContentionLockCreated(void* LockID, void* Associate
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogContentionStart(
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogContentionStart(
     uint8_t ContentionFlags,
     uint16_t ClrInstanceID,
     void* LockID,
@@ -179,7 +195,7 @@ extern "C" void QCALLTYPE LogContentionStart(
     END_QCALL;
 }
 
-extern "C" void QCALLTYPE LogContentionStop(uint8_t ContentionFlags, uint16_t ClrInstanceID, double DurationNs)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogContentionStop(uint8_t ContentionFlags, uint16_t ClrInstanceID, double DurationNs)
 {
     QCALL_CONTRACT;
     BEGIN_QCALL;
diff --git a/src/coreclr/vm/nativeeventsource.h b/src/coreclr/vm/nativeeventsource.h
index a407c4b5ebb1..9f98001d4643 100644
--- a/src/coreclr/vm/nativeeventsource.h
+++ b/src/coreclr/vm/nativeeventsource.h
@@ -14,23 +14,27 @@
 
 #include "qcall.h"
 
-#if defined(FEATURE_PERFTRACING)
+#if defined(FEATURE_EVENTSOURCE_XPLAT)
 extern "C" void QCALLTYPE LogEventSource(_In_z_ int eventID, _In_z_ LPCWSTR eventName, _In_z_ LPCWSTR eventSourceName, _In_z_ LPCWSTR payload);
 extern "C" BOOL QCALLTYPE IsEventSourceLoggingEnabled();
-extern "C" void QCALLTYPE LogThreadPoolWorkerThreadStart(_In_z_ uint activeWorkerThreadCount, _In_z_ uint retiredWorkerThreadCount, _In_z_ short clrInstanceID);
-extern "C" void QCALLTYPE LogThreadPoolWorkerThreadStop(_In_z_ uint activeWorkerThreadCount, _In_z_ uint retiredWorkerThreadCount, _In_z_ short clrInstanceID);
-extern "C" void QCALLTYPE LogThreadPoolWorkerThreadWait(_In_z_ uint activeWorkerThreadCount, _In_z_ uint retiredWorkerThreadCount, _In_z_ short clrInstanceID);
-extern "C" void QCALLTYPE LogThreadPoolMinMaxThreads(_In_z_ short minWorkerThreads, _In_z_ short maxWorkerThreads, _In_z_ short minIOCompletionThreads, _In_z_ short maxIOCompletionThreads, _In_z_ short clrInstanceID);
-extern "C" void QCALLTYPE LogThreadPoolWorkerThreadAdjustmentSample(_In_z_ double throughput, _In_z_ short clrInstanceID);
-extern "C" void QCALLTYPE LogThreadPoolWorkerThreadAdjustmentAdjustment(_In_z_ double averageThroughput, _In_z_ uint newWorkerThreadCount, _In_z_ uint reason, _In_z_ short clrInstanceID);
-extern "C" void QCALLTYPE LogThreadPoolWorkerThreadAdjustmentStats(_In_z_ double duration, _In_z_ double throughput, _In_z_ double threadWave, _In_z_ double throughputWave, _In_z_ double throughputErrorEstimate, _In_z_ double AverageThroughputErrorEstimate, _In_z_ double ThroughputRatio, _In_z_ double confidence, _In_z_ double newControlSetting, _In_z_ short newThreadWaveMagnitude, _In_z_ short ClrInstanceID);
-extern "C" void QCALLTYPE LogThreadPoolIOEnqueue(_In_z_ void* nativeOverlapped, _In_z_ void* overlapped, _In_z_ bool multiDequeues, _In_z_ short ClrInstanceID);
-extern "C" void QCALLTYPE LogThreadPoolIODequeue(_In_z_ void* nativeOverlapped, _In_z_ void* overlapped, _In_z_ short ClrInstanceID);
-extern "C" void QCALLTYPE LogThreadPoolWorkingThreadCount(_In_z_ uint count, _In_z_ short ClrInstanceID);
-extern "C" void QCALLTYPE LogThreadPoolIOPack(_In_z_ void* nativeOverlapped, _In_z_ void* overlapped, _In_z_ short ClrInstanceID);
-extern "C" void QCALLTYPE LogContentionLockCreated(void* LockID, void* AssociatedObjectID, uint16_t ClrInstanceID);
-extern "C" void QCALLTYPE LogContentionStart(uint8_t ContentionFlags, uint16_t ClrInstanceID, void* LockID, void* AssociatedObjectID, uint64_t LockOwnerThreadID);
-extern "C" void QCALLTYPE LogContentionStop(uint8_t ContentionFlags, uint16_t ClrInstanceID, double DurationNs);
+extern "C" LPWSTR QCALLTYPE EventSource_GetClrConfig(LPCWSTR configName);
+#endif //defined(FEATURE_EVENTSOURCE_XPLAT)
+
+#if defined(FEATURE_PERFTRACING)
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadStart(_In_z_ uint activeWorkerThreadCount, _In_z_ uint retiredWorkerThreadCount, _In_z_ short clrInstanceID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadStop(_In_z_ uint activeWorkerThreadCount, _In_z_ uint retiredWorkerThreadCount, _In_z_ short clrInstanceID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadWait(_In_z_ uint activeWorkerThreadCount, _In_z_ uint retiredWorkerThreadCount, _In_z_ short clrInstanceID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolMinMaxThreads(_In_z_ short minWorkerThreads, _In_z_ short maxWorkerThreads, _In_z_ short minIOCompletionThreads, _In_z_ short maxIOCompletionThreads, _In_z_ short clrInstanceID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentSample(_In_z_ double throughput, _In_z_ short clrInstanceID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentAdjustment(_In_z_ double averageThroughput, _In_z_ uint newWorkerThreadCount, _In_z_ uint reason, _In_z_ short clrInstanceID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentStats(_In_z_ double duration, _In_z_ double throughput, _In_z_ double threadWave, _In_z_ double throughputWave, _In_z_ double throughputErrorEstimate, _In_z_ double AverageThroughputErrorEstimate, _In_z_ double ThroughputRatio, _In_z_ double confidence, _In_z_ double newControlSetting, _In_z_ short newThreadWaveMagnitude, _In_z_ short ClrInstanceID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolIOEnqueue(_In_z_ void* nativeOverlapped, _In_z_ void* overlapped, _In_z_ bool multiDequeues, _In_z_ short ClrInstanceID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolIODequeue(_In_z_ void* nativeOverlapped, _In_z_ void* overlapped, _In_z_ short ClrInstanceID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolWorkingThreadCount(_In_z_ uint count, _In_z_ short ClrInstanceID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogThreadPoolIOPack(_In_z_ void* nativeOverlapped, _In_z_ void* overlapped, _In_z_ short ClrInstanceID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogContentionLockCreated(void* LockID, void* AssociatedObjectID, uint16_t ClrInstanceID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogContentionStart(uint8_t ContentionFlags, uint16_t ClrInstanceID, void* LockID, void* AssociatedObjectID, uint64_t LockOwnerThreadID);
+extern "C" void QCALLTYPE NativeRuntimeEventSource_LogContentionStop(uint8_t ContentionFlags, uint16_t ClrInstanceID, double DurationNs);
 #endif // defined(FEATURE_PERFTRACING)
 
 #endif //_NATIVEEVENTSOURCE_H_
diff --git a/src/coreclr/vm/object.cpp b/src/coreclr/vm/object.cpp
index 213a9cde5925..2207b728ce1e 100644
--- a/src/coreclr/vm/object.cpp
+++ b/src/coreclr/vm/object.cpp
@@ -1636,7 +1636,12 @@ OBJECTREF Nullable::Box(void* srcPtr, MethodTable* nullableMT)
     OBJECTREF obj = 0;
     GCPROTECT_BEGININTERIOR (src);
     MethodTable* argMT = nullableMT->GetInstantiation()[0].AsMethodTable();
-    obj = argMT->Allocate();
+
+    // MethodTable::Allocate() triggers cctors, so to avoid that we
+    // allocate directly without triggering cctors - boxing should not trigger cctors.
+    argMT->EnsureInstanceActive();
+    obj = AllocateObject(argMT);
+
     CopyValueClass(obj->UnBox(), src->ValueAddr(nullableMT), argMT);
     GCPROTECT_END ();
 
diff --git a/src/coreclr/vm/object.h b/src/coreclr/vm/object.h
index 91bbc95304ce..c0774cae0d81 100644
--- a/src/coreclr/vm/object.h
+++ b/src/coreclr/vm/object.h
@@ -1130,6 +1130,7 @@ class ReflectFieldObject : public BaseObjectWithCachedData
     INT32               m_empty2;
     OBJECTREF           m_empty3;
     OBJECTREF           m_empty4;
+    OBJECTREF           m_empty5;
     FieldDesc *         m_pFD;
 
 public:
@@ -1193,15 +1194,7 @@ class ReflectModuleBaseObject : public Object
 NOINLINE ReflectModuleBaseObject* GetRuntimeModuleHelper(LPVOID __me, Module *pModule, OBJECTREF keepAlive);
 #define FC_RETURN_MODULE_OBJECT(pModule, refKeepAlive) FC_INNER_RETURN(ReflectModuleBaseObject*, GetRuntimeModuleHelper(__me, pModule, refKeepAlive))
 
-class SafeHandle;
 
-#ifdef USE_CHECKED_OBJECTREFS
-typedef REF<SafeHandle> SAFEHANDLE;
-typedef REF<SafeHandle> SAFEHANDLEREF;
-#else // USE_CHECKED_OBJECTREFS
-typedef SafeHandle * SAFEHANDLE;
-typedef SafeHandle * SAFEHANDLEREF;
-#endif // USE_CHECKED_OBJECTREFS
 
 
 
@@ -1241,19 +1234,6 @@ typedef CultureInfoBaseObject*     CULTUREINFOBASEREF;
 typedef PTR_ArrayBase ARRAYBASEREF;
 #endif
 
-// Note that the name must always be "" or "en-US".  Other cases and nulls
-// aren't allowed (we already checked.)
-__inline bool IsCultureEnglishOrInvariant(LPCWSTR localeName)
-{
-    LIMITED_METHOD_CONTRACT;
-    if (localeName != NULL &&
-        (localeName[0] == W('\0') ||
-         u16_strcmp(localeName, W("en-US")) == 0))
-    {
-        return true;
-    }
-    return false;
-    }
 
 class CultureInfoBaseObject : public Object
 {
@@ -1865,110 +1845,6 @@ typedef BStrWrapper*     BSTRWRAPPEROBJECTREF;
 
 #endif // FEATURE_COMINTEROP
 
-class SafeHandle : public Object
-{
-    friend class CoreLibBinder;
-
-  private:
-    // READ ME:
-    //   Modifying the order or fields of this object may require
-    //   other changes to the classlib class definition of this
-    //   object or special handling when loading this system class.
-#if DEBUG
-    STRINGREF m_ctorStackTrace; // Debug-only stack trace captured when the SafeHandle was constructed
-#endif
-    Volatile<LPVOID> m_handle;
-    Volatile<INT32> m_state;        // Combined ref count and closed/disposed state (for atomicity)
-    Volatile<CLR_BOOL> m_ownsHandle;
-    Volatile<CLR_BOOL> m_fullyInitialized;  // Did constructor finish?
-
-    // Describe the bits in the m_state field above.
-    enum StateBits
-    {
-        SH_State_Closed     = 0x00000001,
-        SH_State_Disposed   = 0x00000002,
-        SH_State_RefCount   = 0xfffffffc,
-        SH_RefCountOne      = 4,            // Amount to increment state field to yield a ref count increment of 1
-    };
-
-    static WORD s_IsInvalidHandleMethodSlot;
-    static WORD s_ReleaseHandleMethodSlot;
-
-    static void RunReleaseMethod(SafeHandle* psh);
-    BOOL IsFullyInitialized() const { LIMITED_METHOD_CONTRACT; return m_fullyInitialized; }
-
-  public:
-    static void Init();
-
-    // To use the SafeHandle from native, look at the SafeHandleHolder, which
-    // will do the AddRef & Release for you.
-    LPVOID GetHandle() const {
-        LIMITED_METHOD_CONTRACT;
-        _ASSERTE(((unsigned int) m_state) >= SH_RefCountOne);
-        return m_handle;
-    }
-
-    void AddRef();
-    void Release(bool fDispose = false);
-    void SetHandle(LPVOID handle);
-};
-
-void AcquireSafeHandle(SAFEHANDLEREF* s);
-void ReleaseSafeHandle(SAFEHANDLEREF* s);
-
-typedef Holder<SAFEHANDLEREF*, AcquireSafeHandle, ReleaseSafeHandle> SafeHandleHolder;
-
-class CriticalHandle : public Object
-{
-    friend class CoreLibBinder;
-
-  private:
-    // READ ME:
-    //   Modifying the order or fields of this object may require
-    //   other changes to the classlib class definition of this
-    //   object or special handling when loading this system class.
-    Volatile<LPVOID> m_handle;
-    Volatile<CLR_BOOL> m_isClosed;
-
-  public:
-    LPVOID GetHandle() const { LIMITED_METHOD_CONTRACT; return m_handle; }
-    static size_t GetHandleOffset() { LIMITED_METHOD_CONTRACT; return offsetof(CriticalHandle, m_handle); }
-
-    void SetHandle(LPVOID handle) { LIMITED_METHOD_CONTRACT; m_handle = handle; }
-};
-
-
-#ifdef USE_CHECKED_OBJECTREFS
-typedef REF<CriticalHandle> CRITICALHANDLE;
-typedef REF<CriticalHandle> CRITICALHANDLEREF;
-#else // USE_CHECKED_OBJECTREFS
-typedef CriticalHandle * CRITICALHANDLE;
-typedef CriticalHandle * CRITICALHANDLEREF;
-#endif // USE_CHECKED_OBJECTREFS
-
-// WaitHandleBase
-// Base class for WaitHandle
-class WaitHandleBase :public MarshalByRefObjectBaseObject
-{
-    friend class CoreLibBinder;
-
-public:
-    __inline LPVOID GetWaitHandle() {
-        LIMITED_METHOD_CONTRACT;
-        SAFEHANDLEREF safeHandle = (SAFEHANDLEREF)m_safeHandle.LoadWithoutBarrier();
-        return safeHandle != NULL ? safeHandle->GetHandle() : INVALID_HANDLE_VALUE;
-    }
-    __inline SAFEHANDLEREF GetSafeHandle() {LIMITED_METHOD_CONTRACT; return (SAFEHANDLEREF)m_safeHandle.LoadWithoutBarrier();}
-
-private:
-    Volatile<SafeHandle*> m_safeHandle;
-};
-
-#ifdef USE_CHECKED_OBJECTREFS
-typedef REF<WaitHandleBase> WAITHANDLEREF;
-#else // USE_CHECKED_OBJECTREFS
-typedef WaitHandleBase* WAITHANDLEREF;
-#endif // USE_CHECKED_OBJECTREFS
 
 // This class corresponds to System.MulticastDelegate on the managed side.
 class DelegateObject : public Object
diff --git a/src/coreclr/vm/object.inl b/src/coreclr/vm/object.inl
index 7f58c122097b..491aab1d4c87 100644
--- a/src/coreclr/vm/object.inl
+++ b/src/coreclr/vm/object.inl
@@ -91,7 +91,7 @@ inline void Object::EnumMemoryRegions(void)
     // Unfortunately, DacEnumMemoryRegion takes only ULONG32 as size argument
     while (size > 0) {
         // Use 0x10000000 instead of MAX_ULONG32 so that the chunks stays aligned
-        SIZE_T chunk = min(size, 0x10000000);
+        SIZE_T chunk = min(size, (SIZE_T)0x10000000);
         // If for any reason we can't enumerate the memory, stop.  This would generally mean
         // that we have target corruption, or that the target is executing, etc.
         if (!DacEnumMemoryRegion(ptr, chunk))
diff --git a/src/coreclr/vm/olevariant.cpp b/src/coreclr/vm/olevariant.cpp
index 888ebdd380da..40e039a1648e 100644
--- a/src/coreclr/vm/olevariant.cpp
+++ b/src/coreclr/vm/olevariant.cpp
@@ -2567,17 +2567,34 @@ void OleVariant::MarshalRecordVariantOleToCom(VARIANT *pOleVariant,
     if (!pRecInfo)
         COMPlusThrow(kArgumentException, IDS_EE_INVALID_OLE_VARIANT);
 
+    LPVOID pvRecord = V_RECORD(pOleVariant);
+    if (pvRecord == NULL)
+    {
+        pComVariant->SetObjRef(NULL);
+        return;
+    }
+
+    MethodTable* pValueClass = NULL;
+    {
+        GCX_PREEMP();
+        pValueClass = GetMethodTableForRecordInfo(pRecInfo);
+    }
+
+    if (pValueClass == NULL)
+    {
+        // This value type should have been registered through
+        // a TLB. CoreCLR doesn't support dynamic type mapping.
+        COMPlusThrow(kArgumentException, IDS_EE_CANNOT_MAP_TO_MANAGED_VC);
+    }
+    _ASSERTE(pValueClass->IsBlittable());
+
     OBJECTREF BoxedValueClass = NULL;
     GCPROTECT_BEGIN(BoxedValueClass)
     {
-        LPVOID pvRecord = V_RECORD(pOleVariant);
-        if (pvRecord)
-        {
-            // This value type should have been registered through
-            // a TLB. CoreCLR doesn't support dynamic type mapping.
-            COMPlusThrow(kArgumentException, IDS_EE_CANNOT_MAP_TO_MANAGED_VC);
-        }
-
+        // Now that we have a blittable value class, allocate an instance of the
+        // boxed value class and copy the contents of the record into it.
+        BoxedValueClass = AllocateObject(pValueClass);
+        memcpyNoGCRefs(BoxedValueClass->GetData(), (BYTE*)pvRecord, pValueClass->GetNativeSize());
         pComVariant->SetObjRef(BoxedValueClass);
     }
     GCPROTECT_END();
diff --git a/src/coreclr/vm/perfmap.cpp b/src/coreclr/vm/perfmap.cpp
index 4ede16efca07..d032dc6031dc 100644
--- a/src/coreclr/vm/perfmap.cpp
+++ b/src/coreclr/vm/perfmap.cpp
@@ -46,7 +46,11 @@ void PerfMap::Initialize()
 
 const char * PerfMap::InternalConstructPath()
 {
+#ifdef HOST_WINDOWS
     CLRConfigNoCache value = CLRConfigNoCache::Get("PerfMapJitDumpPath");
+#else
+    CLRConfigNoCache value = CLRConfigNoCache::Get("PerfMapJitDumpPath", /* noPrefix */ false, &PAL_getenv);
+#endif
     if (value.IsSet())
     {
         return value.AsString();
diff --git a/src/coreclr/vm/pgo.cpp b/src/coreclr/vm/pgo.cpp
index 20b060d2cb4a..c42a2236b3b9 100644
--- a/src/coreclr/vm/pgo.cpp
+++ b/src/coreclr/vm/pgo.cpp
@@ -205,7 +205,7 @@ void PgoManager::WritePgoData()
         return;
     }
 
-    FILE* const pgoDataFile = _wfopen(fileName, W("w"));
+    FILE* const pgoDataFile = _wfopen(fileName, W("wb"));
 
     if (pgoDataFile == NULL)
     {
@@ -238,7 +238,7 @@ void PgoManager::WritePgoData()
         unsigned lastOffset  = 0;
         auto lambda = [data, pgoDataFile] (const ICorJitInfo::PgoInstrumentationSchema &schema)
         {
-            fprintf(pgoDataFile, s_RecordString, schema.InstrumentationKind, schema.ILOffset, schema.Count, schema.Other);
+            fprintf(pgoDataFile, s_RecordString, (unsigned int)schema.InstrumentationKind, schema.ILOffset, schema.Count, schema.Other);
             for (int32_t iEntry = 0; iEntry < schema.Count; iEntry++)
             {
                 size_t entryOffset = schema.Offset + iEntry * InstrumentationKindToSize(schema.InstrumentationKind);
@@ -367,7 +367,7 @@ void PgoManager::ReadPgoData()
         return;
     }
 
-    FILE* const pgoDataFile = _wfopen(fileName, W("r"));
+    FILE* const pgoDataFile = _wfopen(fileName, W("rb"));
 
     if (pgoDataFile == NULL)
     {
diff --git a/src/coreclr/vm/prestub.cpp b/src/coreclr/vm/prestub.cpp
index 7df5865af792..64638b96cddf 100644
--- a/src/coreclr/vm/prestub.cpp
+++ b/src/coreclr/vm/prestub.cpp
@@ -1116,6 +1116,7 @@ namespace
             : Kind{ kind }
             , Declaration{ pMD }
             , DeclarationSig{ pMD }
+            , TargetTypeSig{}
             , TargetType{}
             , IsTargetStatic{ false }
             , TargetMethod{}
@@ -1125,13 +1126,14 @@ namespace
         UnsafeAccessorKind Kind;
         MethodDesc* Declaration;
         MetaSig DeclarationSig;
+        SigPointer TargetTypeSig;
         TypeHandle TargetType;
         bool IsTargetStatic;
         MethodDesc* TargetMethod;
         FieldDesc* TargetField;
     };
 
-    TypeHandle ValidateTargetType(TypeHandle targetTypeMaybe)
+    TypeHandle ValidateTargetType(TypeHandle targetTypeMaybe, CorElementType targetFromSig)
     {
         TypeHandle targetType = targetTypeMaybe.IsByRef()
             ? targetTypeMaybe.GetTypeParam()
@@ -1142,6 +1144,12 @@ namespace
         if (targetType.IsTypeDesc())
             ThrowHR(COR_E_BADIMAGEFORMAT, BFA_INVALID_UNSAFEACCESSOR);
 
+        // We do not support generic signature types as valid targets.
+        if (targetFromSig == ELEMENT_TYPE_VAR || targetFromSig == ELEMENT_TYPE_MVAR)
+        {
+            ThrowHR(COR_E_BADIMAGEFORMAT, BFA_INVALID_UNSAFEACCESSOR);
+        }
+
         return targetType;
     }
 
@@ -1167,16 +1175,29 @@ namespace
         ModuleBase* pModule2 = method->GetModule();
         const Substitution* pSubst2 = NULL;
 
+        //
+        // Parsing the signature follows details defined in ECMA-335 - II.23.2.1
+        //
+
         // Validate calling convention
         if ((*pSig1 & IMAGE_CEE_CS_CALLCONV_MASK) != (*pSig2 & IMAGE_CEE_CS_CALLCONV_MASK))
         {
             return false;
         }
 
-        BYTE callConv = *pSig1;
+        BYTE callConvDecl = *pSig1;
+        BYTE callConvMethod = *pSig2;
         pSig1++;
         pSig2++;
 
+        // Handle generic param count
+        DWORD declGenericCount = 0;
+        DWORD methodGenericCount = 0;
+        if (callConvDecl & IMAGE_CEE_CS_CALLCONV_GENERIC)
+            IfFailThrow(CorSigUncompressData_EndPtr(pSig1, pEndSig1, &declGenericCount));
+        if (callConvMethod & IMAGE_CEE_CS_CALLCONV_GENERIC)
+            IfFailThrow(CorSigUncompressData_EndPtr(pSig2, pEndSig2, &methodGenericCount));
+
         DWORD declArgCount;
         DWORD methodArgCount;
         IfFailThrow(CorSigUncompressData_EndPtr(pSig1, pEndSig1, &declArgCount));
@@ -1250,6 +1271,74 @@ namespace
         return true;
     }
 
+    void VerifyDeclarationSatisfiesTargetConstraints(MethodDesc* declaration, MethodTable* targetType, MethodDesc* targetMethod)
+    {
+        CONTRACTL
+        {
+            STANDARD_VM_CHECK;
+            PRECONDITION(declaration != NULL);
+            PRECONDITION(targetType != NULL);
+            PRECONDITION(targetMethod != NULL);
+        }
+        CONTRACTL_END;
+
+        // If the target method has no generic parameters there is nothing to verify
+        if (!targetMethod->HasClassOrMethodInstantiation())
+            return;
+
+        // Construct a context for verifying target's constraints are
+        // satisfied by the declaration.
+        Instantiation declClassInst;
+        Instantiation declMethodInst;
+        Instantiation targetClassInst;
+        Instantiation targetMethodInst;
+        if (targetType->HasInstantiation())
+        {
+            declClassInst = declaration->GetMethodTable()->GetInstantiation();
+            targetClassInst = targetType->GetTypicalMethodTable()->GetInstantiation();
+        }
+        if (targetMethod->HasMethodInstantiation())
+        {
+            declMethodInst = declaration->LoadTypicalMethodDefinition()->GetMethodInstantiation();
+            targetMethodInst = targetMethod->LoadTypicalMethodDefinition()->GetMethodInstantiation();
+        }
+
+        SigTypeContext typeContext;
+        SigTypeContext::InitTypeContext(declClassInst, declMethodInst, &typeContext);
+
+        InstantiationContext instContext{ &typeContext };
+
+        //
+        // Validate constraints on Type parameters
+        //
+        DWORD typeParamCount = targetClassInst.GetNumArgs();
+        if (typeParamCount != declClassInst.GetNumArgs())
+            COMPlusThrow(kInvalidProgramException, W("Argument_GenTypeConstraintsNotEqual"));
+
+        for (DWORD i = 0; i < typeParamCount; ++i)
+        {
+            TypeHandle arg = declClassInst[i];
+            TypeVarTypeDesc* param = targetClassInst[i].AsGenericVariable();
+            if (!param->SatisfiesConstraints(&typeContext, arg, &instContext))
+                COMPlusThrow(kInvalidProgramException, W("Argument_GenTypeConstraintsNotEqual"));
+        }
+
+        //
+        // Validate constraints on Method parameters
+        //
+        DWORD methodParamCount = targetMethodInst.GetNumArgs();
+        if (methodParamCount != declMethodInst.GetNumArgs())
+            COMPlusThrow(kInvalidProgramException, W("Argument_GenMethodConstraintsNotEqual"));
+
+        for (DWORD i = 0; i < methodParamCount; ++i)
+        {
+            TypeHandle arg = declMethodInst[i];
+            TypeVarTypeDesc* param = targetMethodInst[i].AsGenericVariable();
+            if (!param->SatisfiesConstraints(&typeContext, arg, &instContext))
+                COMPlusThrow(kInvalidProgramException, W("Argument_GenMethodConstraintsNotEqual"));
+        }
+    }
+
     bool TrySetTargetMethod(
         GenerationContext& cxt,
         LPCUTF8 methodName,
@@ -1264,11 +1353,13 @@ namespace
         TypeHandle targetType = cxt.TargetType;
         _ASSERTE(!targetType.IsTypeDesc());
 
+        MethodTable* pMT = targetType.AsMethodTable();
+
         MethodDesc* targetMaybe = NULL;
 
         // Following a similar iteration pattern found in MemberLoader::FindMethod().
         // However, we are only operating on the current type not walking the type hierarchy.
-        MethodTable::IntroducedMethodIterator iter(targetType.AsMethodTable());
+        MethodTable::IntroducedMethodIterator iter(pMT);
         for (; iter.IsValid(); iter.Next())
         {
             MethodDesc* curr = iter.GetMethodDesc();
@@ -1304,6 +1395,9 @@ namespace
             targetMaybe = curr;
         }
 
+        if (targetMaybe != NULL)
+            VerifyDeclarationSatisfiesTargetConstraints(cxt.Declaration, pMT, targetMaybe);
+
         cxt.TargetMethod = targetMaybe;
         return cxt.TargetMethod != NULL;
     }
@@ -1321,19 +1415,47 @@ namespace
         TypeHandle targetType = cxt.TargetType;
         _ASSERTE(!targetType.IsTypeDesc());
 
+        MethodTable* pMT = targetType.AsMethodTable();
+
+        CorElementType elemType = fieldType.GetSignatureCorElementType();
         ApproxFieldDescIterator fdIterator(
-            targetType.AsMethodTable(),
+            pMT,
             (cxt.IsTargetStatic ? ApproxFieldDescIterator::STATIC_FIELDS : ApproxFieldDescIterator::INSTANCE_FIELDS));
         PTR_FieldDesc pField;
         while ((pField = fdIterator.Next()) != NULL)
         {
             // Validate the name and target type match.
-            if (strcmp(fieldName, pField->GetName()) == 0
-                && fieldType == pField->LookupFieldTypeHandle())
+            if (strcmp(fieldName, pField->GetName()) != 0)
+                continue;
+
+            // We check if the possible field is class or valuetype
+            // since generic fields need resolution.
+            CorElementType fieldTypeMaybe = pField->GetFieldType();
+            if (fieldTypeMaybe == ELEMENT_TYPE_CLASS
+                || fieldTypeMaybe == ELEMENT_TYPE_VALUETYPE)
+            {
+                if (fieldType != pField->LookupFieldTypeHandle())
+                    continue;
+            }
+            else
+            {
+                if (elemType != fieldTypeMaybe)
+                    continue;
+            }
+
+            if (cxt.Kind == UnsafeAccessorKind::StaticField && pMT->HasGenericsStaticsInfo())
             {
-                cxt.TargetField = pField;
-                return true;
+                // Statics require the exact typed field as opposed to the canonically
+                // typed field. In order to do that we lookup the current index of the
+                // approx field and then use that index to get the precise field from
+                // the approx field.
+                MethodTable* pFieldMT = pField->GetApproxEnclosingMethodTable();
+                DWORD index = pFieldMT->GetIndexForFieldDesc(pField);
+                pField = pMT->GetFieldDescByIndex(index);
             }
+
+            cxt.TargetField = pField;
+            return true;
         }
         return false;
     }
@@ -1351,12 +1473,14 @@ namespace
         ilResolver->SetStubMethodDesc(cxt.Declaration);
         ilResolver->SetStubTargetMethodDesc(cxt.TargetMethod);
 
-        // [TODO] Handle generics
-        SigTypeContext emptyContext;
+        SigTypeContext genericContext;
+        if (cxt.Declaration->GetClassification() == mcInstantiated)
+            SigTypeContext::InitTypeContext(cxt.Declaration, &genericContext);
+
         ILStubLinker sl(
             cxt.Declaration->GetModule(),
             cxt.Declaration->GetSignature(),
-            &emptyContext,
+            &genericContext,
             cxt.TargetMethod,
             (ILStubLinkerFlags)ILSTUB_LINKER_FLAG_NONE);
 
@@ -1377,24 +1501,126 @@ namespace
         switch (cxt.Kind)
         {
         case UnsafeAccessorKind::Constructor:
+        {
             _ASSERTE(cxt.TargetMethod != NULL);
-            pCode->EmitNEWOBJ(pCode->GetToken(cxt.TargetMethod), targetArgCount);
+            mdToken target;
+            if (!cxt.TargetType.HasInstantiation())
+            {
+                target = pCode->GetToken(cxt.TargetMethod);
+            }
+            else
+            {
+                PCCOR_SIGNATURE sig;
+                uint32_t sigLen;
+                cxt.TargetTypeSig.GetSignature(&sig, &sigLen);
+                mdToken targetTypeSigToken = pCode->GetSigToken(sig, sigLen);
+                target = pCode->GetToken(cxt.TargetMethod, targetTypeSigToken);
+            }
+            pCode->EmitNEWOBJ(target, targetArgCount);
             break;
+        }
         case UnsafeAccessorKind::Method:
-            _ASSERTE(cxt.TargetMethod != NULL);
-            pCode->EmitCALLVIRT(pCode->GetToken(cxt.TargetMethod), targetArgCount, targetRetCount);
-            break;
         case UnsafeAccessorKind::StaticMethod:
+        {
             _ASSERTE(cxt.TargetMethod != NULL);
-            pCode->EmitCALL(pCode->GetToken(cxt.TargetMethod), targetArgCount, targetRetCount);
+            mdToken target;
+            if (!cxt.TargetMethod->HasClassOrMethodInstantiation())
+            {
+                target = pCode->GetToken(cxt.TargetMethod);
+            }
+            else
+            {
+                DWORD targetGenericCount = cxt.TargetMethod->GetNumGenericMethodArgs();
+
+                mdToken methodSpecSigToken = mdTokenNil;
+                SigBuilder sigBuilder;
+                uint32_t sigLen;
+                PCCOR_SIGNATURE sig;
+                if (targetGenericCount != 0)
+                {
+                    // Create signature for the MethodSpec. See ECMA-335 - II.23.2.15
+                    sigBuilder.AppendByte(IMAGE_CEE_CS_CALLCONV_GENERICINST);
+                    sigBuilder.AppendData(targetGenericCount);
+                    for (DWORD i = 0; i < targetGenericCount; ++i)
+                    {
+                        sigBuilder.AppendElementType(ELEMENT_TYPE_MVAR);
+                        sigBuilder.AppendData(i);
+                    }
+                    sigLen;
+                    sig = (PCCOR_SIGNATURE)sigBuilder.GetSignature((DWORD*)&sigLen);
+                    methodSpecSigToken = pCode->GetSigToken(sig, sigLen);
+                }
+
+                cxt.TargetTypeSig.GetSignature(&sig, &sigLen);
+                mdToken targetTypeSigToken = pCode->GetSigToken(sig, sigLen);
+
+                if (methodSpecSigToken == mdTokenNil)
+                {
+                    // Create a MemberRef
+                    target = pCode->GetToken(cxt.TargetMethod, targetTypeSigToken);
+                    _ASSERTE(TypeFromToken(target) == mdtMemberRef);
+                }
+                else
+                {
+                    // Use the method declaration Instantiation to find the instantiated MethodDesc target.
+                    Instantiation methodInst = cxt.Declaration->GetMethodInstantiation();
+                    MethodDesc* instantiatedTarget = MethodDesc::FindOrCreateAssociatedMethodDesc(cxt.TargetMethod, cxt.TargetType.GetMethodTable(), FALSE, methodInst, TRUE);
+
+                    // Create a MethodSpec
+                    target = pCode->GetToken(instantiatedTarget, targetTypeSigToken, methodSpecSigToken);
+                    _ASSERTE(TypeFromToken(target) == mdtMethodSpec);
+                }
+            }
+
+            if (cxt.Kind == UnsafeAccessorKind::StaticMethod)
+            {
+                pCode->EmitCALL(target, targetArgCount, targetRetCount);
+            }
+            else
+            {
+                pCode->EmitCALLVIRT(target, targetArgCount, targetRetCount);
+            }
             break;
+        }
         case UnsafeAccessorKind::Field:
+        {
             _ASSERTE(cxt.TargetField != NULL);
-            pCode->EmitLDFLDA(pCode->GetToken(cxt.TargetField));
+            mdToken target;
+            if (!cxt.TargetType.HasInstantiation())
+            {
+                target = pCode->GetToken(cxt.TargetField);
+            }
+            else
+            {
+                // See the static field case for why this can be mdTokenNil.
+                mdToken targetTypeSigToken = mdTokenNil;
+                target = pCode->GetToken(cxt.TargetField, targetTypeSigToken);
+            }
+            pCode->EmitLDFLDA(target);
             break;
+        }
         case UnsafeAccessorKind::StaticField:
             _ASSERTE(cxt.TargetField != NULL);
-            pCode->EmitLDSFLDA(pCode->GetToken(cxt.TargetField));
+            mdToken target;
+            if (!cxt.TargetType.HasInstantiation())
+            {
+                target = pCode->GetToken(cxt.TargetField);
+            }
+            else
+            {
+                // For accessing a generic instance field, every instantiation will
+                // be at the same offset, and be the same size, with the same GC layout,
+                // as long as the generic is canonically equivalent. However, for static fields,
+                // while the offset, size and GC layout remain the same, the address of the
+                // field is different, and needs to be found by a lookup of some form. The
+                // current form of lookup means the exact type isn't with a type signature.
+                PCCOR_SIGNATURE sig;
+                uint32_t sigLen;
+                cxt.TargetTypeSig.GetSignature(&sig, &sigLen);
+                mdToken targetTypeSigToken = pCode->GetSigToken(sig, sigLen);
+                target = pCode->GetToken(cxt.TargetField, targetTypeSigToken);
+            }
+            pCode->EmitLDSFLDA(target);
             break;
         default:
             _ASSERTE(!"Unknown UnsafeAccessorKind");
@@ -1449,10 +1675,6 @@ bool MethodDesc::TryGenerateUnsafeAccessor(DynamicResolver** resolver, COR_ILMET
     if (!IsStatic())
         ThrowHR(COR_E_BADIMAGEFORMAT, BFA_INVALID_UNSAFEACCESSOR);
 
-    // Block generic support early
-    if (HasClassOrMethodInstantiation())
-        ThrowHR(COR_E_BADIMAGEFORMAT, BFA_INVALID_UNSAFEACCESSOR);
-
     UnsafeAccessorKind kind;
     SString name;
 
@@ -1467,12 +1689,19 @@ bool MethodDesc::TryGenerateUnsafeAccessor(DynamicResolver** resolver, COR_ILMET
     //  * Instance member access - examine type of first parameter
     //  * Static member access - examine type of first parameter
     TypeHandle retType;
+    CorElementType retCorType;
     TypeHandle firstArgType;
+    CorElementType firstArgCorType = ELEMENT_TYPE_END;
+    retCorType = context.DeclarationSig.GetReturnType();
     retType = context.DeclarationSig.GetRetTypeHandleThrowing();
     UINT argCount = context.DeclarationSig.NumFixedArgs();
     if (argCount > 0)
     {
         context.DeclarationSig.NextArg();
+
+        // Get the target type signature and resolve to a type handle.
+        context.TargetTypeSig = context.DeclarationSig.GetArgProps();
+        (void)context.TargetTypeSig.PeekElemType(&firstArgCorType);
         firstArgType = context.DeclarationSig.GetLastTypeHandleThrowing();
     }
 
@@ -1491,7 +1720,9 @@ bool MethodDesc::TryGenerateUnsafeAccessor(DynamicResolver** resolver, COR_ILMET
             ThrowHR(COR_E_BADIMAGEFORMAT, BFA_INVALID_UNSAFEACCESSOR);
         }
 
-        context.TargetType = ValidateTargetType(retType);
+        // Get the target type signature from the return type.
+        context.TargetTypeSig = context.DeclarationSig.GetReturnProps();
+        context.TargetType = ValidateTargetType(retType, retCorType);
         if (!TrySetTargetMethod(context, ".ctor"))
             MemberLoader::ThrowMissingMethodException(context.TargetType.AsMethodTable(), ".ctor");
         break;
@@ -1511,7 +1742,7 @@ bool MethodDesc::TryGenerateUnsafeAccessor(DynamicResolver** resolver, COR_ILMET
             ThrowHR(COR_E_BADIMAGEFORMAT, BFA_INVALID_UNSAFEACCESSOR);
         }
 
-        context.TargetType = ValidateTargetType(firstArgType);
+        context.TargetType = ValidateTargetType(firstArgType, firstArgCorType);
         context.IsTargetStatic = kind == UnsafeAccessorKind::StaticMethod;
         if (!TrySetTargetMethod(context, name.GetUTF8()))
             MemberLoader::ThrowMissingMethodException(context.TargetType.AsMethodTable(), name.GetUTF8());
@@ -1536,7 +1767,7 @@ bool MethodDesc::TryGenerateUnsafeAccessor(DynamicResolver** resolver, COR_ILMET
             ThrowHR(COR_E_BADIMAGEFORMAT, BFA_INVALID_UNSAFEACCESSOR);
         }
 
-        context.TargetType = ValidateTargetType(firstArgType);
+        context.TargetType = ValidateTargetType(firstArgType, firstArgCorType);
         context.IsTargetStatic = kind == UnsafeAccessorKind::StaticField;
         if (!TrySetTargetField(context, name.GetUTF8(), retType.GetTypeParam()))
             MemberLoader::ThrowMissingFieldException(context.TargetType.AsMethodTable(), name.GetUTF8());
@@ -1643,7 +1874,7 @@ BOOL PrepareCodeConfig::SetNativeCode(PCODE pCode, PCODE * ppAlternateCodeToUse)
 COR_ILMETHOD* PrepareCodeConfig::GetILHeader()
 {
     STANDARD_VM_CONTRACT;
-    return m_pMethodDesc->GetILHeader(TRUE);
+    return m_pMethodDesc->GetILHeader();
 }
 
 CORJIT_FLAGS PrepareCodeConfig::GetJitCompilationFlags()
diff --git a/src/coreclr/vm/profdetach.cpp b/src/coreclr/vm/profdetach.cpp
index bf138209ce6a..7bfcba8ed2cc 100644
--- a/src/coreclr/vm/profdetach.cpp
+++ b/src/coreclr/vm/profdetach.cpp
@@ -326,7 +326,7 @@ void ProfilingAPIDetach::ExecuteEvacuationLoop()
         {
             CRITSEC_Holder csh(ProfilingAPIUtility::GetStatusCrst());
 
-            for (SIZE_T pos = 0; pos < s_profilerDetachInfos.Size(); ++pos)
+            while (s_profilerDetachInfos.Size() > 0)
             {
                 ProfilerDetachInfo current = s_profilerDetachInfos.Pop();
 
@@ -446,8 +446,8 @@ void ProfilingAPIDetach::SleepWhileProfilerEvacuates(ProfilerDetachInfo *pDetach
     }
 
     // ...but keep it in bounds!
-    ui64SleepMilliseconds = min(
-        max(ui64SleepMilliseconds, s_dwMinSleepMs),
+    ui64SleepMilliseconds = min<ULONGLONG>(
+        max<ULONGLONG>(ui64SleepMilliseconds, s_dwMinSleepMs),
         s_dwMaxSleepMs);
 
     // At this point it's safe to cast ui64SleepMilliseconds down to a DWORD since we
diff --git a/src/coreclr/vm/proftoeeinterfaceimpl.cpp b/src/coreclr/vm/proftoeeinterfaceimpl.cpp
index f2ce0f1b2159..e57cba3c597d 100644
--- a/src/coreclr/vm/proftoeeinterfaceimpl.cpp
+++ b/src/coreclr/vm/proftoeeinterfaceimpl.cpp
@@ -1921,11 +1921,6 @@ HRESULT GetFunctionInfoInternal(LPCBYTE ip, EECodeInfo * pCodeInfo)
         EE_THREAD_NOT_REQUIRED;
         CAN_TAKE_LOCK;
         CANNOT_RETAKE_LOCK;
-
-
-        // If this is called asynchronously (from a hijacked thread, as with F1), it must not re-enter the
-        // host (SQL).  Corners will be cut to ensure this is the case
-        if (ShouldAvoidHostCalls()) { HOST_NOCALLS; } else { HOST_CALLS; }
     }
     CONTRACTL_END;
 
@@ -1936,21 +1931,7 @@ HRESULT GetFunctionInfoInternal(LPCBYTE ip, EECodeInfo * pCodeInfo)
         return CORPROF_E_NOT_YET_AVAILABLE;
     }
 
-    if (ShouldAvoidHostCalls())
-    {
-        ExecutionManager::ReaderLockHolder rlh(NoHostCalls);
-        if (!rlh.Acquired())
-        {
-            // Couldn't get the info.  Try again later
-            return CORPROF_E_ASYNCHRONOUS_UNSAFE;
-        }
-
-        pCodeInfo->Init((PCODE)ip, ExecutionManager::ScanNoReaderLock);
-    }
-    else
-    {
-        pCodeInfo->Init((PCODE)ip);
-    }
+    pCodeInfo->Init((PCODE)ip);
 
     if (!pCodeInfo->IsValid())
     {
@@ -2019,11 +2000,6 @@ HRESULT ProfToEEInterfaceImpl::GetFunctionFromIP(LPCBYTE ip, FunctionID * pFunct
         // This contract detects any attempts to reenter locks held at the time
         // this function was called.
         CANNOT_RETAKE_LOCK;
-
-
-        // If this is called asynchronously (from a hijacked thread, as with F1), it must not re-enter the
-        // host (SQL).  Corners will be cut to ensure this is the case
-        if (ShouldAvoidHostCalls()) { HOST_NOCALLS; } else { HOST_CALLS; }
     }
     CONTRACTL_END;
 
@@ -2237,11 +2213,6 @@ HRESULT GetCodeInfoFromCodeStart(
         // We need to take the ExecutionManager reader lock to find the
         // appropriate jit manager.
         CAN_TAKE_LOCK;
-
-
-        // If this is called asynchronously (from a hijacked thread, as with F1), it must not re-enter the
-        // host (SQL).  Corners will be cut to ensure this is the case
-        if (ShouldAvoidHostCalls()) { HOST_NOCALLS; } else { HOST_CALLS; }
     }
     CONTRACTL_END;
 
@@ -2299,7 +2270,6 @@ HRESULT GetCodeInfoFromCodeStart(
         &codeInfo);
     if (hr == CORPROF_E_ASYNCHRONOUS_UNSAFE)
     {
-        _ASSERTE(ShouldAvoidHostCalls());
         return hr;
     }
     if (FAILED(hr))
@@ -2395,11 +2365,6 @@ HRESULT ProfToEEInterfaceImpl::GetCodeInfo(FunctionID functionId, LPCBYTE * pSta
 
         // (See locking contract comment in GetCodeInfoHelper.)
         CANNOT_RETAKE_LOCK;
-
-
-        // If this is called asynchronously (from a hijacked thread, as with F1), it must not re-enter the
-        // host (SQL).  Corners will be cut to ensure this is the case
-        if (ShouldAvoidHostCalls()) { HOST_NOCALLS; } else { HOST_CALLS; }
     }
     CONTRACTL_END;
 
@@ -2482,11 +2447,6 @@ HRESULT ProfToEEInterfaceImpl::GetCodeInfo2(FunctionID functionId,
         // (See locking contract comment in GetCodeInfoHelper.)
         CANNOT_RETAKE_LOCK;
 
-
-        // If this is called asynchronously (from a hijacked thread, as with F1), it must not re-enter the
-        // host (SQL).  Corners will be cut to ensure this is the case
-        if (ShouldAvoidHostCalls()) { HOST_NOCALLS; } else { HOST_CALLS; }
-
         PRECONDITION(CheckPointer(pcCodeInfos, NULL_OK));
         PRECONDITION(CheckPointer(codeInfos, NULL_OK));
     }
@@ -4303,7 +4263,7 @@ HRESULT ProfToEEInterfaceImpl::GetILFunctionBody(ModuleID    moduleId,
     LPCBYTE pbMethod = NULL;
 
     // Don't return rewritten IL, use the new API to get that.
-    pbMethod = (LPCBYTE) pModule->GetDynamicIL(methodId, FALSE);
+    pbMethod = (LPCBYTE) pModule->GetDynamicIL(methodId);
 
     // Method not overridden - get the original copy of the IL by going to metadata
     if (pbMethod == NULL)
@@ -4488,7 +4448,7 @@ HRESULT ProfToEEInterfaceImpl::SetILFunctionBody(ModuleID    moduleId,
     // This action is not temporary!
     // If the profiler want to be able to revert, they need to use
     // the new ReJIT APIs.
-    pModule->SetDynamicIL(methodId, (TADDR)pbNewILMethodHeader, FALSE);
+    pModule->SetDynamicIL(methodId, (TADDR)pbNewILMethodHeader);
 
     return (hr);
 }
@@ -5645,7 +5605,7 @@ HRESULT ProfToEEInterfaceImpl::GetAssemblyInfo(AssemblyID    assemblyId,
 
         if ((NULL != szName) && (cchName > 0))
         {
-            wcsncpy_s(szName, cchName, name.GetUnicode(), min(nameLength, cchName - 1));
+            wcsncpy_s(szName, cchName, name.GetUnicode(), min((size_t)nameLength, (size_t)(cchName - 1)));
         }
 
         if (NULL != pcchName)
@@ -8139,11 +8099,6 @@ static BOOL EnsureFrameInitialized(Frame * pFrame)
     {
         NOTHROW;
         GC_NOTRIGGER;
-
-        // If this is called asynchronously (from a hijacked thread, as with F1), it must not re-enter the
-        // host (SQL).  Corners will be cut to ensure this is the case
-        if (ShouldAvoidHostCalls()) { HOST_NOCALLS; } else { HOST_CALLS; }
-
         SUPPORTS_DAC;
     }
     CONTRACTL_END;
@@ -8159,19 +8114,14 @@ static BOOL EnsureFrameInitialized(Frame * pFrame)
 
     if (pHMF->InsureInit(
         false,                      // initialInit
-        NULL,                       // unwindState
-        (ShouldAvoidHostCalls() ?
-            NoHostCalls :
-            AllowHostCalls)
+        NULL                        // unwindState
         ) != NULL)
     {
         // InsureInit() succeeded and found the return address
         return TRUE;
     }
 
-    // No return address was found. It must be because we asked InsureInit() to bail if
-    // it would have entered the host
-    _ASSERTE(ShouldAvoidHostCalls());
+    // No return address was found
     return FALSE;
 }
 
@@ -8202,10 +8152,6 @@ HRESULT ProfToEEInterfaceImpl::ProfilerEbpWalker(
         NOTHROW;
         MODE_ANY;
         EE_THREAD_NOT_REQUIRED;
-
-        // If this is called asynchronously (from a hijacked thread, as with F1), it must not re-enter the
-        // host (SQL).  Corners will be cut to ensure this is the case
-        if (ShouldAvoidHostCalls()) { HOST_NOCALLS; } else { HOST_CALLS; }
     }
     CONTRACTL_END;
 
@@ -8256,7 +8202,6 @@ HRESULT ProfToEEInterfaceImpl::ProfilerEbpWalker(
                 &codeInfo);
             if (hr == CORPROF_E_ASYNCHRONOUS_UNSAFE)
             {
-                _ASSERTE(ShouldAvoidHostCalls());
                 return hr;
             }
             if (SUCCEEDED(hr))
@@ -8411,8 +8356,7 @@ HRESULT ProfToEEInterfaceImpl::ProfilerEbpWalker(
                     &rd,
                     &codeInfo,
                     SpeculativeStackwalk,
-                    &codeManState,
-                    NULL);
+                    &codeManState);
 
                 ctxCur.Ebp = *rd.GetEbpLocation();
                 ctxCur.Esp = rd.SP;
@@ -8486,27 +8430,18 @@ HRESULT ProfToEEInterfaceImpl::ProfilerStackWalkFramesWrapper(Thread * pThreadTo
 //
 // Arguments:
 //      pCtx - Context to look at
-//      hostCallPreference - Describes how to acquire the reader lock--either AllowHostCalls
-//          or NoHostCalls (see code:HostCallPreference).
 //
 // Return Value:
 //      S_OK: The context is in managed code
 //      S_FALSE: The context is not in managed code.
-//      Error: Unable to determine (typically because hostCallPreference was NoHostCalls
-//         and the reader lock was unattainable without yielding)
 //
 
-HRESULT IsContextInManagedCode(const CONTEXT * pCtx, HostCallPreference hostCallPreference)
+HRESULT IsContextInManagedCode(const CONTEXT * pCtx)
 {
     WRAPPER_NO_CONTRACT;
-    BOOL fFailedReaderLock = FALSE;
 
     // if there's no Jit Manager for the IP, it's not managed code.
-    BOOL fIsManagedCode = ExecutionManager::IsManagedCode(GetIP(pCtx), hostCallPreference, &fFailedReaderLock);
-    if (fFailedReaderLock)
-    {
-        return CORPROF_E_ASYNCHRONOUS_UNSAFE;
-    }
+    BOOL fIsManagedCode = ExecutionManager::IsManagedCode(GetIP(pCtx));
 
     return fIsManagedCode ? S_OK : S_FALSE;
 }
@@ -8681,8 +8616,6 @@ HRESULT ProfToEEInterfaceImpl::DoStackSnapshot(ThreadID thread,
         goto Cleanup;
     }
 
-    HostCallPreference hostCallPreference;
-
     // First, check "1) Target thread to walk == current thread OR Target thread is suspended"
     if (pThreadToSnapshot != pCurrentThread && !g_profControlBlock.fProfilerRequestedRuntimeSuspend)
     {
@@ -8728,11 +8661,6 @@ HRESULT ProfToEEInterfaceImpl::DoStackSnapshot(ThreadID thread,
 #endif // !PLATFORM_SUPPORTS_SAFE_THREADSUSPEND
     }
 
-    hostCallPreference =
-        ShouldAvoidHostCalls() ?
-            NoHostCalls :       // Async call: Ensure this thread won't yield & re-enter host
-            AllowHostCalls;     // Synchronous calls may re-enter host just fine
-
     // If target thread is in pre-emptive mode, the profiler's seed context is unnecessary
     // because our frame chain is good enough: it will give us at least as accurate a
     // starting point as the profiler could.  Also, since profiler contexts cannot be
@@ -8769,11 +8697,10 @@ HRESULT ProfToEEInterfaceImpl::DoStackSnapshot(ThreadID thread,
             goto Cleanup;
         }
 
-        hrCurrentContextIsManaged = IsContextInManagedCode(&ctxCurrent, hostCallPreference);
+        hrCurrentContextIsManaged = IsContextInManagedCode(&ctxCurrent);
         if (FAILED(hrCurrentContextIsManaged))
         {
             // Couldn't get the info.  Try again later
-            _ASSERTE(ShouldAvoidHostCalls());
             hr = CORPROF_E_ASYNCHRONOUS_UNSAFE;
             goto Cleanup;
         }
@@ -8841,7 +8768,7 @@ HRESULT ProfToEEInterfaceImpl::DoStackSnapshot(ThreadID thread,
         }
         else
         {
-            hr = IsContextInManagedCode(pctxSeed, hostCallPreference);
+            hr = IsContextInManagedCode(pctxSeed);
             if (FAILED(hr))
             {
                 hr = CORPROF_E_ASYNCHRONOUS_UNSAFE;
@@ -8877,16 +8804,12 @@ HRESULT ProfToEEInterfaceImpl::DoStackSnapshot(ThreadID thread,
         {
             if (pThreadToSnapshot->GetSafelyRedirectableThreadContext(Thread::kDefaultChecks, &ctxCurrent, &rd))
             {
-                BOOL fFailedReaderLock = FALSE;
-                BOOL fIsManagedCode = ExecutionManager::IsManagedCode(GetIP(&ctxCurrent), hostCallPreference, &fFailedReaderLock);
+                BOOL fIsManagedCode = ExecutionManager::IsManagedCode(GetIP(&ctxCurrent));
 
-                if (!fFailedReaderLock)
-                {
-                    // not in jitted or ngend code or inside an inlined P/Invoke (the leaf-most EE Frame is
-                    // an InlinedCallFrame with an active call)
-                    _ASSERTE(!fIsManagedCode ||
-                             (InlinedCallFrame::FrameHasActiveCall(pThreadToSnapshot->GetFrame())));
-                }
+                // not in jitted or ngend code or inside an inlined P/Invoke (the leaf-most EE Frame is
+                // an InlinedCallFrame with an active call)
+                _ASSERTE(!fIsManagedCode ||
+                            (InlinedCallFrame::FrameHasActiveCall(pThreadToSnapshot->GetFrame())));
             }
         }
 #endif // !PLATFORM_SUPPORTS_SAFE_THREADSUSPEND
diff --git a/src/coreclr/vm/proftoeeinterfaceimpl.inl b/src/coreclr/vm/proftoeeinterfaceimpl.inl
index 524900c7182e..afbf50aa067c 100644
--- a/src/coreclr/vm/proftoeeinterfaceimpl.inl
+++ b/src/coreclr/vm/proftoeeinterfaceimpl.inl
@@ -87,43 +87,6 @@ inline BOOL IsCalledAsynchronously()
 }
 
 
-//---------------------------------------------------------------------------------------
-//
-// Simple helper that decides whether we should avoid calling into the host. Generally,
-// host calls should be avoided if the current Info method was called asynchronously
-// (i.e., from an F1-style hijack), for fear of re-entering the host (mainly SQL).
-//
-// Server GC threads are native (non-EE) threads, which therefore do not track enough
-// state for us to determine if a call is made asynhronously on those threads. So we
-// pessimistically assume that the current call on a server GC thread is from a hijack
-// for the purposes of determining whether we may enter the host. Reasoning for this:
-//     * SQL enables server-mode GC
-//     * server GC threads are responsible for performing runtime suspension, and thus
-//         call Thread::SuspendThread() which yields/sleeps and thus enters the host. So
-//         server GC threads are examples of non-EE Threads that actually do spend time
-//         in the host (this otherwise almost never happens for other non-EE threads).
-//     * In spite of this pessimism, the effect on the profiler should be minimal. The
-//         host calls we're avoiding are from the code manager's lock, which:
-//             * a) Is only used when doing stack walks or translating IPs to functions
-//             * b) Is only affected if it tries to yield/sleep when the code manager
-//                 writer lock is taken, and that happens for incredibly tiny windows of
-//                 time.
-//
-
-inline BOOL ShouldAvoidHostCalls()
-{
-    LIMITED_METHOD_CONTRACT;
-
-    return
-    (
-        IsCalledAsynchronously() ||
-        (
-            (GetThreadNULLOk() == NULL) && IsGCSpecialThread()
-        )
-    );
-}
-
-
 //---------------------------------------------------------------------------------------
 //
 // Simple helper that returns nonzero iff the current thread is a non-EE thread in the
diff --git a/src/coreclr/vm/qcall.h b/src/coreclr/vm/qcall.h
index d5f355ad9662..e3154c7b1334 100644
--- a/src/coreclr/vm/qcall.h
+++ b/src/coreclr/vm/qcall.h
@@ -7,7 +7,7 @@
 #ifndef __QCall_h__
 #define __QCall_h__
 
-#include "clr_std/type_traits"
+#include <type_traits>
 
 //
 // QCALLS
diff --git a/src/coreclr/vm/qcallentrypoints.cpp b/src/coreclr/vm/qcallentrypoints.cpp
index ec72d3112f5f..1150c55aa36d 100644
--- a/src/coreclr/vm/qcallentrypoints.cpp
+++ b/src/coreclr/vm/qcallentrypoints.cpp
@@ -24,7 +24,6 @@
 #include "floatdouble.h"
 #include "floatsingle.h"
 #include "comdatetime.h"
-#include "compatibilityswitch.h"
 #include "debugdebugger.h"
 #include "assemblynative.hpp"
 #include "comthreadpool.h"
@@ -82,6 +81,9 @@ static const Entry s_QCall[] =
     DllImportEntry(ArgIterator_GetNextArgType)
     DllImportEntry(ArgIterator_GetNextArg)
     DllImportEntry(ArgIterator_GetNextArg2)
+    DllImportEntry(CustomAttribute_ParseAttributeUsageAttribute)
+    DllImportEntry(CustomAttribute_CreateCustomAttributeInstance)
+    DllImportEntry(CustomAttribute_CreatePropertyOrFieldData)
     DllImportEntry(Enum_GetValuesAndNames)
     DllImportEntry(DebugDebugger_Launch)
     DllImportEntry(DebugDebugger_Log)
@@ -94,12 +96,15 @@ static const Entry s_QCall[] =
     DllImportEntry(Delegate_FindMethodHandle)
     DllImportEntry(Delegate_InternalEqualMethodHandles)
     DllImportEntry(Environment_Exit)
+    DllImportEntry(Environment_FailFast)
     DllImportEntry(Environment_GetProcessorCount)
     DllImportEntry(ExceptionNative_GetMessageFromNativeResources)
     DllImportEntry(RuntimeTypeHandle_CreateInstanceForAnotherGenericParameter)
     DllImportEntry(QCall_GetGCHandleForTypeHandle)
     DllImportEntry(QCall_FreeGCHandleForTypeHandle)
     DllImportEntry(MethodTable_AreTypesEquivalent)
+    DllImportEntry(MethodTable_CanCompareBitsOrUseFastGetHashCode)
+    DllImportEntry(ValueType_GetHashCodeStrategy)
     DllImportEntry(RuntimeTypeHandle_MakePointer)
     DllImportEntry(RuntimeTypeHandle_MakeByRef)
     DllImportEntry(RuntimeTypeHandle_MakeSZArray)
@@ -116,6 +121,7 @@ static const Entry s_QCall[] =
     DllImportEntry(RuntimeTypeHandle_GetActivationInfo)
     DllImportEntry(RuntimeTypeHandle_AllocateTypeAssociatedMemory)
     DllImportEntry(RuntimeTypeHandle_RegisterCollectibleTypeDependency)
+    DllImportEntry(MethodBase_GetCurrentMethod)
     DllImportEntry(RuntimeMethodHandle_ConstructInstantiation)
     DllImportEntry(RuntimeMethodHandle_GetFunctionPointer)
     DllImportEntry(RuntimeMethodHandle_GetIsCollectible)
@@ -322,6 +328,7 @@ static const Entry s_QCall[] =
     DllImportEntry(GetFileLoadExceptionMessage)
     DllImportEntry(FileLoadException_GetMessageForHR)
     DllImportEntry(Interlocked_MemoryBarrierProcessWide)
+    DllImportEntry(ObjectNative_AllocateUninitializedClone)
     DllImportEntry(Monitor_Wait)
     DllImportEntry(Monitor_Pulse)
     DllImportEntry(Monitor_PulseAll)
@@ -348,22 +355,23 @@ static const Entry s_QCall[] =
 #if defined(FEATURE_EVENTSOURCE_XPLAT)
     DllImportEntry(IsEventSourceLoggingEnabled)
     DllImportEntry(LogEventSource)
+    DllImportEntry(EventSource_GetClrConfig)
 #endif
 #if defined(FEATURE_PERFTRACING)
-    DllImportEntry(LogThreadPoolWorkerThreadStart)
-    DllImportEntry(LogThreadPoolWorkerThreadStop)
-    DllImportEntry(LogThreadPoolWorkerThreadWait)
-    DllImportEntry(LogThreadPoolMinMaxThreads)
-    DllImportEntry(LogThreadPoolWorkerThreadAdjustmentSample)
-    DllImportEntry(LogThreadPoolWorkerThreadAdjustmentAdjustment)
-    DllImportEntry(LogThreadPoolWorkerThreadAdjustmentStats)
-    DllImportEntry(LogThreadPoolIOEnqueue)
-    DllImportEntry(LogThreadPoolIODequeue)
-    DllImportEntry(LogThreadPoolIOPack)
-    DllImportEntry(LogThreadPoolWorkingThreadCount)
-    DllImportEntry(LogContentionLockCreated)
-    DllImportEntry(LogContentionStart)
-    DllImportEntry(LogContentionStop)
+    DllImportEntry(NativeRuntimeEventSource_LogThreadPoolWorkerThreadStart)
+    DllImportEntry(NativeRuntimeEventSource_LogThreadPoolWorkerThreadStop)
+    DllImportEntry(NativeRuntimeEventSource_LogThreadPoolWorkerThreadWait)
+    DllImportEntry(NativeRuntimeEventSource_LogThreadPoolMinMaxThreads)
+    DllImportEntry(NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentSample)
+    DllImportEntry(NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentAdjustment)
+    DllImportEntry(NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentStats)
+    DllImportEntry(NativeRuntimeEventSource_LogThreadPoolIOEnqueue)
+    DllImportEntry(NativeRuntimeEventSource_LogThreadPoolIODequeue)
+    DllImportEntry(NativeRuntimeEventSource_LogThreadPoolIOPack)
+    DllImportEntry(NativeRuntimeEventSource_LogThreadPoolWorkingThreadCount)
+    DllImportEntry(NativeRuntimeEventSource_LogContentionLockCreated)
+    DllImportEntry(NativeRuntimeEventSource_LogContentionStart)
+    DllImportEntry(NativeRuntimeEventSource_LogContentionStop)
     DllImportEntry(EventPipeInternal_Enable)
     DllImportEntry(EventPipeInternal_Disable)
     DllImportEntry(EventPipeInternal_GetSessionInfo)
diff --git a/src/coreclr/vm/readytoruninfo.cpp b/src/coreclr/vm/readytoruninfo.cpp
index 2a56362e9200..a047e17ffa3d 100644
--- a/src/coreclr/vm/readytoruninfo.cpp
+++ b/src/coreclr/vm/readytoruninfo.cpp
@@ -1929,20 +1929,17 @@ bool ReadyToRun_TypeGenericInfoMap::HasConstraints(mdTypeDef input, bool *foundR
 
 bool ReadyToRun_MethodIsGenericMap::IsGeneric(mdMethodDef input, bool *foundResult) const
 {
-#ifdef DACCESS_COMPILE
-    *foundResult = false;
-    return false;
-#else
+#ifndef DACCESS_COMPILE
     uint32_t rid = RidFromToken(input);
-    if ((rid > MethodCount) || (rid == 0))
+    if ((rid <= MethodCount) && (rid != 0))
     {
-        *foundResult = false;
-        return false;
+        uint8_t chunk = ((uint8_t*)&MethodCount)[((rid - 1) / 8) + sizeof(uint32_t)];
+        chunk >>= 7 - ((rid - 1) % 8);
+        *foundResult = true;
+        return !!(chunk & 1);
     }
-
-    uint8_t chunk = ((uint8_t*)&MethodCount)[((rid - 1) / 8) + sizeof(uint32_t)];
-    chunk >>= 7 - ((rid - 1) % 8);
-    return !!(chunk & 1);
-#endif
+#endif // !DACCESS_COMPILE
+    *foundResult = false;
+    return false;
 }
 
diff --git a/src/coreclr/vm/readytorunstandalonemethodmetadata.cpp b/src/coreclr/vm/readytorunstandalonemethodmetadata.cpp
index 036d1d7a206a..30802a707691 100644
--- a/src/coreclr/vm/readytorunstandalonemethodmetadata.cpp
+++ b/src/coreclr/vm/readytorunstandalonemethodmetadata.cpp
@@ -23,7 +23,7 @@ class ReadyToRunStandaloneMethodMetadataHelper
 public:
 
     ReadyToRunStandaloneMethodMetadataHelper(MethodDesc *pMD, SArray<uint32_t> *pTypeRefTokenStreamInput) :
-        header(pMD->GetILHeader(TRUE), pMD->GetMDImport(), NULL),
+        header(pMD->GetILHeader(), pMD->GetMDImport(), NULL),
         currentILStreamIterator(0),
         pTypeRefTokenStream(pTypeRefTokenStreamInput),
         pModule(pMD->GetModule()),
diff --git a/src/coreclr/vm/reflectioninvocation.cpp b/src/coreclr/vm/reflectioninvocation.cpp
index e12ade403c6b..a7f88289d037 100644
--- a/src/coreclr/vm/reflectioninvocation.cpp
+++ b/src/coreclr/vm/reflectioninvocation.cpp
@@ -25,8 +25,9 @@
 #include "dbginterface.h"
 #include "argdestination.h"
 
-FCIMPL5(Object*, RuntimeFieldHandle::GetValue, ReflectFieldObject *pFieldUNSAFE, Object *instanceUNSAFE, ReflectClassBaseObject *pFieldTypeUNSAFE, ReflectClassBaseObject *pDeclaringTypeUNSAFE, CLR_BOOL *pDomainInitialized) {
-    CONTRACTL {
+FCIMPL5(Object*, RuntimeFieldHandle::GetValue, ReflectFieldObject *pFieldUNSAFE, Object *instanceUNSAFE, ReflectClassBaseObject *pFieldTypeUNSAFE, ReflectClassBaseObject *pDeclaringTypeUNSAFE, CLR_BOOL *pIsClassInitialized) {
+    CONTRACTL
+    {
         FCALL_CHECK;
     }
     CONTRACTL_END;
@@ -50,22 +51,11 @@ FCIMPL5(Object*, RuntimeFieldHandle::GetValue, ReflectFieldObject *pFieldUNSAFE,
     TypeHandle fieldType = gc.pFieldType->GetType();
     TypeHandle declaringType = (gc.pDeclaringType != NULL) ? gc.pDeclaringType->GetType() : TypeHandle();
 
-    Assembly *pAssem;
-    if (declaringType.IsNull())
-    {
-        // global field
-        pAssem = gc.refField->GetField()->GetModule()->GetAssembly();
-    }
-    else
-    {
-        pAssem = declaringType.GetAssembly();
-    }
-
     OBJECTREF rv = NULL; // not protected
 
     HELPER_METHOD_FRAME_BEGIN_RET_PROTECT(gc);
     // There can be no GC after this until the Object is returned.
-    rv = InvokeUtil::GetFieldValue(gc.refField->GetField(), fieldType, &gc.target, declaringType, pDomainInitialized);
+    rv = InvokeUtil::GetFieldValue(gc.refField->GetField(), fieldType, &gc.target, declaringType, pIsClassInitialized);
     HELPER_METHOD_FRAME_END();
 
     return OBJECTREFToObject(rv);
@@ -73,7 +63,8 @@ FCIMPL5(Object*, RuntimeFieldHandle::GetValue, ReflectFieldObject *pFieldUNSAFE,
 FCIMPLEND
 
 FCIMPL2(FC_BOOL_RET, ReflectionInvocation::CanValueSpecialCast, ReflectClassBaseObject *pValueTypeUNSAFE, ReflectClassBaseObject *pTargetTypeUNSAFE) {
-    CONTRACTL {
+    CONTRACTL
+    {
         FCALL_CHECK;
         PRECONDITION(CheckPointer(pValueTypeUNSAFE));
         PRECONDITION(CheckPointer(pTargetTypeUNSAFE));
@@ -126,7 +117,8 @@ FCIMPLEND
 ///  Allocate the value type and copy the optional value into it.
 /// </summary>
 FCIMPL2(Object*, ReflectionInvocation::AllocateValueType, ReflectClassBaseObject *pTargetTypeUNSAFE, Object *valueUNSAFE) {
-    CONTRACTL {
+    CONTRACTL
+    {
         FCALL_CHECK;
         PRECONDITION(CheckPointer(pTargetTypeUNSAFE));
         PRECONDITION(CheckPointer(valueUNSAFE, NULL_OK));
@@ -169,8 +161,9 @@ FCIMPL2(Object*, ReflectionInvocation::AllocateValueType, ReflectClassBaseObject
 }
 FCIMPLEND
 
-FCIMPL7(void, RuntimeFieldHandle::SetValue, ReflectFieldObject *pFieldUNSAFE, Object *targetUNSAFE, Object *valueUNSAFE, ReflectClassBaseObject *pFieldTypeUNSAFE, DWORD attr, ReflectClassBaseObject *pDeclaringTypeUNSAFE, CLR_BOOL *pDomainInitialized) {
-    CONTRACTL {
+FCIMPL6(void, RuntimeFieldHandle::SetValue, ReflectFieldObject *pFieldUNSAFE, Object *targetUNSAFE, Object *valueUNSAFE, ReflectClassBaseObject *pFieldTypeUNSAFE, ReflectClassBaseObject *pDeclaringTypeUNSAFE, CLR_BOOL *pIsClassInitialized) {
+    CONTRACTL
+    {
         FCALL_CHECK;
     }
     CONTRACTL_END;
@@ -195,24 +188,13 @@ FCIMPL7(void, RuntimeFieldHandle::SetValue, ReflectFieldObject *pFieldUNSAFE, Ob
     TypeHandle fieldType = gc.fieldType->GetType();
     TypeHandle declaringType = gc.declaringType != NULL ? gc.declaringType->GetType() : TypeHandle();
 
-    Assembly *pAssem;
-    if (declaringType.IsNull())
-    {
-        // global field
-        pAssem = gc.refField->GetField()->GetModule()->GetAssembly();
-    }
-    else
-    {
-        pAssem = declaringType.GetAssembly();
-    }
-
     FC_GC_POLL_NOT_NEEDED();
 
     FieldDesc* pFieldDesc = gc.refField->GetField();
 
     HELPER_METHOD_FRAME_BEGIN_PROTECT(gc);
 
-    InvokeUtil::SetValidField(fieldType.GetVerifierCorElementType(), fieldType, pFieldDesc, &gc.target, &gc.value, declaringType, pDomainInitialized);
+    InvokeUtil::SetValidField(fieldType.GetVerifierCorElementType(), fieldType, pFieldDesc, &gc.target, &gc.value, declaringType, pIsClassInitialized);
 
     HELPER_METHOD_FRAME_END();
 }
@@ -225,7 +207,8 @@ extern "C" void QCALLTYPE RuntimeTypeHandle_CreateInstanceForAnotherGenericParam
     QCall::ObjectHandleOnStack pInstantiatedObject
 )
 {
-    CONTRACTL{
+    CONTRACTL
+    {
         QCALL_CHECK;
         PRECONDITION(!pTypeHandle.AsTypeHandle().IsNull());
         PRECONDITION(cInstArray >= 0);
@@ -310,7 +293,8 @@ FCIMPLEND
 
 static OBJECTREF InvokeArrayConstructor(TypeHandle th, PVOID* args, int argCnt)
 {
-    CONTRACTL {
+    CONTRACTL
+    {
         THROWS;
         GC_TRIGGERS;
         MODE_COOPERATIVE;
@@ -344,7 +328,8 @@ static OBJECTREF InvokeArrayConstructor(TypeHandle th, PVOID* args, int argCnt)
 
 static BOOL IsActivationNeededForMethodInvoke(MethodDesc * pMD)
 {
-    CONTRACTL {
+    CONTRACTL
+    {
         THROWS;
         GC_TRIGGERS;
         MODE_COOPERATIVE;
@@ -875,7 +860,8 @@ struct SkipStruct {
 // This method is called by the GetMethod function and will crawl backward
 //  up the stack for integer methods.
 static StackWalkAction SkipMethods(CrawlFrame* frame, VOID* data) {
-    CONTRACTL {
+    CONTRACTL
+    {
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
@@ -899,45 +885,39 @@ static StackWalkAction SkipMethods(CrawlFrame* frame, VOID* data) {
     if (!frame->IsInCalleesFrames(pSkip->pStackMark))
         return SWA_CONTINUE;
 
-    if (pFunc->RequiresInstMethodDescArg())
-    {
-        pSkip->pMeth = (MethodDesc *) frame->GetParamTypeArg();
-        if (pSkip->pMeth == NULL)
-            pSkip->pMeth = pFunc;
-    }
-    else
-        pSkip->pMeth = pFunc;
+    pSkip->pMeth = pFunc;
     return SWA_ABORT;
 }
 
 // Return the MethodInfo that represents the current method (two above this one)
-FCIMPL1(ReflectMethodObject*, RuntimeMethodHandle::GetCurrentMethod, StackCrawlMark* stackMark) {
-    FCALL_CONTRACT;
-    REFLECTMETHODREF pRet = NULL;
+extern "C" MethodDesc* QCALLTYPE MethodBase_GetCurrentMethod(QCall::StackCrawlMarkHandle stackMark) {
+
+    QCALL_CONTRACT;
+
+    MethodDesc* pRet = nullptr;
+
+    BEGIN_QCALL;
 
-    HELPER_METHOD_FRAME_BEGIN_RET_0();
     SkipStruct skip;
     skip.pStackMark = stackMark;
     skip.pMeth = 0;
-    StackWalkFunctions(GetThread(), SkipMethods, &skip);
+    GetThread()->StackWalkFrames(SkipMethods, &skip, FUNCTIONSONLY | LIGHTUNWIND);
 
-    // If C<Foo>.m<Bar> was called, the stack walker returns C<object>.m<object>. We cannot
+    // If C<Foo>.m<Bar> was called, the stack walker returns C<__Canon>.m<__Canon>. We cannot
     // get know that the instantiation used Foo or Bar at that point. So the next best thing
     // is to return C<T>.m<P> and that's what LoadTypicalMethodDefinition will do for us.
 
     if (skip.pMeth != NULL)
-        pRet = skip.pMeth->LoadTypicalMethodDefinition()->GetStubMethodInfo();
-    else
-        pRet = NULL;
+        pRet = skip.pMeth->LoadTypicalMethodDefinition();
 
-    HELPER_METHOD_FRAME_END();
+    END_QCALL;
 
-    return (ReflectMethodObject*)OBJECTREFToObject(pRet);
+    return pRet;
 }
-FCIMPLEND
 
-static OBJECTREF DirectObjectFieldGet(FieldDesc *pField, TypeHandle fieldType, TypeHandle enclosingType, TypedByRef *pTarget, CLR_BOOL *pDomainInitialized) {
-    CONTRACTL {
+static OBJECTREF DirectObjectFieldGet(FieldDesc *pField, TypeHandle fieldType, TypeHandle enclosingType, TypedByRef *pTarget, CLR_BOOL *pIsClassInitialized) {
+    CONTRACTL
+    {
         THROWS;
         GC_TRIGGERS;
         MODE_COOPERATIVE;
@@ -954,13 +934,14 @@ static OBJECTREF DirectObjectFieldGet(FieldDesc *pField, TypeHandle fieldType, T
     }
 
     InvokeUtil::ValidateObjectTarget(pField, enclosingType, &objref);
-    refRet = InvokeUtil::GetFieldValue(pField, fieldType, &objref, enclosingType, pDomainInitialized);
+    refRet = InvokeUtil::GetFieldValue(pField, fieldType, &objref, enclosingType, pIsClassInitialized);
     GCPROTECT_END();
     return refRet;
 }
 
 FCIMPL4(Object*, RuntimeFieldHandle::GetValueDirect, ReflectFieldObject *pFieldUNSAFE, ReflectClassBaseObject *pFieldTypeUNSAFE, TypedByRef *pTarget, ReflectClassBaseObject *pDeclaringTypeUNSAFE) {
-    CONTRACTL {
+    CONTRACTL
+    {
         FCALL_CHECK;
     }
     CONTRACTL_END;
@@ -994,9 +975,9 @@ FCIMPL4(Object*, RuntimeFieldHandle::GetValueDirect, ReflectFieldObject *pFieldU
     _ASSERTE(gc.refDeclaringType == NULL || !gc.refDeclaringType->GetType().IsTypeDesc());
     MethodTable *pEnclosingMT = (gc.refDeclaringType != NULL ? gc.refDeclaringType->GetType() : TypeHandle()).AsMethodTable();
 
-    CLR_BOOL domainInitialized = FALSE;
+    CLR_BOOL isClassInitialized = FALSE;
     if (pField->IsStatic() || !targetType.IsValueType()) {
-        refRet = DirectObjectFieldGet(pField, fieldType, TypeHandle(pEnclosingMT), pTarget, &domainInitialized);
+        refRet = DirectObjectFieldGet(pField, fieldType, TypeHandle(pEnclosingMT), pTarget, &isClassInitialized);
         goto lExit;
     }
 
@@ -1059,8 +1040,9 @@ lExit: ;
 }
 FCIMPLEND
 
-static void DirectObjectFieldSet(FieldDesc *pField, TypeHandle fieldType, TypeHandle enclosingType, TypedByRef *pTarget, OBJECTREF *pValue, CLR_BOOL *pDomainInitialized) {
-    CONTRACTL {
+static void DirectObjectFieldSet(FieldDesc *pField, TypeHandle fieldType, TypeHandle enclosingType, TypedByRef *pTarget, OBJECTREF *pValue, CLR_BOOL *pIsClassInitialized) {
+    CONTRACTL
+    {
         THROWS;
         GC_TRIGGERS;
         MODE_COOPERATIVE;
@@ -1078,12 +1060,13 @@ static void DirectObjectFieldSet(FieldDesc *pField, TypeHandle fieldType, TypeHa
     // Validate the target/fld type relationship
     InvokeUtil::ValidateObjectTarget(pField, enclosingType, &objref);
 
-    InvokeUtil::SetValidField(pField->GetFieldType(), fieldType, pField, &objref, pValue, enclosingType, pDomainInitialized);
+    InvokeUtil::SetValidField(pField->GetFieldType(), fieldType, pField, &objref, pValue, enclosingType, pIsClassInitialized);
     GCPROTECT_END();
 }
 
 FCIMPL5(void, RuntimeFieldHandle::SetValueDirect, ReflectFieldObject *pFieldUNSAFE, ReflectClassBaseObject *pFieldTypeUNSAFE, TypedByRef *pTarget, Object *valueUNSAFE, ReflectClassBaseObject *pContextTypeUNSAFE) {
-    CONTRACTL {
+    CONTRACTL
+    {
         FCALL_CHECK;
     }
     CONTRACTL_END;
@@ -1124,9 +1107,9 @@ FCIMPL5(void, RuntimeFieldHandle::SetValueDirect, ReflectFieldObject *pFieldUNSA
     // Verify that the value passed can be widened into the target
     InvokeUtil::ValidField(fieldType, &gc.oValue);
 
-    CLR_BOOL domainInitialized = FALSE;
+    CLR_BOOL isClassInitialized = FALSE;
     if (pField->IsStatic() || !targetType.IsValueType()) {
-        DirectObjectFieldSet(pField, fieldType, TypeHandle(pEnclosingMT), pTarget, &gc.oValue, &domainInitialized);
+        DirectObjectFieldSet(pField, fieldType, TypeHandle(pEnclosingMT), pTarget, &gc.oValue, &isClassInitialized);
         goto lExit;
     }
 
@@ -1243,6 +1226,85 @@ lExit: ;
 }
 FCIMPLEND
 
+static bool IsFastPathSupportedHelper(FieldDesc* pFieldDesc)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+        PRECONDITION(CheckPointer(pFieldDesc));
+    }
+    CONTRACTL_END;
+
+    return !pFieldDesc->IsThreadStatic() &&
+        !pFieldDesc->IsEnCNew() &&
+        !(pFieldDesc->IsCollectible() && pFieldDesc->IsStatic());
+}
+
+FCIMPL1(FC_BOOL_RET, RuntimeFieldHandle::IsFastPathSupported, ReflectFieldObject *pFieldUNSAFE)
+{
+    FCALL_CONTRACT;
+
+    REFLECTFIELDREF refField = (REFLECTFIELDREF)ObjectToOBJECTREF(pFieldUNSAFE);
+    _ASSERTE(refField != NULL);
+
+    FieldDesc* pFieldDesc = refField->GetField();
+    return IsFastPathSupportedHelper(pFieldDesc) ? TRUE : FALSE;
+}
+FCIMPLEND
+
+FCIMPL1(INT32, RuntimeFieldHandle::GetInstanceFieldOffset, ReflectFieldObject *pFieldUNSAFE)
+{
+    CONTRACTL
+    {
+        FCALL_CHECK;
+        PRECONDITION(CheckPointer(pFieldUNSAFE));
+    }
+    CONTRACTL_END;
+
+    REFLECTFIELDREF refField = (REFLECTFIELDREF)ObjectToOBJECTREF(pFieldUNSAFE);
+    _ASSERTE(refField != NULL);
+
+    FieldDesc* pFieldDesc = refField->GetField();
+    _ASSERTE(!pFieldDesc->IsStatic());
+
+    // IsFastPathSupported needs to checked before calling this method.
+    _ASSERTE(IsFastPathSupportedHelper(pFieldDesc));
+
+    return pFieldDesc->GetOffset();
+}
+FCIMPLEND
+
+FCIMPL1(void*, RuntimeFieldHandle::GetStaticFieldAddress, ReflectFieldObject *pFieldUNSAFE)
+{
+    CONTRACTL
+    {
+        FCALL_CHECK;
+        PRECONDITION(CheckPointer(pFieldUNSAFE));
+    }
+    CONTRACTL_END;
+
+    REFLECTFIELDREF refField = (REFLECTFIELDREF)ObjectToOBJECTREF(pFieldUNSAFE);
+    _ASSERTE(refField != NULL);
+
+    FieldDesc* pFieldDesc = refField->GetField();
+    _ASSERTE(pFieldDesc->IsStatic());
+
+    // IsFastPathSupported needs to checked before calling this method.
+    _ASSERTE(IsFastPathSupportedHelper(pFieldDesc));
+
+    PTR_BYTE base = 0;
+    if (!pFieldDesc->IsRVA())
+    {
+        // For RVA the base is ignored and offset is used.
+        base = pFieldDesc->GetBase();
+    }
+
+    return PTR_VOID(base + pFieldDesc->GetOffset());
+}
+FCIMPLEND
+
 extern "C" void QCALLTYPE ReflectionInvocation_CompileMethod(MethodDesc * pMD)
 {
     QCALL_CONTRACT;
@@ -1313,7 +1375,8 @@ static void PrepareMethodHelper(MethodDesc * pMD)
 // It does not walk a subset of callgraph to provide CER guarantees.
 extern "C" void QCALLTYPE ReflectionInvocation_PrepareMethod(MethodDesc *pMD, TypeHandle *pInstantiation, UINT32 cInstantiation)
 {
-    CONTRACTL {
+    CONTRACTL
+    {
         QCALL_CHECK;
         PRECONDITION(pMD != NULL);
         PRECONDITION(CheckPointer(pInstantiation, NULL_OK));
@@ -1366,7 +1429,8 @@ extern "C" void QCALLTYPE ReflectionInvocation_PrepareMethod(MethodDesc *pMD, Ty
 // was prepared prior to the Combine.
 FCIMPL1(void, ReflectionInvocation::PrepareDelegate, Object* delegateUNSAFE)
 {
-    CONTRACTL {
+    CONTRACTL
+    {
         FCALL_CHECK;
         PRECONDITION(CheckPointer(delegateUNSAFE, NULL_OK));
     }
@@ -1427,7 +1491,8 @@ FCIMPLEND
 
 FCIMPL4(void, ReflectionInvocation::MakeTypedReference, TypedByRef * value, Object* targetUNSAFE, ArrayBase* fldsUNSAFE, ReflectClassBaseObject *pFieldTypeUNSAFE)
 {
-    CONTRACTL {
+    CONTRACTL
+    {
         FCALL_CHECK;
         PRECONDITION(CheckPointer(targetUNSAFE));
         PRECONDITION(CheckPointer(fldsUNSAFE));
@@ -1699,7 +1764,8 @@ extern "C" void QCALLTYPE RuntimeTypeHandle_GetActivationInfo(
     BOOL* pfCtorIsPublic
 )
 {
-    CONTRACTL{
+    CONTRACTL
+    {
         QCALL_CHECK;
         PRECONDITION(CheckPointer(ppfnAllocator));
         PRECONDITION(CheckPointer(pvAllocatorFirstArg));
@@ -1818,7 +1884,8 @@ extern "C" void QCALLTYPE RuntimeTypeHandle_GetActivationInfo(
 FCIMPL1(Object*, RuntimeTypeHandle::AllocateComObject,
     void* pClassFactory)
 {
-    CONTRACTL{
+    CONTRACTL
+    {
         FCALL_CHECK;
         PRECONDITION(CheckPointer(pClassFactory));
     }
@@ -1867,7 +1934,8 @@ extern "C" void QCALLTYPE ReflectionSerialization_GetCreateUninitializedObjectIn
     PCODE* ppfnAllocator,
     void** pvAllocatorFirstArg)
 {
-    CONTRACTL{
+    CONTRACTL
+    {
         QCALL_CHECK;
         PRECONDITION(CheckPointer(ppfnAllocator));
         PRECONDITION(CheckPointer(pvAllocatorFirstArg));
diff --git a/src/coreclr/vm/rejit.cpp b/src/coreclr/vm/rejit.cpp
index c4f7394a9387..071fddacb3f4 100644
--- a/src/coreclr/vm/rejit.cpp
+++ b/src/coreclr/vm/rejit.cpp
@@ -508,6 +508,12 @@ HRESULT ReJitManager::UpdateActiveILVersions(
             continue;
         }
 
+        if (pModule->IsEditAndContinueEnabled())
+        {
+            ReportReJITError(pModule, rgMethodDefs[i], NULL, CORPROF_E_MODULE_IS_ENC);
+            continue;
+        }
+
         if (!pModule->GetMDImport()->IsValidToken(rgMethodDefs[i]))
         {
             ReportReJITError(pModule, rgMethodDefs[i], NULL, E_INVALIDARG);
diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h
index 71095e3cffc9..8076ee94f213 100644
--- a/src/coreclr/vm/riscv64/asmconstants.h
+++ b/src/coreclr/vm/riscv64/asmconstants.h
@@ -29,8 +29,8 @@
 #define DynamicHelperFrameFlags_ObjectArg   1
 #define DynamicHelperFrameFlags_ObjectArg2  2
 
-#define               Thread__m_fPreemptiveGCDisabled   0x0C
-#define               Thread__m_pFrame                  0x10
+#define               Thread__m_fPreemptiveGCDisabled   0x04
+#define               Thread__m_pFrame                  0x08
 
 ASMCONSTANTS_C_ASSERT(Thread__m_fPreemptiveGCDisabled == offsetof(Thread, m_fPreemptiveGCDisabled));
 ASMCONSTANTS_C_ASSERT(Thread__m_pFrame == offsetof(Thread, m_pFrame));
diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S
index 83f0484296c0..b64ac8725e15 100644
--- a/src/coreclr/vm/riscv64/asmhelpers.S
+++ b/src/coreclr/vm/riscv64/asmhelpers.S
@@ -687,23 +687,24 @@ LEAF_END JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain, _TEXT
 // ------------------------------------------------------------------
 // Hijack function for functions which return a scalar type or a struct (value type)
 NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler
-    PROLOG_SAVE_REG_PAIR_INDEXED   fp, ra, 0x90
+    PROLOG_SAVE_REG_PAIR_INDEXED   fp, ra, 0xa0
 
     // Spill callee saved registers
     PROLOG_SAVE_REG_PAIR   s1, s2, 16
     PROLOG_SAVE_REG_PAIR   s3, s4, 32
     PROLOG_SAVE_REG_PAIR   s5, s6, 48
     PROLOG_SAVE_REG_PAIR   s7, s8, 64
-    PROLOG_SAVE_REG_PAIR   s9, s10, 80 
-    PROLOG_SAVE_REG s11, 96
+    PROLOG_SAVE_REG_PAIR   s9, s10, 80
+    PROLOG_SAVE_REG_PAIR   s11, gp, 96
+    PROLOG_SAVE_REG        tp, 112
 
     // save any integral return value(s)
-    sd  a0, 104(sp)
-    sd  a1, 112(sp)
+    sd  a0, 120(sp)
+    sd  a1, 128(sp)
 
     // save any FP/HFA return value(s)
-    fsd  f0, 120(sp)
-    fsd  f1, 128(sp)
+    fsd  f0, 136(sp)
+    fsd  f1, 144(sp)
 
     addi  a0, sp, 0
     call  C_FUNC(OnHijackWorker)
@@ -711,20 +712,21 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler
     // restore callee saved registers
 
     // restore any integral return value(s)
-    ld  a0, 104(sp)
-    ld  a1, 112(sp)
+    ld  a0, 120(sp)
+    ld  a1, 128(sp)
 
     // restore any FP/HFA return value(s)
-    fld  f0, 120(sp)
-    fld  f1, 128(sp)
+    fld  f0, 136(sp)
+    fld  f1, 144(sp)
 
     EPILOG_RESTORE_REG_PAIR   s1, s2, 16
     EPILOG_RESTORE_REG_PAIR   s3, s4, 32
     EPILOG_RESTORE_REG_PAIR   s5, s6, 48
     EPILOG_RESTORE_REG_PAIR   s7, s8, 64
     EPILOG_RESTORE_REG_PAIR   s9, s10, 80
-    EPILOG_RESTORE_REG  s11, 96 
-    EPILOG_RESTORE_REG_PAIR_INDEXED  fp, ra, 0x90
+    EPILOG_RESTORE_REG_PAIR   s11, gp, 96
+    EPILOG_RESTORE_REG        tp, 112
+    EPILOG_RESTORE_REG_PAIR_INDEXED  fp, ra, 0xa0
     EPILOG_RETURN
 NESTED_END OnHijackTripThread, _TEXT
 
diff --git a/src/coreclr/vm/riscv64/cgencpu.h b/src/coreclr/vm/riscv64/cgencpu.h
index 1a3321474b1b..0900e7dd0129 100644
--- a/src/coreclr/vm/riscv64/cgencpu.h
+++ b/src/coreclr/vm/riscv64/cgencpu.h
@@ -15,6 +15,37 @@
 #define USE_REDIRECT_FOR_GCSTRESS
 #endif // TARGET_UNIX
 
+#define ENUM_CALLEE_SAVED_REGISTERS() \
+    CALLEE_SAVED_REGISTER(Fp) \
+    CALLEE_SAVED_REGISTER(Ra) \
+    CALLEE_SAVED_REGISTER(S1) \
+    CALLEE_SAVED_REGISTER(S2) \
+    CALLEE_SAVED_REGISTER(S3) \
+    CALLEE_SAVED_REGISTER(S4) \
+    CALLEE_SAVED_REGISTER(S5) \
+    CALLEE_SAVED_REGISTER(S6) \
+    CALLEE_SAVED_REGISTER(S7) \
+    CALLEE_SAVED_REGISTER(S8) \
+    CALLEE_SAVED_REGISTER(S9) \
+    CALLEE_SAVED_REGISTER(S10) \
+    CALLEE_SAVED_REGISTER(S11) \
+    CALLEE_SAVED_REGISTER(Tp) \
+    CALLEE_SAVED_REGISTER(Gp)
+
+#define ENUM_FP_CALLEE_SAVED_REGISTERS() \
+    CALLEE_SAVED_REGISTER(F[8]) \
+    CALLEE_SAVED_REGISTER(F[9]) \
+    CALLEE_SAVED_REGISTER(F[18]) \
+    CALLEE_SAVED_REGISTER(F[19]) \
+    CALLEE_SAVED_REGISTER(F[20]) \
+    CALLEE_SAVED_REGISTER(F[21]) \
+    CALLEE_SAVED_REGISTER(F[22]) \
+    CALLEE_SAVED_REGISTER(F[23]) \
+    CALLEE_SAVED_REGISTER(F[24]) \
+    CALLEE_SAVED_REGISTER(F[25]) \
+    CALLEE_SAVED_REGISTER(F[26]) \
+    CALLEE_SAVED_REGISTER(F[27])
+
 EXTERN_C void getFPReturn(int fpSize, INT64 *pRetVal);
 EXTERN_C void setFPReturn(int fpSize, INT64 retVal);
 
@@ -36,8 +67,6 @@ extern PCODE GetPreStubEntryPoint();
 
 #define HAS_NDIRECT_IMPORT_PRECODE              1
 
-#define USE_INDIRECT_CODEHEADER
-
 #define HAS_FIXUP_PRECODE                       1
 
 // ThisPtrRetBufPrecode one is necessary for closed delegates over static methods with return buffer
@@ -423,6 +452,13 @@ struct DECLSPEC_ALIGN(16) UMEntryThunkCode
 
 struct HijackArgs
 {
+    DWORD64 Fp; // frame pointer
+    union
+    {
+        DWORD64 Ra;
+        size_t ReturnAddress;
+    };
+    DWORD64 S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, Gp, Tp;
     union
     {
         struct {
@@ -439,14 +475,7 @@ struct HijackArgs
          };
         size_t FPReturnValue[2];
     };
-    DWORD64 Fp; // frame pointer
-    DWORD64 Gp, Tp, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
-    union
-    {
-        DWORD64 Ra;
-        size_t ReturnAddress;
-    };
- };
+};
 
 // Precode to shuffle this and retbuf for closed delegates over static methods with return buffer
 struct ThisPtrRetBufPrecode {
diff --git a/src/coreclr/vm/riscv64/crthelpers.S b/src/coreclr/vm/riscv64/crthelpers.S
deleted file mode 100644
index 3151387b3caf..000000000000
--- a/src/coreclr/vm/riscv64/crthelpers.S
+++ /dev/null
@@ -1,36 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#include "unixasmmacros.inc"
-
-// JIT_MemSet/JIT_MemCpy
-//
-// It is IMPORTANT that the exception handling code is able to find these guys
-// on the stack, but on non-windows platforms we can just defer to the platform
-// implementation.
-//
-LEAF_ENTRY JIT_MemSet, _TEXT
-    beq  a2, zero, LOCAL_LABEL(JIT_MemSet_ret)
-
-    lb  zero, 0(a0) // Is this really needed ?
-
-    tail memset
-
-LOCAL_LABEL(JIT_MemSet_ret):
-    ret
-LEAF_END_MARKED JIT_MemSet, _TEXT
-
-////NOTE: Here must use LEAF_END_MARKED!  not LEAF_END !!!
-LEAF_ENTRY JIT_MemCpy, _TEXT
-    beq  a2, zero, LOCAL_LABEL(JIT_MemCpy_ret)
-
-    lb  zero, 0(a0)
-    lb  zero, 0(a1) // Is this really needed ?
-
-    tail memcpy 
-
-LOCAL_LABEL(JIT_MemCpy_ret):
-    ret
-
-////NOTE: Here must use LEAF_END_MARKED!  not LEAF_END !!!
-LEAF_END_MARKED JIT_MemCpy, _TEXT
diff --git a/src/coreclr/vm/riscv64/gmscpu.h b/src/coreclr/vm/riscv64/gmscpu.h
index 6506b10b8f75..9330e81e773c 100644
--- a/src/coreclr/vm/riscv64/gmscpu.h
+++ b/src/coreclr/vm/riscv64/gmscpu.h
@@ -39,8 +39,7 @@ struct LazyMachState : public MachState{
     static void unwindLazyState(LazyMachState* baseState,
                                 MachState* lazyState,
                                 DWORD threadId,
-                                int funCallDepth = 1,
-                                HostCallPreference hostCallPreference = AllowHostCalls);
+                                int funCallDepth = 1);
 };
 
 inline void LazyMachState::setLazyStateFromUnwind(MachState* copy)
diff --git a/src/coreclr/vm/riscv64/stubs.cpp b/src/coreclr/vm/riscv64/stubs.cpp
index 0f0273da0a47..ebc0d0495c2a 100644
--- a/src/coreclr/vm/riscv64/stubs.cpp
+++ b/src/coreclr/vm/riscv64/stubs.cpp
@@ -180,8 +180,7 @@ void ClearRegDisplayArgumentAndScratchRegisters(REGDISPLAY * pRD)
 void LazyMachState::unwindLazyState(LazyMachState* baseState,
                                     MachState* unwoundstate,
                                     DWORD threadId,
-                                    int funCallDepth,
-                                    HostCallPreference hostCallPreference)
+                                    int funCallDepth)
 {
     T_CONTEXT context;
     T_KNONVOLATILE_CONTEXT_POINTERS nonVolContextPtrs;
@@ -266,20 +265,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
         {
             // Determine  whether given IP resides in JITted code. (It returns nonzero in that case.)
             // Use it now to see if we've unwound to managed code yet.
-            BOOL fFailedReaderLock = FALSE;
-            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc, hostCallPreference, &fFailedReaderLock);
-            if (fFailedReaderLock)
-            {
-                // We don't know if we would have been able to find a JIT
-                // manager, because we couldn't enter the reader lock without
-                // yielding (and our caller doesn't want us to yield).  So abort
-                // now.
-
-                // Invalidate the lazyState we're returning, so the caller knows
-                // we aborted before we could fully unwind
-                unwoundstate->_isValid = false;
-                return;
-            }
+            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc);
 
             if (fIsManagedCode)
                 break;
@@ -344,7 +330,7 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
     unwoundstate->_isValid = TRUE;
 }
 
-void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACTL
     {
@@ -355,6 +341,14 @@ void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     }
     CONTRACTL_END;
 
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+        _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress());
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 
@@ -534,8 +528,16 @@ void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegis
     pContextPointers->Ra  = (PDWORD64)&pCalleeSaved->ra;
 }
 
-void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+        _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress());
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;        // Don't add usage of this field.  This is only temporary.
 
@@ -557,7 +559,7 @@ void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    TransitionFrame::UpdateRegDisplay(pc:%p, sp:%p)\n", pRD->ControlPC, pRD->SP));
 }
 
-void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     LIMITED_METHOD_DAC_CONTRACT;
 
@@ -593,7 +595,7 @@ void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    FaultingExceptionFrame::UpdateRegDisplay(pc:%p, sp:%p)\n", pRD->ControlPC, pRD->SP));
 }
 
-void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
@@ -602,7 +604,6 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 #ifdef PROFILING_SUPPORTED
         PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this));
 #endif
-        HOST_NOCALLS;
         MODE_ANY;
         SUPPORTS_DAC;
     }
@@ -614,6 +615,13 @@ void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
         return;
     }
 
+#ifndef DACCESS_COMPILE
+    if (updateFloats)
+    {
+        UpdateFloatingPointRegisters(pRD);
+    }
+#endif // DACCESS_COMPILE
+
     pRD->IsCallerContextValid = FALSE;
     pRD->IsCallerSPValid      = FALSE;
 
@@ -659,7 +667,7 @@ TADDR ResumableFrame::GetReturnAddressPtr(void)
     return dac_cast<TADDR>(m_Regs) + offsetof(T_CONTEXT, Pc);
 }
 
-void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     CONTRACT_VOID
     {
@@ -716,7 +724,7 @@ void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     RETURN;
 }
 
-void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD, bool updateFloats)
 {
     LIMITED_METHOD_CONTRACT;
 
@@ -1909,7 +1917,7 @@ PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator,
             p += 4;
         }
 
-        BYTE* pBLTCall = NULL;
+        BYTE* pBLECall = NULL;
 
         for (WORD i = 0; i < pLookup->indirections; i++)
         {
@@ -1939,8 +1947,8 @@ PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator,
                 p += 4;
                 *(DWORD*)p = ITypeInstr(0x13, 0, RegT4, RegT4, slotOffset & 0xfff);// addi  t4, t4, (slotOffset&0xfff)
                 p += 4;
-                // blt  t4, t5, CALL HELPER
-                pBLTCall = p;       // Offset filled later
+                // bge  t4, t5, CALL HELPER
+                pBLECall = p;       // Offset filled later
                 p += 4;
             }
 
@@ -1982,8 +1990,8 @@ PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator,
             p += 4;
 
             // CALL HELPER:
-            if (pBLTCall != NULL)
-                *(DWORD*)pBLTCall = BTypeInstr(0x63, 0x4, RegT4, RegT5, (UINT32)(p - pBLTCall));
+            if (pBLECall != NULL)
+                *(DWORD*)pBLECall = BTypeInstr(0x63, 0x5, RegT4, RegT5, (UINT32)(p - pBLECall));
 
             *(DWORD*)p = ITypeInstr(0x13, 0, RegA0, RegT2, 0);// addi  a0, t2, 0
             p += 4;
diff --git a/src/coreclr/vm/rtlfunctions.cpp b/src/coreclr/vm/rtlfunctions.cpp
index f3f80338f3f8..0e77e31a02d1 100644
--- a/src/coreclr/vm/rtlfunctions.cpp
+++ b/src/coreclr/vm/rtlfunctions.cpp
@@ -16,7 +16,7 @@
 #include "rtlfunctions.h"
 
 
-#ifdef TARGET_AMD64
+#ifdef HOST_AMD64
 
 RtlVirtualUnwindFn*                 RtlVirtualUnwind_Unsafe         = NULL;
 
@@ -45,7 +45,7 @@ HRESULT EnsureRtlFunctions()
     return S_OK;
 }
 
-#else // TARGET_AMD64
+#else // HOST_AMD64
 
 HRESULT EnsureRtlFunctions()
 {
@@ -53,9 +53,9 @@ HRESULT EnsureRtlFunctions()
     return S_OK;
 }
 
-#endif // TARGET_AMD64
+#endif // HOST_AMD64
 
-#if defined(FEATURE_EH_FUNCLETS)
+#ifndef HOST_X86
 
 VOID InstallEEFunctionTable (
         PVOID pvTableID,
@@ -127,5 +127,4 @@ VOID InstallEEFunctionTable (
     }
 }
 
-#endif // FEATURE_EH_FUNCLETS
-
+#endif // HOST_X86
diff --git a/src/coreclr/vm/rtlfunctions.h b/src/coreclr/vm/rtlfunctions.h
index 6d9ff9689ca4..ef0f64cba9db 100644
--- a/src/coreclr/vm/rtlfunctions.h
+++ b/src/coreclr/vm/rtlfunctions.h
@@ -49,7 +49,7 @@ PVOID DecodeDynamicFunctionTableContext (PVOID pvContext)
 #endif // FEATURE_EH_FUNCLETS
 
 
-#if defined(FEATURE_EH_FUNCLETS) && !defined(DACCESS_COMPILE) && !defined(TARGET_UNIX)
+#if !defined(DACCESS_COMPILE) && defined(HOST_WINDOWS) && !defined(HOST_X86)
 
 // Wrapper for RtlInstallFunctionTableCallback.
 VOID InstallEEFunctionTable(
@@ -67,12 +67,12 @@ VOID DeleteEEFunctionTable(
     RtlDeleteFunctionTable((PT_RUNTIME_FUNCTION)((ULONG64)pvTableID | 3));
 }
 
-#else // FEATURE_EH_FUNCLETS && !DACCESS_COMPILE && !TARGET_UNIX
+#else
 
 #define InstallEEFunctionTable(pvTableID, pvStartRange, cbRange, pfnGetRuntimeFunctionCallback, pvContext, TableType) do { } while (0)
 #define DeleteEEFunctionTable(pvTableID) do { } while (0)
 
-#endif // FEATURE_EH_FUNCLETS && !DACCESS_COMPILE && !TARGET_UNIX
+#endif
 
 
 #endif // !__RTLFUNCTIONS_H__
diff --git a/src/coreclr/vm/runtimecallablewrapper.cpp b/src/coreclr/vm/runtimecallablewrapper.cpp
index 67e8a2833598..aa28bfa6d837 100644
--- a/src/coreclr/vm/runtimecallablewrapper.cpp
+++ b/src/coreclr/vm/runtimecallablewrapper.cpp
@@ -1227,16 +1227,6 @@ HRESULT RCWCleanupList::ReleaseRCWListInCorrectCtx(LPVOID pData)
 
     LPVOID pCurrCtxCookie = GetCurrentCtxCookie();
 
-    // If we are releasing our IP's as a result of shutdown, we MUST not transition
-    // into cooperative GC mode. This "fix" will prevent us from doing so.
-    if (g_fEEShutDown & ShutDown_Finalize2)
-    {
-        Thread *pThread = GetThreadNULLOk();
-        if (pThread && !FinalizerThread::IsCurrentThreadFinalizer())
-            pThread->SetThreadStateNC(Thread::TSNC_UnsafeSkipEnterCooperative);
-    }
-
-
     // Make sure we're in the right context / apartment.
     // Also - if we've already transitioned once, we don't want to do so again.
     //  If the cookie exists in multiple MTA apartments, and the STA has gone away
@@ -1268,14 +1258,6 @@ HRESULT RCWCleanupList::ReleaseRCWListInCorrectCtx(LPVOID pData)
         }
     }
 
-    // Reset the bit indicating we cannot transition into cooperative GC mode.
-    if (g_fEEShutDown & ShutDown_Finalize2)
-    {
-        Thread *pThread = GetThreadNULLOk();
-        if (pThread && !FinalizerThread::IsCurrentThreadFinalizer())
-            pThread->ResetThreadStateNC(Thread::TSNC_UnsafeSkipEnterCooperative);
-    }
-
     return S_OK;
 }
 
@@ -1559,7 +1541,6 @@ void RCW::RemoveMemoryPressure()
         NOTHROW;
         GC_TRIGGERS;
         MODE_PREEMPTIVE;
-        PRECONDITION((GetThread()->m_StateNC & Thread::TSNC_UnsafeSkipEnterCooperative) == 0);
     }
     CONTRACTL_END;
 
@@ -1771,7 +1752,6 @@ void RCW::Cleanup()
     // if the wrapper is still in the cache.  Also, if we can't switch to coop mode,
     // we're guaranteed to have already decoupled the RCW from its object.
 #ifdef _DEBUG
-    if (!(GetThread()->m_StateNC & Thread::TSNC_UnsafeSkipEnterCooperative))
     {
         GCX_COOP();
 
@@ -1789,9 +1769,7 @@ void RCW::Cleanup()
         ReleaseAllInterfacesCallBack(this);
 
         // Remove the memory pressure caused by this RCW (if present)
-        // If we're in a shutdown situation, we can ignore the memory pressure.
-        if ((GetThread()->m_StateNC & Thread::TSNC_UnsafeSkipEnterCooperative) == 0 && !g_fForbidEnterEE)
-            RemoveMemoryPressure();
+        RemoveMemoryPressure();
     }
 
 #ifdef _DEBUG
diff --git a/src/coreclr/vm/runtimehandles.h b/src/coreclr/vm/runtimehandles.h
index 694a25a30623..6b0d995977c6 100644
--- a/src/coreclr/vm/runtimehandles.h
+++ b/src/coreclr/vm/runtimehandles.h
@@ -201,8 +201,6 @@ extern "C" void QCALLTYPE RuntimeTypeHandle_RegisterCollectibleTypeDependency(QC
 class RuntimeMethodHandle {
 
 public:
-    static FCDECL1(ReflectMethodObject*, GetCurrentMethod, StackCrawlMark* stackMark);
-
     static FCDECL4(Object*, InvokeMethod, Object *target, PVOID* args, SignatureNative* pSig, CLR_BOOL fConstructor);
 
     static FCDECL2(Object*, ReboxToNullable, Object *pBoxedValUNSAFE, ReflectClassBaseObject *pDestUNSAFE);
@@ -275,6 +273,9 @@ class RuntimeMethodHandle {
     static FCDECL1(Object*, GetLoaderAllocator, MethodDesc *pMethod);
 };
 
+
+extern "C" MethodDesc* QCALLTYPE MethodBase_GetCurrentMethod(QCall::StackCrawlMarkHandle stackMark);
+
 extern "C" BOOL QCALLTYPE RuntimeMethodHandle_IsCAVisibleFromDecoratedType(
         QCall::TypeHandle targetTypeHandle,
         MethodDesc * pTargetCtor,
@@ -292,10 +293,13 @@ extern "C" void QCALLTYPE RuntimeMethodHandle_Destroy(MethodDesc * pMethod);
 class RuntimeFieldHandle {
 
 public:
-    static FCDECL5(Object*, GetValue, ReflectFieldObject *pFieldUNSAFE, Object *instanceUNSAFE, ReflectClassBaseObject *pFieldType, ReflectClassBaseObject *pDeclaringType, CLR_BOOL *pDomainInitialized);
-    static FCDECL7(void, SetValue, ReflectFieldObject *pFieldUNSAFE, Object *targetUNSAFE, Object *valueUNSAFE, ReflectClassBaseObject *pFieldType, DWORD attr, ReflectClassBaseObject *pDeclaringType, CLR_BOOL *pDomainInitialized);
+    static FCDECL5(Object*, GetValue, ReflectFieldObject *pFieldUNSAFE, Object *instanceUNSAFE, ReflectClassBaseObject *pFieldType, ReflectClassBaseObject *pDeclaringType, CLR_BOOL *pIsClassInitialized);
+    static FCDECL6(void, SetValue, ReflectFieldObject *pFieldUNSAFE, Object *targetUNSAFE, Object *valueUNSAFE, ReflectClassBaseObject *pFieldType, ReflectClassBaseObject *pDeclaringType, CLR_BOOL *pIsClassInitialized);
     static FCDECL4(Object*, GetValueDirect, ReflectFieldObject *pFieldUNSAFE, ReflectClassBaseObject *pFieldType, TypedByRef *pTarget, ReflectClassBaseObject *pDeclaringType);
     static FCDECL5(void, SetValueDirect, ReflectFieldObject *pFieldUNSAFE, ReflectClassBaseObject *pFieldType, TypedByRef *pTarget, Object *valueUNSAFE, ReflectClassBaseObject *pContextType);
+    static FCDECL1(FC_BOOL_RET, IsFastPathSupported, ReflectFieldObject *pField);
+    static FCDECL1(INT32, GetInstanceFieldOffset, ReflectFieldObject *pField);
+    static FCDECL1(void*, GetStaticFieldAddress, ReflectFieldObject *pField);
     static FCDECL1(StringObject*, GetName, ReflectFieldObject *pFieldUNSAFE);
     static FCDECL1(LPCUTF8, GetUtf8Name, FieldDesc *pField);
 
diff --git a/src/coreclr/vm/safehandle.cpp b/src/coreclr/vm/safehandle.cpp
deleted file mode 100644
index 38ee027a41f2..000000000000
--- a/src/coreclr/vm/safehandle.cpp
+++ /dev/null
@@ -1,220 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-//
-
-/*============================================================
-**
-** Class:  SafeHandle
-**
-**
-** Purpose: The unmanaged implementation of the SafeHandle
-**          class
-**
-===========================================================*/
-
-#include "common.h"
-#include "vars.hpp"
-#include "object.h"
-#include "excep.h"
-#include "frames.h"
-#include "eecontract.h"
-#include "typestring.h"
-
-WORD SafeHandle::s_IsInvalidHandleMethodSlot = MethodTable::NO_SLOT;
-WORD SafeHandle::s_ReleaseHandleMethodSlot = MethodTable::NO_SLOT;
-
-void SafeHandle::Init()
-{
-    CONTRACTL {
-        THROWS;
-        GC_TRIGGERS;
-        MODE_ANY;
-    } CONTRACTL_END;
-
-    // For reliability purposes, we need to eliminate all possible failure
-    // points before making a call to a CER method. IsInvalidHandle, and
-    // ReleaseHandle methods are critical calls that are already prepared (code:
-    // PrepareCriticalFinalizerObject). As a performance optimization, we are
-    // calling these methods through a fast macro that assumes the method slot
-    // has been already cached. Since figuring out the method slot for these 2
-    // methods involves calling .GetMethod which can fail, we are doing this
-    // eagerly here, Otherwise we will have to do it at the time of the call,
-    // and this could be at risk if .GetMethod failed.
-    MethodDesc* pMD = CoreLibBinder::GetMethod(METHOD__SAFE_HANDLE__GET_IS_INVALID);
-    s_IsInvalidHandleMethodSlot = pMD->GetSlot();
-
-    pMD = CoreLibBinder::GetMethod(METHOD__SAFE_HANDLE__RELEASE_HANDLE);
-    s_ReleaseHandleMethodSlot = pMD->GetSlot();
-}
-
-// These AddRef and Release methods (and supporting functions) also exist with equivalent
-// code in SafeHandle.cs.  Those implementations are the primary ones used by most code
-// and exposed publicly; the implementations here are only for use by the runtime, without
-// having to call out to the managed implementations.
-
-void SafeHandle::AddRef()
-{
-    CONTRACTL {
-        THROWS;
-        GC_TRIGGERS;
-        MODE_COOPERATIVE;
-        INSTANCE_CHECK;
-    } CONTRACTL_END;
-
-    // Cannot use "this" after Release, which toggles the GC mode.
-    SAFEHANDLEREF sh(this);
-
-    _ASSERTE(sh->IsFullyInitialized());
-
-    // See comments in SafeHandle.cs
-
-    INT32 oldState, newState;
-    do {
-
-        oldState = sh->m_state;
-
-        if (oldState & SH_State_Closed)
-            COMPlusThrow(kObjectDisposedException, IDS_EE_SAFEHANDLECLOSED);
-
-        newState = oldState + SH_RefCountOne;
-
-    } while (InterlockedCompareExchange((LONG*)&sh->m_state, newState, oldState) != oldState);
-}
-
-void SafeHandle::Release(bool fDispose)
-{
-    CONTRACTL {
-        THROWS;
-        GC_TRIGGERS;
-        MODE_COOPERATIVE;
-        INSTANCE_CHECK;
-    } CONTRACTL_END;
-
-    // Cannot use "this" after RunReleaseMethod, which toggles the GC mode.
-    SAFEHANDLEREF sh(this);
-
-    _ASSERTE(sh->IsFullyInitialized());
-
-    // See comments in SafeHandle.cs
-
-    bool fPerformRelease = false;
-
-    INT32 oldState, newState;
-    do {
-
-        oldState = sh->m_state;
-        if (fDispose && (oldState & SH_State_Disposed))
-            return;
-
-        if ((oldState & SH_State_RefCount) == 0)
-            COMPlusThrow(kObjectDisposedException, IDS_EE_SAFEHANDLECLOSED);
-
-        fPerformRelease = ((oldState & (SH_State_RefCount | SH_State_Closed)) == SH_RefCountOne) && m_ownsHandle;
-
-        if (fPerformRelease)
-        {
-            GCPROTECT_BEGIN(sh);
-
-            CLR_BOOL fIsInvalid = FALSE;
-
-            DECLARE_ARGHOLDER_ARRAY(args, 1);
-            args[ARGNUM_0] = OBJECTREF_TO_ARGHOLDER(sh);
-
-            PREPARE_SIMPLE_VIRTUAL_CALLSITE_USING_SLOT(s_IsInvalidHandleMethodSlot, sh);
-
-            CRITICAL_CALLSITE;
-            CALL_MANAGED_METHOD(fIsInvalid, CLR_BOOL, args);
-
-            if (fIsInvalid)
-            {
-                fPerformRelease = false;
-            }
-
-            GCPROTECT_END();
-        }
-
-        newState = (oldState - SH_RefCountOne) |
-                   ((oldState & SH_State_RefCount) == SH_RefCountOne ? SH_State_Closed : 0) |
-                   (fDispose ? SH_State_Disposed : 0);
-
-    } while (InterlockedCompareExchange((LONG*)&sh->m_state, newState, oldState) != oldState);
-
-    if (fPerformRelease)
-        RunReleaseMethod((SafeHandle*) OBJECTREFToObject(sh));
-}
-
-void SafeHandle::SetHandle(LPVOID handle)
-{
-    CONTRACTL {
-        THROWS;
-        MODE_COOPERATIVE;
-        INSTANCE_CHECK;
-    } CONTRACTL_END;
-
-    _ASSERTE(IsFullyInitialized());
-
-    // The SafeHandle's handle field can only be set it if the SafeHandle isn't
-    // closed or disposed and its ref count is 1.
-    if (m_state != (LONG)SH_RefCountOne)
-        COMPlusThrow(kObjectDisposedException, IDS_EE_SAFEHANDLECANNOTSETHANDLE);
-
-    m_handle = handle;
-}
-
-void AcquireSafeHandle(SAFEHANDLEREF* s)
-{
-    WRAPPER_NO_CONTRACT;
-    GCX_COOP();
-    _ASSERTE(s != NULL && *s != NULL);
-    (*s)->AddRef();
-}
-
-void ReleaseSafeHandle(SAFEHANDLEREF* s)
-{
-    WRAPPER_NO_CONTRACT;
-    GCX_COOP();
-    _ASSERTE(s != NULL && *s != NULL);
-    (*s)->Release(false);
-}
-
-
-// This could theoretically be an instance method, but we'd need to
-// somehow GC protect the this pointer or never dereference any
-// field within the object.  It's a lot simpler if we simply make
-// this method static.
-void SafeHandle::RunReleaseMethod(SafeHandle* psh)
-{
-    CONTRACTL {
-        THROWS;
-        GC_TRIGGERS;
-        MODE_COOPERATIVE;
-    } CONTRACTL_END;
-
-    SAFEHANDLEREF sh(psh);
-    _ASSERTE(sh != NULL);
-    _ASSERTE(sh->m_ownsHandle);
-    _ASSERTE(sh->IsFullyInitialized());
-
-    GCPROTECT_BEGIN(sh);
-
-    // Save last error from P/Invoke in case the implementation of ReleaseHandle
-    // trashes it (important because this ReleaseHandle could occur implicitly
-    // as part of unmarshaling another P/Invoke).
-    Thread *pThread = GetThread();
-    DWORD dwSavedError = pThread->m_dwLastError;
-
-    CLR_BOOL fReleaseHandle = FALSE;
-
-    DECLARE_ARGHOLDER_ARRAY(args, 1);
-    args[ARGNUM_0] = OBJECTREF_TO_ARGHOLDER(sh);
-
-    PREPARE_SIMPLE_VIRTUAL_CALLSITE_USING_SLOT(s_ReleaseHandleMethodSlot, sh);
-
-    CRITICAL_CALLSITE;
-    CALL_MANAGED_METHOD(fReleaseHandle, CLR_BOOL, args);
-
-    pThread->m_dwLastError = dwSavedError;
-
-    GCPROTECT_END();
-}
diff --git a/src/coreclr/vm/siginfo.cpp b/src/coreclr/vm/siginfo.cpp
index 115b5454bf59..531f6d95f686 100644
--- a/src/coreclr/vm/siginfo.cpp
+++ b/src/coreclr/vm/siginfo.cpp
@@ -4813,9 +4813,11 @@ BOOL MetaSig::CompareVariableConstraints(const Substitution *pSubst1,
             if ((specialConstraints2 & (gpDefaultConstructorConstraint | gpNotNullableValueTypeConstraint)) == 0)
                 return FALSE;
         }
-        if ((specialConstraints1 & gpAcceptByRefLike) != 0)
+
+        // Constraints that 'allow' must check the overridden first
+        if ((specialConstraints2 & gpAllowByRefLike) != 0)
         {
-            if ((specialConstraints2 & gpAcceptByRefLike) == 0)
+            if ((specialConstraints1 & gpAllowByRefLike) == 0)
                 return FALSE;
         }
     }
diff --git a/src/coreclr/vm/siginfo.hpp b/src/coreclr/vm/siginfo.hpp
index a0ec6b3d4a26..fab9a79260d2 100644
--- a/src/coreclr/vm/siginfo.hpp
+++ b/src/coreclr/vm/siginfo.hpp
@@ -394,7 +394,7 @@ class Substitution
 
     Substitution(
         ModuleBase *         pModuleArg,
-        const SigPointer &   sigInst,
+        SigPointer           sigInst,
         const Substitution * pNextSubstitution)
     {
         LIMITED_METHOD_CONTRACT;
diff --git a/src/coreclr/vm/spinlock.cpp b/src/coreclr/vm/spinlock.cpp
index e135dcd7945f..e750ebab5d52 100644
--- a/src/coreclr/vm/spinlock.cpp
+++ b/src/coreclr/vm/spinlock.cpp
@@ -33,7 +33,11 @@ SpinLock::SpinLock()
     STATIC_CONTRACT_NOTHROW;
     STATIC_CONTRACT_GC_NOTRIGGER;
 
+    m_lock = 0;
+
+#ifdef _DEBUG
     m_Initialized = UnInitialized;
+#endif
 }
 
 void SpinLock::Init(LOCK_TYPE type, bool RequireCoopGC)
@@ -45,6 +49,7 @@ void SpinLock::Init(LOCK_TYPE type, bool RequireCoopGC)
     }
     CONTRACTL_END;
 
+#ifdef _DEBUG
     if (m_Initialized == Initialized)
     {
         _ASSERTE (type == m_LockType);
@@ -72,17 +77,12 @@ void SpinLock::Init(LOCK_TYPE type, bool RequireCoopGC)
         }
     }
 
-    {
-        m_lock = 0;
-    }
-
-#ifdef _DEBUG
     m_LockType = type;
     m_requireCoopGCMode = RequireCoopGC;
-#endif
 
     _ASSERTE (m_Initialized == BeingInitialized);
     m_Initialized = Initialized;
+#endif
 }
 
 #ifdef _DEBUG
diff --git a/src/coreclr/vm/spinlock.h b/src/coreclr/vm/spinlock.h
index 7601d2341d3b..b407d1c13556 100644
--- a/src/coreclr/vm/spinlock.h
+++ b/src/coreclr/vm/spinlock.h
@@ -153,6 +153,7 @@ class SpinLock
         LONG                m_lock;     // LONG used in interlocked exchange
     };
 
+#ifdef _DEBUG
     enum SpinLockState
     {
         UnInitialized,
@@ -163,7 +164,6 @@ class SpinLock
     Volatile<SpinLockState>      m_Initialized; // To verify initialized
                                         // And initialize once
 
-#ifdef _DEBUG
     LOCK_TYPE           m_LockType;     // lock type to track statistics
 
     // Check for dead lock situation.
diff --git a/src/coreclr/vm/stackingallocator.cpp b/src/coreclr/vm/stackingallocator.cpp
index 286c4d09e5fd..7db829eb1b44 100644
--- a/src/coreclr/vm/stackingallocator.cpp
+++ b/src/coreclr/vm/stackingallocator.cpp
@@ -188,7 +188,7 @@ bool StackingAllocator::AllocNewBlockForBytes(unsigned n)
         // request is larger than MaxBlockSize then allocate exactly that
         // amount.
         unsigned lower = MinBlockSize;
-        size_t allocSize = sizeof(StackBlock) + max(n, min(max(n * 4, lower), MaxBlockSize));
+        size_t allocSize = sizeof(StackBlock) + max(n, min(max(n * 4, lower), (unsigned)MaxBlockSize));
 
         // Allocate the block.
         // <TODO>@todo: Is it worth implementing a non-thread safe standard heap for
@@ -427,24 +427,33 @@ void * __cdecl operator new[](size_t n, StackingAllocator * alloc, const NoThrow
     return alloc->UnsafeAllocNoThrow((unsigned)n);
 }
 
+thread_local StackingAllocator* StackingAllocatorHolder::t_currentStackingAllocator = nullptr;
+
 StackingAllocatorHolder::~StackingAllocatorHolder()
 {
     m_pStackingAllocator->Collapse(m_checkpointMarker);
     if (m_owner)
     {
-        m_thread->m_stackLocalAllocator = NULL;
+        t_currentStackingAllocator = NULL;
         m_pStackingAllocator->~StackingAllocator();
     }
 }
 
-StackingAllocatorHolder::StackingAllocatorHolder(StackingAllocator *pStackingAllocator, Thread *pThread, bool owner) :
+StackingAllocatorHolder::StackingAllocatorHolder(StackingAllocator *pStackingAllocator, bool owner) :
     m_pStackingAllocator(pStackingAllocator),
     m_checkpointMarker(pStackingAllocator->GetCheckpoint()),
-    m_thread(pThread),
     m_owner(owner)
 {
+    _ASSERTE(pStackingAllocator != nullptr);
+    _ASSERTE((t_currentStackingAllocator == nullptr) == m_owner);
     if (m_owner)
     {
-        m_thread->m_stackLocalAllocator = pStackingAllocator;
+        t_currentStackingAllocator = pStackingAllocator;
     }
 }
+
+
+StackingAllocator* StackingAllocatorHolder::GetCurrentThreadStackingAllocator()
+{
+    return t_currentStackingAllocator;
+}
diff --git a/src/coreclr/vm/stackingallocator.h b/src/coreclr/vm/stackingallocator.h
index 2753de73908b..c306e1b482d3 100644
--- a/src/coreclr/vm/stackingallocator.h
+++ b/src/coreclr/vm/stackingallocator.h
@@ -224,14 +224,13 @@ private :
 };
 
 #define ACQUIRE_STACKING_ALLOCATOR(stackingAllocatorName)  \
-  Thread *pThread__ACQUIRE_STACKING_ALLOCATOR = GetThread(); \
-  StackingAllocator *stackingAllocatorName = pThread__ACQUIRE_STACKING_ALLOCATOR->m_stackLocalAllocator; \
+  StackingAllocator *stackingAllocatorName = StackingAllocatorHolder::GetCurrentThreadStackingAllocator(); \
   bool allocatorOwner__ACQUIRE_STACKING_ALLOCATOR = false; \
   NewArrayHolder<char> heapAllocatedStackingBuffer__ACQUIRE_STACKING_ALLOCATOR; \
 \
   if (stackingAllocatorName == NULL) \
   { \
-      if (pThread__ACQUIRE_STACKING_ALLOCATOR->CheckCanUseStackAlloc()) \
+      if (GetThread()->CheckCanUseStackAlloc()) \
       { \
           stackingAllocatorName = new (_alloca(sizeof(StackingAllocator))) StackingAllocator; \
       } \
@@ -245,21 +244,28 @@ private :
       }\
       allocatorOwner__ACQUIRE_STACKING_ALLOCATOR = true; \
   } \
-  StackingAllocatorHolder sah_ACQUIRE_STACKING_ALLOCATOR(stackingAllocatorName, pThread__ACQUIRE_STACKING_ALLOCATOR, allocatorOwner__ACQUIRE_STACKING_ALLOCATOR)
+  StackingAllocatorHolder sah_ACQUIRE_STACKING_ALLOCATOR(stackingAllocatorName, allocatorOwner__ACQUIRE_STACKING_ALLOCATOR)
 
-class Thread;
 class StackingAllocatorHolder
 {
+    // Allocator used during marshaling for temporary buffers, much faster than
+    // heap allocation.
+    //
+    // Uses of this allocator should be effectively statically scoped, i.e. a "region"
+    // is started using a CheckPointHolder and GetCheckpoint, and this region can then be used for allocations
+    // from that point onwards, and then all memory is reclaimed when the static scope for the
+    // checkpoint is exited by the running thread.
+    static thread_local StackingAllocator* t_currentStackingAllocator;
     StackingAllocator *m_pStackingAllocator;
     void* m_checkpointMarker;
-    Thread* m_thread;
     bool m_owner;
 
     public:
     ~StackingAllocatorHolder();
-    StackingAllocatorHolder(StackingAllocator *pStackingAllocator, Thread *pThread, bool owner);
+    StackingAllocatorHolder(StackingAllocator *pStackingAllocator, bool owner);
     StackingAllocator *GetStackingAllocator() { return m_pStackingAllocator; }
     StackingAllocator &operator->() { return *m_pStackingAllocator; }
+    static StackingAllocator* GetCurrentThreadStackingAllocator();
 };
 
 
diff --git a/src/coreclr/vm/stackwalk.cpp b/src/coreclr/vm/stackwalk.cpp
index 92a0cd9dc76b..56e76cdf4949 100644
--- a/src/coreclr/vm/stackwalk.cpp
+++ b/src/coreclr/vm/stackwalk.cpp
@@ -30,7 +30,6 @@ CrawlFrame::CrawlFrame()
     LIMITED_METHOD_DAC_CONTRACT;
     pCurGSCookie = NULL;
     pFirstGSCookie = NULL;
-    isCachedMethod = FALSE;
 }
 
 Assembly* CrawlFrame::GetAssembly()
@@ -995,11 +994,8 @@ StackWalkAction Thread::StackWalkFrames(PSTACKWALKFRAMESCALLBACK pCallback,
     {
         // Initialize the context
         memset(&ctx, 0x00, sizeof(T_CONTEXT));
-        SetIP(&ctx, 0);
-        SetSP(&ctx, 0);
-        SetFP(&ctx, 0);
         LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    starting with partial context\n"));
-        FillRegDisplay(&rd, &ctx);
+        FillRegDisplay(&rd, &ctx, !!(flags & LIGHTUNWIND));
     }
 
 #ifdef STACKWALKER_MAY_POP_FRAMES
@@ -1010,18 +1006,6 @@ StackWalkAction Thread::StackWalkFrames(PSTACKWALKFRAMESCALLBACK pCallback,
     return StackWalkFramesEx(&rd, pCallback, pData, flags, pStartFrame);
 }
 
-StackWalkAction StackWalkFunctions(Thread * thread,
-                                   PSTACKWALKFRAMESCALLBACK pCallback,
-                                   VOID * pData)
-{
-    // Note: there are cases (i.e., exception handling) where we may never return from this function. This means
-    // that any C++ destructors pushed in this function will never execute, and it means that this function can
-    // never have a dynamic contract.
-    STATIC_CONTRACT_WRAPPER;
-
-    return thread->StackWalkFrames(pCallback, pData, FUNCTIONSONLY);
-}
-
 // ----------------------------------------------------------------------------
 // StackFrameIterator::StackFrameIterator
 //
@@ -1114,6 +1098,7 @@ void StackFrameIterator::CommonCtor(Thread * pThread, PTR_Frame pFrame, ULONG32
     m_forceReportingWhileSkipping = ForceGCReportingStage::Off;
     m_movedPastFirstExInfo = false;
     m_fFuncletNotSeen = false;
+    m_fFoundFirstFunclet = false;
 #if defined(RECORD_RESUMABLE_FRAME_SP)
     m_pvResumableFrameTargetSP = NULL;
 #endif
@@ -1203,7 +1188,8 @@ BOOL StackFrameIterator::Init(Thread *    pThread,
 
     m_crawl.pRD = pRegDisp;
 
-    m_codeManFlags = (ICodeManagerFlags)((flags & QUICKUNWIND) ? 0 : UpdateAllRegs);
+    m_codeManFlags = (ICodeManagerFlags)
+        (((flags & (QUICKUNWIND | LIGHTUNWIND)) ? 0 : UpdateAllRegs) | ((flags & LIGHTUNWIND) ? LightUnwind : 0));
     m_scanFlag = ExecutionManager::GetScanFlags();
 
 #if defined(ELIMINATE_FEF)
@@ -1234,6 +1220,12 @@ BOOL StackFrameIterator::Init(Thread *    pThread,
 
     // process the REGDISPLAY and stop at the first frame
     ProcessIp(GetControlPC(m_crawl.pRD));
+#ifdef FEATURE_EH_FUNCLETS
+    if (m_crawl.isFrameless && !!(m_crawl.pRD->pCurrentContext->ContextFlags & CONTEXT_EXCEPTION_ACTIVE))
+    {
+        m_crawl.hasFaulted = true;
+    }
+#endif // FEATURE_EH_FUNCLETS
     ProcessCurrentFrame();
 
     // advance to the next frame which matches the stackwalk flags
@@ -1302,7 +1294,8 @@ BOOL StackFrameIterator::ResetRegDisp(PREGDISPLAY pRegDisp,
 
     m_crawl.pRD = pRegDisp;
 
-    m_codeManFlags = (ICodeManagerFlags)((m_flags & QUICKUNWIND) ? 0 : UpdateAllRegs);
+    m_codeManFlags = (ICodeManagerFlags)
+        (((m_flags & (QUICKUNWIND | LIGHTUNWIND)) ? 0 : UpdateAllRegs) | ((m_flags & LIGHTUNWIND) ? LightUnwind : 0));
 
     // make sure the REGDISPLAY is synchronized with the CONTEXT
     UpdateRegDisp();
@@ -1321,7 +1314,7 @@ BOOL StackFrameIterator::ResetRegDisp(PREGDISPLAY pRegDisp,
         {
             // On 64-bit and ARM, we stop at the explicit frames contained in a managed stack frame
             // before the managed stack frame itself.
-            EECodeManager::EnsureCallerContextIsValid(m_crawl.pRD, NULL);
+            EECodeManager::EnsureCallerContextIsValid(m_crawl.pRD, NULL, m_codeManFlags);
             curSP = GetSP(m_crawl.pRD->pCallerContext);
         }
 #endif // PROCESS_EXPLICIT_FRAME_BEFORE_MANAGED_FRAME
@@ -1373,7 +1366,7 @@ BOOL StackFrameIterator::ResetRegDisp(PREGDISPLAY pRegDisp,
                 else
                 {
                     // unwind the REGDISPLAY using the transition frame and check the EBP
-                    m_crawl.pFrame->UpdateRegDisplay(&tmpRD);
+                    m_crawl.pFrame->UpdateRegDisplay(&tmpRD, m_flags & UNWIND_FLOATS);
                     if (GetRegdisplayFP(&tmpRD) != curEBP)
                     {
                         break;
@@ -1400,8 +1393,7 @@ BOOL StackFrameIterator::ResetRegDisp(PREGDISPLAY pRegDisp,
                     m_crawl.isIPadjusted = false;
                 }
 
-                m_crawl.pFrame->UpdateRegDisplay(m_crawl.pRD);
-
+                m_crawl.pFrame->UpdateRegDisplay(m_crawl.pRD, m_flags & UNWIND_FLOATS);
                 _ASSERTE(curPc == GetControlPC(m_crawl.pRD));
             }
 
@@ -1469,13 +1461,12 @@ void StackFrameIterator::ResetCrawlFrame()
     m_crawl.isFilterFuncletCached = false;
     m_crawl.fShouldParentToFuncletSkipReportingGCReferences = false;
     m_crawl.fShouldParentFrameUseUnwindTargetPCforGCReporting = false;
+    m_crawl.fShouldSaveFuncletInfo = false;
+    m_crawl.fShouldParentToFuncletReportSavedFuncletSlots = false;
 #endif // FEATURE_EH_FUNCLETS
 
     m_crawl.pThread = this->m_pThread;
 
-    m_crawl.isCachedMethod  = false;
-    m_crawl.stackWalkCache.ClearEntry();
-
     m_crawl.pCurGSCookie   = NULL;
     m_crawl.pFirstGSCookie = NULL;
 }
@@ -1555,6 +1546,66 @@ BOOL StackFrameIterator::IsValid(void)
     return TRUE;
 } // StackFrameIterator::IsValid()
 
+#ifndef DACCESS_COMPILE
+#ifdef FEATURE_EH_FUNCLETS
+//---------------------------------------------------------------------------------------
+//
+// Advance to the position that the other iterator is currently at.
+//
+void StackFrameIterator::SkipTo(StackFrameIterator *pOtherStackFrameIterator)
+{
+    // We copy the other stack frame iterator over the current one, but we need to
+    // keep a couple of members untouched. So we save them here and restore them
+    // after the copy.
+    ExInfo* pPrevExInfo = GetNextExInfo();
+    REGDISPLAY *pRD = m_crawl.GetRegisterSet();
+    Frame *pStartFrame = m_pStartFrame;
+#ifdef _DEBUG
+    Frame *pRealStartFrame = m_pRealStartFrame;
+#endif
+
+    *this = *pOtherStackFrameIterator;
+
+    m_pNextExInfo = pPrevExInfo;
+    m_crawl.pRD = pRD;
+    m_pStartFrame = pStartFrame;
+#ifdef _DEBUG
+    m_pRealStartFrame = pRealStartFrame;
+#endif
+
+    REGDISPLAY *pOtherRD = pOtherStackFrameIterator->m_crawl.GetRegisterSet();
+    *pRD->pCurrentContextPointers = *pOtherRD->pCurrentContextPointers;
+    SetIP(pRD->pCurrentContext, GetIP(pOtherRD->pCurrentContext));
+    SetSP(pRD->pCurrentContext, GetSP(pOtherRD->pCurrentContext));
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContext->regname = (pRD->pCurrentContextPointers->regname == NULL) ? pOtherRD->pCurrentContext->regname : *pRD->pCurrentContextPointers->regname;
+    ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContext->regname = pOtherRD->pCurrentContext->regname;
+    ENUM_FP_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+    pRD->IsCallerContextValid = pOtherRD->IsCallerContextValid;
+    if (pRD->IsCallerContextValid)
+    {
+        *pRD->pCallerContextPointers = *pOtherRD->pCallerContextPointers;
+        SetIP(pRD->pCallerContext, GetIP(pOtherRD->pCallerContext));
+        SetSP(pRD->pCallerContext, GetSP(pOtherRD->pCallerContext));
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCallerContext->regname = (pRD->pCallerContextPointers->regname == NULL) ? pOtherRD->pCallerContext->regname : *pRD->pCallerContextPointers->regname;
+        ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCallerContext->regname = pOtherRD->pCallerContext->regname;
+        ENUM_FP_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+    }
+    SyncRegDisplayToCurrentContext(pRD);
+}
+#endif // FEATURE_EH_FUNCLETS
+#endif // DACCESS_COMPILE
+
 //---------------------------------------------------------------------------------------
 //
 // Advance to the next frame according to the stackwalk flags.  If the iterator is stopped
@@ -1643,10 +1694,11 @@ StackWalkAction StackFrameIterator::Filter(void)
         {
             if (!m_movedPastFirstExInfo)
             {
-                if ((pExInfo->m_passNumber == 2) && !pExInfo->m_csfEnclosingClause.IsNull() && m_sfFuncletParent.IsNull())
+                if ((pExInfo->m_passNumber == 2) && !pExInfo->m_csfEnclosingClause.IsNull() && m_sfFuncletParent.IsNull() && pExInfo->m_lastReportedFunclet.IP != 0)
                 {
                     // We are in the 2nd pass and we have already called an exceptionally called
-                    // a finally funclet, but we have not seen any funclet on the call stack yet.
+                    // finally funclet and reported that to GC in a previous GC run. But we have
+                    // not seen any funclet on the call stack yet.
                     // Simulate that we have actualy seen a finally funclet during this pass and
                     // that it didn't report GC references to ensure that the references will be
                     // reported by the parent correctly.
@@ -1663,6 +1715,8 @@ StackWalkAction StackFrameIterator::Filter(void)
             }
         }
 
+        m_crawl.fShouldParentToFuncletReportSavedFuncletSlots = false;
+
         // by default, there is no funclet for the current frame
         // that reported GC references
         m_crawl.fShouldParentToFuncletSkipReportingGCReferences = false;
@@ -1671,6 +1725,8 @@ StackWalkAction StackFrameIterator::Filter(void)
         // CrawlFrame
         m_crawl.fShouldCrawlframeReportGCReferences = true;
 
+        m_crawl.fShouldSaveFuncletInfo = false;
+
         // By default, assume that parent frame is going to report GC references from
         // the actual location reported by the stack walk.
         m_crawl.fShouldParentFrameUseUnwindTargetPCforGCReporting = false;
@@ -1867,7 +1923,7 @@ StackWalkAction StackFrameIterator::Filter(void)
                                                 // Initiate force reporting of references in the new managed exception handling code frames. 
                                                 // These frames are still alive when we are in a finally funclet.
                                                 m_forceReportingWhileSkipping = ForceGCReportingStage::LookForManagedFrame;
-                                                STRESS_LOG0(LF_GCROOTS, LL_INFO100, "STACKWALK: Setting m_forceReportingWhileSkipping = ForceGCReportingStage::LookForManagedFrame\n");
+                                                STRESS_LOG0(LF_GCROOTS, LL_INFO100, "STACKWALK: Setting m_forceReportingWhileSkipping = ForceGCReportingStage::LookForManagedFrame while processing filter funclet\n");
                                             }
                                         }
                                     }
@@ -1883,12 +1939,12 @@ StackWalkAction StackFrameIterator::Filter(void)
                             {
                                 // Get a reference to the funclet's parent frame.
                                 m_sfFuncletParent = ExceptionTracker::FindParentStackFrameForStackWalk(&m_crawl, true);
-                                _ASSERTE(!m_fFuncletNotSeen);
 
+                                bool fFrameWasUnwound = ExceptionTracker::HasFrameBeenUnwoundByAnyActiveException(&m_crawl);
                                 if (m_sfFuncletParent.IsNull())
                                 {
                                     // This can only happen if the funclet (and its parent) have been unwound.
-                                    _ASSERTE(ExceptionTracker::HasFrameBeenUnwoundByAnyActiveException(&m_crawl));
+                                    _ASSERTE(fFrameWasUnwound);
                                 }
                                 else
                                 {
@@ -1911,7 +1967,19 @@ StackWalkAction StackFrameIterator::Filter(void)
 
                                         if (g_isNewExceptionHandlingEnabled)
                                         {
-                                            if (!ExecutionManager::IsManagedCode(GetIP(m_crawl.GetRegisterSet()->pCallerContext)))
+                                            if (!m_fFoundFirstFunclet && (pExInfo > (void*)GetRegdisplaySP(m_crawl.GetRegisterSet())) && ((void*)m_sfParent.SP > pExInfo))
+                                            {
+                                                // For the first funclet we encounter below the topmost ExInfo that has a parent above that ExInfo
+                                                // (so it is an exceptionally called funclet for the exception represented by the ExInfo),
+                                                // we instruct the GC scanning of the frame
+                                                // to save information on the funclet so that we can use it to report references in the parent frame if
+                                                // no such funclet is found in future GC scans for the same exception.
+                                                _ASSERTE(pExInfo != NULL);
+                                                m_crawl.fShouldSaveFuncletInfo = true;
+                                                m_fFoundFirstFunclet = true;
+                                            }
+
+                                            if (!fFrameWasUnwound && !ExecutionManager::IsManagedCode(GetIP(m_crawl.GetRegisterSet()->pCallerContext)))
                                             {
                                                 // Initiate force reporting of references in the new managed exception handling code frames. 
                                                 // These frames are still alive when we are in a finally funclet.
@@ -2131,6 +2199,14 @@ StackWalkAction StackFrameIterator::Filter(void)
                                         }
                                         else if (!m_crawl.IsFunclet())
                                         {
+                                            if (m_fFuncletNotSeen)
+                                            {
+                                                // We have reached a real parent of a funclet that would be on the stack if GC didn't
+                                                // kick in between the calls to funclets in the second pass. We instruct GC to report
+                                                // roots using the info of the saved funclet we've seen during a previous GC.
+                                                m_crawl.fShouldParentToFuncletReportSavedFuncletSlots = true;
+                                                m_fFuncletNotSeen = false;
+                                            }
                                             // we've reached the parent and it's not handling an exception, it's also not
                                             // a funclet so reset our state.  note that we cannot reset the state when the
                                             // parent is a funclet since the leaf funclet didn't report any references and
@@ -2143,15 +2219,6 @@ StackWalkAction StackFrameIterator::Filter(void)
                                         if (g_isNewExceptionHandlingEnabled)
                                         {
                                             _ASSERTE(!ExceptionTracker::HasFrameBeenUnwoundByAnyActiveException(&m_crawl));
-                                            if (m_fFuncletNotSeen && m_crawl.IsFunclet())
-                                            {
-                                                _ASSERTE(!m_fProcessIntermediaryNonFilterFunclet);
-                                                _ASSERTE(m_crawl.fShouldCrawlframeReportGCReferences);
-                                                m_fDidFuncletReportGCReferences = true;
-                                                shouldSkipReporting = false;
-                                                m_crawl.fShouldParentFrameUseUnwindTargetPCforGCReporting = true;
-                                                m_crawl.ehClauseForCatch = pExInfo->m_ClauseForCatch;
-                                            }                                                
                                         }
                                         else
                                         {
@@ -2557,76 +2624,17 @@ StackWalkAction StackFrameIterator::NextRaw(void)
              DBG_ADDR(GetRegdisplaySP(m_crawl.pRD)),
              DBG_ADDR(GetControlPC(m_crawl.pRD))));
 
-#if !defined(DACCESS_COMPILE) && defined(HAS_QUICKUNWIND)
-        StackwalkCacheEntry *pCacheEntry = m_crawl.GetStackwalkCacheEntry();
-        if (pCacheEntry != NULL)
+        if (!m_crawl.GetCodeManager()->UnwindStackFrame(
+                            m_crawl.pRD,
+                            &m_cachedCodeInfo,
+                            m_codeManFlags
+                                | m_crawl.GetCodeManagerFlags()
+                                | ((m_flags & PROFILER_DO_STACK_SNAPSHOT) ?  SpeculativeStackwalk : 0),
+                                                    &m_crawl.codeManState))
         {
-            _ASSERTE(m_crawl.stackWalkCache.Enabled() && (m_flags & LIGHTUNWIND));
-
-            // lightened schema: take stack unwind info from stackwalk cache
-            EECodeManager::QuickUnwindStackFrame(m_crawl.pRD, pCacheEntry, EECodeManager::UnwindCurrentStackFrame);
-        }
-        else
-#endif // !DACCESS_COMPILE && HAS_QUICKUNWIND
-        {
-#if !defined(DACCESS_COMPILE)
-            // non-optimized stack unwind schema, doesn't use StackwalkCache
-            UINT_PTR curSP = (UINT_PTR)GetRegdisplaySP(m_crawl.pRD);
-            UINT_PTR curIP = (UINT_PTR)GetControlPC(m_crawl.pRD);
-#endif // !DACCESS_COMPILE
-
-            bool fInsertCacheEntry = m_crawl.stackWalkCache.Enabled() &&
-                                     (m_flags & LIGHTUNWIND) &&
-                                     (m_pCachedGSCookie == NULL);
-
-            // Is this a dynamic method. Dynamic methods can be GC collected and so IP to method mapping
-            // is not persistent. Therefore do not cache information for this frame.
-            BOOL isCollectableMethod = ExecutionManager::IsCollectibleMethod(m_crawl.GetMethodToken());
-            if(isCollectableMethod)
-                fInsertCacheEntry = FALSE;
-
-            StackwalkCacheUnwindInfo unwindInfo;
-
-            if (!m_crawl.GetCodeManager()->UnwindStackFrame(
-                                m_crawl.pRD,
-                                &m_cachedCodeInfo,
-                                m_codeManFlags
-                                    | m_crawl.GetCodeManagerFlags()
-                                    | ((m_flags & PROFILER_DO_STACK_SNAPSHOT) ?  SpeculativeStackwalk : 0),
-                                                      &m_crawl.codeManState,
-                                (fInsertCacheEntry ? &unwindInfo : NULL)))
-            {
-                LOG((LF_CORPROF, LL_INFO100, "**PROF: m_crawl.GetCodeManager()->UnwindStackFrame failure leads to SWA_FAILED.\n"));
-                retVal = SWA_FAILED;
-                goto Cleanup;
-            }
-
-#if !defined(DACCESS_COMPILE)
-            // store into hashtable if fits, otherwise just use old schema
-            if (fInsertCacheEntry)
-            {
-                //
-                //  information we add to cache, consists of two parts:
-                //  1. SPOffset - locals, etc. of current method, adding which to current ESP we get to retAddr ptr
-                //  2. argSize - size of pushed function arguments, the rest we need to add to get new ESP
-                //  we have to store two parts of ESP delta, since we need to update pPC also, and so require retAddr ptr
-                //
-                //  newSP = oldSP + SPOffset + sizeof(PTR) + argSize
-                //
-                UINT_PTR SPOffset = (UINT_PTR)GetRegdisplayStackMark(m_crawl.pRD) - curSP;
-                UINT_PTR argSize  = (UINT_PTR)GetRegdisplaySP(m_crawl.pRD) - curSP - SPOffset - sizeof(void*);
-
-                StackwalkCacheEntry cacheEntry = {0};
-                if (cacheEntry.Init(
-                            curIP,
-                            SPOffset,
-                            &unwindInfo,
-                            argSize))
-                {
-                    m_crawl.stackWalkCache.Insert(&cacheEntry);
-                }
-            }
-#endif // !DACCESS_COMPILE
+            LOG((LF_CORPROF, LL_INFO100, "**PROF: m_crawl.GetCodeManager()->UnwindStackFrame failure leads to SWA_FAILED.\n"));
+            retVal = SWA_FAILED;
+            goto Cleanup;
         }
 
 #define FAIL_IF_SPECULATIVE_WALK(condition)             \
@@ -2721,7 +2729,7 @@ StackWalkAction StackFrameIterator::NextRaw(void)
 
             if (m_crawl.isFrameless)
             {
-                m_crawl.pFrame->UpdateRegDisplay(m_crawl.pRD);
+                m_crawl.pFrame->UpdateRegDisplay(m_crawl.pRD, m_flags & UNWIND_FLOATS);
 
 #if defined(RECORD_RESUMABLE_FRAME_SP)
                 CONSISTENCY_CHECK(NULL == m_pvResumableFrameTargetSP);
@@ -2741,14 +2749,7 @@ StackWalkAction StackFrameIterator::NextRaw(void)
                     // better not be suspended.
                     CONSISTENCY_CHECK(!(m_flags & THREAD_IS_SUSPENDED));
 
-#if !defined(DACCESS_COMPILE)
-                    if (m_crawl.stackWalkCache.Enabled() && (m_flags & LIGHTUNWIND))
-                    {
-                        m_crawl.isCachedMethod = m_crawl.stackWalkCache.Lookup((UINT_PTR)adr);
-                    }
-#endif // DACCESS_COMPILE
-
-                    EECodeManager::EnsureCallerContextIsValid(m_crawl.pRD, m_crawl.GetStackwalkCacheEntry());
+                    EECodeManager::EnsureCallerContextIsValid(m_crawl.pRD, NULL, m_codeManFlags);
                     m_pvResumableFrameTargetSP = (LPVOID)GetSP(m_crawl.pRD->pCallerContext);
                 }
 #endif // RECORD_RESUMABLE_FRAME_SP
@@ -3019,16 +3020,6 @@ void StackFrameIterator::ProcessCurrentFrame(void)
             // This must be a JITed/managed native method. There is no explicit frame.
             //------------------------------------------------------------------------
 
-#if !defined(DACCESS_COMPILE)
-            m_crawl.isCachedMethod = FALSE;
-            if (m_crawl.stackWalkCache.Enabled() && (m_flags & LIGHTUNWIND))
-            {
-                m_crawl.isCachedMethod = m_crawl.stackWalkCache.Lookup((UINT_PTR)GetControlPC(m_crawl.pRD));
-                _ASSERTE (m_crawl.isCachedMethod != m_crawl.stackWalkCache.IsEmpty());
-            }
-#endif // DACCESS_COMPILE
-
-
 #if defined(FEATURE_EH_FUNCLETS)
             m_crawl.isFilterFuncletCached = false;
 #endif // FEATURE_EH_FUNCLETS
@@ -3101,7 +3092,7 @@ BOOL StackFrameIterator::CheckForSkippedFrames(void)
     // frame will be reported before its containing method.
 
     // This should always succeed!  If it doesn't, it's a bug somewhere else!
-    EECodeManager::EnsureCallerContextIsValid(m_crawl.pRD, m_crawl.GetStackwalkCacheEntry(), &m_cachedCodeInfo);
+    EECodeManager::EnsureCallerContextIsValid(m_crawl.pRD, &m_cachedCodeInfo, m_codeManFlags);
     pvReferenceSP = GetSP(m_crawl.pRD->pCallerContext);
 #endif // PROCESS_EXPLICIT_FRAME_BEFORE_MANAGED_FRAME
 
@@ -3371,180 +3362,6 @@ void StackFrameIterator::ResetNextExInfoForSP(TADDR SP)
 }
 #endif // FEATURE_EH_FUNCLETS
 
-#if defined(TARGET_AMD64) && !defined(DACCESS_COMPILE)
-static CrstStatic g_StackwalkCacheLock;                // Global StackwalkCache lock; only used on AMD64
-EXTERN_C void moveOWord(LPVOID src, LPVOID target);
-#endif // TARGET_AMD64
-
-/*
-    copies 64-bit *src to *target, atomically accessing the data
-    requires 64-bit alignment for atomic load/store
-*/
-inline static void atomicMoveCacheEntry(UINT64* src, UINT64* target)
-{
-    LIMITED_METHOD_CONTRACT;
-
-#ifdef TARGET_X86
-    // the most negative value is used a sort of integer infinity
-    // value, so it have to be avoided
-    _ASSERTE(*src != 0x8000000000000000);
-    __asm
-    {
-        mov eax, src
-        fild qword ptr [eax]
-        mov eax, target
-        fistp qword ptr [eax]
-    }
-#elif defined(TARGET_AMD64) && !defined(DACCESS_COMPILE)
-    // On AMD64 there's no way to move 16 bytes atomically, so we need to take a lock before calling moveOWord().
-    CrstHolder ch(&g_StackwalkCacheLock);
-    moveOWord(src, target);
-#endif
-}
-
-/*
-============================================================
-Here is an implementation of StackwalkCache class, used to optimize performance
-of stack walking. Currently each CrawlFrame has a StackwalkCache member, which implements
-functionality for caching already walked methods (see Thread::StackWalkFramesEx).
-See class and corresponding types declaration at stackwalktypes.h
-We do use global cache g_StackwalkCache[] with InterlockCompareExchange, fitting
-each cache entry into 8 bytes.
-============================================================
-*/
-
-#ifndef DACCESS_COMPILE
-#define LOG_NUM_OF_CACHE_ENTRIES 10
-#else
-// Stack walk cache is disabled in DAC - save space
-#define LOG_NUM_OF_CACHE_ENTRIES 0
-#endif
-#define NUM_OF_CACHE_ENTRIES (1 << LOG_NUM_OF_CACHE_ENTRIES)
-
-static StackwalkCacheEntry g_StackwalkCache[NUM_OF_CACHE_ENTRIES] = {}; // Global StackwalkCache
-
-#ifdef DACCESS_COMPILE
-const BOOL StackwalkCache::s_Enabled = FALSE;
-#else
-BOOL StackwalkCache::s_Enabled = FALSE;
-
-/*
-    StackwalkCache class constructor.
-    Set "enable/disable optimization" flag according to registry key.
-*/
-StackwalkCache::StackwalkCache()
-{
-    CONTRACTL {
-       NOTHROW;
-       GC_NOTRIGGER;
-    } CONTRACTL_END;
-
-    ClearEntry();
-
-    static BOOL stackwalkCacheEnableChecked = FALSE;
-    if (!stackwalkCacheEnableChecked)
-    {
-        // We can enter this block on multiple threads because of racing.
-        // However, that is OK since this operation is idempotent
-
-        s_Enabled = ((g_pConfig->DisableStackwalkCache() == 0) &&
-                    // disable cache if for some reason it is not aligned
-                    IS_ALIGNED((void*)&g_StackwalkCache[0], STACKWALK_CACHE_ENTRY_ALIGN_BOUNDARY));
-        stackwalkCacheEnableChecked = TRUE;
-    }
-}
-
-#endif // #ifndef DACCESS_COMPILE
-
-// static
-void StackwalkCache::Init()
-{
-#if defined(TARGET_AMD64) && !defined(DACCESS_COMPILE)
-    g_StackwalkCacheLock.Init(CrstSecurityStackwalkCache, CRST_UNSAFE_ANYMODE);
-#endif // TARGET_AMD64
-}
-
-/*
-    Returns efficient hash table key based on provided IP.
-    CPU architecture dependent.
-*/
-inline unsigned StackwalkCache::GetKey(UINT_PTR IP)
-{
-    LIMITED_METHOD_CONTRACT;
-    return (unsigned)(((IP >> LOG_NUM_OF_CACHE_ENTRIES) ^ IP) & (NUM_OF_CACHE_ENTRIES-1));
-}
-
-/*
-    Looks into cache and returns StackwalkCache entry, if current IP is cached.
-    JIT team guarantees the same ESP offset for the same IPs for different call chains.
-*/
-BOOL StackwalkCache::Lookup(UINT_PTR IP)
-{
-    CONTRACTL {
-       NOTHROW;
-       GC_NOTRIGGER;
-    } CONTRACTL_END;
-
-#if defined(TARGET_X86) || defined(TARGET_AMD64)
-    _ASSERTE(Enabled());
-    _ASSERTE(IP);
-
-    unsigned hkey = GetKey(IP);
-    _ASSERTE(IS_ALIGNED((void*)&g_StackwalkCache[hkey], STACKWALK_CACHE_ENTRY_ALIGN_BOUNDARY));
-    // Don't care about m_CacheEntry access atomicity, since it's private to this
-    // stackwalk/thread
-    atomicMoveCacheEntry((UINT64*)&g_StackwalkCache[hkey], (UINT64*)&m_CacheEntry);
-
-#ifdef _DEBUG
-    if (IP != m_CacheEntry.IP)
-    {
-        ClearEntry();
-    }
-#endif
-
-    return (IP == m_CacheEntry.IP);
-#else // TARGET_X86
-    return FALSE;
-#endif // TARGET_X86
-}
-
-/*
-    Caches data provided for current IP.
-*/
-void StackwalkCache::Insert(StackwalkCacheEntry *pCacheEntry)
-{
-    CONTRACTL {
-       NOTHROW;
-       GC_NOTRIGGER;
-    } CONTRACTL_END;
-
-    _ASSERTE(Enabled());
-    _ASSERTE(pCacheEntry);
-
-    unsigned hkey = GetKey(pCacheEntry->IP);
-    _ASSERTE(IS_ALIGNED((void*)&g_StackwalkCache[hkey], STACKWALK_CACHE_ENTRY_ALIGN_BOUNDARY));
-    atomicMoveCacheEntry((UINT64*)pCacheEntry, (UINT64*)&g_StackwalkCache[hkey]);
-}
-
-// static
-void StackwalkCache::Invalidate(LoaderAllocator * pLoaderAllocator)
-{
-    CONTRACTL {
-       NOTHROW;
-       GC_NOTRIGGER;
-    } CONTRACTL_END;
-
-    if (!s_Enabled)
-        return;
-
-    /* Note that we could just flush the entries corresponding to
-    pDomain if we wanted to get fancy. To keep things simple for now,
-    we just invalidate everything
-    */
-
-    ZeroMemory(PVOID(&g_StackwalkCache), sizeof(g_StackwalkCache));
-}
-
 //----------------------------------------------------------------------------
 //
 // SetUpRegdisplayForStackWalk - set up Regdisplay for a stack walk
diff --git a/src/coreclr/vm/stackwalk.h b/src/coreclr/vm/stackwalk.h
index fd0fa7c4c6e6..736ca2653ee7 100644
--- a/src/coreclr/vm/stackwalk.h
+++ b/src/coreclr/vm/stackwalk.h
@@ -52,22 +52,15 @@ class AppDomain;
 // Enumerate all functions.
 //************************************************************************
 
-/* This enumerator is meant to be used for the most common cases, i.e. to
-   enumerate just all the functions of the requested thread. It is just a
-   cover for the "real" enumerator.
- */
-
-StackWalkAction StackWalkFunctions(Thread * thread, PSTACKWALKFRAMESCALLBACK pCallback, VOID * pData);
-
-/*<TODO>@ISSUE: Maybe use a define instead?</TODO>
-#define StackWalkFunctions(thread, callBack, userdata) thread->StackWalkFrames(METHODSONLY, (callBack),(userData))
-*/
-
 namespace AsmOffsetsAsserts
 {
     class AsmOffsets;
 };
 
+#ifdef FEATURE_EH_FUNCLETS
+extern "C" void QCALLTYPE AppendExceptionStackFrame(QCall::ObjectHandleOnStack exceptionObj, SIZE_T ip, SIZE_T sp, int flags, ExInfo *pExInfo);
+#endif
+
 class CrawlFrame
 {
 public:
@@ -291,7 +284,6 @@ class CrawlFrame
             if (!HasFaulted() && !IsIPadjusted())
             {
                 _ASSERTE(!(flags & ActiveStackFrame));
-                flags |= AbortingCall;
             }
         }
 
@@ -380,20 +372,6 @@ class CrawlFrame
         return codeInfo.GetCodeManager();
     }
 
-    inline StackwalkCacheEntry* GetStackwalkCacheEntry()
-    {
-        LIMITED_METHOD_CONTRACT;
-        _ASSERTE (isCachedMethod != stackWalkCache.IsEmpty());
-        if (isCachedMethod && stackWalkCache.m_CacheEntry.IsSafeToUseCache())
-        {
-            return &(stackWalkCache.m_CacheEntry);
-        }
-        else
-        {
-            return NULL;
-        }
-    }
-
     void CheckGSCookies();
 
     inline Thread* GetThread()
@@ -438,6 +416,18 @@ class CrawlFrame
         return fShouldParentFrameUseUnwindTargetPCforGCReporting;
     }
 
+    bool ShouldParentToFuncletReportSavedFuncletSlots()
+    {
+        LIMITED_METHOD_CONTRACT;
+        return fShouldParentToFuncletReportSavedFuncletSlots;
+    }
+
+    bool ShouldSaveFuncletInfo()
+    {
+        LIMITED_METHOD_CONTRACT;
+        return fShouldSaveFuncletInfo;
+    }
+
     const EE_ILEXCEPTION_CLAUSE& GetEHClauseForCatch()
     {
         return ehClauseForCatch;
@@ -459,6 +449,7 @@ class CrawlFrame
     friend class StackFrameIterator;
 #ifdef FEATURE_EH_FUNCLETS
     friend class ExceptionTracker;
+    friend void QCALLTYPE AppendExceptionStackFrame(QCall::ObjectHandleOnStack exceptionObj, SIZE_T ip, SIZE_T sp, int flags, ExInfo *pExInfo);
 #endif // FEATURE_EH_FUNCLETS
 
     CodeManState      codeManState;
@@ -489,14 +480,12 @@ class CrawlFrame
     bool              fShouldParentToFuncletSkipReportingGCReferences;
     bool              fShouldCrawlframeReportGCReferences;
     bool              fShouldParentFrameUseUnwindTargetPCforGCReporting;
+    bool              fShouldSaveFuncletInfo;
+    bool              fShouldParentToFuncletReportSavedFuncletSlots;
     EE_ILEXCEPTION_CLAUSE ehClauseForCatch;
 #endif //FEATURE_EH_FUNCLETS
     Thread*           pThread;
 
-    // fields used for stackwalk cache
-    BOOL              isCachedMethod;
-    StackwalkCache    stackWalkCache;
-
     GSCookie         *pCurGSCookie;
     GSCookie         *pFirstGSCookie;
 
@@ -609,6 +598,13 @@ class StackFrameIterator
     // advance to the next frame according to the stackwalk flags
     StackWalkAction Next(void);
 
+#ifndef DACCESS_COMPILE
+#ifdef FEATURE_EH_FUNCLETS
+    // advance to the position that the other iterator is currently at
+    void SkipTo(StackFrameIterator *pOtherStackFrameIterator);
+#endif // FEATURE_EH_FUNCLETS
+#endif // DACCESS_COMPILE
+
 #ifdef FEATURE_EH_FUNCLETS
     void ResetNextExInfoForSP(TADDR SP);
 
@@ -725,7 +721,6 @@ class StackFrameIterator
 
         if (!ResetOnlyIntermediaryState)
         {
-            m_fFuncletNotSeen = false;
             m_sfFuncletParent = StackFrame();
             m_fProcessNonFilterFunclet = false;
         }
@@ -778,6 +773,9 @@ class StackFrameIterator
     bool          m_movedPastFirstExInfo;
     // Indicates that no funclet was seen during the current stack walk yet
     bool          m_fFuncletNotSeen;
+    // Indicates that the stack walk has moved past a funclet
+    bool          m_fFoundFirstFunclet;
+
 #if defined(RECORD_RESUMABLE_FRAME_SP)
     LPVOID m_pvResumableFrameTargetSP;
 #endif // RECORD_RESUMABLE_FRAME_SP
diff --git a/src/coreclr/vm/stackwalktypes.h b/src/coreclr/vm/stackwalktypes.h
index 601e001fe34c..2263740e67c2 100644
--- a/src/coreclr/vm/stackwalktypes.h
+++ b/src/coreclr/vm/stackwalktypes.h
@@ -13,7 +13,6 @@
 
 class CrawlFrame;
 struct RangeSection;
-struct StackwalkCacheEntry;
 
 //
 // This type should be used internally inside the code manager only. EECodeInfo should
@@ -72,154 +71,4 @@ typedef StackWalkAction (*PSTACKWALKFRAMESCALLBACK)(
 
 );
 
-/******************************************************************************
-   StackwalkCache: new class implements stackwalk perf optimization features.
-   StackwalkCacheEntry array: very simple per thread hash table, keeping cached data.
-   StackwalkCacheUnwindInfo: used by EECodeManager::UnwindStackFrame to return
-   stackwalk cache flags.
-   Cf. Ilyakoz for any questions.
-*/
-
-struct StackwalkCacheUnwindInfo
-{
-#if defined(TARGET_AMD64)
-    ULONG RBPOffset;
-    ULONG RSPOffsetFromUnwindInfo;
-#else  // !TARGET_AMD64
-    BOOL fUseEbp;                   // Is EBP modified by the method - either for a frame-pointer or for a scratch-register?
-    BOOL fUseEbpAsFrameReg;         // use EBP as the frame pointer?
-#endif // !TARGET_AMD64
-
-    inline StackwalkCacheUnwindInfo() { SUPPORTS_DAC; ZeroMemory(this, sizeof(StackwalkCacheUnwindInfo)); }
-    StackwalkCacheUnwindInfo(StackwalkCacheEntry * pCacheEntry);
-};
-
-//************************************************************************
-
-#if defined(HOST_64BIT)
-    #define STACKWALK_CACHE_ENTRY_ALIGN_BOUNDARY 0x10
-#else  // !HOST_64BIT
-    #define STACKWALK_CACHE_ENTRY_ALIGN_BOUNDARY 0x8
-#endif // !HOST_64BIT
-
-struct
-DECLSPEC_ALIGN(STACKWALK_CACHE_ENTRY_ALIGN_BOUNDARY)
-StackwalkCacheEntry
-{
-    //
-    //  don't rearrange the fields, so that invalid value 0x8000000000000000 will never appear
-    //  as StackwalkCacheEntry, it's required for atomicMOVQ using FILD/FISTP instructions
-    //
-    UINT_PTR IP;
-#if !defined(TARGET_AMD64)
-    WORD ESPOffset:15;          // stack offset (frame size + pending arguments + etc)
-    WORD fUseEbp:1;             // For ESP methods, is EBP touched at all?
-    WORD fUseEbpAsFrameReg:1;   // use EBP as the frame register?
-    WORD argSize:15;            // size of args pushed on stack
-#else  // TARGET_AMD64
-    DWORD RSPOffset;
-    DWORD RBPOffset;
-#endif // TARGET_AMD64
-
-    inline BOOL Init(UINT_PTR   IP,
-                     UINT_PTR   SPOffset,
-                     StackwalkCacheUnwindInfo *pUnwindInfo,
-                     UINT_PTR   argSize)
-    {
-        LIMITED_METHOD_CONTRACT;
-
-        this->IP              = IP;
-
-#if defined(TARGET_X86)
-        this->ESPOffset         = SPOffset;
-        this->argSize           = argSize;
-
-        this->fUseEbp           = pUnwindInfo->fUseEbp;
-        this->fUseEbpAsFrameReg = pUnwindInfo->fUseEbpAsFrameReg;
-        _ASSERTE(!fUseEbpAsFrameReg || fUseEbp);
-
-        // return success if we fit SPOffset and argSize into
-        return ((this->ESPOffset == SPOffset) &&
-                (this->argSize == argSize));
-#elif defined(TARGET_AMD64)
-        // The size of a stack frame is guaranteed to fit in 4 bytes, so we don't need to check RSPOffset and RBPOffset.
-
-        // The actual SP offset may be bigger than the offset we get from the unwind info because of stack allocations.
-        _ASSERTE(SPOffset >= pUnwindInfo->RSPOffsetFromUnwindInfo);
-
-        _ASSERTE(FitsIn<DWORD>(SPOffset));
-        this->RSPOffset  = static_cast<DWORD>(SPOffset);
-        _ASSERTE(FitsIn<DWORD>(pUnwindInfo->RBPOffset + (SPOffset - pUnwindInfo->RSPOffsetFromUnwindInfo)));
-        this->RBPOffset  = static_cast<DWORD>(pUnwindInfo->RBPOffset + (SPOffset - pUnwindInfo->RSPOffsetFromUnwindInfo));
-        return TRUE;
-#else  // !TARGET_X86 && !TARGET_AMD64
-        return FALSE;
-#endif // !TARGET_X86 && !TARGET_AMD64
-    }
-
-    inline BOOL IsSafeToUseCache()
-    {
-        LIMITED_METHOD_CONTRACT;
-
-#if defined(TARGET_X86)
-        return (!fUseEbp || fUseEbpAsFrameReg);
-#elif defined(TARGET_AMD64)
-        return TRUE;
-#else  // !TARGET_X86 && !TARGET_AMD64
-        return FALSE;
-#endif // !TARGET_X86 && !TARGET_AMD64
-    }
-};
-
-#if defined(TARGET_X86) || defined(TARGET_AMD64)
-static_assert_no_msg(sizeof(StackwalkCacheEntry) == 2 * sizeof(UINT_PTR));
-#endif // TARGET_X86 || TARGET_AMD64
-
-//************************************************************************
-
-class StackwalkCache
-{
-    friend struct _DacGlobals;
-
-    public:
-        BOOL Lookup(UINT_PTR IP);
-        void Insert(StackwalkCacheEntry *pCacheEntry);
-        inline void ClearEntry () { LIMITED_METHOD_DAC_CONTRACT; m_CacheEntry.IP = 0; }
-        inline BOOL Enabled() { LIMITED_METHOD_DAC_CONTRACT;  return s_Enabled; };
-        inline BOOL IsEmpty () { LIMITED_METHOD_CONTRACT;  return m_CacheEntry.IP == 0; }
-
-#ifndef DACCESS_COMPILE
-        StackwalkCache();
-#endif
-        static void Init();
-
-        StackwalkCacheEntry m_CacheEntry; // local copy of Global Cache entry for current IP
-
-        static void Invalidate(LoaderAllocator * pLoaderAllocator);
-
-    private:
-        unsigned GetKey(UINT_PTR IP);
-
-#ifdef DACCESS_COMPILE
-        // DAC can't rely on the cache here
-        const static BOOL s_Enabled;
-#else
-        static BOOL s_Enabled;
-#endif
-};
-
-//************************************************************************
-
-inline StackwalkCacheUnwindInfo::StackwalkCacheUnwindInfo(StackwalkCacheEntry * pCacheEntry)
-{
-    LIMITED_METHOD_CONTRACT;
-
-#if defined(TARGET_AMD64)
-    RBPOffset = pCacheEntry->RBPOffset;
-#else  // !TARGET_AMD64
-    fUseEbp = pCacheEntry->fUseEbp;
-    fUseEbpAsFrameReg = pCacheEntry->fUseEbpAsFrameReg;
-#endif // !TARGET_AMD64
-}
-
 #endif  // __STACKWALKTYPES_H__
diff --git a/src/coreclr/vm/stdinterfaces.cpp b/src/coreclr/vm/stdinterfaces.cpp
index 08af895c3bae..3131f33d2892 100644
--- a/src/coreclr/vm/stdinterfaces.cpp
+++ b/src/coreclr/vm/stdinterfaces.cpp
@@ -611,6 +611,43 @@ HRESULT GetITypeLibForAssembly(_In_ Assembly *pAssembly, _Outptr_ ITypeLib **ppT
     return S_OK;
 } // HRESULT GetITypeLibForAssembly()
 
+// .NET Framework's mscorlib TLB GUID.
+static const GUID s_MscorlibGuid = { 0xBED7F4EA, 0x1A96, 0x11D2, { 0x8F, 0x08, 0x00, 0xA0, 0xC9, 0xA6, 0x18, 0x6D } };
+
+// Hard-coded GUID for System.Guid.
+static const GUID s_GuidForSystemGuid = { 0x9C5923E9, 0xDE52, 0x33EA, { 0x88, 0xDE, 0x7E, 0xBC, 0x86, 0x33, 0xB9, 0xCC } };
+
+// There are types that are helpful to provide that facilitate porting from
+// .NET Framework to .NET 8+. This function is used to acquire their ITypeInfo.
+// This should be used narrowly. Types at a minimum should be blittable.
+static bool TryDeferToMscorlib(MethodTable* pClass, ITypeInfo** ppTI)
+{
+    CONTRACTL
+    {
+        THROWS;
+        GC_TRIGGERS;
+        MODE_PREEMPTIVE;
+        PRECONDITION(pClass != NULL);
+        PRECONDITION(pClass->IsBlittable());
+        PRECONDITION(ppTI != NULL);
+    }
+    CONTRACTL_END;
+
+    // Marshalling of System.Guid is a common scenario that impacts many teams porting
+    // code to .NET 8+. Try to load the .NET Framework's TLB to support this scenario.
+    if (pClass == CoreLibBinder::GetClass(CLASS__GUID))
+    {
+        SafeComHolder<ITypeLib> pMscorlibTypeLib = NULL;
+        if (SUCCEEDED(::LoadRegTypeLib(s_MscorlibGuid, 2, 4, 0, &pMscorlibTypeLib)))
+        {
+            if (SUCCEEDED(pMscorlibTypeLib->GetTypeInfoOfGuid(s_GuidForSystemGuid, ppTI)))
+                return true;
+        }
+    }
+
+    return false;
+}
+
 HRESULT GetITypeInfoForEEClass(MethodTable *pClass, ITypeInfo **ppTI, bool bClassInfo)
 {
     CONTRACTL
@@ -625,6 +662,7 @@ HRESULT GetITypeInfoForEEClass(MethodTable *pClass, ITypeInfo **ppTI, bool bClas
     GUID clsid;
     GUID ciid;
     ComMethodTable *pComMT              = NULL;
+    MethodTable* pOriginalClass         = pClass;
     HRESULT                 hr          = S_OK;
     SafeComHolder<ITypeLib> pITLB       = NULL;
     SafeComHolder<ITypeInfo> pTI        = NULL;
@@ -770,12 +808,68 @@ HRESULT GetITypeInfoForEEClass(MethodTable *pClass, ITypeInfo **ppTI, bool bClas
     {
         if (!FAILED(hr))
             hr = E_FAIL;
+
+        if (pOriginalClass->IsValueType() && pOriginalClass->IsBlittable())
+        {
+            if (TryDeferToMscorlib(pOriginalClass, ppTI))
+                hr = S_OK;
+        }
     }
 
 ReturnHR:
     return hr;
 } // HRESULT GetITypeInfoForEEClass()
 
+// Only a narrow set of types are supported.
+// See TryDeferToMscorlib() above.
+MethodTable* GetMethodTableForRecordInfo(IRecordInfo* recInfo)
+{
+    CONTRACTL
+    {
+        THROWS;
+        GC_TRIGGERS;
+        MODE_PREEMPTIVE;
+        PRECONDITION(recInfo != NULL);
+    }
+    CONTRACTL_END;
+
+    HRESULT hr;
+
+    // Verify the associated TypeLib attribute
+    SafeComHolder<ITypeInfo> typeInfo;
+    hr = recInfo->GetTypeInfo(&typeInfo);
+    if (FAILED(hr))
+        return NULL;
+
+    SafeComHolder<ITypeLib> typeLib;
+    UINT index;
+    hr = typeInfo->GetContainingTypeLib(&typeLib, &index);
+    if (FAILED(hr))
+        return NULL;
+
+    TLIBATTR* attrs;
+    hr = typeLib->GetLibAttr(&attrs);
+    if (FAILED(hr))
+        return NULL;
+
+    GUID libGuid = attrs->guid;
+    typeLib->ReleaseTLibAttr(attrs);
+    if (s_MscorlibGuid != libGuid)
+        return NULL;
+
+    // Verify the Guid of the associated type
+    GUID typeGuid;
+    hr = recInfo->GetGuid(&typeGuid);
+    if (FAILED(hr))
+        return NULL;
+
+    // Check for supported types.
+    if (s_GuidForSystemGuid == typeGuid)
+        return CoreLibBinder::GetClass(CLASS__GUID);
+
+    return NULL;
+}
+
 // Returns a NON-ADDREF'd ITypeInfo.
 HRESULT GetITypeInfoForMT(ComMethodTable *pMT, ITypeInfo **ppTI)
 {
diff --git a/src/coreclr/vm/stdinterfaces.h b/src/coreclr/vm/stdinterfaces.h
index 8d6201439657..517ca810b33a 100644
--- a/src/coreclr/vm/stdinterfaces.h
+++ b/src/coreclr/vm/stdinterfaces.h
@@ -183,4 +183,7 @@ IErrorInfo *GetSupportedErrorInfo(IUnknown *iface, REFIID riid);
 // Helpers to get the ITypeInfo* for a type.
 HRESULT GetITypeInfoForEEClass(MethodTable *pMT, ITypeInfo **ppTI, bool bClassInfo = false);
 
+// Gets the MethodTable for the associated IRecordInfo.
+MethodTable* GetMethodTableForRecordInfo(IRecordInfo* recInfo);
+
 #endif
diff --git a/src/coreclr/vm/stdinterfaces_wrapper.cpp b/src/coreclr/vm/stdinterfaces_wrapper.cpp
index 4b16bb4213b0..b4a02644127f 100644
--- a/src/coreclr/vm/stdinterfaces_wrapper.cpp
+++ b/src/coreclr/vm/stdinterfaces_wrapper.cpp
@@ -45,17 +45,6 @@ struct IEnumConnectionPoints;
 // Common vtables for well-known COM interfaces
 // shared by all COM+ callable wrappers.
 
-namespace
-{
-    bool CanCallRuntimeInterfaceImplementations()
-    {
-        LIMITED_METHOD_CONTRACT;
-        // If we are finalizing all alive objects, or after this stage, we do not allow
-        // a thread to enter EE.
-        return !((g_fEEShutDown & ShutDown_Finalize2) || g_fForbidEnterEE);
-    }
-}
-
 //-------------------------------------------------------------------------
 // IUnknown methods
 
@@ -74,9 +63,6 @@ HRESULT STDMETHODCALLTYPE Unknown_QueryInterface(IUnknown* pUnk, REFIID riid, vo
     }
     CONTRACTL_END;
 
-    if (!CanCallRuntimeInterfaceImplementations())
-        return E_FAIL;
-
     ComCallWrapper* pWrap = MapIUnknownToWrapper(pUnk);
     return Unknown_QueryInterface_Internal(pWrap, pUnk, riid, ppv);
 }
@@ -268,9 +254,6 @@ HRESULT STDMETHODCALLTYPE Dispatch_GetTypeInfoCount_Wrapper(IDispatch* pDisp, un
     }
     CONTRACTL_END;
     
-    if (!CanCallRuntimeInterfaceImplementations())
-        return E_FAIL;
-
     return Dispatch_GetTypeInfoCount(pDisp, pctinfo);
 }
 
@@ -287,10 +270,6 @@ HRESULT STDMETHODCALLTYPE Dispatch_GetTypeInfo_Wrapper(IDispatch* pDisp, unsigne
         PRECONDITION(CheckPointer(pptinfo, NULL_OK));
     }
     CONTRACTL_END;
-    
-    
-    if (!CanCallRuntimeInterfaceImplementations())
-        return E_FAIL;
 
     return Dispatch_GetTypeInfo(pDisp, itinfo, lcid, pptinfo);
 }
@@ -310,9 +289,6 @@ HRESULT STDMETHODCALLTYPE Dispatch_GetIDsOfNames_Wrapper(IDispatch* pDisp, REFII
         PRECONDITION(CheckPointer(rgdispid, NULL_OK));
     }
     CONTRACTL_END;
-    
-    if (!CanCallRuntimeInterfaceImplementations())
-        return E_FAIL;
 
     return Dispatch_GetIDsOfNames(pDisp, riid, rgszNames, cNames, lcid, rgdispid);
 }
@@ -333,9 +309,6 @@ HRESULT STDMETHODCALLTYPE InternalDispatchImpl_GetIDsOfNames_Wrapper(IDispatch*
     }
     CONTRACTL_END;
     
-    if (!CanCallRuntimeInterfaceImplementations())
-        return E_FAIL;
-
     return InternalDispatchImpl_GetIDsOfNames(pDisp, riid, rgszNames, cNames, lcid, rgdispid);
 }
 
@@ -359,10 +332,6 @@ HRESULT STDMETHODCALLTYPE Dispatch_Invoke_Wrapper(IDispatch* pDisp, DISPID dispi
     }
     CONTRACTL_END;
     
-    
-    if (!CanCallRuntimeInterfaceImplementations())
-        return E_FAIL;
-
     return Dispatch_Invoke(pDisp, dispidMember, riid, lcid, wFlags, pdispparams, pvarResult, pexcepinfo, puArgErr);
 }
 
@@ -385,10 +354,6 @@ HRESULT STDMETHODCALLTYPE InternalDispatchImpl_Invoke_Wrapper(IDispatch* pDisp,
     }
     CONTRACTL_END;
     
-    
-    if (!CanCallRuntimeInterfaceImplementations())
-        return E_FAIL;
-
     return InternalDispatchImpl_Invoke(pDisp, dispidMember, riid, lcid, wFlags, pdispparams, pvarResult, pexcepinfo, puArgErr);
 }
 
@@ -411,9 +376,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return ClassInfo_GetClassInfo(pUnk, ppTI);
     }
 
@@ -435,9 +397,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return SupportsErroInfo_IntfSupportsErrorInfo(pUnk, riid);
     }
 
@@ -457,9 +416,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return ErrorInfo_GetDescription(pUnk, pbstrDescription);
     }
 
@@ -477,9 +433,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return ErrorInfo_GetGUID(pUnk, pguid);
     }
 
@@ -497,10 +450,6 @@ namespace
         }
         CONTRACTL_END;
     
-    
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return ErrorInfo_GetHelpContext(pUnk, pdwHelpCtxt);
     }
 
@@ -518,9 +467,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return ErrorInfo_GetHelpFile(pUnk, pbstrHelpFile);
     }
 
@@ -538,9 +484,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return ErrorInfo_GetSource(pUnk, pbstrSource);
     }
 
@@ -561,9 +504,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return DispatchEx_GetTypeInfoCount(pDisp, pctinfo);
     }
 
@@ -581,9 +521,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return DispatchEx_GetTypeInfo(pDisp, itinfo, lcid, pptinfo);
     }
 
@@ -603,9 +540,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return DispatchEx_GetIDsOfNames(pDisp, riid, rgszNames, cNames, lcid, rgdispid);
     }
 
@@ -628,9 +562,6 @@ namespace
         }
         CONTRACTL_END;
 
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return DispatchEx_Invoke(pDisp, dispidMember, riid, lcid, wFlags, pdispparams, pvarResult, pexcepinfo, puArgErr);
     }
 
@@ -647,9 +578,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return DispatchEx_DeleteMemberByDispID(pDisp, id);
     }
 
@@ -666,9 +594,6 @@ namespace
         }
         CONTRACTL_END;
 
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return DispatchEx_DeleteMemberByName(pDisp, bstrName, grfdex);
     }
 
@@ -686,9 +611,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return DispatchEx_GetMemberName(pDisp, id, pbstrName);
     }
 
@@ -707,9 +629,6 @@ namespace
         }
         CONTRACTL_END;
 
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return DispatchEx_GetDispID(pDisp, bstrName, grfdex, pid);
     }
 
@@ -727,9 +646,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return DispatchEx_GetMemberProperties(pDisp, id, grfdexFetch, pgrfdex);
     }
 
@@ -747,9 +663,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return DispatchEx_GetNameSpaceParent(pDisp, ppunk);
     }
 
@@ -767,9 +680,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return DispatchEx_GetNextDispID(pDisp, grfdex, id, pid);
     }
 
@@ -791,9 +701,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return DispatchEx_InvokeEx(pDisp, id, lcid, wFlags, pdp, pVarRes, pei, pspCaller);
     }
 
@@ -817,9 +724,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return Marshal_GetUnmarshalClass(pMarsh, riid, pv, dwDestContext, pvDestContext, mshlflags, pclsid);
     }
 
@@ -840,9 +744,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return Marshal_GetMarshalSizeMax(pMarsh, riid, pv, dwDestContext, pvDestContext, mshlflags, pSize);
     }
 
@@ -862,9 +763,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return Marshal_MarshalInterface(pMarsh, pStm, riid, pv, dwDestContext, pvDestContext, mshlflags);
     }
 
@@ -883,9 +781,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return Marshal_UnmarshalInterface(pMarsh, pStm, riid, ppvObj);
     }
 
@@ -903,9 +798,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return Marshal_ReleaseMarshalData(pMarsh, pStm);
     }
 
@@ -922,9 +814,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return Marshal_DisconnectObject(pMarsh, dwReserved);
     }
 
@@ -945,9 +834,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return ConnectionPointContainer_EnumConnectionPoints(pUnk, ppEnum);
     }
 
@@ -965,9 +851,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return ConnectionPointContainer_FindConnectionPoint(pUnk, riid, ppCP);
     }
 
@@ -991,9 +874,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return ObjectSafety_GetInterfaceSafetyOptions(pUnk, riid, pdwSupportedOptions, pdwEnabledOptions);
     }
 
@@ -1011,9 +891,6 @@ namespace
         }
         CONTRACTL_END;
     
-        if (!CanCallRuntimeInterfaceImplementations())
-            return E_FAIL;
-
         return ObjectSafety_SetInterfaceSafetyOptions(pUnk, riid, dwOptionSetMask, dwEnabledOptions);
     }
 }
diff --git a/src/coreclr/vm/stringliteralmap.cpp b/src/coreclr/vm/stringliteralmap.cpp
index 29d457207bf8..55d2267f02c8 100644
--- a/src/coreclr/vm/stringliteralmap.cpp
+++ b/src/coreclr/vm/stringliteralmap.cpp
@@ -442,7 +442,7 @@ static void LogStringLiteral(_In_z_ const char* action, EEStringData *pStringDat
     STATIC_CONTRACT_FORBID_FAULT;
 
     ULONG length = pStringData->GetCharCount();
-    length = min(length, 128);
+    length = min(length, (ULONG)128);
     WCHAR *szString = (WCHAR *)_alloca((length + 1) * sizeof(WCHAR));
     memcpyNoGCRefs((void*)szString, (void*)pStringData->GetStringBuffer(), length * sizeof(WCHAR));
     szString[length] = '\0';
diff --git a/src/coreclr/vm/stubgen.cpp b/src/coreclr/vm/stubgen.cpp
index 5f18e8f5d912..5ecb723b68c5 100644
--- a/src/coreclr/vm/stubgen.cpp
+++ b/src/coreclr/vm/stubgen.cpp
@@ -2639,22 +2639,16 @@ void ILStubLinker::TransformArgForJIT(LocalDesc *pLoc)
 
         case ELEMENT_TYPE_PTR:
         {
-#ifdef TARGET_X86
-            if (pLoc->bIsCopyConstructed)
-            {
-                // The only pointers that we don't transform to ELEMENT_TYPE_I are those that are
-                // ET_TYPE_CMOD_REQD<IsCopyConstructed>/ET_TYPE_CMOD_REQD<NeedsCopyConstructorModifier>
-                // in the original signature. This convention is understood by the UM thunk compiler
-                // (code:UMThunkMarshInfo.CompileNExportThunk) which will generate different thunk code.
-                // Such parameters come from unmanaged by value but must enter the IL stub by reference
-                // because we are not supposed to make a copy.
-            }
-            else
-#endif // TARGET_X86
-            {
-                pLoc->ElementType[0] = ELEMENT_TYPE_I;
-                pLoc->cbType = 1;
-            }
+            // Don't transform pointer types to ELEMENT_TYPE_I. The JIT can handle the correct type information,
+            // and it's required for some cases (such as SwiftError*).
+            break;
+        }
+
+        case ELEMENT_TYPE_BYREF:
+        {
+            // Transform ELEMENT_TYPE_BYREF to ELEMENT_TYPE_PTR to retain the pointed-to type information
+            // while making the type blittable.
+            pLoc->ElementType[0] = ELEMENT_TYPE_PTR;
             break;
         }
 
@@ -2669,7 +2663,7 @@ void ILStubLinker::TransformArgForJIT(LocalDesc *pLoc)
             FALLTHROUGH;
         }
 
-        // pointers, byrefs, strings, arrays, other ref types -> ELEMENT_TYPE_I
+        // ref types -> ELEMENT_TYPE_I
         default:
         {
             pLoc->ElementType[0] = ELEMENT_TYPE_I;
@@ -3133,6 +3127,18 @@ int ILStubLinker::GetToken(MethodDesc* pMD)
     return m_tokenMap.GetToken(pMD);
 }
 
+int ILStubLinker::GetToken(MethodDesc* pMD, mdToken typeSignature)
+{
+    STANDARD_VM_CONTRACT;
+    return m_tokenMap.GetToken(pMD, typeSignature);
+}
+
+int ILStubLinker::GetToken(MethodDesc* pMD, mdToken typeSignature, mdToken methodSignature)
+{
+    STANDARD_VM_CONTRACT;
+    return m_tokenMap.GetToken(pMD, typeSignature, methodSignature);
+}
+
 int ILStubLinker::GetToken(MethodTable* pMT)
 {
     STANDARD_VM_CONTRACT;
@@ -3151,6 +3157,12 @@ int ILStubLinker::GetToken(FieldDesc* pFD)
     return m_tokenMap.GetToken(pFD);
 }
 
+int ILStubLinker::GetToken(FieldDesc* pFD, mdToken typeSignature)
+{
+    STANDARD_VM_CONTRACT;
+    return m_tokenMap.GetToken(pFD, typeSignature);
+}
+
 int ILStubLinker::GetSigToken(PCCOR_SIGNATURE pSig, DWORD cbSig)
 {
     STANDARD_VM_CONTRACT;
@@ -3227,6 +3239,16 @@ int ILCodeStream::GetToken(MethodDesc* pMD)
     STANDARD_VM_CONTRACT;
     return m_pOwner->GetToken(pMD);
 }
+int ILCodeStream::GetToken(MethodDesc* pMD, mdToken typeSignature)
+{
+    STANDARD_VM_CONTRACT;
+    return m_pOwner->GetToken(pMD, typeSignature);
+}
+int ILCodeStream::GetToken(MethodDesc* pMD, mdToken typeSignature, mdToken methodSignature)
+{
+    STANDARD_VM_CONTRACT;
+    return m_pOwner->GetToken(pMD, typeSignature, methodSignature);
+}
 int ILCodeStream::GetToken(MethodTable* pMT)
 {
     STANDARD_VM_CONTRACT;
@@ -3242,6 +3264,11 @@ int ILCodeStream::GetToken(FieldDesc* pFD)
     STANDARD_VM_CONTRACT;
     return m_pOwner->GetToken(pFD);
 }
+int ILCodeStream::GetToken(FieldDesc* pFD, mdToken typeSignature)
+{
+    STANDARD_VM_CONTRACT;
+    return m_pOwner->GetToken(pFD, typeSignature);
+}
 int ILCodeStream::GetSigToken(PCCOR_SIGNATURE pSig, DWORD cbSig)
 {
     STANDARD_VM_CONTRACT;
diff --git a/src/coreclr/vm/stubgen.h b/src/coreclr/vm/stubgen.h
index 595de649220c..968e5f9b4829 100644
--- a/src/coreclr/vm/stubgen.h
+++ b/src/coreclr/vm/stubgen.h
@@ -45,7 +45,6 @@ struct LocalDesc
     {
         Module*         pSigModule;
         size_t          cbArrayBoundsInfo;
-        BOOL            bIsCopyConstructed; // used for E_T_PTR
     };
 
     LocalDesc()
@@ -56,7 +55,6 @@ struct LocalDesc
     {
         ElementType[0]     = static_cast<BYTE>(elemType);
         cbType             = 1;
-        bIsCopyConstructed = FALSE;
     }
 
     inline LocalDesc(TypeHandle thType)
@@ -64,7 +62,6 @@ struct LocalDesc
         ElementType[0]     = ELEMENT_TYPE_INTERNAL;
         cbType             = 1;
         InternalToken      = thType;
-        bIsCopyConstructed = FALSE;
     }
 
     inline LocalDesc(MethodTable *pMT)
@@ -73,7 +70,6 @@ struct LocalDesc
         ElementType[0]     = ELEMENT_TYPE_INTERNAL;
         cbType             = 1;
         InternalToken      = TypeHandle(pMT);
-        bIsCopyConstructed = FALSE;
     }
 
     void MakeByRef()
@@ -94,14 +90,6 @@ struct LocalDesc
         ChangeType(ELEMENT_TYPE_SZARRAY);
     }
 
-    // makes the LocalDesc semantically equivalent to ET_TYPE_CMOD_REQD<IsCopyConstructed>/ET_TYPE_CMOD_REQD<NeedsCopyConstructorModifier>
-    void MakeCopyConstructedPointer()
-    {
-        LIMITED_METHOD_CONTRACT;
-        MakePointer();
-        bIsCopyConstructed = TRUE;
-    }
-
     void MakePointer()
     {
         LIMITED_METHOD_CONTRACT;
@@ -307,10 +295,13 @@ class TokenLookupMap
         for (COUNT_T i = 0; i < pSrc->m_signatures.GetCount(); i++)
         {
             const CQuickBytesSpecifySize<16>& src = pSrc->m_signatures[i];
-            CQuickBytesSpecifySize<16>& dst = *m_signatures.Append();
-            dst.AllocThrows(src.Size());
-            memcpy(dst.Ptr(), src.Ptr(), src.Size());
+            auto dst = m_signatures.Append();
+            dst->AllocThrows(src.Size());
+            memcpy(dst->Ptr(), src.Ptr(), src.Size());
         }
+
+        m_memberRefs.Set(pSrc->m_memberRefs);
+        m_methodSpecs.Set(pSrc->m_methodSpecs);
     }
 
     TypeHandle LookupTypeDef(mdToken token)
@@ -328,6 +319,55 @@ class TokenLookupMap
         WRAPPER_NO_CONTRACT;
         return LookupTokenWorker<mdtFieldDef, FieldDesc*>(token);
     }
+
+    struct MemberRefEntry final
+    {
+        CorTokenType Type;
+        mdToken ClassSignatureToken;
+        union
+        {
+            FieldDesc* Field;
+            MethodDesc* Method;
+        } Entry;
+    };
+    MemberRefEntry LookupMemberRef(mdToken token)
+    {
+        CONTRACTL
+        {
+            NOTHROW;
+            MODE_ANY;
+            GC_NOTRIGGER;
+            PRECONDITION(RidFromToken(token) - 1 < m_memberRefs.GetCount());
+            PRECONDITION(RidFromToken(token) != 0);
+            PRECONDITION(TypeFromToken(token) == mdtMemberRef);
+        }
+        CONTRACTL_END;
+
+        return m_memberRefs[static_cast<COUNT_T>(RidFromToken(token) - 1)];
+    }
+
+    struct MethodSpecEntry final
+    {
+        mdToken ClassSignatureToken;
+        mdToken MethodSignatureToken;
+        MethodDesc* Method;
+    };
+    MethodSpecEntry LookupMethodSpec(mdToken token)
+    {
+        CONTRACTL
+        {
+            NOTHROW;
+            MODE_ANY;
+            GC_NOTRIGGER;
+            PRECONDITION(RidFromToken(token) - 1 < m_methodSpecs.GetCount());
+            PRECONDITION(RidFromToken(token) != 0);
+            PRECONDITION(TypeFromToken(token) == mdtMethodSpec);
+        }
+        CONTRACTL_END;
+
+        return m_methodSpecs[static_cast<COUNT_T>(RidFromToken(token) - 1)];
+    }
+
     SigPointer LookupSig(mdToken token)
     {
         CONTRACTL
@@ -357,11 +397,67 @@ class TokenLookupMap
         WRAPPER_NO_CONTRACT;
         return GetTokenWorker<mdtMethodDef, MethodDesc*>(pMD);
     }
+    mdToken GetToken(MethodDesc* pMD, mdToken typeSignature)
+    {
+        CONTRACTL
+        {
+            THROWS;
+            MODE_ANY;
+            GC_NOTRIGGER;
+            PRECONDITION(pMD != NULL);
+        }
+        CONTRACTL_END;
+
+        MemberRefEntry* entry;
+        mdToken token = GetMemberRefWorker(&entry);
+        entry->Type = mdtMethodDef;
+        entry->ClassSignatureToken = typeSignature;
+        entry->Entry.Method = pMD;
+        return token;
+    }
+    mdToken GetToken(MethodDesc* pMD, mdToken typeSignature, mdToken methodSignature)
+    {
+        CONTRACTL
+        {
+            THROWS;
+            MODE_ANY;
+            GC_NOTRIGGER;
+            PRECONDITION(pMD != NULL);
+            PRECONDITION(typeSignature != mdTokenNil);
+            PRECONDITION(methodSignature != mdTokenNil);
+        }
+        CONTRACTL_END;
+
+        MethodSpecEntry* entry;
+        mdToken token = GetMethodSpecWorker(&entry);
+        entry->ClassSignatureToken = typeSignature;
+        entry->MethodSignatureToken = methodSignature;
+        entry->Method = pMD;
+        return token;
+    }
     mdToken GetToken(FieldDesc* pFieldDesc)
     {
         WRAPPER_NO_CONTRACT;
         return GetTokenWorker<mdtFieldDef, FieldDesc*>(pFieldDesc);
     }
+    mdToken GetToken(FieldDesc* pFieldDesc, mdToken typeSignature)
+    {
+        CONTRACTL
+        {
+            THROWS;
+            MODE_ANY;
+            GC_NOTRIGGER;
+            PRECONDITION(pFieldDesc != NULL);
+        }
+        CONTRACTL_END;
+
+        MemberRefEntry* entry;
+        mdToken token = GetMemberRefWorker(&entry);
+        entry->Type = mdtFieldDef;
+        entry->ClassSignatureToken = typeSignature;
+        entry->Entry.Field = pFieldDesc;
+        return token;
+    }
 
     mdToken GetSigToken(PCCOR_SIGNATURE pSig, DWORD cbSig)
     {
@@ -382,6 +478,38 @@ class TokenLookupMap
     }
 
 protected:
+    mdToken GetMemberRefWorker(MemberRefEntry** entry)
+    {
+        CONTRACTL
+        {
+            THROWS;
+            MODE_ANY;
+            GC_NOTRIGGER;
+            PRECONDITION(entry != NULL);
+        }
+        CONTRACTL_END;
+
+        mdToken token = TokenFromRid(m_memberRefs.GetCount(), mdtMemberRef) + 1;
+        *entry = &*m_memberRefs.Append(); // Dereference the iterator and then take the address
+        return token;
+    }
+
+    mdToken GetMethodSpecWorker(MethodSpecEntry** entry)
+    {
+        CONTRACTL
+        {
+            THROWS;
+            MODE_ANY;
+            GC_NOTRIGGER;
+            PRECONDITION(entry != NULL);
+        }
+        CONTRACTL_END;
+
+        mdToken token = TokenFromRid(m_methodSpecs.GetCount(), mdtMethodSpec) + 1;
+        *entry = &*m_methodSpecs.Append(); // Dereference the iterator and then take the address
+        return token;
+    }
+
     template<mdToken TokenType, typename HandleType>
     HandleType LookupTokenWorker(mdToken token)
     {
@@ -423,9 +551,11 @@ class TokenLookupMap
         return token;
     }
 
-    unsigned int                                   m_nextAvailableRid;
+    uint32_t                                       m_nextAvailableRid;
     CQuickBytesSpecifySize<TOKEN_LOOKUP_MAP_SIZE>  m_qbEntries;
     SArray<CQuickBytesSpecifySize<16>, FALSE>      m_signatures;
+    SArray<MemberRefEntry, FALSE>                  m_memberRefs;
+    SArray<MethodSpecEntry, FALSE>                 m_methodSpecs;
 };
 
 class ILCodeLabel;
@@ -592,9 +722,12 @@ class ILStubLinker
     //
     ILCodeLabel* NewCodeLabel();
     int GetToken(MethodDesc* pMD);
+    int GetToken(MethodDesc* pMD, mdToken typeSignature);
+    int GetToken(MethodDesc* pMD, mdToken typeSignature, mdToken methodSignature);
     int GetToken(MethodTable* pMT);
     int GetToken(TypeHandle th);
     int GetToken(FieldDesc* pFD);
+    int GetToken(FieldDesc* pFD, mdToken typeSignature);
     int GetSigToken(PCCOR_SIGNATURE pSig, DWORD cbSig);
     DWORD NewLocal(CorElementType typ = ELEMENT_TYPE_I);
     DWORD NewLocal(LocalDesc loc);
@@ -821,9 +954,12 @@ class ILCodeStream
     //
 
     int GetToken(MethodDesc* pMD);
+    int GetToken(MethodDesc* pMD, mdToken typeSignature);
+    int GetToken(MethodDesc* pMD, mdToken typeSignature, mdToken methodSignature);
     int GetToken(MethodTable* pMT);
     int GetToken(TypeHandle th);
     int GetToken(FieldDesc* pFD);
+    int GetToken(FieldDesc* pFD, mdToken typeSignature);
     int GetSigToken(PCCOR_SIGNATURE pSig, DWORD cbSig);
 
     DWORD NewLocal(CorElementType typ = ELEMENT_TYPE_I);
diff --git a/src/coreclr/vm/stubhelpers.cpp b/src/coreclr/vm/stubhelpers.cpp
index 35ce9bd3dfa7..95a5650a199f 100644
--- a/src/coreclr/vm/stubhelpers.cpp
+++ b/src/coreclr/vm/stubhelpers.cpp
@@ -422,17 +422,6 @@ extern "C" void QCALLTYPE InterfaceMarshaler_ConvertToManaged(IUnknown** ppUnk,
 
 #endif // FEATURE_COMINTEROP
 
-FCIMPL0(void, StubHelpers::SetLastError)
-{
-    // Make sure this is the first thing we do after returning from the target, as almost everything can cause the last error to get trashed
-    DWORD lastError = ::GetLastError();
-
-    FCALL_CONTRACT;
-
-    GetThread()->m_dwLastError = lastError;
-}
-FCIMPLEND
-
 FCIMPL0(void, StubHelpers::ClearLastError)
 {
     FCALL_CONTRACT;
diff --git a/src/coreclr/vm/stubmgr.cpp b/src/coreclr/vm/stubmgr.cpp
index d6258ac5fd59..68af3ecf200a 100644
--- a/src/coreclr/vm/stubmgr.cpp
+++ b/src/coreclr/vm/stubmgr.cpp
@@ -718,22 +718,6 @@ void StubManager::InitializeStubManagers()
 #endif // !DACCESS_COMPILE
 }
 
-//-----------------------------------------------------------
-// Terminate the global stub manager service.
-//-----------------------------------------------------------
-void StubManager::TerminateStubManagers()
-{
-#if !defined(DACCESS_COMPILE)
-
-#if defined(_DEBUG)
-    DbgFinishLog();
-    s_DbgLogCrst.Destroy();
-#endif
-
-    s_StubManagerListCrst.Destroy();
-#endif // !DACCESS_COMPILE
-}
-
 #ifdef _DEBUG
 
 //-----------------------------------------------------------
diff --git a/src/coreclr/vm/stubmgr.h b/src/coreclr/vm/stubmgr.h
index 6064939dc21f..9da4f61f6d2a 100644
--- a/src/coreclr/vm/stubmgr.h
+++ b/src/coreclr/vm/stubmgr.h
@@ -210,7 +210,6 @@ class StubManager
   public:
     // Startup and shutdown the global stubmanager service.
     static void InitializeStubManagers();
-    static void TerminateStubManagers();
 
     // Does any sub manager recognise this EIP?
     static BOOL IsStub(PCODE stubAddress)
diff --git a/src/coreclr/vm/syncblk.cpp b/src/coreclr/vm/syncblk.cpp
index 2cc7de6bd2b0..eb0b29fafdd0 100644
--- a/src/coreclr/vm/syncblk.cpp
+++ b/src/coreclr/vm/syncblk.cpp
@@ -2681,7 +2681,7 @@ BOOL AwareLock::EnterEpilogHelper(Thread* pCurThread, INT32 timeOut)
                 {
                     duration = end - start;
                 }
-                duration = min(duration, (DWORD)timeOut);
+                duration = min(duration, (ULONGLONG)timeOut);
                 timeOut -= (INT32)duration;
             }
         }
diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp
index 081f5c6cf7a7..f519ff758fbc 100644
--- a/src/coreclr/vm/threads.cpp
+++ b/src/coreclr/vm/threads.cpp
@@ -1513,7 +1513,6 @@ Thread::Thread()
     m_TraceCallCount = 0;
     m_ThrewControlForThread = 0;
     m_ThreadTasks = (ThreadTasks)0;
-    m_pLoadLimiter= NULL;
 
     // The state and the tasks must be 32-bit aligned for atomicity to be guaranteed.
     _ASSERTE((((size_t) &m_State) & 3) == 0);
@@ -1548,17 +1547,11 @@ Thread::Thread()
     m_RedirectContextInUse = false;
 #endif
 
-#ifdef FEATURE_COMINTEROP
-    m_pRCWStack = new RCWStackHeader();
-#endif
-
 #ifdef _DEBUG
     m_bGCStressing = FALSE;
     m_bUniqueStacking = FALSE;
 #endif
 
-    m_pPendingTypeLoad = NULL;
-
     m_dwAVInRuntimeImplOkayCount = 0;
 
 #if defined(HAVE_GCCOVER) && defined(USE_REDIRECT_FOR_GCSTRESS) && !defined(TARGET_UNIX) // GCCOVER
@@ -1745,8 +1738,6 @@ void Thread::InitThread()
 
     _ASSERTE(HasValidThreadHandle());
 
-    m_random.Init();
-
     // Set floating point mode to round to nearest
 #ifndef TARGET_UNIX
     (void) _controlfp_s( NULL, _RC_NEAR, _RC_CHOP|_RC_UP|_RC_DOWN|_RC_NEAR );
@@ -2648,11 +2639,6 @@ Thread::~Thread()
     MarkRedirectContextInUse(m_pSavedRedirectContext);
     m_pSavedRedirectContext = NULL;
 
-#ifdef FEATURE_COMINTEROP
-    if (m_pRCWStack)
-        delete m_pRCWStack;
-#endif
-
     if (m_pExceptionDuringStartup)
     {
         Exception::Delete (m_pExceptionDuringStartup);
@@ -5167,7 +5153,6 @@ Thread::ApartmentState Thread::SetApartment(ApartmentState state)
 ThreadStore::ThreadStore()
            : m_Crst(CrstThreadStore, (CrstFlags) (CRST_UNSAFE_ANYMODE | CRST_DEBUGGER_THREAD)),
              m_ThreadCount(0),
-             m_MaxThreadCount(0),
              m_UnstartedThreadCount(0),
              m_BackgroundThreadCount(0),
              m_PendingThreadCount(0),
@@ -5286,8 +5271,6 @@ void ThreadStore::AddThread(Thread *newThread)
     s_pThreadStore->m_ThreadList.InsertTail(newThread);
 
     s_pThreadStore->m_ThreadCount++;
-    if (s_pThreadStore->m_MaxThreadCount < s_pThreadStore->m_ThreadCount)
-        s_pThreadStore->m_MaxThreadCount = s_pThreadStore->m_ThreadCount;
 
     if (newThread->IsUnstarted())
         s_pThreadStore->m_UnstartedThreadCount++;
@@ -7050,12 +7033,12 @@ bool Thread::InitRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, bool validCo
 }
 
 
-void Thread::FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx)
+void Thread::FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, bool fLightUnwind)
 {
     WRAPPER_NO_CONTRACT;
     SUPPORTS_DAC;
 
-    ::FillRegDisplay(pRD, pctx);
+    ::FillRegDisplay(pRD, pctx, NULL, fLightUnwind);
 
 #if defined(DEBUG_REGDISPLAY) && !defined(TARGET_X86)
     CONSISTENCY_CHECK(!pRD->_pThread || pRD->_pThread == this);
@@ -7784,7 +7767,7 @@ OBJECTREF Thread::GetCulture(BOOL bUICulture)
 
     // This is the case when we're building CoreLib and haven't yet created
     // the system assembly.
-    if (SystemDomain::System()->SystemAssembly()==NULL || g_fForbidEnterEE) {
+    if (SystemDomain::System()->SystemAssembly()==NULL) {
         return NULL;
     }
 
@@ -7906,32 +7889,6 @@ INT32 Thread::ResetManagedThreadObjectInCoopMode(INT32 nPriority)
     return nPriority;
 }
 
-BOOL Thread::IsRealThreadPoolResetNeeded()
-{
-    CONTRACTL
-    {
-        NOTHROW;
-        GC_NOTRIGGER;
-        MODE_COOPERATIVE;
-    }
-    CONTRACTL_END;
-
-    if(!IsBackground())
-        return TRUE;
-
-    THREADBASEREF pObject = (THREADBASEREF)ObjectFromHandle(m_ExposedObject);
-
-    if(pObject != NULL)
-    {
-        INT32 nPriority = pObject->GetPriority();
-
-        if(nPriority != ThreadNative::PRIORITY_NORMAL)
-            return TRUE;
-    }
-
-    return FALSE;
-}
-
 void Thread::InternalReset(BOOL fNotFinalizerThread, BOOL fThreadObjectResetNeeded, BOOL fResetAbort)
 {
     CONTRACTL {
@@ -8276,12 +8233,7 @@ void Thread::InitializeSpecialUserModeApc()
         return;
     }
 
-    // In the future, once code paths using the special user-mode APC get some bake time, it should be used regardless of
-    // whether CET shadow stacks are enabled
-    if (AreCetShadowStacksEnabled())
-    {
-        s_pfnQueueUserAPC2Proc = pfnQueueUserAPC2Proc;
-    }
+    s_pfnQueueUserAPC2Proc = pfnQueueUserAPC2Proc;
 }
 
 #endif // FEATURE_SPECIAL_USER_MODE_APC
diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h
index bfb154b0e539..b9472590fc8b 100644
--- a/src/coreclr/vm/threads.h
+++ b/src/coreclr/vm/threads.h
@@ -131,7 +131,6 @@ class     NDirect;
 class     Frame;
 class     ThreadBaseObject;
 class     AppDomainStack;
-class     LoadLevelLimiter;
 class     DomainAssembly;
 class     DeadlockAwareLock;
 struct    HelperMethodFrameCallerList;
@@ -141,7 +140,6 @@ class     FaultingExceptionFrame;
 enum      BinderMethodID : int;
 class     CRWLock;
 struct    LockEntry;
-class     PendingTypeLoadHolder;
 class     PrepareCodeConfig;
 class     NativeCodeVersion;
 
@@ -153,7 +151,6 @@ typedef void(*ADCallBackFcnType)(LPVOID);
 
 #include "stackwalktypes.h"
 #include "log.h"
-#include "stackingallocator.h"
 #include "excep.h"
 #include "synch.h"
 #include "exstate.h"
@@ -462,288 +459,6 @@ struct LockEntry
 BOOL MatchThreadHandleToOsId ( HANDLE h, DWORD osId );
 #endif
 
-#ifdef FEATURE_COMINTEROP
-
-#define RCW_STACK_SIZE 64
-
-class RCWStack
-{
-public:
-    inline RCWStack()
-    {
-        LIMITED_METHOD_CONTRACT;
-        memset(this, 0, sizeof(RCWStack));
-    }
-
-    inline VOID SetEntry(unsigned int index, RCW* pRCW)
-    {
-        CONTRACTL
-        {
-            NOTHROW;
-            GC_NOTRIGGER;
-            MODE_ANY;
-            PRECONDITION(index < RCW_STACK_SIZE);
-            PRECONDITION(CheckPointer(pRCW, NULL_OK));
-        }
-        CONTRACTL_END;
-
-        m_pList[index] = pRCW;
-    }
-
-    inline RCW* GetEntry(unsigned int index)
-    {
-        CONTRACT (RCW*)
-        {
-            NOTHROW;
-            GC_NOTRIGGER;
-            MODE_ANY;
-            PRECONDITION(index < RCW_STACK_SIZE);
-        }
-        CONTRACT_END;
-
-        RETURN m_pList[index];
-    }
-
-    inline VOID SetNextStack(RCWStack* pStack)
-    {
-        CONTRACTL
-        {
-            NOTHROW;
-            GC_NOTRIGGER;
-            MODE_ANY;
-            PRECONDITION(CheckPointer(pStack));
-            PRECONDITION(m_pNext == NULL);
-        }
-        CONTRACTL_END;
-
-        m_pNext = pStack;
-    }
-
-    inline RCWStack* GetNextStack()
-    {
-        CONTRACT (RCWStack*)
-        {
-            NOTHROW;
-            GC_NOTRIGGER;
-            MODE_ANY;
-            POSTCONDITION(CheckPointer(RETVAL, NULL_OK));
-        }
-        CONTRACT_END;
-
-        RETURN m_pNext;
-    }
-
-private:
-    RCWStack*   m_pNext;
-    RCW*        m_pList[RCW_STACK_SIZE];
-};
-
-
-class RCWStackHeader
-{
-public:
-    RCWStackHeader()
-    {
-        CONTRACTL
-        {
-            THROWS;
-            GC_NOTRIGGER;
-            MODE_ANY;
-        }
-        CONTRACTL_END;
-
-        m_iIndex = 0;
-        m_iSize = RCW_STACK_SIZE;
-        m_pHead = new RCWStack();
-    }
-
-    ~RCWStackHeader()
-    {
-        CONTRACTL
-        {
-            NOTHROW;
-            GC_NOTRIGGER;
-            MODE_ANY;
-        }
-        CONTRACTL_END;
-
-        RCWStack* pStack = m_pHead;
-        RCWStack* pNextStack = NULL;
-
-        while (pStack)
-        {
-            pNextStack = pStack->GetNextStack();
-            delete pStack;
-            pStack = pNextStack;
-        }
-    }
-
-    bool Push(RCW* pRCW)
-    {
-        CONTRACTL
-        {
-            NOTHROW;
-            GC_NOTRIGGER;
-            MODE_ANY;
-            PRECONDITION(CheckPointer(pRCW, NULL_OK));
-        }
-        CONTRACTL_END;
-
-        if (!GrowListIfNeeded())
-            return false;
-
-        // Fast Path
-        if (m_iIndex < RCW_STACK_SIZE)
-        {
-            m_pHead->SetEntry(m_iIndex, pRCW);
-            m_iIndex++;
-            return true;
-        }
-
-        // Slow Path
-        unsigned int count = m_iIndex;
-        RCWStack* pStack = m_pHead;
-        while (count >= RCW_STACK_SIZE)
-        {
-            pStack = pStack->GetNextStack();
-            _ASSERTE(pStack);
-
-            count -= RCW_STACK_SIZE;
-        }
-
-        pStack->SetEntry(count, pRCW);
-        m_iIndex++;
-        return true;
-    }
-
-    RCW* Pop()
-    {
-        CONTRACT (RCW*)
-        {
-            NOTHROW;
-            GC_NOTRIGGER;
-            MODE_ANY;
-            PRECONDITION(m_iIndex > 0);
-            POSTCONDITION(CheckPointer(RETVAL, NULL_OK));
-        }
-        CONTRACT_END;
-
-        RCW* pRCW = NULL;
-
-        m_iIndex--;
-
-        // Fast Path
-        if (m_iIndex < RCW_STACK_SIZE)
-        {
-            pRCW = m_pHead->GetEntry(m_iIndex);
-            m_pHead->SetEntry(m_iIndex, NULL);
-            RETURN pRCW;
-        }
-
-        // Slow Path
-        unsigned int count = m_iIndex;
-        RCWStack* pStack = m_pHead;
-        while (count >= RCW_STACK_SIZE)
-        {
-            pStack = pStack->GetNextStack();
-            _ASSERTE(pStack);
-            count -= RCW_STACK_SIZE;
-        }
-
-        pRCW = pStack->GetEntry(count);
-        pStack->SetEntry(count, NULL);
-
-        RETURN pRCW;
-    }
-
-    BOOL IsInStack(RCW* pRCW)
-    {
-        CONTRACTL
-        {
-            NOTHROW;
-            GC_NOTRIGGER;
-            MODE_ANY;
-            PRECONDITION(CheckPointer(pRCW));
-        }
-        CONTRACTL_END;
-
-        if (m_iIndex == 0)
-            return FALSE;
-
-        // Fast Path
-        if (m_iIndex <= RCW_STACK_SIZE)
-        {
-            for (int i = 0; i < (int)m_iIndex; i++)
-            {
-                if (pRCW == m_pHead->GetEntry(i))
-                    return TRUE;
-            }
-
-            return FALSE;
-        }
-
-        // Slow Path
-        RCWStack* pStack = m_pHead;
-        int totalcount = 0;
-        while (pStack != NULL)
-        {
-            for (int i = 0; (i < RCW_STACK_SIZE) && (totalcount < m_iIndex); i++, totalcount++)
-            {
-                if (pRCW == pStack->GetEntry(i))
-                    return TRUE;
-            }
-
-            pStack = pStack->GetNextStack();
-        }
-
-        return FALSE;
-    }
-
-private:
-    bool GrowListIfNeeded()
-    {
-        CONTRACTL
-        {
-            NOTHROW;
-            GC_NOTRIGGER;
-            MODE_ANY;
-            INJECT_FAULT(COMPlusThrowOM());
-            PRECONDITION(CheckPointer(m_pHead));
-        }
-        CONTRACTL_END;
-
-        if (m_iIndex == m_iSize)
-        {
-            RCWStack* pStack = m_pHead;
-            RCWStack* pNextStack = NULL;
-            while ( (pNextStack = pStack->GetNextStack()) != NULL)
-                pStack = pNextStack;
-
-            RCWStack* pNewStack = new (nothrow) RCWStack();
-            if (NULL == pNewStack)
-                return false;
-
-            pStack->SetNextStack(pNewStack);
-
-            m_iSize += RCW_STACK_SIZE;
-        }
-
-        return true;
-    }
-
-    // Zero-based index to the first free element in the list.
-    int        m_iIndex;
-
-    // Total size of the list, including all stacks.
-    int        m_iSize;
-
-    // Pointer to the first stack.
-    RCWStack*           m_pHead;
-};
-
-#endif // FEATURE_COMINTEROP
-
-
 typedef DWORD (*AppropriateWaitFunc) (void *args, DWORD timeout, DWORD option);
 
 // The Thread class represents a managed thread.  This thread could be internal
@@ -874,15 +589,6 @@ class Thread
     }
 
 public:
-    // Allocator used during marshaling for temporary buffers, much faster than
-    // heap allocation.
-    //
-    // Uses of this allocator should be effectively statically scoped, i.e. a "region"
-    // is started using a CheckPointHolder and GetCheckpoint, and this region can then be used for allocations
-    // from that point onwards, and then all memory is reclaimed when the static scope for the
-    // checkpoint is exited by the running thread.
-    StackingAllocator* m_stackLocalAllocator = NULL;
-
     // If we are trying to suspend a thread, we set the appropriate pending bit to
     // indicate why we want to suspend it (TS_GCSuspendPending or TS_DebugSuspendPending).
     //
@@ -932,7 +638,7 @@ class Thread
         TS_ReportDead             = 0x00010000,    // in WaitForOtherThreads()
         TS_FullyInitialized       = 0x00020000,    // Thread is fully initialized and we are ready to broadcast its existence to external clients
 
-        TS_TaskReset              = 0x00040000,    // The task is reset
+        // unused                 = 0x00040000,
 
         TS_SyncSuspended          = 0x00080000,    // Suspended via WaitSuspendEvent
         TS_DebugWillSync          = 0x00100000,    // Debugger will wait for this thread to sync
@@ -994,7 +700,7 @@ class Thread
         // unused                       = 0x00000040,
         TSNC_CLRCreatedThread           = 0x00000080, // The thread was created through Thread::CreateNewThread
         TSNC_ExistInThreadStore         = 0x00000100, // For dtor to know if it needs to be removed from ThreadStore
-        TSNC_UnsafeSkipEnterCooperative = 0x00000200, // This is a "fix" for deadlocks caused when cleaning up COM
+        // unused                       = 0x00000200,
         TSNC_OwnsSpinLock               = 0x00000400, // The thread owns a spinlock.
         TSNC_PreparingAbort             = 0x00000800, // Preparing abort.  This avoids recursive HandleThreadAbort call.
         TSNC_OSAlertableWait            = 0x00001000, // Preparing abort.  This avoids recursive HandleThreadAbort call.
@@ -1047,7 +753,7 @@ class Thread
     void InternalReset (BOOL fNotFinalizerThread=FALSE, BOOL fThreadObjectResetNeeded=TRUE, BOOL fResetAbort=TRUE);
     INT32 ResetManagedThreadObject(INT32 nPriority);
     INT32 ResetManagedThreadObjectInCoopMode(INT32 nPriority);
-    BOOL  IsRealThreadPoolResetNeeded();
+
 public:
     HRESULT DetachThread(BOOL fDLLThreadDetach);
 
@@ -1324,76 +1030,6 @@ class Thread
     Frame* NotifyFrameChainOfExceptionUnwind(Frame* pStartFrame, LPVOID pvLimitSP);
 #endif // DACCESS_COMPILE
 
-#if defined(FEATURE_COMINTEROP) && !defined(DACCESS_COMPILE)
-    void RegisterRCW(RCW *pRCW)
-    {
-        CONTRACTL
-        {
-            THROWS;
-            GC_NOTRIGGER;
-            MODE_ANY;
-            PRECONDITION(CheckPointer(pRCW));
-        }
-        CONTRACTL_END;
-
-        if (!m_pRCWStack->Push(pRCW))
-        {
-            ThrowOutOfMemory();
-        }
-    }
-
-    // Returns false on OOM.
-    BOOL RegisterRCWNoThrow(RCW *pRCW)
-    {
-        CONTRACTL
-        {
-            NOTHROW;
-            GC_NOTRIGGER;
-            MODE_ANY;
-            PRECONDITION(CheckPointer(pRCW, NULL_OK));
-        }
-        CONTRACTL_END;
-
-        return m_pRCWStack->Push(pRCW);
-    }
-
-    RCW *UnregisterRCW(INDEBUG(SyncBlock *pSB))
-    {
-        CONTRACTL
-        {
-            NOTHROW;
-            GC_NOTRIGGER;
-            MODE_ANY;
-            PRECONDITION(CheckPointer(pSB));
-        }
-        CONTRACTL_END;
-
-        RCW* pPoppedRCW = m_pRCWStack->Pop();
-
-#ifdef _DEBUG
-        // The RCW we popped must be the one pointed to by pSB if pSB still points to an RCW.
-        RCW* pCurrentRCW = pSB->GetInteropInfoNoCreate()->GetRawRCW();
-        _ASSERTE(pCurrentRCW == NULL || pPoppedRCW == NULL || pCurrentRCW == pPoppedRCW);
-#endif // _DEBUG
-
-        return pPoppedRCW;
-    }
-
-    BOOL RCWIsInUse(RCW* pRCW)
-    {
-        CONTRACTL
-        {
-            NOTHROW;
-            GC_NOTRIGGER;
-            MODE_ANY;
-            PRECONDITION(CheckPointer(pRCW));
-        }
-        CONTRACTL_END;
-
-        return m_pRCWStack->IsInStack(pRCW);
-    }
-#endif // FEATURE_COMINTEROP && !DACCESS_COMPILE
-
     // Lock thread is trying to acquire
     VolatilePtr<DeadlockAwareLock> m_pBlockingLock;
 
@@ -1427,11 +1063,6 @@ class Thread
 
     inline TypeHandle GetTHAllocContextObj() {LIMITED_METHOD_CONTRACT; return m_thAllocContextObj; }
 
-#ifdef FEATURE_COMINTEROP
-    // The header for the per-thread in-use RCW stack.
-    RCWStackHeader*      m_pRCWStack;
-#endif // FEATURE_COMINTEROP
-
     // Flags used to indicate tasks the thread has to do.
     ThreadTasks          m_ThreadTasks;
 
@@ -1555,24 +1186,6 @@ class Thread
     }
 #endif
 
- private:
-    LoadLevelLimiter *m_pLoadLimiter;
-
- public:
-    LoadLevelLimiter *GetLoadLevelLimiter()
-    {
-        LIMITED_METHOD_CONTRACT;
-        return m_pLoadLimiter;
-    }
-
-    void SetLoadLevelLimiter(LoadLevelLimiter *limiter)
-    {
-        LIMITED_METHOD_CONTRACT;
-        m_pLoadLimiter = limiter;
-    }
-
-
-
 public:
     //--------------------------------------------------------------
     // Constructor.
@@ -2620,26 +2233,6 @@ class Thread
 #endif // !DACCESS_COMPILE
 #endif // FEATURE_EMULATE_SINGLESTEP
 
-    private:
-
-    PendingTypeLoadHolder* m_pPendingTypeLoad;
-
-    public:
-
-#ifndef DACCESS_COMPILE
-    PendingTypeLoadHolder* GetPendingTypeLoad()
-    {
-        LIMITED_METHOD_CONTRACT;
-        return m_pPendingTypeLoad;
-    }
-
-    void SetPendingTypeLoad(PendingTypeLoadHolder* pPendingTypeLoad)
-    {
-        LIMITED_METHOD_CONTRACT;
-        m_pPendingTypeLoad = pPendingTypeLoad;
-    }
-#endif
-
     public:
 
     // Indicate whether this thread should run in the background.  Background threads
@@ -2709,14 +2302,16 @@ class Thread
 
     #define POPFRAMES                       0x0004
 
-    /* use the following  flag only if you REALLY know what you are doing !!! */
     #define QUICKUNWIND                     0x0008 // do not restore all registers during unwind
 
     #define HANDLESKIPPEDFRAMES             0x0010 // temporary to handle skipped frames for appdomain unload
                                                    // stack crawl. Eventually need to always do this but it
                                                    // breaks the debugger right now.
 
-    #define LIGHTUNWIND                     0x0020 // allow using cache schema (see StackwalkCache class)
+    #define LIGHTUNWIND                     0x0020 // Unwind PC+SP+FP only.
+                                                   // - Implemented on x64 only.
+                                                   // - Expects the initial context to be outside prolog/epilog.
+                                                   // - Cannot unwind through methods with stackalloc
 
     #define NOTIFY_ON_U2M_TRANSITIONS       0x0040 // Provide a callback for native transitions.
                                                    // This is only useful to a debugger trying to find native code
@@ -2768,6 +2363,8 @@ class Thread
     // may still execute GS cookie tracking/checking code paths.
     #define SKIP_GSCOOKIE_CHECK 0x10000
 
+    #define UNWIND_FLOATS 0x20000
+
     StackWalkAction StackWalkFramesEx(
                         PREGDISPLAY pRD,        // virtual register set at crawl start
                         PSTACKWALKFRAMESCALLBACK pCallback,
@@ -2792,7 +2389,7 @@ class Thread
                         PTR_Frame pStartFrame = PTR_NULL);
 
     bool InitRegDisplay(const PREGDISPLAY, const PT_CONTEXT, bool validContext);
-    void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx);
+    void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, bool fLightUnwind = false);
 
 #ifdef FEATURE_EH_FUNCLETS
     static PCODE VirtualUnwindCallFrame(T_CONTEXT* pContext, T_KNONVOLATILE_CONTEXT_POINTERS* pContextPointers = NULL,
@@ -2857,18 +2454,6 @@ class Thread
     // making m_Link public.
     SLink       m_Link;
 
-    // For N/Direct calls with the "setLastError" bit, this field stores
-    // the errorcode from that call.
-    DWORD       m_dwLastError;
-
-#ifdef FEATURE_INTERPRETER
-    // When we're interpreting IL stubs for N/Direct calls with the "setLastError" bit,
-    // the interpretation will trash the last error before we get to the call to "SetLastError".
-    // Therefore, we record it here immediately after the calli, and treat "SetLastError" as an
-    // intrinsic that transfers the value stored here into the field above.
-    DWORD       m_dwLastErrorInterp;
-#endif
-
     // Debugger per-thread flag for enabling notification on "manual"
     // method calls,  for stepping logic
     void IncrementTraceCallCount();
@@ -4237,11 +3822,6 @@ class Thread
     // See ThreadStore::TriggerGCForDeadThreadsIfNecessary()
     bool m_fHasDeadThreadBeenConsideredForGCTrigger;
 
-    CLRRandom m_random;
-
-public:
-    CLRRandom* GetRandom() {return &m_random;}
-
 #ifdef FEATURE_COMINTEROP
 private:
     // Cookie returned from CoRegisterInitializeSpy
@@ -4618,26 +4198,15 @@ class ThreadStore
     // m_DeadThreadCount is the subset of m_ThreadCount which have died.  The Win32
     // thread has disappeared, but something (like the exposed object) has kept the
     // refcount non-zero so we can't destruct yet.
-    //
-    // m_MaxThreadCount is the maximum value of m_ThreadCount. ie. the largest number
-    // of simultaneously active threads
 
 protected:
     LONG        m_ThreadCount;
-    LONG        m_MaxThreadCount;
 public:
     LONG        ThreadCountInEE ()
     {
         LIMITED_METHOD_CONTRACT;
         return m_ThreadCount;
     }
-#if defined(_DEBUG) || defined(DACCESS_COMPILE)
-    LONG        MaxThreadCountInEE ()
-    {
-        LIMITED_METHOD_CONTRACT;
-        return m_MaxThreadCount;
-    }
-#endif
 private:
     LONG        m_UnstartedThreadCount;
     LONG        m_BackgroundThreadCount;
@@ -6218,9 +5787,7 @@ class StackWalkerWalkingThreadHolder
 #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64)
 EXTERN_C void STDCALL ClrRestoreNonvolatileContextWorker(PCONTEXT ContextRecord, DWORD64 ssp);
 #endif
-#if !(defined(TARGET_WINDOWS) && defined(TARGET_X86))
 void ClrRestoreNonvolatileContext(PCONTEXT ContextRecord);
-#endif
 #endif // DACCESS_COMPILE
 
 #endif //__threads_h__
diff --git a/src/coreclr/vm/threadstatics.cpp b/src/coreclr/vm/threadstatics.cpp
index 94088ba39994..6a8a43a0821e 100644
--- a/src/coreclr/vm/threadstatics.cpp
+++ b/src/coreclr/vm/threadstatics.cpp
@@ -125,7 +125,7 @@ void ThreadLocalBlock::EnsureModuleIndex(ModuleIndex index)
         return;
     }
 
-    SIZE_T aModuleIndices = max(16, m_TLMTableSize);
+    SIZE_T aModuleIndices = max((SIZE_T)16, m_TLMTableSize);
     while (aModuleIndices <= index.m_dwIndex)
     {
         aModuleIndices *= 2;
@@ -411,7 +411,7 @@ void    ThreadLocalModule::EnsureDynamicClassIndex(DWORD dwID)
         return;
     }
 
-    SIZE_T aDynamicEntries = max(16, m_aDynamicEntries);
+    SIZE_T aDynamicEntries = max((SIZE_T)16, m_aDynamicEntries);
     while (aDynamicEntries <= dwID)
     {
         aDynamicEntries *= 2;
diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp
index 84d1ade6037b..f79193888480 100644
--- a/src/coreclr/vm/threadsuspend.cpp
+++ b/src/coreclr/vm/threadsuspend.cpp
@@ -1117,11 +1117,11 @@ BOOL Thread::IsContextSafeToRedirect(const CONTEXT* pContext)
 #ifndef TARGET_UNIX
 
 #if !defined(TARGET_X86)
-    // In some cases (x86 WOW64, ARM32 on ARM64) Windows will not set the CONTEXT_EXCEPTION_REPORTING flag
-    // if the thread is executing in kernel mode (i.e. in the middle of a syscall or exception handling).
-    // Therefore, we should treat the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that
-    // it is not safe to manipulate with the current state of the thread context.
-    // Note: the x86 WOW64 case is already handled in GetSafelyRedirectableThreadContext; in addition, this
+    // In some cases Windows will not set the CONTEXT_EXCEPTION_REPORTING flag if the thread is executing
+    // in kernel mode (i.e. in the middle of a syscall or exception handling). Therefore, we should treat
+    // the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that it is not safe to
+    // manipulate with the current state of the thread context.
+    // Note: The x86 WOW64 case is already handled in GetSafelyRedirectableThreadContext; in addition, this
     // flag is never set on Windows7 x86 WOW64. So this check is valid for non-x86 architectures only.
     isSafeToRedirect = (pContext->ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0;
 #endif // !defined(TARGET_X86)
@@ -1382,7 +1382,7 @@ Thread::UserAbort(EEPolicy::ThreadAbortTypes abortType, DWORD timeout)
 
         // If a thread is Dead or Detached, abort is a NOP.
         //
-        if (m_State & (TS_Dead | TS_Detached | TS_TaskReset))
+        if (m_State & (TS_Dead | TS_Detached))
         {
             UnmarkThreadForAbort();
 
@@ -2099,9 +2099,6 @@ void Thread::RareDisablePreemptiveGC()
         goto Exit;
     }
 
-    // This should NEVER be called if the TSNC_UnsafeSkipEnterCooperative bit is set!
-    _ASSERTE(!(m_StateNC & TSNC_UnsafeSkipEnterCooperative) && "DisablePreemptiveGC called while the TSNC_UnsafeSkipEnterCooperative bit is set");
-
     // Holding a spin lock in preemp mode and switch to coop mode could cause other threads spinning
     // waiting for GC
     _ASSERTE ((m_StateNC & Thread::TSNC_OwnsSpinLock) == 0);
@@ -3973,7 +3970,8 @@ ThrowControlForThread(
         exceptionRecord.ExceptionFlags = 0;
 
         OBJECTREF throwable = ExceptionTracker::CreateThrowable(&exceptionRecord, TRUE);
-        DispatchManagedException(throwable);
+        pfef->GetExceptionContext()->ContextFlags |= CONTEXT_EXCEPTION_ACTIVE;
+        DispatchManagedException(throwable, pfef->GetExceptionContext());
     }
     else
 #endif // FEATURE_EH_FUNCLETS
diff --git a/src/coreclr/vm/typedesc.cpp b/src/coreclr/vm/typedesc.cpp
index ccb342ca0358..72bc9e50c29b 100644
--- a/src/coreclr/vm/typedesc.cpp
+++ b/src/coreclr/vm/typedesc.cpp
@@ -1497,7 +1497,7 @@ BOOL TypeVarTypeDesc::SatisfiesConstraints(SigTypeContext *pTypeContextOfConstra
                 return FALSE;
         }
 
-        if (thArg.IsByRefLike() && (specialConstraints & gpAcceptByRefLike) == 0)
+        if (thArg.IsByRefLike() && (specialConstraints & gpAllowByRefLike) == 0)
             return FALSE;
     }
 
diff --git a/src/coreclr/vm/typehandle.h b/src/coreclr/vm/typehandle.h
index 8483a935af61..f0f5a4604ab2 100644
--- a/src/coreclr/vm/typehandle.h
+++ b/src/coreclr/vm/typehandle.h
@@ -647,9 +647,7 @@ inline CHECK CheckPointer(TypeHandle th, IsNullOK ok = NULL_NOT_OK)
 
 /*************************************************************************/
 // Instantiation is representation of generic instantiation.
-// It is simple read-only array of TypeHandles. In NGen, the type handles
-// may be encoded using indirections. That's one reason why it is convenient
-// to have wrapper class that performs the decoding.
+// It is simple read-only array of TypeHandles.
 class Instantiation
 {
 public:
@@ -695,6 +693,14 @@ class Instantiation
     }
 #endif
 
+    Instantiation& operator=(const Instantiation& inst)
+    {
+        _ASSERTE(this != &inst);
+        m_pArgs = inst.m_pArgs;
+        m_nArgs = inst.m_nArgs;
+        return *this;
+    }
+
     // Return i-th instantiation argument
     TypeHandle operator[](DWORD iArg) const
     {
diff --git a/src/coreclr/vm/util.cpp b/src/coreclr/vm/util.cpp
index 7412b03b3b4a..58dc2d28ee37 100644
--- a/src/coreclr/vm/util.cpp
+++ b/src/coreclr/vm/util.cpp
@@ -1774,21 +1774,19 @@ static BOOL TrustMeIAmSafe(void *pLock)
 
 LockOwner g_lockTrustMeIAmThreadSafe = { NULL, TrustMeIAmSafe };
 
-static DangerousNonHostedSpinLock g_randomLock;
-static CLRRandom g_random;
+namespace
+{
+    DangerousNonHostedSpinLock g_randomLock;
+    CLRRandom g_random;
+}
 
 int GetRandomInt(int maxVal)
 {
-    // Use the thread-local Random instance if possible
-    Thread* pThread = GetThreadNULLOk();
-    if (pThread)
-        return pThread->GetRandom()->Next(maxVal);
-
-    // No Thread object - need to fall back to the global generator.
     // In DAC builds we don't need the lock (DAC is single-threaded) and can't get it anyway (DNHSL isn't supported)
 #ifndef DACCESS_COMPILE
-    DangerousNonHostedSpinLockHolder lh(&g_randomLock);
+    DangerousNonHostedSpinLockHolder lockHolder(&g_randomLock);
 #endif
+    // Use the thread-local Random instance
     if (!g_random.IsInitialized())
         g_random.Init();
     return g_random.Next(maxVal);
diff --git a/src/coreclr/vm/util.hpp b/src/coreclr/vm/util.hpp
index e7b311d8724d..ef05074b186a 100644
--- a/src/coreclr/vm/util.hpp
+++ b/src/coreclr/vm/util.hpp
@@ -16,7 +16,7 @@
 #include "clrdata.h"
 #include "xclrdata.h"
 #include "posterror.h"
-#include "clr_std/type_traits"
+#include <type_traits>
 
 // Hot cache lines need to be aligned to cache line size to improve performance
 #if defined(TARGET_ARM64)
diff --git a/src/coreclr/vm/vars.cpp b/src/coreclr/vm/vars.cpp
index ba72d19f2b1e..f6a029539063 100644
--- a/src/coreclr/vm/vars.cpp
+++ b/src/coreclr/vm/vars.cpp
@@ -194,9 +194,7 @@ GVAL_IMPL(SIZE_T, g_runtimeVirtualSize);
 
 #ifndef DACCESS_COMPILE
 
-Volatile<LONG> g_fForbidEnterEE = false;
 bool g_fManagedAttach = false;
-bool g_fNoExceptions = false;
 
 DWORD g_FinalizerWaiterStatus = 0;
 
diff --git a/src/coreclr/vm/vars.hpp b/src/coreclr/vm/vars.hpp
index 65712d031512..51533187c8b6 100644
--- a/src/coreclr/vm/vars.hpp
+++ b/src/coreclr/vm/vars.hpp
@@ -16,46 +16,6 @@ typedef DPTR(SLOT) PTR_SLOT;
 
 typedef LPVOID  DictionaryEntry;
 
-/* Define the implementation dependent size types */
-
-#ifndef _INTPTR_T_DEFINED
-#ifdef  HOST_64BIT
-typedef __int64             intptr_t;
-#else
-typedef int                 intptr_t;
-#endif
-#define _INTPTR_T_DEFINED
-#endif
-
-#ifndef _UINTPTR_T_DEFINED
-#ifdef  HOST_64BIT
-typedef unsigned __int64    uintptr_t;
-#else
-typedef unsigned int        uintptr_t;
-#endif
-#define _UINTPTR_T_DEFINED
-#endif
-
-#ifndef _PTRDIFF_T_DEFINED
-#ifdef  HOST_64BIT
-typedef __int64             ptrdiff_t;
-#else
-typedef int                 ptrdiff_t;
-#endif
-#define _PTRDIFF_T_DEFINED
-#endif
-
-
-#ifndef _SIZE_T_DEFINED
-#ifdef  HOST_64BIT
-typedef unsigned __int64 size_t;
-#else
-typedef unsigned int     size_t;
-#endif
-#define _SIZE_T_DEFINED
-#endif
-
-
 #include "util.hpp"
 #include <corpriv.h>
 #include <cordbpriv.h>
@@ -480,13 +440,11 @@ EXTERN BOOL g_fComStarted;
 GVAL_DECL(DWORD, g_fEEShutDown);
 EXTERN DWORD g_fFastExitProcess;
 EXTERN BOOL g_fFatalErrorOccurredOnGCThread;
-EXTERN Volatile<LONG> g_fForbidEnterEE;
 GVAL_DECL(bool, g_fProcessDetach);
 #ifdef FEATURE_METADATA_UPDATER
 GVAL_DECL(bool, g_metadataUpdatesApplied);
 #endif
 EXTERN bool g_fManagedAttach;
-EXTERN bool g_fNoExceptions;
 
 // Indicates whether we're executing shut down as a result of DllMain
 // (DLL_PROCESS_DETACH). See comments at code:EEShutDown for details.
@@ -688,15 +646,6 @@ PTR_GSCookie GetProcessGSCookiePtr() { return  PTR_GSCookie(&s_gsCookie); }
 inline
 GSCookie GetProcessGSCookie() { return *(RAW_KEYWORD(volatile) GSCookie *)(&s_gsCookie); }
 
-// Passed to JitManager APIs to determine whether to avoid calling into the host.
-// The profiling API stackwalking uses this to ensure to avoid re-entering the host
-// (particularly SQL) from a hijacked thread.
-enum HostCallPreference
-{
-    AllowHostCalls,
-    NoHostCalls,
-};
-
 #ifdef TARGET_WINDOWS
 typedef BOOL(WINAPI* PINITIALIZECONTEXT2)(PVOID Buffer, DWORD ContextFlags, PCONTEXT* Context, PDWORD ContextLength, ULONG64 XStateCompactionMask);
 extern PINITIALIZECONTEXT2 g_pfnInitializeContext2;
diff --git a/src/coreclr/vm/versionresilienthashcode.cpp b/src/coreclr/vm/versionresilienthashcode.cpp
index cc4f8d188d16..796ac672032a 100644
--- a/src/coreclr/vm/versionresilienthashcode.cpp
+++ b/src/coreclr/vm/versionresilienthashcode.cpp
@@ -396,7 +396,7 @@ bool GetVersionResilientILCodeHashCode(MethodDesc *pMD, int* hashCode, unsigned*
     }
     else
     {
-        COR_ILMETHOD_DECODER header(pMD->GetILHeader(TRUE), pMD->GetMDImport(), NULL);
+        COR_ILMETHOD_DECODER header(pMD->GetILHeader(), pMD->GetMDImport(), NULL);
 
         pILCode = header.Code;
         cbILCode = header.GetCodeSize();
diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp
index e82f8b84a580..a87eac54f607 100644
--- a/src/coreclr/vm/virtualcallstub.cpp
+++ b/src/coreclr/vm/virtualcallstub.cpp
@@ -823,6 +823,8 @@ void VirtualCallStubManager::ReclaimAll()
     g_reclaim_counter++;
 }
 
+const UINT32 VirtualCallStubManager::counter_block::MAX_COUNTER_ENTRIES;
+
 /* reclaim/rearrange any structures that can only be done during a gc sync point
 i.e. need to be serialized and non-concurrant. */
 void VirtualCallStubManager::Reclaim()
diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h
index e6d89dcf5038..156353b6c186 100644
--- a/src/coreclr/vm/virtualcallstub.h
+++ b/src/coreclr/vm/virtualcallstub.h
@@ -282,7 +282,7 @@ class VirtualCallStubManager : public StubManager
           m_counters(NULL),
           m_cur_counter_block(NULL),
           m_cur_counter_block_for_reclaim(NULL),
-          m_cur_counter_block_for_reclaim_index(NULL),
+          m_cur_counter_block_for_reclaim_index(0),
           m_pNext(NULL)
     {
         LIMITED_METHOD_CONTRACT;
diff --git a/src/coreclr/vm/zapsig.cpp b/src/coreclr/vm/zapsig.cpp
index f0904e6452d0..28216cb27d1b 100644
--- a/src/coreclr/vm/zapsig.cpp
+++ b/src/coreclr/vm/zapsig.cpp
@@ -1024,46 +1024,32 @@ FieldDesc * ZapSig::DecodeField(Module *pReferencingModule,
         IfFailThrow(sig.SkipExactlyOne());
     }
 
-    if (fieldFlags & ENCODE_FIELD_SIG_IndexInsteadOfToken)
-    {
-        // get the field desc using index
-        uint32_t fieldIndex;
-        IfFailThrow(sig.GetData(&fieldIndex));
-
-        _ASSERTE(pOwnerMT != NULL);
+    RID rid;
+    IfFailThrow(sig.GetData(&rid));
 
-        pField = pOwnerMT->GetFieldDescByIndex(fieldIndex);
-        _ASSERTE(pOwnerMT == pField->GetApproxEnclosingMethodTable());
-    }
-    else
+    if (fieldFlags & ENCODE_FIELD_SIG_MemberRefToken)
     {
-        RID rid;
-        IfFailThrow(sig.GetData(&rid));
-
-        if (fieldFlags & ENCODE_FIELD_SIG_MemberRefToken)
+        if (pOwnerMT == NULL)
         {
-            if (pOwnerMT == NULL)
-            {
-                TypeHandle th;
-                MethodDesc * pMD = NULL;
-                FieldDesc * pFD = NULL;
+            TypeHandle th;
+            MethodDesc * pMD = NULL;
+            FieldDesc * pFD = NULL;
 
-                MemberLoader::GetDescFromMemberRef(pInfoModule, TokenFromRid(rid, mdtMemberRef), &pMD, &pFD, NULL, FALSE, &th);
-                _ASSERTE(pFD != NULL);
+            MemberLoader::GetDescFromMemberRef(pInfoModule, TokenFromRid(rid, mdtMemberRef), &pMD, &pFD, NULL, FALSE, &th);
+            _ASSERTE(pFD != NULL);
 
-                pField = pFD;
-            }
-            else
-            {
-                pField = MemberLoader::GetFieldDescFromMemberRefAndType(pInfoModule, TokenFromRid(rid, mdtMemberRef), pOwnerMT);
-            }
+            pField = pFD;
         }
         else
         {
-            _ASSERTE(pInfoModule->IsFullModule());
-            pField = MemberLoader::GetFieldDescFromFieldDef(static_cast<Module*>(pInfoModule), TokenFromRid(rid, mdtFieldDef), FALSE);
+            pField = MemberLoader::GetFieldDescFromMemberRefAndType(pInfoModule, TokenFromRid(rid, mdtMemberRef), pOwnerMT);
         }
     }
+    else
+    {
+        _ASSERTE(pInfoModule->IsFullModule());
+        pField = MemberLoader::GetFieldDescFromFieldDef(static_cast<Module*>(pInfoModule), TokenFromRid(rid, mdtFieldDef), FALSE);
+    }
 
     if (ppTH != NULL)
         *ppTH = (pOwnerMT != NULL) ? pOwnerMT : pField->GetApproxEnclosingMethodTable();
@@ -1341,6 +1327,9 @@ BOOL ZapSig::EncodeMethod(
         else
         {
             Instantiation inst = pMethod->GetMethodInstantiation();
+
+            pSigBuilder->AppendData(inst.GetNumArgs());
+
             for (DWORD i = 0; i < inst.GetNumArgs(); i++)
             {
                 TypeHandle t = inst[i];
diff --git a/src/installer/Directory.Build.props b/src/installer/Directory.Build.props
index 33a094c082f7..45def0200783 100644
--- a/src/installer/Directory.Build.props
+++ b/src/installer/Directory.Build.props
@@ -9,7 +9,6 @@
     <DefineConstants Condition="'$(Configuration)' == 'Debug'">$(DefineConstants),DEBUG,TRACE</DefineConstants>
     <DefineConstants Condition="'$(Configuration)' == 'Release'">$(DefineConstants),TRACE</DefineConstants>
     <RuntimeIdentifier Condition="'$(RuntimeIdentifier)' == '' and '$(EnsureRuntimeIdentifierSet)' == 'true'">$(OutputRID)</RuntimeIdentifier>
-    <TestTargetRid Condition="'$(TestTargetRid)' == ''">$(OutputRID)</TestTargetRid>
     <!-- Never use the NuGet fallback folder that comes with the SDK we use to build.
       The NuGet fallback folder can/will contain packages we are building in this repo, and we
       want to ensure we use the correct packages. -->
diff --git a/src/installer/Directory.Build.targets b/src/installer/Directory.Build.targets
index dccb8277ba76..c4e8a8c8fb70 100644
--- a/src/installer/Directory.Build.targets
+++ b/src/installer/Directory.Build.targets
@@ -1,7 +1,7 @@
 <Project>
 
   <PropertyGroup>
-    <InstallerName Condition="'$(PgoInstrument)' != ''">$(InstallerName)-pgo</InstallerName>
+    <InstallerName>$(InstallerName)</InstallerName>
     <ArchiveName Condition="'$(PgoInstrument)' != ''">$(ArchiveName)-pgo</ArchiveName>
   </PropertyGroup>
 
diff --git a/src/installer/managed/Microsoft.NET.HostModel/Microsoft.NET.HostModel.csproj b/src/installer/managed/Microsoft.NET.HostModel/Microsoft.NET.HostModel.csproj
index 4b2f49ec4356..fec324d46ccc 100644
--- a/src/installer/managed/Microsoft.NET.HostModel/Microsoft.NET.HostModel.csproj
+++ b/src/installer/managed/Microsoft.NET.HostModel/Microsoft.NET.HostModel.csproj
@@ -4,7 +4,7 @@
     <TargetFramework>netstandard2.0</TargetFramework>
     <Description>Abstractions for modifying .NET host binaries</Description>
     <IsShipping>false</IsShipping>
-    <IsPackable Condition="'$(PgoInstrument)' == ''">true</IsPackable>
+    <IsPackable Condition="'$(BuildOnlyPgoInstrumentedAssets)' != 'true'">true</IsPackable>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
     <IncludeSymbols>true</IncludeSymbols>
     <Serviceable>true</Serviceable>
diff --git a/src/installer/pkg/projects/Directory.Build.props b/src/installer/pkg/projects/Directory.Build.props
index 2917964afb8a..a07b360efc07 100644
--- a/src/installer/pkg/projects/Directory.Build.props
+++ b/src/installer/pkg/projects/Directory.Build.props
@@ -35,7 +35,7 @@
     <DisableRarCache>true</DisableRarCache>
 
     <!-- During a build from source, only current-RID assets are available from upstream repos. -->
-    <RestoreAllBuildRids Condition="'$(RestoreAllBuildRids)' == '' and '$(DotNetBuildFromSource)' == 'true'">false</RestoreAllBuildRids>
+    <RestoreAllBuildRids Condition="'$(RestoreAllBuildRids)' == '' and '$(DotNetBuildSourceOnly)' == 'true'">false</RestoreAllBuildRids>
     <!--
       Normally, it is necessary to restore all RIDs so that assets are available to inspect. For
       example, the platform manifest includes data from all RIDs.
diff --git a/src/installer/pkg/projects/Microsoft.DotNet.ILCompiler/ILCompilerRIDs.props b/src/installer/pkg/projects/Microsoft.DotNet.ILCompiler/ILCompilerRIDs.props
index 1d51a78880ae..f1f6193300ba 100644
--- a/src/installer/pkg/projects/Microsoft.DotNet.ILCompiler/ILCompilerRIDs.props
+++ b/src/installer/pkg/projects/Microsoft.DotNet.ILCompiler/ILCompilerRIDs.props
@@ -2,6 +2,7 @@
   <ItemGroup>
     <!-- The officially-supported subset of these RIDs should be included in ILCompilerSupportedRids in
          https://github.com/dotnet/installer/blob/main/src/redist/targets/GenerateBundledVersions.targets -->
+    <OfficialBuildRID Include="linux-musl-arm" Platform="arm" />
     <OfficialBuildRID Include="linux-arm" Platform="arm" />
     <OfficialBuildRID Include="linux-musl-arm64" Platform="arm64" />
     <OfficialBuildRID Include="linux-arm64" Platform="arm64" />
@@ -11,7 +12,9 @@
     <OfficialBuildRID Include="osx-arm64" Platform="arm64" />
     <OfficialBuildRID Include="win-arm64" Platform="arm64" />
     <OfficialBuildRID Include="win-x64" Platform="x64" />
+    <OfficialBuildRID Include="win-x86" Platform="x86" />
     <OfficialBuildRID Include="freebsd-x64" Platform="x64" />
     <OfficialBuildRID Include="freebsd-arm64" Platform="arm64" />
+    <OfficialBuildRID Include="$(OutputRID)" Platform="$(TargetArchitecture)" Exclude="@(OfficialBuildRID)" />
   </ItemGroup>
 </Project>
diff --git a/src/installer/pkg/projects/Microsoft.DotNet.ILCompiler/Microsoft.DotNet.ILCompiler.pkgproj b/src/installer/pkg/projects/Microsoft.DotNet.ILCompiler/Microsoft.DotNet.ILCompiler.pkgproj
index 3995419fc909..33d4b7ed137f 100644
--- a/src/installer/pkg/projects/Microsoft.DotNet.ILCompiler/Microsoft.DotNet.ILCompiler.pkgproj
+++ b/src/installer/pkg/projects/Microsoft.DotNet.ILCompiler/Microsoft.DotNet.ILCompiler.pkgproj
@@ -24,7 +24,7 @@
       <File Include="$(SharedNativeRoot)libs\Common\*" TargetPath="native/src/libs/Common"/>
     </ItemGroup>
 
-    <ItemGroup>
+    <ItemGroup Condition="'$(PackageTargetRuntime)' == ''">
       <File Include="$(CoreCLRBuildIntegrationDir)*" TargetPath="build" />
       <File Include="$(CoreCLRILCompilerDir)netstandard\*" TargetPath="tools/netstandard" />
     </ItemGroup>
diff --git a/src/installer/pkg/projects/netcoreappRIDs.props b/src/installer/pkg/projects/netcoreappRIDs.props
index b0f62cee619d..6c84841697b4 100644
--- a/src/installer/pkg/projects/netcoreappRIDs.props
+++ b/src/installer/pkg/projects/netcoreappRIDs.props
@@ -69,5 +69,11 @@
     <UnofficialBuildRID Include="linux-musl-ppc64le">
       <Platform>ppc64le</Platform>
     </UnofficialBuildRID>
+    <UnofficialBuildRID Include="linux-riscv64">
+      <Platform>riscv64</Platform>
+    </UnofficialBuildRID>
+    <UnofficialBuildRID Include="linux-musl-riscv64">
+      <Platform>riscv64</Platform>
+    </UnofficialBuildRID>
   </ItemGroup>
 </Project>
diff --git a/src/installer/pkg/sfx/Directory.Build.props b/src/installer/pkg/sfx/Directory.Build.props
index b0711d7f7ac9..dbf349249cef 100644
--- a/src/installer/pkg/sfx/Directory.Build.props
+++ b/src/installer/pkg/sfx/Directory.Build.props
@@ -11,7 +11,7 @@
     <DisableImplicitFrameworkReferences>true</DisableImplicitFrameworkReferences>
     <UseRuntimePackageDisclaimer>true</UseRuntimePackageDisclaimer>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(PgoInstrument)' != 'true'">
+  <PropertyGroup Condition="'$(BuildOnlyPgoInstrumentedAssets)' != 'true'">
     <GenerateInstallers>true</GenerateInstallers>
     <GenerateVSInsertionPackages>true</GenerateVSInsertionPackages>
   </PropertyGroup>
diff --git a/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props b/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props
index 6e48929f896e..e1e1ef7c08ae 100644
--- a/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props
+++ b/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props
@@ -128,6 +128,7 @@
     <PlatformManifestFileEntry Include="msquic.dll" IsNative="true" FallbackFileVersion="$(MsQuicFileVersion)" />
     <PlatformManifestFileEntry Include="libmsquic.dylib" IsNative="true" FallbackFileVersion="$(MsQuicFileVersion)" Condition="'$(Configuration)' == 'Debug' or '$(LibrariesConfiguration)' == 'Debug'"/>
     <PlatformManifestFileEntry Include="System.IO.Compression.Native.dll" IsNative="true" />
+    <PlatformManifestFileEntry Include="System.Globalization.Native.dll" IsNative="true" />
     <PlatformManifestFileEntry Include="createdump.exe" IsNative="true" />
     <PlatformManifestFileEntry Include="createdump" IsNative="true" />
     <PlatformManifestFileEntry Include="libcoreclrtraceptprovider.so" IsNative="true" />
@@ -144,6 +145,8 @@
     <PlatformManifestFileEntry Include="libeventpipe-enabled.a" IsNative="true" />
     <PlatformManifestFileEntry Include="libRuntime.ServerGC.a" IsNative="true" />
     <PlatformManifestFileEntry Include="libRuntime.WorkstationGC.a" IsNative="true" />
+    <PlatformManifestFileEntry Include="libRuntime.VxSortDisabled.a" IsNative="true" />
+    <PlatformManifestFileEntry Include="libRuntime.VxSortEnabled.a" IsNative="true" />
     <PlatformManifestFileEntry Include="libstandalonegc-disabled.a" IsNative="true" />
     <PlatformManifestFileEntry Include="libstandalonegc-enabled.a" IsNative="true" />
     <PlatformManifestFileEntry Include="libstdc++compat.a" IsNative="true" />
@@ -280,7 +283,7 @@
     <ExcludeFromClosure Include="@(NetFxReference)" />
  </ItemGroup>
 
-  <ItemGroup Condition="'$(DotNetBuildFromSource)' != 'true'">
+  <ItemGroup Condition="'$(DotNetBuildSourceOnly)' != 'true'">
     <!-- Add a reference to Microsoft.DiaSymReader.Native if one does not already exist. -->
     <PackageReference Include="Microsoft.DiaSymReader.Native"
                       Exclude="@(PackageReference)"
diff --git a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Crossgen2.sfxproj b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Crossgen2.sfxproj
index b36f9e956632..8d492718edd9 100644
--- a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Crossgen2.sfxproj
+++ b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Crossgen2.sfxproj
@@ -7,12 +7,9 @@
     <SkipBuild Condition="'$(RuntimeFlavor)' == 'Mono'">true</SkipBuild>
     <PlatformPackageType>ToolPack</PlatformPackageType>
     <SharedFrameworkName>$(SharedFrameworkName).Crossgen2</SharedFrameworkName>
-    <PgoSuffix Condition="'$(PgoInstrument)' != ''">.PGO</PgoSuffix>
-    <OverridePackageId>$(SharedFrameworkName)$(PgoSuffix).$(RuntimeIdentifier)</OverridePackageId>
+    <OverridePackageId>$(SharedFrameworkName).$(RuntimeIdentifier)</OverridePackageId>
     <ArchiveName>dotnet-crossgen2</ArchiveName>
     <SharedFrameworkHostFileNameOverride>crossgen2</SharedFrameworkHostFileNameOverride>
-    <!-- Build this pack for any RID if building from source. Otherwise, only build select RIDs. -->
-    <RuntimeIdentifiers Condition="'$(DotNetBuildFromSource)' != 'true'">linux-x64;linux-musl-x64;linux-arm;linux-musl-arm;linux-arm64;linux-musl-arm64;freebsd-x64;freebsd-arm64;osx-x64;osx-arm64;win-x64;win-x86;win-arm64</RuntimeIdentifiers>
     <GenerateInstallers>false</GenerateInstallers>
     <HostJsonTargetPath>tools/</HostJsonTargetPath>
     <PermitDllAndExeFilesLackingFileVersion>true</PermitDllAndExeFilesLackingFileVersion>
diff --git a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Host.sfxproj b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Host.sfxproj
index 8cd98f995ee0..fc7b8b90fe90 100644
--- a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Host.sfxproj
+++ b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Host.sfxproj
@@ -8,7 +8,7 @@
     <ArchiveName>dotnet-apphost-pack</ArchiveName>
     <InstallerName>dotnet-apphost-pack</InstallerName>
     <VSInsertionShortComponentName>NetCore.AppHostPack</VSInsertionShortComponentName>
-    <IsPackable Condition="'$(PgoInstrument)' != ''">false</IsPackable>
+    <IsPackable Condition="'$(BuildOnlyPgoInstrumentedAssets)' != ''">false</IsPackable>
   </PropertyGroup>
 
   <!--
diff --git a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Ref.sfxproj b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Ref.sfxproj
index 65ba460d088a..3580d2c54465 100644
--- a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Ref.sfxproj
+++ b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Ref.sfxproj
@@ -5,7 +5,7 @@
     <PlatformPackageType>TargetingPack</PlatformPackageType>
     <UseTemplatedPlatformManifest>true</UseTemplatedPlatformManifest>
     <InstallerName>dotnet-targeting-pack</InstallerName>
-    <IsPackable Condition="'$(PgoInstrument)' != ''">false</IsPackable>
+    <IsPackable Condition="'$(BuildOnlyPgoInstrumentedAssets)' != ''">false</IsPackable>
     <VSInsertionShortComponentName>NetCore.TargetingPack</VSInsertionShortComponentName>
     <PackageDescription>A set of .NET APIs that are included in the default .NET application model. Contains reference assemblies, documentation, and other design-time assets.</PackageDescription>
   </PropertyGroup>
diff --git a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.props b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.props
index 007249b9888f..c8ca3be5fd69 100644
--- a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.props
+++ b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.props
@@ -11,8 +11,6 @@
     <PublishReadyToRun Condition="'$(TargetOS)' == 'netbsd' or '$(TargetOS)' == 'illumos' or '$(TargetOS)' == 'solaris' or '$(TargetOS)' == 'haiku'">false</PublishReadyToRun>
     <!-- Disable crossgen on FreeBSD when cross building from Linux. -->
     <PublishReadyToRun Condition="'$(TargetOS)'=='freebsd' and '$(CrossBuild)'=='true'">false</PublishReadyToRun>
-    <!-- Disable crossgen on riscv64. -->
-    <PublishReadyToRun Condition="'$(TargetArchitecture)'=='riscv64'">false</PublishReadyToRun>
     <!-- These components are installed by the root shared framework, but not others. -->
     <IncludeWerRelatedKeys>true</IncludeWerRelatedKeys>
     <IncludeBreadcrumbStoreFolder>true</IncludeBreadcrumbStoreFolder>
diff --git a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.sfxproj b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.sfxproj
index 988b59bcecec..3389feacd0c0 100644
--- a/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.sfxproj
+++ b/src/installer/pkg/sfx/Microsoft.NETCore.App/Microsoft.NETCore.App.Runtime.sfxproj
@@ -8,7 +8,7 @@
     <InstallerName Condition="'$(TargetOS)' != 'osx'">dotnet-runtime</InstallerName>
     <InstallerName Condition="'$(TargetOS)' == 'osx'">dotnet-runtime-internal</InstallerName>
     <CreateSymbolsArchive Condition="'$(PgoInstrument)' == ''">true</CreateSymbolsArchive>
-    <IsPackable Condition="'$(PgoInstrument)' != ''">false</IsPackable>
+    <IsPackable Condition="'$(BuildOnlyPgoInstrumentedAssets)' != ''">false</IsPackable>
     <SymbolsArchiveName>dotnet-runtime-symbols</SymbolsArchiveName>
     <VSInsertionShortComponentName>NetCore.SharedFramework</VSInsertionShortComponentName>
     <UseTemplatedPlatformManifest>true</UseTemplatedPlatformManifest>
@@ -19,7 +19,7 @@
     <SkipInstallerBuild Condition="'$(BuildNativeAOTRuntimePack)' == 'true'">true</SkipInstallerBuild>
     <!-- Skip building any archives except in source-build, where the symbols archive is necessary
          for distro maintainers. It can generally be removed when https://github.com/dotnet/source-build/issues/3547 is resolved. -->
-    <SkipArchivesBuild Condition="'$(DotNetBuildFromSource)' != 'true' or '$(RuntimeFlavor)' != '$(PrimaryRuntimeFlavor)' or '$(TargetsMobile)' == 'true' or '$(BuildNativeAOTRuntimePack)' == 'true'">true</SkipArchivesBuild>
+    <SkipArchivesBuild Condition="'$(DotNetBuildSourceOnly)' != 'true' or '$(RuntimeFlavor)' != '$(PrimaryRuntimeFlavor)' or '$(TargetsMobile)' == 'true' or '$(BuildNativeAOTRuntimePack)' == 'true'">true</SkipArchivesBuild>
   </PropertyGroup>
 
   <ItemGroup>
diff --git a/src/installer/pkg/sfx/bundle/Microsoft.NETCore.App.Bundle.bundleproj b/src/installer/pkg/sfx/bundle/Microsoft.NETCore.App.Bundle.bundleproj
index acfcdd089bbe..178a37fed055 100644
--- a/src/installer/pkg/sfx/bundle/Microsoft.NETCore.App.Bundle.bundleproj
+++ b/src/installer/pkg/sfx/bundle/Microsoft.NETCore.App.Bundle.bundleproj
@@ -19,7 +19,7 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Condition="'$(PgoInstrument)' != 'true'" Include="Microsoft.DotNet.Build.Tasks.Installers" Version="$(MicrosoftDotNetBuildTasksInstallersVersion)" />
+    <PackageReference Condition="'$(BuildOnlyPgoInstrumentedAssets)' != 'true'" Include="Microsoft.DotNet.Build.Tasks.Installers" Version="$(MicrosoftDotNetBuildTasksInstallersVersion)" />
     <PackageReference Include="Microsoft.DotNet.Build.Tasks.Archives" Version="$(MicrosoftDotNetBuildTasksArchivesVersion)" />
   </ItemGroup>
 
diff --git a/src/installer/pkg/sfx/installers.proj b/src/installer/pkg/sfx/installers.proj
index 06e366db911d..7f4ce6b9c1c4 100644
--- a/src/installer/pkg/sfx/installers.proj
+++ b/src/installer/pkg/sfx/installers.proj
@@ -10,6 +10,7 @@
   </ItemGroup>
 
   <ItemGroup Condition="'$(BuildRpmPackage)' == 'true'">
+    <InstallerProjectReference Include="installers/dotnet-runtime-deps/dotnet-runtime-deps-azl.3.proj" />
     <InstallerProjectReference Include="installers/dotnet-runtime-deps/dotnet-runtime-deps-centos.8.proj" />
     <InstallerProjectReference Include="installers/dotnet-runtime-deps/dotnet-runtime-deps-cm.1.proj" />
     <InstallerProjectReference Include="installers/dotnet-runtime-deps/dotnet-runtime-deps-cm.2.proj" />
diff --git a/src/installer/pkg/sfx/installers/dotnet-runtime-deps/dotnet-runtime-deps-azl.3.proj b/src/installer/pkg/sfx/installers/dotnet-runtime-deps/dotnet-runtime-deps-azl.3.proj
new file mode 100644
index 000000000000..ee363967c365
--- /dev/null
+++ b/src/installer/pkg/sfx/installers/dotnet-runtime-deps/dotnet-runtime-deps-azl.3.proj
@@ -0,0 +1,10 @@
+<Project Sdk="Microsoft.Build.NoTargets">
+  <PropertyGroup>
+    <GenerateInstallers Condition="'$(BuildRpmPackage)' != 'true'">false</GenerateInstallers>
+    <PackageTargetOS>azl.3</PackageTargetOS>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <LinuxPackageDependency Include="openssl-libs;icu;krb5;ca-certificates" />
+  </ItemGroup>
+</Project>
diff --git a/src/installer/prepare-artifacts.proj b/src/installer/prepare-artifacts.proj
index 3f62ace0f8da..56249a2135c8 100644
--- a/src/installer/prepare-artifacts.proj
+++ b/src/installer/prepare-artifacts.proj
@@ -23,6 +23,8 @@
   </PropertyGroup>
   <Import Project="../tools/Sign.props" Sdk="Microsoft.DotNet.Arcade.Sdk" />
 
+  <Import Project="$(RepositoryEngineeringDir)Publishing.props" Condition="Exists('$(RepositoryEngineeringDir)Publishing.props')" />
+
   <UsingTask TaskName="GenerateChecksums" AssemblyFile="$(InstallerTasksAssemblyPath)" />
 
   <PropertyGroup>
@@ -56,6 +58,16 @@
     <ManifestBuildData Include="AzureDevOpsBranch=$(BUILD_SOURCEBRANCH)" />
   </ItemGroup>
 
+  <!-- 
+    Set metadata for assets that are not marked as NonShipping. 
+    This is used to determine if the asset should be shipped as part of .NET release.
+  -->
+  <ItemDefinitionGroup>
+    <ItemsToPush>
+      <ManifestArtifactData Condition="'$(ProducesDotNetReleaseShippingAssets)' == 'true'">DotNetReleaseShipping=true</ManifestArtifactData>
+    </ItemsToPush>
+  </ItemDefinitionGroup>
+
   <!--
     Run Arcade's signing project directly. The 'eng/Signing.props' extensibility props file checks
     if '$(<StageName>)' == 'true' and points Arcade to the correct files.
@@ -140,7 +152,7 @@
     </ItemGroup>
 
     <!-- Push items to AzDO as build artifacts, generating the asset manifest as a side effect. -->
-    <PushToAzureDevOpsArtifacts
+    <PushToBuildStorage
       AzureDevOpsCollectionUri="$(SYSTEM_TEAMFOUNDATIONCOLLECTIONURI)"
       AzureDevOpsProject="$(SYSTEM_TEAMPROJECT)"
       AzureDevOpsBuildId="$(BUILD_BUILDID)"
diff --git a/src/installer/tests/AppHost.Bundle.Tests/AppHost.Bundle.Tests.csproj b/src/installer/tests/AppHost.Bundle.Tests/AppHost.Bundle.Tests.csproj
index e70ca66a4f0b..c564835ae9e7 100644
--- a/src/installer/tests/AppHost.Bundle.Tests/AppHost.Bundle.Tests.csproj
+++ b/src/installer/tests/AppHost.Bundle.Tests/AppHost.Bundle.Tests.csproj
@@ -4,11 +4,9 @@
     <Description>Apphost Bundle Tests</Description>
     <TargetFramework>$(TestInfraTargetFramework)</TargetFramework>
     <AssemblyName>AppHost.Bundle.Tests</AssemblyName>
-    <PackageId>AppHost.Bundle.Tests</PackageId>
     <GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>
     <!-- Reduce the length of the test output dir to make it more reliable on Windows. -->
     <TestsOutputName>ahb</TestsOutputName>
-    <UsesTestAssets>true</UsesTestAssets>
   </PropertyGroup>
 
   <ItemGroup>
diff --git a/src/installer/tests/AppHost.Bundle.Tests/BundleProbe.cs b/src/installer/tests/AppHost.Bundle.Tests/BundleProbe.cs
index c74a9d211eea..6c7fc9f3f43e 100644
--- a/src/installer/tests/AppHost.Bundle.Tests/BundleProbe.cs
+++ b/src/installer/tests/AppHost.Bundle.Tests/BundleProbe.cs
@@ -35,8 +35,7 @@ private void SingleFileApp_ProbeFiles()
             };
 
             var result = Command.Create(singleFile, $"host_runtime_contract.bundle_probe {string.Join(" ", itemsToProbe.Select(i => i.Path))}")
-                .CaptureStdErr()
-                .CaptureStdOut()
+                .EnableTracingAndCaptureOutputs()
                 .Execute();
 
             result.Should().Pass();
diff --git a/src/installer/tests/AppHost.Bundle.Tests/BundledAppWithSubDirs.cs b/src/installer/tests/AppHost.Bundle.Tests/BundledAppWithSubDirs.cs
index 8ef85f06658a..a665e6bb6580 100644
--- a/src/installer/tests/AppHost.Bundle.Tests/BundledAppWithSubDirs.cs
+++ b/src/installer/tests/AppHost.Bundle.Tests/BundledAppWithSubDirs.cs
@@ -58,14 +58,12 @@ public void FrameworkDependent_NoBundleEntryPoint()
         {
             var singleFile = sharedTestState.FrameworkDependentApp.Bundle(BundleOptions.None);
 
-            string dotnetWithMockHostFxr = SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, "guiErrors"));
-            using (new TestArtifact(dotnetWithMockHostFxr))
+            using (var dotnetWithMockHostFxr = TestArtifact.Create("mockhostfxrFrameworkMissingFailure"))
             {
-                Directory.CreateDirectory(dotnetWithMockHostFxr);
-                var dotnetBuilder = new DotNetBuilder(dotnetWithMockHostFxr, TestContext.BuiltDotNet.BinPath, "mockhostfxrFrameworkMissingFailure")
+                var dotnet = new DotNetBuilder(dotnetWithMockHostFxr.Location, TestContext.BuiltDotNet.BinPath, null)
                     .RemoveHostFxr()
-                    .AddMockHostFxr(new Version(2, 2, 0));
-                var dotnet = dotnetBuilder.Build();
+                    .AddMockHostFxr(new Version(2, 2, 0))
+                    .Build();
 
                 // Run the bundled app (extract files)
                 RunTheApp(singleFile, dotnet.BinPath)
@@ -86,16 +84,15 @@ public void FrameworkDependent_GUI_DownlevelHostFxr_ErrorDialog(BundleOptions op
             var singleFile = sharedTestState.FrameworkDependentApp.Bundle(options);
             PEUtils.SetWindowsGraphicalUserInterfaceBit(singleFile);
 
-            string dotnetWithMockHostFxr = SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, "bundleErrors"));
-            using (new TestArtifact(dotnetWithMockHostFxr))
+            // The mockhostfxrBundleVersionFailure folder name is used by mock hostfxr to return the appropriate error code
+            using (var dotnetWithMockHostFxr = TestArtifact.Create("mockhostfxrBundleVersionFailure"))
             {
-                Directory.CreateDirectory(dotnetWithMockHostFxr);
                 string expectedErrorCode = Constants.ErrorCode.BundleExtractionFailure.ToString("x");
 
-                var dotnetBuilder = new DotNetBuilder(dotnetWithMockHostFxr, TestContext.BuiltDotNet.BinPath, "mockhostfxrBundleVersionFailure")
+                var dotnet = new DotNetBuilder(dotnetWithMockHostFxr.Location, TestContext.BuiltDotNet.BinPath, null)
                     .RemoveHostFxr()
-                    .AddMockHostFxr(new Version(5, 0, 0));
-                var dotnet = dotnetBuilder.Build();
+                    .AddMockHostFxr(new Version(5, 0, 0))
+                    .Build();
 
                 Command command = Command.Create(singleFile)
                     .EnableTracingAndCaptureOutputs()
diff --git a/src/installer/tests/Assets/Projects/HostApiInvokerApp/HostFXR.cs b/src/installer/tests/Assets/Projects/HostApiInvokerApp/HostFXR.cs
index 76dc6db1d65f..3dc91d5ea69f 100644
--- a/src/installer/tests/Assets/Projects/HostApiInvokerApp/HostFXR.cs
+++ b/src/installer/tests/Assets/Projects/HostApiInvokerApp/HostFXR.cs
@@ -106,62 +106,48 @@ internal static extern int hostfxr_get_dotnet_environment_info(
         /// <summary>
         /// Test invoking the native hostfxr api hostfxr_resolve_sdk2
         /// </summary>
-        /// <param name="args[0]">hostfxr_get_available_sdks</param>
-        /// <param name="args[1]">Directory of dotnet executable</param>
-        /// <param name="args[2]">Working directory where search for global.json begins</param>
-        /// <param name="args[3]">Flags</param>
+        /// <param name="args[0]">Directory of dotnet executable</param>
+        /// <param name="args[1]">Working directory where search for global.json begins</param>
+        /// <param name="args[2]">Flags</param>
         static void Test_hostfxr_resolve_sdk2(string[] args)
         {
-            if (args.Length != 4)
+            if (args.Length != 3)
             {
                 throw new ArgumentException("Invalid number of arguments passed");
             }
 
             var data = new List<(hostfxr.hostfxr_resolve_sdk2_result_key_t, string)>();
             int rc = hostfxr.hostfxr_resolve_sdk2(
-                exe_dir: args[1],
-                working_dir: args[2],
-                flags: Enum.Parse<hostfxr.hostfxr_resolve_sdk2_flags_t>(args[3]),
+                exe_dir: args[0],
+                working_dir: args[1],
+                flags: Enum.Parse<hostfxr.hostfxr_resolve_sdk2_flags_t>(args[2]),
                 result: (key, value) => data.Add((key, value)));
 
-            if (rc == 0)
-            {
-                Console.WriteLine("hostfxr_resolve_sdk2:Success");
-            }
-            else
-            {
-                Console.WriteLine($"hostfxr_resolve_sdk2:Fail[{rc}]");
-            }
-
-            Console.WriteLine($"hostfxr_resolve_sdk2 data:[{string.Join(';', data)}]");
+            string api = nameof(hostfxr.hostfxr_resolve_sdk2);
+            LogResult(api, rc);
+            Console.WriteLine($"{api} data:[{string.Join(';', data)}]");
         }
 
         /// <summary>
         /// Test invoking the native hostfxr api hostfxr_get_available_sdks
         /// </summary>
-        /// <param name="args[0]">hostfxr_get_available_sdks</param>
-        /// <param name="args[1]">Directory of dotnet executable</param>
+        /// <param name="args[0]">Directory of dotnet executable</param>
         static void Test_hostfxr_get_available_sdks(string[] args)
         {
-            if (args.Length != 2)
+            if (args.Length != 1)
             {
                 throw new ArgumentException("Invalid number of arguments passed");
             }
 
             string[] sdks = null;
             int rc = hostfxr.hostfxr_get_available_sdks(
-                exe_dir: args[1],
+                exe_dir: args[0],
                 (sdk_count, sdk_dirs) => sdks = sdk_dirs);
 
-            if (rc == 0)
-            {
-                Console.WriteLine("hostfxr_get_available_sdks:Success");
-                Console.WriteLine($"hostfxr_get_available_sdks sdks:[{string.Join(';', sdks)}]");
-            }
-            else
-            {
-                Console.WriteLine($"hostfxr_get_available_sdks:Fail[{rc}]");
-            }
+            string api = nameof(hostfxr.hostfxr_get_available_sdks);
+            LogResult(api, rc);
+            if (sdks != null)
+                Console.WriteLine($"{api} sdks:[{string.Join(';', sdks)}]");
         }
 
         static void Test_hostfxr_set_error_writer(string[] args)
@@ -193,13 +179,12 @@ static void Test_hostfxr_set_error_writer(string[] args)
         /// <summary>
         /// Test that invokes native api hostfxr_get_dotnet_environment_info.
         /// </summary>
-        /// <param name="args[0]">hostfxr_get_dotnet_environment_info</param>
-        /// <param name="args[1]">(Optional) Path to the directory with dotnet.exe</param>
+        /// <param name="args[0]">(Optional) Path to the directory with dotnet.exe</param>
         static void Test_hostfxr_get_dotnet_environment_info(string[] args)
         {
             string dotnetExeDir = null;
-            if (args.Length >= 2)
-                dotnetExeDir = args[1];
+            if (args.Length >= 1)
+                dotnetExeDir = args[0];
 
             string hostfxr_version;
             string hostfxr_commit_hash;
@@ -254,21 +239,20 @@ static void Test_hostfxr_get_dotnet_environment_info(string[] args)
                 result: result_fn,
                 result_context: new IntPtr(42));
 
-            if (rc != 0)
-            {
-                Console.WriteLine($"hostfxr_get_dotnet_environment_info:Fail[{rc}]");
-            }
-
-            Console.WriteLine($"hostfxr_get_dotnet_environment_info sdk versions:[{string.Join(";", sdks.Select(s => s.version).ToList())}]");
-            Console.WriteLine($"hostfxr_get_dotnet_environment_info sdk paths:[{string.Join(";", sdks.Select(s => s.path).ToList())}]");
+            string api = nameof(hostfxr.hostfxr_get_dotnet_environment_info);
+            LogResult(api, rc);
 
-            Console.WriteLine($"hostfxr_get_dotnet_environment_info framework names:[{string.Join(";", frameworks.Select(f => f.name).ToList())}]");
-            Console.WriteLine($"hostfxr_get_dotnet_environment_info framework versions:[{string.Join(";", frameworks.Select(f => f.version).ToList())}]");
-            Console.WriteLine($"hostfxr_get_dotnet_environment_info framework paths:[{string.Join(";", frameworks.Select(f => f.path).ToList())}]");
+            Console.WriteLine($"{api} sdk versions:[{string.Join(";", sdks.Select(s => s.version).ToList())}]");
+            Console.WriteLine($"{api} sdk paths:[{string.Join(";", sdks.Select(s => s.path).ToList())}]");
 
-            Console.WriteLine("hostfxr_get_dotnet_environment_info:Success");
+            Console.WriteLine($"{api} framework names:[{string.Join(";", frameworks.Select(f => f.name).ToList())}]");
+            Console.WriteLine($"{api} framework versions:[{string.Join(";", frameworks.Select(f => f.version).ToList())}]");
+            Console.WriteLine($"{api} framework paths:[{string.Join(";", frameworks.Select(f => f.path).ToList())}]");
         }
 
+        private static void LogResult(string apiName, int rc)
+            => Console.WriteLine(rc == 0 ? $"{apiName}:Success" : $"{apiName}:Fail[0x{rc:x}]");
+
         public static bool RunTest(string apiToTest, string[] args)
         {
             switch (apiToTest)
diff --git a/src/installer/tests/Assets/Projects/HostApiInvokerApp/HostRuntimeContract.cs b/src/installer/tests/Assets/Projects/HostApiInvokerApp/HostRuntimeContract.cs
index a75f34be6942..4ecf59b2761a 100644
--- a/src/installer/tests/Assets/Projects/HostApiInvokerApp/HostRuntimeContract.cs
+++ b/src/installer/tests/Assets/Projects/HostApiInvokerApp/HostRuntimeContract.cs
@@ -46,8 +46,9 @@ private static void Test_get_runtime_property(string[] args)
 
             static string GetProperty(string name, host_runtime_contract contract)
             {
-                Span<byte> nameSpan = stackalloc byte[Encoding.UTF8.GetMaxByteCount(name.Length)];
-                byte* namePtr = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(nameSpan));
+                int nameSize = Encoding.UTF8.GetMaxByteCount(name.Length);
+                byte* namePtr = stackalloc byte[nameSize];
+                Span<byte> nameSpan = new Span<byte>(namePtr, nameSize);
                 int nameLen = Encoding.UTF8.GetBytes(name, nameSpan);
                 nameSpan[nameLen] = 0;
 
@@ -86,8 +87,9 @@ public static void Test_bundle_probe(string[] args)
 
             unsafe static void Probe(host_runtime_contract contract, string path)
             {
-                Span<byte> pathSpan = stackalloc byte[Encoding.UTF8.GetMaxByteCount(path.Length)];
-                byte* pathPtr = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(pathSpan));
+                int pathSize = Encoding.UTF8.GetMaxByteCount(path.Length);
+                byte* pathPtr = stackalloc byte[pathSize];
+                Span<byte> pathSpan = new Span<byte>(pathPtr, pathSize);
                 int pathLen = Encoding.UTF8.GetBytes(path, pathSpan);
                 pathSpan[pathLen] = 0;
 
@@ -111,10 +113,10 @@ public static bool RunTest(string apiToTest, string[] args)
             switch (apiToTest)
             {
                 case $"{nameof(host_runtime_contract)}.{nameof(host_runtime_contract.get_runtime_property)}":
-                    Test_get_runtime_property(args[1..]);
+                    Test_get_runtime_property(args);
                     break;
                 case $"{nameof(host_runtime_contract)}.{nameof(host_runtime_contract.bundle_probe)}":
-                    Test_bundle_probe(args[1..]);
+                    Test_bundle_probe(args);
                     break;
                 default:
                     return false;
diff --git a/src/installer/tests/Assets/Projects/HostApiInvokerApp/Program.cs b/src/installer/tests/Assets/Projects/HostApiInvokerApp/Program.cs
index 2831ed8c3d48..f14003995e6a 100644
--- a/src/installer/tests/Assets/Projects/HostApiInvokerApp/Program.cs
+++ b/src/installer/tests/Assets/Projects/HostApiInvokerApp/Program.cs
@@ -31,9 +31,6 @@ public static void MainCore(string[] args)
             Console.WriteLine("Hello World!");
             Console.WriteLine(string.Join(Environment.NewLine, args));
 
-            // Enable tracing so that test assertion failures are easier to diagnose.
-            Environment.SetEnvironmentVariable("COREHOST_TRACE", "1");
-
             // If requested, test multilevel lookup using fake Global SDK directories:
             //     1. using a fake ProgramFiles location
             //     2. using a fake SDK Self-Registered location
@@ -61,13 +58,13 @@ public static void MainCore(string[] args)
             }
 
             string apiToTest = args[0];
-            if (HostFXR.RunTest(apiToTest, args))
+            if (HostFXR.RunTest(apiToTest, args[1..]))
                 return;
 
-            if (HostPolicy.RunTest(apiToTest, args))
+            if (HostPolicy.RunTest(apiToTest, args[1..]))
                 return;
 
-            if (HostRuntimeContract.RunTest(apiToTest, args))
+            if (HostRuntimeContract.RunTest(apiToTest, args[1..]))
                 return;
 
             throw new ArgumentException($"Invalid API to test passed as args[0]): {apiToTest}");
diff --git a/src/installer/tests/Assets/TestProjects/Directory.Build.props b/src/installer/tests/Assets/TestProjects/Directory.Build.props
deleted file mode 100644
index 06b429ead4e0..000000000000
--- a/src/installer/tests/Assets/TestProjects/Directory.Build.props
+++ /dev/null
@@ -1,9 +0,0 @@
-<Project>
-  <Import Project="$(MSBuildThisFileDirectory)..\TestUtils\TestProjects.props" />
-
-  <PropertyGroup>
-    <RestorePackagesPath>$(TestRestorePackagesPath)</RestorePackagesPath>
-    <RestoreNoCache>true</RestoreNoCache>
-  </PropertyGroup>
-
-</Project>
diff --git a/src/installer/tests/Assets/TestProjects/Directory.Build.targets b/src/installer/tests/Assets/TestProjects/Directory.Build.targets
deleted file mode 100644
index f6350ac71558..000000000000
--- a/src/installer/tests/Assets/TestProjects/Directory.Build.targets
+++ /dev/null
@@ -1,18 +0,0 @@
-<Project>
-  <Import Project="$(MSBuildThisFileDirectory)..\TestUtils\TestProjects.targets" />
-
-  <!--
-    Some known framework references are downstream of dotnet/runtime and don't exist with the build's
-    versions. Remove them before the SDK tries to download them.
-  -->
-  <Target Name="RemoveUpstackKnownFrameworkReferences"
-          BeforeTargets="ProcessFrameworkReferences">
-    <ItemGroup>
-      <KnownFrameworkReference Remove="Microsoft.AspNetCore.App" />
-      <KnownFrameworkReference Remove="Microsoft.WindowsDesktop.App" />
-      <KnownFrameworkReference Remove="Microsoft.WindowsDesktop.App.WPF" />
-      <KnownFrameworkReference Remove="Microsoft.WindowsDesktop.App.WindowsForms" />
-    </ItemGroup>
-  </Target>
-
-</Project>
diff --git a/src/installer/tests/Assets/TestUtils/TestProjects.props b/src/installer/tests/Assets/TestUtils/TestProjects.props
deleted file mode 100644
index 1ace72f7f050..000000000000
--- a/src/installer/tests/Assets/TestUtils/TestProjects.props
+++ /dev/null
@@ -1,17 +0,0 @@
-<Project>
-  <!--
-    This file is imported by the test projects from the artifacts dir or the src/installer/tests dir. It
-    provides basic info needed for restore and build with the vanilla SDK.
-  -->
-
-  <PropertyGroup>
-    <NetCoreAppCurrent>net9.0</NetCoreAppCurrent>
-    <!-- Turn off end of life target framework checks as we intentionally build older .NETCoreApp configurations. -->
-    <CheckEolTargetFramework>false</CheckEolTargetFramework>
-    <!--
-      If the test project needs an app host, use the one for the test target RID. This fixes
-      win-x86 tests that assumed a win-x64 app host RID based on the runner SDK.
-    -->
-    <AppHostRuntimeIdentifier>$(TestTargetRid)</AppHostRuntimeIdentifier>
-  </PropertyGroup>
-</Project>
diff --git a/src/installer/tests/Assets/TestUtils/TestProjects.targets b/src/installer/tests/Assets/TestUtils/TestProjects.targets
deleted file mode 100644
index 60faea781200..000000000000
--- a/src/installer/tests/Assets/TestUtils/TestProjects.targets
+++ /dev/null
@@ -1,7 +0,0 @@
-<Project>
-  <!--
-    This file is imported by the test projects from the artifacts dir or the src/installer/tests dir. It
-    provides basic info needed for restore and build with the vanilla SDK.
-  -->
-
-</Project>
diff --git a/src/installer/tests/Directory.Build.props b/src/installer/tests/Directory.Build.props
index 11472eb328a4..e56691554aaa 100644
--- a/src/installer/tests/Directory.Build.props
+++ b/src/installer/tests/Directory.Build.props
@@ -2,14 +2,7 @@
   <Import Project="$([MSBuild]::GetPathOfFileAbove(Directory.Build.props, $(MSBuildThisFileDirectory)..))" />
 
   <PropertyGroup>
-    <TestDir>$(InstallerProjectRoot)tests\</TestDir>
-    <TestAssetsDir>$(TestDir)Assets\</TestAssetsDir>
     <TestArtifactsOutputRoot>$(ArtifactsDir)tests\host\$(TargetOS).$(TargetArchitecture).$(Configuration)\</TestArtifactsOutputRoot>
-    <TestStabilizedLegacyPackagesDir>$(ArtifactsObjDir)TestStabilizedPackages\</TestStabilizedLegacyPackagesDir>
-    <TestRestorePackagesPath>$(ArtifactsObjDir)TestPackageCache\</TestRestorePackagesPath>
-    <TestRestoreNuGetConfigFile>$(ArtifactsObjDir)TestNuGetConfig\NuGet.config</TestRestoreNuGetConfigFile>
-    <InternalNupkgCacheDir>$(ArtifactsObjDir)ExtraNupkgsForTestRestore\</InternalNupkgCacheDir>
-    <TestArchitectures>$(TargetArchitecture)</TestArchitectures>
     <TestInfraTargetFramework>$(NetCoreAppToolCurrent)</TestInfraTargetFramework>
     <TestCaseFilter>category!=failing</TestCaseFilter>
     <TestRunnerAdditionalArguments>--filter $(TestCaseFilter) -v detailed</TestRunnerAdditionalArguments>
diff --git a/src/installer/tests/Directory.Build.targets b/src/installer/tests/Directory.Build.targets
index ecd79411081d..9bfb4ffbea28 100644
--- a/src/installer/tests/Directory.Build.targets
+++ b/src/installer/tests/Directory.Build.targets
@@ -1,44 +1,9 @@
 <Project>
 
-  <Target Name="SetUpSharedFrameworkPublish"
-          DependsOnTargets="DetermineTestOutputDirectory"
-          BeforeTargets="RunTests">
-    <!--
-      Set up the shared framework copy this set of tests should use. There's no known reason to have
-      one per test project, but RepoDirectoriesProvider may need some tweaking to share.
-    -->
-    <MSBuild Projects="$(InstallerProjectRoot)pkg\sfx\bundle\Microsoft.NETCore.App.Bundle.bundleproj"
-             Targets="PublishToDisk"
-             Properties="OutputPath=$(TestsOutputDir)sharedFrameworkPublish/" />
-  </Target>
-
-  <Target Name="RefreshProjectTestAssets"
-          Condition="'$(UsesTestAssets)' == 'true' and '$(SkipTests)' != 'true'"
-          DependsOnTargets="DetermineTestOutputDirectory;SetUpSharedFrameworkPublish"
-          BeforeTargets="RunTests">
-    <!-- Build PrepareTestAssets once for all tests. -->
-    <MSBuild
-      Projects="$(TestDir)PrepareTestAssets\PrepareTestAssets.proj"
-      Targets="PrepareTestAssets" />
-
-    <!-- Create files with a name that describes what a test dir is. Useful if path abbreviated. -->
-    <WriteLinesToFile
-      File="$(TestArtifactsOutputRoot)$(TestsOutputName)-is-$(MSBuildProjectName).txt"
-      Overwrite="true"
-      Lines="$(MSBuildProjectName) intermediates are located in '$(TestsOutputName)'. Abbreviated to work around path limits on Windows."
-      Condition="'$(TestsOutputName)' != '$(MSBuildProjectName)'" />
-    <WriteLinesToFile
-      File="$(TestsOutputDir)$(MSBuildProjectName).txt"
-      Overwrite="true"
-      Lines="$(MSBuildProjectName) intermediates are located in '$(TestsOutputName)'. Abbreviated to work around path limits on Windows."
-      Condition="'$(TestsOutputName)' != '$(MSBuildProjectName)'" />
-  </Target>
-
   <Target Name="SetupTestContextVariables"
           Condition="'$(IsTestProject)' == 'true'"
           DependsOnTargets="
             GetProductVersions;
-            GetNETCoreAppRuntimePackVersion;
             DetermineTestOutputDirectory"
           BeforeTargets="Build">
     <PropertyGroup>
@@ -75,16 +40,12 @@
       See https://github.com/dotnet/arcade/issues/3077.
     -->
     <ItemGroup>
-      <TestContextVariable Include="NUGET_PACKAGES=$(TestRestorePackagesPath)" />
       <TestContextVariable Include="TEST_ARTIFACTS=$(SystemPathTestsOutputDir)" />
-      <TestContextVariable Include="TEST_TARGETRID=$(TestTargetRid)" />
-      <TestContextVariable Include="BUILDRID=$(OutputRID)" />
+      <TestContextVariable Include="BUILD_RID=$(OutputRID)" />
       <TestContextVariable Include="BUILD_ARCHITECTURE=$(TargetArchitecture)" />
       <TestContextVariable Include="BUILD_CONFIGURATION=$(Configuration)" />
       <TestContextVariable Include="MNA_VERSION=$(NETCoreAppRuntimePackageVersion)" />
       <TestContextVariable Include="MNA_TFM=$(NetCoreAppCurrent)" />
-      <TestContextVariable Include="DOTNET_SDK_PATH=$(DotNetRoot)" />
-      <TestContextVariable Include="TEST_ASSETS=$(TestAssetsDir)" />
       <TestContextVariable Include="TEST_ASSETS_OUTPUT=$(TestArtifactsOutputRoot)" />
     </ItemGroup>
 
@@ -94,25 +55,8 @@
       Lines="@(TestContextVariable)" />
   </Target>
 
-  <!--
-    Fetch the package version of 'Microsoft.NETCore.App.Runtime.<rid>'. The runtime nupkg project
-    always ships, so it may or may not have a stable version depending on product lifecycle.
-
-    Some test projects end in ".Tests", which Arcade detects and applies IsShipping=false. This
-    makes ProductVersion non-stable, so we can't rely on the test project's ProductVersion to be the
-    same as the package's version. Fetch this directly from the project to avoid guesswork.
-  -->
-  <Target Name="GetNETCoreAppRuntimePackVersion">
-    <MSBuild
-      Projects="$(InstallerProjectRoot)pkg\sfx\Microsoft.NETCore.App\Microsoft.NETCore.App.Runtime.sfxproj"
-      Targets="ReturnProductVersion">
-      <Output TaskParameter="TargetOutputs" PropertyName="NETCoreAppRuntimePackVersion" />
-    </MSBuild>
-  </Target>
-
   <Target Name="DetermineTestOutputDirectory">
     <PropertyGroup>
-      <TestTargetRid Condition="'$(TestTargetRid)' == ''">$(PackageRID)</TestTargetRid>
       <TestsOutputName Condition="'$(TestsOutputName)' == ''">$(MSBuildProjectName)</TestsOutputName>
       <TestsOutputDir Condition="'$(TestsOutputDir)' == ''">$(TestArtifactsOutputRoot)$(TestsOutputName)/</TestsOutputDir>
     </PropertyGroup>
diff --git a/src/installer/tests/HostActivation.Tests/DependencyResolution/AdditionalDeps.cs b/src/installer/tests/HostActivation.Tests/DependencyResolution/AdditionalDeps.cs
index 37717376cc54..ebdd3c3ad719 100644
--- a/src/installer/tests/HostActivation.Tests/DependencyResolution/AdditionalDeps.cs
+++ b/src/installer/tests/HostActivation.Tests/DependencyResolution/AdditionalDeps.cs
@@ -9,7 +9,7 @@
 
 namespace Microsoft.DotNet.CoreSetup.Test.HostActivation.DependencyResolution
 {
-    public class AdditionalDeps : DependencyResolutionBase, IClassFixture<AdditionalDeps.SharedTestState>
+    public class AdditionalDeps : IClassFixture<AdditionalDeps.SharedTestState>
     {
         private SharedTestState SharedState { get; }
 
@@ -41,13 +41,12 @@ public AdditionalDeps(SharedTestState sharedState)
         [InlineData("4.1.2-preview.2",  new string[] { "4.0.0", "4.1.2", "4.2.0" },     null)]
         public void DepsDirectory(string fxVersion, string[] versions, string usedVersion)
         {
-            string additionalDepsDirectory = SharedFramework.CalculateUniqueTestDirectory(Path.Combine(SharedState.Location, "additionalDeps"));
-            using (TestArtifact artifact = new TestArtifact(additionalDepsDirectory))
+            using (TestArtifact additionalDeps = TestArtifact.Create("additionalDeps"))
             {
                 string depsJsonName = Path.GetFileName(SharedState.AdditionalDepsComponent.DepsJson);
                 foreach (string version in versions)
                 {
-                    string path = Path.Combine(additionalDepsDirectory, "shared", MicrosoftNETCoreApp, version);
+                    string path = Path.Combine(additionalDeps.Location, "shared", Constants.MicrosoftNETCoreApp, version);
                     Directory.CreateDirectory(path);
                     File.Copy(
                         SharedState.AdditionalDepsComponent.DepsJson,
@@ -61,12 +60,12 @@ public void DepsDirectory(string fxVersion, string[] versions, string usedVersio
                     // Make a copy of the app and update its framework version
                     app = SharedState.FrameworkReferenceApp.Copy();
                     RuntimeConfig.FromFile(app.RuntimeConfigJson)
-                        .RemoveFramework(MicrosoftNETCoreApp)
-                        .WithFramework(MicrosoftNETCoreApp, fxVersion)
+                        .RemoveFramework(Constants.MicrosoftNETCoreApp)
+                        .WithFramework(Constants.MicrosoftNETCoreApp, fxVersion)
                         .Save();
                 }
 
-                CommandResult result = SharedState.DotNetWithNetCoreApp.Exec(Constants.AdditionalDeps.CommandLineArgument, additionalDepsDirectory, app.AppDll)
+                CommandResult result = SharedState.DotNetWithNetCoreApp.Exec(Constants.AdditionalDeps.CommandLineArgument, additionalDeps.Location, app.AppDll)
                     .EnableTracingAndCaptureOutputs()
                     .Execute();
 
@@ -77,7 +76,7 @@ public void DepsDirectory(string fxVersion, string[] versions, string usedVersio
                 }
                 else
                 {
-                    result.Should().HaveUsedAdditionalDeps(Path.Combine(additionalDepsDirectory, "shared", MicrosoftNETCoreApp, usedVersion, depsJsonName));
+                    result.Should().HaveUsedAdditionalDeps(Path.Combine(additionalDeps.Location, "shared", Constants.MicrosoftNETCoreApp, usedVersion, depsJsonName));
                 }
             }
         }
@@ -138,7 +137,7 @@ public void InvalidJson()
             }
         }
 
-        public class SharedTestState : DependencyResolutionBase.SharedTestStateBase
+        public class SharedTestState : SharedTestStateBase
         {
             public DotNetCli DotNetWithNetCoreApp { get; }
 
@@ -155,7 +154,7 @@ public SharedTestState()
 
                 AdditionalDepsComponent = CreateComponentWithNoDependencies();
 
-                FrameworkReferenceApp = CreateFrameworkReferenceApp(MicrosoftNETCoreApp, NetCoreAppVersion);
+                FrameworkReferenceApp = CreateFrameworkReferenceApp(Constants.MicrosoftNETCoreApp, NetCoreAppVersion);
 
                 // Copy dependency next to app
                 File.Copy(AdditionalDepsComponent.AppDll, Path.Combine(FrameworkReferenceApp.Location, $"{AdditionalDepsComponent.AssemblyName}.dll"));
diff --git a/src/installer/tests/HostActivation.Tests/DependencyResolution/AdditionalProbingPath.cs b/src/installer/tests/HostActivation.Tests/DependencyResolution/AdditionalProbingPath.cs
index 29735b19729c..4914f5b7b95a 100644
--- a/src/installer/tests/HostActivation.Tests/DependencyResolution/AdditionalProbingPath.cs
+++ b/src/installer/tests/HostActivation.Tests/DependencyResolution/AdditionalProbingPath.cs
@@ -9,7 +9,7 @@
 
 namespace Microsoft.DotNet.CoreSetup.Test.HostActivation.DependencyResolution
 {
-    public class AdditionalProbingPath : DependencyResolutionBase, IClassFixture<AdditionalProbingPath.SharedTestState>
+    public class AdditionalProbingPath : IClassFixture<AdditionalProbingPath.SharedTestState>
     {
         private readonly SharedTestState sharedState;
 
@@ -90,7 +90,7 @@ public void RuntimeConfigSetting(bool dependencyExists)
             }
         }
 
-        public class SharedTestState : DependencyResolutionBase.SharedTestStateBase
+        public class SharedTestState : SharedTestStateBase
         {
             public DotNetCli DotNetWithNetCoreApp { get; }
 
@@ -114,12 +114,12 @@ public SharedTestState()
                     .AddMicrosoftNETCoreAppFrameworkMockCoreClr(TestContext.MicrosoftNETCoreAppVersion)
                     .Build();
 
-                string nativeDependencyRelPath = $"{TestContext.TargetRID}/{Binaries.GetSharedLibraryFileNameForCurrentPlatform("native")}";
-                FrameworkReferenceApp = CreateFrameworkReferenceApp(MicrosoftNETCoreApp, TestContext.MicrosoftNETCoreAppVersion, b => b
+                string nativeDependencyRelPath = $"{TestContext.BuildRID}/{Binaries.GetSharedLibraryFileNameForCurrentPlatform("native")}";
+                FrameworkReferenceApp = CreateFrameworkReferenceApp(Constants.MicrosoftNETCoreApp, TestContext.MicrosoftNETCoreAppVersion, b => b
                     .WithProject(DependencyName, DependencyVersion, p => p
                         .WithAssemblyGroup(null, g => g
                             .WithAsset($"{DependencyName}.dll", f => f.NotOnDisk()))
-                        .WithNativeLibraryGroup(TestContext.TargetRID, g => g
+                        .WithNativeLibraryGroup(TestContext.BuildRID, g => g
                             .WithAsset(nativeDependencyRelPath, f => f.NotOnDisk()))));
                 RuntimeConfig.FromFile(FrameworkReferenceApp.RuntimeConfigJson)
                     .WithTfm(TestContext.Tfm)
diff --git a/src/installer/tests/HostActivation.Tests/DependencyResolution/ComponentDependencyResolutionBase.cs b/src/installer/tests/HostActivation.Tests/DependencyResolution/ComponentDependencyResolutionBase.cs
deleted file mode 100644
index 608a3151b18c..000000000000
--- a/src/installer/tests/HostActivation.Tests/DependencyResolution/ComponentDependencyResolutionBase.cs
+++ /dev/null
@@ -1,106 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using Microsoft.DotNet.Cli.Build;
-using Microsoft.DotNet.Cli.Build.Framework;
-using System;
-using System.IO;
-
-namespace Microsoft.DotNet.CoreSetup.Test.HostActivation.DependencyResolution
-{
-    public abstract class ComponentDependencyResolutionBase : DependencyResolutionBase
-    {
-        public abstract class ComponentSharedTestStateBase : SharedTestStateBase
-        {
-            private const string resolve_component_dependencies = "resolve_component_dependencies";
-            private const string run_app_and_resolve = "run_app_and_resolve";
-            private const string run_app_and_resolve_multithreaded = "run_app_and_resolve_multithreaded";
-
-            public DotNetCli DotNetWithNetCoreApp { get; }
-
-            public TestApp FrameworkReferenceApp { get; }
-
-            public string NativeHostPath { get => _nativeHostingState.NativeHostPath; }
-
-            private readonly NativeHosting.SharedTestStateBase _nativeHostingState;
-
-            public ComponentSharedTestStateBase()
-            {
-                var dotNetBuilder = DotNet("WithNetCoreApp")
-                    .AddMicrosoftNETCoreAppFrameworkMockCoreClr("4.0.0", builder => CustomizeDotNetWithNetCoreAppMicrosoftNETCoreApp(builder));
-                CustomizeDotNetWithNetCoreApp(dotNetBuilder);
-                DotNetWithNetCoreApp = dotNetBuilder.Build();
-
-                FrameworkReferenceApp = CreateTestFrameworkReferenceApp();
-
-                _nativeHostingState = new NativeHosting.SharedTestStateBase();
-            }
-
-            protected virtual TestApp CreateTestFrameworkReferenceApp() => CreateFrameworkReferenceApp(MicrosoftNETCoreApp, "4.0.0");
-
-            protected virtual void CustomizeDotNetWithNetCoreAppMicrosoftNETCoreApp(NetCoreAppBuilder builder)
-            {
-            }
-
-            protected virtual void CustomizeDotNetWithNetCoreApp(DotNetBuilder builder)
-            {
-            }
-
-            public CommandResult RunComponentResolutionTest(TestApp component, Action<Command> commandCustomizer = null)
-            {
-                return RunComponentResolutionTest(component.AppDll, FrameworkReferenceApp, DotNetWithNetCoreApp.GreatestVersionHostFxrPath, commandCustomizer);
-            }
-
-            public CommandResult RunComponentResolutionTest(string componentPath, TestApp hostApp, string hostFxrFolder, Action<Command> commandCustomizer = null)
-            {
-                string[] args =
-                {
-                    resolve_component_dependencies,
-                    run_app_and_resolve,
-                    Path.Combine(hostFxrFolder, Binaries.HostFxr.FileName),
-                    hostApp.AppDll,
-                    componentPath
-                };
-
-                Command command = Command.Create(NativeHostPath, args)
-                    .EnableTracingAndCaptureOutputs()
-                    .MultilevelLookup(false);
-                commandCustomizer?.Invoke(command);
-
-                return command.Execute()
-                    .StdErrAfter("corehost_resolve_component_dependencies = {");
-            }
-
-            public CommandResult RunComponentResolutionMultiThreadedTest(TestApp componentOne, TestApp componentTwo)
-            {
-                return RunComponentResolutionMultiThreadedTest(componentOne.AppDll, componentTwo.AppDll, FrameworkReferenceApp, DotNetWithNetCoreApp.GreatestVersionHostFxrPath);
-            }
-
-            public CommandResult RunComponentResolutionMultiThreadedTest(string componentOnePath, string componentTwoPath, TestApp hostApp, string hostFxrFolder)
-            {
-                string[] args =
-                {
-                    resolve_component_dependencies,
-                    run_app_and_resolve_multithreaded,
-                    Path.Combine(hostFxrFolder, Binaries.HostFxr.FileName),
-                    hostApp.AppDll,
-                    componentOnePath,
-                    componentTwoPath
-                };
-
-                return Command.Create(NativeHostPath, args)
-                    .EnableTracingAndCaptureOutputs()
-                    .MultilevelLookup(false)
-                    .Execute();
-            }
-
-            public override void Dispose()
-            {
-                base.Dispose();
-
-                FrameworkReferenceApp.Dispose();
-                _nativeHostingState.Dispose();
-            }
-        }
-    }
-}
diff --git a/src/installer/tests/HostActivation.Tests/DependencyResolution/ComponentSharedTestStateBase.cs b/src/installer/tests/HostActivation.Tests/DependencyResolution/ComponentSharedTestStateBase.cs
new file mode 100644
index 000000000000..ab577df6bfe7
--- /dev/null
+++ b/src/installer/tests/HostActivation.Tests/DependencyResolution/ComponentSharedTestStateBase.cs
@@ -0,0 +1,102 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Microsoft.DotNet.Cli.Build;
+using Microsoft.DotNet.Cli.Build.Framework;
+using System;
+using System.IO;
+
+namespace Microsoft.DotNet.CoreSetup.Test.HostActivation.DependencyResolution
+{
+    public abstract class ComponentSharedTestStateBase : SharedTestStateBase
+    {
+        private const string resolve_component_dependencies = "resolve_component_dependencies";
+        private const string run_app_and_resolve = "run_app_and_resolve";
+        private const string run_app_and_resolve_multithreaded = "run_app_and_resolve_multithreaded";
+
+        public DotNetCli DotNetWithNetCoreApp { get; }
+
+        public TestApp FrameworkReferenceApp { get; }
+
+        public string NativeHostPath { get => _nativeHostingState.NativeHostPath; }
+
+        private readonly NativeHosting.SharedTestStateBase _nativeHostingState;
+
+        public ComponentSharedTestStateBase()
+        {
+            var dotNetBuilder = DotNet("WithNetCoreApp")
+                .AddMicrosoftNETCoreAppFrameworkMockCoreClr("4.0.0", builder => CustomizeDotNetWithNetCoreAppMicrosoftNETCoreApp(builder));
+            CustomizeDotNetWithNetCoreApp(dotNetBuilder);
+            DotNetWithNetCoreApp = dotNetBuilder.Build();
+
+            FrameworkReferenceApp = CreateTestFrameworkReferenceApp();
+
+            _nativeHostingState = new NativeHosting.SharedTestStateBase();
+        }
+
+        protected virtual TestApp CreateTestFrameworkReferenceApp() => CreateFrameworkReferenceApp(Constants.MicrosoftNETCoreApp, "4.0.0");
+
+        protected virtual void CustomizeDotNetWithNetCoreAppMicrosoftNETCoreApp(NetCoreAppBuilder builder)
+        {
+        }
+
+        protected virtual void CustomizeDotNetWithNetCoreApp(DotNetBuilder builder)
+        {
+        }
+
+        public CommandResult RunComponentResolutionTest(TestApp component, Action<Command> commandCustomizer = null)
+        {
+            return RunComponentResolutionTest(component.AppDll, FrameworkReferenceApp, DotNetWithNetCoreApp.GreatestVersionHostFxrPath, commandCustomizer);
+        }
+
+        public CommandResult RunComponentResolutionTest(string componentPath, TestApp hostApp, string hostFxrFolder, Action<Command> commandCustomizer = null)
+        {
+            string[] args =
+            {
+                resolve_component_dependencies,
+                run_app_and_resolve,
+                Path.Combine(hostFxrFolder, Binaries.HostFxr.FileName),
+                hostApp.AppDll,
+                componentPath
+            };
+
+            Command command = Command.Create(NativeHostPath, args)
+                .EnableTracingAndCaptureOutputs()
+                .MultilevelLookup(false);
+            commandCustomizer?.Invoke(command);
+
+            return command.Execute()
+                .StdErrAfter("corehost_resolve_component_dependencies = {");
+        }
+
+        public CommandResult RunComponentResolutionMultiThreadedTest(TestApp componentOne, TestApp componentTwo)
+        {
+            return RunComponentResolutionMultiThreadedTest(componentOne.AppDll, componentTwo.AppDll, FrameworkReferenceApp, DotNetWithNetCoreApp.GreatestVersionHostFxrPath);
+        }
+
+        public CommandResult RunComponentResolutionMultiThreadedTest(string componentOnePath, string componentTwoPath, TestApp hostApp, string hostFxrFolder)
+        {
+            string[] args =
+            {
+                resolve_component_dependencies,
+                run_app_and_resolve_multithreaded,
+                Path.Combine(hostFxrFolder, Binaries.HostFxr.FileName),
+                hostApp.AppDll,
+                componentOnePath,
+                componentTwoPath
+            };
+
+            return Command.Create(NativeHostPath, args)
+                .EnableTracingAndCaptureOutputs()
+                .MultilevelLookup(false)
+                .Execute();
+        }
+
+        protected override void Dispose(bool disposing)
+        {
+            FrameworkReferenceApp.Dispose();
+            _nativeHostingState.Dispose();
+            base.Dispose(disposing);
+        }
+    }
+}
diff --git a/src/installer/tests/HostActivation.Tests/DependencyResolution/DependencyResolutionBase.cs b/src/installer/tests/HostActivation.Tests/DependencyResolution/DependencyResolutionBase.cs
deleted file mode 100644
index e334dfcd6777..000000000000
--- a/src/installer/tests/HostActivation.Tests/DependencyResolution/DependencyResolutionBase.cs
+++ /dev/null
@@ -1,74 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-using System.IO;
-
-namespace Microsoft.DotNet.CoreSetup.Test.HostActivation.DependencyResolution
-{
-    public abstract class DependencyResolutionBase
-    {
-        protected const string MicrosoftNETCoreApp = "Microsoft.NETCore.App";
-
-        public abstract class SharedTestStateBase : TestArtifact
-        {
-            private static string GetBaseDir(string name)
-            {
-                string baseDir = Path.Combine(TestArtifactsPath, name);
-                return SharedFramework.CalculateUniqueTestDirectory(baseDir);
-            }
-
-            public SharedTestStateBase()
-                : base(GetBaseDir("dependencyResolution"))
-            {
-            }
-
-            public DotNetBuilder DotNet(string name)
-            {
-                return new DotNetBuilder(Location, TestContext.BuiltDotNet.BinPath, name);
-            }
-
-            public TestApp CreateFrameworkReferenceApp(string fxName, string fxVersion, Action<NetCoreAppBuilder> customizer = null)
-            {
-                // Prepare the app mock - we're not going to run anything really, so we just need the basic files
-                TestApp testApp = CreateTestApp(Location, "FrameworkReferenceApp");
-                testApp.PopulateFrameworkDependent(fxName, fxVersion, customizer);
-                return testApp;
-            }
-
-            protected TestApp CreateTestApp(string location, string name)
-            {
-                TestApp testApp;
-                if (location == null)
-                {
-                    testApp = TestApp.CreateEmpty(name);
-                }
-                else
-                {
-                    string path = Path.Combine(location, name);
-                    testApp = new TestApp(path);
-                }
-
-                RegisterCopy(testApp);
-                return testApp;
-            }
-
-            public TestApp CreateComponentWithNoDependencies(Action<NetCoreAppBuilder> customizer = null, string location = null)
-            {
-                TestApp componentWithNoDependencies = CreateTestApp(location, "ComponentWithNoDependencies");
-                NetCoreAppBuilder builder = NetCoreAppBuilder.PortableForNETCoreApp(componentWithNoDependencies)
-                    .WithProject(p => p.WithAssemblyGroup(null, g => g.WithMainAssembly()));
-                customizer?.Invoke(builder);
-
-                return builder.Build(componentWithNoDependencies);
-            }
-
-            public TestApp CreateSelfContainedAppWithMockCoreClr(string name, Action<NetCoreAppBuilder> customizer = null)
-            {
-                TestApp testApp = CreateTestApp(null, name);
-                testApp.PopulateSelfContained(TestApp.MockedComponent.CoreClr, customizer);
-                return testApp;
-            }
-        }
-    }
-}
diff --git a/src/installer/tests/HostActivation.Tests/DependencyResolution/DepsFile.cs b/src/installer/tests/HostActivation.Tests/DependencyResolution/DepsFile.cs
index 73f8c4fe6c52..0964eb0c4304 100644
--- a/src/installer/tests/HostActivation.Tests/DependencyResolution/DepsFile.cs
+++ b/src/installer/tests/HostActivation.Tests/DependencyResolution/DepsFile.cs
@@ -9,7 +9,7 @@
 
 namespace Microsoft.DotNet.CoreSetup.Test.HostActivation.DependencyResolution
 {
-    public class DepsFile : DependencyResolutionBase, IClassFixture<DepsFile.SharedTestState>
+    public class DepsFile : IClassFixture<DepsFile.SharedTestState>
     {
         private readonly SharedTestState sharedState;
 
@@ -47,7 +47,7 @@ public void SeparateDepsJson()
                 .And.HaveResolvedAssembly(dependencyPath);
         }
 
-        public class SharedTestState : DependencyResolutionBase.SharedTestStateBase
+        public class SharedTestState : SharedTestStateBase
         {
             public DotNetCli DotNetWithNetCoreApp { get; }
 
@@ -63,7 +63,7 @@ public SharedTestState()
                     .AddMicrosoftNETCoreAppFrameworkMockCoreClr(TestContext.MicrosoftNETCoreAppVersion)
                     .Build();
 
-                FrameworkReferenceApp = CreateFrameworkReferenceApp(MicrosoftNETCoreApp, TestContext.MicrosoftNETCoreAppVersion, b => b
+                FrameworkReferenceApp = CreateFrameworkReferenceApp(Constants.MicrosoftNETCoreApp, TestContext.MicrosoftNETCoreAppVersion, b => b
                     .WithProject(DependencyName, "1.0.0", p => p
                         .WithAssemblyGroup(null, g => g.WithAsset($"{DependencyName}.dll"))));
 
diff --git a/src/installer/tests/HostActivation.Tests/DependencyResolution/PerAssemblyVersionResolution.cs b/src/installer/tests/HostActivation.Tests/DependencyResolution/PerAssemblyVersionResolution.cs
index e9f5bc793203..63d850a91cf3 100644
--- a/src/installer/tests/HostActivation.Tests/DependencyResolution/PerAssemblyVersionResolution.cs
+++ b/src/installer/tests/HostActivation.Tests/DependencyResolution/PerAssemblyVersionResolution.cs
@@ -8,7 +8,6 @@
 namespace Microsoft.DotNet.CoreSetup.Test.HostActivation.DependencyResolution
 {
     public abstract class PerAssemblyVersionResolutionBase :
-        ComponentDependencyResolutionBase,
         IClassFixture<PerAssemblyVersionResolutionBase.SharedTestState>
     {
         protected readonly SharedTestState SharedState;
diff --git a/src/installer/tests/HostActivation.Tests/DependencyResolution/PerAssemblyVersionResolutionMultipleFrameworks.cs b/src/installer/tests/HostActivation.Tests/DependencyResolution/PerAssemblyVersionResolutionMultipleFrameworks.cs
index f91141f8fd42..d5e46c56aeff 100644
--- a/src/installer/tests/HostActivation.Tests/DependencyResolution/PerAssemblyVersionResolutionMultipleFrameworks.cs
+++ b/src/installer/tests/HostActivation.Tests/DependencyResolution/PerAssemblyVersionResolutionMultipleFrameworks.cs
@@ -8,7 +8,6 @@
 namespace Microsoft.DotNet.CoreSetup.Test.HostActivation.DependencyResolution
 {
     public abstract class PerAssemblyVersionResolutionMultipleFrameworksBase :
-        ComponentDependencyResolutionBase,
         IClassFixture<PerAssemblyVersionResolutionBase.SharedTestState>
     {
         protected readonly SharedTestState SharedState;
@@ -33,16 +32,16 @@ public PerAssemblyVersionResolutionMultipleFrameworksBase(SharedTestState fixtur
         private const string TestAssemblyWithBothVersions = "Test.Assembly.BothVersions";
 
         [Theory]
-        [InlineData(TestAssemblyWithBothVersions, null, null, MicrosoftNETCoreApp)] // NetCoreApp has higher version than HighWare
-        [InlineData(TestAssemblyWithBothVersions, "1.0.0.0", "1.0.0.0", MicrosoftNETCoreApp)]
+        [InlineData(TestAssemblyWithBothVersions, null, null, Constants.MicrosoftNETCoreApp)] // NetCoreApp has higher version than HighWare
+        [InlineData(TestAssemblyWithBothVersions, "1.0.0.0", "1.0.0.0", Constants.MicrosoftNETCoreApp)]
         [InlineData(TestAssemblyWithBothVersions, "3.0.0.0", "4.0.0.0", null)]  // App has higher version than any framework
         [InlineData(TestAssemblyWithBothVersions, "2.1.1.1", "3.3.0.0", null)]  // App has higher file version
-        [InlineData(TestAssemblyWithBothVersions, "2.1.1.1", "3.2.2.2", MicrosoftNETCoreApp)]  // Lower level framework always wins on equality (this is intentional)
-        [InlineData(TestAssemblyWithBothVersions, null, "4.0.0.0", MicrosoftNETCoreApp)] // The one with version wins
-        [InlineData(TestAssemblyWithBothVersions, null, "2.0.0.0", MicrosoftNETCoreApp)] // The one with version wins
+        [InlineData(TestAssemblyWithBothVersions, "2.1.1.1", "3.2.2.2", Constants.MicrosoftNETCoreApp)]  // Lower level framework always wins on equality (this is intentional)
+        [InlineData(TestAssemblyWithBothVersions, null, "4.0.0.0", Constants.MicrosoftNETCoreApp)] // The one with version wins
+        [InlineData(TestAssemblyWithBothVersions, null, "2.0.0.0", Constants.MicrosoftNETCoreApp)] // The one with version wins
         [InlineData(TestAssemblyWithBothVersions, "3.0.0.0", null, null)]
-        [InlineData(TestAssemblyWithBothVersions, "2.1.1.1", null, MicrosoftNETCoreApp)]
-        [InlineData(TestAssemblyWithNoVersions, null, null, MicrosoftNETCoreApp)] // No versions are treated as equal (so lower one wins)
+        [InlineData(TestAssemblyWithBothVersions, "2.1.1.1", null, Constants.MicrosoftNETCoreApp)]
+        [InlineData(TestAssemblyWithNoVersions, null, null, Constants.MicrosoftNETCoreApp)] // No versions are treated as equal (so lower one wins)
         [InlineData(TestAssemblyWithNoVersions, "1.0.0.0", null, null)] // The one with version wins
         [InlineData(TestAssemblyWithNoVersions, "1.0.0.0", "1.0.0.0", null)] // The one with version wins
         [InlineData(TestAssemblyWithNoVersions, null, "1.0.0.0", null)] // The one with version wins
@@ -52,11 +51,11 @@ public PerAssemblyVersionResolutionMultipleFrameworksBase(SharedTestState fixtur
         [InlineData(TestAssemblyWithAssemblyVersion, "3.0.0.0", "1.0.0.0", null)] // App has higher version than any framework
         [InlineData(TestAssemblyWithAssemblyVersion, "2.1.1.2", null, HighWare)] // Both are exactly the same, so lower level wins
         [InlineData(TestAssemblyWithAssemblyVersion, "2.1.1.2", "1.0.0.0", null)]
-        [InlineData(TestAssemblyWithFileVersion, null, null, MicrosoftNETCoreApp)] // Frameworks both have the same version - lower one wins
+        [InlineData(TestAssemblyWithFileVersion, null, null, Constants.MicrosoftNETCoreApp)] // Frameworks both have the same version - lower one wins
         [InlineData(TestAssemblyWithFileVersion, "1.0.0.0", null, null)] // App has assembly version, no framework has it - so app wins
-        [InlineData(TestAssemblyWithFileVersion, null, "1.0.0.0", MicrosoftNETCoreApp)]
+        [InlineData(TestAssemblyWithFileVersion, null, "1.0.0.0", Constants.MicrosoftNETCoreApp)]
         [InlineData(TestAssemblyWithFileVersion, null, "4.0.0.0", null)] // App has higher version than either framework
-        [InlineData(TestAssemblyWithFileVersion, null, "3.2.2.2", MicrosoftNETCoreApp)] // Exactly equal - lower one wins
+        [InlineData(TestAssemblyWithFileVersion, null, "3.2.2.2", Constants.MicrosoftNETCoreApp)] // Exactly equal - lower one wins
         public void AppWithSameAssemblyAsFramework(string testAssemblyName, string appAsmVersion, string appFileVersion, string frameWorkWins)
         {
             RunTest(null, testAssemblyName, appAsmVersion, appFileVersion, frameWorkWins);
@@ -73,7 +72,7 @@ public void AppWithExactlySameAssemblyAsFrameworkWithRollForward(string framewor
                 runtimeConfig => runtimeConfig
                     .WithFramework(HighWare, frameworkReferenceVersion)
                     .WithRollForward(Constants.RollForwardSetting.Major),
-                TestAssemblyWithBothVersions, "2.1.1.1", "3.2.2.2", MicrosoftNETCoreApp);
+                TestAssemblyWithBothVersions, "2.1.1.1", "3.2.2.2", Constants.MicrosoftNETCoreApp);
         }
 
         protected abstract void RunTest(Action<RuntimeConfig> runtimeConfigCustomizer, string testAssemblyName, string appAsmVersion, string appFileVersion, string frameWorkWins);
@@ -104,8 +103,8 @@ protected override void CustomizeDotNetWithNetCoreApp(DotNetBuilder builder)
                 builder.AddFramework(
                     HighWare,
                     "1.1.1",
-                    runtimeConfig => runtimeConfig.WithFramework(MicrosoftNETCoreApp, "4.0.0"),
-                    path => NetCoreAppBuilder.ForNETCoreApp(HighWare, TestContext.TargetRID)
+                    runtimeConfig => runtimeConfig.WithFramework(Constants.MicrosoftNETCoreApp, "4.0.0"),
+                    path => NetCoreAppBuilder.ForNETCoreApp(HighWare, TestContext.BuildRID)
                         .WithProject(HighWare, "1.1.1", p => p
                             .WithAssemblyGroup(null, g => g
                             .WithAsset(TestAssemblyWithNoVersions + ".dll")
@@ -152,7 +151,7 @@ protected override void RunTest(Action<RuntimeConfig> runtimeConfigCustomizer, s
 
             string expectedBaseLocation = frameworkWins switch
             {
-                MicrosoftNETCoreApp => SharedState.DotNetWithNetCoreApp.GreatestVersionSharedFxPath,
+                Constants.MicrosoftNETCoreApp => SharedState.DotNetWithNetCoreApp.GreatestVersionSharedFxPath,
                 HighWare => SharedState.HighWarePath,
                 _ => app.Location,
             };
diff --git a/src/installer/tests/HostActivation.Tests/DependencyResolution/ResolveComponentDependencies.cs b/src/installer/tests/HostActivation.Tests/DependencyResolution/ResolveComponentDependencies.cs
index 20aac37426ea..9818e1764786 100644
--- a/src/installer/tests/HostActivation.Tests/DependencyResolution/ResolveComponentDependencies.cs
+++ b/src/installer/tests/HostActivation.Tests/DependencyResolution/ResolveComponentDependencies.cs
@@ -9,7 +9,6 @@
 namespace Microsoft.DotNet.CoreSetup.Test.HostActivation.DependencyResolution
 {
     public class ResolveComponentDependencies :
-        ComponentDependencyResolutionBase,
         IClassFixture<ResolveComponentDependencies.SharedTestState>
     {
         private readonly SharedTestState sharedTestState;
diff --git a/src/installer/tests/HostActivation.Tests/DependencyResolution/RidAssetResolution.cs b/src/installer/tests/HostActivation.Tests/DependencyResolution/RidAssetResolution.cs
index c85d01f39979..f2042e2c2ece 100644
--- a/src/installer/tests/HostActivation.Tests/DependencyResolution/RidAssetResolution.cs
+++ b/src/installer/tests/HostActivation.Tests/DependencyResolution/RidAssetResolution.cs
@@ -11,7 +11,7 @@
 
 namespace Microsoft.DotNet.CoreSetup.Test.HostActivation.DependencyResolution
 {
-    public abstract class RidAssetResolutionBase : ComponentDependencyResolutionBase
+    public abstract class RidAssetResolutionBase
     {
         private static Version UseRidGraphDisabledVersion = new Version(8, 0);
         public class TestSetup
diff --git a/src/installer/tests/HostActivation.Tests/DependencyResolution/SharedTestStateBase.cs b/src/installer/tests/HostActivation.Tests/DependencyResolution/SharedTestStateBase.cs
new file mode 100644
index 000000000000..7fc6019e9e91
--- /dev/null
+++ b/src/installer/tests/HostActivation.Tests/DependencyResolution/SharedTestStateBase.cs
@@ -0,0 +1,90 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Microsoft.DotNet.CoreSetup.Test.HostActivation.DependencyResolution
+{
+    public abstract class SharedTestStateBase : IDisposable
+    {
+        public string Location { get; }
+
+        private readonly TestArtifact _baseDirectory;
+        private readonly List<TestApp> _apps = new List<TestApp>();
+
+        public SharedTestStateBase()
+        {
+            _baseDirectory = TestArtifact.Create("dependencyResolution");
+            Location = _baseDirectory.Location;
+        }
+
+        public DotNetBuilder DotNet(string name)
+        {
+            return new DotNetBuilder(_baseDirectory.Location, TestContext.BuiltDotNet.BinPath, name);
+        }
+
+        public TestApp CreateFrameworkReferenceApp(string fxName, string fxVersion, Action<NetCoreAppBuilder> customizer = null)
+        {
+            // Prepare the app mock - we're not going to run anything really, so we just need the basic files
+            TestApp testApp = CreateTestApp(_baseDirectory.Location, "FrameworkReferenceApp");
+            testApp.PopulateFrameworkDependent(fxName, fxVersion, customizer);
+            return testApp;
+        }
+
+        protected TestApp CreateTestApp(string location, string name)
+        {
+            TestApp testApp;
+            if (location == null)
+            {
+                testApp = TestApp.CreateEmpty(name);
+            }
+            else
+            {
+                string path = Path.Combine(location, name);
+                testApp = new TestApp(path);
+            }
+
+            _apps.Add(testApp);
+            return testApp;
+        }
+
+        public TestApp CreateComponentWithNoDependencies(Action<NetCoreAppBuilder> customizer = null, string location = null)
+        {
+            TestApp componentWithNoDependencies = CreateTestApp(location, "ComponentWithNoDependencies");
+            NetCoreAppBuilder builder = NetCoreAppBuilder.PortableForNETCoreApp(componentWithNoDependencies)
+                .WithProject(p => p.WithAssemblyGroup(null, g => g.WithMainAssembly()));
+            customizer?.Invoke(builder);
+
+            return builder.Build(componentWithNoDependencies);
+        }
+
+        public TestApp CreateSelfContainedAppWithMockCoreClr(string name, Action<NetCoreAppBuilder> customizer = null)
+        {
+            TestApp testApp = CreateTestApp(null, name);
+            testApp.PopulateSelfContained(TestApp.MockedComponent.CoreClr, customizer);
+            return testApp;
+        }
+
+        public void Dispose()
+        {
+            Dispose(true);
+            GC.SuppressFinalize(this);
+        }
+
+        protected virtual void Dispose(bool disposing)
+        {
+            if (!disposing)
+                return;
+
+            foreach (TestApp app in _apps)
+            {
+                app.Dispose();
+            }
+
+            _apps.Clear();
+            _baseDirectory.Dispose();
+        }
+    }
+}
diff --git a/src/installer/tests/HostActivation.Tests/DotnetArgValidation.cs b/src/installer/tests/HostActivation.Tests/DotnetArgValidation.cs
index e020e6f331f9..e1bd6522751f 100644
--- a/src/installer/tests/HostActivation.Tests/DotnetArgValidation.cs
+++ b/src/installer/tests/HostActivation.Tests/DotnetArgValidation.cs
@@ -124,7 +124,7 @@ public class SharedTestState : IDisposable
 
             public SharedTestState()
             {
-                BaseDirectory = new TestArtifact(SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, "argValidation")));
+                BaseDirectory = TestArtifact.Create("argValidation");
 
                 // Create an empty global.json file
                 Directory.CreateDirectory(BaseDirectory.Location);
diff --git a/src/installer/tests/HostActivation.Tests/FrameworkResolution/FrameworkResolutionBase.cs b/src/installer/tests/HostActivation.Tests/FrameworkResolution/FrameworkResolutionBase.cs
index b577e994af38..a0ce8d0cf396 100644
--- a/src/installer/tests/HostActivation.Tests/FrameworkResolution/FrameworkResolutionBase.cs
+++ b/src/installer/tests/HostActivation.Tests/FrameworkResolution/FrameworkResolutionBase.cs
@@ -87,28 +87,22 @@ protected CommandResult RunSelfContainedTest(
 
         public class SharedTestStateBase : IDisposable
         {
-            private readonly string _builtDotnet;
-            private readonly string _baseDir;
             private readonly TestArtifact _baseDirArtifact;
 
             public SharedTestStateBase()
             {
-                _builtDotnet = Path.Combine(TestArtifact.TestArtifactsPath, "sharedFrameworkPublish");
-
-                string baseDir = Path.Combine(TestArtifact.TestArtifactsPath, "frameworkResolution");
-                _baseDir = SharedFramework.CalculateUniqueTestDirectory(baseDir);
-                _baseDirArtifact = new TestArtifact(_baseDir);
+                _baseDirArtifact = TestArtifact.Create("frameworkResolution");
             }
 
             public DotNetBuilder DotNet(string name)
             {
-                return new DotNetBuilder(_baseDir, _builtDotnet, name);
+                return new DotNetBuilder(_baseDirArtifact.Location, TestContext.BuiltDotNet.BinPath, name);
             }
 
             public TestApp CreateFrameworkReferenceApp()
             {
                 // Prepare the app mock - we're not going to run anything really, so we just need the basic files
-                string testAppDir = Path.Combine(_baseDir, "FrameworkReferenceApp");
+                string testAppDir = Path.Combine(_baseDirArtifact.Location, "FrameworkReferenceApp");
                 Directory.CreateDirectory(testAppDir);
 
                 // ./FrameworkReferenceApp.dll
@@ -122,7 +116,7 @@ public TestApp CreateFrameworkReferenceApp()
 
             public TestApp CreateSelfContainedAppWithMockHostPolicy()
             {
-                string testAppDir = Path.Combine(_baseDir, "SelfContainedApp");
+                string testAppDir = Path.Combine(_baseDirArtifact.Location, "SelfContainedApp");
                 TestApp testApp = new TestApp(testAppDir);
                 testApp.PopulateSelfContained(TestApp.MockedComponent.HostPolicy);
 
diff --git a/src/installer/tests/HostActivation.Tests/HostActivation.Tests.csproj b/src/installer/tests/HostActivation.Tests/HostActivation.Tests.csproj
index f335c8c509ee..240237d85251 100644
--- a/src/installer/tests/HostActivation.Tests/HostActivation.Tests.csproj
+++ b/src/installer/tests/HostActivation.Tests/HostActivation.Tests.csproj
@@ -3,13 +3,11 @@
   <PropertyGroup>
     <TargetFramework>$(TestInfraTargetFramework)</TargetFramework>
     <AssemblyName>HostActivation.Tests</AssemblyName>
-    <PackageId>HostActivation.Tests</PackageId>
     <GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>
     <!-- Reduce the length of the test output dir to make it more reliable on Windows. -->
     <TestsOutputName>ha</TestsOutputName>
-    <UsesTestAssets>true</UsesTestAssets>
   </PropertyGroup>
-  
+
   <ItemGroup>
     <ProjectReference Include="..\TestUtils\TestUtils.csproj" />
     <ProjectReference Include="..\..\managed\Microsoft.NET.HostModel\Microsoft.NET.HostModel.csproj" />
diff --git a/src/installer/tests/HostActivation.Tests/InvalidHost.cs b/src/installer/tests/HostActivation.Tests/InvalidHost.cs
index 243b76560f1f..39c8c711a04c 100644
--- a/src/installer/tests/HostActivation.Tests/InvalidHost.cs
+++ b/src/installer/tests/HostActivation.Tests/InvalidHost.cs
@@ -109,7 +109,7 @@ public class SharedTestState : IDisposable
 
             public SharedTestState()
             {
-                BaseDirectory = new TestArtifact(SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, nameof(InvalidHost))));
+                BaseDirectory = TestArtifact.Create(nameof(InvalidHost));
                 Directory.CreateDirectory(BaseDirectory.Location);
 
                 RenamedDotNet = Path.Combine(BaseDirectory.Location, Binaries.GetExeFileNameForCurrentPlatform("renamed"));
diff --git a/src/installer/tests/HostActivation.Tests/MockCoreClrSanity.cs b/src/installer/tests/HostActivation.Tests/MockCoreClrSanity.cs
deleted file mode 100644
index 80d2336fd505..000000000000
--- a/src/installer/tests/HostActivation.Tests/MockCoreClrSanity.cs
+++ /dev/null
@@ -1,67 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using Microsoft.DotNet.Cli.Build;
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Runtime.InteropServices;
-using Xunit;
-
-namespace Microsoft.DotNet.CoreSetup.Test.HostActivation
-{
-    public class MockCoreClrSanity : IDisposable
-    {
-        private readonly DotNetCli DotNet;
-
-        private readonly TestArtifact _dotnetDirArtifact;
-
-        public MockCoreClrSanity()
-        {
-            _dotnetDirArtifact = new TestArtifact(Path.Combine(TestArtifact.TestArtifactsPath, "mockCoreclrSanity"));
-
-            DotNet = new DotNetBuilder(_dotnetDirArtifact.Location, Path.Combine(TestArtifact.TestArtifactsPath, "sharedFrameworkPublish"), "exe")
-                .AddMicrosoftNETCoreAppFrameworkMockCoreClr("9999.0.0")
-                .Build();
-        }
-
-        public void Dispose()
-        {
-            _dotnetDirArtifact.Dispose();
-        }
-
-        [Fact]
-        public void Muxer_ListRuntimes()
-        {
-            DotNet.Exec("--list-runtimes")
-                .CaptureStdOut()
-                .CaptureStdErr()
-                .Execute()
-                .Should().Pass()
-                .And.HaveStdOutContaining("Microsoft.NETCore.App 9999.0.0");
-        }
-
-        [Fact]
-        public void Muxer_ExecAppSequence()
-        {
-            var appDll = typeof(MockCoreClrSanity).Assembly.Location;
-            char sep = Path.DirectorySeparatorChar;
-
-            DotNet.Exec("--roll-forward-on-no-candidate-fx", "2", appDll, "argumentOne", "arg2")
-                .CaptureStdOut()
-                .CaptureStdErr()
-                .MultilevelLookup(false)
-                .Execute()
-                .Should().Pass()
-                .And.HaveStdOutContaining("mock coreclr_initialize() called")
-                .And.HaveStdOutContaining("mock property[TRUSTED_PLATFORM_ASSEMBLIES]")
-                .And.HaveStdOutContaining($"Microsoft.NETCore.App{sep}9999.0.0{sep}Microsoft.NETCore.App.deps.json")
-                .And.HaveStdOutContaining("mock coreclr_execute_assembly() called")
-                .And.HaveStdOutContaining("mock argc:2")
-                .And.HaveStdOutContaining($"mock managedAssemblyPath:{appDll}")
-                .And.HaveStdOutContaining("mock argv[0] = argumentOne")
-                .And.HaveStdOutContaining("mock argv[1] = arg2")
-                .And.HaveStdOutContaining("mock coreclr_shutdown_2() called");
-        }
-    }
-}
diff --git a/src/installer/tests/HostActivation.Tests/MultiArchInstallLocation.cs b/src/installer/tests/HostActivation.Tests/MultiArchInstallLocation.cs
index b0aaf060b21c..5d2b6b772d0f 100644
--- a/src/installer/tests/HostActivation.Tests/MultiArchInstallLocation.cs
+++ b/src/installer/tests/HostActivation.Tests/MultiArchInstallLocation.cs
@@ -3,8 +3,7 @@
 
 using System;
 using System.IO;
-using System.Runtime.InteropServices;
-using Microsoft.DotNet.Cli.Build;
+
 using Microsoft.DotNet.Cli.Build.Framework;
 using Microsoft.DotNet.CoreSetup.Test;
 using Microsoft.DotNet.CoreSetup.Test.HostActivation;
@@ -30,7 +29,7 @@ public void EnvironmentVariable_CurrentArchitectureIsUsedIfEnvVarSet()
                 .DotNetRoot(TestContext.BuiltDotNet.BinPath, arch)
                 .Execute()
                 .Should().Pass()
-                .And.HaveUsedDotNetRootInstallLocation(TestContext.BuiltDotNet.BinPath, TestContext.TargetRID, arch);
+                .And.HaveUsedDotNetRootInstallLocation(TestContext.BuiltDotNet.BinPath, TestContext.BuildRID, arch);
         }
 
         [Fact]
@@ -42,7 +41,7 @@ public void EnvironmentVariable_IfNoArchSpecificEnvVarIsFoundDotnetRootIsUsed()
                 .DotNetRoot(TestContext.BuiltDotNet.BinPath)
                 .Execute()
                 .Should().Pass()
-                .And.HaveUsedDotNetRootInstallLocation(TestContext.BuiltDotNet.BinPath, TestContext.TargetRID);
+                .And.HaveUsedDotNetRootInstallLocation(TestContext.BuiltDotNet.BinPath, TestContext.BuildRID);
         }
 
         [Fact]
@@ -56,7 +55,7 @@ public void EnvironmentVariable_ArchSpecificDotnetRootIsUsedOverDotnetRoot()
                 .DotNetRoot(dotnet, arch)
                 .Execute()
                 .Should().Pass()
-                .And.HaveUsedDotNetRootInstallLocation(dotnet, TestContext.TargetRID, arch)
+                .And.HaveUsedDotNetRootInstallLocation(dotnet, TestContext.BuildRID, arch)
                 .And.NotHaveStdErrContaining("Using environment variable DOTNET_ROOT=");
         }
 
@@ -78,7 +77,7 @@ public void EnvironmentVariable_DotNetRootIsUsedOverInstallLocationIfSet()
                     .DotNetRoot(dotnet, arch)
                     .Execute()
                     .Should().Pass()
-                    .And.HaveUsedDotNetRootInstallLocation(dotnet, TestContext.TargetRID, arch)
+                    .And.HaveUsedDotNetRootInstallLocation(dotnet, TestContext.BuildRID, arch)
                     .And.NotHaveStdErrContaining("Using global install location");
             }
         }
@@ -120,7 +119,7 @@ public void EnvironmentVariable_DotnetRootPathExistsButHasNoHost()
                         TestContext.BuiltDotNet.BinPath)
                     .Execute()
                     .Should().Fail()
-                    .And.HaveUsedDotNetRootInstallLocation(app.Location, TestContext.TargetRID)
+                    .And.HaveUsedDotNetRootInstallLocation(app.Location, TestContext.BuildRID)
                     // If DOTNET_ROOT points to a folder that exists we assume that there's a dotnet installation in it
                     .And.HaveStdErrContaining($"The required library {Binaries.HostFxr.FileName} could not be found.");
             }
@@ -225,15 +224,12 @@ public void InstallLocationFile_ReallyLongInstallPathIsParsedCorrectly()
         public void InstallLocationFile_MissingFile()
         {
             var app = sharedTestState.App.Copy();
-            string testArtifactsPath = SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, "missingInstallLocation"));
-            using (new TestArtifact(testArtifactsPath))
+            using (var testArtifact = TestArtifact.Create("missingInstallLocation"))
             using (var testOnlyProductBehavior = TestOnlyProductBehavior.Enable(app.AppExe))
             {
-                Directory.CreateDirectory(testArtifactsPath);
-
-                string installLocationDirectory = Path.Combine(testArtifactsPath, "installLocationOverride");
+                string installLocationDirectory = Path.Combine(testArtifact.Location, "installLocationOverride");
                 Directory.CreateDirectory(installLocationDirectory);
-                string defaultInstallLocation = Path.Combine(testArtifactsPath, "defaultInstallLocation");
+                string defaultInstallLocation = Path.Combine(testArtifact.Location, "defaultInstallLocation");
 
                 Command.Create(app.AppExe)
                     .CaptureStdErr()
@@ -252,7 +248,7 @@ public void InstallLocationFile_MissingFile()
         [Fact]
         public void RegisteredInstallLocation_DotNetInfo_ListOtherArchitectures()
         {
-            using (var testArtifact = new TestArtifact(SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, "listOtherArchs"))))
+            using (var testArtifact = TestArtifact.Create("listOtherArchs"))
             {
                 var dotnet = new DotNetBuilder(testArtifact.Location, TestContext.BuiltDotNet.BinPath, "exe").Build();
                 using (var registeredInstallLocationOverride = new RegisteredInstallLocationOverride(dotnet.GreatestVersionHostFxrFilePath))
diff --git a/src/installer/tests/HostActivation.Tests/MultilevelSDKLookup.cs b/src/installer/tests/HostActivation.Tests/MultilevelSDKLookup.cs
index 39138360ebed..6f5f13426a7e 100644
--- a/src/installer/tests/HostActivation.Tests/MultilevelSDKLookup.cs
+++ b/src/installer/tests/HostActivation.Tests/MultilevelSDKLookup.cs
@@ -33,10 +33,7 @@ public class MultilevelSDKLookup : IDisposable
 
         public MultilevelSDKLookup()
         {
-            // The dotnetMultilevelSDKLookup dir will contain some folders and files that will be
-            // necessary to perform the tests
-            string baseMultilevelDir = Path.Combine(TestArtifact.TestArtifactsPath, "dotnetMultilevelSDKLookup");
-            _multilevelDir = new TestArtifact(SharedFramework.CalculateUniqueTestDirectory(baseMultilevelDir));
+            _multilevelDir = TestArtifact.Create(nameof(MultilevelSDKLookup));
 
             // The tested locations will be the cwd, exe dir, and registered directory. cwd is no longer supported.
             //     All dirs will be placed inside the multilevel folder
@@ -44,7 +41,7 @@ public MultilevelSDKLookup()
             _exeDir = Path.Combine(_multilevelDir.Location, "exe");
             _regDir = Path.Combine(_multilevelDir.Location, "reg");
 
-            DotNet = new DotNetBuilder(_multilevelDir.Location, Path.Combine(TestArtifact.TestArtifactsPath, "sharedFrameworkPublish"), "exe")
+            DotNet = new DotNetBuilder(_multilevelDir.Location, TestContext.BuiltDotNet.BinPath, "exe")
                 .AddMicrosoftNETCoreAppFrameworkMockHostPolicy("9999.0.0")
                 .Build();
 
diff --git a/src/installer/tests/HostActivation.Tests/NativeHostApis.cs b/src/installer/tests/HostActivation.Tests/NativeHostApis.cs
index 959239d08d03..c865567ec23c 100644
--- a/src/installer/tests/HostActivation.Tests/NativeHostApis.cs
+++ b/src/installer/tests/HostActivation.Tests/NativeHostApis.cs
@@ -4,7 +4,7 @@
 using System;
 using System.Collections.Generic;
 using System.IO;
-
+using FluentAssertions;
 using Microsoft.DotNet.Cli.Build;
 using Microsoft.DotNet.TestUtils;
 using Xunit;
@@ -20,21 +20,33 @@ public NativeHostApis(SharedTestState fixture)
             sharedTestState = fixture;
         }
 
-        private class SdkResolutionFixture
+        private class ApiNames
         {
-            private readonly TestApp _app;
-
-            public DotNetCli Dotnet { get; }
-            public string AppDll => _app.AppDll;
-            public string ExeDir => Path.Combine(_app.Location, "ed");
-            public string ProgramFiles => Path.Combine(ExeDir, "pf");
-            public string SelfRegistered => Path.Combine(ExeDir, "sr");
-            public string WorkingDir => Path.Combine(_app.Location, "wd");
-            public string ProgramFilesGlobalSdkDir => Path.Combine(ProgramFiles, "dotnet", "sdk");
-            public string ProgramFilesGlobalFrameworksDir => Path.Combine(ProgramFiles, "dotnet", "shared");
-            public string SelfRegisteredGlobalSdkDir => Path.Combine(SelfRegistered, "sdk");
+            public const string hostfxr_get_available_sdks = nameof(hostfxr_get_available_sdks);
+            public const string hostfxr_resolve_sdk2 = nameof(hostfxr_resolve_sdk2);
+            public const string hostfxr_get_dotnet_environment_info = nameof(hostfxr_get_dotnet_environment_info);
+        }
+
+        internal sealed class SdkAndFrameworkFixture : IDisposable
+        {
+            private readonly TestArtifact _artifact;
+
+            public string EmptyGlobalJsonDir => Path.Combine(_artifact.Location, "wd");
+
+            public string ExeDir => Path.Combine(_artifact.Location, "ed");
             public string LocalSdkDir => Path.Combine(ExeDir, "sdk");
             public string LocalFrameworksDir => Path.Combine(ExeDir, "shared");
+            public string[] LocalSdks = new[] { "0.1.2", "5.6.7-preview", "1.2.3" };
+            public List<(string fwName, string[] fwVersions)> LocalFrameworks =
+                new List<(string fwName, string[] fwVersions)>()
+                {
+                    ("HostFxr.Test.B", new[] { "4.0.0", "5.6.7-A" }),
+                    ("HostFxr.Test.C", new[] { "3.0.0" })
+                };
+
+            public string ProgramFiles => Path.Combine(_artifact.Location, "pf");
+            public string ProgramFilesGlobalSdkDir => Path.Combine(ProgramFiles, "dotnet", "sdk");
+            public string ProgramFilesGlobalFrameworksDir => Path.Combine(ProgramFiles, "dotnet", "shared");
             public string[] ProgramFilesGlobalSdks = new[] { "4.5.6", "1.2.3", "2.3.4-preview" };
             public List<(string fwName, string[] fwVersions)> ProgramFilesGlobalFrameworks =
                 new List<(string fwName, string[] fwVersions)>()
@@ -42,26 +54,20 @@ private class SdkResolutionFixture
                     ("HostFxr.Test.A", new[] { "1.2.3", "3.0.0" }),
                     ("HostFxr.Test.B", new[] { "5.6.7-A" })
                 };
+
+            public string SelfRegistered => Path.Combine(_artifact.Location, "sr");
+            public string SelfRegisteredGlobalSdkDir => Path.Combine(SelfRegistered, "sdk");
             public string[] SelfRegisteredGlobalSdks = new[] { "3.0.0", "15.1.4-preview", "5.6.7" };
-            public string[] LocalSdks = new[] { "0.1.2", "5.6.7-preview", "1.2.3" };
-            public List<(string fwName, string[] fwVersions)> LocalFrameworks =
-                new List<(string fwName, string[] fwVersions)>()
-                {
-                    ("HostFxr.Test.B", new[] { "4.0.0", "5.6.7-A" }),
-                    ("HostFxr.Test.C", new[] { "3.0.0" })
-                };
 
-            public SdkResolutionFixture(SharedTestState state)
+            public SdkAndFrameworkFixture()
             {
-                Dotnet = TestContext.BuiltDotNet;
+                _artifact = TestArtifact.Create(nameof(SdkAndFrameworkFixture));
 
-                _app = state.HostApiInvokerApp.Copy();
-
-                Directory.CreateDirectory(WorkingDir);
+                Directory.CreateDirectory(EmptyGlobalJsonDir);
 
                 // start with an empty global.json, it will be ignored, but prevent one lying on disk
                 // on a given machine from impacting the test.
-                GlobalJson.CreateEmpty(WorkingDir);
+                GlobalJson.CreateEmpty(EmptyGlobalJsonDir);
 
                 foreach (string sdk in ProgramFilesGlobalSdks)
                 {
@@ -107,16 +113,20 @@ static void AddFrameworkDirectory(string frameworkDir, string name, string versi
                     File.WriteAllText(Path.Combine(versionDir, $"{name}.deps.json"), string.Empty);
                 }
             }
+
+            public void Dispose()
+            {
+                _artifact.Dispose();
+            }
         }
 
         [Fact]
         [PlatformSpecific(TestPlatforms.Windows)] // The test setup only works on Windows (and MLL was Windows-only anyway)
         public void Hostfxr_get_available_sdks_with_multilevel_lookup()
         {
-            var f = new SdkResolutionFixture(sharedTestState);
-
             // Starting with .NET 7, multi-level lookup is completely disabled for hostfxr API calls.
             // This test is still valuable to validate that it is in fact disabled
+            var f = sharedTestState.SdkAndFrameworkFixture;
             string expectedList = string.Join(';', new[]
             {
                 Path.Combine(f.LocalSdkDir, "0.1.2"),
@@ -124,27 +134,23 @@ public void Hostfxr_get_available_sdks_with_multilevel_lookup()
                 Path.Combine(f.LocalSdkDir, "5.6.7-preview"),
             });
 
-            using (TestOnlyProductBehavior.Enable(f.Dotnet.GreatestVersionHostFxrFilePath))
-            {
-                f.Dotnet.Exec(f.AppDll, new[] { "hostfxr_get_available_sdks", f.ExeDir })
-                    .EnvironmentVariable("TEST_MULTILEVEL_LOOKUP_PROGRAM_FILES", f.ProgramFiles)
-                    .EnvironmentVariable("TEST_MULTILEVEL_LOOKUP_SELF_REGISTERED", f.SelfRegistered)
-                    .CaptureStdOut()
-                    .CaptureStdErr()
-                    .Execute()
-                    .Should().Pass()
-                    .And.HaveStdOutContaining("hostfxr_get_available_sdks:Success")
-                    .And.HaveStdOutContaining($"hostfxr_get_available_sdks sdks:[{expectedList}]");
-            }
+            string api = ApiNames.hostfxr_get_available_sdks;
+            sharedTestState.TestBehaviorEnabledDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, api, f.ExeDir)
+                .EnvironmentVariable("TEST_MULTILEVEL_LOOKUP_PROGRAM_FILES", f.ProgramFiles)
+                .EnvironmentVariable("TEST_MULTILEVEL_LOOKUP_SELF_REGISTERED", f.SelfRegistered)
+                .EnableTracingAndCaptureOutputs()
+                .Execute()
+                .Should().Pass()
+                .And.ReturnStatusCode(api, Constants.ErrorCode.Success)
+                .And.HaveStdOutContaining($"{api} sdks:[{expectedList}]");
         }
 
         [Fact]
-        public void Hostfxr_get_available_sdks_without_multilevel_lookup()
+        public void Hostfxr_get_available_sdks()
         {
-            // Without multi-level lookup: get only sdks sorted by ascending version
-
-            var f = new SdkResolutionFixture(sharedTestState);
+            // Get SDKs sorted by ascending version
 
+            var f = sharedTestState.SdkAndFrameworkFixture;
             string expectedList = string.Join(';', new[]
             {
                  Path.Combine(f.LocalSdkDir, "0.1.2"),
@@ -152,13 +158,13 @@ public void Hostfxr_get_available_sdks_without_multilevel_lookup()
                  Path.Combine(f.LocalSdkDir, "5.6.7-preview"),
             });
 
-            f.Dotnet.Exec(f.AppDll, new[] { "hostfxr_get_available_sdks", f.ExeDir })
-                .CaptureStdOut()
-                .CaptureStdErr()
+            string api = ApiNames.hostfxr_get_available_sdks;
+            TestContext.BuiltDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, api, f.ExeDir)
+                .EnableTracingAndCaptureOutputs()
                 .Execute()
                 .Should().Pass()
-                .And.HaveStdOutContaining("hostfxr_get_available_sdks:Success")
-                .And.HaveStdOutContaining($"hostfxr_get_available_sdks sdks:[{expectedList}]");
+                .And.ReturnStatusCode(api, Constants.ErrorCode.Success)
+                .And.HaveStdOutContaining($"{api} sdks:[{expectedList}]");
         }
 
         [Fact]
@@ -166,20 +172,19 @@ public void Hostfxr_resolve_sdk2_without_global_json_or_flags()
         {
             // with no global.json and no flags, pick latest SDK
 
-            var f = new SdkResolutionFixture(sharedTestState);
-
+            var f = sharedTestState.SdkAndFrameworkFixture;
             string expectedData = string.Join(';', new[]
             {
                 ("resolved_sdk_dir", Path.Combine(f.LocalSdkDir, "5.6.7-preview")),
             });
 
-            f.Dotnet.Exec(f.AppDll, new[] { "hostfxr_resolve_sdk2", f.ExeDir, f.WorkingDir, "0" })
-                .CaptureStdOut()
-                .CaptureStdErr()
+            string api = ApiNames.hostfxr_resolve_sdk2;
+            TestContext.BuiltDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, api, f.ExeDir, f.EmptyGlobalJsonDir, "0")
+                .EnableTracingAndCaptureOutputs()
                 .Execute()
                 .Should().Pass()
-                .And.HaveStdOutContaining("hostfxr_resolve_sdk2:Success")
-                .And.HaveStdOutContaining($"hostfxr_resolve_sdk2 data:[{expectedData}]");
+                .And.ReturnStatusCode(api, Constants.ErrorCode.Success)
+                .And.HaveStdOutContaining($"{api} data:[{expectedData}]");
         }
 
         [Fact]
@@ -187,20 +192,19 @@ public void Hostfxr_resolve_sdk2_without_global_json_and_disallowing_previews()
         {
             // Without global.json and disallowing previews, pick latest non-preview
 
-            var f = new SdkResolutionFixture(sharedTestState);
-
+            var f = sharedTestState.SdkAndFrameworkFixture;
             string expectedData = string.Join(';', new[]
             {
                 ("resolved_sdk_dir", Path.Combine(f.LocalSdkDir, "1.2.3"))
             });
 
-            f.Dotnet.Exec(f.AppDll, new[] { "hostfxr_resolve_sdk2", f.ExeDir, f.WorkingDir, "disallow_prerelease" })
-                .CaptureStdOut()
-                .CaptureStdErr()
+            string api = ApiNames.hostfxr_resolve_sdk2;
+            TestContext.BuiltDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, api, f.ExeDir, f.EmptyGlobalJsonDir, "disallow_prerelease")
+                .EnableTracingAndCaptureOutputs()
                 .Execute()
                 .Should().Pass()
-                .And.HaveStdOutContaining("hostfxr_resolve_sdk2:Success")
-                .And.HaveStdOutContaining($"hostfxr_resolve_sdk2 data:[{expectedData}]");
+                .And.ReturnStatusCode(api, Constants.ErrorCode.Success)
+                .And.HaveStdOutContaining($"{api} data:[{expectedData}]");
         }
 
         [Fact]
@@ -210,32 +214,33 @@ public void Hostfxr_resolve_sdk2_with_global_json_and_disallowing_previews()
             // since flag has no impact if global.json specifies a preview.
             // Also check that global.json that impacted resolution is reported.
 
-            var f = new SdkResolutionFixture(sharedTestState);
-
-            string requestedVersion = "5.6.6-preview";
-            string globalJson = GlobalJson.CreateWithVersion(f.WorkingDir, requestedVersion);
-            string expectedData = string.Join(';', new[]
+            var f = sharedTestState.SdkAndFrameworkFixture;
+            using (TestArtifact workingDir = TestArtifact.Create(nameof(workingDir)))
             {
-                ("resolved_sdk_dir", Path.Combine(f.LocalSdkDir, "5.6.7-preview")),
-                ("global_json_path", globalJson),
-                ("requested_version", requestedVersion),
-            });
-
-            f.Dotnet.Exec(f.AppDll, new[] { "hostfxr_resolve_sdk2", f.ExeDir, f.WorkingDir, "disallow_prerelease" })
-                .CaptureStdOut()
-                .CaptureStdErr()
-                .Execute()
-                .Should().Pass()
-                .And.HaveStdOutContaining("hostfxr_resolve_sdk2:Success")
-                .And.HaveStdOutContaining($"hostfxr_resolve_sdk2 data:[{expectedData}]");
+                string requestedVersion = "5.6.6-preview";
+                string globalJson = GlobalJson.CreateWithVersion(workingDir.Location, requestedVersion);
+                string expectedData = string.Join(';', new[]
+                {
+                    ("resolved_sdk_dir", Path.Combine(f.LocalSdkDir, "5.6.7-preview")),
+                    ("global_json_path", globalJson),
+                    ("requested_version", requestedVersion),
+                });
+
+                string api = ApiNames.hostfxr_resolve_sdk2;
+                TestContext.BuiltDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, api, f.ExeDir, workingDir.Location, "disallow_prerelease")
+                    .EnableTracingAndCaptureOutputs()
+                    .Execute()
+                    .Should().Pass()
+                    .And.ReturnStatusCode(api, Constants.ErrorCode.Success)
+                    .And.HaveStdOutContaining($"{api} data:[{expectedData}]");
+            }
         }
 
         [Fact]
         public void Hostfxr_corehost_set_error_writer_test()
         {
             TestContext.BuiltDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, "Test_hostfxr_set_error_writer")
-                .CaptureStdOut()
-                .CaptureStdErr()
+                .EnableTracingAndCaptureOutputs()
                 .Execute()
                 .Should().Pass();
         }
@@ -243,7 +248,7 @@ public void Hostfxr_corehost_set_error_writer_test()
         [Fact]
         public void Hostfxr_get_dotnet_environment_info_dotnet_root_only()
         {
-            var f = new SdkResolutionFixture(sharedTestState);
+            var f = sharedTestState.SdkAndFrameworkFixture;
             string expectedSdkVersions = string.Join(";", new[]
             {
                 "0.1.2",
@@ -279,24 +284,24 @@ public void Hostfxr_get_dotnet_environment_info_dotnet_root_only()
                 Path.Combine(f.LocalFrameworksDir, "HostFxr.Test.C")
             });
 
-            f.Dotnet.Exec(f.AppDll, new[] { "hostfxr_get_dotnet_environment_info", f.ExeDir })
-            .CaptureStdOut()
-            .CaptureStdErr()
-            .Execute()
-            .Should().Pass()
-            .And.HaveStdOutContaining("hostfxr_get_dotnet_environment_info:Success")
-            .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info sdk versions:[{expectedSdkVersions}]")
-            .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info sdk paths:[{expectedSdkPaths}]")
-            .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info framework names:[{expectedFrameworkNames}]")
-            .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info framework versions:[{expectedFrameworkVersions}]")
-            .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info framework paths:[{expectedFrameworkPaths}]");
+            string api = ApiNames.hostfxr_get_dotnet_environment_info;
+            TestContext.BuiltDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, api, f.ExeDir)
+                .EnableTracingAndCaptureOutputs()
+                .Execute()
+                .Should().Pass()
+                .And.ReturnStatusCode(api, Constants.ErrorCode.Success)
+                .And.HaveStdOutContaining($"{api} sdk versions:[{expectedSdkVersions}]")
+                .And.HaveStdOutContaining($"{api} sdk paths:[{expectedSdkPaths}]")
+                .And.HaveStdOutContaining($"{api} framework names:[{expectedFrameworkNames}]")
+                .And.HaveStdOutContaining($"{api} framework versions:[{expectedFrameworkVersions}]")
+                .And.HaveStdOutContaining($"{api} framework paths:[{expectedFrameworkPaths}]");
         }
 
         [Fact]
         [PlatformSpecific(TestPlatforms.Windows)] // The test setup only works on Windows (and MLL was Windows-only anyway)
         public void Hostfxr_get_dotnet_environment_info_with_multilevel_lookup_with_dotnet_root()
         {
-            var f = new SdkResolutionFixture(sharedTestState);
+            var f = sharedTestState.SdkAndFrameworkFixture;
             string expectedSdkVersions = string.Join(';', new[]
             {
                 "0.1.2",
@@ -332,124 +337,84 @@ public void Hostfxr_get_dotnet_environment_info_with_multilevel_lookup_with_dotn
                 Path.Combine(f.LocalFrameworksDir, "HostFxr.Test.C")
             });
 
-            using (TestOnlyProductBehavior.Enable(f.Dotnet.GreatestVersionHostFxrFilePath))
-            {
-                f.Dotnet.Exec(f.AppDll, new[] { "hostfxr_get_dotnet_environment_info", f.ExeDir })
+            string api = ApiNames.hostfxr_get_dotnet_environment_info;
+            sharedTestState.TestBehaviorEnabledDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, new[] { api, f.ExeDir })
                 .EnvironmentVariable("TEST_MULTILEVEL_LOOKUP_PROGRAM_FILES", f.ProgramFiles)
                 .EnvironmentVariable("TEST_MULTILEVEL_LOOKUP_SELF_REGISTERED", f.SelfRegistered)
-                .CaptureStdOut()
-                .CaptureStdErr()
+                .EnableTracingAndCaptureOutputs()
                 .Execute()
                 .Should().Pass()
-                .And.HaveStdOutContaining("hostfxr_get_dotnet_environment_info:Success")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info sdk versions:[{expectedSdkVersions}]")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info sdk paths:[{expectedSdkPaths}]")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info framework names:[{expectedFrameworkNames}]")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info framework versions:[{expectedFrameworkVersions}]")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info framework paths:[{expectedFrameworkPaths}]");
-            }
+                .And.ReturnStatusCode(api, Constants.ErrorCode.Success)
+                .And.HaveStdOutContaining($"{api} sdk versions:[{expectedSdkVersions}]")
+                .And.HaveStdOutContaining($"{api} sdk paths:[{expectedSdkPaths}]")
+                .And.HaveStdOutContaining($"{api} framework names:[{expectedFrameworkNames}]")
+                .And.HaveStdOutContaining($"{api} framework versions:[{expectedFrameworkVersions}]")
+                .And.HaveStdOutContaining($"{api} framework paths:[{expectedFrameworkPaths}]");
         }
 
         [Fact]
         [PlatformSpecific(TestPlatforms.Windows)] // The test setup only works on Windows (and MLL was Windows-only anyway)
         public void Hostfxr_get_dotnet_environment_info_with_multilevel_lookup_only()
         {
-            var f = new SdkResolutionFixture(sharedTestState);
+            var f = sharedTestState.SdkAndFrameworkFixture;
 
             // Multi-level lookup is completely disabled on 7+
             // The test runs the API with the dotnet root directory set to a location which doesn't have any SDKs or frameworks
-            using (TestOnlyProductBehavior.Enable(f.Dotnet.GreatestVersionHostFxrFilePath))
-            {
-                // We pass f.WorkingDir so that we don't resolve dotnet_dir to the global installation
-                // in the native side.
-                f.Dotnet.Exec(f.AppDll, new[] { "hostfxr_get_dotnet_environment_info", f.WorkingDir })
+            string api = ApiNames.hostfxr_get_dotnet_environment_info;
+            sharedTestState.TestBehaviorEnabledDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, api, sharedTestState.HostApiInvokerApp.Location)
                 .EnvironmentVariable("TEST_MULTILEVEL_LOOKUP_PROGRAM_FILES", f.ProgramFiles)
                 .EnvironmentVariable("TEST_MULTILEVEL_LOOKUP_SELF_REGISTERED", f.SelfRegistered)
-                .CaptureStdOut()
-                .CaptureStdErr()
+                .EnableTracingAndCaptureOutputs()
                 .Execute()
                 .Should().Pass()
-                .And.HaveStdOutContaining("hostfxr_get_dotnet_environment_info:Success")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info sdk versions:[]")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info sdk paths:[]")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info framework names:[]")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info framework versions:[]")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info framework paths:[]");
-            }
+                .And.ReturnStatusCode(api, Constants.ErrorCode.Success)
+                .And.HaveStdOutContaining($"{api} sdk versions:[]")
+                .And.HaveStdOutContaining($"{api} sdk paths:[]")
+                .And.HaveStdOutContaining($"{api} framework names:[]")
+                .And.HaveStdOutContaining($"{api} framework versions:[]")
+                .And.HaveStdOutContaining($"{api} framework paths:[]");
         }
 
         [Fact]
-        [PlatformSpecific(TestPlatforms.Windows)] // The test setup only works on Windows (and MLL was Windows-only anyway)
-        public void Hostfxr_get_dotnet_environment_info_with_multilevel_lookup_only_self_register_program_files()
+        public void Hostfxr_get_dotnet_environment_info_global_install_path()
         {
-            var f = new SdkResolutionFixture(sharedTestState);
-
-            using (TestOnlyProductBehavior.Enable(f.Dotnet.GreatestVersionHostFxrFilePath))
-            {
-                // We pass f.WorkingDir so that we don't resolve dotnet_dir to the global installation
-                // in the native side.
-                f.Dotnet.Exec(f.AppDll, new[] { "hostfxr_get_dotnet_environment_info", f.WorkingDir })
-                .EnvironmentVariable("TEST_MULTILEVEL_LOOKUP_PROGRAM_FILES", f.ProgramFiles)
-                // Test with a self-registered path the same as ProgramFiles, with a trailing slash.  Expect this to be de-duped
-                .EnvironmentVariable("TEST_MULTILEVEL_LOOKUP_SELF_REGISTERED", Path.Combine(f.ProgramFiles, "dotnet") + Path.DirectorySeparatorChar)
-                .CaptureStdOut()
-                .CaptureStdErr()
+            string api = ApiNames.hostfxr_get_dotnet_environment_info;
+            TestContext.BuiltDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, api)
+                .EnableTracingAndCaptureOutputs()
                 .Execute()
                 .Should().Pass()
-                .And.HaveStdOutContaining("hostfxr_get_dotnet_environment_info:Success")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info framework names:[]")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info framework versions:[]")
-                .And.HaveStdOutContaining($"hostfxr_get_dotnet_environment_info framework paths:[]");
-            }
-        }
-
-        [Fact]
-        public void Hostfxr_get_dotnet_environment_info_global_install_path()
-        {
-            var f = new SdkResolutionFixture(sharedTestState);
-
-            f.Dotnet.Exec(f.AppDll, new[] { "hostfxr_get_dotnet_environment_info" })
-            .CaptureStdOut()
-            .CaptureStdErr()
-            .Execute()
-            .Should().Pass()
-            .And.HaveStdOutContaining("hostfxr_get_dotnet_environment_info:Success");
+                .And.ReturnStatusCode(api, Constants.ErrorCode.Success);
         }
 
         [Fact]
         public void Hostfxr_get_dotnet_environment_info_result_is_nullptr_fails()
         {
-            var f = new SdkResolutionFixture(sharedTestState);
-
-            f.Dotnet.Exec(f.AppDll, new[] { "hostfxr_get_dotnet_environment_info", "test_invalid_result_ptr" })
+            string api = ApiNames.hostfxr_get_dotnet_environment_info;
+            TestContext.BuiltDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, api, "test_invalid_result_ptr")
                 .EnableTracingAndCaptureOutputs()
                 .Execute()
                 .Should().Pass()
-                // 0x80008081 (InvalidArgFailure)
-                .And.HaveStdOutContaining("hostfxr_get_dotnet_environment_info:Fail[-2147450751]")
-                .And.HaveStdErrContaining("hostfxr_get_dotnet_environment_info received an invalid argument: result should not be null.");
+                .And.ReturnStatusCode(api, Constants.ErrorCode.InvalidArgFailure)
+                .And.HaveStdErrContaining($"{api} received an invalid argument: result should not be null.");
         }
 
         [Fact]
         public void Hostfxr_get_dotnet_environment_info_reserved_is_not_nullptr_fails()
         {
-            var f = new SdkResolutionFixture(sharedTestState);
-
-            f.Dotnet.Exec(f.AppDll, new[] { "hostfxr_get_dotnet_environment_info", "test_invalid_reserved_ptr" })
+            string api = ApiNames.hostfxr_get_dotnet_environment_info;
+            TestContext.BuiltDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, api, "test_invalid_reserved_ptr")
                 .EnableTracingAndCaptureOutputs()
                 .Execute()
                 .Should().Pass()
-                // 0x80008081 (InvalidArgFailure)
-                .And.HaveStdOutContaining("hostfxr_get_dotnet_environment_info:Fail[-2147450751]")
-                .And.HaveStdErrContaining("hostfxr_get_dotnet_environment_info received an invalid argument: reserved should be null.");
+                .And.ReturnStatusCode(api, Constants.ErrorCode.InvalidArgFailure)
+                .And.HaveStdErrContaining($"{api} received an invalid argument: reserved should be null.");
         }
 
         [Fact]
         public void Hostpolicy_corehost_set_error_writer_test()
         {
             TestContext.BuiltDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, "Test_corehost_set_error_writer")
-                .CaptureStdOut()
-                .CaptureStdErr()
+                .EnableTracingAndCaptureOutputs()
                 .Execute()
                 .Should().Pass();
         }
@@ -459,8 +424,7 @@ public void HostRuntimeContract_get_runtime_property()
         {
             TestApp app = sharedTestState.HostApiInvokerApp;
             TestContext.BuiltDotNet.Exec(app.AppDll, "host_runtime_contract.get_runtime_property", "APP_CONTEXT_BASE_DIRECTORY", "RUNTIME_IDENTIFIER", "DOES_NOT_EXIST", "ENTRY_ASSEMBLY_NAME")
-                .CaptureStdOut()
-                .CaptureStdErr()
+                .EnableTracingAndCaptureOutputs()
                 .Execute()
                 .Should().Pass()
                 .And.HaveStdOutContaining($"APP_CONTEXT_BASE_DIRECTORY = {Path.GetDirectoryName(app.AppDll)}")
@@ -473,8 +437,7 @@ public void HostRuntimeContract_get_runtime_property()
         public void HostRuntimeContract_bundle_probe()
         {
             TestContext.BuiltDotNet.Exec(sharedTestState.HostApiInvokerApp.AppDll, "host_runtime_contract.bundle_probe", "APP_CONTEXT_BASE_DIRECTORY", "RUNTIME_IDENTIFIER", "DOES_NOT_EXIST", "ENTRY_ASSEMBLY_NAME")
-                .CaptureStdOut()
-                .CaptureStdErr()
+                .EnableTracingAndCaptureOutputs()
                 .Execute()
                 .Should().Pass()
                 .And.HaveStdOutContaining("host_runtime_contract.bundle_probe is not set");
@@ -484,6 +447,11 @@ public class SharedTestState : IDisposable
         {
             public TestApp HostApiInvokerApp { get; }
 
+            public DotNetCli TestBehaviorEnabledDotNet { get; }
+            private readonly TestArtifact copiedDotnet;
+
+            internal SdkAndFrameworkFixture SdkAndFrameworkFixture { get; }
+
             public SharedTestState()
             {
                 HostApiInvokerApp = TestApp.CreateFromBuiltAssets("HostApiInvokerApp");
@@ -493,12 +461,34 @@ public SharedTestState()
                     // On non-Windows, we can't just P/Invoke to already loaded hostfxr, so copy it next to the app dll.
                     File.Copy(Binaries.HostFxr.FilePath, Path.Combine(HostApiInvokerApp.Location, Binaries.HostFxr.FileName));
                 }
+
+                // Make a copy of the built .NET, as we will enable test-only behaviour
+                copiedDotnet = TestArtifact.CreateFromCopy(nameof(NativeHostApis), TestContext.BuiltDotNet.BinPath);
+                TestBehaviorEnabledDotNet = new DotNetCli(copiedDotnet.Location);
+
+                // Enable test-only behavior for the copied .NET. We don't bother disabling the behaviour later,
+                // as we just delete the entire copy after the tests run.
+                _ = TestOnlyProductBehavior.Enable(TestBehaviorEnabledDotNet.GreatestVersionHostFxrFilePath);
+
+                SdkAndFrameworkFixture = new SdkAndFrameworkFixture();
             }
 
             public void Dispose()
             {
                 HostApiInvokerApp?.Dispose();
+                copiedDotnet.Dispose();
+                SdkAndFrameworkFixture.Dispose();
             }
         }
     }
+
+    public static class HostApisCommandResultExtensions
+    {
+        public static AndConstraint<CommandResultAssertions> ReturnStatusCode(this CommandResultAssertions assertion, string apiName, int statusCode)
+        {
+            return statusCode == Constants.ErrorCode.Success
+                ? assertion.HaveStdOutContaining($"{apiName}:Success")
+                : assertion.HaveStdOutContaining($"{apiName}:Fail[0x{statusCode:x}]");
+        }
+    }
 }
diff --git a/src/installer/tests/HostActivation.Tests/NativeHosting/GetNativeSearchDirectories.cs b/src/installer/tests/HostActivation.Tests/NativeHosting/GetNativeSearchDirectories.cs
index d28cbd2d28eb..16cf0ee8bb68 100644
--- a/src/installer/tests/HostActivation.Tests/NativeHosting/GetNativeSearchDirectories.cs
+++ b/src/installer/tests/HostActivation.Tests/NativeHosting/GetNativeSearchDirectories.cs
@@ -160,7 +160,7 @@ public class SharedTestState : SharedTestStateBase
 
             public SharedTestState()
             {
-                DotNet = new DotNetBuilder(BaseDirectory, Path.Combine(TestArtifact.TestArtifactsPath, "sharedFrameworkPublish"), "mockRuntime")
+                DotNet = new DotNetBuilder(BaseDirectory, TestContext.BuiltDotNet.BinPath, "mockRuntime")
                     .AddMicrosoftNETCoreAppFrameworkMockCoreClr(NetCoreAppVersion)
                     .Build();
 
diff --git a/src/installer/tests/HostActivation.Tests/NativeHosting/HostContext.cs b/src/installer/tests/HostActivation.Tests/NativeHosting/HostContext.cs
index a3db96a9f733..439224531549 100644
--- a/src/installer/tests/HostActivation.Tests/NativeHosting/HostContext.cs
+++ b/src/installer/tests/HostActivation.Tests/NativeHosting/HostContext.cs
@@ -683,7 +683,7 @@ public class SharedTestState : SharedTestStateBase
 
             public SharedTestState()
             {
-                var dotNet = new DotNetBuilder(BaseDirectory, Path.Combine(TestArtifact.TestArtifactsPath, "sharedFrameworkPublish"), "mockRuntime")
+                var dotNet = new DotNetBuilder(BaseDirectory, TestContext.BuiltDotNet.BinPath, "mockRuntime")
                     .AddMicrosoftNETCoreAppFrameworkMockCoreClr(NetCoreAppVersion)
                     .Build();
                 DotNetRoot = dotNet.BinPath;
diff --git a/src/installer/tests/HostActivation.Tests/NativeHosting/SharedTestStateBase.cs b/src/installer/tests/HostActivation.Tests/NativeHosting/SharedTestStateBase.cs
index e7c316ce76fe..ae855079be71 100644
--- a/src/installer/tests/HostActivation.Tests/NativeHosting/SharedTestStateBase.cs
+++ b/src/installer/tests/HostActivation.Tests/NativeHosting/SharedTestStateBase.cs
@@ -18,9 +18,8 @@ public class SharedTestStateBase : IDisposable
 
         public SharedTestStateBase()
         {
-            BaseDirectory = SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, "nativeHosting"));
-            _baseDirArtifact = new TestArtifact(BaseDirectory);
-            Directory.CreateDirectory(BaseDirectory);
+            _baseDirArtifact = TestArtifact.Create("nativeHosting");
+            BaseDirectory = _baseDirArtifact.Location;
 
             string nativeHostName = Binaries.GetExeFileNameForCurrentPlatform("nativehost");
             NativeHostPath = Path.Combine(BaseDirectory, nativeHostName);
diff --git a/src/installer/tests/HostActivation.Tests/NativeUnitTests.cs b/src/installer/tests/HostActivation.Tests/NativeUnitTests.cs
index a98e323d6ef1..74c3f7a1e4c5 100644
--- a/src/installer/tests/HostActivation.Tests/NativeUnitTests.cs
+++ b/src/installer/tests/HostActivation.Tests/NativeUnitTests.cs
@@ -8,16 +8,14 @@
 using Microsoft.DotNet.CoreSetup.Test;
 using Microsoft.DotNet.Cli.Build.Framework;
 
-namespace Microsoft.DotNet.CoreSetup.Test.HostActivation.NativeUnitTests
+namespace HostActivation.Tests
 {
     public class NativeUnitTests
     {
         [Fact]
         public void Native_Test_Fx_Ver()
         {
-            RepoDirectoriesProvider repoDirectoriesProvider = new RepoDirectoriesProvider();
-
-            string testPath = Path.Combine(repoDirectoriesProvider.HostTestArtifacts, Binaries.GetExeFileNameForCurrentPlatform("test_fx_ver"));
+            string testPath = Path.Combine(RepoDirectoriesProvider.Default.HostTestArtifacts, Binaries.GetExeFileNameForCurrentPlatform("test_fx_ver"));
 
             Command testCommand = Command.Create(testPath);
             testCommand
diff --git a/src/installer/tests/HostActivation.Tests/PortableAppActivation.cs b/src/installer/tests/HostActivation.Tests/PortableAppActivation.cs
index 9f8aefa950d0..fa19efd53121 100644
--- a/src/installer/tests/HostActivation.Tests/PortableAppActivation.cs
+++ b/src/installer/tests/HostActivation.Tests/PortableAppActivation.cs
@@ -1,12 +1,11 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using Microsoft.DotNet.Cli.Build;
-using Microsoft.DotNet.Cli.Build.Framework;
 using System;
 using System.Diagnostics;
 using System.IO;
-using System.Runtime.InteropServices;
+
+using Microsoft.DotNet.Cli.Build.Framework;
 using Xunit;
 
 namespace Microsoft.DotNet.CoreSetup.Test.HostActivation
@@ -267,10 +266,8 @@ public void MissingFrameworkInRuntimeConfig_Fails(bool useAppHost)
         [InlineData(false)]
         public void AppHost_CLI_FrameworkDependent_MissingRuntimeFramework_ErrorReportedInStdErr(bool missingHostfxr)
         {
-            string invalidDotNet = SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, "cliErrors"));
-            using (new TestArtifact(invalidDotNet))
+            using (var invalidDotNet = TestArtifact.Create("cliErrors"))
             {
-                Directory.CreateDirectory(invalidDotNet);
                 string expectedUrlQuery;
                 string expectedStdErr;
                 int expectedErrorCode = 0;
@@ -282,9 +279,8 @@ public void AppHost_CLI_FrameworkDependent_MissingRuntimeFramework_ErrorReported
                 }
                 else
                 {
-                    invalidDotNet = new DotNetBuilder(invalidDotNet, TestContext.BuiltDotNet.BinPath, "missingFramework")
-                        .Build()
-                        .BinPath;
+                    new DotNetBuilder(invalidDotNet.Location, TestContext.BuiltDotNet.BinPath, null)
+                        .Build();
 
                     expectedErrorCode = Constants.ErrorCode.FrameworkMissingFailure;
                     expectedStdErr = $"Framework: '{Constants.MicrosoftNETCoreApp}', " +
@@ -294,7 +290,7 @@ public void AppHost_CLI_FrameworkDependent_MissingRuntimeFramework_ErrorReported
 
                 CommandResult result = Command.Create(sharedTestState.App.AppExe)
                     .EnableTracingAndCaptureOutputs()
-                    .DotNetRoot(invalidDotNet)
+                    .DotNetRoot(invalidDotNet.Location)
                     .MultilevelLookup(false)
                     .Execute(expectedToFail: true);
 
@@ -316,22 +312,18 @@ public void AppHost_GUI_FrameworkDependent_MissingRuntimeFramework_ErrorReported
             app.CreateAppHost(isWindowsGui: true);
             string appExe = app.AppExe;
 
-            string invalidDotNet = SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, "guiErrors"));
-            using (new TestArtifact(invalidDotNet))
+            using (var invalidDotNet = TestArtifact.Create("guiMissingFramework"))
             {
-                Directory.CreateDirectory(invalidDotNet);
-
                 string expectedErrorCode;
                 string expectedUrlQuery;
-                invalidDotNet = new DotNetBuilder(invalidDotNet, TestContext.BuiltDotNet.BinPath, "missingFramework")
-                    .Build()
-                    .BinPath;
+                new DotNetBuilder(invalidDotNet.Location, TestContext.BuiltDotNet.BinPath, null)
+                    .Build();
 
                 expectedErrorCode = Constants.ErrorCode.FrameworkMissingFailure.ToString("x");
                 expectedUrlQuery = $"framework={Constants.MicrosoftNETCoreApp}&framework_version={TestContext.MicrosoftNETCoreAppVersion}";
                 Command command = Command.Create(appExe)
                     .EnableTracingAndCaptureOutputs()
-                    .DotNetRoot(invalidDotNet)
+                    .DotNetRoot(invalidDotNet.Location)
                     .MultilevelLookup(false)
                     .Start();
 
@@ -357,13 +349,11 @@ public void AppHost_GUI_MissingRuntime_ErrorReportedInDialog()
             app.CreateAppHost(isWindowsGui: true);
             string appExe = app.AppExe;
 
-            string invalidDotNet = SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, "guiErrors"));
-            using (new TestArtifact(invalidDotNet))
+            using (var invalidDotNet = TestArtifact.Create("guiMissingRuntime"))
             {
-                Directory.CreateDirectory(invalidDotNet);
                 var command = Command.Create(appExe)
                     .EnableTracingAndCaptureOutputs()
-                    .DotNetRoot(invalidDotNet)
+                    .DotNetRoot(invalidDotNet.Location)
                     .MultilevelLookup(false)
                     .Start();
 
@@ -388,16 +378,13 @@ public void AppHost_GUI_NoCustomErrorWriter_FrameworkMissing_ErrorReportedInDial
             app.CreateAppHost(isWindowsGui: true);
             string appExe = app.AppExe;
 
-            string dotnetWithMockHostFxr = SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, "guiErrors"));
-            using (new TestArtifact(dotnetWithMockHostFxr))
+            // The mockhostfxrFrameworkMissingFailure folder name is used by mock hostfxr to return the appropriate error code
+            using (var dotnetWithMockHostFxr = TestArtifact.Create("mockhostfxrFrameworkMissingFailure"))
             {
-                Directory.CreateDirectory(dotnetWithMockHostFxr);
-                string expectedErrorCode = Constants.ErrorCode.FrameworkMissingFailure.ToString("x");
-
-                var dotnetBuilder = new DotNetBuilder(dotnetWithMockHostFxr, TestContext.BuiltDotNet.BinPath, "mockhostfxrFrameworkMissingFailure")
+                var dotnet = new DotNetBuilder(dotnetWithMockHostFxr.Location, TestContext.BuiltDotNet.BinPath, null)
                     .RemoveHostFxr()
-                    .AddMockHostFxr(new Version(2, 2, 0));
-                var dotnet = dotnetBuilder.Build();
+                    .AddMockHostFxr(new Version(2, 2, 0))
+                    .Build();
 
                 Command command = Command.Create(appExe)
                     .EnableTracingAndCaptureOutputs()
@@ -408,6 +395,7 @@ public void AppHost_GUI_NoCustomErrorWriter_FrameworkMissing_ErrorReportedInDial
                 WindowsUtils.WaitForPopupFromProcess(command.Process);
                 command.Process.Kill();
 
+                string expectedErrorCode = Constants.ErrorCode.FrameworkMissingFailure.ToString("x");
                 command.WaitForExit(true)
                     .Should().Fail()
                     .And.HaveStdErrContaining($"Showing error dialog for application: '{Path.GetFileName(appExe)}' - error code: 0x{expectedErrorCode}")
@@ -425,13 +413,11 @@ public void AppHost_GUI_FrameworkDependent_DisabledGUIErrors_DialogNotShown()
             app.CreateAppHost(isWindowsGui: true);
             string appExe = app.AppExe;
 
-            string invalidDotNet = SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, "guiErrors"));
-            using (new TestArtifact(invalidDotNet))
+            using (var invalidDotNet = TestArtifact.Create("guiErrors"))
             {
-                Directory.CreateDirectory(invalidDotNet);
                 Command.Create(appExe)
                     .EnableTracingAndCaptureOutputs()
-                    .DotNetRoot(invalidDotNet)
+                    .DotNetRoot(invalidDotNet.Location)
                     .MultilevelLookup(false)
                     .EnvironmentVariable(Constants.DisableGuiErrors.EnvironmentVariable, "1")
                     .Execute()
@@ -450,8 +436,7 @@ public SharedTestState()
                 App = TestApp.CreateFromBuiltAssets("HelloWorld");
                 App.CreateAppHost();
 
-                MockApp = new TestApp(SharedFramework.CalculateUniqueTestDirectory(Path.Combine(TestArtifact.TestArtifactsPath, "portableAppActivation")), "App");
-                Directory.CreateDirectory(MockApp.Location);
+                MockApp = TestApp.CreateEmpty(nameof(MockApp));
                 File.WriteAllText(MockApp.AppDll, string.Empty);
                 MockApp.CreateAppHost(copyResources: false);
             }
diff --git a/src/installer/tests/HostActivation.Tests/RegisteredInstallLocationOverride.cs b/src/installer/tests/HostActivation.Tests/RegisteredInstallLocationOverride.cs
index fcb29470d9ee..66c0a2c9763c 100644
--- a/src/installer/tests/HostActivation.Tests/RegisteredInstallLocationOverride.cs
+++ b/src/installer/tests/HostActivation.Tests/RegisteredInstallLocationOverride.cs
@@ -56,7 +56,7 @@ public RegisteredInstallLocationOverride(string productBinaryPath)
                 // On Linux/macOS the install location is registered in a file which is normally
                 // located in /etc/dotnet/install_location
                 // So we need to redirect it to a different place here.
-                string directory = Path.Combine(TestArtifact.TestArtifactsPath, "installLocationOverride" + Process.GetCurrentProcess().Id.ToString());
+                string directory = Path.Combine(TestContext.TestArtifactsPath, "installLocationOverride" + Process.GetCurrentProcess().Id.ToString());
                 if (Directory.Exists(directory))
                     Directory.Delete(directory, true);
                 Directory.CreateDirectory(directory);
diff --git a/src/installer/tests/HostActivation.Tests/RuntimeProperties.cs b/src/installer/tests/HostActivation.Tests/RuntimeProperties.cs
index 84b9fe961c85..39b872df6132 100644
--- a/src/installer/tests/HostActivation.Tests/RuntimeProperties.cs
+++ b/src/installer/tests/HostActivation.Tests/RuntimeProperties.cs
@@ -131,8 +131,7 @@ public class SharedTestState : IDisposable
             public SharedTestState()
             {
                 // Make a copy of the built .NET, as we will update the framework's runtime config
-                copiedDotnet = TestArtifact.Create("runtimeProperties");
-                SharedFramework.CopyDirectory(TestContext.BuiltDotNet.BinPath, copiedDotnet.Location);
+                copiedDotnet = TestArtifact.CreateFromCopy("runtimeProperties", TestContext.BuiltDotNet.BinPath);
 
                 MockSDK = new DotNetBuilder(copiedDotnet.Location, TestContext.BuiltDotNet.BinPath, "mocksdk")
                     .AddMicrosoftNETCoreAppFrameworkMockCoreClr("9999.0.0")
diff --git a/src/installer/tests/HostActivation.Tests/SDKLookup.cs b/src/installer/tests/HostActivation.Tests/SDKLookup.cs
index c93c5e419593..923bf60568b2 100644
--- a/src/installer/tests/HostActivation.Tests/SDKLookup.cs
+++ b/src/installer/tests/HostActivation.Tests/SDKLookup.cs
@@ -24,7 +24,7 @@ public SDKLookup(SharedTestState sharedState)
         {
             SharedState = sharedState;
 
-            string exeDotNetPath = SharedFramework.CalculateUniqueTestDirectory(Path.Combine(sharedState.BaseDir, "exe"));
+            string exeDotNetPath = sharedState.BaseArtifact.GetUniqueSubdirectory("exe");
             ExecutableDotNetBuilder = new DotNetBuilder(exeDotNetPath, TestContext.BuiltDotNet.BinPath, null);
             ExecutableDotNet = ExecutableDotNetBuilder
                 .AddMicrosoftNETCoreAppFrameworkMockHostPolicy("9999.0.0")
@@ -34,7 +34,7 @@ public SDKLookup(SharedTestState sharedState)
             ExecutableSelectedMessage = $"Using .NET SDK dll=[{Path.Combine(ExecutableDotNet.BinPath, "sdk")}";
 
             // Note: no need to delete the directory, it will be removed once the entire class is done
-            //       since everything is under the BaseDir from the shared state
+            //       since everything is under the BaseArtifact from the shared state
         }
 
         [Fact]
@@ -1019,25 +1019,19 @@ private CommandResult RunTest(string command)
 
         public sealed class SharedTestState : IDisposable
         {
-            public string BaseDir { get; }
+            public TestArtifact BaseArtifact { get; }
 
             public string CurrentWorkingDir { get; }
 
-            private readonly TestArtifact _baseDirArtifact;
-
             public SharedTestState()
             {
-                // The dotnetSDKLookup dir will contain some folders and files that will be
-                // necessary to perform the tests
-                string baseDir = Path.Combine(TestArtifact.TestArtifactsPath, "dotnetSDKLookup");
-                BaseDir = SharedFramework.CalculateUniqueTestDirectory(baseDir);
-                _baseDirArtifact = new TestArtifact(BaseDir);
+                BaseArtifact = TestArtifact.Create(nameof(SDKLookup));
 
                 // The tested locations will be the cwd and the exe dir. cwd is no longer supported.
                 // All dirs will be placed inside the base folder
                 // Executable location is created per test as each test adds a different set of SDK versions
 
-                var currentWorkingSdk = new DotNetBuilder(BaseDir, TestContext.BuiltDotNet.BinPath, "current")
+                var currentWorkingSdk = new DotNetBuilder(BaseArtifact.Location, TestContext.BuiltDotNet.BinPath, "current")
                     .AddMockSDK("10000.0.0", "9999.0.0")
                     .Build();
                 CurrentWorkingDir = currentWorkingSdk.BinPath;
@@ -1045,7 +1039,7 @@ public SharedTestState()
 
             public void Dispose()
             {
-                _baseDirArtifact.Dispose();
+                BaseArtifact.Dispose();
             }
         }
     }
diff --git a/src/installer/tests/HostActivation.Tests/StandaloneAppActivation.cs b/src/installer/tests/HostActivation.Tests/StandaloneAppActivation.cs
index b4ea95307c01..7390a305ad15 100644
--- a/src/installer/tests/HostActivation.Tests/StandaloneAppActivation.cs
+++ b/src/installer/tests/HostActivation.Tests/StandaloneAppActivation.cs
@@ -133,7 +133,7 @@ public void DotNetRoot_IncorrectLayout_Fails()
                 .DotNetRoot(app.Location)
                 .Execute(expectedToFail: true)
                 .Should().Fail()
-                .And.HaveUsedDotNetRootInstallLocation(Path.GetFullPath(app.Location), TestContext.TargetRID)
+                .And.HaveUsedDotNetRootInstallLocation(Path.GetFullPath(app.Location), TestContext.BuildRID)
                 .And.HaveStdErrContaining($"The required library {Binaries.HostFxr.FileName} could not be found.");
         }
 
diff --git a/src/installer/tests/Microsoft.DotNet.CoreSetup.Packaging.Tests/NETCoreTests.cs b/src/installer/tests/Microsoft.DotNet.CoreSetup.Packaging.Tests/NETCoreTests.cs
index a50624ecb6e1..bcb276907b33 100644
--- a/src/installer/tests/Microsoft.DotNet.CoreSetup.Packaging.Tests/NETCoreTests.cs
+++ b/src/installer/tests/Microsoft.DotNet.CoreSetup.Packaging.Tests/NETCoreTests.cs
@@ -8,7 +8,7 @@ namespace Microsoft.DotNet.CoreSetup.Packaging.Tests
 {
     public class NETCoreTests
     {
-        private readonly RepoDirectoriesProvider dirs = new RepoDirectoriesProvider();
+        private readonly RepoDirectoriesProvider dirs = RepoDirectoriesProvider.Default;
 
         [Fact]
         public void NETCoreTargetingPackIsValid()
diff --git a/src/installer/tests/Microsoft.NET.HostModel.Tests/Bundle/BundlerConsistencyTests.cs b/src/installer/tests/Microsoft.NET.HostModel.Tests/Bundle/BundlerConsistencyTests.cs
index a48b5ae13e49..b7a2d6ab08cf 100644
--- a/src/installer/tests/Microsoft.NET.HostModel.Tests/Bundle/BundlerConsistencyTests.cs
+++ b/src/installer/tests/Microsoft.NET.HostModel.Tests/Bundle/BundlerConsistencyTests.cs
@@ -25,7 +25,7 @@ public BundlerConsistencyTests(SharedTestState fixture)
 
         private static string BundlerHostName = Binaries.GetExeFileNameForCurrentPlatform(SharedTestState.AppName);
         private Bundler CreateBundlerInstance(BundleOptions bundleOptions = BundleOptions.None, Version version = null, bool macosCodesign = true)
-            => new Bundler(BundlerHostName, SharedFramework.CalculateUniqueTestDirectory($"{sharedTestState.App.Location}-bundle"), bundleOptions, targetFrameworkVersion: version, macosCodesign: macosCodesign);
+            => new Bundler(BundlerHostName, sharedTestState.App.GetUniqueSubdirectory("bundle"), bundleOptions, targetFrameworkVersion: version, macosCodesign: macosCodesign);
 
         [Fact]
         public void EnableCompression_Before60_Fails()
@@ -212,7 +212,7 @@ public void BaseNameComputation()
                     new FileSpec(app.RuntimeConfigJson, runtimeConfigName),
                 };
 
-                var bundleDir = Directory.CreateDirectory(SharedFramework.CalculateUniqueTestDirectory(Path.Combine(app.Location, "bundle")));
+                var bundleDir = new DirectoryInfo(app.GetUniqueSubdirectory("bundle"));
                 var bundler = new Bundler(hostName, bundleDir.FullName);
                 bundler.GenerateBundle(fileSpecs);
 
diff --git a/src/installer/tests/Microsoft.NET.HostModel.Tests/Microsoft.NET.HostModel.Tests.csproj b/src/installer/tests/Microsoft.NET.HostModel.Tests/Microsoft.NET.HostModel.Tests.csproj
index e809026a29c4..3172cfcf0099 100644
--- a/src/installer/tests/Microsoft.NET.HostModel.Tests/Microsoft.NET.HostModel.Tests.csproj
+++ b/src/installer/tests/Microsoft.NET.HostModel.Tests/Microsoft.NET.HostModel.Tests.csproj
@@ -1,13 +1,11 @@
 <Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
-    <Description>Microsoft.NET.HostModel.Tests</Description>
     <TargetFramework>$(TestInfraTargetFramework)</TargetFramework>
     <AssemblyName>Microsoft.NET.HostModel.Tests</AssemblyName>
     <GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>
     <!-- Reduce the length of the test output dir to make it more reliable on Windows. -->
     <TestsOutputName>hm</TestsOutputName>
-    <UsesTestAssets>true</UsesTestAssets>
   </PropertyGroup>
 
   <ItemGroup>
diff --git a/src/installer/tests/PrepareTestAssets/PrepareTestAssets.proj b/src/installer/tests/PrepareTestAssets/PrepareTestAssets.proj
deleted file mode 100644
index eb17dd4c391a..000000000000
--- a/src/installer/tests/PrepareTestAssets/PrepareTestAssets.proj
+++ /dev/null
@@ -1,133 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-  <Target Name="PrepareTestAssets"
-          DependsOnTargets="
-            GetNETCoreAppRuntimePackVersion;
-            CleanTestAssets;
-            GenerateTestRestoreSourcesNuGetConfig;
-            CopyPotentiallyInternalPackagesForTestRestore;
-            RestoreTestAssetProjects" />
-
-  <Target Name="CleanTestAssets"
-          DependsOnTargets="DetermineTestOutputDirectory">
-    <!--
-      Ensure installers (and therefore shared framework projects) are built first. Include used
-      transitive dependencies here in case Subset is defined.
-    -->
-    <MSBuild
-      Projects="
-        @(ManagedProjectToBuild);
-        @(PkgprojProjectToBuild);
-        @(InstallerProjectToBuild)"
-      Targets="Build" />
-
-    <PropertyGroup>
-      <TempFolderRoot>$(IntermediateOutputPath)temp\</TempFolderRoot>
-      <TestTargetRid Condition="'$(TestTargetRid)' == ''">$(_HostRid)</TestTargetRid>
-    </PropertyGroup>
-
-    <ItemGroup>
-      <DirsToClean Include="$(TestDir)\**\bin" />
-      <DirsToClean Include="$(TestDir)\**\obj" />
-      <DirsToClean Include="$(TestStabilizedLegacyPackagesDir)" />
-      <DirsToClean Include="$(TestRestorePackagesPath)" />
-      <DirsToClean Include="$(TempFolderRoot)$(TargetArchitecture)" />
-      <DirsToClean Include="$(InternalNupkgCacheDir)" />
-    </ItemGroup>
-
-    <RemoveDir Directories="@(DirsToClean)" />
-
-    <!-- Directory must exist even if there are no nupkgs inside to use, or NuGet fails. -->
-    <MakeDir Directories="$(TestStabilizedLegacyPackagesDir)" />
-  </Target>
-
-  <Target Name="GenerateTestRestoreSourcesNuGetConfig">
-    <ItemGroup>
-      <RestoreTestSource Include="$(ArtifactsShippingPackagesDir)" Key="artifacts-shipping-packages" />
-      <RestoreTestSource Include="$(ArtifactsNonShippingPackagesDir)" Key="artifacts-nonshipping-packages" />
-      <RestoreTestSource Include="$(LibrariesShippingPackagesDir)" Condition="Exists('$(LibrariesShippingPackagesDir)')" Key="libraries-shipping-packages" />
-      <RestoreTestSource Include="$(LibrariesNonShippingPackagesDir)" Condition="Exists('$(LibrariesNonShippingPackagesDir)')" Key="libraries-nonshipping-packages" />
-    </ItemGroup>
-
-    <ItemGroup Condition="'$(LibrariesPackagesDir)' != '$(LibrariesAllConfigPackagesDir)'">
-      <RestoreTestSource Include="$(LibrariesAllConfigShippingPackagesDir)" Condition="Exists('$(LibrariesAllConfigShippingPackagesDir)')" Key="libraries-allconfig-shipping-packages" />
-      <RestoreTestSource Include="$(LibrariesAllConfigNonShippingPackagesDir)" Condition="Exists('$(LibrariesAllConfigNonShippingPackagesDir)')" Key="libraries-allconfig-nonshipping-packages" />
-    </ItemGroup>
-
-    <ItemGroup>
-      <RestoreTestSource Include="$(TestStabilizedLegacyPackagesDir)" Key="stabilized-legacy-packages" />
-
-      <RestoreTestSource
-        Condition="'$(ContinuousIntegrationBuild)' == 'true'"
-        Include="$(InternalNupkgCacheDir)"
-        Key="internal-nupkg-cache-packages" />
-    </ItemGroup>
-
-    <PropertyGroup>
-      <TemplateNuGetConfigFile>$(RepoRoot)NuGet.config</TemplateNuGetConfigFile>
-
-      <RestoreTestSourceConfigLines>@(RestoreTestSource -> '&lt;add key="%(Key)" value="%(Identity)" /&gt;', '%0A    ')</RestoreTestSourceConfigLines>
-
-      <TestRestoreNuGetConfigContent>$([System.IO.File]::ReadAllText('$(TemplateNuGetConfigFile)').Replace(
-        '&lt;!-- TEST_RESTORE_SOURCES_INSERTION_LINE --&gt;',
-        '$(RestoreTestSourceConfigLines)'))</TestRestoreNuGetConfigContent>
-
-      <!--
-        Remove Azure DevOps feeds from NuGet.Config because they may require authenticated restore,
-        which is too flaky to use in test restore. See also
-        CopyPotentiallyInternalPackagesForTestRestore. https://github.com/dotnet/arcade/issues/3932
-      -->
-      <TestRestoreNuGetConfigContent>$([System.Text.RegularExpressions.Regex]::Replace(
-        '$(TestRestoreNuGetConfigContent)',
-        '&lt;add key=".+" value="https://pkgs.dev.azure.com/dnceng/internal/.+" /&gt;',
-        ''))</TestRestoreNuGetConfigContent>
-    </PropertyGroup>
-
-    <WriteLinesToFile
-      File="$(TestRestoreNuGetConfigFile)"
-      Lines="$(TestRestoreNuGetConfigContent)"
-      Overwrite="true" />
-  </Target>
-
-  <!--
-    Workaround for NuGet flakiness during authenticated restore.
-    https://github.com/dotnet/arcade/issues/3932
-
-    The initial Arcade restore is stable enough to grab internal packages, but running restore
-    against an authenticated feed for the test projects is too flaky. Luckily, the Arcade restore
-    happens to include all the packages the test projects requires, so as a workaround, we can copy
-    all nupkgs from the Arcade restore into a directory to use as a source for the test projects.
-  -->
-  <Target Name="CopyPotentiallyInternalPackagesForTestRestore"
-          Condition="'$(ContinuousIntegrationBuild)' == 'true'">
-    <Message
-      Importance="High"
-      Text="Copying all nupkgs in package cache to a local source for test restore..." />
-
-    <ItemGroup>
-      <PackageCacheNupkgFile Include="$(NuGetPackageRoot)*\*\*.nupkg" />
-    </ItemGroup>
-
-    <Copy SourceFiles="@(PackageCacheNupkgFile)" DestinationFolder="$(InternalNupkgCacheDir)" />
-  </Target>
-
-  <Target Name="RestoreTestAssetProjects">
-    <ItemGroup>
-      <TestAssetProjectToRestore Include="$(TestAssetsDir)**\*.csproj" />
-
-      <AllTestRestoreSources Include="@(RestoreTestSource)"/>
-      <AllTestRestoreSources Include="@(RestoreTestFallbackSource)"/>
-    </ItemGroup>
-
-    <Message Importance="High" Text="Running NuGet Restore for test asset projects..." />
-
-    <MSBuild
-      Projects="@(TestAssetProjectToRestore)"
-      Targets="Restore"
-      Properties="
-        RestoreConfigFile=$(TestRestoreNuGetConfigFile);
-        TestRestorePackagesPath=$(TestRestorePackagesPath);
-        TestTargetRid=$(TestTargetRid);
-        MNAVersion=$(NETCoreAppRuntimePackVersion)" />
-  </Target>
-
-</Project>
diff --git a/src/installer/tests/TestUtils/AnsiColorExtensions.cs b/src/installer/tests/TestUtils/AnsiColorExtensions.cs
deleted file mode 100644
index 97c104516c67..000000000000
--- a/src/installer/tests/TestUtils/AnsiColorExtensions.cs
+++ /dev/null
@@ -1,52 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-namespace Microsoft.DotNet.Cli.Build.Framework
-{
-    public static class AnsiColorExtensions
-    {
-        public static string Black(this string text)
-        {
-            return "\x1B[30m" + text + "\x1B[39m";
-        }
-
-        public static string Red(this string text)
-        {
-            return "\x1B[31m" + text + "\x1B[39m";
-        }
-        public static string Green(this string text)
-        {
-            return "\x1B[32m" + text + "\x1B[39m";
-        }
-
-        public static string Yellow(this string text)
-        {
-            return "\x1B[33m" + text + "\x1B[39m";
-        }
-
-        public static string Blue(this string text)
-        {
-            return "\x1B[34m" + text + "\x1B[39m";
-        }
-
-        public static string Magenta(this string text)
-        {
-            return "\x1B[35m" + text + "\x1B[39m";
-        }
-
-        public static string Cyan(this string text)
-        {
-            return "\x1B[36m" + text + "\x1B[39m";
-        }
-
-        public static string White(this string text)
-        {
-            return "\x1B[37m" + text + "\x1B[39m";
-        }
-
-        public static string Bold(this string text)
-        {
-            return "\x1B[1m" + text + "\x1B[22m";
-        }
-    }
-}
diff --git a/src/installer/tests/TestUtils/AnsiConsole.cs b/src/installer/tests/TestUtils/AnsiConsole.cs
deleted file mode 100644
index 2a8d6f3ad5a7..000000000000
--- a/src/installer/tests/TestUtils/AnsiConsole.cs
+++ /dev/null
@@ -1,145 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-using System.IO;
-
-namespace Microsoft.DotNet.Cli.Build.Framework
-{
-    public class AnsiConsole
-    {
-        private AnsiConsole(TextWriter writer)
-        {
-            Writer = writer;
-    
-            OriginalForegroundColor = Console.ForegroundColor;
-        }
-    
-        private int _boldRecursion;
-    
-        public static AnsiConsole GetOutput()
-        {
-            return new AnsiConsole(Console.Out);
-        }
-    
-        public static AnsiConsole GetError()
-        {
-            return new AnsiConsole(Console.Error);
-        }
-    
-        public TextWriter Writer { get; }
-    
-        public ConsoleColor OriginalForegroundColor { get; }
-    
-        private void SetColor(ConsoleColor color)
-        {
-            const int Light = 0x08;
-            int c = (int)color;
-
-            Console.ForegroundColor = 
-                c < 0 ? color :                                   // unknown, just use it
-                _boldRecursion > 0 ? (ConsoleColor)(c | Light) :  // ensure color is light
-                (ConsoleColor)(c & ~Light);                       // ensure color is dark
-        }
-    
-        private void SetBold(bool bold)
-        {
-            _boldRecursion += bold ? 1 : -1;
-            if (_boldRecursion > 1 || (_boldRecursion == 1 && !bold))
-            {
-                return;
-            }
-            
-            // switches on _boldRecursion to handle boldness
-            SetColor(Console.ForegroundColor);        
-        }
-
-        public void WriteLine(string message)
-        {
-            Write(message);
-            Writer.WriteLine();
-        }
-
-
-        public void Write(string message)
-        {
-            var escapeScan = 0;
-            for (;;)
-            {
-                var escapeIndex = message.IndexOf("\x1b[", escapeScan, StringComparison.Ordinal);
-                if (escapeIndex == -1)
-                {
-                    var text = message.Substring(escapeScan);
-                    Writer.Write(text);
-                    break;
-                }
-                else
-                {
-                    var startIndex = escapeIndex + 2;
-                    var endIndex = startIndex;
-                    while (endIndex != message.Length &&
-                        message[endIndex] >= 0x20 &&
-                        message[endIndex] <= 0x3f)
-                    {
-                        endIndex += 1;
-                    }
-    
-                    var text = message.Substring(escapeScan, escapeIndex - escapeScan);
-                    Writer.Write(text);
-                    if (endIndex == message.Length)
-                    {
-                        break;
-                    }
-    
-                    switch (message[endIndex])
-                    {
-                        case 'm':
-                            int value;
-                            if (int.TryParse(message.Substring(startIndex, endIndex - startIndex), out value))
-                            {
-                                switch (value)
-                                {
-                                    case 1:
-                                        SetBold(true);
-                                        break;
-                                    case 22:
-                                        SetBold(false);
-                                        break;
-                                    case 30:
-                                        SetColor(ConsoleColor.Black);
-                                        break;
-                                    case 31:
-                                        SetColor(ConsoleColor.Red);
-                                        break;
-                                    case 32:
-                                        SetColor(ConsoleColor.Green);
-                                        break;
-                                    case 33:
-                                        SetColor(ConsoleColor.Yellow);
-                                        break;
-                                    case 34:
-                                        SetColor(ConsoleColor.Blue);
-                                        break;
-                                    case 35:
-                                        SetColor(ConsoleColor.Magenta);
-                                        break;
-                                    case 36:
-                                        SetColor(ConsoleColor.Cyan);
-                                        break;
-                                    case 37:
-                                        SetColor(ConsoleColor.Gray);
-                                        break;
-                                    case 39:
-                                        Console.ForegroundColor = OriginalForegroundColor;
-                                        break;
-                                }
-                            }
-                            break;
-                    }
-    
-                    escapeScan = endIndex + 1;
-                }
-            }
-        }
-    }
-}
diff --git a/src/installer/tests/TestUtils/Assertions/CommandResultAssertions.cs b/src/installer/tests/TestUtils/Assertions/CommandResultAssertions.cs
index f312913aff8a..63f369b2a214 100644
--- a/src/installer/tests/TestUtils/Assertions/CommandResultAssertions.cs
+++ b/src/installer/tests/TestUtils/Assertions/CommandResultAssertions.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Linq;
 using System.Text.RegularExpressions;
 using FluentAssertions;
 using FluentAssertions.Execution;
@@ -112,14 +113,14 @@ public AndConstraint<CommandResultAssertions> HaveStdErrMatching(string pattern,
         public AndConstraint<CommandResultAssertions> NotHaveStdOut()
         {
             Execute.Assertion.ForCondition(string.IsNullOrEmpty(Result.StdOut))
-                .FailWith($"Expected command to not output to stdout but it was not:{GetDiagnosticsInfo()}");
+                .FailWith($"Expected command to not output to stdout but it did:{GetDiagnosticsInfo()}");
             return new AndConstraint<CommandResultAssertions>(this);
         }
 
         public AndConstraint<CommandResultAssertions> NotHaveStdErr()
         {
             Execute.Assertion.ForCondition(string.IsNullOrEmpty(Result.StdErr))
-                .FailWith($"Expected command to not output to stderr but it was not:{GetDiagnosticsInfo()}");
+                .FailWith($"Expected command to not output to stderr but it did:{GetDiagnosticsInfo()}");
             return new AndConstraint<CommandResultAssertions>(this);
         }
 
@@ -147,27 +148,17 @@ public AndConstraint<CommandResultAssertions> NotFileContains(string path, strin
         }
 
         public string GetDiagnosticsInfo()
-        {
-            return $"{Environment.NewLine}" +
-                $"File Name: {Result.StartInfo.FileName}{Environment.NewLine}" +
-                $"Arguments: {Result.StartInfo.Arguments}{Environment.NewLine}" +
-                $"Exit Code: {Result.ExitCode}{Environment.NewLine}" +
-                $"StdOut:{Environment.NewLine}{Result.StdOut}{Environment.NewLine}" +
-                $"StdErr:{Environment.NewLine}{Result.StdErr}{Environment.NewLine}";
-        }
-
-        public AndConstraint<CommandResultAssertions> HaveSkippedProjectCompilation(string skippedProject, string frameworkFullName)
-        {
-            Result.StdOut.Should().Contain("Project {0} ({1}) was previously compiled. Skipping compilation.", skippedProject, frameworkFullName);
-
-            return new AndConstraint<CommandResultAssertions>(this);
-        }
-
-        public AndConstraint<CommandResultAssertions> HaveCompiledProject(string compiledProject, string frameworkFullName)
-        {
-            Result.StdOut.Should().Contain($"Project {0} ({1}) will be compiled", compiledProject, frameworkFullName);
-
-            return new AndConstraint<CommandResultAssertions>(this);
-        }
+            => $"""
+
+                File Name: {Result.StartInfo.FileName}
+                Arguments: {Result.StartInfo.Arguments}
+                Environment:
+                {string.Join(Environment.NewLine, Result.StartInfo.Environment.Where(i => i.Key.StartsWith(Constants.DotnetRoot.EnvironmentVariable)).Select(i => $"  {i.Key} = {i.Value}"))}
+                Exit Code: 0x{Result.ExitCode:x}
+                StdOut:
+                {Result.StdOut}
+                StdErr:
+                {Result.StdErr}
+                """;
     }
 }
diff --git a/src/installer/tests/TestUtils/Assertions/DirectoryInfoExtensions.cs b/src/installer/tests/TestUtils/Assertions/DirectoryInfoExtensions.cs
index 16471d542dd4..0f240d6a91bc 100644
--- a/src/installer/tests/TestUtils/Assertions/DirectoryInfoExtensions.cs
+++ b/src/installer/tests/TestUtils/Assertions/DirectoryInfoExtensions.cs
@@ -11,10 +11,5 @@ public static DirectoryInfoAssertions Should(this DirectoryInfo dir)
         {
             return new DirectoryInfoAssertions(dir);
         }
-
-        public static DirectoryInfo Sub(this DirectoryInfo dir, string name)
-        {
-            return new DirectoryInfo(Path.Combine(dir.FullName, name));
-        }
     }
 }
diff --git a/src/installer/tests/TestUtils/BuildFailureException.cs b/src/installer/tests/TestUtils/BuildFailureException.cs
deleted file mode 100644
index f23406cce10a..000000000000
--- a/src/installer/tests/TestUtils/BuildFailureException.cs
+++ /dev/null
@@ -1,22 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-
-namespace Microsoft.DotNet.Cli.Build.Framework
-{
-    public partial class BuildFailureException : Exception
-    {
-        public BuildFailureException()
-        {
-        }
-
-        public BuildFailureException(string message) : base(message)
-        {
-        }
-
-        public BuildFailureException(string message, Exception innerException) : base(message, innerException)
-        {
-        }
-    }
-}
diff --git a/src/installer/tests/TestUtils/BuildReporter.cs b/src/installer/tests/TestUtils/BuildReporter.cs
deleted file mode 100644
index e87987074ec9..000000000000
--- a/src/installer/tests/TestUtils/BuildReporter.cs
+++ /dev/null
@@ -1,38 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-
-namespace Microsoft.DotNet.Cli.Build.Framework
-{
-    public static class BuildReporter
-    {
-        private const string TimeSpanFormat = @"hh\:mm\:ss\.fff";
-        private static DateTime _initialTime = DateTime.Now;
-
-        public static void BeginSection(string type, string name)
-        {
-            Reporter.Output.WriteLine($"[{type.PadRight(10)} >]".Green() + $" [....] [{(DateTime.Now - _initialTime).ToString(TimeSpanFormat)}]".Blue() + $" {name}");
-        }
-
-        public static void SectionComment(string type, string comment)
-        {
-            Reporter.Output.WriteLine($"[{type.PadRight(10)} -]".Green() + $" [....] [{(DateTime.Now - _initialTime).ToString(TimeSpanFormat)}]".Blue() + $" {comment}");
-        }
-
-        public static void EndSection(string type, string name, bool success)
-        {
-            var header = $"[{type.PadRight(10)} <]";
-            if (success)
-            {
-                header = header.Green();
-            }
-            else
-            {
-                header = header.Red();
-            }
-            var successString = success ? " OK " : "FAIL";
-            Reporter.Output.WriteLine(header + $" [{successString}] [{(DateTime.Now - _initialTime).ToString(TimeSpanFormat)}]".Blue() + $" {name}");
-        }
-    }
-}
diff --git a/src/installer/tests/TestUtils/Command.cs b/src/installer/tests/TestUtils/Command.cs
index 2624e60e0487..1f403c4c4000 100644
--- a/src/installer/tests/TestUtils/Command.cs
+++ b/src/installer/tests/TestUtils/Command.cs
@@ -6,9 +6,7 @@
 using System.ComponentModel;
 using System.Diagnostics;
 using System.IO;
-using System.Linq;
 using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
 using System.Threading;
 
 namespace Microsoft.DotNet.Cli.Build.Framework
@@ -315,33 +313,32 @@ private string FormatProcessInfo(ProcessStartInfo info, bool includeWorkingDirec
             return prefix + " " + info.Arguments;
         }
 
+        private static DateTime _initialTime = DateTime.Now;
+
+        private string GetFormattedTime()
+        {
+            const string TimeSpanFormat = @"hh\:mm\:ss\.fff";
+            return (DateTime.Now - _initialTime).ToString(TimeSpanFormat);
+        }
+
         private void ReportExecBegin()
         {
-            BuildReporter.BeginSection("EXEC", FormatProcessInfo(Process.StartInfo, includeWorkingDirectory: false));
+            string message = FormatProcessInfo(Process.StartInfo, includeWorkingDirectory: false);
+            Console.WriteLine($"[EXEC >] [....] [{GetFormattedTime()}] {message}");
         }
 
         private void ReportExecWaitOnExit()
         {
-            BuildReporter.SectionComment("EXEC", $"Waiting for process {Process.Id} to exit...");
+            string message = $"Waiting for process {Process.Id} to exit...";
+            Console.WriteLine($"[EXEC -] [....] [{GetFormattedTime()}] {message}");
         }
 
         private void ReportExecEnd(int exitCode, bool fExpectedToFail)
         {
-            bool success = exitCode == 0;
-            string msgExpectedToFail = "";
-
-            if (fExpectedToFail)
-            {
-                success = !success;
-                msgExpectedToFail = "failed as expected and ";
-            }
-
-            var message = $"{FormatProcessInfo(Process.StartInfo, includeWorkingDirectory: !success)} {msgExpectedToFail}exited with {exitCode}";
-
-            BuildReporter.EndSection(
-                "EXEC",
-                success ? message.Green() : message.Red().Bold(),
-                success);
+            bool success = fExpectedToFail ? exitCode != 0 : exitCode == 0;
+            var status = success ? " OK " : "FAIL";
+            var message = $"{FormatProcessInfo(Process.StartInfo, includeWorkingDirectory: !success)} exited with {exitCode}. Expected: {(fExpectedToFail ? "non-zero" : "0")}";
+            Console.WriteLine($"[EXEC <] [{status}] [{GetFormattedTime()}] {message}");
         }
 
         private void ThrowIfRunning([CallerMemberName] string memberName = null)
diff --git a/src/installer/tests/TestUtils/CommandExtensions.cs b/src/installer/tests/TestUtils/CommandExtensions.cs
index cfa42d1fab4a..1adf97e7b7b6 100644
--- a/src/installer/tests/TestUtils/CommandExtensions.cs
+++ b/src/installer/tests/TestUtils/CommandExtensions.cs
@@ -17,7 +17,7 @@ public static Command EnableHostTracing(this Command command)
 
         public static Command EnableHostTracingToFile(this Command command, out string filePath)
         {
-            filePath = Path.Combine(TestArtifact.TestArtifactsPath, "trace" + Guid.NewGuid().ToString() + ".log");
+            filePath = Path.Combine(TestContext.TestArtifactsPath, "trace" + Guid.NewGuid().ToString() + ".log");
             if (File.Exists(filePath))
             {
                 File.Delete(filePath);
diff --git a/src/installer/tests/TestUtils/CommandResult.cs b/src/installer/tests/TestUtils/CommandResult.cs
index de7961be7043..36af034a3cb5 100644
--- a/src/installer/tests/TestUtils/CommandResult.cs
+++ b/src/installer/tests/TestUtils/CommandResult.cs
@@ -9,8 +9,6 @@ namespace Microsoft.DotNet.Cli.Build.Framework
 {
     public struct CommandResult
     {
-        public static readonly CommandResult Empty = new CommandResult();
-
         public ProcessStartInfo StartInfo { get; }
         public int ExitCode { get; }
         public string StdOut { get; }
@@ -23,28 +21,5 @@ public CommandResult(ProcessStartInfo startInfo, int exitCode, string stdOut, st
             StdOut = stdOut;
             StdErr = stdErr;
         }
-
-        public void EnsureSuccessful(bool suppressOutput = false)
-        {
-            if (ExitCode != 0)
-            {
-                StringBuilder message = new StringBuilder($"Command failed with exit code {ExitCode}: {StartInfo.FileName} {StartInfo.Arguments}");
-
-                if (!suppressOutput)
-                {
-                    if (!string.IsNullOrEmpty(StdOut))
-                    {
-                        message.AppendLine($"{Environment.NewLine}Standard Output:{Environment.NewLine}{StdOut}");
-                    }
-
-                    if (!string.IsNullOrEmpty(StdErr))
-                    {
-                        message.AppendLine($"{Environment.NewLine}Standard Error:{Environment.NewLine}{StdErr}");
-                    }
-                }
-
-                throw new BuildFailureException(message.ToString());
-            }
-        }
     }
 }
diff --git a/src/installer/tests/TestUtils/Constants.cs b/src/installer/tests/TestUtils/Constants.cs
index 61363f2eecc8..2a5ce2f53f55 100644
--- a/src/installer/tests/TestUtils/Constants.cs
+++ b/src/installer/tests/TestUtils/Constants.cs
@@ -113,6 +113,7 @@ public static class DotnetRoot
 
         public static class ErrorCode
         {
+            public const int Success = 0;
             public const int InvalidArgFailure = unchecked((int)0x80008081);
             public const int CoreHostLibMissingFailure = unchecked((int)0x80008083);
             public const int ResolverInitFailure = unchecked((int)0x8000808b);
diff --git a/src/installer/tests/TestUtils/DotNetBuilder.cs b/src/installer/tests/TestUtils/DotNetBuilder.cs
index a4a90d64d5ba..2ff602fd4b4f 100644
--- a/src/installer/tests/TestUtils/DotNetBuilder.cs
+++ b/src/installer/tests/TestUtils/DotNetBuilder.cs
@@ -33,9 +33,11 @@ public DotNetBuilder(string basePath, string builtDotnet, string name)
                 true);
 
             // ./host/fxr/<version>/hostfxr.dll - this is the component being tested
-            SharedFramework.CopyDirectory(
-                builtDotNetCli.GreatestVersionHostFxrPath,
-                Path.Combine(_path, "host", "fxr", Path.GetFileName(builtDotNetCli.GreatestVersionHostFxrPath)));
+            string hostfxrDir = Path.Combine(_path, "host", "fxr", Path.GetFileName(builtDotNetCli.GreatestVersionHostFxrPath));
+            Directory.CreateDirectory(hostfxrDir);
+            File.Copy(
+                builtDotNetCli.GreatestVersionHostFxrFilePath,
+                Path.Combine(hostfxrDir, Binaries.HostFxr.FileName));
         }
 
         /// <summary>
@@ -118,7 +120,7 @@ public DotNetBuilder AddMicrosoftNETCoreAppFrameworkMockCoreClr(string version,
             // ./shared/Microsoft.NETCore.App/<version> - create a mock of the root framework
             string netCoreAppPath = AddFramework(Constants.MicrosoftNETCoreApp, version);
 
-            string currentRid = TestContext.TargetRID;
+            string currentRid = TestContext.BuildRID;
 
             NetCoreAppBuilder.ForNETCoreApp(Constants.MicrosoftNETCoreApp, currentRid)
                 .WithStandardRuntimeFallbacks()
diff --git a/src/installer/tests/TestUtils/DotNetCli.cs b/src/installer/tests/TestUtils/DotNetCli.cs
index ff90b47e7a8a..385f596dc347 100644
--- a/src/installer/tests/TestUtils/DotNetCli.cs
+++ b/src/installer/tests/TestUtils/DotNetCli.cs
@@ -47,15 +47,7 @@ public Command Exec(string command, params string[] args)
 
             return Command.Create(DotnetExecutablePath, newArgs)
                 .EnvironmentVariable("DOTNET_SKIP_FIRST_TIME_EXPERIENCE", "1")
-                .EnvironmentVariable("DOTNET_MULTILEVEL_LOOKUP", "0"); // Avoid looking at machine state by default
+                .MultilevelLookup(false); // Avoid looking at machine state by default
         }
-
-        public Command Restore(params string[] args) => Exec("restore", args);
-        public Command Build(params string[] args) => Exec("build", args);
-        public Command Pack(params string[] args) => Exec("pack", args);
-        public Command Test(params string[] args) => Exec("test", args);
-        public Command Publish(params string[] args) => Exec("publish", args);
-
-        public Command Store(params string[] args) => Exec("store", args);
     }
 }
diff --git a/src/installer/tests/TestUtils/RepoDirectoriesProvider.cs b/src/installer/tests/TestUtils/RepoDirectoriesProvider.cs
index c0d7fd755b86..d5db123099ef 100644
--- a/src/installer/tests/TestUtils/RepoDirectoriesProvider.cs
+++ b/src/installer/tests/TestUtils/RepoDirectoriesProvider.cs
@@ -1,9 +1,5 @@
 using System;
-using System.Collections;
-using System.Collections.Generic;
-using System.Collections.Immutable;
 using System.IO;
-using System.Linq;
 
 namespace Microsoft.DotNet.CoreSetup.Test
 {
@@ -11,21 +7,12 @@ public sealed class RepoDirectoriesProvider
     {
         public static readonly RepoDirectoriesProvider Default = new RepoDirectoriesProvider();
 
-        // Values from test context can be overridden in constructor
-        public string BuiltDotnet { get; }
-
         // Paths computed by looking for the repo root
         public string BaseArtifactsFolder { get; }
         public string HostArtifacts { get; }
         public string HostTestArtifacts { get; }
 
-        // Paths used for building/publishing projects
-        public string TestAssetsFolder { get; }
-        public string NugetPackages { get; }
-        public string DotnetSDK { get; }
-
-        public RepoDirectoriesProvider(
-            string builtDotnet = null)
+        private RepoDirectoriesProvider()
         {
             string repoRoot = GetRepoRootDirectory();
             BaseArtifactsFolder = Path.Combine(repoRoot, "artifacts");
@@ -34,17 +21,6 @@ public RepoDirectoriesProvider(
             string artifacts = Path.Combine(BaseArtifactsFolder, "bin", osPlatformConfig);
             HostArtifacts = Path.Combine(artifacts, "corehost");
             HostTestArtifacts = Path.Combine(artifacts, "corehost_test");
-
-            TestAssetsFolder = TestContext.GetTestContextVariable("TEST_ASSETS");
-            DotnetSDK = TestContext.GetTestContextVariable("DOTNET_SDK_PATH");
-            if (!Directory.Exists(DotnetSDK))
-            {
-                throw new InvalidOperationException("ERROR: Test SDK folder not found.");
-            }
-
-            NugetPackages = TestContext.GetTestContextVariable("NUGET_PACKAGES");
-
-            BuiltDotnet = builtDotnet ?? TestContext.BuiltDotNet.BinPath;
         }
 
         private static string GetRepoRootDirectory()
diff --git a/src/installer/tests/TestUtils/Reporter.cs b/src/installer/tests/TestUtils/Reporter.cs
deleted file mode 100644
index c32353027b6a..000000000000
--- a/src/installer/tests/TestUtils/Reporter.cs
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-namespace Microsoft.DotNet.Cli.Build.Framework
-{
-    // Stupid-simple console manager
-    internal class Reporter
-    {
-        private static readonly Reporter Null = new Reporter(console: null);
-        private static object _lock = new object();
-
-        private readonly AnsiConsole _console;
-
-        private Reporter(AnsiConsole console)
-        {
-            _console = console;
-        }
-
-        public static Reporter Output { get; } = new Reporter(AnsiConsole.GetOutput());
-        public static Reporter Error { get; } = new Reporter(AnsiConsole.GetOutput());
-        public static Reporter Verbose { get; } = new Reporter(AnsiConsole.GetOutput());
-
-        public void WriteLine(string message)
-        {
-            lock (_lock)
-            {
-                _console?.WriteLine(message);
-            }
-        }
-
-        public void WriteLine()
-        {
-            lock (_lock)
-            {
-                _console?.Writer?.WriteLine();
-            }
-        }
-
-        public void Write(string message)
-        {
-            lock (_lock)
-            {
-                _console?.Writer?.Write(message);
-            }
-        }
-        
-        public void WriteBanner(string content)
-        {
-            string border = new string('*', content.Length + 6);
-            WriteLine($@"{border}
-*  {content}  *
-{border}".Green());
-        }
-    }
-}
diff --git a/src/installer/tests/TestUtils/SharedFramework.cs b/src/installer/tests/TestUtils/SharedFramework.cs
deleted file mode 100644
index 53b8ee3f963d..000000000000
--- a/src/installer/tests/TestUtils/SharedFramework.cs
+++ /dev/null
@@ -1,117 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.IO;
-using System.Linq;
-using System.Text.Json.Nodes;
-using System.Threading;
-
-namespace Microsoft.DotNet.CoreSetup.Test
-{
-    /// <summary>
-    /// Helper class for creating, modifying and cleaning up shared frameworks
-    /// </summary>
-    public static class SharedFramework
-    {
-        private static readonly Mutex id_mutex = new Mutex();
-
-        // Locate the first non-existent directory of the form <basePath>-<count>
-        public static string CalculateUniqueTestDirectory(string basePath)
-        {
-            id_mutex.WaitOne();
-
-            int count = 0;
-            string dir;
-
-            do
-            {
-                dir = $"{basePath}-{count}";
-                count++;
-            } while (Directory.Exists(dir));
-
-            id_mutex.ReleaseMutex();
-
-            return dir;
-        }
-
-        // CopyDirectory recursively copies a directory
-        // Remarks:
-        // - If the dest dir does not exist, then it is created.
-        // - If the dest dir exists, then it is substituted with the new one
-        //   (original files and subfolders are deleted).
-        // - If the src dir does not exist, then a DirectoryNotFoundException
-        //   is thrown.
-        public static void CopyDirectory(string srcDir, string dstDir)
-        {
-            DirectoryInfo srcDirInfo = new DirectoryInfo(srcDir);
-
-            if (!srcDirInfo.Exists)
-            {
-                throw new DirectoryNotFoundException();
-            }
-
-            DirectoryInfo dstDirInfo = new DirectoryInfo(dstDir);
-
-            if (dstDirInfo.Exists)
-            {
-                dstDirInfo.Delete(true);
-            }
-
-            dstDirInfo.Create();
-
-            foreach (FileInfo fileInfo in srcDirInfo.GetFiles())
-            {
-                string newFile = Path.Combine(dstDir, fileInfo.Name);
-                fileInfo.CopyTo(newFile);
-            }
-
-            foreach (DirectoryInfo subdirInfo in srcDirInfo.GetDirectories())
-            {
-                string newDir = Path.Combine(dstDir, subdirInfo.Name);
-                CopyDirectory(subdirInfo.FullName, newDir);
-            }
-        }
-
-        public static void AddReferenceToDepsJson(
-            string jsonFile,
-            string fxNameWithVersion,
-            string testPackage,
-            string testPackageVersion,
-            JsonObject testAssemblyVersionInfo = null,
-            string testAssembly = null)
-        {
-            JsonObject depsjson = (JsonObject)JsonObject.Parse(File.ReadAllText(jsonFile));
-
-            string testPackageWithVersion = testPackage + "/" + testPackageVersion;
-            testAssembly = testAssembly ?? (testPackage + ".dll");
-
-            JsonObject targetsValue = (JsonObject)depsjson["targets"].AsObject().First().Value;
-
-            JsonObject packageDependencies = (JsonObject)targetsValue[fxNameWithVersion]["dependencies"];
-            packageDependencies.Add(testPackage, (JsonNode)testPackageVersion);
-
-            if (testAssemblyVersionInfo == null)
-            {
-                testAssemblyVersionInfo = new JsonObject();
-            }
-
-            targetsValue.Add(testPackageWithVersion, new JsonObject
-            {
-                ["runtime"] = new JsonObject
-                {
-                    [testAssembly] = testAssemblyVersionInfo
-                }
-            });
-
-            JsonObject libraries = (JsonObject)depsjson["libraries"];
-            libraries.Add(testPackageWithVersion, new JsonObject
-            {
-                ["type"] = "assemblyreference",
-                ["serviceable"] = false,
-                ["sha512"] = ""
-            });
-
-            File.WriteAllText(jsonFile, depsjson.ToString());
-        }
-    }
-}
diff --git a/src/installer/tests/TestUtils/SingleFileTestApp.cs b/src/installer/tests/TestUtils/SingleFileTestApp.cs
index ce1008f97219..bc65e2f6a89f 100644
--- a/src/installer/tests/TestUtils/SingleFileTestApp.cs
+++ b/src/installer/tests/TestUtils/SingleFileTestApp.cs
@@ -80,7 +80,7 @@ public string Bundle(BundleOptions options = BundleOptions.None, Version? bundle
 
         public string Bundle(BundleOptions options, out Manifest manifest, Version? bundleVersion = null)
         {
-            string bundleDirectory = SharedFramework.CalculateUniqueTestDirectory(Path.Combine(Location, "bundle"));
+            string bundleDirectory = GetUniqueSubdirectory("bundle");
             var bundler = new Bundler(
                 Binaries.GetExeFileNameForCurrentPlatform(AppName),
                 bundleDirectory,
@@ -124,7 +124,7 @@ public string Bundle(BundleOptions options, out Manifest manifest, Version? bund
 
         public string GetNewExtractionRootPath()
         {
-            return SharedFramework.CalculateUniqueTestDirectory(Path.Combine(Location, "extract"));
+            return GetUniqueSubdirectory("extract");
         }
 
         public DirectoryInfo GetExtractionDir(string root, Manifest manifest)
@@ -144,7 +144,7 @@ private void PopulateBuiltAppDirectory()
             File.Delete(builtApp.DepsJson);
 
             var shortVersion = TestContext.Tfm[3..]; // trim "net" from beginning
-            var builder = NetCoreAppBuilder.ForNETCoreApp(AppName, TestContext.TargetRID, shortVersion);
+            var builder = NetCoreAppBuilder.ForNETCoreApp(AppName, TestContext.BuildRID, shortVersion);
 
             // Update the .runtimeconfig.json
             builder.WithRuntimeConfig(c =>
@@ -164,7 +164,7 @@ private void PopulateBuiltAppDirectory()
                     .WithAsset(Path.GetFileName(builtApp.AppDll), f => f.NotOnDisk())));
             if (selfContained)
             {
-                builder.WithRuntimePack($"{Constants.MicrosoftNETCoreApp}.Runtime.{TestContext.TargetRID}", TestContext.MicrosoftNETCoreAppVersion, l => l
+                builder.WithRuntimePack($"{Constants.MicrosoftNETCoreApp}.Runtime.{TestContext.BuildRID}", TestContext.MicrosoftNETCoreAppVersion, l => l
                     .WithAssemblyGroup(string.Empty, g =>
                     {
                         foreach (var file in Binaries.GetRuntimeFiles().Assemblies)
diff --git a/src/installer/tests/TestUtils/TestApp.cs b/src/installer/tests/TestUtils/TestApp.cs
index 3bd1dc88a3a4..cca9ef4c3740 100644
--- a/src/installer/tests/TestUtils/TestApp.cs
+++ b/src/installer/tests/TestUtils/TestApp.cs
@@ -2,14 +2,9 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
-using System.Collections.Generic;
 using System.Diagnostics;
-using System.Globalization;
 using System.IO;
-using System.Linq;
-using System.Reflection;
-using System.Reflection.Metadata;
-using Microsoft.DotNet.Cli.Build;
+
 using Microsoft.NET.HostModel.AppHost;
 
 namespace Microsoft.DotNet.CoreSetup.Test
@@ -21,9 +16,7 @@ public class TestApp : TestArtifact
         public string DepsJson { get; private set; }
         public string RuntimeConfigJson { get; private set; }
         public string RuntimeDevConfigJson { get; private set; }
-        public string HostPolicyDll { get; private set; }
         public string HostFxrDll { get; private set; }
-        public string CoreClrDll { get; private set; }
 
         public string AssemblyName { get; }
 
@@ -90,7 +83,7 @@ public void CreateAppHost(bool isWindowsGui = false, bool copyResources = true)
         public void CreateSingleFileHost(bool isWindowsGui = false, bool copyResources = true)
             => CreateAppHost(Binaries.SingleFileHost.FilePath, isWindowsGui, copyResources);
 
-        public void CreateAppHost(string hostSourcePath, bool isWindowsGui = false, bool copyResources = true)
+        private void CreateAppHost(string hostSourcePath, bool isWindowsGui = false, bool copyResources = true)
         {
             // Use the live-built apphost and HostModel to create the apphost to run
             HostWriter.CreateAppHost(
@@ -110,7 +103,7 @@ public enum MockedComponent
 
         public void PopulateSelfContained(MockedComponent mock, Action<NetCoreAppBuilder> customizer = null)
         {
-            var builder = NetCoreAppBuilder.ForNETCoreApp(Name, TestContext.TargetRID);
+            var builder = NetCoreAppBuilder.ForNETCoreApp(Name, TestContext.BuildRID);
 
             // Update the .runtimeconfig.json - add included framework and remove any existing NETCoreApp framework
             builder.WithRuntimeConfig(c =>
@@ -121,7 +114,7 @@ public void PopulateSelfContained(MockedComponent mock, Action<NetCoreAppBuilder
             builder.WithProject(p => p.WithAssemblyGroup(null, g => g.WithMainAssembly()));
 
             // Add runtime libraries and assets
-            builder.WithRuntimePack($"{Constants.MicrosoftNETCoreApp}.Runtime.{TestContext.TargetRID}", TestContext.MicrosoftNETCoreAppVersion, l =>
+            builder.WithRuntimePack($"{Constants.MicrosoftNETCoreApp}.Runtime.{TestContext.BuildRID}", TestContext.MicrosoftNETCoreAppVersion, l =>
             {
                 if (mock == MockedComponent.None)
                 {
@@ -194,9 +187,7 @@ private void LoadAssets()
             DepsJson = Path.Combine(Location, $"{AssemblyName}.deps.json");
             RuntimeConfigJson = Path.Combine(Location, $"{AssemblyName}.runtimeconfig.json");
             RuntimeDevConfigJson = Path.Combine(Location, $"{AssemblyName}.runtimeconfig.dev.json");
-            HostPolicyDll = Path.Combine(Location, Binaries.HostPolicy.FileName);
             HostFxrDll = Path.Combine(Location, Binaries.HostFxr.FileName);
-            CoreClrDll = Path.Combine(Location, Binaries.CoreClr.FileName);
         }
     }
 }
diff --git a/src/installer/tests/TestUtils/TestArtifact.cs b/src/installer/tests/TestUtils/TestArtifact.cs
index b22630934267..f11ee4db747f 100644
--- a/src/installer/tests/TestUtils/TestArtifact.cs
+++ b/src/installer/tests/TestUtils/TestArtifact.cs
@@ -7,16 +7,16 @@
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.IO;
+using System.Threading;
 
 namespace Microsoft.DotNet.CoreSetup.Test
 {
     public class TestArtifact : IDisposable
     {
         private static readonly Lazy<bool> _preserveTestRuns = new Lazy<bool>(() =>
-            TestContext.GetTestContextVariableOrNull("PRESERVE_TEST_RUNS") == "1");
+            Environment.GetEnvironmentVariable("PRESERVE_TEST_RUNS") == "1");
 
         public static bool PreserveTestRuns() => _preserveTestRuns.Value;
-        public static string TestArtifactsPath => TestContext.TestArtifactsPath;
 
         public string Location { get; }
         public string Name { get; }
@@ -24,6 +24,7 @@ public class TestArtifact : IDisposable
         protected string DirectoryToDelete { get; init; }
 
         private readonly List<TestArtifact> _copies = new List<TestArtifact>();
+        private readonly Mutex _subdirMutex = new Mutex();
 
         public TestArtifact(string location)
         {
@@ -42,6 +43,11 @@ protected TestArtifact(TestArtifact source)
             source._copies.Add(this);
         }
 
+        /// <summary>
+        /// Create a new test artifact.
+        /// </summary>
+        /// <param name="name">Name of the test artifact</param>
+        /// <returns>Test artifact containing no files</returns>
         public static TestArtifact Create(string name)
         {
             var (location, parentPath) = GetNewTestArtifactPath(name);
@@ -51,9 +57,37 @@ public static TestArtifact Create(string name)
             };
         }
 
-        protected void RegisterCopy(TestArtifact artifact)
+        /// <summary>
+        /// Create a new test artifact populated with a copy of <paramref name="sourceDirectory"/>.
+        /// </summary>
+        /// <param name="name">Name of the test artifact</param>
+        /// <param name="sourceDirectory">Source directory to copy</param>
+        /// <returns>Test artifact containing a copy of <paramref name="sourceDirectory"/></returns>
+        public static TestArtifact CreateFromCopy(string name, string sourceDirectory)
         {
-            _copies.Add(artifact);
+            var artifact = Create(name);
+            CopyRecursive(sourceDirectory, artifact.Location, overwrite: true);
+            return artifact;
+        }
+
+        /// <summary>
+        /// Locate the first non-existent subdirectory of the form <name>-<count>
+        /// </summary>
+        /// <param name="name">Name of the directory</param>
+        /// <returns>Path to the created directory</returns>
+        public string GetUniqueSubdirectory(string name)
+        {
+            _subdirMutex.WaitOne();
+            int count = 0;
+            string dir;
+            do
+            {
+                dir = Path.Combine(Location, $"{name}-{count}");
+                count++;
+            } while (Directory.Exists(dir));
+
+            _subdirMutex.ReleaseMutex();
+            return dir;
         }
 
         public virtual void Dispose()
@@ -86,7 +120,7 @@ protected static (string, string) GetNewTestArtifactPath(string artifactName)
             Exception? lastException = null;
             for (int i = 0; i < 10; i++)
             {
-                var parentPath = Path.Combine(TestArtifactsPath, Path.GetRandomFileName());
+                var parentPath = Path.Combine(TestContext.TestArtifactsPath, Path.GetRandomFileName());
                 // Create a lock file next to the target folder
                 var lockPath = parentPath + ".lock";
                 var artifactPath = Path.Combine(parentPath, artifactName);
diff --git a/src/installer/tests/TestUtils/TestContext.cs b/src/installer/tests/TestUtils/TestContext.cs
index e716ba9c3117..4c2f7994c989 100644
--- a/src/installer/tests/TestUtils/TestContext.cs
+++ b/src/installer/tests/TestUtils/TestContext.cs
@@ -10,7 +10,6 @@ public sealed class TestContext
         public static string BuildArchitecture { get; }
         public static string BuildRID { get; }
         public static string Configuration { get; }
-        public static string TargetRID { get; }
 
         public static string MicrosoftNETCoreAppVersion { get; }
         public static string Tfm { get; }
@@ -36,36 +35,25 @@ static TestContext()
                     StringComparer.OrdinalIgnoreCase);
 
             BuildArchitecture = GetTestContextVariable("BUILD_ARCHITECTURE");
-            BuildRID = GetTestContextVariable("BUILDRID");
+            BuildRID = GetTestContextVariable("BUILD_RID");
             Configuration = GetTestContextVariable("BUILD_CONFIGURATION");
-            TargetRID = GetTestContextVariable("TEST_TARGETRID");
 
             MicrosoftNETCoreAppVersion = GetTestContextVariable("MNA_VERSION");
             Tfm = GetTestContextVariable("MNA_TFM");
 
             TestAssetsOutput = GetTestContextVariable("TEST_ASSETS_OUTPUT");
             TestArtifactsPath = GetTestContextVariable("TEST_ARTIFACTS");
+            Directory.CreateDirectory(TestArtifactsPath);
 
-            BuiltDotNet = new DotNetCli(Path.Combine(TestArtifactsPath, "sharedFrameworkPublish"));
+            BuiltDotNet = new DotNetCli(Path.Combine(TestAssetsOutput, "sharedFrameworkPublish"));
         }
 
         public static string GetTestContextVariable(string name)
-        {
-            return GetTestContextVariableOrNull(name) ?? throw new ArgumentException(
-                $"Unable to find variable '{name}' in test context variable file '{_testContextVariableFilePath}'");
-        }
-
-        public static string GetTestContextVariableOrNull(string name)
         {
             // Allow env var override, although normally the test context variables file is used.
-            // Don't accept NUGET_PACKAGES env override specifically: Arcade sets this and it leaks
-            // in during build.cmd/sh runs, replacing the test-specific dir.
-            if (!name.Equals("NUGET_PACKAGES", StringComparison.OrdinalIgnoreCase))
+            if (Environment.GetEnvironmentVariable(name) is string envValue)
             {
-                if (Environment.GetEnvironmentVariable(name) is string envValue)
-                {
-                    return envValue;
-                }
+                return envValue;
             }
 
             if (_testContextVariables.TryGetValue(name, out string value))
@@ -73,7 +61,7 @@ public static string GetTestContextVariableOrNull(string name)
                 return value;
             }
 
-            return null;
+            throw new ArgumentException($"Unable to find variable '{name}' in test context variable file '{_testContextVariableFilePath}'");
         }
     }
 }
diff --git a/src/installer/tests/TestUtils/TestProject.cs b/src/installer/tests/TestUtils/TestProject.cs
deleted file mode 100644
index 967c58e69d97..000000000000
--- a/src/installer/tests/TestUtils/TestProject.cs
+++ /dev/null
@@ -1,76 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.IO;
-
-namespace Microsoft.DotNet.CoreSetup.Test
-{
-    public class TestProject : TestArtifact
-    {
-        public string ProjectDirectory { get => Location; }
-        public string ProjectName { get => Name; }
-
-        public string AssemblyName { get; private set; }
-        public string OutputDirectory { get; set; }
-        public string ProjectFile { get; private set; }
-        public string ProjectAssetsJson { get; private set; }
-        public string RuntimeConfigJson { get => BuiltApp?.RuntimeConfigJson; }
-        public string RuntimeDevConfigJson { get => BuiltApp?.RuntimeDevConfigJson; }
-        public string DepsJson { get => BuiltApp?.DepsJson; }
-        public string AppDll { get => BuiltApp?.AppDll; }
-        public string AppExe { get => BuiltApp?.AppExe; }
-        public string HostPolicyDll { get => BuiltApp?.HostPolicyDll; }
-        public string HostFxrDll { get => BuiltApp?.HostFxrDll; }
-        public string CoreClrDll { get => BuiltApp?.CoreClrDll; }
-
-        public TestApp BuiltApp { get; private set; }
-
-        public TestProject(
-            string projectDirectory,
-            string outputDirectory = null,
-            string assemblyName = null)
-            : base(projectDirectory)
-        {
-            Initialize(outputDirectory, assemblyName);
-        }
-
-        public TestProject(TestProject source)
-            : base(source)
-        {
-            Initialize(null, source.AssemblyName);
-        }
-
-        public TestProject Copy()
-        {
-            return new TestProject(this);
-        }
-
-        private void Initialize(string outputDirectory, string assemblyName)
-        {
-            AssemblyName = assemblyName ?? ProjectName;
-            ProjectFile = Path.Combine(ProjectDirectory, $"{ProjectName}.csproj");
-            ProjectAssetsJson = Path.Combine(ProjectDirectory, "obj", "project.assets.json");
-
-            OutputDirectory = outputDirectory ?? Path.Combine(ProjectDirectory, "bin");
-            if (Directory.Exists(OutputDirectory))
-            {
-                LoadOutputFiles();
-            }
-        }
-
-        public void LoadOutputFiles()
-        {
-            BuiltApp = new TestApp(OutputDirectory, AssemblyName);
-        }
-
-        public bool IsRestored()
-        {
-            if (string.IsNullOrEmpty(ProjectAssetsJson))
-            {
-                return false;
-            }
-
-            return File.Exists(ProjectAssetsJson);
-        }
-    }
-}
diff --git a/src/installer/tests/TestUtils/TestProjectFixture.cs b/src/installer/tests/TestUtils/TestProjectFixture.cs
deleted file mode 100644
index 8f40b301a38c..000000000000
--- a/src/installer/tests/TestUtils/TestProjectFixture.cs
+++ /dev/null
@@ -1,408 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using Microsoft.DotNet.Cli.Build;
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Threading;
-
-namespace Microsoft.DotNet.CoreSetup.Test
-{
-    /*
-     * TestProjectFixture is an abstraction around a TestProject which manages
-     * setup of the TestProject, copying test projects for perf on build/restore,
-     * and building/publishing/restoring test projects where necessary.
-     */
-    public class TestProjectFixture : IDisposable
-    {
-        private string _assemblyName;
-        private TestProject _sourceTestProject;
-
-        public DotNetCli SdkDotnet { get; }
-        public DotNetCli BuiltDotnet { get; }
-        public TestProject TestProject { get; private set; }
-
-        public string CurrentRid { get; private set; }
-        public string Framework { get; private set; }
-        public RepoDirectoriesProvider RepoDirProvider { get; }
-
-        public TestProjectFixture(
-            string testProjectName,
-            RepoDirectoriesProvider repoDirectoriesProvider,
-            string framework = null,
-            string assemblyName = null)
-        {
-            ValidateRequiredDirectories(repoDirectoriesProvider);
-
-            RepoDirProvider = repoDirectoriesProvider;
-
-            Framework = framework ?? TestContext.Tfm;
-
-            SdkDotnet = new DotNetCli(repoDirectoriesProvider.DotnetSDK);
-            CurrentRid = TestContext.TargetRID;
-
-            BuiltDotnet = new DotNetCli(repoDirectoriesProvider.BuiltDotnet);
-
-            _assemblyName = assemblyName;
-
-            var sourceTestProjectPath = Path.Combine(repoDirectoriesProvider.TestAssetsFolder, "TestProjects", testProjectName);
-            _sourceTestProject = new TestProject(
-                sourceTestProjectPath,
-                assemblyName: _assemblyName);
-
-            TestProject = CopyTestProject(_sourceTestProject);
-        }
-
-        public TestProjectFixture(TestProjectFixture fixtureToCopy)
-        {
-            RepoDirProvider = fixtureToCopy.RepoDirProvider;
-            SdkDotnet = fixtureToCopy.SdkDotnet;
-            CurrentRid = fixtureToCopy.CurrentRid;
-            BuiltDotnet = fixtureToCopy.BuiltDotnet;
-            _sourceTestProject = fixtureToCopy._sourceTestProject;
-            Framework = fixtureToCopy.Framework;
-            _assemblyName = fixtureToCopy._assemblyName;
-
-            TestProject = CopyTestProject(fixtureToCopy.TestProject);
-        }
-
-        public void Dispose()
-        {
-            if (TestProject != null)
-            {
-                TestProject.Dispose();
-                TestProject = null;
-            }
-        }
-
-        private readonly static object s_buildFilesLock = new object();
-
-        private TestProject CopyTestProject(TestProject sourceTestProject)
-        {
-            lock (s_buildFilesLock)
-            {
-                // Prevent in-process race condition since the TestArtifactsPath is shared by the current
-                // assembly
-                EnsureDirectoryBuildFiles(RepoDirProvider.TestAssetsFolder, TestArtifact.TestArtifactsPath);
-            }
-
-            return sourceTestProject.Copy();
-
-            static void EnsureDirectoryBuildFiles(string testAssetsFolder, string testArtifactDirectory)
-            {
-                Directory.CreateDirectory(testArtifactDirectory);
-
-                // write an empty Directory.Build.* file to ensure that msbuild doesn't pick up
-                // the repo's root Directory.Build.*.
-                EnsureTestProjectsFileContent(testAssetsFolder, testArtifactDirectory, "props");
-                EnsureTestProjectsFileContent(testAssetsFolder, testArtifactDirectory, "targets");
-
-                static void EnsureTestProjectsFileContent(string testAssetsFolder, string dir, string type)
-                {
-                    var fileName = Path.Combine(dir, $"Directory.Build.{type}");
-                    if (File.Exists(fileName))
-                    {
-                        return;
-                    }
-
-                    File.WriteAllText(
-                        fileName,
-                        string.Join(
-                            Environment.NewLine,
-                            "<Project>",
-                            $"  <Import Project=\"{testAssetsFolder}/TestUtils/TestProjects.{type}\" />",
-                            "</Project>"));
-                }
-            }
-        }
-
-        private void ValidateRequiredDirectories(RepoDirectoriesProvider repoDirectoriesProvider)
-        {
-            if ( ! Directory.Exists(repoDirectoriesProvider.BuiltDotnet))
-            {
-                throw new Exception($"Unable to find built host and sharedfx, please ensure the build has been run: {repoDirectoriesProvider.BuiltDotnet}");
-            }
-
-            if ( ! Directory.Exists(repoDirectoriesProvider.HostArtifacts))
-            {
-                throw new Exception($"Unable to find host artifacts directory, please ensure the build has been run: {repoDirectoriesProvider.HostArtifacts}");
-            }
-        }
-
-        public TestProjectFixture BuildProject(
-            DotNetCli dotnet = null,
-            string runtime = null,
-            string framework = null,
-            string outputDirectory = null,
-            bool restore = false)
-        {
-            dotnet = dotnet ?? SdkDotnet;
-            outputDirectory = outputDirectory ?? TestProject.OutputDirectory;
-            TestProject.OutputDirectory = outputDirectory;
-            framework = framework ?? Framework;
-            Framework = framework;
-
-            var buildArgs = new List<string>
-            {
-                "/bl:BuildProject.binlog"
-            };
-
-            if (restore != true)
-            {
-                buildArgs.Add("--no-restore");
-            }
-
-            if (runtime != null)
-            {
-                buildArgs.Add("--runtime");
-                buildArgs.Add(runtime);
-            }
-
-            if (framework != null)
-            {
-                buildArgs.Add("--framework");
-                buildArgs.Add(framework);
-            }
-
-            buildArgs.Add($"/p:TestTargetRid={TestContext.TargetRID}");
-            buildArgs.Add($"/p:MNAVersion={TestContext.MicrosoftNETCoreAppVersion}");
-
-            if (outputDirectory != null)
-            {
-                buildArgs.Add("-o");
-                buildArgs.Add(outputDirectory);
-            }
-
-            dotnet.Build(buildArgs.ToArray())
-                .WorkingDirectory(TestProject.ProjectDirectory)
-                .Environment("NUGET_PACKAGES", RepoDirProvider.NugetPackages)
-                .Environment("VERSION", "") // Generate with package version 1.0.0, not %VERSION%
-                .CaptureStdErr()
-                .CaptureStdOut()
-                .Execute()
-                .EnsureSuccessful();
-
-            TestProject.LoadOutputFiles();
-
-            return this;
-        }
-
-        public TestProjectFixture StoreProject(
-            DotNetCli dotnet = null,
-            string runtime = null,
-            string framework = null,
-            string manifest = null,
-            string outputDirectory = null)
-        {
-            dotnet = dotnet ?? SdkDotnet;
-            outputDirectory = outputDirectory ?? TestProject.OutputDirectory;
-            framework = framework ?? Framework;
-            Framework = framework;
-
-            var storeArgs = new List<string>
-            {
-                "--runtime"
-            };
-
-            if (runtime != null)
-            {
-                storeArgs.Add(runtime);
-            }
-            else
-            {
-               storeArgs.Add(CurrentRid);
-            }
-
-            if (framework != null)
-            {
-                storeArgs.Add("--framework");
-                storeArgs.Add(framework);
-            }
-
-                storeArgs.Add("--manifest");
-            if (manifest != null)
-            {
-                storeArgs.Add(manifest);
-            }
-            else
-            {
-                storeArgs.Add(_sourceTestProject.ProjectFile);
-            }
-
-            if (outputDirectory != null)
-            {
-                storeArgs.Add("-o");
-                storeArgs.Add(outputDirectory);
-            }
-
-            storeArgs.Add($"/p:MNAVersion={TestContext.MicrosoftNETCoreAppVersion}");
-            storeArgs.Add($"/p:NetCoreAppCurrent={Framework}");
-
-            // Ensure the project's OutputType isn't 'Exe', since that causes issues with 'dotnet store'
-            storeArgs.Add("/p:OutputType=Library");
-
-            dotnet.Store(storeArgs.ToArray())
-                .WorkingDirectory(TestProject.ProjectDirectory)
-                .Environment("NUGET_PACKAGES", RepoDirProvider.NugetPackages)
-                .CaptureStdErr()
-                .CaptureStdOut()
-                .Execute()
-                .EnsureSuccessful();
-
-            TestProject.LoadOutputFiles();
-
-            return this;
-        }
-
-        public TestProjectFixture PublishProject(
-            DotNetCli dotnet = null,
-            string runtime = null,
-            string framework = null,
-            bool? selfContained = null,
-            string outputDirectory = null,
-            bool singleFile = false,
-            bool restore = false,
-            params string[] extraArgs)
-        {
-            dotnet = dotnet ?? SdkDotnet;
-            outputDirectory = outputDirectory ?? TestProject.OutputDirectory;
-            TestProject.OutputDirectory = outputDirectory;
-            framework = framework ?? Framework;
-            Framework = framework;
-
-            var publishArgs = new List<string>
-            {
-                "/bl:PublishProject.binlog"
-            };
-
-            if (restore != true)
-            {
-                publishArgs.Add("--no-restore");
-            }
-
-            if (runtime != null)
-            {
-                publishArgs.Add("--runtime");
-                publishArgs.Add(runtime);
-
-                if (selfContained == null)
-                {
-                    // This is to prevent bugs caused by SDK defaulting self-contained differently for various configurations.
-                    // We still want to allow selfContained to remain unspecified for simple cases, for example for building libraries.
-                    throw new ArgumentException("If runtime is specified, then the caller also has to specify selfContained value.");
-                }
-            }
-
-            if (framework != null)
-            {
-                publishArgs.Add("--framework");
-                publishArgs.Add(framework);
-                publishArgs.Add($"/p:NetCoreAppCurrent={framework}");
-            }
-
-            if (selfContained != null)
-            {
-                publishArgs.Add("--self-contained");
-                publishArgs.Add(selfContained.Value ? "true" : "false");
-
-                // Workaround for https://github.com/dotnet/sdk/issues/25062
-                // If self-contained is specified via the command line, also specify the
-                // runtime identifier (if we didn't already). Otherwise, the SDK ends up
-                // passing the runtime identifier of the SDK such that the one specified
-                // in the project file is ignored.
-                if (selfContained.Value && runtime == null)
-                {
-                    publishArgs.Add("--runtime");
-                    publishArgs.Add(TestContext.TargetRID);
-                }
-            }
-
-            if (outputDirectory != null)
-            {
-                publishArgs.Add("-o");
-                publishArgs.Add(outputDirectory);
-            }
-
-            if (singleFile)
-            {
-                publishArgs.Add("/p:PublishSingleFile=true");
-            }
-
-            publishArgs.Add($"/p:TestTargetRid={TestContext.TargetRID}");
-            publishArgs.Add($"/p:MNAVersion={TestContext.MicrosoftNETCoreAppVersion}");
-
-            foreach (var arg in extraArgs)
-            {
-                publishArgs.Add(arg);
-            }
-
-            dotnet.Publish(publishArgs.ToArray())
-                .WorkingDirectory(TestProject.ProjectDirectory)
-                .Environment("NUGET_PACKAGES", RepoDirProvider.NugetPackages)
-                .CaptureStdErr()
-                .CaptureStdOut()
-                .Execute()
-                .EnsureSuccessful();
-
-            TestProject.LoadOutputFiles();
-
-            return this;
-        }
-
-        public TestProjectFixture RestoreProject(string[] fallbackSources, string extraMSBuildProperties = null)
-        {
-            var restoreArgs = new List<string>();
-            foreach (var fallbackSource in fallbackSources)
-            {
-                restoreArgs.Add("--source");
-                restoreArgs.Add(fallbackSource);
-            }
-            restoreArgs.Add("--disable-parallel");
-
-            restoreArgs.Add($"/p:MNAVersion={TestContext.MicrosoftNETCoreAppVersion}");
-            restoreArgs.Add($"/p:NetCoreAppCurrent={Framework}");
-
-            if (extraMSBuildProperties != null)
-            {
-                restoreArgs.Add(extraMSBuildProperties);
-            }
-
-            SdkDotnet.Restore(restoreArgs.ToArray())
-                .WorkingDirectory(TestProject.ProjectDirectory)
-                .CaptureStdErr()
-                .CaptureStdOut()
-                .Environment("NUGET_PACKAGES", RepoDirProvider.NugetPackages)
-                .Execute()
-                .EnsureSuccessful();
-
-            return this;
-        }
-
-        public TestProjectFixture EnsureRestored(params string[] fallbackSources)
-        {
-            if (!TestProject.IsRestored())
-            {
-                RestoreProject(fallbackSources);
-            }
-
-            return this;
-        }
-
-        public TestProjectFixture EnsureRestoredForRid(string rid, params string[] fallbackSources)
-        {
-            if (!TestProject.IsRestored())
-            {
-                string extraMSBuildProperties = $"/p:TestTargetRid={rid}";
-                RestoreProject(fallbackSources, extraMSBuildProperties);
-            }
-
-            return this;
-        }
-
-        public TestProjectFixture Copy()
-        {
-            return new TestProjectFixture(this);
-        }
-    }
-}
diff --git a/src/installer/tests/pretest.proj b/src/installer/tests/pretest.proj
new file mode 100644
index 000000000000..b97a2e77c2e1
--- /dev/null
+++ b/src/installer/tests/pretest.proj
@@ -0,0 +1,22 @@
+<Project Sdk="Microsoft.Build.Traversal">
+
+  <!-- Build all test project assets. Output will be used by tests. -->
+  <ItemGroup>
+    <ProjectReference Include="$(InstallerProjectRoot)tests\Assets\Projects\**\*.csproj" />
+  </ItemGroup>
+
+  <!-- Set up the shared framework copy the tests should use. -->
+  <Target Name="SetUpSharedFrameworkPublish"
+          BeforeTargets="Build">
+    <!--
+      Explicitly restore before PublishToDisk. IsRestoring property ensures that this evaluation won't be reused.
+      See https://github.com/dotnet/msbuild/issues/2811
+    -->
+    <MSBuild Projects="$(InstallerProjectRoot)pkg\sfx\Microsoft.NETCore.App\Microsoft.NETCore.App.Runtime.sfxproj"
+             Targets="Restore"
+             Properties="IsRestoring=true" />
+    <MSBuild Projects="$(InstallerProjectRoot)pkg\sfx\bundle\Microsoft.NETCore.App.Bundle.bundleproj"
+             Targets="PublishToDisk"
+             Properties="OutputPath=$(TestArtifactsOutputRoot)sharedFrameworkPublish/" />
+  </Target>
+</Project>
diff --git a/src/installer/tests/scripts/linux-test/README.md b/src/installer/tests/scripts/linux-test/README.md
deleted file mode 100644
index 4851921c6d8d..000000000000
--- a/src/installer/tests/scripts/linux-test/README.md
+++ /dev/null
@@ -1,49 +0,0 @@
-This project has the purpose to automate verification test for .NET Runtime and SDK linux packages.
-
-To have this test running in your local machine do the following steps:
-1. Download VerificationTestOnDocker.sh, RuntimeInstallation.sh, SdkInstallation.sh, images.txt in the same folder
-2. Update images.txt with images name you want to run the installation test
-3. Run $ ./VerificationTestOnDocker.sh \<package> \<version> \<command>
-
-The options are:
-
-* \<package>
-   * runtime - verification test for .NET Runtime Linux packages
-   * sdk - verification test for .NET SDK Linux packages
-* \<version>
-  * latest - install the latest available .NET package from our main repository
-  * \<number> - install the package corresponding to this version number
-* \<command>
-  * install - verification test for install
-  * install uninstall - verification test for install and uninstall
-
-
-The script VerificationTestOnDocker.sh is responsible for read a file (images.txt) containing docker images and run a docker container for each image specified in that file. Inside each container it will be executed the script to install .NET Runtime (RuntimeInstallation.sh) or .NET SDK (SdkInstallation.sh).
-
-Both scripts RuntimeInstallation.sh and SdkInstallation.sh automatically identify what distro and version is running in the current machine and can install and uninstall the latest version of .NET Runtime/Sdk packages corresponding to that distro & version. The installation's stdout for all containers is redirected to a single file (logfile.txt). In the end of this file (logfile.txt) it's also displayed the results of the test, printing for each distro and version the result 'failed' or 'passed'.
-
-.NET packages are downloaded from the blob https://dotnetcli.blob.core.windows.net/dotnet
-
-This project takes in account:
-  -> dotnet-sdk depends on dotnet-runtime and aspnet-runtime
-  -> aspnet-runtime depends on dotnet-runtime (can be different to what dotnet-sdk depends on)
-  -> dotnet-runtime-deps depends on system packages
-  -> .NET runtime carries: dotnet-runtime-deps, dotnet-host, dotnet-hostfxr and dotnet-runtime.
-
-Changes on how dotnet runtime packages are structured or modification on the packages dependencies may affect the verification test result.
-
-This verification test depends on docker images and the test result can be a false negative if the image doesn't carry some system packages required to have a proper runtime package installation.
-
-
-The script allows automated test only for the following distro & version:
-
-| Distro | Version |
-|--------|---------|
-| Ubuntu | 14.04, 16.04, 18.04 |
-| Debian | 8, 9 |
-| Centos | 7 |
-| Fedora | 27 |
-| OpenSUSE | 42 |
-| Oracle Linux | 7 |
-| RHEL | 7 |
-| SLES | 12 |
diff --git a/src/installer/tests/scripts/linux-test/RuntimeInstallation.sh b/src/installer/tests/scripts/linux-test/RuntimeInstallation.sh
deleted file mode 100644
index df6fec7846ed..000000000000
--- a/src/installer/tests/scripts/linux-test/RuntimeInstallation.sh
+++ /dev/null
@@ -1,243 +0,0 @@
-#!/usr/bin/env bash
-
-current_user=$(whoami)
-if [[ "$current_user" != "root" ]]; then
-    echo "script requires superuser privileges to run"
-    exit 1
-fi
-
-source /etc/os-release
-distro="$ID"
-version="$VERSION_ID"
-arch="x64"
-result_file="/docker/result.txt"
-log_file="/docker/logfile.txt"
-
-exec &>> $log_file
-
-if [[ "$ID" == "ol" ]]; then
-        distro="oraclelinux"
-fi
-if [[ "$distro" == "oraclelinux" || "$distro" == "rhel" ||  "$distro" == "opensuse" ]]; then
-	version=$(echo $version | cut -d . -f 1)
-fi
-
-echo $distro:$version
-
-runtime_version=$1
-if [[ "$runtime_version" == "latest" ]]; then
-	BLOB_RUNTIME_DIR="https://dotnetcli.blob.core.windows.net/dotnet/Runtime/master"
-else
-	BLOB_RUNTIME_DIR="https://dotnetcli.blob.core.windows.net/dotnet/Runtime/$runtime_version"
-fi
-
-install_curl(){
-	apt-get -y install curl
-	if [ $? -ne 0 ]; then
-		apt-get update
-		apt-get -y install curl
-	fi
-}
-download_from_blob_deb(){
-	BLOB_PATH=$1
-	if curl --output /dev/null --head --fail $BLOB_PATH; then
-		curl -O -s $BLOB_PATH
-	else
-		echo "Could not extract file from blob"
-		exit 1
-	fi
-}
-download_runtime_packages_deb(){
-        download_from_blob_deb "$BLOB_RUNTIME_DIR/dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.deb"
-        download_from_blob_deb "$BLOB_RUNTIME_DIR/dotnet-host-$runtime_version-$arch.deb"
-        download_from_blob_deb "$BLOB_RUNTIME_DIR/dotnet-hostfxr-$runtime_version-$arch.deb"
-        download_from_blob_deb "$BLOB_RUNTIME_DIR/dotnet-runtime-$runtime_version-$arch.deb"
-}
-install_runtime_packages_deb(){
-	dpkg -i dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.deb
-        apt-get install -f -y
-        dpkg -i *.deb
-}
-determine_runtime_version_deb(){
-	if [[ "$runtime_version" == "latest" ]]; then
-		runtime_version=$(dpkg-deb -f dotnet-runtime-latest-$arch.deb Package)
-		runtime_version=${runtime_version#dotnet-runtime-}
-	fi
-}
-check_if_runtime_is_installed_deb(){
-	find_runtime=$(apt list --installed | grep dotnet-runtime-$runtime_version)
-	if [[ -z "$find_runtime" ]]; then
-		echo "Not able to remove runtime $runtime_version because it is not installed"
-		exit 1
-	fi
-}
-uninstall_runtime_deb(){
-	apt-get remove -y $(apt list --installed | grep -e dotnet | cut -d "/" -f 1)
-	runtime_installed_packages=$(apt list --installed | grep -e dotnet)
-}
-install_wget_yum(){
-	yum install -y wget
-}
-install_wget_zypper(){
-	zypper --non-interactive install wget
-}
-download_from_blob_rpm(){
-	BLOB_PATH=$1
-	if wget --spider $BLOB_PATH; then
-		wget -nv $BLOB_PATH
-	else
-		echo "Could not extract file from blob"
-		exit 1
-	fi
-}
-download_runtime_packages_rpm(){
-	download_from_blob_rpm "$BLOB_RUNTIME_DIR/dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.rpm"
-        download_from_blob_rpm "$BLOB_RUNTIME_DIR/dotnet-host-$runtime_version-$arch.rpm"
-        download_from_blob_rpm "$BLOB_RUNTIME_DIR/dotnet-hostfxr-$runtime_version-$arch.rpm"
-        download_from_blob_rpm "$BLOB_RUNTIME_DIR/dotnet-runtime-$runtime_version-$arch.rpm"
-}
-install_runtime_packages_yum(){
-	yum localinstall -y dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.rpm
-        rm dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.rpm
-        rpm -Uvh *.rpm
-}
-install_runtime_packages_zypper(){
-	zypper --no-gpg-checks --non-interactive in ./dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.rpm
-	rm dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.rpm
-        rpm -Uvh *.rpm
-}
-determine_runtime_version_rpm(){
-	if [[ "$runtime_version" == "latest" ]]; then
-		runtime_version=$(rpm -qip dotnet-runtime-latest-$arch.rpm | grep Version)
-		runtime_version=$(echo $runtime_version | cut -d ":" -f 2)
-		runtime_version=$(echo $runtime_version | tr _ -)
-	fi
-}
-check_if_runtime_is_installed_rpm(){
-	find_runtime=$(rpm -qa | grep dotnet-runtime-$runtime_version)
-	if [[ -z "$find_runtime" ]]; then
-		echo "Not able to remove runtime $runtime_version because it is not installed"
-		exit 1
-	fi
-}
-uninstall_runtime_yum(){
-	yum remove -y $(rpm -qa | grep -e dotnet)
-	runtime_installed_packages=$(rpm -qa | grep -e dotnet)
-}
-uninstall_runtime_zypper(){
-	zypper -n rm $(rpm -qa | grep -e dotnet)
-	runtime_installed_packages=$(rpm -qa | grep -e dotnet)
-}
-determine_success_install(){
-	if [ -e $result_file ]; then
-		installed_runtime=$(dotnet --list-runtimes | grep $runtime_version)
-		if [[ -n "$installed_runtime" ]]; then
-        	        success_install=1
-	        else
-        	        success_install=0
-		fi
-	fi
-}
-test_result_install(){
-        if [ -e $result_file ]; then
-                if [ $success_install -eq 1 ]; then
-                        echo "$distro:$version install  ->  passed" >> $result_file
-                else
-                        echo "$distro:$version install  ->  failed" >> $result_file
-                fi
-        fi
-}
-uninstall_latest_runtime_warning(){
-	if [[ "$runtime_version" == "latest" ]]; then
-		echo "Specify runtime version to unistall. Type dotnet --list-runtimes to see runtimes versions installed"
-		exit 1
-	fi
-}
-test_result_uninstall(){
-        if [[ -z "$runtime_installed_packages" ]]; then
-                success_uninstall=1
-        else
-                success_uninstall=0
-        fi
-        if [ -e $result_file ]; then
-                if [ $success_uninstall -eq 1 ]; then
-                        echo "$distro:$version uninstall  ->  passed" >> $result_file
-                else
-                        echo "$distro:$version uninstall  ->  failed" >> $result_file
-                fi
-        fi
-}
-
-if [[ "$distro" == "ubuntu" || "$distro" == "debian" ]]; then
-	if [[ "$2" == "install" ]]; then
-		install_curl
-
-		download_runtime_packages_deb
-		install_runtime_packages_deb
-		dotnet --list-runtimes
-
-		determine_runtime_version_deb
-		determine_success_install
-		test_result_install
-
-	elif [[ "$2" == "uninstall" ]]; then
-		uninstall_latest_runtime_warning
-	fi
-
-	if [[ "$3" == "uninstall" || "$2" == "uninstall" ]]; then
-		check_if_runtime_is_installed_deb
-		uninstall_runtime_deb
-		test_result_uninstall
-	fi
-
-elif [[ "$distro" == "fedora" || "$distro" == "centos" || "$distro" == "oraclelinux" || "$distro" == "rhel" ]]; then
-	if [[ "$2" == "install" ]]; then
-		install_wget_yum
-
-		download_runtime_packages_rpm
-		install_runtime_packages_yum
-
-		dotnet --list-runtimes
-
-		determine_runtime_version_rpm
-		determine_success_install
-		test_result_install
-
-	elif [[ "$2" == "uninstall" ]]; then
-		uninstall_latest_runtime_warning
-	fi
-	if [[ "$3" == "uninstall" || "$2" == "uninstall" ]]; then
-		check_if_runtime_is_installed_rpm
-		uninstall_runtime_yum
-		test_result_uninstall
-	fi
-
-elif [[ "$distro" == "opensuse" || "$distro" == "sles" ]]; then
-	if [[ "$2" == "install" ]]; then
-		install_wget_zypper
-
-		download_runtime_packages_rpm
-		install_runtime_packages_zypper
-		dotnet --list-runtimes
-
-		determine_runtime_version_rpm
-		determine_success_install
-		test_result_install
-
-	elif [[ "$2" == "uninstall" ]]; then
-		uninstall_latest_runtime_warning
-	fi
-
-	if [[ "$3" == "uninstall" || "$2" == "uninstall" ]]; then
-		check_if_runtime_is_installed_rpm
-		uninstall_runtime_zypper
-		test_result_uninstall
-	fi
-fi
-
-if [ -e $log_file ]; then
-	ch=$(printf "%-160s" "-")
-	echo "${ch// /-} "
-fi
-
-
diff --git a/src/installer/tests/scripts/linux-test/SdkInstallation.sh b/src/installer/tests/scripts/linux-test/SdkInstallation.sh
deleted file mode 100644
index 95bb3bd754c6..000000000000
--- a/src/installer/tests/scripts/linux-test/SdkInstallation.sh
+++ /dev/null
@@ -1,405 +0,0 @@
-#!/usr/bin/env bash
-
-current_user=$(whoami)
-if [ $current_user != "root" ]; then
-    echo "script requires superuser privileges to run"
-    exit 1
-fi
-
-source /etc/os-release
-distro="$ID"
-version="$VERSION_ID"
-arch="x64"
-result_file="/docker/result.txt"
-log_file="/docker/logfile.txt"
-
-exec &>> $log_file
-
-if [[ "$ID" == "ol" ]]; then
-        distro="oraclelinux"
-fi
-if [[ "$distro" == "oraclelinux" || "$distro" == "rhel" || "$distro" == "opensuse" ]]; then
-	version=$(echo $version | cut -d . -f 1)
-fi
-
-echo $distro:$version
-
-sdk_version=$1
-
-BLOB_RUNTIME_DIR="https://dotnetcli.blob.core.windows.net/dotnet/Runtime"
-BLOB_SDK_DIR="https://dotnetcli.blob.core.windows.net/dotnet/Sdk"
-BLOB_ASPNET_DIR="https://dotnetcli.blob.core.windows.net/dotnet/aspnetcore/Runtime"
-
-install_curl(){
-	apt-get -y install curl
-	if [ $? -ne 0 ]; then
-		apt-get update
-		apt-get -y install curl
-	fi
-}
-download_from_blob_deb(){
-	BLOB_PATH=$1
-	if curl --output /dev/null --head --fail $BLOB_PATH; then
-		curl -O -s $BLOB_PATH
-	else
-		echo "Could not extract file from blob"
-		exit 1
-	fi
-}
-download_sdk_package_deb(){
-	if [[ "$sdk_version" == "latest" ]]; then
-                download_from_blob_deb "$BLOB_SDK_DIR/master/dotnet-sdk-latest-$arch.deb"
-        else
-                download_from_blob_deb "$BLOB_SDK_DIR/$sdk_version/dotnet-sdk-$sdk_version-$arch.deb"
-	fi
-}
-download_aspnet_package_deb(){
-        download_from_blob_deb "$BLOB_ASPNET_DIR/$aspnet_version/aspnetcore-runtime-$aspnet_version-$arch.deb"
-}
-determine_aspnet_version_install_deb(){
-	aspnet_version=$(dpkg -I dotnet-sdk-$sdk_version-$arch.deb | grep -o 'aspnetcore-runtime-[^ ]*')
-        aspnet_version=${aspnet_version#aspnetcore-runtime-}
-        [[ "${aspnet_version: -1}" == "," ]] && aspnet_version=${aspnet_version%,}
-}
-determine_runtime_sdk_install_deb(){
-	runtime_sdk=$(dpkg -I dotnet-sdk-$sdk_version-$arch.deb | grep -o 'dotnet-runtime-[^ ]*')
-        runtime_sdk=${runtime_sdk#dotnet-runtime-}
-        [[ "${runtime_sdk: -1}" == "," ]] && runtime_sdk=${runtime_sdk%,}
-}
-determine_runtime_aspnet_install_deb(){
-	runtime_aspnet=$(dpkg -I aspnetcore-runtime-$aspnet_version-$arch.deb | grep -o 'dotnet-runtime[^ ]*')
-        runtime_aspnet=${runtime_aspnet#dotnet-runtime-}
-        [[ "${runtime_aspnet: -1}" == "," ]] && runtime_sdk=${runtime_aspnet%,}
-}
-download_runtime_packages_deb(){
-        download_from_blob_deb "$BLOB_RUNTIME_DIR/$runtime_version/dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.deb"
-        download_from_blob_deb "$BLOB_RUNTIME_DIR/$runtime_version/dotnet-host-$runtime_version-$arch.deb"
-        download_from_blob_deb "$BLOB_RUNTIME_DIR/$runtime_version/dotnet-hostfxr-$runtime_version-$arch.deb"
-        download_from_blob_deb "$BLOB_RUNTIME_DIR/$runtime_version/dotnet-runtime-$runtime_version-$arch.deb"
-}
-install_runtime_packages_deb(){
-	dpkg -i dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.deb
-        apt-get install -f -y
-        dpkg -i *.deb
-}
-install_aspnet_and_sdk_deb(){
-       	dpkg -i aspnetcore-runtime-$aspnet_version-$arch.deb
-        dpkg -i dotnet-sdk-$sdk_version-$arch.deb
-}
-check_if_sdk_is_installed_deb(){
-	find_sdk=$(apt list --installed | grep dotnet-sdk-$sdk_version)
-	if [[ -z "$find_sdk" ]]; then
-		echo "Not able to remove sdk $sdk_version because it is not installed"
-		exit 1
-	fi
-}
-determine_runtime_sdk_uninstall_deb(){
-	runtime_sdk=$(apt-cache depends dotnet-sdk-$sdk_version | grep -o 'dotnet-runtime-[^ ]*')
-       	runtime_sdk=${runtime_sdk#dotnet-runtime-}
-}
-determine_aspnet_package_name_uninstall_deb(){
-	aspnet_package_name=$(apt-cache depends dotnet-sdk-$sdk_version | grep -o 'aspnetcore-runtime-[^ ]*')
-}
-determine_runtime_aspnet_uninstall_deb(){
-	runtime_aspnet=$(apt-cache depends $aspnet_package_name | grep -o 'dotnet-runtime-[^ ]*')
-   	runtime_aspnet=${runtime_aspnet#dotnet-runtime-}
-}
-uninstall_dotnet_deb(){
-	apt-get remove -y $(apt list --installed | grep -e dotnet -e aspnet | cut -d "/" -f 1)
-	dotnet_installed_packages=$(apt list --installed | grep -e dotnet -e aspnet)
-}
-
-install_wget_yum(){
-	yum install -y wget
-}
-install_wget_zypper(){
-	zypper --non-interactive install wget
-}
-download_from_blob_rpm(){
-	BLOB_PATH=$1
-	if wget --spider $BLOB_PATH; then
-		wget -nv $BLOB_PATH
-	else
-		echo "Could not extract file from blob"
-		exit 1
-	fi
-}
-download_sdk_package_rpm(){
-	if [[ "$sdk_version" == "latest" ]]; then
-                download_from_blob_rpm "$BLOB_SDK_DIR/master/dotnet-sdk-latest-$arch.rpm"
-        else
-                download_from_blob_rpm "$BLOB_SDK_DIR/$sdk_version/dotnet-sdk-$sdk_version-$arch.rpm"
-        fi
-}
-download_aspnet_package_rpm(){
-        download_from_blob_rpm "$BLOB_ASPNET_DIR/$aspnet_version/aspnetcore-runtime-$aspnet_version-$arch.rpm"
-}
-determine_aspnet_version_install_rpm(){
-	aspnet_version=$(rpm -qpR dotnet-sdk-$sdk_version-$arch.rpm | grep -o 'aspnetcore-runtime-[^ ]*')
-        aspnet_version=${aspnet_version#aspnetcore-runtime-}
-}
-determine_runtime_aspnet_install_rpm(){
-	runtime_aspnet=$(rpm -qpR aspnetcore-runtime-$aspnet_version-$arch.rpm | grep -o 'dotnet-runtime[^ ]*')
-        runtime_aspnet=${runtime_aspnet#dotnet-runtime-}
-}
-determine_runtime_sdk_install_rpm(){
- 	runtime_sdk=$(rpm -qpR dotnet-sdk-$sdk_version-$arch.rpm | grep -o 'dotnet-runtime-[^ ]*')
-        runtime_sdk=${runtime_sdk#dotnet-runtime-}
-
-}
-download_runtime_packages_rpm(){
-	download_from_blob_rpm "$BLOB_RUNTIME_DIR/$runtime_version/dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.rpm"
-        download_from_blob_rpm "$BLOB_RUNTIME_DIR/$runtime_version/dotnet-host-$runtime_version-$arch.rpm"
-        download_from_blob_rpm "$BLOB_RUNTIME_DIR/$runtime_version/dotnet-hostfxr-$runtime_version-$arch.rpm"
-        download_from_blob_rpm "$BLOB_RUNTIME_DIR/$runtime_version/dotnet-runtime-$runtime_version-$arch.rpm"
-}
-install_runtime_deps_package_yum(){
-	yum localinstall -y dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.rpm
-	rm dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.rpm
-}
-install_rpm_from_folder(){
-        rpm -Uvh *.rpm
-}
-install_runtime_deps_package_zypper(){
-	zypper --no-gpg-checks --non-interactive in ./dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.rpm
-	rm dotnet-runtime-deps-$runtime_version-$distro.$version-$arch.rpm
-}
-install_aspnet_and_sdk_rpm(){
-	rpm -i aspnetcore-runtime-$aspnet_version-$arch.rpm
-        rpm -i dotnet-sdk-$sdk_version-$arch.rpm
-}
-check_if_sdk_is_installed_rpm(){
-	find_sdk=$(rpm -qa | grep dotnet-sdk-$sdk_version)
-	if [[ -z "$find_sdk" ]]; then
-		echo "Not able to remove sdk $sdk_version because it is not installed"
-		exit 1
-	fi
-}
-determine_runtime_sdk_uninstall_rpm(){
-	runtime_sdk=$(rpm -q --requires dotnet-sdk-$sdk_version | grep -o 'dotnet-runtime-[^ ]*')
-        runtime_sdk=${runtime_sdk#dotnet-runtime-}
-}
-determine_aspnet_package_name_uninstall_rpm(){
-        aspnet_package_name=$(rpm -q --requires dotnet-sdk-$sdk_version | grep -o 'aspnetcore-runtime-[^ ]*')
-}
-determine_runtime_aspnet_uninstall_rpm(){
-	runtime_aspnet=$(rpm -q --requires $aspnet_package_name | grep -o 'dotnet-runtime-[^ ]*')
-        runtime_aspnet=${runtime_aspnet#dotnet-runtime-}
-}
-uninstall_dotnet_yum(){
-	yum remove -y $(rpm -qa | grep -e dotnet -e aspnet)
-	dotnet_installed_packages=$(rpm -qa | grep -e dotnet -e aspnet)
-}
-uninstall_dotnet_zypper(){
-	zypper -n rm $(rpm -qa | grep -e dotnet -e aspnet)
-	dotnet_installed_packages=$(rpm -qa | grep -e dotnet -e aspnet)
-}
-checkout_new_folder(){
-	mkdir temp_folder
-	cd temp_folder
-}
-checkout_previous_folder(){
-	cd ..
-}
-run_app(){
-	if [ -e $result_file ]; then
-		dotnet new console -o dockerApp
-		cd dockerApp
-		dotnet restore -s https://dotnet.myget.org/F/dotnet-core/api/v3/index.json
-		project_output=$(dotnet run)
-		if [[ "$project_output" == 'Hello World!' ]];
-		then
-			success_install=1;
-		else
-			success_install=0;
-		fi
-	fi
-}
-test_result_install(){
-	if [ -e $result_file ]; then
-		if [ $success_install -eq 1 ]; then
-			echo "$distro:$version install  ->  passed" >> $result_file
-		else
-			echo "$distro:$version install  ->  failed" >> $result_file
-		fi
-	fi
-}
-test_result_uninstall(){
-
-	if [[ -z "$dotnet_installed_packages" ]]; then
-		success_uninstall=1;
-	else
-		success_uninstall=0;
-	fi
-
-	if [ -e $result_file ]; then
-		if [ $success_uninstall -eq 1 ]; then
-               		echo "$distro:$version uninstall  ->  passed" >> $result_file
-		else
-	                echo "$distro:$version uninstall  ->  failed" >> $result_file
-		fi
-	fi
-}
-uninstall_latest_sdk_warning(){
-	if [[ "$sdk_version" == "latest" ]]; then
-		echo "Specify sdk version to unistall. Type dotnet --list-sdks to see sdks versions installed"
-		exit 1
-	fi
-}
-
-if [[ "$distro" == "ubuntu" || "$distro" == "debian" ]]; then
-	if [[ "$2" == "install" ]]; then
-		install_curl
-
-		download_sdk_package_deb
-
-		determine_aspnet_version_install_deb
-		download_aspnet_package_deb
-
-		determine_runtime_aspnet_install_deb
-		determine_runtime_sdk_install_deb
-
-		runtime_version="$runtime_aspnet"
-		download_runtime_packages_deb
-		install_runtime_packages_deb
-
-		if [ "$runtime_aspnet" != "$runtime_sdk" ]; then
-			runtime_version="$runtime_sdk"
-			checkout_new_folder
-			download_runtime_packages_deb
-			install_runtime_packages_deb
-			checkout_previous_folder
-		fi
-
-		install_aspnet_and_sdk_deb
-
-		dotnet --list-runtimes
-		dotnet --list-sdks
-
-		run_app
-		test_result_install
-
-	elif [[ "$2" == "uninstall" ]]; then
-		uninstall_latest_sdk_warning
-		check_if_sdk_is_installed_deb
-
-		determine_runtime_sdk_uninstall_deb
-		determine_aspnet_package_name_uninstall_deb
-		determine_runtime_aspnet_uninstall_deb
-
-	fi
-
-	if [[ "$3" == "uninstall" && "$success_install" == 1 || "$2" == "uninstall" ]]; then
-		uninstall_dotnet_deb
-		test_result_uninstall
-	fi
-
-elif [[ "$distro" == "fedora" || "$distro" == "centos" || "$distro" == "oraclelinux" || "$distro" == "rhel" ]]; then
-	if [[ "$2" == "install" ]]; then
-		install_wget_yum
-
-		download_sdk_package_rpm
-
-		determine_aspnet_version_install_rpm
-		download_aspnet_package_rpm
-
-		determine_runtime_aspnet_install_rpm
-		determine_runtime_sdk_install_rpm
-
-		checkout_new_folder
-		runtime_version="$runtime_aspnet"
-		download_runtime_packages_rpm
-		install_runtime_deps_package_yum
-
-		if [ "$runtime_aspnet" != "$runtime_sdk" ]; then
-			runtime_version="$runtime_sdk"
-			download_runtime_packages_rpm
-			install_runtime_deps_package_yum
-		fi
-		install_rpm_from_folder
-		checkout_previous_folder
-
-		install_aspnet_and_sdk_rpm
-
-		dotnet --list-runtimes
-		dotnet --list-sdks
-
-		run_app
-		test_result_install
-
-	elif [[ "$2" == "uninstall" ]]; then
-		uninstall_latest_sdk_warning
-		check_if_sdk_is_installed_rpm
-
-		determine_runtime_sdk_uninstall_rpm
-		determine_aspnet_package_name_uninstall_rpm
-		determine_runtime_aspnet_uninstall_rpm
-
-                echo $runtime_sdk
-                echo $runtime_aspnet
-
-	fi
-	if [[ "$3" == "uninstall" && "$success_install" == 1 || "$2" == "uninstall" ]]; then
-		uninstall_dotnet_yum
-		test_result_uninstall
-	fi
-
-
-elif [[ "$distro" == "opensuse" || "$distro" == "sles" ]]; then
-	if [[ "$2" == "install" ]]; then
-		install_wget_zypper
-
-		download_sdk_package_rpm
-
-		determine_aspnet_version_install_rpm
-		download_aspnet_package_rpm
-
-		determine_runtime_aspnet_install_rpm
-		determine_runtime_sdk_install_rpm
-
-		checkout_new_folder
-		runtime_version="$runtime_aspnet"
-		download_runtime_packages_rpm
-		install_runtime_deps_package_zypper
-
-		if [ "$runtime_aspnet" != "$runtime_sdk" ]; then
-			runtime_version="$runtime_sdk"
-			download_runtime_packages_rpm
-			install_runtime_deps_package_zypper
-		fi
-
-		install_rpm_from_folder
-		checkout_previous_folder
-
-		install_aspnet_and_sdk_rpm
-
-		dotnet --list-runtimes
-		dotnet --list-sdks
-
-		run_app
-		test_result_install
-
-	elif [[ "$2" == "uninstall" ]]; then
-		uninstall_latest_sdk_warning
-		check_if_sdk_is_installed_rpm
-
-		determine_runtime_sdk_uninstall_rpm
-		determine_aspnet_package_name_uninstall_rpm
-		determine_runtime_aspnet_uninstall_rpm
-
-		echo $runtime_sdk
-		echo $runtime_aspnet
-
-	fi
-
-	if [[ "$3" == "uninstall" && "$success_install" == 1 || "$2" == "uninstall" ]]; then
-		uninstall_dotnet_zypper
-		test_result_uninstall
-	fi
-fi
-
-if [ -e $log_file ]; then
-	ch=$(printf "%-160s" "-")
-	echo "${ch// /-} "
-fi
-
diff --git a/src/installer/tests/scripts/linux-test/VerificationTestOnDocker.sh b/src/installer/tests/scripts/linux-test/VerificationTestOnDocker.sh
deleted file mode 100644
index b62d729a5b7f..000000000000
--- a/src/installer/tests/scripts/linux-test/VerificationTestOnDocker.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env bash
-
-InstallationTestResult="result.txt"
-InstallationTestLogFile="logfile.txt"
-ImagesFile="images.txt"
-
-current_user=$(whoami)
-if [ $current_user != "root" ]; then
-    echo "test.sh requires superuser privileges to run"
-    exit 1
-fi
-
-if [ -e $InstallationTestLogFile ]; then
-	rm $InstallationTestLogFile -f
-fi
-
-if [[ "$1" == "sdk" ]]; then
-	InstallationScript="SdkInstallation.sh"
-	echo -e ".NET SDK verification test result\n" > $InstallationTestResult
-elif [[ "$1" == "runtime" ]]; then
-	InstallationScript="RuntimeInstallation.sh"
-	echo -e ".NET Runtime verification test result\n" > $InstallationTestResult
-fi
-
-while IFS='' read -r image || [[ -n "$image" ]]; do
-	echo $image
-	docker run --rm -v $(pwd):/docker -t $image /bin/bash /docker/$InstallationScript $2 $3 $4
-done <$ImagesFile
-
-cat $InstallationTestResult >> $InstallationTestLogFile
diff --git a/src/installer/tests/scripts/linux-test/images.txt b/src/installer/tests/scripts/linux-test/images.txt
deleted file mode 100644
index 919d0edf3f78..000000000000
--- a/src/installer/tests/scripts/linux-test/images.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-ubuntu:14.04
-ubuntu:16.04
-ubuntu:18.04
-debian:8
-debian:9
-centos:7
-fedora:27
-opensuse:42.2
-oraclelinux:7
diff --git a/src/libraries/Common/src/Interop/Browser/Interop.Locale.cs b/src/libraries/Common/src/Interop/Browser/Interop.Locale.cs
index c882d88afac2..b831e72e70cd 100644
--- a/src/libraries/Common/src/Interop/Browser/Interop.Locale.cs
+++ b/src/libraries/Common/src/Interop/Browser/Interop.Locale.cs
@@ -13,5 +13,7 @@ internal static unsafe partial class JsGlobalization
         internal static extern unsafe int GetFirstDayOfWeek(in string culture, out int exceptionalResult, out object result);
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         internal static extern unsafe int GetFirstWeekOfYear(in string culture, out int exceptionalResult, out object result);
+        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        internal static extern unsafe int GetLocaleInfo(in string locale, in string culture, char* buffer, int bufferLength, out int exceptionalResult, out object result);
     }
 }
diff --git a/src/libraries/Common/src/Interop/Browser/Interop.Runtime.NativeAOT.cs b/src/libraries/Common/src/Interop/Browser/Interop.Runtime.NativeAOT.cs
index 50e7a8ab6003..7c3116c8ef37 100644
--- a/src/libraries/Common/src/Interop/Browser/Interop.Runtime.NativeAOT.cs
+++ b/src/libraries/Common/src/Interop/Browser/Interop.Runtime.NativeAOT.cs
@@ -14,8 +14,10 @@ internal static unsafe partial class Runtime
 
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         internal static extern void ReleaseCSOwnedObject(IntPtr jsHandle);
-        [LibraryImport(JSLibrary, EntryPoint = "mono_wasm_bind_js_import", StringMarshalling = StringMarshalling.Utf16)]
-        public static unsafe partial void BindJSImport(void* signature, out int is_exception, out IntPtr result);
+        [LibraryImport(JSLibrary, EntryPoint = "mono_wasm_bind_js_import_ST", StringMarshalling = StringMarshalling.Utf16)]
+        public static unsafe partial IntPtr BindJSImportST(void* signature);
+        [LibraryImport(JSLibrary, EntryPoint = "mono_wasm_invoke_jsimport_ST")]
+        public static unsafe partial IntPtr InvokeJSImportST(int importHandle, nint args);
         [MethodImpl(MethodImplOptions.InternalCall)]
         public static extern void InvokeJSFunction(IntPtr bound_function_js_handle, nint data);
         [LibraryImport(JSLibrary, EntryPoint = "mono_wasm_invoke_js_import", StringMarshalling = StringMarshalling.Utf16)]
@@ -30,15 +32,12 @@ internal static unsafe partial class Runtime
         public static extern void DeregisterGCRoot(IntPtr handle);
         [MethodImpl(MethodImplOptions.InternalCall)]
         public static extern void CancelPromise(IntPtr gcHandle);
-
-        public static unsafe void BindJSImport(void* signature, out int is_exception, out object result)
-        {
-            BindJSImport(signature, out is_exception, out IntPtr _);
-            if (is_exception != 0)
-                result = "Runtime.BindJSFunction failed"; // TODO-LLVM-JSInterop: Marshal exception message
-            else
-                result = "";
-        }
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        public static extern void AssemblyGetEntryPoint(IntPtr assemblyNamePtr, int auto_insert_breakpoint, void** monoMethodPtrPtr);
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        public static extern void BindAssemblyExports(IntPtr assemblyNamePtr);
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        public static extern void GetAssemblyExport(IntPtr assemblyNamePtr, IntPtr namespacePtr, IntPtr classnamePtr, IntPtr methodNamePtr, IntPtr* monoMethodPtrPtr);
 
         public static unsafe void BindCSFunction(in string fully_qualified_name, int signature_hash, void* signature, out int is_exception, out object result)
         {
diff --git a/src/libraries/Common/src/Interop/Browser/Interop.Runtime.cs b/src/libraries/Common/src/Interop/Browser/Interop.Runtime.cs
index 01888e365ee0..518a4ff53bee 100644
--- a/src/libraries/Common/src/Interop/Browser/Interop.Runtime.cs
+++ b/src/libraries/Common/src/Interop/Browser/Interop.Runtime.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Reflection;
 using System.Runtime.CompilerServices;
 
 internal static partial class Interop
@@ -25,8 +26,6 @@ internal static unsafe partial class Runtime
         public static extern void InvokeJSFunctionSend(nint targetNativeTID, nint functionHandle, nint data);
 #endif
 
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        public static extern unsafe void BindCSFunction(in string fully_qualified_name, int signature_hash, void* signature, out int is_exception, out object result);
         [MethodImpl(MethodImplOptions.InternalCall)]
         public static extern void ResolveOrRejectPromise(nint data);
 #if FEATURE_WASM_MANAGED_THREADS
@@ -43,29 +42,33 @@ internal static unsafe partial class Runtime
 
 #if FEATURE_WASM_MANAGED_THREADS
         [MethodImpl(MethodImplOptions.InternalCall)]
-        public static extern void InstallWebWorkerInterop(nint proxyContextGCHandle);
+        public static extern void InstallWebWorkerInterop(nint proxyContextGCHandle, void* beforeSyncJSImport, void* afterSyncJSImport, void* pumpHandler);
         [MethodImpl(MethodImplOptions.InternalCall)]
         public static extern void UninstallWebWorkerInterop();
 
         [MethodImpl(MethodImplOptions.InternalCall)]
-        public static extern void InvokeJSImportSync(nint data, nint signature);
+        public static extern void InvokeJSImportSync(nint signature, nint args);
         [MethodImpl(MethodImplOptions.InternalCall)]
-        public static extern void InvokeJSImportSyncSend(nint targetNativeTID, nint data, nint signature);
+        public static extern void InvokeJSImportSyncSend(nint targetNativeTID, nint signature, nint args);
         [MethodImpl(MethodImplOptions.InternalCall)]
-        public static extern void InvokeJSImportAsyncPost(nint targetNativeTID, nint data, nint signature);
+        public static extern void InvokeJSImportAsyncPost(nint targetNativeTID, nint signature, nint args);
         [MethodImpl(MethodImplOptions.InternalCall)]
         public static extern void CancelPromise(nint taskHolderGCHandle);
         [MethodImpl(MethodImplOptions.InternalCall)]
         public static extern void CancelPromisePost(nint targetNativeTID, nint taskHolderGCHandle);
 #else
         [MethodImpl(MethodImplOptions.InternalCall)]
-        public static extern unsafe void BindJSImport(void* signature, out int is_exception, out object result);
+        public static extern unsafe nint BindJSImportST(void* signature);
         [MethodImpl(MethodImplOptions.InternalCall)]
-        public static extern void InvokeJSImport(int importHandle, nint data);
+        public static extern void InvokeJSImportST(int importHandle, nint args);
         [MethodImpl(MethodImplOptions.InternalCall)]
         public static extern void CancelPromise(nint gcHandle);
 #endif
-
-
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        public static extern void AssemblyGetEntryPoint(IntPtr assemblyNamePtr, int auto_insert_breakpoint, void** monoMethodPtrPtr);
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        public static extern void BindAssemblyExports(IntPtr assemblyNamePtr);
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        public static extern void GetAssemblyExport(IntPtr assemblyNamePtr, IntPtr namespacePtr, IntPtr classnamePtr, IntPtr methodNamePtr, IntPtr* monoMethodPtrPtr);
     }
 }
diff --git a/src/libraries/Common/src/Interop/Linux/cgroups/Interop.cgroups.cs b/src/libraries/Common/src/Interop/Linux/cgroups/Interop.cgroups.cs
index 48ddfeebc871..4d3d79f58f2e 100644
--- a/src/libraries/Common/src/Interop/Linux/cgroups/Interop.cgroups.cs
+++ b/src/libraries/Common/src/Interop/Linux/cgroups/Interop.cgroups.cs
@@ -215,12 +215,17 @@ private static unsafe CGroupVersion FindCGroupVersion()
             int result = Interop.Sys.GetFormatInfoForMountPoint(SysFsCgroupFileSystemPath, formatBuffer, MountPointFormatBufferSizeInBytes, &numericFormat);
             if (result == 0)
             {
-                cgroupVersion = numericFormat switch
+                if (numericFormat == (int)Interop.Sys.UnixFileSystemTypes.cgroup2fs)
                 {
-                    (int)Interop.Sys.UnixFileSystemTypes.cgroup2fs => CGroupVersion.CGroup2,
-                    (int)Interop.Sys.UnixFileSystemTypes.tmpfs => CGroupVersion.CGroup1,
-                    _ => CGroupVersion.None,
-                };
+                    cgroupVersion = CGroupVersion.CGroup2;
+                }
+                else
+                {
+                    // Assume that if /sys/fs/cgroup exists and the file system type is not cgroup2fs,
+                    // it is cgroup v1. Typically the file system type is tmpfs, but other values have
+                    // been seen in the wild.
+                    cgroupVersion = CGroupVersion.CGroup1;
+                }
             }
 
             return cgroupVersion;
diff --git a/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.Aead.cs b/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.Aead.cs
index edadae0ea60e..2f405e0bfd0d 100644
--- a/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.Aead.cs
+++ b/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.Aead.cs
@@ -3,10 +3,13 @@
 
 using System;
 using System.Diagnostics;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Security.Cryptography;
 using System.Security.Cryptography.Apple;
 
+#pragma warning disable CS3016 // Arrays as attribute arguments are not CLS Compliant
+
 internal static partial class Interop
 {
     internal static partial class AppleCrypto
@@ -164,6 +167,7 @@ internal static unsafe void AesGcmDecrypt(
         }
 
         [LibraryImport(Libraries.AppleCryptoNative)]
+        [UnmanagedCallConv(CallConvs = [ typeof(CallConvSwift) ])]
         private static unsafe partial int AppleCryptoNative_ChaCha20Poly1305Encrypt(
             byte* keyPtr,
             int keyLength,
@@ -179,6 +183,7 @@ private static unsafe partial int AppleCryptoNative_ChaCha20Poly1305Encrypt(
             int aadLength);
 
         [LibraryImport(Libraries.AppleCryptoNative)]
+        [UnmanagedCallConv(CallConvs = [ typeof(CallConvSwift) ])]
         private static unsafe partial int AppleCryptoNative_ChaCha20Poly1305Decrypt(
             byte* keyPtr,
             int keyLength,
@@ -194,6 +199,7 @@ private static unsafe partial int AppleCryptoNative_ChaCha20Poly1305Decrypt(
             int aadLength);
 
         [LibraryImport(Libraries.AppleCryptoNative)]
+        [UnmanagedCallConv(CallConvs = [ typeof(CallConvSwift) ])]
         private static unsafe partial int AppleCryptoNative_AesGcmEncrypt(
             byte* keyPtr,
             int keyLength,
@@ -209,6 +215,7 @@ private static unsafe partial int AppleCryptoNative_AesGcmEncrypt(
             int aadLength);
 
         [LibraryImport(Libraries.AppleCryptoNative)]
+        [UnmanagedCallConv(CallConvs = [ typeof(CallConvSwift) ])]
         private static unsafe partial int AppleCryptoNative_AesGcmDecrypt(
             byte* keyPtr,
             int keyLength,
diff --git a/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.RSA.cs b/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.RSA.cs
index 4a3bd4454ede..deab51eeb2bb 100644
--- a/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.RSA.cs
+++ b/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.RSA.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Buffers;
 using System.Diagnostics;
 using System.Runtime.InteropServices;
 using System.Security.Cryptography;
@@ -69,8 +70,8 @@ private static partial int RsaDecryptOaep(
             out SafeCFDataHandle pEncryptedOut,
             out SafeCFErrorHandle pErrorOut);
 
-        [LibraryImport(Libraries.AppleCryptoNative, EntryPoint = "AppleCryptoNative_RsaDecryptPkcs")]
-        private static partial int RsaDecryptPkcs(
+        [LibraryImport(Libraries.AppleCryptoNative, EntryPoint = "AppleCryptoNative_RsaDecryptRaw")]
+        private static partial int RsaDecryptRaw(
             SafeSecKeyRefHandle publicKey,
             ReadOnlySpan<byte> pbData,
             int cbData,
@@ -166,17 +167,40 @@ internal static byte[] RsaDecrypt(
             byte[] data,
             RSAEncryptionPadding padding)
         {
+            if (padding == RSAEncryptionPadding.Pkcs1)
+            {
+                byte[] padded = ExecuteTransform(
+                    data,
+                    (ReadOnlySpan<byte> source, out SafeCFDataHandle decrypted, out SafeCFErrorHandle error) =>
+                        RsaDecryptRaw(privateKey, source, source.Length, out decrypted, out error));
+
+                byte[] depad = CryptoPool.Rent(padded.Length);
+                OperationStatus status = RsaPaddingProcessor.DepadPkcs1Encryption(padded, depad, out int written);
+                byte[]? ret = null;
+
+                if (status == OperationStatus.Done)
+                {
+                    ret = depad.AsSpan(0, written).ToArray();
+                }
+
+                // Clear the whole thing, especially on failure.
+                CryptoPool.Return(depad);
+                CryptographicOperations.ZeroMemory(padded);
+
+                if (ret is null)
+                {
+                    throw new CryptographicException(SR.Cryptography_InvalidPadding);
+                }
+
+                return ret;
+            }
+
+            Debug.Assert(padding.Mode == RSAEncryptionPaddingMode.Oaep);
+
             return ExecuteTransform(
                 data,
                 (ReadOnlySpan<byte> source, out SafeCFDataHandle decrypted, out SafeCFErrorHandle error) =>
                 {
-                    if (padding == RSAEncryptionPadding.Pkcs1)
-                    {
-                        return RsaDecryptPkcs(privateKey, source, source.Length, out decrypted, out error);
-                    }
-
-                    Debug.Assert(padding.Mode == RSAEncryptionPaddingMode.Oaep);
-
                     return RsaDecryptOaep(
                         privateKey,
                         source,
@@ -195,14 +219,63 @@ internal static bool TryRsaDecrypt(
             out int bytesWritten)
         {
             Debug.Assert(padding.Mode == RSAEncryptionPaddingMode.Pkcs1 || padding.Mode == RSAEncryptionPaddingMode.Oaep);
+
+            if (padding.Mode == RSAEncryptionPaddingMode.Pkcs1)
+            {
+                byte[] padded = CryptoPool.Rent(source.Length);
+                byte[] depad = CryptoPool.Rent(source.Length);
+
+                bool processed = TryExecuteTransform(
+                    source,
+                    padded,
+                    out int paddedLength,
+                    (ReadOnlySpan<byte> innerSource, out SafeCFDataHandle outputHandle, out SafeCFErrorHandle errorHandle) =>
+                        RsaDecryptRaw(privateKey, innerSource, innerSource.Length, out outputHandle, out errorHandle));
+
+                Debug.Assert(
+                    processed,
+                    "TryExecuteTransform should always return true for a large enough buffer.");
+
+                OperationStatus status = OperationStatus.InvalidData;
+                int depaddedLength = 0;
+
+                if (processed)
+                {
+                    status = RsaPaddingProcessor.DepadPkcs1Encryption(
+                        new ReadOnlySpan<byte>(padded, 0, paddedLength),
+                        depad,
+                        out depaddedLength);
+                }
+
+                CryptoPool.Return(padded);
+
+                if (status == OperationStatus.Done)
+                {
+                    if (depaddedLength <= destination.Length)
+                    {
+                        depad.AsSpan(0, depaddedLength).CopyTo(destination);
+                        CryptoPool.Return(depad);
+                        bytesWritten = depaddedLength;
+                        return true;
+                    }
+
+                    CryptoPool.Return(depad);
+                    bytesWritten = 0;
+                    return false;
+                }
+
+                CryptoPool.Return(depad);
+                Debug.Assert(status == OperationStatus.InvalidData);
+                throw new CryptographicException(SR.Cryptography_InvalidPadding);
+            }
+
             return TryExecuteTransform(
                 source,
                 destination,
                 out bytesWritten,
                 delegate (ReadOnlySpan<byte> innerSource, out SafeCFDataHandle outputHandle, out SafeCFErrorHandle errorHandle)
                 {
-                    return padding.Mode == RSAEncryptionPaddingMode.Pkcs1 ?
-                        RsaDecryptPkcs(privateKey, innerSource, innerSource.Length, out outputHandle, out errorHandle) :
+                    return
                         RsaDecryptOaep(privateKey, innerSource, innerSource.Length, PalAlgorithmFromAlgorithmName(padding.OaepHashAlgorithm), out outputHandle, out errorHandle);
                 });
         }
diff --git a/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.SecKeyRef.cs b/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.SecKeyRef.cs
index e7c08596a2c3..93a88661d25a 100644
--- a/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.SecKeyRef.cs
+++ b/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.SecKeyRef.cs
@@ -15,6 +15,8 @@ internal static partial class AppleCrypto
         private const int kSuccess = 1;
         private const int kErrorSeeError = -2;
         private const int kPlatformNotSupported = -5;
+        private const int kKeyIsSensitive = -6;
+        private const int kKeyIsNotExtractable = -7;
 
         internal enum PAL_KeyAlgorithm : uint
         {
@@ -125,8 +127,6 @@ internal static bool TrySecKeyCopyExternalRepresentation(
             SafeSecKeyRefHandle key,
             out byte[] externalRepresentation)
         {
-            const int errSecPassphraseRequired = -25260;
-
             int result = AppleCryptoNative_SecKeyCopyExternalRepresentation(
                 key,
                 out SafeCFDataHandle data,
@@ -140,12 +140,12 @@ internal static bool TrySecKeyCopyExternalRepresentation(
                     case kSuccess:
                         externalRepresentation = CoreFoundation.CFGetData(data);
                         return true;
+                    case kKeyIsSensitive:
+                        externalRepresentation = [];
+                        return false;
+                    case kKeyIsNotExtractable:
+                        throw new CryptographicException(SR.Cryptography_KeyNotExtractable);
                     case kErrorSeeError:
-                        if (Interop.CoreFoundation.GetErrorCode(errorHandle) == errSecPassphraseRequired)
-                        {
-                            externalRepresentation = Array.Empty<byte>();
-                            return false;
-                        }
                         throw CreateExceptionForCFError(errorHandle);
                     default:
                         Debug.Fail($"SecKeyCopyExternalRepresentation returned {result}");
diff --git a/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.SecKeyRef.macOS.cs b/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.SecKeyRef.macOS.cs
index e4bca886045c..74f4ccd2fafb 100644
--- a/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.SecKeyRef.macOS.cs
+++ b/src/libraries/Common/src/Interop/OSX/System.Security.Cryptography.Native.Apple/Interop.SecKeyRef.macOS.cs
@@ -3,6 +3,7 @@
 
 using System;
 using System.Diagnostics;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Security.Cryptography;
 using System.Security.Cryptography.Apple;
@@ -36,7 +37,7 @@ private static partial int AppleCryptoNative_SecKeyImportEphemeral(
 
         internal static SafeSecKeyRefHandle ImportEphemeralKey(ReadOnlySpan<byte> keyBlob, bool hasPrivateKey)
         {
-            Debug.Assert(keyBlob != null);
+            Debug.Assert(!Unsafe.IsNullRef(ref MemoryMarshal.GetReference(keyBlob)));
 
             SafeSecKeyRefHandle keyHandle;
             int osStatus;
diff --git a/src/libraries/Common/src/Interop/Unix/System.Security.Cryptography.Native/Interop.EVP.MacAlgs.cs b/src/libraries/Common/src/Interop/Unix/System.Security.Cryptography.Native/Interop.EVP.MacAlgs.cs
index e9a974433516..75e88e40aaa6 100644
--- a/src/libraries/Common/src/Interop/Unix/System.Security.Cryptography.Native/Interop.EVP.MacAlgs.cs
+++ b/src/libraries/Common/src/Interop/Unix/System.Security.Cryptography.Native/Interop.EVP.MacAlgs.cs
@@ -12,8 +12,19 @@ internal static partial class Crypto
     {
         internal static partial class EvpMacAlgs
         {
-            internal static SafeEvpMacHandle? Kmac128 { get; } = EvpMacFetch(HashAlgorithmNames.KMAC128);
-            internal static SafeEvpMacHandle? Kmac256 { get; } = EvpMacFetch(HashAlgorithmNames.KMAC256);
+            internal static SafeEvpMacHandle? Kmac128 { get; }
+            internal static SafeEvpMacHandle? Kmac256 { get; }
+
+            static EvpMacAlgs()
+            {
+                CryptoInitializer.Initialize();
+
+                // Do not use property initializers for these because we need to ensure CryptoInitializer.Initialize
+                // is called first. Property initializers happen before cctors, so instead set the property after the
+                // initializer is run.
+                Kmac128 = EvpMacFetch(HashAlgorithmNames.KMAC128);
+                Kmac256 = EvpMacFetch(HashAlgorithmNames.KMAC256);
+            }
 
             [LibraryImport(Libraries.CryptoNative, EntryPoint = "CryptoNative_EvpMacFetch", StringMarshalling = StringMarshalling.Utf8)]
             private static partial SafeEvpMacHandle CryptoNative_EvpMacFetch(string algorithm, out int haveFeature);
diff --git a/src/libraries/Common/src/Interop/Unix/System.Security.Cryptography.Native/Interop.OpenSsl.cs b/src/libraries/Common/src/Interop/Unix/System.Security.Cryptography.Native/Interop.OpenSsl.cs
index 3556698c613e..310981b194d0 100644
--- a/src/libraries/Common/src/Interop/Unix/System.Security.Cryptography.Native/Interop.OpenSsl.cs
+++ b/src/libraries/Common/src/Interop/Unix/System.Security.Cryptography.Native/Interop.OpenSsl.cs
@@ -25,8 +25,6 @@ internal static partial class OpenSsl
         private static readonly string? s_keyLogFile = Environment.GetEnvironmentVariable("SSLKEYLOGFILE");
         private static readonly FileStream? s_fileStream = s_keyLogFile != null ? File.Open(s_keyLogFile, FileMode.Append, FileAccess.Write, FileShare.ReadWrite) : null;
 #endif
-        private const string DisableTlsResumeCtxSwitch = "System.Net.Security.DisableTlsResume";
-        private const string DisableTlsResumeEnvironmentVariable = "DOTNET_SYSTEM_NET_SECURITY_DISABLETLSRESUME";
         private const string TlsCacheSizeCtxName = "System.Net.Security.TlsCacheSize";
         private const string TlsCacheSizeEnvironmentVariable = "DOTNET_SYSTEM_NET_SECURITY_TLSCACHESIZE";
         private const SslProtocols FakeAlpnSslProtocol = (SslProtocols)1;   // used to distinguish server sessions with ALPN
@@ -58,35 +56,6 @@ internal static partial class OpenSsl
 
         private static readonly int s_cacheSize = GetCacheSize();
 
-        private static volatile int s_disableTlsResume = -1;
-
-        private static bool DisableTlsResume
-        {
-            get
-            {
-                int disableTlsResume = s_disableTlsResume;
-                if (disableTlsResume != -1)
-                {
-                    return disableTlsResume != 0;
-                }
-
-                // First check for the AppContext switch, giving it priority over the environment variable.
-                if (AppContext.TryGetSwitch(DisableTlsResumeCtxSwitch, out bool value))
-                {
-                    s_disableTlsResume = value ? 1 : 0;
-                }
-                else
-                {
-                    // AppContext switch wasn't used. Check the environment variable.
-                    s_disableTlsResume =
-                        Environment.GetEnvironmentVariable(DisableTlsResumeEnvironmentVariable) is string envVar &&
-                        (envVar == "1" || envVar.Equals("true", StringComparison.OrdinalIgnoreCase)) ? 1 : 0;
-                }
-
-                return s_disableTlsResume != 0;
-            }
-        }
-
         private static int GetCacheSize()
         {
             string? value = AppContext.GetData(TlsCacheSizeCtxName) as string ?? Environment.GetEnvironmentVariable(TlsCacheSizeEnvironmentVariable);
@@ -298,7 +267,7 @@ internal static SafeSslHandle AllocateSslHandle(SslAuthenticationOptions sslAuth
             SafeSslContextHandle? newCtxHandle = null;
             SslProtocols protocols = CalculateEffectiveProtocols(sslAuthenticationOptions);
             bool hasAlpn = sslAuthenticationOptions.ApplicationProtocols != null && sslAuthenticationOptions.ApplicationProtocols.Count != 0;
-            bool cacheSslContext = sslAuthenticationOptions.AllowTlsResume && !DisableTlsResume && sslAuthenticationOptions.EncryptionPolicy == EncryptionPolicy.RequireEncryption && sslAuthenticationOptions.CipherSuitesPolicy == null;
+            bool cacheSslContext = sslAuthenticationOptions.AllowTlsResume && !SslStream.DisableTlsResume && sslAuthenticationOptions.EncryptionPolicy == EncryptionPolicy.RequireEncryption && sslAuthenticationOptions.CipherSuitesPolicy == null;
 
             if (cacheSslContext)
             {
@@ -692,9 +661,6 @@ private static unsafe int AlpnServerSelectCallback(IntPtr ssl, byte** outp, byte
                 return Ssl.SSL_TLSEXT_ERR_ALERT_FATAL;
             }
 
-            // reset application data to avoid dangling pointer.
-            Ssl.SslSetData(ssl, IntPtr.Zero);
-
             GCHandle protocolHandle = GCHandle.FromIntPtr(sslData);
             if (!(protocolHandle.Target is List<SslApplicationProtocol> protocolList))
             {
diff --git a/src/libraries/Common/src/Interop/Unix/System.Security.Cryptography.Native/Interop.Ssl.cs b/src/libraries/Common/src/Interop/Unix/System.Security.Cryptography.Native/Interop.Ssl.cs
index eade34f594a9..e1f2dfdc1f23 100644
--- a/src/libraries/Common/src/Interop/Unix/System.Security.Cryptography.Native/Interop.Ssl.cs
+++ b/src/libraries/Common/src/Interop/Unix/System.Security.Cryptography.Native/Interop.Ssl.cs
@@ -420,12 +420,6 @@ protected override void Dispose(bool disposing)
                 _writeBio?.Dispose();
             }
 
-            if (AlpnHandle.IsAllocated)
-            {
-                Interop.Ssl.SslSetData(handle, IntPtr.Zero);
-                AlpnHandle.Free();
-            }
-
             base.Dispose(disposing);
         }
 
@@ -438,6 +432,12 @@ protected override bool ReleaseHandle()
 
             SslContextHandle?.DangerousRelease();
 
+            if (AlpnHandle.IsAllocated)
+            {
+                Interop.Ssl.SslSetData(handle, IntPtr.Zero);
+                AlpnHandle.Free();
+            }
+
             IntPtr h = handle;
             SetHandle(IntPtr.Zero);
             Interop.Ssl.SslDestroy(h); // will free the handles underlying _readBio and _writeBio
diff --git a/src/libraries/Common/src/Interop/Windows/Advapi32/Interop.RegisterEventSource_IntPtr.cs b/src/libraries/Common/src/Interop/Windows/Advapi32/Interop.RegisterEventSource_IntPtr.cs
new file mode 100644
index 000000000000..66b58f06e7b5
--- /dev/null
+++ b/src/libraries/Common/src/Interop/Windows/Advapi32/Interop.RegisterEventSource_IntPtr.cs
@@ -0,0 +1,14 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.InteropServices;
+
+internal static partial class Interop
+{
+    internal static partial class Advapi32
+    {
+        [LibraryImport(Libraries.Advapi32, EntryPoint = "RegisterEventSourceW", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)]
+        internal static partial IntPtr RegisterEventSource(string lpUNCServerName, string lpSourceName);
+    }
+}
diff --git a/src/libraries/Common/src/Interop/Windows/Advapi32/Interop.ReportEvent_IntPtr.cs b/src/libraries/Common/src/Interop/Windows/Advapi32/Interop.ReportEvent_IntPtr.cs
new file mode 100644
index 000000000000..d605a21a4c92
--- /dev/null
+++ b/src/libraries/Common/src/Interop/Windows/Advapi32/Interop.ReportEvent_IntPtr.cs
@@ -0,0 +1,24 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.InteropServices;
+
+internal static partial class Interop
+{
+    internal static partial class Advapi32
+    {
+        [LibraryImport(Libraries.Advapi32, EntryPoint = "ReportEventW", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)]
+        [return: MarshalAs(UnmanagedType.Bool)]
+        public static partial bool ReportEvent(
+            IntPtr hEventLog,
+            short wType,
+            ushort wcategory,
+            uint dwEventID,
+            byte[] lpUserSid,
+            short wNumStrings,
+            int dwDataSize,
+            IntPtr lpStrings,
+            byte[] lpRawData);
+    }
+}
diff --git a/src/libraries/Common/src/Interop/Windows/Interop.Errors.cs b/src/libraries/Common/src/Interop/Windows/Interop.Errors.cs
index c1e4b476a056..a21197310f95 100644
--- a/src/libraries/Common/src/Interop/Windows/Interop.Errors.cs
+++ b/src/libraries/Common/src/Interop/Windows/Interop.Errors.cs
@@ -13,6 +13,7 @@ internal static partial class Errors
         internal const int ERROR_ACCESS_DENIED = 0x5;
         internal const int ERROR_INVALID_HANDLE = 0x6;
         internal const int ERROR_NOT_ENOUGH_MEMORY = 0x8;
+        internal const int ERROR_INVALID_ACCESS = 0xC;
         internal const int ERROR_INVALID_DATA = 0xD;
         internal const int ERROR_OUTOFMEMORY = 0xE;
         internal const int ERROR_INVALID_DRIVE = 0xF;
diff --git a/src/libraries/Common/src/Interop/Windows/Ole32/Interop.CoCreateGuid.cs b/src/libraries/Common/src/Interop/Windows/Ole32/Interop.CoCreateGuid.cs
index ad8e77784cf8..53478564603e 100644
--- a/src/libraries/Common/src/Interop/Windows/Ole32/Interop.CoCreateGuid.cs
+++ b/src/libraries/Common/src/Interop/Windows/Ole32/Interop.CoCreateGuid.cs
@@ -9,6 +9,6 @@ internal static partial class Interop
     internal static partial class Ole32
     {
         [LibraryImport(Libraries.Ole32)]
-        internal static partial int CoCreateGuid(out Guid guid);
+        internal static unsafe partial int CoCreateGuid(Guid* guid);
     }
 }
diff --git a/src/libraries/Common/src/Interop/Windows/SspiCli/ISSPIInterface.cs b/src/libraries/Common/src/Interop/Windows/SspiCli/ISSPIInterface.cs
index ccaca072c6bf..f504e175cfdc 100644
--- a/src/libraries/Common/src/Interop/Windows/SspiCli/ISSPIInterface.cs
+++ b/src/libraries/Common/src/Interop/Windows/SspiCli/ISSPIInterface.cs
@@ -22,6 +22,7 @@ internal interface ISSPIInterface
 
         int QueryContextChannelBinding(SafeDeleteContext phContext, Interop.SspiCli.ContextAttribute attribute, out SafeFreeContextBufferChannelBinding refHandle);
         int QueryContextAttributes(SafeDeleteContext phContext, Interop.SspiCli.ContextAttribute attribute, Span<byte> buffer, Type? handleType, out SafeHandle? refHandle);
+        unsafe int QueryContextAttributes(SafeDeleteContext phContext, Interop.SspiCli.ContextAttribute attribute, IntPtr* refHandle);
         int QuerySecurityContextToken(SafeDeleteContext phContext, out SecurityContextTokenHandle phToken);
         int CompleteAuthToken(ref SafeDeleteSslContext? refContext, in InputSecurityBuffer inputBuffer);
         int ApplyControlToken(ref SafeDeleteSslContext? refContext, in SecurityBuffer inputBuffer);
diff --git a/src/libraries/Common/src/Interop/Windows/SspiCli/SSPIAuthType.cs b/src/libraries/Common/src/Interop/Windows/SspiCli/SSPIAuthType.cs
index 387dbef10abc..2f24afa114b0 100644
--- a/src/libraries/Common/src/Interop/Windows/SspiCli/SSPIAuthType.cs
+++ b/src/libraries/Common/src/Interop/Windows/SspiCli/SSPIAuthType.cs
@@ -106,6 +106,11 @@ public int QueryContextChannelBinding(SafeDeleteContext context, Interop.SspiCli
             throw new NotSupportedException();
         }
 
+        public unsafe int QueryContextAttributes(SafeDeleteContext context, Interop.SspiCli.ContextAttribute attribute, IntPtr* refHandle)
+        {
+            return SafeFreeContextBuffer.QueryContextAttributes(context, attribute, refHandle);
+        }
+
         public unsafe int QueryContextAttributes(SafeDeleteContext context, Interop.SspiCli.ContextAttribute attribute, Span<byte> buffer, Type? handleType, out SafeHandle? refHandle)
         {
             refHandle = null;
@@ -115,10 +120,6 @@ public unsafe int QueryContextAttributes(SafeDeleteContext context, Interop.Sspi
                 {
                     refHandle = SafeFreeContextBuffer.CreateEmptyHandle();
                 }
-                else if (handleType == typeof(SafeFreeCertContext))
-                {
-                    refHandle = new SafeFreeCertContext();
-                }
                 else
                 {
                     throw new ArgumentException(SR.Format(SR.SSPIInvalidHandleType, handleType.FullName), nameof(handleType));
diff --git a/src/libraries/Common/src/Interop/Windows/SspiCli/SSPISecureChannelType.cs b/src/libraries/Common/src/Interop/Windows/SspiCli/SSPISecureChannelType.cs
index 3ca74bf7dfe1..90e40a5dc055 100644
--- a/src/libraries/Common/src/Interop/Windows/SspiCli/SSPISecureChannelType.cs
+++ b/src/libraries/Common/src/Interop/Windows/SspiCli/SSPISecureChannelType.cs
@@ -108,6 +108,11 @@ public unsafe int QueryContextChannelBinding(SafeDeleteContext phContext, Intero
             return SafeFreeContextBufferChannelBinding.QueryContextChannelBinding(phContext, attribute, &bindings, refHandle);
         }
 
+        public unsafe int QueryContextAttributes(SafeDeleteContext phContext, Interop.SspiCli.ContextAttribute attribute, IntPtr* refHandle)
+        {
+            return SafeFreeContextBuffer.QueryContextAttributes(phContext, attribute, refHandle);
+        }
+
         public unsafe int QueryContextAttributes(SafeDeleteContext phContext, Interop.SspiCli.ContextAttribute attribute, Span<byte> buffer, Type? handleType, out SafeHandle? refHandle)
         {
             refHandle = null;
diff --git a/src/libraries/Common/src/Interop/Windows/SspiCli/SSPIWrapper.cs b/src/libraries/Common/src/Interop/Windows/SspiCli/SSPIWrapper.cs
index 014e357bc23e..b41a8b3ce20e 100644
--- a/src/libraries/Common/src/Interop/Windows/SspiCli/SSPIWrapper.cs
+++ b/src/libraries/Common/src/Interop/Windows/SspiCli/SSPIWrapper.cs
@@ -270,29 +270,42 @@ public static bool QueryBlittableContextAttributes<T>(ISSPIInterface secModule,
             }
         }
 
-        private static bool QueryCertContextAttribute(ISSPIInterface secModule, SafeDeleteContext securityContext, Interop.SspiCli.ContextAttribute attribute, out SafeFreeCertContext? certContext)
+        private static unsafe bool QueryCertContextAttribute(ISSPIInterface secModule, SafeDeleteContext securityContext, Interop.SspiCli.ContextAttribute attribute, out SafeFreeCertContext? certContext)
         {
-            Span<IntPtr> buffer = stackalloc IntPtr[1];
-            int errorCode = secModule.QueryContextAttributes(
-                securityContext,
-                attribute,
-                MemoryMarshal.AsBytes(buffer),
-                typeof(SafeFreeCertContext),
-                out SafeHandle? sspiHandle);
+            IntPtr handle = IntPtr.Zero;
+            certContext = null;
 
-            // certificate is not always present (e.g. on server when querying client certificate)
-            // but we still want to consider such case as a success.
-            bool success = errorCode == 0 || errorCode == (int)Interop.SECURITY_STATUS.NoCredentials;
+            try
+            {
+                int errorCode = secModule.QueryContextAttributes(
+                    securityContext,
+                    attribute,
+                    &handle);
+
+                // certificate is not always present (e.g. on server when querying client certificate)
+                // but we still want to consider such case as a success.
+                bool success = errorCode == 0 || errorCode == (int)Interop.SECURITY_STATUS.NoCredentials;
 
-            if (!success)
+                if (errorCode == 0 && handle != IntPtr.Zero)
+                {
+                    certContext = new SafeFreeCertContext();
+                    certContext.Set(handle);
+                    // Handle was successfully transferred to SafeHandle
+                    handle = IntPtr.Zero;
+                }
+                if (!success)
+                {
+                    if (NetEventSource.Log.IsEnabled()) NetEventSource.Error(null, $"ERROR = {ErrorDescription(errorCode)}");
+                }
+                return success;
+            }
+            finally
             {
-                sspiHandle?.Dispose();
-                sspiHandle = null;
-                if (NetEventSource.Log.IsEnabled()) NetEventSource.Error(null, $"ERROR = {ErrorDescription(errorCode)}");
+                if (handle != IntPtr.Zero)
+                {
+                    Interop.Crypt32.CertFreeCertificateContext(handle);
+                }
             }
-
-            certContext = sspiHandle as SafeFreeCertContext;
-            return success;
         }
 
         public static bool QueryContextAttributes_SECPKG_ATTR_REMOTE_CERT_CONTEXT(ISSPIInterface secModule, SafeDeleteContext securityContext, out SafeFreeCertContext? certContext)
diff --git a/src/libraries/Common/src/Interop/Windows/SspiCli/SecuritySafeHandles.cs b/src/libraries/Common/src/Interop/Windows/SspiCli/SecuritySafeHandles.cs
index 4b0e0f4f689c..589bc56352bb 100644
--- a/src/libraries/Common/src/Interop/Windows/SspiCli/SecuritySafeHandles.cs
+++ b/src/libraries/Common/src/Interop/Windows/SspiCli/SecuritySafeHandles.cs
@@ -65,6 +65,23 @@ internal static SafeFreeContextBuffer CreateEmptyHandle()
             return new SafeFreeContextBuffer_SECURITY();
         }
 
+        public static unsafe int QueryContextAttributes(SafeDeleteContext phContext, Interop.SspiCli.ContextAttribute contextAttribute, IntPtr* handle)
+        {
+            bool mustRelease = false;
+            try
+            {
+                phContext.DangerousAddRef(ref mustRelease);
+                return Interop.SspiCli.QueryContextAttributesW(ref phContext._handle, contextAttribute, handle);
+            }
+            finally
+            {
+                if (mustRelease)
+                {
+                    phContext.DangerousRelease();
+                }
+            }
+        }
+
         //
         // After PInvoke call the method will fix the refHandle.handle with the returned value.
         // The caller is responsible for creating a correct SafeHandle template or null can be passed if no handle is returned.
@@ -98,7 +115,7 @@ public static unsafe int QueryContextAttributes(SafeDeleteContext phContext, Int
                 }
                 else
                 {
-                    ((SafeFreeCertContext)refHandle).Set(*(IntPtr*)buffer);
+                    Debug.Assert(false);
                 }
             }
 
diff --git a/src/libraries/Common/src/Microsoft/Win32/SafeHandles/SafeUnicodeStringHandle.cs b/src/libraries/Common/src/Microsoft/Win32/SafeHandles/SafeUnicodeStringHandle.cs
index e58bfd19695e..14d47033190c 100644
--- a/src/libraries/Common/src/Microsoft/Win32/SafeHandles/SafeUnicodeStringHandle.cs
+++ b/src/libraries/Common/src/Microsoft/Win32/SafeHandles/SafeUnicodeStringHandle.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 
 namespace Microsoft.Win32.SafeHandles
@@ -23,12 +24,12 @@ public SafeUnicodeStringHandle(string s)
         public unsafe SafeUnicodeStringHandle(ReadOnlySpan<char> s)
             : base(IntPtr.Zero, ownsHandle: true)
         {
-            // If s == default then the span represents the null string,
+            // If s contains a null ref then the span represents the null string,
             // and handle should be IntPtr.Zero to match Marshal.StringToHGlobalUni.
             //
             // Since that was already done in the base ctor call, we only need to do
-            // work when s != default.
-            if (s != default)
+            // work when s does not contain a null ref.
+            if (!Unsafe.IsNullRef(ref MemoryMarshal.GetReference(s)))
             {
                 int cch = checked(s.Length + 1);
                 int cb = checked(cch * sizeof(char));
diff --git a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/HPackEncoder.cs b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/HPackEncoder.cs
index 5c0bbecfcc7d..dab588146dc2 100644
--- a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/HPackEncoder.cs
+++ b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/HPackEncoder.cs
@@ -285,12 +285,7 @@ private static bool EncodeLiteralHeaderNewNameCore(byte mask, string name, strin
         }
 
         /// <summary>Encodes a "Literal Header Field without Indexing - New Name".</summary>
-        public static bool EncodeLiteralHeaderFieldWithoutIndexingNewName(string name, ReadOnlySpan<string> values, string separator, Span<byte> destination, out int bytesWritten)
-        {
-            return EncodeLiteralHeaderFieldWithoutIndexingNewName(name, values, separator, valueEncoding: null, destination, out bytesWritten);
-        }
-
-        public static bool EncodeLiteralHeaderFieldWithoutIndexingNewName(string name, ReadOnlySpan<string> values, string separator, Encoding? valueEncoding, Span<byte> destination, out int bytesWritten)
+        public static bool EncodeLiteralHeaderFieldWithoutIndexingNewName(string name, ReadOnlySpan<string> values, byte[] separator, Encoding? valueEncoding, Span<byte> destination, out int bytesWritten)
         {
             // From https://tools.ietf.org/html/rfc7541#section-6.2.2
             // ------------------------------------------------------
@@ -515,12 +510,7 @@ public static bool EncodeDynamicTableSizeUpdate(int value, Span<byte> destinatio
             return false;
         }
 
-        public static bool EncodeStringLiterals(ReadOnlySpan<string> values, string? separator, Span<byte> destination, out int bytesWritten)
-        {
-            return EncodeStringLiterals(values, separator, valueEncoding: null, destination, out bytesWritten);
-        }
-
-        public static bool EncodeStringLiterals(ReadOnlySpan<string> values, string? separator, Encoding? valueEncoding, Span<byte> destination, out int bytesWritten)
+        public static bool EncodeStringLiterals(ReadOnlySpan<string> values, byte[]? separator, Encoding? valueEncoding, Span<byte> destination, out int bytesWritten)
         {
             bytesWritten = 0;
 
@@ -536,23 +526,22 @@ public static bool EncodeStringLiterals(ReadOnlySpan<string> values, string? sep
             if (destination.Length != 0)
             {
                 Debug.Assert(separator != null);
-                int valueLength;
+                Debug.Assert(Ascii.IsValid(separator));
+                int valueLength = checked((values.Length - 1) * separator.Length);
 
-                // Calculate length of all parts and separators.
+                // Calculate length of all values.
                 if (valueEncoding is null || ReferenceEquals(valueEncoding, Encoding.Latin1))
                 {
-                    valueLength = checked((int)(values.Length - 1) * separator.Length);
                     foreach (string part in values)
                     {
-                        valueLength = checked((int)(valueLength + part.Length));
+                        valueLength = checked(valueLength + part.Length);
                     }
                 }
                 else
                 {
-                    valueLength = checked((int)(values.Length - 1) * valueEncoding.GetByteCount(separator));
                     foreach (string part in values)
                     {
-                        valueLength = checked((int)(valueLength + valueEncoding.GetByteCount(part)));
+                        valueLength = checked(valueLength + valueEncoding.GetByteCount(part));
                     }
                 }
 
@@ -571,7 +560,7 @@ public static bool EncodeStringLiterals(ReadOnlySpan<string> values, string? sep
 
                             for (int i = 1; i < values.Length; i++)
                             {
-                                EncodeValueStringPart(separator, destination);
+                                separator.CopyTo(destination);
                                 destination = destination.Slice(separator.Length);
 
                                 value = values[i];
@@ -586,8 +575,8 @@ public static bool EncodeStringLiterals(ReadOnlySpan<string> values, string? sep
 
                             for (int i = 1; i < values.Length; i++)
                             {
-                                written = valueEncoding.GetBytes(separator, destination);
-                                destination = destination.Slice(written);
+                                separator.CopyTo(destination);
+                                destination = destination.Slice(separator.Length);
 
                                 written = valueEncoding.GetBytes(values[i], destination);
                                 destination = destination.Slice(written);
diff --git a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/Huffman.cs b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/Huffman.cs
index b23b7e63529d..980bfbc35799 100644
--- a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/Huffman.cs
+++ b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/Huffman.cs
@@ -677,7 +677,7 @@ public static int Decode(ReadOnlySpan<byte> src, ref byte[] dstArray)
             // see comments in GenerateDecodingLookupTree() describing decoding table
 
             Span<byte> dst = dstArray;
-            Debug.Assert(dst != null && dst.Length > 0);
+            Debug.Assert(dst.Length > 0);
 
             ushort[] decodingTree = s_decodingTree;
 
diff --git a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http3/QPack/QPackEncoder.cs b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http3/QPack/QPackEncoder.cs
index aa951b249719..5d96530b457d 100644
--- a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http3/QPack/QPackEncoder.cs
+++ b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http3/QPack/QPackEncoder.cs
@@ -144,14 +144,9 @@ public static bool EncodeLiteralHeaderFieldWithoutNameReference(string name, str
         /// <summary>
         /// Encodes a Literal Header Field Without Name Reference, building the value by concatenating a collection of strings with separators.
         /// </summary>
-        public static bool EncodeLiteralHeaderFieldWithoutNameReference(string name, ReadOnlySpan<string> values, string valueSeparator, Span<byte> destination, out int bytesWritten)
+        public static bool EncodeLiteralHeaderFieldWithoutNameReference(string name, ReadOnlySpan<string> values, byte[] separator, Encoding? valueEncoding, Span<byte> destination, out int bytesWritten)
         {
-            return EncodeLiteralHeaderFieldWithoutNameReference(name, values, valueSeparator, valueEncoding: null, destination, out bytesWritten);
-        }
-
-        public static bool EncodeLiteralHeaderFieldWithoutNameReference(string name, ReadOnlySpan<string> values, string valueSeparator, Encoding? valueEncoding, Span<byte> destination, out int bytesWritten)
-        {
-            if (EncodeNameString(name, destination, out int nameLength) && EncodeValueString(values, valueSeparator, valueEncoding, destination.Slice(nameLength), out int valueLength))
+            if (EncodeNameString(name, destination, out int nameLength) && EncodeValueString(values, separator, valueEncoding, destination.Slice(nameLength), out int valueLength))
             {
                 bytesWritten = nameLength + valueLength;
                 return true;
@@ -222,12 +217,7 @@ private static bool EncodeValueString(string s, Encoding? valueEncoding, Span<by
         /// <summary>
         /// Encodes a value by concatenating a collection of strings, separated by a separator string.
         /// </summary>
-        public static bool EncodeValueString(ReadOnlySpan<string> values, string? separator, Span<byte> buffer, out int length)
-        {
-            return EncodeValueString(values, separator, valueEncoding: null, buffer, out length);
-        }
-
-        public static bool EncodeValueString(ReadOnlySpan<string> values, string? separator, Encoding? valueEncoding, Span<byte> buffer, out int length)
+        public static bool EncodeValueString(ReadOnlySpan<string> values, byte[]? separator, Encoding? valueEncoding, Span<byte> buffer, out int length)
         {
             if (values.Length == 1)
             {
@@ -243,10 +233,11 @@ public static bool EncodeValueString(ReadOnlySpan<string> values, string? separa
             if (buffer.Length > 0)
             {
                 Debug.Assert(separator != null);
-                int valueLength;
+                Debug.Assert(Ascii.IsValid(separator));
+                int valueLength = separator.Length * (values.Length - 1);
+
                 if (valueEncoding is null || ReferenceEquals(valueEncoding, Encoding.Latin1))
                 {
-                    valueLength = separator.Length * (values.Length - 1);
                     foreach (string part in values)
                     {
                         valueLength += part.Length;
@@ -254,7 +245,6 @@ public static bool EncodeValueString(ReadOnlySpan<string> values, string? separa
                 }
                 else
                 {
-                    valueLength = valueEncoding.GetByteCount(separator) * (values.Length - 1);
                     foreach (string part in values)
                     {
                         valueLength += valueEncoding.GetByteCount(part);
@@ -275,7 +265,7 @@ public static bool EncodeValueString(ReadOnlySpan<string> values, string? separa
 
                             for (int i = 1; i < values.Length; i++)
                             {
-                                EncodeValueStringPart(separator, buffer);
+                                separator.CopyTo(buffer);
                                 buffer = buffer.Slice(separator.Length);
 
                                 value = values[i];
@@ -290,8 +280,8 @@ public static bool EncodeValueString(ReadOnlySpan<string> values, string? separa
 
                             for (int i = 1; i < values.Length; i++)
                             {
-                                written = valueEncoding.GetBytes(separator, buffer);
-                                buffer = buffer.Slice(written);
+                                separator.CopyTo(buffer);
+                                buffer = buffer.Slice(separator.Length);
 
                                 written = valueEncoding.GetBytes(values[i], buffer);
                                 buffer = buffer.Slice(written);
diff --git a/src/libraries/Common/src/System/Net/Security/CertificateValidation.OSX.cs b/src/libraries/Common/src/System/Net/Security/CertificateValidation.OSX.cs
index aee4b77b5083..b269a0fb70fa 100644
--- a/src/libraries/Common/src/System/Net/Security/CertificateValidation.OSX.cs
+++ b/src/libraries/Common/src/System/Net/Security/CertificateValidation.OSX.cs
@@ -14,7 +14,7 @@ internal static class CertificateValidation
         private static readonly IdnMapping s_idnMapping = new IdnMapping();
 
         // WARNING: This function will do the verification using OpenSSL. If the intention is to use OS function, caller should use CertificatePal interface.
-        internal static SslPolicyErrors BuildChainAndVerifyProperties(X509Chain chain, X509Certificate2 remoteCertificate, bool checkCertName, bool _ /*isServer*/, string? hostName, IntPtr certificateBuffer, int bufferLength = 0)
+        internal static SslPolicyErrors BuildChainAndVerifyProperties(X509Chain chain, X509Certificate2 remoteCertificate, bool checkCertName, bool _ /*isServer*/, string? hostName, Span<byte> certificateBuffer)
         {
             SslPolicyErrors errors = chain.Build(remoteCertificate) ?
                 SslPolicyErrors.None :
@@ -31,15 +31,24 @@ internal static SslPolicyErrors BuildChainAndVerifyProperties(X509Chain chain, X
             }
 
             SafeX509Handle certHandle;
-            if (certificateBuffer != IntPtr.Zero && bufferLength > 0)
+            unsafe
             {
-                certHandle = Interop.Crypto.DecodeX509(certificateBuffer, bufferLength);
-            }
-            else
-            {
-                // We dont't have DER encoded buffer.
-                byte[] der = remoteCertificate.Export(X509ContentType.Cert);
-                certHandle = Interop.Crypto.DecodeX509(Marshal.UnsafeAddrOfPinnedArrayElement(der, 0), der.Length);
+                if (certificateBuffer.Length > 0)
+                {
+                    fixed (byte* pCert = certificateBuffer)
+                    {
+                        certHandle = Interop.Crypto.DecodeX509((IntPtr)pCert, certificateBuffer.Length);
+                    }
+                }
+                else
+                {
+                    // We dont't have DER encoded buffer.
+                    byte[] der = remoteCertificate.Export(X509ContentType.Cert);
+                    fixed (byte* pDer = der)
+                    {
+                        certHandle = Interop.Crypto.DecodeX509((IntPtr)pDer, der.Length);
+                    }
+                }
             }
 
             int hostNameMatch;
diff --git a/src/libraries/Common/src/System/Net/Security/CertificateValidation.Unix.cs b/src/libraries/Common/src/System/Net/Security/CertificateValidation.Unix.cs
index 65a1adb492fa..da3cb38a8682 100644
--- a/src/libraries/Common/src/System/Net/Security/CertificateValidation.Unix.cs
+++ b/src/libraries/Common/src/System/Net/Security/CertificateValidation.Unix.cs
@@ -13,7 +13,7 @@ internal static class CertificateValidation
         private static readonly IdnMapping s_idnMapping = new IdnMapping();
 
 #pragma warning disable IDE0060
-        internal static SslPolicyErrors BuildChainAndVerifyProperties(X509Chain chain, X509Certificate2 remoteCertificate, bool checkCertName, bool isServer, string? hostName, IntPtr certificateBuffer, int bufferLength)
+        internal static SslPolicyErrors BuildChainAndVerifyProperties(X509Chain chain, X509Certificate2 remoteCertificate, bool checkCertName, bool isServer, string? hostName, Span<byte> certificateBuffer)
             => BuildChainAndVerifyProperties(chain, remoteCertificate, checkCertName, isServer, hostName);
 #pragma warning restore IDE0060
 
diff --git a/src/libraries/Common/src/System/Net/Security/CertificateValidation.Windows.cs b/src/libraries/Common/src/System/Net/Security/CertificateValidation.Windows.cs
index d068015e534c..90be80c734cc 100644
--- a/src/libraries/Common/src/System/Net/Security/CertificateValidation.Windows.cs
+++ b/src/libraries/Common/src/System/Net/Security/CertificateValidation.Windows.cs
@@ -14,7 +14,7 @@ namespace System.Net
     internal static partial class CertificateValidation
     {
 #pragma warning disable IDE0060
-        internal static SslPolicyErrors BuildChainAndVerifyProperties(X509Chain chain, X509Certificate2 remoteCertificate, bool checkCertName, bool isServer, string? hostName, IntPtr certificateBuffer, int bufferLength)
+        internal static SslPolicyErrors BuildChainAndVerifyProperties(X509Chain chain, X509Certificate2 remoteCertificate, bool checkCertName, bool isServer, string? hostName, Span<byte> certificateBuffer)
             => BuildChainAndVerifyProperties(chain, remoteCertificate, checkCertName, isServer, hostName);
 #pragma warning restore IDE0060
 
diff --git a/src/libraries/Common/src/System/Number.Formatting.Common.cs b/src/libraries/Common/src/System/Number.Formatting.Common.cs
new file mode 100644
index 000000000000..4caa59472eae
--- /dev/null
+++ b/src/libraries/Common/src/System/Number.Formatting.Common.cs
@@ -0,0 +1,1191 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Globalization;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace System
+{
+    internal static partial class Number
+    {
+        private const int DefaultPrecisionExponentialFormat = 6;
+
+        private const int MaxUInt32DecDigits = 10;
+        private const string PosNumberFormat = "#";
+
+        private static readonly string[] s_posCurrencyFormats =
+        {
+            "$#", "#$", "$ #", "# $"
+        };
+
+        private static readonly string[] s_negCurrencyFormats =
+        {
+            "($#)", "-$#", "$-#", "$#-",
+            "(#$)", "-#$", "#-$", "#$-",
+            "-# $", "-$ #", "# $-", "$ #-",
+            "$ -#", "#- $", "($ #)", "(# $)",
+            "$- #"
+        };
+
+        private static readonly string[] s_posPercentFormats =
+        {
+            "# %", "#%", "%#", "% #"
+        };
+
+        private static readonly string[] s_negPercentFormats =
+        {
+            "-# %", "-#%", "-%#",
+            "%-#", "%#-",
+            "#-%", "#%-",
+            "-% #", "# %-", "% #-",
+            "% -#", "#- %"
+        };
+
+        private static readonly string[] s_negNumberFormats =
+        {
+            "(#)", "-#", "- #", "#-", "# -",
+        };
+
+        internal static unsafe char ParseFormatSpecifier(ReadOnlySpan<char> format, out int digits)
+        {
+            char c = default;
+            if (format.Length > 0)
+            {
+                // If the format begins with a symbol, see if it's a standard format
+                // with or without a specified number of digits.
+                c = format[0];
+                if (char.IsAsciiLetter(c))
+                {
+                    // Fast path for sole symbol, e.g. "D"
+                    if (format.Length == 1)
+                    {
+                        digits = -1;
+                        return c;
+                    }
+
+                    if (format.Length == 2)
+                    {
+                        // Fast path for symbol and single digit, e.g. "X4"
+                        int d = format[1] - '0';
+                        if ((uint)d < 10)
+                        {
+                            digits = d;
+                            return c;
+                        }
+                    }
+                    else if (format.Length == 3)
+                    {
+                        // Fast path for symbol and double digit, e.g. "F12"
+                        int d1 = format[1] - '0', d2 = format[2] - '0';
+                        if ((uint)d1 < 10 && (uint)d2 < 10)
+                        {
+                            digits = d1 * 10 + d2;
+                            return c;
+                        }
+                    }
+
+                    // Fallback for symbol and any length digits.  The digits value must be >= 0 && <= 999_999_999,
+                    // but it can begin with any number of 0s, and thus we may need to check more than 9
+                    // digits.  Further, for compat, we need to stop when we hit a null char.
+                    int n = 0;
+                    int i = 1;
+                    while ((uint)i < (uint)format.Length && char.IsAsciiDigit(format[i]))
+                    {
+                        // Check if we are about to overflow past our limit of 9 digits
+                        if (n >= 100_000_000)
+                        {
+                            ThrowHelper.ThrowFormatException_BadFormatSpecifier();
+                        }
+                        n = (n * 10) + format[i++] - '0';
+                    }
+
+                    // If we're at the end of the digits rather than having stopped because we hit something
+                    // other than a digit or overflowed, return the standard format info.
+                    if ((uint)i >= (uint)format.Length || format[i] == '\0')
+                    {
+                        digits = n;
+                        return c;
+                    }
+                }
+            }
+
+            // Default empty format to be "G"; custom format is signified with '\0'.
+            digits = -1;
+            return format.Length == 0 || c == '\0' ? // For compat, treat '\0' as the end of the specifier, even if the specifier extends beyond it.
+                'G' :
+                '\0';
+        }
+
+#if !SYSTEM_PRIVATE_CORELIB
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static unsafe TChar* UInt32ToDecChars<TChar>(TChar* bufferEnd, uint value, int digits) where TChar : unmanaged, IUtfChar<TChar>
+        {
+            // TODO: Consider to bring optimized implementation from CoreLib
+
+            while (value != 0 || digits > 0)
+            {
+                digits--;
+                (value, uint remainder) = Math.DivRem(value, 10);
+                *(--bufferEnd) = TChar.CastFrom(remainder + '0');
+            }
+
+            return bufferEnd;
+        }
+#endif
+
+        internal static unsafe void NumberToString<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, char format, int nMaxDigits, NumberFormatInfo info) where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(sizeof(TChar) == sizeof(char) || sizeof(TChar) == sizeof(byte));
+
+            number.CheckConsistency();
+            bool isCorrectlyRounded = (number.Kind == NumberBufferKind.FloatingPoint);
+
+            switch (format)
+            {
+                case 'C':
+                case 'c':
+                    {
+                        if (nMaxDigits < 0)
+                        {
+                            nMaxDigits = info.CurrencyDecimalDigits;
+                        }
+
+                        RoundNumber(ref number, number.Scale + nMaxDigits, isCorrectlyRounded); // Don't change this line to use digPos since digCount could have its sign changed.
+
+                        FormatCurrency(ref vlb, ref number, nMaxDigits, info);
+
+                        break;
+                    }
+
+                case 'F':
+                case 'f':
+                    {
+                        if (nMaxDigits < 0)
+                        {
+                            nMaxDigits = info.NumberDecimalDigits;
+                        }
+
+                        RoundNumber(ref number, number.Scale + nMaxDigits, isCorrectlyRounded);
+
+                        if (number.IsNegative)
+                        {
+                            vlb.Append(info.NegativeSignTChar<TChar>());
+                        }
+
+                        FormatFixed(ref vlb, ref number, nMaxDigits, null, info.NumberDecimalSeparatorTChar<TChar>(), null);
+
+                        break;
+                    }
+
+                case 'N':
+                case 'n':
+                    {
+                        if (nMaxDigits < 0)
+                        {
+                            nMaxDigits = info.NumberDecimalDigits; // Since we are using digits in our calculation
+                        }
+
+                        RoundNumber(ref number, number.Scale + nMaxDigits, isCorrectlyRounded);
+
+                        FormatNumber(ref vlb, ref number, nMaxDigits, info);
+
+                        break;
+                    }
+
+                case 'E':
+                case 'e':
+                    {
+                        if (nMaxDigits < 0)
+                        {
+                            nMaxDigits = DefaultPrecisionExponentialFormat;
+                        }
+                        nMaxDigits++;
+
+                        RoundNumber(ref number, nMaxDigits, isCorrectlyRounded);
+
+                        if (number.IsNegative)
+                        {
+                            vlb.Append(info.NegativeSignTChar<TChar>());
+                        }
+
+                        FormatScientific(ref vlb, ref number, nMaxDigits, info, format);
+
+                        break;
+                    }
+
+                case 'G':
+                case 'g':
+                    {
+                        bool noRounding = false;
+                        if (nMaxDigits < 1)
+                        {
+                            if ((number.Kind == NumberBufferKind.Decimal) && (nMaxDigits == -1))
+                            {
+                                noRounding = true;  // Turn off rounding for ECMA compliance to output trailing 0's after decimal as significant
+
+                                if (number.Digits[0] == 0)
+                                {
+                                    // -0 should be formatted as 0 for decimal. This is normally handled by RoundNumber (which we are skipping)
+                                    goto SkipSign;
+                                }
+
+                                goto SkipRounding;
+                            }
+                            else
+                            {
+                                // This ensures that the PAL code pads out to the correct place even when we use the default precision
+                                nMaxDigits = number.DigitsCount;
+                            }
+                        }
+
+                        RoundNumber(ref number, nMaxDigits, isCorrectlyRounded);
+
+                    SkipRounding:
+                        if (number.IsNegative)
+                        {
+                            vlb.Append(info.NegativeSignTChar<TChar>());
+                        }
+
+                    SkipSign:
+                        FormatGeneral(ref vlb, ref number, nMaxDigits, info, (char)(format - ('G' - 'E')), noRounding);
+
+                        break;
+                    }
+
+                case 'P':
+                case 'p':
+                    {
+                        if (nMaxDigits < 0)
+                        {
+                            nMaxDigits = info.PercentDecimalDigits;
+                        }
+                        number.Scale += 2;
+
+                        RoundNumber(ref number, number.Scale + nMaxDigits, isCorrectlyRounded);
+
+                        FormatPercent(ref vlb, ref number, nMaxDigits, info);
+
+                        break;
+                    }
+
+                case 'R':
+                case 'r':
+                    {
+                        format = (char)(format - ('R' - 'G'));
+                        Debug.Assert(format is 'G' or 'g');
+                        goto case 'G';
+                    }
+
+                default:
+                    ThrowHelper.ThrowFormatException_BadFormatSpecifier();
+                    break;
+            }
+        }
+
+        internal static unsafe void NumberToStringFormat<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, ReadOnlySpan<char> format, NumberFormatInfo info) where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(sizeof(TChar) == sizeof(char) || sizeof(TChar) == sizeof(byte));
+
+            number.CheckConsistency();
+
+            int digitCount;
+            int decimalPos;
+            int firstDigit;
+            int lastDigit;
+            int digPos;
+            bool scientific;
+            int thousandPos;
+            int thousandCount = 0;
+            bool thousandSeps;
+            int scaleAdjust;
+            int adjust;
+
+            int section;
+            int src;
+            byte* dig = number.DigitsPtr;
+            char ch;
+
+            section = FindSection(format, dig[0] == 0 ? 2 : number.IsNegative ? 1 : 0);
+
+            while (true)
+            {
+                digitCount = 0;
+                decimalPos = -1;
+                firstDigit = 0x7FFFFFFF;
+                lastDigit = 0;
+                scientific = false;
+                thousandPos = -1;
+                thousandSeps = false;
+                scaleAdjust = 0;
+                src = section;
+
+                fixed (char* pFormat = &MemoryMarshal.GetReference(format))
+                {
+                    while (src < format.Length && (ch = pFormat[src++]) != 0 && ch != ';')
+                    {
+                        switch (ch)
+                        {
+                            case '#':
+                                digitCount++;
+                                break;
+
+                            case '0':
+                                if (firstDigit == 0x7FFFFFFF)
+                                {
+                                    firstDigit = digitCount;
+                                }
+                                digitCount++;
+                                lastDigit = digitCount;
+                                break;
+
+                            case '.':
+                                if (decimalPos < 0)
+                                {
+                                    decimalPos = digitCount;
+                                }
+                                break;
+
+                            case ',':
+                                if (digitCount > 0 && decimalPos < 0)
+                                {
+                                    if (thousandPos >= 0)
+                                    {
+                                        if (thousandPos == digitCount)
+                                        {
+                                            thousandCount++;
+                                            break;
+                                        }
+                                        thousandSeps = true;
+                                    }
+                                    thousandPos = digitCount;
+                                    thousandCount = 1;
+                                }
+                                break;
+
+                            case '%':
+                                scaleAdjust += 2;
+                                break;
+
+                            case '\x2030':
+                                scaleAdjust += 3;
+                                break;
+
+                            case '\'':
+                            case '"':
+                                while (src < format.Length && pFormat[src] != 0 && pFormat[src++] != ch) ;
+                                break;
+
+                            case '\\':
+                                if (src < format.Length && pFormat[src] != 0)
+                                {
+                                    src++;
+                                }
+                                break;
+
+                            case 'E':
+                            case 'e':
+                                if ((src < format.Length && pFormat[src] == '0') ||
+                                    (src + 1 < format.Length && (pFormat[src] == '+' || pFormat[src] == '-') && pFormat[src + 1] == '0'))
+                                {
+                                    while (++src < format.Length && pFormat[src] == '0') ;
+                                    scientific = true;
+                                }
+                                break;
+                        }
+                    }
+                }
+
+                if (decimalPos < 0)
+                {
+                    decimalPos = digitCount;
+                }
+
+                if (thousandPos >= 0)
+                {
+                    if (thousandPos == decimalPos)
+                    {
+                        scaleAdjust -= thousandCount * 3;
+                    }
+                    else
+                    {
+                        thousandSeps = true;
+                    }
+                }
+
+                if (dig[0] != 0)
+                {
+                    number.Scale += scaleAdjust;
+                    int pos = scientific ? digitCount : number.Scale + digitCount - decimalPos;
+                    RoundNumber(ref number, pos, isCorrectlyRounded: false);
+                    if (dig[0] == 0)
+                    {
+                        src = FindSection(format, 2);
+                        if (src != section)
+                        {
+                            section = src;
+                            continue;
+                        }
+                    }
+                }
+                else
+                {
+                    if (number.Kind != NumberBufferKind.FloatingPoint)
+                    {
+                        // The integer types don't have a concept of -0 and decimal always format -0 as 0
+                        number.IsNegative = false;
+                    }
+                    number.Scale = 0;      // Decimals with scale ('0.00') should be rounded.
+                }
+
+                break;
+            }
+
+            firstDigit = firstDigit < decimalPos ? decimalPos - firstDigit : 0;
+            lastDigit = lastDigit > decimalPos ? decimalPos - lastDigit : 0;
+            if (scientific)
+            {
+                digPos = decimalPos;
+                adjust = 0;
+            }
+            else
+            {
+                digPos = number.Scale > decimalPos ? number.Scale : decimalPos;
+                adjust = number.Scale - decimalPos;
+            }
+            src = section;
+
+            // Adjust can be negative, so we make this an int instead of an unsigned int.
+            // Adjust represents the number of characters over the formatting e.g. format string is "0000" and you are trying to
+            // format 100000 (6 digits). Means adjust will be 2. On the other hand if you are trying to format 10 adjust will be
+            // -2 and we'll need to fixup these digits with 0 padding if we have 0 formatting as in this example.
+            Span<int> thousandsSepPos = stackalloc int[4];
+            int thousandsSepCtr = -1;
+
+            if (thousandSeps)
+            {
+                // We need to precompute this outside the number formatting loop
+                if (info.NumberGroupSeparator.Length > 0)
+                {
+                    // We need this array to figure out where to insert the thousands separator. We would have to traverse the string
+                    // backwards. PIC formatting always traverses forwards. These indices are precomputed to tell us where to insert
+                    // the thousands separator so we can get away with traversing forwards. Note we only have to compute up to digPos.
+                    // The max is not bound since you can have formatting strings of the form "000,000..", and this
+                    // should handle that case too.
+
+                    int[] groupDigits = info.NumberGroupSizes();
+
+                    int groupSizeIndex = 0;     // Index into the groupDigits array.
+                    int groupTotalSizeCount = 0;
+                    int groupSizeLen = groupDigits.Length;    // The length of groupDigits array.
+                    if (groupSizeLen != 0)
+                    {
+                        groupTotalSizeCount = groupDigits[groupSizeIndex];   // The current running total of group size.
+                    }
+                    int groupSize = groupTotalSizeCount;
+
+                    int totalDigits = digPos + ((adjust < 0) ? adjust : 0); // Actual number of digits in o/p
+                    int numDigits = (firstDigit > totalDigits) ? firstDigit : totalDigits;
+                    while (numDigits > groupTotalSizeCount)
+                    {
+                        if (groupSize == 0)
+                        {
+                            break;
+                        }
+
+                        ++thousandsSepCtr;
+                        if (thousandsSepCtr >= thousandsSepPos.Length)
+                        {
+                            var newThousandsSepPos = new int[thousandsSepPos.Length * 2];
+                            thousandsSepPos.CopyTo(newThousandsSepPos);
+                            thousandsSepPos = newThousandsSepPos;
+                        }
+
+                        thousandsSepPos[thousandsSepCtr] = groupTotalSizeCount;
+                        if (groupSizeIndex < groupSizeLen - 1)
+                        {
+                            groupSizeIndex++;
+                            groupSize = groupDigits[groupSizeIndex];
+                        }
+                        groupTotalSizeCount += groupSize;
+                    }
+                }
+            }
+
+            if (number.IsNegative && (section == 0) && (number.Scale != 0))
+            {
+                vlb.Append(info.NegativeSignTChar<TChar>());
+            }
+
+            bool decimalWritten = false;
+
+            fixed (char* pFormat = &MemoryMarshal.GetReference(format))
+            {
+                byte* cur = dig;
+
+                while (src < format.Length && (ch = pFormat[src++]) != 0 && ch != ';')
+                {
+                    if (adjust > 0)
+                    {
+                        switch (ch)
+                        {
+                            case '#':
+                            case '0':
+                            case '.':
+                                while (adjust > 0)
+                                {
+                                    // digPos will be one greater than thousandsSepPos[thousandsSepCtr] since we are at
+                                    // the character after which the groupSeparator needs to be appended.
+                                    vlb.Append(TChar.CastFrom(*cur != 0 ? (char)(*cur++) : '0'));
+                                    if (thousandSeps && digPos > 1 && thousandsSepCtr >= 0)
+                                    {
+                                        if (digPos == thousandsSepPos[thousandsSepCtr] + 1)
+                                        {
+                                            vlb.Append(info.NumberGroupSeparatorTChar<TChar>());
+                                            thousandsSepCtr--;
+                                        }
+                                    }
+                                    digPos--;
+                                    adjust--;
+                                }
+                                break;
+                        }
+                    }
+
+                    switch (ch)
+                    {
+                        case '#':
+                        case '0':
+                            {
+                                if (adjust < 0)
+                                {
+                                    adjust++;
+                                    ch = digPos <= firstDigit ? '0' : '\0';
+                                }
+                                else
+                                {
+                                    ch = *cur != 0 ? (char)(*cur++) : digPos > lastDigit ? '0' : '\0';
+                                }
+
+                                if (ch != 0)
+                                {
+                                    vlb.Append(TChar.CastFrom(ch));
+                                    if (thousandSeps && digPos > 1 && thousandsSepCtr >= 0)
+                                    {
+                                        if (digPos == thousandsSepPos[thousandsSepCtr] + 1)
+                                        {
+                                            vlb.Append(info.NumberGroupSeparatorTChar<TChar>());
+                                            thousandsSepCtr--;
+                                        }
+                                    }
+                                }
+
+                                digPos--;
+                                break;
+                            }
+
+                        case '.':
+                            {
+                                if (digPos != 0 || decimalWritten)
+                                {
+                                    // For compatibility, don't echo repeated decimals
+                                    break;
+                                }
+
+                                // If the format has trailing zeros or the format has a decimal and digits remain
+                                if (lastDigit < 0 || (decimalPos < digitCount && *cur != 0))
+                                {
+                                    vlb.Append(info.NumberDecimalSeparatorTChar<TChar>());
+                                    decimalWritten = true;
+                                }
+                                break;
+                            }
+
+                        case '\x2030':
+                            vlb.Append(info.PerMilleSymbolTChar<TChar>());
+                            break;
+
+                        case '%':
+                            vlb.Append(info.PercentSymbolTChar<TChar>());
+                            break;
+
+                        case ',':
+                            break;
+
+                        case '\'':
+                        case '"':
+                            while (src < format.Length && pFormat[src] != 0 && pFormat[src] != ch)
+                            {
+                                AppendUnknownChar(ref vlb, pFormat[src++]);
+                            }
+
+                            if (src < format.Length && pFormat[src] != 0)
+                            {
+                                src++;
+                            }
+                            break;
+
+                        case '\\':
+                            if (src < format.Length && pFormat[src] != 0)
+                            {
+                                AppendUnknownChar(ref vlb, pFormat[src++]);
+                            }
+                            break;
+
+                        case 'E':
+                        case 'e':
+                            {
+                                bool positiveSign = false;
+                                int i = 0;
+                                if (scientific)
+                                {
+                                    if (src < format.Length && pFormat[src] == '0')
+                                    {
+                                        // Handles E0, which should format the same as E-0
+                                        i++;
+                                    }
+                                    else if (src + 1 < format.Length && pFormat[src] == '+' && pFormat[src + 1] == '0')
+                                    {
+                                        // Handles E+0
+                                        positiveSign = true;
+                                    }
+                                    else if (src + 1 < format.Length && pFormat[src] == '-' && pFormat[src + 1] == '0')
+                                    {
+                                        // Handles E-0
+                                        // Do nothing, this is just a place holder s.t. we don't break out of the loop.
+                                    }
+                                    else
+                                    {
+                                        vlb.Append(TChar.CastFrom(ch));
+                                        break;
+                                    }
+
+                                    while (++src < format.Length && pFormat[src] == '0')
+                                    {
+                                        i++;
+                                    }
+
+                                    if (i > 10)
+                                    {
+                                        i = 10;
+                                    }
+
+                                    int exp = dig[0] == 0 ? 0 : number.Scale - decimalPos;
+                                    FormatExponent(ref vlb, info, exp, ch, i, positiveSign);
+                                    scientific = false;
+                                }
+                                else
+                                {
+                                    vlb.Append(TChar.CastFrom(ch));
+                                    if (src < format.Length)
+                                    {
+                                        if (pFormat[src] == '+' || pFormat[src] == '-')
+                                        {
+                                            AppendUnknownChar(ref vlb, pFormat[src++]);
+                                        }
+
+                                        while (src < format.Length && pFormat[src] == '0')
+                                        {
+                                            AppendUnknownChar(ref vlb, pFormat[src++]);
+                                        }
+                                    }
+                                }
+                                break;
+                            }
+
+                        default:
+                            AppendUnknownChar(ref vlb, ch);
+                            break;
+                    }
+                }
+            }
+
+            if (number.IsNegative && (section == 0) && (number.Scale == 0) && (vlb.Length > 0))
+            {
+                vlb.Insert(0, info.NegativeSignTChar<TChar>());
+            }
+        }
+
+        private static unsafe void FormatCurrency<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, int nMaxDigits, NumberFormatInfo info) where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(sizeof(TChar) == sizeof(char) || sizeof(TChar) == sizeof(byte));
+
+            string fmt = number.IsNegative ?
+                s_negCurrencyFormats[info.CurrencyNegativePattern] :
+                s_posCurrencyFormats[info.CurrencyPositivePattern];
+
+            foreach (char ch in fmt)
+            {
+                switch (ch)
+                {
+                    case '#':
+                        FormatFixed(ref vlb, ref number, nMaxDigits, info.CurrencyGroupSizes(), info.CurrencyDecimalSeparatorTChar<TChar>(), info.CurrencyGroupSeparatorTChar<TChar>());
+                        break;
+
+                    case '-':
+                        vlb.Append(info.NegativeSignTChar<TChar>());
+                        break;
+
+                    case '$':
+                        vlb.Append(info.CurrencySymbolTChar<TChar>());
+                        break;
+
+                    default:
+                        vlb.Append(TChar.CastFrom(ch));
+                        break;
+                }
+            }
+        }
+
+        private static unsafe void FormatFixed<TChar>(
+            ref ValueListBuilder<TChar> vlb, ref NumberBuffer number,
+            int nMaxDigits, int[]? groupDigits,
+            ReadOnlySpan<TChar> sDecimal, ReadOnlySpan<TChar> sGroup) where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(sizeof(TChar) == sizeof(char) || sizeof(TChar) == sizeof(byte));
+
+            int digPos = number.Scale;
+            byte* dig = number.DigitsPtr;
+
+            if (digPos > 0)
+            {
+                if (groupDigits != null)
+                {
+                    int groupSizeIndex = 0;                             // Index into the groupDigits array.
+                    int bufferSize = digPos;                            // The length of the result buffer string.
+                    int groupSize = 0;                                  // The current group size.
+
+                    // Find out the size of the string buffer for the result.
+                    if (groupDigits.Length != 0) // You can pass in 0 length arrays
+                    {
+                        int groupSizeCount = groupDigits[groupSizeIndex];   // The current total of group size.
+
+                        while (digPos > groupSizeCount)
+                        {
+                            groupSize = groupDigits[groupSizeIndex];
+                            if (groupSize == 0)
+                            {
+                                break;
+                            }
+
+                            bufferSize += sGroup.Length;
+                            if (groupSizeIndex < groupDigits.Length - 1)
+                            {
+                                groupSizeIndex++;
+                            }
+
+                            groupSizeCount += groupDigits[groupSizeIndex];
+                            ArgumentOutOfRangeException.ThrowIfNegative(groupSizeCount | bufferSize, string.Empty); // If we overflow
+                        }
+
+                        groupSize = groupSizeCount == 0 ? 0 : groupDigits[0]; // If you passed in an array with one entry as 0, groupSizeCount == 0
+                    }
+
+                    groupSizeIndex = 0;
+                    int digitCount = 0;
+                    int digLength = number.DigitsCount;
+                    int digStart = (digPos < digLength) ? digPos : digLength;
+                    fixed (TChar* spanPtr = &MemoryMarshal.GetReference(vlb.AppendSpan(bufferSize)))
+                    {
+                        TChar* p = spanPtr + bufferSize - 1;
+                        for (int i = digPos - 1; i >= 0; i--)
+                        {
+                            *(p--) = TChar.CastFrom((i < digStart) ? (char)dig[i] : '0');
+
+                            if (groupSize > 0)
+                            {
+                                digitCount++;
+                                if ((digitCount == groupSize) && (i != 0))
+                                {
+                                    for (int j = sGroup.Length - 1; j >= 0; j--)
+                                    {
+                                        *(p--) = sGroup[j];
+                                    }
+
+                                    if (groupSizeIndex < groupDigits.Length - 1)
+                                    {
+                                        groupSizeIndex++;
+                                        groupSize = groupDigits[groupSizeIndex];
+                                    }
+                                    digitCount = 0;
+                                }
+                            }
+                        }
+
+                        Debug.Assert(p >= spanPtr - 1, "Underflow");
+                        dig += digStart;
+                    }
+                }
+                else
+                {
+                    do
+                    {
+                        vlb.Append(TChar.CastFrom(*dig != 0 ? (char)(*dig++) : '0'));
+                    }
+                    while (--digPos > 0);
+                }
+            }
+            else
+            {
+                vlb.Append(TChar.CastFrom('0'));
+            }
+
+            if (nMaxDigits > 0)
+            {
+                vlb.Append(sDecimal);
+                if ((digPos < 0) && (nMaxDigits > 0))
+                {
+                    int zeroes = Math.Min(-digPos, nMaxDigits);
+                    for (int i = 0; i < zeroes; i++)
+                    {
+                        vlb.Append(TChar.CastFrom('0'));
+                    }
+                    digPos += zeroes;
+                    nMaxDigits -= zeroes;
+                }
+
+                while (nMaxDigits > 0)
+                {
+                    vlb.Append(TChar.CastFrom((*dig != 0) ? (char)(*dig++) : '0'));
+                    nMaxDigits--;
+                }
+            }
+        }
+
+        /// <summary>Appends a char to the builder when the char is not known to be ASCII.</summary>
+        /// <remarks>This requires a helper as if the character isn't ASCII, for UTF-8 encoding it will result in multiple bytes added.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe void AppendUnknownChar<TChar>(ref ValueListBuilder<TChar> vlb, char ch) where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(sizeof(TChar) == sizeof(char) || sizeof(TChar) == sizeof(byte));
+
+            if (sizeof(TChar) == sizeof(char) || char.IsAscii(ch))
+            {
+                vlb.Append(TChar.CastFrom(ch));
+            }
+            else
+            {
+                AppendNonAsciiBytes(ref vlb, ch);
+            }
+
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static void AppendNonAsciiBytes(ref ValueListBuilder<TChar> vlb, char ch)
+            {
+                var r = new Rune(ch);
+                r.EncodeToUtf8(MemoryMarshal.AsBytes(vlb.AppendSpan(r.Utf8SequenceLength)));
+            }
+        }
+
+        private static unsafe void FormatNumber<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, int nMaxDigits, NumberFormatInfo info) where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(sizeof(TChar) == sizeof(char) || sizeof(TChar) == sizeof(byte));
+
+            string fmt = number.IsNegative ?
+                s_negNumberFormats[info.NumberNegativePattern] :
+                PosNumberFormat;
+
+            foreach (char ch in fmt)
+            {
+                switch (ch)
+                {
+                    case '#':
+                        FormatFixed(ref vlb, ref number, nMaxDigits, info.NumberGroupSizes(), info.NumberDecimalSeparatorTChar<TChar>(), info.NumberGroupSeparatorTChar<TChar>());
+                        break;
+
+                    case '-':
+                        vlb.Append(info.NegativeSignTChar<TChar>());
+                        break;
+
+                    default:
+                        vlb.Append(TChar.CastFrom(ch));
+                        break;
+                }
+            }
+        }
+
+        private static unsafe void FormatScientific<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, int nMaxDigits, NumberFormatInfo info, char expChar) where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(sizeof(TChar) == sizeof(char) || sizeof(TChar) == sizeof(byte));
+
+            byte* dig = number.DigitsPtr;
+
+            vlb.Append(TChar.CastFrom((*dig != 0) ? (char)(*dig++) : '0'));
+
+            if (nMaxDigits != 1) // For E0 we would like to suppress the decimal point
+            {
+                vlb.Append(info.NumberDecimalSeparatorTChar<TChar>());
+            }
+
+            while (--nMaxDigits > 0)
+            {
+                vlb.Append(TChar.CastFrom((*dig != 0) ? (char)(*dig++) : '0'));
+            }
+
+            int e = number.Digits[0] == 0 ? 0 : number.Scale - 1;
+            FormatExponent(ref vlb, info, e, expChar, 3, true);
+        }
+
+        private static unsafe void FormatExponent<TChar>(ref ValueListBuilder<TChar> vlb, NumberFormatInfo info, int value, char expChar, int minDigits, bool positiveSign) where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(sizeof(TChar) == sizeof(char) || sizeof(TChar) == sizeof(byte));
+
+            vlb.Append(TChar.CastFrom(expChar));
+
+            if (value < 0)
+            {
+                vlb.Append(info.NegativeSignTChar<TChar>());
+                value = -value;
+            }
+            else
+            {
+                if (positiveSign)
+                {
+                    vlb.Append(info.PositiveSignTChar<TChar>());
+                }
+            }
+
+            TChar* digits = stackalloc TChar[MaxUInt32DecDigits];
+            TChar* p = UInt32ToDecChars(digits + MaxUInt32DecDigits, (uint)value, minDigits);
+            vlb.Append(new ReadOnlySpan<TChar>(p, (int)(digits + MaxUInt32DecDigits - p)));
+        }
+
+        private static unsafe void FormatGeneral<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, int nMaxDigits, NumberFormatInfo info, char expChar, bool suppressScientific) where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(sizeof(TChar) == sizeof(char) || sizeof(TChar) == sizeof(byte));
+
+            int digPos = number.Scale;
+            bool scientific = false;
+
+            if (!suppressScientific)
+            {
+                // Don't switch to scientific notation
+                if (digPos > nMaxDigits || digPos < -3)
+                {
+                    digPos = 1;
+                    scientific = true;
+                }
+            }
+
+            byte* dig = number.DigitsPtr;
+
+            if (digPos > 0)
+            {
+                do
+                {
+                    vlb.Append(TChar.CastFrom((*dig != 0) ? (char)(*dig++) : '0'));
+                }
+                while (--digPos > 0);
+            }
+            else
+            {
+                vlb.Append(TChar.CastFrom('0'));
+            }
+
+            if (*dig != 0 || digPos < 0)
+            {
+                vlb.Append(info.NumberDecimalSeparatorTChar<TChar>());
+
+                while (digPos < 0)
+                {
+                    vlb.Append(TChar.CastFrom('0'));
+                    digPos++;
+                }
+
+                while (*dig != 0)
+                {
+                    vlb.Append(TChar.CastFrom(*dig++));
+                }
+            }
+
+            if (scientific)
+            {
+                FormatExponent(ref vlb, info, number.Scale - 1, expChar, 2, true);
+            }
+        }
+
+        private static unsafe void FormatPercent<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, int nMaxDigits, NumberFormatInfo info) where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(sizeof(TChar) == sizeof(char) || sizeof(TChar) == sizeof(byte));
+
+            string fmt = number.IsNegative ?
+                s_negPercentFormats[info.PercentNegativePattern] :
+                s_posPercentFormats[info.PercentPositivePattern];
+
+            foreach (char ch in fmt)
+            {
+                switch (ch)
+                {
+                    case '#':
+                        FormatFixed(ref vlb, ref number, nMaxDigits, info.PercentGroupSizes(), info.PercentDecimalSeparatorTChar<TChar>(), info.PercentGroupSeparatorTChar<TChar>());
+                        break;
+
+                    case '-':
+                        vlb.Append(info.NegativeSignTChar<TChar>());
+                        break;
+
+                    case '%':
+                        vlb.Append(info.PercentSymbolTChar<TChar>());
+                        break;
+
+                    default:
+                        vlb.Append(TChar.CastFrom(ch));
+                        break;
+                }
+            }
+        }
+
+        internal static unsafe void RoundNumber(ref NumberBuffer number, int pos, bool isCorrectlyRounded)
+        {
+            byte* dig = number.DigitsPtr;
+
+            int i = 0;
+            while (i < pos && dig[i] != '\0')
+            {
+                i++;
+            }
+
+            if ((i == pos) && ShouldRoundUp(dig, i, number.Kind, isCorrectlyRounded))
+            {
+                while (i > 0 && dig[i - 1] == '9')
+                {
+                    i--;
+                }
+
+                if (i > 0)
+                {
+                    dig[i - 1]++;
+                }
+                else
+                {
+                    number.Scale++;
+                    dig[0] = (byte)('1');
+                    i = 1;
+                }
+            }
+            else
+            {
+                while (i > 0 && dig[i - 1] == '0')
+                {
+                    i--;
+                }
+            }
+
+            if (i == 0)
+            {
+                if (number.Kind != NumberBufferKind.FloatingPoint)
+                {
+                    // The integer types don't have a concept of -0 and decimal always format -0 as 0
+                    number.IsNegative = false;
+                }
+                number.Scale = 0;      // Decimals with scale ('0.00') should be rounded.
+            }
+
+            dig[i] = (byte)('\0');
+            number.DigitsCount = i;
+            number.CheckConsistency();
+
+            static bool ShouldRoundUp(byte* dig, int i, NumberBufferKind numberKind, bool isCorrectlyRounded)
+            {
+                // We only want to round up if the digit is greater than or equal to 5 and we are
+                // not rounding a floating-point number. If we are rounding a floating-point number
+                // we have one of two cases.
+                //
+                // In the case of a standard numeric-format specifier, the exact and correctly rounded
+                // string will have been produced. In this scenario, pos will have pointed to the
+                // terminating null for the buffer and so this will return false.
+                //
+                // However, in the case of a custom numeric-format specifier, we currently fall back
+                // to generating Single/DoublePrecisionCustomFormat digits and then rely on this
+                // function to round correctly instead. This can unfortunately lead to double-rounding
+                // bugs but is the best we have right now due to back-compat concerns.
+
+                byte digit = dig[i];
+
+                if ((digit == '\0') || isCorrectlyRounded)
+                {
+                    // Fast path for the common case with no rounding
+                    return false;
+                }
+
+                // Values greater than or equal to 5 should round up, otherwise we round down. The IEEE
+                // 754 spec actually dictates that ties (exactly 5) should round to the nearest even number
+                // but that can have undesired behavior for custom numeric format strings. This probably
+                // needs further thought for .NET 5 so that we can be spec compliant and so that users
+                // can get the desired rounding behavior for their needs.
+
+                return digit >= '5';
+            }
+        }
+
+        private static unsafe int FindSection(ReadOnlySpan<char> format, int section)
+        {
+            int src;
+            char ch;
+
+            if (section == 0)
+            {
+                return 0;
+            }
+
+            fixed (char* pFormat = &MemoryMarshal.GetReference(format))
+            {
+                src = 0;
+                while (true)
+                {
+                    if (src >= format.Length)
+                    {
+                        return 0;
+                    }
+
+                    switch (ch = pFormat[src++])
+                    {
+                        case '\'':
+                        case '"':
+                            while (src < format.Length && pFormat[src] != 0 && pFormat[src++] != ch) ;
+                            break;
+
+                        case '\\':
+                            if (src < format.Length && pFormat[src] != 0)
+                            {
+                                src++;
+                            }
+                            break;
+
+                        case ';':
+                            if (--section != 0)
+                            {
+                                break;
+                            }
+
+                            if (src < format.Length && pFormat[src] != 0 && pFormat[src] != ';')
+                            {
+                                return src;
+                            }
+                            goto case '\0';
+
+                        case '\0':
+                            return 0;
+                    }
+                }
+            }
+        }
+
+#if SYSTEM_PRIVATE_CORELIB
+        private static int[] NumberGroupSizes(this NumberFormatInfo info) => info._numberGroupSizes;
+
+        private static int[] CurrencyGroupSizes(this NumberFormatInfo info) => info._currencyGroupSizes;
+
+        private static int[] PercentGroupSizes(this NumberFormatInfo info) => info._percentGroupSizes;
+#else
+
+        private static int[] NumberGroupSizes(this NumberFormatInfo info) => info.NumberGroupSizes;
+
+        private static int[] CurrencyGroupSizes(this NumberFormatInfo info) => info.CurrencyGroupSizes;
+
+        private static int[] PercentGroupSizes(this NumberFormatInfo info) => info.PercentGroupSizes;
+#endif
+    }
+}
diff --git a/src/libraries/Common/src/System/Number.NumberBuffer.cs b/src/libraries/Common/src/System/Number.NumberBuffer.cs
index 5b4fc7a7564e..f877d3b72989 100644
--- a/src/libraries/Common/src/System/Number.NumberBuffer.cs
+++ b/src/libraries/Common/src/System/Number.NumberBuffer.cs
@@ -17,7 +17,7 @@ internal static partial class Number
         internal const int Int64NumberBufferLength = 19 + 1;    // 19 for the longest input: 9,223,372,036,854,775,807
         internal const int Int128NumberBufferLength = 39 + 1;    // 39 for the longest input: 170,141,183,460,469,231,731,687,303,715,884,105,727
         internal const int SingleNumberBufferLength = 112 + 1 + 1;  // 112 for the longest input + 1 for rounding: 1.40129846E-45
-        internal const int HalfNumberBufferLength = 21; // 19 for the longest input + 1 for rounding (+1 for the null terminator)
+        internal const int HalfNumberBufferLength = 21 + 1 + 1; // 21 for the longest input + 1 for rounding: 0.000122010707855224609375
         internal const int UInt32NumberBufferLength = 10 + 1;   // 10 for the longest input: 4,294,967,295
         internal const int UInt64NumberBufferLength = 20 + 1;   // 20 for the longest input: 18,446,744,073,709,551,615
         internal const int UInt128NumberBufferLength = 39 + 1; // 39 for the longest input: 340,282,366,920,938,463,463,374,607,431,768,211,455
@@ -29,7 +29,9 @@ internal unsafe ref struct NumberBuffer
             public bool IsNegative;
             public bool HasNonZeroTail;
             public NumberBufferKind Kind;
-            public Span<byte> Digits;
+            public byte* DigitsPtr;
+            public int DigitsLength;
+            public readonly Span<byte> Digits => new Span<byte>(DigitsPtr, DigitsLength);
 
             public NumberBuffer(NumberBufferKind kind, byte* digits, int digitsLength) : this(kind, new Span<byte>(digits, digitsLength))
             {
@@ -48,7 +50,8 @@ public NumberBuffer(NumberBufferKind kind, Span<byte> digits)
                 IsNegative = false;
                 HasNonZeroTail = false;
                 Kind = kind;
-                Digits = digits;
+                DigitsPtr = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(digits)); // Safe since memory must be fixed
+                DigitsLength = digits.Length;
 #if DEBUG
                 Digits.Fill(0xCC);
 #endif
@@ -83,13 +86,6 @@ public void CheckConsistency()
             }
 #pragma warning restore CA1822
 
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public byte* GetDigitsPointer()
-            {
-                // This is safe to do since we are a ref struct
-                return (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(Digits));
-            }
-
             //
             // Code coverage note: This only exists so that Number displays nicely in the VS watch window. So yes, I know it works.
             //
diff --git a/src/libraries/Common/src/System/Number.Parsing.Common.cs b/src/libraries/Common/src/System/Number.Parsing.Common.cs
index 16e9f777f346..e43cbe14c292 100644
--- a/src/libraries/Common/src/System/Number.Parsing.Common.cs
+++ b/src/libraries/Common/src/System/Number.Parsing.Common.cs
@@ -336,7 +336,7 @@ internal enum ParsingStatus
         private static unsafe TChar* MatchChars<TChar>(TChar* p, TChar* pEnd, ReadOnlySpan<TChar> value)
             where TChar : unmanaged, IUtfChar<TChar>
         {
-            Debug.Assert((p != null) && (pEnd != null) && (p <= pEnd) && (value != null));
+            Debug.Assert((p != null) && (pEnd != null) && (p <= pEnd));
 
             fixed (TChar* stringPointer = &MemoryMarshal.GetReference(value))
             {
diff --git a/src/libraries/Common/src/System/Obsoletions.cs b/src/libraries/Common/src/System/Obsoletions.cs
index 74b968ad2b7f..677ffd66b34b 100644
--- a/src/libraries/Common/src/System/Obsoletions.cs
+++ b/src/libraries/Common/src/System/Obsoletions.cs
@@ -171,5 +171,8 @@ internal static class Obsoletions
 
         internal const string AesGcmTagConstructorMessage = "AesGcm should indicate the required tag size for encryption and decryption. Use a constructor that accepts the tag size.";
         internal const string AesGcmTagConstructorDiagId = "SYSLIB0053";
+
+        internal const string ThreadVolatileReadWriteMessage = "Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.";
+        internal const string ThreadVolatileReadWriteDiagId = "SYSLIB0054";
     }
 }
diff --git a/src/libraries/Common/src/System/Resources/ResourceWriter.cs b/src/libraries/Common/src/System/Resources/ResourceWriter.cs
index ab3052773cb2..1f93de277639 100644
--- a/src/libraries/Common/src/System/Resources/ResourceWriter.cs
+++ b/src/libraries/Common/src/System/Resources/ResourceWriter.cs
@@ -488,7 +488,11 @@ private static ResourceTypeCode FindTypeCode(object? value, List<string> types)
                 if (typeName.StartsWith("ResourceTypeCode.", StringComparison.Ordinal))
                 {
                     typeName = typeName.Substring(17);  // Remove through '.'
+#if NETCOREAPP
+                    ResourceTypeCode typeCode = Enum.Parse<ResourceTypeCode>(typeName);
+#else
                     ResourceTypeCode typeCode = (ResourceTypeCode)Enum.Parse(typeof(ResourceTypeCode), typeName);
+#endif
                     return typeCode;
                 }
             }
diff --git a/src/libraries/Common/src/System/Runtime/InteropServices/BuiltInVariantExtensions.cs b/src/libraries/Common/src/System/Runtime/InteropServices/BuiltInVariantExtensions.cs
index 777a1c03b0a7..daedc25c5045 100644
--- a/src/libraries/Common/src/System/Runtime/InteropServices/BuiltInVariantExtensions.cs
+++ b/src/libraries/Common/src/System/Runtime/InteropServices/BuiltInVariantExtensions.cs
@@ -21,7 +21,7 @@ private static unsafe ref T GetByRefDataRef<T>(this ref ComVariant variant)
             return ref Unsafe.AsRef<T>((void*)variant.GetRawDataRef<nint>());
         }
 
-        public static unsafe void CopyFromIndirect(this ref ComVariant variant, object value)
+        public static unsafe void CopyFromIndirect(this ref ComVariant variant, object? value)
         {
             VarEnum vt = (VarEnum)(((int)variant.VarType) & ~((int)VarEnum.VT_BYREF));
 
@@ -154,9 +154,8 @@ public static unsafe void CopyFromIndirect(this ref ComVariant variant, object v
                 VarEnum.VT_DECIMAL => variant.As<decimal>(),
                 VarEnum.VT_CY => decimal.FromOACurrency(variant.GetRawDataRef<long>()),
                 VarEnum.VT_DATE => variant.As<DateTime>(),
-                VarEnum.VT_BSTR => Marshal.PtrToStringBSTR(variant.GetRawDataRef<nint>()),
-                VarEnum.VT_UNKNOWN => Marshal.GetObjectForIUnknown(variant.GetRawDataRef<nint>()),
-                VarEnum.VT_DISPATCH => Marshal.GetObjectForIUnknown(variant.GetRawDataRef<nint>()),
+                VarEnum.VT_BSTR => variant.GetRawDataRef<nint>() is 0 ? null : Marshal.PtrToStringBSTR(variant.GetRawDataRef<nint>()),
+                VarEnum.VT_UNKNOWN or VarEnum.VT_DISPATCH => variant.GetRawDataRef<nint>() is 0 ? null : Marshal.GetObjectForIUnknown(variant.GetRawDataRef<nint>()),
                 _ => GetObjectFromNativeVariant(ref variant),
             };
         }
diff --git a/src/libraries/Common/src/System/Security/Cryptography/Pkcs12Kdf.cs b/src/libraries/Common/src/System/Security/Cryptography/Pkcs12Kdf.cs
index 8e482b931c76..1fa1d0ee0339 100644
--- a/src/libraries/Common/src/System/Security/Cryptography/Pkcs12Kdf.cs
+++ b/src/libraries/Common/src/System/Security/Cryptography/Pkcs12Kdf.cs
@@ -3,6 +3,8 @@
 
 using System.Collections.Generic;
 using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 using System.Text;
 
 namespace System.Security.Cryptography.Pkcs
@@ -115,9 +117,15 @@ private static void Derive(
             // The password is a null-terminated UTF-16BE version of the input.
             int passLen = checked((password.Length + 1) * 2);
 
-            // If password == default then the span represents the null string (as opposed to
+            // If password contains a null ref then the span represents the null string (as opposed to
             // an empty string), and the P block should then have size 0 in the next step.
+#if NETSTANDARD
+#pragma warning disable CA2265 // Do not compare Span<T> to 'default'
             if (password == default)
+#pragma warning restore CA2265
+#else
+            if (Unsafe.IsNullRef(ref MemoryMarshal.GetReference(password)))
+#endif
             {
                 passLen = 0;
             }
diff --git a/src/libraries/Common/src/System/Security/Cryptography/RsaPaddingProcessor.cs b/src/libraries/Common/src/System/Security/Cryptography/RsaPaddingProcessor.cs
index 94b142e20ffe..efe542d61cf1 100644
--- a/src/libraries/Common/src/System/Security/Cryptography/RsaPaddingProcessor.cs
+++ b/src/libraries/Common/src/System/Security/Cryptography/RsaPaddingProcessor.cs
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Buffers;
 using System.Buffers.Binary;
 using System.Collections.Concurrent;
 using System.Diagnostics;
@@ -142,6 +143,109 @@ internal static void PadPkcs1Encryption(
             source.CopyTo(mInEM);
         }
 
+        internal static OperationStatus DepadPkcs1Encryption(
+            ReadOnlySpan<byte> source,
+            Span<byte> destination,
+            out int bytesWritten)
+        {
+            int primitive = DepadPkcs1Encryption(source);
+            int primitiveSign = SignStretch(primitive);
+
+            // Primitive is a positive length, or ~length to indicate
+            // an error, so flip ~length to length if the high bit is set.
+            int len = Choose(primitiveSign, ~primitive, primitive);
+            int spaceRemain = destination.Length - len;
+            int spaceRemainSign = SignStretch(spaceRemain);
+
+            // len = clampHigh(len, destination.Length);
+            len = Choose(spaceRemainSign, destination.Length, len);
+
+            // ret = spaceRemain < 0 ? DestinationTooSmall : Done
+            int ret = Choose(
+                spaceRemainSign,
+                (int)OperationStatus.DestinationTooSmall,
+                (int)OperationStatus.Done);
+
+            // ret = primitive < 0 ? InvalidData : ret;
+            ret = Choose(primitiveSign, (int)OperationStatus.InvalidData, ret);
+
+            // Write some number of bytes, regardless of the final return.
+            source[^len..].CopyTo(destination);
+
+            // bytesWritten = ret == Done ? len : 0;
+            bytesWritten = Choose(CheckZero(ret), len, 0);
+            return (OperationStatus)ret;
+        }
+
+        private static int DepadPkcs1Encryption(ReadOnlySpan<byte> source)
+        {
+            Debug.Assert(source.Length > 11);
+            ReadOnlySpan<byte> afterPadding = source.Slice(10);
+            ReadOnlySpan<byte> noZeros = source.Slice(2, 8);
+
+            // Find the first zero in noZeros, or -1 for no zeros.
+            int zeroPos = BlindFindFirstZero(noZeros);
+
+            // If zeroPos is negative, valid is -1, otherwise 0.
+            int valid = SignStretch(zeroPos);
+
+            // If there are no zeros in afterPadding then zeroPos is negative,
+            // so negating the sign stretch is 0, which makes hasPos 0.
+            // If there -was- a zero, sign stretching is 0, so negating it makes hasPos -1.
+            zeroPos = BlindFindFirstZero(afterPadding);
+            int hasLen = ~SignStretch(zeroPos);
+            valid &= hasLen;
+
+            // Check that the first two bytes are { 00 02 }
+            valid &= CheckZero(source[0] | (source[1] ^ 0x02));
+
+            int lenIfGood = afterPadding.Length - zeroPos - 1;
+            // If there were no zeros, use the full after-min-padding segment.
+            int lenIfBad = ~Choose(hasLen, lenIfGood, source.Length - 11);
+
+            Debug.Assert(lenIfBad < 0);
+            return Choose(valid, lenIfGood, lenIfBad);
+        }
+
+        private static int BlindFindFirstZero(ReadOnlySpan<byte> source)
+        {
+            // Any vectorization of this routine needs to use non-early termination,
+            // and instructions that do not vary their completion time on the input.
+
+            int pos = -1;
+
+            for (int i = source.Length - 1; i >= 0; i--)
+            {
+                // pos = source[i] == 0 ? i : pos;
+                int local = CheckZero(source[i]);
+                pos = Choose(local, i, pos);
+            }
+
+            return pos;
+        }
+
+        private static int SignStretch(int value)
+        {
+            return value >> 31;
+        }
+
+        private static int Choose(int selector, int yes, int no)
+        {
+            Debug.Assert((selector | (selector - 1)) == -1);
+            return (selector & yes) | (~selector & no);
+        }
+
+        private static int CheckZero(int value)
+        {
+            // For zero, ~value and value-1 are both all bits set (negative).
+            // For positive values, ~value is negative and value-1 is positive.
+            // For negative values except MinValue, ~value is positive and value-1 is negative.
+            // For MinValue, ~value is positive and value-1 is also positive.
+            // All together, the only thing that has negative & negative is 0, so stretch the sign bit.
+            int mask = ~value & (value - 1);
+            return SignStretch(mask);
+        }
+
         internal static void PadPkcs1Signature(
             HashAlgorithmName hashAlgorithmName,
             ReadOnlySpan<byte> source,
diff --git a/src/libraries/Common/tests/System/Net/Configuration.Sockets.cs b/src/libraries/Common/tests/System/Net/Configuration.Sockets.cs
index f9f9ba1dc17e..761998370685 100644
--- a/src/libraries/Common/tests/System/Net/Configuration.Sockets.cs
+++ b/src/libraries/Common/tests/System/Net/Configuration.Sockets.cs
@@ -29,7 +29,7 @@ public static IEnumerable<object[]> LocalAddresses()
                 {
                     yield return new[] { IPAddress.Loopback };
                 }
-                if (Socket.OSSupportsIPv6)
+                if (Socket.OSSupportsIPv6 && IsIPv6LoopbackAvailable)
                 {
                     yield return new[] { IPAddress.IPv6Loopback };
                 }
@@ -46,6 +46,23 @@ private static IPAddress GetIPv6LinkLocalAddress() =>
                     .Select(a => a.Address)
                     .Where(a => a.IsIPv6LinkLocal)
                     .FirstOrDefault();
+
+            private static readonly Lazy<bool> _isIPv6LoopbackAvailable = new Lazy<bool>(GetIsIPv6LoopbackAvailable);
+            public static bool IsIPv6LoopbackAvailable => _isIPv6LoopbackAvailable.Value;
+
+            private static bool GetIsIPv6LoopbackAvailable()
+            {
+                try
+                {
+                    using Socket s = new Socket(AddressFamily.InterNetworkV6, SocketType.Dgram, ProtocolType.Udp);
+                    s.Bind(new IPEndPoint(IPAddress.IPv6Loopback, 0));
+                    return true;
+                }
+                catch (SocketException)
+                {
+                    return false;
+                }
+            }
         }
     }
 }
diff --git a/src/libraries/Common/tests/System/Net/Http/HttpClientHandlerTest.Decompression.cs b/src/libraries/Common/tests/System/Net/Http/HttpClientHandlerTest.Decompression.cs
index 439cadf32ae6..4da9edc2ca21 100644
--- a/src/libraries/Common/tests/System/Net/Http/HttpClientHandlerTest.Decompression.cs
+++ b/src/libraries/Common/tests/System/Net/Http/HttpClientHandlerTest.Decompression.cs
@@ -28,7 +28,7 @@ public abstract class HttpClientHandler_Decompression_Test : HttpClientHandlerTe
         public HttpClientHandler_Decompression_Test(ITestOutputHelper output) : base(output) { }
 
         public static IEnumerable<object[]> DecompressedResponse_MethodSpecified_DecompressedContentReturned_MemberData() =>
-            from compressionName in new[] { "gzip", "zlib", "deflate", "br" }
+            from compressionName in new[] { "gzip", "GZIP", "zlib", "ZLIB", "deflate", "DEFLATE", "br", "BR" }
             from all in new[] { false, true }
             from copyTo in new[] { false, true }
             from contentLength in new[] { 0, 1, 12345 }
@@ -40,7 +40,7 @@ public static IEnumerable<object[]> DecompressedResponse_MethodSpecified_Decompr
         public async Task DecompressedResponse_MethodSpecified_DecompressedContentReturned(string compressionName, bool all, bool useCopyTo, int contentLength)
         {
             if (IsWinHttpHandler &&
-                (compressionName == "br" || compressionName == "zlib"))
+                (compressionName is "br" or "BR" or "zlib" or "ZLIB"))
             {
                 // brotli and zlib not supported on WinHttpHandler
                 return;
@@ -52,17 +52,20 @@ public async Task DecompressedResponse_MethodSpecified_DecompressedContentReturn
             switch (compressionName)
             {
                 case "gzip":
+                case "GZIP":
                     compress = s => new GZipStream(s, CompressionLevel.Optimal, leaveOpen: true);
                     methods = all ? DecompressionMethods.GZip : _all;
                     break;
 
 #if !NETFRAMEWORK
                 case "br":
+                case "BR":
                     compress = s => new BrotliStream(s, CompressionLevel.Optimal, leaveOpen: true);
                     methods = all ? DecompressionMethods.Brotli : _all;
                     break;
 
                 case "zlib":
+                case "ZLIB":
                     compress = s => new ZLibStream(s, CompressionLevel.Optimal, leaveOpen: true);
                     methods = all ? DecompressionMethods.Deflate : _all;
                     encodingName = "deflate";
@@ -70,6 +73,7 @@ public async Task DecompressedResponse_MethodSpecified_DecompressedContentReturn
 #endif
 
                 case "deflate":
+                case "DEFLATE":
                     compress = s => new DeflateStream(s, CompressionLevel.Optimal, leaveOpen: true);
                     methods = all ? DecompressionMethods.Deflate : _all;
                     break;
diff --git a/src/libraries/Common/tests/System/Net/Prerequisites/NetCoreServer/Handlers/EchoHandler.cs b/src/libraries/Common/tests/System/Net/Prerequisites/NetCoreServer/Handlers/EchoHandler.cs
index 6888c57e1128..667e99c29dc3 100644
--- a/src/libraries/Common/tests/System/Net/Prerequisites/NetCoreServer/Handlers/EchoHandler.cs
+++ b/src/libraries/Common/tests/System/Net/Prerequisites/NetCoreServer/Handlers/EchoHandler.cs
@@ -7,6 +7,7 @@
 using System.Threading;
 using System.Threading.Tasks;
 using Microsoft.AspNetCore.Http;
+using Microsoft.AspNetCore.Http.Features;
 
 namespace NetCoreServer
 {
@@ -30,6 +31,24 @@ public static async Task InvokeAsync(HttpContext context)
 
             byte[] bytes = Encoding.UTF8.GetBytes(echoJson);
 
+            var delay = 0;
+            if (context.Request.QueryString.HasValue)
+            {
+                if (context.Request.QueryString.Value.Contains("delay1sec"))
+                {
+                    delay = 1000;
+                }
+                else if (context.Request.QueryString.Value.Contains("delay10sec"))
+                {
+                    delay = 10000;
+                }
+            }
+
+            if (delay > 0)
+            {
+                context.Features.Get<IHttpResponseBodyFeature>().DisableBuffering();
+            }
+
             // Compute MD5 hash so that clients can verify the received data.
             using (MD5 md5 = MD5.Create())
             {
@@ -41,20 +60,19 @@ public static async Task InvokeAsync(HttpContext context)
                 context.Response.ContentLength = bytes.Length;
             }
 
-            if (context.Request.QueryString.HasValue && context.Request.QueryString.Value.Contains("delay10sec"))
+            if (delay > 0)
             {
                 await context.Response.StartAsync(CancellationToken.None);
+                await context.Response.Body.WriteAsync(bytes, 0, 10);
+                await context.Response.Body.FlushAsync();
+                await Task.Delay(delay);
+                await context.Response.Body.WriteAsync(bytes, 10, bytes.Length-10);
                 await context.Response.Body.FlushAsync();
-
-                await Task.Delay(10000);
             }
-            else if (context.Request.QueryString.HasValue && context.Request.QueryString.Value.Contains("delay1sec"))
+            else
             {
-                await context.Response.StartAsync(CancellationToken.None);
-                await Task.Delay(1000);
+                await context.Response.Body.WriteAsync(bytes, 0, bytes.Length);
             }
-            
-            await context.Response.Body.WriteAsync(bytes, 0, bytes.Length);
         }
     }
 }
diff --git a/src/libraries/Common/tests/System/Net/Prerequisites/NetCoreServer/Handlers/EchoWebSocketHandler.cs b/src/libraries/Common/tests/System/Net/Prerequisites/NetCoreServer/Handlers/EchoWebSocketHandler.cs
index 8304f2d11560..a290ce63bd4f 100644
--- a/src/libraries/Common/tests/System/Net/Prerequisites/NetCoreServer/Handlers/EchoWebSocketHandler.cs
+++ b/src/libraries/Common/tests/System/Net/Prerequisites/NetCoreServer/Handlers/EchoWebSocketHandler.cs
@@ -144,6 +144,18 @@ await socket.CloseAsync(
                     {
                         await Task.Delay(5000);
                     }
+                    else if (receivedMessage == ".receiveMessageAfterClose")
+                    {
+                        byte[] buffer = new byte[1024];
+                        string message = $"{receivedMessage} {DateTime.Now.ToString("HH:mm:ss")}";
+                        buffer = System.Text.Encoding.UTF8.GetBytes(message);
+                        await socket.SendAsync(
+                            new ArraySegment<byte>(buffer, 0, message.Length),
+                            WebSocketMessageType.Text,
+                            true,
+                            CancellationToken.None);
+                        await socket.CloseAsync(WebSocketCloseStatus.NormalClosure, receivedMessage, CancellationToken.None);
+                    }
                     else if (socket.State == WebSocketState.Open)
                     {
                         sendMessage = true;
diff --git a/src/libraries/Common/tests/System/Net/Security/FakeNtlmServer.cs b/src/libraries/Common/tests/System/Net/Security/FakeNtlmServer.cs
index cb7a3a785e7f..1117b3412f35 100644
--- a/src/libraries/Common/tests/System/Net/Security/FakeNtlmServer.cs
+++ b/src/libraries/Common/tests/System/Net/Security/FakeNtlmServer.cs
@@ -42,6 +42,8 @@ public FakeNtlmServer(NetworkCredential expectedCredential)
         public bool IsAuthenticated { get; private set; }
         public bool IsMICPresent { get; private set; }
         public string? ClientSpecifiedSpn { get; private set; }
+        public Flags InitialClientFlags { get; private set; }
+        public Flags NegotiatedFlags => _negotiatedFlags;
 
         private NetworkCredential _expectedCredential;
 
@@ -83,7 +85,7 @@ private enum MessageType : uint
         }
 
         [Flags]
-        private enum Flags : uint
+        public enum Flags : uint
         {
             NegotiateUnicode = 0x00000001,
             NegotiateOEM = 0x00000002,
@@ -177,17 +179,17 @@ private static ReadOnlySpan<byte> GetField(ReadOnlySpan<byte> payload, int field
                 case MessageType.Negotiate:
                     // We don't negotiate, we just verify
                     Assert.True(incomingBlob.Length >= 32);
-                    Flags flags = (Flags)BinaryPrimitives.ReadUInt32LittleEndian(incomingBlob.AsSpan(12, 4));
-                    Assert.Equal(_requiredFlags, (flags & _requiredFlags));
-                    Assert.True((flags & (Flags.NegotiateOEM | Flags.NegotiateUnicode)) != 0);
-                    if (flags.HasFlag(Flags.NegotiateDomainSupplied))
+                    InitialClientFlags = (Flags)BinaryPrimitives.ReadUInt32LittleEndian(incomingBlob.AsSpan(12, 4));
+                    Assert.Equal(_requiredFlags, (InitialClientFlags & _requiredFlags));
+                    Assert.True((InitialClientFlags & (Flags.NegotiateOEM | Flags.NegotiateUnicode)) != 0);
+                    if (InitialClientFlags.HasFlag(Flags.NegotiateDomainSupplied))
                     {
                         string domain = Encoding.ASCII.GetString(GetField(incomingBlob, 16));
                         Assert.Equal(_expectedCredential.Domain, domain);
                     }
                     _expectedMessageType = MessageType.Authenticate;
                     _negotiateMessage = incomingBlob;
-                    return _challengeMessage = GenerateChallenge(flags);
+                    return _challengeMessage = GenerateChallenge(InitialClientFlags);
 
                 case MessageType.Authenticate:
                     // Validate the authentication!
diff --git a/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/EncryptDecrypt.cs b/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/EncryptDecrypt.cs
index 39f3ebc82ec6..0aaffebe542a 100644
--- a/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/EncryptDecrypt.cs
+++ b/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/EncryptDecrypt.cs
@@ -2,6 +2,8 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
 using Test.Cryptography;
 using Microsoft.DotNet.XUnitExtensions;
 using Xunit;
@@ -716,23 +718,116 @@ public void NotSupportedValueMethods()
             }
         }
 
-        [ConditionalTheory]
-        [InlineData(new byte[] { 1, 2, 3, 4 })]
-        [InlineData(new byte[0])]
-        public void Decrypt_Pkcs1_ErrorsForInvalidPadding(byte[] data)
+        [Fact]
+        public void Decrypt_Pkcs1_BadPadding()
         {
-            if (data.Length == 0 && !PlatformSupportsEmptyRSAEncryption)
+            if ((PlatformDetection.IsWindows && !PlatformDetection.IsWindows10Version2004OrGreater))
             {
-                throw new SkipTestException("Platform does not support RSA encryption of empty data.");
+                return;
             }
 
-            using (RSA rsa = RSAFactory.Create(TestData.RSA2048Params))
+            RSAParameters keyParams = TestData.RSA2048Params;
+            BigInteger e = new BigInteger(keyParams.Exponent, true, true);
+            BigInteger n = new BigInteger(keyParams.Modulus, true, true);
+            byte[] buf = new byte[keyParams.Modulus.Length];
+            byte[] c = new byte[buf.Length];
+
+            buf[1] = 2;
+            buf.AsSpan(2).Fill(1);
+
+            ref byte afterMinPadding = ref buf[10];
+            ref byte lastByte = ref buf[^1];
+            afterMinPadding = 0;
+
+            using (RSA rsa = RSAFactory.Create(keyParams))
+            {
+                RawEncrypt(buf, e, n, c);
+                // Assert.NoThrow, check that manual padding is coherent
+                Decrypt(rsa, c, RSAEncryptionPadding.Pkcs1);
+
+                // All RSA encryption schemes start with 00, so pick any other number.
+                //
+                // If buf > modulus then encrypt should fail, so this
+                // is the largest legal-but-invalid value to test.
+                buf[0] = keyParams.Modulus[0];
+                RawEncrypt(buf, e, n, c);
+                Assert.ThrowsAny<CryptographicException>(() => Decrypt(rsa, c, RSAEncryptionPadding.Pkcs1));
+
+                // Check again with a zero length payload
+                (afterMinPadding, lastByte) = (lastByte, afterMinPadding);
+                RawEncrypt(buf, e, n, c);
+                Assert.ThrowsAny<CryptographicException>(() => Decrypt(rsa, c, RSAEncryptionPadding.Pkcs1));
+
+                // Back to valid padding
+                buf[0] = 0;
+                (afterMinPadding, lastByte) = (lastByte, afterMinPadding);
+                RawEncrypt(buf, e, n, c);
+                Decrypt(rsa, c, RSAEncryptionPadding.Pkcs1);
+
+                // This is (sort of) legal for PKCS1 signatures, but not decryption.
+                buf[1] = 1;
+                RawEncrypt(buf, e, n, c);
+                Assert.ThrowsAny<CryptographicException>(() => Decrypt(rsa, c, RSAEncryptionPadding.Pkcs1));
+
+                // No RSA PKCS1 padding scheme starts with 00 FF.
+                buf[1] = 255;
+                RawEncrypt(buf, e, n, c);
+                Assert.ThrowsAny<CryptographicException>(() => Decrypt(rsa, c, RSAEncryptionPadding.Pkcs1));
+
+                // Check again with a zero length payload
+                (afterMinPadding, lastByte) = (lastByte, afterMinPadding);
+                RawEncrypt(buf, e, n, c);
+                Assert.ThrowsAny<CryptographicException>(() => Decrypt(rsa, c, RSAEncryptionPadding.Pkcs1));
+
+                // Back to valid padding
+                buf[1] = 2;
+                (afterMinPadding, lastByte) = (lastByte, afterMinPadding);
+                RawEncrypt(buf, e, n, c);
+                Decrypt(rsa, c, RSAEncryptionPadding.Pkcs1);
+
+                // Try a zero in every possible required padding position
+                for (int i = 2; i < 10; i++)
+                {
+                    buf[i] = 0;
+
+                    RawEncrypt(buf, e, n, c);
+                    Assert.ThrowsAny<CryptographicException>(() => Decrypt(rsa, c, RSAEncryptionPadding.Pkcs1));
+
+                    // It used to be 1, now it's 2, still not zero.
+                    buf[i] = 2;
+                }
+
+                // Back to valid padding
+                RawEncrypt(buf, e, n, c);
+                Decrypt(rsa, c, RSAEncryptionPadding.Pkcs1);
+
+                // Make it such that
+                // "there is no octet with hexadecimal value 0x00 to separate PS from M"
+                // (RFC 3447 sec 7.2.2, rule 3, third clause)
+                buf.AsSpan(10).Fill(3);
+                RawEncrypt(buf, e, n, c);
+                Assert.ThrowsAny<CryptographicException>(() => Decrypt(rsa, c, RSAEncryptionPadding.Pkcs1));
+
+                // Every possible problem, for good measure.
+                buf[0] = 2;
+                buf[1] = 0;
+                buf[4] = 0;
+                RawEncrypt(buf, e, n, c);
+                Assert.ThrowsAny<CryptographicException>(() => Decrypt(rsa, c, RSAEncryptionPadding.Pkcs1));
+            }
+
+            static void RawEncrypt(ReadOnlySpan<byte> source, BigInteger e, BigInteger n, Span<byte> destination)
             {
-                byte[] encrypted = Encrypt(rsa, data, RSAEncryptionPadding.Pkcs1);
-                encrypted[1] ^= 0xFF;
+                BigInteger m = new BigInteger(source, true, true);
+                BigInteger c = BigInteger.ModPow(m, e, n);
+                int shift = destination.Length - c.GetByteCount(true);
+                destination.Slice(0, shift).Clear();
+                bool wrote = c.TryWriteBytes(destination.Slice(shift), out int written, true, true);
 
-                // PKCS#1, the data, and the key are all deterministic so this should always throw an exception.
-                Assert.ThrowsAny<CryptographicException>(() => Decrypt(rsa, encrypted, RSAEncryptionPadding.Pkcs1));
+                if (!wrote || written + shift != destination.Length)
+                {
+                    throw new UnreachableException();
+                }
             }
         }
 
diff --git a/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/ImportExport.cs b/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/ImportExport.cs
index 473e54797f16..72ffbef16926 100644
--- a/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/ImportExport.cs
+++ b/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/ImportExport.cs
@@ -10,8 +10,7 @@ namespace System.Security.Cryptography.Rsa.Tests
     [SkipOnPlatform(TestPlatforms.Browser, "Not supported on Browser")]
     public partial class ImportExport
     {
-        private static readonly Lazy<bool> s_supports16384 = new Lazy<bool>(TestRsa16384);
-        public static bool Supports16384 => s_supports16384.Value;
+        public static bool Supports16384 { get; } = TestRsa16384();
 
         [Fact]
         public static void ExportAutoKey()
diff --git a/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/RSAKeyFileTests.cs b/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/RSAKeyFileTests.cs
index 65f73cdef435..daa175dda47a 100644
--- a/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/RSAKeyFileTests.cs
+++ b/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/RSAKeyFileTests.cs
@@ -3,7 +3,6 @@
 
 using System.Security.Cryptography.Encryption.RC2.Tests;
 using System.Text;
-using Microsoft.DotNet.XUnitExtensions;
 using Test.Cryptography;
 using Xunit;
 
@@ -123,17 +122,9 @@ public static void ReadWriteDiminishedDPPrivatePkcs1()
                 TestData.DiminishedDPParameters);
         }
 
-        [ConditionalFact]
-        [OuterLoop("RSA 16384 takes considerable time.")]
+        [ConditionalFact(typeof(ImportExport), nameof(ImportExport.Supports16384))]
         public static void ReadWritePublicPkcs1()
         {
-            // Do not move this to the [ConditionalFact], otherwise the platform will check if RSA 16384 is supported
-            // during test discovery for innerloop, and the check itself is expensive.
-            if (!ImportExport.Supports16384)
-            {
-                throw new SkipTestException("Platform does not support RSA 16384.");
-            }
-
             ReadWriteBase64PublicPkcs1(
                 @"
 MIIICgKCCAEAmyxwX6kQNx+LSMao1StC1p5rKCEwcBjzI136An3B/BjthgezAOuu
@@ -207,18 +198,9 @@ public static void ReadWriteSubjectPublicKeyInfo_DiminishedDPKey()
                 TestData.DiminishedDPParameters);
         }
 
-
-        [ConditionalFact]
-        [OuterLoop("RSA 16384 takes considerable time.")]
+        [ConditionalFact(typeof(ImportExport), nameof(ImportExport.Supports16384))]
         public static void ReadWriteRsa16384SubjectPublicKeyInfo()
         {
-            // Do not move this to the [ConditionalFact], otherwise the platform will check if RSA 16384 is supported
-            // during test discovery for innerloop, and the check itself is expensive.
-            if (!ImportExport.Supports16384)
-            {
-                throw new SkipTestException("Platform does not support RSA 16384.");
-            }
-
             ReadWriteBase64SubjectPublicKeyInfo(
                 @"
 MIIIIjANBgkqhkiG9w0BAQEFAAOCCA8AMIIICgKCCAEAmyxwX6kQNx+LSMao1StC
@@ -268,17 +250,9 @@ public static void ReadWriteRsa16384SubjectPublicKeyInfo()
                 TestData.RSA16384Params);
         }
 
-        [ConditionalFact]
-        [OuterLoop("RSA 16384 takes considerable time.")]
+        [ConditionalFact(typeof(ImportExport), nameof(ImportExport.Supports16384))]
         public static void ReadWrite16384Pkcs8()
         {
-            // Do not move this to the [ConditionalFact], otherwise the platform will check if RSA 16384 is supported
-            // during test discovery for innerloop, and the check itself is expensive.
-            if (!ImportExport.Supports16384)
-            {
-                throw new SkipTestException("Platform does not support RSA 16384");
-            }
-
             ReadWriteBase64Pkcs8(
                 @"
 MIIkQgIBADANBgkqhkiG9w0BAQEFAASCJCwwgiQoAgEAAoIIAQCbLHBfqRA3H4tI
@@ -551,17 +525,9 @@ public static void ReadEncryptedRsa1032()
                 TestData.RSA1032Parameters);
         }
 
-        [ConditionalFact]
-        [OuterLoop("RSA 16384 takes considerable time.")]
+        [ConditionalFact(typeof(ImportExport), nameof(ImportExport.Supports16384))]
         public static void ReadEncryptedRsa16384()
         {
-            // Do not move this to the [ConditionalFact], otherwise the platform will check if RSA 16384 is supported
-            // during test discovery for innerloop, and the check itself is expensive.
-            if (!ImportExport.Supports16384)
-            {
-                throw new SkipTestException("Platform does not support RSA 16384");
-            }
-
             // PBES2: PBKDF2 + des (single DES, not 3DES).
             const string base64 = @"
 MIIkizA9BgkqhkiG9w0BBQ0wMDAbBgkqhkiG9w0BBQwwDgQI63upT8JPNNcCAggA
diff --git a/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/RSAXml.cs b/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/RSAXml.cs
index eb354216e77f..666f9bea3c40 100644
--- a/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/RSAXml.cs
+++ b/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/RSAXml.cs
@@ -3,7 +3,6 @@
 
 using System.Collections.Generic;
 using System.Xml.Linq;
-using Microsoft.DotNet.XUnitExtensions;
 using Xunit;
 
 namespace System.Security.Cryptography.Rsa.Tests
@@ -77,17 +76,9 @@ public static void TestRead1032Parameters_Private()
                 TestData.RSA1032Parameters);
         }
 
-        [ConditionalFact]
-        [OuterLoop("RSA 16384 takes considerable time.")]
+        [ConditionalFact(typeof(ImportExport), nameof(ImportExport.Supports16384))]
         public static void TestRead16384Parameters_Public()
         {
-            // Do not move this to the [ConditionalFact], otherwise the platform will check if RSA 16384 is supported
-            // during test discovery for innerloop, and the check itself is expensive.
-            if (!ImportExport.Supports16384)
-            {
-                throw new SkipTestException("Platform does not support RSA 16384");
-            }
-
             RSAParameters expectedParameters = ImportExport.MakePublic(TestData.RSA16384Params);
 
             // Bonus trait of this XML: the Modulus and Exponent parameters
@@ -166,16 +157,9 @@ iC2wXFMDafnWp1lxXiGcVVu9dE2LeglCgnMUps9QlJD0aXaJHYi2VDQ3zFdMvn8A    imlqKtZGdGf9
                 expectedParameters);
         }
 
-        [ConditionalFact]
+        [ConditionalFact(typeof(ImportExport), nameof(ImportExport.Supports16384))]
         public static void TestRead16384Parameters_Private()
         {
-            // Do not move this to the [ConditionalFact], otherwise the platform will check if RSA 16384 is supported
-            // during test discovery for innerloop, and the check itself is expensive.
-            if (!ImportExport.Supports16384)
-            {
-                throw new SkipTestException("Platform does not support RSA 16384");
-            }
-
             // Bonus trait of this XML: the D parameter is not in
             // canonical order.
             TestReadXml(
@@ -650,19 +634,11 @@ public static void TestWrite2048Parameters(bool includePrivateParameters)
                 ));
         }
 
-        [ConditionalTheory]
+        [ConditionalTheory(typeof(ImportExport), nameof(ImportExport.Supports16384))]
         [InlineData(true)]
         [InlineData(false)]
-        [OuterLoop("RSA 16384 takes considerable time for primality tests.")]
         public static void TestWrite16384Parameters(bool includePrivateParameters)
         {
-            // Do not move this to the [ConditionalFact], otherwise the platform will check if RSA 16384 is supported
-            // during test discovery for innerloop, and the check itself is expensive.
-            if (!ImportExport.Supports16384)
-            {
-                throw new SkipTestException("Platform does not support RSA 16384");
-            }
-
             TestWriteXml(
                 TestData.RSA16384Params,
                 includePrivateParameters,
diff --git a/src/libraries/Common/tests/System/Security/Cryptography/X509Certificates/CertificateAuthority.cs b/src/libraries/Common/tests/System/Security/Cryptography/X509Certificates/CertificateAuthority.cs
index 184d8a62e993..beb321161712 100644
--- a/src/libraries/Common/tests/System/Security/Cryptography/X509Certificates/CertificateAuthority.cs
+++ b/src/libraries/Common/tests/System/Security/Cryptography/X509Certificates/CertificateAuthority.cs
@@ -179,7 +179,7 @@ internal X509Certificate2 CreateOcspSigner(string subject, RSA publicKey)
                 subject,
                 publicKey,
                 TimeSpan.FromSeconds(1),
-                new X509ExtensionCollection() { s_eeConstraints, s_eeKeyUsage, s_ocspResponderEku},
+                new X509ExtensionCollection() { s_eeConstraints, s_eeKeyUsage, s_ocspResponderEku },
                 ocspResponder: true);
         }
 
@@ -950,12 +950,10 @@ private static string BuildSubject(
             PkiOptions pkiOptions,
             bool includePkiOptions)
         {
-            if (includePkiOptions)
-            {
-                return $"CN=\"{cn}\", O=\"{testName}\", OU=\"{pkiOptions}\"";
-            }
+            string testNamePart = !string.IsNullOrWhiteSpace(testName) ? $", O=\"{testName}\"" : "";
+            string pkiOptionsPart = includePkiOptions ? $", OU=\"{pkiOptions}\"" : "";
 
-            return $"CN=\"{cn}\", O=\"{testName}\"";
+            return $"CN=\"{cn}\"" + testNamePart + pkiOptionsPart;
         }
     }
 }
diff --git a/src/libraries/Common/tests/System/TimeProviderTests.cs b/src/libraries/Common/tests/System/TimeProviderTests.cs
index 428c5b13fecc..7a0cb33eb74d 100644
--- a/src/libraries/Common/tests/System/TimeProviderTests.cs
+++ b/src/libraries/Common/tests/System/TimeProviderTests.cs
@@ -214,7 +214,7 @@ private static void CancelAfter(TimeProvider provider, CancellationTokenSource c
         }
 #endif // NETFRAMEWORK
 
-        [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [MemberData(nameof(TimersProvidersListData))]
         public static void CancellationTokenSourceWithTimer(TimeProvider provider)
         {
diff --git a/src/libraries/Common/tests/TestUtilities/System/AssertExtensions.cs b/src/libraries/Common/tests/TestUtilities/System/AssertExtensions.cs
index 32dd05facb03..232c05afb8d2 100644
--- a/src/libraries/Common/tests/TestUtilities/System/AssertExtensions.cs
+++ b/src/libraries/Common/tests/TestUtilities/System/AssertExtensions.cs
@@ -400,6 +400,20 @@ public static void GreaterThanOrEqualTo<T>(T actual, T greaterThanOrEqualTo, str
                 throw new XunitException(AddOptionalUserMessage($"Expected: {actual} to be greater than or equal to {greaterThanOrEqualTo}", userMessage));
         }
 
+        /// <summary>
+        /// Validate that a given enum value has the expected flag set.
+        /// </summary>
+        /// <typeparam name="T">The enum type.</typeparam>
+        /// <param name="expected">The flag which should be present in <paramref name="actual"/>.</param>
+        /// <param name="actual">The value which should contain the flag <paramref name="expected"/>.</param>
+        public static void HasFlag<T>(T expected, T actual, string userMessage = null) where T : Enum
+        {
+            if (!actual.HasFlag(expected))
+            {
+                throw new XunitException(AddOptionalUserMessage($"Expected: Value {actual} (of enum type {typeof(T).FullName}) to have the flag {expected} set.", userMessage));
+            }
+        }
+
         // NOTE: Consider using SequenceEqual below instead, as it will give more useful information about what
         // the actual differences are, especially for large arrays/spans.
         /// <summary>
@@ -490,6 +504,19 @@ public static void CollectionEqual<T>(IEnumerable<T> expected, IEnumerable<T> ac
             }
         }
 
+        /// <summary>
+        /// Validates that the actual span is not equal to the expected span.
+        /// </summary>
+        /// <param name="expected">The sequence that <paramref name="actual"/> should be not be equal to.</param>
+        /// <param name="actual">The actual sequence.</param>
+        public static void SequenceNotEqual<T>(ReadOnlySpan<T> expected, ReadOnlySpan<T> actual) where T : IEquatable<T>
+        {
+            if (expected.SequenceEqual(actual))
+            {
+                throw new XunitException($"Expected: Contents of expected to differ from actual but were the same.");
+            }
+        }
+
         /// <summary>
         /// Validates that the actual span is equal to the expected span.
         /// If this fails, determine where the differences are and create an exception with that information.
diff --git a/src/libraries/Common/tests/TestUtilities/System/PlatformDetection.Unix.cs b/src/libraries/Common/tests/TestUtilities/System/PlatformDetection.Unix.cs
index 643f9dd915a6..1b4e6e3e0d18 100644
--- a/src/libraries/Common/tests/TestUtilities/System/PlatformDetection.Unix.cs
+++ b/src/libraries/Common/tests/TestUtilities/System/PlatformDetection.Unix.cs
@@ -20,13 +20,13 @@ public static partial class PlatformDetection
         public static bool IsUbuntu2004 => IsDistroAndVersion("ubuntu", 20, 4);
         public static bool IsDebian => IsDistroAndVersion("debian");
         public static bool IsAlpine => IsDistroAndVersion("alpine");
-        public static bool IsDebian10 => IsDistroAndVersion("debian", 10);
         public static bool IsRaspbian10 => IsDistroAndVersion("raspbian", 10);
         public static bool IsMariner => IsDistroAndVersion("mariner");
         public static bool IsSLES => IsDistroAndVersion("sles");
         public static bool IsTizen => IsDistroAndVersion("tizen");
         public static bool IsFedora => IsDistroAndVersion("fedora");
         public static bool IsLinuxBionic => IsBionic();
+        public static bool IsRedHatFamily => IsRedHatFamilyAndVersion();
 
         public static bool IsMonoLinuxArm64 => IsMonoRuntime && IsLinux && IsArm64Process;
         public static bool IsNotMonoLinuxArm64 => !IsMonoLinuxArm64;
@@ -40,14 +40,6 @@ public static partial class PlatformDetection
         public static bool IsAppSandbox => Environment.GetEnvironmentVariable("APP_SANDBOX_CONTAINER_ID") != null;
         public static bool IsNotAppSandbox => !IsAppSandbox;
 
-        // RedHat family covers RedHat and CentOS
-        public static bool IsRedHatFamily => IsRedHatFamilyAndVersion();
-        public static bool IsNotRedHatFamily => !IsRedHatFamily;
-        public static bool IsRedHatFamily7 => IsRedHatFamilyAndVersion(7);
-        public static bool IsCentos7 => IsDistroAndVersion("centos", 7);
-        public static bool IsNotFedoraOrRedHatFamily => !IsFedora && !IsRedHatFamily;
-        public static bool IsNotDebian10 => !IsDebian10;
-
         public static Version OpenSslVersion => !IsApplePlatform && !IsWindows && !IsAndroid ?
             GetOpenSslVersion() :
             throw new PlatformNotSupportedException();
diff --git a/src/libraries/Common/tests/TestUtilities/System/PlatformDetection.cs b/src/libraries/Common/tests/TestUtilities/System/PlatformDetection.cs
index 1b7d583186c9..62523b568175 100644
--- a/src/libraries/Common/tests/TestUtilities/System/PlatformDetection.cs
+++ b/src/libraries/Common/tests/TestUtilities/System/PlatformDetection.cs
@@ -136,6 +136,10 @@ public static int SlowRuntimeTimeoutModifier
         public static bool IsThreadingSupported => (!IsWasi && !IsBrowser) || IsWasmThreadingSupported;
         public static bool IsWasmThreadingSupported => IsBrowser && IsEnvironmentVariableTrue("IsBrowserThreadingSupported");
         public static bool IsNotWasmThreadingSupported => !IsWasmThreadingSupported;
+        public static bool IsWasmBackgroundExec => IsBrowser && IsEnvironmentVariableTrue("IsWasmBackgroundExec");
+        public static bool IsThreadingSupportedNotBrowserBackgroundExec => IsWasmThreadingSupported && !IsWasmBackgroundExec;
+        public static bool IsWasmBackgroundExecOrSingleThread => IsWasmBackgroundExec || IsNotWasmThreadingSupported;
+        public static bool IsThreadingSupportedOrBrowserBackgroundExec => IsWasmBackgroundExec || !IsBrowser;
         public static bool IsBinaryFormatterSupported => IsNotMobile && !IsNativeAot;
 
         public static bool IsStartingProcessesSupported => !IsiOS && !IstvOS;
diff --git a/src/libraries/Common/tests/Tests/System/StringTests.cs b/src/libraries/Common/tests/Tests/System/StringTests.cs
index 429192f549d6..f21dbe3baad6 100644
--- a/src/libraries/Common/tests/Tests/System/StringTests.cs
+++ b/src/libraries/Common/tests/Tests/System/StringTests.cs
@@ -7612,7 +7612,7 @@ public static unsafe void InternTest()
         }
 
         [Fact]
-        public static void InternalTestAotSubset()
+        public static void InternTestAotSubset()
         {
 #pragma warning disable 0618 // suppress obsolete warning for String.Copy
             string emptyFromField = string.Empty;
@@ -7629,6 +7629,17 @@ public static void InternalTestAotSubset()
 #pragma warning restore 0618 // restore warning when accessing obsolete members
         }
 
+        [Fact]
+        public static void InternTestCanReturnNull()
+        {
+            for (int i = 0; i < 20; i++)
+            {
+                if (string.IsInterned(Guid.NewGuid().ToString()) == null)
+                    return;
+            }
+            Assert.Fail("string.IsInterned never returns null");
+        }
+
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotInvariantGlobalization))]
         [ActiveIssue("https://github.com/dotnet/runtime/issues/34577", TestPlatforms.Windows, TargetFrameworkMonikers.Netcoreapp, TestRuntimes.Mono)]
         public static unsafe void NormalizationTest() // basic test; more tests in globalization tests
diff --git a/src/libraries/Common/tests/WasmTestRunner/WasmTestRunner.cs b/src/libraries/Common/tests/WasmTestRunner/WasmTestRunner.cs
index 4c4d4a44fac4..738ec5bcec38 100644
--- a/src/libraries/Common/tests/WasmTestRunner/WasmTestRunner.cs
+++ b/src/libraries/Common/tests/WasmTestRunner/WasmTestRunner.cs
@@ -4,27 +4,30 @@
 using System;
 using System.Collections.Generic;
 using System.Threading.Tasks;
-
 using Microsoft.DotNet.XHarness.TestRunners.Common;
 using Microsoft.DotNet.XHarness.TestRunners.Xunit;
 
-public class SimpleWasmTestRunner : WasmApplicationEntryPoint
+public class WasmTestRunner : WasmApplicationEntryPoint
 {
+    protected int MaxParallelThreadsFromArg { get; set; }
+    protected override int? MaxParallelThreads => RunInParallel ? MaxParallelThreadsFromArg : base.MaxParallelThreads;
+
     public static async Task<int> Main(string[] args)
     {
         int index = 0;
 
+        var runner = new WasmTestRunner();
 #if SINGLE_FILE_TEST_RUNNER
         // This runner is also used for NativeAOT testing, which defines SINGLE_FILE_TEST_RUNNER.
-        var testAssembly = typeof(SimpleWasmTestRunner).Assembly.GetName().Name;
+        runner.TestAssembly = typeof(WasmTestRunner).Assembly.GetName().Name;
 #else
         if (args.Length == 0)
         {
-            Console.WriteLine ($"No args given");
+            Console.WriteLine($"No args given");
             return -1;
         }
 
-        var testAssembly = args[index++];
+        runner.TestAssembly = args[index++];
 #endif
 
         var excludedTraits = new List<string>();
@@ -41,23 +44,23 @@ public static async Task<int> Main(string[] args)
             switch (option)
             {
                 case "-notrait":
-                    excludedTraits.Add (args[i + 1]);
+                    excludedTraits.Add(args[i + 1]);
                     i++;
                     break;
                 case "-trait":
-                    includedTraits.Add (args[i + 1]);
+                    includedTraits.Add(args[i + 1]);
                     i++;
                     break;
                 case "-namespace":
-                    includedNamespaces.Add (args[i + 1]);
+                    includedNamespaces.Add(args[i + 1]);
                     i++;
                     break;
                 case "-class":
-                    includedClasses.Add (args[i + 1]);
+                    includedClasses.Add(args[i + 1]);
                     i++;
                     break;
                 case "-method":
-                    includedMethods.Add (args[i + 1]);
+                    includedMethods.Add(args[i + 1]);
                     i++;
                     break;
                 case "-backgroundExec":
@@ -66,20 +69,28 @@ public static async Task<int> Main(string[] args)
                 case "-untilFailed":
                     untilFailed = true;
                     break;
+                case "-threads":
+                    runner.IsThreadless = false;
+                    break;
+                case "-parallelThreads":
+                    runner.MaxParallelThreadsFromArg = Math.Max(1, int.Parse(args[i + 1]));
+                    runner.RunInParallel = runner.MaxParallelThreadsFromArg > 1;
+                    i++;
+                    break;
+                case "-verbosity":
+                    runner.MinimumLogLevel = Enum.Parse<MinimumLogLevel>(args[i + 1]);
+                    i++;
+                    break;
                 default:
                     throw new ArgumentException($"Invalid argument '{option}'.");
             }
         }
 
-        var runner = new SimpleWasmTestRunner()
-        {
-            TestAssembly = testAssembly,
-            ExcludedTraits = excludedTraits,
-            IncludedTraits = includedTraits,
-            IncludedNamespaces = includedNamespaces,
-            IncludedClasses = includedClasses,
-            IncludedMethods = includedMethods
-        };
+        runner.ExcludedTraits = excludedTraits;
+        runner.IncludedTraits = includedTraits;
+        runner.IncludedNamespaces = includedNamespaces;
+        runner.IncludedClasses = includedClasses;
+        runner.IncludedMethods = includedMethods;
 
 #if !SINGLE_FILE_TEST_RUNNER
         if (OperatingSystem.IsBrowser())
@@ -107,6 +118,14 @@ public static async Task<int> Main(string[] args)
 
 #if SINGLE_FILE_TEST_RUNNER
     protected override IEnumerable<TestAssemblyInfo> GetTestAssemblies()
-        => new[] { new TestAssemblyInfo(typeof(SimpleWasmTestRunner).Assembly, typeof(SimpleWasmTestRunner).Assembly.GetName().Name) };
+        => new[] { new TestAssemblyInfo(typeof(WasmTestRunner).Assembly, typeof(WasmTestRunner).Assembly.GetName().Name) };
 #endif
+
+    public override Task RunAsync()
+    {
+        if (RunInParallel)
+            Console.WriteLine($"Running in parallel with {MaxParallelThreads} threads.");
+
+        return base.RunAsync();
+    }
 }
diff --git a/src/libraries/Directory.Build.props b/src/libraries/Directory.Build.props
index cc1443588858..c8d1737bea13 100644
--- a/src/libraries/Directory.Build.props
+++ b/src/libraries/Directory.Build.props
@@ -16,7 +16,6 @@
 
   <PropertyGroup>
     <BeforeTargetFrameworkInferenceTargets>$(RepositoryEngineeringDir)BeforeTargetFrameworkInference.targets</BeforeTargetFrameworkInferenceTargets>
-    <ShouldUnsetParentConfigurationAndPlatform>false</ShouldUnsetParentConfigurationAndPlatform>
     <GeneratePlatformNotSupportedAssemblyHeaderFile>$(RepositoryEngineeringDir)LicenseHeader.txt</GeneratePlatformNotSupportedAssemblyHeaderFile>
   </PropertyGroup>
 
diff --git a/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/ComRuntimeHelpers.cs b/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/ComRuntimeHelpers.cs
index d090f3300cdb..c1228bc1eade 100644
--- a/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/ComRuntimeHelpers.cs
+++ b/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/ComRuntimeHelpers.cs
@@ -166,7 +166,7 @@ internal static ComTypes.ITypeInfo GetITypeInfoFromIDispatch(IDispatch dispatch)
             return typeInfo;
         }
 
-        internal static ComTypes.TYPEATTR GetTypeAttrForTypeInfo(ComTypes.ITypeInfo typeInfo)
+        internal static unsafe ComTypes.TYPEATTR GetTypeAttrForTypeInfo(ComTypes.ITypeInfo typeInfo)
         {
             IntPtr pAttrs;
             typeInfo.GetTypeAttr(out pAttrs);
@@ -179,7 +179,7 @@ internal static ComTypes.TYPEATTR GetTypeAttrForTypeInfo(ComTypes.ITypeInfo type
 
             try
             {
-                return (ComTypes.TYPEATTR)Marshal.PtrToStructure(pAttrs, typeof(ComTypes.TYPEATTR));
+                return *(ComTypes.TYPEATTR*)pAttrs;
             }
             finally
             {
@@ -187,7 +187,7 @@ internal static ComTypes.TYPEATTR GetTypeAttrForTypeInfo(ComTypes.ITypeInfo type
             }
         }
 
-        internal static ComTypes.TYPELIBATTR GetTypeAttrForTypeLib(ComTypes.ITypeLib typeLib)
+        internal static unsafe ComTypes.TYPELIBATTR GetTypeAttrForTypeLib(ComTypes.ITypeLib typeLib)
         {
             IntPtr pAttrs;
             typeLib.GetLibAttr(out pAttrs);
@@ -200,7 +200,7 @@ internal static ComTypes.TYPELIBATTR GetTypeAttrForTypeLib(ComTypes.ITypeLib typ
 
             try
             {
-                return (ComTypes.TYPELIBATTR)Marshal.PtrToStructure(pAttrs, typeof(ComTypes.TYPELIBATTR));
+                return *(ComTypes.TYPELIBATTR*)pAttrs;
             }
             finally
             {
diff --git a/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/ComTypeEnumDesc.cs b/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/ComTypeEnumDesc.cs
index 0d9b7998872d..26e526a28f3c 100644
--- a/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/ComTypeEnumDesc.cs
+++ b/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/ComTypeEnumDesc.cs
@@ -36,7 +36,7 @@ internal ComTypeEnumDesc(ComTypes.ITypeInfo typeInfo, ComTypeLibDesc typeLibDesc
 
                 try
                 {
-                    varDesc = (ComTypes.VARDESC)Marshal.PtrToStructure(p, typeof(ComTypes.VARDESC));
+                    varDesc = Marshal.PtrToStructure<ComTypes.VARDESC>(p);
 
                     if (varDesc.varkind == ComTypes.VARKIND.VAR_CONST)
                     {
diff --git a/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/DynamicVariantExtensions.cs b/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/DynamicVariantExtensions.cs
index 27647ed388f2..5b8f45469ddc 100644
--- a/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/DynamicVariantExtensions.cs
+++ b/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/DynamicVariantExtensions.cs
@@ -249,8 +249,6 @@ public static unsafe void SetAsByrefVariantIndirect(ref this ComVariant variant,
                     variant.SetAsByrefVariant(ref value);
                     return;
                 case VarEnum.VT_RECORD:
-                    // VT_RECORD's are weird in that regardless of is the VT_BYREF flag is set or not
-                    // they have the same internal representation.
                     variant = ComVariant.CreateRaw(value.VarType | VarEnum.VT_BYREF, value.GetRawDataRef<Record>());
                     break;
                 case VarEnum.VT_DECIMAL:
@@ -379,14 +377,14 @@ public static void SetBstr(this ref ComVariant variant, string value)
             variant = ComVariant.Create(new BStrWrapper(value));
         }
 
-        public static void SetUnknown(this ref ComVariant variant, object value)
+        public static void SetUnknown(this ref ComVariant variant, object? value)
         {
-            variant = ComVariant.CreateRaw(VarEnum.VT_UNKNOWN, Marshal.GetIUnknownForObject(value));
+            variant = ComVariant.CreateRaw(VarEnum.VT_UNKNOWN, value is null ? IntPtr.Zero : Marshal.GetIUnknownForObject(value));
         }
 
-        public static void SetDispatch(this ref ComVariant variant, object value)
+        public static void SetDispatch(this ref ComVariant variant, object? value)
         {
-            variant = ComVariant.CreateRaw(VarEnum.VT_DISPATCH, Marshal.GetIDispatchForObject(value));
+            variant = ComVariant.CreateRaw(VarEnum.VT_DISPATCH, value is null ? IntPtr.Zero : Marshal.GetIDispatchForObject(value));
         }
 
         public static void SetError(this ref ComVariant variant, int value)
diff --git a/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/ExcepInfo.cs b/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/ExcepInfo.cs
index 7d65926d813c..b839dbb7d21d 100644
--- a/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/ExcepInfo.cs
+++ b/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/ExcepInfo.cs
@@ -29,7 +29,7 @@ internal struct ExcepInfo
 #if DEBUG
         static ExcepInfo()
         {
-            Debug.Assert(Marshal.SizeOf(typeof(ExcepInfo)) == Marshal.SizeOf(typeof(ComTypes.EXCEPINFO)));
+            Debug.Assert(Marshal.SizeOf<ExcepInfo>() == Marshal.SizeOf<ComTypes.EXCEPINFO>());
         }
 #endif
 
diff --git a/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/IDispatchComObject.cs b/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/IDispatchComObject.cs
index 5e1f62f83d7c..f4ee622d505f 100644
--- a/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/IDispatchComObject.cs
+++ b/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/ComInterop/IDispatchComObject.cs
@@ -328,7 +328,7 @@ DynamicMetaObject IDynamicMetaObjectProvider.GetMetaObject(Expression parameter)
             return new IDispatchMetaObject(parameter, this);
         }
 
-        private static void GetFuncDescForDescIndex(ComTypes.ITypeInfo typeInfo, int funcIndex, out ComTypes.FUNCDESC funcDesc, out IntPtr funcDescHandle)
+        private static unsafe void GetFuncDescForDescIndex(ComTypes.ITypeInfo typeInfo, int funcIndex, out ComTypes.FUNCDESC funcDesc, out IntPtr funcDescHandle)
         {
             IntPtr pFuncDesc;
             typeInfo.GetFuncDesc(funcIndex, out pFuncDesc);
@@ -339,7 +339,7 @@ private static void GetFuncDescForDescIndex(ComTypes.ITypeInfo typeInfo, int fun
                 throw Error.CannotRetrieveTypeInformation();
             }
 
-            funcDesc = (ComTypes.FUNCDESC)Marshal.PtrToStructure(pFuncDesc, typeof(ComTypes.FUNCDESC));
+            funcDesc = *(ComTypes.FUNCDESC*)pFuncDesc;
             funcDescHandle = pFuncDesc;
         }
 
diff --git a/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/SymbolTable.cs b/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/SymbolTable.cs
index b2f31a5eaf56..451cdd543469 100644
--- a/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/SymbolTable.cs
+++ b/src/libraries/Microsoft.CSharp/src/Microsoft/CSharp/RuntimeBinder/SymbolTable.cs
@@ -1439,7 +1439,7 @@ private static void SetParameterDataForMethProp(MethodOrPropertySymbol methProp,
             if (parameters.Length > 0)
             {
                 // See if we have a param array.
-                if (parameters[parameters.Length - 1].GetCustomAttribute(typeof(ParamArrayAttribute), false) != null)
+                if (parameters[parameters.Length - 1].GetCustomAttribute<ParamArrayAttribute>(false) != null)
                 {
                     methProp.isParamArray = true;
                 }
diff --git a/src/libraries/Microsoft.CSharp/tests/BindingErrors.cs b/src/libraries/Microsoft.CSharp/tests/BindingErrors.cs
index cec23a2f0cb3..d5cf0644cebe 100644
--- a/src/libraries/Microsoft.CSharp/tests/BindingErrors.cs
+++ b/src/libraries/Microsoft.CSharp/tests/BindingErrors.cs
@@ -61,6 +61,10 @@ public void DoSomething(double d)
             {
             }
 
+            public void DoSomething(float d)
+            {
+            }
+
             public static void DoSomething(int i)
             {
             }
diff --git a/src/libraries/Microsoft.Extensions.Configuration.CommandLine/src/Microsoft.Extensions.Configuration.CommandLine.csproj b/src/libraries/Microsoft.Extensions.Configuration.CommandLine/src/Microsoft.Extensions.Configuration.CommandLine.csproj
index 6bc91a8ffadb..606dbd874297 100644
--- a/src/libraries/Microsoft.Extensions.Configuration.CommandLine/src/Microsoft.Extensions.Configuration.CommandLine.csproj
+++ b/src/libraries/Microsoft.Extensions.Configuration.CommandLine/src/Microsoft.Extensions.Configuration.CommandLine.csproj
@@ -2,6 +2,7 @@
 
   <PropertyGroup>
     <TargetFrameworks>$(NetCoreAppCurrent);$(NetCoreAppPrevious);$(NetCoreAppMinimum);netstandard2.0;$(NetFrameworkMinimum)</TargetFrameworks>
+    <NoWarn>$(NoWarn);CA1866</NoWarn>
     <EnableDefaultItems>true</EnableDefaultItems>
     <IsPackable>true</IsPackable>
     <PackageDescription>Command line configuration provider implementation for Microsoft.Extensions.Configuration. This package enables you to read configuration parameters from the command line arguments of your application. You can use CommandLineConfigurationExtensions.AddCommandLine extension method on IConfigurationBuilder to add the command line configuration provider to the configuration builder.</PackageDescription>
diff --git a/src/libraries/Microsoft.Extensions.DependencyInjection.Abstractions/src/ActivatorUtilities.cs b/src/libraries/Microsoft.Extensions.DependencyInjection.Abstractions/src/ActivatorUtilities.cs
index e8eae2d2e9ee..6dbd4b3495ab 100644
--- a/src/libraries/Microsoft.Extensions.DependencyInjection.Abstractions/src/ActivatorUtilities.cs
+++ b/src/libraries/Microsoft.Extensions.DependencyInjection.Abstractions/src/ActivatorUtilities.cs
@@ -9,6 +9,7 @@
 using System.Reflection;
 using System.Runtime.CompilerServices;
 using System.Runtime.ExceptionServices;
+using System.Runtime.InteropServices;
 using Microsoft.Extensions.Internal;
 
 #if NETCOREAPP
@@ -28,9 +29,7 @@ public static class ActivatorUtilities
 
         // Support caching of constructor metadata for types in collectible assemblies.
         private static readonly Lazy<ConditionalWeakTable<Type, ConstructorInfoEx[]>> s_collectibleConstructorInfos = new();
-#endif
 
-#if NET8_0_OR_GREATER
         // Maximum number of fixed arguments for ConstructorInvoker.Invoke(arg1, etc).
         private const int FixedArgumentThreshold = 4;
 #endif
@@ -66,10 +65,23 @@ public static object CreateInstance(
             {
                 constructors = GetOrAddConstructors(instanceType);
             }
+
+            // Attempt to use the stack allocated arg values if <= 4 ctor args.
+            StackAllocatedObjects stackValues = default;
+            int maxArgs = GetMaxArgCount();
+            Span<object?> values = maxArgs <= StackAllocatedObjects.MaxStackAllocArgCount / 2 ?
+                stackValues :
+                new object?[maxArgs * 2];
+
+            Span<object?> ctorArgs = values.Slice(0, maxArgs);
+            Span<object?> bestCtorArgs = values.Slice(maxArgs, maxArgs);
 #else
             constructors = CreateConstructorInfoExs(instanceType);
+            object?[]? ctorArgs = null;
+            object?[]? bestCtorArgs = null;
 #endif
 
+            scoped ConstructorMatcher matcher = default;
             ConstructorInfoEx? constructor;
             IServiceProviderIsService? serviceProviderIsService = provider.GetService<IServiceProviderIsService>();
             // if container supports using IServiceProviderIsService, we try to find the longest ctor that
@@ -79,44 +91,71 @@ public static object CreateInstance(
             // instance if all parameters given to CreateInstance only match with a single ctor
             if (serviceProviderIsService != null)
             {
-                int bestLength = -1;
-                bool seenPreferred = false;
-
-                ConstructorMatcher bestMatcher = default;
-                bool multipleBestLengthFound = false;
-
+                // Handle the case where the attribute is used.
                 for (int i = 0; i < constructors.Length; i++)
                 {
                     constructor = constructors[i];
-                    ConstructorMatcher matcher = new(constructor);
-                    bool isPreferred = constructor.IsPreferred;
-                    int length = matcher.Match(parameters, serviceProviderIsService);
 
-                    if (isPreferred)
+                    if (constructor.IsPreferred)
                     {
-                        if (seenPreferred)
+                        for (int j = i + 1; j < constructors.Length; j++)
                         {
-                            ThrowMultipleCtorsMarkedWithAttributeException();
+                            if (constructors[j].IsPreferred)
+                            {
+                                ThrowMultipleCtorsMarkedWithAttributeException();
+                            }
                         }
 
-                        if (length == -1)
+                        InitializeCtorArgValues(ref ctorArgs, constructor.Parameters.Length);
+                        matcher = new ConstructorMatcher(constructor, ctorArgs);
+                        if (matcher.Match(parameters, serviceProviderIsService) == -1)
                         {
                             ThrowMarkedCtorDoesNotTakeAllProvidedArguments();
                         }
+
+                        return matcher.CreateInstance(provider);
                     }
+                }
+
+                int bestLength = -1;
+                scoped ConstructorMatcher bestMatcher = default;
+                bool multipleBestLengthFound = false;
+
+                // Find the constructor with the most matches.
+                for (int i = 0; i < constructors.Length; i++)
+                {
+                    constructor = constructors[i];
+
+                    InitializeCtorArgValues(ref ctorArgs, constructor.Parameters.Length);
+                    matcher = new ConstructorMatcher(constructor, ctorArgs);
+                    int length = matcher.Match(parameters, serviceProviderIsService);
+
+                    Debug.Assert(!constructor.IsPreferred);
 
-                    if (isPreferred || bestLength < length)
+                    if (bestLength < length)
                     {
                         bestLength = length;
-                        bestMatcher = matcher;
+#if NETCOREAPP
+                        ctorArgs.CopyTo(bestCtorArgs);
+#else
+                        if (i == constructors.Length - 1)
+                        {
+                            // We can prevent an alloc for the last case.
+                            bestCtorArgs = ctorArgs;
+                        }
+                        else
+                        {
+                            bestCtorArgs = new object?[length];
+                            ctorArgs.CopyTo(bestCtorArgs, 0);
+                        }
+#endif
+                        bestMatcher = new ConstructorMatcher(matcher.ConstructorInfo, bestCtorArgs);
                         multipleBestLengthFound = false;
                     }
                     else if (bestLength == length)
                     {
                         multipleBestLengthFound = true;
                     }
-
-                    seenPreferred |= isPreferred;
                 }
 
                 if (bestLength != -1)
@@ -144,24 +183,43 @@ public static object CreateInstance(
                 }
             }
 
-            FindApplicableConstructor(instanceType, argumentTypes, out ConstructorInfo constructorInfo, out int?[] parameterMap);
+            FindApplicableConstructor(instanceType, argumentTypes, constructors, out ConstructorInfo constructorInfo, out int?[] parameterMap);
+            constructor = FindConstructorEx(constructorInfo, constructors);
 
-            // Find the ConstructorInfoEx from the given constructorInfo.
-            constructor = null;
-            foreach (ConstructorInfoEx ctor in constructors)
+            InitializeCtorArgValues(ref ctorArgs, constructor.Parameters.Length);
+            matcher = new ConstructorMatcher(constructor, ctorArgs);
+            matcher.MapParameters(parameterMap, parameters);
+            return matcher.CreateInstance(provider);
+
+#if NETCOREAPP
+            int GetMaxArgCount()
             {
-                if (ReferenceEquals(ctor.Info, constructorInfo))
+                int max = 0;
+                for (int i = 0; i < constructors.Length; i++)
                 {
-                    constructor = ctor;
-                    break;
+                    max = int.Max(max, constructors[i].Parameters.Length);
                 }
-            }
 
-            Debug.Assert(constructor != null);
+                return max;
+            }
 
-            var constructorMatcher = new ConstructorMatcher(constructor);
-            constructorMatcher.MapParameters(parameterMap, parameters);
-            return constructorMatcher.CreateInstance(provider);
+            static void InitializeCtorArgValues(ref Span<object?> ctorArgs, int _)
+            {
+                ctorArgs.Clear();
+            }
+#else
+            static void InitializeCtorArgValues(ref object[] ctorArgs, int length)
+            {
+                if (ctorArgs is not null && ctorArgs.Length == length)
+                {
+                    Array.Clear(ctorArgs, 0, length);
+                }
+                else
+                {
+                    ctorArgs = new object?[length];
+                }
+            }
+#endif
         }
 
 #if NETCOREAPP
@@ -275,7 +333,7 @@ public static ObjectFactory<T>
 
         private static void CreateFactoryInternal([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] Type instanceType, Type[] argumentTypes, out ParameterExpression provider, out ParameterExpression argumentArray, out Expression factoryExpressionBody)
         {
-            FindApplicableConstructor(instanceType, argumentTypes, out ConstructorInfo constructor, out int?[] parameterMap);
+            FindApplicableConstructor(instanceType, argumentTypes, constructors: null, out ConstructorInfo constructor, out int?[] parameterMap);
 
             provider = Expression.Parameter(typeof(IServiceProvider), "provider");
             argumentArray = Expression.Parameter(typeof(object[]), "argumentArray");
@@ -396,10 +454,10 @@ private static ObjectFactory CreateFactoryReflection(
             [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] Type instanceType,
             Type?[] argumentTypes)
         {
-            FindApplicableConstructor(instanceType, argumentTypes, out ConstructorInfo constructor, out int?[] parameterMap);
+            FindApplicableConstructor(instanceType, argumentTypes, constructors: null, out ConstructorInfo constructor, out int?[] parameterMap);
             Type declaringType = constructor.DeclaringType!;
 
-#if NET8_0_OR_GREATER
+#if NETCOREAPP
             ConstructorInvoker invoker = ConstructorInvoker.Create(constructor);
 
             ParameterInfo[] constructorParameters = constructor.GetParameters();
@@ -468,7 +526,7 @@ ObjectFactory InvokeCanonical()
 
             FactoryParameterContext[] parameters = GetFactoryParameterContext();
             return (serviceProvider, arguments) => ReflectionFactoryCanonical(constructor, parameters, declaringType, serviceProvider, arguments);
-#endif // NET8_0_OR_GREATER
+#endif // NETCOREAPP
 
             FactoryParameterContext[] GetFactoryParameterContext()
             {
@@ -513,13 +571,14 @@ public FactoryParameterContext(Type parameterType, bool hasDefaultValue, object?
         private static void FindApplicableConstructor(
             [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] Type instanceType,
             Type?[] argumentTypes,
+            ConstructorInfoEx[]? constructors,
             out ConstructorInfo matchingConstructor,
             out int?[] matchingParameterMap)
         {
             ConstructorInfo? constructorInfo;
             int?[]? parameterMap;
 
-            if (!TryFindPreferredConstructor(instanceType, argumentTypes, out constructorInfo, out parameterMap) &&
+            if (!TryFindPreferredConstructor(instanceType, argumentTypes, constructors, out constructorInfo, out parameterMap) &&
                 !TryFindMatchingConstructor(instanceType, argumentTypes, out constructorInfo, out parameterMap))
             {
                 throw new InvalidOperationException(SR.Format(SR.CtorNotLocated, instanceType));
@@ -529,6 +588,21 @@ private static void FindApplicableConstructor(
             matchingParameterMap = parameterMap;
         }
 
+        // Find the ConstructorInfoEx from the given constructorInfo.
+        private static ConstructorInfoEx FindConstructorEx(ConstructorInfo constructorInfo, ConstructorInfoEx[] constructorExs)
+        {
+            for (int i = 0; i < constructorExs.Length; i++)
+            {
+                if (ReferenceEquals(constructorExs[i].Info, constructorInfo))
+                {
+                    return constructorExs[i];
+                }
+            }
+
+            Debug.Assert(false);
+            return null!;
+        }
+
         // Tries to find constructor based on provided argument types
         private static bool TryFindMatchingConstructor(
             [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] Type instanceType,
@@ -566,6 +640,7 @@ private static bool TryFindMatchingConstructor(
         private static bool TryFindPreferredConstructor(
             [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] Type instanceType,
             Type?[] argumentTypes,
+            ConstructorInfoEx[]? constructors,
             [NotNullWhen(true)] out ConstructorInfo? matchingConstructor,
             [NotNullWhen(true)] out int?[]? parameterMap)
         {
@@ -573,21 +648,33 @@ private static bool TryFindPreferredConstructor(
             matchingConstructor = null;
             parameterMap = null;
 
-            foreach (ConstructorInfo? constructor in instanceType.GetConstructors())
+            if (constructors is null)
             {
-                if (constructor.IsDefined(typeof(ActivatorUtilitiesConstructorAttribute), false))
+#if NETCOREAPP
+                if (!s_constructorInfos.TryGetValue(instanceType, out constructors))
+                {
+                    constructors = GetOrAddConstructors(instanceType);
+                }
+#else
+                constructors = CreateConstructorInfoExs(instanceType);
+#endif
+            }
+
+            foreach (ConstructorInfoEx constructor in constructors)
+            {
+                if (constructor.IsPreferred)
                 {
                     if (seenPreferred)
                     {
                         ThrowMultipleCtorsMarkedWithAttributeException();
                     }
 
-                    if (!TryCreateParameterMap(constructor.GetParameters(), argumentTypes, out int?[] tempParameterMap))
+                    if (!TryCreateParameterMap(constructor.Info.GetParameters(), argumentTypes, out int?[] tempParameterMap))
                     {
                         ThrowMarkedCtorDoesNotTakeAllProvidedArguments();
                     }
 
-                    matchingConstructor = constructor;
+                    matchingConstructor = constructor.Info;
                     parameterMap = tempParameterMap;
                     seenPreferred = true;
                 }
@@ -644,6 +731,17 @@ private sealed class ConstructorInfoEx
             public readonly ParameterInfo[] Parameters;
             public readonly bool IsPreferred;
             private readonly object?[]? _parameterKeys;
+#if NETCOREAPP
+            public ConstructorInvoker? _invoker;
+            public ConstructorInvoker Invoker
+            {
+                get
+                {
+                    _invoker ??= ConstructorInvoker.Create(Info);
+                    return _invoker;
+                }
+            }
+#endif
 
             public ConstructorInfoEx(ConstructorInfo constructor)
             {
@@ -705,17 +803,24 @@ public bool IsService(IServiceProviderIsService serviceProviderIsService, int pa
             }
         }
 
-        private readonly struct ConstructorMatcher
+        private readonly ref struct ConstructorMatcher
         {
             private readonly ConstructorInfoEx _constructor;
-            private readonly object?[] _parameterValues;
 
-            public ConstructorMatcher(ConstructorInfoEx constructor)
+#if NETCOREAPP
+            private readonly Span<object?> _parameterValues;
+            public ConstructorMatcher(ConstructorInfoEx constructor, Span<object?> parameterValues)
+#else
+            private readonly object?[] _parameterValues;
+            public ConstructorMatcher(ConstructorInfoEx constructor, object?[] parameterValues)
+#endif
             {
                 _constructor = constructor;
-                _parameterValues = new object[constructor.Parameters.Length];
+                _parameterValues = parameterValues;
             }
 
+            public ConstructorInfoEx ConstructorInfo => _constructor;
+
             public int Match(object[] givenParameters, IServiceProviderIsService serviceProviderIsService)
             {
                 for (int givenIndex = 0; givenIndex < givenParameters.Length; givenIndex++)
@@ -785,7 +890,9 @@ public object CreateInstance(IServiceProvider provider)
                     }
                 }
 
-#if NETFRAMEWORK || NETSTANDARD2_0
+#if NETCOREAPP
+                return _constructor.Invoker.Invoke(_parameterValues.Slice(0, _constructor.Parameters.Length));
+#else
                 try
                 {
                     return _constructor.Info.Invoke(_parameterValues);
@@ -796,8 +903,6 @@ public object CreateInstance(IServiceProvider provider)
                     // The above line will always throw, but the compiler requires we throw explicitly.
                     throw;
                 }
-#else
-                return _constructor.Info.Invoke(BindingFlags.DoNotWrapExceptions, binder: null, parameters: _parameterValues, culture: null);
 #endif
             }
 
@@ -823,7 +928,7 @@ private static void ThrowMarkedCtorDoesNotTakeAllProvidedArguments()
             throw new InvalidOperationException(SR.Format(SR.MarkedCtorMissingArgumentTypes, nameof(ActivatorUtilitiesConstructorAttribute)));
         }
 
-#if NET8_0_OR_GREATER // Use the faster ConstructorInvoker which also has alloc-free APIs when <= 4 parameters.
+#if NETCOREAPP // Use the faster ConstructorInvoker which also has alloc-free APIs when <= 4 parameters.
         private static object ReflectionFactoryServiceOnlyFixed(
             ConstructorInvoker invoker,
             FactoryParameterContext[] parameters,
@@ -1096,7 +1201,7 @@ private static object ReflectionFactoryCanonical(
 
             return constructor.Invoke(BindingFlags.DoNotWrapExceptions, binder: null, constructorArguments, culture: null);
         }
-#endif // NET8_0_OR_GREATER
+#endif
 
 #if NETCOREAPP
         internal static class ActivatorUtilitiesUpdateHandler
@@ -1111,6 +1216,13 @@ public static void ClearCache(Type[]? _)
                 }
             }
         }
+
+        [InlineArray(MaxStackAllocArgCount)]
+        private struct StackAllocatedObjects
+        {
+            internal const int MaxStackAllocArgCount = 8;
+            private object? _arg0;
+        }
 #endif
 
         private static object? GetKeyedService(IServiceProvider provider, Type type, object? serviceKey)
diff --git a/src/libraries/Microsoft.Extensions.DependencyInjection.Abstractions/src/ServiceCollectionServiceExtensions.Keyed.cs b/src/libraries/Microsoft.Extensions.DependencyInjection.Abstractions/src/ServiceCollectionServiceExtensions.Keyed.cs
index 788b20f5be19..1f1f1cef1a4e 100644
--- a/src/libraries/Microsoft.Extensions.DependencyInjection.Abstractions/src/ServiceCollectionServiceExtensions.Keyed.cs
+++ b/src/libraries/Microsoft.Extensions.DependencyInjection.Abstractions/src/ServiceCollectionServiceExtensions.Keyed.cs
@@ -327,7 +327,6 @@ public static IServiceCollection AddKeyedScoped<TService, TImplementation>(
             return services.AddKeyedScoped(typeof(TService), serviceKey, implementationFactory);
         }
 
-
         /// <summary>
         /// Adds a singleton service of the type specified in <paramref name="serviceType"/> with an
         /// implementation of the type specified in <paramref name="implementationType"/> to the
diff --git a/src/libraries/Microsoft.Extensions.DependencyInjection/src/Microsoft.Extensions.DependencyInjection.csproj b/src/libraries/Microsoft.Extensions.DependencyInjection/src/Microsoft.Extensions.DependencyInjection.csproj
index 9b57cba34093..6dc4c5cd9ce9 100644
--- a/src/libraries/Microsoft.Extensions.DependencyInjection/src/Microsoft.Extensions.DependencyInjection.csproj
+++ b/src/libraries/Microsoft.Extensions.DependencyInjection/src/Microsoft.Extensions.DependencyInjection.csproj
@@ -1,4 +1,4 @@
-﻿<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
     <TargetFrameworks>$(NetCoreAppCurrent);$(NetCoreAppPrevious);$(NetCoreAppMinimum);netstandard2.1;netstandard2.0;$(NetFrameworkMinimum)</TargetFrameworks>
@@ -14,8 +14,8 @@
   <PropertyGroup>
     <ILEmitBackend Condition="'$(TargetFramework)' != 'netstandard2.0'">true</ILEmitBackend>
     <DefineConstants Condition="'$(ILEmitBackend)' == 'true'">$(DefineConstants);IL_EMIT</DefineConstants>
-    <DefineConstants Condition="$([MSBuild]::GetTargetFrameworkIdentifier('$(TargetFramework)')) == '.NETFramework' and
-                                '$(ILEmitBackendSaveAssemblies)' == 'true'">$(DefineConstants);SAVE_ASSEMBLIES</DefineConstants>
+    <DefineConstants Condition="($([MSBuild]::GetTargetFrameworkIdentifier('$(TargetFramework)')) == '.NETFramework' or $([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net9.0'))) and
+                               '$(ILEmitBackendSaveAssemblies)' == 'true'">$(DefineConstants);SAVE_ASSEMBLIES</DefineConstants>
   </PropertyGroup>
 
   <ItemGroup>
diff --git a/src/libraries/Microsoft.Extensions.DependencyInjection/src/ServiceLookup/CallSiteValidator.cs b/src/libraries/Microsoft.Extensions.DependencyInjection/src/ServiceLookup/CallSiteValidator.cs
index e272c8a3d722..9ee79abbdd7f 100644
--- a/src/libraries/Microsoft.Extensions.DependencyInjection/src/ServiceLookup/CallSiteValidator.cs
+++ b/src/libraries/Microsoft.Extensions.DependencyInjection/src/ServiceLookup/CallSiteValidator.cs
@@ -10,21 +10,15 @@ namespace Microsoft.Extensions.DependencyInjection.ServiceLookup
     internal sealed class CallSiteValidator : CallSiteVisitor<CallSiteValidator.CallSiteValidatorState, Type?>
     {
         // Keys are services being resolved via GetService, values - first scoped service in their call site tree
-        private readonly ConcurrentDictionary<ServiceCacheKey, Type> _scopedServices = new ConcurrentDictionary<ServiceCacheKey, Type>();
+        private readonly ConcurrentDictionary<ServiceCacheKey, Type?> _scopedServices = new ConcurrentDictionary<ServiceCacheKey, Type?>();
 
-        public void ValidateCallSite(ServiceCallSite callSite)
-        {
-            Type? scoped = VisitCallSite(callSite, default);
-            if (scoped != null)
-            {
-                _scopedServices[callSite.Cache.Key] = scoped;
-            }
-        }
+        public void ValidateCallSite(ServiceCallSite callSite) => VisitCallSite(callSite, default);
 
         public void ValidateResolution(ServiceCallSite callSite, IServiceScope scope, IServiceScope rootScope)
         {
             if (ReferenceEquals(scope, rootScope)
-                && _scopedServices.TryGetValue(callSite.Cache.Key, out Type? scopedService))
+                && _scopedServices.TryGetValue(callSite.Cache.Key, out Type? scopedService)
+                && scopedService != null)
             {
                 Type serviceType = callSite.ServiceType;
                 if (serviceType == scopedService)
@@ -42,6 +36,34 @@ public void ValidateResolution(ServiceCallSite callSite, IServiceScope scope, IS
             }
         }
 
+        protected override Type? VisitCallSite(ServiceCallSite callSite, CallSiteValidatorState argument)
+        {
+            // First, check if we have encountered this call site before to prevent visiting call site trees that have already been visited
+            // If firstScopedServiceInCallSiteTree is null there are no scoped dependencies in this service's call site tree
+            // If firstScopedServiceInCallSiteTree has a value, it contains the first scoped service in this service's call site tree
+            if (!_scopedServices.TryGetValue(callSite.Cache.Key, out Type? firstScopedServiceInCallSiteTree))
+            {
+                // This call site wasn't cached yet, walk the tree
+                firstScopedServiceInCallSiteTree = base.VisitCallSite(callSite, argument);
+
+                // Cache the result
+                _scopedServices[callSite.Cache.Key] = firstScopedServiceInCallSiteTree;
+            }
+
+            // If there is a scoped service in the call site tree, make sure we are not resolving it from a singleton
+            if (firstScopedServiceInCallSiteTree != null && argument.Singleton != null)
+            {
+                throw new InvalidOperationException(SR.Format(SR.ScopedInSingletonException,
+                    callSite.ServiceType,
+                    argument.Singleton.ServiceType,
+                    nameof(ServiceLifetime.Scoped).ToLowerInvariant(),
+                    nameof(ServiceLifetime.Singleton).ToLowerInvariant()
+                    ));
+            }
+
+            return firstScopedServiceInCallSiteTree;
+        }
+
         protected override Type? VisitConstructor(ConstructorCallSite constructorCallSite, CallSiteValidatorState state)
         {
             Type? result = null;
@@ -78,15 +100,6 @@ public void ValidateResolution(ServiceCallSite callSite, IServiceScope scope, IS
             {
                 return null;
             }
-            if (state.Singleton != null)
-            {
-                throw new InvalidOperationException(SR.Format(SR.ScopedInSingletonException,
-                    scopedCallSite.ServiceType,
-                    state.Singleton.ServiceType,
-                    nameof(ServiceLifetime.Scoped).ToLowerInvariant(),
-                    nameof(ServiceLifetime.Singleton).ToLowerInvariant()
-                    ));
-            }
 
             VisitCallSiteMain(scopedCallSite, state);
             return scopedCallSite.ServiceType;
diff --git a/src/libraries/Microsoft.Extensions.DependencyInjection/src/ServiceLookup/ILEmit/ILEmitResolverBuilder.cs b/src/libraries/Microsoft.Extensions.DependencyInjection/src/ServiceLookup/ILEmit/ILEmitResolverBuilder.cs
index 13ac56d9a5cb..976869401572 100644
--- a/src/libraries/Microsoft.Extensions.DependencyInjection/src/ServiceLookup/ILEmit/ILEmitResolverBuilder.cs
+++ b/src/libraries/Microsoft.Extensions.DependencyInjection/src/ServiceLookup/ILEmit/ILEmitResolverBuilder.cs
@@ -104,8 +104,13 @@ private GeneratedMethod BuildTypeNoCache(ServiceCallSite callSite)
             var assemblyName = "Test" + DateTime.Now.Ticks;
             var fileName = assemblyName + ".dll";
 
+#if NETFRAMEWORK
             var assembly = AssemblyBuilder.DefineDynamicAssembly(new AssemblyName(assemblyName), AssemblyBuilderAccess.RunAndSave);
             var module = assembly.DefineDynamicModule(assemblyName, fileName);
+#else
+            var assembly = new PersistedAssemblyBuilder(new AssemblyName(assemblyName), typeof(object).Assembly);
+            var module = assembly.DefineDynamicModule(assemblyName);
+#endif
             var type = module.DefineType(callSite.ServiceType.Name + "Resolver");
 
             var method = type.DefineMethod(
@@ -114,7 +119,6 @@ private GeneratedMethod BuildTypeNoCache(ServiceCallSite callSite)
 
             GenerateMethodBody(callSite, method.GetILGenerator());
             type.CreateTypeInfo();
-            // Assembly.Save is only available in .NET Framework (https://github.com/dotnet/runtime/issues/15704)
             assembly.Save(fileName);
 #endif
             DependencyInjectionEventSource.Log.DynamicMethodBuilt(_rootScope.RootProvider, callSite.ServiceType, ilGenerator.ILOffset);
@@ -179,7 +183,7 @@ private GeneratedMethod BuildTypeNoCache(ServiceCallSite callSite)
             AddConstant(argument, generatedMethod.Lambda);
             // ProviderScope
             argument.Generator.Emit(OpCodes.Ldarg_1);
-            argument.Generator.Emit(OpCodes.Call, generatedMethod.Lambda.GetType().GetMethod("Invoke"));
+            argument.Generator.Emit(OpCodes.Call, generatedMethod.Lambda.GetType().GetMethod("Invoke")!);
 #else
             AddConstant(argument, generatedMethod.Context);
             // ProviderScope
diff --git a/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ActivatorUtilitiesTests.cs b/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ActivatorUtilitiesTests.cs
index f6e7c2f3a8eb..1144ff33a1c7 100644
--- a/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ActivatorUtilitiesTests.cs
+++ b/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ActivatorUtilitiesTests.cs
@@ -170,6 +170,96 @@ public void CreateInstance_ClassWithABC_ConstructorWithAttribute_PicksCtorWithAt
             Assert.Same(a, instance.A);
         }
 
+        [Fact]
+        public void CreateInstanceFailsWithAmbiguousConstructor()
+        {
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddTransient<ClassWithA_And_B>();
+            serviceCollection.AddTransient<A>();
+            serviceCollection.AddTransient<B>();
+
+            var serviceProvider = serviceCollection.BuildServiceProvider();
+
+            // Neither ctor(A) nor ctor(B) have [ActivatorUtilitiesConstructor].
+            Assert.Throws<InvalidOperationException>(() => ActivatorUtilities.CreateInstance<ClassWithA_And_B>(serviceProvider));
+        }
+
+        [Fact]
+        public void CreateInstanceFailsWithAmbiguousConstructor_ReversedOrder()
+        {
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddTransient<ClassWithB_And_A>();
+            serviceCollection.AddTransient<A>();
+            serviceCollection.AddTransient<B>();
+
+            var serviceProvider = serviceCollection.BuildServiceProvider();
+
+            // Neither ctor(A) nor ctor(B) have [ActivatorUtilitiesConstructor].
+            Assert.Throws<InvalidOperationException>(() => ActivatorUtilities.CreateInstance<ClassWithA_And_B>(serviceProvider));
+        }
+
+        [Fact]
+        public void CreateInstancePassesWithAmbiguousConstructor()
+        {
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddTransient<ClassWithA_And_B_ActivatorUtilitiesConstructorAttribute>();
+            serviceCollection.AddTransient<A>();
+            serviceCollection.AddTransient<B>();
+
+            var serviceProvider = serviceCollection.BuildServiceProvider();
+            var service = ActivatorUtilities.CreateInstance<ClassWithA_And_B_ActivatorUtilitiesConstructorAttribute>(serviceProvider);
+
+            // Ensure ctor(A) was selected over ctor(B) since A has [ActivatorUtilitiesConstructor].
+            Assert.NotNull(service.A);
+        }
+
+        [Fact]
+        public void CreateInstancePassesWithAmbiguousConstructor_ReversedOrder()
+        {
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddTransient<ClassWithB_And_A_ActivatorUtilitiesConstructorAttribute>();
+            serviceCollection.AddTransient<A>();
+            serviceCollection.AddTransient<B>();
+
+            var serviceProvider = serviceCollection.BuildServiceProvider();
+            var service = ActivatorUtilities.CreateInstance<ClassWithB_And_A_ActivatorUtilitiesConstructorAttribute>(serviceProvider);
+
+            // Ensure ctor(A) was selected over ctor(B) since A has [ActivatorUtilitiesConstructor].
+            Assert.NotNull(service.A);
+        }
+
+        [Fact]
+        public void CreateInstanceIgnoresActivatorUtilitiesConstructorAttribute()
+        {
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddTransient<ClassWithA_And_AB_ActivatorUtilitiesConstructorAttribute>();
+            serviceCollection.AddTransient<A>();
+            serviceCollection.AddTransient<B>();
+
+            var serviceProvider = serviceCollection.BuildServiceProvider();
+            var service = ActivatorUtilities.CreateInstance<ClassWithA_And_AB_ActivatorUtilitiesConstructorAttribute>(serviceProvider);
+
+            // Ensure ctor(A) was selected since A has [ActivatorUtilitiesConstructor].
+            Assert.NotNull(service.A);
+            Assert.Null(service.B);
+        }
+
+        [Fact]
+        public void CreateInstanceIgnoresActivatorUtilitiesConstructorAttribute_ReversedOrder()
+        {
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddTransient<ClassWithAB_And_A_ActivatorUtilitiesConstructorAttribute>();
+            serviceCollection.AddTransient<A>();
+            serviceCollection.AddTransient<B>();
+
+            var serviceProvider = serviceCollection.BuildServiceProvider();
+            var service = ActivatorUtilities.CreateInstance<ClassWithAB_And_A_ActivatorUtilitiesConstructorAttribute>(serviceProvider);
+
+            // Ensure ctor(A) was selected since it has [ActivatorUtilitiesConstructor].
+            Assert.NotNull(service.A);
+            Assert.Null(service.B);
+        }
+
         [Fact]
         public void CreateInstance_ClassWithABC_MultipleCtorsWithSameLength_ThrowsAmbiguous()
         {
@@ -662,6 +752,108 @@ public ClassWithABC_LastConstructorWithAttribute(B b, C c) : this(null, b, c) {
         public ClassWithABC_LastConstructorWithAttribute(A a, B b, C c) : base(a, b, c) { }
     }
 
+    internal class ClassWithA_And_B
+    {
+        public ClassWithA_And_B(A a)
+        {
+            A = a;
+        }
+
+        public ClassWithA_And_B(B b)
+        {
+            B = b;
+        }
+
+        public A A { get; }
+        public B B { get; }
+    }
+
+    internal class ClassWithB_And_A
+    {
+        public ClassWithB_And_A(A a)
+        {
+            A = a;
+        }
+
+        public ClassWithB_And_A(B b)
+        {
+            B = b;
+        }
+
+        public A A { get; }
+        public B B { get; }
+    }
+
+    internal class ClassWithA_And_B_ActivatorUtilitiesConstructorAttribute
+    {
+        [ActivatorUtilitiesConstructor]
+        public ClassWithA_And_B_ActivatorUtilitiesConstructorAttribute(A a)
+        {
+            A = a;
+        }
+
+        public ClassWithA_And_B_ActivatorUtilitiesConstructorAttribute(B b)
+        {
+            B = b;
+        }
+
+        public A A { get; }
+        public B B { get; }
+    }
+
+    internal class ClassWithB_And_A_ActivatorUtilitiesConstructorAttribute
+    {
+        public ClassWithB_And_A_ActivatorUtilitiesConstructorAttribute(B b)
+        {
+            B = b;
+        }
+
+        [ActivatorUtilitiesConstructor]
+        public ClassWithB_And_A_ActivatorUtilitiesConstructorAttribute(A a)
+        {
+            A = a;
+        }
+
+        public A A { get; }
+        public B B { get; }
+    }
+
+    internal class ClassWithA_And_AB_ActivatorUtilitiesConstructorAttribute
+    {
+        [ActivatorUtilitiesConstructor]
+        public ClassWithA_And_AB_ActivatorUtilitiesConstructorAttribute(A a)
+        {
+            A = a;
+        }
+
+        public ClassWithA_And_AB_ActivatorUtilitiesConstructorAttribute(A a, B b)
+        {
+            A = a;
+            B = b;
+        }
+
+        public A A { get; }
+        public B B { get; }
+    }
+
+    internal class ClassWithAB_And_A_ActivatorUtilitiesConstructorAttribute
+    {
+        public ClassWithAB_And_A_ActivatorUtilitiesConstructorAttribute(A a, B b)
+        {
+            A = a;
+            B = b;
+        }
+
+        [ActivatorUtilitiesConstructor]
+        public ClassWithAB_And_A_ActivatorUtilitiesConstructorAttribute(A a)
+        {
+            A = a;
+        }
+
+        public A A { get; }
+        public B B { get; }
+    }
+
     internal class FakeServiceProvider : IServiceProvider
     {
         private IServiceProvider _inner;
diff --git a/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceCollectionKeyedServiceExtensionsTest.cs b/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceCollectionKeyedServiceExtensionsTest.cs
index 6be7e22cce2c..3715c619c8fa 100644
--- a/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceCollectionKeyedServiceExtensionsTest.cs
+++ b/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceCollectionKeyedServiceExtensionsTest.cs
@@ -262,18 +262,19 @@ public static TheoryData TryAddEnumerableImplementationTypeData
             {
                 var serviceType = typeof(IFakeService);
                 var implementationType = typeof(FakeService);
-                return new TheoryData<ServiceDescriptor, Type, object, Type, ServiceLifetime>
+                return new TheoryData<Func<ServiceDescriptor>, Type, object, Type, ServiceLifetime>
                 {
-                    { ServiceDescriptor.KeyedTransient<IFakeService, FakeService>("service1"), serviceType, "service1", implementationType, ServiceLifetime.Transient },
-                    { ServiceDescriptor.KeyedTransient<IFakeService, FakeService>("service2", (s,k) => new FakeService()), serviceType, "service2", implementationType, ServiceLifetime.Transient },
+                    { () => ServiceDescriptor.KeyedTransient<IFakeService, FakeService>("service1"), serviceType, "service1", implementationType, ServiceLifetime.Transient },
+                    { () => ServiceDescriptor.KeyedTransient<IFakeService, FakeService>("service2", (s,k) => new FakeService()), serviceType, "service2", implementationType, ServiceLifetime.Transient },
+                    { () => ServiceDescriptor.KeyedTransient<IFakeService, FakeService>(7), serviceType, 7, implementationType, ServiceLifetime.Transient },
 
-                    { ServiceDescriptor.KeyedScoped<IFakeService, FakeService>("service3"), serviceType, "service3", implementationType, ServiceLifetime.Scoped },
-                    { ServiceDescriptor.KeyedScoped<IFakeService, FakeService>("service4", (s,k) => new FakeService()), serviceType, "service4", implementationType, ServiceLifetime.Scoped },
+                    { () => ServiceDescriptor.KeyedScoped<IFakeService, FakeService>("service3"), serviceType, "service3", implementationType, ServiceLifetime.Scoped },
+                    { () => ServiceDescriptor.KeyedScoped<IFakeService, FakeService>("service4", (s,k) => new FakeService()), serviceType, "service4", implementationType, ServiceLifetime.Scoped },
 
-                    { ServiceDescriptor.KeyedSingleton<IFakeService, FakeService>("service5"), serviceType, "service5", implementationType, ServiceLifetime.Singleton },
-                    { ServiceDescriptor.KeyedSingleton<IFakeService, FakeService>("service6", (s,k) => new FakeService()), serviceType, "service6", implementationType, ServiceLifetime.Singleton },
+                    { () => ServiceDescriptor.KeyedSingleton<IFakeService, FakeService>("service5"), serviceType, "service5", implementationType, ServiceLifetime.Singleton },
+                    { () => ServiceDescriptor.KeyedSingleton<IFakeService, FakeService>("service6", (s,k) => new FakeService()), serviceType, "service6", implementationType, ServiceLifetime.Singleton },
 
-                    { ServiceDescriptor.KeyedSingleton<IFakeService>("service6", _instance), serviceType, "service6", implementationType, ServiceLifetime.Singleton },
+                    { () => ServiceDescriptor.KeyedSingleton<IFakeService>("service6", _instance), serviceType, "service6", implementationType, ServiceLifetime.Singleton },
                 };
             }
         }
@@ -281,7 +282,7 @@ public static TheoryData TryAddEnumerableImplementationTypeData
         [Theory]
         [MemberData(nameof(TryAddEnumerableImplementationTypeData))]
         public void TryAddEnumerable_AddsService(
-            ServiceDescriptor descriptor,
+            Func<ServiceDescriptor> createDescriptor,
             Type expectedServiceType,
             object expectedKey,
             Type expectedImplementationType,
@@ -291,7 +292,7 @@ public void TryAddEnumerable_AddsService(
             var collection = new ServiceCollection();
 
             // Act
-            collection.TryAddEnumerable(descriptor);
+            collection.TryAddEnumerable(createDescriptor());
 
             // Assert
             var d = Assert.Single(collection);
@@ -305,7 +306,7 @@ public void TryAddEnumerable_AddsService(
         [Theory]
         [MemberData(nameof(TryAddEnumerableImplementationTypeData))]
         public void TryAddEnumerable_DoesNotAddDuplicate(
-            ServiceDescriptor descriptor,
+            Func<ServiceDescriptor> createDescriptor,
             Type expectedServiceType,
             object expectedKey,
             Type expectedImplementationType,
@@ -313,10 +314,10 @@ public void TryAddEnumerable_DoesNotAddDuplicate(
         {
             // Arrange
             var collection = new ServiceCollection();
-            collection.TryAddEnumerable(descriptor);
+            collection.TryAddEnumerable(createDescriptor());
 
             // Act
-            collection.TryAddEnumerable(descriptor);
+            collection.TryAddEnumerable(createDescriptor());
 
             // Assert
             var d = Assert.Single(collection);
@@ -326,40 +327,6 @@ public void TryAddEnumerable_DoesNotAddDuplicate(
             Assert.Equal(expectedLifetime, d.Lifetime);
         }
 
-        [Fact]
-        public void TryAddEnumerable_DoesNotAddDuplicateWhenKeyIsInt()
-        {
-            // Arrange
-            var collection = new ServiceCollection();
-            var descriptor1 = ServiceDescriptor.KeyedTransient<IFakeService, FakeService>(1);
-            collection.TryAddEnumerable(descriptor1);
-            var descriptor2 = ServiceDescriptor.KeyedTransient<IFakeService, FakeService>(1);
-
-            // Act
-            collection.TryAddEnumerable(descriptor2);
-
-            // Assert
-            var d = Assert.Single(collection);
-            Assert.Same(descriptor1, d);
-        }
-
-        [Fact]
-        public void TryAddEnumerable_DoesNotAddDuplicateWhenKeyIsString()
-        {
-            // Arrange
-            var collection = new ServiceCollection();
-            var descriptor1 = ServiceDescriptor.KeyedTransient<IFakeService, FakeService>("service1");
-            collection.TryAddEnumerable(descriptor1);
-            var descriptor2 = ServiceDescriptor.KeyedTransient<IFakeService, FakeService>("service1");
-
-            // Act
-            collection.TryAddEnumerable(descriptor2);
-
-            // Assert
-            var d = Assert.Single(collection);
-            Assert.Same(descriptor1, d);
-        }
-
         public static TheoryData TryAddEnumerableInvalidImplementationTypeData
         {
             get
diff --git a/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceLookup/CallSiteFactoryTest.cs b/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceLookup/CallSiteFactoryTest.cs
index 919dee647590..74f85296af95 100644
--- a/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceLookup/CallSiteFactoryTest.cs
+++ b/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceLookup/CallSiteFactoryTest.cs
@@ -792,7 +792,7 @@ public void CreateCallSite_EnumberableCachedAtLowestLevel(ServiceDescriptor[] de
             Assert.Equal(typeof(IEnumerable<FakeService>), callSite.Cache.Key.ServiceIdentifier.ServiceType);
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         public void CallSitesAreUniquePerServiceTypeAndSlot()
         {
             // Connected graph
@@ -828,7 +828,7 @@ public void CallSitesAreUniquePerServiceTypeAndSlot()
             }
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         public void CallSitesAreUniquePerServiceTypeAndSlotWithOpenGenericInGraph()
         {
             // Connected graph
diff --git a/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceProviderContainerTests.cs b/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceProviderContainerTests.cs
index 3e08a16db282..f668cee41efc 100644
--- a/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceProviderContainerTests.cs
+++ b/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceProviderContainerTests.cs
@@ -371,7 +371,7 @@ public void GetService_DisposeOnSameThread_Throws()
             });
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         public void GetAsyncService_DisposeAsyncOnSameThread_ThrowsAndDoesNotHangAndDisposeAsyncGetsCalled()
         {
             // Arrange
@@ -398,7 +398,7 @@ public void GetAsyncService_DisposeAsyncOnSameThread_ThrowsAndDoesNotHangAndDisp
             Assert.True(asyncDisposableResource.DisposeAsyncCalled);
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         public void GetService_DisposeOnSameThread_ThrowsAndDoesNotHangAndDisposeGetsCalled()
         {
             // Arrange
diff --git a/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceProviderValidationTests.cs b/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceProviderValidationTests.cs
index 8780312c2e8f..312043e56d47 100644
--- a/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceProviderValidationTests.cs
+++ b/src/libraries/Microsoft.Extensions.DependencyInjection/tests/DI.Tests/ServiceProviderValidationTests.cs
@@ -87,7 +87,7 @@ public void GetService_Throws_WhenGetServiceForScopedServiceIsCalledOnRoot()
         }
 
         [Fact]
-        public async void GetService_Throws_WhenGetServiceForScopedServiceIsCalledOnRoot_IL_Replacement()
+        public async Task GetService_Throws_WhenGetServiceForScopedServiceIsCalledOnRoot_IL_Replacement()
         {
             // Arrange
             var serviceCollection = new ServiceCollection();
@@ -180,6 +180,143 @@ public void GetService_DoesNotThrow_WhenScopeFactoryIsInjectedIntoSingleton()
             Assert.NotNull(result);
         }
 
+        [Fact]
+        public void GetService_DoesNotThrow_WhenGetServiceForServiceWithMultipleImplementationScopesWhereLastIsNotScoped()
+        {
+            // Arrange
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddScoped<IBar, Bar>();
+            serviceCollection.AddSingleton<IBar, Bar2>();
+            serviceCollection.AddSingleton<IBaz, Baz>();
+            var serviceProvider = serviceCollection.BuildServiceProvider(true);
+
+
+            // Act + Assert
+            var exception = Assert.Throws<InvalidOperationException>(() => serviceProvider.GetService(typeof(IEnumerable<IBar>)));
+            Assert.Equal($"Cannot resolve scoped service '{typeof(IEnumerable<IBar>)}' from root provider.", exception.Message);
+
+            var result = serviceProvider.GetService(typeof(IBar));
+            Assert.NotNull(result);
+        }
+
+
+        [Fact]
+        public void GetService_Throws_WhenGetServiceForServiceWithMultipleImplementationScopesWhereLastIsScoped()
+        {
+            // Arrange
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddSingleton<IBar, Bar>();
+            serviceCollection.AddScoped<IBar, Bar2>();
+            serviceCollection.AddSingleton<IBaz, Baz>();
+            var serviceProvider = serviceCollection.BuildServiceProvider(true);
+
+
+            // Act + Assert
+            var exception = Assert.Throws<InvalidOperationException>(() => serviceProvider.GetService(typeof(IEnumerable<IBar>)));
+            Assert.Equal($"Cannot resolve scoped service '{typeof(IEnumerable<IBar>)}' from root provider.", exception.Message);
+
+            exception = Assert.Throws<InvalidOperationException>(() => serviceProvider.GetService(typeof(IBar)));
+            Assert.Equal($"Cannot resolve scoped service '{typeof(IBar)}' from root provider.", exception.Message);
+        }
+
+        [Fact]
+        public void GetService_DoesNotThrow_WhenGetServiceForNonScopedImplementationWithMultipleImplementationScopesWhereLastIsScoped()
+        {
+            // Arrange
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddSingleton<IBar, Bar>();
+            serviceCollection.AddSingleton<Bar>();
+            serviceCollection.AddScoped<IBar, Bar2>();
+            serviceCollection.AddSingleton<IBaz, Baz>();
+            var serviceProvider = serviceCollection.BuildServiceProvider(true);
+
+
+            // Act + Assert
+            var exception = Assert.Throws<InvalidOperationException>(() => serviceProvider.GetService(typeof(IEnumerable<IBar>)));
+            Assert.Equal($"Cannot resolve scoped service '{typeof(IEnumerable<IBar>)}' from root provider.", exception.Message);
+
+            var result = serviceProvider.GetService(typeof(Bar));
+            Assert.NotNull(result);
+        }
+
+        [Fact]
+        public void BuildServiceProvider_ValidateOnBuild_Throws_WhenScopedIsInjectedIntoSingleton()
+        {
+            // Arrange
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddScoped<IBar, Bar>();
+            serviceCollection.AddSingleton<IFoo, Foo>();
+
+            // Act + Assert
+            var aggregateException = Assert.Throws<AggregateException>(() => serviceCollection.BuildServiceProvider(new ServiceProviderOptions() { ValidateOnBuild = true, ValidateScopes = true }));
+            Assert.StartsWith("Some services are not able to be constructed", aggregateException.Message);
+            Assert.Equal(1, aggregateException.InnerExceptions.Count);
+            Assert.Equal("Error while validating the service descriptor 'ServiceType: Microsoft.Extensions.DependencyInjection.Tests.ServiceProviderValidationTests+IFoo Lifetime: Singleton ImplementationType: Microsoft.Extensions.DependencyInjection.Tests.ServiceProviderValidationTests+Foo': " +
+                         "Cannot consume scoped service 'Microsoft.Extensions.DependencyInjection.Tests.ServiceProviderValidationTests+IBar' from singleton 'Microsoft.Extensions.DependencyInjection.Tests.ServiceProviderValidationTests+IFoo'."
+                , aggregateException.InnerExceptions[0].Message);
+        }
+
+        [Fact]
+        public void BuildServiceProvider_ValidateOnBuild_Throws_WhenScopedIsInjectedIntoSingleton_ReverseRegistrationOrder()
+        {
+            // Arrange
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddSingleton<IFoo, Foo>();
+            serviceCollection.AddScoped<IBar, Bar>();
+
+            // Act + Assert
+            var aggregateException = Assert.Throws<AggregateException>(() => serviceCollection.BuildServiceProvider(new ServiceProviderOptions() { ValidateOnBuild = true, ValidateScopes = true }));
+            Assert.StartsWith("Some services are not able to be constructed", aggregateException.Message);
+            Assert.Equal(1, aggregateException.InnerExceptions.Count);
+            Assert.Equal("Error while validating the service descriptor 'ServiceType: Microsoft.Extensions.DependencyInjection.Tests.ServiceProviderValidationTests+IFoo Lifetime: Singleton ImplementationType: Microsoft.Extensions.DependencyInjection.Tests.ServiceProviderValidationTests+Foo': " +
+                         "Cannot consume scoped service 'Microsoft.Extensions.DependencyInjection.Tests.ServiceProviderValidationTests+IBar' from singleton 'Microsoft.Extensions.DependencyInjection.Tests.ServiceProviderValidationTests+IFoo'."
+                , aggregateException.InnerExceptions[0].Message);
+        }
+
+        [Fact]
+        public void BuildServiceProvider_ValidateOnBuild_DoesNotThrow_WhenScopeFactoryIsInjectedIntoSingleton()
+        {
+            // Arrange
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddSingleton<IBoo, Boo>();
+
+            // Act + Assert
+            serviceCollection.BuildServiceProvider(new ServiceProviderOptions() { ValidateOnBuild = true, ValidateScopes = true });
+        }
+
+        [Fact]
+        public void BuildServiceProvider_ValidateOnBuild_Throws_WhenScopedIsInjectedIntoSingleton_CachedCallSites()
+        {
+            // Arrange
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddScoped<Foo>();
+            serviceCollection.AddSingleton<Foo2>();
+            serviceCollection.AddScoped<IBar, Bar2>();
+            serviceCollection.AddScoped<IBaz, Baz>();
+
+            // Act + Assert
+            var aggregateException = Assert.Throws<AggregateException>(() => serviceCollection.BuildServiceProvider(new ServiceProviderOptions() { ValidateOnBuild = true, ValidateScopes = true }));
+            Assert.StartsWith("Some services are not able to be constructed", aggregateException.Message);
+            Assert.Equal(1, aggregateException.InnerExceptions.Count);
+            Assert.Equal("Error while validating the service descriptor 'ServiceType: Microsoft.Extensions.DependencyInjection.Tests.ServiceProviderValidationTests+Foo2 Lifetime: Singleton ImplementationType: Microsoft.Extensions.DependencyInjection.Tests.ServiceProviderValidationTests+Foo2': " +
+                         "Cannot consume scoped service 'Microsoft.Extensions.DependencyInjection.Tests.ServiceProviderValidationTests+IBar' from singleton 'Microsoft.Extensions.DependencyInjection.Tests.ServiceProviderValidationTests+Foo2'."
+                , aggregateException.InnerExceptions[0].Message);
+        }
+
+        [Fact]
+        public void BuildServiceProvider_ValidateOnBuild_DoesNotThrow_CachedCallSites()
+        {
+            // Arrange
+            var serviceCollection = new ServiceCollection();
+            serviceCollection.AddScoped<Foo>();
+            serviceCollection.AddScoped<Foo2>();
+            serviceCollection.AddScoped<IBar, Bar2>();
+            serviceCollection.AddScoped<IBaz, Baz>();
+
+            // Act + Assert
+            serviceCollection.BuildServiceProvider(new ServiceProviderOptions() { ValidateOnBuild = true, ValidateScopes = true });
+        }
+
         [Fact]
         public void BuildServiceProvider_ValidateOnBuild_ThrowsForUnresolvableServices()
         {
@@ -268,6 +405,13 @@ public Foo(IBar bar)
             }
         }
 
+        private class Foo2 : IFoo
+        {
+            public Foo2(IBar bar)
+            {
+            }
+        }
+
         private interface IBar
         {
         }
diff --git a/src/libraries/Microsoft.Extensions.Diagnostics.Abstractions/src/Metrics/MetricsBuilderExtensions.Rules.cs b/src/libraries/Microsoft.Extensions.Diagnostics.Abstractions/src/Metrics/MetricsBuilderExtensions.Rules.cs
index 6b896aeffd75..595134063370 100644
--- a/src/libraries/Microsoft.Extensions.Diagnostics.Abstractions/src/Metrics/MetricsBuilderExtensions.Rules.cs
+++ b/src/libraries/Microsoft.Extensions.Diagnostics.Abstractions/src/Metrics/MetricsBuilderExtensions.Rules.cs
@@ -46,12 +46,12 @@ public static MetricsOptions EnableMetrics(this MetricsOptions options, string?
         /// <summary>
         /// Enables a specified <see cref="Instrument"/> for the given <see cref="Meter"/> and <see cref="IMetricsListener"/>.
         /// </summary>
-        /// <param name="options">The <see cref="MeterOptions"/>.</param>
+        /// <param name="options">The <see cref="MetricsOptions"/>.</param>
         /// <param name="meterName">The <see cref="Meter.Name"/> or prefix. A null value matches all meters.</param>
         /// <param name="instrumentName">The <see cref="Instrument.Name"/>. A null value matches all instruments.</param>
         /// <param name="listenerName">The <see cref="IMetricsListener"/>.Name. A null value matches all listeners.</param>
         /// <param name="scopes">Indicates which <see cref="MeterScope"/>'s to consider. Default to all scopes.</param>
-        /// <returns>The original <see cref="MeterOptions"/> for chaining.</returns>
+        /// <returns>The original <see cref="MetricsOptions"/> for chaining.</returns>
         public static MetricsOptions EnableMetrics(this MetricsOptions options, string? meterName, string? instrumentName = null, string? listenerName = null,
             MeterScope scopes = MeterScope.Global | MeterScope.Local)
             => options.AddRule(meterName, instrumentName, listenerName, scopes, enable: true);
@@ -90,12 +90,12 @@ public static MetricsOptions DisableMetrics(this MetricsOptions options, string?
         /// <summary>
         /// Disables a specified <see cref="Instrument"/> for the given <see cref="Meter"/> and <see cref="IMetricsListener"/>.
         /// </summary>
-        /// <param name="options">The <see cref="MeterOptions"/>.</param>
+        /// <param name="options">The <see cref="MetricsOptions"/>.</param>
         /// <param name="meterName">The <see cref="Meter.Name"/> or prefix. A null value matches all meters.</param>
         /// <param name="instrumentName">The <see cref="Instrument.Name"/>. A null value matches all instruments.</param>
         /// <param name="listenerName">The <see cref="IMetricsListener"/>.Name. A null value matches all listeners.</param>
         /// <param name="scopes">Indicates which <see cref="MeterScope"/>'s to consider. Default to all scopes.</param>
-        /// <returns>The original <see cref="MeterOptions"/> for chaining.</returns>
+        /// <returns>The original <see cref="MetricsOptions"/> for chaining.</returns>
         public static MetricsOptions DisableMetrics(this MetricsOptions options, string? meterName, string? instrumentName = null, string? listenerName = null,
             MeterScope scopes = MeterScope.Global | MeterScope.Local)
             => options.AddRule(meterName, instrumentName, listenerName, scopes, enable: false);
diff --git a/src/libraries/Microsoft.Extensions.FileProviders.Physical/src/Microsoft.Extensions.FileProviders.Physical.csproj b/src/libraries/Microsoft.Extensions.FileProviders.Physical/src/Microsoft.Extensions.FileProviders.Physical.csproj
index c4bbe1418891..030d46af2a6c 100644
--- a/src/libraries/Microsoft.Extensions.FileProviders.Physical/src/Microsoft.Extensions.FileProviders.Physical.csproj
+++ b/src/libraries/Microsoft.Extensions.FileProviders.Physical/src/Microsoft.Extensions.FileProviders.Physical.csproj
@@ -4,6 +4,7 @@
     <TargetFrameworks>$(NetCoreAppCurrent);$(NetCoreAppPrevious);$(NetCoreAppMinimum);netstandard2.0;$(NetFrameworkMinimum)</TargetFrameworks>
     <RootNamespace>Microsoft.Extensions.FileProviders</RootNamespace>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <NoWarn>$(NoWarn);CA1865;CA1866</NoWarn>
     <EnableDefaultItems>true</EnableDefaultItems>
     <IsPackable>true</IsPackable>
     <PackageDescription>File provider for physical files for Microsoft.Extensions.FileProviders.</PackageDescription>
diff --git a/src/libraries/Microsoft.Extensions.HostFactoryResolver/tests/HostFactoryResolverTests.cs b/src/libraries/Microsoft.Extensions.HostFactoryResolver/tests/HostFactoryResolverTests.cs
index 3982ae298468..f123b65bb58b 100644
--- a/src/libraries/Microsoft.Extensions.HostFactoryResolver/tests/HostFactoryResolverTests.cs
+++ b/src/libraries/Microsoft.Extensions.HostFactoryResolver/tests/HostFactoryResolverTests.cs
@@ -37,7 +37,7 @@ public void BuildWebHostPattern_CanFindServiceProvider()
             Assert.IsAssignableFrom<IServiceProvider>(factory(Array.Empty<string>()));
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(BuildWebHostInvalidSignature.Program))]
         public void BuildWebHostPattern__Invalid_CantFindWebHost()
         {
@@ -46,7 +46,7 @@ public void BuildWebHostPattern__Invalid_CantFindWebHost()
             Assert.Null(factory);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(BuildWebHostInvalidSignature.Program))]
         public void BuildWebHostPattern__Invalid_CantFindServiceProvider()
         {
@@ -55,7 +55,7 @@ public void BuildWebHostPattern__Invalid_CantFindServiceProvider()
             Assert.NotNull(factory);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(CreateWebHostBuilderPatternTestSite.Program))]
         public void CreateWebHostBuilderPattern_CanFindWebHostBuilder()
         {
@@ -65,7 +65,7 @@ public void CreateWebHostBuilderPattern_CanFindWebHostBuilder()
             Assert.IsAssignableFrom<IWebHostBuilder>(factory(Array.Empty<string>()));
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(CreateWebHostBuilderPatternTestSite.Program))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(IWebHost))]
         public void CreateWebHostBuilderPattern_CanFindServiceProvider()
@@ -76,7 +76,7 @@ public void CreateWebHostBuilderPattern_CanFindServiceProvider()
             Assert.IsAssignableFrom<IServiceProvider>(factory(Array.Empty<string>()));
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(CreateWebHostBuilderInvalidSignature.Program))]
         public void CreateWebHostBuilderPattern__Invalid_CantFindWebHostBuilder()
         {
@@ -85,7 +85,7 @@ public void CreateWebHostBuilderPattern__Invalid_CantFindWebHostBuilder()
             Assert.Null(factory);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(CreateWebHostBuilderInvalidSignature.Program))]
         public void CreateWebHostBuilderPattern__InvalidReturnType_CanFindServiceProvider()
         {
@@ -95,7 +95,7 @@ public void CreateWebHostBuilderPattern__InvalidReturnType_CanFindServiceProvide
             Assert.Null(factory(Array.Empty<string>()));
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(CreateHostBuilderPatternTestSite.Program))]
         public void CreateHostBuilderPattern_CanFindHostBuilder()
         {
@@ -105,7 +105,7 @@ public void CreateHostBuilderPattern_CanFindHostBuilder()
             Assert.IsAssignableFrom<IHostBuilder>(factory(Array.Empty<string>()));
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(CreateHostBuilderPatternTestSite.Program))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(Host))]
         public void CreateHostBuilderPattern_CanFindServiceProvider()
@@ -116,7 +116,7 @@ public void CreateHostBuilderPattern_CanFindServiceProvider()
             Assert.IsAssignableFrom<IServiceProvider>(factory(Array.Empty<string>()));
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(CreateHostBuilderInvalidSignature.Program))]
         public void CreateHostBuilderPattern__Invalid_CantFindHostBuilder()
         {
@@ -125,7 +125,7 @@ public void CreateHostBuilderPattern__Invalid_CantFindHostBuilder()
             Assert.Null(factory);
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(CreateHostBuilderInvalidSignature.Program))]
         public void CreateHostBuilderPattern__Invalid_CantFindServiceProvider()
         {
@@ -135,7 +135,7 @@ public void CreateHostBuilderPattern__Invalid_CantFindServiceProvider()
             Assert.Throws<InvalidOperationException>(() => factory(Array.Empty<string>()));
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(NoSpecialEntryPointPattern.Program))]
         public void NoSpecialEntryPointPattern()
         {
@@ -145,7 +145,7 @@ public void NoSpecialEntryPointPattern()
             Assert.IsAssignableFrom<IServiceProvider>(factory(Array.Empty<string>()));
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(NoSpecialEntryPointPattern.Program))]
         public void NoSpecialEntryPointPatternHostBuilderConfigureHostBuilderCallbackIsCalled()
         {
@@ -163,7 +163,7 @@ void ConfigureHostBuilder(object hostBuilder)
             Assert.True(called);
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(NoSpecialEntryPointPattern.Program))]
         public void NoSpecialEntryPointPatternBuildsThenThrowsCallsEntryPointCompletedCallback()
         {
@@ -183,7 +183,7 @@ void EntryPointCompleted(Exception? exception)
             Assert.Null(entryPointException);
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(NoSpecialEntryPointPatternBuildsThenThrows.Program))]
         public void NoSpecialEntryPointPatternBuildsThenThrowsCallsEntryPointCompletedCallbackWithException()
         {
@@ -203,7 +203,7 @@ void EntryPointCompleted(Exception? exception)
             Assert.NotNull(entryPointException);
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(NoSpecialEntryPointPatternThrows.Program))]
         public void NoSpecialEntryPointPatternThrows()
         {
@@ -213,7 +213,7 @@ public void NoSpecialEntryPointPatternThrows()
             Assert.Throws<Exception>(() => factory(Array.Empty<string>()));
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(NoSpecialEntryPointPatternExits.Program))]
         public void NoSpecialEntryPointPatternExits()
         {
@@ -223,7 +223,7 @@ public void NoSpecialEntryPointPatternExits()
             Assert.Throws<InvalidOperationException>(() => factory(Array.Empty<string>()));
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(NoSpecialEntryPointPatternHangs.Program))]
         public void NoSpecialEntryPointPatternHangs()
         {
@@ -233,7 +233,7 @@ public void NoSpecialEntryPointPatternHangs()
             Assert.Throws<InvalidOperationException>(() => factory(Array.Empty<string>()));
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(NoSpecialEntryPointPatternMainNoArgs.Program))]
         public void NoSpecialEntryPointPatternMainNoArgs()
         {
@@ -243,7 +243,7 @@ public void NoSpecialEntryPointPatternMainNoArgs()
             Assert.IsAssignableFrom<IServiceProvider>(factory(Array.Empty<string>()));
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, "Program", "TopLevelStatements")]
         public void TopLevelStatements()
         {
@@ -254,7 +254,7 @@ public void TopLevelStatements()
             Assert.IsAssignableFrom<IServiceProvider>(factory(Array.Empty<string>()));
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, "Program", "TopLevelStatementsTestsTimeout")]
         public void TopLevelStatementsTestsTimeout()
         {
@@ -265,7 +265,7 @@ public void TopLevelStatementsTestsTimeout()
             Assert.Throws<InvalidOperationException>(() => factory(Array.Empty<string>()));
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, "Program", "ApplicationNameSetFromArgument")]
         public void ApplicationNameSetFromArgument()
         {
@@ -277,7 +277,7 @@ public void ApplicationNameSetFromArgument()
             Assert.Contains("ApplicationNameSetFromArgument", configuration["applicationName"]);
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(NoSpecialEntryPointPattern.Program))]
         public void NoSpecialEntryPointPatternCanRunInParallel()
         {
diff --git a/src/libraries/Microsoft.Extensions.Hosting/src/Internal/Host.cs b/src/libraries/Microsoft.Extensions.Hosting/src/Internal/Host.cs
index 154b4225b002..e84d2f3a6462 100644
--- a/src/libraries/Microsoft.Extensions.Hosting/src/Internal/Host.cs
+++ b/src/libraries/Microsoft.Extensions.Hosting/src/Internal/Host.cs
@@ -29,7 +29,6 @@ internal sealed class Host : IHost, IAsyncDisposable
         private IEnumerable<IHostedService>? _hostedServices;
         private IEnumerable<IHostedLifecycleService>? _hostedLifecycleServices;
         private bool _hostStarting;
-        private volatile bool _stopCalled;
         private bool _hostStopped;
 
         public Host(IServiceProvider services,
@@ -190,7 +189,7 @@ private async Task TryExecuteBackgroundServiceAsync(BackgroundService background
             {
                 // When the host is being stopped, it cancels the background services.
                 // This isn't an error condition, so don't log it as an error.
-                if (_stopCalled && backgroundTask.IsCanceled && ex is OperationCanceledException)
+                if (_applicationLifetime.ApplicationStopping.IsCancellationRequested && backgroundTask.IsCanceled && ex is OperationCanceledException)
                 {
                     return;
                 }
@@ -217,7 +216,6 @@ private async Task TryExecuteBackgroundServiceAsync(BackgroundService background
         /// </summary>
         public async Task StopAsync(CancellationToken cancellationToken = default)
         {
-            _stopCalled = true;
             _logger.Stopping();
 
             CancellationTokenSource? cts = null;
diff --git a/src/libraries/Microsoft.Extensions.Hosting/tests/UnitTests/Internal/HostTests.cs b/src/libraries/Microsoft.Extensions.Hosting/tests/UnitTests/Internal/HostTests.cs
index 23cf84e91621..64f9dd8f9041 100644
--- a/src/libraries/Microsoft.Extensions.Hosting/tests/UnitTests/Internal/HostTests.cs
+++ b/src/libraries/Microsoft.Extensions.Hosting/tests/UnitTests/Internal/HostTests.cs
@@ -1487,6 +1487,39 @@ public async Task StartOnBackgroundServiceThatDoesNotCallBase()
             }
         }
 
+        /// <summary>
+        /// Tests that when a BackgroundService is cancelled when stopping a host which has not finished starting, it does not log an error
+        /// </summary>
+        [Fact]
+        public async Task HostNoErrorWhenStartingServiceIsCanceledAsPartOfStop()
+        {
+            TestLoggerProvider logger = new TestLoggerProvider();
+
+            using IHost host = CreateBuilder()
+                .ConfigureLogging(logging =>
+                {
+                    logging.AddProvider(logger);
+                })
+                .ConfigureServices(services =>
+                {
+                    services.AddHostedService<WorkerTemplateService>();
+                    services.AddHostedService<SlowStartService>();
+                })
+                .Build();
+
+            IHostApplicationLifetime lifetime = host.Services.GetRequiredService<IHostApplicationLifetime>();
+            _ = host.StartAsync();
+            lifetime.StopApplication();
+            await Task.Delay(TimeSpan.FromMilliseconds(100));
+            await host.WaitForShutdownAsync();
+
+            foreach (LogEvent logEvent in logger.GetEvents())
+            {
+                Assert.True(logEvent.LogLevel < LogLevel.Error);
+                Assert.NotEqual("BackgroundServiceFaulted", logEvent.EventId.Name);
+            }
+        }
+
         private IHostBuilder CreateBuilder(IConfiguration config = null)
         {
             return new HostBuilder().ConfigureHostConfiguration(builder => builder.AddConfiguration(config ?? new ConfigurationBuilder().Build()));
@@ -1637,5 +1670,15 @@ private class BackgroundServiceDoesNotCallBase : BackgroundService
 
             public override Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
         }
+
+        private class SlowStartService : IHostedService
+        {
+            public async Task StartAsync(CancellationToken cancellationToken)
+            {
+                await Task.Delay(TimeSpan.FromSeconds(10), CancellationToken.None);
+            }
+
+            public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
+        }
     }
 }
diff --git a/src/libraries/Microsoft.Extensions.Hosting/tests/UnitTests/OptionsBuilderExtensionsTests.cs b/src/libraries/Microsoft.Extensions.Hosting/tests/UnitTests/OptionsBuilderExtensionsTests.cs
index 26b61d0a2d76..74c2a1c0bfe9 100644
--- a/src/libraries/Microsoft.Extensions.Hosting/tests/UnitTests/OptionsBuilderExtensionsTests.cs
+++ b/src/libraries/Microsoft.Extensions.Hosting/tests/UnitTests/OptionsBuilderExtensionsTests.cs
@@ -246,7 +246,7 @@ private async Task ValidateOnStart_AddEagerValidation_DoesValidationWhenHostStar
         }
 
         [Fact]
-        private async void CanValidateOptionsEagerly_AddOptionsWithValidateOnStart_IValidateOptions()
+        private async Task CanValidateOptionsEagerly_AddOptionsWithValidateOnStart_IValidateOptions()
         {
             var hostBuilder = CreateHostBuilder(services =>
                 services.AddOptionsWithValidateOnStart<ComplexOptions, ComplexOptionsValidator>()
diff --git a/src/libraries/Microsoft.Extensions.Http/tests/Microsoft.Extensions.Http.Tests/DependencyInjection/HttpClientFactoryServiceCollectionExtensionsTest.cs b/src/libraries/Microsoft.Extensions.Http/tests/Microsoft.Extensions.Http.Tests/DependencyInjection/HttpClientFactoryServiceCollectionExtensionsTest.cs
index c72b17e72884..81bb589fcfb3 100644
--- a/src/libraries/Microsoft.Extensions.Http/tests/Microsoft.Extensions.Http.Tests/DependencyInjection/HttpClientFactoryServiceCollectionExtensionsTest.cs
+++ b/src/libraries/Microsoft.Extensions.Http/tests/Microsoft.Extensions.Http.Tests/DependencyInjection/HttpClientFactoryServiceCollectionExtensionsTest.cs
@@ -1203,7 +1203,7 @@ public async Task AddHttpClient_MessageHandler_Scope_TransientDependency()
             }
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported), nameof(PlatformDetection.IsReflectionEmitSupported))]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec), nameof(PlatformDetection.IsReflectionEmitSupported))]
         public void AddHttpClient_GetAwaiterAndResult_InSingleThreadedSynchronizationContext_ShouldNotHangs()
         {
             // Arrange
diff --git a/src/libraries/Microsoft.Extensions.Http/tests/Microsoft.Extensions.Http.Tests/Logging/HttpClientLoggerTest.cs b/src/libraries/Microsoft.Extensions.Http/tests/Microsoft.Extensions.Http.Tests/Logging/HttpClientLoggerTest.cs
index fb1d94310d5c..6fb8e2dd8f7a 100644
--- a/src/libraries/Microsoft.Extensions.Http/tests/Microsoft.Extensions.Http.Tests/Logging/HttpClientLoggerTest.cs
+++ b/src/libraries/Microsoft.Extensions.Http/tests/Microsoft.Extensions.Http.Tests/Logging/HttpClientLoggerTest.cs
@@ -162,7 +162,7 @@ private void AssertCounters(TestCountingLogger testLogger, int requestCount, boo
         [InlineData(false, true)]
         [InlineData(true, false)]
         [InlineData(true, true)]
-        public async void CustomLogger_LogsCorrectEvents_Sync(bool requestSuccessful, bool asyncSecondCall)
+        public async Task CustomLogger_LogsCorrectEvents_Sync(bool requestSuccessful, bool asyncSecondCall)
         {
             var serviceCollection = new ServiceCollection();
             serviceCollection.AddTransient(_ =>
diff --git a/src/libraries/Microsoft.Extensions.Logging.Abstractions/src/Microsoft.Extensions.Logging.Abstractions.csproj b/src/libraries/Microsoft.Extensions.Logging.Abstractions/src/Microsoft.Extensions.Logging.Abstractions.csproj
index 085cade3966b..28d8bcddd185 100644
--- a/src/libraries/Microsoft.Extensions.Logging.Abstractions/src/Microsoft.Extensions.Logging.Abstractions.csproj
+++ b/src/libraries/Microsoft.Extensions.Logging.Abstractions/src/Microsoft.Extensions.Logging.Abstractions.csproj
@@ -44,7 +44,7 @@ Microsoft.Extensions.Logging.Abstractions.NullLogger</PackageDescription>
     <ProjectReference Include="..\gen\Microsoft.Extensions.Logging.Generators.Roslyn3.11.csproj"
                       ReferenceOutputAssembly="false"
                       PackAsAnalyzer="true"
-                      Condition="'$(DotNetBuildFromSource)' != 'true'" />
+                      Condition="'$(DotNetBuildSourceOnly)' != 'true'" />
     <ProjectReference Include="..\gen\Microsoft.Extensions.Logging.Generators.Roslyn4.0.csproj"
                       ReferenceOutputAssembly="false"
                       PackAsAnalyzer="true" />
diff --git a/src/libraries/Microsoft.Extensions.Logging.Abstractions/tests/Microsoft.Extensions.Logging.Generators.Tests/Microsoft.Extensions.Logging.Generators.Roslyn4.0.Tests.csproj b/src/libraries/Microsoft.Extensions.Logging.Abstractions/tests/Microsoft.Extensions.Logging.Generators.Tests/Microsoft.Extensions.Logging.Generators.Roslyn4.0.Tests.csproj
index 85734f21fcca..6f2b057c80d2 100644
--- a/src/libraries/Microsoft.Extensions.Logging.Abstractions/tests/Microsoft.Extensions.Logging.Generators.Tests/Microsoft.Extensions.Logging.Generators.Roslyn4.0.Tests.csproj
+++ b/src/libraries/Microsoft.Extensions.Logging.Abstractions/tests/Microsoft.Extensions.Logging.Generators.Tests/Microsoft.Extensions.Logging.Generators.Roslyn4.0.Tests.csproj
@@ -5,7 +5,6 @@
     <DefineConstants>$(DefineConstants);ROSLYN4_0_OR_GREATER</DefineConstants>
     <IsHighAotMemoryUsageTest>true</IsHighAotMemoryUsageTest>
     <EmccLinkOptimizationFlag Condition="'$(ContinuousIntegrationBuild)' == 'true'">-O1</EmccLinkOptimizationFlag>
-    <WasmNativeStrip>false</WasmNativeStrip>
     <!-- this Roslyn version brings in NS1.x dependencies -->
     <FlagNetStandard1XDependencies>false</FlagNetStandard1XDependencies>
   </PropertyGroup>
diff --git a/src/libraries/Microsoft.Extensions.Logging.Console/src/AnsiParser.cs b/src/libraries/Microsoft.Extensions.Logging.Console/src/AnsiParser.cs
index 4e8725118b1f..71ea987bff19 100644
--- a/src/libraries/Microsoft.Extensions.Logging.Console/src/AnsiParser.cs
+++ b/src/libraries/Microsoft.Extensions.Logging.Console/src/AnsiParser.cs
@@ -50,7 +50,7 @@ public void Parse(string message)
             ConsoleColor? foreground = null;
             ConsoleColor? background = null;
             var span = message.AsSpan();
-            const char EscapeChar = '\x1B';
+            const char EscapeChar = '\e';
             ConsoleColor? color = null;
             bool isBright = false;
             for (int i = 0; i < span.Length; i++)
@@ -59,7 +59,7 @@ public void Parse(string message)
                 {
                     if (span[i + 3] == 'm')
                     {
-                        // Example: \x1B[1m
+                        // Example: \e[1m
                         if (IsDigit(span[i + 2]))
                         {
                             escapeCode = (int)(span[i + 2] - '0');
@@ -77,7 +77,7 @@ public void Parse(string message)
                     }
                     else if (span.Length >= i + 5 && span[i + 4] == 'm')
                     {
-                        // Example: \x1B[40m
+                        // Example: \e[40m
                         if (IsDigit(span[i + 2]) && IsDigit(span[i + 3]))
                         {
                             escapeCode = (int)(span[i + 2] - '0') * 10 + (int)(span[i + 3] - '0');
@@ -127,28 +127,28 @@ public void Parse(string message)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static bool IsDigit(char c) => (uint)(c - '0') <= ('9' - '0');
 
-        internal const string DefaultForegroundColor = "\x1B[39m\x1B[22m"; // reset to default foreground color
-        internal const string DefaultBackgroundColor = "\x1B[49m"; // reset to the background color
+        internal const string DefaultForegroundColor = "\e[39m\e[22m"; // reset to default foreground color
+        internal const string DefaultBackgroundColor = "\e[49m"; // reset to the background color
 
         internal static string GetForegroundColorEscapeCode(ConsoleColor color)
         {
             return color switch
             {
-                ConsoleColor.Black => "\x1B[30m",
-                ConsoleColor.DarkRed => "\x1B[31m",
-                ConsoleColor.DarkGreen => "\x1B[32m",
-                ConsoleColor.DarkYellow => "\x1B[33m",
-                ConsoleColor.DarkBlue => "\x1B[34m",
-                ConsoleColor.DarkMagenta => "\x1B[35m",
-                ConsoleColor.DarkCyan => "\x1B[36m",
-                ConsoleColor.Gray => "\x1B[37m",
-                ConsoleColor.Red => "\x1B[1m\x1B[31m",
-                ConsoleColor.Green => "\x1B[1m\x1B[32m",
-                ConsoleColor.Yellow => "\x1B[1m\x1B[33m",
-                ConsoleColor.Blue => "\x1B[1m\x1B[34m",
-                ConsoleColor.Magenta => "\x1B[1m\x1B[35m",
-                ConsoleColor.Cyan => "\x1B[1m\x1B[36m",
-                ConsoleColor.White => "\x1B[1m\x1B[37m",
+                ConsoleColor.Black => "\e[30m",
+                ConsoleColor.DarkRed => "\e[31m",
+                ConsoleColor.DarkGreen => "\e[32m",
+                ConsoleColor.DarkYellow => "\e[33m",
+                ConsoleColor.DarkBlue => "\e[34m",
+                ConsoleColor.DarkMagenta => "\e[35m",
+                ConsoleColor.DarkCyan => "\e[36m",
+                ConsoleColor.Gray => "\e[37m",
+                ConsoleColor.Red => "\e[1m\e[31m",
+                ConsoleColor.Green => "\e[1m\e[32m",
+                ConsoleColor.Yellow => "\e[1m\e[33m",
+                ConsoleColor.Blue => "\e[1m\e[34m",
+                ConsoleColor.Magenta => "\e[1m\e[35m",
+                ConsoleColor.Cyan => "\e[1m\e[36m",
+                ConsoleColor.White => "\e[1m\e[37m",
                 _ => DefaultForegroundColor // default foreground color
             };
         }
@@ -157,14 +157,14 @@ internal static string GetBackgroundColorEscapeCode(ConsoleColor color)
         {
             return color switch
             {
-                ConsoleColor.Black => "\x1B[40m",
-                ConsoleColor.DarkRed => "\x1B[41m",
-                ConsoleColor.DarkGreen => "\x1B[42m",
-                ConsoleColor.DarkYellow => "\x1B[43m",
-                ConsoleColor.DarkBlue => "\x1B[44m",
-                ConsoleColor.DarkMagenta => "\x1B[45m",
-                ConsoleColor.DarkCyan => "\x1B[46m",
-                ConsoleColor.Gray => "\x1B[47m",
+                ConsoleColor.Black => "\e[40m",
+                ConsoleColor.DarkRed => "\e[41m",
+                ConsoleColor.DarkGreen => "\e[42m",
+                ConsoleColor.DarkYellow => "\e[43m",
+                ConsoleColor.DarkBlue => "\e[44m",
+                ConsoleColor.DarkMagenta => "\e[45m",
+                ConsoleColor.DarkCyan => "\e[46m",
+                ConsoleColor.Gray => "\e[47m",
                 _ => DefaultBackgroundColor // Use default background color
             };
         }
diff --git a/src/libraries/Microsoft.Extensions.Logging.Console/tests/Microsoft.Extensions.Logging.Console.Tests/AnsiParserTests.cs b/src/libraries/Microsoft.Extensions.Logging.Console/tests/Microsoft.Extensions.Logging.Console.Tests/AnsiParserTests.cs
index 74d04894eba4..215535c8efb9 100644
--- a/src/libraries/Microsoft.Extensions.Logging.Console/tests/Microsoft.Extensions.Logging.Console.Tests/AnsiParserTests.cs
+++ b/src/libraries/Microsoft.Extensions.Logging.Console/tests/Microsoft.Extensions.Logging.Console.Tests/AnsiParserTests.cs
@@ -11,12 +11,12 @@ namespace Microsoft.Extensions.Logging.Console.Test
 {
     public class AnsiParserTests
     {
-        private const char EscapeChar = '\x1B';
+        private const char EscapeChar = '\e';
 
         [Theory]
         [InlineData(1, "No Color", "No Color")]
-        [InlineData(2, "\x1B[41mColored\x1B[49mNo Color", "No Color")]
-        [InlineData(2, "\x1B[41m\x1B[1m\x1B[31mmColored\x1B[39m\x1B[49mNo Color", "No Color")]
+        [InlineData(2, "\e[41mColored\e[49mNo Color", "No Color")]
+        [InlineData(2, "\e[41m\e[1m\e[31mmColored\e[39m\e[49mNo Color", "No Color")]
         public void Parse_CheckTimesWrittenToConsole(int numSegments, string message, string lastSegment)
         {
             // Arrange
@@ -151,33 +151,33 @@ public void Parse_RepeatedColorChange_PicksLastSet()
 
         [Theory]
         // supported
-        [InlineData("\x1B[77mInfo", "Info")]
-        [InlineData("\x1B[77m\x1B[1m\x1B[2m\x1B[0mInfo\x1B[1m", "Info")]
-        [InlineData("\x1B[7mInfo", "Info")]
-        [InlineData("\x1B[40m\x1B[1m\x1B[33mwarn\x1B[39m\x1B[22m\x1B[49m:", "warn", ":")]
+        [InlineData("\e[77mInfo", "Info")]
+        [InlineData("\e[77m\e[1m\e[2m\e[0mInfo\e[1m", "Info")]
+        [InlineData("\e[7mInfo", "Info")]
+        [InlineData("\e[40m\e[1m\e[33mwarn\e[39m\e[22m\e[49m:", "warn", ":")]
         // unsupported: skips
-        [InlineData("Info\x1B[77m:", "Info", ":")]
-        [InlineData("Info\x1B[7m:", "Info", ":")]
+        [InlineData("Info\e[77m:", "Info", ":")]
+        [InlineData("Info\e[7m:", "Info", ":")]
         // treats as content
-        [InlineData("\x1B", "\x1B")]
-        [InlineData("\x1B ", "\x1B ")]
-        [InlineData("\x1Bm", "\x1Bm")]
-        [InlineData("\x1B m", "\x1B m")]
-        [InlineData("\x1Bxym", "\x1Bxym")]
-        [InlineData("\x1B[", "\x1B[")]
-        [InlineData("\x1B[m", "\x1B[m")]
-        [InlineData("\x1B[ ", "\x1B[ ")]
-        [InlineData("\x1B[ m", "\x1B[ m")]
-        [InlineData("\x1B[xym", "\x1B[xym")]
-        [InlineData("\x1B[7777m", "\x1B[7777m")]
-        [InlineData("\x1B\x1B\x1B", "\x1B\x1B\x1B")]
-        [InlineData("Message\x1B\x1B\x1B", "Message\x1B\x1B\x1B")]
-        [InlineData("\x1B\x1BMessage\x1B", "\x1B\x1BMessage\x1B")]
-        [InlineData("\x1B\x1B\x1BMessage", "\x1B\x1B\x1BMessage")]
-        [InlineData("Message\x1B ", "Message\x1B ")]
-        [InlineData("\x1BmMessage", "\x1BmMessage")]
-        [InlineData("\x1B[77m\x1B m\x1B[40m", "\x1B m")]
-        [InlineData("\x1B mMessage\x1Bxym", "\x1B mMessage\x1Bxym")]
+        [InlineData("\e", "\e")]
+        [InlineData("\e ", "\e ")]
+        [InlineData("\em", "\em")]
+        [InlineData("\e m", "\e m")]
+        [InlineData("\exym", "\exym")]
+        [InlineData("\e[", "\e[")]
+        [InlineData("\e[m", "\e[m")]
+        [InlineData("\e[ ", "\e[ ")]
+        [InlineData("\e[ m", "\e[ m")]
+        [InlineData("\e[xym", "\e[xym")]
+        [InlineData("\e[7777m", "\e[7777m")]
+        [InlineData("\e\e\e", "\e\e\e")]
+        [InlineData("Message\e\e\e", "Message\e\e\e")]
+        [InlineData("\e\eMessage\e", "\e\eMessage\e")]
+        [InlineData("\e\e\eMessage", "\e\e\eMessage")]
+        [InlineData("Message\e ", "Message\e ")]
+        [InlineData("\emMessage", "\emMessage")]
+        [InlineData("\e[77m\e m\e[40m", "\e m")]
+        [InlineData("\e mMessage\exym", "\e mMessage\exym")]
         public void Parse_ValidSupportedOrUnsupportedCodesInMessage_MessageParsedSuccessfully(string messageWithUnsupportedCode, params string[] output)
         {
             // Arrange
diff --git a/src/libraries/Microsoft.Extensions.Logging.Console/tests/Microsoft.Extensions.Logging.Console.Tests/TextWriterExtensionsTests.cs b/src/libraries/Microsoft.Extensions.Logging.Console/tests/Microsoft.Extensions.Logging.Console.Tests/TextWriterExtensionsTests.cs
index 3c6520ebb9bf..79c1d6b14ea9 100644
--- a/src/libraries/Microsoft.Extensions.Logging.Console/tests/Microsoft.Extensions.Logging.Console.Tests/TextWriterExtensionsTests.cs
+++ b/src/libraries/Microsoft.Extensions.Logging.Console/tests/Microsoft.Extensions.Logging.Console.Tests/TextWriterExtensionsTests.cs
@@ -16,7 +16,7 @@ public void WriteColoredMessage_WithForegroundEscapeCode_AndNoBackgroundColorSpe
             var message = "Request received";
             var expectedMessage = AnsiParser.GetForegroundColorEscapeCode(ConsoleColor.DarkGreen)
                 + message
-                + "\x1B[39m\x1B[22m"; //resets foreground color
+                + "\e[39m\e[22m"; //resets foreground color
             var textWriter = new StringWriter();
 
             // Act
@@ -33,7 +33,7 @@ public void WriteColoredMessage_WithBackgroundEscapeCode_AndNoForegroundColorSpe
             var message = "Request received";
             var expectedMessage = AnsiParser.GetBackgroundColorEscapeCode(ConsoleColor.Red)
                 + message
-                + "\x1B[49m"; //resets background color
+                + "\e[49m"; //resets background color
             var textWriter = new StringWriter();
 
             // Act
@@ -51,8 +51,8 @@ public void WriteColoredMessage_InOrder_WhenBothForegroundOrBackgroundColorsSpec
             var expectedMessage = AnsiParser.GetBackgroundColorEscapeCode(ConsoleColor.Red)
                 + AnsiParser.GetForegroundColorEscapeCode(ConsoleColor.DarkGreen)
                 + "Request received"
-                + "\x1B[39m\x1B[22m" //resets foreground color
-                + "\x1B[49m"; //resets background color
+                + "\e[39m\e[22m" //resets foreground color
+                + "\e[49m"; //resets background color
             var textWriter = new StringWriter();
 
             // Act
diff --git a/src/libraries/Microsoft.NETCore.Platforms/src/Microsoft.NETCore.Platforms.csproj b/src/libraries/Microsoft.NETCore.Platforms/src/Microsoft.NETCore.Platforms.csproj
index 958cf0e65df4..24aa038645ae 100644
--- a/src/libraries/Microsoft.NETCore.Platforms/src/Microsoft.NETCore.Platforms.csproj
+++ b/src/libraries/Microsoft.NETCore.Platforms/src/Microsoft.NETCore.Platforms.csproj
@@ -23,7 +23,7 @@
   <ItemGroup>
     <AdditionalRuntimeIdentifiers Include="$(AdditionalRuntimeIdentifiers)" Imports="$(AdditionalRuntimeIdentifierParent)" />
     <!-- When building from source, ensure the RID we're building for is part of the RID graph. -->
-    <AdditionalRuntimeIdentifiers Include="$(OutputRID)" Imports="$(AdditionalRuntimeIdentifierParent)" Condition="'$(DotNetBuildFromSource)' == 'true'" />
+    <AdditionalRuntimeIdentifiers Include="$(OutputRID)" Imports="$(AdditionalRuntimeIdentifierParent)" Condition="'$(DotNetBuildSourceOnly)' == 'true'" />
   </ItemGroup>
 
   <ItemGroup>
diff --git a/src/libraries/Microsoft.VisualBasic.Core/src/Microsoft/VisualBasic/FileIO/FileSystem.vb b/src/libraries/Microsoft.VisualBasic.Core/src/Microsoft/VisualBasic/FileIO/FileSystem.vb
index 45fe6e29b9a1..27ac1fec7e59 100644
--- a/src/libraries/Microsoft.VisualBasic.Core/src/Microsoft/VisualBasic/FileIO/FileSystem.vb
+++ b/src/libraries/Microsoft.VisualBasic.Core/src/Microsoft/VisualBasic/FileIO/FileSystem.vb
@@ -892,7 +892,7 @@ Namespace Microsoft.VisualBasic.FileIO
         Private Shared Sub CopyOrMoveDirectory(ByVal operation As CopyOrMove,
                                                ByVal sourceDirectoryName As String, ByVal destinationDirectoryName As String,
                                                ByVal overwrite As Boolean, ByVal showUI As UIOptionInternal, ByVal onUserCancel As UICancelOption)
-            Debug.Assert(System.Enum.IsDefined(GetType(CopyOrMove), operation), "Invalid Operation")
+            Debug.Assert([Enum].IsDefined(operation), "Invalid Operation")
 
             ' Verify enums.
             VerifyUICancelOption("onUserCancel", onUserCancel)
@@ -961,7 +961,7 @@ Namespace Microsoft.VisualBasic.FileIO
         Private Shared Sub FxCopyOrMoveDirectory(ByVal operation As CopyOrMove,
                                                  ByVal sourceDirectoryPath As String, ByVal targetDirectoryPath As String, ByVal overwrite As Boolean)
 
-            Debug.Assert(System.Enum.IsDefined(GetType(CopyOrMove), operation), "Invalid Operation")
+            Debug.Assert([Enum].IsDefined(operation), "Invalid Operation")
             Debug.Assert(sourceDirectoryPath <> "" And IO.Path.IsPathRooted(sourceDirectoryPath), "Invalid Source")
             Debug.Assert(targetDirectoryPath <> "" And IO.Path.IsPathRooted(targetDirectoryPath), "Invalid Target")
 
@@ -1010,7 +1010,7 @@ Namespace Microsoft.VisualBasic.FileIO
         Private Shared Sub CopyOrMoveDirectoryNode(ByVal Operation As CopyOrMove,
                                                    ByVal SourceDirectoryNode As DirectoryNode, ByVal Overwrite As Boolean, ByVal Exceptions As ListDictionary)
 
-            Debug.Assert(System.Enum.IsDefined(GetType(CopyOrMove), Operation), "Invalid Operation")
+            Debug.Assert([Enum].IsDefined(Operation), "Invalid Operation")
             Debug.Assert(Exceptions IsNot Nothing, "Null exception list")
             Debug.Assert(SourceDirectoryNode IsNot Nothing, "Null source node")
 
@@ -1092,7 +1092,7 @@ Namespace Microsoft.VisualBasic.FileIO
                                           ByVal sourceFileName As String, ByVal destinationFileName As String,
                                           ByVal overwrite As Boolean, ByVal showUI As UIOptionInternal, ByVal onUserCancel As UICancelOption
                                           )
-            Debug.Assert(System.Enum.IsDefined(GetType(CopyOrMove), operation), "Invalid Operation")
+            Debug.Assert([Enum].IsDefined(operation), "Invalid Operation")
 
             ' Verify enums.
             VerifyUICancelOption("onUserCancel", onUserCancel)
@@ -1597,8 +1597,8 @@ Namespace Microsoft.VisualBasic.FileIO
         ''' </remarks>
         Private Shared Sub ShellCopyOrMove(ByVal Operation As CopyOrMove, ByVal TargetType As FileOrDirectory,
             ByVal FullSourcePath As String, ByVal FullTargetPath As String, ByVal ShowUI As UIOptionInternal, ByVal OnUserCancel As UICancelOption)
-            Debug.Assert(System.Enum.IsDefined(GetType(CopyOrMove), Operation))
-            Debug.Assert(System.Enum.IsDefined(GetType(FileOrDirectory), TargetType))
+            Debug.Assert([Enum].IsDefined(Operation))
+            Debug.Assert([Enum].IsDefined(TargetType))
             Debug.Assert(FullSourcePath <> "" And IO.Path.IsPathRooted(FullSourcePath), "Invalid FullSourcePath")
             Debug.Assert(FullTargetPath <> "" And IO.Path.IsPathRooted(FullTargetPath), "Invalid FullTargetPath")
             Debug.Assert(ShowUI <> UIOptionInternal.NoUI, "Why call ShellDelete if ShowUI is NoUI???")
@@ -1693,7 +1693,7 @@ Namespace Microsoft.VisualBasic.FileIO
         Private Shared Sub ShellFileOperation(ByVal OperationType As SHFileOperationType, ByVal OperationFlags As ShFileOperationFlags,
             ByVal FullSource As String, ByVal FullTarget As String, ByVal OnUserCancel As UICancelOption, ByVal FileOrDirectory As FileOrDirectory)
 
-            Debug.Assert(System.Enum.IsDefined(GetType(SHFileOperationType), OperationType))
+            Debug.Assert([Enum].IsDefined(OperationType))
             Debug.Assert(OperationType <> SHFileOperationType.FO_RENAME, "Don't call Shell to rename")
             Debug.Assert(FullSource <> "" And IO.Path.IsPathRooted(FullSource), "Invalid FullSource path")
             Debug.Assert(OperationType = SHFileOperationType.FO_DELETE OrElse (FullTarget <> "" And IO.Path.IsPathRooted(FullTarget)), "Invalid FullTarget path")
@@ -1750,7 +1750,7 @@ Namespace Microsoft.VisualBasic.FileIO
         Private Shared Function GetShellOperationInfo(
                             ByVal OperationType As SHFileOperationType, ByVal OperationFlags As ShFileOperationFlags,
                             ByVal SourcePaths() As String, Optional ByVal TargetPath As String = Nothing) As SHFILEOPSTRUCT
-            Debug.Assert(System.Enum.IsDefined(GetType(SHFileOperationType), OperationType), "Invalid OperationType")
+            Debug.Assert([Enum].IsDefined(OperationType), "Invalid OperationType")
             Debug.Assert(TargetPath = "" Or IO.Path.IsPathRooted(TargetPath), "Invalid TargetPath")
             Debug.Assert(SourcePaths IsNot Nothing AndAlso SourcePaths.Length > 0, "Invalid SourcePaths")
 
diff --git a/src/libraries/System.CodeDom/src/Microsoft/CSharp/CSharpCodeGenerator.cs b/src/libraries/System.CodeDom/src/Microsoft/CSharp/CSharpCodeGenerator.cs
index 095b76eb7deb..49b8bd42d651 100644
--- a/src/libraries/System.CodeDom/src/Microsoft/CSharp/CSharpCodeGenerator.cs
+++ b/src/libraries/System.CodeDom/src/Microsoft/CSharp/CSharpCodeGenerator.cs
@@ -96,6 +96,7 @@ private string QuoteSnippetStringCStyle(string value)
 
             b.Append('\"');
 
+            bool isStringMultiline = false;
             int i = 0;
             while (i < value.Length)
             {
@@ -144,16 +145,26 @@ private string QuoteSnippetStringCStyle(string value)
                         b.Append(value[++i]);
                     }
 
-                    b.Append("\" +");
-                    b.Append(Environment.NewLine);
-                    b.Append(indentObj.IndentationString);
-                    b.Append('\"');
+                    if (i != value.Length - 1)
+                    {
+                        b.Append("\" +");
+                        b.Append(Environment.NewLine);
+                        b.Append(indentObj.IndentationString);
+                        b.Append('\"');
+                        isStringMultiline = true;
+                    }
                 }
                 ++i;
             }
 
             b.Append('\"');
 
+            if (isStringMultiline)
+            {
+                b.Insert(0, '(');
+                b.Append(')');
+            }
+
             return b.ToString();
         }
 
diff --git a/src/libraries/System.CodeDom/tests/System/CodeDom/Compiler/CSharpCodeGeneratorTests.cs b/src/libraries/System.CodeDom/tests/System/CodeDom/Compiler/CSharpCodeGeneratorTests.cs
index f20162b7c34a..58e7f3fc3ed2 100644
--- a/src/libraries/System.CodeDom/tests/System/CodeDom/Compiler/CSharpCodeGeneratorTests.cs
+++ b/src/libraries/System.CodeDom/tests/System/CodeDom/Compiler/CSharpCodeGeneratorTests.cs
@@ -539,11 +539,11 @@ public static IEnumerable<object[]> GenerateCodeFromExpression_TestData()
             yield return new object[] { new CodePrimitiveExpression("\uDC00"), null, "\"\uDC00\"" };
             yield return new object[] { new CodePrimitiveExpression("\uD800"), null, "\"\uD800\"" };
             yield return new object[] { new CodePrimitiveExpression("01234567890123456789012345678901234567890123456789012345678901234567890123456789"), null, $"\"01234567890123456789012345678901234567890123456789012345678901234567890123456789\"" };
-            yield return new object[] { new CodePrimitiveExpression("01234567890123456789012345678901234567890123456789012345678901234567890123456789\uD800"), null, $"\"01234567890123456789012345678901234567890123456789012345678901234567890123456789\uD800\" +{nl}    \"\"" };
-            yield return new object[] { new CodePrimitiveExpression("01234567890123456789012345678901234567890123456789012345678901234567890123456789\uD800\uDC00"), null, $"\"01234567890123456789012345678901234567890123456789012345678901234567890123456789\uD800\uDC00\" +{nl}    \"\"" };
-            yield return new object[] { new CodePrimitiveExpression("01234567890123456789012345678901234567890123456789012345678901234567890123456789\uD800a"), null, $"\"01234567890123456789012345678901234567890123456789012345678901234567890123456789\uD800\" +{nl}    \"a\"" };
-            yield return new object[] { new CodePrimitiveExpression("012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"), null, $"\"012345678901234567890123456789012345678901234567890123456789012345678901234567890\" +{nl}    \"123456789\"" };
-            yield return new object[] { new CodePrimitiveExpression("012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"), customOptions, $"\"012345678901234567890123456789012345678901234567890123456789012345678901234567890\" +{nl}$\"123456789\"" };
+            yield return new object[] { new CodePrimitiveExpression("01234567890123456789012345678901234567890123456789012345678901234567890123456789\uD800"), null, $"\"01234567890123456789012345678901234567890123456789012345678901234567890123456789\uD800\"" };
+            yield return new object[] { new CodePrimitiveExpression("01234567890123456789012345678901234567890123456789012345678901234567890123456789\uD800\uDC00"), null, $"\"01234567890123456789012345678901234567890123456789012345678901234567890123456789\uD800\uDC00\"" };
+            yield return new object[] { new CodePrimitiveExpression("01234567890123456789012345678901234567890123456789012345678901234567890123456789\uD800a"), null, $"(\"01234567890123456789012345678901234567890123456789012345678901234567890123456789\uD800\" +{nl}    \"a\")" };
+            yield return new object[] { new CodePrimitiveExpression("012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"), null, $"(\"012345678901234567890123456789012345678901234567890123456789012345678901234567890\" +{nl}    \"123456789\")" };
+            yield return new object[] { new CodePrimitiveExpression("012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"), customOptions, $"(\"012345678901234567890123456789012345678901234567890123456789012345678901234567890\" +{nl}$\"123456789\")" };
             yield return new object[] { new CodePrimitiveExpression(new string('a', 256)), null, $"@\"{new string('a', 256)}\"" };
             yield return new object[] { new CodePrimitiveExpression("\"" + new string('a', 254) + "\""), null, $"@\"\"\"{new string('a', 254)}\"\"\"" };
             yield return new object[] { new CodePrimitiveExpression("\"" + new string('a', 1498) + "\""), null, $"@\"\"\"{new string('a', 1498)}\"\"\"" };
@@ -918,7 +918,7 @@ public static IEnumerable<object[]> GenerateCodeFromStatement_TestData()
                 new CodeConditionStatement(
                     new CodePrimitiveExpression(1),
                     new CodeExpressionStatement(new CodePrimitiveExpression(new string('a', 82)))
-                ), null, $"if (1) {{{nl}    \"{new string('a', 81)}\" +{nl}        \"a\";{nl}}}{nl}"
+                ), null, $"if (1) {{{nl}    (\"{new string('a', 81)}\" +{nl}        \"a\");{nl}}}{nl}"
             };
             yield return new object[]
             {
@@ -928,7 +928,7 @@ public static IEnumerable<object[]> GenerateCodeFromStatement_TestData()
                         new CodePrimitiveExpression(2),
                         new CodeExpressionStatement(new CodePrimitiveExpression(new string('a', 82)))
                     )
-                ), null, $"if (1) {{{nl}    if (2) {{{nl}        \"{new string('a', 81)}\" +{nl}            \"a\";{nl}    }}{nl}}}{nl}"
+                ), null, $"if (1) {{{nl}    if (2) {{{nl}        (\"{new string('a', 81)}\" +{nl}            \"a\");{nl}    }}{nl}}}{nl}"
             };
             yield return new object[]
             {
@@ -941,7 +941,7 @@ public static IEnumerable<object[]> GenerateCodeFromStatement_TestData()
                             new CodeExpressionStatement(new CodePrimitiveExpression(new string('a', 82)))
                         )
                     )
-                ), null, $"if (1) {{{nl}    if (2) {{{nl}        if (3) {{{nl}            \"{new string('a', 81)}\" +{nl}                \"a\";{nl}        }}{nl}    }}{nl}}}{nl}"
+                ), null, $"if (1) {{{nl}    if (2) {{{nl}        if (3) {{{nl}            (\"{new string('a', 81)}\" +{nl}                \"a\");{nl}        }}{nl}    }}{nl}}}{nl}"
             };
             yield return new object[]
             {
@@ -957,7 +957,7 @@ public static IEnumerable<object[]> GenerateCodeFromStatement_TestData()
                             )
                         )
                     )
-                ), null, $"if (1) {{{nl}    if (2) {{{nl}        if (3) {{{nl}            if (4) {{{nl}                \"{new string('a', 81)}\" +{nl}                    \"a\";{nl}            }}{nl}        }}{nl}    }}{nl}}}{nl}"
+                ), null, $"if (1) {{{nl}    if (2) {{{nl}        if (3) {{{nl}            if (4) {{{nl}                (\"{new string('a', 81)}\" +{nl}                    \"a\");{nl}            }}{nl}        }}{nl}    }}{nl}}}{nl}"
             };
 
             yield return new object[]
@@ -1308,6 +1308,7 @@ public static IEnumerable<object[]> GenerateCodeFromStatement_TestData()
 
         [Theory]
         [MemberData(nameof(GenerateCodeFromStatement_TestData))]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, ".NET Framework has different string breakup handling")]
         public void GenerateCodeFromStatement_Invoke_Success(CodeStatement e, CodeGeneratorOptions o, string expected)
         {
             ICodeGenerator generator = GetGenerator();
@@ -2708,6 +2709,35 @@ public void ValidateIdentifier_InvokeInvalid_ThrowsArgumentException(string valu
             AssertExtensions.Throws<ArgumentException>("value", null, () => generator.ValidateIdentifier(value));
         }
 
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, ".NET Framework has different string breakup handling")]
+        public void LineBreaksShouldPreserveTheWholeStringAsOneValue()
+        {
+            CodeStatement e = new CodeAssignStatement(
+                new CodeFieldReferenceExpression
+                {
+                    FieldName = "Value",
+                    TargetObject = new CodeTypeReferenceExpression("PF")
+                },
+                new CodeMethodInvokeExpression
+                {
+                    Parameters =
+                    {
+                        new CodePrimitiveExpression('|')
+                    },
+                    Method = new CodeMethodReferenceExpression
+                    {
+                        MethodName = "MethodName",
+                        TargetObject = new CodePrimitiveExpression(new string('*', 82))
+                    }
+                }
+            );
+            ICodeGenerator generator = GetGenerator();
+            var writer = new StringWriter();
+            generator.GenerateCodeFromStatement(e, writer, new CodeGeneratorOptions());
+            AssertEqualLong("PF.Value = (\"*********************************************************************************\" +" + writer.NewLine + "    \"*\").MethodName('|');" + writer.NewLine, writer.ToString());
+        }
+
         private static ICodeGenerator GetGenerator()
         {
 #pragma warning disable 0618
diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenDictionary.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenDictionary.cs
index dc06ca0cd928..46e3ae62e8b5 100644
--- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenDictionary.cs
+++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenDictionary.cs
@@ -461,7 +461,7 @@ public ref readonly TValue this[TKey key]
 
                 if (Unsafe.IsNullRef(ref Unsafe.AsRef(in valueRef)))
                 {
-                    ThrowHelper.ThrowKeyNotFoundException();
+                    ThrowHelper.ThrowKeyNotFoundException(key);
                 }
 
                 return ref valueRef;
diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/KeyAnalyzer.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/KeyAnalyzer.cs
index ddea9bfbe769..9f6094edb977 100644
--- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/KeyAnalyzer.cs
+++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/KeyAnalyzer.cs
@@ -37,7 +37,7 @@ public static AnalysisResults Analyze(
             AnalysisResults results;
             if (minLength == 0 || !TryUseSubstring(uniqueStrings, ignoreCase, minLength, maxLength, out results))
             {
-                results = CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, 0, 0, isSubstring: false, static (s, _, _) => s.AsSpan());
+                results = CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, 0, 0, static (s, _, _) => s.AsSpan());
             }
 
             return results;
@@ -77,7 +77,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
                     if (HasSufficientUniquenessFactor(set, uniqueStrings, acceptableNonUniqueCount))
                     {
                         results = CreateAnalysisResults(
-                            uniqueStrings, ignoreCase, minLength, maxLength, index, count, isSubstring: true,
+                            uniqueStrings, ignoreCase, minLength, maxLength, index, count,
                             static (string s, int index, int count) => s.AsSpan(index, count));
                         return true;
                     }
@@ -101,7 +101,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
                         if (HasSufficientUniquenessFactor(set, uniqueStrings, acceptableNonUniqueCount))
                         {
                             results = CreateAnalysisResults(
-                                uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count, isSubstring: true,
+                                uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count,
                                 static (string s, int index, int count) => s.AsSpan(s.Length + index, count));
                             return true;
                         }
@@ -115,7 +115,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
         }
 
         private static AnalysisResults CreateAnalysisResults(
-            ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count, bool isSubstring, GetSpan getSubstringSpan)
+            ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count, GetSpan getHashString)
         {
             // Start off by assuming all strings are ASCII
             bool allAsciiIfIgnoreCase = true;
@@ -125,30 +125,42 @@ private static AnalysisResults CreateAnalysisResults(
             // substrings are ASCII, so we check each.
             if (ignoreCase)
             {
-                // Further, if the ASCII keys (in their entirety) don't contain any letters, then we can
-                // actually perform the comparison as case-sensitive even if case-insensitive
-                // was requested, as there's nothing that would compare equally to the substring
-                // other than the substring itself.
-                bool canSwitchIgnoreCaseHashToCaseSensitive = !isSubstring;
+                // Further, if the ASCII keys (in their entirety) don't contain any letters,
+                // then we can actually perform the comparison as case-sensitive even if
+                // case-insensitive was requested, as there's nothing that would compare
+                // equally to the key other than the key itself.
+                bool canSwitchIgnoreCaseHashToCaseSensitive = true;
 
-                foreach (string s in uniqueStrings)
+                foreach (string uniqueString in uniqueStrings)
                 {
-                    // Get the span for the substring.
-                    ReadOnlySpan<char> substring = getSubstringSpan(s, index, count);
+                    // Get a span representing the slice of the uniqueString which will be hashed.
+                    ReadOnlySpan<char> hashString = getHashString(uniqueString, index, count);
 
-                    // If the substring isn't ASCII, bail out to return the results.
-                    if (!IsAllAscii(substring))
+                    // If the slice isn't ASCII, bail out to return the results.
+                    if (!IsAllAscii(hashString))
                     {
                         allAsciiIfIgnoreCase = false;
                         canSwitchIgnoreCaseHashToCaseSensitive = false;
                         break;
                     }
 
-                    // All substrings so far are still ASCII only.  If this one contains any ASCII
-                    // letters, mark that we can't switch to case-sensitive.
-                    if (canSwitchIgnoreCaseHashToCaseSensitive && ContainsAnyLetters(substring))
+                    // The hash string is ASCII only.  We disable the switch to
+                    // case sensitive if by examining the entire uniqueString we
+                    // find that it is not ASCII, or that it contains ASCII letters.
+                    if (canSwitchIgnoreCaseHashToCaseSensitive)
                     {
-                        canSwitchIgnoreCaseHashToCaseSensitive = false;
+                        // If count is 0 then uniqueString equals hashString,
+                        // and as we have just checked that IsAllAscii(hashString) is true
+                        // then we know IsAllAscii(uniqueString) must be true,
+                        // so we can skip the check.
+                        if (count > 0 && !IsAllAscii(uniqueString.AsSpan()))
+                        {
+                            canSwitchIgnoreCaseHashToCaseSensitive = false;
+                        }
+                        else if (ContainsAnyAsciiLetters(uniqueString.AsSpan()))
+                        {
+                            canSwitchIgnoreCaseHashToCaseSensitive = false;
+                        }
                     }
                 }
 
@@ -207,7 +219,7 @@ internal static unsafe bool IsAllAscii(ReadOnlySpan<char> s)
 #if NET8_0_OR_GREATER
         private static readonly SearchValues<char> s_asciiLetters = SearchValues.Create("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
 #endif
-        internal static bool ContainsAnyLetters(ReadOnlySpan<char> s)
+        internal static bool ContainsAnyAsciiLetters(ReadOnlySpan<char> s)
         {
             Debug.Assert(IsAllAscii(s));
 
diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableDictionary_2.Builder.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableDictionary_2.Builder.cs
index 6515395b4588..fac2b6163c13 100644
--- a/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableDictionary_2.Builder.cs
+++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableDictionary_2.Builder.cs
@@ -409,12 +409,12 @@ public TValue this[TKey key]
                 get
                 {
                     TValue value;
-                    if (this.TryGetValue(key, out value!))
+                    if (!this.TryGetValue(key, out value!))
                     {
-                        return value;
+                        ThrowHelper.ThrowKeyNotFoundException(key);
                     }
 
-                    throw new KeyNotFoundException(SR.Format(SR.Arg_KeyNotFoundWithKey, key.ToString()));
+                    return value;
                 }
 
                 set
diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableDictionary_2.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableDictionary_2.cs
index 3cc1c12d38fb..ee164957d36a 100644
--- a/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableDictionary_2.cs
+++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableDictionary_2.cs
@@ -246,12 +246,12 @@ public TValue this[TKey key]
                 Requires.NotNullAllowStructs(key, nameof(key));
 
                 TValue value;
-                if (this.TryGetValue(key, out value!))
+                if (!this.TryGetValue(key, out value!))
                 {
-                    return value;
+                    ThrowHelper.ThrowKeyNotFoundException(key);
                 }
 
-                throw new KeyNotFoundException(SR.Format(SR.Arg_KeyNotFoundWithKey, key.ToString()));
+                return value;
             }
         }
 
diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableSortedDictionary_2.Builder.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableSortedDictionary_2.Builder.cs
index 9a975cc55e27..ce8e4a0f1799 100644
--- a/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableSortedDictionary_2.Builder.cs
+++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableSortedDictionary_2.Builder.cs
@@ -178,12 +178,12 @@ public TValue this[TKey key]
                 get
                 {
                     TValue value;
-                    if (this.TryGetValue(key, out value!))
+                    if (!this.TryGetValue(key, out value!))
                     {
-                        return value;
+                        ThrowHelper.ThrowKeyNotFoundException(key);
                     }
 
-                    throw new KeyNotFoundException(SR.Format(SR.Arg_KeyNotFoundWithKey, key.ToString()));
+                    return value;
                 }
 
                 set
diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableSortedDictionary_2.Node.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableSortedDictionary_2.Node.cs
index de3e6cf95217..76330f19c18c 100644
--- a/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableSortedDictionary_2.Node.cs
+++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableSortedDictionary_2.Node.cs
@@ -338,7 +338,7 @@ internal ref readonly TValue ValueRef(TKey key, IComparer<TKey> keyComparer)
                 ImmutableSortedDictionary<TKey, TValue>.Node match = this.Search(key, keyComparer);
                 if (match.IsEmpty)
                 {
-                    throw new KeyNotFoundException(SR.Format(SR.Arg_KeyNotFoundWithKey, key.ToString()));
+                    ThrowHelper.ThrowKeyNotFoundException(key);
                 }
 
                 return ref match._value;
diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableSortedDictionary_2.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableSortedDictionary_2.cs
index b4d7d381e10e..d44687253fa5 100644
--- a/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableSortedDictionary_2.cs
+++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableSortedDictionary_2.cs
@@ -197,12 +197,12 @@ public TValue this[TKey key]
                 Requires.NotNullAllowStructs(key, nameof(key));
 
                 TValue? value;
-                if (this.TryGetValue(key, out value))
+                if (!this.TryGetValue(key, out value))
                 {
-                    return value;
+                    ThrowHelper.ThrowKeyNotFoundException(key);
                 }
 
-                throw new KeyNotFoundException(SR.Format(SR.Arg_KeyNotFoundWithKey, key.ToString()));
+                return value;
             }
         }
 
diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/ThrowHelper.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/ThrowHelper.cs
index 99800e23819f..4085f318e341 100644
--- a/src/libraries/System.Collections.Immutable/src/System/Collections/ThrowHelper.cs
+++ b/src/libraries/System.Collections.Immutable/src/System/Collections/ThrowHelper.cs
@@ -26,8 +26,8 @@ public static void ThrowArgumentNullException(string? paramName) =>
             throw new ArgumentNullException(paramName);
 
         [DoesNotReturn]
-        public static void ThrowKeyNotFoundException() =>
-            throw new KeyNotFoundException();
+        public static void ThrowKeyNotFoundException<TKey>(TKey key) =>
+            throw new KeyNotFoundException(SR.Format(SR.Arg_KeyNotFoundWithKey, key));
 
         [DoesNotReturn]
         public static void ThrowInvalidOperationException() =>
diff --git a/src/libraries/System.Collections.Immutable/tests/Frozen/KeyAnalyzerTests.cs b/src/libraries/System.Collections.Immutable/tests/Frozen/KeyAnalyzerTests.cs
index 31f6007b7244..00a16bf3cef5 100644
--- a/src/libraries/System.Collections.Immutable/tests/Frozen/KeyAnalyzerTests.cs
+++ b/src/libraries/System.Collections.Immutable/tests/Frozen/KeyAnalyzerTests.cs
@@ -107,6 +107,13 @@ public static void LeftHandCaseInsensitive()
             Assert.Equal(0, r.HashIndex);
             Assert.Equal(1, r.HashCount);
 
+            r = RunAnalysis(new[] { "0001", "0002", "0003", "0004", "0005", "0006" }, true);
+            Assert.False(r.RightJustifiedSubstring);
+            Assert.False(r.IgnoreCase);
+            Assert.True(r.AllAsciiIfIgnoreCase);
+            Assert.Equal(3, r.HashIndex);
+            Assert.Equal(1, r.HashCount);
+
         }
 
         [Fact]
@@ -226,9 +233,9 @@ public static void IsAllAscii()
         [Fact]
         public static void ContainsAnyLetters()
         {
-            Assert.True(KeyAnalyzer.ContainsAnyLetters("abc".AsSpan()));
-            Assert.True(KeyAnalyzer.ContainsAnyLetters("ABC".AsSpan()));
-            Assert.False(KeyAnalyzer.ContainsAnyLetters("123".AsSpan()));
+            Assert.True(KeyAnalyzer.ContainsAnyAsciiLetters("abc".AsSpan()));
+            Assert.True(KeyAnalyzer.ContainsAnyAsciiLetters("ABC".AsSpan()));
+            Assert.False(KeyAnalyzer.ContainsAnyAsciiLetters("123".AsSpan()));
             // note, must only pass ASCII to ContainsAnyLetters, anything else is a
             // Debug.Assert and would not have been called in the actual implementation
         }
diff --git a/src/libraries/System.Collections.Immutable/tests/System.Collections.Immutable.Tests.csproj b/src/libraries/System.Collections.Immutable/tests/System.Collections.Immutable.Tests.csproj
index 4286b79bbd41..839c10352bb4 100644
--- a/src/libraries/System.Collections.Immutable/tests/System.Collections.Immutable.Tests.csproj
+++ b/src/libraries/System.Collections.Immutable/tests/System.Collections.Immutable.Tests.csproj
@@ -6,7 +6,7 @@
   </PropertyGroup>
 
   <PropertyGroup Condition="'$(TargetOS)' == 'browser'">
-    <WasmXHarnessMonoArgs>--setenv=XHARNESS_LOG_TEST_START=true</WasmXHarnessMonoArgs>
+    <XunitShowProgress>true</XunitShowProgress>
     <!-- This WASM test is problematic and slow right now. This sets the xharness timeout but there is also override in sendtohelix-browser.targets -->
     <WasmXHarnessTestsTimeout>01:15:00</WasmXHarnessTestsTimeout>
   </PropertyGroup>
diff --git a/src/libraries/System.Collections/src/System/Collections/BitArray.cs b/src/libraries/System.Collections/src/System/Collections/BitArray.cs
index 0ac3331b062f..0f54b8a714c7 100644
--- a/src/libraries/System.Collections/src/System/Collections/BitArray.cs
+++ b/src/libraries/System.Collections/src/System/Collections/BitArray.cs
@@ -756,21 +756,19 @@ public unsafe void CopyTo(Array array, int index)
 
             if (array is int[] intArray)
             {
-                Div32Rem(m_length, out int extraBits);
-
-                if (extraBits == 0)
+                if (array.Length - index < GetInt32ArrayLengthFromBitLength(m_length))
                 {
-                    // we have perfect bit alignment, no need to sanitize, just copy
-                    Array.Copy(m_array, 0, intArray, index, m_array.Length);
+                    throw new ArgumentException(SR.Argument_InvalidOffLen);
                 }
-                else
-                {
-                    int last = (m_length - 1) >> BitShiftPerInt32;
-                    // do not copy the last int, as it is not completely used
-                    Array.Copy(m_array, 0, intArray, index, last);
 
+                int quotient = Div32Rem(m_length, out int extraBits);
+
+                Array.Copy(m_array, 0, intArray, index, quotient);
+
+                if (extraBits > 0)
+                {
                     // the last int needs to be masked
-                    intArray[index + last] = m_array[last] & unchecked((1 << extraBits) - 1);
+                    intArray[index + quotient] = m_array[quotient] & unchecked((1 << extraBits) - 1);
                 }
             }
             else if (array is byte[] byteArray)
diff --git a/src/libraries/System.Collections/src/System/Collections/Generic/PriorityQueue.cs b/src/libraries/System.Collections/src/System/Collections/Generic/PriorityQueue.cs
index edc1327b446c..5047b7643373 100644
--- a/src/libraries/System.Collections/src/System/Collections/Generic/PriorityQueue.cs
+++ b/src/libraries/System.Collections/src/System/Collections/Generic/PriorityQueue.cs
@@ -470,7 +470,7 @@ public void EnqueueRange(IEnumerable<TElement> elements, TPriority priority)
 
             if (_size == 0)
             {
-                // build using Heapify() if the queue is empty.
+                // If the queue is empty just append the elements since they all have the same priority.
 
                 int i = 0;
                 (TElement, TPriority)[] nodes = _nodes;
@@ -487,11 +487,6 @@ public void EnqueueRange(IEnumerable<TElement> elements, TPriority priority)
 
                 _size = i;
                 _version++;
-
-                if (i > 1)
-                {
-                    Heapify();
-                }
             }
             else
             {
diff --git a/src/libraries/System.Collections/tests/BitArray/BitArray_GetSetTests.cs b/src/libraries/System.Collections/tests/BitArray/BitArray_GetSetTests.cs
index 28f3cb96dca9..912164e4efc5 100644
--- a/src/libraries/System.Collections/tests/BitArray/BitArray_GetSetTests.cs
+++ b/src/libraries/System.Collections/tests/BitArray/BitArray_GetSetTests.cs
@@ -396,6 +396,18 @@ public static void CopyToIntArray()
             }
         }
 
+        // https://github.com/dotnet/runtime/issues/98813
+        [Fact]
+        public static void CopyToIntArray_Regression98813()
+        {
+            BitArray bitArray = new BitArray(256);
+            bitArray.Length = 32;
+            int[] expectedOutput = new int[] { 0 };
+            int[] actualOutput = new int[1];
+            bitArray.CopyTo(actualOutput, 0);
+            Assert.Equal(expectedOutput, actualOutput);
+        }
+
         // https://github.com/dotnet/runtime/issues/30440
         [Fact]
         public static void CopyToByteArray_Regression39929()
@@ -452,19 +464,13 @@ public static void CopyTo_Type_Invalid()
         [InlineData(default(int), BitsPerInt32, 1, 1)]
         [InlineData(default(int), BitsPerInt32 * 4, 4 - 1, 0)]
         [InlineData(default(int), BitsPerInt32 * 4, 4, 1)]
-        public static void CopyTo_Size_Invalid<T>(T def, int bits, int arraySize, int index)
+        [InlineData(default(int), BitsPerInt32 + 1, 1, 0)]
+        public static void CopyTo_Size_Invalid<T>(T _, int bits, int arraySize, int index)
         {
             ICollection bitArray = new BitArray(bits);
             T[] array = (T[])Array.CreateInstance(typeof(T), arraySize);
             AssertExtensions.Throws<ArgumentOutOfRangeException>("index", () => bitArray.CopyTo(array, -1));
-            if (def is int)
-            {
-                AssertExtensions.Throws<ArgumentException>("destinationArray", string.Empty, () => bitArray.CopyTo(array, index));
-            }
-            else
-            {
-                AssertExtensions.Throws<ArgumentException>(null, () => bitArray.CopyTo(array, index));
-            }
+            AssertExtensions.Throws<ArgumentException>(null, () => bitArray.CopyTo(array, index));
         }
 
         [Fact]
diff --git a/src/libraries/System.ComponentModel.Annotations/src/System/ComponentModel/DataAnnotations/Schema/DatabaseGeneratedAttribute.cs b/src/libraries/System.ComponentModel.Annotations/src/System/ComponentModel/DataAnnotations/Schema/DatabaseGeneratedAttribute.cs
index 7029771044d0..0125848d5890 100644
--- a/src/libraries/System.ComponentModel.Annotations/src/System/ComponentModel/DataAnnotations/Schema/DatabaseGeneratedAttribute.cs
+++ b/src/libraries/System.ComponentModel.Annotations/src/System/ComponentModel/DataAnnotations/Schema/DatabaseGeneratedAttribute.cs
@@ -15,7 +15,7 @@ public class DatabaseGeneratedAttribute : Attribute
         /// <param name="databaseGeneratedOption">The pattern used to generate values for the property in the database.</param>
         public DatabaseGeneratedAttribute(DatabaseGeneratedOption databaseGeneratedOption)
         {
-            if (!(Enum.IsDefined(typeof(DatabaseGeneratedOption), databaseGeneratedOption)))
+            if (!Enum.IsDefined(databaseGeneratedOption))
             {
                 throw new ArgumentOutOfRangeException(nameof(databaseGeneratedOption));
             }
diff --git a/src/libraries/System.ComponentModel.Primitives/ref/System.ComponentModel.Primitives.cs b/src/libraries/System.ComponentModel.Primitives/ref/System.ComponentModel.Primitives.cs
index cb8654836185..0cf04f9da116 100644
--- a/src/libraries/System.ComponentModel.Primitives/ref/System.ComponentModel.Primitives.cs
+++ b/src/libraries/System.ComponentModel.Primitives/ref/System.ComponentModel.Primitives.cs
@@ -89,14 +89,12 @@ public DescriptionAttribute(string description) { }
     [System.AttributeUsageAttribute(System.AttributeTargets.Class | System.AttributeTargets.Interface, AllowMultiple=true, Inherited=true)]
     public sealed partial class DesignerAttribute : System.Attribute
     {
-        public DesignerAttribute([System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] string designerTypeName) { }
-        public DesignerAttribute([System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] string designerTypeName, [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] string designerBaseTypeName) { }
-        public DesignerAttribute([System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] string designerTypeName, [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] System.Type designerBaseType) { }
-        public DesignerAttribute([System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] System.Type designerType) { }
-        public DesignerAttribute([System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] System.Type designerType, [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] System.Type designerBaseType) { }
-        [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)]
+        public DesignerAttribute(string designerTypeName) { }
+        public DesignerAttribute(string designerTypeName, string designerBaseTypeName) { }
+        public DesignerAttribute(string designerTypeName, System.Type designerBaseType) { }
+        public DesignerAttribute(System.Type designerType) { }
+        public DesignerAttribute(System.Type designerType, System.Type designerBaseType) { }
         public string DesignerBaseTypeName { get { throw null; } }
-        [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)]
         public string DesignerTypeName { get { throw null; } }
         public override object TypeId { get { throw null; } }
         public override bool Equals(object? obj) { throw null; }
@@ -164,12 +162,10 @@ public DisplayNameAttribute(string displayName) { }
     public sealed partial class EditorAttribute : System.Attribute
     {
         public EditorAttribute() { }
-        public EditorAttribute([System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicConstructors)] string typeName, [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicConstructors)] string? baseTypeName) { }
-        public EditorAttribute([System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicConstructors)] string typeName, [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicConstructors)] System.Type baseType) { }
-        public EditorAttribute([System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicConstructors)] System.Type type, [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicConstructors)] System.Type baseType) { }
-        [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicConstructors)]
+        public EditorAttribute(string typeName, string? baseTypeName) { }
+        public EditorAttribute(string typeName, System.Type baseType) { }
+        public EditorAttribute(System.Type type, System.Type baseType) { }
         public string? EditorBaseTypeName { get { throw null; } }
-        [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicConstructors)]
         public string EditorTypeName { get { throw null; } }
         public override object TypeId { get { throw null; } }
         public override bool Equals(object? obj) { throw null; }
diff --git a/src/libraries/System.ComponentModel.Primitives/src/System/ComponentModel/DesignerAttribute.cs b/src/libraries/System.ComponentModel.Primitives/src/System/ComponentModel/DesignerAttribute.cs
index 0b4fef2df2dc..9aa8f394d141 100644
--- a/src/libraries/System.ComponentModel.Primitives/src/System/ComponentModel/DesignerAttribute.cs
+++ b/src/libraries/System.ComponentModel.Primitives/src/System/ComponentModel/DesignerAttribute.cs
@@ -17,7 +17,7 @@ public sealed class DesignerAttribute : Attribute
         /// Initializes a new instance of the <see cref='System.ComponentModel.DesignerAttribute'/> class using the name of the type that
         /// provides design-time services.
         /// </summary>
-        public DesignerAttribute([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] string designerTypeName)
+        public DesignerAttribute(string designerTypeName)
         {
             ArgumentNullException.ThrowIfNull(designerTypeName);
 
@@ -29,7 +29,7 @@ public DesignerAttribute([DynamicallyAccessedMembers(DynamicallyAccessedMemberTy
         /// Initializes a new instance of the <see cref='System.ComponentModel.DesignerAttribute'/> class using the type that provides
         /// design-time services.
         /// </summary>
-        public DesignerAttribute([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type designerType)
+        public DesignerAttribute(Type designerType)
         {
             ArgumentNullException.ThrowIfNull(designerType);
 
@@ -41,9 +41,7 @@ public DesignerAttribute([DynamicallyAccessedMembers(DynamicallyAccessedMemberTy
         /// Initializes a new instance of the <see cref='System.ComponentModel.DesignerAttribute'/> class using the designer type and the
         /// base class for the designer.
         /// </summary>
-        public DesignerAttribute(
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] string designerTypeName,
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] string designerBaseTypeName)
+        public DesignerAttribute(string designerTypeName, string designerBaseTypeName)
         {
             ArgumentNullException.ThrowIfNull(designerTypeName);
 
@@ -55,9 +53,7 @@ public DesignerAttribute(
         /// Initializes a new instance of the <see cref='System.ComponentModel.DesignerAttribute'/> class, using the name of the designer
         /// class and the base class for the designer.
         /// </summary>
-        public DesignerAttribute(
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] string designerTypeName,
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type designerBaseType)
+        public DesignerAttribute(string designerTypeName, Type designerBaseType)
         {
             ArgumentNullException.ThrowIfNull(designerTypeName);
             ArgumentNullException.ThrowIfNull(designerBaseType);
@@ -70,9 +66,7 @@ public DesignerAttribute(
         /// Initializes a new instance of the <see cref='System.ComponentModel.DesignerAttribute'/> class using the types of the designer and
         /// designer base class.
         /// </summary>
-        public DesignerAttribute(
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type designerType,
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type designerBaseType)
+        public DesignerAttribute(Type designerType, Type designerBaseType)
         {
             ArgumentNullException.ThrowIfNull(designerType);
             ArgumentNullException.ThrowIfNull(designerBaseType);
@@ -85,13 +79,11 @@ public DesignerAttribute(
         /// Gets the name of the base type of this designer.
         /// </summary>
         // Using PublicParameterlessConstructor to preserve the type. See https://github.com/mono/linker/issues/1878
-        [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)]
         public string DesignerBaseTypeName { get; }
 
         /// <summary>
         /// Gets the name of the designer type associated with this designer attribute.
         /// </summary>
-        [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)]
         public string DesignerTypeName { get; }
 
         /// <summary>
diff --git a/src/libraries/System.ComponentModel.Primitives/src/System/ComponentModel/EditorAttribute.cs b/src/libraries/System.ComponentModel.Primitives/src/System/ComponentModel/EditorAttribute.cs
index 012038a5d495..a62b0f4dae88 100644
--- a/src/libraries/System.ComponentModel.Primitives/src/System/ComponentModel/EditorAttribute.cs
+++ b/src/libraries/System.ComponentModel.Primitives/src/System/ComponentModel/EditorAttribute.cs
@@ -27,9 +27,7 @@ public EditorAttribute()
         /// Initializes a new instance of the <see cref='System.ComponentModel.EditorAttribute'/> class with the type name and base type
         /// name of the editor.
         /// </summary>
-        public EditorAttribute(
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] string typeName,
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] string? baseTypeName)
+        public EditorAttribute(string typeName, string? baseTypeName)
         {
             ArgumentNullException.ThrowIfNull(typeName);
 
@@ -40,9 +38,7 @@ public EditorAttribute(
         /// <summary>
         /// Initializes a new instance of the <see cref='System.ComponentModel.EditorAttribute'/> class.
         /// </summary>
-        public EditorAttribute(
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] string typeName,
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] Type baseType)
+        public EditorAttribute(string typeName, Type baseType)
         {
             ArgumentNullException.ThrowIfNull(typeName);
             ArgumentNullException.ThrowIfNull(baseType);
@@ -54,9 +50,7 @@ public EditorAttribute(
         /// <summary>
         /// Initializes a new instance of the <see cref='System.ComponentModel.EditorAttribute'/> class.
         /// </summary>
-        public EditorAttribute(
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] Type type,
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] Type baseType)
+        public EditorAttribute(Type type, Type baseType)
         {
             ArgumentNullException.ThrowIfNull(type);
             ArgumentNullException.ThrowIfNull(baseType);
@@ -68,13 +62,11 @@ public EditorAttribute(
         /// <summary>
         /// Gets the name of the base class or interface serving as a lookup key for this editor.
         /// </summary>
-        [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)]
         public string? EditorBaseTypeName { get; }
 
         /// <summary>
         /// Gets the name of the editor class.
         /// </summary>
-        [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)]
         public string EditorTypeName { get; }
 
         /// <summary>
diff --git a/src/libraries/System.ComponentModel.TypeConverter/ref/System.ComponentModel.TypeConverter.cs b/src/libraries/System.ComponentModel.TypeConverter/ref/System.ComponentModel.TypeConverter.cs
index fe239f5de058..59c3284a7fab 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/ref/System.ComponentModel.TypeConverter.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/ref/System.ComponentModel.TypeConverter.cs
@@ -294,7 +294,7 @@ protected CustomTypeDescriptor(System.ComponentModel.ICustomTypeDescriptor? pare
         public virtual System.ComponentModel.EventDescriptor? GetDefaultEvent() { throw null; }
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("PropertyDescriptor's PropertyType cannot be statically discovered.")]
         public virtual System.ComponentModel.PropertyDescriptor? GetDefaultProperty() { throw null; }
-        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Editors registered in TypeDescriptor.AddEditorTable may be trimmed.")]
+        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming.")]
         public virtual object? GetEditor(System.Type editorBaseType) { throw null; }
         public virtual System.ComponentModel.EventDescriptorCollection GetEvents() { throw null; }
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("The public parameterless constructor or the 'Default' static field may be trimmed from the Attribute's Type.")]
@@ -428,9 +428,8 @@ public DoubleConverter() { }
     }
     public partial class EnumConverter : System.ComponentModel.TypeConverter
     {
-        public EnumConverter([System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicFields | System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] System.Type type) { }
+        public EnumConverter(System.Type type) { }
         protected virtual System.Collections.IComparer Comparer { get { throw null; } }
-        [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicFields | System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)]
         protected System.Type EnumType { get { throw null; } }
         protected System.ComponentModel.TypeConverter.StandardValuesCollection? Values { get { throw null; } set { } }
         public override bool CanConvertFrom(System.ComponentModel.ITypeDescriptorContext? context, System.Type sourceType) { throw null; }
@@ -589,7 +588,7 @@ public partial interface ICustomTypeDescriptor
         System.ComponentModel.EventDescriptor? GetDefaultEvent();
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("PropertyDescriptor's PropertyType cannot be statically discovered.")]
         System.ComponentModel.PropertyDescriptor? GetDefaultProperty();
-        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Editors registered in TypeDescriptor.AddEditorTable may be trimmed.")]
+        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming.")]
         object? GetEditor(System.Type editorBaseType);
         System.ComponentModel.EventDescriptorCollection GetEvents();
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("The public parameterless constructor or the 'Default' static field may be trimmed from the Attribute's Type.")]
@@ -1089,7 +1088,7 @@ protected override void FillAttributes(System.Collections.IList attributeList) {
         public System.ComponentModel.PropertyDescriptorCollection GetChildProperties(object instance) { throw null; }
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("PropertyDescriptor's PropertyType cannot be statically discovered. The Type of instance cannot be statically discovered. The public parameterless constructor or the 'Default' static field may be trimmed from the Attribute's Type.")]
         public virtual System.ComponentModel.PropertyDescriptorCollection GetChildProperties(object? instance, System.Attribute[]? filter) { throw null; }
-        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Editors registered in TypeDescriptor.AddEditorTable may be trimmed. PropertyDescriptor's PropertyType cannot be statically discovered.")]
+        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming. PropertyDescriptor's PropertyType cannot be statically discovered.")]
         public virtual object? GetEditor(System.Type editorBaseType) { throw null; }
         public override int GetHashCode() { throw null; }
         protected override object? GetInvocationTarget(System.Type type, object instance) { throw null; }
@@ -1483,12 +1482,12 @@ public static void CreateAssociation(object primary, object secondary) { }
         public static System.ComponentModel.PropertyDescriptor? GetDefaultProperty(object component, bool noCustomTypeDesc) { throw null; }
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("PropertyDescriptor's PropertyType cannot be statically discovered.")]
         public static System.ComponentModel.PropertyDescriptor? GetDefaultProperty([System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.All)] System.Type componentType) { throw null; }
-        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Editors registered in TypeDescriptor.AddEditorTable may be trimmed. The Type of component cannot be statically discovered.")]
+        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming. The Type of component cannot be statically discovered.")]
         public static object? GetEditor(object component, System.Type editorBaseType) { throw null; }
-        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Editors registered in TypeDescriptor.AddEditorTable may be trimmed. The Type of component cannot be statically discovered.")]
+        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming. The Type of component cannot be statically discovered.")]
         [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Advanced)]
         public static object? GetEditor(object component, System.Type editorBaseType, bool noCustomTypeDesc) { throw null; }
-        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Editors registered in TypeDescriptor.AddEditorTable may be trimmed.")]
+        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming.")]
         public static object? GetEditor([System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute(System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.All)] System.Type type, System.Type editorBaseType) { throw null; }
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("The Type of component cannot be statically discovered.")]
         public static System.ComponentModel.EventDescriptorCollection GetEvents(object component) { throw null; }
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/CompatibilitySuppressions.xml b/src/libraries/System.ComponentModel.TypeConverter/src/CompatibilitySuppressions.xml
new file mode 100644
index 000000000000..d3c053f6e463
--- /dev/null
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/CompatibilitySuppressions.xml
@@ -0,0 +1,10 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<!-- https://learn.microsoft.com/en-us/dotnet/fundamentals/package-validation/diagnostic-ids -->
+<Suppressions xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
+  <Suppression>
+    <DiagnosticId>CP0015</DiagnosticId>
+    <Target>M:System.ComponentModel.TypeDescriptor.CreateDesigner(System.ComponentModel.IComponent,System.Type):[T:System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute]</Target>
+    <Left>ref/net9.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>lib/net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+</Suppressions>
\ No newline at end of file
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/Resources/Strings.resx b/src/libraries/System.ComponentModel.TypeConverter/src/Resources/Strings.resx
index f607a8a2fe91..b8a10ffbcd5f 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/src/Resources/Strings.resx
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/Resources/Strings.resx
@@ -76,6 +76,9 @@
   <data name="EnumConverterInvalidValue" xml:space="preserve">
     <value>The value '{0}' is not a valid value for the enum '{1}'.</value>
   </data>
+  <data name="EnumInvalidValue" xml:space="preserve">
+    <value>Type provided must be an Enum.</value>
+  </data>
   <data name="ErrorInvalidEventHandler" xml:space="preserve">
     <value>Invalid event handler for the {0} event.</value>
   </data>
@@ -103,6 +106,9 @@
   <data name="NullableConverterBadCtorArg" xml:space="preserve">
     <value>The specified type is not a nullable type.</value>
   </data>
+  <data name="RuntimeInstanceNotAllowed" xml:space="preserve">
+    <value>Runtime instantiation of this attribute is not allowed.</value>
+  </data>
   <data name="Text" xml:space="preserve">
     <value>(Text)</value>
   </data>
@@ -172,6 +178,9 @@
   <data name="CultureInfoConverterInvalidCulture" xml:space="preserve">
     <value>The {0} culture cannot be converted to a CultureInfo object on this computer.</value>
   </data>
+  <data name="IDesignerHostNotSupported" xml:space="preserve">
+    <value>Designer support has been disabled in the app configuration and is not supported.</value>
+  </data>
   <data name="ErrorInvalidServiceInstance" xml:space="preserve">
     <value>The service instance must derive from or implement {0}.</value>
   </data>
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/AmbientValueAttribute.cs b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/AmbientValueAttribute.cs
index 79b4d31603b9..53a432046683 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/AmbientValueAttribute.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/AmbientValueAttribute.cs
@@ -1,6 +1,8 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.ComponentModel.Design;
+using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 
 namespace System.ComponentModel
@@ -12,6 +14,11 @@ namespace System.ComponentModel
     [AttributeUsage(AttributeTargets.All)]
     public sealed class AmbientValueAttribute : Attribute
     {
+        /// <summary>
+        /// This is the default value.
+        /// </summary>
+        private object? _value;
+
         /// <summary>
         /// Initializes a new instance of the <see cref='System.ComponentModel.AmbientValueAttribute'/> class, converting the
         /// specified value to the specified type, and using the U.S. English culture as the
@@ -22,9 +29,15 @@ public AmbientValueAttribute([DynamicallyAccessedMembers(DynamicallyAccessedMemb
         {
             // The try/catch here is because attributes should never throw exceptions. We would fail to
             // load an otherwise normal class.
+
+            if (!IDesignerHost.IsSupported)
+            {
+                return;
+            }
+
             try
             {
-                Value = TypeDescriptor.GetConverter(type).ConvertFromInvariantString(value);
+                _value = TypeDescriptor.GetConverter(type).ConvertFromInvariantString(value);
             }
             catch
             {
@@ -37,7 +50,7 @@ public AmbientValueAttribute([DynamicallyAccessedMembers(DynamicallyAccessedMemb
         /// </summary>
         public AmbientValueAttribute(char value)
         {
-            Value = value;
+            _value = value;
         }
 
         /// <summary>
@@ -46,7 +59,7 @@ public AmbientValueAttribute(char value)
         /// </summary>
         public AmbientValueAttribute(byte value)
         {
-            Value = value;
+            _value = value;
         }
 
         /// <summary>
@@ -55,7 +68,7 @@ public AmbientValueAttribute(byte value)
         /// </summary>
         public AmbientValueAttribute(short value)
         {
-            Value = value;
+            _value = value;
         }
 
         /// <summary>
@@ -64,7 +77,7 @@ public AmbientValueAttribute(short value)
         /// </summary>
         public AmbientValueAttribute(int value)
         {
-            Value = value;
+            _value = value;
         }
 
         /// <summary>
@@ -73,7 +86,7 @@ public AmbientValueAttribute(int value)
         /// </summary>
         public AmbientValueAttribute(long value)
         {
-            Value = value;
+            _value = value;
         }
 
         /// <summary>
@@ -82,7 +95,7 @@ public AmbientValueAttribute(long value)
         /// </summary>
         public AmbientValueAttribute(float value)
         {
-            Value = value;
+            _value = value;
         }
 
         /// <summary>
@@ -91,7 +104,7 @@ public AmbientValueAttribute(float value)
         /// </summary>
         public AmbientValueAttribute(double value)
         {
-            Value = value;
+            _value = value;
         }
 
         /// <summary>
@@ -100,7 +113,7 @@ public AmbientValueAttribute(double value)
         /// </summary>
         public AmbientValueAttribute(bool value)
         {
-            Value = value;
+            _value = value;
         }
 
         /// <summary>
@@ -108,7 +121,7 @@ public AmbientValueAttribute(bool value)
         /// </summary>
         public AmbientValueAttribute(string? value)
         {
-            Value = value;
+            _value = value;
         }
 
         /// <summary>
@@ -117,13 +130,22 @@ public AmbientValueAttribute(string? value)
         /// </summary>
         public AmbientValueAttribute(object? value)
         {
-            Value = value;
+            _value = value;
         }
 
         /// <summary>
         /// Gets the ambient value of the property this attribute is bound to.
         /// </summary>
-        public object? Value { get; }
+        public object? Value {
+            get
+            {
+                if (!IDesignerHost.IsSupported)
+                {
+                    throw new ArgumentException(SR.RuntimeInstanceNotAllowed);
+                }
+                return _value;
+            }
+        }
 
         public override bool Equals([NotNullWhen(true)] object? obj)
         {
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/BindingList.cs b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/BindingList.cs
index d87deab82043..aa28f466deeb 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/BindingList.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/BindingList.cs
@@ -312,7 +312,7 @@ public virtual void EndNew(int itemIndex)
         {
             // Allow event handler to supply the new item for us
             // If event handler did not supply new item, create one ourselves
-            object? newItem = FireAddingNew() ?? Activator.CreateInstance(typeof(T));
+            object? newItem = FireAddingNew() ?? Activator.CreateInstance<T>();
 
             // Add item to end of list. Note: If event handler returned an item not of type T,
             // the cast below will trigger an InvalidCastException. This is by design.
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/CustomTypeDescriptor.cs b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/CustomTypeDescriptor.cs
index bc5c5ce6ffa6..0516dae7767c 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/CustomTypeDescriptor.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/CustomTypeDescriptor.cs
@@ -98,7 +98,7 @@ public virtual AttributeCollection GetAttributes()
         /// The GetEditor method returns an editor of the given type that is
         /// to be associated with the class this type descriptor is representing.
         /// </summary>
-        [RequiresUnreferencedCode(TypeDescriptor.EditorRequiresUnreferencedCode)]
+        [RequiresUnreferencedCode(TypeDescriptor.DesignTimeAttributeTrimmed)]
         public virtual object? GetEditor(Type editorBaseType) => _parent?.GetEditor(editorBaseType);
 
         /// <summary>
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/Design/IDesignerHost.cs b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/Design/IDesignerHost.cs
index b74eeaf316cd..8ab34614a7eb 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/Design/IDesignerHost.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/Design/IDesignerHost.cs
@@ -1,6 +1,8 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Diagnostics.CodeAnalysis;
+
 namespace System.ComponentModel.Design
 {
     /// <summary>
@@ -9,6 +11,9 @@ namespace System.ComponentModel.Design
     /// </summary>
     public interface IDesignerHost : IServiceContainer
     {
+        [FeatureSwitchDefinition("System.ComponentModel.Design.IDesignerHost.IsSupported")]
+        internal static bool IsSupported => AppContext.TryGetSwitch("System.ComponentModel.Design.IDesignerHost.IsSupported", out bool isSupported) ? isSupported : true;
+
         /// <summary>
         /// Gets or sets a value indicating whether the designer host
         /// is currently loading the document.
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/EnumConverter.cs b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/EnumConverter.cs
index 5dcc263e0d0c..c9d6d5a13a38 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/EnumConverter.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/EnumConverter.cs
@@ -4,6 +4,7 @@
 using System.Collections;
 using System.Collections.Generic;
 using System.ComponentModel.Design.Serialization;
+using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Globalization;
 using System.Reflection;
@@ -20,12 +21,16 @@ public class EnumConverter : TypeConverter
         /// Initializes a new instance of the <see cref='System.ComponentModel.EnumConverter'/> class for the given
         /// type.
         /// </summary>
-        public EnumConverter([DynamicallyAccessedMembers(TypeDescriptor.ReflectTypesDynamicallyAccessedMembers)] Type type)
+        public EnumConverter(Type type)
         {
+            if (!type.IsEnum && !type.Equals(typeof(Enum)))
+            {
+                throw new ArgumentException(SR.EnumInvalidValue);
+            }
+
             EnumType = type;
         }
 
-        [DynamicallyAccessedMembers(TypeDescriptor.ReflectTypesDynamicallyAccessedMembers)]
         protected Type EnumType { get; }
 
         protected StandardValuesCollection? Values { get; set; }
@@ -156,7 +161,10 @@ private static long GetEnumValue(bool isUnderlyingTypeUInt64, object enumVal, Cu
                 }
                 else
                 {
-                    FieldInfo? info = EnumType.GetField(enumName);
+                    [UnconditionalSuppressMessage("Trimming", "IL2075:", Justification = "Trimmer does not trim Enums")]
+                    FieldInfo? GetEnumField(string name) => EnumType.GetField(name);
+
+                    FieldInfo? info = GetEnumField(enumName);
                     if (info != null)
                     {
                         return new InstanceDescriptor(info, null);
@@ -227,9 +235,27 @@ public override StandardValuesCollection GetStandardValues(ITypeDescriptorContex
                 // We need to get the enum values in this rather round-about way so we can filter
                 // out fields marked Browsable(false). Note that if multiple fields have the same value,
                 // the behavior is undefined, since what we return are just enum values, not names.
-                Type reflectType = TypeDescriptor.GetReflectionType(EnumType) ?? EnumType;
+                // Given that EnumType is constrained to be an enum, we suppress calls for reflection with Enum.
+
+                [UnconditionalSuppressMessage("Trimming", "IL2067:", Justification = "Trimmer does not trim Enums")]
+                [return: DynamicallyAccessedMembers(TypeDescriptor.ReflectTypesDynamicallyAccessedMembers)]
+                static Type GetTypeDescriptorReflectionType(Type enumType) => TypeDescriptor.GetReflectionType(enumType);
+
+                Type _reflectType = GetTypeDescriptorReflectionType(EnumType);
+                FieldInfo[]? fields;
+
+                if (_reflectType == null)
+                {
+                    [UnconditionalSuppressMessage("Trimming", "IL2070:", Justification = "Trimmer does not trim Enums")]
+                    static FieldInfo[]? GetPublicStaticEnumFields(Type type) => type.GetFields(BindingFlags.Public | BindingFlags.Static);
+
+                    fields = GetPublicStaticEnumFields(EnumType);
+                }
+                else
+                {
+                    fields = _reflectType.GetFields(BindingFlags.Public | BindingFlags.Static);
+                }
 
-                FieldInfo[]? fields = reflectType.GetFields(BindingFlags.Public | BindingFlags.Static);
                 ArrayList? objValues = null;
 
                 if (fields != null && fields.Length > 0)
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ICustomTypeDescriptor.cs b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ICustomTypeDescriptor.cs
index 65c7db6623a2..aaec2986c1fa 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ICustomTypeDescriptor.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ICustomTypeDescriptor.cs
@@ -47,7 +47,7 @@ public interface ICustomTypeDescriptor
         /// <summary>
         /// Gets an editor of the specified type for this object.
         /// </summary>
-        [RequiresUnreferencedCode(TypeDescriptor.EditorRequiresUnreferencedCode)]
+        [RequiresUnreferencedCode(TypeDescriptor.DesignTimeAttributeTrimmed)]
         object? GetEditor(Type editorBaseType);
 
         /// <summary>
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/PropertyDescriptor.cs b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/PropertyDescriptor.cs
index 51fad8dd06d9..f21ddb67010e 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/PropertyDescriptor.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/PropertyDescriptor.cs
@@ -233,7 +233,7 @@ public virtual PropertyDescriptorCollection GetChildProperties(object? instance,
         /// <summary>
         /// Gets an editor of the specified type.
         /// </summary>
-        [RequiresUnreferencedCode(TypeDescriptor.EditorRequiresUnreferencedCode + " " + PropertyDescriptorPropertyTypeMessage)]
+        [RequiresUnreferencedCode(TypeDescriptor.DesignTimeAttributeTrimmed + " " + PropertyDescriptorPropertyTypeMessage)]
         public virtual object? GetEditor(Type editorBaseType)
         {
             object? editor = null;
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ReflectTypeDescriptionProvider.ReflectedTypeData.cs b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ReflectTypeDescriptionProvider.ReflectedTypeData.cs
index 11449f7489ff..6cd9c843a6f4 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ReflectTypeDescriptionProvider.ReflectedTypeData.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ReflectTypeDescriptionProvider.ReflectedTypeData.cs
@@ -292,7 +292,7 @@ internal TypeConverter GetConverter(object? instance)
             /// <summary>
             /// Retrieves the editor for the given base type.
             /// </summary>
-            [RequiresUnreferencedCode(TypeDescriptor.EditorRequiresUnreferencedCode + " The Type of instance cannot be statically discovered.")]
+            [RequiresUnreferencedCode(TypeDescriptor.DesignTimeAttributeTrimmed + " The Type of instance cannot be statically discovered.")]
             internal object? GetEditor(object? instance, Type editorBaseType)
             {
                 EditorAttribute? typeAttr;
@@ -391,6 +391,7 @@ internal TypeConverter GetConverter(object? instance)
             /// <summary>
             /// Helper method to return an editor attribute of the correct base type.
             /// </summary>
+            [RequiresUnreferencedCode("The type referenced by the Editor attribute may be trimmed away.")]
             private static EditorAttribute? GetEditorAttribute(AttributeCollection attributes, Type editorBaseType)
             {
                 foreach (Attribute attr in attributes)
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ReflectTypeDescriptionProvider.cs b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ReflectTypeDescriptionProvider.cs
index 1c9535b83440..5fc0d064f810 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ReflectTypeDescriptionProvider.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ReflectTypeDescriptionProvider.cs
@@ -193,7 +193,7 @@ private static Dictionary<object, IntrinsicTypeConverterData> IntrinsicTypeConve
                     //
                     [typeof(Array)] = new IntrinsicTypeConverterData((type) => new ArrayConverter()),
                     [typeof(ICollection)] = new IntrinsicTypeConverterData((type) => new CollectionConverter()),
-                    [typeof(Enum)] = new IntrinsicTypeConverterData((type) => CreateEnumConverter(type), cacheConverterInstance: false),
+                    [typeof(Enum)] = new IntrinsicTypeConverterData((type) => new EnumConverter(type), cacheConverterInstance: false),
                     [s_intrinsicNullableKey] = new IntrinsicTypeConverterData((type) => CreateNullableConverter(type), cacheConverterInstance: false),
                     [s_intrinsicReferenceKey] = new IntrinsicTypeConverterData((type) => new ReferenceConverter(type), cacheConverterInstance: false),
                 });
@@ -204,14 +204,6 @@ private static Dictionary<object, IntrinsicTypeConverterData> IntrinsicTypeConve
             Justification = "IntrinsicTypeConverters is marked with RequiresUnreferencedCode. It is the only place that should call this.")]
         private static NullableConverter CreateNullableConverter(Type type) => new NullableConverter(type);
 
-        [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2067:UnrecognizedReflectionPattern",
-            Justification = "Trimmer does not trim enums")]
-        private static EnumConverter CreateEnumConverter(Type type)
-        {
-            Debug.Assert(type.IsEnum || type == typeof(Enum));
-            return new EnumConverter(type);
-        }
-
         private static Hashtable PropertyCache => LazyInitializer.EnsureInitialized(ref s_propertyCache, () => new Hashtable());
 
         private static Hashtable EventCache => LazyInitializer.EnsureInitialized(ref s_eventCache, () => new Hashtable());
@@ -398,7 +390,7 @@ internal TypeConverter GetConverter([DynamicallyAccessedMembers(DynamicallyAcces
         /// <summary>
         /// Retrieves the editor for the given base type.
         /// </summary>
-        [RequiresUnreferencedCode(TypeDescriptor.EditorRequiresUnreferencedCode + " The Type of instance cannot be statically discovered.")]
+        [RequiresUnreferencedCode(TypeDescriptor.DesignTimeAttributeTrimmed + " The Type of instance cannot be statically discovered.")]
         internal object? GetEditor([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)] Type type, object? instance, Type editorBaseType)
         {
             ReflectedTypeData td = GetTypeData(type, true)!;
@@ -518,7 +510,7 @@ internal TypeConverter GetExtendedConverter(object instance)
         /// <summary>
         /// Retrieves the editor for the given base type.
         /// </summary>
-        [RequiresUnreferencedCode(TypeDescriptor.EditorRequiresUnreferencedCode + " The Type of instance cannot be statically discovered.")]
+        [RequiresUnreferencedCode(TypeDescriptor.DesignTimeAttributeTrimmed  + " The Type of instance cannot be statically discovered.")]
         internal object? GetExtendedEditor(object instance, Type editorBaseType)
         {
             return GetEditor(instance.GetType(), instance, editorBaseType);
@@ -1330,7 +1322,7 @@ internal void Refresh(Type type)
         /// for types as needed. These instances are stored back into the table
         /// for the base type, and for the original component type, for fast access.
         /// </summary>
-        [RequiresUnreferencedCode(TypeDescriptor.EditorRequiresUnreferencedCode)]
+        [RequiresUnreferencedCode(TypeDescriptor.DesignTimeAttributeTrimmed)]
         private static object? GetIntrinsicTypeEditor(Hashtable table, Type callingType)
         {
             object? hashEntry = null;
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ToolboxItemAttribute.cs b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ToolboxItemAttribute.cs
index b6a2244400db..4244338b10f1 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ToolboxItemAttribute.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/ToolboxItemAttribute.cs
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.ComponentModel.Design;
 using System.Diagnostics.CodeAnalysis;
 
 namespace System.ComponentModel
@@ -16,10 +17,12 @@ public class ToolboxItemAttribute : Attribute
         [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)]
         private readonly string? _toolboxItemTypeName;
 
+        private const string DefaultToolboxItemTypeName = "System.Drawing.Design.ToolboxItem, System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a";
+
         /// <summary>
         /// Initializes a new instance of ToolboxItemAttribute and sets the type to
         /// </summary>
-        public static readonly ToolboxItemAttribute Default = new ToolboxItemAttribute("System.Drawing.Design.ToolboxItem, System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a");
+        public static readonly ToolboxItemAttribute Default = new ToolboxItemAttribute(DefaultToolboxItemTypeName);
 
         /// <summary>
         /// Initializes a new instance of ToolboxItemAttribute and sets the type to
@@ -30,7 +33,7 @@ public class ToolboxItemAttribute : Attribute
         /// <summary>
         /// Gets whether the attribute is the default attribute.
         /// </summary>
-        public override bool IsDefaultAttribute() => Equals(Default);
+        public override bool IsDefaultAttribute() => _toolboxItemTypeName == DefaultToolboxItemTypeName;
 
         /// <summary>
         /// Initializes a new instance of ToolboxItemAttribute and specifies if default values should be used.
@@ -39,7 +42,12 @@ public ToolboxItemAttribute(bool defaultType)
         {
             if (defaultType)
             {
-                _toolboxItemTypeName = "System.Drawing.Design.ToolboxItem, System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a";
+                if (!IDesignerHost.IsSupported)
+                {
+                    throw new NotSupportedException(SR.IDesignerHostNotSupported);
+                }
+
+                _toolboxItemTypeName = DefaultToolboxItemTypeName;
             }
         }
 
diff --git a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/TypeDescriptor.cs b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/TypeDescriptor.cs
index 948e3f4b386f..0e5bba3b7486 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/TypeDescriptor.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/src/System/ComponentModel/TypeDescriptor.cs
@@ -20,7 +20,7 @@ namespace System.ComponentModel
     public sealed class TypeDescriptor
     {
         internal const DynamicallyAccessedMemberTypes ReflectTypesDynamicallyAccessedMembers = DynamicallyAccessedMemberTypes.PublicParameterlessConstructor | DynamicallyAccessedMemberTypes.PublicFields;
-        internal const string EditorRequiresUnreferencedCode = "Editors registered in TypeDescriptor.AddEditorTable may be trimmed.";
+        internal const string DesignTimeAttributeTrimmed = "Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming.";
 
         // Note: this is initialized at class load because we
         // lock on it for thread safety. It is used from nearly
@@ -933,7 +933,7 @@ internal static ICustomTypeDescriptor GetExtendedDescriptor(object component)
         /// Gets an editor with the specified base type for the
         /// specified component.
         /// </summary>
-        [RequiresUnreferencedCode(EditorRequiresUnreferencedCode + " The Type of component cannot be statically discovered.")]
+        [RequiresUnreferencedCode(DesignTimeAttributeTrimmed + " The Type of component cannot be statically discovered.")]
         public static object? GetEditor(object component, Type editorBaseType)
         {
             return GetEditor(component, editorBaseType, false);
@@ -944,7 +944,7 @@ internal static ICustomTypeDescriptor GetExtendedDescriptor(object component)
         /// specified component.
         /// </summary>
         [EditorBrowsable(EditorBrowsableState.Advanced)]
-        [RequiresUnreferencedCode(EditorRequiresUnreferencedCode + " The Type of component cannot be statically discovered.")]
+        [RequiresUnreferencedCode(DesignTimeAttributeTrimmed + " The Type of component cannot be statically discovered.")]
         public static object? GetEditor(object component, Type editorBaseType, bool noCustomTypeDesc)
         {
             ArgumentNullException.ThrowIfNull(editorBaseType);
@@ -955,7 +955,7 @@ internal static ICustomTypeDescriptor GetExtendedDescriptor(object component)
         /// <summary>
         /// Gets an editor with the specified base type for the specified type.
         /// </summary>
-        [RequiresUnreferencedCode(EditorRequiresUnreferencedCode)]
+        [RequiresUnreferencedCode(DesignTimeAttributeTrimmed)]
         public static object? GetEditor(
             [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)] Type type,
             Type editorBaseType)
@@ -2342,7 +2342,7 @@ public static Type ComObjectType
             get => typeof(TypeDescriptorComObject);
         }
 
-        [RequiresUnreferencedCode("The Type of component cannot be statically discovered.")]
+        [RequiresUnreferencedCode(DesignTimeAttributeTrimmed)]
         public static IDesigner? CreateDesigner(IComponent component, Type designerBaseType)
         {
             Type? type = null;
@@ -2635,7 +2635,7 @@ PropertyDescriptor ICustomTypeDescriptor.GetDefaultProperty()
                     return _handler.GetDefaultProperty(_instance);
                 }
 
-                [RequiresUnreferencedCode(EditorRequiresUnreferencedCode)]
+                [RequiresUnreferencedCode(DesignTimeAttributeTrimmed)]
                 object ICustomTypeDescriptor.GetEditor(Type editorBaseType)
                 {
                     return _handler.GetEditor(_instance, editorBaseType);
@@ -2944,7 +2944,7 @@ TypeConverter ICustomTypeDescriptor.GetConverter()
             /// <summary>
             /// ICustomTypeDescriptor implementation.
             /// </summary>
-            [RequiresUnreferencedCode(EditorRequiresUnreferencedCode)]
+            [RequiresUnreferencedCode(DesignTimeAttributeTrimmed)]
             object? ICustomTypeDescriptor.GetEditor(Type editorBaseType)
             {
                 ArgumentNullException.ThrowIfNull(editorBaseType);
@@ -3307,7 +3307,7 @@ TypeConverter ICustomTypeDescriptor.GetConverter()
                 /// <summary>
                 /// ICustomTypeDescriptor implementation.
                 /// </summary>
-                [RequiresUnreferencedCode(EditorRequiresUnreferencedCode)]
+                [RequiresUnreferencedCode(DesignTimeAttributeTrimmed)]
                 object? ICustomTypeDescriptor.GetEditor(Type editorBaseType)
                 {
                     ArgumentNullException.ThrowIfNull(editorBaseType);
@@ -3632,7 +3632,7 @@ public TypeConverter GetConverter()
             /// <summary>
             /// ICustomTypeDescriptor implementation.
             /// </summary>
-            [RequiresUnreferencedCode(EditorRequiresUnreferencedCode)]
+            [RequiresUnreferencedCode(DesignTimeAttributeTrimmed)]
             public object? GetEditor(Type editorBaseType)
             {
                 ArgumentNullException.ThrowIfNull(editorBaseType);
diff --git a/src/libraries/System.ComponentModel.TypeConverter/tests/TypeDescriptorTests.cs b/src/libraries/System.ComponentModel.TypeConverter/tests/TypeDescriptorTests.cs
index b5468a6583f4..193dcba83359 100644
--- a/src/libraries/System.ComponentModel.TypeConverter/tests/TypeDescriptorTests.cs
+++ b/src/libraries/System.ComponentModel.TypeConverter/tests/TypeDescriptorTests.cs
@@ -1395,7 +1395,7 @@ public static IEnumerable<object[]> GetConverter_ByMultithread_ReturnsExpected_T
 
         [Theory]
         [MemberData(nameof(GetConverter_ByMultithread_ReturnsExpected_TestData))]
-        public async void GetConverter_ByMultithread_ReturnsExpected(Type typeForGetConverter, Type expectedConverterType)
+        public async Task GetConverter_ByMultithread_ReturnsExpected(Type typeForGetConverter, Type expectedConverterType)
         {
             TypeConverter[] actualConverters = await Task.WhenAll(
                 Enumerable.Range(0, 100).Select(_ =>
@@ -1415,7 +1415,7 @@ public static IEnumerable<object[]> GetConverterWithAddProvider_ByMultithread_Su
 
         [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsReflectionEmitSupported))]  // Mock will try to JIT
         [MemberData(nameof(GetConverterWithAddProvider_ByMultithread_Success_TestData))]
-        public async void GetConverterWithAddProvider_ByMultithread_Success(Type typeForGetConverter, Type expectedConverterType)
+        public async Task GetConverterWithAddProvider_ByMultithread_Success(Type typeForGetConverter, Type expectedConverterType)
         {
             TypeConverter[] actualConverters = await Task.WhenAll(
                 Enumerable.Range(0, 200).Select(_ =>
diff --git a/src/libraries/System.Configuration.ConfigurationManager/src/System/Diagnostics/TraceConfiguration.cs b/src/libraries/System.Configuration.ConfigurationManager/src/System/Diagnostics/TraceConfiguration.cs
index d02b5a006b3d..de819c301df5 100644
--- a/src/libraries/System.Configuration.ConfigurationManager/src/System/Diagnostics/TraceConfiguration.cs
+++ b/src/libraries/System.Configuration.ConfigurationManager/src/System/Diagnostics/TraceConfiguration.cs
@@ -55,7 +55,7 @@ private static void InitializingTraceSource(object sender, InitializingTraceSour
 
                             if (!string.IsNullOrEmpty(sourceElement.SwitchValue))
                             {
-                                traceSource.Switch.Level = (SourceLevels)Enum.Parse(typeof(SourceLevels), sourceElement.SwitchValue);
+                                traceSource.Switch.Level = Enum.Parse<SourceLevels>(sourceElement.SwitchValue);
                             }
                         }
                     }
@@ -74,7 +74,7 @@ private static void InitializingTraceSource(object sender, InitializingTraceSour
                         // The SwitchValue changed; just update our internalSwitch.
                         if (!string.IsNullOrEmpty(sourceElement.SwitchValue))
                         {
-                            traceSource.Switch.Level = (SourceLevels)Enum.Parse(typeof(SourceLevels), sourceElement.SwitchValue);
+                            traceSource.Switch.Level = Enum.Parse<SourceLevels>(sourceElement.SwitchValue);
                         }
                         else
                         {
diff --git a/src/libraries/System.Console/src/System/ConsolePal.Unix.cs b/src/libraries/System.Console/src/System/ConsolePal.Unix.cs
index 23d4b9ba595f..77a679150b29 100644
--- a/src/libraries/System.Console/src/System/ConsolePal.Unix.cs
+++ b/src/libraries/System.Console/src/System/ConsolePal.Unix.cs
@@ -355,19 +355,22 @@ private static void GetWindowSize(out int width, out int height)
                 // Invalidate before reading cached values.
                 CheckTerminalSettingsInvalidated();
 
-                Interop.Sys.WinSize winsize;
-                if (s_windowWidth == -1 &&
-                    s_terminalHandle != null &&
-                    Interop.Sys.GetWindowSize(s_terminalHandle, out winsize) == 0)
+                if (s_windowWidth == -1)
                 {
-                    s_windowWidth = winsize.Col;
-                    s_windowHeight = winsize.Row;
-                }
-                else
-                {
-                    s_windowWidth = TerminalFormatStringsInstance.Columns;
-                    s_windowHeight = TerminalFormatStringsInstance.Lines;
+                    Interop.Sys.WinSize winsize;
+                    if (s_terminalHandle != null &&
+                        Interop.Sys.GetWindowSize(s_terminalHandle, out winsize) == 0)
+                    {
+                        s_windowWidth = winsize.Col;
+                        s_windowHeight = winsize.Row;
+                    }
+                    else
+                    {
+                        s_windowWidth = TerminalFormatStringsInstance.Columns;
+                        s_windowHeight = TerminalFormatStringsInstance.Lines;
+                    }
                 }
+
                 width = s_windowWidth;
                 height = s_windowHeight;
             }
diff --git a/src/libraries/System.Console/src/System/ConsolePal.Windows.cs b/src/libraries/System.Console/src/System/ConsolePal.Windows.cs
index 820e2fc39910..ee25d844de86 100644
--- a/src/libraries/System.Console/src/System/ConsolePal.Windows.cs
+++ b/src/libraries/System.Console/src/System/ConsolePal.Windows.cs
@@ -108,7 +108,9 @@ public static void SetConsoleInputEncoding(Encoding enc)
             if (enc.CodePage != UnicodeCodePage)
             {
                 if (!Interop.Kernel32.SetConsoleCP(enc.CodePage))
-                    throw Win32Marshal.GetExceptionForWin32Error(Marshal.GetLastPInvokeError());
+                {
+                    HandleSetConsoleEncodingError(Marshal.GetLastPInvokeError());
+                }
             }
         }
 
@@ -122,10 +124,24 @@ public static void SetConsoleOutputEncoding(Encoding enc)
             if (enc.CodePage != UnicodeCodePage)
             {
                 if (!Interop.Kernel32.SetConsoleOutputCP(enc.CodePage))
-                    throw Win32Marshal.GetExceptionForWin32Error(Marshal.GetLastPInvokeError());
+                {
+                    HandleSetConsoleEncodingError(Marshal.GetLastPInvokeError());
+                }
             }
         }
 
+        private static void HandleSetConsoleEncodingError(int lastError)
+        {
+            if (lastError == Interop.Errors.ERROR_INVALID_HANDLE
+                || lastError == Interop.Errors.ERROR_INVALID_ACCESS)
+            {
+                // no console, or not a valid handle, so fail silently
+                return;
+            }
+
+            throw Win32Marshal.GetExceptionForWin32Error(lastError);
+        }
+
         /// <summary>Gets whether Console.In is targeting a terminal display.</summary>
         public static bool IsInputRedirectedCore()
         {
diff --git a/src/libraries/System.Console/src/System/IO/KeyParser.cs b/src/libraries/System.Console/src/System/IO/KeyParser.cs
index fd09e7fe227e..23326d485d49 100644
--- a/src/libraries/System.Console/src/System/IO/KeyParser.cs
+++ b/src/libraries/System.Console/src/System/IO/KeyParser.cs
@@ -8,7 +8,7 @@ namespace System.IO;
 
 internal static class KeyParser
 {
-    private const char Escape = '\u001B';
+    private const char Escape = '\e';
     private const char Delete = '\u007F';
     private const char VtSequenceEndTag = '~';
     private const char ModifierSeparator = ';';
diff --git a/src/libraries/System.Console/src/System/TerminalFormatStrings.cs b/src/libraries/System.Console/src/System/TerminalFormatStrings.cs
index e4e0392d82b1..a1f194184a09 100644
--- a/src/libraries/System.Console/src/System/TerminalFormatStrings.cs
+++ b/src/libraries/System.Console/src/System/TerminalFormatStrings.cs
@@ -46,7 +46,7 @@ internal sealed class TerminalFormatStrings
     /// doesn't contain it (as appears to be the case with e.g. screen and tmux on Ubuntu), at the risk
     /// of outputting the sequence on some terminal that's not compatible.
     /// </remarks>
-    public const string CursorPositionReport = "\x1B[6n";
+    public const string CursorPositionReport = "\e[6n";
     /// <summary>
     /// The dictionary of keystring to ConsoleKeyInfo.
     /// Only some members of the ConsoleKeyInfo are used; in particular, the actual char is ignored.
@@ -210,13 +210,13 @@ private static string GetTitle(TermInfo.Database db)
             case "linux":
             case "rxvt":
             case "xterm":
-                return "\x1B]0;%p1%s\x07";
+                return "\e]0;%p1%s\x07";
             case "cygwin":
-                return "\x1B];%p1%s\x07";
+                return "\e];%p1%s\x07";
             case "konsole":
-                return "\x1B]30;%p1%s\x07";
+                return "\e]30;%p1%s\x07";
             case "screen":
-                return "\x1Bk%p1%s\x1B\\";
+                return "\ek%p1%s\e\\";
             default:
                 return string.Empty;
         }
diff --git a/src/libraries/System.Console/tests/ConsoleEncoding.Windows.cs b/src/libraries/System.Console/tests/ConsoleEncoding.Windows.cs
index 7e19d385d06b..b32c73bce35f 100644
--- a/src/libraries/System.Console/tests/ConsoleEncoding.Windows.cs
+++ b/src/libraries/System.Console/tests/ConsoleEncoding.Windows.cs
@@ -2,8 +2,8 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
-using System.Text;
 using System.Runtime.InteropServices;
+using System.Text;
 using Microsoft.DotNet.RemoteExecutor;
 using Xunit;
 
@@ -38,6 +38,28 @@ public void InputEncoding_SetUnicodeEncoding_SilentlyIgnoredInternally()
         }).Dispose();
     }
 
+    [ConditionalFact(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))]
+    [PlatformSpecific(TestPlatforms.Windows)]
+    public void InputEncoding_SetEncodingWhenDetached_ErrorIsSilentlyIgnored()
+    {
+        RemoteExecutor.Invoke(() =>
+        {
+            Encoding encoding = Console.InputEncoding.CodePage != Encoding.ASCII.CodePage
+                ? Encoding.ASCII
+                : Encoding.Latin1;
+
+            // use FreeConsole to detach the current console - simulating a process started with the "DETACHED_PROCESS" flag
+            FreeConsole();
+
+            // Setting the input encoding should not throw an exception
+            Console.InputEncoding = encoding;
+            // The internal state of Console should have updated, despite the failure to change the console's input encoding
+            Assert.Equal(encoding, Console.InputEncoding);
+            // Operations on the console are no longer valid - GetConsoleCP fails.
+            Assert.Equal(0u, GetConsoleCP());
+        }).Dispose();
+    }
+
     [ConditionalFact(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))]
     [PlatformSpecific(TestPlatforms.Windows)]
     public void OutputEncoding_SetDefaultEncoding_Success()
@@ -67,9 +89,34 @@ public void OutputEncoding_SetUnicodeEncoding_SilentlyIgnoredInternally()
         }).Dispose();
     }
 
+    [ConditionalFact(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))]
+    [PlatformSpecific(TestPlatforms.Windows)]
+    public void OutputEncoding_SetEncodingWhenDetached_ErrorIsSilentlyIgnored()
+    {
+        RemoteExecutor.Invoke(() =>
+        {
+            Encoding encoding = Console.OutputEncoding.CodePage != Encoding.ASCII.CodePage
+                ? Encoding.ASCII
+                : Encoding.Latin1;
+
+            // use FreeConsole to detach the current console - simulating a process started with the "DETACHED_PROCESS" flag
+            FreeConsole();
+
+            // Setting the output encoding should not throw an exception
+            Console.OutputEncoding = encoding;
+            // The internal state of Console should have updated, despite the failure to change the console's output encoding
+            Assert.Equal(encoding, Console.OutputEncoding);
+            // Operations on the console are no longer valid - GetConsoleOutputCP fails.
+            Assert.Equal(0u, GetConsoleOutputCP());
+        }).Dispose();
+    }
+
     [LibraryImport("kernel32.dll")]
     public static partial uint GetConsoleCP();
 
     [LibraryImport("kernel32.dll")]
     public static partial uint GetConsoleOutputCP();
+
+    [LibraryImport("kernel32.dll")]
+    public static partial int FreeConsole();
 }
diff --git a/src/libraries/System.Console/tests/KeyParserTests.cs b/src/libraries/System.Console/tests/KeyParserTests.cs
index 557e889d2fb5..a893988995d5 100644
--- a/src/libraries/System.Console/tests/KeyParserTests.cs
+++ b/src/libraries/System.Console/tests/KeyParserTests.cs
@@ -42,7 +42,7 @@ public class KeyParserTests
             yield return ('.', ConsoleKey.OemPeriod);
             yield return (',', ConsoleKey.OemComma);
 
-            yield return ('\u001B', ConsoleKey.Escape);
+            yield return ('\e', ConsoleKey.Escape);
 
             for (char i = '0'; i <= '9'; i++)
             {
@@ -212,7 +212,7 @@ public void KeysAreProperlyMapped(TerminalData terminalData, byte[] recordedByte
             yield return (GetString(33), ConsoleKey.F19);
             yield return (GetString(34), ConsoleKey.F20);
 
-            static string GetString(int i) => $"\u001B[{i}~";
+            static string GetString(int i) => $"\e[{i}~";
         }
     }
 
@@ -223,7 +223,7 @@ public static IEnumerable<object[]> VTSequencesArguments
     [MemberData(nameof(VTSequencesArguments))]
     public void VTSequencesAreProperlyMapped(TerminalData terminalData, string input, ConsoleKey expectedKey)
     {
-        if (terminalData is RxvtUnicode && input == "\u001B[4~" && expectedKey == ConsoleKey.End)
+        if (terminalData is RxvtUnicode && input == "\e[4~" && expectedKey == ConsoleKey.End)
         {
             expectedKey = ConsoleKey.Select; // rxvt binds this key to Select in Terminfo and uses "^[[8~" for End key
         }
@@ -239,10 +239,10 @@ public void VTSequencesAreProperlyMapped(TerminalData terminalData, string input
     {
         get
         {
-            yield return ("\u001BOa", ConsoleKey.UpArrow);
-            yield return ("\u001BOb", ConsoleKey.DownArrow);
-            yield return ("\u001BOc", ConsoleKey.RightArrow);
-            yield return ("\u001BOd", ConsoleKey.LeftArrow);
+            yield return ("\eOa", ConsoleKey.UpArrow);
+            yield return ("\eOb", ConsoleKey.DownArrow);
+            yield return ("\eOc", ConsoleKey.RightArrow);
+            yield return ("\eOd", ConsoleKey.LeftArrow);
         }
     }
 
@@ -272,9 +272,9 @@ public void ExtendedStringCodePath()
             // Ctrl+Backspace
             yield return ("\b", new[] { new ConsoleKeyInfo('\b', ConsoleKey.Backspace, false, false, true) });
             // Alt+Backspace
-            yield return ("\u001B\u007F", new[] { new ConsoleKeyInfo((char)0x7F, ConsoleKey.Backspace, false, true, false) });
+            yield return ("\e\u007F", new[] { new ConsoleKeyInfo((char)0x7F, ConsoleKey.Backspace, false, true, false) });
             // Ctrl+Alt+Backspace
-            yield return ("\u001B\b", new[] { new ConsoleKeyInfo('\b', ConsoleKey.Backspace, false, true, true) });
+            yield return ("\e\b", new[] { new ConsoleKeyInfo('\b', ConsoleKey.Backspace, false, true, true) });
             // Enter
             yield return ("\r", new[] { new ConsoleKeyInfo('\r', ConsoleKey.Enter, false, false, false) });
             // Ctrl+Enter
@@ -283,18 +283,18 @@ public void ExtendedStringCodePath()
             // Escape key pressed multiple times
             for (int i = 1; i <= 5; i++)
             {
-                yield return (new string('\u001B', i), Enumerable.Repeat(new ConsoleKeyInfo('\u001B', ConsoleKey.Escape, false, false, false), i).ToArray());
+                yield return (new string('\e', i), Enumerable.Repeat(new ConsoleKeyInfo('\e', ConsoleKey.Escape, false, false, false), i).ToArray());
             }
 
             // Home key (^[[H) followed by H key
-            yield return ("\u001B[HH", new[]
+            yield return ("\e[HH", new[]
             {
                 new ConsoleKeyInfo(default, ConsoleKey.Home, false, false, false),
                 new ConsoleKeyInfo('H', ConsoleKey.H, true, false, false)
             });
 
             // escape sequence (F12 '^[[24~') followed by an extra tylde:
-            yield return ($"\u001B[24~~", new[]
+            yield return ($"\e[24~~", new[]
             {
                 new ConsoleKeyInfo(default, ConsoleKey.F12, false, false, false),
                 new ConsoleKeyInfo('~', default, false, false, false),
@@ -304,9 +304,9 @@ public void ExtendedStringCodePath()
             // Invalid modifiers (valid values are <2, 8>)
             foreach (int invalidModifier in new[] { 0, 1, 9 })
             {
-                yield return ($"\u001B[1;{invalidModifier}H", new[]
+                yield return ($"\e[1;{invalidModifier}H", new[]
                 {
-                    new ConsoleKeyInfo('\u001B', ConsoleKey.Escape, false, false, false),
+                    new ConsoleKeyInfo('\e', ConsoleKey.Escape, false, false, false),
                     new ConsoleKeyInfo('[', default, false, false, false),
                     new ConsoleKeyInfo('1', ConsoleKey.D1, false, false, false),
                     new ConsoleKeyInfo(';', default, false, false, false),
@@ -317,9 +317,9 @@ public void ExtendedStringCodePath()
             // Invalid ID (valid values are <1, 34> except of 9, 16, 22, 27, 30 and 35)
             foreach (int invalidId in new[] { 16, 22, 27, 30, 35, 36, 77, 99 })
             {
-                yield return ($"\u001B[{invalidId}~", new[]
+                yield return ($"\e[{invalidId}~", new[]
                 {
-                    new ConsoleKeyInfo('\u001B', ConsoleKey.Escape, false, false, false),
+                    new ConsoleKeyInfo('\e', ConsoleKey.Escape, false, false, false),
                     new ConsoleKeyInfo('[', default, false, false, false),
                     new ConsoleKeyInfo((char)('0' + invalidId / 10), ConsoleKey.D0 + invalidId / 10, false, false, false),
                     new ConsoleKeyInfo((char)('0' + invalidId % 10), ConsoleKey.D0 + invalidId % 10, false, false, false),
@@ -327,9 +327,9 @@ public void ExtendedStringCodePath()
                 });
             }
             // too long ID (more than 2 digits)
-            yield return ($"\u001B[111~", new[]
+            yield return ($"\e[111~", new[]
             {
-                new ConsoleKeyInfo('\u001B', ConsoleKey.Escape, false, false, false),
+                new ConsoleKeyInfo('\e', ConsoleKey.Escape, false, false, false),
                 new ConsoleKeyInfo('[', default, false, false, false),
                 new ConsoleKeyInfo('1', ConsoleKey.D1, false, false, false),
                 new ConsoleKeyInfo('1', ConsoleKey.D1, false, false, false),
@@ -337,9 +337,9 @@ public void ExtendedStringCodePath()
                 new ConsoleKeyInfo('~', default, false, false, false),
             });
             // missing closing tag (tylde):
-            yield return ($"\u001B[24", new[]
+            yield return ($"\e[24", new[]
             {
-                new ConsoleKeyInfo('\u001B', ConsoleKey.Escape, false, false, false),
+                new ConsoleKeyInfo('\e', ConsoleKey.Escape, false, false, false),
                 new ConsoleKeyInfo('[', default, false, false, false),
                 new ConsoleKeyInfo('2', ConsoleKey.D2, false, false, false),
                 new ConsoleKeyInfo('4', ConsoleKey.D4, false, false, false),
@@ -386,7 +386,7 @@ public void NewLineEscapeSequenceProducesCharacter()
     {
         XTermData xTerm = new();
 
-        ConsoleKeyInfo consoleKeyInfo = Parse("\u001BOM".ToCharArray(), xTerm.TerminalDb, xTerm.Verase, 3);
+        ConsoleKeyInfo consoleKeyInfo = Parse("\eOM".ToCharArray(), xTerm.TerminalDb, xTerm.Verase, 3);
 
         Assert.Equal(ConsoleKey.Enter, consoleKeyInfo.Key);
         Assert.Equal('\r', consoleKeyInfo.KeyChar);
@@ -398,7 +398,7 @@ public void BackTabEscapeSequence()
     {
         XTermData xTerm = new();
 
-        ConsoleKeyInfo consoleKeyInfo = Parse("\u001B[Z".ToCharArray(), xTerm.TerminalDb, xTerm.Verase, 3);
+        ConsoleKeyInfo consoleKeyInfo = Parse("\e[Z".ToCharArray(), xTerm.TerminalDb, xTerm.Verase, 3);
 
         Assert.Equal(ConsoleKey.Tab, consoleKeyInfo.Key);
         Assert.Equal(default, consoleKeyInfo.KeyChar);
diff --git a/src/libraries/System.Console/tests/TermInfo.Unix.cs b/src/libraries/System.Console/tests/TermInfo.Unix.cs
index e2fad1c0f183..2ce7c4c9ff20 100644
--- a/src/libraries/System.Console/tests/TermInfo.Unix.cs
+++ b/src/libraries/System.Console/tests/TermInfo.Unix.cs
@@ -76,21 +76,21 @@ public void VerifyTermInfoSupportsNewAndLegacyNcurses()
 
     [Theory]
     [PlatformSpecific(TestPlatforms.AnyUnix)]  // Tests TermInfo
-    [InlineData("xterm-256color", "\u001B\u005B\u00330m", "\u001B\u005B\u00340m", 0)]
-    [InlineData("xterm-256color", "\u001B\u005B\u00331m", "\u001B\u005B\u00341m", 1)]
-    [InlineData("xterm-256color", "\u001B\u005B90m", "\u001B\u005B100m", 8)]
-    [InlineData("screen", "\u001B\u005B\u00330m", "\u001B\u005B\u00340m", 0)]
-    [InlineData("screen", "\u001B\u005B\u00332m", "\u001B\u005B\u00342m", 2)]
-    [InlineData("screen", "\u001B\u005B\u00339m", "\u001B\u005B\u00349m", 9)]
-    [InlineData("Eterm", "\u001B\u005B\u00330m", "\u001B\u005B\u00340m", 0)]
-    [InlineData("Eterm", "\u001B\u005B\u00333m", "\u001B\u005B\u00343m", 3)]
-    [InlineData("Eterm", "\u001B\u005B\u003310m", "\u001B\u005B\u003410m", 10)]
-    [InlineData("wsvt25", "\u001B\u005B\u00330m", "\u001B\u005B\u00340m", 0)]
-    [InlineData("wsvt25", "\u001B\u005B\u00334m", "\u001B\u005B\u00344m", 4)]
-    [InlineData("wsvt25", "\u001B\u005B\u003311m", "\u001B\u005B\u003411m", 11)]
-    [InlineData("mach-color", "\u001B\u005B\u00330m", "\u001B\u005B\u00340m", 0)]
-    [InlineData("mach-color", "\u001B\u005B\u00335m", "\u001B\u005B\u00345m", 5)]
-    [InlineData("mach-color", "\u001B\u005B\u003312m", "\u001B\u005B\u003412m", 12)]
+    [InlineData("xterm-256color", "\e\u005B\u00330m", "\e\u005B\u00340m", 0)]
+    [InlineData("xterm-256color", "\e\u005B\u00331m", "\e\u005B\u00341m", 1)]
+    [InlineData("xterm-256color", "\e\u005B90m", "\e\u005B100m", 8)]
+    [InlineData("screen", "\e\u005B\u00330m", "\e\u005B\u00340m", 0)]
+    [InlineData("screen", "\e\u005B\u00332m", "\e\u005B\u00342m", 2)]
+    [InlineData("screen", "\e\u005B\u00339m", "\e\u005B\u00349m", 9)]
+    [InlineData("Eterm", "\e\u005B\u00330m", "\e\u005B\u00340m", 0)]
+    [InlineData("Eterm", "\e\u005B\u00333m", "\e\u005B\u00343m", 3)]
+    [InlineData("Eterm", "\e\u005B\u003310m", "\e\u005B\u003410m", 10)]
+    [InlineData("wsvt25", "\e\u005B\u00330m", "\e\u005B\u00340m", 0)]
+    [InlineData("wsvt25", "\e\u005B\u00334m", "\e\u005B\u00344m", 4)]
+    [InlineData("wsvt25", "\e\u005B\u003311m", "\e\u005B\u003411m", 11)]
+    [InlineData("mach-color", "\e\u005B\u00330m", "\e\u005B\u00340m", 0)]
+    [InlineData("mach-color", "\e\u005B\u00335m", "\e\u005B\u00345m", 5)]
+    [InlineData("mach-color", "\e\u005B\u003312m", "\e\u005B\u003412m", 12)]
     public void TermInfoVerification(string termToTest, string expectedForeground, string expectedBackground, int colorValue)
     {
         TermInfo.Database db = TermInfo.DatabaseFactory.ReadDatabase(termToTest);
@@ -109,8 +109,8 @@ public void TermInfoClearIncludesE3WhenExpected()
     {
         // XTerm defines E3 for clearing scrollback buffer and tmux does not.
         // This can't be added to TermInfoVerification because xterm-256color sometimes has E3 defined (e.g. on Ubuntu but not macOS)
-        Assert.Equal("\u001B[H\u001B[2J\u001B[3J", new XTermData().TerminalDb.Clear);
-        Assert.Equal("\u001B[H\u001B[J", new TmuxData().TerminalDb.Clear);
+        Assert.Equal("\e[H\e[2J\e[3J", new XTermData().TerminalDb.Clear);
+        Assert.Equal("\e[H\e[J", new TmuxData().TerminalDb.Clear);
     }
 
     [Fact]
@@ -119,7 +119,7 @@ public void EmuTermInfoDoesntBreakParser()
     {
         // This file (available by default on OS X) is called out specifically since it contains a format where it has %i
         // but only one variable instead of two. Make sure we don't break in this case
-        TermInfoVerification("emu", "\u001Br1;", "\u001Bs1;", 0);
+        TermInfoVerification("emu", "\er1;", "\es1;", 0);
     }
 
     [Fact]
diff --git a/src/libraries/System.Data.Common/ref/System.Data.Common.cs b/src/libraries/System.Data.Common/ref/System.Data.Common.cs
index cb319e5601ac..1c39049c9def 100644
--- a/src/libraries/System.Data.Common/ref/System.Data.Common.cs
+++ b/src/libraries/System.Data.Common/ref/System.Data.Common.cs
@@ -476,7 +476,7 @@ public void EndEdit() { }
         System.ComponentModel.EventDescriptor System.ComponentModel.ICustomTypeDescriptor.GetDefaultEvent() { throw null; }
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("PropertyDescriptor's PropertyType cannot be statically discovered.")]
         System.ComponentModel.PropertyDescriptor System.ComponentModel.ICustomTypeDescriptor.GetDefaultProperty() { throw null; }
-        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Editors registered in TypeDescriptor.AddEditorTable may be trimmed.")]
+        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming.")]
         object System.ComponentModel.ICustomTypeDescriptor.GetEditor(System.Type editorBaseType) { throw null; }
         System.ComponentModel.EventDescriptorCollection System.ComponentModel.ICustomTypeDescriptor.GetEvents() { throw null; }
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("The public parameterless constructor or the 'Default' static field may be trimmed from the Attribute's Type.")]
@@ -2256,7 +2256,7 @@ void System.Collections.IDictionary.Remove(object keyword) { }
         System.ComponentModel.EventDescriptor? System.ComponentModel.ICustomTypeDescriptor.GetDefaultEvent() { throw null; }
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("PropertyDescriptor's PropertyType cannot be statically discovered.")]
         System.ComponentModel.PropertyDescriptor? System.ComponentModel.ICustomTypeDescriptor.GetDefaultProperty() { throw null; }
-        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Editors registered in TypeDescriptor.AddEditorTable may be trimmed.")]
+        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming.")]
         object? System.ComponentModel.ICustomTypeDescriptor.GetEditor(System.Type editorBaseType) { throw null; }
         System.ComponentModel.EventDescriptorCollection System.ComponentModel.ICustomTypeDescriptor.GetEvents() { throw null; }
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("The public parameterless constructor or the 'Default' static field may be trimmed from the Attribute's Type.")]
@@ -2450,7 +2450,7 @@ protected DbDataRecord() { }
         System.ComponentModel.EventDescriptor System.ComponentModel.ICustomTypeDescriptor.GetDefaultEvent() { throw null; }
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("PropertyDescriptor's PropertyType cannot be statically discovered.")]
         System.ComponentModel.PropertyDescriptor System.ComponentModel.ICustomTypeDescriptor.GetDefaultProperty() { throw null; }
-        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Editors registered in TypeDescriptor.AddEditorTable may be trimmed.")]
+        [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming.")]
         object System.ComponentModel.ICustomTypeDescriptor.GetEditor(System.Type editorBaseType) { throw null; }
         System.ComponentModel.EventDescriptorCollection System.ComponentModel.ICustomTypeDescriptor.GetEvents() { throw null; }
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("The public parameterless constructor or the 'Default' static field may be trimmed from the Attribute's Type.")]
diff --git a/src/libraries/System.Data.Common/src/System/Data/Common/DataRecordInternal.cs b/src/libraries/System.Data.Common/src/System/Data/Common/DataRecordInternal.cs
index 825036afdf6b..d74645280fce 100644
--- a/src/libraries/System.Data.Common/src/System/Data/Common/DataRecordInternal.cs
+++ b/src/libraries/System.Data.Common/src/System/Data/Common/DataRecordInternal.cs
@@ -335,7 +335,7 @@ AttributeCollection ICustomTypeDescriptor.GetAttributes()
             return null;
         }
 
-        [RequiresUnreferencedCode("Editors registered in TypeDescriptor.AddEditorTable may be trimmed.")]
+        [RequiresUnreferencedCode("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming.")]
         object? ICustomTypeDescriptor.GetEditor(Type editorBaseType)
         {
             return null;
diff --git a/src/libraries/System.Data.Common/src/System/Data/Common/DbConnectionStringBuilder.cs b/src/libraries/System.Data.Common/src/System/Data/Common/DbConnectionStringBuilder.cs
index 46b45be82c84..809c9f3132ba 100644
--- a/src/libraries/System.Data.Common/src/System/Data/Common/DbConnectionStringBuilder.cs
+++ b/src/libraries/System.Data.Common/src/System/Data/Common/DbConnectionStringBuilder.cs
@@ -608,7 +608,7 @@ AttributeCollection ICustomTypeDescriptor.GetAttributes()
         {
             return TypeDescriptor.GetAttributes(this, true);
         }
-        [RequiresUnreferencedCode("Editors registered in TypeDescriptor.AddEditorTable may be trimmed.")]
+        [RequiresUnreferencedCode("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming.")]
         object? ICustomTypeDescriptor.GetEditor(Type editorBaseType)
         {
             return TypeDescriptor.GetEditor(this, editorBaseType, true);
diff --git a/src/libraries/System.Data.Common/src/System/Data/Common/DbDataRecord.cs b/src/libraries/System.Data.Common/src/System/Data/Common/DbDataRecord.cs
index f6211bfe3fe8..fe8c46bc7ef2 100644
--- a/src/libraries/System.Data.Common/src/System/Data/Common/DbDataRecord.cs
+++ b/src/libraries/System.Data.Common/src/System/Data/Common/DbDataRecord.cs
@@ -85,7 +85,7 @@ protected virtual DbDataReader GetDbDataReader(int i)
         [RequiresUnreferencedCode("PropertyDescriptor's PropertyType cannot be statically discovered.")]
         PropertyDescriptor? ICustomTypeDescriptor.GetDefaultProperty() => null;
 
-        [RequiresUnreferencedCode("Editors registered in TypeDescriptor.AddEditorTable may be trimmed.")]
+        [RequiresUnreferencedCode("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming.")]
         object? ICustomTypeDescriptor.GetEditor(Type editorBaseType) => null;
 
         EventDescriptorCollection ICustomTypeDescriptor.GetEvents() => new EventDescriptorCollection(null);
diff --git a/src/libraries/System.Data.Common/src/System/Data/DataRowView.cs b/src/libraries/System.Data.Common/src/System/Data/DataRowView.cs
index f2d8c97d7ede..b47b374994d5 100644
--- a/src/libraries/System.Data.Common/src/System/Data/DataRowView.cs
+++ b/src/libraries/System.Data.Common/src/System/Data/DataRowView.cs
@@ -242,7 +242,7 @@ public void EndEdit()
         [RequiresUnreferencedCode("PropertyDescriptor's PropertyType cannot be statically discovered.")]
         PropertyDescriptor? ICustomTypeDescriptor.GetDefaultProperty() => null;
 
-        [RequiresUnreferencedCode("Editors registered in TypeDescriptor.AddEditorTable may be trimmed.")]
+        [RequiresUnreferencedCode("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming.")]
         object? ICustomTypeDescriptor.GetEditor(Type editorBaseType) => null;
 
         EventDescriptorCollection ICustomTypeDescriptor.GetEvents() => new EventDescriptorCollection(null);
diff --git a/src/libraries/System.Data.Common/src/System/Data/DataViewManagerListItemTypeDescriptor.cs b/src/libraries/System.Data.Common/src/System/Data/DataViewManagerListItemTypeDescriptor.cs
index afa7277482b1..724f500755f8 100644
--- a/src/libraries/System.Data.Common/src/System/Data/DataViewManagerListItemTypeDescriptor.cs
+++ b/src/libraries/System.Data.Common/src/System/Data/DataViewManagerListItemTypeDescriptor.cs
@@ -66,7 +66,7 @@ internal DataView GetDataView(DataTable table)
         /// <summary>
         /// Retrieves the an editor for this object.
         /// </summary>
-        [RequiresUnreferencedCode("Editors registered in TypeDescriptor.AddEditorTable may be trimmed.")]
+        [RequiresUnreferencedCode("Design-time attributes are not preserved when trimming. Types referenced by attributes like EditorAttribute and DesignerAttribute may not be available after trimming.")]
         object? ICustomTypeDescriptor.GetEditor(Type editorBaseType) => null;
 
         /// <summary>
diff --git a/src/libraries/System.Diagnostics.DiagnosticSource/src/DiagnosticSourceUsersGuide.md b/src/libraries/System.Diagnostics.DiagnosticSource/src/DiagnosticSourceUsersGuide.md
index 8f98d156cd1e..bcd4e96cb8aa 100644
--- a/src/libraries/System.Diagnostics.DiagnosticSource/src/DiagnosticSourceUsersGuide.md
+++ b/src/libraries/System.Diagnostics.DiagnosticSource/src/DiagnosticSourceUsersGuide.md
@@ -180,7 +180,7 @@ Thus the event names only need to be unique within a component.
   reflection must be used to fetch fields).   This is both easier to program and more efficient.
   Thus in scenarios where there is likely high-volume filtering to be done by the logging listener, having
   this type available to do the cast is valuable.   Note that this type needs to be made public (since
-  the listener needs to see it), and should be under the namespace System.Diagnostics.DiagnosticSource.PayloadTypes.
+  the listener needs to see it).
   Note that if there is doubt about the value DO NOT create an explicit type, as you CAN convert from
   an anonymous type to a explicit type compatibly in the future, but once you expose the payload type
   you must keep it forever.   The payload type should simply have C#  'TYPE NAME {get; set; }' properties
@@ -405,6 +405,21 @@ Thus we could replace the `listener.Subscribe()` call in the previous example wi
 This very efficiently subscribes to only the 'RequestStart' events. All other events will cause the `DiagnosticSource.IsEnabled()`
 method to return `false`, and thus be efficiently filtered out.
 
+NOTE: Filtering is only designed as a performance optimization. It is possible for a listener to receive events even when they
+do not satisfy the filter. This could occur because some other listener has subscribed to the event or because the source
+of the event didn't check IsEnabled() prior to sending it. If you want to be certain that a given event satisfies the filter
+you will need to check it inside the callback. For example:
+
+```C#
+Action<KeyValuePair<string, object>> callback = (KeyValuePair<string, object> evnt) =>
+        {
+            if(predicate(evnt.Key)) // only print out events that satisfy our filter
+            {
+                Console.WriteLine("From Listener {0} Received Event {1} with payload {2}", networkListener.Name, evnt.Key, evnt.Value.ToString());
+            }
+        };
+```
+
 ##### Context-based Filtering
 Some scenarios require advanced filtering based on extended context.
 Producers may call `DiagnosticSource.IsEnabled()` overloads and supply additional event properties:
diff --git a/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/DiagnosticListener.cs b/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/DiagnosticListener.cs
index 3d39054f5694..1a58f827851c 100644
--- a/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/DiagnosticListener.cs
+++ b/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/DiagnosticListener.cs
@@ -183,7 +183,7 @@ public virtual void Dispose()
 
             // Indicate completion to all subscribers.
             DiagnosticSubscription? subscriber = null;
-            Interlocked.Exchange(ref subscriber, _subscriptions);
+            subscriber = Interlocked.Exchange(ref _subscriptions, subscriber);
             while (subscriber != null)
             {
                 subscriber.Observer.OnCompleted();
diff --git a/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/DiagnosticSourceEventSource.cs b/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/DiagnosticSourceEventSource.cs
index 510c0d9a21e6..fbb13c9c9b6d 100644
--- a/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/DiagnosticSourceEventSource.cs
+++ b/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/DiagnosticSourceEventSource.cs
@@ -1440,13 +1440,13 @@ public ValueTypedFetchProperty(Type type, PropertyInfo property) : base(type)
                     /// </summary>
                     private sealed class ReflectionPropertyFetch : PropertyFetch
                     {
-                        private readonly PropertyInfo _property;
+                        private readonly MethodInvoker _getterInvoker;
                         public ReflectionPropertyFetch(Type type, PropertyInfo property) : base(type)
                         {
-                            _property = property;
+                            _getterInvoker = MethodInvoker.Create(property.GetMethod!);
                         }
 
-                        public override object? Fetch(object? obj) => _property.GetValue(obj);
+                        public override object? Fetch(object? obj) => _getterInvoker.Invoke(obj);
                     }
 
                     /// <summary>
diff --git a/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/Metrics/Instrument.netcore.cs b/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/Metrics/Instrument.netcore.cs
index 0321f2b1aae8..2a862fcb6257 100644
--- a/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/Metrics/Instrument.netcore.cs
+++ b/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/Metrics/Instrument.netcore.cs
@@ -7,46 +7,6 @@
 
 namespace System.Diagnostics.Metrics
 {
-    // We define a separate structure for the different number of tags.
-    // The reason is, the performance is critical for the Metrics APIs that accept tags parameters.
-    // We are trying to reduce big tags structure initialization inside the APIs when using fewer tags.
-
-    [StructLayout(LayoutKind.Sequential)]
-    internal struct OneTagBag
-    {
-        internal KeyValuePair<string, object?> Tag1;
-        internal OneTagBag(KeyValuePair<string, object?> tag)
-        {
-            Tag1 = tag;
-        }
-    }
-
-    [StructLayout(LayoutKind.Sequential)]
-    internal struct TwoTagsBag
-    {
-        internal KeyValuePair<string, object?> Tag1;
-        internal KeyValuePair<string, object?> Tag2;
-        internal TwoTagsBag(KeyValuePair<string, object?> tag1, KeyValuePair<string, object?> tag2)
-        {
-            Tag1 = tag1;
-            Tag2 = tag2;
-        }
-    }
-
-    [StructLayout(LayoutKind.Sequential)]
-    internal struct ThreeTagsBag
-    {
-        internal KeyValuePair<string, object?> Tag1;
-        internal KeyValuePair<string, object?> Tag2;
-        internal KeyValuePair<string, object?> Tag3;
-        internal ThreeTagsBag(KeyValuePair<string, object?> tag1, KeyValuePair<string, object?> tag2, KeyValuePair<string, object?> tag3)
-        {
-            Tag1 = tag1;
-            Tag2 = tag2;
-            Tag3 = tag3;
-        }
-    }
-
     /// <summary>
     /// Instrument{T} is the base class from which all non-observable instruments will inherit from.
     /// </summary>
@@ -60,12 +20,8 @@ public abstract partial class Instrument<T> : Instrument where T : struct
         /// </summary>
         /// <param name="measurement">The measurement value.</param>
         /// <param name="tag">A key-value pair tag associated with the measurement.</param>
-        protected void RecordMeasurement(T measurement, KeyValuePair<string, object?> tag)
-        {
-            OneTagBag tags = new OneTagBag(tag);
-
-            RecordMeasurement(measurement, MemoryMarshal.CreateReadOnlySpan(ref tags.Tag1, 1));
-        }
+        protected void RecordMeasurement(T measurement, KeyValuePair<string, object?> tag) =>
+            RecordMeasurement(measurement, [tag]);
 
         /// <summary>
         /// Record the measurement by notifying all <see cref="MeterListener" /> objects which listening to this instrument.
@@ -73,12 +29,8 @@ protected void RecordMeasurement(T measurement, KeyValuePair<string, object?> ta
         /// <param name="measurement">The measurement value.</param>
         /// <param name="tag1">A first key-value pair tag associated with the measurement.</param>
         /// <param name="tag2">A second key-value pair tag associated with the measurement.</param>
-        protected void RecordMeasurement(T measurement, KeyValuePair<string, object?> tag1, KeyValuePair<string, object?> tag2)
-        {
-            TwoTagsBag tags = new TwoTagsBag(tag1, tag2);
-
-            RecordMeasurement(measurement, MemoryMarshal.CreateReadOnlySpan(ref tags.Tag1, 2));
-        }
+        protected void RecordMeasurement(T measurement, KeyValuePair<string, object?> tag1, KeyValuePair<string, object?> tag2) =>
+            RecordMeasurement(measurement, [tag1, tag2]);
 
         /// <summary>
         /// Record the measurement by notifying all <see cref="MeterListener" /> objects which listening to this instrument.
@@ -87,12 +39,8 @@ protected void RecordMeasurement(T measurement, KeyValuePair<string, object?> ta
         /// <param name="tag1">A first key-value pair tag associated with the measurement.</param>
         /// <param name="tag2">A second key-value pair tag associated with the measurement.</param>
         /// <param name="tag3">A third key-value pair tag associated with the measurement.</param>
-        protected void RecordMeasurement(T measurement, KeyValuePair<string, object?> tag1, KeyValuePair<string, object?> tag2, KeyValuePair<string, object?> tag3)
-        {
-            ThreeTagsBag tags = new ThreeTagsBag(tag1, tag2, tag3);
-
-            RecordMeasurement(measurement, MemoryMarshal.CreateReadOnlySpan(ref tags.Tag1, 3));
-        }
+        protected void RecordMeasurement(T measurement, KeyValuePair<string, object?> tag1, KeyValuePair<string, object?> tag2, KeyValuePair<string, object?> tag3) =>
+            RecordMeasurement(measurement, [tag1, tag2, tag3]);
 
         /// <summary>
         /// Record the measurement by notifying all <see cref="MeterListener" /> objects which listening to this instrument.
diff --git a/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/Metrics/MetricsEventSource.cs b/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/Metrics/MetricsEventSource.cs
index 0b446b23ad35..3fbe68f55454 100644
--- a/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/Metrics/MetricsEventSource.cs
+++ b/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/Metrics/MetricsEventSource.cs
@@ -737,7 +737,11 @@ private static string FormatQuantiles(QuantileValue[] quantiles)
                 StringBuilder sb = new StringBuilder();
                 for (int i = 0; i < quantiles.Length; i++)
                 {
-                    sb.Append(quantiles[i].Quantile).Append('=').Append(quantiles[i].Value);
+#if NETCOREAPP
+                    sb.Append(CultureInfo.InvariantCulture, $"{quantiles[i].Quantile}={quantiles[i].Value}");
+#else
+                    sb.AppendFormat(CultureInfo.InvariantCulture, "{0}={1}", quantiles[i].Quantile, quantiles[i].Value);
+#endif
                     if (i != quantiles.Length - 1)
                     {
                         sb.Append(';');
diff --git a/src/libraries/System.Diagnostics.DiagnosticSource/tests/DiagnosticSourceTests.cs b/src/libraries/System.Diagnostics.DiagnosticSource/tests/DiagnosticSourceTests.cs
index f8ca97c501cc..7be9494d77b0 100644
--- a/src/libraries/System.Diagnostics.DiagnosticSource/tests/DiagnosticSourceTests.cs
+++ b/src/libraries/System.Diagnostics.DiagnosticSource/tests/DiagnosticSourceTests.cs
@@ -116,6 +116,10 @@ public void Completed()
             listener.Dispose();
             Assert.True(observer.Completed);
 
+            // Subscriptions are removed when listener is disposed and don't receive further notifications
+            listener.Write("AnotherNotification", null);
+            Assert.Equal(1, result.Count);
+
             // confirm that we can unsubscribe without crashing
             subscription.Dispose();
 
diff --git a/src/libraries/System.Diagnostics.DiagnosticSource/tests/MetricEventSourceTests.cs b/src/libraries/System.Diagnostics.DiagnosticSource/tests/MetricEventSourceTests.cs
index 0db3af5edb57..8eec15c60c79 100644
--- a/src/libraries/System.Diagnostics.DiagnosticSource/tests/MetricEventSourceTests.cs
+++ b/src/libraries/System.Diagnostics.DiagnosticSource/tests/MetricEventSourceTests.cs
@@ -1,7 +1,6 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System;
 using System.Collections.Generic;
 using System.Diagnostics.Tracing;
 using System.Globalization;
@@ -9,7 +8,7 @@
 using System.Runtime.CompilerServices;
 using System.Text;
 using System.Threading;
-using System.Threading.Tasks;
+using Microsoft.DotNet.RemoteExecutor;
 using Xunit;
 using Xunit.Abstractions;
 
@@ -659,45 +658,59 @@ public void MultipleListeners_PublishingInstruments()
             AssertInitialEnumerationCompleteEventPresent(events2);
         }
 
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))]
+        public static bool IsNotBrowserAndRemoteExecuteSupported => PlatformDetection.IsNotBrowser && RemoteExecutor.IsSupported;
+
+        [ConditionalFact(typeof(MetricEventSourceTests), nameof(IsNotBrowserAndRemoteExecuteSupported))]
         [OuterLoop("Slow and has lots of console spew")]
         public void EventSourcePublishesTimeSeriesWithEmptyMetadata()
         {
-            using Meter meter = new Meter("TestMeter1", null, new TagList() { { "Mk1", "Mv1" }, { "Mk2", "Mv2" } }, new object());
-            Counter<int> c = meter.CreateCounter<int>("counter1");
-            int counterState = 3;
-            ObservableCounter<int> oc = meter.CreateObservableCounter<int>("observableCounter1", () => { counterState += 7; return counterState; });
-            int gaugeState = 0;
-            ObservableGauge<int> og = meter.CreateObservableGauge<int>("observableGauge1", () => { gaugeState += 9; return gaugeState; });
-            Histogram<int> h = meter.CreateHistogram<int>("histogram1");
-            UpDownCounter<int> udc = meter.CreateUpDownCounter<int>("upDownCounter1");
-            int upDownCounterState = 0;
-            ObservableUpDownCounter<int> oudc = meter.CreateObservableUpDownCounter<int>("observableUpDownCounter1", () => { upDownCounterState -= 11; return upDownCounterState; });
-
-            EventWrittenEventArgs[] events;
-            using (MetricsEventListener listener = new MetricsEventListener(_output, MetricsEventListener.TimeSeriesValues, IntervalSecs, "TestMeter1"))
+            RemoteExecutor.Invoke(static () =>
             {
-                listener.WaitForCollectionStop(s_waitForEventTimeout, 1);
-                c.Add(5);
-                h.Record(19);
-                udc.Add(-33);
-                listener.WaitForCollectionStop(s_waitForEventTimeout, 2);
-                c.Add(12);
-                h.Record(26);
-                udc.Add(-40);
-                listener.WaitForCollectionStop(s_waitForEventTimeout, 3);
-                events = listener.Events.ToArray();
-            }
+                CultureInfo.DefaultThreadCurrentCulture = new CultureInfo("fi-FI");
 
-            AssertBeginInstrumentReportingEventsPresent(events, c, oc, og, h, udc, oudc);
-            AssertInitialEnumerationCompleteEventPresent(events);
-            AssertCounterEventsPresent(events, meter.Name, c.Name, "", "", ("5", "5"), ("12", "17"));
-            AssertCounterEventsPresent(events, meter.Name, oc.Name, "", "", ("", "10"), ("7", "17"));
-            AssertGaugeEventsPresent(events, meter.Name, og.Name, "", "", "9", "18");
-            AssertHistogramEventsPresent(events, meter.Name, h.Name, "", "", ("0.5=19;0.95=19;0.99=19", "1", "19"), ("0.5=26;0.95=26;0.99=26", "1", "26"));
-            AssertUpDownCounterEventsPresent(events, meter.Name, udc.Name, "", "", ("-33", "-33"), ("-40", "-73"));
-            AssertUpDownCounterEventsPresent(events, meter.Name, oudc.Name, "", "", ("", "-11"), ("-11", "-22"));
-            AssertCollectStartStopEventsPresent(events, IntervalSecs, 3);
+                using Meter meter = new Meter("TestMeter1", null, new TagList() { { "Mk1", "Mv1" }, { "Mk2", "Mv2" } }, new object());
+                Counter<int> c = meter.CreateCounter<int>("counter1");
+                int counterState = 3;
+                ObservableCounter<int> oc = meter.CreateObservableCounter<int>("observableCounter1", () => { counterState += 7; return counterState; });
+                int gaugeState = 0;
+                ObservableGauge<int> og = meter.CreateObservableGauge<int>("observableGauge1", () => { gaugeState += 9; return gaugeState; });
+                Histogram<int> h = meter.CreateHistogram<int>("histogram1");
+                UpDownCounter<int> udc = meter.CreateUpDownCounter<int>("upDownCounter1");
+                int upDownCounterState = 0;
+                ObservableUpDownCounter<int> oudc = meter.CreateObservableUpDownCounter<int>("observableUpDownCounter1", () => { upDownCounterState -= 11; return upDownCounterState; });
+
+                EventWrittenEventArgs[] events;
+                using (MetricsEventListener listener = new MetricsEventListener(NullTestOutputHelper.Instance, MetricsEventListener.TimeSeriesValues, IntervalSecs, "TestMeter1"))
+                {
+                    listener.WaitForCollectionStop(s_waitForEventTimeout, 1);
+                    c.Add(5);
+                    h.Record(19);
+                    udc.Add(-33);
+                    listener.WaitForCollectionStop(s_waitForEventTimeout, 2);
+                    c.Add(12);
+                    h.Record(26);
+                    udc.Add(-40);
+                    listener.WaitForCollectionStop(s_waitForEventTimeout, 3);
+                    events = listener.Events.ToArray();
+                }
+
+                AssertBeginInstrumentReportingEventsPresent(events, c, oc, og, h, udc, oudc);
+                AssertInitialEnumerationCompleteEventPresent(events);
+                AssertCounterEventsPresent(events, meter.Name, c.Name, "", "", ("5", "5"), ("12", "17"));
+                AssertCounterEventsPresent(events, meter.Name, oc.Name, "", "", ("", "10"), ("7", "17"));
+                AssertGaugeEventsPresent(events, meter.Name, og.Name, "", "", "9", "18");
+                AssertHistogramEventsPresent(events, meter.Name, h.Name, "", "", ("0.5=19;0.95=19;0.99=19", "1", "19"), ("0.5=26;0.95=26;0.99=26", "1", "26"));
+                AssertUpDownCounterEventsPresent(events, meter.Name, udc.Name, "", "", ("-33", "-33"), ("-40", "-73"));
+                AssertUpDownCounterEventsPresent(events, meter.Name, oudc.Name, "", "", ("", "-11"), ("-11", "-22"));
+                AssertCollectStartStopEventsPresent(events, IntervalSecs, 3);
+            }).Dispose();
+        }
+
+        private sealed class NullTestOutputHelper : ITestOutputHelper
+        {
+            public static NullTestOutputHelper Instance { get; } = new();
+            public void WriteLine(string message) { }
+            public void WriteLine(string format, params object[] args) { }
         }
 
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))]
@@ -1471,7 +1484,7 @@ private static string FormatTags(IEnumerable<KeyValuePair<string, object?>>? tag
             return sb.ToString();
         }
 
-        private void AssertBeginInstrumentReportingEventsPresent(EventWrittenEventArgs[] events, params Instrument[] expectedInstruments)
+        private static void AssertBeginInstrumentReportingEventsPresent(EventWrittenEventArgs[] events, params Instrument[] expectedInstruments)
         {
             var beginReportEvents = events.Where(e => e.EventName == "BeginInstrumentReporting").Select(e =>
                 new
@@ -1503,7 +1516,7 @@ private void AssertBeginInstrumentReportingEventsPresent(EventWrittenEventArgs[]
             Assert.Equal(expectedInstruments.Length, beginReportEvents.Length);
         }
 
-        private void AssertEndInstrumentReportingEventsPresent(EventWrittenEventArgs[] events, params Instrument[] expectedInstruments)
+        private static void AssertEndInstrumentReportingEventsPresent(EventWrittenEventArgs[] events, params Instrument[] expectedInstruments)
         {
             var beginReportEvents = events.Where(e => e.EventName == "EndInstrumentReporting").Select(e =>
                 new
@@ -1535,27 +1548,27 @@ private void AssertEndInstrumentReportingEventsPresent(EventWrittenEventArgs[] e
             Assert.Equal(expectedInstruments.Length, beginReportEvents.Length);
         }
 
-        private void AssertInitialEnumerationCompleteEventPresent(EventWrittenEventArgs[] events, int eventsCount = 1)
+        private static void AssertInitialEnumerationCompleteEventPresent(EventWrittenEventArgs[] events, int eventsCount = 1)
         {
             Assert.Equal(eventsCount, events.Where(e => e.EventName == "InitialInstrumentEnumerationComplete").Count());
         }
 
-        private void AssertTimeSeriesLimitPresent(EventWrittenEventArgs[] events)
+        private static void AssertTimeSeriesLimitPresent(EventWrittenEventArgs[] events)
         {
             Assert.Equal(1, events.Where(e => e.EventName == "TimeSeriesLimitReached").Count());
         }
 
-        private void AssertTimeSeriesLimitNotPresent(EventWrittenEventArgs[] events)
+        private static void AssertTimeSeriesLimitNotPresent(EventWrittenEventArgs[] events)
         {
             Assert.Equal(0, events.Where(e => e.EventName == "TimeSeriesLimitReached").Count());
         }
 
-        private void AssertHistogramLimitPresent(EventWrittenEventArgs[] events)
+        private static void AssertHistogramLimitPresent(EventWrittenEventArgs[] events)
         {
             Assert.Equal(1, events.Where(e => e.EventName == "HistogramLimitReached").Count());
         }
 
-        private void AssertInstrumentPublishingEventsPresent(EventWrittenEventArgs[] events, params Instrument[] expectedInstruments)
+        private static void AssertInstrumentPublishingEventsPresent(EventWrittenEventArgs[] events, params Instrument[] expectedInstruments)
         {
             var publishEvents = events.Where(e => e.EventName == "InstrumentPublished").Select(e =>
                 new
@@ -1587,19 +1600,19 @@ private void AssertInstrumentPublishingEventsPresent(EventWrittenEventArgs[] eve
             Assert.Equal(expectedInstruments.Length, publishEvents.Length);
         }
 
-        private void AssertCounterEventsPresent(EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags,
+        private static void AssertCounterEventsPresent(EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags,
             string expectedUnit, params (string, string)[] expected)
         {
             AssertGenericCounterEventsPresent("CounterRateValuePublished", events, meterName, instrumentName, tags, expectedUnit, expected);
         }
 
-        private void AssertUpDownCounterEventsPresent(EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags,
+        private static void AssertUpDownCounterEventsPresent(EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags,
             string expectedUnit, params (string, string)[] expected)
         {
             AssertGenericCounterEventsPresent("UpDownCounterRateValuePublished", events, meterName, instrumentName, tags, expectedUnit, expected);
         }
 
-        private void AssertGenericCounterEventsPresent(string eventName, EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags,
+        private static void AssertGenericCounterEventsPresent(string eventName, EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags,
             string expectedUnit, params (string, string)[] expected)
         {
             var counterEvents = events.Where(e => e.EventName == eventName).Select(e =>
@@ -1623,7 +1636,7 @@ private void AssertGenericCounterEventsPresent(string eventName, EventWrittenEve
             }
         }
 
-        private void AssertCounterEventsNotPresent(EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags)
+        private static void AssertCounterEventsNotPresent(EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags)
         {
             var counterEvents = events.Where(e => e.EventName == "CounterRateValuePublished").Select(e =>
                 new
@@ -1637,7 +1650,7 @@ private void AssertCounterEventsNotPresent(EventWrittenEventArgs[] events, strin
             Assert.Equal(0, filteredEvents.Length);
         }
 
-        private void AssertGaugeEventsPresent(EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags,
+        private static void AssertGaugeEventsPresent(EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags,
             string expectedUnit, params string[] expectedValues)
         {
             var counterEvents = events.Where(e => e.EventName == "GaugeValuePublished").Select(e =>
@@ -1659,7 +1672,7 @@ private void AssertGaugeEventsPresent(EventWrittenEventArgs[] events, string met
             }
         }
 
-        private void AssertHistogramEventsPresent(EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags,
+        private static void AssertHistogramEventsPresent(EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags,
             string expectedUnit, params (string, string, string)[] expected)
         {
             var counterEvents = events.Where(e => e.EventName == "HistogramValuePublished").Select(e =>
@@ -1685,7 +1698,7 @@ private void AssertHistogramEventsPresent(EventWrittenEventArgs[] events, string
             }
         }
 
-        private void AssertHistogramEventsNotPresent(EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags)
+        private static void AssertHistogramEventsNotPresent(EventWrittenEventArgs[] events, string meterName, string instrumentName, string tags)
         {
             var counterEvents = events.Where(e => e.EventName == "HistogramValuePublished").Select(e =>
                 new
@@ -1698,7 +1711,7 @@ private void AssertHistogramEventsNotPresent(EventWrittenEventArgs[] events, str
             var filteredEvents = counterEvents.Where(e => e.MeterName == meterName && e.InstrumentName == instrumentName && e.Tags == tags).ToArray();
             Assert.Equal(0, filteredEvents.Length);
         }
-        private void AssertCollectStartStopEventsPresent(EventWrittenEventArgs[] events, double expectedIntervalSecs, int expectedPairs)
+        private static void AssertCollectStartStopEventsPresent(EventWrittenEventArgs[] events, double expectedIntervalSecs, int expectedPairs)
         {
             int startEventsSeen = 0;
             int stopEventsSeen = 0;
@@ -1727,7 +1740,7 @@ private void AssertCollectStartStopEventsPresent(EventWrittenEventArgs[] events,
             Assert.Equal(expectedPairs, stopEventsSeen);
         }
 
-        private void AssertObservableCallbackErrorPresent(EventWrittenEventArgs[] events)
+        private static void AssertObservableCallbackErrorPresent(EventWrittenEventArgs[] events)
         {
             var errorEvents = events.Where(e => e.EventName == "ObservableInstrumentCallbackError").Select(e =>
                 new
@@ -1738,7 +1751,7 @@ private void AssertObservableCallbackErrorPresent(EventWrittenEventArgs[] events
             Assert.Contains("Example user exception", errorEvents[0].ErrorText);
         }
 
-        private void AssertMultipleSessionsConfiguredIncorrectlyErrorEventsPresent(EventWrittenEventArgs[] events,
+        private static void AssertMultipleSessionsConfiguredIncorrectlyErrorEventsPresent(EventWrittenEventArgs[] events,
             string expectedMaxHistograms, string actualMaxHistograms, string expectedMaxTimeSeries, string actualMaxTimeSeries,
             string expectedRefreshInterval, string actualRefreshInterval)
         {
diff --git a/src/libraries/System.Diagnostics.EventLog/src/System/Diagnostics/EventData.cs b/src/libraries/System.Diagnostics.EventLog/src/System/Diagnostics/EventData.cs
index 8497386db5ee..961a6e432110 100644
--- a/src/libraries/System.Diagnostics.EventLog/src/System/Diagnostics/EventData.cs
+++ b/src/libraries/System.Diagnostics.EventLog/src/System/Diagnostics/EventData.cs
@@ -39,7 +39,7 @@ public EventLogEntryType EntryType
             get => _entryType;
             set
             {
-                if (!Enum.IsDefined(typeof(EventLogEntryType), value))
+                if (!Enum.IsDefined(value))
                     throw new InvalidEnumArgumentException(nameof(EntryType), (int)value, typeof(EventLogEntryType));
 
                 _entryType = value;
diff --git a/src/libraries/System.Diagnostics.EventLog/src/System/Diagnostics/EventLogInternal.cs b/src/libraries/System.Diagnostics.EventLog/src/System/Diagnostics/EventLogInternal.cs
index 41173ac89234..f34da4ccf018 100644
--- a/src/libraries/System.Diagnostics.EventLog/src/System/Diagnostics/EventLogInternal.cs
+++ b/src/libraries/System.Diagnostics.EventLog/src/System/Diagnostics/EventLogInternal.cs
@@ -1292,7 +1292,7 @@ public void WriteEntry(string message, EventLogEntryType type, int eventID, shor
             if (Source.Length == 0)
                 throw new ArgumentException(SR.NeedSourceToWrite);
 
-            if (!Enum.IsDefined(typeof(EventLogEntryType), type))
+            if (!Enum.IsDefined(type))
                 throw new InvalidEnumArgumentException(nameof(type), (int)type, typeof(EventLogEntryType));
 
             string currentMachineName = machineName;
diff --git a/src/libraries/System.Diagnostics.PerformanceCounter/src/System/Diagnostics/CounterCreationData.cs b/src/libraries/System.Diagnostics.PerformanceCounter/src/System/Diagnostics/CounterCreationData.cs
index 33a814625986..0abb67e55b5f 100644
--- a/src/libraries/System.Diagnostics.PerformanceCounter/src/System/Diagnostics/CounterCreationData.cs
+++ b/src/libraries/System.Diagnostics.PerformanceCounter/src/System/Diagnostics/CounterCreationData.cs
@@ -33,7 +33,7 @@ public PerformanceCounterType CounterType
             }
             set
             {
-                if (!Enum.IsDefined(typeof(PerformanceCounterType), value))
+                if (!Enum.IsDefined(value))
                     throw new InvalidEnumArgumentException(nameof(PerformanceCounterType), (int)value, typeof(PerformanceCounterType));
 
                 _counterType = value;
diff --git a/src/libraries/System.Diagnostics.Process/src/System/Diagnostics/Process.OSX.cs b/src/libraries/System.Diagnostics.Process/src/System/Diagnostics/Process.OSX.cs
index 849306016b39..07f55780d82d 100644
--- a/src/libraries/System.Diagnostics.Process/src/System/Diagnostics/Process.OSX.cs
+++ b/src/libraries/System.Diagnostics.Process/src/System/Diagnostics/Process.OSX.cs
@@ -117,8 +117,8 @@ private static TimeSpan MapTime(ulong sysTime)
             if (denom == default)
             {
                 Interop.libSystem.mach_timebase_info_data_t timeBase = GetTimeBase();
-                s_timeBase_denom = denom = timeBase.denom;
                 s_timeBase_numer = timeBase.numer;
+                s_timeBase_denom = denom = timeBase.denom;
             }
             uint numer = s_timeBase_numer;
 
diff --git a/src/libraries/System.Diagnostics.Tracing/tests/BasicEventSourceTest/Harness/EtwListener.cs b/src/libraries/System.Diagnostics.Tracing/tests/BasicEventSourceTest/Harness/EtwListener.cs
index cdbb961313ec..7e514b20f1a1 100644
--- a/src/libraries/System.Diagnostics.Tracing/tests/BasicEventSourceTest/Harness/EtwListener.cs
+++ b/src/libraries/System.Diagnostics.Tracing/tests/BasicEventSourceTest/Harness/EtwListener.cs
@@ -3,6 +3,7 @@
 
 using Microsoft.Diagnostics.Tracing;
 using Microsoft.Diagnostics.Tracing.Session;
+using Microsoft.DotNet.XUnitExtensions;
 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
@@ -39,7 +40,7 @@ public EtwListener(string dataFileName = "EventSourceTestData.etl", string sessi
             // Today you have to be Admin to turn on ETW events (anyone can write ETW events).
             if (TraceEventSession.IsElevated() != true)
             {
-                throw new Exception("Need to be elevated to run. ");
+                throw new SkipTestException("Need to be elevated to run. ");
             }
 
             if (dataFileName == null)
diff --git a/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/AD/SidList.cs b/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/AD/SidList.cs
index 2d6d63fee729..d488ad57dc2b 100644
--- a/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/AD/SidList.cs
+++ b/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/AD/SidList.cs
@@ -62,7 +62,7 @@ internal SidList(Interop.SID_AND_ATTRIBUTES[] sidAndAttr)
             TranslateSids(null, pSids);
         }
 
-        private void TranslateSids(string target, IntPtr[] pSids)
+        private unsafe void TranslateSids(string target, IntPtr[] pSids)
         {
             GlobalDebug.WriteLineIf(GlobalDebug.Info, "AuthZSet", "SidList: processing {0} SIDs", pSids.Length);
 
@@ -157,8 +157,8 @@ private void TranslateSids(string target, IntPtr[] pSids)
 
                 for (int i = 0; i < domainCount; i++)
                 {
-                    domains[i] = (Interop.LSA_TRUST_INFORMATION)Marshal.PtrToStructure(pCurrentDomain, typeof(Interop.LSA_TRUST_INFORMATION));
-                    pCurrentDomain = new IntPtr(pCurrentDomain.ToInt64() + Marshal.SizeOf(typeof(Interop.LSA_TRUST_INFORMATION)));
+                    domains[i] = *(Interop.LSA_TRUST_INFORMATION*)pCurrentDomain;
+                    pCurrentDomain += sizeof(Interop.LSA_TRUST_INFORMATION);
                 }
 
                 GlobalDebug.WriteLineIf(GlobalDebug.Info, "AuthZSet", "SidList: got {0} groups in {1} domains", sidCount, domainCount);
diff --git a/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/AuthZSet.cs b/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/AuthZSet.cs
index b9423466e825..73c71722cc2c 100644
--- a/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/AuthZSet.cs
+++ b/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/AuthZSet.cs
@@ -131,7 +131,7 @@ out pClientContext
 
                                 // Extract TOKEN_GROUPS.GroupCount
 
-                                Interop.TOKEN_GROUPS tokenGroups = (Interop.TOKEN_GROUPS)Marshal.PtrToStructure(pBuffer, typeof(Interop.TOKEN_GROUPS));
+                                Interop.TOKEN_GROUPS tokenGroups = *(Interop.TOKEN_GROUPS*)pBuffer;
 
                                 uint groupCount = tokenGroups.GroupCount;
 
@@ -141,13 +141,13 @@ out pClientContext
                                 // each native SID_AND_ATTRIBUTES into a managed SID_AND_ATTR.
                                 Interop.SID_AND_ATTRIBUTES[] groups = new Interop.SID_AND_ATTRIBUTES[groupCount];
 
-                                IntPtr currentItem = new IntPtr(pBuffer.ToInt64() + Marshal.SizeOf(typeof(Interop.TOKEN_GROUPS)) - sizeof(Interop.SID_AND_ATTRIBUTES));
+                                IntPtr currentItem = pBuffer + sizeof(Interop.TOKEN_GROUPS) - sizeof(Interop.SID_AND_ATTRIBUTES);
 
                                 for (int i = 0; i < groupCount; i++)
                                 {
-                                    groups[i] = (Interop.SID_AND_ATTRIBUTES)Marshal.PtrToStructure(currentItem, typeof(Interop.SID_AND_ATTRIBUTES));
+                                    groups[i] = *(Interop.SID_AND_ATTRIBUTES*)currentItem;
 
-                                    currentItem = new IntPtr(currentItem.ToInt64() + Marshal.SizeOf(typeof(Interop.SID_AND_ATTRIBUTES)));
+                                    currentItem += sizeof(Interop.SID_AND_ATTRIBUTES);
                                 }
 
                                 _groupSidList = new SidList(groups);
diff --git a/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/SAM/SAMStoreCtx.cs b/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/SAM/SAMStoreCtx.cs
index 1f91a0e5dacc..f31b35ca0c22 100644
--- a/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/SAM/SAMStoreCtx.cs
+++ b/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/SAM/SAMStoreCtx.cs
@@ -1053,7 +1053,7 @@ private void LoadComputerInfo()
                 if (err == 0)
                 {
                     UnsafeNativeMethods.WKSTA_INFO_100 wkstaInfo =
-                        (UnsafeNativeMethods.WKSTA_INFO_100)Marshal.PtrToStructure(buffer, typeof(UnsafeNativeMethods.WKSTA_INFO_100));
+                        Marshal.PtrToStructure<UnsafeNativeMethods.WKSTA_INFO_100>(buffer);
 
                     _machineFlatName = wkstaInfo.wki100_computername;
                     GlobalDebug.WriteLineIf(GlobalDebug.Info, "SAMStoreCtx", "LoadComputerInfo: machineFlatName={0}", _machineFlatName);
diff --git a/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/Utils.cs b/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/Utils.cs
index 619c48191e26..c4995b0a2c53 100644
--- a/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/Utils.cs
+++ b/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/Utils.cs
@@ -198,15 +198,14 @@ internal static SidType ClassifySID(byte[] sid)
         }
 
 
-        internal static SidType ClassifySID(IntPtr pSid)
+        internal static unsafe SidType ClassifySID(IntPtr pSid)
         {
             Debug.Assert(Interop.Advapi32.IsValidSid(pSid));
 
             // Get the issuing authority and the first RID
             IntPtr pIdentAuth = Interop.Advapi32.GetSidIdentifierAuthority(pSid);
 
-            Interop.Advapi32.SID_IDENTIFIER_AUTHORITY identAuth =
-                (Interop.Advapi32.SID_IDENTIFIER_AUTHORITY)Marshal.PtrToStructure(pIdentAuth, typeof(Interop.Advapi32.SID_IDENTIFIER_AUTHORITY));
+            Interop.Advapi32.SID_IDENTIFIER_AUTHORITY identAuth = *(Interop.Advapi32.SID_IDENTIFIER_AUTHORITY*)pIdentAuth;
 
             IntPtr pRid = Interop.Advapi32.GetSidSubAuthority(pSid, 0);
             int rid = Marshal.ReadInt32(pRid);
@@ -333,7 +332,7 @@ internal static bool IsSamUser()
         }
 
 
-        internal static IntPtr GetCurrentUserSid()
+        internal static unsafe IntPtr GetCurrentUserSid()
         {
             SafeTokenHandle tokenHandle = null;
             IntPtr pBuffer = IntPtr.Zero;
@@ -425,7 +424,7 @@ out tokenHandle
                 }
 
                 // Retrieve the user's SID from the user info
-                Interop.TOKEN_USER tokenUser = (Interop.TOKEN_USER)Marshal.PtrToStructure(pBuffer, typeof(Interop.TOKEN_USER));
+                Interop.TOKEN_USER tokenUser = *(Interop.TOKEN_USER*)pBuffer;
                 IntPtr pUserSid = tokenUser.sidAndAttributes.Sid;   // this is a reference into the NATIVE memory (into pBuffer)
 
                 Debug.Assert(Interop.Advapi32.IsValidSid(pUserSid));
@@ -457,7 +456,7 @@ out tokenHandle
         }
 
 
-        internal static IntPtr GetMachineDomainSid()
+        internal static unsafe IntPtr GetMachineDomainSid()
         {
             SafeLsaPolicyHandle policyHandle = null;
             IntPtr pBuffer = IntPtr.Zero;
@@ -496,8 +495,7 @@ internal static IntPtr GetMachineDomainSid()
                 }
 
                 Debug.Assert(pBuffer != IntPtr.Zero);
-                UnsafeNativeMethods.POLICY_ACCOUNT_DOMAIN_INFO info = (UnsafeNativeMethods.POLICY_ACCOUNT_DOMAIN_INFO)
-                                    Marshal.PtrToStructure(pBuffer, typeof(UnsafeNativeMethods.POLICY_ACCOUNT_DOMAIN_INFO));
+                UnsafeNativeMethods.POLICY_ACCOUNT_DOMAIN_INFO info = *(UnsafeNativeMethods.POLICY_ACCOUNT_DOMAIN_INFO*)pBuffer;
 
                 Debug.Assert(Interop.Advapi32.IsValidSid(info.DomainSid));
 
@@ -570,7 +568,7 @@ internal static UnsafeNativeMethods.DomainControllerInfo GetDcName(string comput
                 }
 
                 UnsafeNativeMethods.DomainControllerInfo domainControllerInfo =
-                    (UnsafeNativeMethods.DomainControllerInfo)Marshal.PtrToStructure(domainControllerInfoPtr, typeof(UnsafeNativeMethods.DomainControllerInfo));
+                    Marshal.PtrToStructure<UnsafeNativeMethods.DomainControllerInfo>(domainControllerInfoPtr);
 
                 return domainControllerInfo;
             }
@@ -802,7 +800,7 @@ internal static bool IsMachineDC(string computerName)
                 }
 
                 UnsafeNativeMethods.DSROLE_PRIMARY_DOMAIN_INFO_BASIC dsRolePrimaryDomainInfo =
-                    (UnsafeNativeMethods.DSROLE_PRIMARY_DOMAIN_INFO_BASIC)Marshal.PtrToStructure(dsRoleInfoPtr, typeof(UnsafeNativeMethods.DSROLE_PRIMARY_DOMAIN_INFO_BASIC));
+                    Marshal.PtrToStructure<UnsafeNativeMethods.DSROLE_PRIMARY_DOMAIN_INFO_BASIC>(dsRoleInfoPtr);
 
                 return (dsRolePrimaryDomainInfo.MachineRole == UnsafeNativeMethods.DSROLE_MACHINE_ROLE.DsRole_RoleBackupDomainController ||
                              dsRolePrimaryDomainInfo.MachineRole == UnsafeNativeMethods.DSROLE_MACHINE_ROLE.DsRole_RolePrimaryDomainController);
diff --git a/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/common/BerConverter.cs b/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/common/BerConverter.cs
index 7e37a4c15175..13f7b8b0963a 100644
--- a/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/common/BerConverter.cs
+++ b/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/common/BerConverter.cs
@@ -527,7 +527,7 @@ private static unsafe int EncodingMultiByteArrayHelper(SafeBerHandle berElement,
                 {
                     int i = 0;
                     berValArray = Utility.AllocHGlobalIntPtrArray(tempValue.Length + 1);
-                    int structSize = Marshal.SizeOf(typeof(BerVal));
+                    int structSize = Marshal.SizeOf<BerVal>();
                     managedBervalArray = new BerVal[tempValue.Length];
                     void** pBerValArray = (void**)berValArray;
 
diff --git a/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/common/DirectoryControl.cs b/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/common/DirectoryControl.cs
index 31548f6b2fd6..f47bc50318b9 100644
--- a/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/common/DirectoryControl.cs
+++ b/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/common/DirectoryControl.cs
@@ -716,7 +716,7 @@ public override unsafe byte[] GetValue()
             }
 
             IntPtr control = IntPtr.Zero;
-            int structSize = Marshal.SizeOf(typeof(SortKeyInterop));
+            int structSize = Marshal.SizeOf<SortKeyInterop>();
             int keyCount = nativeSortKeys.Length;
             IntPtr memHandle = Utility.AllocHGlobalIntPtrArray(keyCount + 1);
 
diff --git a/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/ldap/LdapConnection.cs b/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/ldap/LdapConnection.cs
index 0d478c6167ca..4be0407a9eea 100644
--- a/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/ldap/LdapConnection.cs
+++ b/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/ldap/LdapConnection.cs
@@ -544,7 +544,7 @@ private unsafe int SendRequestHelper(DirectoryRequest request, ref int messageID
             {
                 // Build server control.
                 managedServerControls = BuildControlArray(request.Controls, true);
-                int structSize = Marshal.SizeOf(typeof(LdapControl));
+                int structSize = Marshal.SizeOf<LdapControl>();
 
                 if (managedServerControls != null)
                 {
@@ -658,7 +658,7 @@ private unsafe int SendRequestHelper(DirectoryRequest request, ref int messageID
                     addModCount = (modifications == null ? 1 : modifications.Length + 1);
                     modArray = Utility.AllocHGlobalIntPtrArray(addModCount);
                     void** pModArray = (void**)modArray;
-                    int modStructSize = Marshal.SizeOf(typeof(LdapMod));
+                    int modStructSize = Marshal.SizeOf<LdapMod>();
                     int i = 0;
                     for (i = 0; i < addModCount - 1; i++)
                     {
@@ -918,12 +918,12 @@ private unsafe Interop.BOOL ProcessClientCertificate(IntPtr ldapHandle, IntPtr C
             var list = new ArrayList();
             if (CAs != IntPtr.Zero)
             {
-                SecPkgContext_IssuerListInfoEx trustedCAs = (SecPkgContext_IssuerListInfoEx)Marshal.PtrToStructure(CAs, typeof(SecPkgContext_IssuerListInfoEx));
+                SecPkgContext_IssuerListInfoEx trustedCAs = *(SecPkgContext_IssuerListInfoEx*)CAs;
                 int issuerNumber = trustedCAs.cIssuers;
                 for (int i = 0; i < issuerNumber; i++)
                 {
-                    IntPtr tempPtr = (IntPtr)((byte*)trustedCAs.aIssuers + Marshal.SizeOf(typeof(CRYPTOAPI_BLOB)) * (nint)i);
-                    CRYPTOAPI_BLOB info = (CRYPTOAPI_BLOB)Marshal.PtrToStructure(tempPtr, typeof(CRYPTOAPI_BLOB));
+                    IntPtr tempPtr = (IntPtr)((byte*)trustedCAs.aIssuers + sizeof(CRYPTOAPI_BLOB) * (nint)i);
+                    CRYPTOAPI_BLOB info = *(CRYPTOAPI_BLOB*)tempPtr;
                     int dataLength = info.cbData;
 
                     byte[] context = new byte[dataLength];
@@ -1077,7 +1077,7 @@ private void BindHelper(NetworkCredential newCredential, bool needSetCredential)
                 var cred = new SEC_WINNT_AUTH_IDENTITY_EX()
                 {
                     version = Interop.SEC_WINNT_AUTH_IDENTITY_VERSION,
-                    length = Marshal.SizeOf(typeof(SEC_WINNT_AUTH_IDENTITY_EX)),
+                    length = Marshal.SizeOf<SEC_WINNT_AUTH_IDENTITY_EX>(),
                     flags = Interop.SEC_WINNT_AUTH_IDENTITY_UNICODE
                 };
                 if (AuthType == AuthType.Kerberos)
@@ -1342,7 +1342,7 @@ internal static unsafe LdapMod[] BuildAttributes(CollectionBase directoryAttribu
 
                     attributes[i].values = Utility.AllocHGlobalIntPtrArray(valuesCount + 1);
                     void** pAttributesValues = (void**)attributes[i].values;
-                    int structSize = Marshal.SizeOf(typeof(BerVal));
+                    int structSize = Marshal.SizeOf<BerVal>();
                     IntPtr controlPtr;
 
                     int m;
diff --git a/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/ldap/LdapSessionOptions.cs b/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/ldap/LdapSessionOptions.cs
index 06809037a8ba..0fa241fc3011 100644
--- a/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/ldap/LdapSessionOptions.cs
+++ b/src/libraries/System.DirectoryServices.Protocols/src/System/DirectoryServices/Protocols/ldap/LdapSessionOptions.cs
@@ -555,7 +555,7 @@ public unsafe void StartTransportLayerSecurity(DirectoryControlCollection contro
             {
                 // build server control
                 managedServerControls = LdapConnection.BuildControlArray(controls, true);
-                int structSize = Marshal.SizeOf(typeof(LdapControl));
+                int structSize = Marshal.SizeOf<LdapControl>();
                 if (managedServerControls != null)
                 {
                     serverControlArray = Utility.AllocHGlobalIntPtrArray(managedServerControls.Length + 1);
@@ -848,7 +848,7 @@ private void ProcessCallBackRoutine(ReferralCallback tempCallback)
         {
             LdapReferralCallback value = new LdapReferralCallback()
             {
-                sizeofcallback = Marshal.SizeOf(typeof(LdapReferralCallback)),
+                sizeofcallback = Marshal.SizeOf<LdapReferralCallback>(),
                 query = tempCallback.QueryForConnection == null ? null : _queryDelegate,
                 notify = tempCallback.NotifyNewConnection == null ? null : _notifiyDelegate,
                 dereference = tempCallback.DereferenceConnection == null ? null : _dereferenceDelegate
diff --git a/src/libraries/System.DirectoryServices.Protocols/tests/BerConverterTests.cs b/src/libraries/System.DirectoryServices.Protocols/tests/BerConverterTests.cs
index ba92d33c2a7b..541d402d61f4 100644
--- a/src/libraries/System.DirectoryServices.Protocols/tests/BerConverterTests.cs
+++ b/src/libraries/System.DirectoryServices.Protocols/tests/BerConverterTests.cs
@@ -124,19 +124,70 @@ public void Encode_InvalidFormat_ThrowsBerConversionException(string format)
 
         public static IEnumerable<object[]> Decode_TestData()
         {
+            // Content: zero-length sequence
+            // Parsed as such
             yield return new object[] { "{}", new byte[] { 48, 0, 0, 0, 0, 0 }, new object[0] };
+
+            // Content: sequence containing octet string
+            // Parsed as such
             yield return new object[] { "{a}", new byte[] { 48, 132, 0, 0, 0, 5, 4, 3, 97, 98, 99 }, new object[] { "abc" } };
+
+            // Content: sequence containing integer
+            // Parsed as such
             yield return new object[] { "{i}", new byte[] { 48, 132, 0, 0, 0, 3, 2, 1, 10 }, new object[] { 10 } };
+
+            // Content: sequence containing two booleans
+            // Parsed as a sequence containing an integer, followed by an enumerated value
             yield return new object[] { "{ie}", new byte[] { 48, 132, 0, 0, 0, 6, 1, 1, 255, 1, 1, 0 }, new object[] { -1, 0 } };
+
+            // Content: sequence containing two booleans
+            // Parsed as such
             yield return new object[] { "{bb}", new byte[] { 48, 132, 0, 0, 0, 6, 1, 1, 255, 1, 1, 0 }, new object[] { true, false } };
+
+            // Content: sequence containing two booleans
+            // Parsed as a sequence containing two octet strings
             yield return new object[] { "{OO}", new byte[] { 48, 132, 0, 0, 0, 6, 1, 1, 255, 1, 1, 0 }, new object[] { new byte[] { 255 }, new byte[] { 0 } } };
+
+            // Content: sequence containing two booleans
+            // Parsed as a sequence containing two bitstrings
             yield return new object[] { "{BB}", new byte[] { 48, 132, 0, 0, 0, 6, 1, 1, 255, 1, 1, 0 }, new object[] { new byte[] { 255 }, new byte[] { 0 } } };
             if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) // vv and VV formats are not supported yet in Linux
             {
+                // Content: sequence containing three octet strings
+                // Parsed as a sequence containing two sequences of octet strings
                 yield return new object[] { "{vv}", new byte[] { 48, 132, 0, 0, 0, 9, 4, 3, 97, 98, 99, 4, 0, 4, 0 }, new object[] { null, null } };
+
+                // Content: sequence containing three octet strings
+                // Parsed as two sequences of octet strings
+                yield return new object[] { "vv", new byte[] { 48, 132, 0, 0, 0, 12, 4, 3, 97, 98, 99, 4, 2, 100, 101, 4, 1, 102 }, new object[] { new string[] { "abc", "de", "f" }, null } };
+
+                // Content: sequence containing two sequences of octet strings
+                // Parsed as such
+                yield return new object[] { "{vv}", new byte[] { 48, 14, 48, 5, 4, 3, 97, 98, 99, 48, 5, 4, 3, 100, 101, 102 }, new object[] { new string[] { "abc" }, new string[] { "def" } } };
+
+                // Content: sequence containing two booleans
+                // Parsed as a sequence containing two sequences of octet strings
                 yield return new object[] { "{vv}", new byte[] { 48, 132, 0, 0, 0, 6, 1, 1, 255, 1, 1, 0 }, new object[] { new string[] { "\x01" }, null } };
+
+                // Content: sequence containing two booleans. First boolean has a valid value which is also a valid UTF8 character
+                // Parsed as two sequences of octet strings
+                yield return new object[] { "vv", new byte[] { 48, 132, 0, 0, 0, 6, 1, 1, 48, 1, 1, 0 }, new object[] { new string[] { "\x30", "\x00" }, null } };
+
+                // Content: sequence of octet strings
+                // Parsed as a sequence containing two sequences of octet strings (returned as bytes)
                 yield return new object[] { "{VV}", new byte[] { 48, 132, 0, 0, 0, 9, 4, 3, 97, 98, 99, 4, 0, 4, 0 }, new object[] { null, null } };
-                yield return new object[] { "{VV}", new byte[] { 48, 132, 0, 0, 0, 6, 1, 1, 255, 1, 1, 0 }, new object[] { new byte[][] { new byte[] { 1 } }, null } };
+
+                // Content: sequence of octet strings
+                // Parsed as two sequences of octet strings (returned as bytes)
+                yield return new object[] { "VV", new byte[] { 48, 132, 0, 0, 0, 12, 4, 3, 97, 98, 99, 4, 2, 100, 101, 4, 1, 102 },new object[]{ new byte[][] { [97, 98, 99], [100, 101], [102] }, null } };
+
+                // Content: sequence containing two booleans
+                // Parsed as a sequence containing two sequences of octet strings (returned as bytes)
+                yield return new object[] { "{VV}", new byte[] { 48, 132, 0, 0, 0, 6, 1, 1, 255, 1, 1, 0 }, new object[] { new byte[][] { [1] }, null } };
+
+                // Content: sequence containing two booleans
+                // Parsed as two sequences of octet strings (returned as bytes)
+                yield return new object[] { "VV", new byte[] { 48, 132, 0, 0, 0, 6, 1, 1, 255, 1, 1, 0 }, new object[] { new byte[][] { [255], [0] }, null } };
             }
         }
 
@@ -188,5 +239,24 @@ public void Decode_Invalid_ThrowsBerConversionException(string format, byte[] va
         {
             Assert.Throws<BerConversionException>(() => BerConverter.Decode(format, values));
         }
+
+        public static IEnumerable<object[]> Manual_Wrapping_Required_Data()
+        {
+            // vv and VV formats are not supported yet in Linux
+            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+            {
+                yield return new object[] { "v", new object[] { new string[] { "abc", "def" } } };
+
+                yield return new object[] { "V", new object[] { new byte[][] { [97, 98, 99], [100, 101, 102] } } };
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(Manual_Wrapping_Required_Data))]
+        public void Must_Manually_Wrap_Several_OctetStrings_In_Sequence(string format, object[] values)
+        {
+            Assert.Throws<BerConversionException>(() => BerConverter.Decode(format, BerConverter.Encode(format, values)));
+            Assert.Equal(values, BerConverter.Decode(format, BerConverter.Encode("{" + format + "}", values)));
+        }
     }
 }
diff --git a/src/libraries/System.DirectoryServices/src/Interop/EnumVariant.cs b/src/libraries/System.DirectoryServices/src/Interop/EnumVariant.cs
index eb1ac26a1d09..b8f3f3227e7f 100644
--- a/src/libraries/System.DirectoryServices/src/Interop/EnumVariant.cs
+++ b/src/libraries/System.DirectoryServices/src/Interop/EnumVariant.cs
@@ -58,10 +58,10 @@ public void Reset()
             /// Moves the pointer to the next value In the contained IEnumVariant, and
             /// stores the current value In currentValue.
             /// </devdoc>
-            private void Advance()
+            private unsafe void Advance()
             {
                 _currentValue = s_noMoreValues;
-                IntPtr addr = Marshal.AllocCoTaskMem(Marshal.SizeOf(typeof(Variant)));
+                IntPtr addr = Marshal.AllocCoTaskMem(sizeof(Variant));
                 try
                 {
                     int[] numRead = new int[] { 0 };
diff --git a/src/libraries/System.DirectoryServices/src/System.DirectoryServices.csproj b/src/libraries/System.DirectoryServices/src/System.DirectoryServices.csproj
index 447387495bd3..845f7627ca3e 100644
--- a/src/libraries/System.DirectoryServices/src/System.DirectoryServices.csproj
+++ b/src/libraries/System.DirectoryServices/src/System.DirectoryServices.csproj
@@ -5,7 +5,7 @@
     <TargetFrameworks Condition="'$(NetCoreAppPrevious)' != ''">$(TargetFrameworks);$(NetCoreAppPrevious)-windows;$(NetCoreAppPrevious)</TargetFrameworks>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <IncludeInternalObsoleteAttribute>true</IncludeInternalObsoleteAttribute>
-    <NoWarn>$(NoWarn);IDE0059;IDE0060;CA1822</NoWarn>
+    <NoWarn>$(NoWarn);IDE0059;IDE0060;CA1822;CA1865</NoWarn>
     <EnableAOTAnalyzer>false</EnableAOTAnalyzer>
     <UseCompilerGeneratedDocXmlFile>false</UseCompilerGeneratedDocXmlFile>
     <IsPackable>true</IsPackable>
diff --git a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ActiveDirectoryReplicationMetaData.cs b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ActiveDirectoryReplicationMetaData.cs
index baad123def51..6dffd10fe382 100644
--- a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ActiveDirectoryReplicationMetaData.cs
+++ b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ActiveDirectoryReplicationMetaData.cs
@@ -64,14 +64,14 @@ internal void AddHelper(int count, IntPtr info, bool advanced)
             {
                 if (advanced)
                 {
-                    addr = IntPtr.Add(info, sizeof(int) * 2 + i * Marshal.SizeOf(typeof(DS_REPL_ATTR_META_DATA_2)));
+                    addr = IntPtr.Add(info, sizeof(int) * 2 + i * Marshal.SizeOf<DS_REPL_ATTR_META_DATA_2>());
 
                     AttributeMetadata managedMetaData = new AttributeMetadata(addr, true, _server, _nameTable);
                     Add(managedMetaData.Name, managedMetaData);
                 }
                 else
                 {
-                    addr = IntPtr.Add(info, sizeof(int) * 2 + i * Marshal.SizeOf(typeof(DS_REPL_ATTR_META_DATA)));
+                    addr = IntPtr.Add(info, sizeof(int) * 2 + i * Marshal.SizeOf<DS_REPL_ATTR_META_DATA>());
 
                     AttributeMetadata managedMetaData = new AttributeMetadata(addr, false, _server, _nameTable);
                     Add(managedMetaData.Name, managedMetaData);
diff --git a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ActiveDirectorySchemaClass.cs b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ActiveDirectorySchemaClass.cs
index 9909ce661603..baa3c301fa5c 100644
--- a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ActiveDirectorySchemaClass.cs
+++ b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ActiveDirectorySchemaClass.cs
@@ -1429,7 +1429,7 @@ private ArrayList GetPropertyValuesRecursively(string[] propertyNames)
                 // get the properties of the auxiliary classes
                 foreach (string auxSchemaClassName in GetValuesFromCache(PropertyManager.AuxiliaryClass))
                 {
-                    ActiveDirectorySchemaClass auxSchemaClass = new ActiveDirectorySchemaClass(_context, auxSchemaClassName, (DirectoryEntry?)null, null);
+                    using ActiveDirectorySchemaClass auxSchemaClass = new ActiveDirectorySchemaClass(_context, auxSchemaClassName, (DirectoryEntry?)null, null);
 
                     foreach (string property in auxSchemaClass.GetPropertyValuesRecursively(propertyNames))
                     {
@@ -1441,8 +1441,7 @@ private ArrayList GetPropertyValuesRecursively(string[] propertyNames)
                 }
                 foreach (string auxSchemaClassName in GetValuesFromCache(PropertyManager.SystemAuxiliaryClass))
                 {
-                    ActiveDirectorySchemaClass auxSchemaClass = new ActiveDirectorySchemaClass(_context, auxSchemaClassName, (DirectoryEntry?)null, null);
-
+                    using ActiveDirectorySchemaClass auxSchemaClass = new ActiveDirectorySchemaClass(_context, auxSchemaClassName, (DirectoryEntry?)null, null);
                     foreach (string property in auxSchemaClass.GetPropertyValuesRecursively(propertyNames))
                     {
                         if (!values.Contains(property))
diff --git a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ActiveDirectorySite.cs b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ActiveDirectorySite.cs
index d51219dafba6..1a50524bb143 100644
--- a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ActiveDirectorySite.cs
+++ b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ActiveDirectorySite.cs
@@ -1302,7 +1302,7 @@ private unsafe void GetDomains()
                 DomainController dc = DomainController.GetDomainController(Utils.GetNewDirectoryContext(serverName, DirectoryContextType.DirectoryServer, context));
                 IntPtr handle = dc.Handle;
 
-                Debug.Assert(handle != (IntPtr)0);
+                Debug.Assert(handle != 0);
 
                 void* pDomains = null;
                 // call DsReplicaSyncAllW
@@ -1327,11 +1327,11 @@ private unsafe void GetDomains()
                     IntPtr val = names.rItems;
                     if (count > 0)
                     {
-                        Debug.Assert(val != (IntPtr)0);
-                        IntPtr tmpPtr = (IntPtr)0;
+                        Debug.Assert(val != 0);
+                        IntPtr tmpPtr = 0;
                         for (int i = 0; i < count; i++)
                         {
-                            tmpPtr = IntPtr.Add(val, Marshal.SizeOf(typeof(DS_NAME_RESULT_ITEM)) * i);
+                            tmpPtr = IntPtr.Add(val, Marshal.SizeOf<DS_NAME_RESULT_ITEM>() * i);
                             DS_NAME_RESULT_ITEM nameResult = new DS_NAME_RESULT_ITEM();
                             Marshal.PtrToStructure(tmpPtr, nameResult);
                             if (nameResult.status == DS_NAME_ERROR.DS_NAME_NO_ERROR || nameResult.status == DS_NAME_ERROR.DS_NAME_ERROR_DOMAIN_ONLY)
diff --git a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/DirectoryServer.cs b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/DirectoryServer.cs
index 6a742782d8f4..4d7862649788 100644
--- a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/DirectoryServer.cs
+++ b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/DirectoryServer.cs
@@ -692,8 +692,8 @@ private unsafe void FreeReplicaInfo(DS_REPL_INFO_TYPE type, IntPtr value, SafeLi
 
         internal unsafe void SyncReplicaHelper(IntPtr dsHandle, bool isADAM, string partition, string? sourceServer, int option, SafeLibraryHandle libHandle)
         {
-            int structSize = Marshal.SizeOf(typeof(Guid));
-            IntPtr unmanagedGuid = (IntPtr)0;
+            int structSize = sizeof(Guid);
+            IntPtr unmanagedGuid = 0;
             Guid guid = Guid.Empty;
             AdamInstance? adamServer = null;
             DomainController? dcServer = null;
diff --git a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ForestTrustRelationshipInformation.cs b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ForestTrustRelationshipInformation.cs
index c216689bf0a0..a606eaf17bdf 100644
--- a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ForestTrustRelationshipInformation.cs
+++ b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ForestTrustRelationshipInformation.cs
@@ -114,8 +114,8 @@ public unsafe void Save()
             {
                 try
                 {
-                    IntPtr ptr = (IntPtr)0;
-                    fileTime = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(FileTime)));
+                    IntPtr ptr = 0;
+                    fileTime = Marshal.AllocHGlobal(Marshal.SizeOf<FileTime>());
                     Interop.Kernel32.GetSystemTimeAsFileTime(fileTime);
 
                     // set the time
@@ -134,7 +134,7 @@ public unsafe void Save()
                         ptrList.Add(ptr);
                         Interop.NtDll.RtlInitUnicodeString(out record.TopLevelName, ptr);
 
-                        tmpPtr = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(LSA_FOREST_TRUST_RECORD)));
+                        tmpPtr = Marshal.AllocHGlobal(Marshal.SizeOf<LSA_FOREST_TRUST_RECORD>());
                         ptrList.Add(tmpPtr);
                         Marshal.StructureToPtr(record, tmpPtr, false);
 
@@ -163,7 +163,7 @@ public unsafe void Save()
                         ptr = Marshal.StringToHGlobalUni(_excludedNames[i]);
                         ptrList.Add(ptr);
                         Interop.NtDll.RtlInitUnicodeString(out record.TopLevelName, ptr);
-                        tmpPtr = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(LSA_FOREST_TRUST_RECORD)));
+                        tmpPtr = Marshal.AllocHGlobal(Marshal.SizeOf<LSA_FOREST_TRUST_RECORD>());
                         ptrList.Add(tmpPtr);
                         Marshal.StructureToPtr(record, tmpPtr, false);
 
@@ -196,7 +196,7 @@ public unsafe void Save()
                         ptrList.Add(record.DomainInfo.NetBIOSNameBuffer);
                         record.DomainInfo.NetBIOSNameLength = (short)(tmp.NetBiosName == null ? 0 : tmp.NetBiosName.Length * 2);
                         record.DomainInfo.NetBIOSNameMaximumLength = (short)(tmp.NetBiosName == null ? 0 : tmp.NetBiosName.Length * 2);
-                        tmpPtr = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(LSA_FOREST_TRUST_RECORD)));
+                        tmpPtr = Marshal.AllocHGlobal(Marshal.SizeOf<LSA_FOREST_TRUST_RECORD>());
                         ptrList.Add(tmpPtr);
                         Marshal.StructureToPtr(record, tmpPtr, false);
 
@@ -222,7 +222,7 @@ public unsafe void Save()
                             ptrList.Add(record.Data.Buffer);
                             Marshal.Copy((byte[])_binaryData[i]!, 0, record.Data.Buffer, record.Data.Length);
                         }
-                        tmpPtr = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(LSA_FOREST_TRUST_RECORD)));
+                        tmpPtr = Marshal.AllocHGlobal(Marshal.SizeOf<LSA_FOREST_TRUST_RECORD>());
                         ptrList.Add(tmpPtr);
                         Marshal.StructureToPtr(record, tmpPtr, false);
 
@@ -235,7 +235,7 @@ public unsafe void Save()
                     LSA_FOREST_TRUST_INFORMATION trustInformation = new LSA_FOREST_TRUST_INFORMATION();
                     trustInformation.RecordCount = count;
                     trustInformation.Entries = records;
-                    forestInfo = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(LSA_FOREST_TRUST_INFORMATION)));
+                    forestInfo = Marshal.AllocHGlobal(Marshal.SizeOf<LSA_FOREST_TRUST_INFORMATION>());
                     Marshal.StructureToPtr(trustInformation, forestInfo, false);
 
                     // get policy server name
diff --git a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationCursorCollection.cs b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationCursorCollection.cs
index 52e2946a207d..6b109f8663b4 100644
--- a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationCursorCollection.cs
+++ b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationCursorCollection.cs
@@ -49,13 +49,13 @@ internal void AddHelper(string partition, object cursors, bool advanced, IntPtr
             else
                 count = ((DS_REPL_CURSORS)cursors).cNumCursors;
 
-            IntPtr addr = (IntPtr)0;
+            IntPtr addr = 0;
 
             for (int i = 0; i < count; i++)
             {
                 if (advanced)
                 {
-                    addr = IntPtr.Add(info, sizeof(int) * 2 + i * Marshal.SizeOf(typeof(DS_REPL_CURSOR_3)));
+                    addr = IntPtr.Add(info, sizeof(int) * 2 + i * Marshal.SizeOf<DS_REPL_CURSOR_3>());
                     DS_REPL_CURSOR_3 cursor = new DS_REPL_CURSOR_3();
                     Marshal.PtrToStructure(addr, cursor);
 
@@ -69,7 +69,7 @@ internal void AddHelper(string partition, object cursors, bool advanced, IntPtr
                 }
                 else
                 {
-                    addr = IntPtr.Add(info, sizeof(int) * 2 + i * Marshal.SizeOf(typeof(DS_REPL_CURSOR)));
+                    addr = IntPtr.Add(info, sizeof(int) * 2 + i * Marshal.SizeOf<DS_REPL_CURSOR>());
                     DS_REPL_CURSOR cursor = new DS_REPL_CURSOR();
                     Marshal.PtrToStructure(addr, cursor);
 
diff --git a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationFailureCollection.cs b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationFailureCollection.cs
index fcf99e14cf3d..d67221932fc3 100644
--- a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationFailureCollection.cs
+++ b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationFailureCollection.cs
@@ -48,11 +48,11 @@ internal void AddHelper(DS_REPL_KCC_DSA_FAILURES failures, IntPtr info)
             // get the count
             int count = failures.cNumEntries;
 
-            IntPtr addr = (IntPtr)0;
+            IntPtr addr = 0;
 
             for (int i = 0; i < count; i++)
             {
-                addr = IntPtr.Add(info, sizeof(int) * 2 + i * Marshal.SizeOf(typeof(DS_REPL_KCC_DSA_FAILURE)));
+                addr = IntPtr.Add(info, sizeof(int) * 2 + i * Marshal.SizeOf<DS_REPL_KCC_DSA_FAILURE>());
 
                 ReplicationFailure managedFailure = new ReplicationFailure(addr, _server, _nameTable);
 
diff --git a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationNeighborCollection.cs b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationNeighborCollection.cs
index 34f7203e9f52..476a7591ef75 100644
--- a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationNeighborCollection.cs
+++ b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationNeighborCollection.cs
@@ -48,11 +48,11 @@ internal void AddHelper(DS_REPL_NEIGHBORS neighbors, IntPtr info)
             // get the count
             int count = neighbors.cNumNeighbors;
 
-            IntPtr addr = (IntPtr)0;
+            IntPtr addr = 0;
 
             for (int i = 0; i < count; i++)
             {
-                addr = IntPtr.Add(info, sizeof(int) * 2 + i * Marshal.SizeOf(typeof(DS_REPL_NEIGHBOR)));
+                addr = IntPtr.Add(info, sizeof(int) * 2 + i * Marshal.SizeOf<DS_REPL_NEIGHBOR>());
 
                 ReplicationNeighbor managedNeighbor = new ReplicationNeighbor(addr, _server, _nameTable);
 
diff --git a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationOperationCollection.cs b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationOperationCollection.cs
index 297691533afb..99b639d6eb26 100644
--- a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationOperationCollection.cs
+++ b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/ReplicationOperationCollection.cs
@@ -48,11 +48,11 @@ internal void AddHelper(DS_REPL_PENDING_OPS operations, IntPtr info)
             // get the count
             int count = operations.cNumPendingOps;
 
-            IntPtr addr = (IntPtr)0;
+            IntPtr addr = 0;
 
             for (int i = 0; i < count; i++)
             {
-                addr = IntPtr.Add(info, Marshal.SizeOf(typeof(DS_REPL_PENDING_OPS)) + i * Marshal.SizeOf(typeof(DS_REPL_OP)));
+                addr = IntPtr.Add(info, Marshal.SizeOf<DS_REPL_PENDING_OPS>() + i * Marshal.SizeOf<DS_REPL_OP>());
                 ReplicationOperation managedOperation = new ReplicationOperation(addr, _server, _nameTable);
 
                 Add(managedOperation);
diff --git a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/TrustHelper.cs b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/TrustHelper.cs
index 3c1f11fec589..6591d54f956e 100644
--- a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/TrustHelper.cs
+++ b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/TrustHelper.cs
@@ -220,7 +220,7 @@ internal static unsafe void SetTrustedDomainInfoStatus(DirectoryContext context,
                     }
 
                     // reconstruct the unmanaged structure to set it back
-                    newInfo = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(Interop.Advapi32.TRUSTED_DOMAIN_INFORMATION_EX)));
+                    newInfo = Marshal.AllocHGlobal(sizeof(Interop.Advapi32.TRUSTED_DOMAIN_INFORMATION_EX));
                     Marshal.StructureToPtr(domainInfo, newInfo, false);
 
                     result = Interop.Advapi32.LsaSetTrustedDomainInfoByName(handle, trustedDomainName, Interop.Advapi32.TRUSTED_INFORMATION_CLASS.TrustedDomainInformationEx, newInfo);
@@ -462,7 +462,7 @@ internal static void CreateTrust(DirectoryContext sourceContext, string? sourceN
                     Marshal.PtrToStructure(info, domainInfo);
 
                     AuthData = new LSA_AUTH_INFORMATION();
-                    fileTime = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(FileTime)));
+                    fileTime = Marshal.AllocHGlobal(Marshal.SizeOf<FileTime>());
                     Interop.Kernel32.GetSystemTimeAsFileTime(fileTime);
 
                     // set the time
@@ -477,7 +477,7 @@ internal static void CreateTrust(DirectoryContext sourceContext, string? sourceN
                     AuthData.AuthInfo = unmanagedPassword;
                     AuthData.AuthInfoLength = password.Length * 2;          // sizeof(WCHAR)
 
-                    unmanagedAuthData = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(LSA_AUTH_INFORMATION)));
+                    unmanagedAuthData = Marshal.AllocHGlobal(Marshal.SizeOf<LSA_AUTH_INFORMATION>());
                     Marshal.StructureToPtr(AuthData, unmanagedAuthData, false);
 
                     Interop.Advapi32.TRUSTED_DOMAIN_AUTH_INFORMATION AuthInfoEx = default;
@@ -616,7 +616,7 @@ internal static unsafe string UpdateTrust(DirectoryContext context, string? sour
 
                     // change the attribute value properly
                     AuthData = new LSA_AUTH_INFORMATION();
-                    fileTime = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(FileTime)));
+                    fileTime = Marshal.AllocHGlobal(Marshal.SizeOf<FileTime>());
                     Interop.Kernel32.GetSystemTimeAsFileTime(fileTime);
 
                     // set the time
@@ -631,7 +631,7 @@ internal static unsafe string UpdateTrust(DirectoryContext context, string? sour
                     AuthData.AuthInfo = unmanagedPassword;
                     AuthData.AuthInfoLength = password.Length * 2;
 
-                    unmanagedAuthData = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(LSA_AUTH_INFORMATION)));
+                    unmanagedAuthData = Marshal.AllocHGlobal(Marshal.SizeOf<LSA_AUTH_INFORMATION>());
                     Marshal.StructureToPtr(AuthData, unmanagedAuthData, false);
 
                     Interop.Advapi32.TRUSTED_DOMAIN_AUTH_INFORMATION AuthInfoEx = default;
@@ -743,7 +743,7 @@ internal static unsafe void UpdateTrustDirection(DirectoryContext context, strin
 
                     // change the attribute value properly
                     AuthData = new LSA_AUTH_INFORMATION();
-                    fileTime = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(FileTime)));
+                    fileTime = Marshal.AllocHGlobal(Marshal.SizeOf<FileTime>());
                     Interop.Kernel32.GetSystemTimeAsFileTime(fileTime);
 
                     // set the time
@@ -758,7 +758,7 @@ internal static unsafe void UpdateTrustDirection(DirectoryContext context, strin
                     AuthData.AuthInfo = unmanagedPassword;
                     AuthData.AuthInfoLength = password.Length * 2;
 
-                    unmanagedAuthData = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(LSA_AUTH_INFORMATION)));
+                    unmanagedAuthData = Marshal.AllocHGlobal(Marshal.SizeOf<LSA_AUTH_INFORMATION>());
                     Marshal.StructureToPtr(AuthData, unmanagedAuthData, false);
 
                     Interop.Advapi32.TRUSTED_DOMAIN_AUTH_INFORMATION AuthInfoEx;
diff --git a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/Utils.cs b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/Utils.cs
index dfa3f1f1e614..56412ca15b02 100644
--- a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/Utils.cs
+++ b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/ActiveDirectory/Utils.cs
@@ -2038,7 +2038,7 @@ internal static bool IsSamUser()
         }
 
 
-        internal static IntPtr GetCurrentUserSid()
+        internal static unsafe IntPtr GetCurrentUserSid()
         {
             SafeTokenHandle? tokenHandle = null;
             IntPtr pBuffer = IntPtr.Zero;
@@ -2120,7 +2120,7 @@ out tokenHandle
                 }
 
                 // Retrieve the user's SID from the user info
-                global::Interop.TOKEN_USER tokenUser = (global::Interop.TOKEN_USER)Marshal.PtrToStructure(pBuffer, typeof(global::Interop.TOKEN_USER))!;
+                Interop.TOKEN_USER tokenUser = *(Interop.TOKEN_USER*)pBuffer;
                 IntPtr pUserSid = tokenUser.sidAndAttributes.Sid;   // this is a reference into the NATIVE memory (into pBuffer)
 
                 Debug.Assert(global::Interop.Advapi32.IsValidSid(pUserSid));
@@ -2147,7 +2147,7 @@ out tokenHandle
             }
         }
 
-        internal static IntPtr GetMachineDomainSid()
+        internal static unsafe IntPtr GetMachineDomainSid()
         {
             SafeLsaPolicyHandle? policyHandle = null;
             IntPtr pBuffer = IntPtr.Zero;
@@ -2178,8 +2178,7 @@ internal static IntPtr GetMachineDomainSid()
                 }
 
                 Debug.Assert(pBuffer != IntPtr.Zero);
-                POLICY_ACCOUNT_DOMAIN_INFO info = (POLICY_ACCOUNT_DOMAIN_INFO)
-                                    Marshal.PtrToStructure(pBuffer, typeof(POLICY_ACCOUNT_DOMAIN_INFO))!;
+                POLICY_ACCOUNT_DOMAIN_INFO info = *(POLICY_ACCOUNT_DOMAIN_INFO*)pBuffer;
 
                 Debug.Assert(global::Interop.Advapi32.IsValidSid(info.DomainSid));
 
@@ -2226,7 +2225,7 @@ internal static bool IsMachineDC(string? computerName)
                 }
 
                 DSROLE_PRIMARY_DOMAIN_INFO_BASIC dsRolePrimaryDomainInfo =
-                    (DSROLE_PRIMARY_DOMAIN_INFO_BASIC)Marshal.PtrToStructure(dsRoleInfoPtr, typeof(DSROLE_PRIMARY_DOMAIN_INFO_BASIC))!;
+                    Marshal.PtrToStructure<DSROLE_PRIMARY_DOMAIN_INFO_BASIC>(dsRoleInfoPtr)!;
 
                 return (dsRolePrimaryDomainInfo.MachineRole == DSROLE_MACHINE_ROLE.DsRole_RoleBackupDomainController ||
                              dsRolePrimaryDomainInfo.MachineRole == DSROLE_MACHINE_ROLE.DsRole_RolePrimaryDomainController);
@@ -2238,15 +2237,14 @@ internal static bool IsMachineDC(string? computerName)
             }
         }
 
-        internal static SidType ClassifySID(IntPtr pSid)
+        internal static unsafe SidType ClassifySID(IntPtr pSid)
         {
             Debug.Assert(global::Interop.Advapi32.IsValidSid(pSid));
 
             // Get the issuing authority and the first RID
             IntPtr pIdentAuth = global::Interop.Advapi32.GetSidIdentifierAuthority(pSid);
 
-            global::Interop.Advapi32.SID_IDENTIFIER_AUTHORITY identAuth =
-                (global::Interop.Advapi32.SID_IDENTIFIER_AUTHORITY)Marshal.PtrToStructure(pIdentAuth, typeof(global::Interop.Advapi32.SID_IDENTIFIER_AUTHORITY))!;
+            Interop.Advapi32.SID_IDENTIFIER_AUTHORITY identAuth = *(Interop.Advapi32.SID_IDENTIFIER_AUTHORITY*)pIdentAuth;
 
             IntPtr pRid = global::Interop.Advapi32.GetSidSubAuthority(pSid, 0);
             int rid = Marshal.ReadInt32(pRid);
diff --git a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/DirectorySearcher.cs b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/DirectorySearcher.cs
index 02f44bcb4765..cdc2a4951234 100644
--- a/src/libraries/System.DirectoryServices/src/System/DirectoryServices/DirectorySearcher.cs
+++ b/src/libraries/System.DirectoryServices/src/System/DirectoryServices/DirectorySearcher.cs
@@ -844,7 +844,7 @@ private unsafe void SetSearchPreferences(UnsafeNativeMethods.IDirectorySearch ad
                         ptrVLVContexToFree = vlvValue.contextID;
                         Marshal.Copy(_vlv.DirectoryVirtualListViewContext._context, 0, vlvValue.contextID, vlvValue.contextIDlength);
                     }
-                    IntPtr vlvPtr = Marshal.AllocHGlobal(Marshal.SizeOf(typeof(AdsVLV)));
+                    IntPtr vlvPtr = Marshal.AllocHGlobal(Marshal.SizeOf<AdsVLV>());
                     byte[] vlvBytes = new byte[Marshal.SizeOf(vlvValue)];
                     try
                     {
@@ -892,10 +892,10 @@ private unsafe void SetSearchPreferences(UnsafeNativeMethods.IDirectorySearch ad
             }
         }
 
-        private static void DoSetSearchPrefs(UnsafeNativeMethods.IDirectorySearch adsSearch, AdsSearchPreferenceInfo[] prefs)
+        private static unsafe void DoSetSearchPrefs(UnsafeNativeMethods.IDirectorySearch adsSearch, AdsSearchPreferenceInfo[] prefs)
         {
-            int structSize = Marshal.SizeOf(typeof(AdsSearchPreferenceInfo));
-            IntPtr ptr = Marshal.AllocHGlobal((IntPtr)(structSize * prefs.Length));
+            int structSize = sizeof(AdsSearchPreferenceInfo);
+            IntPtr ptr = Marshal.AllocHGlobal(structSize * prefs.Length);
             try
             {
                 IntPtr tempPtr = ptr;
diff --git a/src/libraries/System.IO.FileSystem.Watcher/src/System/IO/FileSystemWatcher.Linux.cs b/src/libraries/System.IO.FileSystem.Watcher/src/System/IO/FileSystemWatcher.Linux.cs
index 65dcefea25da..b09e0197e044 100644
--- a/src/libraries/System.IO.FileSystem.Watcher/src/System/IO/FileSystemWatcher.Linux.cs
+++ b/src/libraries/System.IO.FileSystem.Watcher/src/System/IO/FileSystemWatcher.Linux.cs
@@ -720,9 +720,9 @@ private bool ProcessEvent(NotifyEvent nextEvent, ref ReadOnlySpan<char> previous
 
                         break;
                     case Interop.Sys.NotifyEvents.IN_MOVED_TO:
-                        if (previousEventName != null)
+                        if (!previousEventName.IsEmpty)
                         {
-                            // If the previous name from IN_MOVED_FROM is non-null, then this is a rename.
+                            // If the previous name from IN_MOVED_FROM is non-empty, then this is a rename.
                             watcher.NotifyRenameEventArgs(WatcherChangeTypes.Renamed, expandedName, previousEventName);
                         }
                         else
diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.cs
index e57c7e8cf92e..70ac2e600651 100644
--- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.cs
+++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.cs
@@ -196,7 +196,7 @@ public static byte[] Hash(ReadOnlySpan<byte> source, long seed = 0)
         /// <param name="seed">The seed value for this hash computation. The default is zero.</param>
         /// <returns>
         ///   <see langword="true"/> if <paramref name="destination"/> is long enough to receive
-        ///   the computed hash value (4 bytes); otherwise, <see langword="false"/>.
+        ///   the computed hash value (8 bytes); otherwise, <see langword="false"/>.
         /// </returns>
         public static bool TryHash(ReadOnlySpan<byte> source, Span<byte> destination, out int bytesWritten, long seed = 0)
         {
diff --git a/src/libraries/System.IO.Packaging/src/System/IO/Packaging/ContentType.cs b/src/libraries/System.IO.Packaging/src/System/IO/Packaging/ContentType.cs
index c6fe72e7f047..894fd4826aa8 100644
--- a/src/libraries/System.IO.Packaging/src/System/IO/Packaging/ContentType.cs
+++ b/src/libraries/System.IO.Packaging/src/System/IO/Packaging/ContentType.cs
@@ -351,8 +351,6 @@ private void ParseParameterAndValue(ReadOnlySpan<char> parameterAndValue)
         /// <returns></returns>
         private static int GetLengthOfParameterValue(ReadOnlySpan<char> s, int startIndex)
         {
-            Debug.Assert(s != null);
-
             int length;
 
             //if the parameter value does not start with a '"' then,
diff --git a/src/libraries/System.IO.Pipes.AccessControl/ref/System.IO.Pipes.AccessControl.TypeForwards.cs b/src/libraries/System.IO.Pipes.AccessControl/ref/System.IO.Pipes.AccessControl.TypeForwards.cs
new file mode 100644
index 000000000000..4012db03b63b
--- /dev/null
+++ b/src/libraries/System.IO.Pipes.AccessControl/ref/System.IO.Pipes.AccessControl.TypeForwards.cs
@@ -0,0 +1,4 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+[assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.IO.Pipes.PipeAccessRights))]
diff --git a/src/libraries/System.IO.Pipes.AccessControl/ref/System.IO.Pipes.AccessControl.cs b/src/libraries/System.IO.Pipes.AccessControl/ref/System.IO.Pipes.AccessControl.cs
index 775004246a5e..4d35d22a6719 100644
--- a/src/libraries/System.IO.Pipes.AccessControl/ref/System.IO.Pipes.AccessControl.cs
+++ b/src/libraries/System.IO.Pipes.AccessControl/ref/System.IO.Pipes.AccessControl.cs
@@ -14,27 +14,6 @@ public static class NamedPipeServerStreamAcl
     {
         public static System.IO.Pipes.NamedPipeServerStream Create(string pipeName, System.IO.Pipes.PipeDirection direction, int maxNumberOfServerInstances, System.IO.Pipes.PipeTransmissionMode transmissionMode, System.IO.Pipes.PipeOptions options, int inBufferSize, int outBufferSize, System.IO.Pipes.PipeSecurity? pipeSecurity, System.IO.HandleInheritability inheritability = System.IO.HandleInheritability.None, System.IO.Pipes.PipeAccessRights additionalAccessRights = default) { throw null; }
     }
-    [System.FlagsAttribute]
-    public enum PipeAccessRights
-    {
-        ReadData = 1,
-        WriteData = 2,
-        CreateNewInstance = 4,
-        ReadExtendedAttributes = 8,
-        WriteExtendedAttributes = 16,
-        ReadAttributes = 128,
-        WriteAttributes = 256,
-        Write = 274,
-        Delete = 65536,
-        ReadPermissions = 131072,
-        Read = 131209,
-        ReadWrite = 131483,
-        ChangePermissions = 262144,
-        TakeOwnership = 524288,
-        Synchronize = 1048576,
-        FullControl = 2032031,
-        AccessSystemSecurity = 16777216,
-    }
     public sealed partial class PipeAccessRule : System.Security.AccessControl.AccessRule
     {
         public PipeAccessRule(System.Security.Principal.IdentityReference identity, System.IO.Pipes.PipeAccessRights rights, System.Security.AccessControl.AccessControlType type) : base (default(System.Security.Principal.IdentityReference), default(int), default(bool), default(System.Security.AccessControl.InheritanceFlags), default(System.Security.AccessControl.PropagationFlags), default(System.Security.AccessControl.AccessControlType)) { }
diff --git a/src/libraries/System.IO.Pipes.AccessControl/ref/System.IO.Pipes.AccessControl.csproj b/src/libraries/System.IO.Pipes.AccessControl/ref/System.IO.Pipes.AccessControl.csproj
index 24b3086a23d0..d16c599454b6 100644
--- a/src/libraries/System.IO.Pipes.AccessControl/ref/System.IO.Pipes.AccessControl.csproj
+++ b/src/libraries/System.IO.Pipes.AccessControl/ref/System.IO.Pipes.AccessControl.csproj
@@ -5,6 +5,7 @@
 
   <ItemGroup>
     <Compile Include="System.IO.Pipes.AccessControl.cs" />
+    <Compile Include="System.IO.Pipes.AccessControl.TypeForwards.cs" />
   </ItemGroup>
 
   <ItemGroup>
diff --git a/src/libraries/System.IO.Pipes/ref/System.IO.Pipes.cs b/src/libraries/System.IO.Pipes/ref/System.IO.Pipes.cs
index f7b404b93a50..85387dbbd4b6 100644
--- a/src/libraries/System.IO.Pipes/ref/System.IO.Pipes.cs
+++ b/src/libraries/System.IO.Pipes/ref/System.IO.Pipes.cs
@@ -45,6 +45,8 @@ public sealed partial class NamedPipeClientStream : System.IO.Pipes.PipeStream
         public NamedPipeClientStream(System.IO.Pipes.PipeDirection direction, bool isAsync, bool isConnected, Microsoft.Win32.SafeHandles.SafePipeHandle safePipeHandle) : base (default(System.IO.Pipes.PipeDirection), default(int)) { }
         public NamedPipeClientStream(string pipeName) : base (default(System.IO.Pipes.PipeDirection), default(int)) { }
         public NamedPipeClientStream(string serverName, string pipeName) : base (default(System.IO.Pipes.PipeDirection), default(int)) { }
+        [System.Runtime.Versioning.SupportedOSPlatformAttribute("windows")]
+        public NamedPipeClientStream(string serverName, string pipeName, System.IO.Pipes.PipeAccessRights desiredAccessRights, PipeOptions options, System.Security.Principal.TokenImpersonationLevel impersonationLevel, HandleInheritability inheritability) : base(default(System.IO.Pipes.PipeDirection), default(int)) { }
         public NamedPipeClientStream(string serverName, string pipeName, System.IO.Pipes.PipeDirection direction) : base (default(System.IO.Pipes.PipeDirection), default(int)) { }
         public NamedPipeClientStream(string serverName, string pipeName, System.IO.Pipes.PipeDirection direction, System.IO.Pipes.PipeOptions options) : base (default(System.IO.Pipes.PipeDirection), default(int)) { }
         public NamedPipeClientStream(string serverName, string pipeName, System.IO.Pipes.PipeDirection direction, System.IO.Pipes.PipeOptions options, System.Security.Principal.TokenImpersonationLevel impersonationLevel) : base (default(System.IO.Pipes.PipeDirection), default(int)) { }
@@ -82,6 +84,27 @@ public void WaitForConnection() { }
         public System.Threading.Tasks.Task WaitForConnectionAsync() { throw null; }
         public System.Threading.Tasks.Task WaitForConnectionAsync(System.Threading.CancellationToken cancellationToken) { throw null; }
     }
+    [System.FlagsAttribute]
+    public enum PipeAccessRights
+    {
+        ReadData = 1,
+        WriteData = 2,
+        CreateNewInstance = 4,
+        ReadExtendedAttributes = 8,
+        WriteExtendedAttributes = 16,
+        ReadAttributes = 128,
+        WriteAttributes = 256,
+        Write = 274,
+        Delete = 65536,
+        ReadPermissions = 131072,
+        Read = 131209,
+        ReadWrite = 131483,
+        ChangePermissions = 262144,
+        TakeOwnership = 524288,
+        Synchronize = 1048576,
+        FullControl = 2032031,
+        AccessSystemSecurity = 16777216,
+    }
     public enum PipeDirection
     {
         In = 1,
diff --git a/src/libraries/System.IO.Pipes/src/CompatibilitySuppressions.xml b/src/libraries/System.IO.Pipes/src/CompatibilitySuppressions.xml
index a047c21f84ff..db08f006c4c5 100644
--- a/src/libraries/System.IO.Pipes/src/CompatibilitySuppressions.xml
+++ b/src/libraries/System.IO.Pipes/src/CompatibilitySuppressions.xml
@@ -1,7 +1,6 @@
 ﻿<?xml version="1.0" encoding="utf-8"?>
 <!-- https://learn.microsoft.com/en-us/dotnet/fundamentals/package-validation/diagnostic-ids -->
 <Suppressions xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
-  <!-- Exposed public in System.IO.Pipes.AccessControl but implemented in System.IO.Pipes. -->
   <Suppression>
     <DiagnosticId>CP0001</DiagnosticId>
     <Target>T:System.IO.Pipes.AnonymousPipeServerStreamAcl</Target>
@@ -14,12 +13,6 @@
     <Left>ref/net9.0/System.IO.Pipes.dll</Left>
     <Right>runtimes/win/lib/net9.0/System.IO.Pipes.dll</Right>
   </Suppression>
-  <Suppression>
-    <DiagnosticId>CP0001</DiagnosticId>
-    <Target>T:System.IO.Pipes.PipeAccessRights</Target>
-    <Left>ref/net9.0/System.IO.Pipes.dll</Left>
-    <Right>runtimes/win/lib/net9.0/System.IO.Pipes.dll</Right>
-  </Suppression>
   <Suppression>
     <DiagnosticId>CP0001</DiagnosticId>
     <Target>T:System.IO.Pipes.PipeAccessRule</Target>
diff --git a/src/libraries/System.IO.Pipes/src/Resources/Strings.resx b/src/libraries/System.IO.Pipes/src/Resources/Strings.resx
index 8910eb4d7ac4..e50c4aa31441 100644
--- a/src/libraries/System.IO.Pipes/src/Resources/Strings.resx
+++ b/src/libraries/System.IO.Pipes/src/Resources/Strings.resx
@@ -120,6 +120,9 @@
   <data name="ArgumentOutOfRange_NeedValidPipeAccessRights" xml:space="preserve">
     <value>Invalid PipeAccessRights value.</value>
   </data>
+  <data name="PlatformNotSupported_PipeAccessRights" xml:space="preserve">
+    <value>Specifying PipeAccessRights is not supported on this platform.</value>
+  </data>
   <data name="Argument_NonContainerInvalidAnyFlag" xml:space="preserve">
     <value>This flag may not be set on a pipe.</value>
   </data>
diff --git a/src/libraries/System.IO.Pipes/src/System.IO.Pipes.csproj b/src/libraries/System.IO.Pipes/src/System.IO.Pipes.csproj
index d3d1d3152e2e..b36765a03683 100644
--- a/src/libraries/System.IO.Pipes/src/System.IO.Pipes.csproj
+++ b/src/libraries/System.IO.Pipes/src/System.IO.Pipes.csproj
@@ -19,6 +19,7 @@
     <Compile Include="System\IO\Pipes\AnonymousPipeServerStream.cs" />
     <Compile Include="System\IO\Pipes\NamedPipeClientStream.cs" />
     <Compile Include="System\IO\Pipes\NamedPipeServerStream.cs" />
+    <Compile Include="System\IO\Pipes\PipeAccessRights.cs" />
     <Compile Include="System\IO\Pipes\PipeDirection.cs" />
     <Compile Include="System\IO\Pipes\PipeOptions.cs" />
     <Compile Include="System\IO\Pipes\PipeState.cs" />
@@ -116,7 +117,6 @@
     <Compile Include="System\IO\Pipes\NamedPipeClientStream.Windows.cs" />
     <Compile Include="System\IO\Pipes\NamedPipeServerStream.Windows.cs" />
     <Compile Include="System\IO\Pipes\NamedPipeServerStream.Win32.cs" />
-    <Compile Include="System\IO\Pipes\PipeAccessRights.cs" />
     <Compile Include="System\IO\Pipes\PipeAccessRule.cs" />
     <Compile Include="System\IO\Pipes\PipeAuditRule.cs" />
     <Compile Include="System\IO\Pipes\PipesAclExtensions.cs" />
diff --git a/src/libraries/System.IO.Pipes/src/System/IO/Pipes/NamedPipeClientStream.Unix.cs b/src/libraries/System.IO.Pipes/src/System/IO/Pipes/NamedPipeClientStream.Unix.cs
index 2dc43385bb6e..4c7df8d63e73 100644
--- a/src/libraries/System.IO.Pipes/src/System/IO/Pipes/NamedPipeClientStream.Unix.cs
+++ b/src/libraries/System.IO.Pipes/src/System/IO/Pipes/NamedPipeClientStream.Unix.cs
@@ -7,6 +7,7 @@
 using System.Runtime.InteropServices;
 using System.Runtime.Versioning;
 using System.Security;
+using System.Security.Principal;
 using System.Threading;
 using Microsoft.Win32.SafeHandles;
 
@@ -18,6 +19,16 @@ namespace System.IO.Pipes
     /// </summary>
     public sealed partial class NamedPipeClientStream : PipeStream
     {
+        [System.Runtime.Versioning.SupportedOSPlatform("windows")]
+        public NamedPipeClientStream(string serverName, string pipeName, PipeAccessRights desiredAccessRights,
+            PipeOptions options, TokenImpersonationLevel impersonationLevel, HandleInheritability inheritability)
+            : base(PipeDirection.InOut, 0)
+        {
+            throw new PlatformNotSupportedException(SR.PlatformNotSupported_PipeAccessRights);
+        }
+
+        private static int AccessRightsFromDirection(PipeDirection _) => 0;
+
         private bool TryConnect(int _ /* timeout */)
         {
             // timeout isn't used as Connect will be very fast,
diff --git a/src/libraries/System.IO.Pipes/src/System/IO/Pipes/NamedPipeClientStream.Windows.cs b/src/libraries/System.IO.Pipes/src/System/IO/Pipes/NamedPipeClientStream.Windows.cs
index 6f612bbc904b..7ffb560a3590 100644
--- a/src/libraries/System.IO.Pipes/src/System/IO/Pipes/NamedPipeClientStream.Windows.cs
+++ b/src/libraries/System.IO.Pipes/src/System/IO/Pipes/NamedPipeClientStream.Windows.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Versioning;
 using System.Security.Principal;
@@ -16,6 +17,81 @@ namespace System.IO.Pipes
     /// </summary>
     public sealed partial class NamedPipeClientStream : PipeStream
     {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="NamedPipeClientStream"/> class with the specified pipe and server names,
+        /// the desired <see cref="PipeAccessRights"/>, and the specified impersonation level and inheritability.
+        /// </summary>
+        /// <param name="serverName">The name of the remote computer to connect to, or "." to specify the local computer.</param>
+        /// <param name="pipeName">The name of the pipe.</param>
+        /// <param name="desiredAccessRights">One of the enumeration values that specifies the desired access rights of the pipe.</param>
+        /// <param name="options">One of the enumeration values that determines how to open or create the pipe.</param>
+        /// <param name="impersonationLevel">One of the enumeration values that determines the security impersonation level.</param>
+        /// <param name="inheritability">One of the enumeration values that determines whether the underlying handle will be inheritable by child processes.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="pipeName"/> or <paramref name="serverName"/> is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException"><paramref name="pipeName"/> or <paramref name="serverName"/> is a zero-length string.</exception>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="pipeName"/> is set to "anonymous".</exception>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="desiredAccessRights"/> is not a valid <see cref="PipeAccessRights"/> value.</exception>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="options"/> is not a valid <see cref="PipeOptions"/> value.</exception>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="impersonationLevel"/> is not a valid <see cref="TokenImpersonationLevel"/> value.</exception>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="inheritability"/> is not a valid <see cref="HandleInheritability"/> value.</exception>
+        /// <remarks>
+        /// The pipe direction for this constructor is determined by the <paramref name="desiredAccessRights"/> parameter.
+        /// If the <paramref name="desiredAccessRights"/> parameter specifies <see cref="PipeAccessRights.ReadData"/>,
+        /// the pipe direction is <see cref="PipeDirection.In"/>. If the <paramref name="desiredAccessRights"/> parameter
+        /// specifies <see cref="PipeAccessRights.WriteData"/>, the pipe direction is <see cref="PipeDirection.Out"/>.
+        /// If the value of <paramref name="desiredAccessRights"/> specifies both <see cref="PipeAccessRights.ReadData"/>
+        /// and <see cref="PipeAccessRights.WriteData"/>, the pipe direction is <see cref="PipeDirection.InOut"/>.
+        /// </remarks>
+        [System.Runtime.Versioning.SupportedOSPlatform("windows")]
+        public NamedPipeClientStream(string serverName, string pipeName, PipeAccessRights desiredAccessRights,
+            PipeOptions options, TokenImpersonationLevel impersonationLevel, HandleInheritability inheritability)
+            : this(serverName, pipeName, DirectionFromRights(desiredAccessRights), options, impersonationLevel, inheritability)
+        {
+            _accessRights = (int)desiredAccessRights;
+        }
+
+        private static PipeDirection DirectionFromRights(PipeAccessRights desiredAccessRights, [CallerArgumentExpression(nameof(desiredAccessRights))] string? argumentName = null)
+        {
+            // Validate the desiredAccessRights parameter here to ensure an invalid value does not result
+            // in an argument exception being thrown for the direction argument
+            // Throw if there are any unrecognized bits
+            // Throw if neither ReadData nor WriteData are specified, as this will result in an invalid PipeDirection
+            if ((desiredAccessRights & ~(PipeAccessRights.FullControl | PipeAccessRights.AccessSystemSecurity)) != 0 ||
+                ((desiredAccessRights & (PipeAccessRights.ReadData | PipeAccessRights.WriteData)) == 0))
+            {
+                throw new ArgumentOutOfRangeException(argumentName, SR.ArgumentOutOfRange_NeedValidPipeAccessRights);
+            }
+
+            PipeDirection direction = 0;
+
+            if ((desiredAccessRights & PipeAccessRights.ReadData) != 0)
+            {
+                direction |= PipeDirection.In;
+            }
+            if ((desiredAccessRights & PipeAccessRights.WriteData) != 0)
+            {
+                direction |= PipeDirection.Out;
+            }
+
+            return direction;
+        }
+
+        private static int AccessRightsFromDirection(PipeDirection direction)
+        {
+            int access = 0;
+
+            if ((PipeDirection.In & direction) != 0)
+            {
+                access |= Interop.Kernel32.GenericOperations.GENERIC_READ;
+            }
+            if ((PipeDirection.Out & direction) != 0)
+            {
+                access |= Interop.Kernel32.GenericOperations.GENERIC_WRITE;
+            }
+
+            return access;
+        }
+
         // Waits for a pipe instance to become available. This method may return before WaitForConnection is called
         // on the server end, but WaitForConnection will not return until we have returned.  Any data written to the
         // pipe by us after we have connected but before the server has called WaitForConnection will be available
@@ -34,17 +110,7 @@ private bool TryConnect(int timeout)
                 _pipeFlags |= (((int)_impersonationLevel - 1) << 16);
             }
 
-            int access = 0;
-            if ((PipeDirection.In & _direction) != 0)
-            {
-                access |= Interop.Kernel32.GenericOperations.GENERIC_READ;
-            }
-            if ((PipeDirection.Out & _direction) != 0)
-            {
-                access |= Interop.Kernel32.GenericOperations.GENERIC_WRITE;
-            }
-
-            SafePipeHandle handle = CreateNamedPipeClient(_normalizedPipePath, ref secAttrs, _pipeFlags, access);
+            SafePipeHandle handle = CreateNamedPipeClient(_normalizedPipePath, ref secAttrs, _pipeFlags, _accessRights);
 
             if (handle.IsInvalid)
             {
@@ -81,7 +147,7 @@ private bool TryConnect(int timeout)
                 }
 
                 // Pipe server should be free. Let's try to connect to it.
-                handle = CreateNamedPipeClient(_normalizedPipePath, ref secAttrs, _pipeFlags, access);
+                handle = CreateNamedPipeClient(_normalizedPipePath, ref secAttrs, _pipeFlags, _accessRights);
 
                 if (handle.IsInvalid)
                 {
diff --git a/src/libraries/System.IO.Pipes/src/System/IO/Pipes/NamedPipeClientStream.cs b/src/libraries/System.IO.Pipes/src/System/IO/Pipes/NamedPipeClientStream.cs
index 35681b35b89b..26db55330317 100644
--- a/src/libraries/System.IO.Pipes/src/System/IO/Pipes/NamedPipeClientStream.cs
+++ b/src/libraries/System.IO.Pipes/src/System/IO/Pipes/NamedPipeClientStream.cs
@@ -24,6 +24,7 @@ public sealed partial class NamedPipeClientStream : PipeStream
         private readonly PipeOptions _pipeOptions;
         private readonly HandleInheritability _inheritability;
         private readonly PipeDirection _direction;
+        private readonly int _accessRights;
 
         // Creates a named pipe client using default server (same machine, or "."), and PipeDirection.InOut
         public NamedPipeClientStream(string pipeName)
@@ -84,6 +85,7 @@ public NamedPipeClientStream(string serverName, string pipeName, PipeDirection d
             _inheritability = inheritability;
             _impersonationLevel = impersonationLevel;
             _pipeOptions = options;
+            _accessRights = AccessRightsFromDirection(direction);
         }
 
         // Create a NamedPipeClientStream from an existing server pipe handle.
diff --git a/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.CreateClient.Unix.cs b/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.CreateClient.Unix.cs
new file mode 100644
index 000000000000..15e84f2ffeff
--- /dev/null
+++ b/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.CreateClient.Unix.cs
@@ -0,0 +1,35 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Security.Principal;
+using Microsoft.Win32.SafeHandles;
+using Xunit;
+
+namespace System.IO.Pipes.Tests
+{
+    /// <summary>
+    /// Unix-specific tests for the constructors for NamedPipeClientStream
+    /// </summary>
+    public partial class NamedPipeTest_CreateClient
+    {
+        [Fact]
+        public static void NotSupportedPipeAccessRights_Throws_PlatformNotSupportedException()
+        {
+            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream(".", "client1", PipeAccessRights.FullControl, PipeOptions.None, TokenImpersonationLevel.None, HandleInheritability.None));
+        }
+
+        [Fact]
+        public static void NotSupportedPipePath_Throws_PlatformNotSupportedException()
+        {
+            string hostName;
+            Assert.True(InteropTest.TryGetHostName(out hostName));
+
+            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream("foobar" + hostName, "foobar"));
+            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream(hostName, "foobar" + Path.GetInvalidFileNameChars()[0]));
+            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream(hostName, "/tmp/foo\0bar"));
+            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream(hostName, "/tmp/foobar/"));
+            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream(hostName, "/"));
+            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream(hostName, "\0"));
+        }
+    }
+}
diff --git a/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.CreateClient.Windows.cs b/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.CreateClient.Windows.cs
new file mode 100644
index 000000000000..93fef211c881
--- /dev/null
+++ b/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.CreateClient.Windows.cs
@@ -0,0 +1,69 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Security.Principal;
+using Microsoft.Win32.SafeHandles;
+using Xunit;
+
+namespace System.IO.Pipes.Tests
+{
+    /// <summary>
+    /// Windows-specific tests for the constructors for NamedPipeClientStream
+    /// </summary>
+    public partial class NamedPipeTest_CreateClient
+    {
+        [Fact]
+        public static void EmptyStringPipeName_Throws_ArgumentException_WithAccessRights()
+        {
+            AssertExtensions.Throws<ArgumentException>("pipeName", () => new NamedPipeClientStream(".", "", PipeAccessRights.FullControl, PipeOptions.None, TokenImpersonationLevel.None, HandleInheritability.None));
+        }
+
+        [Fact]
+        public static void NullServerName_Throws_ArgumentNullException_WithAccessRights()
+        {
+            AssertExtensions.Throws<ArgumentNullException>("serverName", () => new NamedPipeClientStream(null, "client1", PipeAccessRights.FullControl, PipeOptions.None, TokenImpersonationLevel.None, HandleInheritability.None));
+        }
+
+        [Fact]
+        public static void EmptyStringServerName_Throws_ArgumentException_WithAccessRights()
+        {
+            AssertExtensions.Throws<ArgumentException>(null, () => new NamedPipeClientStream("", "client1", PipeAccessRights.FullControl, PipeOptions.None, TokenImpersonationLevel.None, HandleInheritability.None));
+        }
+
+        [Fact]
+        public static void ReservedPipeName_Throws_ArgumentOutOfRangeException_WithAccessRights()
+        {
+            AssertExtensions.Throws<ArgumentOutOfRangeException>("pipeName", () => new NamedPipeClientStream(".", "anonymous", PipeAccessRights.FullControl, PipeOptions.None, TokenImpersonationLevel.None, HandleInheritability.None));
+        }
+
+        [Theory]
+        [InlineData(0)]  // No bits set
+        [InlineData(32)] // Invalid bit
+        [InlineData(32 + (int)PipeAccessRights.ReadData)] // ReadData plus an invalid bit
+        [InlineData(32 + (int)PipeAccessRights.WriteData)] // WriteData plus an invalid bit
+        [InlineData((int)PipeAccessRights.WriteAttributes)] // Missing ReadData and WriteData (no direction can be determined)
+        public static void InvalidPipeAccessRights_Throws_ArgumentOutOfRangeException(int rights)
+        {
+            AssertExtensions.Throws<ArgumentOutOfRangeException>("desiredAccessRights", () => new NamedPipeClientStream(".", "client1", (PipeAccessRights)rights, PipeOptions.None, TokenImpersonationLevel.None, HandleInheritability.None));
+        }
+
+        [Fact]
+        public static void InvalidPipeOptions_Throws_ArgumentOutOfRangeException_WithAccessRights()
+        {
+            AssertExtensions.Throws<ArgumentOutOfRangeException>("options", () => new NamedPipeClientStream(".", "client1", PipeAccessRights.FullControl, (PipeOptions)255, TokenImpersonationLevel.None, HandleInheritability.None));
+        }
+
+        [Fact]
+        public static void InvalidImpersonationLevel_Throws_ArgumentOutOfRangeException_WithAccessRights()
+        {
+            AssertExtensions.Throws<ArgumentOutOfRangeException>("impersonationLevel", () => new NamedPipeClientStream(".", "client1", PipeAccessRights.FullControl, PipeOptions.None, (TokenImpersonationLevel)999, HandleInheritability.None));
+        }
+
+        [Fact]
+        public static void NamedPipeClientStream_InvalidHandleInerhitability_WithAccessRights()
+        {
+            AssertExtensions.Throws<ArgumentOutOfRangeException>("inheritability", () => new NamedPipeClientStream("a", "b", PipeAccessRights.FullControl, 0, TokenImpersonationLevel.Delegation, HandleInheritability.None - 1));
+            AssertExtensions.Throws<ArgumentOutOfRangeException>("inheritability", () => new NamedPipeClientStream("a", "b", PipeAccessRights.FullControl, 0, TokenImpersonationLevel.Delegation, HandleInheritability.Inheritable + 1));
+        }
+    }
+}
diff --git a/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.CreateClient.cs b/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.CreateClient.cs
index 362ddd80be88..5b164b849fac 100644
--- a/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.CreateClient.cs
+++ b/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.CreateClient.cs
@@ -10,7 +10,7 @@ namespace System.IO.Pipes.Tests
     /// <summary>
     /// Tests for the constructors for NamedPipeClientStream
     /// </summary>
-    public class NamedPipeTest_CreateClient
+    public partial class NamedPipeTest_CreateClient
     {
         [Fact]
         public static void NullPipeName_Throws_ArgumentNullException()
@@ -65,21 +65,6 @@ public static void ReservedPipeName_Throws_ArgumentOutOfRangeException(PipeDirec
             AssertExtensions.Throws<ArgumentOutOfRangeException>("pipeName", () => new NamedPipeClientStream(serverName, reservedName, direction, PipeOptions.None, TokenImpersonationLevel.Impersonation));
         }
 
-        [Fact]
-        [PlatformSpecific(TestPlatforms.AnyUnix)]  // Not supported pipe path throws PNSE on Unix
-        public static void NotSupportedPipePath_Throws_PlatformNotSupportedException()
-        {
-            string hostName;
-            Assert.True(InteropTest.TryGetHostName(out hostName));
-
-            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream("foobar" + hostName, "foobar"));
-            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream(hostName, "foobar" + Path.GetInvalidFileNameChars()[0]));
-            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream(hostName, "/tmp/foo\0bar"));
-            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream(hostName, "/tmp/foobar/"));
-            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream(hostName, "/"));
-            Assert.Throws<PlatformNotSupportedException>(() => new NamedPipeClientStream(hostName, "\0"));
-        }
-
         [Theory]
         [InlineData((PipeDirection)123)]
         public static void InvalidPipeDirection_Throws_ArgumentOutOfRangeException(PipeDirection direction)
@@ -155,7 +140,7 @@ public static void BadHandleKind_Throws_IOException(PipeDirection direction)
         }
 
         [Fact]
-        public void NamedPipeClientStream_InvalidHandleInerhitability()
+        public static void NamedPipeClientStream_InvalidHandleInerhitability()
         {
             AssertExtensions.Throws<ArgumentOutOfRangeException>("inheritability", () => new NamedPipeClientStream("a", "b", PipeDirection.Out, 0, TokenImpersonationLevel.Delegation, HandleInheritability.None - 1));
             AssertExtensions.Throws<ArgumentOutOfRangeException>("inheritability", () => new NamedPipeClientStream("a", "b", PipeDirection.Out, 0, TokenImpersonationLevel.Delegation, HandleInheritability.Inheritable + 1));
diff --git a/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.MessageMode.Windows.cs b/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.MessageMode.Windows.cs
new file mode 100644
index 000000000000..ff28c9585587
--- /dev/null
+++ b/src/libraries/System.IO.Pipes/tests/NamedPipeTests/NamedPipeTest.MessageMode.Windows.cs
@@ -0,0 +1,102 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.IO.Tests;
+using System.Linq;
+using System.Runtime.InteropServices;
+using System.Threading;
+using System.Threading.Tasks;
+using Xunit;
+
+namespace System.IO.Pipes.Tests
+{
+    // Support for PipeAccessRights and setting ReadMode to Message is only supported on Windows
+    public class NamedPipeTest_MessageMode_Windows
+    {
+        private const PipeAccessRights MinimumMessageAccessRights = PipeAccessRights.ReadData | PipeAccessRights.WriteAttributes;
+
+        private static NamedPipeClientStream CreateClientStream(string pipeName, PipeOptions options) =>
+            new NamedPipeClientStream(".", pipeName, MinimumMessageAccessRights, options, Security.Principal.TokenImpersonationLevel.None, HandleInheritability.None);
+
+        [Theory]
+        [InlineData(PipeDirection.Out, PipeOptions.None)]
+        [InlineData(PipeDirection.InOut, PipeOptions.Asynchronous)]
+        public async Task Client_DetectsMessageCompleted(PipeDirection serverDirection, PipeOptions options)
+        {
+            string pipeName = PipeStreamConformanceTests.GetUniquePipeName();
+
+            using NamedPipeServerStream server = new NamedPipeServerStream(pipeName, serverDirection, 1, PipeTransmissionMode.Message, options);
+            using NamedPipeClientStream client = CreateClientStream(pipeName, options);
+
+            Task.WaitAll(server.WaitForConnectionAsync(), client.ConnectAsync());
+            client.ReadMode = PipeTransmissionMode.Message;
+
+            ValueTask serverWrite = server.WriteAsync(new byte[] { 1, 2, 3, 4, 5 });
+
+            byte[] buffer1 = new byte[2], buffer2 = new byte[2], buffer3 = new byte[2];
+            bool[] messageCompleted = new bool[3];
+
+            int bytesRead = client.Read(buffer1, 0, 2);
+            messageCompleted[0] = client.IsMessageComplete;
+
+            bytesRead += client.Read(buffer2, 0, 2);
+            messageCompleted[1] = client.IsMessageComplete;
+
+            bytesRead += client.Read(buffer3, 0, 2);
+            messageCompleted[2] = client.IsMessageComplete;
+
+            Assert.Equal(5, bytesRead);
+            Assert.Equal(new byte[] { 1, 2, 3, 4, 5, 0 }, buffer1.Concat(buffer2).Concat(buffer3));
+            Assert.Equal(new bool[] { false, false, true }, messageCompleted);
+
+            await serverWrite;
+        }
+
+        [Theory]
+        [InlineData(PipeTransmissionMode.Byte, PipeOptions.None)]
+        [InlineData(PipeTransmissionMode.Message, PipeOptions.None)]
+        [InlineData(PipeTransmissionMode.Byte, PipeOptions.Asynchronous)]
+        [InlineData(PipeTransmissionMode.Message, PipeOptions.Asynchronous)]
+        public void ServerIn_ClientConnect_Throws(PipeTransmissionMode serverMode, PipeOptions options)
+        {
+            string pipeName = PipeStreamConformanceTests.GetUniquePipeName();
+
+            using NamedPipeServerStream server = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, serverMode, options);
+            using NamedPipeClientStream client = CreateClientStream(pipeName, options);
+
+            Assert.Throws<UnauthorizedAccessException>(() => client.Connect());
+        }
+
+        [Theory]
+        [InlineData(PipeDirection.Out, PipeTransmissionMode.Byte, PipeOptions.None)]
+        [InlineData(PipeDirection.Out, PipeTransmissionMode.Byte, PipeOptions.Asynchronous)]
+        [InlineData(PipeDirection.InOut, PipeTransmissionMode.Byte, PipeOptions.None)]
+        [InlineData(PipeDirection.InOut, PipeTransmissionMode.Byte, PipeOptions.Asynchronous)]
+        public void ServerByteMode_ClientReadModeMessage_Throws(PipeDirection serverDirection, PipeTransmissionMode serverMode, PipeOptions options)
+        {
+            string pipeName = PipeStreamConformanceTests.GetUniquePipeName();
+
+            using NamedPipeServerStream server = new NamedPipeServerStream(pipeName, serverDirection, 1, serverMode, options);
+            using NamedPipeClientStream client = CreateClientStream(pipeName, options);
+
+            Task.WaitAll(server.WaitForConnectionAsync(), client.ConnectAsync());
+
+            Assert.Throws<IOException>(() => client.ReadMode = PipeTransmissionMode.Message);
+        }
+
+        [Fact]
+        public void PipeAccessRights_Without_WriteAttributes_ClientReadModeMessage_Throws()
+        {
+            string pipeName = PipeStreamConformanceTests.GetUniquePipeName();
+            PipeAccessRights rights = MinimumMessageAccessRights & ~PipeAccessRights.WriteAttributes;
+
+            using NamedPipeServerStream server = new NamedPipeServerStream(pipeName, PipeDirection.InOut, 1, PipeTransmissionMode.Message);
+            using NamedPipeClientStream client = new NamedPipeClientStream(".", pipeName, rights, PipeOptions.None, Security.Principal.TokenImpersonationLevel.None, HandleInheritability.None);
+
+            Task.WaitAll(server.WaitForConnectionAsync(), client.ConnectAsync());
+
+            Assert.Throws<UnauthorizedAccessException>(() => client.ReadMode = PipeTransmissionMode.Message);
+        }
+    }
+}
diff --git a/src/libraries/System.IO.Pipes/tests/System.IO.Pipes.Tests.csproj b/src/libraries/System.IO.Pipes/tests/System.IO.Pipes.Tests.csproj
index ef930d859fe1..6d498058d831 100644
--- a/src/libraries/System.IO.Pipes/tests/System.IO.Pipes.Tests.csproj
+++ b/src/libraries/System.IO.Pipes/tests/System.IO.Pipes.Tests.csproj
@@ -25,7 +25,9 @@
     <Compile Include="$(CommonTestPath)System\Threading\Tasks\TaskTimeoutExtensions.cs" Link="Common\System\Threading\Tasks\TaskTimeoutExtensions.cs" />
   </ItemGroup>
   <ItemGroup Condition="'$(TargetPlatformIdentifier)' == 'windows'">
+    <Compile Include="NamedPipeTests\NamedPipeTest.CreateClient.Windows.cs" />
     <Compile Include="NamedPipeTests\NamedPipeTest.CurrentUserOnly.Windows.cs" />
+    <Compile Include="NamedPipeTests\NamedPipeTest.MessageMode.Windows.cs" />
     <Compile Include="NamedPipeTests\NamedPipeTest.RunAsClient.Windows.cs" />
     <Compile Include="InteropTest.Windows.cs" />
     <Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.CancelIoEx.cs" Link="Common\Interop\Windows\Interop.CancelIoEx.cs" />
@@ -36,6 +38,7 @@
     <Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.MaxLengths.cs" Link="Common\Interop\Windows\Interop.MaxLengths.cs" />
   </ItemGroup>
   <ItemGroup Condition="'$(TargetPlatformIdentifier)' == 'unix'">
+    <Compile Include="NamedPipeTests\NamedPipeTest.CreateClient.Unix.cs" />
     <Compile Include="NamedPipeTests\NamedPipeTest.CurrentUserOnly.Unix.cs" />
     <Compile Include="NamedPipeTests\NamedPipeTest.UnixDomainSockets.cs" />
     <Compile Include="NamedPipeTests\NamedPipeTest.RunAsClient.Unix.cs" />
diff --git a/src/libraries/System.IO.Ports/src/System/IO/Ports/SerialPort.cs b/src/libraries/System.IO.Ports/src/System/IO/Ports/SerialPort.cs
index 83a9a02926bd..752163fd2bd3 100644
--- a/src/libraries/System.IO.Ports/src/System/IO/Ports/SerialPort.cs
+++ b/src/libraries/System.IO.Ports/src/System/IO/Ports/SerialPort.cs
@@ -963,7 +963,21 @@ public string ReadExisting()
                 Buffer.BlockCopy(_inBuffer, _readPos, bytesReceived, 0, CachedBytesToRead);
             }
 
-            _internalSerialStream.Read(bytesReceived, CachedBytesToRead, bytesReceived.Length - (CachedBytesToRead));    // get everything
+#if NET7_0_OR_GREATER
+            _internalSerialStream.ReadExactly(bytesReceived, CachedBytesToRead, bytesReceived.Length - CachedBytesToRead);    // get everything
+#else
+            int readCount = bytesReceived.Length - CachedBytesToRead;
+            int totalRead = 0;
+            while (totalRead < readCount)
+            {
+                int bytesRead = _internalSerialStream.Read(bytesReceived, CachedBytesToRead + totalRead, readCount - totalRead);
+                if (bytesRead <= 0)
+                {
+                    throw new EndOfStreamException();
+                }
+                totalRead += bytesRead;
+            }
+#endif
 
             // Read full characters and leave partial input in the buffer. Encoding.GetCharCount doesn't work because
             // it returns fallback characters on partial input, meaning that it overcounts. Instead, we use
diff --git a/src/libraries/System.Linq.Expressions/src/ILLink/ILLink.Substitutions.xml b/src/libraries/System.Linq.Expressions/src/ILLink/ILLink.Substitutions.xml
index 600116ec2ba0..b64cc2d765d6 100644
--- a/src/libraries/System.Linq.Expressions/src/ILLink/ILLink.Substitutions.xml
+++ b/src/libraries/System.Linq.Expressions/src/ILLink/ILLink.Substitutions.xml
@@ -1,13 +1,7 @@
 <linker>
   <assembly fullname="System.Linq.Expressions">
-    <type fullname="System.Linq.Expressions.LambdaExpression">
-      <method signature="System.Boolean get_CanCompileToIL()" feature="System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeSupported" featurevalue="false" body="stub" value="false" />
-    </type>
     <type fullname="System.Dynamic.Utils.DelegateHelpers">
       <method signature="System.Boolean get_CanEmitObjectArrayDelegate()" feature="System.Linq.Expressions.CanEmitObjectArrayDelegate" featurevalue="false" body="stub" value="false" />
     </type>
-    <type fullname="System.Linq.Expressions.Interpreter.CallInstruction">
-      <method signature="System.Boolean get_CanCreateArbitraryDelegates()" feature="System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeSupported" featurevalue="false" body="stub" value="false" />
-    </type>
   </assembly>
 </linker>
diff --git a/src/libraries/System.Linq.Expressions/src/System/Dynamic/Utils/TypeUtils.cs b/src/libraries/System.Linq.Expressions/src/System/Dynamic/Utils/TypeUtils.cs
index f15f09139f70..1cb6419a134e 100644
--- a/src/libraries/System.Linq.Expressions/src/System/Dynamic/Utils/TypeUtils.cs
+++ b/src/libraries/System.Linq.Expressions/src/System/Dynamic/Utils/TypeUtils.cs
@@ -41,10 +41,7 @@ public static Type LiftPrimitiveOrThrow(this Type type)
         {
             if (RuntimeFeature.IsDynamicCodeSupported)
             {
-#pragma warning disable IL3050
-                // Analyzer doesn't yet understand feature switches
                 return GetNullableType(type);
-#pragma warning restore IL3050
             }
             if (!type.IsValueType || IsNullableType(type))
             {
diff --git a/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/Compiler/DelegateHelpers.cs b/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/Compiler/DelegateHelpers.cs
index 5be00ff31ea8..f4c45f7e85bb 100644
--- a/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/Compiler/DelegateHelpers.cs
+++ b/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/Compiler/DelegateHelpers.cs
@@ -122,13 +122,10 @@ private static System.Reflection.TypeInfo MakeNewCustomDelegate(Type[] types)
                 const MethodImplAttributes implAttributes = MethodImplAttributes.Runtime | MethodImplAttributes.Managed;
                 const MethodAttributes invokeAttributes = MethodAttributes.Public | MethodAttributes.HideBySig | MethodAttributes.NewSlot | MethodAttributes.Virtual;
 
-#pragma warning disable IL3050
-                // Suppress analyzer warnings since they don't currently support feature flags
                 TypeBuilder builder = AssemblyGen.DefineDelegateType("Delegate" + types.Length);
                 builder.DefineConstructor(ctorAttributes, CallingConventions.Standard, delegateCtorSignature).SetImplementationFlags(implAttributes);
                 builder.DefineMethod("Invoke", invokeAttributes, returnType, parameters).SetImplementationFlags(implAttributes);
                 return builder.CreateTypeInfo();
-#pragma warning restore IL3050
             }
             else
             {
diff --git a/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/Interpreter/CallInstruction.cs b/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/Interpreter/CallInstruction.cs
index e582660ed3d2..ce8867e87a43 100644
--- a/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/Interpreter/CallInstruction.cs
+++ b/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/Interpreter/CallInstruction.cs
@@ -16,6 +16,7 @@ internal abstract partial class CallInstruction : Instruction
         /// </summary>
         public abstract int ArgumentCount { get; }
 
+        [FeatureGuard(typeof(RequiresDynamicCodeAttribute))]
         private static bool CanCreateArbitraryDelegates => RuntimeFeature.IsDynamicCodeSupported;
 
         #region Construction
@@ -50,8 +51,6 @@ public static CallInstruction Create(MethodInfo info, ParameterInfo[] parameters
             if (!CanCreateArbitraryDelegates)
                 return new MethodInfoCallInstruction(info, argumentCount);
 
-            // This code should be unreachable in AOT. The analyzer currently doesn't understand feature switches
-#pragma warning disable IL3050
             if (!info.IsStatic && info.DeclaringType!.IsValueType)
             {
                 return new MethodInfoCallInstruction(info, argumentCount);
@@ -115,7 +114,6 @@ public static CallInstruction Create(MethodInfo info, ParameterInfo[] parameters
             s_cache[info] = res;
 
             return res;
-#pragma warning restore IL3050
         }
 
         private static CallInstruction GetArrayAccessor(MethodInfo info, int argumentCount)
diff --git a/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/Interpreter/InstructionList.cs b/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/Interpreter/InstructionList.cs
index c9d525a81a08..148e24f812e0 100644
--- a/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/Interpreter/InstructionList.cs
+++ b/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/Interpreter/InstructionList.cs
@@ -44,7 +44,6 @@ internal sealed class DebugView
 
             public DebugView(InstructionArray array)
             {
-                ArgumentNullException.ThrowIfNull(array);
                 _array = array;
             }
 
diff --git a/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/LambdaExpression.cs b/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/LambdaExpression.cs
index d3f0366a01f3..a0ccf07bd77d 100644
--- a/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/LambdaExpression.cs
+++ b/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/LambdaExpression.cs
@@ -25,7 +25,7 @@ public abstract class LambdaExpression : Expression, IParameterProvider
 
         private readonly Expression _body;
 
-        // This can be flipped to false using feature switches at publishing time
+        [FeatureGuard(typeof(RequiresDynamicCodeAttribute))]
         public static bool CanCompileToIL => RuntimeFeature.IsDynamicCodeSupported;
 
         // This could be flipped to false using feature switches at publishing time
@@ -138,10 +138,7 @@ public Delegate Compile()
         {
             if (CanCompileToIL)
             {
-#pragma warning disable IL3050
-                // Analyzer doesn't yet understand feature switches
                 return Compiler.LambdaCompiler.Compile(this);
-#pragma warning restore IL3050
             }
             else
             {
@@ -221,10 +218,7 @@ internal Expression(Expression body)
         {
             if (CanCompileToIL)
             {
-#pragma warning disable IL3050
-                // Analyzer doesn't yet understand feature switches
                 return (TDelegate)(object)Compiler.LambdaCompiler.Compile(this);
-#pragma warning restore IL3050
             }
             else
             {
@@ -629,10 +623,7 @@ internal static LambdaExpression CreateLambda(Type delegateType, Expression body
                 MethodInfo create;
                 if (LambdaExpression.CanCompileToIL)
                 {
-#pragma warning disable IL3050
-                    // Analyzer doesn't yet understand feature switches
                     create = typeof(Expression<>).MakeGenericType(delegateType).GetMethod("Create", BindingFlags.Static | BindingFlags.NonPublic)!;
-#pragma warning restore IL3050
                 }
                 else
                 {
diff --git a/src/libraries/System.Linq.Expressions/src/System/Runtime/CompilerServices/CallSite.cs b/src/libraries/System.Linq.Expressions/src/System/Runtime/CompilerServices/CallSite.cs
index bb3b579e4a1e..a64bd1e2bde6 100644
--- a/src/libraries/System.Linq.Expressions/src/System/Runtime/CompilerServices/CallSite.cs
+++ b/src/libraries/System.Linq.Expressions/src/System/Runtime/CompilerServices/CallSite.cs
@@ -289,10 +289,7 @@ internal T MakeUpdateDelegate()
             if (System.Linq.Expressions.LambdaExpression.CanCompileToIL
                 && target.IsGenericType && IsSimpleSignature(invoke, out Type[] args))
             {
-#pragma warning disable IL3050
-                // Analyzer doesn't yet understand feature switches
                 return MakeUpdateDelegateWhenCanCompileToIL();
-#pragma warning restore IL3050
             }
 
             s_cachedNoMatch = CreateCustomNoMatchDelegate(invoke);
diff --git a/src/libraries/System.Linq.Expressions/tests/DebugViewTests.cs b/src/libraries/System.Linq.Expressions/tests/DebugViewTests.cs
index 05f74015a269..4d9167f24940 100644
--- a/src/libraries/System.Linq.Expressions/tests/DebugViewTests.cs
+++ b/src/libraries/System.Linq.Expressions/tests/DebugViewTests.cs
@@ -316,7 +316,7 @@ public static void Call()
             Check(".Call $x.ToString()", Expression.Call(x, typeof(int).GetMethod("ToString", Type.EmptyTypes)));
             Check(".Call $s.Substring($x)", Expression.Call(s, typeof(string).GetMethod("Substring", new[] { typeof(int) }), x));
             Check(".Call $s.Substring(\\r\\n    $x,\\r\\n    $y)", Expression.Call(s, typeof(string).GetMethod("Substring", new[] { typeof(int), typeof(int) }), x, y));
-            Check(".Call System.TimeSpan.FromSeconds($d)", Expression.Call(null, typeof(TimeSpan).GetMethod("FromSeconds", new[] { typeof(int) }), d));
+            Check(".Call System.TimeSpan.FromSeconds($d)", Expression.Call(null, typeof(TimeSpan).GetMethod("FromSeconds", new[] { typeof(double) }), d));
         }
 
         [Fact]
diff --git a/src/libraries/System.Linq.Parallel/tests/QueryOperators/AsEnumerableTests.cs b/src/libraries/System.Linq.Parallel/tests/QueryOperators/AsEnumerableTests.cs
index 962d8745b238..9a1848ea559b 100644
--- a/src/libraries/System.Linq.Parallel/tests/QueryOperators/AsEnumerableTests.cs
+++ b/src/libraries/System.Linq.Parallel/tests/QueryOperators/AsEnumerableTests.cs
@@ -50,12 +50,13 @@ public static void AsEnumerable_LinqBinding(Labeled<ParallelQuery<int>> labeled,
         {
             IEnumerable<int> enumerable = labeled.Item.AsEnumerable();
 
-            // The LINQ Cast<T>() retains origin type for ParallelEnumerable  and Partitioner when unordered,
+            // The LINQ Cast<T>() retains origin type for ParallelEnumerable and Partitioner when unordered,
             // (and all when ordered, due to the extra wrapper)
             // although aliased as IEnumerable<T>, so further LINQ calls work as expected.
             // If this test starts failing, update this test, and maybe mention it in release notes.
             Assert.IsNotType<ParallelQuery<int>>(enumerable.Cast<int>());
             Assert.True(enumerable.Cast<int>() is ParallelQuery<int>);
+            Assert.True(enumerable.OfType<int>() is ParallelQuery<int>); // for non-nullable value types, OfType is equivalent to Cast
 
             Assert.False(enumerable.DefaultIfEmpty() is ParallelQuery<int>);
             Assert.False(enumerable.Distinct() is ParallelQuery<int>);
@@ -64,7 +65,6 @@ public static void AsEnumerable_LinqBinding(Labeled<ParallelQuery<int>> labeled,
             Assert.False(enumerable.GroupJoin(Enumerable.Range(0, count), x => x, y => y, (x, g) => x) is ParallelQuery<int>);
             Assert.False(enumerable.Intersect(Enumerable.Range(0, count)) is ParallelQuery<int>);
             Assert.False(enumerable.Join(Enumerable.Range(0, count), x => x, y => y, (x, y) => x) is ParallelQuery<int>);
-            Assert.False(enumerable.OfType<int>() is ParallelQuery<int>);
             Assert.False(enumerable.OrderBy(x => x) is ParallelQuery<int>);
             Assert.False(enumerable.OrderByDescending(x => x) is ParallelQuery<int>);
             Assert.False(enumerable.Reverse() is ParallelQuery<int>);
diff --git a/src/libraries/System.Linq.Parallel/tests/QueryOperators/AsSequentialTests.cs b/src/libraries/System.Linq.Parallel/tests/QueryOperators/AsSequentialTests.cs
index 6eada873e8bd..0ed93d45be08 100644
--- a/src/libraries/System.Linq.Parallel/tests/QueryOperators/AsSequentialTests.cs
+++ b/src/libraries/System.Linq.Parallel/tests/QueryOperators/AsSequentialTests.cs
@@ -50,12 +50,13 @@ public static void AsSequential_LinqBinding(Labeled<ParallelQuery<int>> labeled,
         {
             IEnumerable<int> seq = labeled.Item.AsSequential();
 
-            // The LINQ Cast<T>() retains origin type for ParallelEnumerable  and Partitioner when unordered,
+            // The LINQ Cast<T>() retains origin type for ParallelEnumerable and Partitioner when unordered,
             // (and for all sources when ordered, due to the extra wrapper)
             // although aliased as IEnumerable<T>, so further LINQ calls work as expected.
             // If this test starts failing, update this test, and maybe mention it in release notes.
             Assert.IsNotType<ParallelQuery<int>>(seq.Cast<int>());
             Assert.True(seq.Cast<int>() is ParallelQuery<int>);
+            Assert.True(seq.OfType<int>() is ParallelQuery<int>); // for non-nullable value types, OfType is equivalent to Cast
 
             Assert.False(seq.DefaultIfEmpty() is ParallelQuery<int>);
             Assert.False(seq.Distinct() is ParallelQuery<int>);
@@ -64,7 +65,6 @@ public static void AsSequential_LinqBinding(Labeled<ParallelQuery<int>> labeled,
             Assert.False(seq.GroupJoin(Enumerable.Range(0, count), x => x, y => y, (x, g) => x) is ParallelQuery<int>);
             Assert.False(seq.Intersect(Enumerable.Range(0, count)) is ParallelQuery<int>);
             Assert.False(seq.Join(Enumerable.Range(0, count), x => x, y => y, (x, y) => x) is ParallelQuery<int>);
-            Assert.False(seq.OfType<int>() is ParallelQuery<int>);
             Assert.False(seq.OrderBy(x => x) is ParallelQuery<int>);
             Assert.False(seq.OrderByDescending(x => x) is ParallelQuery<int>);
             Assert.False(seq.Reverse() is ParallelQuery<int>);
diff --git a/src/libraries/System.Linq/System.Linq.sln b/src/libraries/System.Linq/System.Linq.sln
index a81e387e856b..b5283c72e434 100644
--- a/src/libraries/System.Linq/System.Linq.sln
+++ b/src/libraries/System.Linq/System.Linq.sln
@@ -1,4 +1,8 @@
-﻿Microsoft Visual Studio Solution File, Format Version 12.00
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.10.34618.27
+MinimumVisualStudioVersion = 10.0.40219.1
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestUtilities", "..\Common\tests\TestUtilities\TestUtilities.csproj", "{AF1B1B01-A4EC-45F4-AE51-CC1FA7892181}"
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Collections", "..\System.Collections\ref\System.Collections.csproj", "{3A8560D8-0E79-4BDE-802A-C96C7FE98258}"
@@ -35,11 +39,11 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{8CA90AB2-58B
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "gen", "{84E98F7C-FA2B-4048-AB7C-9FCDEA9CD37E}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "tools\gen", "{34793393-0347-438D-A832-2476F33C1BE3}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "gen", "{34793393-0347-438D-A832-2476F33C1BE3}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "tools\src", "{F8F69023-9ACD-4979-A710-39D16377AEEE}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{F8F69023-9ACD-4979-A710-39D16377AEEE}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "tools\ref", "{18C4E23D-AB0F-45E5-A6A1-A741F6462E85}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "ref", "{18C4E23D-AB0F-45E5-A6A1-A741F6462E85}"
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{0ADC596A-5B2E-4E5F-B5B5-DEB65A6C7E9D}"
 EndProject
@@ -111,24 +115,28 @@ Global
 	EndGlobalSection
 	GlobalSection(NestedProjects) = preSolution
 		{AF1B1B01-A4EC-45F4-AE51-CC1FA7892181} = {E291F4BF-7B8B-45AD-88F5-FB8B8380C126}
-		{80A4051B-4A36-4A8B-BA43-A5AB8AA959F3} = {E291F4BF-7B8B-45AD-88F5-FB8B8380C126}
 		{3A8560D8-0E79-4BDE-802A-C96C7FE98258} = {7C5B49B9-F7D9-41FB-A8FA-94328BDDCCD1}
 		{7E4C1F09-B4F2-470E-9E7B-2C386E93D657} = {7C5B49B9-F7D9-41FB-A8FA-94328BDDCCD1}
-		{D3160C37-FC48-4907-8F4A-F584ED12B275} = {7C5B49B9-F7D9-41FB-A8FA-94328BDDCCD1}
 		{14B966BB-CE23-4432-ADBB-89974389AC1D} = {8CA90AB2-58B9-45E7-A684-EDB60C6924B0}
+		{80A4051B-4A36-4A8B-BA43-A5AB8AA959F3} = {E291F4BF-7B8B-45AD-88F5-FB8B8380C126}
 		{9A13A12F-C924-43AF-94AF-6F1B33582D27} = {84E98F7C-FA2B-4048-AB7C-9FCDEA9CD37E}
 		{4BEC631E-B5FD-453F-82A0-C95C461798EA} = {84E98F7C-FA2B-4048-AB7C-9FCDEA9CD37E}
 		{C8F0459C-15D5-4624-8CE4-E93ADF96A28C} = {84E98F7C-FA2B-4048-AB7C-9FCDEA9CD37E}
+		{D3160C37-FC48-4907-8F4A-F584ED12B275} = {7C5B49B9-F7D9-41FB-A8FA-94328BDDCCD1}
 		{E0CA3ED5-EE6C-4F7C-BCE7-EFB1D64A9CD1} = {34793393-0347-438D-A832-2476F33C1BE3}
 		{3EFB74E7-616A-48C1-B43B-3F89AA5013E6} = {34793393-0347-438D-A832-2476F33C1BE3}
-		{34793393-0347-438D-A832-2476F33C1BE3} = {0ADC596A-5B2E-4E5F-B5B5-DEB65A6C7E9D}
 		{28ABC524-ACEE-4183-A64A-49E3DC830595} = {F8F69023-9ACD-4979-A710-39D16377AEEE}
 		{721DB3D9-8221-424E-BE29-084CDD20D26E} = {F8F69023-9ACD-4979-A710-39D16377AEEE}
-		{F8F69023-9ACD-4979-A710-39D16377AEEE} = {0ADC596A-5B2E-4E5F-B5B5-DEB65A6C7E9D}
 		{E19B8772-2DBD-4274-8190-F3CC0242A1C0} = {18C4E23D-AB0F-45E5-A6A1-A741F6462E85}
+		{34793393-0347-438D-A832-2476F33C1BE3} = {0ADC596A-5B2E-4E5F-B5B5-DEB65A6C7E9D}
+		{F8F69023-9ACD-4979-A710-39D16377AEEE} = {0ADC596A-5B2E-4E5F-B5B5-DEB65A6C7E9D}
 		{18C4E23D-AB0F-45E5-A6A1-A741F6462E85} = {0ADC596A-5B2E-4E5F-B5B5-DEB65A6C7E9D}
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {A4970D79-BF1C-4343-9070-B409DBB69F93}
 	EndGlobalSection
+	GlobalSection(SharedMSBuildProjectFiles) = preSolution
+		..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{3efb74e7-616a-48c1-b43b-3f89aa5013e6}*SharedItemsImports = 5
+		..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{721db3d9-8221-424e-be29-084cdd20d26e}*SharedItemsImports = 5
+	EndGlobalSection
 EndGlobal
diff --git a/src/libraries/System.Linq/src/CompatibilitySuppressions.xml b/src/libraries/System.Linq/src/CompatibilitySuppressions.xml
deleted file mode 100644
index 0f5e8063636b..000000000000
--- a/src/libraries/System.Linq/src/CompatibilitySuppressions.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Suppressions xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
-  <!-- Exposed publicly in the implementation to enable reflection for data-binding scenarios. -->
-  <Suppression>
-    <DiagnosticId>CP0001</DiagnosticId>
-    <Target>T:System.Linq.Grouping`2</Target>
-  </Suppression>
-</Suppressions>
\ No newline at end of file
diff --git a/src/libraries/System.Linq/src/System.Linq.csproj b/src/libraries/System.Linq/src/System.Linq.csproj
index 2fc4153be689..68b88631587a 100644
--- a/src/libraries/System.Linq/src/System.Linq.csproj
+++ b/src/libraries/System.Linq/src/System.Linq.csproj
@@ -9,6 +9,7 @@
   <PropertyGroup>
     <TargetPlatformIdentifier>$([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)'))</TargetPlatformIdentifier>
     <OptimizeForSize Condition="'$(TargetPlatformIdentifier)' == 'browser' or '$(TargetPlatformIdentifier)' == 'android' or '$(TargetPlatformIdentifier)' == 'ios' or '$(TargetPlatformIdentifier)' == 'tvos'">true</OptimizeForSize>
+    <DefineConstants Condition="'$(OptimizeForSize)' == 'true'">$(DefineConstants);OPTIMIZE_FOR_SIZE</DefineConstants>
   </PropertyGroup>
 
   <ItemGroup Condition="'$(OptimizeForSize)' == true">
@@ -18,19 +19,22 @@
 
   <ItemGroup Condition="'$(OptimizeForSize)' != true">
     <Compile Include="System\Linq\AppendPrepend.SpeedOpt.cs" />
+    <Compile Include="System\Linq\Cast.SpeedOpt.cs" />
     <Compile Include="System\Linq\Concat.SpeedOpt.cs" />
     <Compile Include="System\Linq\DefaultIfEmpty.SpeedOpt.cs" />
     <Compile Include="System\Linq\Distinct.SpeedOpt.cs" />
     <Compile Include="System\Linq\Grouping.SpeedOpt.cs" />
+    <Compile Include="System\Linq\Iterator.SpeedOpt.cs" />
     <Compile Include="System\Linq\Lookup.SpeedOpt.cs" />
+    <Compile Include="System\Linq\OfType.SpeedOpt.cs" />
     <Compile Include="System\Linq\OrderedEnumerable.SpeedOpt.cs" />
-    <Compile Include="System\Linq\Partition.SpeedOpt.cs" />
     <Compile Include="System\Linq\Range.SpeedOpt.cs" />
     <Compile Include="System\Linq\Repeat.SpeedOpt.cs" />
     <Compile Include="System\Linq\Reverse.SpeedOpt.cs" />
     <Compile Include="System\Linq\Select.SpeedOpt.cs" />
     <Compile Include="System\Linq\SelectMany.SpeedOpt.cs" />
     <Compile Include="System\Linq\Skip.SpeedOpt.cs" />
+    <Compile Include="System\Linq\SkipTake.SpeedOpt.cs" />
     <Compile Include="System\Linq\Take.SpeedOpt.cs" />
     <Compile Include="System\Linq\Union.SpeedOpt.cs" />
     <Compile Include="System\Linq\Where.SpeedOpt.cs" />
@@ -60,16 +64,16 @@
     <Compile Include="System\Linq\Index.cs" />
     <Compile Include="System\Linq\Intersect.cs" />
     <Compile Include="System\Linq\Iterator.cs" />
-    <Compile Include="System\Linq\IIListProvider.cs" />
-    <Compile Include="System\Linq\IPartition.cs" />
     <Compile Include="System\Linq\Join.cs" />
     <Compile Include="System\Linq\Last.cs" />
     <Compile Include="System\Linq\Lookup.cs" />
     <Compile Include="System\Linq\Max.cs" />
     <Compile Include="System\Linq\MaxMin.cs" />
     <Compile Include="System\Linq\Min.cs" />
+    <Compile Include="System\Linq\OfType.cs" />
     <Compile Include="System\Linq\OrderBy.cs" />
     <Compile Include="System\Linq\OrderedEnumerable.cs" />
+    <Compile Include="System\Linq\PartialArrayEnumerator.cs" />
     <Compile Include="System\Linq\Range.cs" />
     <Compile Include="System\Linq\Repeat.cs" />
     <Compile Include="System\Linq\Reverse.cs" />
diff --git a/src/libraries/System.Linq/src/System/Linq/Aggregate.cs b/src/libraries/System.Linq/src/System/Linq/Aggregate.cs
index 81c3c0aa9a9c..c1a4377c29a8 100644
--- a/src/libraries/System.Linq/src/System/Linq/Aggregate.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Aggregate.cs
@@ -9,12 +9,12 @@ public static partial class Enumerable
     {
         public static TSource Aggregate<TSource>(this IEnumerable<TSource> source, Func<TSource, TSource, TSource> func)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (func == null)
+            if (func is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.func);
             }
@@ -38,12 +38,12 @@ public static TSource Aggregate<TSource>(this IEnumerable<TSource> source, Func<
 
         public static TAccumulate Aggregate<TSource, TAccumulate>(this IEnumerable<TSource> source, TAccumulate seed, Func<TAccumulate, TSource, TAccumulate> func)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (func == null)
+            if (func is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.func);
             }
@@ -59,17 +59,17 @@ public static TAccumulate Aggregate<TSource, TAccumulate>(this IEnumerable<TSour
 
         public static TResult Aggregate<TSource, TAccumulate, TResult>(this IEnumerable<TSource> source, TAccumulate seed, Func<TAccumulate, TSource, TAccumulate> func, Func<TAccumulate, TResult> resultSelector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (func == null)
+            if (func is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.func);
             }
 
-            if (resultSelector == null)
+            if (resultSelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.resultSelector);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/AnyAll.cs b/src/libraries/System.Linq/src/System/Linq/AnyAll.cs
index 1fd5e5b4ae44..28a3783aa882 100644
--- a/src/libraries/System.Linq/src/System/Linq/AnyAll.cs
+++ b/src/libraries/System.Linq/src/System/Linq/AnyAll.cs
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Collections;
 using System.Collections.Generic;
 
 namespace System.Linq
@@ -9,25 +10,47 @@ public static partial class Enumerable
     {
         public static bool Any<TSource>(this IEnumerable<TSource> source)
         {
-            return
-                TryGetNonEnumeratedCount(source, out int count) ? count != 0 :
-                WithEnumerator(source);
+            if (source is null)
+            {
+                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
+            }
 
-            static bool WithEnumerator(IEnumerable<TSource> source)
+            if (source is ICollection<TSource> gc)
             {
-                using IEnumerator<TSource> e = source.GetEnumerator();
-                return e.MoveNext();
+                return gc.Count != 0;
             }
+
+#if !OPTIMIZE_FOR_SIZE
+            if (source is Iterator<TSource> iterator)
+            {
+                int count = iterator.GetCount(onlyIfCheap: true);
+                if (count >= 0)
+                {
+                    return count != 0;
+                }
+
+                iterator.TryGetFirst(out bool found);
+                return found;
+            }
+#endif
+
+            if (source is ICollection ngc)
+            {
+                return ngc.Count != 0;
+            }
+
+            using IEnumerator<TSource> e = source.GetEnumerator();
+            return e.MoveNext();
         }
 
         public static bool Any<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
@@ -45,12 +68,12 @@ public static bool Any<TSource>(this IEnumerable<TSource> source, Func<TSource,
 
         public static bool All<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/AppendPrepend.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/AppendPrepend.SpeedOpt.cs
index 80ee23998603..01adb0af01e4 100644
--- a/src/libraries/System.Linq/src/System/Linq/AppendPrepend.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/AppendPrepend.SpeedOpt.cs
@@ -8,15 +8,6 @@ namespace System.Linq
 {
     public static partial class Enumerable
     {
-        private abstract partial class AppendPrependIterator<TSource> : IIListProvider<TSource>
-        {
-            public abstract TSource[] ToArray();
-
-            public abstract List<TSource> ToList();
-
-            public abstract int GetCount(bool onlyIfCheap);
-        }
-
         private sealed partial class AppendPrepend1Iterator<TSource>
         {
             private TSource[] LazyToArray()
@@ -130,14 +121,61 @@ public override List<TSource> ToList()
 
             public override int GetCount(bool onlyIfCheap)
             {
-                if (_source is IIListProvider<TSource> listProv)
+                if (_source is Iterator<TSource> iterator)
                 {
-                    int count = listProv.GetCount(onlyIfCheap);
+                    int count = iterator.GetCount(onlyIfCheap);
                     return count == -1 ? -1 : count + 1;
                 }
 
                 return !onlyIfCheap || _source is ICollection<TSource> ? _source.Count() + 1 : -1;
             }
+
+            public override TSource? TryGetFirst(out bool found)
+            {
+                if (_appending)
+                {
+                    TSource? first = _source.TryGetFirst(out found);
+                    if (found)
+                    {
+                        return first;
+                    }
+                }
+
+                found = true;
+                return _item;
+            }
+
+            public override TSource? TryGetLast(out bool found)
+            {
+                if (!_appending)
+                {
+                    TSource? last = _source.TryGetLast(out found);
+                    if (found)
+                    {
+                        return last;
+                    }
+                }
+
+                found = true;
+                return _item;
+            }
+
+            public override TSource? TryGetElementAt(int index, out bool found)
+            {
+                if (!_appending)
+                {
+                    if (index == 0)
+                    {
+                        found = true;
+                        return _item;
+                    }
+
+                    index--;
+                    return _source.TryGetElementAt(index, out found);
+                }
+
+                return base.TryGetElementAt(index, out found);
+            }
         }
 
         private sealed partial class AppendPrependN<TSource>
@@ -187,7 +225,7 @@ public override TSource[] ToArray()
 
                 TSource[] array = new TSource[count];
                 int index = 0;
-                for (SingleLinkedNode<TSource>? node = _prepended; node != null; node = node.Linked)
+                for (SingleLinkedNode<TSource>? node = _prepended; node is not null; node = node.Linked)
                 {
                     array[index] = node.Item;
                     ++index;
@@ -207,7 +245,7 @@ public override TSource[] ToArray()
                 }
 
                 index = array.Length;
-                for (SingleLinkedNode<TSource>? node = _appended; node != null; node = node.Linked)
+                for (SingleLinkedNode<TSource>? node = _appended; node is not null; node = node.Linked)
                 {
                     --index;
                     array[index] = node.Item;
@@ -232,9 +270,9 @@ public override List<TSource> ToList()
 
             public override int GetCount(bool onlyIfCheap)
             {
-                if (_source is IIListProvider<TSource> listProv)
+                if (_source is Iterator<TSource> iterator)
                 {
-                    int count = listProv.GetCount(onlyIfCheap);
+                    int count = iterator.GetCount(onlyIfCheap);
                     return count == -1 ? -1 : count + _appendCount + _prependCount;
                 }
 
diff --git a/src/libraries/System.Linq/src/System/Linq/AppendPrepend.cs b/src/libraries/System.Linq/src/System/Linq/AppendPrepend.cs
index 2397fbd39e28..3e2df29dcae5 100644
--- a/src/libraries/System.Linq/src/System/Linq/AppendPrepend.cs
+++ b/src/libraries/System.Linq/src/System/Linq/AppendPrepend.cs
@@ -10,7 +10,7 @@ public static partial class Enumerable
     {
         public static IEnumerable<TSource> Append<TSource>(this IEnumerable<TSource> source, TSource element)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -22,7 +22,7 @@ public static IEnumerable<TSource> Append<TSource>(this IEnumerable<TSource> sou
 
         public static IEnumerable<TSource> Prepend<TSource>(this IEnumerable<TSource> source, TSource element)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -43,13 +43,13 @@ private abstract partial class AppendPrependIterator<TSource> : Iterator<TSource
 
             protected AppendPrependIterator(IEnumerable<TSource> source)
             {
-                Debug.Assert(source != null);
+                Debug.Assert(source is not null);
                 _source = source;
             }
 
             protected void GetSourceEnumerator()
             {
-                Debug.Assert(_enumerator == null);
+                Debug.Assert(_enumerator is null);
                 _enumerator = _source.GetEnumerator();
             }
 
@@ -59,7 +59,7 @@ protected void GetSourceEnumerator()
 
             protected bool LoadFromEnumerator()
             {
-                Debug.Assert(_enumerator != null);
+                Debug.Assert(_enumerator is not null);
                 if (_enumerator.MoveNext())
                 {
                     _current = _enumerator.Current;
@@ -72,7 +72,7 @@ protected bool LoadFromEnumerator()
 
             public override void Dispose()
             {
-                if (_enumerator != null)
+                if (_enumerator is not null)
                 {
                     _enumerator.Dispose();
                     _enumerator = null;
@@ -98,7 +98,7 @@ public AppendPrepend1Iterator(IEnumerable<TSource> source, TSource item, bool ap
                 _appending = appending;
             }
 
-            public override Iterator<TSource> Clone() => new AppendPrepend1Iterator<TSource>(_source, _item, _appending);
+            private protected override Iterator<TSource> Clone() => new AppendPrepend1Iterator<TSource>(_source, _item, _appending);
 
             public override bool MoveNext()
             {
@@ -176,7 +176,7 @@ private sealed partial class AppendPrependN<TSource> : AppendPrependIterator<TSo
             public AppendPrependN(IEnumerable<TSource> source, SingleLinkedNode<TSource>? prepended, SingleLinkedNode<TSource>? appended, int prependCount, int appendCount)
                 : base(source)
             {
-                Debug.Assert(prepended != null || appended != null);
+                Debug.Assert(prepended is not null || appended is not null);
                 Debug.Assert(prependCount > 0 || appendCount > 0);
                 Debug.Assert(prependCount + appendCount >= 2);
                 Debug.Assert((prepended?.GetCount() ?? 0) == prependCount);
@@ -188,7 +188,7 @@ public AppendPrependN(IEnumerable<TSource> source, SingleLinkedNode<TSource>? pr
                 _appendCount = appendCount;
             }
 
-            public override Iterator<TSource> Clone() => new AppendPrependN<TSource>(_source, _prepended, _appended, _prependCount, _appendCount);
+            private protected override Iterator<TSource> Clone() => new AppendPrependN<TSource>(_source, _prepended, _appended, _prependCount, _appendCount);
 
             public override bool MoveNext()
             {
@@ -199,7 +199,7 @@ public override bool MoveNext()
                         _state = 2;
                         goto case 2;
                     case 2:
-                        if (_node != null)
+                        if (_node is not null)
                         {
                             _current = _node.Item;
                             _node = _node.Linked;
@@ -215,7 +215,7 @@ public override bool MoveNext()
                             return true;
                         }
 
-                        if (_appended == null)
+                        if (_appended is null)
                         {
                             return false;
                         }
@@ -233,13 +233,13 @@ public override bool MoveNext()
 
             public override AppendPrependIterator<TSource> Append(TSource item)
             {
-                var appended = _appended != null ? _appended.Add(item) : new SingleLinkedNode<TSource>(item);
+                var appended = _appended is not null ? _appended.Add(item) : new SingleLinkedNode<TSource>(item);
                 return new AppendPrependN<TSource>(_source, _prepended, appended, _prependCount, _appendCount + 1);
             }
 
             public override AppendPrependIterator<TSource> Prepend(TSource item)
             {
-                var prepended = _prepended != null ? _prepended.Add(item) : new SingleLinkedNode<TSource>(item);
+                var prepended = _prepended is not null ? _prepended.Add(item) : new SingleLinkedNode<TSource>(item);
                 return new AppendPrependN<TSource>(_source, prepended, _appended, _prependCount + 1, _appendCount);
             }
         }
diff --git a/src/libraries/System.Linq/src/System/Linq/Cast.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Cast.SpeedOpt.cs
new file mode 100644
index 000000000000..3cded1625e8d
--- /dev/null
+++ b/src/libraries/System.Linq/src/System/Linq/Cast.SpeedOpt.cs
@@ -0,0 +1,115 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections;
+using System.Collections.Generic;
+
+namespace System.Linq
+{
+    public static partial class Enumerable
+    {
+        private sealed partial class CastICollectionIterator<TResult>
+        {
+            public override int GetCount(bool onlyIfCheap) => _source.Count;
+
+            public override TResult[] ToArray()
+            {
+                TResult[] array = new TResult[_source.Count];
+
+                int index = 0;
+                foreach (TResult item in _source)
+                {
+                    array[index++] = item;
+                }
+
+                return array;
+            }
+
+            public override List<TResult> ToList()
+            {
+                List<TResult> list = new(_source.Count);
+
+                foreach (TResult item in _source)
+                {
+                    list.Add(item);
+                }
+
+                return list;
+            }
+
+            public override TResult? TryGetElementAt(int index, out bool found)
+            {
+                if (index >= 0)
+                {
+                    IEnumerator e = _source.GetEnumerator();
+                    try
+                    {
+                        while (e.MoveNext())
+                        {
+                            if (index == 0)
+                            {
+                                found = true;
+                                return (TResult)e.Current;
+                            }
+
+                            index--;
+                        }
+                    }
+                    finally
+                    {
+                        (e as IDisposable)?.Dispose();
+                    }
+                }
+
+                found = false;
+                return default;
+            }
+
+            public override TResult? TryGetFirst(out bool found)
+            {
+                IEnumerator e = _source.GetEnumerator();
+                try
+                {
+                    if (e.MoveNext())
+                    {
+                        found = true;
+                        return (TResult)e.Current;
+                    }
+                }
+                finally
+                {
+                    (e as IDisposable)?.Dispose();
+                }
+
+                found = false;
+                return default;
+            }
+
+            public override TResult? TryGetLast(out bool found)
+            {
+                IEnumerator e = _source.GetEnumerator();
+                try
+                {
+                    if (e.MoveNext())
+                    {
+                        TResult last = (TResult)e.Current;
+                        while (e.MoveNext())
+                        {
+                            last = (TResult)e.Current;
+                        }
+
+                        found = true;
+                        return last;
+                    }
+
+                    found = false;
+                    return default;
+                }
+                finally
+                {
+                    (e as IDisposable)?.Dispose();
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Linq/src/System/Linq/Cast.cs b/src/libraries/System.Linq/src/System/Linq/Cast.cs
index 0c20609b3eb7..77a001d13574 100644
--- a/src/libraries/System.Linq/src/System/Linq/Cast.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Cast.cs
@@ -3,32 +3,12 @@
 
 using System.Collections;
 using System.Collections.Generic;
+using System.Diagnostics;
 
 namespace System.Linq
 {
     public static partial class Enumerable
     {
-        public static IEnumerable<TResult> OfType<TResult>(this IEnumerable source)
-        {
-            if (source == null)
-            {
-                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
-            }
-
-            return OfTypeIterator<TResult>(source);
-        }
-
-        private static IEnumerable<TResult> OfTypeIterator<TResult>(IEnumerable source)
-        {
-            foreach (object? obj in source)
-            {
-                if (obj is TResult result)
-                {
-                    yield return result;
-                }
-            }
-        }
-
         public static IEnumerable<
 #nullable disable // there's no way to annotate the connection of the nullability of TResult to that of the source
                 TResult
@@ -40,11 +20,16 @@ public static IEnumerable<
                 return typedSource;
             }
 
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
+            if (source is ICollection collection)
+            {
+                return new CastICollectionIterator<TResult>(collection);
+            }
+
             return CastIterator<TResult>(source);
         }
 
@@ -55,5 +40,46 @@ private static IEnumerable<TResult> CastIterator<TResult>(IEnumerable source)
                 yield return (TResult)obj;
             }
         }
+
+        [DebuggerDisplay("Count = {Count}")]
+        private sealed partial class CastICollectionIterator<TResult>(ICollection source) : Iterator<TResult>
+        {
+            private readonly ICollection _source = source;
+            private IEnumerator? _enumerator;
+
+            private protected override Iterator<TResult> Clone() => new CastICollectionIterator<TResult>(_source);
+
+            public override bool MoveNext()
+            {
+                switch (_state)
+                {
+                    case 1:
+                        _enumerator = _source.GetEnumerator();
+                        _state = 2;
+                        goto case 2;
+
+                    case 2:
+                        Debug.Assert(_enumerator is not null);
+                        if (_enumerator.MoveNext())
+                        {
+                            _current = (TResult)_enumerator.Current;
+                            return true;
+                        }
+
+                        Dispose();
+                        break;
+                }
+
+                return false;
+            }
+
+            public override void Dispose()
+            {
+                (_enumerator as IDisposable)?.Dispose();
+                _enumerator = null;
+
+                base.Dispose();
+            }
+        }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/Chunk.cs b/src/libraries/System.Linq/src/System/Linq/Chunk.cs
index 680c65f72c44..d1a856bc4e0e 100644
--- a/src/libraries/System.Linq/src/System/Linq/Chunk.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Chunk.cs
@@ -35,7 +35,7 @@ public static partial class Enumerable
         /// </exception>
         public static IEnumerable<TSource[]> Chunk<TSource>(this IEnumerable<TSource> source, int size)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -45,15 +45,31 @@ public static IEnumerable<TSource[]> Chunk<TSource>(this IEnumerable<TSource> so
                 ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.size);
             }
 
-            if (IsEmptyArray(source))
+            if (source is TSource[] array)
             {
-                return [];
+                // Special-case arrays, which have an immutable length. This enables us to not only do an
+                // empty check and avoid allocating an iterator object when empty, it enables us to have a
+                // much more efficient (and simpler) implementation for chunking up the array.
+                return array.Length != 0 ?
+                    ArrayChunkIterator(array, size) :
+                    [];
             }
 
-            return ChunkIterator(source, size);
+            return EnumerableChunkIterator(source, size);
         }
 
-        private static IEnumerable<TSource[]> ChunkIterator<TSource>(IEnumerable<TSource> source, int size)
+        private static IEnumerable<TSource[]> ArrayChunkIterator<TSource>(TSource[] source, int size)
+        {
+            int index = 0;
+            while (index < source.Length)
+            {
+                TSource[] chunk = new ReadOnlySpan<TSource>(source, index, Math.Min(size, source.Length - index)).ToArray();
+                index += chunk.Length;
+                yield return chunk;
+            }
+        }
+
+        private static IEnumerable<TSource[]> EnumerableChunkIterator<TSource>(IEnumerable<TSource> source, int size)
         {
             using IEnumerator<TSource> e = source.GetEnumerator();
 
diff --git a/src/libraries/System.Linq/src/System/Linq/Concat.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Concat.SpeedOpt.cs
index a5ad64f78584..452d3fde1ab6 100644
--- a/src/libraries/System.Linq/src/System/Linq/Concat.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Concat.SpeedOpt.cs
@@ -170,7 +170,7 @@ public override int GetCount(bool onlyIfCheap)
                     // Enumerable.Count() handles ICollections in O(1) time, but check for them here anyway
                     // to avoid a method call because 1) they're common and 2) this code is run in a loop.
                     var collection = source as ICollection<TSource>;
-                    Debug.Assert(!_hasOnlyCollections || collection != null);
+                    Debug.Assert(!_hasOnlyCollections || collection is not null);
                     int sourceCount = collection?.Count ?? source.Count();
 
                     checked
@@ -178,7 +178,7 @@ public override int GetCount(bool onlyIfCheap)
                         count += sourceCount;
                     }
                 }
-                while ((previousN = node.PreviousN) != null);
+                while ((previousN = node.PreviousN) is not null);
 
                 Debug.Assert(node._tail is Concat2Iterator<TSource>);
                 return checked(count + node._tail.GetCount(onlyIfCheap));
@@ -202,7 +202,7 @@ private TSource[] LazyToArray()
                     // On the bright side, the bottleneck will usually be iterating, buffering, and copying
                     // each of the enumerables, so this shouldn't be a noticeable perf hit for most scenarios.
                     IEnumerable<TSource>? source = GetEnumerable(i);
-                    if (source == null)
+                    if (source is null)
                     {
                         break;
                     }
@@ -250,7 +250,7 @@ private TSource[] PreallocatingToArray()
                         source.CopyTo(array, arrayIndex);
                     }
                 }
-                while ((previousN = node.PreviousN) != null);
+                while ((previousN = node.PreviousN) is not null);
 
                 var previous2 = (Concat2Iterator<TSource>)node._tail;
                 var second = (ICollection<TSource>)previous2._second;
@@ -342,13 +342,9 @@ private TSource[] PreallocatingToArray()
             }
         }
 
-        private abstract partial class ConcatIterator<TSource> : IPartition<TSource>
+        private abstract partial class ConcatIterator<TSource>
         {
-            public abstract int GetCount(bool onlyIfCheap);
-
-            public abstract TSource[] ToArray();
-
-            public List<TSource> ToList()
+            public override List<TSource> ToList()
             {
                 int count = GetCount(onlyIfCheap: true);
                 var list = count != -1 ? new List<TSource>(count) : new List<TSource>();
@@ -356,7 +352,7 @@ public List<TSource> ToList()
                 for (int i = 0; ; i++)
                 {
                     IEnumerable<TSource>? source = GetEnumerable(i);
-                    if (source == null)
+                    if (source is null)
                     {
                         break;
                     }
@@ -367,16 +363,6 @@ public List<TSource> ToList()
                 return list;
             }
 
-            public abstract TSource? TryGetElementAt(int index, out bool found);
-
-            public abstract TSource? TryGetFirst(out bool found);
-
-            public abstract TSource? TryGetLast(out bool found);
-
-            public IPartition<TSource>? Skip(int count) => new EnumerablePartition<TSource>(this, count, -1);
-
-            public IPartition<TSource>? Take(int count) => new EnumerablePartition<TSource>(this, 0, count - 1);
-
         }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/Concat.cs b/src/libraries/System.Linq/src/System/Linq/Concat.cs
index e57efd1a047d..7b6dcc009d5e 100644
--- a/src/libraries/System.Linq/src/System/Linq/Concat.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Concat.cs
@@ -10,12 +10,12 @@ public static partial class Enumerable
     {
         public static IEnumerable<TSource> Concat<TSource>(this IEnumerable<TSource> first, IEnumerable<TSource> second)
         {
-            if (first == null)
+            if (first is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.first);
             }
 
-            if (second == null)
+            if (second is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.second);
             }
@@ -58,14 +58,14 @@ private sealed partial class Concat2Iterator<TSource> : ConcatIterator<TSource>
             /// <param name="second">The second source to concatenate.</param>
             internal Concat2Iterator(IEnumerable<TSource> first, IEnumerable<TSource> second)
             {
-                Debug.Assert(first != null);
-                Debug.Assert(second != null);
+                Debug.Assert(first is not null);
+                Debug.Assert(second is not null);
 
                 _first = first;
                 _second = second;
             }
 
-            public override Iterator<TSource> Clone() => new Concat2Iterator<TSource>(_first, _second);
+            private protected override Iterator<TSource> Clone() => new Concat2Iterator<TSource>(_first, _second);
 
             internal override ConcatIterator<TSource> Concat(IEnumerable<TSource> next)
             {
@@ -139,8 +139,8 @@ private sealed partial class ConcatNIterator<TSource> : ConcatIterator<TSource>
             /// </param>
             internal ConcatNIterator(ConcatIterator<TSource> tail, IEnumerable<TSource> head, int headIndex, bool hasOnlyCollections)
             {
-                Debug.Assert(tail != null);
-                Debug.Assert(head != null);
+                Debug.Assert(tail is not null);
+                Debug.Assert(head is not null);
                 Debug.Assert(headIndex >= 2);
 
                 _tail = tail;
@@ -151,7 +151,7 @@ internal ConcatNIterator(ConcatIterator<TSource> tail, IEnumerable<TSource> head
 
             private ConcatNIterator<TSource>? PreviousN => _tail as ConcatNIterator<TSource>;
 
-            public override Iterator<TSource> Clone() => new ConcatNIterator<TSource>(_tail, _head, _headIndex, _hasOnlyCollections);
+            private protected override Iterator<TSource> Clone() => new ConcatNIterator<TSource>(_tail, _head, _headIndex, _hasOnlyCollections);
 
             internal override ConcatIterator<TSource> Concat(IEnumerable<TSource> next)
             {
@@ -185,7 +185,7 @@ internal override ConcatIterator<TSource> Concat(IEnumerable<TSource> next)
                         return node._head;
                     }
                 }
-                while ((previousN = node.PreviousN) != null);
+                while ((previousN = node.PreviousN) is not null);
 
                 Debug.Assert(index == 0 || index == 1);
                 Debug.Assert(node._tail is Concat2Iterator<TSource>);
@@ -206,7 +206,7 @@ private abstract partial class ConcatIterator<TSource> : Iterator<TSource>
 
             public override void Dispose()
             {
-                if (_enumerator != null)
+                if (_enumerator is not null)
                 {
                     _enumerator.Dispose();
                     _enumerator = null;
@@ -240,7 +240,7 @@ public override bool MoveNext()
                 {
                     while (true)
                     {
-                        Debug.Assert(_enumerator != null);
+                        Debug.Assert(_enumerator is not null);
                         if (_enumerator.MoveNext())
                         {
                             _current = _enumerator.Current;
@@ -248,7 +248,7 @@ public override bool MoveNext()
                         }
 
                         IEnumerable<TSource>? next = GetEnumerable(_state++ - 1);
-                        if (next != null)
+                        if (next is not null)
                         {
                             _enumerator.Dispose();
                             _enumerator = next.GetEnumerator();
diff --git a/src/libraries/System.Linq/src/System/Linq/Contains.cs b/src/libraries/System.Linq/src/System/Linq/Contains.cs
index a907eea4a992..f242c7a35f52 100644
--- a/src/libraries/System.Linq/src/System/Linq/Contains.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Contains.cs
@@ -13,12 +13,12 @@ public static bool Contains<TSource>(this IEnumerable<TSource> source, TSource v
 
         public static bool Contains<TSource>(this IEnumerable<TSource> source, TSource value, IEqualityComparer<TSource>? comparer)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (comparer == null)
+            if (comparer is null)
             {
                 foreach (TSource element in source)
                 {
diff --git a/src/libraries/System.Linq/src/System/Linq/Count.cs b/src/libraries/System.Linq/src/System/Linq/Count.cs
index 14f3d457f6ea..6b8819f8c3cb 100644
--- a/src/libraries/System.Linq/src/System/Linq/Count.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Count.cs
@@ -10,7 +10,7 @@ public static partial class Enumerable
     {
         public static int Count<TSource>(this IEnumerable<TSource> source)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -20,10 +20,12 @@ public static int Count<TSource>(this IEnumerable<TSource> source)
                 return collectionoft.Count;
             }
 
-            if (source is IIListProvider<TSource> listProv)
+#if !OPTIMIZE_FOR_SIZE
+            if (source is Iterator<TSource> iterator)
             {
-                return listProv.GetCount(onlyIfCheap: false);
+                return iterator.GetCount(onlyIfCheap: false);
             }
+#endif
 
             if (source is ICollection collection)
             {
@@ -47,12 +49,12 @@ public static int Count<TSource>(this IEnumerable<TSource> source)
 
         public static int Count<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
@@ -94,7 +96,7 @@ public static int Count<TSource>(this IEnumerable<TSource> source, Func<TSource,
         /// </remarks>
         public static bool TryGetNonEnumeratedCount<TSource>(this IEnumerable<TSource> source, out int count)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -105,15 +107,17 @@ public static bool TryGetNonEnumeratedCount<TSource>(this IEnumerable<TSource> s
                 return true;
             }
 
-            if (source is IIListProvider<TSource> listProv)
+#if !OPTIMIZE_FOR_SIZE
+            if (source is Iterator<TSource> iterator)
             {
-                int c = listProv.GetCount(onlyIfCheap: true);
+                int c = iterator.GetCount(onlyIfCheap: true);
                 if (c >= 0)
                 {
                     count = c;
                     return true;
                 }
             }
+#endif
 
             if (source is ICollection collection)
             {
@@ -127,7 +131,7 @@ public static bool TryGetNonEnumeratedCount<TSource>(this IEnumerable<TSource> s
 
         public static long LongCount<TSource>(this IEnumerable<TSource> source)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -149,12 +153,12 @@ public static long LongCount<TSource>(this IEnumerable<TSource> source)
 
         public static long LongCount<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/DefaultIfEmpty.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/DefaultIfEmpty.SpeedOpt.cs
index 24619cc43813..c89d6797581e 100644
--- a/src/libraries/System.Linq/src/System/Linq/DefaultIfEmpty.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/DefaultIfEmpty.SpeedOpt.cs
@@ -8,15 +8,15 @@ namespace System.Linq
 {
     public static partial class Enumerable
     {
-        private sealed partial class DefaultIfEmptyIterator<TSource> : IIListProvider<TSource>
+        private sealed partial class DefaultIfEmptyIterator<TSource>
         {
-            public TSource[] ToArray()
+            public override TSource[] ToArray()
             {
                 TSource[] array = _source.ToArray();
                 return array.Length == 0 ? [_default] : array;
             }
 
-            public List<TSource> ToList()
+            public override List<TSource> ToList()
             {
                 List<TSource> list = _source.ToList();
                 if (list.Count == 0)
@@ -27,7 +27,7 @@ public List<TSource> ToList()
                 return list;
             }
 
-            public int GetCount(bool onlyIfCheap)
+            public override int GetCount(bool onlyIfCheap)
             {
                 int count;
                 if (!onlyIfCheap || _source is ICollection<TSource> || _source is ICollection)
@@ -36,11 +36,51 @@ public int GetCount(bool onlyIfCheap)
                 }
                 else
                 {
-                    count = _source is IIListProvider<TSource> listProv ? listProv.GetCount(onlyIfCheap: true) : -1;
+                    count = _source is Iterator<TSource> iterator ? iterator.GetCount(onlyIfCheap: true) : -1;
                 }
 
                 return count == 0 ? 1 : count;
             }
+
+            public override TSource? TryGetFirst(out bool found)
+            {
+                TSource? first = _source.TryGetFirst(out found);
+                if (found)
+                {
+                    return first;
+                }
+
+                found = true;
+                return _default;
+            }
+
+            public override TSource? TryGetLast(out bool found)
+            {
+                TSource? last = _source.TryGetLast(out found);
+                if (found)
+                {
+                    return last;
+                }
+
+                found = true;
+                return _default;
+            }
+
+            public override TSource? TryGetElementAt(int index, out bool found)
+            {
+                TSource? item = _source.TryGetElementAt(index, out found);
+                if (found)
+                {
+                    return item;
+                }
+
+                if (index == 0)
+                {
+                    found = true;
+                }
+
+                return _default;
+            }
         }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/DefaultIfEmpty.cs b/src/libraries/System.Linq/src/System/Linq/DefaultIfEmpty.cs
index 593a6b8a67b3..1ee5b5fd304d 100644
--- a/src/libraries/System.Linq/src/System/Linq/DefaultIfEmpty.cs
+++ b/src/libraries/System.Linq/src/System/Linq/DefaultIfEmpty.cs
@@ -13,11 +13,16 @@ public static partial class Enumerable
 
         public static IEnumerable<TSource> DefaultIfEmpty<TSource>(this IEnumerable<TSource> source, TSource defaultValue)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
+            if (source is TSource[] { Length: > 0 })
+            {
+                return source;
+            }
+
             return new DefaultIfEmptyIterator<TSource>(source, defaultValue);
         }
 
@@ -29,12 +34,12 @@ private sealed partial class DefaultIfEmptyIterator<TSource> : Iterator<TSource>
 
             public DefaultIfEmptyIterator(IEnumerable<TSource> source, TSource defaultValue)
             {
-                Debug.Assert(source != null);
+                Debug.Assert(source is not null);
                 _source = source;
                 _default = defaultValue;
             }
 
-            public override Iterator<TSource> Clone() => new DefaultIfEmptyIterator<TSource>(_source, _default);
+            private protected override Iterator<TSource> Clone() => new DefaultIfEmptyIterator<TSource>(_source, _default);
 
             public override bool MoveNext()
             {
@@ -55,7 +60,7 @@ public override bool MoveNext()
 
                         return true;
                     case 2:
-                        Debug.Assert(_enumerator != null);
+                        Debug.Assert(_enumerator is not null);
                         if (_enumerator.MoveNext())
                         {
                             _current = _enumerator.Current;
@@ -71,7 +76,7 @@ public override bool MoveNext()
 
             public override void Dispose()
             {
-                if (_enumerator != null)
+                if (_enumerator is not null)
                 {
                     _enumerator.Dispose();
                     _enumerator = null;
diff --git a/src/libraries/System.Linq/src/System/Linq/Distinct.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Distinct.SpeedOpt.cs
index 70e96b7ed68f..4bb5f0373e20 100644
--- a/src/libraries/System.Linq/src/System/Linq/Distinct.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Distinct.SpeedOpt.cs
@@ -7,13 +7,15 @@ namespace System.Linq
 {
     public static partial class Enumerable
     {
-        private sealed partial class DistinctIterator<TSource> : IIListProvider<TSource>
+        private sealed partial class DistinctIterator<TSource>
         {
-            public TSource[] ToArray() => Enumerable.HashSetToArray(new HashSet<TSource>(_source, _comparer));
+            public override TSource[] ToArray() => ICollectionToArray(new HashSet<TSource>(_source, _comparer));
 
-            public List<TSource> ToList() => new List<TSource>(new HashSet<TSource>(_source, _comparer));
+            public override List<TSource> ToList() => new List<TSource>(new HashSet<TSource>(_source, _comparer));
 
-            public int GetCount(bool onlyIfCheap) => onlyIfCheap ? -1 : new HashSet<TSource>(_source, _comparer).Count;
+            public override int GetCount(bool onlyIfCheap) => onlyIfCheap ? -1 : new HashSet<TSource>(_source, _comparer).Count;
+
+            public override TSource? TryGetFirst(out bool found) => _source.TryGetFirst(out found);
         }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/Distinct.cs b/src/libraries/System.Linq/src/System/Linq/Distinct.cs
index e41973a46ec1..0408f51da4ba 100644
--- a/src/libraries/System.Linq/src/System/Linq/Distinct.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Distinct.cs
@@ -12,7 +12,7 @@ public static partial class Enumerable
 
         public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source, IEqualityComparer<TSource>? comparer)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -101,12 +101,12 @@ private sealed partial class DistinctIterator<TSource> : Iterator<TSource>
 
             public DistinctIterator(IEnumerable<TSource> source, IEqualityComparer<TSource>? comparer)
             {
-                Debug.Assert(source != null);
+                Debug.Assert(source is not null);
                 _source = source;
                 _comparer = comparer;
             }
 
-            public override Iterator<TSource> Clone() => new DistinctIterator<TSource>(_source, _comparer);
+            private protected override Iterator<TSource> Clone() => new DistinctIterator<TSource>(_source, _comparer);
 
             public override bool MoveNext()
             {
@@ -127,8 +127,8 @@ public override bool MoveNext()
                         _state = 2;
                         return true;
                     case 2:
-                        Debug.Assert(_enumerator != null);
-                        Debug.Assert(_set != null);
+                        Debug.Assert(_enumerator is not null);
+                        Debug.Assert(_set is not null);
                         while (_enumerator.MoveNext())
                         {
                             element = _enumerator.Current;
@@ -148,7 +148,7 @@ public override bool MoveNext()
 
             public override void Dispose()
             {
-                if (_enumerator != null)
+                if (_enumerator is not null)
                 {
                     _enumerator.Dispose();
                     _enumerator = null;
diff --git a/src/libraries/System.Linq/src/System/Linq/ElementAt.cs b/src/libraries/System.Linq/src/System/Linq/ElementAt.cs
index b33fcaddff92..97b87f9eba99 100644
--- a/src/libraries/System.Linq/src/System/Linq/ElementAt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/ElementAt.cs
@@ -11,30 +11,29 @@ public static partial class Enumerable
     {
         public static TSource ElementAt<TSource>(this IEnumerable<TSource> source, int index)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (source is IPartition<TSource> partition)
-            {
-                TSource? element = partition.TryGetElementAt(index, out bool found);
-                if (found)
-                {
-                    return element!;
-                }
-            }
-            else if (source is IList<TSource> list)
+            if (source is IList<TSource> list)
             {
                 return list[index];
             }
-            else if (TryGetElement(source, index, out TSource? element))
+
+            bool found;
+            TSource? element =
+#if !OPTIMIZE_FOR_SIZE
+                source is Iterator<TSource> iterator ? iterator.TryGetElementAt(index, out found) :
+#endif
+                TryGetElementAtNonIterator(source, index, out found);
+
+            if (!found)
             {
-                return element;
+                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index);
             }
 
-            ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index);
-            return default;
+            return element!;
         }
 
         /// <summary>Returns the element at a specified index in a sequence.</summary>
@@ -50,7 +49,7 @@ public static TSource ElementAt<TSource>(this IEnumerable<TSource> source, int i
         /// </remarks>
         public static TSource ElementAt<TSource>(this IEnumerable<TSource> source, Index index)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -75,23 +74,12 @@ public static TSource ElementAt<TSource>(this IEnumerable<TSource> source, Index
 
         public static TSource? ElementAtOrDefault<TSource>(this IEnumerable<TSource> source, int index)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (source is IPartition<TSource> partition)
-            {
-                return partition.TryGetElementAt(index, out bool _);
-            }
-
-            if (source is IList<TSource> list)
-            {
-                return (uint)index < (uint)list.Count ? list[index] : default;
-            }
-
-            TryGetElement(source, index, out TSource? element);
-            return element;
+            return TryGetElementAt(source, index, out _);
         }
 
         /// <summary>Returns the element at a specified index in a sequence or a default value if the index is out of range.</summary>
@@ -106,7 +94,7 @@ public static TSource ElementAt<TSource>(this IEnumerable<TSource> source, Index
         /// </remarks>
         public static TSource? ElementAtOrDefault<TSource>(this IEnumerable<TSource> source, Index index)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -125,9 +113,25 @@ public static TSource ElementAt<TSource>(this IEnumerable<TSource> source, Index
             return element;
         }
 
-        private static bool TryGetElement<TSource>(IEnumerable<TSource> source, int index, [MaybeNullWhen(false)] out TSource element)
+        private static TSource? TryGetElementAt<TSource>(this IEnumerable<TSource> source, int index, out bool found)
+        {
+            if (source is IList<TSource> list)
+            {
+                return (found = (uint)index < (uint)list.Count) ?
+                    list[index] :
+                    default;
+            }
+
+            return
+#if !OPTIMIZE_FOR_SIZE
+                source is Iterator<TSource> iterator ? iterator.TryGetElementAt(index, out found) :
+#endif
+                TryGetElementAtNonIterator(source, index, out found);
+        }
+
+        private static TSource? TryGetElementAtNonIterator<TSource>(IEnumerable<TSource> source, int index, out bool found)
         {
-            Debug.Assert(source != null);
+            Debug.Assert(source is not null);
 
             if (index >= 0)
             {
@@ -136,21 +140,21 @@ private static bool TryGetElement<TSource>(IEnumerable<TSource> source, int inde
                 {
                     if (index == 0)
                     {
-                        element = e.Current;
-                        return true;
+                        found = true;
+                        return e.Current;
                     }
 
                     index--;
                 }
             }
 
-            element = default;
-            return false;
+            found = false;
+            return default;
         }
 
         private static bool TryGetElementFromEnd<TSource>(IEnumerable<TSource> source, int indexFromEnd, [MaybeNullWhen(false)] out TSource element)
         {
-            Debug.Assert(source != null);
+            Debug.Assert(source is not null);
 
             if (indexFromEnd > 0)
             {
diff --git a/src/libraries/System.Linq/src/System/Linq/Except.cs b/src/libraries/System.Linq/src/System/Linq/Except.cs
index b2277ccc92ba..c8517d0b4a78 100644
--- a/src/libraries/System.Linq/src/System/Linq/Except.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Except.cs
@@ -9,12 +9,12 @@ public static partial class Enumerable
     {
         public static IEnumerable<TSource> Except<TSource>(this IEnumerable<TSource> first, IEnumerable<TSource> second)
         {
-            if (first == null)
+            if (first is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.first);
             }
 
-            if (second == null)
+            if (second is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.second);
             }
@@ -24,12 +24,12 @@ public static IEnumerable<TSource> Except<TSource>(this IEnumerable<TSource> fir
 
         public static IEnumerable<TSource> Except<TSource>(this IEnumerable<TSource> first, IEnumerable<TSource> second, IEqualityComparer<TSource>? comparer)
         {
-            if (first == null)
+            if (first is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.first);
             }
 
-            if (second == null)
+            if (second is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.second);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/First.cs b/src/libraries/System.Linq/src/System/Linq/First.cs
index 1c62f547d9a0..74ff81c2fec8 100644
--- a/src/libraries/System.Linq/src/System/Linq/First.cs
+++ b/src/libraries/System.Linq/src/System/Linq/First.cs
@@ -2,7 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
-using System.Diagnostics.CodeAnalysis;
+using System.Diagnostics;
 
 namespace System.Linq
 {
@@ -64,16 +64,20 @@ public static TSource FirstOrDefault<TSource>(this IEnumerable<TSource> source,
 
         private static TSource? TryGetFirst<TSource>(this IEnumerable<TSource> source, out bool found)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (source is IPartition<TSource> partition)
-            {
-                return partition.TryGetFirst(out found);
-            }
+            return
+#if !OPTIMIZE_FOR_SIZE
+                source is Iterator<TSource> iterator ? iterator.TryGetFirst(out found) :
+#endif
+                TryGetFirstNonIterator(source, out found);
+        }
 
+        private static TSource? TryGetFirstNonIterator<TSource>(IEnumerable<TSource> source, out bool found)
+        {
             if (source is IList<TSource> list)
             {
                 if (list.Count > 0)
@@ -100,12 +104,12 @@ public static TSource FirstOrDefault<TSource>(this IEnumerable<TSource> source,
 
         private static TSource? TryGetFirst<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate, out bool found)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/GroupJoin.cs b/src/libraries/System.Linq/src/System/Linq/GroupJoin.cs
index d52a2d3740d2..8bd37bda7c8b 100644
--- a/src/libraries/System.Linq/src/System/Linq/GroupJoin.cs
+++ b/src/libraries/System.Linq/src/System/Linq/GroupJoin.cs
@@ -12,27 +12,27 @@ public static IEnumerable<TResult> GroupJoin<TOuter, TInner, TKey, TResult>(this
 
         public static IEnumerable<TResult> GroupJoin<TOuter, TInner, TKey, TResult>(this IEnumerable<TOuter> outer, IEnumerable<TInner> inner, Func<TOuter, TKey> outerKeySelector, Func<TInner, TKey> innerKeySelector, Func<TOuter, IEnumerable<TInner>, TResult> resultSelector, IEqualityComparer<TKey>? comparer)
         {
-            if (outer == null)
+            if (outer is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.outer);
             }
 
-            if (inner == null)
+            if (inner is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.inner);
             }
 
-            if (outerKeySelector == null)
+            if (outerKeySelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.outerKeySelector);
             }
 
-            if (innerKeySelector == null)
+            if (innerKeySelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.innerKeySelector);
             }
 
-            if (resultSelector == null)
+            if (resultSelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.resultSelector);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/Grouping.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Grouping.SpeedOpt.cs
index 97e3b1152139..d081a09380f9 100644
--- a/src/libraries/System.Linq/src/System/Linq/Grouping.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Grouping.SpeedOpt.cs
@@ -5,63 +5,54 @@
 
 namespace System.Linq
 {
-    internal sealed partial class GroupedResultEnumerable<TSource, TKey, TElement, TResult> : IIListProvider<TResult>
+    public static partial class Enumerable
     {
-        public TResult[] ToArray() =>
-            Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer).ToArray(_resultSelector);
-
-        public List<TResult> ToList() =>
-            Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer).ToList(_resultSelector);
+        private sealed partial class GroupByResultIterator<TSource, TKey, TElement, TResult>
+        {
+            public override TResult[] ToArray() =>
+                Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer).ToArray(_resultSelector);
 
-        public int GetCount(bool onlyIfCheap) =>
-            onlyIfCheap ? -1 : Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer).Count;
-    }
+            public override List<TResult> ToList() =>
+                Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer).ToList(_resultSelector);
 
-    internal sealed partial class GroupedResultEnumerable<TSource, TKey, TResult> : IIListProvider<TResult>
-    {
-        public TResult[] ToArray() =>
-            Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer).ToArray(_resultSelector);
+            public override int GetCount(bool onlyIfCheap) =>
+                onlyIfCheap ? -1 : Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer).Count;
+        }
 
-        public List<TResult> ToList() =>
-            Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer).ToList(_resultSelector);
+        private sealed partial class GroupByResultIterator<TSource, TKey, TResult>
+        {
+            public override TResult[] ToArray() =>
+                Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer).ToArray(_resultSelector);
 
-        public int GetCount(bool onlyIfCheap) =>
-            onlyIfCheap ? -1 : Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer).Count;
-    }
+            public override List<TResult> ToList() =>
+                Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer).ToList(_resultSelector);
 
-    internal sealed partial class GroupedEnumerable<TSource, TKey, TElement> : IIListProvider<IGrouping<TKey, TElement>>
-    {
-        public IGrouping<TKey, TElement>[] ToArray()
-        {
-            IIListProvider<IGrouping<TKey, TElement>> lookup = Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer);
-            return lookup.ToArray();
+            public override int GetCount(bool onlyIfCheap) =>
+                onlyIfCheap ? -1 : Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer).Count;
         }
 
-        public List<IGrouping<TKey, TElement>> ToList()
+        private sealed partial class GroupByIterator<TSource, TKey, TElement>
         {
-            IIListProvider<IGrouping<TKey, TElement>> lookup = Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer);
-            return lookup.ToList();
-        }
+            public override IGrouping<TKey, TElement>[] ToArray() =>
+                Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer).ToArray();
 
-        public int GetCount(bool onlyIfCheap) =>
-            onlyIfCheap ? -1 : Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer).Count;
-    }
+            public override List<IGrouping<TKey, TElement>> ToList() =>
+                Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer).ToList();
 
-    internal sealed partial class GroupedEnumerable<TSource, TKey> : IIListProvider<IGrouping<TKey, TSource>>
-    {
-        public IGrouping<TKey, TSource>[] ToArray()
-        {
-            IIListProvider<IGrouping<TKey, TSource>> lookup = Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer);
-            return lookup.ToArray();
+            public override int GetCount(bool onlyIfCheap) =>
+                onlyIfCheap ? -1 : Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer).Count;
         }
 
-        public List<IGrouping<TKey, TSource>> ToList()
+        private sealed partial class GroupByIterator<TSource, TKey>
         {
-            IIListProvider<IGrouping<TKey, TSource>> lookup = Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer);
-            return lookup.ToList();
-        }
+            public override IGrouping<TKey, TSource>[] ToArray() =>
+                Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer).ToArray();
 
-        public int GetCount(bool onlyIfCheap) =>
-            onlyIfCheap ? -1 : Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer).Count;
+            public override List<IGrouping<TKey, TSource>> ToList() =>
+                Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer).ToList();
+
+            public override int GetCount(bool onlyIfCheap) =>
+                onlyIfCheap ? -1 : Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer).Count;
+        }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/Grouping.cs b/src/libraries/System.Linq/src/System/Linq/Grouping.cs
index 958642f624d0..6e19e4ab384e 100644
--- a/src/libraries/System.Linq/src/System/Linq/Grouping.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Grouping.cs
@@ -29,7 +29,7 @@ public static IEnumerable<IGrouping<TKey, TSource>> GroupBy<TSource, TKey>(this
                 return [];
             }
 
-            return new GroupedEnumerable<TSource, TKey>(source, keySelector, comparer);
+            return new GroupByIterator<TSource, TKey>(source, keySelector, comparer);
         }
 
         public static IEnumerable<IGrouping<TKey, TElement>> GroupBy<TSource, TKey, TElement>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TSource, TElement> elementSelector) =>
@@ -57,7 +57,7 @@ public static IEnumerable<IGrouping<TKey, TElement>> GroupBy<TSource, TKey, TEle
                 return [];
             }
 
-            return new GroupedEnumerable<TSource, TKey, TElement>(source, keySelector, elementSelector, comparer);
+            return new GroupByIterator<TSource, TKey, TElement>(source, keySelector, elementSelector, comparer);
         }
 
         public static IEnumerable<TResult> GroupBy<TSource, TKey, TResult>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TKey, IEnumerable<TSource>, TResult> resultSelector) =>
@@ -85,7 +85,7 @@ public static IEnumerable<TResult> GroupBy<TSource, TKey, TResult>(this IEnumera
                 return [];
             }
 
-            return new GroupedResultEnumerable<TSource, TKey, TResult>(source, keySelector, resultSelector, comparer);
+            return new GroupByResultIterator<TSource, TKey, TResult>(source, keySelector, resultSelector, comparer);
         }
 
         public static IEnumerable<TResult> GroupBy<TSource, TKey, TElement, TResult>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TSource, TElement> elementSelector, Func<TKey, IEnumerable<TElement>, TResult> resultSelector) =>
@@ -118,7 +118,229 @@ public static IEnumerable<TResult> GroupBy<TSource, TKey, TElement, TResult>(thi
                 return [];
             }
 
-            return new GroupedResultEnumerable<TSource, TKey, TElement, TResult>(source, keySelector, elementSelector, resultSelector, comparer);
+            return new GroupByResultIterator<TSource, TKey, TElement, TResult>(source, keySelector, elementSelector, resultSelector, comparer);
+        }
+
+        private sealed partial class GroupByResultIterator<TSource, TKey, TElement, TResult> : Iterator<TResult>
+        {
+            private readonly IEnumerable<TSource> _source;
+            private readonly Func<TSource, TKey> _keySelector;
+            private readonly Func<TSource, TElement> _elementSelector;
+            private readonly IEqualityComparer<TKey>? _comparer;
+            private readonly Func<TKey, IEnumerable<TElement>, TResult> _resultSelector;
+
+            private Lookup<TKey, TElement>? _lookup;
+            private Grouping<TKey, TElement>? _g;
+
+            public GroupByResultIterator(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TSource, TElement> elementSelector, Func<TKey, IEnumerable<TElement>, TResult> resultSelector, IEqualityComparer<TKey>? comparer)
+            {
+                _source = source;
+                _keySelector = keySelector;
+                _elementSelector = elementSelector;
+                _comparer = comparer;
+                _resultSelector = resultSelector;
+            }
+
+            private protected override Iterator<TResult> Clone() => new GroupByResultIterator<TSource, TKey, TElement, TResult>(_source, _keySelector, _elementSelector, _resultSelector, _comparer);
+
+            public override bool MoveNext()
+            {
+                switch (_state)
+                {
+                    case 1:
+                        _lookup = Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer);
+                        _g = _lookup._lastGrouping;
+                        if (_g is not null)
+                        {
+                            _state = 2;
+                            goto ValidItem;
+                        }
+                        break;
+
+                    case 2:
+                        Debug.Assert(_g is not null);
+                        Debug.Assert(_lookup is not null);
+                        if (_g != _lookup._lastGrouping)
+                        {
+                            goto ValidItem;
+                        }
+                        break;
+                }
+
+                Dispose();
+                return false;
+
+            ValidItem:
+                _g = _g._next;
+                Debug.Assert(_g is not null);
+                _g.Trim();
+                _current = _resultSelector(_g.Key, _g._elements);
+                return true;
+            }
+        }
+
+        private sealed partial class GroupByResultIterator<TSource, TKey, TResult> : Iterator<TResult>
+        {
+            private readonly IEnumerable<TSource> _source;
+            private readonly Func<TSource, TKey> _keySelector;
+            private readonly IEqualityComparer<TKey>? _comparer;
+            private readonly Func<TKey, IEnumerable<TSource>, TResult> _resultSelector;
+
+            private Lookup<TKey, TSource>? _lookup;
+            private Grouping<TKey, TSource>? _g;
+
+            public GroupByResultIterator(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TKey, IEnumerable<TSource>, TResult> resultSelector, IEqualityComparer<TKey>? comparer)
+            {
+                _source = source;
+                _keySelector = keySelector;
+                _resultSelector = resultSelector;
+                _comparer = comparer;
+            }
+
+            private protected override Iterator<TResult> Clone() => new GroupByResultIterator<TSource, TKey, TResult>(_source, _keySelector, _resultSelector, _comparer);
+
+            public override bool MoveNext()
+            {
+                switch (_state)
+                {
+                    case 1:
+                        _lookup = Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer);
+                        _g = _lookup._lastGrouping;
+                        if (_g is not null)
+                        {
+                            _state = 2;
+                            goto ValidItem;
+                        }
+                        break;
+
+                    case 2:
+                        Debug.Assert(_g is not null);
+                        Debug.Assert(_lookup is not null);
+                        if (_g != _lookup._lastGrouping)
+                        {
+                            goto ValidItem;
+                        }
+                        break;
+                }
+
+                Dispose();
+                return false;
+
+            ValidItem:
+                _g = _g._next;
+                Debug.Assert(_g is not null);
+                _g.Trim();
+                _current = _resultSelector(_g.Key, _g._elements);
+                return true;
+            }
+        }
+
+        private sealed partial class GroupByIterator<TSource, TKey, TElement> : Iterator<IGrouping<TKey, TElement>>
+        {
+            private readonly IEnumerable<TSource> _source;
+            private readonly Func<TSource, TKey> _keySelector;
+            private readonly Func<TSource, TElement> _elementSelector;
+            private readonly IEqualityComparer<TKey>? _comparer;
+
+            private Lookup<TKey, TElement>? _lookup;
+            private Grouping<TKey, TElement>? _g;
+
+            public GroupByIterator(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TSource, TElement> elementSelector, IEqualityComparer<TKey>? comparer)
+            {
+                _source = source;
+                _keySelector = keySelector;
+                _elementSelector = elementSelector;
+                _comparer = comparer;
+            }
+
+            private protected override Iterator<IGrouping<TKey, TElement>> Clone() => new GroupByIterator<TSource, TKey, TElement>(_source, _keySelector, _elementSelector, _comparer);
+
+            public override bool MoveNext()
+            {
+                switch (_state)
+                {
+                    case 1:
+                        _lookup = Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer);
+                        _g = _lookup._lastGrouping;
+                        if (_g is not null)
+                        {
+                            _state = 2;
+                            goto ValidItem;
+                        }
+                        break;
+
+                    case 2:
+                        Debug.Assert(_g is not null);
+                        Debug.Assert(_lookup is not null);
+                        if (_g != _lookup._lastGrouping)
+                        {
+                            goto ValidItem;
+                        }
+                        break;
+                }
+
+                Dispose();
+                return false;
+
+            ValidItem:
+                _g = _g._next;
+                Debug.Assert(_g is not null);
+                _current = _g;
+                return true;
+            }
+        }
+
+        private sealed partial class GroupByIterator<TSource, TKey> : Iterator<IGrouping<TKey, TSource>>
+        {
+            private readonly IEnumerable<TSource> _source;
+            private readonly Func<TSource, TKey> _keySelector;
+            private readonly IEqualityComparer<TKey>? _comparer;
+
+            private Lookup<TKey, TSource>? _lookup;
+            private Grouping<TKey, TSource>? _g;
+
+            public GroupByIterator(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IEqualityComparer<TKey>? comparer)
+            {
+                _source = source;
+                _keySelector = keySelector;
+                _comparer = comparer;
+            }
+
+            private protected override Iterator<IGrouping<TKey, TSource>> Clone() => new GroupByIterator<TSource, TKey>(_source, _keySelector, _comparer);
+
+            public override bool MoveNext()
+            {
+                switch (_state)
+                {
+                    case 1:
+                        _lookup = Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer);
+                        _g = _lookup._lastGrouping;
+                        if (_g is not null)
+                        {
+                            _state = 2;
+                            goto ValidItem;
+                        }
+                        break;
+
+                    case 2:
+                        Debug.Assert(_g is not null);
+                        Debug.Assert(_lookup is not null);
+                        if (_g != _lookup._lastGrouping)
+                        {
+                            goto ValidItem;
+                        }
+                        break;
+                }
+
+                Dispose();
+                return false;
+
+                ValidItem:
+                _g = _g._next;
+                Debug.Assert(_g is not null);
+                _current = _g;
+                return true;
+            }
         }
     }
 
@@ -127,15 +349,9 @@ public interface IGrouping<out TKey, out TElement> : IEnumerable<TElement>
         TKey Key { get; }
     }
 
-    // It is (unfortunately) common to databind directly to Grouping.Key.
-    // Because of this, we have to declare this internal type public so that we
-    // can mark the Key property for public reflection.
-    //
-    // To limit the damage, the toolchain makes this type appear in a hidden assembly.
-    // (This is also why it is no longer a nested type of Lookup<,>).
     [DebuggerDisplay("Key = {Key}")]
     [DebuggerTypeProxy(typeof(SystemLinq_GroupingDebugView<,>))]
-    public class Grouping<TKey, TElement> : IGrouping<TKey, TElement>, IList<TElement>
+    internal sealed class Grouping<TKey, TElement> : IGrouping<TKey, TElement>, IList<TElement>
     {
         internal readonly TKey _key;
         internal readonly int _hashCode;
@@ -172,16 +388,12 @@ internal void Trim()
 
         public IEnumerator<TElement> GetEnumerator()
         {
-            for (int i = 0; i < _count; i++)
-            {
-                yield return _elements[i];
-            }
+            Debug.Assert(_count > 0, "A grouping should only have been created if an element was being added to it.");
+            return new PartialArrayEnumerator<TElement>(_elements, _count);
         }
 
         IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
 
-        // DDB195907: implement IGrouping<>.Key implicitly
-        // so that WPF binding works on this property.
         public TKey Key => _key;
 
         int ICollection<TElement>.Count => _count;
@@ -197,11 +409,7 @@ public IEnumerator<TElement> GetEnumerator()
         void ICollection<TElement>.CopyTo(TElement[] array, int arrayIndex) =>
             Array.Copy(_elements, 0, array, arrayIndex, _count);
 
-        bool ICollection<TElement>.Remove(TElement item)
-        {
-            ThrowHelper.ThrowNotSupportedException();
-            return false;
-        }
+        bool ICollection<TElement>.Remove(TElement item) => ThrowHelper.ThrowNotSupportedException_Boolean();
 
         int IList<TElement>.IndexOf(TElement item) => Array.IndexOf(_elements, item, 0, _count);
 
@@ -213,7 +421,7 @@ TElement IList<TElement>.this[int index]
         {
             get
             {
-                if (index < 0 || index >= _count)
+                if ((uint)index >= (uint)_count)
                 {
                     ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index);
                 }
@@ -221,100 +429,7 @@ TElement IList<TElement>.this[int index]
                 return _elements[index];
             }
 
-            set
-            {
-                ThrowHelper.ThrowNotSupportedException();
-            }
+            set => ThrowHelper.ThrowNotSupportedException();
         }
     }
-
-    internal sealed partial class GroupedResultEnumerable<TSource, TKey, TElement, TResult> : IEnumerable<TResult>
-    {
-        private readonly IEnumerable<TSource> _source;
-        private readonly Func<TSource, TKey> _keySelector;
-        private readonly Func<TSource, TElement> _elementSelector;
-        private readonly IEqualityComparer<TKey>? _comparer;
-        private readonly Func<TKey, IEnumerable<TElement>, TResult> _resultSelector;
-
-        public GroupedResultEnumerable(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TSource, TElement> elementSelector, Func<TKey, IEnumerable<TElement>, TResult> resultSelector, IEqualityComparer<TKey>? comparer)
-        {
-            _source = source;
-            _keySelector = keySelector;
-            _elementSelector = elementSelector;
-            _comparer = comparer;
-            _resultSelector = resultSelector;
-        }
-
-        public IEnumerator<TResult> GetEnumerator()
-        {
-            Lookup<TKey, TElement> lookup = Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer);
-            return lookup.ApplyResultSelector(_resultSelector).GetEnumerator();
-        }
-
-        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
-    }
-
-    internal sealed partial class GroupedResultEnumerable<TSource, TKey, TResult> : IEnumerable<TResult>
-    {
-        private readonly IEnumerable<TSource> _source;
-        private readonly Func<TSource, TKey> _keySelector;
-        private readonly IEqualityComparer<TKey>? _comparer;
-        private readonly Func<TKey, IEnumerable<TSource>, TResult> _resultSelector;
-
-        public GroupedResultEnumerable(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TKey, IEnumerable<TSource>, TResult> resultSelector, IEqualityComparer<TKey>? comparer)
-        {
-            _source = source;
-            _keySelector = keySelector;
-            _resultSelector = resultSelector;
-            _comparer = comparer;
-        }
-
-        public IEnumerator<TResult> GetEnumerator()
-        {
-            Lookup<TKey, TSource> lookup = Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer);
-            return lookup.ApplyResultSelector(_resultSelector).GetEnumerator();
-        }
-
-        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
-    }
-
-    internal sealed partial class GroupedEnumerable<TSource, TKey, TElement> : IEnumerable<IGrouping<TKey, TElement>>
-    {
-        private readonly IEnumerable<TSource> _source;
-        private readonly Func<TSource, TKey> _keySelector;
-        private readonly Func<TSource, TElement> _elementSelector;
-        private readonly IEqualityComparer<TKey>? _comparer;
-
-        public GroupedEnumerable(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TSource, TElement> elementSelector, IEqualityComparer<TKey>? comparer)
-        {
-            _source = source;
-            _keySelector = keySelector;
-            _elementSelector = elementSelector;
-            _comparer = comparer;
-        }
-
-        public IEnumerator<IGrouping<TKey, TElement>> GetEnumerator() =>
-            Lookup<TKey, TElement>.Create(_source, _keySelector, _elementSelector, _comparer).GetEnumerator();
-
-        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
-    }
-
-    internal sealed partial class GroupedEnumerable<TSource, TKey> : IEnumerable<IGrouping<TKey, TSource>>
-    {
-        private readonly IEnumerable<TSource> _source;
-        private readonly Func<TSource, TKey> _keySelector;
-        private readonly IEqualityComparer<TKey>? _comparer;
-
-        public GroupedEnumerable(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IEqualityComparer<TKey>? comparer)
-        {
-            _source = source;
-            _keySelector = keySelector;
-            _comparer = comparer;
-        }
-
-        public IEnumerator<IGrouping<TKey, TSource>> GetEnumerator() =>
-            Lookup<TKey, TSource>.Create(_source, _keySelector, _comparer).GetEnumerator();
-
-        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
-    }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/IIListProvider.cs b/src/libraries/System.Linq/src/System/Linq/IIListProvider.cs
deleted file mode 100644
index 9eefc6e61e0c..000000000000
--- a/src/libraries/System.Linq/src/System/Linq/IIListProvider.cs
+++ /dev/null
@@ -1,33 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Collections.Generic;
-
-namespace System.Linq
-{
-    /// <summary>
-    /// An iterator that can produce an array or <see cref="List{TElement}"/> through an optimized path.
-    /// </summary>
-    internal interface IIListProvider<TElement> : IEnumerable<TElement>
-    {
-        /// <summary>
-        /// Produce an array of the sequence through an optimized path.
-        /// </summary>
-        /// <returns>The array.</returns>
-        TElement[] ToArray();
-
-        /// <summary>
-        /// Produce a <see cref="List{TElement}"/> of the sequence through an optimized path.
-        /// </summary>
-        /// <returns>The <see cref="List{TElement}"/>.</returns>
-        List<TElement> ToList();
-
-        /// <summary>
-        /// Returns the count of elements in the sequence.
-        /// </summary>
-        /// <param name="onlyIfCheap">If true then the count should only be calculated if doing
-        /// so is quick (sure or likely to be constant time), otherwise -1 should be returned.</param>
-        /// <returns>The number of elements.</returns>
-        int GetCount(bool onlyIfCheap);
-    }
-}
diff --git a/src/libraries/System.Linq/src/System/Linq/IPartition.cs b/src/libraries/System.Linq/src/System/Linq/IPartition.cs
deleted file mode 100644
index 86db1921b12f..000000000000
--- a/src/libraries/System.Linq/src/System/Linq/IPartition.cs
+++ /dev/null
@@ -1,47 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-namespace System.Linq
-{
-    /// <summary>
-    /// An iterator that supports random access and can produce a partial sequence of its items through an optimized path.
-    /// </summary>
-    internal interface IPartition<TElement> : IIListProvider<TElement>
-    {
-        /// <summary>
-        /// Creates a new partition that skips the specified number of elements from this sequence.
-        /// </summary>
-        /// <param name="count">The number of elements to skip.</param>
-        /// <returns>An <see cref="IPartition{TElement}"/> with the first <paramref name="count"/> items removed, or null if known empty.</returns>
-        IPartition<TElement>? Skip(int count);
-
-        /// <summary>
-        /// Creates a new partition that takes the specified number of elements from this sequence.
-        /// </summary>
-        /// <param name="count">The number of elements to take.</param>
-        /// <returns>An <see cref="IPartition{TElement}"/> with only the first <paramref name="count"/> items, or null if known empty.</returns>
-        IPartition<TElement>? Take(int count);
-
-        /// <summary>
-        /// Gets the item associated with a 0-based index in this sequence.
-        /// </summary>
-        /// <param name="index">The 0-based index to access.</param>
-        /// <param name="found"><c>true</c> if the sequence contains an element at that index, <c>false</c> otherwise.</param>
-        /// <returns>The element if <paramref name="found"/> is <c>true</c>, otherwise, the default value of <typeparamref name="TElement"/>.</returns>
-        TElement? TryGetElementAt(int index, out bool found);
-
-        /// <summary>
-        /// Gets the first item in this sequence.
-        /// </summary>
-        /// <param name="found"><c>true</c> if the sequence contains an element, <c>false</c> otherwise.</param>
-        /// <returns>The element if <paramref name="found"/> is <c>true</c>, otherwise, the default value of <typeparamref name="TElement"/>.</returns>
-        TElement? TryGetFirst(out bool found);
-
-        /// <summary>
-        /// Gets the last item in this sequence.
-        /// </summary>
-        /// <param name="found"><c>true</c> if the sequence contains an element, <c>false</c> otherwise.</param>
-        /// <returns>The element if <paramref name="found"/> is <c>true</c>, otherwise, the default value of <typeparamref name="TElement"/>.</returns>
-        TElement? TryGetLast(out bool found);
-    }
-}
diff --git a/src/libraries/System.Linq/src/System/Linq/Index.cs b/src/libraries/System.Linq/src/System/Linq/Index.cs
index 1390a764c78d..49339b03d1ad 100644
--- a/src/libraries/System.Linq/src/System/Linq/Index.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Index.cs
@@ -13,7 +13,7 @@ public static partial class Enumerable
         /// <exception cref="ArgumentNullException"><paramref name="source" /> is <see langword="null" />.</exception>
         public static IEnumerable<(int Index, TSource Item)> Index<TSource>(this IEnumerable<TSource> source)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/Intersect.cs b/src/libraries/System.Linq/src/System/Linq/Intersect.cs
index b8db27d7a847..15d1179eaeb8 100644
--- a/src/libraries/System.Linq/src/System/Linq/Intersect.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Intersect.cs
@@ -11,12 +11,12 @@ public static partial class Enumerable
 
         public static IEnumerable<TSource> Intersect<TSource>(this IEnumerable<TSource> first, IEnumerable<TSource> second, IEqualityComparer<TSource>? comparer)
         {
-            if (first == null)
+            if (first is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.first);
             }
 
-            if (second == null)
+            if (second is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.second);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/Iterator.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Iterator.SpeedOpt.cs
new file mode 100644
index 000000000000..dfafa98cb275
--- /dev/null
+++ b/src/libraries/System.Linq/src/System/Linq/Iterator.SpeedOpt.cs
@@ -0,0 +1,71 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+
+namespace System.Linq
+{
+    public static partial class Enumerable
+    {
+        private abstract partial class Iterator<TSource>
+        {
+            /// <summary>
+            /// Produce an array of the sequence through an optimized path.
+            /// </summary>
+            /// <returns>The array.</returns>
+            public abstract TSource[] ToArray();
+
+            /// <summary>
+            /// Produce a <see cref="List{TSource}"/> of the sequence through an optimized path.
+            /// </summary>
+            /// <returns>The <see cref="List{TSource}"/>.</returns>
+            public abstract List<TSource> ToList();
+
+            /// <summary>
+            /// Returns the count of elements in the sequence.
+            /// </summary>
+            /// <param name="onlyIfCheap">If true then the count should only be calculated if doing
+            /// so is quick (sure or likely to be constant time), otherwise -1 should be returned.</param>
+            /// <returns>The number of elements.</returns>
+            public abstract int GetCount(bool onlyIfCheap);
+
+            /// <summary>
+            /// Creates a new iterator that skips the specified number of elements from this sequence.
+            /// </summary>
+            /// <param name="count">The number of elements to skip.</param>
+            /// <returns>An <see cref="Iterator{TSource}"/> with the first <paramref name="count"/> items removed, or null if known empty.</returns>
+            public virtual Iterator<TSource>? Skip(int count) => new IEnumerableSkipTakeIterator<TSource>(this, count, -1);
+
+            /// <summary>
+            /// Creates a new iterator that takes the specified number of elements from this sequence.
+            /// </summary>
+            /// <param name="count">The number of elements to take.</param>
+            /// <returns>An <see cref="Iterator{TSource}"/> with only the first <paramref name="count"/> items, or null if known empty.</returns>
+            public virtual Iterator<TSource>? Take(int count) => new IEnumerableSkipTakeIterator<TSource>(this, 0, count - 1);
+
+            /// <summary>
+            /// Gets the item associated with a 0-based index in this sequence.
+            /// </summary>
+            /// <param name="index">The 0-based index to access.</param>
+            /// <param name="found"><c>true</c> if the sequence contains an element at that index, <c>false</c> otherwise.</param>
+            /// <returns>The element if <paramref name="found"/> is <c>true</c>, otherwise, the default value of <typeparamref name="TSource"/>.</returns>
+            public virtual TSource? TryGetElementAt(int index, out bool found) =>
+                index == 0 ? TryGetFirst(out found) :
+                TryGetElementAtNonIterator(this, index, out found);
+
+            /// <summary>
+            /// Gets the first item in this sequence.
+            /// </summary>
+            /// <param name="found"><c>true</c> if the sequence contains an element, <c>false</c> otherwise.</param>
+            /// <returns>The element if <paramref name="found"/> is <c>true</c>, otherwise, the default value of <typeparamref name="TSource"/>.</returns>
+            public virtual TSource? TryGetFirst(out bool found) => TryGetFirstNonIterator(this, out found);
+
+            /// <summary>
+            /// Gets the last item in this sequence.
+            /// </summary>
+            /// <param name="found"><c>true</c> if the sequence contains an element, <c>false</c> otherwise.</param>
+            /// <returns>The element if <paramref name="found"/> is <c>true</c>, otherwise, the default value of <typeparamref name="TSource"/>.</returns>
+            public virtual TSource? TryGetLast(out bool found) => TryGetLastNonIterator(this, out found);
+        }
+    }
+}
diff --git a/src/libraries/System.Linq/src/System/Linq/Iterator.cs b/src/libraries/System.Linq/src/System/Linq/Iterator.cs
index b9e8c7b58c05..8d5982eb0b9b 100644
--- a/src/libraries/System.Linq/src/System/Linq/Iterator.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Iterator.cs
@@ -28,19 +28,12 @@ public static partial class Enumerable
         /// </description></item>
         /// </list>
         /// </remarks>
-        internal abstract class Iterator<TSource> : IEnumerable<TSource>, IEnumerator<TSource>
+        private abstract partial class Iterator<TSource> : IEnumerable<TSource>, IEnumerator<TSource>
         {
-            private readonly int _threadId;
-            internal int _state;
-            internal TSource _current = default!;
+            private readonly int _threadId = Environment.CurrentManagedThreadId;
 
-            /// <summary>
-            /// Initializes a new instance of the <see cref="Iterator{TSource}"/> class.
-            /// </summary>
-            protected Iterator()
-            {
-                _threadId = Environment.CurrentManagedThreadId;
-            }
+            private protected int _state;
+            private protected TSource _current = default!;
 
             /// <summary>
             /// The item currently yielded by this iterator.
@@ -53,7 +46,7 @@ protected Iterator()
             /// <remarks>
             /// This method is called if <see cref="GetEnumerator"/> is called more than once.
             /// </remarks>
-            public abstract Iterator<TSource> Clone();
+            private protected abstract Iterator<TSource> Clone();
 
             /// <summary>
             /// Puts this iterator in a state whereby no further enumeration will take place.
@@ -76,7 +69,7 @@ public virtual void Dispose()
             /// that created this iterator, the result will be this iterator. Otherwise, the result
             /// will be a shallow copy of this iterator.
             /// </remarks>
-            public IEnumerator<TSource> GetEnumerator()
+            public Iterator<TSource> GetEnumerator()
             {
                 Iterator<TSource> enumerator = _state == 0 && _threadId == Environment.CurrentManagedThreadId ? this : Clone();
                 enumerator._state = 1;
@@ -94,22 +87,24 @@ public IEnumerator<TSource> GetEnumerator()
             /// </summary>
             /// <typeparam name="TResult">The type of the mapped items.</typeparam>
             /// <param name="selector">The selector used to map each item.</param>
-            public virtual IEnumerable<TResult> Select<TResult>(Func<TSource, TResult> selector)
-            {
-                return new SelectEnumerableIterator<TSource, TResult>(this, selector);
-            }
+            public virtual IEnumerable<TResult> Select<TResult>(Func<TSource, TResult> selector) =>
+#if OPTIMIZE_FOR_SIZE
+                new IEnumerableSelectIterator<TSource, TResult>(this, selector);
+#else
+                new IteratorSelectIterator<TSource, TResult>(this, selector);
+#endif
+
 
             /// <summary>
             /// Returns an enumerable that filters each item in this iterator based on a predicate.
             /// </summary>
             /// <param name="predicate">The predicate used to filter each item.</param>
-            public virtual IEnumerable<TSource> Where(Func<TSource, bool> predicate)
-            {
-                return new WhereEnumerableIterator<TSource>(this, predicate);
-            }
+            public virtual IEnumerable<TSource> Where(Func<TSource, bool> predicate) =>
+                new IEnumerableWhereIterator<TSource>(this, predicate);
 
             object? IEnumerator.Current => Current;
 
+            IEnumerator<TSource> IEnumerable<TSource>.GetEnumerator() => GetEnumerator();
             IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
 
             void IEnumerator.Reset() => ThrowHelper.ThrowNotSupportedException();
diff --git a/src/libraries/System.Linq/src/System/Linq/Join.cs b/src/libraries/System.Linq/src/System/Linq/Join.cs
index b1c56e01725a..531a8518e888 100644
--- a/src/libraries/System.Linq/src/System/Linq/Join.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Join.cs
@@ -12,27 +12,27 @@ public static IEnumerable<TResult> Join<TOuter, TInner, TKey, TResult>(this IEnu
 
         public static IEnumerable<TResult> Join<TOuter, TInner, TKey, TResult>(this IEnumerable<TOuter> outer, IEnumerable<TInner> inner, Func<TOuter, TKey> outerKeySelector, Func<TInner, TKey> innerKeySelector, Func<TOuter, TInner, TResult> resultSelector, IEqualityComparer<TKey>? comparer)
         {
-            if (outer == null)
+            if (outer is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.outer);
             }
 
-            if (inner == null)
+            if (inner is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.inner);
             }
 
-            if (outerKeySelector == null)
+            if (outerKeySelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.outerKeySelector);
             }
 
-            if (innerKeySelector == null)
+            if (innerKeySelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.innerKeySelector);
             }
 
-            if (resultSelector == null)
+            if (resultSelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.resultSelector);
             }
@@ -58,7 +58,7 @@ private static IEnumerable<TResult> JoinIterator<TOuter, TInner, TKey, TResult>(
                         {
                             TOuter item = e.Current;
                             Grouping<TKey, TInner>? g = lookup.GetGrouping(outerKeySelector(item), create: false);
-                            if (g != null)
+                            if (g is not null)
                             {
                                 int count = g._count;
                                 TInner[] elements = g._elements;
diff --git a/src/libraries/System.Linq/src/System/Linq/Last.cs b/src/libraries/System.Linq/src/System/Linq/Last.cs
index 568f0d8670fa..007ee1659c3f 100644
--- a/src/libraries/System.Linq/src/System/Linq/Last.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Last.cs
@@ -63,16 +63,20 @@ public static TSource LastOrDefault<TSource>(this IEnumerable<TSource> source, F
 
         private static TSource? TryGetLast<TSource>(this IEnumerable<TSource> source, out bool found)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (source is IPartition<TSource> partition)
-            {
-                return partition.TryGetLast(out found);
-            }
+            return
+#if !OPTIMIZE_FOR_SIZE
+                source is Iterator<TSource> iterator ? iterator.TryGetLast(out found) :
+#endif
+                TryGetLastNonIterator(source, out found);
+        }
 
+        private static TSource? TryGetLastNonIterator<TSource>(IEnumerable<TSource> source, out bool found)
+        {
             if (source is IList<TSource> list)
             {
                 int count = list.Count;
@@ -107,17 +111,17 @@ public static TSource LastOrDefault<TSource>(this IEnumerable<TSource> source, F
 
         private static TSource? TryGetLast<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate, out bool found)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
 
-            if (source is OrderedEnumerable<TSource> ordered)
+            if (source is OrderedIterator<TSource> ordered)
             {
                 return ordered.TryGetLast(predicate, out found);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/Lookup.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Lookup.SpeedOpt.cs
index 16a9d7e0a3f4..42e642067649 100644
--- a/src/libraries/System.Linq/src/System/Linq/Lookup.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Lookup.SpeedOpt.cs
@@ -6,34 +6,19 @@
 
 namespace System.Linq
 {
-    public partial class Lookup<TKey, TElement> : IIListProvider<IGrouping<TKey, TElement>>
+    public partial class Lookup<TKey, TElement>
     {
-        IGrouping<TKey, TElement>[] IIListProvider<IGrouping<TKey, TElement>>.ToArray()
-        {
-            IGrouping<TKey, TElement>[] array;
-            if (_count > 0)
-            {
-                array = new IGrouping<TKey, TElement>[_count];
-                Fill(_lastGrouping, array);
-            }
-            else
-            {
-                array = [];
-            }
-            return array;
-        }
-
         internal TResult[] ToArray<TResult>(Func<TKey, IEnumerable<TElement>, TResult> resultSelector)
         {
             TResult[] array = new TResult[_count];
             int index = 0;
             Grouping<TKey, TElement>? g = _lastGrouping;
-            if (g != null)
+            if (g is not null)
             {
                 do
                 {
                     g = g._next;
-                    Debug.Assert(g != null);
+                    Debug.Assert(g is not null);
 
                     g.Trim();
                     array[index] = resultSelector(g._key, g._elements);
@@ -44,38 +29,5 @@ internal TResult[] ToArray<TResult>(Func<TKey, IEnumerable<TElement>, TResult> r
 
             return array;
         }
-
-        List<IGrouping<TKey, TElement>> IIListProvider<IGrouping<TKey, TElement>>.ToList()
-        {
-            var list = new List<IGrouping<TKey, TElement>>(_count);
-            if (_count > 0)
-            {
-                Fill(_lastGrouping, Enumerable.SetCountAndGetSpan(list, _count));
-            }
-
-            return list;
-        }
-
-        private static void Fill(Grouping<TKey, TElement>? lastGrouping, Span<IGrouping<TKey, TElement>> results)
-        {
-            int index = 0;
-            Grouping<TKey, TElement>? g = lastGrouping;
-            if (g != null)
-            {
-                do
-                {
-                    g = g._next;
-                    Debug.Assert(g != null);
-
-                    results[index] = g;
-                    ++index;
-                }
-                while (g != lastGrouping);
-            }
-
-            Debug.Assert(index == results.Length, "All list elements were not initialized.");
-        }
-
-        int IIListProvider<IGrouping<TKey, TElement>>.GetCount(bool onlyIfCheap) => _count;
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/Lookup.cs b/src/libraries/System.Linq/src/System/Linq/Lookup.cs
index 055bf6c61018..7669aaaef7e1 100644
--- a/src/libraries/System.Linq/src/System/Linq/Lookup.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Lookup.cs
@@ -14,12 +14,12 @@ public static ILookup<TKey, TSource> ToLookup<TSource, TKey>(this IEnumerable<TS
 
         public static ILookup<TKey, TSource> ToLookup<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IEqualityComparer<TKey>? comparer)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (keySelector == null)
+            if (keySelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.keySelector);
             }
@@ -37,17 +37,17 @@ public static ILookup<TKey, TElement> ToLookup<TSource, TKey, TElement>(this IEn
 
         public static ILookup<TKey, TElement> ToLookup<TSource, TKey, TElement>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TSource, TElement> elementSelector, IEqualityComparer<TKey>? comparer)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (keySelector == null)
+            if (keySelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.keySelector);
             }
 
-            if (elementSelector == null)
+            if (elementSelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.elementSelector);
             }
@@ -76,16 +76,16 @@ public partial class Lookup<TKey, TElement> : ILookup<TKey, TElement>
     {
         private readonly IEqualityComparer<TKey> _comparer;
         private Grouping<TKey, TElement>[] _groupings;
-        private Grouping<TKey, TElement>? _lastGrouping;
+        internal Grouping<TKey, TElement>? _lastGrouping;
         private int _count;
 
         internal static Lookup<TKey, TElement> Create<TSource>(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TSource, TElement> elementSelector, IEqualityComparer<TKey>? comparer)
         {
-            Debug.Assert(source != null);
-            Debug.Assert(keySelector != null);
-            Debug.Assert(elementSelector != null);
+            Debug.Assert(source is not null);
+            Debug.Assert(keySelector is not null);
+            Debug.Assert(elementSelector is not null);
 
-            Lookup<TKey, TElement> lookup = new Lookup<TKey, TElement>(comparer);
+            var lookup = new CollectionLookup<TKey, TElement>(comparer);
             foreach (TSource item in source)
             {
                 lookup.GetGrouping(keySelector(item), create: true)!.Add(elementSelector(item));
@@ -96,10 +96,10 @@ internal static Lookup<TKey, TElement> Create<TSource>(IEnumerable<TSource> sour
 
         internal static Lookup<TKey, TElement> Create(IEnumerable<TElement> source, Func<TElement, TKey> keySelector, IEqualityComparer<TKey>? comparer)
         {
-            Debug.Assert(source != null);
-            Debug.Assert(keySelector != null);
+            Debug.Assert(source is not null);
+            Debug.Assert(keySelector is not null);
 
-            Lookup<TKey, TElement> lookup = new Lookup<TKey, TElement>(comparer);
+            var lookup = new CollectionLookup<TKey, TElement>(comparer);
             foreach (TElement item in source)
             {
                 lookup.GetGrouping(keySelector(item), create: true)!.Add(item);
@@ -110,11 +110,11 @@ internal static Lookup<TKey, TElement> Create(IEnumerable<TElement> source, Func
 
         internal static Lookup<TKey, TElement> CreateForJoin(IEnumerable<TElement> source, Func<TElement, TKey> keySelector, IEqualityComparer<TKey>? comparer)
         {
-            Lookup<TKey, TElement> lookup = new Lookup<TKey, TElement>(comparer);
+            var lookup = new CollectionLookup<TKey, TElement>(comparer);
             foreach (TElement item in source)
             {
                 TKey key = keySelector(item);
-                if (key != null)
+                if (key is not null)
                 {
                     lookup.GetGrouping(key, create: true)!.Add(item);
                 }
@@ -123,7 +123,7 @@ internal static Lookup<TKey, TElement> CreateForJoin(IEnumerable<TElement> sourc
             return lookup;
         }
 
-        private Lookup(IEqualityComparer<TKey>? comparer)
+        private protected Lookup(IEqualityComparer<TKey>? comparer)
         {
             _comparer = comparer ?? EqualityComparer<TKey>.Default;
             _groupings = new Grouping<TKey, TElement>[7];
@@ -133,18 +133,18 @@ private Lookup(IEqualityComparer<TKey>? comparer)
 
         public IEnumerable<TElement> this[TKey key] => GetGrouping(key, create: false) ?? Enumerable.Empty<TElement>();
 
-        public bool Contains(TKey key) => GetGrouping(key, create: false) != null;
+        public bool Contains(TKey key) => GetGrouping(key, create: false) is not null;
 
         public IEnumerator<IGrouping<TKey, TElement>> GetEnumerator()
         {
             Grouping<TKey, TElement>? g = _lastGrouping;
-            if (g != null)
+            if (g is not null)
             {
                 do
                 {
                     g = g._next;
 
-                    Debug.Assert(g != null);
+                    Debug.Assert(g is not null);
                     yield return g;
                 }
                 while (g != _lastGrouping);
@@ -155,7 +155,7 @@ internal List<TResult> ToList<TResult>(Func<TKey, IEnumerable<TElement>, TResult
         {
             List<TResult> list = new List<TResult>(_count);
             Grouping<TKey, TElement>? g = _lastGrouping;
-            if (g != null)
+            if (g is not null)
             {
                 Span<TResult> span = Enumerable.SetCountAndGetSpan(list, _count);
                 int index = 0;
@@ -163,7 +163,7 @@ internal List<TResult> ToList<TResult>(Func<TKey, IEnumerable<TElement>, TResult
                 {
                     g = g._next;
 
-                    Debug.Assert(g != null);
+                    Debug.Assert(g is not null);
                     g.Trim();
                     span[index] = resultSelector(g._key, g._elements);
                     ++index;
@@ -179,13 +179,13 @@ internal List<TResult> ToList<TResult>(Func<TKey, IEnumerable<TElement>, TResult
         public IEnumerable<TResult> ApplyResultSelector<TResult>(Func<TKey, IEnumerable<TElement>, TResult> resultSelector)
         {
             Grouping<TKey, TElement>? g = _lastGrouping;
-            if (g != null)
+            if (g is not null)
             {
                 do
                 {
                     g = g._next;
 
-                    Debug.Assert(g != null);
+                    Debug.Assert(g is not null);
                     g.Trim();
                     yield return resultSelector(g._key, g._elements);
                 }
@@ -198,13 +198,13 @@ public IEnumerable<TResult> ApplyResultSelector<TResult>(Func<TKey, IEnumerable<
         private int InternalGetHashCode(TKey key)
         {
             // Handle comparer implementations that throw when passed null
-            return (key == null) ? 0 : _comparer.GetHashCode(key) & 0x7FFFFFFF;
+            return (key is null) ? 0 : _comparer.GetHashCode(key) & 0x7FFFFFFF;
         }
 
         internal Grouping<TKey, TElement>? GetGrouping(TKey key, bool create)
         {
             int hashCode = InternalGetHashCode(key);
-            for (Grouping<TKey, TElement>? g = _groupings[(uint)hashCode % _groupings.Length]; g != null; g = g._hashNext)
+            for (Grouping<TKey, TElement>? g = _groupings[(uint)hashCode % _groupings.Length]; g is not null; g = g._hashNext)
             {
                 if (g._hashCode == hashCode && _comparer.Equals(g._key, key))
                 {
@@ -223,7 +223,7 @@ private int InternalGetHashCode(TKey key)
                 Grouping<TKey, TElement> g = new Grouping<TKey, TElement>(key, hashCode);
                 g._hashNext = _groupings[index];
                 _groupings[index] = g;
-                if (_lastGrouping == null)
+                if (_lastGrouping is null)
                 {
                     g._next = g;
                 }
@@ -259,16 +259,68 @@ private void Resize()
         }
     }
 
+    internal sealed class CollectionLookup<TKey, TElement> : Lookup<TKey, TElement>, ICollection<IGrouping<TKey, TElement>>, IReadOnlyCollection<IGrouping<TKey, TElement>>
+    {
+        internal CollectionLookup(IEqualityComparer<TKey>? comparer) : base(comparer) { }
+
+        void ICollection<IGrouping<TKey, TElement>>.CopyTo(IGrouping<TKey, TElement>[] array, int arrayIndex)
+        {
+            ArgumentNullException.ThrowIfNull(array);
+            ArgumentOutOfRangeException.ThrowIfNegative(arrayIndex);
+            ArgumentOutOfRangeException.ThrowIfGreaterThan(arrayIndex, array.Length);
+            ArgumentOutOfRangeException.ThrowIfLessThan(array.Length - arrayIndex, Count, nameof(arrayIndex));
+
+            Grouping<TKey, TElement>? g = _lastGrouping;
+            if (g is not null)
+            {
+                do
+                {
+                    g = g._next;
+                    Debug.Assert(g is not null);
+
+                    array[arrayIndex] = g;
+                    ++arrayIndex;
+                }
+                while (g != _lastGrouping);
+            }
+        }
+
+        bool ICollection<IGrouping<TKey, TElement>>.Contains(IGrouping<TKey, TElement> item)
+        {
+            ArgumentNullException.ThrowIfNull(item);
+            return GetGrouping(item.Key, create: false) is { } grouping && grouping == item;
+        }
+
+        bool ICollection<IGrouping<TKey, TElement>>.IsReadOnly => true;
+        void ICollection<IGrouping<TKey, TElement>>.Add(IGrouping<TKey, TElement> item) => throw new NotSupportedException();
+        void ICollection<IGrouping<TKey, TElement>>.Clear() => throw new NotSupportedException();
+        bool ICollection<IGrouping<TKey, TElement>>.Remove(IGrouping<TKey, TElement> item) => throw new NotSupportedException();
+    }
+
     [DebuggerDisplay("Count = 0")]
     [DebuggerTypeProxy(typeof(SystemLinq_LookupDebugView<,>))]
-    internal sealed class EmptyLookup<TKey, TElement> : ILookup<TKey, TElement>
+    internal sealed class EmptyLookup<TKey, TElement> : ILookup<TKey, TElement>, ICollection<IGrouping<TKey, TElement>>, IReadOnlyCollection<IGrouping<TKey, TElement>>
     {
         public static readonly EmptyLookup<TKey, TElement> Instance = new();
 
         public IEnumerable<TElement> this[TKey key] => [];
         public int Count => 0;
-        public bool Contains(TKey key) => false;
+
         public IEnumerator<IGrouping<TKey, TElement>> GetEnumerator() => Enumerable.Empty<IGrouping<TKey, TElement>>().GetEnumerator();
         IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+
+        public bool Contains(TKey key) => false;
+        public bool Contains(IGrouping<TKey, TElement> item) => false;
+        public void CopyTo(IGrouping<TKey, TElement>[] array, int arrayIndex)
+        {
+            ArgumentNullException.ThrowIfNull(array);
+            ArgumentOutOfRangeException.ThrowIfNegative(arrayIndex);
+            ArgumentOutOfRangeException.ThrowIfGreaterThan(arrayIndex, array.Length);
+        }
+
+        public bool IsReadOnly => true;
+        public void Add(IGrouping<TKey, TElement> item) => throw new NotSupportedException();
+        public void Clear() => throw new NotSupportedException();
+        public bool Remove(IGrouping<TKey, TElement> item) => throw new NotSupportedException();
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/Max.cs b/src/libraries/System.Linq/src/System/Linq/Max.cs
index d0da2f7bfd74..998e5ff94770 100644
--- a/src/libraries/System.Linq/src/System/Linq/Max.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Max.cs
@@ -27,7 +27,7 @@ public static partial class Enumerable
 
         private static T? MaxInteger<T>(this IEnumerable<T?> source) where T : struct, IBinaryInteger<T>
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -167,7 +167,7 @@ private static T MaxFloat<T>(this IEnumerable<T> source) where T : struct, IFloa
 
         private static T? MaxFloat<T>(this IEnumerable<T?> source) where T : struct, IFloatingPointIeee754<T>
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -271,7 +271,7 @@ public static decimal Max(this IEnumerable<decimal> source)
 
         public static decimal? Max(this IEnumerable<decimal?> source)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -322,7 +322,7 @@ public static decimal Max(this IEnumerable<decimal> source)
         /// </remarks>
         public static TSource? Max<TSource>(this IEnumerable<TSource> source, IComparer<TSource>? comparer)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -347,7 +347,7 @@ public static decimal Max(this IEnumerable<decimal> source)
             TSource? value = default;
             using (IEnumerator<TSource> e = source.GetEnumerator())
             {
-                if (value == null)
+                if (value is null)
                 {
                     do
                     {
@@ -358,12 +358,12 @@ public static decimal Max(this IEnumerable<decimal> source)
 
                         value = e.Current;
                     }
-                    while (value == null);
+                    while (value is null);
 
                     while (e.MoveNext())
                     {
                         TSource next = e.Current;
-                        if (next != null && comparer.Compare(next, value) > 0)
+                        if (next is not null && comparer.Compare(next, value) > 0)
                         {
                             value = next;
                         }
@@ -432,12 +432,12 @@ public static decimal Max(this IEnumerable<decimal> source)
         /// </remarks>
         public static TSource? MaxBy<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IComparer<TKey>? comparer)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (keySelector == null)
+            if (keySelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.keySelector);
             }
@@ -463,7 +463,7 @@ public static decimal Max(this IEnumerable<decimal> source)
 
             if (default(TKey) is null)
             {
-                if (key == null)
+                if (key is null)
                 {
                     TSource firstValue = value;
 
@@ -478,14 +478,14 @@ public static decimal Max(this IEnumerable<decimal> source)
                         value = e.Current;
                         key = keySelector(value);
                     }
-                    while (key == null);
+                    while (key is null);
                 }
 
                 while (e.MoveNext())
                 {
                     TSource nextValue = e.Current;
                     TKey nextKey = keySelector(nextValue);
-                    if (nextKey != null && comparer.Compare(nextKey, key) > 0)
+                    if (nextKey is not null && comparer.Compare(nextKey, key) > 0)
                     {
                         key = nextKey;
                         value = nextValue;
@@ -535,12 +535,12 @@ public static decimal Max(this IEnumerable<decimal> source)
 
         private static TResult MaxInteger<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, TResult> selector) where TResult : struct, IBinaryInteger<TResult>
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -569,12 +569,12 @@ private static TResult MaxInteger<TSource, TResult>(this IEnumerable<TSource> so
 
         private static TResult? MaxInteger<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, TResult?> selector) where TResult : struct, IBinaryInteger<TResult>
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -644,12 +644,12 @@ private static TResult MaxInteger<TSource, TResult>(this IEnumerable<TSource> so
 
         private static TResult MaxFloat<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, TResult> selector) where TResult : struct, IFloatingPointIeee754<TResult>
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -688,12 +688,12 @@ private static TResult MaxFloat<TSource, TResult>(this IEnumerable<TSource> sour
 
         private static TResult? MaxFloat<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, TResult?> selector) where TResult : struct, IFloatingPointIeee754<TResult>
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -747,12 +747,12 @@ private static TResult MaxFloat<TSource, TResult>(this IEnumerable<TSource> sour
 
         public static decimal Max<TSource>(this IEnumerable<TSource> source, Func<TSource, decimal> selector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -781,12 +781,12 @@ public static decimal Max<TSource>(this IEnumerable<TSource> source, Func<TSourc
 
         public static decimal? Max<TSource>(this IEnumerable<TSource> source, Func<TSource, decimal?> selector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -823,12 +823,12 @@ public static decimal Max<TSource>(this IEnumerable<TSource> source, Func<TSourc
 
         public static TResult? Max<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, TResult> selector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -836,7 +836,7 @@ public static decimal Max<TSource>(this IEnumerable<TSource> source, Func<TSourc
             TResult? value = default;
             using (IEnumerator<TSource> e = source.GetEnumerator())
             {
-                if (value == null)
+                if (value is null)
                 {
                     do
                     {
@@ -847,13 +847,13 @@ public static decimal Max<TSource>(this IEnumerable<TSource> source, Func<TSourc
 
                         value = selector(e.Current);
                     }
-                    while (value == null);
+                    while (value is null);
 
                     Comparer<TResult> comparer = Comparer<TResult>.Default;
                     while (e.MoveNext())
                     {
                         TResult x = selector(e.Current);
-                        if (x != null && comparer.Compare(x, value) > 0)
+                        if (x is not null && comparer.Compare(x, value) > 0)
                         {
                             value = x;
                         }
diff --git a/src/libraries/System.Linq/src/System/Linq/Min.cs b/src/libraries/System.Linq/src/System/Linq/Min.cs
index 3a0f2130d966..39cf8723a907 100644
--- a/src/libraries/System.Linq/src/System/Linq/Min.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Min.cs
@@ -27,7 +27,7 @@ public static partial class Enumerable
 
         private static T? MinInteger<T>(this IEnumerable<T?> source) where T : struct, IBinaryInteger<T>
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -151,7 +151,7 @@ private static T MinFloat<T>(this IEnumerable<T> source) where T : struct, IFloa
 
         private static T? MinFloat<T>(this IEnumerable<T?> source) where T : struct, IFloatingPointIeee754<T>
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -249,7 +249,7 @@ public static decimal Min(this IEnumerable<decimal> source)
 
         public static decimal? Min(this IEnumerable<decimal?> source)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -300,7 +300,7 @@ public static decimal Min(this IEnumerable<decimal> source)
         /// </remarks>
         public static TSource? Min<TSource>(this IEnumerable<TSource> source, IComparer<TSource>? comparer)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -325,7 +325,7 @@ public static decimal Min(this IEnumerable<decimal> source)
             TSource? value = default;
             using (IEnumerator<TSource> e = source.GetEnumerator())
             {
-                if (value == null)
+                if (value is null)
                 {
                     do
                     {
@@ -336,12 +336,12 @@ public static decimal Min(this IEnumerable<decimal> source)
 
                         value = e.Current;
                     }
-                    while (value == null);
+                    while (value is null);
 
                     while (e.MoveNext())
                     {
                         TSource next = e.Current;
-                        if (next != null && comparer.Compare(next, value) < 0)
+                        if (next is not null && comparer.Compare(next, value) < 0)
                         {
                             value = next;
                         }
@@ -410,12 +410,12 @@ public static decimal Min(this IEnumerable<decimal> source)
         /// </remarks>
         public static TSource? MinBy<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IComparer<TKey>? comparer)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (keySelector == null)
+            if (keySelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.keySelector);
             }
@@ -441,7 +441,7 @@ public static decimal Min(this IEnumerable<decimal> source)
 
             if (default(TKey) is null)
             {
-                if (key == null)
+                if (key is null)
                 {
                     TSource firstValue = value;
 
@@ -456,14 +456,14 @@ public static decimal Min(this IEnumerable<decimal> source)
                         value = e.Current;
                         key = keySelector(value);
                     }
-                    while (key == null);
+                    while (key is null);
                 }
 
                 while (e.MoveNext())
                 {
                     TSource nextValue = e.Current;
                     TKey nextKey = keySelector(nextValue);
-                    if (nextKey != null && comparer.Compare(nextKey, key) < 0)
+                    if (nextKey is not null && comparer.Compare(nextKey, key) < 0)
                     {
                         key = nextKey;
                         value = nextValue;
@@ -513,12 +513,12 @@ public static decimal Min(this IEnumerable<decimal> source)
 
         private static TResult MinInteger<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, TResult> selector) where TResult : struct, IBinaryInteger<TResult>
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -547,12 +547,12 @@ private static TResult MinInteger<TSource, TResult>(this IEnumerable<TSource> so
 
         private static TResult? MinInteger<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, TResult?> selector) where TResult : struct, IBinaryInteger<TResult>
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -604,12 +604,12 @@ private static TResult MinInteger<TSource, TResult>(this IEnumerable<TSource> so
 
         private static TResult MinFloat<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, TResult> selector) where TResult : struct, IFloatingPointIeee754<TResult>
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -656,12 +656,12 @@ private static TResult MinFloat<TSource, TResult>(this IEnumerable<TSource> sour
 
         private static TResult? MinFloat<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, TResult?> selector) where TResult : struct, IFloatingPointIeee754<TResult>
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -710,12 +710,12 @@ private static TResult MinFloat<TSource, TResult>(this IEnumerable<TSource> sour
 
         public static decimal Min<TSource>(this IEnumerable<TSource> source, Func<TSource, decimal> selector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -744,12 +744,12 @@ public static decimal Min<TSource>(this IEnumerable<TSource> source, Func<TSourc
 
         public static decimal? Min<TSource>(this IEnumerable<TSource> source, Func<TSource, decimal?> selector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -786,12 +786,12 @@ public static decimal Min<TSource>(this IEnumerable<TSource> source, Func<TSourc
 
         public static TResult? Min<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, TResult> selector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -799,7 +799,7 @@ public static decimal Min<TSource>(this IEnumerable<TSource> source, Func<TSourc
             TResult? value = default;
             using (IEnumerator<TSource> e = source.GetEnumerator())
             {
-                if (value == null)
+                if (value is null)
                 {
                     do
                     {
@@ -810,13 +810,13 @@ public static decimal Min<TSource>(this IEnumerable<TSource> source, Func<TSourc
 
                         value = selector(e.Current);
                     }
-                    while (value == null);
+                    while (value is null);
 
                     Comparer<TResult> comparer = Comparer<TResult>.Default;
                     while (e.MoveNext())
                     {
                         TResult x = selector(e.Current);
-                        if (x != null && comparer.Compare(x, value) < 0)
+                        if (x is not null && comparer.Compare(x, value) < 0)
                         {
                             value = x;
                         }
diff --git a/src/libraries/System.Linq/src/System/Linq/OfType.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/OfType.SpeedOpt.cs
new file mode 100644
index 000000000000..834fcdad006d
--- /dev/null
+++ b/src/libraries/System.Linq/src/System/Linq/OfType.SpeedOpt.cs
@@ -0,0 +1,175 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+namespace System.Linq
+{
+    public static partial class Enumerable
+    {
+        private sealed partial class OfTypeIterator<TResult>
+        {
+            public override int GetCount(bool onlyIfCheap)
+            {
+                if (onlyIfCheap)
+                {
+                    return -1;
+                }
+
+                int count = 0;
+                foreach (object? item in _source)
+                {
+                    if (item is TResult)
+                    {
+                        checked { count++; }
+                    }
+                }
+
+                return count;
+            }
+
+            public override TResult[] ToArray()
+            {
+                SegmentedArrayBuilder<TResult>.ScratchBuffer scratch = default;
+                SegmentedArrayBuilder<TResult> builder = new(scratch);
+
+                foreach (object? item in _source)
+                {
+                    if (item is TResult castItem)
+                    {
+                        builder.Add(castItem);
+                    }
+                }
+
+                TResult[] result = builder.ToArray();
+                builder.Dispose();
+
+                return result;
+            }
+
+            public override List<TResult> ToList()
+            {
+                var list = new List<TResult>();
+
+                foreach (object? item in _source)
+                {
+                    if (item is TResult castItem)
+                    {
+                        list.Add(castItem);
+                    }
+                }
+
+                return list;
+            }
+
+            public override TResult? TryGetFirst(out bool found)
+            {
+                foreach (object? item in _source)
+                {
+                    if (item is TResult castItem)
+                    {
+                        found = true;
+                        return castItem;
+                    }
+                }
+
+                found = false;
+                return default;
+            }
+
+            public override TResult? TryGetLast(out bool found)
+            {
+                IEnumerator e = _source.GetEnumerator();
+                try
+                {
+                    if (e.MoveNext())
+                    {
+                        do
+                        {
+                            if (e.Current is TResult last)
+                            {
+                                found = true;
+
+                                while (e.MoveNext())
+                                {
+                                    if (e.Current is TResult castCurrent)
+                                    {
+                                        last = castCurrent;
+                                    }
+                                }
+
+                                return last;
+                            }
+                        }
+                        while (e.MoveNext());
+                    }
+                }
+                finally
+                {
+                    (e as IDisposable)?.Dispose();
+                }
+
+                found = false;
+                return default;
+            }
+
+            public override TResult? TryGetElementAt(int index, out bool found)
+            {
+                if (index >= 0)
+                {
+                    foreach (object? item in _source)
+                    {
+                        if (item is TResult castItem)
+                        {
+                            if (index == 0)
+                            {
+                                found = true;
+                                return castItem;
+                            }
+
+                            index--;
+                        }
+                    }
+                }
+
+                found = false;
+                return default;
+            }
+
+            public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector)
+            {
+                // If the source is any generic enumerable of a reference type, which should be the 90% case, it'll covariantly
+                // implement IEnumerable<object>, and we can optimize the OfType().Select case by treating the OfType instead like
+                // a Where, using the same WhereSelectIterators that are used for Where.Select.
+                if (!typeof(TResult).IsValueType && _source is IEnumerable<object> objectSource)
+                {
+                    // Unsafe.As here is safe because we're only dealing with reference types, and we know by construction that only
+                    // TResult instances will be passed in. Using Unsafe.As allows us to avoid an extra closure and delegate allocation.
+                    Func<object, TResult2> localSelector =
+#if DEBUG
+                        o =>
+                        {
+                            Debug.Assert(o is TResult);
+                            return selector((TResult)o);
+                        };
+#else
+                        Unsafe.As<Func<object, TResult2>>(selector);
+#endif
+
+                    // We can special-case arrays and IEnumerable to use the corresponding WhereSelectIterators because
+                    // they're covariant. It's not worthwhile checking for List<T> to use the ListWhereSelectIterator
+                    // because List<> is not covariant.
+                    Func<object, bool> isTResult = static o => o is TResult;
+                    return objectSource is object[] array ?
+                        new ArrayWhereSelectIterator<object, TResult2>(array, isTResult, localSelector) :
+                        new IEnumerableWhereSelectIterator<object, TResult2>(objectSource, isTResult, localSelector);
+                }
+
+                return base.Select(selector);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Linq/src/System/Linq/OfType.cs b/src/libraries/System.Linq/src/System/Linq/OfType.cs
new file mode 100644
index 000000000000..ec4db225b2ee
--- /dev/null
+++ b/src/libraries/System.Linq/src/System/Linq/OfType.cs
@@ -0,0 +1,73 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace System.Linq
+{
+    public static partial class Enumerable
+    {
+        public static IEnumerable<TResult> OfType<TResult>(this IEnumerable source)
+        {
+            if (source is null)
+            {
+                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
+            }
+
+            if (default(TResult) is not null && source is IEnumerable<TResult> typedSource)
+            {
+                // The source was already an IEnumerable<TResult> and TResult can't be null. As
+                // such, all values the original input can yield are valid, and we can just return
+                // the strongly-typed input directly as if this were Cast rather than OfType.
+                return typedSource;
+            }
+
+            return new OfTypeIterator<TResult>(source);
+        }
+
+        private sealed partial class OfTypeIterator<TResult>(IEnumerable source) : Iterator<TResult>
+        {
+            private readonly IEnumerable _source = source;
+            private IEnumerator? _enumerator;
+
+            private protected override Iterator<TResult> Clone() => new OfTypeIterator<TResult>(_source);
+
+            public override bool MoveNext()
+            {
+                switch (_state)
+                {
+                    case 1:
+                        _enumerator = _source.GetEnumerator();
+                        _state = 2;
+                        goto case 2;
+
+                    case 2:
+                        Debug.Assert(_enumerator is not null);
+                        while (_enumerator.MoveNext())
+                        {
+                            if (_enumerator.Current is TResult result)
+                            {
+                                _current = result;
+                                return true;
+                            }
+                        }
+
+                        Dispose();
+                        break;
+                }
+
+                return false;
+            }
+
+            public override void Dispose()
+            {
+                (_enumerator as IDisposable)?.Dispose();
+                _enumerator = null;
+
+                base.Dispose();
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Linq/src/System/Linq/OrderBy.cs b/src/libraries/System.Linq/src/System/Linq/OrderBy.cs
index aa7a08ee81c9..b27653910997 100644
--- a/src/libraries/System.Linq/src/System/Linq/OrderBy.cs
+++ b/src/libraries/System.Linq/src/System/Linq/OrderBy.cs
@@ -44,14 +44,14 @@ public static IOrderedEnumerable<T> Order<T>(this IEnumerable<T> source) =>
         /// </remarks>
         public static IOrderedEnumerable<T> Order<T>(this IEnumerable<T> source, IComparer<T>? comparer) =>
             TypeIsImplicitlyStable<T>() && (comparer is null || comparer == Comparer<T>.Default) ?
-                new OrderedImplicitlyStableEnumerable<T>(source, descending: false) :
+                new ImplicitlyStableOrderedIterator<T>(source, descending: false) :
                 OrderBy(source, EnumerableSorter<T>.IdentityFunc, comparer);
 
         public static IOrderedEnumerable<TSource> OrderBy<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector)
-            => new OrderedEnumerable<TSource, TKey>(source, keySelector, null, false, null);
+            => new OrderedIterator<TSource, TKey>(source, keySelector, null, false, null);
 
         public static IOrderedEnumerable<TSource> OrderBy<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IComparer<TKey>? comparer)
-            => new OrderedEnumerable<TSource, TKey>(source, keySelector, comparer, false, null);
+            => new OrderedIterator<TSource, TKey>(source, keySelector, comparer, false, null);
 
         /// <summary>
         /// Sorts the elements of a sequence in descending order.
@@ -89,18 +89,18 @@ public static IOrderedEnumerable<T> OrderDescending<T>(this IEnumerable<T> sourc
         /// </remarks>
         public static IOrderedEnumerable<T> OrderDescending<T>(this IEnumerable<T> source, IComparer<T>? comparer) =>
             TypeIsImplicitlyStable<T>() && (comparer is null || comparer == Comparer<T>.Default) ?
-                new OrderedImplicitlyStableEnumerable<T>(source, descending: true) :
+                new ImplicitlyStableOrderedIterator<T>(source, descending: true) :
                 OrderByDescending(source, EnumerableSorter<T>.IdentityFunc, comparer);
 
         public static IOrderedEnumerable<TSource> OrderByDescending<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector) =>
-            new OrderedEnumerable<TSource, TKey>(source, keySelector, null, true, null);
+            new OrderedIterator<TSource, TKey>(source, keySelector, null, true, null);
 
         public static IOrderedEnumerable<TSource> OrderByDescending<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IComparer<TKey>? comparer) =>
-            new OrderedEnumerable<TSource, TKey>(source, keySelector, comparer, true, null);
+            new OrderedIterator<TSource, TKey>(source, keySelector, comparer, true, null);
 
         public static IOrderedEnumerable<TSource> ThenBy<TSource, TKey>(this IOrderedEnumerable<TSource> source, Func<TSource, TKey> keySelector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -110,7 +110,7 @@ public static IOrderedEnumerable<TSource> ThenBy<TSource, TKey>(this IOrderedEnu
 
         public static IOrderedEnumerable<TSource> ThenBy<TSource, TKey>(this IOrderedEnumerable<TSource> source, Func<TSource, TKey> keySelector, IComparer<TKey>? comparer)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -120,7 +120,7 @@ public static IOrderedEnumerable<TSource> ThenBy<TSource, TKey>(this IOrderedEnu
 
         public static IOrderedEnumerable<TSource> ThenByDescending<TSource, TKey>(this IOrderedEnumerable<TSource> source, Func<TSource, TKey> keySelector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -130,7 +130,7 @@ public static IOrderedEnumerable<TSource> ThenByDescending<TSource, TKey>(this I
 
         public static IOrderedEnumerable<TSource> ThenByDescending<TSource, TKey>(this IOrderedEnumerable<TSource> source, Func<TSource, TKey> keySelector, IComparer<TKey>? comparer)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -140,14 +140,27 @@ public static IOrderedEnumerable<TSource> ThenByDescending<TSource, TKey>(this I
 
         /// <summary>Gets whether the results of an unstable sort will be observably the same as a stable sort.</summary>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static bool TypeIsImplicitlyStable<T>() =>
-            typeof(T) == typeof(sbyte) || typeof(T) == typeof(byte) ||
-            typeof(T) == typeof(int) || typeof(T) == typeof(uint) ||
-            typeof(T) == typeof(short) || typeof(T) == typeof(ushort) ||
-            typeof(T) == typeof(long) || typeof(T) == typeof(ulong) ||
-            typeof(T) == typeof(Int128) || typeof(T) == typeof(UInt128) ||
-            typeof(T) == typeof(nint) || typeof(T) == typeof(nuint) ||
-            typeof(T) == typeof(bool) || typeof(T) == typeof(char);
+        internal static bool TypeIsImplicitlyStable<T>()
+        {
+            Type t = typeof(T);
+            if (typeof(T).IsEnum)
+            {
+                t = typeof(T).GetEnumUnderlyingType();
+            }
+
+            // Check for integral primitive types that compare equally iff they have the same bit pattern.
+            // bool is included because, even though technically it can have 256 different values, anything
+            // other than 0/1 is only producible using unsafe code. It's tempting to include a type like string
+            // here, as it's so commonly used with ordering, but two different string objects can compare equally,
+            // and their reference identity can be observable in a stable vs unstable sort.
+            return
+                t == typeof(sbyte) || t == typeof(byte) || t == typeof(bool) ||
+                t == typeof(short) || t == typeof(ushort) || t == typeof(char) ||
+                t == typeof(int) || t == typeof(uint) ||
+                t == typeof(long) || t == typeof(ulong) ||
+                t == typeof(Int128) || t == typeof(UInt128) ||
+                t == typeof(nint) || t == typeof(nuint);
+        }
     }
 
     public interface IOrderedEnumerable<out TElement> : IEnumerable<TElement>
diff --git a/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.SpeedOpt.cs
index 615c196cced3..bf65a34b06ef 100644
--- a/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.SpeedOpt.cs
@@ -8,428 +8,543 @@
 
 namespace System.Linq
 {
-    internal abstract partial class OrderedEnumerable<TElement> : IPartition<TElement>
+    public static partial class Enumerable
     {
-        public virtual TElement[] ToArray()
+        private abstract partial class OrderedIterator<TElement>
         {
-            TElement[] buffer = _source.ToArray();
-            if (buffer.Length == 0)
+            public override TElement[] ToArray()
             {
-                return buffer;
-            }
-
-            TElement[] array = new TElement[buffer.Length];
-            Fill(buffer, array);
-            return array;
-        }
+                TElement[] buffer = _source.ToArray();
+                if (buffer.Length <= 1)
+                {
+                    return buffer;
+                }
 
-        public virtual List<TElement> ToList()
-        {
-            TElement[] buffer = _source.ToArray();
+                TElement[] array = new TElement[buffer.Length];
+                Fill(buffer, array);
+                return array;
+            }
 
-            List<TElement> list = new();
-            if (buffer.Length > 0)
+            public override List<TElement> ToList()
             {
-                Fill(buffer, Enumerable.SetCountAndGetSpan(list, buffer.Length));
-            }
+                TElement[] buffer = _source.ToArray();
 
-            return list;
-        }
+                List<TElement> list = new(buffer.Length);
+                if (buffer.Length >= 2)
+                {
+                    Fill(buffer, SetCountAndGetSpan(list, buffer.Length));
+                }
+                else if (buffer.Length == 1)
+                {
+                    list.Add(buffer[0]);
+                }
 
-        private void Fill(TElement[] buffer, Span<TElement> destination)
-        {
-            int[] map = SortedMap(buffer);
-            for (int i = 0; i < destination.Length; i++)
-            {
-                destination[i] = buffer[map[i]];
+                return list;
             }
-        }
 
-        public int GetCount(bool onlyIfCheap)
-        {
-            if (_source is IIListProvider<TElement> listProv)
+            private void Fill(TElement[] buffer, Span<TElement> destination)
             {
-                return listProv.GetCount(onlyIfCheap);
+                int[] map = SortedMap(buffer);
+                for (int i = 0; i < destination.Length; i++)
+                {
+                    destination[i] = buffer[map[i]];
+                }
             }
 
-            return !onlyIfCheap || _source is ICollection<TElement> || _source is ICollection ? _source.Count() : -1;
-        }
-
-        internal TElement[] ToArray(int minIdx, int maxIdx)
-        {
-            TElement[] buffer = _source.ToArray();
-            if (buffer.Length <= minIdx)
+            public override int GetCount(bool onlyIfCheap)
             {
-                return [];
-            }
+                if (_source is Iterator<TElement> iterator)
+                {
+                    return iterator.GetCount(onlyIfCheap);
+                }
 
-            if (buffer.Length <= maxIdx)
-            {
-                maxIdx = buffer.Length - 1;
+                return !onlyIfCheap || _source is ICollection<TElement> || _source is ICollection ? _source.Count() : -1;
             }
 
-            if (minIdx == maxIdx)
+            internal TElement[] ToArray(int minIdx, int maxIdx)
             {
-                return [GetEnumerableSorter().ElementAt(buffer, buffer.Length, minIdx)];
-            }
+                TElement[] buffer = _source.ToArray();
+                if (buffer.Length <= minIdx)
+                {
+                    return [];
+                }
 
-            TElement[] array = new TElement[maxIdx - minIdx + 1];
+                if (buffer.Length <= maxIdx)
+                {
+                    maxIdx = buffer.Length - 1;
+                }
 
-            Fill(minIdx, maxIdx, buffer, array);
+                if (minIdx == maxIdx)
+                {
+                    return [GetEnumerableSorter().ElementAt(buffer, buffer.Length, minIdx)];
+                }
 
-            return array;
-        }
+                TElement[] array = new TElement[maxIdx - minIdx + 1];
 
-        internal List<TElement> ToList(int minIdx, int maxIdx)
-        {
-            TElement[] buffer = _source.ToArray();
-            if (buffer.Length <= minIdx)
-            {
-                return new List<TElement>();
-            }
+                Fill(minIdx, maxIdx, buffer, array);
 
-            if (buffer.Length <= maxIdx)
-            {
-                maxIdx = buffer.Length - 1;
+                return array;
             }
 
-            if (minIdx == maxIdx)
+            internal List<TElement> ToList(int minIdx, int maxIdx)
             {
-                return new List<TElement>(1) { GetEnumerableSorter().ElementAt(buffer, buffer.Length, minIdx) };
-            }
+                TElement[] buffer = _source.ToArray();
+                if (buffer.Length <= minIdx)
+                {
+                    return new List<TElement>();
+                }
 
-            List<TElement> list = new();
-            Fill(minIdx, maxIdx, buffer, Enumerable.SetCountAndGetSpan(list, maxIdx - minIdx + 1));
-            return list;
-        }
+                if (buffer.Length <= maxIdx)
+                {
+                    maxIdx = buffer.Length - 1;
+                }
 
-        private void Fill(int minIdx, int maxIdx, TElement[] buffer, Span<TElement> destination)
-        {
-            int[] map = SortedMap(buffer, minIdx, maxIdx);
-            int idx = 0;
-            while (minIdx <= maxIdx)
-            {
-                destination[idx] = buffer[map[minIdx]];
-                ++idx;
-                ++minIdx;
+                if (minIdx == maxIdx)
+                {
+                    return new List<TElement>(1) { GetEnumerableSorter().ElementAt(buffer, buffer.Length, minIdx) };
+                }
+
+                List<TElement> list = new();
+                Fill(minIdx, maxIdx, buffer, SetCountAndGetSpan(list, maxIdx - minIdx + 1));
+                return list;
             }
-        }
 
-        internal int GetCount(int minIdx, int maxIdx, bool onlyIfCheap)
-        {
-            int count = GetCount(onlyIfCheap);
-            if (count <= 0)
+            private void Fill(int minIdx, int maxIdx, TElement[] buffer, Span<TElement> destination)
             {
-                return count;
+                int[] map = SortedMap(buffer, minIdx, maxIdx);
+                int idx = 0;
+                while (minIdx <= maxIdx)
+                {
+                    destination[idx] = buffer[map[minIdx]];
+                    ++idx;
+                    ++minIdx;
+                }
             }
 
-            if (count <= minIdx)
+            internal int GetCount(int minIdx, int maxIdx, bool onlyIfCheap)
             {
-                return 0;
-            }
+                int count = GetCount(onlyIfCheap);
+                if (count <= 0)
+                {
+                    return count;
+                }
 
-            return (count <= maxIdx ? count : maxIdx + 1) - minIdx;
-        }
+                if (count <= minIdx)
+                {
+                    return 0;
+                }
+
+                return (count <= maxIdx ? count : maxIdx + 1) - minIdx;
+            }
 
-        public IPartition<TElement> Skip(int count) => new OrderedPartition<TElement>(this, count, int.MaxValue);
+            public override Iterator<TElement> Skip(int count) => new SkipTakeOrderedIterator<TElement>(this, count, int.MaxValue);
 
-        public IPartition<TElement> Take(int count) => new OrderedPartition<TElement>(this, 0, count - 1);
+            public override Iterator<TElement> Take(int count) => new SkipTakeOrderedIterator<TElement>(this, 0, count - 1);
 
-        public TElement? TryGetElementAt(int index, out bool found)
-        {
-            if (index == 0)
+            public override TElement? TryGetElementAt(int index, out bool found)
             {
-                return TryGetFirst(out found);
+                if (index == 0)
+                {
+                    return TryGetFirst(out found);
+                }
+
+                if (index > 0)
+                {
+                    TElement[] buffer = _source.ToArray();
+                    if (index < buffer.Length)
+                    {
+                        found = true;
+                        return GetEnumerableSorter().ElementAt(buffer, buffer.Length, index);
+                    }
+                }
+
+                found = false;
+                return default;
             }
 
-            if (index > 0)
+            public override TElement? TryGetFirst(out bool found)
             {
-                TElement[] buffer = _source.ToArray();
-                if (index < buffer.Length)
+                CachingComparer<TElement> comparer = GetComparer();
+                using (IEnumerator<TElement> e = _source.GetEnumerator())
                 {
+                    if (!e.MoveNext())
+                    {
+                        found = false;
+                        return default;
+                    }
+
+                    TElement value = e.Current;
+                    comparer.SetElement(value);
+                    while (e.MoveNext())
+                    {
+                        TElement x = e.Current;
+                        if (comparer.Compare(x, true) < 0)
+                        {
+                            value = x;
+                        }
+                    }
+
                     found = true;
-                    return GetEnumerableSorter().ElementAt(buffer, buffer.Length, index);
+                    return value;
                 }
             }
 
-            found = false;
-            return default;
-        }
-
-        public virtual TElement? TryGetFirst(out bool found)
-        {
-            CachingComparer<TElement> comparer = GetComparer();
-            using (IEnumerator<TElement> e = _source.GetEnumerator())
+            public override TElement? TryGetLast(out bool found)
             {
-                if (!e.MoveNext())
+                using (IEnumerator<TElement> e = _source.GetEnumerator())
                 {
-                    found = false;
-                    return default;
-                }
+                    if (!e.MoveNext())
+                    {
+                        found = false;
+                        return default;
+                    }
 
-                TElement value = e.Current;
-                comparer.SetElement(value);
-                while (e.MoveNext())
-                {
-                    TElement x = e.Current;
-                    if (comparer.Compare(x, true) < 0)
+                    CachingComparer<TElement> comparer = GetComparer();
+                    TElement value = e.Current;
+                    comparer.SetElement(value);
+                    while (e.MoveNext())
                     {
-                        value = x;
+                        TElement current = e.Current;
+                        if (comparer.Compare(current, false) >= 0)
+                        {
+                            value = current;
+                        }
                     }
-                }
 
-                found = true;
-                return value;
+                    found = true;
+                    return value;
+                }
             }
-        }
 
-        public virtual TElement? TryGetLast(out bool found)
-        {
-            using (IEnumerator<TElement> e = _source.GetEnumerator())
+            public TElement? TryGetLast(int minIdx, int maxIdx, out bool found)
             {
-                if (!e.MoveNext())
+                TElement[] buffer = _source.ToArray();
+                if (minIdx < buffer.Length)
                 {
-                    found = false;
-                    return default;
+                    found = true;
+                    return (maxIdx < buffer.Length - 1) ?
+                        GetEnumerableSorter().ElementAt(buffer, buffer.Length, maxIdx) :
+                        Last(buffer);
                 }
 
+                found = false;
+                return default;
+            }
+
+            private TElement Last(TElement[] items)
+            {
                 CachingComparer<TElement> comparer = GetComparer();
-                TElement value = e.Current;
+
+                TElement value = items[0];
                 comparer.SetElement(value);
-                while (e.MoveNext())
+
+                for (int i = 1; i < items.Length; ++i)
                 {
-                    TElement current = e.Current;
-                    if (comparer.Compare(current, false) >= 0)
+                    TElement x = items[i];
+                    if (comparer.Compare(x, cacheLower: false) >= 0)
                     {
-                        value = current;
+                        value = x;
                     }
                 }
 
-                found = true;
                 return value;
             }
         }
 
-        public TElement? TryGetLast(int minIdx, int maxIdx, out bool found)
-        {
-            TElement[] buffer = _source.ToArray();
-            if (minIdx < buffer.Length)
-            {
-                found = true;
-                return (maxIdx < buffer.Length - 1) ?
-                    GetEnumerableSorter().ElementAt(buffer, buffer.Length, maxIdx) :
-                    Last(buffer);
-            }
-
-            found = false;
-            return default;
-        }
-
-        private TElement Last(TElement[] items)
+        private sealed partial class OrderedIterator<TElement, TKey> : OrderedIterator<TElement>
         {
-            CachingComparer<TElement> comparer = GetComparer();
-
-            TElement value = items[0];
-            comparer.SetElement(value);
+            // For complicated cases, rely on the base implementation that's more comprehensive.
+            // For the simple case of OrderBy(...).First() or OrderByDescending(...).First() (i.e. where
+            // there's just a single comparer we need to factor in), we can just do the iteration directly.
 
-            for (int i = 1; i < items.Length; ++i)
+            public override TElement? TryGetFirst(out bool found)
             {
-                TElement x = items[i];
-                if (comparer.Compare(x, cacheLower: false) >= 0)
+                if (_parent is not null)
                 {
-                    value = x;
+                    return base.TryGetFirst(out found);
                 }
-            }
 
-            return value;
-        }
-    }
+                using IEnumerator<TElement> e = _source.GetEnumerator();
 
-    internal sealed partial class OrderedEnumerable<TElement, TKey> : OrderedEnumerable<TElement>
-    {
-        // For complicated cases, rely on the base implementation that's more comprehensive.
-        // For the simple case of OrderBy(...).First() or OrderByDescending(...).First() (i.e. where
-        // there's just a single comparer we need to factor in), we can just do the iteration directly.
+                if (e.MoveNext())
+                {
+                    IComparer<TKey> comparer = _comparer;
+                    Func<TElement, TKey> keySelector = _keySelector;
 
-        public override TElement? TryGetFirst(out bool found)
-        {
-            if (_parent is not null)
-            {
-                return base.TryGetFirst(out found);
-            }
+                    TElement resultValue = e.Current;
+                    TKey resultKey = keySelector(resultValue);
+
+                    if (_descending)
+                    {
+                        while (e.MoveNext())
+                        {
+                            TElement nextValue = e.Current;
+                            TKey nextKey = keySelector(nextValue);
+                            if (comparer.Compare(nextKey, resultKey) > 0)
+                            {
+                                resultKey = nextKey;
+                                resultValue = nextValue;
+                            }
+                        }
+                    }
+                    else
+                    {
+                        while (e.MoveNext())
+                        {
+                            TElement nextValue = e.Current;
+                            TKey nextKey = keySelector(nextValue);
+                            if (comparer.Compare(nextKey, resultKey) < 0)
+                            {
+                                resultKey = nextKey;
+                                resultValue = nextValue;
+                            }
+                        }
+                    }
+
+                    found = true;
+                    return resultValue;
+                }
 
-            using IEnumerator<TElement> e = _source.GetEnumerator();
+                found = false;
+                return default;
+            }
 
-            if (e.MoveNext())
+            public override TElement? TryGetLast(out bool found)
             {
-                IComparer<TKey> comparer = _comparer;
-                Func<TElement, TKey> keySelector = _keySelector;
+                if (_parent is not null)
+                {
+                    return base.TryGetLast(out found);
+                }
 
-                TElement resultValue = e.Current;
-                TKey resultKey = keySelector(resultValue);
+                using IEnumerator<TElement> e = _source.GetEnumerator();
 
-                if (_descending)
+                if (e.MoveNext())
                 {
-                    while (e.MoveNext())
+                    IComparer<TKey> comparer = _comparer;
+                    Func<TElement, TKey> keySelector = _keySelector;
+
+                    TElement resultValue = e.Current;
+                    TKey resultKey = keySelector(resultValue);
+
+                    if (_descending)
                     {
-                        TElement nextValue = e.Current;
-                        TKey nextKey = keySelector(nextValue);
-                        if (comparer.Compare(nextKey, resultKey) > 0)
+                        while (e.MoveNext())
                         {
-                            resultKey = nextKey;
-                            resultValue = nextValue;
+                            TElement nextValue = e.Current;
+                            TKey nextKey = keySelector(nextValue);
+                            if (comparer.Compare(nextKey, resultKey) <= 0)
+                            {
+                                resultKey = nextKey;
+                                resultValue = nextValue;
+                            }
                         }
                     }
-                }
-                else
-                {
-                    while (e.MoveNext())
+                    else
                     {
-                        TElement nextValue = e.Current;
-                        TKey nextKey = keySelector(nextValue);
-                        if (comparer.Compare(nextKey, resultKey) < 0)
+                        while (e.MoveNext())
                         {
-                            resultKey = nextKey;
-                            resultValue = nextValue;
+                            TElement nextValue = e.Current;
+                            TKey nextKey = keySelector(nextValue);
+                            if (comparer.Compare(nextKey, resultKey) >= 0)
+                            {
+                                resultKey = nextKey;
+                                resultValue = nextValue;
+                            }
                         }
                     }
+
+                    found = true;
+                    return resultValue;
                 }
 
-                found = true;
-                return resultValue;
+                found = false;
+                return default;
             }
-
-            found = false;
-            return default;
         }
 
-        public override TElement? TryGetLast(out bool found)
+        private sealed partial class ImplicitlyStableOrderedIterator<TElement> : OrderedIterator<TElement>
         {
-            if (_parent is not null)
+            public override TElement[] ToArray()
             {
-                return base.TryGetLast(out found);
+                TElement[] array = _source.ToArray();
+                Sort(array, _descending);
+                return array;
             }
 
-            using IEnumerator<TElement> e = _source.GetEnumerator();
-
-            if (e.MoveNext())
+            public override List<TElement> ToList()
             {
-                IComparer<TKey> comparer = _comparer;
-                Func<TElement, TKey> keySelector = _keySelector;
+                List<TElement> list = _source.ToList();
+                Sort(CollectionsMarshal.AsSpan(list), _descending);
+                return list;
+            }
+
+            public override TElement? TryGetFirst(out bool found) =>
+                TryGetFirstOrLast(out found, first: !_descending);
 
-                TElement resultValue = e.Current;
-                TKey resultKey = keySelector(resultValue);
+            public override TElement? TryGetLast(out bool found) =>
+                TryGetFirstOrLast(out found, first: _descending);
 
-                if (_descending)
+            private TElement? TryGetFirstOrLast(out bool found, bool first)
+            {
+                if (TryGetSpan(_source, out ReadOnlySpan<TElement> span))
                 {
-                    while (e.MoveNext())
+                    if (span.Length != 0)
                     {
-                        TElement nextValue = e.Current;
-                        TKey nextKey = keySelector(nextValue);
-                        if (comparer.Compare(nextKey, resultKey) <= 0)
-                        {
-                            resultKey = nextKey;
-                            resultValue = nextValue;
-                        }
+                        Debug.Assert(TypeIsImplicitlyStable<TElement>(), "Using Min/Max has different semantics for floating-point values.");
+
+                        found = true;
+                        return first ?
+                            Min(_source) :
+                            Max(_source);
                     }
                 }
                 else
                 {
-                    while (e.MoveNext())
+                    using IEnumerator<TElement> e = _source.GetEnumerator();
+
+                    if (e.MoveNext())
                     {
-                        TElement nextValue = e.Current;
-                        TKey nextKey = keySelector(nextValue);
-                        if (comparer.Compare(nextKey, resultKey) >= 0)
+                        TElement resultValue = e.Current;
+
+                        if (first)
                         {
-                            resultKey = nextKey;
-                            resultValue = nextValue;
+                            while (e.MoveNext())
+                            {
+                                TElement nextValue = e.Current;
+                                if (Comparer<TElement>.Default.Compare(nextValue, resultValue) < 0)
+                                {
+                                    resultValue = nextValue;
+                                }
+                            }
+                        }
+                        else
+                        {
+                            while (e.MoveNext())
+                            {
+                                TElement nextValue = e.Current;
+                                if (Comparer<TElement>.Default.Compare(nextValue, resultValue) >= 0)
+                                {
+                                    resultValue = nextValue;
+                                }
+                            }
                         }
+
+                        found = true;
+                        return resultValue;
                     }
                 }
 
-                found = true;
-                return resultValue;
+                found = false;
+                return default;
             }
-
-            found = false;
-            return default;
         }
-    }
 
-    internal sealed partial class OrderedImplicitlyStableEnumerable<TElement> : OrderedEnumerable<TElement>
-    {
-        public override TElement[] ToArray()
+        private sealed class SkipTakeOrderedIterator<TElement> : Iterator<TElement>
         {
-            TElement[] array = _source.ToArray();
-            Sort(array, _descending);
-            return array;
-        }
+            private readonly OrderedIterator<TElement> _source;
+            private readonly int _minIndexInclusive;
+            private readonly int _maxIndexInclusive;
 
-        public override List<TElement> ToList()
-        {
-            List<TElement> list = _source.ToList();
-            Sort(CollectionsMarshal.AsSpan(list), _descending);
-            return list;
-        }
+            private TElement[]? _buffer;
+            private int[]? _map;
+            private int _maxIdx;
 
-        public override TElement? TryGetFirst(out bool found) =>
-            TryGetFirstOrLast(out found, first: !_descending);
+            public SkipTakeOrderedIterator(OrderedIterator<TElement> source, int minIdxInclusive, int maxIdxInclusive)
+            {
+                _source = source;
+                _minIndexInclusive = minIdxInclusive;
+                _maxIndexInclusive = maxIdxInclusive;
+            }
 
-        public override TElement? TryGetLast(out bool found) =>
-            TryGetFirstOrLast(out found, first: _descending);
+            private protected override Iterator<TElement> Clone() => new SkipTakeOrderedIterator<TElement>(_source, _minIndexInclusive, _maxIndexInclusive);
 
-        private TElement? TryGetFirstOrLast(out bool found, bool first)
-        {
-            if (Enumerable.TryGetSpan(_source, out ReadOnlySpan<TElement> span))
+            public override bool MoveNext()
             {
-                if (span.Length != 0)
+                int state = _state;
+
+                Initialized:
+                if (state > 1)
                 {
-                    Debug.Assert(Enumerable.TypeIsImplicitlyStable<TElement>(), "Using Min/Max has different semantics for floating-point values.");
+                    Debug.Assert(_buffer is not null);
+                    Debug.Assert(_map is not null);
 
-                    found = true;
-                    return first ?
-                        Enumerable.Min(_source) :
-                        Enumerable.Max(_source);
+                    int[] map = _map;
+                    int i = state - 2 + _minIndexInclusive;
+                    if (i <= _maxIdx)
+                    {
+                        _current = _buffer[map[i]];
+                        _state++;
+                        return true;
+                    }
                 }
-            }
-            else
-            {
-                using IEnumerator<TElement> e = _source.GetEnumerator();
-
-                if (e.MoveNext())
+                else if (state == 1)
                 {
-                    TElement resultValue = e.Current;
-
-                    if (first)
+                    TElement[] buffer = _source._source.ToArray();
+                    int count = buffer.Length;
+                    if (count > _minIndexInclusive)
                     {
-                        while (e.MoveNext())
+                        _maxIdx = _maxIndexInclusive;
+                        if (count <= _maxIdx)
                         {
-                            TElement nextValue = e.Current;
-                            if (Comparer<TElement>.Default.Compare(nextValue, resultValue) < 0)
-                            {
-                                resultValue = nextValue;
-                            }
+                            _maxIdx = count - 1;
                         }
-                    }
-                    else
-                    {
-                        while (e.MoveNext())
+
+                        if (_minIndexInclusive == _maxIdx)
                         {
-                            TElement nextValue = e.Current;
-                            if (Comparer<TElement>.Default.Compare(nextValue, resultValue) >= 0)
-                            {
-                                resultValue = nextValue;
-                            }
+                            _current = _source.GetEnumerableSorter().ElementAt(buffer, count, _minIndexInclusive);
+                            _state = -1;
+                            return true;
                         }
+
+                        _map = _source.SortedMap(buffer, _minIndexInclusive, _maxIdx);
+                        _buffer = buffer;
+                        _state = state = 2;
+                        goto Initialized;
                     }
+                }
 
-                    found = true;
-                    return resultValue;
+                Dispose();
+                return false;
+            }
+
+            public override Iterator<TElement>? Skip(int count)
+            {
+                int minIndex = _minIndexInclusive + count;
+                return (uint)minIndex > (uint)_maxIndexInclusive ? null : new SkipTakeOrderedIterator<TElement>(_source, minIndex, _maxIndexInclusive);
+            }
+
+            public override Iterator<TElement> Take(int count)
+            {
+                int maxIndex = _minIndexInclusive + count - 1;
+                if ((uint)maxIndex >= (uint)_maxIndexInclusive)
+                {
+                    return this;
                 }
+
+                return new SkipTakeOrderedIterator<TElement>(_source, _minIndexInclusive, maxIndex);
             }
 
-            found = false;
-            return default;
+            public override TElement? TryGetElementAt(int index, out bool found)
+            {
+                if ((uint)index <= (uint)(_maxIndexInclusive - _minIndexInclusive))
+                {
+                    return _source.TryGetElementAt(index + _minIndexInclusive, out found);
+                }
+
+                found = false;
+                return default;
+            }
+
+            public override TElement? TryGetFirst(out bool found) => _source.TryGetElementAt(_minIndexInclusive, out found);
+
+            public override TElement? TryGetLast(out bool found) =>
+                _source.TryGetLast(_minIndexInclusive, _maxIndexInclusive, out found);
+
+            public override TElement[] ToArray() => _source.ToArray(_minIndexInclusive, _maxIndexInclusive);
+
+            public override List<TElement> ToList() => _source.ToList(_minIndexInclusive, _maxIndexInclusive);
+
+            public override int GetCount(bool onlyIfCheap) => _source.GetCount(_minIndexInclusive, _maxIndexInclusive, onlyIfCheap);
         }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs b/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
index 6b6b83bac93f..91d51da2e585 100644
--- a/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
+++ b/src/libraries/System.Linq/src/System/Linq/OrderedEnumerable.cs
@@ -7,618 +7,654 @@
 
 namespace System.Linq
 {
-    internal abstract partial class OrderedEnumerable<TElement> : IOrderedEnumerable<TElement>
+    public static partial class Enumerable
     {
-        internal IEnumerable<TElement> _source;
+        private abstract partial class OrderedIterator<TElement> : Iterator<TElement>, IOrderedEnumerable<TElement>
+        {
+            internal readonly IEnumerable<TElement> _source;
 
-        protected OrderedEnumerable(IEnumerable<TElement> source) => _source = source;
+            protected OrderedIterator(IEnumerable<TElement> source) => _source = source;
 
-        private int[] SortedMap(TElement[] buffer) => GetEnumerableSorter().Sort(buffer, buffer.Length);
+            private protected int[] SortedMap(TElement[] buffer) => GetEnumerableSorter().Sort(buffer, buffer.Length);
 
-        private int[] SortedMap(TElement[] buffer, int minIdx, int maxIdx) =>
-            GetEnumerableSorter().Sort(buffer, buffer.Length, minIdx, maxIdx);
+            internal int[] SortedMap(TElement[] buffer, int minIdx, int maxIdx) =>
+                GetEnumerableSorter().Sort(buffer, buffer.Length, minIdx, maxIdx);
 
-        public virtual IEnumerator<TElement> GetEnumerator()
-        {
-            TElement[] buffer = _source.ToArray();
-            if (buffer.Length > 0)
+            internal abstract EnumerableSorter<TElement> GetEnumerableSorter(EnumerableSorter<TElement>? next = null);
+
+            internal abstract CachingComparer<TElement> GetComparer(CachingComparer<TElement>? childComparer = null);
+
+            IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+
+            IOrderedEnumerable<TElement> IOrderedEnumerable<TElement>.CreateOrderedEnumerable<TKey>(Func<TElement, TKey> keySelector, IComparer<TKey>? comparer, bool descending) =>
+                new OrderedIterator<TElement, TKey>(_source, keySelector, comparer, @descending, this);
+
+            public TElement? TryGetLast(Func<TElement, bool> predicate, out bool found)
             {
-                int[] map = SortedMap(buffer);
-                for (int i = 0; i < buffer.Length; i++)
+                CachingComparer<TElement> comparer = GetComparer();
+                using (IEnumerator<TElement> e = _source.GetEnumerator())
                 {
-                    yield return buffer[map[i]];
+                    TElement value;
+                    do
+                    {
+                        if (!e.MoveNext())
+                        {
+                            found = false;
+                            return default;
+                        }
+
+                        value = e.Current;
+                    }
+                    while (!predicate(value));
+
+                    comparer.SetElement(value);
+                    while (e.MoveNext())
+                    {
+                        TElement x = e.Current;
+                        if (predicate(x) && comparer.Compare(x, false) >= 0)
+                        {
+                            value = x;
+                        }
+                    }
+
+                    found = true;
+                    return value;
                 }
             }
         }
 
-        internal IEnumerator<TElement> GetEnumerator(int minIdx, int maxIdx)
+        private sealed partial class OrderedIterator<TElement, TKey> : OrderedIterator<TElement>
         {
-            TElement[] buffer = _source.ToArray();
-            int count = buffer.Length;
-            if (count > minIdx)
+            private readonly OrderedIterator<TElement>? _parent;
+            private readonly Func<TElement, TKey> _keySelector;
+            private readonly IComparer<TKey> _comparer;
+            private readonly bool _descending;
+            private TElement[]? _buffer;
+            private int[]? _map;
+
+            internal OrderedIterator(IEnumerable<TElement> source, Func<TElement, TKey> keySelector, IComparer<TKey>? comparer, bool descending, OrderedIterator<TElement>? parent) :
+                base(source)
             {
-                if (count <= maxIdx)
+                if (source is null)
                 {
-                    maxIdx = count - 1;
+                    ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
                 }
-
-                if (minIdx == maxIdx)
+                if (keySelector is null)
                 {
-                    yield return GetEnumerableSorter().ElementAt(buffer, count, minIdx);
-                }
-                else
-                {
-                    int[] map = SortedMap(buffer, minIdx, maxIdx);
-                    while (minIdx <= maxIdx)
-                    {
-                        yield return buffer[map[minIdx]];
-                        ++minIdx;
-                    }
+                    ThrowHelper.ThrowArgumentNullException(ExceptionArgument.keySelector);
                 }
+
+                _parent = parent;
+                _keySelector = keySelector;
+                _comparer = comparer ?? Comparer<TKey>.Default;
+                _descending = descending;
             }
-        }
 
-        private EnumerableSorter<TElement> GetEnumerableSorter() => GetEnumerableSorter(null);
+            private protected override Iterator<TElement> Clone() => new OrderedIterator<TElement, TKey>(_source, _keySelector, _comparer, _descending, _parent);
 
-        internal abstract EnumerableSorter<TElement> GetEnumerableSorter(EnumerableSorter<TElement>? next);
+            internal override EnumerableSorter<TElement> GetEnumerableSorter(EnumerableSorter<TElement>? next)
+            {
+                // Special case the common use of string with default comparer. Comparer<string>.Default checks the
+                // thread's Culture on each call which is an overhead which is not required, because we are about to
+                // do a sort which remains on the current thread (and EnumerableSorter is not used afterwards).
+                IComparer<TKey> comparer = _comparer;
+                if (typeof(TKey) == typeof(string) && comparer == Comparer<string>.Default)
+                {
+                    comparer = (IComparer<TKey>)StringComparer.CurrentCulture;
+                }
 
-        internal abstract CachingComparer<TElement> GetComparer(CachingComparer<TElement>? childComparer = null);
+                EnumerableSorter<TElement> sorter = new EnumerableSorter<TElement, TKey>(_keySelector, comparer, _descending, next);
+                if (_parent is not null)
+                {
+                    sorter = _parent.GetEnumerableSorter(sorter);
+                }
 
-        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+                return sorter;
+            }
 
-        IOrderedEnumerable<TElement> IOrderedEnumerable<TElement>.CreateOrderedEnumerable<TKey>(Func<TElement, TKey> keySelector, IComparer<TKey>? comparer, bool descending) =>
-            new OrderedEnumerable<TElement, TKey>(_source, keySelector, comparer, @descending, this);
+            internal override CachingComparer<TElement> GetComparer(CachingComparer<TElement>? childComparer)
+            {
+                CachingComparer<TElement> cmp = childComparer is null
+                    ? new CachingComparer<TElement, TKey>(_keySelector, _comparer, _descending)
+                    : new CachingComparerWithChild<TElement, TKey>(_keySelector, _comparer, _descending, childComparer);
+                return _parent is not null ? _parent.GetComparer(cmp) : cmp;
+            }
 
-        public TElement? TryGetLast(Func<TElement, bool> predicate, out bool found)
-        {
-            CachingComparer<TElement> comparer = GetComparer();
-            using (IEnumerator<TElement> e = _source.GetEnumerator())
+            public override bool MoveNext()
             {
-                TElement value;
-                do
+                int state = _state;
+
+                Initialized:
+                if (state > 1)
                 {
-                    if (!e.MoveNext())
+                    Debug.Assert(_buffer is not null);
+                    Debug.Assert(_map is not null);
+                    Debug.Assert(_map.Length == _buffer.Length);
+
+                    int[] map = _map;
+                    int i = state - 2;
+                    if ((uint)i < (uint)map.Length)
                     {
-                        found = false;
-                        return default;
+                        _current = _buffer[map[i]];
+                        _state++;
+                        return true;
                     }
-
-                    value = e.Current;
                 }
-                while (!predicate(value));
-
-                comparer.SetElement(value);
-                while (e.MoveNext())
+                else if (state == 1)
                 {
-                    TElement x = e.Current;
-                    if (predicate(x) && comparer.Compare(x, false) >= 0)
+                    TElement[] buffer = _source.ToArray();
+                    if (buffer.Length != 0)
                     {
-                        value = x;
+                        _map = SortedMap(buffer);
+                        _buffer = buffer;
+                        _state = state = 2;
+                        goto Initialized;
                     }
                 }
 
-                found = true;
-                return value;
+                Dispose();
+                return false;
             }
-        }
-    }
 
-    internal sealed partial class OrderedEnumerable<TElement, TKey> : OrderedEnumerable<TElement>
-    {
-        private readonly OrderedEnumerable<TElement>? _parent;
-        private readonly Func<TElement, TKey> _keySelector;
-        private readonly IComparer<TKey> _comparer;
-        private readonly bool _descending;
-
-        internal OrderedEnumerable(IEnumerable<TElement> source, Func<TElement, TKey> keySelector, IComparer<TKey>? comparer, bool descending, OrderedEnumerable<TElement>? parent) :
-            base(source)
-        {
-            if (source is null)
+            public override void Dispose()
             {
-                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
+                _buffer = null;
+                _map = null;
+                base.Dispose();
             }
-            if (keySelector is null)
-            {
-                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.keySelector);
-            }
-
-            _parent = parent;
-            _keySelector = keySelector;
-            _comparer = comparer ?? Comparer<TKey>.Default;
-            _descending = descending;
         }
 
-        internal override EnumerableSorter<TElement> GetEnumerableSorter(EnumerableSorter<TElement>? next)
+        /// <summary>An ordered enumerable used by Order/OrderDescending for Ts that are bitwise indistinguishable for any considered equal.</summary>
+        private sealed partial class ImplicitlyStableOrderedIterator<TElement> : OrderedIterator<TElement>
         {
-            // Special case the common use of string with default comparer. Comparer<string>.Default checks the
-            // thread's Culture on each call which is an overhead which is not required, because we are about to
-            // do a sort which remains on the current thread (and EnumerableSorter is not used afterwards).
-            IComparer<TKey> comparer = _comparer;
-            if (typeof(TKey) == typeof(string) && comparer == Comparer<string>.Default)
-            {
-                comparer = (IComparer<TKey>)StringComparer.CurrentCulture;
-            }
+            private readonly bool _descending;
+            private TElement[]? _buffer;
 
-            EnumerableSorter<TElement> sorter = new EnumerableSorter<TElement, TKey>(_keySelector, comparer, _descending, next);
-            if (_parent != null)
+            public ImplicitlyStableOrderedIterator(IEnumerable<TElement> source, bool descending) : base(source)
             {
-                sorter = _parent.GetEnumerableSorter(sorter);
-            }
-
-            return sorter;
-        }
-
-        internal override CachingComparer<TElement> GetComparer(CachingComparer<TElement>? childComparer)
-        {
-            CachingComparer<TElement> cmp = childComparer == null
-                ? new CachingComparer<TElement, TKey>(_keySelector, _comparer, _descending)
-                : new CachingComparerWithChild<TElement, TKey>(_keySelector, _comparer, _descending, childComparer);
-            return _parent != null ? _parent.GetComparer(cmp) : cmp;
-        }
-    }
-
-    /// <summary>An ordered enumerable used by Order/OrderDescending for Ts that are bitwise indistinguishable for any considered equal.</summary>
-    internal sealed partial class OrderedImplicitlyStableEnumerable<TElement> : OrderedEnumerable<TElement>
-    {
-        private readonly bool _descending;
+                Debug.Assert(TypeIsImplicitlyStable<TElement>());
 
-        public OrderedImplicitlyStableEnumerable(IEnumerable<TElement> source, bool descending) : base(source)
-        {
-            Debug.Assert(Enumerable.TypeIsImplicitlyStable<TElement>());
+                if (source is null)
+                {
+                    ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
+                }
 
-            if (source is null)
-            {
-                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
+                _descending = descending;
             }
 
-            _descending = descending;
-        }
+            private protected override Iterator<TElement> Clone() => new ImplicitlyStableOrderedIterator<TElement>(_source, _descending);
 
-        internal override CachingComparer<TElement> GetComparer(CachingComparer<TElement>? childComparer) =>
-            childComparer == null ?
-                new CachingComparer<TElement, TElement>(EnumerableSorter<TElement>.IdentityFunc, Comparer<TElement>.Default, _descending) :
-                new CachingComparerWithChild<TElement, TElement>(EnumerableSorter<TElement>.IdentityFunc, Comparer<TElement>.Default, _descending, childComparer);
+            internal override CachingComparer<TElement> GetComparer(CachingComparer<TElement>? childComparer) =>
+                childComparer is null ?
+                    new CachingComparer<TElement, TElement>(EnumerableSorter<TElement>.IdentityFunc, Comparer<TElement>.Default, _descending) :
+                    new CachingComparerWithChild<TElement, TElement>(EnumerableSorter<TElement>.IdentityFunc, Comparer<TElement>.Default, _descending, childComparer);
 
-        internal override EnumerableSorter<TElement> GetEnumerableSorter(EnumerableSorter<TElement>? next) =>
-            new EnumerableSorter<TElement, TElement>(EnumerableSorter<TElement>.IdentityFunc, Comparer<TElement>.Default, _descending, next);
+            internal override EnumerableSorter<TElement> GetEnumerableSorter(EnumerableSorter<TElement>? next) =>
+                new EnumerableSorter<TElement, TElement>(EnumerableSorter<TElement>.IdentityFunc, Comparer<TElement>.Default, _descending, next);
 
-        public override IEnumerator<TElement> GetEnumerator()
-        {
-            TElement[] buffer = _source.ToArray();
-            if (buffer.Length > 0)
+            public override bool MoveNext()
             {
-                Sort(buffer, _descending);
-                for (int i = 0; i < buffer.Length; i++)
+                int state = _state;
+                TElement[]? buffer;
+
+                Initialized:
+                if (state > 1)
                 {
-                    yield return buffer[i];
+                    buffer = _buffer;
+                    Debug.Assert(buffer is not null);
+
+                    int i = state - 2;
+                    if ((uint)i < (uint)buffer.Length)
+                    {
+                        _current = buffer[i];
+                        _state++;
+                        return true;
+                    }
+                }
+                else if (state == 1)
+                {
+                    buffer = _source.ToArray();
+                    if (buffer.Length != 0)
+                    {
+                        Sort(buffer, _descending);
+                        _buffer = buffer;
+                        _state = state = 2;
+                        goto Initialized;
+                    }
                 }
-            }
-        }
 
-        private static void Sort(Span<TElement> span, bool descending)
-        {
-            if (descending)
-            {
-                span.Sort(static (a, b) => Comparer<TElement>.Default.Compare(b, a));
+                Dispose();
+                return false;
             }
-            else
+
+            public override void Dispose()
             {
-                span.Sort();
+                _buffer = null;
+                base.Dispose();
             }
-        }
-    }
-
-    // A comparer that chains comparisons, and pushes through the last element found to be
-    // lower or higher (depending on use), so as to represent the sort of comparisons
-    // done by OrderBy().ThenBy() combinations.
-    internal abstract class CachingComparer<TElement>
-    {
-        internal abstract int Compare(TElement element, bool cacheLower);
-
-        internal abstract void SetElement(TElement element);
-    }
-
-    internal class CachingComparer<TElement, TKey> : CachingComparer<TElement>
-    {
-        protected readonly Func<TElement, TKey> _keySelector;
-        protected readonly IComparer<TKey> _comparer;
-        protected readonly bool _descending;
-        protected TKey? _lastKey;
-
-        public CachingComparer(Func<TElement, TKey> keySelector, IComparer<TKey> comparer, bool descending)
-        {
-            _keySelector = keySelector;
-            _comparer = comparer;
-            _descending = descending;
-        }
 
-        internal override int Compare(TElement element, bool cacheLower)
-        {
-            TKey newKey = _keySelector(element);
-            int cmp = _descending ? _comparer.Compare(_lastKey, newKey) : _comparer.Compare(newKey, _lastKey);
-            if (cacheLower == cmp < 0)
+            private static void Sort(Span<TElement> span, bool descending)
             {
-                _lastKey = newKey;
+                if (descending)
+                {
+                    span.Sort(static (a, b) => Comparer<TElement>.Default.Compare(b, a));
+                }
+                else
+                {
+                    span.Sort();
+                }
             }
-
-            return cmp;
         }
 
-        internal override void SetElement(TElement element)
+        // A comparer that chains comparisons, and pushes through the last element found to be
+        // lower or higher (depending on use), so as to represent the sort of comparisons
+        // done by OrderBy().ThenBy() combinations.
+        private abstract class CachingComparer<TElement>
         {
-            _lastKey = _keySelector(element);
-        }
-    }
-
-    internal sealed class CachingComparerWithChild<TElement, TKey> : CachingComparer<TElement, TKey>
-    {
-        private readonly CachingComparer<TElement> _child;
+            internal abstract int Compare(TElement element, bool cacheLower);
 
-        public CachingComparerWithChild(Func<TElement, TKey> keySelector, IComparer<TKey> comparer, bool descending, CachingComparer<TElement> child)
-            : base(keySelector, comparer, descending)
-        {
-            _child = child;
+            internal abstract void SetElement(TElement element);
         }
 
-        internal override int Compare(TElement element, bool cacheLower)
+        private class CachingComparer<TElement, TKey> : CachingComparer<TElement>
         {
-            TKey newKey = _keySelector(element);
-            int cmp = _descending ? _comparer.Compare(_lastKey, newKey) : _comparer.Compare(newKey, _lastKey);
-            if (cmp == 0)
+            protected readonly Func<TElement, TKey> _keySelector;
+            protected readonly IComparer<TKey> _comparer;
+            protected readonly bool _descending;
+            protected TKey? _lastKey;
+
+            public CachingComparer(Func<TElement, TKey> keySelector, IComparer<TKey> comparer, bool descending)
             {
-                return _child.Compare(element, cacheLower);
+                _keySelector = keySelector;
+                _comparer = comparer;
+                _descending = descending;
             }
 
-            if (cacheLower == cmp < 0)
+            internal override int Compare(TElement element, bool cacheLower)
             {
-                _lastKey = newKey;
-                _child.SetElement(element);
+                TKey newKey = _keySelector(element);
+                int cmp = _descending ? _comparer.Compare(_lastKey, newKey) : _comparer.Compare(newKey, _lastKey);
+                if (cacheLower == cmp < 0)
+                {
+                    _lastKey = newKey;
+                }
+
+                return cmp;
             }
 
-            return cmp;
+            internal override void SetElement(TElement element)
+            {
+                _lastKey = _keySelector(element);
+            }
         }
 
-        internal override void SetElement(TElement element)
+        private sealed class CachingComparerWithChild<TElement, TKey> : CachingComparer<TElement, TKey>
         {
-            base.SetElement(element);
-            _child.SetElement(element);
-        }
-    }
-
-    internal abstract class EnumerableSorter<TElement>
-    {
-        /// <summary>Function that returns its input unmodified.</summary>
-        /// <remarks>
-        /// Used for reference equality in order to avoid unnecessary computation when a caller
-        /// can benefit from knowing that the produced value is identical to the input.
-        /// </remarks>
-        internal static readonly Func<TElement, TElement> IdentityFunc = e => e;
-
-        internal abstract void ComputeKeys(TElement[] elements, int count);
-
-        internal abstract int CompareAnyKeys(int index1, int index2);
+            private readonly CachingComparer<TElement> _child;
 
-        private int[] ComputeMap(TElement[] elements, int count)
-        {
-            ComputeKeys(elements, count);
-            int[] map = new int[count];
-            for (int i = 0; i < map.Length; i++)
+            public CachingComparerWithChild(Func<TElement, TKey> keySelector, IComparer<TKey> comparer, bool descending, CachingComparer<TElement> child)
+                : base(keySelector, comparer, descending)
             {
-                map[i] = i;
+                _child = child;
             }
 
-            return map;
-        }
+            internal override int Compare(TElement element, bool cacheLower)
+            {
+                TKey newKey = _keySelector(element);
+                int cmp = _descending ? _comparer.Compare(_lastKey, newKey) : _comparer.Compare(newKey, _lastKey);
+                if (cmp == 0)
+                {
+                    return _child.Compare(element, cacheLower);
+                }
 
-        internal int[] Sort(TElement[] elements, int count)
-        {
-            int[] map = ComputeMap(elements, count);
-            QuickSort(map, 0, count - 1);
-            return map;
-        }
+                if (cacheLower == cmp < 0)
+                {
+                    _lastKey = newKey;
+                    _child.SetElement(element);
+                }
 
-        internal int[] Sort(TElement[] elements, int count, int minIdx, int maxIdx)
-        {
-            int[] map = ComputeMap(elements, count);
-            PartialQuickSort(map, 0, count - 1, minIdx, maxIdx);
-            return map;
-        }
+                return cmp;
+            }
 
-        internal TElement ElementAt(TElement[] elements, int count, int idx)
-        {
-            int[] map = ComputeMap(elements, count);
-            return idx == 0 ?
-                elements[Min(map, count)] :
-                elements[QuickSelect(map, count - 1, idx)];
+            internal override void SetElement(TElement element)
+            {
+                base.SetElement(element);
+                _child.SetElement(element);
+            }
         }
 
-        protected abstract void QuickSort(int[] map, int left, int right);
-
-        // Sorts the k elements between minIdx and maxIdx without sorting all elements
-        // Time complexity: O(n + k log k) best and average case. O(n^2) worse case.
-        protected abstract void PartialQuickSort(int[] map, int left, int right, int minIdx, int maxIdx);
+        private abstract class EnumerableSorter<TElement>
+        {
+            /// <summary>Function that returns its input unmodified.</summary>
+            /// <remarks>
+            /// Used for reference equality in order to avoid unnecessary computation when a caller
+            /// can benefit from knowing that the produced value is identical to the input.
+            /// </remarks>
+            internal static readonly Func<TElement, TElement> IdentityFunc = e => e;
 
-        // Finds the element that would be at idx if the collection was sorted.
-        // Time complexity: O(n) best and average case. O(n^2) worse case.
-        protected abstract int QuickSelect(int[] map, int right, int idx);
+            internal abstract void ComputeKeys(TElement[] elements, int count);
 
-        protected abstract int Min(int[] map, int count);
-    }
+            internal abstract int CompareAnyKeys(int index1, int index2);
 
-    internal sealed class EnumerableSorter<TElement, TKey> : EnumerableSorter<TElement>
-    {
-        private readonly Func<TElement, TKey> _keySelector;
-        private readonly IComparer<TKey> _comparer;
-        private readonly bool _descending;
-        private readonly EnumerableSorter<TElement>? _next;
-        private TKey[]? _keys;
+            private int[] ComputeMap(TElement[] elements, int count)
+            {
+                ComputeKeys(elements, count);
 
-        internal EnumerableSorter(Func<TElement, TKey> keySelector, IComparer<TKey> comparer, bool descending, EnumerableSorter<TElement>? next)
-        {
-            _keySelector = keySelector;
-            _comparer = comparer;
-            _descending = descending;
-            _next = next;
-        }
+                int[] map = new int[count];
+                FillIncrementing(map, 0);
+                return map;
+            }
 
-        internal override void ComputeKeys(TElement[] elements, int count)
-        {
-            Func<TElement, TKey> keySelector = _keySelector;
-            if (!ReferenceEquals(keySelector, IdentityFunc))
+            internal int[] Sort(TElement[] elements, int count)
             {
-                var keys = new TKey[count];
-                for (int i = 0; i < keys.Length; i++)
-                {
-                    keys[i] = keySelector(elements[i]);
-                }
-                _keys = keys;
+                int[] map = ComputeMap(elements, count);
+                QuickSort(map, 0, count - 1);
+                return map;
             }
-            else
+
+            internal int[] Sort(TElement[] elements, int count, int minIdx, int maxIdx)
             {
-                // The key selector is our known identity function, which means we don't
-                // need to invoke the key selector for every element.  Further, we can just
-                // use the original array as the keys (even if count is smaller, as the additional
-                // values will just be ignored).
-                Debug.Assert(typeof(TKey) == typeof(TElement));
-                _keys = (TKey[])(object)elements;
+                int[] map = ComputeMap(elements, count);
+                PartialQuickSort(map, 0, count - 1, minIdx, maxIdx);
+                return map;
             }
 
-            _next?.ComputeKeys(elements, count);
-        }
-
-        internal override int CompareAnyKeys(int index1, int index2)
-        {
-            TKey[]? keys = _keys;
-            Debug.Assert(keys != null);
-
-            int c = _comparer.Compare(keys[index1], keys[index2]);
-            if (c == 0)
+            internal TElement ElementAt(TElement[] elements, int count, int idx)
             {
-                if (_next == null)
-                {
-                    return index1 - index2; // ensure stability of sort
-                }
-
-                return _next.CompareAnyKeys(index1, index2);
+                int[] map = ComputeMap(elements, count);
+                return idx == 0 ?
+                    elements[Min(map, count)] :
+                    elements[QuickSelect(map, count - 1, idx)];
             }
 
-            // -c will result in a negative value for int.MinValue (-int.MinValue == int.MinValue).
-            // Flipping keys earlier is more likely to trigger something strange in a comparer,
-            // particularly as it comes to the sort being stable.
-            return (_descending != (c > 0)) ? 1 : -1;
-        }
+            protected abstract void QuickSort(int[] map, int left, int right);
 
-        private int CompareAnyKeys_DefaultComparer_NoNext_Ascending(int index1, int index2)
-        {
-            Debug.Assert(typeof(TKey).IsValueType);
-            Debug.Assert(_comparer == Comparer<TKey>.Default);
-            Debug.Assert(_next is null);
-            Debug.Assert(!_descending);
-
-            TKey[]? keys = _keys;
-            Debug.Assert(keys != null);
-
-            int c = Comparer<TKey>.Default.Compare(keys[index1], keys[index2]);
-            return
-                c == 0 ? index1 - index2 : // ensure stability of sort
-                c;
-        }
+            // Sorts the k elements between minIdx and maxIdx without sorting all elements
+            // Time complexity: O(n + k log k) best and average case. O(n^2) worse case.
+            protected abstract void PartialQuickSort(int[] map, int left, int right, int minIdx, int maxIdx);
 
-        private int CompareAnyKeys_DefaultComparer_NoNext_Descending(int index1, int index2)
-        {
-            Debug.Assert(typeof(TKey).IsValueType);
-            Debug.Assert(_comparer == Comparer<TKey>.Default);
-            Debug.Assert(_next is null);
-            Debug.Assert(_descending);
-
-            TKey[]? keys = _keys;
-            Debug.Assert(keys != null);
-
-            int c = Comparer<TKey>.Default.Compare(keys[index2], keys[index1]);
-            return
-                c == 0 ? index1 - index2 : // ensure stability of sort
-                c;
-        }
+            // Finds the element that would be at idx if the collection was sorted.
+            // Time complexity: O(n) best and average case. O(n^2) worse case.
+            protected abstract int QuickSelect(int[] map, int right, int idx);
 
-        private int CompareKeys(int index1, int index2) => index1 == index2 ? 0 : CompareAnyKeys(index1, index2);
+            protected abstract int Min(int[] map, int count);
+        }
 
-        protected override void QuickSort(int[] keys, int lo, int hi)
+        private sealed class EnumerableSorter<TElement, TKey> : EnumerableSorter<TElement>
         {
-            Comparison<int> comparison;
+            private readonly Func<TElement, TKey> _keySelector;
+            private readonly IComparer<TKey> _comparer;
+            private readonly bool _descending;
+            private readonly EnumerableSorter<TElement>? _next;
+            private TKey[]? _keys;
+
+            internal EnumerableSorter(Func<TElement, TKey> keySelector, IComparer<TKey> comparer, bool descending, EnumerableSorter<TElement>? next)
+            {
+                _keySelector = keySelector;
+                _comparer = comparer;
+                _descending = descending;
+                _next = next;
+            }
 
-            if (typeof(TKey).IsValueType && _next is null && _comparer == Comparer<TKey>.Default)
+            internal override void ComputeKeys(TElement[] elements, int count)
             {
-                // We can use Comparer<TKey>.Default.Compare and benefit from devirtualization and inlining.
-                // We can also avoid extra steps to check whether we need to deal with a subsequent tie breaker (_next).
-                if (!_descending)
+                Func<TElement, TKey> keySelector = _keySelector;
+                if (!ReferenceEquals(keySelector, IdentityFunc))
                 {
-                    comparison = CompareAnyKeys_DefaultComparer_NoNext_Ascending;
+                    var keys = new TKey[count];
+                    for (int i = 0; i < keys.Length; i++)
+                    {
+                        keys[i] = keySelector(elements[i]);
+                    }
+                    _keys = keys;
                 }
                 else
                 {
-                    comparison = CompareAnyKeys_DefaultComparer_NoNext_Descending;
+                    // The key selector is our known identity function, which means we don't
+                    // need to invoke the key selector for every element.  Further, we can just
+                    // use the original array as the keys (even if count is smaller, as the additional
+                    // values will just be ignored).
+                    Debug.Assert(typeof(TKey) == typeof(TElement));
+                    _keys = (TKey[])(object)elements;
                 }
-            }
-            else
-            {
-                comparison = CompareAnyKeys;
-            }
 
-            new Span<int>(keys, lo, hi - lo + 1).Sort(comparison);
-        }
+                _next?.ComputeKeys(elements, count);
+            }
 
-        // Sorts the k elements between minIdx and maxIdx without sorting all elements
-        // Time complexity: O(n + k log k) best and average case. O(n^2) worse case.
-        protected override void PartialQuickSort(int[] map, int left, int right, int minIdx, int maxIdx)
-        {
-            do
+            internal override int CompareAnyKeys(int index1, int index2)
             {
-                int i = left;
-                int j = right;
-                int x = map[i + ((j - i) >> 1)];
-                do
+                TKey[]? keys = _keys;
+                Debug.Assert(keys is not null);
+
+                int c = _comparer.Compare(keys[index1], keys[index2]);
+                if (c == 0)
                 {
-                    while (i < map.Length && CompareKeys(x, map[i]) > 0)
+                    if (_next is null)
                     {
-                        i++;
+                        return index1 - index2; // ensure stability of sort
                     }
 
-                    while (j >= 0 && CompareKeys(x, map[j]) < 0)
-                    {
-                        j--;
-                    }
+                    return _next.CompareAnyKeys(index1, index2);
+                }
 
-                    if (i > j)
-                    {
-                        break;
-                    }
+                // -c will result in a negative value for int.MinValue (-int.MinValue == int.MinValue).
+                // Flipping keys earlier is more likely to trigger something strange in a comparer,
+                // particularly as it comes to the sort being stable.
+                return (_descending != (c > 0)) ? 1 : -1;
+            }
 
-                    if (i < j)
-                    {
-                        int temp = map[i];
-                        map[i] = map[j];
-                        map[j] = temp;
-                    }
+            private int CompareAnyKeys_DefaultComparer_NoNext_Ascending(int index1, int index2)
+            {
+                Debug.Assert(typeof(TKey).IsValueType);
+                Debug.Assert(_comparer == Comparer<TKey>.Default);
+                Debug.Assert(_next is null);
+                Debug.Assert(!_descending);
+
+                TKey[]? keys = _keys;
+                Debug.Assert(keys is not null);
+
+                int c = Comparer<TKey>.Default.Compare(keys[index1], keys[index2]);
+                return
+                    c == 0 ? index1 - index2 : // ensure stability of sort
+                    c;
+            }
 
-                    i++;
-                    j--;
-                }
-                while (i <= j);
+            private int CompareAnyKeys_DefaultComparer_NoNext_Descending(int index1, int index2)
+            {
+                Debug.Assert(typeof(TKey).IsValueType);
+                Debug.Assert(_comparer == Comparer<TKey>.Default);
+                Debug.Assert(_next is null);
+                Debug.Assert(_descending);
+
+                TKey[]? keys = _keys;
+                Debug.Assert(keys is not null);
+
+                int c = Comparer<TKey>.Default.Compare(keys[index2], keys[index1]);
+                return
+                    c == 0 ? index1 - index2 : // ensure stability of sort
+                    c;
+            }
 
-                if (minIdx >= i)
-                {
-                    left = i + 1;
-                }
-                else if (maxIdx <= j)
-                {
-                    right = j - 1;
-                }
+            private int CompareKeys(int index1, int index2) => index1 == index2 ? 0 : CompareAnyKeys(index1, index2);
 
-                if (j - left <= right - i)
+            protected override void QuickSort(int[] keys, int lo, int hi)
+            {
+                Comparison<int> comparison;
+
+                if (typeof(TKey).IsValueType && _next is null && _comparer == Comparer<TKey>.Default)
                 {
-                    if (left < j)
+                    // We can use Comparer<TKey>.Default.Compare and benefit from devirtualization and inlining.
+                    // We can also avoid extra steps to check whether we need to deal with a subsequent tie breaker (_next).
+                    if (!_descending)
                     {
-                        PartialQuickSort(map, left, j, minIdx, maxIdx);
+                        comparison = CompareAnyKeys_DefaultComparer_NoNext_Ascending;
+                    }
+                    else
+                    {
+                        comparison = CompareAnyKeys_DefaultComparer_NoNext_Descending;
                     }
-
-                    left = i;
                 }
                 else
                 {
-                    if (i < right)
-                    {
-                        PartialQuickSort(map, i, right, minIdx, maxIdx);
-                    }
-
-                    right = j;
+                    comparison = CompareAnyKeys;
                 }
+
+                new Span<int>(keys, lo, hi - lo + 1).Sort(comparison);
             }
-            while (left < right);
-        }
 
-        // Finds the element that would be at idx if the collection was sorted.
-        // Time complexity: O(n) best and average case. O(n^2) worse case.
-        protected override int QuickSelect(int[] map, int right, int idx)
-        {
-            int left = 0;
-            do
+            // Sorts the k elements between minIdx and maxIdx without sorting all elements
+            // Time complexity: O(n + k log k) best and average case. O(n^2) worse case.
+            protected override void PartialQuickSort(int[] map, int left, int right, int minIdx, int maxIdx)
             {
-                int i = left;
-                int j = right;
-                int x = map[i + ((j - i) >> 1)];
                 do
                 {
-                    while (i < map.Length && CompareKeys(x, map[i]) > 0)
+                    int i = left;
+                    int j = right;
+                    int x = map[i + ((j - i) >> 1)];
+                    do
                     {
+                        while (i < map.Length && CompareKeys(x, map[i]) > 0)
+                        {
+                            i++;
+                        }
+
+                        while (j >= 0 && CompareKeys(x, map[j]) < 0)
+                        {
+                            j--;
+                        }
+
+                        if (i > j)
+                        {
+                            break;
+                        }
+
+                        if (i < j)
+                        {
+                            int temp = map[i];
+                            map[i] = map[j];
+                            map[j] = temp;
+                        }
+
                         i++;
+                        j--;
                     }
+                    while (i <= j);
 
-                    while (j >= 0 && CompareKeys(x, map[j]) < 0)
+                    if (minIdx >= i)
                     {
-                        j--;
+                        left = i + 1;
                     }
-
-                    if (i > j)
+                    else if (maxIdx <= j)
                     {
-                        break;
+                        right = j - 1;
                     }
 
-                    if (i < j)
+                    if (j - left <= right - i)
                     {
-                        int temp = map[i];
-                        map[i] = map[j];
-                        map[j] = temp;
+                        if (left < j)
+                        {
+                            PartialQuickSort(map, left, j, minIdx, maxIdx);
+                        }
+
+                        left = i;
                     }
+                    else
+                    {
+                        if (i < right)
+                        {
+                            PartialQuickSort(map, i, right, minIdx, maxIdx);
+                        }
 
-                    i++;
-                    j--;
+                        right = j;
+                    }
                 }
-                while (i <= j);
+                while (left < right);
+            }
 
-                if (i <= idx)
-                {
-                    left = i + 1;
-                }
-                else
+            // Finds the element that would be at idx if the collection was sorted.
+            // Time complexity: O(n) best and average case. O(n^2) worse case.
+            protected override int QuickSelect(int[] map, int right, int idx)
+            {
+                int left = 0;
+                do
                 {
-                    right = j - 1;
-                }
+                    int i = left;
+                    int j = right;
+                    int x = map[i + ((j - i) >> 1)];
+                    do
+                    {
+                        while (i < map.Length && CompareKeys(x, map[i]) > 0)
+                        {
+                            i++;
+                        }
+
+                        while (j >= 0 && CompareKeys(x, map[j]) < 0)
+                        {
+                            j--;
+                        }
+
+                        if (i > j)
+                        {
+                            break;
+                        }
+
+                        if (i < j)
+                        {
+                            int temp = map[i];
+                            map[i] = map[j];
+                            map[j] = temp;
+                        }
 
-                if (j - left <= right - i)
-                {
-                    if (left < j)
+                        i++;
+                        j--;
+                    }
+                    while (i <= j);
+
+                    if (i <= idx)
                     {
-                        right = j;
+                        left = i + 1;
+                    }
+                    else
+                    {
+                        right = j - 1;
                     }
 
-                    left = i;
-                }
-                else
-                {
-                    if (i < right)
+                    if (j - left <= right - i)
                     {
+                        if (left < j)
+                        {
+                            right = j;
+                        }
+
                         left = i;
                     }
+                    else
+                    {
+                        if (i < right)
+                        {
+                            left = i;
+                        }
 
-                    right = j;
+                        right = j;
+                    }
                 }
-            }
-            while (left < right);
+                while (left < right);
 
-            return map[idx];
-        }
+                return map[idx];
+            }
 
-        protected override int Min(int[] map, int count)
-        {
-            int index = 0;
-            for (int i = 1; i < count; i++)
+            protected override int Min(int[] map, int count)
             {
-                if (CompareKeys(map[i], map[index]) < 0)
+                int index = 0;
+                for (int i = 1; i < count; i++)
                 {
-                    index = i;
+                    if (CompareKeys(map[i], map[index]) < 0)
+                    {
+                        index = i;
+                    }
                 }
+                return map[index];
             }
-            return map[index];
         }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/PartialArrayEnumerator.cs b/src/libraries/System.Linq/src/System/Linq/PartialArrayEnumerator.cs
new file mode 100644
index 000000000000..7a002d115c69
--- /dev/null
+++ b/src/libraries/System.Linq/src/System/Linq/PartialArrayEnumerator.cs
@@ -0,0 +1,42 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace System.Linq
+{
+    /// <summary>Enumerator for iterating through part of an array.</summary>
+    internal sealed class PartialArrayEnumerator<T> : IEnumerator<T>
+    {
+        private readonly T[] _array;
+        private readonly int _count;
+        private int _index = -1;
+
+        public PartialArrayEnumerator(T[] array, int count)
+        {
+            Debug.Assert(array is not null);
+            _array = array;
+            _count = count;
+        }
+
+        public bool MoveNext()
+        {
+            if (_index + 1 < _count)
+            {
+                _index++;
+                return true;
+            }
+
+            return false;
+        }
+
+        public T Current => _array[_index];
+        object? IEnumerator.Current => Current;
+
+        public void Dispose() { }
+
+        public void Reset() => _index = -1;
+    }
+}
diff --git a/src/libraries/System.Linq/src/System/Linq/Range.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Range.SpeedOpt.cs
index c125673e16d3..41d1336018bd 100644
--- a/src/libraries/System.Linq/src/System/Linq/Range.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Range.SpeedOpt.cs
@@ -2,76 +2,42 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
-using System.Numerics;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
 
 namespace System.Linq
 {
     public static partial class Enumerable
     {
-        private sealed partial class RangeIterator : IPartition<int>, IList<int>, IReadOnlyList<int>
+        private sealed partial class RangeIterator : IList<int>, IReadOnlyList<int>
         {
             public override IEnumerable<TResult> Select<TResult>(Func<int, TResult> selector)
             {
-                return new SelectRangeIterator<TResult>(_start, _end, selector);
+                return new RangeSelectIterator<TResult>(_start, _end, selector);
             }
 
-            public int[] ToArray()
+            public override int[] ToArray()
             {
                 int start = _start;
                 int[] array = new int[_end - start];
-                Fill(array, start);
+                FillIncrementing(array, start);
                 return array;
             }
 
-            public List<int> ToList()
+            public override List<int> ToList()
             {
                 (int start, int end) = (_start, _end);
                 List<int> list = new List<int>(end - start);
-                Fill(SetCountAndGetSpan(list, end - start), start);
+                FillIncrementing(SetCountAndGetSpan(list, end - start), start);
                 return list;
             }
 
             public void CopyTo(int[] array, int arrayIndex) =>
-                Fill(array.AsSpan(arrayIndex, _end - _start), _start);
+                FillIncrementing(array.AsSpan(arrayIndex, _end - _start), _start);
 
-            private static void Fill(Span<int> destination, int value)
-            {
-                ref int pos = ref MemoryMarshal.GetReference(destination);
-                ref int end = ref Unsafe.Add(ref pos, destination.Length);
-
-                if (Vector.IsHardwareAccelerated &&
-                    destination.Length >= Vector<int>.Count)
-                {
-                    Vector<int> init = Vector<int>.Indices;
-                    Vector<int> current = new Vector<int>(value) + init;
-                    Vector<int> increment = new Vector<int>(Vector<int>.Count);
-
-                    ref int oneVectorFromEnd = ref Unsafe.Subtract(ref end, Vector<int>.Count);
-                    do
-                    {
-                        current.StoreUnsafe(ref pos);
-                        current += increment;
-                        pos = ref Unsafe.Add(ref pos, Vector<int>.Count);
-                    }
-                    while (!Unsafe.IsAddressGreaterThan(ref pos, ref oneVectorFromEnd));
-
-                    value = current[0];
-                }
-
-                while (Unsafe.IsAddressLessThan(ref pos, ref end))
-                {
-                    pos = value++;
-                    pos = ref Unsafe.Add(ref pos, 1);
-                }
-            }
-
-            public int GetCount(bool onlyIfCheap) => _end - _start;
+            public override int GetCount(bool onlyIfCheap) => _end - _start;
 
             public int Count => _end - _start;
 
-            public IPartition<int>? Skip(int count)
+            public override Iterator<int>? Skip(int count)
             {
                 if (count >= _end - _start)
                 {
@@ -81,7 +47,7 @@ private static void Fill(Span<int> destination, int value)
                 return new RangeIterator(_start + count, _end - _start - count);
             }
 
-            public IPartition<int> Take(int count)
+            public override Iterator<int> Take(int count)
             {
                 int curCount = _end - _start;
                 if (count >= curCount)
@@ -92,7 +58,7 @@ public IPartition<int> Take(int count)
                 return new RangeIterator(_start, count);
             }
 
-            public int TryGetElementAt(int index, out bool found)
+            public override int TryGetElementAt(int index, out bool found)
             {
                 if ((uint)index < (uint)(_end - _start))
                 {
@@ -104,13 +70,13 @@ public int TryGetElementAt(int index, out bool found)
                 return 0;
             }
 
-            public int TryGetFirst(out bool found)
+            public override int TryGetFirst(out bool found)
             {
                 found = true;
                 return _start;
             }
 
-            public int TryGetLast(out bool found)
+            public override int TryGetLast(out bool found)
             {
                 found = true;
                 return _end - 1;
diff --git a/src/libraries/System.Linq/src/System/Linq/Range.cs b/src/libraries/System.Linq/src/System/Linq/Range.cs
index 206f90415d52..1bf57b0e520d 100644
--- a/src/libraries/System.Linq/src/System/Linq/Range.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Range.cs
@@ -3,6 +3,9 @@
 
 using System.Collections.Generic;
 using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 
 namespace System.Linq
 {
@@ -42,7 +45,7 @@ public RangeIterator(int start, int count)
 
             private int CountForDebugger => _end - _start;
 
-            public override Iterator<int> Clone() => new RangeIterator(_start, _end - _start);
+            private protected override Iterator<int> Clone() => new RangeIterator(_start, _end - _start);
 
             public override bool MoveNext()
             {
@@ -71,5 +74,37 @@ public override void Dispose()
                 _state = -1; // Don't reset current
             }
         }
+
+        /// <summary>Fills the <paramref name="destination"/> with incrementing numbers, starting from <paramref name="value"/>.</summary>
+        private static void FillIncrementing(Span<int> destination, int value)
+        {
+            ref int pos = ref MemoryMarshal.GetReference(destination);
+            ref int end = ref Unsafe.Add(ref pos, destination.Length);
+
+            if (Vector.IsHardwareAccelerated &&
+                destination.Length >= Vector<int>.Count)
+            {
+                Vector<int> init = Vector<int>.Indices;
+                Vector<int> current = new Vector<int>(value) + init;
+                Vector<int> increment = new Vector<int>(Vector<int>.Count);
+
+                ref int oneVectorFromEnd = ref Unsafe.Subtract(ref end, Vector<int>.Count);
+                do
+                {
+                    current.StoreUnsafe(ref pos);
+                    current += increment;
+                    pos = ref Unsafe.Add(ref pos, Vector<int>.Count);
+                }
+                while (!Unsafe.IsAddressGreaterThan(ref pos, ref oneVectorFromEnd));
+
+                value = current[0];
+            }
+
+            while (Unsafe.IsAddressLessThan(ref pos, ref end))
+            {
+                pos = value++;
+                pos = ref Unsafe.Add(ref pos, 1);
+            }
+        }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/Repeat.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Repeat.SpeedOpt.cs
index 3c25ee20ba5f..2e98ae6b8fec 100644
--- a/src/libraries/System.Linq/src/System/Linq/Repeat.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Repeat.SpeedOpt.cs
@@ -8,15 +8,12 @@ namespace System.Linq
 {
     public static partial class Enumerable
     {
-        private sealed partial class RepeatIterator<TResult> : IPartition<TResult>, IList<TResult>, IReadOnlyList<TResult>
+        private sealed partial class RepeatIterator<TResult> : IList<TResult>, IReadOnlyList<TResult>
         {
-            public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector) =>
-                new SelectIPartitionIterator<TResult, TResult2>(this, selector);
-
-            public TResult[] ToArray()
+            public override TResult[] ToArray()
             {
                 TResult[] array = new TResult[_count];
-                if (_current != null)
+                if (_current is not null)
                 {
                     Array.Fill(array, _current);
                 }
@@ -24,7 +21,7 @@ public TResult[] ToArray()
                 return array;
             }
 
-            public List<TResult> ToList()
+            public override List<TResult> ToList()
             {
                 List<TResult> list = new List<TResult>(_count);
                 SetCountAndGetSpan(list, _count).Fill(_current);
@@ -32,11 +29,11 @@ public List<TResult> ToList()
                 return list;
             }
 
-            public int GetCount(bool onlyIfCheap) => _count;
+            public override int GetCount(bool onlyIfCheap) => _count;
 
             public int Count => _count;
 
-            public IPartition<TResult>? Skip(int count)
+            public override Iterator<TResult>? Skip(int count)
             {
                 Debug.Assert(count > 0);
 
@@ -48,7 +45,7 @@ public List<TResult> ToList()
                 return new RepeatIterator<TResult>(_current, _count - count);
             }
 
-            public IPartition<TResult> Take(int count)
+            public override Iterator<TResult> Take(int count)
             {
                 Debug.Assert(count > 0);
 
@@ -60,7 +57,7 @@ public IPartition<TResult> Take(int count)
                 return new RepeatIterator<TResult>(_current, count);
             }
 
-            public TResult? TryGetElementAt(int index, out bool found)
+            public override TResult? TryGetElementAt(int index, out bool found)
             {
                 if ((uint)index < (uint)_count)
                 {
@@ -72,13 +69,13 @@ public IPartition<TResult> Take(int count)
                 return default;
             }
 
-            public TResult TryGetFirst(out bool found)
+            public override TResult TryGetFirst(out bool found)
             {
                 found = true;
                 return _current;
             }
 
-            public TResult TryGetLast(out bool found)
+            public override TResult TryGetLast(out bool found)
             {
                 found = true;
                 return _current;
diff --git a/src/libraries/System.Linq/src/System/Linq/Repeat.cs b/src/libraries/System.Linq/src/System/Linq/Repeat.cs
index d0ef9941e6ef..ed6a9a696a30 100644
--- a/src/libraries/System.Linq/src/System/Linq/Repeat.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Repeat.cs
@@ -39,7 +39,7 @@ public RepeatIterator(TResult element, int count)
                 _count = count;
             }
 
-            public override Iterator<TResult> Clone()
+            private protected override Iterator<TResult> Clone()
             {
                 return new RepeatIterator<TResult>(_current, _count);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/Reverse.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Reverse.SpeedOpt.cs
index bb301cc30848..d1ec26de879a 100644
--- a/src/libraries/System.Linq/src/System/Linq/Reverse.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Reverse.SpeedOpt.cs
@@ -7,28 +7,28 @@ namespace System.Linq
 {
     public static partial class Enumerable
     {
-        private sealed partial class ReverseIterator<TSource> : IPartition<TSource>
+        private sealed partial class ReverseIterator<TSource>
         {
-            public TSource[] ToArray()
+            public override TSource[] ToArray()
             {
                 TSource[] array = _source.ToArray();
                 Array.Reverse(array);
                 return array;
             }
 
-            public List<TSource> ToList()
+            public override List<TSource> ToList()
             {
                 List<TSource> list = _source.ToList();
                 list.Reverse();
                 return list;
             }
 
-            public int GetCount(bool onlyIfCheap) =>
+            public override int GetCount(bool onlyIfCheap) =>
                 !onlyIfCheap ? _source.Count() :
                 TryGetNonEnumeratedCount(_source, out int count) ? count :
                 -1;
 
-            public TSource? TryGetElementAt(int index, out bool found)
+            public override TSource? TryGetElementAt(int index, out bool found)
             {
                 if (_source is IList<TSource> list)
                 {
@@ -53,11 +53,11 @@ public int GetCount(bool onlyIfCheap) =>
                 return default;
             }
 
-            public TSource? TryGetFirst(out bool found)
+            public override TSource? TryGetFirst(out bool found)
             {
-                if (_source is IPartition<TSource> partition)
+                if (_source is Iterator<TSource> iterator)
                 {
-                    return partition.TryGetLast(out found);
+                    return iterator.TryGetLast(out found);
                 }
                 else if (_source is IList<TSource> list)
                 {
@@ -89,11 +89,11 @@ public int GetCount(bool onlyIfCheap) =>
                 return default;
             }
 
-            public TSource? TryGetLast(out bool found)
+            public override TSource? TryGetLast(out bool found)
             {
-                if (_source is IPartition<TSource> partition)
+                if (_source is Iterator<TSource> iterator)
                 {
-                    return partition.TryGetFirst(out found);
+                    return iterator.TryGetFirst(out found);
                 }
                 else if (_source is IList<TSource> list)
                 {
@@ -116,10 +116,6 @@ public int GetCount(bool onlyIfCheap) =>
                 found = false;
                 return default;
             }
-
-            public IPartition<TSource>? Skip(int count) => new EnumerablePartition<TSource>(this, count, -1);
-
-            public IPartition<TSource>? Take(int count) => new EnumerablePartition<TSource>(this, 0, count - 1);
         }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/Reverse.cs b/src/libraries/System.Linq/src/System/Linq/Reverse.cs
index 2a02115daca1..0eba290b7c7f 100644
--- a/src/libraries/System.Linq/src/System/Linq/Reverse.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Reverse.cs
@@ -10,7 +10,7 @@ public static partial class Enumerable
     {
         public static IEnumerable<TSource> Reverse<TSource>(this IEnumerable<TSource> source)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -34,11 +34,11 @@ private sealed partial class ReverseIterator<TSource> : Iterator<TSource>
 
             public ReverseIterator(IEnumerable<TSource> source)
             {
-                Debug.Assert(source != null);
+                Debug.Assert(source is not null);
                 _source = source;
             }
 
-            public override Iterator<TSource> Clone() => new ReverseIterator<TSource>(_source);
+            private protected override Iterator<TSource> Clone() => new ReverseIterator<TSource>(_source);
 
             public override bool MoveNext()
             {
@@ -70,7 +70,7 @@ public override bool MoveNext()
                         int index = _state - 3;
                         if (index != -1)
                         {
-                            Debug.Assert(_buffer != null);
+                            Debug.Assert(_buffer is not null);
                             _current = _buffer[index];
                             --_state;
                             return true;
diff --git a/src/libraries/System.Linq/src/System/Linq/Select.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Select.SpeedOpt.cs
index 06f87db9c0f7..f491f1f0de01 100644
--- a/src/libraries/System.Linq/src/System/Linq/Select.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Select.SpeedOpt.cs
@@ -10,15 +10,9 @@ namespace System.Linq
 {
     public static partial class Enumerable
     {
-        static partial void CreateSelectIPartitionIterator<TResult, TSource>(
-            Func<TSource, TResult> selector, IPartition<TSource> partition, ref IEnumerable<TResult>? result)
+        private sealed partial class IEnumerableSelectIterator<TSource, TResult>
         {
-            result = new SelectIPartitionIterator<TSource, TResult>(partition, selector);
-        }
-
-        private sealed partial class SelectEnumerableIterator<TSource, TResult> : IIListProvider<TResult>
-        {
-            public TResult[] ToArray()
+            public override TResult[] ToArray()
             {
                 SegmentedArrayBuilder<TResult>.ScratchBuffer scratch = default;
                 SegmentedArrayBuilder<TResult> builder = new(scratch);
@@ -35,7 +29,7 @@ public TResult[] ToArray()
                 return result;
             }
 
-            public List<TResult> ToList()
+            public override List<TResult> ToList()
             {
                 var list = new List<TResult>();
 
@@ -48,7 +42,7 @@ public List<TResult> ToList()
                 return list;
             }
 
-            public int GetCount(bool onlyIfCheap)
+            public override int GetCount(bool onlyIfCheap)
             {
                 // In case someone uses Count() to force evaluation of
                 // the selector, run it provided `onlyIfCheap` is false.
@@ -71,11 +65,73 @@ public int GetCount(bool onlyIfCheap)
 
                 return count;
             }
+
+            public override TResult? TryGetElementAt(int index, out bool found)
+            {
+                if (index >= 0)
+                {
+                    IEnumerator<TSource> e = _source.GetEnumerator();
+                    try
+                    {
+                        while (e.MoveNext())
+                        {
+                            if (index == 0)
+                            {
+                                found = true;
+                                return _selector(e.Current);
+                            }
+
+                            index--;
+                        }
+                    }
+                    finally
+                    {
+                        (e as IDisposable)?.Dispose();
+                    }
+                }
+
+                found = false;
+                return default;
+            }
+
+            public override TResult? TryGetFirst(out bool found)
+            {
+                using IEnumerator<TSource> e = _source.GetEnumerator();
+                if (e.MoveNext())
+                {
+                    found = true;
+                    return _selector(e.Current);
+                }
+
+                found = false;
+                return default;
+            }
+
+            public override TResult? TryGetLast(out bool found)
+            {
+                using IEnumerator<TSource> e = _source.GetEnumerator();
+
+                if (e.MoveNext())
+                {
+                    found = true;
+                    TSource last = e.Current;
+
+                    while (e.MoveNext())
+                    {
+                        last = e.Current;
+                    }
+
+                    return _selector(last);
+                }
+
+                found = false;
+                return default;
+            }
         }
 
-        private sealed partial class SelectArrayIterator<TSource, TResult> : IPartition<TResult>
+        private sealed partial class ArraySelectIterator<TSource, TResult>
         {
-            public TResult[] ToArray()
+            public override TResult[] ToArray()
             {
                 // See assert in constructor.
                 // Since _source should never be empty, we don't check for 0/return Array.Empty.
@@ -88,7 +144,7 @@ public TResult[] ToArray()
                 return results;
             }
 
-            public List<TResult> ToList()
+            public override List<TResult> ToList()
             {
                 TSource[] source = _source;
                 Debug.Assert(source.Length > 0);
@@ -107,7 +163,7 @@ private static void Fill(ReadOnlySpan<TSource> source, Span<TResult> destination
                 }
             }
 
-            public int GetCount(bool onlyIfCheap)
+            public override int GetCount(bool onlyIfCheap)
             {
                 // In case someone uses Count() to force evaluation of
                 // the selector, run it provided `onlyIfCheap` is false.
@@ -123,7 +179,7 @@ public int GetCount(bool onlyIfCheap)
                 return _source.Length;
             }
 
-            public IPartition<TResult>? Skip(int count)
+            public override Iterator<TResult>? Skip(int count)
             {
                 Debug.Assert(count > 0);
                 if (count >= _source.Length)
@@ -131,30 +187,31 @@ public int GetCount(bool onlyIfCheap)
                     return null;
                 }
 
-                return new SelectListPartitionIterator<TSource, TResult>(_source, _selector, count, int.MaxValue);
+                return new IListSkipTakeSelectIterator<TSource, TResult>(_source, _selector, count, int.MaxValue);
             }
 
-            public IPartition<TResult> Take(int count)
+            public override Iterator<TResult> Take(int count)
             {
                 Debug.Assert(count > 0);
                 return count >= _source.Length ?
                     this :
-                    new SelectListPartitionIterator<TSource, TResult>(_source, _selector, 0, count - 1);
+                    new IListSkipTakeSelectIterator<TSource, TResult>(_source, _selector, 0, count - 1);
             }
 
-            public TResult? TryGetElementAt(int index, out bool found)
+            public override TResult? TryGetElementAt(int index, out bool found)
             {
-                if ((uint)index < (uint)_source.Length)
+                TSource[] source = _source;
+                if ((uint)index < (uint)source.Length)
                 {
                     found = true;
-                    return _selector(_source[index]);
+                    return _selector(source[index]);
                 }
 
                 found = false;
                 return default;
             }
 
-            public TResult TryGetFirst(out bool found)
+            public override TResult TryGetFirst(out bool found)
             {
                 Debug.Assert(_source.Length > 0); // See assert in constructor
 
@@ -162,34 +219,34 @@ public TResult TryGetFirst(out bool found)
                 return _selector(_source[0]);
             }
 
-            public TResult TryGetLast(out bool found)
+            public override TResult TryGetLast(out bool found)
             {
                 Debug.Assert(_source.Length > 0); // See assert in constructor
 
                 found = true;
-                return _selector(_source[_source.Length - 1]);
+                return _selector(_source[^1]);
             }
         }
 
-        private sealed partial class SelectRangeIterator<TResult> : Iterator<TResult>, IPartition<TResult>
+        private sealed partial class RangeSelectIterator<TResult> : Iterator<TResult>
         {
             private readonly int _start;
             private readonly int _end;
             private readonly Func<int, TResult> _selector;
 
-            public SelectRangeIterator(int start, int end, Func<int, TResult> selector)
+            public RangeSelectIterator(int start, int end, Func<int, TResult> selector)
             {
                 Debug.Assert(start < end);
                 Debug.Assert((uint)(end - start) <= (uint)int.MaxValue);
-                Debug.Assert(selector != null);
+                Debug.Assert(selector is not null);
 
                 _start = start;
                 _end = end;
                 _selector = selector;
             }
 
-            public override Iterator<TResult> Clone() =>
-                new SelectRangeIterator<TResult>(_start, _end, _selector);
+            private protected override Iterator<TResult> Clone() =>
+                new RangeSelectIterator<TResult>(_start, _end, _selector);
 
             public override bool MoveNext()
             {
@@ -206,9 +263,9 @@ public override bool MoveNext()
             }
 
             public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector) =>
-                new SelectRangeIterator<TResult2>(_start, _end, CombineSelectors(_selector, selector));
+                new RangeSelectIterator<TResult2>(_start, _end, CombineSelectors(_selector, selector));
 
-            public TResult[] ToArray()
+            public override TResult[] ToArray()
             {
                 var results = new TResult[_end - _start];
                 Fill(results, _start, _selector);
@@ -216,7 +273,7 @@ public TResult[] ToArray()
                 return results;
             }
 
-            public List<TResult> ToList()
+            public override List<TResult> ToList()
             {
                 var results = new List<TResult>(_end - _start);
                 Fill(SetCountAndGetSpan(results, _end - _start), _start, _selector);
@@ -232,7 +289,7 @@ private static void Fill(Span<TResult> results, int start, Func<int, TResult> fu
                 }
             }
 
-            public int GetCount(bool onlyIfCheap)
+            public override int GetCount(bool onlyIfCheap)
             {
                 // In case someone uses Count() to force evaluation of the selector,
                 // run it provided `onlyIfCheap` is false.
@@ -247,7 +304,7 @@ public int GetCount(bool onlyIfCheap)
                 return _end - _start;
             }
 
-            public IPartition<TResult>? Skip(int count)
+            public override Iterator<TResult>? Skip(int count)
             {
                 Debug.Assert(count > 0);
 
@@ -256,10 +313,10 @@ public int GetCount(bool onlyIfCheap)
                     return null;
                 }
 
-                return new SelectRangeIterator<TResult>(_start + count, _end, _selector);
+                return new RangeSelectIterator<TResult>(_start + count, _end, _selector);
             }
 
-            public IPartition<TResult> Take(int count)
+            public override Iterator<TResult> Take(int count)
             {
                 Debug.Assert(count > 0);
 
@@ -268,10 +325,10 @@ public IPartition<TResult> Take(int count)
                     return this;
                 }
 
-                return new SelectRangeIterator<TResult>(_start, _start + count, _selector);
+                return new RangeSelectIterator<TResult>(_start, _start + count, _selector);
             }
 
-            public TResult? TryGetElementAt(int index, out bool found)
+            public override TResult? TryGetElementAt(int index, out bool found)
             {
                 if ((uint)index < (uint)(_end - _start))
                 {
@@ -283,14 +340,14 @@ public IPartition<TResult> Take(int count)
                 return default;
             }
 
-            public TResult TryGetFirst(out bool found)
+            public override TResult TryGetFirst(out bool found)
             {
                 Debug.Assert(_end > _start);
                 found = true;
                 return _selector(_start);
             }
 
-            public TResult TryGetLast(out bool found)
+            public override TResult TryGetLast(out bool found)
             {
                 Debug.Assert(_end > _start);
                 found = true;
@@ -298,9 +355,9 @@ public TResult TryGetLast(out bool found)
             }
         }
 
-        private sealed partial class SelectListIterator<TSource, TResult> : IPartition<TResult>
+        private sealed partial class ListSelectIterator<TSource, TResult>
         {
-            public TResult[] ToArray()
+            public override TResult[] ToArray()
             {
                 ReadOnlySpan<TSource> source = CollectionsMarshal.AsSpan(_source);
                 if (source.Length == 0)
@@ -314,7 +371,7 @@ public TResult[] ToArray()
                 return results;
             }
 
-            public List<TResult> ToList()
+            public override List<TResult> ToList()
             {
                 ReadOnlySpan<TSource> source = CollectionsMarshal.AsSpan(_source);
 
@@ -332,7 +389,7 @@ private static void Fill(ReadOnlySpan<TSource> source, Span<TResult> destination
                 }
             }
 
-            public int GetCount(bool onlyIfCheap)
+            public override int GetCount(bool onlyIfCheap)
             {
                 // In case someone uses Count() to force evaluation of
                 // the selector, run it provided `onlyIfCheap` is false.
@@ -350,19 +407,19 @@ public int GetCount(bool onlyIfCheap)
                 return count;
             }
 
-            public IPartition<TResult> Skip(int count)
+            public override Iterator<TResult> Skip(int count)
             {
                 Debug.Assert(count > 0);
-                return new SelectListPartitionIterator<TSource, TResult>(_source, _selector, count, int.MaxValue);
+                return new IListSkipTakeSelectIterator<TSource, TResult>(_source, _selector, count, int.MaxValue);
             }
 
-            public IPartition<TResult> Take(int count)
+            public override Iterator<TResult> Take(int count)
             {
                 Debug.Assert(count > 0);
-                return new SelectListPartitionIterator<TSource, TResult>(_source, _selector, 0, count - 1);
+                return new IListSkipTakeSelectIterator<TSource, TResult>(_source, _selector, 0, count - 1);
             }
 
-            public TResult? TryGetElementAt(int index, out bool found)
+            public override TResult? TryGetElementAt(int index, out bool found)
             {
                 if ((uint)index < (uint)_source.Count)
                 {
@@ -374,7 +431,7 @@ public IPartition<TResult> Take(int count)
                 return default;
             }
 
-            public TResult? TryGetFirst(out bool found)
+            public override TResult? TryGetFirst(out bool found)
             {
                 if (_source.Count != 0)
                 {
@@ -386,7 +443,7 @@ public IPartition<TResult> Take(int count)
                 return default;
             }
 
-            public TResult? TryGetLast(out bool found)
+            public override TResult? TryGetLast(out bool found)
             {
                 int len = _source.Count;
                 if (len != 0)
@@ -400,9 +457,9 @@ public IPartition<TResult> Take(int count)
             }
         }
 
-        private sealed partial class SelectIListIterator<TSource, TResult> : IPartition<TResult>
+        private sealed partial class IListSelectIterator<TSource, TResult>
         {
-            public TResult[] ToArray()
+            public override TResult[] ToArray()
             {
                 int count = _source.Count;
                 if (count == 0)
@@ -416,7 +473,7 @@ public TResult[] ToArray()
                 return results;
             }
 
-            public List<TResult> ToList()
+            public override List<TResult> ToList()
             {
                 IList<TSource> source = _source;
                 int count = _source.Count;
@@ -435,7 +492,7 @@ private static void Fill(IList<TSource> source, Span<TResult> results, Func<TSou
                 }
             }
 
-            public int GetCount(bool onlyIfCheap)
+            public override int GetCount(bool onlyIfCheap)
             {
                 // In case someone uses Count() to force evaluation of
                 // the selector, run it provided `onlyIfCheap` is false.
@@ -453,19 +510,19 @@ public int GetCount(bool onlyIfCheap)
                 return count;
             }
 
-            public IPartition<TResult> Skip(int count)
+            public override Iterator<TResult> Skip(int count)
             {
                 Debug.Assert(count > 0);
-                return new SelectListPartitionIterator<TSource, TResult>(_source, _selector, count, int.MaxValue);
+                return new IListSkipTakeSelectIterator<TSource, TResult>(_source, _selector, count, int.MaxValue);
             }
 
-            public IPartition<TResult> Take(int count)
+            public override Iterator<TResult> Take(int count)
             {
                 Debug.Assert(count > 0);
-                return new SelectListPartitionIterator<TSource, TResult>(_source, _selector, 0, count - 1);
+                return new IListSkipTakeSelectIterator<TSource, TResult>(_source, _selector, 0, count - 1);
             }
 
-            public TResult? TryGetElementAt(int index, out bool found)
+            public override TResult? TryGetElementAt(int index, out bool found)
             {
                 if ((uint)index < (uint)_source.Count)
                 {
@@ -477,7 +534,7 @@ public IPartition<TResult> Take(int count)
                 return default;
             }
 
-            public TResult? TryGetFirst(out bool found)
+            public override TResult? TryGetFirst(out bool found)
             {
                 if (_source.Count != 0)
                 {
@@ -489,7 +546,7 @@ public IPartition<TResult> Take(int count)
                 return default;
             }
 
-            public TResult? TryGetLast(out bool found)
+            public override TResult? TryGetLast(out bool found)
             {
                 int len = _source.Count;
                 if (len != 0)
@@ -504,26 +561,26 @@ public IPartition<TResult> Take(int count)
         }
 
         /// <summary>
-        /// An iterator that maps each item of an <see cref="IPartition{TSource}"/>.
+        /// An iterator that maps each item of an <see cref="Iterator{TSource}"/>.
         /// </summary>
-        /// <typeparam name="TSource">The type of the source partition.</typeparam>
+        /// <typeparam name="TSource">The type of the source elements.</typeparam>
         /// <typeparam name="TResult">The type of the mapped items.</typeparam>
-        private sealed class SelectIPartitionIterator<TSource, TResult> : Iterator<TResult>, IPartition<TResult>
+        private sealed class IteratorSelectIterator<TSource, TResult> : Iterator<TResult>
         {
-            private readonly IPartition<TSource> _source;
+            private readonly Iterator<TSource> _source;
             private readonly Func<TSource, TResult> _selector;
-            private IEnumerator<TSource>? _enumerator;
+            private Iterator<TSource>? _enumerator;
 
-            public SelectIPartitionIterator(IPartition<TSource> source, Func<TSource, TResult> selector)
+            public IteratorSelectIterator(Iterator<TSource> source, Func<TSource, TResult> selector)
             {
-                Debug.Assert(source != null);
-                Debug.Assert(selector != null);
+                Debug.Assert(source is not null);
+                Debug.Assert(selector is not null);
                 _source = source;
                 _selector = selector;
             }
 
-            public override Iterator<TResult> Clone() =>
-                new SelectIPartitionIterator<TSource, TResult>(_source, _selector);
+            private protected override Iterator<TResult> Clone() =>
+                new IteratorSelectIterator<TSource, TResult>(_source, _selector);
 
             public override bool MoveNext()
             {
@@ -534,7 +591,7 @@ public override bool MoveNext()
                         _state = 2;
                         goto case 2;
                     case 2:
-                        Debug.Assert(_enumerator != null);
+                        Debug.Assert(_enumerator is not null);
                         if (_enumerator.MoveNext())
                         {
                             _current = _selector(_enumerator.Current);
@@ -550,7 +607,7 @@ public override bool MoveNext()
 
             public override void Dispose()
             {
-                if (_enumerator != null)
+                if (_enumerator is not null)
                 {
                     _enumerator.Dispose();
                     _enumerator = null;
@@ -560,23 +617,23 @@ public override void Dispose()
             }
 
             public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector) =>
-                new SelectIPartitionIterator<TSource, TResult2>(_source, CombineSelectors(_selector, selector));
+                new IteratorSelectIterator<TSource, TResult2>(_source, CombineSelectors(_selector, selector));
 
-            public IPartition<TResult>? Skip(int count)
+            public override Iterator<TResult>? Skip(int count)
             {
                 Debug.Assert(count > 0);
-                IPartition<TSource>? source = _source.Skip(count);
-                return source is null ? null : new SelectIPartitionIterator<TSource, TResult>(source, _selector);
+                Iterator<TSource>? source = _source.Skip(count);
+                return source is null ? null : new IteratorSelectIterator<TSource, TResult>(source, _selector);
             }
 
-            public IPartition<TResult>? Take(int count)
+            public override Iterator<TResult>? Take(int count)
             {
                 Debug.Assert(count > 0);
-                IPartition<TSource>? source = _source.Take(count);
-                return source is null ? null : new SelectIPartitionIterator<TSource, TResult>(source, _selector);
+                Iterator<TSource>? source = _source.Take(count);
+                return source is null ? null : new IteratorSelectIterator<TSource, TResult>(source, _selector);
             }
 
-            public TResult? TryGetElementAt(int index, out bool found)
+            public override TResult? TryGetElementAt(int index, out bool found)
             {
                 bool sourceFound;
                 TSource? input = _source.TryGetElementAt(index, out sourceFound);
@@ -584,7 +641,7 @@ public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> s
                 return sourceFound ? _selector(input!) : default!;
             }
 
-            public TResult? TryGetFirst(out bool found)
+            public override TResult? TryGetFirst(out bool found)
             {
                 bool sourceFound;
                 TSource? input = _source.TryGetFirst(out sourceFound);
@@ -592,7 +649,7 @@ public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> s
                 return sourceFound ? _selector(input!) : default!;
             }
 
-            public TResult? TryGetLast(out bool found)
+            public override TResult? TryGetLast(out bool found)
             {
                 bool sourceFound;
                 TSource? input = _source.TryGetLast(out sourceFound);
@@ -629,7 +686,7 @@ private TResult[] PreallocatingToArray(int count)
                 return array;
             }
 
-            public TResult[] ToArray()
+            public override TResult[] ToArray()
             {
                 int count = _source.GetCount(onlyIfCheap: true);
                 return count switch
@@ -640,7 +697,7 @@ public TResult[] ToArray()
                 };
             }
 
-            public List<TResult> ToList()
+            public override List<TResult> ToList()
             {
                 int count = _source.GetCount(onlyIfCheap: true);
                 List<TResult> list;
@@ -665,7 +722,7 @@ public List<TResult> ToList()
                 return list;
             }
 
-            private static void Fill(IPartition<TSource> source, Span<TResult> results, Func<TSource, TResult> func)
+            private static void Fill(Iterator<TSource> source, Span<TResult> results, Func<TSource, TResult> func)
             {
                 int index = 0;
                 foreach (TSource item in source)
@@ -677,7 +734,7 @@ private static void Fill(IPartition<TSource> source, Span<TResult> results, Func
                 Debug.Assert(index == results.Length, "All list elements were not initialized.");
             }
 
-            public int GetCount(bool onlyIfCheap)
+            public override int GetCount(bool onlyIfCheap)
             {
                 if (!onlyIfCheap)
                 {
@@ -705,17 +762,17 @@ public int GetCount(bool onlyIfCheap)
         /// <typeparam name="TSource">The type of the source list.</typeparam>
         /// <typeparam name="TResult">The type of the mapped items.</typeparam>
         [DebuggerDisplay("Count = {Count}")]
-        private sealed class SelectListPartitionIterator<TSource, TResult> : Iterator<TResult>, IPartition<TResult>
+        private sealed class IListSkipTakeSelectIterator<TSource, TResult> : Iterator<TResult>
         {
             private readonly IList<TSource> _source;
             private readonly Func<TSource, TResult> _selector;
             private readonly int _minIndexInclusive;
             private readonly int _maxIndexInclusive;
 
-            public SelectListPartitionIterator(IList<TSource> source, Func<TSource, TResult> selector, int minIndexInclusive, int maxIndexInclusive)
+            public IListSkipTakeSelectIterator(IList<TSource> source, Func<TSource, TResult> selector, int minIndexInclusive, int maxIndexInclusive)
             {
-                Debug.Assert(source != null);
-                Debug.Assert(selector != null);
+                Debug.Assert(source is not null);
+                Debug.Assert(selector is not null);
                 Debug.Assert(minIndexInclusive >= 0);
                 Debug.Assert(minIndexInclusive <= maxIndexInclusive);
                 _source = source;
@@ -724,8 +781,8 @@ public SelectListPartitionIterator(IList<TSource> source, Func<TSource, TResult>
                 _maxIndexInclusive = maxIndexInclusive;
             }
 
-            public override Iterator<TResult> Clone() =>
-                new SelectListPartitionIterator<TSource, TResult>(_source, _selector, _minIndexInclusive, _maxIndexInclusive);
+            private protected override Iterator<TResult> Clone() =>
+                new IListSkipTakeSelectIterator<TSource, TResult>(_source, _selector, _minIndexInclusive, _maxIndexInclusive);
 
             public override bool MoveNext()
             {
@@ -745,23 +802,23 @@ public override bool MoveNext()
             }
 
             public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector) =>
-                new SelectListPartitionIterator<TSource, TResult2>(_source, CombineSelectors(_selector, selector), _minIndexInclusive, _maxIndexInclusive);
+                new IListSkipTakeSelectIterator<TSource, TResult2>(_source, CombineSelectors(_selector, selector), _minIndexInclusive, _maxIndexInclusive);
 
-            public IPartition<TResult>? Skip(int count)
+            public override Iterator<TResult>? Skip(int count)
             {
                 Debug.Assert(count > 0);
                 int minIndex = _minIndexInclusive + count;
-                return (uint)minIndex > (uint)_maxIndexInclusive ? null : new SelectListPartitionIterator<TSource, TResult>(_source, _selector, minIndex, _maxIndexInclusive);
+                return (uint)minIndex > (uint)_maxIndexInclusive ? null : new IListSkipTakeSelectIterator<TSource, TResult>(_source, _selector, minIndex, _maxIndexInclusive);
             }
 
-            public IPartition<TResult> Take(int count)
+            public override Iterator<TResult> Take(int count)
             {
                 Debug.Assert(count > 0);
                 int maxIndex = _minIndexInclusive + count - 1;
-                return (uint)maxIndex >= (uint)_maxIndexInclusive ? this : new SelectListPartitionIterator<TSource, TResult>(_source, _selector, _minIndexInclusive, maxIndex);
+                return (uint)maxIndex >= (uint)_maxIndexInclusive ? this : new IListSkipTakeSelectIterator<TSource, TResult>(_source, _selector, _minIndexInclusive, maxIndex);
             }
 
-            public TResult? TryGetElementAt(int index, out bool found)
+            public override TResult? TryGetElementAt(int index, out bool found)
             {
                 if ((uint)index <= (uint)(_maxIndexInclusive - _minIndexInclusive) && index < _source.Count - _minIndexInclusive)
                 {
@@ -773,7 +830,7 @@ public IPartition<TResult> Take(int count)
                 return default;
             }
 
-            public TResult? TryGetFirst(out bool found)
+            public override TResult? TryGetFirst(out bool found)
             {
                 if (_source.Count > _minIndexInclusive)
                 {
@@ -785,7 +842,7 @@ public IPartition<TResult> Take(int count)
                 return default;
             }
 
-            public TResult? TryGetLast(out bool found)
+            public override TResult? TryGetLast(out bool found)
             {
                 int lastIndex = _source.Count - 1;
                 if (lastIndex >= _minIndexInclusive)
@@ -812,7 +869,7 @@ private int Count
                 }
             }
 
-            public TResult[] ToArray()
+            public override TResult[] ToArray()
             {
                 int count = Count;
                 if (count == 0)
@@ -826,7 +883,7 @@ public TResult[] ToArray()
                 return array;
             }
 
-            public List<TResult> ToList()
+            public override List<TResult> ToList()
             {
                 int count = Count;
                 if (count == 0)
@@ -848,7 +905,7 @@ private static void Fill(IList<TSource> source, Span<TResult> destination, Func<
                 }
             }
 
-            public int GetCount(bool onlyIfCheap)
+            public override int GetCount(bool onlyIfCheap)
             {
                 // In case someone uses Count() to force evaluation of
                 // the selector, run it provided `onlyIfCheap` is false.
diff --git a/src/libraries/System.Linq/src/System/Linq/Select.cs b/src/libraries/System.Linq/src/System/Linq/Select.cs
index 3059fa7f0a8e..37a41c450f87 100644
--- a/src/libraries/System.Linq/src/System/Linq/Select.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Select.cs
@@ -13,12 +13,12 @@ public static partial class Enumerable
         public static IEnumerable<TResult> Select<TSource, TResult>(
             this IEnumerable<TSource> source, Func<TSource, TResult> selector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -37,43 +37,28 @@ public static IEnumerable<TResult> Select<TSource, TResult>(
                         return [];
                     }
 
-                    return new SelectArrayIterator<TSource, TResult>(array, selector);
+                    return new ArraySelectIterator<TSource, TResult>(array, selector);
                 }
 
                 if (source is List<TSource> list)
                 {
-                    return new SelectListIterator<TSource, TResult>(list, selector);
+                    return new ListSelectIterator<TSource, TResult>(list, selector);
                 }
 
-                return new SelectIListIterator<TSource, TResult>(ilist, selector);
+                return new IListSelectIterator<TSource, TResult>(ilist, selector);
             }
 
-            if (source is IPartition<TSource> partition)
-            {
-                IEnumerable<TResult>? result = null;
-                CreateSelectIPartitionIterator(selector, partition, ref result);
-                if (result != null)
-                {
-                    return result;
-                }
-            }
-
-            return new SelectEnumerableIterator<TSource, TResult>(source, selector);
+            return new IEnumerableSelectIterator<TSource, TResult>(source, selector);
         }
 
-#pragma warning disable IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6177
-        static partial void CreateSelectIPartitionIterator<TResult, TSource>(
-            Func<TSource, TResult> selector, IPartition<TSource> partition, [NotNull] ref IEnumerable<TResult>? result);
-#pragma warning restore IDE0060
-
         public static IEnumerable<TResult> Select<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, int, TResult> selector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -105,26 +90,26 @@ private static IEnumerable<TResult> SelectIterator<TSource, TResult>(IEnumerable
         /// </summary>
         /// <typeparam name="TSource">The type of the source enumerable.</typeparam>
         /// <typeparam name="TResult">The type of the mapped items.</typeparam>
-        private sealed partial class SelectEnumerableIterator<TSource, TResult> : Iterator<TResult>
+        private sealed partial class IEnumerableSelectIterator<TSource, TResult> : Iterator<TResult>
         {
             private readonly IEnumerable<TSource> _source;
             private readonly Func<TSource, TResult> _selector;
             private IEnumerator<TSource>? _enumerator;
 
-            public SelectEnumerableIterator(IEnumerable<TSource> source, Func<TSource, TResult> selector)
+            public IEnumerableSelectIterator(IEnumerable<TSource> source, Func<TSource, TResult> selector)
             {
-                Debug.Assert(source != null);
-                Debug.Assert(selector != null);
+                Debug.Assert(source is not null);
+                Debug.Assert(selector is not null);
                 _source = source;
                 _selector = selector;
             }
 
-            public override Iterator<TResult> Clone() =>
-                new SelectEnumerableIterator<TSource, TResult>(_source, _selector);
+            private protected override Iterator<TResult> Clone() =>
+                new IEnumerableSelectIterator<TSource, TResult>(_source, _selector);
 
             public override void Dispose()
             {
-                if (_enumerator != null)
+                if (_enumerator is not null)
                 {
                     _enumerator.Dispose();
                     _enumerator = null;
@@ -142,7 +127,7 @@ public override bool MoveNext()
                         _state = 2;
                         goto case 2;
                     case 2:
-                        Debug.Assert(_enumerator != null);
+                        Debug.Assert(_enumerator is not null);
                         if (_enumerator.MoveNext())
                         {
                             _current = _selector(_enumerator.Current);
@@ -157,7 +142,7 @@ public override bool MoveNext()
             }
 
             public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector) =>
-                new SelectEnumerableIterator<TSource, TResult2>(_source, CombineSelectors(_selector, selector));
+                new IEnumerableSelectIterator<TSource, TResult2>(_source, CombineSelectors(_selector, selector));
         }
 
         /// <summary>
@@ -166,15 +151,15 @@ public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> s
         /// <typeparam name="TSource">The type of the source array.</typeparam>
         /// <typeparam name="TResult">The type of the mapped items.</typeparam>
         [DebuggerDisplay("Count = {CountForDebugger}")]
-        private sealed partial class SelectArrayIterator<TSource, TResult> : Iterator<TResult>
+        private sealed partial class ArraySelectIterator<TSource, TResult> : Iterator<TResult>
         {
             private readonly TSource[] _source;
             private readonly Func<TSource, TResult> _selector;
 
-            public SelectArrayIterator(TSource[] source, Func<TSource, TResult> selector)
+            public ArraySelectIterator(TSource[] source, Func<TSource, TResult> selector)
             {
-                Debug.Assert(source != null);
-                Debug.Assert(selector != null);
+                Debug.Assert(source is not null);
+                Debug.Assert(selector is not null);
                 Debug.Assert(source.Length > 0); // Caller should check this beforehand and return a cached result
                 _source = source;
                 _selector = selector;
@@ -182,7 +167,7 @@ public SelectArrayIterator(TSource[] source, Func<TSource, TResult> selector)
 
             private int CountForDebugger => _source.Length;
 
-            public override Iterator<TResult> Clone() => new SelectArrayIterator<TSource, TResult>(_source, _selector);
+            private protected override Iterator<TResult> Clone() => new ArraySelectIterator<TSource, TResult>(_source, _selector);
 
             public override bool MoveNext()
             {
@@ -200,7 +185,7 @@ public override bool MoveNext()
             }
 
             public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector) =>
-                new SelectArrayIterator<TSource, TResult2>(_source, CombineSelectors(_selector, selector));
+                new ArraySelectIterator<TSource, TResult2>(_source, CombineSelectors(_selector, selector));
         }
 
         /// <summary>
@@ -209,23 +194,23 @@ public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> s
         /// <typeparam name="TSource">The type of the source list.</typeparam>
         /// <typeparam name="TResult">The type of the mapped items.</typeparam>
         [DebuggerDisplay("Count = {CountForDebugger}")]
-        private sealed partial class SelectListIterator<TSource, TResult> : Iterator<TResult>
+        private sealed partial class ListSelectIterator<TSource, TResult> : Iterator<TResult>
         {
             private readonly List<TSource> _source;
             private readonly Func<TSource, TResult> _selector;
             private List<TSource>.Enumerator _enumerator;
 
-            public SelectListIterator(List<TSource> source, Func<TSource, TResult> selector)
+            public ListSelectIterator(List<TSource> source, Func<TSource, TResult> selector)
             {
-                Debug.Assert(source != null);
-                Debug.Assert(selector != null);
+                Debug.Assert(source is not null);
+                Debug.Assert(selector is not null);
                 _source = source;
                 _selector = selector;
             }
 
             private int CountForDebugger => _source.Count;
 
-            public override Iterator<TResult> Clone() => new SelectListIterator<TSource, TResult>(_source, _selector);
+            private protected override Iterator<TResult> Clone() => new ListSelectIterator<TSource, TResult>(_source, _selector);
 
             public override bool MoveNext()
             {
@@ -250,7 +235,7 @@ public override bool MoveNext()
             }
 
             public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector) =>
-                new SelectListIterator<TSource, TResult2>(_source, CombineSelectors(_selector, selector));
+                new ListSelectIterator<TSource, TResult2>(_source, CombineSelectors(_selector, selector));
         }
 
         /// <summary>
@@ -259,23 +244,23 @@ public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> s
         /// <typeparam name="TSource">The type of the source list.</typeparam>
         /// <typeparam name="TResult">The type of the mapped items.</typeparam>
         [DebuggerDisplay("Count = {CountForDebugger}")]
-        private sealed partial class SelectIListIterator<TSource, TResult> : Iterator<TResult>
+        private sealed partial class IListSelectIterator<TSource, TResult> : Iterator<TResult>
         {
             private readonly IList<TSource> _source;
             private readonly Func<TSource, TResult> _selector;
             private IEnumerator<TSource>? _enumerator;
 
-            public SelectIListIterator(IList<TSource> source, Func<TSource, TResult> selector)
+            public IListSelectIterator(IList<TSource> source, Func<TSource, TResult> selector)
             {
-                Debug.Assert(source != null);
-                Debug.Assert(selector != null);
+                Debug.Assert(source is not null);
+                Debug.Assert(selector is not null);
                 _source = source;
                 _selector = selector;
             }
 
             private int CountForDebugger => _source.Count;
 
-            public override Iterator<TResult> Clone() => new SelectIListIterator<TSource, TResult>(_source, _selector);
+            private protected override Iterator<TResult> Clone() => new IListSelectIterator<TSource, TResult>(_source, _selector);
 
             public override bool MoveNext()
             {
@@ -286,7 +271,7 @@ public override bool MoveNext()
                         _state = 2;
                         goto case 2;
                     case 2:
-                        Debug.Assert(_enumerator != null);
+                        Debug.Assert(_enumerator is not null);
                         if (_enumerator.MoveNext())
                         {
                             _current = _selector(_enumerator.Current);
@@ -302,7 +287,7 @@ public override bool MoveNext()
 
             public override void Dispose()
             {
-                if (_enumerator != null)
+                if (_enumerator is not null)
                 {
                     _enumerator.Dispose();
                     _enumerator = null;
@@ -312,7 +297,7 @@ public override void Dispose()
             }
 
             public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector) =>
-                new SelectIListIterator<TSource, TResult2>(_source, CombineSelectors(_selector, selector));
+                new IListSelectIterator<TSource, TResult2>(_source, CombineSelectors(_selector, selector));
         }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/SelectMany.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/SelectMany.SpeedOpt.cs
index 050ae6a4e06b..ae0bf35ef8f1 100644
--- a/src/libraries/System.Linq/src/System/Linq/SelectMany.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/SelectMany.SpeedOpt.cs
@@ -7,9 +7,9 @@ namespace System.Linq
 {
     public static partial class Enumerable
     {
-        private sealed partial class SelectManySingleSelectorIterator<TSource, TResult> : IIListProvider<TResult>
+        private sealed partial class SelectManySingleSelectorIterator<TSource, TResult>
         {
-            public int GetCount(bool onlyIfCheap)
+            public override int GetCount(bool onlyIfCheap)
             {
                 if (onlyIfCheap)
                 {
@@ -29,7 +29,7 @@ public int GetCount(bool onlyIfCheap)
                 return count;
             }
 
-            public TResult[] ToArray()
+            public override TResult[] ToArray()
             {
                 SegmentedArrayBuilder<TResult>.ScratchBuffer scratch = default;
                 SegmentedArrayBuilder<TResult> builder = new(scratch);
@@ -46,7 +46,7 @@ public TResult[] ToArray()
                 return result;
             }
 
-            public List<TResult> ToList()
+            public override List<TResult> ToList()
             {
                 var list = new List<TResult>();
 
diff --git a/src/libraries/System.Linq/src/System/Linq/SelectMany.cs b/src/libraries/System.Linq/src/System/Linq/SelectMany.cs
index 748a9030f060..d8845e8334db 100644
--- a/src/libraries/System.Linq/src/System/Linq/SelectMany.cs
+++ b/src/libraries/System.Linq/src/System/Linq/SelectMany.cs
@@ -10,12 +10,12 @@ public static partial class Enumerable
     {
         public static IEnumerable<TResult> SelectMany<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, IEnumerable<TResult>> selector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -30,12 +30,12 @@ public static IEnumerable<TResult> SelectMany<TSource, TResult>(this IEnumerable
 
         public static IEnumerable<TResult> SelectMany<TSource, TResult>(this IEnumerable<TSource> source, Func<TSource, int, IEnumerable<TResult>> selector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (selector == null)
+            if (selector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
             }
@@ -67,17 +67,17 @@ private static IEnumerable<TResult> SelectManyIterator<TSource, TResult>(IEnumer
 
         public static IEnumerable<TResult> SelectMany<TSource, TCollection, TResult>(this IEnumerable<TSource> source, Func<TSource, int, IEnumerable<TCollection>> collectionSelector, Func<TSource, TCollection, TResult> resultSelector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (collectionSelector == null)
+            if (collectionSelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.collectionSelector);
             }
 
-            if (resultSelector == null)
+            if (resultSelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.resultSelector);
             }
@@ -109,17 +109,17 @@ private static IEnumerable<TResult> SelectManyIterator<TSource, TCollection, TRe
 
         public static IEnumerable<TResult> SelectMany<TSource, TCollection, TResult>(this IEnumerable<TSource> source, Func<TSource, IEnumerable<TCollection>> collectionSelector, Func<TSource, TCollection, TResult> resultSelector)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (collectionSelector == null)
+            if (collectionSelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.collectionSelector);
             }
 
-            if (resultSelector == null)
+            if (resultSelector is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.resultSelector);
             }
@@ -152,27 +152,27 @@ private sealed partial class SelectManySingleSelectorIterator<TSource, TResult>
 
             internal SelectManySingleSelectorIterator(IEnumerable<TSource> source, Func<TSource, IEnumerable<TResult>> selector)
             {
-                Debug.Assert(source != null);
-                Debug.Assert(selector != null);
+                Debug.Assert(source is not null);
+                Debug.Assert(selector is not null);
 
                 _source = source;
                 _selector = selector;
             }
 
-            public override Iterator<TResult> Clone()
+            private protected override Iterator<TResult> Clone()
             {
                 return new SelectManySingleSelectorIterator<TSource, TResult>(_source, _selector);
             }
 
             public override void Dispose()
             {
-                if (_subEnumerator != null)
+                if (_subEnumerator is not null)
                 {
                     _subEnumerator.Dispose();
                     _subEnumerator = null;
                 }
 
-                if (_sourceEnumerator != null)
+                if (_sourceEnumerator is not null)
                 {
                     _sourceEnumerator.Dispose();
                     _sourceEnumerator = null;
@@ -192,7 +192,7 @@ public override bool MoveNext()
                         goto case 2;
                     case 2:
                         // Take the next element from the source enumerator.
-                        Debug.Assert(_sourceEnumerator != null);
+                        Debug.Assert(_sourceEnumerator is not null);
                         if (!_sourceEnumerator.MoveNext())
                         {
                             break;
@@ -206,7 +206,7 @@ public override bool MoveNext()
                         goto case 3;
                     case 3:
                         // Take the next element from the sub-collection and yield.
-                        Debug.Assert(_subEnumerator != null);
+                        Debug.Assert(_subEnumerator is not null);
                         if (!_subEnumerator.MoveNext())
                         {
                             _subEnumerator.Dispose();
diff --git a/src/libraries/System.Linq/src/System/Linq/SequenceEqual.cs b/src/libraries/System.Linq/src/System/Linq/SequenceEqual.cs
index ff4c32200a5c..16bf60e68614 100644
--- a/src/libraries/System.Linq/src/System/Linq/SequenceEqual.cs
+++ b/src/libraries/System.Linq/src/System/Linq/SequenceEqual.cs
@@ -12,12 +12,12 @@ public static bool SequenceEqual<TSource>(this IEnumerable<TSource> first, IEnum
 
         public static bool SequenceEqual<TSource>(this IEnumerable<TSource> first, IEnumerable<TSource> second, IEqualityComparer<TSource>? comparer)
         {
-            if (first == null)
+            if (first is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.first);
             }
 
-            if (second == null)
+            if (second is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.second);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/Single.cs b/src/libraries/System.Linq/src/System/Linq/Single.cs
index 9e35009e6175..9c7328cc3ff1 100644
--- a/src/libraries/System.Linq/src/System/Linq/Single.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Single.cs
@@ -107,12 +107,12 @@ public static TSource SingleOrDefault<TSource>(this IEnumerable<TSource> source,
 
         private static TSource? TryGetSingle<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate, out bool found)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/SingleLinkedNode.cs b/src/libraries/System.Linq/src/System/Linq/SingleLinkedNode.cs
index e0bf5e54c51e..144d1a81889b 100644
--- a/src/libraries/System.Linq/src/System/Linq/SingleLinkedNode.cs
+++ b/src/libraries/System.Linq/src/System/Linq/SingleLinkedNode.cs
@@ -27,7 +27,7 @@ public SingleLinkedNode(TSource item)
         /// <param name="item">The item to place in this node.</param>
         private SingleLinkedNode(SingleLinkedNode<TSource> linked, TSource item)
         {
-            Debug.Assert(linked != null);
+            Debug.Assert(linked is not null);
             Linked = linked;
             Item = item;
         }
@@ -54,7 +54,7 @@ private SingleLinkedNode(SingleLinkedNode<TSource> linked, TSource item)
         public int GetCount()
         {
             int count = 0;
-            for (SingleLinkedNode<TSource>? node = this; node != null; node = node.Linked)
+            for (SingleLinkedNode<TSource>? node = this; node is not null; node = node.Linked)
             {
                 count++;
             }
@@ -77,7 +77,7 @@ public SingleLinkedNode<TSource> GetNode(int index)
             for (; index > 0; index--)
             {
                 node = node.Linked!;
-                Debug.Assert(node != null);
+                Debug.Assert(node is not null);
             }
 
             return node;
@@ -103,7 +103,7 @@ public TSource[] ToArray(int count)
         public void Fill(Span<TSource> span)
         {
             int index = 0;
-            for (SingleLinkedNode<TSource>? node = this; node != null; node = node.Linked)
+            for (SingleLinkedNode<TSource>? node = this; node is not null; node = node.Linked)
             {
                 span[index] = node.Item;
                 index++;
@@ -117,7 +117,7 @@ public void Fill(Span<TSource> span)
         public void FillReversed(Span<TSource> span)
         {
             int index = span.Length;
-            for (SingleLinkedNode<TSource>? node = this; node != null; node = node.Linked)
+            for (SingleLinkedNode<TSource>? node = this; node is not null; node = node.Linked)
             {
                 --index;
                 span[index] = node.Item;
diff --git a/src/libraries/System.Linq/src/System/Linq/Skip.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Skip.SpeedOpt.cs
index 1596dc0cc7cf..74ff73a06824 100644
--- a/src/libraries/System.Linq/src/System/Linq/Skip.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Skip.SpeedOpt.cs
@@ -9,7 +9,7 @@ public static partial class Enumerable
     {
         private static IEnumerable<TSource> SkipIterator<TSource>(IEnumerable<TSource> source, int count) =>
             source is IList<TSource> sourceList ?
-                (IEnumerable<TSource>)new ListPartition<TSource>(sourceList, count, int.MaxValue) :
-                new EnumerablePartition<TSource>(source, count, -1);
+                (IEnumerable<TSource>)new IListSkipTakeIterator<TSource>(sourceList, count, int.MaxValue) :
+                new IEnumerableSkipTakeIterator<TSource>(source, count, -1);
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/Skip.cs b/src/libraries/System.Linq/src/System/Linq/Skip.cs
index 3652d1da3e79..c87bea9e25e6 100644
--- a/src/libraries/System.Linq/src/System/Linq/Skip.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Skip.cs
@@ -9,7 +9,7 @@ public static partial class Enumerable
     {
         public static IEnumerable<TSource> Skip<TSource>(this IEnumerable<TSource> source, int count)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -23,29 +23,31 @@ public static IEnumerable<TSource> Skip<TSource>(this IEnumerable<TSource> sourc
             {
                 // Return source if not actually skipping, but only if it's a type from here, to avoid
                 // issues if collections are used as keys or otherwise must not be aliased.
-                if (source is Iterator<TSource> || source is IPartition<TSource>)
+                if (source is Iterator<TSource>)
                 {
                     return source;
                 }
 
                 count = 0;
             }
-            else if (source is IPartition<TSource> partition)
+#if !OPTIMIZE_FOR_SIZE
+            else if (source is Iterator<TSource> iterator)
             {
-                return partition.Skip(count) ?? Empty<TSource>();
+                return iterator.Skip(count) ?? Empty<TSource>();
             }
+#endif
 
             return SkipIterator(source, count);
         }
 
         public static IEnumerable<TSource> SkipWhile<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
@@ -81,12 +83,12 @@ private static IEnumerable<TSource> SkipWhileIterator<TSource>(IEnumerable<TSour
 
         public static IEnumerable<TSource> SkipWhile<TSource>(this IEnumerable<TSource> source, Func<TSource, int, bool> predicate)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
@@ -128,7 +130,7 @@ private static IEnumerable<TSource> SkipWhileIterator<TSource>(IEnumerable<TSour
 
         public static IEnumerable<TSource> SkipLast<TSource>(this IEnumerable<TSource> source, int count)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/Partition.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/SkipTake.SpeedOpt.cs
similarity index 77%
rename from src/libraries/System.Linq/src/System/Linq/Partition.SpeedOpt.cs
rename to src/libraries/System.Linq/src/System/Linq/SkipTake.SpeedOpt.cs
index 202fb803881e..1a49488b09fb 100644
--- a/src/libraries/System.Linq/src/System/Linq/Partition.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/SkipTake.SpeedOpt.cs
@@ -1,69 +1,11 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System.Collections;
 using System.Collections.Generic;
 using System.Diagnostics;
 
 namespace System.Linq
 {
-    internal sealed class OrderedPartition<TElement> : IPartition<TElement>
-    {
-        private readonly OrderedEnumerable<TElement> _source;
-        private readonly int _minIndexInclusive;
-        private readonly int _maxIndexInclusive;
-
-        public OrderedPartition(OrderedEnumerable<TElement> source, int minIdxInclusive, int maxIdxInclusive)
-        {
-            _source = source;
-            _minIndexInclusive = minIdxInclusive;
-            _maxIndexInclusive = maxIdxInclusive;
-        }
-
-        public IEnumerator<TElement> GetEnumerator() => _source.GetEnumerator(_minIndexInclusive, _maxIndexInclusive);
-
-        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
-
-        public IPartition<TElement>? Skip(int count)
-        {
-            int minIndex = _minIndexInclusive + count;
-            return (uint)minIndex > (uint)_maxIndexInclusive ? null : new OrderedPartition<TElement>(_source, minIndex, _maxIndexInclusive);
-        }
-
-        public IPartition<TElement> Take(int count)
-        {
-            int maxIndex = _minIndexInclusive + count - 1;
-            if ((uint)maxIndex >= (uint)_maxIndexInclusive)
-            {
-                return this;
-            }
-
-            return new OrderedPartition<TElement>(_source, _minIndexInclusive, maxIndex);
-        }
-
-        public TElement? TryGetElementAt(int index, out bool found)
-        {
-            if ((uint)index <= (uint)(_maxIndexInclusive - _minIndexInclusive))
-            {
-                return _source.TryGetElementAt(index + _minIndexInclusive, out found);
-            }
-
-            found = false;
-            return default;
-        }
-
-        public TElement? TryGetFirst(out bool found) => _source.TryGetElementAt(_minIndexInclusive, out found);
-
-        public TElement? TryGetLast(out bool found) =>
-            _source.TryGetLast(_minIndexInclusive, _maxIndexInclusive, out found);
-
-        public TElement[] ToArray() => _source.ToArray(_minIndexInclusive, _maxIndexInclusive);
-
-        public List<TElement> ToList() => _source.ToList(_minIndexInclusive, _maxIndexInclusive);
-
-        public int GetCount(bool onlyIfCheap) => _source.GetCount(_minIndexInclusive, _maxIndexInclusive, onlyIfCheap);
-    }
-
     public static partial class Enumerable
     {
         /// <summary>
@@ -71,15 +13,15 @@ public static partial class Enumerable
         /// </summary>
         /// <typeparam name="TSource">The type of the source list.</typeparam>
         [DebuggerDisplay("Count = {Count}")]
-        private sealed class ListPartition<TSource> : Iterator<TSource>, IPartition<TSource>, IList<TSource>, IReadOnlyList<TSource>
+        private sealed class IListSkipTakeIterator<TSource> : Iterator<TSource>, IList<TSource>, IReadOnlyList<TSource>
         {
             private readonly IList<TSource> _source;
             private readonly int _minIndexInclusive;
             private readonly int _maxIndexInclusive;
 
-            public ListPartition(IList<TSource> source, int minIndexInclusive, int maxIndexInclusive)
+            public IListSkipTakeIterator(IList<TSource> source, int minIndexInclusive, int maxIndexInclusive)
             {
-                Debug.Assert(source != null);
+                Debug.Assert(source is not null);
                 Debug.Assert(minIndexInclusive >= 0);
                 Debug.Assert(minIndexInclusive <= maxIndexInclusive);
                 _source = source;
@@ -87,8 +29,8 @@ public ListPartition(IList<TSource> source, int minIndexInclusive, int maxIndexI
                 _maxIndexInclusive = maxIndexInclusive;
             }
 
-            public override Iterator<TSource> Clone() =>
-                new ListPartition<TSource>(_source, _minIndexInclusive, _maxIndexInclusive);
+            private protected override Iterator<TSource> Clone() =>
+                new IListSkipTakeIterator<TSource>(_source, _minIndexInclusive, _maxIndexInclusive);
 
             public override bool MoveNext()
             {
@@ -108,21 +50,21 @@ public override bool MoveNext()
             }
 
             public override IEnumerable<TResult> Select<TResult>(Func<TSource, TResult> selector) =>
-                new SelectListPartitionIterator<TSource, TResult>(_source, selector, _minIndexInclusive, _maxIndexInclusive);
+                new IListSkipTakeSelectIterator<TSource, TResult>(_source, selector, _minIndexInclusive, _maxIndexInclusive);
 
-            public IPartition<TSource>? Skip(int count)
+            public override Iterator<TSource>? Skip(int count)
             {
                 int minIndex = _minIndexInclusive + count;
-                return (uint)minIndex > (uint)_maxIndexInclusive ? null : new ListPartition<TSource>(_source, minIndex, _maxIndexInclusive);
+                return (uint)minIndex > (uint)_maxIndexInclusive ? null : new IListSkipTakeIterator<TSource>(_source, minIndex, _maxIndexInclusive);
             }
 
-            public IPartition<TSource> Take(int count)
+            public override Iterator<TSource> Take(int count)
             {
                 int maxIndex = _minIndexInclusive + count - 1;
-                return (uint)maxIndex >= (uint)_maxIndexInclusive ? this : new ListPartition<TSource>(_source, _minIndexInclusive, maxIndex);
+                return (uint)maxIndex >= (uint)_maxIndexInclusive ? this : new IListSkipTakeIterator<TSource>(_source, _minIndexInclusive, maxIndex);
             }
 
-            public TSource? TryGetElementAt(int index, out bool found)
+            public override TSource? TryGetElementAt(int index, out bool found)
             {
                 if ((uint)index <= (uint)(_maxIndexInclusive - _minIndexInclusive) && index < _source.Count - _minIndexInclusive)
                 {
@@ -134,7 +76,7 @@ public IPartition<TSource> Take(int count)
                 return default;
             }
 
-            public TSource? TryGetFirst(out bool found)
+            public override TSource? TryGetFirst(out bool found)
             {
                 if (_source.Count > _minIndexInclusive)
                 {
@@ -146,7 +88,7 @@ public IPartition<TSource> Take(int count)
                 return default;
             }
 
-            public TSource? TryGetLast(out bool found)
+            public override TSource? TryGetLast(out bool found)
             {
                 int lastIndex = _source.Count - 1;
                 if (lastIndex >= _minIndexInclusive)
@@ -173,9 +115,9 @@ public int Count
                 }
             }
 
-            public int GetCount(bool onlyIfCheap) => Count;
+            public override int GetCount(bool onlyIfCheap) => Count;
 
-            public TSource[] ToArray()
+            public override TSource[] ToArray()
             {
                 int count = Count;
                 if (count == 0)
@@ -188,16 +130,16 @@ public TSource[] ToArray()
                 return array;
             }
 
-            public List<TSource> ToList()
+            public override List<TSource> ToList()
             {
                 int count = Count;
-                if (count == 0)
+
+                List<TSource> list = [];
+                if (count != 0)
                 {
-                    return new List<TSource>();
+                    Fill(_source, SetCountAndGetSpan(list, count), _minIndexInclusive);
                 }
 
-                List<TSource> list = new List<TSource>(count);
-                Fill(_source, SetCountAndGetSpan(list, count), _minIndexInclusive);
                 return list;
             }
 
@@ -257,7 +199,7 @@ public TSource this[int index]
         /// An iterator that yields the items of part of an <see cref="IEnumerable{TSource}"/>.
         /// </summary>
         /// <typeparam name="TSource">The type of the source enumerable.</typeparam>
-        private sealed class EnumerablePartition<TSource> : Iterator<TSource>, IPartition<TSource>
+        private sealed class IEnumerableSkipTakeIterator<TSource> : Iterator<TSource>
         {
             private readonly IEnumerable<TSource> _source;
             private readonly int _minIndexInclusive;
@@ -265,9 +207,9 @@ private sealed class EnumerablePartition<TSource> : Iterator<TSource>, IPartitio
                                                      // If this is -1, it's impossible to set a limit on the count.
             private IEnumerator<TSource>? _enumerator;
 
-            internal EnumerablePartition(IEnumerable<TSource> source, int minIndexInclusive, int maxIndexInclusive)
+            internal IEnumerableSkipTakeIterator(IEnumerable<TSource> source, int minIndexInclusive, int maxIndexInclusive)
             {
-                Debug.Assert(source != null);
+                Debug.Assert(source is not null);
                 Debug.Assert(!(source is IList<TSource>), $"The caller needs to check for {nameof(IList<TSource>)}.");
                 Debug.Assert(minIndexInclusive >= 0);
                 Debug.Assert(maxIndexInclusive >= -1);
@@ -288,12 +230,12 @@ internal EnumerablePartition(IEnumerable<TSource> source, int minIndexInclusive,
 
             private int Limit => _maxIndexInclusive + 1 - _minIndexInclusive; // This is that upper bound.
 
-            public override Iterator<TSource> Clone() =>
-                new EnumerablePartition<TSource>(_source, _minIndexInclusive, _maxIndexInclusive);
+            private protected override Iterator<TSource> Clone() =>
+                new IEnumerableSkipTakeIterator<TSource>(_source, _minIndexInclusive, _maxIndexInclusive);
 
             public override void Dispose()
             {
-                if (_enumerator != null)
+                if (_enumerator is not null)
                 {
                     _enumerator.Dispose();
                     _enumerator = null;
@@ -302,7 +244,7 @@ public override void Dispose()
                 base.Dispose();
             }
 
-            public int GetCount(bool onlyIfCheap)
+            public override int GetCount(bool onlyIfCheap)
             {
                 if (onlyIfCheap)
                 {
@@ -319,7 +261,7 @@ public int GetCount(bool onlyIfCheap)
                 using (IEnumerator<TSource> en = _source.GetEnumerator())
                 {
                     // We only want to iterate up to _maxIndexInclusive + 1.
-                    // Past that, we know the enumerable will be able to fit this partition,
+                    // Past that, we know the enumerable will be able to fit this subset,
                     // so the count will just be _maxIndexInclusive + 1 - _minIndexInclusive.
 
                     // Note that it is possible for _maxIndexInclusive to be int.MaxValue here,
@@ -331,7 +273,6 @@ public int GetCount(bool onlyIfCheap)
                     Debug.Assert(count != (uint)int.MaxValue + 1 || _minIndexInclusive > 0, "Our return value will be incorrect.");
                     return Math.Max((int)count - _minIndexInclusive, 0);
                 }
-
             }
 
             public override bool MoveNext()
@@ -352,7 +293,7 @@ public override bool MoveNext()
                         _state = 2;
                         goto case 2;
                     case 2:
-                        Debug.Assert(_enumerator != null);
+                        Debug.Assert(_enumerator is not null);
                         if (!SkipBeforeFirst(_enumerator))
                         {
                             // Reached the end before we finished skipping.
@@ -362,7 +303,7 @@ public override bool MoveNext()
                         _state = 3;
                         goto default;
                     default:
-                        Debug.Assert(_enumerator != null);
+                        Debug.Assert(_enumerator is not null);
                         if ((!HasLimit || taken < Limit) && _enumerator.MoveNext())
                         {
                             if (HasLimit)
@@ -383,10 +324,7 @@ public override bool MoveNext()
                 return false;
             }
 
-            public override IEnumerable<TResult> Select<TResult>(Func<TSource, TResult> selector) =>
-                new SelectIPartitionIterator<TSource, TResult>(this, selector);
-
-            public IPartition<TSource>? Skip(int count)
+            public override Iterator<TSource>? Skip(int count)
             {
                 int minIndex = _minIndexInclusive + count;
 
@@ -397,7 +335,7 @@ public override IEnumerable<TResult> Select<TResult>(Func<TSource, TResult> sele
                         // If we don't know our max count and minIndex can no longer fit in a positive int,
                         // then we will need to wrap ourselves in another iterator.
                         // This can happen, for example, during e.Skip(int.MaxValue).Skip(int.MaxValue).
-                        return new EnumerablePartition<TSource>(this, count, -1);
+                        return new IEnumerableSkipTakeIterator<TSource>(this, count, -1);
                     }
                 }
                 else if ((uint)minIndex > (uint)_maxIndexInclusive)
@@ -409,10 +347,10 @@ public override IEnumerable<TResult> Select<TResult>(Func<TSource, TResult> sele
                 }
 
                 Debug.Assert(minIndex >= 0, $"We should have taken care of all cases when {nameof(minIndex)} overflows.");
-                return new EnumerablePartition<TSource>(_source, minIndex, _maxIndexInclusive);
+                return new IEnumerableSkipTakeIterator<TSource>(_source, minIndex, _maxIndexInclusive);
             }
 
-            public IPartition<TSource> Take(int count)
+            public override Iterator<TSource> Take(int count)
             {
                 int maxIndex = _minIndexInclusive + count - 1;
                 if (!HasLimit)
@@ -425,7 +363,7 @@ public IPartition<TSource> Take(int count)
                         // _minIndexInclusive (which is count - 1) must fit in an int.
                         // Example: e.Skip(50).Take(int.MaxValue).
 
-                        return new EnumerablePartition<TSource>(this, 0, count - 1);
+                        return new IEnumerableSkipTakeIterator<TSource>(this, 0, count - 1);
                     }
                 }
                 else if ((uint)maxIndex >= (uint)_maxIndexInclusive)
@@ -437,18 +375,23 @@ public IPartition<TSource> Take(int count)
                 }
 
                 Debug.Assert(maxIndex >= 0, $"We should have taken care of all cases when {nameof(maxIndex)} overflows.");
-                return new EnumerablePartition<TSource>(_source, _minIndexInclusive, maxIndex);
+                return new IEnumerableSkipTakeIterator<TSource>(_source, _minIndexInclusive, maxIndex);
             }
 
-            public TSource? TryGetElementAt(int index, out bool found)
+            public override TSource? TryGetElementAt(int index, out bool found)
             {
                 // If the index is negative or >= our max count, return early.
                 if (index >= 0 && (!HasLimit || index < Limit))
                 {
-                    using (IEnumerator<TSource> en = _source.GetEnumerator())
+                    Debug.Assert(_minIndexInclusive + index >= 0, $"Adding {nameof(index)} caused {nameof(_minIndexInclusive)} to overflow.");
+
+                    if (_source is Iterator<TSource> iterator)
                     {
-                        Debug.Assert(_minIndexInclusive + index >= 0, $"Adding {nameof(index)} caused {nameof(_minIndexInclusive)} to overflow.");
+                        return iterator.TryGetElementAt(_minIndexInclusive + index, out found);
+                    }
 
+                    using (IEnumerator<TSource> en = _source.GetEnumerator())
+                    {
                         if (SkipBefore(_minIndexInclusive + index, en) && en.MoveNext())
                         {
                             found = true;
@@ -461,8 +404,15 @@ public IPartition<TSource> Take(int count)
                 return default;
             }
 
-            public TSource? TryGetFirst(out bool found)
+            public override TSource? TryGetFirst(out bool found)
             {
+                Debug.Assert(!HasLimit || Limit > 0);
+
+                if (_source is Iterator<TSource> iterator)
+                {
+                    return iterator.TryGetElementAt(_minIndexInclusive, out found);
+                }
+
                 using (IEnumerator<TSource> en = _source.GetEnumerator())
                 {
                     if (SkipBeforeFirst(en) && en.MoveNext())
@@ -476,8 +426,17 @@ public IPartition<TSource> Take(int count)
                 return default;
             }
 
-            public TSource? TryGetLast(out bool found)
+            public override TSource? TryGetLast(out bool found)
             {
+                if (_source is Iterator<TSource> iterator &&
+                    iterator.GetCount(onlyIfCheap: true) is int count &&
+                    count >= _minIndexInclusive)
+                {
+                    return !HasLimit ?
+                        iterator.TryGetLast(out found) :
+                        iterator.TryGetElementAt(_maxIndexInclusive, out found);
+                }
+
                 using (IEnumerator<TSource> en = _source.GetEnumerator())
                 {
                     if (SkipBeforeFirst(en) && en.MoveNext())
@@ -502,7 +461,7 @@ public IPartition<TSource> Take(int count)
                 return default;
             }
 
-            public TSource[] ToArray()
+            public override TSource[] ToArray()
             {
                 using (IEnumerator<TSource> en = _source.GetEnumerator())
                 {
@@ -530,7 +489,7 @@ public TSource[] ToArray()
                 return [];
             }
 
-            public List<TSource> ToList()
+            public override List<TSource> ToList()
             {
                 var list = new List<TSource>();
 
@@ -565,7 +524,7 @@ private static int SkipAndCount(int index, IEnumerator<TSource> en)
 
             private static uint SkipAndCount(uint index, IEnumerator<TSource> en)
             {
-                Debug.Assert(en != null);
+                Debug.Assert(en is not null);
 
                 for (uint i = 0; i < index; i++)
                 {
diff --git a/src/libraries/System.Linq/src/System/Linq/Take.SizeOpt.cs b/src/libraries/System.Linq/src/System/Linq/Take.SizeOpt.cs
index 65ad659f1a2c..f61b01ee5777 100644
--- a/src/libraries/System.Linq/src/System/Linq/Take.SizeOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Take.SizeOpt.cs
@@ -21,7 +21,7 @@ private static IEnumerable<TSource> TakeIterator<TSource>(IEnumerable<TSource> s
 
         private static IEnumerable<TSource> TakeRangeIterator<TSource>(IEnumerable<TSource> source, int startIndex, int endIndex)
         {
-            Debug.Assert(source != null);
+            Debug.Assert(source is not null);
             Debug.Assert(startIndex >= 0 && startIndex < endIndex);
 
             using IEnumerator<TSource> e = source.GetEnumerator();
diff --git a/src/libraries/System.Linq/src/System/Linq/Take.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Take.SpeedOpt.cs
index f97c5295f75a..81025ee21d0b 100644
--- a/src/libraries/System.Linq/src/System/Linq/Take.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Take.SpeedOpt.cs
@@ -10,30 +10,30 @@ public static partial class Enumerable
     {
         private static IEnumerable<TSource> TakeIterator<TSource>(IEnumerable<TSource> source, int count)
         {
-            Debug.Assert(source != null && !IsEmptyArray(source));
+            Debug.Assert(source is not null && !IsEmptyArray(source));
             Debug.Assert(count > 0);
 
             return
-                source is IPartition<TSource> partition ? (partition.Take(count) ?? Empty<TSource>()) :
-                source is IList<TSource> sourceList ? new ListPartition<TSource>(sourceList, 0, count - 1) :
-                new EnumerablePartition<TSource>(source, 0, count - 1);
+                source is Iterator<TSource> iterator ? (iterator.Take(count) ?? Empty<TSource>()) :
+                source is IList<TSource> sourceList ? new IListSkipTakeIterator<TSource>(sourceList, 0, count - 1) :
+                new IEnumerableSkipTakeIterator<TSource>(source, 0, count - 1);
         }
 
         private static IEnumerable<TSource> TakeRangeIterator<TSource>(IEnumerable<TSource> source, int startIndex, int endIndex)
         {
-            Debug.Assert(source != null && !IsEmptyArray(source));
+            Debug.Assert(source is not null && !IsEmptyArray(source));
             Debug.Assert(startIndex >= 0 && startIndex < endIndex);
 
             return
-                source is IPartition<TSource> partition ? TakePartitionRange(partition, startIndex, endIndex) :
-                source is IList<TSource> sourceList ? new ListPartition<TSource>(sourceList, startIndex, endIndex - 1) :
-                new EnumerablePartition<TSource>(source, startIndex, endIndex - 1);
+                source is Iterator<TSource> iterator ? TakeIteratorRange(iterator, startIndex, endIndex) :
+                source is IList<TSource> sourceList ? new IListSkipTakeIterator<TSource>(sourceList, startIndex, endIndex - 1) :
+                new IEnumerableSkipTakeIterator<TSource>(source, startIndex, endIndex - 1);
 
-            static IEnumerable<TSource> TakePartitionRange(IPartition<TSource> partition, int startIndex, int endIndex)
+            static IEnumerable<TSource> TakeIteratorRange(Iterator<TSource> iterator, int startIndex, int endIndex)
             {
-                IPartition<TSource>? source;
+                Iterator<TSource>? source;
                 if (endIndex != 0 &&
-                    (source = partition.Take(endIndex)) is not null &&
+                    (source = iterator.Take(endIndex)) is not null &&
                     (startIndex == 0 || (source = source!.Skip(startIndex)) is not null))
                 {
                     return source;
diff --git a/src/libraries/System.Linq/src/System/Linq/Take.cs b/src/libraries/System.Linq/src/System/Linq/Take.cs
index 627d5bf03ed7..65feb8a3eb08 100644
--- a/src/libraries/System.Linq/src/System/Linq/Take.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Take.cs
@@ -10,7 +10,7 @@ public static partial class Enumerable
     {
         public static IEnumerable<TSource> Take<TSource>(this IEnumerable<TSource> source, int count)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -32,7 +32,7 @@ public static IEnumerable<TSource> Take<TSource>(this IEnumerable<TSource> sourc
         /// </remarks>
         public static IEnumerable<TSource> Take<TSource>(this IEnumerable<TSource> source, Range range)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
@@ -70,7 +70,7 @@ public static IEnumerable<TSource> Take<TSource>(this IEnumerable<TSource> sourc
 
         private static IEnumerable<TSource> TakeRangeFromEndIterator<TSource>(IEnumerable<TSource> source, bool isStartIndexFromEnd, int startIndex, bool isEndIndexFromEnd, int endIndex)
         {
-            Debug.Assert(source != null);
+            Debug.Assert(source is not null);
             Debug.Assert(isStartIndexFromEnd || isEndIndexFromEnd);
             Debug.Assert(isStartIndexFromEnd
                 ? startIndex > 0 && (!isEndIndexFromEnd || startIndex > endIndex)
@@ -189,12 +189,12 @@ static int CalculateEndIndex(bool isEndIndexFromEnd, int endIndex, int count) =>
 
         public static IEnumerable<TSource> TakeWhile<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
@@ -222,12 +222,12 @@ private static IEnumerable<TSource> TakeWhileIterator<TSource>(IEnumerable<TSour
 
         public static IEnumerable<TSource> TakeWhile<TSource>(this IEnumerable<TSource> source, Func<TSource, int, bool> predicate)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
@@ -261,7 +261,7 @@ private static IEnumerable<TSource> TakeWhileIterator<TSource>(IEnumerable<TSour
 
         public static IEnumerable<TSource> TakeLast<TSource>(this IEnumerable<TSource> source, int count)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/ToCollection.cs b/src/libraries/System.Linq/src/System/Linq/ToCollection.cs
index 043cac8f0038..05e18b2382c8 100644
--- a/src/libraries/System.Linq/src/System/Linq/ToCollection.cs
+++ b/src/libraries/System.Linq/src/System/Linq/ToCollection.cs
@@ -11,27 +11,16 @@ public static partial class Enumerable
     {
         public static TSource[] ToArray<TSource>(this IEnumerable<TSource> source)
         {
-            if (source is null)
-            {
-                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
-            }
-
-            if (source is IIListProvider<TSource> arrayProvider)
+#if !OPTIMIZE_FOR_SIZE
+            if (source is Iterator<TSource> iterator)
             {
-                return arrayProvider.ToArray();
+                return iterator.ToArray();
             }
+#endif
 
             if (source is ICollection<TSource> collection)
             {
-                int count = collection.Count;
-                if (count != 0)
-                {
-                    var result = new TSource[count];
-                    collection.CopyTo(result, 0);
-                    return result;
-                }
-
-                return [];
+                return ICollectionToArray(collection);
             }
 
             return EnumerableToArray(source);
@@ -39,6 +28,11 @@ public static TSource[] ToArray<TSource>(this IEnumerable<TSource> source)
             [MethodImpl(MethodImplOptions.NoInlining)] // avoid large stack allocation impacting other paths
             static TSource[] EnumerableToArray(IEnumerable<TSource> source)
             {
+                if (source is null)
+                {
+                    ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
+                }
+
                 SegmentedArrayBuilder<TSource>.ScratchBuffer scratch = default;
                 SegmentedArrayBuilder<TSource> builder = new(scratch);
 
@@ -50,6 +44,19 @@ static TSource[] EnumerableToArray(IEnumerable<TSource> source)
             }
         }
 
+        private static TSource[] ICollectionToArray<TSource>(ICollection<TSource> collection)
+        {
+            int count = collection.Count;
+            if (count != 0)
+            {
+                var result = new TSource[count];
+                collection.CopyTo(result, 0);
+                return result;
+            }
+
+            return [];
+        }
+
         public static List<TSource> ToList<TSource>(this IEnumerable<TSource> source)
         {
             if (source is null)
@@ -57,10 +64,12 @@ public static List<TSource> ToList<TSource>(this IEnumerable<TSource> source)
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (source is IIListProvider<TSource> listProvider)
+#if !OPTIMIZE_FOR_SIZE
+            if (source is Iterator<TSource> iterator)
             {
-                return listProvider.ToList();
+                return iterator.ToList();
             }
+#endif
 
             return new List<TSource>(source);
         }
@@ -255,12 +264,5 @@ public static HashSet<TSource> ToHashSet<TSource>(this IEnumerable<TSource> sour
         /// <summary>Default initial capacity to use when creating sets for internal temporary storage.</summary>
         /// <remarks>This is based on the implicit size used in previous implementations, which used a custom Set type.</remarks>
         private const int DefaultInternalSetCapacity = 7;
-
-        private static TSource[] HashSetToArray<TSource>(HashSet<TSource> set)
-        {
-            var result = new TSource[set.Count];
-            set.CopyTo(result);
-            return result;
-        }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/Union.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Union.SpeedOpt.cs
index 6acf199e665b..9f50bf0c39a3 100644
--- a/src/libraries/System.Linq/src/System/Linq/Union.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Union.SpeedOpt.cs
@@ -7,7 +7,7 @@ namespace System.Linq
 {
     public static partial class Enumerable
     {
-        private abstract partial class UnionIterator<TSource> : IIListProvider<TSource>
+        private abstract partial class UnionIterator<TSource>
         {
             private HashSet<TSource> FillSet()
             {
@@ -15,7 +15,7 @@ private HashSet<TSource> FillSet()
                 for (int index = 0; ; ++index)
                 {
                     IEnumerable<TSource>? enumerable = GetEnumerable(index);
-                    if (enumerable == null)
+                    if (enumerable is null)
                     {
                         return set;
                     }
@@ -24,11 +24,27 @@ private HashSet<TSource> FillSet()
                 }
             }
 
-            public TSource[] ToArray() => Enumerable.HashSetToArray(FillSet());
+            public override TSource[] ToArray() => ICollectionToArray(FillSet());
 
-            public List<TSource> ToList() => new List<TSource>(FillSet());
+            public override List<TSource> ToList() => new List<TSource>(FillSet());
 
-            public int GetCount(bool onlyIfCheap) => onlyIfCheap ? -1 : FillSet().Count;
+            public override int GetCount(bool onlyIfCheap) => onlyIfCheap ? -1 : FillSet().Count;
+
+            public override TSource? TryGetFirst(out bool found)
+            {
+                IEnumerable<TSource>? source;
+                for (int i = 0; (source = GetEnumerable(i)) is not null; i++)
+                {
+                    TSource? result = source.TryGetFirst(out found);
+                    if (found)
+                    {
+                        return result;
+                    }
+                }
+
+                found = false;
+                return default;
+            }
         }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/Union.cs b/src/libraries/System.Linq/src/System/Linq/Union.cs
index 6031498bbce5..a8cb34ac5173 100644
--- a/src/libraries/System.Linq/src/System/Linq/Union.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Union.cs
@@ -13,12 +13,12 @@ public static partial class Enumerable
 
         public static IEnumerable<TSource> Union<TSource>(this IEnumerable<TSource> first, IEnumerable<TSource> second, IEqualityComparer<TSource>? comparer)
         {
-            if (first == null)
+            if (first is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.first);
             }
 
-            if (second == null)
+            if (second is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.second);
             }
@@ -111,7 +111,7 @@ protected UnionIterator(IEqualityComparer<TSource>? comparer)
 
             public sealed override void Dispose()
             {
-                if (_enumerator != null)
+                if (_enumerator is not null)
                 {
                     _enumerator.Dispose();
                     _enumerator = null;
@@ -134,7 +134,7 @@ private void SetEnumerator(IEnumerator<TSource> enumerator)
 
             private void StoreFirst()
             {
-                Debug.Assert(_enumerator != null);
+                Debug.Assert(_enumerator is not null);
 
                 var set = new HashSet<TSource>(DefaultInternalSetCapacity, _comparer);
                 TSource element = _enumerator.Current;
@@ -145,8 +145,8 @@ private void StoreFirst()
 
             private bool GetNext()
             {
-                Debug.Assert(_enumerator != null);
-                Debug.Assert(_set != null);
+                Debug.Assert(_enumerator is not null);
+                Debug.Assert(_set is not null);
 
                 HashSet<TSource> set = _set;
 
@@ -167,7 +167,7 @@ public sealed override bool MoveNext()
             {
                 if (_state == 1)
                 {
-                    for (IEnumerable<TSource>? enumerable = GetEnumerable(0); enumerable != null; enumerable = GetEnumerable(_state - 1))
+                    for (IEnumerable<TSource>? enumerable = GetEnumerable(0); enumerable is not null; enumerable = GetEnumerable(_state - 1))
                     {
                         IEnumerator<TSource> enumerator = enumerable.GetEnumerator();
                         SetEnumerator(enumerator);
@@ -190,7 +190,7 @@ public sealed override bool MoveNext()
                         }
 
                         IEnumerable<TSource>? enumerable = GetEnumerable(_state - 1);
-                        if (enumerable == null)
+                        if (enumerable is null)
                         {
                             break;
                         }
@@ -217,13 +217,13 @@ private sealed class UnionIterator2<TSource> : UnionIterator<TSource>
             public UnionIterator2(IEnumerable<TSource> first, IEnumerable<TSource> second, IEqualityComparer<TSource>? comparer)
                 : base(comparer)
             {
-                Debug.Assert(first != null);
-                Debug.Assert(second != null);
+                Debug.Assert(first is not null);
+                Debug.Assert(second is not null);
                 _first = first;
                 _second = second;
             }
 
-            public override Iterator<TSource> Clone() => new UnionIterator2<TSource>(_first, _second, _comparer);
+            private protected override Iterator<TSource> Clone() => new UnionIterator2<TSource>(_first, _second, _comparer);
 
             internal override IEnumerable<TSource>? GetEnumerable(int index)
             {
@@ -262,7 +262,7 @@ public UnionIteratorN(SingleLinkedNode<IEnumerable<TSource>> sources, int headIn
                 _headIndex = headIndex;
             }
 
-            public override Iterator<TSource> Clone() => new UnionIteratorN<TSource>(_sources, _headIndex, _comparer);
+            private protected override Iterator<TSource> Clone() => new UnionIteratorN<TSource>(_sources, _headIndex, _comparer);
 
             internal override IEnumerable<TSource>? GetEnumerable(int index) => index > _headIndex ? null : _sources.GetNode(_headIndex - index).Item;
 
diff --git a/src/libraries/System.Linq/src/System/Linq/Utilities.cs b/src/libraries/System.Linq/src/System/Linq/Utilities.cs
index 208d6878040a..987d0004d4ec 100644
--- a/src/libraries/System.Linq/src/System/Linq/Utilities.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Utilities.cs
@@ -26,7 +26,7 @@ public static bool AreEqualityComparersEqual<TSource>(IEqualityComparer<TSource>
 
             var defaultComparer = EqualityComparer<TSource>.Default;
 
-            if (left == null)
+            if (left is null)
             {
                 // Micro-opt: Typically it's impossible to get a different instance
                 // of the default comparer without reflection/serialization.
@@ -34,7 +34,7 @@ public static bool AreEqualityComparersEqual<TSource>(IEqualityComparer<TSource>
                 return right == defaultComparer || right!.Equals(defaultComparer);
             }
 
-            if (right == null)
+            if (right is null)
             {
                 return left == defaultComparer || left.Equals(defaultComparer);
             }
diff --git a/src/libraries/System.Linq/src/System/Linq/Where.SpeedOpt.cs b/src/libraries/System.Linq/src/System/Linq/Where.SpeedOpt.cs
index 5bf0a7180883..40a05db9abf8 100644
--- a/src/libraries/System.Linq/src/System/Linq/Where.SpeedOpt.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Where.SpeedOpt.cs
@@ -8,9 +8,9 @@ namespace System.Linq
 {
     public static partial class Enumerable
     {
-        private sealed partial class WhereEnumerableIterator<TSource> : IPartition<TSource>
+        private sealed partial class IEnumerableWhereIterator<TSource>
         {
-            public int GetCount(bool onlyIfCheap)
+            public override int GetCount(bool onlyIfCheap)
             {
                 if (onlyIfCheap)
                 {
@@ -33,7 +33,7 @@ public int GetCount(bool onlyIfCheap)
                 return count;
             }
 
-            public TSource[] ToArray()
+            public override TSource[] ToArray()
             {
                 SegmentedArrayBuilder<TSource>.ScratchBuffer scratch = default;
                 SegmentedArrayBuilder<TSource> builder = new(scratch);
@@ -53,7 +53,7 @@ public TSource[] ToArray()
                 return result;
             }
 
-            public List<TSource> ToList()
+            public override List<TSource> ToList()
             {
                 var list = new List<TSource>();
 
@@ -69,7 +69,7 @@ public List<TSource> ToList()
                 return list;
             }
 
-            public TSource? TryGetFirst(out bool found)
+            public override TSource? TryGetFirst(out bool found)
             {
                 Func<TSource, bool> predicate = _predicate;
 
@@ -86,7 +86,7 @@ public List<TSource> ToList()
                 return default;
             }
 
-            public TSource? TryGetLast(out bool found)
+            public override TSource? TryGetLast(out bool found)
             {
                 using IEnumerator<TSource> e = _source.GetEnumerator();
 
@@ -121,7 +121,7 @@ public List<TSource> ToList()
                 return default;
             }
 
-            public TSource? TryGetElementAt(int index, out bool found)
+            public override TSource? TryGetElementAt(int index, out bool found)
             {
                 if (index >= 0)
                 {
@@ -145,15 +145,11 @@ public List<TSource> ToList()
                 found = false;
                 return default;
             }
-
-            public IPartition<TSource>? Skip(int count) => new EnumerablePartition<TSource>(this, count, -1);
-
-            public IPartition<TSource>? Take(int count) => new EnumerablePartition<TSource>(this, 0, count - 1);
         }
 
-        internal sealed partial class WhereArrayIterator<TSource> : IPartition<TSource>
+        private sealed partial class ArrayWhereIterator<TSource>
         {
-            public int GetCount(bool onlyIfCheap) => GetCount(onlyIfCheap, _source, _predicate);
+            public override int GetCount(bool onlyIfCheap) => GetCount(onlyIfCheap, _source, _predicate);
 
             public static int GetCount(bool onlyIfCheap, ReadOnlySpan<TSource> source, Func<TSource, bool> predicate)
             {
@@ -178,7 +174,7 @@ public static int GetCount(bool onlyIfCheap, ReadOnlySpan<TSource> source, Func<
                 return count;
             }
 
-            public TSource[] ToArray() => ToArray(_source, _predicate);
+            public override TSource[] ToArray() => ToArray(_source, _predicate);
 
             public static TSource[] ToArray(ReadOnlySpan<TSource> source, Func<TSource, bool> predicate)
             {
@@ -199,7 +195,7 @@ public static TSource[] ToArray(ReadOnlySpan<TSource> source, Func<TSource, bool
                 return result;
             }
 
-            public List<TSource> ToList() => ToList(_source, _predicate);
+            public override List<TSource> ToList() => ToList(_source, _predicate);
 
             public static List<TSource> ToList(ReadOnlySpan<TSource> source, Func<TSource, bool> predicate)
             {
@@ -216,7 +212,7 @@ public static List<TSource> ToList(ReadOnlySpan<TSource> source, Func<TSource, b
                 return list;
             }
 
-            public TSource? TryGetFirst(out bool found)
+            public override TSource? TryGetFirst(out bool found)
             {
                 Func<TSource, bool> predicate = _predicate;
 
@@ -233,7 +229,7 @@ public static List<TSource> ToList(ReadOnlySpan<TSource> source, Func<TSource, b
                 return default;
             }
 
-            public TSource? TryGetLast(out bool found)
+            public override TSource? TryGetLast(out bool found)
             {
                 TSource[] source = _source;
                 Func<TSource, bool> predicate = _predicate;
@@ -251,7 +247,7 @@ public static List<TSource> ToList(ReadOnlySpan<TSource> source, Func<TSource, b
                 return default;
             }
 
-            public TSource? TryGetElementAt(int index, out bool found)
+            public override TSource? TryGetElementAt(int index, out bool found)
             {
                 if (index >= 0)
                 {
@@ -275,21 +271,17 @@ public static List<TSource> ToList(ReadOnlySpan<TSource> source, Func<TSource, b
                 found = false;
                 return default;
             }
-
-            public IPartition<TSource>? Skip(int count) => new EnumerablePartition<TSource>(this, count, -1);
-
-            public IPartition<TSource>? Take(int count) => new EnumerablePartition<TSource>(this, 0, count - 1);
         }
 
-        private sealed partial class WhereListIterator<TSource> : Iterator<TSource>, IPartition<TSource>
+        private sealed partial class ListWhereIterator<TSource> : Iterator<TSource>
         {
-            public int GetCount(bool onlyIfCheap) => WhereArrayIterator<TSource>.GetCount(onlyIfCheap, CollectionsMarshal.AsSpan(_source), _predicate);
+            public override int GetCount(bool onlyIfCheap) => ArrayWhereIterator<TSource>.GetCount(onlyIfCheap, CollectionsMarshal.AsSpan(_source), _predicate);
 
-            public TSource[] ToArray() => WhereArrayIterator<TSource>.ToArray(CollectionsMarshal.AsSpan(_source), _predicate);
+            public override TSource[] ToArray() => ArrayWhereIterator<TSource>.ToArray(CollectionsMarshal.AsSpan(_source), _predicate);
 
-            public List<TSource> ToList() => WhereArrayIterator<TSource>.ToList(CollectionsMarshal.AsSpan(_source), _predicate);
+            public override List<TSource> ToList() => ArrayWhereIterator<TSource>.ToList(CollectionsMarshal.AsSpan(_source), _predicate);
 
-            public TSource? TryGetFirst(out bool found)
+            public override TSource? TryGetFirst(out bool found)
             {
                 Func<TSource, bool> predicate = _predicate;
 
@@ -306,7 +298,7 @@ private sealed partial class WhereListIterator<TSource> : Iterator<TSource>, IPa
                 return default;
             }
 
-            public TSource? TryGetLast(out bool found)
+            public override TSource? TryGetLast(out bool found)
             {
                 ReadOnlySpan<TSource> source = CollectionsMarshal.AsSpan(_source);
                 Func<TSource, bool> predicate = _predicate;
@@ -324,7 +316,7 @@ private sealed partial class WhereListIterator<TSource> : Iterator<TSource>, IPa
                 return default;
             }
 
-            public TSource? TryGetElementAt(int index, out bool found)
+            public override TSource? TryGetElementAt(int index, out bool found)
             {
                 if (index >= 0)
                 {
@@ -348,15 +340,11 @@ private sealed partial class WhereListIterator<TSource> : Iterator<TSource>, IPa
                 found = false;
                 return default;
             }
-
-            public IPartition<TSource>? Skip(int count) => new EnumerablePartition<TSource>(this, count, -1);
-
-            public IPartition<TSource>? Take(int count) => new EnumerablePartition<TSource>(this, 0, count - 1);
         }
 
-        private sealed partial class WhereSelectArrayIterator<TSource, TResult> : IPartition<TResult>
+        private sealed partial class ArrayWhereSelectIterator<TSource, TResult>
         {
-            public int GetCount(bool onlyIfCheap) => GetCount(onlyIfCheap, _source, _predicate, _selector);
+            public override int GetCount(bool onlyIfCheap) => GetCount(onlyIfCheap, _source, _predicate, _selector);
 
             public static int GetCount(bool onlyIfCheap, ReadOnlySpan<TSource> source, Func<TSource, bool> predicate, Func<TSource, TResult> selector)
             {
@@ -385,7 +373,7 @@ public static int GetCount(bool onlyIfCheap, ReadOnlySpan<TSource> source, Func<
                 return count;
             }
 
-            public TResult[] ToArray() => ToArray(_source, _predicate, _selector);
+            public override TResult[] ToArray() => ToArray(_source, _predicate, _selector);
 
             public static TResult[] ToArray(ReadOnlySpan<TSource> source, Func<TSource, bool> predicate, Func<TSource, TResult> selector)
             {
@@ -406,7 +394,7 @@ public static TResult[] ToArray(ReadOnlySpan<TSource> source, Func<TSource, bool
                 return result;
             }
 
-            public List<TResult> ToList() => ToList(_source, _predicate, _selector);
+            public override List<TResult> ToList() => ToList(_source, _predicate, _selector);
 
             public static List<TResult> ToList(ReadOnlySpan<TSource> source, Func<TSource, bool> predicate, Func<TSource, TResult> selector)
             {
@@ -423,7 +411,7 @@ public static List<TResult> ToList(ReadOnlySpan<TSource> source, Func<TSource, b
                 return list;
             }
 
-            public TResult? TryGetFirst(out bool found) => TryGetFirst(_source, _predicate, _selector, out found);
+            public override TResult? TryGetFirst(out bool found) => TryGetFirst(_source, _predicate, _selector, out found);
 
             public static TResult? TryGetFirst(ReadOnlySpan<TSource> source, Func<TSource, bool> predicate, Func<TSource, TResult> selector, out bool found)
             {
@@ -440,7 +428,7 @@ public static List<TResult> ToList(ReadOnlySpan<TSource> source, Func<TSource, b
                 return default;
             }
 
-            public TResult? TryGetLast(out bool found) => TryGetLast(_source, _predicate, _selector, out found);
+            public override TResult? TryGetLast(out bool found) => TryGetLast(_source, _predicate, _selector, out found);
 
             public static TResult? TryGetLast(ReadOnlySpan<TSource> source, Func<TSource, bool> predicate, Func<TSource, TResult> selector, out bool found)
             {
@@ -457,7 +445,7 @@ public static List<TResult> ToList(ReadOnlySpan<TSource> source, Func<TSource, b
                 return default;
             }
 
-            public TResult? TryGetElementAt(int index, out bool found) => TryGetElementAt(_source, _predicate, _selector, index, out found);
+            public override TResult? TryGetElementAt(int index, out bool found) => TryGetElementAt(_source, _predicate, _selector, index, out found);
 
             public static TResult? TryGetElementAt(ReadOnlySpan<TSource> source, Func<TSource, bool> predicate, Func<TSource, TResult> selector, int index, out bool found)
             {
@@ -481,34 +469,26 @@ public static List<TResult> ToList(ReadOnlySpan<TSource> source, Func<TSource, b
                 found = false;
                 return default;
             }
-
-            public IPartition<TResult>? Skip(int count) => new EnumerablePartition<TResult>(this, count, -1);
-
-            public IPartition<TResult>? Take(int count) => new EnumerablePartition<TResult>(this, 0, count - 1);
         }
 
-        private sealed partial class WhereSelectListIterator<TSource, TResult> : IPartition<TResult>
+        private sealed partial class ListWhereSelectIterator<TSource, TResult>
         {
-            public int GetCount(bool onlyIfCheap) => WhereSelectArrayIterator<TSource, TResult>.GetCount(onlyIfCheap, CollectionsMarshal.AsSpan(_source), _predicate, _selector);
-
-            public TResult[] ToArray() => WhereSelectArrayIterator<TSource, TResult>.ToArray(CollectionsMarshal.AsSpan(_source), _predicate, _selector);
+            public override int GetCount(bool onlyIfCheap) => ArrayWhereSelectIterator<TSource, TResult>.GetCount(onlyIfCheap, CollectionsMarshal.AsSpan(_source), _predicate, _selector);
 
-            public List<TResult> ToList() => WhereSelectArrayIterator<TSource, TResult>.ToList(CollectionsMarshal.AsSpan(_source), _predicate, _selector);
+            public override TResult[] ToArray() => ArrayWhereSelectIterator<TSource, TResult>.ToArray(CollectionsMarshal.AsSpan(_source), _predicate, _selector);
 
-            public TResult? TryGetElementAt(int index, out bool found) => WhereSelectArrayIterator<TSource, TResult>.TryGetElementAt(CollectionsMarshal.AsSpan(_source), _predicate, _selector, index, out found);
+            public override List<TResult> ToList() => ArrayWhereSelectIterator<TSource, TResult>.ToList(CollectionsMarshal.AsSpan(_source), _predicate, _selector);
 
-            public TResult? TryGetFirst(out bool found) => WhereSelectArrayIterator<TSource, TResult>.TryGetFirst(CollectionsMarshal.AsSpan(_source), _predicate, _selector, out found);
+            public override TResult? TryGetElementAt(int index, out bool found) => ArrayWhereSelectIterator<TSource, TResult>.TryGetElementAt(CollectionsMarshal.AsSpan(_source), _predicate, _selector, index, out found);
 
-            public TResult? TryGetLast(out bool found) => WhereSelectArrayIterator<TSource, TResult>.TryGetLast(CollectionsMarshal.AsSpan(_source), _predicate, _selector, out found);
+            public override TResult? TryGetFirst(out bool found) => ArrayWhereSelectIterator<TSource, TResult>.TryGetFirst(CollectionsMarshal.AsSpan(_source), _predicate, _selector, out found);
 
-            public IPartition<TResult>? Skip(int count) => new EnumerablePartition<TResult>(this, count, -1);
-
-            public IPartition<TResult>? Take(int count) => new EnumerablePartition<TResult>(this, 0, count - 1);
+            public override TResult? TryGetLast(out bool found) => ArrayWhereSelectIterator<TSource, TResult>.TryGetLast(CollectionsMarshal.AsSpan(_source), _predicate, _selector, out found);
         }
 
-        private sealed partial class WhereSelectEnumerableIterator<TSource, TResult> : IPartition<TResult>
+        private sealed partial class IEnumerableWhereSelectIterator<TSource, TResult>
         {
-            public int GetCount(bool onlyIfCheap)
+            public override int GetCount(bool onlyIfCheap)
             {
                 // In case someone uses Count() to force evaluation of
                 // the selector, run it provided `onlyIfCheap` is false.
@@ -535,7 +515,7 @@ public int GetCount(bool onlyIfCheap)
                 return count;
             }
 
-            public TResult[] ToArray()
+            public override TResult[] ToArray()
             {
                 SegmentedArrayBuilder<TResult>.ScratchBuffer scratch = default;
                 SegmentedArrayBuilder<TResult> builder = new(scratch);
@@ -556,7 +536,7 @@ public TResult[] ToArray()
                 return result;
             }
 
-            public List<TResult> ToList()
+            public override List<TResult> ToList()
             {
                 var list = new List<TResult>();
 
@@ -573,7 +553,7 @@ public List<TResult> ToList()
                 return list;
             }
 
-            public TResult? TryGetFirst(out bool found)
+            public override TResult? TryGetFirst(out bool found)
             {
                 Func<TSource, bool> predicate = _predicate;
 
@@ -590,7 +570,7 @@ public List<TResult> ToList()
                 return default;
             }
 
-            public TResult? TryGetLast(out bool found)
+            public override TResult? TryGetLast(out bool found)
             {
                 using IEnumerator<TSource> e = _source.GetEnumerator();
 
@@ -625,7 +605,7 @@ public List<TResult> ToList()
                 return default;
             }
 
-            public TResult? TryGetElementAt(int index, out bool found)
+            public override TResult? TryGetElementAt(int index, out bool found)
             {
                 if (index >= 0)
                 {
@@ -649,10 +629,6 @@ public List<TResult> ToList()
                 found = false;
                 return default;
             }
-
-            public IPartition<TResult>? Skip(int count) => new EnumerablePartition<TResult>(this, count, -1);
-
-            public IPartition<TResult>? Take(int count) => new EnumerablePartition<TResult>(this, 0, count - 1);
         }
     }
 }
diff --git a/src/libraries/System.Linq/src/System/Linq/Where.cs b/src/libraries/System.Linq/src/System/Linq/Where.cs
index aec6370a330f..4371af8299fb 100644
--- a/src/libraries/System.Linq/src/System/Linq/Where.cs
+++ b/src/libraries/System.Linq/src/System/Linq/Where.cs
@@ -11,12 +11,12 @@ public static partial class Enumerable
     {
         public static IEnumerable<TSource> Where<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
@@ -33,25 +33,25 @@ public static IEnumerable<TSource> Where<TSource>(this IEnumerable<TSource> sour
                     return [];
                 }
 
-                return new WhereArrayIterator<TSource>(array, predicate);
+                return new ArrayWhereIterator<TSource>(array, predicate);
             }
 
             if (source is List<TSource> list)
             {
-                return new WhereListIterator<TSource>(list, predicate);
+                return new ListWhereIterator<TSource>(list, predicate);
             }
 
-            return new WhereEnumerableIterator<TSource>(source, predicate);
+            return new IEnumerableWhereIterator<TSource>(source, predicate);
         }
 
         public static IEnumerable<TSource> Where<TSource>(this IEnumerable<TSource> source, Func<TSource, int, bool> predicate)
         {
-            if (source == null)
+            if (source is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
             }
 
-            if (predicate == null)
+            if (predicate is null)
             {
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
             }
@@ -85,25 +85,25 @@ private static IEnumerable<TSource> WhereIterator<TSource>(IEnumerable<TSource>
         /// An iterator that filters each item of an <see cref="IEnumerable{TSource}"/>.
         /// </summary>
         /// <typeparam name="TSource">The type of the source enumerable.</typeparam>
-        private sealed partial class WhereEnumerableIterator<TSource> : Iterator<TSource>
+        private sealed partial class IEnumerableWhereIterator<TSource> : Iterator<TSource>
         {
             private readonly IEnumerable<TSource> _source;
             private readonly Func<TSource, bool> _predicate;
             private IEnumerator<TSource>? _enumerator;
 
-            public WhereEnumerableIterator(IEnumerable<TSource> source, Func<TSource, bool> predicate)
+            public IEnumerableWhereIterator(IEnumerable<TSource> source, Func<TSource, bool> predicate)
             {
-                Debug.Assert(source != null);
-                Debug.Assert(predicate != null);
+                Debug.Assert(source is not null);
+                Debug.Assert(predicate is not null);
                 _source = source;
                 _predicate = predicate;
             }
 
-            public override Iterator<TSource> Clone() => new WhereEnumerableIterator<TSource>(_source, _predicate);
+            private protected override Iterator<TSource> Clone() => new IEnumerableWhereIterator<TSource>(_source, _predicate);
 
             public override void Dispose()
             {
-                if (_enumerator != null)
+                if (_enumerator is not null)
                 {
                     _enumerator.Dispose();
                     _enumerator = null;
@@ -121,7 +121,7 @@ public override bool MoveNext()
                         _state = 2;
                         goto case 2;
                     case 2:
-                        Debug.Assert(_enumerator != null);
+                        Debug.Assert(_enumerator is not null);
                         while (_enumerator.MoveNext())
                         {
                             TSource item = _enumerator.Current;
@@ -140,31 +140,31 @@ public override bool MoveNext()
             }
 
             public override IEnumerable<TResult> Select<TResult>(Func<TSource, TResult> selector) =>
-                new WhereSelectEnumerableIterator<TSource, TResult>(_source, _predicate, selector);
+                new IEnumerableWhereSelectIterator<TSource, TResult>(_source, _predicate, selector);
 
             public override IEnumerable<TSource> Where(Func<TSource, bool> predicate) =>
-                new WhereEnumerableIterator<TSource>(_source, CombinePredicates(_predicate, predicate));
+                new IEnumerableWhereIterator<TSource>(_source, CombinePredicates(_predicate, predicate));
         }
 
         /// <summary>
         /// An iterator that filters each item of an array.
         /// </summary>
         /// <typeparam name="TSource">The type of the source array.</typeparam>
-        internal sealed partial class WhereArrayIterator<TSource> : Iterator<TSource>
+        private sealed partial class ArrayWhereIterator<TSource> : Iterator<TSource>
         {
             private readonly TSource[] _source;
             private readonly Func<TSource, bool> _predicate;
 
-            public WhereArrayIterator(TSource[] source, Func<TSource, bool> predicate)
+            public ArrayWhereIterator(TSource[] source, Func<TSource, bool> predicate)
             {
-                Debug.Assert(source != null && source.Length > 0);
-                Debug.Assert(predicate != null);
+                Debug.Assert(source is not null && source.Length > 0);
+                Debug.Assert(predicate is not null);
                 _source = source;
                 _predicate = predicate;
             }
 
-            public override Iterator<TSource> Clone() =>
-                new WhereArrayIterator<TSource>(_source, _predicate);
+            private protected override Iterator<TSource> Clone() =>
+                new ArrayWhereIterator<TSource>(_source, _predicate);
 
             public override bool MoveNext()
             {
@@ -187,32 +187,32 @@ public override bool MoveNext()
             }
 
             public override IEnumerable<TResult> Select<TResult>(Func<TSource, TResult> selector) =>
-                new WhereSelectArrayIterator<TSource, TResult>(_source, _predicate, selector);
+                new ArrayWhereSelectIterator<TSource, TResult>(_source, _predicate, selector);
 
             public override IEnumerable<TSource> Where(Func<TSource, bool> predicate) =>
-                new WhereArrayIterator<TSource>(_source, CombinePredicates(_predicate, predicate));
+                new ArrayWhereIterator<TSource>(_source, CombinePredicates(_predicate, predicate));
         }
 
         /// <summary>
         /// An iterator that filters each item of a <see cref="List{TSource}"/>.
         /// </summary>
         /// <typeparam name="TSource">The type of the source list.</typeparam>
-        private sealed partial class WhereListIterator<TSource> : Iterator<TSource>
+        private sealed partial class ListWhereIterator<TSource> : Iterator<TSource>
         {
             private readonly List<TSource> _source;
             private readonly Func<TSource, bool> _predicate;
             private List<TSource>.Enumerator _enumerator;
 
-            public WhereListIterator(List<TSource> source, Func<TSource, bool> predicate)
+            public ListWhereIterator(List<TSource> source, Func<TSource, bool> predicate)
             {
-                Debug.Assert(source != null);
-                Debug.Assert(predicate != null);
+                Debug.Assert(source is not null);
+                Debug.Assert(predicate is not null);
                 _source = source;
                 _predicate = predicate;
             }
 
-            public override Iterator<TSource> Clone() =>
-                new WhereListIterator<TSource>(_source, _predicate);
+            private protected override Iterator<TSource> Clone() =>
+                new ListWhereIterator<TSource>(_source, _predicate);
 
             public override bool MoveNext()
             {
@@ -241,10 +241,10 @@ public override bool MoveNext()
             }
 
             public override IEnumerable<TResult> Select<TResult>(Func<TSource, TResult> selector) =>
-                new WhereSelectListIterator<TSource, TResult>(_source, _predicate, selector);
+                new ListWhereSelectIterator<TSource, TResult>(_source, _predicate, selector);
 
             public override IEnumerable<TSource> Where(Func<TSource, bool> predicate) =>
-                new WhereListIterator<TSource>(_source, CombinePredicates(_predicate, predicate));
+                new ListWhereIterator<TSource>(_source, CombinePredicates(_predicate, predicate));
         }
 
         /// <summary>
@@ -252,24 +252,24 @@ public override IEnumerable<TSource> Where(Func<TSource, bool> predicate) =>
         /// </summary>
         /// <typeparam name="TSource">The type of the source array.</typeparam>
         /// <typeparam name="TResult">The type of the mapped items.</typeparam>
-        private sealed partial class WhereSelectArrayIterator<TSource, TResult> : Iterator<TResult>
+        private sealed partial class ArrayWhereSelectIterator<TSource, TResult> : Iterator<TResult>
         {
             private readonly TSource[] _source;
             private readonly Func<TSource, bool> _predicate;
             private readonly Func<TSource, TResult> _selector;
 
-            public WhereSelectArrayIterator(TSource[] source, Func<TSource, bool> predicate, Func<TSource, TResult> selector)
+            public ArrayWhereSelectIterator(TSource[] source, Func<TSource, bool> predicate, Func<TSource, TResult> selector)
             {
-                Debug.Assert(source != null && source.Length > 0);
-                Debug.Assert(predicate != null);
-                Debug.Assert(selector != null);
+                Debug.Assert(source is not null && source.Length > 0);
+                Debug.Assert(predicate is not null);
+                Debug.Assert(selector is not null);
                 _source = source;
                 _predicate = predicate;
                 _selector = selector;
             }
 
-            public override Iterator<TResult> Clone() =>
-                new WhereSelectArrayIterator<TSource, TResult>(_source, _predicate, _selector);
+            private protected override Iterator<TResult> Clone() =>
+                new ArrayWhereSelectIterator<TSource, TResult>(_source, _predicate, _selector);
 
             public override bool MoveNext()
             {
@@ -292,7 +292,7 @@ public override bool MoveNext()
             }
 
             public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector) =>
-                new WhereSelectArrayIterator<TSource, TResult2>(_source, _predicate, CombineSelectors(_selector, selector));
+                new ArrayWhereSelectIterator<TSource, TResult2>(_source, _predicate, CombineSelectors(_selector, selector));
         }
 
         /// <summary>
@@ -300,25 +300,25 @@ public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> s
         /// </summary>
         /// <typeparam name="TSource">The type of the source list.</typeparam>
         /// <typeparam name="TResult">The type of the mapped items.</typeparam>
-        private sealed partial class WhereSelectListIterator<TSource, TResult> : Iterator<TResult>
+        private sealed partial class ListWhereSelectIterator<TSource, TResult> : Iterator<TResult>
         {
             private readonly List<TSource> _source;
             private readonly Func<TSource, bool> _predicate;
             private readonly Func<TSource, TResult> _selector;
             private List<TSource>.Enumerator _enumerator;
 
-            public WhereSelectListIterator(List<TSource> source, Func<TSource, bool> predicate, Func<TSource, TResult> selector)
+            public ListWhereSelectIterator(List<TSource> source, Func<TSource, bool> predicate, Func<TSource, TResult> selector)
             {
-                Debug.Assert(source != null);
-                Debug.Assert(predicate != null);
-                Debug.Assert(selector != null);
+                Debug.Assert(source is not null);
+                Debug.Assert(predicate is not null);
+                Debug.Assert(selector is not null);
                 _source = source;
                 _predicate = predicate;
                 _selector = selector;
             }
 
-            public override Iterator<TResult> Clone() =>
-                new WhereSelectListIterator<TSource, TResult>(_source, _predicate, _selector);
+            private protected override Iterator<TResult> Clone() =>
+                new ListWhereSelectIterator<TSource, TResult>(_source, _predicate, _selector);
 
             public override bool MoveNext()
             {
@@ -347,7 +347,7 @@ public override bool MoveNext()
             }
 
             public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector) =>
-                new WhereSelectListIterator<TSource, TResult2>(_source, _predicate, CombineSelectors(_selector, selector));
+                new ListWhereSelectIterator<TSource, TResult2>(_source, _predicate, CombineSelectors(_selector, selector));
         }
 
         /// <summary>
@@ -355,29 +355,29 @@ public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> s
         /// </summary>
         /// <typeparam name="TSource">The type of the source enumerable.</typeparam>
         /// <typeparam name="TResult">The type of the mapped items.</typeparam>
-        private sealed partial class WhereSelectEnumerableIterator<TSource, TResult> : Iterator<TResult>
+        private sealed partial class IEnumerableWhereSelectIterator<TSource, TResult> : Iterator<TResult>
         {
             private readonly IEnumerable<TSource> _source;
             private readonly Func<TSource, bool> _predicate;
             private readonly Func<TSource, TResult> _selector;
             private IEnumerator<TSource>? _enumerator;
 
-            public WhereSelectEnumerableIterator(IEnumerable<TSource> source, Func<TSource, bool> predicate, Func<TSource, TResult> selector)
+            public IEnumerableWhereSelectIterator(IEnumerable<TSource> source, Func<TSource, bool> predicate, Func<TSource, TResult> selector)
             {
-                Debug.Assert(source != null);
-                Debug.Assert(predicate != null);
-                Debug.Assert(selector != null);
+                Debug.Assert(source is not null);
+                Debug.Assert(predicate is not null);
+                Debug.Assert(selector is not null);
                 _source = source;
                 _predicate = predicate;
                 _selector = selector;
             }
 
-            public override Iterator<TResult> Clone() =>
-                new WhereSelectEnumerableIterator<TSource, TResult>(_source, _predicate, _selector);
+            private protected override Iterator<TResult> Clone() =>
+                new IEnumerableWhereSelectIterator<TSource, TResult>(_source, _predicate, _selector);
 
             public override void Dispose()
             {
-                if (_enumerator != null)
+                if (_enumerator is not null)
                 {
                     _enumerator.Dispose();
                     _enumerator = null;
@@ -395,7 +395,7 @@ public override bool MoveNext()
                         _state = 2;
                         goto case 2;
                     case 2:
-                        Debug.Assert(_enumerator != null);
+                        Debug.Assert(_enumerator is not null);
                         while (_enumerator.MoveNext())
                         {
                             TSource item = _enumerator.Current;
@@ -414,7 +414,7 @@ public override bool MoveNext()
             }
 
             public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector) =>
-                new WhereSelectEnumerableIterator<TSource, TResult2>(_source, _predicate, CombineSelectors(_selector, selector));
+                new IEnumerableWhereSelectIterator<TSource, TResult2>(_source, _predicate, CombineSelectors(_selector, selector));
         }
     }
 }
diff --git a/src/libraries/System.Linq/tests/AggregateByTests.cs b/src/libraries/System.Linq/tests/AggregateByTests.cs
index 342ab412f879..6232ce24a6df 100644
--- a/src/libraries/System.Linq/tests/AggregateByTests.cs
+++ b/src/libraries/System.Linq/tests/AggregateByTests.cs
@@ -8,6 +8,16 @@ namespace System.Linq.Tests
 {
     public class AggregateByTests : EnumerableTests
     {
+        [Fact]
+        public void Empty()
+        {
+            Assert.All(IdentityTransforms<int>(), transform =>
+            {
+                Assert.Equal(Enumerable.Empty<KeyValuePair<int, int>>(), transform(Enumerable.Empty<int>()).AggregateBy(i => i, i => i, (a, i) => a + i));
+                Assert.Equal(Enumerable.Empty<KeyValuePair<int, int>>(), transform(Enumerable.Empty<int>()).AggregateBy(i => i, 0, (a, i) => a + i));
+            });
+        }
+
         [Fact]
         public void AggregateBy_SourceNull_ThrowsArgumentNullException()
         {
@@ -15,22 +25,26 @@ public void AggregateBy_SourceNull_ThrowsArgumentNullException()
 
             AssertExtensions.Throws<ArgumentNullException>("source", () => first.AggregateBy(x => x, string.Empty, (x, y) => x + y));
             AssertExtensions.Throws<ArgumentNullException>("source", () => first.AggregateBy(x => x, string.Empty, (x, y) => x + y, new AnagramEqualityComparer()));
+            AssertExtensions.Throws<ArgumentNullException>("source", () => first.AggregateBy(x => x, x => x, (x, y) => x + y));
+            AssertExtensions.Throws<ArgumentNullException>("source", () => first.AggregateBy(x => x, x => x, (x, y) => x + y, new AnagramEqualityComparer()));
         }
 
         [Fact]
         public void AggregateBy_KeySelectorNull_ThrowsArgumentNullException()
         {
-            string[] source = { };
+            string[] source = ["test"];
             Func<string, string> keySelector = null;
 
             AssertExtensions.Throws<ArgumentNullException>("keySelector", () => source.AggregateBy(keySelector, string.Empty, (x, y) => x + y));
             AssertExtensions.Throws<ArgumentNullException>("keySelector", () => source.AggregateBy(keySelector, string.Empty, (x, y) => x + y, new AnagramEqualityComparer()));
+            AssertExtensions.Throws<ArgumentNullException>("keySelector", () => source.AggregateBy(keySelector, x => x, (x, y) => x + y));
+            AssertExtensions.Throws<ArgumentNullException>("keySelector", () => source.AggregateBy(keySelector, x => x, (x, y) => x + y, new AnagramEqualityComparer()));
         }
 
         [Fact]
         public void AggregateBy_SeedSelectorNull_ThrowsArgumentNullException()
         {
-            string[] source = { };
+            string[] source = ["test"];
             Func<string, string> seedSelector = null;
 
             AssertExtensions.Throws<ArgumentNullException>("seedSelector", () => source.AggregateBy(x => x, seedSelector, (x, y) => x + y));
@@ -40,11 +54,13 @@ public void AggregateBy_SeedSelectorNull_ThrowsArgumentNullException()
         [Fact]
         public void AggregateBy_FuncNull_ThrowsArgumentNullException()
         {
-            string[] source = { };
+            string[] source = ["test"];
             Func<string, string, string> func = null;
 
             AssertExtensions.Throws<ArgumentNullException>("func", () => source.AggregateBy(x => x, string.Empty, func));
             AssertExtensions.Throws<ArgumentNullException>("func", () => source.AggregateBy(x => x, string.Empty, func, new AnagramEqualityComparer()));
+            AssertExtensions.Throws<ArgumentNullException>("func", () => source.AggregateBy(x => x, x => x, func));
+            AssertExtensions.Throws<ArgumentNullException>("func", () => source.AggregateBy(x => x, x => x, func, new AnagramEqualityComparer()));
         }
 
         [Fact]
@@ -109,7 +125,7 @@ public static IEnumerable<object[]> AggregateBy_TestData()
                 seedSelector: x => 0,
                 func: (x, y) => x + y,
                 comparer: null,
-                expected: Enumerable.Range(0, 10).ToDictionary(x => x, x => x));
+                expected: Enumerable.Range(0, 10).Select(x => new KeyValuePair<int, int>(x, x)));
 
             yield return WrapArgs(
                 source: Enumerable.Range(5, 10),
@@ -117,7 +133,7 @@ public static IEnumerable<object[]> AggregateBy_TestData()
                 seedSelector: x => 0,
                 func: (x, y) => x + y,
                 comparer: null,
-                expected: Enumerable.Repeat(true, 1).ToDictionary(x => x, x => 95));
+                expected: Enumerable.Repeat(true, 1).Select(x => new KeyValuePair<bool, int>(x, 95)));
 
             yield return WrapArgs(
                 source: Enumerable.Range(0, 20),
@@ -125,7 +141,7 @@ public static IEnumerable<object[]> AggregateBy_TestData()
                 seedSelector: x => 0,
                 func: (x, y) => x + y,
                 comparer: null,
-                expected: Enumerable.Range(0, 5).ToDictionary(x => x, x => 30 + 4 * x));
+                expected: Enumerable.Range(0, 5).Select(x => new KeyValuePair<int, int>(x, 30 + 4 * x)));
 
             yield return WrapArgs(
                 source: Enumerable.Repeat(5, 20),
@@ -133,7 +149,7 @@ public static IEnumerable<object[]> AggregateBy_TestData()
                 seedSelector: x => 0,
                 func: (x, y) => x + y,
                 comparer: null,
-                expected: Enumerable.Repeat(5, 1).ToDictionary(x => x, x => 100));
+                expected: Enumerable.Repeat(5, 1).Select(x => new KeyValuePair<int, int>(x, 100)));
 
             yield return WrapArgs(
                 source: new string[] { "Bob", "bob", "tim", "Bob", "Tim" },
@@ -141,13 +157,13 @@ public static IEnumerable<object[]> AggregateBy_TestData()
                 seedSelector: x => string.Empty,
                 func: (x, y) => x + y,
                 null,
-                expected: new Dictionary<string, string>
-                {
-                    { "Bob", "BobBob" },
-                    { "bob", "bob" },
-                    { "tim", "tim" },
-                    { "Tim", "Tim" },
-                });
+                expected:
+                [
+                    new("Bob", "BobBob"),
+                    new("bob", "bob"),
+                    new("tim", "tim"),
+                    new("Tim", "Tim"),
+                ]);
 
             yield return WrapArgs(
                 source: new string[] { "Bob", "bob", "tim", "Bob", "Tim" },
@@ -155,11 +171,11 @@ public static IEnumerable<object[]> AggregateBy_TestData()
                 seedSelector: x => string.Empty,
                 func: (x, y) => x + y,
                 StringComparer.OrdinalIgnoreCase,
-                expected: new Dictionary<string, string>
-                {
-                    { "Bob", "BobbobBob" },
-                    { "tim", "timTim" }
-                });
+                expected:
+                [
+                    new("Bob", "BobbobBob"),
+                    new("tim", "timTim")
+                ]);
 
             yield return WrapArgs(
                 source: new (string Name, int Age)[] { ("Tom", 20), ("Dick", 30), ("Harry", 40) },
@@ -167,12 +183,12 @@ public static IEnumerable<object[]> AggregateBy_TestData()
                 seedSelector: x => $"I am {x} and my name is ",
                 func: (x, y) => x + y.Name,
                 comparer: null,
-                expected: new Dictionary<int, string>
-                {
-                    { 20, "I am 20 and my name is Tom" },
-                    { 30, "I am 30 and my name is Dick" },
-                    { 40, "I am 40 and my name is Harry" }
-                });
+                expected:
+                [
+                    new(20, "I am 20 and my name is Tom"),
+                    new(30, "I am 30 and my name is Dick"),
+                    new(40, "I am 40 and my name is Harry")
+                ]);
 
             yield return WrapArgs(
                 source: new (string Name, int Age)[] { ("Tom", 20), ("Dick", 20), ("Harry", 40) },
@@ -180,11 +196,11 @@ public static IEnumerable<object[]> AggregateBy_TestData()
                 seedSelector: x => $"I am {x} and my name is",
                 func: (x, y) => $"{x} maybe {y.Name}",
                 comparer: null,
-                expected: new Dictionary<int, string>
-                {
-                    { 20, "I am 20 and my name is maybe Tom maybe Dick" },
-                    { 40, "I am 40 and my name is maybe Harry" }
-                });
+                expected:
+                [
+                    new(20, "I am 20 and my name is maybe Tom maybe Dick"),
+                    new(40, "I am 40 and my name is maybe Harry")
+                ]);
 
             yield return WrapArgs(
                 source: new (string Name, int Age)[] { ("Bob", 20), ("bob", 20), ("Harry", 20) },
@@ -192,7 +208,7 @@ public static IEnumerable<object[]> AggregateBy_TestData()
                 seedSelector: x => 0,
                 func: (x, y) => x + y.Age,
                 comparer: null,
-                expected: new string[] { "Bob", "bob", "Harry" }.ToDictionary(x => x, x => 20));
+                expected: new string[] { "Bob", "bob", "Harry" }.Select(x => new KeyValuePair<string, int>(x, 20)));
 
             yield return WrapArgs(
                 source: new (string Name, int Age)[] { ("Bob", 20), ("bob", 30), ("Harry", 40) },
@@ -200,11 +216,11 @@ public static IEnumerable<object[]> AggregateBy_TestData()
                 seedSelector: x => 0,
                 func: (x, y) => x + y.Age,
                 comparer: StringComparer.OrdinalIgnoreCase,
-                expected: new Dictionary<string, int>
-                {
-                    { "Bob", 50 },
-                    { "Harry", 40 }
-                });
+                expected:
+                [
+                    new("Bob", 50),
+                    new("Harry", 40)
+                ]);
 
             object[] WrapArgs<TSource, TKey, TAccumulate>(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TKey, TAccumulate> seedSelector, Func<TAccumulate, TSource, TAccumulate> func, IEqualityComparer<TKey>? comparer, IEnumerable<KeyValuePair<TKey, TAccumulate>> expected)
                 => new object[] { source, keySelector, seedSelector, func, comparer, expected };
@@ -286,11 +302,9 @@ public void Score()
                 keySelector: entry => entry.id,
                 seed: 0,
                 (totalScore, curr) => totalScore + curr.score)
-                .ToDictionary();
+                .ToArray();
 
-            Assert.Equal(67, scores["0"]);
-            Assert.Equal(15, scores["1"]);
-            Assert.Equal( 4, scores["2"]);
+            Assert.Equal([new("0", 67), new("1", 15), new("2", 4)], scores);
         }
     }
 }
diff --git a/src/libraries/System.Linq/tests/AnyTests.cs b/src/libraries/System.Linq/tests/AnyTests.cs
index 4d1732d3e339..555b5064c815 100644
--- a/src/libraries/System.Linq/tests/AnyTests.cs
+++ b/src/libraries/System.Linq/tests/AnyTests.cs
@@ -102,7 +102,7 @@ public static IEnumerable<object[]> TestDataWithPredicate()
         [MemberData(nameof(TestDataWithPredicate))]
         public void Any_Predicate(IEnumerable<int> source, Func<int, bool> predicate, bool expected)
         {
-            if (predicate == null)
+            if (predicate is null)
             {
                 Assert.Equal(expected, source.Any());
             }
@@ -115,7 +115,7 @@ public void Any_Predicate(IEnumerable<int> source, Func<int, bool> predicate, bo
         [Theory, MemberData(nameof(TestDataWithPredicate))]
         public void AnyRunOnce(IEnumerable<int> source, Func<int, bool> predicate, bool expected)
         {
-            if (predicate == null)
+            if (predicate is null)
             {
                 Assert.Equal(expected, source.RunOnce().Any());
             }
diff --git a/src/libraries/System.Linq/tests/AppendPrependTests.cs b/src/libraries/System.Linq/tests/AppendPrependTests.cs
index 9df154f46ff4..e1ac1ae11c86 100644
--- a/src/libraries/System.Linq/tests/AppendPrependTests.cs
+++ b/src/libraries/System.Linq/tests/AppendPrependTests.cs
@@ -89,7 +89,7 @@ public void ForcedToEnumeratorDoesntEnumeratePrepend()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Prepend(4);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -98,7 +98,7 @@ public void ForcedToEnumeratorDoesntEnumerateAppend()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Append(4);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -107,7 +107,7 @@ public void ForcedToEnumeratorDoesntEnumerateMultipleAppendsAndPrepends()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Append(4).Append(5).Prepend(-1).Prepend(-2);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -263,5 +263,27 @@ public void AppendPrependRunOnce()
             source = NumberRangeGuaranteedNotCollectionType(2, 2).Prepend(1).Prepend(0).Append(4).Append(5).RunOnce();
             Assert.Equal(Enumerable.Range(0, 6), source.ToList());
         }
+
+        [Fact]
+        public void AppendPrepend_First_Last_ElementAt()
+        {
+            Assert.Equal(42, new int[] { 42 }.Append(84).First());
+            Assert.Equal(42, new int[] { 84 }.Prepend(42).First());
+            Assert.Equal(84, new int[] { 42 }.Append(84).Last());
+            Assert.Equal(84, new int[] { 84 }.Prepend(42).Last());
+            Assert.Equal(42, new int[] { 42 }.Append(84).ElementAt(0));
+            Assert.Equal(42, new int[] { 84 }.Prepend(42).ElementAt(0));
+            Assert.Equal(84, new int[] { 42 }.Append(84).ElementAt(1));
+            Assert.Equal(84, new int[] { 84 }.Prepend(42).ElementAt(1));
+
+            Assert.Equal(42, NumberRangeGuaranteedNotCollectionType(42, 1).Append(84).First());
+            Assert.Equal(42, NumberRangeGuaranteedNotCollectionType(84, 1).Prepend(42).First());
+            Assert.Equal(84, NumberRangeGuaranteedNotCollectionType(42, 1).Append(84).Last());
+            Assert.Equal(84, NumberRangeGuaranteedNotCollectionType(84, 1).Prepend(42).Last());
+            Assert.Equal(42, NumberRangeGuaranteedNotCollectionType(42, 1).Append(84).ElementAt(0));
+            Assert.Equal(42, NumberRangeGuaranteedNotCollectionType(84, 1).Prepend(42).ElementAt(0));
+            Assert.Equal(84, NumberRangeGuaranteedNotCollectionType(42, 1).Append(84).ElementAt(1));
+            Assert.Equal(84, NumberRangeGuaranteedNotCollectionType(84, 1).Prepend(42).ElementAt(1));
+        }
     }
 }
diff --git a/src/libraries/System.Linq/tests/CastTests.cs b/src/libraries/System.Linq/tests/CastTests.cs
index 6577d2e72e35..125c2b57f43f 100644
--- a/src/libraries/System.Linq/tests/CastTests.cs
+++ b/src/libraries/System.Linq/tests/CastTests.cs
@@ -1,7 +1,6 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System;
 using System.Collections.Generic;
 using Xunit;
 
@@ -230,10 +229,106 @@ public void NullSource()
         [Fact]
         public void ForcedToEnumeratorDoesntEnumerate()
         {
-            var iterator = new object[0].Where(i => i != null).Cast<string>();
+            var iterator = new object[0].Where(i => i is not null).Cast<string>();
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<string>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
+        }
+
+        [Fact]
+        public void TargetTypeIsSourceType_Nop()
+        {
+            object[] values = new string[] { "hello", "world" };
+            Assert.Same(values, values.Cast<string>());
+        }
+
+        [Fact]
+        public void CastOnMultidimensionalArraySucceeds()
+        {
+            Array array = Array.CreateInstance(typeof(int), 2, 3);
+            for (int i = 0; i < 2; i++)
+            {
+                for (int j = 0; j < 3; j++)
+                {
+                    array.SetValue(i * 3 + j, i, j);
+                }
+            }
+
+            int[] result = array.Cast<int>().ToArray();
+            for (int i = 0; i < 6; i++)
+            {
+                Assert.Equal(i, result[i]);
+            }
+        }
+
+        [Fact]
+        public void CastCountReturnsExpectedLength()
+        {
+            object[] objects = new object[] { "hello", "world" };
+            Assert.Equal(2, objects.Cast<string>().Count());
+        }
+
+        [Fact]
+        public void CastFirstReturnsFirstElement()
+        {
+            object[] objects = new object[] { "hello", "world" };
+            Assert.Equal("hello", objects.Cast<string>().First());
+        }
+
+        [Fact]
+        public void CastFirstOnEmptySequenceThrows()
+        {
+            object[] objects = Array.Empty<object>();
+            Assert.Throws<InvalidOperationException>(() => objects.Cast<string>().First());
+        }
+
+        [Fact]
+        public void CastLastReturnsLastElement()
+        {
+            object[] objects = new object[] { "hello", "world" };
+            Assert.Equal("world", objects.Cast<string>().Last());
+        }
+
+        [Fact]
+        public void CastElementAtReturnsExpectedElement()
+        {
+            object[] objects = new object[] { "hello", "world" };
+            Assert.Equal("world", objects.Cast<string>().ElementAt(1));
+        }
+
+        [Fact]
+        public void CastElementAtOutOfRangeThrows()
+        {
+            object[] objects = new object[] { "hello", "world" };
+            Assert.Throws<ArgumentOutOfRangeException>(() => objects.Cast<string>().ElementAt(2));
+        }
+
+        [Fact]
+        public void CastLastOnEmptySequenceThrows()
+        {
+            object[] objects = Array.Empty<object>();
+            Assert.Throws<InvalidOperationException>(() => objects.Cast<string>().Last());
+        }
+
+        [Fact]
+        public void CastSelectProcessesEachElement()
+        {
+            object[] objects = new object[] { "hello", "world!" };
+            Assert.Equal(new[] { 5, 6 }, objects.Cast<string>().Select(s => s.Length));
+        }
+
+        [Fact]
+        public void CastSkipSkipsElements()
+        {
+            object[] objects = new object[] { "hello", "there", "world" };
+            Assert.Equal(new[] { "world" }, objects.Cast<string>().Skip(2));
+        }
+
+        [Fact]
+        public void CastTakeTakesElements()
+        {
+            object[] objects = new object[] { "hello", "there", "world" };
+            Assert.Equal(new[] { "hello", "there" }, objects.Cast<string>().Take(2));
         }
     }
 }
diff --git a/src/libraries/System.Linq/tests/ChunkTests.cs b/src/libraries/System.Linq/tests/ChunkTests.cs
index ee3486041927..31433ddabff3 100644
--- a/src/libraries/System.Linq/tests/ChunkTests.cs
+++ b/src/libraries/System.Linq/tests/ChunkTests.cs
@@ -7,6 +7,12 @@ namespace System.Linq.Tests
 {
     public class ChunkTests : EnumerableTests
     {
+        [Fact]
+        public void Empty()
+        {
+            Assert.Equal(Enumerable.Empty<int[]>(), Enumerable.Empty<int>().Chunk(4));
+        }
+
         [Fact]
         public void ThrowsOnNullSource()
         {
@@ -32,88 +38,82 @@ public void ChunkSourceLazily()
             Assert.True(chunks.MoveNext());
         }
 
-        private static IEnumerable<T> ConvertToType<T>(T[] array, Type type)
-        {
-            return type switch
-            {
-                {} x when x == typeof(TestReadOnlyCollection<T>) => new TestReadOnlyCollection<T>(array),
-                {} x when x == typeof(TestCollection<T>) => new TestCollection<T>(array),
-                {} x when x == typeof(TestEnumerable<T>) => new TestEnumerable<T>(array),
-                _ => throw new Exception()
-            };
-        }
-
         [Theory]
-        [InlineData(new[] {9999, 0, 888, -1, 66, -777, 1, 2, -12345}, typeof(TestReadOnlyCollection<int>))]
-        [InlineData(new[] {9999, 0, 888, -1, 66, -777, 1, 2, -12345}, typeof(TestCollection<int>))]
-        [InlineData(new[] {9999, 0, 888, -1, 66, -777, 1, 2, -12345}, typeof(TestEnumerable<int>))]
-        public void ChunkSourceRepeatCalls(int[] array, Type type)
+        [InlineData(new[] {9999, 0, 888, -1, 66, -777, 1, 2, -12345})]
+        public void ChunkSourceRepeatCalls(int[] array)
         {
-            IEnumerable<int> source = ConvertToType(array, type);
+            Assert.All(IdentityTransforms<int>(), t =>
+            {
+                IEnumerable<int> source = t(array);
 
-            Assert.Equal(source.Chunk(3), source.Chunk(3));
+                Assert.Equal(source.Chunk(3), source.Chunk(3));
+            });
         }
 
         [Theory]
-        [InlineData(new[] {9999, 0, 888, -1, 66, -777, 1, 2, -12345}, typeof(TestReadOnlyCollection<int>))]
-        [InlineData(new[] {9999, 0, 888, -1, 66, -777, 1, 2, -12345}, typeof(TestCollection<int>))]
-        [InlineData(new[] {9999, 0, 888, -1, 66, -777, 1, 2, -12345}, typeof(TestEnumerable<int>))]
-        public void ChunkSourceEvenly(int[] array, Type type)
+        [InlineData(new[] {9999, 0, 888, -1, 66, -777, 1, 2, -12345})]
+        public void ChunkSourceEvenly(int[] array)
         {
-            IEnumerable<int> source = ConvertToType(array, type);
-
-            using IEnumerator<int[]> chunks = source.Chunk(3).GetEnumerator();
-            chunks.MoveNext();
-            Assert.Equal(new[] {9999, 0, 888}, chunks.Current);
-            chunks.MoveNext();
-            Assert.Equal(new[] {-1, 66, -777}, chunks.Current);
-            chunks.MoveNext();
-            Assert.Equal(new[] {1, 2, -12345}, chunks.Current);
-            Assert.False(chunks.MoveNext());
+            Assert.All(IdentityTransforms<int>(), t =>
+            {
+                IEnumerable<int> source = t(array);
+
+                using IEnumerator<int[]> chunks = source.Chunk(3).GetEnumerator();
+                chunks.MoveNext();
+                Assert.Equal(new[] { 9999, 0, 888 }, chunks.Current);
+                chunks.MoveNext();
+                Assert.Equal(new[] { -1, 66, -777 }, chunks.Current);
+                chunks.MoveNext();
+                Assert.Equal(new[] { 1, 2, -12345 }, chunks.Current);
+                Assert.False(chunks.MoveNext());
+            });
         }
 
         [Theory]
-        [InlineData(new[] {9999, 0, 888, -1, 66, -777, 1, 2}, typeof(TestReadOnlyCollection<int>))]
-        [InlineData(new[] {9999, 0, 888, -1, 66, -777, 1, 2}, typeof(TestCollection<int>))]
-        [InlineData(new[] {9999, 0, 888, -1, 66, -777, 1, 2}, typeof(TestEnumerable<int>))]
-        public void ChunkSourceUnevenly(int[] array, Type type)
+        [InlineData(new[] {9999, 0, 888, -1, 66, -777, 1, 2})]
+        public void ChunkSourceUnevenly(int[] array)
         {
-            IEnumerable<int> source = ConvertToType(array, type);
-
-            using IEnumerator<int[]> chunks = source.Chunk(3).GetEnumerator();
-            chunks.MoveNext();
-            Assert.Equal(new[] {9999, 0, 888}, chunks.Current);
-            chunks.MoveNext();
-            Assert.Equal(new[] {-1, 66, -777}, chunks.Current);
-            chunks.MoveNext();
-            Assert.Equal(new[] {1, 2}, chunks.Current);
-            Assert.False(chunks.MoveNext());
+            Assert.All(IdentityTransforms<int>(), t =>
+            {
+                IEnumerable<int> source = t(array);
+
+                using IEnumerator<int[]> chunks = source.Chunk(3).GetEnumerator();
+                chunks.MoveNext();
+                Assert.Equal(new[] { 9999, 0, 888 }, chunks.Current);
+                chunks.MoveNext();
+                Assert.Equal(new[] { -1, 66, -777 }, chunks.Current);
+                chunks.MoveNext();
+                Assert.Equal(new[] { 1, 2 }, chunks.Current);
+                Assert.False(chunks.MoveNext());
+            });
         }
 
         [Theory]
-        [InlineData(new[] {9999, 0}, typeof(TestReadOnlyCollection<int>))]
-        [InlineData(new[] {9999, 0}, typeof(TestCollection<int>))]
-        [InlineData(new[] {9999, 0}, typeof(TestEnumerable<int>))]
-        public void ChunkSourceSmallerThanMaxSize(int[] array, Type type)
+        [InlineData(new[] {9999, 0})]
+        public void ChunkSourceSmallerThanMaxSize(int[] array)
         {
-            IEnumerable<int> source = ConvertToType(array, type);
+            Assert.All(IdentityTransforms<int>(), t =>
+            {
+                IEnumerable<int> source = t(array);
 
-            using IEnumerator<int[]> chunks = source.Chunk(3).GetEnumerator();
-            chunks.MoveNext();
-            Assert.Equal(new[] {9999, 0}, chunks.Current);
-            Assert.False(chunks.MoveNext());
+                using IEnumerator<int[]> chunks = source.Chunk(3).GetEnumerator();
+                chunks.MoveNext();
+                Assert.Equal(new[] { 9999, 0 }, chunks.Current);
+                Assert.False(chunks.MoveNext());
+            });
         }
 
         [Theory]
-        [InlineData(new int[] {}, typeof(TestReadOnlyCollection<int>))]
-        [InlineData(new int[] {}, typeof(TestCollection<int>))]
-        [InlineData(new int[] {}, typeof(TestEnumerable<int>))]
-        public void EmptySourceYieldsNoChunks(int[] array, Type type)
+        [InlineData(new int[0])]
+        public void EmptySourceYieldsNoChunks(int[] array)
         {
-            IEnumerable<int> source = ConvertToType(array, type);
+            Assert.All(IdentityTransforms<int>(), t =>
+            {
+                IEnumerable<int> source = t(array);
 
-            using IEnumerator<int[]> chunks = source.Chunk(3).GetEnumerator();
-            Assert.False(chunks.MoveNext());
+                using IEnumerator<int[]> chunks = source.Chunk(3).GetEnumerator();
+                Assert.False(chunks.MoveNext());
+            });
         }
 
         [Fact]
diff --git a/src/libraries/System.Linq/tests/ConcatTests.cs b/src/libraries/System.Linq/tests/ConcatTests.cs
index 6209d846ff24..357da40f2ea6 100644
--- a/src/libraries/System.Linq/tests/ConcatTests.cs
+++ b/src/libraries/System.Linq/tests/ConcatTests.cs
@@ -51,7 +51,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Concat(Enumerable.Range(0, 3));
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -83,6 +83,55 @@ public void VerifyEquals(IEnumerable<int> expected, IEnumerable<int> actual)
             VerifyEqualsWorker(expected, actual);
         }
 
+        [Theory]
+        [MemberData(nameof(ArraySourcesData))]
+        [MemberData(nameof(SelectArraySourcesData))]
+        [MemberData(nameof(EnumerableSourcesData))]
+        [MemberData(nameof(NonCollectionSourcesData))]
+        [MemberData(nameof(ListSourcesData))]
+        [MemberData(nameof(ConcatOfConcatsData))]
+        [MemberData(nameof(ConcatWithSelfData))]
+        [MemberData(nameof(ChainedCollectionConcatData))]
+        [MemberData(nameof(AppendedPrependedConcatAlternationsData))]
+        public void First_Last_ElementAt(IEnumerable<int> _, IEnumerable<int> actual)
+        {
+            int count = actual.Count();
+            if (count == 0)
+            {
+                Assert.Throws<InvalidOperationException>(() => actual.First());
+                Assert.Throws<InvalidOperationException>(() => actual.Last());
+                Assert.Throws<ArgumentOutOfRangeException>(() => actual.ElementAt(0));
+            }
+            else
+            {
+                int first = actual.First();
+                int last = actual.Last();
+                int elementAt = actual.ElementAt(count / 2);
+
+                int enumeratedFirst = 0, enumeratedLast = 0, enumeratedElementAt = 0;
+                int i = 0;
+                foreach (int item in actual)
+                {
+                    if (i == 0)
+                    {
+                        enumeratedFirst = item;
+                    }
+
+                    if (i == count / 2)
+                    {
+                        enumeratedElementAt = item;
+                    }
+
+                    enumeratedLast = item;
+                    i++;
+                }
+
+                Assert.Equal(enumeratedFirst, first);
+                Assert.Equal(enumeratedLast, last);
+                Assert.Equal(enumeratedElementAt, elementAt);
+            }
+        }
+
         private static void VerifyEqualsWorker<T>(IEnumerable<T> expected, IEnumerable<T> actual)
         {
             // Returns a list of functions that, when applied to enumerable, should return
diff --git a/src/libraries/System.Linq/tests/ConsistencyTests.cs b/src/libraries/System.Linq/tests/ConsistencyTests.cs
index b3c75affa6a9..746efa614394 100644
--- a/src/libraries/System.Linq/tests/ConsistencyTests.cs
+++ b/src/libraries/System.Linq/tests/ConsistencyTests.cs
@@ -18,7 +18,7 @@ public static void MatchSequencePattern()
         {
             MethodInfo enumerableNotInQueryable = GetMissingExtensionMethod(typeof(Enumerable), typeof(Queryable), GetExcludedMethods());
 
-            Assert.True(enumerableNotInQueryable == null, string.Format("Enumerable method {0} not defined by Queryable", enumerableNotInQueryable));
+            Assert.True(enumerableNotInQueryable is null, string.Format("Enumerable method {0} not defined by Queryable", enumerableNotInQueryable));
 
             MethodInfo queryableNotInEnumerable = GetMissingExtensionMethod(
                 typeof(Queryable),
@@ -28,7 +28,7 @@ public static void MatchSequencePattern()
                  }
                 );
 
-            Assert.True(queryableNotInEnumerable == null, string.Format("Queryable method {0} not defined by Enumerable", queryableNotInEnumerable));
+            Assert.True(queryableNotInEnumerable is null, string.Format("Queryable method {0} not defined by Enumerable", queryableNotInEnumerable));
         }
 
         // If a change to Enumerable has required a change to the exception list in this test
diff --git a/src/libraries/System.Linq/tests/ContainsTests.cs b/src/libraries/System.Linq/tests/ContainsTests.cs
index 9151284c0a13..eea9c04ac508 100644
--- a/src/libraries/System.Linq/tests/ContainsTests.cs
+++ b/src/libraries/System.Linq/tests/ContainsTests.cs
@@ -71,7 +71,7 @@ public static IEnumerable<object[]> String_TestData()
         [MemberData(nameof(String_TestData))]
         public void String(IEnumerable<string> source, IEqualityComparer<string> comparer, string value, bool expected)
         {
-            if (comparer == null)
+            if (comparer is null)
             {
                 Assert.Equal(expected, source.Contains(value));
             }
@@ -81,7 +81,7 @@ public void String(IEnumerable<string> source, IEqualityComparer<string> compare
         [Theory, MemberData(nameof(String_TestData))]
         public void StringRunOnce(IEnumerable<string> source, IEqualityComparer<string> comparer, string value, bool expected)
         {
-            if (comparer == null)
+            if (comparer is null)
             {
                 Assert.Equal(expected, source.RunOnce().Contains(value));
             }
diff --git a/src/libraries/System.Linq/tests/CountByTests.cs b/src/libraries/System.Linq/tests/CountByTests.cs
index cc2a4a5f3070..258a067ece50 100644
--- a/src/libraries/System.Linq/tests/CountByTests.cs
+++ b/src/libraries/System.Linq/tests/CountByTests.cs
@@ -85,79 +85,79 @@ public static IEnumerable<object[]> CountBy_TestData()
                 source: Enumerable.Range(0, 10),
                 keySelector: x => x,
                 comparer: null,
-                expected: Enumerable.Range(0, 10).ToDictionary(x => x, x => 1));
+                expected: Enumerable.Range(0, 10).Select(x => new KeyValuePair<int, int>(x, 1)));
 
             yield return WrapArgs(
                 source: Enumerable.Range(5, 10),
                 keySelector: x => true,
                 comparer: null,
-                expected: Enumerable.Repeat(true, 1).ToDictionary(x => x, x => 10));
+                expected: Enumerable.Repeat(true, 1).Select(x => new KeyValuePair<bool, int>(x, 10)));
 
             yield return WrapArgs(
                 source: Enumerable.Range(0, 20),
                 keySelector: x => x % 5,
                 comparer: null,
-                expected: Enumerable.Range(0, 5).ToDictionary(x => x, x => 4));
+                expected: Enumerable.Range(0, 5).Select(x => new KeyValuePair<int, int>(x, 4)));
 
             yield return WrapArgs(
                 source: Enumerable.Repeat(5, 20),
                 keySelector: x => x,
                 comparer: null,
-                expected: Enumerable.Repeat(5, 1).ToDictionary(x => x, x => 20));
+                expected: Enumerable.Repeat(5, 1).Select(x => new KeyValuePair<int, int>(x, 20)));
 
             yield return WrapArgs(
                 source: new string[] { "Bob", "bob", "tim", "Bob", "Tim" },
                 keySelector: x => x,
                 null,
-                expected: new Dictionary<string, int>()
-                {
-                    { "Bob", 2 },
-                    { "bob", 1 },
-                    { "tim", 1 },
-                    { "Tim", 1 }
-                });
+                expected:
+                [
+                    new("Bob", 2),
+                    new("bob", 1),
+                    new("tim", 1),
+                    new("Tim", 1)
+                ]);
 
             yield return WrapArgs(
                 source: new string[] { "Bob", "bob", "tim", "Bob", "Tim" },
                 keySelector: x => x,
                 StringComparer.OrdinalIgnoreCase,
-                expected: new Dictionary<string, int>()
-                {
-                    { "Bob", 3 },
-                    { "tim", 2 }
-                });
+                expected:
+                [
+                    new("Bob", 3),
+                    new("tim", 2)
+                ]);
 
             yield return WrapArgs(
                 source: new (string Name, int Age)[] { ("Tom", 20), ("Dick", 30), ("Harry", 40) },
                 keySelector: x => x.Age,
                 comparer: null,
-                expected: new int[] { 20, 30, 40 }.ToDictionary(x => x, x => 1));
+                expected: new int[] { 20, 30, 40 }.Select(x => new KeyValuePair<int, int>(x, 1)));
 
             yield return WrapArgs(
                 source: new (string Name, int Age)[] { ("Tom", 20), ("Dick", 20), ("Harry", 40) },
                 keySelector: x => x.Age,
                 comparer: null,
-                expected: new Dictionary<int, int>()
-                {
-                    { 20, 2 },
-                    { 40, 1 }
-                });
+                expected:
+                [
+                    new(20, 2),
+                    new(40, 1)
+                ]);
 
             yield return WrapArgs(
                 source: new (string Name, int Age)[] { ("Bob", 20), ("bob", 30), ("Harry", 40) },
                 keySelector: x => x.Name,
                 comparer: null,
-                expected: new string[] { "Bob", "bob", "Harry" }.ToDictionary(x => x, x => 1));
+                expected: new string[] { "Bob", "bob", "Harry" }.Select(x => new KeyValuePair<string, int>(x, 1)));
 
             yield return WrapArgs(
                 source: new (string Name, int Age)[] { ("Bob", 20), ("bob", 30), ("Harry", 40) },
                 keySelector: x => x.Name,
                 comparer: StringComparer.OrdinalIgnoreCase,
-                expected: new Dictionary<string, int>()
-                {
-                    { "Bob", 2 },
-                    { "Harry", 1 }
-                });
+                expected:
+                [
+                    new("Bob", 2),
+                    new("Harry", 1)
+                ]);
 
             object[] WrapArgs<TSource, TKey>(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IEqualityComparer<TKey>? comparer, IEnumerable<KeyValuePair<TKey, int>> expected)
                 => new object[] { source, keySelector, comparer, expected };
diff --git a/src/libraries/System.Linq/tests/CountTests.cs b/src/libraries/System.Linq/tests/CountTests.cs
index 2a93644c87e3..88404efb782f 100644
--- a/src/libraries/System.Linq/tests/CountTests.cs
+++ b/src/libraries/System.Linq/tests/CountTests.cs
@@ -48,7 +48,7 @@ public static IEnumerable<object[]> Int_TestData()
         [MemberData(nameof(Int_TestData))]
         public void Int(IEnumerable<int> source, Func<int, bool> predicate, int expected)
         {
-            if (predicate == null)
+            if (predicate is null)
             {
                 Assert.Equal(expected, source.Count());
             }
@@ -61,7 +61,7 @@ public void Int(IEnumerable<int> source, Func<int, bool> predicate, int expected
         [Theory, MemberData(nameof(Int_TestData))]
         public void IntRunOnce(IEnumerable<int> source, Func<int, bool> predicate, int expected)
         {
-            if (predicate == null)
+            if (predicate is null)
             {
                 Assert.Equal(expected, source.RunOnce().Count());
             }
diff --git a/src/libraries/System.Linq/tests/DefaultIfEmptyTests.cs b/src/libraries/System.Linq/tests/DefaultIfEmptyTests.cs
index b4e9e13f8042..3dadd6d67937 100644
--- a/src/libraries/System.Linq/tests/DefaultIfEmptyTests.cs
+++ b/src/libraries/System.Linq/tests/DefaultIfEmptyTests.cs
@@ -103,7 +103,26 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).DefaultIfEmpty();
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
+        }
+
+        [Fact]
+        public void First_Last_ElementAt()
+        {
+            IEnumerable<int> nonEmpty = Enumerable.Range(1, 3);
+            Assert.Equal(1, nonEmpty.First());
+            Assert.Equal(3, nonEmpty.Last());
+            Assert.Equal(1, nonEmpty.ElementAt(0));
+            Assert.Equal(2, nonEmpty.ElementAt(1));
+            Assert.Equal(3, nonEmpty.ElementAt(2));
+            Assert.Throws<ArgumentOutOfRangeException>(() => nonEmpty.ElementAt(-1));
+            Assert.Throws<ArgumentOutOfRangeException>(() => nonEmpty.ElementAt(4));
+
+            IEnumerable<int> empty = Enumerable.Empty<int>();
+            Assert.Equal(42, empty.DefaultIfEmpty(42).First());
+            Assert.Equal(42, empty.DefaultIfEmpty(42).Last());
+            Assert.Equal(42, empty.DefaultIfEmpty(42).ElementAt(0));
+            Assert.Throws<ArgumentOutOfRangeException>(() => empty.DefaultIfEmpty(42).ElementAt(1));
         }
     }
 }
diff --git a/src/libraries/System.Linq/tests/DistinctTests.cs b/src/libraries/System.Linq/tests/DistinctTests.cs
index 7408e96ddb38..24ba295f3d47 100644
--- a/src/libraries/System.Linq/tests/DistinctTests.cs
+++ b/src/libraries/System.Linq/tests/DistinctTests.cs
@@ -230,7 +230,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Distinct();
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -303,6 +303,12 @@ public static void DistinctBy_RunOnce_HasExpectedOutput<TSource, TKey>(IEnumerab
 
         public static IEnumerable<object[]> DistinctBy_TestData()
         {
+            yield return WrapArgs(
+                source: Array.Empty<int>(),
+                keySelector: x => x,
+                comparer: null,
+                expected: Enumerable.Empty<int>());
+
             yield return WrapArgs(
                 source: Enumerable.Range(0, 10),
                 keySelector: x => x,
diff --git a/src/libraries/System.Linq/tests/EnumerableTests.cs b/src/libraries/System.Linq/tests/EnumerableTests.cs
index 16869e2654a0..f647d037eb6d 100644
--- a/src/libraries/System.Linq/tests/EnumerableTests.cs
+++ b/src/libraries/System.Linq/tests/EnumerableTests.cs
@@ -87,7 +87,7 @@ protected class AnagramEqualityComparer : IEqualityComparer<string>
             public bool Equals(string x, string y)
             {
                 if (ReferenceEquals(x, y)) return true;
-                if (x == null | y == null) return false;
+                if (x is null | y is null) return false;
                 int length = x.Length;
                 if (length != y.Length) return false;
                 using (var en = x.OrderBy(i => i).GetEnumerator())
@@ -103,7 +103,7 @@ public bool Equals(string x, string y)
 
             public int GetHashCode(string obj)
             {
-                if (obj == null) return 0;
+                if (obj is null) return 0;
                 int hash = obj.Length;
                 foreach (char c in obj)
                     hash ^= c;
diff --git a/src/libraries/System.Linq/tests/ExceptTests.cs b/src/libraries/System.Linq/tests/ExceptTests.cs
index 36976b7cc57a..eeba02e81da4 100644
--- a/src/libraries/System.Linq/tests/ExceptTests.cs
+++ b/src/libraries/System.Linq/tests/ExceptTests.cs
@@ -42,7 +42,7 @@ public static IEnumerable<object[]> Int_TestData()
         [MemberData(nameof(Int_TestData))]
         public void Int(IEnumerable<int> first, IEnumerable<int> second, IEqualityComparer<int> comparer, IEnumerable<int> expected)
         {
-            if (comparer == null)
+            if (comparer is null)
             {
                 Assert.Equal(expected, first.Except(second));
             }
@@ -64,7 +64,7 @@ public static IEnumerable<object[]> String_TestData()
         [MemberData(nameof(String_TestData))]
         public void String(IEnumerable<string> first, IEnumerable<string> second, IEqualityComparer<string> comparer, IEnumerable<string> expected)
         {
-            if (comparer == null)
+            if (comparer is null)
             {
                 Assert.Equal(expected, first.Except(second));
             }
@@ -119,7 +119,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Except(Enumerable.Range(0, 3));
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
diff --git a/src/libraries/System.Linq/tests/GroupByTests.cs b/src/libraries/System.Linq/tests/GroupByTests.cs
index 4b8967a28a82..a1567cc4eced 100644
--- a/src/libraries/System.Linq/tests/GroupByTests.cs
+++ b/src/libraries/System.Linq/tests/GroupByTests.cs
@@ -2,7 +2,6 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
-using System.Diagnostics;
 using System.Reflection;
 using Xunit;
 
@@ -17,7 +16,7 @@ private static void AssertGroupingCorrect<TKey, TElement>(IEnumerable<TKey> keys
 
         private static void AssertGroupingCorrect<TKey, TElement>(IEnumerable<TKey> keys, IEnumerable<TElement> elements, IEnumerable<IGrouping<TKey, TElement>> grouping, IEqualityComparer<TKey> keyComparer)
         {
-            if (grouping == null)
+            if (grouping is null)
             {
                 Assert.Null(elements);
                 Assert.Null(keys);
@@ -38,7 +37,7 @@ private static void AssertGroupingCorrect<TKey, TElement>(IEnumerable<TKey> keys
 
                     TKey key = keyEn.Current;
 
-                    if (key == null)
+                    if (key is null)
                     {
                         groupingForNullKeys.Add(elEn.Current);
                     }
@@ -58,7 +57,7 @@ private static void AssertGroupingCorrect<TKey, TElement>(IEnumerable<TKey> keys
                 TKey key = group.Key;
                 List<TElement> list;
 
-                if (key == null)
+                if (key is null)
                 {
                     Assert.Equal(groupingForNullKeys, group);
                     groupingForNullKeys.Clear();
@@ -864,5 +863,56 @@ public static void GroupingKeyIsPublic()
             PropertyInfo key = grouptype.GetProperty("Key", BindingFlags.Instance | BindingFlags.Public);
             Assert.NotNull(key);
         }
+
+        [Fact]
+        public void MultipleIterationsOfSameEnumerable()
+        {
+            foreach (IEnumerable<IGrouping<int, int>> e1 in new[] { Enumerable.Range(0, 10).GroupBy(i => i), Enumerable.Range(0, 10).GroupBy(i => i, i => i) })
+            {
+                for (int trial = 0; trial < 3; trial++)
+                {
+                    int count = 0;
+                    foreach (IGrouping<int, int> g in e1) count++;
+                    Assert.Equal(10, count);
+                }
+            }
+
+            foreach (IEnumerable<int> e2 in new[] { Enumerable.Range(0, 10).GroupBy(i => i, (i, e) => i), Enumerable.Range(0, 10).GroupBy(i => i, i => i, (i, e) => i) })
+            {
+                for (int trial = 0; trial < 3; trial++)
+                {
+                    int count = 0;
+                    foreach (int i in e2) count++;
+                    Assert.Equal(10, count);
+                }
+            }
+        }
+
+        [Fact]
+        public void EnumerateGrouping()
+        {
+            IGrouping<string, int> g = Enumerable.Range(0, 42).GroupBy(i => "onegroup").First();
+            Assert.Equal("onegroup", g.Key);
+            Assert.Equal(42, g.Count());
+
+            using IEnumerator<int> e = g.GetEnumerator();
+
+            var values = new HashSet<int>();
+
+            for (int trial = 0; trial < 3; trial++)
+            {
+                values.Clear();
+
+                while (e.MoveNext())
+                {
+                    Assert.True(values.Add(e.Current));
+                }
+
+                Assert.Equal(42, values.Count);
+                Assert.Equal(Enumerable.Range(0, 42), values.Order());
+
+                e.Reset();
+            }
+        }
     }
 }
diff --git a/src/libraries/System.Linq/tests/GroupJoinTests.cs b/src/libraries/System.Linq/tests/GroupJoinTests.cs
index b800eb928ddd..a6fc58ed8d38 100644
--- a/src/libraries/System.Linq/tests/GroupJoinTests.cs
+++ b/src/libraries/System.Linq/tests/GroupJoinTests.cs
@@ -44,24 +44,24 @@ public override int GetHashCode()
             public bool Equals(JoinRec other)
             {
                 if (!string.Equals(name, other.name)) return false;
-                if (orderID == null)
+                if (orderID is null)
                 {
-                    if (other.orderID != null) return false;
+                    if (other.orderID is not null) return false;
                 }
                 else
                 {
-                    if (other.orderID == null) return false;
+                    if (other.orderID is null) return false;
                     if (orderID.Length != other.orderID.Length) return false;
                     for (int i = 0; i != other.orderID.Length; ++i)
                         if (orderID[i] != other.orderID[i]) return false;
                 }
-                if (total == null)
+                if (total is null)
                 {
-                    if (other.total != null) return false;
+                    if (other.total is not null) return false;
                 }
                 else
                 {
-                    if (other.total == null) return false;
+                    if (other.total is null) return false;
                     if (total.Length != other.total.Length) return false;
                     for (int i = 0; i != other.total.Length; ++i)
                         if (total[i] != other.total[i]) return false;
@@ -511,7 +511,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).GroupJoin(Enumerable.Empty<int>(), i => i, i => i, (o, i) => i);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<IEnumerable<int>>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
     }
 }
diff --git a/src/libraries/System.Linq/tests/IndexTests.cs b/src/libraries/System.Linq/tests/IndexTests.cs
index 4b08820fe0e3..0742569f787d 100644
--- a/src/libraries/System.Linq/tests/IndexTests.cs
+++ b/src/libraries/System.Linq/tests/IndexTests.cs
@@ -8,6 +8,12 @@ namespace System.Linq.Tests
 {
     public class IndexTests : EnumerableTests
     {
+        [Fact]
+        public void Empty()
+        {
+            Assert.Empty(Enumerable.Empty<int>().Index());
+        }
+
         [Fact]
         public void Index_SourceIsNull_ArgumentNullExceptionThrown()
         {
diff --git a/src/libraries/System.Linq/tests/IntersectTests.cs b/src/libraries/System.Linq/tests/IntersectTests.cs
index 7b1d5fdfbd87..9d1011a93b59 100644
--- a/src/libraries/System.Linq/tests/IntersectTests.cs
+++ b/src/libraries/System.Linq/tests/IntersectTests.cs
@@ -61,7 +61,7 @@ public static IEnumerable<object[]> String_TestData()
         [MemberData(nameof(String_TestData))]
         public void String(IEnumerable<string> first, IEnumerable<string> second, IEqualityComparer<string> comparer, string[] expected)
         {
-            if (comparer == null)
+            if (comparer is null)
             {
                 Assert.Equal(expected, first.Intersect(second));
             }
@@ -116,7 +116,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Intersect(Enumerable.Range(0, 3));
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
diff --git a/src/libraries/System.Linq/tests/JoinTests.cs b/src/libraries/System.Linq/tests/JoinTests.cs
index 05ef7059eb1a..496f49a6fcb5 100644
--- a/src/libraries/System.Linq/tests/JoinTests.cs
+++ b/src/libraries/System.Linq/tests/JoinTests.cs
@@ -415,7 +415,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Join(Enumerable.Empty<int>(), i => i, i => i, (o, i) => i);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
     }
 }
diff --git a/src/libraries/System.Linq/tests/LongCountTests.cs b/src/libraries/System.Linq/tests/LongCountTests.cs
index 4c7eb8e323c2..308ad764674b 100644
--- a/src/libraries/System.Linq/tests/LongCountTests.cs
+++ b/src/libraries/System.Linq/tests/LongCountTests.cs
@@ -45,7 +45,7 @@ public static IEnumerable<object[]> LongCount_TestData()
         [MemberData(nameof(LongCount_TestData))]
         public static void LongCount(IEnumerable<int> source, Func<int, bool> predicate, long expected)
         {
-            if (predicate == null)
+            if (predicate is null)
             {
                 Assert.Equal(expected, source.LongCount());
             }
@@ -59,7 +59,7 @@ public static void LongCount(IEnumerable<int> source, Func<int, bool> predicate,
         [MemberData(nameof(LongCount_TestData))]
         public static void LongCountRunOnce(IEnumerable<int> source, Func<int, bool> predicate, long expected)
         {
-            if (predicate == null)
+            if (predicate is null)
             {
                 Assert.Equal(expected, source.RunOnce().LongCount());
             }
diff --git a/src/libraries/System.Linq/tests/MaxTests.cs b/src/libraries/System.Linq/tests/MaxTests.cs
index a1509855091d..bb70a14d684c 100644
--- a/src/libraries/System.Linq/tests/MaxTests.cs
+++ b/src/libraries/System.Linq/tests/MaxTests.cs
@@ -251,6 +251,8 @@ public void Max_Float_EmptySource_ThrowsInvalidOperationException()
         {
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<float>().Max());
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<float>().Max(x => x));
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<float>()).Max());
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<float>()).Max(x => x));
             Assert.Throws<InvalidOperationException>(() => Array.Empty<float>().Max());
             Assert.Throws<InvalidOperationException>(() => new List<float>().Max());
         }
@@ -331,6 +333,8 @@ public void Max_Double_EmptySource_ThrowsInvalidOperationException()
         {
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<double>().Max());
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<double>().Max(x => x));
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<double>()).Max());
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<double>()).Max(x => x));
             Assert.Throws<InvalidOperationException>(() => Array.Empty<double>().Max());
             Assert.Throws<InvalidOperationException>(() => new List<double>().Max());
         }
@@ -397,6 +401,8 @@ public void Max_Decimal_EmptySource_ThrowsInvalidOperationException()
         {
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<decimal>().Max());
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<decimal>().Max(x => x));
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<decimal>()).Max());
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<decimal>()).Max(x => x));
             Assert.Throws<InvalidOperationException>(() => Array.Empty<decimal>().Max());
             Assert.Throws<InvalidOperationException>(() => new List<decimal>().Max(x => x));
         }
@@ -622,6 +628,8 @@ public void Max_DateTime_EmptySource_ThrowsInvalidOperationException()
         {
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<DateTime>().Max());
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<DateTime>().Max(i => i));
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<DateTime>()).Max());
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<DateTime>()).Max(i => i));
         }
 
         public static IEnumerable<object[]> Max_String_TestData()
@@ -888,6 +896,7 @@ public void Max_String_WithSelectorAccessingProperty()
         public void Max_Boolean_EmptySource_ThrowsInvalidOperationException()
         {
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<bool>().Max());
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<bool>()).Max());
         }
 
         [Fact]
diff --git a/src/libraries/System.Linq/tests/MinTests.cs b/src/libraries/System.Linq/tests/MinTests.cs
index feca6994d066..0cc72fa43a10 100644
--- a/src/libraries/System.Linq/tests/MinTests.cs
+++ b/src/libraries/System.Linq/tests/MinTests.cs
@@ -136,6 +136,8 @@ public void Min_Int_EmptySource_ThrowsInvalidOperationException()
         {
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<int>().Min());
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<int>().Min(x => x));
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<int>()).Min());
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<int>()).Min(x => x));
             Assert.Throws<InvalidOperationException>(() => Array.Empty<int>().Min());
             Assert.Throws<InvalidOperationException>(() => new List<int>().Min());
         }
@@ -182,6 +184,8 @@ public void Min_Long_EmptySource_ThrowsInvalidOperationException()
         {
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<long>().Min());
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<long>().Min(x => x));
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<long>()).Min());
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<long>()).Min(x => x));
             Assert.Throws<InvalidOperationException>(() => Array.Empty<long>().Min());
             Assert.Throws<InvalidOperationException>(() => new List<long>().Min());
         }
@@ -250,6 +254,8 @@ public void Min_Float_EmptySource_ThrowsInvalidOperationException()
         {
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<float>().Min());
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<float>().Min(x => x));
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<float>()).Min());
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<float>()).Min(x => x));
             Assert.Throws<InvalidOperationException>(() => Array.Empty<float>().Min());
             Assert.Throws<InvalidOperationException>(() => new List<float>().Min());
         }
@@ -316,6 +322,8 @@ public void Min_Double_EmptySource_ThrowsInvalidOperationException()
         {
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<double>().Min());
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<double>().Min(x => x));
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<double>()).Min());
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<double>()).Min(x => x));
             Assert.Throws<InvalidOperationException>(() => Array.Empty<double>().Min());
             Assert.Throws<InvalidOperationException>(() => new List<double>().Min());
         }
@@ -355,6 +363,8 @@ public void Min_Decimal_EmptySource_ThrowsInvalidOperationException()
         {
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<decimal>().Min());
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<decimal>().Min(x => x));
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<decimal>()).Min());
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<decimal>()).Min(x => x));
             Assert.Throws<InvalidOperationException>(() => Array.Empty<decimal>().Min());
             Assert.Throws<InvalidOperationException>(() => new List<decimal>().Min());
         }
@@ -595,6 +605,8 @@ public void Min_DateTime_EmptySource_ThrowsInvalidOperationException()
         {
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<DateTime>().Min());
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<DateTime>().Min(x => x));
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<DateTime>()).Min());
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<DateTime>()).Min(x => x));
             Assert.Throws<InvalidOperationException>(() => Array.Empty<DateTime>().Min());
             Assert.Throws<InvalidOperationException>(() => new List<DateTime>().Min());
         }
@@ -858,6 +870,7 @@ public void Min_String_NullSelector_ThrowsArgumentNullException()
         public void Min_Bool_EmptySource_ThrowsInvalodOperationException()
         {
             Assert.Throws<InvalidOperationException>(() => Enumerable.Empty<bool>().Min());
+            Assert.Throws<InvalidOperationException>(() => ForceNotCollection(Enumerable.Empty<bool>()).Min());
         }
 
         [Fact]
diff --git a/src/libraries/System.Linq/tests/OfTypeTests.cs b/src/libraries/System.Linq/tests/OfTypeTests.cs
index 3111d6a79c50..3f1c7eb51da2 100644
--- a/src/libraries/System.Linq/tests/OfTypeTests.cs
+++ b/src/libraries/System.Linq/tests/OfTypeTests.cs
@@ -1,7 +1,6 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System;
 using System.Collections.Generic;
 using Xunit;
 
@@ -131,7 +130,103 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).OfType<int>();
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
+        }
+
+        [Fact]
+        public void ValueType_ReturnsOriginal()
+        {
+            IEnumerable<int> e = Enumerable.Range(0, 10);
+            Assert.Same(e, e.OfType<int>());
+        }
+
+        [Fact]
+        public void NullableValueType_ReturnsNewEnumerable()
+        {
+            IEnumerable<int?> e = Enumerable.Range(0, 10).Select(i => (int?)i);
+            Assert.NotSame(e, e.OfType<int>());
+            Assert.NotSame(e, e.OfType<int?>());
+        }
+
+        [Fact]
+        public void ReferenceType_ReturnsNewEnumerable()
+        {
+            IEnumerable<object> e = Enumerable.Range(0, 10).Select(i => (object)i);
+            Assert.NotSame(e, e.OfType<int>());
+            Assert.NotSame(e, e.OfType<int?>());
+            Assert.NotSame(e, e.OfType<object>());
+            Assert.NotSame(e, e.OfType<object?>());
+        }
+
+        [Fact]
+        public void ToArray()
+        {
+            IEnumerable<object> source = new object[] { 1, 2, 3, 4, 5 };
+            Assert.Equal(new int[] { 1, 2, 3, 4, 5 }, source.OfType<int>().ToArray());
+            Assert.Empty(source.OfType<double>().ToArray());
+        }
+
+        [Fact]
+        public void ToList()
+        {
+            IEnumerable<object> source = new object[] { 1, 2, 3, 4, 5 };
+            Assert.Equal(new int[] { 1, 2, 3, 4, 5 }, source.OfType<int>().ToList());
+            Assert.Empty(source.OfType<double>().ToList());
+        }
+
+        [Fact]
+        public void Count()
+        {
+            Assert.Equal(0, new object[] { }.OfType<string>().Count());
+            Assert.Equal(1, new object[] { "abc" }.OfType<string>().Count());
+            Assert.Equal(2, new object[] { "abc", "def" }.OfType<string>().Count());
+            Assert.Equal(2, new object[] { "abc", 42, "def" }.OfType<string>().Count());
+            Assert.Equal(2, new object[] { "abc", 42, null, "def" }.OfType<string>().Count());
+            Assert.Equal(3, new object[] { null, new object(), null, new object(), new object(), null }.OfType<object>().Count());
+
+            Assert.False(new object[] { "abc" }.OfType<string>().TryGetNonEnumeratedCount(out _));
+            Assert.False(new object[] { "abc" }.OfType<int>().TryGetNonEnumeratedCount(out _));
+            Assert.False(new int[] { 42 }.OfType<object>().TryGetNonEnumeratedCount(out _));
+        }
+
+        [Fact]
+        public void First_Last_ElementAt()
+        {
+            IEnumerable<object> source = new object[] { 1, 2, 3, 4, 5 };
+
+            Assert.Equal(1, source.OfType<int>().First());
+            Assert.Equal(0, source.OfType<long>().FirstOrDefault());
+
+            Assert.Equal(5, source.OfType<int>().Last());
+            Assert.Equal(0, source.OfType<long>().LastOrDefault());
+
+            Assert.Equal(4, source.OfType<int>().ElementAt(3));
+            Assert.Equal(0, source.OfType<long>().ElementAtOrDefault(6));
+        }
+
+        [Fact]
+        public void OfTypeSelect()
+        {
+            IEnumerable<object> objects = new object[] { "1", null, "22", null, 3, 4, "55555" };
+            Assert.Equal(new int[] { 1, 2, 5 }, objects.OfType<string>().Select(s => s.Length));
+
+            Assert.Equal(new int[] { 1, 2, 3, 4, 5 }, new int[] { 1, 2, 3, 4, 5 }.OfType<object>().Select(o => (int)o));
+        }
+
+        [Fact]
+        public void MultipleIterations()
+        {
+            var orig = new object[] { null, null, null, null, null };
+            IEnumerable<object> objects = orig.OfType<object>();
+
+            for (int i = 0; i < orig.Length; i++)
+            {
+                orig[i] = i.ToString();
+
+                int count = 0;
+                foreach (object o in objects) count++;
+                Assert.Equal(i + 1, count);
+            }
         }
     }
 }
diff --git a/src/libraries/System.Linq/tests/OrderTests.cs b/src/libraries/System.Linq/tests/OrderTests.cs
index ed2dd9bfc876..dee76efe7382 100644
--- a/src/libraries/System.Linq/tests/OrderTests.cs
+++ b/src/libraries/System.Linq/tests/OrderTests.cs
@@ -196,6 +196,9 @@ public void FirstOnOrdered()
         {
             Assert.Equal(0, Enumerable.Range(0, 10).Shuffle().Order().First());
             Assert.Equal(9, Enumerable.Range(0, 10).Shuffle().OrderDescending().First());
+
+            Assert.Equal(0, ForceNotCollection(Enumerable.Range(0, 10).Shuffle()).Order().First());
+            Assert.Equal(9, ForceNotCollection(Enumerable.Range(0, 10).Shuffle()).OrderDescending().First());
         }
 
         [Fact]
@@ -281,6 +284,9 @@ public void LastOnOrdered()
         {
             Assert.Equal(9, Enumerable.Range(0, 10).Shuffle().Order().Last());
             Assert.Equal(0, Enumerable.Range(0, 10).Shuffle().OrderDescending().Last());
+
+            Assert.Equal(9, ForceNotCollection(Enumerable.Range(0, 10).Shuffle()).Order().Last());
+            Assert.Equal(0, ForceNotCollection(Enumerable.Range(0, 10).Shuffle()).OrderDescending().Last());
         }
 
         [Fact]
@@ -307,6 +313,16 @@ public void LastOrDefaultOnOrdered()
             Assert.Equal(0, Enumerable.Empty<int>().Order().LastOrDefault());
         }
 
+        [Fact]
+        public void ElementAtOnOrdered()
+        {
+            Assert.Equal(4, Enumerable.Range(0, 10).Shuffle().Order().ElementAt(4));
+            Assert.Equal(5, Enumerable.Range(0, 10).Shuffle().OrderDescending().ElementAt(4));
+
+            Assert.Equal(4, ForceNotCollection(Enumerable.Range(0, 10).Shuffle()).Order().ElementAt(4));
+            Assert.Equal(5, ForceNotCollection(Enumerable.Range(0, 10).Shuffle()).OrderDescending().ElementAt(4));
+        }
+
         [Fact]
         public void EnumeratorDoesntContinue()
         {
diff --git a/src/libraries/System.Linq/tests/OrderedSubsetting.cs b/src/libraries/System.Linq/tests/OrderedSubsetting.cs
index 6c3d5f57cd69..c39a61a86301 100644
--- a/src/libraries/System.Linq/tests/OrderedSubsetting.cs
+++ b/src/libraries/System.Linq/tests/OrderedSubsetting.cs
@@ -416,7 +416,7 @@ public void SelectForcedToEnumeratorDoesntEnumerate()
             var iterator = Enumerable.Range(-1, 8).Shuffle().OrderBy(i => i).Skip(1).Take(5).Select(i => i * 2);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
diff --git a/src/libraries/System.Linq/tests/RangeTests.cs b/src/libraries/System.Linq/tests/RangeTests.cs
index 8421a66ba890..2e331cfee7ec 100644
--- a/src/libraries/System.Linq/tests/RangeTests.cs
+++ b/src/libraries/System.Linq/tests/RangeTests.cs
@@ -243,6 +243,7 @@ static void Validate(IEnumerable<int> e, int[] expected)
                 Assert.Throws<NotSupportedException>(() => list.Insert(0, 42));
                 Assert.Throws<NotSupportedException>(() => list.Clear());
                 Assert.Throws<NotSupportedException>(() => list.Remove(42));
+                Assert.Throws<NotSupportedException>(() => list.RemoveAt(0));
                 Assert.Throws<NotSupportedException>(() => list[0] = 42);
                 AssertExtensions.Throws<ArgumentOutOfRangeException>("index", () => list[-1]);
                 AssertExtensions.Throws<ArgumentOutOfRangeException>("index", () => list[expected.Length]);
@@ -255,6 +256,8 @@ static void Validate(IEnumerable<int> e, int[] expected)
 
                 Assert.False(list.Contains(expected[0] - 1));
                 Assert.False(list.Contains(expected[^1] + 1));
+                Assert.Equal(-1, list.IndexOf(expected[0] - 1));
+                Assert.Equal(-1, list.IndexOf(expected[^1] + 1));
                 Assert.All(expected, i => Assert.True(list.Contains(i)));
                 Assert.All(expected, i => Assert.Equal(Array.IndexOf(expected, i), list.IndexOf(i)));
                 for (int i = 0; i < expected.Length; i++)
diff --git a/src/libraries/System.Linq/tests/RepeatTests.cs b/src/libraries/System.Linq/tests/RepeatTests.cs
index 625dff376de3..df8eebda3569 100644
--- a/src/libraries/System.Linq/tests/RepeatTests.cs
+++ b/src/libraries/System.Linq/tests/RepeatTests.cs
@@ -255,6 +255,7 @@ static void Validate(IEnumerable<int> e, int[] expected)
                 Assert.Throws<NotSupportedException>(() => list.Insert(0, 42));
                 Assert.Throws<NotSupportedException>(() => list.Clear());
                 Assert.Throws<NotSupportedException>(() => list.Remove(42));
+                Assert.Throws<NotSupportedException>(() => list.RemoveAt(0));
                 Assert.Throws<NotSupportedException>(() => list[0] = 42);
                 AssertExtensions.Throws<ArgumentOutOfRangeException>("index", () => list[-1]);
                 AssertExtensions.Throws<ArgumentOutOfRangeException>("index", () => list[expected.Length]);
@@ -267,6 +268,8 @@ static void Validate(IEnumerable<int> e, int[] expected)
 
                 Assert.False(list.Contains(expected[0] - 1));
                 Assert.False(list.Contains(expected[^1] + 1));
+                Assert.Equal(-1, list.IndexOf(expected[0] - 1));
+                Assert.Equal(-1, list.IndexOf(expected[^1] + 1));
                 Assert.All(expected, i => Assert.True(list.Contains(i)));
                 Assert.All(expected, i => Assert.Equal(Array.IndexOf(expected, i), list.IndexOf(i)));
                 for (int i = 0; i < expected.Length; i++)
diff --git a/src/libraries/System.Linq/tests/ReverseTests.cs b/src/libraries/System.Linq/tests/ReverseTests.cs
index 6ecb3874dad5..1cd337a97503 100644
--- a/src/libraries/System.Linq/tests/ReverseTests.cs
+++ b/src/libraries/System.Linq/tests/ReverseTests.cs
@@ -82,7 +82,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Reverse();
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
     }
 }
diff --git a/src/libraries/System.Linq/tests/SelectManyTests.cs b/src/libraries/System.Linq/tests/SelectManyTests.cs
index 246826ac9ed5..9a1446c1eb4e 100644
--- a/src/libraries/System.Linq/tests/SelectManyTests.cs
+++ b/src/libraries/System.Linq/tests/SelectManyTests.cs
@@ -345,7 +345,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).SelectMany(i => new int[0]);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -353,7 +353,7 @@ public void ForcedToEnumeratorDoesntEnumerateIndexed()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).SelectMany((e, i) => new int[0]);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -361,7 +361,7 @@ public void ForcedToEnumeratorDoesntEnumerateResultSel()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).SelectMany(i => new int[0], (e, i) => e);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -369,7 +369,7 @@ public void ForcedToEnumeratorDoesntEnumerateIndexedResultSel()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).SelectMany((e, i) => new int[0], (e, i) => e);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Theory]
diff --git a/src/libraries/System.Linq/tests/SelectTests.cs b/src/libraries/System.Linq/tests/SelectTests.cs
index 6cf77214a5b3..e1c642ae3162 100644
--- a/src/libraries/System.Linq/tests/SelectTests.cs
+++ b/src/libraries/System.Linq/tests/SelectTests.cs
@@ -747,7 +747,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Select(i => i);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -756,7 +756,7 @@ public void ForcedToEnumeratorDoesntEnumerateIndexed()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Select((e, i) => i);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -764,7 +764,7 @@ public void ForcedToEnumeratorDoesntEnumerateArray()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).ToArray().Select(i => i);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -772,7 +772,7 @@ public void ForcedToEnumeratorDoesntEnumerateList()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).ToList().Select(i => i);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -780,7 +780,7 @@ public void ForcedToEnumeratorDoesntEnumerateIList()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).ToList().AsReadOnly().Select(i => i);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -788,7 +788,7 @@ public void ForcedToEnumeratorDoesntEnumerateIPartition()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).ToList().AsReadOnly().Select(i => i).Skip(1);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
diff --git a/src/libraries/System.Linq/tests/SequenceEqualTests.cs b/src/libraries/System.Linq/tests/SequenceEqualTests.cs
index 380916550efd..7393d18947aa 100644
--- a/src/libraries/System.Linq/tests/SequenceEqualTests.cs
+++ b/src/libraries/System.Linq/tests/SequenceEqualTests.cs
@@ -1,8 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System;
-using System.Collections.Generic;
+using System.Collections.ObjectModel;
 using Xunit;
 
 namespace System.Linq.Tests
@@ -246,5 +245,31 @@ public void ByteArrays_SpecialCasedButExpectedBehavior()
                 }
             }
         }
+
+        [Fact]
+        public void ICollectionsCompareCorrectly()
+        {
+            Assert.True(new TestCollection<int>([]).SequenceEqual(new TestCollection<int>([])));
+            Assert.True(new TestCollection<int>([1]).SequenceEqual(new TestCollection<int>([1])));
+            Assert.True(new TestCollection<int>([1, 2, 3]).SequenceEqual(new TestCollection<int>([1, 2, 3])));
+
+            Assert.False(new TestCollection<int>([1, 2, 3, 4]).SequenceEqual(new TestCollection<int>([1, 2, 3])));
+            Assert.False(new TestCollection<int>([1, 2, 3]).SequenceEqual(new TestCollection<int>([1, 2, 3, 4])));
+            Assert.False(new TestCollection<int>([1, 2, 3]).SequenceEqual(new TestCollection<int>([1, 2, 4])));
+            Assert.False(new TestCollection<int>([-1, 2, 3]).SequenceEqual(new TestCollection<int>([-2, 2, 3])));
+        }
+
+        [Fact]
+        public void IListsCompareCorrectly()
+        {
+            Assert.True(new ReadOnlyCollection<int>([]).SequenceEqual(new ReadOnlyCollection<int>([])));
+            Assert.True(new ReadOnlyCollection<int>([1]).SequenceEqual(new ReadOnlyCollection<int>([1])));
+            Assert.True(new ReadOnlyCollection<int>([1, 2, 3]).SequenceEqual(new ReadOnlyCollection<int>([1, 2, 3])));
+
+            Assert.False(new ReadOnlyCollection<int>([1, 2, 3, 4]).SequenceEqual(new ReadOnlyCollection<int>([1, 2, 3])));
+            Assert.False(new ReadOnlyCollection<int>([1, 2, 3]).SequenceEqual(new ReadOnlyCollection<int>([1, 2, 3, 4])));
+            Assert.False(new ReadOnlyCollection<int>([1, 2, 3]).SequenceEqual(new ReadOnlyCollection<int>([1, 2, 4])));
+            Assert.False(new ReadOnlyCollection<int>([-1, 2, 3]).SequenceEqual(new ReadOnlyCollection<int>([-2, 2, 3])));
+        }
     }
 }
diff --git a/src/libraries/System.Linq/tests/SkipLastTests.cs b/src/libraries/System.Linq/tests/SkipLastTests.cs
index fe9652a875e9..c4770410870d 100644
--- a/src/libraries/System.Linq/tests/SkipLastTests.cs
+++ b/src/libraries/System.Linq/tests/SkipLastTests.cs
@@ -9,6 +9,12 @@ namespace System.Linq.Tests
 {
     public class SkipLastTests : EnumerableTests
     {
+        [Fact]
+        public void SkipLastThrowsOnNull()
+        {
+            AssertExtensions.Throws<ArgumentNullException>("source", () => ((IEnumerable<int>)null).SkipLast(10));
+        }
+
         [Theory]
         [MemberData(nameof(EnumerableData), MemberType = typeof(SkipTakeData))]
         public void SkipLast(IEnumerable<int> source, int count)
diff --git a/src/libraries/System.Linq/tests/SkipTests.cs b/src/libraries/System.Linq/tests/SkipTests.cs
index b2dd1cf62f44..cf4d16b13099 100644
--- a/src/libraries/System.Linq/tests/SkipTests.cs
+++ b/src/libraries/System.Linq/tests/SkipTests.cs
@@ -210,7 +210,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Skip(2);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -219,7 +219,7 @@ public void ForcedToEnumeratorDoesntEnumerateIList()
             var iterator = (new[] { 0, 1, 2 }).Skip(2);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -497,7 +497,7 @@ public void IteratorStateShouldNotChangeIfNumberOfElementsIsUnbounded()
 
             // On platforms that do not have this change, the optimization may not be present
             // and the iterator may not have a field named _state. In that case, nop.
-            if (state != null)
+            if (state is not null)
             {
                 state.SetValue(iterator, int.MaxValue);
 
diff --git a/src/libraries/System.Linq/tests/SkipWhileTests.cs b/src/libraries/System.Linq/tests/SkipWhileTests.cs
index 26281efc5f86..a13a686cef3f 100644
--- a/src/libraries/System.Linq/tests/SkipWhileTests.cs
+++ b/src/libraries/System.Linq/tests/SkipWhileTests.cs
@@ -1,15 +1,20 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System.Collections;
 using System.Collections.Generic;
 using Xunit;
-using Xunit.Abstractions;
 
 namespace System.Linq.Tests
 {
     public class SkipWhileTests : EnumerableTests
     {
+        [Fact]
+        public void Empty()
+        {
+            Assert.Equal(Enumerable.Empty<int>(), Enumerable.Empty<int>().SkipWhile(i => i < 40));
+            Assert.Equal(Enumerable.Empty<int>(), Enumerable.Empty<int>().SkipWhile((i, index) => i < 40));
+        }
+
         [Fact]
         public void SkipWhileAllTrue()
         {
@@ -161,7 +166,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).SkipWhile(e => true);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -170,7 +175,7 @@ public void ForcedToEnumeratorDoesntEnumerateIndexed()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).SkipWhile((e, i) => true);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
     }
 }
diff --git a/src/libraries/System.Linq/tests/TakeLastTests.cs b/src/libraries/System.Linq/tests/TakeLastTests.cs
index 31b58d5bf017..b39d59e94263 100644
--- a/src/libraries/System.Linq/tests/TakeLastTests.cs
+++ b/src/libraries/System.Linq/tests/TakeLastTests.cs
@@ -9,6 +9,12 @@ namespace System.Linq.Tests
 {
     public class TakeLastTests : EnumerableTests
     {
+        [Fact]
+        public void SkipLastThrowsOnNull()
+        {
+            AssertExtensions.Throws<ArgumentNullException>("source", () => ((IEnumerable<int>)null).TakeLast(10));
+        }
+
         [Theory]
         [MemberData(nameof(EnumerableData), MemberType = typeof(SkipTakeData))]
         public void TakeLast(IEnumerable<int> source, int count)
diff --git a/src/libraries/System.Linq/tests/TakeTests.cs b/src/libraries/System.Linq/tests/TakeTests.cs
index 93a0405bfaf3..d9408a157124 100644
--- a/src/libraries/System.Linq/tests/TakeTests.cs
+++ b/src/libraries/System.Linq/tests/TakeTests.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
+using System.Collections.ObjectModel;
 using Xunit;
 
 namespace System.Linq.Tests
@@ -270,23 +271,23 @@ public void ForcedToEnumeratorDoesNotEnumerate()
             var iterator1 = NumberRangeGuaranteedNotCollectionType(0, 3).Take(2);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en1 = iterator1 as IEnumerator<int>;
-            Assert.False(en1 != null && en1.MoveNext());
+            Assert.False(en1 is not null && en1.MoveNext());
 
             var iterator2 = NumberRangeGuaranteedNotCollectionType(0, 3).Take(0..2);
             var en2 = iterator2 as IEnumerator<int>;
-            Assert.False(en2 != null && en2.MoveNext());
+            Assert.False(en2 is not null && en2.MoveNext());
 
             var iterator3 = NumberRangeGuaranteedNotCollectionType(0, 3).Take(^3..2);
             var en3 = iterator3 as IEnumerator<int>;
-            Assert.False(en3 != null && en3.MoveNext());
+            Assert.False(en3 is not null && en3.MoveNext());
 
             var iterator4 = NumberRangeGuaranteedNotCollectionType(0, 3).Take(0..^1);
             var en4 = iterator4 as IEnumerator<int>;
-            Assert.False(en4 != null && en4.MoveNext());
+            Assert.False(en4 is not null && en4.MoveNext());
 
             var iterator5 = NumberRangeGuaranteedNotCollectionType(0, 3).Take(^3..^1);
             var en5 = iterator5 as IEnumerator<int>;
-            Assert.False(en5 != null && en5.MoveNext());
+            Assert.False(en5 is not null && en5.MoveNext());
         }
 
         [Fact]
@@ -319,23 +320,23 @@ public void ForcedToEnumeratorDoesntEnumerateIList()
             var iterator1 = NumberRangeGuaranteedNotCollectionType(0, 3).ToList().Take(2);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en1 = iterator1 as IEnumerator<int>;
-            Assert.False(en1 != null && en1.MoveNext());
+            Assert.False(en1 is not null && en1.MoveNext());
 
             var iterator2 = NumberRangeGuaranteedNotCollectionType(0, 3).ToList().Take(0..2);
             var en2 = iterator2 as IEnumerator<int>;
-            Assert.False(en2 != null && en2.MoveNext());
+            Assert.False(en2 is not null && en2.MoveNext());
 
             var iterator3 = NumberRangeGuaranteedNotCollectionType(0, 3).ToList().Take(^3..2);
             var en3 = iterator3 as IEnumerator<int>;
-            Assert.False(en3 != null && en3.MoveNext());
+            Assert.False(en3 is not null && en3.MoveNext());
 
             var iterator4 = NumberRangeGuaranteedNotCollectionType(0, 3).ToList().Take(0..^1);
             var en4 = iterator4 as IEnumerator<int>;
-            Assert.False(en4 != null && en4.MoveNext());
+            Assert.False(en4 is not null && en4.MoveNext());
 
             var iterator5 = NumberRangeGuaranteedNotCollectionType(0, 3).ToList().Take(^3..^1);
             var en5 = iterator5 as IEnumerator<int>;
-            Assert.False(en5 != null && en5.MoveNext());
+            Assert.False(en5 is not null && en5.MoveNext());
         }
 
         [Fact]
@@ -2031,5 +2032,37 @@ public void EmptySource_DoNotThrowException_EnumerablePartition()
             Assert.Empty(EnumerablePartitionOrEmpty(source).Take(3..^8));
             Assert.Empty(EnumerablePartitionOrEmpty(source).Take(^6..^7));
         }
+
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsSpeedOptimized))]
+        public void SkipTakeOnIListIsIList()
+        {
+            IList<int> list = new ReadOnlyCollection<int>(Enumerable.Range(0, 100).ToList());
+            IList<int> skipTake = Assert.IsAssignableFrom<IList<int>>(list.Skip(10).Take(20));
+
+            Assert.True(skipTake.IsReadOnly);
+            Assert.Equal(20, skipTake.Count);
+            int[] results = new int[20];
+            skipTake.CopyTo(results, 0);
+            for (int i = 0; i < 20; i++)
+            {
+                Assert.Equal(i + 10, skipTake[i]);
+                Assert.Equal(i + 10, results[i]);
+                Assert.True(skipTake.Contains(i + 10));
+                Assert.True(skipTake.IndexOf(i + 10) == i);
+            }
+
+            Assert.False(skipTake.Contains(9));
+            Assert.False(skipTake.Contains(30));
+
+            Assert.Throws<ArgumentOutOfRangeException>(() => skipTake[-1]);
+            Assert.Throws<ArgumentOutOfRangeException>(() => skipTake[20]);
+
+            Assert.Throws<NotSupportedException>(() => skipTake.Add(42));
+            Assert.Throws<NotSupportedException>(() => skipTake.Clear());
+            Assert.Throws<NotSupportedException>(() => skipTake.Insert(0, 42));
+            Assert.Throws<NotSupportedException>(() => skipTake.Remove(42));
+            Assert.Throws<NotSupportedException>(() => skipTake.RemoveAt(0));
+            Assert.Throws<NotSupportedException>(() => skipTake[0] = 42);
+        }
     }
 }
diff --git a/src/libraries/System.Linq/tests/TakeWhileTests.cs b/src/libraries/System.Linq/tests/TakeWhileTests.cs
index 55f02459978a..357b31a34e25 100644
--- a/src/libraries/System.Linq/tests/TakeWhileTests.cs
+++ b/src/libraries/System.Linq/tests/TakeWhileTests.cs
@@ -1,7 +1,6 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System.Collections;
 using System.Collections.Generic;
 using Xunit;
 
@@ -9,6 +8,13 @@ namespace System.Linq.Tests
 {
     public class TakeWhileTests : EnumerableTests
     {
+        [Fact]
+        public void Empty()
+        {
+            Assert.Equal(Enumerable.Empty<int>(), Enumerable.Empty<int>().TakeWhile(i => i < 40));
+            Assert.Equal(Enumerable.Empty<int>(), Enumerable.Empty<int>().TakeWhile((i, index) => i < 40));
+        }
+
         [Fact]
         public void SameResultsRepeatCallsIntQuery()
         {
@@ -168,7 +174,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).TakeWhile(e => true);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -177,7 +183,7 @@ public void ForcedToEnumeratorDoesntEnumerateIndexed()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).TakeWhile((e, i) => true);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
     }
 }
diff --git a/src/libraries/System.Linq/tests/TestExtensions.cs b/src/libraries/System.Linq/tests/TestExtensions.cs
index 4c0cfe6626a7..b27dd3b0e49e 100644
--- a/src/libraries/System.Linq/tests/TestExtensions.cs
+++ b/src/libraries/System.Linq/tests/TestExtensions.cs
@@ -10,10 +10,10 @@ namespace System.Linq.Tests
     public static class TestExtensions
     {
         public static IEnumerable<T> RunOnce<T>(this IEnumerable<T> source) =>
-            source == null ? null : (source as IList<T>)?.RunOnce() ?? new RunOnceEnumerable<T>(source);
+            source is null ? null : (source as IList<T>)?.RunOnce() ?? new RunOnceEnumerable<T>(source);
 
         public static IEnumerable<T> RunOnce<T>(this IList<T> source)
-            => source == null ? null : new RunOnceList<T>(source);
+            => source is null ? null : new RunOnceList<T>(source);
 
         private class RunOnceEnumerable<T> : IEnumerable<T>
         {
diff --git a/src/libraries/System.Linq/tests/ToArrayTests.cs b/src/libraries/System.Linq/tests/ToArrayTests.cs
index bc1c47ffcf91..23f51f503af2 100644
--- a/src/libraries/System.Linq/tests/ToArrayTests.cs
+++ b/src/libraries/System.Linq/tests/ToArrayTests.cs
@@ -152,8 +152,8 @@ public void ToArray_ArrayWhereSelect(int[] sourceIntegers, string[] convertedStr
             Assert.Equal(convertedStrings, sourceIntegers.Where(i => true).Select(i => i.ToString()).ToArray());
             Assert.Equal(Array.Empty<string>(), sourceIntegers.Where(i => false).Select(i => i.ToString()).ToArray());
 
-            Assert.Equal(convertedStrings, sourceIntegers.Select(i => i.ToString()).Where(s => s != null).ToArray());
-            Assert.Equal(Array.Empty<string>(), sourceIntegers.Select(i => i.ToString()).Where(s => s == null).ToArray());
+            Assert.Equal(convertedStrings, sourceIntegers.Select(i => i.ToString()).Where(s => s is not null).ToArray());
+            Assert.Equal(Array.Empty<string>(), sourceIntegers.Select(i => i.ToString()).Where(s => s is null).ToArray());
         }
 
         [Theory]
@@ -172,8 +172,8 @@ public void ToArray_ListWhereSelect(int[] sourceIntegers, string[] convertedStri
             Assert.Equal(convertedStrings, sourceList.Where(i => true).Select(i => i.ToString()).ToArray());
             Assert.Equal(Array.Empty<string>(), sourceList.Where(i => false).Select(i => i.ToString()).ToArray());
 
-            Assert.Equal(convertedStrings, sourceList.Select(i => i.ToString()).Where(s => s != null).ToArray());
-            Assert.Equal(Array.Empty<string>(), sourceList.Select(i => i.ToString()).Where(s => s == null).ToArray());
+            Assert.Equal(convertedStrings, sourceList.Select(i => i.ToString()).Where(s => s is not null).ToArray());
+            Assert.Equal(Array.Empty<string>(), sourceList.Select(i => i.ToString()).Where(s => s is null).ToArray());
         }
 
         [Fact]
diff --git a/src/libraries/System.Linq/tests/ToListTests.cs b/src/libraries/System.Linq/tests/ToListTests.cs
index 9d6b49ad128f..ec8d9b166508 100644
--- a/src/libraries/System.Linq/tests/ToListTests.cs
+++ b/src/libraries/System.Linq/tests/ToListTests.cs
@@ -121,8 +121,8 @@ public void ToList_ArrayWhereSelect(int[] sourceIntegers, string[] convertedStri
             Assert.Equal(convertedList, sourceIntegers.Where(i => true).Select(i => i.ToString()).ToList());
             Assert.Equal(emptyStringsList, sourceIntegers.Where(i => false).Select(i => i.ToString()).ToList());
 
-            Assert.Equal(convertedList, sourceIntegers.Select(i => i.ToString()).Where(s => s != null).ToList());
-            Assert.Equal(emptyStringsList, sourceIntegers.Select(i => i.ToString()).Where(s => s == null).ToList());
+            Assert.Equal(convertedList, sourceIntegers.Select(i => i.ToString()).Where(s => s is not null).ToList());
+            Assert.Equal(emptyStringsList, sourceIntegers.Select(i => i.ToString()).Where(s => s is null).ToList());
         }
 
         [Theory]
@@ -145,8 +145,8 @@ public void ToList_ListWhereSelect(int[] sourceIntegers, string[] convertedStrin
             Assert.Equal(convertedList, sourceList.Where(i => true).Select(i => i.ToString()).ToList());
             Assert.Equal(emptyStringsList, sourceList.Where(i => false).Select(i => i.ToString()).ToList());
 
-            Assert.Equal(convertedList, sourceList.Select(i => i.ToString()).Where(s => s != null).ToList());
-            Assert.Equal(emptyStringsList, sourceList.Select(i => i.ToString()).Where(s => s == null).ToList());
+            Assert.Equal(convertedList, sourceList.Select(i => i.ToString()).Where(s => s is not null).ToList());
+            Assert.Equal(emptyStringsList, sourceList.Select(i => i.ToString()).Where(s => s is null).ToList());
         }
 
         [Theory]
@@ -166,8 +166,8 @@ public void ToList_IListWhereSelect(int[] sourceIntegers, string[] convertedStri
             Assert.Equal(convertedList, sourceList.Where(i => true).Select(i => i.ToString()).ToList());
             Assert.Equal(ReadOnlyCollection<string>.Empty, sourceList.Where(i => false).Select(i => i.ToString()).ToList());
 
-            Assert.Equal(convertedList, sourceList.Select(i => i.ToString()).Where(s => s != null).ToList());
-            Assert.Equal(ReadOnlyCollection<string>.Empty, sourceList.Select(i => i.ToString()).Where(s => s == null).ToList());
+            Assert.Equal(convertedList, sourceList.Select(i => i.ToString()).Where(s => s is not null).ToList());
+            Assert.Equal(ReadOnlyCollection<string>.Empty, sourceList.Select(i => i.ToString()).Where(s => s is null).ToList());
         }
 
         [Fact]
diff --git a/src/libraries/System.Linq/tests/ToLookupTests.cs b/src/libraries/System.Linq/tests/ToLookupTests.cs
index 458aaa9e4dde..5361d6dc6ae5 100644
--- a/src/libraries/System.Linq/tests/ToLookupTests.cs
+++ b/src/libraries/System.Linq/tests/ToLookupTests.cs
@@ -1,10 +1,8 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System;
+using System.Collections;
 using System.Collections.Generic;
-using System.Diagnostics;
-using System.Reflection;
 using Xunit;
 
 namespace System.Linq.Tests
@@ -53,6 +51,14 @@ from x4 in q2
             Assert.Equal(q.ToLookup(e => e.a1), q.ToLookup(e => e.a1));
         }
 
+        [Fact]
+        public void Empty()
+        {
+            AssertMatches(Enumerable.Empty<int>(), Enumerable.Empty<int>(), Enumerable.Empty<int>().ToLookup(i => i));
+            Assert.False(Enumerable.Empty<int>().ToLookup(i => i).Contains(0));
+            Assert.Empty(Enumerable.Empty<int>().ToLookup(i => i)[0]);
+        }
+
         [Fact]
         public void NullKeyIncluded()
         {
@@ -289,6 +295,59 @@ public void ApplyResultSelectorForGroup(int enumType)
             Assert.Equal(expected, result);
         }
 
+        [Fact]
+        public void ApplyResultSelector()
+        {
+            Lookup<int, int> lookup = (Lookup<int, int>)new int[] { 1, 2, 2, 3, 3, 3 }.ToLookup(i => i);
+            IEnumerable<int> sums = lookup.ApplyResultSelector((key, elements) =>
+            {
+                Assert.Equal(key, elements.Count());
+                return elements.Sum();
+            });
+            Assert.Equal([1, 4, 9], sums);
+        }
+
+        [Theory]
+        [InlineData(0)]
+        [InlineData(1)]
+        [InlineData(10)]
+        public void LookupImplementsICollection(int count)
+        {
+            Assert.IsAssignableFrom<ICollection<IGrouping<string, int>>>(Enumerable.Range(0, count).ToLookup(i => i.ToString()));
+            Assert.IsAssignableFrom<ICollection<IGrouping<string, int>>>(Enumerable.Range(0, count).ToLookup(i => i.ToString(), StringComparer.OrdinalIgnoreCase));
+            Assert.IsAssignableFrom<ICollection<IGrouping<string, int>>>(Enumerable.Range(0, count).ToLookup(i => i.ToString(), i => i));
+            Assert.IsAssignableFrom<ICollection<IGrouping<string, int>>>(Enumerable.Range(0, count).ToLookup(i => i.ToString(), i => i, StringComparer.OrdinalIgnoreCase));
+
+            var collection = (ICollection<IGrouping<string, int>>)Enumerable.Range(0, count).ToLookup(i => i.ToString());
+            Assert.Equal(count, collection.Count);
+            Assert.True(collection.IsReadOnly);
+            Assert.Throws<NotSupportedException>(() => collection.Add(null));
+            Assert.Throws<NotSupportedException>(() => collection.Remove(null));
+            Assert.Throws<NotSupportedException>(() => collection.Clear());
+
+            if (count > 0)
+            {
+                IGrouping<string, int> first = collection.First();
+                IGrouping<string, int> last = collection.Last();
+                Assert.True(collection.Contains(first));
+                Assert.True(collection.Contains(last));
+            }
+            Assert.False(collection.Contains(new NopGrouping()));
+
+            IGrouping<string, int>[] items = new IGrouping<string, int>[count];
+            collection.CopyTo(items, 0);
+            Assert.Equal(collection.Select(i => i), items);
+            Assert.Equal(items, Enumerable.Range(0, count).ToLookup(i => i.ToString()).ToArray());
+            Assert.Equal(items, Enumerable.Range(0, count).ToLookup(i => i.ToString()).ToList());
+        }
+
+        private sealed class NopGrouping : IGrouping<string, int>
+        {
+            public string Key => "";
+            public IEnumerator<int> GetEnumerator() => ((IList<int>)Array.Empty<int>()).GetEnumerator();
+            IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+        }
+
         public class Membership
         {
             public int Id { get; set; }
@@ -301,7 +360,7 @@ public class Role : IEquatable<Role>
         {
             public int Id { get; set; }
 
-            public bool Equals(Role other) => other != null && Id == other.Id;
+            public bool Equals(Role other) => other is not null && Id == other.Id;
 
             public override bool Equals(object obj) => Equals(obj as Role);
 
@@ -315,7 +374,7 @@ public class RoleMetadata : IEquatable<RoleMetadata>
             public int CountrB { get; set; }
 
             public bool Equals(RoleMetadata other)
-                => other != null && Role.Equals(other.Role) && CountA == other.CountA && CountrB == other.CountrB;
+                => other is not null && Role.Equals(other.Role) && CountA == other.CountA && CountrB == other.CountrB;
 
             public override bool Equals(object obj) => Equals(obj as RoleMetadata);
 
diff --git a/src/libraries/System.Linq/tests/UnionTests.cs b/src/libraries/System.Linq/tests/UnionTests.cs
index 9dbfd822b163..f1a45effa45a 100644
--- a/src/libraries/System.Linq/tests/UnionTests.cs
+++ b/src/libraries/System.Linq/tests/UnionTests.cs
@@ -302,7 +302,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Union(Enumerable.Range(0, 3));
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -311,7 +311,7 @@ public void ForcedToEnumeratorDoesntEnumerateMultipleUnions()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Union(Enumerable.Range(0, 3)).Union(Enumerable.Range(2, 4)).Union(new[] { 9, 2, 4 });
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
diff --git a/src/libraries/System.Linq/tests/WhereTests.cs b/src/libraries/System.Linq/tests/WhereTests.cs
index a6bc625e9e49..0cf0ff50cd38 100644
--- a/src/libraries/System.Linq/tests/WhereTests.cs
+++ b/src/libraries/System.Linq/tests/WhereTests.cs
@@ -1016,7 +1016,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Where(i => true);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -1024,7 +1024,7 @@ public void ForcedToEnumeratorDoesntEnumerateArray()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).ToArray().Where(i => true);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -1032,7 +1032,7 @@ public void ForcedToEnumeratorDoesntEnumerateList()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).ToList().Where(i => true);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -1040,7 +1040,7 @@ public void ForcedToEnumeratorDoesntEnumerateIndexed()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Where((e, i) => true);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -1048,7 +1048,7 @@ public void ForcedToEnumeratorDoesntEnumerateWhereSelect()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Where(i => true).Select(i => i);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -1056,7 +1056,7 @@ public void ForcedToEnumeratorDoesntEnumerateWhereSelectArray()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).ToArray().Where(i => true).Select(i => i);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
@@ -1064,7 +1064,7 @@ public void ForcedToEnumeratorDoesntEnumerateWhereSelectList()
         {
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).ToList().Where(i => true).Select(i => i);
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Theory]
@@ -1094,6 +1094,50 @@ public void ToCollection(IEnumerable<int> source)
             }
         }
 
+        [Fact]
+        public void WhereFirstLast()
+        {
+            Assert.All(IdentityTransforms<int>(), transform =>
+            {
+                IEnumerable<int> data = transform(Enumerable.Range(0, 10));
+
+                Assert.Equal(3, data.Where(i => i == 3).First());
+                Assert.Equal(0, data.Where(i => i % 2 == 0).First());
+
+                Assert.Equal(3, data.Where(i => i == 3).Last());
+                Assert.Equal(8, data.Where(i => i % 2 == 0).Last());
+
+                Assert.Equal(3, data.Where(i => i == 3).ElementAt(0));
+                Assert.Equal(8, data.Where(i => i % 2 == 0).ElementAt(4));
+
+                Assert.Throws<InvalidOperationException>(() => data.Where(i => i == 10).First());
+                Assert.Throws<InvalidOperationException>(() => data.Where(i => i == 10).Last());
+                Assert.Throws<ArgumentOutOfRangeException>(() => data.Where(i => i == 10).ElementAt(0));
+            });
+        }
+
+        [Fact]
+        public void WhereSelectFirstLast()
+        {
+            Assert.All(IdentityTransforms<int>(), transform =>
+            {
+                IEnumerable<int> data = transform(Enumerable.Range(0, 10));
+
+                Assert.Equal(6, data.Where(i => i == 3).Select(i => i * 2).First());
+                Assert.Equal(0, data.Where(i => i % 2 == 0).Select(i => i * 2).First());
+
+                Assert.Equal(6, data.Where(i => i == 3).Select(i => i * 2).Last());
+                Assert.Equal(16, data.Where(i => i % 2 == 0).Select(i => i * 2).Last());
+
+                Assert.Equal(6, data.Where(i => i == 3).Select(i => i * 2).ElementAt(0));
+                Assert.Equal(16, data.Where(i => i % 2 == 0).Select(i => i * 2).ElementAt(4));
+
+                Assert.Throws<InvalidOperationException>(() => data.Where(i => i == 10).Select(i => i * 2).First());
+                Assert.Throws<InvalidOperationException>(() => data.Where(i => i == 10).Select(i => i * 2).Last());
+                Assert.Throws<ArgumentOutOfRangeException>(() => data.Where(i => i == 10).Select(i => i * 2).ElementAt(0));
+            });
+        }
+
         public static IEnumerable<object[]> ToCollectionData()
         {
             IEnumerable<int> seq = GenerateRandomSequnce(seed: 0xdeadbeef, count: 10);
diff --git a/src/libraries/System.Linq/tests/ZipTests.cs b/src/libraries/System.Linq/tests/ZipTests.cs
index cfaa8bade657..f15461931ad9 100644
--- a/src/libraries/System.Linq/tests/ZipTests.cs
+++ b/src/libraries/System.Linq/tests/ZipTests.cs
@@ -374,7 +374,7 @@ public void ForcedToEnumeratorDoesntEnumerate()
             var iterator = NumberRangeGuaranteedNotCollectionType(0, 3).Zip(Enumerable.Range(0, 3), (x, y) => x + y);
             // Don't insist on this behaviour, but check it's correct if it happens
             var en = iterator as IEnumerator<int>;
-            Assert.False(en != null && en.MoveNext());
+            Assert.False(en is not null && en.MoveNext());
         }
 
         [Fact]
diff --git a/src/libraries/System.Management/src/System.Management.csproj b/src/libraries/System.Management/src/System.Management.csproj
index 132be8f0f7df..40f16525cd72 100644
--- a/src/libraries/System.Management/src/System.Management.csproj
+++ b/src/libraries/System.Management/src/System.Management.csproj
@@ -5,7 +5,7 @@
     <TargetFrameworks Condition="'$(NetCoreAppPrevious)' != ''">$(TargetFrameworks);$(NetCoreAppPrevious)-windows;$(NetCoreAppPrevious)</TargetFrameworks>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <NoWarn>$(NoWarn);0618</NoWarn>
-    <NoWarn>$(NoWarn);IDE0059;IDE0060;CA1822</NoWarn>
+    <NoWarn>$(NoWarn);IDE0059;IDE0060;CA1822;CA1865</NoWarn>
     <IncludeDllSafeSearchPathAttribute>true</IncludeDllSafeSearchPathAttribute>
     <UseCompilerGeneratedDocXmlFile>false</UseCompilerGeneratedDocXmlFile>
     <IsPackable>true</IsPackable>
diff --git a/src/libraries/System.Management/src/System/Management/ManagementDateTime.cs b/src/libraries/System.Management/src/System/Management/ManagementDateTime.cs
index 34860c2ff7e2..2ba36e619f18 100644
--- a/src/libraries/System.Management/src/System/Management/ManagementDateTime.cs
+++ b/src/libraries/System.Management/src/System/Management/ManagementDateTime.cs
@@ -193,7 +193,7 @@ public static DateTime ToDateTime(string dmtfDate)
                 throw new ArgumentOutOfRangeException(nameof(dmtfDate));
             }
 
-
+            // codeql[cs/leap-year/unsafe-date-construction-from-two-elements] - DateTime not constructed from multiple elements - it's parsed from a string with defaults that are stable DateTime.MinValue.  It would be intentional to throw if an invalid combination occurred.
             var datetime = new DateTime(year, month, day, hour, minute, second, 0, DateTimeKind.Local);
             // Then add the ticks calculated from the microseconds
             datetime = datetime.AddTicks(ticks);
diff --git a/src/libraries/System.Memory/ref/System.Memory.cs b/src/libraries/System.Memory/ref/System.Memory.cs
index 991678fe5834..8aac4764a149 100644
--- a/src/libraries/System.Memory/ref/System.Memory.cs
+++ b/src/libraries/System.Memory/ref/System.Memory.cs
@@ -326,7 +326,7 @@ public static void CopyTo<T>(this T[]? source, System.Span<T> destination) { }
         public static int LastIndexOfAnyExceptInRange<T>(this System.Span<T> span, T lowInclusive, T highInclusive) where T : System.IComparable<T> { throw null; }
         public static int LastIndexOf<T>(this System.ReadOnlySpan<T> span, System.ReadOnlySpan<T> value) where T : System.IEquatable<T>? { throw null; }
         public static int LastIndexOf<T>(this System.ReadOnlySpan<T> span, T value) where T : System.IEquatable<T>? { throw null; }
-        public static int LastIndexOf<T>(this System.Span<T> span, System.ReadOnlySpan<T> value) where T : System.IEquatable<T?> { throw null; }
+        public static int LastIndexOf<T>(this System.Span<T> span, System.ReadOnlySpan<T> value) where T : System.IEquatable<T>? { throw null; }
         public static int LastIndexOf<T>(this System.Span<T> span, T value) where T : System.IEquatable<T>? { throw null; }
         public static int LastIndexOfAnyInRange<T>(this System.ReadOnlySpan<T> span, T lowInclusive, T highInclusive) where T : System.IComparable<T> { throw null; }
         public static int LastIndexOfAnyInRange<T>(this System.Span<T> span, T lowInclusive, T highInclusive) where T : System.IComparable<T> { throw null; }
diff --git a/src/libraries/System.Memory/tests/ArrayBufferWriter/ArrayBufferWriterTests.T.cs b/src/libraries/System.Memory/tests/ArrayBufferWriter/ArrayBufferWriterTests.T.cs
index 0cf99cbab5e4..f609e38dae34 100644
--- a/src/libraries/System.Memory/tests/ArrayBufferWriter/ArrayBufferWriterTests.T.cs
+++ b/src/libraries/System.Memory/tests/ArrayBufferWriter/ArrayBufferWriterTests.T.cs
@@ -424,14 +424,7 @@ public void MultipleCallsToGetSpan()
                 Assert.True(span.Length >= 256);
                 Span<T> newSpan = output.GetSpan();
                 Assert.Equal(span.Length, newSpan.Length);
-
-                unsafe
-                {
-                    void* pSpan = Unsafe.AsPointer(ref MemoryMarshal.GetReference(span));
-                    void* pNewSpan = Unsafe.AsPointer(ref MemoryMarshal.GetReference(newSpan));
-                    Assert.Equal((IntPtr)pSpan, (IntPtr)pNewSpan);
-                }
-
+                Assert.Equal(0, Unsafe.ByteOffset(ref MemoryMarshal.GetReference(span), ref MemoryMarshal.GetReference(newSpan)));
                 Assert.Equal(span.Length, output.GetSpan().Length);
             }
             finally
diff --git a/src/libraries/System.Memory/tests/MemoryMarshal/CreateFromPinnedArray.cs b/src/libraries/System.Memory/tests/MemoryMarshal/CreateFromPinnedArray.cs
index 3610ea0dbfbf..e56a24e5eea2 100644
--- a/src/libraries/System.Memory/tests/MemoryMarshal/CreateFromPinnedArray.cs
+++ b/src/libraries/System.Memory/tests/MemoryMarshal/CreateFromPinnedArray.cs
@@ -190,6 +190,7 @@ public static unsafe void CreateFromPinnedArrayVerifyPinning()
             int[] pinnedArray = { 90, 91, 92, 93, 94, 95, 96, 97, 98 };
             GCHandle pinnedGCHandle = GCHandle.Alloc(pinnedArray, GCHandleType.Pinned);
 
+            // Unsafe.AsPointer is used to ensure we catch if the GC moves the memory
             Memory<int> pinnedMemory = MemoryMarshal.CreateFromPinnedArray(pinnedArray, 0, 2);
             void* pinnedPtr = Unsafe.AsPointer(ref MemoryMarshal.GetReference(pinnedMemory.Span));
             void* memoryHandlePinnedPtr = pinnedMemory.Pin().Pointer;
@@ -197,8 +198,8 @@ public static unsafe void CreateFromPinnedArrayVerifyPinning()
             GC.Collect();
             GC.Collect(2);
 
-            Assert.Equal((int)pinnedPtr, (int)Unsafe.AsPointer(ref MemoryMarshal.GetReference(pinnedMemory.Span)));
-            Assert.Equal((int)memoryHandlePinnedPtr, (int)pinnedGCHandle.AddrOfPinnedObject().ToPointer());
+            Assert.Equal((IntPtr)pinnedPtr, (IntPtr)Unsafe.AsPointer(ref MemoryMarshal.GetReference(pinnedMemory.Span)));
+            Assert.Equal((IntPtr)memoryHandlePinnedPtr, pinnedGCHandle.AddrOfPinnedObject());
 
             pinnedGCHandle.Free();
         }
diff --git a/src/libraries/System.Memory/tests/MemoryMarshal/GetArrayDataReference.cs b/src/libraries/System.Memory/tests/MemoryMarshal/GetArrayDataReference.cs
index 0fc55c64c445..c3478421c358 100644
--- a/src/libraries/System.Memory/tests/MemoryMarshal/GetArrayDataReference.cs
+++ b/src/libraries/System.Memory/tests/MemoryMarshal/GetArrayDataReference.cs
@@ -43,7 +43,7 @@ public static unsafe void GetArrayDataReference_EmptyInput_ReturnsRefToWhereFirs
 
             ref int theRef = ref MemoryMarshal.GetArrayDataReference(theArray);
 
-            Assert.True(Unsafe.AsPointer(ref theRef) != null);
+            Assert.False(Unsafe.IsNullRef(ref theRef));
             Assert.True(Unsafe.AreSame(ref theRef, ref MemoryMarshal.GetReference(theArray.AsSpan())));
 
             ref int theMdArrayRef = ref Unsafe.As<byte, int>(ref MemoryMarshal.GetArrayDataReference((Array)theArray)); // szarray passed to generalized Array helper
diff --git a/src/libraries/System.Memory/tests/MemoryPool/MemoryPool.cs b/src/libraries/System.Memory/tests/MemoryPool/MemoryPool.cs
index e040a363493b..4ee663dc1dd8 100644
--- a/src/libraries/System.Memory/tests/MemoryPool/MemoryPool.cs
+++ b/src/libraries/System.Memory/tests/MemoryPool/MemoryPool.cs
@@ -52,6 +52,7 @@ public static void MemoryPoolSpan()
                 {
                     unsafe
                     {
+                        // Unsafe.AsPointer is safe here since it's pinned
                         void* pSpan = Unsafe.AsPointer(ref MemoryMarshal.GetReference(sp));
                         Assert.Equal((IntPtr)newMemoryHandle.Pointer, (IntPtr)pSpan);
                     }
@@ -77,6 +78,7 @@ public static void MemoryPoolPin(int elementIndex)
                 {
                     unsafe
                     {
+                        // Unsafe.AsPointer is safe here since it's pinned
                         void* pSpan = Unsafe.AsPointer(ref MemoryMarshal.GetReference(sp.Slice(elementIndex)));
                         Assert.Equal((IntPtr)pSpan, ((IntPtr)newMemoryHandle.Pointer));
                     }
@@ -112,6 +114,7 @@ public static void MemoryPoolPinOffsetAtEnd()
             {
                 unsafe
                 {
+                    // Unsafe.AsPointer is safe here since it's pinned
                     void* pSpan = Unsafe.AsPointer(ref MemoryMarshal.GetReference(sp.Slice(elementIndex)));
                     Assert.Equal((IntPtr)pSpan, ((IntPtr)newMemoryHandle.Pointer));
                 }
@@ -219,11 +222,7 @@ public static void MemoryPoolTryGetArray()
                 unsafe
                 {
                     Assert.True(MemoryMarshal.TryGetArray(memory, out arraySegment));
-                    fixed (int* pArray = arraySegment.Array)
-                    {
-                        void* pSpan = Unsafe.AsPointer(ref MemoryMarshal.GetReference(memory.Span));
-                        Assert.Equal((IntPtr)pSpan, (IntPtr)pArray);
-                    }
+                    Assert.Equal(0, Unsafe.ByteOffset(ref MemoryMarshal.GetArrayDataReference(arraySegment.Array), ref MemoryMarshal.GetReference(memory.Span)));
                 }
             }
         }
diff --git a/src/libraries/System.Memory/tests/ReadOnlySpan/AsSpan.cs b/src/libraries/System.Memory/tests/ReadOnlySpan/AsSpan.cs
index d6070d47c3ba..83387f22a2d2 100644
--- a/src/libraries/System.Memory/tests/ReadOnlySpan/AsSpan.cs
+++ b/src/libraries/System.Memory/tests/ReadOnlySpan/AsSpan.cs
@@ -91,6 +91,7 @@ static unsafe void Validate(string text, int start, int length, ReadOnlySpan<cha
                 Assert.Equal(length, span.Length);
                 fixed (char* pText = text)
                 {
+                    // Unsafe.AsPointer is safe here since it's pinned (since text and span should be the same string)
                     char* expected = pText + start;
                     void* actual = Unsafe.AsPointer(ref MemoryMarshal.GetReference(span));
                     Assert.Equal((IntPtr)expected, (IntPtr)actual);
diff --git a/src/libraries/System.Memory/tests/ReadOnlySpan/CastUp.cs b/src/libraries/System.Memory/tests/ReadOnlySpan/CastUp.cs
new file mode 100644
index 000000000000..bde5e90c2203
--- /dev/null
+++ b/src/libraries/System.Memory/tests/ReadOnlySpan/CastUp.cs
@@ -0,0 +1,18 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Xunit;
+
+namespace System.SpanTests
+{
+    public static partial class ReadOnlySpanTests
+    {
+        [Fact]
+        public static void CastUp()
+        {
+            ReadOnlySpan<string> strings = new string[] { "Hello", "World" };
+            ReadOnlySpan<object> span = ReadOnlySpan<object>.CastUp(strings);
+            span.ValidateReferenceType("Hello", "World");
+        }
+    }
+}
diff --git a/src/libraries/System.Memory/tests/Span/SearchValues.cs b/src/libraries/System.Memory/tests/Span/SearchValues.cs
index 9ef91acec80d..f2020279cdf4 100644
--- a/src/libraries/System.Memory/tests/Span/SearchValues.cs
+++ b/src/libraries/System.Memory/tests/Span/SearchValues.cs
@@ -44,6 +44,10 @@ public static IEnumerable<object[]> Values_MemberData()
                 "aaa",
                 "aaaa",
                 "aaaaa",
+                "Aa",
+                "AaBb",
+                "AaBbCc",
+                "[]{}",
                 "\uFFF0",
                 "\uFFF0\uFFF2",
                 "\uFFF0\uFFF2\uFFF4",
diff --git a/src/libraries/System.Memory/tests/Span/StringSearchValues.cs b/src/libraries/System.Memory/tests/Span/StringSearchValues.cs
index 5d1a51bde210..c0b80b5ab067 100644
--- a/src/libraries/System.Memory/tests/Span/StringSearchValues.cs
+++ b/src/libraries/System.Memory/tests/Span/StringSearchValues.cs
@@ -43,15 +43,18 @@ public static void Values_ImplementsSearchValuesBase(StringComparison comparison
 
             foreach (string value in values)
             {
+                Assert.True(stringValues.Contains(value));
+
                 string differentCase = value.ToLowerInvariant();
                 if (value == differentCase)
                 {
                     differentCase = value.ToUpperInvariant();
-                    Assert.NotEqual(value, differentCase);
                 }
 
-                Assert.True(stringValues.Contains(value));
-                Assert.Equal(comparisonType == StringComparison.OrdinalIgnoreCase, stringValues.Contains(differentCase));
+                if (value != differentCase)
+                {
+                    Assert.Equal(comparisonType == StringComparison.OrdinalIgnoreCase, stringValues.Contains(differentCase));
+                }
 
                 AssertIndexOfAnyAndFriends(new[] { value }, 0, -1, 0, -1);
                 AssertIndexOfAnyAndFriends(new[] { value, value }, 0, -1, 1, -1);
@@ -73,7 +76,7 @@ public static void Values_ImplementsSearchValuesBase(StringComparison comparison
                     AssertIndexOfAnyAndFriends(new[] { ValueNotInSet, differentCase, ValueNotInSet }, 1, 0, 1, 2);
                     AssertIndexOfAnyAndFriends(new[] { differentCase, ValueNotInSet, differentCase }, 0, 1, 2, 1);
                 }
-                else
+                else if (value != differentCase)
                 {
                     AssertIndexOfAnyAndFriends(new[] { differentCase }, -1, 0, -1, 0);
                     AssertIndexOfAnyAndFriends(new[] { differentCase, differentCase }, -1, 0, -1, 1);
@@ -173,6 +176,122 @@ public static void IndexOfAny(StringComparison comparisonType, int expected, str
 
             Assert.Equal(expected >= 0, text.AsSpan().ContainsAny(stringValues));
             Assert.Equal(expected >= 0, textSpan.ContainsAny(stringValues));
+
+            if (values is null || stringValues.Contains(string.Empty))
+            {
+                // The tests below don't work if an empty string is in the set.
+                return;
+            }
+
+            // The tests below assume none of the values contain these characters.
+            Assert.Equal(-1, IndexOfAnyReferenceImpl(new string('\0', 100), valuesArray, comparisonType));
+            Assert.Equal(-1, IndexOfAnyReferenceImpl(new string('\u00FC', 100), valuesArray, comparisonType));
+
+            string[] valuesWithDifferentCases = valuesArray;
+
+            if (comparisonType == StringComparison.OrdinalIgnoreCase)
+            {
+                valuesWithDifferentCases = valuesArray
+                    .SelectMany(v => new[] { v, v.ToUpperInvariant(), v.ToLowerInvariant() })
+                    .Distinct()
+                    // Invariant conversions may produce values that don't match under ordinal rules. Filter them out.
+                    .Where(v => valuesArray.Any(original => v.Equals(original, StringComparison.OrdinalIgnoreCase)))
+                    .ToArray();
+            }
+
+            // Test cases where the implementation changes based on the haystack length (e.g. swapping from Teddy to Rabin-Karp).
+            for (int haystackLength = 0; haystackLength < 50; haystackLength++)
+            {
+                TestWithPoisonPages(PoisonPagePlacement.Before, haystackLength);
+                TestWithPoisonPages(PoisonPagePlacement.After, haystackLength);
+            }
+
+            void TestWithPoisonPages(PoisonPagePlacement poisonPlacement, int haystackLength)
+            {
+                using BoundedMemory<char> memory = BoundedMemory.Allocate<char>(haystackLength, poisonPlacement);
+                Span<char> haystack = memory.Span;
+
+                char asciiNumberNotInSet = Enumerable.Range('0', 10).Select(c => (char)c)
+                    .First(c => !values.Contains(c));
+
+                char asciiLetterLowerNotInSet;
+                char asciiLetterUpperNotInSet;
+
+                if (comparisonType == StringComparison.Ordinal)
+                {
+                    asciiLetterLowerNotInSet = Enumerable.Range('a', 26).Select(c => (char)c).First(c => !values.Contains(c));
+                    asciiLetterUpperNotInSet = Enumerable.Range('A', 26).Select(c => (char)c).First(c => !values.Contains(c));
+                }
+                else
+                {
+                    asciiLetterLowerNotInSet = Enumerable.Range('a', 26).Select(c => (char)c)
+                        .First(c => !values.AsSpan().ContainsAny(c, char.ToUpperInvariant(c)));
+
+                    asciiLetterUpperNotInSet = Enumerable.Range(0, 26).Select(c => (char)('Z' - c))
+                        .First(c => !values.AsSpan().ContainsAny(c, char.ToLowerInvariant(c)));
+                }
+
+                TestWithDifferentMarkerChars(haystack, '\0');
+                TestWithDifferentMarkerChars(haystack, '\u00FC');
+                TestWithDifferentMarkerChars(haystack, asciiNumberNotInSet);
+                TestWithDifferentMarkerChars(haystack, asciiLetterLowerNotInSet);
+                TestWithDifferentMarkerChars(haystack, asciiLetterUpperNotInSet);
+            }
+
+            void TestWithDifferentMarkerChars(Span<char> haystack, char marker)
+            {
+                haystack.Fill(marker);
+                Assert.True(haystack.IndexOfAny(stringValues) == -1, marker.ToString());
+
+                string shortestValue = valuesArray.MinBy(value => value.Length);
+
+                // Test every value individually at every offset in the haystack.
+                foreach (string value in valuesWithDifferentCases)
+                {
+                    for (int startOffset = 0; startOffset <= haystack.Length - value.Length; startOffset++)
+                    {
+                        haystack.Fill(marker);
+
+                        // Place an unrelated matching value at the end of the haystack. It shouldn't affect the result.
+                        shortestValue.CopyTo(haystack.Slice(haystack.Length - shortestValue.Length));
+
+                        // Place a matching value at the offset position.
+                        value.CopyTo(haystack.Slice(startOffset));
+
+                        int actual = haystack.IndexOfAny(stringValues);
+                        if (startOffset != actual)
+                        {
+                            StringSearchValuesTestHelper.AssertionFailed(haystack, valuesArray, stringValues, comparisonType, startOffset, actual);
+                        }
+                    }
+                }
+
+                if (text == valuesArray[0])
+                {
+                    // Already tested above.
+                    return;
+                }
+
+                // Test the provided test case at various offsets in the haystack.
+                for (int startOffset = 0; startOffset <= haystack.Length - text.Length; startOffset++)
+                {
+                    haystack.Fill(marker);
+
+                    // Place the test case text at the end of the haystack. It shouldn't affect the result.
+                    text.CopyTo(haystack.Slice(haystack.Length - text.Length));
+
+                    // Place the test text at the offset position.
+                    text.CopyTo(haystack.Slice(startOffset));
+
+                    int expectedAtOffset = expected == -1 ? -1 : startOffset + expected;
+
+                    int actual = haystack.IndexOfAny(stringValues);
+                    if (expectedAtOffset != actual)
+                    {
+                        StringSearchValuesTestHelper.AssertionFailed(haystack, valuesArray, stringValues, comparisonType, expectedAtOffset, actual);
+                    }
+                }
+            }
         }
 
         [Fact]
@@ -197,6 +316,102 @@ public static void IndexOfAny_InvalidUtf16()
             IndexOfAny(StringComparison.OrdinalIgnoreCase, 1, "\uD801\uDCD8\uD8FB\uDCD8", "foo, \uDCD8");
         }
 
+        [Theory]
+        // Single value of various lengths
+        [InlineData("a")]
+        [InlineData("!")]
+        [InlineData("\u00F6")]
+        [InlineData("ab")]
+        [InlineData("a!")]
+        [InlineData("!a")]
+        [InlineData("!%")]
+        [InlineData("a\u00F6")]
+        [InlineData("\u00F6\u00F6")]
+        [InlineData("abc")]
+        [InlineData("ab!")]
+        [InlineData("a\u00F6b")]
+        [InlineData("\u00F6a\u00F6")]
+        [InlineData("abcd")]
+        [InlineData("ab!cd")]
+        [InlineData("abcde")]
+        [InlineData("abcd!")]
+        [InlineData("abcdefgh")]
+        [InlineData("abcdefghi")]
+        // Multiple values, but they all share the same prefix
+        [InlineData("abc", "ab", "abcd")]
+        // These should hit the Aho-Corasick implementation
+        [InlineData("a", "b")]
+        [InlineData("ab", "c")]
+        // Simple Teddy cases
+        [InlineData("abc", "cde")]
+        [InlineData("abc", "cd")]
+        // Teddy where all starting chars are letters, but not all other characters are
+        [InlineData("ab", "de%", "ghi", "jkl!")]
+        [InlineData("abc", "def%", "ghi", "jkl!")]
+        // Teddy where starting chars aren't only letters
+        [InlineData("ab", "d%e", "ghi", "jkl!")]
+        [InlineData("abc", "def%", "ghi", "!jkl")]
+        // Teddy where the starting chars aren't affected by case conversion
+        [InlineData("12", "45b", "789")]
+        [InlineData("123", "456", "789")]
+        [InlineData("123", "456a", "789b")]
+        // We'll expand these values to all case permutations
+        [InlineData("ab", "bc")]
+        [InlineData("ab", "c!")]
+        [InlineData("ab", "c!", "!%")]
+        // These won't be expanded as they would produce more than 8 permutations
+        [InlineData("ab", "bc", "c!")]
+        [InlineData("abc", "bc")]
+        // Rabin-Karp where one of the values is longer than what the implementation can match (17)
+        [InlineData("abc", "a012345678012345678")]
+        // Rabin-Karp where all of the values are longer than what the implementation can match (17)
+        [InlineData("a012345678012345678", "bc012345678012345678")]
+        // Teddy with exactly 8 values (filling all 8 buckets)
+        [InlineData("ab", "bc", "def", "ghi", "jkl", "mno", "pqr", "stu")]
+        [InlineData("abc", "def", "ghi", "jkl", "mno", "pqr", "stu", "vwx")]
+        // Teddy with more than 8 values
+        [InlineData("ab", "bc", "def", "ghi", "jkl", "mno", "pqr", "stu", "vwx")]
+        [InlineData("abc", "def", "ghi", "jkl", "mno", "pqr", "stu", "vwx", "yab")]
+        public static void SimpleIndexOfAnyValues(params string[] valuesArray)
+        {
+            TestCore(valuesArray);
+
+            // Test cases where the implementation differs for ASCII letters, different cases, non-letters.
+            if (valuesArray.Any(v => v.Contains('a')))
+            {
+                TestCore(valuesArray.Select(v => v.Replace('a', 'A')).ToArray());
+                TestCore(valuesArray.Select(v => v.Replace('a', '7')).ToArray());
+            }
+
+            int offset = valuesArray.Length / 2;
+            string original = valuesArray[offset];
+
+            // Test non-ASCII values
+            valuesArray[offset] = $"{original}\u00F6";
+            TestCore(valuesArray);
+
+            valuesArray[offset] = $"\u00F6{original}";
+            TestCore(valuesArray);
+
+            valuesArray[offset] = $"{original[0]}\u00F6{original.AsSpan(1)}";
+            TestCore(valuesArray);
+
+            // Test null chars in values
+            valuesArray[offset] = $"{original[0]}\0{original.AsSpan(1)}";
+            TestCore(valuesArray);
+
+            static void TestCore(string[] valuesArray)
+            {
+                Values_ImplementsSearchValuesBase(StringComparison.Ordinal, valuesArray);
+                Values_ImplementsSearchValuesBase(StringComparison.OrdinalIgnoreCase, valuesArray);
+
+                string values = string.Join(", ", valuesArray);
+
+                IndexOfAny(StringComparison.Ordinal, 0, valuesArray[0], values);
+                IndexOfAny(StringComparison.OrdinalIgnoreCase, 0, valuesArray[0], values);
+            }
+        }
+
         [Fact]
         [SkipOnPlatform(TestPlatforms.LinuxBionic, "Remote executor has problems with exit codes")]
         public static void IndexOfAny_CanProduceDifferentResultsUnderNls()
@@ -495,7 +710,7 @@ private static ReadOnlySpan<T> GetRandomSlice<T>(Random rng, ReadOnlySpan<T> spa
                 return slice.Slice(0, Math.Min(slice.Length, rng.Next(maxLength + 1)));
             }
 
-            private static void AssertionFailed(ReadOnlySpan<char> haystack, string[] needle, SearchValues<string> searchValues, StringComparison comparisonType, int expected, int actual)
+            public static void AssertionFailed(ReadOnlySpan<char> haystack, string[] needle, SearchValues<string> searchValues, StringComparison comparisonType, int expected, int actual)
             {
                 Type implType = searchValues.GetType();
                 string impl = $"{implType.Name} [{string.Join(", ", implType.GenericTypeArguments.Select(t => t.Name))}]";
diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj
index ba793f07c8f5..843d5e1b479c 100644
--- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj
+++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj
@@ -24,6 +24,7 @@
     <Compile Include="ParsersAndFormatters\Parser\RealParserTests.cs" />
     <Compile Include="ReadOnlySpan\Contains.byte.cs" />
     <Compile Include="ReadOnlySpan\Contains.T.cs" />
+    <Compile Include="ReadOnlySpan\CastUp.cs" />
     <Compile Include="Span\StringSearchValues.cs" />
     <Compile Include="Span\Reflection.cs" />
     <Compile Include="SequenceReader\Advance.cs" />
diff --git a/src/libraries/System.Memory/tests/TestHelpers.cs b/src/libraries/System.Memory/tests/TestHelpers.cs
index 54d314355f05..5fac5143a3b8 100644
--- a/src/libraries/System.Memory/tests/TestHelpers.cs
+++ b/src/libraries/System.Memory/tests/TestHelpers.cs
@@ -37,7 +37,7 @@ public static unsafe void ValidateNonNullEmpty<T>(this Span<T> span)
             Assert.True(span.IsEmpty);
 
             // Validate that empty Span is not normalized to null
-            Assert.True(Unsafe.AsPointer(ref MemoryMarshal.GetReference(span)) != null);
+            Assert.False(Unsafe.IsNullRef(ref MemoryMarshal.GetReference(span)));
         }
 
         public delegate void AssertThrowsAction<T>(Span<T> span);
@@ -98,7 +98,7 @@ public static unsafe void ValidateNonNullEmpty<T>(this ReadOnlySpan<T> span)
             Assert.True(span.IsEmpty);
 
             // Validate that empty Span is not normalized to null
-            Assert.True(Unsafe.AsPointer(ref MemoryMarshal.GetReference(span)) != null);
+            Assert.False(Unsafe.IsNullRef(ref MemoryMarshal.GetReference(span)));
         }
 
         public delegate void AssertThrowsActionReadOnly<T>(ReadOnlySpan<T> span);
diff --git a/src/libraries/System.Net.Http.WinHttpHandler/tests/FunctionalTests/WinHttpHandlerTest.cs b/src/libraries/System.Net.Http.WinHttpHandler/tests/FunctionalTests/WinHttpHandlerTest.cs
index f204e21536be..358344d6708a 100644
--- a/src/libraries/System.Net.Http.WinHttpHandler/tests/FunctionalTests/WinHttpHandlerTest.cs
+++ b/src/libraries/System.Net.Http.WinHttpHandler/tests/FunctionalTests/WinHttpHandlerTest.cs
@@ -98,7 +98,7 @@ public async Task SendAsync_SlowServerAndCancel_ThrowsTaskCanceledException()
 
         [OuterLoop]
         [Fact]
-        public async void SendAsync_SlowServerRespondsAfterDefaultReceiveTimeout_ThrowsHttpRequestException()
+        public async Task SendAsync_SlowServerRespondsAfterDefaultReceiveTimeout_ThrowsHttpRequestException()
         {
             var handler = new WinHttpHandler();
             using (var client = new HttpClient(handler))
@@ -122,6 +122,7 @@ await LoopbackServer.CreateServerAsync(async (server, url) =>
                         await triggerRequestWait.Task;
                         var _ = await t;
                     });
+                    _output.WriteLine($"ex: {ex}");
                     Assert.IsType<IOException>(ex.InnerException);
                     Assert.NotNull(ex.InnerException.InnerException);
                     Assert.Contains("The operation timed out", ex.InnerException.InnerException.Message);
diff --git a/src/libraries/System.Net.Http/src/System.Net.Http.csproj b/src/libraries/System.Net.Http/src/System.Net.Http.csproj
index 729f78dd752b..26e14365f292 100644
--- a/src/libraries/System.Net.Http/src/System.Net.Http.csproj
+++ b/src/libraries/System.Net.Http/src/System.Net.Http.csproj
@@ -169,7 +169,7 @@
   <!-- SocketsHttpHandler implementation -->
   <ItemGroup Condition="'$(TargetPlatformIdentifier)' != '' and '$(TargetPlatformIdentifier)' != 'browser'">
     <Compile Include="System\Net\Http\HttpHandlerDefaults.cs" />
-    <Compile Include="System\Net\Http\HttpMethod.Http3.cs" />
+    <Compile Include="System\Net\Http\HttpMethod.SocketsHttpHandler.cs" />
     <Compile Include="System\Net\Http\Headers\AltSvcHeaderParser.cs" />
     <Compile Include="System\Net\Http\Headers\AltSvcHeaderValue.cs" />
     <Compile Include="System\Net\Http\Headers\KnownHeader.Http2And3.cs" />
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/BrowserHttpHandler/BrowserHttpHandler.cs b/src/libraries/System.Net.Http/src/System/Net/Http/BrowserHttpHandler/BrowserHttpHandler.cs
index bbcd625d036d..050913e6f868 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/BrowserHttpHandler/BrowserHttpHandler.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/BrowserHttpHandler/BrowserHttpHandler.cs
@@ -511,7 +511,6 @@ public BrowserHttpReadStream(BrowserHttpController controller)
 
         public override async ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken)
         {
-            ArgumentNullException.ThrowIfNull(buffer, nameof(buffer));
             _controller.ThrowIfDisposed();
 
             MemoryHandle pinBuffer = buffer.Pin();
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/BrowserHttpHandler/BrowserHttpInterop.cs b/src/libraries/System.Net.Http/src/System/Net/Http/BrowserHttpHandler/BrowserHttpInterop.cs
index eee79765c246..c003cb7615e2 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/BrowserHttpHandler/BrowserHttpInterop.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/BrowserHttpHandler/BrowserHttpInterop.cs
@@ -147,7 +147,7 @@ public static async Task CancellationHelper(Task promise, CancellationToken canc
                     }
                 }, (promise, jsController)))
                 {
-                    await promise.ConfigureAwait(true);
+                    await promise.ConfigureAwait(false);
                 }
             }
             catch (OperationCanceledException oce) when (cancellationToken.IsCancellationRequested)
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/FormUrlEncodedContent.cs b/src/libraries/System.Net.Http/src/System/Net/Http/FormUrlEncodedContent.cs
index b2c6a3d9d780..f13a22f170fe 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/FormUrlEncodedContent.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/FormUrlEncodedContent.cs
@@ -28,7 +28,8 @@ private static byte[] GetContentByteArray(IEnumerable<KeyValuePair<string?, stri
             ArgumentNullException.ThrowIfNull(nameValueCollection);
 
             // Encode and concatenate data
-            StringBuilder builder = new StringBuilder();
+            var builder = new ValueStringBuilder(stackalloc char[256]);
+
             foreach (KeyValuePair<string?, string?> pair in nameValueCollection)
             {
                 if (builder.Length > 0)
@@ -36,22 +37,53 @@ private static byte[] GetContentByteArray(IEnumerable<KeyValuePair<string?, stri
                     builder.Append('&');
                 }
 
-                builder.Append(Encode(pair.Key));
+                Encode(ref builder, pair.Key);
                 builder.Append('=');
-                builder.Append(Encode(pair.Value));
+                Encode(ref builder, pair.Value);
             }
 
-            return HttpRuleParser.DefaultHttpEncoding.GetBytes(builder.ToString());
+            // EscapeDataString will always return an ASCII string and DefaultHttpEncoding is Latin1,
+            // so we know the output byte size will be the same as the builder length.
+            byte[] bytes = new byte[builder.Length];
+            HttpRuleParser.DefaultHttpEncoding.GetBytes(builder.AsSpan(), bytes);
+            builder.Dispose();
+            return bytes;
         }
 
-        private static string Encode(string? data)
+        private static void Encode(ref ValueStringBuilder builder, string? data)
         {
-            if (string.IsNullOrEmpty(data))
+            if (!string.IsNullOrEmpty(data))
             {
-                return string.Empty;
+                int charsWritten;
+                while (!Uri.TryEscapeDataString(data, builder.RawChars.Slice(builder.Length), out charsWritten))
+                {
+                    builder.EnsureCapacity(builder.Capacity + 1);
+                }
+
+                // Escape spaces as '+'.
+                if (data.Contains(' '))
+                {
+                    ReadOnlySpan<char> escapedChars = builder.RawChars.Slice(builder.Length, charsWritten);
+
+                    while (true)
+                    {
+                        int indexOfEscapedSpace = escapedChars.IndexOf("%20", StringComparison.Ordinal);
+                        if (indexOfEscapedSpace < 0)
+                        {
+                            builder.Append(escapedChars);
+                            break;
+                        }
+
+                        builder.Append(escapedChars.Slice(0, indexOfEscapedSpace));
+                        builder.Append('+');
+                        escapedChars = escapedChars.Slice(indexOfEscapedSpace + 3); // Skip "%20"
+                    }
+                }
+                else
+                {
+                    builder.Length += charsWritten;
+                }
             }
-            // Escape spaces as '+'.
-            return Uri.EscapeDataString(data).Replace("%20", "+");
         }
 
         protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context, CancellationToken cancellationToken) =>
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderDescriptor.cs b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderDescriptor.cs
index fac5d58bf282..a4d44b2a3071 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderDescriptor.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderDescriptor.cs
@@ -277,5 +277,9 @@ private static bool TryDecodeUtf8(ReadOnlySpan<byte> input, [NotNullWhen(true)]
             decoded = null;
             return false;
         }
+
+        public string Separator => Parser is { } parser ? parser.Separator : HttpHeaderParser.DefaultSeparator;
+
+        public byte[] SeparatorBytes => Parser is { } parser ? parser.SeparatorBytes : HttpHeaderParser.DefaultSeparatorBytes;
     }
 }
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderStringValues.cs b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderStringValues.cs
index a313a2306e78..6b5f4d2a666a 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderStringValues.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderStringValues.cs
@@ -45,7 +45,7 @@ internal HeaderStringValues(HeaderDescriptor descriptor, string[] values)
         public override string ToString() => _value switch
         {
             string value => value,
-            string[] values => string.Join(_header.Parser is HttpHeaderParser parser && parser.SupportsMultipleValues ? parser.Separator : HttpHeaderParser.DefaultSeparator, values),
+            string[] values => string.Join(_header.Separator, values),
             _ => string.Empty,
         };
 
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HttpHeaderParser.cs b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HttpHeaderParser.cs
index 2fa79f1f4181..711e37cf1469 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HttpHeaderParser.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HttpHeaderParser.cs
@@ -4,53 +4,42 @@
 using System.Collections;
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
+using System.Text;
 
 namespace System.Net.Http.Headers
 {
     internal abstract class HttpHeaderParser
     {
-        internal const string DefaultSeparator = ", ";
+        public const string DefaultSeparator = ", ";
+        public static readonly byte[] DefaultSeparatorBytes = ", "u8.ToArray();
 
-        private readonly bool _supportsMultipleValues;
-        private readonly string? _separator;
+        public bool SupportsMultipleValues { get; private set; }
 
-        public bool SupportsMultipleValues
-        {
-            get { return _supportsMultipleValues; }
-        }
+        public string Separator { get; private set; }
 
-        public string? Separator
-        {
-            get
-            {
-                Debug.Assert(_supportsMultipleValues);
-                return _separator;
-            }
-        }
+        public byte[] SeparatorBytes { get; private set; }
 
         // If ValueType implements Equals() as required, there is no need to provide a comparer. A comparer is needed
         // e.g. if we want to compare strings using case-insensitive comparison.
-        public virtual IEqualityComparer? Comparer
-        {
-            get { return null; }
-        }
+        public virtual IEqualityComparer? Comparer => null;
 
         protected HttpHeaderParser(bool supportsMultipleValues)
         {
-            _supportsMultipleValues = supportsMultipleValues;
-
-            if (supportsMultipleValues)
-            {
-                _separator = DefaultSeparator;
-            }
+            SupportsMultipleValues = supportsMultipleValues;
+            Separator = DefaultSeparator;
+            SeparatorBytes = DefaultSeparatorBytes;
         }
 
-        protected HttpHeaderParser(bool supportsMultipleValues, string separator)
+        protected HttpHeaderParser(bool supportsMultipleValues, string separator) : this(supportsMultipleValues)
         {
             Debug.Assert(!string.IsNullOrEmpty(separator));
+            Debug.Assert(Ascii.IsValid(separator));
 
-            _supportsMultipleValues = supportsMultipleValues;
-            _separator = separator;
+            if (supportsMultipleValues)
+            {
+                Separator = separator;
+                SeparatorBytes = Encoding.ASCII.GetBytes(separator);
+            }
         }
 
         // If a parser supports multiple values, a call to ParseValue/TryParseValue should return a value for 'index'
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HttpHeaders.cs b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HttpHeaders.cs
index cf79171bda6c..56015f488aaa 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HttpHeaders.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HttpHeaders.cs
@@ -258,7 +258,7 @@ public override string ToString()
                 {
                     // Note that if we get multiple values for a header that doesn't support multiple values, we'll
                     // just separate the values using a comma (default separator).
-                    string? separator = entry.Key.Parser is HttpHeaderParser parser && parser.SupportsMultipleValues ? parser.Separator : HttpHeaderParser.DefaultSeparator;
+                    string separator = entry.Key.Separator;
 
                     Debug.Assert(multiValue is not null && multiValue.Length > 0);
                     vsb.Append(multiValue[0]);
@@ -289,8 +289,7 @@ internal string GetHeaderString(HeaderDescriptor descriptor)
 
                 // Note that if we get multiple values for a header that doesn't support multiple values, we'll
                 // just separate the values using a comma (default separator).
-                string? separator = descriptor.Parser != null && descriptor.Parser.SupportsMultipleValues ? descriptor.Parser.Separator : HttpHeaderParser.DefaultSeparator;
-                return string.Join(separator, multiValue!);
+                return string.Join(descriptor.Separator, multiValue!);
             }
 
             return string.Empty;
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/HttpMethod.Http3.cs b/src/libraries/System.Net.Http/src/System/Net/Http/HttpMethod.Http3.cs
deleted file mode 100644
index c8580d139456..000000000000
--- a/src/libraries/System.Net.Http/src/System/Net/Http/HttpMethod.Http3.cs
+++ /dev/null
@@ -1,29 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Net.Http.QPack;
-using System.Threading;
-
-namespace System.Net.Http
-{
-    public partial class HttpMethod
-    {
-        private byte[]? _http3EncodedBytes;
-
-        internal byte[] Http3EncodedBytes
-        {
-            get
-            {
-                byte[]? http3EncodedBytes = Volatile.Read(ref _http3EncodedBytes);
-                if (http3EncodedBytes is null)
-                {
-                    Volatile.Write(ref _http3EncodedBytes, http3EncodedBytes = _http3Index is int index && index >= 0 ?
-                        QPackEncoder.EncodeStaticIndexedHeaderFieldToArray(index) :
-                        QPackEncoder.EncodeLiteralHeaderFieldWithStaticNameReferenceToArray(H3StaticTable.MethodGet, _method));
-                }
-
-                return http3EncodedBytes;
-            }
-        }
-    }
-}
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/HttpMethod.SocketsHttpHandler.cs b/src/libraries/System.Net.Http/src/System/Net/Http/HttpMethod.SocketsHttpHandler.cs
new file mode 100644
index 000000000000..c833d1e4da71
--- /dev/null
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/HttpMethod.SocketsHttpHandler.cs
@@ -0,0 +1,118 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Net.Http.HPack;
+using System.Net.Http.QPack;
+using System.Text;
+
+namespace System.Net.Http
+{
+    public partial class HttpMethod
+    {
+        private byte[]? _http1EncodedBytes;
+        private byte[]? _http2EncodedBytes;
+        private byte[]? _http3EncodedBytes;
+        private int _http3Index;
+
+        internal bool MustHaveRequestBody { get; private set; }
+        internal bool IsConnect { get; private set; }
+        internal bool IsHead { get; private set; }
+
+        partial void Initialize(string method)
+        {
+            Initialize(GetKnownMethod(method)?._http3Index ?? 0);
+        }
+
+        partial void Initialize(int http3Index)
+        {
+            _http3Index = http3Index;
+
+            if (http3Index == H3StaticTable.MethodConnect)
+            {
+                IsConnect = true;
+            }
+            else if (http3Index == H3StaticTable.MethodHead)
+            {
+                IsHead = true;
+            }
+            else
+            {
+                MustHaveRequestBody = http3Index is not (H3StaticTable.MethodGet or H3StaticTable.MethodOptions or H3StaticTable.MethodDelete);
+            }
+        }
+
+        internal byte[] Http1EncodedBytes => _http1EncodedBytes ?? CreateHttp1EncodedBytes();
+        internal byte[] Http2EncodedBytes => _http2EncodedBytes ?? CreateHttp2EncodedBytes();
+        internal byte[] Http3EncodedBytes => _http3EncodedBytes ?? CreateHttp3EncodedBytes();
+
+        private byte[] CreateHttp1EncodedBytes()
+        {
+            HttpMethod? knownMethod = GetKnownMethod(Method);
+            byte[]? bytes = knownMethod?._http1EncodedBytes;
+
+            if (bytes is null)
+            {
+                Debug.Assert(Ascii.IsValid(Method));
+
+                string method = knownMethod?.Method ?? Method;
+                bytes = new byte[method.Length + 1];
+                Ascii.FromUtf16(method, bytes, out _);
+                bytes[^1] = (byte)' ';
+
+                if (knownMethod is not null)
+                {
+                    knownMethod._http1EncodedBytes = bytes;
+                }
+            }
+
+            _http1EncodedBytes = bytes;
+            return bytes;
+        }
+
+        private byte[] CreateHttp2EncodedBytes()
+        {
+            HttpMethod? knownMethod = GetKnownMethod(Method);
+            byte[]? bytes = knownMethod?._http2EncodedBytes;
+
+            if (bytes is null)
+            {
+                bytes = _http3Index switch
+                {
+                    H3StaticTable.MethodGet => [0x80 | H2StaticTable.MethodGet],
+                    H3StaticTable.MethodPost => [0x80 | H2StaticTable.MethodPost],
+                    _ => HPackEncoder.EncodeLiteralHeaderFieldWithoutIndexingToAllocatedArray(H2StaticTable.MethodGet, knownMethod?.Method ?? Method)
+                };
+
+                if (knownMethod is not null)
+                {
+                    knownMethod._http2EncodedBytes = bytes;
+                }
+            }
+
+            _http2EncodedBytes = bytes;
+            return bytes;
+        }
+
+        private byte[] CreateHttp3EncodedBytes()
+        {
+            HttpMethod? knownMethod = GetKnownMethod(Method);
+            byte[]? bytes = knownMethod?._http3EncodedBytes;
+
+            if (bytes is null)
+            {
+                bytes = _http3Index > 0
+                    ? QPackEncoder.EncodeStaticIndexedHeaderFieldToArray(_http3Index)
+                    : QPackEncoder.EncodeLiteralHeaderFieldWithStaticNameReferenceToArray(H3StaticTable.MethodGet, knownMethod?.Method ?? Method);
+
+                if (knownMethod is not null)
+                {
+                    knownMethod._http3EncodedBytes = bytes;
+                }
+            }
+
+            _http3EncodedBytes = bytes;
+            return bytes;
+        }
+    }
+}
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/HttpMethod.cs b/src/libraries/System.Net.Http/src/System/Net/Http/HttpMethod.cs
index e8ed93150167..fb3c7fe5ea43 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/HttpMethod.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/HttpMethod.cs
@@ -1,7 +1,6 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Net.Http.QPack;
 
@@ -10,71 +9,22 @@ namespace System.Net.Http
     public partial class HttpMethod : IEquatable<HttpMethod>
     {
         private readonly string _method;
-        private readonly int? _http3Index;
-
         private int _hashcode;
 
-        private static readonly HttpMethod s_getMethod = new HttpMethod("GET", http3StaticTableIndex: H3StaticTable.MethodGet);
-        private static readonly HttpMethod s_putMethod = new HttpMethod("PUT", http3StaticTableIndex: H3StaticTable.MethodPut);
-        private static readonly HttpMethod s_postMethod = new HttpMethod("POST", http3StaticTableIndex: H3StaticTable.MethodPost);
-        private static readonly HttpMethod s_deleteMethod = new HttpMethod("DELETE", http3StaticTableIndex: H3StaticTable.MethodDelete);
-        private static readonly HttpMethod s_headMethod = new HttpMethod("HEAD", http3StaticTableIndex: H3StaticTable.MethodHead);
-        private static readonly HttpMethod s_optionsMethod = new HttpMethod("OPTIONS", http3StaticTableIndex: H3StaticTable.MethodOptions);
-        private static readonly HttpMethod s_traceMethod = new HttpMethod("TRACE", -1);
-        private static readonly HttpMethod s_patchMethod = new HttpMethod("PATCH", -1);
-        private static readonly HttpMethod s_connectMethod = new HttpMethod("CONNECT", http3StaticTableIndex: H3StaticTable.MethodConnect);
-
-        public static HttpMethod Get
-        {
-            get { return s_getMethod; }
-        }
-
-        public static HttpMethod Put
-        {
-            get { return s_putMethod; }
-        }
-
-        public static HttpMethod Post
-        {
-            get { return s_postMethod; }
-        }
-
-        public static HttpMethod Delete
-        {
-            get { return s_deleteMethod; }
-        }
-
-        public static HttpMethod Head
-        {
-            get { return s_headMethod; }
-        }
-
-        public static HttpMethod Options
-        {
-            get { return s_optionsMethod; }
-        }
-
-        public static HttpMethod Trace
-        {
-            get { return s_traceMethod; }
-        }
-
-        public static HttpMethod Patch
-        {
-            get { return s_patchMethod; }
-        }
+        public static HttpMethod Get { get; } = new("GET", H3StaticTable.MethodGet);
+        public static HttpMethod Put { get; } = new("PUT", H3StaticTable.MethodPut);
+        public static HttpMethod Post { get; } = new("POST", H3StaticTable.MethodPost);
+        public static HttpMethod Delete { get; } = new("DELETE", H3StaticTable.MethodDelete);
+        public static HttpMethod Head { get; } = new("HEAD", H3StaticTable.MethodHead);
+        public static HttpMethod Options { get; } = new("OPTIONS", H3StaticTable.MethodOptions);
+        public static HttpMethod Trace { get; } = new("TRACE", http3StaticTableIndex: -1);
+        public static HttpMethod Patch { get; } = new("PATCH", http3StaticTableIndex: -1);
 
         /// <summary>Gets the HTTP CONNECT protocol method.</summary>
         /// <value>The HTTP CONNECT method.</value>
-        public static HttpMethod Connect
-        {
-            get { return s_connectMethod; }
-        }
+        public static HttpMethod Connect { get; } = new("CONNECT", H3StaticTable.MethodConnect);
 
-        public string Method
-        {
-            get { return _method; }
-        }
+        public string Method => _method;
 
         public HttpMethod(string method)
         {
@@ -85,39 +35,26 @@ public HttpMethod(string method)
             }
 
             _method = method;
+            Initialize(method);
         }
 
         private HttpMethod(string method, int http3StaticTableIndex)
         {
             _method = method;
-            _http3Index = http3StaticTableIndex;
+            Initialize(http3StaticTableIndex);
         }
 
-        #region IEquatable<HttpMethod> Members
-
-        public bool Equals([NotNullWhen(true)] HttpMethod? other)
-        {
-            if (other is null)
-            {
-                return false;
-            }
-
-            if (object.ReferenceEquals(_method, other._method))
-            {
-                // Strings are static, so there is a good chance that two equal methods use the same reference
-                // (unless they differ in case).
-                return true;
-            }
-
-            return string.Equals(_method, other._method, StringComparison.OrdinalIgnoreCase);
-        }
+        // SocketsHttpHandler-specific implementation has extra init logic.
+        partial void Initialize(int http3Index);
+        partial void Initialize(string method);
 
-        #endregion
+        public bool Equals([NotNullWhen(true)] HttpMethod? other) =>
+            other is not null &&
+            string.Equals(_method, other._method, StringComparison.OrdinalIgnoreCase);
 
-        public override bool Equals([NotNullWhen(true)] object? obj)
-        {
-            return Equals(obj as HttpMethod);
-        }
+        public override bool Equals([NotNullWhen(true)] object? obj) =>
+            obj is HttpMethod method &&
+            Equals(method);
 
         public override int GetHashCode()
         {
@@ -129,22 +66,15 @@ public override int GetHashCode()
             return _hashcode;
         }
 
-        public override string ToString()
-        {
-            return _method;
-        }
+        public override string ToString() => _method;
 
-        public static bool operator ==(HttpMethod? left, HttpMethod? right)
-        {
-            return left is null || right is null ?
-                ReferenceEquals(left, right) :
-                left.Equals(right);
-        }
+        public static bool operator ==(HttpMethod? left, HttpMethod? right) =>
+            left is null || right is null
+                ? ReferenceEquals(left, right)
+                : left.Equals(right);
 
-        public static bool operator !=(HttpMethod? left, HttpMethod? right)
-        {
-            return !(left == right);
-        }
+        public static bool operator !=(HttpMethod? left, HttpMethod? right) =>
+            !(left == right);
 
         /// <summary>Parses the provided <paramref name="method"/> into an <see cref="HttpMethod"/> instance.</summary>
         /// <param name="method">The method to parse.</param>
@@ -159,41 +89,24 @@ public static HttpMethod Parse(ReadOnlySpan<char> method) =>
             GetKnownMethod(method) ??
             new HttpMethod(method.ToString());
 
-        /// <summary>
-        /// Returns a singleton method instance with a capitalized method name for the supplied method
-        /// if it's known; otherwise, returns the original.
-        /// </summary>
-        internal static HttpMethod Normalize(HttpMethod method)
-        {
-            Debug.Assert(method != null);
-            Debug.Assert(!string.IsNullOrEmpty(method._method));
-
-            // _http3Index is only set for the singleton instances, so if it's not null,
-            // we can avoid the lookup.  Otherwise, look up the method instance and return the
-            // normalized instance if it's found.
-            return method._http3Index is null && GetKnownMethod(method._method) is HttpMethod match ?
-                match :
-                method;
-        }
-
         internal static HttpMethod? GetKnownMethod(ReadOnlySpan<char> method)
         {
             if (method.Length >= 3) // 3 == smallest known method
             {
                 HttpMethod? match = (method[0] | 0x20) switch
                 {
-                    'c' => s_connectMethod,
-                    'd' => s_deleteMethod,
-                    'g' => s_getMethod,
-                    'h' => s_headMethod,
-                    'o' => s_optionsMethod,
+                    'c' => Connect,
+                    'd' => Delete,
+                    'g' => Get,
+                    'h' => Head,
+                    'o' => Options,
                     'p' => method.Length switch
                     {
-                        3 => s_putMethod,
-                        4 => s_postMethod,
-                        _ => s_patchMethod,
+                        3 => Put,
+                        4 => Post,
+                        _ => Patch,
                     },
-                    't' => s_traceMethod,
+                    't' => Trace,
                     _ => null,
                 };
 
@@ -206,17 +119,5 @@ internal static HttpMethod Normalize(HttpMethod method)
 
             return null;
         }
-
-        internal bool MustHaveRequestBody
-        {
-            get
-            {
-                // Normalize before calling this
-                Debug.Assert(ReferenceEquals(this, Normalize(this)));
-
-                return !ReferenceEquals(this, HttpMethod.Get) && !ReferenceEquals(this, HttpMethod.Head) && !ReferenceEquals(this, HttpMethod.Connect) &&
-                       !ReferenceEquals(this, HttpMethod.Options) && !ReferenceEquals(this, HttpMethod.Delete);
-            }
-        }
     }
 }
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/HttpTelemetry.cs b/src/libraries/System.Net.Http/src/System/Net/Http/HttpTelemetry.cs
index 9989bc4568e6..3dfc789919b3 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/HttpTelemetry.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/HttpTelemetry.cs
@@ -339,13 +339,13 @@ private unsafe void WriteEvent(int eventId, byte arg1, byte arg2, long arg3, str
             arg5 ??= "";
             arg7 ??= "";
 
-            const int NumEventDatas = 7;
-            EventData* descrs = stackalloc EventData[NumEventDatas];
-
             fixed (char* arg4Ptr = arg4)
             fixed (char* arg5Ptr = arg5)
             fixed (char* arg7Ptr = arg7)
             {
+                const int NumEventDatas = 7;
+                EventData* descrs = stackalloc EventData[NumEventDatas];
+
                 descrs[0] = new EventData
                 {
                     DataPointer = (IntPtr)(&arg1),
@@ -381,9 +381,9 @@ private unsafe void WriteEvent(int eventId, byte arg1, byte arg2, long arg3, str
                     DataPointer = (IntPtr)arg7Ptr,
                     Size = (arg7.Length + 1) * sizeof(char)
                 };
-            }
 
-            WriteEventCore(eventId, NumEventDatas, descrs);
+                WriteEventCore(eventId, NumEventDatas, descrs);
+            }
         }
     }
 }
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/DecompressionHandler.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/DecompressionHandler.cs
index 939a29e06f39..1ea55ac07f61 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/DecompressionHandler.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/DecompressionHandler.cs
@@ -78,15 +78,15 @@ internal override async ValueTask<HttpResponseMessage> SendAsync(HttpRequestMess
                     last = encoding;
                 }
 
-                if (GZipEnabled && last == Gzip)
+                if (GZipEnabled && string.Equals(last, Gzip, StringComparison.OrdinalIgnoreCase))
                 {
                     response.Content = new GZipDecompressedContent(response.Content);
                 }
-                else if (DeflateEnabled && last == Deflate)
+                else if (DeflateEnabled && string.Equals(last, Deflate, StringComparison.OrdinalIgnoreCase))
                 {
                     response.Content = new DeflateDecompressedContent(response.Content);
                 }
-                else if (BrotliEnabled && last == Brotli)
+                else if (BrotliEnabled && string.Equals(last, Brotli, StringComparison.OrdinalIgnoreCase))
                 {
                     response.Content = new BrotliDecompressedContent(response.Content);
                 }
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http2Connection.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http2Connection.cs
index ba233513cbf0..e91ccefff630 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http2Connection.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http2Connection.cs
@@ -257,6 +257,7 @@ private void Shutdown()
             if (NetEventSource.Log.IsEnabled()) Trace($"{nameof(_shutdown)}={_shutdown}, {nameof(_abortException)}={_abortException}");
 
             Debug.Assert(Monitor.IsEntered(SyncObject));
+            Debug.Assert(!_pool.HasSyncObjLock);
 
             if (!_shutdown)
             {
@@ -276,6 +277,8 @@ private void Shutdown()
 
         public bool TryReserveStream()
         {
+            Debug.Assert(!_pool.HasSyncObjLock);
+
             lock (SyncObject)
             {
                 if (_shutdown)
@@ -302,6 +305,8 @@ public bool TryReserveStream()
         // Otherwise, will be called when the request is complete and stream is closed.
         public void ReleaseStream()
         {
+            Debug.Assert(!_pool.HasSyncObjLock);
+
             lock (SyncObject)
             {
                 if (NetEventSource.Log.IsEnabled()) Trace($"{nameof(_streamsInUse)}={_streamsInUse}");
@@ -333,6 +338,8 @@ public void ReleaseStream()
         // Returns false to indicate that the connection is shutting down and cannot be used anymore
         public Task<bool> WaitForAvailableStreamsAsync()
         {
+            Debug.Assert(!_pool.HasSyncObjLock);
+
             lock (SyncObject)
             {
                 Debug.Assert(_availableStreamsWaiter is null, "As used currently, shouldn't already have a waiter");
@@ -637,7 +644,7 @@ private async ValueTask ProcessHeadersFrame(FrameHeader frameHeader)
             if (http2Stream != null)
             {
                 http2Stream.OnHeadersStart();
-                _rttEstimator.OnDataOrHeadersReceived(this);
+                _rttEstimator.OnDataOrHeadersReceived(this, sendWindowUpdateBeforePing: true);
                 headersHandler = http2Stream;
             }
             else
@@ -730,6 +737,7 @@ private void ProcessAltSvcFrame(FrameHeader frameHeader)
         {
             if (NetEventSource.Log.IsEnabled()) Trace($"{frameHeader}");
             Debug.Assert(frameHeader.Type == FrameType.AltSvc);
+            Debug.Assert(!Monitor.IsEntered(SyncObject));
 
             ReadOnlySpan<byte> span = _incomingBuffer.ActiveSpan.Slice(0, frameHeader.PayloadLength);
 
@@ -765,22 +773,20 @@ private void ProcessDataFrame(FrameHeader frameHeader)
             // Just ignore the frame in this case.
 
             ReadOnlySpan<byte> frameData = GetFrameData(_incomingBuffer.ActiveSpan.Slice(0, frameHeader.PayloadLength), hasPad: frameHeader.PaddedFlag, hasPriority: false);
+            bool endStream = frameHeader.EndStreamFlag;
 
-            if (http2Stream != null)
+            if (frameData.Length > 0 || endStream)
             {
-                bool endStream = frameHeader.EndStreamFlag;
-
-                http2Stream.OnResponseData(frameData, endStream);
-
-                if (!endStream && frameData.Length > 0)
-                {
-                    _rttEstimator.OnDataOrHeadersReceived(this);
-                }
+                http2Stream?.OnResponseData(frameData, endStream);
             }
 
             if (frameData.Length > 0)
             {
-                ExtendWindow(frameData.Length);
+                bool windowUpdateSent = ExtendWindow(frameData.Length);
+                if (http2Stream is not null && !endStream)
+                {
+                    _rttEstimator.OnDataOrHeadersReceived(this, sendWindowUpdateBeforePing: !windowUpdateSent);
+                }
             }
 
             _incomingBuffer.Discard(frameHeader.PayloadLength);
@@ -1306,6 +1312,8 @@ private Task SendRstStreamAsync(int streamId, Http2ProtocolErrorCode errorCode)
 
         internal void HeartBeat()
         {
+            Debug.Assert(!_pool.HasSyncObjLock);
+
             if (_shutdown)
                 return;
 
@@ -1357,7 +1365,7 @@ private void WriteLiteralHeader(string name, ReadOnlySpan<string> values, Encodi
             if (NetEventSource.Log.IsEnabled()) Trace($"{nameof(name)}={name}, {nameof(values)}={string.Join(", ", values.ToArray())}");
 
             int bytesWritten;
-            while (!HPackEncoder.EncodeLiteralHeaderFieldWithoutIndexingNewName(name, values, HttpHeaderParser.DefaultSeparator, valueEncoding, headerBuffer.AvailableSpan, out bytesWritten))
+            while (!HPackEncoder.EncodeLiteralHeaderFieldWithoutIndexingNewName(name, values, HttpHeaderParser.DefaultSeparatorBytes, valueEncoding, headerBuffer.AvailableSpan, out bytesWritten))
             {
                 headerBuffer.Grow();
             }
@@ -1365,9 +1373,9 @@ private void WriteLiteralHeader(string name, ReadOnlySpan<string> values, Encodi
             headerBuffer.Commit(bytesWritten);
         }
 
-        private void WriteLiteralHeaderValues(ReadOnlySpan<string> values, string? separator, Encoding? valueEncoding, ref ArrayBuffer headerBuffer)
+        private void WriteLiteralHeaderValues(ReadOnlySpan<string> values, byte[]? separator, Encoding? valueEncoding, ref ArrayBuffer headerBuffer)
         {
-            if (NetEventSource.Log.IsEnabled()) Trace($"{nameof(values)}={string.Join(separator, values.ToArray())}");
+            if (NetEventSource.Log.IsEnabled()) Trace($"{nameof(values)}={string.Join(Encoding.ASCII.GetString(separator ?? []), values.ToArray())}");
 
             int bytesWritten;
             while (!HPackEncoder.EncodeStringLiterals(values, separator, valueEncoding, headerBuffer.AvailableSpan, out bytesWritten))
@@ -1446,22 +1454,19 @@ private int WriteHeaderCollection(HttpRequestMessage request, HttpHeaders header
                             continue;
                         }
 
-                        // For all other known headers, send them via their pre-encoded name and the associated value.
-                        WriteBytes(knownHeader.Http2EncodedName, ref headerBuffer);
-                        string? separator = null;
-                        if (headerValues.Length > 1)
+                        // Extended connect requests will use the response content stream for bidirectional communication.
+                        // We will ignore any content set for such requests in Http2Stream.SendRequestBodyAsync, as it has no defined semantics.
+                        // Drop the Content-Length header as well in the unlikely case it was set.
+                        if (knownHeader == KnownHeaders.ContentLength && request.IsExtendedConnectRequest)
                         {
-                            HttpHeaderParser? parser = header.Key.Parser;
-                            if (parser != null && parser.SupportsMultipleValues)
-                            {
-                                separator = parser.Separator;
-                            }
-                            else
-                            {
-                                separator = HttpHeaderParser.DefaultSeparator;
-                            }
+                            continue;
                         }
 
+                        // For all other known headers, send them via their pre-encoded name and the associated value.
+                        WriteBytes(knownHeader.Http2EncodedName, ref headerBuffer);
+
+                        byte[]? separator = headerValues.Length > 1 ? header.Key.SeparatorBytes : null;
+
                         WriteLiteralHeaderValues(headerValues, separator, valueEncoding, ref headerBuffer);
                     }
                 }
@@ -1479,27 +1484,7 @@ private void WriteHeaders(HttpRequestMessage request, ref ArrayBuffer headerBuff
         {
             if (NetEventSource.Log.IsEnabled()) Trace("");
 
-            // HTTP2 does not support Transfer-Encoding: chunked, so disable this on the request.
-            if (request.HasHeaders && request.Headers.TransferEncodingChunked == true)
-            {
-                request.Headers.TransferEncodingChunked = false;
-            }
-
-            HttpMethod normalizedMethod = HttpMethod.Normalize(request.Method);
-
-            // Method is normalized so we can do reference equality here.
-            if (ReferenceEquals(normalizedMethod, HttpMethod.Get))
-            {
-                WriteIndexedHeader(H2StaticTable.MethodGet, ref headerBuffer);
-            }
-            else if (ReferenceEquals(normalizedMethod, HttpMethod.Post))
-            {
-                WriteIndexedHeader(H2StaticTable.MethodPost, ref headerBuffer);
-            }
-            else
-            {
-                WriteIndexedHeader(H2StaticTable.MethodGet, normalizedMethod.Method, ref headerBuffer);
-            }
+            WriteBytes(request.Method.Http2EncodedBytes, ref headerBuffer);
 
             WriteIndexedHeader(_pool.IsSecure ? H2StaticTable.SchemeHttps : H2StaticTable.SchemeHttp, ref headerBuffer);
 
@@ -1527,6 +1512,12 @@ private void WriteHeaders(HttpRequestMessage request, ref ArrayBuffer headerBuff
 
             if (request.HasHeaders)
             {
+                // HTTP2 does not support Transfer-Encoding: chunked, so disable this on the request.
+                if (request.Headers.TransferEncodingChunked == true)
+                {
+                    request.Headers.TransferEncodingChunked = false;
+                }
+
                 if (request.Headers.Protocol is string protocol)
                 {
                     WriteBytes(ProtocolLiteralHeaderBytes, ref headerBuffer);
@@ -1555,7 +1546,7 @@ private void WriteHeaders(HttpRequestMessage request, ref ArrayBuffer headerBuff
             {
                 // Write out Content-Length: 0 header to indicate no body,
                 // unless this is a method that never has a body.
-                if (normalizedMethod.MustHaveRequestBody)
+                if (request.Method.MustHaveRequestBody)
                 {
                     WriteBytes(KnownHeaders.ContentLength.Http2EncodedName, ref headerBuffer);
                     WriteLiteralHeaderValue("0", valueEncoding: null, ref headerBuffer);
@@ -1570,7 +1561,7 @@ private void WriteHeaders(HttpRequestMessage request, ref ArrayBuffer headerBuff
             // The headerListSize is an approximation of the total header length.
             // This is acceptable as long as the value is always >= the actual length.
             // We must avoid ever sending more than the server allowed.
-            // This approach must be revisted if we ever support the dynamic table or compression when sending requests.
+            // This approach must be revisited if we ever support the dynamic table or compression when sending requests.
             headerListSize += headerBuffer.ActiveLength;
 
             uint maxHeaderListSize = _maxHeaderListSize;
@@ -1772,36 +1763,46 @@ private Task SendWindowUpdateAsync(int streamId, int amount)
             });
         }
 
-        private void ExtendWindow(int amount)
+        private bool ExtendWindow(int amount)
         {
             if (NetEventSource.Log.IsEnabled()) Trace($"{nameof(amount)}={amount}");
             Debug.Assert(amount > 0);
+            Debug.Assert(_pendingWindowUpdate < ConnectionWindowThreshold);
 
-            int windowUpdateSize;
-            lock (SyncObject)
+            _pendingWindowUpdate += amount;
+            if (_pendingWindowUpdate < ConnectionWindowThreshold)
             {
-                Debug.Assert(_pendingWindowUpdate < ConnectionWindowThreshold);
-
-                _pendingWindowUpdate += amount;
-                if (_pendingWindowUpdate < ConnectionWindowThreshold)
-                {
-                    if (NetEventSource.Log.IsEnabled()) Trace($"{nameof(_pendingWindowUpdate)} {_pendingWindowUpdate} < {ConnectionWindowThreshold}.");
-                    return;
-                }
-
-                windowUpdateSize = _pendingWindowUpdate;
-                _pendingWindowUpdate = 0;
+                if (NetEventSource.Log.IsEnabled()) Trace($"{nameof(_pendingWindowUpdate)} {_pendingWindowUpdate} < {ConnectionWindowThreshold}.");
+                return false;
             }
 
+            int windowUpdateSize = _pendingWindowUpdate;
+            _pendingWindowUpdate = 0;
+
             LogExceptions(SendWindowUpdateAsync(0, windowUpdateSize));
+            return true;
+        }
+
+        private bool ForceSendConnectionWindowUpdate()
+        {
+            if (NetEventSource.Log.IsEnabled()) Trace($"{nameof(_pendingWindowUpdate)}={_pendingWindowUpdate}");
+            if (_pendingWindowUpdate == 0) return false;
+
+            LogExceptions(SendWindowUpdateAsync(0, _pendingWindowUpdate));
+            _pendingWindowUpdate = 0;
+            return true;
         }
 
         public override long GetIdleTicks(long nowTicks)
         {
-            lock (SyncObject)
-            {
-                return _streamsInUse == 0 ? base.GetIdleTicks(nowTicks) : 0;
-            }
+            // The pool is holding the lock as part of its scavenging logic.
+            // We must not lock on Http2Connection.SyncObj here as that could lead to lock ordering problems.
+            Debug.Assert(_pool.HasSyncObjLock);
+
+            // There is a race condition here where the connection pool may see this connection as idle right before
+            // we start processing a new request and start its disposal. This is okay as we will either
+            // return false from TryReserveStream, or process pending requests before tearing down the transport.
+            return _streamsInUse == 0 ? base.GetIdleTicks(nowTicks) : 0;
         }
 
         /// <summary>Abort all streams and cause further processing to fail.</summary>
@@ -1990,6 +1991,7 @@ private static TaskCompletionSourceWithCancellation<bool> CreateSuccessfullyComp
         public async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, bool async, CancellationToken cancellationToken)
         {
             Debug.Assert(async);
+            Debug.Assert(!_pool.HasSyncObjLock);
             if (NetEventSource.Log.IsEnabled()) Trace($"Sending request: {request}");
 
             try
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http2Stream.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http2Stream.cs
index 8f8d81ec6b24..4b1f4ccb3ee6 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http2Stream.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http2Stream.cs
@@ -105,7 +105,9 @@ public Http2Stream(HttpRequestMessage request, Http2Connection connection)
 
                 _headerBudgetRemaining = connection._pool.Settings.MaxResponseHeadersByteLength;
 
-                if (_request.Content == null)
+                // Extended connect requests will use the response content stream for bidirectional communication.
+                // We will ignore any content set for such requests in SendRequestBodyAsync, as it has no defined semantics.
+                if (_request.Content == null || _request.IsExtendedConnectRequest)
                 {
                     _requestCompletionState = StreamCompletionState.Completed;
                     if (_request.IsExtendedConnectRequest)
@@ -173,7 +175,9 @@ public HttpResponseMessage GetAndClearResponse()
 
             public async Task SendRequestBodyAsync(CancellationToken cancellationToken)
             {
-                if (_request.Content == null)
+                // Extended connect requests will use the response content stream for bidirectional communication.
+                // Ignore any content set for such requests, as it has no defined semantics.
+                if (_request.Content == null || _request.IsExtendedConnectRequest)
                 {
                     Debug.Assert(_requestCompletionState == StreamCompletionState.Completed);
                     return;
@@ -250,6 +254,7 @@ public async Task SendRequestBodyAsync(CancellationToken cancellationToken)
                             // and we also don't want to propagate any error to the caller, in particular for non-duplex scenarios.
                             Debug.Assert(_responseCompletionState == StreamCompletionState.Completed);
                             _requestCompletionState = StreamCompletionState.Completed;
+                            Debug.Assert(!ConnectProtocolEstablished);
                             Complete();
                             return;
                         }
@@ -261,6 +266,7 @@ public async Task SendRequestBodyAsync(CancellationToken cancellationToken)
 
                         _requestCompletionState = StreamCompletionState.Failed;
                         SendReset();
+                        Debug.Assert(!ConnectProtocolEstablished);
                         Complete();
                     }
 
@@ -313,6 +319,7 @@ public async Task SendRequestBodyAsync(CancellationToken cancellationToken)
 
                         if (complete)
                         {
+                            Debug.Assert(!ConnectProtocolEstablished);
                             Complete();
                         }
                     }
@@ -420,7 +427,17 @@ private void Cancel()
                     if (sendReset)
                     {
                         SendReset();
-                        Complete();
+
+                        // Extended CONNECT notes:
+                        //
+                        // To prevent from calling it *twice*, Extended CONNECT stream's Complete() is only
+                        // called from CloseResponseBody(), as CloseResponseBody() is *always* called
+                        // from Extended CONNECT stream's Dispose().
+
+                        if (!ConnectProtocolEstablished)
+                        {
+                            Complete();
+                        }
                     }
                 }
 
@@ -810,7 +827,20 @@ public void OnHeadersComplete(bool endStream)
                         Debug.Assert(_responseCompletionState == StreamCompletionState.InProgress, $"Response already completed with state={_responseCompletionState}");
 
                         _responseCompletionState = StreamCompletionState.Completed;
-                        if (_requestCompletionState == StreamCompletionState.Completed)
+
+                        // Extended CONNECT notes:
+                        //
+                        // To prevent from calling it *prematurely*, Extended CONNECT stream's Complete() is only
+                        // called from CloseResponseBody(), as CloseResponseBody() is *only* called
+                        // from Extended CONNECT stream's Dispose().
+                        //
+                        // Due to bidirectional streaming nature of the Extended CONNECT request,
+                        // the *write side* of the stream can only be completed by calling Dispose().
+                        //
+                        // The streaming in both ways happens over the single "response" stream instance, which makes
+                        // _requestCompletionState *not indicative* of the actual state of the write side of the stream.
+
+                        if (_requestCompletionState == StreamCompletionState.Completed && !ConnectProtocolEstablished)
                         {
                             Complete();
                         }
@@ -871,7 +901,20 @@ public void OnResponseData(ReadOnlySpan<byte> buffer, bool endStream)
                         Debug.Assert(_responseCompletionState == StreamCompletionState.InProgress, $"Response already completed with state={_responseCompletionState}");
 
                         _responseCompletionState = StreamCompletionState.Completed;
-                        if (_requestCompletionState == StreamCompletionState.Completed)
+
+                        // Extended CONNECT notes:
+                        //
+                        // To prevent from calling it *prematurely*, Extended CONNECT stream's Complete() is only
+                        // called from CloseResponseBody(), as CloseResponseBody() is *only* called
+                        // from Extended CONNECT stream's Dispose().
+                        //
+                        // Due to bidirectional streaming nature of the Extended CONNECT request,
+                        // the *write side* of the stream can only be completed by calling Dispose().
+                        //
+                        // The streaming in both ways happens over the single "response" stream instance, which makes
+                        // _requestCompletionState *not indicative* of the actual state of the write side of the stream.
+
+                        if (_requestCompletionState == StreamCompletionState.Completed && !ConnectProtocolEstablished)
                         {
                             Complete();
                         }
@@ -1036,17 +1079,17 @@ public async Task ReadResponseHeadersAsync(CancellationToken cancellationToken)
                 Debug.Assert(_response != null && _response.Content != null);
                 // Start to process the response body.
                 var responseContent = (HttpConnectionResponseContent)_response.Content;
-                if (emptyResponse)
+                if (ConnectProtocolEstablished)
+                {
+                    responseContent.SetStream(new Http2ReadWriteStream(this, closeResponseBodyOnDispose: true));
+                }
+                else if (emptyResponse)
                 {
                     // If there are any trailers, copy them over to the response.  Normally this would be handled by
                     // the response stream hitting EOF, but if there is no response body, we do it here.
                     MoveTrailersToResponseMessage(_response);
                     responseContent.SetStream(EmptyReadStream.Instance);
                 }
-                else if (ConnectProtocolEstablished)
-                {
-                    responseContent.SetStream(new Http2ReadWriteStream(this));
-                }
                 else
                 {
                     responseContent.SetStream(new Http2ReadStream(this));
@@ -1309,8 +1352,25 @@ private async ValueTask SendDataAsync(ReadOnlyMemory<byte> buffer, CancellationT
                 }
             }
 
+            // This method should only be called from Http2ReadWriteStream.Dispose()
             private void CloseResponseBody()
             {
+                // Extended CONNECT notes:
+                //
+                // Due to bidirectional streaming nature of the Extended CONNECT request,
+                // the *write side* of the stream can only be completed by calling Dispose()
+                // (which, for Extended CONNECT case, will in turn call CloseResponseBody())
+                //
+                // Similarly to QuicStream, disposal *gracefully* closes the write side of the stream
+                // (unless we've received RST_STREAM before) and *abortively* closes the read side
+                // of the stream (unless we've received EOS before).
+
+                if (ConnectProtocolEstablished && _resetException is null)
+                {
+                    // Gracefully close the write side of the Extended CONNECT stream
+                    _connection.LogExceptions(_connection.SendEndStreamAsync(StreamId));
+                }
+
                 // Check if the response body has been fully consumed.
                 bool fullyConsumed = false;
                 Debug.Assert(!Monitor.IsEntered(SyncObject));
@@ -1323,6 +1383,7 @@ private void CloseResponseBody()
                 }
 
                 // If the response body isn't completed, cancel it now.
+                // This includes aborting the read side of the Extended CONNECT stream.
                 if (!fullyConsumed)
                 {
                     Cancel();
@@ -1337,6 +1398,12 @@ private void CloseResponseBody()
 
                 lock (SyncObject)
                 {
+                    if (ConnectProtocolEstablished)
+                    {
+                        // This should be the only place where Extended Connect stream is completed
+                        Complete();
+                    }
+
                     _responseBuffer.Dispose();
                 }
             }
@@ -1430,10 +1497,7 @@ private enum StreamCompletionState : byte
 
             private sealed class Http2ReadStream : Http2ReadWriteStream
             {
-                public Http2ReadStream(Http2Stream http2Stream) : base(http2Stream)
-                {
-                    base.CloseResponseBodyOnDispose = true;
-                }
+                public Http2ReadStream(Http2Stream http2Stream) : base(http2Stream, closeResponseBodyOnDispose: true) { }
 
                 public override bool CanWrite => false;
 
@@ -1482,12 +1546,13 @@ public class Http2ReadWriteStream : HttpBaseStream
                 private Http2Stream? _http2Stream;
                 private readonly HttpResponseMessage _responseMessage;
 
-                public Http2ReadWriteStream(Http2Stream http2Stream)
+                public Http2ReadWriteStream(Http2Stream http2Stream, bool closeResponseBodyOnDispose = false)
                 {
                     Debug.Assert(http2Stream != null);
                     Debug.Assert(http2Stream._response != null);
                     _http2Stream = http2Stream;
                     _responseMessage = _http2Stream._response;
+                    CloseResponseBodyOnDispose = closeResponseBodyOnDispose;
                 }
 
                 ~Http2ReadWriteStream()
@@ -1503,7 +1568,7 @@ public Http2ReadWriteStream(Http2Stream http2Stream)
                     }
                 }
 
-                protected bool CloseResponseBodyOnDispose { get; set; }
+                protected bool CloseResponseBodyOnDispose { get; private init; }
 
                 protected override void Dispose(bool disposing)
                 {
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http2StreamWindowManager.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http2StreamWindowManager.cs
index e5dd33aa9fd3..470cacbe2de3 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http2StreamWindowManager.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http2StreamWindowManager.cs
@@ -138,13 +138,18 @@ private void AdjustWindowDynamic(int bytesConsumed, Http2Stream stream)
         // Assuming that the network characteristics of the connection wouldn't change much within its lifetime, we are maintaining a running minimum value.
         // The more PINGs we send, the more accurate is the estimation of MinRtt, however we should be careful not to send too many of them,
         // to avoid triggering the server's PING flood protection which may result in an unexpected GOAWAY.
-        // With most servers we are fine to send PINGs, as long as we are reading their data, this rule is well formalized for gRPC:
+        //
+        // Several strategies have been implemented to conform with real life servers.
+        // 1. With most servers we are fine to send PINGs as long as we are reading their data, a rule formalized by a gRPC spec:
         // https://github.com/grpc/proposal/blob/master/A8-client-side-keepalive.md
-        // As a rule of thumb, we can send send a PING whenever we receive DATA or HEADERS, however, there are some servers which allow receiving only
-        // a limited amount of PINGs within a given timeframe.
-        // To deal with the conflicting requirements:
-        // - We send an initial burst of 'InitialBurstCount' PINGs, to get a relatively good estimation fast
-        // - Afterwards, we send PINGs with the maximum frequency of 'PingIntervalInSeconds' PINGs per second
+        // According to this rule, we are OK to send a PING whenever we receive DATA or HEADERS, since the servers conforming to this doc
+        // will reset their unsolicited ping counter whenever they *send* DATA or HEADERS.
+        // 2. Some servers allow receiving only a limited amount of PINGs within a given timeframe.
+        // To deal with this, we send an initial burst of 'InitialBurstCount' (=4) PINGs, to get a relatively good estimation fast. Afterwards,
+        // we send PINGs each 'PingIntervalInSeconds' second, to maintain our estimation without triggering these servers.
+        // 3. Some servers in Google's backends reset their unsolicited ping counter when they *receive* DATA, HEADERS, or WINDOW_UPDATE.
+        // To deal with this, we need to make sure to send a connection WINDOW_UPDATE before sending a PING. The initial burst is an exception
+        // to this rule, since the mentioned server can tolerate 4 PINGs without receiving a WINDOW_UPDATE.
         //
         // Threading:
         // OnInitialSettingsSent() is called during initialization, all other methods are triggered by HttpConnection.ProcessIncomingFramesAsync(),
@@ -194,7 +199,7 @@ internal void OnInitialSettingsAckReceived(Http2Connection connection)
                 _state = State.Waiting;
             }
 
-            internal void OnDataOrHeadersReceived(Http2Connection connection)
+            internal void OnDataOrHeadersReceived(Http2Connection connection, bool sendWindowUpdateBeforePing)
             {
                 if (_state != State.Waiting) return;
 
@@ -204,6 +209,14 @@ internal void OnDataOrHeadersReceived(Http2Connection connection)
                 {
                     if (initial) _initialBurst--;
 
+                    // When sendWindowUpdateBeforePing is true, try to send a WINDOW_UPDATE to make Google backends happy.
+                    // Unless we are doing the initial burst, do not send PING if we were not able to send the WINDOW_UPDATE.
+                    // See point 3. in the comments above the class definition for more info.
+                    if (sendWindowUpdateBeforePing && !connection.ForceSendConnectionWindowUpdate() && !initial)
+                    {
+                        return;
+                    }
+
                     // Send a PING
                     _pingCounter--;
                     if (NetEventSource.Log.IsEnabled()) connection.Trace($"[FlowControl] Sending RTT PING with payload {_pingCounter}");
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http3RequestStream.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http3RequestStream.cs
index 8ff03d84ca67..64ba4089d5db 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http3RequestStream.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/Http3RequestStream.cs
@@ -573,8 +573,7 @@ private void BufferHeaders(HttpRequestMessage request)
             _sendBuffer.AvailableSpan[1] = 0x00; // s + delta base.
             _sendBuffer.Commit(2);
 
-            HttpMethod normalizedMethod = HttpMethod.Normalize(request.Method);
-            BufferBytes(normalizedMethod.Http3EncodedBytes);
+            BufferBytes(request.Method.Http3EncodedBytes);
             BufferIndexedHeader(H3StaticTable.SchemeHttps);
 
             if (request.HasHeaders && request.Headers.Host is string host)
@@ -626,7 +625,7 @@ private void BufferHeaders(HttpRequestMessage request)
 
             if (request.Content == null)
             {
-                if (normalizedMethod.MustHaveRequestBody)
+                if (request.Method.MustHaveRequestBody)
                 {
                     BufferIndexedHeader(H3StaticTable.ContentLength0);
                     headerListSize += HttpKnownHeaderNames.ContentLength.Length + HeaderField.RfcOverhead;
@@ -708,19 +707,8 @@ private int BufferHeaderCollection(HttpHeaders headers)
 
                         // For all other known headers, send them via their pre-encoded name and the associated value.
                         BufferBytes(knownHeader.Http3EncodedName);
-                        string? separator = null;
-                        if (headerValues.Length > 1)
-                        {
-                            HttpHeaderParser? parser = header.Key.Parser;
-                            if (parser != null && parser.SupportsMultipleValues)
-                            {
-                                separator = parser.Separator;
-                            }
-                            else
-                            {
-                                separator = HttpHeaderParser.DefaultSeparator;
-                            }
-                        }
+
+                        byte[]? separator = headerValues.Length > 1 ? header.Key.SeparatorBytes : null;
 
                         BufferLiteralHeaderValues(headerValues, separator, valueEncoding);
                     }
@@ -728,7 +716,7 @@ private int BufferHeaderCollection(HttpHeaders headers)
                 else
                 {
                     // The header is not known: fall back to just encoding the header name and value(s).
-                    BufferLiteralHeaderWithoutNameReference(header.Key.Name, headerValues, HttpHeaderParser.DefaultSeparator, valueEncoding);
+                    BufferLiteralHeaderWithoutNameReference(header.Key.Name, headerValues, HttpHeaderParser.DefaultSeparatorBytes, valueEncoding);
                 }
             }
 
@@ -755,7 +743,7 @@ private void BufferLiteralHeaderWithStaticNameReference(int nameIndex, string va
             _sendBuffer.Commit(bytesWritten);
         }
 
-        private void BufferLiteralHeaderWithoutNameReference(string name, ReadOnlySpan<string> values, string separator, Encoding? valueEncoding)
+        private void BufferLiteralHeaderWithoutNameReference(string name, ReadOnlySpan<string> values, byte[] separator, Encoding? valueEncoding)
         {
             int bytesWritten;
             while (!QPackEncoder.EncodeLiteralHeaderFieldWithoutNameReference(name, values, separator, valueEncoding, _sendBuffer.AvailableSpan, out bytesWritten))
@@ -775,7 +763,7 @@ private void BufferLiteralHeaderWithoutNameReference(string name, string value,
             _sendBuffer.Commit(bytesWritten);
         }
 
-        private void BufferLiteralHeaderValues(ReadOnlySpan<string> values, string? separator, Encoding? valueEncoding)
+        private void BufferLiteralHeaderValues(ReadOnlySpan<string> values, byte[]? separator, Encoding? valueEncoding)
         {
             int bytesWritten;
             while (!QPackEncoder.EncodeValueString(values, separator, valueEncoding, _sendBuffer.AvailableSpan, out bytesWritten))
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnection.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnection.cs
index 5fb748ae5908..2208a6265c97 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnection.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnection.cs
@@ -270,17 +270,14 @@ private void ConsumeFromRemainingBuffer(int bytesToConsume)
             _readBuffer.Discard(bytesToConsume);
         }
 
-        private void WriteHeaders(HttpRequestMessage request, HttpMethod normalizedMethod)
+        private void WriteHeaders(HttpRequestMessage request)
         {
             Debug.Assert(request.RequestUri is not null);
 
             // Write the request line
-            WriteAsciiString(normalizedMethod.Method);
-            _writeBuffer.EnsureAvailableSpace(1);
-            _writeBuffer.AvailableSpan[0] = (byte)' ';
-            _writeBuffer.Commit(1);
+            WriteBytes(request.Method.Http1EncodedBytes);
 
-            if (ReferenceEquals(normalizedMethod, HttpMethod.Connect))
+            if (request.Method.IsConnect)
             {
                 // RFC 7231 #section-4.3.6.
                 // Write only CONNECT foo.com:345 HTTP/1.1
@@ -353,7 +350,7 @@ private void WriteHeaders(HttpRequestMessage request, HttpMethod normalizedMetho
             {
                 // Write out Content-Length: 0 header to indicate no body,
                 // unless this is a method that never has a body.
-                if (normalizedMethod.MustHaveRequestBody)
+                if (request.Method.MustHaveRequestBody)
                 {
                     WriteBytes("Content-Length: 0\r\n"u8);
                 }
@@ -417,16 +414,11 @@ private void WriteHeaderCollection(HttpHeaders headers, string? cookiesFromConta
                 // Some headers such as User-Agent and Server use space as a separator (see: ProductInfoHeaderParser)
                 if (headerValuesCount > 1)
                 {
-                    HttpHeaderParser? parser = header.Key.Parser;
-                    string separator = HttpHeaderParser.DefaultSeparator;
-                    if (parser != null && parser.SupportsMultipleValues)
-                    {
-                        separator = parser.Separator!;
-                    }
+                    byte[] separator = header.Key.SeparatorBytes;
 
                     for (int i = 1; i < headerValuesCount; i++)
                     {
-                        WriteAsciiString(separator);
+                        WriteBytes(separator);
                         WriteString(headerValues[i], valueEncoding);
                     }
                 }
@@ -460,11 +452,15 @@ private void WriteBytes(ReadOnlySpan<byte> bytes)
 
         private void WriteAsciiString(string s)
         {
+            Debug.Assert(Ascii.IsValid(s));
+
             _writeBuffer.EnsureAvailableSpace(s.Length);
-            int length = Encoding.ASCII.GetBytes(s, _writeBuffer.AvailableSpan);
-            Debug.Assert(length == s.Length);
-            Debug.Assert(Encoding.ASCII.GetString(_writeBuffer.AvailableSpan.Slice(0, length)) == s);
-            _writeBuffer.Commit(length);
+
+            OperationStatus status = Ascii.FromUtf16(s, _writeBuffer.AvailableSpan, out int bytesWritten);
+            Debug.Assert(status == OperationStatus.Done);
+            Debug.Assert(bytesWritten == s.Length);
+
+            _writeBuffer.Commit(s.Length);
         }
 
         private void WriteString(string s, Encoding? encoding)
@@ -509,7 +505,6 @@ public async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, boo
             Task? sendRequestContentTask = null;
 
             _currentRequest = request;
-            HttpMethod normalizedMethod = HttpMethod.Normalize(request.Method);
 
             _canRetry = false;
 
@@ -520,7 +515,7 @@ public async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, boo
             {
                 if (HttpTelemetry.Log.IsEnabled()) HttpTelemetry.Log.RequestHeadersStart(Id);
 
-                WriteHeaders(request, normalizedMethod);
+                WriteHeaders(request);
 
                 if (HttpTelemetry.Log.IsEnabled()) HttpTelemetry.Log.RequestHeadersStop();
 
@@ -744,12 +739,12 @@ public async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, boo
 
                 // Create the response stream.
                 Stream responseStream;
-                if (ReferenceEquals(normalizedMethod, HttpMethod.Head) || response.StatusCode == HttpStatusCode.NoContent || response.StatusCode == HttpStatusCode.NotModified)
+                if (request.Method.IsHead || response.StatusCode is HttpStatusCode.NoContent or HttpStatusCode.NotModified)
                 {
                     responseStream = EmptyReadStream.Instance;
                     CompleteResponse();
                 }
-                else if (ReferenceEquals(normalizedMethod, HttpMethod.Connect) && response.StatusCode == HttpStatusCode.OK)
+                else if (request.Method.IsConnect && response.StatusCode == HttpStatusCode.OK)
                 {
                     // Successful response to CONNECT does not have body.
                     // What ever comes next should be opaque.
@@ -1737,22 +1732,30 @@ private int Read(Span<byte> destination)
             return count;
         }
 
-        private async ValueTask<int> ReadAsync(Memory<byte> destination)
+        private ValueTask<int> ReadAsync(Memory<byte> destination)
         {
             // This is called when reading the response body.
 
             if (_readBuffer.ActiveLength > 0)
             {
                 // We have data in the read buffer.  Return it to the caller.
-                return ReadFromBuffer(destination.Span);
+                return new ValueTask<int>(ReadFromBuffer(destination.Span));
             }
 
             // No data in read buffer.
             // Do an unbuffered read directly against the underlying stream.
             Debug.Assert(_readAheadTask == default, "Read ahead task should have been consumed as part of the headers.");
-            int count = await _stream.ReadAsync(destination).ConfigureAwait(false);
-            if (NetEventSource.Log.IsEnabled()) Trace($"Received {count} bytes.");
-            return count;
+
+            return NetEventSource.Log.IsEnabled()
+                ? ReadAndLogBytesReadAsync(destination)
+                : _stream.ReadAsync(destination);
+
+            async ValueTask<int> ReadAndLogBytesReadAsync(Memory<byte> destination)
+            {
+                int count = await _stream.ReadAsync(destination).ConfigureAwait(false);
+                if (NetEventSource.Log.IsEnabled()) Trace($"Received {count} bytes.");
+                return count;
+            }
         }
 
         private int ReadBuffered(Span<byte> destination)
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnectionPool.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnectionPool.cs
index 0c70813838b4..e51669888352 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnectionPool.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnectionPool.cs
@@ -1,6 +1,8 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Buffers;
+using System.Collections.Concurrent;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
@@ -8,7 +10,6 @@
 using System.IO;
 using System.Net.Http.Headers;
 using System.Net.Http.HPack;
-using System.Net.Http.Metrics;
 using System.Net.Http.QPack;
 using System.Net.Quic;
 using System.Net.Security;
@@ -69,8 +70,10 @@ internal sealed class HttpConnectionPool : IDisposable
 
         // HTTP/1.1 connection pool
 
-        /// <summary>List of available HTTP/1.1 connections stored in the pool.</summary>
-        private readonly List<HttpConnection> _availableHttp11Connections = new List<HttpConnection>();
+        /// <summary>Stack of currently available HTTP/1.1 connections stored in the pool.</summary>
+        private readonly ConcurrentStack<HttpConnection> _http11Connections = new();
+        /// <summary>Controls whether we can use a fast path when returning connections to the pool and skip calling into <see cref="ProcessHttp11RequestQueue(HttpConnection?)"/>.</summary>
+        private bool _http11RequestQueueIsEmptyAndNotDisposed;
         /// <summary>The maximum number of HTTP/1.1 connections allowed to be associated with the pool.</summary>
         private readonly int _maxHttp11Connections;
         /// <summary>The number of HTTP/1.1 connections associated with the pool, including in use, available, and pending.</summary>
@@ -250,12 +253,6 @@ public HttpConnectionPool(HttpConnectionPoolManager poolManager, HttpConnectionK
                 _hostHeaderLineBytes = hostHeaderLine;
 
                 Debug.Assert(Encoding.ASCII.GetString(_hostHeaderLineBytes) == $"Host: {hostHeader}\r\n");
-
-                if (sslHostName == null)
-                {
-                    _http2EncodedAuthorityHostHeader = HPackEncoder.EncodeLiteralHeaderFieldWithoutIndexingToAllocatedArray(H2StaticTable.Authority, hostHeader);
-                    _http3EncodedAuthorityHostHeader = QPackEncoder.EncodeLiteralHeaderFieldWithStaticNameReferenceToArray(H3StaticTable.Authority, hostHeader);
-                }
             }
 
             if (sslHostName != null)
@@ -283,9 +280,18 @@ public HttpConnectionPool(HttpConnectionPoolManager poolManager, HttpConnectionK
                     // by which AllowRenegotiation could be set back to true in that case.
                     // For now, if an HTTP/2 server erroneously issues a renegotiation, we'll
                     // allow it.
+                }
+            }
 
-                    Debug.Assert(hostHeader != null);
+            if (hostHeader is not null)
+            {
+                if (_http2Enabled)
+                {
                     _http2EncodedAuthorityHostHeader = HPackEncoder.EncodeLiteralHeaderFieldWithoutIndexingToAllocatedArray(H2StaticTable.Authority, hostHeader);
+                }
+
+                if (IsHttp3Supported() && _http3Enabled)
+                {
                     _http3EncodedAuthorityHostHeader = QPackEncoder.EncodeLiteralHeaderFieldWithStaticNameReferenceToArray(H3StaticTable.Authority, hostHeader);
                 }
             }
@@ -401,12 +407,12 @@ private object SyncObj
         {
             get
             {
-                Debug.Assert(!Monitor.IsEntered(_availableHttp11Connections));
-                return _availableHttp11Connections;
+                Debug.Assert(!Monitor.IsEntered(_http11Connections));
+                return _http11Connections;
             }
         }
 
-        private bool HasSyncObjLock => Monitor.IsEntered(_availableHttp11Connections);
+        public bool HasSyncObjLock => Monitor.IsEntered(_http11Connections);
 
         // Overview of connection management (mostly HTTP version independent):
         //
@@ -459,6 +465,8 @@ private static void ThrowGetVersionException(HttpRequestMessage request, int des
 
         private bool CheckExpirationOnGet(HttpConnectionBase connection)
         {
+            Debug.Assert(!HasSyncObjLock);
+
             TimeSpan pooledConnectionLifetime = _poolManager.Settings._pooledConnectionLifetime;
             if (pooledConnectionLifetime != Timeout.InfiniteTimeSpan)
             {
@@ -514,7 +522,7 @@ private async Task AddHttp11ConnectionAsync(RequestQueue<HttpConnection>.QueueIt
             if (connection is not null)
             {
                 // Add the established connection to the pool.
-                ReturnHttp11Connection(connection, isNewConnection: true, queueItem.Waiter);
+                AddNewHttp11Connection(connection, queueItem.Waiter);
             }
             else
             {
@@ -530,14 +538,14 @@ private void CheckForHttp11ConnectionInjection()
             _http11RequestQueue.PruneCompletedRequestsFromHeadOfQueue(this);
 
             // Determine if we can and should add a new connection to the pool.
-            bool willInject = _availableHttp11Connections.Count == 0 &&             // No available connections
-                _http11RequestQueue.Count > _pendingHttp11ConnectionCount &&        // More requests queued than pending connections
-                _associatedHttp11ConnectionCount < _maxHttp11Connections &&         // Under the connection limit
-                _http11RequestQueue.RequestsWithoutAConnectionAttempt > 0;          // There are requests we haven't issued a connection attempt for
+            bool willInject =
+                _http11RequestQueue.Count > _pendingHttp11ConnectionCount &&    // More requests queued than pending connections
+                _associatedHttp11ConnectionCount < _maxHttp11Connections &&     // Under the connection limit
+                _http11RequestQueue.RequestsWithoutAConnectionAttempt > 0;      // There are requests we haven't issued a connection attempt for
 
             if (NetEventSource.Log.IsEnabled())
             {
-                Trace($"Available HTTP/1.1 connections: {_availableHttp11Connections.Count}, Requests in the queue: {_http11RequestQueue.Count}, " +
+                Trace($"Available HTTP/1.1 connections: {_http11Connections.Count}, Requests in the queue: {_http11RequestQueue.Count}, " +
                     $"Requests without a connection attempt: {_http11RequestQueue.RequestsWithoutAConnectionAttempt}, " +
                     $"Pending HTTP/1.1 connections: {_pendingHttp11ConnectionCount}, Total associated HTTP/1.1 connections: {_associatedHttp11ConnectionCount}, " +
                     $"Max HTTP/1.1 connection limit: {_maxHttp11Connections}, " +
@@ -554,36 +562,114 @@ private void CheckForHttp11ConnectionInjection()
             }
         }
 
-        private bool TryGetPooledHttp11Connection(HttpRequestMessage request, bool async, [NotNullWhen(true)] out HttpConnection? connection, [NotNullWhen(false)] out HttpConnectionWaiter<HttpConnection>? waiter)
+        /// <summary>
+        /// This method is called:
+        /// <br/>- When returning a connection and observing that the request queue is not empty (<see cref="_http11RequestQueueIsEmptyAndNotDisposed"/> is <see langword="false"/>).
+        /// <br/>- After adding a request to the queue if we fail to obtain a connection from <see cref="_http11Connections"/>.
+        /// <br/>- After scavenging or disposing the pool to ensure that any pending requests are handled or connections disposed.
+        /// <para>The method will attempt to match one request from the <see cref="_http11RequestQueue"/> to an available connection.
+        /// The <paramref name="connection"/> can either be provided as an argument (when returning a connection to the pool), or one will be rented from <see cref="_http11Connections"/>.
+        /// As we'll only process a single request, we are expecting the method to be called every time a request is enqueued, and every time a connection is returned while the request queue is not empty.</para>
+        /// <para>If the <see cref="_http11RequestQueue"/> becomes empty, this method will reset the <see cref="_http11RequestQueueIsEmptyAndNotDisposed"/> flag back to <see langword="true"/>,
+        /// such that returning connections will use the fast path again and skip calling into this method.</para>
+        /// <para>Notably, this method will not be called on the fast path as long as we have enough connections to handle all new requests.</para>
+        /// </summary>
+        /// <param name="connection">The connection to use for a pending request, or return to the pool.</param>
+        private void ProcessHttp11RequestQueue(HttpConnection? connection)
         {
+            // Loop in case the request we try to signal was already cancelled or handled by a different connection.
             while (true)
             {
+                HttpConnectionWaiter<HttpConnection>? waiter = null;
+
                 lock (SyncObj)
                 {
-                    _usedSinceLastCleanup = true;
+#if DEBUG
+                    // Other threads may still interact with the connections stack. Read the count once to keep the assert message accurate.
+                    int connectionCount = _http11Connections.Count;
+                    Debug.Assert(_associatedHttp11ConnectionCount >= connectionCount + _pendingHttp11ConnectionCount,
+                        $"Expected {_associatedHttp11ConnectionCount} >= {connectionCount} + {_pendingHttp11ConnectionCount}");
+#endif
+                    Debug.Assert(_associatedHttp11ConnectionCount <= _maxHttp11Connections,
+                        $"Expected {_associatedHttp11ConnectionCount} <= {_maxHttp11Connections}");
+                    Debug.Assert(_associatedHttp11ConnectionCount >= _pendingHttp11ConnectionCount,
+                        $"Expected {_associatedHttp11ConnectionCount} >= {_pendingHttp11ConnectionCount}");
 
-                    int availableConnectionCount = _availableHttp11Connections.Count;
-                    if (availableConnectionCount > 0)
+                    if (_http11RequestQueue.Count != 0)
                     {
-                        // We have a connection that we can attempt to use.
-                        // Validate it below outside the lock, to avoid doing expensive operations while holding the lock.
-                        connection = _availableHttp11Connections[availableConnectionCount - 1];
-                        _availableHttp11Connections.RemoveAt(availableConnectionCount - 1);
+                        if (connection is not null || _http11Connections.TryPop(out connection))
+                        {
+                            // TryDequeueWaiter will prune completed requests from the head of the queue,
+                            // so it's possible for it to return false even though we checked that Count != 0.
+                            bool success = _http11RequestQueue.TryDequeueWaiter(this, out waiter);
+                            Debug.Assert(success == waiter is not null);
+                        }
                     }
-                    else
+
+                    // Update the empty queue flag now.
+                    // If the request queue is now empty, returning connections will use the fast path and skip calling into this method.
+                    _http11RequestQueueIsEmptyAndNotDisposed = _http11RequestQueue.Count == 0 && !_disposed;
+
+                    if (waiter is null)
                     {
-                        // No available connections. Add to the request queue.
-                        waiter = _http11RequestQueue.EnqueueRequest(request);
+                        // We didn't find a waiter to signal, or there were no connections available.
 
-                        CheckForHttp11ConnectionInjection();
+                        if (connection is not null)
+                        {
+                            // A connection was provided to this method, or we rented one from the pool.
+                            // Return it back to the pool since we're not going to use it yet.
+
+                            // We're returning it while holding the lock to avoid a scenario where
+                            // - thread A sees no requests are waiting in the queue (current thread)
+                            // - thread B adds a request to the queue, and sees no connections are available
+                            // - thread A returns the connection to the pool
+                            // We'd have both a connection and a request waiting in the pool, but nothing to pair the two.
+
+                            // The main scenario where we'll reach this branch is when we enqueue a request to the queue
+                            // and set the _http11RequestQueueIsEmptyAndNotDisposed flag to false, followed by multiple
+                            // returning connections observing the flag and calling into this method before we clear the flag.
+                            // This should be a relatively rare case, so the added contention should be minimal.
+                            _http11Connections.Push(connection);
+                        }
+                        else
+                        {
+                            CheckForHttp11ConnectionInjection();
+                        }
 
-                        // There were no available idle connections. This request has been added to the request queue.
-                        if (NetEventSource.Log.IsEnabled()) Trace($"No available HTTP/1.1 connections; request queued.");
-                        connection = null;
-                        return false;
+                        break;
                     }
                 }
 
+                Debug.Assert(connection is not null);
+
+                if (TrySignalWaiter(waiter, connection))
+                {
+                    // Success. Note that we did not call connection.PrepareForReuse
+                    // before signaling the waiter. This is intentional, as the fact that
+                    // this method was called indicates that the connection is either new,
+                    // or was just returned to the pool and is still in a good state.
+                    return;
+                }
+
+                // The request was already cancelled or handled by a different connection.
+                // Loop again to try to find another request to signal, or return the connection.
+            }
+
+            if (_disposed)
+            {
+                // The pool is being disposed and there are no more requests to handle.
+                // Clean up any idle connections still waiting in the pool.
+                while (_http11Connections.TryPop(out connection))
+                {
+                    connection.Dispose();
+                }
+            }
+        }
+
+        private bool TryGetPooledHttp11Connection(HttpRequestMessage request, bool async, [NotNullWhen(true)] out HttpConnection? connection, [NotNullWhen(false)] out HttpConnectionWaiter<HttpConnection>? waiter)
+        {
+            while (_http11Connections.TryPop(out connection))
+            {
                 if (CheckExpirationOnGet(connection))
                 {
                     if (NetEventSource.Log.IsEnabled()) connection.Trace("Found expired HTTP/1.1 connection in pool.");
@@ -602,6 +688,27 @@ private bool TryGetPooledHttp11Connection(HttpRequestMessage request, bool async
                 waiter = null;
                 return true;
             }
+
+            // Slow path - no available connection found.
+            // Push the request onto the request queue and check if we should inject a new connection.
+
+            waiter = new HttpConnectionWaiter<HttpConnection>();
+
+            // Technically this block under the lock could be a part of ProcessHttp11RequestQueue to avoid taking the lock twice.
+            // It is kept separate to simplify that method (avoid extra arguments that are only relevant for this caller).
+            lock (SyncObj)
+            {
+                _http11RequestQueue.EnqueueRequest(request, waiter);
+
+                // Disable the fast path and force connections returned to the pool to check the request queue first.
+                _http11RequestQueueIsEmptyAndNotDisposed = false;
+            }
+
+            // Other threads may have added a connection to the pool before we were able to
+            // add the request to the queue, so we must check for an available connection again.
+
+            ProcessHttp11RequestQueue(null);
+            return false;
         }
 
         private async Task HandleHttp11Downgrade(HttpRequestMessage request, Stream stream, TransportContext? transportContext, IPEndPoint? remoteEndPoint, CancellationToken cancellationToken)
@@ -655,6 +762,7 @@ private async Task HandleHttp11Downgrade(HttpRequestMessage request, Stream stre
             {
                 if (NetEventSource.Log.IsEnabled()) Trace("Discarding downgraded HTTP/1.1 connection because HTTP/1.1 connection limit is exceeded");
                 stream.Dispose();
+                return;
             }
 
             HttpConnection http11Connection;
@@ -674,7 +782,7 @@ private async Task HandleHttp11Downgrade(HttpRequestMessage request, Stream stre
                 return;
             }
 
-            ReturnHttp11Connection(http11Connection, isNewConnection: true);
+            AddNewHttp11Connection(http11Connection, initialRequestWaiter: null);
         }
 
         private async Task AddHttp2ConnectionAsync(RequestQueue<Http2Connection?>.QueueItem queueItem)
@@ -769,7 +877,7 @@ private void CheckForHttp2ConnectionInjection()
             if (NetEventSource.Log.IsEnabled())
             {
                 Trace($"Available HTTP/2.0 connections: {availableHttp2ConnectionCount}, " +
-                    $"Pending HTTP/2.0 connection: {_pendingHttp2Connection}" +
+                    $"Pending HTTP/2.0 connection: {_pendingHttp2Connection}, " +
                     $"Requests in the queue: {_http2RequestQueue.Count}, " +
                     $"Requests without a connection attempt: {_http2RequestQueue.RequestsWithoutAConnectionAttempt}, " +
                     $"Total associated HTTP/2.0 connections: {_associatedHttp2ConnectionCount}, " +
@@ -795,8 +903,6 @@ private bool TryGetPooledHttp2Connection(HttpRequestMessage request, [NotNullWhe
             {
                 lock (SyncObj)
                 {
-                    _usedSinceLastCleanup = true;
-
                     if (!_http2Enabled)
                     {
                         waiter = null;
@@ -885,7 +991,6 @@ private async ValueTask<Http3Connection> GetHttp3ConnectionAsync(HttpRequestMess
                 {
                     // Connection exists and it is still good to use.
                     if (NetEventSource.Log.IsEnabled()) Trace("Using existing HTTP3 connection.");
-                    _usedSinceLastCleanup = true;
                     return http3Connection;
                 }
             }
@@ -1021,6 +1126,8 @@ private void ProcessAltSvc(HttpResponseMessage response)
 
         public async ValueTask<HttpResponseMessage> SendWithVersionDetectionAndRetryAsync(HttpRequestMessage request, bool async, bool doRequestAuth, CancellationToken cancellationToken)
         {
+            _usedSinceLastCleanup = true;
+
             // Loop on connection failures (or other problems like version downgrade) and retry if possible.
             int retryCount = 0;
             while (true)
@@ -1845,7 +1952,7 @@ public void InvalidateHttp11Connection(HttpConnection connection, bool disposing
             lock (SyncObj)
             {
                 Debug.Assert(_associatedHttp11ConnectionCount > 0);
-                Debug.Assert(!disposing || !_availableHttp11Connections.Contains(connection));
+                Debug.Assert(!disposing || Array.IndexOf(_http11Connections.ToArray(), connection) < 0);
 
                 _associatedHttp11ConnectionCount--;
 
@@ -1898,99 +2005,107 @@ private bool CheckExpirationOnReturn(HttpConnectionBase connection)
             return false;
         }
 
-        public void RecycleHttp11Connection(HttpConnection connection) => ReturnHttp11Connection(connection, false);
+        public void RecycleHttp11Connection(HttpConnection connection)
+        {
+            if (CheckExpirationOnReturn(connection))
+            {
+                if (NetEventSource.Log.IsEnabled()) connection.Trace("Disposing HTTP/1.1 connection when returning to pool. Connection lifetime expired.");
+                connection.Dispose();
+                return;
+            }
+
+            ReturnHttp11Connection(connection);
+        }
 
-        private void ReturnHttp11Connection(HttpConnection connection, bool isNewConnection, HttpConnectionWaiter<HttpConnection>? initialRequestWaiter = null)
+        private void AddNewHttp11Connection(HttpConnection connection, HttpConnectionWaiter<HttpConnection>? initialRequestWaiter)
         {
-            if (NetEventSource.Log.IsEnabled()) connection.Trace($"{nameof(isNewConnection)}={isNewConnection}");
+            if (NetEventSource.Log.IsEnabled()) Trace("");
 
-            Debug.Assert(isNewConnection || initialRequestWaiter is null, "Shouldn't have a request unless the connection is new");
+            lock (SyncObj)
+            {
+                Debug.Assert(_pendingHttp11ConnectionCount > 0);
+                _pendingHttp11ConnectionCount--;
 
-            if (!isNewConnection && CheckExpirationOnReturn(connection))
+                if (initialRequestWaiter is not null)
+                {
+                    // If we're about to signal the initial waiter, that request must be removed from the queue if it was at the head to avoid rooting it forever.
+                    // Normally, TryDequeueWaiter would handle the removal. TryDequeueSpecificWaiter matches this behavior for the initial request case.
+                    // We don't care if this fails; that means the request was previously canceled, handled by a different connection, or not at the head of the queue.
+                    _http11RequestQueue.TryDequeueSpecificWaiter(initialRequestWaiter);
+
+                    // There's no need for us to hold the lock while signaling the waiter.
+                }
+            }
+
+            if (initialRequestWaiter is not null &&
+                TrySignalWaiter(initialRequestWaiter, connection))
             {
-                if (NetEventSource.Log.IsEnabled()) connection.Trace("Disposing HTTP/1.1 connection return to pool. Connection lifetime expired.");
-                connection.Dispose();
                 return;
             }
 
-            // Loop in case we get a request that has already been canceled or handled by a different connection.
-            while (true)
+            ReturnHttp11Connection(connection);
+        }
+
+        private void ReturnHttp11Connection(HttpConnection connection)
+        {
+            connection.MarkConnectionAsIdle();
+
+            // The fast path when there are enough connections and no pending requests
+            // is that we'll see _http11RequestQueueIsEmptyAndNotDisposed being true both
+            // times, and all we'll have to do as part of returning the connection is
+            // a Push call on the concurrent stack.
+
+            if (Volatile.Read(ref _http11RequestQueueIsEmptyAndNotDisposed))
             {
-                HttpConnectionWaiter<HttpConnection>? waiter = null;
-                bool added = false;
-                lock (SyncObj)
-                {
-                    Debug.Assert(!_availableHttp11Connections.Contains(connection), $"Connection already in available list");
-                    Debug.Assert(_associatedHttp11ConnectionCount > _availableHttp11Connections.Count,
-                        $"Expected _associatedHttp11ConnectionCount={_associatedHttp11ConnectionCount} > _availableHttp11Connections.Count={_availableHttp11Connections.Count}");
-                    Debug.Assert(_associatedHttp11ConnectionCount <= _maxHttp11Connections,
-                        $"Expected _associatedHttp11ConnectionCount={_associatedHttp11ConnectionCount} <= _maxHttp11Connections={_maxHttp11Connections}");
+                _http11Connections.Push(connection);
 
-                    if (isNewConnection)
-                    {
-                        Debug.Assert(_pendingHttp11ConnectionCount > 0);
-                        _pendingHttp11ConnectionCount--;
-                        isNewConnection = false;
-                    }
+                // When we add a connection to the pool, we must ensure that there are
+                // either no pending requests waiting, or that _something_ will pair those
+                // requests with the connection we just added.
 
-                    if (initialRequestWaiter is not null)
-                    {
-                        // Try to handle the request that we initiated the connection for first
-                        waiter = initialRequestWaiter;
-                        initialRequestWaiter = null;
-
-                        // If this method found a request to service, that request must be removed from the queue if it was at the head to avoid rooting it forever.
-                        // Normally, TryDequeueWaiter would handle the removal. TryDequeueSpecificWaiter matches this behavior for the initial request case.
-                        // We don't care if this fails; that means the request was previously canceled, handled by a different connection, or not at the head of the queue.
-                        _http11RequestQueue.TryDequeueSpecificWaiter(waiter);
-                    }
-                    else if (_http11RequestQueue.TryDequeueWaiter(this, out waiter))
-                    {
-                        Debug.Assert(_availableHttp11Connections.Count == 0, $"With {_availableHttp11Connections.Count} available HTTP/1.1 connections, we shouldn't have a waiter.");
-                    }
-                    else if (!_disposed)
-                    {
-                        // Add connection to the pool.
-                        added = true;
-                        connection.MarkConnectionAsIdle();
-                        _availableHttp11Connections.Add(connection);
-                    }
+                // When adding a request to the queue, we'll first check if there's
+                // an available connection waiting in the pool that we could use.
+                // If there isn't, we'll set the _http11RequestQueueIsEmptyAndNotDisposed
+                // flag and check for available connections again.
 
-                    // If the pool has been disposed of, we will dispose the connection below outside the lock.
-                    // We do this after processing the queue above so that any queued requests will be handled by existing connections if possible.
-                }
+                // To avoid a race where we add the connection after a request was enqueued,
+                // we'll check the flag again and try to process one request from the queue.
 
-                if (waiter is not null)
+                if (!Volatile.Read(ref _http11RequestQueueIsEmptyAndNotDisposed))
                 {
-                    Debug.Assert(!added);
-                    if (waiter.TrySetResult(connection))
-                    {
-                        if (NetEventSource.Log.IsEnabled()) connection.Trace("Dequeued waiting HTTP/1.1 request.");
-                        return;
-                    }
-                    else
-                    {
-                        if (NetEventSource.Log.IsEnabled())
-                        {
-                            Trace(waiter.Task.IsCanceled
-                                ? "Discarding canceled HTTP/1.1 request from queue."
-                                : "Discarding signaled HTTP/1.1 request waiter from queue.");
-                        }
-                        // Loop and process the queue again
-                    }
-                }
-                else if (added)
-                {
-                    if (NetEventSource.Log.IsEnabled()) connection.Trace("Put HTTP/1.1 connection in pool.");
-                    return;
+                    ProcessHttp11RequestQueue(null);
                 }
-                else
+            }
+            else
+            {
+                // ProcessHttp11RequestQueue is responsible for handing the connection to a pending request,
+                // or to return it back to the pool if there aren't any.
+
+                // We hand over the connection directly instead of pushing it on the stack first to ensure
+                // that pending requests are processed in a fair (FIFO) order.
+                ProcessHttp11RequestQueue(connection);
+            }
+        }
+
+        private bool TrySignalWaiter<T>(HttpConnectionWaiter<T> waiter, T connection)
+            where T : HttpConnectionBase?
+        {
+            Debug.Assert(connection is not null);
+
+            if (waiter.TrySetResult(connection))
+            {
+                if (NetEventSource.Log.IsEnabled()) connection.Trace("Dequeued waiting request.");
+                return true;
+            }
+            else
+            {
+                if (NetEventSource.Log.IsEnabled())
                 {
-                    Debug.Assert(_disposed);
-                    if (NetEventSource.Log.IsEnabled()) connection.Trace("Disposing HTTP/1.1 connection returned to pool. Pool was disposed.");
-                    connection.Dispose();
-                    return;
+                    Trace(waiter.Task.IsCanceled
+                        ? "Discarding canceled request from queue."
+                        : "Discarding signaled request waiter from queue.");
                 }
+                return false;
             }
         }
 
@@ -1998,6 +2113,7 @@ private void ReturnHttp2Connection(Http2Connection connection, bool isNewConnect
         {
             if (NetEventSource.Log.IsEnabled()) connection.Trace($"{nameof(isNewConnection)}={isNewConnection}");
 
+            Debug.Assert(!HasSyncObjLock);
             Debug.Assert(isNewConnection || initialRequestWaiter is null, "Shouldn't have a request unless the connection is new");
 
             if (!isNewConnection && CheckExpirationOnReturn(connection))
@@ -2067,21 +2183,13 @@ private void ReturnHttp2Connection(Http2Connection connection, bool isNewConnect
                     if (waiter is not null)
                     {
                         Debug.Assert(!added);
-                        if (waiter.TrySetResult(connection))
+
+                        if (TrySignalWaiter(waiter, connection))
                         {
-                            if (NetEventSource.Log.IsEnabled()) connection.Trace("Dequeued waiting HTTP2 request.");
                             break;
                         }
-                        else
-                        {
-                            if (NetEventSource.Log.IsEnabled())
-                            {
-                                Trace(waiter.Task.IsCanceled
-                                    ? "Discarding canceled HTTP/2 request from queue."
-                                    : "Discarding signaled HTTP/2 request waiter from queue.");
-                            }
-                            // Loop and process the queue again
-                        }
+
+                        // Loop and process the queue again
                     }
                     else
                     {
@@ -2191,55 +2299,53 @@ public void Dispose()
 
             lock (SyncObj)
             {
-                if (!_disposed)
+                if (_disposed)
                 {
-                    if (NetEventSource.Log.IsEnabled()) Trace("Disposing pool.");
-
-                    _disposed = true;
+                    return;
+                }
 
-                    toDispose = new List<HttpConnectionBase>(_availableHttp11Connections.Count + (_availableHttp2Connections?.Count ?? 0));
-                    toDispose.AddRange(_availableHttp11Connections);
-                    if (_availableHttp2Connections is not null)
-                    {
-                        toDispose.AddRange(_availableHttp2Connections);
-                    }
+                _disposed = true;
+                _http11RequestQueueIsEmptyAndNotDisposed = false;
 
-                    // Note: Http11 connections will decrement the _associatedHttp11ConnectionCount when disposed.
-                    // Http2 connections will not, hence the difference in handing _associatedHttp2ConnectionCount.
+                if (NetEventSource.Log.IsEnabled()) Trace("Disposing the pool.");
 
-                    Debug.Assert(_associatedHttp11ConnectionCount >= _availableHttp11Connections.Count,
-                        $"Expected {nameof(_associatedHttp11ConnectionCount)}={_associatedHttp11ConnectionCount} >= {nameof(_availableHttp11Connections)}.Count={_availableHttp11Connections.Count}");
-                    _availableHttp11Connections.Clear();
-
-                    Debug.Assert(_associatedHttp2ConnectionCount >= (_availableHttp2Connections?.Count ?? 0));
-                    _associatedHttp2ConnectionCount -= (_availableHttp2Connections?.Count ?? 0);
-                    _availableHttp2Connections?.Clear();
+                if (_availableHttp2Connections is not null)
+                {
+                    toDispose = [.. _availableHttp2Connections];
+                    _associatedHttp2ConnectionCount -= _availableHttp2Connections.Count;
+                    _availableHttp2Connections.Clear();
+                }
 
-                    if (_http3Connection is not null)
-                    {
-                        toDispose.Add(_http3Connection);
-                        _http3Connection = null;
-                    }
+                if (_http3Connection is not null)
+                {
+                    toDispose ??= new();
+                    toDispose.Add(_http3Connection);
+                    _http3Connection = null;
+                }
 
-                    if (_authorityExpireTimer != null)
-                    {
-                        _authorityExpireTimer.Dispose();
-                        _authorityExpireTimer = null;
-                    }
+                if (_authorityExpireTimer != null)
+                {
+                    _authorityExpireTimer.Dispose();
+                    _authorityExpireTimer = null;
+                }
 
-                    if (_altSvcBlocklistTimerCancellation != null)
-                    {
-                        _altSvcBlocklistTimerCancellation.Cancel();
-                        _altSvcBlocklistTimerCancellation.Dispose();
-                        _altSvcBlocklistTimerCancellation = null;
-                    }
+                if (_altSvcBlocklistTimerCancellation != null)
+                {
+                    _altSvcBlocklistTimerCancellation.Cancel();
+                    _altSvcBlocklistTimerCancellation.Dispose();
+                    _altSvcBlocklistTimerCancellation = null;
                 }
 
-                Debug.Assert(_availableHttp11Connections.Count == 0, $"Expected {nameof(_availableHttp11Connections)}.{nameof(_availableHttp11Connections.Count)} == 0");
                 Debug.Assert((_availableHttp2Connections?.Count ?? 0) == 0, $"Expected {nameof(_availableHttp2Connections)}.{nameof(_availableHttp2Connections.Count)} == 0");
             }
 
-            // Dispose outside the lock to avoid lock re-entrancy issues.
+            // Dispose connections outside the lock to avoid lock re-entrancy issues.
+
+            // This will trigger the disposal of Http11 connections.
+            // Note: Http11 connections will decrement the _associatedHttp11ConnectionCount when disposed.
+            // Http2 connections will not, hence the difference in handing _associatedHttp2ConnectionCount.
+            ProcessHttp11RequestQueue(null);
+
             toDispose?.ForEach(c => c.Dispose());
         }
 
@@ -2275,7 +2381,8 @@ public bool CleanCacheAndDisposeIfUnused()
                 // will be purged next time around.
                 _usedSinceLastCleanup = false;
 
-                ScavengeConnectionList(_availableHttp11Connections, ref toDispose, nowTicks, pooledConnectionLifetime, pooledConnectionIdleTimeout);
+                ScavengeConnectionStack(this, _http11Connections, ref toDispose, nowTicks, pooledConnectionLifetime, pooledConnectionIdleTimeout);
+
                 if (_availableHttp2Connections is not null)
                 {
                     int removed = ScavengeConnectionList(_availableHttp2Connections, ref toDispose, nowTicks, pooledConnectionLifetime, pooledConnectionIdleTimeout);
@@ -2297,6 +2404,47 @@ public bool CleanCacheAndDisposeIfUnused()
             // Pool is active.  Should not be removed.
             return false;
 
+            static void ScavengeConnectionStack(HttpConnectionPool pool, ConcurrentStack<HttpConnection> connections, ref List<HttpConnectionBase>? toDispose, long nowTicks, TimeSpan pooledConnectionLifetime, TimeSpan pooledConnectionIdleTimeout)
+            {
+                // We can't simply enumerate the connections stack as other threads may still be adding and removing entries.
+                // If we want to check the state of a connection, we must take it from the stack first to ensure we own it.
+
+                // We're about to starve the connection pool of all available connections for a moment.
+                // We must be holding the lock while doing so to ensure that any new requests that
+                // come in during this time will be blocked waiting in ProcessHttp11RequestQueue.
+                // If this were not the case, requests would repeatedly call into CheckForHttp11ConnectionInjection
+                // and trigger new connection attempts, even if we have enough connections in our copy.
+                Debug.Assert(pool.HasSyncObjLock);
+                Debug.Assert(connections.Count <= pool._associatedHttp11ConnectionCount);
+
+                HttpConnection[] stackCopy = ArrayPool<HttpConnection>.Shared.Rent(pool._associatedHttp11ConnectionCount);
+                int usableConnections = 0;
+
+                while (connections.TryPop(out HttpConnection? connection))
+                {
+                    if (IsUsableConnection(connection, nowTicks, pooledConnectionLifetime, pooledConnectionIdleTimeout))
+                    {
+                        stackCopy[usableConnections++] = connection;
+                    }
+                    else
+                    {
+                        toDispose ??= new List<HttpConnectionBase>();
+                        toDispose.Add(connection);
+                    }
+                }
+
+                if (usableConnections > 0)
+                {
+                    // Add them back in reverse to maintain the LIFO order.
+                    Span<HttpConnection> usable = stackCopy.AsSpan(0, usableConnections);
+                    usable.Reverse();
+                    connections.PushRange(stackCopy, 0, usableConnections);
+                    usable.Clear();
+                }
+
+                ArrayPool<HttpConnection>.Shared.Return(stackCopy);
+            }
+
             static int ScavengeConnectionList<T>(List<T> list, ref List<HttpConnectionBase>? toDispose, long nowTicks, TimeSpan pooledConnectionLifetime, TimeSpan pooledConnectionIdleTimeout)
                 where T : HttpConnectionBase
             {
@@ -2401,6 +2549,7 @@ internal void HeartBeat()
                 localHttp2Connections = _availableHttp2Connections?.ToArray();
             }
 
+            // Avoid calling HeartBeat under the lock, as it may call back into HttpConnectionPool.InvalidateHttp2Connection.
             if (localHttp2Connections is not null)
             {
                 foreach (Http2Connection http2Connection in localHttp2Connections)
@@ -2537,10 +2686,16 @@ private void Grow()
             public HttpConnectionWaiter<T> EnqueueRequest(HttpRequestMessage request)
             {
                 var waiter = new HttpConnectionWaiter<T>();
-                Enqueue(new QueueItem { Request = request, Waiter = waiter });
+                EnqueueRequest(request, waiter);
                 return waiter;
             }
 
+
+            public void EnqueueRequest(HttpRequestMessage request, HttpConnectionWaiter<T> waiter)
+            {
+                Enqueue(new QueueItem { Request = request, Waiter = waiter });
+            }
+
             public void PruneCompletedRequestsFromHeadOfQueue(HttpConnectionPool pool)
             {
                 while (TryPeek(out QueueItem queueItem) && queueItem.Waiter.Task.IsCompleted)
diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpEnvironmentProxy.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpEnvironmentProxy.cs
index 226f0f449df5..629b212f4581 100644
--- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpEnvironmentProxy.cs
+++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpEnvironmentProxy.cs
@@ -170,16 +170,9 @@ private HttpEnvironmentProxy(Uri? httpProxy, Uri? httpsProxy, string? bypassList
             int separatorIndex = value.LastIndexOf('@');
             if (separatorIndex != -1)
             {
-                string auth = value.Substring(0, separatorIndex);
-
                 // The User and password may or may not be URL encoded.
-                // Curl seems to accept both. To match that,
-                // we do opportunistic decode and we use original string if it fails.
-                try
-                {
-                    auth = Uri.UnescapeDataString(auth);
-                }
-                catch { };
+                // Curl seems to accept both. To match that, we also decode the value.
+                string auth = Uri.UnescapeDataString(value.AsSpan(0, separatorIndex));
 
                 value = value.Substring(separatorIndex + 1);
                 separatorIndex = auth.IndexOf(':');
diff --git a/src/libraries/System.Net.Http/tests/FunctionalTests/HttpClientHandlerTest.Http2.cs b/src/libraries/System.Net.Http/tests/FunctionalTests/HttpClientHandlerTest.Http2.cs
index a4e312279dea..3036935ef553 100644
--- a/src/libraries/System.Net.Http/tests/FunctionalTests/HttpClientHandlerTest.Http2.cs
+++ b/src/libraries/System.Net.Http/tests/FunctionalTests/HttpClientHandlerTest.Http2.cs
@@ -205,6 +205,50 @@ public async Task Http2_ZeroLengthResponseBody_Success()
             }
         }
 
+        [Fact]
+        public async Task Http2_DataFrameOnlyPadding_Success()
+        {
+            using (Http2LoopbackServer server = Http2LoopbackServer.CreateServer())
+            using (HttpClient client = CreateHttpClient())
+            {
+                Task<HttpResponseMessage> sendTask = client.GetAsync(server.Address, HttpCompletionOption.ResponseHeadersRead);
+
+                Http2LoopbackConnection connection = await server.EstablishConnectionAsync();
+
+                int streamId = await connection.ReadRequestHeaderAsync();
+
+                await connection.SendDefaultResponseHeadersAsync(streamId);
+
+                // Send zero-length DATA frame with padding
+                byte paddingLength = byte.MaxValue;
+                int dataLength = 1024;
+                DataFrame frame = new DataFrame(new byte[0], FrameFlags.Padded, paddingLength, streamId);
+                await connection.WriteFrameAsync(frame);
+
+                HttpResponseMessage response = await sendTask;
+                Assert.Equal(HttpStatusCode.OK, response.StatusCode);
+
+                using var responseStream = response.Content.ReadAsStream();
+
+                // The read must pend because we havent received any data yet.
+                var buffer = new byte[dataLength];
+                var readTask = responseStream.ReadAtLeastAsync(buffer, dataLength);
+                Assert.False(readTask.IsCompleted);
+
+                // Send DATA frame with padding
+                frame = new DataFrame(new byte[dataLength], FrameFlags.Padded, paddingLength, streamId);
+                await connection.WriteFrameAsync(frame);
+
+                Assert.Equal(dataLength, await readTask);
+
+                // Send zero-length, end-stream DATA frame with padding
+                frame = new DataFrame(new byte[0], FrameFlags.Padded | FrameFlags.EndStream, paddingLength, streamId);
+                await connection.WriteFrameAsync(frame);
+
+                Assert.Equal(0, await responseStream.ReadAsync(buffer));
+            }
+        }
+
         [Theory]
         [InlineData("Client content", null)]
         [InlineData("Client content", "Server content")]
@@ -2453,6 +2497,7 @@ public async Task PostAsyncDuplex_ClientSendsEndStream_Success()
                     HttpResponseMessage response = await responseTask;
                     Stream responseStream = await response.Content.ReadAsStreamAsync();
 
+                    connection.IgnoreWindowUpdates();
                     // Send some data back and forth
                     await SendAndReceiveResponseDataAsync(contentBytes, responseStream, connection, streamId);
                     await SendAndReceiveResponseDataAsync(contentBytes, responseStream, connection, streamId);
@@ -2513,6 +2558,7 @@ public async Task PostAsyncDuplex_ServerSendsEndStream_Success()
                     HttpResponseMessage response = await responseTask;
                     Stream responseStream = await response.Content.ReadAsStreamAsync();
 
+                    connection.IgnoreWindowUpdates();
                     // Send some data back and forth
                     await SendAndReceiveResponseDataAsync(contentBytes, responseStream, connection, streamId);
                     await SendAndReceiveResponseDataAsync(contentBytes, responseStream, connection, streamId);
@@ -2833,6 +2879,7 @@ public async Task PostAsyncDuplex_DisposeResponseBodyBeforeEnd_ResetsStreamAndTh
                     // This allows the request processing to complete.
                     duplexContent.Fail(e);
 
+                    connection.IgnoreWindowUpdates(); // The RTT algorithm may send a WINDOW_UPDATE before RST_STREAM.
                     // Client should set RST_STREAM.
                     await connection.ReadRstStreamAsync(streamId);
                 }
@@ -2906,6 +2953,7 @@ public async Task PostAsyncDuplex_DisposeResponseBodyAfterEndReceivedButBeforeCo
                     // This allows the request processing to complete.
                     duplexContent.Fail(e);
 
+                    connection.IgnoreWindowUpdates(); // The RTT algorithm may send a WINDOW_UPDATE before RST_STREAM.
                     // Client should set RST_STREAM.
                     await connection.ReadRstStreamAsync(streamId);
                 }
diff --git a/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.Http2ExtendedConnect.cs b/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.Http2ExtendedConnect.cs
index 0ea6ae9e13f6..f3a46d4d0850 100644
--- a/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.Http2ExtendedConnect.cs
+++ b/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.Http2ExtendedConnect.cs
@@ -2,8 +2,10 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.IO;
 using System.Net.Test.Common;
+using System.Threading;
 using System.Threading.Tasks;
 using Xunit;
 using Xunit.Abstractions;
@@ -31,6 +33,7 @@ public static IEnumerable<object[]> UseSsl_MemberData()
         [MemberData(nameof(UseSsl_MemberData))]
         public async Task Connect_ReadWriteResponseStream(bool useSsl)
         {
+            const int MessageCount = 3;
             byte[] clientMessage = new byte[] { 1, 2, 3 };
             byte[] serverMessage = new byte[] { 4, 5, 6, 7 };
 
@@ -43,34 +46,62 @@ await Http2LoopbackServerFactory.Singleton.CreateClientAndServerAsync(async uri
                 HttpRequestMessage request = CreateRequest(HttpMethod.Connect, uri, UseVersion, exactVersion: true);
                 request.Headers.Protocol = "foo";
 
+                bool readFromContentStream = false;
+
+                // We won't send the content bytes, but we will send content headers.
+                // Since we're dropping the content, we'll also drop the Content-Length header.
+                request.Content = new StreamContent(new DelegateStream(
+                    readAsyncFunc: (_, _, _, _) =>
+                    {
+                        readFromContentStream = true;
+                        throw new UnreachableException();
+                    }));
+
+                request.Headers.Add("User-Agent", "foo");
+                request.Content.Headers.Add("Content-Language", "bar");
+                request.Content.Headers.ContentLength = 42;
+
                 using HttpResponseMessage response = await client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
 
                 using Stream responseStream = await response.Content.ReadAsStreamAsync();
 
-                await responseStream.WriteAsync(clientMessage);
-                await responseStream.FlushAsync();
+                for (int i = 0; i < MessageCount; i++)
+                {
+                    await responseStream.WriteAsync(clientMessage);
+                    await responseStream.FlushAsync();
 
-                byte[] readBuffer = new byte[serverMessage.Length];
-                await responseStream.ReadExactlyAsync(readBuffer);
-                Assert.Equal(serverMessage, readBuffer);
+                    byte[] readBuffer = new byte[serverMessage.Length];
+                    await responseStream.ReadExactlyAsync(readBuffer);
+                    Assert.Equal(serverMessage, readBuffer);
+                }
 
                 // Receive server's EOS
-                Assert.Equal(0, await responseStream.ReadAsync(readBuffer));
+                Assert.Equal(0, await responseStream.ReadAsync(new byte[1]));
+
+                Assert.False(readFromContentStream);
 
                 clientCompleted.SetResult();
             },
             async server =>
             {
                 await using Http2LoopbackConnection connection = await ((Http2LoopbackServer)server).EstablishConnectionAsync(new SettingsEntry { SettingId = SettingId.EnableConnect, Value = 1 });
+                connection.IgnoreWindowUpdates();
+
+                (int streamId, HttpRequestData request) = await connection.ReadAndParseRequestHeaderAsync(readBody: false);
 
-                (int streamId, _) = await connection.ReadAndParseRequestHeaderAsync(readBody: false);
+                Assert.Equal("foo", request.GetSingleHeaderValue("User-Agent"));
+                Assert.Equal("bar", request.GetSingleHeaderValue("Content-Language"));
+                Assert.Equal(0, request.GetHeaderValueCount("Content-Length"));
 
                 await connection.SendResponseHeadersAsync(streamId, endStream: false).ConfigureAwait(false);
 
-                DataFrame dataFrame = await connection.ReadDataFrameAsync();
-                Assert.Equal(clientMessage, dataFrame.Data.ToArray());
+                for (int i = 0; i < MessageCount; i++)
+                {
+                    DataFrame dataFrame = await connection.ReadDataFrameAsync();
+                    Assert.Equal(clientMessage, dataFrame.Data.ToArray());
 
-                await connection.SendResponseDataAsync(streamId, serverMessage, endStream: true);
+                    await connection.SendResponseDataAsync(streamId, serverMessage, endStream: i == MessageCount - 1);
+                }
 
                 await clientCompleted.Task.WaitAsync(TestHelper.PassingTestTimeout);
             }, options: new GenericLoopbackOptions { UseSsl = useSsl });
@@ -163,5 +194,114 @@ await server.AcceptConnectionAsync(async connection =>
 
             await new[] { serverTask, clientTask }.WhenAllOrAnyFailed().WaitAsync(TestHelper.PassingTestTimeout);
         }
+
+        [Theory]
+        [MemberData(nameof(UseSsl_MemberData))]
+        public async Task Connect_ServerSideEOS_ReceivedByClient(bool useSsl)
+        {
+            var timeoutTcs = new CancellationTokenSource(TestHelper.PassingTestTimeout);
+            var serverReceivedEOS = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
+
+            await Http2LoopbackServerFactory.Singleton.CreateClientAndServerAsync(
+                clientFunc: async uri =>
+                {
+                    var client = CreateHttpClient();
+                    var request = CreateRequest(HttpMethod.Connect, uri, UseVersion, exactVersion: true);
+                    request.Headers.Protocol = "foo";
+
+                    var response = await client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, timeoutTcs.Token);
+                    var responseStream = await response.Content.ReadAsStreamAsync(timeoutTcs.Token);
+
+                    // receive server's EOS
+                    Assert.Equal(0, await responseStream.ReadAsync(new byte[1], timeoutTcs.Token));
+
+                    // send client's EOS
+                    responseStream.Dispose();
+
+                    // wait for "ack" from server
+                    await serverReceivedEOS.Task.WaitAsync(timeoutTcs.Token);
+
+                    // can dispose handler now
+                    client.Dispose();
+                },
+                serverFunc: async server =>
+                {
+                    await using var connection = await ((Http2LoopbackServer)server).EstablishConnectionAsync(
+                        new SettingsEntry { SettingId = SettingId.EnableConnect, Value = 1 });
+                    connection.IgnoreWindowUpdates();
+
+                    (int streamId, _) = await connection.ReadAndParseRequestHeaderAsync(readBody: false);
+                    await connection.SendResponseHeadersAsync(streamId, endStream: false);
+
+                    // send server's EOS
+                    await connection.SendResponseDataAsync(streamId, Array.Empty<byte>(), endStream: true);
+
+                    // receive client's EOS "in response" to server's EOS
+                    var eosFrame = Assert.IsType<DataFrame>(await connection.ReadFrameAsync(timeoutTcs.Token));
+                    Assert.Equal(streamId, eosFrame.StreamId);
+                    Assert.Equal(0, eosFrame.Data.Length);
+                    Assert.True(eosFrame.EndStreamFlag);
+
+                    serverReceivedEOS.SetResult();
+
+                    // on handler dispose, client should shutdown the connection without sending additional frames
+                    await connection.WaitForClientDisconnectAsync().WaitAsync(timeoutTcs.Token);
+                },
+                options: new GenericLoopbackOptions { UseSsl = useSsl });
+        }
+
+        [Theory]
+        [MemberData(nameof(UseSsl_MemberData))]
+        public async Task Connect_ClientSideEOS_ReceivedByServer(bool useSsl)
+        {
+            var timeoutTcs = new CancellationTokenSource(TestHelper.PassingTestTimeout);
+            var serverReceivedRst = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
+
+            await Http2LoopbackServerFactory.Singleton.CreateClientAndServerAsync(
+                clientFunc: async uri =>
+                {
+                    var client = CreateHttpClient();
+                    var request = CreateRequest(HttpMethod.Connect, uri, UseVersion, exactVersion: true);
+                    request.Headers.Protocol = "foo";
+
+                    var response = await client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, timeoutTcs.Token);
+                    var responseStream = await response.Content.ReadAsStreamAsync(timeoutTcs.Token);
+
+                    // send client's EOS
+                    // this will also send RST_STREAM as we didn't receive server's EOS before
+                    responseStream.Dispose();
+
+                    // wait for "ack" from server
+                    await serverReceivedRst.Task.WaitAsync(timeoutTcs.Token);
+
+                    // can dispose handler now
+                    client.Dispose();
+                },
+                serverFunc: async server =>
+                {
+                    await using var connection = await ((Http2LoopbackServer)server).EstablishConnectionAsync(
+                        new SettingsEntry { SettingId = SettingId.EnableConnect, Value = 1 });
+                    connection.IgnoreWindowUpdates();
+
+                    (int streamId, _) = await connection.ReadAndParseRequestHeaderAsync(readBody: false);
+                    await connection.SendResponseHeadersAsync(streamId, endStream: false);
+
+                    // receive client's EOS
+                    var eosFrame = Assert.IsType<DataFrame>(await connection.ReadFrameAsync(timeoutTcs.Token));
+                    Assert.Equal(streamId, eosFrame.StreamId);
+                    Assert.Equal(0, eosFrame.Data.Length);
+                    Assert.True(eosFrame.EndStreamFlag);
+
+                    // receive client's RST_STREAM as we didn't send server's EOS before
+                    var rstFrame = Assert.IsType<RstStreamFrame>(await connection.ReadFrameAsync(timeoutTcs.Token));
+                    Assert.Equal(streamId, rstFrame.StreamId);
+
+                    serverReceivedRst.SetResult();
+
+                    // on handler dispose, client should shutdown the connection without sending additional frames
+                    await connection.WaitForClientDisconnectAsync().WaitAsync(timeoutTcs.Token);
+                },
+                options: new GenericLoopbackOptions { UseSsl = useSsl });
+        }
     }
 }
diff --git a/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.Http2FlowControl.cs b/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.Http2FlowControl.cs
index 4862c0a4ae52..f1dc09a9f735 100644
--- a/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.Http2FlowControl.cs
+++ b/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.Http2FlowControl.cs
@@ -128,7 +128,7 @@ static async Task RunTest()
                     TimeSpan.FromMilliseconds(30),
                     TimeSpan.Zero,
                     2 * 1024 * 1024,
-                    null);
+                    maxWindowForPingStopValidation: MaxWindow);
 
                 Assert.True(maxCredit <= MaxWindow);
             }
@@ -181,19 +181,34 @@ static async Task RunTest()
             RemoteExecutor.Invoke(RunTest, options).Dispose();
         }
 
+        [OuterLoop("Runs long")]
+        [Fact]
+        public async Task LongRunningSlowServerStream_NoInvalidPingsAreSent()
+        {
+            // A scenario similar to https://github.com/grpc/grpc-dotnet/issues/2361.
+            // We need to send a small amount of data so the connection window is not consumed and no "standard" WINDOW_UPDATEs are sent and
+            // we also need to do it very slowly to cover some RTT PINGs after the initial burst.
+            // This scenario should trigger the "forced WINDOW_UPDATE" logic in the implementation, ensuring that no more than 4 PINGs are sent without a WINDOW_UPDATE.
+            await TestClientWindowScalingAsync(
+                TimeSpan.FromMilliseconds(500),
+                TimeSpan.FromMilliseconds(500),
+                1024,
+                _output,
+                dataPerFrame: 32);
+        }
+
         private static async Task<int> TestClientWindowScalingAsync(
             TimeSpan networkDelay,
             TimeSpan slowBandwidthSimDelay,
             int bytesToDownload,
             ITestOutputHelper output = null,
-            int maxWindowForPingStopValidation = int.MaxValue, // set to actual maximum to test if we stop sending PING when window reached maximum
-            Action<SocketsHttpHandler> configureHandler = null)
+            int dataPerFrame = 16384,
+            int maxWindowForPingStopValidation = 16 * 1024 * 1024) // set to actual maximum to test if we stop sending PING when window reached maximum
         {
             TimeSpan timeout = TimeSpan.FromSeconds(30);
             CancellationTokenSource timeoutCts = new CancellationTokenSource(timeout);
 
             HttpClientHandler handler = CreateHttpClientHandler(HttpVersion20.Value);
-            configureHandler?.Invoke(GetUnderlyingSocketsHttpHandler(handler));
 
             using Http2LoopbackServer server = Http2LoopbackServer.CreateServer(NoAutoPingResponseHttp2Options);
             using HttpClient client = new HttpClient(handler, true);
@@ -225,13 +240,13 @@ private static async Task<int> TestClientWindowScalingAsync(
             using SemaphoreSlim writeSemaphore = new SemaphoreSlim(1);
             int remainingBytes = bytesToDownload;
 
-            bool pingReceivedAfterReachingMaxWindow = false;
+            string unexpectedPingReason = null;
             bool unexpectedFrameReceived = false;
             CancellationTokenSource stopFrameProcessingCts = new CancellationTokenSource();
             
             CancellationTokenSource linkedCts = CancellationTokenSource.CreateLinkedTokenSource(stopFrameProcessingCts.Token, timeoutCts.Token);
             Task processFramesTask = ProcessIncomingFramesAsync(linkedCts.Token);
-            byte[] buffer = new byte[16384];
+            byte[] buffer = new byte[dataPerFrame];
 
             while (remainingBytes > 0)
             {
@@ -259,7 +274,7 @@ private static async Task<int> TestClientWindowScalingAsync(
 
             int dataReceived = (await response.Content.ReadAsByteArrayAsync()).Length;
             Assert.Equal(bytesToDownload, dataReceived);
-            Assert.False(pingReceivedAfterReachingMaxWindow, "Server received a PING after reaching max window");
+            Assert.Null(unexpectedPingReason);
             Assert.False(unexpectedFrameReceived, "Server received an unexpected frame, see test output for more details.");
 
             return maxCredit;
@@ -270,6 +285,7 @@ async Task ProcessIncomingFramesAsync(CancellationToken cancellationToken)
                 // We should not receive any more RTT PING's after this point
                 int maxWindowCreditThreshold = (int) (0.9 * maxWindowForPingStopValidation);
                 output?.WriteLine($"maxWindowCreditThreshold: {maxWindowCreditThreshold} maxWindowForPingStopValidation: {maxWindowForPingStopValidation}");
+                int pingsWithoutWindowUpdate = 0;
 
                 try
                 {
@@ -284,10 +300,18 @@ async Task ProcessIncomingFramesAsync(CancellationToken cancellationToken)
 
                             output?.WriteLine($"Received PING ({pingFrame.Data})");
 
+                            pingsWithoutWindowUpdate++;
                             if (maxCredit > maxWindowCreditThreshold)
                             {
-                                output?.WriteLine("PING was unexpected");
-                                Volatile.Write(ref pingReceivedAfterReachingMaxWindow, true);
+                                Volatile.Write(ref unexpectedPingReason, "The server received a PING after reaching max window");
+                                output?.WriteLine($"PING was unexpected: {unexpectedPingReason}");
+                            }
+
+                            // Exceeding this limit may trigger a GOAWAY on some servers. See implementation comments for more details.
+                            if (pingsWithoutWindowUpdate > 4)
+                            {
+                                Volatile.Write(ref unexpectedPingReason, $"The server received {pingsWithoutWindowUpdate} PINGs without receiving a WINDOW_UPDATE");
+                                output?.WriteLine($"PING was unexpected: {unexpectedPingReason}");
                             }
 
                             await writeSemaphore.WaitAsync(cancellationToken);
@@ -296,6 +320,7 @@ async Task ProcessIncomingFramesAsync(CancellationToken cancellationToken)
                         }
                         else if (frame is WindowUpdateFrame windowUpdateFrame)
                         {
+                            pingsWithoutWindowUpdate = 0;
                             // Ignore connection window:
                             if (windowUpdateFrame.StreamId != streamId) continue;
 
diff --git a/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.cs b/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.cs
index 0fd9636a7cab..c44df51433c9 100644
--- a/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.cs
+++ b/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.cs
@@ -3531,6 +3531,76 @@ public sealed class SocketsHttpHandlerTest_ConnectCallback_Http2 : SocketsHttpHa
     {
         public SocketsHttpHandlerTest_ConnectCallback_Http2(ITestOutputHelper output) : base(output) { }
         protected override Version UseVersion => HttpVersion.Version20;
+
+        [Fact]
+        public async Task Http2Connection_DroppedWhenDowngradingToHttp11WhenAtMaxConnections()
+        {
+            // Regression test for https://github.com/dotnet/runtime/issues/99401
+            await LoopbackServer.CreateClientAndServerAsync(
+                async uri =>
+                {
+                    using HttpClientHandler handler = CreateHttpClientHandler();
+                    using HttpClient client = CreateHttpClient(handler);
+
+                    int connectionCount = 0;
+
+                    GetUnderlyingSocketsHttpHandler(handler).MaxConnectionsPerServer = 1;
+
+                    GetUnderlyingSocketsHttpHandler(handler).ConnectCallback = async (context, token) =>
+                    {
+                        connectionCount++;
+
+                        var socket = new Socket(SocketType.Stream, ProtocolType.Tcp) { NoDelay = true };
+                        await socket.ConnectAsync(context.DnsEndPoint, token);
+                        var stream = new NetworkStream(socket, ownsSocket: true);
+
+                        // Not using ALPN, so the client will attempt to downgrade to HTTP/1.1.
+                        var options = new SslClientAuthenticationOptions();
+                        options.RemoteCertificateValidationCallback = (_, _, _, _) => true;
+                        options.TargetHost = context.DnsEndPoint.Host;
+
+                        var sslStream = new SslStream(stream);
+                        await sslStream.AuthenticateAsClientAsync(options);
+                        return sslStream;
+                    };
+
+                    // Send a request to establish the first HTTP/1.1 connection.
+                    using HttpResponseMessage response1 = await client.SendAsync(CreateRequest(HttpMethod.Get, uri, HttpVersion.Version11));
+                    Assert.Equal(1, connectionCount);
+                    Assert.Equal("1", await response1.Content.ReadAsStringAsync());
+
+                    // Send an HTTP/2 request that will be downgraded to HTTP/1.1.
+                    // The new connection should be thrown away as we're at the connection limit,
+                    // and the request should be handled using the existing HTTP/1.1 connection.
+                    using HttpResponseMessage response2 = await client.SendAsync(CreateRequest(HttpMethod.Get, uri, HttpVersion.Version20));
+                    Assert.Equal(2, connectionCount);
+                    Assert.Equal("2", await response2.Content.ReadAsStringAsync());
+
+                    // If we now block the first connection, the second request should wait without attempting to open a new connection.
+                    Task<string> firstRequestTask = client.GetStringAsync(uri);
+                    Task<string> secondRequestTask = client.GetStringAsync(uri);
+
+                    Assert.Equal("3", await firstRequestTask);
+                    Assert.Equal("4", await secondRequestTask);
+
+                    Assert.Equal(2, connectionCount);
+                },
+                async server =>
+                {
+                    await using LoopbackServer.Connection connection = await server.EstablishConnectionAsync();
+
+                    await connection.ReadRequestHeaderAndSendResponseAsync(content: "1");
+
+                    // The client should throw away this connection as soon as it notices there's no ALPN for HTTP/2.
+                    await using var secondConnection = await server.EstablishConnectionAsync();
+                    await Assert.ThrowsAnyAsync<Exception>(() => secondConnection.ReadRequestDataAsync());
+
+                    await connection.ReadRequestHeaderAndSendResponseAsync(content: "2");
+                    await connection.ReadRequestHeaderAndSendResponseAsync(content: "3");
+                    await connection.ReadRequestHeaderAndSendResponseAsync(content: "4");
+                },
+                new LoopbackServer.Options { UseSsl = true });
+        }
     }
 
     public abstract class SocketsHttpHandlerTest_PlaintextStreamFilter : HttpClientHandlerTestBase
diff --git a/src/libraries/System.Net.Http/tests/FunctionalTests/System.Net.Http.Functional.Tests.csproj b/src/libraries/System.Net.Http/tests/FunctionalTests/System.Net.Http.Functional.Tests.csproj
index d195737dfeb0..26cc22001465 100644
--- a/src/libraries/System.Net.Http/tests/FunctionalTests/System.Net.Http.Functional.Tests.csproj
+++ b/src/libraries/System.Net.Http/tests/FunctionalTests/System.Net.Http.Functional.Tests.csproj
@@ -24,7 +24,7 @@
     <TestArchiveTestsRoot>$(TestArchiveRoot)browserornodejs/</TestArchiveTestsRoot>
     <TestArchiveTestsDir>$(TestArchiveTestsRoot)$(OSPlatformConfig)/</TestArchiveTestsDir>
     <DefineConstants>$(DefineConstants);TARGET_BROWSER</DefineConstants>
-    <WasmXHarnessMonoArgs>--setenv=XHARNESS_LOG_TEST_START=true</WasmXHarnessMonoArgs>
+    <XunitShowProgress>true</XunitShowProgress>
     <!-- This WASM test is problematic and slow right now. This sets the xharness timeout but there is also override in sendtohelix-browser.targets -->
     <WasmXHarnessTestsTimeout>01:15:00</WasmXHarnessTestsTimeout>
   </PropertyGroup>
diff --git a/src/libraries/System.Net.Http/tests/StressTests/HttpStress/Dockerfile b/src/libraries/System.Net.Http/tests/StressTests/HttpStress/Dockerfile
index 95f3cb0eb8c0..545041cdce47 100644
--- a/src/libraries/System.Net.Http/tests/StressTests/HttpStress/Dockerfile
+++ b/src/libraries/System.Net.Http/tests/StressTests/HttpStress/Dockerfile
@@ -7,13 +7,13 @@ RUN apt-get update -y && \
     apt-get upgrade -y && \
     apt-get install -y cmake clang ruby-dev gem lttng-tools libssl-dev && \
     gem install fpm
-RUN git clone --recursive https://github.com/dotnet/msquic
-RUN cd msquic/src/msquic && \
+RUN git clone --recursive https://github.com/microsoft/msquic --depth 1
+RUN cd msquic/ && \
     mkdir build && \
-    cmake -B build -DCMAKE_BUILD_TYPE=Release -DQUIC_ENABLE_LOGGING=false -DQUIC_USE_SYSTEM_LIBCRYPTO=true -DQUIC_BUILD_TOOLS=off -DQUIC_BUILD_TEST=off -DQUIC_BUILD_PERF=off -DQUIC_TLS=openssl3 && \
+    cmake -B build -DCMAKE_BUILD_TYPE=Debug -DQUIC_ENABLE_LOGGING=false -DQUIC_USE_SYSTEM_LIBCRYPTO=true -DQUIC_BUILD_TOOLS=off -DQUIC_BUILD_TEST=off -DQUIC_BUILD_PERF=off -DQUIC_TLS=openssl3 -DQUIC_ENABLE_SANITIZERS=on && \
     cd build && \
-    cmake --build . --config Release
-RUN cd msquic/src/msquic/build/bin/Release && \
+    cmake --build . --config Debug
+RUN cd msquic/build/bin/Debug && \
     rm libmsquic.so && \
     fpm -f -s dir -t deb -n libmsquic -v $( find -type f | cut -d "." -f 4- ) \
     --license MIT --url https://github.com/microsoft/msquic --log error \
@@ -40,6 +40,10 @@ ENV DOTNET_DbgMiniDumpName="/dumps-share/coredump.%p"
 
 EXPOSE 5001
 
+# configure adress sanitizer
+ENV ASAN_OPTIONS='detect_leaks=0'
+ENV LD_PRELOAD=/usr/lib/gcc/x86_64-linux-gnu/12/libasan.so
+
 ENV VERSION=$VERSION
 ENV CONFIGURATION=$CONFIGURATION
 ENV HTTPSTRESS_ARGS=''
diff --git a/src/libraries/System.Net.Http/tests/UnitTests/HPack/HPackRoundtripTests.cs b/src/libraries/System.Net.Http/tests/UnitTests/HPack/HPackRoundtripTests.cs
index 541fcb7dacd4..a73ddafe551b 100644
--- a/src/libraries/System.Net.Http/tests/UnitTests/HPack/HPackRoundtripTests.cs
+++ b/src/libraries/System.Net.Http/tests/UnitTests/HPack/HPackRoundtripTests.cs
@@ -71,19 +71,8 @@ private static Memory<byte> HPackEncode(HttpHeaders headers, Encoding? valueEnco
                 {
                     // For all other known headers, send them via their pre-encoded name and the associated value.
                     WriteBytes(knownHeader.Http2EncodedName);
-                    string separator = null;
-                    if (headerValuesSpan.Length > 1)
-                    {
-                        HttpHeaderParser parser = header.Key.Parser;
-                        if (parser != null && parser.SupportsMultipleValues)
-                        {
-                            separator = parser.Separator;
-                        }
-                        else
-                        {
-                            separator = HttpHeaderParser.DefaultSeparator;
-                        }
-                    }
+
+                    byte[]? separator = headerValuesSpan.Length > 1 ? header.Key.SeparatorBytes : null;
 
                     WriteLiteralHeaderValues(headerValuesSpan, separator);
                 }
@@ -105,7 +94,7 @@ void WriteBytes(ReadOnlySpan<byte> bytes)
                 buffer.Commit(bytes.Length);
             }
 
-            void WriteLiteralHeaderValues(ReadOnlySpan<string> values, string separator)
+            void WriteLiteralHeaderValues(ReadOnlySpan<string> values, byte[]? separator)
             {
                 int bytesWritten;
                 while (!HPackEncoder.EncodeStringLiterals(values, separator, valueEncoding, buffer.AvailableSpan, out bytesWritten))
@@ -120,7 +109,7 @@ void WriteLiteralHeaderValues(ReadOnlySpan<string> values, string separator)
             void WriteLiteralHeader(string name, ReadOnlySpan<string> values)
             {
                 int bytesWritten;
-                while (!HPackEncoder.EncodeLiteralHeaderFieldWithoutIndexingNewName(name, values, HttpHeaderParser.DefaultSeparator, valueEncoding, buffer.AvailableSpan, out bytesWritten))
+                while (!HPackEncoder.EncodeLiteralHeaderFieldWithoutIndexingNewName(name, values, HttpHeaderParser.DefaultSeparatorBytes, valueEncoding, buffer.AvailableSpan, out bytesWritten))
                 {
                     buffer.Grow();
                     FillAvailableSpaceWithOnes(buffer);
diff --git a/src/libraries/System.Net.HttpListener/src/System/Net/Windows/CookieExtensions.cs b/src/libraries/System.Net.HttpListener/src/System/Net/Windows/CookieExtensions.cs
index e5b3783c19e7..0e5366f9bc14 100644
--- a/src/libraries/System.Net.HttpListener/src/System/Net/Windows/CookieExtensions.cs
+++ b/src/libraries/System.Net.HttpListener/src/System/Net/Windows/CookieExtensions.cs
@@ -14,7 +14,7 @@ internal static class CookieExtensions
 
         public static string ToServerString(this Cookie cookie)
         {
-            s_toServerStringFunc ??= (Func<Cookie, string>)typeof(Cookie).GetMethod("ToServerString", BindingFlags.Instance | BindingFlags.NonPublic)!.CreateDelegate(typeof(Func<Cookie, string>));
+            s_toServerStringFunc ??= (Func<Cookie, string>)typeof(Cookie).GetMethod("ToServerString", BindingFlags.Instance | BindingFlags.NonPublic)?.CreateDelegate(typeof(Func<Cookie, string>))!;
             Debug.Assert(s_toServerStringFunc != null, "Reflection failed for Cookie.ToServerString().");
             return s_toServerStringFunc(cookie);
         }
@@ -23,7 +23,7 @@ public static string ToServerString(this Cookie cookie)
 
         public static Cookie Clone(this Cookie cookie)
         {
-            s_cloneFunc ??= (Func<Cookie, Cookie>)typeof(Cookie).GetMethod("Clone", BindingFlags.Instance | BindingFlags.NonPublic)!.CreateDelegate(typeof(Func<Cookie, Cookie>));
+            s_cloneFunc ??= (Func<Cookie, Cookie>)typeof(Cookie).GetMethod("Clone", BindingFlags.Instance | BindingFlags.NonPublic)?.CreateDelegate(typeof(Func<Cookie, Cookie>))!;
             Debug.Assert(s_cloneFunc != null, "Reflection failed for Cookie.Clone().");
             return s_cloneFunc(cookie);
         }
@@ -41,7 +41,7 @@ private enum CookieVariant
 
         public static bool IsRfc2965Variant(this Cookie cookie)
         {
-            s_getVariantFunc ??= (Func<Cookie, CookieVariant>)typeof(Cookie).GetProperty("Variant", BindingFlags.Instance | BindingFlags.NonPublic)!.GetGetMethod(true)!.CreateDelegate(typeof(Func<Cookie, CookieVariant>));
+            s_getVariantFunc ??= (Func<Cookie, CookieVariant>)typeof(Cookie).GetProperty("Variant", BindingFlags.Instance | BindingFlags.NonPublic)?.GetGetMethod(true)?.CreateDelegate(typeof(Func<Cookie, CookieVariant>))!;
             Debug.Assert(s_getVariantFunc != null, "Reflection failed for Cookie.Variant.");
             return s_getVariantFunc(cookie) == CookieVariant.Rfc2965;
         }
@@ -53,7 +53,7 @@ internal static class CookieCollectionExtensions
 
         public static int InternalAdd(this CookieCollection cookieCollection, Cookie cookie, bool isStrict)
         {
-            s_internalAddFunc ??= (Func<CookieCollection, Cookie, bool, int>)typeof(CookieCollection).GetMethod("InternalAdd", BindingFlags.Instance | BindingFlags.NonPublic)!.CreateDelegate(typeof(Func<CookieCollection, Cookie, bool, int>));
+            s_internalAddFunc ??= (Func<CookieCollection, Cookie, bool, int>)typeof(CookieCollection).GetMethod("InternalAdd", BindingFlags.Instance | BindingFlags.NonPublic)?.CreateDelegate(typeof(Func<CookieCollection, Cookie, bool, int>))!;
             Debug.Assert(s_internalAddFunc != null, "Reflection failed for CookieCollection.InternalAdd().");
             return s_internalAddFunc(cookieCollection, cookie, isStrict);
         }
diff --git a/src/libraries/System.Net.Primitives/src/System/Net/IPAddress.cs b/src/libraries/System.Net.Primitives/src/System/Net/IPAddress.cs
index bdd381fc0490..48f424407b40 100644
--- a/src/libraries/System.Net.Primitives/src/System/Net/IPAddress.cs
+++ b/src/libraries/System.Net.Primitives/src/System/Net/IPAddress.cs
@@ -142,7 +142,6 @@ public IPAddress(ReadOnlySpan<byte> address, long scopeid)
 
         internal IPAddress(ReadOnlySpan<ushort> numbers, uint scopeid)
         {
-            Debug.Assert(numbers != null);
             Debug.Assert(numbers.Length == NumberOfLabels);
 
             _numbers = numbers.ToArray();
diff --git a/src/libraries/System.Net.Primitives/src/System/Net/IPAddressParser.cs b/src/libraries/System.Net.Primitives/src/System/Net/IPAddressParser.cs
index 964cd4308366..afe80e3cd5b8 100644
--- a/src/libraries/System.Net.Primitives/src/System/Net/IPAddressParser.cs
+++ b/src/libraries/System.Net.Primitives/src/System/Net/IPAddressParser.cs
@@ -67,7 +67,6 @@ private static unsafe bool TryParseIpv4(ReadOnlySpan<char> ipSpan, out long addr
 
         private static unsafe bool TryParseIPv6(ReadOnlySpan<char> ipSpan, Span<ushort> numbers, int numbersLength, out uint scope)
         {
-            Debug.Assert(numbers != null);
             Debug.Assert(numbersLength >= IPAddressParserStatics.IPv6AddressShorts);
 
             int end = ipSpan.Length;
diff --git a/src/libraries/System.Net.Quic/src/System.Net.Quic.csproj b/src/libraries/System.Net.Quic/src/System.Net.Quic.csproj
index 403518646180..aafca7f3e93b 100644
--- a/src/libraries/System.Net.Quic/src/System.Net.Quic.csproj
+++ b/src/libraries/System.Net.Quic/src/System.Net.Quic.csproj
@@ -131,6 +131,7 @@
     <Reference Include="System.Collections.Concurrent" />
     <Reference Include="System.Collections.NonGeneric" />
     <Reference Include="System.Console" Condition="'$(Configuration)' == 'Debug'" />
+    <Reference Include="System.Diagnostics.DiagnosticSource" />
     <Reference Include="System.Diagnostics.Tracing" />
     <Reference Include="System.Memory" />
     <Reference Include="System.Net.NameResolution" />
@@ -161,7 +162,7 @@
   <ItemGroup Condition="'$(TargetPlatformIdentifier)' == 'windows' and
                         '$(TargetOS)' == 'windows' and
                         ('$(TargetArchitecture)' == 'x64' or '$(TargetArchitecture)' == 'x86' or '$(TargetArchitecture)' == 'arm64') and
-                        '$(DotNetBuildFromSource)' != 'true'">
+                        '$(DotNetBuildSourceOnly)' != 'true'">
 
     <!-- Project references -->
     <PackageReference Include="runtime.win-$(TargetArchitecture).runtime.native.System.Net.MsQuic.Transport"
@@ -171,7 +172,7 @@
                       GeneratePathProperty="true" />
 
     <PackageReference Include="Microsoft.Native.Quic.MsQuic.Schannel"
-                      Version="$(MicrosoftNativeQuicMsQuicVersion)"
+                      Version="$(MicrosoftNativeQuicMsQuicSchannelVersion)"
                       PrivateAssets="all"
                       Condition="'$(UseQuicTransportPackage)' != 'true'"
                       GeneratePathProperty="true" />
@@ -191,7 +192,7 @@
                           '$(TargetOS)' == 'osx' and
                           '$(TargetArchitecture)' == 'x64' and
                           '$(Configuration)' == 'Debug' and
-                          '$(DotNetBuildFromSource)' != 'true'">
+                          '$(DotNetBuildSourceOnly)' != 'true'">
       <!-- MsQuic packages do not have macOS binaries. Our transport package is only one source. -->
       <PackageReference Include="runtime.osx-$(TargetArchitecture).runtime.native.System.Net.MsQuic.Transport"
                         Version="$(SystemNetMsQuicTransportVersion)"
diff --git a/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicApi.NativeMethods.cs b/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicApi.NativeMethods.cs
index 206eac76ac78..6906392f79eb 100644
--- a/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicApi.NativeMethods.cs
+++ b/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicApi.NativeMethods.cs
@@ -375,4 +375,55 @@ public int StreamReceiveSetEnabled(MsQuicSafeHandle stream, byte enabled)
             }
         }
     }
+
+    public int DatagramSend(MsQuicSafeHandle connection, QUIC_BUFFER* buffers, uint buffersCount, QUIC_SEND_FLAGS flags, void* context)
+    {
+        bool success = false;
+        try
+        {
+            connection.DangerousAddRef(ref success);
+            return ApiTable->DatagramSend(connection.QuicHandle, buffers, buffersCount, flags, context);
+        }
+        finally
+        {
+            if (success)
+            {
+                connection.DangerousRelease();
+            }
+        }
+    }
+
+    public int ConnectionResumptionTicketValidationComplete(MsQuicSafeHandle connection, byte result)
+    {
+        bool success = false;
+        try
+        {
+            connection.DangerousAddRef(ref success);
+            return ApiTable->ConnectionResumptionTicketValidationComplete(connection.QuicHandle, result);
+        }
+        finally
+        {
+            if (success)
+            {
+                connection.DangerousRelease();
+            }
+        }
+    }
+
+    public int ConnectionCertificateValidationComplete(MsQuicSafeHandle connection, byte result, QUIC_TLS_ALERT_CODES alert)
+    {
+        bool success = false;
+        try
+        {
+            connection.DangerousAddRef(ref success);
+            return ApiTable->ConnectionCertificateValidationComplete(connection.QuicHandle, result, alert);
+        }
+        finally
+        {
+            if (success)
+            {
+                connection.DangerousRelease();
+            }
+        }
+    }
 }
diff --git a/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicApi.cs b/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicApi.cs
index e89119844c74..4b284284f526 100644
--- a/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicApi.cs
+++ b/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicApi.cs
@@ -54,11 +54,16 @@ private MsQuicApi(QUIC_API_TABLE* apiTable)
     private static readonly Lazy<MsQuicApi> _api = new Lazy<MsQuicApi>(AllocateMsQuicApi);
     internal static MsQuicApi Api => _api.Value;
 
+    internal static Version? Version { get; private set; }
+
     internal static bool IsQuicSupported { get; }
 
     internal static string MsQuicLibraryVersion { get; } = "unknown";
     internal static string? NotSupportedReason { get; }
 
+    // workaround for https://github.com/microsoft/msquic/issues/4132
+    internal static bool SupportsAsyncCertValidation => Version >= new Version(2, 4, 0);
+
     internal static bool UsesSChannelBackend { get; }
 
     internal static bool Tls13ServerMayBeDisabled { get; }
@@ -69,6 +74,7 @@ static MsQuicApi()
     {
         bool loaded = false;
         IntPtr msQuicHandle;
+        Version = default;
 
         // MsQuic is using DualMode sockets and that will fail even for IPv4 if AF_INET6 is not available.
         if (!Socket.OSSupportsIPv6)
@@ -135,7 +141,7 @@ static MsQuicApi()
                 }
                 return;
             }
-            Version version = new Version((int)libVersion[0], (int)libVersion[1], (int)libVersion[2], (int)libVersion[3]);
+            Version = new Version((int)libVersion[0], (int)libVersion[1], (int)libVersion[2], (int)libVersion[3]);
 
             paramSize = 64 * sizeof(sbyte);
             sbyte* libGitHash = stackalloc sbyte[64];
@@ -150,11 +156,11 @@ static MsQuicApi()
             }
             string? gitHash = Marshal.PtrToStringUTF8((IntPtr)libGitHash);
 
-            MsQuicLibraryVersion = $"{Interop.Libraries.MsQuic} {version} ({gitHash})";
+            MsQuicLibraryVersion = $"{Interop.Libraries.MsQuic} {Version} ({gitHash})";
 
-            if (version < s_minMsQuicVersion)
+            if (Version < s_minMsQuicVersion)
             {
-                NotSupportedReason = $"Incompatible MsQuic library version '{version}', expecting higher than '{s_minMsQuicVersion}'.";
+                NotSupportedReason = $"Incompatible MsQuic library version '{Version}', expecting higher than '{s_minMsQuicVersion}'.";
                 if (NetEventSource.Log.IsEnabled())
                 {
                     NetEventSource.Info(null, NotSupportedReason);
diff --git a/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicConfiguration.Cache.cs b/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicConfiguration.Cache.cs
new file mode 100644
index 000000000000..4fc86adfbd15
--- /dev/null
+++ b/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicConfiguration.Cache.cs
@@ -0,0 +1,235 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.Collections.Concurrent;
+using System.Collections.ObjectModel;
+using System.Security.Authentication;
+using System.Net.Security;
+using System.Security.Cryptography.X509Certificates;
+using System.Threading;
+using Microsoft.Quic;
+
+namespace System.Net.Quic;
+
+internal static partial class MsQuicConfiguration
+{
+    private const int CheckExpiredModulo = 32;
+
+    private const string DisableCacheEnvironmentVariable = "DOTNET_SYSTEM_NET_QUIC_DISABLE_CONFIGURATION_CACHE";
+    private const string DisableCacheCtxSwitch = "System.Net.Quic.DisableConfigurationCache";
+
+    internal static bool ConfigurationCacheEnabled { get; } = GetConfigurationCacheEnabled();
+    private static bool GetConfigurationCacheEnabled()
+    {
+        // AppContext switch takes precedence
+        if (AppContext.TryGetSwitch(DisableCacheCtxSwitch, out bool value))
+        {
+            return !value;
+        }
+        else
+        {
+            // check environment variable
+            return
+                Environment.GetEnvironmentVariable(DisableCacheEnvironmentVariable) is string envVar &&
+                !(envVar == "1" || envVar.Equals("true", StringComparison.OrdinalIgnoreCase));
+        }
+    }
+
+    private static readonly ConcurrentDictionary<CacheKey, MsQuicConfigurationSafeHandle> s_configurationCache = new();
+
+    private readonly struct CacheKey : IEquatable<CacheKey>
+    {
+        public readonly List<byte[]> CertificateThumbprints;
+        public readonly QUIC_CREDENTIAL_FLAGS Flags;
+        public readonly QUIC_SETTINGS Settings;
+        public readonly List<SslApplicationProtocol> ApplicationProtocols;
+        public readonly QUIC_ALLOWED_CIPHER_SUITE_FLAGS AllowedCipherSuites;
+
+        public CacheKey(QUIC_SETTINGS settings, QUIC_CREDENTIAL_FLAGS flags, X509Certificate? certificate, ReadOnlyCollection<X509Certificate2>? intermediates, List<SslApplicationProtocol> alpnProtocols, QUIC_ALLOWED_CIPHER_SUITE_FLAGS allowedCipherSuites)
+        {
+            CertificateThumbprints = certificate == null ? new List<byte[]>() : new List<byte[]> { certificate.GetCertHash() };
+
+            if (intermediates != null)
+            {
+                foreach (X509Certificate2 intermediate in intermediates)
+                {
+                    CertificateThumbprints.Add(intermediate.GetCertHash());
+                }
+            }
+
+            Flags = flags;
+            Settings = settings;
+            // make defensive copy to prevent modification (the list comes from user code)
+            ApplicationProtocols = new List<SslApplicationProtocol>(alpnProtocols);
+            AllowedCipherSuites = allowedCipherSuites;
+        }
+
+        public override bool Equals(object? obj) => obj is CacheKey key && Equals(key);
+
+        public bool Equals(CacheKey other)
+        {
+            if (CertificateThumbprints.Count != other.CertificateThumbprints.Count)
+            {
+                return false;
+            }
+
+            for (int i = 0; i < CertificateThumbprints.Count; i++)
+            {
+                if (!CertificateThumbprints[i].AsSpan().SequenceEqual(other.CertificateThumbprints[i]))
+                {
+                    return false;
+                }
+            }
+
+            if (ApplicationProtocols.Count != other.ApplicationProtocols.Count)
+            {
+                return false;
+            }
+
+            for (int i = 0; i < ApplicationProtocols.Count; i++)
+            {
+                if (ApplicationProtocols[i] != other.ApplicationProtocols[i])
+                {
+                    return false;
+                }
+            }
+
+            return
+                Flags == other.Flags &&
+                Settings.Equals(other.Settings) &&
+                AllowedCipherSuites == other.AllowedCipherSuites;
+        }
+
+        public override int GetHashCode()
+        {
+            HashCode hash = default;
+
+            foreach (var thumbprint in CertificateThumbprints)
+            {
+                hash.AddBytes(thumbprint);
+            }
+
+            hash.Add(Flags);
+            hash.Add(Settings);
+
+            foreach (var protocol in ApplicationProtocols)
+            {
+                hash.AddBytes(protocol.Protocol.Span);
+            }
+
+            hash.Add(AllowedCipherSuites);
+
+            return hash.ToHashCode();
+        }
+    }
+
+    private static MsQuicConfigurationSafeHandle GetCachedCredentialOrCreate(QUIC_SETTINGS settings, QUIC_CREDENTIAL_FLAGS flags, X509Certificate? certificate, ReadOnlyCollection<X509Certificate2>? intermediates, List<SslApplicationProtocol> alpnProtocols, QUIC_ALLOWED_CIPHER_SUITE_FLAGS allowedCipherSuites)
+    {
+        CacheKey key = new CacheKey(settings, flags, certificate, intermediates, alpnProtocols, allowedCipherSuites);
+
+        MsQuicConfigurationSafeHandle? handle;
+
+        if (s_configurationCache.TryGetValue(key, out handle) && handle.TryAddRentCount())
+        {
+            if (NetEventSource.Log.IsEnabled())
+            {
+                NetEventSource.Info(null, $"Found cached MsQuicConfiguration: {handle}.");
+            }
+            return handle;
+        }
+
+        // if we get here, the handle is either not in the cache, or we lost the race between
+        // TryAddRentCount on this thread and MarkForDispose on another thread doing cache cleanup.
+        // In either case, we need to create a new handle.
+
+        if (NetEventSource.Log.IsEnabled())
+        {
+            NetEventSource.Info(null, $"MsQuicConfiguration not found in cache, creating new.");
+        }
+
+        handle = CreateInternal(settings, flags, certificate, intermediates, alpnProtocols, allowedCipherSuites);
+        handle.TryAddRentCount(); // we are the first renter
+
+        MsQuicConfigurationSafeHandle cached;
+        do
+        {
+            cached = s_configurationCache.GetOrAdd(key, handle);
+        }
+        // If we get the same handle back, we successfully added it to the cache and we are done.
+        // If we get a different handle back, we need to increase the rent count.
+        // If we fail to add the rent count, then the existing/cached handle is in process of
+        // being removed from the cache and we can try again, eventually either succeeding to add our
+        // new handle or getting a fresh handle inserted by another thread meanwhile.
+        while (cached != handle && !cached.TryAddRentCount());
+
+        if (cached != handle)
+        {
+            // we lost a race with another thread to insert new handle into the cache
+            if (NetEventSource.Log.IsEnabled())
+            {
+                NetEventSource.Info(null, $"Discarding MsQuicConfiguration {handle} (preferring cached {cached}).");
+            }
+
+            // First dispose decrements the rent count we added before attempting the cache insertion
+            // and second closes the handle
+            handle.Dispose();
+            handle.Dispose();
+            Debug.Assert(handle.IsClosed);
+
+            return cached;
+        }
+
+        // we added a new handle, check if we need to cleanup
+        var count = s_configurationCache.Count;
+        if (count % CheckExpiredModulo == 0)
+        {
+            // let only one thread perform cleanup at a time
+            lock (s_configurationCache)
+            {
+                // check again, if another thread just cleaned up (and cached count went down) we are unlikely
+                // to clean anything
+                if (s_configurationCache.Count >= count)
+                {
+                    CleanupCache();
+                }
+            }
+        }
+
+        return handle;
+    }
+
+    private static void CleanupCache()
+    {
+        if (NetEventSource.Log.IsEnabled())
+        {
+            NetEventSource.Info(null, $"Cleaning up MsQuicConfiguration cache, current size: {s_configurationCache.Count}.");
+        }
+
+        foreach ((CacheKey key, MsQuicConfigurationSafeHandle handle) in s_configurationCache)
+        {
+            if (!handle.TryMarkForDispose())
+            {
+                // handle in use
+                continue;
+            }
+
+            // the handle is not in use and has been marked such that no new rents can be added.
+            if (NetEventSource.Log.IsEnabled())
+            {
+                NetEventSource.Info(null, $"Removing cached MsQuicConfiguration {handle}.");
+            }
+
+            bool removed = s_configurationCache.TryRemove(key, out _);
+            Debug.Assert(removed);
+            handle.Dispose();
+            Debug.Assert(handle.IsClosed);
+        }
+
+        if (NetEventSource.Log.IsEnabled())
+        {
+            NetEventSource.Info(null, $"Cleaning up MsQuicConfiguration cache, new size: {s_configurationCache.Count}.");
+        }
+    }
+}
diff --git a/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicConfiguration.cs b/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicConfiguration.cs
index 7e527e41bf95..d45be601ae86 100644
--- a/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicConfiguration.cs
+++ b/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicConfiguration.cs
@@ -11,12 +11,12 @@
 
 namespace System.Net.Quic;
 
-internal static class MsQuicConfiguration
+internal static partial class MsQuicConfiguration
 {
     private static bool HasPrivateKey(this X509Certificate certificate)
         => certificate is X509Certificate2 certificate2 && certificate2.Handle != IntPtr.Zero && certificate2.HasPrivateKey;
 
-    public static MsQuicSafeHandle Create(QuicClientConnectionOptions options)
+    public static MsQuicConfigurationSafeHandle Create(QuicClientConnectionOptions options)
     {
         SslClientAuthenticationOptions authenticationOptions = options.ClientAuthenticationOptions;
 
@@ -79,7 +79,7 @@ public static MsQuicSafeHandle Create(QuicClientConnectionOptions options)
         return Create(options, flags, certificate, intermediates, authenticationOptions.ApplicationProtocols, authenticationOptions.CipherSuitesPolicy, authenticationOptions.EncryptionPolicy);
     }
 
-    public static MsQuicSafeHandle Create(QuicServerConnectionOptions options, string? targetHost)
+    public static MsQuicConfigurationSafeHandle Create(QuicServerConnectionOptions options, string? targetHost)
     {
         SslServerAuthenticationOptions authenticationOptions = options.ServerAuthenticationOptions;
 
@@ -117,7 +117,7 @@ public static MsQuicSafeHandle Create(QuicServerConnectionOptions options, strin
         return Create(options, flags, certificate, intermediates, authenticationOptions.ApplicationProtocols, authenticationOptions.CipherSuitesPolicy, authenticationOptions.EncryptionPolicy);
     }
 
-    private static unsafe MsQuicSafeHandle Create(QuicConnectionOptions options, QUIC_CREDENTIAL_FLAGS flags, X509Certificate? certificate, ReadOnlyCollection<X509Certificate2>? intermediates, List<SslApplicationProtocol>? alpnProtocols, CipherSuitesPolicy? cipherSuitesPolicy, EncryptionPolicy encryptionPolicy)
+    private static MsQuicConfigurationSafeHandle Create(QuicConnectionOptions options, QUIC_CREDENTIAL_FLAGS flags, X509Certificate? certificate, ReadOnlyCollection<X509Certificate2>? intermediates, List<SslApplicationProtocol>? alpnProtocols, CipherSuitesPolicy? cipherSuitesPolicy, EncryptionPolicy encryptionPolicy)
     {
         // Validate options and SSL parameters.
         if (alpnProtocols is null || alpnProtocols.Count <= 0)
@@ -176,6 +176,38 @@ private static unsafe MsQuicSafeHandle Create(QuicConnectionOptions options, QUI
                     : 0; // 0 disables the timeout
         }
 
+        QUIC_ALLOWED_CIPHER_SUITE_FLAGS allowedCipherSuites = QUIC_ALLOWED_CIPHER_SUITE_FLAGS.NONE;
+
+        if (cipherSuitesPolicy != null)
+        {
+            flags |= QUIC_CREDENTIAL_FLAGS.SET_ALLOWED_CIPHER_SUITES;
+            allowedCipherSuites = CipherSuitePolicyToFlags(cipherSuitesPolicy);
+        }
+
+        if (!MsQuicApi.UsesSChannelBackend)
+        {
+            flags |= QUIC_CREDENTIAL_FLAGS.USE_PORTABLE_CERTIFICATES;
+        }
+
+        if (ConfigurationCacheEnabled)
+        {
+            return GetCachedCredentialOrCreate(settings, flags, certificate, intermediates, alpnProtocols, allowedCipherSuites);
+        }
+
+        return CreateInternal(settings, flags, certificate, intermediates, alpnProtocols, allowedCipherSuites);
+    }
+
+    private static unsafe MsQuicConfigurationSafeHandle CreateInternal(QUIC_SETTINGS settings, QUIC_CREDENTIAL_FLAGS flags, X509Certificate? certificate, ReadOnlyCollection<X509Certificate2>? intermediates, List<SslApplicationProtocol> alpnProtocols, QUIC_ALLOWED_CIPHER_SUITE_FLAGS allowedCipherSuites)
+    {
+        if (!MsQuicApi.UsesSChannelBackend && certificate is X509Certificate2 cert && intermediates is null)
+        {
+            // MsQuic will not lookup intermediates in local CA store if not explicitly provided,
+            // so we build the cert context to get on feature parity with SslStream. Note that this code
+            // path runs after the MsQuicConfigurationCache check.
+            SslStreamCertificateContext context = SslStreamCertificateContext.Create(cert, additionalCertificates: null, offline: true, trust: null);
+            intermediates = context.IntermediateCertificates;
+        }
+
         QUIC_HANDLE* handle;
 
         using MsQuicBuffers msquicBuffers = new MsQuicBuffers();
@@ -183,24 +215,21 @@ private static unsafe MsQuicSafeHandle Create(QuicConnectionOptions options, QUI
         ThrowHelper.ThrowIfMsQuicError(MsQuicApi.Api.ConfigurationOpen(
             MsQuicApi.Api.Registration,
             msquicBuffers.Buffers,
-            (uint)alpnProtocols.Count,
+            (uint)msquicBuffers.Count,
             &settings,
             (uint)sizeof(QUIC_SETTINGS),
             (void*)IntPtr.Zero,
             &handle),
             "ConfigurationOpen failed");
-        MsQuicSafeHandle configurationHandle = new MsQuicSafeHandle(handle, SafeHandleType.Configuration);
+        MsQuicConfigurationSafeHandle configurationHandle = new MsQuicConfigurationSafeHandle(handle);
 
         try
         {
-            QUIC_CREDENTIAL_CONFIG config = new QUIC_CREDENTIAL_CONFIG { Flags = flags };
-            config.Flags |= (MsQuicApi.UsesSChannelBackend ? QUIC_CREDENTIAL_FLAGS.NONE : QUIC_CREDENTIAL_FLAGS.USE_PORTABLE_CERTIFICATES);
-
-            if (cipherSuitesPolicy != null)
+            QUIC_CREDENTIAL_CONFIG config = new QUIC_CREDENTIAL_CONFIG
             {
-                config.Flags |= QUIC_CREDENTIAL_FLAGS.SET_ALLOWED_CIPHER_SUITES;
-                config.AllowedCipherSuites = CipherSuitePolicyToFlags(cipherSuitesPolicy);
-            }
+                Flags = flags,
+                AllowedCipherSuites = allowedCipherSuites
+            };
 
             int status;
             if (certificate is null)
diff --git a/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicSafeHandle.cs b/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicSafeHandle.cs
index 38a099ed9e49..cf7d70a18e08 100644
--- a/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicSafeHandle.cs
+++ b/src/libraries/System.Net.Quic/src/System/Net/Quic/Internal/MsQuicSafeHandle.cs
@@ -3,6 +3,7 @@
 
 using System.Diagnostics;
 using System.Runtime.InteropServices;
+using System.Threading;
 using Microsoft.Quic;
 
 namespace System.Net.Quic;
@@ -52,7 +53,8 @@ public MsQuicSafeHandle(QUIC_HANDLE* handle, SafeHandleType safeHandleType)
                 SafeHandleType.Stream => MsQuicApi.Api.ApiTable->StreamClose,
                 _ => throw new ArgumentException($"Unexpected value: {safeHandleType}", nameof(safeHandleType))
             },
-            safeHandleType) { }
+            safeHandleType)
+    { }
 
     protected override bool ReleaseHandle()
     {
@@ -142,3 +144,46 @@ protected override unsafe bool ReleaseHandle()
         return true;
     }
 }
+
+internal sealed class MsQuicConfigurationSafeHandle : MsQuicSafeHandle
+{
+    // MsQuicConfiguration handles are cached, so we need to keep track of the
+    // number of times a handle is rented. Once we decide to dispose the handle,
+    // we set the _rentCount to -1.
+    private volatile int _rentCount;
+
+    public unsafe MsQuicConfigurationSafeHandle(QUIC_HANDLE* handle)
+        : base(handle, SafeHandleType.Configuration) { }
+
+    public bool TryAddRentCount()
+    {
+        int oldCount;
+
+        do
+        {
+            oldCount = _rentCount;
+            if (oldCount < 0)
+            {
+                // The handle is already disposed.
+                return false;
+            }
+        } while (Interlocked.CompareExchange(ref _rentCount, oldCount + 1, oldCount) != oldCount);
+
+        return true;
+    }
+
+    public bool TryMarkForDispose()
+    {
+        return Interlocked.CompareExchange(ref _rentCount, -1, 0) == 0;
+    }
+
+    protected override void Dispose(bool disposing)
+    {
+        if (Interlocked.Decrement(ref _rentCount) < 0)
+        {
+            // _rentCount is 0 if the handle was never rented (e.g. failure during creation),
+            // and is -1 when evicted from cache.
+            base.Dispose(disposing);
+        }
+    }
+}
diff --git a/src/libraries/System.Net.Quic/src/System/Net/Quic/Interop/msquic_generated.cs b/src/libraries/System.Net.Quic/src/System/Net/Quic/Interop/msquic_generated.cs
index b8c0b092df1d..ad781e6ddd7c 100644
--- a/src/libraries/System.Net.Quic/src/System/Net/Quic/Interop/msquic_generated.cs
+++ b/src/libraries/System.Net.Quic/src/System/Net/Quic/Interop/msquic_generated.cs
@@ -929,6 +929,7 @@ internal enum QUIC_PERFORMANCE_COUNTERS
         PATH_FAILURE,
         SEND_STATELESS_RESET,
         SEND_STATELESS_RETRY,
+        CONN_LOAD_REJECT,
         MAX,
     }
 
diff --git a/src/libraries/System.Net.Quic/src/System/Net/Quic/NetEventSource.Quic.Counters.cs b/src/libraries/System.Net.Quic/src/System/Net/Quic/NetEventSource.Quic.Counters.cs
new file mode 100644
index 000000000000..93ec7e7532c7
--- /dev/null
+++ b/src/libraries/System.Net.Quic/src/System/Net/Quic/NetEventSource.Quic.Counters.cs
@@ -0,0 +1,249 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.Tracing;
+using System.Diagnostics.Metrics;
+using System.Net.Quic;
+
+using Microsoft.Quic;
+using static Microsoft.Quic.MsQuic;
+
+namespace System.Net
+{
+    internal sealed partial class NetEventSource
+    {
+        private static Meter s_meter = new Meter("Private.InternalDiagnostics.System.Net.Quic.MsQuic");
+        private static long s_countersLastFetched;
+        private static readonly long[] s_counters = new long[(int)QUIC_PERFORMANCE_COUNTERS.MAX];
+        public static readonly ObservableCounter<long> s_CONN_CREATED = s_meter.CreateObservableCounter<long>(
+            name: "msquic.connection.created",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_CREATED),
+            unit: "{connection}",
+            description: "New connections allocated");
+
+        public static readonly ObservableCounter<long> s_CONN_HANDSHAKE_FAIL = s_meter.CreateObservableCounter<long>(
+            name: "msquic.connection.handshake_failures",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_HANDSHAKE_FAIL),
+            unit: "{connection}",
+            description: "Connections that failed during handshake");
+
+        public static readonly ObservableCounter<long> s_CONN_APP_REJECT = s_meter.CreateObservableCounter<long>(
+            name: "msquic.connection.app_rejected",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_APP_REJECT),
+            unit: "{connection}",
+            description: "Connections rejected by the application");
+
+        public static readonly ObservableCounter<long> s_CONN_LOAD_REJECT = s_meter.CreateObservableCounter<long>(
+            name: "msquic.connection.load_rejected",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_LOAD_REJECT),
+            unit: "{connection}",
+            description: "Connections rejected due to worker load.");
+
+        public static readonly ObservableCounter<long> s_CONN_RESUMED = s_meter.CreateObservableCounter<long>(
+            name: "msquic.connection.resumed",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_RESUMED),
+            unit: "{connection}",
+            description: "Connections resumed");
+
+        public static readonly ObservableGauge<long> s_CONN_ACTIVE = s_meter.CreateObservableGauge<long>(
+            name: "msquic.connection.allocated",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_ACTIVE),
+            unit: "{connection}",
+            description: "Connections currently allocated");
+
+        public static readonly ObservableGauge<long> s_CONN_CONNECTED = s_meter.CreateObservableGauge<long>(
+            name: "msquic.connection.connected",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_CONNECTED),
+            unit: "{connection}",
+            description: "Connections currently in the connected state");
+
+        public static readonly ObservableCounter<long> s_CONN_PROTOCOL_ERRORS = s_meter.CreateObservableCounter<long>(
+            name: "msquic.connection.protocol_errors",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_PROTOCOL_ERRORS),
+            unit: "{connection}",
+            description: "Connections shutdown with a protocol error");
+
+        public static readonly ObservableCounter<long> s_CONN_NO_ALPN = s_meter.CreateObservableCounter<long>(
+            name: "msquic.connection.no_alpn",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_NO_ALPN),
+            unit: "{connection}",
+            description: "Connection attempts with no matching ALPN");
+
+        public static readonly ObservableGauge<long> s_STRM_ACTIVE = s_meter.CreateObservableGauge<long>(
+            name: "msquic.stream.allocated",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.STRM_ACTIVE),
+            unit: "{stream}",
+            description: "Current streams allocated");
+
+        public static readonly ObservableCounter<long> s_PKTS_SUSPECTED_LOST = s_meter.CreateObservableCounter<long>(
+            name: "msquic.packet.suspected_lost",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.PKTS_SUSPECTED_LOST),
+            unit: "{packet}",
+            description: "Packets suspected lost");
+
+        public static readonly ObservableCounter<long> s_PKTS_DROPPED = s_meter.CreateObservableCounter<long>(
+            name: "msquic.packet.dropped",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.PKTS_DROPPED),
+            unit: "{packet}",
+            description: "Packets dropped for any reason");
+
+        public static readonly ObservableCounter<long> s_PKTS_DECRYPTION_FAIL = s_meter.CreateObservableCounter<long>(
+            name: "msquic.packet.decryption_failures",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.PKTS_DECRYPTION_FAIL),
+            unit: "{packet}",
+            description: "Packets with decryption failures");
+
+        public static readonly ObservableCounter<long> s_UDP_RECV = s_meter.CreateObservableCounter<long>(
+            name: "msquic.udp.recv_datagrams",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.UDP_RECV),
+            unit: "{datagram}",
+            description: "UDP datagrams received");
+
+        public static readonly ObservableCounter<long> s_UDP_SEND = s_meter.CreateObservableCounter<long>(
+            name: "msquic.udp.send_datagrams",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.UDP_SEND),
+            unit: "{datagram}",
+            description: "UDP datagrams sent");
+
+        public static readonly ObservableCounter<long> s_UDP_RECV_BYTES = s_meter.CreateObservableCounter<long>(
+            name: "msquic.udp.recv_bytes",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.UDP_RECV_BYTES),
+            unit: "By",
+            description: "UDP payload bytes received");
+
+        public static readonly ObservableCounter<long> s_UDP_SEND_BYTES = s_meter.CreateObservableCounter<long>(
+            name: "msquic.udp.send_bytes",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.UDP_SEND_BYTES),
+            unit: "By",
+            description: "UDP payload bytes sent");
+
+        public static readonly ObservableCounter<long> s_UDP_RECV_EVENTS = s_meter.CreateObservableCounter<long>(
+            name: "msquic.udp.recv_events",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.UDP_RECV_EVENTS),
+            unit: "{event}",
+            description: "UDP receive events");
+
+        public static readonly ObservableCounter<long> s_UDP_SEND_CALLS = s_meter.CreateObservableCounter<long>(
+            name: "msquic.udp.send_calls",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.UDP_SEND_CALLS),
+            unit: "{call}",
+            description: "UDP send API calls");
+
+        public static readonly ObservableCounter<long> s_APP_SEND_BYTES = s_meter.CreateObservableCounter<long>(
+            name: "msquic.app.send_bytes",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.APP_SEND_BYTES),
+            unit: "By",
+            description: "Bytes sent by applications");
+
+        public static readonly ObservableCounter<long> s_APP_RECV_BYTES = s_meter.CreateObservableCounter<long>(
+            name: "msquic.app.recv_bytes",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.APP_RECV_BYTES),
+            unit: "By",
+            description: "Bytes received by applications");
+
+        public static readonly ObservableGauge<long> s_CONN_QUEUE_DEPTH = s_meter.CreateObservableGauge<long>(
+            name: "msquic.threadpool.conn_queue_depth",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_QUEUE_DEPTH),
+            unit: "{connection}",
+            description: "Current connections queued for processing");
+
+        public static readonly ObservableGauge<long> s_CONN_OPER_QUEUE_DEPTH = s_meter.CreateObservableGauge<long>(
+            name: "msquic.threadpool.conn_oper_queue_depth",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_OPER_QUEUE_DEPTH),
+            unit: "{operation}",
+            description: "Current connection operations queued");
+
+        public static readonly ObservableCounter<long> s_CONN_OPER_QUEUED = s_meter.CreateObservableCounter<long>(
+            name: "msquic.threadpool.conn_oper_queued",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_OPER_QUEUED),
+            unit: "{operation}",
+            description: "New connection operations queued");
+
+        public static readonly ObservableCounter<long> s_CONN_OPER_COMPLETED = s_meter.CreateObservableCounter<long>(
+            name: "msquic.threadpool.conn_oper_completed",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.CONN_OPER_COMPLETED),
+            unit: "{operation}",
+            description: "Connection operations processed");
+
+        public static readonly ObservableGauge<long> s_WORK_OPER_QUEUE_DEPTH = s_meter.CreateObservableGauge<long>(
+            name: "msquic.threadpool.work_oper_queue_depth",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.WORK_OPER_QUEUE_DEPTH),
+            unit: "{operation}",
+            description: "Current worker operations queued");
+
+        public static readonly ObservableCounter<long> s_WORK_OPER_QUEUED = s_meter.CreateObservableCounter<long>(
+            name: "msquic.threadpool.work_oper_queued",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.WORK_OPER_QUEUED),
+            unit: "{operation}",
+            description: "New worker operations queued");
+
+        public static readonly ObservableCounter<long> s_WORK_OPER_COMPLETED = s_meter.CreateObservableCounter<long>(
+            name: "msquic.threadpool.work_oper_completed",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.WORK_OPER_COMPLETED),
+            unit: "{operation}",
+            description: "Worker operations processed");
+
+        public static readonly ObservableCounter<long> s_PATH_VALIDATED = s_meter.CreateObservableCounter<long>(
+            name: "msquic.datapath.path_validated",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.PATH_VALIDATED),
+            unit: "{challenge}",
+            description: "Successful path challenges");
+
+        public static readonly ObservableCounter<long> s_PATH_FAILURE = s_meter.CreateObservableCounter<long>(
+            name: "msquic.datapath.path_failure",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.PATH_FAILURE),
+            unit: "{challenge}",
+            description: "Unsuccessful path challenges");
+
+        public static readonly ObservableCounter<long> s_SEND_STATELESS_RESET = s_meter.CreateObservableCounter<long>(
+            name: "msquic.datapath.send_stateless_reset",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.SEND_STATELESS_RESET),
+            unit: "{packet}",
+            description: "Stateless reset packets sent ever");
+
+        public static readonly ObservableCounter<long> s_SEND_STATELESS_RETRY = s_meter.CreateObservableCounter<long>(
+            name: "msquic.datapath.send_stateless_retry",
+            observeValue: () => GetCounterValue(QUIC_PERFORMANCE_COUNTERS.SEND_STATELESS_RETRY),
+            unit: "{packet}",
+            description: "Stateless retry packets sent");
+
+        [NonEvent]
+        private static void UpdateCounters()
+        {
+            if (!MsQuicApi.IsQuicSupported)
+            {
+                // Avoid calling into MsQuic if not supported (or not initialized yet)
+                return;
+            }
+
+            unsafe
+            {
+                fixed (long* pCounters = s_counters)
+                {
+                    uint size = (uint)s_counters.Length * sizeof(long);
+                    MsQuicApi.Api.ApiTable->GetParam(null, QUIC_PARAM_GLOBAL_PERF_COUNTERS, &size, (byte*)pCounters);
+                }
+            }
+        }
+
+        [NonEvent]
+        private static long GetCounterValue(QUIC_PERFORMANCE_COUNTERS counter)
+        {
+            //
+            // We wan't to avoid refreshing the counter values array for each counter callback,
+            // so we refresh the counters array only once every 50ms. This should be enough time
+            // for all the counters to be queried and at the same time but still low enough to not
+            // confuse any monitoring tool as their polling rate is usually in seconds.
+            //
+            if (s_countersLastFetched == 0 || Stopwatch.GetElapsedTime(s_countersLastFetched).TotalMilliseconds > 50)
+            {
+                UpdateCounters();
+                s_countersLastFetched = Stopwatch.GetTimestamp();
+            }
+
+            return s_counters[(int)counter];
+        }
+    }
+}
diff --git a/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicConnection.SslConnectionOptions.cs b/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicConnection.SslConnectionOptions.cs
index dad23bfc342c..1b352f100454 100644
--- a/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicConnection.SslConnectionOptions.cs
+++ b/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicConnection.SslConnectionOptions.cs
@@ -1,10 +1,13 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Buffers;
+using System.Diagnostics;
 using System.Net.Security;
 using System.Security.Authentication;
 using System.Security.Cryptography;
 using System.Security.Cryptography.X509Certificates;
+using System.Threading.Tasks;
 using Microsoft.Quic;
 using static Microsoft.Quic.MsQuic;
 
@@ -63,18 +66,122 @@ public SslConnectionOptions(QuicConnection connection, bool isClient,
             _certificateChainPolicy = certificateChainPolicy;
         }
 
-        public unsafe int ValidateCertificate(QUIC_BUFFER* certificatePtr, QUIC_BUFFER* chainPtr, out X509Certificate2? certificate)
+        internal async Task<bool> StartAsyncCertificateValidation(IntPtr certificatePtr, IntPtr chainPtr)
+        {
+            //
+            // The provided data pointers are valid only while still inside this function, so they need to be
+            // copied to separate buffers which are then handed off to threadpool.
+            //
+
+            X509Certificate2? certificate = null;
+
+            byte[]? certDataRented = null;
+            Memory<byte> certData = default;
+            byte[]? chainDataRented = null;
+            Memory<byte> chainData = default;
+
+            if (certificatePtr != IntPtr.Zero)
+            {
+                if (MsQuicApi.UsesSChannelBackend)
+                {
+                    // provided data is a pointer to a CERT_CONTEXT
+                    certificate = new X509Certificate2(certificatePtr);
+                    // TODO: what about chainPtr?
+                }
+                else
+                {
+                    unsafe
+                    {
+                        // On non-SChannel backends we specify USE_PORTABLE_CERTIFICATES and the contents are buffers
+                        // with DER encoded cert and chain.
+                        QUIC_BUFFER* certificateBuffer = (QUIC_BUFFER*)certificatePtr;
+                        QUIC_BUFFER* chainBuffer = (QUIC_BUFFER*)chainPtr;
+
+                        if (certificateBuffer->Length > 0)
+                        {
+                            certDataRented = ArrayPool<byte>.Shared.Rent((int)certificateBuffer->Length);
+                            certData = certDataRented.AsMemory(0, (int)certificateBuffer->Length);
+                            certificateBuffer->Span.CopyTo(certData.Span);
+                        }
+
+                        if (chainBuffer->Length > 0)
+                        {
+                            chainDataRented = ArrayPool<byte>.Shared.Rent((int)chainBuffer->Length);
+                            chainData = chainDataRented.AsMemory(0, (int)chainBuffer->Length);
+                            chainBuffer->Span.CopyTo(chainData.Span);
+                        }
+                    }
+                }
+            }
+
+            // We wan't to do the certificate validation asynchronously, but due to a bug in MsQuic, we need to call the callback synchronously on some versions
+            if (MsQuicApi.SupportsAsyncCertValidation)
+            {
+                // force yield to the thread pool to free up MsQuic worker thread.
+                await Task.CompletedTask.ConfigureAwait(ConfigureAwaitOptions.ForceYielding);
+            }
+
+            // certificatePtr and chainPtr are invalid beyond this point
+
+            QUIC_TLS_ALERT_CODES result;
+            try
+            {
+                if (certData.Length > 0)
+                {
+                    Debug.Assert(certificate == null);
+                    certificate = new X509Certificate2(certData.Span);
+                }
+
+                result = _connection._sslConnectionOptions.ValidateCertificate(certificate, certData.Span, chainData.Span);
+                _connection._remoteCertificate = certificate;
+            }
+            catch (Exception ex)
+            {
+                certificate?.Dispose();
+                _connection._connectedTcs.TrySetException(ex);
+                result = QUIC_TLS_ALERT_CODES.USER_CANCELED;
+            }
+            finally
+            {
+                if (certDataRented != null)
+                {
+                    ArrayPool<byte>.Shared.Return(certDataRented);
+                }
+
+                if (chainDataRented != null)
+                {
+                    ArrayPool<byte>.Shared.Return(chainDataRented);
+                }
+            }
+
+            if (MsQuicApi.SupportsAsyncCertValidation)
+            {
+                int status = MsQuicApi.Api.ConnectionCertificateValidationComplete(
+                    _connection._handle,
+                    result == QUIC_TLS_ALERT_CODES.SUCCESS ? (byte)1 : (byte)0,
+                    result);
+
+                if (MsQuic.StatusFailed(status))
+                {
+                    if (NetEventSource.Log.IsEnabled())
+                    {
+                        NetEventSource.Error(_connection, $"{_connection} ConnectionCertificateValidationComplete failed with {ThrowHelper.GetErrorMessageForStatus(status)}");
+                    }
+                }
+            }
+
+            return result == QUIC_TLS_ALERT_CODES.SUCCESS;
+        }
+
+        private QUIC_TLS_ALERT_CODES ValidateCertificate(X509Certificate2? certificate, Span<byte> certData, Span<byte> chainData)
         {
             SslPolicyErrors sslPolicyErrors = SslPolicyErrors.None;
-            IntPtr certificateBuffer = 0;
-            int certificateLength = 0;
             bool wrapException = false;
 
             X509Chain? chain = null;
-            X509Certificate2? result = null;
             try
             {
-                if (certificatePtr is not null)
+                if (certificate is not null)
                 {
                     chain = new X509Chain();
                     if (_certificateChainPolicy != null)
@@ -96,43 +203,26 @@ public unsafe int ValidateCertificate(QUIC_BUFFER* certificatePtr, QUIC_BUFFER*
                         chain.ChainPolicy.ApplicationPolicy.Add(_isClient ? s_serverAuthOid : s_clientAuthOid);
                     }
 
-                    if (MsQuicApi.UsesSChannelBackend)
+                    if (chainData.Length > 0)
                     {
-                        result = new X509Certificate2((IntPtr)certificatePtr);
+                        X509Certificate2Collection additionalCertificates = new X509Certificate2Collection();
+                        additionalCertificates.Import(chainData);
+                        chain.ChainPolicy.ExtraStore.AddRange(additionalCertificates);
                     }
-                    else
-                    {
-                        if (certificatePtr->Length > 0)
-                        {
-                            certificateBuffer = (IntPtr)certificatePtr->Buffer;
-                            certificateLength = (int)certificatePtr->Length;
-                            result = new X509Certificate2(certificatePtr->Span);
-                        }
 
-                        if (chainPtr->Length > 0)
-                        {
-                            X509Certificate2Collection additionalCertificates = new X509Certificate2Collection();
-                            additionalCertificates.Import(chainPtr->Span);
-                            chain.ChainPolicy.ExtraStore.AddRange(additionalCertificates);
-                        }
-                    }
-                }
-
-                if (result is not null)
-                {
                     bool checkCertName = !chain!.ChainPolicy!.VerificationFlags.HasFlag(X509VerificationFlags.IgnoreInvalidName);
-                    sslPolicyErrors |= CertificateValidation.BuildChainAndVerifyProperties(chain!, result, checkCertName, !_isClient, TargetHostNameHelper.NormalizeHostName(_targetHost), certificateBuffer, certificateLength);
+                    sslPolicyErrors |= CertificateValidation.BuildChainAndVerifyProperties(chain!, certificate, checkCertName, !_isClient, TargetHostNameHelper.NormalizeHostName(_targetHost), certData);
                 }
                 else if (_certificateRequired)
                 {
                     sslPolicyErrors |= SslPolicyErrors.RemoteCertificateNotAvailable;
                 }
 
-                int status = QUIC_STATUS_SUCCESS;
+                QUIC_TLS_ALERT_CODES result = QUIC_TLS_ALERT_CODES.SUCCESS;
                 if (_validationCallback is not null)
                 {
                     wrapException = true;
-                    if (!_validationCallback(_connection, result, chain, sslPolicyErrors))
+                    if (!_validationCallback(_connection, certificate, chain, sslPolicyErrors))
                     {
                         wrapException = false;
                         if (_isClient)
@@ -140,7 +230,7 @@ public unsafe int ValidateCertificate(QUIC_BUFFER* certificatePtr, QUIC_BUFFER*
                             throw new AuthenticationException(SR.net_quic_cert_custom_validation);
                         }
 
-                        status = QUIC_STATUS_USER_CANCELED;
+                        result = QUIC_TLS_ALERT_CODES.BAD_CERTIFICATE;
                     }
                 }
                 else if (sslPolicyErrors != SslPolicyErrors.None)
@@ -150,15 +240,13 @@ public unsafe int ValidateCertificate(QUIC_BUFFER* certificatePtr, QUIC_BUFFER*
                         throw new AuthenticationException(SR.Format(SR.net_quic_cert_chain_validation, sslPolicyErrors));
                     }
 
-                    status = QUIC_STATUS_HANDSHAKE_FAILURE;
+                    result = QUIC_TLS_ALERT_CODES.BAD_CERTIFICATE;
                 }
 
-                certificate = result;
-                return status;
+                return result;
             }
             catch (Exception ex)
             {
-                result?.Dispose();
                 if (wrapException)
                 {
                     throw new QuicException(QuicError.CallbackError, null, SR.net_quic_callback_error, ex);
diff --git a/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicConnection.cs b/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicConnection.cs
index db3adf776d54..0846543a6aee 100644
--- a/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicConnection.cs
+++ b/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicConnection.cs
@@ -571,15 +571,20 @@ private unsafe int HandleEventPeerStreamStarted(ref PEER_STREAM_STARTED_DATA dat
     }
     private unsafe int HandleEventPeerCertificateReceived(ref PEER_CERTIFICATE_RECEIVED_DATA data)
     {
-        try
-        {
-            return _sslConnectionOptions.ValidateCertificate((QUIC_BUFFER*)data.Certificate, (QUIC_BUFFER*)data.Chain, out _remoteCertificate);
-        }
-        catch (Exception ex)
+        //
+        // The certificate validation is an expensive operation and we don't want to delay MsQuic
+        // worker thread. So we offload the validation to the .NET threadpool. Incidentally, this
+        // also prevents potential user RemoteCertificateValidationCallback from blocking MsQuic
+        // worker threads.
+        //
+
+        var task = _sslConnectionOptions.StartAsyncCertificateValidation((IntPtr)data.Certificate, (IntPtr)data.Chain);
+        if (task.IsCompletedSuccessfully)
         {
-            _connectedTcs.TrySetException(ex);
-            return QUIC_STATUS_HANDSHAKE_FAILURE;
+            return task.Result ? QUIC_STATUS_SUCCESS : QUIC_STATUS_BAD_CERTIFICATE;
         }
+
+        return QUIC_STATUS_PENDING;
     }
 
     private unsafe int HandleConnectionEvent(ref QUIC_CONNECTION_EVENT connectionEvent)
@@ -671,7 +676,6 @@ public async ValueTask DisposeAsync()
         Debug.Assert(_connectedTcs.IsCompleted);
         _handle.Dispose();
         _shutdownTokenSource.Dispose();
-
         _configuration?.Dispose();
 
         // Dispose remote certificate only if it hasn't been accessed via getter, in which case the accessing code becomes the owner of the certificate lifetime.
diff --git a/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicListener.cs b/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicListener.cs
index 6f0a0d8bb5b7..88ea309054a7 100644
--- a/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicListener.cs
+++ b/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicListener.cs
@@ -209,6 +209,11 @@ public async ValueTask<QuicConnection> AcceptConnectionAsync(CancellationToken c
     /// <param name="clientHello">The TLS ClientHello data.</param>
     private async void StartConnectionHandshake(QuicConnection connection, SslClientHelloInfo clientHello)
     {
+        // Yield to the threadpool immediately. This makes sure the connection options callback
+        // provided by the user is not invoked from the MsQuic thread and cannot delay acks
+        // or other operations on other connections.
+        await Task.CompletedTask.ConfigureAwait(ConfigureAwaitOptions.ForceYielding);
+
         bool wrapException = false;
         CancellationToken cancellationToken = default;
 
diff --git a/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicStream.Stream.cs b/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicStream.Stream.cs
index dd6fa5a86932..8196c59a1c1b 100644
--- a/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicStream.Stream.cs
+++ b/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicStream.Stream.cs
@@ -107,7 +107,7 @@ public override int Read(byte[] buffer, int offset, int count)
     public override int ReadByte()
     {
         byte b = 0;
-        return Read(MemoryMarshal.CreateSpan(ref b, 1)) != 0 ? b : -1;
+        return Read(new Span<byte>(ref b)) != 0 ? b : -1;
     }
 
     /// <inheritdoc />
diff --git a/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicStream.cs b/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicStream.cs
index 2e8f6a50e7e6..06515f3310bf 100644
--- a/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicStream.cs
+++ b/src/libraries/System.Net.Quic/src/System/Net/Quic/QuicStream.cs
@@ -170,6 +170,7 @@ internal unsafe QuicStream(MsQuicContextSafeHandle connectionHandle, QuicStreamT
                 &handle),
                 "StreamOpen failed");
             _handle = new MsQuicContextSafeHandle(handle, context, SafeHandleType.Stream, connectionHandle);
+            _handle.Disposable = _sendBuffers;
         }
         catch
         {
@@ -201,6 +202,7 @@ internal unsafe QuicStream(MsQuicContextSafeHandle connectionHandle, QUIC_HANDLE
         try
         {
             _handle = new MsQuicContextSafeHandle(handle, context, SafeHandleType.Stream, connectionHandle);
+            _handle.Disposable = _sendBuffers;
             delegate* unmanaged[Cdecl]<QUIC_HANDLE*, void*, QUIC_STREAM_EVENT*, int> nativeCallback = &NativeCallback;
             MsQuicApi.Api.SetCallbackHandler(
                 _handle,
@@ -715,9 +717,6 @@ public override async ValueTask DisposeAsync()
         Debug.Assert(_startedTcs.IsCompleted);
         _handle.Dispose();
 
-        // TODO: memory leak if not disposed
-        _sendBuffers.Dispose();
-
         unsafe void StreamShutdown(QUIC_STREAM_SHUTDOWN_FLAGS flags, long errorCode)
         {
             int status = MsQuicApi.Api.StreamShutdown(
diff --git a/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicCipherSuitesPolicyTests.cs b/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicCipherSuitesPolicyTests.cs
index 459b9bce810d..8b15670ee88a 100644
--- a/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicCipherSuitesPolicyTests.cs
+++ b/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicCipherSuitesPolicyTests.cs
@@ -8,9 +8,10 @@
 
 namespace System.Net.Quic.Tests
 {
-    [Collection(nameof(DisableParallelization))]
+    [Collection(nameof(QuicTestCollection))]
     [ConditionalClass(typeof(QuicTestBase), nameof(QuicTestBase.IsSupported), nameof(QuicTestBase.IsNotArm32CoreClrStressTest))]
     [SkipOnPlatform(TestPlatforms.Windows, "CipherSuitesPolicy is not supported on Windows")]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/91757", typeof(PlatformDetection), nameof(PlatformDetection.IsArmProcess))]
     public class MsQuicCipherSuitesPolicyTests : QuicTestBase
     {
         public MsQuicCipherSuitesPolicyTests(ITestOutputHelper output) : base(output) { }
@@ -77,4 +78,4 @@ await Assert.ThrowsAsync<QuicException>(() => TestConnection(
             ));
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicPlatformDetectionTests.cs b/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicPlatformDetectionTests.cs
index 16f267fc7195..7c2511bbb6d1 100644
--- a/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicPlatformDetectionTests.cs
+++ b/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicPlatformDetectionTests.cs
@@ -8,6 +8,7 @@
 
 namespace System.Net.Quic.Tests
 {
+    [Collection(nameof(QuicTestCollection))]
     public class MsQuicPlatformDetectionTests : QuicTestBase
     {
         public MsQuicPlatformDetectionTests(ITestOutputHelper output) : base(output) { }
@@ -59,6 +60,7 @@ public async Task SupportedLinuxPlatformsWithMsQuic_IsSupportedIsTrue()
         [ActiveIssue("https://github.com/dotnet/runtime/issues/82154", typeof(PlatformDetection), nameof(PlatformDetection.IsRaspbian10), nameof(PlatformDetection.IsArmv6Process), nameof(PlatformDetection.IsInContainer))]
         [ActiveIssue("https://github.com/dotnet/runtime/issues/82154", typeof(PlatformDetection), nameof(PlatformDetection.IsPpc64leProcess))]
         [ActiveIssue("https://github.com/dotnet/runtime/issues/82154", typeof(PlatformDetection), nameof(PlatformDetection.IsUbuntu2004), nameof(PlatformDetection.IsS390xProcess))]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/91757", typeof(PlatformDetection), nameof(PlatformDetection.IsAlpine), nameof(PlatformDetection.IsArmProcess))]
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsInHelix))]
         [PlatformSpecific(TestPlatforms.Linux)]
         public void SupportedLinuxPlatforms_IsSupportedIsTrue()
diff --git a/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicRemoteExecutorTests.cs b/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicRemoteExecutorTests.cs
index 051ead9b3bb2..f57f4aef8ec6 100644
--- a/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicRemoteExecutorTests.cs
+++ b/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicRemoteExecutorTests.cs
@@ -12,8 +12,9 @@
 
 namespace System.Net.Quic.Tests
 {
-    [Collection(nameof(DisableParallelization))]
+    [Collection(nameof(QuicTestCollection))]
     [ConditionalClass(typeof(QuicTestBase), nameof(QuicTestBase.IsSupported), nameof(QuicTestBase.IsNotArm32CoreClrStressTest))]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/91757", typeof(PlatformDetection), nameof(PlatformDetection.IsArmProcess))]
     public class MsQuicRemoteExecutorTests : QuicTestBase
     {
         public MsQuicRemoteExecutorTests()
diff --git a/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicTests.cs b/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicTests.cs
index 4a11909d30bf..b2042adffe33 100644
--- a/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicTests.cs
+++ b/src/libraries/System.Net.Quic/tests/FunctionalTests/MsQuicTests.cs
@@ -46,8 +46,9 @@ public void Dispose()
         }
     }
 
-    [Collection(nameof(DisableParallelization))]
+    [Collection(nameof(QuicTestCollection))]
     [ConditionalClass(typeof(QuicTestBase), nameof(QuicTestBase.IsSupported), nameof(QuicTestBase.IsNotArm32CoreClrStressTest))]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/91757", typeof(PlatformDetection), nameof(PlatformDetection.IsArmProcess))]
     public class MsQuicTests : QuicTestBase, IClassFixture<CertificateSetup>
     {
         private static byte[] s_data = "Hello world!"u8.ToArray();
@@ -356,7 +357,10 @@ public async Task UntrustedClientCertificateFails()
             }
         }
 
+        static bool SupportsAsyncCertValidation => QuicTestCollection.MsQuicVersion >= new Version(2, 4);
+
         [Fact]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/99074", typeof(MsQuicTests), nameof(SupportsAsyncCertValidation))]
         public async Task CertificateCallbackThrowPropagates()
         {
             using CancellationTokenSource cts = new CancellationTokenSource(PassingTestTimeout);
@@ -1200,61 +1204,6 @@ public BufferSegment Append(ReadOnlyMemory<byte> memory)
             }
         }
 
-        [Fact]
-        [OuterLoop("May take several seconds")]
-        [ActiveIssue("https://github.com/dotnet/runtime/issues/85331", typeof(PlatformDetection), nameof(PlatformDetection.IsWindows10Version20348OrLower))]
-        public async Task ByteMixingOrNativeAVE_MinimalFailingTest()
-        {
-            const int writeSize = 64 * 1024;
-            const int NumberOfWrites = 512;
-            byte[] data1 = new byte[writeSize * NumberOfWrites];
-            byte[] data2 = new byte[writeSize * NumberOfWrites];
-            Array.Fill(data1, (byte)1);
-            Array.Fill(data2, (byte)2);
-
-            Task t1 = RunTest(data1);
-            Task t2 = RunTest(data2);
-
-            async Task RunTest(byte[] data)
-            {
-                await RunClientServer(
-                    iterations: 20,
-                    serverFunction: async connection =>
-                    {
-                        await using QuicStream stream = await connection.AcceptInboundStreamAsync();
-
-                        byte[] buffer = new byte[data.Length];
-                        int bytesRead = await ReadAll(stream, buffer);
-                        Assert.Equal(data.Length, bytesRead);
-                        AssertExtensions.SequenceEqual(data, buffer);
-
-                        for (int pos = 0; pos < data.Length; pos += writeSize)
-                        {
-                            await stream.WriteAsync(data[pos..(pos + writeSize)]);
-                        }
-                        await stream.WriteAsync(Memory<byte>.Empty, completeWrites: true);
-                    },
-                    clientFunction: async connection =>
-                    {
-                        await using QuicStream stream = await connection.OpenOutboundStreamAsync(QuicStreamType.Bidirectional);
-
-                        for (int pos = 0; pos < data.Length; pos += writeSize)
-                        {
-                            await stream.WriteAsync(data[pos..(pos + writeSize)]);
-                        }
-                        await stream.WriteAsync(Memory<byte>.Empty, completeWrites: true);
-
-                        byte[] buffer = new byte[data.Length];
-                        int bytesRead = await ReadAll(stream, buffer);
-                        Assert.Equal(data.Length, bytesRead);
-                        AssertExtensions.SequenceEqual(data, buffer);
-                    }
-                );
-            }
-
-            await (new[] { t1, t2 }).WhenAllOrAnyFailed(millisecondsTimeout: 1000000);
-        }
-
         [Fact]
         public async Task ManagedAVE_MinimalFailingTest()
         {
diff --git a/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicConnectionTests.cs b/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicConnectionTests.cs
index 98d72124f004..5125b33bec95 100644
--- a/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicConnectionTests.cs
+++ b/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicConnectionTests.cs
@@ -6,6 +6,7 @@
 using System.Security.Cryptography.X509Certificates;
 using System.Threading;
 using System.Threading.Tasks;
+using Microsoft.DotNet.XUnitExtensions;
 using Xunit;
 using Xunit.Abstractions;
 
@@ -13,8 +14,9 @@ namespace System.Net.Quic.Tests
 {
     using Configuration = System.Net.Test.Common.Configuration;
 
-    [Collection(nameof(DisableParallelization))]
+    [Collection(nameof(QuicTestCollection))]
     [ConditionalClass(typeof(QuicTestBase), nameof(QuicTestBase.IsSupported), nameof(QuicTestBase.IsNotArm32CoreClrStressTest))]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/91757", typeof(PlatformDetection), nameof(PlatformDetection.IsArmProcess))]
     public sealed class QuicConnectionTests : QuicTestBase
     {
         const int ExpectedErrorCode = 1234;
@@ -22,7 +24,7 @@ public sealed class QuicConnectionTests : QuicTestBase
 
         public QuicConnectionTests(ITestOutputHelper output) : base(output) { }
 
-        [Theory]
+        [ConditionalTheory]
         [MemberData(nameof(LocalAddresses))]
         public async Task TestConnect(IPAddress address)
         {
diff --git a/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicListenerTests.cs b/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicListenerTests.cs
index d9e27a9e394c..6e3971764d18 100644
--- a/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicListenerTests.cs
+++ b/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicListenerTests.cs
@@ -13,8 +13,9 @@
 
 namespace System.Net.Quic.Tests
 {
-    [Collection(nameof(DisableParallelization))]
+    [Collection(nameof(QuicTestCollection))]
     [ConditionalClass(typeof(QuicTestBase), nameof(QuicTestBase.IsSupported), nameof(QuicTestBase.IsNotArm32CoreClrStressTest))]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/91757", typeof(PlatformDetection), nameof(PlatformDetection.IsArmProcess))]
     public sealed class QuicListenerTests : QuicTestBase
     {
         public QuicListenerTests(ITestOutputHelper output) : base(output) { }
diff --git a/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicStreamConnectedStreamConformanceTests.cs b/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicStreamConnectedStreamConformanceTests.cs
index e224bf75c553..bb7285ff22d7 100644
--- a/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicStreamConnectedStreamConformanceTests.cs
+++ b/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicStreamConnectedStreamConformanceTests.cs
@@ -14,8 +14,9 @@
 
 namespace System.Net.Quic.Tests
 {
-    [Collection(nameof(DisableParallelization))]
+    [Collection(nameof(QuicTestCollection))]
     [ConditionalClass(typeof(QuicTestBase), nameof(QuicTestBase.IsSupported), nameof(QuicTestBase.IsNotArm32CoreClrStressTest))]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/91757", typeof(PlatformDetection), nameof(PlatformDetection.IsArmProcess))]
     public sealed class QuicStreamConformanceTests : ConnectedStreamConformanceTests
     {
         protected override bool UsableAfterCanceledReads => false;
diff --git a/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicStreamTests.cs b/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicStreamTests.cs
index 72d0995823ed..5bf718df7308 100644
--- a/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicStreamTests.cs
+++ b/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicStreamTests.cs
@@ -12,8 +12,9 @@
 
 namespace System.Net.Quic.Tests
 {
-    [Collection(nameof(DisableParallelization))]
+    [Collection(nameof(QuicTestCollection))]
     [ConditionalClass(typeof(QuicTestBase), nameof(QuicTestBase.IsSupported), nameof(QuicTestBase.IsNotArm32CoreClrStressTest))]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/91757", typeof(PlatformDetection), nameof(PlatformDetection.IsArmProcess))]
     public sealed class QuicStreamTests : QuicTestBase
     {
         private static byte[] s_data = "Hello world!"u8.ToArray();
@@ -1211,16 +1212,16 @@ async ValueTask ReleaseOnReadsClosedAsync()
 
         private const int SmallestPayload = 1;
         private const int SmallPayload = 1024;
-        private const int BufferPayload = 64*1024;
-        private const int BufferPlusPayload = 64*1024+1;
-        private const int BigPayload = 1024*1024*1024;
+        private const int BufferPayload = 64 * 1024;
+        private const int BufferPlusPayload = 64 * 1024 + 1;
+        private const int BigPayload = 1024 * 1024 * 1024;
 
         public static IEnumerable<object[]> PayloadSizeAndTwoBools()
         {
-            var boolValues = new [] { true, false };
+            var boolValues = new[] { true, false };
             var payloadValues = !PlatformDetection.IsInHelix ?
-                                    new [] { SmallestPayload, SmallPayload, BufferPayload, BufferPlusPayload, BigPayload } :
-                                    new [] { SmallestPayload, SmallPayload, BufferPayload, BufferPlusPayload };
+                                    new[] { SmallestPayload, SmallPayload, BufferPayload, BufferPlusPayload, BigPayload } :
+                                    new[] { SmallestPayload, SmallPayload, BufferPayload, BufferPlusPayload };
             return
                 from payload in payloadValues
                 from bool1 in boolValues
@@ -1248,6 +1249,9 @@ await RunClientServer(
                     {
                         await stream.WritesClosed;
                     }
+
+                    var _ = await stream.ReadAsync(new byte[0]);
+
                     serverSem.Release();
                     await clientSem.WaitAsync();
 
@@ -1278,6 +1282,9 @@ await RunClientServer(
                     {
                         await stream.WritesClosed;
                     }
+
+                    var _ = await stream.ReadAsync(new byte[0]);
+
                     clientSem.Release();
                     await serverSem.WaitAsync();
 
diff --git a/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicTestBase.cs b/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicTestBase.cs
index 79992aef5f16..c3e0e4e7372a 100644
--- a/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicTestBase.cs
+++ b/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicTestBase.cs
@@ -18,6 +18,8 @@
 
 namespace System.Net.Quic.Tests
 {
+    using Configuration = System.Net.Test.Common.Configuration;
+
     public abstract class QuicTestBase : IDisposable
     {
         public const long DefaultStreamErrorCodeClient = 123456;
@@ -31,8 +33,7 @@ public abstract class QuicTestBase : IDisposable
         public static bool IsSupported => QuicListener.IsSupported && QuicConnection.IsSupported;
         public static bool IsNotArm32CoreClrStressTest => !(CoreClrConfigurationDetection.IsStressTest && PlatformDetection.IsArmProcess);
 
-        private static readonly Lazy<bool> _isIPv6Available = new Lazy<bool>(GetIsIPv6Available);
-        public static bool IsIPv6Available => _isIPv6Available.Value;
+        public static bool IsIPv6Available => Configuration.Sockets.IsIPv6LoopbackAvailable;
 
         public static SslApplicationProtocol ApplicationProtocol { get; } = new SslApplicationProtocol("quictest");
 
@@ -43,29 +44,7 @@ public abstract class QuicTestBase : IDisposable
         public const int PassingTestTimeoutMilliseconds = 4 * 60 * 1000;
         public static TimeSpan PassingTestTimeout => TimeSpan.FromMilliseconds(PassingTestTimeoutMilliseconds);
 
-        static unsafe QuicTestBase()
-        {
-            // If any of the reflection bellow breaks due to changes in "System.Net.Quic.MsQuicApi", also check and fix HttpStress project as it uses the same hack.
-            Type msQuicApiType = Type.GetType("System.Net.Quic.MsQuicApi, System.Net.Quic");
-
-            string msQuicLibraryVersion = (string)msQuicApiType.GetProperty("MsQuicLibraryVersion", BindingFlags.NonPublic | BindingFlags.Static).GetGetMethod(true).Invoke(null, Array.Empty<object?>());
-            Console.WriteLine($"MsQuic {(IsSupported ? "supported" : "not supported")} and using '{msQuicLibraryVersion}'.");
-
-            if (IsSupported)
-            {
-                object msQuicApiInstance = msQuicApiType.GetProperty("Api", BindingFlags.NonPublic | BindingFlags.Static).GetGetMethod(true).Invoke(null, Array.Empty<object?>());
-                QUIC_API_TABLE* apiTable = (QUIC_API_TABLE*)(Pointer.Unbox(msQuicApiType.GetProperty("ApiTable").GetGetMethod().Invoke(msQuicApiInstance, Array.Empty<object?>())));
-                QUIC_SETTINGS settings = default(QUIC_SETTINGS);
-                settings.IsSet.MaxWorkerQueueDelayUs = 1;
-                settings.MaxWorkerQueueDelayUs = 2_500_000u; // 2.5s, 10x the default
-                if (MsQuic.StatusFailed(apiTable->SetParam(null, MsQuic.QUIC_PARAM_GLOBAL_SETTINGS, (uint)sizeof(QUIC_SETTINGS), (byte*)&settings)))
-                {
-                    Console.WriteLine($"Unable to set MsQuic MaxWorkerQueueDelayUs.");
-                }
-            }
-        }
-
-        public unsafe QuicTestBase(ITestOutputHelper output)
+        public QuicTestBase(ITestOutputHelper output)
         {
             _output = output;
         }
@@ -397,19 +376,5 @@ internal static async Task<int> WriteForever(QuicStream stream, int size = 1)
                 ArrayPool<byte>.Shared.Return(buffer);
             }
         }
-
-        internal static bool GetIsIPv6Available()
-        {
-            try
-            {
-                using Socket s = new Socket(AddressFamily.InterNetworkV6, SocketType.Dgram, ProtocolType.Udp);
-                s.Bind(new IPEndPoint(IPAddress.IPv6Loopback, 0));
-                return true;
-            }
-            catch (SocketException)
-            {
-                return false;
-            }
-        }
     }
 }
diff --git a/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicTestCollection.cs b/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicTestCollection.cs
new file mode 100644
index 000000000000..f8dd160acb00
--- /dev/null
+++ b/src/libraries/System.Net.Quic/tests/FunctionalTests/QuicTestCollection.cs
@@ -0,0 +1,107 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Microsoft.DotNet.XUnitExtensions;
+using System;
+using System.Net.Quic;
+using System.Reflection;
+using System.Text;
+using System.Linq;
+using Xunit;
+using Xunit.Abstractions;
+
+using Microsoft.Quic;
+using static Microsoft.Quic.MsQuic;
+
+namespace System.Net.Quic.Tests;
+
+[CollectionDefinition(nameof(QuicTestCollection), DisableParallelization = true)]
+public unsafe class QuicTestCollection : ICollectionFixture<QuicTestCollection>, IDisposable
+{
+    public static bool IsSupported => QuicListener.IsSupported && QuicConnection.IsSupported;
+
+    public static Version MsQuicVersion { get; } = GetMsQuicVersion();
+
+    public QuicTestCollection()
+    {
+        string msQuicLibraryVersion = GetMsQuicLibraryVersion();
+        // If any of the reflection bellow breaks due to changes in "System.Net.Quic.MsQuicApi", also check and fix HttpStress project as it uses the same hack.
+        Console.WriteLine($"MsQuic {(IsSupported ? "supported" : "not supported")} and using '{msQuicLibraryVersion}'.");
+
+        if (IsSupported)
+        {
+            QUIC_SETTINGS settings = default(QUIC_SETTINGS);
+            settings.IsSet.MaxWorkerQueueDelayUs = 1;
+            settings.MaxWorkerQueueDelayUs = 2_500_000u; // 2.5s, 10x the default
+            if (MsQuic.StatusFailed(GetApiTable()->SetParam(null, MsQuic.QUIC_PARAM_GLOBAL_SETTINGS, (uint)sizeof(QUIC_SETTINGS), (byte*)&settings)))
+            {
+                Console.WriteLine($"Unable to set MsQuic MaxWorkerQueueDelayUs.");
+            }
+        }
+    }
+
+    public unsafe void Dispose()
+    {
+        if (!IsSupported)
+        {
+            return;
+        }
+
+        long[] counters = new long[(int)QUIC_PERFORMANCE_COUNTERS.MAX];
+        int countersAvailable;
+
+        int status;
+        fixed (long* pCounters = counters)
+        {
+            uint size = (uint)counters.Length * sizeof(long);
+            status = GetApiTable()->GetParam(null, QUIC_PARAM_GLOBAL_PERF_COUNTERS, &size, (byte*)pCounters);
+            countersAvailable = (int)size / sizeof(long);
+        }
+
+        if (StatusFailed(status))
+        {
+            System.Console.WriteLine($"Failed to read MsQuic counters: {status}");
+            return;
+        }
+
+        StringBuilder sb = new StringBuilder();
+        sb.AppendLine("MsQuic Counters:");
+
+        int maxlen = Enum.GetNames(typeof(QUIC_PERFORMANCE_COUNTERS)).Max(s => s.Length);
+        void DumpCounter(QUIC_PERFORMANCE_COUNTERS counter)
+        {
+            var name = $"{counter}:".PadRight(maxlen + 1);
+            var index = (int)counter;
+            var value = index < countersAvailable ? counters[(int)counter].ToString() : "N/A";
+            sb.AppendLine($"    {counter} {value}");
+        }
+
+        DumpCounter(QUIC_PERFORMANCE_COUNTERS.CONN_CREATED);
+        DumpCounter(QUIC_PERFORMANCE_COUNTERS.CONN_HANDSHAKE_FAIL);
+        DumpCounter(QUIC_PERFORMANCE_COUNTERS.CONN_APP_REJECT);
+        DumpCounter(QUIC_PERFORMANCE_COUNTERS.CONN_LOAD_REJECT);
+
+        System.Console.WriteLine(sb.ToString());
+    }
+
+    private static Version GetMsQuicVersion()
+    {
+        Type msQuicApiType = Type.GetType("System.Net.Quic.MsQuicApi, System.Net.Quic");
+
+        return (Version)msQuicApiType.GetProperty("Version", BindingFlags.NonPublic | BindingFlags.Static).GetGetMethod(true).Invoke(null, Array.Empty<object?>());
+    }
+
+    private static string? GetMsQuicLibraryVersion()
+    {
+        Type msQuicApiType = Type.GetType("System.Net.Quic.MsQuicApi, System.Net.Quic");
+
+        return (string)msQuicApiType.GetProperty("MsQuicLibraryVersion", BindingFlags.NonPublic | BindingFlags.Static).GetGetMethod(true).Invoke(null, Array.Empty<object?>());
+    }
+
+    private static QUIC_API_TABLE* GetApiTable()
+    {
+        Type msQuicApiType = Type.GetType("System.Net.Quic.MsQuicApi, System.Net.Quic");
+        object msQuicApiInstance = msQuicApiType.GetProperty("Api", BindingFlags.NonPublic | BindingFlags.Static).GetGetMethod(true).Invoke(null, Array.Empty<object?>());
+        return (QUIC_API_TABLE*)(Pointer.Unbox(msQuicApiType.GetProperty("ApiTable").GetGetMethod().Invoke(msQuicApiInstance, Array.Empty<object?>())));
+    }
+}
diff --git a/src/libraries/System.Net.Requests/src/Resources/Strings.resx b/src/libraries/System.Net.Requests/src/Resources/Strings.resx
index b33f2a024403..8157909f3489 100644
--- a/src/libraries/System.Net.Requests/src/Resources/Strings.resx
+++ b/src/libraries/System.Net.Requests/src/Resources/Strings.resx
@@ -261,7 +261,7 @@
   <data name="CacheEntryNotFound" xml:space="preserve">
     <value>The request was aborted: The request cache-only policy does not allow a network request and the response is not found in cache.</value>
   </data>
-  <data name="net_proxyschemenotsupported" xml:space="preserve">
-    <value>The ServicePointManager does not support proxies with the {0} scheme.</value>
+  <data name="net_maximumbindretries" xml:space="preserve">
+    <value>Reached the maximum number of BindIPEndPointDelegate retries.</value>
   </data>
 </root>
diff --git a/src/libraries/System.Net.Requests/src/System.Net.Requests.csproj b/src/libraries/System.Net.Requests/src/System.Net.Requests.csproj
index 397622b4806a..46bda299d9a6 100644
--- a/src/libraries/System.Net.Requests/src/System.Net.Requests.csproj
+++ b/src/libraries/System.Net.Requests/src/System.Net.Requests.csproj
@@ -29,6 +29,7 @@
     <Compile Include="System\Net\IWebRequestCreate.cs" />
     <Compile Include="System\Net\ProtocolViolationException.cs" />
     <Compile Include="System\Net\RequestStream.cs" />
+    <Compile Include="System\Net\RequestBufferingStream.cs" />
     <Compile Include="System\Net\TaskExtensions.cs" />
     <Compile Include="System\Net\WebException.cs" />
     <Compile Include="System\Net\WebExceptionStatus.cs" />
@@ -48,6 +49,7 @@
     <Compile Include="System\Net\NetRes.cs" />
     <Compile Include="System\Net\NetworkStreamWrapper.cs" />
     <Compile Include="System\Net\TimerThread.cs" />
+    <Compile Include="System\Net\RequestStreamContent.cs" />
     <Compile Include="System\Net\Cache\HttpCacheAgeControl.cs" />
     <Compile Include="System\Net\Cache\HttpRequestCacheLevel.cs" />
     <Compile Include="System\Net\Cache\HttpRequestCachePolicy.cs" />
@@ -110,6 +112,7 @@
     <Reference Include="System.Diagnostics.Tracing" />
     <Reference Include="System.Memory" />
     <Reference Include="System.Net.Http" />
+    <Reference Include="System.Net.NameResolution" />
     <Reference Include="System.Net.Primitives" />
     <Reference Include="System.Net.Security" />
     <Reference Include="System.Net.Sockets" />
diff --git a/src/libraries/System.Net.Requests/src/System/Net/FtpWebRequest.cs b/src/libraries/System.Net.Requests/src/System/Net/FtpWebRequest.cs
index 2f214e3d99f7..4f55ea7b9048 100644
--- a/src/libraries/System.Net.Requests/src/System/Net/FtpWebRequest.cs
+++ b/src/libraries/System.Net.Requests/src/System/Net/FtpWebRequest.cs
@@ -495,17 +495,17 @@ internal FtpWebRequest(Uri uri)
             NetworkCredential? networkCredential = null;
             _uri = uri;
             _methodInfo = FtpMethodInfo.GetMethodInfo(WebRequestMethods.Ftp.DownloadFile);
-            if (!string.IsNullOrEmpty(_uri.UserInfo))
+
+            if (_uri.UserInfo is { Length: > 0 } userInfo)
             {
-                string userInfo = _uri.UserInfo;
                 string username = userInfo;
                 string password = "";
                 int index = userInfo.IndexOf(':');
                 if (index != -1)
                 {
-                    username = Uri.UnescapeDataString(userInfo.Substring(0, index));
+                    username = Uri.UnescapeDataString(userInfo.AsSpan(0, index));
                     index++; // skip ':'
-                    password = Uri.UnescapeDataString(userInfo.Substring(index));
+                    password = Uri.UnescapeDataString(userInfo.AsSpan(index));
                 }
                 networkCredential = new NetworkCredential(username, password);
             }
diff --git a/src/libraries/System.Net.Requests/src/System/Net/HttpWebRequest.cs b/src/libraries/System.Net.Requests/src/System/Net/HttpWebRequest.cs
index 2a54bbb4d8d5..8f37002cf206 100644
--- a/src/libraries/System.Net.Requests/src/System/Net/HttpWebRequest.cs
+++ b/src/libraries/System.Net.Requests/src/System/Net/HttpWebRequest.cs
@@ -1,16 +1,19 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Collections.Generic;
 using System.ComponentModel;
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Globalization;
 using System.IO;
+using System.Net;
 using System.Net.Cache;
 using System.Net.Http;
 using System.Net.Http.Headers;
 using System.Net.Security;
 using System.Net.Sockets;
+using System.Runtime.CompilerServices;
 using System.Runtime.Serialization;
 using System.Security.Authentication;
 using System.Security.Cryptography.X509Certificates;
@@ -39,8 +42,10 @@ public class HttpWebRequest : WebRequest, ISerializable
         private IWebProxy? _proxy = WebRequest.DefaultWebProxy;
 
         private Task<HttpResponseMessage>? _sendRequestTask;
+        private HttpRequestMessage? _sendRequestMessage;
 
         private static int _defaultMaxResponseHeadersLength = HttpHandlerDefaults.DefaultMaxResponseHeadersLength;
+        private static int _defaultMaximumErrorResponseLength = -1;
 
         private int _beginGetRequestStreamCalled;
         private int _beginGetResponseCalled;
@@ -60,7 +65,7 @@ public class HttpWebRequest : WebRequest, ISerializable
         private bool _hostHasPort;
         private Uri? _hostUri;
 
-        private RequestStream? _requestStream;
+        private Stream? _requestStream;
         private TaskCompletionSource<Stream>? _requestStreamOperation;
         private TaskCompletionSource<WebResponse>? _responseOperation;
         private AsyncCallback? _requestStreamCallback;
@@ -76,6 +81,8 @@ public class HttpWebRequest : WebRequest, ISerializable
         private static readonly object s_syncRoot = new object();
         private static volatile HttpClient? s_cachedHttpClient;
         private static HttpClientParameters? s_cachedHttpClientParameters;
+        private bool _disposeRequired;
+        private HttpClient? _httpClient;
 
         //these should be safe.
         [Flags]
@@ -420,11 +427,7 @@ public string? Referer
         /// <devdoc>
         ///    <para>Sets the media type header</para>
         /// </devdoc>
-        public string? MediaType
-        {
-            get;
-            set;
-        }
+        public string? MediaType { get; set; }
 
         /// <devdoc>
         ///    <para>
@@ -677,14 +680,22 @@ public static int DefaultMaximumResponseHeadersLength
             }
             set
             {
+                ArgumentOutOfRangeException.ThrowIfLessThan(value, 0);
                 _defaultMaxResponseHeadersLength = value;
             }
         }
 
-        // NOP
         public static int DefaultMaximumErrorResponseLength
         {
-            get; set;
+            get
+            {
+                return _defaultMaximumErrorResponseLength;
+            }
+            set
+            {
+                ArgumentOutOfRangeException.ThrowIfLessThan(value, -1);
+                _defaultMaximumErrorResponseLength = value;
+            }
         }
 
         private static RequestCachePolicy? _defaultCachePolicy = new RequestCachePolicy(RequestCacheLevel.BypassCache);
@@ -806,10 +817,12 @@ public Version ProtocolVersion
                 if (value.Equals(HttpVersion.Version11))
                 {
                     IsVersionHttp10 = false;
+                    ServicePoint.ProtocolVersion = HttpVersion.Version11;
                 }
                 else if (value.Equals(HttpVersion.Version10))
                 {
                     IsVersionHttp10 = true;
+                    ServicePoint.ProtocolVersion = HttpVersion.Version10;
                 }
                 else
                 {
@@ -995,17 +1008,17 @@ public override void Abort()
                 {
                     _responseCallback(_responseOperation.Task);
                 }
-
-                // Cancel the underlying send operation.
-                Debug.Assert(_sendRequestCts != null);
-                _sendRequestCts.Cancel();
             }
-            else if (_requestStreamOperation != null)
+            if (_requestStreamOperation != null)
             {
                 if (_requestStreamOperation.TrySetCanceled() && _requestStreamCallback != null)
                 {
                     _requestStreamCallback(_requestStreamOperation.Task);
                 }
+
+                // Cancel the underlying send operation.
+                Debug.Assert(_sendRequestCts != null);
+                _sendRequestCts.Cancel();
             }
         }
 
@@ -1033,8 +1046,7 @@ public override WebResponse GetResponse()
         {
             try
             {
-                _sendRequestCts = new CancellationTokenSource();
-                return SendRequest(async: false).GetAwaiter().GetResult();
+                return HandleResponse(async: false).GetAwaiter().GetResult();
             }
             catch (Exception ex)
             {
@@ -1044,10 +1056,11 @@ public override WebResponse GetResponse()
 
         public override Stream GetRequestStream()
         {
+            CheckRequestStream();
             return InternalGetRequestStream().Result;
         }
 
-        private Task<Stream> InternalGetRequestStream()
+        private void CheckRequestStream()
         {
             CheckAbort();
 
@@ -1065,10 +1078,28 @@ private Task<Stream> InternalGetRequestStream()
             {
                 throw new InvalidOperationException(SR.net_reqsubmitted);
             }
+        }
 
-            _requestStream = new RequestStream();
+        private async Task<Stream> InternalGetRequestStream()
+        {
+            // If we aren't buffering we need to open the connection right away.
+            // Because we need to send the data as soon as possible when it's available from the RequestStream.
+            // Making this allows us to keep the sync send request path for buffering cases.
+            if (AllowWriteStreamBuffering is false)
+            {
+                // We're calling SendRequest with async, because we need to open the connection and send the request
+                // Otherwise, sync path will block the current thread until the request is sent.
+                TaskCompletionSource<Stream> getStreamTcs = new();
+                TaskCompletionSource completeTcs = new();
+                _sendRequestTask = SendRequest(async: true, new RequestStreamContent(getStreamTcs, completeTcs));
+                _requestStream = new RequestStream(await getStreamTcs.Task.ConfigureAwait(false), completeTcs);
+            }
+            else
+            {
+                _requestStream = new RequestBufferingStream();
+            }
 
-            return Task.FromResult((Stream)_requestStream);
+            return _requestStream;
         }
 
         public Stream EndGetRequestStream(IAsyncResult asyncResult, out TransportContext? context)
@@ -1092,6 +1123,8 @@ public override IAsyncResult BeginGetRequestStream(AsyncCallback? callback, obje
                 throw new InvalidOperationException(SR.net_repcall);
             }
 
+            CheckRequestStream();
+
             _requestStreamCallback = callback;
             _requestStreamOperation = InternalGetRequestStream().ToApm(callback, state);
 
@@ -1125,78 +1158,95 @@ public override Stream EndGetRequestStream(IAsyncResult asyncResult)
             return stream;
         }
 
-        private async Task<WebResponse> SendRequest(bool async)
+        private Task<HttpResponseMessage> SendRequest(bool async, HttpContent? content = null)
         {
             if (RequestSubmitted)
             {
                 throw new InvalidOperationException(SR.net_reqsubmitted);
             }
 
-            var request = new HttpRequestMessage(HttpMethod.Parse(_originVerb), _requestUri);
+            _sendRequestMessage = new HttpRequestMessage(HttpMethod.Parse(_originVerb), _requestUri);
+            _sendRequestCts = new CancellationTokenSource();
+            _httpClient = GetCachedOrCreateHttpClient(async, out _disposeRequired);
 
-            bool disposeRequired = false;
-            HttpClient? client = null;
-            try
+            if (content is not null)
             {
-                client = GetCachedOrCreateHttpClient(async, out disposeRequired);
-                if (_requestStream != null)
+                _sendRequestMessage.Content = content;
+            }
+
+            if (_hostUri is not null)
+            {
+                _sendRequestMessage.Headers.Host = Host;
+            }
+
+            AddCacheControlHeaders(_sendRequestMessage);
+
+            // Copy the HttpWebRequest request headers from the WebHeaderCollection into HttpRequestMessage.Headers and
+            // HttpRequestMessage.Content.Headers.
+            foreach (string headerName in _webHeaderCollection)
+            {
+                // The System.Net.Http APIs require HttpRequestMessage headers to be properly divided between the request headers
+                // collection and the request content headers collection for all well-known header names.  And custom headers
+                // are only allowed in the request headers collection and not in the request content headers collection.
+                if (IsWellKnownContentHeader(headerName))
                 {
-                    ArraySegment<byte> bytes = _requestStream.GetBuffer();
-                    request.Content = new ByteArrayContent(bytes.Array!, bytes.Offset, bytes.Count);
+                    _sendRequestMessage.Content ??= new ByteArrayContent(Array.Empty<byte>());
+                    _sendRequestMessage.Content.Headers.TryAddWithoutValidation(headerName, _webHeaderCollection[headerName!]);
                 }
-
-                if (_hostUri != null)
+                else
                 {
-                    request.Headers.Host = Host;
+                    _sendRequestMessage.Headers.TryAddWithoutValidation(headerName, _webHeaderCollection[headerName!]);
                 }
+            }
 
-                AddCacheControlHeaders(request);
+            if (_servicePoint?.Expect100Continue == true)
+            {
+                _sendRequestMessage.Headers.ExpectContinue = true;
+            }
 
-                // Copy the HttpWebRequest request headers from the WebHeaderCollection into HttpRequestMessage.Headers and
-                // HttpRequestMessage.Content.Headers.
-                foreach (string headerName in _webHeaderCollection)
-                {
-                    // The System.Net.Http APIs require HttpRequestMessage headers to be properly divided between the request headers
-                    // collection and the request content headers collection for all well-known header names.  And custom headers
-                    // are only allowed in the request headers collection and not in the request content headers collection.
-                    if (IsWellKnownContentHeader(headerName))
-                    {
-                        // Create empty content so that we can send the entity-body header.
-                        request.Content ??= new ByteArrayContent(Array.Empty<byte>());
+            _sendRequestMessage.Headers.TransferEncodingChunked = SendChunked;
 
-                        request.Content.Headers.TryAddWithoutValidation(headerName, _webHeaderCollection[headerName!]);
-                    }
-                    else
-                    {
-                        request.Headers.TryAddWithoutValidation(headerName, _webHeaderCollection[headerName!]);
-                    }
-                }
+            if (KeepAlive)
+            {
+                _sendRequestMessage.Headers.Connection.Add(HttpKnownHeaderNames.KeepAlive);
+            }
+            else
+            {
+                _sendRequestMessage.Headers.ConnectionClose = true;
+            }
 
-                request.Headers.TransferEncodingChunked = SendChunked;
+            _sendRequestMessage.Version = ProtocolVersion;
+            HttpCompletionOption completionOption = _allowReadStreamBuffering ? HttpCompletionOption.ResponseContentRead : HttpCompletionOption.ResponseHeadersRead;
+            // If we're not buffering, there is no way to open the connection and not send the request without async.
+            // So we should use Async, if we're not buffering.
+            _sendRequestTask = async || !AllowWriteStreamBuffering ?
+                _httpClient.SendAsync(_sendRequestMessage, completionOption, _sendRequestCts.Token) :
+                Task.FromResult(_httpClient.Send(_sendRequestMessage, completionOption, _sendRequestCts.Token));
 
-                if (KeepAlive)
-                {
-                    request.Headers.Connection.Add(HttpKnownHeaderNames.KeepAlive);
-                }
-                else
-                {
-                    request.Headers.ConnectionClose = true;
-                }
+            return _sendRequestTask!;
+        }
 
-                if (_servicePoint?.Expect100Continue == true)
-                {
-                    request.Headers.ExpectContinue = true;
-                }
+        private async Task<WebResponse> HandleResponse(bool async)
+        {
+            // If user code used requestStream and didn't dispose it
+            // We're completing it here.
+            if (_requestStream is RequestStream requestStream)
+            {
+                requestStream.Complete();
+            }
 
-                request.Version = ProtocolVersion;
+            if (_sendRequestTask is null && _requestStream is RequestBufferingStream requestBufferingStream)
+            {
+                ArraySegment<byte> buffer = requestBufferingStream.GetBuffer();
+                _sendRequestTask = SendRequest(async, new ByteArrayContent(buffer.Array!, buffer.Offset, buffer.Count));
+            }
 
-                _sendRequestTask = async ?
-                    client.SendAsync(request, _allowReadStreamBuffering ? HttpCompletionOption.ResponseContentRead : HttpCompletionOption.ResponseHeadersRead, _sendRequestCts!.Token) :
-                    Task.FromResult(client.Send(request, _allowReadStreamBuffering ? HttpCompletionOption.ResponseContentRead : HttpCompletionOption.ResponseHeadersRead, _sendRequestCts!.Token));
+            _sendRequestTask ??= SendRequest(async);
 
+            try
+            {
                 HttpResponseMessage responseMessage = await _sendRequestTask.ConfigureAwait(false);
-
-                HttpWebResponse response = new HttpWebResponse(responseMessage, _requestUri, _cookieContainer);
+                HttpWebResponse response = new(responseMessage, _requestUri, _cookieContainer);
 
                 int maxSuccessStatusCode = AllowAutoRedirect ? 299 : 399;
                 if ((int)response.StatusCode > maxSuccessStatusCode || (int)response.StatusCode < 200)
@@ -1212,9 +1262,15 @@ private async Task<WebResponse> SendRequest(bool async)
             }
             finally
             {
-                if (disposeRequired)
+                _sendRequestMessage?.Dispose();
+                if (_requestStream is RequestBufferingStream bufferStream)
+                {
+                    bufferStream.GetMemoryStream().Dispose();
+                }
+
+                if (_disposeRequired)
                 {
-                    client?.Dispose();
+                    _httpClient?.Dispose();
                 }
             }
         }
@@ -1340,9 +1396,8 @@ public override IAsyncResult BeginGetResponse(AsyncCallback? callback, object? s
                 throw new InvalidOperationException(SR.net_repcall);
             }
 
-            _sendRequestCts = new CancellationTokenSource();
             _responseCallback = callback;
-            _responseOperation = SendRequest(async: true).ToApm(callback, state);
+            _responseOperation = HandleResponse(async: true).ToApm(callback, state);
 
             return _responseOperation.Task;
         }
@@ -1621,6 +1676,13 @@ private static HttpClient CreateHttpClient(HttpClientParameters parameters, Http
                     handler.UseCookies = false;
                 }
 
+                if (parameters.ServicePoint is { } servicePoint)
+                {
+                    handler.MaxConnectionsPerServer = servicePoint.ConnectionLimit;
+                    handler.PooledConnectionIdleTimeout = TimeSpan.FromMilliseconds(servicePoint.MaxIdleTime);
+                    handler.PooledConnectionLifetime = TimeSpan.FromMilliseconds(servicePoint.ConnectionLeaseTimeout);
+                }
+
                 Debug.Assert(handler.UseProxy); // Default of handler.UseProxy is true.
                 Debug.Assert(handler.Proxy == null); // Default of handler.Proxy is null.
 
@@ -1638,7 +1700,7 @@ private static HttpClient CreateHttpClient(HttpClientParameters parameters, Http
                 {
                     handler.UseProxy = false;
                 }
-                else if (!object.ReferenceEquals(parameters.Proxy, WebRequest.GetSystemWebProxy()))
+                else if (!ReferenceEquals(parameters.Proxy, GetSystemWebProxy()))
                 {
                     handler.Proxy = parameters.Proxy;
                 }
@@ -1659,10 +1721,20 @@ private static HttpClient CreateHttpClient(HttpClientParameters parameters, Http
                 handler.SslOptions.EnabledSslProtocols = (SslProtocols)parameters.SslProtocols;
                 handler.SslOptions.CertificateRevocationCheckMode = parameters.CheckCertificateRevocationList ? X509RevocationMode.Online : X509RevocationMode.NoCheck;
                 RemoteCertificateValidationCallback? rcvc = parameters.ServerCertificateValidationCallback;
-                if (rcvc != null)
+                handler.SslOptions.RemoteCertificateValidationCallback = (message, cert, chain, errors) =>
                 {
-                    handler.SslOptions.RemoteCertificateValidationCallback = (message, cert, chain, errors) => rcvc(request!, cert, chain, errors);
-                }
+                    if (parameters.ServicePoint is { } servicePoint)
+                    {
+                        servicePoint.Certificate = cert;
+                    }
+
+                    if (rcvc is not null)
+                    {
+                        return rcvc(request!, cert, chain, errors);
+                    }
+
+                    return errors == SslPolicyErrors.None;
+                };
 
                 // Set up a ConnectCallback so that we can control Socket-specific settings, like ReadWriteTimeout => socket.Send/ReceiveTimeout.
                 handler.ConnectCallback = async (context, cancellationToken) =>
@@ -1671,6 +1743,10 @@ private static HttpClient CreateHttpClient(HttpClientParameters parameters, Http
 
                     try
                     {
+                        IPAddress[] addresses = parameters.Async ?
+                            await Dns.GetHostAddressesAsync(context.DnsEndPoint.Host, cancellationToken).ConfigureAwait(false) :
+                            Dns.GetHostAddresses(context.DnsEndPoint.Host);
+
                         if (parameters.ServicePoint is { } servicePoint)
                         {
                             if (servicePoint.ReceiveBufferSize != -1)
@@ -1684,19 +1760,58 @@ private static HttpClient CreateHttpClient(HttpClientParameters parameters, Http
                                 socket.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveTime, keepAlive.Time);
                                 socket.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveInterval, keepAlive.Interval);
                             }
+
+                            BindHelper(servicePoint, ref addresses, socket, context.DnsEndPoint.Port);
+                            static void BindHelper(ServicePoint servicePoint, ref IPAddress[] addresses, Socket socket, int port)
+                            {
+                                if (servicePoint.BindIPEndPointDelegate is null)
+                                {
+                                    return;
+                                }
+
+                                const int MaxRetries = 100;
+                                foreach (IPAddress address in addresses)
+                                {
+                                    int retryCount = 0;
+                                    for (; retryCount < MaxRetries; retryCount++)
+                                    {
+                                        IPEndPoint? endPoint = servicePoint.BindIPEndPointDelegate(servicePoint, new IPEndPoint(address, port), retryCount);
+                                        if (endPoint is null) // Get other address to try
+                                        {
+                                            break;
+                                        }
+
+                                        try
+                                        {
+                                            socket.Bind(endPoint);
+                                            addresses = [address];
+                                            return; // Bind successful, exit loops.
+                                        }
+                                        catch
+                                        {
+                                            continue;
+                                        }
+                                    }
+
+                                    if (retryCount >= MaxRetries)
+                                    {
+                                        throw new OverflowException(SR.net_maximumbindretries);
+                                    }
+                                }
+                            }
                         }
 
-                        socket.NoDelay = true;
+                        socket.NoDelay = !(parameters.ServicePoint?.UseNagleAlgorithm) ?? true;
 
                         if (parameters.Async)
                         {
-                            await socket.ConnectAsync(context.DnsEndPoint, cancellationToken).ConfigureAwait(false);
+                            await socket.ConnectAsync(addresses, context.DnsEndPoint.Port, cancellationToken).ConfigureAwait(false);
                         }
                         else
                         {
                             using (cancellationToken.UnsafeRegister(s => ((Socket)s!).Dispose(), socket))
                             {
-                                socket.Connect(context.DnsEndPoint);
+                                socket.Connect(addresses, context.DnsEndPoint.Port);
                             }
 
                             // Throw in case cancellation caused the socket to be disposed after the Connect completed
diff --git a/src/libraries/System.Net.Requests/src/System/Net/HttpWebResponse.cs b/src/libraries/System.Net.Requests/src/System/Net/HttpWebResponse.cs
index f7fae7869b1e..7b0e9b90681f 100644
--- a/src/libraries/System.Net.Requests/src/System/Net/HttpWebResponse.cs
+++ b/src/libraries/System.Net.Requests/src/System/Net/HttpWebResponse.cs
@@ -8,6 +8,8 @@
 using System.Net.Http;
 using System.Runtime.Serialization;
 using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
 
 namespace System.Net
 {
@@ -337,7 +339,14 @@ public override Stream GetResponseStream()
             CheckDisposed();
             if (_httpResponseMessage.Content != null)
             {
-                return _httpResponseMessage.Content.ReadAsStream();
+                Stream contentStream = _httpResponseMessage.Content.ReadAsStream();
+                int maxErrorResponseLength = HttpWebRequest.DefaultMaximumErrorResponseLength;
+                if (maxErrorResponseLength < 0 || StatusCode < HttpStatusCode.BadRequest)
+                {
+                    return contentStream;
+                }
+
+                return new TruncatedReadStream(contentStream, maxErrorResponseLength);
             }
 
             return Stream.Null;
@@ -371,5 +380,56 @@ private void CheckDisposed()
         }
 
         private static string GetHeaderValueAsString(IEnumerable<string> values) => string.Join(", ", values);
+
+        internal sealed class TruncatedReadStream(Stream innerStream, int maxSize) : Stream
+        {
+            public override bool CanRead => true;
+            public override bool CanSeek => false;
+            public override bool CanWrite => false;
+
+            public override long Length => throw new NotSupportedException();
+            public override long Position { get => throw new NotSupportedException(); set => throw new NotSupportedException(); }
+
+            public override void Flush() => throw new NotSupportedException();
+
+            public override int Read(byte[] buffer, int offset, int count)
+            {
+                return Read(new Span<byte>(buffer, offset, count));
+            }
+
+            public override int Read(Span<byte> buffer)
+            {
+                int readBytes = innerStream.Read(buffer.Slice(0, Math.Min(buffer.Length, maxSize)));
+                maxSize -= readBytes;
+                return readBytes;
+            }
+
+            public override Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
+            {
+                return ReadAsync(new Memory<byte>(buffer, offset, count), cancellationToken).AsTask();
+            }
+
+            public override async ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default)
+            {
+                int readBytes = await innerStream.ReadAsync(buffer.Slice(0, Math.Min(buffer.Length, maxSize)), cancellationToken)
+                    .ConfigureAwait(false);
+                maxSize -= readBytes;
+                return readBytes;
+            }
+
+            public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();
+            public override void SetLength(long value) => throw new NotSupportedException();
+            public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();
+
+            public override ValueTask DisposeAsync() => innerStream.DisposeAsync();
+
+            protected override void Dispose(bool disposing)
+            {
+                if (disposing)
+                {
+                    innerStream.Dispose();
+                }
+            }
+        }
     }
 }
diff --git a/src/libraries/System.Net.Requests/src/System/Net/RequestBufferingStream.cs b/src/libraries/System.Net.Requests/src/System/Net/RequestBufferingStream.cs
new file mode 100644
index 000000000000..3a5bb170314e
--- /dev/null
+++ b/src/libraries/System.Net.Requests/src/System/Net/RequestBufferingStream.cs
@@ -0,0 +1,134 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.IO;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace System.Net
+{
+    // Cache the request stream into a MemoryStream.
+    internal sealed class RequestBufferingStream : Stream
+    {
+        private bool _disposed;
+        private readonly MemoryStream _buffer = new MemoryStream();
+
+        public RequestBufferingStream()
+        {
+        }
+
+        public override bool CanRead => false;
+        public override bool CanSeek => false;
+        public override bool CanWrite => true;
+
+        public override void Flush() => ThrowIfDisposed(); // Nothing to do.
+
+        public override Task FlushAsync(CancellationToken cancellationToken)
+        {
+            ThrowIfDisposed();
+            // Nothing to do.
+            return cancellationToken.IsCancellationRequested ?
+                Task.FromCanceled(cancellationToken) :
+                Task.CompletedTask;
+        }
+
+        public override long Length
+        {
+            get
+            {
+                throw new NotSupportedException();
+            }
+        }
+
+        public override long Position
+        {
+            get
+            {
+                throw new NotSupportedException();
+            }
+            set
+            {
+                throw new NotSupportedException();
+            }
+        }
+
+        public override int Read(byte[] buffer, int offset, int count)
+        {
+            throw new NotSupportedException();
+        }
+
+        public override long Seek(long offset, SeekOrigin origin)
+        {
+            throw new NotSupportedException();
+        }
+
+        public override void SetLength(long value)
+        {
+            throw new NotSupportedException();
+        }
+
+        public override void Write(byte[] buffer, int offset, int count)
+        {
+            ThrowIfDisposed();
+            ValidateBufferArguments(buffer, offset, count);
+            _buffer.Write(buffer, offset, count);
+        }
+
+        public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
+        {
+            ThrowIfDisposed();
+            ValidateBufferArguments(buffer, offset, count);
+            return _buffer.WriteAsync(buffer, offset, count, cancellationToken);
+        }
+
+        public override ValueTask WriteAsync(ReadOnlyMemory<byte> buffer, CancellationToken cancellationToken = default)
+        {
+            ThrowIfDisposed();
+            return _buffer.WriteAsync(buffer, cancellationToken);
+        }
+
+        public override IAsyncResult BeginWrite(byte[] buffer, int offset, int count, AsyncCallback? asyncCallback, object? asyncState)
+        {
+            ThrowIfDisposed();
+            ValidateBufferArguments(buffer, offset, count);
+            return _buffer.BeginWrite(buffer, offset, count, asyncCallback, asyncState);
+        }
+
+        public override void EndWrite(IAsyncResult asyncResult)
+        {
+            ThrowIfDisposed();
+            _buffer.EndWrite(asyncResult);
+        }
+
+        public ArraySegment<byte> GetBuffer()
+        {
+            ArraySegment<byte> bytes;
+
+            bool success = _buffer.TryGetBuffer(out bytes);
+            Debug.Assert(success); // Buffer should always be visible since default MemoryStream constructor was used.
+
+            return bytes;
+        }
+
+        // We need this to dispose the MemoryStream.
+        public MemoryStream GetMemoryStream()
+        {
+            return _buffer;
+        }
+
+        protected override void Dispose(bool disposing)
+        {
+            if (disposing && !_disposed)
+            {
+                _disposed = true;
+            }
+            base.Dispose(disposing);
+        }
+
+        private void ThrowIfDisposed()
+        {
+            ObjectDisposedException.ThrowIf(_disposed, this);
+        }
+    }
+}
diff --git a/src/libraries/System.Net.Requests/src/System/Net/RequestStream.cs b/src/libraries/System.Net.Requests/src/System/Net/RequestStream.cs
index 5323c2ac836f..5961339576d3 100644
--- a/src/libraries/System.Net.Requests/src/System/Net/RequestStream.cs
+++ b/src/libraries/System.Net.Requests/src/System/Net/RequestStream.cs
@@ -3,22 +3,22 @@
 
 using System.Diagnostics;
 using System.IO;
+using System.Runtime.CompilerServices;
 using System.Threading;
 using System.Threading.Tasks;
 
 namespace System.Net
 {
-    // Cache the request stream into a MemoryStream.  This is the
-    // default behavior of Desktop HttpWebRequest.AllowWriteStreamBuffering (true).
-    // Unfortunately, this property is not exposed in .NET Core, so it can't be changed
-    // This will result in inefficient memory usage when sending (POST'ing) large
-    // amounts of data to the server such as from a file stream.
     internal sealed class RequestStream : Stream
     {
-        private readonly MemoryStream _buffer = new MemoryStream();
+        private bool _disposed;
+        private readonly TaskCompletionSource _completeTcs;
+        private readonly Stream _internalStream;
 
-        public RequestStream()
+        public RequestStream(Stream internalStream, TaskCompletionSource completeTcs)
         {
+            _internalStream = internalStream;
+            _completeTcs = completeTcs;
         }
 
         public override bool CanRead
@@ -47,15 +47,14 @@ public override bool CanWrite
 
         public override void Flush()
         {
-            // Nothing to do.
+            ThrowIfDisposed();
+            _internalStream.Flush();
         }
 
         public override Task FlushAsync(CancellationToken cancellationToken)
         {
-            // Nothing to do.
-            return cancellationToken.IsCancellationRequested ?
-                Task.FromCanceled(cancellationToken) :
-                Task.CompletedTask;
+            ThrowIfDisposed();
+            return _internalStream.FlushAsync(cancellationToken);
         }
 
         public override long Length
@@ -95,40 +94,67 @@ public override void SetLength(long value)
 
         public override void Write(byte[] buffer, int offset, int count)
         {
+            ThrowIfDisposed();
             ValidateBufferArguments(buffer, offset, count);
-            _buffer.Write(buffer, offset, count);
+            _internalStream.Write(new(buffer, offset, count));
         }
 
         public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
         {
+            ThrowIfDisposed();
             ValidateBufferArguments(buffer, offset, count);
-            return _buffer.WriteAsync(buffer, offset, count, cancellationToken);
+            return _internalStream.WriteAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask();
         }
 
         public override ValueTask WriteAsync(ReadOnlyMemory<byte> buffer, CancellationToken cancellationToken = default)
         {
-            return _buffer.WriteAsync(buffer, cancellationToken);
+            ThrowIfDisposed();
+            return _internalStream.WriteAsync(buffer, cancellationToken);
         }
 
         public override IAsyncResult BeginWrite(byte[] buffer, int offset, int count, AsyncCallback? asyncCallback, object? asyncState)
         {
+            ThrowIfDisposed();
             ValidateBufferArguments(buffer, offset, count);
-            return _buffer.BeginWrite(buffer, offset, count, asyncCallback, asyncState);
+            return _internalStream.BeginWrite(buffer, offset, count, asyncCallback, asyncState);
+        }
+
+        public void Complete()
+        {
+            _completeTcs.TrySetResult();
         }
 
         public override void EndWrite(IAsyncResult asyncResult)
         {
-            _buffer.EndWrite(asyncResult);
+            ThrowIfDisposed();
+            _internalStream.EndWrite(asyncResult);
         }
 
-        public ArraySegment<byte> GetBuffer()
+        protected override void Dispose(bool disposing)
         {
-            ArraySegment<byte> bytes;
+            if (disposing && !_disposed)
+            {
+                _disposed = true;
+            }
+            _internalStream.Flush();
+            Complete();
+            base.Dispose(disposing);
+        }
 
-            bool success = _buffer.TryGetBuffer(out bytes);
-            Debug.Assert(success); // Buffer should always be visible since default MemoryStream constructor was used.
+        public override async ValueTask DisposeAsync()
+        {
+            if (!_disposed)
+            {
+                _disposed = true;
+            }
+            await _internalStream.FlushAsync().ConfigureAwait(false);
+            Complete();
+            await base.DisposeAsync().ConfigureAwait(false);
+        }
 
-            return bytes;
+        private void ThrowIfDisposed()
+        {
+            ObjectDisposedException.ThrowIf(_disposed, this);
         }
     }
 }
diff --git a/src/libraries/System.Net.Requests/src/System/Net/RequestStreamContent.cs b/src/libraries/System.Net.Requests/src/System/Net/RequestStreamContent.cs
new file mode 100644
index 000000000000..b78829c22de7
--- /dev/null
+++ b/src/libraries/System.Net.Requests/src/System/Net/RequestStreamContent.cs
@@ -0,0 +1,31 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.IO;
+using System.Net.Http;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace System.Net
+{
+    internal sealed class RequestStreamContent(TaskCompletionSource<Stream> getStreamTcs, TaskCompletionSource completeTcs) : HttpContent
+    {
+        protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context)
+        {
+            return SerializeToStreamAsync(stream, context, default);
+        }
+        protected override async Task SerializeToStreamAsync(Stream stream, TransportContext? context, CancellationToken cancellationToken)
+        {
+            Debug.Assert(stream is not null);
+
+            getStreamTcs.TrySetResult(stream);
+            await completeTcs.Task.WaitAsync(cancellationToken).ConfigureAwait(false);
+        }
+        protected override bool TryComputeLength(out long length)
+        {
+            length = -1;
+            return false;
+        }
+    }
+}
diff --git a/src/libraries/System.Net.Requests/src/System/Net/ServicePoint/ServicePointManager.cs b/src/libraries/System.Net.Requests/src/System/Net/ServicePoint/ServicePointManager.cs
index a0cf9dcece15..8245e309efd9 100644
--- a/src/libraries/System.Net.Requests/src/System/Net/ServicePoint/ServicePointManager.cs
+++ b/src/libraries/System.Net.Requests/src/System/Net/ServicePoint/ServicePointManager.cs
@@ -78,7 +78,7 @@ public static int MaxServicePointIdleTime
             }
         }
 
-        public static bool UseNagleAlgorithm { get; set; } = true;
+        public static bool UseNagleAlgorithm { get; set; }
 
         public static bool Expect100Continue { get; set; } = true;
 
@@ -156,7 +156,8 @@ public static ServicePoint FindServicePoint(Uri address, IWebProxy? proxy)
                     IdleSince = DateTime.Now,
                     Expect100Continue = Expect100Continue,
                     UseNagleAlgorithm = UseNagleAlgorithm,
-                    KeepAlive = KeepAlive
+                    KeepAlive = KeepAlive,
+                    MaxIdleTime = MaxServicePointIdleTime
                 };
                 s_servicePointTable[tableKey] = new WeakReference<ServicePoint>(sp);
 
@@ -177,11 +178,6 @@ private static bool ProxyAddressIfNecessary(ref Uri address, IWebProxy? proxy)
                     Uri? proxyAddress = proxy.GetProxy(address);
                     if (proxyAddress != null)
                     {
-                        if (proxyAddress.Scheme != Uri.UriSchemeHttp)
-                        {
-                            throw new NotSupportedException(SR.Format(SR.net_proxyschemenotsupported, address.Scheme));
-                        }
-
                         address = proxyAddress;
                         return true;
                     }
diff --git a/src/libraries/System.Net.Requests/tests/HttpWebRequestTest.cs b/src/libraries/System.Net.Requests/tests/HttpWebRequestTest.cs
index 45563ccc3dd0..54ad032277d8 100644
--- a/src/libraries/System.Net.Requests/tests/HttpWebRequestTest.cs
+++ b/src/libraries/System.Net.Requests/tests/HttpWebRequestTest.cs
@@ -258,7 +258,7 @@ public void Ctor_VerifyDefaults_Success(Uri remoteServer)
             Assert.Equal(64, HttpWebRequest.DefaultMaximumResponseHeadersLength);
             Assert.NotNull(HttpWebRequest.DefaultCachePolicy);
             Assert.Equal(RequestCacheLevel.BypassCache, HttpWebRequest.DefaultCachePolicy.Level);
-            Assert.Equal(0, HttpWebRequest.DefaultMaximumErrorResponseLength);
+            Assert.Equal(-1, HttpWebRequest.DefaultMaximumErrorResponseLength);
             Assert.NotNull(request.Proxy);
             Assert.Equal(remoteServer, request.RequestUri);
             Assert.True(request.SupportsCookieContainer);
@@ -1914,7 +1914,7 @@ public void Abort_CreateRequestThenAbort_Success(Uri remoteServer)
         }
 
         [Theory]
-        [InlineData(HttpRequestCacheLevel.NoCacheNoStore, null, null, new string[] { "Pragma: no-cache", "Cache-Control: no-store, no-cache"})]
+        [InlineData(HttpRequestCacheLevel.NoCacheNoStore, null, null, new string[] { "Pragma: no-cache", "Cache-Control: no-store, no-cache" })]
         [InlineData(HttpRequestCacheLevel.Reload, null, null, new string[] { "Pragma: no-cache", "Cache-Control: no-cache" })]
         [InlineData(HttpRequestCacheLevel.CacheOrNextCacheOnly, null, null, new string[] { "Cache-Control: only-if-cached" })]
         [InlineData(HttpRequestCacheLevel.Default, HttpCacheAgeControl.MinFresh, 10, new string[] { "Cache-Control: min-fresh=10" })]
@@ -2077,6 +2077,125 @@ await server.AcceptConnectionAsync(async connection =>
             });
         }
 
+        [Fact]
+        public async Task SendHttpPostRequest_BufferingDisabled_ConnectionShouldStartWithRequestStream()
+        {
+            await LoopbackServer.CreateClientAndServerAsync(
+                async (uri) =>
+                {
+                    HttpWebRequest request = WebRequest.CreateHttp(uri);
+                    request.Method = "POST";
+                    request.AllowWriteStreamBuffering = false;
+                    request.SendChunked = true;
+                    var stream = await request.GetRequestStreamAsync();
+                    await Assert.ThrowsAnyAsync<Exception>(() => request.GetResponseAsync());
+                },
+                async (server) => 
+                {
+                    await server.AcceptConnectionAsync(_ =>
+                    {
+                        return Task.CompletedTask;
+                    });
+                }
+            );
+        }
+
+        [Theory]
+        [InlineData(true)]
+        [InlineData(false)]
+        public async Task SendHttpPostRequest_WhenBufferingChanges_Success(bool buffering)
+        {
+            byte[] randomData = Encoding.ASCII.GetBytes("Hello World!!!!\n");
+            await LoopbackServer.CreateClientAndServerAsync(
+                async (uri) =>
+                {
+                    int size = randomData.Length * 100;
+                    HttpWebRequest request = WebRequest.CreateHttp(uri);
+                    request.Method = "POST";
+                    request.AllowWriteStreamBuffering = buffering;
+                    using var stream = await request.GetRequestStreamAsync();
+                    for (int i = 0; i < size / randomData.Length; i++)
+                    {
+                        await stream.WriteAsync(new ReadOnlyMemory<byte>(randomData));
+                    }
+                    await request.GetResponseAsync();
+                },
+                async (server) =>
+                {
+                    await server.AcceptConnectionAsync(async connection =>
+                    {
+                        var data = await connection.ReadRequestDataAsync();
+                        for (int i = 0; i < data.Body.Length; i += randomData.Length)
+                        {
+                            Assert.Equal(randomData, data.Body[i..(i + randomData.Length)]);
+                        }
+                        await connection.SendResponseAsync();
+                    });
+                }
+            );
+        }
+
+        [Theory]
+        [InlineData(false)]
+        [InlineData(true)]
+        public async Task SendHttpRequest_WhenNotBuffering_SendSuccess(bool isChunked)
+        {
+            byte[] firstBlock = "Hello"u8.ToArray();
+            byte[] secondBlock = "WorlddD"u8.ToArray();
+            SemaphoreSlim sem = new(0);
+            await LoopbackServer.CreateClientAndServerAsync(
+                async (uri) =>
+                {
+                    HttpWebRequest request = WebRequest.CreateHttp(uri);
+                    request.Method = "POST";
+                    if (isChunked is false)
+                    {
+                        request.ContentLength = 5 + 7;
+                    }
+                    request.AllowWriteStreamBuffering = false;
+                    
+                    using (Stream requestStream = await request.GetRequestStreamAsync())
+                    {
+                        requestStream.Write(firstBlock);
+                        requestStream.Flush();
+                        await sem.WaitAsync();
+                        requestStream.Write(secondBlock);
+                        requestStream.Flush();
+                    }
+                    await request.GetResponseAsync();
+                    sem.Release();
+                },
+                async (server) =>
+                {
+                    await server.AcceptConnectionAsync(async (connection) =>
+                    {
+                        byte[] buffer = new byte[1024];
+                        await connection.ReadRequestHeaderAsync();
+                        if (isChunked)
+                        {
+                            // Discard chunk length and CRLF.
+                            await connection.ReadLineAsync();
+                        }
+                        int readBytes = await connection.ReadBlockAsync(buffer, 0, firstBlock.Length);
+                        Assert.Equal(firstBlock.Length, readBytes);
+                        Assert.Equal(firstBlock, buffer[..readBytes]);
+                        sem.Release();
+                        if (isChunked)
+                        {
+                            // Discard CRLF, chunk length and CRLF.
+                            await connection.ReadLineAsync();
+                            await connection.ReadLineAsync();
+                        }
+                        readBytes = await connection.ReadBlockAsync(buffer, 0, secondBlock.Length);
+                        Assert.Equal(secondBlock.Length, readBytes);
+                        Assert.Equal(secondBlock, buffer[..readBytes]);
+                        await connection.SendResponseAsync();
+                        await sem.WaitAsync();
+                    });
+                }
+            );
+        }
+        
         [Fact]
         public async Task SendHttpPostRequest_WithContinueTimeoutAndBody_BodyIsDelayed()
         {
@@ -2087,18 +2206,20 @@ await LoopbackServer.CreateClientAndServerAsync(
                     request.Method = "POST";
                     request.ServicePoint.Expect100Continue = true;
                     request.ContinueTimeout = 30000;
-                    Stream requestStream = await request.GetRequestStreamAsync();
-                    requestStream.Write("aaaa\r\n\r\n"u8);
-                    await request.GetResponseAsync();
+                    using (Stream requestStream = await request.GetRequestStreamAsync())
+                    {
+                        requestStream.Write("aaaa\r\n\r\n"u8);
+                    }
+                    await GetResponseAsync(request);
                 },
                 async (server) =>
                 {
-                    await server.AcceptConnectionAsync(async (client) => 
+                    await server.AcceptConnectionAsync(async (connection) => 
                     {
-                        await client.ReadRequestHeaderAsync();
+                        await connection.ReadRequestHeaderAsync();
                         // This should time out, because we're expecting the body itself but we'll get it after 30 sec.
-                        await Assert.ThrowsAsync<TimeoutException>(() => client.ReadLineAsync().WaitAsync(TimeSpan.FromMilliseconds(100)));
-                        await client.SendResponseAsync();
+                        await Assert.ThrowsAsync<TimeoutException>(() => connection.ReadLineAsync().WaitAsync(TimeSpan.FromMilliseconds(100)));
+                        await connection.SendResponseAsync();
                     });
                 }
             );
@@ -2116,19 +2237,21 @@ await LoopbackServer.CreateClientAndServerAsync(
                     request.Method = "POST";
                     request.ServicePoint.Expect100Continue = expect100Continue;
                     request.ContinueTimeout = continueTimeout;
-                    Stream requestStream = await request.GetRequestStreamAsync();
-                    requestStream.Write("aaaa\r\n\r\n"u8);
-                    await request.GetResponseAsync();
+                    using (Stream requestStream = await request.GetRequestStreamAsync())
+                    {
+                        requestStream.Write("aaaa\r\n\r\n"u8);
+                    }
+                    await GetResponseAsync(request);
                 },
                 async (server) =>
                 {
-                    await server.AcceptConnectionAsync(async (client) => 
+                    await server.AcceptConnectionAsync(async (connection) => 
                     {
-                        await client.ReadRequestHeaderAsync();
+                        await connection.ReadRequestHeaderAsync();
                         // This should not time out, because we're expecting the body itself and we should get it after 1 sec.
-                        string data = await client.ReadLineAsync().WaitAsync(TimeSpan.FromSeconds(10));
+                        string data = await connection.ReadLineAsync().WaitAsync(TimeSpan.FromSeconds(10));
                         Assert.StartsWith("aaaa", data);
-                        await client.SendResponseAsync();
+                        await connection.SendResponseAsync();
                     });
                 });
         }
@@ -2144,14 +2267,14 @@ await LoopbackServer.CreateClientAndServerAsync(
                     HttpWebRequest request = WebRequest.CreateHttp(uri);
                     request.Method = "POST";
                     request.ServicePoint.Expect100Continue = expect100Continue;
-                    await request.GetResponseAsync();
+                    await GetResponseAsync(request);
                 },
                 async (server) =>
                 {
                     await server.AcceptConnectionAsync(
-                        async (client) =>
+                        async (connection) =>
                         {
-                            List<string> headers = await client.ReadRequestHeaderAsync();
+                            List<string> headers = await connection.ReadRequestHeaderAsync();
                             if (expect100Continue)
                             {
                                 Assert.Contains("Expect: 100-continue", headers);
@@ -2160,13 +2283,138 @@ await server.AcceptConnectionAsync(
                             {
                                 Assert.DoesNotContain("Expect: 100-continue", headers);
                             }
-                            await client.SendResponseAsync();
+                            await connection.SendResponseAsync();
                         }
                     );
                 }
             );
         }
 
+        [ConditionalFact(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))]
+        public void SendHttpRequest_WhenDefaultMaximumErrorResponseLengthSet_Success()
+        {
+            RemoteExecutor.Invoke(async (async) =>
+            {
+                TaskCompletionSource tcs = new TaskCompletionSource();
+                await LoopbackServer.CreateClientAndServerAsync(
+                async (uri) =>
+                {
+                    HttpWebRequest request = WebRequest.CreateHttp(uri);
+                    HttpWebRequest.DefaultMaximumErrorResponseLength = 5;
+                    var exception = 
+                        await Assert.ThrowsAsync<WebException>(() => bool.Parse(async) ? request.GetResponseAsync() : Task.Run(() => request.GetResponse()));
+                    tcs.SetResult();
+                    Assert.NotNull(exception.Response);
+                    using (var responseStream = exception.Response.GetResponseStream())
+                    {
+                        var buffer = new byte[10];
+                        int readLen = responseStream.Read(buffer, 0, buffer.Length);
+                        Assert.Equal(5, readLen);
+                        Assert.Equal(new string('a', 5), Encoding.UTF8.GetString(buffer[0..readLen]));
+                        Assert.Equal(0, responseStream.Read(buffer));
+                    }
+                },
+                async (server) =>
+                {
+                    await server.AcceptConnectionAsync(
+                        async connection =>
+                        {
+                            await connection.SendResponseAsync(statusCode: HttpStatusCode.BadRequest, content: new string('a', 10));
+                            await tcs.Task;
+                        });
+                });
+            }, IsAsync.ToString()).Dispose();
+        }
+
+        [Fact]
+        public void HttpWebRequest_SetProtocolVersion_Success()
+        {
+            HttpWebRequest request = WebRequest.CreateHttp(Configuration.Http.RemoteEchoServer);
+
+            request.ProtocolVersion = HttpVersion.Version10;
+            Assert.Equal(HttpVersion.Version10, request.ServicePoint.ProtocolVersion);
+
+            request.ProtocolVersion = HttpVersion.Version11;
+            Assert.Equal(HttpVersion.Version11, request.ServicePoint.ProtocolVersion);
+        }
+
+        [ConditionalFact(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))]
+        public void SendHttpRequest_BindIPEndPoint_Success()
+        {
+            RemoteExecutor.Invoke(async (async) =>
+            {
+                TaskCompletionSource tcs = new TaskCompletionSource();
+                await LoopbackServer.CreateClientAndServerAsync(
+                    async (uri) =>
+                    {
+                        HttpWebRequest request = WebRequest.CreateHttp(uri);
+                        request.ServicePoint.BindIPEndPointDelegate = (_, _, _) => new IPEndPoint(IPAddress.Loopback, 27277);
+                        var responseTask = bool.Parse(async) ? request.GetResponseAsync() : Task.Run(() => request.GetResponse());
+                        using (var response = (HttpWebResponse)await responseTask)
+                        {
+                            Assert.Equal(HttpStatusCode.OK, response.StatusCode);
+                        }
+                        tcs.SetResult();
+                    },
+                    async (server) =>
+                    {
+                        await server.AcceptConnectionAsync(
+                            async connection =>
+                            {
+                                var ipEp = (IPEndPoint)connection.Socket.RemoteEndPoint;
+                                Assert.Equal(27277, ipEp.Port);
+                                await connection.SendResponseAsync();
+                                await tcs.Task;
+                            });
+                    });
+            }, IsAsync.ToString()).Dispose();
+        }
+
+        [ConditionalFact(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))]
+        public void SendHttpRequest_BindIPEndPoint_Throws()
+        {
+            RemoteExecutor.Invoke(async (async) =>
+            {
+                Socket socket = new Socket(SocketType.Stream, ProtocolType.Tcp);
+                socket.Bind(new IPEndPoint(IPAddress.Loopback, 0));
+                ValueTask<Socket>? clientSocket = null;
+                CancellationTokenSource cts = new CancellationTokenSource();
+                if (PlatformDetection.IsLinux)
+                {
+                    socket.Listen();
+                    clientSocket = socket.AcceptAsync(cts.Token);
+                }
+
+                try
+                {
+                    // URI shouldn't matter because it should throw exception before connection open.
+                    HttpWebRequest request = WebRequest.CreateHttp(Configuration.Http.RemoteEchoServer);
+                    request.ServicePoint.BindIPEndPointDelegate = (_, _, _) => (IPEndPoint)socket.LocalEndPoint!;
+                    var exception = await Assert.ThrowsAsync<WebException>(() =>
+                        bool.Parse(async) ? request.GetResponseAsync() : Task.Run(() => request.GetResponse()));
+                    Assert.IsType<OverflowException>(exception.InnerException?.InnerException);
+                }
+                finally
+                {
+                    if (clientSocket is not null)
+                    {
+                        await cts.CancelAsync();
+                    }
+                    socket.Dispose();
+                    cts.Dispose();
+                }
+            }, IsAsync.ToString()).Dispose();
+        }
+
+        [Fact]
+        public void HttpWebRequest_HttpsAddressWithProxySetProtocolVersion_ShouldNotThrow()
+        {
+            HttpWebRequest request = (HttpWebRequest) WebRequest.Create("https://microsoft.com");
+            request.Proxy = new WebProxy();
+            request.ProtocolVersion = HttpVersion.Version11;
+            Assert.Same(HttpVersion.Version11, request.ServicePoint.ProtocolVersion);
+        }
+
         private void RequestStreamCallback(IAsyncResult asynchronousResult)
         {
             RequestState state = (RequestState)asynchronousResult.AsyncState;
diff --git a/src/libraries/System.Net.Requests/tests/ServicePointTests/ServicePointManagerTest.cs b/src/libraries/System.Net.Requests/tests/ServicePointTests/ServicePointManagerTest.cs
index c1230598a8d4..33f7d1ac48ba 100644
--- a/src/libraries/System.Net.Requests/tests/ServicePointTests/ServicePointManagerTest.cs
+++ b/src/libraries/System.Net.Requests/tests/ServicePointTests/ServicePointManagerTest.cs
@@ -181,7 +181,7 @@ public static void ServerCertificateValidationCallback_Roundtrips()
         [Fact]
         public static void UseNagleAlgorithm_Roundtrips()
         {
-            Assert.True(ServicePointManager.UseNagleAlgorithm);
+            Assert.False(ServicePointManager.UseNagleAlgorithm);
             try
             {
                 ServicePointManager.UseNagleAlgorithm = false;
@@ -213,7 +213,6 @@ public static void InvalidArguments_Throw()
                 AssertExtensions.Throws<ArgumentNullException>("address", () => ServicePointManager.FindServicePoint(null));
                 AssertExtensions.Throws<ArgumentNullException>("uriString", () => ServicePointManager.FindServicePoint((string)null, null));
                 AssertExtensions.Throws<ArgumentNullException>("address", () => ServicePointManager.FindServicePoint((Uri)null, null));
-                Assert.Throws<NotSupportedException>(() => ServicePointManager.FindServicePoint("http://anything", new FixedWebProxy("https://anything")));
 
                 ServicePoint sp = ServicePointManager.FindServicePoint($"http://{Guid.NewGuid():N}", null);
                 AssertExtensions.Throws<ArgumentOutOfRangeException>("value", () => sp.ConnectionLeaseTimeout = -2);
@@ -325,7 +324,7 @@ public static void FindServicePoint_ReturnedServicePointMatchesExpectedValues()
                 Assert.Equal(new Version(1, 1), sp.ProtocolVersion);
                 Assert.Equal(-1, sp.ReceiveBufferSize);
                 Assert.True(sp.SupportsPipelining, "SupportsPipelining");
-                Assert.True(sp.UseNagleAlgorithm, "UseNagleAlgorithm");
+                Assert.False(sp.UseNagleAlgorithm, "UseNagleAlgorithm");
             }).Dispose();
         }
 
diff --git a/src/libraries/System.Net.Security/src/System/Net/CertificateValidationPal.Windows.cs b/src/libraries/System.Net.Security/src/System/Net/CertificateValidationPal.Windows.cs
index a3254540047f..27224b4e4be0 100644
--- a/src/libraries/System.Net.Security/src/System/Net/CertificateValidationPal.Windows.cs
+++ b/src/libraries/System.Net.Security/src/System/Net/CertificateValidationPal.Windows.cs
@@ -111,10 +111,9 @@ internal static bool IsLocalCertificateUsed(SafeFreeCredentials? _credentialsHan
             SafeFreeCertContext? localContext = null;
             try
             {
-                if (SSPIWrapper.QueryContextAttributes_SECPKG_ATTR_LOCAL_CERT_CONTEXT(GlobalSSPI.SSPISecureChannel, securityContext, out localContext) &&
-                    localContext != null)
+                if (SSPIWrapper.QueryContextAttributes_SECPKG_ATTR_LOCAL_CERT_CONTEXT(GlobalSSPI.SSPISecureChannel, securityContext, out localContext))
                 {
-                    return !localContext.IsInvalid;
+                    return localContext != null ? !localContext.IsInvalid : false;
                 }
             }
             finally
diff --git a/src/libraries/System.Net.Security/src/System/Net/NegotiateAuthenticationPal.ManagedNtlm.cs b/src/libraries/System.Net.Security/src/System/Net/NegotiateAuthenticationPal.ManagedNtlm.cs
index 147dd35e4e19..147097e521c7 100644
--- a/src/libraries/System.Net.Security/src/System/Net/NegotiateAuthenticationPal.ManagedNtlm.cs
+++ b/src/libraries/System.Net.Security/src/System/Net/NegotiateAuthenticationPal.ManagedNtlm.cs
@@ -270,8 +270,14 @@ public override void Dispose()
                 {
                     Debug.Assert(incomingBlob.IsEmpty);
 
+                    Flags requiredFlags = s_requiredFlags;
+                    if (_protectionLevel == ProtectionLevel.EncryptAndSign)
+                    {
+                        requiredFlags |= Flags.NegotiateSeal;
+                    }
+
                     _negotiateMessage = new byte[sizeof(NegotiateMessage)];
-                    CreateNtlmNegotiateMessage(_negotiateMessage);
+                    CreateNtlmNegotiateMessage(_negotiateMessage, requiredFlags);
 
                     outgoingBlob = _negotiateMessage;
                     statusCode = NegotiateAuthenticationStatusCode.ContinueNeeded;
@@ -286,7 +292,7 @@ public override void Dispose()
                 return outgoingBlob;
             }
 
-            private static unsafe void CreateNtlmNegotiateMessage(Span<byte> asBytes)
+            private static unsafe void CreateNtlmNegotiateMessage(Span<byte> asBytes, Flags requiredFlags)
             {
                 Debug.Assert(HeaderLength == NtlmHeader.Length);
                 Debug.Assert(asBytes.Length == sizeof(NegotiateMessage));
@@ -296,7 +302,7 @@ private static unsafe void CreateNtlmNegotiateMessage(Span<byte> asBytes)
                 asBytes.Clear();
                 NtlmHeader.CopyTo(asBytes);
                 message.Header.MessageType = MessageType.Negotiate;
-                message.Flags = s_requiredFlags;
+                message.Flags = requiredFlags;
                 message.Version = s_version;
             }
 
@@ -581,6 +587,13 @@ private static byte[] DeriveKey(ReadOnlySpan<byte> exportedSessionKey, ReadOnlyS
                     return null;
                 }
 
+                // We already negotiate signing, so we only need to check sealing/encryption.
+                if ((flags & Flags.NegotiateSeal) == 0 && _protectionLevel == ProtectionLevel.EncryptAndSign)
+                {
+                    statusCode = NegotiateAuthenticationStatusCode.QopNotSupported;
+                    return null;
+                }
+
                 ReadOnlySpan<byte> targetInfo = GetField(challengeMessage.TargetInfo, blob);
                 byte[] targetInfoBuffer = ProcessTargetInfo(targetInfo, out DateTime time, out bool hasNbNames);
 
@@ -615,7 +628,7 @@ private static byte[] DeriveKey(ReadOnlySpan<byte> exportedSessionKey, ReadOnlyS
                 NtlmHeader.CopyTo(responseAsSpan);
 
                 response.Header.MessageType = MessageType.Authenticate;
-                response.Flags = s_requiredFlags;
+                response.Flags = s_requiredFlags | (flags & Flags.NegotiateSeal);
                 response.Version = s_version;
 
                 // Calculate hash for hmac - same for lm2 and ntlm2
diff --git a/src/libraries/System.Net.Security/src/System/Net/Security/NegotiateStream.cs b/src/libraries/System.Net.Security/src/System/Net/Security/NegotiateStream.cs
index 033b82800dba..48e3923037cc 100644
--- a/src/libraries/System.Net.Security/src/System/Net/Security/NegotiateStream.cs
+++ b/src/libraries/System.Net.Security/src/System/Net/Security/NegotiateStream.cs
@@ -883,7 +883,16 @@ private async Task ReceiveBlobAsync<TIOAdapter>(CancellationToken cancellationTo
 
             if (_framer.ReadHeader.MessageId == FrameHeader.HandshakeDoneId)
             {
-                _remoteOk = true;
+                if (HandshakeComplete && message.Length > 0)
+                {
+                    Debug.Assert(_context != null);
+                    _context.GetOutgoingBlob(message, out NegotiateAuthenticationStatusCode statusCode);
+                    _remoteOk = statusCode is NegotiateAuthenticationStatusCode.Completed;
+                }
+                else
+                {
+                    _remoteOk = true;
+                }
             }
             else if (_framer.ReadHeader.MessageId != FrameHeader.HandshakeId)
             {
diff --git a/src/libraries/System.Net.Security/src/System/Net/Security/SslStream.Protocol.cs b/src/libraries/System.Net.Security/src/System/Net/Security/SslStream.Protocol.cs
index 8e6cb95ef77a..a3b8b2914404 100644
--- a/src/libraries/System.Net.Security/src/System/Net/Security/SslStream.Protocol.cs
+++ b/src/libraries/System.Net.Security/src/System/Net/Security/SslStream.Protocol.cs
@@ -16,6 +16,39 @@ namespace System.Net.Security
 {
     public partial class SslStream
     {
+        private const string DisableTlsResumeCtxSwitch = "System.Net.Security.DisableTlsResume";
+        private const string DisableTlsResumeEnvironmentVariable = "DOTNET_SYSTEM_NET_SECURITY_DISABLETLSRESUME";
+
+        private static volatile int s_disableTlsResume = -1;
+
+        internal static bool DisableTlsResume
+        {
+            get
+            {
+                int disableTlsResume = s_disableTlsResume;
+                if (disableTlsResume != -1)
+                {
+                    return disableTlsResume != 0;
+                }
+
+                // First check for the AppContext switch, giving it priority over the environment variable.
+                if (AppContext.TryGetSwitch(DisableTlsResumeCtxSwitch, out bool value))
+                {
+                    s_disableTlsResume = value ? 1 : 0;
+                }
+                else
+                {
+                    // AppContext switch wasn't used. Check the environment variable.
+                    s_disableTlsResume =
+                        Environment.GetEnvironmentVariable(DisableTlsResumeEnvironmentVariable) is string envVar &&
+                        (envVar == "1" || envVar.Equals("true", StringComparison.OrdinalIgnoreCase)) ? 1 : 0;
+                }
+
+                return s_disableTlsResume != 0;
+            }
+        }
+
+
         private SafeFreeCredentials? _credentialsHandle;
         private SafeDeleteSslContext? _securityContext;
 
diff --git a/src/libraries/System.Net.Security/src/System/Net/Security/SslStreamPal.Windows.cs b/src/libraries/System.Net.Security/src/System/Net/Security/SslStreamPal.Windows.cs
index a9fe7f0a4e74..63b9aea0e4bd 100644
--- a/src/libraries/System.Net.Security/src/System/Net/Security/SslStreamPal.Windows.cs
+++ b/src/libraries/System.Net.Security/src/System/Net/Security/SslStreamPal.Windows.cs
@@ -119,6 +119,7 @@ public static ProtocolToken AcceptSecurityContext(
             }
 
             ProtocolToken token = default;
+            token.RentBuffer = true;
 
             int errorCode = SSPIWrapper.AcceptSecurityContext(
                 GlobalSSPI.SSPISecureChannel,
@@ -163,6 +164,7 @@ public static ProtocolToken InitializeSecurityContext(
             }
 
             ProtocolToken token = default;
+            token.RentBuffer = true;
             int errorCode = SSPIWrapper.InitializeSecurityContext(
                                 GlobalSSPI.SSPISecureChannel,
                                 ref credentialsHandle,
@@ -176,7 +178,9 @@ public static ProtocolToken InitializeSecurityContext(
 
             token.Status = SecurityStatusAdapterPal.GetSecurityStatusPalFromNativeInt(errorCode);
 
-            if (!sslAuthenticationOptions.AllowTlsResume && newContext && context != null)
+            bool allowTlsResume = sslAuthenticationOptions.AllowTlsResume && !SslStream.DisableTlsResume;
+
+            if (!allowTlsResume && newContext && context != null)
             {
                 var securityBuffer = new SecurityBuffer(s_sessionTokenBuffer, SecurityBufferType.SECBUFFER_TOKEN);
 
@@ -279,6 +283,8 @@ public static unsafe SafeFreeCredentials AcquireCredentialsHandleSchannelCred(Ss
             Interop.SspiCli.SCHANNEL_CRED.Flags flags;
             Interop.SspiCli.CredentialUse direction;
 
+            bool allowTlsResume = authOptions.AllowTlsResume && !SslStream.DisableTlsResume;
+
             if (!isServer)
             {
                 direction = Interop.SspiCli.CredentialUse.SECPKG_CRED_OUTBOUND;
@@ -302,7 +308,7 @@ public static unsafe SafeFreeCredentials AcquireCredentialsHandleSchannelCred(Ss
                 flags =
                     Interop.SspiCli.SCHANNEL_CRED.Flags.SCH_SEND_AUX_RECORD |
                     Interop.SspiCli.SCHANNEL_CRED.Flags.SCH_CRED_NO_SYSTEM_MAPPER;
-                if (!authOptions.AllowTlsResume)
+                if (!allowTlsResume)
                 {
                     // Works only on server
                     flags |= Interop.SspiCli.SCHANNEL_CRED.Flags.SCH_CRED_DISABLE_RECONNECTS;
@@ -327,7 +333,7 @@ public static unsafe SafeFreeCredentials AcquireCredentialsHandleSchannelCred(Ss
                 protocolFlags,
                 policy);
 
-            if (!isServer && !authOptions.AllowTlsResume)
+            if (!isServer && !allowTlsResume)
             {
                 secureCredential.dwSessionLifespan = -1;
             }
@@ -351,6 +357,8 @@ public static unsafe SafeFreeCredentials AcquireCredentialsHandleSchCredentials(
             Interop.SspiCli.SCH_CREDENTIALS.Flags flags;
             Interop.SspiCli.CredentialUse direction;
 
+            bool allowTlsResume = authOptions.AllowTlsResume && !SslStream.DisableTlsResume;
+
             if (isServer)
             {
                 direction = Interop.SspiCli.CredentialUse.SECPKG_CRED_INBOUND;
@@ -359,7 +367,7 @@ public static unsafe SafeFreeCredentials AcquireCredentialsHandleSchCredentials(
                 {
                     flags |= Interop.SspiCli.SCH_CREDENTIALS.Flags.SCH_CRED_NO_SYSTEM_MAPPER;
                 }
-                if (!authOptions.AllowTlsResume)
+                if (!allowTlsResume)
                 {
                     // Works only on server
                     flags |= Interop.SspiCli.SCH_CREDENTIALS.Flags.SCH_CRED_DISABLE_RECONNECTS;
@@ -408,7 +416,7 @@ public static unsafe SafeFreeCredentials AcquireCredentialsHandleSchCredentials(
             Interop.SspiCli.SCH_CREDENTIALS credential = default;
             credential.dwVersion = Interop.SspiCli.SCH_CREDENTIALS.CurrentVersion;
             credential.dwFlags = flags;
-            if (!isServer && !authOptions.AllowTlsResume)
+            if (!isServer && !allowTlsResume)
             {
                 credential.dwSessionLifespan = -1;
             }
diff --git a/src/libraries/System.Net.Security/tests/FunctionalTests/CertificateValidationRemoteServer.cs b/src/libraries/System.Net.Security/tests/FunctionalTests/CertificateValidationRemoteServer.cs
index bb1ffc113eed..588c2d3160b8 100644
--- a/src/libraries/System.Net.Security/tests/FunctionalTests/CertificateValidationRemoteServer.cs
+++ b/src/libraries/System.Net.Security/tests/FunctionalTests/CertificateValidationRemoteServer.cs
@@ -7,6 +7,7 @@
 using System.Net.Sockets;
 using System.Net.Test.Common;
 using System.Reflection;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Security.Cryptography.X509Certificates;
 using System.Security.Cryptography.X509Certificates.Tests.Common;
@@ -95,7 +96,6 @@ public async Task DefaultConnect_EndToEnd_Ok(string host)
         [Theory]
         [InlineData(true)]
         [InlineData(false)]
-        [ActiveIssue("https://github.com/dotnet/runtime/issues/70981", TestPlatforms.OSX)]
         [ActiveIssue("https://github.com/dotnet/runtime/issues/68206", TestPlatforms.Android)]
         public Task ConnectWithRevocation_WithCallback(bool checkRevocation)
         {
@@ -188,7 +188,8 @@ static bool CertificateValidationCallback(
         private async Task ConnectWithRevocation_WithCallback_Core(
             X509RevocationMode revocationMode,
             bool? offlineContext = false,
-            bool noIntermediates = false)
+            bool noIntermediates = false,
+            [CallerMemberName] string testName = null)
         {
             string offlinePart = offlineContext.HasValue ? offlineContext.GetValueOrDefault().ToString().ToLower() : "null";
             string serverName = $"{revocationMode.ToString().ToLower()}.{offlinePart}.server.example";
@@ -201,6 +202,7 @@ private async Task ConnectWithRevocation_WithCallback_Core(
                 out CertificateAuthority rootAuthority,
                 out CertificateAuthority[] intermediateAuthorities,
                 out X509Certificate2 serverCert,
+                testName: testName,
                 intermediateAuthorityCount: noIntermediates ? 0 : 1,
                 subjectName: serverName,
                 keySize: 2048,
diff --git a/src/libraries/System.Net.Security/tests/FunctionalTests/SslStreamCertificateContextTests.cs b/src/libraries/System.Net.Security/tests/FunctionalTests/SslStreamCertificateContextTests.cs
index 38f6a84c360f..d2dceacce556 100644
--- a/src/libraries/System.Net.Security/tests/FunctionalTests/SslStreamCertificateContextTests.cs
+++ b/src/libraries/System.Net.Security/tests/FunctionalTests/SslStreamCertificateContextTests.cs
@@ -26,6 +26,7 @@ public static async Task Create_OcspDoesNotReturnOrCacheInvalidStapleData()
                 out CertificateAuthority rootAuthority,
                 out CertificateAuthority[] intermediateAuthorities,
                 out X509Certificate2 serverCert,
+                testName: nameof(Create_OcspDoesNotReturnOrCacheInvalidStapleData),
                 intermediateAuthorityCount: 1,
                 subjectName: serverName,
                 keySize: 2048,
diff --git a/src/libraries/System.Net.Security/tests/StressTests/SslStress/Program.cs b/src/libraries/System.Net.Security/tests/StressTests/SslStress/Program.cs
index e470e7621a18..ca8e0044b2f2 100644
--- a/src/libraries/System.Net.Security/tests/StressTests/SslStress/Program.cs
+++ b/src/libraries/System.Net.Security/tests/StressTests/SslStress/Program.cs
@@ -67,6 +67,8 @@ private static async Task<ExitCode> Run(Configuration config)
                 Console.WriteLine();
 
                 client = new StressClient(config);
+
+                await client.InitializeAsync();
                 client.Start();
             }
 
@@ -74,13 +76,13 @@ private static async Task<ExitCode> Run(Configuration config)
 
             try
             {
-                if (client != null) 
+                if (client != null)
                 {
                     await client.StopAsync();
                     Console.WriteLine("client stopped");
                 }
 
-                if (server != null) 
+                if (server != null)
                 {
                     await server.StopAsync();
                     Console.WriteLine("server stopped");
@@ -113,7 +115,7 @@ private static bool TryParseCli(string[] args, [NotNullWhen(true)] out Configura
             var cmd = new RootCommand();
             cmd.AddOption(new Option(new[] { "--help", "-h" }, "Display this help text."));
             cmd.AddOption(new Option(new[] { "--mode", "-m" }, "Stress suite execution mode. Defaults to 'both'.") { Argument = new Argument<RunMode>("runMode", RunMode.both) });
-            cmd.AddOption(new Option(new[] { "--cancellation-probability", "-p"}, "Cancellation probability 0 <= p <= 1 for a given connection. Defaults to 0.1") { Argument = new Argument<double>("probability", 0.1)});
+            cmd.AddOption(new Option(new[] { "--cancellation-probability", "-p" }, "Cancellation probability 0 <= p <= 1 for a given connection. Defaults to 0.1") { Argument = new Argument<double>("probability", 0.1) });
             cmd.AddOption(new Option(new[] { "--num-connections", "-n" }, "Max number of connections to open concurrently.") { Argument = new Argument<int>("connections", Environment.ProcessorCount) });
             cmd.AddOption(new Option(new[] { "--server-endpoint", "-e" }, "Endpoint to bind to if server, endpoint to listen to if client.") { Argument = new Argument<string>("ipEndpoint", "127.0.0.1:5002") });
             cmd.AddOption(new Option(new[] { "--max-execution-time", "-t" }, "Maximum stress suite execution time, in minutes. Defaults to infinity.") { Argument = new Argument<double?>("minutes", null) });
@@ -181,7 +183,7 @@ static IPEndPoint ParseEndpoint(string value)
                     {
                         string hostname = match.Groups[1].Value;
                         int port = int.Parse(match.Groups[2].Value);
-                        switch(hostname)
+                        switch (hostname)
                         {
                             case "+":
                             case "*":
diff --git a/src/libraries/System.Net.Security/tests/StressTests/SslStress/SslClientBase.cs b/src/libraries/System.Net.Security/tests/StressTests/SslStress/SslClientBase.cs
index a86438b41ef5..ece9d43e0ebb 100644
--- a/src/libraries/System.Net.Security/tests/StressTests/SslStress/SslClientBase.cs
+++ b/src/libraries/System.Net.Security/tests/StressTests/SslStress/SslClientBase.cs
@@ -30,12 +30,12 @@ public SslClientBase(Configuration config)
 
             _config = config;
             _aggregator = new StressResultAggregator(config.MaxConnections);
-            _clientTask = new Lazy<Task>(Task.Run(StartCore));
+            _clientTask = new Lazy<Task>(() => Task.Run(StartCore));
         }
 
         protected abstract Task HandleConnection(int workerId, long jobId, SslStream stream, TcpClient client, Random random, TimeSpan duration, CancellationToken token);
 
-        protected virtual async Task<SslStream> EstablishSslStream(Stream networkStream, Random random, CancellationToken token)
+        protected virtual async Task<SslStream> EstablishSslStream(Stream networkStream, CancellationToken token)
         {
             var sslStream = new SslStream(networkStream, leaveInnerStreamOpen: false);
             var clientOptions = new SslClientAuthenticationOptions
@@ -115,7 +115,7 @@ async Task RunSingleWorker(int workerId, Random random)
                         using var client = new TcpClient();
                         await client.ConnectAsync(_config.ServerEndpoint.Address, _config.ServerEndpoint.Port);
                         var stream = new CountingStream(client.GetStream(), counter);
-                        using SslStream sslStream = await EstablishSslStream(stream, random, cts.Token);
+                        using SslStream sslStream = await EstablishSslStream(stream, cts.Token);
                         await HandleConnection(workerId, jobId, sslStream, client, random, connectionLifetime, cts.Token);
 
                         _aggregator.RecordSuccess(workerId);
@@ -136,7 +136,7 @@ async Task RunSingleWorker(int workerId, Random random)
                     async void CheckForStalledConnection()
                     {
                         await Task.Delay(10_000);
-                        if(!isTestCompleted)
+                        if (!isTestCompleted)
                         {
                             lock (Console.Out)
                             {
diff --git a/src/libraries/System.Net.Security/tests/StressTests/SslStress/StressOperations.cs b/src/libraries/System.Net.Security/tests/StressTests/SslStress/StressOperations.cs
index eea046d392fb..1b951f30280b 100644
--- a/src/libraries/System.Net.Security/tests/StressTests/SslStress/StressOperations.cs
+++ b/src/libraries/System.Net.Security/tests/StressTests/SslStress/StressOperations.cs
@@ -194,6 +194,36 @@ public sealed class StressClient : SslClientBase
     {
         public StressClient(Configuration config) : base(config) { }
 
+        public async Task InitializeAsync()
+        {
+            Console.WriteLine($"Trying to connect to the server {_config.ServerEndpoint}");
+
+            // Before starting the full-blown test, make sure can communicate with the server
+            // Needed for scenaria where we're deploying server & client in separate containers, simultaneously.
+            await TestConnection(maxRetries: 10);
+
+            Console.WriteLine($"Connected successfully.");
+
+            async Task TestConnection(int maxRetries)
+            {
+                for (int remainingRetries = maxRetries; ; remainingRetries--)
+                {
+                    try
+                    {
+                        using var client = new TcpClient();
+                        await client.ConnectAsync(_config.ServerEndpoint.Address, _config.ServerEndpoint.Port);
+                        using var sslStream = await EstablishSslStream(client.GetStream(), CancellationToken.None);
+                        return;
+                    }
+                    catch (SocketException) when (remainingRetries > 0)
+                    {
+                        Console.WriteLine($"Test connection to {_config.ServerEndpoint} failed, {remainingRetries} attempts remaining");
+                        await Task.Delay(TimeSpan.FromSeconds(1));
+                    }
+                }
+            }
+        }
+
         protected override async Task HandleConnection(int workerId, long jobId, SslStream stream, TcpClient client, Random random, TimeSpan duration, CancellationToken token)
         {
             // token used for signalling cooperative cancellation; do not pass this to SslStream methods
@@ -258,7 +288,7 @@ async Task ApplyBackpressure()
                             await Task.Delay(20);
                         }
 
-                        if(isLogged)
+                        if (isLogged)
                         {
                             Console.WriteLine($"worker #{workerId}: resumed tx after {stopwatch.Elapsed}");
                         }
@@ -297,17 +327,17 @@ async Task Monitor(CancellationToken token)
                 {
                     await Task.Delay(500);
 
-                    if((DateTime.Now - lastWrite) >= TimeSpan.FromSeconds(10))
+                    if ((DateTime.Now - lastWrite) >= TimeSpan.FromSeconds(10))
                     {
                         throw new Exception($"worker #{workerId} job #{jobId} has stopped writing bytes to server");
                     }
 
-                    if((DateTime.Now - lastRead) >= TimeSpan.FromSeconds(10))
+                    if ((DateTime.Now - lastRead) >= TimeSpan.FromSeconds(10))
                     {
                         throw new Exception($"worker #{workerId} job #{jobId} has stopped receiving bytes from server");
                     }
                 }
-                while(!token.IsCancellationRequested && !connectionLifetimeToken.IsCancellationRequested);
+                while (!token.IsCancellationRequested && !connectionLifetimeToken.IsCancellationRequested);
             }
         }
     }
diff --git a/src/libraries/System.Net.Security/tests/UnitTests/Fakes/FakeSslStream.Implementation.cs b/src/libraries/System.Net.Security/tests/UnitTests/Fakes/FakeSslStream.Implementation.cs
index 593af0876591..3ad6be920392 100644
--- a/src/libraries/System.Net.Security/tests/UnitTests/Fakes/FakeSslStream.Implementation.cs
+++ b/src/libraries/System.Net.Security/tests/UnitTests/Fakes/FakeSslStream.Implementation.cs
@@ -14,6 +14,8 @@ namespace System.Net.Security
 {
     public partial class SslStream
     {
+        internal static bool DisableTlsResume { get; }
+
         private class FakeOptions
         {
             public string TargetHost;
diff --git a/src/libraries/System.Net.Security/tests/UnitTests/NegotiateAuthenticationTests.cs b/src/libraries/System.Net.Security/tests/UnitTests/NegotiateAuthenticationTests.cs
index 5be16753dc49..4a4dbefa6f36 100644
--- a/src/libraries/System.Net.Security/tests/UnitTests/NegotiateAuthenticationTests.cs
+++ b/src/libraries/System.Net.Security/tests/UnitTests/NegotiateAuthenticationTests.cs
@@ -211,6 +211,42 @@ public void NtlmIncorrectExchangeTest()
             Assert.False(fakeNtlmServer.IsAuthenticated);
         }
 
+        [ConditionalFact(nameof(IsNtlmAvailable))]
+        public void NtlmEncryptionTest()
+        {
+            using FakeNtlmServer fakeNtlmServer = new FakeNtlmServer(s_testCredentialRight);
+
+            NegotiateAuthentication ntAuth = new NegotiateAuthentication(
+                new NegotiateAuthenticationClientOptions
+                {
+                    Package = "NTLM",
+                    Credential = s_testCredentialRight,
+                    TargetName = "HTTP/foo",
+                    RequiredProtectionLevel = ProtectionLevel.EncryptAndSign
+                });
+
+            NegotiateAuthenticationStatusCode statusCode;
+            byte[]? negotiateBlob = ntAuth.GetOutgoingBlob((byte[])null, out statusCode);
+            Assert.Equal(NegotiateAuthenticationStatusCode.ContinueNeeded, statusCode);
+            Assert.NotNull(negotiateBlob);
+
+            byte[]? challengeBlob = fakeNtlmServer.GetOutgoingBlob(negotiateBlob);
+            Assert.NotNull(challengeBlob);
+            // Validate that the client sent NegotiateSeal flag
+            Assert.Equal(FakeNtlmServer.Flags.NegotiateSeal, (fakeNtlmServer.InitialClientFlags & FakeNtlmServer.Flags.NegotiateSeal));
+
+            byte[]? authenticateBlob = ntAuth.GetOutgoingBlob(challengeBlob, out statusCode);
+            Assert.Equal(NegotiateAuthenticationStatusCode.Completed, statusCode);
+            Assert.NotNull(authenticateBlob);
+
+            byte[]? empty = fakeNtlmServer.GetOutgoingBlob(authenticateBlob);
+            Assert.Null(empty);
+            Assert.True(fakeNtlmServer.IsAuthenticated);
+
+            // Validate that the NegotiateSeal flag survived the full exchange
+            Assert.Equal(FakeNtlmServer.Flags.NegotiateSeal, (fakeNtlmServer.NegotiatedFlags & FakeNtlmServer.Flags.NegotiateSeal));
+        }
+
         [ConditionalFact(nameof(IsNtlmAvailable))]
         public void NtlmSignatureTest()
         {
diff --git a/src/libraries/System.Net.Security/tests/UnitTests/SslStreamCertificateContextOcspLinuxTests.cs b/src/libraries/System.Net.Security/tests/UnitTests/SslStreamCertificateContextOcspLinuxTests.cs
index b712f814a02b..5e31aafc5cce 100644
--- a/src/libraries/System.Net.Security/tests/UnitTests/SslStreamCertificateContextOcspLinuxTests.cs
+++ b/src/libraries/System.Net.Security/tests/UnitTests/SslStreamCertificateContextOcspLinuxTests.cs
@@ -77,6 +77,7 @@ await SimpleTest(PkiOptions.OcspEverywhere, async (root, intermediate, endEntity
     }
 
     [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/97836")]
     public async Task FetchOcspResponse_FirstInvalidThenValid()
     {
         await SimpleTest(PkiOptions.OcspEverywhere, async (root, intermediate, endEntity, ctxFactory, responder) =>
@@ -94,6 +95,7 @@ await SimpleTest(PkiOptions.OcspEverywhere, async (root, intermediate, endEntity
     }
 
     [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/97779")]
     public async Task RefreshOcspResponse_BeforeExpiration()
     {
         await SimpleTest(PkiOptions.OcspEverywhere, async (root, intermediate, endEntity, ctxFactory, responder) =>
@@ -121,6 +123,7 @@ await SimpleTest(PkiOptions.OcspEverywhere, async (root, intermediate, endEntity
     }
 
     [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/97779")]
     public async Task RefreshOcspResponse_AfterExpiration()
     {
         await SimpleTest(PkiOptions.OcspEverywhere, async (root, intermediate, endEntity, ctxFactory, responder) =>
diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/Socket.Tasks.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/Socket.Tasks.cs
index e1891bef916f..3ba24e90cf10 100644
--- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/Socket.Tasks.cs
+++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/Socket.Tasks.cs
@@ -677,7 +677,6 @@ public ValueTask<int> SendToAsync(ReadOnlyMemory<byte> buffer, SocketFlags socke
             Debug.Assert(saea.BufferList == null);
             saea.SetBuffer(MemoryMarshal.AsMemory(buffer));
             saea.SocketFlags = socketFlags;
-            saea._socketAddress = null;
             saea.RemoteEndPoint = remoteEP;
             saea.WrapExceptionsForNetworkStream = false;
             return saea.SendToAsync(this, cancellationToken);
@@ -709,8 +708,17 @@ public ValueTask<int> SendToAsync(ReadOnlyMemory<byte> buffer, SocketFlags socke
             saea.SetBuffer(MemoryMarshal.AsMemory(buffer));
             saea.SocketFlags = socketFlags;
             saea._socketAddress = socketAddress;
+            saea.RemoteEndPoint = null;
             saea.WrapExceptionsForNetworkStream = false;
-            return saea.SendToAsync(this, cancellationToken);
+            try
+            {
+                return saea.SendToAsync(this, cancellationToken);
+            }
+            finally
+            {
+                // detach user provided SA so we do not accidentally stomp on it later.
+                saea._socketAddress = null;
+            }
         }
 
         /// <summary>
diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/Socket.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/Socket.cs
index 11b8674d681f..a8c95005154c 100644
--- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/Socket.cs
+++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/Socket.cs
@@ -3095,14 +3095,22 @@ private bool SendToAsync(SocketAsyncEventArgs e, CancellationToken cancellationT
             ArgumentNullException.ThrowIfNull(e);
 
             EndPoint? endPointSnapshot = e.RemoteEndPoint;
-            if (e._socketAddress == null)
+
+            // RemoteEndPoint should be set unless somebody used SendTo with their own SA.
+            // In that case RemoteEndPoint will be null and we take provided SA as given.
+            if (endPointSnapshot == null && e._socketAddress == null)
             {
-                if (endPointSnapshot == null)
-                {
-                    throw new ArgumentException(SR.Format(SR.InvalidNullArgument, "e.RemoteEndPoint"), nameof(e));
-                }
+                throw new ArgumentException(SR.Format(SR.InvalidNullArgument, "e.RemoteEndPoint"), nameof(e));
+            }
 
-                // Prepare SocketAddress
+            if (e._socketAddress != null && endPointSnapshot is IPEndPoint ipep && e._socketAddress.Family == endPointSnapshot?.AddressFamily)
+            {
+                // we have matching SocketAddress. Since this is only used internally, it is ok to overwrite it without
+                ipep.Serialize(e._socketAddress.Buffer.Span);
+            }
+            else if (endPointSnapshot != null)
+            {
+                // Prepare new SocketAddress
                 e._socketAddress = Serialize(ref endPointSnapshot);
             }
 
diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEventArgs.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEventArgs.cs
index e94d862571a0..78dd22e5eda7 100644
--- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEventArgs.cs
+++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEventArgs.cs
@@ -923,7 +923,12 @@ internal void FinishOperationSyncSuccess(int bytesTransferred, SocketFlags flags
                 case SocketAsyncOperation.ReceiveFrom:
                     // Deal with incoming address.
                     UpdateReceivedSocketAddress(_socketAddress!);
-                    if (_remoteEndPoint != null && !SocketAddressExtensions.Equals(_socketAddress!, _remoteEndPoint))
+                    if (_remoteEndPoint == null)
+                    {
+                        // detach user provided SA as it was updated in place.
+                        _socketAddress = null;
+                    }
+                    else if (!SocketAddressExtensions.Equals(_socketAddress!, _remoteEndPoint))
                     {
                         try
                         {
diff --git a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketPal.Unix.cs b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketPal.Unix.cs
index d2ac959e0b4c..837743dfa344 100644
--- a/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketPal.Unix.cs
+++ b/src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketPal.Unix.cs
@@ -528,7 +528,6 @@ private static unsafe int SysReceiveMessageFrom(
             out SocketFlags receivedFlags, out IPPacketInformation ipPacketInformation, out Interop.Error errno)
         {
             Debug.Assert(socket.IsSocket);
-            Debug.Assert(socketAddress != null, "Expected non-null socketAddress");
 
             int buffersCount = buffers.Count;
             bool allocOnStack = buffersCount <= IovStackThreshold;
@@ -810,7 +809,6 @@ public static unsafe bool TryCompleteReceiveFrom(SafeSocketHandle socket, Span<b
                 {
                     Debug.Assert(flags == SocketFlags.None);
                     Debug.Assert(buffers == null);
-                    Debug.Assert(socketAddress == null);
 
                     receivedFlags = default;
                     received = SysRead(socket, buffer, out errno);
@@ -956,7 +954,7 @@ public static bool TryCompleteSendTo(SafeSocketHandle socket, ReadOnlySpan<byte>
                     {
                         sent = buffers != null ?
                             SysSend(socket, flags, buffers, ref bufferIndex, ref offset, socketAddress, out errno) :
-                            socketAddress == null ? SysSend(socket, flags, buffer, ref offset, ref count, out errno) :
+                            socketAddress.IsEmpty ? SysSend(socket, flags, buffer, ref offset, ref count, out errno) :
                                                     SysSend(socket, flags, buffer, ref offset, ref count, socketAddress, out errno);
                     }
                 }
diff --git a/src/libraries/System.Net.Sockets/tests/FunctionalTests/SendTo.cs b/src/libraries/System.Net.Sockets/tests/FunctionalTests/SendTo.cs
index bf0ad1465886..7a3c33b64bf7 100644
--- a/src/libraries/System.Net.Sockets/tests/FunctionalTests/SendTo.cs
+++ b/src/libraries/System.Net.Sockets/tests/FunctionalTests/SendTo.cs
@@ -173,6 +173,35 @@ public void SendToAsync_NullAsyncEventArgs_Throws_ArgumentNullException()
     public sealed class SendTo_Task : SendTo<SocketHelperTask>
     {
         public SendTo_Task(ITestOutputHelper output) : base(output) { }
+
+        [Theory]
+        [InlineData(false)]
+        [InlineData(true)]
+        public async Task SendTo_DifferentEP_Success(bool ipv4)
+        {
+            IPAddress address = ipv4 ? IPAddress.Loopback : IPAddress.IPv6Loopback;
+            IPEndPoint remoteEp = new IPEndPoint(address, 0);
+
+            using Socket receiver1 = new Socket(address.AddressFamily, SocketType.Dgram, ProtocolType.Udp);
+            using Socket receiver2 = new Socket(address.AddressFamily, SocketType.Dgram, ProtocolType.Udp);
+            using Socket sender = new Socket(address.AddressFamily, SocketType.Dgram, ProtocolType.Udp);
+
+            receiver1.BindToAnonymousPort(address);
+            receiver2.BindToAnonymousPort(address);
+
+            byte[] sendBuffer = new byte[32];
+            var receiveInternalBuffer = new byte[sendBuffer.Length];
+            ArraySegment<byte> receiveBuffer = new ArraySegment<byte>(receiveInternalBuffer, 0, receiveInternalBuffer.Length);
+
+
+            await sender.SendToAsync(sendBuffer, SocketFlags.None, receiver1.LocalEndPoint);
+            SocketReceiveFromResult result = await ReceiveFromAsync(receiver1, receiveBuffer, remoteEp).WaitAsync(TestSettings.PassingTestTimeout);
+            Assert.Equal(sendBuffer.Length, result.ReceivedBytes);
+
+            await sender.SendToAsync(sendBuffer, SocketFlags.None, receiver2.LocalEndPoint);
+            result = await ReceiveFromAsync(receiver2, receiveBuffer, remoteEp).WaitAsync(TestSettings.PassingTestTimeout);
+            Assert.Equal(sendBuffer.Length, result.ReceivedBytes);
+        }
     }
 
     public sealed class SendTo_CancellableTask : SendTo<SocketHelperCancellableTask>
diff --git a/src/libraries/System.Net.Sockets/tests/FunctionalTests/SocketAsyncEventArgsTest.cs b/src/libraries/System.Net.Sockets/tests/FunctionalTests/SocketAsyncEventArgsTest.cs
index ded34276f322..3d865cb86457 100644
--- a/src/libraries/System.Net.Sockets/tests/FunctionalTests/SocketAsyncEventArgsTest.cs
+++ b/src/libraries/System.Net.Sockets/tests/FunctionalTests/SocketAsyncEventArgsTest.cs
@@ -895,5 +895,52 @@ void CreateSocketAsyncEventArgs() // separated out so that JIT doesn't extend li
                 return cwt.Count() == 0; // validate that the cwt becomes empty
             }, 30_000));
         }
+
+        [Theory]
+        [InlineData(false)]
+        [InlineData(true)]
+        public async Task SendTo_DifferentEP_Success(bool ipv4)
+        {
+            IPAddress address = ipv4 ? IPAddress.Loopback : IPAddress.IPv6Loopback;
+            IPEndPoint remoteEp = new IPEndPoint(address, 0);
+
+            using Socket receiver1 = new Socket(address.AddressFamily, SocketType.Dgram, ProtocolType.Udp);
+            using Socket receiver2 = new Socket(address.AddressFamily, SocketType.Dgram, ProtocolType.Udp);
+            using Socket sender = new Socket(address.AddressFamily, SocketType.Dgram, ProtocolType.Udp);
+
+            receiver1.BindToAnonymousPort(address);
+            receiver2.BindToAnonymousPort(address);
+
+            byte[] sendBuffer = new byte[32];
+            var receiveInternalBuffer = new byte[sendBuffer.Length];
+            ArraySegment<byte> receiveBuffer = new ArraySegment<byte>(receiveInternalBuffer, 0, receiveInternalBuffer.Length);
+
+            using SocketAsyncEventArgs saea = new SocketAsyncEventArgs();
+            ManualResetEventSlim mres = new ManualResetEventSlim(false); 
+
+            saea.SetBuffer(sendBuffer);
+            saea.RemoteEndPoint = receiver1.LocalEndPoint;
+            saea.Completed += delegate { mres.Set(); };
+            if (sender.SendToAsync(saea))
+            {
+                // did not finish synchronously.
+                mres.Wait();
+            }
+
+            SocketReceiveFromResult result = await receiver1.ReceiveFromAsync(receiveBuffer, remoteEp).WaitAsync(TestSettings.PassingTestTimeout);
+            Assert.Equal(sendBuffer.Length, result.ReceivedBytes);
+            mres.Reset();
+
+
+            saea.RemoteEndPoint = receiver2.LocalEndPoint;
+            if (sender.SendToAsync(saea))
+            {
+                // did not finish synchronously.
+                mres.Wait();
+            }
+
+            result = await receiver2.ReceiveFromAsync(receiveBuffer, remoteEp).WaitAsync(TestSettings.PassingTestTimeout);
+            Assert.Equal(sendBuffer.Length, result.ReceivedBytes);
+        }
     }
 }
diff --git a/src/libraries/System.Net.Sockets/tests/FunctionalTests/SocketOptionNameTest.cs b/src/libraries/System.Net.Sockets/tests/FunctionalTests/SocketOptionNameTest.cs
index 2d87c58b4590..2a9c6a24f74b 100644
--- a/src/libraries/System.Net.Sockets/tests/FunctionalTests/SocketOptionNameTest.cs
+++ b/src/libraries/System.Net.Sockets/tests/FunctionalTests/SocketOptionNameTest.cs
@@ -125,12 +125,6 @@ public void MulticastInterface_Set_InvalidIndex_Throws()
         [ActiveIssue("https://github.com/dotnet/runtime/issues/52124", TestPlatforms.iOS | TestPlatforms.tvOS | TestPlatforms.MacCatalyst)]
         public async Task MulticastInterface_Set_IPv6_AnyInterface_Succeeds()
         {
-            if (PlatformDetection.IsRedHatFamily7)
-            {
-                // RH7 seems to have issues with multicast in Azure. Same code and setup can pass when executed outside of Azure.
-                throw new SkipTestException("IPv6 multicast environment not available");
-            }
-
             // On all platforms, index 0 means "any interface"
             await MulticastInterface_Set_IPv6_Helper(0);
         }
diff --git a/src/libraries/System.Net.WebSockets.Client/src/System/Net/WebSockets/BrowserWebSockets/BrowserInterop.cs b/src/libraries/System.Net.WebSockets.Client/src/System/Net/WebSockets/BrowserWebSockets/BrowserInterop.cs
index 53f43c3c8592..b18338d3e986 100644
--- a/src/libraries/System.Net.WebSockets.Client/src/System/Net/WebSockets/BrowserWebSockets/BrowserInterop.cs
+++ b/src/libraries/System.Net.WebSockets.Client/src/System/Net/WebSockets/BrowserWebSockets/BrowserInterop.cs
@@ -53,15 +53,13 @@ public static int GetReadyState(JSObject? webSocket)
                 return -1;
             }
 
-            int? readyState = webSocket.GetPropertyAsInt32("readyState");
-            if (!readyState.HasValue)
-            {
-                return -1;
-            }
-
-            return readyState.Value;
+            return BrowserInterop.WebSocketGetState(webSocket);
         }
 
+        [JSImport("INTERNAL.ws_get_state")]
+        public static partial int WebSocketGetState(
+            JSObject webSocket);
+
         [JSImport("INTERNAL.ws_wasm_create")]
         public static partial JSObject WebSocketCreate(
             string uri,
diff --git a/src/libraries/System.Net.WebSockets.Client/src/System/Net/WebSockets/BrowserWebSockets/BrowserWebSocket.cs b/src/libraries/System.Net.WebSockets.Client/src/System/Net/WebSockets/BrowserWebSockets/BrowserWebSocket.cs
index 6134d85656cc..d21e80ba41fe 100644
--- a/src/libraries/System.Net.WebSockets.Client/src/System/Net/WebSockets/BrowserWebSockets/BrowserWebSocket.cs
+++ b/src/libraries/System.Net.WebSockets.Client/src/System/Net/WebSockets/BrowserWebSockets/BrowserWebSocket.cs
@@ -135,9 +135,10 @@ public override string? SubProtocol
 
         internal Task ConnectAsync(Uri uri, List<string>? requestedSubProtocols, CancellationToken cancellationToken)
         {
+            AbortIfCancelationRequested(cancellationToken);
+
             lock (_lockObject)
             {
-                cancellationToken.ThrowIfCancellationRequested();
                 ThrowIfDisposed();
 
                 if (FastState != WebSocketState.None)
@@ -184,7 +185,6 @@ public override Task CloseOutputAsync(WebSocketCloseStatus closeStatus, string?
             // this validation should be synchronous
             WebSocketValidate.ValidateCloseStatus(closeStatus, statusDescription);
 
-            cancellationToken.ThrowIfCancellationRequested();
             ThrowIfDisposed();
 
             return CloseAsyncCore(closeStatus, statusDescription, false, cancellationToken);
@@ -195,7 +195,6 @@ public override Task CloseAsync(WebSocketCloseStatus closeStatus, string? status
             // this validation should be synchronous
             WebSocketValidate.ValidateCloseStatus(closeStatus, statusDescription);
 
-            cancellationToken.ThrowIfCancellationRequested();
             ThrowIfDisposed();
 
             return CloseAsyncCore(closeStatus, statusDescription, true, cancellationToken);
@@ -284,6 +283,17 @@ private void ThrowIfDisposed()
             } // lock
         }
 
+        private void AbortIfCancelationRequested(CancellationToken cancellationToken)
+        {
+            if (cancellationToken.IsCancellationRequested)
+            {
+                lock (_lockObject)
+                {
+                    Abort();
+                } // lock
+                cancellationToken.ThrowIfCancellationRequested();
+            }
+        }
 
         private void CreateCore(Uri uri, List<string>? requestedSubProtocols)
         {
@@ -366,7 +376,6 @@ private async Task SendAsyncCore(ArraySegment<byte> buffer, WebSocketMessageType
             {
                 lock (_lockObject)
                 {
-                    cancellationToken.ThrowIfCancellationRequested();
                     ThrowIfDisposed();
 
                     previousState = FastState;
@@ -374,6 +383,7 @@ private async Task SendAsyncCore(ArraySegment<byte> buffer, WebSocketMessageType
                     {
                         throw new InvalidOperationException(SR.net_WebSockets_NotConnected);
                     }
+                    AbortIfCancelationRequested(cancellationToken);
 
                     if (buffer.Count == 0)
                     {
@@ -416,7 +426,6 @@ private async Task<WebSocketReceiveResult> ReceiveAsyncCore(ArraySegment<byte> b
             {
                 lock (_lockObject)
                 {
-                    cancellationToken.ThrowIfCancellationRequested();
                     ThrowIfDisposed();
 
                     previousState = FastState;
@@ -424,6 +433,7 @@ private async Task<WebSocketReceiveResult> ReceiveAsyncCore(ArraySegment<byte> b
                     {
                         throw new WebSocketException(WebSocketError.InvalidState, SR.Format(SR.net_WebSockets_InvalidState, previousState, "Open, CloseSent"));
                     }
+                    AbortIfCancelationRequested(cancellationToken);
 
                     Memory<byte> bufferMemory = buffer.AsMemory();
                     pinBuffer = bufferMemory.Pin();
@@ -502,22 +512,22 @@ private async Task CloseAsyncCore(WebSocketCloseStatus closeStatus, string? stat
             WebSocketState previousState;
             lock (_lockObject)
             {
-                cancellationToken.ThrowIfCancellationRequested();
-
                 previousState = FastState;
                 if (_aborted)
                 {
                     return;
                 }
+                if (previousState == WebSocketState.None || previousState == WebSocketState.Closed)
+                {
+                    throw new WebSocketException(WebSocketError.InvalidState, SR.Format(SR.net_WebSockets_InvalidState, previousState, "Connecting, Open, CloseSent, CloseReceived, Aborted"));
+                }
+                AbortIfCancelationRequested(cancellationToken);
+
                 if (!_closeReceived)
                 {
                     _closeStatus = closeStatus;
                     _closeStatusDescription = statusDescription;
                 }
-                if (previousState == WebSocketState.None || previousState == WebSocketState.Closed)
-                {
-                    throw new WebSocketException(WebSocketError.InvalidState, SR.Format(SR.net_WebSockets_InvalidState, previousState, "Connecting, Open, CloseSent, Aborted"));
-                }
 
                 _closeSent = true;
 
@@ -544,7 +554,6 @@ private async Task CancellationHelper(Task promise, CancellationToken cancellati
         {
             try
             {
-                cancellationToken.ThrowIfCancellationRequested();
                 if (promise.IsCompletedSuccessfully)
                 {
                     disposable?.Dispose();
@@ -556,6 +565,7 @@ private async Task CancellationHelper(Task promise, CancellationToken cancellati
                     await promise.ConfigureAwait(false);
                     return;
                 }
+                AbortIfCancelationRequested(cancellationToken);
 
                 using (var receiveRegistration = cancellationToken.Register(static s =>
                 {
diff --git a/src/libraries/System.Net.WebSockets.Client/tests/AbortTest.Loopback.cs b/src/libraries/System.Net.WebSockets.Client/tests/AbortTest.Loopback.cs
new file mode 100644
index 000000000000..0aa83697a9de
--- /dev/null
+++ b/src/libraries/System.Net.WebSockets.Client/tests/AbortTest.Loopback.cs
@@ -0,0 +1,246 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Threading;
+using System.Threading.Tasks;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace System.Net.WebSockets.Client.Tests
+{
+    [ConditionalClass(typeof(ClientWebSocketTestBase), nameof(WebSocketsSupported))]
+    [SkipOnPlatform(TestPlatforms.Browser, "System.Net.Sockets are not supported on browser")]
+    public abstract class AbortTest_Loopback : ClientWebSocketTestBase
+    {
+        public AbortTest_Loopback(ITestOutputHelper output) : base(output) { }
+
+        protected virtual Version HttpVersion => Net.HttpVersion.Version11;
+
+        [Theory]
+        [MemberData(nameof(AbortClient_MemberData))]
+        public Task AbortClient_ServerGetsCorrectException(AbortType abortType, bool useSsl, bool verifySendReceive)
+        {
+            var clientMsg = new byte[] { 1, 2, 3, 4, 5, 6 };
+            var serverMsg = new byte[] { 42 };
+            var clientAckTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
+            var serverAckTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
+
+            var timeoutCts = new CancellationTokenSource(TimeOutMilliseconds);
+
+            return LoopbackWebSocketServer.RunAsync(
+                async (clientWebSocket, token) =>
+                {
+                    if (verifySendReceive)
+                    {
+                        await VerifySendReceiveAsync(clientWebSocket, clientMsg, serverMsg, clientAckTcs, serverAckTcs.Task, token);
+                    }
+
+                    switch (abortType)
+                    {
+                        case AbortType.Abort:
+                            clientWebSocket.Abort();
+                            break;
+                        case AbortType.Dispose:
+                            clientWebSocket.Dispose();
+                            break;
+                    }
+                },
+                async (serverWebSocket, token) =>
+                {
+                    if (verifySendReceive)
+                    {
+                        await VerifySendReceiveAsync(serverWebSocket, serverMsg, clientMsg, serverAckTcs, clientAckTcs.Task, token);
+                    }
+
+                    var readBuffer = new byte[1];
+                    var exception = await Assert.ThrowsAsync<WebSocketException>(async () =>
+                        await serverWebSocket.ReceiveAsync(readBuffer, token));
+
+                    Assert.Equal(WebSocketError.ConnectionClosedPrematurely, exception.WebSocketErrorCode);
+                    Assert.Equal(WebSocketState.Aborted, serverWebSocket.State);
+                },
+                new LoopbackWebSocketServer.Options(HttpVersion, useSsl, GetInvoker()),
+                timeoutCts.Token);
+        }
+
+        [Theory]
+        [MemberData(nameof(ServerPrematureEos_MemberData))]
+        public Task ServerPrematureEos_ClientGetsCorrectException(ServerEosType serverEosType, bool useSsl)
+        {
+            var clientMsg = new byte[] { 1, 2, 3, 4, 5, 6 };
+            var serverMsg = new byte[] { 42 };
+            var clientAckTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
+            var serverAckTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
+
+            var timeoutCts = new CancellationTokenSource(TimeOutMilliseconds);
+
+            var globalOptions = new LoopbackWebSocketServer.Options(HttpVersion, useSsl, HttpInvoker: null)
+            {
+                DisposeServerWebSocket = false,
+                ManualServerHandshakeResponse = true
+            };
+
+            var serverReceivedEosTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
+            var clientReceivedEosTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
+
+            return LoopbackWebSocketServer.RunAsync(
+                async uri =>
+                {
+                    var token = timeoutCts.Token;
+                    var clientOptions = globalOptions with { HttpInvoker = GetInvoker() };
+                    var clientWebSocket = await LoopbackWebSocketServer.GetConnectedClientAsync(uri, clientOptions, token).ConfigureAwait(false);
+
+                    if (serverEosType == ServerEosType.AfterSomeData)
+                    {
+                        await VerifySendReceiveAsync(clientWebSocket, clientMsg, serverMsg, clientAckTcs, serverAckTcs.Task, token).ConfigureAwait(false);
+                    }
+
+                    // only one side of the stream was closed. the other should work
+                    await clientWebSocket.SendAsync(clientMsg, WebSocketMessageType.Binary, endOfMessage: true, token).ConfigureAwait(false);
+
+                    var exception = await Assert.ThrowsAsync<WebSocketException>(() => clientWebSocket.ReceiveAsync(new byte[1], token));
+                    Assert.Equal(WebSocketError.ConnectionClosedPrematurely, exception.WebSocketErrorCode);
+
+                    clientReceivedEosTcs.SetResult();
+                    clientWebSocket.Dispose();
+                },
+                async (requestData, token) =>
+                {
+                    WebSocket serverWebSocket = null!;
+                    await SendServerResponseAndEosAsync(
+                        requestData,
+                        serverEosType,
+                        (wsData, ct) =>
+                        {
+                            var wsOptions = new WebSocketCreationOptions { IsServer = true };
+                            serverWebSocket = WebSocket.CreateFromStream(wsData.WebSocketStream, wsOptions);
+
+                            return serverEosType == ServerEosType.AfterSomeData
+                                ? VerifySendReceiveAsync(serverWebSocket, serverMsg, clientMsg, serverAckTcs, clientAckTcs.Task, ct)
+                                : Task.CompletedTask;
+                        },
+                        token);
+
+                    Assert.NotNull(serverWebSocket);
+
+                    // only one side of the stream was closed. the other should work
+                    var readBuffer = new byte[clientMsg.Length];
+                    var result = await serverWebSocket.ReceiveAsync(readBuffer, token);
+                    Assert.Equal(WebSocketMessageType.Binary, result.MessageType);
+                    Assert.Equal(clientMsg.Length, result.Count);
+                    Assert.True(result.EndOfMessage);
+                    Assert.Equal(clientMsg, readBuffer);
+
+                    await clientReceivedEosTcs.Task.WaitAsync(token).ConfigureAwait(false);
+
+                    var exception = await Assert.ThrowsAsync<WebSocketException>(() => serverWebSocket.ReceiveAsync(readBuffer, token));
+                    Assert.Equal(WebSocketError.ConnectionClosedPrematurely, exception.WebSocketErrorCode);
+
+                    serverWebSocket.Dispose();
+                },
+                globalOptions,
+                timeoutCts.Token);
+        }
+
+        protected virtual Task SendServerResponseAndEosAsync(WebSocketRequestData requestData, ServerEosType serverEosType, Func<WebSocketRequestData, CancellationToken, Task> serverFunc, CancellationToken cancellationToken)
+            => WebSocketHandshakeHelper.SendHttp11ServerResponseAndEosAsync(requestData, serverFunc, cancellationToken); // override for HTTP/2
+
+        private static readonly bool[] Bool_Values = new[] { false, true };
+        private static readonly bool[] UseSsl_Values = PlatformDetection.SupportsAlpn ? Bool_Values : new[] { false };
+
+        public static IEnumerable<object[]> AbortClient_MemberData()
+        {
+            foreach (var abortType in Enum.GetValues<AbortType>())
+            {
+                foreach (var useSsl in UseSsl_Values)
+                {
+                    foreach (var verifySendReceive in Bool_Values)
+                    {
+                        yield return new object[] { abortType, useSsl, verifySendReceive };
+                    }
+                }
+            }
+        }
+
+        public static IEnumerable<object[]> ServerPrematureEos_MemberData()
+        {
+            foreach (var serverEosType in Enum.GetValues<ServerEosType>())
+            {
+                foreach (var useSsl in UseSsl_Values)
+                {
+                    yield return new object[] { serverEosType, useSsl };
+                }
+            }
+        }
+
+        public enum AbortType
+        {
+            Abort,
+            Dispose
+        }
+
+        public enum ServerEosType
+        {
+            WithHeaders,
+            RightAfterHeaders,
+            AfterSomeData
+        }
+
+        private static async Task VerifySendReceiveAsync(WebSocket ws, byte[] localMsg, byte[] remoteMsg,
+            TaskCompletionSource localAckTcs, Task remoteAck, CancellationToken cancellationToken)
+        {
+            var sendTask = ws.SendAsync(localMsg, WebSocketMessageType.Binary, endOfMessage: true, cancellationToken);
+
+            var recvBuf = new byte[remoteMsg.Length * 2];
+            var recvResult = await ws.ReceiveAsync(recvBuf, cancellationToken).ConfigureAwait(false);
+
+            Assert.Equal(WebSocketMessageType.Binary, recvResult.MessageType);
+            Assert.Equal(remoteMsg.Length, recvResult.Count);
+            Assert.True(recvResult.EndOfMessage);
+            Assert.Equal(remoteMsg, recvBuf[..recvResult.Count]);
+
+            localAckTcs.SetResult();
+
+            await sendTask.ConfigureAwait(false);
+            await remoteAck.WaitAsync(cancellationToken).ConfigureAwait(false);
+        }
+    }
+
+    // --- HTTP/1.1 WebSocket loopback tests ---
+
+    public class AbortTest_Invoker_Loopback : AbortTest_Loopback
+    {
+        public AbortTest_Invoker_Loopback(ITestOutputHelper output) : base(output) { }
+        protected override bool UseCustomInvoker => true;
+    }
+
+    public class AbortTest_HttpClient_Loopback : AbortTest_Loopback
+    {
+        public AbortTest_HttpClient_Loopback(ITestOutputHelper output) : base(output) { }
+        protected override bool UseHttpClient => true;
+    }
+
+    public class AbortTest_SharedHandler_Loopback : AbortTest_Loopback
+    {
+        public AbortTest_SharedHandler_Loopback(ITestOutputHelper output) : base(output) { }
+    }
+
+    // --- HTTP/2 WebSocket loopback tests ---
+
+    public class AbortTest_Invoker_Http2 : AbortTest_Invoker_Loopback
+    {
+        public AbortTest_Invoker_Http2(ITestOutputHelper output) : base(output) { }
+        protected override Version HttpVersion => Net.HttpVersion.Version20;
+        protected override Task SendServerResponseAndEosAsync(WebSocketRequestData rd, ServerEosType eos, Func<WebSocketRequestData, CancellationToken, Task> callback, CancellationToken ct)
+            => WebSocketHandshakeHelper.SendHttp2ServerResponseAndEosAsync(rd, eosInHeadersFrame: eos == ServerEosType.WithHeaders, callback, ct);
+    }
+
+    public class AbortTest_HttpClient_Http2 : AbortTest_HttpClient_Loopback
+    {
+        public AbortTest_HttpClient_Http2(ITestOutputHelper output) : base(output) { }
+        protected override Version HttpVersion => Net.HttpVersion.Version20;
+        protected override Task SendServerResponseAndEosAsync(WebSocketRequestData rd, ServerEosType eos, Func<WebSocketRequestData, CancellationToken, Task> callback, CancellationToken ct)
+            => WebSocketHandshakeHelper.SendHttp2ServerResponseAndEosAsync(rd, eosInHeadersFrame: eos == ServerEosType.WithHeaders, callback, ct);
+    }
+}
diff --git a/src/libraries/System.Net.WebSockets.Client/tests/CloseTest.cs b/src/libraries/System.Net.WebSockets.Client/tests/CloseTest.cs
index c768fab5972a..fb73485fc7fe 100644
--- a/src/libraries/System.Net.WebSockets.Client/tests/CloseTest.cs
+++ b/src/libraries/System.Net.WebSockets.Client/tests/CloseTest.cs
@@ -8,6 +8,7 @@
 using System.Text;
 using System.Threading;
 using System.Threading.Tasks;
+using System.Linq;
 
 using Xunit;
 using Xunit.Abstractions;
@@ -263,8 +264,8 @@ public async Task CloseOutputAsync_ClientInitiated_CanReceive_CanClose(Uri serve
 
         [ActiveIssue("https://github.com/dotnet/runtime/issues/28957", typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))]
         [OuterLoop("Uses external servers", typeof(PlatformDetection), nameof(PlatformDetection.LocalEchoServerIsNotAvailable))]
-        [ConditionalTheory(nameof(WebSocketsSupported)), MemberData(nameof(EchoServers))]
-        public async Task CloseOutputAsync_ServerInitiated_CanReceive(Uri server)
+        [ConditionalTheory(nameof(WebSocketsSupported)), MemberData(nameof(EchoServersWithSwitch))]
+        public async Task CloseOutputAsync_ServerInitiated_CanReceive(Uri server, bool delayReceiving)
         {
             var expectedCloseStatus = WebSocketCloseStatus.NormalClosure;
             var expectedCloseDescription = ".shutdownafter";
@@ -279,6 +280,10 @@ await cws.SendAsync(
                     true,
                     cts.Token);
 
+                // let server close the output before we request receiving
+                if (delayReceiving)
+                    await Task.Delay(1000);
+
                 // Should be able to receive the message echoed by the server.
                 var recvBuffer = new byte[100];
                 var segmentRecv = new ArraySegment<byte>(recvBuffer);
@@ -362,6 +367,43 @@ await cws.SendAsync(
             }
         }
 
+        public static IEnumerable<object[]> EchoServersWithSwitch =>
+            EchoServers.SelectMany(server => new List<object[]>
+            {
+                new object[] { server[0], true },
+                new object[] { server[0], false }
+            });
+
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/28957", typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))]
+        [ConditionalTheory(nameof(WebSocketsSupported)), MemberData(nameof(EchoServersWithSwitch))]
+        public async Task CloseOutputAsync_ServerInitiated_CanReceiveAfterClose(Uri server, bool syncState)
+        {
+            using (ClientWebSocket cws = await GetConnectedWebSocket(server, TimeOutMilliseconds, _output))
+            {
+                var cts = new CancellationTokenSource(TimeOutMilliseconds);
+                await cws.SendAsync(
+                    WebSocketData.GetBufferFromText(".receiveMessageAfterClose"),
+                    WebSocketMessageType.Text,
+                    true,
+                    cts.Token);
+
+                await Task.Delay(2000);
+
+                if (syncState)
+                {
+                    var state = cws.State;
+                    Assert.Equal(WebSocketState.Open, state);
+                    // should be able to receive after this sync
+                }
+
+                var recvBuffer = new ArraySegment<byte>(new byte[1024]);
+                WebSocketReceiveResult recvResult = await cws.ReceiveAsync(recvBuffer, cts.Token);
+                var message = Encoding.UTF8.GetString(recvBuffer.ToArray(), 0, recvResult.Count);
+
+                Assert.Contains(".receiveMessageAfterClose", message);
+            }
+        }
+
         [OuterLoop("Uses external servers", typeof(PlatformDetection), nameof(PlatformDetection.LocalEchoServerIsNotAvailable))]
         [ConditionalTheory(nameof(WebSocketsSupported)), MemberData(nameof(EchoServers))]
         public async Task CloseOutputAsync_CloseDescriptionIsNull_Success(Uri server)
diff --git a/src/libraries/System.Net.WebSockets.Client/tests/LoopbackHelper.cs b/src/libraries/System.Net.WebSockets.Client/tests/LoopbackHelper.cs
index 48d167b072f7..cee509ee0684 100644
--- a/src/libraries/System.Net.WebSockets.Client/tests/LoopbackHelper.cs
+++ b/src/libraries/System.Net.WebSockets.Client/tests/LoopbackHelper.cs
@@ -28,14 +28,7 @@ public static async Task<Dictionary<string, string>> WebSocketHandshakeAsync(Loo
                     if (headerName == "Sec-WebSocket-Key")
                     {
                         string headerValue = tokens[1].Trim();
-                        string responseSecurityAcceptValue = ComputeWebSocketHandshakeSecurityAcceptValue(headerValue);
-                        serverResponse =
-                            "HTTP/1.1 101 Switching Protocols\r\n" +
-                            "Content-Length: 0\r\n" +
-                            "Upgrade: websocket\r\n" +
-                            "Connection: Upgrade\r\n" +
-                            (extensions is null ? null : $"Sec-WebSocket-Extensions: {extensions}\r\n") +
-                            "Sec-WebSocket-Accept: " + responseSecurityAcceptValue + "\r\n\r\n";
+                        serverResponse = GetServerResponseString(headerValue, extensions);
                     }
                 }
             }
@@ -50,6 +43,18 @@ public static async Task<Dictionary<string, string>> WebSocketHandshakeAsync(Loo
             return null;
         }
 
+        public static string GetServerResponseString(string secWebSocketKey, string? extensions = null)
+        {
+            var responseSecurityAcceptValue = ComputeWebSocketHandshakeSecurityAcceptValue(secWebSocketKey);
+            return
+                "HTTP/1.1 101 Switching Protocols\r\n" +
+                "Content-Length: 0\r\n" +
+                "Upgrade: websocket\r\n" +
+                "Connection: Upgrade\r\n" +
+                (extensions is null ? null : $"Sec-WebSocket-Extensions: {extensions}\r\n") +
+                "Sec-WebSocket-Accept: " + responseSecurityAcceptValue + "\r\n\r\n";
+        }
+
         private static string ComputeWebSocketHandshakeSecurityAcceptValue(string secWebSocketKey)
         {
             // GUID specified by RFC 6455.
diff --git a/src/libraries/System.Net.WebSockets.Client/tests/LoopbackServer/Http2LoopbackStream.cs b/src/libraries/System.Net.WebSockets.Client/tests/LoopbackServer/Http2LoopbackStream.cs
new file mode 100644
index 000000000000..1b3b51840ec9
--- /dev/null
+++ b/src/libraries/System.Net.WebSockets.Client/tests/LoopbackServer/Http2LoopbackStream.cs
@@ -0,0 +1,100 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.IO;
+using System.Net.Sockets;
+using System.Threading;
+using System.Threading.Tasks;
+using Xunit;
+
+namespace System.Net.Test.Common
+{
+    public class Http2LoopbackStream : Stream
+    {
+        private readonly Http2LoopbackConnection _connection;
+        private readonly int _streamId;
+        private bool _readEnded;
+        private ReadOnlyMemory<byte> _leftoverReadData;
+
+        public override bool CanRead => true;
+        public override bool CanSeek => false;
+        public override bool CanWrite => true;
+
+        public Http2LoopbackConnection Connection => _connection;
+        public int StreamId => _streamId;
+
+        public Http2LoopbackStream(Http2LoopbackConnection connection, int streamId)
+        {
+            _connection = connection;
+            _streamId = streamId;
+        }
+
+        public override async ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default)
+        {
+            if (!_leftoverReadData.IsEmpty)
+            {
+                int read = Math.Min(buffer.Length, _leftoverReadData.Length);
+                _leftoverReadData.Span.Slice(0, read).CopyTo(buffer.Span);
+                _leftoverReadData = _leftoverReadData.Slice(read);
+                return read;
+            }
+
+            if (_readEnded)
+            {
+                return 0;
+            }
+
+            DataFrame dataFrame = (DataFrame)await _connection.ReadFrameAsync(cancellationToken);
+            Assert.Equal(_streamId, dataFrame.StreamId);
+            _leftoverReadData = dataFrame.Data;
+            _readEnded = dataFrame.EndStreamFlag;
+
+            return await ReadAsync(buffer, cancellationToken);
+        }
+
+        public override Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) =>
+            ReadAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask();
+
+        public override async ValueTask WriteAsync(ReadOnlyMemory<byte> buffer, CancellationToken cancellationToken = default)
+        {
+            await _connection.SendResponseDataAsync(_streamId, buffer, endStream: false);
+        }
+
+        public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) =>
+            WriteAsync(buffer.AsMemory(offset, count), cancellationToken).AsTask();
+
+        protected override void Dispose(bool disposing) => DisposeAsync().GetAwaiter().GetResult();
+
+        public override async ValueTask DisposeAsync()
+        {
+            try
+            {
+                await _connection.SendResponseDataAsync(_streamId, Memory<byte>.Empty, endStream: true).ConfigureAwait(false);
+
+                if (!_readEnded)
+                {
+                    var rstFrame = new RstStreamFrame(FrameFlags.None, (int)ProtocolErrors.NO_ERROR, _streamId);
+                    await _connection.WriteFrameAsync(rstFrame).ConfigureAwait(false);
+                }
+            }
+            catch (IOException)
+            {
+                // Ignore connection errors
+            }
+            catch (SocketException)
+            {
+                // Ignore connection errors
+            }
+        }
+
+        public override void Flush() { }
+        public override Task FlushAsync(CancellationToken cancellationToken) => Task.CompletedTask;
+
+        public override int Read(byte[] buffer, int offset, int count) => throw new NotImplementedException();
+        public override long Seek(long offset, SeekOrigin origin) => throw new NotImplementedException();
+        public override void SetLength(long value) => throw new NotImplementedException();
+        public override void Write(byte[] buffer, int offset, int count) => throw new NotImplementedException();
+        public override long Length => throw new NotImplementedException();
+        public override long Position { get => throw new NotImplementedException(); set => throw new NotImplementedException(); }
+    }
+}
diff --git a/src/libraries/System.Net.WebSockets.Client/tests/LoopbackServer/LoopbackWebSocketServer.cs b/src/libraries/System.Net.WebSockets.Client/tests/LoopbackServer/LoopbackWebSocketServer.cs
new file mode 100644
index 000000000000..b24e2e20d40d
--- /dev/null
+++ b/src/libraries/System.Net.WebSockets.Client/tests/LoopbackServer/LoopbackWebSocketServer.cs
@@ -0,0 +1,148 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Net.Http;
+using System.Net.Test.Common;
+using System.Threading;
+using System.Threading.Tasks;
+using Xunit;
+
+namespace System.Net.WebSockets.Client.Tests
+{
+    public static class LoopbackWebSocketServer
+    {
+        public static Task RunAsync(
+            Func<ClientWebSocket, CancellationToken, Task> clientWebSocketFunc,
+            Func<WebSocket, CancellationToken, Task> serverWebSocketFunc,
+            Options options,
+            CancellationToken cancellationToken)
+        {
+            Assert.False(options.ManualServerHandshakeResponse, "Not supported in this overload");
+
+            return RunAsyncPrivate(
+                uri => RunClientAsync(uri, clientWebSocketFunc, options, cancellationToken),
+                (requestData, token) => RunServerAsync(requestData, serverWebSocketFunc, options, token),
+                options,
+                cancellationToken);
+        }
+
+        public static Task RunAsync(
+            Func<Uri, Task> loopbackClientFunc,
+            Func<WebSocketRequestData, CancellationToken, Task> loopbackServerFunc,
+            Options options,
+            CancellationToken cancellationToken)
+        {
+            Assert.False(options.DisposeClientWebSocket, "Not supported in this overload");
+            Assert.False(options.DisposeServerWebSocket, "Not supported in this overload");
+            Assert.False(options.DisposeHttpInvoker, "Not supported in this overload");
+            Assert.Null(options.HttpInvoker); // Not supported in this overload
+
+            return RunAsyncPrivate(loopbackClientFunc, loopbackServerFunc, options, cancellationToken);
+        }
+
+        private static Task RunAsyncPrivate(
+            Func<Uri, Task> loopbackClientFunc,
+            Func<WebSocketRequestData, CancellationToken, Task> loopbackServerFunc,
+            Options options,
+            CancellationToken cancellationToken)
+        {
+            bool sendDefaultServerHandshakeResponse = !options.ManualServerHandshakeResponse;
+            if (options.HttpVersion == HttpVersion.Version11)
+            {
+                return LoopbackServer.CreateClientAndServerAsync(
+                    loopbackClientFunc,
+                    async server =>
+                    {
+                        await server.AcceptConnectionAsync(async connection =>
+                        {
+                            var requestData = await WebSocketHandshakeHelper.ProcessHttp11RequestAsync(connection, sendDefaultServerHandshakeResponse, cancellationToken).ConfigureAwait(false);
+                            await loopbackServerFunc(requestData, cancellationToken).ConfigureAwait(false);
+                        });
+                    },
+                    new LoopbackServer.Options { WebSocketEndpoint = true, UseSsl = options.UseSsl });
+            }
+            else if (options.HttpVersion == HttpVersion.Version20)
+            {
+                return Http2LoopbackServer.CreateClientAndServerAsync(
+                    loopbackClientFunc,
+                    async server =>
+                    {
+                        var requestData = await WebSocketHandshakeHelper.ProcessHttp2RequestAsync(server, sendDefaultServerHandshakeResponse, cancellationToken).ConfigureAwait(false);
+                        var http2Connection = requestData.Http2Connection!;
+                        var http2StreamId = requestData.Http2StreamId.Value;
+
+                        await loopbackServerFunc(requestData, cancellationToken).ConfigureAwait(false);
+
+                        await http2Connection.DisposeAsync().ConfigureAwait(false);
+                    },
+                    new Http2Options { WebSocketEndpoint = true, UseSsl = options.UseSsl });
+            }
+            else
+            {
+                throw new ArgumentException(nameof(options.HttpVersion));
+            }
+        }
+
+        private static async Task RunServerAsync(
+            WebSocketRequestData requestData,
+            Func<WebSocket, CancellationToken, Task> serverWebSocketFunc,
+            Options options,
+            CancellationToken cancellationToken)
+        {
+            var wsOptions = new WebSocketCreationOptions { IsServer = true };
+            var serverWebSocket = WebSocket.CreateFromStream(requestData.WebSocketStream, wsOptions);
+
+            await serverWebSocketFunc(serverWebSocket, cancellationToken).ConfigureAwait(false);
+
+            if (options.DisposeServerWebSocket)
+            {
+                serverWebSocket.Dispose();
+            }
+        }
+
+        private static async Task RunClientAsync(
+            Uri uri,
+            Func<ClientWebSocket, CancellationToken, Task> clientWebSocketFunc,
+            Options options,
+            CancellationToken cancellationToken)
+        {
+            var clientWebSocket = await GetConnectedClientAsync(uri, options, cancellationToken).ConfigureAwait(false);
+
+            await clientWebSocketFunc(clientWebSocket, cancellationToken).ConfigureAwait(false);
+
+            if (options.DisposeClientWebSocket)
+            {
+                clientWebSocket.Dispose();
+            }
+
+            if (options.DisposeHttpInvoker)
+            {
+                options.HttpInvoker?.Dispose();
+            }
+        }
+
+        public static async Task<ClientWebSocket> GetConnectedClientAsync(Uri uri, Options options, CancellationToken cancellationToken)
+        {
+            var clientWebSocket = new ClientWebSocket();
+            clientWebSocket.Options.HttpVersion = options.HttpVersion;
+            clientWebSocket.Options.HttpVersionPolicy = HttpVersionPolicy.RequestVersionExact;
+
+            if (options.UseSsl && options.HttpInvoker is null)
+            {
+                clientWebSocket.Options.RemoteCertificateValidationCallback = delegate { return true; };
+            }
+
+            await clientWebSocket.ConnectAsync(uri, options.HttpInvoker, cancellationToken).ConfigureAwait(false);
+
+            return clientWebSocket;
+        }
+
+        public record class Options(Version HttpVersion, bool UseSsl, HttpMessageInvoker? HttpInvoker)
+        {
+            public bool DisposeServerWebSocket { get; set; } = true;
+            public bool DisposeClientWebSocket { get; set; }
+            public bool DisposeHttpInvoker { get; set; }
+            public bool ManualServerHandshakeResponse { get; set; }
+        }
+    }
+}
diff --git a/src/libraries/System.Net.WebSockets.Client/tests/LoopbackServer/WebSocketHandshakeHelper.cs b/src/libraries/System.Net.WebSockets.Client/tests/LoopbackServer/WebSocketHandshakeHelper.cs
new file mode 100644
index 000000000000..2a8c84e7de8e
--- /dev/null
+++ b/src/libraries/System.Net.WebSockets.Client/tests/LoopbackServer/WebSocketHandshakeHelper.cs
@@ -0,0 +1,135 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Linq;
+using System.Net.Http;
+using System.Net.Sockets;
+using System.Net.Test.Common;
+using System.Threading;
+using System.Threading.Tasks;
+using Xunit;
+
+namespace System.Net.WebSockets.Client.Tests
+{
+    public static class WebSocketHandshakeHelper
+    {
+        public static async Task<WebSocketRequestData> ProcessHttp11RequestAsync(LoopbackServer.Connection connection, bool sendServerResponse = true, CancellationToken cancellationToken = default)
+        {
+            List<string> headers = await connection.ReadRequestHeaderAsync().WaitAsync(cancellationToken).ConfigureAwait(false);
+
+            var data = new WebSocketRequestData()
+            {
+                HttpVersion = HttpVersion.Version11,
+                Http11Connection = connection
+            };
+
+            foreach (string header in headers.Skip(1))
+            {
+                string[] tokens = header.Split(new char[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
+                if (tokens.Length is 1 or 2)
+                {
+                    data.Headers.Add(
+                        tokens[0].Trim(),
+                        tokens.Length == 2 ? tokens[1].Trim() : null);
+                }
+            }
+
+            var isValidOpeningHandshake = data.Headers.TryGetValue("Sec-WebSocket-Key", out var secWebSocketKey);
+            Assert.True(isValidOpeningHandshake);
+
+            if (sendServerResponse)
+            {
+                await SendHttp11ServerResponseAsync(connection, secWebSocketKey, cancellationToken).ConfigureAwait(false);
+            }
+
+            data.WebSocketStream = connection.Stream;
+            return data;
+        }
+
+        private static async Task SendHttp11ServerResponseAsync(LoopbackServer.Connection connection, string secWebSocketKey, CancellationToken cancellationToken)
+        {
+            var serverResponse = LoopbackHelper.GetServerResponseString(secWebSocketKey);
+            await connection.WriteStringAsync(serverResponse).WaitAsync(cancellationToken).ConfigureAwait(false);
+        }
+
+        public static async Task<WebSocketRequestData> ProcessHttp2RequestAsync(Http2LoopbackServer server, bool sendServerResponse = true, CancellationToken cancellationToken = default)
+        {
+            var connection = await server.EstablishConnectionAsync(new SettingsEntry { SettingId = SettingId.EnableConnect, Value = 1 })
+                .WaitAsync(cancellationToken).ConfigureAwait(false);
+            connection.IgnoreWindowUpdates();
+
+            (int streamId, var httpRequestData) = await connection.ReadAndParseRequestHeaderAsync(readBody: false)
+                .WaitAsync(cancellationToken).ConfigureAwait(false);
+
+            var data = new WebSocketRequestData
+            {
+                HttpVersion = HttpVersion.Version20,
+                Http2Connection = connection,
+                Http2StreamId = streamId
+            };
+
+            foreach (var header in httpRequestData.Headers)
+            {
+                Assert.NotNull(header.Name);
+                data.Headers.Add(header.Name, header.Value);
+            }
+
+            var isValidOpeningHandshake = httpRequestData.Method == HttpMethod.Connect.ToString() && data.Headers.ContainsKey(":protocol");
+            Assert.True(isValidOpeningHandshake);
+
+            if (sendServerResponse)
+            {
+                await SendHttp2ServerResponseAsync(connection, streamId, cancellationToken: cancellationToken).ConfigureAwait(false);
+            }
+
+            data.WebSocketStream = new Http2LoopbackStream(connection, streamId);
+            return data;
+        }
+
+        private static async Task SendHttp2ServerResponseAsync(Http2LoopbackConnection connection, int streamId, bool endStream = false, CancellationToken cancellationToken = default)
+        {
+            // send status 200 OK to establish websocket
+            // we don't need to send anything additional as Sec-WebSocket-Key is not used for HTTP/2
+            // note: endStream=true is abnormal and used for testing premature EOS scenarios only
+            await connection.SendResponseHeadersAsync(streamId, endStream: endStream).WaitAsync(cancellationToken).ConfigureAwait(false);
+        }
+
+        public static async Task SendHttp11ServerResponseAndEosAsync(WebSocketRequestData requestData, Func<WebSocketRequestData, CancellationToken, Task>? requestDataCallback, CancellationToken cancellationToken)
+        {
+            Assert.Equal(HttpVersion.Version11, requestData.HttpVersion);
+
+            // sending default handshake response
+            await SendHttp11ServerResponseAsync(requestData.Http11Connection!, requestData.Headers["Sec-WebSocket-Key"], cancellationToken).ConfigureAwait(false);
+
+            if (requestDataCallback is not null)
+            {
+                await requestDataCallback(requestData, cancellationToken).ConfigureAwait(false);
+            }
+
+            // send server EOS (half-closing from server side)
+            requestData.Http11Connection!.Socket.Shutdown(SocketShutdown.Send);
+        }
+
+        public static async Task SendHttp2ServerResponseAndEosAsync(WebSocketRequestData requestData, bool eosInHeadersFrame, Func<WebSocketRequestData, CancellationToken, Task>? requestDataCallback, CancellationToken cancellationToken)
+        {
+            Assert.Equal(HttpVersion.Version20, requestData.HttpVersion);
+
+            var connection = requestData.Http2Connection!;
+            var streamId = requestData.Http2StreamId!.Value;
+
+            await SendHttp2ServerResponseAsync(connection, streamId, endStream: eosInHeadersFrame, cancellationToken).ConfigureAwait(false);
+
+            if (requestDataCallback is not null)
+            {
+                await requestDataCallback(requestData, cancellationToken).ConfigureAwait(false);
+            }
+
+            if (!eosInHeadersFrame)
+            {
+                // send server EOS (half-closing from server side)
+                await connection.SendResponseDataAsync(streamId, Array.Empty<byte>(), endStream: true).ConfigureAwait(false);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Net.WebSockets.Client/tests/LoopbackServer/WebSocketRequestData.cs b/src/libraries/System.Net.WebSockets.Client/tests/LoopbackServer/WebSocketRequestData.cs
new file mode 100644
index 000000000000..799157a370f0
--- /dev/null
+++ b/src/libraries/System.Net.WebSockets.Client/tests/LoopbackServer/WebSocketRequestData.cs
@@ -0,0 +1,20 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.IO;
+using System.Net.Test.Common;
+
+namespace System.Net.WebSockets.Client.Tests
+{
+    public class WebSocketRequestData
+    {
+        public Dictionary<string, string?> Headers { get; set; } = new Dictionary<string, string?>();
+        public Stream? WebSocketStream { get; set; }
+
+        public Version HttpVersion { get; set; }
+        public LoopbackServer.Connection? Http11Connection { get; set; }
+        public Http2LoopbackConnection? Http2Connection { get; set; }
+        public int? Http2StreamId { get; set; }
+    }
+}
diff --git a/src/libraries/System.Net.WebSockets.Client/tests/System.Net.WebSockets.Client.Tests.csproj b/src/libraries/System.Net.WebSockets.Client/tests/System.Net.WebSockets.Client.Tests.csproj
index 3ccc01e5e63f..2c7762cc6bc8 100644
--- a/src/libraries/System.Net.WebSockets.Client/tests/System.Net.WebSockets.Client.Tests.csproj
+++ b/src/libraries/System.Net.WebSockets.Client/tests/System.Net.WebSockets.Client.Tests.csproj
@@ -13,10 +13,11 @@
     <TestArchiveTestsRoot>$(TestArchiveRoot)browserornodejs/</TestArchiveTestsRoot>
     <TestArchiveTestsDir>$(TestArchiveTestsRoot)$(OSPlatformConfig)/</TestArchiveTestsDir>
     <DefineConstants>$(DefineConstants);TARGET_BROWSER</DefineConstants>
-    <WasmXHarnessMonoArgs>--setenv=XHARNESS_LOG_TEST_START=true</WasmXHarnessMonoArgs>
+    <XunitShowProgress>true</XunitShowProgress>
 
     <!-- This WASM test is problematic and slow right now. This sets the xharness timeout but there is also override in sendtohelix-browser.targets -->
     <WasmXHarnessTestsTimeout>01:15:00</WasmXHarnessTestsTimeout>
+    <WasmXHarnessMaxParallelThreads>1</WasmXHarnessMaxParallelThreads>
   </PropertyGroup>
 
   <ItemGroup>
@@ -52,6 +53,7 @@
     <Compile Include="$(CommonTestPath)System\Security\Cryptography\PlatformSupport.cs" Link="CommonTest\System\Security\Cryptography\PlatformSupport.cs" />
     <Compile Include="$(CommonTestPath)System\Threading\Tasks\TaskTimeoutExtensions.cs" Link="Common\System\Threading\Tasks\TaskTimeoutExtensions.cs" />
     <Compile Include="AbortTest.cs" />
+    <Compile Include="AbortTest.Loopback.cs" />
     <Compile Include="CancelTest.cs" />
     <Compile Include="ClientWebSocketOptionsTests.cs" />
     <Compile Include="ClientWebSocketTestBase.cs" />
@@ -61,6 +63,10 @@
     <Compile Include="ConnectTest.cs" />
     <Compile Include="KeepAliveTest.cs" />
     <Compile Include="LoopbackHelper.cs" />
+    <Compile Include="LoopbackServer\Http2LoopbackStream.cs" />
+    <Compile Include="LoopbackServer\LoopbackWebSocketServer.cs" />
+    <Compile Include="LoopbackServer\WebSocketHandshakeHelper.cs" />
+    <Compile Include="LoopbackServer\WebSocketRequestData.cs" />
     <Compile Include="ResourceHelper.cs" />
     <Compile Include="SendReceiveTest.cs" />
     <Compile Include="SendReceiveTest.Http2.cs" />
diff --git a/src/libraries/System.Numerics.Tensors/src/System.Numerics.Tensors.csproj b/src/libraries/System.Numerics.Tensors/src/System.Numerics.Tensors.csproj
index ae18ca1b2d93..f5bb6c851dfb 100644
--- a/src/libraries/System.Numerics.Tensors/src/System.Numerics.Tensors.csproj
+++ b/src/libraries/System.Numerics.Tensors/src/System.Numerics.Tensors.csproj
@@ -1,4 +1,4 @@
-<Project Sdk="Microsoft.NET.Sdk">
+﻿<Project Sdk="Microsoft.NET.Sdk">
   
   <PropertyGroup>
     <TargetFrameworks>$(NetCoreAppCurrent);$(NetCoreAppPrevious);$(NetCoreAppMinimum);netstandard2.0;$(NetFrameworkMinimum)</TargetFrameworks>
@@ -15,9 +15,108 @@
   </ItemGroup>
 
   <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETCoreApp'">
+    <Compile Include="System\Numerics\Tensors\netcore\Common\TensorPrimitives.IAggregationOperator.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\Common\TensorPrimitives.IBinaryOperator.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\Common\TensorPrimitives.IIndexOfOperator.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\Common\TensorPrimitives.IStatefulUnaryOperator.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\Common\TensorPrimitives.ITernaryOperator.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryInputBinaryOutput.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryOneToTwoOperator.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryOperator.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryTwoToOneOperator.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Abs.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Acos.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Acosh.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.AcosPi.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Add.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.AddMultiply.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Asin.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Asinh.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.AsinPi.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Atan.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Atan2.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Atan2Pi.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Atanh.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.AtanPi.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.BitwiseAnd.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.BitwiseOr.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Cbrt.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Ceiling.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.ConvertChecked.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.ConvertHelpers.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.ConvertSaturating.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.ConvertTruncating.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.CopySign.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Cos.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Cosh.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.CosineSimilarity.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.CosPi.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.DegreesToRadians.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Distance.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Divide.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Dot.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Exp.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Exp10.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Exp10M1.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Exp2.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Exp2M1.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.ExpM1.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.FloatHelpers.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Floor.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.FusedMultiplyAdd.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Half.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Hypot.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Ieee754Remainder.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.ILogB.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.IndexOfMax.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.IndexOfMaxMagnitude.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.IndexOfMin.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.IndexOfMinMagnitude.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.LeadingZeroCount.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Lerp.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Log.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Log10.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Log10P1.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Log2.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Log2P1.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.LogP1.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Max.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.MaxMagnitude.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Min.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.MinMagnitude.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Multiply.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.MultiplyAdd.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.MultiplyAddEstimate.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Negate.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Norm.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.OnesComplement.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.PopCount.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Pow.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Product.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.RadiansToDegrees.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Reciprocal.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.RootN.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Rotate.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Round.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.ScaleB.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.ShiftLeft.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Sigmoid.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Sin.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.SinCos.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.SinCosPi.cs" />
     <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Single.netcore.cs" />
-    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.T.cs" />
-    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.netcore.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Sinh.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.SinPi.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.SoftMax.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Sqrt.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Subtract.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Sum.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Tan.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Tanh.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.TanPi.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.TrailingZeroCount.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Truncate.cs" />
+    <Compile Include="System\Numerics\Tensors\netcore\TensorPrimitives.Xor.cs" />
   </ItemGroup>
 
   <ItemGroup Condition="'$(TargetFrameworkIdentifier)' != '.NETCoreApp'">
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.Helpers.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.Helpers.cs
index 4b5f40cec39d..1ddabb93dd0e 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.Helpers.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.Helpers.cs
@@ -6,6 +6,8 @@
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
 namespace System.Numerics.Tensors
 {
     /// <summary>Performs primitive tensor operations over spans of memory.</summary>
@@ -23,7 +25,31 @@ private static void ValidateInputOutputSpanNonOverlapping<T>(ReadOnlySpan<T> inp
         }
 
         /// <summary>Throws an <see cref="OverflowException"/> for trying to negate the minimum value of a two-complement value.</summary>
-        internal static void ThrowNegateTwosCompOverflow() => throw new OverflowException(SR.Overflow_NegateTwosCompNum);
+        private static void ThrowNegateTwosCompOverflow() => throw new OverflowException(SR.Overflow_NegateTwosCompNum);
+
+        /// <summary>Creates a span of <typeparamref name="TTo"/> from a <typeparamref name="TFrom"/> when they're the same type.</summary>
+        /// <remarks>
+        /// This is the same as MemoryMarshal.Cast, except only to be used when TFrom and TTo are the same type or effectively
+        /// the same type (e.g. int and nint in a 32-bit process). MemoryMarshal.Cast can't currently be used as it's
+        /// TFrom/TTo are constrained to be value types.
+        /// </remarks>
+        private static unsafe Span<TTo> Rename<TFrom, TTo>(Span<TFrom> span)
+        {
+            Debug.Assert(sizeof(TFrom) == sizeof(TTo));
+            return *(Span<TTo>*)(&span);
+        }
+
+        /// <summary>Creates a span of <typeparamref name="TTo"/> from a <typeparamref name="TFrom"/> when they're the same type.</summary>
+        /// <remarks>
+        /// This is the same as MemoryMarshal.Cast, except only to be used when TFrom and TTo are the same type or effectively
+        /// the same type (e.g. int and nint in a 32-bit process). MemoryMarshal.Cast can't currently be used as it's
+        /// TFrom/TTo are constrained to be value types.
+        /// </remarks>
+        private static unsafe ReadOnlySpan<TTo> Rename<TFrom, TTo>(ReadOnlySpan<TFrom> span)
+        {
+            Debug.Assert(sizeof(TFrom) == sizeof(TTo));
+            return *(ReadOnlySpan<TTo>*)(&span);
+        }
 
         /// <summary>Mask used to handle alignment elements before vectorized handling of the input.</summary>
         /// <remarks>
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IAggregationOperator.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IAggregationOperator.cs
new file mode 100644
index 000000000000..b91d9d2038fe
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IAggregationOperator.cs
@@ -0,0 +1,2490 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static unsafe partial class TensorPrimitives
+    {
+        /// <summary><see cref="IBinaryOperator{T}"/> that specializes horizontal aggregation of all elements in a vector.</summary>
+        private interface IAggregationOperator<T> : IBinaryOperator<T>
+        {
+            static abstract T Invoke(Vector128<T> x);
+            static abstract T Invoke(Vector256<T> x);
+            static abstract T Invoke(Vector512<T> x);
+
+            static virtual T IdentityValue => throw new NotSupportedException();
+        }
+
+        /// <summary>Performs an aggregation over all elements in <paramref name="x"/> to produce a single-precision floating-point value.</summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TTransformOperator">Specifies the transform operation that should be applied to each element loaded from <paramref name="x"/>.</typeparam>
+        /// <typeparam name="TAggregationOperator">
+        /// Specifies the aggregation binary operation that should be applied to multiple values to aggregate them into a single value.
+        /// The aggregation is applied after the transform is applied to each element.
+        /// </typeparam>
+        private static T Aggregate<T, TTransformOperator, TAggregationOperator>(
+            ReadOnlySpan<T> x)
+            where TTransformOperator : struct, IUnaryOperator<T, T>
+            where TAggregationOperator : struct, IAggregationOperator<T>
+        {
+            // Since every branch has a cost and since that cost is
+            // essentially lost for larger inputs, we do branches
+            // in a way that allows us to have the minimum possible
+            // for small sizes
+
+            ref T xRef = ref MemoryMarshal.GetReference(x);
+
+            nuint remainder = (uint)x.Length;
+
+            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TTransformOperator.Vectorizable)
+            {
+                T result;
+
+                if (remainder >= (uint)Vector512<T>.Count)
+                {
+                    result = Vectorized512(ref xRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    result = VectorizedSmall(ref xRef, remainder);
+                }
+
+                return result;
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && TTransformOperator.Vectorizable)
+            {
+                T result;
+
+                if (remainder >= (uint)Vector256<T>.Count)
+                {
+                    result = Vectorized256(ref xRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    result = VectorizedSmall(ref xRef, remainder);
+                }
+
+                return result;
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TTransformOperator.Vectorizable)
+            {
+                T result;
+
+                if (remainder >= (uint)Vector128<T>.Count)
+                {
+                    result = Vectorized128(ref xRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    result = VectorizedSmall(ref xRef, remainder);
+                }
+
+                return result;
+            }
+
+            // This is the software fallback when no acceleration is available.
+            // It requires no branches to hit.
+
+            return SoftwareFallback(ref xRef, remainder);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static T SoftwareFallback(ref T xRef, nuint length)
+            {
+                T result = TAggregationOperator.IdentityValue;
+
+                for (nuint i = 0; i < length; i++)
+                {
+                    result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, i)));
+                }
+
+                return result;
+            }
+
+            static T Vectorized128(ref T xRef, nuint remainder)
+            {
+                Vector128<T> vresult = Vector128.Create(TAggregationOperator.IdentityValue);
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                Vector128<T> end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
+
+                nuint misalignment = 0;
+
+                if (remainder > (uint)(Vector128<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    {
+                        T* xPtr = px;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)xPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+
+                            Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector128<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector128<T> vector1;
+                        Vector128<T> vector2;
+                        Vector128<T> vector3;
+                        Vector128<T> vector4;
+
+                        // We only need to load, so there isn't a lot of benefit to doing non-temporal operations
+
+                        while (remainder >= (uint)(Vector128<T>.Count * 8))
+                        {
+                            // We load, process, and store the first four vectors
+
+                            vector1 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)));
+                            vector2 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)));
+                            vector3 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)));
+                            vector4 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)));
+
+                            vresult = TAggregationOperator.Invoke(vresult, vector1);
+                            vresult = TAggregationOperator.Invoke(vresult, vector2);
+                            vresult = TAggregationOperator.Invoke(vresult, vector3);
+                            vresult = TAggregationOperator.Invoke(vresult, vector4);
+
+                            // We load, process, and store the next four vectors
+
+                            vector1 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)));
+                            vector2 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)));
+                            vector3 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)));
+                            vector4 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)));
+
+                            vresult = TAggregationOperator.Invoke(vresult, vector1);
+                            vresult = TAggregationOperator.Invoke(vresult, vector2);
+                            vresult = TAggregationOperator.Invoke(vresult, vector3);
+                            vresult = TAggregationOperator.Invoke(vresult, vector4);
+
+                            // We adjust the source and destination references, then update
+                            // the count of remaining elements to process.
+
+                            xPtr += (uint)(Vector128<T>.Count * 8);
+
+                            remainder -= (uint)(Vector128<T>.Count * 8);
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                    }
+                }
+
+                // Store the first block. Handling this separately simplifies the latter code as we know
+                // they come after and so we can relegate it to full blocks or the trailing elements
+
+                beg = Vector128.ConditionalSelect(CreateAlignmentMaskVector128<T>((int)misalignment), beg, Vector128.Create(TAggregationOperator.IdentityValue));
+                vresult = TAggregationOperator.Invoke(vresult, beg);
+
+                // Process the remaining [0, Count * 7] elements via a jump table
+                //
+                // We end up handling any trailing elements in case 0 and in the
+                // worst case end up just doing the identity operation here if there
+                // were no trailing elements.
+
+                (nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector128<T>.Count);
+                blocks -= (misalignment == 0) ? 1u : 0u;
+                remainder -= trailing;
+
+                switch (blocks)
+                {
+                    case 7:
+                        {
+                            Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 1)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)trailing), end, Vector128.Create(TAggregationOperator.IdentityValue));
+                            vresult = TAggregationOperator.Invoke(vresult, end);
+                            break;
+                        }
+                }
+
+                return TAggregationOperator.Invoke(vresult);
+            }
+
+            static T Vectorized256(ref T xRef, nuint remainder)
+            {
+                Vector256<T> vresult = Vector256.Create(TAggregationOperator.IdentityValue);
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                Vector256<T> end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
+
+                nuint misalignment = 0;
+
+                if (remainder > (uint)(Vector256<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    {
+                        T* xPtr = px;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)xPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+
+                            Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector256<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector256<T> vector1;
+                        Vector256<T> vector2;
+                        Vector256<T> vector3;
+                        Vector256<T> vector4;
+
+                        // We only need to load, so there isn't a lot of benefit to doing non-temporal operations
+
+                        while (remainder >= (uint)(Vector256<T>.Count * 8))
+                        {
+                            // We load, process, and store the first four vectors
+
+                            vector1 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)));
+                            vector2 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)));
+                            vector3 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)));
+                            vector4 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)));
+
+                            vresult = TAggregationOperator.Invoke(vresult, vector1);
+                            vresult = TAggregationOperator.Invoke(vresult, vector2);
+                            vresult = TAggregationOperator.Invoke(vresult, vector3);
+                            vresult = TAggregationOperator.Invoke(vresult, vector4);
+
+                            // We load, process, and store the next four vectors
+
+                            vector1 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)));
+                            vector2 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)));
+                            vector3 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)));
+                            vector4 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)));
+
+                            vresult = TAggregationOperator.Invoke(vresult, vector1);
+                            vresult = TAggregationOperator.Invoke(vresult, vector2);
+                            vresult = TAggregationOperator.Invoke(vresult, vector3);
+                            vresult = TAggregationOperator.Invoke(vresult, vector4);
+
+                            // We adjust the source and destination references, then update
+                            // the count of remaining elements to process.
+
+                            xPtr += (uint)(Vector256<T>.Count * 8);
+
+                            remainder -= (uint)(Vector256<T>.Count * 8);
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                    }
+                }
+
+                // Store the first block. Handling this separately simplifies the latter code as we know
+                // they come after and so we can relegate it to full blocks or the trailing elements
+
+                beg = Vector256.ConditionalSelect(CreateAlignmentMaskVector256<T>((int)misalignment), beg, Vector256.Create(TAggregationOperator.IdentityValue));
+                vresult = TAggregationOperator.Invoke(vresult, beg);
+
+                // Process the remaining [0, Count * 7] elements via a jump table
+                //
+                // We end up handling any trailing elements in case 0 and in the
+                // worst case end up just doing the identity operation here if there
+                // were no trailing elements.
+
+                (nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector256<T>.Count);
+                blocks -= (misalignment == 0) ? 1u : 0u;
+                remainder -= trailing;
+
+                switch (blocks)
+                {
+                    case 7:
+                        {
+                            Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 1)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)trailing), end, Vector256.Create(TAggregationOperator.IdentityValue));
+                            vresult = TAggregationOperator.Invoke(vresult, end);
+                            break;
+                        }
+                }
+
+                return TAggregationOperator.Invoke(vresult);
+            }
+
+            static T Vectorized512(ref T xRef, nuint remainder)
+            {
+                Vector512<T> vresult = Vector512.Create(TAggregationOperator.IdentityValue);
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector512<T> beg = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef));
+                Vector512<T> end = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count));
+
+                nuint misalignment = 0;
+
+                if (remainder > (uint)(Vector512<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    {
+                        T* xPtr = px;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)xPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+
+                            Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector512<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector512<T> vector1;
+                        Vector512<T> vector2;
+                        Vector512<T> vector3;
+                        Vector512<T> vector4;
+
+                        // We only need to load, so there isn't a lot of benefit to doing non-temporal operations
+
+                        while (remainder >= (uint)(Vector512<T>.Count * 8))
+                        {
+                            // We load, process, and store the first four vectors
+
+                            vector1 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)));
+                            vector2 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)));
+                            vector3 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)));
+                            vector4 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)));
+
+                            vresult = TAggregationOperator.Invoke(vresult, vector1);
+                            vresult = TAggregationOperator.Invoke(vresult, vector2);
+                            vresult = TAggregationOperator.Invoke(vresult, vector3);
+                            vresult = TAggregationOperator.Invoke(vresult, vector4);
+
+                            // We load, process, and store the next four vectors
+
+                            vector1 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)));
+                            vector2 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)));
+                            vector3 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)));
+                            vector4 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)));
+
+                            vresult = TAggregationOperator.Invoke(vresult, vector1);
+                            vresult = TAggregationOperator.Invoke(vresult, vector2);
+                            vresult = TAggregationOperator.Invoke(vresult, vector3);
+                            vresult = TAggregationOperator.Invoke(vresult, vector4);
+
+                            // We adjust the source and destination references, then update
+                            // the count of remaining elements to process.
+
+                            xPtr += (uint)(Vector512<T>.Count * 8);
+
+                            remainder -= (uint)(Vector512<T>.Count * 8);
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                    }
+                }
+
+                // Store the first block. Handling this separately simplifies the latter code as we know
+                // they come after and so we can relegate it to full blocks or the trailing elements
+
+                beg = Vector512.ConditionalSelect(CreateAlignmentMaskVector512<T>((int)misalignment), beg, Vector512.Create(TAggregationOperator.IdentityValue));
+                vresult = TAggregationOperator.Invoke(vresult, beg);
+
+                // Process the remaining [0, Count * 7] elements via a jump table
+                //
+                // We end up handling any trailing elements in case 0 and in the
+                // worst case end up just doing the identity operation here if there
+                // were no trailing elements.
+
+                (nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector512<T>.Count);
+                blocks -= (misalignment == 0) ? 1u : 0u;
+                remainder -= trailing;
+
+                switch (blocks)
+                {
+                    case 7:
+                        {
+                            Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 1)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end = Vector512.ConditionalSelect(CreateRemainderMaskVector512<T>((int)trailing), end, Vector512.Create(TAggregationOperator.IdentityValue));
+                            vresult = TAggregationOperator.Invoke(vresult, end);
+                            break;
+                        }
+                }
+
+                return TAggregationOperator.Invoke(vresult);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static T VectorizedSmall(ref T xRef, nuint remainder)
+            {
+                if (sizeof(T) == 1)
+                {
+                    return VectorizedSmall1(ref xRef, remainder);
+                }
+                else if (sizeof(T) == 2)
+                {
+                    return VectorizedSmall2(ref xRef, remainder);
+                }
+                else if (sizeof(T) == 4)
+                {
+                    return VectorizedSmall4(ref xRef, remainder);
+                }
+                else
+                {
+                    Debug.Assert(sizeof(T) == 8);
+                    return VectorizedSmall8(ref xRef, remainder);
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static T VectorizedSmall1(ref T xRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 1);
+                T result = TAggregationOperator.IdentityValue;
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 63:
+                    case 62:
+                    case 61:
+                    case 60:
+                    case 59:
+                    case 58:
+                    case 57:
+                    case 56:
+                    case 55:
+                    case 54:
+                    case 53:
+                    case 52:
+                    case 51:
+                    case 50:
+                    case 49:
+                    case 48:
+                    case 47:
+                    case 46:
+                    case 45:
+                    case 44:
+                    case 43:
+                    case 42:
+                    case 41:
+                    case 40:
+                    case 39:
+                    case 38:
+                    case 37:
+                    case 36:
+                    case 35:
+                    case 34:
+                    case 33:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            Vector256<T> end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
+
+                            end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 32:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            Vector128<T> end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
+
+                            end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 15:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 14)));
+                        goto case 14;
+
+                    case 14:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 13)));
+                        goto case 13;
+
+                    case 13:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 12)));
+                        goto case 12;
+
+                    case 12:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 11)));
+                        goto case 11;
+
+                    case 11:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 10)));
+                        goto case 10;
+
+                    case 10:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 9)));
+                        goto case 9;
+
+                    case 9:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 8)));
+                        goto case 8;
+
+                    case 8:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 7)));
+                        goto case 7;
+
+                    case 7:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 6)));
+                        goto case 6;
+
+                    case 6:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 5)));
+                        goto case 5;
+
+                    case 5:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 4)));
+                        goto case 4;
+
+                    case 4:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 3)));
+                        goto case 3;
+
+                    case 3:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)));
+                        goto case 2;
+
+                    case 2:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)));
+                        goto case 1;
+
+                    case 1:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef));
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static T VectorizedSmall2(ref T xRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 2);
+                T result = TAggregationOperator.IdentityValue;
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            Vector256<T> end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
+
+                            end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            Vector128<T> end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
+
+                            end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 8:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 7:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 6)));
+                        goto case 6;
+
+                    case 6:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 5)));
+                        goto case 5;
+
+                    case 5:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 4)));
+                        goto case 4;
+
+                    case 4:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 3)));
+                        goto case 3;
+
+                    case 3:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)));
+                        goto case 2;
+
+                    case 2:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)));
+                        goto case 1;
+
+                    case 1:
+                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef));
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static T VectorizedSmall4(ref T xRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 4);
+                T result = TAggregationOperator.IdentityValue;
+
+                switch (remainder)
+                {
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            Vector256<T> end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
+
+                            end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    case 8:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            Vector128<T> end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
+
+                            end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef));
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static T VectorizedSmall8(ref T xRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 8);
+                T result = TAggregationOperator.IdentityValue;
+
+                switch (remainder)
+                {
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            Vector256<T> end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
+
+                            end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            Vector128<T> end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
+
+                            end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    case 2:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    case 1:
+                        {
+                            result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef));
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+
+                return result;
+            }
+        }
+
+        /// <summary>Performs an aggregation over all pair-wise elements in <paramref name="x"/> and <paramref name="y"/> to produce a single-precision floating-point value.</summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TBinaryOperator">Specifies the binary operation that should be applied to the pair-wise elements loaded from <paramref name="x"/> and <paramref name="y"/>.</typeparam>
+        /// <typeparam name="TAggregationOperator">
+        /// Specifies the aggregation binary operation that should be applied to multiple values to aggregate them into a single value.
+        /// The aggregation is applied to the results of the binary operations on the pair-wise values.
+        /// </typeparam>
+        private static T Aggregate<T, TBinaryOperator, TAggregationOperator>(
+            ReadOnlySpan<T> x, ReadOnlySpan<T> y)
+            where TBinaryOperator : struct, IBinaryOperator<T>
+            where TAggregationOperator : struct, IAggregationOperator<T>
+        {
+            if (x.Length != y.Length)
+            {
+                ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
+            }
+
+            // Since every branch has a cost and since that cost is
+            // essentially lost for larger inputs, we do branches
+            // in a way that allows us to have the minimum possible
+            // for small sizes
+
+            ref T xRef = ref MemoryMarshal.GetReference(x);
+            ref T yRef = ref MemoryMarshal.GetReference(y);
+
+            nuint remainder = (uint)x.Length;
+
+            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TBinaryOperator.Vectorizable)
+            {
+                T result;
+
+                if (remainder >= (uint)Vector512<T>.Count)
+                {
+                    result = Vectorized512(ref xRef, ref yRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    result = VectorizedSmall(ref xRef, ref yRef, remainder);
+                }
+
+                return result;
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && TBinaryOperator.Vectorizable)
+            {
+                T result;
+
+                if (remainder >= (uint)Vector256<T>.Count)
+                {
+                    result = Vectorized256(ref xRef, ref yRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    result = VectorizedSmall(ref xRef, ref yRef, remainder);
+                }
+
+                return result;
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TBinaryOperator.Vectorizable)
+            {
+                T result;
+
+                if (remainder >= (uint)Vector128<T>.Count)
+                {
+                    result = Vectorized128(ref xRef, ref yRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    result = VectorizedSmall(ref xRef, ref yRef, remainder);
+                }
+
+                return result;
+            }
+
+            // This is the software fallback when no acceleration is available
+            // It requires no branches to hit
+
+            return SoftwareFallback(ref xRef, ref yRef, remainder);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static T SoftwareFallback(ref T xRef, ref T yRef, nuint length)
+            {
+                T result = TAggregationOperator.IdentityValue;
+
+                for (nuint i = 0; i < length; i++)
+                {
+                    result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, i),
+                                                                                        Unsafe.Add(ref yRef, i)));
+                }
+
+                return result;
+            }
+
+            static T Vectorized128(ref T xRef, ref T yRef, nuint remainder)
+            {
+                Vector128<T> vresult = Vector128.Create(TAggregationOperator.IdentityValue);
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                          Vector128.LoadUnsafe(ref yRef));
+                Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
+
+                nuint misalignment = 0;
+
+                if (remainder > (uint)(Vector128<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* py = &yRef)
+                    {
+                        T* xPtr = px;
+                        T* yPtr = py;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)xPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            yPtr += misalignment;
+
+                            Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector128<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector128<T> vector1;
+                        Vector128<T> vector2;
+                        Vector128<T> vector3;
+                        Vector128<T> vector4;
+
+                        // We only need to load, so there isn't a lot of benefit to doing non-temporal operations
+
+                        while (remainder >= (uint)(Vector128<T>.Count * 8))
+                        {
+                            // We load, process, and store the first four vectors
+
+                            vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
+                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)));
+                            vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
+                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)));
+                            vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
+                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)));
+                            vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
+                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)));
+
+                            vresult = TAggregationOperator.Invoke(vresult, vector1);
+                            vresult = TAggregationOperator.Invoke(vresult, vector2);
+                            vresult = TAggregationOperator.Invoke(vresult, vector3);
+                            vresult = TAggregationOperator.Invoke(vresult, vector4);
+
+                            // We load, process, and store the next four vectors
+
+                            vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
+                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)));
+                            vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
+                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)));
+                            vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
+                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)));
+                            vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
+                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)));
+
+                            vresult = TAggregationOperator.Invoke(vresult, vector1);
+                            vresult = TAggregationOperator.Invoke(vresult, vector2);
+                            vresult = TAggregationOperator.Invoke(vresult, vector3);
+                            vresult = TAggregationOperator.Invoke(vresult, vector4);
+
+                            // We adjust the source and destination references, then update
+                            // the count of remaining elements to process.
+
+                            xPtr += (uint)(Vector128<T>.Count * 8);
+                            yPtr += (uint)(Vector128<T>.Count * 8);
+
+                            remainder -= (uint)(Vector128<T>.Count * 8);
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        yRef = ref *yPtr;
+                    }
+                }
+
+                // Store the first block. Handling this separately simplifies the latter code as we know
+                // they come after and so we can relegate it to full blocks or the trailing elements
+
+                beg = Vector128.ConditionalSelect(CreateAlignmentMaskVector128<T>((int)misalignment), beg, Vector128.Create(TAggregationOperator.IdentityValue));
+                vresult = TAggregationOperator.Invoke(vresult, beg);
+
+                // Process the remaining [0, Count * 7] elements via a jump table
+                //
+                // We end up handling any trailing elements in case 0 and in the
+                // worst case end up just doing the identity operation here if there
+                // were no trailing elements.
+
+                (nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector128<T>.Count);
+                blocks -= (misalignment == 0) ? 1u : 0u;
+                remainder -= trailing;
+
+                switch (blocks)
+                {
+                    case 7:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 7)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 6)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 5)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 4)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 3)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 2)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 1)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 1)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)trailing), end, Vector128.Create(TAggregationOperator.IdentityValue));
+                            vresult = TAggregationOperator.Invoke(vresult, end);
+                            break;
+                        }
+                }
+
+                return TAggregationOperator.Invoke(vresult);
+            }
+
+            static T Vectorized256(ref T xRef, ref T yRef, nuint remainder)
+            {
+                Vector256<T> vresult = Vector256.Create(TAggregationOperator.IdentityValue);
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                          Vector256.LoadUnsafe(ref yRef));
+                Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
+
+                nuint misalignment = 0;
+
+                if (remainder > (uint)(Vector256<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* py = &yRef)
+                    {
+                        T* xPtr = px;
+                        T* yPtr = py;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)xPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            yPtr += misalignment;
+
+                            Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector256<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector256<T> vector1;
+                        Vector256<T> vector2;
+                        Vector256<T> vector3;
+                        Vector256<T> vector4;
+
+                        // We only need to load, so there isn't a lot of benefit to doing non-temporal operations
+
+                        while (remainder >= (uint)(Vector256<T>.Count * 8))
+                        {
+                            // We load, process, and store the first four vectors
+
+                            vector1 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
+                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)));
+                            vector2 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
+                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)));
+                            vector3 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
+                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)));
+                            vector4 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
+                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)));
+
+                            vresult = TAggregationOperator.Invoke(vresult, vector1);
+                            vresult = TAggregationOperator.Invoke(vresult, vector2);
+                            vresult = TAggregationOperator.Invoke(vresult, vector3);
+                            vresult = TAggregationOperator.Invoke(vresult, vector4);
+
+                            // We load, process, and store the next four vectors
+
+                            vector1 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
+                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)));
+                            vector2 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
+                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)));
+                            vector3 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
+                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)));
+                            vector4 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
+                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)));
+
+                            vresult = TAggregationOperator.Invoke(vresult, vector1);
+                            vresult = TAggregationOperator.Invoke(vresult, vector2);
+                            vresult = TAggregationOperator.Invoke(vresult, vector3);
+                            vresult = TAggregationOperator.Invoke(vresult, vector4);
+
+                            // We adjust the source and destination references, then update
+                            // the count of remaining elements to process.
+
+                            xPtr += (uint)(Vector256<T>.Count * 8);
+                            yPtr += (uint)(Vector256<T>.Count * 8);
+
+                            remainder -= (uint)(Vector256<T>.Count * 8);
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        yRef = ref *yPtr;
+                    }
+                }
+
+                // Store the first block. Handling this separately simplifies the latter code as we know
+                // they come after and so we can relegate it to full blocks or the trailing elements
+
+                beg = Vector256.ConditionalSelect(CreateAlignmentMaskVector256<T>((int)misalignment), beg, Vector256.Create(TAggregationOperator.IdentityValue));
+                vresult = TAggregationOperator.Invoke(vresult, beg);
+
+                // Process the remaining [0, Count * 7] elements via a jump table
+                //
+                // We end up handling any trailing elements in case 0 and in the
+                // worst case end up just doing the identity operation here if there
+                // were no trailing elements.
+
+                (nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector256<T>.Count);
+                blocks -= (misalignment == 0) ? 1u : 0u;
+                remainder -= trailing;
+
+                switch (blocks)
+                {
+                    case 7:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 7)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 6)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 5)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 4)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 3)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 2)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 1)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 1)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)trailing), end, Vector256.Create(TAggregationOperator.IdentityValue));
+                            vresult = TAggregationOperator.Invoke(vresult, end);
+                            break;
+                        }
+                }
+
+                return TAggregationOperator.Invoke(vresult);
+            }
+
+            static T Vectorized512(ref T xRef, ref T yRef, nuint remainder)
+            {
+                Vector512<T> vresult = Vector512.Create(TAggregationOperator.IdentityValue);
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector512<T> beg = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
+                                                          Vector512.LoadUnsafe(ref yRef));
+                Vector512<T> end = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
+                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count));
+
+                nuint misalignment = 0;
+
+                if (remainder > (uint)(Vector512<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* py = &yRef)
+                    {
+                        T* xPtr = px;
+                        T* yPtr = py;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)xPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            yPtr += misalignment;
+
+                            Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector512<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector512<T> vector1;
+                        Vector512<T> vector2;
+                        Vector512<T> vector3;
+                        Vector512<T> vector4;
+
+                        // We only need to load, so there isn't a lot of benefit to doing non-temporal operations
+
+                        while (remainder >= (uint)(Vector512<T>.Count * 8))
+                        {
+                            // We load, process, and store the first four vectors
+
+                            vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
+                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)));
+                            vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
+                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)));
+                            vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
+                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)));
+                            vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
+                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)));
+
+                            vresult = TAggregationOperator.Invoke(vresult, vector1);
+                            vresult = TAggregationOperator.Invoke(vresult, vector2);
+                            vresult = TAggregationOperator.Invoke(vresult, vector3);
+                            vresult = TAggregationOperator.Invoke(vresult, vector4);
+
+                            // We load, process, and store the next four vectors
+
+                            vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
+                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)));
+                            vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
+                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)));
+                            vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
+                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)));
+                            vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
+                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)));
+
+                            vresult = TAggregationOperator.Invoke(vresult, vector1);
+                            vresult = TAggregationOperator.Invoke(vresult, vector2);
+                            vresult = TAggregationOperator.Invoke(vresult, vector3);
+                            vresult = TAggregationOperator.Invoke(vresult, vector4);
+
+                            // We adjust the source and destination references, then update
+                            // the count of remaining elements to process.
+
+                            xPtr += (uint)(Vector512<T>.Count * 8);
+                            yPtr += (uint)(Vector512<T>.Count * 8);
+
+                            remainder -= (uint)(Vector512<T>.Count * 8);
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        yRef = ref *yPtr;
+                    }
+                }
+
+                // Store the first block. Handling this separately simplifies the latter code as we know
+                // they come after and so we can relegate it to full blocks or the trailing elements
+
+                beg = Vector512.ConditionalSelect(CreateAlignmentMaskVector512<T>((int)misalignment), beg, Vector512.Create(TAggregationOperator.IdentityValue));
+                vresult = TAggregationOperator.Invoke(vresult, beg);
+
+                // Process the remaining [0, Count * 7] elements via a jump table
+                //
+                // We end up handling any trailing elements in case 0 and in the
+                // worst case end up just doing the identity operation here if there
+                // were no trailing elements.
+
+                (nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector512<T>.Count);
+                blocks -= (misalignment == 0) ? 1u : 0u;
+                remainder -= trailing;
+
+                switch (blocks)
+                {
+                    case 7:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 1)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 1)));
+                            vresult = TAggregationOperator.Invoke(vresult, vector);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end = Vector512.ConditionalSelect(CreateRemainderMaskVector512<T>((int)trailing), end, Vector512.Create(TAggregationOperator.IdentityValue));
+                            vresult = TAggregationOperator.Invoke(vresult, end);
+                            break;
+                        }
+                }
+
+                return TAggregationOperator.Invoke(vresult);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static T VectorizedSmall(ref T xRef, ref T yRef, nuint remainder)
+            {
+                if (sizeof(T) == 1)
+                {
+                    return VectorizedSmall1(ref xRef, ref yRef, remainder);
+                }
+                else if (sizeof(T) == 2)
+                {
+                    return VectorizedSmall2(ref xRef, ref yRef, remainder);
+                }
+                else if (sizeof(T) == 4)
+                {
+                    return VectorizedSmall4(ref xRef, ref yRef, remainder);
+                }
+                else
+                {
+                    Debug.Assert(sizeof(T) == 8);
+                    return VectorizedSmall8(ref xRef, ref yRef, remainder);
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static T VectorizedSmall1(ref T xRef, ref T yRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 1);
+                T result = TAggregationOperator.IdentityValue;
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 63:
+                    case 62:
+                    case 61:
+                    case 60:
+                    case 59:
+                    case 58:
+                    case 57:
+                    case 56:
+                    case 55:
+                    case 54:
+                    case 53:
+                    case 52:
+                    case 51:
+                    case 50:
+                    case 49:
+                    case 48:
+                    case 47:
+                    case 46:
+                    case 45:
+                    case 44:
+                    case 43:
+                    case 42:
+                    case 41:
+                    case 40:
+                    case 39:
+                    case 38:
+                    case 37:
+                    case 36:
+                    case 35:
+                    case 34:
+                    case 33:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+                            Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
+
+                            end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 32:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+                            Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
+
+                            end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 15:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 14), Unsafe.Add(ref yRef, 14)));
+                        goto case 14;
+
+                    case 14:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 13), Unsafe.Add(ref yRef, 13)));
+                        goto case 13;
+
+                    case 13:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 12), Unsafe.Add(ref yRef, 12)));
+                        goto case 12;
+
+                    case 12:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 11), Unsafe.Add(ref yRef, 11)));
+                        goto case 11;
+
+                    case 11:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 10), Unsafe.Add(ref yRef, 10)));
+                        goto case 10;
+
+                    case 10:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 9), Unsafe.Add(ref yRef, 9)));
+                        goto case 9;
+
+                    case 9:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 8), Unsafe.Add(ref yRef, 8)));
+                        goto case 8;
+
+                    case 8:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 7), Unsafe.Add(ref yRef, 7)));
+                        goto case 7;
+
+                    case 7:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 6), Unsafe.Add(ref yRef, 6)));
+                        goto case 6;
+
+                    case 6:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 5), Unsafe.Add(ref yRef, 5)));
+                        goto case 5;
+
+                    case 5:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 4), Unsafe.Add(ref yRef, 4)));
+                        goto case 4;
+
+                    case 4:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 3), Unsafe.Add(ref yRef, 3)));
+                        goto case 3;
+
+                    case 3:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), Unsafe.Add(ref yRef, 2)));
+                        goto case 2;
+
+                    case 2:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), Unsafe.Add(ref yRef, 1)));
+                        goto case 1;
+
+                    case 1:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef));
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static T VectorizedSmall2(ref T xRef, ref T yRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 2);
+                T result = TAggregationOperator.IdentityValue;
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+                            Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
+
+                            end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+                            Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
+
+                            end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 8:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 7:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 6), Unsafe.Add(ref yRef, 6)));
+                        goto case 6;
+
+                    case 6:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 5), Unsafe.Add(ref yRef, 5)));
+                        goto case 5;
+
+                    case 5:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 4), Unsafe.Add(ref yRef, 4)));
+                        goto case 4;
+
+                    case 4:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 3), Unsafe.Add(ref yRef, 3)));
+                        goto case 3;
+
+                    case 3:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), Unsafe.Add(ref yRef, 2)));
+                        goto case 2;
+
+                    case 2:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), Unsafe.Add(ref yRef, 1)));
+                        goto case 1;
+
+                    case 1:
+                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef));
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static T VectorizedSmall4(ref T xRef, ref T yRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 4);
+                T result = TAggregationOperator.IdentityValue;
+
+                switch (remainder)
+                {
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+                            Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
+
+                            end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    case 8:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+                            Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
+
+                            end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                                                Unsafe.Add(ref yRef, 2)));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                                                Unsafe.Add(ref yRef, 1)));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef));
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static T VectorizedSmall8(ref T xRef, ref T yRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 8);
+                T result = TAggregationOperator.IdentityValue;
+
+                switch (remainder)
+                {
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+                            Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
+
+                            end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+                            Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
+
+                            end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
+
+                            result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
+                            break;
+                        }
+
+                    case 2:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+
+                            result = TAggregationOperator.Invoke(beg);
+                            break;
+                        }
+
+                    case 1:
+                        {
+                            result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef));
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+
+                return result;
+            }
+        }
+
+        /// <summary>
+        /// Gets a vector mask that will be all-ones-set for the last <paramref name="count"/> elements
+        /// and zero for all other elements.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector128<T> CreateAlignmentMaskVector128<T>(int count)
+        {
+            if (Unsafe.SizeOf<T>() == 1)
+            {
+                return Vector128.LoadUnsafe(
+                    ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(AlignmentByteMask_64x65)),
+                    (uint)(count * 64));
+            }
+
+            if (Unsafe.SizeOf<T>() == 2)
+            {
+                return Vector128.LoadUnsafe(
+                    ref Unsafe.As<ushort, T>(ref MemoryMarshal.GetReference(AlignmentUInt16Mask_32x33)),
+                    (uint)(count * 32));
+            }
+
+            if (Unsafe.SizeOf<T>() == 4)
+            {
+                return Vector128.LoadUnsafe(
+                    ref Unsafe.As<uint, T>(ref MemoryMarshal.GetReference(AlignmentUInt32Mask_16x17)),
+                    (uint)(count * 16));
+            }
+
+            Debug.Assert(Unsafe.SizeOf<T>() == 8);
+            {
+                return Vector128.LoadUnsafe(
+                    ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(AlignmentUInt64Mask_8x9)),
+                    (uint)(count * 8));
+            }
+        }
+
+        /// <summary>
+        /// Gets a vector mask that will be all-ones-set for the last <paramref name="count"/> elements
+        /// and zero for all other elements.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector256<T> CreateAlignmentMaskVector256<T>(int count)
+        {
+            if (Unsafe.SizeOf<T>() == 1)
+            {
+                return Vector256.LoadUnsafe(
+                    ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(AlignmentByteMask_64x65)),
+                    (uint)(count * 64));
+            }
+
+            if (Unsafe.SizeOf<T>() == 2)
+            {
+                return Vector256.LoadUnsafe(
+                    ref Unsafe.As<ushort, T>(ref MemoryMarshal.GetReference(AlignmentUInt16Mask_32x33)),
+                    (uint)(count * 32));
+            }
+
+            if (Unsafe.SizeOf<T>() == 4)
+            {
+                return Vector256.LoadUnsafe(
+                    ref Unsafe.As<uint, T>(ref MemoryMarshal.GetReference(AlignmentUInt32Mask_16x17)),
+                    (uint)(count * 16));
+            }
+
+            Debug.Assert(Unsafe.SizeOf<T>() == 8);
+            {
+                return Vector256.LoadUnsafe(
+                    ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(AlignmentUInt64Mask_8x9)),
+                    (uint)(count * 8));
+            }
+        }
+
+        /// <summary>
+        /// Gets a vector mask that will be all-ones-set for the last <paramref name="count"/> elements
+        /// and zero for all other elements.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector512<T> CreateAlignmentMaskVector512<T>(int count)
+        {
+            if (Unsafe.SizeOf<T>() == 1)
+            {
+                return Vector512.LoadUnsafe(
+                    ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(AlignmentByteMask_64x65)),
+                    (uint)(count * 64));
+            }
+
+            if (Unsafe.SizeOf<T>() == 2)
+            {
+                return Vector512.LoadUnsafe(
+                    ref Unsafe.As<ushort, T>(ref MemoryMarshal.GetReference(AlignmentUInt16Mask_32x33)),
+                    (uint)(count * 32));
+            }
+
+            if (Unsafe.SizeOf<T>() == 4)
+            {
+                return Vector512.LoadUnsafe(
+                    ref Unsafe.As<uint, T>(ref MemoryMarshal.GetReference(AlignmentUInt32Mask_16x17)),
+                    (uint)(count * 16));
+            }
+
+            Debug.Assert(Unsafe.SizeOf<T>() == 8);
+            {
+                return Vector512.LoadUnsafe(
+                    ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(AlignmentUInt64Mask_8x9)),
+                    (uint)(count * 8));
+            }
+        }
+
+        /// <summary>
+        /// Gets a vector mask that will be all-ones-set for the last <paramref name="count"/> elements
+        /// and zero for all other elements.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector128<T> CreateRemainderMaskVector128<T>(int count)
+        {
+            if (Unsafe.SizeOf<T>() == 1)
+            {
+                return Vector128.LoadUnsafe(
+                    ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(RemainderByteMask_64x65)),
+                    (uint)(count * 64) + 48); // last 16 bytes in the row
+            }
+
+            if (Unsafe.SizeOf<T>() == 2)
+            {
+                return Vector128.LoadUnsafe(
+                    ref Unsafe.As<ushort, T>(ref MemoryMarshal.GetReference(RemainderUInt16Mask_32x33)),
+                    (uint)(count * 32) + 24); // last 8 shorts in the row
+            }
+
+            if (Unsafe.SizeOf<T>() == 4)
+            {
+                return Vector128.LoadUnsafe(
+                    ref Unsafe.As<uint, T>(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16x17)),
+                    (uint)(count * 16) + 12); // last 4 ints in the row
+            }
+
+            Debug.Assert(Unsafe.SizeOf<T>() == 8);
+            {
+                return Vector128.LoadUnsafe(
+                    ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9)),
+                    (uint)(count * 8) + 6); // last 2 longs in the row
+            }
+        }
+
+        /// <summary>
+        /// Gets a vector mask that will be all-ones-set for the last <paramref name="count"/> elements
+        /// and zero for all other elements.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector256<T> CreateRemainderMaskVector256<T>(int count)
+        {
+            if (Unsafe.SizeOf<T>() == 1)
+            {
+                return Vector256.LoadUnsafe(
+                    ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(RemainderByteMask_64x65)),
+                    (uint)(count * 64) + 32); // last 32 bytes in the row
+            }
+
+            if (Unsafe.SizeOf<T>() == 2)
+            {
+                return Vector256.LoadUnsafe(
+                    ref Unsafe.As<ushort, T>(ref MemoryMarshal.GetReference(RemainderUInt16Mask_32x33)),
+                    (uint)(count * 32) + 16); // last 16 shorts in the row
+            }
+
+            if (Unsafe.SizeOf<T>() == 4)
+            {
+                return Vector256.LoadUnsafe(
+                    ref Unsafe.As<uint, T>(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16x17)),
+                    (uint)(count * 16) + 8); // last 8 ints in the row
+            }
+
+            Debug.Assert(Unsafe.SizeOf<T>() == 8);
+            {
+                return Vector256.LoadUnsafe(
+                    ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9)),
+                    (uint)(count * 8) + 4); // last 4 longs in the row
+            }
+        }
+
+        /// <summary>
+        /// Gets a vector mask that will be all-ones-set for the last <paramref name="count"/> elements
+        /// and zero for all other elements.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector512<T> CreateRemainderMaskVector512<T>(int count)
+        {
+            if (Unsafe.SizeOf<T>() == 1)
+            {
+                return Vector512.LoadUnsafe(
+                    ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(RemainderByteMask_64x65)),
+                    (uint)(count * 64));
+            }
+
+            if (Unsafe.SizeOf<T>() == 2)
+            {
+                return Vector512.LoadUnsafe(
+                    ref Unsafe.As<ushort, T>(ref MemoryMarshal.GetReference(RemainderUInt16Mask_32x33)),
+                    (uint)(count * 32));
+            }
+
+            if (Unsafe.SizeOf<T>() == 4)
+            {
+                return Vector512.LoadUnsafe(
+                    ref Unsafe.As<uint, T>(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16x17)),
+                    (uint)(count * 16));
+            }
+
+            Debug.Assert(Unsafe.SizeOf<T>() == 8);
+            {
+                return Vector512.LoadUnsafe(
+                    ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9)),
+                    (uint)(count * 8));
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IBinaryOperator.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IBinaryOperator.cs
new file mode 100644
index 000000000000..0ec37a519ec2
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IBinaryOperator.cs
@@ -0,0 +1,2780 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static unsafe partial class TensorPrimitives
+    {
+        /// <summary>Operator that takes two input values and returns a single value.</summary>
+        private interface IBinaryOperator<T>
+        {
+            static abstract bool Vectorizable { get; }
+            static abstract T Invoke(T x, T y);
+            static abstract Vector128<T> Invoke(Vector128<T> x, Vector128<T> y);
+            static abstract Vector256<T> Invoke(Vector256<T> x, Vector256<T> y);
+            static abstract Vector512<T> Invoke(Vector512<T> x, Vector512<T> y);
+        }
+
+        /// <summary>
+        /// Performs an element-wise operation on <paramref name="x"/> and <paramref name="y"/>,
+        /// and writes the results to <paramref name="destination"/>.
+        /// </summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TBinaryOperator">
+        /// Specifies the operation to perform on each element loaded from <paramref name="x"/> with <paramref name="y"/>.
+        /// </typeparam>
+        private static void InvokeScalarSpanIntoSpan<T, TBinaryOperator>(
+            T x, ReadOnlySpan<T> y, Span<T> destination)
+            where TBinaryOperator : struct, IBinaryOperator<T> =>
+            InvokeSpanScalarIntoSpan<T, IdentityOperator<T>, InvertedBinaryOperator<TBinaryOperator, T>>(y, x, destination);
+
+        /// <summary>
+        /// Performs an element-wise operation on <paramref name="x"/> and <paramref name="y"/>,
+        /// and writes the results to <paramref name="destination"/>.
+        /// </summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TBinaryOperator">
+        /// Specifies the operation to perform on each element loaded from <paramref name="x"/> with <paramref name="y"/>.
+        /// </typeparam>
+        private static void InvokeSpanScalarIntoSpan<T, TBinaryOperator>(
+            ReadOnlySpan<T> x, T y, Span<T> destination)
+            where TBinaryOperator : struct, IBinaryOperator<T> =>
+            InvokeSpanScalarIntoSpan<T, IdentityOperator<T>, TBinaryOperator>(x, y, destination);
+
+        /// <summary>
+        /// Performs an element-wise operation on <paramref name="x"/> and <paramref name="y"/>,
+        /// and writes the results to <paramref name="destination"/>.
+        /// </summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TBinaryOperator{T}">
+        /// Specifies the operation to perform on the pair-wise elements loaded from <paramref name="x"/> and <paramref name="y"/>.
+        /// </typeparam>
+        private static void InvokeSpanSpanIntoSpan<T, TBinaryOperator>(
+            ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where TBinaryOperator : struct, IBinaryOperator<T>
+        {
+            if (x.Length != y.Length)
+            {
+                ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
+            }
+
+            if (x.Length > destination.Length)
+            {
+                ThrowHelper.ThrowArgument_DestinationTooShort();
+            }
+
+            ValidateInputOutputSpanNonOverlapping(x, destination);
+            ValidateInputOutputSpanNonOverlapping(y, destination);
+
+            // Since every branch has a cost and since that cost is
+            // essentially lost for larger inputs, we do branches
+            // in a way that allows us to have the minimum possible
+            // for small sizes
+
+            ref T xRef = ref MemoryMarshal.GetReference(x);
+            ref T yRef = ref MemoryMarshal.GetReference(y);
+            ref T dRef = ref MemoryMarshal.GetReference(destination);
+
+            nuint remainder = (uint)x.Length;
+
+            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TBinaryOperator.Vectorizable)
+            {
+                if (remainder >= (uint)Vector512<T>.Count)
+                {
+                    Vectorized512(ref xRef, ref yRef, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref yRef, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && TBinaryOperator.Vectorizable)
+            {
+                if (remainder >= (uint)Vector256<T>.Count)
+                {
+                    Vectorized256(ref xRef, ref yRef, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref yRef, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TBinaryOperator.Vectorizable)
+            {
+                if (remainder >= (uint)Vector128<T>.Count)
+                {
+                    Vectorized128(ref xRef, ref yRef, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref yRef, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            // This is the software fallback when no acceleration is available
+            // It requires no branches to hit
+
+            SoftwareFallback(ref xRef, ref yRef, ref dRef, remainder);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void SoftwareFallback(ref T xRef, ref T yRef, ref T dRef, nuint length)
+            {
+                for (nuint i = 0; i < length; i++)
+                {
+                    Unsafe.Add(ref dRef, i) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, i),
+                                                                     Unsafe.Add(ref yRef, i));
+                }
+            }
+
+            static void Vectorized128(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                          Vector128.LoadUnsafe(ref yRef));
+                Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
+
+                if (remainder > (uint)(Vector128<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* py = &yRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* yPtr = py;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            yPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector128<T> vector1;
+                        Vector128<T> vector2;
+                        Vector128<T> vector3;
+                        Vector128<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector128<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)));
+                                vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)));
+                                vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)));
+                                vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)));
+                                vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)));
+                                vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)));
+                                vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<T>.Count * 8);
+                                yPtr += (uint)(Vector128<T>.Count * 8);
+                                dPtr += (uint)(Vector128<T>.Count * 8);
+
+                                remainder -= (uint)(Vector128<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector128<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)));
+                                vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)));
+                                vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)));
+                                vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)));
+                                vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)));
+                                vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)));
+                                vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
+                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<T>.Count * 8);
+                                yPtr += (uint)(Vector128<T>.Count * 8);
+                                dPtr += (uint)(Vector128<T>.Count * 8);
+
+                                remainder -= (uint)(Vector128<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        yRef = ref *yPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
+
+                switch (remainder / (uint)Vector128<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
+                                                                         Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized256(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                          Vector256.LoadUnsafe(ref yRef));
+                Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
+
+                if (remainder > (uint)(Vector256<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* py = &yRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* yPtr = py;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)dPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            yPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector256<T> vector1;
+                        Vector256<T> vector2;
+                        Vector256<T> vector3;
+                        Vector256<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector256<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)));
+                                vector2 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)));
+                                vector3 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)));
+                                vector4 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)));
+                                vector2 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)));
+                                vector3 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)));
+                                vector4 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<T>.Count * 8);
+                                yPtr += (uint)(Vector256<T>.Count * 8);
+                                dPtr += (uint)(Vector256<T>.Count * 8);
+
+                                remainder -= (uint)(Vector256<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector256<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)));
+                                vector2 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)));
+                                vector3 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)));
+                                vector4 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)));
+                                vector2 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)));
+                                vector3 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)));
+                                vector4 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
+                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<T>.Count * 8);
+                                yPtr += (uint)(Vector256<T>.Count * 8);
+                                dPtr += (uint)(Vector256<T>.Count * 8);
+
+                                remainder -= (uint)(Vector256<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        yRef = ref *yPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector256<T>.Count - 1)) & (nuint)(-Vector256<T>.Count);
+
+                switch (remainder / (uint)Vector256<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 8)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)),
+                                                                         Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized512(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector512<T> beg = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
+                                                          Vector512.LoadUnsafe(ref yRef));
+                Vector512<T> end = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
+                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count));
+
+                if (remainder > (uint)(Vector512<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* py = &yRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* yPtr = py;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            yPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector512<T> vector1;
+                        Vector512<T> vector2;
+                        Vector512<T> vector3;
+                        Vector512<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector512<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)));
+                                vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)));
+                                vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)));
+                                vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)));
+                                vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)));
+                                vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)));
+                                vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<T>.Count * 8);
+                                yPtr += (uint)(Vector512<T>.Count * 8);
+                                dPtr += (uint)(Vector512<T>.Count * 8);
+
+                                remainder -= (uint)(Vector512<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector512<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)));
+                                vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)));
+                                vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)));
+                                vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)));
+                                vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)));
+                                vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)));
+                                vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
+                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<T>.Count * 8);
+                                yPtr += (uint)(Vector512<T>.Count * 8);
+                                dPtr += (uint)(Vector512<T>.Count * 8);
+
+                                remainder -= (uint)(Vector512<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        yRef = ref *yPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
+
+                switch (remainder / (uint)Vector512<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
+                                                                         Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
+            {
+                if (sizeof(T) == 1)
+                {
+                    VectorizedSmall1(ref xRef, ref yRef, ref dRef, remainder);
+                }
+                else if (sizeof(T) == 2)
+                {
+                    VectorizedSmall2(ref xRef, ref yRef, ref dRef, remainder);
+                }
+                else if (sizeof(T) == 4)
+                {
+                    VectorizedSmall4(ref xRef, ref yRef, ref dRef, remainder);
+                }
+                else
+                {
+                    Debug.Assert(sizeof(T) == 8);
+                    VectorizedSmall8(ref xRef, ref yRef, ref dRef, remainder);
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall1(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 1);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 63:
+                    case 62:
+                    case 61:
+                    case 60:
+                    case 59:
+                    case 58:
+                    case 57:
+                    case 56:
+                    case 55:
+                    case 54:
+                    case 53:
+                    case 52:
+                    case 51:
+                    case 50:
+                    case 49:
+                    case 48:
+                    case 47:
+                    case 46:
+                    case 45:
+                    case 44:
+                    case 43:
+                    case 42:
+                    case 41:
+                    case 40:
+                    case 39:
+                    case 38:
+                    case 37:
+                    case 36:
+                    case 35:
+                    case 34:
+                    case 33:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+                            Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 32:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+                            Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 15:
+                        Unsafe.Add(ref dRef, 14) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 14),
+                                                                         Unsafe.Add(ref yRef, 14));
+                        goto case 14;
+
+                    case 14:
+                        Unsafe.Add(ref dRef, 13) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 13),
+                                                                         Unsafe.Add(ref yRef, 13));
+                        goto case 13;
+
+                    case 13:
+                        Unsafe.Add(ref dRef, 12) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 12),
+                                                                         Unsafe.Add(ref yRef, 12));
+                        goto case 12;
+
+                    case 12:
+                        Unsafe.Add(ref dRef, 11) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 11),
+                                                                         Unsafe.Add(ref yRef, 11));
+                        goto case 11;
+
+                    case 11:
+                        Unsafe.Add(ref dRef, 10) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 10),
+                                                                         Unsafe.Add(ref yRef, 10));
+                        goto case 10;
+
+                    case 10:
+                        Unsafe.Add(ref dRef, 9) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 9),
+                                                                         Unsafe.Add(ref yRef, 9));
+                        goto case 9;
+
+                    case 9:
+                        Unsafe.Add(ref dRef, 8) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 8),
+                                                                         Unsafe.Add(ref yRef, 8));
+                        goto case 8;
+
+                    case 8:
+                        Unsafe.Add(ref dRef, 7) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 7),
+                                                                         Unsafe.Add(ref yRef, 7));
+                        goto case 7;
+
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
+                                                                         Unsafe.Add(ref yRef, 6));
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
+                                                                         Unsafe.Add(ref yRef, 5));
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
+                                                                         Unsafe.Add(ref yRef, 4));
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
+                                                                         Unsafe.Add(ref yRef, 3));
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                         Unsafe.Add(ref yRef, 2));
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                         Unsafe.Add(ref yRef, 1));
+                        goto case 1;
+
+                    case 1:
+                        dRef = TBinaryOperator.Invoke(xRef, yRef);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall2(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 2);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+                            Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+                            Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 8:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
+                                                                         Unsafe.Add(ref yRef, 6));
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
+                                                                         Unsafe.Add(ref yRef, 5));
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
+                                                                         Unsafe.Add(ref yRef, 4));
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
+                                                                         Unsafe.Add(ref yRef, 3));
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                         Unsafe.Add(ref yRef, 2));
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                         Unsafe.Add(ref yRef, 1));
+                        goto case 1;
+
+                    case 1:
+                        dRef = TBinaryOperator.Invoke(xRef, yRef);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall4(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 4);
+
+                switch (remainder)
+                {
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+                            Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    case 8:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+                            Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                             Unsafe.Add(ref yRef, 2));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                             Unsafe.Add(ref yRef, 1));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            dRef = TBinaryOperator.Invoke(xRef, yRef);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall8(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 8);
+
+                switch (remainder)
+                {
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+                            Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                      Vector256.LoadUnsafe(ref yRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+                            Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    case 2:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                      Vector128.LoadUnsafe(ref yRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 1:
+                        {
+                            dRef = TBinaryOperator.Invoke(xRef, yRef);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Performs an element-wise operation on <paramref name="x"/> and <paramref name="y"/>,
+        /// and writes the results to <paramref name="destination"/>.
+        /// </summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TTransformOperator">
+        /// Specifies the operation to perform on each element loaded from <paramref name="x"/>.
+        /// It is not used with <paramref name="y"/>.
+        /// </typeparam>
+        /// <typeparam name="TBinaryOperator">
+        /// Specifies the operation to perform on the transformed value from <paramref name="x"/> with <paramref name="y"/>.
+        /// </typeparam>
+        private static void InvokeSpanScalarIntoSpan<T, TTransformOperator, TBinaryOperator>(
+            ReadOnlySpan<T> x, T y, Span<T> destination)
+            where TTransformOperator : struct, IUnaryOperator<T, T>
+            where TBinaryOperator : struct, IBinaryOperator<T>
+        {
+            if (x.Length > destination.Length)
+            {
+                ThrowHelper.ThrowArgument_DestinationTooShort();
+            }
+
+            ValidateInputOutputSpanNonOverlapping(x, destination);
+
+            // Since every branch has a cost and since that cost is
+            // essentially lost for larger inputs, we do branches
+            // in a way that allows us to have the minimum possible
+            // for small sizes
+
+            ref T xRef = ref MemoryMarshal.GetReference(x);
+            ref T dRef = ref MemoryMarshal.GetReference(destination);
+
+            nuint remainder = (uint)x.Length;
+
+            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TTransformOperator.Vectorizable && TBinaryOperator.Vectorizable)
+            {
+                if (remainder >= (uint)Vector512<T>.Count)
+                {
+                    Vectorized512(ref xRef, y, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, y, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && TTransformOperator.Vectorizable && TBinaryOperator.Vectorizable)
+            {
+                if (remainder >= (uint)Vector256<T>.Count)
+                {
+                    Vectorized256(ref xRef, y, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, y, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TTransformOperator.Vectorizable && TBinaryOperator.Vectorizable)
+            {
+                if (remainder >= (uint)Vector128<T>.Count)
+                {
+                    Vectorized128(ref xRef, y, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, y, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            // This is the software fallback when no acceleration is available
+            // It requires no branches to hit
+
+            SoftwareFallback(ref xRef, y, ref dRef, remainder);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void SoftwareFallback(ref T xRef, T y, ref T dRef, nuint length)
+            {
+                for (nuint i = 0; i < length; i++)
+                {
+                    Unsafe.Add(ref dRef, i) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, i)),
+                                                                     y);
+                }
+            }
+
+            static void Vectorized128(ref T xRef, T y, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector128<T> yVec = Vector128.Create(y);
+
+                Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
+                                                          yVec);
+                Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
+                                                          yVec);
+
+                if (remainder > (uint)(Vector128<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector128<T> vector1;
+                        Vector128<T> vector2;
+                        Vector128<T> vector3;
+                        Vector128<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector128<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0))),
+                                                                 yVec);
+                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1))),
+                                                                 yVec);
+                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2))),
+                                                                 yVec);
+                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3))),
+                                                                 yVec);
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4))),
+                                                                 yVec);
+                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5))),
+                                                                 yVec);
+                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6))),
+                                                                 yVec);
+                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7))),
+                                                                 yVec);
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<T>.Count * 8);
+                                dPtr += (uint)(Vector128<T>.Count * 8);
+
+                                remainder -= (uint)(Vector128<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector128<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0))),
+                                                                 yVec);
+                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1))),
+                                                                 yVec);
+                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2))),
+                                                                 yVec);
+                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3))),
+                                                                 yVec);
+
+                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4))),
+                                                                 yVec);
+                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5))),
+                                                                 yVec);
+                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6))),
+                                                                 yVec);
+                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7))),
+                                                                 yVec);
+
+                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<T>.Count * 8);
+                                dPtr += (uint)(Vector128<T>.Count * 8);
+
+                                remainder -= (uint)(Vector128<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
+
+                switch (remainder / (uint)Vector128<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized256(ref T xRef, T y, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector256<T> yVec = Vector256.Create(y);
+
+                Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
+                                                          yVec);
+                Vector256<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count)),
+                                                          yVec);
+
+                if (remainder > (uint)(Vector256<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)dPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector256<T> vector1;
+                        Vector256<T> vector2;
+                        Vector256<T> vector3;
+                        Vector256<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector256<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0))),
+                                                                 yVec);
+                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1))),
+                                                                 yVec);
+                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2))),
+                                                                 yVec);
+                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3))),
+                                                                 yVec);
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4))),
+                                                                 yVec);
+                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5))),
+                                                                 yVec);
+                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6))),
+                                                                 yVec);
+                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7))),
+                                                                 yVec);
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<T>.Count * 8);
+                                dPtr += (uint)(Vector256<T>.Count * 8);
+
+                                remainder -= (uint)(Vector256<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector256<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0))),
+                                                                 yVec);
+                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1))),
+                                                                 yVec);
+                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2))),
+                                                                 yVec);
+                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3))),
+                                                                 yVec);
+
+                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4))),
+                                                                 yVec);
+                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5))),
+                                                                 yVec);
+                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6))),
+                                                                 yVec);
+                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7))),
+                                                                 yVec);
+
+                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<T>.Count * 8);
+                                dPtr += (uint)(Vector256<T>.Count * 8);
+
+                                remainder -= (uint)(Vector256<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector256<T>.Count - 1)) & (nuint)(-Vector256<T>.Count);
+
+                switch (remainder / (uint)Vector256<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 8))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized512(ref T xRef, T y, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector512<T> yVec = Vector512.Create(y);
+
+                Vector512<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef)),
+                                                          yVec);
+                Vector512<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count)),
+                                                          yVec);
+
+                if (remainder > (uint)(Vector512<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector512<T> vector1;
+                        Vector512<T> vector2;
+                        Vector512<T> vector3;
+                        Vector512<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector512<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0))),
+                                                                 yVec);
+                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1))),
+                                                                 yVec);
+                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2))),
+                                                                 yVec);
+                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3))),
+                                                                 yVec);
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4))),
+                                                                 yVec);
+                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5))),
+                                                                 yVec);
+                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6))),
+                                                                 yVec);
+                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7))),
+                                                                 yVec);
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<T>.Count * 8);
+                                dPtr += (uint)(Vector512<T>.Count * 8);
+
+                                remainder -= (uint)(Vector512<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector512<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0))),
+                                                                 yVec);
+                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1))),
+                                                                 yVec);
+                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2))),
+                                                                 yVec);
+                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3))),
+                                                                 yVec);
+
+                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4))),
+                                                                 yVec);
+                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5))),
+                                                                 yVec);
+                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6))),
+                                                                 yVec);
+                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7))),
+                                                                 yVec);
+
+                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<T>.Count * 8);
+                                dPtr += (uint)(Vector512<T>.Count * 8);
+
+                                remainder -= (uint)(Vector512<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
+
+                switch (remainder / (uint)Vector512<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2))),
+                                                                         yVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall(ref T xRef, T y, ref T dRef, nuint remainder)
+            {
+                if (sizeof(T) == 1)
+                {
+                    VectorizedSmall1(ref xRef, y, ref dRef, remainder);
+                }
+                else if (sizeof(T) == 2)
+                {
+                    VectorizedSmall2(ref xRef, y, ref dRef, remainder);
+                }
+                else if (sizeof(T) == 4)
+                {
+                    VectorizedSmall4(ref xRef, y, ref dRef, remainder);
+                }
+                else
+                {
+                    Debug.Assert(sizeof(T) == 8);
+                    VectorizedSmall8(ref xRef, y, ref dRef, remainder);
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall1(ref T xRef, T y, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 1);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 63:
+                    case 62:
+                    case 61:
+                    case 60:
+                    case 59:
+                    case 58:
+                    case 57:
+                    case 56:
+                    case 55:
+                    case 54:
+                    case 53:
+                    case 52:
+                    case 51:
+                    case 50:
+                    case 49:
+                    case 48:
+                    case 47:
+                    case 46:
+                    case 45:
+                    case 44:
+                    case 43:
+                    case 42:
+                    case 41:
+                    case 40:
+                    case 39:
+                    case 38:
+                    case 37:
+                    case 36:
+                    case 35:
+                    case 34:
+                    case 33:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> yVec = Vector256.Create(y);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
+                                                                      yVec);
+                            Vector256<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count)),
+                                                                      yVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 32:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
+                                                                      Vector256.Create(y));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> yVec = Vector128.Create(y);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
+                                                                                                yVec);
+                            Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
+                                                                                                yVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
+                                                                                                Vector128.Create(y));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 15:
+                        Unsafe.Add(ref dRef, 14) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 14)),
+                                                                          y);
+                        goto case 14;
+
+                    case 14:
+                        Unsafe.Add(ref dRef, 13) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 13)),
+                                                                          y);
+                        goto case 13;
+
+                    case 13:
+                        Unsafe.Add(ref dRef, 12) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 12)),
+                                                                          y);
+                        goto case 12;
+
+                    case 12:
+                        Unsafe.Add(ref dRef, 11) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 11)),
+                                                                          y);
+                        goto case 11;
+
+                    case 11:
+                        Unsafe.Add(ref dRef, 10) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 10)),
+                                                                          y);
+                        goto case 10;
+
+                    case 10:
+                        Unsafe.Add(ref dRef, 9) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 9)),
+                                                                         y);
+                        goto case 9;
+
+                    case 9:
+                        Unsafe.Add(ref dRef, 8) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 8)),
+                                                                         y);
+                        goto case 8;
+
+                    case 8:
+                        Unsafe.Add(ref dRef, 7) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 7)),
+                                                                         y);
+                        goto case 7;
+
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 6)),
+                                                                         y);
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 5)),
+                                                                         y);
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 4)),
+                                                                         y);
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 3)),
+                                                                         y);
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)),
+                                                                         y);
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)),
+                                                                         y);
+                        goto case 1;
+
+                    case 1:
+                        dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall2(ref T xRef, T y, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 2);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> yVec = Vector256.Create(y);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
+                                                                      yVec);
+                            Vector256<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count)),
+                                                                      yVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
+                                                                      Vector256.Create(y));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> yVec = Vector128.Create(y);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
+                                                                                                yVec);
+                            Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
+                                                                                                yVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 8:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
+                                                                                                Vector128.Create(y));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 6)),
+                                                                         y);
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 5)),
+                                                                         y);
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 4)),
+                                                                         y);
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 3)),
+                                                                         y);
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)),
+                                                                         y);
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)),
+                                                                         y);
+                        goto case 1;
+
+                    case 1:
+                        dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall4(ref T xRef, T y, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 4);
+
+                switch (remainder)
+                {
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> yVec = Vector256.Create(y);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
+                                                                      yVec);
+                            Vector256<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count)),
+                                                                      yVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    case 8:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
+                                                                      Vector256.Create(y));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> yVec = Vector128.Create(y);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
+                                                                                                yVec);
+                            Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
+                                                                                                yVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
+                                                                                                Vector128.Create(y));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)),
+                                                                             y);
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)),
+                                                                              y);
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall8(ref T xRef, T y, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 8);
+
+                switch (remainder)
+                {
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> yVec = Vector256.Create(y);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
+                                                                      yVec);
+                            Vector256<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count)),
+                                                                      yVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
+                                                                      Vector256.Create(y));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> yVec = Vector128.Create(y);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
+                                                                                                yVec);
+                            Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
+                                                                                                yVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    case 2:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
+                                                                                                Vector128.Create(y));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 1:
+                        {
+                            dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+        }
+
+        /// <summary>Aggregates all of the elements in the <paramref name="x"/> into a single value.</summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TAggregate">Specifies the operation to be performed on each pair of values.</typeparam>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static T HorizontalAggregate<T, TAggregate>(Vector256<T> x) where TAggregate : struct, IBinaryOperator<T> =>
+            HorizontalAggregate<T, TAggregate>(TAggregate.Invoke(x.GetLower(), x.GetUpper()));
+
+        /// <summary>Aggregates all of the elements in the <paramref name="x"/> into a single value.</summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TAggregate">Specifies the operation to be performed on each pair of values.</typeparam>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static T HorizontalAggregate<T, TAggregate>(Vector512<T> x) where TAggregate : struct, IBinaryOperator<T> =>
+            HorizontalAggregate<T, TAggregate>(TAggregate.Invoke(x.GetLower(), x.GetUpper()));
+
+        /// <summary>Aggregates all of the elements in the <paramref name="x"/> into a single value.</summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TAggregate">Specifies the operation to be performed on each pair of values.</typeparam>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static T HorizontalAggregate<T, TAggregate>(Vector128<T> x) where TAggregate : struct, IBinaryOperator<T>
+        {
+            // We need to do log2(count) operations to compute the total sum
+
+            if (Unsafe.SizeOf<T>() == 1)
+            {
+                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsByte(), Vector128.Create((byte)8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)).As<byte, T>());
+                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsByte(), Vector128.Create((byte)4, 5, 6, 7, 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>());
+                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsByte(), Vector128.Create((byte)2, 3, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>());
+                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsByte(), Vector128.Create((byte)1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>());
+            }
+            else if (Unsafe.SizeOf<T>() == 2)
+            {
+                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsInt16(), Vector128.Create(4, 5, 6, 7, 0, 1, 2, 3)).As<short, T>());
+                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsInt16(), Vector128.Create(2, 3, 0, 1, 4, 5, 6, 7)).As<short, T>());
+                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsInt16(), Vector128.Create(1, 0, 2, 3, 4, 5, 6, 7)).As<short, T>());
+            }
+            else if (Unsafe.SizeOf<T>() == 4)
+            {
+                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsInt32(), Vector128.Create(2, 3, 0, 1)).As<int, T>());
+                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsInt32(), Vector128.Create(1, 0, 3, 2)).As<int, T>());
+            }
+            else
+            {
+                Debug.Assert(Unsafe.SizeOf<T>() == 8);
+                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsInt64(), Vector128.Create(1, 0)).As<long, T>());
+            }
+
+            return x.ToScalar();
+        }
+
+        private readonly struct InvertedBinaryOperator<TOperator, T> : IBinaryOperator<T>
+            where TOperator : IBinaryOperator<T>
+        {
+            public static bool Vectorizable => TOperator.Vectorizable;
+            public static T Invoke(T x, T y) => TOperator.Invoke(y, x);
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => TOperator.Invoke(y, x);
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => TOperator.Invoke(y, x);
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => TOperator.Invoke(y, x);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IIndexOfOperator.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IIndexOfOperator.cs
new file mode 100644
index 000000000000..a90b8aaa2a33
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IIndexOfOperator.cs
@@ -0,0 +1,134 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static unsafe partial class TensorPrimitives
+    {
+        private interface IIndexOfOperator<T>
+        {
+            static abstract int Invoke(ref T result, T current, int resultIndex, int currentIndex);
+            static abstract void Invoke(ref Vector128<T> result, Vector128<T> current, ref Vector128<T> resultIndex, Vector128<T> currentIndex);
+            static abstract void Invoke(ref Vector256<T> result, Vector256<T> current, ref Vector256<T> resultIndex, Vector256<T> currentIndex);
+            static abstract void Invoke(ref Vector512<T> result, Vector512<T> current, ref Vector512<T> resultIndex, Vector512<T> currentIndex);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static int IndexOfFinalAggregate<T, TIndexOfOperator>(Vector128<T> result, Vector128<T> resultIndex)
+            where TIndexOfOperator : struct, IIndexOfOperator<T>
+        {
+            Vector128<T> tmpResult;
+            Vector128<T> tmpIndex;
+
+            if (sizeof(T) == 8)
+            {
+                // Compare 0 with 1
+                tmpResult = Vector128.Shuffle(result.AsInt64(), Vector128.Create(1, 0)).As<long, T>();
+                tmpIndex = Vector128.Shuffle(resultIndex.AsInt64(), Vector128.Create(1, 0)).As<long, T>();
+                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
+
+                // Return 0
+                return (int)resultIndex.As<T, long>().ToScalar();
+            }
+
+            if (sizeof(T) == 4)
+            {
+                // Compare 0,1 with 2,3
+                tmpResult = Vector128.Shuffle(result.AsInt32(), Vector128.Create(2, 3, 0, 1)).As<int, T>();
+                tmpIndex = Vector128.Shuffle(resultIndex.AsInt32(), Vector128.Create(2, 3, 0, 1)).As<int, T>();
+                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
+
+                // Compare 0 with 1
+                tmpResult = Vector128.Shuffle(result.AsInt32(), Vector128.Create(1, 0, 3, 2)).As<int, T>();
+                tmpIndex = Vector128.Shuffle(resultIndex.AsInt32(), Vector128.Create(1, 0, 3, 2)).As<int, T>();
+                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
+
+                // Return 0
+                return resultIndex.As<T, int>().ToScalar();
+            }
+
+            if (sizeof(T) == 2)
+            {
+                // Compare 0,1,2,3 with 4,5,6,7
+                tmpResult = Vector128.Shuffle(result.AsInt16(), Vector128.Create(4, 5, 6, 7, 0, 1, 2, 3)).As<short, T>();
+                tmpIndex = Vector128.Shuffle(resultIndex.AsInt16(), Vector128.Create(4, 5, 6, 7, 0, 1, 2, 3)).As<short, T>();
+                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
+
+                // Compare 0,1 with 2,3
+                tmpResult = Vector128.Shuffle(result.AsInt16(), Vector128.Create(2, 3, 0, 1, 4, 5, 6, 7)).As<short, T>();
+                tmpIndex = Vector128.Shuffle(resultIndex.AsInt16(), Vector128.Create(2, 3, 0, 1, 4, 5, 6, 7)).As<short, T>();
+                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
+
+                // Compare 0 with 1
+                tmpResult = Vector128.Shuffle(result.AsInt16(), Vector128.Create(1, 0, 2, 3, 4, 5, 6, 7)).As<short, T>();
+                tmpIndex = Vector128.Shuffle(resultIndex.AsInt16(), Vector128.Create(1, 0, 2, 3, 4, 5, 6, 7)).As<short, T>();
+                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
+
+                // Return 0
+                return resultIndex.As<T, short>().ToScalar();
+            }
+
+            Debug.Assert(sizeof(T) == 1);
+            {
+                // Compare 0,1,2,3,4,5,6,7 with 8,9,10,11,12,13,14,15
+                tmpResult = Vector128.Shuffle(result.AsByte(), Vector128.Create((byte)8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)).As<byte, T>();
+                tmpIndex = Vector128.Shuffle(resultIndex.AsByte(), Vector128.Create((byte)8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)).As<byte, T>();
+                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
+
+                // Compare 0,1,2,3 with 4,5,6,7
+                tmpResult = Vector128.Shuffle(result.AsByte(), Vector128.Create((byte)4, 5, 6, 7, 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>();
+                tmpIndex = Vector128.Shuffle(resultIndex.AsByte(), Vector128.Create((byte)4, 5, 6, 7, 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>();
+                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
+
+                // Compare 0,1 with 2,3
+                tmpResult = Vector128.Shuffle(result.AsByte(), Vector128.Create((byte)2, 3, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>();
+                tmpIndex = Vector128.Shuffle(resultIndex.AsByte(), Vector128.Create((byte)2, 3, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>();
+                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
+
+                // Compare 0 with 1
+                tmpResult = Vector128.Shuffle(result.AsByte(), Vector128.Create((byte)1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>();
+                tmpIndex = Vector128.Shuffle(resultIndex.AsByte(), Vector128.Create((byte)1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>();
+                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
+
+                // Return 0
+                return resultIndex.As<T, byte>().ToScalar();
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static int IndexOfFinalAggregate<T, TIndexOfOperator>(Vector256<T> result, Vector256<T> resultIndex)
+            where TIndexOfOperator : struct, IIndexOfOperator<T>
+        {
+            // Min the upper/lower halves of the Vector256
+            Vector128<T> resultLower = result.GetLower();
+            Vector128<T> indexLower = resultIndex.GetLower();
+
+            TIndexOfOperator.Invoke(ref resultLower, result.GetUpper(), ref indexLower, resultIndex.GetUpper());
+            return IndexOfFinalAggregate<T, TIndexOfOperator>(resultLower, indexLower);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static int IndexOfFinalAggregate<T, TIndexOfOperator>(Vector512<T> result, Vector512<T> resultIndex)
+            where TIndexOfOperator : struct, IIndexOfOperator<T>
+        {
+            Vector256<T> resultLower = result.GetLower();
+            Vector256<T> indexLower = resultIndex.GetLower();
+
+            TIndexOfOperator.Invoke(ref resultLower, result.GetUpper(), ref indexLower, resultIndex.GetUpper());
+            return IndexOfFinalAggregate<T, TIndexOfOperator>(resultLower, indexLower);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector128<T> IndexLessThan<T>(Vector128<T> indices1, Vector128<T> indices2) =>
+            sizeof(T) == sizeof(long) ? Vector128.LessThan(indices1.AsInt64(), indices2.AsInt64()).As<long, T>() :
+            sizeof(T) == sizeof(int) ? Vector128.LessThan(indices1.AsInt32(), indices2.AsInt32()).As<int, T>() :
+            sizeof(T) == sizeof(short) ? Vector128.LessThan(indices1.AsInt16(), indices2.AsInt16()).As<short, T>() :
+            Vector128.LessThan(indices1.AsByte(), indices2.AsByte()).As<byte, T>();
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IStatefulUnaryOperator.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IStatefulUnaryOperator.cs
new file mode 100644
index 000000000000..fc77a63a4c74
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IStatefulUnaryOperator.cs
@@ -0,0 +1,1211 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static unsafe partial class TensorPrimitives
+    {
+        /// <summary>Operator that takes one input value and returns a single value.</summary>
+        private interface IStatefulUnaryOperator<T>
+        {
+            static abstract bool Vectorizable { get; }
+            T Invoke(T x);
+            Vector128<T> Invoke(Vector128<T> x);
+            Vector256<T> Invoke(Vector256<T> x);
+            Vector512<T> Invoke(Vector512<T> x);
+        }
+
+        /// <summary>Performs an element-wise operation on <paramref name="x"/> and writes the results to <paramref name="destination"/>.</summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TStatefulUnaryOperator">Specifies the operation to perform on each element loaded from <paramref name="x"/>.</typeparam>
+        private static void InvokeSpanIntoSpan<T, TStatefulUnaryOperator>(
+            ReadOnlySpan<T> x, TStatefulUnaryOperator op, Span<T> destination)
+            where TStatefulUnaryOperator : struct, IStatefulUnaryOperator<T>
+        {
+            // NOTE: This implementation is an exact copy of InvokeSpanIntoSpan<T, TUnaryOperator>,
+            // except it accepts an operator that carries state with it, using instance rather than
+            // static invocation methods.
+
+            if (x.Length > destination.Length)
+            {
+                ThrowHelper.ThrowArgument_DestinationTooShort();
+            }
+
+            ValidateInputOutputSpanNonOverlapping(x, destination);
+
+            // Since every branch has a cost and since that cost is
+            // essentially lost for larger inputs, we do branches
+            // in a way that allows us to have the minimum possible
+            // for small sizes
+
+            ref T xRef = ref MemoryMarshal.GetReference(x);
+            ref T dRef = ref MemoryMarshal.GetReference(destination);
+
+            nuint remainder = (uint)x.Length;
+
+            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TStatefulUnaryOperator.Vectorizable)
+            {
+                if (remainder >= (uint)Vector512<T>.Count)
+                {
+                    Vectorized512(ref xRef, ref dRef, remainder, op);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref dRef, remainder, op);
+                }
+
+                return;
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && TStatefulUnaryOperator.Vectorizable)
+            {
+                if (remainder >= (uint)Vector256<T>.Count)
+                {
+                    Vectorized256(ref xRef, ref dRef, remainder, op);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref dRef, remainder, op);
+                }
+
+                return;
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TStatefulUnaryOperator.Vectorizable)
+            {
+                if (remainder >= (uint)Vector128<T>.Count)
+                {
+                    Vectorized128(ref xRef, ref dRef, remainder, op);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref dRef, remainder, op);
+                }
+
+                return;
+            }
+
+            // This is the software fallback when no acceleration is available
+            // It requires no branches to hit
+
+            SoftwareFallback(ref xRef, ref dRef, remainder, op);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void SoftwareFallback(ref T xRef, ref T dRef, nuint length, TStatefulUnaryOperator op)
+            {
+                for (nuint i = 0; i < length; i++)
+                {
+                    Unsafe.Add(ref dRef, i) = op.Invoke(Unsafe.Add(ref xRef, i));
+                }
+            }
+
+            static void Vectorized128(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
+                Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
+
+                if (remainder > (uint)(Vector128<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector128<T> vector1;
+                        Vector128<T> vector2;
+                        Vector128<T> vector3;
+                        Vector128<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector128<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)));
+                                vector2 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)));
+                                vector3 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)));
+                                vector4 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)));
+                                vector2 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)));
+                                vector3 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)));
+                                vector4 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<T>.Count * 8);
+                                dPtr += (uint)(Vector128<T>.Count * 8);
+
+                                remainder -= (uint)(Vector128<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector128<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)));
+                                vector2 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)));
+                                vector3 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)));
+                                vector4 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)));
+                                vector2 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)));
+                                vector3 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)));
+                                vector4 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<T>.Count * 8);
+                                dPtr += (uint)(Vector128<T>.Count * 8);
+
+                                remainder -= (uint)(Vector128<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
+
+                switch (remainder / (uint)Vector128<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized256(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
+                Vector256<T> end = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
+
+                if (remainder > (uint)(Vector256<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)dPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector256<T> vector1;
+                        Vector256<T> vector2;
+                        Vector256<T> vector3;
+                        Vector256<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector256<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)));
+                                vector2 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)));
+                                vector3 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)));
+                                vector4 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)));
+                                vector2 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)));
+                                vector3 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)));
+                                vector4 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<T>.Count * 8);
+                                dPtr += (uint)(Vector256<T>.Count * 8);
+
+                                remainder -= (uint)(Vector256<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector256<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)));
+                                vector2 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)));
+                                vector3 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)));
+                                vector4 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)));
+                                vector2 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)));
+                                vector3 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)));
+                                vector4 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<T>.Count * 8);
+                                dPtr += (uint)(Vector256<T>.Count * 8);
+
+                                remainder -= (uint)(Vector256<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector256<T>.Count - 1)) & (nuint)(-Vector256<T>.Count);
+
+                switch (remainder / (uint)Vector256<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized512(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector512<T> beg = op.Invoke(Vector512.LoadUnsafe(ref xRef));
+                Vector512<T> end = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count));
+
+                if (remainder > (uint)(Vector512<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector512<T> vector1;
+                        Vector512<T> vector2;
+                        Vector512<T> vector3;
+                        Vector512<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector512<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)));
+                                vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)));
+                                vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)));
+                                vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)));
+                                vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)));
+                                vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)));
+                                vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<T>.Count * 8);
+                                dPtr += (uint)(Vector512<T>.Count * 8);
+
+                                remainder -= (uint)(Vector512<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector512<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)));
+                                vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)));
+                                vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)));
+                                vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)));
+                                vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)));
+                                vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)));
+                                vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<T>.Count * 8);
+                                dPtr += (uint)(Vector512<T>.Count * 8);
+
+                                remainder -= (uint)(Vector512<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
+
+                switch (remainder / (uint)Vector512<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
+            {
+                if (sizeof(T) == 1)
+                {
+                    VectorizedSmall1(ref xRef, ref dRef, remainder, op);
+                }
+                else if (sizeof(T) == 2)
+                {
+                    VectorizedSmall2(ref xRef, ref dRef, remainder, op);
+                }
+                else if (sizeof(T) == 4)
+                {
+                    VectorizedSmall4(ref xRef, ref dRef, remainder, op);
+                }
+                else
+                {
+                    Debug.Assert(sizeof(T) == 8);
+                    VectorizedSmall8(ref xRef, ref dRef, remainder, op);
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall1(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
+            {
+                Debug.Assert(sizeof(T) == 1);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 63:
+                    case 62:
+                    case 61:
+                    case 60:
+                    case 59:
+                    case 58:
+                    case 57:
+                    case 56:
+                    case 55:
+                    case 54:
+                    case 53:
+                    case 52:
+                    case 51:
+                    case 50:
+                    case 49:
+                    case 48:
+                    case 47:
+                    case 46:
+                    case 45:
+                    case 44:
+                    case 43:
+                    case 42:
+                    case 41:
+                    case 40:
+                    case 39:
+                    case 38:
+                    case 37:
+                    case 36:
+                    case 35:
+                    case 34:
+                    case 33:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            Vector256<T> end = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 32:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 15:
+                        Unsafe.Add(ref dRef, 14) = op.Invoke(Unsafe.Add(ref xRef, 14));
+                        goto case 14;
+
+                    case 14:
+                        Unsafe.Add(ref dRef, 13) = op.Invoke(Unsafe.Add(ref xRef, 13));
+                        goto case 13;
+
+                    case 13:
+                        Unsafe.Add(ref dRef, 12) = op.Invoke(Unsafe.Add(ref xRef, 12));
+                        goto case 12;
+
+                    case 12:
+                        Unsafe.Add(ref dRef, 11) = op.Invoke(Unsafe.Add(ref xRef, 11));
+                        goto case 11;
+
+                    case 11:
+                        Unsafe.Add(ref dRef, 10) = op.Invoke(Unsafe.Add(ref xRef, 10));
+                        goto case 10;
+
+                    case 10:
+                        Unsafe.Add(ref dRef, 9) = op.Invoke(Unsafe.Add(ref xRef, 9));
+                        goto case 9;
+
+                    case 9:
+                        Unsafe.Add(ref dRef, 8) = op.Invoke(Unsafe.Add(ref xRef, 8));
+                        goto case 8;
+
+                    case 8:
+                        Unsafe.Add(ref dRef, 7) = op.Invoke(Unsafe.Add(ref xRef, 7));
+                        goto case 7;
+
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = op.Invoke(Unsafe.Add(ref xRef, 6));
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = op.Invoke(Unsafe.Add(ref xRef, 5));
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = op.Invoke(Unsafe.Add(ref xRef, 4));
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = op.Invoke(Unsafe.Add(ref xRef, 3));
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = op.Invoke(Unsafe.Add(ref xRef, 2));
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = op.Invoke(Unsafe.Add(ref xRef, 1));
+                        goto case 1;
+
+                    case 1:
+                        dRef = op.Invoke(xRef);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall2(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
+            {
+                Debug.Assert(sizeof(T) == 2);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            Vector256<T> end = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 8:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = op.Invoke(Unsafe.Add(ref xRef, 6));
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = op.Invoke(Unsafe.Add(ref xRef, 5));
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = op.Invoke(Unsafe.Add(ref xRef, 4));
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = op.Invoke(Unsafe.Add(ref xRef, 3));
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = op.Invoke(Unsafe.Add(ref xRef, 2));
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = op.Invoke(Unsafe.Add(ref xRef, 1));
+                        goto case 1;
+
+                    case 1:
+                        dRef = op.Invoke(xRef);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall4(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
+            {
+                Debug.Assert(sizeof(T) == 4);
+
+                switch (remainder)
+                {
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            Vector256<T> end = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    case 8:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Unsafe.Add(ref dRef, 2) = op.Invoke(Unsafe.Add(ref xRef, 2));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Unsafe.Add(ref dRef, 1) = op.Invoke(Unsafe.Add(ref xRef, 1));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            dRef = op.Invoke(xRef);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall8(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
+            {
+                Debug.Assert(sizeof(T) == 8);
+
+                switch (remainder)
+                {
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            Vector256<T> end = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    case 2:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 1:
+                        {
+                            dRef = op.Invoke(xRef);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.ITernaryOperator.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.ITernaryOperator.cs
new file mode 100644
index 000000000000..a36e52c108e6
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.ITernaryOperator.cs
@@ -0,0 +1,4459 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static unsafe partial class TensorPrimitives
+    {
+        /// <summary>Operator that takes three input values and returns a single value.</summary>
+        private interface ITernaryOperator<T>
+        {
+            static abstract T Invoke(T x, T y, T z);
+            static abstract Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z);
+            static abstract Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> z);
+            static abstract Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z);
+        }
+
+        /// <summary>
+        /// Performs an element-wise operation on <paramref name="x"/>, <paramref name="y"/>, and <paramref name="z"/>,
+        /// and writes the results to <paramref name="destination"/>.
+        /// </summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TTernaryOperator">
+        /// Specifies the operation to perform on the pair-wise elements loaded from <paramref name="x"/>, <paramref name="y"/>,
+        /// and <paramref name="z"/>.
+        /// </typeparam>
+        private static void InvokeSpanSpanSpanIntoSpan<T, TTernaryOperator>(
+            ReadOnlySpan<T> x, ReadOnlySpan<T> y, ReadOnlySpan<T> z, Span<T> destination)
+            where TTernaryOperator : struct, ITernaryOperator<T>
+        {
+            if (x.Length != y.Length || x.Length != z.Length)
+            {
+                ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
+            }
+
+            if (x.Length > destination.Length)
+            {
+                ThrowHelper.ThrowArgument_DestinationTooShort();
+            }
+
+            ValidateInputOutputSpanNonOverlapping(x, destination);
+            ValidateInputOutputSpanNonOverlapping(y, destination);
+            ValidateInputOutputSpanNonOverlapping(z, destination);
+
+            // Since every branch has a cost and since that cost is
+            // essentially lost for larger inputs, we do branches
+            // in a way that allows us to have the minimum possible
+            // for small sizes
+
+            ref T xRef = ref MemoryMarshal.GetReference(x);
+            ref T yRef = ref MemoryMarshal.GetReference(y);
+            ref T zRef = ref MemoryMarshal.GetReference(z);
+            ref T dRef = ref MemoryMarshal.GetReference(destination);
+
+            nuint remainder = (uint)x.Length;
+
+            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported)
+            {
+                if (remainder >= (uint)Vector512<T>.Count)
+                {
+                    Vectorized512(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported)
+            {
+                if (remainder >= (uint)Vector256<T>.Count)
+                {
+                    Vectorized256(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported)
+            {
+                if (remainder >= (uint)Vector128<T>.Count)
+                {
+                    Vectorized128(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            // This is the software fallback when no acceleration is available
+            // It requires no branches to hit
+
+            SoftwareFallback(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void SoftwareFallback(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint length)
+            {
+                for (nuint i = 0; i < length; i++)
+                {
+                    Unsafe.Add(ref dRef, i) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, i),
+                                                                      Unsafe.Add(ref yRef, i),
+                                                                      Unsafe.Add(ref zRef, i));
+                }
+            }
+
+            static void Vectorized128(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                           Vector128.LoadUnsafe(ref yRef),
+                                                           Vector128.LoadUnsafe(ref zRef));
+                Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                           Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
+                                                           Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
+
+                if (remainder > (uint)(Vector128<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* py = &yRef)
+                    fixed (T* pz = &zRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* yPtr = py;
+                        T* zPtr = pz;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            yPtr += misalignment;
+                            zPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector128<T> vector1;
+                        Vector128<T> vector2;
+                        Vector128<T> vector3;
+                        Vector128<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector128<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 0)));
+                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 1)));
+                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 2)));
+                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 4)));
+                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 5)));
+                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 6)));
+                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<T>.Count * 8);
+                                yPtr += (uint)(Vector128<T>.Count * 8);
+                                zPtr += (uint)(Vector128<T>.Count * 8);
+                                dPtr += (uint)(Vector128<T>.Count * 8);
+
+                                remainder -= (uint)(Vector128<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector128<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 0)));
+                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 1)));
+                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 2)));
+                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 4)));
+                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 5)));
+                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 6)));
+                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)),
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<T>.Count * 8);
+                                yPtr += (uint)(Vector128<T>.Count * 8);
+                                zPtr += (uint)(Vector128<T>.Count * 8);
+                                dPtr += (uint)(Vector128<T>.Count * 8);
+
+                                remainder -= (uint)(Vector128<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        yRef = ref *yPtr;
+                        zRef = ref *zPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
+
+                switch (remainder / (uint)Vector128<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 8)),
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 7)),
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 6)),
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 5)),
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 4)),
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 3)),
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 2)),
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized256(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                           Vector256.LoadUnsafe(ref yRef),
+                                                           Vector256.LoadUnsafe(ref zRef));
+                Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                           Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
+                                                           Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
+
+                if (remainder > (uint)(Vector256<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* py = &yRef)
+                    fixed (T* pz = &zRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* yPtr = py;
+                        T* zPtr = pz;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)dPtr % (uint)sizeof(Vector256<T>))) / (nuint)sizeof(T);
+
+                            xPtr += misalignment;
+                            yPtr += misalignment;
+                            zPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector256<T> vector1;
+                        Vector256<T> vector2;
+                        Vector256<T> vector3;
+                        Vector256<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector256<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 0)));
+                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 1)));
+                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 2)));
+                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 4)));
+                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 5)));
+                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 6)));
+                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<T>.Count * 8);
+                                yPtr += (uint)(Vector256<T>.Count * 8);
+                                zPtr += (uint)(Vector256<T>.Count * 8);
+                                dPtr += (uint)(Vector256<T>.Count * 8);
+
+                                remainder -= (uint)(Vector256<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector256<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 0)));
+                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 1)));
+                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 2)));
+                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 4)));
+                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 5)));
+                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 6)));
+                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)),
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<T>.Count * 8);
+                                yPtr += (uint)(Vector256<T>.Count * 8);
+                                zPtr += (uint)(Vector256<T>.Count * 8);
+                                dPtr += (uint)(Vector256<T>.Count * 8);
+
+                                remainder -= (uint)(Vector256<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        yRef = ref *yPtr;
+                        zRef = ref *zPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector256<T>.Count - 1)) & (nuint)(-Vector256<T>.Count);
+
+                switch (remainder / (uint)Vector256<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 8)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 8)),
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 7)),
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 6)),
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 5)),
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 4)),
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 3)),
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 2)),
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized512(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector512<T> beg = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
+                                                           Vector512.LoadUnsafe(ref yRef),
+                                                           Vector512.LoadUnsafe(ref zRef));
+                Vector512<T> end = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
+                                                           Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count),
+                                                           Vector512.LoadUnsafe(ref zRef, remainder - (uint)Vector512<T>.Count));
+
+                if (remainder > (uint)(Vector512<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* py = &yRef)
+                    fixed (T* pz = &zRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* yPtr = py;
+                        T* zPtr = pz;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            yPtr += misalignment;
+                            zPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector512<T> vector1;
+                        Vector512<T> vector2;
+                        Vector512<T> vector3;
+                        Vector512<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector512<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 0)));
+                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 1)));
+                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 2)));
+                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 4)));
+                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 5)));
+                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 6)));
+                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<T>.Count * 8);
+                                yPtr += (uint)(Vector512<T>.Count * 8);
+                                zPtr += (uint)(Vector512<T>.Count * 8);
+                                dPtr += (uint)(Vector512<T>.Count * 8);
+
+                                remainder -= (uint)(Vector512<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector512<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 0)));
+                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 1)));
+                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 2)));
+                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 4)));
+                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 5)));
+                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 6)));
+                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<T>.Count * 8);
+                                yPtr += (uint)(Vector512<T>.Count * 8);
+                                zPtr += (uint)(Vector512<T>.Count * 8);
+                                dPtr += (uint)(Vector512<T>.Count * 8);
+
+                                remainder -= (uint)(Vector512<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        yRef = ref *yPtr;
+                        zRef = ref *zPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
+
+                switch (remainder / (uint)Vector512<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 8)),
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)),
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)),
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)),
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)),
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)),
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)),
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
+            {
+                if (sizeof(T) == 1)
+                {
+                    VectorizedSmall1(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
+                }
+                else if (sizeof(T) == 2)
+                {
+                    VectorizedSmall2(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
+                }
+                else if (sizeof(T) == 4)
+                {
+                    VectorizedSmall4(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
+                }
+                else
+                {
+                    Debug.Assert(sizeof(T) == 8);
+                    VectorizedSmall8(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall1(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 1);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 63:
+                    case 62:
+                    case 61:
+                    case 60:
+                    case 59:
+                    case 58:
+                    case 57:
+                    case 56:
+                    case 55:
+                    case 54:
+                    case 53:
+                    case 52:
+                    case 51:
+                    case 50:
+                    case 49:
+                    case 48:
+                    case 47:
+                    case 46:
+                    case 45:
+                    case 44:
+                    case 43:
+                    case 42:
+                    case 41:
+                    case 40:
+                    case 39:
+                    case 38:
+                    case 37:
+                    case 36:
+                    case 35:
+                    case 34:
+                    case 33:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                       Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
+                                                                       Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 32:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                           Vector256.LoadUnsafe(ref yRef),
+                                                                           Vector256.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                           Vector128.LoadUnsafe(ref yRef),
+                                                                           Vector128.LoadUnsafe(ref zRef));
+                            Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                           Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
+                                                                           Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                           Vector128.LoadUnsafe(ref yRef),
+                                                                           Vector128.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 15:
+                        Unsafe.Add(ref dRef, 14) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 14),
+                                                                          Unsafe.Add(ref yRef, 14),
+                                                                          Unsafe.Add(ref zRef, 14));
+                        goto case 14;
+
+                    case 14:
+                        Unsafe.Add(ref dRef, 13) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 13),
+                                                                          Unsafe.Add(ref yRef, 13),
+                                                                          Unsafe.Add(ref zRef, 13));
+                        goto case 13;
+
+                    case 13:
+                        Unsafe.Add(ref dRef, 12) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 12),
+                                                                          Unsafe.Add(ref yRef, 12),
+                                                                          Unsafe.Add(ref zRef, 12));
+                        goto case 12;
+
+                    case 12:
+                        Unsafe.Add(ref dRef, 11) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 11),
+                                                                          Unsafe.Add(ref yRef, 11),
+                                                                          Unsafe.Add(ref zRef, 11));
+                        goto case 11;
+
+                    case 11:
+                        Unsafe.Add(ref dRef, 10) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 10),
+                                                                          Unsafe.Add(ref yRef, 10),
+                                                                          Unsafe.Add(ref zRef, 10));
+                        goto case 10;
+
+                    case 10:
+                        Unsafe.Add(ref dRef, 9) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 9),
+                                                                          Unsafe.Add(ref yRef, 9),
+                                                                          Unsafe.Add(ref zRef, 9));
+                        goto case 9;
+
+                    case 9:
+                        Unsafe.Add(ref dRef, 8) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 8),
+                                                                          Unsafe.Add(ref yRef, 8),
+                                                                          Unsafe.Add(ref zRef, 8));
+                        goto case 8;
+
+                    case 8:
+                        Unsafe.Add(ref dRef, 7) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 7),
+                                                                          Unsafe.Add(ref yRef, 7),
+                                                                          Unsafe.Add(ref zRef, 7));
+                        goto case 7;
+
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
+                                                                          Unsafe.Add(ref yRef, 6),
+                                                                          Unsafe.Add(ref zRef, 6));
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
+                                                                          Unsafe.Add(ref yRef, 5),
+                                                                          Unsafe.Add(ref zRef, 5));
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
+                                                                          Unsafe.Add(ref yRef, 4),
+                                                                          Unsafe.Add(ref zRef, 4));
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
+                                                                          Unsafe.Add(ref yRef, 3),
+                                                                          Unsafe.Add(ref zRef, 3));
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                          Unsafe.Add(ref yRef, 2),
+                                                                          Unsafe.Add(ref zRef, 2));
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                          Unsafe.Add(ref yRef, 1),
+                                                                          Unsafe.Add(ref zRef, 1));
+                        goto case 1;
+
+                    case 1:
+                        dRef = TTernaryOperator.Invoke(xRef, yRef, zRef);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall2(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 2);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                       Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
+                                                                       Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                           Vector256.LoadUnsafe(ref yRef),
+                                                                           Vector256.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                           Vector128.LoadUnsafe(ref yRef),
+                                                                           Vector128.LoadUnsafe(ref zRef));
+                            Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                           Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
+                                                                           Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 8:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                           Vector128.LoadUnsafe(ref yRef),
+                                                                           Vector128.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
+                                                                          Unsafe.Add(ref yRef, 6),
+                                                                          Unsafe.Add(ref zRef, 6));
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
+                                                                          Unsafe.Add(ref yRef, 5),
+                                                                          Unsafe.Add(ref zRef, 5));
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
+                                                                          Unsafe.Add(ref yRef, 4),
+                                                                          Unsafe.Add(ref zRef, 4));
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
+                                                                          Unsafe.Add(ref yRef, 3),
+                                                                          Unsafe.Add(ref zRef, 3));
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                          Unsafe.Add(ref yRef, 2),
+                                                                          Unsafe.Add(ref zRef, 2));
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                          Unsafe.Add(ref yRef, 1),
+                                                                          Unsafe.Add(ref zRef, 1));
+                        goto case 1;
+
+                    case 1:
+                        dRef = TTernaryOperator.Invoke(xRef, yRef, zRef);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall4(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 4);
+
+                switch (remainder)
+                {
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                       Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
+                                                                       Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    case 8:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                           Vector256.LoadUnsafe(ref yRef),
+                                                                           Vector256.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                           Vector128.LoadUnsafe(ref yRef),
+                                                                           Vector128.LoadUnsafe(ref zRef));
+                            Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                           Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
+                                                                           Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                           Vector128.LoadUnsafe(ref yRef),
+                                                                           Vector128.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                              Unsafe.Add(ref yRef, 2),
+                                                                              Unsafe.Add(ref zRef, 2));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                              Unsafe.Add(ref yRef, 1),
+                                                                              Unsafe.Add(ref zRef, 1));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            dRef = TTernaryOperator.Invoke(xRef, yRef, zRef);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall8(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 8);
+
+                switch (remainder)
+                {
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                       Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
+                                                                       Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.LoadUnsafe(ref yRef),
+                                                                       Vector128.LoadUnsafe(ref zRef));
+                            Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                       Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
+                                                                       Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    case 2:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.LoadUnsafe(ref yRef),
+                                                                       Vector128.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 1:
+                        {
+                            dRef = TTernaryOperator.Invoke(xRef, yRef, zRef);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Performs an element-wise operation on <paramref name="x"/>, <paramref name="y"/>, and <paramref name="z"/>,
+        /// and writes the results to <paramref name="destination"/>.
+        /// </summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TTernaryOperator">
+        /// Specifies the operation to perform on the pair-wise elements loaded from <paramref name="x"/> and <paramref name="y"/>
+        /// with <paramref name="z"/>.
+        /// </typeparam>
+        private static void InvokeSpanSpanScalarIntoSpan<T, TTernaryOperator>(
+            ReadOnlySpan<T> x, ReadOnlySpan<T> y, T z, Span<T> destination)
+            where TTernaryOperator : struct, ITernaryOperator<T>
+        {
+            if (x.Length != y.Length)
+            {
+                ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
+            }
+
+            if (x.Length > destination.Length)
+            {
+                ThrowHelper.ThrowArgument_DestinationTooShort();
+            }
+
+            ValidateInputOutputSpanNonOverlapping(x, destination);
+            ValidateInputOutputSpanNonOverlapping(y, destination);
+
+            // Since every branch has a cost and since that cost is
+            // essentially lost for larger inputs, we do branches
+            // in a way that allows us to have the minimum possible
+            // for small sizes
+
+            ref T xRef = ref MemoryMarshal.GetReference(x);
+            ref T yRef = ref MemoryMarshal.GetReference(y);
+            ref T dRef = ref MemoryMarshal.GetReference(destination);
+
+            nuint remainder = (uint)x.Length;
+
+            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported)
+            {
+                if (remainder >= (uint)Vector512<T>.Count)
+                {
+                    Vectorized512(ref xRef, ref yRef, z, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref yRef, z, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported)
+            {
+                if (remainder >= (uint)Vector256<T>.Count)
+                {
+                    Vectorized256(ref xRef, ref yRef, z, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref yRef, z, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported)
+            {
+                if (remainder >= (uint)Vector128<T>.Count)
+                {
+                    Vectorized128(ref xRef, ref yRef, z, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref yRef, z, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            // This is the software fallback when no acceleration is available
+            // It requires no branches to hit
+
+            SoftwareFallback(ref xRef, ref yRef, z, ref dRef, remainder);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void SoftwareFallback(ref T xRef, ref T yRef, T z, ref T dRef, nuint length)
+            {
+                for (nuint i = 0; i < length; i++)
+                {
+                    Unsafe.Add(ref dRef, i) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, i),
+                                                                      Unsafe.Add(ref yRef, i),
+                                                                      z);
+                }
+            }
+
+            static void Vectorized128(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector128<T> zVec = Vector128.Create(z);
+
+                Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                               Vector128.LoadUnsafe(ref yRef),
+                                                               zVec);
+                Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                               Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
+                                                               zVec);
+
+                if (remainder > (uint)(Vector128<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* py = &yRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* yPtr = py;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            yPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector128<T> vector1;
+                        Vector128<T> vector2;
+                        Vector128<T> vector3;
+                        Vector128<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector128<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)),
+                                                                  zVec);
+                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)),
+                                                                  zVec);
+                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)),
+                                                                  zVec);
+                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)),
+                                                                  zVec);
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)),
+                                                                  zVec);
+                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)),
+                                                                  zVec);
+                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)),
+                                                                  zVec);
+                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)),
+                                                                  zVec);
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<T>.Count * 8);
+                                yPtr += (uint)(Vector128<T>.Count * 8);
+                                dPtr += (uint)(Vector128<T>.Count * 8);
+
+                                remainder -= (uint)(Vector128<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector128<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)),
+                                                                  zVec);
+                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)),
+                                                                  zVec);
+                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)),
+                                                                  zVec);
+                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)),
+                                                                  zVec);
+
+                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)),
+                                                                  zVec);
+                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)),
+                                                                  zVec);
+                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)),
+                                                                  zVec);
+                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
+                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)),
+                                                                  zVec);
+
+                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<T>.Count * 8);
+                                yPtr += (uint)(Vector128<T>.Count * 8);
+                                dPtr += (uint)(Vector128<T>.Count * 8);
+
+                                remainder -= (uint)(Vector128<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        yRef = ref *yPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
+
+                switch (remainder / (uint)Vector128<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 8)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 7)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 6)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 5)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 4)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 3)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
+                                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 2)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized256(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector256<T> zVec = Vector256.Create(z);
+
+                Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                               Vector256.LoadUnsafe(ref yRef),
+                                                               zVec);
+                Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                               Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
+                                                               zVec);
+
+                if (remainder > (uint)(Vector256<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* py = &yRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* yPtr = py;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)dPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            yPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector256<T> vector1;
+                        Vector256<T> vector2;
+                        Vector256<T> vector3;
+                        Vector256<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector256<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)),
+                                                                  zVec);
+                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)),
+                                                                  zVec);
+                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)),
+                                                                  zVec);
+                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)),
+                                                                  zVec);
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)),
+                                                                  zVec);
+                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)),
+                                                                  zVec);
+                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)),
+                                                                  zVec);
+                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)),
+                                                                  zVec);
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<T>.Count * 8);
+                                yPtr += (uint)(Vector256<T>.Count * 8);
+                                dPtr += (uint)(Vector256<T>.Count * 8);
+
+                                remainder -= (uint)(Vector256<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector256<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)),
+                                                                  zVec);
+                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)),
+                                                                  zVec);
+                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)),
+                                                                  zVec);
+                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)),
+                                                                  zVec);
+
+                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)),
+                                                                  zVec);
+                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)),
+                                                                  zVec);
+                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)),
+                                                                  zVec);
+                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
+                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)),
+                                                                  zVec);
+
+                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<T>.Count * 8);
+                                yPtr += (uint)(Vector256<T>.Count * 8);
+                                dPtr += (uint)(Vector256<T>.Count * 8);
+
+                                remainder -= (uint)(Vector256<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        yRef = ref *yPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector256<T>.Count - 1)) & (nuint)(-Vector256<T>.Count);
+
+                switch (remainder / (uint)Vector256<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 8)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 8)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 7)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 6)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 5)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 4)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 3)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)),
+                                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 2)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized512(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector512<T> zVec = Vector512.Create(z);
+
+                Vector512<T> beg = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
+                                                           Vector512.LoadUnsafe(ref yRef),
+                                                           zVec);
+                Vector512<T> end = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
+                                                           Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count),
+                                                           zVec);
+
+                if (remainder > (uint)(Vector512<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* py = &yRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* yPtr = py;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            yPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector512<T> vector1;
+                        Vector512<T> vector2;
+                        Vector512<T> vector3;
+                        Vector512<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector512<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
+                                                                  zVec);
+                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
+                                                                  zVec);
+                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
+                                                                  zVec);
+                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
+                                                                  zVec);
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
+                                                                  zVec);
+                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
+                                                                  zVec);
+                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
+                                                                  zVec);
+                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
+                                                                  zVec);
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<T>.Count * 8);
+                                yPtr += (uint)(Vector512<T>.Count * 8);
+                                dPtr += (uint)(Vector512<T>.Count * 8);
+
+                                remainder -= (uint)(Vector512<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector512<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
+                                                                  zVec);
+                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
+                                                                  zVec);
+                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
+                                                                  zVec);
+                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
+                                                                  zVec);
+
+                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
+                                                                  zVec);
+                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
+                                                                  zVec);
+                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
+                                                                  zVec);
+                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
+                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
+                                                                  zVec);
+
+                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<T>.Count * 8);
+                                yPtr += (uint)(Vector512<T>.Count * 8);
+                                dPtr += (uint)(Vector512<T>.Count * 8);
+
+                                remainder -= (uint)(Vector512<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        yRef = ref *yPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
+
+                switch (remainder / (uint)Vector512<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 8)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
+                                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)),
+                                                                          zVec);
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
+            {
+                if (sizeof(T) == 1)
+                {
+                    VectorizedSmall1(ref xRef, ref yRef, z, ref dRef, remainder);
+                }
+                else if (sizeof(T) == 2)
+                {
+                    VectorizedSmall2(ref xRef, ref yRef, z, ref dRef, remainder);
+                }
+                else if (sizeof(T) == 4)
+                {
+                    VectorizedSmall4(ref xRef, ref yRef, z, ref dRef, remainder);
+                }
+                else
+                {
+                    Debug.Assert(sizeof(T) == 8);
+                    VectorizedSmall8(ref xRef, ref yRef, z, ref dRef, remainder);
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall1(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 1);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 63:
+                    case 62:
+                    case 61:
+                    case 60:
+                    case 59:
+                    case 58:
+                    case 57:
+                    case 56:
+                    case 55:
+                    case 54:
+                    case 53:
+                    case 52:
+                    case 51:
+                    case 50:
+                    case 49:
+                    case 48:
+                    case 47:
+                    case 46:
+                    case 45:
+                    case 44:
+                    case 43:
+                    case 42:
+                    case 41:
+                    case 40:
+                    case 39:
+                    case 38:
+                    case 37:
+                    case 36:
+                    case 35:
+                    case 34:
+                    case 33:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> zVec = Vector256.Create(z);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       zVec);
+                            Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                       Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
+                                                                       zVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 32:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       Vector256.Create(z));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> zVec = Vector128.Create(z);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.LoadUnsafe(ref yRef),
+                                                                       zVec);
+                            Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                       Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
+                                                                       zVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.LoadUnsafe(ref yRef),
+                                                                       Vector128.Create(z));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 15:
+                        Unsafe.Add(ref dRef, 14) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 14),
+                                                                          Unsafe.Add(ref yRef, 14),
+                                                                          z);
+                        goto case 14;
+
+                    case 14:
+                        Unsafe.Add(ref dRef, 13) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 13),
+                                                                          Unsafe.Add(ref yRef, 13),
+                                                                          z);
+                        goto case 13;
+
+                    case 13:
+                        Unsafe.Add(ref dRef, 12) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 12),
+                                                                          Unsafe.Add(ref yRef, 12),
+                                                                          z);
+                        goto case 12;
+
+                    case 12:
+                        Unsafe.Add(ref dRef, 11) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 11),
+                                                                          Unsafe.Add(ref yRef, 11),
+                                                                          z);
+                        goto case 11;
+
+                    case 11:
+                        Unsafe.Add(ref dRef, 10) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 10),
+                                                                          Unsafe.Add(ref yRef, 10),
+                                                                          z);
+                        goto case 10;
+
+                    case 10:
+                        Unsafe.Add(ref dRef, 9) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 9),
+                                                                          Unsafe.Add(ref yRef, 9),
+                                                                          z);
+                        goto case 9;
+
+                    case 9:
+                        Unsafe.Add(ref dRef, 8) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 8),
+                                                                          Unsafe.Add(ref yRef, 8),
+                                                                          z);
+                        goto case 8;
+
+                    case 8:
+                        Unsafe.Add(ref dRef, 7) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 7),
+                                                                          Unsafe.Add(ref yRef, 7),
+                                                                          z);
+                        goto case 7;
+
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
+                                                                          Unsafe.Add(ref yRef, 6),
+                                                                          z);
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
+                                                                          Unsafe.Add(ref yRef, 5),
+                                                                          z);
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
+                                                                          Unsafe.Add(ref yRef, 4),
+                                                                          z);
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
+                                                                          Unsafe.Add(ref yRef, 3),
+                                                                          z);
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                          Unsafe.Add(ref yRef, 2),
+                                                                          z);
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                          Unsafe.Add(ref yRef, 1),
+                                                                          z);
+                        goto case 1;
+
+                    case 1:
+                        dRef = TTernaryOperator.Invoke(xRef, yRef, z);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall2(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 2);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> zVec = Vector256.Create(z);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       zVec);
+                            Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                       Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
+                                                                       zVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       Vector256.Create(z));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> zVec = Vector128.Create(z);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.LoadUnsafe(ref yRef),
+                                                                       zVec);
+                            Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                       Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
+                                                                       zVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 8:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.LoadUnsafe(ref yRef),
+                                                                       Vector128.Create(z));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
+                                                                          Unsafe.Add(ref yRef, 6),
+                                                                          z);
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
+                                                                          Unsafe.Add(ref yRef, 5),
+                                                                          z);
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
+                                                                          Unsafe.Add(ref yRef, 4),
+                                                                          z);
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
+                                                                          Unsafe.Add(ref yRef, 3),
+                                                                          z);
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                          Unsafe.Add(ref yRef, 2),
+                                                                          z);
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                          Unsafe.Add(ref yRef, 1),
+                                                                          z);
+                        goto case 1;
+
+                    case 1:
+                        dRef = TTernaryOperator.Invoke(xRef, yRef, z);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall4(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 4);
+
+                switch (remainder)
+                {
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> zVec = Vector256.Create(z);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       zVec);
+                            Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                       Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
+                                                                       zVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    case 8:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       Vector256.Create(z));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> zVec = Vector128.Create(z);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.LoadUnsafe(ref yRef),
+                                                                       zVec);
+                            Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                       Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
+                                                                       zVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.LoadUnsafe(ref yRef),
+                                                                       Vector128.Create(z));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                              Unsafe.Add(ref yRef, 2),
+                                                                              z);
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                              Unsafe.Add(ref yRef, 1),
+                                                                              z);
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            dRef = TTernaryOperator.Invoke(xRef, yRef, z);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall8(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 8);
+
+                switch (remainder)
+                {
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> zVec = Vector256.Create(z);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       zVec);
+                            Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                       Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
+                                                                       zVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.LoadUnsafe(ref yRef),
+                                                                       Vector256.Create(z));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> zVec = Vector128.Create(z);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.LoadUnsafe(ref yRef),
+                                                                       zVec);
+                            Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                       Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
+                                                                       zVec);
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    case 2:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.LoadUnsafe(ref yRef),
+                                                                       Vector128.Create(z));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 1:
+                        {
+                            dRef = TTernaryOperator.Invoke(xRef, yRef, z);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Performs an element-wise operation on <paramref name="x"/>, <paramref name="y"/>, and <paramref name="z"/>,
+        /// and writes the results to <paramref name="destination"/>.
+        /// </summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TTernaryOperator">
+        /// Specifies the operation to perform on the pair-wise element loaded from <paramref name="x"/>, with <paramref name="y"/>,
+        /// and the element loaded from <paramref name="z"/>.
+        /// </typeparam>
+        private static void InvokeSpanScalarSpanIntoSpan<T, TTernaryOperator>(
+            ReadOnlySpan<T> x, T y, ReadOnlySpan<T> z, Span<T> destination)
+            where TTernaryOperator : struct, ITernaryOperator<T>
+        {
+            if (x.Length != z.Length)
+            {
+                ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
+            }
+
+            if (x.Length > destination.Length)
+            {
+                ThrowHelper.ThrowArgument_DestinationTooShort();
+            }
+
+            ValidateInputOutputSpanNonOverlapping(x, destination);
+            ValidateInputOutputSpanNonOverlapping(z, destination);
+
+            // Since every branch has a cost and since that cost is
+            // essentially lost for larger inputs, we do branches
+            // in a way that allows us to have the minimum possible
+            // for small sizes
+
+            ref T xRef = ref MemoryMarshal.GetReference(x);
+            ref T zRef = ref MemoryMarshal.GetReference(z);
+            ref T dRef = ref MemoryMarshal.GetReference(destination);
+
+            nuint remainder = (uint)x.Length;
+
+            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported)
+            {
+                if (remainder >= (uint)Vector512<T>.Count)
+                {
+                    Vectorized512(ref xRef, y, ref zRef, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, y, ref zRef, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported)
+            {
+                if (remainder >= (uint)Vector256<T>.Count)
+                {
+                    Vectorized256(ref xRef, y, ref zRef, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, y, ref zRef, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported)
+            {
+                if (remainder >= (uint)Vector128<T>.Count)
+                {
+                    Vectorized128(ref xRef, y, ref zRef, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, y, ref zRef, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            // This is the software fallback when no acceleration is available
+            // It requires no branches to hit
+
+            SoftwareFallback(ref xRef, y, ref zRef, ref dRef, remainder);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void SoftwareFallback(ref T xRef, T y, ref T zRef, ref T dRef, nuint length)
+            {
+                for (nuint i = 0; i < length; i++)
+                {
+                    Unsafe.Add(ref dRef, i) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, i),
+                                                                      y,
+                                                                      Unsafe.Add(ref zRef, i));
+                }
+            }
+
+            static void Vectorized128(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector128<T> yVec = Vector128.Create(y);
+
+                Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                               yVec,
+                                                               Vector128.LoadUnsafe(ref zRef));
+                Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                               yVec,
+                                                               Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
+
+                if (remainder > (uint)(Vector128<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* pz = &zRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* zPtr = pz;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            zPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector128<T> vector1;
+                        Vector128<T> vector2;
+                        Vector128<T> vector3;
+                        Vector128<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector128<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 0)));
+                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 1)));
+                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 2)));
+                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 4)));
+                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 5)));
+                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 6)));
+                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<T>.Count * 8);
+                                zPtr += (uint)(Vector128<T>.Count * 8);
+                                dPtr += (uint)(Vector128<T>.Count * 8);
+
+                                remainder -= (uint)(Vector128<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector128<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 0)));
+                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 1)));
+                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 2)));
+                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 4)));
+                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 5)));
+                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 6)));
+                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
+                                                                  yVec,
+                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<T>.Count * 8);
+                                zPtr += (uint)(Vector128<T>.Count * 8);
+                                dPtr += (uint)(Vector128<T>.Count * 8);
+
+                                remainder -= (uint)(Vector128<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        zRef = ref *zPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
+
+                switch (remainder / (uint)Vector128<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)),
+                                                                          yVec,
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
+                                                                          yVec,
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
+                                                                          yVec,
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
+                                                                          yVec,
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
+                                                                          yVec,
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
+                                                                          yVec,
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
+                                                                          yVec,
+                                                                          Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized256(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector256<T> yVec = Vector256.Create(y);
+
+                Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                               yVec,
+                                                               Vector256.LoadUnsafe(ref zRef));
+                Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                               yVec,
+                                                               Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
+
+                if (remainder > (uint)(Vector256<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* pz = &zRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* zPtr = pz;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)dPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            zPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector256<T> vector1;
+                        Vector256<T> vector2;
+                        Vector256<T> vector3;
+                        Vector256<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector256<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 0)));
+                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 1)));
+                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 2)));
+                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 4)));
+                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 5)));
+                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 6)));
+                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<T>.Count * 8);
+                                zPtr += (uint)(Vector256<T>.Count * 8);
+                                dPtr += (uint)(Vector256<T>.Count * 8);
+
+                                remainder -= (uint)(Vector256<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector256<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 0)));
+                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 1)));
+                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 2)));
+                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 4)));
+                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 5)));
+                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 6)));
+                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
+                                                                  yVec,
+                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<T>.Count * 8);
+                                zPtr += (uint)(Vector256<T>.Count * 8);
+                                dPtr += (uint)(Vector256<T>.Count * 8);
+
+                                remainder -= (uint)(Vector256<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        zRef = ref *zPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector256<T>.Count - 1)) & (nuint)(-Vector256<T>.Count);
+
+                switch (remainder / (uint)Vector256<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 8)),
+                                                                          yVec,
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)),
+                                                                          yVec,
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)),
+                                                                          yVec,
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)),
+                                                                          yVec,
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)),
+                                                                          yVec,
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)),
+                                                                          yVec,
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)),
+                                                                          yVec,
+                                                                          Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized512(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
+            {
+                ref T dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector512<T> yVec = Vector512.Create(y);
+
+                Vector512<T> beg = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
+                                                               yVec,
+                                                               Vector512.LoadUnsafe(ref zRef));
+                Vector512<T> end = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
+                                                               yVec,
+                                                               Vector512.LoadUnsafe(ref zRef, remainder - (uint)Vector512<T>.Count));
+
+                if (remainder > (uint)(Vector512<T>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (T* px = &xRef)
+                    fixed (T* pz = &zRef)
+                    fixed (T* pd = &dRef)
+                    {
+                        T* xPtr = px;
+                        T* zPtr = pz;
+                        T* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
+
+                            xPtr += misalignment;
+                            zPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector512<T> vector1;
+                        Vector512<T> vector2;
+                        Vector512<T> vector3;
+                        Vector512<T> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector512<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 0)));
+                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 1)));
+                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 2)));
+                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 4)));
+                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 5)));
+                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 6)));
+                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<T>.Count * 8);
+                                zPtr += (uint)(Vector512<T>.Count * 8);
+                                dPtr += (uint)(Vector512<T>.Count * 8);
+
+                                remainder -= (uint)(Vector512<T>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector512<T>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 0)));
+                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 1)));
+                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 2)));
+                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 4)));
+                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 5)));
+                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 6)));
+                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
+                                                                  yVec,
+                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<T>.Count * 8);
+                                zPtr += (uint)(Vector512<T>.Count * 8);
+                                dPtr += (uint)(Vector512<T>.Count * 8);
+
+                                remainder -= (uint)(Vector512<T>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        zRef = ref *zPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
+
+                switch (remainder / (uint)Vector512<T>.Count)
+                {
+                    case 8:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
+                                                                          yVec,
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
+                                                                          yVec,
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
+                                                                          yVec,
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
+                                                                          yVec,
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
+                                                                          yVec,
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
+                                                                          yVec,
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
+                                                                          yVec,
+                                                                          Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
+            {
+                if (sizeof(T) == 1)
+                {
+                    VectorizedSmall1(ref xRef, y, ref zRef, ref dRef, remainder);
+                }
+                else if (sizeof(T) == 2)
+                {
+                    VectorizedSmall2(ref xRef, y, ref zRef, ref dRef, remainder);
+                }
+                else if (sizeof(T) == 4)
+                {
+                    VectorizedSmall4(ref xRef, y, ref zRef, ref dRef, remainder);
+                }
+                else
+                {
+                    Debug.Assert(sizeof(T) == 8);
+                    VectorizedSmall8(ref xRef, y, ref zRef, ref dRef, remainder);
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall1(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 1);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 63:
+                    case 62:
+                    case 61:
+                    case 60:
+                    case 59:
+                    case 58:
+                    case 57:
+                    case 56:
+                    case 55:
+                    case 54:
+                    case 53:
+                    case 52:
+                    case 51:
+                    case 50:
+                    case 49:
+                    case 48:
+                    case 47:
+                    case 46:
+                    case 45:
+                    case 44:
+                    case 43:
+                    case 42:
+                    case 41:
+                    case 40:
+                    case 39:
+                    case 38:
+                    case 37:
+                    case 36:
+                    case 35:
+                    case 34:
+                    case 33:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> yVec = Vector256.Create(y);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       yVec,
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                       yVec,
+                                                                       Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 32:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.Create(y),
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> yVec = Vector128.Create(y);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       yVec,
+                                                                       Vector128.LoadUnsafe(ref zRef));
+                            Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                       yVec,
+                                                                       Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.Create(y),
+                                                                       Vector128.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 15:
+                        Unsafe.Add(ref dRef, 14) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 14),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 14));
+                        goto case 14;
+
+                    case 14:
+                        Unsafe.Add(ref dRef, 13) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 13),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 13));
+                        goto case 13;
+
+                    case 13:
+                        Unsafe.Add(ref dRef, 12) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 12),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 12));
+                        goto case 12;
+
+                    case 12:
+                        Unsafe.Add(ref dRef, 11) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 11),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 11));
+                        goto case 11;
+
+                    case 11:
+                        Unsafe.Add(ref dRef, 10) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 10),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 10));
+                        goto case 10;
+
+                    case 10:
+                        Unsafe.Add(ref dRef, 9) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 9),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 9));
+                        goto case 9;
+
+                    case 9:
+                        Unsafe.Add(ref dRef, 8) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 8),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 8));
+                        goto case 8;
+
+                    case 8:
+                        Unsafe.Add(ref dRef, 7) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 7),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 7));
+                        goto case 7;
+
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 6));
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 5));
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 4));
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 3));
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 2));
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 1));
+                        goto case 1;
+
+                    case 1:
+                        dRef = TTernaryOperator.Invoke(xRef, y, zRef);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall2(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 2);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> yVec = Vector256.Create(y);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       yVec,
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                       yVec,
+                                                                       Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.Create(y),
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> yVec = Vector128.Create(y);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       yVec,
+                                                                       Vector128.LoadUnsafe(ref zRef));
+                            Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                       yVec,
+                                                                       Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 8:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.Create(y),
+                                                                       Vector128.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 6));
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 5));
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 4));
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 3));
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 2));
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                          y,
+                                                                          Unsafe.Add(ref zRef, 1));
+                        goto case 1;
+
+                    case 1:
+                        dRef = TTernaryOperator.Invoke(xRef, y, zRef);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall4(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 4);
+
+                switch (remainder)
+                {
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> yVec = Vector256.Create(y);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       yVec,
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                       yVec,
+                                                                       Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    case 8:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.Create(y),
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> yVec = Vector128.Create(y);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       yVec,
+                                                                       Vector128.LoadUnsafe(ref zRef));
+                            Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                       yVec,
+                                                                       Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.Create(y),
+                                                                       Vector128.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
+                                                                              y,
+                                                                              Unsafe.Add(ref zRef, 2));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
+                                                                              y,
+                                                                              Unsafe.Add(ref zRef, 1));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            dRef = TTernaryOperator.Invoke(xRef, y, zRef);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall8(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(T) == 8);
+
+                switch (remainder)
+                {
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> yVec = Vector256.Create(y);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       yVec,
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
+                                                                       yVec,
+                                                                       Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
+                                                                       Vector256.Create(y),
+                                                                       Vector256.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> yVec = Vector128.Create(y);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       yVec,
+                                                                       Vector128.LoadUnsafe(ref zRef));
+                            Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
+                                                                       yVec,
+                                                                       Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
+
+                            break;
+                        }
+
+                    case 2:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
+                                                                       Vector128.Create(y),
+                                                                       Vector128.LoadUnsafe(ref zRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 1:
+                        {
+                            dRef = TTernaryOperator.Invoke(xRef, y, zRef);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IUnaryInputBinaryOutput.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IUnaryInputBinaryOutput.cs
new file mode 100644
index 000000000000..f84ccad005af
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IUnaryInputBinaryOutput.cs
@@ -0,0 +1,146 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static unsafe partial class TensorPrimitives
+    {
+        /// <summary>Operator that takes one input value and returns two output values.</summary>
+        private interface IUnaryInputBinaryOutput<T>
+        {
+            static abstract bool Vectorizable { get; }
+            static abstract (T, T) Invoke(T x);
+            static abstract (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x);
+            static abstract (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x);
+            static abstract (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x);
+        }
+
+        /// <summary>Performs an element-wise operation on <paramref name="x"/> and writes the results to <paramref name="destination1"/> and <paramref name="destination2"/>.</summary>
+        /// <typeparam name="T">The element type.</typeparam>
+        /// <typeparam name="TUnaryOperator">Specifies the operation to perform on each element loaded from <paramref name="x"/>.</typeparam>
+        private static void InvokeSpanIntoSpan_TwoOutputs<T, TUnaryOperator>(
+            ReadOnlySpan<T> x, Span<T> destination1, Span<T> destination2)
+            where TUnaryOperator : struct, IUnaryInputBinaryOutput<T>
+        {
+            if (x.Length > destination1.Length)
+            {
+                ThrowHelper.ThrowArgument_DestinationTooShort(nameof(destination1));
+            }
+
+            if (x.Length > destination2.Length)
+            {
+                ThrowHelper.ThrowArgument_DestinationTooShort(nameof(destination2));
+            }
+
+            ValidateInputOutputSpanNonOverlapping(x, destination1);
+            ValidateInputOutputSpanNonOverlapping(x, destination2);
+
+            ref T sourceRef = ref MemoryMarshal.GetReference(x);
+            ref T destination1Ref = ref MemoryMarshal.GetReference(destination1);
+            ref T destination2Ref = ref MemoryMarshal.GetReference(destination2);
+            int i = 0, oneVectorFromEnd;
+
+            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TUnaryOperator.Vectorizable)
+            {
+                oneVectorFromEnd = x.Length - Vector512<T>.Count;
+                if (i <= oneVectorFromEnd)
+                {
+                    // Loop handling one input vector / two destination vectors at a time.
+                    do
+                    {
+                        (Vector512<T> first, Vector512<T> second) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
+                        first.StoreUnsafe(ref destination1Ref, (uint)i);
+                        second.StoreUnsafe(ref destination2Ref, (uint)i);
+
+                        i += Vector512<T>.Count;
+                    }
+                    while (i <= oneVectorFromEnd);
+
+                    // Handle any remaining elements with a final input vector.
+                    if (i != x.Length)
+                    {
+                        i = x.Length - Vector512<T>.Count;
+
+                        (Vector512<T> first, Vector512<T> second) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
+                        first.StoreUnsafe(ref destination1Ref, (uint)i);
+                        second.StoreUnsafe(ref destination2Ref, (uint)i);
+                    }
+
+                    return;
+                }
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && TUnaryOperator.Vectorizable)
+            {
+                oneVectorFromEnd = x.Length - Vector256<T>.Count;
+                if (i <= oneVectorFromEnd)
+                {
+                    // Loop handling one input vector / two destination vectors at a time.
+                    do
+                    {
+                        (Vector256<T> first, Vector256<T> second) = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref sourceRef, (uint)i));
+                        first.StoreUnsafe(ref destination1Ref, (uint)i);
+                        second.StoreUnsafe(ref destination2Ref, (uint)i);
+
+                        i += Vector256<T>.Count;
+                    }
+                    while (i <= oneVectorFromEnd);
+
+                    // Handle any remaining elements with a final input vector.
+                    if (i != x.Length)
+                    {
+                        i = x.Length - Vector256<T>.Count;
+
+                        (Vector256<T> first, Vector256<T> second) = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref sourceRef, (uint)i));
+                        first.StoreUnsafe(ref destination1Ref, (uint)i);
+                        second.StoreUnsafe(ref destination2Ref, (uint)i);
+                    }
+
+                    return;
+                }
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TUnaryOperator.Vectorizable)
+            {
+                oneVectorFromEnd = x.Length - Vector128<T>.Count;
+                if (i <= oneVectorFromEnd)
+                {
+                    // Loop handling one input vector / two destination vectors at a time.
+                    do
+                    {
+                        (Vector128<T> first, Vector128<T> second) = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i));
+                        first.StoreUnsafe(ref destination1Ref, (uint)i);
+                        second.StoreUnsafe(ref destination2Ref, (uint)i);
+
+                        i += Vector128<T>.Count;
+                    }
+                    while (i <= oneVectorFromEnd);
+
+                    // Handle any remaining elements with a final input vector.
+                    if (i != x.Length)
+                    {
+                        i = x.Length - Vector128<T>.Count;
+
+                        (Vector128<T> first, Vector128<T> second) = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i));
+                        first.StoreUnsafe(ref destination1Ref, (uint)i);
+                        second.StoreUnsafe(ref destination2Ref, (uint)i);
+                    }
+
+                    return;
+                }
+            }
+
+            while (i < x.Length)
+            {
+                (T first, T second) = TUnaryOperator.Invoke(Unsafe.Add(ref sourceRef, i));
+                Unsafe.Add(ref destination1Ref, i) = first;
+                Unsafe.Add(ref destination2Ref, i) = second;
+                i++;
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IUnaryOneToTwoOperator.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IUnaryOneToTwoOperator.cs
new file mode 100644
index 000000000000..d8d906a603ee
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IUnaryOneToTwoOperator.cs
@@ -0,0 +1,152 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static unsafe partial class TensorPrimitives
+    {
+        /// <summary>Operator that takes one input value and returns a single value.</summary>
+        /// <remarks>The input type must be half the size of the output type.</remarks>
+        private interface IUnaryOneToTwoOperator<TInput, TOutput>
+        {
+            static abstract bool Vectorizable { get; }
+            static abstract TOutput Invoke(TInput x);
+            static abstract (Vector128<TOutput> Lower, Vector128<TOutput> Upper) Invoke(Vector128<TInput> x);
+            static abstract (Vector256<TOutput> Lower, Vector256<TOutput> Upper) Invoke(Vector256<TInput> x);
+            static abstract (Vector512<TOutput> Lower, Vector512<TOutput> Upper) Invoke(Vector512<TInput> x);
+        }
+
+        /// <summary>Performs an element-wise operation on <paramref name="x"/> and writes the results to <paramref name="destination"/>.</summary>
+        /// <typeparam name="TInput">The element input type.</typeparam>
+        /// <typeparam name="TOutput">The element output type. Must be the same size as TInput if TInput and TOutput both support vectorization.</typeparam>
+        /// <typeparam name="TUnaryOperator">Specifies the operation to perform on each element loaded from <paramref name="x"/>.</typeparam>
+        /// <remarks>This should only be used when it's known that TInput/TOutput are vectorizable and the size of TInput is half that of TOutput.</remarks>
+        private static void InvokeSpanIntoSpan_1to2<TInput, TOutput, TUnaryOperator>(
+            ReadOnlySpan<TInput> x, Span<TOutput> destination)
+            where TUnaryOperator : struct, IUnaryOneToTwoOperator<TInput, TOutput>
+        {
+            Debug.Assert(sizeof(TInput) * 2 == sizeof(TOutput));
+
+            if (x.Length > destination.Length)
+            {
+                ThrowHelper.ThrowArgument_DestinationTooShort();
+            }
+
+            ref TInput sourceRef = ref MemoryMarshal.GetReference(x);
+            ref TOutput destinationRef = ref MemoryMarshal.GetReference(destination);
+            int i = 0, oneVectorFromEnd;
+
+            if (Vector512.IsHardwareAccelerated && TUnaryOperator.Vectorizable)
+            {
+                Debug.Assert(Vector512<TInput>.IsSupported);
+                Debug.Assert(Vector512<TOutput>.IsSupported);
+
+                oneVectorFromEnd = x.Length - Vector512<TInput>.Count;
+                if (i <= oneVectorFromEnd)
+                {
+                    // Loop handling one input vector / two output vectors at a time.
+                    do
+                    {
+                        (Vector512<TOutput> lower, Vector512<TOutput> upper) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
+                        lower.StoreUnsafe(ref destinationRef, (uint)i);
+                        upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector512<TOutput>.Count));
+
+                        i += Vector512<TInput>.Count;
+                    }
+                    while (i <= oneVectorFromEnd);
+
+                    // Handle any remaining elements with a final input vector.
+                    if (i != x.Length)
+                    {
+                        i = x.Length - Vector512<TInput>.Count;
+
+                        (Vector512<TOutput> lower, Vector512<TOutput> upper) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
+                        lower.StoreUnsafe(ref destinationRef, (uint)i);
+                        upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector512<TOutput>.Count));
+                    }
+
+                    return;
+                }
+            }
+
+            if (Vector256.IsHardwareAccelerated && TUnaryOperator.Vectorizable)
+            {
+                Debug.Assert(Vector256<TInput>.IsSupported);
+                Debug.Assert(Vector256<TOutput>.IsSupported);
+
+                oneVectorFromEnd = x.Length - Vector256<TInput>.Count;
+                if (i <= oneVectorFromEnd)
+                {
+                    // Loop handling one input vector / two output vectors at a time.
+                    do
+                    {
+                        (Vector256<TOutput> lower, Vector256<TOutput> upper) = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref sourceRef, (uint)i));
+                        lower.StoreUnsafe(ref destinationRef, (uint)i);
+                        upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector256<TOutput>.Count));
+
+                        i += Vector256<TInput>.Count;
+                    }
+                    while (i <= oneVectorFromEnd);
+
+                    // Handle any remaining elements with a final input vector.
+                    if (i != x.Length)
+                    {
+                        i = x.Length - Vector256<TInput>.Count;
+
+                        (Vector256<TOutput> lower, Vector256<TOutput> upper) = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref sourceRef, (uint)i));
+                        lower.StoreUnsafe(ref destinationRef, (uint)i);
+                        upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector256<TOutput>.Count));
+                    }
+
+                    return;
+                }
+            }
+
+            if (Vector128.IsHardwareAccelerated && TUnaryOperator.Vectorizable)
+            {
+                Debug.Assert(Vector128<TInput>.IsSupported);
+                Debug.Assert(Vector128<TOutput>.IsSupported);
+
+                oneVectorFromEnd = x.Length - Vector128<TInput>.Count;
+                if (i <= oneVectorFromEnd)
+                {
+                    // Loop handling one input vector / two output vectors at a time.
+                    do
+                    {
+                        (Vector128<TOutput> lower, Vector128<TOutput> upper) = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i));
+                        lower.StoreUnsafe(ref destinationRef, (uint)i);
+                        upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector128<TOutput>.Count));
+
+                        i += Vector128<TInput>.Count;
+                    }
+                    while (i <= oneVectorFromEnd);
+
+                    // Handle any remaining elements with a final input vector.
+                    if (i != x.Length)
+                    {
+                        i = x.Length - Vector128<TInput>.Count;
+
+                        (Vector128<TOutput> lower, Vector128<TOutput> upper) = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i));
+                        lower.StoreUnsafe(ref destinationRef, (uint)i);
+                        upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector128<TOutput>.Count));
+                    }
+
+                    return;
+                }
+            }
+
+            while (i < x.Length)
+            {
+                Unsafe.Add(ref destinationRef, i) = TUnaryOperator.Invoke(Unsafe.Add(ref sourceRef, i));
+                i++;
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IUnaryOperator.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IUnaryOperator.cs
new file mode 100644
index 000000000000..c2c22950c577
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IUnaryOperator.cs
@@ -0,0 +1,1251 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static unsafe partial class TensorPrimitives
+    {
+        /// <summary>Defines the threshold, in bytes, at which non-temporal stores will be used.</summary>
+        /// <remarks>
+        ///     A non-temporal store is one that allows the CPU to bypass the cache when writing to memory.
+        ///
+        ///     This can be beneficial when working with large amounts of memory where the writes would otherwise
+        ///     cause large amounts of repeated updates and evictions. The hardware optimization manuals recommend
+        ///     the threshold to be roughly half the size of the last level of on-die cache -- that is, if you have approximately
+        ///     4MB of L3 cache per core, you'd want this to be approx. 1-2MB, depending on if hyperthreading was enabled.
+        ///
+        ///     However, actually computing the amount of L3 cache per core can be tricky or error prone. Native memcpy
+        ///     algorithms use a constant threshold that is typically around 256KB and we match that here for simplicity. This
+        ///     threshold accounts for most processors in the last 10-15 years that had approx. 1MB L3 per core and support
+        ///     hyperthreading, giving a per core last level cache of approx. 512KB.
+        /// </remarks>
+        private const nuint NonTemporalByteThreshold = 256 * 1024;
+
+        /// <summary>Operator that takes one input value and returns a single value.</summary>
+        /// <remarks>The input and output type must be of the same size if vectorization is desired.</remarks>
+        internal interface IUnaryOperator<TInput, TOutput>
+        {
+            static abstract bool Vectorizable { get; }
+            static abstract TOutput Invoke(TInput x);
+            static abstract Vector128<TOutput> Invoke(Vector128<TInput> x);
+            static abstract Vector256<TOutput> Invoke(Vector256<TInput> x);
+            static abstract Vector512<TOutput> Invoke(Vector512<TInput> x);
+        }
+
+        /// <summary>x</summary>
+        internal readonly struct IdentityOperator<T> : IUnaryOperator<T, T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x) => x;
+            public static Vector128<T> Invoke(Vector128<T> x) => x;
+            public static Vector256<T> Invoke(Vector256<T> x) => x;
+            public static Vector512<T> Invoke(Vector512<T> x) => x;
+        }
+
+        /// <summary>Performs an element-wise operation on <paramref name="x"/> and writes the results to <paramref name="destination"/>.</summary>
+        /// <typeparam name="T">The element input type.</typeparam>
+        /// <typeparam name="TUnaryOperator">Specifies the operation to perform on each element loaded from <paramref name="x"/>.</typeparam>
+        private static void InvokeSpanIntoSpan<T, TUnaryOperator>(
+            ReadOnlySpan<T> x, Span<T> destination)
+            where TUnaryOperator : struct, IUnaryOperator<T, T> =>
+            InvokeSpanIntoSpan<T, T, TUnaryOperator>(x, destination);
+
+        /// <summary>Performs an element-wise operation on <paramref name="x"/> and writes the results to <paramref name="destination"/>.</summary>
+        /// <typeparam name="TInput">The element input type.</typeparam>
+        /// <typeparam name="TOutput">The element output type. Must be the same size as TInput if TInput and TOutput both support vectorization.</typeparam>
+        /// <typeparam name="TUnaryOperator">Specifies the operation to perform on each element loaded from <paramref name="x"/>.</typeparam>
+        /// <remarks>
+        /// This supports vectorizing the operation if <typeparamref name="TInput"/> and <typeparamref name="TOutput"/> are the same size.
+        /// Otherwise, it'll fall back to scalar operations.
+        /// </remarks>
+        private static void InvokeSpanIntoSpan<TInput, TOutput, TUnaryOperator>(
+            ReadOnlySpan<TInput> x, Span<TOutput> destination)
+            where TUnaryOperator : struct, IUnaryOperator<TInput, TOutput>
+        {
+            if (x.Length > destination.Length)
+            {
+                ThrowHelper.ThrowArgument_DestinationTooShort();
+            }
+
+            if (typeof(TInput) == typeof(TOutput))
+            {
+                // This ignores the unsafe case where a developer passes in overlapping spans for distinct types.
+                ValidateInputOutputSpanNonOverlapping(x, Rename<TOutput, TInput>(destination));
+            }
+
+            // Since every branch has a cost and since that cost is
+            // essentially lost for larger inputs, we do branches
+            // in a way that allows us to have the minimum possible
+            // for small sizes
+
+            ref TInput xRef = ref MemoryMarshal.GetReference(x);
+            ref TOutput dRef = ref MemoryMarshal.GetReference(destination);
+
+            nuint remainder = (uint)x.Length;
+
+            if (Vector512.IsHardwareAccelerated && Vector512<TInput>.IsSupported && Vector512<TOutput>.IsSupported && TUnaryOperator.Vectorizable && Unsafe.SizeOf<TInput>() == Unsafe.SizeOf<TOutput>())
+            {
+                if (remainder >= (uint)Vector512<TInput>.Count)
+                {
+                    Vectorized512(ref xRef, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<TInput>.IsSupported && Vector256<TOutput>.IsSupported && TUnaryOperator.Vectorizable && Unsafe.SizeOf<TInput>() == Unsafe.SizeOf<TOutput>())
+            {
+                if (remainder >= (uint)Vector256<TInput>.Count)
+                {
+                    Vectorized256(ref xRef, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<TInput>.IsSupported && Vector128<TOutput>.IsSupported && TUnaryOperator.Vectorizable && Unsafe.SizeOf<TInput>() == Unsafe.SizeOf<TOutput>())
+            {
+                if (remainder >= (uint)Vector128<TInput>.Count)
+                {
+                    Vectorized128(ref xRef, ref dRef, remainder);
+                }
+                else
+                {
+                    // We have less than a vector and so we can only handle this as scalar. To do this
+                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
+                    // length check, single jump, and then linear execution.
+
+                    VectorizedSmall(ref xRef, ref dRef, remainder);
+                }
+
+                return;
+            }
+
+            // This is the software fallback when no acceleration is available
+            // It requires no branches to hit
+
+            SoftwareFallback(ref xRef, ref dRef, remainder);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void SoftwareFallback(ref TInput xRef, ref TOutput dRef, nuint length)
+            {
+                for (nuint i = 0; i < length; i++)
+                {
+                    Unsafe.Add(ref dRef, i) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, i));
+                }
+            }
+
+            static void Vectorized128(ref TInput xRef, ref TOutput dRef, nuint remainder)
+            {
+                ref TOutput dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
+
+                if (remainder > (uint)(Vector128<TInput>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (TInput* px = &xRef)
+                    fixed (TOutput* pd = &dRef)
+                    {
+                        TInput* xPtr = px;
+                        TOutput* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(TInput)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector128<TInput>) - ((nuint)dPtr % (uint)sizeof(Vector128<TInput>))) / (uint)sizeof(TInput);
+
+                            xPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<TInput>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector128<TOutput> vector1;
+                        Vector128<TOutput> vector2;
+                        Vector128<TOutput> vector3;
+                        Vector128<TOutput> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(TInput))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector128<TInput>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 0)));
+                                vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 1)));
+                                vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 2)));
+                                vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 4)));
+                                vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 5)));
+                                vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 6)));
+                                vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<TInput>.Count * 8);
+                                dPtr += (uint)(Vector128<TOutput>.Count * 8);
+
+                                remainder -= (uint)(Vector128<TInput>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector128<TInput>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 0)));
+                                vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 1)));
+                                vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 2)));
+                                vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector128<TOutput>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector128<TOutput>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector128<TOutput>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector128<TOutput>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 4)));
+                                vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 5)));
+                                vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 6)));
+                                vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector128<TOutput>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector128<TOutput>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector128<TOutput>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector128<TOutput>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector128<TInput>.Count * 8);
+                                dPtr += (uint)(Vector128<TOutput>.Count * 8);
+
+                                remainder -= (uint)(Vector128<TInput>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector128<TInput>.Count - 1)) & (nuint)(-Vector128<TInput>.Count);
+
+                switch (remainder / (uint)Vector128<TInput>.Count)
+                {
+                    case 8:
+                        {
+                            Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<TInput>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized256(ref TInput xRef, ref TOutput dRef, nuint remainder)
+            {
+                ref TOutput dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                Vector256<TOutput> end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<TInput>.Count));
+
+                if (remainder > (uint)(Vector256<TInput>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (TInput* px = &xRef)
+                    fixed (TOutput* pd = &dRef)
+                    {
+                        TInput* xPtr = px;
+                        TOutput* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(TInput)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector256<TInput>) - ((nuint)dPtr % (uint)sizeof(Vector256<TInput>))) / (uint)sizeof(TInput);
+
+                            xPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<TInput>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector256<TOutput> vector1;
+                        Vector256<TOutput> vector2;
+                        Vector256<TOutput> vector3;
+                        Vector256<TOutput> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(TInput))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector256<TInput>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 0)));
+                                vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 1)));
+                                vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 2)));
+                                vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 4)));
+                                vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 5)));
+                                vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 6)));
+                                vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<TInput>.Count * 8);
+                                dPtr += (uint)(Vector256<TOutput>.Count * 8);
+
+                                remainder -= (uint)(Vector256<TInput>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector256<TInput>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 0)));
+                                vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 1)));
+                                vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 2)));
+                                vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector256<TOutput>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector256<TOutput>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector256<TOutput>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector256<TOutput>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 4)));
+                                vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 5)));
+                                vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 6)));
+                                vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector256<TOutput>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector256<TOutput>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector256<TOutput>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector256<TOutput>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector256<TInput>.Count * 8);
+                                dPtr += (uint)(Vector256<TOutput>.Count * 8);
+
+                                remainder -= (uint)(Vector256<TInput>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector256<TInput>.Count - 1)) & (nuint)(-Vector256<TInput>.Count);
+
+                switch (remainder / (uint)Vector256<TInput>.Count)
+                {
+                    case 8:
+                        {
+                            Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<TOutput>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            static void Vectorized512(ref TInput xRef, ref TOutput dRef, nuint remainder)
+            {
+                ref TOutput dRefBeg = ref dRef;
+
+                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
+
+                Vector512<TOutput> beg = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef));
+                Vector512<TOutput> end = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<TInput>.Count));
+
+                if (remainder > (uint)(Vector512<TInput>.Count * 8))
+                {
+                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
+                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
+
+                    fixed (TInput* px = &xRef)
+                    fixed (TOutput* pd = &dRef)
+                    {
+                        TInput* xPtr = px;
+                        TOutput* dPtr = pd;
+
+                        // We need to the ensure the underlying data can be aligned and only align
+                        // it if it can. It is possible we have an unaligned ref, in which case we
+                        // can never achieve the required SIMD alignment.
+
+                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(TInput)) == 0;
+
+                        if (canAlign)
+                        {
+                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
+                            //
+                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
+                            // are more expensive than unaligned loads and aligning both is significantly more
+                            // complex.
+
+                            nuint misalignment = ((uint)sizeof(Vector512<TInput>) - ((nuint)dPtr % (uint)sizeof(Vector512<TInput>))) / (uint)sizeof(TInput);
+
+                            xPtr += misalignment;
+                            dPtr += misalignment;
+
+                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<TInput>)) == 0);
+
+                            remainder -= misalignment;
+                        }
+
+                        Vector512<TOutput> vector1;
+                        Vector512<TOutput> vector2;
+                        Vector512<TOutput> vector3;
+                        Vector512<TOutput> vector4;
+
+                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(TInput))) && canAlign)
+                        {
+                            // This loop stores the data non-temporally, which benefits us when there
+                            // is a large amount of data involved as it avoids polluting the cache.
+
+                            while (remainder >= (uint)(Vector512<TInput>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 0)));
+                                vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 1)));
+                                vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 2)));
+                                vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 3)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 0));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 1));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 2));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 4)));
+                                vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 5)));
+                                vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 6)));
+                                vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 7)));
+
+                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 4));
+                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 5));
+                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 6));
+                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<TInput>.Count * 8);
+                                dPtr += (uint)(Vector512<TInput>.Count * 8);
+
+                                remainder -= (uint)(Vector512<TInput>.Count * 8);
+                            }
+                        }
+                        else
+                        {
+                            while (remainder >= (uint)(Vector512<TInput>.Count * 8))
+                            {
+                                // We load, process, and store the first four vectors
+
+                                vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 0)));
+                                vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 1)));
+                                vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 2)));
+                                vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 3)));
+
+                                vector1.Store(dPtr + (uint)(Vector512<TOutput>.Count * 0));
+                                vector2.Store(dPtr + (uint)(Vector512<TOutput>.Count * 1));
+                                vector3.Store(dPtr + (uint)(Vector512<TOutput>.Count * 2));
+                                vector4.Store(dPtr + (uint)(Vector512<TOutput>.Count * 3));
+
+                                // We load, process, and store the next four vectors
+
+                                vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 4)));
+                                vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 5)));
+                                vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 6)));
+                                vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 7)));
+
+                                vector1.Store(dPtr + (uint)(Vector512<TOutput>.Count * 4));
+                                vector2.Store(dPtr + (uint)(Vector512<TOutput>.Count * 5));
+                                vector3.Store(dPtr + (uint)(Vector512<TOutput>.Count * 6));
+                                vector4.Store(dPtr + (uint)(Vector512<TOutput>.Count * 7));
+
+                                // We adjust the source and destination references, then update
+                                // the count of remaining elements to process.
+
+                                xPtr += (uint)(Vector512<TInput>.Count * 8);
+                                dPtr += (uint)(Vector512<TOutput>.Count * 8);
+
+                                remainder -= (uint)(Vector512<TInput>.Count * 8);
+                            }
+                        }
+
+                        // Adjusting the refs here allows us to avoid pinning for very small inputs
+
+                        xRef = ref *xPtr;
+                        dRef = ref *dPtr;
+                    }
+                }
+
+                // Process the remaining [Count, Count * 8] elements via a jump table
+                //
+                // Unless the original length was an exact multiple of Count, then we'll
+                // end up reprocessing a couple elements in case 1 for end. We'll also
+                // potentially reprocess a few elements in case 0 for beg, to handle any
+                // data before the first aligned address.
+
+                nuint endIndex = remainder;
+                remainder = (remainder + (uint)(Vector512<TInput>.Count - 1)) & (nuint)(-Vector512<TInput>.Count);
+
+                switch (remainder / (uint)Vector512<TInput>.Count)
+                {
+                    case 8:
+                        {
+                            Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 8)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 8));
+                            goto case 7;
+                        }
+
+                    case 7:
+                        {
+                            Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 7)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 7));
+                            goto case 6;
+                        }
+
+                    case 6:
+                        {
+                            Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 6)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 6));
+                            goto case 5;
+                        }
+
+                    case 5:
+                        {
+                            Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 5)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 5));
+                            goto case 4;
+                        }
+
+                    case 4:
+                        {
+                            Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 4)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 4));
+                            goto case 3;
+                        }
+
+                    case 3:
+                        {
+                            Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 3)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 3));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 2)));
+                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 2));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            // Store the last block, which includes any elements that wouldn't fill a full vector
+                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<TOutput>.Count);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            // Store the first block, which includes any elements preceding the first aligned block
+                            beg.StoreUnsafe(ref dRefBeg);
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall(ref TInput xRef, ref TOutput dRef, nuint remainder)
+            {
+                if (sizeof(TInput) == 1)
+                {
+                    VectorizedSmall1(ref xRef, ref dRef, remainder);
+                }
+                else if (sizeof(TInput) == 2)
+                {
+                    VectorizedSmall2(ref xRef, ref dRef, remainder);
+                }
+                else if (sizeof(TInput) == 4)
+                {
+                    VectorizedSmall4(ref xRef, ref dRef, remainder);
+                }
+                else
+                {
+                    Debug.Assert(sizeof(TInput) == 8);
+                    VectorizedSmall8(ref xRef, ref dRef, remainder);
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall1(ref TInput xRef, ref TOutput dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(TInput) == 1);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 63:
+                    case 62:
+                    case 61:
+                    case 60:
+                    case 59:
+                    case 58:
+                    case 57:
+                    case 56:
+                    case 55:
+                    case 54:
+                    case 53:
+                    case 52:
+                    case 51:
+                    case 50:
+                    case 49:
+                    case 48:
+                    case 47:
+                    case 46:
+                    case 45:
+                    case 44:
+                    case 43:
+                    case 42:
+                    case 41:
+                    case 40:
+                    case 39:
+                    case 38:
+                    case 37:
+                    case 36:
+                    case 35:
+                    case 34:
+                    case 33:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            Vector256<TOutput> end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<TInput>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<TOutput>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 32:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<TOutput>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 15:
+                        Unsafe.Add(ref dRef, 14) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 14));
+                        goto case 14;
+
+                    case 14:
+                        Unsafe.Add(ref dRef, 13) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 13));
+                        goto case 13;
+
+                    case 13:
+                        Unsafe.Add(ref dRef, 12) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 12));
+                        goto case 12;
+
+                    case 12:
+                        Unsafe.Add(ref dRef, 11) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 11));
+                        goto case 11;
+
+                    case 11:
+                        Unsafe.Add(ref dRef, 10) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 10));
+                        goto case 10;
+
+                    case 10:
+                        Unsafe.Add(ref dRef, 9) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 9));
+                        goto case 9;
+
+                    case 9:
+                        Unsafe.Add(ref dRef, 8) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 8));
+                        goto case 8;
+
+                    case 8:
+                        Unsafe.Add(ref dRef, 7) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 7));
+                        goto case 7;
+
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 6));
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 5));
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 4));
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 3));
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 2));
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 1));
+                        goto case 1;
+
+                    case 1:
+                        dRef = TUnaryOperator.Invoke(xRef);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall2(ref TInput xRef, ref TOutput dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(TInput) == 2);
+
+                switch (remainder)
+                {
+                    // Two Vector256's worth of data, with at least one element overlapping.
+                    case 31:
+                    case 30:
+                    case 29:
+                    case 28:
+                    case 27:
+                    case 26:
+                    case 25:
+                    case 24:
+                    case 23:
+                    case 22:
+                    case 21:
+                    case 20:
+                    case 19:
+                    case 18:
+                    case 17:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            Vector256<TOutput> end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<TInput>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<TOutput>.Count);
+
+                            break;
+                        }
+
+                    // One Vector256's worth of data.
+                    case 16:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Two Vector128's worth of data, with at least one element overlapping.
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<TOutput>.Count);
+
+                            break;
+                        }
+
+                    // One Vector128's worth of data.
+                    case 8:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
+                    // case to unroll the whole processing.
+                    case 7:
+                        Unsafe.Add(ref dRef, 6) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 6));
+                        goto case 6;
+
+                    case 6:
+                        Unsafe.Add(ref dRef, 5) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 5));
+                        goto case 5;
+
+                    case 5:
+                        Unsafe.Add(ref dRef, 4) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 4));
+                        goto case 4;
+
+                    case 4:
+                        Unsafe.Add(ref dRef, 3) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 3));
+                        goto case 3;
+
+                    case 3:
+                        Unsafe.Add(ref dRef, 2) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 2));
+                        goto case 2;
+
+                    case 2:
+                        Unsafe.Add(ref dRef, 1) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 1));
+                        goto case 1;
+
+                    case 1:
+                        dRef = TUnaryOperator.Invoke(xRef);
+                        goto case 0;
+
+                    case 0:
+                        break;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall4(ref TInput xRef, ref TOutput dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(TInput) == 4);
+
+                switch (remainder)
+                {
+                    case 15:
+                    case 14:
+                    case 13:
+                    case 12:
+                    case 11:
+                    case 10:
+                    case 9:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            Vector256<TOutput> end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<TInput>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<TOutput>.Count);
+
+                            break;
+                        }
+
+                    case 8:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<TOutput>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Unsafe.Add(ref dRef, 2) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 2));
+                            goto case 2;
+                        }
+
+                    case 2:
+                        {
+                            Unsafe.Add(ref dRef, 1) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 1));
+                            goto case 1;
+                        }
+
+                    case 1:
+                        {
+                            dRef = TUnaryOperator.Invoke(xRef);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static void VectorizedSmall8(ref TInput xRef, ref TOutput dRef, nuint remainder)
+            {
+                Debug.Assert(sizeof(TInput) == 8);
+
+                switch (remainder)
+                {
+                    case 7:
+                    case 6:
+                    case 5:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            Vector256<TOutput> end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<TInput>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<TOutput>.Count);
+
+                            break;
+                        }
+
+                    case 4:
+                        {
+                            Debug.Assert(Vector256.IsHardwareAccelerated);
+
+                            Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 3:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
+
+                            beg.StoreUnsafe(ref dRef);
+                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<TOutput>.Count);
+
+                            break;
+                        }
+
+                    case 2:
+                        {
+                            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+                            Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
+                            beg.StoreUnsafe(ref dRef);
+
+                            break;
+                        }
+
+                    case 1:
+                        {
+                            dRef = TUnaryOperator.Invoke(xRef);
+                            goto case 0;
+                        }
+
+                    case 0:
+                        {
+                            break;
+                        }
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IUnaryTwoToOneOperator.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IUnaryTwoToOneOperator.cs
new file mode 100644
index 000000000000..e16c4ebca3a6
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IUnaryTwoToOneOperator.cs
@@ -0,0 +1,152 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static unsafe partial class TensorPrimitives
+    {
+        /// <summary>Operator that takes one input value and returns a single value.</summary>
+        /// <remarks>The input type must be twice the size of the output type.</remarks>
+        private interface IUnaryTwoToOneOperator<TInput, TOutput>
+        {
+            static abstract bool Vectorizable { get; }
+            static abstract TOutput Invoke(TInput x);
+            static abstract Vector128<TOutput> Invoke(Vector128<TInput> lower, Vector128<TInput> upper);
+            static abstract Vector256<TOutput> Invoke(Vector256<TInput> lower, Vector256<TInput> upper);
+            static abstract Vector512<TOutput> Invoke(Vector512<TInput> lower, Vector512<TInput> upper);
+        }
+
+        /// <summary>Performs an element-wise operation on <paramref name="x"/> and writes the results to <paramref name="destination"/>.</summary>
+        /// <typeparam name="TInput">The element input type.</typeparam>
+        /// <typeparam name="TOutput">The element output type. Must be the same size as TInput if TInput and TOutput both support vectorization.</typeparam>
+        /// <typeparam name="TUnaryOperator">Specifies the operation to perform on each element loaded from <paramref name="x"/>.</typeparam>
+        /// <remarks>This should only be used when it's known that TInput/TOutput are vectorizable and the size of TInput is twice that of TOutput.</remarks>
+        private static void InvokeSpanIntoSpan_2to1<TInput, TOutput, TUnaryOperator>(
+            ReadOnlySpan<TInput> x, Span<TOutput> destination)
+            where TUnaryOperator : struct, IUnaryTwoToOneOperator<TInput, TOutput>
+        {
+            Debug.Assert(sizeof(TInput) == sizeof(TOutput) * 2);
+
+            if (x.Length > destination.Length)
+            {
+                ThrowHelper.ThrowArgument_DestinationTooShort();
+            }
+
+            ref TInput xRef = ref MemoryMarshal.GetReference(x);
+            ref TOutput destinationRef = ref MemoryMarshal.GetReference(destination);
+            int i = 0, twoVectorsFromEnd;
+
+            if (Vector512.IsHardwareAccelerated && TUnaryOperator.Vectorizable)
+            {
+                Debug.Assert(Vector512<TInput>.IsSupported);
+                Debug.Assert(Vector512<TOutput>.IsSupported);
+
+                twoVectorsFromEnd = x.Length - (Vector512<TInput>.Count * 2);
+                if (i <= twoVectorsFromEnd)
+                {
+                    // Loop handling two input vectors / one output vector at a time.
+                    do
+                    {
+                        TUnaryOperator.Invoke(
+                            Vector512.LoadUnsafe(ref xRef, (uint)i),
+                            Vector512.LoadUnsafe(ref xRef, (uint)(i + Vector512<TInput>.Count))).StoreUnsafe(ref destinationRef, (uint)i);
+
+                        i += Vector512<TInput>.Count * 2;
+                    }
+                    while (i <= twoVectorsFromEnd);
+
+                    // Handle any remaining elements with final vectors.
+                    if (i != x.Length)
+                    {
+                        i = x.Length - (Vector512<TInput>.Count * 2);
+
+                        TUnaryOperator.Invoke(
+                            Vector512.LoadUnsafe(ref xRef, (uint)i),
+                            Vector512.LoadUnsafe(ref xRef, (uint)(i + Vector512<TInput>.Count))).StoreUnsafe(ref destinationRef, (uint)i);
+                    }
+
+                    return;
+                }
+            }
+
+            if (Vector256.IsHardwareAccelerated && TUnaryOperator.Vectorizable)
+            {
+                Debug.Assert(Vector256<TInput>.IsSupported);
+                Debug.Assert(Vector256<TOutput>.IsSupported);
+
+                twoVectorsFromEnd = x.Length - (Vector256<TInput>.Count * 2);
+                if (i <= twoVectorsFromEnd)
+                {
+                    // Loop handling two input vectors / one output vector at a time.
+                    do
+                    {
+                        TUnaryOperator.Invoke(
+                            Vector256.LoadUnsafe(ref xRef, (uint)i),
+                            Vector256.LoadUnsafe(ref xRef, (uint)(i + Vector256<TInput>.Count))).StoreUnsafe(ref destinationRef, (uint)i);
+
+                        i += Vector256<TInput>.Count * 2;
+                    }
+                    while (i <= twoVectorsFromEnd);
+
+                    // Handle any remaining elements with final vectors.
+                    if (i != x.Length)
+                    {
+                        i = x.Length - (Vector256<TInput>.Count * 2);
+
+                        TUnaryOperator.Invoke(
+                            Vector256.LoadUnsafe(ref xRef, (uint)i),
+                            Vector256.LoadUnsafe(ref xRef, (uint)(i + Vector256<TInput>.Count))).StoreUnsafe(ref destinationRef, (uint)i);
+                    }
+
+                    return;
+                }
+            }
+
+            if (Vector128.IsHardwareAccelerated && TUnaryOperator.Vectorizable)
+            {
+                Debug.Assert(Vector128<TInput>.IsSupported);
+                Debug.Assert(Vector128<TOutput>.IsSupported);
+
+                twoVectorsFromEnd = x.Length - (Vector128<TInput>.Count * 2);
+                if (i <= twoVectorsFromEnd)
+                {
+                    // Loop handling two input vectors / one output vector at a time.
+                    do
+                    {
+                        TUnaryOperator.Invoke(
+                            Vector128.LoadUnsafe(ref xRef, (uint)i),
+                            Vector128.LoadUnsafe(ref xRef, (uint)(i + Vector128<TInput>.Count))).StoreUnsafe(ref destinationRef, (uint)i);
+
+                        i += Vector128<TInput>.Count * 2;
+                    }
+                    while (i <= twoVectorsFromEnd);
+
+                    // Handle any remaining elements with final vectors.
+                    if (i != x.Length)
+                    {
+                        i = x.Length - (Vector128<TInput>.Count * 2);
+
+                        TUnaryOperator.Invoke(
+                            Vector128.LoadUnsafe(ref xRef, (uint)i),
+                            Vector128.LoadUnsafe(ref xRef, (uint)(i + Vector128<TInput>.Count))).StoreUnsafe(ref destinationRef, (uint)i);
+                    }
+
+                    return;
+                }
+            }
+
+            while (i < x.Length)
+            {
+                Unsafe.Add(ref destinationRef, i) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, i));
+                i++;
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Abs.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Abs.cs
new file mode 100644
index 000000000000..4ea417591579
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Abs.cs
@@ -0,0 +1,107 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise absolute value of each number in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="OverflowException"><typeparamref name="T"/> is a signed integer type and <paramref name="x"/> contained a value equal to <typeparamref name="T"/>'s minimum value.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Abs(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// The absolute value of a <typeparamref name="T"/> is its numeric value without its sign. For example, the absolute value of both 1.2e-03 and -1.2e03 is 1.2e03.
+        /// </para>
+        /// <para>
+        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/> or <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
+        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is the original NaN value with the sign bit removed.
+        /// </para>
+        /// </remarks>
+        public static void Abs<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : INumberBase<T> =>
+            InvokeSpanIntoSpan<T, AbsoluteOperator<T>>(x, destination);
+
+        /// <summary>T.Abs(x)</summary>
+        internal readonly struct AbsoluteOperator<T> : IUnaryOperator<T, T> where T : INumberBase<T>
+        {
+            public static bool Vectorizable => true;
+
+            public static T Invoke(T x) => T.Abs(x);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (typeof(T) == typeof(sbyte) ||
+                    typeof(T) == typeof(short) ||
+                    typeof(T) == typeof(int) ||
+                    typeof(T) == typeof(long) ||
+                    typeof(T) == typeof(nint))
+                {
+                    // Handle signed integers specially, in order to throw if any attempt is made to
+                    // take the absolute value of the minimum value of the type, which doesn't have
+                    // a positive absolute value representation.
+                    Vector128<T> abs = Vector128.ConditionalSelect(Vector128.LessThan(x, Vector128<T>.Zero), -x, x);
+                    if (Vector128.LessThan(abs, Vector128<T>.Zero) != Vector128<T>.Zero)
+                    {
+                        ThrowNegateTwosCompOverflow();
+                    }
+                }
+
+                return Vector128.Abs(x);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (typeof(T) == typeof(sbyte) ||
+                    typeof(T) == typeof(short) ||
+                    typeof(T) == typeof(int) ||
+                    typeof(T) == typeof(long) ||
+                    typeof(T) == typeof(nint))
+                {
+                    // Handle signed integers specially, in order to throw if any attempt is made to
+                    // take the absolute value of the minimum value of the type, which doesn't have
+                    // a positive absolute value representation.
+                    Vector256<T> abs = Vector256.ConditionalSelect(Vector256.LessThan(x, Vector256<T>.Zero), -x, x);
+                    if (Vector256.LessThan(abs, Vector256<T>.Zero) != Vector256<T>.Zero)
+                    {
+                        ThrowNegateTwosCompOverflow();
+                    }
+                }
+
+                return Vector256.Abs(x);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (typeof(T) == typeof(sbyte) ||
+                    typeof(T) == typeof(short) ||
+                    typeof(T) == typeof(int) ||
+                    typeof(T) == typeof(long) ||
+                    typeof(T) == typeof(nint))
+                {
+                    // Handle signed integers specially, in order to throw if any attempt is made to
+                    // take the absolute value of the minimum value of the type, which doesn't have
+                    // a positive absolute value representation.
+                    Vector512<T> abs = Vector512.ConditionalSelect(Vector512.LessThan(x, Vector512<T>.Zero), -x, x);
+                    if (Vector512.LessThan(abs, Vector512<T>.Zero) != Vector512<T>.Zero)
+                    {
+                        ThrowNegateTwosCompOverflow();
+                    }
+                }
+
+                return Vector512.Abs(x);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Acos.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Acos.cs
new file mode 100644
index 000000000000..cdd36a41dc7f
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Acos.cs
@@ -0,0 +1,39 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise angle in radians whose cosine is the specifed number.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Acos(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Acos<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, AcosOperator<T>>(x, destination);
+
+        /// <summary>T.Acos(x)</summary>
+        private readonly struct AcosOperator<T> : IUnaryOperator<T, T>
+            where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => false; // TODO: Vectorize
+            public static T Invoke(T x) => T.Acos(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.AcosPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.AcosPi.cs
new file mode 100644
index 000000000000..caf57f4e9d16
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.AcosPi.cs
@@ -0,0 +1,39 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise angle in radians whose cosine is the specifed number and divides the result by Pi.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.AcosPi(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void AcosPi<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, AcosPiOperator<T>>(x, destination);
+
+        /// <summary>T.AcosPi(x)</summary>
+        private readonly struct AcosPiOperator<T> : IUnaryOperator<T, T>
+            where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => AcosOperator<T>.Vectorizable;
+            public static T Invoke(T x) => T.AcosPi(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => AcosOperator<T>.Invoke(x) / Vector128.Create(T.Pi);
+            public static Vector256<T> Invoke(Vector256<T> x) => AcosOperator<T>.Invoke(x) / Vector256.Create(T.Pi);
+            public static Vector512<T> Invoke(Vector512<T> x) => AcosOperator<T>.Invoke(x) / Vector512.Create(T.Pi);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Acosh.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Acosh.cs
new file mode 100644
index 000000000000..7313cfe15f2d
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Acosh.cs
@@ -0,0 +1,39 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise hyperbolic arc-cosine of the specifed number.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Acosh(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Acosh<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IHyperbolicFunctions<T> =>
+            InvokeSpanIntoSpan<T, AcoshOperator<T>>(x, destination);
+
+        /// <summary>T.Acosh(x)</summary>
+        private readonly struct AcoshOperator<T> : IUnaryOperator<T, T>
+            where T : IHyperbolicFunctions<T>
+        {
+            public static bool Vectorizable => false; // TODO: Vectorize
+            public static T Invoke(T x) => T.Acosh(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Add.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Add.cs
new file mode 100644
index 000000000000..4c891bef8b04
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Add.cs
@@ -0,0 +1,65 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise addition of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] + <paramref name="y" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Add<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T> =>
+            InvokeSpanSpanIntoSpan<T, AddOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise addition of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] + <paramref name="y" /></c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Add<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T> =>
+            InvokeSpanScalarIntoSpan<T, AddOperator<T>>(x, y, destination);
+
+        /// <summary>x + y</summary>
+        internal readonly struct AddOperator<T> : IAggregationOperator<T> where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T>
+        {
+            public static bool Vectorizable => true;
+
+            public static T Invoke(T x, T y) => x + y;
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x + y;
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x + y;
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x + y;
+
+            public static T Invoke(Vector128<T> x) => Vector128.Sum(x);
+            public static T Invoke(Vector256<T> x) => Vector256.Sum(x);
+            public static T Invoke(Vector512<T> x) => Vector512.Sum(x);
+
+            public static T IdentityValue => T.AdditiveIdentity;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.AddMultiply.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.AddMultiply.cs
new file mode 100644
index 000000000000..c69f379b4b59
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.AddMultiply.cs
@@ -0,0 +1,83 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> + <paramref name="y" />) * <paramref name="multiplier" /></c> for the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="multiplier">The third tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" /> and the length of <paramref name="multiplier" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="multiplier"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] + <paramref name="y" />[i]) * <paramref name="multiplier" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If any of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void AddMultiply<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, ReadOnlySpan<T> multiplier, Span<T> destination)
+            where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T> =>
+            InvokeSpanSpanSpanIntoSpan<T, AddMultiplyOperator<T>>(x, y, multiplier, destination);
+
+        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> + <paramref name="y" />) * <paramref name="multiplier" /></c> for the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="multiplier">The third tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] + <paramref name="y" />[i]) * <paramref name="multiplier" /></c>.
+        /// </para>
+        /// <para>
+        /// If any of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void AddMultiply<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, T multiplier, Span<T> destination)
+            where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T> =>
+            InvokeSpanSpanScalarIntoSpan<T, AddMultiplyOperator<T>>(x, y, multiplier, destination);
+
+        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> + <paramref name="y" />) * <paramref name="multiplier" /></c> for the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="multiplier">The third tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="multiplier" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="multiplier"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] + <paramref name="y" />) * <paramref name="multiplier" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If any of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void AddMultiply<T>(ReadOnlySpan<T> x, T y, ReadOnlySpan<T> multiplier, Span<T> destination)
+            where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T> =>
+            InvokeSpanScalarSpanIntoSpan<T, AddMultiplyOperator<T>>(x, y, multiplier, destination);
+
+        /// <summary>(x + y) * z</summary>
+        internal readonly struct AddMultiplyOperator<T> : ITernaryOperator<T> where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T>
+        {
+            public static T Invoke(T x, T y, T z) => (x + y) * z;
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z) => (x + y) * z;
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> z) => (x + y) * z;
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z) => (x + y) * z;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Asin.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Asin.cs
new file mode 100644
index 000000000000..be216eaffe6c
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Asin.cs
@@ -0,0 +1,39 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise angle in radians whose sine is the specifed number.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Asin(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Asin<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, AsinOperator<T>>(x, destination);
+
+        /// <summary>T.Asin(x)</summary>
+        private readonly struct AsinOperator<T> : IUnaryOperator<T, T>
+            where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => false; // TODO: Vectorize
+            public static T Invoke(T x) => T.Asin(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.AsinPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.AsinPi.cs
new file mode 100644
index 000000000000..710628f5806e
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.AsinPi.cs
@@ -0,0 +1,39 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise angle in radians whose sine is the specifed number and divides the result by Pi.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.AsinPi(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void AsinPi<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, AsinPiOperator<T>>(x, destination);
+
+        /// <summary>T.AsinPi(x)</summary>
+        private readonly struct AsinPiOperator<T> : IUnaryOperator<T, T>
+            where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => AsinOperator<T>.Vectorizable;
+            public static T Invoke(T x) => T.AsinPi(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => AsinOperator<T>.Invoke(x) / Vector128.Create(T.Pi);
+            public static Vector256<T> Invoke(Vector256<T> x) => AsinOperator<T>.Invoke(x) / Vector256.Create(T.Pi);
+            public static Vector512<T> Invoke(Vector512<T> x) => AsinOperator<T>.Invoke(x) / Vector512.Create(T.Pi);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Asinh.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Asinh.cs
new file mode 100644
index 000000000000..d33168a42664
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Asinh.cs
@@ -0,0 +1,39 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise hyperbolic arc-sine of the specifed number.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Asinh(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Asinh<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IHyperbolicFunctions<T> =>
+            InvokeSpanIntoSpan<T, AsinhOperator<T>>(x, destination);
+
+        /// <summary>T.Asinh(x)</summary>
+        internal readonly struct AsinhOperator<T> : IUnaryOperator<T, T>
+            where T : IHyperbolicFunctions<T>
+        {
+            public static bool Vectorizable => false; // TODO: Vectorize
+            public static T Invoke(T x) => T.Asinh(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan.cs
new file mode 100644
index 000000000000..023dcc03d762
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan.cs
@@ -0,0 +1,39 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise angle in radians whose tangent is the specifed number.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Atan<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, AtanOperator<T>>(x, destination);
+
+        /// <summary>T.Atan(x)</summary>
+        internal readonly struct AtanOperator<T> : IUnaryOperator<T, T>
+            where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => false; // TODO: Vectorize
+            public static T Invoke(T x) => T.Atan(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan2.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan2.cs
new file mode 100644
index 000000000000..310738623184
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan2.cs
@@ -0,0 +1,80 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise arc-tangent for the quotient of two values in the specified tensors.</summary>
+        /// <param name="y">The first tensor, represented as a span.</param>
+        /// <param name="x">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="y" /> must be same as length of <paramref name="x" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan2(<paramref name="y" />[i], <paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Atan2<T>(ReadOnlySpan<T> y, ReadOnlySpan<T> x, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanSpanIntoSpan<T, Atan2Operator<T>>(y, x, destination);
+
+        /// <summary>Computes the element-wise arc-tangent for the quotient of two values in the specified tensors.</summary>
+        /// <param name="y">The first tensor, represented as a span.</param>
+        /// <param name="x">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan2(<paramref name="y" />[i], <paramref name="x" />)</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Atan2<T>(ReadOnlySpan<T> y, T x, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanScalarIntoSpan<T, Atan2Operator<T>>(y, x, destination);
+
+        /// <summary>Computes the element-wise arc-tangent for the quotient of two values in the specified tensors.</summary>
+        /// <param name="y">The first tensor, represented as a scalar.</param>
+        /// <param name="x">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan2(<paramref name="y" />, <paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Atan2<T>(T y, ReadOnlySpan<T> x, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeScalarSpanIntoSpan<T, Atan2Operator<T>>(y, x, destination);
+
+        /// <summary>T.Atan2(y, x)</summary>
+        private readonly struct Atan2Operator<T> : IBinaryOperator<T>
+            where T : IFloatingPointIeee754<T>
+        {
+            public static bool Vectorizable => false; // TODO: Vectorize
+            public static T Invoke(T y, T x) => T.Atan2(y, x);
+            public static Vector128<T> Invoke(Vector128<T> y, Vector128<T> x) => throw new NotSupportedException();
+            public static Vector256<T> Invoke(Vector256<T> y, Vector256<T> x) => throw new NotSupportedException();
+            public static Vector512<T> Invoke(Vector512<T> y, Vector512<T> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan2Pi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan2Pi.cs
new file mode 100644
index 000000000000..a055bdc08c21
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atan2Pi.cs
@@ -0,0 +1,80 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise arc-tangent for the quotient of two values in the specified tensors and divides the result by Pi.</summary>
+        /// <param name="y">The first tensor, represented as a span.</param>
+        /// <param name="x">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="y" /> must be same as length of <paramref name="x" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan2(<paramref name="y" />[i], <paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Atan2Pi<T>(ReadOnlySpan<T> y, ReadOnlySpan<T> x, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanSpanIntoSpan<T, Atan2PiOperator<T>>(y, x, destination);
+
+        /// <summary>Computes the element-wise arc-tangent for the quotient of two values in the specified tensors and divides the result by Pi.</summary>
+        /// <param name="y">The first tensor, represented as a span.</param>
+        /// <param name="x">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan2(<paramref name="y" />[i], <paramref name="x" />)</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Atan2Pi<T>(ReadOnlySpan<T> y, T x, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanScalarIntoSpan<T, Atan2PiOperator<T>>(y, x, destination);
+
+        /// <summary>Computes the element-wise arc-tangent for the quotient of two values in the specified tensors and divides the result by Pi.</summary>
+        /// <param name="y">The first tensor, represented as a scalar.</param>
+        /// <param name="x">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan2(<paramref name="y" />, <paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Atan2Pi<T>(T y, ReadOnlySpan<T> x, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeScalarSpanIntoSpan<T, Atan2PiOperator<T>>(y, x, destination);
+
+        /// <summary>T.Atan2Pi(y, x)</summary>
+        private readonly struct Atan2PiOperator<T> : IBinaryOperator<T>
+            where T : IFloatingPointIeee754<T>
+        {
+            public static bool Vectorizable => Atan2Operator<T>.Vectorizable;
+            public static T Invoke(T y, T x) => T.Atan2Pi(y, x);
+            public static Vector128<T> Invoke(Vector128<T> y, Vector128<T> x) => Atan2Operator<T>.Invoke(y, x) / Vector128.Create(T.Pi);
+            public static Vector256<T> Invoke(Vector256<T> y, Vector256<T> x) => Atan2Operator<T>.Invoke(y, x) / Vector256.Create(T.Pi);
+            public static Vector512<T> Invoke(Vector512<T> y, Vector512<T> x) => Atan2Operator<T>.Invoke(y, x) / Vector512.Create(T.Pi);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.AtanPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.AtanPi.cs
new file mode 100644
index 000000000000..67c350c50e06
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.AtanPi.cs
@@ -0,0 +1,39 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise angle in radians whose tangent is the specifed number and divides the result by Pi.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.AtanPi(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void AtanPi<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, AtanPiOperator<T>>(x, destination);
+
+        /// <summary>T.AtanPi(x)</summary>
+        internal readonly struct AtanPiOperator<T> : IUnaryOperator<T, T>
+            where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => AtanOperator<T>.Vectorizable;
+            public static T Invoke(T x) => T.AtanPi(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => AtanOperator<T>.Invoke(x) / Vector128.Create(T.Pi);
+            public static Vector256<T> Invoke(Vector256<T> x) => AtanOperator<T>.Invoke(x) / Vector256.Create(T.Pi);
+            public static Vector512<T> Invoke(Vector512<T> x) => AtanOperator<T>.Invoke(x) / Vector512.Create(T.Pi);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atanh.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atanh.cs
new file mode 100644
index 000000000000..83308d3d3a22
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Atanh.cs
@@ -0,0 +1,39 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise hyperbolic arc-tangent of the specifed number.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atanh(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Atanh<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IHyperbolicFunctions<T> =>
+            InvokeSpanIntoSpan<T, AtanhOperator<T>>(x, destination);
+
+        /// <summary>T.Atanh(x)</summary>
+        internal readonly struct AtanhOperator<T> : IUnaryOperator<T, T>
+            where T : IHyperbolicFunctions<T>
+        {
+            public static bool Vectorizable => false; // TODO: Vectorize
+            public static T Invoke(T x) => T.Atanh(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.BitwiseAnd.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.BitwiseAnd.cs
new file mode 100644
index 000000000000..90a94b4d5ff2
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.BitwiseAnd.cs
@@ -0,0 +1,52 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise bitwise AND of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] &amp; <paramref name="y" />[i]</c>.
+        /// </para>
+        /// </remarks>
+        public static void BitwiseAnd<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : IBitwiseOperators<T, T, T> =>
+            InvokeSpanSpanIntoSpan<T, BitwiseAndOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise bitwise AND of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] &amp; <paramref name="y" /></c>.
+        /// </para>
+        /// </remarks>
+        public static void BitwiseAnd<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : IBitwiseOperators<T, T, T> =>
+            InvokeSpanScalarIntoSpan<T, BitwiseAndOperator<T>>(x, y, destination);
+
+        /// <summary>x &amp; y</summary>
+        private readonly struct BitwiseAndOperator<T> : IBinaryOperator<T> where T : IBitwiseOperators<T, T, T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x, T y) => x & y;
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x & y;
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x & y;
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x & y;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.BitwiseOr.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.BitwiseOr.cs
new file mode 100644
index 000000000000..e052bcee899d
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.BitwiseOr.cs
@@ -0,0 +1,52 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise bitwise OR of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] | <paramref name="y" />[i]</c>.
+        /// </para>
+        /// </remarks>
+        public static void BitwiseOr<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : IBitwiseOperators<T, T, T> =>
+            InvokeSpanSpanIntoSpan<T, BitwiseOrOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise bitwise OR of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] | <paramref name="y" /></c>.
+        /// </para>
+        /// </remarks>
+        public static void BitwiseOr<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : IBitwiseOperators<T, T, T> =>
+            InvokeSpanScalarIntoSpan<T, BitwiseOrOperator<T>>(x, y, destination);
+
+        /// <summary>x | y</summary>
+        private readonly struct BitwiseOrOperator<T> : IBinaryOperator<T> where T : IBitwiseOperators<T, T, T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x, T y) => x | y;
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x | y;
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x | y;
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x | y;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cbrt.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cbrt.cs
new file mode 100644
index 000000000000..1bc8b85696c1
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cbrt.cs
@@ -0,0 +1,73 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise cube root of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Cbrt(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void Cbrt<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IRootFunctions<T> =>
+            InvokeSpanIntoSpan<T, CbrtOperator<T>>(x, destination);
+
+        /// <summary>T.Cbrt(x)</summary>
+        private readonly struct CbrtOperator<T> : IUnaryOperator<T, T>
+            where T : IRootFunctions<T>
+        {
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Cbrt(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return ExpOperator<float>.Invoke(LogOperator<float>.Invoke(x.AsSingle()) / Vector128.Create(3f)).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return ExpOperator<double>.Invoke(LogOperator<double>.Invoke(x.AsDouble()) / Vector128.Create(3d)).As<double, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return ExpOperator<float>.Invoke(LogOperator<float>.Invoke(x.AsSingle()) / Vector256.Create(3f)).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return ExpOperator<double>.Invoke(LogOperator<double>.Invoke(x.AsDouble()) / Vector256.Create(3d)).As<double, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return ExpOperator<float>.Invoke(LogOperator<float>.Invoke(x.AsSingle()) / Vector512.Create(3f)).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return ExpOperator<double>.Invoke(LogOperator<double>.Invoke(x.AsDouble()) / Vector512.Create(3d)).As<double, T>();
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Ceiling.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Ceiling.cs
new file mode 100644
index 000000000000..17d2466bfad3
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Ceiling.cs
@@ -0,0 +1,71 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise ceiling of numbers in the specified tensor.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Ceiling(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void Ceiling<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IFloatingPoint<T> =>
+            InvokeSpanIntoSpan<T, CeilingOperator<T>>(x, destination);
+
+        private readonly struct CeilingOperator<T> : IUnaryOperator<T, T> where T : IFloatingPoint<T>
+        {
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Ceiling(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return Vector128.Ceiling(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return Vector128.Ceiling(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return Vector256.Ceiling(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return Vector256.Ceiling(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return Vector512.Ceiling(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return Vector512.Ceiling(x.AsDouble()).As<double, T>();
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ConvertChecked.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ConvertChecked.cs
new file mode 100644
index 000000000000..a8340b2d22ea
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ConvertChecked.cs
@@ -0,0 +1,43 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>
+        /// Copies <paramref name="source"/> to <paramref name="destination"/>, converting each <typeparamref name="TFrom"/>
+        /// value to a <typeparamref name="TTo"/> value.
+        /// </summary>
+        /// <param name="source">The source span from which to copy values.</param>
+        /// <param name="destination">The destination span into which the converted values should be written.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = TTo.CreateChecked(<paramref name="source"/>[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void ConvertChecked<TFrom, TTo>(ReadOnlySpan<TFrom> source, Span<TTo> destination)
+            where TFrom : INumberBase<TFrom>
+            where TTo : INumberBase<TTo>
+        {
+            if (!TryConvertUniversal(source, destination))
+            {
+                InvokeSpanIntoSpan<TFrom, TTo, ConvertCheckedFallbackOperator<TFrom, TTo>>(source, destination);
+            }
+        }
+
+        /// <summary>T.CreateChecked(x)</summary>
+        internal readonly struct ConvertCheckedFallbackOperator<TFrom, TTo> : IUnaryOperator<TFrom, TTo> where TFrom : INumberBase<TFrom> where TTo : INumberBase<TTo>
+        {
+            public static bool Vectorizable => false;
+
+            public static TTo Invoke(TFrom x) => TTo.CreateChecked(x);
+            public static Vector128<TTo> Invoke(Vector128<TFrom> x) => throw new NotSupportedException();
+            public static Vector256<TTo> Invoke(Vector256<TFrom> x) => throw new NotSupportedException();
+            public static Vector512<TTo> Invoke(Vector512<TFrom> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ConvertHelpers.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ConvertHelpers.cs
new file mode 100644
index 000000000000..eea5c5ed3a23
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ConvertHelpers.cs
@@ -0,0 +1,658 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Performs conversions that are the same regardless of checked, truncating, or saturation.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)] // at most one of the branches will be kept
+        private static bool TryConvertUniversal<TFrom, TTo>(ReadOnlySpan<TFrom> source, Span<TTo> destination)
+            where TFrom : INumberBase<TFrom>
+            where TTo : INumberBase<TTo>
+        {
+            if (typeof(TFrom) == typeof(TTo))
+            {
+                if (source.Length > destination.Length)
+                {
+                    ThrowHelper.ThrowArgument_DestinationTooShort();
+                }
+
+                ValidateInputOutputSpanNonOverlapping(source, Rename<TTo, TFrom>(destination));
+
+                source.CopyTo(Rename<TTo, TFrom>(destination));
+                return true;
+            }
+
+            if (IsInt32Like<TFrom>() && typeof(TTo) == typeof(float))
+            {
+                InvokeSpanIntoSpan<int, float, ConvertInt32ToSingle>(Rename<TFrom, int>(source), Rename<TTo, float>(destination));
+                return true;
+            }
+
+            if (IsUInt32Like<TFrom>() && typeof(TTo) == typeof(float))
+            {
+                InvokeSpanIntoSpan<uint, float, ConvertUInt32ToSingle>(Rename<TFrom, uint>(source), Rename<TTo, float>(destination));
+                return true;
+            }
+
+            if (IsInt64Like<TFrom>() && typeof(TTo) == typeof(double))
+            {
+                InvokeSpanIntoSpan<long, double, ConvertInt64ToDouble>(Rename<TFrom, long>(source), Rename<TTo, double>(destination));
+                return true;
+            }
+
+            if (IsUInt64Like<TFrom>() && typeof(TTo) == typeof(double))
+            {
+                InvokeSpanIntoSpan<ulong, double, ConvertUInt64ToDouble>(Rename<TFrom, ulong>(source), Rename<TTo, double>(destination));
+                return true;
+            }
+
+            if (typeof(TFrom) == typeof(float) && typeof(TTo) == typeof(Half))
+            {
+                InvokeSpanIntoSpan_2to1<float, ushort, NarrowSingleToHalfAsUInt16Operator>(Rename<TFrom, float>(source), Rename<TTo, ushort>(destination));
+                return true;
+            }
+
+            if (typeof(TFrom) == typeof(Half) && typeof(TTo) == typeof(float))
+            {
+                InvokeSpanIntoSpan_1to2<short, float, WidenHalfAsInt16ToSingleOperator>(Rename<TFrom, short>(source), Rename<TTo, float>(destination));
+                return true;
+            }
+
+            if (typeof(TFrom) == typeof(float) && typeof(TTo) == typeof(double))
+            {
+                InvokeSpanIntoSpan_1to2<float, double, WidenSingleToDoubleOperator>(Rename<TFrom, float>(source), Rename<TTo, double>(destination));
+                return true;
+            }
+
+            if (typeof(TFrom) == typeof(double) && typeof(TTo) == typeof(float))
+            {
+                InvokeSpanIntoSpan_2to1<double, float, NarrowDoubleToSingleOperator>(Rename<TFrom, double>(source), Rename<TTo, float>(destination));
+                return true;
+            }
+
+            if (typeof(TFrom) == typeof(byte) && typeof(TTo) == typeof(ushort))
+            {
+                InvokeSpanIntoSpan_1to2<byte, ushort, WidenByteToUInt16Operator>(Rename<TFrom, byte>(source), Rename<TTo, ushort>(destination));
+                return true;
+            }
+
+            if (typeof(TFrom) == typeof(sbyte) && typeof(TTo) == typeof(short))
+            {
+                InvokeSpanIntoSpan_1to2<sbyte, short, WidenSByteToInt16Operator>(Rename<TFrom, sbyte>(source), Rename<TTo, short>(destination));
+                return true;
+            }
+
+            if (typeof(TFrom) == typeof(ushort) && IsUInt32Like<TTo>())
+            {
+                InvokeSpanIntoSpan_1to2<ushort, uint, WidenUInt16ToUInt32Operator>(Rename<TFrom, ushort>(source), Rename<TTo, uint>(destination));
+                return true;
+            }
+
+            if (typeof(TFrom) == typeof(short) && IsInt32Like<TTo>())
+            {
+                InvokeSpanIntoSpan_1to2<short, int, WidenInt16ToInt32Operator>(Rename<TFrom, short>(source), Rename<TTo, int>(destination));
+                return true;
+            }
+
+            if (IsUInt32Like<TTo>() && IsUInt64Like<TTo>())
+            {
+                InvokeSpanIntoSpan_1to2<uint, ulong, WidenUInt32ToUInt64Operator>(Rename<TFrom, uint>(source), Rename<TTo, ulong>(destination));
+                return true;
+            }
+
+            if (IsInt32Like<TFrom>() && IsInt64Like<TTo>())
+            {
+                InvokeSpanIntoSpan_1to2<int, long, WidenInt32ToInt64Operator>(Rename<TFrom, int>(source), Rename<TTo, long>(destination));
+                return true;
+            }
+
+            return false;
+        }
+
+        /// <summary>(int)float</summary>
+        private readonly struct ConvertInt32ToSingle : IUnaryOperator<int, float>
+        {
+            public static bool Vectorizable => true;
+
+            public static float Invoke(int x) => x;
+            public static Vector128<float> Invoke(Vector128<int> x) => Vector128.ConvertToSingle(x);
+            public static Vector256<float> Invoke(Vector256<int> x) => Vector256.ConvertToSingle(x);
+            public static Vector512<float> Invoke(Vector512<int> x) => Vector512.ConvertToSingle(x);
+        }
+
+        /// <summary>(uint)float</summary>
+        private readonly struct ConvertUInt32ToSingle : IUnaryOperator<uint, float>
+        {
+            public static bool Vectorizable => true;
+
+            public static float Invoke(uint x) => x;
+            public static Vector128<float> Invoke(Vector128<uint> x) => Vector128.ConvertToSingle(x);
+            public static Vector256<float> Invoke(Vector256<uint> x) => Vector256.ConvertToSingle(x);
+            public static Vector512<float> Invoke(Vector512<uint> x) => Vector512.ConvertToSingle(x);
+        }
+
+        /// <summary>(double)ulong</summary>
+        private readonly struct ConvertUInt64ToDouble : IUnaryOperator<ulong, double>
+        {
+            public static bool Vectorizable => true;
+
+            public static double Invoke(ulong x) => x;
+            public static Vector128<double> Invoke(Vector128<ulong> x) => Vector128.ConvertToDouble(x);
+            public static Vector256<double> Invoke(Vector256<ulong> x) => Vector256.ConvertToDouble(x);
+            public static Vector512<double> Invoke(Vector512<ulong> x) => Vector512.ConvertToDouble(x);
+        }
+
+        /// <summary>(double)long</summary>
+        private readonly struct ConvertInt64ToDouble : IUnaryOperator<long, double>
+        {
+            public static bool Vectorizable => true;
+
+            public static double Invoke(long x) => x;
+            public static Vector128<double> Invoke(Vector128<long> x) => Vector128.ConvertToDouble(x);
+            public static Vector256<double> Invoke(Vector256<long> x) => Vector256.ConvertToDouble(x);
+            public static Vector512<double> Invoke(Vector512<long> x) => Vector512.ConvertToDouble(x);
+        }
+
+        /// <summary>(double)float</summary>
+        private readonly struct WidenSingleToDoubleOperator : IUnaryOneToTwoOperator<float, double>
+        {
+            public static bool Vectorizable => true;
+
+            public static double Invoke(float x) => x;
+            public static (Vector128<double> Lower, Vector128<double> Upper) Invoke(Vector128<float> x) => Vector128.Widen(x);
+            public static (Vector256<double> Lower, Vector256<double> Upper) Invoke(Vector256<float> x) => Vector256.Widen(x);
+            public static (Vector512<double> Lower, Vector512<double> Upper) Invoke(Vector512<float> x) => Vector512.Widen(x);
+        }
+
+        /// <summary>(float)double</summary>
+        private readonly struct NarrowDoubleToSingleOperator : IUnaryTwoToOneOperator<double, float>
+        {
+            public static bool Vectorizable => true;
+
+            public static float Invoke(double x) => (float)x;
+            public static Vector128<float> Invoke(Vector128<double> lower, Vector128<double> upper) => Vector128.Narrow(lower, upper);
+            public static Vector256<float> Invoke(Vector256<double> lower, Vector256<double> upper) => Vector256.Narrow(lower, upper);
+            public static Vector512<float> Invoke(Vector512<double> lower, Vector512<double> upper) => Vector512.Narrow(lower, upper);
+        }
+
+        /// <summary>(ushort)byte</summary>
+        private readonly struct WidenByteToUInt16Operator : IUnaryOneToTwoOperator<byte, ushort>
+        {
+            public static bool Vectorizable => true;
+
+            public static ushort Invoke(byte x) => x;
+            public static (Vector128<ushort> Lower, Vector128<ushort> Upper) Invoke(Vector128<byte> x) => Vector128.Widen(x);
+            public static (Vector256<ushort> Lower, Vector256<ushort> Upper) Invoke(Vector256<byte> x) => Vector256.Widen(x);
+            public static (Vector512<ushort> Lower, Vector512<ushort> Upper) Invoke(Vector512<byte> x) => Vector512.Widen(x);
+        }
+
+        /// <summary>(short)sbyte</summary>
+        private readonly struct WidenSByteToInt16Operator : IUnaryOneToTwoOperator<sbyte, short>
+        {
+            public static bool Vectorizable => true;
+
+            public static short Invoke(sbyte x) => x;
+            public static (Vector128<short> Lower, Vector128<short> Upper) Invoke(Vector128<sbyte> x) => Vector128.Widen(x);
+            public static (Vector256<short> Lower, Vector256<short> Upper) Invoke(Vector256<sbyte> x) => Vector256.Widen(x);
+            public static (Vector512<short> Lower, Vector512<short> Upper) Invoke(Vector512<sbyte> x) => Vector512.Widen(x);
+        }
+
+        /// <summary>(uint)ushort</summary>
+        private readonly struct WidenUInt16ToUInt32Operator : IUnaryOneToTwoOperator<ushort, uint>
+        {
+            public static bool Vectorizable => true;
+
+            public static uint Invoke(ushort x) => x;
+            public static (Vector128<uint> Lower, Vector128<uint> Upper) Invoke(Vector128<ushort> x) => Vector128.Widen(x);
+            public static (Vector256<uint> Lower, Vector256<uint> Upper) Invoke(Vector256<ushort> x) => Vector256.Widen(x);
+            public static (Vector512<uint> Lower, Vector512<uint> Upper) Invoke(Vector512<ushort> x) => Vector512.Widen(x);
+        }
+
+        /// <summary>(int)short</summary>
+        private readonly struct WidenInt16ToInt32Operator : IUnaryOneToTwoOperator<short, int>
+        {
+            public static bool Vectorizable => true;
+
+            public static int Invoke(short x) => x;
+            public static (Vector128<int> Lower, Vector128<int> Upper) Invoke(Vector128<short> x) => Vector128.Widen(x);
+            public static (Vector256<int> Lower, Vector256<int> Upper) Invoke(Vector256<short> x) => Vector256.Widen(x);
+            public static (Vector512<int> Lower, Vector512<int> Upper) Invoke(Vector512<short> x) => Vector512.Widen(x);
+        }
+
+        /// <summary>(ulong)uint</summary>
+        private readonly struct WidenUInt32ToUInt64Operator : IUnaryOneToTwoOperator<uint, ulong>
+        {
+            public static bool Vectorizable => true;
+
+            public static ulong Invoke(uint x) => x;
+            public static (Vector128<ulong> Lower, Vector128<ulong> Upper) Invoke(Vector128<uint> x) => Vector128.Widen(x);
+            public static (Vector256<ulong> Lower, Vector256<ulong> Upper) Invoke(Vector256<uint> x) => Vector256.Widen(x);
+            public static (Vector512<ulong> Lower, Vector512<ulong> Upper) Invoke(Vector512<uint> x) => Vector512.Widen(x);
+        }
+
+        /// <summary>(long)int</summary>
+        private readonly struct WidenInt32ToInt64Operator : IUnaryOneToTwoOperator<int, long>
+        {
+            public static bool Vectorizable => true;
+
+            public static long Invoke(int x) => x;
+            public static (Vector128<long> Lower, Vector128<long> Upper) Invoke(Vector128<int> x) => Vector128.Widen(x);
+            public static (Vector256<long> Lower, Vector256<long> Upper) Invoke(Vector256<int> x) => Vector256.Widen(x);
+            public static (Vector512<long> Lower, Vector512<long> Upper) Invoke(Vector512<int> x) => Vector512.Widen(x);
+        }
+
+        private readonly struct WidenHalfAsInt16ToSingleOperator : IUnaryOneToTwoOperator<short, float>
+        {
+            // This implements a vectorized version of the `explicit operator float(Half value) operator`.
+            // See detailed description of the algorithm used here:
+            //     https://github.com/dotnet/runtime/blob/3bf40a378f00cb5bf18ff62796bc7097719b974c/src/libraries/System.Private.CoreLib/src/System/Half.cs#L1010-L1040
+            // The cast operator converts a Half represented as uint to a float. This does the same, with an input VectorXx<uint> and an output VectorXx<float>.
+            // The VectorXx<uint> is created by reading a vector of Halfs as a VectorXx<short> then widened to two VectorXx<int>s and cast to VectorXx<uint>s.
+            // We loop handling one input vector at a time, producing two output float vectors.
+
+            private const uint ExponentLowerBound = 0x3880_0000u; // The smallest positive normal number in Half, converted to Single
+            private const uint ExponentOffset = 0x3800_0000u; // BitConverter.SingleToUInt32Bits(1.0f) - ((uint)BitConverter.HalfToUInt16Bits((Half)1.0f) << 13)
+            private const uint SingleSignMask = 0x8000_0000; // float.SignMask; // Mask for sign bit in Single
+            private const uint HalfExponentMask = 0x7C00; // Mask for exponent bits in Half
+            private const uint HalfToSingleBitsMask = 0x0FFF_E000; // Mask for bits in Single converted from Half
+
+            public static bool Vectorizable => true;
+
+            public static float Invoke(short x) => (float)Unsafe.BitCast<short, Half>(x);
+
+            public static (Vector128<float> Lower, Vector128<float> Upper) Invoke(Vector128<short> x)
+            {
+                (Vector128<int> lowerInt32, Vector128<int> upperInt32) = Vector128.Widen(x);
+                return
+                    (HalfAsWidenedUInt32ToSingle(lowerInt32.AsUInt32()),
+                     HalfAsWidenedUInt32ToSingle(upperInt32.AsUInt32()));
+
+                static Vector128<float> HalfAsWidenedUInt32ToSingle(Vector128<uint> value)
+                {
+                    // Extract sign bit of value
+                    Vector128<uint> sign = value & Vector128.Create(SingleSignMask);
+
+                    // Copy sign bit to upper bits
+                    Vector128<uint> bitValueInProcess = value;
+
+                    // Extract exponent bits of value (BiasedExponent is not for here as it performs unnecessary shift)
+                    Vector128<uint> offsetExponent = bitValueInProcess & Vector128.Create(HalfExponentMask);
+
+                    // ~0u when value is subnormal, 0 otherwise
+                    Vector128<uint> subnormalMask = Vector128.Equals(offsetExponent, Vector128<uint>.Zero);
+
+                    // ~0u when value is either Infinity or NaN, 0 otherwise
+                    Vector128<uint> infinityOrNaNMask = Vector128.Equals(offsetExponent, Vector128.Create(HalfExponentMask));
+
+                    // 0x3880_0000u if value is subnormal, 0 otherwise
+                    Vector128<uint> maskedExponentLowerBound = subnormalMask & Vector128.Create(ExponentLowerBound);
+
+                    // 0x3880_0000u if value is subnormal, 0x3800_0000u otherwise
+                    Vector128<uint> offsetMaskedExponentLowerBound = Vector128.Create(ExponentOffset) | maskedExponentLowerBound;
+
+                    // Match the position of the boundary of exponent bits and fraction bits with IEEE 754 Binary32(Single)
+                    bitValueInProcess = Vector128.ShiftLeft(bitValueInProcess, 13);
+
+                    // Double the offsetMaskedExponentLowerBound if value is either Infinity or NaN
+                    offsetMaskedExponentLowerBound = Vector128.ConditionalSelect(Vector128.Equals(infinityOrNaNMask, Vector128<uint>.Zero),
+                        offsetMaskedExponentLowerBound,
+                        Vector128.ShiftLeft(offsetMaskedExponentLowerBound, 1));
+
+                    // Extract exponent bits and fraction bits of value
+                    bitValueInProcess &= Vector128.Create(HalfToSingleBitsMask);
+
+                    // Adjust exponent to match the range of exponent
+                    bitValueInProcess += offsetMaskedExponentLowerBound;
+
+                    // If value is subnormal, remove unnecessary 1 on top of fraction bits.
+                    Vector128<uint> absoluteValue = (bitValueInProcess.AsSingle() - maskedExponentLowerBound.AsSingle()).AsUInt32();
+
+                    // Merge sign bit with rest
+                    return (absoluteValue | sign).AsSingle();
+                }
+            }
+
+            public static (Vector256<float> Lower, Vector256<float> Upper) Invoke(Vector256<short> x)
+            {
+                (Vector256<int> lowerInt32, Vector256<int> upperInt32) = Vector256.Widen(x);
+                return
+                    (HalfAsWidenedUInt32ToSingle(lowerInt32.AsUInt32()),
+                     HalfAsWidenedUInt32ToSingle(upperInt32.AsUInt32()));
+
+                static Vector256<float> HalfAsWidenedUInt32ToSingle(Vector256<uint> value)
+                {
+                    // Extract sign bit of value
+                    Vector256<uint> sign = value & Vector256.Create(SingleSignMask);
+
+                    // Copy sign bit to upper bits
+                    Vector256<uint> bitValueInProcess = value;
+
+                    // Extract exponent bits of value (BiasedExponent is not for here as it performs unnecessary shift)
+                    Vector256<uint> offsetExponent = bitValueInProcess & Vector256.Create(HalfExponentMask);
+
+                    // ~0u when value is subnormal, 0 otherwise
+                    Vector256<uint> subnormalMask = Vector256.Equals(offsetExponent, Vector256<uint>.Zero);
+
+                    // ~0u when value is either Infinity or NaN, 0 otherwise
+                    Vector256<uint> infinityOrNaNMask = Vector256.Equals(offsetExponent, Vector256.Create(HalfExponentMask));
+
+                    // 0x3880_0000u if value is subnormal, 0 otherwise
+                    Vector256<uint> maskedExponentLowerBound = subnormalMask & Vector256.Create(ExponentLowerBound);
+
+                    // 0x3880_0000u if value is subnormal, 0x3800_0000u otherwise
+                    Vector256<uint> offsetMaskedExponentLowerBound = Vector256.Create(ExponentOffset) | maskedExponentLowerBound;
+
+                    // Match the position of the boundary of exponent bits and fraction bits with IEEE 754 Binary32(Single)
+                    bitValueInProcess = Vector256.ShiftLeft(bitValueInProcess, 13);
+
+                    // Double the offsetMaskedExponentLowerBound if value is either Infinity or NaN
+                    offsetMaskedExponentLowerBound = Vector256.ConditionalSelect(Vector256.Equals(infinityOrNaNMask, Vector256<uint>.Zero),
+                        offsetMaskedExponentLowerBound,
+                        Vector256.ShiftLeft(offsetMaskedExponentLowerBound, 1));
+
+                    // Extract exponent bits and fraction bits of value
+                    bitValueInProcess &= Vector256.Create(HalfToSingleBitsMask);
+
+                    // Adjust exponent to match the range of exponent
+                    bitValueInProcess += offsetMaskedExponentLowerBound;
+
+                    // If value is subnormal, remove unnecessary 1 on top of fraction bits.
+                    Vector256<uint> absoluteValue = (bitValueInProcess.AsSingle() - maskedExponentLowerBound.AsSingle()).AsUInt32();
+
+                    // Merge sign bit with rest
+                    return (absoluteValue | sign).AsSingle();
+                }
+            }
+
+            public static (Vector512<float> Lower, Vector512<float> Upper) Invoke(Vector512<short> x)
+            {
+                (Vector512<int> lowerInt32, Vector512<int> upperInt32) = Vector512.Widen(x);
+                return
+                    (HalfAsWidenedUInt32ToSingle(lowerInt32.AsUInt32()),
+                     HalfAsWidenedUInt32ToSingle(upperInt32.AsUInt32()));
+
+                static Vector512<float> HalfAsWidenedUInt32ToSingle(Vector512<uint> value)
+                {
+                    // Extract sign bit of value
+                    Vector512<uint> sign = value & Vector512.Create(SingleSignMask);
+
+                    // Copy sign bit to upper bits
+                    Vector512<uint> bitValueInProcess = value;
+
+                    // Extract exponent bits of value (BiasedExponent is not for here as it performs unnecessary shift)
+                    Vector512<uint> offsetExponent = bitValueInProcess & Vector512.Create(HalfExponentMask);
+
+                    // ~0u when value is subnormal, 0 otherwise
+                    Vector512<uint> subnormalMask = Vector512.Equals(offsetExponent, Vector512<uint>.Zero);
+
+                    // ~0u when value is either Infinity or NaN, 0 otherwise
+                    Vector512<uint> infinityOrNaNMask = Vector512.Equals(offsetExponent, Vector512.Create(HalfExponentMask));
+
+                    // 0x3880_0000u if value is subnormal, 0 otherwise
+                    Vector512<uint> maskedExponentLowerBound = subnormalMask & Vector512.Create(ExponentLowerBound);
+
+                    // 0x3880_0000u if value is subnormal, 0x3800_0000u otherwise
+                    Vector512<uint> offsetMaskedExponentLowerBound = Vector512.Create(ExponentOffset) | maskedExponentLowerBound;
+
+                    // Match the position of the boundary of exponent bits and fraction bits with IEEE 754 Binary32(Single)
+                    bitValueInProcess = Vector512.ShiftLeft(bitValueInProcess, 13);
+
+                    // Double the offsetMaskedExponentLowerBound if value is either Infinity or NaN
+                    offsetMaskedExponentLowerBound = Vector512.ConditionalSelect(Vector512.Equals(infinityOrNaNMask, Vector512<uint>.Zero),
+                        offsetMaskedExponentLowerBound,
+                        Vector512.ShiftLeft(offsetMaskedExponentLowerBound, 1));
+
+                    // Extract exponent bits and fraction bits of value
+                    bitValueInProcess &= Vector512.Create(HalfToSingleBitsMask);
+
+                    // Adjust exponent to match the range of exponent
+                    bitValueInProcess += offsetMaskedExponentLowerBound;
+
+                    // If value is subnormal, remove unnecessary 1 on top of fraction bits.
+                    Vector512<uint> absoluteValue = (bitValueInProcess.AsSingle() - maskedExponentLowerBound.AsSingle()).AsUInt32();
+
+                    // Merge sign bit with rest
+                    return (absoluteValue | sign).AsSingle();
+                }
+            }
+        }
+
+        private readonly struct NarrowSingleToHalfAsUInt16Operator : IUnaryTwoToOneOperator<float, ushort>
+        {
+            // This implements a vectorized version of the `explicit operator Half(float value) operator`.
+            // See detailed description of the algorithm used here:
+            //     https://github.com/dotnet/runtime/blob/ca8d6f0420096831766ec11c7d400e4f7ccc7a34/src/libraries/System.Private.CoreLib/src/System/Half.cs#L606-L714
+            // The cast operator converts a float to a Half represented as a UInt32, then narrows to a UInt16, and reinterpret casts to Half.
+            // This does the same, with an input VectorXx<float> and an output VectorXx<uint>.
+            // Loop handling two input vectors at a time; each input float is double the size of each output Half,
+            // so we need two vectors of floats to produce one vector of Halfs. Half isn't supported in VectorXx<T>,
+            // so we convert the VectorXx<float> to a VectorXx<uint>, and the caller then uses this twice, narrows the combination
+            // into a VectorXx<ushort>, and then saves that out to the destination `ref Half` reinterpreted as `ref ushort`.
+
+            private const uint MinExp = 0x3880_0000u; // Minimum exponent for rounding
+            private const uint Exponent126 = 0x3f00_0000u; // Exponent displacement #1
+            private const uint SingleBiasedExponentMask = 0x7F80_0000; // float.BiasedExponentMask; // Exponent mask
+            private const uint Exponent13 = 0x0680_0000u; // Exponent displacement #2
+            private const float MaxHalfValueBelowInfinity = 65520.0f; // Maximum value that is not Infinity in Half
+            private const uint ExponentMask = 0x7C00; // Mask for exponent bits in Half
+            private const uint SingleSignMask = 0x8000_0000u; // float.SignMask; // Mask for sign bit in float
+
+            public static bool Vectorizable => true;
+
+            public static ushort Invoke(float x) => Unsafe.BitCast<Half, ushort>((Half)x);
+
+            public static Vector128<ushort> Invoke(Vector128<float> lower, Vector128<float> upper)
+            {
+                return Vector128.Narrow(
+                    SingleToHalfAsWidenedUInt32(lower),
+                    SingleToHalfAsWidenedUInt32(upper));
+
+                static Vector128<uint> SingleToHalfAsWidenedUInt32(Vector128<float> value)
+                {
+                    Vector128<uint> bitValue = value.AsUInt32();
+
+                    // Extract sign bit
+                    Vector128<uint> sign = Vector128.ShiftRightLogical(bitValue & Vector128.Create(SingleSignMask), 16);
+
+                    // Detecting NaN (0u if value is NaN; otherwise, ~0u)
+                    Vector128<uint> realMask = Vector128.Equals(value, value).AsUInt32();
+
+                    // Clear sign bit
+                    value = Vector128.Abs(value);
+
+                    // Rectify values that are Infinity in Half.
+                    value = Vector128.Min(Vector128.Create(MaxHalfValueBelowInfinity), value);
+
+                    // Rectify lower exponent
+                    Vector128<uint> exponentOffset0 = Vector128.Max(value, Vector128.Create(MinExp).AsSingle()).AsUInt32();
+
+                    // Extract exponent
+                    exponentOffset0 &= Vector128.Create(SingleBiasedExponentMask);
+
+                    // Add exponent by 13
+                    exponentOffset0 += Vector128.Create(Exponent13);
+
+                    // Round Single into Half's precision (NaN also gets modified here, just setting the MSB of fraction)
+                    value += exponentOffset0.AsSingle();
+                    bitValue = value.AsUInt32();
+
+                    // Only exponent bits will be modified if NaN
+                    Vector128<uint> maskedHalfExponentForNaN = ~realMask & Vector128.Create(ExponentMask);
+
+                    // Subtract exponent by 126
+                    bitValue -= Vector128.Create(Exponent126);
+
+                    // Shift bitValue right by 13 bits to match the boundary of exponent part and fraction part.
+                    Vector128<uint> newExponent = Vector128.ShiftRightLogical(bitValue, 13);
+
+                    // Clear the fraction parts if the value was NaN.
+                    bitValue &= realMask;
+
+                    // Merge the exponent part with fraction part, and add the exponent part and fraction part's overflow.
+                    bitValue += newExponent;
+
+                    // Clear exponents if value is NaN
+                    bitValue &= ~maskedHalfExponentForNaN;
+
+                    // Merge sign bit with possible NaN exponent
+                    Vector128<uint> signAndMaskedExponent = maskedHalfExponentForNaN | sign;
+
+                    // Merge sign bit and possible NaN exponent
+                    bitValue |= signAndMaskedExponent;
+
+                    // The final result
+                    return bitValue;
+                }
+            }
+
+            public static Vector256<ushort> Invoke(Vector256<float> lower, Vector256<float> upper)
+            {
+                return Vector256.Narrow(
+                    SingleToHalfAsWidenedUInt32(lower),
+                    SingleToHalfAsWidenedUInt32(upper));
+
+                static Vector256<uint> SingleToHalfAsWidenedUInt32(Vector256<float> value)
+                {
+                    Vector256<uint> bitValue = value.AsUInt32();
+
+                    // Extract sign bit
+                    Vector256<uint> sign = Vector256.ShiftRightLogical(bitValue & Vector256.Create(SingleSignMask), 16);
+
+                    // Detecting NaN (0u if value is NaN; otherwise, ~0u)
+                    Vector256<uint> realMask = Vector256.Equals(value, value).AsUInt32();
+
+                    // Clear sign bit
+                    value = Vector256.Abs(value);
+
+                    // Rectify values that are Infinity in Half.
+                    value = Vector256.Min(Vector256.Create(MaxHalfValueBelowInfinity), value);
+
+                    // Rectify lower exponent
+                    Vector256<uint> exponentOffset0 = Vector256.Max(value, Vector256.Create(MinExp).AsSingle()).AsUInt32();
+
+                    // Extract exponent
+                    exponentOffset0 &= Vector256.Create(SingleBiasedExponentMask);
+
+                    // Add exponent by 13
+                    exponentOffset0 += Vector256.Create(Exponent13);
+
+                    // Round Single into Half's precision (NaN also gets modified here, just setting the MSB of fraction)
+                    value += exponentOffset0.AsSingle();
+                    bitValue = value.AsUInt32();
+
+                    // Only exponent bits will be modified if NaN
+                    Vector256<uint> maskedHalfExponentForNaN = ~realMask & Vector256.Create(ExponentMask);
+
+                    // Subtract exponent by 126
+                    bitValue -= Vector256.Create(Exponent126);
+
+                    // Shift bitValue right by 13 bits to match the boundary of exponent part and fraction part.
+                    Vector256<uint> newExponent = Vector256.ShiftRightLogical(bitValue, 13);
+
+                    // Clear the fraction parts if the value was NaN.
+                    bitValue &= realMask;
+
+                    // Merge the exponent part with fraction part, and add the exponent part and fraction part's overflow.
+                    bitValue += newExponent;
+
+                    // Clear exponents if value is NaN
+                    bitValue &= ~maskedHalfExponentForNaN;
+
+                    // Merge sign bit with possible NaN exponent
+                    Vector256<uint> signAndMaskedExponent = maskedHalfExponentForNaN | sign;
+
+                    // Merge sign bit and possible NaN exponent
+                    bitValue |= signAndMaskedExponent;
+
+                    // The final result
+                    return bitValue;
+                }
+            }
+
+            public static Vector512<ushort> Invoke(Vector512<float> lower, Vector512<float> upper)
+            {
+                return Vector512.Narrow(
+                    SingleToHalfAsWidenedUInt32(lower),
+                    SingleToHalfAsWidenedUInt32(upper));
+
+                static Vector512<uint> SingleToHalfAsWidenedUInt32(Vector512<float> value)
+                {
+                    Vector512<uint> bitValue = value.AsUInt32();
+
+                    // Extract sign bit
+                    Vector512<uint> sign = Vector512.ShiftRightLogical(bitValue & Vector512.Create(SingleSignMask), 16);
+
+                    // Detecting NaN (0u if value is NaN; otherwise, ~0u)
+                    Vector512<uint> realMask = Vector512.Equals(value, value).AsUInt32();
+
+                    // Clear sign bit
+                    value = Vector512.Abs(value);
+
+                    // Rectify values that are Infinity in Half.
+                    value = Vector512.Min(Vector512.Create(MaxHalfValueBelowInfinity), value);
+
+                    // Rectify lower exponent
+                    Vector512<uint> exponentOffset0 = Vector512.Max(value, Vector512.Create(MinExp).AsSingle()).AsUInt32();
+
+                    // Extract exponent
+                    exponentOffset0 &= Vector512.Create(SingleBiasedExponentMask);
+
+                    // Add exponent by 13
+                    exponentOffset0 += Vector512.Create(Exponent13);
+
+                    // Round Single into Half's precision (NaN also gets modified here, just setting the MSB of fraction)
+                    value += exponentOffset0.AsSingle();
+                    bitValue = value.AsUInt32();
+
+                    // Only exponent bits will be modified if NaN
+                    Vector512<uint> maskedHalfExponentForNaN = ~realMask & Vector512.Create(ExponentMask);
+
+                    // Subtract exponent by 126
+                    bitValue -= Vector512.Create(Exponent126);
+
+                    // Shift bitValue right by 13 bits to match the boundary of exponent part and fraction part.
+                    Vector512<uint> newExponent = Vector512.ShiftRightLogical(bitValue, 13);
+
+                    // Clear the fraction parts if the value was NaN.
+                    bitValue &= realMask;
+
+                    // Merge the exponent part with fraction part, and add the exponent part and fraction part's overflow.
+                    bitValue += newExponent;
+
+                    // Clear exponents if value is NaN
+                    bitValue &= ~maskedHalfExponentForNaN;
+
+                    // Merge sign bit with possible NaN exponent
+                    Vector512<uint> signAndMaskedExponent = maskedHalfExponentForNaN | sign;
+
+                    // Merge sign bit and possible NaN exponent
+                    bitValue |= signAndMaskedExponent;
+
+                    // The final result
+                    return bitValue;
+                }
+            }
+        }
+
+        /// <summary>Gets whether <typeparamref name="T"/> is <see cref="uint"/> or <see cref="nuint"/> if in a 32-bit process.</summary>
+        private static bool IsUInt32Like<T>() => typeof(T) == typeof(uint) || (IntPtr.Size == 4 && typeof(T) == typeof(nuint));
+
+        /// <summary>Gets whether <typeparamref name="T"/> is <see cref="int"/> or <see cref="nint"/> if in a 32-bit process.</summary>
+        private static bool IsInt32Like<T>() => typeof(T) == typeof(int) || (IntPtr.Size == 4 && typeof(T) == typeof(nint));
+
+        /// <summary>Gets whether <typeparamref name="T"/> is <see cref="ulong"/> or <see cref="nuint"/> if in a 64-bit process.</summary>
+        private static bool IsUInt64Like<T>() => typeof(T) == typeof(ulong) || (IntPtr.Size == 8 && typeof(T) == typeof(nuint));
+
+        /// <summary>Gets whether <typeparamref name="T"/> is <see cref="long"/> or <see cref="nint"/> if in a 64-bit process.</summary>
+        private static bool IsInt64Like<T>() => typeof(T) == typeof(long) || (IntPtr.Size == 8 && typeof(T) == typeof(nint));
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ConvertSaturating.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ConvertSaturating.cs
new file mode 100644
index 000000000000..87a26c3c2164
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ConvertSaturating.cs
@@ -0,0 +1,43 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>
+        /// Copies <paramref name="source"/> to <paramref name="destination"/>, converting each <typeparamref name="TFrom"/>
+        /// value to a <typeparamref name="TTo"/> value.
+        /// </summary>
+        /// <param name="source">The source span from which to copy values.</param>
+        /// <param name="destination">The destination span into which the converted values should be written.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = TTo.CreateSaturating(<paramref name="source"/>[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void ConvertSaturating<TFrom, TTo>(ReadOnlySpan<TFrom> source, Span<TTo> destination)
+            where TFrom : INumberBase<TFrom>
+            where TTo : INumberBase<TTo>
+        {
+            if (!TryConvertUniversal(source, destination))
+            {
+                InvokeSpanIntoSpan<TFrom, TTo, ConvertSaturatingFallbackOperator<TFrom, TTo>>(source, destination);
+            }
+        }
+
+        /// <summary>T.CreateSaturating(x)</summary>
+        internal readonly struct ConvertSaturatingFallbackOperator<TFrom, TTo> : IUnaryOperator<TFrom, TTo> where TFrom : INumberBase<TFrom> where TTo : INumberBase<TTo>
+        {
+            public static bool Vectorizable => false;
+
+            public static TTo Invoke(TFrom x) => TTo.CreateSaturating(x);
+            public static Vector128<TTo> Invoke(Vector128<TFrom> x) => throw new NotSupportedException();
+            public static Vector256<TTo> Invoke(Vector256<TFrom> x) => throw new NotSupportedException();
+            public static Vector512<TTo> Invoke(Vector512<TFrom> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ConvertTruncating.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ConvertTruncating.cs
new file mode 100644
index 000000000000..29edffd4bcec
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ConvertTruncating.cs
@@ -0,0 +1,224 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>
+        /// Copies <paramref name="source"/> to <paramref name="destination"/>, converting each <typeparamref name="TFrom"/>
+        /// value to a <typeparamref name="TTo"/> value.
+        /// </summary>
+        /// <param name="source">The source span from which to copy values.</param>
+        /// <param name="destination">The destination span into which the converted values should be written.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = TTo.CreateTruncating(<paramref name="source"/>[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void ConvertTruncating<TFrom, TTo>(ReadOnlySpan<TFrom> source, Span<TTo> destination)
+            where TFrom : INumberBase<TFrom>
+            where TTo : INumberBase<TTo>
+        {
+            if (TryConvertUniversal(source, destination))
+            {
+                return;
+            }
+
+            if (((typeof(TFrom) == typeof(byte) || typeof(TFrom) == typeof(sbyte)) && (typeof(TTo) == typeof(byte) || typeof(TTo) == typeof(sbyte))) ||
+                ((typeof(TFrom) == typeof(ushort) || typeof(TFrom) == typeof(short)) && (typeof(TTo) == typeof(ushort) || typeof(TTo) == typeof(short))) ||
+                ((IsUInt32Like<TFrom>() || IsInt32Like<TFrom>()) && (IsUInt32Like<TTo>() || IsInt32Like<TTo>())) ||
+                ((IsUInt64Like<TFrom>() || IsInt64Like<TFrom>()) && (IsUInt64Like<TTo>() || IsInt64Like<TTo>())))
+            {
+                source.CopyTo(Rename<TTo, TFrom>(destination));
+                return;
+            }
+
+            if (typeof(TFrom) == typeof(float) && IsUInt32Like<TTo>())
+            {
+                InvokeSpanIntoSpan<float, uint, ConvertSingleToUInt32>(Rename<TFrom, float>(source), Rename<TTo, uint>(destination));
+                return;
+            }
+
+            if (typeof(TFrom) == typeof(float) && IsInt32Like<TTo>())
+            {
+                InvokeSpanIntoSpan<float, int, ConvertSingleToInt32>(Rename<TFrom, float>(source), Rename<TTo, int>(destination));
+                return;
+            }
+
+            if (typeof(TFrom) == typeof(double) && IsUInt64Like<TTo>())
+            {
+                InvokeSpanIntoSpan<double, ulong, ConvertDoubleToUInt64>(Rename<TFrom, double>(source), Rename<TTo, ulong>(destination));
+                return;
+            }
+
+            if (typeof(TFrom) == typeof(double) && IsInt64Like<TTo>())
+            {
+                InvokeSpanIntoSpan<double, long, ConvertDoubleToInt64>(Rename<TFrom, double>(source), Rename<TTo, long>(destination));
+                return;
+            }
+
+            if (typeof(TFrom) == typeof(ushort) && typeof(TTo) == typeof(byte))
+            {
+                InvokeSpanIntoSpan_2to1<ushort, byte, NarrowUInt16ToByteOperator>(Rename<TFrom, ushort>(source), Rename<TTo, byte>(destination));
+                return;
+            }
+
+            if (typeof(TFrom) == typeof(short) && typeof(TTo) == typeof(sbyte))
+            {
+                InvokeSpanIntoSpan_2to1<short, sbyte, NarrowInt16ToSByteOperator>(Rename<TFrom, short>(source), Rename<TTo, sbyte>(destination));
+                return;
+            }
+
+            if (IsUInt32Like<TFrom>() && typeof(TTo) == typeof(ushort))
+            {
+                InvokeSpanIntoSpan_2to1<uint, ushort, NarrowUInt32ToUInt16Operator>(Rename<TFrom, uint>(source), Rename<TTo, ushort>(destination));
+                return;
+            }
+
+            if (IsInt32Like<TFrom>() && typeof(TTo) == typeof(short))
+            {
+                InvokeSpanIntoSpan_2to1<int, short, NarrowInt32ToInt16Operator>(Rename<TFrom, int>(source), Rename<TTo, short>(destination));
+                return;
+            }
+
+            if (IsUInt64Like<TFrom>() && IsUInt32Like<TTo>())
+            {
+                InvokeSpanIntoSpan_2to1<ulong, uint, NarrowUInt64ToUInt32Operator>(Rename<TFrom, ulong>(source), Rename<TTo, uint>(destination));
+                return;
+            }
+
+            if (IsInt64Like<TFrom>() && IsInt32Like<TTo>())
+            {
+                InvokeSpanIntoSpan_2to1<long, int, NarrowInt64ToInt32Operator>(Rename<TFrom, long>(source), Rename<TTo, int>(destination));
+                return;
+            }
+
+            InvokeSpanIntoSpan<TFrom, TTo, ConvertTruncatingFallbackOperator<TFrom, TTo>>(source, destination);
+        }
+
+        /// <summary>(float)int</summary>
+        private readonly struct ConvertSingleToInt32 : IUnaryOperator<float, int>
+        {
+            public static bool Vectorizable => false; // TODO https://github.com/dotnet/runtime/pull/97529: make this true once vectorized behavior matches scalar
+
+            public static int Invoke(float x) => int.CreateTruncating(x);
+            public static Vector128<int> Invoke(Vector128<float> x) => Vector128.ConvertToInt32(x);
+            public static Vector256<int> Invoke(Vector256<float> x) => Vector256.ConvertToInt32(x);
+            public static Vector512<int> Invoke(Vector512<float> x) => Vector512.ConvertToInt32(x);
+        }
+
+        /// <summary>(float)uint</summary>
+        private readonly struct ConvertSingleToUInt32 : IUnaryOperator<float, uint>
+        {
+            public static bool Vectorizable => false; // TODO https://github.com/dotnet/runtime/pull/97529: make this true once vectorized behavior matches scalar
+
+            public static uint Invoke(float x) => uint.CreateTruncating(x);
+            public static Vector128<uint> Invoke(Vector128<float> x) => Vector128.ConvertToUInt32(x);
+            public static Vector256<uint> Invoke(Vector256<float> x) => Vector256.ConvertToUInt32(x);
+            public static Vector512<uint> Invoke(Vector512<float> x) => Vector512.ConvertToUInt32(x);
+        }
+
+        /// <summary>(ulong)double</summary>
+        private readonly struct ConvertDoubleToUInt64 : IUnaryOperator<double, ulong>
+        {
+            public static bool Vectorizable => false; // TODO https://github.com/dotnet/runtime/pull/97529: make this true once vectorized behavior matches scalar
+
+            public static ulong Invoke(double x) => ulong.CreateTruncating(x);
+            public static Vector128<ulong> Invoke(Vector128<double> x) => Vector128.ConvertToUInt64(x);
+            public static Vector256<ulong> Invoke(Vector256<double> x) => Vector256.ConvertToUInt64(x);
+            public static Vector512<ulong> Invoke(Vector512<double> x) => Vector512.ConvertToUInt64(x);
+        }
+
+        /// <summary>(long)double</summary>
+        private readonly struct ConvertDoubleToInt64 : IUnaryOperator<double, long>
+        {
+            public static bool Vectorizable => false; // TODO https://github.com/dotnet/runtime/pull/97529: make this true once vectorized behavior matches scalar
+
+            public static long Invoke(double x) => long.CreateTruncating(x);
+            public static Vector128<long> Invoke(Vector128<double> x) => Vector128.ConvertToInt64(x);
+            public static Vector256<long> Invoke(Vector256<double> x) => Vector256.ConvertToInt64(x);
+            public static Vector512<long> Invoke(Vector512<double> x) => Vector512.ConvertToInt64(x);
+        }
+
+        /// <summary>(byte)ushort</summary>
+        private readonly struct NarrowUInt16ToByteOperator : IUnaryTwoToOneOperator<ushort, byte>
+        {
+            public static bool Vectorizable => true;
+
+            public static byte Invoke(ushort x) => (byte)x;
+            public static Vector128<byte> Invoke(Vector128<ushort> lower, Vector128<ushort> upper) => Vector128.Narrow(lower, upper);
+            public static Vector256<byte> Invoke(Vector256<ushort> lower, Vector256<ushort> upper) => Vector256.Narrow(lower, upper);
+            public static Vector512<byte> Invoke(Vector512<ushort> lower, Vector512<ushort> upper) => Vector512.Narrow(lower, upper);
+        }
+
+        /// <summary>(sbyte)short</summary>
+        private readonly struct NarrowInt16ToSByteOperator : IUnaryTwoToOneOperator<short, sbyte>
+        {
+            public static bool Vectorizable => true;
+
+            public static sbyte Invoke(short x) => (sbyte)x;
+            public static Vector128<sbyte> Invoke(Vector128<short> lower, Vector128<short> upper) => Vector128.Narrow(lower, upper);
+            public static Vector256<sbyte> Invoke(Vector256<short> lower, Vector256<short> upper) => Vector256.Narrow(lower, upper);
+            public static Vector512<sbyte> Invoke(Vector512<short> lower, Vector512<short> upper) => Vector512.Narrow(lower, upper);
+        }
+
+        /// <summary>(ushort)uint</summary>
+        private readonly struct NarrowUInt32ToUInt16Operator : IUnaryTwoToOneOperator<uint, ushort>
+        {
+            public static bool Vectorizable => true;
+
+            public static ushort Invoke(uint x) => (ushort)x;
+            public static Vector128<ushort> Invoke(Vector128<uint> lower, Vector128<uint> upper) => Vector128.Narrow(lower, upper);
+            public static Vector256<ushort> Invoke(Vector256<uint> lower, Vector256<uint> upper) => Vector256.Narrow(lower, upper);
+            public static Vector512<ushort> Invoke(Vector512<uint> lower, Vector512<uint> upper) => Vector512.Narrow(lower, upper);
+        }
+
+        /// <summary>(short)int</summary>
+        private readonly struct NarrowInt32ToInt16Operator : IUnaryTwoToOneOperator<int, short>
+        {
+            public static bool Vectorizable => true;
+
+            public static short Invoke(int x) => (short)x;
+            public static Vector128<short> Invoke(Vector128<int> lower, Vector128<int> upper) => Vector128.Narrow(lower, upper);
+            public static Vector256<short> Invoke(Vector256<int> lower, Vector256<int> upper) => Vector256.Narrow(lower, upper);
+            public static Vector512<short> Invoke(Vector512<int> lower, Vector512<int> upper) => Vector512.Narrow(lower, upper);
+        }
+
+        /// <summary>(uint)ulong</summary>
+        private readonly struct NarrowUInt64ToUInt32Operator : IUnaryTwoToOneOperator<ulong, uint>
+        {
+            public static bool Vectorizable => true;
+
+            public static uint Invoke(ulong x) => (uint)x;
+            public static Vector128<uint> Invoke(Vector128<ulong> lower, Vector128<ulong> upper) => Vector128.Narrow(lower, upper);
+            public static Vector256<uint> Invoke(Vector256<ulong> lower, Vector256<ulong> upper) => Vector256.Narrow(lower, upper);
+            public static Vector512<uint> Invoke(Vector512<ulong> lower, Vector512<ulong> upper) => Vector512.Narrow(lower, upper);
+        }
+
+        /// <summary>(int)long</summary>
+        private readonly struct NarrowInt64ToInt32Operator : IUnaryTwoToOneOperator<long, int>
+        {
+            public static bool Vectorizable => true;
+
+            public static int Invoke(long x) => (int)x;
+            public static Vector128<int> Invoke(Vector128<long> lower, Vector128<long> upper) => Vector128.Narrow(lower, upper);
+            public static Vector256<int> Invoke(Vector256<long> lower, Vector256<long> upper) => Vector256.Narrow(lower, upper);
+            public static Vector512<int> Invoke(Vector512<long> lower, Vector512<long> upper) => Vector512.Narrow(lower, upper);
+        }
+
+        /// <summary>T.CreateTruncating(x)</summary>
+        private readonly struct ConvertTruncatingFallbackOperator<TFrom, TTo> : IUnaryOperator<TFrom, TTo> where TFrom : INumberBase<TFrom> where TTo : INumberBase<TTo>
+        {
+            public static bool Vectorizable => false;
+
+            public static TTo Invoke(TFrom x) => TTo.CreateTruncating(x);
+            public static Vector128<TTo> Invoke(Vector128<TFrom> x) => throw new NotSupportedException();
+            public static Vector256<TTo> Invoke(Vector256<TFrom> x) => throw new NotSupportedException();
+            public static Vector512<TTo> Invoke(Vector512<TFrom> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CopySign.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CopySign.cs
new file mode 100644
index 000000000000..0277c729a17a
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CopySign.cs
@@ -0,0 +1,133 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise result of copying the sign from one number to another number in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="sign">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="sign" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="sign"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.CopySign(<paramref name="x" />[i], <paramref name="sign" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void CopySign<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> sign, Span<T> destination)
+            where T : INumber<T> =>
+            InvokeSpanSpanIntoSpan<T, CopySignOperator<T>>(x, sign, destination);
+
+        /// <summary>Computes the element-wise result of copying the sign from one number to another number in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="sign">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.CopySign(<paramref name="x" />[i], <paramref name="sign" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void CopySign<T>(ReadOnlySpan<T> x, T sign, Span<T> destination)
+            where T : INumber<T> =>
+            InvokeSpanScalarIntoSpan<T, CopySignOperator<T>>(x, sign, destination);
+
+        private readonly struct CopySignOperator<T> : IBinaryOperator<T> where T : INumber<T>
+        {
+            public static bool Vectorizable => true;
+
+            public static T Invoke(T x, T y) => T.CopySign(x, y);
+
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return Vector128.ConditionalSelect(Vector128.Create(-0.0f).As<float, T>(), y, x);
+                }
+
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector128.ConditionalSelect(Vector128.Create(-0.0d).As<double, T>(), y, x);
+                }
+
+                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
+                {
+                    Vector128<T> absValue = Vector128.Abs(x);
+                    Vector128<T> sign = Vector128.GreaterThanOrEqual(y, Vector128<T>.Zero);
+                    Vector128<T> error = sign & Vector128.LessThan(absValue, Vector128<T>.Zero);
+                    if (error != Vector128<T>.Zero)
+                    {
+                        Math.Abs(int.MinValue); // throw OverflowException
+                    }
+
+                    return Vector128.ConditionalSelect(sign, absValue, -absValue);
+                }
+
+                return x;
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return Vector256.ConditionalSelect(Vector256.Create(-0.0f).As<float, T>(), y, x);
+                }
+
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector256.ConditionalSelect(Vector256.Create(-0.0d).As<double, T>(), y, x);
+                }
+
+                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
+                {
+                    Vector256<T> absValue = Vector256.Abs(x);
+                    Vector256<T> sign = Vector256.GreaterThanOrEqual(y, Vector256<T>.Zero);
+                    Vector256<T> error = sign & Vector256.LessThan(absValue, Vector256<T>.Zero);
+                    if (error != Vector256<T>.Zero)
+                    {
+                        Math.Abs(int.MinValue); // throw OverflowException
+                    }
+
+                    return Vector256.ConditionalSelect(sign, absValue, -absValue);
+                }
+
+                return x;
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return Vector512.ConditionalSelect(Vector512.Create(-0.0f).As<float, T>(), y, x);
+                }
+
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector512.ConditionalSelect(Vector512.Create(-0.0d).As<double, T>(), y, x);
+                }
+
+                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
+                {
+                    Vector512<T> absValue = Vector512.Abs(x);
+                    Vector512<T> sign = Vector512.GreaterThanOrEqual(y, Vector512<T>.Zero);
+                    Vector512<T> error = sign & Vector512.LessThan(absValue, Vector512<T>.Zero);
+                    if (error != Vector512<T>.Zero)
+                    {
+                        Math.Abs(int.MinValue); // throw OverflowException
+                    }
+
+                    return Vector512.ConditionalSelect(sign, absValue, -absValue);
+                }
+
+                return x;
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs
new file mode 100644
index 000000000000..36bdcc82e337
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs
@@ -0,0 +1,351 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise cosine of the value in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Cos(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Cos<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, CosOperator<T>>(x, destination);
+
+        /// <summary>T.Cos(x)</summary>
+        private readonly struct CosOperator<T> : IUnaryOperator<T, T>
+            where T : ITrigonometricFunctions<T>
+        {
+            // This code is based on `vrs4_cos` and `vrd2_cos` from amd/aocl-libm-ose
+            // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation notes from amd/aocl-libm-ose:
+            // --------------------------------------------
+            // To compute cosf(float x)
+            // Using the identity,
+            // cos(x) = sin(x + pi/2)           (1)
+            //
+            // 1. Argument Reduction
+            //      Now, let x be represented as,
+            //          |x| = N * pi + f        (2) | N is an integer,
+            //                                        -pi/2 <= f <= pi/2
+            //
+            //      From (2), N = int( (x + pi/2) / pi) - 0.5
+            //                f = |x| - (N * pi)
+            //
+            // 2. Polynomial Evaluation
+            //       From (1) and (2),sin(f) can be calculated using a polynomial
+            //       sin(f) = f*(1 + C1*f^2 + C2*f^4 + C3*f^6 + c4*f^8)
+            //
+            // 3. Reconstruction
+            //      Hence, cos(x) = sin(x + pi/2) = (-1)^N * sin(f)
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Cos(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+        }
+
+        /// <summary>float.Cos(x)</summary>
+        private readonly struct CosOperatorSingle : IUnaryOperator<float, float>
+        {
+            internal const uint MaxVectorizedValue = 0x4A989680u;
+            internal const uint SignMask = 0x7FFFFFFFu;
+            private const float AlmHuge = 1.2582912e7f;
+            private const float Pi_Tail1 = 8.742278e-8f;
+            private const float Pi_Tail2 = 3.430249e-15f;
+            private const float C1 = -0.16666657f;
+            private const float C2 = 0.008332962f;
+            private const float C3 = -1.9801206e-4f;
+            private const float C4 = 2.5867037e-6f;
+
+            public static bool Vectorizable => true;
+
+            public static float Invoke(float x) => float.Cos(x);
+
+            public static Vector128<float> Invoke(Vector128<float> x)
+            {
+                Vector128<float> uxMasked = Vector128.Abs(x);
+                if (Vector128.GreaterThanAny(uxMasked.AsUInt32(), Vector128.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<CosOperatorSingle>(x);
+                }
+
+                Vector128<float> almHuge = Vector128.Create(AlmHuge);
+                Vector128<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked + Vector128.Create(float.Pi / 2), Vector128.Create(1 / float.Pi), almHuge);
+                Vector128<uint> odd = dn.AsUInt32() << 31;
+                dn = dn - almHuge - Vector128.Create(0.5f);
+
+                Vector128<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(-float.Pi), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(Pi_Tail2), f);
+
+                // POLY_EVAL_ODD_9
+                Vector128<float> f2 = f * f;
+                Vector128<float> f4 = f2 * f2;
+                Vector128<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1));
+                Vector128<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector128<float>.One);
+                Vector128<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C3), f2, Vector128.Create(C4) * f4);
+                Vector128<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector128<float> poly = f * a3;
+
+                return (poly.AsUInt32() ^ odd).AsSingle();
+            }
+
+            public static Vector256<float> Invoke(Vector256<float> x)
+            {
+                Vector256<float> uxMasked = Vector256.Abs(x);
+                if (Vector256.GreaterThanAny(uxMasked.AsUInt32(), Vector256.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<CosOperatorSingle>(x);
+                }
+
+                Vector256<float> almHuge = Vector256.Create(AlmHuge);
+                Vector256<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked + Vector256.Create(float.Pi / 2), Vector256.Create(1 / float.Pi), almHuge);
+                Vector256<uint> odd = dn.AsUInt32() << 31;
+                dn = dn - almHuge - Vector256.Create(0.5f);
+
+                Vector256<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(-float.Pi), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(Pi_Tail2), f);
+
+                // POLY_EVAL_ODD_9
+                Vector256<float> f2 = f * f;
+                Vector256<float> f4 = f2 * f2;
+                Vector256<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1));
+                Vector256<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector256<float>.One);
+                Vector256<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C3), f2, Vector256.Create(C4) * f4);
+                Vector256<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector256<float> poly = f * a3;
+
+                return (poly.AsUInt32() ^ odd).AsSingle();
+            }
+
+            public static Vector512<float> Invoke(Vector512<float> x)
+            {
+                Vector512<float> uxMasked = Vector512.Abs(x);
+                if (Vector512.GreaterThanAny(uxMasked.AsUInt32(), Vector512.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<CosOperatorSingle>(x);
+                }
+
+                Vector512<float> almHuge = Vector512.Create(AlmHuge);
+                Vector512<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked + Vector512.Create(float.Pi / 2), Vector512.Create(1 / float.Pi), almHuge);
+                Vector512<uint> odd = dn.AsUInt32() << 31;
+                dn = dn - almHuge - Vector512.Create(0.5f);
+
+                Vector512<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(-float.Pi), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(Pi_Tail2), f);
+
+                // POLY_EVAL_ODD_9
+                Vector512<float> f2 = f * f;
+                Vector512<float> f4 = f2 * f2;
+                Vector512<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
+                Vector512<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector512<float>.One);
+                Vector512<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C3), f2, Vector512.Create(C4) * f4);
+                Vector512<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector512<float> poly = f * a3;
+
+                return (poly.AsUInt32() ^ odd).AsSingle();
+            }
+        }
+
+        /// <summary>double.Cos(x)</summary>
+        private readonly struct CosOperatorDouble : IUnaryOperator<double, double>
+        {
+            internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul;
+            internal const ulong MaxVectorizedValue = 0x4160000000000000ul;
+            private const double AlmHuge = 6.755399441055744E15;
+            private const double Pi_Tail2 = -1.2246467991473532E-16;
+            private const double Pi_Tail3 = 2.9947698097183397E-33;
+            private const double C1 = -0.16666666666666666;
+            private const double C2 = 0.008333333333333165;
+            private const double C3 = -1.984126984120184E-4;
+            private const double C4 = 2.7557319210152756E-6;
+            private const double C5 = -2.5052106798274616E-8;
+            private const double C6 = 1.6058936490373254E-10;
+            private const double C7 = -7.642917806937501E-13;
+            private const double C8 = 2.7204790963151784E-15;
+
+            public static bool Vectorizable => true;
+
+            public static double Invoke(double x) => double.Cos(x);
+
+            public static Vector128<double> Invoke(Vector128<double> x)
+            {
+                Vector128<double> uxMasked = Vector128.Abs(x);
+                if (Vector128.GreaterThanAny(uxMasked.AsUInt64(), Vector128.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<CosOperatorDouble>(x);
+                }
+
+                // dn = int(x / pi + 1/2) - 1/2
+                Vector128<double> almHuge = Vector128.Create(AlmHuge);
+                Vector128<double> half = Vector128.Create(0.5);
+                Vector128<double> dn = (uxMasked * Vector128.Create(1 / double.Pi)) + half + almHuge;
+                Vector128<ulong> odd = dn.AsUInt64() << 63;
+                dn = dn - almHuge - half;
+
+                // f = x - (n*pi)
+                Vector128<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-double.Pi), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(Pi_Tail2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(Pi_Tail3), f);
+
+                // POLY_EVAL_ODD_17
+                Vector128<double> f2 = f * f;
+                Vector128<double> f4 = f2 * f2;
+                Vector128<double> f6 = f4 * f2;
+                Vector128<double> f10 = f6 * f4;
+                Vector128<double> f14 = f10 * f4;
+                Vector128<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1));
+                Vector128<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C4), f2, Vector128.Create(C3));
+                Vector128<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C6), f2, Vector128.Create(C5));
+                Vector128<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C8), f2, Vector128.Create(C7));
+                Vector128<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
+                Vector128<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
+                Vector128<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
+
+                return (poly.AsUInt64() ^ odd).AsDouble();
+            }
+
+            public static Vector256<double> Invoke(Vector256<double> x)
+            {
+                Vector256<double> uxMasked = Vector256.Abs(x);
+                if (Vector256.GreaterThanAny(uxMasked.AsUInt64(), Vector256.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<CosOperatorDouble>(x);
+                }
+
+                // dn = int(x / pi + 1/2) - 1/2
+                Vector256<double> almHuge = Vector256.Create(AlmHuge);
+                Vector256<double> half = Vector256.Create(0.5);
+                Vector256<double> dn = (uxMasked * Vector256.Create(1 / double.Pi)) + half + almHuge;
+                Vector256<ulong> odd = dn.AsUInt64() << 63;
+                dn = dn - almHuge - half;
+
+                // f = x - (n*pi)
+                Vector256<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-double.Pi), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(Pi_Tail2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(Pi_Tail3), f);
+
+                // POLY_EVAL_ODD_17
+                Vector256<double> f2 = f * f;
+                Vector256<double> f4 = f2 * f2;
+                Vector256<double> f6 = f4 * f2;
+                Vector256<double> f10 = f6 * f4;
+                Vector256<double> f14 = f10 * f4;
+                Vector256<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1));
+                Vector256<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C4), f2, Vector256.Create(C3));
+                Vector256<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C6), f2, Vector256.Create(C5));
+                Vector256<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C8), f2, Vector256.Create(C7));
+                Vector256<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
+                Vector256<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
+                Vector256<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
+
+                return (poly.AsUInt64() ^ odd).AsDouble();
+            }
+
+            public static Vector512<double> Invoke(Vector512<double> x)
+            {
+                Vector512<double> uxMasked = Vector512.Abs(x);
+                if (Vector512.GreaterThanAny(uxMasked.AsUInt64(), Vector512.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<CosOperatorDouble>(x);
+                }
+
+                // dn = int(x / pi + 1/2) - 1/2
+                Vector512<double> almHuge = Vector512.Create(AlmHuge);
+                Vector512<double> half = Vector512.Create(0.5);
+                Vector512<double> dn = (uxMasked * Vector512.Create(1 / double.Pi)) + half + almHuge;
+                Vector512<ulong> odd = dn.AsUInt64() << 63;
+                dn = dn - almHuge - half;
+
+                // f = x - (n*pi)
+                Vector512<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-double.Pi), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(Pi_Tail2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(Pi_Tail3), f);
+
+                // POLY_EVAL_ODD_17
+                Vector512<double> f2 = f * f;
+                Vector512<double> f4 = f2 * f2;
+                Vector512<double> f6 = f4 * f2;
+                Vector512<double> f10 = f6 * f4;
+                Vector512<double> f14 = f10 * f4;
+                Vector512<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
+                Vector512<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C4), f2, Vector512.Create(C3));
+                Vector512<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C6), f2, Vector512.Create(C5));
+                Vector512<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C8), f2, Vector512.Create(C7));
+                Vector512<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
+                Vector512<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
+                Vector512<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
+
+                return (poly.AsUInt64() ^ odd).AsDouble();
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs
new file mode 100644
index 000000000000..b286a18d0f94
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs
@@ -0,0 +1,107 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise cosine of the value in the specified tensor that has been multiplied by Pi.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.CosPi(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void CosPi<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, CosPiOperator<T>>(x, destination);
+
+        /// <summary>T.CosPi(x)</summary>
+        private readonly struct CosPiOperator<T> : IUnaryOperator<T, T>
+            where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.CosPi(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                Vector128<T> xpi = x * Vector128.Create(T.Pi);
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(CosOperatorSingle.SignMask), Vector128.Create(CosOperatorSingle.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<CosPiOperator<float>>(x.AsSingle()).As<float, T>();
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    if (Vector128.GreaterThanAny(xpi.AsUInt64() & Vector128.Create(CosOperatorDouble.SignMask), Vector128.Create(CosOperatorDouble.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
+                    }
+                }
+
+                return CosOperator<T>.Invoke(xpi);
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                Vector256<T> xpi = x * Vector256.Create(T.Pi);
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(CosOperatorSingle.SignMask), Vector256.Create(CosOperatorSingle.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<CosPiOperator<float>>(x.AsSingle()).As<float, T>();
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    if (Vector256.GreaterThanAny(xpi.AsUInt64() & Vector256.Create(CosOperatorDouble.SignMask), Vector256.Create(CosOperatorDouble.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
+                    }
+                }
+
+                return CosOperator<T>.Invoke(xpi);
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                Vector512<T> xpi = x * Vector512.Create(T.Pi);
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(CosOperatorSingle.SignMask), Vector512.Create(CosOperatorSingle.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<CosPiOperator<float>>(x.AsSingle()).As<float, T>();
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    if (Vector512.GreaterThanAny(xpi.AsUInt64() & Vector512.Create(CosOperatorDouble.SignMask), Vector512.Create(CosOperatorDouble.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
+                    }
+                }
+
+                return CosOperator<T>.Invoke(xpi);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cosh.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cosh.cs
new file mode 100644
index 000000000000..2047ee6a26f5
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cosh.cs
@@ -0,0 +1,137 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise hyperbolic cosine of each radian angle in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Cosh(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/> or <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
+        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is also NaN.
+        /// </para>
+        /// <para>
+        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Cosh<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IHyperbolicFunctions<T> =>
+            InvokeSpanIntoSpan<T, CoshOperator<T>>(x, destination);
+
+        /// <summary>T.Cosh(x)</summary>
+        internal readonly struct CoshOperator<T> : IUnaryOperator<T, T>
+            where T : IHyperbolicFunctions<T>
+        {
+            // This code is based on `vrs4_coshf` from amd/aocl-libm-ose
+            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Spec:
+            //   coshf(|x| > 89.415985107421875) = Infinity
+            //   coshf(Infinity)  = infinity
+            //   coshf(-Infinity) = infinity
+            //
+            // cosh(x) = (exp(x) + exp(-x))/2
+            // cosh(-x) = +cosh(x)
+            //
+            // checks for special cases
+            // if ( asint(x) > infinity) return x with overflow exception and
+            // return x.
+            // if x is NaN then raise invalid FP operation exception and return x.
+            //
+            // coshf = v/2 * exp(x - log(v)) where v = 0x1.0000e8p-1
+
+            private const float Single_LOGV = 0.693161f;
+            private const float Single_HALFV = 1.0000138f;
+            private const float Single_INVV2 = 0.24999309f;
+
+            private const double Double_LOGV = 0.6931471805599453;
+            private const double Double_HALFV = 1.0;
+            private const double Double_INVV2 = 0.25;
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Cosh(x);
+
+            public static Vector128<T> Invoke(Vector128<T> t)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    Vector128<float> x = t.AsSingle();
+
+                    Vector128<float> y = Vector128.Abs(x);
+                    Vector128<float> z = ExpOperator<float>.Invoke(y - Vector128.Create((float)Single_LOGV));
+                    return (Vector128.Create((float)Single_HALFV) * (z + (Vector128.Create((float)Single_INVV2) / z))).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    Vector128<double> x = t.AsDouble();
+
+                    Vector128<double> y = Vector128.Abs(x);
+                    Vector128<double> z = ExpOperator<double>.Invoke(y - Vector128.Create(Double_LOGV));
+                    return (Vector128.Create(Double_HALFV) * (z + (Vector128.Create(Double_INVV2) / z))).As<double, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> t)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    Vector256<float> x = t.AsSingle();
+
+                    Vector256<float> y = Vector256.Abs(x);
+                    Vector256<float> z = ExpOperator<float>.Invoke(y - Vector256.Create((float)Single_LOGV));
+                    return (Vector256.Create((float)Single_HALFV) * (z + (Vector256.Create((float)Single_INVV2) / z))).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    Vector256<double> x = t.AsDouble();
+
+                    Vector256<double> y = Vector256.Abs(x);
+                    Vector256<double> z = ExpOperator<double>.Invoke(y - Vector256.Create(Double_LOGV));
+                    return (Vector256.Create(Double_HALFV) * (z + (Vector256.Create(Double_INVV2) / z))).As<double, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> t)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    Vector512<float> x = t.AsSingle();
+
+                    Vector512<float> y = Vector512.Abs(x);
+                    Vector512<float> z = ExpOperator<float>.Invoke(y - Vector512.Create((float)Single_LOGV));
+                    return (Vector512.Create((float)Single_HALFV) * (z + (Vector512.Create((float)Single_INVV2) / z))).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    Vector512<double> x = t.AsDouble();
+
+                    Vector512<double> y = Vector512.Abs(x);
+                    Vector512<double> z = ExpOperator<double>.Invoke(y - Vector512.Create(Double_LOGV));
+                    return (Vector512.Create(Double_HALFV) * (z + (Vector512.Create(Double_INVV2) / z))).As<double, T>();
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosineSimilarity.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosineSimilarity.cs
new file mode 100644
index 000000000000..cacbcab02a42
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosineSimilarity.cs
@@ -0,0 +1,206 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public partial class TensorPrimitives
+    {
+        /// <summary>Computes the cosine similarity between the two specified non-empty, equal-length tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <returns>The cosine similarity of the two tensors.</returns>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x" /> and <paramref name="y" /> must not be empty.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c>TensorPrimitives.Dot(x, y) / (<typeparamref name="T"/>.Sqrt(TensorPrimitives.SumOfSquares(x)) * <typeparamref name="T"/>.Sqrt(TensorPrimitives.SumOfSquares(y)).</c>
+        /// </para>
+        /// <para>
+        /// If any element in either input tensor is equal to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>, <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>, or <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
+        /// NaN is returned.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T CosineSimilarity<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y)
+            where T : IRootFunctions<T> =>
+            CosineSimilarityCore(x, y);
+
+        /// <summary>Computes the cosine similarity between the two specified non-empty, equal-length tensors of single-precision floating-point numbers.</summary>
+        /// <remarks>Assumes arguments have already been validated to be non-empty and equal length.</remarks>
+        private static T CosineSimilarityCore<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y) where T : IRootFunctions<T>
+        {
+            if (x.IsEmpty)
+            {
+                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
+            }
+
+            if (x.Length != y.Length)
+            {
+                ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
+            }
+
+            // Compute the same as:
+            // TensorPrimitives.Dot(x, y) / (Math.Sqrt(TensorPrimitives.SumOfSquares(x)) * Math.Sqrt(TensorPrimitives.SumOfSquares(y)))
+            // but only looping over each span once.
+
+            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && x.Length >= Vector512<T>.Count)
+            {
+                ref T xRef = ref MemoryMarshal.GetReference(x);
+                ref T yRef = ref MemoryMarshal.GetReference(y);
+
+                Vector512<T> dotProductVector = Vector512<T>.Zero;
+                Vector512<T> xSumOfSquaresVector = Vector512<T>.Zero;
+                Vector512<T> ySumOfSquaresVector = Vector512<T>.Zero;
+
+                // Process vectors, summing their dot products and squares, as long as there's a vector's worth remaining.
+                int oneVectorFromEnd = x.Length - Vector512<T>.Count;
+                int i = 0;
+                do
+                {
+                    Vector512<T> xVec = Vector512.LoadUnsafe(ref xRef, (uint)i);
+                    Vector512<T> yVec = Vector512.LoadUnsafe(ref yRef, (uint)i);
+
+                    dotProductVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, yVec, dotProductVector);
+                    xSumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, xVec, xSumOfSquaresVector);
+                    ySumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(yVec, yVec, ySumOfSquaresVector);
+
+                    i += Vector512<T>.Count;
+                }
+                while (i <= oneVectorFromEnd);
+
+                // Process the last vector in the span, masking off elements already processed.
+                if (i != x.Length)
+                {
+                    Vector512<T> xVec = Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512<T>.Count));
+                    Vector512<T> yVec = Vector512.LoadUnsafe(ref yRef, (uint)(x.Length - Vector512<T>.Count));
+
+                    Vector512<T> remainderMask = CreateRemainderMaskVector512<T>(x.Length - i);
+                    xVec &= remainderMask;
+                    yVec &= remainderMask;
+
+                    dotProductVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, yVec, dotProductVector);
+                    xSumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, xVec, xSumOfSquaresVector);
+                    ySumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(yVec, yVec, ySumOfSquaresVector);
+                }
+
+                // Sum(X * Y) / (|X| * |Y|)
+                return
+                    Vector512.Sum(dotProductVector) /
+                    (T.Sqrt(Vector512.Sum(xSumOfSquaresVector)) * T.Sqrt(Vector512.Sum(ySumOfSquaresVector)));
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && x.Length >= Vector256<T>.Count)
+            {
+                ref T xRef = ref MemoryMarshal.GetReference(x);
+                ref T yRef = ref MemoryMarshal.GetReference(y);
+
+                Vector256<T> dotProductVector = Vector256<T>.Zero;
+                Vector256<T> xSumOfSquaresVector = Vector256<T>.Zero;
+                Vector256<T> ySumOfSquaresVector = Vector256<T>.Zero;
+
+                // Process vectors, summing their dot products and squares, as long as there's a vector's worth remaining.
+                int oneVectorFromEnd = x.Length - Vector256<T>.Count;
+                int i = 0;
+                do
+                {
+                    Vector256<T> xVec = Vector256.LoadUnsafe(ref xRef, (uint)i);
+                    Vector256<T> yVec = Vector256.LoadUnsafe(ref yRef, (uint)i);
+
+                    dotProductVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, yVec, dotProductVector);
+                    xSumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, xVec, xSumOfSquaresVector);
+                    ySumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(yVec, yVec, ySumOfSquaresVector);
+
+                    i += Vector256<T>.Count;
+                }
+                while (i <= oneVectorFromEnd);
+
+                // Process the last vector in the span, masking off elements already processed.
+                if (i != x.Length)
+                {
+                    Vector256<T> xVec = Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256<T>.Count));
+                    Vector256<T> yVec = Vector256.LoadUnsafe(ref yRef, (uint)(x.Length - Vector256<T>.Count));
+
+                    Vector256<T> remainderMask = CreateRemainderMaskVector256<T>(x.Length - i);
+                    xVec &= remainderMask;
+                    yVec &= remainderMask;
+
+                    dotProductVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, yVec, dotProductVector);
+                    xSumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, xVec, xSumOfSquaresVector);
+                    ySumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(yVec, yVec, ySumOfSquaresVector);
+                }
+
+                // Sum(X * Y) / (|X| * |Y|)
+                return
+                    Vector256.Sum(dotProductVector) /
+                    (T.Sqrt(Vector256.Sum(xSumOfSquaresVector)) * T.Sqrt(Vector256.Sum(ySumOfSquaresVector)));
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && x.Length >= Vector128<T>.Count)
+            {
+                ref T xRef = ref MemoryMarshal.GetReference(x);
+                ref T yRef = ref MemoryMarshal.GetReference(y);
+
+                Vector128<T> dotProductVector = Vector128<T>.Zero;
+                Vector128<T> xSumOfSquaresVector = Vector128<T>.Zero;
+                Vector128<T> ySumOfSquaresVector = Vector128<T>.Zero;
+
+                // Process vectors, summing their dot products and squares, as long as there's a vector's worth remaining.
+                int oneVectorFromEnd = x.Length - Vector128<T>.Count;
+                int i = 0;
+                do
+                {
+                    Vector128<T> xVec = Vector128.LoadUnsafe(ref xRef, (uint)i);
+                    Vector128<T> yVec = Vector128.LoadUnsafe(ref yRef, (uint)i);
+
+                    dotProductVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, yVec, dotProductVector);
+                    xSumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, xVec, xSumOfSquaresVector);
+                    ySumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(yVec, yVec, ySumOfSquaresVector);
+
+                    i += Vector128<T>.Count;
+                }
+                while (i <= oneVectorFromEnd);
+
+                // Process the last vector in the span, masking off elements already processed.
+                if (i != x.Length)
+                {
+                    Vector128<T> xVec = Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128<T>.Count));
+                    Vector128<T> yVec = Vector128.LoadUnsafe(ref yRef, (uint)(x.Length - Vector128<T>.Count));
+
+                    Vector128<T> remainderMask = CreateRemainderMaskVector128<T>(x.Length - i);
+                    xVec &= remainderMask;
+                    yVec &= remainderMask;
+
+                    dotProductVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, yVec, dotProductVector);
+                    xSumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, xVec, xSumOfSquaresVector);
+                    ySumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(yVec, yVec, ySumOfSquaresVector);
+                }
+
+                // Sum(X * Y) / (|X| * |Y|)
+                return
+                    Vector128.Sum(dotProductVector) /
+                    (T.Sqrt(Vector128.Sum(xSumOfSquaresVector)) * T.Sqrt(Vector128.Sum(ySumOfSquaresVector)));
+            }
+
+            // Vectorization isn't supported or there are too few elements to vectorize.
+            // Use a scalar implementation.
+            T dotProduct = T.Zero, xSumOfSquares = T.Zero, ySumOfSquares = T.Zero;
+            for (int i = 0; i < x.Length; i++)
+            {
+                dotProduct = MultiplyAddEstimateOperator<T>.Invoke(x[i], y[i], dotProduct);
+                xSumOfSquares = MultiplyAddEstimateOperator<T>.Invoke(x[i], x[i], xSumOfSquares);
+                ySumOfSquares = MultiplyAddEstimateOperator<T>.Invoke(y[i], y[i], ySumOfSquares);
+            }
+
+            // Sum(X * Y) / (|X| * |Y|)
+            return
+                dotProduct /
+                (T.Sqrt(xSumOfSquares) * T.Sqrt(ySumOfSquares));
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.DegreesToRadians.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.DegreesToRadians.cs
new file mode 100644
index 000000000000..68d7781f21cd
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.DegreesToRadians.cs
@@ -0,0 +1,34 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise conversion of each number of degrees in the specified tensor to radiansx.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.DegreesToRadians(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void DegreesToRadians<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, DegreesToRadiansOperator<T>>(x, destination);
+
+        /// <summary>T.DegreesToRadians(x)</summary>
+        private readonly struct DegreesToRadiansOperator<T> : IUnaryOperator<T, T> where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x) => T.DegreesToRadians(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => (x * T.Pi) / T.CreateChecked(180);
+            public static Vector256<T> Invoke(Vector256<T> x) => (x * T.Pi) / T.CreateChecked(180);
+            public static Vector512<T> Invoke(Vector512<T> x) => (x * T.Pi) / T.CreateChecked(180);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Distance.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Distance.cs
new file mode 100644
index 000000000000..bf3481a9c7ce
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Distance.cs
@@ -0,0 +1,75 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the distance between two points, specified as non-empty, equal-length tensors of numbers, in Euclidean space.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <returns>The Euclidean distance.</returns>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x" /> and <paramref name="y" /> must not be empty.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes the equivalent of:
+        /// <c>
+        ///     Span&lt;T&gt; difference = ...;
+        ///     TensorPrimitives.Subtract(x, y, difference);
+        ///     T result = <typeparamref name="T"/>.Sqrt(TensorPrimitives.SumOfSquares(difference));
+        /// </c>
+        /// but without requiring additional temporary storage for the intermediate differences.
+        /// </para>
+        /// <para>
+        /// If any element in either input tensor is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, NaN is returned.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T Distance<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y)
+            where T : IRootFunctions<T>
+        {
+            if (x.IsEmpty)
+            {
+                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
+            }
+
+            return T.Sqrt(Aggregate<T, SubtractSquaredOperator<T>, AddOperator<T>>(x, y));
+        }
+
+        /// <summary>(x - y) * (x - y)</summary>
+        internal readonly struct SubtractSquaredOperator<T> : IBinaryOperator<T> where T : ISubtractionOperators<T, T, T>, IMultiplyOperators<T, T, T>
+        {
+            public static bool Vectorizable => true;
+
+            public static T Invoke(T x, T y)
+            {
+                T tmp = x - y;
+                return tmp * tmp;
+            }
+
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
+            {
+                Vector128<T> tmp = x - y;
+                return tmp * tmp;
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
+            {
+                Vector256<T> tmp = x - y;
+                return tmp * tmp;
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
+            {
+                Vector512<T> tmp = x - y;
+                return tmp * tmp;
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Divide.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Divide.cs
new file mode 100644
index 000000000000..64a238face22
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Divide.cs
@@ -0,0 +1,79 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise division of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and an element in <paramref name="y"/> is equal to zero.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] / <paramref name="y" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Divide<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : IDivisionOperators<T, T, T> =>
+            InvokeSpanSpanIntoSpan<T, DivideOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise division of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and <paramref name="y"/> is equal to zero.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] / <paramref name="y" /></c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Divide<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : IDivisionOperators<T, T, T> =>
+            InvokeSpanScalarIntoSpan<T, DivideOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise division of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a scalar.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and an element in <paramref name="y"/> is equal to zero.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" /> / <paramref name="y" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Divide<T>(T x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : IDivisionOperators<T, T, T> =>
+            InvokeScalarSpanIntoSpan<T, DivideOperator<T>>(x, y, destination);
+
+        /// <summary>x / y</summary>
+        internal readonly struct DivideOperator<T> : IBinaryOperator<T> where T : IDivisionOperators<T, T, T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x, T y) => x / y;
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x / y;
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x / y;
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x / y;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Dot.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Dot.cs
new file mode 100644
index 000000000000..e66ec314ddbb
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Dot.cs
@@ -0,0 +1,35 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the dot product of two tensors containing numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <returns>The dot product.</returns>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes the equivalent of:
+        /// <c>
+        ///     Span&lt;T&gt; products = ...;
+        ///     TensorPrimitives.Multiply(x, y, products);
+        ///     T result = TensorPrimitives.Sum(products);
+        /// </c>
+        /// but without requiring additional temporary storage for the intermediate products. It corresponds to the <c>dot</c> method defined by <c>BLAS1</c>.
+        /// </para>
+        /// <para>
+        /// If any of the input elements is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting value is also NaN.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T Dot<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y)
+            where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T>, IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T> =>
+            Aggregate<T, MultiplyOperator<T>, AddOperator<T>>(x, y);
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp.cs
new file mode 100644
index 000000000000..1147ec5bea5f
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp.cs
@@ -0,0 +1,619 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise result of raising <c>e</c> to the number powers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Exp(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// If a value equals <see cref="IFloatingPointIeee754{TSelf}.NaN"/> or <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>, the result stored into the corresponding destination location is set to NaN.
+        /// If a value equals <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>, the result stored into the corresponding destination location is set to 0.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Exp<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IExponentialFunctions<T> =>
+            InvokeSpanIntoSpan<T, ExpOperator<T>>(x, destination);
+
+        /// <summary>T.Exp(x)</summary>
+        internal readonly struct ExpOperator<T> : IUnaryOperator<T, T>
+            where T : IExponentialFunctions<T>
+        {
+            public static bool Vectorizable => (typeof(T) == typeof(double))
+                                            || (typeof(T) == typeof(float));
+
+            public static T Invoke(T x) => T.Exp(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector128.Exp(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector128.Exp(x.AsSingle()).As<float, T>();
+                }
+#else
+                if (typeof(T) == typeof(double))
+                {
+                    return ExpOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return ExpOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+#endif
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector256.Exp(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector256.Exp(x.AsSingle()).As<float, T>();
+                }
+#else
+                if (typeof(T) == typeof(double))
+                {
+                    return ExpOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return ExpOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+#endif
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector512.Exp(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector512.Exp(x.AsSingle()).As<float, T>();
+                }
+#else
+                if (typeof(T) == typeof(double))
+                {
+                    return ExpOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return ExpOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+#endif
+            }
+        }
+
+#if !NET9_0_OR_GREATER
+        /// <summary>double.Exp(x)</summary>
+        private readonly struct ExpOperatorDouble : IUnaryOperator<double, double>
+        {
+            // This code is based on `vrd2_exp` from amd/aocl-libm-ose
+            // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation Notes
+            // ----------------------
+            // 1. Argument Reduction:
+            //      e^x = 2^(x/ln2) = 2^(x*(64/ln(2))/64)     --- (1)
+            //
+            //      Choose 'n' and 'f', such that
+            //      x * 64/ln2 = n + f                        --- (2) | n is integer
+            //                            | |f| <= 0.5
+            //     Choose 'm' and 'j' such that,
+            //      n = (64 * m) + j                          --- (3)
+            //
+            //     From (1), (2) and (3),
+            //      e^x = 2^((64*m + j + f)/64)
+            //          = (2^m) * (2^(j/64)) * 2^(f/64)
+            //          = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64))
+            //
+            // 2. Table Lookup
+            //      Values of (2^(j/64)) are precomputed, j = 0, 1, 2, 3 ... 63
+            //
+            // 3. Polynomial Evaluation
+            //   From (2),
+            //     f = x*(64/ln(2)) - n
+            //   Let,
+            //     r  = f*(ln(2)/64) = x - n*(ln(2)/64)
+            //
+            // 4. Reconstruction
+            //      Thus,
+            //        e^x = (2^m) * (2^(j/64)) * e^r
+
+            private const ulong V_ARG_MAX = 0x40862000_00000000;
+            private const ulong V_DP64_BIAS = 1023;
+
+            private const double V_EXPF_MIN = -709.782712893384;
+            private const double V_EXPF_MAX = +709.782712893384;
+
+            private const double V_EXPF_HUGE = 6755399441055744;
+            private const double V_TBL_LN2 = 1.4426950408889634;
+
+            private const double V_LN2_HEAD = +0.693359375;
+            private const double V_LN2_TAIL = -0.00021219444005469057;
+
+            private const double C3 = 0.5000000000000018;
+            private const double C4 = 0.1666666666666617;
+            private const double C5 = 0.04166666666649277;
+            private const double C6 = 0.008333333333559272;
+            private const double C7 = 0.001388888895122404;
+            private const double C8 = 0.00019841269432677495;
+            private const double C9 = 2.4801486521374483E-05;
+            private const double C10 = 2.7557622532543023E-06;
+            private const double C11 = 2.7632293298250954E-07;
+            private const double C12 = 2.499430431958571E-08;
+
+            public static bool Vectorizable => true;
+
+            public static double Invoke(double x) => double.Exp(x);
+
+            public static Vector128<double> Invoke(Vector128<double> x)
+            {
+                // x * (64.0 / ln(2))
+                Vector128<double> z = x * Vector128.Create(V_TBL_LN2);
+
+                Vector128<double> dn = z + Vector128.Create(V_EXPF_HUGE);
+
+                // n = (int)z
+                Vector128<ulong> n = dn.AsUInt64();
+
+                // dn = (double)n
+                dn -= Vector128.Create(V_EXPF_HUGE);
+
+                // r = x - (dn * (ln(2) / 64))
+                // where ln(2) / 64 is split into Head and Tail values
+                Vector128<double> r = x - (dn * Vector128.Create(V_LN2_HEAD)) - (dn * Vector128.Create(V_LN2_TAIL));
+
+                Vector128<double> r2 = r * r;
+                Vector128<double> r4 = r2 * r2;
+                Vector128<double> r8 = r4 * r4;
+
+                // Compute polynomial
+                Vector128<double> poly = ((Vector128.Create(C12) * r + Vector128.Create(C11)) * r2 +
+                                           Vector128.Create(C10) * r + Vector128.Create(C9)) * r8 +
+                                         ((Vector128.Create(C8) * r + Vector128.Create(C7)) * r2 +
+                                          (Vector128.Create(C6) * r + Vector128.Create(C5))) * r4 +
+                                         ((Vector128.Create(C4) * r + Vector128.Create(C3)) * r2 + (r + Vector128<double>.One));
+
+                // m = (n - j) / 64
+                // result = polynomial * 2^m
+                Vector128<double> ret = poly * ((n + Vector128.Create(V_DP64_BIAS)) << 52).AsDouble();
+
+                // Check if -709 < vx < 709
+                if (Vector128.GreaterThanAny(Vector128.Abs(x).AsUInt64(), Vector128.Create(V_ARG_MAX)))
+                {
+                    // (x > V_EXPF_MAX) ? double.PositiveInfinity : x
+                    Vector128<double> infinityMask = Vector128.GreaterThan(x, Vector128.Create(V_EXPF_MAX));
+
+                    ret = Vector128.ConditionalSelect(
+                        infinityMask,
+                        Vector128.Create(double.PositiveInfinity),
+                        ret
+                    );
+
+                    // (x < V_EXPF_MIN) ? 0 : x
+                    ret = Vector128.AndNot(ret, Vector128.LessThan(x, Vector128.Create(V_EXPF_MIN)));
+                }
+
+                return ret;
+            }
+
+            public static Vector256<double> Invoke(Vector256<double> x)
+            {
+                // x * (64.0 / ln(2))
+                Vector256<double> z = x * Vector256.Create(V_TBL_LN2);
+
+                Vector256<double> dn = z + Vector256.Create(V_EXPF_HUGE);
+
+                // n = (int)z
+                Vector256<ulong> n = dn.AsUInt64();
+
+                // dn = (double)n
+                dn -= Vector256.Create(V_EXPF_HUGE);
+
+                // r = x - (dn * (ln(2) / 64))
+                // where ln(2) / 64 is split into Head and Tail values
+                Vector256<double> r = x - (dn * Vector256.Create(V_LN2_HEAD)) - (dn * Vector256.Create(V_LN2_TAIL));
+
+                Vector256<double> r2 = r * r;
+                Vector256<double> r4 = r2 * r2;
+                Vector256<double> r8 = r4 * r4;
+
+                // Compute polynomial
+                Vector256<double> poly = ((Vector256.Create(C12) * r + Vector256.Create(C11)) * r2 +
+                                           Vector256.Create(C10) * r + Vector256.Create(C9)) * r8 +
+                                         ((Vector256.Create(C8) * r + Vector256.Create(C7)) * r2 +
+                                          (Vector256.Create(C6) * r + Vector256.Create(C5))) * r4 +
+                                         ((Vector256.Create(C4) * r + Vector256.Create(C3)) * r2 + (r + Vector256<double>.One));
+
+                // m = (n - j) / 64
+                // result = polynomial * 2^m
+                Vector256<double> ret = poly * ((n + Vector256.Create(V_DP64_BIAS)) << 52).AsDouble();
+
+                // Check if -709 < vx < 709
+                if (Vector256.GreaterThanAny(Vector256.Abs(x).AsUInt64(), Vector256.Create(V_ARG_MAX)))
+                {
+                    // (x > V_EXPF_MAX) ? double.PositiveInfinity : x
+                    Vector256<double> infinityMask = Vector256.GreaterThan(x, Vector256.Create(V_EXPF_MAX));
+
+                    ret = Vector256.ConditionalSelect(
+                        infinityMask,
+                        Vector256.Create(double.PositiveInfinity),
+                        ret
+                    );
+
+                    // (x < V_EXPF_MIN) ? 0 : x
+                    ret = Vector256.AndNot(ret, Vector256.LessThan(x, Vector256.Create(V_EXPF_MIN)));
+                }
+
+                return ret;
+            }
+
+            public static Vector512<double> Invoke(Vector512<double> x)
+            {
+                // x * (64.0 / ln(2))
+                Vector512<double> z = x * Vector512.Create(V_TBL_LN2);
+
+                Vector512<double> dn = z + Vector512.Create(V_EXPF_HUGE);
+
+                // n = (int)z
+                Vector512<ulong> n = dn.AsUInt64();
+
+                // dn = (double)n
+                dn -= Vector512.Create(V_EXPF_HUGE);
+
+                // r = x - (dn * (ln(2) / 64))
+                // where ln(2) / 64 is split into Head and Tail values
+                Vector512<double> r = x - (dn * Vector512.Create(V_LN2_HEAD)) - (dn * Vector512.Create(V_LN2_TAIL));
+
+                Vector512<double> r2 = r * r;
+                Vector512<double> r4 = r2 * r2;
+                Vector512<double> r8 = r4 * r4;
+
+                // Compute polynomial
+                Vector512<double> poly = ((Vector512.Create(C12) * r + Vector512.Create(C11)) * r2 +
+                                           Vector512.Create(C10) * r + Vector512.Create(C9)) * r8 +
+                                         ((Vector512.Create(C8) * r + Vector512.Create(C7)) * r2 +
+                                          (Vector512.Create(C6) * r + Vector512.Create(C5))) * r4 +
+                                         ((Vector512.Create(C4) * r + Vector512.Create(C3)) * r2 + (r + Vector512<double>.One));
+
+                // m = (n - j) / 64
+                // result = polynomial * 2^m
+                Vector512<double> ret = poly * ((n + Vector512.Create(V_DP64_BIAS)) << 52).AsDouble();
+
+                // Check if -709 < vx < 709
+                if (Vector512.GreaterThanAny(Vector512.Abs(x).AsUInt64(), Vector512.Create(V_ARG_MAX)))
+                {
+                    // (x > V_EXPF_MAX) ? double.PositiveInfinity : x
+                    Vector512<double> infinityMask = Vector512.GreaterThan(x, Vector512.Create(V_EXPF_MAX));
+
+                    ret = Vector512.ConditionalSelect(
+                        infinityMask,
+                        Vector512.Create(double.PositiveInfinity),
+                        ret
+                    );
+
+                    // (x < V_EXPF_MIN) ? 0 : x
+                    ret = Vector512.AndNot(ret, Vector512.LessThan(x, Vector512.Create(V_EXPF_MIN)));
+                }
+
+                return ret;
+            }
+        }
+
+        /// <summary>float.Exp(x)</summary>
+        private readonly struct ExpOperatorSingle : IUnaryOperator<float, float>
+        {
+            // This code is based on `vrs4_expf` from amd/aocl-libm-ose
+            // Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation Notes:
+            // 1. Argument Reduction:
+            //      e^x = 2^(x/ln2)                          --- (1)
+            //
+            //      Let x/ln(2) = z                          --- (2)
+            //
+            //      Let z = n + r , where n is an integer    --- (3)
+            //                      |r| <= 1/2
+            //
+            //     From (1), (2) and (3),
+            //      e^x = 2^z
+            //          = 2^(N+r)
+            //          = (2^N)*(2^r)                        --- (4)
+            //
+            // 2. Polynomial Evaluation
+            //   From (4),
+            //     r   = z - N
+            //     2^r = C1 + C2*r + C3*r^2 + C4*r^3 + C5 *r^4 + C6*r^5
+            //
+            // 4. Reconstruction
+            //      Thus,
+            //        e^x = (2^N) * (2^r)
+
+            private const uint V_ARG_MAX = 0x42AE0000;
+
+            private const float V_EXPF_MIN = -103.97208f;
+            private const float V_EXPF_MAX = +88.72284f;
+
+            private const double V_EXPF_HUGE = 6755399441055744;
+            private const double V_TBL_LN2 = 1.4426950408889634;
+
+            private const double C1 = 1.0000000754895704;
+            private const double C2 = 0.6931472254087585;
+            private const double C3 = 0.2402210737432219;
+            private const double C4 = 0.05550297297702539;
+            private const double C5 = 0.009676036358193323;
+            private const double C6 = 0.001341000536524434;
+
+            public static bool Vectorizable => true;
+
+            public static float Invoke(float x) => float.Exp(x);
+
+            public static Vector128<float> Invoke(Vector128<float> x)
+            {
+                // Convert x to double precision
+                (Vector128<double> xl, Vector128<double> xu) = Vector128.Widen(x);
+
+                // x * (64.0 / ln(2))
+                Vector128<double> v_tbl_ln2 = Vector128.Create(V_TBL_LN2);
+
+                Vector128<double> zl = xl * v_tbl_ln2;
+                Vector128<double> zu = xu * v_tbl_ln2;
+
+                Vector128<double> v_expf_huge = Vector128.Create(V_EXPF_HUGE);
+
+                Vector128<double> dnl = zl + v_expf_huge;
+                Vector128<double> dnu = zu + v_expf_huge;
+
+                // n = (int)z
+                Vector128<ulong> nl = dnl.AsUInt64();
+                Vector128<ulong> nu = dnu.AsUInt64();
+
+                // dn = (double)n
+                dnl -= v_expf_huge;
+                dnu -= v_expf_huge;
+
+                // r = z - dn
+                Vector128<double> c1 = Vector128.Create(C1);
+                Vector128<double> c2 = Vector128.Create(C2);
+                Vector128<double> c3 = Vector128.Create(C3);
+                Vector128<double> c4 = Vector128.Create(C4);
+                Vector128<double> c5 = Vector128.Create(C5);
+                Vector128<double> c6 = Vector128.Create(C6);
+
+                Vector128<double> rl = zl - dnl;
+
+                Vector128<double> rl2 = rl * rl;
+                Vector128<double> rl4 = rl2 * rl2;
+
+                Vector128<double> polyl = (c4 * rl + c3) * rl2
+                                       + ((c6 * rl + c5) * rl4
+                                        + (c2 * rl + c1));
+
+
+                Vector128<double> ru = zu - dnu;
+
+                Vector128<double> ru2 = ru * ru;
+                Vector128<double> ru4 = ru2 * ru2;
+
+                Vector128<double> polyu = (c4 * ru + c3) * ru2
+                                       + ((c6 * ru + c5) * ru4
+                                        + (c2 * ru + c1));
+
+                // result = (float)(poly + (n << 52))
+                Vector128<float> ret = Vector128.Narrow(
+                    (polyl.AsUInt64() + (nl << 52)).AsDouble(),
+                    (polyu.AsUInt64() + (nu << 52)).AsDouble()
+                );
+
+                // Check if -103 < |x| < 88
+                if (Vector128.GreaterThanAny(Vector128.Abs(x).AsUInt32(), Vector128.Create(V_ARG_MAX)))
+                {
+                    // (x > V_EXPF_MAX) ? float.PositiveInfinity : x
+                    Vector128<float> infinityMask = Vector128.GreaterThan(x, Vector128.Create(V_EXPF_MAX));
+
+                    ret = Vector128.ConditionalSelect(
+                        infinityMask,
+                        Vector128.Create(float.PositiveInfinity),
+                        ret
+                    );
+
+                    // (x < V_EXPF_MIN) ? 0 : x
+                    ret = Vector128.AndNot(ret, Vector128.LessThan(x, Vector128.Create(V_EXPF_MIN)));
+                }
+
+                return ret;
+            }
+
+            public static Vector256<float> Invoke(Vector256<float> x)
+            {
+                // Convert x to double precision
+                (Vector256<double> xl, Vector256<double> xu) = Vector256.Widen(x);
+
+                // x * (64.0 / ln(2))
+                Vector256<double> v_tbl_ln2 = Vector256.Create(V_TBL_LN2);
+
+                Vector256<double> zl = xl * v_tbl_ln2;
+                Vector256<double> zu = xu * v_tbl_ln2;
+
+                Vector256<double> v_expf_huge = Vector256.Create(V_EXPF_HUGE);
+
+                Vector256<double> dnl = zl + v_expf_huge;
+                Vector256<double> dnu = zu + v_expf_huge;
+
+                // n = (int)z
+                Vector256<ulong> nl = dnl.AsUInt64();
+                Vector256<ulong> nu = dnu.AsUInt64();
+
+                // dn = (double)n
+                dnl -= v_expf_huge;
+                dnu -= v_expf_huge;
+
+                // r = z - dn
+                Vector256<double> c1 = Vector256.Create(C1);
+                Vector256<double> c2 = Vector256.Create(C2);
+                Vector256<double> c3 = Vector256.Create(C3);
+                Vector256<double> c4 = Vector256.Create(C4);
+                Vector256<double> c5 = Vector256.Create(C5);
+                Vector256<double> c6 = Vector256.Create(C6);
+
+                Vector256<double> rl = zl - dnl;
+
+                Vector256<double> rl2 = rl * rl;
+                Vector256<double> rl4 = rl2 * rl2;
+
+                Vector256<double> polyl = (c4 * rl + c3) * rl2
+                                       + ((c6 * rl + c5) * rl4
+                                        + (c2 * rl + c1));
+
+
+                Vector256<double> ru = zu - dnu;
+
+                Vector256<double> ru2 = ru * ru;
+                Vector256<double> ru4 = ru2 * ru2;
+
+                Vector256<double> polyu = (c4 * ru + c3) * ru2
+                                       + ((c6 * ru + c5) * ru4
+                                        + (c2 * ru + c1));
+
+                // result = (float)(poly + (n << 52))
+                Vector256<float> ret = Vector256.Narrow(
+                    (polyl.AsUInt64() + (nl << 52)).AsDouble(),
+                    (polyu.AsUInt64() + (nu << 52)).AsDouble()
+                );
+
+                // Check if -103 < |x| < 88
+                if (Vector256.GreaterThanAny(Vector256.Abs(x).AsUInt32(), Vector256.Create(V_ARG_MAX)))
+                {
+                    // (x > V_EXPF_MAX) ? float.PositiveInfinity : x
+                    Vector256<float> infinityMask = Vector256.GreaterThan(x, Vector256.Create(V_EXPF_MAX));
+
+                    ret = Vector256.ConditionalSelect(
+                        infinityMask,
+                        Vector256.Create(float.PositiveInfinity),
+                        ret
+                    );
+
+                    // (x < V_EXPF_MIN) ? 0 : x
+                    ret = Vector256.AndNot(ret, Vector256.LessThan(x, Vector256.Create(V_EXPF_MIN)));
+                }
+
+                return ret;
+            }
+
+            public static Vector512<float> Invoke(Vector512<float> x)
+            {
+                // Convert x to double precision
+                (Vector512<double> xl, Vector512<double> xu) = Vector512.Widen(x);
+
+                // x * (64.0 / ln(2))
+                Vector512<double> v_tbl_ln2 = Vector512.Create(V_TBL_LN2);
+
+                Vector512<double> zl = xl * v_tbl_ln2;
+                Vector512<double> zu = xu * v_tbl_ln2;
+
+                Vector512<double> v_expf_huge = Vector512.Create(V_EXPF_HUGE);
+
+                Vector512<double> dnl = zl + v_expf_huge;
+                Vector512<double> dnu = zu + v_expf_huge;
+
+                // n = (int)z
+                Vector512<ulong> nl = dnl.AsUInt64();
+                Vector512<ulong> nu = dnu.AsUInt64();
+
+                // dn = (double)n
+                dnl -= v_expf_huge;
+                dnu -= v_expf_huge;
+
+                // r = z - dn
+                Vector512<double> c1 = Vector512.Create(C1);
+                Vector512<double> c2 = Vector512.Create(C2);
+                Vector512<double> c3 = Vector512.Create(C3);
+                Vector512<double> c4 = Vector512.Create(C4);
+                Vector512<double> c5 = Vector512.Create(C5);
+                Vector512<double> c6 = Vector512.Create(C6);
+
+                Vector512<double> rl = zl - dnl;
+
+                Vector512<double> rl2 = rl * rl;
+                Vector512<double> rl4 = rl2 * rl2;
+
+                Vector512<double> polyl = (c4 * rl + c3) * rl2
+                                       + ((c6 * rl + c5) * rl4
+                                        + (c2 * rl + c1));
+
+
+                Vector512<double> ru = zu - dnu;
+
+                Vector512<double> ru2 = ru * ru;
+                Vector512<double> ru4 = ru2 * ru2;
+
+                Vector512<double> polyu = (c4 * ru + c3) * ru2
+                                       + ((c6 * ru + c5) * ru4
+                                        + (c2 * ru + c1));
+
+                // result = (float)(poly + (n << 52))
+                Vector512<float> ret = Vector512.Narrow(
+                    (polyl.AsUInt64() + (nl << 52)).AsDouble(),
+                    (polyu.AsUInt64() + (nu << 52)).AsDouble()
+                );
+
+                // Check if -103 < |x| < 88
+                if (Vector512.GreaterThanAny(Vector512.Abs(x).AsUInt32(), Vector512.Create(V_ARG_MAX)))
+                {
+                    // (x > V_EXPF_MAX) ? float.PositiveInfinity : x
+                    Vector512<float> infinityMask = Vector512.GreaterThan(x, Vector512.Create(V_EXPF_MAX));
+
+                    ret = Vector512.ConditionalSelect(
+                        infinityMask,
+                        Vector512.Create(float.PositiveInfinity),
+                        ret
+                    );
+
+                    // (x < V_EXPF_MIN) ? 0 : x
+                    ret = Vector512.AndNot(ret, Vector512.LessThan(x, Vector512.Create(V_EXPF_MIN)));
+                }
+
+                return ret;
+            }
+        }
+#endif
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp10.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp10.cs
new file mode 100644
index 000000000000..6e228d7932f6
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp10.cs
@@ -0,0 +1,42 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise result of raising 10 to the number powers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Exp10(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Exp10<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IExponentialFunctions<T> =>
+            InvokeSpanIntoSpan<T, Exp10Operator<T>>(x, destination);
+
+        /// <summary>T.Exp10(x)</summary>
+        private readonly struct Exp10Operator<T> : IUnaryOperator<T, T>
+            where T : IExponentialFunctions<T>
+        {
+            private const double NaturalLog10 = 2.302585092994046;
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Exp10(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => ExpOperator<T>.Invoke(x * Vector128.Create(T.CreateTruncating(NaturalLog10)));
+            public static Vector256<T> Invoke(Vector256<T> x) => ExpOperator<T>.Invoke(x * Vector256.Create(T.CreateTruncating(NaturalLog10)));
+            public static Vector512<T> Invoke(Vector512<T> x) => ExpOperator<T>.Invoke(x * Vector512.Create(T.CreateTruncating(NaturalLog10)));
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp10M1.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp10M1.cs
new file mode 100644
index 000000000000..5687a087af71
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp10M1.cs
@@ -0,0 +1,40 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise result of raising 10 to the number powers in the specified tensor, minus one.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Exp10M1(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Exp10M1<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IExponentialFunctions<T> =>
+            InvokeSpanIntoSpan<T, Exp10M1Operator<T>>(x, destination);
+
+        /// <summary>T.Exp10M1(x)</summary>
+        private readonly struct Exp10M1Operator<T> : IUnaryOperator<T, T>
+            where T : IExponentialFunctions<T>
+        {
+            public static bool Vectorizable => Exp2Operator<T>.Vectorizable;
+
+            public static T Invoke(T x) => T.Exp10M1(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => Exp10Operator<T>.Invoke(x) - Vector128<T>.One;
+            public static Vector256<T> Invoke(Vector256<T> x) => Exp10Operator<T>.Invoke(x) - Vector256<T>.One;
+            public static Vector512<T> Invoke(Vector512<T> x) => Exp10Operator<T>.Invoke(x) - Vector512<T>.One;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp2.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp2.cs
new file mode 100644
index 000000000000..a3aeacdefde9
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp2.cs
@@ -0,0 +1,42 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise result of raising 2 to the number powers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Exp2(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Exp2<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IExponentialFunctions<T> =>
+            InvokeSpanIntoSpan<T, Exp2Operator<T>>(x, destination);
+
+        /// <summary>T.Exp2(x)</summary>
+        private readonly struct Exp2Operator<T> : IUnaryOperator<T, T>
+            where T : IExponentialFunctions<T>
+        {
+            private const double NaturalLog2 = 0.6931471805599453;
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Exp2(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => ExpOperator<T>.Invoke(x * Vector128.Create(T.CreateTruncating(NaturalLog2)));
+            public static Vector256<T> Invoke(Vector256<T> x) => ExpOperator<T>.Invoke(x * Vector256.Create(T.CreateTruncating(NaturalLog2)));
+            public static Vector512<T> Invoke(Vector512<T> x) => ExpOperator<T>.Invoke(x * Vector512.Create(T.CreateTruncating(NaturalLog2)));
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp2M1.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp2M1.cs
new file mode 100644
index 000000000000..1aebd4cc65c3
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Exp2M1.cs
@@ -0,0 +1,40 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise result of raising 2 to the number powers in the specified tensor, minus one.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Exp2M1(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Exp2M1<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IExponentialFunctions<T> =>
+            InvokeSpanIntoSpan<T, Exp2M1Operator<T>>(x, destination);
+
+        /// <summary>T.Exp2M1(x)</summary>
+        private readonly struct Exp2M1Operator<T> : IUnaryOperator<T, T>
+            where T : IExponentialFunctions<T>
+        {
+            public static bool Vectorizable => Exp2Operator<T>.Vectorizable;
+
+            public static T Invoke(T x) => T.Exp2M1(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => Exp2Operator<T>.Invoke(x) - Vector128<T>.One;
+            public static Vector256<T> Invoke(Vector256<T> x) => Exp2Operator<T>.Invoke(x) - Vector256<T>.One;
+            public static Vector512<T> Invoke(Vector512<T> x) => Exp2Operator<T>.Invoke(x) - Vector512<T>.One;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ExpM1.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ExpM1.cs
new file mode 100644
index 000000000000..8cdc48b0947c
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ExpM1.cs
@@ -0,0 +1,40 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise result of raising <c>e</c> to the number powers in the specified tensor, minus 1.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.ExpM1(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void ExpM1<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IExponentialFunctions<T> =>
+            InvokeSpanIntoSpan<T, ExpM1Operator<T>>(x, destination);
+
+        /// <summary>T.ExpM1(x)</summary>
+        private readonly struct ExpM1Operator<T> : IUnaryOperator<T, T>
+            where T : IExponentialFunctions<T>
+        {
+            public static bool Vectorizable => ExpOperator<T>.Vectorizable;
+
+            public static T Invoke(T x) => T.ExpM1(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => ExpOperator<T>.Invoke(x) - Vector128<T>.One;
+            public static Vector256<T> Invoke(Vector256<T> x) => ExpOperator<T>.Invoke(x) - Vector256<T>.One;
+            public static Vector512<T> Invoke(Vector512<T> x) => ExpOperator<T>.Invoke(x) - Vector512<T>.One;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.FloatHelpers.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.FloatHelpers.cs
new file mode 100644
index 000000000000..ec97b9a61af9
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.FloatHelpers.cs
@@ -0,0 +1,28 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        private static Vector128<float> ApplyScalar<TOperator>(Vector128<float> floats) where TOperator : IUnaryOperator<float, float> =>
+            Vector128.Create(TOperator.Invoke(floats[0]), TOperator.Invoke(floats[1]), TOperator.Invoke(floats[2]), TOperator.Invoke(floats[3]));
+
+        private static Vector256<float> ApplyScalar<TOperator>(Vector256<float> floats) where TOperator : IUnaryOperator<float, float> =>
+            Vector256.Create(ApplyScalar<TOperator>(floats.GetLower()), ApplyScalar<TOperator>(floats.GetUpper()));
+
+        private static Vector512<float> ApplyScalar<TOperator>(Vector512<float> floats) where TOperator : IUnaryOperator<float, float> =>
+            Vector512.Create(ApplyScalar<TOperator>(floats.GetLower()), ApplyScalar<TOperator>(floats.GetUpper()));
+
+        private static Vector128<double> ApplyScalar<TOperator>(Vector128<double> doubles) where TOperator : IUnaryOperator<double, double> =>
+            Vector128.Create(TOperator.Invoke(doubles[0]), TOperator.Invoke(doubles[1]));
+
+        private static Vector256<double> ApplyScalar<TOperator>(Vector256<double> doubles) where TOperator : IUnaryOperator<double, double> =>
+            Vector256.Create(ApplyScalar<TOperator>(doubles.GetLower()), ApplyScalar<TOperator>(doubles.GetUpper()));
+
+        private static Vector512<double> ApplyScalar<TOperator>(Vector512<double> doubles) where TOperator : IUnaryOperator<double, double> =>
+            Vector512.Create(ApplyScalar<TOperator>(doubles.GetLower()), ApplyScalar<TOperator>(doubles.GetUpper()));
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Floor.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Floor.cs
new file mode 100644
index 000000000000..ae33b05099ee
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Floor.cs
@@ -0,0 +1,71 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise floor of numbers in the specified tensor.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Floor(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void Floor<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IFloatingPoint<T> =>
+            InvokeSpanIntoSpan<T, FloorOperator<T>>(x, destination);
+
+        private readonly struct FloorOperator<T> : IUnaryOperator<T, T> where T : IFloatingPoint<T>
+        {
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Floor(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return Vector128.Floor(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return Vector128.Floor(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return Vector256.Floor(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return Vector256.Floor(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return Vector512.Floor(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return Vector512.Floor(x.AsDouble()).As<double, T>();
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.FusedMultiplyAdd.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.FusedMultiplyAdd.cs
new file mode 100644
index 000000000000..57cc8f3a373a
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.FusedMultiplyAdd.cs
@@ -0,0 +1,169 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="addend">The third tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" /> and length of <paramref name="addend" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="addend"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// <para>
+        /// This computes (<paramref name="x"/> * <paramref name="y"/>) as if to infinite precision, adds <paramref name="addend"/> to that result as if to
+        /// infinite precision, and finally rounds to the nearest representable value. This differs from the non-fused sequence which would compute
+        /// (<paramref name="x"/> * <paramref name="y"/>) as if to infinite precision, round the result to the nearest representable value, add <paramref name="addend"/> to the
+        /// rounded result as if to infinite precision, and finally round to the nearest representable value.
+        /// </para>
+        /// </remarks>
+        public static void FusedMultiplyAdd<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, ReadOnlySpan<T> addend, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanSpanSpanIntoSpan<T, FusedMultiplyAddOperator<T>>(x, y, addend, destination);
+
+        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="addend">The third tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" /></c>.
+        /// It corresponds to the <c>axpy</c> method defined by <c>BLAS1</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// <para>
+        /// This computes (<paramref name="x"/> * <paramref name="y"/>) as if to infinite precision, adds <paramref name="addend"/> to that result as if to
+        /// infinite precision, and finally rounds to the nearest representable value. This differs from the non-fused sequence which would compute
+        /// (<paramref name="x"/> * <paramref name="y"/>) as if to infinite precision, round the result to the nearest representable value, add <paramref name="addend"/> to the
+        /// rounded result as if to infinite precision, and finally round to the nearest representable value.
+        /// </para>
+        /// </remarks>
+        public static void FusedMultiplyAdd<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, T addend, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanSpanScalarIntoSpan<T, FusedMultiplyAddOperator<T>>(x, y, addend, destination);
+
+        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="addend">The third tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="addend" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="addend"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />) + <paramref name="addend" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// <para>
+        /// This computes (<paramref name="x"/> * <paramref name="y"/>) as if to infinite precision, adds <paramref name="addend"/> to that result as if to
+        /// infinite precision, and finally rounds to the nearest representable value. This differs from the non-fused sequence which would compute
+        /// (<paramref name="x"/> * <paramref name="y"/>) as if to infinite precision, round the result to the nearest representable value, add <paramref name="addend"/> to the
+        /// rounded result as if to infinite precision, and finally round to the nearest representable value.
+        /// </para>
+        /// </remarks>
+        public static void FusedMultiplyAdd<T>(ReadOnlySpan<T> x, T y, ReadOnlySpan<T> addend, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanScalarSpanIntoSpan<T, FusedMultiplyAddOperator<T>>(x, y, addend, destination);
+
+        /// <summary>(x * y) + z</summary>
+        private readonly struct FusedMultiplyAddOperator<T> : ITernaryOperator<T> where T : IFloatingPointIeee754<T>
+        {
+            public static T Invoke(T x, T y, T z) => T.FusedMultiplyAdd(x, y, z);
+
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z)
+            {
+                if (Fma.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Fma.MultiplyAdd(x.AsSingle(), y.AsSingle(), z.AsSingle()).As<float, T>();
+                    if (typeof(T) == typeof(double)) return Fma.MultiplyAdd(x.AsDouble(), y.AsDouble(), z.AsDouble()).As<double, T>();
+                }
+
+                if (AdvSimd.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return AdvSimd.FusedMultiplyAdd(z.AsSingle(), x.AsSingle(), y.AsSingle()).As<float, T>();
+                }
+
+                if (AdvSimd.Arm64.IsSupported)
+                {
+                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.FusedMultiplyAdd(z.AsDouble(), x.AsDouble(), y.AsDouble()).As<double, T>();
+                }
+
+                if (typeof(T) == typeof(float))
+                {
+                    Vector128<float> xFloats = x.AsSingle();
+                    Vector128<float> yFloats = y.AsSingle();
+                    Vector128<float> zFloats = z.AsSingle();
+                    return Vector128.Create(
+                        float.FusedMultiplyAdd(xFloats[0], yFloats[0], zFloats[0]),
+                        float.FusedMultiplyAdd(xFloats[1], yFloats[1], zFloats[1]),
+                        float.FusedMultiplyAdd(xFloats[2], yFloats[2], zFloats[2]),
+                        float.FusedMultiplyAdd(xFloats[3], yFloats[3], zFloats[3])).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    Vector128<double> xDoubles = x.AsDouble();
+                    Vector128<double> yDoubles = y.AsDouble();
+                    Vector128<double> zDoubles = z.AsDouble();
+                    return Vector128.Create(
+                        double.FusedMultiplyAdd(xDoubles[0], yDoubles[0], zDoubles[0]),
+                        double.FusedMultiplyAdd(xDoubles[1], yDoubles[1], zDoubles[1])).As<double, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> z)
+            {
+                if (Fma.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Fma.MultiplyAdd(x.AsSingle(), y.AsSingle(), z.AsSingle()).As<float, T>();
+                    if (typeof(T) == typeof(double)) return Fma.MultiplyAdd(x.AsDouble(), y.AsDouble(), z.AsDouble()).As<double, T>();
+                }
+
+                return Vector256.Create(
+                    Invoke(x.GetLower(), y.GetLower(), z.GetLower()),
+                    Invoke(x.GetUpper(), y.GetUpper(), z.GetUpper()));
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z)
+            {
+                if (Avx512F.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Avx512F.FusedMultiplyAdd(x.AsSingle(), y.AsSingle(), z.AsSingle()).As<float, T>();
+                    if (typeof(T) == typeof(double)) return Avx512F.FusedMultiplyAdd(x.AsDouble(), y.AsDouble(), z.AsDouble()).As<double, T>();
+                }
+
+                return Vector512.Create(
+                    Invoke(x.GetLower(), y.GetLower(), z.GetLower()),
+                    Invoke(x.GetUpper(), y.GetUpper(), z.GetUpper()));
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Half.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Half.cs
new file mode 100644
index 000000000000..c830a1f57f5b
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Half.cs
@@ -0,0 +1,44 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>
+        /// Copies <paramref name="source"/> to <paramref name="destination"/>, converting each <see cref="float" />
+        /// value to its nearest representable half-precision floating-point value.
+        /// </summary>
+        /// <param name="source">The source span from which to copy values.</param>
+        /// <param name="destination">The destination span into which the converted values should be written.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (Half)<paramref name="source" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// <paramref name="source"/> and <paramref name="destination"/> must not overlap. If they do, behavior is undefined.
+        /// </para>
+        /// </remarks>
+        public static void ConvertToHalf(ReadOnlySpan<float> source, Span<Half> destination) =>
+            ConvertTruncating(source, destination);
+
+        /// <summary>
+        /// Copies <paramref name="source"/> to <paramref name="destination"/>, converting each half-precision
+        /// floating-point value to its nearest representable <see cref="float"/> value.
+        /// </summary>
+        /// <param name="source">The source span from which to copy values.</param>
+        /// <param name="destination">The destination span into which the converted values should be written.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (float)<paramref name="source" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// <paramref name="source"/> and <paramref name="destination"/> must not overlap. If they do, behavior is undefined.
+        /// </para>
+        /// </remarks>
+        public static void ConvertToSingle(ReadOnlySpan<Half> source, Span<float> destination) =>
+            ConvertTruncating(source, destination);
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Hypot.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Hypot.cs
new file mode 100644
index 000000000000..30b9bc517851
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Hypot.cs
@@ -0,0 +1,38 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise hypotensue given values from two tensors representing the lengths of the shorter sides in a right-angled triangle.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Hypot(<paramref name="x" />[i], <paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void Hypot<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : IRootFunctions<T> =>
+            InvokeSpanSpanIntoSpan<T, HypotOperator<T>>(x, y, destination);
+
+        /// <summary>T.Hypot(x, y)</summary>
+        private readonly struct HypotOperator<T> : IBinaryOperator<T>
+            where T : IRootFunctions<T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x, T y) => T.Hypot(x, y);
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => Vector128.Sqrt((x * x) + (y * y));
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => Vector256.Sqrt((x * x) + (y * y));
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => Vector512.Sqrt((x * x) + (y * y));
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ILogB.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ILogB.cs
new file mode 100644
index 000000000000..36b0a5bd67c2
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ILogB.cs
@@ -0,0 +1,56 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise integer logarithm of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.ILogB(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void ILogB<T>(ReadOnlySpan<T> x, Span<int> destination)
+            where T : IFloatingPointIeee754<T>
+        {
+            if (typeof(T) == typeof(double))
+            {
+                // Special-case double as the only vectorizable floating-point type whose size != sizeof(int).
+                InvokeSpanIntoSpan_2to1<double, int, ILogBDoubleOperator>(Rename<T, double>(x), destination);
+            }
+            else
+            {
+                InvokeSpanIntoSpan<T, int, ILogBOperator<T>>(x, destination);
+            }
+        }
+
+        /// <summary>T.ILogB(x)</summary>
+        private readonly struct ILogBOperator<T> : IUnaryOperator<T, int> where T : IFloatingPointIeee754<T>
+        {
+            public static bool Vectorizable => false; // TODO: vectorize for float
+
+            public static int Invoke(T x) => T.ILogB(x);
+            public static Vector128<int> Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public static Vector256<int> Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public static Vector512<int> Invoke(Vector512<T> x) => throw new NotSupportedException();
+        }
+
+        /// <summary>double.ILogB(x)</summary>
+        private readonly struct ILogBDoubleOperator : IUnaryTwoToOneOperator<double, int>
+        {
+            public static bool Vectorizable => false; // TODO: vectorize
+
+            public static int Invoke(double x) => double.ILogB(x);
+            public static Vector128<int> Invoke(Vector128<double> lower, Vector128<double> upper) => throw new NotSupportedException();
+            public static Vector256<int> Invoke(Vector256<double> lower, Vector256<double> upper) => throw new NotSupportedException();
+            public static Vector512<int> Invoke(Vector512<double> lower, Vector512<double> upper) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Ieee754Remainder.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Ieee754Remainder.cs
new file mode 100644
index 000000000000..2aea50793a89
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Ieee754Remainder.cs
@@ -0,0 +1,68 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise remainder of the numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Ieee754Remainder(<paramref name="x" />[i], <paramref name="y" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void Ieee754Remainder<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanSpanIntoSpan<T, Ieee754RemainderOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise remainder of the numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Ieee754Remainder(<paramref name="x" />[i], <paramref name="y" />)</c>.
+        /// </para>
+        /// </remarks>
+        public static void Ieee754Remainder<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanScalarIntoSpan<T, Ieee754RemainderOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise remainder of the numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a scalar.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Ieee754Remainder(<paramref name="x" />, <paramref name="y" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void Ieee754Remainder<T>(T x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeScalarSpanIntoSpan<T, Ieee754RemainderOperator<T>>(x, y, destination);
+
+        /// <summary>T.Ieee754Remainder(x, y)</summary>
+        internal readonly struct Ieee754RemainderOperator<T> : IBinaryOperator<T> where T : IFloatingPointIeee754<T>
+        {
+            public static bool Vectorizable => false;
+            public static T Invoke(T x, T y) => T.Ieee754Remainder(x, y);
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => throw new NotSupportedException();
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => throw new NotSupportedException();
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs
new file mode 100644
index 000000000000..1b584d05a537
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs
@@ -0,0 +1,520 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Searches for the index of the largest number in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <returns>The index of the maximum element in <paramref name="x"/>, or -1 if <paramref name="x"/> is empty.</returns>
+        /// <remarks>
+        /// <para>
+        /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If any value equal to NaN
+        /// is present, the index of the first is returned. Positive 0 is considered greater than negative 0.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static int IndexOfMax<T>(ReadOnlySpan<T> x)
+            where T : INumber<T> =>
+            IndexOfMinMaxCore<T, IndexOfMaxOperator<T>>(x);
+
+        /// <summary>Returns the index of MathF.Max(x, y)</summary>
+        internal readonly struct IndexOfMaxOperator<T> : IIndexOfOperator<T> where T : INumber<T>
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static void Invoke(ref Vector128<T> result, Vector128<T> current, ref Vector128<T> resultIndex, Vector128<T> currentIndex)
+            {
+                Vector128<T> useResult = Vector128.GreaterThan(result, current);
+                Vector128<T> equalMask = Vector128.Equals(result, current);
+
+                if (equalMask != Vector128<T>.Zero)
+                {
+                    Vector128<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current));
+                        Vector128<T> currentNegative = IsNegative(current);
+                        Vector128<T> sameSign = Vector128.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
+                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative);
+                    }
+                    else
+                    {
+                        useResult |= equalMask & lessThanIndexMask;
+                    }
+                }
+
+                result = ElementWiseSelect(useResult, result, current);
+                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static void Invoke(ref Vector256<T> result, Vector256<T> current, ref Vector256<T> resultIndex, Vector256<T> currentIndex)
+            {
+                Vector256<T> useResult = Vector256.GreaterThan(result, current);
+                Vector256<T> equalMask = Vector256.Equals(result, current);
+
+                if (equalMask != Vector256<T>.Zero)
+                {
+                    Vector256<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current));
+                        Vector256<T> currentNegative = IsNegative(current);
+                        Vector256<T> sameSign = Vector256.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
+                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative);
+                    }
+                    else
+                    {
+                        useResult |= equalMask & lessThanIndexMask;
+                    }
+                }
+
+                result = ElementWiseSelect(useResult, result, current);
+                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static void Invoke(ref Vector512<T> result, Vector512<T> current, ref Vector512<T> resultIndex, Vector512<T> currentIndex)
+            {
+                Vector512<T> useResult = Vector512.GreaterThan(result, current);
+                Vector512<T> equalMask = Vector512.Equals(result, current);
+
+                if (equalMask != Vector512<T>.Zero)
+                {
+                    Vector512<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current));
+                        Vector512<T> currentNegative = IsNegative(current);
+                        Vector512<T> sameSign = Vector512.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
+                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative);
+                    }
+                    else
+                    {
+                        useResult |= equalMask & lessThanIndexMask;
+                    }
+                }
+
+                result = ElementWiseSelect(useResult, result, current);
+                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static int Invoke(ref T result, T current, int resultIndex, int currentIndex)
+            {
+                if (result == current)
+                {
+                    bool resultNegative = IsNegative(result);
+                    if ((resultNegative == IsNegative(current)) ? (currentIndex < resultIndex) : resultNegative)
+                    {
+                        result = current;
+                        return currentIndex;
+                    }
+                }
+                else if (current > result)
+                {
+                    result = current;
+                    return currentIndex;
+                }
+
+                return resultIndex;
+            }
+        }
+
+        private static unsafe int IndexOfMinMaxCore<T, TIndexOfMinMax>(ReadOnlySpan<T> x)
+    where T : INumber<T>
+    where TIndexOfMinMax : struct, IIndexOfOperator<T>
+        {
+            if (x.IsEmpty)
+            {
+                return -1;
+            }
+
+            // This matches the IEEE 754:2019 `maximum`/`minimum` functions.
+            // It propagates NaN inputs back to the caller and
+            // otherwise returns the index of the greater of the inputs.
+            // It treats +0 as greater than -0 as per the specification.
+
+            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && x.Length >= Vector512<T>.Count)
+            {
+                Debug.Assert(sizeof(T) is 1 or 2 or 4 or 8);
+
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                static Vector512<T> CreateVector512T(int i) =>
+                    sizeof(T) == sizeof(long) ? Vector512.Create((long)i).As<long, T>() :
+                    sizeof(T) == sizeof(int) ? Vector512.Create(i).As<int, T>() :
+                    sizeof(T) == sizeof(short) ? Vector512.Create((short)i).As<short, T>() :
+                    Vector512.Create((byte)i).As<byte, T>();
+
+                ref T xRef = ref MemoryMarshal.GetReference(x);
+                Vector512<T> resultIndex =
+#if NET9_0_OR_GREATER
+                    sizeof(T) == sizeof(long) ? Vector512<long>.Indices.As<long, T>() :
+                    sizeof(T) == sizeof(int) ? Vector512<int>.Indices.As<int, T>() :
+                    sizeof(T) == sizeof(short) ? Vector512<short>.Indices.As<short, T>() :
+                    Vector512<byte>.Indices.As<byte, T>();
+#else
+                    sizeof(T) == sizeof(long) ? Vector512.Create(0L, 1, 2, 3, 4, 5, 6, 7).As<long, T>() :
+                    sizeof(T) == sizeof(int) ? Vector512.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As<int, T>() :
+                    sizeof(T) == sizeof(short) ? Vector512.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31).As<short, T>() :
+                    Vector512.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63).As<byte, T>();
+#endif
+                Vector512<T> currentIndex = resultIndex;
+                Vector512<T> increment = CreateVector512T(Vector512<T>.Count);
+
+                // Load the first vector as the initial set of results, and bail immediately
+                // to scalar handling if it contains any NaNs (which don't compare equally to themselves).
+                Vector512<T> result = Vector512.LoadUnsafe(ref xRef);
+                Vector512<T> current;
+
+                Vector512<T> nanMask;
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    nanMask = ~Vector512.Equals(result, result);
+                    if (nanMask != Vector512<T>.Zero)
+                    {
+                        return IndexOfFirstMatch(nanMask);
+                    }
+                }
+
+                int oneVectorFromEnd = x.Length - Vector512<T>.Count;
+                int i = Vector512<T>.Count;
+
+                // Aggregate additional vectors into the result as long as there's at least one full vector left to process.
+                while (i <= oneVectorFromEnd)
+                {
+                    // Load the next vector, and early exit on NaN.
+                    current = Vector512.LoadUnsafe(ref xRef, (uint)i);
+                    currentIndex += increment;
+
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        nanMask = ~Vector512.Equals(current, current);
+                        if (nanMask != Vector512<T>.Zero)
+                        {
+                            return i + IndexOfFirstMatch(nanMask);
+                        }
+                    }
+
+                    TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex);
+
+                    i += Vector512<T>.Count;
+                }
+
+                // If any elements remain, handle them in one final vector.
+                if (i != x.Length)
+                {
+                    current = Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512<T>.Count));
+                    currentIndex += CreateVector512T(x.Length - i);
+
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        nanMask = ~Vector512.Equals(current, current);
+                        if (nanMask != Vector512<T>.Zero)
+                        {
+                            int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask);
+                            return typeof(T) == typeof(double) ?
+                                (int)(long)(object)currentIndex.As<T, long>()[indexInVectorOfFirstMatch] :
+                                (int)(object)currentIndex.As<T, int>()[indexInVectorOfFirstMatch];
+                        }
+                    }
+
+                    TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex);
+                }
+
+                // Aggregate the lanes in the vector to create the final scalar result.
+                return IndexOfFinalAggregate<T, TIndexOfMinMax>(result, resultIndex);
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && x.Length >= Vector256<T>.Count)
+            {
+                Debug.Assert(sizeof(T) is 1 or 2 or 4 or 8);
+
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                static Vector256<T> CreateVector256T(int i) =>
+                    sizeof(T) == sizeof(long) ? Vector256.Create((long)i).As<long, T>() :
+                    sizeof(T) == sizeof(int) ? Vector256.Create(i).As<int, T>() :
+                    sizeof(T) == sizeof(short) ? Vector256.Create((short)i).As<short, T>() :
+                    Vector256.Create((byte)i).As<byte, T>();
+
+                ref T xRef = ref MemoryMarshal.GetReference(x);
+                Vector256<T> resultIndex =
+#if NET9_0_OR_GREATER
+                    sizeof(T) == sizeof(long) ? Vector256<long>.Indices.As<long, T>() :
+                    sizeof(T) == sizeof(int) ? Vector256<int>.Indices.As<int, T>() :
+                    sizeof(T) == sizeof(short) ? Vector256<short>.Indices.As<short, T>() :
+                    Vector256<byte>.Indices.As<byte, T>();
+#else
+                    sizeof(T) == sizeof(long) ? Vector256.Create(0L, 1, 2, 3).As<long, T>() :
+                    sizeof(T) == sizeof(int) ? Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7).As<int, T>() :
+                    sizeof(T) == sizeof(short) ? Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As<short, T>() :
+                    Vector256.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31).As<byte, T>();
+#endif
+                Vector256<T> currentIndex = resultIndex;
+                Vector256<T> increment = CreateVector256T(Vector256<T>.Count);
+
+                // Load the first vector as the initial set of results, and bail immediately
+                // to scalar handling if it contains any NaNs (which don't compare equally to themselves).
+                Vector256<T> result = Vector256.LoadUnsafe(ref xRef);
+                Vector256<T> current;
+
+                Vector256<T> nanMask;
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    nanMask = ~Vector256.Equals(result, result);
+                    if (nanMask != Vector256<T>.Zero)
+                    {
+                        return IndexOfFirstMatch(nanMask);
+                    }
+                }
+
+                int oneVectorFromEnd = x.Length - Vector256<T>.Count;
+                int i = Vector256<T>.Count;
+
+                // Aggregate additional vectors into the result as long as there's at least one full vector left to process.
+                while (i <= oneVectorFromEnd)
+                {
+                    // Load the next vector, and early exit on NaN.
+                    current = Vector256.LoadUnsafe(ref xRef, (uint)i);
+                    currentIndex += increment;
+
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        nanMask = ~Vector256.Equals(current, current);
+                        if (nanMask != Vector256<T>.Zero)
+                        {
+                            return i + IndexOfFirstMatch(nanMask);
+                        }
+                    }
+
+                    TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex);
+
+                    i += Vector256<T>.Count;
+                }
+
+                // If any elements remain, handle them in one final vector.
+                if (i != x.Length)
+                {
+                    current = Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256<T>.Count));
+                    currentIndex += CreateVector256T(x.Length - i);
+
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        nanMask = ~Vector256.Equals(current, current);
+                        if (nanMask != Vector256<T>.Zero)
+                        {
+                            int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask);
+                            return typeof(T) == typeof(double) ?
+                                (int)(long)(object)currentIndex.As<T, long>()[indexInVectorOfFirstMatch] :
+                                (int)(object)currentIndex.As<T, int>()[indexInVectorOfFirstMatch];
+                        }
+                    }
+
+                    TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex);
+                }
+
+                // Aggregate the lanes in the vector to create the final scalar result.
+                return IndexOfFinalAggregate<T, TIndexOfMinMax>(result, resultIndex);
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && x.Length >= Vector128<T>.Count)
+            {
+                Debug.Assert(sizeof(T) is 1 or 2 or 4 or 8);
+
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                static Vector128<T> CreateVector128T(int i) =>
+                    sizeof(T) == sizeof(long) ? Vector128.Create((long)i).As<long, T>() :
+                    sizeof(T) == sizeof(int) ? Vector128.Create(i).As<int, T>() :
+                    sizeof(T) == sizeof(short) ? Vector128.Create((short)i).As<short, T>() :
+                    Vector128.Create((byte)i).As<byte, T>();
+
+                ref T xRef = ref MemoryMarshal.GetReference(x);
+                Vector128<T> resultIndex =
+#if NET9_0_OR_GREATER
+                    sizeof(T) == sizeof(long) ? Vector128<long>.Indices.As<long, T>() :
+                    sizeof(T) == sizeof(int) ? Vector128<int>.Indices.As<int, T>() :
+                    sizeof(T) == sizeof(short) ? Vector128<short>.Indices.As<short, T>() :
+                    Vector128<byte>.Indices.As<byte, T>();
+#else
+                    sizeof(T) == sizeof(long) ? Vector128.Create(0L, 1).As<long, T>() :
+                    sizeof(T) == sizeof(int) ? Vector128.Create(0, 1, 2, 3).As<int, T>() :
+                    sizeof(T) == sizeof(short) ? Vector128.Create(0, 1, 2, 3, 4, 5, 6, 7).As<short, T>() :
+                    Vector128.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As<byte, T>();
+#endif
+                Vector128<T> currentIndex = resultIndex;
+                Vector128<T> increment = CreateVector128T(Vector128<T>.Count);
+
+                // Load the first vector as the initial set of results, and bail immediately
+                // to scalar handling if it contains any NaNs (which don't compare equally to themselves).
+                Vector128<T> result = Vector128.LoadUnsafe(ref xRef);
+                Vector128<T> current;
+
+                Vector128<T> nanMask;
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    nanMask = ~Vector128.Equals(result, result);
+                    if (nanMask != Vector128<T>.Zero)
+                    {
+                        return IndexOfFirstMatch(nanMask);
+                    }
+                }
+
+                int oneVectorFromEnd = x.Length - Vector128<T>.Count;
+                int i = Vector128<T>.Count;
+
+                // Aggregate additional vectors into the result as long as there's at least one full vector left to process.
+                while (i <= oneVectorFromEnd)
+                {
+                    // Load the next vector, and early exit on NaN.
+                    current = Vector128.LoadUnsafe(ref xRef, (uint)i);
+                    currentIndex += increment;
+
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        nanMask = ~Vector128.Equals(current, current);
+                        if (nanMask != Vector128<T>.Zero)
+                        {
+                            return i + IndexOfFirstMatch(nanMask);
+                        }
+                    }
+
+                    TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex);
+
+                    i += Vector128<T>.Count;
+                }
+
+                // If any elements remain, handle them in one final vector.
+                if (i != x.Length)
+                {
+                    current = Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128<T>.Count));
+                    currentIndex += CreateVector128T(x.Length - i);
+
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        nanMask = ~Vector128.Equals(current, current);
+                        if (nanMask != Vector128<T>.Zero)
+                        {
+                            int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask);
+                            return typeof(T) == typeof(double) ?
+                                (int)(long)(object)currentIndex.As<T, long>()[indexInVectorOfFirstMatch] :
+                                (int)(object)currentIndex.As<T, int>()[indexInVectorOfFirstMatch];
+                        }
+                    }
+
+                    TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex);
+                }
+
+                // Aggregate the lanes in the vector to create the final scalar result.
+                return IndexOfFinalAggregate<T, TIndexOfMinMax>(result, resultIndex);
+            }
+
+            // Scalar path used when either vectorization is not supported or the input is too small to vectorize.
+            T curResult = x[0];
+            int curIn = 0;
+            if (T.IsNaN(curResult))
+            {
+                return curIn;
+            }
+
+            for (int i = 1; i < x.Length; i++)
+            {
+                T current = x[i];
+                if (T.IsNaN(current))
+                {
+                    return i;
+                }
+
+                curIn = TIndexOfMinMax.Invoke(ref curResult, current, curIn, i);
+            }
+
+            return curIn;
+        }
+
+        private static int IndexOfFirstMatch<T>(Vector128<T> mask) =>
+            BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits());
+
+        private static int IndexOfFirstMatch<T>(Vector256<T> mask) =>
+            BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits());
+
+        private static int IndexOfFirstMatch<T>(Vector512<T> mask) =>
+            BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits());
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe Vector256<T> IndexLessThan<T>(Vector256<T> indices1, Vector256<T> indices2) =>
+            sizeof(T) == sizeof(long) ? Vector256.LessThan(indices1.AsInt64(), indices2.AsInt64()).As<long, T>() :
+            sizeof(T) == sizeof(int) ? Vector256.LessThan(indices1.AsInt32(), indices2.AsInt32()).As<int, T>() :
+            sizeof(T) == sizeof(short) ? Vector256.LessThan(indices1.AsInt16(), indices2.AsInt16()).As<short, T>() :
+            Vector256.LessThan(indices1.AsByte(), indices2.AsByte()).As<byte, T>();
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe Vector512<T> IndexLessThan<T>(Vector512<T> indices1, Vector512<T> indices2) =>
+            sizeof(T) == sizeof(long) ? Vector512.LessThan(indices1.AsInt64(), indices2.AsInt64()).As<long, T>() :
+            sizeof(T) == sizeof(int) ? Vector512.LessThan(indices1.AsInt32(), indices2.AsInt32()).As<int, T>() :
+            sizeof(T) == sizeof(short) ? Vector512.LessThan(indices1.AsInt16(), indices2.AsInt16()).As<short, T>() :
+            Vector512.LessThan(indices1.AsByte(), indices2.AsByte()).As<byte, T>();
+
+        /// <summary>Gets whether the specified <see cref="float"/> is negative.</summary>
+        private static bool IsNegative<T>(T f) where T : INumberBase<T> => T.IsNegative(f);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe Vector128<T> ElementWiseSelect<T>(Vector128<T> mask, Vector128<T> left, Vector128<T> right)
+        {
+            if (Sse41.IsSupported)
+            {
+                if (typeof(T) == typeof(float)) return Sse41.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As<float, T>();
+                if (typeof(T) == typeof(double)) return Sse41.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As<double, T>();
+
+                if (sizeof(T) == 1) return Sse41.BlendVariable(left.AsByte(), right.AsByte(), (~mask).AsByte()).As<byte, T>();
+                if (sizeof(T) == 2) return Sse41.BlendVariable(left.AsUInt16(), right.AsUInt16(), (~mask).AsUInt16()).As<ushort, T>();
+                if (sizeof(T) == 4) return Sse41.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As<uint, T>();
+                if (sizeof(T) == 8) return Sse41.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As<ulong, T>();
+            }
+
+            return Vector128.ConditionalSelect(mask, left, right);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe Vector256<T> ElementWiseSelect<T>(Vector256<T> mask, Vector256<T> left, Vector256<T> right)
+        {
+            if (Avx2.IsSupported)
+            {
+                if (typeof(T) == typeof(float)) return Avx2.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As<float, T>();
+                if (typeof(T) == typeof(double)) return Avx2.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As<double, T>();
+
+                if (sizeof(T) == 1) return Avx2.BlendVariable(left.AsByte(), right.AsByte(), (~mask).AsByte()).As<byte, T>();
+                if (sizeof(T) == 2) return Avx2.BlendVariable(left.AsUInt16(), right.AsUInt16(), (~mask).AsUInt16()).As<ushort, T>();
+                if (sizeof(T) == 4) return Avx2.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As<uint, T>();
+                if (sizeof(T) == 8) return Avx2.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As<ulong, T>();
+            }
+
+            return Vector256.ConditionalSelect(mask, left, right);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe Vector512<T> ElementWiseSelect<T>(Vector512<T> mask, Vector512<T> left, Vector512<T> right)
+        {
+            if (Avx512F.IsSupported)
+            {
+                if (typeof(T) == typeof(float)) return Avx512F.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As<float, T>();
+                if (typeof(T) == typeof(double)) return Avx512F.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As<double, T>();
+
+                if (sizeof(T) == 4) return Avx512F.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As<uint, T>();
+                if (sizeof(T) == 8) return Avx512F.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As<ulong, T>();
+            }
+
+            return Vector512.ConditionalSelect(mask, left, right);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMaxMagnitude.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMaxMagnitude.cs
new file mode 100644
index 000000000000..f1f5016a86b1
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMaxMagnitude.cs
@@ -0,0 +1,137 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Searches for the index of the number with the largest magnitude in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <returns>The index of the element in <paramref name="x"/> with the largest magnitude (absolute value), or -1 if <paramref name="x"/> is empty.</returns>
+        /// <remarks>
+        /// <para>
+        /// The determination of the maximum magnitude matches the IEEE 754:2019 `maximumMagnitude` function. If any value equal to NaN
+        /// is present, the index of the first is returned. If two values have the same magnitude and one is positive and the other is negative,
+        /// the positive value is considered to have the larger magnitude.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static int IndexOfMaxMagnitude<T>(ReadOnlySpan<T> x)
+            where T : INumber<T> =>
+            IndexOfMinMaxCore<T, IndexOfMaxMagnitudeOperator<T>>(x);
+
+        internal readonly struct IndexOfMaxMagnitudeOperator<T> : IIndexOfOperator<T> where T : INumber<T>
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static void Invoke(ref Vector128<T> result, Vector128<T> current, ref Vector128<T> resultIndex, Vector128<T> currentIndex)
+            {
+                Vector128<T> resultMag = Vector128.Abs(result), currentMag = Vector128.Abs(current);
+                Vector128<T> useResult = Vector128.GreaterThan(resultMag, currentMag);
+                Vector128<T> equalMask = Vector128.Equals(resultMag, currentMag);
+
+                if (equalMask != Vector128<T>.Zero)
+                {
+                    Vector128<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current));
+                        Vector128<T> currentNegative = IsNegative(current);
+                        Vector128<T> sameSign = Vector128.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
+                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative);
+                    }
+                    else
+                    {
+                        useResult |= equalMask & lessThanIndexMask;
+                    }
+                }
+
+                result = ElementWiseSelect(useResult, result, current);
+                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static void Invoke(ref Vector256<T> result, Vector256<T> current, ref Vector256<T> resultIndex, Vector256<T> currentIndex)
+            {
+                Vector256<T> resultMag = Vector256.Abs(result), currentMag = Vector256.Abs(current);
+                Vector256<T> useResult = Vector256.GreaterThan(resultMag, currentMag);
+                Vector256<T> equalMask = Vector256.Equals(resultMag, currentMag);
+
+                if (equalMask != Vector256<T>.Zero)
+                {
+                    Vector256<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current));
+                        Vector256<T> currentNegative = IsNegative(current);
+                        Vector256<T> sameSign = Vector256.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
+                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative);
+                    }
+                    else
+                    {
+                        useResult |= equalMask & lessThanIndexMask;
+                    }
+                }
+
+                result = ElementWiseSelect(useResult, result, current);
+                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static void Invoke(ref Vector512<T> result, Vector512<T> current, ref Vector512<T> resultIndex, Vector512<T> currentIndex)
+            {
+                Vector512<T> resultMag = Vector512.Abs(result), currentMag = Vector512.Abs(current);
+                Vector512<T> useResult = Vector512.GreaterThan(resultMag, currentMag);
+                Vector512<T> equalMask = Vector512.Equals(resultMag, currentMag);
+
+                if (equalMask != Vector512<T>.Zero)
+                {
+                    Vector512<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current));
+                        Vector512<T> currentNegative = IsNegative(current);
+                        Vector512<T> sameSign = Vector512.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
+                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative);
+                    }
+                    else
+                    {
+                        useResult |= equalMask & lessThanIndexMask;
+                    }
+                }
+
+                result = ElementWiseSelect(useResult, result, current);
+                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static int Invoke(ref T result, T current, int resultIndex, int currentIndex)
+            {
+                T resultMag = T.Abs(result);
+                T currentMag = T.Abs(current);
+
+                if (resultMag == currentMag)
+                {
+                    bool resultNegative = IsNegative(result);
+                    if ((resultNegative == IsNegative(current)) ? (currentIndex < resultIndex) : resultNegative)
+                    {
+                        result = current;
+                        return currentIndex;
+                    }
+                }
+                else if (currentMag > resultMag)
+                {
+                    result = current;
+                    return currentIndex;
+                }
+
+                return resultIndex;
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMin.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMin.cs
new file mode 100644
index 000000000000..011021b6c001
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMin.cs
@@ -0,0 +1,131 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Searches for the index of the smallest number in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <returns>The index of the minimum element in <paramref name="x"/>, or -1 if <paramref name="x"/> is empty.</returns>
+        /// <remarks>
+        /// <para>
+        /// The determination of the minimum element matches the IEEE 754:2019 `minimum` function. If any value equal to NaN
+        /// is present, the index of the first is returned. Negative 0 is considered smaller than positive 0.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static int IndexOfMin<T>(ReadOnlySpan<T> x)
+            where T : INumber<T> =>
+            IndexOfMinMaxCore<T, IndexOfMinOperator<T>>(x);
+
+        /// <summary>Returns the index of MathF.Min(x, y)</summary>
+        internal readonly struct IndexOfMinOperator<T> : IIndexOfOperator<T> where T : INumber<T>
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static void Invoke(ref Vector128<T> result, Vector128<T> current, ref Vector128<T> resultIndex, Vector128<T> currentIndex)
+            {
+                Vector128<T> useResult = Vector128.LessThan(result, current);
+                Vector128<T> equalMask = Vector128.Equals(result, current);
+
+                if (equalMask != Vector128<T>.Zero)
+                {
+                    Vector128<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result));
+                        Vector128<T> resultNegative = IsNegative(result);
+                        Vector128<T> sameSign = Vector128.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As<int, T>();
+                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative);
+                    }
+                    else
+                    {
+                        useResult |= equalMask & lessThanIndexMask;
+                    }
+                }
+
+                result = ElementWiseSelect(useResult, result, current);
+                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static void Invoke(ref Vector256<T> result, Vector256<T> current, ref Vector256<T> resultIndex, Vector256<T> currentIndex)
+            {
+                Vector256<T> useResult = Vector256.LessThan(result, current);
+                Vector256<T> equalMask = Vector256.Equals(result, current);
+
+                if (equalMask != Vector256<T>.Zero)
+                {
+                    Vector256<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result));
+                        Vector256<T> resultNegative = IsNegative(result);
+                        Vector256<T> sameSign = Vector256.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As<int, T>();
+                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative);
+                    }
+                    else
+                    {
+                        useResult |= equalMask & lessThanIndexMask;
+                    }
+                }
+
+                result = ElementWiseSelect(useResult, result, current);
+                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static void Invoke(ref Vector512<T> result, Vector512<T> current, ref Vector512<T> resultIndex, Vector512<T> currentIndex)
+            {
+                Vector512<T> useResult = Vector512.LessThan(result, current);
+                Vector512<T> equalMask = Vector512.Equals(result, current);
+
+                if (equalMask != Vector512<T>.Zero)
+                {
+                    Vector512<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result));
+                        Vector512<T> resultNegative = IsNegative(result);
+                        Vector512<T> sameSign = Vector512.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As<int, T>();
+                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative);
+                    }
+                    else
+                    {
+                        useResult |= equalMask & lessThanIndexMask;
+                    }
+                }
+
+                result = ElementWiseSelect(useResult, result, current);
+                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static int Invoke(ref T result, T current, int resultIndex, int currentIndex)
+            {
+                if (result == current)
+                {
+                    bool currentNegative = IsNegative(current);
+                    if ((IsNegative(result) == currentNegative) ? (currentIndex < resultIndex) : currentNegative)
+                    {
+                        result = current;
+                        return currentIndex;
+                    }
+                }
+                else if (current < result)
+                {
+                    result = current;
+                    return currentIndex;
+                }
+
+                return resultIndex;
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMinMagnitude.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMinMagnitude.cs
new file mode 100644
index 000000000000..813bcf4637dd
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMinMagnitude.cs
@@ -0,0 +1,137 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Searches for the index of the number with the smallest magnitude in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <returns>The index of the element in <paramref name="x"/> with the smallest magnitude (absolute value), or -1 if <paramref name="x"/> is empty.</returns>
+        /// <remarks>
+        /// <para>
+        /// The determination of the minimum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If any value equal to NaN
+        /// is present, the index of the first is returned. If two values have the same magnitude and one is positive and the other is negative,
+        /// the negative value is considered to have the smaller magnitude.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static int IndexOfMinMagnitude<T>(ReadOnlySpan<T> x)
+            where T : INumber<T> =>
+            IndexOfMinMaxCore<T, IndexOfMinMagnitudeOperator<T>>(x);
+
+        internal readonly struct IndexOfMinMagnitudeOperator<T> : IIndexOfOperator<T> where T : INumber<T>
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static void Invoke(ref Vector128<T> result, Vector128<T> current, ref Vector128<T> resultIndex, Vector128<T> currentIndex)
+            {
+                Vector128<T> resultMag = Vector128.Abs(result), currentMag = Vector128.Abs(current);
+                Vector128<T> useResult = Vector128.LessThan(resultMag, currentMag);
+                Vector128<T> equalMask = Vector128.Equals(resultMag, currentMag);
+
+                if (equalMask != Vector128<T>.Zero)
+                {
+                    Vector128<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result));
+                        Vector128<T> resultNegative = IsNegative(result);
+                        Vector128<T> sameSign = Vector128.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As<int, T>();
+                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative);
+                    }
+                    else
+                    {
+                        useResult |= equalMask & lessThanIndexMask;
+                    }
+                }
+
+                result = ElementWiseSelect(useResult, result, current);
+                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static void Invoke(ref Vector256<T> result, Vector256<T> current, ref Vector256<T> resultIndex, Vector256<T> currentIndex)
+            {
+                Vector256<T> resultMag = Vector256.Abs(result), currentMag = Vector256.Abs(current);
+                Vector256<T> useResult = Vector256.LessThan(resultMag, currentMag);
+                Vector256<T> equalMask = Vector256.Equals(resultMag, currentMag);
+
+                if (equalMask != Vector256<T>.Zero)
+                {
+                    Vector256<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result));
+                        Vector256<T> resultNegative = IsNegative(result);
+                        Vector256<T> sameSign = Vector256.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As<int, T>();
+                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative);
+                    }
+                    else
+                    {
+                        useResult |= equalMask & lessThanIndexMask;
+                    }
+                }
+
+                result = ElementWiseSelect(useResult, result, current);
+                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static void Invoke(ref Vector512<T> result, Vector512<T> current, ref Vector512<T> resultIndex, Vector512<T> currentIndex)
+            {
+                Vector512<T> resultMag = Vector512.Abs(result), currentMag = Vector512.Abs(current);
+                Vector512<T> useResult = Vector512.LessThan(resultMag, currentMag);
+                Vector512<T> equalMask = Vector512.Equals(resultMag, currentMag);
+
+                if (equalMask != Vector512<T>.Zero)
+                {
+                    Vector512<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result));
+                        Vector512<T> resultNegative = IsNegative(result);
+                        Vector512<T> sameSign = Vector512.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As<int, T>();
+                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative);
+                    }
+                    else
+                    {
+                        useResult |= equalMask & lessThanIndexMask;
+                    }
+                }
+
+                result = ElementWiseSelect(useResult, result, current);
+                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static int Invoke(ref T result, T current, int resultIndex, int currentIndex)
+            {
+                T resultMag = T.Abs(result);
+                T currentMag = T.Abs(current);
+
+                if (resultMag == currentMag)
+                {
+                    bool currentNegative = IsNegative(current);
+                    if ((IsNegative(result) == currentNegative) ? (currentIndex < resultIndex) : currentNegative)
+                    {
+                        result = current;
+                        return currentIndex;
+                    }
+                }
+                else if (currentMag < resultMag)
+                {
+                    result = current;
+                    return currentIndex;
+                }
+
+                return resultIndex;
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.LeadingZeroCount.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.LeadingZeroCount.cs
new file mode 100644
index 000000000000..ab51042925f0
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.LeadingZeroCount.cs
@@ -0,0 +1,83 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise leading zero count of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.LeadingZeroCount(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void LeadingZeroCount<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IBinaryInteger<T> =>
+            InvokeSpanIntoSpan<T, LeadingZeroCountOperator<T>>(x, destination);
+
+        /// <summary>T.LeadingZeroCount(x)</summary>
+        internal readonly unsafe struct LeadingZeroCountOperator<T> : IUnaryOperator<T, T> where T : IBinaryInteger<T>
+        {
+            public static bool Vectorizable =>
+                (Avx512CD.VL.IsSupported && (sizeof(T) == 4 || sizeof(T) == 8)) ||
+                (AdvSimd.IsSupported && (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4));
+
+            public static T Invoke(T x) => T.LeadingZeroCount(x);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (Avx512CD.VL.IsSupported)
+                {
+                    if (sizeof(T) == 4) return Avx512CD.VL.LeadingZeroCount(x.AsUInt32()).As<uint, T>();
+                    if (sizeof(T) == 8) return Avx512CD.VL.LeadingZeroCount(x.AsUInt64()).As<ulong, T>();
+                }
+
+                Debug.Assert(AdvSimd.IsSupported);
+                {
+                    if (sizeof(T) == 1) return AdvSimd.LeadingZeroCount(x.AsByte()).As<byte, T>();
+                    if (sizeof(T) == 2) return AdvSimd.LeadingZeroCount(x.AsUInt16()).As<ushort, T>();
+
+                    Debug.Assert(sizeof(T) == 4);
+                    return AdvSimd.LeadingZeroCount(x.AsUInt32()).As<uint, T>();
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (Avx512CD.VL.IsSupported)
+                {
+                    if (sizeof(T) == 4) return Avx512CD.VL.LeadingZeroCount(x.AsUInt32()).As<uint, T>();
+                    if (sizeof(T) == 8) return Avx512CD.VL.LeadingZeroCount(x.AsUInt64()).As<ulong, T>();
+                }
+
+                return Vector256.Create(Invoke(x.GetLower()), Invoke(x.GetUpper()));
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (Avx512CD.IsSupported)
+                {
+                    if (sizeof(T) == 4) return Avx512CD.LeadingZeroCount(x.AsUInt32()).As<uint, T>();
+                    if (sizeof(T) == 8) return Avx512CD.LeadingZeroCount(x.AsUInt64()).As<ulong, T>();
+                }
+
+                return Vector512.Create(Invoke(x.GetLower()), Invoke(x.GetUpper()));
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Lerp.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Lerp.cs
new file mode 100644
index 000000000000..a605b62430d3
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Lerp.cs
@@ -0,0 +1,83 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise linear interpolation between two values based on the given weight in the specified tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="amount">The third tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" /> and length of <paramref name="amount" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="amount"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Lerp(<paramref name="x" />[i], <paramref name="y" />[i], <paramref name="amount" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Lerp<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, ReadOnlySpan<T> amount, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanSpanSpanIntoSpan<T, LerpOperator<T>>(x, y, amount, destination);
+
+        /// <summary>Computes the element-wise linear interpolation between two values based on the given weight in the specified tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="amount">The third tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Lerp(<paramref name="x" />[i], <paramref name="y" />[i], <paramref name="amount" />)</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Lerp<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, T amount, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanSpanScalarIntoSpan<T, LerpOperator<T>>(x, y, amount, destination);
+
+        /// <summary>Computes the element-wise linear interpolation between two values based on the given weight in the specified tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="amount">The third tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="amount" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="amount"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Lerp(<paramref name="x" />[i], <paramref name="y" />, <paramref name="amount" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Lerp<T>(ReadOnlySpan<T> x, T y, ReadOnlySpan<T> amount, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanScalarSpanIntoSpan<T, LerpOperator<T>>(x, y, amount, destination);
+
+        /// <summary>(x * (1 - z)) + (y * z)</summary>
+        private readonly struct LerpOperator<T> : ITernaryOperator<T> where T : IFloatingPointIeee754<T>
+        {
+            public static T Invoke(T x, T y, T amount) => T.Lerp(x, y, amount);
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> amount) => (x * (Vector128<T>.One - amount)) + (y * amount);
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> amount) => (x * (Vector256<T>.One - amount)) + (y * amount);
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> amount) => (x * (Vector512<T>.One - amount)) + (y * amount);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log.cs
new file mode 100644
index 000000000000..74d17f036d94
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log.cs
@@ -0,0 +1,765 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise natural (base <c>e</c>) logarithm of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>.
+        /// If a value is negative or equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is set to NaN.
+        /// If a value is positive infinity, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
+        /// Otherwise, if a value is positive, its natural logarithm is stored into the corresponding destination location.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Log<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ILogarithmicFunctions<T> =>
+            InvokeSpanIntoSpan<T, LogOperator<T>>(x, destination);
+
+        /// <summary>Computes the element-wise logarithm of the numbers in a specified tensor to the specified base in another specified tensor.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log(<paramref name="x" />[i], <paramref name="y" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Log<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : ILogarithmicFunctions<T> =>
+            InvokeSpanSpanIntoSpan<T, LogBaseOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise logarithm of the numbers in a specified tensor to the specified base in another specified tensor.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log(<paramref name="x" />[i], <paramref name="y" />)</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Log<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : ILogarithmicFunctions<T> =>
+            InvokeSpanScalarIntoSpan<T, LogBaseOperator<T>>(x, y, destination);
+
+        /// <summary>T.Log(x)</summary>
+        internal readonly struct LogOperator<T> : IUnaryOperator<T, T>
+            where T : ILogarithmicFunctions<T>
+        {
+            public static bool Vectorizable => (typeof(T) == typeof(double))
+                                            || (typeof(T) == typeof(float));
+
+            public static T Invoke(T x) => T.Log(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector128.Log(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector128.Log(x.AsSingle()).As<float, T>();
+                }
+#else
+                if (typeof(T) == typeof(double))
+                {
+                    return LogOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return LogOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+#endif
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector256.Log(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector256.Log(x.AsSingle()).As<float, T>();
+                }
+#else
+                if (typeof(T) == typeof(double))
+                {
+                    return LogOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return LogOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+#endif
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector512.Log(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector512.Log(x.AsSingle()).As<float, T>();
+                }
+#else
+                if (typeof(T) == typeof(double))
+                {
+                    return LogOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return LogOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+#endif
+            }
+        }
+
+        /// <summary>T.Log(x, y)</summary>
+        private readonly struct LogBaseOperator<T> : IBinaryOperator<T>
+            where T : ILogarithmicFunctions<T>
+        {
+            public static bool Vectorizable => LogOperator<T>.Vectorizable;
+            public static T Invoke(T x, T y) => T.Log(x, y);
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => LogOperator<T>.Invoke(x) / LogOperator<T>.Invoke(y);
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => LogOperator<T>.Invoke(x) / LogOperator<T>.Invoke(y);
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => LogOperator<T>.Invoke(x) / LogOperator<T>.Invoke(y);
+        }
+
+#if !NET9_0_OR_GREATER
+        /// <summary>double.Log(x)</summary>
+        private readonly struct LogOperatorDouble : IUnaryOperator<double, double>
+        {
+            // This code is based on `vrd2_log` from amd/aocl-libm-ose
+            // Copyright (C) 2018-2020 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Reduce x into the form:
+            //        x = (-1)^s*2^n*m
+            // s will be always zero, as log is defined for positive numbers
+            // n is an integer known as the exponent
+            // m is mantissa
+            //
+            // x is reduced such that the mantissa, m lies in [2/3,4/3]
+            //      x = 2^n*m where m is in [2/3,4/3]
+            //      log(x) = log(2^n*m)                 We have log(a*b) = log(a)+log(b)
+            //             = log(2^n) + log(m)          We have log(a^n) = n*log(a)
+            //             = n*log(2) + log(m)
+            //             = n*log(2) + log(1+(m-1))
+            //             = n*log(2) + log(1+f)        Where f = m-1
+            //             = n*log(2) + log1p(f)        f lies in [-1/3,+1/3]
+            //
+            // Thus we have :
+            // log(x) = n*log(2) + log1p(f)
+            // In the above, the first term n*log(2), n can be calculated by using right shift operator and the value of log(2)
+            // is known and is stored as a constant
+            // The second term log1p(F) is approximated by using a polynomial
+
+            private const ulong V_MIN = 0x00100000_00000000;    // SmallestNormal
+            private const ulong V_MAX = 0x7FF00000_00000000;    // +Infinity
+            private const ulong V_MSK = 0x000FFFFF_FFFFFFFF;    // (1 << 52) - 1
+            private const ulong V_OFF = 0x3FE55555_55555555;    // 2.0 / 3.0
+
+            private const double LN2_HEAD = 0.693359375;
+            private const double LN2_TAIL = -0.00021219444005469057;
+
+            private const double C02 = -0.499999999999999560;
+            private const double C03 = +0.333333333333414750;
+            private const double C04 = -0.250000000000297430;
+            private const double C05 = +0.199999999975985220;
+            private const double C06 = -0.166666666608919500;
+            private const double C07 = +0.142857145600277100;
+            private const double C08 = -0.125000005127831270;
+            private const double C09 = +0.111110952357159440;
+            private const double C10 = -0.099999750495501240;
+            private const double C11 = +0.090914349823462390;
+            private const double C12 = -0.083340600527551860;
+            private const double C13 = +0.076817603328311300;
+            private const double C14 = -0.071296718946287310;
+            private const double C15 = +0.067963465211535730;
+            private const double C16 = -0.063995035098960040;
+            private const double C17 = +0.049370587082412105;
+            private const double C18 = -0.045370170994891980;
+            private const double C19 = +0.088970636003577750;
+            private const double C20 = -0.086906174116908760;
+
+            public static bool Vectorizable => true;
+
+            public static double Invoke(double x) => double.Log(x);
+
+            public static Vector128<double> Invoke(Vector128<double> x)
+            {
+                Vector128<double> specialResult = x;
+
+                // x is zero, subnormal, infinity, or NaN
+                Vector128<ulong> specialMask = Vector128.GreaterThanOrEqual(x.AsUInt64() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN));
+
+                if (specialMask != Vector128<ulong>.Zero)
+                {
+                    Vector128<long> xBits = x.AsInt64();
+
+                    // (x < 0) ? float.NaN : x
+                    Vector128<double> lessThanZeroMask = Vector128.LessThan(xBits, Vector128<long>.Zero).AsDouble();
+
+                    specialResult = Vector128.ConditionalSelect(
+                        lessThanZeroMask,
+                        Vector128.Create(double.NaN),
+                        specialResult
+                    );
+
+                    // double.IsZero(x) ? double.NegativeInfinity : x
+                    Vector128<double> zeroMask = Vector128.Equals(xBits << 1, Vector128<long>.Zero).AsDouble();
+
+                    specialResult = Vector128.ConditionalSelect(
+                        zeroMask,
+                        Vector128.Create(double.NegativeInfinity),
+                        specialResult
+                    );
+
+                    // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x)
+                    Vector128<double> temp = zeroMask
+                                           | lessThanZeroMask
+                                           | Vector128.GreaterThanOrEqual(xBits, Vector128.Create(double.PositiveInfinity).AsInt64()).AsDouble();
+
+                    // subnormal
+                    Vector128<double> subnormalMask = Vector128.AndNot(specialMask.AsDouble(), temp);
+
+                    // multiply by 2^52, then normalize
+                    x = Vector128.ConditionalSelect(
+                        subnormalMask,
+                        ((x * 4503599627370496.0).AsUInt64() - Vector128.Create(52ul << 52)).AsDouble(),
+                        x
+                    );
+
+                    specialMask = temp.AsUInt64();
+                }
+
+                // Reduce the mantissa to [+2/3, +4/3]
+                Vector128<ulong> vx = x.AsUInt64() - Vector128.Create(V_OFF);
+                Vector128<double> n = Vector128.ConvertToDouble(vx.AsInt64() >> 52);
+                vx = (vx & Vector128.Create(V_MSK)) + Vector128.Create(V_OFF);
+
+                // Adjust the mantissa to [-1/3, +1/3]
+                Vector128<double> r = vx.AsDouble() - Vector128<double>.One;
+
+                Vector128<double> r02 = r * r;
+                Vector128<double> r04 = r02 * r02;
+                Vector128<double> r08 = r04 * r04;
+                Vector128<double> r16 = r08 * r08;
+
+                // Compute log(x + 1) using Polynomial approximation
+                //      C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
+
+                Vector128<double> poly = (((r04 * C20)
+                                        + ((((r * C19) + Vector128.Create(C18)) * r02)
+                                          + ((r * C17) + Vector128.Create(C16)))) * r16)
+                                     + (((((((r * C15) + Vector128.Create(C14)) * r02)
+                                          + ((r * C13) + Vector128.Create(C12))) * r04)
+                                        + ((((r * C11) + Vector128.Create(C10)) * r02)
+                                          + ((r * C09) + Vector128.Create(C08)))) * r08)
+                                       + (((((r * C07) + Vector128.Create(C06)) * r02)
+                                          + ((r * C05) + Vector128.Create(C04))) * r04)
+                                        + ((((r * C03) + Vector128.Create(C02)) * r02) + r);
+
+                return Vector128.ConditionalSelect(
+                    specialMask.AsDouble(),
+                    specialResult,
+                    (n * LN2_HEAD) + ((n * LN2_TAIL) + poly)
+                );
+            }
+
+            public static Vector256<double> Invoke(Vector256<double> x)
+            {
+                Vector256<double> specialResult = x;
+
+                // x is zero, subnormal, infinity, or NaN
+                Vector256<ulong> specialMask = Vector256.GreaterThanOrEqual(x.AsUInt64() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN));
+
+                if (specialMask != Vector256<ulong>.Zero)
+                {
+                    Vector256<long> xBits = x.AsInt64();
+
+                    // (x < 0) ? float.NaN : x
+                    Vector256<double> lessThanZeroMask = Vector256.LessThan(xBits, Vector256<long>.Zero).AsDouble();
+
+                    specialResult = Vector256.ConditionalSelect(
+                        lessThanZeroMask,
+                        Vector256.Create(double.NaN),
+                        specialResult
+                    );
+
+                    // double.IsZero(x) ? double.NegativeInfinity : x
+                    Vector256<double> zeroMask = Vector256.Equals(xBits << 1, Vector256<long>.Zero).AsDouble();
+
+                    specialResult = Vector256.ConditionalSelect(
+                        zeroMask,
+                        Vector256.Create(double.NegativeInfinity),
+                        specialResult
+                    );
+
+                    // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x)
+                    Vector256<double> temp = zeroMask
+                                           | lessThanZeroMask
+                                           | Vector256.GreaterThanOrEqual(xBits, Vector256.Create(double.PositiveInfinity).AsInt64()).AsDouble();
+
+                    // subnormal
+                    Vector256<double> subnormalMask = Vector256.AndNot(specialMask.AsDouble(), temp);
+
+                    // multiply by 2^52, then normalize
+                    x = Vector256.ConditionalSelect(
+                        subnormalMask,
+                        ((x * 4503599627370496.0).AsUInt64() - Vector256.Create(52ul << 52)).AsDouble(),
+                        x
+                    );
+
+                    specialMask = temp.AsUInt64();
+                }
+
+                // Reduce the mantissa to [+2/3, +4/3]
+                Vector256<ulong> vx = x.AsUInt64() - Vector256.Create(V_OFF);
+                Vector256<double> n = Vector256.ConvertToDouble(vx.AsInt64() >> 52);
+                vx = (vx & Vector256.Create(V_MSK)) + Vector256.Create(V_OFF);
+
+                // Adjust the mantissa to [-1/3, +1/3]
+                Vector256<double> r = vx.AsDouble() - Vector256<double>.One;
+
+                Vector256<double> r02 = r * r;
+                Vector256<double> r04 = r02 * r02;
+                Vector256<double> r08 = r04 * r04;
+                Vector256<double> r16 = r08 * r08;
+
+                // Compute log(x + 1) using Polynomial approximation
+                //      C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
+
+                Vector256<double> poly = (((r04 * C20)
+                                        + ((((r * C19) + Vector256.Create(C18)) * r02)
+                                          + ((r * C17) + Vector256.Create(C16)))) * r16)
+                                     + (((((((r * C15) + Vector256.Create(C14)) * r02)
+                                          + ((r * C13) + Vector256.Create(C12))) * r04)
+                                        + ((((r * C11) + Vector256.Create(C10)) * r02)
+                                          + ((r * C09) + Vector256.Create(C08)))) * r08)
+                                       + (((((r * C07) + Vector256.Create(C06)) * r02)
+                                          + ((r * C05) + Vector256.Create(C04))) * r04)
+                                        + ((((r * C03) + Vector256.Create(C02)) * r02) + r);
+
+                return Vector256.ConditionalSelect(
+                    specialMask.AsDouble(),
+                    specialResult,
+                    (n * LN2_HEAD) + ((n * LN2_TAIL) + poly)
+                );
+            }
+
+            public static Vector512<double> Invoke(Vector512<double> x)
+            {
+                Vector512<double> specialResult = x;
+
+                // x is zero, subnormal, infinity, or NaN
+                Vector512<ulong> specialMask = Vector512.GreaterThanOrEqual(x.AsUInt64() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN));
+
+                if (specialMask != Vector512<ulong>.Zero)
+                {
+                    Vector512<long> xBits = x.AsInt64();
+
+                    // (x < 0) ? float.NaN : x
+                    Vector512<double> lessThanZeroMask = Vector512.LessThan(xBits, Vector512<long>.Zero).AsDouble();
+
+                    specialResult = Vector512.ConditionalSelect(
+                        lessThanZeroMask,
+                        Vector512.Create(double.NaN),
+                        specialResult
+                    );
+
+                    // double.IsZero(x) ? double.NegativeInfinity : x
+                    Vector512<double> zeroMask = Vector512.Equals(xBits << 1, Vector512<long>.Zero).AsDouble();
+
+                    specialResult = Vector512.ConditionalSelect(
+                        zeroMask,
+                        Vector512.Create(double.NegativeInfinity),
+                        specialResult
+                    );
+
+                    // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x)
+                    Vector512<double> temp = zeroMask
+                                           | lessThanZeroMask
+                                           | Vector512.GreaterThanOrEqual(xBits, Vector512.Create(double.PositiveInfinity).AsInt64()).AsDouble();
+
+                    // subnormal
+                    Vector512<double> subnormalMask = Vector512.AndNot(specialMask.AsDouble(), temp);
+
+                    // multiply by 2^52, then normalize
+                    x = Vector512.ConditionalSelect(
+                        subnormalMask,
+                        ((x * 4503599627370496.0).AsUInt64() - Vector512.Create(52ul << 52)).AsDouble(),
+                        x
+                    );
+
+                    specialMask = temp.AsUInt64();
+                }
+
+                // Reduce the mantissa to [+2/3, +4/3]
+                Vector512<ulong> vx = x.AsUInt64() - Vector512.Create(V_OFF);
+                Vector512<double> n = Vector512.ConvertToDouble(vx.AsInt64() >> 52);
+                vx = (vx & Vector512.Create(V_MSK)) + Vector512.Create(V_OFF);
+
+                // Adjust the mantissa to [-1/3, +1/3]
+                Vector512<double> r = vx.AsDouble() - Vector512<double>.One;
+
+                Vector512<double> r02 = r * r;
+                Vector512<double> r04 = r02 * r02;
+                Vector512<double> r08 = r04 * r04;
+                Vector512<double> r16 = r08 * r08;
+
+                // Compute log(x + 1) using Polynomial approximation
+                //      C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
+
+                Vector512<double> poly = (((r04 * C20)
+                                        + ((((r * C19) + Vector512.Create(C18)) * r02)
+                                          + ((r * C17) + Vector512.Create(C16)))) * r16)
+                                     + (((((((r * C15) + Vector512.Create(C14)) * r02)
+                                          + ((r * C13) + Vector512.Create(C12))) * r04)
+                                        + ((((r * C11) + Vector512.Create(C10)) * r02)
+                                          + ((r * C09) + Vector512.Create(C08)))) * r08)
+                                       + (((((r * C07) + Vector512.Create(C06)) * r02)
+                                          + ((r * C05) + Vector512.Create(C04))) * r04)
+                                        + ((((r * C03) + Vector512.Create(C02)) * r02) + r);
+
+                return Vector512.ConditionalSelect(
+                    specialMask.AsDouble(),
+                    specialResult,
+                    (n * LN2_HEAD) + ((n * LN2_TAIL) + poly)
+                );
+            }
+        }
+
+        /// <summary>float.Log(x)</summary>
+        private readonly struct LogOperatorSingle : IUnaryOperator<float, float>
+        {
+            // This code is based on `vrs4_logf` from amd/aocl-libm-ose
+            // Copyright (C) 2018-2019 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Spec:
+            //   logf(x)
+            //          = logf(x)           if x ∈ F and x > 0
+            //          = x                 if x = qNaN
+            //          = 0                 if x = 1
+            //          = -inf              if x = (-0, 0}
+            //          = NaN               otherwise
+            //
+            // Assumptions/Expectations
+            //      - ULP is derived to be << 4 (always)
+            // - Some FPU Exceptions may not be available
+            //      - Performance is at least 3x
+            //
+            // Implementation Notes:
+            //  1. Range Reduction:
+            //      x = 2^n*(1+f)                                          .... (1)
+            //         where n is exponent and is an integer
+            //             (1+f) is mantissa ∈ [1,2). i.e., 1 ≤ 1+f < 2    .... (2)
+            //
+            //    From (1), taking log on both sides
+            //      log(x) = log(2^n * (1+f))
+            //             = log(2^n) + log(1+f)
+            //             = n*log(2) + log(1+f)                           .... (3)
+            //
+            //      let z = 1 + f
+            //             log(z) = log(k) + log(z) - log(k)
+            //             log(z) = log(kz) - log(k)
+            //
+            //    From (2), range of z is [1, 2)
+            //       by simply dividing range by 'k', z is in [1/k, 2/k)  .... (4)
+            //       Best choice of k is the one which gives equal and opposite values
+            //       at extrema        +-      -+
+            //              1          | 2      |
+            //             --- - 1 = - |--- - 1 |
+            //              k          | k      |                         .... (5)
+            //                         +-      -+
+            //
+            //       Solving for k, k = 3/2,
+            //    From (4), using 'k' value, range is therefore [-0.3333, 0.3333]
+            //
+            //  2. Polynomial Approximation:
+            //     More information refer to tools/sollya/vrs4_logf.sollya
+            //
+            //     7th Deg -   Error abs: 0x1.04c4ac98p-22   rel: 0x1.2216e6f8p-19
+            //     6th Deg -   Error abs: 0x1.179e97d8p-19   rel: 0x1.db676c1p-17
+
+            private const uint V_MIN = 0x00800000;
+            private const uint V_MAX = 0x7F800000;
+            private const uint V_MASK = 0x007FFFFF;
+            private const uint V_OFF = 0x3F2AAAAB;
+
+            private const float V_LN2 = 0.6931472f;
+
+            private const float C0 = 0.0f;
+            private const float C1 = 1.0f;
+            private const float C2 = -0.5000001f;
+            private const float C3 = 0.33332965f;
+            private const float C4 = -0.24999046f;
+            private const float C5 = 0.20018855f;
+            private const float C6 = -0.16700386f;
+            private const float C7 = 0.13902695f;
+            private const float C8 = -0.1197452f;
+            private const float C9 = 0.14401625f;
+            private const float C10 = -0.13657966f;
+
+            public static bool Vectorizable => true;
+
+            public static float Invoke(float x) => float.Log(x);
+
+            public static Vector128<float> Invoke(Vector128<float> x)
+            {
+                Vector128<float> specialResult = x;
+
+                // x is subnormal or infinity or NaN
+                Vector128<uint> specialMask = Vector128.GreaterThanOrEqual(x.AsUInt32() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN));
+
+                if (specialMask != Vector128<uint>.Zero)
+                {
+                    // float.IsZero(x) ? float.NegativeInfinity : x
+                    Vector128<float> zeroMask = Vector128.Equals(x, Vector128<float>.Zero);
+
+                    specialResult = Vector128.ConditionalSelect(
+                        zeroMask,
+                        Vector128.Create(float.NegativeInfinity),
+                        specialResult
+                    );
+
+                    // (x < 0) ? float.NaN : x
+                    Vector128<float> lessThanZeroMask = Vector128.LessThan(x, Vector128<float>.Zero);
+
+                    specialResult = Vector128.ConditionalSelect(
+                        lessThanZeroMask,
+                        Vector128.Create(float.NaN),
+                        specialResult
+                    );
+
+                    // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x)
+                    Vector128<float> temp = zeroMask
+                                          | lessThanZeroMask
+                                          | ~Vector128.Equals(x, x)
+                                          | Vector128.Equals(x, Vector128.Create(float.PositiveInfinity));
+
+                    // subnormal
+                    Vector128<float> subnormalMask = Vector128.AndNot(specialMask.AsSingle(), temp);
+
+                    x = Vector128.ConditionalSelect(
+                        subnormalMask,
+                        ((x * 8388608.0f).AsUInt32() - Vector128.Create(23u << 23)).AsSingle(),
+                        x
+                    );
+
+                    specialMask = temp.AsUInt32();
+                }
+
+                Vector128<uint> vx = x.AsUInt32() - Vector128.Create(V_OFF);
+                Vector128<float> n = Vector128.ConvertToSingle(Vector128.ShiftRightArithmetic(vx.AsInt32(), 23));
+
+                vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF);
+
+                Vector128<float> r = vx.AsSingle() - Vector128<float>.One;
+
+                Vector128<float> r2 = r * r;
+                Vector128<float> r4 = r2 * r2;
+                Vector128<float> r8 = r4 * r4;
+
+                Vector128<float> q = (Vector128.Create(C10) * r2 + (Vector128.Create(C9) * r + Vector128.Create(C8)))
+                                                          * r8 + (((Vector128.Create(C7) * r + Vector128.Create(C6))
+                                                            * r2 + (Vector128.Create(C5) * r + Vector128.Create(C4)))
+                                                           * r4 + ((Vector128.Create(C3) * r + Vector128.Create(C2))
+                                                            * r2 + (Vector128.Create(C1) * r + Vector128.Create(C0))));
+
+                return Vector128.ConditionalSelect(
+                    specialMask.AsSingle(),
+                    specialResult,
+                    n * Vector128.Create(V_LN2) + q
+                );
+            }
+
+            public static Vector256<float> Invoke(Vector256<float> x)
+            {
+                Vector256<float> specialResult = x;
+
+                // x is subnormal or infinity or NaN
+                Vector256<uint> specialMask = Vector256.GreaterThanOrEqual(x.AsUInt32() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN));
+
+                if (specialMask != Vector256<uint>.Zero)
+                {
+                    // float.IsZero(x) ? float.NegativeInfinity : x
+                    Vector256<float> zeroMask = Vector256.Equals(x, Vector256<float>.Zero);
+
+                    specialResult = Vector256.ConditionalSelect(
+                        zeroMask,
+                        Vector256.Create(float.NegativeInfinity),
+                        specialResult
+                    );
+
+                    // (x < 0) ? float.NaN : x
+                    Vector256<float> lessThanZeroMask = Vector256.LessThan(x, Vector256<float>.Zero);
+
+                    specialResult = Vector256.ConditionalSelect(
+                        lessThanZeroMask,
+                        Vector256.Create(float.NaN),
+                        specialResult
+                    );
+
+                    // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x)
+                    Vector256<float> temp = zeroMask
+                                          | lessThanZeroMask
+                                          | ~Vector256.Equals(x, x)
+                                          | Vector256.Equals(x, Vector256.Create(float.PositiveInfinity));
+
+                    // subnormal
+                    Vector256<float> subnormalMask = Vector256.AndNot(specialMask.AsSingle(), temp);
+
+                    x = Vector256.ConditionalSelect(
+                        subnormalMask,
+                        ((x * 8388608.0f).AsUInt32() - Vector256.Create(23u << 23)).AsSingle(),
+                        x
+                    );
+
+                    specialMask = temp.AsUInt32();
+                }
+
+                Vector256<uint> vx = x.AsUInt32() - Vector256.Create(V_OFF);
+                Vector256<float> n = Vector256.ConvertToSingle(Vector256.ShiftRightArithmetic(vx.AsInt32(), 23));
+
+                vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF);
+
+                Vector256<float> r = vx.AsSingle() - Vector256<float>.One;
+
+                Vector256<float> r2 = r * r;
+                Vector256<float> r4 = r2 * r2;
+                Vector256<float> r8 = r4 * r4;
+
+                Vector256<float> q = (Vector256.Create(C10) * r2 + (Vector256.Create(C9) * r + Vector256.Create(C8)))
+                                                          * r8 + (((Vector256.Create(C7) * r + Vector256.Create(C6))
+                                                            * r2 + (Vector256.Create(C5) * r + Vector256.Create(C4)))
+                                                           * r4 + ((Vector256.Create(C3) * r + Vector256.Create(C2))
+                                                            * r2 + (Vector256.Create(C1) * r + Vector256.Create(C0))));
+
+                return Vector256.ConditionalSelect(
+                    specialMask.AsSingle(),
+                    specialResult,
+                    n * Vector256.Create(V_LN2) + q
+                );
+            }
+
+            public static Vector512<float> Invoke(Vector512<float> x)
+            {
+                Vector512<float> specialResult = x;
+
+                // x is subnormal or infinity or NaN
+                Vector512<uint> specialMask = Vector512.GreaterThanOrEqual(x.AsUInt32() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN));
+
+                if (specialMask != Vector512<uint>.Zero)
+                {
+                    // float.IsZero(x) ? float.NegativeInfinity : x
+                    Vector512<float> zeroMask = Vector512.Equals(x, Vector512<float>.Zero);
+
+                    specialResult = Vector512.ConditionalSelect(
+                        zeroMask,
+                        Vector512.Create(float.NegativeInfinity),
+                        specialResult
+                    );
+
+                    // (x < 0) ? float.NaN : x
+                    Vector512<float> lessThanZeroMask = Vector512.LessThan(x, Vector512<float>.Zero);
+
+                    specialResult = Vector512.ConditionalSelect(
+                        lessThanZeroMask,
+                        Vector512.Create(float.NaN),
+                        specialResult
+                    );
+
+                    // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x)
+                    Vector512<float> temp = zeroMask
+                                          | lessThanZeroMask
+                                          | ~Vector512.Equals(x, x)
+                                          | Vector512.Equals(x, Vector512.Create(float.PositiveInfinity));
+
+                    // subnormal
+                    Vector512<float> subnormalMask = Vector512.AndNot(specialMask.AsSingle(), temp);
+
+                    x = Vector512.ConditionalSelect(
+                        subnormalMask,
+                        ((x * 8388608.0f).AsUInt32() - Vector512.Create(23u << 23)).AsSingle(),
+                        x
+                    );
+
+                    specialMask = temp.AsUInt32();
+                }
+
+                Vector512<uint> vx = x.AsUInt32() - Vector512.Create(V_OFF);
+                Vector512<float> n = Vector512.ConvertToSingle(Vector512.ShiftRightArithmetic(vx.AsInt32(), 23));
+
+                vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF);
+
+                Vector512<float> r = vx.AsSingle() - Vector512<float>.One;
+
+                Vector512<float> r2 = r * r;
+                Vector512<float> r4 = r2 * r2;
+                Vector512<float> r8 = r4 * r4;
+
+                Vector512<float> q = (Vector512.Create(C10) * r2 + (Vector512.Create(C9) * r + Vector512.Create(C8)))
+                                                          * r8 + (((Vector512.Create(C7) * r + Vector512.Create(C6))
+                                                            * r2 + (Vector512.Create(C5) * r + Vector512.Create(C4)))
+                                                           * r4 + ((Vector512.Create(C3) * r + Vector512.Create(C2))
+                                                            * r2 + (Vector512.Create(C1) * r + Vector512.Create(C0))));
+
+                return Vector512.ConditionalSelect(
+                    specialMask.AsSingle(),
+                    specialResult,
+                    n * Vector512.Create(V_LN2) + q
+                );
+            }
+        }
+#endif
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log10.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log10.cs
new file mode 100644
index 000000000000..7f82a41e473d
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log10.cs
@@ -0,0 +1,46 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise base 10 logarithm of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log10(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>.
+        /// If a value is negative or equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is set to NaN.
+        /// If a value is positive infinity, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
+        /// Otherwise, if a value is positive, its base 10 logarithm is stored into the corresponding destination location.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Log10<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ILogarithmicFunctions<T> =>
+            InvokeSpanIntoSpan<T, Log10Operator<T>>(x, destination);
+
+        /// <summary>T.Log10(x)</summary>
+        private readonly struct Log10Operator<T> : IUnaryOperator<T, T>
+            where T : ILogarithmicFunctions<T>
+        {
+            private const double NaturalLog10 = 2.302585092994046;
+            public static bool Vectorizable => LogOperator<T>.Vectorizable;
+            public static T Invoke(T x) => T.Log10(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => LogOperator<T>.Invoke(x) / Vector128.Create(T.CreateTruncating(NaturalLog10));
+            public static Vector256<T> Invoke(Vector256<T> x) => LogOperator<T>.Invoke(x) / Vector256.Create(T.CreateTruncating(NaturalLog10));
+            public static Vector512<T> Invoke(Vector512<T> x) => LogOperator<T>.Invoke(x) / Vector512.Create(T.CreateTruncating(NaturalLog10));
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log10P1.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log10P1.cs
new file mode 100644
index 000000000000..c1a55018312a
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log10P1.cs
@@ -0,0 +1,45 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise base 10 logarithm of numbers in the specified tensor plus 1.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log10P1(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>.
+        /// If a value is negative or equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is set to NaN.
+        /// If a value is positive infinity, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
+        /// Otherwise, if a value is positive, its base 10 logarithm plus 1 is stored into the corresponding destination location.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Log10P1<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ILogarithmicFunctions<T> =>
+            InvokeSpanIntoSpan<T, Log10P1Operator<T>>(x, destination);
+
+        /// <summary>T.Log10P1(x)</summary>
+        private readonly struct Log10P1Operator<T> : IUnaryOperator<T, T>
+            where T : ILogarithmicFunctions<T>
+        {
+            public static bool Vectorizable => Log10Operator<T>.Vectorizable;
+            public static T Invoke(T x) => T.Log10P1(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => Log10Operator<T>.Invoke(x + Vector128<T>.One);
+            public static Vector256<T> Invoke(Vector256<T> x) => Log10Operator<T>.Invoke(x + Vector256<T>.One);
+            public static Vector512<T> Invoke(Vector512<T> x) => Log10Operator<T>.Invoke(x + Vector512<T>.One);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log2.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log2.cs
new file mode 100644
index 000000000000..b8679ea9faee
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log2.cs
@@ -0,0 +1,707 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise base 2 logarithm of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log2(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>.
+        /// If a value is negative or equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is set to NaN.
+        /// If a value is positive infinity, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
+        /// Otherwise, if a value is positive, its base 2 logarithm is stored into the corresponding destination location.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Log2<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ILogarithmicFunctions<T> =>
+            InvokeSpanIntoSpan<T, Log2Operator<T>>(x, destination);
+
+        /// <summary>T.Log2(x)</summary>
+        internal readonly struct Log2Operator<T> : IUnaryOperator<T, T>
+            where T : ILogarithmicFunctions<T>
+        {
+            public static bool Vectorizable => (typeof(T) == typeof(double))
+                                            || (typeof(T) == typeof(float));
+
+            public static T Invoke(T x) => T.Log2(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector128.Log2(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector128.Log2(x.AsSingle()).As<float, T>();
+                }
+#else
+                if (typeof(T) == typeof(double))
+                {
+                    return Log2OperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Log2OperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+#endif
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector256.Log2(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector256.Log2(x.AsSingle()).As<float, T>();
+                }
+#else
+                if (typeof(T) == typeof(double))
+                {
+                    return Log2OperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Log2OperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+#endif
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector512.Log2(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector512.Log2(x.AsSingle()).As<float, T>();
+                }
+#else
+                if (typeof(T) == typeof(double))
+                {
+                    return Log2OperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Log2OperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+#endif
+            }
+        }
+
+#if !NET9_0_OR_GREATER
+        /// <summary>double.Log2(x)</summary>
+        private readonly struct Log2OperatorDouble : IUnaryOperator<double, double>
+        {
+            // This code is based on `vrd2_log2` from amd/aocl-libm-ose
+            // Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Reduce x into the form:
+            //        x = (-1)^s*2^n*m
+            // s will be always zero, as log is defined for positive numbers
+            // n is an integer known as the exponent
+            // m is mantissa
+            //
+            // x is reduced such that the mantissa, m lies in [2/3,4/3]
+            //      x = 2^n*m where m is in [2/3,4/3]
+            //      log2(x) = log2(2^n*m)              We have log(a*b) = log(a)+log(b)
+            //             = log2(2^n) + log2(m)       We have log(a^n) = n*log(a)
+            //             = n + log2(m)
+            //             = n + log2(1+(m-1))
+            //             = n + ln(1+f) * log2(e)          Where f = m-1
+            //             = n + log1p(f) * log2(e)         f lies in [-1/3,+1/3]
+            //
+            // Thus we have :
+            // log(x) = n + log1p(f) * log2(e)
+            // The second term log1p(F) is approximated by using a polynomial
+
+            private const ulong V_MIN = 0x00100000_00000000;    // SmallestNormal
+            private const ulong V_MAX = 0x7FF00000_00000000;    // +Infinity
+            private const ulong V_MSK = 0x000FFFFF_FFFFFFFF;    // (1 << 52) - 1
+            private const ulong V_OFF = 0x3FE55555_55555555;    // 2.0 / 3.0
+
+            private const double LN2_HEAD = 1.44269180297851562500E+00;
+            private const double LN2_TAIL = 3.23791044778235969970E-06;
+
+            private const double C02 = -0.499999999999999560;
+            private const double C03 = +0.333333333333414750;
+            private const double C04 = -0.250000000000297430;
+            private const double C05 = +0.199999999975985220;
+            private const double C06 = -0.166666666608919500;
+            private const double C07 = +0.142857145600277100;
+            private const double C08 = -0.125000005127831270;
+            private const double C09 = +0.111110952357159440;
+            private const double C10 = -0.099999750495501240;
+            private const double C11 = +0.090914349823462390;
+            private const double C12 = -0.083340600527551860;
+            private const double C13 = +0.076817603328311300;
+            private const double C14 = -0.071296718946287310;
+            private const double C15 = +0.067963465211535730;
+            private const double C16 = -0.063995035098960040;
+            private const double C17 = +0.049370587082412105;
+            private const double C18 = -0.045370170994891980;
+            private const double C19 = +0.088970636003577750;
+            private const double C20 = -0.086906174116908760;
+
+            public static bool Vectorizable => true;
+
+            public static double Invoke(double x) => double.Log2(x);
+
+            public static Vector128<double> Invoke(Vector128<double> x)
+            {
+                Vector128<double> specialResult = x;
+
+                // x is zero, subnormal, infinity, or NaN
+                Vector128<ulong> specialMask = Vector128.GreaterThanOrEqual(x.AsUInt64() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN));
+
+                if (specialMask != Vector128<ulong>.Zero)
+                {
+                    Vector128<long> xBits = x.AsInt64();
+
+                    // (x < 0) ? float.NaN : x
+                    Vector128<double> lessThanZeroMask = Vector128.LessThan(xBits, Vector128<long>.Zero).AsDouble();
+
+                    specialResult = Vector128.ConditionalSelect(
+                        lessThanZeroMask,
+                        Vector128.Create(double.NaN),
+                        specialResult
+                    );
+
+                    // double.IsZero(x) ? double.NegativeInfinity : x
+                    Vector128<double> zeroMask = Vector128.Equals(xBits << 1, Vector128<long>.Zero).AsDouble();
+
+                    specialResult = Vector128.ConditionalSelect(
+                        zeroMask,
+                        Vector128.Create(double.NegativeInfinity),
+                        specialResult
+                    );
+
+                    // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x)
+                    Vector128<double> temp = zeroMask
+                                           | lessThanZeroMask
+                                           | Vector128.GreaterThanOrEqual(xBits, Vector128.Create(double.PositiveInfinity).AsInt64()).AsDouble();
+
+                    // subnormal
+                    Vector128<double> subnormalMask = Vector128.AndNot(specialMask.AsDouble(), temp);
+
+                    // multiply by 2^52, then normalize
+                    x = Vector128.ConditionalSelect(
+                        subnormalMask,
+                        ((x * 4503599627370496.0).AsUInt64() - Vector128.Create(52ul << 52)).AsDouble(),
+                        x
+                    );
+
+                    specialMask = temp.AsUInt64();
+                }
+
+                // Reduce the mantissa to [+2/3, +4/3]
+                Vector128<ulong> vx = x.AsUInt64() - Vector128.Create(V_OFF);
+                Vector128<double> n = Vector128.ConvertToDouble(vx.AsInt64() >> 52);
+                vx = (vx & Vector128.Create(V_MSK)) + Vector128.Create(V_OFF);
+
+                // Adjust the mantissa to [-1/3, +1/3]
+                Vector128<double> r = vx.AsDouble() - Vector128<double>.One;
+
+                Vector128<double> r02 = r * r;
+                Vector128<double> r04 = r02 * r02;
+                Vector128<double> r08 = r04 * r04;
+                Vector128<double> r16 = r08 * r08;
+
+                // Compute log(x + 1) using polynomial approximation
+                //      C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
+
+                Vector128<double> poly = (((r04 * C20)
+                                        + ((((r * C19) + Vector128.Create(C18)) * r02)
+                                          + ((r * C17) + Vector128.Create(C16)))) * r16)
+                                     + (((((((r * C15) + Vector128.Create(C14)) * r02)
+                                          + ((r * C13) + Vector128.Create(C12))) * r04)
+                                        + ((((r * C11) + Vector128.Create(C10)) * r02)
+                                          + ((r * C09) + Vector128.Create(C08)))) * r08)
+                                       + (((((r * C07) + Vector128.Create(C06)) * r02)
+                                          + ((r * C05) + Vector128.Create(C04))) * r04)
+                                        + ((((r * C03) + Vector128.Create(C02)) * r02) + r);
+
+                return Vector128.ConditionalSelect(
+                    specialMask.AsDouble(),
+                    specialResult,
+                    (poly * LN2_HEAD) + ((poly * LN2_TAIL) + n)
+                );
+            }
+
+            public static Vector256<double> Invoke(Vector256<double> x)
+            {
+                Vector256<double> specialResult = x;
+
+                // x is zero, subnormal, infinity, or NaN
+                Vector256<ulong> specialMask = Vector256.GreaterThanOrEqual(x.AsUInt64() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN));
+
+                if (specialMask != Vector256<ulong>.Zero)
+                {
+                    Vector256<long> xBits = x.AsInt64();
+
+                    // (x < 0) ? float.NaN : x
+                    Vector256<double> lessThanZeroMask = Vector256.LessThan(xBits, Vector256<long>.Zero).AsDouble();
+
+                    specialResult = Vector256.ConditionalSelect(
+                        lessThanZeroMask,
+                        Vector256.Create(double.NaN),
+                        specialResult
+                    );
+
+                    // double.IsZero(x) ? double.NegativeInfinity : x
+                    Vector256<double> zeroMask = Vector256.Equals(xBits << 1, Vector256<long>.Zero).AsDouble();
+
+                    specialResult = Vector256.ConditionalSelect(
+                        zeroMask,
+                        Vector256.Create(double.NegativeInfinity),
+                        specialResult
+                    );
+
+                    // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x)
+                    Vector256<double> temp = zeroMask
+                                           | lessThanZeroMask
+                                           | Vector256.GreaterThanOrEqual(xBits, Vector256.Create(double.PositiveInfinity).AsInt64()).AsDouble();
+
+                    // subnormal
+                    Vector256<double> subnormalMask = Vector256.AndNot(specialMask.AsDouble(), temp);
+
+                    // multiply by 2^52, then normalize
+                    x = Vector256.ConditionalSelect(
+                        subnormalMask,
+                        ((x * 4503599627370496.0).AsUInt64() - Vector256.Create(52ul << 52)).AsDouble(),
+                        x
+                    );
+
+                    specialMask = temp.AsUInt64();
+                }
+
+                // Reduce the mantissa to [+2/3, +4/3]
+                Vector256<ulong> vx = x.AsUInt64() - Vector256.Create(V_OFF);
+                Vector256<double> n = Vector256.ConvertToDouble(vx.AsInt64() >> 52);
+                vx = (vx & Vector256.Create(V_MSK)) + Vector256.Create(V_OFF);
+
+                // Adjust the mantissa to [-1/3, +1/3]
+                Vector256<double> r = vx.AsDouble() - Vector256<double>.One;
+
+                Vector256<double> r02 = r * r;
+                Vector256<double> r04 = r02 * r02;
+                Vector256<double> r08 = r04 * r04;
+                Vector256<double> r16 = r08 * r08;
+
+                // Compute log(x + 1) using polynomial approximation
+                //      C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
+
+                Vector256<double> poly = (((r04 * C20)
+                                        + ((((r * C19) + Vector256.Create(C18)) * r02)
+                                          + ((r * C17) + Vector256.Create(C16)))) * r16)
+                                     + (((((((r * C15) + Vector256.Create(C14)) * r02)
+                                          + ((r * C13) + Vector256.Create(C12))) * r04)
+                                        + ((((r * C11) + Vector256.Create(C10)) * r02)
+                                          + ((r * C09) + Vector256.Create(C08)))) * r08)
+                                       + (((((r * C07) + Vector256.Create(C06)) * r02)
+                                          + ((r * C05) + Vector256.Create(C04))) * r04)
+                                        + ((((r * C03) + Vector256.Create(C02)) * r02) + r);
+
+                return Vector256.ConditionalSelect(
+                    specialMask.AsDouble(),
+                    specialResult,
+                    (poly * LN2_HEAD) + ((poly * LN2_TAIL) + n)
+                );
+            }
+
+            public static Vector512<double> Invoke(Vector512<double> x)
+            {
+                Vector512<double> specialResult = x;
+
+                // x is zero, subnormal, infinity, or NaN
+                Vector512<ulong> specialMask = Vector512.GreaterThanOrEqual(x.AsUInt64() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN));
+
+                if (specialMask != Vector512<ulong>.Zero)
+                {
+                    Vector512<long> xBits = x.AsInt64();
+
+                    // (x < 0) ? float.NaN : x
+                    Vector512<double> lessThanZeroMask = Vector512.LessThan(xBits, Vector512<long>.Zero).AsDouble();
+
+                    specialResult = Vector512.ConditionalSelect(
+                        lessThanZeroMask,
+                        Vector512.Create(double.NaN),
+                        specialResult
+                    );
+
+                    // double.IsZero(x) ? double.NegativeInfinity : x
+                    Vector512<double> zeroMask = Vector512.Equals(xBits << 1, Vector512<long>.Zero).AsDouble();
+
+                    specialResult = Vector512.ConditionalSelect(
+                        zeroMask,
+                        Vector512.Create(double.NegativeInfinity),
+                        specialResult
+                    );
+
+                    // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x)
+                    Vector512<double> temp = zeroMask
+                                           | lessThanZeroMask
+                                           | Vector512.GreaterThanOrEqual(xBits, Vector512.Create(double.PositiveInfinity).AsInt64()).AsDouble();
+
+                    // subnormal
+                    Vector512<double> subnormalMask = Vector512.AndNot(specialMask.AsDouble(), temp);
+
+                    // multiply by 2^52, then normalize
+                    x = Vector512.ConditionalSelect(
+                        subnormalMask,
+                        ((x * 4503599627370496.0).AsUInt64() - Vector512.Create(52ul << 52)).AsDouble(),
+                        x
+                    );
+
+                    specialMask = temp.AsUInt64();
+                }
+
+                // Reduce the mantissa to [+2/3, +4/3]
+                Vector512<ulong> vx = x.AsUInt64() - Vector512.Create(V_OFF);
+                Vector512<double> n = Vector512.ConvertToDouble(vx.AsInt64() >> 52);
+                vx = (vx & Vector512.Create(V_MSK)) + Vector512.Create(V_OFF);
+
+                // Adjust the mantissa to [-1/3, +1/3]
+                Vector512<double> r = vx.AsDouble() - Vector512<double>.One;
+
+                Vector512<double> r02 = r * r;
+                Vector512<double> r04 = r02 * r02;
+                Vector512<double> r08 = r04 * r04;
+                Vector512<double> r16 = r08 * r08;
+
+                // Compute log(x + 1) using polynomial approximation
+                //      C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
+
+                Vector512<double> poly = (((r04 * C20)
+                                        + ((((r * C19) + Vector512.Create(C18)) * r02)
+                                          + ((r * C17) + Vector512.Create(C16)))) * r16)
+                                     + (((((((r * C15) + Vector512.Create(C14)) * r02)
+                                          + ((r * C13) + Vector512.Create(C12))) * r04)
+                                        + ((((r * C11) + Vector512.Create(C10)) * r02)
+                                          + ((r * C09) + Vector512.Create(C08)))) * r08)
+                                       + (((((r * C07) + Vector512.Create(C06)) * r02)
+                                          + ((r * C05) + Vector512.Create(C04))) * r04)
+                                        + ((((r * C03) + Vector512.Create(C02)) * r02) + r);
+
+                return Vector512.ConditionalSelect(
+                    specialMask.AsDouble(),
+                    specialResult,
+                    (poly * LN2_HEAD) + ((poly * LN2_TAIL) + n)
+                );
+            }
+        }
+
+        /// <summary>float.Log2(x)</summary>
+        private readonly struct Log2OperatorSingle : IUnaryOperator<float, float>
+        {
+            // This code is based on `vrs4_log2f` from amd/aocl-libm-ose
+            // Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Spec:
+            //   log2f(x)
+            //          = log2f(x)          if x ∈ F and x > 0
+            //          = x                 if x = qNaN
+            //          = 0                 if x = 1
+            //          = -inf              if x = (-0, 0}
+            //          = NaN               otherwise
+            //
+            // Assumptions/Expectations
+            //      - Maximum ULP is observed to be at 4
+            //      - Some FPU Exceptions may not be available
+            //      - Performance is at least 3x
+            //
+            // Implementation Notes:
+            //  1. Range Reduction:
+            //      x = 2^n*(1+f)                                          .... (1)
+            //         where n is exponent and is an integer
+            //             (1+f) is mantissa ∈ [1,2). i.e., 1 ≤ 1+f < 2    .... (2)
+            //
+            //    From (1), taking log on both sides
+            //      log2(x) = log2(2^n * (1+f))
+            //             = n + log2(1+f)                           .... (3)
+            //
+            //      let z = 1 + f
+            //             log2(z) = log2(k) + log2(z) - log2(k)
+            //             log2(z) = log2(kz) - log2(k)
+            //
+            //    From (2), range of z is [1, 2)
+            //       by simply dividing range by 'k', z is in [1/k, 2/k)  .... (4)
+            //       Best choice of k is the one which gives equal and opposite values
+            //       at extrema        +-      -+
+            //              1          | 2      |
+            //             --- - 1 = - |--- - 1 |
+            //              k          | k      |                         .... (5)
+            //                         +-      -+
+            //
+            //       Solving for k, k = 3/2,
+            //    From (4), using 'k' value, range is therefore [-0.3333, 0.3333]
+            //
+            //  2. Polynomial Approximation:
+            //     More information refer to tools/sollya/vrs4_logf.sollya
+            //
+            //     7th Deg -   Error abs: 0x1.04c4ac98p-22   rel: 0x1.2216e6f8p-19
+
+            private const uint V_MIN = 0x00800000;
+            private const uint V_MAX = 0x7F800000;
+            private const uint V_MASK = 0x007FFFFF;
+            private const uint V_OFF = 0x3F2AAAAB;
+
+            private const float C0 = 0.0f;
+            private const float C1 = 1.4426951f;
+            private const float C2 = -0.72134554f;
+            private const float C3 = 0.48089063f;
+            private const float C4 = -0.36084408f;
+            private const float C5 = 0.2888971f;
+            private const float C6 = -0.23594281f;
+            private const float C7 = 0.19948183f;
+            private const float C8 = -0.22616665f;
+            private const float C9 = 0.21228963f;
+
+            public static bool Vectorizable => true;
+
+            public static float Invoke(float x) => float.Log2(x);
+
+            public static Vector128<float> Invoke(Vector128<float> x)
+            {
+                Vector128<float> specialResult = x;
+
+                // x is subnormal or infinity or NaN
+                Vector128<uint> specialMask = Vector128.GreaterThanOrEqual(x.AsUInt32() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN));
+
+                if (specialMask != Vector128<uint>.Zero)
+                {
+                    // float.IsZero(x) ? float.NegativeInfinity : x
+                    Vector128<float> zeroMask = Vector128.Equals(x, Vector128<float>.Zero);
+
+                    specialResult = Vector128.ConditionalSelect(
+                        zeroMask,
+                        Vector128.Create(float.NegativeInfinity),
+                        specialResult
+                    );
+
+                    // (x < 0) ? float.NaN : x
+                    Vector128<float> lessThanZeroMask = Vector128.LessThan(x, Vector128<float>.Zero);
+
+                    specialResult = Vector128.ConditionalSelect(
+                        lessThanZeroMask,
+                        Vector128.Create(float.NaN),
+                        specialResult
+                    );
+
+                    // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x)
+                    Vector128<float> temp = zeroMask
+                                          | lessThanZeroMask
+                                          | ~Vector128.Equals(x, x)
+                                          | Vector128.Equals(x, Vector128.Create(float.PositiveInfinity));
+
+                    // subnormal
+                    Vector128<float> subnormalMask = Vector128.AndNot(specialMask.AsSingle(), temp);
+
+                    x = Vector128.ConditionalSelect(
+                        subnormalMask,
+                        ((x * 8388608.0f).AsUInt32() - Vector128.Create(23u << 23)).AsSingle(),
+                        x
+                    );
+
+                    specialMask = temp.AsUInt32();
+                }
+
+                Vector128<uint> vx = x.AsUInt32() - Vector128.Create(V_OFF);
+                Vector128<float> n = Vector128.ConvertToSingle(Vector128.ShiftRightArithmetic(vx.AsInt32(), 23));
+
+                vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF);
+
+                Vector128<float> r = vx.AsSingle() - Vector128<float>.One;
+
+                Vector128<float> r2 = r * r;
+                Vector128<float> r4 = r2 * r2;
+                Vector128<float> r8 = r4 * r4;
+
+                Vector128<float> poly = (Vector128.Create(C9) * r + Vector128.Create(C8)) * r8
+                                    + (((Vector128.Create(C7) * r + Vector128.Create(C6)) * r2
+                                      + (Vector128.Create(C5) * r + Vector128.Create(C4))) * r4
+                                     + ((Vector128.Create(C3) * r + Vector128.Create(C2)) * r2
+                                      + (Vector128.Create(C1) * r + Vector128.Create(C0))));
+
+                return Vector128.ConditionalSelect(
+                    specialMask.AsSingle(),
+                    specialResult,
+                    n + poly
+                );
+            }
+
+            public static Vector256<float> Invoke(Vector256<float> x)
+            {
+                Vector256<float> specialResult = x;
+
+                // x is subnormal or infinity or NaN
+                Vector256<uint> specialMask = Vector256.GreaterThanOrEqual(x.AsUInt32() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN));
+
+                if (specialMask != Vector256<uint>.Zero)
+                {
+                    // float.IsZero(x) ? float.NegativeInfinity : x
+                    Vector256<float> zeroMask = Vector256.Equals(x, Vector256<float>.Zero);
+
+                    specialResult = Vector256.ConditionalSelect(
+                        zeroMask,
+                        Vector256.Create(float.NegativeInfinity),
+                        specialResult
+                    );
+
+                    // (x < 0) ? float.NaN : x
+                    Vector256<float> lessThanZeroMask = Vector256.LessThan(x, Vector256<float>.Zero);
+
+                    specialResult = Vector256.ConditionalSelect(
+                        lessThanZeroMask,
+                        Vector256.Create(float.NaN),
+                        specialResult
+                    );
+
+                    // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x)
+                    Vector256<float> temp = zeroMask
+                                          | lessThanZeroMask
+                                          | ~Vector256.Equals(x, x)
+                                          | Vector256.Equals(x, Vector256.Create(float.PositiveInfinity));
+
+                    // subnormal
+                    Vector256<float> subnormalMask = Vector256.AndNot(specialMask.AsSingle(), temp);
+
+                    x = Vector256.ConditionalSelect(
+                        subnormalMask,
+                        ((x * 8388608.0f).AsUInt32() - Vector256.Create(23u << 23)).AsSingle(),
+                        x
+                    );
+
+                    specialMask = temp.AsUInt32();
+                }
+
+                Vector256<uint> vx = x.AsUInt32() - Vector256.Create(V_OFF);
+                Vector256<float> n = Vector256.ConvertToSingle(Vector256.ShiftRightArithmetic(vx.AsInt32(), 23));
+
+                vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF);
+
+                Vector256<float> r = vx.AsSingle() - Vector256<float>.One;
+
+                Vector256<float> r2 = r * r;
+                Vector256<float> r4 = r2 * r2;
+                Vector256<float> r8 = r4 * r4;
+
+                Vector256<float> poly = (Vector256.Create(C9) * r + Vector256.Create(C8)) * r8
+                                    + (((Vector256.Create(C7) * r + Vector256.Create(C6)) * r2
+                                      + (Vector256.Create(C5) * r + Vector256.Create(C4))) * r4
+                                     + ((Vector256.Create(C3) * r + Vector256.Create(C2)) * r2
+                                      + (Vector256.Create(C1) * r + Vector256.Create(C0))));
+
+                return Vector256.ConditionalSelect(
+                    specialMask.AsSingle(),
+                    specialResult,
+                    n + poly
+                );
+            }
+
+            public static Vector512<float> Invoke(Vector512<float> x)
+            {
+                Vector512<float> specialResult = x;
+
+                // x is subnormal or infinity or NaN
+                Vector512<uint> specialMask = Vector512.GreaterThanOrEqual(x.AsUInt32() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN));
+
+                if (specialMask != Vector512<uint>.Zero)
+                {
+                    // float.IsZero(x) ? float.NegativeInfinity : x
+                    Vector512<float> zeroMask = Vector512.Equals(x, Vector512<float>.Zero);
+
+                    specialResult = Vector512.ConditionalSelect(
+                        zeroMask,
+                        Vector512.Create(float.NegativeInfinity),
+                        specialResult
+                    );
+
+                    // (x < 0) ? float.NaN : x
+                    Vector512<float> lessThanZeroMask = Vector512.LessThan(x, Vector512<float>.Zero);
+
+                    specialResult = Vector512.ConditionalSelect(
+                        lessThanZeroMask,
+                        Vector512.Create(float.NaN),
+                        specialResult
+                    );
+
+                    // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x)
+                    Vector512<float> temp = zeroMask
+                                          | lessThanZeroMask
+                                          | ~Vector512.Equals(x, x)
+                                          | Vector512.Equals(x, Vector512.Create(float.PositiveInfinity));
+
+                    // subnormal
+                    Vector512<float> subnormalMask = Vector512.AndNot(specialMask.AsSingle(), temp);
+
+                    x = Vector512.ConditionalSelect(
+                        subnormalMask,
+                        ((x * 8388608.0f).AsUInt32() - Vector512.Create(23u << 23)).AsSingle(),
+                        x
+                    );
+
+                    specialMask = temp.AsUInt32();
+                }
+
+                Vector512<uint> vx = x.AsUInt32() - Vector512.Create(V_OFF);
+                Vector512<float> n = Vector512.ConvertToSingle(Vector512.ShiftRightArithmetic(vx.AsInt32(), 23));
+
+                vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF);
+
+                Vector512<float> r = vx.AsSingle() - Vector512<float>.One;
+
+                Vector512<float> r2 = r * r;
+                Vector512<float> r4 = r2 * r2;
+                Vector512<float> r8 = r4 * r4;
+
+                Vector512<float> poly = (Vector512.Create(C9) * r + Vector512.Create(C8)) * r8
+                                    + (((Vector512.Create(C7) * r + Vector512.Create(C6)) * r2
+                                      + (Vector512.Create(C5) * r + Vector512.Create(C4))) * r4
+                                     + ((Vector512.Create(C3) * r + Vector512.Create(C2)) * r2
+                                      + (Vector512.Create(C1) * r + Vector512.Create(C0))));
+
+                return Vector512.ConditionalSelect(
+                    specialMask.AsSingle(),
+                    specialResult,
+                    n + poly
+                );
+            }
+        }
+#endif
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log2P1.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log2P1.cs
new file mode 100644
index 000000000000..7fa38203addd
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Log2P1.cs
@@ -0,0 +1,45 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise base 2 logarithm of numbers in the specified tensor plus 1.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log2P1(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>.
+        /// If a value is negative or equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is set to NaN.
+        /// If a value is positive infinity, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
+        /// Otherwise, if a value is positive, its base 2 logarithm plus 1 is stored into the corresponding destination location.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Log2P1<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ILogarithmicFunctions<T> =>
+            InvokeSpanIntoSpan<T, Log2P1Operator<T>>(x, destination);
+
+        /// <summary>T.Log2P1(x)</summary>
+        private readonly struct Log2P1Operator<T> : IUnaryOperator<T, T>
+            where T : ILogarithmicFunctions<T>
+        {
+            public static bool Vectorizable => Log2Operator<T>.Vectorizable;
+            public static T Invoke(T x) => T.Log2P1(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => Log2Operator<T>.Invoke(x + Vector128<T>.One);
+            public static Vector256<T> Invoke(Vector256<T> x) => Log2Operator<T>.Invoke(x + Vector256<T>.One);
+            public static Vector512<T> Invoke(Vector512<T> x) => Log2Operator<T>.Invoke(x + Vector512<T>.One);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.LogP1.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.LogP1.cs
new file mode 100644
index 000000000000..2985f57de25c
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.LogP1.cs
@@ -0,0 +1,45 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise natural (base <c>e</c>) logarithm of numbers in the specified tensor plus 1.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.LogP1(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>.
+        /// If a value is negative or equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is set to NaN.
+        /// If a value is positive infinity, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
+        /// Otherwise, if a value is positive, its natural logarithm plus 1 is stored into the corresponding destination location.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void LogP1<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ILogarithmicFunctions<T> =>
+            InvokeSpanIntoSpan<T, LogP1Operator<T>>(x, destination);
+
+        /// <summary>T.LogP1(x)</summary>
+        private readonly struct LogP1Operator<T> : IUnaryOperator<T, T>
+            where T : ILogarithmicFunctions<T>
+        {
+            public static bool Vectorizable => LogOperator<T>.Vectorizable;
+            public static T Invoke(T x) => T.LogP1(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => LogOperator<T>.Invoke(x + Vector128<T>.One);
+            public static Vector256<T> Invoke(Vector256<T> x) => LogOperator<T>.Invoke(x + Vector256<T>.One);
+            public static Vector512<T> Invoke(Vector512<T> x) => LogOperator<T>.Invoke(x + Vector512<T>.One);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Max.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Max.cs
new file mode 100644
index 000000000000..0e0566f2d935
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Max.cs
@@ -0,0 +1,549 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Searches for the largest number in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <returns>The maximum element in <paramref name="x"/>.</returns>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be greater than zero.</exception>
+        /// <remarks>
+        /// <para>
+        /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If any value equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>
+        /// is present, the first is returned. Positive 0 is considered greater than negative 0.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T Max<T>(ReadOnlySpan<T> x)
+            where T : INumber<T> =>
+            MinMaxCore<T, MaxOperator<T>>(x);
+
+        /// <summary>Computes the element-wise maximum of the numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Max(<paramref name="x" />[i], <paramref name="y" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
+        /// that value is stored as the result. Positive 0 is considered greater than negative 0.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Max<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : INumber<T> =>
+            InvokeSpanSpanIntoSpan<T, MaxPropagateNaNOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise maximum of the numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Max(<paramref name="x" />[i], <paramref name="y" />)</c>.
+        /// </para>
+        /// <para>
+        /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
+        /// that value is stored as the result. Positive 0 is considered greater than negative 0.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Max<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : INumber<T> =>
+            InvokeSpanScalarIntoSpan<T, MaxPropagateNaNOperator<T>>(x, y, destination);
+
+        /// <summary>T.Max(x, y) (but NaNs may not be propagated)</summary>
+        internal readonly struct MaxOperator<T> : IAggregationOperator<T> where T : INumber<T>
+        {
+            public static bool Vectorizable => true;
+
+            public static T Invoke(T x, T y)
+            {
+                if (typeof(T) == typeof(Half) || typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    return x == y ?
+                        (IsNegative(x) ? y : x) :
+                        (y > x ? y : x);
+                }
+
+                return T.Max(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
+            {
+                if (AdvSimd.IsSupported)
+                {
+                    if (typeof(T) == typeof(byte)) return AdvSimd.Max(x.AsByte(), y.AsByte()).As<byte, T>();
+                    if (typeof(T) == typeof(sbyte)) return AdvSimd.Max(x.AsSByte(), y.AsSByte()).As<sbyte, T>();
+                    if (typeof(T) == typeof(short)) return AdvSimd.Max(x.AsInt16(), y.AsInt16()).As<short, T>();
+                    if (typeof(T) == typeof(ushort)) return AdvSimd.Max(x.AsUInt16(), y.AsUInt16()).As<ushort, T>();
+                    if (typeof(T) == typeof(int)) return AdvSimd.Max(x.AsInt32(), y.AsInt32()).As<int, T>();
+                    if (typeof(T) == typeof(uint)) return AdvSimd.Max(x.AsUInt32(), y.AsUInt32()).As<uint, T>();
+                    if (typeof(T) == typeof(float)) return AdvSimd.Max(x.AsSingle(), y.AsSingle()).As<float, T>();
+                }
+
+                if (AdvSimd.Arm64.IsSupported)
+                {
+                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.Max(x.AsDouble(), y.AsDouble()).As<double, T>();
+                }
+
+                if (typeof(T) == typeof(float))
+                {
+                    return
+                        Vector128.ConditionalSelect(Vector128.Equals(x, y),
+                            Vector128.ConditionalSelect(IsNegative(x.AsSingle()).As<float, T>(), y, x),
+                            Vector128.Max(x, y));
+                }
+
+                if (typeof(T) == typeof(double))
+                {
+                    return
+                        Vector128.ConditionalSelect(Vector128.Equals(x, y),
+                            Vector128.ConditionalSelect(IsNegative(x.AsDouble()).As<double, T>(), y, x),
+                            Vector128.Max(x, y));
+                }
+
+                return Vector128.Max(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return
+                        Vector256.ConditionalSelect(Vector256.Equals(x, y),
+                            Vector256.ConditionalSelect(IsNegative(x.AsSingle()).As<float, T>(), y, x),
+                            Vector256.Max(x, y));
+                }
+
+                if (typeof(T) == typeof(double))
+                {
+                    return
+                        Vector256.ConditionalSelect(Vector256.Equals(x, y),
+                            Vector256.ConditionalSelect(IsNegative(x.AsDouble()).As<double, T>(), y, x),
+                            Vector256.Max(x, y));
+                }
+
+                return Vector256.Max(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return
+                        Vector512.ConditionalSelect(Vector512.Equals(x, y),
+                            Vector512.ConditionalSelect(IsNegative(x.AsSingle()).As<float, T>(), y, x),
+                            Vector512.Max(x, y));
+                }
+
+                if (typeof(T) == typeof(double))
+                {
+                    return
+                        Vector512.ConditionalSelect(Vector512.Equals(x, y),
+                            Vector512.ConditionalSelect(IsNegative(x.AsDouble()).As<double, T>(), y, x),
+                            Vector512.Max(x, y));
+                }
+
+                return Vector512.Max(x, y);
+            }
+
+            public static T Invoke(Vector128<T> x) => HorizontalAggregate<T, MaxOperator<T>>(x);
+            public static T Invoke(Vector256<T> x) => HorizontalAggregate<T, MaxOperator<T>>(x);
+            public static T Invoke(Vector512<T> x) => HorizontalAggregate<T, MaxOperator<T>>(x);
+        }
+
+        /// <summary>Max(x, y)</summary>
+        internal readonly struct MaxPropagateNaNOperator<T> : IBinaryOperator<T>
+             where T : INumber<T>
+        {
+            public static bool Vectorizable => true;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static T Invoke(T x, T y) => T.Max(x, y);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
+            {
+                if (AdvSimd.IsSupported)
+                {
+                    if (typeof(T) == typeof(byte)) return AdvSimd.Max(x.AsByte(), y.AsByte()).As<byte, T>();
+                    if (typeof(T) == typeof(sbyte)) return AdvSimd.Max(x.AsSByte(), y.AsSByte()).As<sbyte, T>();
+                    if (typeof(T) == typeof(ushort)) return AdvSimd.Max(x.AsUInt16(), y.AsUInt16()).As<ushort, T>();
+                    if (typeof(T) == typeof(short)) return AdvSimd.Max(x.AsInt16(), y.AsInt16()).As<short, T>();
+                    if (typeof(T) == typeof(uint)) return AdvSimd.Max(x.AsUInt32(), y.AsUInt32()).As<uint, T>();
+                    if (typeof(T) == typeof(int)) return AdvSimd.Max(x.AsInt32(), y.AsInt32()).As<int, T>();
+                    if (typeof(T) == typeof(float)) return AdvSimd.Max(x.AsSingle(), y.AsSingle()).As<float, T>();
+                }
+
+                if (AdvSimd.Arm64.IsSupported)
+                {
+                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.Max(x.AsDouble(), y.AsDouble()).As<double, T>();
+                }
+
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    return
+                        Vector128.ConditionalSelect(Vector128.Equals(x, x),
+                            Vector128.ConditionalSelect(Vector128.Equals(y, y),
+                                Vector128.ConditionalSelect(Vector128.Equals(x, y),
+                                    Vector128.ConditionalSelect(IsNegative(x), y, x),
+                                    Vector128.Max(x, y)),
+                                y),
+                            x);
+                }
+
+                return Vector128.Max(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
+            {
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    return
+                        Vector256.ConditionalSelect(Vector256.Equals(x, x),
+                            Vector256.ConditionalSelect(Vector256.Equals(y, y),
+                                Vector256.ConditionalSelect(Vector256.Equals(x, y),
+                                    Vector256.ConditionalSelect(IsNegative(x), y, x),
+                                    Vector256.Max(x, y)),
+                                y),
+                            x);
+                }
+
+                return Vector256.Max(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
+            {
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    return
+                        Vector512.ConditionalSelect(Vector512.Equals(x, x),
+                            Vector512.ConditionalSelect(Vector512.Equals(y, y),
+                                Vector512.ConditionalSelect(Vector512.Equals(x, y),
+                                    Vector512.ConditionalSelect(IsNegative(x), y, x),
+                                    Vector512.Max(x, y)),
+                                y),
+                            x);
+                }
+
+                return Vector512.Max(x, y);
+            }
+        }
+
+        /// <summary>Gets whether each specified <see cref="float"/> is negative.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector128<T> IsNegative<T>(Vector128<T> vector)
+        {
+            if (typeof(T) == typeof(float))
+            {
+                return Vector128.LessThan(vector.AsInt32(), Vector128<int>.Zero).As<int, T>();
+            }
+
+            if (typeof(T) == typeof(double))
+            {
+                return Vector128.LessThan(vector.AsInt64(), Vector128<long>.Zero).As<long, T>();
+            }
+
+            return Vector128.LessThan(vector, Vector128<T>.Zero);
+        }
+
+        /// <summary>Gets whether each specified <see cref="float"/> is negative.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector256<T> IsNegative<T>(Vector256<T> vector)
+        {
+            if (typeof(T) == typeof(float))
+            {
+                return Vector256.LessThan(vector.AsInt32(), Vector256<int>.Zero).As<int, T>();
+            }
+
+            if (typeof(T) == typeof(double))
+            {
+                return Vector256.LessThan(vector.AsInt64(), Vector256<long>.Zero).As<long, T>();
+            }
+
+            return Vector256.LessThan(vector, Vector256<T>.Zero);
+        }
+
+        /// <summary>Gets whether each specified <see cref="float"/> is negative.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector512<T> IsNegative<T>(Vector512<T> vector)
+        {
+            if (typeof(T) == typeof(float))
+            {
+                return Vector512.LessThan(vector.AsInt32(), Vector512<int>.Zero).As<int, T>();
+            }
+
+            if (typeof(T) == typeof(double))
+            {
+                return Vector512.LessThan(vector.AsInt64(), Vector512<long>.Zero).As<long, T>();
+            }
+
+            return Vector512.LessThan(vector, Vector512<T>.Zero);
+        }
+
+        /// <remarks>
+        /// This is the same as <see cref="Aggregate{T, TTransformOperator, TAggregationOperator}(ReadOnlySpan{T})"/>
+        /// with an identity transform, except it early exits on NaN.
+        /// </remarks>
+        private static T MinMaxCore<T, TMinMaxOperator>(ReadOnlySpan<T> x)
+            where T : INumberBase<T>
+            where TMinMaxOperator : struct, IAggregationOperator<T>
+        {
+            if (x.IsEmpty)
+            {
+                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
+            }
+
+            // This matches the IEEE 754:2019 `maximum`/`minimum` functions.
+            // It propagates NaN inputs back to the caller and
+            // otherwise returns the greater of the inputs.
+            // It treats +0 as greater than -0 as per the specification.
+
+            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && x.Length >= Vector512<T>.Count)
+            {
+                ref T xRef = ref MemoryMarshal.GetReference(x);
+
+                // Load the first vector as the initial set of results, and bail immediately
+                // to scalar handling if it contains any NaNs (which don't compare equally to themselves).
+                Vector512<T> result = Vector512.LoadUnsafe(ref xRef, 0);
+                Vector512<T> current;
+
+                Vector512<T> nanMask;
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    // Check for NaNs
+                    nanMask = ~Vector512.Equals(result, result);
+                    if (nanMask != Vector512<T>.Zero)
+                    {
+                        return result.GetElement(IndexOfFirstMatch(nanMask));
+                    }
+                }
+
+                int oneVectorFromEnd = x.Length - Vector512<T>.Count;
+                int i = Vector512<T>.Count;
+
+                // Aggregate additional vectors into the result as long as there's at least one full vector left to process.
+                while (i <= oneVectorFromEnd)
+                {
+                    // Load the next vector, and early exit on NaN.
+                    current = Vector512.LoadUnsafe(ref xRef, (uint)i);
+
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // Check for NaNs
+                        nanMask = ~Vector512.Equals(current, current);
+                        if (nanMask != Vector512<T>.Zero)
+                        {
+                            return current.GetElement(IndexOfFirstMatch(nanMask));
+                        }
+                    }
+
+                    result = TMinMaxOperator.Invoke(result, current);
+                    i += Vector512<T>.Count;
+                }
+
+                // If any elements remain, handle them in one final vector.
+                if (i != x.Length)
+                {
+                    current = Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512<T>.Count));
+
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // Check for NaNs
+                        nanMask = ~Vector512.Equals(current, current);
+                        if (nanMask != Vector512<T>.Zero)
+                        {
+                            return current.GetElement(IndexOfFirstMatch(nanMask));
+                        }
+                    }
+
+                    result = TMinMaxOperator.Invoke(result, current);
+                }
+
+                // Aggregate the lanes in the vector to create the final scalar result.
+                return TMinMaxOperator.Invoke(result);
+            }
+
+            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && x.Length >= Vector256<T>.Count)
+            {
+                ref T xRef = ref MemoryMarshal.GetReference(x);
+
+                // Load the first vector as the initial set of results, and bail immediately
+                // to scalar handling if it contains any NaNs (which don't compare equally to themselves).
+                Vector256<T> result = Vector256.LoadUnsafe(ref xRef, 0);
+                Vector256<T> current;
+
+                Vector256<T> nanMask;
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    // Check for NaNs
+                    nanMask = ~Vector256.Equals(result, result);
+                    if (nanMask != Vector256<T>.Zero)
+                    {
+                        return result.GetElement(IndexOfFirstMatch(nanMask));
+                    }
+                }
+
+                int oneVectorFromEnd = x.Length - Vector256<T>.Count;
+                int i = Vector256<T>.Count;
+
+                // Aggregate additional vectors into the result as long as there's at least one full vector left to process.
+                while (i <= oneVectorFromEnd)
+                {
+                    // Load the next vector, and early exit on NaN.
+                    current = Vector256.LoadUnsafe(ref xRef, (uint)i);
+
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // Check for NaNs
+                        nanMask = ~Vector256.Equals(current, current);
+                        if (nanMask != Vector256<T>.Zero)
+                        {
+                            return current.GetElement(IndexOfFirstMatch(nanMask));
+                        }
+                    }
+
+                    result = TMinMaxOperator.Invoke(result, current);
+                    i += Vector256<T>.Count;
+                }
+
+                // If any elements remain, handle them in one final vector.
+                if (i != x.Length)
+                {
+                    current = Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256<T>.Count));
+
+
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // Check for NaNs
+                        nanMask = ~Vector256.Equals(current, current);
+                        if (nanMask != Vector256<T>.Zero)
+                        {
+                            return current.GetElement(IndexOfFirstMatch(nanMask));
+                        }
+                    }
+
+                    result = TMinMaxOperator.Invoke(result, current);
+                }
+
+                // Aggregate the lanes in the vector to create the final scalar result.
+                return TMinMaxOperator.Invoke(result);
+            }
+
+            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && x.Length >= Vector128<T>.Count)
+            {
+                ref T xRef = ref MemoryMarshal.GetReference(x);
+
+                // Load the first vector as the initial set of results, and bail immediately
+                // to scalar handling if it contains any NaNs (which don't compare equally to themselves).
+                Vector128<T> result = Vector128.LoadUnsafe(ref xRef, 0);
+                Vector128<T> current;
+
+                Vector128<T> nanMask;
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    // Check for NaNs
+                    nanMask = ~Vector128.Equals(result, result);
+                    if (nanMask != Vector128<T>.Zero)
+                    {
+                        return result.GetElement(IndexOfFirstMatch(nanMask));
+                    }
+                }
+
+                int oneVectorFromEnd = x.Length - Vector128<T>.Count;
+                int i = Vector128<T>.Count;
+
+                // Aggregate additional vectors into the result as long as there's at least one full vector left to process.
+                while (i <= oneVectorFromEnd)
+                {
+                    // Load the next vector, and early exit on NaN.
+                    current = Vector128.LoadUnsafe(ref xRef, (uint)i);
+
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // Check for NaNs
+                        nanMask = ~Vector128.Equals(current, current);
+                        if (nanMask != Vector128<T>.Zero)
+                        {
+                            return current.GetElement(IndexOfFirstMatch(nanMask));
+                        }
+                    }
+
+                    result = TMinMaxOperator.Invoke(result, current);
+                    i += Vector128<T>.Count;
+                }
+
+                // If any elements remain, handle them in one final vector.
+                if (i != x.Length)
+                {
+                    current = Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128<T>.Count));
+
+                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                    {
+                        // Check for NaNs
+                        nanMask = ~Vector128.Equals(current, current);
+                        if (nanMask != Vector128<T>.Zero)
+                        {
+                            return current.GetElement(IndexOfFirstMatch(nanMask));
+                        }
+                    }
+
+                    result = TMinMaxOperator.Invoke(result, current);
+                }
+
+                // Aggregate the lanes in the vector to create the final scalar result.
+                return TMinMaxOperator.Invoke(result);
+            }
+
+            // Scalar path used when either vectorization is not supported or the input is too small to vectorize.
+            T curResult = x[0];
+            if (T.IsNaN(curResult))
+            {
+                return curResult;
+            }
+
+            for (int i = 1; i < x.Length; i++)
+            {
+                T current = x[i];
+                if (T.IsNaN(current))
+                {
+                    return current;
+                }
+
+                curResult = TMinMaxOperator.Invoke(curResult, current);
+            }
+
+            return curResult;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.MaxMagnitude.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.MaxMagnitude.cs
new file mode 100644
index 000000000000..eb28249ed1ea
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.MaxMagnitude.cs
@@ -0,0 +1,243 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Searches for the number with the largest magnitude in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <returns>The element in <paramref name="x"/> with the largest magnitude (absolute value).</returns>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be greater than zero.</exception>
+        /// <remarks>
+        /// <para>
+        /// The determination of the maximum magnitude matches the IEEE 754:2019 `maximumMagnitude` function. If any value equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>
+        /// is present, the first is returned. If two values have the same magnitude and one is positive and the other is negative,
+        /// the positive value is considered to have the larger magnitude.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T MaxMagnitude<T>(ReadOnlySpan<T> x)
+            where T : INumberBase<T> =>
+            MinMaxCore<T, MaxMagnitudeOperator<T>>(x);
+
+        /// <summary>Computes the element-wise number with the largest magnitude in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.MaxMagnitude(<paramref name="x" />[i], <paramref name="y" />[i])</c>.</remarks>
+        /// <remarks>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void MaxMagnitude<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : INumberBase<T> =>
+            InvokeSpanSpanIntoSpan<T, MaxMagnitudePropagateNaNOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise number with the largest magnitude in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.MaxMagnitude(<paramref name="x" />[i], <paramref name="y" />)</c>.</remarks>
+        /// <remarks>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void MaxMagnitude<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : INumberBase<T> =>
+            InvokeSpanScalarIntoSpan<T, MaxMagnitudePropagateNaNOperator<T>>(x, y, destination);
+
+        /// <summary>Searches for the smallest number in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <returns>The minimum element in <paramref name="x"/>.</returns>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be greater than zero.</exception>
+        /// <remarks>
+        /// <para>
+        /// The determination of the minimum element matches the IEEE 754:2019 `minimum` function. If any value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>
+        /// is present, the first is returned. Negative 0 is considered smaller than positive 0.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T Min<T>(ReadOnlySpan<T> x)
+            where T : INumber<T> =>
+            MinMaxCore<T, MinOperator<T>>(x);
+
+        /// <summary>Operator to get x or y based on which has the larger MathF.Abs (but NaNs may not be propagated)</summary>
+        internal readonly struct MaxMagnitudeOperator<T> : IAggregationOperator<T>
+            where T : INumberBase<T>
+        {
+            public static bool Vectorizable => true;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static T Invoke(T x, T y) => T.MaxMagnitude(x, y);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
+            {
+                Vector128<T> xMag = Vector128.Abs(x), yMag = Vector128.Abs(y);
+
+                Vector128<T> result =
+                    Vector128.ConditionalSelect(Vector128.Equals(xMag, yMag),
+                        Vector128.ConditionalSelect(IsNegative(x), y, x),
+                        Vector128.ConditionalSelect(Vector128.GreaterThan(xMag, yMag), x, y));
+
+                // Handle minimum signed value that should have the largest magnitude
+                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
+                {
+                    Vector128<T> negativeMagnitudeX = Vector128.LessThan(xMag, Vector128<T>.Zero);
+                    Vector128<T> negativeMagnitudeY = Vector128.LessThan(yMag, Vector128<T>.Zero);
+                    result = Vector128.ConditionalSelect(negativeMagnitudeX,
+                        x,
+                        Vector128.ConditionalSelect(negativeMagnitudeY,
+                            y,
+                            result));
+                }
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
+            {
+                Vector256<T> xMag = Vector256.Abs(x), yMag = Vector256.Abs(y);
+
+                Vector256<T> result =
+                    Vector256.ConditionalSelect(Vector256.Equals(xMag, yMag),
+                        Vector256.ConditionalSelect(IsNegative(x), y, x),
+                        Vector256.ConditionalSelect(Vector256.GreaterThan(xMag, yMag), x, y));
+
+                // Handle minimum signed value that should have the largest magnitude
+                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
+                {
+                    Vector256<T> negativeMagnitudeX = Vector256.LessThan(xMag, Vector256<T>.Zero);
+                    Vector256<T> negativeMagnitudeY = Vector256.LessThan(yMag, Vector256<T>.Zero);
+                    result = Vector256.ConditionalSelect(negativeMagnitudeX,
+                        x,
+                        Vector256.ConditionalSelect(negativeMagnitudeY,
+                            y,
+                            result));
+                }
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
+            {
+                Vector512<T> xMag = Vector512.Abs(x), yMag = Vector512.Abs(y);
+
+                Vector512<T> result =
+                    Vector512.ConditionalSelect(Vector512.Equals(xMag, yMag),
+                        Vector512.ConditionalSelect(IsNegative(x), y, x),
+                        Vector512.ConditionalSelect(Vector512.GreaterThan(xMag, yMag), x, y));
+
+                // Handle minimum signed value that should have the largest magnitude
+                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
+                {
+                    Vector512<T> negativeMagnitudeX = Vector512.LessThan(xMag, Vector512<T>.Zero);
+                    Vector512<T> negativeMagnitudeY = Vector512.LessThan(yMag, Vector512<T>.Zero);
+                    result = Vector512.ConditionalSelect(negativeMagnitudeX,
+                        x,
+                        Vector512.ConditionalSelect(negativeMagnitudeY,
+                            y,
+                            result));
+                }
+
+                return result;
+            }
+
+            public static T Invoke(Vector128<T> x) => HorizontalAggregate<T, MaxMagnitudeOperator<T>>(x);
+            public static T Invoke(Vector256<T> x) => HorizontalAggregate<T, MaxMagnitudeOperator<T>>(x);
+            public static T Invoke(Vector512<T> x) => HorizontalAggregate<T, MaxMagnitudeOperator<T>>(x);
+        }
+
+        /// <summary>Operator to get x or y based on which has the larger MathF.Abs</summary>
+        internal readonly struct MaxMagnitudePropagateNaNOperator<T> : IBinaryOperator<T>
+            where T : INumberBase<T>
+        {
+            public static bool Vectorizable => true;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static T Invoke(T x, T y) => T.MaxMagnitude(x, y);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
+            {
+                // Handle NaNs
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    Vector128<T> xMag = Vector128.Abs(x), yMag = Vector128.Abs(y);
+                    return
+                        Vector128.ConditionalSelect(Vector128.Equals(x, x),
+                            Vector128.ConditionalSelect(Vector128.Equals(y, y),
+                                Vector128.ConditionalSelect(Vector128.Equals(yMag, xMag),
+                                    Vector128.ConditionalSelect(IsNegative(x), y, x),
+                                    Vector128.ConditionalSelect(Vector128.GreaterThan(yMag, xMag), y, x)),
+                                y),
+                            x);
+                }
+
+                return MaxMagnitudeOperator<T>.Invoke(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
+            {
+                // Handle NaNs
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    Vector256<T> xMag = Vector256.Abs(x), yMag = Vector256.Abs(y);
+                    return
+                        Vector256.ConditionalSelect(Vector256.Equals(x, x),
+                            Vector256.ConditionalSelect(Vector256.Equals(y, y),
+                                Vector256.ConditionalSelect(Vector256.Equals(xMag, yMag),
+                                    Vector256.ConditionalSelect(IsNegative(x), y, x),
+                                    Vector256.ConditionalSelect(Vector256.GreaterThan(xMag, yMag), x, y)),
+                                y),
+                            x);
+                }
+
+                return MaxMagnitudeOperator<T>.Invoke(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
+            {
+                // Handle NaNs
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    Vector512<T> xMag = Vector512.Abs(x), yMag = Vector512.Abs(y);
+                    return
+                        Vector512.ConditionalSelect(Vector512.Equals(x, x),
+                        Vector512.ConditionalSelect(Vector512.Equals(y, y),
+                            Vector512.ConditionalSelect(Vector512.Equals(xMag, yMag),
+                                Vector512.ConditionalSelect(IsNegative(x), y, x),
+                                Vector512.ConditionalSelect(Vector512.GreaterThan(xMag, yMag), x, y)),
+                            y),
+                        x);
+                }
+
+                return MaxMagnitudeOperator<T>.Invoke(x, y);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Min.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Min.cs
new file mode 100644
index 000000000000..faea47805798
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Min.cs
@@ -0,0 +1,220 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise minimum of the numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Max(<paramref name="x" />[i], <paramref name="y" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
+        /// that value is stored as the result. Positive 0 is considered greater than negative 0.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Min<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : INumber<T> =>
+            InvokeSpanSpanIntoSpan<T, MinPropagateNaNOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise minimum of the numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Max(<paramref name="x" />[i], <paramref name="y" />)</c>.
+        /// </para>
+        /// <para>
+        /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
+        /// that value is stored as the result. Positive 0 is considered greater than negative 0.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Min<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : INumber<T> =>
+            InvokeSpanScalarIntoSpan<T, MinPropagateNaNOperator<T>>(x, y, destination);
+
+        /// <summary>T.Min(x, y) (but NaNs may not be propagated)</summary>
+        internal readonly struct MinOperator<T> : IAggregationOperator<T>
+            where T : INumber<T>
+        {
+            public static bool Vectorizable => true;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static T Invoke(T x, T y)
+            {
+                if (typeof(T) == typeof(Half) || typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    return x == y ?
+                        (IsNegative(y) ? y : x) :
+                        (y < x ? y : x);
+                }
+
+                return T.Min(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
+            {
+                if (AdvSimd.IsSupported)
+                {
+                    if (typeof(T) == typeof(byte)) return AdvSimd.Min(x.AsByte(), y.AsByte()).As<byte, T>();
+                    if (typeof(T) == typeof(sbyte)) return AdvSimd.Min(x.AsSByte(), y.AsSByte()).As<sbyte, T>();
+                    if (typeof(T) == typeof(short)) return AdvSimd.Min(x.AsInt16(), y.AsInt16()).As<short, T>();
+                    if (typeof(T) == typeof(ushort)) return AdvSimd.Min(x.AsUInt16(), y.AsUInt16()).As<ushort, T>();
+                    if (typeof(T) == typeof(int)) return AdvSimd.Min(x.AsInt32(), y.AsInt32()).As<int, T>();
+                    if (typeof(T) == typeof(uint)) return AdvSimd.Min(x.AsUInt32(), y.AsUInt32()).As<uint, T>();
+                    if (typeof(T) == typeof(float)) return AdvSimd.Min(x.AsSingle(), y.AsSingle()).As<float, T>();
+                }
+
+                if (AdvSimd.Arm64.IsSupported)
+                {
+                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.Min(x.AsDouble(), y.AsDouble()).As<double, T>();
+                }
+
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    return
+                        Vector128.ConditionalSelect(Vector128.Equals(x, y),
+                            Vector128.ConditionalSelect(IsNegative(y), y, x),
+                            Vector128.Min(x, y));
+                }
+
+                return Vector128.Min(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
+            {
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    return Vector256.ConditionalSelect(Vector256.Equals(x, y),
+                        Vector256.ConditionalSelect(IsNegative(y), y, x),
+                        Vector256.Min(x, y));
+                }
+
+                return Vector256.Min(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
+            {
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    return Vector512.ConditionalSelect(Vector512.Equals(x, y),
+                        Vector512.ConditionalSelect(IsNegative(y), y, x),
+                        Vector512.Min(x, y));
+                }
+
+                return Vector512.Min(x, y);
+            }
+
+            public static T Invoke(Vector128<T> x) => HorizontalAggregate<T, MinOperator<T>>(x);
+            public static T Invoke(Vector256<T> x) => HorizontalAggregate<T, MinOperator<T>>(x);
+            public static T Invoke(Vector512<T> x) => HorizontalAggregate<T, MinOperator<T>>(x);
+        }
+
+        /// <summary>T.Min(x, y)</summary>
+        internal readonly struct MinPropagateNaNOperator<T> : IBinaryOperator<T>
+            where T : INumber<T>
+        {
+            public static bool Vectorizable => true;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static T Invoke(T x, T y) => T.Min(x, y);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
+            {
+                if (AdvSimd.IsSupported)
+                {
+                    if (typeof(T) == typeof(byte)) return AdvSimd.Min(x.AsByte(), y.AsByte()).As<byte, T>();
+                    if (typeof(T) == typeof(sbyte)) return AdvSimd.Min(x.AsSByte(), y.AsSByte()).As<sbyte, T>();
+                    if (typeof(T) == typeof(short)) return AdvSimd.Min(x.AsInt16(), y.AsInt16()).As<short, T>();
+                    if (typeof(T) == typeof(ushort)) return AdvSimd.Min(x.AsUInt16(), y.AsUInt16()).As<ushort, T>();
+                    if (typeof(T) == typeof(int)) return AdvSimd.Min(x.AsInt32(), y.AsInt32()).As<int, T>();
+                    if (typeof(T) == typeof(uint)) return AdvSimd.Min(x.AsUInt32(), y.AsUInt32()).As<uint, T>();
+                    if (typeof(T) == typeof(float)) return AdvSimd.Min(x.AsSingle(), y.AsSingle()).As<float, T>();
+                }
+
+                if (AdvSimd.Arm64.IsSupported)
+                {
+                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.Min(x.AsDouble(), y.AsDouble()).As<double, T>();
+                }
+
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    return
+                        Vector128.ConditionalSelect(Vector128.Equals(x, x),
+                            Vector128.ConditionalSelect(Vector128.Equals(y, y),
+                                Vector128.ConditionalSelect(Vector128.Equals(x, y),
+                                    Vector128.ConditionalSelect(IsNegative(x), x, y),
+                                    Vector128.Min(x, y)),
+                                y),
+                            x);
+                }
+
+                return Vector128.Min(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
+            {
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    return
+                        Vector256.ConditionalSelect(Vector256.Equals(x, x),
+                            Vector256.ConditionalSelect(Vector256.Equals(y, y),
+                                Vector256.ConditionalSelect(Vector256.Equals(x, y),
+                                    Vector256.ConditionalSelect(IsNegative(x), x, y),
+                                    Vector256.Min(x, y)),
+                                y),
+                            x);
+                }
+
+                return Vector256.Min(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
+            {
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    return
+                        Vector512.ConditionalSelect(Vector512.Equals(x, x),
+                            Vector512.ConditionalSelect(Vector512.Equals(y, y),
+                                Vector512.ConditionalSelect(Vector512.Equals(x, y),
+                                    Vector512.ConditionalSelect(IsNegative(x), x, y),
+                                    Vector512.Min(x, y)),
+                                y),
+                            x);
+                }
+
+                return Vector512.Min(x, y);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.MinMagnitude.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.MinMagnitude.cs
new file mode 100644
index 000000000000..47b492eaffb8
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.MinMagnitude.cs
@@ -0,0 +1,232 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Searches for the number with the smallest magnitude in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <returns>The element in <paramref name="x"/> with the smallest magnitude (absolute value).</returns>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be greater than zero.</exception>
+        /// <remarks>
+        /// <para>
+        /// The determination of the minimum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If any value equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>
+        /// is present, the first is returned. If two values have the same magnitude and one is positive and the other is negative,
+        /// the negative value is considered to have the smaller magnitude.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T MinMagnitude<T>(ReadOnlySpan<T> x)
+            where T : INumberBase<T> =>
+            MinMaxCore<T, MinMagnitudeOperator<T>>(x);
+
+        /// <summary>Computes the element-wise number with the smallest magnitude in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.MinMagnitude(<paramref name="x" />[i], <paramref name="y" />[i])</c>.</remarks>
+        /// <remarks>
+        /// <para>
+        /// The determination of the maximum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If either value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
+        /// that value is stored as the result. If the two values have the same magnitude and one is positive and the other is negative,
+        /// the negative value is considered to have the smaller magnitude.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void MinMagnitude<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : INumberBase<T> =>
+            InvokeSpanSpanIntoSpan<T, MinMagnitudePropagateNaNOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise number with the smallest magnitude in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.MinMagnitude(<paramref name="x" />[i], <paramref name="y" />)</c>.</remarks>
+        /// <remarks>
+        /// <para>
+        /// The determination of the maximum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If either value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
+        /// that value is stored as the result. If the two values have the same magnitude and one is positive and the other is negative,
+        /// the negative value is considered to have the smaller magnitude.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void MinMagnitude<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : INumberBase<T> =>
+            InvokeSpanScalarIntoSpan<T, MinMagnitudePropagateNaNOperator<T>>(x, y, destination);
+
+        /// <summary>Operator to get x or y based on which has the smaller MathF.Abs (but NaNs may not be propagated)</summary>
+        internal readonly struct MinMagnitudeOperator<T> : IAggregationOperator<T>
+            where T : INumberBase<T>
+        {
+            public static bool Vectorizable => true;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static T Invoke(T x, T y) => T.MinMagnitude(x, y);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
+            {
+                Vector128<T> xMag = Vector128.Abs(x), yMag = Vector128.Abs(y);
+
+                Vector128<T> result =
+                    Vector128.ConditionalSelect(Vector128.Equals(yMag, xMag),
+                        Vector128.ConditionalSelect(IsNegative(y), y, x),
+                        Vector128.ConditionalSelect(Vector128.LessThan(yMag, xMag), y, x));
+
+                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
+                {
+                    Vector128<T> negativeMagnitudeX = Vector128.LessThan(xMag, Vector128<T>.Zero);
+                    Vector128<T> negativeMagnitudeY = Vector128.LessThan(yMag, Vector128<T>.Zero);
+                    result = Vector128.ConditionalSelect(negativeMagnitudeX,
+                        y,
+                        Vector128.ConditionalSelect(negativeMagnitudeY,
+                            x,
+                            result));
+                }
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
+            {
+                Vector256<T> xMag = Vector256.Abs(x), yMag = Vector256.Abs(y);
+
+                Vector256<T> result =
+                    Vector256.ConditionalSelect(Vector256.Equals(yMag, xMag),
+                        Vector256.ConditionalSelect(IsNegative(y), y, x),
+                        Vector256.ConditionalSelect(Vector256.LessThan(yMag, xMag), y, x));
+
+                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
+                {
+                    Vector256<T> negativeMagnitudeX = Vector256.LessThan(xMag, Vector256<T>.Zero);
+                    Vector256<T> negativeMagnitudeY = Vector256.LessThan(yMag, Vector256<T>.Zero);
+                    result = Vector256.ConditionalSelect(negativeMagnitudeX,
+                        y,
+                        Vector256.ConditionalSelect(negativeMagnitudeY,
+                            x,
+                            result));
+                }
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
+            {
+                Vector512<T> xMag = Vector512.Abs(x), yMag = Vector512.Abs(y);
+
+                Vector512<T> result =
+                    Vector512.ConditionalSelect(Vector512.Equals(yMag, xMag),
+                        Vector512.ConditionalSelect(IsNegative(y), y, x),
+                        Vector512.ConditionalSelect(Vector512.LessThan(yMag, xMag), y, x));
+
+                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
+                {
+                    Vector512<T> negativeMagnitudeX = Vector512.LessThan(xMag, Vector512<T>.Zero);
+                    Vector512<T> negativeMagnitudeY = Vector512.LessThan(yMag, Vector512<T>.Zero);
+                    result = Vector512.ConditionalSelect(negativeMagnitudeX,
+                        y,
+                        Vector512.ConditionalSelect(negativeMagnitudeY,
+                            x,
+                            result));
+                }
+
+                return result;
+            }
+
+            public static T Invoke(Vector128<T> x) => HorizontalAggregate<T, MinMagnitudeOperator<T>>(x);
+            public static T Invoke(Vector256<T> x) => HorizontalAggregate<T, MinMagnitudeOperator<T>>(x);
+            public static T Invoke(Vector512<T> x) => HorizontalAggregate<T, MinMagnitudeOperator<T>>(x);
+        }
+
+        /// <summary>Operator to get x or y based on which has the smaller MathF.Abs</summary>
+        internal readonly struct MinMagnitudePropagateNaNOperator<T> : IBinaryOperator<T>
+            where T : INumberBase<T>
+        {
+            public static bool Vectorizable => true;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static T Invoke(T x, T y) => T.MinMagnitude(x, y);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
+            {
+                // Handle NaNs
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    Vector128<T> xMag = Vector128.Abs(x), yMag = Vector128.Abs(y);
+                    return
+                        Vector128.ConditionalSelect(Vector128.Equals(x, x),
+                            Vector128.ConditionalSelect(Vector128.Equals(y, y),
+                                Vector128.ConditionalSelect(Vector128.Equals(yMag, xMag),
+                                    Vector128.ConditionalSelect(IsNegative(x), x, y),
+                                    Vector128.ConditionalSelect(Vector128.LessThan(xMag, yMag), x, y)),
+                                y),
+                            x);
+                }
+
+                return MinMagnitudeOperator<T>.Invoke(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
+            {
+                // Handle NaNs
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    Vector256<T> xMag = Vector256.Abs(x), yMag = Vector256.Abs(y);
+                    return
+                        Vector256.ConditionalSelect(Vector256.Equals(x, x),
+                            Vector256.ConditionalSelect(Vector256.Equals(y, y),
+                                Vector256.ConditionalSelect(Vector256.Equals(yMag, xMag),
+                                    Vector256.ConditionalSelect(IsNegative(x), x, y),
+                                    Vector256.ConditionalSelect(Vector256.LessThan(xMag, yMag), x, y)),
+                                y),
+                            x);
+                }
+
+                return MinMagnitudeOperator<T>.Invoke(x, y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
+            {
+                // Handle NaNs
+                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
+                {
+                    Vector512<T> xMag = Vector512.Abs(x), yMag = Vector512.Abs(y);
+                    return
+                        Vector512.ConditionalSelect(Vector512.Equals(x, x),
+                            Vector512.ConditionalSelect(Vector512.Equals(y, y),
+                                Vector512.ConditionalSelect(Vector512.Equals(yMag, xMag),
+                                    Vector512.ConditionalSelect(IsNegative(x), x, y),
+                                    Vector512.ConditionalSelect(Vector512.LessThan(xMag, yMag), x, y)),
+                                y),
+                            x);
+                }
+
+                return MinMagnitudeOperator<T>.Invoke(x, y);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Multiply.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Multiply.cs
new file mode 100644
index 000000000000..80d0f488fd97
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Multiply.cs
@@ -0,0 +1,66 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise product of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] * <paramref name="y" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Multiply<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T> =>
+            InvokeSpanSpanIntoSpan<T, MultiplyOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise product of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] * <paramref name="y" /></c>.
+        /// It corresponds to the <c>scal</c> method defined by <c>BLAS1</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Multiply<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T> =>
+            InvokeSpanScalarIntoSpan<T, MultiplyOperator<T>>(x, y, destination);
+
+        /// <summary>x * y</summary>
+        internal readonly struct MultiplyOperator<T> : IAggregationOperator<T> where T : IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T>
+        {
+            public static bool Vectorizable => true;
+
+            public static T Invoke(T x, T y) => x * y;
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x * y;
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x * y;
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x * y;
+
+            public static T Invoke(Vector128<T> x) => HorizontalAggregate<T, MultiplyOperator<T>>(x);
+            public static T Invoke(Vector256<T> x) => HorizontalAggregate<T, MultiplyOperator<T>>(x);
+            public static T Invoke(Vector512<T> x) => HorizontalAggregate<T, MultiplyOperator<T>>(x);
+
+            public static T IdentityValue => T.MultiplicativeIdentity;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.MultiplyAdd.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.MultiplyAdd.cs
new file mode 100644
index 000000000000..07042b32c379
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.MultiplyAdd.cs
@@ -0,0 +1,84 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="addend">The third tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" /> and length of <paramref name="addend" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="addend"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void MultiplyAdd<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, ReadOnlySpan<T> addend, Span<T> destination)
+            where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T> =>
+            InvokeSpanSpanSpanIntoSpan<T, MultiplyAddOperator<T>>(x, y, addend, destination);
+
+        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="addend">The third tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" /></c>.
+        /// It corresponds to the <c>axpy</c> method defined by <c>BLAS1</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void MultiplyAdd<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, T addend, Span<T> destination)
+            where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T> =>
+            InvokeSpanSpanScalarIntoSpan<T, MultiplyAddOperator<T>>(x, y, addend, destination);
+
+        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="addend">The third tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="addend" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="addend"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />) + <paramref name="addend" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void MultiplyAdd<T>(ReadOnlySpan<T> x, T y, ReadOnlySpan<T> addend, Span<T> destination)
+            where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T> =>
+            InvokeSpanScalarSpanIntoSpan<T, MultiplyAddOperator<T>>(x, y, addend, destination);
+
+        /// <summary>(x * y) + z</summary>
+        internal readonly struct MultiplyAddOperator<T> : ITernaryOperator<T> where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T>
+        {
+            public static T Invoke(T x, T y, T z) => (x * y) + z;
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z) => (x * y) + z;
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> z) => (x * y) + z;
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z) => (x * y) + z;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.MultiplyAddEstimate.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.MultiplyAddEstimate.cs
new file mode 100644
index 000000000000..3bd615e54701
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.MultiplyAddEstimate.cs
@@ -0,0 +1,169 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="addend">The third tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" /> and length of <paramref name="addend" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="addend"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// <para>
+        /// Behaves the same as either <see cref="MultiplyAdd{T}(ReadOnlySpan{T}, ReadOnlySpan{T}, ReadOnlySpan{T}, Span{T})"/> or
+        /// <see cref="FusedMultiplyAdd{T}(ReadOnlySpan{T}, ReadOnlySpan{T}, ReadOnlySpan{T}, Span{T})"/> depending on the current machine's capabilities.
+        /// </para>
+        /// </remarks>
+        public static void MultiplyAddEstimate<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, ReadOnlySpan<T> addend, Span<T> destination)
+            where T : INumberBase<T> =>
+            InvokeSpanSpanSpanIntoSpan<T, MultiplyAddEstimateOperator<T>>(x, y, addend, destination);
+
+        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="addend">The third tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" /></c>.
+        /// It corresponds to the <c>axpy</c> method defined by <c>BLAS1</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// <para>
+        /// Behaves the same as either <see cref="MultiplyAdd{T}(ReadOnlySpan{T}, ReadOnlySpan{T}, T, Span{T})"/> or
+        /// <see cref="FusedMultiplyAdd{T}(ReadOnlySpan{T}, ReadOnlySpan{T}, T, Span{T})"/> depending on the current machine's capabilities.
+        /// </para>
+        /// </remarks>
+        public static void MultiplyAddEstimate<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, T addend, Span<T> destination)
+            where T : INumberBase<T> =>
+            InvokeSpanSpanScalarIntoSpan<T, MultiplyAddEstimateOperator<T>>(x, y, addend, destination);
+
+        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="addend">The third tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="addend" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="addend"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />) + <paramref name="addend" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// <para>
+        /// Behaves the same as either <see cref="MultiplyAdd{T}(ReadOnlySpan{T}, T, ReadOnlySpan{T}, Span{T})"/> or
+        /// <see cref="FusedMultiplyAdd{T}(ReadOnlySpan{T}, T, ReadOnlySpan{T}, Span{T})"/> depending on the current machine's capabilities.
+        /// </para>
+        /// </remarks>
+        public static void MultiplyAddEstimate<T>(ReadOnlySpan<T> x, T y, ReadOnlySpan<T> addend, Span<T> destination)
+            where T : INumberBase<T> =>
+            InvokeSpanScalarSpanIntoSpan<T, MultiplyAddEstimateOperator<T>>(x, y, addend, destination);
+
+        /// <summary>(x * y) + z</summary>
+        private readonly struct MultiplyAddEstimateOperator<T> : ITernaryOperator<T> where T : INumberBase<T>
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static T Invoke(T x, T y, T z)
+            {
+                // TODO https://github.com/dotnet/runtime/issues/98053: Use T.MultiplyAddEstimate when it's available.
+
+                if (Fma.IsSupported || AdvSimd.IsSupported)
+                {
+                    if (typeof(T) == typeof(Half))
+                    {
+                        Half result = Half.FusedMultiplyAdd(Unsafe.As<T, Half>(ref x), Unsafe.As<T, Half>(ref y), Unsafe.As<T, Half>(ref z));
+                        return Unsafe.As<Half, T>(ref result);
+                    }
+
+                    if (typeof(T) == typeof(float))
+                    {
+                        float result = float.FusedMultiplyAdd(Unsafe.As<T, float>(ref x), Unsafe.As<T, float>(ref y), Unsafe.As<T, float>(ref z));
+                        return Unsafe.As<float, T>(ref result);
+                    }
+
+                    if (typeof(T) == typeof(double))
+                    {
+                        double result = double.FusedMultiplyAdd(Unsafe.As<T, double>(ref x), Unsafe.As<T, double>(ref y), Unsafe.As<T, double>(ref z));
+                        return Unsafe.As<double, T>(ref result);
+                    }
+                }
+
+                return (x * y) + z;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z)
+            {
+                if (Fma.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Fma.MultiplyAdd(x.AsSingle(), y.AsSingle(), z.AsSingle()).As<float, T>();
+                    if (typeof(T) == typeof(double)) return Fma.MultiplyAdd(x.AsDouble(), y.AsDouble(), z.AsDouble()).As<double, T>();
+                }
+
+                if (AdvSimd.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return AdvSimd.FusedMultiplyAdd(z.AsSingle(), x.AsSingle(), y.AsSingle()).As<float, T>();
+                }
+
+                if (AdvSimd.Arm64.IsSupported)
+                {
+                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.FusedMultiplyAdd(z.AsDouble(), x.AsDouble(), y.AsDouble()).As<double, T>();
+                }
+
+                return (x * y) + z;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> z)
+            {
+                if (Fma.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Fma.MultiplyAdd(x.AsSingle(), y.AsSingle(), z.AsSingle()).As<float, T>();
+                    if (typeof(T) == typeof(double)) return Fma.MultiplyAdd(x.AsDouble(), y.AsDouble(), z.AsDouble()).As<double, T>();
+                }
+
+                return (x * y) + z;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z)
+            {
+                if (Avx512F.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Avx512F.FusedMultiplyAdd(x.AsSingle(), y.AsSingle(), z.AsSingle()).As<float, T>();
+                    if (typeof(T) == typeof(double)) return Avx512F.FusedMultiplyAdd(x.AsDouble(), y.AsDouble(), z.AsDouble()).As<double, T>();
+                }
+
+                return (x * y) + z;
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Negate.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Negate.cs
new file mode 100644
index 000000000000..15e349475918
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Negate.cs
@@ -0,0 +1,37 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise negation of each number in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = -<paramref name="x" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If any of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Negate<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IUnaryNegationOperators<T, T> =>
+            InvokeSpanIntoSpan<T, NegateOperator<T>>(x, destination);
+
+        /// <summary>-x</summary>
+        internal readonly struct NegateOperator<T> : IUnaryOperator<T, T> where T : IUnaryNegationOperators<T, T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x) => -x;
+            public static Vector128<T> Invoke(Vector128<T> x) => -x;
+            public static Vector256<T> Invoke(Vector256<T> x) => -x;
+            public static Vector512<T> Invoke(Vector512<T> x) => -x;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Norm.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Norm.cs
new file mode 100644
index 000000000000..4f12b8338008
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Norm.cs
@@ -0,0 +1,29 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the Euclidean norm of the specified tensor of numbers.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <returns>The norm.</returns>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><typeparamref name="T"/>.Sqrt(TensorPrimitives.SumOfSquares(x))</c>.
+        /// This is often referred to as the Euclidean norm or L2 norm.
+        /// It corresponds to the <c>nrm2</c> method defined by <c>BLAS1</c>.
+        /// </para>
+        /// <para>
+        /// If any of the input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result value is also NaN.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T Norm<T>(ReadOnlySpan<T> x)
+            where T : IRootFunctions<T> =>
+            T.Sqrt(SumOfSquares(x));
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.OnesComplement.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.OnesComplement.cs
new file mode 100644
index 000000000000..fc9dea6c420b
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.OnesComplement.cs
@@ -0,0 +1,34 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise one's complement of numbers in the specified tensor.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = ~<paramref name="x" />[i]</c>.
+        /// </para>
+        /// </remarks>
+        public static void OnesComplement<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IBitwiseOperators<T, T, T> =>
+            InvokeSpanIntoSpan<T, OnesComplementOperator<T>>(x, destination);
+
+        /// <summary>~x</summary>
+        private readonly struct OnesComplementOperator<T> : IUnaryOperator<T, T> where T : IBitwiseOperators<T, T, T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x) => ~x;
+            public static Vector128<T> Invoke(Vector128<T> x) => ~x;
+            public static Vector256<T> Invoke(Vector256<T> x) => ~x;
+            public static Vector512<T> Invoke(Vector512<T> x) => ~x;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.PopCount.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.PopCount.cs
new file mode 100644
index 000000000000..8bc90f3c6968
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.PopCount.cs
@@ -0,0 +1,200 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.Wasm;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise population count of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.PopCount(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void PopCount<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IBinaryInteger<T> =>
+            InvokeSpanIntoSpan<T, PopCountOperator<T>>(x, destination);
+
+        /// <summary>T.PopCount(x)</summary>
+        private readonly unsafe struct PopCountOperator<T> : IUnaryOperator<T, T> where T : IBinaryInteger<T>
+        {
+            // TODO https://github.com/dotnet/runtime/issues/96162: Use AVX512 popcount operations when available
+
+            public static bool Vectorizable =>
+                // The implementation uses a vectorized version of the BitOperations.PopCount software fallback:
+                // https://github.com/dotnet/runtime/blob/aff061bab1b6d9ccd5731bd16fa8e89ad82ab75a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs#L496-L508
+                // This relies on 64-bit shifts for sizeof(T) == 8, and such shifts aren't accelerated on today's hardware.
+                // Alternative approaches, such as doing two 32-bit operations and combining them were observed to not
+                // provide any meaningfuls speedup over scalar. So for now, we don't vectorize when sizeof(T) == 8.
+                sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4;
+
+            public static T Invoke(T x) => T.PopCount(x);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (sizeof(T) == 1)
+                {
+                    if (AdvSimd.IsSupported)
+                    {
+                        return AdvSimd.PopCount(x.AsByte()).As<byte, T>();
+                    }
+
+                    if (PackedSimd.IsSupported)
+                    {
+                        return PackedSimd.PopCount(x.AsByte()).As<byte, T>();
+                    }
+
+                    Vector128<byte> c1 = Vector128.Create((byte)0x55);
+                    Vector128<byte> c2 = Vector128.Create((byte)0x33);
+                    Vector128<byte> c3 = Vector128.Create((byte)0x0F);
+
+                    // We don't have a per element shuffle for byte on some platforms.
+                    // However, we do currently always have a 16-bit shift available and
+                    // due to how the algorithm works, we don't need to worry about
+                    // any bits that shift into the lower 8-bits from the upper 8-bits.
+                    Vector128<byte> tmp = x.AsByte();
+                    tmp -= (x.AsUInt16() >> 1).AsByte() & c1;
+                    tmp = (tmp & c2) + ((tmp.AsUInt16() >> 2).AsByte() & c2);
+                    return ((tmp + (tmp.AsUInt16() >> 4).AsByte()) & c3).As<byte, T>();
+                }
+
+                if (sizeof(T) == 2)
+                {
+                    Vector128<ushort> c1 = Vector128.Create((ushort)0x5555);
+                    Vector128<ushort> c2 = Vector128.Create((ushort)0x3333);
+                    Vector128<ushort> c3 = Vector128.Create((ushort)0x0F0F);
+                    Vector128<ushort> c4 = Vector128.Create((ushort)0x0101);
+
+                    Vector128<ushort> tmp = x.AsUInt16();
+                    tmp -= (tmp >> 1) & c1;
+                    tmp = (tmp & c2) + ((tmp >> 2) & c2);
+                    tmp = (((tmp + (tmp >> 4)) & c3) * c4) >> 8;
+                    return tmp.As<ushort, T>();
+                }
+
+                Debug.Assert(sizeof(T) == 4);
+                {
+                    Vector128<uint> c1 = Vector128.Create(0x55555555u);
+                    Vector128<uint> c2 = Vector128.Create(0x33333333u);
+                    Vector128<uint> c3 = Vector128.Create(0x0F0F0F0Fu);
+                    Vector128<uint> c4 = Vector128.Create(0x01010101u);
+
+                    Vector128<uint> tmp = x.AsUInt32();
+                    tmp -= (tmp >> 1) & c1;
+                    tmp = (tmp & c2) + ((tmp >> 2) & c2);
+                    tmp = (((tmp + (tmp >> 4)) & c3) * c4) >> 24;
+                    return tmp.As<uint, T>();
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (sizeof(T) == 1)
+                {
+                    Vector256<byte> c1 = Vector256.Create((byte)0x55);
+                    Vector256<byte> c2 = Vector256.Create((byte)0x33);
+                    Vector256<byte> c3 = Vector256.Create((byte)0x0F);
+
+                    // We don't have a per element shuffle for byte on some platforms.
+                    // However, we do currently always have a 16-bit shift available and
+                    // due to how the algorithm works, we don't need to worry about
+                    // any bits that shift into the lower 8-bits from the upper 8-bits.
+                    Vector256<byte> tmp = x.AsByte();
+                    tmp -= (x.AsUInt16() >> 1).AsByte() & c1;
+                    tmp = (tmp & c2) + ((tmp.AsUInt16() >> 2).AsByte() & c2);
+                    return ((tmp + (tmp.AsUInt16() >> 4).AsByte()) & c3).As<byte, T>();
+                }
+
+                if (sizeof(T) == 2)
+                {
+                    Vector256<ushort> c1 = Vector256.Create((ushort)0x5555);
+                    Vector256<ushort> c2 = Vector256.Create((ushort)0x3333);
+                    Vector256<ushort> c3 = Vector256.Create((ushort)0x0F0F);
+                    Vector256<ushort> c4 = Vector256.Create((ushort)0x0101);
+
+                    Vector256<ushort> tmp = x.AsUInt16();
+                    tmp -= (tmp >> 1) & c1;
+                    tmp = (tmp & c2) + ((tmp >> 2) & c2);
+                    tmp = (((tmp + (tmp >> 4)) & c3) * c4) >> 8;
+                    return tmp.As<ushort, T>();
+                }
+
+                Debug.Assert(sizeof(T) == 4);
+                {
+                    Vector256<uint> c1 = Vector256.Create(0x55555555u);
+                    Vector256<uint> c2 = Vector256.Create(0x33333333u);
+                    Vector256<uint> c3 = Vector256.Create(0x0F0F0F0Fu);
+                    Vector256<uint> c4 = Vector256.Create(0x01010101u);
+
+                    Vector256<uint> tmp = x.AsUInt32();
+                    tmp -= (tmp >> 1) & c1;
+                    tmp = (tmp & c2) + ((tmp >> 2) & c2);
+                    tmp = (((tmp + (tmp >> 4)) & c3) * c4) >> 24;
+                    return tmp.As<uint, T>();
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (sizeof(T) == 1)
+                {
+                    Vector512<byte> c1 = Vector512.Create((byte)0x55);
+                    Vector512<byte> c2 = Vector512.Create((byte)0x33);
+                    Vector512<byte> c3 = Vector512.Create((byte)0x0F);
+
+                    // We don't have a per element shuffle for byte on some platforms.
+                    // However, we do currently always have a 16-bit shift available and
+                    // due to how the algorithm works, we don't need to worry about
+                    // any bits that shift into the lower 8-bits from the upper 8-bits.
+                    Vector512<byte> tmp = x.AsByte();
+                    tmp -= (x.AsUInt16() >> 1).AsByte() & c1;
+                    tmp = (tmp & c2) + ((tmp.AsUInt16() >> 2).AsByte() & c2);
+                    return ((tmp + (tmp.AsUInt16() >> 4).AsByte()) & c3).As<byte, T>();
+                }
+
+                if (sizeof(T) == 2)
+                {
+                    Vector512<ushort> c1 = Vector512.Create((ushort)0x5555);
+                    Vector512<ushort> c2 = Vector512.Create((ushort)0x3333);
+                    Vector512<ushort> c3 = Vector512.Create((ushort)0x0F0F);
+                    Vector512<ushort> c4 = Vector512.Create((ushort)0x0101);
+
+                    Vector512<ushort> tmp = x.AsUInt16();
+                    tmp -= (tmp >> 1) & c1;
+                    tmp = (tmp & c2) + ((tmp >> 2) & c2);
+                    tmp = (((tmp + (tmp >> 4)) & c3) * c4) >> 8;
+                    return tmp.As<ushort, T>();
+                }
+
+                Debug.Assert(sizeof(T) == 4);
+                {
+                    Vector512<uint> c1 = Vector512.Create(0x55555555u);
+                    Vector512<uint> c2 = Vector512.Create(0x33333333u);
+                    Vector512<uint> c3 = Vector512.Create(0x0F0F0F0Fu);
+                    Vector512<uint> c4 = Vector512.Create(0x01010101u);
+
+                    Vector512<uint> tmp = x.AsUInt32();
+                    tmp -= (tmp >> 1) & c1;
+                    tmp = (tmp & c2) + ((tmp >> 2) & c2);
+                    tmp = (((tmp + (tmp >> 4)) & c3) * c4) >> 24;
+                    return tmp.As<uint, T>();
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Pow.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Pow.cs
new file mode 100644
index 000000000000..72d35ed5be77
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Pow.cs
@@ -0,0 +1,106 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise power of a number in a specified tensor raised to a number in another specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Pow(<paramref name="x" />[i], <paramref name="y" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void Pow<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : IPowerFunctions<T> =>
+            InvokeSpanSpanIntoSpan<T, PowOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise power of a number in a specified tensor raised to a number in another specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Pow(<paramref name="x" />[i], <paramref name="y" />)</c>.
+        /// </para>
+        /// </remarks>
+        public static void Pow<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : IPowerFunctions<T> =>
+            InvokeSpanScalarIntoSpan<T, PowOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise power of a number in a specified tensor raised to a number in another specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a scalar.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Pow(<paramref name="x" />, <paramref name="y" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void Pow<T>(T x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : IPowerFunctions<T> =>
+            InvokeScalarSpanIntoSpan<T, PowOperator<T>>(x, y, destination);
+
+        /// <summary>T.Pow(x, y)</summary>
+        private readonly struct PowOperator<T> : IBinaryOperator<T>
+            where T : IPowerFunctions<T>
+        {
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x, T y) => T.Pow(x, y);
+
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return ExpOperator<float>.Invoke(y.AsSingle() * LogOperator<float>.Invoke(x.AsSingle())).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return ExpOperator<double>.Invoke(y.AsDouble() * LogOperator<double>.Invoke(x.AsDouble())).As<double, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return ExpOperator<float>.Invoke(y.AsSingle() * LogOperator<float>.Invoke(x.AsSingle())).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return ExpOperator<double>.Invoke(y.AsDouble() * LogOperator<double>.Invoke(x.AsDouble())).As<double, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return ExpOperator<float>.Invoke(y.AsSingle() * LogOperator<float>.Invoke(x.AsSingle())).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return ExpOperator<double>.Invoke(y.AsDouble() * LogOperator<double>.Invoke(x.AsDouble())).As<double, T>();
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Product.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Product.cs
new file mode 100644
index 000000000000..a43d2c45c387
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Product.cs
@@ -0,0 +1,96 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the product of all elements in the specified non-empty tensor of numbers.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <returns>The result of multiplying all elements in <paramref name="x"/>.</returns>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be greater than zero.</exception>
+        /// <remarks>
+        /// <para>
+        /// If any of the input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result value is also NaN.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T Product<T>(ReadOnlySpan<T> x)
+            where T : IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T>
+        {
+            if (x.IsEmpty)
+            {
+                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
+            }
+
+            return Aggregate<T, IdentityOperator<T>, MultiplyOperator<T>>(x);
+        }
+
+        /// <summary>Computes the product of the element-wise differences of the numbers in the specified non-empty tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <returns>The result of multiplying the element-wise subtraction of the elements in the second tensor from the first tensor.</returns>
+        /// <exception cref="ArgumentException">Length of both input spans must be greater than zero.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="y"/> must have the same length.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes:
+        /// <c>
+        ///     Span&lt;T&gt; differences = ...;
+        ///     TensorPrimitives.Subtract(x, y, differences);
+        ///     T result = TensorPrimitives.Product(differences);
+        /// </c>
+        /// but without requiring additional temporary storage for the intermediate differences.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T ProductOfDifferences<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y)
+            where T : ISubtractionOperators<T, T, T>, IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T>
+        {
+            if (x.IsEmpty)
+            {
+                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
+            }
+
+            return Aggregate<T, SubtractOperator<T>, MultiplyOperator<T>>(x, y);
+        }
+
+        /// <summary>Computes the product of the element-wise sums of the numbers in the specified non-empty tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <returns>The result of multiplying the element-wise additions of the elements in each tensor.</returns>
+        /// <exception cref="ArgumentException">Length of both input spans must be greater than zero.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="y"/> must have the same length.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes:
+        /// <c>
+        ///     Span&lt;T&gt; sums = ...;
+        ///     TensorPrimitives.Add(x, y, sums);
+        ///     T result = TensorPrimitives.Product(sums);
+        /// </c>
+        /// but without requiring additional temporary storage for the intermediate sums.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T ProductOfSums<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y)
+            where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T>, IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T>
+        {
+            if (x.IsEmpty)
+            {
+                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
+            }
+
+            return Aggregate<T, AddOperator<T>, MultiplyOperator<T>>(x, y);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.RadiansToDegrees.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.RadiansToDegrees.cs
new file mode 100644
index 000000000000..53298f5cf3c0
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.RadiansToDegrees.cs
@@ -0,0 +1,34 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise conversion of each number of radians in the specified tensor to degrees.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.RadiansToDegrees(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void RadiansToDegrees<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, RadiansToDegreesOperator<T>>(x, destination);
+
+        /// <summary>T.RadiansToDegrees(x)</summary>
+        private readonly struct RadiansToDegreesOperator<T> : IUnaryOperator<T, T> where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x) => T.RadiansToDegrees(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => (x * T.CreateChecked(180)) / T.Pi;
+            public static Vector256<T> Invoke(Vector256<T> x) => (x * T.CreateChecked(180)) / T.Pi;
+            public static Vector512<T> Invoke(Vector512<T> x) => (x * T.CreateChecked(180)) / T.Pi;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Reciprocal.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Reciprocal.cs
new file mode 100644
index 000000000000..50ef635a21ad
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Reciprocal.cs
@@ -0,0 +1,186 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise reciprocal of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and an element in <paramref name="x"/> is equal to zero.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = 1 / <paramref name="x" />[i]</c>.
+        /// </para>
+        /// </remarks>
+        public static void Reciprocal<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IFloatingPoint<T> =>
+            InvokeSpanIntoSpan<T, ReciprocalOperator<T>>(x, destination);
+
+        /// <summary>Computes the element-wise reciprocal of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and an element in <paramref name="x"/> is equal to zero.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = 1 / <paramref name="x" />[i]</c>.
+        /// </para>
+        /// </remarks>
+        public static void ReciprocalEstimate<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanIntoSpan<T, ReciprocalEstimateOperator<T>>(x, destination);
+
+        /// <summary>Computes the element-wise reciprocal of the square root of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and an element in <paramref name="x"/> is equal to zero.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = 1 / <paramref name="x" />[i]</c>.
+        /// </para>
+        /// </remarks>
+        public static void ReciprocalSqrt<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanIntoSpan<T, ReciprocalSqrtOperator<T>>(x, destination);
+
+        /// <summary>Computes the element-wise reciprocal of the square root of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and an element in <paramref name="x"/> is equal to zero.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = 1 / <paramref name="x" />[i]</c>.
+        /// </para>
+        /// </remarks>
+        public static void ReciprocalSqrtEstimate<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanIntoSpan<T, ReciprocalSqrtEstimateOperator<T>>(x, destination);
+
+        private readonly struct ReciprocalOperator<T> : IUnaryOperator<T, T> where T : IFloatingPoint<T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x) => T.One / x;
+            public static Vector128<T> Invoke(Vector128<T> x) => Vector128<T>.One / x;
+            public static Vector256<T> Invoke(Vector256<T> x) => Vector256<T>.One / x;
+            public static Vector512<T> Invoke(Vector512<T> x) => Vector512<T>.One / x;
+        }
+
+        private readonly struct ReciprocalSqrtOperator<T> : IUnaryOperator<T, T> where T : IFloatingPointIeee754<T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x) => T.One / T.Sqrt(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => Vector128<T>.One / Vector128.Sqrt(x);
+            public static Vector256<T> Invoke(Vector256<T> x) => Vector256<T>.One / Vector256.Sqrt(x);
+            public static Vector512<T> Invoke(Vector512<T> x) => Vector512<T>.One / Vector512.Sqrt(x);
+        }
+
+        private readonly struct ReciprocalEstimateOperator<T> : IUnaryOperator<T, T> where T : IFloatingPointIeee754<T>
+        {
+            public static bool Vectorizable => true;
+
+            public static T Invoke(T x) => T.ReciprocalEstimate(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (Sse.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Sse.Reciprocal(x.AsSingle()).As<float, T>();
+                }
+
+                if (AdvSimd.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return AdvSimd.ReciprocalEstimate(x.AsSingle()).As<float, T>();
+                }
+
+                if (AdvSimd.Arm64.IsSupported)
+                {
+                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.ReciprocalEstimate(x.AsDouble()).As<double, T>();
+                }
+
+                return Vector128<T>.One / x;
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (Avx.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Avx.Reciprocal(x.AsSingle()).As<float, T>();
+                }
+
+                return Vector256<T>.One / x;
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (Avx512F.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Avx512F.Reciprocal14(x.AsSingle()).As<float, T>();
+                    if (typeof(T) == typeof(double)) return Avx512F.Reciprocal14(x.AsDouble()).As<double, T>();
+                }
+
+                return Vector512<T>.One / x;
+            }
+        }
+
+        private readonly struct ReciprocalSqrtEstimateOperator<T> : IUnaryOperator<T, T> where T : IFloatingPointIeee754<T>
+        {
+            public static bool Vectorizable => true;
+
+            public static T Invoke(T x) => T.ReciprocalSqrtEstimate(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (Sse.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Sse.ReciprocalSqrt(x.AsSingle()).As<float, T>();
+                }
+
+                if (AdvSimd.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return AdvSimd.ReciprocalSquareRootEstimate(x.AsSingle()).As<float, T>();
+                }
+
+                if (AdvSimd.Arm64.IsSupported)
+                {
+                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.ReciprocalSquareRootEstimate(x.AsDouble()).As<double, T>();
+                }
+
+                return Vector128<T>.One / Vector128.Sqrt(x);
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (Avx.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Avx.ReciprocalSqrt(x.AsSingle()).As<float, T>();
+                }
+
+                return Vector256<T>.One / Vector256.Sqrt(x);
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (Avx512F.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Avx512F.ReciprocalSqrt14(x.AsSingle()).As<float, T>();
+                    if (typeof(T) == typeof(double)) return Avx512F.ReciprocalSqrt14(x.AsDouble()).As<double, T>();
+                }
+
+                return Vector512<T>.One / Vector512.Sqrt(x);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.RootN.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.RootN.cs
new file mode 100644
index 000000000000..e7c394892950
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.RootN.cs
@@ -0,0 +1,75 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise n-th root of the values in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <param name="n">The degree of the root to be computed, represented as a scalar.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.RootN(<paramref name="x" />[i], <paramref name="n"/>)</c>.
+        /// </para>
+        /// </remarks>
+        public static void RootN<T>(ReadOnlySpan<T> x, int n, Span<T> destination)
+            where T : IRootFunctions<T> =>
+            InvokeSpanIntoSpan(x, new RootNOperator<T>(n), destination);
+
+        /// <summary>T.RootN(x, n)</summary>
+        private readonly struct RootNOperator<T>(int n) : IStatefulUnaryOperator<T> where T : IRootFunctions<T>
+        {
+            private readonly int _n = n;
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public T Invoke(T x) => T.RootN(x, _n);
+
+            public Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return ExpOperator<float>.Invoke(LogOperator<float>.Invoke(x.AsSingle()) / Vector128.Create((float)_n)).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return ExpOperator<double>.Invoke(LogOperator<double>.Invoke(x.AsDouble()) / Vector128.Create((double)_n)).As<double, T>();
+                }
+            }
+
+            public Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return ExpOperator<float>.Invoke(LogOperator<float>.Invoke(x.AsSingle()) / Vector256.Create((float)_n)).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return ExpOperator<double>.Invoke(LogOperator<double>.Invoke(x.AsDouble()) / Vector256.Create((double)_n)).As<double, T>();
+                }
+            }
+
+            public Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return ExpOperator<float>.Invoke(LogOperator<float>.Invoke(x.AsSingle()) / Vector512.Create((float)_n)).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return ExpOperator<double>.Invoke(LogOperator<double>.Invoke(x.AsDouble()) / Vector512.Create((double)_n)).As<double, T>();
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Rotate.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Rotate.cs
new file mode 100644
index 000000000000..e22ee978c3a6
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Rotate.cs
@@ -0,0 +1,68 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise rotation left of numbers in the specified tensor by the specified rotation amount.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <param name="rotateAmount">The number of bits to rotate, represented as a scalar.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.RotateLeft(<paramref name="x" />[i], <paramref name="rotateAmount"/>)</c>.
+        /// </para>
+        /// </remarks>
+        public static void RotateLeft<T>(ReadOnlySpan<T> x, int rotateAmount, Span<T> destination)
+            where T : IBinaryInteger<T> =>
+            InvokeSpanIntoSpan(x, new RotateLeftOperator<T>(rotateAmount), destination);
+
+        /// <summary>Computes the element-wise rotation right of numbers in the specified tensor by the specified rotation amount.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <param name="rotateAmount">The number of bits to rotate, represented as a scalar.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.RotateRight(<paramref name="x" />[i], <paramref name="rotateAmount"/>)</c>.
+        /// </para>
+        /// </remarks>
+        public static void RotateRight<T>(ReadOnlySpan<T> x, int rotateAmount, Span<T> destination)
+            where T : IBinaryInteger<T> =>
+            InvokeSpanIntoSpan(x, new RotateRightOperator<T>(rotateAmount), destination);
+
+        /// <summary>T.RotateLeft(amount)</summary>
+        private readonly unsafe struct RotateLeftOperator<T>(int amount) : IStatefulUnaryOperator<T> where T : IBinaryInteger<T>
+        {
+            private readonly int _amount = amount;
+
+            public static bool Vectorizable => true;
+
+            public T Invoke(T x) => T.RotateLeft(x, _amount);
+            public Vector128<T> Invoke(Vector128<T> x) => (x << _amount) | (x >>> ((sizeof(T) * 8) - _amount));
+            public Vector256<T> Invoke(Vector256<T> x) => (x << _amount) | (x >>> ((sizeof(T) * 8) - _amount));
+            public Vector512<T> Invoke(Vector512<T> x) => (x << _amount) | (x >>> ((sizeof(T) * 8) - _amount));
+        }
+
+        /// <summary>T.RotateRight(amount)</summary>
+        private readonly unsafe struct RotateRightOperator<T>(int amount) : IStatefulUnaryOperator<T> where T : IBinaryInteger<T>
+        {
+            private readonly int _amount = amount;
+
+            public static bool Vectorizable => true;
+
+            public T Invoke(T x) => T.RotateRight(x, _amount);
+            public Vector128<T> Invoke(Vector128<T> x) => (x >>> _amount) | (x << ((sizeof(T) * 8) - _amount));
+            public Vector256<T> Invoke(Vector256<T> x) => (x >>> _amount) | (x << ((sizeof(T) * 8) - _amount));
+            public Vector512<T> Invoke(Vector512<T> x) => (x >>> _amount) | (x << ((sizeof(T) * 8) - _amount));
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Round.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Round.cs
new file mode 100644
index 000000000000..83cd73d18443
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Round.cs
@@ -0,0 +1,323 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise rounding of the numbers in the specified tensor</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Round(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void Round<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IFloatingPoint<T> =>
+            InvokeSpanIntoSpan<T, RoundToEvenOperator<T>>(x, destination);
+
+        /// <summary>Computes the element-wise rounding of the numbers in the specified tensor</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="mode">The mode under which <paramref name="x" /> should be rounded.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Round(<paramref name="x" />[i], <paramref name="mode"/>)</c>.
+        /// </para>
+        /// </remarks>
+        public static void Round<T>(ReadOnlySpan<T> x, MidpointRounding mode, Span<T> destination)
+            where T : IFloatingPoint<T>
+        {
+            switch (mode)
+            {
+                case MidpointRounding.ToEven:
+                    Round(x, destination);
+                    return;
+
+                case MidpointRounding.AwayFromZero:
+                    InvokeSpanIntoSpan<T, RoundAwayFromZeroOperator<T>>(x, destination);
+                    break;
+
+                case MidpointRounding.ToZero:
+                    Truncate(x, destination);
+                    return;
+
+                case MidpointRounding.ToNegativeInfinity:
+                    Floor(x, destination);
+                    return;
+
+                case MidpointRounding.ToPositiveInfinity:
+                    Ceiling(x, destination);
+                    return;
+
+                default:
+                    throw new ArgumentException(SR.Format(SR.Argument_InvalidEnumValue, mode, typeof(MidpointRounding)), nameof(mode));
+            }
+        }
+
+        /// <summary>Computes the element-wise rounding of the numbers in the specified tensor</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="digits">The number of fractional digits to which the numbers in <paramref name="x" /> should be rounded.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Round(<paramref name="x" />[i], <paramref name="digits"/>)</c>.
+        /// </para>
+        /// </remarks>
+        public static void Round<T>(ReadOnlySpan<T> x, int digits, Span<T> destination) where T : IFloatingPoint<T> =>
+            Round(x, digits, MidpointRounding.ToEven, destination);
+
+        /// <summary>Computes the element-wise rounding of the numbers in the specified tensor</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="digits">The number of fractional digits to which the numbers in <paramref name="x" /> should be rounded.</param>
+        /// <param name="mode">The mode under which <paramref name="x" /> should be rounded.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="digits"/> is invalid.</exception>
+        /// <exception cref="ArgumentException"><paramref name="mode"/> is invalid.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Round(<paramref name="x" />[i], <paramref name="digits"/>, <paramref name="mode"/>)</c>.
+        /// </para>
+        /// </remarks>
+        public static void Round<T>(ReadOnlySpan<T> x, int digits, MidpointRounding mode, Span<T> destination)
+            where T : IFloatingPoint<T>
+        {
+            if (digits == 0)
+            {
+                Round(x, mode, destination);
+            }
+
+            ReadOnlySpan<T> roundPower10;
+            if (typeof(T) == typeof(float))
+            {
+                ReadOnlySpan<float> roundPower10Single = [1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f];
+                roundPower10 = Rename<float, T>(roundPower10Single);
+            }
+            else if (typeof(T) == typeof(double))
+            {
+                Debug.Assert(typeof(T) == typeof(double));
+                ReadOnlySpan<double> roundPower10Double = [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15];
+                roundPower10 = Rename<double, T>(roundPower10Double);
+            }
+            else
+            {
+                if ((uint)mode > (uint)MidpointRounding.ToPositiveInfinity)
+                {
+                    throw new ArgumentException(SR.Format(SR.Argument_InvalidEnumValue, mode, typeof(MidpointRounding)), nameof(mode));
+                }
+
+                InvokeSpanIntoSpan(x, new RoundFallbackOperator<T>(digits, mode), destination);
+                return;
+            }
+
+            if ((uint)digits >= (uint)roundPower10.Length)
+            {
+                throw new ArgumentOutOfRangeException(nameof(digits));
+            }
+
+            T power10 = roundPower10[digits];
+            switch (mode)
+            {
+                case MidpointRounding.ToEven:
+                    InvokeSpanIntoSpan(x, new MultiplyRoundDivideOperator<T, RoundToEvenOperator<T>>(power10), destination);
+                    return;
+
+                case MidpointRounding.AwayFromZero:
+                    InvokeSpanIntoSpan(x, new MultiplyRoundDivideOperator<T, RoundAwayFromZeroOperator<T>>(power10), destination);
+                    break;
+
+                case MidpointRounding.ToZero:
+                    InvokeSpanIntoSpan(x, new MultiplyRoundDivideOperator<T, TruncateOperator<T>>(power10), destination);
+                    return;
+
+                case MidpointRounding.ToNegativeInfinity:
+                    InvokeSpanIntoSpan(x, new MultiplyRoundDivideOperator<T, FloorOperator<T>>(power10), destination);
+                    return;
+
+                case MidpointRounding.ToPositiveInfinity:
+                    InvokeSpanIntoSpan(x, new MultiplyRoundDivideOperator<T, CeilingOperator<T>>(power10), destination);
+                    return;
+
+                default:
+                    throw new ArgumentException(SR.Format(SR.Argument_InvalidEnumValue, mode, typeof(MidpointRounding)), nameof(mode));
+            }
+        }
+
+        /// <summary>T.Round(x)</summary>
+        private readonly struct RoundToEvenOperator<T> : IUnaryOperator<T, T> where T : IFloatingPoint<T>
+        {
+            // This code is based on `nearbyint` from amd/aocl-libm-ose
+            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Round(x);
+
+            private const float SingleBoundary = 8388608.0f; // 2^23
+            private const double DoubleBoundary = 4503599627370496.0; // 2^52
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                Vector128<T> boundary = Vector128.Create(typeof(T) == typeof(float) ? T.CreateTruncating(SingleBoundary) : T.CreateTruncating(DoubleBoundary));
+                Vector128<T> temp = CopySignOperator<T>.Invoke(boundary, x);
+                return Vector128.ConditionalSelect(Vector128.GreaterThan(Vector128.Abs(x), boundary), x, CopySignOperator<T>.Invoke((x + temp) - temp, x));
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                Vector256<T> boundary = Vector256.Create(typeof(T) == typeof(float) ? T.CreateTruncating(SingleBoundary) : T.CreateTruncating(DoubleBoundary));
+                Vector256<T> temp = CopySignOperator<T>.Invoke(boundary, x);
+                return Vector256.ConditionalSelect(Vector256.GreaterThan(Vector256.Abs(x), boundary), x, CopySignOperator<T>.Invoke((x + temp) - temp, x));
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                Vector512<T> boundary = Vector512.Create(typeof(T) == typeof(float) ? T.CreateTruncating(SingleBoundary) : T.CreateTruncating(DoubleBoundary));
+                Vector512<T> temp = CopySignOperator<T>.Invoke(boundary, x);
+                return Vector512.ConditionalSelect(Vector512.GreaterThan(Vector512.Abs(x), boundary), x, CopySignOperator<T>.Invoke((x + temp) - temp, x));
+            }
+        }
+
+        /// <summary>T.Round(x, MidpointRounding.AwayFromZero)</summary>
+        private readonly struct RoundAwayFromZeroOperator<T> : IUnaryOperator<T, T> where T : IFloatingPoint<T>
+        {
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Round(x, MidpointRounding.AwayFromZero);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    if (AdvSimd.IsSupported)
+                    {
+                        return AdvSimd.RoundAwayFromZero(x.AsSingle()).As<float, T>();
+                    }
+
+                    return TruncateOperator<float>.Invoke(x.AsSingle() + CopySignOperator<float>.Invoke(Vector128.Create(0.49999997f), x.AsSingle())).As<float, T>();
+                }
+                else
+                {
+                    if (AdvSimd.Arm64.IsSupported)
+                    {
+                        return AdvSimd.Arm64.RoundAwayFromZero(x.AsDouble()).As<double, T>();
+                    }
+
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return TruncateOperator<double>.Invoke(x.AsDouble() + CopySignOperator<double>.Invoke(Vector128.Create(0.49999999999999994), x.AsDouble())).As<double, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return TruncateOperator<float>.Invoke(x.AsSingle() + CopySignOperator<float>.Invoke(Vector256.Create(0.49999997f), x.AsSingle())).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return TruncateOperator<double>.Invoke(x.AsDouble() + CopySignOperator<double>.Invoke(Vector256.Create(0.49999999999999994), x.AsDouble())).As<double, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return TruncateOperator<float>.Invoke(x.AsSingle() + CopySignOperator<float>.Invoke(Vector512.Create(0.49999997f), x.AsSingle())).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return TruncateOperator<double>.Invoke(x.AsDouble() + CopySignOperator<double>.Invoke(Vector512.Create(0.49999999999999994), x.AsDouble())).As<double, T>();
+                }
+            }
+        }
+
+        /// <summary>(T.Round(x * power10, digits, mode)) / power10</summary>
+        private readonly struct MultiplyRoundDivideOperator<T, TDelegatedRound> : IStatefulUnaryOperator<T>
+            where T : IFloatingPoint<T>
+            where TDelegatedRound : IUnaryOperator<T, T>
+        {
+            private readonly T _factor;
+
+            public MultiplyRoundDivideOperator(T factor)
+            {
+                Debug.Assert(typeof(T) == typeof(float) || typeof(T) == typeof(double));
+                _factor = factor;
+            }
+
+            public static bool Vectorizable => true;
+
+            private const float Single_RoundLimit = 1e8f;
+            private const double Double_RoundLimit = 1e16d;
+
+            public T Invoke(T x)
+            {
+                T limit = typeof(T) == typeof(float) ? T.CreateTruncating(Single_RoundLimit) : T.CreateTruncating(Double_RoundLimit);
+                return T.Abs(x) < limit ?
+                    TDelegatedRound.Invoke(x * _factor) / _factor :
+                    x;
+            }
+
+            public Vector128<T> Invoke(Vector128<T> x)
+            {
+                Vector128<T> limit = Vector128.Create(typeof(T) == typeof(float) ? T.CreateTruncating(Single_RoundLimit) : T.CreateTruncating(Double_RoundLimit));
+                return Vector128.ConditionalSelect(Vector128.LessThan(Vector128.Abs(x), limit),
+                    TDelegatedRound.Invoke(x * _factor) / _factor,
+                    x);
+            }
+
+            public Vector256<T> Invoke(Vector256<T> x)
+            {
+                Vector256<T> limit = Vector256.Create(typeof(T) == typeof(float) ? T.CreateTruncating(Single_RoundLimit) : T.CreateTruncating(Double_RoundLimit));
+                return Vector256.ConditionalSelect(Vector256.LessThan(Vector256.Abs(x), limit),
+                    TDelegatedRound.Invoke(x * _factor) / _factor,
+                    x);
+            }
+
+            public Vector512<T> Invoke(Vector512<T> x)
+            {
+                Vector512<T> limit = Vector512.Create(typeof(T) == typeof(float) ? T.CreateTruncating(Single_RoundLimit) : T.CreateTruncating(Double_RoundLimit));
+                return Vector512.ConditionalSelect(Vector512.LessThan(Vector512.Abs(x), limit),
+                    TDelegatedRound.Invoke(x * _factor) / _factor,
+                    x);
+            }
+        }
+
+        /// <summary>T.Round(x, digits, mode)</summary>
+        private readonly struct RoundFallbackOperator<T>(int digits, MidpointRounding mode) : IStatefulUnaryOperator<T>
+            where T : IFloatingPoint<T>
+        {
+            private readonly int _digits = digits;
+            private readonly MidpointRounding _mode = mode;
+
+            public static bool Vectorizable => false;
+
+            public T Invoke(T x) => T.Round(x, _digits, _mode);
+
+            public Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ScaleB.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ScaleB.cs
new file mode 100644
index 000000000000..34c84782d40a
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ScaleB.cs
@@ -0,0 +1,39 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise product of numbers in the specified tensor and their base-radix raised to the specified power.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="n">The value to which base-radix is raised before multipliying x, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.ILogB(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void ScaleB<T>(ReadOnlySpan<T> x, int n, Span<T> destination)
+            where T : IFloatingPointIeee754<T> =>
+            InvokeSpanIntoSpan(x, new ScaleBOperator<T>(n), destination);
+
+        /// <summary>T.ScaleB(x, n)</summary>
+        private readonly struct ScaleBOperator<T>(int n) : IStatefulUnaryOperator<T> where T : IFloatingPointIeee754<T>
+        {
+            private readonly int _n = n;
+            private readonly T _pow2n = typeof(T) == typeof(float) || typeof(T) == typeof(double) ? T.Pow(T.CreateTruncating(2), T.CreateTruncating(n)) : default!;
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public T Invoke(T x) => T.ScaleB(x, _n);
+            public Vector128<T> Invoke(Vector128<T> x) => x * Vector128.Create(_pow2n);
+            public Vector256<T> Invoke(Vector256<T> x) => x * Vector256.Create(_pow2n);
+            public Vector512<T> Invoke(Vector512<T> x) => x * Vector512.Create(_pow2n);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ShiftLeft.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ShiftLeft.cs
new file mode 100644
index 000000000000..22d5ea4c2686
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.ShiftLeft.cs
@@ -0,0 +1,94 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise shifting left of numbers in the specified tensor by the specified shift amount.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <param name="shiftAmount">The number of bits to shift, represented as a scalar.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] &lt;&lt; <paramref name="shiftAmount"/></c>.
+        /// </para>
+        /// </remarks>
+        public static void ShiftLeft<T>(ReadOnlySpan<T> x, int shiftAmount, Span<T> destination)
+            where T : IShiftOperators<T, int, T> =>
+            InvokeSpanIntoSpan(x, new ShiftLeftOperator<T>(shiftAmount), destination);
+
+        /// <summary>Computes the element-wise arithmetic (signed) shifting right of numbers in the specified tensor by the specified shift amount.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <param name="shiftAmount">The number of bits to shift, represented as a scalar.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] &gt;&gt; <paramref name="shiftAmount"/></c>.
+        /// </para>
+        /// </remarks>
+        public static void ShiftRightArithmetic<T>(ReadOnlySpan<T> x, int shiftAmount, Span<T> destination)
+            where T : IShiftOperators<T, int, T> =>
+            InvokeSpanIntoSpan(x, new ShiftRightArithmeticOperator<T>(shiftAmount), destination);
+
+        /// <summary>Computes the element-wise logical (unsigned) shifting right of numbers in the specified tensor by the specified shift amount.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <param name="shiftAmount">The number of bits to shift, represented as a scalar.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] &gt;&gt;&gt; <paramref name="shiftAmount"/></c>.
+        /// </para>
+        /// </remarks>
+        public static void ShiftRightLogical<T>(ReadOnlySpan<T> x, int shiftAmount, Span<T> destination)
+            where T : IShiftOperators<T, int, T> =>
+            InvokeSpanIntoSpan(x, new ShiftRightLogicalOperator<T>(shiftAmount), destination);
+
+        /// <summary>T &lt;&lt; amount</summary>
+        private readonly struct ShiftLeftOperator<T>(int amount) : IStatefulUnaryOperator<T> where T : IShiftOperators<T, int, T>
+        {
+            private readonly int _amount = amount;
+
+            public static bool Vectorizable => true;
+
+            public T Invoke(T x) => x << _amount;
+            public Vector128<T> Invoke(Vector128<T> x) => x << _amount;
+            public Vector256<T> Invoke(Vector256<T> x) => x << _amount;
+            public Vector512<T> Invoke(Vector512<T> x) => x << _amount;
+        }
+
+        /// <summary>T &gt;&gt; amount</summary>
+        private readonly struct ShiftRightArithmeticOperator<T>(int amount) : IStatefulUnaryOperator<T> where T : IShiftOperators<T, int, T>
+        {
+            private readonly int _amount = amount;
+
+            public static bool Vectorizable => true;
+
+            public T Invoke(T x) => x >> _amount;
+            public Vector128<T> Invoke(Vector128<T> x) => x >> _amount;
+            public Vector256<T> Invoke(Vector256<T> x) => x >> _amount;
+            public Vector512<T> Invoke(Vector512<T> x) => x >> _amount;
+        }
+
+        /// <summary>T &gt;&gt;&gt; amount</summary>
+        private readonly struct ShiftRightLogicalOperator<T>(int amount) : IStatefulUnaryOperator<T> where T : IShiftOperators<T, int, T>
+        {
+            private readonly int _amount = amount;
+
+            public static bool Vectorizable => true;
+
+            public T Invoke(T x) => x >>> _amount;
+            public Vector128<T> Invoke(Vector128<T> x) => x >>> _amount;
+            public Vector256<T> Invoke(Vector256<T> x) => x >>> _amount;
+            public Vector512<T> Invoke(Vector512<T> x) => x >>> _amount;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sigmoid.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sigmoid.cs
new file mode 100644
index 000000000000..2fdcf9f11e3e
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sigmoid.cs
@@ -0,0 +1,46 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise sigmoid function on the specified non-empty tensor of numbers.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x" /> must not be empty.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = 1f / (1f + <typeparamref name="T"/>.Exp(-<paramref name="x" />[i]))</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Sigmoid<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IExponentialFunctions<T>
+        {
+            if (x.IsEmpty)
+            {
+                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
+            }
+
+            InvokeSpanIntoSpan<T, SigmoidOperator<T>>(x, destination);
+        }
+
+        /// <summary>1 / (1 + T.Exp(-x))</summary>
+        internal readonly struct SigmoidOperator<T> : IUnaryOperator<T, T> where T : IExponentialFunctions<T>
+        {
+            public static bool Vectorizable => ExpOperator<T>.Vectorizable;
+            public static T Invoke(T x) => T.One / (T.One + T.Exp(-x));
+            public static Vector128<T> Invoke(Vector128<T> x) => Vector128.Create(T.One) / (Vector128.Create(T.One) + ExpOperator<T>.Invoke(-x));
+            public static Vector256<T> Invoke(Vector256<T> x) => Vector256.Create(T.One) / (Vector256.Create(T.One) + ExpOperator<T>.Invoke(-x));
+            public static Vector512<T> Invoke(Vector512<T> x) => Vector512.Create(T.One) / (Vector512.Create(T.One) + ExpOperator<T>.Invoke(-x));
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs
new file mode 100644
index 000000000000..6976a35b3d23
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs
@@ -0,0 +1,338 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise sine of the value in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Sin(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Sin<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, SinOperator<T>>(x, destination);
+
+        /// <summary>T.Sin(x)</summary>
+        internal readonly struct SinOperator<T> : IUnaryOperator<T, T>
+            where T : ITrigonometricFunctions<T>
+        {
+            // This code is based on `vrs4_sin` and `vrd2_sin` from amd/aocl-libm-ose
+            // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation notes from amd/aocl-libm-ose:
+            // -----------------------------------------------------------------
+            // Convert given x into the form
+            // |x| = N * pi + f where N is an integer and f lies in [-pi/2,pi/2]
+            // N is obtained by : N = round(x/pi)
+            // f is obtained by : f = abs(x)-N*pi
+            // sin(x) = sin(N * pi + f) = sin(N * pi)*cos(f) + cos(N*pi)*sin(f)
+            // sin(x) = sign(x)*sin(f)*(-1)**N
+            //
+            // The term sin(f) can be approximated by using a polynomial
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Sin(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+        }
+
+        /// <summary>float.Sin(x)</summary>
+        private readonly struct SinOperatorSingle : IUnaryOperator<float, float>
+        {
+            internal const uint MaxVectorizedValue = 0x49800000u;
+            internal const uint SignMask = 0x7FFFFFFFu;
+            private const float AlmHuge = 1.2582912e7f;
+            private const float Pi_Tail1 = 8.742278e-8f;
+            private const float Pi_Tail2 = 3.430249e-15f;
+            private const float C1 = -0.16666657f;
+            private const float C2 = 0.0083330255f;
+            private const float C3 = -1.980742e-4f;
+            private const float C4 = 2.6019031e-6f;
+
+            public static bool Vectorizable => true;
+
+            public static float Invoke(float x) => float.Sin(x);
+
+            public static Vector128<float> Invoke(Vector128<float> x)
+            {
+                Vector128<float> uxMasked = Vector128.Abs(x);
+                if (Vector128.GreaterThanAny(uxMasked.AsUInt32(), Vector128.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<SinOperatorSingle>(x);
+                }
+
+                Vector128<float> almHuge = Vector128.Create(AlmHuge);
+                Vector128<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector128.Create(1 / float.Pi), almHuge);
+                Vector128<uint> odd = dn.AsUInt32() << 31;
+                dn -= almHuge;
+
+                Vector128<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(-float.Pi), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(Pi_Tail2), f);
+
+                // POLY_EVAL_ODD_9
+                Vector128<float> f2 = f * f;
+                Vector128<float> f4 = f2 * f2;
+                Vector128<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1));
+                Vector128<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector128<float>.One);
+                Vector128<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C3), f2, Vector128.Create(C4) * f4);
+                Vector128<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector128<float> poly = f * a3;
+
+                return (poly.AsUInt32() ^ (x.AsUInt32() & Vector128.Create(~SignMask)) ^ odd).AsSingle();
+            }
+
+            public static Vector256<float> Invoke(Vector256<float> x)
+            {
+                Vector256<float> uxMasked = Vector256.Abs(x);
+                if (Vector256.GreaterThanAny(uxMasked.AsUInt32(), Vector256.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<SinOperatorSingle>(x);
+                }
+
+                Vector256<float> almHuge = Vector256.Create(AlmHuge);
+                Vector256<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector256.Create(1 / float.Pi), almHuge);
+                Vector256<uint> odd = dn.AsUInt32() << 31;
+                dn -= almHuge;
+
+                Vector256<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(-float.Pi), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(Pi_Tail2), f);
+
+                // POLY_EVAL_ODD_9
+                Vector256<float> f2 = f * f;
+                Vector256<float> f4 = f2 * f2;
+                Vector256<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1));
+                Vector256<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector256<float>.One);
+                Vector256<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C3), f2, Vector256.Create(C4) * f4);
+                Vector256<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector256<float> poly = f * a3;
+
+                return (poly.AsUInt32() ^ (x.AsUInt32() & Vector256.Create(~SignMask)) ^ odd).AsSingle();
+            }
+
+            public static Vector512<float> Invoke(Vector512<float> x)
+            {
+                Vector512<float> uxMasked = Vector512.Abs(x);
+                if (Vector512.GreaterThanAny(uxMasked.AsUInt32(), Vector512.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<SinOperatorSingle>(x);
+                }
+
+                Vector512<float> almHuge = Vector512.Create(AlmHuge);
+                Vector512<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector512.Create(1 / float.Pi), almHuge);
+                Vector512<uint> odd = dn.AsUInt32() << 31;
+                dn -= almHuge;
+
+                Vector512<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(-float.Pi), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(Pi_Tail2), f);
+
+                // POLY_EVAL_ODD_9
+                Vector512<float> f2 = f * f;
+                Vector512<float> f4 = f2 * f2;
+                Vector512<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
+                Vector512<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector512<float>.One);
+                Vector512<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C3), f2, Vector512.Create(C4) * f4);
+                Vector512<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector512<float> poly = f * a3;
+
+                return (poly.AsUInt32() ^ (x.AsUInt32() & Vector512.Create(~SignMask)) ^ odd).AsSingle();
+            }
+        }
+
+        /// <summary>double.Sin(x)</summary>
+        private readonly struct SinOperatorDouble : IUnaryOperator<double, double>
+        {
+            internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul;
+            internal const ulong MaxVectorizedValue = 0x4160000000000000ul;
+            private const double AlmHuge = 6.755399441055744e15;
+            private const double Pi_Tail1 = 1.224646799147353e-16;
+            private const double Pi_Tail2 = 2.165713347843828e-32;
+            private const double C0 = -0.16666666666666666;
+            private const double C2 = 0.008333333333333165;
+            private const double C4 = -1.984126984120184e-4;
+            private const double C6 = 2.7557319210152756e-6;
+            private const double C8 = -2.5052106798274583e-8;
+            private const double C10 = 1.605893649037159e-10;
+            private const double C12 = -7.642917806891047e-13;
+            private const double C14 = 2.7204790957888847e-15;
+
+            public static bool Vectorizable => true;
+
+            public static double Invoke(double x) => double.Sin(x);
+
+            public static Vector128<double> Invoke(Vector128<double> x)
+            {
+                Vector128<double> uxMasked = Vector128.Abs(x);
+                if (Vector128.GreaterThanAny(uxMasked.AsUInt64(), Vector128.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<SinOperatorDouble>(x);
+                }
+
+                // dn = |x| * (1 / π)
+                Vector128<double> almHuge = Vector128.Create(AlmHuge);
+                Vector128<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector128.Create(1 / double.Pi), almHuge);
+                Vector128<ulong> odd = dn.AsUInt64() << 63;
+                dn -= almHuge;
+
+                // f = |x| - (dn * π)
+                Vector128<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-double.Pi), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-Pi_Tail2), f);
+
+                // POLY_EVAL_ODD_17
+                Vector128<double> f2 = f * f;
+                Vector128<double> f4 = f2 * f2;
+                Vector128<double> f6 = f4 * f2;
+                Vector128<double> f10 = f6 * f4;
+                Vector128<double> f14 = f10 * f4;
+                Vector128<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C0));
+                Vector128<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C6), f2, Vector128.Create(C4));
+                Vector128<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C10), f2, Vector128.Create(C8));
+                Vector128<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C14), f2, Vector128.Create(C12));
+                Vector128<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
+                Vector128<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
+                Vector128<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
+
+                return (poly.AsUInt64() ^ (x.AsUInt64() & Vector128.Create(~SignMask)) ^ odd).AsDouble();
+            }
+
+            public static Vector256<double> Invoke(Vector256<double> x)
+            {
+                Vector256<double> uxMasked = Vector256.Abs(x);
+                if (Vector256.GreaterThanAny(uxMasked.AsUInt64(), Vector256.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<SinOperatorDouble>(x);
+                }
+
+                // dn = |x| * (1 / π)
+                Vector256<double> almHuge = Vector256.Create(AlmHuge);
+                Vector256<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector256.Create(1 / double.Pi), almHuge);
+                Vector256<ulong> odd = dn.AsUInt64() << 63;
+                dn -= almHuge;
+
+                // f = |x| - (dn * π)
+                Vector256<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-double.Pi), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-Pi_Tail2), f);
+
+                // POLY_EVAL_ODD_17
+                Vector256<double> f2 = f * f;
+                Vector256<double> f4 = f2 * f2;
+                Vector256<double> f6 = f4 * f2;
+                Vector256<double> f10 = f6 * f4;
+                Vector256<double> f14 = f10 * f4;
+                Vector256<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C0));
+                Vector256<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C6), f2, Vector256.Create(C4));
+                Vector256<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C10), f2, Vector256.Create(C8));
+                Vector256<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C14), f2, Vector256.Create(C12));
+                Vector256<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
+                Vector256<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
+                Vector256<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
+
+                return (poly.AsUInt64() ^ (x.AsUInt64() & Vector256.Create(~SignMask)) ^ odd).AsDouble();
+            }
+
+            public static Vector512<double> Invoke(Vector512<double> x)
+            {
+                Vector512<double> uxMasked = Vector512.Abs(x);
+                if (Vector512.GreaterThanAny(uxMasked.AsUInt64(), Vector512.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<SinOperatorDouble>(x);
+                }
+
+                // dn = |x| * (1 / π)
+                Vector512<double> almHuge = Vector512.Create(AlmHuge);
+                Vector512<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector512.Create(1 / double.Pi), almHuge);
+                Vector512<ulong> odd = dn.AsUInt64() << 63;
+                dn -= almHuge;
+
+                // f = |x| - (dn * π)
+                Vector512<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-double.Pi), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-Pi_Tail2), f);
+
+                // POLY_EVAL_ODD_17
+                Vector512<double> f2 = f * f;
+                Vector512<double> f4 = f2 * f2;
+                Vector512<double> f6 = f4 * f2;
+                Vector512<double> f10 = f6 * f4;
+                Vector512<double> f14 = f10 * f4;
+                Vector512<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C0));
+                Vector512<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C6), f2, Vector512.Create(C4));
+                Vector512<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C10), f2, Vector512.Create(C8));
+                Vector512<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C14), f2, Vector512.Create(C12));
+                Vector512<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
+                Vector512<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
+                Vector512<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
+
+                return (poly.AsUInt64() ^ (x.AsUInt64() & Vector512.Create(~SignMask)) ^ odd).AsDouble();
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCos.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCos.cs
new file mode 100644
index 000000000000..766269957a2e
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCos.cs
@@ -0,0 +1,40 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise sine and cosine of the value in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="sinDestination">The destination tensor for the element-wise sine result, represented as a span.</param>
+        /// <param name="cosDestination">The destination tensor for the element-wise cosine result, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="sinDestination"/> or <paramref name="cosDestination" /> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c>(<paramref name="sinDestination" />[i], <paramref name="cosDestination" />[i]) = <typeparamref name="T"/>.SinCos(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void SinCos<T>(ReadOnlySpan<T> x, Span<T> sinDestination, Span<T> cosDestination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan_TwoOutputs<T, SinCosOperator<T>>(x, sinDestination, cosDestination);
+
+        /// <summary>T.SinCos(x)</summary>
+        private readonly struct SinCosOperator<T> : IUnaryInputBinaryOutput<T> where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => false; // TODO: vectorize
+
+            public static (T, T) Invoke(T x) => T.SinCos(x);
+            public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs
new file mode 100644
index 000000000000..574db7667be0
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs
@@ -0,0 +1,40 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise sine and cosine of the value in the specified tensor that has been multiplied by Pi.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="sinPiDestination">The destination tensor for the element-wise sine result, represented as a span.</param>
+        /// <param name="cosPiDestination">The destination tensor for the element-wise cosine result, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="sinPiDestination"/> or <paramref name="cosPiDestination" /> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c>(<paramref name="sinPiDestination" />[i], <paramref name="cosPiDestination" />[i]) = <typeparamref name="T"/>.SinCos(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void SinCosPi<T>(ReadOnlySpan<T> x, Span<T> sinPiDestination, Span<T> cosPiDestination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan_TwoOutputs<T, SinCosPiOperator<T>>(x, sinPiDestination, cosPiDestination);
+
+        /// <summary>T.SinCosPi(x)</summary>
+        private readonly struct SinCosPiOperator<T> : IUnaryInputBinaryOutput<T> where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => false; // TODO: vectorize
+
+            public static (T, T) Invoke(T x) => T.SinCosPi(x);
+            public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs
new file mode 100644
index 000000000000..3ee43ecd58c0
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs
@@ -0,0 +1,107 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise sine of the value in the specified tensor that has been multiplied by Pi.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.SinPi(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void SinPi<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, SinPiOperator<T>>(x, destination);
+
+        /// <summary>T.SinPi(x)</summary>
+        private readonly struct SinPiOperator<T> : IUnaryOperator<T, T>
+            where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.SinPi(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                Vector128<T> xpi = x * Vector128.Create(T.Pi);
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(SinOperatorSingle.SignMask), Vector128.Create(SinOperatorSingle.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<SinPiOperator<float>>(x.AsSingle()).As<float, T>();
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    if (Vector128.GreaterThanAny(xpi.AsUInt64() & Vector128.Create(SinOperatorDouble.SignMask), Vector128.Create(SinOperatorDouble.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>();
+                    }
+                }
+
+                return SinOperator<T>.Invoke(xpi);
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                Vector256<T> xpi = x * Vector256.Create(T.Pi);
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(SinOperatorSingle.SignMask), Vector256.Create(SinOperatorSingle.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<SinPiOperator<float>>(x.AsSingle()).As<float, T>();
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    if (Vector256.GreaterThanAny(xpi.AsUInt64() & Vector256.Create(SinOperatorDouble.SignMask), Vector256.Create(SinOperatorDouble.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>();
+                    }
+                }
+
+                return SinOperator<T>.Invoke(xpi);
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                Vector512<T> xpi = x * Vector512.Create(T.Pi);
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(SinOperatorSingle.SignMask), Vector512.Create(SinOperatorSingle.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<SinPiOperator<float>>(x.AsSingle()).As<float, T>();
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    if (Vector512.GreaterThanAny(xpi.AsUInt64() & Vector512.Create(SinOperatorDouble.SignMask), Vector512.Create(SinOperatorDouble.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>();
+                    }
+                }
+
+                return SinOperator<T>.Invoke(xpi);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Single.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Single.netcore.cs
index 32723d2b3c3c..b1f6309e6dbc 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Single.netcore.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Single.netcore.cs
@@ -39,7 +39,7 @@
 
 namespace System.Numerics.Tensors
 {
-    public static unsafe partial class TensorPrimitives
+    public static partial class TensorPrimitives
     {
         private static void InvokeSpanIntoSpan<TSingleUnaryOperator>(
             ReadOnlySpan<float> x, Span<float> destination)
@@ -54,7 +54,7 @@ private static void InvokeSpanSpanIntoSpan<TSingleBinaryOperator>(
         private static void InvokeSpanScalarIntoSpan<TSingleBinaryOperator>(
             ReadOnlySpan<float> x, float y, Span<float> destination)
             where TSingleBinaryOperator : struct, IBinaryOperator<float> =>
-            InvokeSpanScalarIntoSpan<float, IdentityOperator<float>, TSingleBinaryOperator>(x, y, destination);
+            InvokeSpanScalarIntoSpan<float, IdentityOperator_Single, TSingleBinaryOperator>(x, y, destination);
 
         private static unsafe void InvokeSpanScalarIntoSpan<TSingleTransformOperator, TSingleBinaryOperator>(
             ReadOnlySpan<float> x, float y, Span<float> destination)
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sinh.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sinh.cs
new file mode 100644
index 000000000000..a154e3d9edce
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sinh.cs
@@ -0,0 +1,131 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise hyperbolic sine of each radian angle in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Sinh(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>, <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>, or <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
+        /// the corresponding destination location is set to that value.
+        /// </para>
+        /// <para>
+        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi / 180 to convert degrees to radians.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Sinh<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IHyperbolicFunctions<T> =>
+            InvokeSpanIntoSpan<T, SinhOperator<T>>(x, destination);
+
+        /// <summary>T.Sinh(x)</summary>
+        internal readonly struct SinhOperator<T> : IUnaryOperator<T, T>
+            where T : IHyperbolicFunctions<T>
+        {
+            // Same as cosh, but with `z -` rather than `z +`, and with the sign
+            // flipped on the result based on the sign of the input.
+
+            private const float Single_LOGV = 0.693161f;
+            private const float Single_HALFV = 1.0000138f;
+            private const float Single_INVV2 = 0.24999309f;
+
+            private const double Double_LOGV = 0.6931471805599453;
+            private const double Double_HALFV = 1.0;
+            private const double Double_INVV2 = 0.25;
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Sinh(x);
+
+            public static Vector128<T> Invoke(Vector128<T> t)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    Vector128<float> x = t.AsSingle();
+
+                    Vector128<float> y = Vector128.Abs(x);
+                    Vector128<float> z = ExpOperator<float>.Invoke(y - Vector128.Create((float)Single_LOGV));
+                    Vector128<float> result = Vector128.Create((float)Single_HALFV) * (z - (Vector128.Create((float)Single_INVV2) / z));
+                    Vector128<uint> sign = x.AsUInt32() & Vector128.Create(~(uint)int.MaxValue);
+                    return (sign ^ result.AsUInt32()).As<uint, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    Vector128<double> x = t.AsDouble();
+
+                    Vector128<double> y = Vector128.Abs(x);
+                    Vector128<double> z = ExpOperator<double>.Invoke(y - Vector128.Create(Double_LOGV));
+                    Vector128<double> result = Vector128.Create(Double_HALFV) * (z - (Vector128.Create(Double_INVV2) / z));
+                    Vector128<ulong> sign = x.AsUInt64() & Vector128.Create(~(ulong)long.MaxValue);
+                    return (sign ^ result.AsUInt64()).As<ulong, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> t)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    Vector256<float> x = t.AsSingle();
+
+                    Vector256<float> y = Vector256.Abs(x);
+                    Vector256<float> z = ExpOperator<float>.Invoke(y - Vector256.Create((float)Single_LOGV));
+                    Vector256<float> result = Vector256.Create((float)Single_HALFV) * (z - (Vector256.Create((float)Single_INVV2) / z));
+                    Vector256<uint> sign = x.AsUInt32() & Vector256.Create(~(uint)int.MaxValue);
+                    return (sign ^ result.AsUInt32()).As<uint, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    Vector256<double> x = t.AsDouble();
+
+                    Vector256<double> y = Vector256.Abs(x);
+                    Vector256<double> z = ExpOperator<double>.Invoke(y - Vector256.Create(Double_LOGV));
+                    Vector256<double> result = Vector256.Create(Double_HALFV) * (z - (Vector256.Create(Double_INVV2) / z));
+                    Vector256<ulong> sign = x.AsUInt64() & Vector256.Create(~(ulong)long.MaxValue);
+                    return (sign ^ result.AsUInt64()).As<ulong, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> t)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    Vector512<float> x = t.AsSingle();
+
+                    Vector512<float> y = Vector512.Abs(x);
+                    Vector512<float> z = ExpOperator<float>.Invoke(y - Vector512.Create((float)Single_LOGV));
+                    Vector512<float> result = Vector512.Create((float)Single_HALFV) * (z - (Vector512.Create((float)Single_INVV2) / z));
+                    Vector512<uint> sign = x.AsUInt32() & Vector512.Create(~(uint)int.MaxValue);
+                    return (sign ^ result.AsUInt32()).As<uint, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    Vector512<double> x = t.AsDouble();
+
+                    Vector512<double> y = Vector512.Abs(x);
+                    Vector512<double> z = ExpOperator<double>.Invoke(y - Vector512.Create(Double_LOGV));
+                    Vector512<double> result = Vector512.Create(Double_HALFV) * (z - (Vector512.Create(Double_INVV2) / z));
+                    Vector512<ulong> sign = x.AsUInt64() & Vector512.Create(~(ulong)long.MaxValue);
+                    return (sign ^ result.AsUInt64()).As<ulong, T>();
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SoftMax.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SoftMax.cs
new file mode 100644
index 000000000000..429f7baf59c4
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SoftMax.cs
@@ -0,0 +1,44 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the softmax function over the specified non-empty tensor of numbers.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x" /> must not be empty.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes a sum of <c><typeparamref name="T"/>.Exp(x[i])</c> for all elements in <paramref name="x"/>.
+        /// It then effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Exp(<paramref name="x" />[i]) / sum</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void SoftMax<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IExponentialFunctions<T>
+        {
+            if (x.IsEmpty)
+            {
+                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
+            }
+
+            if (x.Length > destination.Length)
+            {
+                ThrowHelper.ThrowArgument_DestinationTooShort();
+            }
+
+            ValidateInputOutputSpanNonOverlapping(x, destination);
+
+            T expSum = Aggregate<T, ExpOperator<T>, AddOperator<T>>(x);
+
+            InvokeSpanScalarIntoSpan<T, ExpOperator<T>, DivideOperator<T>>(x, expSum, destination);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sqrt.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sqrt.cs
new file mode 100644
index 000000000000..f5c9525e7e92
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sqrt.cs
@@ -0,0 +1,35 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise square root of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Sqrt(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void Sqrt<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IRootFunctions<T> =>
+            InvokeSpanIntoSpan<T, SqrtOperator<T>>(x, destination);
+
+        /// <summary>T.Sqrt(x)</summary>
+        private readonly struct SqrtOperator<T> : IUnaryOperator<T, T>
+            where T : IRootFunctions<T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x) => T.Sqrt(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => Vector128.Sqrt(x);
+            public static Vector256<T> Invoke(Vector256<T> x) => Vector256.Sqrt(x);
+            public static Vector512<T> Invoke(Vector512<T> x) => Vector512.Sqrt(x);
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Subtract.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Subtract.cs
new file mode 100644
index 000000000000..2130745cee7b
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Subtract.cs
@@ -0,0 +1,76 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise difference between numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] - <paramref name="y" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Subtract<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : ISubtractionOperators<T, T, T> =>
+            InvokeSpanSpanIntoSpan<T, SubtractOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise difference between numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] - <paramref name="y" /></c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Subtract<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : ISubtractionOperators<T, T, T> =>
+            InvokeSpanScalarIntoSpan<T, SubtractOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise difference between numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a scalar.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" /> - <paramref name="y" />[i]</c>.
+        /// </para>
+        /// <para>
+        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
+        /// </para>
+        /// </remarks>
+        public static void Subtract<T>(T x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : ISubtractionOperators<T, T, T> =>
+            InvokeScalarSpanIntoSpan<T, SubtractOperator<T>>(x, y, destination);
+
+        /// <summary>x - y</summary>
+        internal readonly struct SubtractOperator<T> : IBinaryOperator<T> where T : ISubtractionOperators<T, T, T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x, T y) => x - y;
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x - y;
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x - y;
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x - y;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sum.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sum.cs
new file mode 100644
index 000000000000..1abd51b2def8
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sum.cs
@@ -0,0 +1,81 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the sum of all elements in the specified tensor of numbers.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <returns>The result of adding all elements in <paramref name="x"/>, or zero if <paramref name="x"/> is empty.</returns>
+        /// <remarks>
+        /// <para>
+        /// If any of the values in the input is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result is also NaN.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T Sum<T>(ReadOnlySpan<T> x)
+            where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T> =>
+            Aggregate<T, IdentityOperator<T>, AddOperator<T>>(x);
+
+        /// <summary>Computes the sum of the absolute values of every element in the specified tensor of numbers.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <returns>The result of adding the absolute value of every element in <paramref name="x"/>, or zero if <paramref name="x"/> is empty.</returns>
+        /// <exception cref="OverflowException"><typeparamref name="T"/> is a signed integer type and <paramref name="x"/> contained a value equal to <typeparamref name="T"/>'s minimum value.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes:
+        /// <c>
+        ///     Span&lt;T&gt; absoluteValues = ...;
+        ///     TensorPrimitives.Abs(x, absoluteValues);
+        ///     T result = TensorPrimitives.Sum(absoluteValues);
+        /// </c>
+        /// but without requiring intermediate storage for the absolute values. It corresponds to the <c>asum</c> method defined by <c>BLAS1</c>.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T SumOfMagnitudes<T>(ReadOnlySpan<T> x)
+            where T : INumberBase<T> =>
+            Aggregate<T, AbsoluteOperator<T>, AddOperator<T>>(x);
+
+        /// <summary>Computes the sum of the square of every element in the specified tensor of numbers.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <returns>The result of adding the square of every element in <paramref name="x"/>, or zero if <paramref name="x"/> is empty.</returns>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes:
+        /// <c>
+        ///     Span&lt;T&gt; squaredValues = ...;
+        ///     TensorPrimitives.Multiply(x, x, squaredValues);
+        ///     T result = TensorPrimitives.Sum(squaredValues);
+        /// </c>
+        /// but without requiring intermediate storage for the squared values.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static T SumOfSquares<T>(ReadOnlySpan<T> x)
+            where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T>, IMultiplyOperators<T, T, T> =>
+            Aggregate<T, SquaredOperator<T>, AddOperator<T>>(x);
+
+        /// <summary>x * x</summary>
+        internal readonly struct SquaredOperator<T> : IUnaryOperator<T, T> where T : IMultiplyOperators<T, T, T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x) => x * x;
+            public static Vector128<T> Invoke(Vector128<T> x) => x * x;
+            public static Vector256<T> Invoke(Vector256<T> x) => x * x;
+            public static Vector512<T> Invoke(Vector512<T> x) => x * x;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs
deleted file mode 100644
index 0604768ba9fc..000000000000
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs
+++ /dev/null
@@ -1,3025 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-namespace System.Numerics.Tensors
-{
-    /// <summary>Performs primitive tensor operations over spans of memory.</summary>
-    public static partial class TensorPrimitives
-    {
-        /// <summary>Computes the element-wise absolute value of each number in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="OverflowException"><typeparamref name="T"/> is a signed integer type and <paramref name="x"/> contained a value equal to <typeparamref name="T"/>'s minimum value.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Abs(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// The absolute value of a <typeparamref name="T"/> is its numeric value without its sign. For example, the absolute value of both 1.2e-03 and -1.2e03 is 1.2e03.
-        /// </para>
-        /// <para>
-        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/> or <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
-        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is the original NaN value with the sign bit removed.
-        /// </para>
-        /// </remarks>
-        public static void Abs<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : INumberBase<T> =>
-            InvokeSpanIntoSpan<T, AbsoluteOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise angle in radians whose cosine is the specifed number.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Acos(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Acos<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, AcosOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise hyperbolic arc-cosine of the specifed number.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Acosh(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Acosh<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IHyperbolicFunctions<T> =>
-            InvokeSpanIntoSpan<T, AcoshOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise angle in radians whose cosine is the specifed number and divides the result by Pi.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.AcosPi(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void AcosPi<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, AcosPiOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise angle in radians whose sine is the specifed number.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Asin(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Asin<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, AsinOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise hyperbolic arc-sine of the specifed number.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Asinh(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Asinh<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IHyperbolicFunctions<T> =>
-            InvokeSpanIntoSpan<T, AsinhOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise angle in radians whose sine is the specifed number and divides the result by Pi.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.AsinPi(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void AsinPi<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, AsinPiOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise angle in radians whose tangent is the specifed number.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Atan<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, AtanOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise hyperbolic arc-tangent of the specifed number.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atanh(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Atanh<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IHyperbolicFunctions<T> =>
-            InvokeSpanIntoSpan<T, AtanhOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise angle in radians whose tangent is the specifed number and divides the result by Pi.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.AtanPi(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void AtanPi<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, AtanPiOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise arc-tangent for the quotient of two values in the specified tensors.</summary>
-        /// <param name="y">The first tensor, represented as a span.</param>
-        /// <param name="x">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="y" /> must be same as length of <paramref name="x" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan2(<paramref name="y" />[i], <paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Atan2<T>(ReadOnlySpan<T> y, ReadOnlySpan<T> x, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanSpanIntoSpan<T, Atan2Operator<T>>(y, x, destination);
-
-        /// <summary>Computes the element-wise arc-tangent for the quotient of two values in the specified tensors.</summary>
-        /// <param name="y">The first tensor, represented as a span.</param>
-        /// <param name="x">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan2(<paramref name="y" />[i], <paramref name="x" />)</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Atan2<T>(ReadOnlySpan<T> y, T x, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanScalarIntoSpan<T, Atan2Operator<T>>(y, x, destination);
-
-        /// <summary>Computes the element-wise arc-tangent for the quotient of two values in the specified tensors.</summary>
-        /// <param name="y">The first tensor, represented as a scalar.</param>
-        /// <param name="x">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan2(<paramref name="y" />, <paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Atan2<T>(T y, ReadOnlySpan<T> x, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeScalarSpanIntoSpan<T, Atan2Operator<T>>(y, x, destination);
-
-        /// <summary>Computes the element-wise arc-tangent for the quotient of two values in the specified tensors and divides the result by Pi.</summary>
-        /// <param name="y">The first tensor, represented as a span.</param>
-        /// <param name="x">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="y" /> must be same as length of <paramref name="x" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan2(<paramref name="y" />[i], <paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Atan2Pi<T>(ReadOnlySpan<T> y, ReadOnlySpan<T> x, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanSpanIntoSpan<T, Atan2PiOperator<T>>(y, x, destination);
-
-        /// <summary>Computes the element-wise arc-tangent for the quotient of two values in the specified tensors and divides the result by Pi.</summary>
-        /// <param name="y">The first tensor, represented as a span.</param>
-        /// <param name="x">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan2(<paramref name="y" />[i], <paramref name="x" />)</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Atan2Pi<T>(ReadOnlySpan<T> y, T x, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanScalarIntoSpan<T, Atan2PiOperator<T>>(y, x, destination);
-
-        /// <summary>Computes the element-wise arc-tangent for the quotient of two values in the specified tensors and divides the result by Pi.</summary>
-        /// <param name="y">The first tensor, represented as a scalar.</param>
-        /// <param name="x">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Atan2(<paramref name="y" />, <paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Atan2Pi<T>(T y, ReadOnlySpan<T> x, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeScalarSpanIntoSpan<T, Atan2PiOperator<T>>(y, x, destination);
-
-        /// <summary>Computes the element-wise addition of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] + <paramref name="y" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Add<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T> =>
-            InvokeSpanSpanIntoSpan<T, AddOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise addition of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] + <paramref name="y" /></c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Add<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T> =>
-            InvokeSpanScalarIntoSpan<T, AddOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> + <paramref name="y" />) * <paramref name="multiplier" /></c> for the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="multiplier">The third tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" /> and the length of <paramref name="multiplier" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="multiplier"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] + <paramref name="y" />[i]) * <paramref name="multiplier" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If any of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void AddMultiply<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, ReadOnlySpan<T> multiplier, Span<T> destination)
-            where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T> =>
-            InvokeSpanSpanSpanIntoSpan<T, AddMultiplyOperator<T>>(x, y, multiplier, destination);
-
-        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> + <paramref name="y" />) * <paramref name="multiplier" /></c> for the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="multiplier">The third tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] + <paramref name="y" />[i]) * <paramref name="multiplier" /></c>.
-        /// </para>
-        /// <para>
-        /// If any of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void AddMultiply<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, T multiplier, Span<T> destination)
-            where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T> =>
-            InvokeSpanSpanScalarIntoSpan<T, AddMultiplyOperator<T>>(x, y, multiplier, destination);
-
-        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> + <paramref name="y" />) * <paramref name="multiplier" /></c> for the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="multiplier">The third tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="multiplier" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="multiplier"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] + <paramref name="y" />) * <paramref name="multiplier" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If any of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void AddMultiply<T>(ReadOnlySpan<T> x, T y, ReadOnlySpan<T> multiplier, Span<T> destination)
-            where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T> =>
-            InvokeSpanScalarSpanIntoSpan<T, AddMultiplyOperator<T>>(x, y, multiplier, destination);
-
-        /// <summary>Computes the element-wise bitwise AND of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] &amp; <paramref name="y" />[i]</c>.
-        /// </para>
-        /// </remarks>
-        public static void BitwiseAnd<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : IBitwiseOperators<T, T, T> =>
-            InvokeSpanSpanIntoSpan<T, BitwiseAndOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise bitwise AND of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] &amp; <paramref name="y" /></c>.
-        /// </para>
-        /// </remarks>
-        public static void BitwiseAnd<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : IBitwiseOperators<T, T, T> =>
-            InvokeSpanScalarIntoSpan<T, BitwiseAndOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise bitwise OR of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] | <paramref name="y" />[i]</c>.
-        /// </para>
-        /// </remarks>
-        public static void BitwiseOr<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : IBitwiseOperators<T, T, T> =>
-            InvokeSpanSpanIntoSpan<T, BitwiseOrOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise bitwise OR of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] | <paramref name="y" /></c>.
-        /// </para>
-        /// </remarks>
-        public static void BitwiseOr<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : IBitwiseOperators<T, T, T> =>
-            InvokeSpanScalarIntoSpan<T, BitwiseOrOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise ceiling of numbers in the specified tensor.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Ceiling(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void Ceiling<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IFloatingPoint<T> =>
-            InvokeSpanIntoSpan<T, CeilingOperator<T>>(x, destination);
-
-        /// <summary>
-        /// Copies <paramref name="source"/> to <paramref name="destination"/>, converting each <typeparamref name="TFrom"/>
-        /// value to a <typeparamref name="TTo"/> value.
-        /// </summary>
-        /// <param name="source">The source span from which to copy values.</param>
-        /// <param name="destination">The destination span into which the converted values should be written.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = TTo.CreateChecked(<paramref name="source"/>[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void ConvertChecked<TFrom, TTo>(ReadOnlySpan<TFrom> source, Span<TTo> destination)
-            where TFrom : INumberBase<TFrom>
-            where TTo : INumberBase<TTo>
-        {
-            if (!TryConvertUniversal(source, destination))
-            {
-                InvokeSpanIntoSpan<TFrom, TTo, ConvertCheckedFallbackOperator<TFrom, TTo>>(source, destination);
-            }
-        }
-
-        /// <summary>
-        /// Copies <paramref name="source"/> to <paramref name="destination"/>, converting each <typeparamref name="TFrom"/>
-        /// value to a <typeparamref name="TTo"/> value.
-        /// </summary>
-        /// <param name="source">The source span from which to copy values.</param>
-        /// <param name="destination">The destination span into which the converted values should be written.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = TTo.CreateSaturating(<paramref name="source"/>[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void ConvertSaturating<TFrom, TTo>(ReadOnlySpan<TFrom> source, Span<TTo> destination)
-            where TFrom : INumberBase<TFrom>
-            where TTo : INumberBase<TTo>
-        {
-            if (!TryConvertUniversal(source, destination))
-            {
-                InvokeSpanIntoSpan<TFrom, TTo, ConvertSaturatingFallbackOperator<TFrom, TTo>>(source, destination);
-            }
-        }
-
-        /// <summary>
-        /// Copies <paramref name="source"/> to <paramref name="destination"/>, converting each <typeparamref name="TFrom"/>
-        /// value to a <typeparamref name="TTo"/> value.
-        /// </summary>
-        /// <param name="source">The source span from which to copy values.</param>
-        /// <param name="destination">The destination span into which the converted values should be written.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = TTo.CreateTruncating(<paramref name="source"/>[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void ConvertTruncating<TFrom, TTo>(ReadOnlySpan<TFrom> source, Span<TTo> destination)
-            where TFrom : INumberBase<TFrom>
-            where TTo : INumberBase<TTo>
-        {
-            if (TryConvertUniversal(source, destination))
-            {
-                return;
-            }
-
-            if (((typeof(TFrom) == typeof(byte) || typeof(TFrom) == typeof(sbyte)) && (typeof(TTo) == typeof(byte) || typeof(TTo) == typeof(sbyte))) ||
-                ((typeof(TFrom) == typeof(ushort) || typeof(TFrom) == typeof(short)) && (typeof(TTo) == typeof(ushort) || typeof(TTo) == typeof(short))) ||
-                ((IsUInt32Like<TFrom>() || IsInt32Like<TFrom>()) && (IsUInt32Like<TTo>() || IsInt32Like<TTo>())) ||
-                ((IsUInt64Like<TFrom>() || IsInt64Like<TFrom>()) && (IsUInt64Like<TTo>() || IsInt64Like<TTo>())))
-            {
-                source.CopyTo(Rename<TTo, TFrom>(destination));
-                return;
-            }
-
-            if (typeof(TFrom) == typeof(float) && IsUInt32Like<TTo>())
-            {
-                InvokeSpanIntoSpan<float, uint, ConvertSingleToUInt32>(Rename<TFrom, float>(source), Rename<TTo, uint>(destination));
-                return;
-            }
-
-            if (typeof(TFrom) == typeof(float) && IsInt32Like<TTo>())
-            {
-                InvokeSpanIntoSpan<float, int, ConvertSingleToInt32>(Rename<TFrom, float>(source), Rename<TTo, int>(destination));
-                return;
-            }
-
-            if (typeof(TFrom) == typeof(double) && IsUInt64Like<TTo>())
-            {
-                InvokeSpanIntoSpan<double, ulong, ConvertDoubleToUInt64>(Rename<TFrom, double>(source), Rename<TTo, ulong>(destination));
-                return;
-            }
-
-            if (typeof(TFrom) == typeof(double) && IsInt64Like<TTo>())
-            {
-                InvokeSpanIntoSpan<double, long, ConvertDoubleToInt64>(Rename<TFrom, double>(source), Rename<TTo, long>(destination));
-                return;
-            }
-
-            if (typeof(TFrom) == typeof(ushort) && typeof(TTo) == typeof(byte))
-            {
-                InvokeSpanIntoSpan_2to1<ushort, byte, NarrowUInt16ToByteOperator>(Rename<TFrom, ushort>(source), Rename<TTo, byte>(destination));
-                return;
-            }
-
-            if (typeof(TFrom) == typeof(short) && typeof(TTo) == typeof(sbyte))
-            {
-                InvokeSpanIntoSpan_2to1<short, sbyte, NarrowInt16ToSByteOperator>(Rename<TFrom, short>(source), Rename<TTo, sbyte>(destination));
-                return;
-            }
-
-            if (IsUInt32Like<TFrom>() && typeof(TTo) == typeof(ushort))
-            {
-                InvokeSpanIntoSpan_2to1<uint, ushort, NarrowUInt32ToUInt16Operator>(Rename<TFrom, uint>(source), Rename<TTo, ushort>(destination));
-                return;
-            }
-
-            if (IsInt32Like<TFrom>() && typeof(TTo) == typeof(short))
-            {
-                InvokeSpanIntoSpan_2to1<int, short, NarrowInt32ToInt16Operator>(Rename<TFrom, int>(source), Rename<TTo, short>(destination));
-                return;
-            }
-
-            if (IsUInt64Like<TFrom>() && IsUInt32Like<TTo>())
-            {
-                InvokeSpanIntoSpan_2to1<ulong, uint, NarrowUInt64ToUInt32Operator>(Rename<TFrom, ulong>(source), Rename<TTo, uint>(destination));
-                return;
-            }
-
-            if (IsInt64Like<TFrom>() && IsInt32Like<TTo>())
-            {
-                InvokeSpanIntoSpan_2to1<long, int, NarrowInt64ToInt32Operator>(Rename<TFrom, long>(source), Rename<TTo, int>(destination));
-                return;
-            }
-
-            InvokeSpanIntoSpan<TFrom, TTo, ConvertTruncatingFallbackOperator<TFrom, TTo>>(source, destination);
-        }
-
-        /// <summary>Performs conversions that are the same regardless of checked, truncating, or saturation.</summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)] // at most one of the branches will be kept
-        private static bool TryConvertUniversal<TFrom, TTo>(ReadOnlySpan<TFrom> source, Span<TTo> destination)
-            where TFrom : INumberBase<TFrom>
-            where TTo : INumberBase<TTo>
-        {
-            if (typeof(TFrom) == typeof(TTo))
-            {
-                if (source.Length > destination.Length)
-                {
-                    ThrowHelper.ThrowArgument_DestinationTooShort();
-                }
-
-                ValidateInputOutputSpanNonOverlapping(source, Rename<TTo, TFrom>(destination));
-
-                source.CopyTo(Rename<TTo, TFrom>(destination));
-                return true;
-            }
-
-            if (IsInt32Like<TFrom>() && typeof(TTo) == typeof(float))
-            {
-                InvokeSpanIntoSpan<int, float, ConvertInt32ToSingle>(Rename<TFrom, int>(source), Rename<TTo, float>(destination));
-                return true;
-            }
-
-            if (IsUInt32Like<TFrom>() && typeof(TTo) == typeof(float))
-            {
-                InvokeSpanIntoSpan<uint, float, ConvertUInt32ToSingle>(Rename<TFrom, uint>(source), Rename<TTo, float>(destination));
-                return true;
-            }
-
-            if (IsInt64Like<TFrom>() && typeof(TTo) == typeof(double))
-            {
-                InvokeSpanIntoSpan<long, double, ConvertInt64ToDouble>(Rename<TFrom, long>(source), Rename<TTo, double>(destination));
-                return true;
-            }
-
-            if (IsUInt64Like<TFrom>() && typeof(TTo) == typeof(double))
-            {
-                InvokeSpanIntoSpan<ulong, double, ConvertUInt64ToDouble>(Rename<TFrom, ulong>(source), Rename<TTo, double>(destination));
-                return true;
-            }
-
-            if (typeof(TFrom) == typeof(float) && typeof(TTo) == typeof(Half))
-            {
-                InvokeSpanIntoSpan_2to1<float, ushort, NarrowSingleToHalfAsUInt16Operator>(Rename<TFrom, float>(source), Rename<TTo, ushort>(destination));
-                return true;
-            }
-
-            if (typeof(TFrom) == typeof(Half) && typeof(TTo) == typeof(float))
-            {
-                InvokeSpanIntoSpan_1to2<short, float, WidenHalfAsInt16ToSingleOperator>(Rename<TFrom, short>(source), Rename<TTo, float>(destination));
-                return true;
-            }
-
-            if (typeof(TFrom) == typeof(float) && typeof(TTo) == typeof(double))
-            {
-                InvokeSpanIntoSpan_1to2<float, double, WidenSingleToDoubleOperator>(Rename<TFrom, float>(source), Rename<TTo, double>(destination));
-                return true;
-            }
-
-            if (typeof(TFrom) == typeof(double) && typeof(TTo) == typeof(float))
-            {
-                InvokeSpanIntoSpan_2to1<double, float, NarrowDoubleToSingleOperator>(Rename<TFrom, double>(source), Rename<TTo, float>(destination));
-                return true;
-            }
-
-            if (typeof(TFrom) == typeof(byte) && typeof(TTo) == typeof(ushort))
-            {
-                InvokeSpanIntoSpan_1to2<byte, ushort, WidenByteToUInt16Operator>(Rename<TFrom, byte>(source), Rename<TTo, ushort>(destination));
-                return true;
-            }
-
-            if (typeof(TFrom) == typeof(sbyte) && typeof(TTo) == typeof(short))
-            {
-                InvokeSpanIntoSpan_1to2<sbyte, short, WidenSByteToInt16Operator>(Rename<TFrom, sbyte>(source), Rename<TTo, short>(destination));
-                return true;
-            }
-
-            if (typeof(TFrom) == typeof(ushort) && IsUInt32Like<TTo>())
-            {
-                InvokeSpanIntoSpan_1to2<ushort, uint, WidenUInt16ToUInt32Operator>(Rename<TFrom, ushort>(source), Rename<TTo, uint>(destination));
-                return true;
-            }
-
-            if (typeof(TFrom) == typeof(short) && IsInt32Like<TTo>())
-            {
-                InvokeSpanIntoSpan_1to2<short, int, WidenInt16ToInt32Operator>(Rename<TFrom, short>(source), Rename<TTo, int>(destination));
-                return true;
-            }
-
-            if (IsUInt32Like<TTo>() && IsUInt64Like<TTo>())
-            {
-                InvokeSpanIntoSpan_1to2<uint, ulong, WidenUInt32ToUInt64Operator>(Rename<TFrom, uint>(source), Rename<TTo, ulong>(destination));
-                return true;
-            }
-
-            if (IsInt32Like<TFrom>() && IsInt64Like<TTo>())
-            {
-                InvokeSpanIntoSpan_1to2<int, long, WidenInt32ToInt64Operator>(Rename<TFrom, int>(source), Rename<TTo, long>(destination));
-                return true;
-            }
-
-            return false;
-        }
-
-        /// <summary>Computes the element-wise result of copying the sign from one number to another number in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="sign">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="sign" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="sign"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.CopySign(<paramref name="x" />[i], <paramref name="sign" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void CopySign<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> sign, Span<T> destination)
-            where T : INumber<T> =>
-            InvokeSpanSpanIntoSpan<T, CopySignOperator<T>>(x, sign, destination);
-
-        /// <summary>Computes the element-wise result of copying the sign from one number to another number in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="sign">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.CopySign(<paramref name="x" />[i], <paramref name="sign" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void CopySign<T>(ReadOnlySpan<T> x, T sign, Span<T> destination)
-            where T : INumber<T> =>
-            InvokeSpanScalarIntoSpan<T, CopySignOperator<T>>(x, sign, destination);
-
-        /// <summary>Computes the element-wise cosine of the value in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Cos(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Cos<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, CosOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise cosine of the value in the specified tensor that has been multiplied by Pi.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.CosPi(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void CosPi<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, CosPiOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise hyperbolic cosine of each radian angle in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Cosh(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/> or <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
-        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is also NaN.
-        /// </para>
-        /// <para>
-        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Cosh<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IHyperbolicFunctions<T> =>
-            InvokeSpanIntoSpan<T, CoshOperator<T>>(x, destination);
-
-        /// <summary>Computes the cosine similarity between the two specified non-empty, equal-length tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <returns>The cosine similarity of the two tensors.</returns>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x" /> and <paramref name="y" /> must not be empty.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c>TensorPrimitives.Dot(x, y) / (<typeparamref name="T"/>.Sqrt(TensorPrimitives.SumOfSquares(x)) * <typeparamref name="T"/>.Sqrt(TensorPrimitives.SumOfSquares(y)).</c>
-        /// </para>
-        /// <para>
-        /// If any element in either input tensor is equal to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>, <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>, or <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
-        /// NaN is returned.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T CosineSimilarity<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y)
-            where T : IRootFunctions<T> =>
-            CosineSimilarityCore(x, y);
-
-        /// <summary>Computes the element-wise cube root of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Cbrt(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void Cbrt<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IRootFunctions<T> =>
-            InvokeSpanIntoSpan<T, CbrtOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise conversion of each number of degrees in the specified tensor to radiansx.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.DegreesToRadians(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void DegreesToRadians<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, DegreesToRadiansOperator<T>>(x, destination);
-
-        /// <summary>Computes the distance between two points, specified as non-empty, equal-length tensors of numbers, in Euclidean space.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <returns>The Euclidean distance.</returns>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x" /> and <paramref name="y" /> must not be empty.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes the equivalent of:
-        /// <c>
-        ///     Span&lt;T&gt; difference = ...;
-        ///     TensorPrimitives.Subtract(x, y, difference);
-        ///     T result = <typeparamref name="T"/>.Sqrt(TensorPrimitives.SumOfSquares(difference));
-        /// </c>
-        /// but without requiring additional temporary storage for the intermediate differences.
-        /// </para>
-        /// <para>
-        /// If any element in either input tensor is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, NaN is returned.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T Distance<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y)
-            where T : IRootFunctions<T>
-        {
-            if (x.IsEmpty)
-            {
-                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
-            }
-
-            return T.Sqrt(Aggregate<T, SubtractSquaredOperator<T>, AddOperator<T>>(x, y));
-        }
-
-        /// <summary>Computes the element-wise division of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and an element in <paramref name="y"/> is equal to zero.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] / <paramref name="y" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Divide<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : IDivisionOperators<T, T, T> =>
-            InvokeSpanSpanIntoSpan<T, DivideOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise division of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and <paramref name="y"/> is equal to zero.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] / <paramref name="y" /></c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Divide<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : IDivisionOperators<T, T, T> =>
-            InvokeSpanScalarIntoSpan<T, DivideOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise division of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a scalar.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and an element in <paramref name="y"/> is equal to zero.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" /> / <paramref name="y" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Divide<T>(T x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : IDivisionOperators<T, T, T> =>
-            InvokeScalarSpanIntoSpan<T, DivideOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the dot product of two tensors containing numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <returns>The dot product.</returns>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes the equivalent of:
-        /// <c>
-        ///     Span&lt;T&gt; products = ...;
-        ///     TensorPrimitives.Multiply(x, y, products);
-        ///     T result = TensorPrimitives.Sum(products);
-        /// </c>
-        /// but without requiring additional temporary storage for the intermediate products. It corresponds to the <c>dot</c> method defined by <c>BLAS1</c>.
-        /// </para>
-        /// <para>
-        /// If any of the input elements is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting value is also NaN.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T Dot<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y)
-            where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T>, IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T> =>
-            Aggregate<T, MultiplyOperator<T>, AddOperator<T>>(x, y);
-
-        /// <summary>Computes the element-wise result of raising <c>e</c> to the number powers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Exp(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// If a value equals <see cref="IFloatingPointIeee754{TSelf}.NaN"/> or <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>, the result stored into the corresponding destination location is set to NaN.
-        /// If a value equals <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>, the result stored into the corresponding destination location is set to 0.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Exp<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IExponentialFunctions<T> =>
-            InvokeSpanIntoSpan<T, ExpOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise result of raising <c>e</c> to the number powers in the specified tensor, minus 1.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.ExpM1(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void ExpM1<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IExponentialFunctions<T> =>
-            InvokeSpanIntoSpan<T, ExpM1Operator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise result of raising 2 to the number powers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Exp2(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Exp2<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IExponentialFunctions<T> =>
-            InvokeSpanIntoSpan<T, Exp2Operator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise result of raising 2 to the number powers in the specified tensor, minus one.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Exp2M1(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Exp2M1<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IExponentialFunctions<T> =>
-            InvokeSpanIntoSpan<T, Exp2M1Operator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise result of raising 10 to the number powers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Exp10(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Exp10<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IExponentialFunctions<T> =>
-            InvokeSpanIntoSpan<T, Exp10Operator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise result of raising 10 to the number powers in the specified tensor, minus one.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Exp10M1(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Exp10M1<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IExponentialFunctions<T> =>
-            InvokeSpanIntoSpan<T, Exp10M1Operator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise floor of numbers in the specified tensor.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Floor(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void Floor<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IFloatingPoint<T> =>
-            InvokeSpanIntoSpan<T, FloorOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise hypotensue given values from two tensors representing the lengths of the shorter sides in a right-angled triangle.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Hypot(<paramref name="x" />[i], <paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void Hypot<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : IRootFunctions<T> =>
-            InvokeSpanSpanIntoSpan<T, HypotOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise remainder of the numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Ieee754Remainder(<paramref name="x" />[i], <paramref name="y" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void Ieee754Remainder<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanSpanIntoSpan<T, Ieee754RemainderOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise remainder of the numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Ieee754Remainder(<paramref name="x" />[i], <paramref name="y" />)</c>.
-        /// </para>
-        /// </remarks>
-        public static void Ieee754Remainder<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanScalarIntoSpan<T, Ieee754RemainderOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise remainder of the numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a scalar.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Ieee754Remainder(<paramref name="x" />, <paramref name="y" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void Ieee754Remainder<T>(T x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeScalarSpanIntoSpan<T, Ieee754RemainderOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise integer logarithm of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.ILogB(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void ILogB<T>(ReadOnlySpan<T> x, Span<int> destination)
-            where T : IFloatingPointIeee754<T>
-        {
-            if (typeof(T) == typeof(double))
-            {
-                // Special-case double as the only vectorizable floating-point type whose size != sizeof(int).
-                InvokeSpanIntoSpan_2to1<double, int, ILogBDoubleOperator>(Rename<T, double>(x), destination);
-            }
-            else
-            {
-                InvokeSpanIntoSpan<T, int, ILogBOperator<T>>(x, destination);
-            }
-        }
-
-        /// <summary>Searches for the index of the largest number in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <returns>The index of the maximum element in <paramref name="x"/>, or -1 if <paramref name="x"/> is empty.</returns>
-        /// <remarks>
-        /// <para>
-        /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If any value equal to NaN
-        /// is present, the index of the first is returned. Positive 0 is considered greater than negative 0.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static int IndexOfMax<T>(ReadOnlySpan<T> x)
-            where T : INumber<T> =>
-            IndexOfMinMaxCore<T, IndexOfMaxOperator<T>>(x);
-
-        /// <summary>Searches for the index of the number with the largest magnitude in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <returns>The index of the element in <paramref name="x"/> with the largest magnitude (absolute value), or -1 if <paramref name="x"/> is empty.</returns>
-        /// <remarks>
-        /// <para>
-        /// The determination of the maximum magnitude matches the IEEE 754:2019 `maximumMagnitude` function. If any value equal to NaN
-        /// is present, the index of the first is returned. If two values have the same magnitude and one is positive and the other is negative,
-        /// the positive value is considered to have the larger magnitude.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static int IndexOfMaxMagnitude<T>(ReadOnlySpan<T> x)
-            where T : INumber<T> =>
-            IndexOfMinMaxCore<T, IndexOfMaxMagnitudeOperator<T>>(x);
-
-        /// <summary>Searches for the index of the smallest number in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <returns>The index of the minimum element in <paramref name="x"/>, or -1 if <paramref name="x"/> is empty.</returns>
-        /// <remarks>
-        /// <para>
-        /// The determination of the minimum element matches the IEEE 754:2019 `minimum` function. If any value equal to NaN
-        /// is present, the index of the first is returned. Negative 0 is considered smaller than positive 0.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static int IndexOfMin<T>(ReadOnlySpan<T> x)
-            where T : INumber<T> =>
-            IndexOfMinMaxCore<T, IndexOfMinOperator<T>>(x);
-
-        /// <summary>Searches for the index of the number with the smallest magnitude in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <returns>The index of the element in <paramref name="x"/> with the smallest magnitude (absolute value), or -1 if <paramref name="x"/> is empty.</returns>
-        /// <remarks>
-        /// <para>
-        /// The determination of the minimum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If any value equal to NaN
-        /// is present, the index of the first is returned. If two values have the same magnitude and one is positive and the other is negative,
-        /// the negative value is considered to have the smaller magnitude.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static int IndexOfMinMagnitude<T>(ReadOnlySpan<T> x)
-            where T : INumber<T> =>
-            IndexOfMinMaxCore<T, IndexOfMinMagnitudeOperator<T>>(x);
-
-        /// <summary>Computes the element-wise leading zero count of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.LeadingZeroCount(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void LeadingZeroCount<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IBinaryInteger<T> =>
-            InvokeSpanIntoSpan<T, LeadingZeroCountOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise linear interpolation between two values based on the given weight in the specified tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="amount">The third tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" /> and length of <paramref name="amount" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="amount"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Lerp(<paramref name="x" />[i], <paramref name="y" />[i], <paramref name="amount" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Lerp<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, ReadOnlySpan<T> amount, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanSpanSpanIntoSpan<T, LerpOperator<T>>(x, y, amount, destination);
-
-        /// <summary>Computes the element-wise linear interpolation between two values based on the given weight in the specified tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="amount">The third tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Lerp(<paramref name="x" />[i], <paramref name="y" />[i], <paramref name="amount" />)</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Lerp<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, T amount, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanSpanScalarIntoSpan<T, LerpOperator<T>>(x, y, amount, destination);
-
-        /// <summary>Computes the element-wise linear interpolation between two values based on the given weight in the specified tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="amount">The third tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="amount" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="amount"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Lerp(<paramref name="x" />[i], <paramref name="y" />, <paramref name="amount" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Lerp<T>(ReadOnlySpan<T> x, T y, ReadOnlySpan<T> amount, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanScalarSpanIntoSpan<T, LerpOperator<T>>(x, y, amount, destination);
-
-        /// <summary>Computes the element-wise natural (base <c>e</c>) logarithm of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>.
-        /// If a value is negative or equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is set to NaN.
-        /// If a value is positive infinity, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
-        /// Otherwise, if a value is positive, its natural logarithm is stored into the corresponding destination location.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Log<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ILogarithmicFunctions<T> =>
-            InvokeSpanIntoSpan<T, LogOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise base 2 logarithm of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log2(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>.
-        /// If a value is negative or equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is set to NaN.
-        /// If a value is positive infinity, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
-        /// Otherwise, if a value is positive, its base 2 logarithm is stored into the corresponding destination location.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Log2<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ILogarithmicFunctions<T> =>
-            InvokeSpanIntoSpan<T, Log2Operator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise base 10 logarithm of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log10(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>.
-        /// If a value is negative or equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is set to NaN.
-        /// If a value is positive infinity, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
-        /// Otherwise, if a value is positive, its base 10 logarithm is stored into the corresponding destination location.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Log10<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ILogarithmicFunctions<T> =>
-            InvokeSpanIntoSpan<T, Log10Operator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise natural (base <c>e</c>) logarithm of numbers in the specified tensor plus 1.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.LogP1(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>.
-        /// If a value is negative or equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is set to NaN.
-        /// If a value is positive infinity, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
-        /// Otherwise, if a value is positive, its natural logarithm plus 1 is stored into the corresponding destination location.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void LogP1<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ILogarithmicFunctions<T> =>
-            InvokeSpanIntoSpan<T, LogP1Operator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise base 2 logarithm of numbers in the specified tensor plus 1.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log2P1(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>.
-        /// If a value is negative or equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is set to NaN.
-        /// If a value is positive infinity, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
-        /// Otherwise, if a value is positive, its base 2 logarithm plus 1 is stored into the corresponding destination location.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Log2P1<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ILogarithmicFunctions<T> =>
-            InvokeSpanIntoSpan<T, Log2P1Operator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise base 10 logarithm of numbers in the specified tensor plus 1.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log10P1(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>.
-        /// If a value is negative or equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result stored into the corresponding destination location is set to NaN.
-        /// If a value is positive infinity, the result stored into the corresponding destination location is set to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>.
-        /// Otherwise, if a value is positive, its base 10 logarithm plus 1 is stored into the corresponding destination location.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Log10P1<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ILogarithmicFunctions<T> =>
-            InvokeSpanIntoSpan<T, Log10P1Operator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise logarithm of the numbers in a specified tensor to the specified base in another specified tensor.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log(<paramref name="x" />[i], <paramref name="y" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Log<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : ILogarithmicFunctions<T> =>
-            InvokeSpanSpanIntoSpan<T, LogBaseOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise logarithm of the numbers in a specified tensor to the specified base in another specified tensor.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Log(<paramref name="x" />[i], <paramref name="y" />)</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Log<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : ILogarithmicFunctions<T> =>
-            InvokeSpanScalarIntoSpan<T, LogBaseOperator<T>>(x, y, destination);
-
-        /// <summary>Searches for the largest number in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <returns>The maximum element in <paramref name="x"/>.</returns>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be greater than zero.</exception>
-        /// <remarks>
-        /// <para>
-        /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If any value equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>
-        /// is present, the first is returned. Positive 0 is considered greater than negative 0.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T Max<T>(ReadOnlySpan<T> x)
-            where T : INumber<T> =>
-            MinMaxCore<T, MaxOperator<T>>(x);
-
-        /// <summary>Computes the element-wise maximum of the numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Max(<paramref name="x" />[i], <paramref name="y" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
-        /// that value is stored as the result. Positive 0 is considered greater than negative 0.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Max<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : INumber<T> =>
-            InvokeSpanSpanIntoSpan<T, MaxPropagateNaNOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise maximum of the numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Max(<paramref name="x" />[i], <paramref name="y" />)</c>.
-        /// </para>
-        /// <para>
-        /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
-        /// that value is stored as the result. Positive 0 is considered greater than negative 0.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Max<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : INumber<T> =>
-            InvokeSpanScalarIntoSpan<T, MaxPropagateNaNOperator<T>>(x, y, destination);
-
-        /// <summary>Searches for the number with the largest magnitude in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <returns>The element in <paramref name="x"/> with the largest magnitude (absolute value).</returns>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be greater than zero.</exception>
-        /// <remarks>
-        /// <para>
-        /// The determination of the maximum magnitude matches the IEEE 754:2019 `maximumMagnitude` function. If any value equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>
-        /// is present, the first is returned. If two values have the same magnitude and one is positive and the other is negative,
-        /// the positive value is considered to have the larger magnitude.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T MaxMagnitude<T>(ReadOnlySpan<T> x)
-            where T : INumberBase<T> =>
-            MinMaxCore<T, MaxMagnitudeOperator<T>>(x);
-
-        /// <summary>Computes the element-wise number with the largest magnitude in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.MaxMagnitude(<paramref name="x" />[i], <paramref name="y" />[i])</c>.</remarks>
-        /// <remarks>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void MaxMagnitude<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : INumberBase<T> =>
-            InvokeSpanSpanIntoSpan<T, MaxMagnitudePropagateNaNOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise number with the largest magnitude in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.MaxMagnitude(<paramref name="x" />[i], <paramref name="y" />)</c>.</remarks>
-        /// <remarks>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void MaxMagnitude<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : INumberBase<T> =>
-            InvokeSpanScalarIntoSpan<T, MaxMagnitudePropagateNaNOperator<T>>(x, y, destination);
-
-        /// <summary>Searches for the smallest number in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <returns>The minimum element in <paramref name="x"/>.</returns>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be greater than zero.</exception>
-        /// <remarks>
-        /// <para>
-        /// The determination of the minimum element matches the IEEE 754:2019 `minimum` function. If any value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>
-        /// is present, the first is returned. Negative 0 is considered smaller than positive 0.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T Min<T>(ReadOnlySpan<T> x)
-            where T : INumber<T> =>
-            MinMaxCore<T, MinOperator<T>>(x);
-
-        /// <summary>Computes the element-wise minimum of the numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Max(<paramref name="x" />[i], <paramref name="y" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
-        /// that value is stored as the result. Positive 0 is considered greater than negative 0.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Min<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : INumber<T> =>
-            InvokeSpanSpanIntoSpan<T, MinPropagateNaNOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise minimum of the numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Max(<paramref name="x" />[i], <paramref name="y" />)</c>.
-        /// </para>
-        /// <para>
-        /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
-        /// that value is stored as the result. Positive 0 is considered greater than negative 0.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Min<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : INumber<T> =>
-            InvokeSpanScalarIntoSpan<T, MinPropagateNaNOperator<T>>(x, y, destination);
-
-        /// <summary>Searches for the number with the smallest magnitude in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <returns>The element in <paramref name="x"/> with the smallest magnitude (absolute value).</returns>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be greater than zero.</exception>
-        /// <remarks>
-        /// <para>
-        /// The determination of the minimum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If any value equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>
-        /// is present, the first is returned. If two values have the same magnitude and one is positive and the other is negative,
-        /// the negative value is considered to have the smaller magnitude.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T MinMagnitude<T>(ReadOnlySpan<T> x)
-            where T : INumberBase<T> =>
-            MinMaxCore<T, MinMagnitudeOperator<T>>(x);
-
-        /// <summary>Computes the element-wise number with the smallest magnitude in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.MinMagnitude(<paramref name="x" />[i], <paramref name="y" />[i])</c>.</remarks>
-        /// <remarks>
-        /// <para>
-        /// The determination of the maximum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If either value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
-        /// that value is stored as the result. If the two values have the same magnitude and one is positive and the other is negative,
-        /// the negative value is considered to have the smaller magnitude.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void MinMagnitude<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : INumberBase<T> =>
-            InvokeSpanSpanIntoSpan<T, MinMagnitudePropagateNaNOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise number with the smallest magnitude in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.MinMagnitude(<paramref name="x" />[i], <paramref name="y" />)</c>.</remarks>
-        /// <remarks>
-        /// <para>
-        /// The determination of the maximum magnitude matches the IEEE 754:2019 `minimumMagnitude` function. If either value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
-        /// that value is stored as the result. If the two values have the same magnitude and one is positive and the other is negative,
-        /// the negative value is considered to have the smaller magnitude.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void MinMagnitude<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : INumberBase<T> =>
-            InvokeSpanScalarIntoSpan<T, MinMagnitudePropagateNaNOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise product of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] * <paramref name="y" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Multiply<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T> =>
-            InvokeSpanSpanIntoSpan<T, MultiplyOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise product of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] * <paramref name="y" /></c>.
-        /// It corresponds to the <c>scal</c> method defined by <c>BLAS1</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Multiply<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T> =>
-            InvokeSpanScalarIntoSpan<T, MultiplyOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="addend">The third tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" /> and length of <paramref name="addend" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="addend"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void MultiplyAdd<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, ReadOnlySpan<T> addend, Span<T> destination)
-            where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T> =>
-            InvokeSpanSpanSpanIntoSpan<T, MultiplyAddOperator<T>>(x, y, addend, destination);
-
-        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="addend">The third tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" /></c>.
-        /// It corresponds to the <c>axpy</c> method defined by <c>BLAS1</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void MultiplyAdd<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, T addend, Span<T> destination)
-            where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T> =>
-            InvokeSpanSpanScalarIntoSpan<T, MultiplyAddOperator<T>>(x, y, addend, destination);
-
-        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="addend">The third tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="addend" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="addend"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />) + <paramref name="addend" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void MultiplyAdd<T>(ReadOnlySpan<T> x, T y, ReadOnlySpan<T> addend, Span<T> destination)
-            where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T> =>
-            InvokeSpanScalarSpanIntoSpan<T, MultiplyAddOperator<T>>(x, y, addend, destination);
-
-        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="addend">The third tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" /> and length of <paramref name="addend" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="addend"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// <para>
-        /// Behaves the same as either <see cref="MultiplyAdd{T}(ReadOnlySpan{T}, ReadOnlySpan{T}, ReadOnlySpan{T}, Span{T})"/> or
-        /// <see cref="FusedMultiplyAdd{T}(ReadOnlySpan{T}, ReadOnlySpan{T}, ReadOnlySpan{T}, Span{T})"/> depending on the current machine's capabilities.
-        /// </para>
-        /// </remarks>
-        public static void MultiplyAddEstimate<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, ReadOnlySpan<T> addend, Span<T> destination)
-            where T : INumberBase<T> =>
-            InvokeSpanSpanSpanIntoSpan<T, MultiplyAddEstimateOperator<T>>(x, y, addend, destination);
-
-        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="addend">The third tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" /></c>.
-        /// It corresponds to the <c>axpy</c> method defined by <c>BLAS1</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// <para>
-        /// Behaves the same as either <see cref="MultiplyAdd{T}(ReadOnlySpan{T}, ReadOnlySpan{T}, T, Span{T})"/> or
-        /// <see cref="FusedMultiplyAdd{T}(ReadOnlySpan{T}, ReadOnlySpan{T}, T, Span{T})"/> depending on the current machine's capabilities.
-        /// </para>
-        /// </remarks>
-        public static void MultiplyAddEstimate<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, T addend, Span<T> destination)
-            where T : INumberBase<T> =>
-            InvokeSpanSpanScalarIntoSpan<T, MultiplyAddEstimateOperator<T>>(x, y, addend, destination);
-
-        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="addend">The third tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="addend" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="addend"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />) + <paramref name="addend" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// <para>
-        /// Behaves the same as either <see cref="MultiplyAdd{T}(ReadOnlySpan{T}, T, ReadOnlySpan{T}, Span{T})"/> or
-        /// <see cref="FusedMultiplyAdd{T}(ReadOnlySpan{T}, T, ReadOnlySpan{T}, Span{T})"/> depending on the current machine's capabilities.
-        /// </para>
-        /// </remarks>
-        public static void MultiplyAddEstimate<T>(ReadOnlySpan<T> x, T y, ReadOnlySpan<T> addend, Span<T> destination)
-            where T : INumberBase<T> =>
-            InvokeSpanScalarSpanIntoSpan<T, MultiplyAddEstimateOperator<T>>(x, y, addend, destination);
-
-        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="addend">The third tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" /> and length of <paramref name="addend" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="addend"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// <para>
-        /// This computes (<paramref name="x"/> * <paramref name="y"/>) as if to infinite precision, adds <paramref name="addend"/> to that result as if to
-        /// infinite precision, and finally rounds to the nearest representable value. This differs from the non-fused sequence which would compute
-        /// (<paramref name="x"/> * <paramref name="y"/>) as if to infinite precision, round the result to the nearest representable value, add <paramref name="addend"/> to the
-        /// rounded result as if to infinite precision, and finally round to the nearest representable value.
-        /// </para>
-        /// </remarks>
-        public static void FusedMultiplyAdd<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, ReadOnlySpan<T> addend, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanSpanSpanIntoSpan<T, FusedMultiplyAddOperator<T>>(x, y, addend, destination);
-
-        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="addend">The third tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" /></c>.
-        /// It corresponds to the <c>axpy</c> method defined by <c>BLAS1</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// <para>
-        /// This computes (<paramref name="x"/> * <paramref name="y"/>) as if to infinite precision, adds <paramref name="addend"/> to that result as if to
-        /// infinite precision, and finally rounds to the nearest representable value. This differs from the non-fused sequence which would compute
-        /// (<paramref name="x"/> * <paramref name="y"/>) as if to infinite precision, round the result to the nearest representable value, add <paramref name="addend"/> to the
-        /// rounded result as if to infinite precision, and finally round to the nearest representable value.
-        /// </para>
-        /// </remarks>
-        public static void FusedMultiplyAdd<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, T addend, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanSpanScalarIntoSpan<T, FusedMultiplyAddOperator<T>>(x, y, addend, destination);
-
-        /// <summary>Computes the element-wise result of <c>(<paramref name="x" /> * <paramref name="y" />) * <paramref name="addend" /></c> for the specified tensors of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="addend">The third tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="addend" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="addend"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />) + <paramref name="addend" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// <para>
-        /// This computes (<paramref name="x"/> * <paramref name="y"/>) as if to infinite precision, adds <paramref name="addend"/> to that result as if to
-        /// infinite precision, and finally rounds to the nearest representable value. This differs from the non-fused sequence which would compute
-        /// (<paramref name="x"/> * <paramref name="y"/>) as if to infinite precision, round the result to the nearest representable value, add <paramref name="addend"/> to the
-        /// rounded result as if to infinite precision, and finally round to the nearest representable value.
-        /// </para>
-        /// </remarks>
-        public static void FusedMultiplyAdd<T>(ReadOnlySpan<T> x, T y, ReadOnlySpan<T> addend, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanScalarSpanIntoSpan<T, FusedMultiplyAddOperator<T>>(x, y, addend, destination);
-
-        /// <summary>Computes the element-wise negation of each number in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = -<paramref name="x" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If any of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Negate<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IUnaryNegationOperators<T, T> =>
-            InvokeSpanIntoSpan<T, NegateOperator<T>>(x, destination);
-
-        /// <summary>Computes the Euclidean norm of the specified tensor of numbers.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <returns>The norm.</returns>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><typeparamref name="T"/>.Sqrt(TensorPrimitives.SumOfSquares(x))</c>.
-        /// This is often referred to as the Euclidean norm or L2 norm.
-        /// It corresponds to the <c>nrm2</c> method defined by <c>BLAS1</c>.
-        /// </para>
-        /// <para>
-        /// If any of the input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result value is also NaN.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T Norm<T>(ReadOnlySpan<T> x)
-            where T : IRootFunctions<T> =>
-            T.Sqrt(SumOfSquares(x));
-
-        /// <summary>Computes the element-wise one's complement of numbers in the specified tensor.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = ~<paramref name="x" />[i]</c>.
-        /// </para>
-        /// </remarks>
-        public static void OnesComplement<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IBitwiseOperators<T, T, T> =>
-            InvokeSpanIntoSpan<T, OnesComplementOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise population count of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.PopCount(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void PopCount<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IBinaryInteger<T> =>
-            InvokeSpanIntoSpan<T, PopCountOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise power of a number in a specified tensor raised to a number in another specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Pow(<paramref name="x" />[i], <paramref name="y" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void Pow<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : IPowerFunctions<T> =>
-            InvokeSpanSpanIntoSpan<T, PowOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise power of a number in a specified tensor raised to a number in another specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Pow(<paramref name="x" />[i], <paramref name="y" />)</c>.
-        /// </para>
-        /// </remarks>
-        public static void Pow<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : IPowerFunctions<T> =>
-            InvokeSpanScalarIntoSpan<T, PowOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise power of a number in a specified tensor raised to a number in another specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a scalar.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Pow(<paramref name="x" />, <paramref name="y" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void Pow<T>(T x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : IPowerFunctions<T> =>
-            InvokeScalarSpanIntoSpan<T, PowOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the product of all elements in the specified non-empty tensor of numbers.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <returns>The result of multiplying all elements in <paramref name="x"/>.</returns>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be greater than zero.</exception>
-        /// <remarks>
-        /// <para>
-        /// If any of the input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result value is also NaN.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T Product<T>(ReadOnlySpan<T> x)
-            where T : IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T>
-        {
-            if (x.IsEmpty)
-            {
-                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
-            }
-
-            return Aggregate<T, IdentityOperator<T>, MultiplyOperator<T>>(x);
-        }
-
-        /// <summary>Computes the product of the element-wise differences of the numbers in the specified non-empty tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <returns>The result of multiplying the element-wise subtraction of the elements in the second tensor from the first tensor.</returns>
-        /// <exception cref="ArgumentException">Length of both input spans must be greater than zero.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="y"/> must have the same length.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes:
-        /// <c>
-        ///     Span&lt;T&gt; differences = ...;
-        ///     TensorPrimitives.Subtract(x, y, differences);
-        ///     T result = TensorPrimitives.Product(differences);
-        /// </c>
-        /// but without requiring additional temporary storage for the intermediate differences.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T ProductOfDifferences<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y)
-            where T : ISubtractionOperators<T, T, T>, IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T>
-        {
-            if (x.IsEmpty)
-            {
-                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
-            }
-
-            return Aggregate<T, SubtractOperator<T>, MultiplyOperator<T>>(x, y);
-        }
-
-        /// <summary>Computes the product of the element-wise sums of the numbers in the specified non-empty tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <returns>The result of multiplying the element-wise additions of the elements in each tensor.</returns>
-        /// <exception cref="ArgumentException">Length of both input spans must be greater than zero.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="y"/> must have the same length.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes:
-        /// <c>
-        ///     Span&lt;T&gt; sums = ...;
-        ///     TensorPrimitives.Add(x, y, sums);
-        ///     T result = TensorPrimitives.Product(sums);
-        /// </c>
-        /// but without requiring additional temporary storage for the intermediate sums.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T ProductOfSums<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y)
-            where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T>, IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T>
-        {
-            if (x.IsEmpty)
-            {
-                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
-            }
-
-            return Aggregate<T, AddOperator<T>, MultiplyOperator<T>>(x, y);
-        }
-
-        /// <summary>Computes the element-wise conversion of each number of radians in the specified tensor to degrees.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.RadiansToDegrees(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void RadiansToDegrees<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, RadiansToDegreesOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise reciprocal of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and an element in <paramref name="x"/> is equal to zero.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = 1 / <paramref name="x" />[i]</c>.
-        /// </para>
-        /// </remarks>
-        public static void Reciprocal<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IFloatingPoint<T> =>
-            InvokeSpanIntoSpan<T, ReciprocalOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise reciprocal of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and an element in <paramref name="x"/> is equal to zero.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = 1 / <paramref name="x" />[i]</c>.
-        /// </para>
-        /// </remarks>
-        public static void ReciprocalEstimate<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanIntoSpan<T, ReciprocalEstimateOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise reciprocal of the square root of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and an element in <paramref name="x"/> is equal to zero.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = 1 / <paramref name="x" />[i]</c>.
-        /// </para>
-        /// </remarks>
-        public static void ReciprocalSqrt<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanIntoSpan<T, ReciprocalSqrtOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise reciprocal of the square root of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is an integer type and an element in <paramref name="x"/> is equal to zero.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = 1 / <paramref name="x" />[i]</c>.
-        /// </para>
-        /// </remarks>
-        public static void ReciprocalSqrtEstimate<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanIntoSpan<T, ReciprocalSqrtEstimateOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise n-th root of the values in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <param name="n">The degree of the root to be computed, represented as a scalar.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.RootN(<paramref name="x" />[i], <paramref name="n"/>)</c>.
-        /// </para>
-        /// </remarks>
-        public static void RootN<T>(ReadOnlySpan<T> x, int n, Span<T> destination)
-            where T : IRootFunctions<T> =>
-            InvokeSpanIntoSpan(x, new RootNOperator<T>(n), destination);
-
-        /// <summary>Computes the element-wise rotation left of numbers in the specified tensor by the specified rotation amount.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <param name="rotateAmount">The number of bits to rotate, represented as a scalar.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.RotateLeft(<paramref name="x" />[i], <paramref name="rotateAmount"/>)</c>.
-        /// </para>
-        /// </remarks>
-        public static void RotateLeft<T>(ReadOnlySpan<T> x, int rotateAmount, Span<T> destination)
-            where T : IBinaryInteger<T> =>
-            InvokeSpanIntoSpan(x, new RotateLeftOperator<T>(rotateAmount), destination);
-
-        /// <summary>Computes the element-wise rotation right of numbers in the specified tensor by the specified rotation amount.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <param name="rotateAmount">The number of bits to rotate, represented as a scalar.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.RotateRight(<paramref name="x" />[i], <paramref name="rotateAmount"/>)</c>.
-        /// </para>
-        /// </remarks>
-        public static void RotateRight<T>(ReadOnlySpan<T> x, int rotateAmount, Span<T> destination)
-            where T : IBinaryInteger<T> =>
-            InvokeSpanIntoSpan(x, new RotateRightOperator<T>(rotateAmount), destination);
-
-        /// <summary>Computes the element-wise rounding of the numbers in the specified tensor</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Round(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void Round<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IFloatingPoint<T> =>
-            InvokeSpanIntoSpan<T, RoundToEvenOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise rounding of the numbers in the specified tensor</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="mode">The mode under which <paramref name="x" /> should be rounded.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Round(<paramref name="x" />[i], <paramref name="mode"/>)</c>.
-        /// </para>
-        /// </remarks>
-        public static void Round<T>(ReadOnlySpan<T> x, MidpointRounding mode, Span<T> destination)
-            where T : IFloatingPoint<T>
-        {
-            switch (mode)
-            {
-                case MidpointRounding.ToEven:
-                    Round(x, destination);
-                    return;
-
-                case MidpointRounding.AwayFromZero:
-                    InvokeSpanIntoSpan<T, RoundAwayFromZeroOperator<T>>(x, destination);
-                    break;
-
-                case MidpointRounding.ToZero:
-                    Truncate(x, destination);
-                    return;
-
-                case MidpointRounding.ToNegativeInfinity:
-                    Floor(x, destination);
-                    return;
-
-                case MidpointRounding.ToPositiveInfinity:
-                    Ceiling(x, destination);
-                    return;
-
-                default:
-                    throw new ArgumentException(SR.Format(SR.Argument_InvalidEnumValue, mode, typeof(MidpointRounding)), nameof(mode));
-            }
-        }
-
-        /// <summary>Computes the element-wise rounding of the numbers in the specified tensor</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="digits">The number of fractional digits to which the numbers in <paramref name="x" /> should be rounded.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Round(<paramref name="x" />[i], <paramref name="digits"/>)</c>.
-        /// </para>
-        /// </remarks>
-        public static void Round<T>(ReadOnlySpan<T> x, int digits, Span<T> destination) where T : IFloatingPoint<T> =>
-            Round(x, digits, MidpointRounding.ToEven, destination);
-
-        /// <summary>Computes the element-wise rounding of the numbers in the specified tensor</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="digits">The number of fractional digits to which the numbers in <paramref name="x" /> should be rounded.</param>
-        /// <param name="mode">The mode under which <paramref name="x" /> should be rounded.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentOutOfRangeException"><paramref name="digits"/> is invalid.</exception>
-        /// <exception cref="ArgumentException"><paramref name="mode"/> is invalid.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Round(<paramref name="x" />[i], <paramref name="digits"/>, <paramref name="mode"/>)</c>.
-        /// </para>
-        /// </remarks>
-        public static void Round<T>(ReadOnlySpan<T> x, int digits, MidpointRounding mode, Span<T> destination)
-            where T : IFloatingPoint<T>
-        {
-            if (digits == 0)
-            {
-                Round(x, mode, destination);
-            }
-
-            ReadOnlySpan<T> roundPower10;
-            if (typeof(T) == typeof(float))
-            {
-                ReadOnlySpan<float> roundPower10Single = [1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f];
-                roundPower10 = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<float, T>(ref MemoryMarshal.GetReference(roundPower10Single)), roundPower10Single.Length);
-            }
-            else if (typeof(T) == typeof(double))
-            {
-                Debug.Assert(typeof(T) == typeof(double));
-                ReadOnlySpan<double> roundPower10Double = [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15];
-                roundPower10 = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<double, T>(ref MemoryMarshal.GetReference(roundPower10Double)), roundPower10Double.Length);
-            }
-            else
-            {
-                if ((uint)mode > (uint)MidpointRounding.ToPositiveInfinity)
-                {
-                    throw new ArgumentException(SR.Format(SR.Argument_InvalidEnumValue, mode, typeof(MidpointRounding)), nameof(mode));
-                }
-
-                InvokeSpanIntoSpan(x, new RoundFallbackOperator<T>(digits, mode), destination);
-                return;
-            }
-
-            if ((uint)digits >= (uint)roundPower10.Length)
-            {
-                throw new ArgumentOutOfRangeException(nameof(digits));
-            }
-
-            T power10 = roundPower10[digits];
-            switch (mode)
-            {
-                case MidpointRounding.ToEven:
-                    InvokeSpanIntoSpan(x, new MultiplyRoundDivideOperator<T, RoundToEvenOperator<T>>(power10), destination);
-                    return;
-
-                case MidpointRounding.AwayFromZero:
-                    InvokeSpanIntoSpan(x, new MultiplyRoundDivideOperator<T, RoundAwayFromZeroOperator<T>>(power10), destination);
-                    break;
-
-                case MidpointRounding.ToZero:
-                    InvokeSpanIntoSpan(x, new MultiplyRoundDivideOperator<T, TruncateOperator<T>>(power10), destination);
-                    return;
-
-                case MidpointRounding.ToNegativeInfinity:
-                    InvokeSpanIntoSpan(x, new MultiplyRoundDivideOperator<T, FloorOperator<T>>(power10), destination);
-                    return;
-
-                case MidpointRounding.ToPositiveInfinity:
-                    InvokeSpanIntoSpan(x, new MultiplyRoundDivideOperator<T, CeilingOperator<T>>(power10), destination);
-                    return;
-
-                default:
-                    throw new ArgumentException(SR.Format(SR.Argument_InvalidEnumValue, mode, typeof(MidpointRounding)), nameof(mode));
-            }
-        }
-
-        /// <summary>Computes the element-wise product of numbers in the specified tensor and their base-radix raised to the specified power.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="n">The value to which base-radix is raised before multipliying x, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.ILogB(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void ScaleB<T>(ReadOnlySpan<T> x, int n, Span<T> destination)
-            where T : IFloatingPointIeee754<T> =>
-            InvokeSpanIntoSpan(x, new ScaleBOperator<T>(n), destination);
-
-        /// <summary>Computes the element-wise shifting left of numbers in the specified tensor by the specified shift amount.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <param name="shiftAmount">The number of bits to shift, represented as a scalar.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] &lt;&lt; <paramref name="shiftAmount"/></c>.
-        /// </para>
-        /// </remarks>
-        public static void ShiftLeft<T>(ReadOnlySpan<T> x, int shiftAmount, Span<T> destination)
-            where T : IShiftOperators<T, int, T> =>
-            InvokeSpanIntoSpan(x, new ShiftLeftOperator<T>(shiftAmount), destination);
-
-        /// <summary>Computes the element-wise arithmetic (signed) shifting right of numbers in the specified tensor by the specified shift amount.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <param name="shiftAmount">The number of bits to shift, represented as a scalar.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] &gt;&gt; <paramref name="shiftAmount"/></c>.
-        /// </para>
-        /// </remarks>
-        public static void ShiftRightArithmetic<T>(ReadOnlySpan<T> x, int shiftAmount, Span<T> destination)
-            where T : IShiftOperators<T, int, T> =>
-            InvokeSpanIntoSpan(x, new ShiftRightArithmeticOperator<T>(shiftAmount), destination);
-
-        /// <summary>Computes the element-wise logical (unsigned) shifting right of numbers in the specified tensor by the specified shift amount.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <param name="shiftAmount">The number of bits to shift, represented as a scalar.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] &gt;&gt;&gt; <paramref name="shiftAmount"/></c>.
-        /// </para>
-        /// </remarks>
-        public static void ShiftRightLogical<T>(ReadOnlySpan<T> x, int shiftAmount, Span<T> destination)
-            where T : IShiftOperators<T, int, T> =>
-            InvokeSpanIntoSpan(x, new ShiftRightLogicalOperator<T>(shiftAmount), destination);
-
-        /// <summary>Computes the element-wise sigmoid function on the specified non-empty tensor of numbers.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x" /> must not be empty.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = 1f / (1f + <typeparamref name="T"/>.Exp(-<paramref name="x" />[i]))</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Sigmoid<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IExponentialFunctions<T>
-        {
-            if (x.IsEmpty)
-            {
-                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
-            }
-
-            InvokeSpanIntoSpan<T, SigmoidOperator<T>>(x, destination);
-        }
-
-        /// <summary>Computes the element-wise sine of the value in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Sin(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Sin<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, SinOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise sine of the value in the specified tensor that has been multiplied by Pi.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.SinPi(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void SinPi<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, SinPiOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise hyperbolic sine of each radian angle in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Sinh(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>, <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>, or <see cref="IFloatingPointIeee754{TSelf}.NaN"/>,
-        /// the corresponding destination location is set to that value.
-        /// </para>
-        /// <para>
-        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi / 180 to convert degrees to radians.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Sinh<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IHyperbolicFunctions<T> =>
-            InvokeSpanIntoSpan<T, SinhOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise sine and cosine of the value in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="sinDestination">The destination tensor for the element-wise sine result, represented as a span.</param>
-        /// <param name="cosDestination">The destination tensor for the element-wise cosine result, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="sinDestination"/> or <paramref name="cosDestination" /> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c>(<paramref name="sinDestination" />[i], <paramref name="cosDestination" />[i]) = <typeparamref name="T"/>.SinCos(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void SinCos<T>(ReadOnlySpan<T> x, Span<T> sinDestination, Span<T> cosDestination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan_TwoOutputs<T, SinCosOperator<T>>(x, sinDestination, cosDestination);
-
-        /// <summary>Computes the element-wise sine and cosine of the value in the specified tensor that has been multiplied by Pi.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="sinPiDestination">The destination tensor for the element-wise sine result, represented as a span.</param>
-        /// <param name="cosPiDestination">The destination tensor for the element-wise cosine result, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="sinPiDestination"/> or <paramref name="cosPiDestination" /> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c>(<paramref name="sinPiDestination" />[i], <paramref name="cosPiDestination" />[i]) = <typeparamref name="T"/>.SinCos(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void SinCosPi<T>(ReadOnlySpan<T> x, Span<T> sinPiDestination, Span<T> cosPiDestination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan_TwoOutputs<T, SinCosPiOperator<T>>(x, sinPiDestination, cosPiDestination);
-
-        /// <summary>Computes the softmax function over the specified non-empty tensor of numbers.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x" /> must not be empty.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes a sum of <c><typeparamref name="T"/>.Exp(x[i])</c> for all elements in <paramref name="x"/>.
-        /// It then effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Exp(<paramref name="x" />[i]) / sum</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void SoftMax<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IExponentialFunctions<T>
-        {
-            if (x.IsEmpty)
-            {
-                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
-            }
-
-            if (x.Length > destination.Length)
-            {
-                ThrowHelper.ThrowArgument_DestinationTooShort();
-            }
-
-            ValidateInputOutputSpanNonOverlapping(x, destination);
-
-            T expSum = Aggregate<T, ExpOperator<T>, AddOperator<T>>(x);
-
-            InvokeSpanScalarIntoSpan<T, ExpOperator<T>, DivideOperator<T>>(x, expSum, destination);
-        }
-
-        /// <summary>Computes the element-wise square root of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Sqrt(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void Sqrt<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IRootFunctions<T> =>
-            InvokeSpanIntoSpan<T, SqrtOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise difference between numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] - <paramref name="y" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Subtract<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : ISubtractionOperators<T, T, T> =>
-            InvokeSpanSpanIntoSpan<T, SubtractOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise difference between numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] - <paramref name="y" /></c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Subtract<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : ISubtractionOperators<T, T, T> =>
-            InvokeSpanScalarIntoSpan<T, SubtractOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise difference between numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a scalar.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" /> - <paramref name="y" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// If either of the element-wise input values is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the resulting element-wise value is also NaN.
-        /// </para>
-        /// </remarks>
-        public static void Subtract<T>(T x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : ISubtractionOperators<T, T, T> =>
-            InvokeScalarSpanIntoSpan<T, SubtractOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the sum of all elements in the specified tensor of numbers.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <returns>The result of adding all elements in <paramref name="x"/>, or zero if <paramref name="x"/> is empty.</returns>
-        /// <remarks>
-        /// <para>
-        /// If any of the values in the input is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the result is also NaN.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T Sum<T>(ReadOnlySpan<T> x)
-            where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T> =>
-            Aggregate<T, IdentityOperator<T>, AddOperator<T>>(x);
-
-        /// <summary>Computes the sum of the absolute values of every element in the specified tensor of numbers.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <returns>The result of adding the absolute value of every element in <paramref name="x"/>, or zero if <paramref name="x"/> is empty.</returns>
-        /// <exception cref="OverflowException"><typeparamref name="T"/> is a signed integer type and <paramref name="x"/> contained a value equal to <typeparamref name="T"/>'s minimum value.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes:
-        /// <c>
-        ///     Span&lt;T&gt; absoluteValues = ...;
-        ///     TensorPrimitives.Abs(x, absoluteValues);
-        ///     T result = TensorPrimitives.Sum(absoluteValues);
-        /// </c>
-        /// but without requiring intermediate storage for the absolute values. It corresponds to the <c>asum</c> method defined by <c>BLAS1</c>.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T SumOfMagnitudes<T>(ReadOnlySpan<T> x)
-            where T : INumberBase<T> =>
-            Aggregate<T, AbsoluteOperator<T>, AddOperator<T>>(x);
-
-        /// <summary>Computes the sum of the square of every element in the specified tensor of numbers.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <returns>The result of adding the square of every element in <paramref name="x"/>, or zero if <paramref name="x"/> is empty.</returns>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes:
-        /// <c>
-        ///     Span&lt;T&gt; squaredValues = ...;
-        ///     TensorPrimitives.Multiply(x, x, squaredValues);
-        ///     T result = TensorPrimitives.Sum(squaredValues);
-        /// </c>
-        /// but without requiring intermediate storage for the squared values.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static T SumOfSquares<T>(ReadOnlySpan<T> x)
-            where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T>, IMultiplyOperators<T, T, T> =>
-            Aggregate<T, SquaredOperator<T>, AddOperator<T>>(x);
-
-        /// <summary>Computes the element-wise tangent of the value in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Tan(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Tan<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, TanOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise tangent of the value in the specified tensor that has been multiplied by Pi.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.TanPi(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void TanPi<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : ITrigonometricFunctions<T> =>
-            InvokeSpanIntoSpan<T, TanPiOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise hyperbolic tangent of each radian angle in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Tanh(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// <para>
-        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>, the corresponding destination location is set to -1.
-        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>, the corresponding destination location is set to 1.
-        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the corresponding destination location is set to NaN.
-        /// </para>
-        /// <para>
-        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi / 180 to convert degrees to radians.
-        /// </para>
-        /// <para>
-        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
-        /// operating systems or architectures.
-        /// </para>
-        /// </remarks>
-        public static void Tanh<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IHyperbolicFunctions<T> =>
-            InvokeSpanIntoSpan<T, TanhOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise trailing zero count of numbers in the specified tensor.</summary>
-        /// <param name="x">The tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.TrailingZeroCount(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void TrailingZeroCount<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IBinaryInteger<T> =>
-            InvokeSpanIntoSpan<T, TrailingZeroCountOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise truncation of numbers in the specified tensor.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = T.Truncate(<paramref name="x" />[i])</c>.
-        /// </para>
-        /// </remarks>
-        public static void Truncate<T>(ReadOnlySpan<T> x, Span<T> destination)
-            where T : IFloatingPoint<T> =>
-            InvokeSpanIntoSpan<T, TruncateOperator<T>>(x, destination);
-
-        /// <summary>Computes the element-wise XOR of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a span.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] ^ <paramref name="y" />[i]</c>.
-        /// </para>
-        /// </remarks>
-        public static void Xor<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where T : IBitwiseOperators<T, T, T> =>
-            InvokeSpanSpanIntoSpan<T, XorOperator<T>>(x, y, destination);
-
-        /// <summary>Computes the element-wise XOR of numbers in the specified tensors.</summary>
-        /// <param name="x">The first tensor, represented as a span.</param>
-        /// <param name="y">The second tensor, represented as a scalar.</param>
-        /// <param name="destination">The destination tensor, represented as a span.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] ^ <paramref name="y" /></c>.
-        /// </para>
-        /// </remarks>
-        public static void Xor<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
-            where T : IBitwiseOperators<T, T, T> =>
-            InvokeSpanScalarIntoSpan<T, XorOperator<T>>(x, y, destination);
-    }
-}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Tan.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Tan.cs
new file mode 100644
index 000000000000..926bca5221a6
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Tan.cs
@@ -0,0 +1,396 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise tangent of the value in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Tan(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Tan<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, TanOperator<T>>(x, destination);
+
+        /// <summary>T.Tan(x)</summary>
+        private readonly struct TanOperator<T> : IUnaryOperator<T, T>
+            where T : ITrigonometricFunctions<T>
+        {
+            // This code is based on `vrs4_tan` and `vrd2_tan` from amd/aocl-libm-ose
+            // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation notes from amd/aocl-libm-ose:
+            // --------------------------------------------
+            // A given x is reduced into the form:
+            //          |x| = (N * π/2) + F
+            // Where N is an integer obtained using:
+            //         N = round(x * 2/π)
+            // And F is a fraction part lying in the interval
+            //         [-π/4, +π/4];
+            // obtained as F = |x| - (N * π/2)
+            // Thus tan(x) is given by
+            //         tan(x) = tan((N * π/2) + F) = tan(F)
+            //         when N is even, = -cot(F) = -1/tan(F)
+            //         when N is odd, tan(F) is approximated using a polynomial
+            //         obtained from Remez approximation from Sollya.
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Tan(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return TanOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return TanOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return TanOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return TanOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return TanOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return TanOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+        }
+
+        /// <summary>float.Tan(x)</summary>
+        private readonly struct TanOperatorSingle : IUnaryOperator<float, float>
+        {
+            internal const uint SignMask = 0x7FFFFFFFu;
+            internal const uint MaxVectorizedValue = 0x49800000u;
+            private const float AlmHuge = 1.2582912e7f;
+            private const float Pi_Tail2 = 4.371139e-8f;
+            private const float Pi_Tail3 = 1.7151245e-15f;
+            private const float C1 = 0.33333358f;
+            private const float C2 = 0.13332522f;
+            private const float C3 = 0.05407107f;
+            private const float C4 = 0.021237267f;
+            private const float C5 = 0.010932301f;
+            private const float C6 = -1.5722344e-5f;
+            private const float C7 = 0.0044221194f;
+
+            public static bool Vectorizable => true;
+
+            public static float Invoke(float x) => float.Tan(x);
+
+            public static Vector128<float> Invoke(Vector128<float> x)
+            {
+                Vector128<float> uxMasked = Vector128.Abs(x);
+                if (Vector128.GreaterThanAny(uxMasked.AsUInt32(), Vector128.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<TanOperatorSingle>(x);
+                }
+
+                Vector128<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector128.Create(2 / float.Pi), Vector128.Create(AlmHuge));
+                Vector128<uint> odd = dn.AsUInt32() << 31;
+                dn -= Vector128.Create(AlmHuge);
+
+                Vector128<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(-float.Pi / 2), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(Pi_Tail2), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(Pi_Tail3), f);
+
+                // POLY_EVAL_ODD_15
+                Vector128<float> f2 = f * f;
+                Vector128<float> f4 = f2 * f2;
+                Vector128<float> f8 = f4 * f4;
+                Vector128<float> f12 = f8 * f4;
+                Vector128<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1));
+                Vector128<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C4), f2, Vector128.Create(C3));
+                Vector128<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C6), f2, Vector128.Create(C5));
+                Vector128<float> b1 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector128<float> b2 = MultiplyAddEstimateOperator<float>.Invoke(f8, a3, f12 * Vector128.Create(C7));
+                Vector128<float> poly = MultiplyAddEstimateOperator<float>.Invoke(f * f2, b1 + b2, f);
+
+                Vector128<float> result = (poly.AsUInt32() ^ (x.AsUInt32() & Vector128.Create(~SignMask))).AsSingle();
+                return Vector128.ConditionalSelect(Vector128.Equals(odd, Vector128<uint>.Zero).AsSingle(),
+                    result,
+                    Vector128.Create(-1f) / result);
+            }
+
+            public static Vector256<float> Invoke(Vector256<float> x)
+            {
+                Vector256<float> uxMasked = Vector256.Abs(x);
+                if (Vector256.GreaterThanAny(uxMasked.AsUInt32(), Vector256.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<TanOperatorSingle>(x);
+                }
+
+                Vector256<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector256.Create(2 / float.Pi), Vector256.Create(AlmHuge));
+                Vector256<uint> odd = dn.AsUInt32() << 31;
+                dn -= Vector256.Create(AlmHuge);
+
+                Vector256<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(-float.Pi / 2), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(Pi_Tail2), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(Pi_Tail3), f);
+
+                // POLY_EVAL_ODD_15
+                Vector256<float> f2 = f * f;
+                Vector256<float> f4 = f2 * f2;
+                Vector256<float> f8 = f4 * f4;
+                Vector256<float> f12 = f8 * f4;
+                Vector256<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1));
+                Vector256<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C4), f2, Vector256.Create(C3));
+                Vector256<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C6), f2, Vector256.Create(C5));
+                Vector256<float> b1 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector256<float> b2 = MultiplyAddEstimateOperator<float>.Invoke(f8, a3, f12 * Vector256.Create(C7));
+                Vector256<float> poly = MultiplyAddEstimateOperator<float>.Invoke(f * f2, b1 + b2, f);
+
+                Vector256<float> result = (poly.AsUInt32() ^ (x.AsUInt32() & Vector256.Create(~SignMask))).AsSingle();
+                return Vector256.ConditionalSelect(Vector256.Equals(odd, Vector256<uint>.Zero).AsSingle(),
+                    result,
+                    Vector256.Create(-1f) / result);
+            }
+
+            public static Vector512<float> Invoke(Vector512<float> x)
+            {
+                Vector512<float> uxMasked = Vector512.Abs(x);
+                if (Vector512.GreaterThanAny(uxMasked.AsUInt32(), Vector512.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<TanOperatorSingle>(x);
+                }
+
+                Vector512<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector512.Create(2 / float.Pi), Vector512.Create(AlmHuge));
+                Vector512<uint> odd = dn.AsUInt32() << 31;
+                dn -= Vector512.Create(AlmHuge);
+
+                Vector512<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(-float.Pi / 2), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(Pi_Tail2), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(Pi_Tail3), f);
+
+                // POLY_EVAL_ODD_15
+                Vector512<float> f2 = f * f;
+                Vector512<float> f4 = f2 * f2;
+                Vector512<float> f8 = f4 * f4;
+                Vector512<float> f12 = f8 * f4;
+                Vector512<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
+                Vector512<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C4), f2, Vector512.Create(C3));
+                Vector512<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C6), f2, Vector512.Create(C5));
+                Vector512<float> b1 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector512<float> b2 = MultiplyAddEstimateOperator<float>.Invoke(f8, a3, f12 * Vector512.Create(C7));
+                Vector512<float> poly = MultiplyAddEstimateOperator<float>.Invoke(f * f2, b1 + b2, f);
+
+                Vector512<float> result = (poly.AsUInt32() ^ (x.AsUInt32() & Vector512.Create(~SignMask))).AsSingle();
+                return Vector512.ConditionalSelect(Vector512.Equals(odd, Vector512<uint>.Zero).AsSingle(),
+                    result,
+                    Vector512.Create(-1f) / result);
+            }
+        }
+
+        /// <summary>double.Tan(x)</summary>
+        private readonly struct TanOperatorDouble : IUnaryOperator<double, double>
+        {
+            internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul;
+            internal const ulong MaxVectorizedValue = 0x4160000000000000ul;
+            private const double AlmHuge = 6.755399441055744e15;
+            private const double HalfPi2 = 6.123233995736766E-17;
+            private const double HalfPi3 = -1.4973849048591698E-33;
+            private const double C1 = 0.33333333333332493;
+            private const double C3 = 0.133333333334343;
+            private const double C5 = 0.0539682539203796;
+            private const double C7 = 0.02186948972198256;
+            private const double C9 = 0.008863217894198291;
+            private const double C11 = 0.003592298593761111;
+            private const double C13 = 0.0014547086183165365;
+            private const double C15 = 5.952456856028558E-4;
+            private const double C17 = 2.2190741289936845E-4;
+            private const double C19 = 1.3739809957985104E-4;
+            private const double C21 = -2.7500197359895707E-5;
+            private const double C23 = 9.038741690184683E-5;
+            private const double C25 = -4.534076545538694E-5;
+            private const double C27 = 2.0966522562190197E-5;
+
+            public static bool Vectorizable => true;
+
+            public static double Invoke(double x) => double.Tan(x);
+
+            public static Vector128<double> Invoke(Vector128<double> x)
+            {
+                Vector128<double> uxMasked = Vector128.Abs(x);
+                if (Vector128.GreaterThanAny(uxMasked.AsUInt64(), Vector128.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<TanOperatorDouble>(x);
+                }
+
+                // dn = |x| * (2/π)
+                Vector128<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector128.Create(2 / double.Pi), Vector128.Create(AlmHuge));
+                Vector128<ulong> odd = dn.AsUInt64() << 63;
+                dn -= Vector128.Create(AlmHuge);
+
+                // f = |x| - (dn * π/2)
+                Vector128<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-double.Pi / 2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-HalfPi2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-HalfPi3), f);
+
+                // POLY_EVAL_ODD_29
+                Vector128<double> g = f * f;
+                Vector128<double> g2 = g * g;
+                Vector128<double> g3 = g * g2;
+                Vector128<double> g5 = g3 * g2;
+                Vector128<double> g7 = g5 * g2;
+                Vector128<double> g9 = g7 * g2;
+                Vector128<double> g11 = g9 * g2;
+                Vector128<double> g13 = g11 * g2;
+                Vector128<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C3), g, Vector128.Create(C1));
+                Vector128<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C7), g, Vector128.Create(C5));
+                Vector128<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C11), g, Vector128.Create(C9));
+                Vector128<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C15), g, Vector128.Create(C13));
+                Vector128<double> a5 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C19), g, Vector128.Create(C17));
+                Vector128<double> a6 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C23), g, Vector128.Create(C21));
+                Vector128<double> a7 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C27), g, Vector128.Create(C25));
+                Vector128<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(g, a1, g3 * a2);
+                Vector128<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(g5, a3, g7 * a4);
+                Vector128<double> b3 = MultiplyAddEstimateOperator<double>.Invoke(g9, a5, g11 * a6);
+                Vector128<double> q = MultiplyAddEstimateOperator<double>.Invoke(g13, a7, b1 + b2 + b3);
+                Vector128<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, q, f);
+
+                Vector128<double> result = (poly.AsUInt64() ^ (x.AsUInt64() & Vector128.Create(~SignMask))).AsDouble();
+                return Vector128.ConditionalSelect(Vector128.Equals(odd, Vector128<ulong>.Zero).AsDouble(),
+                    result,
+                    Vector128.Create(-1.0) / result);
+            }
+
+            public static Vector256<double> Invoke(Vector256<double> x)
+            {
+                Vector256<double> uxMasked = Vector256.Abs(x);
+                if (Vector256.GreaterThanAny(uxMasked.AsUInt64(), Vector256.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<TanOperatorDouble>(x);
+                }
+
+                // dn = |x| * (2/π)
+                Vector256<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector256.Create(2 / double.Pi), Vector256.Create(AlmHuge));
+                Vector256<ulong> odd = dn.AsUInt64() << 63;
+                dn -= Vector256.Create(AlmHuge);
+
+                // f = |x| - (dn * π/2)
+                Vector256<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-double.Pi / 2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-HalfPi2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-HalfPi3), f);
+
+                // POLY_EVAL_ODD_29
+                Vector256<double> g = f * f;
+                Vector256<double> g2 = g * g;
+                Vector256<double> g3 = g * g2;
+                Vector256<double> g5 = g3 * g2;
+                Vector256<double> g7 = g5 * g2;
+                Vector256<double> g9 = g7 * g2;
+                Vector256<double> g11 = g9 * g2;
+                Vector256<double> g13 = g11 * g2;
+                Vector256<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C3), g, Vector256.Create(C1));
+                Vector256<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C7), g, Vector256.Create(C5));
+                Vector256<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C11), g, Vector256.Create(C9));
+                Vector256<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C15), g, Vector256.Create(C13));
+                Vector256<double> a5 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C19), g, Vector256.Create(C17));
+                Vector256<double> a6 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C23), g, Vector256.Create(C21));
+                Vector256<double> a7 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C27), g, Vector256.Create(C25));
+                Vector256<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(g, a1, g3 * a2);
+                Vector256<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(g5, a3, g7 * a4);
+                Vector256<double> b3 = MultiplyAddEstimateOperator<double>.Invoke(g9, a5, g11 * a6);
+                Vector256<double> q = MultiplyAddEstimateOperator<double>.Invoke(g13, a7, b1 + b2 + b3);
+                Vector256<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, q, f);
+
+                Vector256<double> result = (poly.AsUInt64() ^ (x.AsUInt64() & Vector256.Create(~SignMask))).AsDouble();
+                return Vector256.ConditionalSelect(Vector256.Equals(odd, Vector256<ulong>.Zero).AsDouble(),
+                    result,
+                    Vector256.Create(-1.0) / result);
+            }
+
+            public static Vector512<double> Invoke(Vector512<double> x)
+            {
+                Vector512<double> uxMasked = Vector512.Abs(x);
+                if (Vector512.GreaterThanAny(uxMasked.AsUInt64(), Vector512.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<TanOperatorDouble>(x);
+                }
+
+                // dn = |x| * (2/π)
+                Vector512<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector512.Create(2 / double.Pi), Vector512.Create(AlmHuge));
+                Vector512<ulong> odd = dn.AsUInt64() << 63;
+                dn -= Vector512.Create(AlmHuge);
+
+                // f = |x| - (dn * π/2)
+                Vector512<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-double.Pi / 2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-HalfPi2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-HalfPi3), f);
+
+                // POLY_EVAL_ODD_29
+                Vector512<double> g = f * f;
+                Vector512<double> g2 = g * g;
+                Vector512<double> g3 = g * g2;
+                Vector512<double> g5 = g3 * g2;
+                Vector512<double> g7 = g5 * g2;
+                Vector512<double> g9 = g7 * g2;
+                Vector512<double> g11 = g9 * g2;
+                Vector512<double> g13 = g11 * g2;
+                Vector512<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C3), g, Vector512.Create(C1));
+                Vector512<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C7), g, Vector512.Create(C5));
+                Vector512<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C11), g, Vector512.Create(C9));
+                Vector512<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C15), g, Vector512.Create(C13));
+                Vector512<double> a5 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C19), g, Vector512.Create(C17));
+                Vector512<double> a6 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C23), g, Vector512.Create(C21));
+                Vector512<double> a7 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C27), g, Vector512.Create(C25));
+                Vector512<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(g, a1, g3 * a2);
+                Vector512<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(g5, a3, g7 * a4);
+                Vector512<double> b3 = MultiplyAddEstimateOperator<double>.Invoke(g9, a5, g11 * a6);
+                Vector512<double> q = MultiplyAddEstimateOperator<double>.Invoke(g13, a7, b1 + b2 + b3);
+                Vector512<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, q, f);
+
+                Vector512<double> result = (poly.AsUInt64() ^ (x.AsUInt64() & Vector512.Create(~SignMask))).AsDouble();
+                return Vector512.ConditionalSelect(Vector512.Equals(odd, Vector512<ulong>.Zero).AsDouble(),
+                    result,
+                    Vector512.Create(-1.0) / result);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.TanPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.TanPi.cs
new file mode 100644
index 000000000000..962dba7c1858
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.TanPi.cs
@@ -0,0 +1,42 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise tangent of the value in the specified tensor that has been multiplied by Pi.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.TanPi(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi/180 to convert degrees to radians.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void TanPi<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : ITrigonometricFunctions<T> =>
+            InvokeSpanIntoSpan<T, TanPiOperator<T>>(x, destination);
+
+        /// <summary>T.TanPi(x)</summary>
+        private readonly struct TanPiOperator<T> : IUnaryOperator<T, T>
+            where T : ITrigonometricFunctions<T>
+        {
+            public static bool Vectorizable => false;
+            public static T Invoke(T x) => T.TanPi(x);
+            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Tanh.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Tanh.cs
new file mode 100644
index 000000000000..7ac7d9b37619
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Tanh.cs
@@ -0,0 +1,130 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise hyperbolic tangent of each radian angle in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <typeparamref name="T"/>.Tanh(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// <para>
+        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NegativeInfinity"/>, the corresponding destination location is set to -1.
+        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.PositiveInfinity"/>, the corresponding destination location is set to 1.
+        /// If a value is equal to <see cref="IFloatingPointIeee754{TSelf}.NaN"/>, the corresponding destination location is set to NaN.
+        /// </para>
+        /// <para>
+        /// The angles in x must be in radians. Use <see cref="M:System.Single.DegreesToRadians"/> or multiply by <typeparamref name="T"/>.Pi / 180 to convert degrees to radians.
+        /// </para>
+        /// <para>
+        /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
+        /// operating systems or architectures.
+        /// </para>
+        /// </remarks>
+        public static void Tanh<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IHyperbolicFunctions<T> =>
+            InvokeSpanIntoSpan<T, TanhOperator<T>>(x, destination);
+
+        /// <summary>T.Tanh(x)</summary>
+        internal readonly struct TanhOperator<T> : IUnaryOperator<T, T>
+            where T : IHyperbolicFunctions<T>
+        {
+            // This code is based on `vrs4_tanhf` from amd/aocl-libm-ose
+            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // To compute vrs4_tanhf(v_f32x4_t x)
+            // Let y = |x|
+            // If 0 <= y < 0x1.154246p3
+            //    Let z = e^(-2.0 * y) - 1      -(1)
+            //
+            //    Using (1), tanhf(y) can be calculated as,
+            //    tanhf(y) = -z / (z + 2.0)
+            //
+            // For other cases, call scalar tanhf()
+            //
+            // If x < 0, then we use the identity
+            //    tanhf(-x) = -tanhf(x)
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Tanh(x);
+
+            public static Vector128<T> Invoke(Vector128<T> t)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    Vector128<float> x = t.AsSingle();
+
+                    Vector128<float> y = Vector128.Abs(x);
+                    Vector128<float> z = ExpM1Operator<float>.Invoke(Vector128.Create(-2f) * y);
+                    Vector128<uint> sign = x.AsUInt32() & Vector128.Create(~(uint)int.MaxValue);
+                    return (sign ^ (-z / (z + Vector128.Create(2f))).AsUInt32()).As<uint, T>();
+                }
+                else
+                {
+                    Vector128<double> x = t.AsDouble();
+
+                    Vector128<double> y = Vector128.Abs(x);
+                    Vector128<double> z = ExpM1Operator<double>.Invoke(Vector128.Create(-2d) * y);
+                    Vector128<ulong> sign = x.AsUInt64() & Vector128.Create(~(ulong)long.MaxValue);
+                    return (sign ^ (-z / (z + Vector128.Create(2d))).AsUInt64()).As<ulong, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> t)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    Vector256<float> x = t.AsSingle();
+
+                    Vector256<float> y = Vector256.Abs(x);
+                    Vector256<float> z = ExpM1Operator<float>.Invoke(Vector256.Create(-2f) * y);
+                    Vector256<uint> sign = x.AsUInt32() & Vector256.Create(~(uint)int.MaxValue);
+                    return (sign ^ (-z / (z + Vector256.Create(2f))).AsUInt32()).As<uint, T>();
+                }
+                else
+                {
+                    Vector256<double> x = t.AsDouble();
+
+                    Vector256<double> y = Vector256.Abs(x);
+                    Vector256<double> z = ExpM1Operator<double>.Invoke(Vector256.Create(-2d) * y);
+                    Vector256<ulong> sign = x.AsUInt64() & Vector256.Create(~(ulong)long.MaxValue);
+                    return (sign ^ (-z / (z + Vector256.Create(2d))).AsUInt64()).As<ulong, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> t)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    Vector512<float> x = t.AsSingle();
+
+                    Vector512<float> y = Vector512.Abs(x);
+                    Vector512<float> z = ExpM1Operator<float>.Invoke(Vector512.Create(-2f) * y);
+                    Vector512<uint> sign = x.AsUInt32() & Vector512.Create(~(uint)int.MaxValue);
+                    return (sign ^ (-z / (z + Vector512.Create(2f))).AsUInt32()).As<uint, T>();
+                }
+                else
+                {
+                    Vector512<double> x = t.AsDouble();
+
+                    Vector512<double> y = Vector512.Abs(x);
+                    Vector512<double> z = ExpM1Operator<double>.Invoke(Vector512.Create(-2d) * y);
+                    Vector512<ulong> sign = x.AsUInt64() & Vector512.Create(~(ulong)long.MaxValue);
+                    return (sign ^ (-z / (z + Vector512.Create(2d))).AsUInt64()).As<ulong, T>();
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.TrailingZeroCount.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.TrailingZeroCount.cs
new file mode 100644
index 000000000000..156bafd3697b
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.TrailingZeroCount.cs
@@ -0,0 +1,73 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+
+#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise trailing zero count of numbers in the specified tensor.</summary>
+        /// <param name="x">The tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.TrailingZeroCount(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void TrailingZeroCount<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IBinaryInteger<T> =>
+            InvokeSpanIntoSpan<T, TrailingZeroCountOperator<T>>(x, destination);
+
+        /// <summary>T.TrailingZeroCount(x)</summary>
+        private readonly unsafe struct TrailingZeroCountOperator<T> : IUnaryOperator<T, T> where T : IBinaryInteger<T>
+        {
+            public static bool Vectorizable =>
+                (AdvSimd.Arm64.IsSupported && sizeof(T) == 1) ||
+                PopCountOperator<T>.Vectorizable; // http://0x80.pl/notesen/2023-01-31-avx512-bsf.html#trailing-zeros-simplified
+
+            public static T Invoke(T x) => T.TrailingZeroCount(x);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (AdvSimd.Arm64.IsSupported && sizeof(T) == 1)
+                {
+                    return AdvSimd.LeadingZeroCount(AdvSimd.Arm64.ReverseElementBits(x.AsByte())).As<byte, T>();
+                }
+
+                Debug.Assert(PopCountOperator<T>.Vectorizable);
+                return PopCountOperator<T>.Invoke(~x & (x - Vector128<T>.One));
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (PopCountOperator<T>.Vectorizable)
+                {
+                    return PopCountOperator<T>.Invoke(~x & (x - Vector256<T>.One));
+                }
+
+                return Vector256.Create(Invoke(x.GetLower()), Invoke(x.GetUpper()));
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (PopCountOperator<T>.Vectorizable)
+                {
+                    return PopCountOperator<T>.Invoke(~x & (x - Vector512<T>.One));
+                }
+
+                return Vector512.Create(Invoke(x.GetLower()), Invoke(x.GetUpper()));
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Truncate.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Truncate.cs
new file mode 100644
index 000000000000..3a2e3cea290a
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Truncate.cs
@@ -0,0 +1,102 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise truncation of numbers in the specified tensor.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = T.Truncate(<paramref name="x" />[i])</c>.
+        /// </para>
+        /// </remarks>
+        public static void Truncate<T>(ReadOnlySpan<T> x, Span<T> destination)
+            where T : IFloatingPoint<T> =>
+            InvokeSpanIntoSpan<T, TruncateOperator<T>>(x, destination);
+
+        private readonly struct TruncateOperator<T> : IUnaryOperator<T, T> where T : IFloatingPoint<T>
+        {
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
+            public static T Invoke(T x) => T.Truncate(x);
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    if (Sse41.IsSupported) return Sse41.RoundToZero(x.AsSingle()).As<float, T>();
+                    if (AdvSimd.IsSupported) return AdvSimd.RoundToZero(x.AsSingle()).As<float, T>();
+
+                    return Vector128.ConditionalSelect(Vector128.GreaterThanOrEqual(x, Vector128<T>.Zero),
+                        Vector128.Floor(x.AsSingle()).As<float, T>(),
+                        Vector128.Ceiling(x.AsSingle()).As<float, T>());
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+
+                    if (Sse41.IsSupported) return Sse41.RoundToZero(x.AsDouble()).As<double, T>();
+                    if (AdvSimd.Arm64.IsSupported) return AdvSimd.Arm64.RoundToZero(x.AsDouble()).As<double, T>();
+
+                    return Vector128.ConditionalSelect(Vector128.GreaterThanOrEqual(x, Vector128<T>.Zero),
+                        Vector128.Floor(x.AsDouble()).As<double, T>(),
+                        Vector128.Ceiling(x.AsDouble()).As<double, T>());
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    if (Avx.IsSupported) return Avx.RoundToZero(x.AsSingle()).As<float, T>();
+
+                    return Vector256.ConditionalSelect(Vector256.GreaterThanOrEqual(x, Vector256<T>.Zero),
+                        Vector256.Floor(x.AsSingle()).As<float, T>(),
+                        Vector256.Ceiling(x.AsSingle()).As<float, T>());
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+
+                    if (Avx.IsSupported) return Avx.RoundToZero(x.AsDouble()).As<double, T>();
+
+                    return Vector256.ConditionalSelect(Vector256.GreaterThanOrEqual(x, Vector256<T>.Zero),
+                        Vector256.Floor(x.AsDouble()).As<double, T>(),
+                        Vector256.Ceiling(x.AsDouble()).As<double, T>());
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    if (Avx512F.IsSupported) return Avx512F.RoundScale(x.AsSingle(), 0b11).As<float, T>();
+
+                    return Vector512.ConditionalSelect(Vector512.GreaterThanOrEqual(x, Vector512<T>.Zero),
+                        Vector512.Floor(x.AsSingle()).As<float, T>(),
+                        Vector512.Ceiling(x.AsSingle()).As<float, T>());
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+
+                    if (Avx512F.IsSupported) return Avx512F.RoundScale(x.AsDouble(), 0b11).As<double, T>();
+
+                    return Vector512.ConditionalSelect(Vector512.GreaterThanOrEqual(x, Vector512<T>.Zero),
+                        Vector512.Floor(x.AsDouble()).As<double, T>(),
+                        Vector512.Ceiling(x.AsDouble()).As<double, T>());
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Xor.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Xor.cs
new file mode 100644
index 000000000000..545298192dd7
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Xor.cs
@@ -0,0 +1,52 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+    public static partial class TensorPrimitives
+    {
+        /// <summary>Computes the element-wise XOR of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a span.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Length of <paramref name="x" /> must be same as length of <paramref name="y" />.</exception>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] ^ <paramref name="y" />[i]</c>.
+        /// </para>
+        /// </remarks>
+        public static void Xor<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
+            where T : IBitwiseOperators<T, T, T> =>
+            InvokeSpanSpanIntoSpan<T, XorOperator<T>>(x, y, destination);
+
+        /// <summary>Computes the element-wise XOR of numbers in the specified tensors.</summary>
+        /// <param name="x">The first tensor, represented as a span.</param>
+        /// <param name="y">The second tensor, represented as a scalar.</param>
+        /// <param name="destination">The destination tensor, represented as a span.</param>
+        /// <exception cref="ArgumentException">Destination is too short.</exception>
+        /// <exception cref="ArgumentException"><paramref name="x"/> and <paramref name="destination"/> reference overlapping memory locations and do not begin at the same location.</exception>
+        /// <remarks>
+        /// <para>
+        /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] ^ <paramref name="y" /></c>.
+        /// </para>
+        /// </remarks>
+        public static void Xor<T>(ReadOnlySpan<T> x, T y, Span<T> destination)
+            where T : IBitwiseOperators<T, T, T> =>
+            InvokeSpanScalarIntoSpan<T, XorOperator<T>>(x, y, destination);
+
+        /// <summary>x ^ y</summary>
+        private readonly struct XorOperator<T> : IBinaryOperator<T> where T : IBitwiseOperators<T, T, T>
+        {
+            public static bool Vectorizable => true;
+            public static T Invoke(T x, T y) => x ^ y;
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x ^ y;
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x ^ y;
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x ^ y;
+        }
+    }
+}
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs
deleted file mode 100644
index dec6446cd765..000000000000
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs
+++ /dev/null
@@ -1,19898 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.Arm;
-using System.Runtime.Intrinsics.Wasm;
-using System.Runtime.Intrinsics.X86;
-
-#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
-
-namespace System.Numerics.Tensors
-{
-    public static unsafe partial class TensorPrimitives
-    {
-        /// <summary>Defines the threshold, in bytes, at which non-temporal stores will be used.</summary>
-        /// <remarks>
-        ///     A non-temporal store is one that allows the CPU to bypass the cache when writing to memory.
-        ///
-        ///     This can be beneficial when working with large amounts of memory where the writes would otherwise
-        ///     cause large amounts of repeated updates and evictions. The hardware optimization manuals recommend
-        ///     the threshold to be roughly half the size of the last level of on-die cache -- that is, if you have approximately
-        ///     4MB of L3 cache per core, you'd want this to be approx. 1-2MB, depending on if hyperthreading was enabled.
-        ///
-        ///     However, actually computing the amount of L3 cache per core can be tricky or error prone. Native memcpy
-        ///     algorithms use a constant threshold that is typically around 256KB and we match that here for simplicity. This
-        ///     threshold accounts for most processors in the last 10-15 years that had approx. 1MB L3 per core and support
-        ///     hyperthreading, giving a per core last level cache of approx. 512KB.
-        /// </remarks>
-        private const nuint NonTemporalByteThreshold = 256 * 1024;
-
-        /// <summary>
-        /// Copies <paramref name="source"/> to <paramref name="destination"/>, converting each <see cref="float" />
-        /// value to its nearest representable half-precision floating-point value.
-        /// </summary>
-        /// <param name="source">The source span from which to copy values.</param>
-        /// <param name="destination">The destination span into which the converted values should be written.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (Half)<paramref name="source" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// <paramref name="source"/> and <paramref name="destination"/> must not overlap. If they do, behavior is undefined.
-        /// </para>
-        /// </remarks>
-        public static void ConvertToHalf(ReadOnlySpan<float> source, Span<Half> destination) =>
-            ConvertTruncating(source, destination);
-
-        /// <summary>
-        /// Copies <paramref name="source"/> to <paramref name="destination"/>, converting each half-precision
-        /// floating-point value to its nearest representable <see cref="float"/> value.
-        /// </summary>
-        /// <param name="source">The source span from which to copy values.</param>
-        /// <param name="destination">The destination span into which the converted values should be written.</param>
-        /// <exception cref="ArgumentException">Destination is too short.</exception>
-        /// <remarks>
-        /// <para>
-        /// This method effectively computes <c><paramref name="destination" />[i] = (float)<paramref name="source" />[i]</c>.
-        /// </para>
-        /// <para>
-        /// <paramref name="source"/> and <paramref name="destination"/> must not overlap. If they do, behavior is undefined.
-        /// </para>
-        /// </remarks>
-        public static void ConvertToSingle(ReadOnlySpan<Half> source, Span<float> destination) =>
-            ConvertTruncating(source, destination);
-
-        /// <summary>Computes the cosine similarity between the two specified non-empty, equal-length tensors of single-precision floating-point numbers.</summary>
-        /// <remarks>Assumes arguments have already been validated to be non-empty and equal length.</remarks>
-        private static T CosineSimilarityCore<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y) where T : IRootFunctions<T>
-        {
-            if (x.IsEmpty)
-            {
-                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
-            }
-
-            if (x.Length != y.Length)
-            {
-                ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
-            }
-
-            // Compute the same as:
-            // TensorPrimitives.Dot(x, y) / (Math.Sqrt(TensorPrimitives.SumOfSquares(x)) * Math.Sqrt(TensorPrimitives.SumOfSquares(y)))
-            // but only looping over each span once.
-
-            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && x.Length >= Vector512<T>.Count)
-            {
-                ref T xRef = ref MemoryMarshal.GetReference(x);
-                ref T yRef = ref MemoryMarshal.GetReference(y);
-
-                Vector512<T> dotProductVector = Vector512<T>.Zero;
-                Vector512<T> xSumOfSquaresVector = Vector512<T>.Zero;
-                Vector512<T> ySumOfSquaresVector = Vector512<T>.Zero;
-
-                // Process vectors, summing their dot products and squares, as long as there's a vector's worth remaining.
-                int oneVectorFromEnd = x.Length - Vector512<T>.Count;
-                int i = 0;
-                do
-                {
-                    Vector512<T> xVec = Vector512.LoadUnsafe(ref xRef, (uint)i);
-                    Vector512<T> yVec = Vector512.LoadUnsafe(ref yRef, (uint)i);
-
-                    dotProductVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, yVec, dotProductVector);
-                    xSumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, xVec, xSumOfSquaresVector);
-                    ySumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(yVec, yVec, ySumOfSquaresVector);
-
-                    i += Vector512<T>.Count;
-                }
-                while (i <= oneVectorFromEnd);
-
-                // Process the last vector in the span, masking off elements already processed.
-                if (i != x.Length)
-                {
-                    Vector512<T> xVec = Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512<T>.Count));
-                    Vector512<T> yVec = Vector512.LoadUnsafe(ref yRef, (uint)(x.Length - Vector512<T>.Count));
-
-                    Vector512<T> remainderMask = CreateRemainderMaskVector512<T>(x.Length - i);
-                    xVec &= remainderMask;
-                    yVec &= remainderMask;
-
-                    dotProductVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, yVec, dotProductVector);
-                    xSumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, xVec, xSumOfSquaresVector);
-                    ySumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(yVec, yVec, ySumOfSquaresVector);
-                }
-
-                // Sum(X * Y) / (|X| * |Y|)
-                return
-                    Vector512.Sum(dotProductVector) /
-                    (T.Sqrt(Vector512.Sum(xSumOfSquaresVector)) * T.Sqrt(Vector512.Sum(ySumOfSquaresVector)));
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && x.Length >= Vector256<T>.Count)
-            {
-                ref T xRef = ref MemoryMarshal.GetReference(x);
-                ref T yRef = ref MemoryMarshal.GetReference(y);
-
-                Vector256<T> dotProductVector = Vector256<T>.Zero;
-                Vector256<T> xSumOfSquaresVector = Vector256<T>.Zero;
-                Vector256<T> ySumOfSquaresVector = Vector256<T>.Zero;
-
-                // Process vectors, summing their dot products and squares, as long as there's a vector's worth remaining.
-                int oneVectorFromEnd = x.Length - Vector256<T>.Count;
-                int i = 0;
-                do
-                {
-                    Vector256<T> xVec = Vector256.LoadUnsafe(ref xRef, (uint)i);
-                    Vector256<T> yVec = Vector256.LoadUnsafe(ref yRef, (uint)i);
-
-                    dotProductVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, yVec, dotProductVector);
-                    xSumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, xVec, xSumOfSquaresVector);
-                    ySumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(yVec, yVec, ySumOfSquaresVector);
-
-                    i += Vector256<T>.Count;
-                }
-                while (i <= oneVectorFromEnd);
-
-                // Process the last vector in the span, masking off elements already processed.
-                if (i != x.Length)
-                {
-                    Vector256<T> xVec = Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256<T>.Count));
-                    Vector256<T> yVec = Vector256.LoadUnsafe(ref yRef, (uint)(x.Length - Vector256<T>.Count));
-
-                    Vector256<T> remainderMask = CreateRemainderMaskVector256<T>(x.Length - i);
-                    xVec &= remainderMask;
-                    yVec &= remainderMask;
-
-                    dotProductVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, yVec, dotProductVector);
-                    xSumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, xVec, xSumOfSquaresVector);
-                    ySumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(yVec, yVec, ySumOfSquaresVector);
-                }
-
-                // Sum(X * Y) / (|X| * |Y|)
-                return
-                    Vector256.Sum(dotProductVector) /
-                    (T.Sqrt(Vector256.Sum(xSumOfSquaresVector)) * T.Sqrt(Vector256.Sum(ySumOfSquaresVector)));
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && x.Length >= Vector128<T>.Count)
-            {
-                ref T xRef = ref MemoryMarshal.GetReference(x);
-                ref T yRef = ref MemoryMarshal.GetReference(y);
-
-                Vector128<T> dotProductVector = Vector128<T>.Zero;
-                Vector128<T> xSumOfSquaresVector = Vector128<T>.Zero;
-                Vector128<T> ySumOfSquaresVector = Vector128<T>.Zero;
-
-                // Process vectors, summing their dot products and squares, as long as there's a vector's worth remaining.
-                int oneVectorFromEnd = x.Length - Vector128<T>.Count;
-                int i = 0;
-                do
-                {
-                    Vector128<T> xVec = Vector128.LoadUnsafe(ref xRef, (uint)i);
-                    Vector128<T> yVec = Vector128.LoadUnsafe(ref yRef, (uint)i);
-
-                    dotProductVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, yVec, dotProductVector);
-                    xSumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, xVec, xSumOfSquaresVector);
-                    ySumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(yVec, yVec, ySumOfSquaresVector);
-
-                    i += Vector128<T>.Count;
-                }
-                while (i <= oneVectorFromEnd);
-
-                // Process the last vector in the span, masking off elements already processed.
-                if (i != x.Length)
-                {
-                    Vector128<T> xVec = Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128<T>.Count));
-                    Vector128<T> yVec = Vector128.LoadUnsafe(ref yRef, (uint)(x.Length - Vector128<T>.Count));
-
-                    Vector128<T> remainderMask = CreateRemainderMaskVector128<T>(x.Length - i);
-                    xVec &= remainderMask;
-                    yVec &= remainderMask;
-
-                    dotProductVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, yVec, dotProductVector);
-                    xSumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(xVec, xVec, xSumOfSquaresVector);
-                    ySumOfSquaresVector = MultiplyAddEstimateOperator<T>.Invoke(yVec, yVec, ySumOfSquaresVector);
-                }
-
-                // Sum(X * Y) / (|X| * |Y|)
-                return
-                    Vector128.Sum(dotProductVector) /
-                    (T.Sqrt(Vector128.Sum(xSumOfSquaresVector)) * T.Sqrt(Vector128.Sum(ySumOfSquaresVector)));
-            }
-
-            // Vectorization isn't supported or there are too few elements to vectorize.
-            // Use a scalar implementation.
-            T dotProduct = T.Zero, xSumOfSquares = T.Zero, ySumOfSquares = T.Zero;
-            for (int i = 0; i < x.Length; i++)
-            {
-                dotProduct = MultiplyAddEstimateOperator<T>.Invoke(x[i], y[i], dotProduct);
-                xSumOfSquares = MultiplyAddEstimateOperator<T>.Invoke(x[i], x[i], xSumOfSquares);
-                ySumOfSquares = MultiplyAddEstimateOperator<T>.Invoke(y[i], y[i], ySumOfSquares);
-            }
-
-            // Sum(X * Y) / (|X| * |Y|)
-            return
-                dotProduct /
-                (T.Sqrt(xSumOfSquares) * T.Sqrt(ySumOfSquares));
-        }
-
-        /// <summary>Performs an aggregation over all elements in <paramref name="x"/> to produce a single-precision floating-point value.</summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TTransformOperator">Specifies the transform operation that should be applied to each element loaded from <paramref name="x"/>.</typeparam>
-        /// <typeparam name="TAggregationOperator">
-        /// Specifies the aggregation binary operation that should be applied to multiple values to aggregate them into a single value.
-        /// The aggregation is applied after the transform is applied to each element.
-        /// </typeparam>
-        private static T Aggregate<T, TTransformOperator, TAggregationOperator>(
-            ReadOnlySpan<T> x)
-            where TTransformOperator : struct, IUnaryOperator<T, T>
-            where TAggregationOperator : struct, IAggregationOperator<T>
-        {
-            // Since every branch has a cost and since that cost is
-            // essentially lost for larger inputs, we do branches
-            // in a way that allows us to have the minimum possible
-            // for small sizes
-
-            ref T xRef = ref MemoryMarshal.GetReference(x);
-
-            nuint remainder = (uint)x.Length;
-
-            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TTransformOperator.Vectorizable)
-            {
-                T result;
-
-                if (remainder >= (uint)Vector512<T>.Count)
-                {
-                    result = Vectorized512(ref xRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    result = VectorizedSmall(ref xRef, remainder);
-                }
-
-                return result;
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && TTransformOperator.Vectorizable)
-            {
-                T result;
-
-                if (remainder >= (uint)Vector256<T>.Count)
-                {
-                    result = Vectorized256(ref xRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    result = VectorizedSmall(ref xRef, remainder);
-                }
-
-                return result;
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TTransformOperator.Vectorizable)
-            {
-                T result;
-
-                if (remainder >= (uint)Vector128<T>.Count)
-                {
-                    result = Vectorized128(ref xRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    result = VectorizedSmall(ref xRef, remainder);
-                }
-
-                return result;
-            }
-
-            // This is the software fallback when no acceleration is available.
-            // It requires no branches to hit.
-
-            return SoftwareFallback(ref xRef, remainder);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static T SoftwareFallback(ref T xRef, nuint length)
-            {
-                T result = TAggregationOperator.IdentityValue;
-
-                for (nuint i = 0; i < length; i++)
-                {
-                    result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, i)));
-                }
-
-                return result;
-            }
-
-            static T Vectorized128(ref T xRef, nuint remainder)
-            {
-                Vector128<T> vresult = Vector128.Create(TAggregationOperator.IdentityValue);
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                Vector128<T> end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
-
-                nuint misalignment = 0;
-
-                if (remainder > (uint)(Vector128<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    {
-                        T* xPtr = px;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)xPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-
-                            Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector128<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector128<T> vector1;
-                        Vector128<T> vector2;
-                        Vector128<T> vector3;
-                        Vector128<T> vector4;
-
-                        // We only need to load, so there isn't a lot of benefit to doing non-temporal operations
-
-                        while (remainder >= (uint)(Vector128<T>.Count * 8))
-                        {
-                            // We load, process, and store the first four vectors
-
-                            vector1 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)));
-                            vector2 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)));
-                            vector3 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)));
-                            vector4 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)));
-
-                            vresult = TAggregationOperator.Invoke(vresult, vector1);
-                            vresult = TAggregationOperator.Invoke(vresult, vector2);
-                            vresult = TAggregationOperator.Invoke(vresult, vector3);
-                            vresult = TAggregationOperator.Invoke(vresult, vector4);
-
-                            // We load, process, and store the next four vectors
-
-                            vector1 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)));
-                            vector2 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)));
-                            vector3 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)));
-                            vector4 = TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)));
-
-                            vresult = TAggregationOperator.Invoke(vresult, vector1);
-                            vresult = TAggregationOperator.Invoke(vresult, vector2);
-                            vresult = TAggregationOperator.Invoke(vresult, vector3);
-                            vresult = TAggregationOperator.Invoke(vresult, vector4);
-
-                            // We adjust the source and destination references, then update
-                            // the count of remaining elements to process.
-
-                            xPtr += (uint)(Vector128<T>.Count * 8);
-
-                            remainder -= (uint)(Vector128<T>.Count * 8);
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                    }
-                }
-
-                // Store the first block. Handling this separately simplifies the latter code as we know
-                // they come after and so we can relegate it to full blocks or the trailing elements
-
-                beg = Vector128.ConditionalSelect(CreateAlignmentMaskVector128<T>((int)misalignment), beg, Vector128.Create(TAggregationOperator.IdentityValue));
-                vresult = TAggregationOperator.Invoke(vresult, beg);
-
-                // Process the remaining [0, Count * 7] elements via a jump table
-                //
-                // We end up handling any trailing elements in case 0 and in the
-                // worst case end up just doing the identity operation here if there
-                // were no trailing elements.
-
-                (nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector128<T>.Count);
-                blocks -= (misalignment == 0) ? 1u : 0u;
-                remainder -= trailing;
-
-                switch (blocks)
-                {
-                    case 7:
-                    {
-                        Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        Vector128<T> vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 1)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)trailing), end, Vector128.Create(TAggregationOperator.IdentityValue));
-                        vresult = TAggregationOperator.Invoke(vresult, end);
-                        break;
-                    }
-                }
-
-                return TAggregationOperator.Invoke(vresult);
-            }
-
-            static T Vectorized256(ref T xRef, nuint remainder)
-            {
-                Vector256<T> vresult = Vector256.Create(TAggregationOperator.IdentityValue);
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                Vector256<T> end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
-
-                nuint misalignment = 0;
-
-                if (remainder > (uint)(Vector256<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    {
-                        T* xPtr = px;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)xPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-
-                            Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector256<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector256<T> vector1;
-                        Vector256<T> vector2;
-                        Vector256<T> vector3;
-                        Vector256<T> vector4;
-
-                        // We only need to load, so there isn't a lot of benefit to doing non-temporal operations
-
-                        while (remainder >= (uint)(Vector256<T>.Count * 8))
-                        {
-                            // We load, process, and store the first four vectors
-
-                            vector1 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)));
-                            vector2 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)));
-                            vector3 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)));
-                            vector4 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)));
-
-                            vresult = TAggregationOperator.Invoke(vresult, vector1);
-                            vresult = TAggregationOperator.Invoke(vresult, vector2);
-                            vresult = TAggregationOperator.Invoke(vresult, vector3);
-                            vresult = TAggregationOperator.Invoke(vresult, vector4);
-
-                            // We load, process, and store the next four vectors
-
-                            vector1 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)));
-                            vector2 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)));
-                            vector3 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)));
-                            vector4 = TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)));
-
-                            vresult = TAggregationOperator.Invoke(vresult, vector1);
-                            vresult = TAggregationOperator.Invoke(vresult, vector2);
-                            vresult = TAggregationOperator.Invoke(vresult, vector3);
-                            vresult = TAggregationOperator.Invoke(vresult, vector4);
-
-                            // We adjust the source and destination references, then update
-                            // the count of remaining elements to process.
-
-                            xPtr += (uint)(Vector256<T>.Count * 8);
-
-                            remainder -= (uint)(Vector256<T>.Count * 8);
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                    }
-                }
-
-                // Store the first block. Handling this separately simplifies the latter code as we know
-                // they come after and so we can relegate it to full blocks or the trailing elements
-
-                beg = Vector256.ConditionalSelect(CreateAlignmentMaskVector256<T>((int)misalignment), beg, Vector256.Create(TAggregationOperator.IdentityValue));
-                vresult = TAggregationOperator.Invoke(vresult, beg);
-
-                // Process the remaining [0, Count * 7] elements via a jump table
-                //
-                // We end up handling any trailing elements in case 0 and in the
-                // worst case end up just doing the identity operation here if there
-                // were no trailing elements.
-
-                (nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector256<T>.Count);
-                blocks -= (misalignment == 0) ? 1u : 0u;
-                remainder -= trailing;
-
-                switch (blocks)
-                {
-                    case 7:
-                    {
-                        Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        Vector256<T> vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 1)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)trailing), end, Vector256.Create(TAggregationOperator.IdentityValue));
-                        vresult = TAggregationOperator.Invoke(vresult, end);
-                        break;
-                    }
-                }
-
-                return TAggregationOperator.Invoke(vresult);
-            }
-
-            static T Vectorized512(ref T xRef, nuint remainder)
-            {
-                Vector512<T> vresult = Vector512.Create(TAggregationOperator.IdentityValue);
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector512<T> beg = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef));
-                Vector512<T> end = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count));
-
-                nuint misalignment = 0;
-
-                if (remainder > (uint)(Vector512<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    {
-                        T* xPtr = px;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)xPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-
-                            Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector512<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector512<T> vector1;
-                        Vector512<T> vector2;
-                        Vector512<T> vector3;
-                        Vector512<T> vector4;
-
-                        // We only need to load, so there isn't a lot of benefit to doing non-temporal operations
-
-                        while (remainder >= (uint)(Vector512<T>.Count * 8))
-                        {
-                            // We load, process, and store the first four vectors
-
-                            vector1 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)));
-                            vector2 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)));
-                            vector3 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)));
-                            vector4 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)));
-
-                            vresult = TAggregationOperator.Invoke(vresult, vector1);
-                            vresult = TAggregationOperator.Invoke(vresult, vector2);
-                            vresult = TAggregationOperator.Invoke(vresult, vector3);
-                            vresult = TAggregationOperator.Invoke(vresult, vector4);
-
-                            // We load, process, and store the next four vectors
-
-                            vector1 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)));
-                            vector2 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)));
-                            vector3 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)));
-                            vector4 = TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)));
-
-                            vresult = TAggregationOperator.Invoke(vresult, vector1);
-                            vresult = TAggregationOperator.Invoke(vresult, vector2);
-                            vresult = TAggregationOperator.Invoke(vresult, vector3);
-                            vresult = TAggregationOperator.Invoke(vresult, vector4);
-
-                            // We adjust the source and destination references, then update
-                            // the count of remaining elements to process.
-
-                            xPtr += (uint)(Vector512<T>.Count * 8);
-
-                            remainder -= (uint)(Vector512<T>.Count * 8);
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                    }
-                }
-
-                // Store the first block. Handling this separately simplifies the latter code as we know
-                // they come after and so we can relegate it to full blocks or the trailing elements
-
-                beg = Vector512.ConditionalSelect(CreateAlignmentMaskVector512<T>((int)misalignment), beg, Vector512.Create(TAggregationOperator.IdentityValue));
-                vresult = TAggregationOperator.Invoke(vresult, beg);
-
-                // Process the remaining [0, Count * 7] elements via a jump table
-                //
-                // We end up handling any trailing elements in case 0 and in the
-                // worst case end up just doing the identity operation here if there
-                // were no trailing elements.
-
-                (nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector512<T>.Count);
-                blocks -= (misalignment == 0) ? 1u : 0u;
-                remainder -= trailing;
-
-                switch (blocks)
-                {
-                    case 7:
-                    {
-                        Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        Vector512<T> vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 1)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end = Vector512.ConditionalSelect(CreateRemainderMaskVector512<T>((int)trailing), end, Vector512.Create(TAggregationOperator.IdentityValue));
-                        vresult = TAggregationOperator.Invoke(vresult, end);
-                        break;
-                    }
-                }
-
-                return TAggregationOperator.Invoke(vresult);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static T VectorizedSmall(ref T xRef, nuint remainder)
-            {
-                if (sizeof(T) == 1)
-                {
-                    return VectorizedSmall1(ref xRef, remainder);
-                }
-                else if (sizeof(T) == 2)
-                {
-                    return VectorizedSmall2(ref xRef, remainder);
-                }
-                else if (sizeof(T) == 4)
-                {
-                    return VectorizedSmall4(ref xRef, remainder);
-                }
-                else
-                {
-                    Debug.Assert(sizeof(T) == 8);
-                    return VectorizedSmall8(ref xRef, remainder);
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static T VectorizedSmall1(ref T xRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 1);
-                T result = TAggregationOperator.IdentityValue;
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 63:
-                    case 62:
-                    case 61:
-                    case 60:
-                    case 59:
-                    case 58:
-                    case 57:
-                    case 56:
-                    case 55:
-                    case 54:
-                    case 53:
-                    case 52:
-                    case 51:
-                    case 50:
-                    case 49:
-                    case 48:
-                    case 47:
-                    case 46:
-                    case 45:
-                    case 44:
-                    case 43:
-                    case 42:
-                    case 41:
-                    case 40:
-                    case 39:
-                    case 38:
-                    case 37:
-                    case 36:
-                    case 35:
-                    case 34:
-                    case 33:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        Vector256<T> end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
-
-                        end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 32:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        Vector128<T> end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
-
-                        end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 15:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 14)));
-                        goto case 14;
-
-                    case 14:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 13)));
-                        goto case 13;
-
-                    case 13:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 12)));
-                        goto case 12;
-
-                    case 12:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 11)));
-                        goto case 11;
-
-                    case 11:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 10)));
-                        goto case 10;
-
-                    case 10:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 9)));
-                        goto case 9;
-
-                    case 9:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 8)));
-                        goto case 8;
-
-                    case 8:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 7)));
-                        goto case 7;
-
-                    case 7:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 6)));
-                        goto case 6;
-
-                    case 6:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 5)));
-                        goto case 5;
-
-                    case 5:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 4)));
-                        goto case 4;
-
-                    case 4:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 3)));
-                        goto case 3;
-
-                    case 3:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)));
-                        goto case 2;
-
-                    case 2:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)));
-                        goto case 1;
-
-                    case 1:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef));
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-
-                return result;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static T VectorizedSmall2(ref T xRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 2);
-                T result = TAggregationOperator.IdentityValue;
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        Vector256<T> end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
-
-                        end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        Vector128<T> end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
-
-                        end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 8:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 7:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 6)));
-                        goto case 6;
-
-                    case 6:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 5)));
-                        goto case 5;
-
-                    case 5:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 4)));
-                        goto case 4;
-
-                    case 4:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 3)));
-                        goto case 3;
-
-                    case 3:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)));
-                        goto case 2;
-
-                    case 2:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)));
-                        goto case 1;
-
-                    case 1:
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef));
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-
-                return result;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static T VectorizedSmall4(ref T xRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 4);
-                T result = TAggregationOperator.IdentityValue;
-
-                switch (remainder)
-                {
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        Vector256<T> end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
-
-                        end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    case 8:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        Vector128<T> end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
-
-                        end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef));
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-
-                return result;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static T VectorizedSmall8(ref T xRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 8);
-                T result = TAggregationOperator.IdentityValue;
-
-                switch (remainder)
-                {
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        Vector256<T> end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
-
-                        end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        Vector128<T> end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
-
-                        end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    case 2:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    case 1:
-                    {
-                        result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef));
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-
-                return result;
-            }
-        }
-
-        /// <summary>Performs an aggregation over all pair-wise elements in <paramref name="x"/> and <paramref name="y"/> to produce a single-precision floating-point value.</summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TBinaryOperator">Specifies the binary operation that should be applied to the pair-wise elements loaded from <paramref name="x"/> and <paramref name="y"/>.</typeparam>
-        /// <typeparam name="TAggregationOperator">
-        /// Specifies the aggregation binary operation that should be applied to multiple values to aggregate them into a single value.
-        /// The aggregation is applied to the results of the binary operations on the pair-wise values.
-        /// </typeparam>
-        private static T Aggregate<T, TBinaryOperator, TAggregationOperator>(
-            ReadOnlySpan<T> x, ReadOnlySpan<T> y)
-            where TBinaryOperator : struct, IBinaryOperator<T>
-            where TAggregationOperator : struct, IAggregationOperator<T>
-        {
-            if (x.Length != y.Length)
-            {
-                ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
-            }
-
-            // Since every branch has a cost and since that cost is
-            // essentially lost for larger inputs, we do branches
-            // in a way that allows us to have the minimum possible
-            // for small sizes
-
-            ref T xRef = ref MemoryMarshal.GetReference(x);
-            ref T yRef = ref MemoryMarshal.GetReference(y);
-
-            nuint remainder = (uint)x.Length;
-
-            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TBinaryOperator.Vectorizable)
-            {
-                T result;
-
-                if (remainder >= (uint)Vector512<T>.Count)
-                {
-                    result = Vectorized512(ref xRef, ref yRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    result = VectorizedSmall(ref xRef, ref yRef, remainder);
-                }
-
-                return result;
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && TBinaryOperator.Vectorizable)
-            {
-                T result;
-
-                if (remainder >= (uint)Vector256<T>.Count)
-                {
-                    result = Vectorized256(ref xRef, ref yRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    result = VectorizedSmall(ref xRef, ref yRef, remainder);
-                }
-
-                return result;
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TBinaryOperator.Vectorizable)
-            {
-                T result;
-
-                if (remainder >= (uint)Vector128<T>.Count)
-                {
-                    result = Vectorized128(ref xRef, ref yRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    result = VectorizedSmall(ref xRef, ref yRef, remainder);
-                }
-
-                return result;
-            }
-
-            // This is the software fallback when no acceleration is available
-            // It requires no branches to hit
-
-            return SoftwareFallback(ref xRef, ref yRef, remainder);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static T SoftwareFallback(ref T xRef, ref T yRef, nuint length)
-            {
-                T result = TAggregationOperator.IdentityValue;
-
-                for (nuint i = 0; i < length; i++)
-                {
-                    result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, i),
-                                                                                        Unsafe.Add(ref yRef, i)));
-                }
-
-                return result;
-            }
-
-            static T Vectorized128(ref T xRef, ref T yRef, nuint remainder)
-            {
-                Vector128<T> vresult = Vector128.Create(TAggregationOperator.IdentityValue);
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                          Vector128.LoadUnsafe(ref yRef));
-                Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
-
-                nuint misalignment = 0;
-
-                if (remainder > (uint)(Vector128<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* py = &yRef)
-                    {
-                        T* xPtr = px;
-                        T* yPtr = py;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)xPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            yPtr += misalignment;
-
-                            Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector128<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector128<T> vector1;
-                        Vector128<T> vector2;
-                        Vector128<T> vector3;
-                        Vector128<T> vector4;
-
-                        // We only need to load, so there isn't a lot of benefit to doing non-temporal operations
-
-                        while (remainder >= (uint)(Vector128<T>.Count * 8))
-                        {
-                            // We load, process, and store the first four vectors
-
-                            vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
-                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)));
-                            vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
-                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)));
-                            vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
-                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)));
-                            vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
-                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)));
-
-                            vresult = TAggregationOperator.Invoke(vresult, vector1);
-                            vresult = TAggregationOperator.Invoke(vresult, vector2);
-                            vresult = TAggregationOperator.Invoke(vresult, vector3);
-                            vresult = TAggregationOperator.Invoke(vresult, vector4);
-
-                            // We load, process, and store the next four vectors
-
-                            vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
-                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)));
-                            vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
-                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)));
-                            vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
-                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)));
-                            vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
-                                                             Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)));
-
-                            vresult = TAggregationOperator.Invoke(vresult, vector1);
-                            vresult = TAggregationOperator.Invoke(vresult, vector2);
-                            vresult = TAggregationOperator.Invoke(vresult, vector3);
-                            vresult = TAggregationOperator.Invoke(vresult, vector4);
-
-                            // We adjust the source and destination references, then update
-                            // the count of remaining elements to process.
-
-                            xPtr += (uint)(Vector128<T>.Count * 8);
-                            yPtr += (uint)(Vector128<T>.Count * 8);
-
-                            remainder -= (uint)(Vector128<T>.Count * 8);
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        yRef = ref *yPtr;
-                    }
-                }
-
-                // Store the first block. Handling this separately simplifies the latter code as we know
-                // they come after and so we can relegate it to full blocks or the trailing elements
-
-                beg = Vector128.ConditionalSelect(CreateAlignmentMaskVector128<T>((int)misalignment), beg, Vector128.Create(TAggregationOperator.IdentityValue));
-                vresult = TAggregationOperator.Invoke(vresult, beg);
-
-                // Process the remaining [0, Count * 7] elements via a jump table
-                //
-                // We end up handling any trailing elements in case 0 and in the
-                // worst case end up just doing the identity operation here if there
-                // were no trailing elements.
-
-                (nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector128<T>.Count);
-                blocks -= (misalignment == 0) ? 1u : 0u;
-                remainder -= trailing;
-
-                switch (blocks)
-                {
-                    case 7:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 7)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 6)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 5)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 4)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 3)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 2)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 1)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 1)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)trailing), end, Vector128.Create(TAggregationOperator.IdentityValue));
-                        vresult = TAggregationOperator.Invoke(vresult, end);
-                        break;
-                    }
-                }
-
-                return TAggregationOperator.Invoke(vresult);
-            }
-
-            static T Vectorized256(ref T xRef, ref T yRef, nuint remainder)
-            {
-                Vector256<T> vresult = Vector256.Create(TAggregationOperator.IdentityValue);
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                          Vector256.LoadUnsafe(ref yRef));
-                Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
-
-                nuint misalignment = 0;
-
-                if (remainder > (uint)(Vector256<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* py = &yRef)
-                    {
-                        T* xPtr = px;
-                        T* yPtr = py;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)xPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            yPtr += misalignment;
-
-                            Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector256<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector256<T> vector1;
-                        Vector256<T> vector2;
-                        Vector256<T> vector3;
-                        Vector256<T> vector4;
-
-                        // We only need to load, so there isn't a lot of benefit to doing non-temporal operations
-
-                        while (remainder >= (uint)(Vector256<T>.Count * 8))
-                        {
-                            // We load, process, and store the first four vectors
-
-                            vector1 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
-                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)));
-                            vector2 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
-                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)));
-                            vector3 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
-                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)));
-                            vector4 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
-                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)));
-
-                            vresult = TAggregationOperator.Invoke(vresult, vector1);
-                            vresult = TAggregationOperator.Invoke(vresult, vector2);
-                            vresult = TAggregationOperator.Invoke(vresult, vector3);
-                            vresult = TAggregationOperator.Invoke(vresult, vector4);
-
-                            // We load, process, and store the next four vectors
-
-                            vector1 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
-                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)));
-                            vector2 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
-                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)));
-                            vector3 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
-                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)));
-                            vector4 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
-                                                             Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)));
-
-                            vresult = TAggregationOperator.Invoke(vresult, vector1);
-                            vresult = TAggregationOperator.Invoke(vresult, vector2);
-                            vresult = TAggregationOperator.Invoke(vresult, vector3);
-                            vresult = TAggregationOperator.Invoke(vresult, vector4);
-
-                            // We adjust the source and destination references, then update
-                            // the count of remaining elements to process.
-
-                            xPtr += (uint)(Vector256<T>.Count * 8);
-                            yPtr += (uint)(Vector256<T>.Count * 8);
-
-                            remainder -= (uint)(Vector256<T>.Count * 8);
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        yRef = ref *yPtr;
-                    }
-                }
-
-                // Store the first block. Handling this separately simplifies the latter code as we know
-                // they come after and so we can relegate it to full blocks or the trailing elements
-
-                beg = Vector256.ConditionalSelect(CreateAlignmentMaskVector256<T>((int)misalignment), beg, Vector256.Create(TAggregationOperator.IdentityValue));
-                vresult = TAggregationOperator.Invoke(vresult, beg);
-
-                // Process the remaining [0, Count * 7] elements via a jump table
-                //
-                // We end up handling any trailing elements in case 0 and in the
-                // worst case end up just doing the identity operation here if there
-                // were no trailing elements.
-
-                (nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector256<T>.Count);
-                blocks -= (misalignment == 0) ? 1u : 0u;
-                remainder -= trailing;
-
-                switch (blocks)
-                {
-                    case 7:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 7)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 6)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 5)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 4)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 3)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 2)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 1)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 1)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)trailing), end, Vector256.Create(TAggregationOperator.IdentityValue));
-                        vresult = TAggregationOperator.Invoke(vresult, end);
-                        break;
-                    }
-                }
-
-                return TAggregationOperator.Invoke(vresult);
-            }
-
-            static T Vectorized512(ref T xRef, ref T yRef, nuint remainder)
-            {
-                Vector512<T> vresult = Vector512.Create(TAggregationOperator.IdentityValue);
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector512<T> beg = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
-                                                          Vector512.LoadUnsafe(ref yRef));
-                Vector512<T> end = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
-                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count));
-
-                nuint misalignment = 0;
-
-                if (remainder > (uint)(Vector512<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* py = &yRef)
-                    {
-                        T* xPtr = px;
-                        T* yPtr = py;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)xPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            yPtr += misalignment;
-
-                            Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector512<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector512<T> vector1;
-                        Vector512<T> vector2;
-                        Vector512<T> vector3;
-                        Vector512<T> vector4;
-
-                        // We only need to load, so there isn't a lot of benefit to doing non-temporal operations
-
-                        while (remainder >= (uint)(Vector512<T>.Count * 8))
-                        {
-                            // We load, process, and store the first four vectors
-
-                            vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
-                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)));
-                            vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
-                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)));
-                            vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
-                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)));
-                            vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
-                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)));
-
-                            vresult = TAggregationOperator.Invoke(vresult, vector1);
-                            vresult = TAggregationOperator.Invoke(vresult, vector2);
-                            vresult = TAggregationOperator.Invoke(vresult, vector3);
-                            vresult = TAggregationOperator.Invoke(vresult, vector4);
-
-                            // We load, process, and store the next four vectors
-
-                            vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
-                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)));
-                            vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
-                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)));
-                            vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
-                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)));
-                            vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
-                                                             Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)));
-
-                            vresult = TAggregationOperator.Invoke(vresult, vector1);
-                            vresult = TAggregationOperator.Invoke(vresult, vector2);
-                            vresult = TAggregationOperator.Invoke(vresult, vector3);
-                            vresult = TAggregationOperator.Invoke(vresult, vector4);
-
-                            // We adjust the source and destination references, then update
-                            // the count of remaining elements to process.
-
-                            xPtr += (uint)(Vector512<T>.Count * 8);
-                            yPtr += (uint)(Vector512<T>.Count * 8);
-
-                            remainder -= (uint)(Vector512<T>.Count * 8);
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        yRef = ref *yPtr;
-                    }
-                }
-
-                // Store the first block. Handling this separately simplifies the latter code as we know
-                // they come after and so we can relegate it to full blocks or the trailing elements
-
-                beg = Vector512.ConditionalSelect(CreateAlignmentMaskVector512<T>((int)misalignment), beg, Vector512.Create(TAggregationOperator.IdentityValue));
-                vresult = TAggregationOperator.Invoke(vresult, beg);
-
-                // Process the remaining [0, Count * 7] elements via a jump table
-                //
-                // We end up handling any trailing elements in case 0 and in the
-                // worst case end up just doing the identity operation here if there
-                // were no trailing elements.
-
-                (nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector512<T>.Count);
-                blocks -= (misalignment == 0) ? 1u : 0u;
-                remainder -= trailing;
-
-                switch (blocks)
-                {
-                    case 7:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 1)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 1)));
-                        vresult = TAggregationOperator.Invoke(vresult, vector);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end = Vector512.ConditionalSelect(CreateRemainderMaskVector512<T>((int)trailing), end, Vector512.Create(TAggregationOperator.IdentityValue));
-                        vresult = TAggregationOperator.Invoke(vresult, end);
-                        break;
-                    }
-                }
-
-                return TAggregationOperator.Invoke(vresult);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static T VectorizedSmall(ref T xRef, ref T yRef, nuint remainder)
-            {
-                if (sizeof(T) == 1)
-                {
-                    return VectorizedSmall1(ref xRef, ref yRef, remainder);
-                }
-                else if (sizeof(T) == 2)
-                {
-                    return VectorizedSmall2(ref xRef, ref yRef, remainder);
-                }
-                else if (sizeof(T) == 4)
-                {
-                    return VectorizedSmall4(ref xRef, ref yRef, remainder);
-                }
-                else
-                {
-                    Debug.Assert(sizeof(T) == 8);
-                    return VectorizedSmall8(ref xRef, ref yRef, remainder);
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static T VectorizedSmall1(ref T xRef, ref T yRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 1);
-                T result = TAggregationOperator.IdentityValue;
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 63:
-                    case 62:
-                    case 61:
-                    case 60:
-                    case 59:
-                    case 58:
-                    case 57:
-                    case 56:
-                    case 55:
-                    case 54:
-                    case 53:
-                    case 52:
-                    case 51:
-                    case 50:
-                    case 49:
-                    case 48:
-                    case 47:
-                    case 46:
-                    case 45:
-                    case 44:
-                    case 43:
-                    case 42:
-                    case 41:
-                    case 40:
-                    case 39:
-                    case 38:
-                    case 37:
-                    case 36:
-                    case 35:
-                    case 34:
-                    case 33:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-                        Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                  Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
-
-                        end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 32:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-                        Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                  Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
-
-                        end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 15:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 14), Unsafe.Add(ref yRef, 14)));
-                        goto case 14;
-
-                    case 14:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 13), Unsafe.Add(ref yRef, 13)));
-                        goto case 13;
-
-                    case 13:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 12), Unsafe.Add(ref yRef, 12)));
-                        goto case 12;
-
-                    case 12:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 11), Unsafe.Add(ref yRef, 11)));
-                        goto case 11;
-
-                    case 11:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 10), Unsafe.Add(ref yRef, 10)));
-                        goto case 10;
-
-                    case 10:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 9), Unsafe.Add(ref yRef, 9)));
-                        goto case 9;
-
-                    case 9:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 8), Unsafe.Add(ref yRef, 8)));
-                        goto case 8;
-
-                    case 8:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 7), Unsafe.Add(ref yRef, 7)));
-                        goto case 7;
-
-                    case 7:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 6), Unsafe.Add(ref yRef, 6)));
-                        goto case 6;
-
-                    case 6:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 5), Unsafe.Add(ref yRef, 5)));
-                        goto case 5;
-
-                    case 5:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 4), Unsafe.Add(ref yRef, 4)));
-                        goto case 4;
-
-                    case 4:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 3), Unsafe.Add(ref yRef, 3)));
-                        goto case 3;
-
-                    case 3:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), Unsafe.Add(ref yRef, 2)));
-                        goto case 2;
-
-                    case 2:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), Unsafe.Add(ref yRef, 1)));
-                        goto case 1;
-
-                    case 1:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef));
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-
-                return result;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static T VectorizedSmall2(ref T xRef, ref T yRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 2);
-                T result = TAggregationOperator.IdentityValue;
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-                        Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                  Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
-
-                        end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-                        Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                  Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
-
-                        end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 8:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 7:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 6), Unsafe.Add(ref yRef, 6)));
-                        goto case 6;
-
-                    case 6:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 5), Unsafe.Add(ref yRef, 5)));
-                        goto case 5;
-
-                    case 5:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 4), Unsafe.Add(ref yRef, 4)));
-                        goto case 4;
-
-                    case 4:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 3), Unsafe.Add(ref yRef, 3)));
-                        goto case 3;
-
-                    case 3:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), Unsafe.Add(ref yRef, 2)));
-                        goto case 2;
-
-                    case 2:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), Unsafe.Add(ref yRef, 1)));
-                        goto case 1;
-
-                    case 1:
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef));
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-
-                return result;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static T VectorizedSmall4(ref T xRef, ref T yRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 4);
-                T result = TAggregationOperator.IdentityValue;
-
-                switch (remainder)
-                {
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-                        Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                  Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
-
-                        end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    case 8:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-                        Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                  Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
-
-                        end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                                            Unsafe.Add(ref yRef, 2)));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                                            Unsafe.Add(ref yRef, 1)));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef));
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-
-                return result;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static T VectorizedSmall8(ref T xRef, ref T yRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 8);
-                T result = TAggregationOperator.IdentityValue;
-
-                switch (remainder)
-                {
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-                        Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                  Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
-
-                        end = Vector256.ConditionalSelect(CreateRemainderMaskVector256<T>((int)(remainder % (uint)Vector256<T>.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-                        Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                  Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
-
-                        end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
-
-                        result = TAggregationOperator.Invoke(TAggregationOperator.Invoke(beg, end));
-                        break;
-                    }
-
-                    case 2:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-
-                        result = TAggregationOperator.Invoke(beg);
-                        break;
-                    }
-
-                    case 1:
-                    {
-                        result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef));
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-
-                return result;
-            }
-        }
-
-        /// <remarks>
-        /// This is the same as <see cref="Aggregate{T, TTransformOperator, TAggregationOperator}(ReadOnlySpan{T})"/>
-        /// with an identity transform, except it early exits on NaN.
-        /// </remarks>
-        private static T MinMaxCore<T, TMinMaxOperator>(ReadOnlySpan<T> x)
-            where T : INumberBase<T>
-            where TMinMaxOperator : struct, IAggregationOperator<T>
-        {
-            if (x.IsEmpty)
-            {
-                ThrowHelper.ThrowArgument_SpansMustBeNonEmpty();
-            }
-
-            // This matches the IEEE 754:2019 `maximum`/`minimum` functions.
-            // It propagates NaN inputs back to the caller and
-            // otherwise returns the greater of the inputs.
-            // It treats +0 as greater than -0 as per the specification.
-
-            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && x.Length >= Vector512<T>.Count)
-            {
-                ref T xRef = ref MemoryMarshal.GetReference(x);
-
-                // Load the first vector as the initial set of results, and bail immediately
-                // to scalar handling if it contains any NaNs (which don't compare equally to themselves).
-                Vector512<T> result = Vector512.LoadUnsafe(ref xRef, 0);
-                Vector512<T> current;
-
-                Vector512<T> nanMask;
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    // Check for NaNs
-                    nanMask = ~Vector512.Equals(result, result);
-                    if (nanMask != Vector512<T>.Zero)
-                    {
-                        return result.GetElement(IndexOfFirstMatch(nanMask));
-                    }
-                }
-
-                int oneVectorFromEnd = x.Length - Vector512<T>.Count;
-                int i = Vector512<T>.Count;
-
-                // Aggregate additional vectors into the result as long as there's at least one full vector left to process.
-                while (i <= oneVectorFromEnd)
-                {
-                    // Load the next vector, and early exit on NaN.
-                    current = Vector512.LoadUnsafe(ref xRef, (uint)i);
-
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // Check for NaNs
-                        nanMask = ~Vector512.Equals(current, current);
-                        if (nanMask != Vector512<T>.Zero)
-                        {
-                            return current.GetElement(IndexOfFirstMatch(nanMask));
-                        }
-                    }
-
-                    result = TMinMaxOperator.Invoke(result, current);
-                    i += Vector512<T>.Count;
-                }
-
-                // If any elements remain, handle them in one final vector.
-                if (i != x.Length)
-                {
-                    current = Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512<T>.Count));
-
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // Check for NaNs
-                        nanMask = ~Vector512.Equals(current, current);
-                        if (nanMask != Vector512<T>.Zero)
-                        {
-                            return current.GetElement(IndexOfFirstMatch(nanMask));
-                        }
-                    }
-
-                    result = TMinMaxOperator.Invoke(result, current);
-                }
-
-                // Aggregate the lanes in the vector to create the final scalar result.
-                return TMinMaxOperator.Invoke(result);
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && x.Length >= Vector256<T>.Count)
-            {
-                ref T xRef = ref MemoryMarshal.GetReference(x);
-
-                // Load the first vector as the initial set of results, and bail immediately
-                // to scalar handling if it contains any NaNs (which don't compare equally to themselves).
-                Vector256<T> result = Vector256.LoadUnsafe(ref xRef, 0);
-                Vector256<T> current;
-
-                Vector256<T> nanMask;
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    // Check for NaNs
-                    nanMask = ~Vector256.Equals(result, result);
-                    if (nanMask != Vector256<T>.Zero)
-                    {
-                        return result.GetElement(IndexOfFirstMatch(nanMask));
-                    }
-                }
-
-                int oneVectorFromEnd = x.Length - Vector256<T>.Count;
-                int i = Vector256<T>.Count;
-
-                // Aggregate additional vectors into the result as long as there's at least one full vector left to process.
-                while (i <= oneVectorFromEnd)
-                {
-                    // Load the next vector, and early exit on NaN.
-                    current = Vector256.LoadUnsafe(ref xRef, (uint)i);
-
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // Check for NaNs
-                        nanMask = ~Vector256.Equals(current, current);
-                        if (nanMask != Vector256<T>.Zero)
-                        {
-                            return current.GetElement(IndexOfFirstMatch(nanMask));
-                        }
-                    }
-
-                    result = TMinMaxOperator.Invoke(result, current);
-                    i += Vector256<T>.Count;
-                }
-
-                // If any elements remain, handle them in one final vector.
-                if (i != x.Length)
-                {
-                    current = Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256<T>.Count));
-
-
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // Check for NaNs
-                        nanMask = ~Vector256.Equals(current, current);
-                        if (nanMask != Vector256<T>.Zero)
-                        {
-                            return current.GetElement(IndexOfFirstMatch(nanMask));
-                        }
-                    }
-
-                    result = TMinMaxOperator.Invoke(result, current);
-                }
-
-                // Aggregate the lanes in the vector to create the final scalar result.
-                return TMinMaxOperator.Invoke(result);
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && x.Length >= Vector128<T>.Count)
-            {
-                ref T xRef = ref MemoryMarshal.GetReference(x);
-
-                // Load the first vector as the initial set of results, and bail immediately
-                // to scalar handling if it contains any NaNs (which don't compare equally to themselves).
-                Vector128<T> result = Vector128.LoadUnsafe(ref xRef, 0);
-                Vector128<T> current;
-
-                Vector128<T> nanMask;
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    // Check for NaNs
-                    nanMask = ~Vector128.Equals(result, result);
-                    if (nanMask != Vector128<T>.Zero)
-                    {
-                        return result.GetElement(IndexOfFirstMatch(nanMask));
-                    }
-                }
-
-                int oneVectorFromEnd = x.Length - Vector128<T>.Count;
-                int i = Vector128<T>.Count;
-
-                // Aggregate additional vectors into the result as long as there's at least one full vector left to process.
-                while (i <= oneVectorFromEnd)
-                {
-                    // Load the next vector, and early exit on NaN.
-                    current = Vector128.LoadUnsafe(ref xRef, (uint)i);
-
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // Check for NaNs
-                        nanMask = ~Vector128.Equals(current, current);
-                        if (nanMask != Vector128<T>.Zero)
-                        {
-                            return current.GetElement(IndexOfFirstMatch(nanMask));
-                        }
-                    }
-
-                    result = TMinMaxOperator.Invoke(result, current);
-                    i += Vector128<T>.Count;
-                }
-
-                // If any elements remain, handle them in one final vector.
-                if (i != x.Length)
-                {
-                    current = Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128<T>.Count));
-
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // Check for NaNs
-                        nanMask = ~Vector128.Equals(current, current);
-                        if (nanMask != Vector128<T>.Zero)
-                        {
-                            return current.GetElement(IndexOfFirstMatch(nanMask));
-                        }
-                    }
-
-                    result = TMinMaxOperator.Invoke(result, current);
-                }
-
-                // Aggregate the lanes in the vector to create the final scalar result.
-                return TMinMaxOperator.Invoke(result);
-            }
-
-            // Scalar path used when either vectorization is not supported or the input is too small to vectorize.
-            T curResult = x[0];
-            if (T.IsNaN(curResult))
-            {
-                return curResult;
-            }
-
-            for (int i = 1; i < x.Length; i++)
-            {
-                T current = x[i];
-                if (T.IsNaN(current))
-                {
-                    return current;
-                }
-
-                curResult = TMinMaxOperator.Invoke(curResult, current);
-            }
-
-            return curResult;
-        }
-
-        private static int IndexOfMinMaxCore<T, TIndexOfMinMax>(ReadOnlySpan<T> x)
-            where T : INumber<T>
-            where TIndexOfMinMax : struct, IIndexOfOperator<T>
-        {
-            if (x.IsEmpty)
-            {
-                return -1;
-            }
-
-            // This matches the IEEE 754:2019 `maximum`/`minimum` functions.
-            // It propagates NaN inputs back to the caller and
-            // otherwise returns the index of the greater of the inputs.
-            // It treats +0 as greater than -0 as per the specification.
-
-            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && x.Length >= Vector512<T>.Count)
-            {
-                Debug.Assert(sizeof(T) is 1 or 2 or 4 or 8);
-
-                [MethodImpl(MethodImplOptions.AggressiveInlining)]
-                static Vector512<T> CreateVector512T(int i) =>
-                    sizeof(T) == sizeof(long) ? Vector512.Create((long)i).As<long, T>() :
-                    sizeof(T) == sizeof(int) ? Vector512.Create(i).As<int, T>() :
-                    sizeof(T) == sizeof(short) ? Vector512.Create((short)i).As<short, T>() :
-                    Vector512.Create((byte)i).As<byte, T>();
-
-                ref T xRef = ref MemoryMarshal.GetReference(x);
-                Vector512<T> resultIndex =
-#if NET9_0_OR_GREATER
-                    sizeof(T) == sizeof(long) ? Vector512<long>.Indices.As<long, T>() :
-                    sizeof(T) == sizeof(int) ? Vector512<int>.Indices.As<int, T>() :
-                    sizeof(T) == sizeof(short) ? Vector512<short>.Indices.As<short, T>() :
-                    Vector512<byte>.Indices.As<byte, T>();
-#else
-                    sizeof(T) == sizeof(long) ? Vector512.Create(0L, 1, 2, 3, 4, 5, 6, 7).As<long, T>() :
-                    sizeof(T) == sizeof(int) ? Vector512.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As<int, T>() :
-                    sizeof(T) == sizeof(short) ? Vector512.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31).As<short, T>() :
-                    Vector512.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63).As<byte, T>();
-#endif
-                Vector512<T> currentIndex = resultIndex;
-                Vector512<T> increment = CreateVector512T(Vector512<T>.Count);
-
-                // Load the first vector as the initial set of results, and bail immediately
-                // to scalar handling if it contains any NaNs (which don't compare equally to themselves).
-                Vector512<T> result = Vector512.LoadUnsafe(ref xRef);
-                Vector512<T> current;
-
-                Vector512<T> nanMask;
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    nanMask = ~Vector512.Equals(result, result);
-                    if (nanMask != Vector512<T>.Zero)
-                    {
-                        return IndexOfFirstMatch(nanMask);
-                    }
-                }
-
-                int oneVectorFromEnd = x.Length - Vector512<T>.Count;
-                int i = Vector512<T>.Count;
-
-                // Aggregate additional vectors into the result as long as there's at least one full vector left to process.
-                while (i <= oneVectorFromEnd)
-                {
-                    // Load the next vector, and early exit on NaN.
-                    current = Vector512.LoadUnsafe(ref xRef, (uint)i);
-                    currentIndex += increment;
-
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        nanMask = ~Vector512.Equals(current, current);
-                        if (nanMask != Vector512<T>.Zero)
-                        {
-                            return i + IndexOfFirstMatch(nanMask);
-                        }
-                    }
-
-                    TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex);
-
-                    i += Vector512<T>.Count;
-                }
-
-                // If any elements remain, handle them in one final vector.
-                if (i != x.Length)
-                {
-                    current = Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512<T>.Count));
-                    currentIndex += CreateVector512T(x.Length - i);
-
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        nanMask = ~Vector512.Equals(current, current);
-                        if (nanMask != Vector512<T>.Zero)
-                        {
-                            int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask);
-                            return typeof(T) == typeof(double) ?
-                                (int)(long)(object)currentIndex.As<T, long>()[indexInVectorOfFirstMatch] :
-                                (int)(object)currentIndex.As<T, int>()[indexInVectorOfFirstMatch];
-                        }
-                    }
-
-                    TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex);
-                }
-
-                // Aggregate the lanes in the vector to create the final scalar result.
-                return IndexOfFinalAggregate<T, TIndexOfMinMax>(result, resultIndex);
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && x.Length >= Vector256<T>.Count)
-            {
-                Debug.Assert(sizeof(T) is 1 or 2 or 4 or 8);
-
-                [MethodImpl(MethodImplOptions.AggressiveInlining)]
-                static Vector256<T> CreateVector256T(int i) =>
-                    sizeof(T) == sizeof(long) ? Vector256.Create((long)i).As<long, T>() :
-                    sizeof(T) == sizeof(int) ? Vector256.Create(i).As<int, T>() :
-                    sizeof(T) == sizeof(short) ? Vector256.Create((short)i).As<short, T>() :
-                    Vector256.Create((byte)i).As<byte, T>();
-
-                ref T xRef = ref MemoryMarshal.GetReference(x);
-                Vector256<T> resultIndex =
-#if NET9_0_OR_GREATER
-                    sizeof(T) == sizeof(long) ? Vector256<long>.Indices.As<long, T>() :
-                    sizeof(T) == sizeof(int) ? Vector256<int>.Indices.As<int, T>() :
-                    sizeof(T) == sizeof(short) ? Vector256<short>.Indices.As<short, T>() :
-                    Vector256<byte>.Indices.As<byte, T>();
-#else
-                    sizeof(T) == sizeof(long) ? Vector256.Create(0L, 1, 2, 3).As<long, T>() :
-                    sizeof(T) == sizeof(int) ? Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7).As<int, T>() :
-                    sizeof(T) == sizeof(short) ? Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As<short, T>() :
-                    Vector256.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31).As<byte, T>();
-#endif
-                Vector256<T> currentIndex = resultIndex;
-                Vector256<T> increment = CreateVector256T(Vector256<T>.Count);
-
-                // Load the first vector as the initial set of results, and bail immediately
-                // to scalar handling if it contains any NaNs (which don't compare equally to themselves).
-                Vector256<T> result = Vector256.LoadUnsafe(ref xRef);
-                Vector256<T> current;
-
-                Vector256<T> nanMask;
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    nanMask = ~Vector256.Equals(result, result);
-                    if (nanMask != Vector256<T>.Zero)
-                    {
-                        return IndexOfFirstMatch(nanMask);
-                    }
-                }
-
-                int oneVectorFromEnd = x.Length - Vector256<T>.Count;
-                int i = Vector256<T>.Count;
-
-                // Aggregate additional vectors into the result as long as there's at least one full vector left to process.
-                while (i <= oneVectorFromEnd)
-                {
-                    // Load the next vector, and early exit on NaN.
-                    current = Vector256.LoadUnsafe(ref xRef, (uint)i);
-                    currentIndex += increment;
-
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        nanMask = ~Vector256.Equals(current, current);
-                        if (nanMask != Vector256<T>.Zero)
-                        {
-                            return i + IndexOfFirstMatch(nanMask);
-                        }
-                    }
-
-                    TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex);
-
-                    i += Vector256<T>.Count;
-                }
-
-                // If any elements remain, handle them in one final vector.
-                if (i != x.Length)
-                {
-                    current = Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256<T>.Count));
-                    currentIndex += CreateVector256T(x.Length - i);
-
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        nanMask = ~Vector256.Equals(current, current);
-                        if (nanMask != Vector256<T>.Zero)
-                        {
-                            int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask);
-                            return typeof(T) == typeof(double) ?
-                                (int)(long)(object)currentIndex.As<T, long>()[indexInVectorOfFirstMatch] :
-                                (int)(object)currentIndex.As<T, int>()[indexInVectorOfFirstMatch];
-                        }
-                    }
-
-                    TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex);
-                }
-
-                // Aggregate the lanes in the vector to create the final scalar result.
-                return IndexOfFinalAggregate<T, TIndexOfMinMax>(result, resultIndex);
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && x.Length >= Vector128<T>.Count)
-            {
-                Debug.Assert(sizeof(T) is 1 or 2 or 4 or 8);
-
-                [MethodImpl(MethodImplOptions.AggressiveInlining)]
-                static Vector128<T> CreateVector128T(int i) =>
-                    sizeof(T) == sizeof(long) ? Vector128.Create((long)i).As<long, T>() :
-                    sizeof(T) == sizeof(int) ? Vector128.Create(i).As<int, T>() :
-                    sizeof(T) == sizeof(short) ? Vector128.Create((short)i).As<short, T>() :
-                    Vector128.Create((byte)i).As<byte, T>();
-
-                ref T xRef = ref MemoryMarshal.GetReference(x);
-                Vector128<T> resultIndex =
-#if NET9_0_OR_GREATER
-                    sizeof(T) == sizeof(long) ? Vector128<long>.Indices.As<long, T>() :
-                    sizeof(T) == sizeof(int) ? Vector128<int>.Indices.As<int, T>() :
-                    sizeof(T) == sizeof(short) ? Vector128<short>.Indices.As<short, T>() :
-                    Vector128<byte>.Indices.As<byte, T>();
-#else
-                    sizeof(T) == sizeof(long) ? Vector128.Create(0L, 1).As<long, T>() :
-                    sizeof(T) == sizeof(int) ? Vector128.Create(0, 1, 2, 3).As<int, T>() :
-                    sizeof(T) == sizeof(short) ? Vector128.Create(0, 1, 2, 3, 4, 5, 6, 7).As<short, T>() :
-                    Vector128.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).As<byte, T>();
-#endif
-                Vector128<T> currentIndex = resultIndex;
-                Vector128<T> increment = CreateVector128T(Vector128<T>.Count);
-
-                // Load the first vector as the initial set of results, and bail immediately
-                // to scalar handling if it contains any NaNs (which don't compare equally to themselves).
-                Vector128<T> result = Vector128.LoadUnsafe(ref xRef);
-                Vector128<T> current;
-
-                Vector128<T> nanMask;
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    nanMask = ~Vector128.Equals(result, result);
-                    if (nanMask != Vector128<T>.Zero)
-                    {
-                        return IndexOfFirstMatch(nanMask);
-                    }
-                }
-
-                int oneVectorFromEnd = x.Length - Vector128<T>.Count;
-                int i = Vector128<T>.Count;
-
-                // Aggregate additional vectors into the result as long as there's at least one full vector left to process.
-                while (i <= oneVectorFromEnd)
-                {
-                    // Load the next vector, and early exit on NaN.
-                    current = Vector128.LoadUnsafe(ref xRef, (uint)i);
-                    currentIndex += increment;
-
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        nanMask = ~Vector128.Equals(current, current);
-                        if (nanMask != Vector128<T>.Zero)
-                        {
-                            return i + IndexOfFirstMatch(nanMask);
-                        }
-                    }
-
-                    TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex);
-
-                    i += Vector128<T>.Count;
-                }
-
-                // If any elements remain, handle them in one final vector.
-                if (i != x.Length)
-                {
-                    current = Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128<T>.Count));
-                    currentIndex += CreateVector128T(x.Length - i);
-
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        nanMask = ~Vector128.Equals(current, current);
-                        if (nanMask != Vector128<T>.Zero)
-                        {
-                            int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask);
-                            return typeof(T) == typeof(double) ?
-                                (int)(long)(object)currentIndex.As<T, long>()[indexInVectorOfFirstMatch] :
-                                (int)(object)currentIndex.As<T, int>()[indexInVectorOfFirstMatch];
-                        }
-                    }
-
-                    TIndexOfMinMax.Invoke(ref result, current, ref resultIndex, currentIndex);
-                }
-
-                // Aggregate the lanes in the vector to create the final scalar result.
-                return IndexOfFinalAggregate<T, TIndexOfMinMax>(result, resultIndex);
-            }
-
-            // Scalar path used when either vectorization is not supported or the input is too small to vectorize.
-            T curResult = x[0];
-            int curIn = 0;
-            if (T.IsNaN(curResult))
-            {
-                return curIn;
-            }
-
-            for (int i = 1; i < x.Length; i++)
-            {
-                T current = x[i];
-                if (T.IsNaN(current))
-                {
-                    return i;
-                }
-
-                curIn = TIndexOfMinMax.Invoke(ref curResult, current, curIn, i);
-            }
-
-            return curIn;
-        }
-
-        private static int IndexOfFirstMatch<T>(Vector128<T> mask) =>
-            BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits());
-
-        private static int IndexOfFirstMatch<T>(Vector256<T> mask) =>
-            BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits());
-
-        private static int IndexOfFirstMatch<T>(Vector512<T> mask) =>
-            BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits());
-
-        /// <summary>Performs an element-wise operation on <paramref name="x"/> and writes the results to <paramref name="destination"/>.</summary>
-        /// <typeparam name="T">The element input type.</typeparam>
-        /// <typeparam name="TUnaryOperator">Specifies the operation to perform on each element loaded from <paramref name="x"/>.</typeparam>
-        private static void InvokeSpanIntoSpan<T, TUnaryOperator>(
-            ReadOnlySpan<T> x, Span<T> destination)
-            where TUnaryOperator : struct, IUnaryOperator<T, T> =>
-            InvokeSpanIntoSpan<T, T, TUnaryOperator>(x, destination);
-
-        /// <summary>Performs an element-wise operation on <paramref name="x"/> and writes the results to <paramref name="destination"/>.</summary>
-        /// <typeparam name="TInput">The element input type.</typeparam>
-        /// <typeparam name="TOutput">The element output type. Must be the same size as TInput if TInput and TOutput both support vectorization.</typeparam>
-        /// <typeparam name="TUnaryOperator">Specifies the operation to perform on each element loaded from <paramref name="x"/>.</typeparam>
-        /// <remarks>
-        /// This supports vectorizing the operation if <typeparamref name="TInput"/> and <typeparamref name="TOutput"/> are the same size.
-        /// Otherwise, it'll fall back to scalar operations.
-        /// </remarks>
-        private static void InvokeSpanIntoSpan<TInput, TOutput, TUnaryOperator>(
-            ReadOnlySpan<TInput> x, Span<TOutput> destination)
-            where TUnaryOperator : struct, IUnaryOperator<TInput, TOutput>
-        {
-            if (x.Length > destination.Length)
-            {
-                ThrowHelper.ThrowArgument_DestinationTooShort();
-            }
-
-            if (typeof(TInput) == typeof(TOutput))
-            {
-                // This ignores the unsafe case where a developer passes in overlapping spans for distinct types.
-                ValidateInputOutputSpanNonOverlapping(x, Rename<TOutput, TInput>(destination));
-            }
-
-            // Since every branch has a cost and since that cost is
-            // essentially lost for larger inputs, we do branches
-            // in a way that allows us to have the minimum possible
-            // for small sizes
-
-            ref TInput xRef = ref MemoryMarshal.GetReference(x);
-            ref TOutput dRef = ref MemoryMarshal.GetReference(destination);
-
-            nuint remainder = (uint)x.Length;
-
-            if (Vector512.IsHardwareAccelerated && Vector512<TInput>.IsSupported && Vector512<TOutput>.IsSupported && TUnaryOperator.Vectorizable && Unsafe.SizeOf<TInput>() == Unsafe.SizeOf<TOutput>())
-            {
-                if (remainder >= (uint)Vector512<TInput>.Count)
-                {
-                    Vectorized512(ref xRef, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<TInput>.IsSupported && Vector256<TOutput>.IsSupported && TUnaryOperator.Vectorizable && Unsafe.SizeOf<TInput>() == Unsafe.SizeOf<TOutput>())
-            {
-                if (remainder >= (uint)Vector256<TInput>.Count)
-                {
-                    Vectorized256(ref xRef, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<TInput>.IsSupported && Vector128<TOutput>.IsSupported && TUnaryOperator.Vectorizable && Unsafe.SizeOf<TInput>() == Unsafe.SizeOf<TOutput>())
-            {
-                if (remainder >= (uint)Vector128<TInput>.Count)
-                {
-                    Vectorized128(ref xRef, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            // This is the software fallback when no acceleration is available
-            // It requires no branches to hit
-
-            SoftwareFallback(ref xRef, ref dRef, remainder);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void SoftwareFallback(ref TInput xRef, ref TOutput dRef, nuint length)
-            {
-                for (nuint i = 0; i < length; i++)
-                {
-                    Unsafe.Add(ref dRef, i) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, i));
-                }
-            }
-
-            static void Vectorized128(ref TInput xRef, ref TOutput dRef, nuint remainder)
-            {
-                ref TOutput dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
-
-                if (remainder > (uint)(Vector128<TInput>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (TInput* px = &xRef)
-                    fixed (TOutput* pd = &dRef)
-                    {
-                        TInput* xPtr = px;
-                        TOutput* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(TInput)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector128<TInput>) - ((nuint)dPtr % (uint)sizeof(Vector128<TInput>))) / (uint)sizeof(TInput);
-
-                            xPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<TInput>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector128<TOutput> vector1;
-                        Vector128<TOutput> vector2;
-                        Vector128<TOutput> vector3;
-                        Vector128<TOutput> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(TInput))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector128<TInput>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 0)));
-                                vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 1)));
-                                vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 2)));
-                                vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 4)));
-                                vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 5)));
-                                vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 6)));
-                                vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<TInput>.Count * 8);
-                                dPtr += (uint)(Vector128<TOutput>.Count * 8);
-
-                                remainder -= (uint)(Vector128<TInput>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector128<TInput>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 0)));
-                                vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 1)));
-                                vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 2)));
-                                vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector128<TOutput>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector128<TOutput>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector128<TOutput>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector128<TOutput>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 4)));
-                                vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 5)));
-                                vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 6)));
-                                vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector128<TOutput>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector128<TOutput>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector128<TOutput>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector128<TOutput>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<TInput>.Count * 8);
-                                dPtr += (uint)(Vector128<TOutput>.Count * 8);
-
-                                remainder -= (uint)(Vector128<TInput>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector128<TInput>.Count - 1)) & (nuint)(-Vector128<TInput>.Count);
-
-                switch (remainder / (uint)Vector128<TInput>.Count)
-                {
-                    case 8:
-                    {
-                        Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 8)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 7)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 6)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 5)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 4)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 3)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 2)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<TInput>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            static void Vectorized256(ref TInput xRef, ref TOutput dRef, nuint remainder)
-            {
-                ref TOutput dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                Vector256<TOutput> end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<TInput>.Count));
-
-                if (remainder > (uint)(Vector256<TInput>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (TInput* px = &xRef)
-                    fixed (TOutput* pd = &dRef)
-                    {
-                        TInput* xPtr = px;
-                        TOutput* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(TInput)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector256<TInput>) - ((nuint)dPtr % (uint)sizeof(Vector256<TInput>))) / (uint)sizeof(TInput);
-
-                            xPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<TInput>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector256<TOutput> vector1;
-                        Vector256<TOutput> vector2;
-                        Vector256<TOutput> vector3;
-                        Vector256<TOutput> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(TInput))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector256<TInput>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 0)));
-                                vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 1)));
-                                vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 2)));
-                                vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 4)));
-                                vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 5)));
-                                vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 6)));
-                                vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<TOutput>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<TInput>.Count * 8);
-                                dPtr += (uint)(Vector256<TOutput>.Count * 8);
-
-                                remainder -= (uint)(Vector256<TInput>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector256<TInput>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 0)));
-                                vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 1)));
-                                vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 2)));
-                                vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector256<TOutput>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector256<TOutput>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector256<TOutput>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector256<TOutput>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 4)));
-                                vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 5)));
-                                vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 6)));
-                                vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<TInput>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector256<TOutput>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector256<TOutput>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector256<TOutput>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector256<TOutput>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<TInput>.Count * 8);
-                                dPtr += (uint)(Vector256<TOutput>.Count * 8);
-
-                                remainder -= (uint)(Vector256<TInput>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector256<TInput>.Count - 1)) & (nuint)(-Vector256<TInput>.Count);
-
-                switch (remainder / (uint)Vector256<TInput>.Count)
-                {
-                    case 8:
-                    {
-                        Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 8)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 7)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 6)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 5)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 4)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 3)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector256<TOutput> vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<TInput>.Count * 2)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<TOutput>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<TOutput>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            static void Vectorized512(ref TInput xRef, ref TOutput dRef, nuint remainder)
-            {
-                ref TOutput dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector512<TOutput> beg = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef));
-                Vector512<TOutput> end = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<TInput>.Count));
-
-                if (remainder > (uint)(Vector512<TInput>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (TInput* px = &xRef)
-                    fixed (TOutput* pd = &dRef)
-                    {
-                        TInput* xPtr = px;
-                        TOutput* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(TInput)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector512<TInput>) - ((nuint)dPtr % (uint)sizeof(Vector512<TInput>))) / (uint)sizeof(TInput);
-
-                            xPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<TInput>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector512<TOutput> vector1;
-                        Vector512<TOutput> vector2;
-                        Vector512<TOutput> vector3;
-                        Vector512<TOutput> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(TInput))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector512<TInput>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 0)));
-                                vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 1)));
-                                vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 2)));
-                                vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 4)));
-                                vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 5)));
-                                vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 6)));
-                                vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<TInput>.Count * 8);
-                                dPtr += (uint)(Vector512<TInput>.Count * 8);
-
-                                remainder -= (uint)(Vector512<TInput>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector512<TInput>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 0)));
-                                vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 1)));
-                                vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 2)));
-                                vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector512<TOutput>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector512<TOutput>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector512<TOutput>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector512<TOutput>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 4)));
-                                vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 5)));
-                                vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 6)));
-                                vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector512<TOutput>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector512<TOutput>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector512<TOutput>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector512<TOutput>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<TInput>.Count * 8);
-                                dPtr += (uint)(Vector512<TOutput>.Count * 8);
-
-                                remainder -= (uint)(Vector512<TInput>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector512<TInput>.Count - 1)) & (nuint)(-Vector512<TInput>.Count);
-
-                switch (remainder / (uint)Vector512<TInput>.Count)
-                {
-                    case 8:
-                    {
-                        Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 8)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 7)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 6)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 5)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 4)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 3)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 2)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<TOutput>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall(ref TInput xRef, ref TOutput dRef, nuint remainder)
-            {
-                if (sizeof(TInput) == 1)
-                {
-                    VectorizedSmall1(ref xRef, ref dRef, remainder);
-                }
-                else if (sizeof(TInput) == 2)
-                {
-                    VectorizedSmall2(ref xRef, ref dRef, remainder);
-                }
-                else if (sizeof(TInput) == 4)
-                {
-                    VectorizedSmall4(ref xRef, ref dRef, remainder);
-                }
-                else
-                {
-                    Debug.Assert(sizeof(TInput) == 8);
-                    VectorizedSmall8(ref xRef, ref dRef, remainder);
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall1(ref TInput xRef, ref TOutput dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(TInput) == 1);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 63:
-                    case 62:
-                    case 61:
-                    case 60:
-                    case 59:
-                    case 58:
-                    case 57:
-                    case 56:
-                    case 55:
-                    case 54:
-                    case 53:
-                    case 52:
-                    case 51:
-                    case 50:
-                    case 49:
-                    case 48:
-                    case 47:
-                    case 46:
-                    case 45:
-                    case 44:
-                    case 43:
-                    case 42:
-                    case 41:
-                    case 40:
-                    case 39:
-                    case 38:
-                    case 37:
-                    case 36:
-                    case 35:
-                    case 34:
-                    case 33:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        Vector256<TOutput> end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<TInput>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<TOutput>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 32:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<TOutput>.Count);
-
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 15:
-                        Unsafe.Add(ref dRef, 14) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 14));
-                        goto case 14;
-
-                    case 14:
-                        Unsafe.Add(ref dRef, 13) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 13));
-                        goto case 13;
-
-                    case 13:
-                        Unsafe.Add(ref dRef, 12) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 12));
-                        goto case 12;
-
-                    case 12:
-                        Unsafe.Add(ref dRef, 11) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 11));
-                        goto case 11;
-
-                    case 11:
-                        Unsafe.Add(ref dRef, 10) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 10));
-                        goto case 10;
-
-                    case 10:
-                        Unsafe.Add(ref dRef, 9) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 9));
-                        goto case 9;
-
-                    case 9:
-                        Unsafe.Add(ref dRef, 8) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 8));
-                        goto case 8;
-
-                    case 8:
-                        Unsafe.Add(ref dRef, 7) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 7));
-                        goto case 7;
-
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 6));
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 5));
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 4));
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 3));
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 2));
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 1));
-                        goto case 1;
-
-                    case 1:
-                        dRef = TUnaryOperator.Invoke(xRef);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall2(ref TInput xRef, ref TOutput dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(TInput) == 2);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        Vector256<TOutput> end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<TInput>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<TOutput>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<TOutput>.Count);
-
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 8:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 6));
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 5));
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 4));
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 3));
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 2));
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 1));
-                        goto case 1;
-
-                    case 1:
-                        dRef = TUnaryOperator.Invoke(xRef);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall4(ref TInput xRef, ref TOutput dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(TInput) == 4);
-
-                switch (remainder)
-                {
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        Vector256<TOutput> end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<TInput>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<TOutput>.Count);
-
-                        break;
-                    }
-
-                    case 8:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<TOutput>.Count);
-
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Unsafe.Add(ref dRef, 2) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 2));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Unsafe.Add(ref dRef, 1) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 1));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        dRef = TUnaryOperator.Invoke(xRef);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall8(ref TInput xRef, ref TOutput dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(TInput) == 8);
-
-                switch (remainder)
-                {
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        Vector256<TOutput> end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<TInput>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<TOutput>.Count);
-
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<TOutput> beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<TOutput>.Count);
-
-                        break;
-                    }
-
-                    case 2:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 1:
-                    {
-                        dRef = TUnaryOperator.Invoke(xRef);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-            }
-        }
-
-        /// <summary>Performs an element-wise operation on <paramref name="x"/> and writes the results to <paramref name="destination"/>.</summary>
-        /// <typeparam name="TInput">The element input type.</typeparam>
-        /// <typeparam name="TOutput">The element output type. Must be the same size as TInput if TInput and TOutput both support vectorization.</typeparam>
-        /// <typeparam name="TUnaryOperator">Specifies the operation to perform on each element loaded from <paramref name="x"/>.</typeparam>
-        /// <remarks>This should only be used when it's known that TInput/TOutput are vectorizable and the size of TInput is twice that of TOutput.</remarks>
-        private static void InvokeSpanIntoSpan_2to1<TInput, TOutput, TUnaryOperator>(
-            ReadOnlySpan<TInput> x, Span<TOutput> destination)
-            where TUnaryOperator : struct, IUnaryTwoToOneOperator<TInput, TOutput>
-        {
-            Debug.Assert(sizeof(TInput) == sizeof(TOutput) * 2);
-
-            if (x.Length > destination.Length)
-            {
-                ThrowHelper.ThrowArgument_DestinationTooShort();
-            }
-
-            ref TInput xRef = ref MemoryMarshal.GetReference(x);
-            ref TOutput destinationRef = ref MemoryMarshal.GetReference(destination);
-            int i = 0, twoVectorsFromEnd;
-
-            if (Vector512.IsHardwareAccelerated && TUnaryOperator.Vectorizable)
-            {
-                Debug.Assert(Vector512<TInput>.IsSupported);
-                Debug.Assert(Vector512<TOutput>.IsSupported);
-
-                twoVectorsFromEnd = x.Length - (Vector512<TInput>.Count * 2);
-                if (i <= twoVectorsFromEnd)
-                {
-                    // Loop handling two input vectors / one output vector at a time.
-                    do
-                    {
-                        TUnaryOperator.Invoke(
-                            Vector512.LoadUnsafe(ref xRef, (uint)i),
-                            Vector512.LoadUnsafe(ref xRef, (uint)(i + Vector512<TInput>.Count))).StoreUnsafe(ref destinationRef, (uint)i);
-
-                        i += Vector512<TInput>.Count * 2;
-                    }
-                    while (i <= twoVectorsFromEnd);
-
-                    // Handle any remaining elements with final vectors.
-                    if (i != x.Length)
-                    {
-                        i = x.Length - (Vector512<TInput>.Count * 2);
-
-                        TUnaryOperator.Invoke(
-                            Vector512.LoadUnsafe(ref xRef, (uint)i),
-                            Vector512.LoadUnsafe(ref xRef, (uint)(i + Vector512<TInput>.Count))).StoreUnsafe(ref destinationRef, (uint)i);
-                    }
-
-                    return;
-                }
-            }
-
-            if (Vector256.IsHardwareAccelerated && TUnaryOperator.Vectorizable)
-            {
-                Debug.Assert(Vector256<TInput>.IsSupported);
-                Debug.Assert(Vector256<TOutput>.IsSupported);
-
-                twoVectorsFromEnd = x.Length - (Vector256<TInput>.Count * 2);
-                if (i <= twoVectorsFromEnd)
-                {
-                    // Loop handling two input vectors / one output vector at a time.
-                    do
-                    {
-                        TUnaryOperator.Invoke(
-                            Vector256.LoadUnsafe(ref xRef, (uint)i),
-                            Vector256.LoadUnsafe(ref xRef, (uint)(i + Vector256<TInput>.Count))).StoreUnsafe(ref destinationRef, (uint)i);
-
-                        i += Vector256<TInput>.Count * 2;
-                    }
-                    while (i <= twoVectorsFromEnd);
-
-                    // Handle any remaining elements with final vectors.
-                    if (i != x.Length)
-                    {
-                        i = x.Length - (Vector256<TInput>.Count * 2);
-
-                        TUnaryOperator.Invoke(
-                            Vector256.LoadUnsafe(ref xRef, (uint)i),
-                            Vector256.LoadUnsafe(ref xRef, (uint)(i + Vector256<TInput>.Count))).StoreUnsafe(ref destinationRef, (uint)i);
-                    }
-
-                    return;
-                }
-            }
-
-            if (Vector128.IsHardwareAccelerated && TUnaryOperator.Vectorizable)
-            {
-                Debug.Assert(Vector128<TInput>.IsSupported);
-                Debug.Assert(Vector128<TOutput>.IsSupported);
-
-                twoVectorsFromEnd = x.Length - (Vector128<TInput>.Count * 2);
-                if (i <= twoVectorsFromEnd)
-                {
-                    // Loop handling two input vectors / one output vector at a time.
-                    do
-                    {
-                        TUnaryOperator.Invoke(
-                            Vector128.LoadUnsafe(ref xRef, (uint)i),
-                            Vector128.LoadUnsafe(ref xRef, (uint)(i + Vector128<TInput>.Count))).StoreUnsafe(ref destinationRef, (uint)i);
-
-                        i += Vector128<TInput>.Count * 2;
-                    }
-                    while (i <= twoVectorsFromEnd);
-
-                    // Handle any remaining elements with final vectors.
-                    if (i != x.Length)
-                    {
-                        i = x.Length - (Vector128<TInput>.Count * 2);
-
-                        TUnaryOperator.Invoke(
-                            Vector128.LoadUnsafe(ref xRef, (uint)i),
-                            Vector128.LoadUnsafe(ref xRef, (uint)(i + Vector128<TInput>.Count))).StoreUnsafe(ref destinationRef, (uint)i);
-                    }
-
-                    return;
-                }
-            }
-
-            while (i < x.Length)
-            {
-                Unsafe.Add(ref destinationRef, i) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, i));
-                i++;
-            }
-        }
-
-        /// <summary>Performs an element-wise operation on <paramref name="x"/> and writes the results to <paramref name="destination"/>.</summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TStatefulUnaryOperator">Specifies the operation to perform on each element loaded from <paramref name="x"/>.</typeparam>
-        private static void InvokeSpanIntoSpan<T, TStatefulUnaryOperator>(
-            ReadOnlySpan<T> x, TStatefulUnaryOperator op, Span<T> destination)
-            where TStatefulUnaryOperator : struct, IStatefulUnaryOperator<T>
-        {
-            // NOTE: This implementation is an exact copy of InvokeSpanIntoSpan<T, TUnaryOperator>,
-            // except it accepts an operator that carries state with it, using instance rather than
-            // static invocation methods.
-
-            if (x.Length > destination.Length)
-            {
-                ThrowHelper.ThrowArgument_DestinationTooShort();
-            }
-
-            ValidateInputOutputSpanNonOverlapping(x, destination);
-
-            // Since every branch has a cost and since that cost is
-            // essentially lost for larger inputs, we do branches
-            // in a way that allows us to have the minimum possible
-            // for small sizes
-
-            ref T xRef = ref MemoryMarshal.GetReference(x);
-            ref T dRef = ref MemoryMarshal.GetReference(destination);
-
-            nuint remainder = (uint)x.Length;
-
-            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TStatefulUnaryOperator.Vectorizable)
-            {
-                if (remainder >= (uint)Vector512<T>.Count)
-                {
-                    Vectorized512(ref xRef, ref dRef, remainder, op);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref dRef, remainder, op);
-                }
-
-                return;
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && TStatefulUnaryOperator.Vectorizable)
-            {
-                if (remainder >= (uint)Vector256<T>.Count)
-                {
-                    Vectorized256(ref xRef, ref dRef, remainder, op);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref dRef, remainder, op);
-                }
-
-                return;
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TStatefulUnaryOperator.Vectorizable)
-            {
-                if (remainder >= (uint)Vector128<T>.Count)
-                {
-                    Vectorized128(ref xRef, ref dRef, remainder, op);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref dRef, remainder, op);
-                }
-
-                return;
-            }
-
-            // This is the software fallback when no acceleration is available
-            // It requires no branches to hit
-
-            SoftwareFallback(ref xRef, ref dRef, remainder, op);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void SoftwareFallback(ref T xRef, ref T dRef, nuint length, TStatefulUnaryOperator op)
-            {
-                for (nuint i = 0; i < length; i++)
-                {
-                    Unsafe.Add(ref dRef, i) = op.Invoke(Unsafe.Add(ref xRef, i));
-                }
-            }
-
-            static void Vectorized128(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
-                Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
-
-                if (remainder > (uint)(Vector128<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector128<T> vector1;
-                        Vector128<T> vector2;
-                        Vector128<T> vector3;
-                        Vector128<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector128<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)));
-                                vector2 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)));
-                                vector3 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)));
-                                vector4 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)));
-                                vector2 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)));
-                                vector3 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)));
-                                vector4 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<T>.Count * 8);
-                                dPtr += (uint)(Vector128<T>.Count * 8);
-
-                                remainder -= (uint)(Vector128<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector128<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)));
-                                vector2 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)));
-                                vector3 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)));
-                                vector4 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)));
-                                vector2 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)));
-                                vector3 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)));
-                                vector4 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<T>.Count * 8);
-                                dPtr += (uint)(Vector128<T>.Count * 8);
-
-                                remainder -= (uint)(Vector128<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
-
-                switch (remainder / (uint)Vector128<T>.Count)
-                {
-                    case 8:
-                        {
-                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
-                            goto case 7;
-                        }
-
-                    case 7:
-                        {
-                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
-                            goto case 6;
-                        }
-
-                    case 6:
-                        {
-                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
-                            goto case 5;
-                        }
-
-                    case 5:
-                        {
-                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
-                            goto case 4;
-                        }
-
-                    case 4:
-                        {
-                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
-                            goto case 3;
-                        }
-
-                    case 3:
-                        {
-                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
-                            goto case 2;
-                        }
-
-                    case 2:
-                        {
-                            Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
-                            goto case 1;
-                        }
-
-                    case 1:
-                        {
-                            // Store the last block, which includes any elements that wouldn't fill a full vector
-                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
-                            goto case 0;
-                        }
-
-                    case 0:
-                        {
-                            // Store the first block, which includes any elements preceding the first aligned block
-                            beg.StoreUnsafe(ref dRefBeg);
-                            break;
-                        }
-                }
-            }
-
-            static void Vectorized256(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
-                Vector256<T> end = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
-
-                if (remainder > (uint)(Vector256<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)dPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector256<T> vector1;
-                        Vector256<T> vector2;
-                        Vector256<T> vector3;
-                        Vector256<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector256<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)));
-                                vector2 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)));
-                                vector3 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)));
-                                vector4 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)));
-                                vector2 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)));
-                                vector3 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)));
-                                vector4 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<T>.Count * 8);
-                                dPtr += (uint)(Vector256<T>.Count * 8);
-
-                                remainder -= (uint)(Vector256<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector256<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)));
-                                vector2 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)));
-                                vector3 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)));
-                                vector4 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)));
-                                vector2 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)));
-                                vector3 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)));
-                                vector4 = op.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<T>.Count * 8);
-                                dPtr += (uint)(Vector256<T>.Count * 8);
-
-                                remainder -= (uint)(Vector256<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector256<T>.Count - 1)) & (nuint)(-Vector256<T>.Count);
-
-                switch (remainder / (uint)Vector256<T>.Count)
-                {
-                    case 8:
-                        {
-                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 8)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 8));
-                            goto case 7;
-                        }
-
-                    case 7:
-                        {
-                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 7));
-                            goto case 6;
-                        }
-
-                    case 6:
-                        {
-                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 6));
-                            goto case 5;
-                        }
-
-                    case 5:
-                        {
-                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 5));
-                            goto case 4;
-                        }
-
-                    case 4:
-                        {
-                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 4));
-                            goto case 3;
-                        }
-
-                    case 3:
-                        {
-                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 3));
-                            goto case 2;
-                        }
-
-                    case 2:
-                        {
-                            Vector256<T> vector = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 2));
-                            goto case 1;
-                        }
-
-                    case 1:
-                        {
-                            // Store the last block, which includes any elements that wouldn't fill a full vector
-                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<T>.Count);
-                            goto case 0;
-                        }
-
-                    case 0:
-                        {
-                            // Store the first block, which includes any elements preceding the first aligned block
-                            beg.StoreUnsafe(ref dRefBeg);
-                            break;
-                        }
-                }
-            }
-
-            static void Vectorized512(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector512<T> beg = op.Invoke(Vector512.LoadUnsafe(ref xRef));
-                Vector512<T> end = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count));
-
-                if (remainder > (uint)(Vector512<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector512<T> vector1;
-                        Vector512<T> vector2;
-                        Vector512<T> vector3;
-                        Vector512<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector512<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)));
-                                vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)));
-                                vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)));
-                                vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)));
-                                vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)));
-                                vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)));
-                                vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<T>.Count * 8);
-                                dPtr += (uint)(Vector512<T>.Count * 8);
-
-                                remainder -= (uint)(Vector512<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector512<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)));
-                                vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)));
-                                vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)));
-                                vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)));
-                                vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)));
-                                vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)));
-                                vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<T>.Count * 8);
-                                dPtr += (uint)(Vector512<T>.Count * 8);
-
-                                remainder -= (uint)(Vector512<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
-
-                switch (remainder / (uint)Vector512<T>.Count)
-                {
-                    case 8:
-                        {
-                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
-                            goto case 7;
-                        }
-
-                    case 7:
-                        {
-                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
-                            goto case 6;
-                        }
-
-                    case 6:
-                        {
-                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
-                            goto case 5;
-                        }
-
-                    case 5:
-                        {
-                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
-                            goto case 4;
-                        }
-
-                    case 4:
-                        {
-                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
-                            goto case 3;
-                        }
-
-                    case 3:
-                        {
-                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
-                            goto case 2;
-                        }
-
-                    case 2:
-                        {
-                            Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)));
-                            vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
-                            goto case 1;
-                        }
-
-                    case 1:
-                        {
-                            // Store the last block, which includes any elements that wouldn't fill a full vector
-                            end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
-                            goto case 0;
-                        }
-
-                    case 0:
-                        {
-                            // Store the first block, which includes any elements preceding the first aligned block
-                            beg.StoreUnsafe(ref dRefBeg);
-                            break;
-                        }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
-            {
-                if (sizeof(T) == 1)
-                {
-                    VectorizedSmall1(ref xRef, ref dRef, remainder, op);
-                }
-                else if (sizeof(T) == 2)
-                {
-                    VectorizedSmall2(ref xRef, ref dRef, remainder, op);
-                }
-                else if (sizeof(T) == 4)
-                {
-                    VectorizedSmall4(ref xRef, ref dRef, remainder, op);
-                }
-                else
-                {
-                    Debug.Assert(sizeof(T) == 8);
-                    VectorizedSmall8(ref xRef, ref dRef, remainder, op);
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall1(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
-            {
-                Debug.Assert(sizeof(T) == 1);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 63:
-                    case 62:
-                    case 61:
-                    case 60:
-                    case 59:
-                    case 58:
-                    case 57:
-                    case 56:
-                    case 55:
-                    case 54:
-                    case 53:
-                    case 52:
-                    case 51:
-                    case 50:
-                    case 49:
-                    case 48:
-                    case 47:
-                    case 46:
-                    case 45:
-                    case 44:
-                    case 43:
-                    case 42:
-                    case 41:
-                    case 40:
-                    case 39:
-                    case 38:
-                    case 37:
-                    case 36:
-                    case 35:
-                    case 34:
-                    case 33:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        Vector256<T> end = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 32:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 15:
-                        Unsafe.Add(ref dRef, 14) = op.Invoke(Unsafe.Add(ref xRef, 14));
-                        goto case 14;
-
-                    case 14:
-                        Unsafe.Add(ref dRef, 13) = op.Invoke(Unsafe.Add(ref xRef, 13));
-                        goto case 13;
-
-                    case 13:
-                        Unsafe.Add(ref dRef, 12) = op.Invoke(Unsafe.Add(ref xRef, 12));
-                        goto case 12;
-
-                    case 12:
-                        Unsafe.Add(ref dRef, 11) = op.Invoke(Unsafe.Add(ref xRef, 11));
-                        goto case 11;
-
-                    case 11:
-                        Unsafe.Add(ref dRef, 10) = op.Invoke(Unsafe.Add(ref xRef, 10));
-                        goto case 10;
-
-                    case 10:
-                        Unsafe.Add(ref dRef, 9) = op.Invoke(Unsafe.Add(ref xRef, 9));
-                        goto case 9;
-
-                    case 9:
-                        Unsafe.Add(ref dRef, 8) = op.Invoke(Unsafe.Add(ref xRef, 8));
-                        goto case 8;
-
-                    case 8:
-                        Unsafe.Add(ref dRef, 7) = op.Invoke(Unsafe.Add(ref xRef, 7));
-                        goto case 7;
-
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = op.Invoke(Unsafe.Add(ref xRef, 6));
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = op.Invoke(Unsafe.Add(ref xRef, 5));
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = op.Invoke(Unsafe.Add(ref xRef, 4));
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = op.Invoke(Unsafe.Add(ref xRef, 3));
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = op.Invoke(Unsafe.Add(ref xRef, 2));
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = op.Invoke(Unsafe.Add(ref xRef, 1));
-                        goto case 1;
-
-                    case 1:
-                        dRef = op.Invoke(xRef);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall2(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
-            {
-                Debug.Assert(sizeof(T) == 2);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        Vector256<T> end = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 8:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = op.Invoke(Unsafe.Add(ref xRef, 6));
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = op.Invoke(Unsafe.Add(ref xRef, 5));
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = op.Invoke(Unsafe.Add(ref xRef, 4));
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = op.Invoke(Unsafe.Add(ref xRef, 3));
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = op.Invoke(Unsafe.Add(ref xRef, 2));
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = op.Invoke(Unsafe.Add(ref xRef, 1));
-                        goto case 1;
-
-                    case 1:
-                        dRef = op.Invoke(xRef);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall4(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
-            {
-                Debug.Assert(sizeof(T) == 4);
-
-                switch (remainder)
-                {
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                        {
-                            Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                            Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
-                            Vector256<T> end = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
-
-                            beg.StoreUnsafe(ref dRef);
-                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                            break;
-                        }
-
-                    case 8:
-                        {
-                            Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                            Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
-                            beg.StoreUnsafe(ref dRef);
-
-                            break;
-                        }
-
-                    case 7:
-                    case 6:
-                    case 5:
-                        {
-                            Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                            Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
-                            Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
-
-                            beg.StoreUnsafe(ref dRef);
-                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                            break;
-                        }
-
-                    case 4:
-                        {
-                            Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                            Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
-                            beg.StoreUnsafe(ref dRef);
-
-                            break;
-                        }
-
-                    case 3:
-                        {
-                            Unsafe.Add(ref dRef, 2) = op.Invoke(Unsafe.Add(ref xRef, 2));
-                            goto case 2;
-                        }
-
-                    case 2:
-                        {
-                            Unsafe.Add(ref dRef, 1) = op.Invoke(Unsafe.Add(ref xRef, 1));
-                            goto case 1;
-                        }
-
-                    case 1:
-                        {
-                            dRef = op.Invoke(xRef);
-                            goto case 0;
-                        }
-
-                    case 0:
-                        {
-                            break;
-                        }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall8(ref T xRef, ref T dRef, nuint remainder, TStatefulUnaryOperator op)
-            {
-                Debug.Assert(sizeof(T) == 8);
-
-                switch (remainder)
-                {
-                    case 7:
-                    case 6:
-                    case 5:
-                        {
-                            Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                            Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
-                            Vector256<T> end = op.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count));
-
-                            beg.StoreUnsafe(ref dRef);
-                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                            break;
-                        }
-
-                    case 4:
-                        {
-                            Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                            Vector256<T> beg = op.Invoke(Vector256.LoadUnsafe(ref xRef));
-                            beg.StoreUnsafe(ref dRef);
-
-                            break;
-                        }
-
-                    case 3:
-                        {
-                            Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                            Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
-                            Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
-
-                            beg.StoreUnsafe(ref dRef);
-                            end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                            break;
-                        }
-
-                    case 2:
-                        {
-                            Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                            Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
-                            beg.StoreUnsafe(ref dRef);
-
-                            break;
-                        }
-
-                    case 1:
-                        {
-                            dRef = op.Invoke(xRef);
-                            goto case 0;
-                        }
-
-                    case 0:
-                        {
-                            break;
-                        }
-                }
-            }
-        }
-
-        /// <summary>Performs an element-wise operation on <paramref name="x"/> and writes the results to <paramref name="destination"/>.</summary>
-        /// <typeparam name="TInput">The element input type.</typeparam>
-        /// <typeparam name="TOutput">The element output type. Must be the same size as TInput if TInput and TOutput both support vectorization.</typeparam>
-        /// <typeparam name="TUnaryOperator">Specifies the operation to perform on each element loaded from <paramref name="x"/>.</typeparam>
-        /// <remarks>This should only be used when it's known that TInput/TOutput are vectorizable and the size of TInput is half that of TOutput.</remarks>
-        private static void InvokeSpanIntoSpan_1to2<TInput, TOutput, TUnaryOperator>(
-            ReadOnlySpan<TInput> x, Span<TOutput> destination)
-            where TUnaryOperator : struct, IUnaryOneToTwoOperator<TInput, TOutput>
-        {
-            Debug.Assert(sizeof(TInput) * 2 == sizeof(TOutput));
-
-            if (x.Length > destination.Length)
-            {
-                ThrowHelper.ThrowArgument_DestinationTooShort();
-            }
-
-            ref TInput sourceRef = ref MemoryMarshal.GetReference(x);
-            ref TOutput destinationRef = ref MemoryMarshal.GetReference(destination);
-            int i = 0, oneVectorFromEnd;
-
-            if (Vector512.IsHardwareAccelerated && TUnaryOperator.Vectorizable)
-            {
-                Debug.Assert(Vector512<TInput>.IsSupported);
-                Debug.Assert(Vector512<TOutput>.IsSupported);
-
-                oneVectorFromEnd = x.Length - Vector512<TInput>.Count;
-                if (i <= oneVectorFromEnd)
-                {
-                    // Loop handling one input vector / two output vectors at a time.
-                    do
-                    {
-                        (Vector512<TOutput> lower, Vector512<TOutput> upper) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
-                        lower.StoreUnsafe(ref destinationRef, (uint)i);
-                        upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector512<TOutput>.Count));
-
-                        i += Vector512<TInput>.Count;
-                    }
-                    while (i <= oneVectorFromEnd);
-
-                    // Handle any remaining elements with a final input vector.
-                    if (i != x.Length)
-                    {
-                        i = x.Length - Vector512<TInput>.Count;
-
-                        (Vector512<TOutput> lower, Vector512<TOutput> upper) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
-                        lower.StoreUnsafe(ref destinationRef, (uint)i);
-                        upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector512<TOutput>.Count));
-                    }
-
-                    return;
-                }
-            }
-
-            if (Vector256.IsHardwareAccelerated && TUnaryOperator.Vectorizable)
-            {
-                Debug.Assert(Vector256<TInput>.IsSupported);
-                Debug.Assert(Vector256<TOutput>.IsSupported);
-
-                oneVectorFromEnd = x.Length - Vector256<TInput>.Count;
-                if (i <= oneVectorFromEnd)
-                {
-                    // Loop handling one input vector / two output vectors at a time.
-                    do
-                    {
-                        (Vector256<TOutput> lower, Vector256<TOutput> upper) = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref sourceRef, (uint)i));
-                        lower.StoreUnsafe(ref destinationRef, (uint)i);
-                        upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector256<TOutput>.Count));
-
-                        i += Vector256<TInput>.Count;
-                    }
-                    while (i <= oneVectorFromEnd);
-
-                    // Handle any remaining elements with a final input vector.
-                    if (i != x.Length)
-                    {
-                        i = x.Length - Vector256<TInput>.Count;
-
-                        (Vector256<TOutput> lower, Vector256<TOutput> upper) = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref sourceRef, (uint)i));
-                        lower.StoreUnsafe(ref destinationRef, (uint)i);
-                        upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector256<TOutput>.Count));
-                    }
-
-                    return;
-                }
-            }
-
-            if (Vector128.IsHardwareAccelerated && TUnaryOperator.Vectorizable)
-            {
-                Debug.Assert(Vector128<TInput>.IsSupported);
-                Debug.Assert(Vector128<TOutput>.IsSupported);
-
-                oneVectorFromEnd = x.Length - Vector128<TInput>.Count;
-                if (i <= oneVectorFromEnd)
-                {
-                    // Loop handling one input vector / two output vectors at a time.
-                    do
-                    {
-                        (Vector128<TOutput> lower, Vector128<TOutput> upper) = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i));
-                        lower.StoreUnsafe(ref destinationRef, (uint)i);
-                        upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector128<TOutput>.Count));
-
-                        i += Vector128<TInput>.Count;
-                    }
-                    while (i <= oneVectorFromEnd);
-
-                    // Handle any remaining elements with a final input vector.
-                    if (i != x.Length)
-                    {
-                        i = x.Length - Vector128<TInput>.Count;
-
-                        (Vector128<TOutput> lower, Vector128<TOutput> upper) = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i));
-                        lower.StoreUnsafe(ref destinationRef, (uint)i);
-                        upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector128<TOutput>.Count));
-                    }
-
-                    return;
-                }
-            }
-
-            while (i < x.Length)
-            {
-                Unsafe.Add(ref destinationRef, i) = TUnaryOperator.Invoke(Unsafe.Add(ref sourceRef, i));
-                i++;
-            }
-        }
-
-        /// <summary>Performs an element-wise operation on <paramref name="x"/> and writes the results to <paramref name="destination1"/> and <paramref name="destination2"/>.</summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TUnaryOperator">Specifies the operation to perform on each element loaded from <paramref name="x"/>.</typeparam>
-        private static void InvokeSpanIntoSpan_TwoOutputs<T, TUnaryOperator>(
-            ReadOnlySpan<T> x, Span<T> destination1, Span<T> destination2)
-            where TUnaryOperator : struct, IUnaryInputBinaryOutput<T>
-        {
-            if (x.Length > destination1.Length)
-            {
-                ThrowHelper.ThrowArgument_DestinationTooShort(nameof(destination1));
-            }
-
-            if (x.Length > destination2.Length)
-            {
-                ThrowHelper.ThrowArgument_DestinationTooShort(nameof(destination2));
-            }
-
-            ValidateInputOutputSpanNonOverlapping(x, destination1);
-            ValidateInputOutputSpanNonOverlapping(x, destination2);
-
-            ref T sourceRef = ref MemoryMarshal.GetReference(x);
-            ref T destination1Ref = ref MemoryMarshal.GetReference(destination1);
-            ref T destination2Ref = ref MemoryMarshal.GetReference(destination2);
-            int i = 0, oneVectorFromEnd;
-
-            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TUnaryOperator.Vectorizable)
-            {
-                oneVectorFromEnd = x.Length - Vector512<T>.Count;
-                if (i <= oneVectorFromEnd)
-                {
-                    // Loop handling one input vector / two destination vectors at a time.
-                    do
-                    {
-                        (Vector512<T> first, Vector512<T> second) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
-                        first.StoreUnsafe(ref destination1Ref, (uint)i);
-                        second.StoreUnsafe(ref destination2Ref, (uint)i);
-
-                        i += Vector512<T>.Count;
-                    }
-                    while (i <= oneVectorFromEnd);
-
-                    // Handle any remaining elements with a final input vector.
-                    if (i != x.Length)
-                    {
-                        i = x.Length - Vector512<T>.Count;
-
-                        (Vector512<T> first, Vector512<T> second) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
-                        first.StoreUnsafe(ref destination1Ref, (uint)i);
-                        second.StoreUnsafe(ref destination2Ref, (uint)i);
-                    }
-
-                    return;
-                }
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && TUnaryOperator.Vectorizable)
-            {
-                oneVectorFromEnd = x.Length - Vector256<T>.Count;
-                if (i <= oneVectorFromEnd)
-                {
-                    // Loop handling one input vector / two destination vectors at a time.
-                    do
-                    {
-                        (Vector256<T> first, Vector256<T> second) = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref sourceRef, (uint)i));
-                        first.StoreUnsafe(ref destination1Ref, (uint)i);
-                        second.StoreUnsafe(ref destination2Ref, (uint)i);
-
-                        i += Vector256<T>.Count;
-                    }
-                    while (i <= oneVectorFromEnd);
-
-                    // Handle any remaining elements with a final input vector.
-                    if (i != x.Length)
-                    {
-                        i = x.Length - Vector256<T>.Count;
-
-                        (Vector256<T> first, Vector256<T> second) = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref sourceRef, (uint)i));
-                        first.StoreUnsafe(ref destination1Ref, (uint)i);
-                        second.StoreUnsafe(ref destination2Ref, (uint)i);
-                    }
-
-                    return;
-                }
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TUnaryOperator.Vectorizable)
-            {
-                oneVectorFromEnd = x.Length - Vector128<T>.Count;
-                if (i <= oneVectorFromEnd)
-                {
-                    // Loop handling one input vector / two destination vectors at a time.
-                    do
-                    {
-                        (Vector128<T> first, Vector128<T> second) = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i));
-                        first.StoreUnsafe(ref destination1Ref, (uint)i);
-                        second.StoreUnsafe(ref destination2Ref, (uint)i);
-
-                        i += Vector128<T>.Count;
-                    }
-                    while (i <= oneVectorFromEnd);
-
-                    // Handle any remaining elements with a final input vector.
-                    if (i != x.Length)
-                    {
-                        i = x.Length - Vector128<T>.Count;
-
-                        (Vector128<T> first, Vector128<T> second) = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i));
-                        first.StoreUnsafe(ref destination1Ref, (uint)i);
-                        second.StoreUnsafe(ref destination2Ref, (uint)i);
-                    }
-
-                    return;
-                }
-            }
-
-            while (i < x.Length)
-            {
-                (T first, T second) = TUnaryOperator.Invoke(Unsafe.Add(ref sourceRef, i));
-                Unsafe.Add(ref destination1Ref, i) = first;
-                Unsafe.Add(ref destination2Ref, i) = second;
-                i++;
-            }
-        }
-
-        /// <summary>
-        /// Performs an element-wise operation on <paramref name="x"/> and <paramref name="y"/>,
-        /// and writes the results to <paramref name="destination"/>.
-        /// </summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TBinaryOperator{T}">
-        /// Specifies the operation to perform on the pair-wise elements loaded from <paramref name="x"/> and <paramref name="y"/>.
-        /// </typeparam>
-        private static void InvokeSpanSpanIntoSpan<T, TBinaryOperator>(
-            ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination)
-            where TBinaryOperator : struct, IBinaryOperator<T>
-        {
-            if (x.Length != y.Length)
-            {
-                ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
-            }
-
-            if (x.Length > destination.Length)
-            {
-                ThrowHelper.ThrowArgument_DestinationTooShort();
-            }
-
-            ValidateInputOutputSpanNonOverlapping(x, destination);
-            ValidateInputOutputSpanNonOverlapping(y, destination);
-
-            // Since every branch has a cost and since that cost is
-            // essentially lost for larger inputs, we do branches
-            // in a way that allows us to have the minimum possible
-            // for small sizes
-
-            ref T xRef = ref MemoryMarshal.GetReference(x);
-            ref T yRef = ref MemoryMarshal.GetReference(y);
-            ref T dRef = ref MemoryMarshal.GetReference(destination);
-
-            nuint remainder = (uint)x.Length;
-
-            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TBinaryOperator.Vectorizable)
-            {
-                if (remainder >= (uint)Vector512<T>.Count)
-                {
-                    Vectorized512(ref xRef, ref yRef, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref yRef, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && TBinaryOperator.Vectorizable)
-            {
-                if (remainder >= (uint)Vector256<T>.Count)
-                {
-                    Vectorized256(ref xRef, ref yRef, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref yRef, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TBinaryOperator.Vectorizable)
-            {
-                if (remainder >= (uint)Vector128<T>.Count)
-                {
-                    Vectorized128(ref xRef, ref yRef, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref yRef, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            // This is the software fallback when no acceleration is available
-            // It requires no branches to hit
-
-            SoftwareFallback(ref xRef, ref yRef, ref dRef, remainder);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void SoftwareFallback(ref T xRef, ref T yRef, ref T dRef, nuint length)
-            {
-                for (nuint i = 0; i < length; i++)
-                {
-                    Unsafe.Add(ref dRef, i) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, i),
-                                                                     Unsafe.Add(ref yRef, i));
-                }
-            }
-
-            static void Vectorized128(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                          Vector128.LoadUnsafe(ref yRef));
-                Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                          Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
-
-                if (remainder > (uint)(Vector128<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* py = &yRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* yPtr = py;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            yPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector128<T> vector1;
-                        Vector128<T> vector2;
-                        Vector128<T> vector3;
-                        Vector128<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector128<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)));
-                                vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)));
-                                vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)));
-                                vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)));
-                                vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)));
-                                vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)));
-                                vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<T>.Count * 8);
-                                yPtr += (uint)(Vector128<T>.Count * 8);
-                                dPtr += (uint)(Vector128<T>.Count * 8);
-
-                                remainder -= (uint)(Vector128<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector128<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)));
-                                vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)));
-                                vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)));
-                                vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)));
-                                vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)));
-                                vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)));
-                                vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
-                                                                 Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<T>.Count * 8);
-                                yPtr += (uint)(Vector128<T>.Count * 8);
-                                dPtr += (uint)(Vector128<T>.Count * 8);
-
-                                remainder -= (uint)(Vector128<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        yRef = ref *yPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
-
-                switch (remainder / (uint)Vector128<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 8)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 7)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 6)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 5)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 4)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 3)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
-                                                                     Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 2)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            static void Vectorized256(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                          Vector256.LoadUnsafe(ref yRef));
-                Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                          Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
-
-                if (remainder > (uint)(Vector256<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* py = &yRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* yPtr = py;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)dPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            yPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector256<T> vector1;
-                        Vector256<T> vector2;
-                        Vector256<T> vector3;
-                        Vector256<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector256<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)));
-                                vector2 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)));
-                                vector3 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)));
-                                vector4 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)));
-                                vector2 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)));
-                                vector3 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)));
-                                vector4 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<T>.Count * 8);
-                                yPtr += (uint)(Vector256<T>.Count * 8);
-                                dPtr += (uint)(Vector256<T>.Count * 8);
-
-                                remainder -= (uint)(Vector256<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector256<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)));
-                                vector2 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)));
-                                vector3 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)));
-                                vector4 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)));
-                                vector2 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)));
-                                vector3 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)));
-                                vector4 = TBinaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
-                                                                 Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<T>.Count * 8);
-                                yPtr += (uint)(Vector256<T>.Count * 8);
-                                dPtr += (uint)(Vector256<T>.Count * 8);
-
-                                remainder -= (uint)(Vector256<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        yRef = ref *yPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector256<T>.Count - 1)) & (nuint)(-Vector256<T>.Count);
-
-                switch (remainder / (uint)Vector256<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 8)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 8)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 7)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 6)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 5)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 4)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 3)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)),
-                                                                     Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 2)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            static void Vectorized512(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector512<T> beg = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
-                                                          Vector512.LoadUnsafe(ref yRef));
-                Vector512<T> end = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
-                                                          Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count));
-
-                if (remainder > (uint)(Vector512<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* py = &yRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* yPtr = py;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            yPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector512<T> vector1;
-                        Vector512<T> vector2;
-                        Vector512<T> vector3;
-                        Vector512<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector512<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)));
-                                vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)));
-                                vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)));
-                                vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)));
-                                vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)));
-                                vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)));
-                                vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<T>.Count * 8);
-                                yPtr += (uint)(Vector512<T>.Count * 8);
-                                dPtr += (uint)(Vector512<T>.Count * 8);
-
-                                remainder -= (uint)(Vector512<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector512<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)));
-                                vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)));
-                                vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)));
-                                vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)));
-                                vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)));
-                                vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)));
-                                vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
-                                                                 Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<T>.Count * 8);
-                                yPtr += (uint)(Vector512<T>.Count * 8);
-                                dPtr += (uint)(Vector512<T>.Count * 8);
-
-                                remainder -= (uint)(Vector512<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        yRef = ref *yPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
-
-                switch (remainder / (uint)Vector512<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 8)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
-                                                                     Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
-            {
-                if (sizeof(T) == 1)
-                {
-                    VectorizedSmall1(ref xRef, ref yRef, ref dRef, remainder);
-                }
-                else if (sizeof(T) == 2)
-                {
-                    VectorizedSmall2(ref xRef, ref yRef, ref dRef, remainder);
-                }
-                else if (sizeof(T) == 4)
-                {
-                    VectorizedSmall4(ref xRef, ref yRef, ref dRef, remainder);
-                }
-                else
-                {
-                    Debug.Assert(sizeof(T) == 8);
-                    VectorizedSmall8(ref xRef, ref yRef, ref dRef, remainder);
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall1(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 1);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 63:
-                    case 62:
-                    case 61:
-                    case 60:
-                    case 59:
-                    case 58:
-                    case 57:
-                    case 56:
-                    case 55:
-                    case 54:
-                    case 53:
-                    case 52:
-                    case 51:
-                    case 50:
-                    case 49:
-                    case 48:
-                    case 47:
-                    case 46:
-                    case 45:
-                    case 44:
-                    case 43:
-                    case 42:
-                    case 41:
-                    case 40:
-                    case 39:
-                    case 38:
-                    case 37:
-                    case 36:
-                    case 35:
-                    case 34:
-                    case 33:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-                        Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                  Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 32:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-                        Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                  Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 15:
-                        Unsafe.Add(ref dRef, 14) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 14),
-                                                                         Unsafe.Add(ref yRef, 14));
-                        goto case 14;
-
-                    case 14:
-                        Unsafe.Add(ref dRef, 13) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 13),
-                                                                         Unsafe.Add(ref yRef, 13));
-                        goto case 13;
-
-                    case 13:
-                        Unsafe.Add(ref dRef, 12) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 12),
-                                                                         Unsafe.Add(ref yRef, 12));
-                        goto case 12;
-
-                    case 12:
-                        Unsafe.Add(ref dRef, 11) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 11),
-                                                                         Unsafe.Add(ref yRef, 11));
-                        goto case 11;
-
-                    case 11:
-                        Unsafe.Add(ref dRef, 10) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 10),
-                                                                         Unsafe.Add(ref yRef, 10));
-                        goto case 10;
-
-                    case 10:
-                        Unsafe.Add(ref dRef, 9) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 9),
-                                                                         Unsafe.Add(ref yRef, 9));
-                        goto case 9;
-
-                    case 9:
-                        Unsafe.Add(ref dRef, 8) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 8),
-                                                                         Unsafe.Add(ref yRef, 8));
-                        goto case 8;
-
-                    case 8:
-                        Unsafe.Add(ref dRef, 7) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 7),
-                                                                         Unsafe.Add(ref yRef, 7));
-                        goto case 7;
-
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
-                                                                         Unsafe.Add(ref yRef, 6));
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
-                                                                         Unsafe.Add(ref yRef, 5));
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
-                                                                         Unsafe.Add(ref yRef, 4));
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
-                                                                         Unsafe.Add(ref yRef, 3));
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                         Unsafe.Add(ref yRef, 2));
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                         Unsafe.Add(ref yRef, 1));
-                        goto case 1;
-
-                    case 1:
-                        dRef = TBinaryOperator.Invoke(xRef, yRef);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall2(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 2);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-                        Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                  Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-                        Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                  Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 8:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
-                                                                         Unsafe.Add(ref yRef, 6));
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
-                                                                         Unsafe.Add(ref yRef, 5));
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
-                                                                         Unsafe.Add(ref yRef, 4));
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
-                                                                         Unsafe.Add(ref yRef, 3));
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                         Unsafe.Add(ref yRef, 2));
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                         Unsafe.Add(ref yRef, 1));
-                        goto case 1;
-
-                    case 1:
-                        dRef = TBinaryOperator.Invoke(xRef, yRef);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall4(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 4);
-
-                switch (remainder)
-                {
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-                        Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                  Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    case 8:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-                        Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                  Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                         Unsafe.Add(ref yRef, 2));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                         Unsafe.Add(ref yRef, 1));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        dRef = TBinaryOperator.Invoke(xRef, yRef);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall8(ref T xRef, ref T yRef, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 8);
-
-                switch (remainder)
-                {
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-                        Vector256<T> end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                  Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                  Vector256.LoadUnsafe(ref yRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-                        Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                  Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    case 2:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                  Vector128.LoadUnsafe(ref yRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 1:
-                    {
-                        dRef = TBinaryOperator.Invoke(xRef, yRef);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-            }
-        }
-
-        /// <summary>
-        /// Performs an element-wise operation on <paramref name="x"/> and <paramref name="y"/>,
-        /// and writes the results to <paramref name="destination"/>.
-        /// </summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TBinaryOperator">
-        /// Specifies the operation to perform on each element loaded from <paramref name="x"/> with <paramref name="y"/>.
-        /// </typeparam>
-        private static void InvokeScalarSpanIntoSpan<T, TBinaryOperator>(
-            T x, ReadOnlySpan<T> y, Span<T> destination)
-            where TBinaryOperator : struct, IBinaryOperator<T> =>
-            InvokeSpanScalarIntoSpan<T, IdentityOperator<T>, InvertedBinaryOperator<TBinaryOperator, T>>(y, x, destination);
-
-        /// <summary>
-        /// Performs an element-wise operation on <paramref name="x"/> and <paramref name="y"/>,
-        /// and writes the results to <paramref name="destination"/>.
-        /// </summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TBinaryOperator">
-        /// Specifies the operation to perform on each element loaded from <paramref name="x"/> with <paramref name="y"/>.
-        /// </typeparam>
-        private static void InvokeSpanScalarIntoSpan<T, TBinaryOperator>(
-            ReadOnlySpan<T> x, T y, Span<T> destination)
-            where TBinaryOperator : struct, IBinaryOperator<T> =>
-            InvokeSpanScalarIntoSpan<T, IdentityOperator<T>, TBinaryOperator>(x, y, destination);
-
-        /// <summary>
-        /// Performs an element-wise operation on <paramref name="x"/> and <paramref name="y"/>,
-        /// and writes the results to <paramref name="destination"/>.
-        /// </summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TTransformOperator">
-        /// Specifies the operation to perform on each element loaded from <paramref name="x"/>.
-        /// It is not used with <paramref name="y"/>.
-        /// </typeparam>
-        /// <typeparam name="TBinaryOperator">
-        /// Specifies the operation to perform on the transformed value from <paramref name="x"/> with <paramref name="y"/>.
-        /// </typeparam>
-        private static void InvokeSpanScalarIntoSpan<T, TTransformOperator, TBinaryOperator>(
-            ReadOnlySpan<T> x, T y, Span<T> destination)
-            where TTransformOperator : struct, IUnaryOperator<T, T>
-            where TBinaryOperator : struct, IBinaryOperator<T>
-        {
-            if (x.Length > destination.Length)
-            {
-                ThrowHelper.ThrowArgument_DestinationTooShort();
-            }
-
-            ValidateInputOutputSpanNonOverlapping(x, destination);
-
-            // Since every branch has a cost and since that cost is
-            // essentially lost for larger inputs, we do branches
-            // in a way that allows us to have the minimum possible
-            // for small sizes
-
-            ref T xRef = ref MemoryMarshal.GetReference(x);
-            ref T dRef = ref MemoryMarshal.GetReference(destination);
-
-            nuint remainder = (uint)x.Length;
-
-            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TTransformOperator.Vectorizable && TBinaryOperator.Vectorizable)
-            {
-                if (remainder >= (uint)Vector512<T>.Count)
-                {
-                    Vectorized512(ref xRef, y, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, y, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported && TTransformOperator.Vectorizable && TBinaryOperator.Vectorizable)
-            {
-                if (remainder >= (uint)Vector256<T>.Count)
-                {
-                    Vectorized256(ref xRef, y, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, y, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TTransformOperator.Vectorizable && TBinaryOperator.Vectorizable)
-            {
-                if (remainder >= (uint)Vector128<T>.Count)
-                {
-                    Vectorized128(ref xRef, y, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, y, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            // This is the software fallback when no acceleration is available
-            // It requires no branches to hit
-
-            SoftwareFallback(ref xRef, y, ref dRef, remainder);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void SoftwareFallback(ref T xRef, T y, ref T dRef, nuint length)
-            {
-                for (nuint i = 0; i < length; i++)
-                {
-                    Unsafe.Add(ref dRef, i) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, i)),
-                                                                     y);
-                }
-            }
-
-            static void Vectorized128(ref T xRef, T y, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector128<T> yVec = Vector128.Create(y);
-
-                Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
-                                                          yVec);
-                Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
-                                                          yVec);
-
-                if (remainder > (uint)(Vector128<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector128<T> vector1;
-                        Vector128<T> vector2;
-                        Vector128<T> vector3;
-                        Vector128<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector128<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0))),
-                                                                 yVec);
-                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1))),
-                                                                 yVec);
-                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2))),
-                                                                 yVec);
-                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3))),
-                                                                 yVec);
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4))),
-                                                                 yVec);
-                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5))),
-                                                                 yVec);
-                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6))),
-                                                                 yVec);
-                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7))),
-                                                                 yVec);
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<T>.Count * 8);
-                                dPtr += (uint)(Vector128<T>.Count * 8);
-
-                                remainder -= (uint)(Vector128<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector128<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0))),
-                                                                 yVec);
-                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1))),
-                                                                 yVec);
-                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2))),
-                                                                 yVec);
-                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3))),
-                                                                 yVec);
-
-                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4))),
-                                                                 yVec);
-                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5))),
-                                                                 yVec);
-                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6))),
-                                                                 yVec);
-                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7))),
-                                                                 yVec);
-
-                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<T>.Count * 8);
-                                dPtr += (uint)(Vector128<T>.Count * 8);
-
-                                remainder -= (uint)(Vector128<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
-
-                switch (remainder / (uint)Vector128<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            static void Vectorized256(ref T xRef, T y, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector256<T> yVec = Vector256.Create(y);
-
-                Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
-                                                          yVec);
-                Vector256<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count)),
-                                                          yVec);
-
-                if (remainder > (uint)(Vector256<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)dPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector256<T> vector1;
-                        Vector256<T> vector2;
-                        Vector256<T> vector3;
-                        Vector256<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector256<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0))),
-                                                                 yVec);
-                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1))),
-                                                                 yVec);
-                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2))),
-                                                                 yVec);
-                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3))),
-                                                                 yVec);
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4))),
-                                                                 yVec);
-                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5))),
-                                                                 yVec);
-                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6))),
-                                                                 yVec);
-                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7))),
-                                                                 yVec);
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<T>.Count * 8);
-                                dPtr += (uint)(Vector256<T>.Count * 8);
-
-                                remainder -= (uint)(Vector256<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector256<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0))),
-                                                                 yVec);
-                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1))),
-                                                                 yVec);
-                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2))),
-                                                                 yVec);
-                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3))),
-                                                                 yVec);
-
-                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4))),
-                                                                 yVec);
-                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5))),
-                                                                 yVec);
-                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6))),
-                                                                 yVec);
-                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7))),
-                                                                 yVec);
-
-                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<T>.Count * 8);
-                                dPtr += (uint)(Vector256<T>.Count * 8);
-
-                                remainder -= (uint)(Vector256<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector256<T>.Count - 1)) & (nuint)(-Vector256<T>.Count);
-
-                switch (remainder / (uint)Vector256<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 8))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector256<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            static void Vectorized512(ref T xRef, T y, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector512<T> yVec = Vector512.Create(y);
-
-                Vector512<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef)),
-                                                          yVec);
-                Vector512<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count)),
-                                                          yVec);
-
-                if (remainder > (uint)(Vector512<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector512<T> vector1;
-                        Vector512<T> vector2;
-                        Vector512<T> vector3;
-                        Vector512<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector512<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0))),
-                                                                 yVec);
-                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1))),
-                                                                 yVec);
-                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2))),
-                                                                 yVec);
-                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3))),
-                                                                 yVec);
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4))),
-                                                                 yVec);
-                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5))),
-                                                                 yVec);
-                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6))),
-                                                                 yVec);
-                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7))),
-                                                                 yVec);
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<T>.Count * 8);
-                                dPtr += (uint)(Vector512<T>.Count * 8);
-
-                                remainder -= (uint)(Vector512<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector512<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0))),
-                                                                 yVec);
-                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1))),
-                                                                 yVec);
-                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2))),
-                                                                 yVec);
-                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3))),
-                                                                 yVec);
-
-                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4))),
-                                                                 yVec);
-                                vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5))),
-                                                                 yVec);
-                                vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6))),
-                                                                 yVec);
-                                vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7))),
-                                                                 yVec);
-
-                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<T>.Count * 8);
-                                dPtr += (uint)(Vector512<T>.Count * 8);
-
-                                remainder -= (uint)(Vector512<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
-
-                switch (remainder / (uint)Vector512<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2))),
-                                                                     yVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall(ref T xRef, T y, ref T dRef, nuint remainder)
-            {
-                if (sizeof(T) == 1)
-                {
-                    VectorizedSmall1(ref xRef, y, ref dRef, remainder);
-                }
-                else if (sizeof(T) == 2)
-                {
-                    VectorizedSmall2(ref xRef, y, ref dRef, remainder);
-                }
-                else if (sizeof(T) == 4)
-                {
-                    VectorizedSmall4(ref xRef, y, ref dRef, remainder);
-                }
-                else
-                {
-                    Debug.Assert(sizeof(T) == 8);
-                    VectorizedSmall8(ref xRef, y, ref dRef, remainder);
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall1(ref T xRef, T y, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 1);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 63:
-                    case 62:
-                    case 61:
-                    case 60:
-                    case 59:
-                    case 58:
-                    case 57:
-                    case 56:
-                    case 55:
-                    case 54:
-                    case 53:
-                    case 52:
-                    case 51:
-                    case 50:
-                    case 49:
-                    case 48:
-                    case 47:
-                    case 46:
-                    case 45:
-                    case 44:
-                    case 43:
-                    case 42:
-                    case 41:
-                    case 40:
-                    case 39:
-                    case 38:
-                    case 37:
-                    case 36:
-                    case 35:
-                    case 34:
-                    case 33:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> yVec = Vector256.Create(y);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
-                                                                  yVec);
-                        Vector256<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count)),
-                                                                  yVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 32:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
-                                                                  Vector256.Create(y));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> yVec = Vector128.Create(y);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
-                                                                                            yVec);
-                        Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
-                                                                                            yVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
-                                                                                            Vector128.Create(y));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 15:
-                        Unsafe.Add(ref dRef, 14) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 14)),
-                                                                          y);
-                        goto case 14;
-
-                    case 14:
-                        Unsafe.Add(ref dRef, 13) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 13)),
-                                                                          y);
-                        goto case 13;
-
-                    case 13:
-                        Unsafe.Add(ref dRef, 12) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 12)),
-                                                                          y);
-                        goto case 12;
-
-                    case 12:
-                        Unsafe.Add(ref dRef, 11) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 11)),
-                                                                          y);
-                        goto case 11;
-
-                    case 11:
-                        Unsafe.Add(ref dRef, 10) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 10)),
-                                                                          y);
-                        goto case 10;
-
-                    case 10:
-                        Unsafe.Add(ref dRef, 9) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 9)),
-                                                                         y);
-                        goto case 9;
-
-                    case 9:
-                        Unsafe.Add(ref dRef, 8) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 8)),
-                                                                         y);
-                        goto case 8;
-
-                    case 8:
-                        Unsafe.Add(ref dRef, 7) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 7)),
-                                                                         y);
-                        goto case 7;
-
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 6)),
-                                                                         y);
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 5)),
-                                                                         y);
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 4)),
-                                                                         y);
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 3)),
-                                                                         y);
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)),
-                                                                         y);
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)),
-                                                                         y);
-                        goto case 1;
-
-                    case 1:
-                        dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall2(ref T xRef, T y, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 2);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> yVec = Vector256.Create(y);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
-                                                                  yVec);
-                        Vector256<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count)),
-                                                                  yVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
-                                                                  Vector256.Create(y));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> yVec = Vector128.Create(y);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
-                                                                                            yVec);
-                        Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
-                                                                                            yVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 8:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
-                                                                                            Vector128.Create(y));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 6)),
-                                                                         y);
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 5)),
-                                                                         y);
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 4)),
-                                                                         y);
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 3)),
-                                                                         y);
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)),
-                                                                         y);
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)),
-                                                                         y);
-                        goto case 1;
-
-                    case 1:
-                        dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall4(ref T xRef, T y, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 4);
-
-                switch (remainder)
-                {
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> yVec = Vector256.Create(y);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
-                                                                  yVec);
-                        Vector256<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count)),
-                                                                  yVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    case 8:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
-                                                                  Vector256.Create(y));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> yVec = Vector128.Create(y);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
-                                                                                            yVec);
-                        Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
-                                                                                            yVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
-                                                                                            Vector128.Create(y));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)),
-                                                                         y);
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)),
-                                                                          y);
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall8(ref T xRef, T y, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 8);
-
-                switch (remainder)
-                {
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> yVec = Vector256.Create(y);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
-                                                                  yVec);
-                        Vector256<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count)),
-                                                                  yVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)),
-                                                                  Vector256.Create(y));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> yVec = Vector128.Create(y);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
-                                                                                            yVec);
-                        Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
-                                                                                            yVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    case 2:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
-                                                                                            Vector128.Create(y));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 1:
-                    {
-                        dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-            }
-        }
-
-        /// <summary>
-        /// Performs an element-wise operation on <paramref name="x"/>, <paramref name="y"/>, and <paramref name="z"/>,
-        /// and writes the results to <paramref name="destination"/>.
-        /// </summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TTernaryOperator">
-        /// Specifies the operation to perform on the pair-wise elements loaded from <paramref name="x"/>, <paramref name="y"/>,
-        /// and <paramref name="z"/>.
-        /// </typeparam>
-        private static void InvokeSpanSpanSpanIntoSpan<T, TTernaryOperator>(
-            ReadOnlySpan<T> x, ReadOnlySpan<T> y, ReadOnlySpan<T> z, Span<T> destination)
-            where TTernaryOperator : struct, ITernaryOperator<T>
-        {
-            if (x.Length != y.Length || x.Length != z.Length)
-            {
-                ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
-            }
-
-            if (x.Length > destination.Length)
-            {
-                ThrowHelper.ThrowArgument_DestinationTooShort();
-            }
-
-            ValidateInputOutputSpanNonOverlapping(x, destination);
-            ValidateInputOutputSpanNonOverlapping(y, destination);
-            ValidateInputOutputSpanNonOverlapping(z, destination);
-
-            // Since every branch has a cost and since that cost is
-            // essentially lost for larger inputs, we do branches
-            // in a way that allows us to have the minimum possible
-            // for small sizes
-
-            ref T xRef = ref MemoryMarshal.GetReference(x);
-            ref T yRef = ref MemoryMarshal.GetReference(y);
-            ref T zRef = ref MemoryMarshal.GetReference(z);
-            ref T dRef = ref MemoryMarshal.GetReference(destination);
-
-            nuint remainder = (uint)x.Length;
-
-            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported)
-            {
-                if (remainder >= (uint)Vector512<T>.Count)
-                {
-                    Vectorized512(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported)
-            {
-                if (remainder >= (uint)Vector256<T>.Count)
-                {
-                    Vectorized256(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported)
-            {
-                if (remainder >= (uint)Vector128<T>.Count)
-                {
-                    Vectorized128(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            // This is the software fallback when no acceleration is available
-            // It requires no branches to hit
-
-            SoftwareFallback(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void SoftwareFallback(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint length)
-            {
-                for (nuint i = 0; i < length; i++)
-                {
-                    Unsafe.Add(ref dRef, i) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, i),
-                                                                      Unsafe.Add(ref yRef, i),
-                                                                      Unsafe.Add(ref zRef, i));
-                }
-            }
-
-            static void Vectorized128(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                           Vector128.LoadUnsafe(ref yRef),
-                                                           Vector128.LoadUnsafe(ref zRef));
-                Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                           Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
-                                                           Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
-
-                if (remainder > (uint)(Vector128<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* py = &yRef)
-                    fixed (T* pz = &zRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* yPtr = py;
-                        T* zPtr = pz;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            yPtr += misalignment;
-                            zPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector128<T> vector1;
-                        Vector128<T> vector2;
-                        Vector128<T> vector3;
-                        Vector128<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector128<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 0)));
-                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 1)));
-                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 2)));
-                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 4)));
-                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 5)));
-                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 6)));
-                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<T>.Count * 8);
-                                yPtr += (uint)(Vector128<T>.Count * 8);
-                                zPtr += (uint)(Vector128<T>.Count * 8);
-                                dPtr += (uint)(Vector128<T>.Count * 8);
-
-                                remainder -= (uint)(Vector128<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector128<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 0)));
-                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 1)));
-                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 2)));
-                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 4)));
-                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 5)));
-                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 6)));
-                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)),
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<T>.Count * 8);
-                                yPtr += (uint)(Vector128<T>.Count * 8);
-                                zPtr += (uint)(Vector128<T>.Count * 8);
-                                dPtr += (uint)(Vector128<T>.Count * 8);
-
-                                remainder -= (uint)(Vector128<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        yRef = ref *yPtr;
-                        zRef = ref *zPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
-
-                switch (remainder / (uint)Vector128<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 8)),
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 8)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 7)),
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 7)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 6)),
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 6)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 5)),
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 5)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 4)),
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 4)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 3)),
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 3)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 2)),
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 2)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            static void Vectorized256(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                           Vector256.LoadUnsafe(ref yRef),
-                                                           Vector256.LoadUnsafe(ref zRef));
-                Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                           Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
-                                                           Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
-
-                if (remainder > (uint)(Vector256<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* py = &yRef)
-                    fixed (T* pz = &zRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* yPtr = py;
-                        T* zPtr = pz;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)dPtr % (uint)sizeof(Vector256<T>))) / (nuint)sizeof(T);
-
-                            xPtr += misalignment;
-                            yPtr += misalignment;
-                            zPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector256<T> vector1;
-                        Vector256<T> vector2;
-                        Vector256<T> vector3;
-                        Vector256<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector256<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 0)));
-                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 1)));
-                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 2)));
-                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 4)));
-                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 5)));
-                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 6)));
-                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<T>.Count * 8);
-                                yPtr += (uint)(Vector256<T>.Count * 8);
-                                zPtr += (uint)(Vector256<T>.Count * 8);
-                                dPtr += (uint)(Vector256<T>.Count * 8);
-
-                                remainder -= (uint)(Vector256<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector256<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 0)));
-                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 1)));
-                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 2)));
-                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 4)));
-                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 5)));
-                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 6)));
-                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)),
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<T>.Count * 8);
-                                yPtr += (uint)(Vector256<T>.Count * 8);
-                                zPtr += (uint)(Vector256<T>.Count * 8);
-                                dPtr += (uint)(Vector256<T>.Count * 8);
-
-                                remainder -= (uint)(Vector256<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        yRef = ref *yPtr;
-                        zRef = ref *zPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector256<T>.Count - 1)) & (nuint)(-Vector256<T>.Count);
-
-                switch (remainder / (uint)Vector256<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 8)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 8)),
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 8)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 7)),
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 7)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 6)),
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 6)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 5)),
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 5)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 4)),
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 4)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 3)),
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 3)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 2)),
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 2)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            static void Vectorized512(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector512<T> beg = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
-                                                           Vector512.LoadUnsafe(ref yRef),
-                                                           Vector512.LoadUnsafe(ref zRef));
-                Vector512<T> end = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
-                                                           Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count),
-                                                           Vector512.LoadUnsafe(ref zRef, remainder - (uint)Vector512<T>.Count));
-
-                if (remainder > (uint)(Vector512<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* py = &yRef)
-                    fixed (T* pz = &zRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* yPtr = py;
-                        T* zPtr = pz;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            yPtr += misalignment;
-                            zPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector512<T> vector1;
-                        Vector512<T> vector2;
-                        Vector512<T> vector3;
-                        Vector512<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector512<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 0)));
-                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 1)));
-                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 2)));
-                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 4)));
-                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 5)));
-                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 6)));
-                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<T>.Count * 8);
-                                yPtr += (uint)(Vector512<T>.Count * 8);
-                                zPtr += (uint)(Vector512<T>.Count * 8);
-                                dPtr += (uint)(Vector512<T>.Count * 8);
-
-                                remainder -= (uint)(Vector512<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector512<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 0)));
-                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 1)));
-                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 2)));
-                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 4)));
-                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 5)));
-                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 6)));
-                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<T>.Count * 8);
-                                yPtr += (uint)(Vector512<T>.Count * 8);
-                                zPtr += (uint)(Vector512<T>.Count * 8);
-                                dPtr += (uint)(Vector512<T>.Count * 8);
-
-                                remainder -= (uint)(Vector512<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        yRef = ref *yPtr;
-                        zRef = ref *zPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
-
-                switch (remainder / (uint)Vector512<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 8)),
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 8)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)),
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 7)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)),
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 6)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)),
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 5)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)),
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 4)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)),
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 3)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)),
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 2)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
-            {
-                if (sizeof(T) == 1)
-                {
-                    VectorizedSmall1(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
-                }
-                else if (sizeof(T) == 2)
-                {
-                    VectorizedSmall2(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
-                }
-                else if (sizeof(T) == 4)
-                {
-                    VectorizedSmall4(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
-                }
-                else
-                {
-                    Debug.Assert(sizeof(T) == 8);
-                    VectorizedSmall8(ref xRef, ref yRef, ref zRef, ref dRef, remainder);
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall1(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 1);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 63:
-                    case 62:
-                    case 61:
-                    case 60:
-                    case 59:
-                    case 58:
-                    case 57:
-                    case 56:
-                    case 55:
-                    case 54:
-                    case 53:
-                    case 52:
-                    case 51:
-                    case 50:
-                    case 49:
-                    case 48:
-                    case 47:
-                    case 46:
-                    case 45:
-                    case 44:
-                    case 43:
-                    case 42:
-                    case 41:
-                    case 40:
-                    case 39:
-                    case 38:
-                    case 37:
-                    case 36:
-                    case 35:
-                    case 34:
-                    case 33:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                   Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
-                                                                   Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 32:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                       Vector256.LoadUnsafe(ref yRef),
-                                                                       Vector256.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                       Vector128.LoadUnsafe(ref yRef),
-                                                                       Vector128.LoadUnsafe(ref zRef));
-                        Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                       Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
-                                                                       Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                       Vector128.LoadUnsafe(ref yRef),
-                                                                       Vector128.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 15:
-                        Unsafe.Add(ref dRef, 14) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 14),
-                                                                          Unsafe.Add(ref yRef, 14),
-                                                                          Unsafe.Add(ref zRef, 14));
-                        goto case 14;
-
-                    case 14:
-                        Unsafe.Add(ref dRef, 13) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 13),
-                                                                          Unsafe.Add(ref yRef, 13),
-                                                                          Unsafe.Add(ref zRef, 13));
-                        goto case 13;
-
-                    case 13:
-                        Unsafe.Add(ref dRef, 12) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 12),
-                                                                          Unsafe.Add(ref yRef, 12),
-                                                                          Unsafe.Add(ref zRef, 12));
-                        goto case 12;
-
-                    case 12:
-                        Unsafe.Add(ref dRef, 11) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 11),
-                                                                          Unsafe.Add(ref yRef, 11),
-                                                                          Unsafe.Add(ref zRef, 11));
-                        goto case 11;
-
-                    case 11:
-                        Unsafe.Add(ref dRef, 10) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 10),
-                                                                          Unsafe.Add(ref yRef, 10),
-                                                                          Unsafe.Add(ref zRef, 10));
-                        goto case 10;
-
-                    case 10:
-                        Unsafe.Add(ref dRef, 9) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 9),
-                                                                          Unsafe.Add(ref yRef, 9),
-                                                                          Unsafe.Add(ref zRef, 9));
-                        goto case 9;
-
-                    case 9:
-                        Unsafe.Add(ref dRef, 8) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 8),
-                                                                          Unsafe.Add(ref yRef, 8),
-                                                                          Unsafe.Add(ref zRef, 8));
-                        goto case 8;
-
-                    case 8:
-                        Unsafe.Add(ref dRef, 7) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 7),
-                                                                          Unsafe.Add(ref yRef, 7),
-                                                                          Unsafe.Add(ref zRef, 7));
-                        goto case 7;
-
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
-                                                                          Unsafe.Add(ref yRef, 6),
-                                                                          Unsafe.Add(ref zRef, 6));
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
-                                                                          Unsafe.Add(ref yRef, 5),
-                                                                          Unsafe.Add(ref zRef, 5));
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
-                                                                          Unsafe.Add(ref yRef, 4),
-                                                                          Unsafe.Add(ref zRef, 4));
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
-                                                                          Unsafe.Add(ref yRef, 3),
-                                                                          Unsafe.Add(ref zRef, 3));
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                          Unsafe.Add(ref yRef, 2),
-                                                                          Unsafe.Add(ref zRef, 2));
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                          Unsafe.Add(ref yRef, 1),
-                                                                          Unsafe.Add(ref zRef, 1));
-                        goto case 1;
-
-                    case 1:
-                        dRef = TTernaryOperator.Invoke(xRef, yRef, zRef);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall2(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 2);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                   Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
-                                                                   Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                       Vector256.LoadUnsafe(ref yRef),
-                                                                       Vector256.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                       Vector128.LoadUnsafe(ref yRef),
-                                                                       Vector128.LoadUnsafe(ref zRef));
-                        Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                       Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
-                                                                       Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 8:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                       Vector128.LoadUnsafe(ref yRef),
-                                                                       Vector128.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
-                                                                          Unsafe.Add(ref yRef, 6),
-                                                                          Unsafe.Add(ref zRef, 6));
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
-                                                                          Unsafe.Add(ref yRef, 5),
-                                                                          Unsafe.Add(ref zRef, 5));
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
-                                                                          Unsafe.Add(ref yRef, 4),
-                                                                          Unsafe.Add(ref zRef, 4));
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
-                                                                          Unsafe.Add(ref yRef, 3),
-                                                                          Unsafe.Add(ref zRef, 3));
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                          Unsafe.Add(ref yRef, 2),
-                                                                          Unsafe.Add(ref zRef, 2));
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                          Unsafe.Add(ref yRef, 1),
-                                                                          Unsafe.Add(ref zRef, 1));
-                        goto case 1;
-
-                    case 1:
-                        dRef = TTernaryOperator.Invoke(xRef, yRef, zRef);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall4(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 4);
-
-                switch (remainder)
-                {
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                   Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
-                                                                   Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    case 8:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                       Vector256.LoadUnsafe(ref yRef),
-                                                                       Vector256.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                       Vector128.LoadUnsafe(ref yRef),
-                                                                       Vector128.LoadUnsafe(ref zRef));
-                        Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                       Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
-                                                                       Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                       Vector128.LoadUnsafe(ref yRef),
-                                                                       Vector128.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                          Unsafe.Add(ref yRef, 2),
-                                                                          Unsafe.Add(ref zRef, 2));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                          Unsafe.Add(ref yRef, 1),
-                                                                          Unsafe.Add(ref zRef, 1));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        dRef = TTernaryOperator.Invoke(xRef, yRef, zRef);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall8(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 8);
-
-                switch (remainder)
-                {
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                   Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
-                                                                   Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.LoadUnsafe(ref yRef),
-                                                                   Vector128.LoadUnsafe(ref zRef));
-                        Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                   Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
-                                                                   Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    case 2:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.LoadUnsafe(ref yRef),
-                                                                   Vector128.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 1:
-                    {
-                        dRef = TTernaryOperator.Invoke(xRef, yRef, zRef);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-            }
-        }
-
-        /// <summary>
-        /// Performs an element-wise operation on <paramref name="x"/>, <paramref name="y"/>, and <paramref name="z"/>,
-        /// and writes the results to <paramref name="destination"/>.
-        /// </summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TTernaryOperator">
-        /// Specifies the operation to perform on the pair-wise elements loaded from <paramref name="x"/> and <paramref name="y"/>
-        /// with <paramref name="z"/>.
-        /// </typeparam>
-        private static void InvokeSpanSpanScalarIntoSpan<T, TTernaryOperator>(
-            ReadOnlySpan<T> x, ReadOnlySpan<T> y, T z, Span<T> destination)
-            where TTernaryOperator : struct, ITernaryOperator<T>
-        {
-            if (x.Length != y.Length)
-            {
-                ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
-            }
-
-            if (x.Length > destination.Length)
-            {
-                ThrowHelper.ThrowArgument_DestinationTooShort();
-            }
-
-            ValidateInputOutputSpanNonOverlapping(x, destination);
-            ValidateInputOutputSpanNonOverlapping(y, destination);
-
-            // Since every branch has a cost and since that cost is
-            // essentially lost for larger inputs, we do branches
-            // in a way that allows us to have the minimum possible
-            // for small sizes
-
-            ref T xRef = ref MemoryMarshal.GetReference(x);
-            ref T yRef = ref MemoryMarshal.GetReference(y);
-            ref T dRef = ref MemoryMarshal.GetReference(destination);
-
-            nuint remainder = (uint)x.Length;
-
-            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported)
-            {
-                if (remainder >= (uint)Vector512<T>.Count)
-                {
-                    Vectorized512(ref xRef, ref yRef, z, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref yRef, z, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported)
-            {
-                if (remainder >= (uint)Vector256<T>.Count)
-                {
-                    Vectorized256(ref xRef, ref yRef, z, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref yRef, z, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported)
-            {
-                if (remainder >= (uint)Vector128<T>.Count)
-                {
-                    Vectorized128(ref xRef, ref yRef, z, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, ref yRef, z, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            // This is the software fallback when no acceleration is available
-            // It requires no branches to hit
-
-            SoftwareFallback(ref xRef, ref yRef, z, ref dRef, remainder);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void SoftwareFallback(ref T xRef, ref T yRef, T z, ref T dRef, nuint length)
-            {
-                for (nuint i = 0; i < length; i++)
-                {
-                    Unsafe.Add(ref dRef, i) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, i),
-                                                                      Unsafe.Add(ref yRef, i),
-                                                                      z);
-                }
-            }
-
-            static void Vectorized128(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector128<T> zVec = Vector128.Create(z);
-
-                Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                               Vector128.LoadUnsafe(ref yRef),
-                                                               zVec);
-                Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                               Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
-                                                               zVec);
-
-                if (remainder > (uint)(Vector128<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* py = &yRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* yPtr = py;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            yPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector128<T> vector1;
-                        Vector128<T> vector2;
-                        Vector128<T> vector3;
-                        Vector128<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector128<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)),
-                                                                  zVec);
-                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)),
-                                                                  zVec);
-                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)),
-                                                                  zVec);
-                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)),
-                                                                  zVec);
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)),
-                                                                  zVec);
-                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)),
-                                                                  zVec);
-                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)),
-                                                                  zVec);
-                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)),
-                                                                  zVec);
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<T>.Count * 8);
-                                yPtr += (uint)(Vector128<T>.Count * 8);
-                                dPtr += (uint)(Vector128<T>.Count * 8);
-
-                                remainder -= (uint)(Vector128<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector128<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)),
-                                                                  zVec);
-                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)),
-                                                                  zVec);
-                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)),
-                                                                  zVec);
-                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)),
-                                                                  zVec);
-
-                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)),
-                                                                  zVec);
-                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)),
-                                                                  zVec);
-                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)),
-                                                                  zVec);
-                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
-                                                                  Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)),
-                                                                  zVec);
-
-                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<T>.Count * 8);
-                                yPtr += (uint)(Vector128<T>.Count * 8);
-                                dPtr += (uint)(Vector128<T>.Count * 8);
-
-                                remainder -= (uint)(Vector128<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        yRef = ref *yPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
-
-                switch (remainder / (uint)Vector128<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 8)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 7)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 6)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 5)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 4)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 3)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
-                                                                      Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 2)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            static void Vectorized256(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector256<T> zVec = Vector256.Create(z);
-
-                Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                               Vector256.LoadUnsafe(ref yRef),
-                                                               zVec);
-                Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                               Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
-                                                               zVec);
-
-                if (remainder > (uint)(Vector256<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* py = &yRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* yPtr = py;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)dPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            yPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector256<T> vector1;
-                        Vector256<T> vector2;
-                        Vector256<T> vector3;
-                        Vector256<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector256<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)),
-                                                                  zVec);
-                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)),
-                                                                  zVec);
-                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)),
-                                                                  zVec);
-                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)),
-                                                                  zVec);
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)),
-                                                                  zVec);
-                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)),
-                                                                  zVec);
-                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)),
-                                                                  zVec);
-                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)),
-                                                                  zVec);
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<T>.Count * 8);
-                                yPtr += (uint)(Vector256<T>.Count * 8);
-                                dPtr += (uint)(Vector256<T>.Count * 8);
-
-                                remainder -= (uint)(Vector256<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector256<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 0)),
-                                                                  zVec);
-                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 1)),
-                                                                  zVec);
-                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 2)),
-                                                                  zVec);
-                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 3)),
-                                                                  zVec);
-
-                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 4)),
-                                                                  zVec);
-                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 5)),
-                                                                  zVec);
-                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 6)),
-                                                                  zVec);
-                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
-                                                                  Vector256.Load(yPtr + (uint)(Vector256<T>.Count * 7)),
-                                                                  zVec);
-
-                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<T>.Count * 8);
-                                yPtr += (uint)(Vector256<T>.Count * 8);
-                                dPtr += (uint)(Vector256<T>.Count * 8);
-
-                                remainder -= (uint)(Vector256<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        yRef = ref *yPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector256<T>.Count - 1)) & (nuint)(-Vector256<T>.Count);
-
-                switch (remainder / (uint)Vector256<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 8)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 8)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 7)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 6)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 5)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 4)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 3)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)),
-                                                                      Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256<T>.Count * 2)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            static void Vectorized512(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector512<T> zVec = Vector512.Create(z);
-
-                Vector512<T> beg = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
-                                                           Vector512.LoadUnsafe(ref yRef),
-                                                           zVec);
-                Vector512<T> end = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
-                                                           Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count),
-                                                           zVec);
-
-                if (remainder > (uint)(Vector512<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* py = &yRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* yPtr = py;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            yPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector512<T> vector1;
-                        Vector512<T> vector2;
-                        Vector512<T> vector3;
-                        Vector512<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector512<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
-                                                                  zVec);
-                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
-                                                                  zVec);
-                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
-                                                                  zVec);
-                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
-                                                                  zVec);
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
-                                                                  zVec);
-                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
-                                                                  zVec);
-                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
-                                                                  zVec);
-                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
-                                                                  zVec);
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<T>.Count * 8);
-                                yPtr += (uint)(Vector512<T>.Count * 8);
-                                dPtr += (uint)(Vector512<T>.Count * 8);
-
-                                remainder -= (uint)(Vector512<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector512<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
-                                                                  zVec);
-                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
-                                                                  zVec);
-                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
-                                                                  zVec);
-                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
-                                                                  zVec);
-
-                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
-                                                                  zVec);
-                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
-                                                                  zVec);
-                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
-                                                                  zVec);
-                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
-                                                                  Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
-                                                                  zVec);
-
-                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<T>.Count * 8);
-                                yPtr += (uint)(Vector512<T>.Count * 8);
-                                dPtr += (uint)(Vector512<T>.Count * 8);
-
-                                remainder -= (uint)(Vector512<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        yRef = ref *yPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
-
-                switch (remainder / (uint)Vector512<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 8)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
-                                                                      Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)),
-                                                                      zVec);
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
-            {
-                if (sizeof(T) == 1)
-                {
-                    VectorizedSmall1(ref xRef, ref yRef, z, ref dRef, remainder);
-                }
-                else if (sizeof(T) == 2)
-                {
-                    VectorizedSmall2(ref xRef, ref yRef, z, ref dRef, remainder);
-                }
-                else if (sizeof(T) == 4)
-                {
-                    VectorizedSmall4(ref xRef, ref yRef, z, ref dRef, remainder);
-                }
-                else
-                {
-                    Debug.Assert(sizeof(T) == 8);
-                    VectorizedSmall8(ref xRef, ref yRef, z, ref dRef, remainder);
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall1(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 1);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 63:
-                    case 62:
-                    case 61:
-                    case 60:
-                    case 59:
-                    case 58:
-                    case 57:
-                    case 56:
-                    case 55:
-                    case 54:
-                    case 53:
-                    case 52:
-                    case 51:
-                    case 50:
-                    case 49:
-                    case 48:
-                    case 47:
-                    case 46:
-                    case 45:
-                    case 44:
-                    case 43:
-                    case 42:
-                    case 41:
-                    case 40:
-                    case 39:
-                    case 38:
-                    case 37:
-                    case 36:
-                    case 35:
-                    case 34:
-                    case 33:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> zVec = Vector256.Create(z);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   zVec);
-                        Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                   Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
-                                                                   zVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 32:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   Vector256.Create(z));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> zVec = Vector128.Create(z);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.LoadUnsafe(ref yRef),
-                                                                   zVec);
-                        Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                   Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
-                                                                   zVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                     // One Vector128's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.LoadUnsafe(ref yRef),
-                                                                   Vector128.Create(z));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 15:
-                        Unsafe.Add(ref dRef, 14) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 14),
-                                                                          Unsafe.Add(ref yRef, 14),
-                                                                          z);
-                        goto case 14;
-
-                    case 14:
-                        Unsafe.Add(ref dRef, 13) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 13),
-                                                                          Unsafe.Add(ref yRef, 13),
-                                                                          z);
-                        goto case 13;
-
-                    case 13:
-                        Unsafe.Add(ref dRef, 12) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 12),
-                                                                          Unsafe.Add(ref yRef, 12),
-                                                                          z);
-                        goto case 12;
-
-                    case 12:
-                        Unsafe.Add(ref dRef, 11) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 11),
-                                                                          Unsafe.Add(ref yRef, 11),
-                                                                          z);
-                        goto case 11;
-
-                    case 11:
-                        Unsafe.Add(ref dRef, 10) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 10),
-                                                                          Unsafe.Add(ref yRef, 10),
-                                                                          z);
-                        goto case 10;
-
-                    case 10:
-                        Unsafe.Add(ref dRef, 9) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 9),
-                                                                          Unsafe.Add(ref yRef, 9),
-                                                                          z);
-                        goto case 9;
-
-                    case 9:
-                        Unsafe.Add(ref dRef, 8) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 8),
-                                                                          Unsafe.Add(ref yRef, 8),
-                                                                          z);
-                        goto case 8;
-
-                    case 8:
-                        Unsafe.Add(ref dRef, 7) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 7),
-                                                                          Unsafe.Add(ref yRef, 7),
-                                                                          z);
-                        goto case 7;
-
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
-                                                                          Unsafe.Add(ref yRef, 6),
-                                                                          z);
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
-                                                                          Unsafe.Add(ref yRef, 5),
-                                                                          z);
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
-                                                                          Unsafe.Add(ref yRef, 4),
-                                                                          z);
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
-                                                                          Unsafe.Add(ref yRef, 3),
-                                                                          z);
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                          Unsafe.Add(ref yRef, 2),
-                                                                          z);
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                          Unsafe.Add(ref yRef, 1),
-                                                                          z);
-                        goto case 1;
-
-                    case 1:
-                        dRef = TTernaryOperator.Invoke(xRef, yRef, z);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall2(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 2);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> zVec = Vector256.Create(z);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   zVec);
-                        Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                   Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
-                                                                   zVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   Vector256.Create(z));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> zVec = Vector128.Create(z);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.LoadUnsafe(ref yRef),
-                                                                   zVec);
-                        Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                   Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
-                                                                   zVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                     // One Vector128's worth of data.
-                    case 8:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.LoadUnsafe(ref yRef),
-                                                                   Vector128.Create(z));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
-                                                                          Unsafe.Add(ref yRef, 6),
-                                                                          z);
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
-                                                                          Unsafe.Add(ref yRef, 5),
-                                                                          z);
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
-                                                                          Unsafe.Add(ref yRef, 4),
-                                                                          z);
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
-                                                                          Unsafe.Add(ref yRef, 3),
-                                                                          z);
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                          Unsafe.Add(ref yRef, 2),
-                                                                          z);
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                          Unsafe.Add(ref yRef, 1),
-                                                                          z);
-                        goto case 1;
-
-                    case 1:
-                        dRef = TTernaryOperator.Invoke(xRef, yRef, z);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall4(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 4);
-
-                switch (remainder)
-                {
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> zVec = Vector256.Create(z);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   zVec);
-                        Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                   Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
-                                                                   zVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    case 8:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   Vector256.Create(z));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> zVec = Vector128.Create(z);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.LoadUnsafe(ref yRef),
-                                                                   zVec);
-                        Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                   Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
-                                                                   zVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.LoadUnsafe(ref yRef),
-                                                                   Vector128.Create(z));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                          Unsafe.Add(ref yRef, 2),
-                                                                          z);
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                          Unsafe.Add(ref yRef, 1),
-                                                                          z);
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        dRef = TTernaryOperator.Invoke(xRef, yRef, z);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall8(ref T xRef, ref T yRef, T z, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 8);
-
-                switch (remainder)
-                {
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> zVec = Vector256.Create(z);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   zVec);
-                        Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                   Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256<T>.Count),
-                                                                   zVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.LoadUnsafe(ref yRef),
-                                                                   Vector256.Create(z));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> zVec = Vector128.Create(z);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.LoadUnsafe(ref yRef),
-                                                                   zVec);
-                        Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                   Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
-                                                                   zVec);
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    case 2:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.LoadUnsafe(ref yRef),
-                                                                   Vector128.Create(z));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 1:
-                    {
-                        dRef = TTernaryOperator.Invoke(xRef, yRef, z);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-            }
-        }
-
-        /// <summary>
-        /// Performs an element-wise operation on <paramref name="x"/>, <paramref name="y"/>, and <paramref name="z"/>,
-        /// and writes the results to <paramref name="destination"/>.
-        /// </summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TTernaryOperator">
-        /// Specifies the operation to perform on the pair-wise element loaded from <paramref name="x"/>, with <paramref name="y"/>,
-        /// and the element loaded from <paramref name="z"/>.
-        /// </typeparam>
-        private static void InvokeSpanScalarSpanIntoSpan<T, TTernaryOperator>(
-            ReadOnlySpan<T> x, T y, ReadOnlySpan<T> z, Span<T> destination)
-            where TTernaryOperator : struct, ITernaryOperator<T>
-        {
-            if (x.Length != z.Length)
-            {
-                ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
-            }
-
-            if (x.Length > destination.Length)
-            {
-                ThrowHelper.ThrowArgument_DestinationTooShort();
-            }
-
-            ValidateInputOutputSpanNonOverlapping(x, destination);
-            ValidateInputOutputSpanNonOverlapping(z, destination);
-
-            // Since every branch has a cost and since that cost is
-            // essentially lost for larger inputs, we do branches
-            // in a way that allows us to have the minimum possible
-            // for small sizes
-
-            ref T xRef = ref MemoryMarshal.GetReference(x);
-            ref T zRef = ref MemoryMarshal.GetReference(z);
-            ref T dRef = ref MemoryMarshal.GetReference(destination);
-
-            nuint remainder = (uint)x.Length;
-
-            if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported)
-            {
-                if (remainder >= (uint)Vector512<T>.Count)
-                {
-                    Vectorized512(ref xRef, y, ref zRef, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, y, ref zRef, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            if (Vector256.IsHardwareAccelerated && Vector256<T>.IsSupported)
-            {
-                if (remainder >= (uint)Vector256<T>.Count)
-                {
-                    Vectorized256(ref xRef, y, ref zRef, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, y, ref zRef, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported)
-            {
-                if (remainder >= (uint)Vector128<T>.Count)
-                {
-                    Vectorized128(ref xRef, y, ref zRef, ref dRef, remainder);
-                }
-                else
-                {
-                    // We have less than a vector and so we can only handle this as scalar. To do this
-                    // efficiently, we simply have a small jump table and fallthrough. So we get a simple
-                    // length check, single jump, and then linear execution.
-
-                    VectorizedSmall(ref xRef, y, ref zRef, ref dRef, remainder);
-                }
-
-                return;
-            }
-
-            // This is the software fallback when no acceleration is available
-            // It requires no branches to hit
-
-            SoftwareFallback(ref xRef, y, ref zRef, ref dRef, remainder);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void SoftwareFallback(ref T xRef, T y, ref T zRef, ref T dRef, nuint length)
-            {
-                for (nuint i = 0; i < length; i++)
-                {
-                    Unsafe.Add(ref dRef, i) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, i),
-                                                                      y,
-                                                                      Unsafe.Add(ref zRef, i));
-                }
-            }
-
-            static void Vectorized128(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector128<T> yVec = Vector128.Create(y);
-
-                Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                               yVec,
-                                                               Vector128.LoadUnsafe(ref zRef));
-                Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                               yVec,
-                                                               Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
-
-                if (remainder > (uint)(Vector128<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* pz = &zRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* zPtr = pz;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            zPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector128<T> vector1;
-                        Vector128<T> vector2;
-                        Vector128<T> vector3;
-                        Vector128<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector128<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 0)));
-                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 1)));
-                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 2)));
-                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 4)));
-                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 5)));
-                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 6)));
-                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<T>.Count * 8);
-                                zPtr += (uint)(Vector128<T>.Count * 8);
-                                dPtr += (uint)(Vector128<T>.Count * 8);
-
-                                remainder -= (uint)(Vector128<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector128<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 0)));
-                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 1)));
-                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 2)));
-                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 4)));
-                                vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 5)));
-                                vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 6)));
-                                vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
-                                                                  yVec,
-                                                                  Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector128<T>.Count * 8);
-                                zPtr += (uint)(Vector128<T>.Count * 8);
-                                dPtr += (uint)(Vector128<T>.Count * 8);
-
-                                remainder -= (uint)(Vector128<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        zRef = ref *zPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
-
-                switch (remainder / (uint)Vector128<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)),
-                                                                      yVec,
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 8)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
-                                                                      yVec,
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 7)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
-                                                                      yVec,
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 6)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
-                                                                      yVec,
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 5)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
-                                                                      yVec,
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 4)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
-                                                                      yVec,
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 3)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
-                                                                      yVec,
-                                                                      Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 2)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            static void Vectorized256(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector256<T> yVec = Vector256.Create(y);
-
-                Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                               yVec,
-                                                               Vector256.LoadUnsafe(ref zRef));
-                Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                               yVec,
-                                                               Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
-
-                if (remainder > (uint)(Vector256<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* pz = &zRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* zPtr = pz;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector256<T>) - ((nuint)dPtr % (uint)sizeof(Vector256<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            zPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector256<T> vector1;
-                        Vector256<T> vector2;
-                        Vector256<T> vector3;
-                        Vector256<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector256<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 0)));
-                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 1)));
-                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 2)));
-                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 4)));
-                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 5)));
-                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 6)));
-                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<T>.Count * 8);
-                                zPtr += (uint)(Vector256<T>.Count * 8);
-                                dPtr += (uint)(Vector256<T>.Count * 8);
-
-                                remainder -= (uint)(Vector256<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector256<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 0)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 0)));
-                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 1)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 1)));
-                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 2)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 2)));
-                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 3)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 4)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 4)));
-                                vector2 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 5)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 5)));
-                                vector3 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 6)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 6)));
-                                vector4 = TTernaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256<T>.Count * 7)),
-                                                                  yVec,
-                                                                  Vector256.Load(zPtr + (uint)(Vector256<T>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector256<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector256<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector256<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector256<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector256<T>.Count * 8);
-                                zPtr += (uint)(Vector256<T>.Count * 8);
-                                dPtr += (uint)(Vector256<T>.Count * 8);
-
-                                remainder -= (uint)(Vector256<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        zRef = ref *zPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector256<T>.Count - 1)) & (nuint)(-Vector256<T>.Count);
-
-                switch (remainder / (uint)Vector256<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 8)),
-                                                                      yVec,
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 8)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 7)),
-                                                                      yVec,
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 7)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 6)),
-                                                                      yVec,
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 6)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 5)),
-                                                                      yVec,
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 5)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 4)),
-                                                                      yVec,
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 4)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 3)),
-                                                                      yVec,
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 3)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector256<T> vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256<T>.Count * 2)),
-                                                                      yVec,
-                                                                      Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256<T>.Count * 2)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            static void Vectorized512(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
-            {
-                ref T dRefBeg = ref dRef;
-
-                // Preload the beginning and end so that overlapping accesses don't negatively impact the data
-
-                Vector512<T> yVec = Vector512.Create(y);
-
-                Vector512<T> beg = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
-                                                               yVec,
-                                                               Vector512.LoadUnsafe(ref zRef));
-                Vector512<T> end = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
-                                                               yVec,
-                                                               Vector512.LoadUnsafe(ref zRef, remainder - (uint)Vector512<T>.Count));
-
-                if (remainder > (uint)(Vector512<T>.Count * 8))
-                {
-                    // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful
-                    // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted.
-
-                    fixed (T* px = &xRef)
-                    fixed (T* pz = &zRef)
-                    fixed (T* pd = &dRef)
-                    {
-                        T* xPtr = px;
-                        T* zPtr = pz;
-                        T* dPtr = pd;
-
-                        // We need to the ensure the underlying data can be aligned and only align
-                        // it if it can. It is possible we have an unaligned ref, in which case we
-                        // can never achieve the required SIMD alignment.
-
-                        bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0;
-
-                        if (canAlign)
-                        {
-                            // Compute by how many elements we're misaligned and adjust the pointers accordingly
-                            //
-                            // Noting that we are only actually aligning dPtr. This is because unaligned stores
-                            // are more expensive than unaligned loads and aligning both is significantly more
-                            // complex.
-
-                            nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
-
-                            xPtr += misalignment;
-                            zPtr += misalignment;
-                            dPtr += misalignment;
-
-                            Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
-
-                            remainder -= misalignment;
-                        }
-
-                        Vector512<T> vector1;
-                        Vector512<T> vector2;
-                        Vector512<T> vector3;
-                        Vector512<T> vector4;
-
-                        if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign)
-                        {
-                            // This loop stores the data non-temporally, which benefits us when there
-                            // is a large amount of data involved as it avoids polluting the cache.
-
-                            while (remainder >= (uint)(Vector512<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 0)));
-                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 1)));
-                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 2)));
-                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 3)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 4)));
-                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 5)));
-                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 6)));
-                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 7)));
-
-                                vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
-                                vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
-                                vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
-                                vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<T>.Count * 8);
-                                zPtr += (uint)(Vector512<T>.Count * 8);
-                                dPtr += (uint)(Vector512<T>.Count * 8);
-
-                                remainder -= (uint)(Vector512<T>.Count * 8);
-                            }
-                        }
-                        else
-                        {
-                            while (remainder >= (uint)(Vector512<T>.Count * 8))
-                            {
-                                // We load, process, and store the first four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 0)));
-                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 1)));
-                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 2)));
-                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 3)));
-
-                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
-                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
-                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
-                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
-
-                                // We load, process, and store the next four vectors
-
-                                vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 4)));
-                                vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 5)));
-                                vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 6)));
-                                vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
-                                                                  yVec,
-                                                                  Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 7)));
-
-                                vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
-                                vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
-                                vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
-                                vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
-
-                                // We adjust the source and destination references, then update
-                                // the count of remaining elements to process.
-
-                                xPtr += (uint)(Vector512<T>.Count * 8);
-                                zPtr += (uint)(Vector512<T>.Count * 8);
-                                dPtr += (uint)(Vector512<T>.Count * 8);
-
-                                remainder -= (uint)(Vector512<T>.Count * 8);
-                            }
-                        }
-
-                        // Adjusting the refs here allows us to avoid pinning for very small inputs
-
-                        xRef = ref *xPtr;
-                        zRef = ref *zPtr;
-                        dRef = ref *dPtr;
-                    }
-                }
-
-                // Process the remaining [Count, Count * 8] elements via a jump table
-                //
-                // Unless the original length was an exact multiple of Count, then we'll
-                // end up reprocessing a couple elements in case 1 for end. We'll also
-                // potentially reprocess a few elements in case 0 for beg, to handle any
-                // data before the first aligned address.
-
-                nuint endIndex = remainder;
-                remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
-
-                switch (remainder / (uint)Vector512<T>.Count)
-                {
-                    case 8:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
-                                                                      yVec,
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 8)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
-                        goto case 7;
-                    }
-
-                    case 7:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
-                                                                      yVec,
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 7)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
-                        goto case 6;
-                    }
-
-                    case 6:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
-                                                                      yVec,
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 6)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
-                        goto case 5;
-                    }
-
-                    case 5:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
-                                                                      yVec,
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 5)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
-                        goto case 4;
-                    }
-
-                    case 4:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
-                                                                      yVec,
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 4)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
-                        goto case 3;
-                    }
-
-                    case 3:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
-                                                                      yVec,
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 3)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
-                                                                      yVec,
-                                                                      Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 2)));
-                        vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        // Store the last block, which includes any elements that wouldn't fill a full vector
-                        end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        // Store the first block, which includes any elements preceding the first aligned block
-                        beg.StoreUnsafe(ref dRefBeg);
-                        break;
-                    }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
-            {
-                if (sizeof(T) == 1)
-                {
-                    VectorizedSmall1(ref xRef, y, ref zRef, ref dRef, remainder);
-                }
-                else if (sizeof(T) == 2)
-                {
-                    VectorizedSmall2(ref xRef, y, ref zRef, ref dRef, remainder);
-                }
-                else if (sizeof(T) == 4)
-                {
-                    VectorizedSmall4(ref xRef, y, ref zRef, ref dRef, remainder);
-                }
-                else
-                {
-                    Debug.Assert(sizeof(T) == 8);
-                    VectorizedSmall8(ref xRef, y, ref zRef, ref dRef, remainder);
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall1(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 1);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 63:
-                    case 62:
-                    case 61:
-                    case 60:
-                    case 59:
-                    case 58:
-                    case 57:
-                    case 56:
-                    case 55:
-                    case 54:
-                    case 53:
-                    case 52:
-                    case 51:
-                    case 50:
-                    case 49:
-                    case 48:
-                    case 47:
-                    case 46:
-                    case 45:
-                    case 44:
-                    case 43:
-                    case 42:
-                    case 41:
-                    case 40:
-                    case 39:
-                    case 38:
-                    case 37:
-                    case 36:
-                    case 35:
-                    case 34:
-                    case 33:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> yVec = Vector256.Create(y);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   yVec,
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                   yVec,
-                                                                   Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 32:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.Create(y),
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> yVec = Vector128.Create(y);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   yVec,
-                                                                   Vector128.LoadUnsafe(ref zRef));
-                        Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                   yVec,
-                                                                   Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.Create(y),
-                                                                   Vector128.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 15:
-                        Unsafe.Add(ref dRef, 14) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 14),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 14));
-                        goto case 14;
-
-                    case 14:
-                        Unsafe.Add(ref dRef, 13) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 13),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 13));
-                        goto case 13;
-
-                    case 13:
-                        Unsafe.Add(ref dRef, 12) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 12),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 12));
-                        goto case 12;
-
-                    case 12:
-                        Unsafe.Add(ref dRef, 11) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 11),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 11));
-                        goto case 11;
-
-                    case 11:
-                        Unsafe.Add(ref dRef, 10) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 10),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 10));
-                        goto case 10;
-
-                    case 10:
-                        Unsafe.Add(ref dRef, 9) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 9),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 9));
-                        goto case 9;
-
-                    case 9:
-                        Unsafe.Add(ref dRef, 8) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 8),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 8));
-                        goto case 8;
-
-                    case 8:
-                        Unsafe.Add(ref dRef, 7) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 7),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 7));
-                        goto case 7;
-
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 6));
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 5));
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 4));
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 3));
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 2));
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 1));
-                        goto case 1;
-
-                    case 1:
-                        dRef = TTernaryOperator.Invoke(xRef, y, zRef);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall2(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 2);
-
-                switch (remainder)
-                {
-                    // Two Vector256's worth of data, with at least one element overlapping.
-                    case 31:
-                    case 30:
-                    case 29:
-                    case 28:
-                    case 27:
-                    case 26:
-                    case 25:
-                    case 24:
-                    case 23:
-                    case 22:
-                    case 21:
-                    case 20:
-                    case 19:
-                    case 18:
-                    case 17:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> yVec = Vector256.Create(y);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   yVec,
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                   yVec,
-                                                                   Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector256's worth of data.
-                    case 16:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.Create(y),
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Two Vector128's worth of data, with at least one element overlapping.
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> yVec = Vector128.Create(y);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   yVec,
-                                                                   Vector128.LoadUnsafe(ref zRef));
-                        Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                   yVec,
-                                                                   Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    // One Vector128's worth of data.
-                    case 8:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.Create(y),
-                                                                   Vector128.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    // Cases that are smaller than a single vector. No SIMD; just jump to the length and fall through each
-                    // case to unroll the whole processing.
-                    case 7:
-                        Unsafe.Add(ref dRef, 6) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 6),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 6));
-                        goto case 6;
-
-                    case 6:
-                        Unsafe.Add(ref dRef, 5) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 5),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 5));
-                        goto case 5;
-
-                    case 5:
-                        Unsafe.Add(ref dRef, 4) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 4),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 4));
-                        goto case 4;
-
-                    case 4:
-                        Unsafe.Add(ref dRef, 3) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 3),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 3));
-                        goto case 3;
-
-                    case 3:
-                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 2));
-                        goto case 2;
-
-                    case 2:
-                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 1));
-                        goto case 1;
-
-                    case 1:
-                        dRef = TTernaryOperator.Invoke(xRef, y, zRef);
-                        goto case 0;
-
-                    case 0:
-                        break;
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall4(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 4);
-
-                switch (remainder)
-                {
-                    case 15:
-                    case 14:
-                    case 13:
-                    case 12:
-                    case 11:
-                    case 10:
-                    case 9:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> yVec = Vector256.Create(y);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   yVec,
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                   yVec,
-                                                                   Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    case 8:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.Create(y),
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> yVec = Vector128.Create(y);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   yVec,
-                                                                   Vector128.LoadUnsafe(ref zRef));
-                        Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                   yVec,
-                                                                   Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.Create(y),
-                                                                   Vector128.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 2));
-                        goto case 2;
-                    }
-
-                    case 2:
-                    {
-                        Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1),
-                                                                          y,
-                                                                          Unsafe.Add(ref zRef, 1));
-                        goto case 1;
-                    }
-
-                    case 1:
-                    {
-                        dRef = TTernaryOperator.Invoke(xRef, y, zRef);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            static void VectorizedSmall8(ref T xRef, T y, ref T zRef, ref T dRef, nuint remainder)
-            {
-                Debug.Assert(sizeof(T) == 8);
-
-                switch (remainder)
-                {
-                    case 7:
-                    case 6:
-                    case 5:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> yVec = Vector256.Create(y);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   yVec,
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        Vector256<T> end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256<T>.Count),
-                                                                   yVec,
-                                                                   Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector256<T>.Count);
-
-                        break;
-                    }
-
-                    case 4:
-                    {
-                        Debug.Assert(Vector256.IsHardwareAccelerated);
-
-                        Vector256<T> beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef),
-                                                                   Vector256.Create(y),
-                                                                   Vector256.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 3:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> yVec = Vector128.Create(y);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   yVec,
-                                                                   Vector128.LoadUnsafe(ref zRef));
-                        Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
-                                                                   yVec,
-                                                                   Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
-
-                        beg.StoreUnsafe(ref dRef);
-                        end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
-
-                        break;
-                    }
-
-                    case 2:
-                    {
-                        Debug.Assert(Vector128.IsHardwareAccelerated);
-
-                        Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
-                                                                   Vector128.Create(y),
-                                                                   Vector128.LoadUnsafe(ref zRef));
-                        beg.StoreUnsafe(ref dRef);
-
-                        break;
-                    }
-
-                    case 1:
-                    {
-                        dRef = TTernaryOperator.Invoke(xRef, y, zRef);
-                        goto case 0;
-                    }
-
-                    case 0:
-                    {
-                        break;
-                    }
-                }
-            }
-        }
-
-        /// <summary>Aggregates all of the elements in the <paramref name="x"/> into a single value.</summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TAggregate">Specifies the operation to be performed on each pair of values.</typeparam>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static T HorizontalAggregate<T, TAggregate>(Vector128<T> x) where TAggregate : struct, IBinaryOperator<T>
-        {
-            // We need to do log2(count) operations to compute the total sum
-
-            if (Unsafe.SizeOf<T>() == 1)
-            {
-                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsByte(), Vector128.Create((byte)8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)).As<byte, T>());
-                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsByte(), Vector128.Create((byte)4, 5, 6, 7, 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>());
-                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsByte(), Vector128.Create((byte)2, 3, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>());
-                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsByte(), Vector128.Create((byte)1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>());
-            }
-            else if (Unsafe.SizeOf<T>() == 2)
-            {
-                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsInt16(), Vector128.Create(4, 5, 6, 7, 0, 1, 2, 3)).As<short, T>());
-                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsInt16(), Vector128.Create(2, 3, 0, 1, 4, 5, 6, 7)).As<short, T>());
-                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsInt16(), Vector128.Create(1, 0, 2, 3, 4, 5, 6, 7)).As<short, T>());
-            }
-            else if (Unsafe.SizeOf<T>() == 4)
-            {
-                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsInt32(), Vector128.Create(2, 3, 0, 1)).As<int, T>());
-                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsInt32(), Vector128.Create(1, 0, 3, 2)).As<int, T>());
-            }
-            else if (Unsafe.SizeOf<T>() == 8)
-            {
-                x = TAggregate.Invoke(x, Vector128.Shuffle(x.AsInt64(), Vector128.Create(1, 0)).As<long, T>());
-            }
-            else
-            {
-                Debug.Fail("Should not be reachable");
-                throw new NotSupportedException();
-            }
-
-            return x.ToScalar();
-        }
-
-        /// <summary>Aggregates all of the elements in the <paramref name="x"/> into a single value.</summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TAggregate">Specifies the operation to be performed on each pair of values.</typeparam>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static T HorizontalAggregate<T, TAggregate>(Vector256<T> x) where TAggregate : struct, IBinaryOperator<T> =>
-            HorizontalAggregate<T, TAggregate>(TAggregate.Invoke(x.GetLower(), x.GetUpper()));
-
-        /// <summary>Aggregates all of the elements in the <paramref name="x"/> into a single value.</summary>
-        /// <typeparam name="T">The element type.</typeparam>
-        /// <typeparam name="TAggregate">Specifies the operation to be performed on each pair of values.</typeparam>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static T HorizontalAggregate<T, TAggregate>(Vector512<T> x) where TAggregate : struct, IBinaryOperator<T> =>
-            HorizontalAggregate<T, TAggregate>(TAggregate.Invoke(x.GetLower(), x.GetUpper()));
-
-        /// <summary>Gets whether the specified <see cref="float"/> is negative.</summary>
-        private static bool IsNegative<T>(T f) where T : INumberBase<T> => T.IsNegative(f);
-
-        /// <summary>Gets whether each specified <see cref="float"/> is negative.</summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector128<T> IsNegative<T>(Vector128<T> vector)
-        {
-            if (typeof(T) == typeof(float))
-            {
-                return Vector128.LessThan(vector.AsInt32(), Vector128<int>.Zero).As<int, T>();
-            }
-
-            if (typeof(T) == typeof(double))
-            {
-                return Vector128.LessThan(vector.AsInt64(), Vector128<long>.Zero).As<long, T>();
-            }
-
-            return Vector128.LessThan(vector, Vector128<T>.Zero);
-        }
-
-        /// <summary>Gets whether each specified <see cref="float"/> is negative.</summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector256<T> IsNegative<T>(Vector256<T> vector)
-        {
-            if (typeof(T) == typeof(float))
-            {
-                return Vector256.LessThan(vector.AsInt32(), Vector256<int>.Zero).As<int, T>();
-            }
-
-            if (typeof(T) == typeof(double))
-            {
-                return Vector256.LessThan(vector.AsInt64(), Vector256<long>.Zero).As<long, T>();
-            }
-
-            return Vector256.LessThan(vector, Vector256<T>.Zero);
-        }
-
-        /// <summary>Gets whether each specified <see cref="float"/> is negative.</summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector512<T> IsNegative<T>(Vector512<T> vector)
-        {
-            if (typeof(T) == typeof(float))
-            {
-                return Vector512.LessThan(vector.AsInt32(), Vector512<int>.Zero).As<int, T>();
-            }
-
-            if (typeof(T) == typeof(double))
-            {
-                return Vector512.LessThan(vector.AsInt64(), Vector512<long>.Zero).As<long, T>();
-            }
-
-            return Vector512.LessThan(vector, Vector512<T>.Zero);
-        }
-
-        /// <summary>
-        /// Gets a vector mask that will be all-ones-set for the last <paramref name="count"/> elements
-        /// and zero for all other elements.
-        /// </summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector128<T> CreateAlignmentMaskVector128<T>(int count)
-        {
-            if (Unsafe.SizeOf<T>() == 1)
-            {
-                return Vector128.LoadUnsafe(
-                    ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(AlignmentByteMask_64x65)),
-                    (uint)(count * 64));
-            }
-
-            if (Unsafe.SizeOf<T>() == 2)
-            {
-                return Vector128.LoadUnsafe(
-                    ref Unsafe.As<ushort, T>(ref MemoryMarshal.GetReference(AlignmentUInt16Mask_32x33)),
-                    (uint)(count * 32));
-            }
-
-            if (Unsafe.SizeOf<T>() == 4)
-            {
-                return Vector128.LoadUnsafe(
-                    ref Unsafe.As<uint, T>(ref MemoryMarshal.GetReference(AlignmentUInt32Mask_16x17)),
-                    (uint)(count * 16));
-            }
-
-            if (Unsafe.SizeOf<T>() == 8)
-            {
-                return Vector128.LoadUnsafe(
-                    ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(AlignmentUInt64Mask_8x9)),
-                    (uint)(count * 8));
-            }
-
-            Debug.Fail("Shouldn't get here");
-            throw new NotSupportedException();
-        }
-
-        /// <summary>
-        /// Gets a vector mask that will be all-ones-set for the last <paramref name="count"/> elements
-        /// and zero for all other elements.
-        /// </summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector256<T> CreateAlignmentMaskVector256<T>(int count)
-        {
-            if (Unsafe.SizeOf<T>() == 1)
-            {
-                return Vector256.LoadUnsafe(
-                    ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(AlignmentByteMask_64x65)),
-                    (uint)(count * 64));
-            }
-
-            if (Unsafe.SizeOf<T>() == 2)
-            {
-                return Vector256.LoadUnsafe(
-                    ref Unsafe.As<ushort, T>(ref MemoryMarshal.GetReference(AlignmentUInt16Mask_32x33)),
-                    (uint)(count * 32));
-            }
-
-            if (Unsafe.SizeOf<T>() == 4)
-            {
-                return Vector256.LoadUnsafe(
-                    ref Unsafe.As<uint, T>(ref MemoryMarshal.GetReference(AlignmentUInt32Mask_16x17)),
-                    (uint)(count * 16));
-            }
-
-            if (Unsafe.SizeOf<T>() == 8)
-            {
-                return Vector256.LoadUnsafe(
-                    ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(AlignmentUInt64Mask_8x9)),
-                    (uint)(count * 8));
-            }
-
-            Debug.Fail("Shouldn't get here");
-            throw new NotSupportedException();
-        }
-
-        /// <summary>
-        /// Gets a vector mask that will be all-ones-set for the last <paramref name="count"/> elements
-        /// and zero for all other elements.
-        /// </summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector512<T> CreateAlignmentMaskVector512<T>(int count)
-        {
-            if (Unsafe.SizeOf<T>() == 1)
-            {
-                return Vector512.LoadUnsafe(
-                    ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(AlignmentByteMask_64x65)),
-                    (uint)(count * 64));
-            }
-
-            if (Unsafe.SizeOf<T>() == 2)
-            {
-                return Vector512.LoadUnsafe(
-                    ref Unsafe.As<ushort, T>(ref MemoryMarshal.GetReference(AlignmentUInt16Mask_32x33)),
-                    (uint)(count * 32));
-            }
-
-            if (Unsafe.SizeOf<T>() == 4)
-            {
-                return Vector512.LoadUnsafe(
-                    ref Unsafe.As<uint, T>(ref MemoryMarshal.GetReference(AlignmentUInt32Mask_16x17)),
-                    (uint)(count * 16));
-            }
-
-            if (Unsafe.SizeOf<T>() == 8)
-            {
-                return Vector512.LoadUnsafe(
-                    ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(AlignmentUInt64Mask_8x9)),
-                    (uint)(count * 8));
-            }
-
-            Debug.Fail("Shouldn't get here - CreateAlignmentMaskVector512");
-            throw new NotSupportedException();
-        }
-
-        /// <summary>
-        /// Gets a vector mask that will be all-ones-set for the last <paramref name="count"/> elements
-        /// and zero for all other elements.
-        /// </summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector128<T> CreateRemainderMaskVector128<T>(int count)
-        {
-            if (Unsafe.SizeOf<T>() == 1)
-            {
-                return Vector128.LoadUnsafe(
-                    ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(RemainderByteMask_64x65)),
-                    (uint)(count * 64) + 48); // last 16 bytes in the row
-            }
-
-            if (Unsafe.SizeOf<T>() == 2)
-            {
-                return Vector128.LoadUnsafe(
-                    ref Unsafe.As<ushort, T>(ref MemoryMarshal.GetReference(RemainderUInt16Mask_32x33)),
-                    (uint)(count * 32) + 24); // last 8 shorts in the row
-            }
-
-            if (Unsafe.SizeOf<T>() == 4)
-            {
-                return Vector128.LoadUnsafe(
-                    ref Unsafe.As<uint, T>(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16x17)),
-                    (uint)(count * 16) + 12); // last 4 ints in the row
-            }
-
-            if (Unsafe.SizeOf<T>() == 8)
-            {
-                return Vector128.LoadUnsafe(
-                    ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9)),
-                    (uint)(count * 8) + 6); // last 2 longs in the row
-            }
-
-            Debug.Fail("Shouldn't get here - CreateRemainderMaskVector128");
-            throw new NotSupportedException();
-        }
-
-        /// <summary>
-        /// Gets a vector mask that will be all-ones-set for the last <paramref name="count"/> elements
-        /// and zero for all other elements.
-        /// </summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector256<T> CreateRemainderMaskVector256<T>(int count)
-        {
-            if (Unsafe.SizeOf<T>() == 1)
-            {
-                return Vector256.LoadUnsafe(
-                    ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(RemainderByteMask_64x65)),
-                    (uint)(count * 64) + 32); // last 32 bytes in the row
-            }
-
-            if (Unsafe.SizeOf<T>() == 2)
-            {
-                return Vector256.LoadUnsafe(
-                    ref Unsafe.As<ushort, T>(ref MemoryMarshal.GetReference(RemainderUInt16Mask_32x33)),
-                    (uint)(count * 32) + 16); // last 16 shorts in the row
-            }
-
-            if (Unsafe.SizeOf<T>() == 4)
-            {
-                return Vector256.LoadUnsafe(
-                    ref Unsafe.As<uint, T>(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16x17)),
-                    (uint)(count * 16) + 8); // last 8 ints in the row
-            }
-
-            if (Unsafe.SizeOf<T>() == 8)
-            {
-                return Vector256.LoadUnsafe(
-                    ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9)),
-                    (uint)(count * 8) + 4); // last 4 longs in the row
-            }
-
-            Debug.Fail("Shouldn't get here - CreateRemainderMaskVector256");
-            throw new NotSupportedException();
-        }
-
-        /// <summary>
-        /// Gets a vector mask that will be all-ones-set for the last <paramref name="count"/> elements
-        /// and zero for all other elements.
-        /// </summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector512<T> CreateRemainderMaskVector512<T>(int count)
-        {
-            if (Unsafe.SizeOf<T>() == 1)
-            {
-                return Vector512.LoadUnsafe(
-                    ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(RemainderByteMask_64x65)),
-                    (uint)(count * 64));
-            }
-
-            if (Unsafe.SizeOf<T>() == 2)
-            {
-                return Vector512.LoadUnsafe(
-                    ref Unsafe.As<ushort, T>(ref MemoryMarshal.GetReference(RemainderUInt16Mask_32x33)),
-                    (uint)(count * 32));
-            }
-
-            if (Unsafe.SizeOf<T>() == 4)
-            {
-                return Vector512.LoadUnsafe(
-                    ref Unsafe.As<uint, T>(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16x17)),
-                    (uint)(count * 16));
-            }
-
-            if (Unsafe.SizeOf<T>() == 8)
-            {
-                return Vector512.LoadUnsafe(
-                    ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9)),
-                    (uint)(count * 8));
-            }
-
-            Debug.Fail("Shouldn't get here - CreateRemainderMaskVector512");
-            throw new NotSupportedException();
-        }
-
-        // TODO: The uses of these ApplyScalar methods are all as part of operators when handling edge cases (NaN, Infinity, really large inputs, etc.)
-        // Currently, these edge cases are not handled in a vectorized way and instead fall back to scalar processing. We can look into
-        // handling those in a vectorized manner as well.
-
-        private static Vector128<float> ApplyScalar<TOperator>(Vector128<float> floats) where TOperator : IUnaryOperator<float, float> =>
-            Vector128.Create(TOperator.Invoke(floats[0]), TOperator.Invoke(floats[1]), TOperator.Invoke(floats[2]), TOperator.Invoke(floats[3]));
-
-        private static Vector256<float> ApplyScalar<TOperator>(Vector256<float> floats) where TOperator : IUnaryOperator<float, float> =>
-            Vector256.Create(ApplyScalar<TOperator>(floats.GetLower()), ApplyScalar<TOperator>(floats.GetUpper()));
-
-        private static Vector512<float> ApplyScalar<TOperator>(Vector512<float> floats) where TOperator : IUnaryOperator<float, float> =>
-            Vector512.Create(ApplyScalar<TOperator>(floats.GetLower()), ApplyScalar<TOperator>(floats.GetUpper()));
-
-        private static Vector128<double> ApplyScalar<TOperator>(Vector128<double> doubles) where TOperator : IUnaryOperator<double, double> =>
-            Vector128.Create(TOperator.Invoke(doubles[0]), TOperator.Invoke(doubles[1]));
-
-        private static Vector256<double> ApplyScalar<TOperator>(Vector256<double> doubles) where TOperator : IUnaryOperator<double, double> =>
-            Vector256.Create(ApplyScalar<TOperator>(doubles.GetLower()), ApplyScalar<TOperator>(doubles.GetUpper()));
-
-        private static Vector512<double> ApplyScalar<TOperator>(Vector512<double> doubles) where TOperator : IUnaryOperator<double, double> =>
-            Vector512.Create(ApplyScalar<TOperator>(doubles.GetLower()), ApplyScalar<TOperator>(doubles.GetUpper()));
-
-        /// <summary>Creates a span of <typeparamref name="TTo"/> from a <typeparamref name="TTo"/> when they're the same type.</summary>
-        private static unsafe ReadOnlySpan<TTo> Rename<TFrom, TTo>(ReadOnlySpan<TFrom> span)
-        {
-            Debug.Assert(sizeof(TFrom) == sizeof(TTo));
-            return MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<TFrom, TTo>(ref MemoryMarshal.GetReference(span)), span.Length);
-        }
-
-        /// <summary>Creates a span of <typeparamref name="TTo"/> from a <typeparamref name="TTo"/> when they're the same type.</summary>
-        private static unsafe Span<TTo> Rename<TFrom, TTo>(Span<TFrom> span)
-        {
-            Debug.Assert(sizeof(TFrom) == sizeof(TTo));
-            return MemoryMarshal.CreateSpan(ref Unsafe.As<TFrom, TTo>(ref MemoryMarshal.GetReference(span)), span.Length);
-        }
-
-        /// <summary>Gets whether <typeparamref name="T"/> is <see cref="uint"/> or <see cref="nuint"/> if in a 32-bit process.</summary>
-        private static bool IsUInt32Like<T>() => typeof(T) == typeof(uint) || (IntPtr.Size == 4 && typeof(T) == typeof(nuint));
-
-        /// <summary>Gets whether <typeparamref name="T"/> is <see cref="int"/> or <see cref="nint"/> if in a 32-bit process.</summary>
-        private static bool IsInt32Like<T>() => typeof(T) == typeof(int) || (IntPtr.Size == 4 && typeof(T) == typeof(nint));
-
-        /// <summary>Gets whether <typeparamref name="T"/> is <see cref="ulong"/> or <see cref="nuint"/> if in a 64-bit process.</summary>
-        private static bool IsUInt64Like<T>() => typeof(T) == typeof(ulong) || (IntPtr.Size == 8 && typeof(T) == typeof(nuint));
-
-        /// <summary>Gets whether <typeparamref name="T"/> is <see cref="long"/> or <see cref="nint"/> if in a 64-bit process.</summary>
-        private static bool IsInt64Like<T>() => typeof(T) == typeof(long) || (IntPtr.Size == 8 && typeof(T) == typeof(nint));
-
-        /// <summary>x + y</summary>
-        internal readonly struct AddOperator<T> : IAggregationOperator<T> where T : IAdditionOperators<T, T, T>, IAdditiveIdentity<T, T>
-        {
-            public static bool Vectorizable => true;
-
-            public static T Invoke(T x, T y) => x + y;
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x + y;
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x + y;
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x + y;
-
-            public static T Invoke(Vector128<T> x) => Vector128.Sum(x);
-            public static T Invoke(Vector256<T> x) => Vector256.Sum(x);
-            public static T Invoke(Vector512<T> x) => Vector512.Sum(x);
-
-            public static T IdentityValue => T.AdditiveIdentity;
-        }
-
-        private readonly struct InvertedBinaryOperator<TOperator, T> : IBinaryOperator<T>
-            where TOperator : IBinaryOperator<T>
-        {
-            public static bool Vectorizable => TOperator.Vectorizable;
-            public static T Invoke(T x, T y) => TOperator.Invoke(y, x);
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => TOperator.Invoke(y, x);
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => TOperator.Invoke(y, x);
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => TOperator.Invoke(y, x);
-        }
-
-        /// <summary>x - y</summary>
-        internal readonly struct SubtractOperator<T> : IBinaryOperator<T> where T : ISubtractionOperators<T, T, T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x, T y) => x - y;
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x - y;
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x - y;
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x - y;
-        }
-
-        /// <summary>(x - y) * (x - y)</summary>
-        internal readonly struct SubtractSquaredOperator<T> : IBinaryOperator<T> where T : ISubtractionOperators<T, T, T>, IMultiplyOperators<T, T, T>
-        {
-            public static bool Vectorizable => true;
-
-            public static T Invoke(T x, T y)
-            {
-                T tmp = x - y;
-                return tmp * tmp;
-            }
-
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
-            {
-                Vector128<T> tmp = x - y;
-                return tmp * tmp;
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
-            {
-                Vector256<T> tmp = x - y;
-                return tmp * tmp;
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
-            {
-                Vector512<T> tmp = x - y;
-                return tmp * tmp;
-            }
-        }
-
-        /// <summary>x * y</summary>
-        internal readonly struct MultiplyOperator<T> : IAggregationOperator<T> where T : IMultiplyOperators<T, T, T>, IMultiplicativeIdentity<T, T>
-        {
-            public static bool Vectorizable => true;
-
-            public static T Invoke(T x, T y) => x * y;
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x * y;
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x * y;
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x * y;
-
-            public static T Invoke(Vector128<T> x) => HorizontalAggregate<T, MultiplyOperator<T>>(x);
-            public static T Invoke(Vector256<T> x) => HorizontalAggregate<T, MultiplyOperator<T>>(x);
-            public static T Invoke(Vector512<T> x) => HorizontalAggregate<T, MultiplyOperator<T>>(x);
-
-            public static T IdentityValue => T.MultiplicativeIdentity;
-        }
-
-        /// <summary>x / y</summary>
-        internal readonly struct DivideOperator<T> : IBinaryOperator<T> where T : IDivisionOperators<T, T, T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x, T y) => x / y;
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x / y;
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x / y;
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x / y;
-        }
-
-        /// <summary>T.Ieee754Remainder(x, y)</summary>
-        internal readonly struct Ieee754RemainderOperator<T> : IBinaryOperator<T> where T : IFloatingPointIeee754<T>
-        {
-            public static bool Vectorizable => false;
-            public static T Invoke(T x, T y) => T.Ieee754Remainder(x, y);
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => throw new NotSupportedException();
-        }
-
-        // Ieee754Remainder
-
-        internal readonly struct ReciprocalOperator<T> : IUnaryOperator<T, T> where T : IFloatingPoint<T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x) => T.One / x;
-            public static Vector128<T> Invoke(Vector128<T> x) => Vector128<T>.One / x;
-            public static Vector256<T> Invoke(Vector256<T> x) => Vector256<T>.One / x;
-            public static Vector512<T> Invoke(Vector512<T> x) => Vector512<T>.One / x;
-        }
-
-        private readonly struct ReciprocalSqrtOperator<T> : IUnaryOperator<T, T> where T : IFloatingPointIeee754<T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x) => T.One / T.Sqrt(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => Vector128<T>.One / Vector128.Sqrt(x);
-            public static Vector256<T> Invoke(Vector256<T> x) => Vector256<T>.One / Vector256.Sqrt(x);
-            public static Vector512<T> Invoke(Vector512<T> x) => Vector512<T>.One / Vector512.Sqrt(x);
-        }
-
-        private readonly struct ReciprocalEstimateOperator<T> : IUnaryOperator<T, T> where T : IFloatingPointIeee754<T>
-        {
-            public static bool Vectorizable => true;
-
-            public static T Invoke(T x) => T.ReciprocalEstimate(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                if (Sse.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return Sse.Reciprocal(x.AsSingle()).As<float, T>();
-                }
-
-                if (AdvSimd.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return AdvSimd.ReciprocalEstimate(x.AsSingle()).As<float, T>();
-                }
-
-                if (AdvSimd.Arm64.IsSupported)
-                {
-                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.ReciprocalEstimate(x.AsDouble()).As<double, T>();
-                }
-
-                return Vector128<T>.One / x;
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                if (Avx.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return Avx.Reciprocal(x.AsSingle()).As<float, T>();
-                }
-
-                return Vector256<T>.One / x;
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                if (Avx512F.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return Avx512F.Reciprocal14(x.AsSingle()).As<float, T>();
-                    if (typeof(T) == typeof(double)) return Avx512F.Reciprocal14(x.AsDouble()).As<double, T>();
-                }
-
-                return Vector512<T>.One / x;
-            }
-        }
-
-        private readonly struct ReciprocalSqrtEstimateOperator<T> : IUnaryOperator<T, T> where T : IFloatingPointIeee754<T>
-        {
-            public static bool Vectorizable => true;
-
-            public static T Invoke(T x) => T.ReciprocalSqrtEstimate(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                if (Sse.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return Sse.ReciprocalSqrt(x.AsSingle()).As<float, T>();
-                }
-
-                if (AdvSimd.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return AdvSimd.ReciprocalSquareRootEstimate(x.AsSingle()).As<float, T>();
-                }
-
-                if (AdvSimd.Arm64.IsSupported)
-                {
-                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.ReciprocalSquareRootEstimate(x.AsDouble()).As<double, T>();
-                }
-
-                return Vector128<T>.One / Vector128.Sqrt(x);
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                if (Avx.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return Avx.ReciprocalSqrt(x.AsSingle()).As<float, T>();
-                }
-
-                return Vector256<T>.One / Vector256.Sqrt(x);
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                if (Avx512F.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return Avx512F.ReciprocalSqrt14(x.AsSingle()).As<float, T>();
-                    if (typeof(T) == typeof(double)) return Avx512F.ReciprocalSqrt14(x.AsDouble()).As<double, T>();
-                }
-
-                return Vector512<T>.One / Vector512.Sqrt(x);
-            }
-        }
-
-        /// <summary>x &amp; y</summary>
-        internal readonly struct BitwiseAndOperator<T> : IBinaryOperator<T> where T : IBitwiseOperators<T, T, T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x, T y) => x & y;
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x & y;
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x & y;
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x & y;
-        }
-
-        /// <summary>x | y</summary>
-        internal readonly struct BitwiseOrOperator<T> : IBinaryOperator<T> where T : IBitwiseOperators<T, T, T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x, T y) => x | y;
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x | y;
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x | y;
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x | y;
-        }
-
-        /// <summary>x ^ y</summary>
-        internal readonly struct XorOperator<T> : IBinaryOperator<T> where T : IBitwiseOperators<T, T, T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x, T y) => x ^ y;
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x ^ y;
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x ^ y;
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x ^ y;
-        }
-
-        /// <summary>~x</summary>
-        internal readonly struct OnesComplementOperator<T> : IUnaryOperator<T, T> where T : IBitwiseOperators<T, T, T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x) => ~x;
-            public static Vector128<T> Invoke(Vector128<T> x) => ~x;
-            public static Vector256<T> Invoke(Vector256<T> x) => ~x;
-            public static Vector512<T> Invoke(Vector512<T> x) => ~x;
-        }
-
-        /// <summary>T.Max(x, y) (but NaNs may not be propagated)</summary>
-        internal readonly struct MaxOperator<T> : IAggregationOperator<T> where T : INumber<T>
-        {
-            public static bool Vectorizable => true;
-
-            public static T Invoke(T x, T y)
-            {
-                if (typeof(T) == typeof(Half) || typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    return x == y ?
-                        (IsNegative(x) ? y : x) :
-                        (y > x ? y : x);
-                }
-
-                return T.Max(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
-            {
-                if (AdvSimd.IsSupported)
-                {
-                    if (typeof(T) == typeof(byte)) return AdvSimd.Max(x.AsByte(), y.AsByte()).As<byte, T>();
-                    if (typeof(T) == typeof(sbyte)) return AdvSimd.Max(x.AsSByte(), y.AsSByte()).As<sbyte, T>();
-                    if (typeof(T) == typeof(short)) return AdvSimd.Max(x.AsInt16(), y.AsInt16()).As<short, T>();
-                    if (typeof(T) == typeof(ushort)) return AdvSimd.Max(x.AsUInt16(), y.AsUInt16()).As<ushort, T>();
-                    if (typeof(T) == typeof(int)) return AdvSimd.Max(x.AsInt32(), y.AsInt32()).As<int, T>();
-                    if (typeof(T) == typeof(uint)) return AdvSimd.Max(x.AsUInt32(), y.AsUInt32()).As<uint, T>();
-                    if (typeof(T) == typeof(float)) return AdvSimd.Max(x.AsSingle(), y.AsSingle()).As<float, T>();
-                }
-
-                if (AdvSimd.Arm64.IsSupported)
-                {
-                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.Max(x.AsDouble(), y.AsDouble()).As<double, T>();
-                }
-
-                if (typeof(T) == typeof(float))
-                {
-                    return
-                        Vector128.ConditionalSelect(Vector128.Equals(x, y),
-                            Vector128.ConditionalSelect(IsNegative(x.AsSingle()).As<float, T>(), y, x),
-                            Vector128.Max(x, y));
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    return
-                        Vector128.ConditionalSelect(Vector128.Equals(x, y),
-                            Vector128.ConditionalSelect(IsNegative(x.AsDouble()).As<double, T>(), y, x),
-                            Vector128.Max(x, y));
-                }
-
-                return Vector128.Max(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return
-                        Vector256.ConditionalSelect(Vector256.Equals(x, y),
-                            Vector256.ConditionalSelect(IsNegative(x.AsSingle()).As<float, T>(), y, x),
-                            Vector256.Max(x, y));
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    return
-                        Vector256.ConditionalSelect(Vector256.Equals(x, y),
-                            Vector256.ConditionalSelect(IsNegative(x.AsDouble()).As<double, T>(), y, x),
-                            Vector256.Max(x, y));
-                }
-
-                return Vector256.Max(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return
-                        Vector512.ConditionalSelect(Vector512.Equals(x, y),
-                            Vector512.ConditionalSelect(IsNegative(x.AsSingle()).As<float, T>(), y, x),
-                            Vector512.Max(x, y));
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    return
-                        Vector512.ConditionalSelect(Vector512.Equals(x, y),
-                            Vector512.ConditionalSelect(IsNegative(x.AsDouble()).As<double, T>(), y, x),
-                            Vector512.Max(x, y));
-                }
-
-                return Vector512.Max(x, y);
-            }
-
-            public static T Invoke(Vector128<T> x) => HorizontalAggregate<T, MaxOperator<T>>(x);
-            public static T Invoke(Vector256<T> x) => HorizontalAggregate<T, MaxOperator<T>>(x);
-            public static T Invoke(Vector512<T> x) => HorizontalAggregate<T, MaxOperator<T>>(x);
-        }
-
-        private interface IIndexOfOperator<T> where T : INumber<T>
-        {
-            static abstract int Invoke(ref T result, T current, int resultIndex, int currentIndex);
-            static abstract void Invoke(ref Vector128<T> result, Vector128<T> current, ref Vector128<T> resultIndex, Vector128<T> currentIndex);
-            static abstract void Invoke(ref Vector256<T> result, Vector256<T> current, ref Vector256<T> resultIndex, Vector256<T> currentIndex);
-            static abstract void Invoke(ref Vector512<T> result, Vector512<T> current, ref Vector512<T> resultIndex, Vector512<T> currentIndex);
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static int IndexOfFinalAggregate<T, TIndexOfOperator>(Vector128<T> result, Vector128<T> resultIndex)
-            where T : INumber<T>
-            where TIndexOfOperator : struct, IIndexOfOperator<T>
-        {
-            Vector128<T> tmpResult;
-            Vector128<T> tmpIndex;
-
-            if (sizeof(T) == 8)
-            {
-                // Compare 0 with 1
-                tmpResult = Vector128.Shuffle(result.AsInt64(), Vector128.Create(1, 0)).As<long, T>();
-                tmpIndex = Vector128.Shuffle(resultIndex.AsInt64(), Vector128.Create(1, 0)).As<long, T>();
-                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
-
-                // Return 0
-                return (int)resultIndex.As<T, long>().ToScalar();
-            }
-
-            if (sizeof(T) == 4)
-            {
-                // Compare 0,1 with 2,3
-                tmpResult = Vector128.Shuffle(result.AsInt32(), Vector128.Create(2, 3, 0, 1)).As<int, T>();
-                tmpIndex = Vector128.Shuffle(resultIndex.AsInt32(), Vector128.Create(2, 3, 0, 1)).As<int, T>();
-                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
-
-                // Compare 0 with 1
-                tmpResult = Vector128.Shuffle(result.AsInt32(), Vector128.Create(1, 0, 3, 2)).As<int, T>();
-                tmpIndex = Vector128.Shuffle(resultIndex.AsInt32(), Vector128.Create(1, 0, 3, 2)).As<int, T>();
-                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
-
-                // Return 0
-                return resultIndex.As<T, int>().ToScalar();
-            }
-
-            if (sizeof(T) == 2)
-            {
-                // Compare 0,1,2,3 with 4,5,6,7
-                tmpResult = Vector128.Shuffle(result.AsInt16(), Vector128.Create(4, 5, 6, 7, 0, 1, 2, 3)).As<short, T>();
-                tmpIndex = Vector128.Shuffle(resultIndex.AsInt16(), Vector128.Create(4, 5, 6, 7, 0, 1, 2, 3)).As<short, T>();
-                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
-
-                // Compare 0,1 with 2,3
-                tmpResult = Vector128.Shuffle(result.AsInt16(), Vector128.Create(2, 3, 0, 1, 4, 5, 6, 7)).As<short, T>();
-                tmpIndex = Vector128.Shuffle(resultIndex.AsInt16(), Vector128.Create(2, 3, 0, 1, 4, 5, 6, 7)).As<short, T>();
-                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
-
-                // Compare 0 with 1
-                tmpResult = Vector128.Shuffle(result.AsInt16(), Vector128.Create(1, 0, 2, 3, 4, 5, 6, 7)).As<short, T>();
-                tmpIndex = Vector128.Shuffle(resultIndex.AsInt16(), Vector128.Create(1, 0, 2, 3, 4, 5, 6, 7)).As<short, T>();
-                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
-
-                // Return 0
-                return resultIndex.As<T, short>().ToScalar();
-            }
-
-            if (sizeof(T) == 1)
-            {
-                // Compare 0,1,2,3,4,5,6,7 with 8,9,10,11,12,13,14,15
-                tmpResult = Vector128.Shuffle(result.AsByte(), Vector128.Create((byte)8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)).As<byte, T>();
-                tmpIndex = Vector128.Shuffle(resultIndex.AsByte(), Vector128.Create((byte)8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)).As<byte, T>();
-                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
-
-                // Compare 0,1,2,3 with 4,5,6,7
-                tmpResult = Vector128.Shuffle(result.AsByte(), Vector128.Create((byte)4, 5, 6, 7, 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>();
-                tmpIndex = Vector128.Shuffle(resultIndex.AsByte(), Vector128.Create((byte)4, 5, 6, 7, 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>();
-                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
-
-                // Compare 0,1 with 2,3
-                tmpResult = Vector128.Shuffle(result.AsByte(), Vector128.Create((byte)2, 3, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>();
-                tmpIndex = Vector128.Shuffle(resultIndex.AsByte(), Vector128.Create((byte)2, 3, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>();
-                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
-
-                // Compare 0 with 1
-                tmpResult = Vector128.Shuffle(result.AsByte(), Vector128.Create((byte)1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>();
-                tmpIndex = Vector128.Shuffle(resultIndex.AsByte(), Vector128.Create((byte)1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)).As<byte, T>();
-                TIndexOfOperator.Invoke(ref result, tmpResult, ref resultIndex, tmpIndex);
-
-                // Return 0
-                return resultIndex.As<T, byte>().ToScalar();
-            }
-
-            throw new NotSupportedException();
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static int IndexOfFinalAggregate<T, TIndexOfOperator>(Vector256<T> result, Vector256<T> resultIndex)
-            where T : INumber<T>
-            where TIndexOfOperator : struct, IIndexOfOperator<T>
-        {
-            // Min the upper/lower halves of the Vector256
-            Vector128<T> resultLower = result.GetLower();
-            Vector128<T> indexLower = resultIndex.GetLower();
-
-            TIndexOfOperator.Invoke(ref resultLower, result.GetUpper(), ref indexLower, resultIndex.GetUpper());
-            return IndexOfFinalAggregate<T, TIndexOfOperator>(resultLower, indexLower);
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static int IndexOfFinalAggregate<T, TIndexOfOperator>(Vector512<T> result, Vector512<T> resultIndex)
-            where T : INumber<T>
-            where TIndexOfOperator : struct, IIndexOfOperator<T>
-        {
-            Vector256<T> resultLower = result.GetLower();
-            Vector256<T> indexLower = resultIndex.GetLower();
-
-            TIndexOfOperator.Invoke(ref resultLower, result.GetUpper(), ref indexLower, resultIndex.GetUpper());
-            return IndexOfFinalAggregate<T, TIndexOfOperator>(resultLower, indexLower);
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector128<T> IndexLessThan<T>(Vector128<T> indices1, Vector128<T> indices2) =>
-            sizeof(T) == sizeof(long) ? Vector128.LessThan(indices1.AsInt64(), indices2.AsInt64()).As<long, T>() :
-            sizeof(T) == sizeof(int) ? Vector128.LessThan(indices1.AsInt32(), indices2.AsInt32()).As<int, T>() :
-            sizeof(T) == sizeof(short) ? Vector128.LessThan(indices1.AsInt16(), indices2.AsInt16()).As<short, T>() :
-            Vector128.LessThan(indices1.AsByte(), indices2.AsByte()).As<byte, T>();
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector256<T> IndexLessThan<T>(Vector256<T> indices1, Vector256<T> indices2) =>
-            sizeof(T) == sizeof(long) ? Vector256.LessThan(indices1.AsInt64(), indices2.AsInt64()).As<long, T>() :
-            sizeof(T) == sizeof(int) ? Vector256.LessThan(indices1.AsInt32(), indices2.AsInt32()).As<int, T>() :
-            sizeof(T) == sizeof(short) ? Vector256.LessThan(indices1.AsInt16(), indices2.AsInt16()).As<short, T>() :
-            Vector256.LessThan(indices1.AsByte(), indices2.AsByte()).As<byte, T>();
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector512<T> IndexLessThan<T>(Vector512<T> indices1, Vector512<T> indices2) =>
-            sizeof(T) == sizeof(long) ? Vector512.LessThan(indices1.AsInt64(), indices2.AsInt64()).As<long, T>() :
-            sizeof(T) == sizeof(int) ? Vector512.LessThan(indices1.AsInt32(), indices2.AsInt32()).As<int, T>() :
-            sizeof(T) == sizeof(short) ? Vector512.LessThan(indices1.AsInt16(), indices2.AsInt16()).As<short, T>() :
-            Vector512.LessThan(indices1.AsByte(), indices2.AsByte()).As<byte, T>();
-
-        /// <summary>Returns the index of MathF.Max(x, y)</summary>
-        internal readonly struct IndexOfMaxOperator<T> : IIndexOfOperator<T> where T : INumber<T>
-        {
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static void Invoke(ref Vector128<T> result, Vector128<T> current, ref Vector128<T> resultIndex, Vector128<T> currentIndex)
-            {
-                Vector128<T> useResult = Vector128.GreaterThan(result, current);
-                Vector128<T> equalMask = Vector128.Equals(result, current);
-
-                if (equalMask != Vector128<T>.Zero)
-                {
-                    Vector128<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current));
-                        Vector128<T> currentNegative = IsNegative(current);
-                        Vector128<T> sameSign = Vector128.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
-                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative);
-                    }
-                    else
-                    {
-                        useResult |= equalMask & lessThanIndexMask;
-                    }
-                }
-
-                result = ElementWiseSelect(useResult, result, current);
-                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static void Invoke(ref Vector256<T> result, Vector256<T> current, ref Vector256<T> resultIndex, Vector256<T> currentIndex)
-            {
-                Vector256<T> useResult = Vector256.GreaterThan(result, current);
-                Vector256<T> equalMask = Vector256.Equals(result, current);
-
-                if (equalMask != Vector256<T>.Zero)
-                {
-                    Vector256<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current));
-                        Vector256<T> currentNegative = IsNegative(current);
-                        Vector256<T> sameSign = Vector256.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
-                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative);
-                    }
-                    else
-                    {
-                        useResult |= equalMask & lessThanIndexMask;
-                    }
-                }
-
-                result = ElementWiseSelect(useResult, result, current);
-                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static void Invoke(ref Vector512<T> result, Vector512<T> current, ref Vector512<T> resultIndex, Vector512<T> currentIndex)
-            {
-                Vector512<T> useResult = Vector512.GreaterThan(result, current);
-                Vector512<T> equalMask = Vector512.Equals(result, current);
-
-                if (equalMask != Vector512<T>.Zero)
-                {
-                    Vector512<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current));
-                        Vector512<T> currentNegative = IsNegative(current);
-                        Vector512<T> sameSign = Vector512.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
-                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative);
-                    }
-                    else
-                    {
-                        useResult |= equalMask & lessThanIndexMask;
-                    }
-                }
-
-                result = ElementWiseSelect(useResult, result, current);
-                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static int Invoke(ref T result, T current, int resultIndex, int currentIndex)
-            {
-                if (result == current)
-                {
-                    bool resultNegative = IsNegative(result);
-                    if ((resultNegative == IsNegative(current)) ? (currentIndex < resultIndex) : resultNegative)
-                    {
-                        result = current;
-                        return currentIndex;
-                    }
-                }
-                else if (current > result)
-                {
-                    result = current;
-                    return currentIndex;
-                }
-
-                return resultIndex;
-            }
-        }
-
-        internal readonly struct IndexOfMaxMagnitudeOperator<T> : IIndexOfOperator<T> where T : INumber<T>
-        {
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static void Invoke(ref Vector128<T> result, Vector128<T> current, ref Vector128<T> resultIndex, Vector128<T> currentIndex)
-            {
-                Vector128<T> resultMag = Vector128.Abs(result), currentMag = Vector128.Abs(current);
-                Vector128<T> useResult = Vector128.GreaterThan(resultMag, currentMag);
-                Vector128<T> equalMask = Vector128.Equals(resultMag, currentMag);
-
-                if (equalMask != Vector128<T>.Zero)
-                {
-                    Vector128<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current));
-                        Vector128<T> currentNegative = IsNegative(current);
-                        Vector128<T> sameSign = Vector128.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
-                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative);
-                    }
-                    else
-                    {
-                        useResult |= equalMask & lessThanIndexMask;
-                    }
-                }
-
-                result = ElementWiseSelect(useResult, result, current);
-                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static void Invoke(ref Vector256<T> result, Vector256<T> current, ref Vector256<T> resultIndex, Vector256<T> currentIndex)
-            {
-                Vector256<T> resultMag = Vector256.Abs(result), currentMag = Vector256.Abs(current);
-                Vector256<T> useResult = Vector256.GreaterThan(resultMag, currentMag);
-                Vector256<T> equalMask = Vector256.Equals(resultMag, currentMag);
-
-                if (equalMask != Vector256<T>.Zero)
-                {
-                    Vector256<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current));
-                        Vector256<T> currentNegative = IsNegative(current);
-                        Vector256<T> sameSign = Vector256.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
-                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative);
-                    }
-                    else
-                    {
-                        useResult |= equalMask & lessThanIndexMask;
-                    }
-                }
-
-                result = ElementWiseSelect(useResult, result, current);
-                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static void Invoke(ref Vector512<T> result, Vector512<T> current, ref Vector512<T> resultIndex, Vector512<T> currentIndex)
-            {
-                Vector512<T> resultMag = Vector512.Abs(result), currentMag = Vector512.Abs(current);
-                Vector512<T> useResult = Vector512.GreaterThan(resultMag, currentMag);
-                Vector512<T> equalMask = Vector512.Equals(resultMag, currentMag);
-
-                if (equalMask != Vector512<T>.Zero)
-                {
-                    Vector512<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(current));
-                        Vector512<T> currentNegative = IsNegative(current);
-                        Vector512<T> sameSign = Vector512.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
-                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, currentNegative);
-                    }
-                    else
-                    {
-                        useResult |= equalMask & lessThanIndexMask;
-                    }
-                }
-
-                result = ElementWiseSelect(useResult, result, current);
-                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static int Invoke(ref T result, T current, int resultIndex, int currentIndex)
-            {
-                T resultMag = T.Abs(result);
-                T currentMag = T.Abs(current);
-
-                if (resultMag == currentMag)
-                {
-                    bool resultNegative = IsNegative(result);
-                    if ((resultNegative == IsNegative(current)) ? (currentIndex < resultIndex) : resultNegative)
-                    {
-                        result = current;
-                        return currentIndex;
-                    }
-                }
-                else if (currentMag > resultMag)
-                {
-                    result = current;
-                    return currentIndex;
-                }
-
-                return resultIndex;
-            }
-        }
-
-        /// <summary>Returns the index of MathF.Min(x, y)</summary>
-        internal readonly struct IndexOfMinOperator<T> : IIndexOfOperator<T> where T : INumber<T>
-        {
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static void Invoke(ref Vector128<T> result, Vector128<T> current, ref Vector128<T> resultIndex, Vector128<T> currentIndex)
-            {
-                Vector128<T> useResult = Vector128.LessThan(result, current);
-                Vector128<T> equalMask = Vector128.Equals(result, current);
-
-                if (equalMask != Vector128<T>.Zero)
-                {
-                    Vector128<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result));
-                        Vector128<T> resultNegative = IsNegative(result);
-                        Vector128<T> sameSign = Vector128.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As<int, T>();
-                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative);
-                    }
-                    else
-                    {
-                        useResult |= equalMask & lessThanIndexMask;
-                    }
-                }
-
-                result = ElementWiseSelect(useResult, result, current);
-                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static void Invoke(ref Vector256<T> result, Vector256<T> current, ref Vector256<T> resultIndex, Vector256<T> currentIndex)
-            {
-                Vector256<T> useResult = Vector256.LessThan(result, current);
-                Vector256<T> equalMask = Vector256.Equals(result, current);
-
-                if (equalMask != Vector256<T>.Zero)
-                {
-                    Vector256<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result));
-                        Vector256<T> resultNegative = IsNegative(result);
-                        Vector256<T> sameSign = Vector256.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As<int, T>();
-                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative);
-                    }
-                    else
-                    {
-                        useResult |= equalMask & lessThanIndexMask;
-                    }
-                }
-
-                result = ElementWiseSelect(useResult, result, current);
-                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static void Invoke(ref Vector512<T> result, Vector512<T> current, ref Vector512<T> resultIndex, Vector512<T> currentIndex)
-            {
-                Vector512<T> useResult = Vector512.LessThan(result, current);
-                Vector512<T> equalMask = Vector512.Equals(result, current);
-
-                if (equalMask != Vector512<T>.Zero)
-                {
-                    Vector512<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result));
-                        Vector512<T> resultNegative = IsNegative(result);
-                        Vector512<T> sameSign = Vector512.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As<int, T>();
-                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative);
-                    }
-                    else
-                    {
-                        useResult |= equalMask & lessThanIndexMask;
-                    }
-                }
-
-                result = ElementWiseSelect(useResult, result, current);
-                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static int Invoke(ref T result, T current, int resultIndex, int currentIndex)
-            {
-                if (result == current)
-                {
-                    bool currentNegative = IsNegative(current);
-                    if ((IsNegative(result) == currentNegative) ? (currentIndex < resultIndex) : currentNegative)
-                    {
-                        result = current;
-                        return currentIndex;
-                    }
-                }
-                else if (current < result)
-                {
-                    result = current;
-                    return currentIndex;
-                }
-
-                return resultIndex;
-            }
-        }
-
-        internal readonly struct IndexOfMinMagnitudeOperator<T> : IIndexOfOperator<T> where T : INumber<T>
-        {
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static void Invoke(ref Vector128<T> result, Vector128<T> current, ref Vector128<T> resultIndex, Vector128<T> currentIndex)
-            {
-                Vector128<T> resultMag = Vector128.Abs(result), currentMag = Vector128.Abs(current);
-                Vector128<T> useResult = Vector128.LessThan(resultMag, currentMag);
-                Vector128<T> equalMask = Vector128.Equals(resultMag, currentMag);
-
-                if (equalMask != Vector128<T>.Zero)
-                {
-                    Vector128<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result));
-                        Vector128<T> resultNegative = IsNegative(result);
-                        Vector128<T> sameSign = Vector128.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As<int, T>();
-                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative);
-                    }
-                    else
-                    {
-                        useResult |= equalMask & lessThanIndexMask;
-                    }
-                }
-
-                result = ElementWiseSelect(useResult, result, current);
-                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static void Invoke(ref Vector256<T> result, Vector256<T> current, ref Vector256<T> resultIndex, Vector256<T> currentIndex)
-            {
-                Vector256<T> resultMag = Vector256.Abs(result), currentMag = Vector256.Abs(current);
-                Vector256<T> useResult = Vector256.LessThan(resultMag, currentMag);
-                Vector256<T> equalMask = Vector256.Equals(resultMag, currentMag);
-
-                if (equalMask != Vector256<T>.Zero)
-                {
-                    Vector256<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result));
-                        Vector256<T> resultNegative = IsNegative(result);
-                        Vector256<T> sameSign = Vector256.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As<int, T>();
-                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative);
-                    }
-                    else
-                    {
-                        useResult |= equalMask & lessThanIndexMask;
-                    }
-                }
-
-                result = ElementWiseSelect(useResult, result, current);
-                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static void Invoke(ref Vector512<T> result, Vector512<T> current, ref Vector512<T> resultIndex, Vector512<T> currentIndex)
-            {
-                Vector512<T> resultMag = Vector512.Abs(result), currentMag = Vector512.Abs(current);
-                Vector512<T> useResult = Vector512.LessThan(resultMag, currentMag);
-                Vector512<T> equalMask = Vector512.Equals(resultMag, currentMag);
-
-                if (equalMask != Vector512<T>.Zero)
-                {
-                    Vector512<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
-                    if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                    {
-                        // bool useResult = equal && ((IsNegative(result) == IsNegative(current)) ? (resultIndex < currentIndex) : IsNegative(result));
-                        Vector512<T> resultNegative = IsNegative(result);
-                        Vector512<T> sameSign = Vector512.Equals(resultNegative.AsInt32(), IsNegative(current).AsInt32()).As<int, T>();
-                        useResult |= equalMask & ElementWiseSelect(sameSign, lessThanIndexMask, resultNegative);
-                    }
-                    else
-                    {
-                        useResult |= equalMask & lessThanIndexMask;
-                    }
-                }
-
-                result = ElementWiseSelect(useResult, result, current);
-                resultIndex = ElementWiseSelect(useResult, resultIndex, currentIndex);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static int Invoke(ref T result, T current, int resultIndex, int currentIndex)
-            {
-                T resultMag = T.Abs(result);
-                T currentMag = T.Abs(current);
-
-                if (resultMag == currentMag)
-                {
-                    bool currentNegative = IsNegative(current);
-                    if ((IsNegative(result) == currentNegative) ? (currentIndex < resultIndex) : currentNegative)
-                    {
-                        result = current;
-                        return currentIndex;
-                    }
-                }
-                else if (currentMag < resultMag)
-                {
-                    result = current;
-                    return currentIndex;
-                }
-
-                return resultIndex;
-            }
-        }
-
-        /// <summary>Max(x, y)</summary>
-        internal readonly struct MaxPropagateNaNOperator<T> : IBinaryOperator<T>
-             where T : INumber<T>
-        {
-            public static bool Vectorizable => true;
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static T Invoke(T x, T y) => T.Max(x, y);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
-            {
-                if (AdvSimd.IsSupported)
-                {
-                    if (typeof(T) == typeof(byte)) return AdvSimd.Max(x.AsByte(), y.AsByte()).As<byte, T>();
-                    if (typeof(T) == typeof(sbyte)) return AdvSimd.Max(x.AsSByte(), y.AsSByte()).As<sbyte, T>();
-                    if (typeof(T) == typeof(ushort)) return AdvSimd.Max(x.AsUInt16(), y.AsUInt16()).As<ushort, T>();
-                    if (typeof(T) == typeof(short)) return AdvSimd.Max(x.AsInt16(), y.AsInt16()).As<short, T>();
-                    if (typeof(T) == typeof(uint)) return AdvSimd.Max(x.AsUInt32(), y.AsUInt32()).As<uint, T>();
-                    if (typeof(T) == typeof(int)) return AdvSimd.Max(x.AsInt32(), y.AsInt32()).As<int, T>();
-                    if (typeof(T) == typeof(float)) return AdvSimd.Max(x.AsSingle(), y.AsSingle()).As<float, T>();
-                }
-
-                if (AdvSimd.Arm64.IsSupported)
-                {
-                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.Max(x.AsDouble(), y.AsDouble()).As<double, T>();
-                }
-
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    return
-                        Vector128.ConditionalSelect(Vector128.Equals(x, x),
-                            Vector128.ConditionalSelect(Vector128.Equals(y, y),
-                                Vector128.ConditionalSelect(Vector128.Equals(x, y),
-                                    Vector128.ConditionalSelect(IsNegative(x), y, x),
-                                    Vector128.Max(x, y)),
-                                y),
-                            x);
-                }
-
-                return Vector128.Max(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
-            {
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    return
-                        Vector256.ConditionalSelect(Vector256.Equals(x, x),
-                            Vector256.ConditionalSelect(Vector256.Equals(y, y),
-                                Vector256.ConditionalSelect(Vector256.Equals(x, y),
-                                    Vector256.ConditionalSelect(IsNegative(x), y, x),
-                                    Vector256.Max(x, y)),
-                                y),
-                            x);
-                }
-
-                return Vector256.Max(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
-            {
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    return
-                        Vector512.ConditionalSelect(Vector512.Equals(x, x),
-                            Vector512.ConditionalSelect(Vector512.Equals(y, y),
-                                Vector512.ConditionalSelect(Vector512.Equals(x, y),
-                                    Vector512.ConditionalSelect(IsNegative(x), y, x),
-                                    Vector512.Max(x, y)),
-                                y),
-                            x);
-                }
-
-                return Vector512.Max(x, y);
-            }
-        }
-
-        /// <summary>Operator to get x or y based on which has the larger MathF.Abs (but NaNs may not be propagated)</summary>
-        internal readonly struct MaxMagnitudeOperator<T> : IAggregationOperator<T>
-            where T : INumberBase<T>
-        {
-            public static bool Vectorizable => true;
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static T Invoke(T x, T y) => T.MaxMagnitude(x, y);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
-            {
-                Vector128<T> xMag = Vector128.Abs(x), yMag = Vector128.Abs(y);
-
-                Vector128<T> result =
-                    Vector128.ConditionalSelect(Vector128.Equals(xMag, yMag),
-                        Vector128.ConditionalSelect(IsNegative(x), y, x),
-                        Vector128.ConditionalSelect(Vector128.GreaterThan(xMag, yMag), x, y));
-
-                // Handle minimum signed value that should have the largest magnitude
-                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
-                {
-                    Vector128<T> negativeMagnitudeX = Vector128.LessThan(xMag, Vector128<T>.Zero);
-                    Vector128<T> negativeMagnitudeY = Vector128.LessThan(yMag, Vector128<T>.Zero);
-                    result = Vector128.ConditionalSelect(negativeMagnitudeX,
-                        x,
-                        Vector128.ConditionalSelect(negativeMagnitudeY,
-                            y,
-                            result));
-                }
-
-                return result;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
-            {
-                Vector256<T> xMag = Vector256.Abs(x), yMag = Vector256.Abs(y);
-
-                Vector256<T> result =
-                    Vector256.ConditionalSelect(Vector256.Equals(xMag, yMag),
-                        Vector256.ConditionalSelect(IsNegative(x), y, x),
-                        Vector256.ConditionalSelect(Vector256.GreaterThan(xMag, yMag), x, y));
-
-                // Handle minimum signed value that should have the largest magnitude
-                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
-                {
-                    Vector256<T> negativeMagnitudeX = Vector256.LessThan(xMag, Vector256<T>.Zero);
-                    Vector256<T> negativeMagnitudeY = Vector256.LessThan(yMag, Vector256<T>.Zero);
-                    result = Vector256.ConditionalSelect(negativeMagnitudeX,
-                        x,
-                        Vector256.ConditionalSelect(negativeMagnitudeY,
-                            y,
-                            result));
-                }
-
-                return result;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
-            {
-                Vector512<T> xMag = Vector512.Abs(x), yMag = Vector512.Abs(y);
-
-                Vector512<T> result =
-                    Vector512.ConditionalSelect(Vector512.Equals(xMag, yMag),
-                        Vector512.ConditionalSelect(IsNegative(x), y, x),
-                        Vector512.ConditionalSelect(Vector512.GreaterThan(xMag, yMag), x, y));
-
-                // Handle minimum signed value that should have the largest magnitude
-                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
-                {
-                    Vector512<T> negativeMagnitudeX = Vector512.LessThan(xMag, Vector512<T>.Zero);
-                    Vector512<T> negativeMagnitudeY = Vector512.LessThan(yMag, Vector512<T>.Zero);
-                    result = Vector512.ConditionalSelect(negativeMagnitudeX,
-                        x,
-                        Vector512.ConditionalSelect(negativeMagnitudeY,
-                            y,
-                            result));
-                }
-
-                return result;
-            }
-
-            public static T Invoke(Vector128<T> x) => HorizontalAggregate<T, MaxMagnitudeOperator<T>>(x);
-            public static T Invoke(Vector256<T> x) => HorizontalAggregate<T, MaxMagnitudeOperator<T>>(x);
-            public static T Invoke(Vector512<T> x) => HorizontalAggregate<T, MaxMagnitudeOperator<T>>(x);
-        }
-
-        /// <summary>Operator to get x or y based on which has the larger MathF.Abs</summary>
-        internal readonly struct MaxMagnitudePropagateNaNOperator<T> : IBinaryOperator<T>
-            where T : INumberBase<T>
-        {
-            public static bool Vectorizable => true;
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static T Invoke(T x, T y) => T.MaxMagnitude(x, y);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
-            {
-                // Handle NaNs
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    Vector128<T> xMag = Vector128.Abs(x), yMag = Vector128.Abs(y);
-                    return
-                        Vector128.ConditionalSelect(Vector128.Equals(x, x),
-                            Vector128.ConditionalSelect(Vector128.Equals(y, y),
-                                Vector128.ConditionalSelect(Vector128.Equals(yMag, xMag),
-                                    Vector128.ConditionalSelect(IsNegative(x), y, x),
-                                    Vector128.ConditionalSelect(Vector128.GreaterThan(yMag, xMag), y, x)),
-                                y),
-                            x);
-                }
-
-                return MaxMagnitudeOperator<T>.Invoke(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
-            {
-                // Handle NaNs
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    Vector256<T> xMag = Vector256.Abs(x), yMag = Vector256.Abs(y);
-                    return
-                        Vector256.ConditionalSelect(Vector256.Equals(x, x),
-                            Vector256.ConditionalSelect(Vector256.Equals(y, y),
-                                Vector256.ConditionalSelect(Vector256.Equals(xMag, yMag),
-                                    Vector256.ConditionalSelect(IsNegative(x), y, x),
-                                    Vector256.ConditionalSelect(Vector256.GreaterThan(xMag, yMag), x, y)),
-                                y),
-                            x);
-                }
-
-                return MaxMagnitudeOperator<T>.Invoke(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
-            {
-                // Handle NaNs
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    Vector512<T> xMag = Vector512.Abs(x), yMag = Vector512.Abs(y);
-                    return
-                        Vector512.ConditionalSelect(Vector512.Equals(x, x),
-                        Vector512.ConditionalSelect(Vector512.Equals(y, y),
-                            Vector512.ConditionalSelect(Vector512.Equals(xMag, yMag),
-                                Vector512.ConditionalSelect(IsNegative(x), y, x),
-                                Vector512.ConditionalSelect(Vector512.GreaterThan(xMag, yMag), x, y)),
-                            y),
-                        x);
-                }
-
-                return MaxMagnitudeOperator<T>.Invoke(x, y);
-            }
-        }
-
-        /// <summary>T.Min(x, y) (but NaNs may not be propagated)</summary>
-        internal readonly struct MinOperator<T> : IAggregationOperator<T>
-            where T : INumber<T>
-        {
-            public static bool Vectorizable => true;
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static T Invoke(T x, T y)
-            {
-                if (typeof(T) == typeof(Half) || typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    return x == y ?
-                        (IsNegative(y) ? y : x) :
-                        (y < x ? y : x);
-                }
-
-                return T.Min(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
-            {
-                if (AdvSimd.IsSupported)
-                {
-                    if (typeof(T) == typeof(byte)) return AdvSimd.Min(x.AsByte(), y.AsByte()).As<byte, T>();
-                    if (typeof(T) == typeof(sbyte)) return AdvSimd.Min(x.AsSByte(), y.AsSByte()).As<sbyte, T>();
-                    if (typeof(T) == typeof(short)) return AdvSimd.Min(x.AsInt16(), y.AsInt16()).As<short, T>();
-                    if (typeof(T) == typeof(ushort)) return AdvSimd.Min(x.AsUInt16(), y.AsUInt16()).As<ushort, T>();
-                    if (typeof(T) == typeof(int)) return AdvSimd.Min(x.AsInt32(), y.AsInt32()).As<int, T>();
-                    if (typeof(T) == typeof(uint)) return AdvSimd.Min(x.AsUInt32(), y.AsUInt32()).As<uint, T>();
-                    if (typeof(T) == typeof(float)) return AdvSimd.Min(x.AsSingle(), y.AsSingle()).As<float, T>();
-                }
-
-                if (AdvSimd.Arm64.IsSupported)
-                {
-                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.Min(x.AsDouble(), y.AsDouble()).As<double, T>();
-                }
-
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    return
-                        Vector128.ConditionalSelect(Vector128.Equals(x, y),
-                            Vector128.ConditionalSelect(IsNegative(y), y, x),
-                            Vector128.Min(x, y));
-                }
-
-                return Vector128.Min(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
-            {
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    return Vector256.ConditionalSelect(Vector256.Equals(x, y),
-                        Vector256.ConditionalSelect(IsNegative(y), y, x),
-                        Vector256.Min(x, y));
-                }
-
-                return Vector256.Min(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
-            {
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    return Vector512.ConditionalSelect(Vector512.Equals(x, y),
-                        Vector512.ConditionalSelect(IsNegative(y), y, x),
-                        Vector512.Min(x, y));
-                }
-
-                return Vector512.Min(x, y);
-            }
-
-            public static T Invoke(Vector128<T> x) => HorizontalAggregate<T, MinOperator<T>>(x);
-            public static T Invoke(Vector256<T> x) => HorizontalAggregate<T, MinOperator<T>>(x);
-            public static T Invoke(Vector512<T> x) => HorizontalAggregate<T, MinOperator<T>>(x);
-        }
-
-        /// <summary>T.Min(x, y)</summary>
-        internal readonly struct MinPropagateNaNOperator<T> : IBinaryOperator<T>
-            where T : INumber<T>
-        {
-            public static bool Vectorizable => true;
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static T Invoke(T x, T y) => T.Min(x, y);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
-            {
-                if (AdvSimd.IsSupported)
-                {
-                    if (typeof(T) == typeof(byte)) return AdvSimd.Min(x.AsByte(), y.AsByte()).As<byte, T>();
-                    if (typeof(T) == typeof(sbyte)) return AdvSimd.Min(x.AsSByte(), y.AsSByte()).As<sbyte, T>();
-                    if (typeof(T) == typeof(short)) return AdvSimd.Min(x.AsInt16(), y.AsInt16()).As<short, T>();
-                    if (typeof(T) == typeof(ushort)) return AdvSimd.Min(x.AsUInt16(), y.AsUInt16()).As<ushort, T>();
-                    if (typeof(T) == typeof(int)) return AdvSimd.Min(x.AsInt32(), y.AsInt32()).As<int, T>();
-                    if (typeof(T) == typeof(uint)) return AdvSimd.Min(x.AsUInt32(), y.AsUInt32()).As<uint, T>();
-                    if (typeof(T) == typeof(float)) return AdvSimd.Min(x.AsSingle(), y.AsSingle()).As<float, T>();
-                }
-
-                if (AdvSimd.Arm64.IsSupported)
-                {
-                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.Min(x.AsDouble(), y.AsDouble()).As<double, T>();
-                }
-
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    return
-                        Vector128.ConditionalSelect(Vector128.Equals(x, x),
-                            Vector128.ConditionalSelect(Vector128.Equals(y, y),
-                                Vector128.ConditionalSelect(Vector128.Equals(x, y),
-                                    Vector128.ConditionalSelect(IsNegative(x), x, y),
-                                    Vector128.Min(x, y)),
-                                y),
-                            x);
-                }
-
-                return Vector128.Min(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
-            {
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    return
-                        Vector256.ConditionalSelect(Vector256.Equals(x, x),
-                            Vector256.ConditionalSelect(Vector256.Equals(y, y),
-                                Vector256.ConditionalSelect(Vector256.Equals(x, y),
-                                    Vector256.ConditionalSelect(IsNegative(x), x, y),
-                                    Vector256.Min(x, y)),
-                                y),
-                            x);
-                }
-
-                return Vector256.Min(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
-            {
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    return
-                        Vector512.ConditionalSelect(Vector512.Equals(x, x),
-                            Vector512.ConditionalSelect(Vector512.Equals(y, y),
-                                Vector512.ConditionalSelect(Vector512.Equals(x, y),
-                                    Vector512.ConditionalSelect(IsNegative(x), x, y),
-                                    Vector512.Min(x, y)),
-                                y),
-                            x);
-                }
-
-                return Vector512.Min(x, y);
-            }
-        }
-
-        /// <summary>Operator to get x or y based on which has the smaller MathF.Abs (but NaNs may not be propagated)</summary>
-        internal readonly struct MinMagnitudeOperator<T> : IAggregationOperator<T>
-            where T : INumberBase<T>
-        {
-            public static bool Vectorizable => true;
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static T Invoke(T x, T y) => T.MinMagnitude(x, y);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
-            {
-                Vector128<T> xMag = Vector128.Abs(x), yMag = Vector128.Abs(y);
-
-                Vector128<T> result =
-                    Vector128.ConditionalSelect(Vector128.Equals(yMag, xMag),
-                        Vector128.ConditionalSelect(IsNegative(y), y, x),
-                        Vector128.ConditionalSelect(Vector128.LessThan(yMag, xMag), y, x));
-
-                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
-                {
-                    Vector128<T> negativeMagnitudeX = Vector128.LessThan(xMag, Vector128<T>.Zero);
-                    Vector128<T> negativeMagnitudeY = Vector128.LessThan(yMag, Vector128<T>.Zero);
-                    result = Vector128.ConditionalSelect(negativeMagnitudeX,
-                        y,
-                        Vector128.ConditionalSelect(negativeMagnitudeY,
-                            x,
-                            result));
-                }
-
-                return result;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
-            {
-                Vector256<T> xMag = Vector256.Abs(x), yMag = Vector256.Abs(y);
-
-                Vector256<T> result =
-                    Vector256.ConditionalSelect(Vector256.Equals(yMag, xMag),
-                        Vector256.ConditionalSelect(IsNegative(y), y, x),
-                        Vector256.ConditionalSelect(Vector256.LessThan(yMag, xMag), y, x));
-
-                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
-                {
-                    Vector256<T> negativeMagnitudeX = Vector256.LessThan(xMag, Vector256<T>.Zero);
-                    Vector256<T> negativeMagnitudeY = Vector256.LessThan(yMag, Vector256<T>.Zero);
-                    result = Vector256.ConditionalSelect(negativeMagnitudeX,
-                        y,
-                        Vector256.ConditionalSelect(negativeMagnitudeY,
-                            x,
-                            result));
-                }
-
-                return result;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
-            {
-                Vector512<T> xMag = Vector512.Abs(x), yMag = Vector512.Abs(y);
-
-                Vector512<T> result =
-                    Vector512.ConditionalSelect(Vector512.Equals(yMag, xMag),
-                        Vector512.ConditionalSelect(IsNegative(y), y, x),
-                        Vector512.ConditionalSelect(Vector512.LessThan(yMag, xMag), y, x));
-
-                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
-                {
-                    Vector512<T> negativeMagnitudeX = Vector512.LessThan(xMag, Vector512<T>.Zero);
-                    Vector512<T> negativeMagnitudeY = Vector512.LessThan(yMag, Vector512<T>.Zero);
-                    result = Vector512.ConditionalSelect(negativeMagnitudeX,
-                        y,
-                        Vector512.ConditionalSelect(negativeMagnitudeY,
-                            x,
-                            result));
-                }
-
-                return result;
-            }
-
-            public static T Invoke(Vector128<T> x) => HorizontalAggregate<T, MinMagnitudeOperator<T>>(x);
-            public static T Invoke(Vector256<T> x) => HorizontalAggregate<T, MinMagnitudeOperator<T>>(x);
-            public static T Invoke(Vector512<T> x) => HorizontalAggregate<T, MinMagnitudeOperator<T>>(x);
-        }
-
-        /// <summary>Operator to get x or y based on which has the smaller MathF.Abs</summary>
-        internal readonly struct MinMagnitudePropagateNaNOperator<T> : IBinaryOperator<T>
-            where T : INumberBase<T>
-        {
-            public static bool Vectorizable => true;
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static T Invoke(T x, T y) => T.MinMagnitude(x, y);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
-            {
-                // Handle NaNs
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    Vector128<T> xMag = Vector128.Abs(x), yMag = Vector128.Abs(y);
-                    return
-                        Vector128.ConditionalSelect(Vector128.Equals(x, x),
-                            Vector128.ConditionalSelect(Vector128.Equals(y, y),
-                                Vector128.ConditionalSelect(Vector128.Equals(yMag, xMag),
-                                    Vector128.ConditionalSelect(IsNegative(x), x, y),
-                                    Vector128.ConditionalSelect(Vector128.LessThan(xMag, yMag), x, y)),
-                                y),
-                            x);
-                }
-
-                return MinMagnitudeOperator<T>.Invoke(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
-            {
-                // Handle NaNs
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    Vector256<T> xMag = Vector256.Abs(x), yMag = Vector256.Abs(y);
-                    return
-                        Vector256.ConditionalSelect(Vector256.Equals(x, x),
-                            Vector256.ConditionalSelect(Vector256.Equals(y, y),
-                                Vector256.ConditionalSelect(Vector256.Equals(yMag, xMag),
-                                    Vector256.ConditionalSelect(IsNegative(x), x, y),
-                                    Vector256.ConditionalSelect(Vector256.LessThan(xMag, yMag), x, y)),
-                                y),
-                            x);
-                }
-
-                return MinMagnitudeOperator<T>.Invoke(x, y);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
-            {
-                // Handle NaNs
-                if (typeof(T) == typeof(float) || typeof(T) == typeof(double))
-                {
-                    Vector512<T> xMag = Vector512.Abs(x), yMag = Vector512.Abs(y);
-                    return
-                        Vector512.ConditionalSelect(Vector512.Equals(x, x),
-                            Vector512.ConditionalSelect(Vector512.Equals(y, y),
-                                Vector512.ConditionalSelect(Vector512.Equals(yMag, xMag),
-                                    Vector512.ConditionalSelect(IsNegative(x), x, y),
-                                    Vector512.ConditionalSelect(Vector512.LessThan(xMag, yMag), x, y)),
-                                y),
-                            x);
-                }
-
-                return MinMagnitudeOperator<T>.Invoke(x, y);
-            }
-        }
-
-        /// <summary>-x</summary>
-        internal readonly struct NegateOperator<T> : IUnaryOperator<T, T> where T : IUnaryNegationOperators<T, T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x) => -x;
-            public static Vector128<T> Invoke(Vector128<T> x) => -x;
-            public static Vector256<T> Invoke(Vector256<T> x) => -x;
-            public static Vector512<T> Invoke(Vector512<T> x) => -x;
-        }
-
-        /// <summary>(x + y) * z</summary>
-        internal readonly struct AddMultiplyOperator<T> : ITernaryOperator<T> where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T>
-        {
-            public static T Invoke(T x, T y, T z) => (x + y) * z;
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z) => (x + y) * z;
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> z) => (x + y) * z;
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z) => (x + y) * z;
-        }
-
-        /// <summary>(x * y) + z</summary>
-        internal readonly struct MultiplyAddOperator<T> : ITernaryOperator<T> where T : IAdditionOperators<T, T, T>, IMultiplyOperators<T, T, T>
-        {
-            public static T Invoke(T x, T y, T z) => (x * y) + z;
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z) => (x * y) + z;
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> z) => (x * y) + z;
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z) => (x * y) + z;
-        }
-
-        /// <summary>(x * y) + z</summary>
-        internal readonly struct MultiplyAddEstimateOperator<T> : ITernaryOperator<T> where T : INumberBase<T>
-        {
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static T Invoke(T x, T y, T z)
-            {
-                // TODO https://github.com/dotnet/runtime/issues/98053: Use T.MultiplyAddEstimate when it's available.
-
-                if (Fma.IsSupported || AdvSimd.IsSupported)
-                {
-                    if (typeof(T) == typeof(Half))
-                    {
-                        Half result = Half.FusedMultiplyAdd(Unsafe.As<T, Half>(ref x), Unsafe.As<T, Half>(ref y), Unsafe.As<T, Half>(ref z));
-                        return Unsafe.As<Half, T>(ref result);
-                    }
-
-                    if (typeof(T) == typeof(float))
-                    {
-                        float result = float.FusedMultiplyAdd(Unsafe.As<T, float>(ref x), Unsafe.As<T, float>(ref y), Unsafe.As<T, float>(ref z));
-                        return Unsafe.As<float, T>(ref result);
-                    }
-
-                    if (typeof(T) == typeof(double))
-                    {
-                        double result = double.FusedMultiplyAdd(Unsafe.As<T, double>(ref x), Unsafe.As<T, double>(ref y), Unsafe.As<T, double>(ref z));
-                        return Unsafe.As<double, T>(ref result);
-                    }
-                }
-
-                return (x * y) + z;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z)
-            {
-                if (Fma.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return Fma.MultiplyAdd(x.AsSingle(), y.AsSingle(), z.AsSingle()).As<float, T>();
-                    if (typeof(T) == typeof(double)) return Fma.MultiplyAdd(x.AsDouble(), y.AsDouble(), z.AsDouble()).As<double, T>();
-                }
-
-                if (AdvSimd.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return AdvSimd.FusedMultiplyAdd(z.AsSingle(), x.AsSingle(), y.AsSingle()).As<float, T>();
-                }
-
-                if (AdvSimd.Arm64.IsSupported)
-                {
-                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.FusedMultiplyAdd(z.AsDouble(), x.AsDouble(), y.AsDouble()).As<double, T>();
-                }
-
-                return (x * y) + z;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> z)
-            {
-                if (Fma.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return Fma.MultiplyAdd(x.AsSingle(), y.AsSingle(), z.AsSingle()).As<float, T>();
-                    if (typeof(T) == typeof(double)) return Fma.MultiplyAdd(x.AsDouble(), y.AsDouble(), z.AsDouble()).As<double, T>();
-                }
-
-                return (x * y) + z;
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z)
-            {
-                if (Avx512F.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return Avx512F.FusedMultiplyAdd(x.AsSingle(), y.AsSingle(), z.AsSingle()).As<float, T>();
-                    if (typeof(T) == typeof(double)) return Avx512F.FusedMultiplyAdd(x.AsDouble(), y.AsDouble(), z.AsDouble()).As<double, T>();
-                }
-
-                return (x * y) + z;
-            }
-        }
-
-        /// <summary>(x * y) + z</summary>
-        internal readonly struct FusedMultiplyAddOperator<T> : ITernaryOperator<T> where T : IFloatingPointIeee754<T>
-        {
-            public static T Invoke(T x, T y, T z) => T.FusedMultiplyAdd(x, y, z);
-
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z)
-            {
-                if (Fma.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return Fma.MultiplyAdd(x.AsSingle(), y.AsSingle(), z.AsSingle()).As<float, T>();
-                    if (typeof(T) == typeof(double)) return Fma.MultiplyAdd(x.AsDouble(), y.AsDouble(), z.AsDouble()).As<double, T>();
-                }
-
-                if (AdvSimd.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return AdvSimd.FusedMultiplyAdd(z.AsSingle(), x.AsSingle(), y.AsSingle()).As<float, T>();
-                }
-
-                if (AdvSimd.Arm64.IsSupported)
-                {
-                    if (typeof(T) == typeof(double)) return AdvSimd.Arm64.FusedMultiplyAdd(z.AsDouble(), x.AsDouble(), y.AsDouble()).As<double, T>();
-                }
-
-                if (typeof(T) == typeof(float))
-                {
-                    Vector128<float> xFloats = x.AsSingle();
-                    Vector128<float> yFloats = y.AsSingle();
-                    Vector128<float> zFloats = z.AsSingle();
-                    return Vector128.Create(
-                        float.FusedMultiplyAdd(xFloats[0], yFloats[0], zFloats[0]),
-                        float.FusedMultiplyAdd(xFloats[1], yFloats[1], zFloats[1]),
-                        float.FusedMultiplyAdd(xFloats[2], yFloats[2], zFloats[2]),
-                        float.FusedMultiplyAdd(xFloats[3], yFloats[3], zFloats[3])).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    Vector128<double> xDoubles = x.AsDouble();
-                    Vector128<double> yDoubles = y.AsDouble();
-                    Vector128<double> zDoubles = z.AsDouble();
-                    return Vector128.Create(
-                        double.FusedMultiplyAdd(xDoubles[0], yDoubles[0], zDoubles[0]),
-                        double.FusedMultiplyAdd(xDoubles[1], yDoubles[1], zDoubles[1])).As<double, T>();
-                }
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> z)
-            {
-                if (Fma.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return Fma.MultiplyAdd(x.AsSingle(), y.AsSingle(), z.AsSingle()).As<float, T>();
-                    if (typeof(T) == typeof(double)) return Fma.MultiplyAdd(x.AsDouble(), y.AsDouble(), z.AsDouble()).As<double, T>();
-                }
-
-                return Vector256.Create(
-                    Invoke(x.GetLower(), y.GetLower(), z.GetLower()),
-                    Invoke(x.GetUpper(), y.GetUpper(), z.GetUpper()));
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z)
-            {
-                if (Avx512F.IsSupported)
-                {
-                    if (typeof(T) == typeof(float)) return Avx512F.FusedMultiplyAdd(x.AsSingle(), y.AsSingle(), z.AsSingle()).As<float, T>();
-                    if (typeof(T) == typeof(double)) return Avx512F.FusedMultiplyAdd(x.AsDouble(), y.AsDouble(), z.AsDouble()).As<double, T>();
-                }
-
-                return Vector512.Create(
-                    Invoke(x.GetLower(), y.GetLower(), z.GetLower()),
-                    Invoke(x.GetUpper(), y.GetUpper(), z.GetUpper()));
-            }
-        }
-
-        /// <summary>(x * (1 - z)) + (y * z)</summary>
-        internal readonly struct LerpOperator<T> : ITernaryOperator<T> where T : IFloatingPointIeee754<T>
-        {
-            public static T Invoke(T x, T y, T amount) => T.Lerp(x, y, amount);
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> amount) => (x * (Vector128<T>.One - amount)) + (y * amount);
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> amount) => (x * (Vector256<T>.One - amount)) + (y * amount);
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> amount) => (x * (Vector512<T>.One - amount)) + (y * amount);
-        }
-
-        /// <summary>x</summary>
-        internal readonly struct IdentityOperator<T> : IUnaryOperator<T, T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x) => x;
-            public static Vector128<T> Invoke(Vector128<T> x) => x;
-            public static Vector256<T> Invoke(Vector256<T> x) => x;
-            public static Vector512<T> Invoke(Vector512<T> x) => x;
-        }
-
-        /// <summary>x * x</summary>
-        internal readonly struct SquaredOperator<T> : IUnaryOperator<T, T> where T : IMultiplyOperators<T, T, T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x) => x * x;
-            public static Vector128<T> Invoke(Vector128<T> x) => x * x;
-            public static Vector256<T> Invoke(Vector256<T> x) => x * x;
-            public static Vector512<T> Invoke(Vector512<T> x) => x * x;
-        }
-
-        /// <summary>T.Abs(x)</summary>
-        internal readonly struct AbsoluteOperator<T> : IUnaryOperator<T, T> where T : INumberBase<T>
-        {
-            public static bool Vectorizable => true;
-
-            public static T Invoke(T x) => T.Abs(x);
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                if (typeof(T) == typeof(sbyte) ||
-                    typeof(T) == typeof(short) ||
-                    typeof(T) == typeof(int) ||
-                    typeof(T) == typeof(long) ||
-                    typeof(T) == typeof(nint))
-                {
-                    // Handle signed integers specially, in order to throw if any attempt is made to
-                    // take the absolute value of the minimum value of the type, which doesn't have
-                    // a positive absolute value representation.
-                    Vector128<T> abs = Vector128.ConditionalSelect(Vector128.LessThan(x, Vector128<T>.Zero), -x, x);
-                    if (Vector128.LessThan(abs, Vector128<T>.Zero) != Vector128<T>.Zero)
-                    {
-                        ThrowNegateTwosCompOverflow();
-                    }
-                }
-
-                return Vector128.Abs(x);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                if (typeof(T) == typeof(sbyte) ||
-                    typeof(T) == typeof(short) ||
-                    typeof(T) == typeof(int) ||
-                    typeof(T) == typeof(long) ||
-                    typeof(T) == typeof(nint))
-                {
-                    // Handle signed integers specially, in order to throw if any attempt is made to
-                    // take the absolute value of the minimum value of the type, which doesn't have
-                    // a positive absolute value representation.
-                    Vector256<T> abs = Vector256.ConditionalSelect(Vector256.LessThan(x, Vector256<T>.Zero), -x, x);
-                    if (Vector256.LessThan(abs, Vector256<T>.Zero) != Vector256<T>.Zero)
-                    {
-                        ThrowNegateTwosCompOverflow();
-                    }
-                }
-
-                return Vector256.Abs(x);
-            }
-
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                if (typeof(T) == typeof(sbyte) ||
-                    typeof(T) == typeof(short) ||
-                    typeof(T) == typeof(int) ||
-                    typeof(T) == typeof(long) ||
-                    typeof(T) == typeof(nint))
-                {
-                    // Handle signed integers specially, in order to throw if any attempt is made to
-                    // take the absolute value of the minimum value of the type, which doesn't have
-                    // a positive absolute value representation.
-                    Vector512<T> abs = Vector512.ConditionalSelect(Vector512.LessThan(x, Vector512<T>.Zero), -x, x);
-                    if (Vector512.LessThan(abs, Vector512<T>.Zero) != Vector512<T>.Zero)
-                    {
-                        ThrowNegateTwosCompOverflow();
-                    }
-                }
-
-                return Vector512.Abs(x);
-            }
-        }
-
-        /// <summary>T.Exp(x)</summary>
-        internal readonly struct ExpOperator<T> : IUnaryOperator<T, T>
-            where T : IExponentialFunctions<T>
-        {
-            public static bool Vectorizable => (typeof(T) == typeof(double))
-                                            || (typeof(T) == typeof(float));
-
-            public static T Invoke(T x) => T.Exp(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-#if NET9_0_OR_GREATER
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector128.Exp(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return Vector128.Exp(x.AsSingle()).As<float, T>();
-                }
-#else
-                if (typeof(T) == typeof(double))
-                {
-                    return ExpOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return ExpOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-#endif
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-#if NET9_0_OR_GREATER
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector256.Exp(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return Vector256.Exp(x.AsSingle()).As<float, T>();
-                }
-#else
-                if (typeof(T) == typeof(double))
-                {
-                    return ExpOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return ExpOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-#endif
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-#if NET9_0_OR_GREATER
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector512.Exp(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return Vector512.Exp(x.AsSingle()).As<float, T>();
-                }
-#else
-                if (typeof(T) == typeof(double))
-                {
-                    return ExpOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return ExpOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-#endif
-            }
-        }
-
-#if !NET9_0_OR_GREATER
-        /// <summary>double.Exp(x)</summary>
-        internal readonly struct ExpOperatorDouble : IUnaryOperator<double, double>
-        {
-            // This code is based on `vrd2_exp` from amd/aocl-libm-ose
-            // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
-            //
-            // Licensed under the BSD 3-Clause "New" or "Revised" License
-            // See THIRD-PARTY-NOTICES.TXT for the full license text
-
-            // Implementation Notes
-            // ----------------------
-            // 1. Argument Reduction:
-            //      e^x = 2^(x/ln2) = 2^(x*(64/ln(2))/64)     --- (1)
-            //
-            //      Choose 'n' and 'f', such that
-            //      x * 64/ln2 = n + f                        --- (2) | n is integer
-            //                            | |f| <= 0.5
-            //     Choose 'm' and 'j' such that,
-            //      n = (64 * m) + j                          --- (3)
-            //
-            //     From (1), (2) and (3),
-            //      e^x = 2^((64*m + j + f)/64)
-            //          = (2^m) * (2^(j/64)) * 2^(f/64)
-            //          = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64))
-            //
-            // 2. Table Lookup
-            //      Values of (2^(j/64)) are precomputed, j = 0, 1, 2, 3 ... 63
-            //
-            // 3. Polynomial Evaluation
-            //   From (2),
-            //     f = x*(64/ln(2)) - n
-            //   Let,
-            //     r  = f*(ln(2)/64) = x - n*(ln(2)/64)
-            //
-            // 4. Reconstruction
-            //      Thus,
-            //        e^x = (2^m) * (2^(j/64)) * e^r
-
-            private const ulong V_ARG_MAX = 0x40862000_00000000;
-            private const ulong V_DP64_BIAS = 1023;
-
-            private const double V_EXPF_MIN = -709.782712893384;
-            private const double V_EXPF_MAX = +709.782712893384;
-
-            private const double V_EXPF_HUGE = 6755399441055744;
-            private const double V_TBL_LN2 = 1.4426950408889634;
-
-            private const double V_LN2_HEAD = +0.693359375;
-            private const double V_LN2_TAIL = -0.00021219444005469057;
-
-            private const double C3 = 0.5000000000000018;
-            private const double C4 = 0.1666666666666617;
-            private const double C5 = 0.04166666666649277;
-            private const double C6 = 0.008333333333559272;
-            private const double C7 = 0.001388888895122404;
-            private const double C8 = 0.00019841269432677495;
-            private const double C9 = 2.4801486521374483E-05;
-            private const double C10 = 2.7557622532543023E-06;
-            private const double C11 = 2.7632293298250954E-07;
-            private const double C12 = 2.499430431958571E-08;
-
-            public static bool Vectorizable => true;
-
-            public static double Invoke(double x) => double.Exp(x);
-
-            public static Vector128<double> Invoke(Vector128<double> x)
-            {
-                // x * (64.0 / ln(2))
-                Vector128<double> z = x * Vector128.Create(V_TBL_LN2);
-
-                Vector128<double> dn = z + Vector128.Create(V_EXPF_HUGE);
-
-                // n = (int)z
-                Vector128<ulong> n = dn.AsUInt64();
-
-                // dn = (double)n
-                dn -= Vector128.Create(V_EXPF_HUGE);
-
-                // r = x - (dn * (ln(2) / 64))
-                // where ln(2) / 64 is split into Head and Tail values
-                Vector128<double> r = x - (dn * Vector128.Create(V_LN2_HEAD)) - (dn * Vector128.Create(V_LN2_TAIL));
-
-                Vector128<double> r2 = r * r;
-                Vector128<double> r4 = r2 * r2;
-                Vector128<double> r8 = r4 * r4;
-
-                // Compute polynomial
-                Vector128<double> poly = ((Vector128.Create(C12) * r + Vector128.Create(C11)) * r2 +
-                                           Vector128.Create(C10) * r + Vector128.Create(C9))  * r8 +
-                                         ((Vector128.Create(C8)  * r + Vector128.Create(C7))  * r2 +
-                                          (Vector128.Create(C6)  * r + Vector128.Create(C5))) * r4 +
-                                         ((Vector128.Create(C4)  * r + Vector128.Create(C3))  * r2 + (r + Vector128<double>.One));
-
-                // m = (n - j) / 64
-                // result = polynomial * 2^m
-                Vector128<double> ret = poly * ((n + Vector128.Create(V_DP64_BIAS)) << 52).AsDouble();
-
-                // Check if -709 < vx < 709
-                if (Vector128.GreaterThanAny(Vector128.Abs(x).AsUInt64(), Vector128.Create(V_ARG_MAX)))
-                {
-                    // (x > V_EXPF_MAX) ? double.PositiveInfinity : x
-                    Vector128<double> infinityMask = Vector128.GreaterThan(x, Vector128.Create(V_EXPF_MAX));
-
-                    ret = Vector128.ConditionalSelect(
-                        infinityMask,
-                        Vector128.Create(double.PositiveInfinity),
-                        ret
-                    );
-
-                    // (x < V_EXPF_MIN) ? 0 : x
-                    ret = Vector128.AndNot(ret, Vector128.LessThan(x, Vector128.Create(V_EXPF_MIN)));
-                }
-
-                return ret;
-            }
-
-            public static Vector256<double> Invoke(Vector256<double> x)
-            {
-                // x * (64.0 / ln(2))
-                Vector256<double> z = x * Vector256.Create(V_TBL_LN2);
-
-                Vector256<double> dn = z + Vector256.Create(V_EXPF_HUGE);
-
-                // n = (int)z
-                Vector256<ulong> n = dn.AsUInt64();
-
-                // dn = (double)n
-                dn -= Vector256.Create(V_EXPF_HUGE);
-
-                // r = x - (dn * (ln(2) / 64))
-                // where ln(2) / 64 is split into Head and Tail values
-                Vector256<double> r = x - (dn * Vector256.Create(V_LN2_HEAD)) - (dn * Vector256.Create(V_LN2_TAIL));
-
-                Vector256<double> r2 = r * r;
-                Vector256<double> r4 = r2 * r2;
-                Vector256<double> r8 = r4 * r4;
-
-                // Compute polynomial
-                Vector256<double> poly = ((Vector256.Create(C12) * r + Vector256.Create(C11)) * r2 +
-                                           Vector256.Create(C10) * r + Vector256.Create(C9))  * r8 +
-                                         ((Vector256.Create(C8)  * r + Vector256.Create(C7))  * r2 +
-                                          (Vector256.Create(C6)  * r + Vector256.Create(C5))) * r4 +
-                                         ((Vector256.Create(C4)  * r + Vector256.Create(C3))  * r2 + (r + Vector256<double>.One));
-
-                // m = (n - j) / 64
-                // result = polynomial * 2^m
-                Vector256<double> ret = poly * ((n + Vector256.Create(V_DP64_BIAS)) << 52).AsDouble();
-
-                // Check if -709 < vx < 709
-                if (Vector256.GreaterThanAny(Vector256.Abs(x).AsUInt64(), Vector256.Create(V_ARG_MAX)))
-                {
-                    // (x > V_EXPF_MAX) ? double.PositiveInfinity : x
-                    Vector256<double> infinityMask = Vector256.GreaterThan(x, Vector256.Create(V_EXPF_MAX));
-
-                    ret = Vector256.ConditionalSelect(
-                        infinityMask,
-                        Vector256.Create(double.PositiveInfinity),
-                        ret
-                    );
-
-                    // (x < V_EXPF_MIN) ? 0 : x
-                    ret = Vector256.AndNot(ret, Vector256.LessThan(x, Vector256.Create(V_EXPF_MIN)));
-                }
-
-                return ret;
-            }
-
-            public static Vector512<double> Invoke(Vector512<double> x)
-            {
-                // x * (64.0 / ln(2))
-                Vector512<double> z = x * Vector512.Create(V_TBL_LN2);
-
-                Vector512<double> dn = z + Vector512.Create(V_EXPF_HUGE);
-
-                // n = (int)z
-                Vector512<ulong> n = dn.AsUInt64();
-
-                // dn = (double)n
-                dn -= Vector512.Create(V_EXPF_HUGE);
-
-                // r = x - (dn * (ln(2) / 64))
-                // where ln(2) / 64 is split into Head and Tail values
-                Vector512<double> r = x - (dn * Vector512.Create(V_LN2_HEAD)) - (dn * Vector512.Create(V_LN2_TAIL));
-
-                Vector512<double> r2 = r * r;
-                Vector512<double> r4 = r2 * r2;
-                Vector512<double> r8 = r4 * r4;
-
-                // Compute polynomial
-                Vector512<double> poly = ((Vector512.Create(C12) * r + Vector512.Create(C11)) * r2 +
-                                           Vector512.Create(C10) * r + Vector512.Create(C9))  * r8 +
-                                         ((Vector512.Create(C8)  * r + Vector512.Create(C7))  * r2 +
-                                          (Vector512.Create(C6)  * r + Vector512.Create(C5))) * r4 +
-                                         ((Vector512.Create(C4)  * r + Vector512.Create(C3))  * r2 + (r + Vector512<double>.One));
-
-                // m = (n - j) / 64
-                // result = polynomial * 2^m
-                Vector512<double> ret = poly * ((n + Vector512.Create(V_DP64_BIAS)) << 52).AsDouble();
-
-                // Check if -709 < vx < 709
-                if (Vector512.GreaterThanAny(Vector512.Abs(x).AsUInt64(), Vector512.Create(V_ARG_MAX)))
-                {
-                    // (x > V_EXPF_MAX) ? double.PositiveInfinity : x
-                    Vector512<double> infinityMask = Vector512.GreaterThan(x, Vector512.Create(V_EXPF_MAX));
-
-                    ret = Vector512.ConditionalSelect(
-                        infinityMask,
-                        Vector512.Create(double.PositiveInfinity),
-                        ret
-                    );
-
-                    // (x < V_EXPF_MIN) ? 0 : x
-                    ret = Vector512.AndNot(ret, Vector512.LessThan(x, Vector512.Create(V_EXPF_MIN)));
-                }
-
-                return ret;
-            }
-        }
-
-        /// <summary>float.Exp(x)</summary>
-        internal readonly struct ExpOperatorSingle : IUnaryOperator<float, float>
-        {
-            // This code is based on `vrs4_expf` from amd/aocl-libm-ose
-            // Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved.
-            //
-            // Licensed under the BSD 3-Clause "New" or "Revised" License
-            // See THIRD-PARTY-NOTICES.TXT for the full license text
-
-            // Implementation Notes:
-            // 1. Argument Reduction:
-            //      e^x = 2^(x/ln2)                          --- (1)
-            //
-            //      Let x/ln(2) = z                          --- (2)
-            //
-            //      Let z = n + r , where n is an integer    --- (3)
-            //                      |r| <= 1/2
-            //
-            //     From (1), (2) and (3),
-            //      e^x = 2^z
-            //          = 2^(N+r)
-            //          = (2^N)*(2^r)                        --- (4)
-            //
-            // 2. Polynomial Evaluation
-            //   From (4),
-            //     r   = z - N
-            //     2^r = C1 + C2*r + C3*r^2 + C4*r^3 + C5 *r^4 + C6*r^5
-            //
-            // 4. Reconstruction
-            //      Thus,
-            //        e^x = (2^N) * (2^r)
-
-            private const uint V_ARG_MAX = 0x42AE0000;
-
-            private const float V_EXPF_MIN = -103.97208f;
-            private const float V_EXPF_MAX = +88.72284f;
-
-            private const double V_EXPF_HUGE = 6755399441055744;
-            private const double V_TBL_LN2 = 1.4426950408889634;
-
-            private const double C1 = 1.0000000754895704;
-            private const double C2 = 0.6931472254087585;
-            private const double C3 = 0.2402210737432219;
-            private const double C4 = 0.05550297297702539;
-            private const double C5 = 0.009676036358193323;
-            private const double C6 = 0.001341000536524434;
-
-            public static bool Vectorizable => true;
-
-            public static float Invoke(float x) => float.Exp(x);
-
-            public static Vector128<float> Invoke(Vector128<float> x)
-            {
-                // Convert x to double precision
-                (Vector128<double> xl, Vector128<double> xu) = Vector128.Widen(x);
-
-                // x * (64.0 / ln(2))
-                Vector128<double> v_tbl_ln2 = Vector128.Create(V_TBL_LN2);
-
-                Vector128<double> zl = xl * v_tbl_ln2;
-                Vector128<double> zu = xu * v_tbl_ln2;
-
-                Vector128<double> v_expf_huge = Vector128.Create(V_EXPF_HUGE);
-
-                Vector128<double> dnl = zl + v_expf_huge;
-                Vector128<double> dnu = zu + v_expf_huge;
-
-                // n = (int)z
-                Vector128<ulong> nl = dnl.AsUInt64();
-                Vector128<ulong> nu = dnu.AsUInt64();
-
-                // dn = (double)n
-                dnl -= v_expf_huge;
-                dnu -= v_expf_huge;
-
-                // r = z - dn
-                Vector128<double> c1 = Vector128.Create(C1);
-                Vector128<double> c2 = Vector128.Create(C2);
-                Vector128<double> c3 = Vector128.Create(C3);
-                Vector128<double> c4 = Vector128.Create(C4);
-                Vector128<double> c5 = Vector128.Create(C5);
-                Vector128<double> c6 = Vector128.Create(C6);
-
-                Vector128<double> rl = zl - dnl;
-
-                Vector128<double> rl2 = rl * rl;
-                Vector128<double> rl4 = rl2 * rl2;
-
-                Vector128<double> polyl = (c4 * rl + c3) * rl2
-                                       + ((c6 * rl + c5) * rl4
-                                        + (c2 * rl + c1));
-
-
-                Vector128<double> ru = zu - dnu;
-
-                Vector128<double> ru2 = ru * ru;
-                Vector128<double> ru4 = ru2 * ru2;
-
-                Vector128<double> polyu = (c4 * ru + c3) * ru2
-                                       + ((c6 * ru + c5) * ru4
-                                        + (c2 * ru + c1));
-
-                // result = (float)(poly + (n << 52))
-                Vector128<float> ret = Vector128.Narrow(
-                    (polyl.AsUInt64() + (nl << 52)).AsDouble(),
-                    (polyu.AsUInt64() + (nu << 52)).AsDouble()
-                );
-
-                // Check if -103 < |x| < 88
-                if (Vector128.GreaterThanAny(Vector128.Abs(x).AsUInt32(), Vector128.Create(V_ARG_MAX)))
-                {
-                    // (x > V_EXPF_MAX) ? float.PositiveInfinity : x
-                    Vector128<float> infinityMask = Vector128.GreaterThan(x, Vector128.Create(V_EXPF_MAX));
-
-                    ret = Vector128.ConditionalSelect(
-                        infinityMask,
-                        Vector128.Create(float.PositiveInfinity),
-                        ret
-                    );
-
-                    // (x < V_EXPF_MIN) ? 0 : x
-                    ret = Vector128.AndNot(ret, Vector128.LessThan(x, Vector128.Create(V_EXPF_MIN)));
-                }
-
-                return ret;
-            }
-
-            public static Vector256<float> Invoke(Vector256<float> x)
-            {
-                // Convert x to double precision
-                (Vector256<double> xl, Vector256<double> xu) = Vector256.Widen(x);
-
-                // x * (64.0 / ln(2))
-                Vector256<double> v_tbl_ln2 = Vector256.Create(V_TBL_LN2);
-
-                Vector256<double> zl = xl * v_tbl_ln2;
-                Vector256<double> zu = xu * v_tbl_ln2;
-
-                Vector256<double> v_expf_huge = Vector256.Create(V_EXPF_HUGE);
-
-                Vector256<double> dnl = zl + v_expf_huge;
-                Vector256<double> dnu = zu + v_expf_huge;
-
-                // n = (int)z
-                Vector256<ulong> nl = dnl.AsUInt64();
-                Vector256<ulong> nu = dnu.AsUInt64();
-
-                // dn = (double)n
-                dnl -= v_expf_huge;
-                dnu -= v_expf_huge;
-
-                // r = z - dn
-                Vector256<double> c1 = Vector256.Create(C1);
-                Vector256<double> c2 = Vector256.Create(C2);
-                Vector256<double> c3 = Vector256.Create(C3);
-                Vector256<double> c4 = Vector256.Create(C4);
-                Vector256<double> c5 = Vector256.Create(C5);
-                Vector256<double> c6 = Vector256.Create(C6);
-
-                Vector256<double> rl = zl - dnl;
-
-                Vector256<double> rl2 = rl * rl;
-                Vector256<double> rl4 = rl2 * rl2;
-
-                Vector256<double> polyl = (c4 * rl + c3) * rl2
-                                       + ((c6 * rl + c5) * rl4
-                                        + (c2 * rl + c1));
-
-
-                Vector256<double> ru = zu - dnu;
-
-                Vector256<double> ru2 = ru * ru;
-                Vector256<double> ru4 = ru2 * ru2;
-
-                Vector256<double> polyu = (c4 * ru + c3) * ru2
-                                       + ((c6 * ru + c5) * ru4
-                                        + (c2 * ru + c1));
-
-                // result = (float)(poly + (n << 52))
-                Vector256<float> ret = Vector256.Narrow(
-                    (polyl.AsUInt64() + (nl << 52)).AsDouble(),
-                    (polyu.AsUInt64() + (nu << 52)).AsDouble()
-                );
-
-                // Check if -103 < |x| < 88
-                if (Vector256.GreaterThanAny(Vector256.Abs(x).AsUInt32(), Vector256.Create(V_ARG_MAX)))
-                {
-                    // (x > V_EXPF_MAX) ? float.PositiveInfinity : x
-                    Vector256<float> infinityMask = Vector256.GreaterThan(x, Vector256.Create(V_EXPF_MAX));
-
-                    ret = Vector256.ConditionalSelect(
-                        infinityMask,
-                        Vector256.Create(float.PositiveInfinity),
-                        ret
-                    );
-
-                    // (x < V_EXPF_MIN) ? 0 : x
-                    ret = Vector256.AndNot(ret, Vector256.LessThan(x, Vector256.Create(V_EXPF_MIN)));
-                }
-
-                return ret;
-            }
-
-            public static Vector512<float> Invoke(Vector512<float> x)
-            {
-                // Convert x to double precision
-                (Vector512<double> xl, Vector512<double> xu) = Vector512.Widen(x);
-
-                // x * (64.0 / ln(2))
-                Vector512<double> v_tbl_ln2 = Vector512.Create(V_TBL_LN2);
-
-                Vector512<double> zl = xl * v_tbl_ln2;
-                Vector512<double> zu = xu * v_tbl_ln2;
-
-                Vector512<double> v_expf_huge = Vector512.Create(V_EXPF_HUGE);
-
-                Vector512<double> dnl = zl + v_expf_huge;
-                Vector512<double> dnu = zu + v_expf_huge;
-
-                // n = (int)z
-                Vector512<ulong> nl = dnl.AsUInt64();
-                Vector512<ulong> nu = dnu.AsUInt64();
-
-                // dn = (double)n
-                dnl -= v_expf_huge;
-                dnu -= v_expf_huge;
-
-                // r = z - dn
-                Vector512<double> c1 = Vector512.Create(C1);
-                Vector512<double> c2 = Vector512.Create(C2);
-                Vector512<double> c3 = Vector512.Create(C3);
-                Vector512<double> c4 = Vector512.Create(C4);
-                Vector512<double> c5 = Vector512.Create(C5);
-                Vector512<double> c6 = Vector512.Create(C6);
-
-                Vector512<double> rl = zl - dnl;
-
-                Vector512<double> rl2 = rl * rl;
-                Vector512<double> rl4 = rl2 * rl2;
-
-                Vector512<double> polyl = (c4 * rl + c3) * rl2
-                                       + ((c6 * rl + c5) * rl4
-                                        + (c2 * rl + c1));
-
-
-                Vector512<double> ru = zu - dnu;
-
-                Vector512<double> ru2 = ru * ru;
-                Vector512<double> ru4 = ru2 * ru2;
-
-                Vector512<double> polyu = (c4 * ru + c3) * ru2
-                                       + ((c6 * ru + c5) * ru4
-                                        + (c2 * ru + c1));
-
-                // result = (float)(poly + (n << 52))
-                Vector512<float> ret = Vector512.Narrow(
-                    (polyl.AsUInt64() + (nl << 52)).AsDouble(),
-                    (polyu.AsUInt64() + (nu << 52)).AsDouble()
-                );
-
-                // Check if -103 < |x| < 88
-                if (Vector512.GreaterThanAny(Vector512.Abs(x).AsUInt32(), Vector512.Create(V_ARG_MAX)))
-                {
-                    // (x > V_EXPF_MAX) ? float.PositiveInfinity : x
-                    Vector512<float> infinityMask = Vector512.GreaterThan(x, Vector512.Create(V_EXPF_MAX));
-
-                    ret = Vector512.ConditionalSelect(
-                        infinityMask,
-                        Vector512.Create(float.PositiveInfinity),
-                        ret
-                    );
-
-                    // (x < V_EXPF_MIN) ? 0 : x
-                    ret = Vector512.AndNot(ret, Vector512.LessThan(x, Vector512.Create(V_EXPF_MIN)));
-                }
-
-                return ret;
-            }
-        }
-#endif
-
-        /// <summary>T.ExpM1(x)</summary>
-        internal readonly struct ExpM1Operator<T> : IUnaryOperator<T, T>
-            where T : IExponentialFunctions<T>
-        {
-            public static bool Vectorizable => ExpOperator<T>.Vectorizable;
-
-            public static T Invoke(T x) => T.ExpM1(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => ExpOperator<T>.Invoke(x) - Vector128<T>.One;
-            public static Vector256<T> Invoke(Vector256<T> x) => ExpOperator<T>.Invoke(x) - Vector256<T>.One;
-            public static Vector512<T> Invoke(Vector512<T> x) => ExpOperator<T>.Invoke(x) - Vector512<T>.One;
-        }
-
-        /// <summary>T.Exp2(x)</summary>
-        internal readonly struct Exp2Operator<T> : IUnaryOperator<T, T>
-            where T : IExponentialFunctions<T>
-        {
-            private const double NaturalLog2 = 0.6931471805599453;
-
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Exp2(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => ExpOperator<T>.Invoke(x * Vector128.Create(T.CreateTruncating(NaturalLog2)));
-            public static Vector256<T> Invoke(Vector256<T> x) => ExpOperator<T>.Invoke(x * Vector256.Create(T.CreateTruncating(NaturalLog2)));
-            public static Vector512<T> Invoke(Vector512<T> x) => ExpOperator<T>.Invoke(x * Vector512.Create(T.CreateTruncating(NaturalLog2)));
-        }
-
-        /// <summary>T.Exp2M1(x)</summary>
-        internal readonly struct Exp2M1Operator<T> : IUnaryOperator<T, T>
-            where T : IExponentialFunctions<T>
-        {
-            public static bool Vectorizable => Exp2Operator<T>.Vectorizable;
-
-            public static T Invoke(T x) => T.Exp2M1(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => Exp2Operator<T>.Invoke(x) - Vector128<T>.One;
-            public static Vector256<T> Invoke(Vector256<T> x) => Exp2Operator<T>.Invoke(x) - Vector256<T>.One;
-            public static Vector512<T> Invoke(Vector512<T> x) => Exp2Operator<T>.Invoke(x) - Vector512<T>.One;
-        }
-
-        /// <summary>T.Exp10(x)</summary>
-        internal readonly struct Exp10Operator<T> : IUnaryOperator<T, T>
-            where T : IExponentialFunctions<T>
-        {
-            private const double NaturalLog10 = 2.302585092994046;
-
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Exp10(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => ExpOperator<T>.Invoke(x * Vector128.Create(T.CreateTruncating(NaturalLog10)));
-            public static Vector256<T> Invoke(Vector256<T> x) => ExpOperator<T>.Invoke(x * Vector256.Create(T.CreateTruncating(NaturalLog10)));
-            public static Vector512<T> Invoke(Vector512<T> x) => ExpOperator<T>.Invoke(x * Vector512.Create(T.CreateTruncating(NaturalLog10)));
-        }
-
-        /// <summary>T.Exp10M1(x)</summary>
-        internal readonly struct Exp10M1Operator<T> : IUnaryOperator<T, T>
-            where T : IExponentialFunctions<T>
-        {
-            public static bool Vectorizable => Exp2Operator<T>.Vectorizable;
-
-            public static T Invoke(T x) => T.Exp10M1(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => Exp10Operator<T>.Invoke(x) - Vector128<T>.One;
-            public static Vector256<T> Invoke(Vector256<T> x) => Exp10Operator<T>.Invoke(x) - Vector256<T>.One;
-            public static Vector512<T> Invoke(Vector512<T> x) => Exp10Operator<T>.Invoke(x) - Vector512<T>.One;
-        }
-
-        /// <summary>T.Pow(x, y)</summary>
-        internal readonly struct PowOperator<T> : IBinaryOperator<T>
-            where T : IPowerFunctions<T>
-        {
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x, T y) => T.Pow(x, y);
-
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return ExpOperator<float>.Invoke(y.AsSingle() * LogOperator<float>.Invoke(x.AsSingle())).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return ExpOperator<double>.Invoke(y.AsDouble() * LogOperator<double>.Invoke(x.AsDouble())).As<double, T>();
-                }
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return ExpOperator<float>.Invoke(y.AsSingle() * LogOperator<float>.Invoke(x.AsSingle())).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return ExpOperator<double>.Invoke(y.AsDouble() * LogOperator<double>.Invoke(x.AsDouble())).As<double, T>();
-                }
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return ExpOperator<float>.Invoke(y.AsSingle() * LogOperator<float>.Invoke(x.AsSingle())).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return ExpOperator<double>.Invoke(y.AsDouble() * LogOperator<double>.Invoke(x.AsDouble())).As<double, T>();
-                }
-            }
-        }
-
-        /// <summary>T.Sqrt(x)</summary>
-        internal readonly struct SqrtOperator<T> : IUnaryOperator<T, T>
-            where T : IRootFunctions<T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x) => T.Sqrt(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => Vector128.Sqrt(x);
-            public static Vector256<T> Invoke(Vector256<T> x) => Vector256.Sqrt(x);
-            public static Vector512<T> Invoke(Vector512<T> x) => Vector512.Sqrt(x);
-        }
-
-        /// <summary>T.Cbrt(x)</summary>
-        internal readonly struct CbrtOperator<T> : IUnaryOperator<T, T>
-            where T : IRootFunctions<T>
-        {
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Cbrt(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return ExpOperator<float>.Invoke(LogOperator<float>.Invoke(x.AsSingle()) / Vector128.Create(3f)).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return ExpOperator<double>.Invoke(LogOperator<double>.Invoke(x.AsDouble()) / Vector128.Create(3d)).As<double, T>();
-                }
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return ExpOperator<float>.Invoke(LogOperator<float>.Invoke(x.AsSingle()) / Vector256.Create(3f)).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return ExpOperator<double>.Invoke(LogOperator<double>.Invoke(x.AsDouble()) / Vector256.Create(3d)).As<double, T>();
-                }
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return ExpOperator<float>.Invoke(LogOperator<float>.Invoke(x.AsSingle()) / Vector512.Create(3f)).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return ExpOperator<double>.Invoke(LogOperator<double>.Invoke(x.AsDouble()) / Vector512.Create(3d)).As<double, T>();
-                }
-            }
-        }
-
-        /// <summary>T.Hypot(x, y)</summary>
-        internal readonly struct HypotOperator<T> : IBinaryOperator<T>
-            where T : IRootFunctions<T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x, T y) => T.Hypot(x, y);
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => Vector128.Sqrt((x * x) + (y * y));
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => Vector256.Sqrt((x * x) + (y * y));
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => Vector512.Sqrt((x * x) + (y * y));
-        }
-
-        /// <summary>T.Acos(x)</summary>
-        internal readonly struct AcosOperator<T> : IUnaryOperator<T, T>
-            where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => false; // TODO: Vectorize
-            public static T Invoke(T x) => T.Acos(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.Acosh(x)</summary>
-        internal readonly struct AcoshOperator<T> : IUnaryOperator<T, T>
-            where T : IHyperbolicFunctions<T>
-        {
-            public static bool Vectorizable => false; // TODO: Vectorize
-            public static T Invoke(T x) => T.Acosh(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.AcosPi(x)</summary>
-        internal readonly struct AcosPiOperator<T> : IUnaryOperator<T, T>
-            where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => AcosOperator<T>.Vectorizable;
-            public static T Invoke(T x) => T.AcosPi(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => AcosOperator<T>.Invoke(x) / Vector128.Create(T.Pi);
-            public static Vector256<T> Invoke(Vector256<T> x) => AcosOperator<T>.Invoke(x) / Vector256.Create(T.Pi);
-            public static Vector512<T> Invoke(Vector512<T> x) => AcosOperator<T>.Invoke(x) / Vector512.Create(T.Pi);
-        }
-
-        /// <summary>T.Asin(x)</summary>
-        internal readonly struct AsinOperator<T> : IUnaryOperator<T, T>
-            where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => false; // TODO: Vectorize
-            public static T Invoke(T x) => T.Asin(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.Asinh(x)</summary>
-        internal readonly struct AsinhOperator<T> : IUnaryOperator<T, T>
-            where T : IHyperbolicFunctions<T>
-        {
-            public static bool Vectorizable => false; // TODO: Vectorize
-            public static T Invoke(T x) => T.Asinh(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.AsinPi(x)</summary>
-        internal readonly struct AsinPiOperator<T> : IUnaryOperator<T, T>
-            where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => AsinOperator<T>.Vectorizable;
-            public static T Invoke(T x) => T.AsinPi(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => AsinOperator<T>.Invoke(x) / Vector128.Create(T.Pi);
-            public static Vector256<T> Invoke(Vector256<T> x) => AsinOperator<T>.Invoke(x) / Vector256.Create(T.Pi);
-            public static Vector512<T> Invoke(Vector512<T> x) => AsinOperator<T>.Invoke(x) / Vector512.Create(T.Pi);
-        }
-
-        /// <summary>T.Atan(x)</summary>
-        internal readonly struct AtanOperator<T> : IUnaryOperator<T, T>
-            where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => false; // TODO: Vectorize
-            public static T Invoke(T x) => T.Atan(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.Atanh(x)</summary>
-        internal readonly struct AtanhOperator<T> : IUnaryOperator<T, T>
-            where T : IHyperbolicFunctions<T>
-        {
-            public static bool Vectorizable => false; // TODO: Vectorize
-            public static T Invoke(T x) => T.Atanh(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.AtanPi(x)</summary>
-        internal readonly struct AtanPiOperator<T> : IUnaryOperator<T, T>
-            where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => AtanOperator<T>.Vectorizable;
-            public static T Invoke(T x) => T.AtanPi(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => AtanOperator<T>.Invoke(x) / Vector128.Create(T.Pi);
-            public static Vector256<T> Invoke(Vector256<T> x) => AtanOperator<T>.Invoke(x) / Vector256.Create(T.Pi);
-            public static Vector512<T> Invoke(Vector512<T> x) => AtanOperator<T>.Invoke(x) / Vector512.Create(T.Pi);
-        }
-
-        /// <summary>T.Atan2(y, x)</summary>
-        internal readonly struct Atan2Operator<T> : IBinaryOperator<T>
-            where T : IFloatingPointIeee754<T>
-        {
-            public static bool Vectorizable => false; // TODO: Vectorize
-            public static T Invoke(T y, T x) => T.Atan2(y, x);
-            public static Vector128<T> Invoke(Vector128<T> y, Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> y, Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> y, Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.Atan2Pi(y, x)</summary>
-        internal readonly struct Atan2PiOperator<T> : IBinaryOperator<T>
-            where T : IFloatingPointIeee754<T>
-        {
-            public static bool Vectorizable => Atan2Operator<T>.Vectorizable;
-            public static T Invoke(T y, T x) => T.Atan2Pi(y, x);
-            public static Vector128<T> Invoke(Vector128<T> y, Vector128<T> x) => Atan2Operator<T>.Invoke(y, x) / Vector128.Create(T.Pi);
-            public static Vector256<T> Invoke(Vector256<T> y, Vector256<T> x) => Atan2Operator<T>.Invoke(y, x) / Vector256.Create(T.Pi);
-            public static Vector512<T> Invoke(Vector512<T> y, Vector512<T> x) => Atan2Operator<T>.Invoke(y, x) / Vector512.Create(T.Pi);
-        }
-
-        /// <summary>T.Cos(x)</summary>
-        internal readonly struct CosOperator<T> : IUnaryOperator<T, T>
-            where T : ITrigonometricFunctions<T>
-        {
-            // This code is based on `vrs4_cos` and `vrd2_cos` from amd/aocl-libm-ose
-            // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
-            //
-            // Licensed under the BSD 3-Clause "New" or "Revised" License
-            // See THIRD-PARTY-NOTICES.TXT for the full license text
-
-            // Implementation notes from amd/aocl-libm-ose:
-            // --------------------------------------------
-            // To compute cosf(float x)
-            // Using the identity,
-            // cos(x) = sin(x + pi/2)           (1)
-            //
-            // 1. Argument Reduction
-            //      Now, let x be represented as,
-            //          |x| = N * pi + f        (2) | N is an integer,
-            //                                        -pi/2 <= f <= pi/2
-            //
-            //      From (2), N = int( (x + pi/2) / pi) - 0.5
-            //                f = |x| - (N * pi)
-            //
-            // 2. Polynomial Evaluation
-            //       From (1) and (2),sin(f) can be calculated using a polynomial
-            //       sin(f) = f*(1 + C1*f^2 + C2*f^4 + C3*f^6 + c4*f^8)
-            //
-            // 3. Reconstruction
-            //      Hence, cos(x) = sin(x + pi/2) = (-1)^N * sin(f)
-
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Cos(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-            }
-        }
-
-        /// <summary>float.Cos(x)</summary>
-        private readonly struct CosOperatorSingle : IUnaryOperator<float, float>
-        {
-            internal const uint MaxVectorizedValue = 0x4A989680u;
-            internal const uint SignMask = 0x7FFFFFFFu;
-            private const float AlmHuge = 1.2582912e7f;
-            private const float Pi_Tail1 = 8.742278e-8f;
-            private const float Pi_Tail2 = 3.430249e-15f;
-            private const float C1 = -0.16666657f;
-            private const float C2 = 0.008332962f;
-            private const float C3 = -1.9801206e-4f;
-            private const float C4 = 2.5867037e-6f;
-
-            public static bool Vectorizable => true;
-
-            public static float Invoke(float x) => float.Cos(x);
-
-            public static Vector128<float> Invoke(Vector128<float> x)
-            {
-                Vector128<uint> uxMasked = Vector128.Abs(x).AsUInt32();
-                if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue)))
-                {
-                    return ApplyScalar<CosOperatorSingle>(x);
-                }
-
-                Vector128<float> r = uxMasked.AsSingle();
-                Vector128<float> almHuge = Vector128.Create(AlmHuge);
-                Vector128<float> dn = ((r + Vector128.Create(float.Pi / 2)) * Vector128.Create(1 / float.Pi)) + almHuge;
-                Vector128<uint> odd = dn.AsUInt32() << 31;
-                dn = dn - almHuge - Vector128.Create(0.5f);
-                Vector128<float> f = r + (dn * Vector128.Create(-float.Pi)) + (dn * Vector128.Create(Pi_Tail1)) + (dn * Vector128.Create(Pi_Tail2));
-
-                // POLY_EVAL_ODD_9
-                Vector128<float> f2 = f * f;
-                Vector128<float> f4 = f2 * f2;
-                Vector128<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1));
-                Vector128<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector128<float>.One);
-                Vector128<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C3), f2, Vector128.Create(C4) * f4);
-                Vector128<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
-                Vector128<float> poly = f * a3;
-
-                return (poly.AsUInt32() ^ odd).AsSingle();
-            }
-
-            public static Vector256<float> Invoke(Vector256<float> x)
-            {
-                Vector256<uint> uxMasked = Vector256.Abs(x).AsUInt32();
-                if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue)))
-                {
-                    return ApplyScalar<CosOperatorSingle>(x);
-                }
-
-                Vector256<float> r = uxMasked.AsSingle();
-                Vector256<float> almHuge = Vector256.Create(AlmHuge);
-                Vector256<float> dn = ((r + Vector256.Create(float.Pi / 2)) * Vector256.Create(1 / float.Pi)) + almHuge;
-                Vector256<uint> odd = dn.AsUInt32() << 31;
-                dn = dn - almHuge - Vector256.Create(0.5f);
-                Vector256<float> f = r + (dn * Vector256.Create(-float.Pi)) + (dn * Vector256.Create(Pi_Tail1)) + (dn * Vector256.Create(Pi_Tail2));
-
-                // POLY_EVAL_ODD_9
-                Vector256<float> f2 = f * f;
-                Vector256<float> f4 = f2 * f2;
-                Vector256<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1));
-                Vector256<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector256<float>.One);
-                Vector256<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C3), f2, Vector256.Create(C4) * f4);
-                Vector256<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
-                Vector256<float> poly = f * a3;
-
-                return (poly.AsUInt32() ^ odd).AsSingle();
-            }
-
-            public static Vector512<float> Invoke(Vector512<float> x)
-            {
-                Vector512<uint> uxMasked = Vector512.Abs(x).AsUInt32();
-                if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue)))
-                {
-                    return ApplyScalar<CosOperatorSingle>(x);
-                }
-
-                Vector512<float> r = uxMasked.AsSingle();
-                Vector512<float> almHuge = Vector512.Create(AlmHuge);
-                Vector512<float> dn = ((r + Vector512.Create(float.Pi / 2)) * Vector512.Create(1 / float.Pi)) + almHuge;
-                Vector512<uint> odd = dn.AsUInt32() << 31;
-                dn = dn - almHuge - Vector512.Create(0.5f);
-                Vector512<float> f = r + (dn * Vector512.Create(-float.Pi)) + (dn * Vector512.Create(Pi_Tail1)) + (dn * Vector512.Create(Pi_Tail2));
-
-                // POLY_EVAL_ODD_9
-                Vector512<float> f2 = f * f;
-                Vector512<float> f4 = f2 * f2;
-                Vector512<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
-                Vector512<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector512<float>.One);
-                Vector512<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C3), f2, Vector512.Create(C4) * f4);
-                Vector512<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
-                Vector512<float> poly = f * a3;
-
-                return (poly.AsUInt32() ^ odd).AsSingle();
-            }
-        }
-
-        /// <summary>double.Cos(x)</summary>
-        internal readonly struct CosOperatorDouble : IUnaryOperator<double, double>
-        {
-            internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul;
-            internal const ulong MaxVectorizedValue = 0x4160000000000000ul;
-            private const double AlmHuge = 6.755399441055744E15;
-            private const double Pi_Tail2 = -1.2246467991473532E-16;
-            private const double Pi_Tail3 = 2.9947698097183397E-33;
-            private const double C1 = -0.16666666666666666;
-            private const double C2 = 0.008333333333333165;
-            private const double C3 = -1.984126984120184E-4;
-            private const double C4 = 2.7557319210152756E-6;
-            private const double C5 = -2.5052106798274616E-8;
-            private const double C6 = 1.6058936490373254E-10;
-            private const double C7 = -7.642917806937501E-13;
-            private const double C8 = 2.7204790963151784E-15;
-
-            public static bool Vectorizable => true;
-
-            public static double Invoke(double x) => double.Cos(x);
-
-            public static Vector128<double> Invoke(Vector128<double> x)
-            {
-                Vector128<ulong> uxMasked = Vector128.Abs(x).AsUInt64();
-                if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue)))
-                {
-                    return ApplyScalar<CosOperatorDouble>(x);
-                }
-
-                Vector128<double> r = uxMasked.AsDouble();
-                Vector128<double> almHuge = Vector128.Create(AlmHuge);
-                Vector128<double> dn = (r * Vector128.Create(1 / double.Pi)) + Vector128.Create(double.Pi / 2) + almHuge;
-                Vector128<ulong> odd = dn.AsUInt64() << 63;
-                dn = dn - almHuge - Vector128.Create(0.5);
-                Vector128<double> f = r + (dn * Vector128.Create(-double.Pi)) + (dn * Vector128.Create(Pi_Tail2)) + (dn * Vector128.Create(Pi_Tail3));
-
-                // POLY_EVAL_ODD_17
-                Vector128<double> f2 = f * f;
-                Vector128<double> f4 = f2 * f2;
-                Vector128<double> f6 = f4 * f2;
-                Vector128<double> f10 = f6 * f4;
-                Vector128<double> f14 = f10 * f4;
-                Vector128<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1));
-                Vector128<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C4), f2, Vector128.Create(C3));
-                Vector128<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C6), f2, Vector128.Create(C5));
-                Vector128<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C8), f2, Vector128.Create(C7));
-                Vector128<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
-                Vector128<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
-                Vector128<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
-
-                return (poly.AsUInt64() ^ odd).AsDouble();
-            }
-
-            public static Vector256<double> Invoke(Vector256<double> x)
-            {
-                Vector256<ulong> uxMasked = Vector256.Abs(x).AsUInt64();
-                if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue)))
-                {
-                    return ApplyScalar<CosOperatorDouble>(x);
-                }
-
-                Vector256<double> r = uxMasked.AsDouble();
-                Vector256<double> almHuge = Vector256.Create(AlmHuge);
-                Vector256<double> dn = (r * Vector256.Create(1 / double.Pi)) + Vector256.Create(double.Pi / 2) + almHuge;
-                Vector256<ulong> odd = dn.AsUInt64() << 63;
-                dn = dn - almHuge - Vector256.Create(0.5);
-                Vector256<double> f = r + (dn * Vector256.Create(-double.Pi)) + (dn * Vector256.Create(Pi_Tail2)) + (dn * Vector256.Create(Pi_Tail3));
-
-                // POLY_EVAL_ODD_17
-                Vector256<double> f2 = f * f;
-                Vector256<double> f4 = f2 * f2;
-                Vector256<double> f6 = f4 * f2;
-                Vector256<double> f10 = f6 * f4;
-                Vector256<double> f14 = f10 * f4;
-                Vector256<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1));
-                Vector256<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C4), f2, Vector256.Create(C3));
-                Vector256<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C6), f2, Vector256.Create(C5));
-                Vector256<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C8), f2, Vector256.Create(C7));
-                Vector256<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
-                Vector256<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
-                Vector256<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
-
-                return (poly.AsUInt64() ^ odd).AsDouble();
-            }
-
-            public static Vector512<double> Invoke(Vector512<double> x)
-            {
-                Vector512<ulong> uxMasked = Vector512.Abs(x).AsUInt64();
-                if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue)))
-                {
-                    return ApplyScalar<CosOperatorDouble>(x);
-                }
-
-                Vector512<double> r = uxMasked.AsDouble();
-                Vector512<double> almHuge = Vector512.Create(AlmHuge);
-                Vector512<double> dn = (r * Vector512.Create(1 / double.Pi)) + Vector512.Create(double.Pi / 2) + almHuge;
-                Vector512<ulong> odd = dn.AsUInt64() << 63;
-                dn = dn - almHuge - Vector512.Create(0.5);
-                Vector512<double> f = r + (dn * Vector512.Create(-double.Pi)) + (dn * Vector512.Create(Pi_Tail2)) + (dn * Vector512.Create(Pi_Tail3));
-
-                // POLY_EVAL_ODD_17
-                Vector512<double> f2 = f * f;
-                Vector512<double> f4 = f2 * f2;
-                Vector512<double> f6 = f4 * f2;
-                Vector512<double> f10 = f6 * f4;
-                Vector512<double> f14 = f10 * f4;
-                Vector512<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
-                Vector512<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C4), f2, Vector512.Create(C3));
-                Vector512<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C6), f2, Vector512.Create(C5));
-                Vector512<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C8), f2, Vector512.Create(C7));
-                Vector512<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
-                Vector512<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
-                Vector512<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
-
-                return (poly.AsUInt64() ^ odd).AsDouble();
-            }
-        }
-
-        /// <summary>T.CosPi(x)</summary>
-        internal readonly struct CosPiOperator<T> : IUnaryOperator<T, T>
-            where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.CosPi(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                Vector128<T> xpi = x * Vector128.Create(T.Pi);
-                if (typeof(T) == typeof(float))
-                {
-                    if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(CosOperatorSingle.SignMask), Vector128.Create(CosOperatorSingle.MaxVectorizedValue)))
-                    {
-                        return ApplyScalar<CosPiOperator<float>>(x.AsSingle()).As<float, T>();
-                    }
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    if (Vector128.GreaterThanAny(xpi.AsUInt64() & Vector128.Create(CosOperatorDouble.SignMask), Vector128.Create(CosOperatorDouble.MaxVectorizedValue)))
-                    {
-                        return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
-                    }
-                }
-
-                return CosOperator<T>.Invoke(xpi);
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                Vector256<T> xpi = x * Vector256.Create(T.Pi);
-                if (typeof(T) == typeof(float))
-                {
-                    if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(CosOperatorSingle.SignMask), Vector256.Create(CosOperatorSingle.MaxVectorizedValue)))
-                    {
-                        return ApplyScalar<CosPiOperator<float>>(x.AsSingle()).As<float, T>();
-                    }
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    if (Vector256.GreaterThanAny(xpi.AsUInt64() & Vector256.Create(CosOperatorDouble.SignMask), Vector256.Create(CosOperatorDouble.MaxVectorizedValue)))
-                    {
-                        return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
-                    }
-                }
-
-                return CosOperator<T>.Invoke(xpi);
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                Vector512<T> xpi = x * Vector512.Create(T.Pi);
-                if (typeof(T) == typeof(float))
-                {
-                    if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(CosOperatorSingle.SignMask), Vector512.Create(CosOperatorSingle.MaxVectorizedValue)))
-                    {
-                        return ApplyScalar<CosPiOperator<float>>(x.AsSingle()).As<float, T>();
-                    }
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    if (Vector512.GreaterThanAny(xpi.AsUInt64() & Vector512.Create(CosOperatorDouble.SignMask), Vector512.Create(CosOperatorDouble.MaxVectorizedValue)))
-                    {
-                        return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
-                    }
-                }
-
-                return CosOperator<T>.Invoke(xpi);
-            }
-        }
-
-        /// <summary>T.Cosh(x)</summary>
-        internal readonly struct CoshOperator<T> : IUnaryOperator<T, T>
-            where T : IHyperbolicFunctions<T>
-        {
-            // This code is based on `vrs4_coshf` from amd/aocl-libm-ose
-            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
-            //
-            // Licensed under the BSD 3-Clause "New" or "Revised" License
-            // See THIRD-PARTY-NOTICES.TXT for the full license text
-
-            // Spec:
-            //   coshf(|x| > 89.415985107421875) = Infinity
-            //   coshf(Infinity)  = infinity
-            //   coshf(-Infinity) = infinity
-            //
-            // cosh(x) = (exp(x) + exp(-x))/2
-            // cosh(-x) = +cosh(x)
-            //
-            // checks for special cases
-            // if ( asint(x) > infinity) return x with overflow exception and
-            // return x.
-            // if x is NaN then raise invalid FP operation exception and return x.
-            //
-            // coshf = v/2 * exp(x - log(v)) where v = 0x1.0000e8p-1
-
-            private const float Single_LOGV = 0.693161f;
-            private const float Single_HALFV = 1.0000138f;
-            private const float Single_INVV2 = 0.24999309f;
-
-            private const double Double_LOGV = 0.6931471805599453;
-            private const double Double_HALFV = 1.0;
-            private const double Double_INVV2 = 0.25;
-
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Cosh(x);
-
-            public static Vector128<T> Invoke(Vector128<T> t)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    Vector128<float> x = t.AsSingle();
-
-                    Vector128<float> y = Vector128.Abs(x);
-                    Vector128<float> z = ExpOperator<float>.Invoke(y - Vector128.Create((float)Single_LOGV));
-                    return (Vector128.Create((float)Single_HALFV) * (z + (Vector128.Create((float)Single_INVV2) / z))).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    Vector128<double> x = t.AsDouble();
-
-                    Vector128<double> y = Vector128.Abs(x);
-                    Vector128<double> z = ExpOperator<double>.Invoke(y - Vector128.Create(Double_LOGV));
-                    return (Vector128.Create(Double_HALFV) * (z + (Vector128.Create(Double_INVV2) / z))).As<double, T>();
-                }
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> t)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    Vector256<float> x = t.AsSingle();
-
-                    Vector256<float> y = Vector256.Abs(x);
-                    Vector256<float> z = ExpOperator<float>.Invoke(y - Vector256.Create((float)Single_LOGV));
-                    return (Vector256.Create((float)Single_HALFV) * (z + (Vector256.Create((float)Single_INVV2) / z))).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    Vector256<double> x = t.AsDouble();
-
-                    Vector256<double> y = Vector256.Abs(x);
-                    Vector256<double> z = ExpOperator<double>.Invoke(y - Vector256.Create(Double_LOGV));
-                    return (Vector256.Create(Double_HALFV) * (z + (Vector256.Create(Double_INVV2) / z))).As<double, T>();
-                }
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> t)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    Vector512<float> x = t.AsSingle();
-
-                    Vector512<float> y = Vector512.Abs(x);
-                    Vector512<float> z = ExpOperator<float>.Invoke(y - Vector512.Create((float)Single_LOGV));
-                    return (Vector512.Create((float)Single_HALFV) * (z + (Vector512.Create((float)Single_INVV2) / z))).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    Vector512<double> x = t.AsDouble();
-
-                    Vector512<double> y = Vector512.Abs(x);
-                    Vector512<double> z = ExpOperator<double>.Invoke(y - Vector512.Create(Double_LOGV));
-                    return (Vector512.Create(Double_HALFV) * (z + (Vector512.Create(Double_INVV2) / z))).As<double, T>();
-                }
-            }
-        }
-
-        /// <summary>T.Sin(x)</summary>
-        internal readonly struct SinOperator<T> : IUnaryOperator<T, T>
-            where T : ITrigonometricFunctions<T>
-        {
-            // This code is based on `vrs4_sin` and `vrd2_sin` from amd/aocl-libm-ose
-            // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
-            //
-            // Licensed under the BSD 3-Clause "New" or "Revised" License
-            // See THIRD-PARTY-NOTICES.TXT for the full license text
-
-            // Implementation notes from amd/aocl-libm-ose:
-            // -----------------------------------------------------------------
-            // Convert given x into the form
-            // |x| = N * pi + f where N is an integer and f lies in [-pi/2,pi/2]
-            // N is obtained by : N = round(x/pi)
-            // f is obtained by : f = abs(x)-N*pi
-            // sin(x) = sin(N * pi + f) = sin(N * pi)*cos(f) + cos(N*pi)*sin(f)
-            // sin(x) = sign(x)*sin(f)*(-1)**N
-            //
-            // The term sin(f) can be approximated by using a polynomial
-
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Sin(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-            }
-        }
-
-        /// <summary>float.Sin(x)</summary>
-        private readonly struct SinOperatorSingle : IUnaryOperator<float, float>
-        {
-            internal const uint SignMask = 0x7FFFFFFFu;
-            internal const uint MaxVectorizedValue = 0x49800000u;
-            private const float AlmHuge = 1.2582912e7f;
-            private const float Pi_Tail1 = 8.742278e-8f;
-            private const float Pi_Tail2 = 3.430249e-15f;
-            private const float C1 = -0.16666657f;
-            private const float C2 = 0.0083330255f;
-            private const float C3 = -1.980742e-4f;
-            private const float C4 = 2.6019031e-6f;
-
-            public static bool Vectorizable => true;
-
-            public static float Invoke(float x) => float.Sin(x);
-
-            public static Vector128<float> Invoke(Vector128<float> x)
-            {
-                Vector128<uint> sign = x.AsUInt32() & Vector128.Create(~SignMask);
-                Vector128<uint> uxMasked = Vector128.Abs(x).AsUInt32();
-
-                if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue)))
-                {
-                    return ApplyScalar<SinOperatorSingle>(x);
-                }
-
-                Vector128<float> r = uxMasked.AsSingle();
-                Vector128<float> almHuge = Vector128.Create(AlmHuge);
-                Vector128<float> dn = (r * Vector128.Create(1 / float.Pi)) + almHuge;
-                Vector128<uint> odd = dn.AsUInt32() << 31;
-                dn -= almHuge;
-                Vector128<float> f = r + (dn * Vector128.Create(-float.Pi)) + (dn * Vector128.Create(Pi_Tail1)) + (dn * Vector128.Create(Pi_Tail2));
-
-                // POLY_EVAL_ODD_9
-                Vector128<float> f2 = f * f;
-                Vector128<float> f4 = f2 * f2;
-                Vector128<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1));
-                Vector128<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector128<float>.One);
-                Vector128<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C3), f2, Vector128.Create(C4) * f4);
-                Vector128<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
-                Vector128<float> poly = f * a3;
-
-                return (poly.AsUInt32() ^ sign ^ odd).AsSingle();
-            }
-
-            public static Vector256<float> Invoke(Vector256<float> x)
-            {
-                Vector256<uint> sign = x.AsUInt32() & Vector256.Create(~SignMask);
-                Vector256<uint> uxMasked = Vector256.Abs(x).AsUInt32();
-
-                if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue)))
-                {
-                    return ApplyScalar<SinOperatorSingle>(x);
-                }
-
-                Vector256<float> r = uxMasked.AsSingle();
-                Vector256<float> almHuge = Vector256.Create(AlmHuge);
-                Vector256<float> dn = (r * Vector256.Create(1 / float.Pi)) + almHuge;
-                Vector256<uint> odd = dn.AsUInt32() << 31;
-                dn -= almHuge;
-                Vector256<float> f = r + (dn * Vector256.Create(-float.Pi)) + (dn * Vector256.Create(Pi_Tail1)) + (dn * Vector256.Create(Pi_Tail2));
-
-                // POLY_EVAL_ODD_9
-                Vector256<float> f2 = f * f;
-                Vector256<float> f4 = f2 * f2;
-                Vector256<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1));
-                Vector256<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector256<float>.One);
-                Vector256<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C3), f2, Vector256.Create(C4) * f4);
-                Vector256<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
-                Vector256<float> poly = f * a3;
-
-                return (poly.AsUInt32() ^ sign ^ odd).AsSingle();
-            }
-
-            public static Vector512<float> Invoke(Vector512<float> x)
-            {
-                Vector512<uint> sign = x.AsUInt32() & Vector512.Create(~SignMask);
-                Vector512<uint> uxMasked = Vector512.Abs(x).AsUInt32();
-
-                if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue)))
-                {
-                    return ApplyScalar<SinOperatorSingle>(x);
-                }
-
-                Vector512<float> r = uxMasked.AsSingle();
-                Vector512<float> almHuge = Vector512.Create(AlmHuge);
-                Vector512<float> dn = (r * Vector512.Create(1 / float.Pi)) + almHuge;
-                Vector512<uint> odd = dn.AsUInt32() << 31;
-                dn -= almHuge;
-                Vector512<float> f = r + (dn * Vector512.Create(-float.Pi)) + (dn * Vector512.Create(Pi_Tail1)) + (dn * Vector512.Create(Pi_Tail2));
-
-                // POLY_EVAL_ODD_9
-                Vector512<float> f2 = f * f;
-                Vector512<float> f4 = f2 * f2;
-                Vector512<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
-                Vector512<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector512<float>.One);
-                Vector512<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C3), f2, Vector512.Create(C4) * f4);
-                Vector512<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
-                Vector512<float> poly = f * a3;
-
-                return (poly.AsUInt32() ^ sign ^ odd).AsSingle();
-            }
-        }
-
-        /// <summary>double.Sin(x)</summary>
-        private readonly struct SinOperatorDouble : IUnaryOperator<double, double>
-        {
-            internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul;
-            internal const ulong MaxVectorizedValue = 0x4160000000000000ul;
-            private const double AlmHuge = 6.755399441055744e15;
-            private const double Pi_Tail1 = 1.224646799147353e-16;
-            private const double Pi_Tail2 = 2.165713347843828e-32;
-            private const double C0 = -0.16666666666666666;
-            private const double C2 = 0.008333333333333165;
-            private const double C4 = -1.984126984120184e-4;
-            private const double C6 = 2.7557319210152756e-6;
-            private const double C8 = -2.5052106798274583e-8;
-            private const double C10 = 1.605893649037159e-10;
-            private const double C12 = -7.642917806891047e-13;
-            private const double C14 = 2.7204790957888847e-15;
-
-            public static bool Vectorizable => true;
-
-            public static double Invoke(double x) => double.Sin(x);
-
-            public static Vector128<double> Invoke(Vector128<double> x)
-            {
-                Vector128<ulong> sign = x.AsUInt64() & Vector128.Create(~SignMask);
-                Vector128<ulong> uxMasked = Vector128.Abs(x).AsUInt64();
-
-                if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue)))
-                {
-                    return ApplyScalar<SinOperatorDouble>(x);
-                }
-
-                Vector128<double> r = uxMasked.AsDouble();
-                Vector128<double> almHuge = Vector128.Create(AlmHuge);
-                Vector128<double> dn = (r * Vector128.Create(1 / double.Pi)) + almHuge;
-                Vector128<ulong> odd = dn.AsUInt64() << 63;
-                dn -= almHuge;
-                Vector128<double> f = r - (dn * Vector128.Create(double.Pi)) - (dn * Vector128.Create(Pi_Tail1)) - (dn * Vector128.Create(Pi_Tail2));
-
-                // POLY_EVAL_ODD_17
-                Vector128<double> f2 = f * f;
-                Vector128<double> f4 = f2 * f2;
-                Vector128<double> f6 = f4 * f2;
-                Vector128<double> f10 = f6 * f4;
-                Vector128<double> f14 = f10 * f4;
-                Vector128<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C0));
-                Vector128<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C6), f2, Vector128.Create(C4));
-                Vector128<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C10), f2, Vector128.Create(C8));
-                Vector128<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C14), f2, Vector128.Create(C12));
-                Vector128<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
-                Vector128<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
-                Vector128<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
-
-                return (poly.AsUInt64() ^ sign ^ odd).AsDouble();
-            }
-
-            public static Vector256<double> Invoke(Vector256<double> x)
-            {
-                Vector256<ulong> sign = x.AsUInt64() & Vector256.Create(~SignMask);
-                Vector256<ulong> uxMasked = Vector256.Abs(x).AsUInt64();
-
-                if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue)))
-                {
-                    return ApplyScalar<SinOperatorDouble>(x);
-                }
-
-                Vector256<double> r = uxMasked.AsDouble();
-                Vector256<double> almHuge = Vector256.Create(AlmHuge);
-                Vector256<double> dn = (r * Vector256.Create(1 / double.Pi)) + almHuge;
-                Vector256<ulong> odd = dn.AsUInt64() << 63;
-                dn -= almHuge;
-                Vector256<double> f = r - (dn * Vector256.Create(double.Pi)) - (dn * Vector256.Create(Pi_Tail1)) - (dn * Vector256.Create(Pi_Tail2));
-
-                // POLY_EVAL_ODD_17
-                Vector256<double> f2 = f * f;
-                Vector256<double> f4 = f2 * f2;
-                Vector256<double> f6 = f4 * f2;
-                Vector256<double> f10 = f6 * f4;
-                Vector256<double> f14 = f10 * f4;
-                Vector256<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C0));
-                Vector256<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C6), f2, Vector256.Create(C4));
-                Vector256<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C10), f2, Vector256.Create(C8));
-                Vector256<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C14), f2, Vector256.Create(C12));
-                Vector256<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
-                Vector256<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
-                Vector256<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
-
-                return (poly.AsUInt64() ^ sign ^ odd).AsDouble();
-            }
-
-            public static Vector512<double> Invoke(Vector512<double> x)
-            {
-                Vector512<ulong> sign = x.AsUInt64() & Vector512.Create(~SignMask);
-                Vector512<ulong> uxMasked = Vector512.Abs(x).AsUInt64();
-
-                if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue)))
-                {
-                    return ApplyScalar<SinOperatorDouble>(x);
-                }
-
-                Vector512<double> r = uxMasked.AsDouble();
-                Vector512<double> almHuge = Vector512.Create(AlmHuge);
-                Vector512<double> dn = (r * Vector512.Create(1 / double.Pi)) + almHuge;
-                Vector512<ulong> odd = dn.AsUInt64() << 63;
-                dn -= almHuge;
-                Vector512<double> f = r - (dn * Vector512.Create(double.Pi)) - (dn * Vector512.Create(Pi_Tail1)) - (dn * Vector512.Create(Pi_Tail2));
-
-                // POLY_EVAL_ODD_17
-                Vector512<double> f2 = f * f;
-                Vector512<double> f4 = f2 * f2;
-                Vector512<double> f6 = f4 * f2;
-                Vector512<double> f10 = f6 * f4;
-                Vector512<double> f14 = f10 * f4;
-                Vector512<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C0));
-                Vector512<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C6), f2, Vector512.Create(C4));
-                Vector512<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C10), f2, Vector512.Create(C8));
-                Vector512<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C14), f2, Vector512.Create(C12));
-                Vector512<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
-                Vector512<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
-                Vector512<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
-
-                return (poly.AsUInt64() ^ sign ^ odd).AsDouble();
-            }
-        }
-
-        /// <summary>T.SinPi(x)</summary>
-        internal readonly struct SinPiOperator<T> : IUnaryOperator<T, T>
-            where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.SinPi(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                Vector128<T> xpi = x * Vector128.Create(T.Pi);
-                if (typeof(T) == typeof(float))
-                {
-                    if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(SinOperatorSingle.SignMask), Vector128.Create(SinOperatorSingle.MaxVectorizedValue)))
-                    {
-                        return ApplyScalar<SinPiOperator<float>>(x.AsSingle()).As<float, T>();
-                    }
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    if (Vector128.GreaterThanAny(xpi.AsUInt64() & Vector128.Create(SinOperatorDouble.SignMask), Vector128.Create(SinOperatorDouble.MaxVectorizedValue)))
-                    {
-                        return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>();
-                    }
-                }
-
-                return SinOperator<T>.Invoke(xpi);
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                Vector256<T> xpi = x * Vector256.Create(T.Pi);
-                if (typeof(T) == typeof(float))
-                {
-                    if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(SinOperatorSingle.SignMask), Vector256.Create(SinOperatorSingle.MaxVectorizedValue)))
-                    {
-                        return ApplyScalar<SinPiOperator<float>>(x.AsSingle()).As<float, T>();
-                    }
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    if (Vector256.GreaterThanAny(xpi.AsUInt64() & Vector256.Create(SinOperatorDouble.SignMask), Vector256.Create(SinOperatorDouble.MaxVectorizedValue)))
-                    {
-                        return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>();
-                    }
-                }
-
-                return SinOperator<T>.Invoke(xpi);
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                Vector512<T> xpi = x * Vector512.Create(T.Pi);
-                if (typeof(T) == typeof(float))
-                {
-                    if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(SinOperatorSingle.SignMask), Vector512.Create(SinOperatorSingle.MaxVectorizedValue)))
-                    {
-                        return ApplyScalar<SinPiOperator<float>>(x.AsSingle()).As<float, T>();
-                    }
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    if (Vector512.GreaterThanAny(xpi.AsUInt64() & Vector512.Create(SinOperatorDouble.SignMask), Vector512.Create(SinOperatorDouble.MaxVectorizedValue)))
-                    {
-                        return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>();
-                    }
-                }
-
-                return SinOperator<T>.Invoke(xpi);
-            }
-        }
-
-        /// <summary>T.Sinh(x)</summary>
-        internal readonly struct SinhOperator<T> : IUnaryOperator<T, T>
-            where T : IHyperbolicFunctions<T>
-        {
-            // Same as cosh, but with `z -` rather than `z +`, and with the sign
-            // flipped on the result based on the sign of the input.
-
-            private const float Single_LOGV = 0.693161f;
-            private const float Single_HALFV = 1.0000138f;
-            private const float Single_INVV2 = 0.24999309f;
-
-            private const double Double_LOGV = 0.6931471805599453;
-            private const double Double_HALFV = 1.0;
-            private const double Double_INVV2 = 0.25;
-
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Sinh(x);
-
-            public static Vector128<T> Invoke(Vector128<T> t)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    Vector128<float> x = t.AsSingle();
-
-                    Vector128<float> y = Vector128.Abs(x);
-                    Vector128<float> z = ExpOperator<float>.Invoke(y - Vector128.Create((float)Single_LOGV));
-                    Vector128<float> result = Vector128.Create((float)Single_HALFV) * (z - (Vector128.Create((float)Single_INVV2) / z));
-                    Vector128<uint> sign = x.AsUInt32() & Vector128.Create(~(uint)int.MaxValue);
-                    return (sign ^ result.AsUInt32()).As<uint, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    Vector128<double> x = t.AsDouble();
-
-                    Vector128<double> y = Vector128.Abs(x);
-                    Vector128<double> z = ExpOperator<double>.Invoke(y - Vector128.Create(Double_LOGV));
-                    Vector128<double> result = Vector128.Create(Double_HALFV) * (z - (Vector128.Create(Double_INVV2) / z));
-                    Vector128<ulong> sign = x.AsUInt64() & Vector128.Create(~(ulong)long.MaxValue);
-                    return (sign ^ result.AsUInt64()).As<ulong, T>();
-                }
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> t)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    Vector256<float> x = t.AsSingle();
-
-                    Vector256<float> y = Vector256.Abs(x);
-                    Vector256<float> z = ExpOperator<float>.Invoke(y - Vector256.Create((float)Single_LOGV));
-                    Vector256<float> result = Vector256.Create((float)Single_HALFV) * (z - (Vector256.Create((float)Single_INVV2) / z));
-                    Vector256<uint> sign = x.AsUInt32() & Vector256.Create(~(uint)int.MaxValue);
-                    return (sign ^ result.AsUInt32()).As<uint, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    Vector256<double> x = t.AsDouble();
-
-                    Vector256<double> y = Vector256.Abs(x);
-                    Vector256<double> z = ExpOperator<double>.Invoke(y - Vector256.Create(Double_LOGV));
-                    Vector256<double> result = Vector256.Create(Double_HALFV) * (z - (Vector256.Create(Double_INVV2) / z));
-                    Vector256<ulong> sign = x.AsUInt64() & Vector256.Create(~(ulong)long.MaxValue);
-                    return (sign ^ result.AsUInt64()).As<ulong, T>();
-                }
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> t)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    Vector512<float> x = t.AsSingle();
-
-                    Vector512<float> y = Vector512.Abs(x);
-                    Vector512<float> z = ExpOperator<float>.Invoke(y - Vector512.Create((float)Single_LOGV));
-                    Vector512<float> result = Vector512.Create((float)Single_HALFV) * (z - (Vector512.Create((float)Single_INVV2) / z));
-                    Vector512<uint> sign = x.AsUInt32() & Vector512.Create(~(uint)int.MaxValue);
-                    return (sign ^ result.AsUInt32()).As<uint, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    Vector512<double> x = t.AsDouble();
-
-                    Vector512<double> y = Vector512.Abs(x);
-                    Vector512<double> z = ExpOperator<double>.Invoke(y - Vector512.Create(Double_LOGV));
-                    Vector512<double> result = Vector512.Create(Double_HALFV) * (z - (Vector512.Create(Double_INVV2) / z));
-                    Vector512<ulong> sign = x.AsUInt64() & Vector512.Create(~(ulong)long.MaxValue);
-                    return (sign ^ result.AsUInt64()).As<ulong, T>();
-                }
-            }
-        }
-
-        /// <summary>T.Tan(x)</summary>
-        internal readonly struct TanOperator<T> : IUnaryOperator<T, T>
-            where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => false; // TODO: Vectorize
-            public static T Invoke(T x) => T.Tan(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.TanPi(x)</summary>
-        internal readonly struct TanPiOperator<T> : IUnaryOperator<T, T>
-            where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => TanOperator<T>.Vectorizable;
-            public static T Invoke(T x) => T.TanPi(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => TanOperator<T>.Invoke(x * Vector128.Create(T.Pi));
-            public static Vector256<T> Invoke(Vector256<T> x) => TanOperator<T>.Invoke(x * Vector256.Create(T.Pi));
-            public static Vector512<T> Invoke(Vector512<T> x) => TanOperator<T>.Invoke(x * Vector512.Create(T.Pi));
-        }
-
-        /// <summary>T.Tanh(x)</summary>
-        internal readonly struct TanhOperator<T> : IUnaryOperator<T, T>
-            where T : IHyperbolicFunctions<T>
-        {
-            // This code is based on `vrs4_tanhf` from amd/aocl-libm-ose
-            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
-            //
-            // Licensed under the BSD 3-Clause "New" or "Revised" License
-            // See THIRD-PARTY-NOTICES.TXT for the full license text
-
-            // To compute vrs4_tanhf(v_f32x4_t x)
-            // Let y = |x|
-            // If 0 <= y < 0x1.154246p3
-            //    Let z = e^(-2.0 * y) - 1      -(1)
-            //
-            //    Using (1), tanhf(y) can be calculated as,
-            //    tanhf(y) = -z / (z + 2.0)
-            //
-            // For other cases, call scalar tanhf()
-            //
-            // If x < 0, then we use the identity
-            //    tanhf(-x) = -tanhf(x)
-
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Tanh(x);
-
-            public static Vector128<T> Invoke(Vector128<T> t)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    Vector128<float> x = t.AsSingle();
-
-                    Vector128<float> y = Vector128.Abs(x);
-                    Vector128<float> z = ExpM1Operator<float>.Invoke(Vector128.Create(-2f) * y);
-                    Vector128<uint> sign = x.AsUInt32() & Vector128.Create(~(uint)int.MaxValue);
-                    return (sign ^ (-z / (z + Vector128.Create(2f))).AsUInt32()).As<uint, T>();
-                }
-                else
-                {
-                    Vector128<double> x = t.AsDouble();
-
-                    Vector128<double> y = Vector128.Abs(x);
-                    Vector128<double> z = ExpM1Operator<double>.Invoke(Vector128.Create(-2d) * y);
-                    Vector128<ulong> sign = x.AsUInt64() & Vector128.Create(~(ulong)long.MaxValue);
-                    return (sign ^ (-z / (z + Vector128.Create(2d))).AsUInt64()).As<ulong, T>();
-                }
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> t)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    Vector256<float> x = t.AsSingle();
-
-                    Vector256<float> y = Vector256.Abs(x);
-                    Vector256<float> z = ExpM1Operator<float>.Invoke(Vector256.Create(-2f) * y);
-                    Vector256<uint> sign = x.AsUInt32() & Vector256.Create(~(uint)int.MaxValue);
-                    return (sign ^ (-z / (z + Vector256.Create(2f))).AsUInt32()).As<uint, T>();
-                }
-                else
-                {
-                    Vector256<double> x = t.AsDouble();
-
-                    Vector256<double> y = Vector256.Abs(x);
-                    Vector256<double> z = ExpM1Operator<double>.Invoke(Vector256.Create(-2d) * y);
-                    Vector256<ulong> sign = x.AsUInt64() & Vector256.Create(~(ulong)long.MaxValue);
-                    return (sign ^ (-z / (z + Vector256.Create(2d))).AsUInt64()).As<ulong, T>();
-                }
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> t)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    Vector512<float> x = t.AsSingle();
-
-                    Vector512<float> y = Vector512.Abs(x);
-                    Vector512<float> z = ExpM1Operator<float>.Invoke(Vector512.Create(-2f) * y);
-                    Vector512<uint> sign = x.AsUInt32() & Vector512.Create(~(uint)int.MaxValue);
-                    return (sign ^ (-z / (z + Vector512.Create(2f))).AsUInt32()).As<uint, T>();
-                }
-                else
-                {
-                    Vector512<double> x = t.AsDouble();
-
-                    Vector512<double> y = Vector512.Abs(x);
-                    Vector512<double> z = ExpM1Operator<double>.Invoke(Vector512.Create(-2d) * y);
-                    Vector512<ulong> sign = x.AsUInt64() & Vector512.Create(~(ulong)long.MaxValue);
-                    return (sign ^ (-z / (z + Vector512.Create(2d))).AsUInt64()).As<ulong, T>();
-                }
-            }
-        }
-
-        /// <summary>T.Log(x)</summary>
-        internal readonly struct LogOperator<T> : IUnaryOperator<T, T>
-            where T : ILogarithmicFunctions<T>
-        {
-            public static bool Vectorizable => (typeof(T) == typeof(double))
-                                            || (typeof(T) == typeof(float));
-
-            public static T Invoke(T x) => T.Log(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-#if NET9_0_OR_GREATER
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector128.Log(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return Vector128.Log(x.AsSingle()).As<float, T>();
-                }
-#else
-                if (typeof(T) == typeof(double))
-                {
-                    return LogOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return LogOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-#endif
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-#if NET9_0_OR_GREATER
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector256.Log(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return Vector256.Log(x.AsSingle()).As<float, T>();
-                }
-#else
-                if (typeof(T) == typeof(double))
-                {
-                    return LogOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return LogOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-#endif
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-#if NET9_0_OR_GREATER
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector512.Log(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return Vector512.Log(x.AsSingle()).As<float, T>();
-                }
-#else
-                if (typeof(T) == typeof(double))
-                {
-                    return LogOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return LogOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-#endif
-            }
-        }
-
-#if !NET9_0_OR_GREATER
-        /// <summary>double.Log(x)</summary>
-        internal readonly struct LogOperatorDouble : IUnaryOperator<double, double>
-        {
-            // This code is based on `vrd2_log` from amd/aocl-libm-ose
-            // Copyright (C) 2018-2020 Advanced Micro Devices, Inc. All rights reserved.
-            //
-            // Licensed under the BSD 3-Clause "New" or "Revised" License
-            // See THIRD-PARTY-NOTICES.TXT for the full license text
-
-            // Reduce x into the form:
-            //        x = (-1)^s*2^n*m
-            // s will be always zero, as log is defined for positive numbers
-            // n is an integer known as the exponent
-            // m is mantissa
-            //
-            // x is reduced such that the mantissa, m lies in [2/3,4/3]
-            //      x = 2^n*m where m is in [2/3,4/3]
-            //      log(x) = log(2^n*m)                 We have log(a*b) = log(a)+log(b)
-            //             = log(2^n) + log(m)          We have log(a^n) = n*log(a)
-            //             = n*log(2) + log(m)
-            //             = n*log(2) + log(1+(m-1))
-            //             = n*log(2) + log(1+f)        Where f = m-1
-            //             = n*log(2) + log1p(f)        f lies in [-1/3,+1/3]
-            //
-            // Thus we have :
-            // log(x) = n*log(2) + log1p(f)
-            // In the above, the first term n*log(2), n can be calculated by using right shift operator and the value of log(2)
-            // is known and is stored as a constant
-            // The second term log1p(F) is approximated by using a polynomial
-
-            private const ulong V_MIN = 0x00100000_00000000;    // SmallestNormal
-            private const ulong V_MAX = 0x7FF00000_00000000;    // +Infinity
-            private const ulong V_MSK = 0x000FFFFF_FFFFFFFF;    // (1 << 52) - 1
-            private const ulong V_OFF = 0x3FE55555_55555555;    // 2.0 / 3.0
-
-            private const double LN2_HEAD = 0.693359375;
-            private const double LN2_TAIL = -0.00021219444005469057;
-
-            private const double C02 = -0.499999999999999560;
-            private const double C03 = +0.333333333333414750;
-            private const double C04 = -0.250000000000297430;
-            private const double C05 = +0.199999999975985220;
-            private const double C06 = -0.166666666608919500;
-            private const double C07 = +0.142857145600277100;
-            private const double C08 = -0.125000005127831270;
-            private const double C09 = +0.111110952357159440;
-            private const double C10 = -0.099999750495501240;
-            private const double C11 = +0.090914349823462390;
-            private const double C12 = -0.083340600527551860;
-            private const double C13 = +0.076817603328311300;
-            private const double C14 = -0.071296718946287310;
-            private const double C15 = +0.067963465211535730;
-            private const double C16 = -0.063995035098960040;
-            private const double C17 = +0.049370587082412105;
-            private const double C18 = -0.045370170994891980;
-            private const double C19 = +0.088970636003577750;
-            private const double C20 = -0.086906174116908760;
-
-            public static bool Vectorizable => true;
-
-            public static double Invoke(double x) => double.Log(x);
-
-            public static Vector128<double> Invoke(Vector128<double> x)
-            {
-                Vector128<double> specialResult = x;
-
-                // x is zero, subnormal, infinity, or NaN
-                Vector128<ulong> specialMask = Vector128.GreaterThanOrEqual(x.AsUInt64() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN));
-
-                if (specialMask != Vector128<ulong>.Zero)
-                {
-                    Vector128<long> xBits = x.AsInt64();
-
-                    // (x < 0) ? float.NaN : x
-                    Vector128<double> lessThanZeroMask = Vector128.LessThan(xBits, Vector128<long>.Zero).AsDouble();
-
-                    specialResult = Vector128.ConditionalSelect(
-                        lessThanZeroMask,
-                        Vector128.Create(double.NaN),
-                        specialResult
-                    );
-
-                    // double.IsZero(x) ? double.NegativeInfinity : x
-                    Vector128<double> zeroMask = Vector128.Equals(xBits << 1, Vector128<long>.Zero).AsDouble();
-
-                    specialResult = Vector128.ConditionalSelect(
-                        zeroMask,
-                        Vector128.Create(double.NegativeInfinity),
-                        specialResult
-                    );
-
-                    // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x)
-                    Vector128<double> temp = zeroMask
-                                           | lessThanZeroMask
-                                           | Vector128.GreaterThanOrEqual(xBits, Vector128.Create(double.PositiveInfinity).AsInt64()).AsDouble();
-
-                    // subnormal
-                    Vector128<double> subnormalMask = Vector128.AndNot(specialMask.AsDouble(), temp);
-
-                    // multiply by 2^52, then normalize
-                    x = Vector128.ConditionalSelect(
-                        subnormalMask,
-                        ((x * 4503599627370496.0).AsUInt64() - Vector128.Create(52ul << 52)).AsDouble(),
-                        x
-                    );
-
-                    specialMask = temp.AsUInt64();
-                }
-
-                // Reduce the mantissa to [+2/3, +4/3]
-                Vector128<ulong> vx = x.AsUInt64() - Vector128.Create(V_OFF);
-                Vector128<double> n = Vector128.ConvertToDouble(vx.AsInt64() >> 52);
-                vx = (vx & Vector128.Create(V_MSK)) + Vector128.Create(V_OFF);
-
-                // Adjust the mantissa to [-1/3, +1/3]
-                Vector128<double> r = vx.AsDouble() - Vector128<double>.One;
-
-                Vector128<double> r02 = r * r;
-                Vector128<double> r04 = r02 * r02;
-                Vector128<double> r08 = r04 * r04;
-                Vector128<double> r16 = r08 * r08;
-
-                // Compute log(x + 1) using Polynomial approximation
-                //      C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
-
-                Vector128<double> poly = (((r04 * C20)
-                                        + ((((r * C19) + Vector128.Create(C18)) * r02)
-                                          + ((r * C17) + Vector128.Create(C16)))) * r16)
-                                     + (((((((r * C15) + Vector128.Create(C14)) * r02)
-                                          + ((r * C13) + Vector128.Create(C12))) * r04)
-                                        + ((((r * C11) + Vector128.Create(C10)) * r02)
-                                          + ((r * C09) + Vector128.Create(C08)))) * r08)
-                                       + (((((r * C07) + Vector128.Create(C06)) * r02)
-                                          + ((r * C05) + Vector128.Create(C04))) * r04)
-                                        + ((((r * C03) + Vector128.Create(C02)) * r02) + r);
-
-                return Vector128.ConditionalSelect(
-                    specialMask.AsDouble(),
-                    specialResult,
-                    (n * LN2_HEAD) + ((n * LN2_TAIL) + poly)
-                );
-            }
-
-            public static Vector256<double> Invoke(Vector256<double> x)
-            {
-                Vector256<double> specialResult = x;
-
-                // x is zero, subnormal, infinity, or NaN
-                Vector256<ulong> specialMask = Vector256.GreaterThanOrEqual(x.AsUInt64() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN));
-
-                if (specialMask != Vector256<ulong>.Zero)
-                {
-                    Vector256<long> xBits = x.AsInt64();
-
-                    // (x < 0) ? float.NaN : x
-                    Vector256<double> lessThanZeroMask = Vector256.LessThan(xBits, Vector256<long>.Zero).AsDouble();
-
-                    specialResult = Vector256.ConditionalSelect(
-                        lessThanZeroMask,
-                        Vector256.Create(double.NaN),
-                        specialResult
-                    );
-
-                    // double.IsZero(x) ? double.NegativeInfinity : x
-                    Vector256<double> zeroMask = Vector256.Equals(xBits << 1, Vector256<long>.Zero).AsDouble();
-
-                    specialResult = Vector256.ConditionalSelect(
-                        zeroMask,
-                        Vector256.Create(double.NegativeInfinity),
-                        specialResult
-                    );
-
-                    // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x)
-                    Vector256<double> temp = zeroMask
-                                           | lessThanZeroMask
-                                           | Vector256.GreaterThanOrEqual(xBits, Vector256.Create(double.PositiveInfinity).AsInt64()).AsDouble();
-
-                    // subnormal
-                    Vector256<double> subnormalMask = Vector256.AndNot(specialMask.AsDouble(), temp);
-
-                    // multiply by 2^52, then normalize
-                    x = Vector256.ConditionalSelect(
-                        subnormalMask,
-                        ((x * 4503599627370496.0).AsUInt64() - Vector256.Create(52ul << 52)).AsDouble(),
-                        x
-                    );
-
-                    specialMask = temp.AsUInt64();
-                }
-
-                // Reduce the mantissa to [+2/3, +4/3]
-                Vector256<ulong> vx = x.AsUInt64() - Vector256.Create(V_OFF);
-                Vector256<double> n = Vector256.ConvertToDouble(vx.AsInt64() >> 52);
-                vx = (vx & Vector256.Create(V_MSK)) + Vector256.Create(V_OFF);
-
-                // Adjust the mantissa to [-1/3, +1/3]
-                Vector256<double> r = vx.AsDouble() - Vector256<double>.One;
-
-                Vector256<double> r02 = r * r;
-                Vector256<double> r04 = r02 * r02;
-                Vector256<double> r08 = r04 * r04;
-                Vector256<double> r16 = r08 * r08;
-
-                // Compute log(x + 1) using Polynomial approximation
-                //      C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
-
-                Vector256<double> poly = (((r04 * C20)
-                                        + ((((r * C19) + Vector256.Create(C18)) * r02)
-                                          + ((r * C17) + Vector256.Create(C16)))) * r16)
-                                     + (((((((r * C15) + Vector256.Create(C14)) * r02)
-                                          + ((r * C13) + Vector256.Create(C12))) * r04)
-                                        + ((((r * C11) + Vector256.Create(C10)) * r02)
-                                          + ((r * C09) + Vector256.Create(C08)))) * r08)
-                                       + (((((r * C07) + Vector256.Create(C06)) * r02)
-                                          + ((r * C05) + Vector256.Create(C04))) * r04)
-                                        + ((((r * C03) + Vector256.Create(C02)) * r02) + r);
-
-                return Vector256.ConditionalSelect(
-                    specialMask.AsDouble(),
-                    specialResult,
-                    (n * LN2_HEAD) + ((n * LN2_TAIL) + poly)
-                );
-            }
-
-            public static Vector512<double> Invoke(Vector512<double> x)
-            {
-                Vector512<double> specialResult = x;
-
-                // x is zero, subnormal, infinity, or NaN
-                Vector512<ulong> specialMask = Vector512.GreaterThanOrEqual(x.AsUInt64() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN));
-
-                if (specialMask != Vector512<ulong>.Zero)
-                {
-                    Vector512<long> xBits = x.AsInt64();
-
-                    // (x < 0) ? float.NaN : x
-                    Vector512<double> lessThanZeroMask = Vector512.LessThan(xBits, Vector512<long>.Zero).AsDouble();
-
-                    specialResult = Vector512.ConditionalSelect(
-                        lessThanZeroMask,
-                        Vector512.Create(double.NaN),
-                        specialResult
-                    );
-
-                    // double.IsZero(x) ? double.NegativeInfinity : x
-                    Vector512<double> zeroMask = Vector512.Equals(xBits << 1, Vector512<long>.Zero).AsDouble();
-
-                    specialResult = Vector512.ConditionalSelect(
-                        zeroMask,
-                        Vector512.Create(double.NegativeInfinity),
-                        specialResult
-                    );
-
-                    // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x)
-                    Vector512<double> temp = zeroMask
-                                           | lessThanZeroMask
-                                           | Vector512.GreaterThanOrEqual(xBits, Vector512.Create(double.PositiveInfinity).AsInt64()).AsDouble();
-
-                    // subnormal
-                    Vector512<double> subnormalMask = Vector512.AndNot(specialMask.AsDouble(), temp);
-
-                    // multiply by 2^52, then normalize
-                    x = Vector512.ConditionalSelect(
-                        subnormalMask,
-                        ((x * 4503599627370496.0).AsUInt64() - Vector512.Create(52ul << 52)).AsDouble(),
-                        x
-                    );
-
-                    specialMask = temp.AsUInt64();
-                }
-
-                // Reduce the mantissa to [+2/3, +4/3]
-                Vector512<ulong> vx = x.AsUInt64() - Vector512.Create(V_OFF);
-                Vector512<double> n = Vector512.ConvertToDouble(vx.AsInt64() >> 52);
-                vx = (vx & Vector512.Create(V_MSK)) + Vector512.Create(V_OFF);
-
-                // Adjust the mantissa to [-1/3, +1/3]
-                Vector512<double> r = vx.AsDouble() - Vector512<double>.One;
-
-                Vector512<double> r02 = r * r;
-                Vector512<double> r04 = r02 * r02;
-                Vector512<double> r08 = r04 * r04;
-                Vector512<double> r16 = r08 * r08;
-
-                // Compute log(x + 1) using Polynomial approximation
-                //      C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
-
-                Vector512<double> poly = (((r04 * C20)
-                                        + ((((r * C19) + Vector512.Create(C18)) * r02)
-                                          + ((r * C17) + Vector512.Create(C16)))) * r16)
-                                     + (((((((r * C15) + Vector512.Create(C14)) * r02)
-                                          + ((r * C13) + Vector512.Create(C12))) * r04)
-                                        + ((((r * C11) + Vector512.Create(C10)) * r02)
-                                          + ((r * C09) + Vector512.Create(C08)))) * r08)
-                                       + (((((r * C07) + Vector512.Create(C06)) * r02)
-                                          + ((r * C05) + Vector512.Create(C04))) * r04)
-                                        + ((((r * C03) + Vector512.Create(C02)) * r02) + r);
-
-                return Vector512.ConditionalSelect(
-                    specialMask.AsDouble(),
-                    specialResult,
-                    (n * LN2_HEAD) + ((n * LN2_TAIL) + poly)
-                );
-            }
-        }
-
-        /// <summary>float.Log(x)</summary>
-        internal readonly struct LogOperatorSingle : IUnaryOperator<float, float>
-        {
-            // This code is based on `vrs4_logf` from amd/aocl-libm-ose
-            // Copyright (C) 2018-2019 Advanced Micro Devices, Inc. All rights reserved.
-            //
-            // Licensed under the BSD 3-Clause "New" or "Revised" License
-            // See THIRD-PARTY-NOTICES.TXT for the full license text
-
-            // Spec:
-            //   logf(x)
-            //          = logf(x)           if x ∈ F and x > 0
-            //          = x                 if x = qNaN
-            //          = 0                 if x = 1
-            //          = -inf              if x = (-0, 0}
-            //          = NaN               otherwise
-            //
-            // Assumptions/Expectations
-            //      - ULP is derived to be << 4 (always)
-            // - Some FPU Exceptions may not be available
-            //      - Performance is at least 3x
-            //
-            // Implementation Notes:
-            //  1. Range Reduction:
-            //      x = 2^n*(1+f)                                          .... (1)
-            //         where n is exponent and is an integer
-            //             (1+f) is mantissa ∈ [1,2). i.e., 1 ≤ 1+f < 2    .... (2)
-            //
-            //    From (1), taking log on both sides
-            //      log(x) = log(2^n * (1+f))
-            //             = log(2^n) + log(1+f)
-            //             = n*log(2) + log(1+f)                           .... (3)
-            //
-            //      let z = 1 + f
-            //             log(z) = log(k) + log(z) - log(k)
-            //             log(z) = log(kz) - log(k)
-            //
-            //    From (2), range of z is [1, 2)
-            //       by simply dividing range by 'k', z is in [1/k, 2/k)  .... (4)
-            //       Best choice of k is the one which gives equal and opposite values
-            //       at extrema        +-      -+
-            //              1          | 2      |
-            //             --- - 1 = - |--- - 1 |
-            //              k          | k      |                         .... (5)
-            //                         +-      -+
-            //
-            //       Solving for k, k = 3/2,
-            //    From (4), using 'k' value, range is therefore [-0.3333, 0.3333]
-            //
-            //  2. Polynomial Approximation:
-            //     More information refer to tools/sollya/vrs4_logf.sollya
-            //
-            //     7th Deg -   Error abs: 0x1.04c4ac98p-22   rel: 0x1.2216e6f8p-19
-            //     6th Deg -   Error abs: 0x1.179e97d8p-19   rel: 0x1.db676c1p-17
-
-            private const uint V_MIN = 0x00800000;
-            private const uint V_MAX = 0x7F800000;
-            private const uint V_MASK = 0x007FFFFF;
-            private const uint V_OFF = 0x3F2AAAAB;
-
-            private const float V_LN2 = 0.6931472f;
-
-            private const float C0 = 0.0f;
-            private const float C1 = 1.0f;
-            private const float C2 = -0.5000001f;
-            private const float C3 = 0.33332965f;
-            private const float C4 = -0.24999046f;
-            private const float C5 = 0.20018855f;
-            private const float C6 = -0.16700386f;
-            private const float C7 = 0.13902695f;
-            private const float C8 = -0.1197452f;
-            private const float C9 = 0.14401625f;
-            private const float C10 = -0.13657966f;
-
-            public static bool Vectorizable => true;
-
-            public static float Invoke(float x) => float.Log(x);
-
-            public static Vector128<float> Invoke(Vector128<float> x)
-            {
-                Vector128<float> specialResult = x;
-
-                // x is subnormal or infinity or NaN
-                Vector128<uint> specialMask = Vector128.GreaterThanOrEqual(x.AsUInt32() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN));
-
-                if (specialMask != Vector128<uint>.Zero)
-                {
-                    // float.IsZero(x) ? float.NegativeInfinity : x
-                    Vector128<float> zeroMask = Vector128.Equals(x, Vector128<float>.Zero);
-
-                    specialResult = Vector128.ConditionalSelect(
-                        zeroMask,
-                        Vector128.Create(float.NegativeInfinity),
-                        specialResult
-                    );
-
-                    // (x < 0) ? float.NaN : x
-                    Vector128<float> lessThanZeroMask = Vector128.LessThan(x, Vector128<float>.Zero);
-
-                    specialResult = Vector128.ConditionalSelect(
-                        lessThanZeroMask,
-                        Vector128.Create(float.NaN),
-                        specialResult
-                    );
-
-                    // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x)
-                    Vector128<float> temp = zeroMask
-                                          | lessThanZeroMask
-                                          | ~Vector128.Equals(x, x)
-                                          | Vector128.Equals(x, Vector128.Create(float.PositiveInfinity));
-
-                    // subnormal
-                    Vector128<float> subnormalMask = Vector128.AndNot(specialMask.AsSingle(), temp);
-
-                    x = Vector128.ConditionalSelect(
-                        subnormalMask,
-                        ((x * 8388608.0f).AsUInt32() - Vector128.Create(23u << 23)).AsSingle(),
-                        x
-                    );
-
-                    specialMask = temp.AsUInt32();
-                }
-
-                Vector128<uint> vx = x.AsUInt32() - Vector128.Create(V_OFF);
-                Vector128<float> n = Vector128.ConvertToSingle(Vector128.ShiftRightArithmetic(vx.AsInt32(), 23));
-
-                vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF);
-
-                Vector128<float> r = vx.AsSingle() - Vector128<float>.One;
-
-                Vector128<float> r2 = r * r;
-                Vector128<float> r4 = r2 * r2;
-                Vector128<float> r8 = r4 * r4;
-
-                Vector128<float> q = (Vector128.Create(C10) * r2 + (Vector128.Create(C9) * r + Vector128.Create(C8)))
-                                                          * r8 + (((Vector128.Create(C7) * r + Vector128.Create(C6))
-                                                            * r2 + (Vector128.Create(C5) * r + Vector128.Create(C4)))
-                                                           * r4 + ((Vector128.Create(C3) * r + Vector128.Create(C2))
-                                                            * r2 + (Vector128.Create(C1) * r + Vector128.Create(C0))));
-
-                return Vector128.ConditionalSelect(
-                    specialMask.AsSingle(),
-                    specialResult,
-                    n * Vector128.Create(V_LN2) + q
-                );
-            }
-
-            public static Vector256<float> Invoke(Vector256<float> x)
-            {
-                Vector256<float> specialResult = x;
-
-                // x is subnormal or infinity or NaN
-                Vector256<uint> specialMask = Vector256.GreaterThanOrEqual(x.AsUInt32() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN));
-
-                if (specialMask != Vector256<uint>.Zero)
-                {
-                    // float.IsZero(x) ? float.NegativeInfinity : x
-                    Vector256<float> zeroMask = Vector256.Equals(x, Vector256<float>.Zero);
-
-                    specialResult = Vector256.ConditionalSelect(
-                        zeroMask,
-                        Vector256.Create(float.NegativeInfinity),
-                        specialResult
-                    );
-
-                    // (x < 0) ? float.NaN : x
-                    Vector256<float> lessThanZeroMask = Vector256.LessThan(x, Vector256<float>.Zero);
-
-                    specialResult = Vector256.ConditionalSelect(
-                        lessThanZeroMask,
-                        Vector256.Create(float.NaN),
-                        specialResult
-                    );
-
-                    // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x)
-                    Vector256<float> temp = zeroMask
-                                          | lessThanZeroMask
-                                          | ~Vector256.Equals(x, x)
-                                          | Vector256.Equals(x, Vector256.Create(float.PositiveInfinity));
-
-                    // subnormal
-                    Vector256<float> subnormalMask = Vector256.AndNot(specialMask.AsSingle(), temp);
-
-                    x = Vector256.ConditionalSelect(
-                        subnormalMask,
-                        ((x * 8388608.0f).AsUInt32() - Vector256.Create(23u << 23)).AsSingle(),
-                        x
-                    );
-
-                    specialMask = temp.AsUInt32();
-                }
-
-                Vector256<uint> vx = x.AsUInt32() - Vector256.Create(V_OFF);
-                Vector256<float> n = Vector256.ConvertToSingle(Vector256.ShiftRightArithmetic(vx.AsInt32(), 23));
-
-                vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF);
-
-                Vector256<float> r = vx.AsSingle() - Vector256<float>.One;
-
-                Vector256<float> r2 = r * r;
-                Vector256<float> r4 = r2 * r2;
-                Vector256<float> r8 = r4 * r4;
-
-                Vector256<float> q = (Vector256.Create(C10) * r2 + (Vector256.Create(C9) * r + Vector256.Create(C8)))
-                                                          * r8 + (((Vector256.Create(C7) * r + Vector256.Create(C6))
-                                                            * r2 + (Vector256.Create(C5) * r + Vector256.Create(C4)))
-                                                           * r4 + ((Vector256.Create(C3) * r + Vector256.Create(C2))
-                                                            * r2 + (Vector256.Create(C1) * r + Vector256.Create(C0))));
-
-                return Vector256.ConditionalSelect(
-                    specialMask.AsSingle(),
-                    specialResult,
-                    n * Vector256.Create(V_LN2) + q
-                );
-            }
-
-            public static Vector512<float> Invoke(Vector512<float> x)
-            {
-                Vector512<float> specialResult = x;
-
-                // x is subnormal or infinity or NaN
-                Vector512<uint> specialMask = Vector512.GreaterThanOrEqual(x.AsUInt32() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN));
-
-                if (specialMask != Vector512<uint>.Zero)
-                {
-                    // float.IsZero(x) ? float.NegativeInfinity : x
-                    Vector512<float> zeroMask = Vector512.Equals(x, Vector512<float>.Zero);
-
-                    specialResult = Vector512.ConditionalSelect(
-                        zeroMask,
-                        Vector512.Create(float.NegativeInfinity),
-                        specialResult
-                    );
-
-                    // (x < 0) ? float.NaN : x
-                    Vector512<float> lessThanZeroMask = Vector512.LessThan(x, Vector512<float>.Zero);
-
-                    specialResult = Vector512.ConditionalSelect(
-                        lessThanZeroMask,
-                        Vector512.Create(float.NaN),
-                        specialResult
-                    );
-
-                    // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x)
-                    Vector512<float> temp = zeroMask
-                                          | lessThanZeroMask
-                                          | ~Vector512.Equals(x, x)
-                                          | Vector512.Equals(x, Vector512.Create(float.PositiveInfinity));
-
-                    // subnormal
-                    Vector512<float> subnormalMask = Vector512.AndNot(specialMask.AsSingle(), temp);
-
-                    x = Vector512.ConditionalSelect(
-                        subnormalMask,
-                        ((x * 8388608.0f).AsUInt32() - Vector512.Create(23u << 23)).AsSingle(),
-                        x
-                    );
-
-                    specialMask = temp.AsUInt32();
-                }
-
-                Vector512<uint> vx = x.AsUInt32() - Vector512.Create(V_OFF);
-                Vector512<float> n = Vector512.ConvertToSingle(Vector512.ShiftRightArithmetic(vx.AsInt32(), 23));
-
-                vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF);
-
-                Vector512<float> r = vx.AsSingle() - Vector512<float>.One;
-
-                Vector512<float> r2 = r * r;
-                Vector512<float> r4 = r2 * r2;
-                Vector512<float> r8 = r4 * r4;
-
-                Vector512<float> q = (Vector512.Create(C10) * r2 + (Vector512.Create(C9) * r + Vector512.Create(C8)))
-                                                          * r8 + (((Vector512.Create(C7) * r + Vector512.Create(C6))
-                                                            * r2 + (Vector512.Create(C5) * r + Vector512.Create(C4)))
-                                                           * r4 + ((Vector512.Create(C3) * r + Vector512.Create(C2))
-                                                            * r2 + (Vector512.Create(C1) * r + Vector512.Create(C0))));
-
-                return Vector512.ConditionalSelect(
-                    specialMask.AsSingle(),
-                    specialResult,
-                    n * Vector512.Create(V_LN2) + q
-                );
-            }
-        }
-#endif
-
-        /// <summary>T.Log2(x)</summary>
-        internal readonly struct Log2Operator<T> : IUnaryOperator<T, T>
-            where T : ILogarithmicFunctions<T>
-        {
-            public static bool Vectorizable => (typeof(T) == typeof(double))
-                                            || (typeof(T) == typeof(float));
-
-            public static T Invoke(T x) => T.Log2(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-#if NET9_0_OR_GREATER
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector128.Log2(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return Vector128.Log2(x.AsSingle()).As<float, T>();
-                }
-#else
-                if (typeof(T) == typeof(double))
-                {
-                    return Log2OperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return Log2OperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-#endif
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-#if NET9_0_OR_GREATER
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector256.Log2(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return Vector256.Log2(x.AsSingle()).As<float, T>();
-                }
-#else
-                if (typeof(T) == typeof(double))
-                {
-                    return Log2OperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return Log2OperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-#endif
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-#if NET9_0_OR_GREATER
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector512.Log2(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return Vector512.Log2(x.AsSingle()).As<float, T>();
-                }
-#else
-                if (typeof(T) == typeof(double))
-                {
-                    return Log2OperatorDouble.Invoke(x.AsDouble()).As<double, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    return Log2OperatorSingle.Invoke(x.AsSingle()).As<float, T>();
-                }
-#endif
-            }
-        }
-
-#if !NET9_0_OR_GREATER
-        /// <summary>double.Log2(x)</summary>
-        internal readonly struct Log2OperatorDouble : IUnaryOperator<double, double>
-        {
-            // This code is based on `vrd2_log2` from amd/aocl-libm-ose
-            // Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved.
-            //
-            // Licensed under the BSD 3-Clause "New" or "Revised" License
-            // See THIRD-PARTY-NOTICES.TXT for the full license text
-
-            // Reduce x into the form:
-            //        x = (-1)^s*2^n*m
-            // s will be always zero, as log is defined for positive numbers
-            // n is an integer known as the exponent
-            // m is mantissa
-            //
-            // x is reduced such that the mantissa, m lies in [2/3,4/3]
-            //      x = 2^n*m where m is in [2/3,4/3]
-            //      log2(x) = log2(2^n*m)              We have log(a*b) = log(a)+log(b)
-            //             = log2(2^n) + log2(m)       We have log(a^n) = n*log(a)
-            //             = n + log2(m)
-            //             = n + log2(1+(m-1))
-            //             = n + ln(1+f) * log2(e)          Where f = m-1
-            //             = n + log1p(f) * log2(e)         f lies in [-1/3,+1/3]
-            //
-            // Thus we have :
-            // log(x) = n + log1p(f) * log2(e)
-            // The second term log1p(F) is approximated by using a polynomial
-
-            private const ulong V_MIN = 0x00100000_00000000;    // SmallestNormal
-            private const ulong V_MAX = 0x7FF00000_00000000;    // +Infinity
-            private const ulong V_MSK = 0x000FFFFF_FFFFFFFF;    // (1 << 52) - 1
-            private const ulong V_OFF = 0x3FE55555_55555555;    // 2.0 / 3.0
-
-            private const double LN2_HEAD = 1.44269180297851562500E+00;
-            private const double LN2_TAIL = 3.23791044778235969970E-06;
-
-            private const double C02 = -0.499999999999999560;
-            private const double C03 = +0.333333333333414750;
-            private const double C04 = -0.250000000000297430;
-            private const double C05 = +0.199999999975985220;
-            private const double C06 = -0.166666666608919500;
-            private const double C07 = +0.142857145600277100;
-            private const double C08 = -0.125000005127831270;
-            private const double C09 = +0.111110952357159440;
-            private const double C10 = -0.099999750495501240;
-            private const double C11 = +0.090914349823462390;
-            private const double C12 = -0.083340600527551860;
-            private const double C13 = +0.076817603328311300;
-            private const double C14 = -0.071296718946287310;
-            private const double C15 = +0.067963465211535730;
-            private const double C16 = -0.063995035098960040;
-            private const double C17 = +0.049370587082412105;
-            private const double C18 = -0.045370170994891980;
-            private const double C19 = +0.088970636003577750;
-            private const double C20 = -0.086906174116908760;
-
-            public static bool Vectorizable => true;
-
-            public static double Invoke(double x) => double.Log2(x);
-
-            public static Vector128<double> Invoke(Vector128<double> x)
-            {
-                Vector128<double> specialResult = x;
-
-                // x is zero, subnormal, infinity, or NaN
-                Vector128<ulong> specialMask = Vector128.GreaterThanOrEqual(x.AsUInt64() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN));
-
-                if (specialMask != Vector128<ulong>.Zero)
-                {
-                    Vector128<long> xBits = x.AsInt64();
-
-                    // (x < 0) ? float.NaN : x
-                    Vector128<double> lessThanZeroMask = Vector128.LessThan(xBits, Vector128<long>.Zero).AsDouble();
-
-                    specialResult = Vector128.ConditionalSelect(
-                        lessThanZeroMask,
-                        Vector128.Create(double.NaN),
-                        specialResult
-                    );
-
-                    // double.IsZero(x) ? double.NegativeInfinity : x
-                    Vector128<double> zeroMask = Vector128.Equals(xBits << 1, Vector128<long>.Zero).AsDouble();
-
-                    specialResult = Vector128.ConditionalSelect(
-                        zeroMask,
-                        Vector128.Create(double.NegativeInfinity),
-                        specialResult
-                    );
-
-                    // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x)
-                    Vector128<double> temp = zeroMask
-                                           | lessThanZeroMask
-                                           | Vector128.GreaterThanOrEqual(xBits, Vector128.Create(double.PositiveInfinity).AsInt64()).AsDouble();
-
-                    // subnormal
-                    Vector128<double> subnormalMask = Vector128.AndNot(specialMask.AsDouble(), temp);
-
-                    // multiply by 2^52, then normalize
-                    x = Vector128.ConditionalSelect(
-                        subnormalMask,
-                        ((x * 4503599627370496.0).AsUInt64() - Vector128.Create(52ul << 52)).AsDouble(),
-                        x
-                    );
-
-                    specialMask = temp.AsUInt64();
-                }
-
-                // Reduce the mantissa to [+2/3, +4/3]
-                Vector128<ulong> vx = x.AsUInt64() - Vector128.Create(V_OFF);
-                Vector128<double> n = Vector128.ConvertToDouble(vx.AsInt64() >> 52);
-                vx = (vx & Vector128.Create(V_MSK)) + Vector128.Create(V_OFF);
-
-                // Adjust the mantissa to [-1/3, +1/3]
-                Vector128<double> r = vx.AsDouble() - Vector128<double>.One;
-
-                Vector128<double> r02 = r * r;
-                Vector128<double> r04 = r02 * r02;
-                Vector128<double> r08 = r04 * r04;
-                Vector128<double> r16 = r08 * r08;
-
-                // Compute log(x + 1) using polynomial approximation
-                //      C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
-
-                Vector128<double> poly = (((r04 * C20)
-                                        + ((((r * C19) + Vector128.Create(C18)) * r02)
-                                          + ((r * C17) + Vector128.Create(C16)))) * r16)
-                                     + (((((((r * C15) + Vector128.Create(C14)) * r02)
-                                          + ((r * C13) + Vector128.Create(C12))) * r04)
-                                        + ((((r * C11) + Vector128.Create(C10)) * r02)
-                                          + ((r * C09) + Vector128.Create(C08)))) * r08)
-                                       + (((((r * C07) + Vector128.Create(C06)) * r02)
-                                          + ((r * C05) + Vector128.Create(C04))) * r04)
-                                        + ((((r * C03) + Vector128.Create(C02)) * r02) + r);
-
-                return Vector128.ConditionalSelect(
-                    specialMask.AsDouble(),
-                    specialResult,
-                    (poly * LN2_HEAD) + ((poly * LN2_TAIL) + n)
-                );
-            }
-
-            public static Vector256<double> Invoke(Vector256<double> x)
-            {
-                Vector256<double> specialResult = x;
-
-                // x is zero, subnormal, infinity, or NaN
-                Vector256<ulong> specialMask = Vector256.GreaterThanOrEqual(x.AsUInt64() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN));
-
-                if (specialMask != Vector256<ulong>.Zero)
-                {
-                    Vector256<long> xBits = x.AsInt64();
-
-                    // (x < 0) ? float.NaN : x
-                    Vector256<double> lessThanZeroMask = Vector256.LessThan(xBits, Vector256<long>.Zero).AsDouble();
-
-                    specialResult = Vector256.ConditionalSelect(
-                        lessThanZeroMask,
-                        Vector256.Create(double.NaN),
-                        specialResult
-                    );
-
-                    // double.IsZero(x) ? double.NegativeInfinity : x
-                    Vector256<double> zeroMask = Vector256.Equals(xBits << 1, Vector256<long>.Zero).AsDouble();
-
-                    specialResult = Vector256.ConditionalSelect(
-                        zeroMask,
-                        Vector256.Create(double.NegativeInfinity),
-                        specialResult
-                    );
-
-                    // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x)
-                    Vector256<double> temp = zeroMask
-                                           | lessThanZeroMask
-                                           | Vector256.GreaterThanOrEqual(xBits, Vector256.Create(double.PositiveInfinity).AsInt64()).AsDouble();
-
-                    // subnormal
-                    Vector256<double> subnormalMask = Vector256.AndNot(specialMask.AsDouble(), temp);
-
-                    // multiply by 2^52, then normalize
-                    x = Vector256.ConditionalSelect(
-                        subnormalMask,
-                        ((x * 4503599627370496.0).AsUInt64() - Vector256.Create(52ul << 52)).AsDouble(),
-                        x
-                    );
-
-                    specialMask = temp.AsUInt64();
-                }
-
-                // Reduce the mantissa to [+2/3, +4/3]
-                Vector256<ulong> vx = x.AsUInt64() - Vector256.Create(V_OFF);
-                Vector256<double> n = Vector256.ConvertToDouble(vx.AsInt64() >> 52);
-                vx = (vx & Vector256.Create(V_MSK)) + Vector256.Create(V_OFF);
-
-                // Adjust the mantissa to [-1/3, +1/3]
-                Vector256<double> r = vx.AsDouble() - Vector256<double>.One;
-
-                Vector256<double> r02 = r * r;
-                Vector256<double> r04 = r02 * r02;
-                Vector256<double> r08 = r04 * r04;
-                Vector256<double> r16 = r08 * r08;
-
-                // Compute log(x + 1) using polynomial approximation
-                //      C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
-
-                Vector256<double> poly = (((r04 * C20)
-                                        + ((((r * C19) + Vector256.Create(C18)) * r02)
-                                          + ((r * C17) + Vector256.Create(C16)))) * r16)
-                                     + (((((((r * C15) + Vector256.Create(C14)) * r02)
-                                          + ((r * C13) + Vector256.Create(C12))) * r04)
-                                        + ((((r * C11) + Vector256.Create(C10)) * r02)
-                                          + ((r * C09) + Vector256.Create(C08)))) * r08)
-                                       + (((((r * C07) + Vector256.Create(C06)) * r02)
-                                          + ((r * C05) + Vector256.Create(C04))) * r04)
-                                        + ((((r * C03) + Vector256.Create(C02)) * r02) + r);
-
-                return Vector256.ConditionalSelect(
-                    specialMask.AsDouble(),
-                    specialResult,
-                    (poly * LN2_HEAD) + ((poly * LN2_TAIL) + n)
-                );
-            }
-
-            public static Vector512<double> Invoke(Vector512<double> x)
-            {
-                Vector512<double> specialResult = x;
-
-                // x is zero, subnormal, infinity, or NaN
-                Vector512<ulong> specialMask = Vector512.GreaterThanOrEqual(x.AsUInt64() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN));
-
-                if (specialMask != Vector512<ulong>.Zero)
-                {
-                    Vector512<long> xBits = x.AsInt64();
-
-                    // (x < 0) ? float.NaN : x
-                    Vector512<double> lessThanZeroMask = Vector512.LessThan(xBits, Vector512<long>.Zero).AsDouble();
-
-                    specialResult = Vector512.ConditionalSelect(
-                        lessThanZeroMask,
-                        Vector512.Create(double.NaN),
-                        specialResult
-                    );
-
-                    // double.IsZero(x) ? double.NegativeInfinity : x
-                    Vector512<double> zeroMask = Vector512.Equals(xBits << 1, Vector512<long>.Zero).AsDouble();
-
-                    specialResult = Vector512.ConditionalSelect(
-                        zeroMask,
-                        Vector512.Create(double.NegativeInfinity),
-                        specialResult
-                    );
-
-                    // double.IsZero(x) | (x < 0) | double.IsNaN(x) | double.IsPositiveInfinity(x)
-                    Vector512<double> temp = zeroMask
-                                           | lessThanZeroMask
-                                           | Vector512.GreaterThanOrEqual(xBits, Vector512.Create(double.PositiveInfinity).AsInt64()).AsDouble();
-
-                    // subnormal
-                    Vector512<double> subnormalMask = Vector512.AndNot(specialMask.AsDouble(), temp);
-
-                    // multiply by 2^52, then normalize
-                    x = Vector512.ConditionalSelect(
-                        subnormalMask,
-                        ((x * 4503599627370496.0).AsUInt64() - Vector512.Create(52ul << 52)).AsDouble(),
-                        x
-                    );
-
-                    specialMask = temp.AsUInt64();
-                }
-
-                // Reduce the mantissa to [+2/3, +4/3]
-                Vector512<ulong> vx = x.AsUInt64() - Vector512.Create(V_OFF);
-                Vector512<double> n = Vector512.ConvertToDouble(vx.AsInt64() >> 52);
-                vx = (vx & Vector512.Create(V_MSK)) + Vector512.Create(V_OFF);
-
-                // Adjust the mantissa to [-1/3, +1/3]
-                Vector512<double> r = vx.AsDouble() - Vector512<double>.One;
-
-                Vector512<double> r02 = r * r;
-                Vector512<double> r04 = r02 * r02;
-                Vector512<double> r08 = r04 * r04;
-                Vector512<double> r16 = r08 * r08;
-
-                // Compute log(x + 1) using polynomial approximation
-                //      C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
-
-                Vector512<double> poly = (((r04 * C20)
-                                        + ((((r * C19) + Vector512.Create(C18)) * r02)
-                                          + ((r * C17) + Vector512.Create(C16)))) * r16)
-                                     + (((((((r * C15) + Vector512.Create(C14)) * r02)
-                                          + ((r * C13) + Vector512.Create(C12))) * r04)
-                                        + ((((r * C11) + Vector512.Create(C10)) * r02)
-                                          + ((r * C09) + Vector512.Create(C08)))) * r08)
-                                       + (((((r * C07) + Vector512.Create(C06)) * r02)
-                                          + ((r * C05) + Vector512.Create(C04))) * r04)
-                                        + ((((r * C03) + Vector512.Create(C02)) * r02) + r);
-
-                return Vector512.ConditionalSelect(
-                    specialMask.AsDouble(),
-                    specialResult,
-                    (poly * LN2_HEAD) + ((poly * LN2_TAIL) + n)
-                );
-            }
-        }
-
-        /// <summary>float.Log2(x)</summary>
-        internal readonly struct Log2OperatorSingle : IUnaryOperator<float, float>
-        {
-            // This code is based on `vrs4_log2f` from amd/aocl-libm-ose
-            // Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved.
-            //
-            // Licensed under the BSD 3-Clause "New" or "Revised" License
-            // See THIRD-PARTY-NOTICES.TXT for the full license text
-
-            // Spec:
-            //   log2f(x)
-            //          = log2f(x)          if x ∈ F and x > 0
-            //          = x                 if x = qNaN
-            //          = 0                 if x = 1
-            //          = -inf              if x = (-0, 0}
-            //          = NaN               otherwise
-            //
-            // Assumptions/Expectations
-            //      - Maximum ULP is observed to be at 4
-            //      - Some FPU Exceptions may not be available
-            //      - Performance is at least 3x
-            //
-            // Implementation Notes:
-            //  1. Range Reduction:
-            //      x = 2^n*(1+f)                                          .... (1)
-            //         where n is exponent and is an integer
-            //             (1+f) is mantissa ∈ [1,2). i.e., 1 ≤ 1+f < 2    .... (2)
-            //
-            //    From (1), taking log on both sides
-            //      log2(x) = log2(2^n * (1+f))
-            //             = n + log2(1+f)                           .... (3)
-            //
-            //      let z = 1 + f
-            //             log2(z) = log2(k) + log2(z) - log2(k)
-            //             log2(z) = log2(kz) - log2(k)
-            //
-            //    From (2), range of z is [1, 2)
-            //       by simply dividing range by 'k', z is in [1/k, 2/k)  .... (4)
-            //       Best choice of k is the one which gives equal and opposite values
-            //       at extrema        +-      -+
-            //              1          | 2      |
-            //             --- - 1 = - |--- - 1 |
-            //              k          | k      |                         .... (5)
-            //                         +-      -+
-            //
-            //       Solving for k, k = 3/2,
-            //    From (4), using 'k' value, range is therefore [-0.3333, 0.3333]
-            //
-            //  2. Polynomial Approximation:
-            //     More information refer to tools/sollya/vrs4_logf.sollya
-            //
-            //     7th Deg -   Error abs: 0x1.04c4ac98p-22   rel: 0x1.2216e6f8p-19
-
-            private const uint V_MIN = 0x00800000;
-            private const uint V_MAX = 0x7F800000;
-            private const uint V_MASK = 0x007FFFFF;
-            private const uint V_OFF = 0x3F2AAAAB;
-
-            private const float C0 = 0.0f;
-            private const float C1 = 1.4426951f;
-            private const float C2 = -0.72134554f;
-            private const float C3 = 0.48089063f;
-            private const float C4 = -0.36084408f;
-            private const float C5 = 0.2888971f;
-            private const float C6 = -0.23594281f;
-            private const float C7 = 0.19948183f;
-            private const float C8 = -0.22616665f;
-            private const float C9 = 0.21228963f;
-
-            public static bool Vectorizable => true;
-
-            public static float Invoke(float x) => float.Log2(x);
-
-            public static Vector128<float> Invoke(Vector128<float> x)
-            {
-                Vector128<float> specialResult = x;
-
-                // x is subnormal or infinity or NaN
-                Vector128<uint> specialMask = Vector128.GreaterThanOrEqual(x.AsUInt32() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN));
-
-                if (specialMask != Vector128<uint>.Zero)
-                {
-                    // float.IsZero(x) ? float.NegativeInfinity : x
-                    Vector128<float> zeroMask = Vector128.Equals(x, Vector128<float>.Zero);
-
-                    specialResult = Vector128.ConditionalSelect(
-                        zeroMask,
-                        Vector128.Create(float.NegativeInfinity),
-                        specialResult
-                    );
-
-                    // (x < 0) ? float.NaN : x
-                    Vector128<float> lessThanZeroMask = Vector128.LessThan(x, Vector128<float>.Zero);
-
-                    specialResult = Vector128.ConditionalSelect(
-                        lessThanZeroMask,
-                        Vector128.Create(float.NaN),
-                        specialResult
-                    );
-
-                    // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x)
-                    Vector128<float> temp = zeroMask
-                                          | lessThanZeroMask
-                                          | ~Vector128.Equals(x, x)
-                                          | Vector128.Equals(x, Vector128.Create(float.PositiveInfinity));
-
-                    // subnormal
-                    Vector128<float> subnormalMask = Vector128.AndNot(specialMask.AsSingle(), temp);
-
-                    x = Vector128.ConditionalSelect(
-                        subnormalMask,
-                        ((x * 8388608.0f).AsUInt32() - Vector128.Create(23u << 23)).AsSingle(),
-                        x
-                    );
-
-                    specialMask = temp.AsUInt32();
-                }
-
-                Vector128<uint> vx = x.AsUInt32() - Vector128.Create(V_OFF);
-                Vector128<float> n = Vector128.ConvertToSingle(Vector128.ShiftRightArithmetic(vx.AsInt32(), 23));
-
-                vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF);
-
-                Vector128<float> r = vx.AsSingle() - Vector128<float>.One;
-
-                Vector128<float> r2 = r * r;
-                Vector128<float> r4 = r2 * r2;
-                Vector128<float> r8 = r4 * r4;
-
-                Vector128<float> poly = (Vector128.Create(C9) * r + Vector128.Create(C8)) * r8
-                                    + (((Vector128.Create(C7) * r + Vector128.Create(C6)) * r2
-                                      + (Vector128.Create(C5) * r + Vector128.Create(C4))) * r4
-                                     + ((Vector128.Create(C3) * r + Vector128.Create(C2)) * r2
-                                      + (Vector128.Create(C1) * r + Vector128.Create(C0))));
-
-                return Vector128.ConditionalSelect(
-                    specialMask.AsSingle(),
-                    specialResult,
-                    n + poly
-                );
-            }
-
-            public static Vector256<float> Invoke(Vector256<float> x)
-            {
-                Vector256<float> specialResult = x;
-
-                // x is subnormal or infinity or NaN
-                Vector256<uint> specialMask = Vector256.GreaterThanOrEqual(x.AsUInt32() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN));
-
-                if (specialMask != Vector256<uint>.Zero)
-                {
-                    // float.IsZero(x) ? float.NegativeInfinity : x
-                    Vector256<float> zeroMask = Vector256.Equals(x, Vector256<float>.Zero);
-
-                    specialResult = Vector256.ConditionalSelect(
-                        zeroMask,
-                        Vector256.Create(float.NegativeInfinity),
-                        specialResult
-                    );
-
-                    // (x < 0) ? float.NaN : x
-                    Vector256<float> lessThanZeroMask = Vector256.LessThan(x, Vector256<float>.Zero);
-
-                    specialResult = Vector256.ConditionalSelect(
-                        lessThanZeroMask,
-                        Vector256.Create(float.NaN),
-                        specialResult
-                    );
-
-                    // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x)
-                    Vector256<float> temp = zeroMask
-                                          | lessThanZeroMask
-                                          | ~Vector256.Equals(x, x)
-                                          | Vector256.Equals(x, Vector256.Create(float.PositiveInfinity));
-
-                    // subnormal
-                    Vector256<float> subnormalMask = Vector256.AndNot(specialMask.AsSingle(), temp);
-
-                    x = Vector256.ConditionalSelect(
-                        subnormalMask,
-                        ((x * 8388608.0f).AsUInt32() - Vector256.Create(23u << 23)).AsSingle(),
-                        x
-                    );
-
-                    specialMask = temp.AsUInt32();
-                }
-
-                Vector256<uint> vx = x.AsUInt32() - Vector256.Create(V_OFF);
-                Vector256<float> n = Vector256.ConvertToSingle(Vector256.ShiftRightArithmetic(vx.AsInt32(), 23));
-
-                vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF);
-
-                Vector256<float> r = vx.AsSingle() - Vector256<float>.One;
-
-                Vector256<float> r2 = r * r;
-                Vector256<float> r4 = r2 * r2;
-                Vector256<float> r8 = r4 * r4;
-
-                Vector256<float> poly = (Vector256.Create(C9) * r + Vector256.Create(C8)) * r8
-                                    + (((Vector256.Create(C7) * r + Vector256.Create(C6)) * r2
-                                      + (Vector256.Create(C5) * r + Vector256.Create(C4))) * r4
-                                     + ((Vector256.Create(C3) * r + Vector256.Create(C2)) * r2
-                                      + (Vector256.Create(C1) * r + Vector256.Create(C0))));
-
-                return Vector256.ConditionalSelect(
-                    specialMask.AsSingle(),
-                    specialResult,
-                    n + poly
-                );
-            }
-
-            public static Vector512<float> Invoke(Vector512<float> x)
-            {
-                Vector512<float> specialResult = x;
-
-                // x is subnormal or infinity or NaN
-                Vector512<uint> specialMask = Vector512.GreaterThanOrEqual(x.AsUInt32() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN));
-
-                if (specialMask != Vector512<uint>.Zero)
-                {
-                    // float.IsZero(x) ? float.NegativeInfinity : x
-                    Vector512<float> zeroMask = Vector512.Equals(x, Vector512<float>.Zero);
-
-                    specialResult = Vector512.ConditionalSelect(
-                        zeroMask,
-                        Vector512.Create(float.NegativeInfinity),
-                        specialResult
-                    );
-
-                    // (x < 0) ? float.NaN : x
-                    Vector512<float> lessThanZeroMask = Vector512.LessThan(x, Vector512<float>.Zero);
-
-                    specialResult = Vector512.ConditionalSelect(
-                        lessThanZeroMask,
-                        Vector512.Create(float.NaN),
-                        specialResult
-                    );
-
-                    // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x)
-                    Vector512<float> temp = zeroMask
-                                          | lessThanZeroMask
-                                          | ~Vector512.Equals(x, x)
-                                          | Vector512.Equals(x, Vector512.Create(float.PositiveInfinity));
-
-                    // subnormal
-                    Vector512<float> subnormalMask = Vector512.AndNot(specialMask.AsSingle(), temp);
-
-                    x = Vector512.ConditionalSelect(
-                        subnormalMask,
-                        ((x * 8388608.0f).AsUInt32() - Vector512.Create(23u << 23)).AsSingle(),
-                        x
-                    );
-
-                    specialMask = temp.AsUInt32();
-                }
-
-                Vector512<uint> vx = x.AsUInt32() - Vector512.Create(V_OFF);
-                Vector512<float> n = Vector512.ConvertToSingle(Vector512.ShiftRightArithmetic(vx.AsInt32(), 23));
-
-                vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF);
-
-                Vector512<float> r = vx.AsSingle() - Vector512<float>.One;
-
-                Vector512<float> r2 = r * r;
-                Vector512<float> r4 = r2 * r2;
-                Vector512<float> r8 = r4 * r4;
-
-                Vector512<float> poly = (Vector512.Create(C9) * r + Vector512.Create(C8)) * r8
-                                    + (((Vector512.Create(C7) * r + Vector512.Create(C6)) * r2
-                                      + (Vector512.Create(C5) * r + Vector512.Create(C4))) * r4
-                                     + ((Vector512.Create(C3) * r + Vector512.Create(C2)) * r2
-                                      + (Vector512.Create(C1) * r + Vector512.Create(C0))));
-
-                return Vector512.ConditionalSelect(
-                    specialMask.AsSingle(),
-                    specialResult,
-                    n + poly
-                );
-            }
-        }
-#endif
-
-        /// <summary>T.Log10(x)</summary>
-        internal readonly struct Log10Operator<T> : IUnaryOperator<T, T>
-            where T : ILogarithmicFunctions<T>
-        {
-            private const double NaturalLog10 = 2.302585092994046;
-            public static bool Vectorizable => LogOperator<T>.Vectorizable;
-            public static T Invoke(T x) => T.Log10(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => LogOperator<T>.Invoke(x) / Vector128.Create(T.CreateTruncating(NaturalLog10));
-            public static Vector256<T> Invoke(Vector256<T> x) => LogOperator<T>.Invoke(x) / Vector256.Create(T.CreateTruncating(NaturalLog10));
-            public static Vector512<T> Invoke(Vector512<T> x) => LogOperator<T>.Invoke(x) / Vector512.Create(T.CreateTruncating(NaturalLog10));
-        }
-
-        /// <summary>T.LogP1(x)</summary>
-        internal readonly struct LogP1Operator<T> : IUnaryOperator<T, T>
-            where T : ILogarithmicFunctions<T>
-        {
-            public static bool Vectorizable => LogOperator<T>.Vectorizable;
-            public static T Invoke(T x) => T.LogP1(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => LogOperator<T>.Invoke(x + Vector128<T>.One);
-            public static Vector256<T> Invoke(Vector256<T> x) => LogOperator<T>.Invoke(x + Vector256<T>.One);
-            public static Vector512<T> Invoke(Vector512<T> x) => LogOperator<T>.Invoke(x + Vector512<T>.One);
-        }
-
-        /// <summary>T.Log2P1(x)</summary>
-        internal readonly struct Log2P1Operator<T> : IUnaryOperator<T, T>
-            where T : ILogarithmicFunctions<T>
-        {
-            public static bool Vectorizable => Log2Operator<T>.Vectorizable;
-            public static T Invoke(T x) => T.Log2P1(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => Log2Operator<T>.Invoke(x + Vector128<T>.One);
-            public static Vector256<T> Invoke(Vector256<T> x) => Log2Operator<T>.Invoke(x + Vector256<T>.One);
-            public static Vector512<T> Invoke(Vector512<T> x) => Log2Operator<T>.Invoke(x + Vector512<T>.One);
-        }
-
-        /// <summary>T.Log10P1(x)</summary>
-        internal readonly struct Log10P1Operator<T> : IUnaryOperator<T, T>
-            where T : ILogarithmicFunctions<T>
-        {
-            public static bool Vectorizable => Log10Operator<T>.Vectorizable;
-            public static T Invoke(T x) => T.Log10P1(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => Log10Operator<T>.Invoke(x + Vector128<T>.One);
-            public static Vector256<T> Invoke(Vector256<T> x) => Log10Operator<T>.Invoke(x + Vector256<T>.One);
-            public static Vector512<T> Invoke(Vector512<T> x) => Log10Operator<T>.Invoke(x + Vector512<T>.One);
-        }
-
-        /// <summary>T.Log(x, y)</summary>
-        internal readonly struct LogBaseOperator<T> : IBinaryOperator<T>
-            where T : ILogarithmicFunctions<T>
-        {
-            public static bool Vectorizable => LogOperator<T>.Vectorizable;
-            public static T Invoke(T x, T y) => T.Log(x, y);
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => LogOperator<T>.Invoke(x) / LogOperator<T>.Invoke(y);
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => LogOperator<T>.Invoke(x) / LogOperator<T>.Invoke(y);
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => LogOperator<T>.Invoke(x) / LogOperator<T>.Invoke(y);
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector128<T> ElementWiseSelect<T>(Vector128<T> mask, Vector128<T> left, Vector128<T> right)
-        {
-            if (Sse41.IsSupported)
-            {
-                if (typeof(T) == typeof(float)) return Sse41.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As<float, T>();
-                if (typeof(T) == typeof(double)) return Sse41.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As<double, T>();
-
-                if (sizeof(T) == 1) return Sse41.BlendVariable(left.AsByte(), right.AsByte(), (~mask).AsByte()).As<byte, T>();
-                if (sizeof(T) == 2) return Sse41.BlendVariable(left.AsUInt16(), right.AsUInt16(), (~mask).AsUInt16()).As<ushort, T>();
-                if (sizeof(T) == 4) return Sse41.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As<uint, T>();
-                if (sizeof(T) == 8) return Sse41.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As<ulong, T>();
-            }
-
-            return Vector128.ConditionalSelect(mask, left, right);
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector256<T> ElementWiseSelect<T>(Vector256<T> mask, Vector256<T> left, Vector256<T> right)
-        {
-            if (Avx2.IsSupported)
-            {
-                if (typeof(T) == typeof(float)) return Avx2.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As<float, T>();
-                if (typeof(T) == typeof(double)) return Avx2.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As<double, T>();
-
-                if (sizeof(T) == 1) return Avx2.BlendVariable(left.AsByte(), right.AsByte(), (~mask).AsByte()).As<byte, T>();
-                if (sizeof(T) == 2) return Avx2.BlendVariable(left.AsUInt16(), right.AsUInt16(), (~mask).AsUInt16()).As<ushort, T>();
-                if (sizeof(T) == 4) return Avx2.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As<uint, T>();
-                if (sizeof(T) == 8) return Avx2.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As<ulong, T>();
-            }
-
-            return Vector256.ConditionalSelect(mask, left, right);
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector512<T> ElementWiseSelect<T>(Vector512<T> mask, Vector512<T> left, Vector512<T> right)
-        {
-            if (Avx512F.IsSupported)
-            {
-                if (typeof(T) == typeof(float)) return Avx512F.BlendVariable(left.AsSingle(), right.AsSingle(), (~mask).AsSingle()).As<float, T>();
-                if (typeof(T) == typeof(double)) return Avx512F.BlendVariable(left.AsDouble(), right.AsDouble(), (~mask).AsDouble()).As<double, T>();
-
-                if (sizeof(T) == 4) return Avx512F.BlendVariable(left.AsUInt32(), right.AsUInt32(), (~mask).AsUInt32()).As<uint, T>();
-                if (sizeof(T) == 8) return Avx512F.BlendVariable(left.AsUInt64(), right.AsUInt64(), (~mask).AsUInt64()).As<ulong, T>();
-            }
-
-            return Vector512.ConditionalSelect(mask, left, right);
-        }
-
-        /// <summary>1 / (1 + T.Exp(-x))</summary>
-        internal readonly struct SigmoidOperator<T> : IUnaryOperator<T, T> where T : IExponentialFunctions<T>
-        {
-            public static bool Vectorizable => ExpOperator<T>.Vectorizable;
-            public static T Invoke(T x) => T.One / (T.One + T.Exp(-x));
-            public static Vector128<T> Invoke(Vector128<T> x) => Vector128.Create(T.One) / (Vector128.Create(T.One) + ExpOperator<T>.Invoke(-x));
-            public static Vector256<T> Invoke(Vector256<T> x) => Vector256.Create(T.One) / (Vector256.Create(T.One) + ExpOperator<T>.Invoke(-x));
-            public static Vector512<T> Invoke(Vector512<T> x) => Vector512.Create(T.One) / (Vector512.Create(T.One) + ExpOperator<T>.Invoke(-x));
-        }
-
-        internal readonly struct CeilingOperator<T> : IUnaryOperator<T, T> where T : IFloatingPoint<T>
-        {
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Ceiling(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return Vector128.Ceiling(x.AsSingle()).As<float, T>();
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector128.Ceiling(x.AsDouble()).As<double, T>();
-                }
-
-                throw new NotSupportedException();
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return Vector256.Ceiling(x.AsSingle()).As<float, T>();
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector256.Ceiling(x.AsDouble()).As<double, T>();
-                }
-
-                throw new NotSupportedException();
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return Vector512.Ceiling(x.AsSingle()).As<float, T>();
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector512.Ceiling(x.AsDouble()).As<double, T>();
-                }
-
-                throw new NotSupportedException();
-            }
-        }
-
-        internal readonly struct FloorOperator<T> : IUnaryOperator<T, T> where T : IFloatingPoint<T>
-        {
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Floor(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return Vector128.Floor(x.AsSingle()).As<float, T>();
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector128.Floor(x.AsDouble()).As<double, T>();
-                }
-
-                throw new NotSupportedException();
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return Vector256.Floor(x.AsSingle()).As<float, T>();
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector256.Floor(x.AsDouble()).As<double, T>();
-                }
-
-                throw new NotSupportedException();
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return Vector512.Floor(x.AsSingle()).As<float, T>();
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector512.Floor(x.AsDouble()).As<double, T>();
-                }
-
-                throw new NotSupportedException();
-            }
-        }
-
-        private readonly struct TruncateOperator<T> : IUnaryOperator<T, T> where T : IFloatingPoint<T>
-        {
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Truncate(x);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    if (Sse41.IsSupported) return Sse41.RoundToZero(x.AsSingle()).As<float, T>();
-                    if (AdvSimd.IsSupported) return AdvSimd.RoundToZero(x.AsSingle()).As<float, T>();
-
-                    return Vector128.ConditionalSelect(Vector128.GreaterThanOrEqual(x, Vector128<T>.Zero),
-                        Vector128.Floor(x.AsSingle()).As<float, T>(),
-                        Vector128.Ceiling(x.AsSingle()).As<float, T>());
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    if (Sse41.IsSupported) return Sse41.RoundToZero(x.AsDouble()).As<double, T>();
-                    if (AdvSimd.Arm64.IsSupported) return AdvSimd.Arm64.RoundToZero(x.AsDouble()).As<double, T>();
-
-                    return Vector128.ConditionalSelect(Vector128.GreaterThanOrEqual(x, Vector128<T>.Zero),
-                        Vector128.Floor(x.AsDouble()).As<double, T>(),
-                        Vector128.Ceiling(x.AsDouble()).As<double, T>());
-                }
-
-                throw new NotSupportedException();
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    if (Avx.IsSupported) return Avx.RoundToZero(x.AsSingle()).As<float, T>();
-
-                    return Vector256.ConditionalSelect(Vector256.GreaterThanOrEqual(x, Vector256<T>.Zero),
-                        Vector256.Floor(x.AsSingle()).As<float, T>(),
-                        Vector256.Ceiling(x.AsSingle()).As<float, T>());
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    if (Avx.IsSupported) return Avx.RoundToZero(x.AsDouble()).As<double, T>();
-
-                    return Vector256.ConditionalSelect(Vector256.GreaterThanOrEqual(x, Vector256<T>.Zero),
-                        Vector256.Floor(x.AsDouble()).As<double, T>(),
-                        Vector256.Ceiling(x.AsDouble()).As<double, T>());
-                }
-
-                throw new NotSupportedException();
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    if (Avx512F.IsSupported) return Avx512F.RoundScale(x.AsSingle(), 0b11).As<float, T>();
-
-                    return Vector512.ConditionalSelect(Vector512.GreaterThanOrEqual(x, Vector512<T>.Zero),
-                        Vector512.Floor(x.AsSingle()).As<float, T>(),
-                        Vector512.Ceiling(x.AsSingle()).As<float, T>());
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    if (Avx512F.IsSupported) return Avx512F.RoundScale(x.AsDouble(), 0b11).As<double, T>();
-
-                    return Vector512.ConditionalSelect(Vector512.GreaterThanOrEqual(x, Vector512<T>.Zero),
-                        Vector512.Floor(x.AsDouble()).As<double, T>(),
-                        Vector512.Ceiling(x.AsDouble()).As<double, T>());
-                }
-
-                throw new NotSupportedException();
-            }
-        }
-
-        /// <summary>T.PopCount(x)</summary>
-        internal readonly struct PopCountOperator<T> : IUnaryOperator<T, T> where T : IBinaryInteger<T>
-        {
-            public static bool Vectorizable => false; // TODO: Vectorize
-            public static T Invoke(T x) => T.PopCount(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.LeadingZeroCount(x)</summary>
-        internal readonly struct LeadingZeroCountOperator<T> : IUnaryOperator<T, T> where T : IBinaryInteger<T>
-        {
-            public static bool Vectorizable => false; // TODO: Vectorize
-            public static T Invoke(T x) => T.LeadingZeroCount(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.TrailingZeroCount(x)</summary>
-        internal readonly struct TrailingZeroCountOperator<T> : IUnaryOperator<T, T> where T : IBinaryInteger<T>
-        {
-            public static bool Vectorizable => false; // TODO: Vectorize
-            public static T Invoke(T x) => T.TrailingZeroCount(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        private readonly struct CopySignOperator<T> : IBinaryOperator<T> where T : INumber<T>
-        {
-            public static bool Vectorizable => true;
-
-            public static T Invoke(T x, T y) => T.CopySign(x, y);
-
-            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return Vector128.ConditionalSelect(Vector128.Create(-0.0f).As<float, T>(), y, x);
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector128.ConditionalSelect(Vector128.Create(-0.0d).As<double, T>(), y, x);
-                }
-
-                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
-                {
-                    Vector128<T> absValue = Vector128.Abs(x);
-                    Vector128<T> sign = Vector128.GreaterThanOrEqual(y, Vector128<T>.Zero);
-                    Vector128<T> error = sign & Vector128.LessThan(absValue, Vector128<T>.Zero);
-                    if (error != Vector128<T>.Zero)
-                    {
-                        Math.Abs(int.MinValue); // throw OverflowException
-                    }
-
-                    return Vector128.ConditionalSelect(sign, absValue, -absValue);
-                }
-
-                return x;
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return Vector256.ConditionalSelect(Vector256.Create(-0.0f).As<float, T>(), y, x);
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector256.ConditionalSelect(Vector256.Create(-0.0d).As<double, T>(), y, x);
-                }
-
-                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
-                {
-                    Vector256<T> absValue = Vector256.Abs(x);
-                    Vector256<T> sign = Vector256.GreaterThanOrEqual(y, Vector256<T>.Zero);
-                    Vector256<T> error = sign & Vector256.LessThan(absValue, Vector256<T>.Zero);
-                    if (error != Vector256<T>.Zero)
-                    {
-                        Math.Abs(int.MinValue); // throw OverflowException
-                    }
-
-                    return Vector256.ConditionalSelect(sign, absValue, -absValue);
-                }
-
-                return x;
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return Vector512.ConditionalSelect(Vector512.Create(-0.0f).As<float, T>(), y, x);
-                }
-
-                if (typeof(T) == typeof(double))
-                {
-                    return Vector512.ConditionalSelect(Vector512.Create(-0.0d).As<double, T>(), y, x);
-                }
-
-                if (typeof(T) == typeof(sbyte) || typeof(T) == typeof(short) || typeof(T) == typeof(int) || typeof(T) == typeof(long) || typeof(T) == typeof(nint))
-                {
-                    Vector512<T> absValue = Vector512.Abs(x);
-                    Vector512<T> sign = Vector512.GreaterThanOrEqual(y, Vector512<T>.Zero);
-                    Vector512<T> error = sign & Vector512.LessThan(absValue, Vector512<T>.Zero);
-                    if (error != Vector512<T>.Zero)
-                    {
-                        Math.Abs(int.MinValue); // throw OverflowException
-                    }
-
-                    return Vector512.ConditionalSelect(sign, absValue, -absValue);
-                }
-
-                return x;
-            }
-        }
-
-        /// <summary>T.DegreesToRadians(x)</summary>
-        internal readonly struct DegreesToRadiansOperator<T> : IUnaryOperator<T, T> where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x) => T.DegreesToRadians(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => (x * T.Pi) / T.CreateChecked(180);
-            public static Vector256<T> Invoke(Vector256<T> x) => (x * T.Pi) / T.CreateChecked(180);
-            public static Vector512<T> Invoke(Vector512<T> x) => (x * T.Pi) / T.CreateChecked(180);
-        }
-
-        /// <summary>T.RadiansToDegrees(x)</summary>
-        internal readonly struct RadiansToDegreesOperator<T> : IUnaryOperator<T, T> where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => true;
-            public static T Invoke(T x) => T.RadiansToDegrees(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => (x * T.CreateChecked(180)) / T.Pi;
-            public static Vector256<T> Invoke(Vector256<T> x) => (x * T.CreateChecked(180)) / T.Pi;
-            public static Vector512<T> Invoke(Vector512<T> x) => (x * T.CreateChecked(180)) / T.Pi;
-        }
-
-        /// <summary>T &lt;&lt; amount</summary>
-        internal readonly struct ShiftLeftOperator<T>(int amount) : IStatefulUnaryOperator<T> where T : IShiftOperators<T, int, T>
-        {
-            private readonly int _amount = amount;
-
-            public static bool Vectorizable => true;
-
-            public T Invoke(T x) => x << _amount;
-            public Vector128<T> Invoke(Vector128<T> x) => x << _amount;
-            public Vector256<T> Invoke(Vector256<T> x) => x << _amount;
-            public Vector512<T> Invoke(Vector512<T> x) => x << _amount;
-        }
-
-        /// <summary>T &gt;&gt; amount</summary>
-        internal readonly struct ShiftRightArithmeticOperator<T>(int amount) : IStatefulUnaryOperator<T> where T : IShiftOperators<T, int, T>
-        {
-            private readonly int _amount = amount;
-
-            public static bool Vectorizable => true;
-
-            public T Invoke(T x) => x >> _amount;
-            public Vector128<T> Invoke(Vector128<T> x) => x >> _amount;
-            public Vector256<T> Invoke(Vector256<T> x) => x >> _amount;
-            public Vector512<T> Invoke(Vector512<T> x) => x >> _amount;
-        }
-
-        /// <summary>T &gt;&gt;&gt; amount</summary>
-        internal readonly struct ShiftRightLogicalOperator<T>(int amount) : IStatefulUnaryOperator<T> where T : IShiftOperators<T, int, T>
-        {
-            private readonly int _amount = amount;
-
-            public static bool Vectorizable => true;
-
-            public T Invoke(T x) => x >>> _amount;
-            public Vector128<T> Invoke(Vector128<T> x) => x >>> _amount;
-            public Vector256<T> Invoke(Vector256<T> x) => x >>> _amount;
-            public Vector512<T> Invoke(Vector512<T> x) => x >>> _amount;
-        }
-
-        /// <summary>T.RotateLeft(amount)</summary>
-        internal readonly struct RotateLeftOperator<T>(int amount) : IStatefulUnaryOperator<T> where T : IBinaryInteger<T>
-        {
-            private readonly int _amount = amount;
-
-            public static bool Vectorizable => true;
-
-            public T Invoke(T x) => T.RotateLeft(x, _amount);
-            public Vector128<T> Invoke(Vector128<T> x) => (x << _amount) | (x >>> ((sizeof(T) * 8) - _amount));
-            public Vector256<T> Invoke(Vector256<T> x) => (x << _amount) | (x >>> ((sizeof(T) * 8) - _amount));
-            public Vector512<T> Invoke(Vector512<T> x) => (x << _amount) | (x >>> ((sizeof(T) * 8) - _amount));
-        }
-
-        /// <summary>T.RotateRight(amount)</summary>
-        internal readonly struct RotateRightOperator<T>(int amount) : IStatefulUnaryOperator<T> where T : IBinaryInteger<T>
-        {
-            private readonly int _amount = amount;
-
-            public static bool Vectorizable => true;
-
-            public T Invoke(T x) => T.RotateRight(x, _amount);
-            public Vector128<T> Invoke(Vector128<T> x) => (x >>> _amount) | (x << ((sizeof(T) * 8) - _amount));
-            public Vector256<T> Invoke(Vector256<T> x) => (x >>> _amount) | (x << ((sizeof(T) * 8) - _amount));
-            public Vector512<T> Invoke(Vector512<T> x) => (x >>> _amount) | (x << ((sizeof(T) * 8) - _amount));
-        }
-
-        /// <summary>T.ScaleB(x, n)</summary>
-        internal readonly struct ScaleBOperator<T>(int n) : IStatefulUnaryOperator<T> where T : IFloatingPointIeee754<T>
-        {
-            private readonly int _n = n;
-            private readonly T _pow2n = typeof(T) == typeof(float) || typeof(T) == typeof(double) ? T.Pow(T.CreateTruncating(2), T.CreateTruncating(n)) : default!;
-
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public T Invoke(T x) => T.ScaleB(x, _n);
-            public Vector128<T> Invoke(Vector128<T> x) => x * Vector128.Create(_pow2n);
-            public Vector256<T> Invoke(Vector256<T> x) => x * Vector256.Create(_pow2n);
-            public Vector512<T> Invoke(Vector512<T> x) => x * Vector512.Create(_pow2n);
-        }
-
-        /// <summary>T.RootN(x, n)</summary>
-        internal readonly struct RootNOperator<T>(int n) : IStatefulUnaryOperator<T> where T : IRootFunctions<T>
-        {
-            private readonly int _n = n;
-
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public T Invoke(T x) => T.RootN(x, _n);
-
-            public Vector128<T> Invoke(Vector128<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return ExpOperator<float>.Invoke(LogOperator<float>.Invoke(x.AsSingle()) / Vector128.Create((float)_n)).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return ExpOperator<double>.Invoke(LogOperator<double>.Invoke(x.AsDouble()) / Vector128.Create((double)_n)).As<double, T>();
-                }
-            }
-
-            public Vector256<T> Invoke(Vector256<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return ExpOperator<float>.Invoke(LogOperator<float>.Invoke(x.AsSingle()) / Vector256.Create((float)_n)).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return ExpOperator<double>.Invoke(LogOperator<double>.Invoke(x.AsDouble()) / Vector256.Create((double)_n)).As<double, T>();
-                }
-            }
-
-            public Vector512<T> Invoke(Vector512<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return ExpOperator<float>.Invoke(LogOperator<float>.Invoke(x.AsSingle()) / Vector512.Create((float)_n)).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return ExpOperator<double>.Invoke(LogOperator<double>.Invoke(x.AsDouble()) / Vector512.Create((double)_n)).As<double, T>();
-                }
-            }
-        }
-
-        /// <summary>T.Round(x)</summary>
-        internal readonly struct RoundToEvenOperator<T> : IUnaryOperator<T, T> where T : IFloatingPoint<T>
-        {
-            // This code is based on `nearbyint` from amd/aocl-libm-ose
-            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
-            //
-            // Licensed under the BSD 3-Clause "New" or "Revised" License
-            // See THIRD-PARTY-NOTICES.TXT for the full license text
-
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Round(x);
-
-            private const float SingleBoundary = 8388608.0f; // 2^23
-            private const double DoubleBoundary = 4503599627370496.0; // 2^52
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                Vector128<T> boundary = Vector128.Create(typeof(T) == typeof(float) ? T.CreateTruncating(SingleBoundary) : T.CreateTruncating(DoubleBoundary));
-                Vector128<T> temp = CopySignOperator<T>.Invoke(boundary, x);
-                return Vector128.ConditionalSelect(Vector128.GreaterThan(Vector128.Abs(x), boundary), x, CopySignOperator<T>.Invoke((x + temp) - temp, x));
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                Vector256<T> boundary = Vector256.Create(typeof(T) == typeof(float) ? T.CreateTruncating(SingleBoundary) : T.CreateTruncating(DoubleBoundary));
-                Vector256<T> temp = CopySignOperator<T>.Invoke(boundary, x);
-                return Vector256.ConditionalSelect(Vector256.GreaterThan(Vector256.Abs(x), boundary), x, CopySignOperator<T>.Invoke((x + temp) - temp, x));
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                Vector512<T> boundary = Vector512.Create(typeof(T) == typeof(float) ? T.CreateTruncating(SingleBoundary) : T.CreateTruncating(DoubleBoundary));
-                Vector512<T> temp = CopySignOperator<T>.Invoke(boundary, x);
-                return Vector512.ConditionalSelect(Vector512.GreaterThan(Vector512.Abs(x), boundary), x, CopySignOperator<T>.Invoke((x + temp) - temp, x));
-            }
-        }
-
-        /// <summary>T.Round(x, MidpointRounding.AwayFromZero)</summary>
-        internal readonly struct RoundAwayFromZeroOperator<T> : IUnaryOperator<T, T> where T : IFloatingPoint<T>
-        {
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
-
-            public static T Invoke(T x) => T.Round(x, MidpointRounding.AwayFromZero);
-
-            public static Vector128<T> Invoke(Vector128<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    if (AdvSimd.IsSupported)
-                    {
-                        return AdvSimd.RoundAwayFromZero(x.AsSingle()).As<float, T>();
-                    }
-
-                    return TruncateOperator<float>.Invoke(x.AsSingle() + CopySignOperator<float>.Invoke(Vector128.Create(0.49999997f), x.AsSingle())).As<float, T>();
-                }
-                else
-                {
-                    if (AdvSimd.Arm64.IsSupported)
-                    {
-                        return AdvSimd.Arm64.RoundAwayFromZero(x.AsDouble()).As<double, T>();
-                    }
-
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return TruncateOperator<double>.Invoke(x.AsDouble() + CopySignOperator<double>.Invoke(Vector128.Create(0.49999999999999994), x.AsDouble())).As<double, T>();
-                }
-            }
-
-            public static Vector256<T> Invoke(Vector256<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return TruncateOperator<float>.Invoke(x.AsSingle() + CopySignOperator<float>.Invoke(Vector256.Create(0.49999997f), x.AsSingle())).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return TruncateOperator<double>.Invoke(x.AsDouble() + CopySignOperator<double>.Invoke(Vector256.Create(0.49999999999999994), x.AsDouble())).As<double, T>();
-                }
-            }
-
-            public static Vector512<T> Invoke(Vector512<T> x)
-            {
-                if (typeof(T) == typeof(float))
-                {
-                    return TruncateOperator<float>.Invoke(x.AsSingle() + CopySignOperator<float>.Invoke(Vector512.Create(0.49999997f), x.AsSingle())).As<float, T>();
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return TruncateOperator<double>.Invoke(x.AsDouble() + CopySignOperator<double>.Invoke(Vector512.Create(0.49999999999999994), x.AsDouble())).As<double, T>();
-                }
-            }
-        }
-
-        /// <summary>(T.Round(x * power10, digits, mode)) / power10</summary>
-        internal readonly struct MultiplyRoundDivideOperator<T, TDelegatedRound> : IStatefulUnaryOperator<T>
-            where T : IFloatingPoint<T>
-            where TDelegatedRound : IUnaryOperator<T, T>
-        {
-            private readonly T _factor;
-
-            public MultiplyRoundDivideOperator(T factor)
-            {
-                Debug.Assert(typeof(T) == typeof(float) || typeof(T) == typeof(double));
-                _factor = factor;
-            }
-
-            public static bool Vectorizable => true;
-
-            private const float Single_RoundLimit = 1e8f;
-            private const double Double_RoundLimit = 1e16d;
-
-            public T Invoke(T x)
-            {
-                T limit = typeof(T) == typeof(float) ? T.CreateTruncating(Single_RoundLimit) : T.CreateTruncating(Double_RoundLimit);
-                return T.Abs(x) < limit ?
-                    TDelegatedRound.Invoke(x * _factor) / _factor :
-                    x;
-            }
-
-            public Vector128<T> Invoke(Vector128<T> x)
-            {
-                Vector128<T> limit = Vector128.Create(typeof(T) == typeof(float) ? T.CreateTruncating(Single_RoundLimit) : T.CreateTruncating(Double_RoundLimit));
-                return Vector128.ConditionalSelect(Vector128.LessThan(Vector128.Abs(x), limit),
-                    TDelegatedRound.Invoke(x * _factor) / _factor,
-                    x);
-            }
-
-            public Vector256<T> Invoke(Vector256<T> x)
-            {
-                Vector256<T> limit = Vector256.Create(typeof(T) == typeof(float) ? T.CreateTruncating(Single_RoundLimit) : T.CreateTruncating(Double_RoundLimit));
-                return Vector256.ConditionalSelect(Vector256.LessThan(Vector256.Abs(x), limit),
-                    TDelegatedRound.Invoke(x * _factor) / _factor,
-                    x);
-            }
-
-            public Vector512<T> Invoke(Vector512<T> x)
-            {
-                Vector512<T> limit = Vector512.Create(typeof(T) == typeof(float) ? T.CreateTruncating(Single_RoundLimit) : T.CreateTruncating(Double_RoundLimit));
-                return Vector512.ConditionalSelect(Vector512.LessThan(Vector512.Abs(x), limit),
-                    TDelegatedRound.Invoke(x * _factor) / _factor,
-                    x);
-            }
-        }
-
-        /// <summary>T.Round(x, digits, mode)</summary>
-        internal readonly struct RoundFallbackOperator<T>(int digits, MidpointRounding mode) : IStatefulUnaryOperator<T>
-            where T : IFloatingPoint<T>
-        {
-            private readonly int _digits = digits;
-            private readonly MidpointRounding _mode = mode;
-
-            public static bool Vectorizable => false;
-
-            public T Invoke(T x) => T.Round(x, _digits, _mode);
-
-            public Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.ILogB(x)</summary>
-        internal readonly struct ILogBOperator<T> : IUnaryOperator<T, int> where T : IFloatingPointIeee754<T>
-        {
-            public static bool Vectorizable => false; // TODO: vectorize for float
-
-            public static int Invoke(T x) => T.ILogB(x);
-            public static Vector128<int> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<int> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<int> Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>double.ILogB(x)</summary>
-        internal readonly struct ILogBDoubleOperator : IUnaryTwoToOneOperator<double, int>
-        {
-            public static bool Vectorizable => false; // TODO: vectorize
-
-            public static int Invoke(double x) => double.ILogB(x);
-            public static Vector128<int> Invoke(Vector128<double> lower, Vector128<double> upper) => throw new NotSupportedException();
-            public static Vector256<int> Invoke(Vector256<double> lower, Vector256<double> upper) => throw new NotSupportedException();
-            public static Vector512<int> Invoke(Vector512<double> lower, Vector512<double> upper) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.CreateChecked(x)</summary>
-        internal readonly struct ConvertCheckedFallbackOperator<TFrom, TTo> : IUnaryOperator<TFrom, TTo> where TFrom : INumberBase<TFrom> where TTo : INumberBase<TTo>
-        {
-            public static bool Vectorizable => false;
-
-            public static TTo Invoke(TFrom x) => TTo.CreateChecked(x);
-            public static Vector128<TTo> Invoke(Vector128<TFrom> x) => throw new NotSupportedException();
-            public static Vector256<TTo> Invoke(Vector256<TFrom> x) => throw new NotSupportedException();
-            public static Vector512<TTo> Invoke(Vector512<TFrom> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.CreateSaturating(x)</summary>
-        internal readonly struct ConvertSaturatingFallbackOperator<TFrom, TTo> : IUnaryOperator<TFrom, TTo> where TFrom : INumberBase<TFrom> where TTo : INumberBase<TTo>
-        {
-            public static bool Vectorizable => false;
-
-            public static TTo Invoke(TFrom x) => TTo.CreateSaturating(x);
-            public static Vector128<TTo> Invoke(Vector128<TFrom> x) => throw new NotSupportedException();
-            public static Vector256<TTo> Invoke(Vector256<TFrom> x) => throw new NotSupportedException();
-            public static Vector512<TTo> Invoke(Vector512<TFrom> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.CreateTruncating(x)</summary>
-        internal readonly struct ConvertTruncatingFallbackOperator<TFrom, TTo> : IUnaryOperator<TFrom, TTo> where TFrom : INumberBase<TFrom> where TTo : INumberBase<TTo>
-        {
-            public static bool Vectorizable => false;
-
-            public static TTo Invoke(TFrom x) => TTo.CreateTruncating(x);
-            public static Vector128<TTo> Invoke(Vector128<TFrom> x) => throw new NotSupportedException();
-            public static Vector256<TTo> Invoke(Vector256<TFrom> x) => throw new NotSupportedException();
-            public static Vector512<TTo> Invoke(Vector512<TFrom> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>(uint)float</summary>
-        internal readonly struct ConvertUInt32ToSingle : IUnaryOperator<uint, float>
-        {
-            public static bool Vectorizable => true;
-
-            public static float Invoke(uint x) => x;
-            public static Vector128<float> Invoke(Vector128<uint> x) => Vector128.ConvertToSingle(x);
-            public static Vector256<float> Invoke(Vector256<uint> x) => Vector256.ConvertToSingle(x);
-            public static Vector512<float> Invoke(Vector512<uint> x) => Vector512.ConvertToSingle(x);
-        }
-
-        /// <summary>(int)float</summary>
-        internal readonly struct ConvertInt32ToSingle : IUnaryOperator<int, float>
-        {
-            public static bool Vectorizable => true;
-
-            public static float Invoke(int x) => x;
-            public static Vector128<float> Invoke(Vector128<int> x) => Vector128.ConvertToSingle(x);
-            public static Vector256<float> Invoke(Vector256<int> x) => Vector256.ConvertToSingle(x);
-            public static Vector512<float> Invoke(Vector512<int> x) => Vector512.ConvertToSingle(x);
-        }
-
-        /// <summary>(float)uint</summary>
-        internal readonly struct ConvertSingleToUInt32 : IUnaryOperator<float, uint>
-        {
-            public static bool Vectorizable => false; // TODO https://github.com/dotnet/runtime/pull/97529: make this true once vectorized behavior matches scalar
-
-            public static uint Invoke(float x) => uint.CreateTruncating(x);
-            public static Vector128<uint> Invoke(Vector128<float> x) => Vector128.ConvertToUInt32(x);
-            public static Vector256<uint> Invoke(Vector256<float> x) => Vector256.ConvertToUInt32(x);
-            public static Vector512<uint> Invoke(Vector512<float> x) => Vector512.ConvertToUInt32(x);
-        }
-
-        /// <summary>(float)int</summary>
-        internal readonly struct ConvertSingleToInt32 : IUnaryOperator<float, int>
-        {
-            public static bool Vectorizable => false; // TODO https://github.com/dotnet/runtime/pull/97529: make this true once vectorized behavior matches scalar
-
-            public static int Invoke(float x) => int.CreateTruncating(x);
-            public static Vector128<int> Invoke(Vector128<float> x) => Vector128.ConvertToInt32(x);
-            public static Vector256<int> Invoke(Vector256<float> x) => Vector256.ConvertToInt32(x);
-            public static Vector512<int> Invoke(Vector512<float> x) => Vector512.ConvertToInt32(x);
-        }
-
-        /// <summary>(double)ulong</summary>
-        internal readonly struct ConvertUInt64ToDouble : IUnaryOperator<ulong, double>
-        {
-            public static bool Vectorizable => true;
-
-            public static double Invoke(ulong x) => x;
-            public static Vector128<double> Invoke(Vector128<ulong> x) => Vector128.ConvertToDouble(x);
-            public static Vector256<double> Invoke(Vector256<ulong> x) => Vector256.ConvertToDouble(x);
-            public static Vector512<double> Invoke(Vector512<ulong> x) => Vector512.ConvertToDouble(x);
-        }
-
-        /// <summary>(double)long</summary>
-        internal readonly struct ConvertInt64ToDouble : IUnaryOperator<long, double>
-        {
-            public static bool Vectorizable => true;
-
-            public static double Invoke(long x) => x;
-            public static Vector128<double> Invoke(Vector128<long> x) => Vector128.ConvertToDouble(x);
-            public static Vector256<double> Invoke(Vector256<long> x) => Vector256.ConvertToDouble(x);
-            public static Vector512<double> Invoke(Vector512<long> x) => Vector512.ConvertToDouble(x);
-        }
-
-        /// <summary>(ulong)double</summary>
-        internal readonly struct ConvertDoubleToUInt64 : IUnaryOperator<double, ulong>
-        {
-            public static bool Vectorizable => false; // TODO https://github.com/dotnet/runtime/pull/97529: make this true once vectorized behavior matches scalar
-
-            public static ulong Invoke(double x) => ulong.CreateTruncating(x);
-            public static Vector128<ulong> Invoke(Vector128<double> x) => Vector128.ConvertToUInt64(x);
-            public static Vector256<ulong> Invoke(Vector256<double> x) => Vector256.ConvertToUInt64(x);
-            public static Vector512<ulong> Invoke(Vector512<double> x) => Vector512.ConvertToUInt64(x);
-        }
-
-        /// <summary>(long)double</summary>
-        internal readonly struct ConvertDoubleToInt64 : IUnaryOperator<double, long>
-        {
-            public static bool Vectorizable => false; // TODO https://github.com/dotnet/runtime/pull/97529: make this true once vectorized behavior matches scalar
-
-            public static long Invoke(double x) => long.CreateTruncating(x);
-            public static Vector128<long> Invoke(Vector128<double> x) => Vector128.ConvertToInt64(x);
-            public static Vector256<long> Invoke(Vector256<double> x) => Vector256.ConvertToInt64(x);
-            public static Vector512<long> Invoke(Vector512<double> x) => Vector512.ConvertToInt64(x);
-        }
-
-        /// <summary>(double)float</summary>
-        internal readonly struct WidenSingleToDoubleOperator : IUnaryOneToTwoOperator<float, double>
-        {
-            public static bool Vectorizable => true;
-
-            public static double Invoke(float x) => x;
-            public static (Vector128<double> Lower, Vector128<double> Upper) Invoke(Vector128<float> x) => Vector128.Widen(x);
-            public static (Vector256<double> Lower, Vector256<double> Upper) Invoke(Vector256<float> x) => Vector256.Widen(x);
-            public static (Vector512<double> Lower, Vector512<double> Upper) Invoke(Vector512<float> x) => Vector512.Widen(x);
-        }
-
-        /// <summary>(float)double</summary>
-        internal readonly struct NarrowDoubleToSingleOperator : IUnaryTwoToOneOperator<double, float>
-        {
-            public static bool Vectorizable => true;
-
-            public static float Invoke(double x) => (float)x;
-            public static Vector128<float> Invoke(Vector128<double> lower, Vector128<double> upper) => Vector128.Narrow(lower, upper);
-            public static Vector256<float> Invoke(Vector256<double> lower, Vector256<double> upper) => Vector256.Narrow(lower, upper);
-            public static Vector512<float> Invoke(Vector512<double> lower, Vector512<double> upper) => Vector512.Narrow(lower, upper);
-        }
-
-        /// <summary>(ushort)byte</summary>
-        internal readonly struct WidenByteToUInt16Operator : IUnaryOneToTwoOperator<byte, ushort>
-        {
-            public static bool Vectorizable => true;
-
-            public static ushort Invoke(byte x) => x;
-            public static (Vector128<ushort> Lower, Vector128<ushort> Upper) Invoke(Vector128<byte> x) => Vector128.Widen(x);
-            public static (Vector256<ushort> Lower, Vector256<ushort> Upper) Invoke(Vector256<byte> x) => Vector256.Widen(x);
-            public static (Vector512<ushort> Lower, Vector512<ushort> Upper) Invoke(Vector512<byte> x) => Vector512.Widen(x);
-        }
-
-        /// <summary>(byte)ushort</summary>
-        internal readonly struct NarrowUInt16ToByteOperator : IUnaryTwoToOneOperator<ushort, byte>
-        {
-            public static bool Vectorizable => true;
-
-            public static byte Invoke(ushort x) => (byte)x;
-            public static Vector128<byte> Invoke(Vector128<ushort> lower, Vector128<ushort> upper) => Vector128.Narrow(lower, upper);
-            public static Vector256<byte> Invoke(Vector256<ushort> lower, Vector256<ushort> upper) => Vector256.Narrow(lower, upper);
-            public static Vector512<byte> Invoke(Vector512<ushort> lower, Vector512<ushort> upper) => Vector512.Narrow(lower, upper);
-        }
-
-        /// <summary>(short)sbyte</summary>
-        internal readonly struct WidenSByteToInt16Operator : IUnaryOneToTwoOperator<sbyte, short>
-        {
-            public static bool Vectorizable => true;
-
-            public static short Invoke(sbyte x) => x;
-            public static (Vector128<short> Lower, Vector128<short> Upper) Invoke(Vector128<sbyte> x) => Vector128.Widen(x);
-            public static (Vector256<short> Lower, Vector256<short> Upper) Invoke(Vector256<sbyte> x) => Vector256.Widen(x);
-            public static (Vector512<short> Lower, Vector512<short> Upper) Invoke(Vector512<sbyte> x) => Vector512.Widen(x);
-        }
-
-        /// <summary>(sbyte)short</summary>
-        internal readonly struct NarrowInt16ToSByteOperator : IUnaryTwoToOneOperator<short, sbyte>
-        {
-            public static bool Vectorizable => true;
-
-            public static sbyte Invoke(short x) => (sbyte)x;
-            public static Vector128<sbyte> Invoke(Vector128<short> lower, Vector128<short> upper) => Vector128.Narrow(lower, upper);
-            public static Vector256<sbyte> Invoke(Vector256<short> lower, Vector256<short> upper) => Vector256.Narrow(lower, upper);
-            public static Vector512<sbyte> Invoke(Vector512<short> lower, Vector512<short> upper) => Vector512.Narrow(lower, upper);
-        }
-
-        /// <summary>(uint)ushort</summary>
-        internal readonly struct WidenUInt16ToUInt32Operator : IUnaryOneToTwoOperator<ushort, uint>
-        {
-            public static bool Vectorizable => true;
-
-            public static uint Invoke(ushort x) => x;
-            public static (Vector128<uint> Lower, Vector128<uint> Upper) Invoke(Vector128<ushort> x) => Vector128.Widen(x);
-            public static (Vector256<uint> Lower, Vector256<uint> Upper) Invoke(Vector256<ushort> x) => Vector256.Widen(x);
-            public static (Vector512<uint> Lower, Vector512<uint> Upper) Invoke(Vector512<ushort> x) => Vector512.Widen(x);
-        }
-
-        /// <summary>(ushort)uint</summary>
-        internal readonly struct NarrowUInt32ToUInt16Operator : IUnaryTwoToOneOperator<uint, ushort>
-        {
-            public static bool Vectorizable => true;
-
-            public static ushort Invoke(uint x) => (ushort)x;
-            public static Vector128<ushort> Invoke(Vector128<uint> lower, Vector128<uint> upper) => Vector128.Narrow(lower, upper);
-            public static Vector256<ushort> Invoke(Vector256<uint> lower, Vector256<uint> upper) => Vector256.Narrow(lower, upper);
-            public static Vector512<ushort> Invoke(Vector512<uint> lower, Vector512<uint> upper) => Vector512.Narrow(lower, upper);
-        }
-
-        /// <summary>(int)short</summary>
-        internal readonly struct WidenInt16ToInt32Operator : IUnaryOneToTwoOperator<short, int>
-        {
-            public static bool Vectorizable => true;
-
-            public static int Invoke(short x) => x;
-            public static (Vector128<int> Lower, Vector128<int> Upper) Invoke(Vector128<short> x) => Vector128.Widen(x);
-            public static (Vector256<int> Lower, Vector256<int> Upper) Invoke(Vector256<short> x) => Vector256.Widen(x);
-            public static (Vector512<int> Lower, Vector512<int> Upper) Invoke(Vector512<short> x) => Vector512.Widen(x);
-        }
-
-        /// <summary>(short)int</summary>
-        internal readonly struct NarrowInt32ToInt16Operator : IUnaryTwoToOneOperator<int, short>
-        {
-            public static bool Vectorizable => true;
-
-            public static short Invoke(int x) => (short)x;
-            public static Vector128<short> Invoke(Vector128<int> lower, Vector128<int> upper) => Vector128.Narrow(lower, upper);
-            public static Vector256<short> Invoke(Vector256<int> lower, Vector256<int> upper) => Vector256.Narrow(lower, upper);
-            public static Vector512<short> Invoke(Vector512<int> lower, Vector512<int> upper) => Vector512.Narrow(lower, upper);
-        }
-
-        /// <summary>(ulong)uint</summary>
-        internal readonly struct WidenUInt32ToUInt64Operator : IUnaryOneToTwoOperator<uint, ulong>
-        {
-            public static bool Vectorizable => true;
-
-            public static ulong Invoke(uint x) => x;
-            public static (Vector128<ulong> Lower, Vector128<ulong> Upper) Invoke(Vector128<uint> x) => Vector128.Widen(x);
-            public static (Vector256<ulong> Lower, Vector256<ulong> Upper) Invoke(Vector256<uint> x) => Vector256.Widen(x);
-            public static (Vector512<ulong> Lower, Vector512<ulong> Upper) Invoke(Vector512<uint> x) => Vector512.Widen(x);
-        }
-
-        /// <summary>(uint)ulong</summary>
-        internal readonly struct NarrowUInt64ToUInt32Operator : IUnaryTwoToOneOperator<ulong, uint>
-        {
-            public static bool Vectorizable => true;
-
-            public static uint Invoke(ulong x) => (uint)x;
-            public static Vector128<uint> Invoke(Vector128<ulong> lower, Vector128<ulong> upper) => Vector128.Narrow(lower, upper);
-            public static Vector256<uint> Invoke(Vector256<ulong> lower, Vector256<ulong> upper) => Vector256.Narrow(lower, upper);
-            public static Vector512<uint> Invoke(Vector512<ulong> lower, Vector512<ulong> upper) => Vector512.Narrow(lower, upper);
-        }
-
-        /// <summary>(long)int</summary>
-        internal readonly struct WidenInt32ToInt64Operator : IUnaryOneToTwoOperator<int, long>
-        {
-            public static bool Vectorizable => true;
-
-            public static long Invoke(int x) => x;
-            public static (Vector128<long> Lower, Vector128<long> Upper) Invoke(Vector128<int> x) => Vector128.Widen(x);
-            public static (Vector256<long> Lower, Vector256<long> Upper) Invoke(Vector256<int> x) => Vector256.Widen(x);
-            public static (Vector512<long> Lower, Vector512<long> Upper) Invoke(Vector512<int> x) => Vector512.Widen(x);
-        }
-
-        /// <summary>(int)long</summary>
-        internal readonly struct NarrowInt64ToInt32Operator : IUnaryTwoToOneOperator<long, int>
-        {
-            public static bool Vectorizable => true;
-
-            public static int Invoke(long x) => (int)x;
-            public static Vector128<int> Invoke(Vector128<long> lower, Vector128<long> upper) => Vector128.Narrow(lower, upper);
-            public static Vector256<int> Invoke(Vector256<long> lower, Vector256<long> upper) => Vector256.Narrow(lower, upper);
-            public static Vector512<int> Invoke(Vector512<long> lower, Vector512<long> upper) => Vector512.Narrow(lower, upper);
-        }
-
-        internal readonly struct WidenHalfAsInt16ToSingleOperator : IUnaryOneToTwoOperator<short, float>
-        {
-            // This implements a vectorized version of the `explicit operator float(Half value) operator`.
-            // See detailed description of the algorithm used here:
-            //     https://github.com/dotnet/runtime/blob/3bf40a378f00cb5bf18ff62796bc7097719b974c/src/libraries/System.Private.CoreLib/src/System/Half.cs#L1010-L1040
-            // The cast operator converts a Half represented as uint to a float. This does the same, with an input VectorXx<uint> and an output VectorXx<float>.
-            // The VectorXx<uint> is created by reading a vector of Halfs as a VectorXx<short> then widened to two VectorXx<int>s and cast to VectorXx<uint>s.
-            // We loop handling one input vector at a time, producing two output float vectors.
-
-            private const uint ExponentLowerBound = 0x3880_0000u; // The smallest positive normal number in Half, converted to Single
-            private const uint ExponentOffset = 0x3800_0000u; // BitConverter.SingleToUInt32Bits(1.0f) - ((uint)BitConverter.HalfToUInt16Bits((Half)1.0f) << 13)
-            private const uint SingleSignMask = 0x8000_0000; // float.SignMask; // Mask for sign bit in Single
-            private const uint HalfExponentMask = 0x7C00; // Mask for exponent bits in Half
-            private const uint HalfToSingleBitsMask = 0x0FFF_E000; // Mask for bits in Single converted from Half
-
-            public static bool Vectorizable => true;
-
-            public static float Invoke(short x) => (float)Unsafe.BitCast<short, Half>(x);
-
-            public static (Vector128<float> Lower, Vector128<float> Upper) Invoke(Vector128<short> x)
-            {
-                (Vector128<int> lowerInt32, Vector128<int> upperInt32) = Vector128.Widen(x);
-                return
-                    (HalfAsWidenedUInt32ToSingle(lowerInt32.AsUInt32()),
-                     HalfAsWidenedUInt32ToSingle(upperInt32.AsUInt32()));
-
-                static Vector128<float> HalfAsWidenedUInt32ToSingle(Vector128<uint> value)
-                {
-                    // Extract sign bit of value
-                    Vector128<uint> sign = value & Vector128.Create(SingleSignMask);
-
-                    // Copy sign bit to upper bits
-                    Vector128<uint> bitValueInProcess = value;
-
-                    // Extract exponent bits of value (BiasedExponent is not for here as it performs unnecessary shift)
-                    Vector128<uint> offsetExponent = bitValueInProcess & Vector128.Create(HalfExponentMask);
-
-                    // ~0u when value is subnormal, 0 otherwise
-                    Vector128<uint> subnormalMask = Vector128.Equals(offsetExponent, Vector128<uint>.Zero);
-
-                    // ~0u when value is either Infinity or NaN, 0 otherwise
-                    Vector128<uint> infinityOrNaNMask = Vector128.Equals(offsetExponent, Vector128.Create(HalfExponentMask));
-
-                    // 0x3880_0000u if value is subnormal, 0 otherwise
-                    Vector128<uint> maskedExponentLowerBound = subnormalMask & Vector128.Create(ExponentLowerBound);
-
-                    // 0x3880_0000u if value is subnormal, 0x3800_0000u otherwise
-                    Vector128<uint> offsetMaskedExponentLowerBound = Vector128.Create(ExponentOffset) | maskedExponentLowerBound;
-
-                    // Match the position of the boundary of exponent bits and fraction bits with IEEE 754 Binary32(Single)
-                    bitValueInProcess = Vector128.ShiftLeft(bitValueInProcess, 13);
-
-                    // Double the offsetMaskedExponentLowerBound if value is either Infinity or NaN
-                    offsetMaskedExponentLowerBound = Vector128.ConditionalSelect(Vector128.Equals(infinityOrNaNMask, Vector128<uint>.Zero),
-                        offsetMaskedExponentLowerBound,
-                        Vector128.ShiftLeft(offsetMaskedExponentLowerBound, 1));
-
-                    // Extract exponent bits and fraction bits of value
-                    bitValueInProcess &= Vector128.Create(HalfToSingleBitsMask);
-
-                    // Adjust exponent to match the range of exponent
-                    bitValueInProcess += offsetMaskedExponentLowerBound;
-
-                    // If value is subnormal, remove unnecessary 1 on top of fraction bits.
-                    Vector128<uint> absoluteValue = (bitValueInProcess.AsSingle() - maskedExponentLowerBound.AsSingle()).AsUInt32();
-
-                    // Merge sign bit with rest
-                    return (absoluteValue | sign).AsSingle();
-                }
-            }
-
-            public static (Vector256<float> Lower, Vector256<float> Upper) Invoke(Vector256<short> x)
-            {
-                (Vector256<int> lowerInt32, Vector256<int> upperInt32) = Vector256.Widen(x);
-                return
-                    (HalfAsWidenedUInt32ToSingle(lowerInt32.AsUInt32()),
-                     HalfAsWidenedUInt32ToSingle(upperInt32.AsUInt32()));
-
-                static Vector256<float> HalfAsWidenedUInt32ToSingle(Vector256<uint> value)
-                {
-                    // Extract sign bit of value
-                    Vector256<uint> sign = value & Vector256.Create(SingleSignMask);
-
-                    // Copy sign bit to upper bits
-                    Vector256<uint> bitValueInProcess = value;
-
-                    // Extract exponent bits of value (BiasedExponent is not for here as it performs unnecessary shift)
-                    Vector256<uint> offsetExponent = bitValueInProcess & Vector256.Create(HalfExponentMask);
-
-                    // ~0u when value is subnormal, 0 otherwise
-                    Vector256<uint> subnormalMask = Vector256.Equals(offsetExponent, Vector256<uint>.Zero);
-
-                    // ~0u when value is either Infinity or NaN, 0 otherwise
-                    Vector256<uint> infinityOrNaNMask = Vector256.Equals(offsetExponent, Vector256.Create(HalfExponentMask));
-
-                    // 0x3880_0000u if value is subnormal, 0 otherwise
-                    Vector256<uint> maskedExponentLowerBound = subnormalMask & Vector256.Create(ExponentLowerBound);
-
-                    // 0x3880_0000u if value is subnormal, 0x3800_0000u otherwise
-                    Vector256<uint> offsetMaskedExponentLowerBound = Vector256.Create(ExponentOffset) | maskedExponentLowerBound;
-
-                    // Match the position of the boundary of exponent bits and fraction bits with IEEE 754 Binary32(Single)
-                    bitValueInProcess = Vector256.ShiftLeft(bitValueInProcess, 13);
-
-                    // Double the offsetMaskedExponentLowerBound if value is either Infinity or NaN
-                    offsetMaskedExponentLowerBound = Vector256.ConditionalSelect(Vector256.Equals(infinityOrNaNMask, Vector256<uint>.Zero),
-                        offsetMaskedExponentLowerBound,
-                        Vector256.ShiftLeft(offsetMaskedExponentLowerBound, 1));
-
-                    // Extract exponent bits and fraction bits of value
-                    bitValueInProcess &= Vector256.Create(HalfToSingleBitsMask);
-
-                    // Adjust exponent to match the range of exponent
-                    bitValueInProcess += offsetMaskedExponentLowerBound;
-
-                    // If value is subnormal, remove unnecessary 1 on top of fraction bits.
-                    Vector256<uint> absoluteValue = (bitValueInProcess.AsSingle() - maskedExponentLowerBound.AsSingle()).AsUInt32();
-
-                    // Merge sign bit with rest
-                    return (absoluteValue | sign).AsSingle();
-                }
-            }
-
-            public static (Vector512<float> Lower, Vector512<float> Upper) Invoke(Vector512<short> x)
-            {
-                (Vector512<int> lowerInt32, Vector512<int> upperInt32) = Vector512.Widen(x);
-                return
-                    (HalfAsWidenedUInt32ToSingle(lowerInt32.AsUInt32()),
-                     HalfAsWidenedUInt32ToSingle(upperInt32.AsUInt32()));
-
-                static Vector512<float> HalfAsWidenedUInt32ToSingle(Vector512<uint> value)
-                {
-                    // Extract sign bit of value
-                    Vector512<uint> sign = value & Vector512.Create(SingleSignMask);
-
-                    // Copy sign bit to upper bits
-                    Vector512<uint> bitValueInProcess = value;
-
-                    // Extract exponent bits of value (BiasedExponent is not for here as it performs unnecessary shift)
-                    Vector512<uint> offsetExponent = bitValueInProcess & Vector512.Create(HalfExponentMask);
-
-                    // ~0u when value is subnormal, 0 otherwise
-                    Vector512<uint> subnormalMask = Vector512.Equals(offsetExponent, Vector512<uint>.Zero);
-
-                    // ~0u when value is either Infinity or NaN, 0 otherwise
-                    Vector512<uint> infinityOrNaNMask = Vector512.Equals(offsetExponent, Vector512.Create(HalfExponentMask));
-
-                    // 0x3880_0000u if value is subnormal, 0 otherwise
-                    Vector512<uint> maskedExponentLowerBound = subnormalMask & Vector512.Create(ExponentLowerBound);
-
-                    // 0x3880_0000u if value is subnormal, 0x3800_0000u otherwise
-                    Vector512<uint> offsetMaskedExponentLowerBound = Vector512.Create(ExponentOffset) | maskedExponentLowerBound;
-
-                    // Match the position of the boundary of exponent bits and fraction bits with IEEE 754 Binary32(Single)
-                    bitValueInProcess = Vector512.ShiftLeft(bitValueInProcess, 13);
-
-                    // Double the offsetMaskedExponentLowerBound if value is either Infinity or NaN
-                    offsetMaskedExponentLowerBound = Vector512.ConditionalSelect(Vector512.Equals(infinityOrNaNMask, Vector512<uint>.Zero),
-                        offsetMaskedExponentLowerBound,
-                        Vector512.ShiftLeft(offsetMaskedExponentLowerBound, 1));
-
-                    // Extract exponent bits and fraction bits of value
-                    bitValueInProcess &= Vector512.Create(HalfToSingleBitsMask);
-
-                    // Adjust exponent to match the range of exponent
-                    bitValueInProcess += offsetMaskedExponentLowerBound;
-
-                    // If value is subnormal, remove unnecessary 1 on top of fraction bits.
-                    Vector512<uint> absoluteValue = (bitValueInProcess.AsSingle() - maskedExponentLowerBound.AsSingle()).AsUInt32();
-
-                    // Merge sign bit with rest
-                    return (absoluteValue | sign).AsSingle();
-                }
-            }
-        }
-
-        internal readonly struct NarrowSingleToHalfAsUInt16Operator : IUnaryTwoToOneOperator<float, ushort>
-        {
-            // This implements a vectorized version of the `explicit operator Half(float value) operator`.
-            // See detailed description of the algorithm used here:
-            //     https://github.com/dotnet/runtime/blob/ca8d6f0420096831766ec11c7d400e4f7ccc7a34/src/libraries/System.Private.CoreLib/src/System/Half.cs#L606-L714
-            // The cast operator converts a float to a Half represented as a UInt32, then narrows to a UInt16, and reinterpret casts to Half.
-            // This does the same, with an input VectorXx<float> and an output VectorXx<uint>.
-            // Loop handling two input vectors at a time; each input float is double the size of each output Half,
-            // so we need two vectors of floats to produce one vector of Halfs. Half isn't supported in VectorXx<T>,
-            // so we convert the VectorXx<float> to a VectorXx<uint>, and the caller then uses this twice, narrows the combination
-            // into a VectorXx<ushort>, and then saves that out to the destination `ref Half` reinterpreted as `ref ushort`.
-
-            private const uint MinExp = 0x3880_0000u; // Minimum exponent for rounding
-            private const uint Exponent126 = 0x3f00_0000u; // Exponent displacement #1
-            private const uint SingleBiasedExponentMask = 0x7F80_0000; // float.BiasedExponentMask; // Exponent mask
-            private const uint Exponent13 = 0x0680_0000u; // Exponent displacement #2
-            private const float MaxHalfValueBelowInfinity = 65520.0f; // Maximum value that is not Infinity in Half
-            private const uint ExponentMask = 0x7C00; // Mask for exponent bits in Half
-            private const uint SingleSignMask = 0x8000_0000u; // float.SignMask; // Mask for sign bit in float
-
-            public static bool Vectorizable => true;
-
-            public static ushort Invoke(float x) => Unsafe.BitCast<Half, ushort>((Half)x);
-
-            public static Vector128<ushort> Invoke(Vector128<float> lower, Vector128<float> upper)
-            {
-                return Vector128.Narrow(
-                    SingleToHalfAsWidenedUInt32(lower),
-                    SingleToHalfAsWidenedUInt32(upper));
-
-                static Vector128<uint> SingleToHalfAsWidenedUInt32(Vector128<float> value)
-                {
-                    Vector128<uint> bitValue = value.AsUInt32();
-
-                    // Extract sign bit
-                    Vector128<uint> sign = Vector128.ShiftRightLogical(bitValue & Vector128.Create(SingleSignMask), 16);
-
-                    // Detecting NaN (0u if value is NaN; otherwise, ~0u)
-                    Vector128<uint> realMask = Vector128.Equals(value, value).AsUInt32();
-
-                    // Clear sign bit
-                    value = Vector128.Abs(value);
-
-                    // Rectify values that are Infinity in Half.
-                    value = Vector128.Min(Vector128.Create(MaxHalfValueBelowInfinity), value);
-
-                    // Rectify lower exponent
-                    Vector128<uint> exponentOffset0 = Vector128.Max(value, Vector128.Create(MinExp).AsSingle()).AsUInt32();
-
-                    // Extract exponent
-                    exponentOffset0 &= Vector128.Create(SingleBiasedExponentMask);
-
-                    // Add exponent by 13
-                    exponentOffset0 += Vector128.Create(Exponent13);
-
-                    // Round Single into Half's precision (NaN also gets modified here, just setting the MSB of fraction)
-                    value += exponentOffset0.AsSingle();
-                    bitValue = value.AsUInt32();
-
-                    // Only exponent bits will be modified if NaN
-                    Vector128<uint> maskedHalfExponentForNaN = ~realMask & Vector128.Create(ExponentMask);
-
-                    // Subtract exponent by 126
-                    bitValue -= Vector128.Create(Exponent126);
-
-                    // Shift bitValue right by 13 bits to match the boundary of exponent part and fraction part.
-                    Vector128<uint> newExponent = Vector128.ShiftRightLogical(bitValue, 13);
-
-                    // Clear the fraction parts if the value was NaN.
-                    bitValue &= realMask;
-
-                    // Merge the exponent part with fraction part, and add the exponent part and fraction part's overflow.
-                    bitValue += newExponent;
-
-                    // Clear exponents if value is NaN
-                    bitValue &= ~maskedHalfExponentForNaN;
-
-                    // Merge sign bit with possible NaN exponent
-                    Vector128<uint> signAndMaskedExponent = maskedHalfExponentForNaN | sign;
-
-                    // Merge sign bit and possible NaN exponent
-                    bitValue |= signAndMaskedExponent;
-
-                    // The final result
-                    return bitValue;
-                }
-            }
-
-            public static Vector256<ushort> Invoke(Vector256<float> lower, Vector256<float> upper)
-            {
-                return Vector256.Narrow(
-                    SingleToHalfAsWidenedUInt32(lower),
-                    SingleToHalfAsWidenedUInt32(upper));
-
-                static Vector256<uint> SingleToHalfAsWidenedUInt32(Vector256<float> value)
-                {
-                    Vector256<uint> bitValue = value.AsUInt32();
-
-                    // Extract sign bit
-                    Vector256<uint> sign = Vector256.ShiftRightLogical(bitValue & Vector256.Create(SingleSignMask), 16);
-
-                    // Detecting NaN (0u if value is NaN; otherwise, ~0u)
-                    Vector256<uint> realMask = Vector256.Equals(value, value).AsUInt32();
-
-                    // Clear sign bit
-                    value = Vector256.Abs(value);
-
-                    // Rectify values that are Infinity in Half.
-                    value = Vector256.Min(Vector256.Create(MaxHalfValueBelowInfinity), value);
-
-                    // Rectify lower exponent
-                    Vector256<uint> exponentOffset0 = Vector256.Max(value, Vector256.Create(MinExp).AsSingle()).AsUInt32();
-
-                    // Extract exponent
-                    exponentOffset0 &= Vector256.Create(SingleBiasedExponentMask);
-
-                    // Add exponent by 13
-                    exponentOffset0 += Vector256.Create(Exponent13);
-
-                    // Round Single into Half's precision (NaN also gets modified here, just setting the MSB of fraction)
-                    value += exponentOffset0.AsSingle();
-                    bitValue = value.AsUInt32();
-
-                    // Only exponent bits will be modified if NaN
-                    Vector256<uint> maskedHalfExponentForNaN = ~realMask & Vector256.Create(ExponentMask);
-
-                    // Subtract exponent by 126
-                    bitValue -= Vector256.Create(Exponent126);
-
-                    // Shift bitValue right by 13 bits to match the boundary of exponent part and fraction part.
-                    Vector256<uint> newExponent = Vector256.ShiftRightLogical(bitValue, 13);
-
-                    // Clear the fraction parts if the value was NaN.
-                    bitValue &= realMask;
-
-                    // Merge the exponent part with fraction part, and add the exponent part and fraction part's overflow.
-                    bitValue += newExponent;
-
-                    // Clear exponents if value is NaN
-                    bitValue &= ~maskedHalfExponentForNaN;
-
-                    // Merge sign bit with possible NaN exponent
-                    Vector256<uint> signAndMaskedExponent = maskedHalfExponentForNaN | sign;
-
-                    // Merge sign bit and possible NaN exponent
-                    bitValue |= signAndMaskedExponent;
-
-                    // The final result
-                    return bitValue;
-                }
-            }
-
-            public static Vector512<ushort> Invoke(Vector512<float> lower, Vector512<float> upper)
-            {
-                return Vector512.Narrow(
-                    SingleToHalfAsWidenedUInt32(lower),
-                    SingleToHalfAsWidenedUInt32(upper));
-
-                static Vector512<uint> SingleToHalfAsWidenedUInt32(Vector512<float> value)
-                {
-                    Vector512<uint> bitValue = value.AsUInt32();
-
-                    // Extract sign bit
-                    Vector512<uint> sign = Vector512.ShiftRightLogical(bitValue & Vector512.Create(SingleSignMask), 16);
-
-                    // Detecting NaN (0u if value is NaN; otherwise, ~0u)
-                    Vector512<uint> realMask = Vector512.Equals(value, value).AsUInt32();
-
-                    // Clear sign bit
-                    value = Vector512.Abs(value);
-
-                    // Rectify values that are Infinity in Half.
-                    value = Vector512.Min(Vector512.Create(MaxHalfValueBelowInfinity), value);
-
-                    // Rectify lower exponent
-                    Vector512<uint> exponentOffset0 = Vector512.Max(value, Vector512.Create(MinExp).AsSingle()).AsUInt32();
-
-                    // Extract exponent
-                    exponentOffset0 &= Vector512.Create(SingleBiasedExponentMask);
-
-                    // Add exponent by 13
-                    exponentOffset0 += Vector512.Create(Exponent13);
-
-                    // Round Single into Half's precision (NaN also gets modified here, just setting the MSB of fraction)
-                    value += exponentOffset0.AsSingle();
-                    bitValue = value.AsUInt32();
-
-                    // Only exponent bits will be modified if NaN
-                    Vector512<uint> maskedHalfExponentForNaN = ~realMask & Vector512.Create(ExponentMask);
-
-                    // Subtract exponent by 126
-                    bitValue -= Vector512.Create(Exponent126);
-
-                    // Shift bitValue right by 13 bits to match the boundary of exponent part and fraction part.
-                    Vector512<uint> newExponent = Vector512.ShiftRightLogical(bitValue, 13);
-
-                    // Clear the fraction parts if the value was NaN.
-                    bitValue &= realMask;
-
-                    // Merge the exponent part with fraction part, and add the exponent part and fraction part's overflow.
-                    bitValue += newExponent;
-
-                    // Clear exponents if value is NaN
-                    bitValue &= ~maskedHalfExponentForNaN;
-
-                    // Merge sign bit with possible NaN exponent
-                    Vector512<uint> signAndMaskedExponent = maskedHalfExponentForNaN | sign;
-
-                    // Merge sign bit and possible NaN exponent
-                    bitValue |= signAndMaskedExponent;
-
-                    // The final result
-                    return bitValue;
-                }
-            }
-        }
-
-        /// <summary>T.SinCos(x)</summary>
-        internal readonly struct SinCosOperator<T> : IUnaryInputBinaryOutput<T> where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => false; // TODO: vectorize
-
-            public static (T, T) Invoke(T x) => T.SinCos(x);
-            public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>T.SinCosPi(x)</summary>
-        internal readonly struct SinCosPiOperator<T> : IUnaryInputBinaryOutput<T> where T : ITrigonometricFunctions<T>
-        {
-            public static bool Vectorizable => false; // TODO: vectorize
-
-            public static (T, T) Invoke(T x) => T.SinCosPi(x);
-            public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x) => throw new NotSupportedException();
-        }
-
-        /// <summary>Operator that takes one input value and returns a single value.</summary>
-        /// <remarks>The input and output type must be of the same size if vectorization is desired.</remarks>
-        internal interface IUnaryOperator<TInput, TOutput>
-        {
-            static abstract bool Vectorizable { get; }
-            static abstract TOutput Invoke(TInput x);
-            static abstract Vector128<TOutput> Invoke(Vector128<TInput> x);
-            static abstract Vector256<TOutput> Invoke(Vector256<TInput> x);
-            static abstract Vector512<TOutput> Invoke(Vector512<TInput> x);
-        }
-
-        /// <summary>Operator that takes one input value and returns a single value.</summary>
-        /// <remarks>The input type must be half the size of the output type.</remarks>
-        private interface IUnaryOneToTwoOperator<TInput, TOutput>
-        {
-            static abstract bool Vectorizable { get; }
-            static abstract TOutput Invoke(TInput x);
-            static abstract (Vector128<TOutput> Lower, Vector128<TOutput> Upper) Invoke(Vector128<TInput> x);
-            static abstract (Vector256<TOutput> Lower, Vector256<TOutput> Upper) Invoke(Vector256<TInput> x);
-            static abstract (Vector512<TOutput> Lower, Vector512<TOutput> Upper) Invoke(Vector512<TInput> x);
-        }
-
-        /// <summary>Operator that takes one input value and returns a single value.</summary>
-        /// <remarks>The input type must be twice the size of the output type.</remarks>
-        private interface IUnaryTwoToOneOperator<TInput, TOutput>
-        {
-            static abstract bool Vectorizable { get; }
-            static abstract TOutput Invoke(TInput x);
-            static abstract Vector128<TOutput> Invoke(Vector128<TInput> lower, Vector128<TInput> upper);
-            static abstract Vector256<TOutput> Invoke(Vector256<TInput> lower, Vector256<TInput> upper);
-            static abstract Vector512<TOutput> Invoke(Vector512<TInput> lower, Vector512<TInput> upper);
-        }
-
-        /// <summary>Operator that takes one input value and returns two output values.</summary>
-        private interface IUnaryInputBinaryOutput<T>
-        {
-            static abstract bool Vectorizable { get; }
-            static abstract (T, T) Invoke(T x);
-            static abstract (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x);
-            static abstract (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x);
-            static abstract (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x);
-        }
-
-        /// <summary>Operator that takes one input value and returns a single value.</summary>
-        private interface IStatefulUnaryOperator<T>
-        {
-            static abstract bool Vectorizable { get; }
-            T Invoke(T x);
-            Vector128<T> Invoke(Vector128<T> x);
-            Vector256<T> Invoke(Vector256<T> x);
-            Vector512<T> Invoke(Vector512<T> x);
-        }
-
-        /// <summary>Operator that takes two input values and returns a single value.</summary>
-        private interface IBinaryOperator<T>
-        {
-            static abstract bool Vectorizable { get; }
-            static abstract T Invoke(T x, T y);
-            static abstract Vector128<T> Invoke(Vector128<T> x, Vector128<T> y);
-            static abstract Vector256<T> Invoke(Vector256<T> x, Vector256<T> y);
-            static abstract Vector512<T> Invoke(Vector512<T> x, Vector512<T> y);
-        }
-
-        /// <summary><see cref="IBinaryOperator{T}"/> that specializes horizontal aggregation of all elements in a vector.</summary>
-        private interface IAggregationOperator<T> : IBinaryOperator<T>
-        {
-            static abstract T Invoke(Vector128<T> x);
-            static abstract T Invoke(Vector256<T> x);
-            static abstract T Invoke(Vector512<T> x);
-
-            static virtual T IdentityValue => throw new NotSupportedException();
-        }
-
-        /// <summary>Operator that takes three input values and returns a single value.</summary>
-        private interface ITernaryOperator<T>
-        {
-            static abstract T Invoke(T x, T y, T z);
-            static abstract Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z);
-            static abstract Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> z);
-            static abstract Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z);
-        }
-    }
-}
diff --git a/src/libraries/System.Numerics.Tensors/tests/Helpers.cs b/src/libraries/System.Numerics.Tensors/tests/Helpers.cs
index d6b5eef63d9d..729cacda3516 100644
--- a/src/libraries/System.Numerics.Tensors/tests/Helpers.cs
+++ b/src/libraries/System.Numerics.Tensors/tests/Helpers.cs
@@ -11,5 +11,60 @@ internal static class Helpers
         public static IEnumerable<int> TensorLengthsIncluding0 => Enumerable.Range(0, 257);
 
         public static IEnumerable<int> TensorLengths => Enumerable.Range(1, 256);
+
+        // Tolerances taken from testing in the scalar math routines:
+        // cf. https://github.com/dotnet/runtime/blob/89f7ad3b276fb0b48f20cb4e8408bdce85c2b415/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Math.cs
+        // and https://github.com/dotnet/runtime/blob/fd48b6f5d1ff81a81d09e9d72982cc9e8d139852/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/HalfTests.cs
+        public const double DefaultDoubleTolerance = 8.8817841970012523e-16;
+        public const float DefaultFloatTolerance = 4.76837158e-07f;
+        public const float DefaultHalfTolerance = 3.90625e-03f;
+        public const double DefaultToleranceForEstimates = 1.171875e-02;
+
+#if NETCOREAPP
+        private static class DefaultTolerance<T> where T : unmanaged, INumber<T>
+        {
+            public static readonly T Value = DetermineTolerance<T>(DefaultDoubleTolerance, DefaultFloatTolerance, Half.CreateTruncating(DefaultHalfTolerance)) ?? T.CreateTruncating(0);
+        }
+
+        public static bool IsEqualWithTolerance<T>(T expected, T actual, T? tolerance = null) where T : unmanaged, INumber<T>
+        {
+            tolerance = tolerance ?? DefaultTolerance<T>.Value;
+            T diff = T.Abs(expected - actual);
+            return !(diff > tolerance && diff > T.Max(T.Abs(expected), T.Abs(actual)) * tolerance);
+        }
+#else
+        public static bool IsEqualWithTolerance(float expected, float actual, float? tolerance = null)
+        {
+            tolerance ??= DefaultFloatTolerance;
+            float diff = MathF.Abs(expected - actual);
+            return !(diff > tolerance && diff > MathF.Max(MathF.Abs(expected), MathF.Abs(actual)) * tolerance);
+        }
+#endif
+
+        public static T? DetermineTolerance<T>(
+            double? doubleTolerance = null,
+            float? floatTolerance = null
+#if NETCOREAPP
+            , Half? halfTolerance = null
+#endif
+            ) where T : struct
+        {
+            if (typeof(T) == typeof(double) && doubleTolerance != null)
+            {
+                return (T?)(object)doubleTolerance;
+            }
+            else if (typeof(T) == typeof(float) && floatTolerance != null)
+            {
+                return (T?)(object)floatTolerance;
+            }
+#if NETCOREAPP
+            else if (typeof(T) == typeof(Half) && halfTolerance != null)
+            {
+                return (T?)(object)halfTolerance;
+            }
+#endif
+
+            return null;
+        }
     }
 }
diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
index cd6ae2455491..0cd20f1d647a 100644
--- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
+++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
@@ -6,6 +6,8 @@
 using System.Linq;
 using System.Reflection;
 using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
 using Xunit;
 using Xunit.Sdk;
 
@@ -113,7 +115,7 @@ private static void ConvertTruncatingImpl<TFrom, TTo>()
 
                 for (int i = 0; i < tensorLength; i++)
                 {
-                    if (!IsEqualWithTolerance(TTo.CreateTruncating(source.Span[i]), destination.Span[i]))
+                    if (!Helpers.IsEqualWithTolerance(TTo.CreateTruncating(source.Span[i]), destination.Span[i]))
                     {
                         throw new XunitException($"{typeof(TFrom).Name} => {typeof(TTo).Name}. Input: {source.Span[i]}. Actual: {destination.Span[i]}. Expected: {TTo.CreateTruncating(source.Span[i])}.");
                     }
@@ -145,7 +147,7 @@ private static void ConvertSaturatingImpl<TFrom, TTo>()
 
                 for (int i = 0; i < tensorLength; i++)
                 {
-                    if (!IsEqualWithTolerance(TTo.CreateSaturating(source.Span[i]), destination.Span[i]))
+                    if (!Helpers.IsEqualWithTolerance(TTo.CreateSaturating(source.Span[i]), destination.Span[i]))
                     {
                         throw new XunitException($"{typeof(TFrom).Name} => {typeof(TTo).Name}. Input: {source.Span[i]}. Actual: {destination.Span[i]}. Expected: {TTo.CreateSaturating(source.Span[i])}.");
                     }
@@ -177,7 +179,7 @@ private static void ConvertCheckedImpl<TFrom, TTo>()
 
                 for (int i = 0; i < tensorLength; i++)
                 {
-                    if (!IsEqualWithTolerance(TTo.CreateChecked(source.Span[i]), destination.Span[i]))
+                    if (!Helpers.IsEqualWithTolerance(TTo.CreateChecked(source.Span[i]), destination.Span[i]))
                     {
                         throw new XunitException($"{typeof(TFrom).Name} => {typeof(TTo).Name}. Input: {source.Span[i]}. Actual: {destination.Span[i]}. Expected: {TTo.CreateChecked(source.Span[i])}.");
                     }
@@ -199,7 +201,7 @@ private static void ConvertCheckedImpl<TFrom, TTo>(TFrom valid, TFrom invalid)
                 TensorPrimitives.ConvertChecked<TFrom, TTo>(source.Span, destination.Span);
                 foreach (TTo result in destination.Span)
                 {
-                    Assert.True(IsEqualWithTolerance(TTo.CreateChecked(valid), result));
+                    Assert.True(Helpers.IsEqualWithTolerance(TTo.CreateChecked(valid), result));
                 }
 
                 // Test with at least one invalid
@@ -211,19 +213,6 @@ private static void ConvertCheckedImpl<TFrom, TTo>(TFrom valid, TFrom invalid)
                 }
             };
         }
-
-        private static bool IsEqualWithTolerance<T>(T expected, T actual, T? tolerance = null) where T : unmanaged, INumber<T>
-        {
-            tolerance ??= T.CreateTruncating(0.0001);
-
-            T diff = T.Abs(expected - actual);
-            if (diff > tolerance && diff > T.Max(T.Abs(expected), T.Abs(actual)) * tolerance)
-            {
-                return false;
-            }
-
-            return true;
-        }
     }
 
     // The tests for some types have been marked as OuterLoop simply to decrease inner loop testing time.
@@ -362,48 +351,54 @@ protected override void SetSpecialValues(Span<T> x, Span<T> y)
         #region Span -> Destination
         public static IEnumerable<object[]> SpanDestinationFunctionsToTest()
         {
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Acosh), new Func<T, T>(T.Acosh) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.AcosPi), new Func<T, T>(T.AcosPi) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Acos), new Func<T, T>(T.Acos) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Asinh), new Func<T, T>(T.Asinh) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.AsinPi), new Func<T, T>(T.AsinPi) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Asin), new Func<T, T>(T.Asin) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Atanh), new Func<T, T>(T.Atanh) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.AtanPi), new Func<T, T>(T.AtanPi) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Atan), new Func<T, T>(T.Atan) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Cbrt), new Func<T, T>(T.Cbrt) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Ceiling), new Func<T, T>(T.Ceiling) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Cos), new Func<T, T>(T.Cos) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Cosh), new Func<T, T>(T.Cosh) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.CosPi), new Func<T, T>(T.CosPi) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.DegreesToRadians), new Func<T, T>(T.DegreesToRadians) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Exp), new Func<T, T>(T.Exp) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Exp2), new Func<T, T>(T.Exp2) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Exp10), new Func<T, T>(T.Exp10) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.ExpM1), new Func<T, T>(T.ExpM1) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Exp2M1), new Func<T, T>(T.Exp2M1) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Exp10M1), new Func<T, T>(T.Exp10M1) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Floor), new Func<T, T>(T.Floor) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Log), new Func<T, T>(T.Log) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Log2), new Func<T, T>(T.Log2) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Log10), new Func<T, T>(T.Log10) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.LogP1), new Func<T, T>(T.LogP1) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Log2P1), new Func<T, T>(T.Log2P1) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Log10P1), new Func<T, T>(T.Log10P1) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.RadiansToDegrees), new Func<T, T>(T.RadiansToDegrees) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Reciprocal), new Func<T, T>(f => T.One / f) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.ReciprocalEstimate), new Func<T, T>(T.ReciprocalEstimate), T.CreateTruncating(1.171875e-02) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.ReciprocalSqrt), new Func<T, T>(f => T.One / T.Sqrt(f)) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.ReciprocalSqrtEstimate), new Func<T, T>(T.ReciprocalSqrtEstimate), T.CreateTruncating(1.171875e-02) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Round), new Func<T, T>(T.Round) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Sin), new Func<T, T>(T.Sin) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Sinh), new Func<T, T>(T.Sinh) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.SinPi), new Func<T, T>(T.SinPi) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Sqrt), new Func<T, T>(T.Sqrt) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Tan), new Func<T, T>(T.Tan) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Tanh), new Func<T, T>(T.Tanh) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.TanPi), new Func<T, T>(T.TanPi) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.Truncate), new Func<T, T>(T.Truncate) };
+            // The current trigonometric algorithm depends on hardware FMA support for best precision.
+            T? trigTolerance = IsFmaSupported ? null : Helpers.DetermineTolerance<T>(doubleTolerance: 1e-10, floatTolerance: 1e-4f);
+
+            yield return Create(TensorPrimitives.Acosh, T.Acosh);
+            yield return Create(TensorPrimitives.AcosPi, T.AcosPi);
+            yield return Create(TensorPrimitives.Acos, T.Acos);
+            yield return Create(TensorPrimitives.Asinh, T.Asinh);
+            yield return Create(TensorPrimitives.AsinPi, T.AsinPi);
+            yield return Create(TensorPrimitives.Asin, T.Asin);
+            yield return Create(TensorPrimitives.Atanh, T.Atanh);
+            yield return Create(TensorPrimitives.AtanPi, T.AtanPi);
+            yield return Create(TensorPrimitives.Atan, T.Atan);
+            yield return Create(TensorPrimitives.Cbrt, T.Cbrt, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-13));
+            yield return Create(TensorPrimitives.Ceiling, T.Ceiling);
+            yield return Create(TensorPrimitives.Cos, T.Cos, trigTolerance);
+            yield return Create(TensorPrimitives.Cosh, T.Cosh, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14));
+            yield return Create(TensorPrimitives.CosPi, T.CosPi, trigTolerance ?? Helpers.DetermineTolerance<T>(floatTolerance: 1e-5f));
+            yield return Create(TensorPrimitives.DegreesToRadians, T.DegreesToRadians);
+            yield return Create(TensorPrimitives.Exp, T.Exp);
+            yield return Create(TensorPrimitives.Exp2, T.Exp2, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14, floatTolerance: 1e-5f));
+            yield return Create(TensorPrimitives.Exp10, T.Exp10, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-13, floatTolerance: 1e-5f));
+            yield return Create(TensorPrimitives.ExpM1, T.ExpM1);
+            yield return Create(TensorPrimitives.Exp2M1, T.Exp2M1, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14, floatTolerance: 1e-5f));
+            yield return Create(TensorPrimitives.Exp10M1, T.Exp10M1, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-13, floatTolerance: 1e-5f));
+            yield return Create(TensorPrimitives.Floor, T.Floor);
+            yield return Create(TensorPrimitives.Log, T.Log);
+            yield return Create(TensorPrimitives.Log2, T.Log2);
+            yield return Create(TensorPrimitives.Log10, T.Log10);
+            yield return Create(TensorPrimitives.LogP1, T.LogP1);
+            yield return Create(TensorPrimitives.Log2P1, T.Log2P1);
+            yield return Create(TensorPrimitives.Log10P1, T.Log10P1);
+            yield return Create(TensorPrimitives.RadiansToDegrees, T.RadiansToDegrees);
+            yield return Create(TensorPrimitives.Reciprocal, f => T.One / f);
+            yield return Create(TensorPrimitives.ReciprocalEstimate, T.ReciprocalEstimate, T.CreateTruncating(Helpers.DefaultToleranceForEstimates));
+            yield return Create(TensorPrimitives.ReciprocalSqrt, f => T.One / T.Sqrt(f));
+            yield return Create(TensorPrimitives.ReciprocalSqrtEstimate, T.ReciprocalSqrtEstimate, T.CreateTruncating(Helpers.DefaultToleranceForEstimates));
+            yield return Create(TensorPrimitives.Round, T.Round);
+            yield return Create(TensorPrimitives.Sin, T.Sin, trigTolerance);
+            yield return Create(TensorPrimitives.Sinh, T.Sinh, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14));
+            yield return Create(TensorPrimitives.SinPi, T.SinPi, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-13, floatTolerance: 1e-4f));
+            yield return Create(TensorPrimitives.Sqrt, T.Sqrt);
+            yield return Create(TensorPrimitives.Tan, T.Tan, trigTolerance);
+            yield return Create(TensorPrimitives.Tanh, T.Tanh);
+            yield return Create(TensorPrimitives.TanPi, T.TanPi);
+            yield return Create(TensorPrimitives.Truncate, T.Truncate);
+
+            static object[] Create(SpanDestinationDelegate tensorPrimitivesMethod, Func<T, T> expectedMethod, T? tolerance = null)
+                => new object[] { tensorPrimitivesMethod, expectedMethod, tolerance };
         }
 
         [Theory]
@@ -510,18 +505,21 @@ public void SpanDestinationFunctions_ThrowsForOverlapppingInputsWithOutputs(Span
         #region Span,Span -> Destination
         public static IEnumerable<object[]> SpanSpanDestinationFunctionsToTest()
         {
-            yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Atan2), new Func<T, T, T>(T.Atan2) };
-            yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Atan2Pi), new Func<T, T, T>(T.Atan2Pi) };
-            yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.CopySign), new Func<T, T, T>(T.CopySign) };
-            yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Hypot), new Func<T, T, T>(T.Hypot) };
-            yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Ieee754Remainder), new Func<T, T, T>(T.Ieee754Remainder) };
-            yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Log), new Func<T, T, T>(T.Log) };
-            yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Pow), new Func<T, T, T>(T.Pow) };
+            yield return Create(TensorPrimitives.Atan2, T.Atan2);
+            yield return Create(TensorPrimitives.Atan2Pi, T.Atan2Pi);
+            yield return Create(TensorPrimitives.CopySign, T.CopySign);
+            yield return Create(TensorPrimitives.Hypot, T.Hypot);
+            yield return Create(TensorPrimitives.Ieee754Remainder, T.Ieee754Remainder);
+            yield return Create(TensorPrimitives.Log, T.Log);
+            yield return Create(TensorPrimitives.Pow, T.Pow, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-13, floatTolerance: 1e-5f));
+
+            static object[] Create(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
+                => new object[] { tensorPrimitivesMethod, expectedMethod, tolerance };
         }
 
         [Theory]
         [MemberData(nameof(SpanSpanDestinationFunctionsToTest))]
-        public void SpanSpanDestination_AllLengths(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod)
+        public void SpanSpanDestination_AllLengths(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
             Assert.All(Helpers.TensorLengthsIncluding0, tensorLength =>
             {
@@ -532,14 +530,14 @@ public void SpanSpanDestination_AllLengths(SpanSpanDestinationDelegate tensorPri
                 tensorPrimitivesMethod(x, y, destination);
                 for (int i = 0; i < tensorLength; i++)
                 {
-                    AssertEqualTolerance(expectedMethod(x[i], y[i]), destination[i]);
+                    AssertEqualTolerance(expectedMethod(x[i], y[i]), destination[i], tolerance);
                 }
             });
         }
 
         [Theory]
         [MemberData(nameof(SpanSpanDestinationFunctionsToTest))]
-        public void SpanSpanDestination_InPlace(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod)
+        public void SpanSpanDestination_InPlace(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
             Assert.All(Helpers.TensorLengthsIncluding0, tensorLength =>
             {
@@ -550,14 +548,14 @@ public void SpanSpanDestination_InPlace(SpanSpanDestinationDelegate tensorPrimit
 
                 for (int i = 0; i < tensorLength; i++)
                 {
-                    AssertEqualTolerance(expectedMethod(xOrig[i], xOrig[i]), x[i]);
+                    AssertEqualTolerance(expectedMethod(xOrig[i], xOrig[i]), x[i], tolerance);
                 }
             });
         }
 
         [Theory]
         [MemberData(nameof(SpanSpanDestinationFunctionsToTest))]
-        public void SpanSpanDestination_SpecialValues(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod)
+        public void SpanSpanDestination_SpecialValues(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
             Assert.All(Helpers.TensorLengths, tensorLength =>
             {
@@ -570,7 +568,7 @@ public void SpanSpanDestination_SpecialValues(SpanSpanDestinationDelegate tensor
                     tensorPrimitivesMethod(x.Span, y.Span, destination.Span);
                     for (int i = 0; i < tensorLength; i++)
                     {
-                        AssertEqualTolerance(expectedMethod(x[i], y[i]), destination[i]);
+                        AssertEqualTolerance(expectedMethod(x[i], y[i]), destination[i], tolerance);
                     }
                 }, x);
 
@@ -579,7 +577,7 @@ public void SpanSpanDestination_SpecialValues(SpanSpanDestinationDelegate tensor
                     tensorPrimitivesMethod(x.Span, y.Span, destination.Span);
                     for (int i = 0; i < tensorLength; i++)
                     {
-                        AssertEqualTolerance(expectedMethod(x[i], y[i]), destination[i]);
+                        AssertEqualTolerance(expectedMethod(x[i], y[i]), destination[i], tolerance);
                     }
                 }, y);
             });
@@ -587,8 +585,11 @@ public void SpanSpanDestination_SpecialValues(SpanSpanDestinationDelegate tensor
 
         [Theory]
         [MemberData(nameof(SpanSpanDestinationFunctionsToTest))]
-        public void SpanSpanDestination_ThrowsForMismatchedLengths(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> _)
+        public void SpanSpanDestination_ThrowsForMismatchedLengths(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
+            _ = expectedMethod;
+            _ = tolerance;
+
             Assert.All(Helpers.TensorLengths, tensorLength =>
             {
                 using BoundedMemory<T> x = CreateAndFillTensor(tensorLength);
@@ -602,8 +603,11 @@ public void SpanSpanDestination_ThrowsForMismatchedLengths(SpanSpanDestinationDe
 
         [Theory]
         [MemberData(nameof(SpanSpanDestinationFunctionsToTest))]
-        public void SpanSpanDestination_ThrowsForTooShortDestination(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> _)
+        public void SpanSpanDestination_ThrowsForTooShortDestination(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
+            _ = expectedMethod;
+            _ = tolerance;
+
             Assert.All(Helpers.TensorLengths, tensorLength =>
             {
                 using BoundedMemory<T> x = CreateAndFillTensor(tensorLength);
@@ -616,8 +620,11 @@ public void SpanSpanDestination_ThrowsForTooShortDestination(SpanSpanDestination
 
         [Theory]
         [MemberData(nameof(SpanSpanDestinationFunctionsToTest))]
-        public void SpanSpanDestination_ThrowsForOverlapppingInputsWithOutputs(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> _)
+        public void SpanSpanDestination_ThrowsForOverlapppingInputsWithOutputs(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
+            _ = expectedMethod;
+            _ = tolerance;
+
             T[] array = new T[10];
             AssertExtensions.Throws<ArgumentException>("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(0, 2)));
             AssertExtensions.Throws<ArgumentException>("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), array.AsSpan(5, 2), array.AsSpan(2, 2)));
@@ -629,21 +636,24 @@ public void SpanSpanDestination_ThrowsForOverlapppingInputsWithOutputs(SpanSpanD
         #region Span,Scalar -> Destination
         public static IEnumerable<object[]> SpanScalarDestinationFunctionsToTest()
         {
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.Atan2), new Func<T, T, T>(T.Atan2) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.Atan2Pi), new Func<T, T, T>(T.Atan2Pi) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.CopySign), new Func<T, T, T>(T.CopySign) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.Ieee754Remainder), new Func<T, T, T>(T.Ieee754Remainder) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.Pow), new Func<T, T, T>(T.Pow) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.Log), new Func<T, T, T>(T.Log) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.Max), new Func<T, T, T>(T.Max) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.MaxMagnitude), new Func<T, T, T>(T.MaxMagnitude) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.Min), new Func<T, T, T>(T.Min) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.MinMagnitude), new Func<T, T, T>(T.MinMagnitude) };
+            yield return Create(TensorPrimitives.Atan2, T.Atan2);
+            yield return Create(TensorPrimitives.Atan2Pi, T.Atan2Pi);
+            yield return Create(TensorPrimitives.CopySign, T.CopySign);
+            yield return Create(TensorPrimitives.Ieee754Remainder, T.Ieee754Remainder);
+            yield return Create(TensorPrimitives.Pow, T.Pow, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-13, floatTolerance: 1e-5f));
+            yield return Create(TensorPrimitives.Log, T.Log);
+            yield return Create(TensorPrimitives.Max, T.Max);
+            yield return Create(TensorPrimitives.MaxMagnitude, T.MaxMagnitude);
+            yield return Create(TensorPrimitives.Min, T.Min);
+            yield return Create(TensorPrimitives.MinMagnitude, T.MinMagnitude);
+
+            static object[] Create(SpanScalarDestinationDelegate<T, T, T> tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
+                => new object[] { tensorPrimitivesMethod, expectedMethod, tolerance };
         }
 
         [Theory]
         [MemberData(nameof(SpanScalarDestinationFunctionsToTest))]
-        public void SpanScalarDestination_AllLengths(SpanScalarDestinationDelegate<T, T, T> tensorPrimitivesMethod, Func<T, T, T> expectedMethod)
+        public void SpanScalarDestination_AllLengths(SpanScalarDestinationDelegate<T, T, T> tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
             Assert.All(Helpers.TensorLengthsIncluding0, tensorLength =>
             {
@@ -654,14 +664,14 @@ public void SpanScalarDestination_AllLengths(SpanScalarDestinationDelegate<T, T,
                 tensorPrimitivesMethod(x, y, destination);
                 for (int i = 0; i < tensorLength; i++)
                 {
-                    AssertEqualTolerance(expectedMethod(x[i], y), destination[i]);
+                    AssertEqualTolerance(expectedMethod(x[i], y), destination[i], tolerance);
                 }
             });
         }
 
         [Theory]
         [MemberData(nameof(SpanScalarDestinationFunctionsToTest))]
-        public void SpanScalarDestination_InPlace(SpanScalarDestinationDelegate<T, T, T> tensorPrimitivesMethod, Func<T, T, T> expectedMethod)
+        public void SpanScalarDestination_InPlace(SpanScalarDestinationDelegate<T, T, T> tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
             Assert.All(Helpers.TensorLengthsIncluding0, tensorLength =>
             {
@@ -673,14 +683,14 @@ public void SpanScalarDestination_InPlace(SpanScalarDestinationDelegate<T, T, T>
 
                 for (int i = 0; i < tensorLength; i++)
                 {
-                    AssertEqualTolerance(expectedMethod(xOrig[i], y), x[i]);
+                    AssertEqualTolerance(expectedMethod(xOrig[i], y), x[i], tolerance);
                 }
             });
         }
 
         [Theory]
         [MemberData(nameof(SpanScalarDestinationFunctionsToTest))]
-        public void SpanScalarDestination_SpecialValues(SpanScalarDestinationDelegate<T, T, T> tensorPrimitivesMethod, Func<T, T, T> expectedMethod)
+        public void SpanScalarDestination_SpecialValues(SpanScalarDestinationDelegate<T, T, T> tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
             Assert.All(Helpers.TensorLengths, tensorLength =>
             {
@@ -693,7 +703,7 @@ public void SpanScalarDestination_SpecialValues(SpanScalarDestinationDelegate<T,
                     tensorPrimitivesMethod(x.Span, y, destination.Span);
                     for (int i = 0; i < tensorLength; i++)
                     {
-                        AssertEqualTolerance(expectedMethod(x[i], y), destination[i]);
+                        AssertEqualTolerance(expectedMethod(x[i], y), destination[i], tolerance);
                     }
                 }, x);
             });
@@ -701,8 +711,11 @@ public void SpanScalarDestination_SpecialValues(SpanScalarDestinationDelegate<T,
 
         [Theory]
         [MemberData(nameof(SpanScalarDestinationFunctionsToTest))]
-        public void SpanScalarDestination_ThrowsForTooShortDestination(SpanScalarDestinationDelegate<T, T, T> tensorPrimitivesMethod, Func<T, T, T> _)
+        public void SpanScalarDestination_ThrowsForTooShortDestination(SpanScalarDestinationDelegate<T, T, T> tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
+            _ = expectedMethod;
+            _ = tolerance;
+
             Assert.All(Helpers.TensorLengths, tensorLength =>
             {
                 using BoundedMemory<T> x = CreateAndFillTensor(tensorLength);
@@ -715,8 +728,11 @@ public void SpanScalarDestination_ThrowsForTooShortDestination(SpanScalarDestina
 
         [Theory]
         [MemberData(nameof(SpanScalarDestinationFunctionsToTest))]
-        public void SpanScalarDestination_ThrowsForOverlapppingInputsWithOutputs(SpanScalarDestinationDelegate<T, T, T> tensorPrimitivesMethod, Func<T, T, T> _)
+        public void SpanScalarDestination_ThrowsForOverlapppingInputsWithOutputs(SpanScalarDestinationDelegate<T, T, T> tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
+            _ = expectedMethod;
+            _ = tolerance;
+
             T[] array = new T[10];
             AssertExtensions.Throws<ArgumentException>("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), default, array.AsSpan(0, 2)));
             AssertExtensions.Throws<ArgumentException>("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), default, array.AsSpan(2, 2)));
@@ -726,15 +742,18 @@ public void SpanScalarDestination_ThrowsForOverlapppingInputsWithOutputs(SpanSca
         #region Scalar,Span -> Destination
         public static IEnumerable<object[]> ScalarSpanFloatDestinationFunctionsToTest()
         {
-            yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Atan2), new Func<T, T, T>(T.Atan2) };
-            yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Atan2Pi), new Func<T, T, T>(T.Atan2Pi) };
-            yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Pow), new Func<T, T, T>(T.Pow) };
-            yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Ieee754Remainder), new Func<T, T, T>(T.Ieee754Remainder) };
+            yield return Create(TensorPrimitives.Atan2, T.Atan2);
+            yield return Create(TensorPrimitives.Atan2Pi, T.Atan2Pi);
+            yield return Create(TensorPrimitives.Pow, T.Pow, Helpers.DetermineTolerance<T>(floatTolerance: 1e-5f));
+            yield return Create(TensorPrimitives.Ieee754Remainder, T.Ieee754Remainder);
+
+            static object[] Create(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
+                => new object[] { tensorPrimitivesMethod, expectedMethod, tolerance };
         }
 
         [Theory]
         [MemberData(nameof(ScalarSpanFloatDestinationFunctionsToTest))]
-        public void SpanScalarFloatDestination_AllLengths(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod)
+        public void SpanScalarFloatDestination_AllLengths(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
             Assert.All(Helpers.TensorLengthsIncluding0, tensorLength =>
             {
@@ -745,14 +764,14 @@ public void SpanScalarFloatDestination_AllLengths(ScalarSpanDestinationDelegate
                 tensorPrimitivesMethod(x, y, destination);
                 for (int i = 0; i < tensorLength; i++)
                 {
-                    AssertEqualTolerance(expectedMethod(x, y[i]), destination[i]);
+                    AssertEqualTolerance(expectedMethod(x, y[i]), destination[i], tolerance);
                 }
             });
         }
 
         [Theory]
         [MemberData(nameof(ScalarSpanFloatDestinationFunctionsToTest))]
-        public void SpanScalarFloatDestination_InPlace(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod)
+        public void SpanScalarFloatDestination_InPlace(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
             Assert.All(Helpers.TensorLengthsIncluding0, tensorLength =>
             {
@@ -764,14 +783,14 @@ public void SpanScalarFloatDestination_InPlace(ScalarSpanDestinationDelegate ten
 
                 for (int i = 0; i < tensorLength; i++)
                 {
-                    AssertEqualTolerance(expectedMethod(x, yOrig[i]), y[i]);
+                    AssertEqualTolerance(expectedMethod(x, yOrig[i]), y[i], tolerance);
                 }
             });
         }
 
         [Theory]
         [MemberData(nameof(ScalarSpanFloatDestinationFunctionsToTest))]
-        public void ScalarSpanDestination_SpecialValues(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod)
+        public void ScalarSpanDestination_SpecialValues(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
             Assert.All(Helpers.TensorLengths, tensorLength =>
             {
@@ -784,7 +803,7 @@ public void ScalarSpanDestination_SpecialValues(ScalarSpanDestinationDelegate te
                     tensorPrimitivesMethod(x, y.Span, destination.Span);
                     for (int i = 0; i < tensorLength; i++)
                     {
-                        AssertEqualTolerance(expectedMethod(x, y[i]), destination[i]);
+                        AssertEqualTolerance(expectedMethod(x, y[i]), destination[i], tolerance);
                     }
                 }, y);
             });
@@ -792,8 +811,11 @@ public void ScalarSpanDestination_SpecialValues(ScalarSpanDestinationDelegate te
 
         [Theory]
         [MemberData(nameof(ScalarSpanFloatDestinationFunctionsToTest))]
-        public void SpanScalarFloatDestination_ThrowsForTooShortDestination(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> _)
+        public void SpanScalarFloatDestination_ThrowsForTooShortDestination(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
+            _ = expectedMethod;
+            _ = tolerance;
+
             Assert.All(Helpers.TensorLengths, tensorLength =>
             {
                 T x = NextRandom();
@@ -806,8 +828,11 @@ public void SpanScalarFloatDestination_ThrowsForTooShortDestination(ScalarSpanDe
 
         [Theory]
         [MemberData(nameof(ScalarSpanFloatDestinationFunctionsToTest))]
-        public void SpanScalarFloatDestination_ThrowsForOverlapppingInputsWithOutputs(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> _)
+        public void SpanScalarFloatDestination_ThrowsForOverlapppingInputsWithOutputs(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod, T? tolerance = null)
         {
+            _ = expectedMethod;
+            _ = tolerance;
+
             T[] array = new T[10];
             AssertExtensions.Throws<ArgumentException>("destination", () => tensorPrimitivesMethod(default, array.AsSpan(1, 2), array.AsSpan(0, 2)));
             AssertExtensions.Throws<ArgumentException>("destination", () => tensorPrimitivesMethod(default, array.AsSpan(1, 2), array.AsSpan(2, 2)));
@@ -817,13 +842,16 @@ public void SpanScalarFloatDestination_ThrowsForOverlapppingInputsWithOutputs(Sc
         #region Span,Int,Span -> Destination
         public static IEnumerable<object[]> SpanIntDestinationFunctionsToTest()
         {
-            yield return new object[] { new SpanScalarDestinationDelegate<T, int, T>(TensorPrimitives.RootN), new Func<T, int, T>(T.RootN) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, int, T>(TensorPrimitives.ScaleB), new Func<T, int, T>(T.ScaleB) };
+            yield return Create(TensorPrimitives.RootN, T.RootN, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-13));
+            yield return Create(TensorPrimitives.ScaleB, T.ScaleB);
+
+            static object[] Create(SpanScalarDestinationDelegate<T, int, T> tensorPrimitivesMethod, Func<T, int, T> expectedMethod, T? tolerance = null)
+                => new object[] { tensorPrimitivesMethod, expectedMethod, tolerance };
         }
 
         [Theory]
         [MemberData(nameof(SpanIntDestinationFunctionsToTest))]
-        public void SpanIntDestination_AllLengths(SpanScalarDestinationDelegate<T, int, T> tensorPrimitivesMethod, Func<T, int, T> expectedMethod)
+        public void SpanIntDestination_AllLengths(SpanScalarDestinationDelegate<T, int, T> tensorPrimitivesMethod, Func<T, int, T> expectedMethod, T? tolerance = null)
         {
             Assert.All(Helpers.TensorLengthsIncluding0, tensorLength =>
             {
@@ -834,14 +862,14 @@ public void SpanIntDestination_AllLengths(SpanScalarDestinationDelegate<T, int,
                 tensorPrimitivesMethod(x, y, destination);
                 for (int i = 0; i < tensorLength; i++)
                 {
-                    AssertEqualTolerance(expectedMethod(x[i], y), destination[i]);
+                    AssertEqualTolerance(expectedMethod(x[i], y), destination[i], tolerance);
                 }
             });
         }
 
         [Theory]
         [MemberData(nameof(SpanIntDestinationFunctionsToTest))]
-        public void SpanIntDestination_InPlace(SpanScalarDestinationDelegate<T, int, T> tensorPrimitivesMethod, Func<T, int, T> expectedMethod)
+        public void SpanIntDestination_InPlace(SpanScalarDestinationDelegate<T, int, T> tensorPrimitivesMethod, Func<T, int, T> expectedMethod, T? tolerance = null)
         {
             Assert.All(Helpers.TensorLengthsIncluding0, tensorLength =>
             {
@@ -853,14 +881,14 @@ public void SpanIntDestination_InPlace(SpanScalarDestinationDelegate<T, int, T>
 
                 for (int i = 0; i < tensorLength; i++)
                 {
-                    AssertEqualTolerance(expectedMethod(xOrig[i], y), x[i]);
+                    AssertEqualTolerance(expectedMethod(xOrig[i], y), x[i], tolerance);
                 }
             });
         }
 
         [Theory]
         [MemberData(nameof(SpanIntDestinationFunctionsToTest))]
-        public void SpanIntDestination_SpecialValues(SpanScalarDestinationDelegate<T, int, T> tensorPrimitivesMethod, Func<T, int, T> expectedMethod)
+        public void SpanIntDestination_SpecialValues(SpanScalarDestinationDelegate<T, int, T> tensorPrimitivesMethod, Func<T, int, T> expectedMethod, T? tolerance = null)
         {
             Assert.All(Helpers.TensorLengths, tensorLength =>
             {
@@ -873,7 +901,7 @@ public void SpanIntDestination_SpecialValues(SpanScalarDestinationDelegate<T, in
                     tensorPrimitivesMethod(x.Span, y, destination.Span);
                     for (int i = 0; i < tensorLength; i++)
                     {
-                        AssertEqualTolerance(expectedMethod(x[i], y), destination[i]);
+                        AssertEqualTolerance(expectedMethod(x[i], y), destination[i], tolerance);
                     }
                 }, x);
             });
@@ -881,8 +909,11 @@ public void SpanIntDestination_SpecialValues(SpanScalarDestinationDelegate<T, in
 
         [Theory]
         [MemberData(nameof(SpanIntDestinationFunctionsToTest))]
-        public void SpanIntDestination_ThrowsForTooShortDestination(SpanScalarDestinationDelegate<T, int, T> tensorPrimitivesMethod, Func<T, int, T> _)
+        public void SpanIntDestination_ThrowsForTooShortDestination(SpanScalarDestinationDelegate<T, int, T> tensorPrimitivesMethod, Func<T, int, T> expectedMethod, T? tolerance = null)
         {
+            _ = expectedMethod;
+            _ = tolerance;
+
             Assert.All(Helpers.TensorLengths, tensorLength =>
             {
                 using BoundedMemory<T> x = CreateAndFillTensor(tensorLength);
@@ -895,8 +926,11 @@ public void SpanIntDestination_ThrowsForTooShortDestination(SpanScalarDestinatio
 
         [Theory]
         [MemberData(nameof(SpanIntDestinationFunctionsToTest))]
-        public void SpanIntDestination_ThrowsForOverlapppingInputsWithOutputs(SpanScalarDestinationDelegate<T, int, T> tensorPrimitivesMethod, Func<T, int, T> _)
+        public void SpanIntDestination_ThrowsForOverlapppingInputsWithOutputs(SpanScalarDestinationDelegate<T, int, T> tensorPrimitivesMethod, Func<T, int, T> expectedMethod, T? tolerance = null)
         {
+            _ = expectedMethod;
+            _ = tolerance;
+
             T[] array = new T[10];
             AssertExtensions.Throws<ArgumentException>("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), 2, array.AsSpan(0, 2)));
             AssertExtensions.Throws<ArgumentException>("destination", () => tensorPrimitivesMethod(array.AsSpan(1, 2), 2, array.AsSpan(2, 2)));
@@ -906,9 +940,12 @@ public void SpanIntDestination_ThrowsForOverlapppingInputsWithOutputs(SpanScalar
         #region Span,Span,Span -> Destination
         public static IEnumerable<object[]> SpanSpanSpanDestinationFunctionsToTest()
         {
-            yield return new object[] { new SpanSpanSpanDestinationDelegate(TensorPrimitives.FusedMultiplyAdd), new Func<T, T, T, T>(T.FusedMultiplyAdd) };
-            yield return new object[] { new SpanSpanSpanDestinationDelegate(TensorPrimitives.Lerp), new Func<T, T, T, T>(T.Lerp) };
-            yield return new object[] { new SpanSpanSpanDestinationDelegate(TensorPrimitives.MultiplyAddEstimate), new Func<T, T, T, T>(T.FusedMultiplyAdd) }; // TODO: Change T.FusedMultiplyAdd to T.MultiplyAddEstimate when available
+            yield return Create(TensorPrimitives.FusedMultiplyAdd, T.FusedMultiplyAdd);
+            yield return Create(TensorPrimitives.Lerp, T.Lerp);
+            yield return Create(TensorPrimitives.MultiplyAddEstimate, T.FusedMultiplyAdd); // TODO: Change T.FusedMultiplyAdd to T.MultiplyAddEstimate when available
+
+            static object[] Create(SpanSpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T, T> expectedMethod)
+                => new object[] { tensorPrimitivesMethod, expectedMethod };
         }
 
         [Theory]
@@ -1039,9 +1076,12 @@ public void SpanSpanSpanDestination_ThrowsForOverlapppingInputsWithOutputs(SpanS
         #region Span,Span,Scalar -> Destination
         public static IEnumerable<object[]> SpanSpanScalarDestinationFunctionsToTest()
         {
-            yield return new object[] { new SpanSpanScalarDestinationDelegate(TensorPrimitives.FusedMultiplyAdd), new Func<T, T, T, T>(T.FusedMultiplyAdd) };
-            yield return new object[] { new SpanSpanScalarDestinationDelegate(TensorPrimitives.Lerp), new Func<T, T, T, T>(T.Lerp) };
-            yield return new object[] { new SpanSpanScalarDestinationDelegate(TensorPrimitives.MultiplyAddEstimate), new Func<T, T, T, T>(T.FusedMultiplyAdd) }; // TODO: Change T.FusedMultiplyAdd to T.MultiplyAddEstimate when available
+            yield return Create(TensorPrimitives.FusedMultiplyAdd, T.FusedMultiplyAdd);
+            yield return Create(TensorPrimitives.Lerp, T.Lerp);
+            yield return Create(TensorPrimitives.MultiplyAddEstimate, T.FusedMultiplyAdd); // TODO: Change T.FusedMultiplyAdd to T.MultiplyAddEstimate when available
+
+            static object[] Create(SpanSpanScalarDestinationDelegate tensorPrimitivesMethod, Func<T, T, T, T> expectedMethod)
+                => new object[] { tensorPrimitivesMethod, expectedMethod };
         }
 
         [Theory]
@@ -1144,9 +1184,12 @@ public void SpanSpanScalarDestination_ThrowsForOverlapppingInputsWithOutputs(Spa
         #region Span,Scalar,Span -> Destination
         public static IEnumerable<object[]> SpanScalarSpanDestinationFunctionsToTest()
         {
-            yield return new object[] { new SpanScalarSpanDestinationDelegate(TensorPrimitives.FusedMultiplyAdd), new Func<T, T, T, T>(T.FusedMultiplyAdd) };
-            yield return new object[] { new SpanScalarSpanDestinationDelegate(TensorPrimitives.Lerp), new Func<T, T, T, T>(T.Lerp) };
-            yield return new object[] { new SpanScalarSpanDestinationDelegate(TensorPrimitives.MultiplyAddEstimate), new Func<T, T, T, T>(T.FusedMultiplyAdd) }; // TODO: Change T.FusedMultiplyAdd to T.MultiplyAddEstimate when available
+            yield return Create(TensorPrimitives.FusedMultiplyAdd, T.FusedMultiplyAdd);
+            yield return Create(TensorPrimitives.Lerp, T.Lerp);
+            yield return Create(TensorPrimitives.MultiplyAddEstimate, T.FusedMultiplyAdd); // TODO: Change T.FusedMultiplyAdd to T.MultiplyAddEstimate when available
+
+            static object[] Create(SpanScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T, T> expectedMethod)
+                => new object[] { tensorPrimitivesMethod, expectedMethod };
         }
 
         [Theory]
@@ -1249,8 +1292,11 @@ public void SpanScalarSpanDestination_ThrowsForOverlapppingInputsWithOutputs(Spa
         #region Span -> Destination,Destination
         public static IEnumerable<object[]> SpanDestinationDestinationFunctionsToTest()
         {
-            yield return new object[] { new SpanDestinationDestinationDelegate(TensorPrimitives.SinCos), new Func<T, (T, T)>(T.SinCos) };
-            yield return new object[] { new SpanDestinationDestinationDelegate(TensorPrimitives.SinCosPi), new Func<T, (T, T)>(T.SinCosPi) };
+            yield return Create(TensorPrimitives.SinCos, T.SinCos);
+            yield return Create(TensorPrimitives.SinCosPi, T.SinCosPi);
+
+            static object[] Create(SpanDestinationDestinationDelegate tensorPrimitivesMethod, Func<T, (T, T)> expectedMethod)
+                => new object[] { tensorPrimitivesMethod, expectedMethod };
         }
 
         [Theory]
@@ -1597,10 +1643,13 @@ public void Divide_ScalarTensor_ByZero_Throw()
         #region Span -> Destination
         public static IEnumerable<object[]> SpanDestinationFunctionsToTest()
         {
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.OnesComplement), new Func<T, T>(i => ~i) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.PopCount), new Func<T, T>(T.PopCount) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.LeadingZeroCount), new Func<T, T>(T.LeadingZeroCount) };
-            yield return new object[] { new SpanDestinationDelegate(TensorPrimitives.TrailingZeroCount), new Func<T, T>(T.TrailingZeroCount) };
+            yield return Create(TensorPrimitives.OnesComplement, i => ~i);
+            yield return Create(TensorPrimitives.PopCount, T.PopCount);
+            yield return Create(TensorPrimitives.LeadingZeroCount, T.LeadingZeroCount);
+            yield return Create(TensorPrimitives.TrailingZeroCount, T.TrailingZeroCount);
+
+            static object[] Create(SpanDestinationDelegate tensorPrimitivesMethod, Func<T, T> expectedMethod)
+                => new object[] { tensorPrimitivesMethod, expectedMethod };
         }
 
         [Theory]
@@ -1665,9 +1714,12 @@ public void SpanDestinationFunctions_ThrowsForOverlapppingInputsWithOutputs(Span
         #region Span,Span -> Destination
         public static IEnumerable<object[]> SpanSpanDestinationFunctionsToTest()
         {
-            yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.BitwiseAnd), new Func<T, T, T>((x, y) => x & y) };
-            yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.BitwiseOr), new Func<T, T, T>((x, y) => x | y) };
-            yield return new object[] { new SpanSpanDestinationDelegate(TensorPrimitives.Xor), new Func<T, T, T>((x, y) => x ^ y) };
+            yield return Create(TensorPrimitives.BitwiseAnd, (x, y) => x & y);
+            yield return Create(TensorPrimitives.BitwiseOr, (x, y) => x | y);
+            yield return Create(TensorPrimitives.Xor, (x, y) => x ^ y);
+
+            static object[] Create(SpanSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod)
+                => new object[] { tensorPrimitivesMethod, expectedMethod };
         }
 
         [Theory]
@@ -1750,13 +1802,16 @@ public void SpanSpanDestination_ThrowsForOverlapppingInputsWithOutputs(SpanSpanD
         #region Span,Scalar -> Destination
         public static IEnumerable<object[]> SpanScalarDestinationFunctionsToTest()
         {
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.BitwiseAnd), new Func<T, T, T>((x, y) => x & y) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.BitwiseOr), new Func<T, T, T>((x, y) => x | y) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.Max), new Func<T, T, T>(T.Max) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.MaxMagnitude), new Func<T, T, T>(T.MaxMagnitude) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.Min), new Func<T, T, T>(T.Min) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.MinMagnitude), new Func<T, T, T>(T.MinMagnitude) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, T, T>(TensorPrimitives.Xor), new Func<T, T, T>((x, y) => x ^ y) };
+            yield return Create(TensorPrimitives.BitwiseAnd, (x, y) => x & y);
+            yield return Create(TensorPrimitives.BitwiseOr, (x, y) => x | y);
+            yield return Create(TensorPrimitives.Max, T.Max);
+            yield return Create(TensorPrimitives.MaxMagnitude, T.MaxMagnitude);
+            yield return Create(TensorPrimitives.Min, T.Min);
+            yield return Create(TensorPrimitives.MinMagnitude, T.MinMagnitude);
+            yield return Create(TensorPrimitives.Xor, (x, y) => x ^ y);
+
+            static object[] Create(SpanScalarDestinationDelegate<T, T, T> tensorPrimitivesMethod, Func<T, T, T> expectedMethod)
+                => new object[] { tensorPrimitivesMethod, expectedMethod };
         }
 
         [Theory]
@@ -1824,11 +1879,14 @@ public void SpanScalarDestination_ThrowsForOverlapppingInputWithOutputs(SpanScal
         #region Shifting/Rotating
         public static IEnumerable<object[]> ShiftRotateDestinationFunctionsToTest()
         {
-            yield return new object[] { new SpanScalarDestinationDelegate<T, int, T>(TensorPrimitives.ShiftLeft), new Func<T, int, T>((x, n) => x << n) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, int, T>(TensorPrimitives.ShiftRightArithmetic), new Func<T, int, T>((x, n) => x >> n) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, int, T>(TensorPrimitives.ShiftRightLogical), new Func<T, int, T>((x, n) => x >>> n) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, int, T>(TensorPrimitives.RotateLeft), new Func<T, int, T>(T.RotateLeft) };
-            yield return new object[] { new SpanScalarDestinationDelegate<T, int, T>(TensorPrimitives.RotateRight), new Func<T, int, T>(T.RotateRight) };
+            yield return Create(TensorPrimitives.ShiftLeft, (x, n) => x << n);
+            yield return Create(TensorPrimitives.ShiftRightArithmetic, (x, n) => x >> n);
+            yield return Create(TensorPrimitives.ShiftRightLogical, (x, n) => x >>> n);
+            yield return Create(TensorPrimitives.RotateLeft, T.RotateLeft);
+            yield return Create(TensorPrimitives.RotateRight, T.RotateRight);
+
+            static object[] Create(SpanScalarDestinationDelegate<T, int, T> tensorPrimitivesMethod, Func<T, int, T> expectedMethod)
+                => new object[] { tensorPrimitivesMethod, expectedMethod };
         }
 
         [Theory]
@@ -1996,6 +2054,7 @@ public void CopySign_ThrowsForOverlapppingInputsWithOutputs()
     public unsafe abstract class GenericNumberTensorPrimitivesTests<T> : TensorPrimitivesTests<T>
         where T : unmanaged, INumber<T>, IMinMaxValue<T>
     {
+        protected static bool IsFmaSupported => Fma.IsSupported || AdvSimd.Arm64.IsSupported || (AdvSimd.IsSupported && typeof(T) == typeof(float));
         protected override void Abs(ReadOnlySpan<T> x, Span<T> destination) => TensorPrimitives.Abs(x, destination);
         protected override T Abs(T x) => T.Abs(x);
         protected override void Add(ReadOnlySpan<T> x, ReadOnlySpan<T> y, Span<T> destination) => TensorPrimitives.Add(x, y, destination);
@@ -2071,10 +2130,7 @@ protected override T NextRandom()
 
         protected override void AssertEqualTolerance(T expected, T actual, T? tolerance = null)
         {
-            tolerance ??= T.CreateTruncating(0.0001);
-
-            T diff = T.Abs(expected - actual);
-            if (diff > tolerance && diff > T.Max(T.Abs(expected), T.Abs(actual)) * tolerance)
+            if (!Helpers.IsEqualWithTolerance(expected, actual, tolerance))
             {
                 throw EqualException.ForMismatchedValues(expected, actual);
             }
@@ -2105,8 +2161,11 @@ protected override void SetSpecialValues(Span<T> x, Span<T> y) { }
         #region Scalar,Span -> Destination
         public static IEnumerable<object[]> ScalarSpanDestinationFunctionsToTest()
         {
-            yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Divide), new Func<T, T, T>((x, y) => x / y) };
-            yield return new object[] { new ScalarSpanDestinationDelegate(TensorPrimitives.Subtract), new Func<T, T, T>((x, y) => x - y) };
+            yield return Create(TensorPrimitives.Divide, (x, y) => x / y);
+            yield return Create(TensorPrimitives.Subtract, (x, y) => x - y);
+
+            static object[] Create(ScalarSpanDestinationDelegate tensorPrimitivesMethod, Func<T, T, T> expectedMethod)
+                => new object[] { tensorPrimitivesMethod, expectedMethod };
         }
 
         [Theory]
diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.NonGeneric.Single.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.NonGeneric.Single.cs
index ac883851299d..d6df8365c59d 100644
--- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.NonGeneric.Single.cs
+++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.NonGeneric.Single.cs
@@ -106,10 +106,7 @@ protected override float MinMagnitude(float x, float y)
 
         protected override void AssertEqualTolerance(float expected, float actual, float? tolerance = null)
         {
-            tolerance ??= 0.0001f;
-
-            double diff = Math.Abs((double)expected - (double)actual);
-            if (diff > tolerance && diff > Math.Max(Math.Abs(expected), Math.Abs(actual)) * tolerance)
+            if (!Helpers.IsEqualWithTolerance(expected, actual, tolerance))
             {
                 throw EqualException.ForMismatchedValues(expected, actual);
             }
diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs
index b0b44fddb0ae..278ab39938b4 100644
--- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs
+++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs
@@ -663,6 +663,8 @@ public void Cosh_ValueRange()
         {
             if (!IsFloatingPoint) return;
 
+            T? tolerance = Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14);
+
             Assert.All(VectorLengthAndIteratedRange(ConvertFromSingle(-100f), ConvertFromSingle(100f), ConvertFromSingle(3f)), arg =>
             {
                 T[] x = new T[arg.Length];
@@ -674,7 +676,7 @@ public void Cosh_ValueRange()
                 T expected = Cosh(arg.Element);
                 foreach (T actual in dest)
                 {
-                    AssertEqualTolerance(expected, actual);
+                    AssertEqualTolerance(expected, actual, tolerance);
                 }
             });
         }
@@ -952,6 +954,8 @@ public void Dot_ThrowsForMismatchedLengths_x_y()
         [Fact]
         public void Dot_AllLengths()
         {
+            T? tolerance = Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14f, floatTolerance: 1e-5f);
+
             Assert.All(Helpers.TensorLengthsIncluding0, tensorLength =>
             {
                 using BoundedMemory<T> x = CreateAndFillTensor(tensorLength);
@@ -963,7 +967,7 @@ public void Dot_AllLengths()
                     dot = Add(dot, Multiply(x[i], y[i]));
                 }
 
-                AssertEqualTolerance(dot, Dot(x, y));
+                AssertEqualTolerance(dot, Dot(x, y), tolerance);
             });
         }
         #endregion
@@ -2879,6 +2883,8 @@ public void Sinh_ValueRange()
         {
             if (!IsFloatingPoint) return;
 
+            T? tolerance = Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14);
+
             Assert.All(VectorLengthAndIteratedRange(ConvertFromSingle(-100f), ConvertFromSingle(100f), ConvertFromSingle(3f)), args =>
             {
                 T[] x = new T[args.Length];
@@ -2890,7 +2896,7 @@ public void Sinh_ValueRange()
                 T expected = Sinh(args.Element);
                 foreach (T actual in dest)
                 {
-                    AssertEqualTolerance(expected, actual);
+                    AssertEqualTolerance(expected, actual, tolerance);
                 }
             });
         }
@@ -3139,6 +3145,8 @@ public void Subtract_TensorScalar_ThrowsForOverlapppingInputsWithOutputs()
         [Fact]
         public void Sum_AllLengths()
         {
+            T? tolerance = Helpers.DetermineTolerance<T>(doubleTolerance: 1e-13, floatTolerance: 1e-5f);
+
             Assert.All(Helpers.TensorLengths, tensorLength =>
             {
                 using BoundedMemory<T> x = CreateAndFillTensor(tensorLength);
@@ -3148,7 +3156,7 @@ public void Sum_AllLengths()
                 {
                     sum = Add(sum, value);
                 }
-                AssertEqualTolerance(sum, Sum(x));
+                AssertEqualTolerance(sum, Sum(x), tolerance);
             });
         }
         #endregion
@@ -3157,6 +3165,8 @@ public void Sum_AllLengths()
         [Fact]
         public void SumOfMagnitudes_AllLengths()
         {
+            T? tolerance = Helpers.DetermineTolerance<T>(doubleTolerance: 1e-12, floatTolerance: 1e-6f);
+
             Assert.All(Helpers.TensorLengths, tensorLength =>
             {
                 using BoundedMemory<T> x = CreateTensor(tensorLength);
@@ -3167,7 +3177,7 @@ public void SumOfMagnitudes_AllLengths()
                 {
                     sum = Add(sum, Abs(value));
                 }
-                AssertEqualTolerance(sum, SumOfMagnitudes(x));
+                AssertEqualTolerance(sum, SumOfMagnitudes(x), tolerance);
             });
         }
         #endregion
@@ -3176,6 +3186,8 @@ public void SumOfMagnitudes_AllLengths()
         [Fact]
         public void SumOfSquares_AllLengths()
         {
+            T? tolerance = Helpers.DetermineTolerance<T>(doubleTolerance: 1e-12, floatTolerance: 1e-6f);
+
             Assert.All(Helpers.TensorLengths, tensorLength =>
             {
                 using BoundedMemory<T> x = CreateAndFillTensor(tensorLength);
@@ -3185,7 +3197,7 @@ public void SumOfSquares_AllLengths()
                 {
                     sum = Add(sum, Multiply(value, value));
                 }
-                AssertEqualTolerance(sum, SumOfSquares(x));
+                AssertEqualTolerance(sum, SumOfSquares(x), tolerance);
             });
         }
         #endregion
diff --git a/src/libraries/System.Private.CoreLib/gen/IntrinsicsInSystemPrivateCoreLibAnalyzer.cs b/src/libraries/System.Private.CoreLib/gen/IntrinsicsInSystemPrivateCoreLibAnalyzer.cs
index 75f702eaca8e..f9a5eff99648 100644
--- a/src/libraries/System.Private.CoreLib/gen/IntrinsicsInSystemPrivateCoreLibAnalyzer.cs
+++ b/src/libraries/System.Private.CoreLib/gen/IntrinsicsInSystemPrivateCoreLibAnalyzer.cs
@@ -293,6 +293,12 @@ private static INamedTypeSymbol[][] DecomposePropertySymbolForIsSupportedGroups_
                 if (propertyDefiningSyntax is PropertyDeclarationSyntax propertyDeclaration
                     && propertyDeclaration.ExpressionBody is ArrowExpressionClauseSyntax arrowExpression)
                 {
+                    if (model.SyntaxTree != arrowExpression.SyntaxTree)
+                    {
+#pragma warning disable RS1030
+                        model = model.Compilation.GetSemanticModel(arrowExpression.SyntaxTree);
+#pragma warning restore RS1030
+                    }
                     return DecomposeConditionForIsSupportedGroups(context, model, arrowExpression.Expression);
                 }
             }
diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs
index b16548c7b4c3..45a5b9f03726 100644
--- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs
+++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs
@@ -45,6 +45,8 @@ public partial class Thread
     {
         [ThreadStatic]
         public static bool ThrowOnBlockingWaitOnJSInteropThread;
+        [ThreadStatic]
+        public static bool WarnOnBlockingWaitOnJSInteropThread;
 
         public static void AssureBlockingPossible() { throw null; }
         public static void ForceBlockingWait(Action<object?> action, object? state) { throw null; }
diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt
index 3b80cb0de675..2a6434973ff1 100644
--- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt
+++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt
@@ -7,4 +7,5 @@ T:System.Diagnostics.DebugProvider
 M:System.Diagnostics.Debug.SetProvider(System.Diagnostics.DebugProvider)
 M:System.Threading.Thread.AssureBlockingPossible
 F:System.Threading.Thread.ThrowOnBlockingWaitOnJSInteropThread
+F:System.Threading.Thread.WarnOnBlockingWaitOnJSInteropThread
 F:System.Threading.Thread.ForceBlockingWait
diff --git a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.LinkAttributes.Shared.xml b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.LinkAttributes.Shared.xml
index adbcd6f00ee4..7706dd2ce18f 100644
--- a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.LinkAttributes.Shared.xml
+++ b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.LinkAttributes.Shared.xml
@@ -250,6 +250,12 @@
     <type fullname="System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute">
       <attribute internal="RemoveAttributeInstances" />
     </type>
+    <type fullname="System.Diagnostics.CodeAnalysis.FeatureGuardAttribute">
+      <attribute internal="RemoveAttributeInstances" />
+    </type>
+    <type fullname="System.Diagnostics.CodeAnalysis.FeatureSwitchDefinitionAttribute">
+      <attribute internal="RemoveAttributeInstances" />
+    </type>
     <type fullname="System.Diagnostics.CodeAnalysis.RequiresAssemblyFilesAttribute">
       <attribute internal="RemoveAttributeInstances" />
     </type>
diff --git a/src/libraries/System.Private.CoreLib/src/Resources/Strings.resx b/src/libraries/System.Private.CoreLib/src/Resources/Strings.resx
index bf27c2f870db..2bc7699526df 100644
--- a/src/libraries/System.Private.CoreLib/src/Resources/Strings.resx
+++ b/src/libraries/System.Private.CoreLib/src/Resources/Strings.resx
@@ -1101,6 +1101,12 @@
   <data name="Argument_GenConstraintViolation" xml:space="preserve">
     <value>GenericArguments[{0}], '{1}', on '{2}' violates the constraint of type '{3}'.</value>
   </data>
+  <data name="Argument_GenTypeConstraintsNotEqual" xml:space="preserve">
+    <value>Generic type constraints do not match.</value>
+  </data>
+  <data name="Argument_GenMethodConstraintsNotEqual" xml:space="preserve">
+    <value>Generic method constraints do not match.</value>
+  </data>
   <data name="Argument_GenericArgsCount" xml:space="preserve">
     <value>The number of generic arguments provided doesn't equal the arity of the generic type definition.</value>
   </data>
@@ -3344,12 +3350,18 @@
   <data name="RFLCT_Targ_ITargMismatch" xml:space="preserve">
     <value>Object does not match target type.</value>
   </data>
+  <data name="RFLCT_Targ_ITargMismatch_WithType" xml:space="preserve">
+    <value>Object type {0} does not match target type {1}.</value>
+  </data>
   <data name="RFLCT_Targ_StatFldReqTarg" xml:space="preserve">
     <value>Non-static field requires a target.</value>
   </data>
   <data name="RFLCT_Targ_StatMethReqTarg" xml:space="preserve">
     <value>Non-static method requires a target.</value>
   </data>
+  <data name="RuntimeInstanceNotAllowed" xml:space="preserve">
+    <value>Runtime instantiation of this attribute is not allowed.</value>
+  </data>
   <data name="RuntimeWrappedException" xml:space="preserve">
     <value>An object that does not derive from System.Exception has been wrapped in a RuntimeWrappedException.</value>
   </data>
@@ -4298,10 +4310,10 @@
   <data name="Reflection_Disabled" xml:space="preserve">
     <value>This operation is not available because the reflection support was disabled at compile time.</value>
   </data>
-  <data name="NotSupported_AssemblySave" xml:space="preserve">
-    <value>This AssemblyBuilder instance doesn't support saving. Use AssemblyBuilder.DefinePersistedAssembly to create an AssemblyBuilder instance that supports saving.</value>
+  <data name="RFLCT_CannotSetInitonlyStaticField" xml:space="preserve">
+    <value>Cannot set initonly static field '{0}' after type '{1}' is initialized.</value>
   </data>
   <data name="WasmThreads_BlockingWaitNotSupportedOnJSInterop" xml:space="preserve">
     <value>Blocking wait is not supported on the JS interop threads.</value>
   </data>
-</root>
\ No newline at end of file
+</root>
diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
index a16d1660708c..61e511427510 100644
--- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
+++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
@@ -268,6 +268,8 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\CodeAnalysis\DynamicDependencyAttribute.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\CodeAnalysis\ExcludeFromCodeCoverageAttribute.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\CodeAnalysis\ExperimentalAttribute.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\CodeAnalysis\FeatureGuardAttribute.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\CodeAnalysis\FeatureSwitchDefinitionAttribute.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\CodeAnalysis\NullableAttributes.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\CodeAnalysis\UnscopedRefAttribute.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\CodeAnalysis\RequiresAssemblyFilesAttribute.cs" />
@@ -423,14 +425,16 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\IFormatProvider.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\IFormattable.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Index.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any1CharPackedSearchValues.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any1CharPackedIgnoreCaseSearchValues.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any2CharPackedIgnoreCaseSearchValues.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any3CharPackedSearchValues.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any2CharPackedSearchValues.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any1SearchValues.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any2SearchValues.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any3SearchValues.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\BitVector256.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\ProbabilisticWithAsciiCharSearchValues.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SingleCharSearchValues.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SingleByteSearchValues.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any2ByteSearchValues.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any2CharSearchValues.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any3ByteSearchValues.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any3CharSearchValues.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any4SearchValues.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any5SearchValues.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\AsciiByteSearchValues.cs" />
@@ -450,7 +454,6 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\AhoCorasickBuilder.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\AhoCorasickNode.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\CharacterFrequencyHelper.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\EightPackedReferences.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\RabinKarp.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\StringSearchValuesHelper.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\Helpers\TeddyBucketizer.cs" />
@@ -468,6 +471,7 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\StringSearchValuesAhoCorasick.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\StringSearchValuesRabinKarp.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\IndexOutOfRangeException.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\InlineArrays.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\InsufficientExecutionStackException.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\InsufficientMemoryException.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Int16.cs" />
@@ -611,7 +615,6 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\OutOfMemoryException.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\OverflowException.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\ParamArrayAttribute.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\ParamsArray.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\ParseNumbers.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\PasteArguments.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\PlatformID.cs" />
@@ -702,6 +705,7 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Reflection\EventInfo.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Reflection\ExceptionHandlingClause.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Reflection\ExceptionHandlingClauseOptions.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Reflection\FieldAccessor.cs" Condition="'$(FeatureNativeAot)' != 'true' and '$(FeatureMono)' != 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Reflection\FieldAttributes.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Reflection\FieldInfo.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Reflection\GenericParameterAttributes.cs" />
@@ -869,6 +873,7 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\NullableContextAttribute.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\NullablePublicOnlyAttribute.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\ReferenceAssemblyAttribute.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\ParamCollectionAttribute.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\PoolingAsyncValueTaskMethodBuilder.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\PoolingAsyncValueTaskMethodBuilderT.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\CompilerServices\PreserveBaseOverridesAttribute.cs" />
@@ -1025,6 +1030,7 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\VarEnum.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\VariantWrapper.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\WasmImportLinkageAttribute.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Enums.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\ISimdVector_2.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Scalar.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\SimdVectorExtensions.cs" />
@@ -1081,6 +1087,7 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\SByte.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Security\AllowPartiallyTrustedCallersAttribute.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Security\CryptographicException.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Security\DynamicSecurityMethodAttribute.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Security\IPermission.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Security\ISecurityEncodable.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Security\IStackWalk.cs" />
@@ -1115,6 +1122,7 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\SpanDebugView.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SpanHelpers.BinarySearch.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SpanHelpers.Byte.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\SpanHelpers.ByteMemOps.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SpanHelpers.Char.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SpanHelpers.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\SpanHelpers.Packed.cs" />
@@ -1422,6 +1430,9 @@
     <Compile Include="$(CommonPath)System\NotImplemented.cs">
       <Link>Common\System\NotImplemented.cs</Link>
     </Compile>
+    <Compile Include="$(CommonPath)System\Number.Formatting.Common.cs">
+      <Link>System\Number.Formatting.Common.cs</Link>
+    </Compile>
     <Compile Include="$(CommonPath)System\Number.NumberBuffer.cs">
       <Link>System\Number.NumberBuffer.cs</Link>
     </Compile>
@@ -1516,6 +1527,7 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\EventCounter.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\EventDescriptor.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\EventPipe.cs" Condition="'$(FeaturePerfTracing)' == 'true'" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\EventPipe.Internal.cs" Condition="'$(FeaturePerfTracing)' == 'true' and '$(FeatureMono)' != 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\EventPipeEventDispatcher.cs" Condition="'$(FeaturePerfTracing)' == 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\EventPipeEventProvider.cs" Condition="'$(FeaturePerfTracing)' == 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\EventPipeMetadataGenerator.cs" Condition="'$(FeaturePerfTracing)' == 'true'" />
@@ -1529,6 +1541,7 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\NativeRuntimeEventSource.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\NativeRuntimeEventSource.Threading.cs" Condition="'$(FeaturePerfTracing)' != 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\NativeRuntimeEventSource.Threading.NativeSinks.cs" Condition="'$(FeaturePerfTracing)' == 'true'" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\NativeRuntimeEventSource.Threading.NativeSinks.Internal.cs" Condition="'$(FeaturePerfTracing)' == 'true' and '$(FeatureMono)' != 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\PollingCounter.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\RuntimeEventSource.cs" Condition="'$(FeaturePerfTracing)' == 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Tracing\Winmeta.cs" />
@@ -2137,7 +2150,6 @@
     <Compile Include="$(MSBuildThisFileDirectory)Microsoft\Win32\SafeHandles\SafeRegistryHandle.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)Microsoft\Win32\SafeHandles\SafeThreadPoolIOHandle.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\AppDomain.Windows.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\Buffer.Windows.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\DateTime.Windows.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Environment.Win32.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Environment.Windows.cs" />
@@ -2455,7 +2467,6 @@
     <Compile Include="$(MSBuildThisFileDirectory)Microsoft\Win32\SafeHandles\SafeFileHandle.ThreadPoolValueTaskSource.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)Microsoft\Win32\SafeHandles\SafeFileHandle.Unix.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\AppDomain.Unix.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\Buffer.Unix.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\DateTime.Unix.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\DebugProvider.Unix.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Diagnostics\Stopwatch.Unix.cs" />
@@ -2677,7 +2688,6 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\PortableThreadPool.ThreadCounts.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\PortableThreadPool.WaitThread.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\PortableThreadPool.WorkerThread.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\Threading\PortableThreadPool.WorkerThread.NonBrowser.cs" Condition="'$(TargetsBrowser)' != 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\PortableThreadPool.WorkerTracking.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\PortableThreadPool.Unix.cs" Condition="'$(TargetsUnix)' == 'true' or ('$(TargetsBrowser)' == 'true' and '$(FeatureWasmManagedThreads)' != 'true') or '$(TargetsWasi)' == 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\PortableThreadPool.Windows.cs" Condition="'$(TargetsWindows)' == 'true'" />
@@ -2689,7 +2699,6 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\RegisteredWaitHandle.Windows.cs" Condition="'$(TargetsWindows)' == 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\RegisteredWaitHandle.Unix.cs" Condition="'$(TargetsWindows)' != 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\RegisteredWaitHandle.Portable.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\Threading\LowLevelLifoSemaphoreBase.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\ThreadPoolBoundHandle.Portable.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\ThreadPoolBoundHandle.Unix.cs" Condition="'$(TargetsWindows)' != 'true'" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\ThreadPoolBoundHandle.Windows.cs" Condition="'$(TargetsWindows)' == 'true'" />
diff --git a/src/libraries/System.Private.CoreLib/src/System/Array.cs b/src/libraries/System.Private.CoreLib/src/System/Array.cs
index c21caa8cc1d7..84bd5ed20eed 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Array.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Array.cs
@@ -58,7 +58,7 @@ public static void Resize<T>([NotNull] ref T[]? array, int newSize)
                 // actually of type U[], where U:T; or that an int[] <-> uint[] or
                 // similar cast has occurred. In any case, since it's always legal
                 // to reinterpret U as T in this scenario (but not necessarily the
-                // other way around), we can use Buffer.Memmove here.
+                // other way around), we can use SpanHelpers.Memmove here.
 
                 T[] newArray = new T[newSize];
                 Buffer.Memmove(
@@ -377,7 +377,7 @@ public static unsafe void Copy(Array sourceArray, Array destinationArray, int le
                 if (pMT->ContainsGCPointers)
                     Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount);
                 else
-                    Buffer.Memmove(ref dst, ref src, byteCount);
+                    SpanHelpers.Memmove(ref dst, ref src, byteCount);
 
                 // GC.KeepAlive(sourceArray) not required. pMT kept alive via sourceArray
                 return;
@@ -408,7 +408,7 @@ public static unsafe void Copy(Array sourceArray, int sourceIndex, Array destina
                     if (pMT->ContainsGCPointers)
                         Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount);
                     else
-                        Buffer.Memmove(ref dst, ref src, byteCount);
+                        SpanHelpers.Memmove(ref dst, ref src, byteCount);
 
                     // GC.KeepAlive(sourceArray) not required. pMT kept alive via sourceArray
                     return;
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs
deleted file mode 100644
index 008bc9310a24..000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs
+++ /dev/null
@@ -1,19 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-namespace System
-{
-    public static partial class Buffer
-    {
-#if TARGET_ARM64 || TARGET_LOONGARCH64
-        // Managed code is currently faster than glibc unoptimized memmove
-        // TODO-ARM64-UNIX-OPT revisit when glibc optimized memmove is in Linux distros
-        // https://github.com/dotnet/runtime/issues/8897
-        private static nuint MemmoveNativeThreshold => nuint.MaxValue;
-#elif TARGET_ARM
-        private const nuint MemmoveNativeThreshold = 512;
-#else
-        private const nuint MemmoveNativeThreshold = 2048;
-#endif
-    }
-}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs
deleted file mode 100644
index 4dea08790b91..000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs
+++ /dev/null
@@ -1,16 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-namespace System
-{
-    public static partial class Buffer
-    {
-#if TARGET_ARM64
-        // Determine optimal value for Windows.
-        // https://github.com/dotnet/runtime/issues/8896
-        private static nuint MemmoveNativeThreshold => nuint.MaxValue;
-#else
-        private const nuint MemmoveNativeThreshold = 2048;
-#endif
-    }
-}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.cs
index 51ec733aaef5..24f8794d852a 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Buffer.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.cs
@@ -1,14 +1,9 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-#if TARGET_AMD64 || TARGET_ARM64 || (TARGET_32BIT && !TARGET_ARM) || TARGET_LOONGARCH64
-#define HAS_CUSTOM_BLOCKS
-#endif
-
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
-using System.Runtime.Intrinsics;
 
 namespace System
 {
@@ -128,227 +123,16 @@ public static unsafe void MemoryCopy(void* source, void* destination, ulong dest
             Memmove(ref *(byte*)destination, ref *(byte*)source, checked((nuint)sourceBytesToCopy));
         }
 
-        [Intrinsic] // Unrolled for small constant lengths
-        internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len)
-        {
-            // P/Invoke into the native version when the buffers are overlapping.
-            if (((nuint)(nint)Unsafe.ByteOffset(ref src, ref dest) < len) || ((nuint)(nint)Unsafe.ByteOffset(ref dest, ref src) < len))
-            {
-                goto BuffersOverlap;
-            }
-
-            // Use "(IntPtr)(nint)len" to avoid overflow checking on the explicit cast to IntPtr
-
-            ref byte srcEnd = ref Unsafe.Add(ref src, (IntPtr)(nint)len);
-            ref byte destEnd = ref Unsafe.Add(ref dest, (IntPtr)(nint)len);
-
-            if (len <= 16)
-                goto MCPY02;
-            if (len > 64)
-                goto MCPY05;
-
-        MCPY00:
-            // Copy bytes which are multiples of 16 and leave the remainder for MCPY01 to handle.
-            Debug.Assert(len > 16 && len <= 64);
-#if HAS_CUSTOM_BLOCKS
-            Unsafe.As<byte, Block16>(ref dest) = Unsafe.As<byte, Block16>(ref src); // [0,16]
-#elif TARGET_64BIT
-            Unsafe.As<byte, long>(ref dest) = Unsafe.As<byte, long>(ref src);
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 8)); // [0,16]
-#else
-            Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 4));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 8));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 12)); // [0,16]
-#endif
-            if (len <= 32)
-                goto MCPY01;
-#if HAS_CUSTOM_BLOCKS
-            Unsafe.As<byte, Block16>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, Block16>(ref Unsafe.Add(ref src, 16)); // [0,32]
-#elif TARGET_64BIT
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 16));
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 24)); // [0,32]
-#else
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 16));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 20)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 20));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 24));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 28)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 28)); // [0,32]
-#endif
-            if (len <= 48)
-                goto MCPY01;
-#if HAS_CUSTOM_BLOCKS
-            Unsafe.As<byte, Block16>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, Block16>(ref Unsafe.Add(ref src, 32)); // [0,48]
-#elif TARGET_64BIT
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 32));
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 40)); // [0,48]
-#else
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 32));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 36)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 36));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 40));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 44)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 44)); // [0,48]
-#endif
-
-        MCPY01:
-            // Unconditionally copy the last 16 bytes using destEnd and srcEnd and return.
-            Debug.Assert(len > 16 && len <= 64);
-#if HAS_CUSTOM_BLOCKS
-            Unsafe.As<byte, Block16>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, Block16>(ref Unsafe.Add(ref srcEnd, -16));
-#elif TARGET_64BIT
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -16));
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -8));
-#else
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -16));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -12));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -8));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
-#endif
-            return;
-
-        MCPY02:
-            // Copy the first 8 bytes and then unconditionally copy the last 8 bytes and return.
-            if ((len & 24) == 0)
-                goto MCPY03;
-            Debug.Assert(len >= 8 && len <= 16);
-#if TARGET_64BIT
-            Unsafe.As<byte, long>(ref dest) = Unsafe.As<byte, long>(ref src);
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -8));
-#else
-            Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 4));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -8));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
-#endif
-            return;
-
-        MCPY03:
-            // Copy the first 4 bytes and then unconditionally copy the last 4 bytes and return.
-            if ((len & 4) == 0)
-                goto MCPY04;
-            Debug.Assert(len >= 4 && len < 8);
-            Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
-            return;
-
-        MCPY04:
-            // Copy the first byte. For pending bytes, do an unconditionally copy of the last 2 bytes and return.
-            Debug.Assert(len < 4);
-            if (len == 0)
-                return;
-            dest = src;
-            if ((len & 2) == 0)
-                return;
-            Unsafe.As<byte, short>(ref Unsafe.Add(ref destEnd, -2)) = Unsafe.As<byte, short>(ref Unsafe.Add(ref srcEnd, -2));
-            return;
-
-        MCPY05:
-            // PInvoke to the native version when the copy length exceeds the threshold.
-            if (len > MemmoveNativeThreshold)
-            {
-                goto PInvoke;
-            }
-
-#if HAS_CUSTOM_BLOCKS
-            if (len >= 256)
-            {
-                // Try to opportunistically align the destination below. The input isn't pinned, so the GC
-                // is free to move the references. We're therefore assuming that reads may still be unaligned.
-                //
-                // dest is more important to align than src because an unaligned store is more expensive
-                // than an unaligned load.
-                nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref dest) & 63;
-                Unsafe.As<byte, Block64>(ref dest) = Unsafe.As<byte, Block64>(ref src);
-                src = ref Unsafe.Add(ref src, misalignedElements);
-                dest = ref Unsafe.Add(ref dest, misalignedElements);
-                len -= misalignedElements;
-            }
-#endif
-
-            // Copy 64-bytes at a time until the remainder is less than 64.
-            // If remainder is greater than 16 bytes, then jump to MCPY00. Otherwise, unconditionally copy the last 16 bytes and return.
-            Debug.Assert(len > 64 && len <= MemmoveNativeThreshold);
-            nuint n = len >> 6;
-
-        MCPY06:
-#if HAS_CUSTOM_BLOCKS
-            Unsafe.As<byte, Block64>(ref dest) = Unsafe.As<byte, Block64>(ref src);
-#elif TARGET_64BIT
-            Unsafe.As<byte, long>(ref dest) = Unsafe.As<byte, long>(ref src);
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 8));
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 16));
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 24));
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 32));
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 40));
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 48)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 48));
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 56)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 56));
-#else
-            Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 4));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 8));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 12));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 16));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 20)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 20));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 24));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 28)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 28));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 32));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 36)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 36));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 40));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 44)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 44));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 48)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 48));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 52)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 52));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 56)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 56));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 60)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 60));
-#endif
-            dest = ref Unsafe.Add(ref dest, 64);
-            src = ref Unsafe.Add(ref src, 64);
-            n--;
-            if (n != 0)
-                goto MCPY06;
-
-            len %= 64;
-            if (len > 16)
-                goto MCPY00;
-#if HAS_CUSTOM_BLOCKS
-            Unsafe.As<byte, Block16>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, Block16>(ref Unsafe.Add(ref srcEnd, -16));
-#elif TARGET_64BIT
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -16));
-            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -8));
-#else
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -16));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -12));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -8));
-            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
-#endif
-            return;
-
-        BuffersOverlap:
-            // If the buffers overlap perfectly, there's no point to copying the data.
-            if (Unsafe.AreSame(ref dest, ref src))
-            {
-                return;
-            }
-
-        PInvoke:
-            _Memmove(ref dest, ref src, len);
-        }
-
         // Non-inlinable wrapper around the QCall that avoids polluting the fast path
         // with P/Invoke prolog/epilog.
         [MethodImpl(MethodImplOptions.NoInlining)]
-        private static unsafe void _Memmove(ref byte dest, ref byte src, nuint len)
+        internal static unsafe void _Memmove(ref byte dest, ref byte src, nuint len)
         {
             fixed (byte* pDest = &dest)
             fixed (byte* pSrc = &src)
                 __Memmove(pDest, pSrc, len);
         }
 
-#if HAS_CUSTOM_BLOCKS
-        [StructLayout(LayoutKind.Sequential, Size = 16)]
-        private struct Block16 { }
-
-        [StructLayout(LayoutKind.Sequential, Size = 64)]
-        private struct Block64 { }
-#endif // HAS_CUSTOM_BLOCKS
-
         // Non-inlinable wrapper around the QCall that avoids polluting the fast path
         // with P/Invoke prolog/epilog.
         [MethodImpl(MethodImplOptions.NoInlining)]
@@ -370,7 +154,7 @@ internal static unsafe void Memmove<T>(ref T destination, ref T source, nuint el
             if (!RuntimeHelpers.IsReferenceOrContainsReferences<T>())
             {
                 // Blittable memmove
-                Memmove(
+                SpanHelpers.Memmove(
                     ref Unsafe.As<T, byte>(ref destination),
                     ref Unsafe.As<T, byte>(ref source),
                     elementCount * (nuint)sizeof(T));
@@ -401,7 +185,6 @@ internal static void BulkMoveWithWriteBarrier(ref byte destination, ref byte sou
                 _BulkMoveWithWriteBarrier(ref destination, ref source, byteCount);
         }
 
-#pragma warning disable IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
         // Non-inlinable wrapper around the loop for copying large blocks in chunks
         [MethodImpl(MethodImplOptions.NoInlining)]
         private static void _BulkMoveWithWriteBarrier(ref byte destination, ref byte source, nuint byteCount)
@@ -436,7 +219,6 @@ private static void _BulkMoveWithWriteBarrier(ref byte destination, ref byte sou
             }
             __BulkMoveWithWriteBarrier(ref destination, ref source, byteCount);
         }
-#pragma warning restore IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
 
 #endif // !MONO
     }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Encoder.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Encoder.cs
index 08ca62b533f5..b63c711e4103 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Encoder.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Encoder.cs
@@ -85,6 +85,15 @@ public static unsafe OperationStatus EncodeToUtf8(ReadOnlySpan<byte> bytes, Span
                             goto DoneExit;
                     }
 
+                    end = srcMax - 48;
+                    if (AdvSimd.Arm64.IsSupported && (end >= src))
+                    {
+                        AdvSimdEncode(ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes);
+
+                        if (src == srcEnd)
+                            goto DoneExit;
+                    }
+
                     end = srcMax - 16;
                     if ((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) && BitConverter.IsLittleEndian && (end >= src))
                     {
@@ -480,6 +489,64 @@ private static unsafe void Avx2Encode(ref byte* srcBytes, ref byte* destBytes, b
             destBytes = dest;
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
+        private static unsafe void AdvSimdEncode(ref byte* srcBytes, ref byte* destBytes, byte* srcEnd, int sourceLength, int destLength, byte* srcStart, byte* destStart)
+        {
+            // C# implementation of https://github.com/aklomp/base64/blob/3a5add8652076612a8407627a42c768736a4263f/lib/arch/neon64/enc_loop.c
+            Vector128<byte> str1;
+            Vector128<byte> str2;
+            Vector128<byte> str3;
+            Vector128<byte> res1;
+            Vector128<byte> res2;
+            Vector128<byte> res3;
+            Vector128<byte> res4;
+            Vector128<byte> tblEnc1 = Vector128.Create("ABCDEFGHIJKLMNOP"u8).AsByte();
+            Vector128<byte> tblEnc2 = Vector128.Create("QRSTUVWXYZabcdef"u8).AsByte();
+            Vector128<byte> tblEnc3 = Vector128.Create("ghijklmnopqrstuv"u8).AsByte();
+            Vector128<byte> tblEnc4 = Vector128.Create("wxyz0123456789+/"u8).AsByte();
+            byte* src = srcBytes;
+            byte* dest = destBytes;
+
+            // If we have Neon support, pick off 48 bytes at a time for as long as we can.
+            do
+            {
+                // Load 48 bytes and deinterleave:
+                AssertRead<Vector128<byte>>(src, srcStart, sourceLength);
+                (str1, str2, str3) = AdvSimd.Arm64.LoadVector128x3AndUnzip(src);
+
+                // Divide bits of three input bytes over four output bytes:
+                res1 = AdvSimd.ShiftRightLogical(str1, 2);
+                res2 = AdvSimd.ShiftRightLogical(str2, 4);
+                res3 = AdvSimd.ShiftRightLogical(str3, 6);
+                res2 = AdvSimd.ShiftLeftAndInsert(res2, str1, 4);
+                res3 = AdvSimd.ShiftLeftAndInsert(res3, str2, 2);
+
+                // Clear top two bits:
+                res2 &= AdvSimd.DuplicateToVector128((byte)0x3F);
+                res3 &= AdvSimd.DuplicateToVector128((byte)0x3F);
+                res4 = str3 & AdvSimd.DuplicateToVector128((byte)0x3F);
+
+                // The bits have now been shifted to the right locations;
+                // translate their values 0..63 to the Base64 alphabet.
+                // Use a 64-byte table lookup:
+                res1 = AdvSimd.Arm64.VectorTableLookup((tblEnc1, tblEnc2, tblEnc3, tblEnc4), res1);
+                res2 = AdvSimd.Arm64.VectorTableLookup((tblEnc1, tblEnc2, tblEnc3, tblEnc4), res2);
+                res3 = AdvSimd.Arm64.VectorTableLookup((tblEnc1, tblEnc2, tblEnc3, tblEnc4), res3);
+                res4 = AdvSimd.Arm64.VectorTableLookup((tblEnc1, tblEnc2, tblEnc3, tblEnc4), res4);
+
+                // Interleave and store result:
+                AssertWrite<Vector128<byte>>(dest, destStart, destLength);
+                AdvSimd.Arm64.StoreVector128x4AndZip(dest, (res1, res2, res3, res4));
+
+                src += 48;
+                dest += 64;
+            } while (src <= srcEnd);
+
+            srcBytes = src;
+            destBytes = dest;
+        }
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CompExactlyDependsOn(typeof(Ssse3))]
         [CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
diff --git a/src/libraries/System.Private.CoreLib/src/System/ComponentModel/DefaultValueAttribute.cs b/src/libraries/System.Private.CoreLib/src/System/ComponentModel/DefaultValueAttribute.cs
index e86b53d640d3..322a6ae242b0 100644
--- a/src/libraries/System.Private.CoreLib/src/System/ComponentModel/DefaultValueAttribute.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/ComponentModel/DefaultValueAttribute.cs
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Globalization;
 using System.Reflection;
@@ -22,6 +23,9 @@ public class DefaultValueAttribute : Attribute
         // Delegate ad hoc created 'TypeDescriptor.ConvertFromInvariantString' reflection object cache
         private static object? s_convertFromInvariantString;
 
+        [FeatureSwitchDefinition("System.ComponentModel.DefaultValueAttribute.IsSupported")]
+        internal static bool IsSupported => AppContext.TryGetSwitch("System.ComponentModel.DefaultValueAttribute.IsSupported", out bool isSupported) ? isSupported : true;
+
         /// <summary>
         /// Initializes a new instance of the <see cref='DefaultValueAttribute'/>
         /// class, converting the specified value to the specified type, and using the U.S. English
@@ -35,6 +39,12 @@ public DefaultValueAttribute(
             // The null check and try/catch here are because attributes should never throw exceptions.
             // We would fail to load an otherwise normal class.
 
+            if (!IsSupported)
+            {
+                Debug.Assert(!IsSupported, "Runtime instantiation of this attribute is not allowed.");
+                return;
+            }
+
             if (type == null)
             {
                 return;
@@ -73,7 +83,7 @@ static bool TryConvertFromInvariantString(
                     {
                         Type? typeDescriptorType = Type.GetType("System.ComponentModel.TypeDescriptor, System.ComponentModel.TypeConverter", throwOnError: false);
                         MethodInfo? mi = typeDescriptorType?.GetMethod("ConvertFromInvariantString", BindingFlags.NonPublic | BindingFlags.Static);
-                        Volatile.Write(ref s_convertFromInvariantString, mi == null ? new object() : mi.CreateDelegate(typeof(Func<Type, string, object>)));
+                        Volatile.Write(ref s_convertFromInvariantString, mi == null ? new object() : mi.CreateDelegate<Func<Type, string, object>>());
                     }
 
                     if (!(s_convertFromInvariantString is Func<Type, string?, object> convertFromInvariantString))
@@ -229,7 +239,18 @@ public DefaultValueAttribute(ulong value)
         /// <summary>
         /// Gets the default value of the property this attribute is bound to.
         /// </summary>
-        public virtual object? Value => _value;
+        public virtual object? Value
+        {
+            get
+            {
+                if (!IsSupported)
+                {
+                    throw new ArgumentException(SR.RuntimeInstanceNotAllowed);
+                }
+                return _value;
+            }
+        }
+
 
         public override bool Equals([NotNullWhen(true)] object? obj)
         {
diff --git a/src/libraries/System.Private.CoreLib/src/System/DateTime.cs b/src/libraries/System.Private.CoreLib/src/System/DateTime.cs
index efcf7155c0f1..3eeaaabbd358 100644
--- a/src/libraries/System.Private.CoreLib/src/System/DateTime.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/DateTime.cs
@@ -332,6 +332,7 @@ public DateTime(int year, int month, int day, int hour, int minute, int second)
             else
             {
                 // if we have a leap second, then we adjust it to 59 so that DateTime will consider it the last in the specified minute.
+                // codeql[cs/leap-year/unsafe-date-construction-from-two-elements] - DateTime is constructed using the user specified values, not a combination of different sources.  It would be intentional to throw if an invalid combination occurred.
                 this = new DateTime(year, month, day, hour, minute, 59);
                 ValidateLeapSecond();
             }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/CodeAnalysis/FeatureGuardAttribute.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/CodeAnalysis/FeatureGuardAttribute.cs
new file mode 100644
index 000000000000..f0ac084e94b0
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/CodeAnalysis/FeatureGuardAttribute.cs
@@ -0,0 +1,40 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Diagnostics.CodeAnalysis
+{
+    /// <summary>
+    /// Indicates that the specified public static boolean get-only property
+    /// guards access to the specified feature.
+    /// </summary>
+    /// <remarks>
+    /// Analyzers can use this to prevent warnings on calls to code that is
+    /// annotated as requiring that feature, when the callsite is guarded by a
+    /// call to the property.
+    /// </remarks>
+    [AttributeUsage(AttributeTargets.Property, Inherited = false, AllowMultiple = true)]
+#if SYSTEM_PRIVATE_CORELIB
+    public
+#else
+    internal
+#endif
+        sealed class FeatureGuardAttribute : Attribute
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="FeatureGuardAttribute"/> class
+        /// with the specified feature type.
+        /// </summary>
+        /// <param name="featureType">
+        /// The type that represents the feature guarded by the property.
+        /// </param>
+        public FeatureGuardAttribute(Type featureType)
+        {
+            FeatureType = featureType;
+        }
+
+        /// <summary>
+        /// The type that represents the feature guarded by the property.
+        /// </summary>
+        public Type FeatureType { get; }
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/CodeAnalysis/FeatureSwitchDefinitionAttribute.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/CodeAnalysis/FeatureSwitchDefinitionAttribute.cs
new file mode 100644
index 000000000000..2089d87ef5d0
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/CodeAnalysis/FeatureSwitchDefinitionAttribute.cs
@@ -0,0 +1,39 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Diagnostics.CodeAnalysis
+{
+    /// <summary>
+    /// Indicates that the specified public static boolean get-only property
+    /// corresponds to the feature switch specified by name.
+    /// </summary>
+    /// <remarks>
+    /// IL rewriters and compilers can use this to substitute the return value
+    /// of the specified property with the value of the feature switch.
+    /// </remarks>
+    [AttributeUsage(AttributeTargets.Property, Inherited = false)]
+#if SYSTEM_PRIVATE_CORELIB
+    public
+#else
+    internal
+#endif
+        sealed class FeatureSwitchDefinitionAttribute : Attribute
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="FeatureSwitchDefinitionAttribute"/> class
+        /// with the specified feature switch name.
+        /// </summary>
+        /// <param name="switchName">
+        /// The name of the feature switch that provides the value for the specified property.
+        /// </param>
+        public FeatureSwitchDefinitionAttribute(string switchName)
+        {
+            SwitchName = switchName;
+        }
+
+        /// <summary>
+        /// The name of the feature switch that provides the value for the specified property.
+        /// </summary>
+        public string SwitchName { get; }
+    }
+}
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Diagnostics/Eventing/EventPipe.CoreCLR.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipe.Internal.cs
similarity index 98%
rename from src/coreclr/System.Private.CoreLib/src/System/Diagnostics/Eventing/EventPipe.CoreCLR.cs
rename to src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipe.Internal.cs
index db76c99413e6..6039bcfaa4f7 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Diagnostics/Eventing/EventPipe.CoreCLR.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipe.Internal.cs
@@ -5,8 +5,6 @@
 using System.Runtime.InteropServices;
 using System.Threading;
 
-#if FEATURE_PERFTRACING
-
 namespace System.Diagnostics.Tracing
 {
     internal static partial class EventPipeInternal
@@ -68,5 +66,3 @@ internal static unsafe partial IntPtr CreateProvider(string providerName,
         internal static unsafe partial bool WaitForSessionSignal(ulong sessionID, int timeoutMs);
     }
 }
-
-#endif // FEATURE_PERFTRACING
diff --git a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipe.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipe.cs
index 74acfef13fcc..971451981044 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipe.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipe.cs
@@ -5,8 +5,6 @@
 using System.Runtime.InteropServices;
 using System.Threading;
 
-#if FEATURE_PERFTRACING
-
 namespace System.Diagnostics.Tracing
 {
     [StructLayout(LayoutKind.Sequential)]
@@ -150,5 +148,3 @@ internal static unsafe ulong Enable(
         }
     }
 }
-
-#endif // FEATURE_PERFTRACING
diff --git a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipeEventDispatcher.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipeEventDispatcher.cs
index 030560b20021..548f792b5243 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipeEventDispatcher.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipeEventDispatcher.cs
@@ -7,7 +7,6 @@
 
 namespace System.Diagnostics.Tracing
 {
-#if FEATURE_PERFTRACING
     internal sealed class EventPipeEventDispatcher
     {
         internal sealed class EventListenerSubscription
@@ -227,5 +226,4 @@ private static DateTime TimeStampToDateTime(long timeStamp, DateTime syncTimeUtc
             return new DateTime(inTicks, DateTimeKind.Utc);
         }
     }
-#endif // FEATURE_PERFTRACING
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipeMetadataGenerator.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipeMetadataGenerator.cs
index 4ffa0a5895d4..ed19ea2178e3 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipeMetadataGenerator.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipeMetadataGenerator.cs
@@ -5,7 +5,6 @@
 
 namespace System.Diagnostics.Tracing
 {
-#if FEATURE_PERFTRACING
     internal sealed class EventPipeMetadataGenerator
     {
         private enum MetadataTag
@@ -761,6 +760,4 @@ private static bool GetMetadataLengthForNamedTypeV2(string name, TraceLoggingTyp
             return true;
         }
     }
-
-#endif // FEATURE_PERFTRACING
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipePayloadDecoder.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipePayloadDecoder.cs
index ae9727156845..661910025d8b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipePayloadDecoder.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipePayloadDecoder.cs
@@ -7,7 +7,6 @@
 
 namespace System.Diagnostics.Tracing
 {
-#if FEATURE_PERFTRACING
     internal static class EventPipePayloadDecoder
     {
         /// <summary>
@@ -138,5 +137,4 @@ internal static object[] DecodePayload(ref EventSource.EventMetadata metadata, R
             return decodedFields;
         }
     }
-#endif // FEATURE_PERFTRACING
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventSource.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventSource.cs
index 7a292e019fdd..61c7d5218bc6 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventSource.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventSource.cs
@@ -1120,6 +1120,14 @@ protected unsafe void WriteEvent(int eventId, long arg1, byte[]? arg2)
             }
         }
 
+        // Returns the object as a IntPtr - safe when only used for logging
+        internal static unsafe nint ObjectIDForEvents(object? o)
+        {
+#pragma warning disable CS8500 // takes address of managed type
+            return *(nint*)&o;
+#pragma warning restore CS8500
+        }
+
 #pragma warning restore 1591
 
         /// <summary>
@@ -5490,7 +5498,7 @@ private string CreateManifestString()
                     if (channelInfo.Attribs != null)
                     {
                         EventChannelAttribute attribs = channelInfo.Attribs;
-                        if (Enum.IsDefined(typeof(EventChannelType), attribs.EventChannelType))
+                        if (Enum.IsDefined(attribs.EventChannelType))
                             channelType = attribs.EventChannelType.ToString();
                         enabled = attribs.Enabled;
                     }
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Diagnostics/Eventing/NativeRuntimeEventSource.Threading.NativeSinks.CoreCLR.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/NativeRuntimeEventSource.Threading.NativeSinks.Internal.cs
similarity index 57%
rename from src/coreclr/System.Private.CoreLib/src/System/Diagnostics/Eventing/NativeRuntimeEventSource.Threading.NativeSinks.CoreCLR.cs
rename to src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/NativeRuntimeEventSource.Threading.NativeSinks.Internal.cs
index 95942b6291c3..7e9368dd3e92 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Diagnostics/Eventing/NativeRuntimeEventSource.Threading.NativeSinks.CoreCLR.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/NativeRuntimeEventSource.Threading.NativeSinks.Internal.cs
@@ -10,12 +10,23 @@ namespace System.Diagnostics.Tracing
     // It contains the runtime specific interop to native event sinks.
     internal sealed partial class NativeRuntimeEventSource : EventSource
     {
-        [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+#if NATIVEAOT
+        // We don't have these keywords defined from the genRuntimeEventSources.py, so we need to manually define them here.
+        public static partial class Keywords
+        {
+            public const EventKeywords ContentionKeyword = (EventKeywords)0x4000;
+            public const EventKeywords ThreadingKeyword = (EventKeywords)0x10000;
+            public const EventKeywords ThreadTransferKeyword = (EventKeywords)0x80000000;
+            public const EventKeywords WaitHandleKeyword = (EventKeywords)0x40000000000;
+        }
+#endif
+
+        [NonEvent]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogContentionLockCreated")]
         private static partial void LogContentionLockCreated(nint LockID, nint AssociatedObjectID, ushort ClrInstanceID);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogContentionStart")]
         private static partial void LogContentionStart(
             ContentionFlagsMap ContentionFlags,
             ushort ClrInstanceID,
@@ -24,38 +35,38 @@ private static partial void LogContentionStart(
             ulong LockOwnerThreadID);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogContentionStop")]
         private static partial void LogContentionStop(
             ContentionFlagsMap ContentionFlags,
             ushort ClrInstanceID,
             double DurationNs);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogThreadPoolWorkerThreadStart")]
         private static partial void LogThreadPoolWorkerThreadStart(uint ActiveWorkerThreadCount, uint RetiredWorkerThreadCount, ushort ClrInstanceID);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogThreadPoolWorkerThreadStop")]
         private static partial void LogThreadPoolWorkerThreadStop(uint ActiveWorkerThreadCount, uint RetiredWorkerThreadCount, ushort ClrInstanceID);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogThreadPoolWorkerThreadWait")]
         private static partial void LogThreadPoolWorkerThreadWait(uint ActiveWorkerThreadCount, uint RetiredWorkerThreadCount, ushort ClrInstanceID);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogThreadPoolMinMaxThreads")]
         private static partial void LogThreadPoolMinMaxThreads(ushort MinWorkerThreads, ushort MaxWorkerThreads, ushort MinIOCompletionThreads, ushort MaxIOCompletionThreads, ushort ClrInstanceID);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentSample")]
         private static partial void LogThreadPoolWorkerThreadAdjustmentSample(double Throughput, ushort ClrInstanceID);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentAdjustment")]
         private static partial void LogThreadPoolWorkerThreadAdjustmentAdjustment(double AverageThroughput, uint NewWorkerThreadCount, ThreadAdjustmentReasonMap Reason, ushort ClrInstanceID);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogThreadPoolWorkerThreadAdjustmentStats")]
         private static partial void LogThreadPoolWorkerThreadAdjustmentStats(
             double Duration,
             double Throughput,
@@ -70,7 +81,7 @@ private static partial void LogThreadPoolWorkerThreadAdjustmentStats(
             ushort ClrInstanceID);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogThreadPoolIOEnqueue")]
         private static partial void LogThreadPoolIOEnqueue(
             IntPtr NativeOverlapped,
             IntPtr Overlapped,
@@ -78,37 +89,34 @@ private static partial void LogThreadPoolIOEnqueue(
             ushort ClrInstanceID);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogThreadPoolIODequeue")]
         private static partial void LogThreadPoolIODequeue(
             IntPtr NativeOverlapped,
             IntPtr Overlapped,
             ushort ClrInstanceID);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogThreadPoolWorkingThreadCount")]
         private static partial void LogThreadPoolWorkingThreadCount(
             uint Count,
-            ushort ClrInstanceID
-        );
+            ushort ClrInstanceID);
 
         [NonEvent]
-        [LibraryImport(RuntimeHelpers.QCall)]
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogThreadPoolIOPack")]
         private static partial void LogThreadPoolIOPack(
             IntPtr NativeOverlapped,
             IntPtr Overlapped,
             ushort ClrInstanceID);
 
-#pragma warning disable IDE0060 // Remove unused parameter
         [NonEvent]
-        private static void LogWaitHandleWaitStart(
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogWaitHandleWaitStart")]
+        private static partial void LogWaitHandleWaitStart(
             WaitHandleWaitSourceMap WaitSource,
             IntPtr AssociatedObjectID,
-            ushort ClrInstanceID) =>
-            Debug.Fail("This event is currently not expected to be raised by managed code in CoreCLR.");
+            ushort ClrInstanceID);
 
         [NonEvent]
-        private static void LogWaitHandleWaitStop(ushort ClrInstanceID) =>
-            Debug.Fail("This event is currently not expected to be raised by managed code in CoreCLR.");
-#pragma warning restore IDE0060
+        [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "NativeRuntimeEventSource_LogWaitHandleWaitStop")]
+        private static partial void LogWaitHandleWaitStop(ushort ClrInstanceID);
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/NativeRuntimeEventSource.Threading.NativeSinks.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/NativeRuntimeEventSource.Threading.NativeSinks.cs
index 5ba348edd7fe..d1609f8775e1 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/NativeRuntimeEventSource.Threading.NativeSinks.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/NativeRuntimeEventSource.Threading.NativeSinks.cs
@@ -94,7 +94,12 @@ private void ContentionLockCreated(nint LockID, nint AssociatedObjectID, ushort
 
         [NonEvent]
         [MethodImpl(MethodImplOptions.NoInlining)]
-        public void ContentionLockCreated(Lock lockObj) => ContentionLockCreated(lockObj.LockIdForEvents, lockObj.ObjectIdForEvents);
+        public void ContentionLockCreated(Lock lockObj) =>
+            ContentionLockCreated(
+                lockObj.LockIdForEvents,
+#pragma warning disable CS9216 // A value of type 'System.Threading.Lock' converted to a different type will use likely unintended monitor-based locking in 'lock' statement.
+                ObjectIDForEvents(lockObj));
+#pragma warning restore CS9216
 
         [Event(81, Level = EventLevel.Informational, Message = Messages.ContentionStart, Task = Tasks.Contention, Opcode = EventOpcode.Start, Version = 2, Keywords = Keywords.ContentionKeyword)]
         private void ContentionStart(
@@ -115,7 +120,9 @@ public void ContentionStart(Lock lockObj) =>
                 ContentionFlagsMap.Managed,
                 DefaultClrInstanceId,
                 lockObj.LockIdForEvents,
-                lockObj.ObjectIdForEvents,
+#pragma warning disable CS9216 // A value of type 'System.Threading.Lock' converted to a different type will use likely unintended monitor-based locking in 'lock' statement.
+                ObjectIDForEvents(lockObj),
+#pragma warning restore CS9216
                 lockObj.OwningThreadId);
 
         [Event(91, Level = EventLevel.Informational, Message = Messages.ContentionStop, Task = Tasks.Contention, Opcode = EventOpcode.Stop, Version = 1, Keywords = Keywords.ContentionKeyword)]
@@ -360,7 +367,7 @@ private void WaitHandleWaitStart(
         public unsafe void WaitHandleWaitStart(
             WaitHandleWaitSourceMap waitSource = WaitHandleWaitSourceMap.Unknown,
             object? associatedObject = null) =>
-            WaitHandleWaitStart(waitSource, *(nint*)Unsafe.AsPointer(ref associatedObject));
+            WaitHandleWaitStart(waitSource, ObjectIDForEvents(associatedObject));
 
         [Event(302, Level = EventLevel.Verbose, Message = Messages.WaitHandleWaitStop, Task = Tasks.WaitHandleWait, Opcode = EventOpcode.Stop, Version = 0, Keywords = Keywords.WaitHandleKeyword)]
         public void WaitHandleWaitStop(ushort ClrInstanceID = DefaultClrInstanceId)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/NativeRuntimeEventSource.Threading.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/NativeRuntimeEventSource.Threading.cs
index 11c4dfcfe6c5..6dcb332112dc 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/NativeRuntimeEventSource.Threading.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/NativeRuntimeEventSource.Threading.cs
@@ -107,7 +107,12 @@ private unsafe void ContentionLockCreated(nint LockID, nint AssociatedObjectID,
 
         [NonEvent]
         [MethodImpl(MethodImplOptions.NoInlining)]
-        public void ContentionLockCreated(Lock lockObj) => ContentionLockCreated(lockObj.LockIdForEvents, lockObj.ObjectIdForEvents);
+        public void ContentionLockCreated(Lock lockObj) =>
+            ContentionLockCreated(
+                lockObj.LockIdForEvents,
+#pragma warning disable CS9216 // A value of type 'System.Threading.Lock' converted to a different type will use likely unintended monitor-based locking in 'lock' statement.
+                ObjectIDForEvents(lockObj));
+#pragma warning restore CS9216
 
         [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2026:UnrecognizedReflectionPattern", Justification = "Parameters to this method are primitive and are trimmer safe")]
         [Event(81, Level = EventLevel.Informational, Message = Messages.ContentionStart, Task = Tasks.Contention, Opcode = EventOpcode.Start, Version = 2, Keywords = Keywords.ContentionKeyword)]
@@ -146,7 +151,9 @@ public void ContentionStart(Lock lockObj) =>
                 ContentionFlagsMap.Managed,
                 DefaultClrInstanceId,
                 lockObj.LockIdForEvents,
-                lockObj.ObjectIdForEvents,
+#pragma warning disable CS9216 // A value of type 'System.Threading.Lock' converted to a different type will use likely unintended monitor-based locking in 'lock' statement.
+                ObjectIDForEvents(lockObj),
+#pragma warning restore CS9216
                 lockObj.OwningThreadId);
 
         [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2026:UnrecognizedReflectionPattern", Justification = "Parameters to this method are primitive and are trimmer safe")]
@@ -557,7 +564,7 @@ private unsafe void WaitHandleWaitStart(
         public unsafe void WaitHandleWaitStart(
             WaitHandleWaitSourceMap waitSource = WaitHandleWaitSourceMap.Unknown,
             object? associatedObject = null) =>
-            WaitHandleWaitStart(waitSource, *(nint*)Unsafe.AsPointer(ref associatedObject));
+            WaitHandleWaitStart(waitSource, ObjectIDForEvents(associatedObject));
 
         [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2026:UnrecognizedReflectionPattern", Justification = "Parameters to this method are primitive and are trimmer safe")]
         [Event(302, Level = EventLevel.Verbose, Message = Messages.WaitHandleWaitStop, Task = Tasks.WaitHandleWait, Opcode = EventOpcode.Stop, Version = 0, Keywords = Keywords.WaitHandleKeyword)]
diff --git a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/TraceLogging/XplatEventLogger.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/TraceLogging/XplatEventLogger.cs
index 25e19b88db52..17da3860ec2b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/TraceLogging/XplatEventLogger.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/TraceLogging/XplatEventLogger.cs
@@ -1,13 +1,10 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System;
 using System.Collections.Generic;
 using System.Collections.ObjectModel;
-using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
-using System.Runtime.Versioning;
 using System.Text;
 
 #if FEATURE_EVENTSOURCE_XPLAT
@@ -18,8 +15,13 @@ internal sealed partial class XplatEventLogger : EventListener
     {
         public XplatEventLogger() { }
 
-        private static readonly Lazy<string?> eventSourceNameFilter = new Lazy<string?>(() => CompatibilitySwitch.GetValueInternal("EventSourceFilter"));
-        private static readonly Lazy<string?> eventSourceEventFilter = new Lazy<string?>(() => CompatibilitySwitch.GetValueInternal("EventNameFilter"));
+        private static readonly string s_eventSourceNameFilter = GetClrConfig("EventSourceFilter");
+        private static readonly string s_eventSourceEventFilter = GetClrConfig("EventNameFilter");
+
+        private static unsafe string GetClrConfig(string configName) => new string(EventSource_GetClrConfig(configName));
+
+        [LibraryImport(RuntimeHelpers.QCall, StringMarshalling = StringMarshalling.Utf16)]
+        private static unsafe partial char* EventSource_GetClrConfig(string configName);
 
         private static bool initializedPersistentListener;
 
@@ -153,9 +155,7 @@ private static void AppendByteArrayAsHexString(ref ValueStringBuilder builder, b
 
         protected internal override void OnEventSourceCreated(EventSource eventSource)
         {
-
-            string? eventSourceFilter = eventSourceNameFilter.Value;
-            if (string.IsNullOrEmpty(eventSourceFilter) || (eventSource.Name.Contains(eventSourceFilter, StringComparison.OrdinalIgnoreCase)))
+            if (string.IsNullOrEmpty(s_eventSourceNameFilter) || (eventSource.Name.Contains(s_eventSourceNameFilter, StringComparison.OrdinalIgnoreCase)))
             {
                 EnableEvents(eventSource, EventLevel.LogAlways, EventKeywords.All, null);
             }
@@ -173,8 +173,7 @@ protected internal override void OnEventWritten(EventWrittenEventArgs eventData)
                 return;
             }
 
-            string? eventFilter = eventSourceEventFilter.Value;
-            if (string.IsNullOrEmpty(eventFilter) || (eventData.EventName!.Contains(eventFilter, StringComparison.OrdinalIgnoreCase)))
+            if (string.IsNullOrEmpty(s_eventSourceEventFilter) || (eventData.EventName!.Contains(s_eventSourceEventFilter, StringComparison.OrdinalIgnoreCase)))
             {
                 LogOnEventWritten(eventData);
             }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CalendarData.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CalendarData.Icu.cs
index 4d3314f22ec5..26919ba0d50a 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CalendarData.Icu.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CalendarData.Icu.cs
@@ -135,14 +135,16 @@ private static unsafe bool GetCalendarInfo(string localeName, CalendarId calenda
                 out calendarString);
         }
 
-        private static bool EnumDatePatterns(string localeName, CalendarId calendarId, CalendarDataType dataType, out string[]? datePatterns)
+        private static unsafe bool EnumDatePatterns(string localeName, CalendarId calendarId, CalendarDataType dataType, out string[]? datePatterns)
         {
             datePatterns = null;
 
             IcuEnumCalendarsData callbackContext = default;
             callbackContext.Results = new List<string>();
             callbackContext.DisallowDuplicates = true;
-            bool result = EnumCalendarInfo(localeName, calendarId, dataType, ref callbackContext);
+#pragma warning disable CS8500 // takes address of managed type
+            bool result = EnumCalendarInfo(localeName, calendarId, dataType, &callbackContext);
+#pragma warning restore CS8500
             if (result)
             {
                 List<string> datePatternsList = callbackContext.Results;
@@ -362,13 +364,15 @@ private static int CountOccurrences(string input, char value, ref int index)
             return index - startIndex;
         }
 
-        private static bool EnumMonthNames(string localeName, CalendarId calendarId, CalendarDataType dataType, out string[]? monthNames, ref string? leapHebrewMonthName)
+        private static unsafe bool EnumMonthNames(string localeName, CalendarId calendarId, CalendarDataType dataType, out string[]? monthNames, ref string? leapHebrewMonthName)
         {
             monthNames = null;
 
             IcuEnumCalendarsData callbackContext = default;
             callbackContext.Results = new List<string>();
-            bool result = EnumCalendarInfo(localeName, calendarId, dataType, ref callbackContext);
+#pragma warning disable CS8500 // takes address of managed type
+            bool result = EnumCalendarInfo(localeName, calendarId, dataType, &callbackContext);
+#pragma warning restore CS8500
             if (result)
             {
                 // the month-name arrays are expected to have 13 elements.  If ICU only returns 12, add an
@@ -410,13 +414,15 @@ private static bool EnumEraNames(string localeName, CalendarId calendarId, Calen
             return result;
         }
 
-        internal static bool EnumCalendarInfo(string localeName, CalendarId calendarId, CalendarDataType dataType, out string[]? calendarData)
+        internal static unsafe bool EnumCalendarInfo(string localeName, CalendarId calendarId, CalendarDataType dataType, out string[]? calendarData)
         {
             calendarData = null;
 
             IcuEnumCalendarsData callbackContext = default;
             callbackContext.Results = new List<string>();
-            bool result = EnumCalendarInfo(localeName, calendarId, dataType, ref callbackContext);
+#pragma warning disable CS8500 // takes address of managed type
+            bool result = EnumCalendarInfo(localeName, calendarId, dataType, &callbackContext);
+#pragma warning restore CS8500
             if (result)
             {
                 calendarData = callbackContext.Results.ToArray();
@@ -425,10 +431,12 @@ internal static bool EnumCalendarInfo(string localeName, CalendarId calendarId,
             return result;
         }
 
-        private static unsafe bool EnumCalendarInfo(string localeName, CalendarId calendarId, CalendarDataType dataType, ref IcuEnumCalendarsData callbackContext)
+#pragma warning disable CS8500 // takes address of managed type
+        private static unsafe bool EnumCalendarInfo(string localeName, CalendarId calendarId, CalendarDataType dataType, IcuEnumCalendarsData* callbackContext)
         {
-            return Interop.Globalization.EnumCalendarInfo(&EnumCalendarInfoCallback, localeName, calendarId, dataType, (IntPtr)Unsafe.AsPointer(ref callbackContext));
+            return Interop.Globalization.EnumCalendarInfo(&EnumCalendarInfoCallback, localeName, calendarId, dataType, (IntPtr)callbackContext);
         }
+#pragma warning restore CS8500
 
         [UnmanagedCallersOnly]
         private static unsafe void EnumCalendarInfoCallback(char* calendarStringPtr, IntPtr context)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Browser.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Browser.cs
index fcc5943e7b76..8a8edadaf326 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Browser.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Browser.cs
@@ -10,13 +10,62 @@ namespace System.Globalization
     internal sealed partial class CultureData
     {
         private const int CULTURE_INFO_BUFFER_LEN = 50;
+        private const int LOCALE_INFO_BUFFER_LEN = 80;
+
+        private void JSInitLocaleInfo()
+        {
+            string? localeName = _sName;
+            if (string.IsNullOrEmpty(localeName))
+            {
+                _sEnglishLanguage = "Invariant Language";
+                _sNativeLanguage = _sEnglishLanguage;
+                _sEnglishCountry = "Invariant Country";
+                _sNativeCountry = _sEnglishCountry;
+                _sEnglishDisplayName = $"{_sEnglishLanguage} ({_sEnglishCountry})";
+                _sNativeDisplayName = _sEnglishDisplayName;
+            }
+            else
+            {
+                // English locale info
+                (_sEnglishLanguage, _sEnglishCountry) = JSGetLocaleInfo("en-US", localeName);
+                _sEnglishDisplayName = string.IsNullOrEmpty(_sEnglishCountry) ?
+                    _sEnglishLanguage :
+                    $"{_sEnglishLanguage} ({_sEnglishCountry})";
+                // Native locale info
+                (_sNativeLanguage, _sNativeCountry) = JSGetLocaleInfo(localeName, localeName);
+                _sNativeDisplayName = string.IsNullOrEmpty(_sNativeCountry) ?
+                    _sNativeLanguage :
+                    $"{_sNativeLanguage} ({_sNativeCountry})";
+            }
+        }
+
+        private unsafe (string, string) JSGetLocaleInfo(string cultureName, string localeName)
+        {
+            char* buffer = stackalloc char[LOCALE_INFO_BUFFER_LEN];
+            int resultLength = Interop.JsGlobalization.GetLocaleInfo(cultureName, localeName, buffer, LOCALE_INFO_BUFFER_LEN, out int exception, out object exResult);
+            if (exception != 0)
+                throw new Exception((string)exResult);
+            string result = new string(buffer, 0, resultLength);
+            string[] subresults = result.Split("##");
+            if (subresults.Length == 0)
+                throw new Exception("LocaleInfo recieved from the Browser is in incorrect format.");
+            if (subresults.Length == 1)
+                return (subresults[0], ""); // Neutral culture
+            return (subresults[0], subresults[1]);
+        }
+
+        private string JSGetNativeDisplayName(string localeName, string cultureName)
+        {
+            (string languageName, string countryName) = JSGetLocaleInfo(localeName, cultureName);
+            return string.IsNullOrEmpty(countryName) ?
+                    languageName :
+                    $"{languageName} ({countryName})";
+        }
 
         private static unsafe CultureData JSLoadCultureInfoFromBrowser(string localeName, CultureData culture)
         {
             char* buffer = stackalloc char[CULTURE_INFO_BUFFER_LEN];
-            int exception;
-            object exResult;
-            int resultLength = Interop.JsGlobalization.GetCultureInfo(localeName, buffer, CULTURE_INFO_BUFFER_LEN, out exception, out exResult);
+            int resultLength = Interop.JsGlobalization.GetCultureInfo(localeName, buffer, CULTURE_INFO_BUFFER_LEN, out int exception, out object exResult);
             if (exception != 0)
                 throw new Exception((string)exResult);
             string result = new string(buffer, 0, resultLength);
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Icu.cs
index 435e7ba9d45f..31cc5bdd6015 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Icu.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.Icu.cs
@@ -203,6 +203,12 @@ private string IcuGetLocaleInfo(LocaleStringData type, string? uiCultureName = n
             Debug.Assert(!GlobalizationMode.Invariant);
             Debug.Assert(!GlobalizationMode.UseNls);
             Debug.Assert(_sWindowsName != null, "[CultureData.IcuGetLocaleInfo] Expected _sWindowsName to be populated already");
+#if TARGET_BROWSER && !FEATURE_WASM_MANAGED_THREADS
+            if (type == LocaleStringData.NativeDisplayName)
+            {
+                return JSGetNativeDisplayName(_sWindowsName, uiCultureName ?? _sWindowsName);
+            }
+#endif
             return IcuGetLocaleInfo(_sWindowsName, type, uiCultureName);
         }
 
@@ -302,7 +308,14 @@ private unsafe string IcuGetTimeFormatString(bool shortFormat)
         // no support to lookup by region name, other than the hard-coded list in CultureData
         private static CultureData? IcuGetCultureDataFromRegionName() => null;
 
-        private string IcuGetLanguageDisplayName(string cultureName) => IcuGetLocaleInfo(cultureName, LocaleStringData.LocalizedDisplayName, CultureInfo.CurrentUICulture.Name);
+        private string IcuGetLanguageDisplayName(string cultureName)
+        {
+#if TARGET_BROWSER && !FEATURE_WASM_MANAGED_THREADS
+            return JSGetNativeDisplayName(CultureInfo.CurrentUICulture.Name, cultureName);
+#else
+            return IcuGetLocaleInfo(cultureName, LocaleStringData.LocalizedDisplayName, CultureInfo.CurrentUICulture.Name);
+#endif
+        }
 
         // use the fallback which is to return NativeName
         private static string? IcuGetRegionDisplayName() => null;
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs
index 7013429ad4c0..f049aef6c4c4 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CultureData.cs
@@ -816,12 +816,15 @@ private static string NormalizeCultureName(string name, out bool isNeutralName)
             {
                 return null;
             }
-#if TARGET_BROWSER
+#if TARGET_BROWSER && !FEATURE_WASM_MANAGED_THREADS
             // populate fields for which ICU does not provide data in Hybrid mode
             if (GlobalizationMode.Hybrid && !string.IsNullOrEmpty(culture._sName))
             {
                 culture = JSLoadCultureInfoFromBrowser(culture._sName, culture);
             }
+#if !NATIVEAOT // TODO-LLVM: https://github.com/dotnet/runtimelab/issues/2557
+            culture.JSInitLocaleInfo();
+#endif
 #endif
 
             // We need _sWindowsName to be initialized to know if we're using overrides.
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/GregorianCalendarHelper.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/GregorianCalendarHelper.cs
index b100b633e9ac..04298c12e7f6 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Globalization/GregorianCalendarHelper.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/GregorianCalendarHelper.cs
@@ -35,6 +35,7 @@ internal EraInfo(int era, int startYear, int startMonth, int startDay, int yearO
             this.yearOffset = yearOffset;
             this.minEraYear = minEraYear;
             this.maxEraYear = maxEraYear;
+            // codeql[cs/leap-year/unsafe-date-construction-from-two-elements] - A DateTime object is created using values obtained from the machine configuration.
             this.ticks = new DateTime(startYear, startMonth, startDay).Ticks;
             this.eraName = eraName;
             this.abbrevEraName = abbrevEraName;
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs
index b7de19aab570..5b18e8d2b6fc 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs
@@ -397,7 +397,7 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
                 // Do a quick search for the first element of "value".
                 int relativeIndex = isLetter ?
                     PackedSpanHelpers.PackedIndexOfIsSupported
-                        ? PackedSpanHelpers.IndexOfAny(ref Unsafe.Add(ref searchSpace, offset), valueCharU, valueCharL, searchSpaceMinusValueTailLength)
+                        ? PackedSpanHelpers.IndexOfAnyIgnoreCase(ref Unsafe.Add(ref searchSpace, offset), valueCharL, searchSpaceMinusValueTailLength)
                         : SpanHelpers.IndexOfAnyChar(ref Unsafe.Add(ref searchSpace, offset), valueCharU, valueCharL, searchSpaceMinusValueTailLength) :
                     SpanHelpers.IndexOfChar(ref Unsafe.Add(ref searchSpace, offset), valueChar, searchSpaceMinusValueTailLength);
                 if (relativeIndex < 0)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/TimeSpanFormat.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/TimeSpanFormat.cs
index 4e11f14c8090..8c8bea12bbf6 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Globalization/TimeSpanFormat.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/TimeSpanFormat.cs
@@ -268,12 +268,11 @@ internal static unsafe bool TryFormatStandard<TChar>(TimeSpan value, StandardFor
                 // Write fraction and separator, if necessary
                 if (fractionDigits != 0)
                 {
-                    Debug.Assert(format == StandardFormat.C || decimalSeparator != null);
                     if (format == StandardFormat.C)
                     {
                         *p++ = TChar.CastFrom('.');
                     }
-                    else if (decimalSeparator!.Length == 1)
+                    else if (decimalSeparator.Length == 1)
                     {
                         *p++ = decimalSeparator[0];
                     }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Guid.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Guid.Windows.cs
index e5ea8d8955ef..acef4c79a976 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Guid.Windows.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Guid.Windows.cs
@@ -5,22 +5,26 @@ namespace System
 {
     public partial struct Guid
     {
-        public static Guid NewGuid()
+        public static unsafe Guid NewGuid()
         {
-            // CoCreateGuid should never return Guid.Empty, since it attempts to maintain some
-            // uniqueness guarantees.
+            // CoCreateGuid should never return Guid.Empty, since it attempts to maintain some uniqueness guarantees.
 
-            int hr = Interop.Ole32.CoCreateGuid(out Guid g);
-            // We don't expect that this will ever throw an error, none are even documented, and so we don't want to pull
-            // in the HR to ComException mappings into the core library just for this so we will try a generic exception if
-            // we ever hit this condition.
+            Guid g;
+            int hr = Interop.Ole32.CoCreateGuid(&g);
             if (hr != 0)
             {
-                Exception ex = new Exception();
-                ex.HResult = hr;
-                throw ex;
+                ThrowForHr(hr);
             }
+
             return g;
         }
+
+        private static void ThrowForHr(int hr)
+        {
+            // We don't expect that this will ever throw an error, none are even documented, and so we don't want to pull
+            // in the HR to ComException mappings into the core library just for this so we will try a generic exception if
+            // we ever hit this condition.
+            throw new Exception() { HResult = hr };
+        }
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Guid.cs b/src/libraries/System.Private.CoreLib/src/System/Guid.cs
index b6946be5ec57..911fb9f7184a 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Guid.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Guid.cs
@@ -841,10 +841,6 @@ private static bool IsHexPrefix(ReadOnlySpan<char> str, int i) =>
             str[i] == '0' &&
             (str[i + 1] | 0x20) == 'x';
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static unsafe ReadOnlySpan<byte> AsBytes(in Guid source) =>
-            new ReadOnlySpan<byte>(Unsafe.AsPointer(ref Unsafe.AsRef(in source)), sizeof(Guid));
-
         // Returns an unsigned byte array containing the GUID.
         public byte[] ToByteArray()
         {
@@ -856,7 +852,7 @@ public byte[] ToByteArray()
             else
             {
                 // slower path for BigEndian
-                Guid guid = new Guid(AsBytes(this), false);
+                Guid guid = new Guid(MemoryMarshal.AsBytes(new ReadOnlySpan<Guid>(in this)), false);
                 MemoryMarshal.TryWrite(g, in guid);
             }
             return g;
@@ -874,7 +870,7 @@ public byte[] ToByteArray(bool bigEndian)
             else
             {
                 // slower path for Reverse
-                Guid guid = new Guid(AsBytes(this), bigEndian);
+                Guid guid = new Guid(MemoryMarshal.AsBytes(new ReadOnlySpan<Guid>(in this)), bigEndian);
                 MemoryMarshal.TryWrite(g, in guid);
             }
             return g;
@@ -893,7 +889,7 @@ public bool TryWriteBytes(Span<byte> destination)
             else
             {
                 // slower path for BigEndian
-                Guid guid = new Guid(AsBytes(this), false);
+                Guid guid = new Guid(MemoryMarshal.AsBytes(new ReadOnlySpan<Guid>(in this)), false);
                 MemoryMarshal.TryWrite(destination, in guid);
             }
             return true;
@@ -915,7 +911,7 @@ public bool TryWriteBytes(Span<byte> destination, bool bigEndian, out int bytesW
             else
             {
                 // slower path for Reverse
-                Guid guid = new Guid(AsBytes(this), bigEndian);
+                Guid guid = new Guid(MemoryMarshal.AsBytes(new ReadOnlySpan<Guid>(in this)), bigEndian);
                 MemoryMarshal.TryWrite(destination, in guid);
             }
             bytesWritten = 16;
diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/File.cs b/src/libraries/System.Private.CoreLib/src/System/IO/File.cs
index ec17866f0792..9326f6533306 100644
--- a/src/libraries/System.Private.CoreLib/src/System/IO/File.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/IO/File.cs
@@ -911,11 +911,6 @@ private static StreamReader AsyncStreamReader(string path, Encoding encoding)
                 new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, DefaultBufferSize, FileOptions.Asynchronous | FileOptions.SequentialScan),
                 encoding, detectEncodingFromByteOrderMarks: true);
 
-        private static StreamWriter AsyncStreamWriter(string path, Encoding encoding, bool append)
-            => new StreamWriter(
-                new FileStream(path, append ? FileMode.Append : FileMode.Create, FileAccess.Write, FileShare.Read, DefaultBufferSize, FileOptions.Asynchronous),
-                encoding);
-
         public static Task<string> ReadAllTextAsync(string path, CancellationToken cancellationToken = default(CancellationToken))
             => ReadAllTextAsync(path, Encoding.UTF8, cancellationToken);
 
@@ -1116,13 +1111,31 @@ private static async Task<string[]> InternalReadAllLinesAsync(string path, Encod
         public static Task WriteAllLinesAsync(string path, IEnumerable<string> contents, CancellationToken cancellationToken = default(CancellationToken))
             => WriteAllLinesAsync(path, contents, UTF8NoBOM, cancellationToken);
 
-        public static Task WriteAllLinesAsync(string path, IEnumerable<string> contents, Encoding encoding, CancellationToken cancellationToken = default(CancellationToken))
+        public static Task WriteAllLinesAsync(string path, IEnumerable<string> contents, Encoding encoding, CancellationToken cancellationToken = default(CancellationToken)) =>
+            WriteAllLinesAsync(path, contents, encoding, append: false, cancellationToken);
+
+        private static Task WriteAllLinesAsync(string path, IEnumerable<string> contents, Encoding encoding, bool append, CancellationToken cancellationToken)
         {
             Validate(path, encoding);
             ArgumentNullException.ThrowIfNull(contents);
-            return cancellationToken.IsCancellationRequested
-                ? Task.FromCanceled(cancellationToken)
-                : InternalWriteAllLinesAsync(AsyncStreamWriter(path, encoding, append: false), contents, cancellationToken);
+            if (cancellationToken.IsCancellationRequested)
+            {
+                return Task.FromCanceled(cancellationToken);
+            }
+
+            StreamWriter writer;
+            try
+            {
+                writer = new StreamWriter(
+                    new FileStream(path, append ? FileMode.Append : FileMode.Create, FileAccess.Write, FileShare.Read, DefaultBufferSize, FileOptions.Asynchronous),
+                    encoding);
+            }
+            catch (Exception e)
+            {
+                return Task.FromException(e);
+            }
+
+            return InternalWriteAllLinesAsync(writer, contents, cancellationToken);
         }
 
         private static async Task InternalWriteAllLinesAsync(StreamWriter writer, IEnumerable<string> contents, CancellationToken cancellationToken)
@@ -1159,14 +1172,8 @@ private static async Task InternalWriteAllLinesAsync(StreamWriter writer, IEnume
         public static Task AppendAllLinesAsync(string path, IEnumerable<string> contents, CancellationToken cancellationToken = default(CancellationToken))
             => AppendAllLinesAsync(path, contents, UTF8NoBOM, cancellationToken);
 
-        public static Task AppendAllLinesAsync(string path, IEnumerable<string> contents, Encoding encoding, CancellationToken cancellationToken = default(CancellationToken))
-        {
-            Validate(path, encoding);
-            ArgumentNullException.ThrowIfNull(contents);
-            return cancellationToken.IsCancellationRequested
-                ? Task.FromCanceled(cancellationToken)
-                : InternalWriteAllLinesAsync(AsyncStreamWriter(path, encoding, append: true), contents, cancellationToken);
-        }
+        public static Task AppendAllLinesAsync(string path, IEnumerable<string> contents, Encoding encoding, CancellationToken cancellationToken = default(CancellationToken)) =>
+            WriteAllLinesAsync(path, contents, encoding, append: true, cancellationToken);
 
         /// <summary>
         /// Creates a file symbolic link identified by <paramref name="path"/> that points to <paramref name="pathToTarget"/>.
diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/Path.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/IO/Path.Windows.cs
index e5ee3e8fec49..81f2e063a847 100644
--- a/src/libraries/System.Private.CoreLib/src/System/IO/Path.Windows.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/IO/Path.Windows.cs
@@ -89,7 +89,7 @@ public static string GetFullPath(string path, string basePath)
                 // Drive relative paths
                 Debug.Assert(length == 2 || !PathInternal.IsDirectorySeparator(path[2]));
 
-                if (GetVolumeName(path.AsSpan()).EqualsOrdinal(GetVolumeName(basePath.AsSpan())))
+                if (GetVolumeName(path.AsSpan()).EqualsOrdinalIgnoreCase(GetVolumeName(basePath.AsSpan())))
                 {
                     // Matching root
                     // "C:Foo" and "C:\Bar" => "C:\Bar\Foo"
@@ -349,8 +349,8 @@ internal static int GetUncRootLength(ReadOnlySpan<char> path)
             if (!isDevice && path.Slice(0, 2).EqualsOrdinal(@"\\".AsSpan()))
                 return 2;
             else if (isDevice && path.Length >= 8
-                && (path.Slice(0, 8).EqualsOrdinal(PathInternal.UncExtendedPathPrefix.AsSpan())
-                || path.Slice(5, 4).EqualsOrdinal(@"UNC\".AsSpan())))
+                && (path.Slice(0, 8).EqualsOrdinalIgnoreCase(PathInternal.UncExtendedPathPrefix.AsSpan())
+                || path.Slice(5, 4).EqualsOrdinalIgnoreCase(@"UNC\".AsSpan())))
                 return 8;
 
             return -1;
diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/StreamWriter.cs b/src/libraries/System.Private.CoreLib/src/System/IO/StreamWriter.cs
index df0c9e95cc6c..d11c1313ba84 100644
--- a/src/libraries/System.Private.CoreLib/src/System/IO/StreamWriter.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/IO/StreamWriter.cs
@@ -3,6 +3,7 @@
 
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
+using System.Globalization;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Text;
@@ -27,7 +28,7 @@ public class StreamWriter : TextWriter
         private const int MinBufferSize = 128;
 
         // Bit bucket - Null has no backing store. Non closable.
-        public static new readonly StreamWriter Null = new StreamWriter(Stream.Null, UTF8NoBOM, MinBufferSize, leaveOpen: true);
+        public static new readonly StreamWriter Null = new NullStreamWriter();
 
         private readonly Stream _stream;
         private readonly Encoding _encoding;
@@ -159,6 +160,15 @@ public StreamWriter(string path, Encoding encoding, FileStreamOptions options)
         {
         }
 
+        private StreamWriter()
+        {
+            Debug.Assert(GetType() == typeof(NullStreamWriter));
+            _stream = Stream.Null;
+            _encoding = UTF8NoBOM;
+            _encoder = null!;
+            _charBuffer = Array.Empty<char>();
+        }
+
         private static FileStream ValidateArgsAndOpenPath(string path, Encoding encoding, FileStreamOptions options)
         {
             ArgumentException.ThrowIfNullOrEmpty(path);
@@ -521,7 +531,7 @@ public override void Write([StringSyntax(StringSyntaxAttribute.CompositeFormat)]
             if (GetType() == typeof(StreamWriter))
             {
                 TwoObjects two = new TwoObjects(arg0, arg1);
-                WriteFormatHelper(format, MemoryMarshal.CreateReadOnlySpan(ref two.Arg0, 2), appendNewLine: false);
+                WriteFormatHelper(format, two, appendNewLine: false);
             }
             else
             {
@@ -534,7 +544,7 @@ public override void Write([StringSyntax(StringSyntaxAttribute.CompositeFormat)]
             if (GetType() == typeof(StreamWriter))
             {
                 ThreeObjects three = new ThreeObjects(arg0, arg1, arg2);
-                WriteFormatHelper(format, MemoryMarshal.CreateReadOnlySpan(ref three.Arg0, 3), appendNewLine: false);
+                WriteFormatHelper(format, three, appendNewLine: false);
             }
             else
             {
@@ -575,7 +585,7 @@ public override void WriteLine([StringSyntax(StringSyntaxAttribute.CompositeForm
             if (GetType() == typeof(StreamWriter))
             {
                 TwoObjects two = new TwoObjects(arg0, arg1);
-                WriteFormatHelper(format, MemoryMarshal.CreateReadOnlySpan(ref two.Arg0, 2), appendNewLine: true);
+                WriteFormatHelper(format, two, appendNewLine: true);
             }
             else
             {
@@ -588,7 +598,7 @@ public override void WriteLine([StringSyntax(StringSyntaxAttribute.CompositeForm
             if (GetType() == typeof(StreamWriter))
             {
                 ThreeObjects three = new ThreeObjects(arg0, arg1, arg2);
-                WriteFormatHelper(format, MemoryMarshal.CreateReadOnlySpan(ref three.Arg0, 3), appendNewLine: true);
+                WriteFormatHelper(format, three, appendNewLine: true);
             }
             else
             {
@@ -973,5 +983,72 @@ private void ThrowIfDisposed()
 
             void ThrowObjectDisposedException() => throw new ObjectDisposedException(GetType().Name, SR.ObjectDisposed_WriterClosed);
         }
+
+        private sealed class NullStreamWriter : StreamWriter
+        {
+            public override bool AutoFlush { get => false; set { } }
+            [AllowNull]
+            public override string NewLine { get => base.NewLine; set { } }
+            public override IFormatProvider FormatProvider => CultureInfo.InvariantCulture;
+
+            // To avoid all unnecessary overhead in the base, and to ensure StreamWriter's uninitialized state is never touched,
+            // override all methods as pure nops.
+            public override void Close() { }
+            protected override void Dispose(bool disposing) { }
+            public override ValueTask DisposeAsync() => default;
+            public override void Flush() { }
+            public override Task FlushAsync() => Task.CompletedTask;
+            public override Task FlushAsync(CancellationToken cancellationToken) => Task.CompletedTask;
+            public override void Write(char value) { }
+            public override void Write(char[]? buffer) { }
+            public override void Write(char[] buffer, int index, int count) { }
+            public override void Write(ReadOnlySpan<char> buffer) { }
+            public override void Write(bool value) { }
+            public override void Write(int value) { }
+            public override void Write(uint value) { }
+            public override void Write(long value) { }
+            public override void Write(ulong value) { }
+            public override void Write(float value) { }
+            public override void Write(double value) { }
+            public override void Write(decimal value) { }
+            public override void Write(string? value) { }
+            public override void Write(object? value) { }
+            public override void Write(StringBuilder? value) { }
+            public override void Write([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0) { }
+            public override void Write([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0, object? arg1) { }
+            public override void Write([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0, object? arg1, object? arg2) { }
+            public override void Write([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, params object?[] arg) { }
+            public override Task WriteAsync(char value) => Task.CompletedTask;
+            public override Task WriteAsync(string? value) => Task.CompletedTask;
+            public override Task WriteAsync(StringBuilder? value, CancellationToken cancellationToken = default) => Task.CompletedTask;
+            public override Task WriteAsync(char[] buffer, int index, int count) => Task.CompletedTask;
+            public override Task WriteAsync(ReadOnlyMemory<char> buffer, CancellationToken cancellationToken = default) => Task.CompletedTask;
+            public override void WriteLine() { }
+            public override void WriteLine(char value) { }
+            public override void WriteLine(char[]? buffer) { }
+            public override void WriteLine(char[] buffer, int index, int count) { }
+            public override void WriteLine(ReadOnlySpan<char> buffer) { }
+            public override void WriteLine(bool value) { }
+            public override void WriteLine(int value) { }
+            public override void WriteLine(uint value) { }
+            public override void WriteLine(long value) { }
+            public override void WriteLine(ulong value) { }
+            public override void WriteLine(float value) { }
+            public override void WriteLine(double value) { }
+            public override void WriteLine(decimal value) { }
+            public override void WriteLine(string? value) { }
+            public override void WriteLine(StringBuilder? value) { }
+            public override void WriteLine(object? value) { }
+            public override void WriteLine([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0) { }
+            public override void WriteLine([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0, object? arg1) { }
+            public override void WriteLine([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0, object? arg1, object? arg2) { }
+            public override void WriteLine([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, params object?[] arg) { }
+            public override Task WriteLineAsync(char value) => Task.CompletedTask;
+            public override Task WriteLineAsync(string? value) => Task.CompletedTask;
+            public override Task WriteLineAsync(StringBuilder? value, CancellationToken cancellationToken = default) => Task.CompletedTask;
+            public override Task WriteLineAsync(char[] buffer, int index, int count) => Task.CompletedTask;
+            public override Task WriteLineAsync(ReadOnlyMemory<char> buffer, CancellationToken cancellationToken = default) => Task.CompletedTask;
+            public override Task WriteLineAsync() => Task.CompletedTask;
+        }
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/TextWriter.cs b/src/libraries/System.Private.CoreLib/src/System/IO/TextWriter.cs
index 874638ee34fd..289ec5fac7a5 100644
--- a/src/libraries/System.Private.CoreLib/src/System/IO/TextWriter.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/IO/TextWriter.cs
@@ -672,6 +672,8 @@ internal NullTextWriter() { }
 
             public override IFormatProvider FormatProvider => CultureInfo.InvariantCulture;
             public override Encoding Encoding => Encoding.Unicode;
+            [AllowNull]
+            public override string NewLine { get => base.NewLine; set { } }
 
             // To avoid all unnecessary overhead in the base, override all Flush/Write methods as pure nops.
 
diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/UnmanagedMemoryStream.cs b/src/libraries/System.Private.CoreLib/src/System/IO/UnmanagedMemoryStream.cs
index b1b18a2c3431..68adbf72bc6b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/IO/UnmanagedMemoryStream.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/IO/UnmanagedMemoryStream.cs
@@ -390,7 +390,7 @@ internal int ReadCore(Span<byte> buffer)
                     try
                     {
                         _buffer.AcquirePointer(ref pointer);
-                        Buffer.Memmove(ref MemoryMarshal.GetReference(buffer), ref *(pointer + pos + _offset), (nuint)nInt);
+                        SpanHelpers.Memmove(ref MemoryMarshal.GetReference(buffer), ref *(pointer + pos + _offset), (nuint)nInt);
                     }
                     finally
                     {
@@ -402,7 +402,7 @@ internal int ReadCore(Span<byte> buffer)
                 }
                 else
                 {
-                    Buffer.Memmove(ref MemoryMarshal.GetReference(buffer), ref *(_mem + pos), (nuint)nInt);
+                    SpanHelpers.Memmove(ref MemoryMarshal.GetReference(buffer), ref *(_mem + pos), (nuint)nInt);
                 }
             }
 
@@ -669,7 +669,7 @@ internal unsafe void WriteCore(ReadOnlySpan<byte> buffer)
                 try
                 {
                     _buffer.AcquirePointer(ref pointer);
-                    Buffer.Memmove(ref *(pointer + pos + _offset), ref MemoryMarshal.GetReference(buffer), (nuint)buffer.Length);
+                    SpanHelpers.Memmove(ref *(pointer + pos + _offset), ref MemoryMarshal.GetReference(buffer), (nuint)buffer.Length);
                 }
                 finally
                 {
@@ -681,7 +681,7 @@ internal unsafe void WriteCore(ReadOnlySpan<byte> buffer)
             }
             else
             {
-                Buffer.Memmove(ref *(_mem + pos), ref MemoryMarshal.GetReference(buffer), (nuint)buffer.Length);
+                SpanHelpers.Memmove(ref *(_mem + pos), ref MemoryMarshal.GetReference(buffer), (nuint)buffer.Length);
             }
 
             _position = n;
diff --git a/src/libraries/System.Private.CoreLib/src/System/ParamsArray.cs b/src/libraries/System.Private.CoreLib/src/System/InlineArrays.cs
similarity index 63%
rename from src/libraries/System.Private.CoreLib/src/System/ParamsArray.cs
rename to src/libraries/System.Private.CoreLib/src/System/InlineArrays.cs
index b1dde8c03013..78394062ec94 100644
--- a/src/libraries/System.Private.CoreLib/src/System/ParamsArray.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/InlineArrays.cs
@@ -1,13 +1,6 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-// These types are temporary workarounds for an inability to stackalloc object references.
-// Once we're able to do `stackalloc object[n]`, these can be removed.
-
-// Suppress warnings for unused private fields
-#pragma warning disable CS0169, CA1823, IDE0051, IDE0044
-
-using System.Diagnostics.CodeAnalysis;
 using System.Runtime.CompilerServices;
 
 namespace System
@@ -15,7 +8,7 @@ namespace System
     [InlineArray(2)]
     internal struct TwoObjects
     {
-        internal object? Arg0;
+        private object? _arg0;
 
         public TwoObjects(object? arg0, object? arg1)
         {
@@ -27,7 +20,7 @@ public TwoObjects(object? arg0, object? arg1)
     [InlineArray(3)]
     internal struct ThreeObjects
     {
-        internal object? Arg0;
+        private object? _arg0;
 
         public ThreeObjects(object? arg0, object? arg1, object? arg2)
         {
@@ -36,4 +29,10 @@ public ThreeObjects(object? arg0, object? arg1, object? arg2)
             this[2] = arg2;
         }
     }
+
+    [InlineArray(8)]
+    internal struct EightObjects
+    {
+        private object? _ref0;
+    }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Int128.cs b/src/libraries/System.Private.CoreLib/src/System/Int128.cs
index 12cc6c7dec56..c1e6c459f7b5 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Int128.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Int128.cs
@@ -262,17 +262,12 @@ public static explicit operator Half(Int128 value)
         /// <exception cref="OverflowException"><paramref name="value" /> is not representable by <see cref="Int128" />.</exception>
         public static explicit operator checked short(Int128 value)
         {
-            if (~value._upper == 0)
-            {
-                long lower = (long)value._lower;
-                return checked((short)lower);
-            }
-
-            if (value._upper != 0)
+            long lower = (long)value._lower;
+            if ((long)value._upper != lower >> 63)
             {
                 ThrowHelper.ThrowOverflowException();
             }
-            return checked((short)value._lower);
+            return checked((short)lower);
         }
 
         /// <summary>Explicitly converts a 128-bit signed integer to a <see cref="int" /> value.</summary>
@@ -286,17 +281,12 @@ public static explicit operator checked short(Int128 value)
         /// <exception cref="OverflowException"><paramref name="value" /> is not representable by <see cref="Int128" />.</exception>
         public static explicit operator checked int(Int128 value)
         {
-            if (~value._upper == 0)
-            {
-                long lower = (long)value._lower;
-                return checked((int)lower);
-            }
-
-            if (value._upper != 0)
+            long lower = (long)value._lower;
+            if ((long)value._upper != lower >> 63)
             {
                 ThrowHelper.ThrowOverflowException();
             }
-            return checked((int)value._lower);
+            return checked((int)lower);
         }
 
         /// <summary>Explicitly converts a 128-bit signed integer to a <see cref="long" /> value.</summary>
@@ -310,17 +300,12 @@ public static explicit operator checked int(Int128 value)
         /// <exception cref="OverflowException"><paramref name="value" /> is not representable by <see cref="Int128" />.</exception>
         public static explicit operator checked long(Int128 value)
         {
-            if (~value._upper == 0)
-            {
-                long lower = (long)value._lower;
-                return lower;
-            }
-
-            if (value._upper != 0)
+            long lower = (long)value._lower;
+            if ((long)value._upper != lower >> 63)
             {
                 ThrowHelper.ThrowOverflowException();
             }
-            return checked((long)value._lower);
+            return lower;
         }
 
         /// <summary>Explicitly converts a 128-bit signed integer to a <see cref="IntPtr" /> value.</summary>
@@ -334,17 +319,12 @@ public static explicit operator checked long(Int128 value)
         /// <exception cref="OverflowException"><paramref name="value" /> is not representable by <see cref="Int128" />.</exception>
         public static explicit operator checked nint(Int128 value)
         {
-            if (~value._upper == 0)
-            {
-                long lower = (long)value._lower;
-                return checked((nint)lower);
-            }
-
-            if (value._upper != 0)
+            long lower = (long)value._lower;
+            if ((long)value._upper != lower >> 63)
             {
                 ThrowHelper.ThrowOverflowException();
             }
-            return checked((nint)value._lower);
+            return checked((nint)lower);
         }
 
         /// <summary>Explicitly converts a 128-bit signed integer to a <see cref="sbyte" /> value.</summary>
@@ -360,17 +340,12 @@ public static explicit operator checked nint(Int128 value)
         [CLSCompliant(false)]
         public static explicit operator checked sbyte(Int128 value)
         {
-            if (~value._upper == 0)
-            {
-                long lower = (long)value._lower;
-                return checked((sbyte)lower);
-            }
-
-            if (value._upper != 0)
+            long lower = (long)value._lower;
+            if ((long)value._upper != lower >> 63)
             {
                 ThrowHelper.ThrowOverflowException();
             }
-            return checked((sbyte)value._lower);
+            return checked((sbyte)lower);
         }
 
         /// <summary>Explicitly converts a 128-bit signed integer to a <see cref="float" /> value.</summary>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs
index 1d1c50a4e2b5..e2fe2051ce5c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Math.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs
@@ -151,6 +151,7 @@ internal static void ThrowNegateTwosCompOverflow()
             throw new OverflowException(SR.Overflow_NegateTwosCompNum);
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal static unsafe ulong BigMul(uint a, uint b)
         {
 #if TARGET_32BIT
@@ -235,6 +236,16 @@ public static long BigMul(long a, long b, out long low)
             return (long)high - ((a >> 63) & b) - ((b >> 63) & a);
         }
 
+        /// <summary>Produces the full product of two 64-bit numbers.</summary>
+        /// <param name="a">The first number to multiply.</param>
+        /// <param name="b">The second number to multiply.</param>
+        /// <returns>The full product of the specified numbers.</returns>
+        internal static Int128 BigMul(long a, long b)
+        {
+            long high = Math.BigMul(a, b, out long low);
+            return new Int128((ulong)high, (ulong)low);
+        }
+
         public static double BitDecrement(double x)
         {
             ulong bits = BitConverter.DoubleToUInt64Bits(x);
diff --git a/src/libraries/System.Private.CoreLib/src/System/Memory.cs b/src/libraries/System.Private.CoreLib/src/System/Memory.cs
index 25e9778d66b5..989cac29c57c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Memory.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Memory.cs
@@ -398,6 +398,7 @@ public unsafe MemoryHandle Pin()
             {
                 if (typeof(T) == typeof(char) && tmpObject is string s)
                 {
+                    // Unsafe.AsPointer is safe since the handle pins it
                     GCHandle handle = GCHandle.Alloc(tmpObject, GCHandleType.Pinned);
                     ref char stringData = ref Unsafe.Add(ref s.GetRawStringData(), _index);
                     return new MemoryHandle(Unsafe.AsPointer(ref stringData), handle);
@@ -410,11 +411,13 @@ public unsafe MemoryHandle Pin()
                     // Array is already pre-pinned
                     if (_index < 0)
                     {
+                        // Unsafe.AsPointer is safe since it's pinned
                         void* pointer = Unsafe.Add<T>(Unsafe.AsPointer(ref MemoryMarshal.GetArrayDataReference(Unsafe.As<T[]>(tmpObject))), _index & ReadOnlyMemory<T>.RemoveFlagsBitMask);
                         return new MemoryHandle(pointer);
                     }
                     else
                     {
+                        // Unsafe.AsPointer is safe since the handle pins it
                         GCHandle handle = GCHandle.Alloc(tmpObject, GCHandleType.Pinned);
                         void* pointer = Unsafe.Add<T>(Unsafe.AsPointer(ref MemoryMarshal.GetArrayDataReference(Unsafe.As<T[]>(tmpObject))), _index);
                         return new MemoryHandle(pointer, handle);
diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs
index 4039f244d77b..fac474c1b369 100644
--- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.Globalization.cs
@@ -294,6 +294,7 @@ public static int ToUpperInvariant(this ReadOnlySpan<char> source, Span<char> de
         /// <param name="span">The source span.</param>
         /// <param name="value">The sequence to compare to the end of the source span.</param>
         /// <param name="comparisonType">One of the enumeration values that determines how the <paramref name="span"/> and <paramref name="value"/> are compared.</param>
+        [Intrinsic] // Unrolled and vectorized for half-constant input (Ordinal)
         public static bool EndsWith(this ReadOnlySpan<char> span, ReadOnlySpan<char> value, StringComparison comparisonType)
         {
             string.CheckStringComparison(comparisonType);
diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs
index bffab304afb0..83ea307fc857 100644
--- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs
@@ -2573,6 +2573,7 @@ ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(value)),
         /// Determines whether the specified sequence appears at the end of the span.
         /// </summary>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [Intrinsic] // Unrolled and vectorized for half-constant input
         public static unsafe bool EndsWith<T>(this Span<T> span, ReadOnlySpan<T> value) where T : IEquatable<T>?
         {
             int spanLength = span.Length;
@@ -2597,6 +2598,7 @@ ref MemoryMarshal.GetReference(value),
         /// Determines whether the specified sequence appears at the end of the span.
         /// </summary>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [Intrinsic] // Unrolled and vectorized for half-constant input
         public static unsafe bool EndsWith<T>(this ReadOnlySpan<T> span, ReadOnlySpan<T> value) where T : IEquatable<T>?
         {
             int spanLength = span.Length;
diff --git a/src/libraries/System.Private.CoreLib/src/System/Number.Formatting.cs b/src/libraries/System.Private.CoreLib/src/System/Number.Formatting.cs
index 3bb61ebcf736..ad75d88cbda7 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Number.Formatting.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Number.Formatting.cs
@@ -269,11 +269,7 @@ internal static partial class Number
         private const int SinglePrecisionCustomFormat = 7;
         private const int DoublePrecisionCustomFormat = 15;
 
-        private const int DefaultPrecisionExponentialFormat = 6;
-
-        private const int MaxUInt32DecDigits = 10;
         private const int CharStackBufferSize = 32;
-        private const string PosNumberFormat = "#";
 
         /// <summary>The non-inclusive upper bound of <see cref="s_smallNumberCache"/>.</summary>
         /// <remarks>
@@ -293,39 +289,6 @@ internal static partial class Number
         /// <summary>Lazily-populated cache of strings for uint values in the range [0, <see cref="SmallNumberCacheLength"/>).</summary>
         private static readonly string[] s_smallNumberCache = new string[SmallNumberCacheLength];
 
-        private static readonly string[] s_posCurrencyFormats =
-        {
-            "$#", "#$", "$ #", "# $"
-        };
-
-        private static readonly string[] s_negCurrencyFormats =
-        {
-            "($#)", "-$#", "$-#", "$#-",
-            "(#$)", "-#$", "#-$", "#$-",
-            "-# $", "-$ #", "# $-", "$ #-",
-            "$ -#", "#- $", "($ #)", "(# $)",
-            "$- #"
-        };
-
-        private static readonly string[] s_posPercentFormats =
-        {
-            "# %", "#%", "%#", "% #"
-        };
-
-        private static readonly string[] s_negPercentFormats =
-        {
-            "-# %", "-#%", "-%#",
-            "%-#", "%#-",
-            "#-%", "#%-",
-            "-% #", "# %-", "% #-",
-            "% -#", "#- %"
-        };
-
-        private static readonly string[] s_negNumberFormats =
-        {
-            "(#)", "-#", "- #", "#-", "# -",
-        };
-
         // Optimizations using "TwoDigits" inspired by:
         // https://engineering.fb.com/2013/03/15/developer-tools/three-optimization-tips-for-c/
         private static readonly byte[] TwoDigitsCharsAsBytes =
@@ -407,7 +370,7 @@ public static unsafe bool TryFormatDecimal<TChar>(decimal value, ReadOnlySpan<ch
 
         internal static unsafe void DecimalToNumber(scoped ref decimal d, ref NumberBuffer number)
         {
-            byte* buffer = number.GetDigitsPointer();
+            byte* buffer = number.DigitsPtr;
             number.DigitsCount = DecimalPrecision;
             number.IsNegative = decimal.IsNegative(d);
 
@@ -423,7 +386,7 @@ internal static unsafe void DecimalToNumber(scoped ref decimal d, ref NumberBuff
             number.DigitsCount = i;
             number.Scale = i - d.Scale;
 
-            byte* dst = number.GetDigitsPointer();
+            byte* dst = number.DigitsPtr;
             while (--i >= 0)
             {
                 *dst++ = *p++;
@@ -1627,7 +1590,7 @@ private static unsafe void Int32ToNumber(int value, ref NumberBuffer number)
                 value = -value;
             }
 
-            byte* buffer = number.GetDigitsPointer();
+            byte* buffer = number.DigitsPtr;
             byte* p = UInt32ToDecChars(buffer + Int32Precision, (uint)value, 0);
 
             int i = (int)(buffer + Int32Precision - p);
@@ -1635,7 +1598,7 @@ private static unsafe void Int32ToNumber(int value, ref NumberBuffer number)
             number.DigitsCount = i;
             number.Scale = i;
 
-            byte* dst = number.GetDigitsPointer();
+            byte* dst = number.DigitsPtr;
             while (--i >= 0)
             {
                 *dst++ = *p++;
@@ -1824,7 +1787,7 @@ private static unsafe void UInt32ToNumber(uint value, ref NumberBuffer number)
             number.DigitsCount = UInt32Precision;
             number.IsNegative = false;
 
-            byte* buffer = number.GetDigitsPointer();
+            byte* buffer = number.DigitsPtr;
             byte* p = UInt32ToDecChars(buffer + UInt32Precision, value, 0);
 
             int i = (int)(buffer + UInt32Precision - p);
@@ -1832,7 +1795,7 @@ private static unsafe void UInt32ToNumber(uint value, ref NumberBuffer number)
             number.DigitsCount = i;
             number.Scale = i;
 
-            byte* dst = number.GetDigitsPointer();
+            byte* dst = number.DigitsPtr;
             while (--i >= 0)
             {
                 *dst++ = *p++;
@@ -2058,7 +2021,7 @@ private static unsafe void Int64ToNumber(long value, ref NumberBuffer number)
                 value = -value;
             }
 
-            byte* buffer = number.GetDigitsPointer();
+            byte* buffer = number.DigitsPtr;
             byte* p = UInt64ToDecChars(buffer + Int64Precision, (ulong)value, 0);
 
             int i = (int)(buffer + Int64Precision - p);
@@ -2066,7 +2029,7 @@ private static unsafe void Int64ToNumber(long value, ref NumberBuffer number)
             number.DigitsCount = i;
             number.Scale = i;
 
-            byte* dst = number.GetDigitsPointer();
+            byte* dst = number.DigitsPtr;
             while (--i >= 0)
             {
                 *dst++ = *p++;
@@ -2289,7 +2252,7 @@ private static unsafe void UInt64ToNumber(ulong value, ref NumberBuffer number)
             number.DigitsCount = UInt64Precision;
             number.IsNegative = false;
 
-            byte* buffer = number.GetDigitsPointer();
+            byte* buffer = number.DigitsPtr;
             byte* p = UInt64ToDecChars(buffer + UInt64Precision, value, 0);
 
             int i = (int)(buffer + UInt64Precision - p);
@@ -2297,7 +2260,7 @@ private static unsafe void UInt64ToNumber(ulong value, ref NumberBuffer number)
             number.DigitsCount = i;
             number.Scale = i;
 
-            byte* dst = number.GetDigitsPointer();
+            byte* dst = number.DigitsPtr;
             while (--i >= 0)
             {
                 *dst++ = *p++;
@@ -2484,7 +2447,7 @@ private static unsafe void Int128ToNumber(Int128 value, ref NumberBuffer number)
                 value = -value;
             }
 
-            byte* buffer = number.GetDigitsPointer();
+            byte* buffer = number.DigitsPtr;
             byte* p = UInt128ToDecChars(buffer + Int128Precision, (UInt128)value, 0);
 
             int i = (int)(buffer + Int128Precision - p);
@@ -2492,7 +2455,7 @@ private static unsafe void Int128ToNumber(Int128 value, ref NumberBuffer number)
             number.DigitsCount = i;
             number.Scale = i;
 
-            byte* dst = number.GetDigitsPointer();
+            byte* dst = number.DigitsPtr;
             while (--i >= 0)
             {
                 *dst++ = *p++;
@@ -2701,7 +2664,7 @@ private static unsafe void UInt128ToNumber(UInt128 value, ref NumberBuffer numbe
             number.DigitsCount = UInt128Precision;
             number.IsNegative = false;
 
-            byte* buffer = number.GetDigitsPointer();
+            byte* buffer = number.DigitsPtr;
             byte* p = UInt128ToDecChars(buffer + UInt128Precision, value, 0);
 
             int i = (int)(buffer + UInt128Precision - p);
@@ -2709,7 +2672,7 @@ private static unsafe void UInt128ToNumber(UInt128 value, ref NumberBuffer numbe
             number.DigitsCount = i;
             number.Scale = i;
 
-            byte* dst = number.GetDigitsPointer();
+            byte* dst = number.DigitsPtr;
             while (--i >= 0)
             {
                 *dst++ = *p++;
@@ -2811,1117 +2774,6 @@ private static unsafe bool TryUInt128ToDecStr<TChar>(UInt128 value, int digits,
             return false;
         }
 
-        internal static unsafe char ParseFormatSpecifier(ReadOnlySpan<char> format, out int digits)
-        {
-            char c = default;
-            if (format.Length > 0)
-            {
-                // If the format begins with a symbol, see if it's a standard format
-                // with or without a specified number of digits.
-                c = format[0];
-                if (char.IsAsciiLetter(c))
-                {
-                    // Fast path for sole symbol, e.g. "D"
-                    if (format.Length == 1)
-                    {
-                        digits = -1;
-                        return c;
-                    }
-
-                    if (format.Length == 2)
-                    {
-                        // Fast path for symbol and single digit, e.g. "X4"
-                        int d = format[1] - '0';
-                        if ((uint)d < 10)
-                        {
-                            digits = d;
-                            return c;
-                        }
-                    }
-                    else if (format.Length == 3)
-                    {
-                        // Fast path for symbol and double digit, e.g. "F12"
-                        int d1 = format[1] - '0', d2 = format[2] - '0';
-                        if ((uint)d1 < 10 && (uint)d2 < 10)
-                        {
-                            digits = d1 * 10 + d2;
-                            return c;
-                        }
-                    }
-
-                    // Fallback for symbol and any length digits.  The digits value must be >= 0 && <= 999_999_999,
-                    // but it can begin with any number of 0s, and thus we may need to check more than 9
-                    // digits.  Further, for compat, we need to stop when we hit a null char.
-                    int n = 0;
-                    int i = 1;
-                    while ((uint)i < (uint)format.Length && char.IsAsciiDigit(format[i]))
-                    {
-                        // Check if we are about to overflow past our limit of 9 digits
-                        if (n >= 100_000_000)
-                        {
-                            ThrowHelper.ThrowFormatException_BadFormatSpecifier();
-                        }
-                        n = (n * 10) + format[i++] - '0';
-                    }
-
-                    // If we're at the end of the digits rather than having stopped because we hit something
-                    // other than a digit or overflowed, return the standard format info.
-                    if ((uint)i >= (uint)format.Length || format[i] == '\0')
-                    {
-                        digits = n;
-                        return c;
-                    }
-                }
-            }
-
-            // Default empty format to be "G"; custom format is signified with '\0'.
-            digits = -1;
-            return format.Length == 0 || c == '\0' ? // For compat, treat '\0' as the end of the specifier, even if the specifier extends beyond it.
-                'G' :
-                '\0';
-        }
-
-        internal static unsafe void NumberToString<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, char format, int nMaxDigits, NumberFormatInfo info) where TChar : unmanaged, IUtfChar<TChar>
-        {
-            Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte));
-
-            number.CheckConsistency();
-            bool isCorrectlyRounded = (number.Kind == NumberBufferKind.FloatingPoint);
-
-            switch (format)
-            {
-                case 'C':
-                case 'c':
-                    {
-                        if (nMaxDigits < 0)
-                        {
-                            nMaxDigits = info.CurrencyDecimalDigits;
-                        }
-
-                        RoundNumber(ref number, number.Scale + nMaxDigits, isCorrectlyRounded); // Don't change this line to use digPos since digCount could have its sign changed.
-
-                        FormatCurrency(ref vlb, ref number, nMaxDigits, info);
-
-                        break;
-                    }
-
-                case 'F':
-                case 'f':
-                    {
-                        if (nMaxDigits < 0)
-                        {
-                            nMaxDigits = info.NumberDecimalDigits;
-                        }
-
-                        RoundNumber(ref number, number.Scale + nMaxDigits, isCorrectlyRounded);
-
-                        if (number.IsNegative)
-                        {
-                            vlb.Append(info.NegativeSignTChar<TChar>());
-                        }
-
-                        FormatFixed(ref vlb, ref number, nMaxDigits, null, info.NumberDecimalSeparatorTChar<TChar>(), null);
-
-                        break;
-                    }
-
-                case 'N':
-                case 'n':
-                    {
-                        if (nMaxDigits < 0)
-                        {
-                            nMaxDigits = info.NumberDecimalDigits; // Since we are using digits in our calculation
-                        }
-
-                        RoundNumber(ref number, number.Scale + nMaxDigits, isCorrectlyRounded);
-
-                        FormatNumber(ref vlb, ref number, nMaxDigits, info);
-
-                        break;
-                    }
-
-                case 'E':
-                case 'e':
-                    {
-                        if (nMaxDigits < 0)
-                        {
-                            nMaxDigits = DefaultPrecisionExponentialFormat;
-                        }
-                        nMaxDigits++;
-
-                        RoundNumber(ref number, nMaxDigits, isCorrectlyRounded);
-
-                        if (number.IsNegative)
-                        {
-                            vlb.Append(info.NegativeSignTChar<TChar>());
-                        }
-
-                        FormatScientific(ref vlb, ref number, nMaxDigits, info, format);
-
-                        break;
-                    }
-
-                case 'G':
-                case 'g':
-                    {
-                        bool noRounding = false;
-                        if (nMaxDigits < 1)
-                        {
-                            if ((number.Kind == NumberBufferKind.Decimal) && (nMaxDigits == -1))
-                            {
-                                noRounding = true;  // Turn off rounding for ECMA compliance to output trailing 0's after decimal as significant
-
-                                if (number.Digits[0] == 0)
-                                {
-                                    // -0 should be formatted as 0 for decimal. This is normally handled by RoundNumber (which we are skipping)
-                                    goto SkipSign;
-                                }
-
-                                goto SkipRounding;
-                            }
-                            else
-                            {
-                                // This ensures that the PAL code pads out to the correct place even when we use the default precision
-                                nMaxDigits = number.DigitsCount;
-                            }
-                        }
-
-                        RoundNumber(ref number, nMaxDigits, isCorrectlyRounded);
-
-                    SkipRounding:
-                        if (number.IsNegative)
-                        {
-                            vlb.Append(info.NegativeSignTChar<TChar>());
-                        }
-
-                        SkipSign:
-                        FormatGeneral(ref vlb, ref number, nMaxDigits, info, (char)(format - ('G' - 'E')), noRounding);
-
-                        break;
-                    }
-
-                case 'P':
-                case 'p':
-                    {
-                        if (nMaxDigits < 0)
-                        {
-                            nMaxDigits = info.PercentDecimalDigits;
-                        }
-                        number.Scale += 2;
-
-                        RoundNumber(ref number, number.Scale + nMaxDigits, isCorrectlyRounded);
-
-                        FormatPercent(ref vlb, ref number, nMaxDigits, info);
-
-                        break;
-                    }
-
-                case 'R':
-                case 'r':
-                    {
-                        format = (char)(format - ('R' - 'G'));
-                        Debug.Assert(format is 'G' or 'g');
-                        goto case 'G';
-                    }
-
-                default:
-                    ThrowHelper.ThrowFormatException_BadFormatSpecifier();
-                    break;
-            }
-        }
-
-        internal static unsafe void NumberToStringFormat<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, ReadOnlySpan<char> format, NumberFormatInfo info) where TChar : unmanaged, IUtfChar<TChar>
-        {
-            Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte));
-
-            number.CheckConsistency();
-
-            int digitCount;
-            int decimalPos;
-            int firstDigit;
-            int lastDigit;
-            int digPos;
-            bool scientific;
-            int thousandPos;
-            int thousandCount = 0;
-            bool thousandSeps;
-            int scaleAdjust;
-            int adjust;
-
-            int section;
-            int src;
-            byte* dig = number.GetDigitsPointer();
-            char ch;
-
-            section = FindSection(format, dig[0] == 0 ? 2 : number.IsNegative ? 1 : 0);
-
-            while (true)
-            {
-                digitCount = 0;
-                decimalPos = -1;
-                firstDigit = 0x7FFFFFFF;
-                lastDigit = 0;
-                scientific = false;
-                thousandPos = -1;
-                thousandSeps = false;
-                scaleAdjust = 0;
-                src = section;
-
-                fixed (char* pFormat = &MemoryMarshal.GetReference(format))
-                {
-                    while (src < format.Length && (ch = pFormat[src++]) != 0 && ch != ';')
-                    {
-                        switch (ch)
-                        {
-                            case '#':
-                                digitCount++;
-                                break;
-
-                            case '0':
-                                if (firstDigit == 0x7FFFFFFF)
-                                {
-                                    firstDigit = digitCount;
-                                }
-                                digitCount++;
-                                lastDigit = digitCount;
-                                break;
-
-                            case '.':
-                                if (decimalPos < 0)
-                                {
-                                    decimalPos = digitCount;
-                                }
-                                break;
-
-                            case ',':
-                                if (digitCount > 0 && decimalPos < 0)
-                                {
-                                    if (thousandPos >= 0)
-                                    {
-                                        if (thousandPos == digitCount)
-                                        {
-                                            thousandCount++;
-                                            break;
-                                        }
-                                        thousandSeps = true;
-                                    }
-                                    thousandPos = digitCount;
-                                    thousandCount = 1;
-                                }
-                                break;
-
-                            case '%':
-                                scaleAdjust += 2;
-                                break;
-
-                            case '\x2030':
-                                scaleAdjust += 3;
-                                break;
-
-                            case '\'':
-                            case '"':
-                                while (src < format.Length && pFormat[src] != 0 && pFormat[src++] != ch);
-                                break;
-
-                            case '\\':
-                                if (src < format.Length && pFormat[src] != 0)
-                                {
-                                    src++;
-                                }
-                                break;
-
-                            case 'E':
-                            case 'e':
-                                if ((src < format.Length && pFormat[src] == '0') ||
-                                    (src + 1 < format.Length && (pFormat[src] == '+' || pFormat[src] == '-') && pFormat[src + 1] == '0'))
-                                {
-                                    while (++src < format.Length && pFormat[src] == '0');
-                                    scientific = true;
-                                }
-                                break;
-                        }
-                    }
-                }
-
-                if (decimalPos < 0)
-                {
-                    decimalPos = digitCount;
-                }
-
-                if (thousandPos >= 0)
-                {
-                    if (thousandPos == decimalPos)
-                    {
-                        scaleAdjust -= thousandCount * 3;
-                    }
-                    else
-                    {
-                        thousandSeps = true;
-                    }
-                }
-
-                if (dig[0] != 0)
-                {
-                    number.Scale += scaleAdjust;
-                    int pos = scientific ? digitCount : number.Scale + digitCount - decimalPos;
-                    RoundNumber(ref number, pos, isCorrectlyRounded: false);
-                    if (dig[0] == 0)
-                    {
-                        src = FindSection(format, 2);
-                        if (src != section)
-                        {
-                            section = src;
-                            continue;
-                        }
-                    }
-                }
-                else
-                {
-                    if (number.Kind != NumberBufferKind.FloatingPoint)
-                    {
-                        // The integer types don't have a concept of -0 and decimal always format -0 as 0
-                        number.IsNegative = false;
-                    }
-                    number.Scale = 0;      // Decimals with scale ('0.00') should be rounded.
-                }
-
-                break;
-            }
-
-            firstDigit = firstDigit < decimalPos ? decimalPos - firstDigit : 0;
-            lastDigit = lastDigit > decimalPos ? decimalPos - lastDigit : 0;
-            if (scientific)
-            {
-                digPos = decimalPos;
-                adjust = 0;
-            }
-            else
-            {
-                digPos = number.Scale > decimalPos ? number.Scale : decimalPos;
-                adjust = number.Scale - decimalPos;
-            }
-            src = section;
-
-            // Adjust can be negative, so we make this an int instead of an unsigned int.
-            // Adjust represents the number of characters over the formatting e.g. format string is "0000" and you are trying to
-            // format 100000 (6 digits). Means adjust will be 2. On the other hand if you are trying to format 10 adjust will be
-            // -2 and we'll need to fixup these digits with 0 padding if we have 0 formatting as in this example.
-            Span<int> thousandsSepPos = stackalloc int[4];
-            int thousandsSepCtr = -1;
-
-            if (thousandSeps)
-            {
-                // We need to precompute this outside the number formatting loop
-                if (info.NumberGroupSeparator.Length > 0)
-                {
-                    // We need this array to figure out where to insert the thousands separator. We would have to traverse the string
-                    // backwards. PIC formatting always traverses forwards. These indices are precomputed to tell us where to insert
-                    // the thousands separator so we can get away with traversing forwards. Note we only have to compute up to digPos.
-                    // The max is not bound since you can have formatting strings of the form "000,000..", and this
-                    // should handle that case too.
-
-                    int[] groupDigits = info._numberGroupSizes;
-
-                    int groupSizeIndex = 0;     // Index into the groupDigits array.
-                    int groupTotalSizeCount = 0;
-                    int groupSizeLen = groupDigits.Length;    // The length of groupDigits array.
-                    if (groupSizeLen != 0)
-                    {
-                        groupTotalSizeCount = groupDigits[groupSizeIndex];   // The current running total of group size.
-                    }
-                    int groupSize = groupTotalSizeCount;
-
-                    int totalDigits = digPos + ((adjust < 0) ? adjust : 0); // Actual number of digits in o/p
-                    int numDigits = (firstDigit > totalDigits) ? firstDigit : totalDigits;
-                    while (numDigits > groupTotalSizeCount)
-                    {
-                        if (groupSize == 0)
-                        {
-                            break;
-                        }
-
-                        ++thousandsSepCtr;
-                        if (thousandsSepCtr >= thousandsSepPos.Length)
-                        {
-                            var newThousandsSepPos = new int[thousandsSepPos.Length * 2];
-                            thousandsSepPos.CopyTo(newThousandsSepPos);
-                            thousandsSepPos = newThousandsSepPos;
-                        }
-
-                        thousandsSepPos[thousandsSepCtr] = groupTotalSizeCount;
-                        if (groupSizeIndex < groupSizeLen - 1)
-                        {
-                            groupSizeIndex++;
-                            groupSize = groupDigits[groupSizeIndex];
-                        }
-                        groupTotalSizeCount += groupSize;
-                    }
-                }
-            }
-
-            if (number.IsNegative && (section == 0) && (number.Scale != 0))
-            {
-                vlb.Append(info.NegativeSignTChar<TChar>());
-            }
-
-            bool decimalWritten = false;
-
-            fixed (char* pFormat = &MemoryMarshal.GetReference(format))
-            {
-                byte* cur = dig;
-
-                while (src < format.Length && (ch = pFormat[src++]) != 0 && ch != ';')
-                {
-                    if (adjust > 0)
-                    {
-                        switch (ch)
-                        {
-                            case '#':
-                            case '0':
-                            case '.':
-                                while (adjust > 0)
-                                {
-                                    // digPos will be one greater than thousandsSepPos[thousandsSepCtr] since we are at
-                                    // the character after which the groupSeparator needs to be appended.
-                                    vlb.Append(TChar.CastFrom(*cur != 0 ? (char)(*cur++) : '0'));
-                                    if (thousandSeps && digPos > 1 && thousandsSepCtr >= 0)
-                                    {
-                                        if (digPos == thousandsSepPos[thousandsSepCtr] + 1)
-                                        {
-                                            vlb.Append(info.NumberGroupSeparatorTChar<TChar>());
-                                            thousandsSepCtr--;
-                                        }
-                                    }
-                                    digPos--;
-                                    adjust--;
-                                }
-                                break;
-                        }
-                    }
-
-                    switch (ch)
-                    {
-                        case '#':
-                        case '0':
-                            {
-                                if (adjust < 0)
-                                {
-                                    adjust++;
-                                    ch = digPos <= firstDigit ? '0' : '\0';
-                                }
-                                else
-                                {
-                                    ch = *cur != 0 ? (char)(*cur++) : digPos > lastDigit ? '0' : '\0';
-                                }
-
-                                if (ch != 0)
-                                {
-                                    vlb.Append(TChar.CastFrom(ch));
-                                    if (thousandSeps && digPos > 1 && thousandsSepCtr >= 0)
-                                    {
-                                        if (digPos == thousandsSepPos[thousandsSepCtr] + 1)
-                                        {
-                                            vlb.Append(info.NumberGroupSeparatorTChar<TChar>());
-                                            thousandsSepCtr--;
-                                        }
-                                    }
-                                }
-
-                                digPos--;
-                                break;
-                            }
-
-                        case '.':
-                            {
-                                if (digPos != 0 || decimalWritten)
-                                {
-                                    // For compatibility, don't echo repeated decimals
-                                    break;
-                                }
-
-                                // If the format has trailing zeros or the format has a decimal and digits remain
-                                if (lastDigit < 0 || (decimalPos < digitCount && *cur != 0))
-                                {
-                                    vlb.Append(info.NumberDecimalSeparatorTChar<TChar>());
-                                    decimalWritten = true;
-                                }
-                                break;
-                            }
-
-                        case '\x2030':
-                            vlb.Append(info.PerMilleSymbolTChar<TChar>());
-                            break;
-
-                        case '%':
-                            vlb.Append(info.PercentSymbolTChar<TChar>());
-                            break;
-
-                        case ',':
-                            break;
-
-                        case '\'':
-                        case '"':
-                            while (src < format.Length && pFormat[src] != 0 && pFormat[src] != ch)
-                            {
-                                AppendUnknownChar(ref vlb, pFormat[src++]);
-                            }
-
-                            if (src < format.Length && pFormat[src] != 0)
-                            {
-                                src++;
-                            }
-                            break;
-
-                        case '\\':
-                            if (src < format.Length && pFormat[src] != 0)
-                            {
-                                AppendUnknownChar(ref vlb, pFormat[src++]);
-                            }
-                            break;
-
-                        case 'E':
-                        case 'e':
-                            {
-                                bool positiveSign = false;
-                                int i = 0;
-                                if (scientific)
-                                {
-                                    if (src < format.Length && pFormat[src] == '0')
-                                    {
-                                        // Handles E0, which should format the same as E-0
-                                        i++;
-                                    }
-                                    else if (src + 1 < format.Length && pFormat[src] == '+' && pFormat[src + 1] == '0')
-                                    {
-                                        // Handles E+0
-                                        positiveSign = true;
-                                    }
-                                    else if (src + 1 < format.Length && pFormat[src] == '-' && pFormat[src + 1] == '0')
-                                    {
-                                        // Handles E-0
-                                        // Do nothing, this is just a place holder s.t. we don't break out of the loop.
-                                    }
-                                    else
-                                    {
-                                        vlb.Append(TChar.CastFrom(ch));
-                                        break;
-                                    }
-
-                                    while (++src < format.Length && pFormat[src] == '0')
-                                    {
-                                        i++;
-                                    }
-
-                                    if (i > 10)
-                                    {
-                                        i = 10;
-                                    }
-
-                                    int exp = dig[0] == 0 ? 0 : number.Scale - decimalPos;
-                                    FormatExponent(ref vlb, info, exp, ch, i, positiveSign);
-                                    scientific = false;
-                                }
-                                else
-                                {
-                                    vlb.Append(TChar.CastFrom(ch));
-                                    if (src < format.Length)
-                                    {
-                                        if (pFormat[src] == '+' || pFormat[src] == '-')
-                                        {
-                                            AppendUnknownChar(ref vlb, pFormat[src++]);
-                                        }
-
-                                        while (src < format.Length && pFormat[src] == '0')
-                                        {
-                                            AppendUnknownChar(ref vlb, pFormat[src++]);
-                                        }
-                                    }
-                                }
-                                break;
-                            }
-
-                        default:
-                            AppendUnknownChar(ref vlb, ch);
-                            break;
-                    }
-                }
-            }
-
-            if (number.IsNegative && (section == 0) && (number.Scale == 0) && (vlb.Length > 0))
-            {
-                vlb.Insert(0, info.NegativeSignTChar<TChar>());
-            }
-        }
-
-        private static void FormatCurrency<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, int nMaxDigits, NumberFormatInfo info) where TChar : unmanaged, IUtfChar<TChar>
-        {
-            Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte));
-
-            string fmt = number.IsNegative ?
-                s_negCurrencyFormats[info.CurrencyNegativePattern] :
-                s_posCurrencyFormats[info.CurrencyPositivePattern];
-
-            foreach (char ch in fmt)
-            {
-                switch (ch)
-                {
-                    case '#':
-                        FormatFixed(ref vlb, ref number, nMaxDigits, info._currencyGroupSizes, info.CurrencyDecimalSeparatorTChar<TChar>(), info.CurrencyGroupSeparatorTChar<TChar>());
-                        break;
-
-                    case '-':
-                        vlb.Append(info.NegativeSignTChar<TChar>());
-                        break;
-
-                    case '$':
-                        vlb.Append(info.CurrencySymbolTChar<TChar>());
-                        break;
-
-                    default:
-                        vlb.Append(TChar.CastFrom(ch));
-                        break;
-                }
-            }
-        }
-
-        private static unsafe void FormatFixed<TChar>(
-            ref ValueListBuilder<TChar> vlb, ref NumberBuffer number,
-            int nMaxDigits, int[]? groupDigits,
-            ReadOnlySpan<TChar> sDecimal, ReadOnlySpan<TChar> sGroup) where TChar : unmanaged, IUtfChar<TChar>
-        {
-            Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte));
-
-            int digPos = number.Scale;
-            byte* dig = number.GetDigitsPointer();
-
-            if (digPos > 0)
-            {
-                if (groupDigits != null)
-                {
-                    Debug.Assert(sGroup != null, "Must be null when groupDigits != null");
-                    int groupSizeIndex = 0;                             // Index into the groupDigits array.
-                    int bufferSize = digPos;                            // The length of the result buffer string.
-                    int groupSize = 0;                                  // The current group size.
-
-                    // Find out the size of the string buffer for the result.
-                    if (groupDigits.Length != 0) // You can pass in 0 length arrays
-                    {
-                        int groupSizeCount = groupDigits[groupSizeIndex];   // The current total of group size.
-
-                        while (digPos > groupSizeCount)
-                        {
-                            groupSize = groupDigits[groupSizeIndex];
-                            if (groupSize == 0)
-                            {
-                                break;
-                            }
-
-                            bufferSize += sGroup.Length;
-                            if (groupSizeIndex < groupDigits.Length - 1)
-                            {
-                                groupSizeIndex++;
-                            }
-
-                            groupSizeCount += groupDigits[groupSizeIndex];
-                            if ((groupSizeCount | bufferSize) < 0)
-                            {
-                                ThrowHelper.ThrowArgumentOutOfRangeException(); // If we overflow
-                            }
-                        }
-
-                        groupSize = groupSizeCount == 0 ? 0 : groupDigits[0]; // If you passed in an array with one entry as 0, groupSizeCount == 0
-                    }
-
-                    groupSizeIndex = 0;
-                    int digitCount = 0;
-                    int digLength = number.DigitsCount;
-                    int digStart = (digPos < digLength) ? digPos : digLength;
-                    fixed (TChar* spanPtr = &MemoryMarshal.GetReference(vlb.AppendSpan(bufferSize)))
-                    {
-                        TChar* p = spanPtr + bufferSize - 1;
-                        for (int i = digPos - 1; i >= 0; i--)
-                        {
-                            *(p--) = TChar.CastFrom((i < digStart) ? (char)dig[i] : '0');
-
-                            if (groupSize > 0)
-                            {
-                                digitCount++;
-                                if ((digitCount == groupSize) && (i != 0))
-                                {
-                                    for (int j = sGroup.Length - 1; j >= 0; j--)
-                                    {
-                                        *(p--) = sGroup[j];
-                                    }
-
-                                    if (groupSizeIndex < groupDigits.Length - 1)
-                                    {
-                                        groupSizeIndex++;
-                                        groupSize = groupDigits[groupSizeIndex];
-                                    }
-                                    digitCount = 0;
-                                }
-                            }
-                        }
-
-                        Debug.Assert(p >= spanPtr - 1, "Underflow");
-                        dig += digStart;
-                    }
-                }
-                else
-                {
-                    do
-                    {
-                        vlb.Append(TChar.CastFrom(*dig != 0 ? (char)(*dig++) : '0'));
-                    }
-                    while (--digPos > 0);
-                }
-            }
-            else
-            {
-                vlb.Append(TChar.CastFrom('0'));
-            }
-
-            if (nMaxDigits > 0)
-            {
-                Debug.Assert(sDecimal != null);
-                vlb.Append(sDecimal);
-                if ((digPos < 0) && (nMaxDigits > 0))
-                {
-                    int zeroes = Math.Min(-digPos, nMaxDigits);
-                    for (int i = 0; i < zeroes; i++)
-                    {
-                        vlb.Append(TChar.CastFrom('0'));
-                    }
-                    digPos += zeroes;
-                    nMaxDigits -= zeroes;
-                }
-
-                while (nMaxDigits > 0)
-                {
-                    vlb.Append(TChar.CastFrom((*dig != 0) ? (char)(*dig++) : '0'));
-                    nMaxDigits--;
-                }
-            }
-        }
-
-        /// <summary>Appends a char to the builder when the char is not known to be ASCII.</summary>
-        /// <remarks>This requires a helper as if the character isn't ASCII, for UTF-8 encoding it will result in multiple bytes added.</remarks>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static void AppendUnknownChar<TChar>(ref ValueListBuilder<TChar> vlb, char ch) where TChar : unmanaged, IUtfChar<TChar>
-        {
-            Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte));
-
-            if (typeof(TChar) == typeof(char) || char.IsAscii(ch))
-            {
-                vlb.Append(TChar.CastFrom(ch));
-            }
-            else
-            {
-                AppendNonAsciiBytes(ref vlb, ch);
-            }
-
-            [MethodImpl(MethodImplOptions.NoInlining)]
-            static void AppendNonAsciiBytes(ref ValueListBuilder<TChar> vlb, char ch)
-            {
-                var r = new Rune(ch);
-                r.EncodeToUtf8(MemoryMarshal.AsBytes(vlb.AppendSpan(r.Utf8SequenceLength)));
-            }
-        }
-
-        private static void FormatNumber<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, int nMaxDigits, NumberFormatInfo info) where TChar : unmanaged, IUtfChar<TChar>
-        {
-            Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte));
-
-            string fmt = number.IsNegative ?
-                s_negNumberFormats[info.NumberNegativePattern] :
-                PosNumberFormat;
-
-            foreach (char ch in fmt)
-            {
-                switch (ch)
-                {
-                    case '#':
-                        FormatFixed(ref vlb, ref number, nMaxDigits, info._numberGroupSizes, info.NumberDecimalSeparatorTChar<TChar>(), info.NumberGroupSeparatorTChar<TChar>());
-                        break;
-
-                    case '-':
-                        vlb.Append(info.NegativeSignTChar<TChar>());
-                        break;
-
-                    default:
-                        vlb.Append(TChar.CastFrom(ch));
-                        break;
-                }
-            }
-        }
-
-        private static unsafe void FormatScientific<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, int nMaxDigits, NumberFormatInfo info, char expChar) where TChar : unmanaged, IUtfChar<TChar>
-        {
-            Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte));
-
-            byte* dig = number.GetDigitsPointer();
-
-            vlb.Append(TChar.CastFrom((*dig != 0) ? (char)(*dig++) : '0'));
-
-            if (nMaxDigits != 1) // For E0 we would like to suppress the decimal point
-            {
-                vlb.Append(info.NumberDecimalSeparatorTChar<TChar>());
-            }
-
-            while (--nMaxDigits > 0)
-            {
-                vlb.Append(TChar.CastFrom((*dig != 0) ? (char)(*dig++) : '0'));
-            }
-
-            int e = number.Digits[0] == 0 ? 0 : number.Scale - 1;
-            FormatExponent(ref vlb, info, e, expChar, 3, true);
-        }
-
-        private static unsafe void FormatExponent<TChar>(ref ValueListBuilder<TChar> vlb, NumberFormatInfo info, int value, char expChar, int minDigits, bool positiveSign) where TChar : unmanaged, IUtfChar<TChar>
-        {
-            Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte));
-
-            vlb.Append(TChar.CastFrom(expChar));
-
-            if (value < 0)
-            {
-                vlb.Append(info.NegativeSignTChar<TChar>());
-                value = -value;
-            }
-            else
-            {
-                if (positiveSign)
-                {
-                    vlb.Append(info.PositiveSignTChar<TChar>());
-                }
-            }
-
-            TChar* digits = stackalloc TChar[MaxUInt32DecDigits];
-            TChar* p = UInt32ToDecChars(digits + MaxUInt32DecDigits, (uint)value, minDigits);
-            vlb.Append(new ReadOnlySpan<TChar>(p, (int)(digits + MaxUInt32DecDigits - p)));
-        }
-
-        private static unsafe void FormatGeneral<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, int nMaxDigits, NumberFormatInfo info, char expChar, bool suppressScientific) where TChar : unmanaged, IUtfChar<TChar>
-        {
-            Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte));
-
-            int digPos = number.Scale;
-            bool scientific = false;
-
-            if (!suppressScientific)
-            {
-                // Don't switch to scientific notation
-                if (digPos > nMaxDigits || digPos < -3)
-                {
-                    digPos = 1;
-                    scientific = true;
-                }
-            }
-
-            byte* dig = number.GetDigitsPointer();
-
-            if (digPos > 0)
-            {
-                do
-                {
-                    vlb.Append(TChar.CastFrom((*dig != 0) ? (char)(*dig++) : '0'));
-                }
-                while (--digPos > 0);
-            }
-            else
-            {
-                vlb.Append(TChar.CastFrom('0'));
-            }
-
-            if (*dig != 0 || digPos < 0)
-            {
-                vlb.Append(info.NumberDecimalSeparatorTChar<TChar>());
-
-                while (digPos < 0)
-                {
-                    vlb.Append(TChar.CastFrom('0'));
-                    digPos++;
-                }
-
-                while (*dig != 0)
-                {
-                    vlb.Append(TChar.CastFrom(*dig++));
-                }
-            }
-
-            if (scientific)
-            {
-                FormatExponent(ref vlb, info, number.Scale - 1, expChar, 2, true);
-            }
-        }
-
-        private static void FormatPercent<TChar>(ref ValueListBuilder<TChar> vlb, ref NumberBuffer number, int nMaxDigits, NumberFormatInfo info) where TChar : unmanaged, IUtfChar<TChar>
-        {
-            Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte));
-
-            string fmt = number.IsNegative ?
-                s_negPercentFormats[info.PercentNegativePattern] :
-                s_posPercentFormats[info.PercentPositivePattern];
-
-            foreach (char ch in fmt)
-            {
-                switch (ch)
-                {
-                    case '#':
-                        FormatFixed(ref vlb, ref number, nMaxDigits, info._percentGroupSizes, info.PercentDecimalSeparatorTChar<TChar>(), info.PercentGroupSeparatorTChar<TChar>());
-                        break;
-
-                    case '-':
-                        vlb.Append(info.NegativeSignTChar<TChar>());
-                        break;
-
-                    case '%':
-                        vlb.Append(info.PercentSymbolTChar<TChar>());
-                        break;
-
-                    default:
-                        vlb.Append(TChar.CastFrom(ch));
-                        break;
-                }
-            }
-        }
-
-        internal static unsafe void RoundNumber(ref NumberBuffer number, int pos, bool isCorrectlyRounded)
-        {
-            byte* dig = number.GetDigitsPointer();
-
-            int i = 0;
-            while (i < pos && dig[i] != '\0')
-            {
-                i++;
-            }
-
-            if ((i == pos) && ShouldRoundUp(dig, i, number.Kind, isCorrectlyRounded))
-            {
-                while (i > 0 && dig[i - 1] == '9')
-                {
-                    i--;
-                }
-
-                if (i > 0)
-                {
-                    dig[i - 1]++;
-                }
-                else
-                {
-                    number.Scale++;
-                    dig[0] = (byte)('1');
-                    i = 1;
-                }
-            }
-            else
-            {
-                while (i > 0 && dig[i - 1] == '0')
-                {
-                    i--;
-                }
-            }
-
-            if (i == 0)
-            {
-                if (number.Kind != NumberBufferKind.FloatingPoint)
-                {
-                    // The integer types don't have a concept of -0 and decimal always format -0 as 0
-                    number.IsNegative = false;
-                }
-                number.Scale = 0;      // Decimals with scale ('0.00') should be rounded.
-            }
-
-            dig[i] = (byte)('\0');
-            number.DigitsCount = i;
-            number.CheckConsistency();
-
-            static bool ShouldRoundUp(byte* dig, int i, NumberBufferKind numberKind, bool isCorrectlyRounded)
-            {
-                // We only want to round up if the digit is greater than or equal to 5 and we are
-                // not rounding a floating-point number. If we are rounding a floating-point number
-                // we have one of two cases.
-                //
-                // In the case of a standard numeric-format specifier, the exact and correctly rounded
-                // string will have been produced. In this scenario, pos will have pointed to the
-                // terminating null for the buffer and so this will return false.
-                //
-                // However, in the case of a custom numeric-format specifier, we currently fall back
-                // to generating Single/DoublePrecisionCustomFormat digits and then rely on this
-                // function to round correctly instead. This can unfortunately lead to double-rounding
-                // bugs but is the best we have right now due to back-compat concerns.
-
-                byte digit = dig[i];
-
-                if ((digit == '\0') || isCorrectlyRounded)
-                {
-                    // Fast path for the common case with no rounding
-                    return false;
-                }
-
-                // Values greater than or equal to 5 should round up, otherwise we round down. The IEEE
-                // 754 spec actually dictates that ties (exactly 5) should round to the nearest even number
-                // but that can have undesired behavior for custom numeric format strings. This probably
-                // needs further thought for .NET 5 so that we can be spec compliant and so that users
-                // can get the desired rounding behavior for their needs.
-
-                return digit >= '5';
-            }
-        }
-
-        private static unsafe int FindSection(ReadOnlySpan<char> format, int section)
-        {
-            int src;
-            char ch;
-
-            if (section == 0)
-            {
-                return 0;
-            }
-
-            fixed (char* pFormat = &MemoryMarshal.GetReference(format))
-            {
-                src = 0;
-                while (true)
-                {
-                    if (src >= format.Length)
-                    {
-                        return 0;
-                    }
-
-                    switch (ch = pFormat[src++])
-                    {
-                        case '\'':
-                        case '"':
-                            while (src < format.Length && pFormat[src] != 0 && pFormat[src++] != ch) ;
-                            break;
-
-                        case '\\':
-                            if (src < format.Length && pFormat[src] != 0)
-                            {
-                                src++;
-                            }
-                            break;
-
-                        case ';':
-                            if (--section != 0)
-                            {
-                                break;
-                            }
-
-                            if (src < format.Length && pFormat[src] != 0 && pFormat[src] != ';')
-                            {
-                                return src;
-                            }
-                            goto case '\0';
-
-                        case '\0':
-                            return 0;
-                    }
-                }
-            }
-        }
-
         private static ulong ExtractFractionAndBiasedExponent(double value, out int exponent)
         {
             ulong bits = BitConverter.DoubleToUInt64Bits(value);
diff --git a/src/libraries/System.Private.CoreLib/src/System/Number.NumberToFloatingPointBits.cs b/src/libraries/System.Private.CoreLib/src/System/Number.NumberToFloatingPointBits.cs
index 8043171f596d..303ae2f43c92 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Number.NumberToFloatingPointBits.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Number.NumberToFloatingPointBits.cs
@@ -700,7 +700,7 @@ private static void AccumulateDecimalDigitsIntoBigInteger(scoped ref NumberBuffe
         {
             BigInteger.SetZero(out result);
 
-            byte* src = number.GetDigitsPointer() + firstIndex;
+            byte* src = number.DigitsPtr + firstIndex;
             uint remaining = lastIndex - firstIndex;
 
             while (remaining != 0)
@@ -974,7 +974,7 @@ private static ulong NumberToFloatingPointBits<TFloat>(ref NumberBuffer number)
         {
             Debug.Assert(TFloat.DenormalMantissaBits <= FloatingPointMaxDenormalMantissaBits);
 
-            Debug.Assert(number.GetDigitsPointer()[0] != '0');
+            Debug.Assert(number.DigitsPtr[0] != '0');
 
             Debug.Assert(number.Scale <= FloatingPointMaxExponent);
             Debug.Assert(number.Scale >= FloatingPointMinExponent);
@@ -998,7 +998,7 @@ private static ulong NumberToFloatingPointBits<TFloat>(ref NumberBuffer number)
             // Above 19 digits, we rely on slow path
             if (totalDigits <= 19)
             {
-                byte* src = number.GetDigitsPointer();
+                byte* src = number.DigitsPtr;
 
                 ulong mantissa = DigitsToUInt64(src, (int)(totalDigits));
 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Number.Parsing.cs b/src/libraries/System.Private.CoreLib/src/System/Number.Parsing.cs
index 952733c9268d..852b979492c2 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Number.Parsing.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Number.Parsing.cs
@@ -113,7 +113,7 @@ private static unsafe bool TryNumberBufferToBinaryInteger<TInteger>(ref NumberBu
                 return false;
             }
 
-            byte* p = number.GetDigitsPointer();
+            byte* p = number.DigitsPtr;
 
             Debug.Assert(p != null);
             TInteger n = TInteger.Zero;
@@ -725,7 +725,7 @@ internal static unsafe bool TryNumberToDecimal(ref NumberBuffer number, ref deci
         {
             number.CheckConsistency();
 
-            byte* p = number.GetDigitsPointer();
+            byte* p = number.DigitsPtr;
             int e = number.Scale;
             bool sign = number.IsNegative;
             uint c = *p;
@@ -883,16 +883,16 @@ internal static bool SpanStartsWith<TChar>(ReadOnlySpan<TChar> span, ReadOnlySpa
         {
             if (typeof(TChar) == typeof(char))
             {
-                ReadOnlySpan<char> typedSpan = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<TChar, char>(ref MemoryMarshal.GetReference(span)), span.Length);
-                ReadOnlySpan<char> typedValue = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<TChar, char>(ref MemoryMarshal.GetReference(value)), value.Length);
+                ReadOnlySpan<char> typedSpan = MemoryMarshal.Cast<TChar, char>(span);
+                ReadOnlySpan<char> typedValue = MemoryMarshal.Cast<TChar, char>(value);
                 return typedSpan.StartsWith(typedValue, comparisonType);
             }
             else
             {
                 Debug.Assert(typeof(TChar) == typeof(byte));
 
-                ReadOnlySpan<byte> typedSpan = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<TChar, byte>(ref MemoryMarshal.GetReference(span)), span.Length);
-                ReadOnlySpan<byte> typedValue = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<TChar, byte>(ref MemoryMarshal.GetReference(value)), value.Length);
+                ReadOnlySpan<byte> typedSpan = MemoryMarshal.Cast<TChar, byte>(span);
+                ReadOnlySpan<byte> typedValue = MemoryMarshal.Cast<TChar, byte>(value);
                 return typedSpan.StartsWithUtf8(typedValue, comparisonType);
             }
         }
@@ -903,17 +903,13 @@ internal static ReadOnlySpan<TChar> SpanTrim<TChar>(ReadOnlySpan<TChar> span)
         {
             if (typeof(TChar) == typeof(char))
             {
-                ReadOnlySpan<char> typedSpan = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<TChar, char>(ref MemoryMarshal.GetReference(span)), span.Length);
-                ReadOnlySpan<char> result = typedSpan.Trim();
-                return MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<char, TChar>(ref MemoryMarshal.GetReference(result)), result.Length);
+                return MemoryMarshal.Cast<char, TChar>(MemoryMarshal.Cast<TChar, char>(span).Trim());
             }
             else
             {
                 Debug.Assert(typeof(TChar) == typeof(byte));
 
-                ReadOnlySpan<byte> typedSpan = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<TChar, byte>(ref MemoryMarshal.GetReference(span)), span.Length);
-                ReadOnlySpan<byte> result = typedSpan.TrimUtf8();
-                return MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<byte, TChar>(ref MemoryMarshal.GetReference(result)), result.Length);
+                return MemoryMarshal.Cast<byte, TChar>(MemoryMarshal.Cast<TChar, byte>(span).TrimUtf8());
             }
         }
 
@@ -923,16 +919,16 @@ internal static bool SpanEqualsOrdinalIgnoreCase<TChar>(ReadOnlySpan<TChar> span
         {
             if (typeof(TChar) == typeof(char))
             {
-                ReadOnlySpan<char> typedSpan = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<TChar, char>(ref MemoryMarshal.GetReference(span)), span.Length);
-                ReadOnlySpan<char> typedValue = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<TChar, char>(ref MemoryMarshal.GetReference(value)), value.Length);
+                ReadOnlySpan<char> typedSpan = MemoryMarshal.Cast<TChar, char>(span);
+                ReadOnlySpan<char> typedValue = MemoryMarshal.Cast<TChar, char>(value);
                 return typedSpan.EqualsOrdinalIgnoreCase(typedValue);
             }
             else
             {
                 Debug.Assert(typeof(TChar) == typeof(byte));
 
-                ReadOnlySpan<byte> typedSpan = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<TChar, byte>(ref MemoryMarshal.GetReference(span)), span.Length);
-                ReadOnlySpan<byte> typedValue = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As<TChar, byte>(ref MemoryMarshal.GetReference(value)), value.Length);
+                ReadOnlySpan<byte> typedSpan = MemoryMarshal.Cast<TChar, byte>(span);
+                ReadOnlySpan<byte> typedValue = MemoryMarshal.Cast<TChar, byte>(value);
                 return typedSpan.EqualsOrdinalIgnoreCaseUtf8(typedValue);
             }
         }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/IBinaryInteger.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/IBinaryInteger.cs
index bc1be9b3569f..a150b61e2c40 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/IBinaryInteger.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/IBinaryInteger.cs
@@ -37,7 +37,7 @@ static virtual TSelf LeadingZeroCount(TSelf value)
                 return TSelf.CreateChecked(bitCount);
             }
 
-            return (bitCount - TSelf.One) ^ TSelf.Log2(value);
+            return TSelf.IsNegative(value) ? TSelf.Zero : ((bitCount - TSelf.One) ^ TSelf.Log2(value));
         }
 
         /// <summary>Computes the number of bits that are set in a value.</summary>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/INumber.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/INumber.cs
index e41d0b999850..59aa0a761efa 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/INumber.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/INumber.cs
@@ -37,7 +37,7 @@ static virtual TSelf Clamp(TSelf value, TSelf min, TSelf max)
             return result;
         }
 
-        /// <summary>Copies the sign of a value to the sign of another value..</summary>
+        /// <summary>Copies the sign of a value to the sign of another value.</summary>
         /// <param name="value">The value whose magnitude is used in the result.</param>
         /// <param name="sign">The value whose sign is used in the result.</param>
         /// <returns>A value with the magnitude of <paramref name="value" /> and the sign of <paramref name="sign" />.</returns>
@@ -118,9 +118,14 @@ static virtual TSelf Min(TSelf x, TSelf y)
             // otherwise returns the larger of the inputs. It
             // treats +0 as larger than -0 as per the specification.
 
-            if ((x != y) && !TSelf.IsNaN(x))
+            if (x != y)
             {
-                return x < y ? x : y;
+                if (!TSelf.IsNaN(x))
+                {
+                    return x < y ? x : y;
+                }
+
+                return x;
             }
 
             return TSelf.IsNegative(x) ? x : y;
@@ -154,12 +159,16 @@ static virtual TSelf MinNumber(TSelf x, TSelf y)
 
         /// <summary>Computes the sign of a value.</summary>
         /// <param name="value">The value whose sign is to be computed.</param>
-        /// <returns>A positive value if <paramref name="value" /> is positive, <see cref="INumberBase{TSelf}.Zero" /> if <paramref name="value" /> is zero, and a negative value if <paramref name="value" /> is negative.</returns>
+        /// <returns>A positive value if <paramref name="value" /> is positive, <c>0</c> if <paramref name="value" /> is zero, and a negative value if <paramref name="value" /> is negative.</returns>
         /// <remarks>It is recommended that a function return <c>1</c>, <c>0</c>, and <c>-1</c>, respectively.</remarks>
         static virtual int Sign(TSelf value)
         {
             if (value != TSelf.Zero)
             {
+                if (TSelf.IsNaN(value))
+                {
+                    ThrowHelper.ThrowArithmeticException(SR.Arithmetic_NaN);
+                }
                 return TSelf.IsNegative(value) ? -1 : +1;
             }
             return 0;
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/INumberBase.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/INumberBase.cs
index cd31d571ca3d..7e5acfafe75a 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/INumberBase.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/INumberBase.cs
@@ -195,7 +195,7 @@ static virtual TSelf CreateTruncating<TOther>(TOther value)
         /// <returns><c>true</c> if <paramref name="value" /> is an odd integer; otherwise, <c>false</c>.</returns>
         /// <remarks>
         ///     <para>This correctly handles floating-point values and so <c>3.0</c> will return <c>true</c> while <c>3.3</c> will return <c>false</c>.</para>
-        ///     <para>This functioning returning <c>false</c> does not imply that <see cref="IsOddInteger(TSelf)" /> will return <c>true</c>. A number with a fractional portion, <c>3.3</c>, is neither even nor odd.</para>
+        ///     <para>This functioning returning <c>false</c> does not imply that <see cref="IsEvenInteger(TSelf)" /> will return <c>true</c>. A number with a fractional portion, <c>3.3</c>, is neither even nor odd.</para>
         /// </remarks>
         static abstract bool IsOddInteger(TSelf value);
 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/IUnaryPlusOperators.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/IUnaryPlusOperators.cs
index 5ba2d43f6a94..3e3d9c25704d 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/IUnaryPlusOperators.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/IUnaryPlusOperators.cs
@@ -5,7 +5,7 @@ namespace System.Numerics
 {
     /// <summary>Defines a mechanism for computing the unary plus of a value.</summary>
     /// <typeparam name="TSelf">The type that implements this interface.</typeparam>
-    /// <typeparam name="TResult">The type that contains the result of negating <typeparamref name="TSelf" />.</typeparam>
+    /// <typeparam name="TResult">The type that contains the result of converting <typeparamref name="TSelf" />.</typeparam>
     public interface IUnaryPlusOperators<TSelf, TResult>
         where TSelf : IUnaryPlusOperators<TSelf, TResult>?
     {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs
index efedc7f7d18d..4ad327aebc9e 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs
@@ -181,6 +181,7 @@ public static Vector<T> Indices
         /// <returns><c>true</c> if <typeparamref name="T" /> is supported; otherwise, <c>false</c>.</returns>
         public static bool IsSupported
         {
+            [Intrinsic]
             [MethodImpl(MethodImplOptions.AggressiveInlining)]
             get => (typeof(T) == typeof(byte)) ||
                    (typeof(T) == typeof(double)) ||
diff --git a/src/libraries/System.Private.CoreLib/src/System/Random.cs b/src/libraries/System.Private.CoreLib/src/System/Random.cs
index 65043bf4c00f..190fa3583caa 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Random.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Random.cs
@@ -294,7 +294,9 @@ public T[] GetItems<T>(ReadOnlySpan<T> choices, int length)
         public void Shuffle<T>(T[] values)
         {
             ArgumentNullException.ThrowIfNull(values);
-            Shuffle(values.AsSpan());
+            // this can't use AsSpan due to array covariance
+            // forcing it like this is safe due to everything being in the array already
+            Shuffle(new Span<T>(ref MemoryMarshal.GetArrayDataReference(values), values.Length));
         }
 
         /// <summary>
diff --git a/src/libraries/System.Private.CoreLib/src/System/ReadOnlyMemory.cs b/src/libraries/System.Private.CoreLib/src/System/ReadOnlyMemory.cs
index 9037e4110817..6b59ac75e576 100644
--- a/src/libraries/System.Private.CoreLib/src/System/ReadOnlyMemory.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/ReadOnlyMemory.cs
@@ -313,6 +313,7 @@ public unsafe MemoryHandle Pin()
             {
                 if (typeof(T) == typeof(char) && tmpObject is string s)
                 {
+                    // Unsafe.AsPointer is safe since the handle pins it
                     GCHandle handle = GCHandle.Alloc(tmpObject, GCHandleType.Pinned);
                     ref char stringData = ref Unsafe.Add(ref s.GetRawStringData(), _index);
                     return new MemoryHandle(Unsafe.AsPointer(ref stringData), handle);
@@ -325,11 +326,13 @@ public unsafe MemoryHandle Pin()
                     // Array is already pre-pinned
                     if (_index < 0)
                     {
+                        // Unsafe.AsPointer is safe since it's pinned
                         void* pointer = Unsafe.Add<T>(Unsafe.AsPointer(ref MemoryMarshal.GetArrayDataReference(Unsafe.As<T[]>(tmpObject))), _index & RemoveFlagsBitMask);
                         return new MemoryHandle(pointer);
                     }
                     else
                     {
+                        // Unsafe.AsPointer is safe since the handle pins it
                         GCHandle handle = GCHandle.Alloc(tmpObject, GCHandleType.Pinned);
                         void* pointer = Unsafe.Add<T>(Unsafe.AsPointer(ref MemoryMarshal.GetArrayDataReference(Unsafe.As<T[]>(tmpObject))), _index);
                         return new MemoryHandle(pointer, handle);
diff --git a/src/libraries/System.Private.CoreLib/src/System/ReadOnlySpan.cs b/src/libraries/System.Private.CoreLib/src/System/ReadOnlySpan.cs
index d198fed12fa3..b182d42b66ec 100644
--- a/src/libraries/System.Private.CoreLib/src/System/ReadOnlySpan.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/ReadOnlySpan.cs
@@ -120,7 +120,6 @@ public ReadOnlySpan(ref readonly T reference)
             _length = 1;
         }
 
-#pragma warning disable IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
         // Constructor for internal use only. It is not safe to expose publicly, and is instead exposed via the unsafe MemoryMarshal.CreateReadOnlySpan.
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal ReadOnlySpan(ref T reference, int length)
@@ -130,7 +129,6 @@ internal ReadOnlySpan(ref T reference, int length)
             _reference = ref reference;
             _length = length;
         }
-#pragma warning restore IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
 
         /// <summary>
         /// Returns the specified element of the read-only span.
@@ -217,6 +215,18 @@ public static implicit operator ReadOnlySpan<T>(ArraySegment<T> segment)
         /// </summary>
         public static ReadOnlySpan<T> Empty => default;
 
+        /// <summary>
+        /// Casts a read-only span of <typeparamref name="TDerived"/> to a read-only span of <typeparamref name="T"/>.
+        /// </summary>
+        /// <typeparam name="TDerived">The element type of the source read-only span, which must be derived from <typeparamref name="T"/>.</typeparam>
+        /// <param name="items">The source read-only span. No copy is made.</param>
+        /// <returns>A read-only span with elements cast to the new type.</returns>
+        /// <remarks>This method uses a covariant cast, producing a read-only span that shares the same memory as the source. The relationships expressed in the type constraints ensure that the cast is a safe operation.</remarks>
+        public static ReadOnlySpan<T> CastUp<TDerived>(ReadOnlySpan<TDerived> items) where TDerived : class?, T
+        {
+            return new ReadOnlySpan<T>(ref Unsafe.As<TDerived, T>(ref items._reference), items.Length);
+        }
+
         /// <summary>Gets an enumerator for this span.</summary>
         public Enumerator GetEnumerator() => new Enumerator(this);
 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/ConstructorInvoker.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/ConstructorInvoker.cs
index 2be8185c4ca6..9778b00d1080 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Reflection/ConstructorInvoker.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/ConstructorInvoker.cs
@@ -245,8 +245,8 @@ internal object InvokeWithFewArgs(Span<object?> arguments)
             Debug.Assert(_argCount <= MaxStackAllocArgCount);
 
             StackAllocatedArgumentsWithCopyBack stackArgStorage = default;
-            Span<object?> copyOfArgs = stackArgStorage._args.AsSpan(_argCount);
-            scoped Span<bool> shouldCopyBack = stackArgStorage._shouldCopyBack.AsSpan(_argCount);
+            Span<object?> copyOfArgs = ((Span<object?>)stackArgStorage._args).Slice(0, _argCount);
+            scoped Span<bool> shouldCopyBack = ((Span<bool>)stackArgStorage._shouldCopyBack).Slice(0, _argCount);
 
             for (int i = 0; i < _argCount; i++)
             {
@@ -279,7 +279,7 @@ internal object InvokeWithFewArgs(Span<object?> arguments)
         internal object InvokeDirectByRef(object? arg1 = null, object? arg2 = null, object? arg3 = null, object? arg4 = null)
         {
             StackAllocatedArguments stackStorage = new(arg1, arg2, arg3, arg4);
-            return InvokeDirectByRefWithFewArgs(stackStorage._args.AsSpan(_argCount));
+            return InvokeDirectByRefWithFewArgs(((Span<object?>)stackStorage._args).Slice(0, _argCount));
         }
 
         internal unsafe object InvokeDirectByRefWithFewArgs(Span<object?> copyOfArgs)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/Emit/AssemblyBuilder.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/Emit/AssemblyBuilder.cs
index 8599d39aa4b1..1b1220536b9e 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Reflection/Emit/AssemblyBuilder.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/Emit/AssemblyBuilder.cs
@@ -33,56 +33,8 @@ public ModuleBuilder DefineDynamicModule(string name)
             return GetDynamicModuleCore(name);
         }
 
-        /// <summary>
-        /// Defines an <see cref="AssemblyBuilder"/> that can be saved to a file or stream.
-        /// </summary>
-        /// <param name="name">The name of the assembly.</param>
-        /// <param name="coreAssembly">The assembly that denotes the "system assembly" that houses the well-known types such as <see cref="object"/></param>
-        /// <param name="assemblyAttributes">A collection that contains the attributes of the assembly.</param>
-        /// <returns>An <see cref="AssemblyBuilder"/> that can be persisted.</returns>
-        /// <exception cref="ArgumentNullException">The <paramref name="name"/> or <paramref name="name.Name"/> or <paramref name="coreAssembly"/> is null.</exception>
-        /// <remarks>Currently the persisted assembly doesn't support running, need to save it and load back to run.</remarks>
-        public static AssemblyBuilder DefinePersistedAssembly(AssemblyName name, Assembly coreAssembly, IEnumerable<CustomAttributeBuilder>? assemblyAttributes = null)
-        {
-            ArgumentNullException.ThrowIfNull(name);
-            ArgumentException.ThrowIfNullOrEmpty(name.Name, "AssemblyName.Name");
-            ArgumentNullException.ThrowIfNull(coreAssembly);
-
-            Type assemblyType = Type.GetType("System.Reflection.Emit.AssemblyBuilderImpl, System.Reflection.Emit", throwOnError: true)!;
-            ConstructorInfo con = assemblyType.GetConstructor(BindingFlags.NonPublic | BindingFlags.Instance, [typeof(AssemblyName), typeof(Assembly), typeof(IEnumerable<CustomAttributeBuilder>)])!;
-            return (AssemblyBuilder)con.Invoke([name, coreAssembly, assemblyAttributes]);
-        }
-
         protected abstract ModuleBuilder? GetDynamicModuleCore(string name);
 
-        /// <summary>
-        /// Serializes the assembly to <see cref="Stream"/>.
-        /// </summary>
-        /// <param name="stream">The <see cref="Stream"/> to which the assembly serialized.</param>
-        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
-        /// <exception cref="NotSupportedException">The AssemblyBuilder instance doesn't support saving.</exception>
-        public void Save(Stream stream) => SaveCore(stream);
-
-        /// <summary>
-        /// Saves the assembly to disk.
-        /// </summary>
-        /// <param name="assemblyFileName">The file name of the assembly.</param>
-        /// <exception cref="ArgumentNullException"><paramref name="assemblyFileName"/> is null.</exception>
-        /// <exception cref="NotSupportedException">The AssemblyBuilder instance doesn't support saving.</exception>
-        public void Save(string assemblyFileName)
-        {
-            ArgumentNullException.ThrowIfNull(assemblyFileName);
-
-            using var peStream = new FileStream(assemblyFileName, FileMode.Create, FileAccess.Write);
-            SaveCore(peStream);
-        }
-
-        /// <summary>
-        /// When implemented in a derived type, serializes the assembly to a stream.
-        /// </summary>
-        /// <param name="stream">The stream to which the assembly serialized.</param>
-        protected virtual void SaveCore(Stream stream) => throw new NotSupportedException(SR.NotSupported_AssemblySave);
-
         public void SetCustomAttribute(ConstructorInfo con, byte[] binaryAttribute)
         {
             ArgumentNullException.ThrowIfNull(con);
diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/Emit/MethodOnTypeBuilderInstantiation.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/Emit/MethodOnTypeBuilderInstantiation.cs
index a1256cf7faab..bfa9ef0359c5 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Reflection/Emit/MethodOnTypeBuilderInstantiation.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/Emit/MethodOnTypeBuilderInstantiation.cs
@@ -68,13 +68,6 @@ public override object Invoke(object? obj, BindingFlags invokeAttr, Binder? bind
             throw new NotSupportedException();
         }
         public override CallingConventions CallingConvention => _method.CallingConvention;
-#if !MONO
-        public override MethodInfo GetGenericMethodDefinition() { return _method; }
-        public override bool IsGenericMethodDefinition => _method.IsGenericMethodDefinition;
-        public override Type[] GetGenericArguments()
-        {
-            return _method.GetGenericArguments();
-        }
         public override bool ContainsGenericParameters
         {
             get
@@ -101,6 +94,14 @@ public override bool ContainsGenericParameters
                 return false;
             }
         }
+#if !MONO
+        public override MethodInfo GetGenericMethodDefinition() { return _method; }
+        public override bool IsGenericMethodDefinition => _method.IsGenericMethodDefinition;
+        public override Type[] GetGenericArguments()
+        {
+            return _method.GetGenericArguments();
+        }
+
         [RequiresUnreferencedCode("If some of the generic arguments are annotated (either with DynamicallyAccessedMembersAttribute, or generic constraints), trimming can't validate that the requirements of those annotations are met.")]
         public override MethodInfo MakeGenericMethod(params Type[] typeArgs)
         {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/FieldAccessor.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/FieldAccessor.cs
new file mode 100644
index 000000000000..f8d9f6b1ba17
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/FieldAccessor.cs
@@ -0,0 +1,508 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Globalization;
+using System.Runtime.CompilerServices;
+using System.Threading;
+
+namespace System.Reflection
+{
+    internal sealed class FieldAccessor
+    {
+        private readonly RtFieldInfo _fieldInfo;
+        private IntPtr _addressOrOffset;
+        private unsafe MethodTable* _methodTable;
+        private volatile FieldAccessorType _fieldAccessType;
+
+        internal FieldAccessor(FieldInfo fieldInfo)
+        {
+            _fieldInfo = (RtFieldInfo)fieldInfo;
+            Debug.Assert(_fieldInfo.m_declaringType != null);
+
+            if (_fieldInfo.m_declaringType.ContainsGenericParameters ||
+                _fieldInfo.m_declaringType.IsNullableOfT)
+            {
+                _fieldAccessType = FieldAccessorType.NoInvoke;
+            }
+            else
+            {
+                _fieldAccessType = FieldAccessorType.SlowPathUntilClassInitialized;
+            }
+        }
+
+        private void Initialize()
+        {
+            if (!RuntimeFieldHandle.IsFastPathSupported(_fieldInfo))
+            {
+                // Currently this is true for [ThreadStatic] cases, for fields added from EnC, and for fields on unloadable types.
+                _fieldAccessType = FieldAccessorType.SlowPath;
+                return;
+            }
+
+            RuntimeType fieldType = (RuntimeType)_fieldInfo.FieldType;
+
+            unsafe
+            {
+                if (_fieldInfo.IsStatic)
+                {
+                    _addressOrOffset = RuntimeFieldHandle.GetStaticFieldAddress(_fieldInfo);
+
+                    if (fieldType.IsValueType)
+                    {
+                        if (fieldType.IsEnum)
+                        {
+                            _methodTable = (MethodTable*)fieldType.TypeHandle.Value;
+                            _fieldAccessType = GetPrimitiveAccessorTypeForStatic(fieldType.GetEnumUnderlyingType());
+                        }
+                        else if (RuntimeTypeHandle.GetCorElementType(fieldType) == CorElementType.ELEMENT_TYPE_VALUETYPE)
+                        {
+                            // The runtime stores non-primitive value types as a boxed value.
+                            _methodTable = (MethodTable*)fieldType.TypeHandle.Value;
+                            _fieldAccessType = FieldAccessorType.StaticValueTypeBoxed;
+                        }
+                        else
+                        {
+                            _methodTable = (MethodTable*)fieldType.TypeHandle.Value;
+                            _fieldAccessType = GetPrimitiveAccessorTypeForStatic(fieldType);
+                        }
+                    }
+                    else if (fieldType.IsPointer)
+                    {
+                        _fieldAccessType = FieldAccessorType.StaticPointerType;
+                    }
+                    else if (fieldType.IsFunctionPointer)
+                    {
+                        _methodTable = (MethodTable*)typeof(IntPtr).TypeHandle.Value;
+                        _fieldAccessType = GetIntPtrAccessorTypeForStatic();
+                    }
+                    else
+                    {
+                        _fieldAccessType = FieldAccessorType.StaticReferenceType;
+                    }
+                }
+                else
+                {
+                    _addressOrOffset = RuntimeFieldHandle.GetInstanceFieldOffset(_fieldInfo);
+
+                    if (fieldType.IsEnum)
+                    {
+                        _methodTable = (MethodTable*)fieldType.TypeHandle.Value;
+                        _fieldAccessType = GetPrimitiveAccessorTypeForInstance(fieldType.GetEnumUnderlyingType());
+                    }
+                    else if (fieldType.IsValueType)
+                    {
+                        _methodTable = (MethodTable*)fieldType.TypeHandle.Value;
+                        _fieldAccessType = GetPrimitiveAccessorTypeForInstance(fieldType);
+                    }
+                    else if (fieldType.IsPointer)
+                    {
+                        _fieldAccessType = FieldAccessorType.InstancePointerType;
+                    }
+                    else if (fieldType.IsFunctionPointer)
+                    {
+                        _methodTable = (MethodTable*)typeof(IntPtr).TypeHandle.Value;
+                        _fieldAccessType = GetIntPtrAccessorTypeForInstance();
+                    }
+                    else
+                    {
+                        _fieldAccessType = FieldAccessorType.InstanceReferenceType;
+                    }
+                }
+            }
+        }
+
+        public object? GetValue(object? obj)
+        {
+            bool isClassInitialized;
+
+            unsafe
+            {
+                switch (_fieldAccessType)
+                {
+                    case FieldAccessorType.InstanceReferenceType:
+                        VerifyTarget(obj);
+                        Debug.Assert(obj != null);
+                        return Volatile.Read(ref Unsafe.As<byte, object>(ref Unsafe.AddByteOffset(ref obj.GetRawData(), _addressOrOffset)));
+
+                    case FieldAccessorType.InstanceValueType:
+                    case FieldAccessorType.InstanceValueTypeSize1:
+                    case FieldAccessorType.InstanceValueTypeSize2:
+                    case FieldAccessorType.InstanceValueTypeSize4:
+                    case FieldAccessorType.InstanceValueTypeSize8:
+                        VerifyTarget(obj);
+                        Debug.Assert(obj != null);
+                        return RuntimeHelpers.Box(
+                            _methodTable,
+                            ref Unsafe.AddByteOffset(ref obj.GetRawData(), _addressOrOffset));
+
+                    case FieldAccessorType.InstancePointerType:
+                        VerifyTarget(obj);
+                        Debug.Assert(obj != null);
+                        return Pointer.Box(
+                            (void*)Unsafe.As<byte, IntPtr>(ref Unsafe.AddByteOffset(ref obj.GetRawData(), _addressOrOffset)),
+                            _fieldInfo.FieldType);
+
+                    case FieldAccessorType.StaticReferenceType:
+                        return Volatile.Read(ref Unsafe.As<IntPtr, object>(ref *(IntPtr*)_addressOrOffset));
+
+                    case FieldAccessorType.StaticValueType:
+                    case FieldAccessorType.StaticValueTypeSize1:
+                    case FieldAccessorType.StaticValueTypeSize2:
+                    case FieldAccessorType.StaticValueTypeSize4:
+                    case FieldAccessorType.StaticValueTypeSize8:
+                        return RuntimeHelpers.Box(_methodTable, ref Unsafe.AsRef<byte>(_addressOrOffset.ToPointer()));
+
+                    case FieldAccessorType.StaticValueTypeBoxed:
+                        // Re-box the value.
+                        return RuntimeHelpers.Box(
+                            _methodTable,
+                            ref Unsafe.As<IntPtr, object>(ref *(IntPtr*)_addressOrOffset).GetRawData());
+
+                    case FieldAccessorType.StaticPointerType:
+                        return Pointer.Box((void*)Unsafe.As<byte, IntPtr>(
+                            ref Unsafe.AsRef<byte>(_addressOrOffset.ToPointer())), _fieldInfo.FieldType);
+
+                    case FieldAccessorType.SlowPathUntilClassInitialized:
+                        if (!IsStatic())
+                        {
+                            VerifyTarget(obj);
+                        }
+
+                        isClassInitialized = false;
+                        object? ret = RuntimeFieldHandle.GetValue(_fieldInfo, obj, (RuntimeType)_fieldInfo.FieldType, _fieldInfo.m_declaringType, ref isClassInitialized);
+                        if (isClassInitialized)
+                        {
+                            Initialize();
+                        }
+
+                        return ret;
+
+                    case FieldAccessorType.SlowPath:
+                        if (!IsStatic())
+                        {
+                            VerifyTarget(obj);
+                        }
+
+                        isClassInitialized = true;
+                        return RuntimeFieldHandle.GetValue(_fieldInfo, obj, (RuntimeType)_fieldInfo.FieldType, _fieldInfo.m_declaringType, ref isClassInitialized);
+
+                    case FieldAccessorType.NoInvoke:
+                        if (_fieldInfo.DeclaringType is not null && _fieldInfo.DeclaringType.ContainsGenericParameters)
+                            throw new InvalidOperationException(SR.Arg_UnboundGenField);
+
+                        if (_fieldInfo.DeclaringType is not null && ((RuntimeType)_fieldInfo.FieldType).IsNullableOfT)
+                            throw new NotSupportedException();
+
+                        throw new FieldAccessException();
+
+                    default:
+                        Debug.Assert(false, "Unknown enum value");
+                        return null;
+                }
+            }
+        }
+
+        public void SetValue(object? obj, object? value, BindingFlags invokeAttr, Binder? binder, CultureInfo? culture)
+        {
+            bool isClassInitialized;
+
+            unsafe
+            {
+                switch (_fieldAccessType)
+                {
+                    case FieldAccessorType.InstanceReferenceType:
+                        VerifyInstanceField(obj, ref value, invokeAttr, binder, culture);
+                        Debug.Assert(obj != null);
+                        Volatile.Write(
+                            ref Unsafe.As<byte, object?>(ref Unsafe.AddByteOffset(ref obj.GetRawData(), _addressOrOffset)),
+                            value);
+                        return;
+
+                    case FieldAccessorType.InstanceValueTypeSize1:
+                        VerifyInstanceField(obj, ref value, invokeAttr, binder, culture);
+                        Debug.Assert(obj != null);
+                        Volatile.Write(
+                            ref Unsafe.AddByteOffset(ref obj.GetRawData(), _addressOrOffset),
+                            value!.GetRawData());
+                        return;
+
+                    case FieldAccessorType.InstanceValueTypeSize2:
+                        VerifyInstanceField(obj, ref value, invokeAttr, binder, culture);
+                        Debug.Assert(obj != null);
+                        Volatile.Write(
+                            ref Unsafe.As<byte, short>(ref Unsafe.AddByteOffset(ref obj.GetRawData(), _addressOrOffset)),
+                            Unsafe.As<byte, short>(ref value!.GetRawData()));
+                        return;
+
+                    case FieldAccessorType.InstanceValueTypeSize4:
+                        VerifyInstanceField(obj, ref value, invokeAttr, binder, culture);
+                        Debug.Assert(obj != null);
+                        Volatile.Write(
+                            ref Unsafe.As<byte, int>(ref Unsafe.AddByteOffset(ref obj.GetRawData(), _addressOrOffset)),
+                            Unsafe.As<byte, int>(ref value!.GetRawData()));
+                        return;
+
+                    case FieldAccessorType.InstanceValueTypeSize8:
+                        VerifyInstanceField(obj, ref value, invokeAttr, binder, culture);
+                        Debug.Assert(obj != null);
+                        Volatile.Write(
+                            ref Unsafe.As<byte, long>(ref Unsafe.AddByteOffset(ref obj.GetRawData(), _addressOrOffset)),
+                            Unsafe.As<byte, long>(ref value!.GetRawData()));
+                        return;
+
+                    case FieldAccessorType.StaticReferenceType:
+                        VerifyStaticField(ref value, invokeAttr, binder, culture);
+                        Volatile.Write(ref Unsafe.As<IntPtr, object?>(ref *(IntPtr*)_addressOrOffset), value);
+                        return;
+
+                    case FieldAccessorType.StaticValueTypeSize1:
+                        VerifyStaticField(ref value, invokeAttr, binder, culture);
+                        Volatile.Write(
+                            ref Unsafe.AsRef<byte>(_addressOrOffset.ToPointer()),
+                            value!.GetRawData());
+                        return;
+
+                    case FieldAccessorType.StaticValueTypeSize2:
+                        VerifyStaticField(ref value, invokeAttr, binder, culture);
+                        Volatile.Write(
+                            ref Unsafe.AsRef<short>(_addressOrOffset.ToPointer()),
+                            Unsafe.As<byte, short>(ref value!.GetRawData()));
+                        return;
+
+                    case FieldAccessorType.StaticValueTypeSize4:
+                        VerifyStaticField(ref value, invokeAttr, binder, culture);
+                        Volatile.Write(
+                            ref Unsafe.AsRef<int>(_addressOrOffset.ToPointer()),
+                            Unsafe.As<byte, int>(ref value!.GetRawData()));
+                        return;
+
+                    case FieldAccessorType.StaticValueTypeSize8:
+                        VerifyStaticField(ref value, invokeAttr, binder, culture);
+                        Volatile.Write(
+                            ref Unsafe.AsRef<long>(_addressOrOffset.ToPointer()),
+                            Unsafe.As<byte, long>(ref value!.GetRawData()));
+                        return;
+
+                    case FieldAccessorType.SlowPathUntilClassInitialized:
+                        if (IsStatic())
+                        {
+                            VerifyStaticField(ref value, invokeAttr, binder, culture);
+                        }
+                        else
+                        {
+                            VerifyInstanceField(obj, ref value, invokeAttr, binder, culture);
+                        }
+
+                        isClassInitialized = false;
+                        RuntimeFieldHandle.SetValue(_fieldInfo, obj, value, (RuntimeType)_fieldInfo.FieldType, _fieldInfo.m_declaringType, ref isClassInitialized);
+                        if (isClassInitialized)
+                        {
+                            Initialize();
+                        }
+
+                        return;
+
+                    case FieldAccessorType.NoInvoke:
+                        if (_fieldInfo.DeclaringType is not null && _fieldInfo.DeclaringType.ContainsGenericParameters)
+                            throw new InvalidOperationException(SR.Arg_UnboundGenField);
+
+                        throw new FieldAccessException();
+                }
+            }
+
+            // All other cases use the slow path.
+            if (IsStatic())
+            {
+                VerifyStaticField(ref value, invokeAttr, binder, culture);
+            }
+            else
+            {
+                VerifyInstanceField(obj, ref value, invokeAttr, binder, culture);
+            }
+
+            isClassInitialized = true;
+            RuntimeFieldHandle.SetValue(_fieldInfo, obj, value, (RuntimeType)_fieldInfo.FieldType, _fieldInfo.m_declaringType, ref isClassInitialized);
+        }
+
+        private bool IsStatic() => (_fieldInfo.Attributes & FieldAttributes.Static) == FieldAttributes.Static;
+
+        private void VerifyStaticField(ref object? value, BindingFlags invokeAttr, Binder? binder, CultureInfo? culture)
+        {
+            VerifyInitOnly();
+            CheckValue(ref value, invokeAttr, binder, culture);
+        }
+
+        private void VerifyInstanceField(object? obj, ref object? value, BindingFlags invokeAttr, Binder? binder, CultureInfo? culture)
+        {
+            VerifyTarget(obj);
+            CheckValue(ref value, invokeAttr, binder, culture);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal void VerifyTarget(object? target)
+        {
+            Debug.Assert(!IsStatic());
+
+            if (!_fieldInfo.m_declaringType.IsInstanceOfType(target))
+            {
+                if (target == null)
+                {
+                    ThrowHelperTargetException();
+                }
+                else
+                {
+                    ThrowHelperArgumentException(target, _fieldInfo);
+                }
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private void CheckValue(ref object? value, BindingFlags invokeAttr, Binder? binder, CultureInfo? culture)
+        {
+            if (value is null)
+            {
+                if (((RuntimeType)_fieldInfo.FieldType).IsActualValueType)
+                {
+                    ((RuntimeType)_fieldInfo.FieldType).CheckValue(ref value, binder, culture, invokeAttr);
+                }
+            }
+            else if (!ReferenceEquals(value.GetType(), _fieldInfo.FieldType))
+            {
+                ((RuntimeType)_fieldInfo.FieldType).CheckValue(ref value, binder, culture, invokeAttr);
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal void VerifyInitOnly()
+        {
+            Debug.Assert(IsStatic());
+
+            if ((_fieldInfo.Attributes & FieldAttributes.InitOnly) == FieldAttributes.InitOnly &&
+                _fieldAccessType != FieldAccessorType.SlowPathUntilClassInitialized)
+            {
+                ThrowHelperFieldAccessException(_fieldInfo);
+            }
+        }
+
+        /// <summary>
+        /// Currently we only optimize for primitive types and not all value types. Primitive types support atomic write operations, are
+        /// not boxed by the runtime when stored as a static field, and don't need special nullable, GC or alignment checks.
+        /// </summary>
+        private static FieldAccessorType GetPrimitiveAccessorTypeForInstance(Type fieldType)
+        {
+            FieldAccessorType accessorType = FieldAccessorType.InstanceValueType;
+
+            if (fieldType == typeof(byte) ||
+                fieldType == typeof(sbyte) ||
+                fieldType == typeof(bool))
+                accessorType = FieldAccessorType.InstanceValueTypeSize1;
+            else if (fieldType == typeof(short) ||
+                fieldType == typeof(ushort) ||
+                fieldType == typeof(char))
+                accessorType = FieldAccessorType.InstanceValueTypeSize2;
+            else if (fieldType == typeof(int) ||
+                fieldType == typeof(uint) ||
+                fieldType == typeof(float))
+                accessorType = FieldAccessorType.InstanceValueTypeSize4;
+            else if (fieldType == typeof(long) ||
+                fieldType == typeof(ulong) ||
+                fieldType == typeof(double))
+                accessorType = FieldAccessorType.InstanceValueTypeSize8;
+            else if (fieldType == typeof(IntPtr) ||
+                fieldType == typeof(UIntPtr))
+                accessorType = GetIntPtrAccessorTypeForInstance();
+
+            return accessorType;
+        }
+
+        private static FieldAccessorType GetPrimitiveAccessorTypeForStatic(Type fieldType)
+        {
+            FieldAccessorType accessorType = FieldAccessorType.StaticValueType;
+
+            if (fieldType == typeof(byte) ||
+                fieldType == typeof(sbyte) ||
+                fieldType == typeof(bool))
+                accessorType = FieldAccessorType.StaticValueTypeSize1;
+            else if (fieldType == typeof(short) ||
+                fieldType == typeof(ushort) ||
+                fieldType == typeof(char))
+                accessorType = FieldAccessorType.StaticValueTypeSize2;
+            else if (fieldType == typeof(int) ||
+                fieldType == typeof(uint) ||
+                fieldType == typeof(float))
+                accessorType = FieldAccessorType.StaticValueTypeSize4;
+            else if (fieldType == typeof(long) ||
+                fieldType == typeof(ulong) ||
+                fieldType == typeof(double))
+                accessorType = FieldAccessorType.StaticValueTypeSize8;
+            else if (fieldType == typeof(IntPtr) ||
+                fieldType == typeof(UIntPtr))
+                accessorType = GetIntPtrAccessorTypeForStatic();
+
+            return accessorType;
+        }
+
+        private static FieldAccessorType GetIntPtrAccessorTypeForInstance()
+        {
+            FieldAccessorType accessorType = FieldAccessorType.InstanceValueType;
+
+            if (IntPtr.Size == 4)
+            {
+                accessorType = FieldAccessorType.InstanceValueTypeSize4;
+            }
+            else if (IntPtr.Size == 8)
+            {
+                accessorType = FieldAccessorType.InstanceValueTypeSize8;
+            }
+
+            return accessorType;
+        }
+
+        private static FieldAccessorType GetIntPtrAccessorTypeForStatic()
+        {
+            FieldAccessorType accessorType = FieldAccessorType.StaticValueType;
+
+            if (IntPtr.Size == 4)
+            {
+                accessorType = FieldAccessorType.StaticValueTypeSize4;
+            }
+            else if (IntPtr.Size == 8)
+            {
+                accessorType = FieldAccessorType.StaticValueTypeSize8;
+            }
+
+            return accessorType;
+        }
+
+        private static void ThrowHelperTargetException() => throw new TargetException(SR.RFLCT_Targ_StatFldReqTarg);
+
+        private static void ThrowHelperArgumentException(object target, FieldInfo fieldInfo) =>
+            throw new ArgumentException(SR.Format(SR.Arg_FieldDeclTarget, fieldInfo.Name, fieldInfo.DeclaringType, target.GetType()));
+
+        private static void ThrowHelperFieldAccessException(FieldInfo fieldInfo) =>
+            throw new FieldAccessException(SR.Format(SR.RFLCT_CannotSetInitonlyStaticField, fieldInfo.Name, fieldInfo.DeclaringType));
+
+        private enum FieldAccessorType
+        {
+            InstanceReferenceType,
+            InstanceValueType,
+            InstanceValueTypeSize1,
+            InstanceValueTypeSize2,
+            InstanceValueTypeSize4,
+            InstanceValueTypeSize8,
+            InstancePointerType,
+            StaticReferenceType,
+            StaticValueType,
+            StaticValueTypeSize1,
+            StaticValueTypeSize2,
+            StaticValueTypeSize4,
+            StaticValueTypeSize8,
+            StaticValueTypeBoxed,
+            StaticPointerType,
+            SlowPathUntilClassInitialized,
+            SlowPath,
+            NoInvoke,
+        }
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/GenericParameterAttributes.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/GenericParameterAttributes.cs
index c05d0ec6f200..26bb5d929a4f 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Reflection/GenericParameterAttributes.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/GenericParameterAttributes.cs
@@ -14,5 +14,6 @@ public enum GenericParameterAttributes
         ReferenceTypeConstraint = 0x0004,
         NotNullableValueTypeConstraint = 0x0008,
         DefaultConstructorConstraint = 0x0010,
+        AllowByRefLike = 0x0020,
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodBase.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodBase.cs
index 087f9953a3af..72be9d4897d5 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodBase.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodBase.cs
@@ -190,19 +190,6 @@ internal enum InvokerArgFlags : int
         internal struct ArgumentData<T>
         {
             private T _arg0;
-
-            [UnscopedRef]
-            public Span<T> AsSpan(int length)
-            {
-                Debug.Assert((uint)length <= MaxStackAllocArgCount);
-                return new Span<T>(ref _arg0, length);
-            }
-
-            public void Set(int index, T value)
-            {
-                Debug.Assert((uint)index < MaxStackAllocArgCount);
-                Unsafe.Add(ref _arg0, index) = value;
-            }
         }
 
         // Helper struct to avoid intermediate object[] allocation in calls to the native reflection stack.
@@ -214,10 +201,10 @@ internal ref struct StackAllocatedArguments
         {
             public StackAllocatedArguments(object? obj1, object? obj2, object? obj3, object? obj4)
             {
-                _args.Set(0, obj1);
-                _args.Set(1, obj2);
-                _args.Set(2, obj3);
-                _args.Set(3, obj4);
+                _args[0] = obj1;
+                _args[1] = obj2;
+                _args[2] = obj3;
+                _args[3] = obj4;
             }
 
             internal ArgumentData<object?> _args;
diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodBaseInvoker.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodBaseInvoker.cs
index 12d8c62b0725..531a8ed9f816 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodBaseInvoker.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodBaseInvoker.cs
@@ -118,8 +118,8 @@ internal static void ThrowTargetParameterCountException()
             Debug.Assert(_argCount <= MaxStackAllocArgCount);
 
             StackAllocatedArgumentsWithCopyBack stackArgStorage = default;
-            Span<object?> copyOfArgs = stackArgStorage._args.AsSpan(_argCount);
-            Span<bool> shouldCopyBack = stackArgStorage._shouldCopyBack.AsSpan(_argCount);
+            Span<object?> copyOfArgs = ((Span<object?>)stackArgStorage._args).Slice(0, _argCount);
+            Span<bool> shouldCopyBack = ((Span<bool>)stackArgStorage._shouldCopyBack).Slice(0, _argCount);
 
             object? ret;
             if ((_strategy & InvokerStrategy.StrategyDetermined_ObjSpanArgs) == 0)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodInvoker.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodInvoker.cs
index 68380efb9098..5b4b9048b23f 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodInvoker.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodInvoker.cs
@@ -298,8 +298,8 @@ private void ThrowForBadInvocationFlags()
             Debug.Assert(_argCount <= MaxStackAllocArgCount);
 
             StackAllocatedArgumentsWithCopyBack stackArgStorage = default;
-            Span<object?> copyOfArgs = stackArgStorage._args.AsSpan(_argCount);
-            scoped Span<bool> shouldCopyBack = stackArgStorage._shouldCopyBack.AsSpan(_argCount);
+            Span<object?> copyOfArgs = ((Span<object?>)stackArgStorage._args).Slice(0, _argCount);
+            scoped Span<bool> shouldCopyBack = ((Span<bool>)stackArgStorage._shouldCopyBack).Slice(0, _argCount);
 
             for (int i = 0; i < _argCount; i++)
             {
@@ -332,7 +332,7 @@ private void ThrowForBadInvocationFlags()
         internal object? InvokeDirectByRef(object? obj, object? arg1 = null, object? arg2 = null, object? arg3 = null, object? arg4 = null)
         {
             StackAllocatedArguments stackStorage = new(arg1, arg2, arg3, arg4);
-            return InvokeDirectByRefWithFewArgs(obj, stackStorage._args.AsSpan(_argCount));
+            return InvokeDirectByRefWithFewArgs(obj, ((Span<object?>)stackStorage._args).Slice(0, _argCount));
         }
 
         internal unsafe object? InvokeDirectByRefWithFewArgs(object? obj, Span<object?> copyOfArgs)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodInvokerCommon.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodInvokerCommon.cs
index 0096a9f397c3..d54640f8449d 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodInvokerCommon.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Reflection/MethodInvokerCommon.cs
@@ -96,7 +96,7 @@ internal static void ValidateInvokeTarget(object? target, MethodBase method)
 
             if (!method.DeclaringType!.IsInstanceOfType(target))
             {
-                throw new TargetException(SR.RFLCT_Targ_ITargMismatch);
+                throw new TargetException(SR.Format(SR.RFLCT_Targ_ITargMismatch_WithType, method.DeclaringType, target.GetType()));
             }
         }
 
@@ -113,9 +113,10 @@ ref InvokeFunc_ObjSpanArgs?
                 // If ByRefs are used, we can't use this strategy.
                 strategy |= InvokerStrategy.StrategyDetermined_ObjSpanArgs;
             }
-            else if ((strategy & InvokerStrategy.HasBeenInvoked_ObjSpanArgs) == 0)
+            else if (((strategy & InvokerStrategy.HasBeenInvoked_ObjSpanArgs) == 0) && !Debugger.IsAttached)
             {
-                // The first time, ignoring race conditions, use the slow path.
+                // The first time, ignoring race conditions, use the slow path, except for the case when running under a debugger.
+                // This is a workaround for the debugger issues with understanding exceptions propagation over the slow path.
                 strategy |= InvokerStrategy.HasBeenInvoked_ObjSpanArgs;
             }
             else
@@ -141,9 +142,10 @@ internal static void DetermineStrategy_Obj4Args(
                 // If ByRefs are used, we can't use this strategy.
                 strategy |= InvokerStrategy.StrategyDetermined_Obj4Args;
             }
-            else if ((strategy & InvokerStrategy.HasBeenInvoked_Obj4Args) == 0)
+            else if (((strategy & InvokerStrategy.HasBeenInvoked_Obj4Args) == 0) && !Debugger.IsAttached)
             {
-                // The first time, ignoring race conditions, use the slow path.
+                // The first time, ignoring race conditions, use the slow path, except for the case when running under a debugger.
+                // This is a workaround for the debugger issues with understanding exceptions propagation over the slow path.
                 strategy |= InvokerStrategy.HasBeenInvoked_Obj4Args;
             }
             else
@@ -163,9 +165,10 @@ internal static void DetermineStrategy_RefArgs(
             MethodBase method,
             bool backwardsCompat)
         {
-            if ((strategy & InvokerStrategy.HasBeenInvoked_RefArgs) == 0)
+            if (((strategy & InvokerStrategy.HasBeenInvoked_RefArgs) == 0) && !Debugger.IsAttached)
             {
-                // The first time, ignoring race conditions, use the slow path.
+                // The first time, ignoring race conditions, use the slow path, except for the case when running under a debugger.
+                // This is a workaround for the debugger issues with understanding exceptions propagation over the slow path.
                 strategy |= InvokerStrategy.HasBeenInvoked_RefArgs;
             }
             else
diff --git a/src/libraries/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.cs b/src/libraries/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.cs
index e4aa24dd7edd..7cfe087de94c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.cs
@@ -269,6 +269,11 @@ internal ResourceSet CreateResourceSet(Stream store, Assembly assembly)
             }
         }
 
+        private static Assembly? InternalGetSatelliteAssembly(Assembly mainAssembly, CultureInfo culture, Version? version)
+        {
+            return RuntimeAssembly.InternalGetSatelliteAssembly(mainAssembly, culture, version, throwOnFileNotFound: false);
+        }
+
         [RequiresUnreferencedCode("The CustomResourceTypesSupport feature switch has been enabled for this app which is being trimmed. " +
             "Custom readers as well as custom objects on the resources file are not observable by the trimmer and so required assemblies, types and members may be removed.")]
         private static ResourceSet InternalGetResourceSetFromSerializedData(Stream store, string readerTypeName, string? resSetTypeName, ResourceManager.ResourceManagerMediator mediator)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncVoidMethodBuilder.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncVoidMethodBuilder.cs
index c4719b861327..0caebcee6203 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncVoidMethodBuilder.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncVoidMethodBuilder.cs
@@ -80,13 +80,27 @@ public void SetResult()
                 TplEventSource.Log.TraceOperationEnd(this.Task.Id, AsyncCausalityStatus.Completed);
             }
 
+            // Grab the context. Calling SetResult will complete the builder which can cause the state
+            // to be cleared out of the builder, so we can't touch anything on this builder after calling Set*.
+            // This clearing is done as part of the AsyncStateMachineBox.MoveNext method after it calls
+            // MoveNext on the state machine: it's possible to have a chain of events like this:
+            // Thread 1: Calls AsyncStateMachineBox.MoveNext, which calls StateMachine.MoveNext.
+            // Thread 1: StateMachine.MoveNext hooks up a continuation and returns
+            //     Thread 2: That continuation runs and calls AsyncStateMachineBox.MoveNext, which calls SetResult on the builder (below)
+            //               which will result in the state machine task being marked completed.
+            // Thread 1: The original AsyncStateMachineBox.MoveNext call continues and sees that the task is now completed
+            // Thread 1: Clears the builder
+            //     Thread 2: Continues in this call to AsyncVoidMethodBuilder. If it touches anything on this instance, it will be cleared.
+            SynchronizationContext? context = _synchronizationContext;
+
             // Mark the builder as completed.  As this is a void-returning method, this mostly
             // doesn't matter, but it can affect things like debug events related to finalization.
+            // Marking the task completed will also then enable the MoveNext code to clear state.
             _builder.SetResult();
 
-            if (_synchronizationContext != null)
+            if (context != null)
             {
-                NotifySynchronizationContextOfCompletion();
+                NotifySynchronizationContextOfCompletion(context);
             }
         }
 
@@ -106,17 +120,18 @@ public void SetException(Exception exception)
                 TplEventSource.Log.TraceOperationEnd(this.Task.Id, AsyncCausalityStatus.Error);
             }
 
-            if (_synchronizationContext != null)
+            SynchronizationContext? context = _synchronizationContext;
+            if (context != null)
             {
                 // If we captured a synchronization context, Post the throwing of the exception to it
                 // and decrement its outstanding operation count.
                 try
                 {
-                    Task.ThrowAsync(exception, targetContext: _synchronizationContext);
+                    Task.ThrowAsync(exception, targetContext: context);
                 }
                 finally
                 {
-                    NotifySynchronizationContextOfCompletion();
+                    NotifySynchronizationContextOfCompletion(context);
                 }
             }
             else
@@ -132,12 +147,12 @@ public void SetException(Exception exception)
         }
 
         /// <summary>Notifies the current synchronization context that the operation completed.</summary>
-        private void NotifySynchronizationContextOfCompletion()
+        private static void NotifySynchronizationContextOfCompletion(SynchronizationContext context)
         {
-            Debug.Assert(_synchronizationContext != null, "Must only be used with a non-null context.");
+            Debug.Assert(context != null, "Must only be used with a non-null context.");
             try
             {
-                _synchronizationContext.OperationCompleted();
+                context.OperationCompleted();
             }
             catch (Exception exc)
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/ParamCollectionAttribute.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/ParamCollectionAttribute.cs
new file mode 100644
index 000000000000..5ca0d96ecb62
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/ParamCollectionAttribute.cs
@@ -0,0 +1,13 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Runtime.CompilerServices
+{
+    /// <summary>
+    /// Indicates that a method will allow a variable number of arguments in its invocation.
+    /// </summary>
+    [AttributeUsage(AttributeTargets.Parameter, Inherited = true, AllowMultiple = false)]
+    public sealed class ParamCollectionAttribute : Attribute
+    {
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeFeature.NonNativeAot.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeFeature.NonNativeAot.cs
index 1ae0005d941e..4495e552342c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeFeature.NonNativeAot.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeFeature.NonNativeAot.cs
@@ -1,10 +1,13 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Diagnostics.CodeAnalysis;
+
 namespace System.Runtime.CompilerServices
 {
     public static partial class RuntimeFeature
     {
+        [FeatureSwitchDefinition("System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeSupported")]
         public static bool IsDynamicCodeSupported
         {
 #if MONO
@@ -13,6 +16,7 @@ public static bool IsDynamicCodeSupported
             get;
         } = AppContext.TryGetSwitch("System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeSupported", out bool isDynamicCodeSupported) ? isDynamicCodeSupported : true;
 
+        [FeatureGuard(typeof(RequiresDynamicCodeAttribute))]
         public static bool IsDynamicCodeCompiled
         {
 #if MONO
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeFeature.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeFeature.cs
index e69436494251..392a4902337a 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeFeature.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeFeature.cs
@@ -32,6 +32,11 @@ public static partial class RuntimeFeature
         /// </summary>
         public const string ByRefFields = nameof(ByRefFields);
 
+        /// <summary>
+        /// Represents a runtime feature where byref-like types can be used in Generic parameters.
+        /// </summary>
+        public const string ByRefLikeGenerics = nameof(ByRefLikeGenerics);
+
         /// <summary>
         /// Indicates that this version of runtime supports virtual static members of interfaces.
         /// </summary>
@@ -52,6 +57,7 @@ public static bool IsSupported(string feature)
                 case PortablePdb:
                 case CovariantReturnsOfClasses:
                 case ByRefFields:
+                case ByRefLikeGenerics:
                 case UnmanagedSignatureCallingConvention:
                 case DefaultImplementationsOfInterfaces:
                 case VirtualStaticsInInterfaces:
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs
index cf2135908514..d33471813491 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs
@@ -9,7 +9,11 @@ namespace System.Runtime.CompilerServices
     public static partial class RuntimeHelpers
     {
         // The special dll name to be used for DllImport of QCalls
+#if NATIVEAOT
+        internal const string QCall = "*";
+#else
         internal const string QCall = "QCall";
+#endif
 
         public delegate void TryCode(object? userData);
 
@@ -50,7 +54,7 @@ public static T[] GetSubArray<T>(T[] array, Range range)
             }
 
             // In either case, the newly-allocated array is the exact same type as the
-            // original incoming array. It's safe for us to Buffer.Memmove the contents
+            // original incoming array. It's safe for us to SpanHelpers.Memmove the contents
             // from the source array to the destination array, otherwise the contents
             // wouldn't have been valid for the source array in the first place.
 
@@ -125,9 +129,6 @@ internal static bool IsPrimitiveType(this CorElementType et)
 
         [Intrinsic]
         internal static bool IsKnownConstant(char t) => false;
-
-        [Intrinsic]
-        internal static bool IsKnownConstant(int t) => false;
 #pragma warning restore IDE0060
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/Unsafe.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/Unsafe.cs
index b6dc25bb4364..901d354cfc7c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/Unsafe.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/Unsafe.cs
@@ -3,6 +3,7 @@
 
 #pragma warning disable IDE0060 // implementations provided as intrinsics
 using System;
+using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Runtime.Versioning;
 
@@ -907,5 +908,32 @@ public static ref T Unbox<T>(object box)
             // unbox !!T
             // ret
         }
+
+
+        // Internal helper methods:
+
+        // Determines if the address is aligned at least to `alignment` bytes.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static bool IsOpportunisticallyAligned<T>(ref readonly T address, nuint alignment)
+        {
+            // `alignment` is expected to be a power of 2 in bytes.
+            // We use Unsafe.AsPointer to convert to a pointer,
+            // GC will keep alignment when moving objects (up to sizeof(void*)),
+            // otherwise alignment should be considered a hint if not pinned.
+            Debug.Assert(nuint.IsPow2(alignment));
+            return ((nuint)AsPointer(ref AsRef(in address)) & (alignment - 1)) == 0;
+        }
+
+        // Determines the misalignment of the address with respect to the specified `alignment`.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static nuint OpportunisticMisalignment<T>(ref readonly T address, nuint alignment)
+        {
+            // `alignment` is expected to be a power of 2 in bytes.
+            // We use Unsafe.AsPointer to convert to a pointer,
+            // GC will keep alignment when moving objects (up to sizeof(void*)),
+            // otherwise alignment should be considered a hint if not pinned.
+            Debug.Assert(nuint.IsPow2(alignment));
+            return (nuint)AsPointer(ref AsRef(in address)) & (alignment - 1);
+        }
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/ComTypes/ITypeLib2.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/ComTypes/ITypeLib2.cs
index da3dd8411a50..0253739e8da8 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/ComTypes/ITypeLib2.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/ComTypes/ITypeLib2.cs
@@ -25,9 +25,9 @@ public interface ITypeLib2 : ITypeLib
         [PreserveSig]
         new void ReleaseTLibAttr(IntPtr pTLibAttr);
         void GetCustData(ref Guid guid, out object pVarVal);
+        void GetLibStatistics(IntPtr pcUniqueNames, out int pcchUniqueNames);
         [LCIDConversion(1)]
         void GetDocumentation2(int index, out string pbstrHelpString, out int pdwHelpStringContext, out string pbstrHelpStringDll);
-        void GetLibStatistics(IntPtr pcUniqueNames, out int pcchUniqueNames);
         void GetAllCustData(IntPtr pCustData);
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/GCHandle.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/GCHandle.cs
index 7a45e868a3d0..14151c1c0270 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/GCHandle.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/GCHandle.cs
@@ -127,6 +127,7 @@ public readonly IntPtr AddrOfPinnedObject()
 
             unsafe
             {
+                // Unsafe.AsPointer calls are safe since object is pinned.
                 if (RuntimeHelpers.ObjectHasComponentSize(target))
                 {
                     if (target.GetType() == typeof(string))
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.cs
index 109efc9da447..74ef8257749a 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.cs
@@ -165,6 +165,7 @@ public static unsafe IntPtr UnsafeAddrOfPinnedArrayElement(Array arr, int index)
         {
             ArgumentNullException.ThrowIfNull(arr);
 
+            // Unsafe.AsPointer is safe since array must be pinned
             void* pRawData = Unsafe.AsPointer(ref MemoryMarshal.GetArrayDataReference(arr));
             return (IntPtr)((byte*)pRawData + (uint)index * (nuint)arr.GetElementSize());
         }
@@ -173,6 +174,7 @@ public static unsafe IntPtr UnsafeAddrOfPinnedArrayElement<T>(T[] arr, int index
         {
             ArgumentNullException.ThrowIfNull(arr);
 
+            // Unsafe.AsPointer is safe since array must be pinned
             void* pRawData = Unsafe.AsPointer(ref MemoryMarshal.GetArrayDataReference(arr));
 #pragma warning disable 8500 // sizeof of managed types
             return (IntPtr)((byte*)pRawData + (uint)index * (nuint)sizeof(T));
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/AnsiStringMarshaller.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/AnsiStringMarshaller.cs
index 4588f6912efd..34cfccd08755 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/AnsiStringMarshaller.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/AnsiStringMarshaller.cs
@@ -87,6 +87,7 @@ public void FromManaged(string? managed, Span<byte> buffer)
                     }
                 }
 
+                // Unsafe.AsPointer is safe since buffer must be pinned
                 _unmanagedValue = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(buffer));
 
                 Marshal.GetAnsiStringBytes(managed, buffer); // Includes null terminator
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/ArrayMarshaller.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/ArrayMarshaller.cs
index 0800b623d449..823c31917e76 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/ArrayMarshaller.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/ArrayMarshaller.cs
@@ -173,7 +173,11 @@ public void FromManaged(T[]? array, Span<TUnmanagedElement> buffer)
             /// Returns the unmanaged value representing the array.
             /// </summary>
             /// <returns>A pointer to the beginning of the unmanaged value.</returns>
-            public TUnmanagedElement* ToUnmanaged() => (TUnmanagedElement*)Unsafe.AsPointer(ref GetPinnableReference());
+            public TUnmanagedElement* ToUnmanaged()
+            {
+                // Unsafe.AsPointer is safe since buffer must be pinned
+                return (TUnmanagedElement*)Unsafe.AsPointer(ref GetPinnableReference());
+            }
 
             /// <summary>
             /// Frees resources.
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/BStrStringMarshaller.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/BStrStringMarshaller.cs
index 38b033d0a9bd..561f8ce8de31 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/BStrStringMarshaller.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/BStrStringMarshaller.cs
@@ -86,6 +86,7 @@ public void FromManaged(string? managed, Span<byte> buffer)
                 else
                 {
                     // Set length and update buffer target
+                    // Unsafe.AsPointer is safe since buffer must be pinned
                     byte* pBuffer = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(buffer));
                     *((uint*)pBuffer) = (uint)lengthInBytes;
                     ptrToFirstChar = (ushort*)(pBuffer + sizeof(uint));
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/ComVariant.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/ComVariant.cs
index af1c58b9e97a..3b9640e285ab 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/ComVariant.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/ComVariant.cs
@@ -404,7 +404,11 @@ public static unsafe ComVariant CreateRaw<T>(VarEnum vt, T rawValue)
                 (VarEnum.VT_UNKNOWN or VarEnum.VT_DISPATCH or VarEnum.VT_LPSTR or VarEnum.VT_BSTR or VarEnum.VT_LPWSTR or VarEnum.VT_SAFEARRAY
                     or VarEnum.VT_CLSID or VarEnum.VT_STREAM or VarEnum.VT_STREAMED_OBJECT or VarEnum.VT_STORAGE or VarEnum.VT_STORED_OBJECT or VarEnum.VT_CF or VT_VERSIONED_STREAM, _) when sizeof(T) == nint.Size => rawValue,
                 (VarEnum.VT_CY or VarEnum.VT_FILETIME, 8) => rawValue,
-                (VarEnum.VT_RECORD, _) when sizeof(T) == sizeof(Record) => rawValue,
+
+                // VT_RECORDs are weird in that regardless of whether the VT_BYREF flag is set or not
+                // they have the same internal representation.
+                (VarEnum.VT_RECORD or VarEnum.VT_RECORD | VarEnum.VT_BYREF, _) when sizeof(T) == sizeof(Record) => rawValue,
+
                 _ when vt.HasFlag(VarEnum.VT_BYREF) && sizeof(T) == nint.Size => rawValue,
                 _ when vt.HasFlag(VarEnum.VT_VECTOR) && sizeof(T) == sizeof(Vector<byte>) => rawValue,
                 _ when vt.HasFlag(VarEnum.VT_ARRAY) && sizeof(T) == nint.Size => rawValue,
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/PointerArrayMarshaller.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/PointerArrayMarshaller.cs
index ee7a3a134229..846879583d5a 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/PointerArrayMarshaller.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/PointerArrayMarshaller.cs
@@ -174,7 +174,11 @@ public void FromManaged(T*[]? array, Span<TUnmanagedElement> buffer)
             /// Returns the unmanaged value representing the array.
             /// </summary>
             /// <returns>A pointer to the beginning of the unmanaged value.</returns>
-            public TUnmanagedElement* ToUnmanaged() => (TUnmanagedElement*)Unsafe.AsPointer(ref GetPinnableReference());
+            public TUnmanagedElement* ToUnmanaged()
+            {
+                // Unsafe.AsPointer is safe since buffer must be pinned
+                return (TUnmanagedElement*)Unsafe.AsPointer(ref GetPinnableReference());
+            }
 
             /// <summary>
             /// Frees resources.
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/ReadOnlySpanMarshaller.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/ReadOnlySpanMarshaller.cs
index 8fe502608dce..bab60b629e31 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/ReadOnlySpanMarshaller.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/ReadOnlySpanMarshaller.cs
@@ -142,7 +142,11 @@ public void FromManaged(ReadOnlySpan<T> managed, Span<TUnmanagedElement> buffer)
             /// <summary>
             /// Returns the unmanaged value representing the array.
             /// </summary>
-            public TUnmanagedElement* ToUnmanaged() => (TUnmanagedElement*)Unsafe.AsPointer(ref GetPinnableReference());
+            public TUnmanagedElement* ToUnmanaged()
+            {
+                // Unsafe.AsPointer is safe since buffer must be pinned
+                return (TUnmanagedElement*)Unsafe.AsPointer(ref GetPinnableReference());
+            }
 
             /// <summary>
             /// Frees resources.
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/SpanMarshaller.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/SpanMarshaller.cs
index a9d42299848d..fb8aa49b12b6 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/SpanMarshaller.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/SpanMarshaller.cs
@@ -170,7 +170,11 @@ public void FromManaged(Span<T> managed, Span<TUnmanagedElement> buffer)
             /// <summary>
             /// Returns the unmanaged value representing the array.
             /// </summary>
-            public TUnmanagedElement* ToUnmanaged() => (TUnmanagedElement*)Unsafe.AsPointer(ref GetPinnableReference());
+            public TUnmanagedElement* ToUnmanaged()
+            {
+                // Unsafe.AsPointer is safe since buffer must be pinned
+                return (TUnmanagedElement*)Unsafe.AsPointer(ref GetPinnableReference());
+            }
 
             /// <summary>
             /// Frees resources.
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/Utf8StringMarshaller.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/Utf8StringMarshaller.cs
index e6d529392bc9..ee231616eaad 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/Utf8StringMarshaller.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshalling/Utf8StringMarshaller.cs
@@ -91,6 +91,7 @@ public void FromManaged(string? managed, Span<byte> buffer)
                     }
                 }
 
+                // Unsafe.AsPointer is safe since buffer must be pinned
                 _unmanagedValue = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(buffer));
 
                 int byteCount = Encoding.UTF8.GetBytes(managed, buffer);
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/MemoryMarshal.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/MemoryMarshal.cs
index 2fc9946f0380..0b0da448e4eb 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/MemoryMarshal.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/MemoryMarshal.cs
@@ -87,7 +87,6 @@ public static Memory<T> AsMemory<T>(ReadOnlyMemory<T> memory) =>
         /// </summary>
         public static ref T GetReference<T>(ReadOnlySpan<T> span) => ref span._reference;
 
-#pragma warning disable IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
         /// <summary>
         /// Returns a reference to the 0th element of the Span. If the Span is empty, returns a reference to fake non-null pointer. Such a reference can be used
         /// for pinning but must never be dereferenced. This is useful for interop with methods that do not accept null pointers for zero-sized buffers.
@@ -101,7 +100,6 @@ public static Memory<T> AsMemory<T>(ReadOnlyMemory<T> memory) =>
         /// </summary>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal static unsafe ref T GetNonNullPinnableReference<T>(ReadOnlySpan<T> span) => ref (span.Length != 0) ? ref Unsafe.AsRef(in span._reference) : ref Unsafe.AsRef<T>((void*)1);
-#pragma warning restore IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
 
         /// <summary>
         /// Casts a Span of one primitive type <typeparamref name="TFrom"/> to another primitive type <typeparamref name="TTo"/>.
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeMemory.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeMemory.cs
index 069d67e5e462..7fb4af35480a 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeMemory.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeMemory.cs
@@ -61,7 +61,7 @@ public static unsafe void Clear(void* ptr, nuint byteCount)
         [CLSCompliant(false)]
         public static void Copy(void* source, void* destination, nuint byteCount)
         {
-            Buffer.Memmove(ref *(byte*)destination, ref *(byte*)source, byteCount);
+            SpanHelpers.Memmove(ref *(byte*)destination, ref *(byte*)source, byteCount);
         }
 
         /// <summary>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/SafeBuffer.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/SafeBuffer.cs
index d35b5dd174fc..76858298feb2 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/SafeBuffer.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/SafeBuffer.cs
@@ -194,7 +194,7 @@ public T Read<T>(ulong byteOffset) where T : struct
             {
                 DangerousAddRef(ref mustCallRelease);
 
-                Buffer.Memmove(ref Unsafe.As<T, byte>(ref value), ref *ptr, sizeofT);
+                SpanHelpers.Memmove(ref Unsafe.As<T, byte>(ref value), ref *ptr, sizeofT);
             }
             finally
             {
@@ -281,7 +281,7 @@ public void Write<T>(ulong byteOffset, T value) where T : struct
             {
                 DangerousAddRef(ref mustCallRelease);
 
-                Buffer.Memmove(ref *ptr, ref Unsafe.As<T, byte>(ref value), sizeofT);
+                SpanHelpers.Memmove(ref *ptr, ref Unsafe.As<T, byte>(ref value), sizeofT);
             }
             finally
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs
index 489f977ebe0b..3202d6dc94a6 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs
@@ -1550,9 +1550,6 @@ internal Arm64() { }
             /// </summary>
             public static Vector128<ulong> InsertSelectedScalar(Vector128<ulong> result, [ConstantExpected(Max = (byte)(1))] byte resultIndex, Vector128<ulong> value, [ConstantExpected(Max = (byte)(1))] byte valueIndex) { throw new PlatformNotSupportedException(); }
 
-#if false
-            // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
             /// <summary>
             ///   A64: LD2 { Vn.16B, Vn+1.16B }[Vm], [Xn]
             /// </summary>
@@ -1702,7 +1699,6 @@ internal Arm64() { }
             ///   A64: LD4 { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }[Vm], [Xn]
             /// </summary>
             public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadAndInsertScalar((Vector128<double>, Vector128<double>, Vector128<double>, Vector128<double>) values, [ConstantExpected(Max = (byte)(1))] byte index, double* address) { throw new PlatformNotSupportedException(); }
-#endif
 
             /// <summary>
             /// float64x2_t vld1q_dup_f64 (float64_t const * ptr)
@@ -1722,9 +1718,6 @@ internal Arm64() { }
             /// </summary>
             public static unsafe Vector128<ulong> LoadAndReplicateToVector128(ulong* address) { throw new PlatformNotSupportedException(); }
 
-#if false
-            // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
             /// <summary>
             ///   A64: LD2R { Vn.16B, Vn+1.16B }, [Xn]
             /// </summary>
@@ -1874,7 +1867,6 @@ internal Arm64() { }
             ///   A64: LD4R { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }, [Xn]
             /// </summary>
             public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadAndReplicateToVector128x4(double* address) { throw new PlatformNotSupportedException(); }
-#endif
 
             /// <summary>
             ///   A64: LDP Dt1, Dt2, [Xn]
@@ -2106,9 +2098,6 @@ internal Arm64() { }
             /// </summary>
             public static unsafe (Vector128<ulong> Value1, Vector128<ulong> Value2) LoadPairVector128NonTemporal(ulong* address) { throw new PlatformNotSupportedException(); }
 
-#if false
-            // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
             /// <summary>
             ///   A64: LD2 { Vn.16B, Vn+1.16B }, [Xn]
             /// </summary>
@@ -2408,7 +2397,6 @@ internal Arm64() { }
             ///   A64: LD1 { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }, [Xn]
             /// </summary>
             public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadVector128x4(double* address) { throw new PlatformNotSupportedException(); }
-#endif
 
             /// <summary>
             /// float64x2_t vmaxq_f64 (float64x2_t a, float64x2_t b)
@@ -3788,9 +3776,6 @@ internal Arm64() { }
             /// </summary>
             public static unsafe void StorePairScalarNonTemporal(uint* address, Vector64<uint> value1, Vector64<uint> value2) { throw new PlatformNotSupportedException(); }
 
-#if false
-            // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
             /// <summary>
             /// void vst2_lane_s8 (int8_t * ptr, int8x16x2_t val, const int lane)
             ///   A64: ST2 { Vt.16B, Vt+1.16B }[index], [Xn]
@@ -4261,7 +4246,6 @@ internal Arm64() { }
             ///   A64: ST1 { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }, [Xn]
             /// </summary>
             public static unsafe void StoreVector128x4(double* address, (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) value) { throw new PlatformNotSupportedException(); }
-#endif
 
             /// <summary>
             /// float64x2_t vsubq_f64 (float64x2_t a, float64x2_t b)
@@ -9176,9 +9160,6 @@ internal Arm64() { }
         /// </summary>
         public static unsafe Vector128<ulong> LoadAndInsertScalar(Vector128<ulong> value, [ConstantExpected(Max = (byte)(1))] byte index, ulong* address) { throw new PlatformNotSupportedException(); }
 
-#if false
-        // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
         /// <summary>
         ///   A64: LD2 { Vn.8B, Vn+1.8B }[Vm], [Xn]
         /// </summary>
@@ -9283,7 +9264,6 @@ internal Arm64() { }
         ///   A64: LD4 { Vn.2S, Vn+1.2S, Vn+2.2S, Vn+3.2S }[Vm], [Xn]
         /// </summary>
         public static unsafe (Vector64<float> Value1, Vector64<float> Value2, Vector64<float> Value3, Vector64<float> Value4) LoadAndInsertScalar((Vector64<float>, Vector64<float>, Vector64<float>, Vector64<float>) values, [ConstantExpected(Max = (byte)(1))] byte index, float* address) { throw new PlatformNotSupportedException(); }
-#endif
 
         /// <summary>
         /// uint8x8_t vld1_dup_u8 (uint8_t const * ptr)
@@ -9383,9 +9363,6 @@ internal Arm64() { }
         /// </summary>
         public static unsafe Vector128<uint> LoadAndReplicateToVector128(uint* address) { throw new PlatformNotSupportedException(); }
 
-#if false
-        // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
         /// <summary>
         ///   A64: LD2R { Vn.8B, Vn+1.8B }, [Xn]
         /// </summary>
@@ -9490,7 +9467,6 @@ internal Arm64() { }
         ///   A64: LD4R { Vn.2S, Vn+1.2S, Vn+2.2S, Vn+3.2S }, [Xn]
         /// </summary>
         public static unsafe (Vector64<float> Value1, Vector64<float> Value2, Vector64<float> Value3, Vector64<float> Value4) LoadAndReplicateToVector64x4(float* address) { throw new PlatformNotSupportedException(); }
-#endif
 
         /// <summary>
         /// uint8x8_t vld1_u8 (uint8_t const * ptr)
@@ -9632,9 +9608,6 @@ internal Arm64() { }
         /// </summary>
         public static unsafe Vector128<ulong> LoadVector128(ulong* address) { throw new PlatformNotSupportedException(); }
 
-#if false
-        // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
         /// <summary>
         ///   A64: LD2 { Vn.8B, Vn+1.8B }, [Xn]
         /// </summary>
@@ -9844,7 +9817,6 @@ internal Arm64() { }
         ///   A64: LD1 { Vn.2S, Vn+1.2S, Vn+2.2S, Vn+3.2S }, [Xn]
         /// </summary>
         public static unsafe (Vector64<float> Value1, Vector64<float> Value2, Vector64<float> Value3, Vector64<float> Value4) LoadVector64x4(float* address) { throw new PlatformNotSupportedException(); }
-#endif
 
         /// <summary>
         /// uint8x8_t vmax_u8 (uint8x8_t a, uint8x8_t b)
@@ -15956,9 +15928,6 @@ internal Arm64() { }
         /// </summary>
         public static unsafe void StoreSelectedScalar(ulong* address, Vector128<ulong> value, [ConstantExpected(Max = (byte)(1))] byte index) { throw new PlatformNotSupportedException(); }
 
-#if false
-        // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
         /// <summary>
         ///   A64: ST2 { Vt.8B, Vt+1.8B }[index], [Xn]
         /// </summary>
@@ -16273,7 +16242,6 @@ internal Arm64() { }
         ///   A64: ST1 { Vn.2S, Vn+1.2S, Vn+2.2S, Vn+3.2S }, [Xn]
         /// </summary>
         public static unsafe void StoreVector64x4(float* address, (Vector64<float> Value1, Vector64<float> Value2, Vector64<float> Value3, Vector64<float> Value4) value) { throw new PlatformNotSupportedException(); }
-#endif
 
         /// <summary>
         /// uint8x8_t vsub_u8 (uint8x8_t a, uint8x8_t b)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs
index 9b58258f66dd..d343750838cd 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs
@@ -1548,9 +1548,6 @@ internal Arm64() { }
             /// </summary>
             public static Vector128<ulong> InsertSelectedScalar(Vector128<ulong> result, [ConstantExpected(Max = (byte)(1))] byte resultIndex, Vector128<ulong> value, [ConstantExpected(Max = (byte)(1))] byte valueIndex) => Insert(result, resultIndex, Extract(value, valueIndex));
 
-#if false
-            // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-        
             /// <summary>
             ///   A64: LD2 { Vn.16B, Vn+1.16B }[Vm], [Xn]
             /// </summary>
@@ -1700,7 +1697,7 @@ internal Arm64() { }
             ///   A64: LD4 { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }[Vm], [Xn]
             /// </summary>
             public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadAndInsertScalar((Vector128<double>, Vector128<double>, Vector128<double>, Vector128<double>) values, [ConstantExpected(Max = (byte)(1))] byte index, double* address) => LoadAndInsertScalar(values, index, address);
-#endif
+
             /// <summary>
             /// float64x2_t vld1q_dup_f64 (float64_t const * ptr)
             ///   A64: LD1R { Vt.2D }, [Xn]
@@ -1719,9 +1716,6 @@ internal Arm64() { }
             /// </summary>
             public static unsafe Vector128<ulong> LoadAndReplicateToVector128(ulong* address) => LoadAndReplicateToVector128(address);
 
-#if false
-            // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
             /// <summary>
             ///   A64: LD2R { Vn.16B, Vn+1.16B }, [Xn]
             /// </summary>
@@ -1871,7 +1865,6 @@ internal Arm64() { }
             ///   A64: LD4R { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }, [Xn]
             /// </summary>
             public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadAndReplicateToVector128x4(double* address) => LoadAndReplicateToVector128x4(address);
-#endif
 
             /// <summary>
             ///   A64: LDP Dt1, Dt2, [Xn]
@@ -2103,9 +2096,6 @@ internal Arm64() { }
             /// </summary>
             public static unsafe (Vector128<ulong> Value1, Vector128<ulong> Value2) LoadPairVector128NonTemporal(ulong* address) => LoadPairVector128NonTemporal(address);
 
-#if false
-            // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
             /// <summary>
             ///   A64: LD2 { Vn.16B, Vn+1.16B }, [Xn]
             /// </summary>
@@ -2405,7 +2395,6 @@ internal Arm64() { }
             ///   A64: LD1 { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }, [Xn]
             /// </summary>
             public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadVector128x4(double* address) => LoadVector128x4(address);
-#endif
 
             /// <summary>
             /// float64x2_t vmaxq_f64 (float64x2_t a, float64x2_t b)
@@ -3785,9 +3774,6 @@ internal Arm64() { }
             /// </summary>
             public static unsafe void StorePairScalarNonTemporal(uint* address, Vector64<uint> value1, Vector64<uint> value2) => StorePairScalarNonTemporal(address, value1, value2);
 
-#if false
-            // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
             /// <summary>
             /// void vst2_lane_s8 (int8_t * ptr, int8x16x2_t val, const int lane)
             ///   A64: ST2 { Vt.16B, Vt+1.16B }[index], [Xn]
@@ -4258,7 +4244,6 @@ internal Arm64() { }
             ///   A64: ST1 { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }, [Xn]
             /// </summary>
             public static unsafe void StoreVector128x4(double* address, (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) value) => StoreVector128x4(address, value);
-#endif
 
             /// <summary>
             /// float64x2_t vsubq_f64 (float64x2_t a, float64x2_t b)
@@ -9172,9 +9157,6 @@ internal Arm64() { }
         /// </summary>
         public static unsafe Vector128<ulong> LoadAndInsertScalar(Vector128<ulong> value, [ConstantExpected(Max = (byte)(1))] byte index, ulong* address) => LoadAndInsertScalar(value, index, address);
 
-#if false
-        // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
         /// <summary>
         ///   A64: LD2 { Vn.8B, Vn+1.8B }[Vm], [Xn]
         /// </summary>
@@ -9279,7 +9261,6 @@ internal Arm64() { }
         ///   A64: LD4 { Vn.2S, Vn+1.2S, Vn+2.2S, Vn+3.2S }[Vm], [Xn]
         /// </summary>
         public static unsafe (Vector64<float> Value1, Vector64<float> Value2, Vector64<float> Value3, Vector64<float> Value4) LoadAndInsertScalar((Vector64<float>, Vector64<float>, Vector64<float>, Vector64<float>) values, [ConstantExpected(Max = (byte)(1))] byte index, float* address) => LoadAndInsertScalar(values, index, address);
-#endif
 
         /// <summary>
         /// uint8x8_t vld1_dup_u8 (uint8_t const * ptr)
@@ -9379,9 +9360,6 @@ internal Arm64() { }
         /// </summary>
         public static unsafe Vector128<uint> LoadAndReplicateToVector128(uint* address) => LoadAndReplicateToVector128(address);
 
-#if false
-        // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
         /// <summary>
         ///   A64: LD2R { Vn.8B, Vn+1.8B }, [Xn]
         /// </summary>
@@ -9486,7 +9464,6 @@ internal Arm64() { }
         ///   A64: LD4R { Vn.2S, Vn+1.2S, Vn+2.2S, Vn+3.2S }, [Xn]
         /// </summary>
         public static unsafe (Vector64<float> Value1, Vector64<float> Value2, Vector64<float> Value3, Vector64<float> Value4) LoadAndReplicateToVector64x4(float* address) => LoadAndReplicateToVector64x4(address);
-#endif
 
         /// <summary>
         /// uint8x8_t vld1_u8 (uint8_t const * ptr)
@@ -9628,9 +9605,6 @@ internal Arm64() { }
         /// </summary>
         public static unsafe Vector128<ulong> LoadVector128(ulong* address) => LoadVector128(address);
 
-#if false
-        // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
         /// <summary>
         ///   A64: LD2 { Vn.8B, Vn+1.8B }, [Xn]
         /// </summary>
@@ -9840,7 +9814,6 @@ internal Arm64() { }
         ///   A64: LD1 { Vn.2S, Vn+1.2S, Vn+2.2S, Vn+3.2S }, [Xn]
         /// </summary>
         public static unsafe (Vector64<float> Value1, Vector64<float> Value2, Vector64<float> Value3, Vector64<float> Value4) LoadVector64x4(float* address) => LoadVector64x4(address);
-#endif
 
         /// <summary>
         /// uint8x8_t vmax_u8 (uint8x8_t a, uint8x8_t b)
@@ -15952,9 +15925,6 @@ internal Arm64() { }
         /// </summary>
         public static unsafe void StoreSelectedScalar(ulong* address, Vector128<ulong> value, [ConstantExpected(Max = (byte)(1))] byte index) => StoreSelectedScalar(address, value, index);
 
-#if false
-        // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-
         /// <summary>
         ///   A64: ST2 { Vt.8B, Vt+1.8B }[index], [Xn]
         /// </summary>
@@ -16269,7 +16239,6 @@ internal Arm64() { }
         ///   A64: ST1 { Vn.2S, Vn+1.2S, Vn+2.2S, Vn+3.2S }, [Xn]
         /// </summary>
         public static unsafe void StoreVector64x4(float* address, (Vector64<float> Value1, Vector64<float> Value2, Vector64<float> Value3, Vector64<float> Value4) value) => StoreVector64x4(address, value);
-#endif
 
         /// <summary>
         /// uint8x8_t vsub_u8 (uint8x8_t a, uint8x8_t b)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs
new file mode 100644
index 000000000000..868300bf14ac
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Enums.cs
@@ -0,0 +1,95 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Runtime.Intrinsics.Arm
+{
+    // Used to specify or limit the number of elements used within an method.
+    // Matches the field "pattern" within the Arm Architecture Reference Manual
+    public enum SveMaskPattern : byte
+    {
+        /// <summary>
+        /// POW2
+        /// </summary>
+        LargestPowerOf2 = 0,      // The largest power of 2.
+
+        /// <summary>
+        /// VL1
+        /// </summary>
+        VectorCount1 = 1,         // Exactly 1 element.
+
+        /// <summary>
+        /// VL2
+        /// </summary>
+        VectorCount2 = 2,         // Exactly 2 elements.
+
+        /// <summary>
+        /// VL3
+        /// </summary>
+        VectorCount3 = 3,         // Exactly 3 elements.
+
+        /// <summary>
+        /// VL4
+        /// </summary>
+        VectorCount4 = 4,         // Exactly 4 elements.
+
+        /// <summary>
+        /// VL5
+        /// </summary>
+        VectorCount5 = 5,         // Exactly 5 elements.
+
+        /// <summary>
+        /// VL6
+        /// </summary>
+        VectorCount6 = 6,         // Exactly 6 elements.
+
+        /// <summary>
+        /// VL7
+        /// </summary>
+        VectorCount7 = 7,         // Exactly 7 elements.
+
+        /// <summary>
+        /// VL8
+        /// </summary>
+        VectorCount8 = 8,         // Exactly 8 elements.
+
+        /// <summary>
+        /// VL16
+        /// </summary>
+        VectorCount16 = 9,        // Exactly 16 elements.
+
+        /// <summary>
+        /// VL32
+        /// </summary>
+        VectorCount32 = 10,       // Exactly 32 elements.
+
+        /// <summary>
+        /// VL64
+        /// </summary>
+        VectorCount64 = 11,       // Exactly 64 elements.
+
+        /// <summary>
+        /// VL128
+        /// </summary>
+        VectorCount128 = 12,      // Exactly 128 elements.
+
+        /// <summary>
+        /// VL256
+        /// </summary>
+        VectorCount256 = 13,      // Exactly 256 elements.
+
+        /// <summary>
+        /// MUL4
+        /// </summary>
+        LargestMultipleOf4 = 29,  // The largest multiple of 4.
+
+        /// <summary>
+        /// MUL3
+        /// </summary>
+        LargestMultipleOf3 = 30,  // The largest multiple of 3.
+
+        /// <summary>
+        /// ALL
+        /// </summary>
+        All  = 31                 // All available (implicitly a multiple of two).
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs
index 3eeb40d5d9de..fbd5ee65ca74 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs
@@ -30,5 +30,170 @@ internal Arm64() { }
 
             public static new bool IsSupported { [Intrinsic] get { return false; } }
         }
+
+        ///  CreateTrueMaskByte : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<byte> CreateTrueMaskByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); }
+
+
+        ///  CreateTrueMaskDouble : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<double> CreateTrueMaskDouble([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); }
+
+
+        ///  CreateTrueMaskInt16 : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<short> CreateTrueMaskInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); }
+
+
+        ///  CreateTrueMaskInt32 : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<int> CreateTrueMaskInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); }
+
+
+        ///  CreateTrueMaskInt64 : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<long> CreateTrueMaskInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); }
+
+
+        ///  CreateTrueMaskSByte : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<sbyte> CreateTrueMaskSByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); }
+
+
+        ///  CreateTrueMaskSingle : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<float> CreateTrueMaskSingle([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); }
+
+
+        ///  CreateTrueMaskUInt16 : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b16(enum svpattern pattern)
+        ///   PTRUE Presult.H, pattern
+        /// </summary>
+        public static unsafe Vector<ushort> CreateTrueMaskUInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); }
+
+
+        ///  CreateTrueMaskUInt32 : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b32(enum svpattern pattern)
+        ///   PTRUE Presult.S, pattern
+        /// </summary>
+        public static unsafe Vector<uint> CreateTrueMaskUInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); }
+
+
+        ///  CreateTrueMaskUInt64 : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b64(enum svpattern pattern)
+        ///   PTRUE Presult.D, pattern
+        /// </summary>
+        public static unsafe Vector<ulong> CreateTrueMaskUInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); }
+
+
+
+        ///  LoadVector : Unextended load
+
+        /// <summary>
+        /// svint8_t svld1[_s8](svbool_t pg, const int8_t *base)
+        ///   LD1B Zresult.B, Pg/Z, [Xarray, Xindex]
+        ///   LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<sbyte> LoadVector(Vector<sbyte> mask, sbyte* address) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// svint16_t svld1[_s16](svbool_t pg, const int16_t *base)
+        ///   LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]
+        ///   LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<short> LoadVector(Vector<short> mask, short* address) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// svint32_t svld1[_s32](svbool_t pg, const int32_t *base)
+        ///   LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]
+        ///   LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<int> LoadVector(Vector<int> mask, int* address) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// svint64_t svld1[_s64](svbool_t pg, const int64_t *base)
+        ///   LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]
+        ///   LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<long> LoadVector(Vector<long> mask, long* address) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// svuint8_t svld1[_u8](svbool_t pg, const uint8_t *base)
+        ///   LD1B Zresult.B, Pg/Z, [Xarray, Xindex]
+        ///   LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<byte> LoadVector(Vector<byte> mask, byte* address) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// svuint16_t svld1[_u16](svbool_t pg, const uint16_t *base)
+        ///   LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]
+        ///   LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<ushort> LoadVector(Vector<ushort> mask, ushort* address) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// svuint32_t svld1[_u32](svbool_t pg, const uint32_t *base)
+        ///   LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]
+        ///   LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<uint> LoadVector(Vector<uint> mask, uint* address) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// svuint64_t svld1[_u64](svbool_t pg, const uint64_t *base)
+        ///   LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]
+        ///   LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<ulong> LoadVector(Vector<ulong> mask, ulong* address) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// svfloat32_t svld1[_f32](svbool_t pg, const float32_t *base)
+        ///   LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]
+        ///   LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<float> LoadVector(Vector<float> mask, float* address) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// svfloat64_t svld1[_f64](svbool_t pg, const float64_t *base)
+        ///   LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]
+        ///   LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<double> LoadVector(Vector<double> mask, double* address) { throw new PlatformNotSupportedException(); }
+
+
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs
index 7a71144e0bc3..6ba2a2c67bc8 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs
@@ -27,5 +27,170 @@ internal Arm64() { }
 
             public static new bool IsSupported { get => IsSupported; }
         }
+
+
+        ///  CreateTrueMaskByte : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<byte> CreateTrueMaskByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskByte(pattern);
+
+
+        ///  CreateTrueMaskDouble : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<double> CreateTrueMaskDouble([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskDouble(pattern);
+
+
+        ///  CreateTrueMaskInt16 : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<short> CreateTrueMaskInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskInt16(pattern);
+
+
+        ///  CreateTrueMaskInt32 : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<int> CreateTrueMaskInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskInt32(pattern);
+
+
+        ///  CreateTrueMaskInt64 : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<long> CreateTrueMaskInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskInt64(pattern);
+
+
+        ///  CreateTrueMaskSByte : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<sbyte> CreateTrueMaskSByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskSByte(pattern);
+
+
+        ///  CreateTrueMaskSingle : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b8(enum svpattern pattern)
+        ///   PTRUE Presult.B, pattern
+        /// </summary>
+        public static unsafe Vector<float> CreateTrueMaskSingle([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskSingle(pattern);
+
+
+        ///  CreateTrueMaskUInt16 : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b16(enum svpattern pattern)
+        ///   PTRUE Presult.H, pattern
+        /// </summary>
+        public static unsafe Vector<ushort> CreateTrueMaskUInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskUInt16(pattern);
+
+
+        ///  CreateTrueMaskUInt32 : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b32(enum svpattern pattern)
+        ///   PTRUE Presult.S, pattern
+        /// </summary>
+        public static unsafe Vector<uint> CreateTrueMaskUInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskUInt32(pattern);
+
+
+        ///  CreateTrueMaskUInt64 : Set predicate elements to true
+
+        /// <summary>
+        /// svbool_t svptrue_pat_b64(enum svpattern pattern)
+        ///   PTRUE Presult.D, pattern
+        /// </summary>
+        public static unsafe Vector<ulong> CreateTrueMaskUInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskUInt64(pattern);
+
+
+
+        ///  LoadVector : Unextended load
+
+        /// <summary>
+        /// svint8_t svld1[_s8](svbool_t pg, const int8_t *base)
+        ///   LD1B Zresult.B, Pg/Z, [Xarray, Xindex]
+        ///   LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<sbyte> LoadVector(Vector<sbyte> mask, sbyte* address) => LoadVector(mask, address);
+
+        /// <summary>
+        /// svint16_t svld1[_s16](svbool_t pg, const int16_t *base)
+        ///   LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]
+        ///   LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<short> LoadVector(Vector<short> mask, short* address) => LoadVector(mask, address);
+
+        /// <summary>
+        /// svint32_t svld1[_s32](svbool_t pg, const int32_t *base)
+        ///   LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]
+        ///   LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<int> LoadVector(Vector<int> mask, int* address) => LoadVector(mask, address);
+
+        /// <summary>
+        /// svint64_t svld1[_s64](svbool_t pg, const int64_t *base)
+        ///   LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]
+        ///   LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<long> LoadVector(Vector<long> mask, long* address) => LoadVector(mask, address);
+
+        /// <summary>
+        /// svuint8_t svld1[_u8](svbool_t pg, const uint8_t *base)
+        ///   LD1B Zresult.B, Pg/Z, [Xarray, Xindex]
+        ///   LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<byte> LoadVector(Vector<byte> mask, byte* address) => LoadVector(mask, address);
+
+        /// <summary>
+        /// svuint16_t svld1[_u16](svbool_t pg, const uint16_t *base)
+        ///   LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]
+        ///   LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<ushort> LoadVector(Vector<ushort> mask, ushort* address) => LoadVector(mask, address);
+
+        /// <summary>
+        /// svuint32_t svld1[_u32](svbool_t pg, const uint32_t *base)
+        ///   LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]
+        ///   LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<uint> LoadVector(Vector<uint> mask, uint* address) => LoadVector(mask, address);
+
+        /// <summary>
+        /// svuint64_t svld1[_u64](svbool_t pg, const uint64_t *base)
+        ///   LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]
+        ///   LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<ulong> LoadVector(Vector<ulong> mask, ulong* address) => LoadVector(mask, address);
+
+        /// <summary>
+        /// svfloat32_t svld1[_f32](svbool_t pg, const float32_t *base)
+        ///   LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]
+        ///   LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<float> LoadVector(Vector<float> mask, float* address) => LoadVector(mask, address);
+
+        /// <summary>
+        /// svfloat64_t svld1[_f64](svbool_t pg, const float64_t *base)
+        ///   LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]
+        ///   LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
+        /// </summary>
+        public static unsafe Vector<double> LoadVector(Vector<double> mask, double* address) => LoadVector(mask, address);
+
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
index f2eae1dad349..1347a082afab 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
@@ -36,6 +36,9 @@ public static unsafe class Vector256
         internal const int Alignment = 8;
 #elif TARGET_ARM64
         internal const int Alignment = 16;
+#elif TARGET_RISCV64
+        // TODO-RISCV64: Update alignment to proper value when we implement RISC-V intrinsic.
+        internal const int Alignment = 16;
 #else
         internal const int Alignment = 32;
 #endif
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
index d18e705c3b13..edb84585a987 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
@@ -36,6 +36,9 @@ public static unsafe class Vector512
         internal const int Alignment = 8;
 #elif TARGET_ARM64
         internal const int Alignment = 16;
+#elif TARGET_RISCV64
+        // TODO-RISCV64: Update alignment to proper value when we implement RISC-V intrinsic.
+        internal const int Alignment = 16;
 #else
         internal const int Alignment = 64;
 #endif
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
index ddd59d1adcab..50e9f697c94f 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
@@ -700,18 +700,16 @@ public static Vector64<T> Create<T>(ReadOnlySpan<T> values)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector64<byte> Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7)
         {
-            byte* pResult = stackalloc byte[8]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-                e4,
-                e5,
-                e6,
-                e7,
-            };
-            return Unsafe.AsRef<Vector64<byte>>(pResult);
+            Unsafe.SkipInit(out Vector64<byte> result);
+            result.SetElementUnsafe(0, e0);
+            result.SetElementUnsafe(1, e1);
+            result.SetElementUnsafe(2, e2);
+            result.SetElementUnsafe(3, e3);
+            result.SetElementUnsafe(4, e4);
+            result.SetElementUnsafe(5, e5);
+            result.SetElementUnsafe(6, e6);
+            result.SetElementUnsafe(7, e7);
+            return result;
         }
 
         /// <summary>Creates a new <see cref="Vector64{Int16}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -725,14 +723,12 @@ public static unsafe Vector64<byte> Create(byte e0, byte e1, byte e2, byte e3, b
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector64<short> Create(short e0, short e1, short e2, short e3)
         {
-            short* pResult = stackalloc short[4]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-            };
-            return Unsafe.AsRef<Vector64<short>>(pResult);
+            Unsafe.SkipInit(out Vector64<short> result);
+            result.SetElementUnsafe(0, e0);
+            result.SetElementUnsafe(1, e1);
+            result.SetElementUnsafe(2, e2);
+            result.SetElementUnsafe(3, e3);
+            return result;
         }
 
         /// <summary>Creates a new <see cref="Vector64{Int32}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -744,12 +740,10 @@ public static unsafe Vector64<short> Create(short e0, short e1, short e2, short
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector64<int> Create(int e0, int e1)
         {
-            int* pResult = stackalloc int[2]
-            {
-                e0,
-                e1,
-            };
-            return Unsafe.AsRef<Vector64<int>>(pResult);
+            Unsafe.SkipInit(out Vector64<int> result);
+            result.SetElementUnsafe(0, e0);
+            result.SetElementUnsafe(1, e1);
+            return result;
         }
 
         /// <summary>Creates a new <see cref="Vector64{SByte}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -768,18 +762,16 @@ public static unsafe Vector64<int> Create(int e0, int e1)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector64<sbyte> Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7)
         {
-            sbyte* pResult = stackalloc sbyte[8]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-                e4,
-                e5,
-                e6,
-                e7,
-            };
-            return Unsafe.AsRef<Vector64<sbyte>>(pResult);
+            Unsafe.SkipInit(out Vector64<sbyte> result);
+            result.SetElementUnsafe(0, e0);
+            result.SetElementUnsafe(1, e1);
+            result.SetElementUnsafe(2, e2);
+            result.SetElementUnsafe(3, e3);
+            result.SetElementUnsafe(4, e4);
+            result.SetElementUnsafe(5, e5);
+            result.SetElementUnsafe(6, e6);
+            result.SetElementUnsafe(7, e7);
+            return result;
         }
 
         /// <summary>Creates a new <see cref="Vector64{Single}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -790,12 +782,10 @@ public static unsafe Vector64<sbyte> Create(sbyte e0, sbyte e1, sbyte e2, sbyte
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector64<float> Create(float e0, float e1)
         {
-            float* pResult = stackalloc float[2]
-            {
-                e0,
-                e1,
-            };
-            return Unsafe.AsRef<Vector64<float>>(pResult);
+            Unsafe.SkipInit(out Vector64<float> result);
+            result.SetElementUnsafe(0, e0);
+            result.SetElementUnsafe(1, e1);
+            return result;
         }
 
         /// <summary>Creates a new <see cref="Vector64{UInt16}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -810,14 +800,12 @@ public static unsafe Vector64<float> Create(float e0, float e1)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector64<ushort> Create(ushort e0, ushort e1, ushort e2, ushort e3)
         {
-            ushort* pResult = stackalloc ushort[4]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-            };
-            return Unsafe.AsRef<Vector64<ushort>>(pResult);
+            Unsafe.SkipInit(out Vector64<ushort> result);
+            result.SetElementUnsafe(0, e0);
+            result.SetElementUnsafe(1, e1);
+            result.SetElementUnsafe(2, e2);
+            result.SetElementUnsafe(3, e3);
+            return result;
         }
 
         /// <summary>Creates a new <see cref="Vector64{UInt32}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -830,12 +818,10 @@ public static unsafe Vector64<ushort> Create(ushort e0, ushort e1, ushort e2, us
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector64<uint> Create(uint e0, uint e1)
         {
-            uint* pResult = stackalloc uint[2]
-            {
-                e0,
-                e1,
-            };
-            return Unsafe.AsRef<Vector64<uint>>(pResult);
+            Unsafe.SkipInit(out Vector64<uint> result);
+            result.SetElementUnsafe(0, e0);
+            result.SetElementUnsafe(1, e1);
+            return result;
         }
 
         /// <summary>Creates a new <see cref="Vector64{T}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.PlatformNotSupported.cs
index 6bac345ed9d3..305e7333dbc5 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.PlatformNotSupported.cs
@@ -322,7 +322,16 @@ internal X64() { }
         ///   VCVTUQQ2PS ymm1 {k1}{z}, zmm2/m512/m64bcst
         /// </summary>
         public static Vector256<float> ConvertToVector256Single(Vector512<ulong> value) { throw new PlatformNotSupportedException(); }
-
+        /// <summary>
+        /// __m256 _mm512_cvt_roundepi64_ps (__m512i a, int r)
+        ///   VCVTQQ2PS ymm1, zmm2 {er}
+        /// </summary>
+        public static Vector256<float> ConvertToVector256Single(Vector512<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256 _mm512_cvt_roundepu64_ps (__m512i a, int r)
+        ///   VCVTUQQ2PS ymm1, zmm2 {er}
+        /// </summary>
+        public static Vector256<float> ConvertToVector256Single(Vector512<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __m512d _mm512_cvtepi64_pd (__m512i a)
         ///   VCVTQQ2PD zmm1 {k1}{z}, zmm2/m512/m64bcst
@@ -334,6 +343,16 @@ internal X64() { }
         /// </summary>
         public static Vector512<double> ConvertToVector512Double(Vector512<ulong> value) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m512d _mm512_cvt_roundepi64_pd (__m512i a, int r)
+        ///   VCVTQQ2PD zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<double> ConvertToVector512Double(Vector512<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512d _mm512_cvt_roundepu64_pd (__m512i a, int r)
+        ///   VCVTUQQ2PD zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<double> ConvertToVector512Double(Vector512<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m512i _mm512_cvtps_epi64 (__m512 a)
         ///   VCVTPS2QQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
         /// </summary>
@@ -344,6 +363,16 @@ internal X64() { }
         /// </summary>
         public static Vector512<long> ConvertToVector512Int64(Vector512<double> value) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m512i _mm512_cvt_roundps_epi64 (__m512 a, int r)
+        ///   VCVTPS2QQ zmm1, ymm2 {er}
+        /// </summary>
+        public static Vector512<long> ConvertToVector512Int64(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512i _mm512_cvt_roundpd_epi64 (__m512d a, int r)
+        ///   VCVTPD2QQ zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<long> ConvertToVector512Int64(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m512i _mm512_cvttps_epi64 (__m512 a)
         ///   VCVTTPS2QQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
         /// </summary>
@@ -364,6 +393,16 @@ internal X64() { }
         /// </summary>
         public static Vector512<ulong> ConvertToVector512UInt64(Vector512<double> value) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m512i _mm512_cvt_roundps_epu64 (__m512 a, int r)
+        ///   VCVTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
+        /// </summary>
+        public static Vector512<ulong> ConvertToVector512UInt64(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512i _mm512_cvt_roundpd_epu64 (__m512d a, int r)
+        ///   VCVTPD2UQQ zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
+        /// </summary>
+        public static Vector512<ulong> ConvertToVector512UInt64(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m512i _mm512_cvttps_epu64 (__m512 a)
         ///   VCVTTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
         /// </summary>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.cs
index 97898da3ff6e..403a851dbd1f 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512DQ.cs
@@ -323,6 +323,16 @@ internal X64() { }
         ///   VCVTUQQ2PS ymm1 {k1}{z}, zmm2/m512/m64bcst
         /// </summary>
         public static Vector256<float> ConvertToVector256Single(Vector512<ulong> value) => ConvertToVector256Single(value);
+        /// <summary>
+        /// __m256 _mm512_cvt_roundepi64_ps (__m512i a, int r)
+        ///   VCVTQQ2PS ymm1, zmm2 {er}
+        /// </summary>
+        public static Vector256<float> ConvertToVector256Single(Vector512<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Single(value, mode);
+        /// <summary>
+        /// __m256 _mm512_cvt_roundepu64_ps (__m512i a, int r)
+        ///   VCVTUQQ2PS ymm1, zmm2 {er}
+        /// </summary>
+        public static Vector256<float> ConvertToVector256Single(Vector512<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Single(value, mode);
 
         /// <summary>
         /// __m512d _mm512_cvtepi64_pd (__m512i a)
@@ -334,6 +344,17 @@ internal X64() { }
         ///   VCVTUQQ2PD zmm1 {k1}{z}, zmm2/m512/m64bcst
         /// </summary>
         public static Vector512<double> ConvertToVector512Double(Vector512<ulong> value) => ConvertToVector512Double(value);
+        /// <summary>
+        /// __m512d _mm512_cvt_roundepi64_pd (__m512i a, int r)
+        ///   VCVTQQ2PD zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<double> ConvertToVector512Double(Vector512<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512Double(value, mode);
+        /// <summary>
+        /// __m512d _mm512_cvt_roundepu64_pd (__m512i a, int r)
+        ///   VCVTUQQ2PD zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<double> ConvertToVector512Double(Vector512<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512Double(value, mode);
+
         /// <summary>
         /// __m512i _mm512_cvtps_epi64 (__m512 a)
         ///   VCVTPS2QQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
@@ -344,6 +365,17 @@ internal X64() { }
         ///   VCVTPD2QQ zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
         /// </summary>
         public static Vector512<long> ConvertToVector512Int64(Vector512<double> value) => ConvertToVector512Int64(value);
+        /// <summary>
+        /// __m512i _mm512_cvt_roundps_epi64 (__m512 a, int r)
+        ///   VCVTPS2QQ zmm1, ymm2 {er}
+        /// </summary>
+        public static Vector512<long> ConvertToVector512Int64(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512Int64(value, mode);
+        /// <summary>
+        /// __m512i _mm512_cvt_roundpd_epi64 (__m512d a, int r)
+        ///   VCVTPD2QQ zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<long> ConvertToVector512Int64(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512Int64(value, mode);
+
         /// <summary>
         /// __m512i _mm512_cvttps_epi64 (__m512 a)
         ///   VCVTTPS2QQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
@@ -365,6 +397,16 @@ internal X64() { }
         /// </summary>
         public static Vector512<ulong> ConvertToVector512UInt64(Vector512<double> value) => ConvertToVector512UInt64(value);
         /// <summary>
+        /// __m512i _mm512_cvt_roundps_epu64 (__m512 a, int r)
+        ///   VCVTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
+        /// </summary>
+        public static Vector512<ulong> ConvertToVector512UInt64(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512UInt64(value, mode);
+        /// <summary>
+        /// __m512i _mm512_cvt_roundpd_epu64 (__m512d a, int r)
+        ///   VCVTPD2UQQ zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
+        /// </summary>
+        public static Vector512<ulong> ConvertToVector512UInt64(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512UInt64(value, mode);
+        /// <summary>
         /// __m512i _mm512_cvttps_epu64 (__m512 a)
         ///   VCVTTPS2UQQ zmm1 {k1}{z}, ymm2/m256/m32bcst{er}
         /// </summary>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs
index 5e6bc6e2023e..249d3af06fbb 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.PlatformNotSupported.cs
@@ -1261,19 +1261,54 @@ internal X64() { }
 
             public static new bool IsSupported { [Intrinsic] get { return false; } }
 
+            /// <summary>
+            /// __m128 _mm_cvt_roundi64_ss (__m128 a, __int64 b, int rounding)
+            ///   VCVTSI2SS xmm1, xmm2, r64 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, ulong value) { throw new PlatformNotSupportedException(); }
             /// <summary>
             /// __m128 _mm_cvtsi64_ss (__m128 a, __int64 b)
             ///   VCVTUSI2SS xmm1, xmm2, r/m64
             /// This intrinsic is only available on 64-bit processes
             /// </summary>
-            public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, ulong value) { throw new PlatformNotSupportedException(); }
+            public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, long value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+            /// <summary>
+            /// __m128 _mm_cvt_roundu64_ss (__m128 a, unsigned __int64 b, int rounding)
+            ///   VCVTUSI2SS xmm1, xmm2, r64 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, ulong value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
             /// <summary>
             /// __m128d _mm_cvtsi64_sd (__m128d a, __int64 b)
             ///   VCVTUSI2SD xmm1, xmm2, r/m64
             /// This intrinsic is only available on 64-bit processes
             /// </summary>
             public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, ulong value) { throw new PlatformNotSupportedException(); }
-
+            /// <summary>
+            /// __m128d _mm_cvt_roundsi64_sd (__m128d a, __int64 b, int rounding)
+            ///   VCVTSI2SD xmm1, xmm2, r64 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, long value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+            /// <summary>
+            /// __m128d _mm_cvt_roundu64_sd (__m128d a, unsigned __int64 b, int rounding)
+            ///   VCVTUSI2SD xmm1, xmm2, r64 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, ulong value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+            /// <summary>
+            /// __int64 _mm_cvt_roundss_i64 (__m128 a, int rounding)
+            ///   VCVTSS2SI r64, xmm1 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static long ConvertToInt64(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+            /// <summary>
+            /// __int64 _mm_cvt_roundsd_i64 (__m128d a, int rounding)
+            ///   VCVTSD2SI r64, xmm1 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static long ConvertToInt64(Vector128<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
             /// <summary>
             /// unsigned __int64 _mm_cvtss_u64 (__m128 a)
             ///   VCVTSS2USI r64, xmm1/m32{er}
@@ -1281,11 +1316,23 @@ internal X64() { }
             /// </summary>
             public static ulong ConvertToUInt64(Vector128<float> value) { throw new PlatformNotSupportedException(); }
             /// <summary>
+            /// unsigned __int64 _mm_cvt_roundss_u64 (__m128 a, int rounding)
+            ///   VCVTSS2USI r64, xmm1 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static ulong ConvertToUInt64(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+            /// <summary>
             /// unsigned __int64 _mm_cvtsd_u64 (__m128d a)
             ///   VCVTSD2USI r64, xmm1/m64{er}
             /// This intrinsic is only available on 64-bit processes
             /// </summary>
             public static ulong ConvertToUInt64(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+            /// <summary>
+            /// unsigned __int64 _mm_cvt_roundsd_u64 (__m128d a, int rounding)
+            ///   VCVTSD2USI r64, xmm1 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static ulong ConvertToUInt64(Vector128<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
 
             /// <summary>
             /// unsigned __int64 _mm_cvttss_u64 (__m128 a)
@@ -1338,8 +1385,8 @@ internal X64() { }
         /// </summary>
         public static Vector512<double> Add(Vector512<double> left, Vector512<double> right) { throw new PlatformNotSupportedException(); }
         /// <summary>
-        /// __m512d _mm512_add_pd (__m512d a, __m512d b)
-        ///   VADDPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst{er}
+        /// __m512d _mm512_add_round_pd (__m512d a, __m512d b, int rounding)
+        ///   VADDPD zmm1, zmm2, zmm3 {er}
         /// </summary>
         public static Vector512<double> Add(Vector512<double> left, Vector512<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
@@ -1347,7 +1394,21 @@ internal X64() { }
         ///   VADDPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst{er}
         /// </summary>
         public static Vector512<float> Add(Vector512<float> left, Vector512<float> right) { throw new PlatformNotSupportedException(); }
-
+        /// <summary>
+        /// __m512 _mm512_add_round_ps (__m512 a, __m512 b, int rounding)
+        ///   VADDPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> Add(Vector512<float> left, Vector512<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_add_round_sd (__m128d a, __m128d b, int rounding)
+        ///   VADDSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> AddScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_add_round_ss (__m128 a, __m128 b, int rounding)
+        ///   VADDSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> AddScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __m512i _mm512_alignr_epi32 (__m512i a, __m512i b, const int count)
         ///   VALIGND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst, imm8
@@ -1832,22 +1893,56 @@ internal X64() { }
         /// </summary>
         public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, uint value) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m128 _mm_cvt_roundi32_ss (__m128 a, int b, int rounding)
+        /// VCVTUSI2SS xmm1, xmm2, r32 {er}
+        /// </summary>
+        public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, uint value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertScalarToVector128Single(upper, value, mode);
+        /// <summary>
+        /// __m128 _mm_cvt_roundi32_ss (__m128 a, int b, int rounding)
+        /// VCVTSI2SS xmm1, xmm2, r32 {er}
+        /// </summary>
+        public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, int value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_cvt_roundsd_ss (__m128 a, __m128d b, int rounding)
+        /// VCVTSD2SS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, Vector128<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode){ throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m128d _mm_cvtsi32_sd (__m128d a, int b)
         ///   VCVTUSI2SD xmm1, xmm2, r/m32
         /// </summary>
         public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, uint value) { throw new PlatformNotSupportedException(); }
-
+        /// <summary>
+        /// int _mm_cvt_roundss_i32 (__m128 a, int rounding)
+        ///   VCVTSS2SIK r32, xmm1 {er}
+        /// </summary>
+        public static int ConvertToInt32(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// int _mm_cvt_roundsd_i32 (__m128d a, int rounding)
+        ///   VCVTSD2SI r32, xmm1 {er}
+        /// </summary>
+        public static int ConvertToInt32(Vector128<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// unsigned int _mm_cvtss_u32 (__m128 a)
         ///   VCVTSS2USI r32, xmm1/m32{er}
         /// </summary>
         public static uint ConvertToUInt32(Vector128<float> value) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// unsigned int _mm_cvt_roundss_u32 (__m128 a, int rounding)
+        ///   VCVTSS2USI r32, xmm1 {er}
+        /// </summary>
+        public static uint ConvertToUInt32(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// unsigned int _mm_cvtsd_u32 (__m128d a)
         ///   VCVTSD2USI r32, xmm1/m64{er}
         /// </summary>
         public static uint ConvertToUInt32(Vector128<double> value) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// unsigned int _mm_cvt_roundsd_u32 (__m128d a, int rounding)
+        ///   VCVTSD2USI r32, xmm1 {er}
+        /// </summary>
+        public static uint ConvertToUInt32(Vector128<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// unsigned int _mm_cvttss_u32 (__m128 a)
         ///   VCVTTSS2USI r32, xmm1/m32{er}
         /// </summary>
@@ -1974,6 +2069,11 @@ internal X64() { }
         /// </summary>
         public static Vector256<int> ConvertToVector256Int32(Vector512<double> value) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m256i _mm512_cvt_roundpd_epi32 (__m512d a, int rounding)
+        ///   VCVTPD2DQ ymm1, zmm2 {er}
+        /// </summary>
+        public static Vector256<int> ConvertToVector256Int32(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m256i _mm512_cvtepi64_epi32 (__m512i a)
         ///   VPMOVQD ymm1/m256 {k1}{z}, zmm2
         /// </summary>
@@ -2000,7 +2100,11 @@ internal X64() { }
         ///   VCVTPD2PS ymm1 {k1}{z}, zmm2/m512/m64bcst{er}
         /// </summary>
         public static Vector256<float> ConvertToVector256Single(Vector512<double> value) { throw new PlatformNotSupportedException(); }
-
+        /// <summary>
+        /// __m256 _mm512_cvt_roundpd_ps (__m512d a, int rounding)
+        ///   VCVTPD2PS ymm1, zmm2 {er}
+        /// </summary>
+        public static Vector256<float> ConvertToVector256Single(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __m256i _mm512_cvtepi32_epi16 (__m512i a)
         ///   VPMOVDW ymm1/m256 {k1}{z}, zmm2
@@ -2023,6 +2127,11 @@ internal X64() { }
         /// </summary>
         public static Vector256<uint> ConvertToVector256UInt32(Vector512<double> value) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        ///__m256i _mm512_cvt_roundpd_epu32 (__m512d a, int rounding)
+        ///   VCVTPD2UDQ ymm1, zmm2 {er}
+        /// </summary>
+        public static Vector256<uint> ConvertToVector256UInt32(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m256i _mm512_cvtepi64_epi32 (__m512i a)
         ///   VPMOVQD ymm1/m256 {k1}{z}, zmm2
         /// </summary>
@@ -2064,6 +2173,16 @@ internal X64() { }
         /// </summary>
         public static Vector512<int> ConvertToVector512Int32(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m512 _mm512_cvt_roundepi32_ps (__m512i a, int rounding)
+        ///   VCVTDQ2PS zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<float> ConvertToVector512Single(Vector512<int> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512 _mm512_cvt_roundepi32_ps (__m512i a, int rounding)
+        ///   VCVTUDQ2PS zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<float> ConvertToVector512Single(Vector512<uint> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m512i _mm512_cvtepu8_epi32 (__m128i a)
         ///   VPMOVZXBD zmm1 {k1}{z}, xmm2/m128
         /// </summary>
@@ -2084,6 +2203,11 @@ internal X64() { }
         /// </summary>
         public static Vector512<int> ConvertToVector512Int32(Vector512<float> value) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m512i _mm512_cvt_roundps_epi32 (__m512 a, int rounding)
+        ///   VCVTPS2DQ zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<int> ConvertToVector512Int32(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m512i _mm512_cvttps_epi32 (__m512 a)
         ///   VCVTTPS2DQ zmm1 {k1}{z}, zmm2/m512/m32bcst{sae}
         /// </summary>
@@ -2154,6 +2278,11 @@ internal X64() { }
         /// </summary>
         public static Vector512<uint> ConvertToVector512UInt32(Vector512<float> value) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m512i _mm512_cvt_roundps_epu32 (__m512 a, int rounding)
+        ///   VCVTPS2UDQ zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<uint> ConvertToVector512UInt32(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m512i _mm512_cvttps_epu32 (__m512 a)
         ///   VCVTTPS2UDQ zmm1 {k1}{z}, zmm2/m512/m32bcst{er}
         /// </summary>
@@ -2199,7 +2328,26 @@ internal X64() { }
         ///   VDIVPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst{er}
         /// </summary>
         public static Vector512<double> Divide(Vector512<double> left, Vector512<double> right) { throw new PlatformNotSupportedException(); }
-
+        /// <summary>
+        /// __m512 _mm512_div_round_ps (__m512 a, __m512 b, int rounding)
+        ///   VDIVPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> Divide(Vector512<float> left, Vector512<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512d _mm512_div_round_pd (__m512d a, __m512d b, int rounding)
+        ///   VDIVPD zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> Divide(Vector512<double> left, Vector512<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_div_round_sd (__m128d a, __m128d b, int rounding)
+        ///   VDIVSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_div_round_ss (__m128 a, __m128 b, int rounding)
+        ///   VDIVSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> DivideScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __m512 _mm512_moveldup_ps (__m512 a)
         ///   VMOVSLDUP zmm1 {k1}{z}, zmm2/m512
@@ -2345,11 +2493,31 @@ internal X64() { }
         ///   VFMADDPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst
         /// </summary>
         public static Vector512<float> FusedMultiplyAdd(Vector512<float> a, Vector512<float> b, Vector512<float> c) { throw new PlatformNotSupportedException(); }
+                /// <summary>
+        /// __m512 _mm512_fmadd_round_ps (__m512 a, __m512 b, __m512 c, int r)
+        ///   VFMADDPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> FusedMultiplyAdd(Vector512<float> a, Vector512<float> b, Vector512<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __m512d _mm512_fmadd_pd (__m512d a, __m512d b, __m512d c)
         ///   VFMADDPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
         /// </summary>
         public static Vector512<double> FusedMultiplyAdd(Vector512<double> a, Vector512<double> b, Vector512<double> c) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512d _mm512_fmadd_round_pd (__m512d a, __m512d b, __m512d c, int r)
+        ///   VFMADDPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> FusedMultiplyAdd(Vector512<double> a, Vector512<double> b, Vector512<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_fmadd_round_ss (__m128 a, __m128 b, __m128 c, int r)
+        ///   VFMADDSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> FusedMultiplyAddScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_fmadd_round_sd (__m128d a, __m128d b, __m128d c, int r)
+        ///   VFMADDSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> FusedMultiplyAddScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
 
         /// <summary>
         /// __m512 _mm512_fmaddsub_ps (__m512 a, __m512 b, __m512 c)
@@ -2357,10 +2525,20 @@ internal X64() { }
         /// </summary>
         public static Vector512<float> FusedMultiplyAddSubtract(Vector512<float> a, Vector512<float> b, Vector512<float> c) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m512 _mm512_fmaddsub_ps (__m512 a, __m512 b, __m512 c, int c)
+        ///   VFMADDSUBPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> FusedMultiplyAddSubtract(Vector512<float> a, Vector512<float> b, Vector512<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m512d _mm512_fmaddsub_pd (__m512d a, __m512d b, __m512d c)
         ///   VFMADDSUBPD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst
         /// </summary>
         public static Vector512<double> FusedMultiplyAddSubtract(Vector512<double> a, Vector512<double> b, Vector512<double> c) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512d _mm512_fmaddsub_pd (__m512d a, __m512d b, __m512d c, int c)
+        ///   VFMADDSUBPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> FusedMultiplyAddSubtract(Vector512<double> a, Vector512<double> b, Vector512<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
 
         /// <summary>
         /// __m512 _mm512_fmsub_ps (__m512 a, __m512 b, __m512 c)
@@ -2368,21 +2546,50 @@ internal X64() { }
         /// </summary>
         public static Vector512<float> FusedMultiplySubtract(Vector512<float> a, Vector512<float> b, Vector512<float> c) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m512 _mm512_fmsub_round_ps (__m512 a, __m512 b, __m512 c, int r)
+        ///   VFMSUBPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> FusedMultiplySubtract(Vector512<float> a, Vector512<float> b, Vector512<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m512d _mm512_fmsub_pd (__m512d a, __m512d b, __m512d c)
         ///   VFMSUBPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
         /// </summary>
         public static Vector512<double> FusedMultiplySubtract(Vector512<double> a, Vector512<double> b, Vector512<double> c) { throw new PlatformNotSupportedException(); }
-
+        /// <summary>
+        /// __m512d _mm512_fmsub_round_pd (__m512d a, __m512d b, __m512d c, int r)
+        ///   VFMSUBPD zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> FusedMultiplySubtract(Vector512<double> a, Vector512<double> b, Vector512<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_fmsub_round_ss (__m128 a, __m128 b, __m128 c, int r)
+        ///   VFMSUBSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> FusedMultiplySubtractScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_fmsub_round_sd (__m128d a, __m128d b, __m128d c, int r)
+        ///   VFMSUBSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> FusedMultiplySubtractScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __m512 _mm512_fmsubadd_ps (__m512 a, __m512 b, __m512 c)
         ///   VFMSUBADDPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst
         /// </summary>
         public static Vector512<float> FusedMultiplySubtractAdd(Vector512<float> a, Vector512<float> b, Vector512<float> c) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m512 _mm512_fmsubadd_round_ps (__m512 a, __m512 b, __m512 c)
+        ///   VFMSUBADDPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> FusedMultiplySubtractAdd(Vector512<float> a, Vector512<float> b, Vector512<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m512d _mm512_fmsubadd_pd (__m512d a, __m512d b, __m512d c)
         ///   VFMSUBADDPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
         /// </summary>
         public static Vector512<double> FusedMultiplySubtractAdd(Vector512<double> a, Vector512<double> b, Vector512<double> c) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512d _mm512_fmsubadd_round_ps (__m512d a, __m512d b, __m512d c)
+        ///   VFMSUBADDPD zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> FusedMultiplySubtractAdd(Vector512<double> a, Vector512<double> b, Vector512<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
 
         /// <summary>
         /// __m512 _mm512_fnmadd_ps (__m512 a, __m512 b, __m512 c)
@@ -2390,21 +2597,60 @@ internal X64() { }
         /// </summary>
         public static Vector512<float> FusedMultiplyAddNegated(Vector512<float> a, Vector512<float> b, Vector512<float> c) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m512 _mm512_fnmadd_round_ps (__m512 a, __m512 b, __m512 c, int r)
+        ///   VFNMADDPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> FusedMultiplyAddNegated(Vector512<float> a, Vector512<float> b, Vector512<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m512d _mm512_fnmadd_pd (__m512d a, __m512d b, __m512d c)
         ///   VFNMADDPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
         /// </summary>
         public static Vector512<double> FusedMultiplyAddNegated(Vector512<double> a, Vector512<double> b, Vector512<double> c) { throw new PlatformNotSupportedException(); }
-
+        /// <summary>
+        /// __m512d _mm512_fnmadd_round_pdd (__m512d a, __m512d b, __m512d c, int r)
+        ///   VFNMADDPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> FusedMultiplyAddNegated(Vector512<double> a, Vector512<double> b, Vector512<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_fnmadd_round_ss (__m128 a, __m128 b, __m128 c, int r)
+        ///   VFNMADDSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> FusedMultiplyAddNegatedScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_fnmadd_round_sd (__m128d a, __m128d b, __m128d c, int r)
+        ///   VFNMADDSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> FusedMultiplyAddNegatedScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __m512 _mm512_fnmsub_ps (__m512 a, __m512 b, __m512 c)
         ///   VFNMSUBPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst
         /// </summary>
         public static Vector512<float> FusedMultiplySubtractNegated(Vector512<float> a, Vector512<float> b, Vector512<float> c) { throw new PlatformNotSupportedException(); }
         /// <summary>
+        /// __m512 _mm512_fnmsub_round_ps (__m512 a, __m512 b, __m512 c, int r)
+        ///   VFNMSUBPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> FusedMultiplySubtractNegated(Vector512<float> a, Vector512<float> b, Vector512<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
         /// __m512d _mm512_fnmsub_pd (__m512d a, __m512d b, __m512d c)
         ///   VFNMSUBPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
         /// </summary>
         public static Vector512<double> FusedMultiplySubtractNegated(Vector512<double> a, Vector512<double> b, Vector512<double> c) { throw new PlatformNotSupportedException(); }
+/// <summary>
+        /// __m512d _mm512_fnmsub_round_pd (__m512d a, __m512d b, __m512d c, int r)
+        ///   VFNMSUBPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> FusedMultiplySubtractNegated(Vector512<double> a, Vector512<double> b, Vector512<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_fnmsub_round_ss (__m128 a, __m128 b, __m128 c, int r)
+        ///   VFNMSUBSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> FusedMultiplySubtractNegatedScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_fnmsub_round_sd (__m128d a, __m128d b, __m128d c, int r)
+        ///   VFNMSUBSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> FusedMultiplySubtractNegatedScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
 
         /// <summary>
         /// __m512 _mm512_getexp_ps (__m512 a)
@@ -2801,7 +3047,26 @@ internal X64() { }
         ///   VMULPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst{er}
         /// </summary>
         public static Vector512<double> Multiply(Vector512<double> left, Vector512<double> right) { throw new PlatformNotSupportedException(); }
-
+        /// <summary>
+        /// __m512 _mm512_mul_round_ps (__m512 a, __m512 b, int rounding)
+        ///   VMULPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> Multiply(Vector512<float> left, Vector512<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512d _mm512_mul_round_pd (__m512d a, __m512d b, int rounding)
+        ///   VMULPD zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> Multiply(Vector512<double> left, Vector512<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_mul_round_ss (__m128 a, __m128 b, int rounding)
+        ///   VMULSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> MultiplyScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_mul_round_sd (__m128d a, __m128d b, int rounding)
+        ///   VMULSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> MultiplyScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __m512i _mm512_mullo_epi32 (__m512i a, __m512i b)
         ///   VPMULLD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst
@@ -3160,7 +3425,16 @@ internal X64() { }
         ///   VSCALEFPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst{er}
         /// </summary>
         public static Vector512<double> Scale(Vector512<double> left, Vector512<double> right) { throw new PlatformNotSupportedException(); }
-
+        /// <summary>
+        /// __m512 _mm512_scalef_round_ps (__m512 a, __m512 b, int rounding)
+        ///   VSCALEFPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> Scale(Vector512<float> left, Vector512<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512d _mm512_scalef_round_pd (__m512d a, __m512d b, int rounding)
+        ///   VSCALEFPD zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> Scale(Vector512<double> left, Vector512<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __m128 _mm_scalef_ss (__m128 a, __m128 b)
         ///   VSCALEFSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
@@ -3171,7 +3445,16 @@ internal X64() { }
         ///   VSCALEFSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
         /// </summary>
         public static Vector128<double> ScaleScalar(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        /// <summary>
+        /// __m128 _mm_scalef_round_ss (__m128 a, __m128 b)
+        ///   VSCALEFSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> ScaleScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_scalef_round_sd (__m128d a, __m128d b)
+        ///   VSCALEFSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> ScaleScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __m512i _mm512_sll_epi32 (__m512i a, __m128i count)
         ///   VPSLLD zmm1 {k1}{z}, zmm2, xmm3/m128
@@ -3394,7 +3677,26 @@ internal X64() { }
         ///   VSQRTPD zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
         /// </summary>
         public static Vector512<double> Sqrt(Vector512<double> value) { throw new PlatformNotSupportedException(); }
-
+        /// <summary>
+        /// __m512 _mm512_sqrt_round_ps (__m512 a, int rounding)
+        ///   VSQRTPS zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<float> Sqrt(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512d _mm512_sqrt_round_pd (__m512d a, int rounding)
+        ///   VSQRTPD zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<double> Sqrt(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_sqrt_round_ss (__m128 a, __m128 b, int rounding)
+        ///   VSQRTSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> SqrtScalar(Vector128<float> upper, Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_sqrt_round_sd (__m128d a, __m128d b, int rounding)
+        ///   VSQRTSD xmm1, xmm2 xmm3 {er}
+        /// </summary>
+        public static Vector128<double> SqrtScalar(Vector128<double> upper, Vector128<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a)
         ///   VMOVDQU32 m512 {k1}{z}, zmm1
@@ -3578,6 +3880,26 @@ internal X64() { }
         ///   VSUBPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst{er}
         /// </summary>
         public static Vector512<double> Subtract(Vector512<double> left, Vector512<double> right) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512 _mm512_sub_round_ps (__m512 a, __m512 b, int rounding)
+        ///   VSUBPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> Subtract(Vector512<float> left, Vector512<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m512d _mm512_sub_round_pd (__m512d a, __m512d b, int rounding)
+        ///   VSUBPD zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> Subtract(Vector512<double> left, Vector512<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_sub_round_ss (__m128 a, __m128 b, int rounding)
+        ///   VSUBSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> SubtractScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_sub_round_sd (__m128d a, __m128d b, int rounding)
+        ///   VSUBSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> SubtractScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }
 
         /// <summary>
         /// __m512i _mm512_ternarylogic_si512 (__m512i a, __m512i b, __m512i c, int imm)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.cs
index fa0ebeaa8162..3227da13b99c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512F.cs
@@ -1262,6 +1262,12 @@ internal X64() { }
 
             public static new bool IsSupported { get => IsSupported; }
 
+            /// <summary>
+            /// __m128 _mm_cvt_roundi64_ss (__m128 a, __int64 b, int rounding)
+            ///   VCVTSI2SS xmm1, xmm2, r64 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, long value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertScalarToVector128Single(upper, value, mode);
             /// <summary>
             /// __m128 _mm_cvtsi64_ss (__m128 a, __int64 b)
             ///   VCVTUSI2SS xmm1, xmm2, r/m64
@@ -1269,12 +1275,42 @@ internal X64() { }
             /// </summary>
             public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, ulong value) => ConvertScalarToVector128Single(upper, value);
             /// <summary>
+            /// __m128 _mm_cvt_roundu64_ss (__m128 a, unsigned __int64 b, int rounding)
+            ///   VCVTUSI2SS xmm1, xmm2, r64 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, ulong value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertScalarToVector128Single(upper, value, mode);
+            /// <summary>
+            /// __m128d _mm_cvt_roundsi64_sd (__m128d a, __int64 b, int rounding)
+            ///   VCVTSI2SD xmm1, xmm2, r64 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, long value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertScalarToVector128Double(upper, value, mode);
+            /// <summary>
             /// __m128d _mm_cvtsi64_sd (__m128d a, __int64 b)
             ///   VCVTUSI2SD xmm1, xmm2, r/m64
             /// This intrinsic is only available on 64-bit processes
             /// </summary>
             public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, ulong value) => ConvertScalarToVector128Double(upper, value);
+            /// <summary>
+            /// __m128d _mm_cvt_roundu64_sd (__m128d a, unsigned __int64 b, int rounding)
+            ///   VCVTUSI2SD xmm1, xmm2, r64 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, ulong value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertScalarToVector128Double(upper, value, mode);
 
+            /// <summary>
+            /// __int64 _mm_cvt_roundss_i64 (__m128 a, int rounding)
+            ///   VCVTSS2SI r64, xmm1 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static long ConvertToInt64(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToInt64(value, mode);
+            /// <summary>
+            /// __int64 _mm_cvt_roundsd_i64 (__m128d a, int rounding)
+            ///   VCVTSD2SI r64, xmm1 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static long ConvertToInt64(Vector128<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToInt64(value, mode);
             /// <summary>
             /// unsigned __int64 _mm_cvtss_u64 (__m128 a)
             ///   VCVTSS2USI r64, xmm1/m32{er}
@@ -1282,11 +1318,23 @@ internal X64() { }
             /// </summary>
             public static ulong ConvertToUInt64(Vector128<float> value) => ConvertToUInt64(value);
             /// <summary>
+            /// unsigned __int64 _mm_cvt_roundss_u64 (__m128 a, int rounding)
+            ///   VCVTSS2USI r64, xmm1 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static ulong ConvertToUInt64(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToUInt64(value, mode);
+            /// <summary>
             /// unsigned __int64 _mm_cvtsd_u64 (__m128d a)
             ///   VCVTSD2USI r64, xmm1/m64{er}
             /// This intrinsic is only available on 64-bit processes
             /// </summary>
             public static ulong ConvertToUInt64(Vector128<double> value) => ConvertToUInt64(value);
+            /// <summary>
+            /// unsigned __int64 _mm_cvt_roundsd_u64 (__m128d a, int rounding)
+            ///   VCVTSD2USI r64, xmm1 {er}
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static ulong ConvertToUInt64(Vector128<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToUInt64(value, mode);
 
             /// <summary>
             /// unsigned __int64 _mm_cvttss_u64 (__m128 a)
@@ -1339,8 +1387,8 @@ internal X64() { }
         /// </summary>
         public static Vector512<double> Add(Vector512<double> left, Vector512<double> right) => Add(left, right);
         /// <summary>
-        /// __m512d _mm512_add_pd (__m512d a, __m512d b)
-        ///   VADDPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst{er}
+        /// __m512d _mm512_add_round_pd (__m512d a, __m512d b, int rounding)
+        ///   VADDPD zmm1, zmm2, zmm3 {er}
         /// </summary>
         public static Vector512<double> Add(Vector512<double> left, Vector512<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Add(left, right, mode);
         /// <summary>
@@ -1348,7 +1396,21 @@ internal X64() { }
         ///   VADDPS zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst{er}
         /// </summary>
         public static Vector512<float> Add(Vector512<float> left, Vector512<float> right) => Add(left, right);
-
+        /// <summary>
+        /// __m512 _mm512_add_round_ps (__m512 a, __m512 b, int rounding)
+        ///   VADDPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> Add(Vector512<float> left, Vector512<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Add(left, right, mode);
+        /// <summary>
+        /// __m128 _mm_add_round_ss (__m128 a, __m128 b, int rounding)
+        ///   VADDSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> AddScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => AddScalar(left, right, mode);
+        /// <summary>
+        /// __m128d _mm_add_round_sd (__m128d a, __m128d b, int rounding)
+        ///   VADDSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> AddScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => AddScalar(left, right, mode);
         /// <summary>
         /// __m512i _mm512_alignr_epi32 (__m512i a, __m512i b, const int count)
         ///   VALIGND zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst, imm8
@@ -1833,22 +1895,57 @@ internal X64() { }
         /// </summary>
         public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, uint value) => ConvertScalarToVector128Single(upper, value);
         /// <summary>
+        /// __m128 _mm_cvt_roundi32_ss (__m128 a, int b, int rounding)
+        /// VCVTUSI2SS xmm1, xmm2, r32 {er}
+        /// </summary>
+        public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, uint value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertScalarToVector128Single(upper, value, mode);
+        /// <summary>
+        /// __m128 _mm_cvt_roundi32_ss (__m128 a, int b, int rounding)
+        /// VCVTSI2SS xmm1, xmm2, r32 {er}
+        /// </summary>
+        public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, int value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertScalarToVector128Single(upper, value, mode);
+        /// <summary>
+        /// __m128 _mm_cvt_roundsd_ss (__m128 a, __m128d b, int rounding)
+        /// VCVTSD2SS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, Vector128<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertScalarToVector128Single(upper, value, mode);
+        /// <summary>
         /// __m128d _mm_cvtsi32_sd (__m128d a, int b)
         ///   VCVTUSI2SD xmm1, xmm2, r/m32
         /// </summary>
         public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, uint value) => ConvertScalarToVector128Double(upper, value);
 
+        /// <summary>
+        /// int _mm_cvt_roundss_i32 (__m128 a, int rounding)
+        ///   VCVTSS2SIK r32, xmm1 {er}
+        /// </summary>
+        public static int ConvertToInt32(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToInt32(value, mode);
+        /// <summary>
+        /// int _mm_cvt_roundsd_i32 (__m128d a, int rounding)
+        ///   VCVTSD2SI r32, xmm1 {er}
+        /// </summary>
+        public static int ConvertToInt32(Vector128<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToInt32(value, mode);
         /// <summary>
         /// unsigned int _mm_cvtss_u32 (__m128 a)
         ///   VCVTSS2USI r32, xmm1/m32{er}
         /// </summary>
         public static uint ConvertToUInt32(Vector128<float> value) => ConvertToUInt32(value);
         /// <summary>
+        /// unsigned int _mm_cvt_roundss_u32 (__m128 a, int rounding)
+        ///   VCVTSS2USI r32, xmm1 {er}
+        /// </summary>
+        public static uint ConvertToUInt32(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToUInt32(value, mode);
+        /// <summary>
         /// unsigned int _mm_cvtsd_u32 (__m128d a)
         ///   VCVTSD2USI r32, xmm1/m64{er}
         /// </summary>
         public static uint ConvertToUInt32(Vector128<double> value) => ConvertToUInt32(value);
         /// <summary>
+        /// unsigned int _mm_cvt_roundsd_u32 (__m128d a, int rounding)
+        ///   VCVTSD2USI r32, xmm1 {er}
+        /// </summary>
+        public static uint ConvertToUInt32(Vector128<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToUInt32(value, mode);
+        /// <summary>
         /// unsigned int _mm_cvttss_u32 (__m128 a)
         ///   VCVTTSS2USI r32, xmm1/m32{er}
         /// </summary>
@@ -1975,6 +2072,11 @@ internal X64() { }
         /// </summary>
         public static Vector256<int> ConvertToVector256Int32(Vector512<double> value) => ConvertToVector256Int32(value);
         /// <summary>
+        /// __m256i _mm512_cvt_roundpd_epi32 (__m512d a, int rounding)
+        ///   VCVTPD2DQ ymm1, zmm2 {er}
+        /// </summary>
+        public static Vector256<int> ConvertToVector256Int32(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Int32(value, mode);
+        /// <summary>
         /// __m256i _mm512_cvtepi64_epi32 (__m512i a)
         ///   VPMOVQD ymm1/m256 {k1}{z}, zmm2
         /// </summary>
@@ -2001,6 +2103,11 @@ internal X64() { }
         ///   VCVTPD2PS ymm1 {k1}{z}, zmm2/m512/m64bcst{er}
         /// </summary>
         public static Vector256<float> ConvertToVector256Single(Vector512<double> value) => ConvertToVector256Single(value);
+        /// <summary>
+        /// __m256 _mm512_cvt_roundpd_ps (__m512d a, int rounding)
+        ///   VCVTPD2PS ymm1, zmm2 {er}
+        /// </summary>
+        public static Vector256<float> ConvertToVector256Single(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Single(value, mode);
 
         /// <summary>
         /// __m256i _mm512_cvtepi32_epi16 (__m512i a)
@@ -2024,6 +2131,11 @@ internal X64() { }
         /// </summary>
         public static Vector256<uint> ConvertToVector256UInt32(Vector512<double> value) => ConvertToVector256UInt32(value);
         /// <summary>
+        ///__m256i _mm512_cvt_roundpd_epu32 (__m512d a, int rounding)
+        ///   VCVTPD2UDQ ymm1, zmm2 {er}
+        /// </summary>
+        public static Vector256<uint> ConvertToVector256UInt32(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256UInt32(value, mode);
+        /// <summary>
         /// __m256i _mm512_cvtepi64_epi32 (__m512i a)
         ///   VPMOVQD ymm1/m256 {k1}{z}, zmm2
         /// </summary>
@@ -2085,6 +2197,11 @@ internal X64() { }
         /// </summary>
         public static Vector512<int> ConvertToVector512Int32(Vector512<float> value) => ConvertToVector512Int32(value);
         /// <summary>
+        /// __m512i _mm512_cvt_roundps_epi32 (__m512 a, int rounding)
+        ///   VCVTPS2DQ zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<int> ConvertToVector512Int32(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512Int32(value, mode);
+        /// <summary>
         /// __m512i _mm512_cvttps_epi32 (__m512 a)
         ///   VCVTTPS2DQ zmm1 {k1}{z}, zmm2/m512/m32bcst{sae}
         /// </summary>
@@ -2125,11 +2242,21 @@ internal X64() { }
         /// </summary>
         public static Vector512<float> ConvertToVector512Single(Vector512<int> value) => ConvertToVector512Single(value);
         /// <summary>
+        /// __m512 _mm512_cvt_roundepi32_ps (__m512i a, int rounding)
+        ///   VCVTDQ2PS zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<float> ConvertToVector512Single(Vector512<int> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512Single(value, mode);
+        /// <summary>
         /// __m512 _mm512_cvtepu32_ps (__m512i a)
         ///   VCVTUDQ2PS zmm1 {k1}{z}, zmm2/m512/m32bcst{er}
         /// </summary>
         public static Vector512<float> ConvertToVector512Single(Vector512<uint> value) => ConvertToVector512Single(value);
         /// <summary>
+        /// __m512 _mm512_cvt_roundepi32_ps (__m512i a, int rounding)
+        ///   VCVTUDQ2PS zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<float> ConvertToVector512Single(Vector512<uint> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512Single(value, mode);
+        /// <summary>
         /// __m512i _mm512_cvtepi8_epi32 (__m128i a)
         ///   VPMOVSXBD zmm1 {k1}{z}, xmm2/m128
         /// </summary>
@@ -2155,6 +2282,11 @@ internal X64() { }
         /// </summary>
         public static Vector512<uint> ConvertToVector512UInt32(Vector512<float> value) => ConvertToVector512UInt32(value);
         /// <summary>
+        /// __m512i _mm512_cvt_roundps_epu32 (__m512 a, int rounding)
+        ///   VCVTPS2UDQ zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<uint> ConvertToVector512UInt32(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector512UInt32(value, mode);
+        /// <summary>
         /// __m512i _mm512_cvttps_epu32 (__m512 a)
         ///   VCVTTPS2UDQ zmm1 {k1}{z}, zmm2/m512/m32bcst{er}
         /// </summary>
@@ -2200,7 +2332,26 @@ internal X64() { }
         ///   VDIVPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst{er}
         /// </summary>
         public static Vector512<double> Divide(Vector512<double> left, Vector512<double> right) => Divide(left, right);
-
+        /// <summary>
+        /// __m512 _mm512_div_round_ps (__m512 a, __m512 b, int rounding)
+        ///   VDIVPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> Divide(Vector512<float> left, Vector512<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Divide(left, right, mode);
+        /// <summary>
+        /// __m512d _mm512_div_round_pd (__m512d a, __m512d b, int rounding)
+        ///   VDIVPD zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> Divide(Vector512<double> left, Vector512<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Divide(left, right, mode);
+        /// <summary>
+        /// __m128 _mm_div_round_ss (__m128 a, __m128 b, int rounding)
+        ///   VDIVSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> DivideScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => DivideScalar(left, right, mode);
+        /// <summary>
+        /// __m128d _mm_div_round_sd (__m128d a, __m128d b, int rounding)
+        ///   VDIVSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => DivideScalar(left, right, mode);
         /// <summary>
         /// __m512 _mm512_moveldup_ps (__m512 a)
         ///   VMOVSLDUP zmm1 {k1}{z}, zmm2/m512
@@ -2347,10 +2498,30 @@ internal X64() { }
         /// </summary>
         public static Vector512<float> FusedMultiplyAdd(Vector512<float> a, Vector512<float> b, Vector512<float> c) => FusedMultiplyAdd(a, b, c);
         /// <summary>
+        /// __m512 _mm512_fmadd_round_ps (__m512 a, __m512 b, __m512 c, int r)
+        ///   VFMADDPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> FusedMultiplyAdd(Vector512<float> a, Vector512<float> b, Vector512<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplyAdd(a, b, c, mode);
+        /// <summary>
         /// __m512d _mm512_fmadd_pd (__m512d a, __m512d b, __m512d c)
         ///   VFMADDPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
         /// </summary>
         public static Vector512<double> FusedMultiplyAdd(Vector512<double> a, Vector512<double> b, Vector512<double> c) => FusedMultiplyAdd(a, b, c);
+        /// <summary>
+        /// __m512d _mm512_fmadd_round_pd (__m512d a, __m512d b, __m512d c, int r)
+        ///   VFMADDPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> FusedMultiplyAdd(Vector512<double> a, Vector512<double> b, Vector512<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplyAdd(a, b, c, mode);
+        /// <summary>
+        /// __m128 _mm_fmadd_round_ss (__m128 a, __m128 b, __m128 c, int r)
+        ///   VFMADDSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> FusedMultiplyAddScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplyAddScalar(a, b, c, mode);
+        /// <summary>
+        /// __m128d _mm_fmadd_round_sd (__m128d a, __m128d b, __m128d c, int r)
+        ///   VFMADDSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> FusedMultiplyAddScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplyAddScalar(a, b, c, mode);
 
         /// <summary>
         /// __m512 _mm512_fmaddsub_ps (__m512 a, __m512 b, __m512 c)
@@ -2358,10 +2529,20 @@ internal X64() { }
         /// </summary>
         public static Vector512<float> FusedMultiplyAddSubtract(Vector512<float> a, Vector512<float> b, Vector512<float> c) => FusedMultiplyAddSubtract(a, b, c);
         /// <summary>
+        /// __m512 _mm512_fmaddsub_ps (__m512 a, __m512 b, __m512 c, int c)
+        ///   VFMADDSUBPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> FusedMultiplyAddSubtract(Vector512<float> a, Vector512<float> b, Vector512<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplyAddSubtract(a, b, c, mode);
+        /// <summary>
         /// __m512d _mm512_fmaddsub_pd (__m512d a, __m512d b, __m512d c)
         ///   VFMADDSUBPD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst
         /// </summary>
         public static Vector512<double> FusedMultiplyAddSubtract(Vector512<double> a, Vector512<double> b, Vector512<double> c) => FusedMultiplyAddSubtract(a, b, c);
+        /// <summary>
+        /// __m512d _mm512_fmaddsub_pd (__m512d a, __m512d b, __m512d c, int c)
+        ///   VFMADDSUBPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> FusedMultiplyAddSubtract(Vector512<double> a, Vector512<double> b, Vector512<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplyAddSubtract(a, b, c, mode);
 
         /// <summary>
         /// __m512 _mm512_fmsub_ps (__m512 a, __m512 b, __m512 c)
@@ -2369,10 +2550,30 @@ internal X64() { }
         /// </summary>
         public static Vector512<float> FusedMultiplySubtract(Vector512<float> a, Vector512<float> b, Vector512<float> c) => FusedMultiplySubtract(a, b, c);
         /// <summary>
+        /// __m512 _mm512_fmsub_round_ps (__m512 a, __m512 b, __m512 c, int r)
+        ///   VFMSUBPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> FusedMultiplySubtract(Vector512<float> a, Vector512<float> b, Vector512<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplySubtract(a, b, c, mode);
+        /// <summary>
         /// __m512d _mm512_fmsub_pd (__m512d a, __m512d b, __m512d c)
         ///   VFMSUBPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
         /// </summary>
         public static Vector512<double> FusedMultiplySubtract(Vector512<double> a, Vector512<double> b, Vector512<double> c) => FusedMultiplySubtract(a, b, c);
+        /// <summary>
+        /// __m512d _mm512_fmsub_round_pd (__m512d a, __m512d b, __m512d c, int r)
+        ///   VFMSUBPD zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> FusedMultiplySubtract(Vector512<double> a, Vector512<double> b, Vector512<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplySubtract(a, b, c, mode);
+        /// <summary>
+        /// __m128 _mm_fmsub_round_ss (__m128 a, __m128 b, __m128 c, int r)
+        ///   VFMSUBSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> FusedMultiplySubtractScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplySubtractScalar(a, b, c, mode);
+        /// <summary>
+        /// __m128d _mm_fmsub_round_sd (__m128d a, __m128d b, __m128d c, int r)
+        ///   VFMSUBSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> FusedMultiplySubtractScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplySubtractScalar(a, b, c, mode);
 
         /// <summary>
         /// __m512 _mm512_fmsubadd_ps (__m512 a, __m512 b, __m512 c)
@@ -2380,10 +2581,20 @@ internal X64() { }
         /// </summary>
         public static Vector512<float> FusedMultiplySubtractAdd(Vector512<float> a, Vector512<float> b, Vector512<float> c) => FusedMultiplySubtractAdd(a, b, c);
         /// <summary>
+        /// __m512 _mm512_fmsubadd_round_ps (__m512 a, __m512 b, __m512 c)
+        ///   VFMSUBADDPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> FusedMultiplySubtractAdd(Vector512<float> a, Vector512<float> b, Vector512<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplySubtractAdd(a, b, c, mode);
+        /// <summary>
         /// __m512d _mm512_fmsubadd_pd (__m512d a, __m512d b, __m512d c)
         ///   VFMSUBADDPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
         /// </summary>
         public static Vector512<double> FusedMultiplySubtractAdd(Vector512<double> a, Vector512<double> b, Vector512<double> c) => FusedMultiplySubtractAdd(a, b, c);
+        /// <summary>
+        /// __m512d _mm512_fmsubadd_round_ps (__m512d a, __m512d b, __m512d c)
+        ///   VFMSUBADDPD zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> FusedMultiplySubtractAdd(Vector512<double> a, Vector512<double> b, Vector512<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplySubtractAdd(a, b, c, mode);
 
         /// <summary>
         /// __m512 _mm512_fnmadd_ps (__m512 a, __m512 b, __m512 c)
@@ -2391,10 +2602,30 @@ internal X64() { }
         /// </summary>
         public static Vector512<float> FusedMultiplyAddNegated(Vector512<float> a, Vector512<float> b, Vector512<float> c) => FusedMultiplyAddNegated(a, b, c);
         /// <summary>
+        /// __m512 _mm512_fnmadd_round_ps (__m512 a, __m512 b, __m512 c, int r)
+        ///   VFNMADDPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> FusedMultiplyAddNegated(Vector512<float> a, Vector512<float> b, Vector512<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplyAddNegated(a, b, c, mode);
+        /// <summary>
         /// __m512d _mm512_fnmadd_pd (__m512d a, __m512d b, __m512d c)
         ///   VFNMADDPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
         /// </summary>
         public static Vector512<double> FusedMultiplyAddNegated(Vector512<double> a, Vector512<double> b, Vector512<double> c) => FusedMultiplyAddNegated(a, b, c);
+        /// <summary>
+        /// __m512d _mm512_fnmadd_round_pdd (__m512d a, __m512d b, __m512d c, int r)
+        ///   VFNMADDPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> FusedMultiplyAddNegated(Vector512<double> a, Vector512<double> b, Vector512<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplyAddNegated(a, b, c, mode);
+        /// <summary>
+        /// __m128 _mm_fnmadd_round_ss (__m128 a, __m128 b, __m128 c, int r)
+        ///   VFNMADDSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> FusedMultiplyAddNegatedScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplyAddNegatedScalar(a, b, c, mode);
+        /// <summary>
+        /// __m128d _mm_fnmadd_round_sd (__m128d a, __m128d b, __m128d c, int r)
+        ///   VFNMADDSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> FusedMultiplyAddNegatedScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplyAddNegatedScalar(a, b, c, mode);
 
         /// <summary>
         /// __m512 _mm512_fnmsub_ps (__m512 a, __m512 b, __m512 c)
@@ -2402,10 +2633,30 @@ internal X64() { }
         /// </summary>
         public static Vector512<float> FusedMultiplySubtractNegated(Vector512<float> a, Vector512<float> b, Vector512<float> c) => FusedMultiplySubtractNegated(a, b, c);
         /// <summary>
+        /// __m512 _mm512_fnmsub_round_ps (__m512 a, __m512 b, __m512 c, int r)
+        ///   VFNMSUBPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> FusedMultiplySubtractNegated(Vector512<float> a, Vector512<float> b, Vector512<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplySubtractNegated(a, b, c, mode);
+        /// <summary>
         /// __m512d _mm512_fnmsub_pd (__m512d a, __m512d b, __m512d c)
         ///   VFNMSUBPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst
         /// </summary>
         public static Vector512<double> FusedMultiplySubtractNegated(Vector512<double> a, Vector512<double> b, Vector512<double> c) => FusedMultiplySubtractNegated(a, b, c);
+        /// <summary>
+        /// __m512d _mm512_fnmsub_round_pd (__m512d a, __m512d b, __m512d c, int r)
+        ///   VFNMSUBPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> FusedMultiplySubtractNegated(Vector512<double> a, Vector512<double> b, Vector512<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplySubtractNegated(a, b, c, mode);
+        /// <summary>
+        /// __m128 _mm_fnmsub_round_ss (__m128 a, __m128 b, __m128 c, int r)
+        ///   VFNMSUBSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> FusedMultiplySubtractNegatedScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplySubtractNegatedScalar(a, b, c, mode);
+        /// <summary>
+        /// __m128d _mm_fnmsub_round_sd (__m128d a, __m128d b, __m128d c, int r)
+        ///   VFNMSUBSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> FusedMultiplySubtractNegatedScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => FusedMultiplySubtractNegatedScalar(a, b, c, mode);
 
         /// <summary>
         /// __m512 _mm512_getexp_ps (__m512 a)
@@ -2802,7 +3053,26 @@ internal X64() { }
         ///   VMULPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst{er}
         /// </summary>
         public static Vector512<double> Multiply(Vector512<double> left, Vector512<double> right) => Multiply(left, right);
-
+        /// <summary>
+        /// __m512 _mm512_mul_round_ps (__m512 a, __m512 b, int rounding)
+        ///   VMULPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> Multiply(Vector512<float> left, Vector512<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Multiply(left, right, mode);
+        /// <summary>
+        /// __m512d _mm512_mul_round_pd (__m512d a, __m512d b, int rounding)
+        ///   VMULPD zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> Multiply(Vector512<double> left, Vector512<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Multiply(left, right, mode);
+        /// <summary>
+        /// __m128 _mm_mul_round_ss (__m128 a, __m128 b, int rounding)
+        ///   VMULSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> MultiplyScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => MultiplyScalar(left, right, mode);
+        /// <summary>
+        /// __m128d _mm_mul_round_sd (__m128d a, __m128d b, int rounding)
+        ///   VMULSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> MultiplyScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => MultiplyScalar(left, right, mode);
         /// <summary>
         /// __m512i _mm512_mullo_epi32 (__m512i a, __m512i b)
         ///   VPMULLD zmm1 {k1}{z}, zmm2, zmm3/m512/m32bcst
@@ -3161,6 +3431,16 @@ internal X64() { }
         ///   VSCALEFPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst{er}
         /// </summary>
         public static Vector512<double> Scale(Vector512<double> left, Vector512<double> right) => Scale(left, right);
+        /// <summary>
+        /// __m512 _mm512_scalef_round_ps (__m512 a, __m512 b, int rounding)
+        ///   VSCALEFPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> Scale(Vector512<float> left, Vector512<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Scale(left, right, mode);
+        /// <summary>
+        /// __m512d _mm512_scalef_round_pd (__m512d a, __m512d b, int rounding)
+        ///   VSCALEFPD zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> Scale(Vector512<double> left, Vector512<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Scale(left, right, mode);
 
         /// <summary>
         /// __m128 _mm_scalef_ss (__m128 a, __m128 b)
@@ -3172,6 +3452,16 @@ internal X64() { }
         ///   VSCALEFSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
         /// </summary>
         public static Vector128<double> ScaleScalar(Vector128<double> left, Vector128<double> right) => ScaleScalar(left, right);
+        /// <summary>
+        /// __m128 _mm_scalef_round_ss (__m128 a, __m128 b)
+        ///   VSCALEFSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> ScaleScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ScaleScalar(left, right, mode);
+        /// <summary>
+        /// __m128d _mm_scalef_round_sd (__m128d a, __m128d b)
+        ///   VSCALEFSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> ScaleScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ScaleScalar(left, right, mode);
 
         /// <summary>
         /// __m512i _mm512_sll_epi32 (__m512i a, __m128i count)
@@ -3394,6 +3684,26 @@ internal X64() { }
         ///   VSQRTPD zmm1 {k1}{z}, zmm2/m512/m64bcst{er}
         /// </summary>
         public static Vector512<double> Sqrt(Vector512<double> value) => Sqrt(value);
+        /// <summary>
+        /// __m512 _mm512_sqrt_round_ps (__m512 a, int rounding)
+        ///   VSQRTPS zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<float> Sqrt(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Sqrt(value, mode);
+        /// <summary>
+        /// __m512d _mm512_sqrt_round_pd (__m512d a, int rounding)
+        ///   VSQRTPD zmm1, zmm2 {er}
+        /// </summary>
+        public static Vector512<double> Sqrt(Vector512<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Sqrt(value, mode);
+        /// <summary>
+        /// __m128 _mm_sqrt_round_ss (__m128 a, __m128 b, int rounding)
+        ///   VSQRTSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> SqrtScalar(Vector128<float> upper, Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => SqrtScalar(upper, value, mode);
+        /// <summary>
+        /// __m128d _mm_sqrt_round_sd (__m128d a, __m128d b, int rounding)
+        ///   VSQRTSD xmm1, xmm2 xmm3 {er}
+        /// </summary>
+        public static Vector128<double> SqrtScalar(Vector128<double> upper, Vector128<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => SqrtScalar(upper, value, mode);
 
         /// <summary>
         /// void _mm512_storeu_si512 (__m512i * mem_addr, __m512i a)
@@ -3578,6 +3888,26 @@ internal X64() { }
         ///   VSUBPD zmm1 {k1}{z}, zmm2, zmm3/m512/m64bcst{er}
         /// </summary>
         public static Vector512<double> Subtract(Vector512<double> left, Vector512<double> right) => Subtract(left, right);
+        /// <summary>
+        /// __m512 _mm512_sub_round_ps (__m512 a, __m512 b, int rounding)
+        ///   VSUBPS zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<float> Subtract(Vector512<float> left, Vector512<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Subtract(left, right, mode);
+        /// <summary>
+        /// __m512d _mm512_sub_round_pd (__m512d a, __m512d b, int rounding)
+        ///   VSUBPD zmm1, zmm2, zmm3 {er}
+        /// </summary>
+        public static Vector512<double> Subtract(Vector512<double> left, Vector512<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Subtract(left, right, mode);
+        /// <summary>
+        /// __m128 _mm_sub_round_ss (__m128 a, __m128 b, int rounding)
+        ///   VSUBSS xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<float> SubtractScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => SubtractScalar(left, right, mode);
+        /// <summary>
+        /// __m128d _mm_sub_round_sd (__m128d a, __m128d b, int rounding)
+        ///   VSUBSD xmm1, xmm2, xmm3 {er}
+        /// </summary>
+        public static Vector128<double> SubtractScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => SubtractScalar(left, right, mode);
 
         /// <summary>
         /// __m512i _mm512_ternarylogic_si512 (__m512i a, __m512i b, __m512i c, int imm)
diff --git a/src/libraries/System.Private.CoreLib/src/System/RuntimeType.cs b/src/libraries/System.Private.CoreLib/src/System/RuntimeType.cs
index bd6a6f89bb13..17fbb2950c6c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/RuntimeType.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/RuntimeType.cs
@@ -692,7 +692,7 @@ public override bool IsAssignableFrom([NotNullWhen(true)] Type? c)
 
                     if (constraint.IsGenericParameter)
                     {
-                        GenericParameterAttributes special = constraint.GenericParameterAttributes & GenericParameterAttributes.SpecialConstraintMask;
+                        GenericParameterAttributes special = constraint.GenericParameterAttributes;
 
                         if ((special & GenericParameterAttributes.ReferenceTypeConstraint) == 0 &&
                             (special & GenericParameterAttributes.NotNullableValueTypeConstraint) == 0)
@@ -704,7 +704,7 @@ public override bool IsAssignableFrom([NotNullWhen(true)] Type? c)
 
                 if (baseType == ObjectType)
                 {
-                    GenericParameterAttributes special = GenericParameterAttributes & GenericParameterAttributes.SpecialConstraintMask;
+                    GenericParameterAttributes special = GenericParameterAttributes;
                     if ((special & GenericParameterAttributes.NotNullableValueTypeConstraint) != 0)
                         baseType = ValueType;
                 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any1CharPackedIgnoreCaseSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any1CharPackedIgnoreCaseSearchValues.cs
new file mode 100644
index 000000000000..dfe6b3631f70
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any1CharPackedIgnoreCaseSearchValues.cs
@@ -0,0 +1,52 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+
+namespace System.Buffers
+{
+    internal sealed class Any1CharPackedIgnoreCaseSearchValues : SearchValues<char>
+    {
+        // While this most commonly applies to ASCII letters, it also works for other values that differ by 0x20 (e.g. "[{" => "{").
+        // _lowerCase is therefore not necessarily a lower case ASCII letter, but just the higher value (the one with the 0x20 bit set).
+        private readonly char _lowerCase, _upperCase;
+        private readonly uint _lowerCaseUint;
+
+        public Any1CharPackedIgnoreCaseSearchValues(char value)
+        {
+            Debug.Assert((value | 0x20) == value);
+
+            _lowerCase = value;
+            _upperCase = (char)(value & ~0x20);
+            _lowerCaseUint = value;
+        }
+
+        internal override char[] GetValues() =>
+            [_upperCase, _lowerCase];
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override bool ContainsCore(char value) =>
+            (uint)(value | 0x20) == _lowerCaseUint;
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        internal override int IndexOfAny(ReadOnlySpan<char> span) =>
+            PackedSpanHelpers.IndexOfAnyIgnoreCase(ref MemoryMarshal.GetReference(span), _lowerCase, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        internal override int IndexOfAnyExcept(ReadOnlySpan<char> span) =>
+            PackedSpanHelpers.IndexOfAnyExceptIgnoreCase(ref MemoryMarshal.GetReference(span), _lowerCase, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
+            span.LastIndexOfAny(_lowerCase, _upperCase);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
+            span.LastIndexOfAnyExcept(_lowerCase, _upperCase);
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any1CharPackedSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any1CharPackedSearchValues.cs
new file mode 100644
index 000000000000..e6aa0c870486
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any1CharPackedSearchValues.cs
@@ -0,0 +1,42 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+
+namespace System.Buffers
+{
+    internal sealed class Any1CharPackedSearchValues : SearchValues<char>
+    {
+        private readonly char _e0;
+
+        public Any1CharPackedSearchValues(char value) =>
+            _e0 = value;
+
+        internal override char[] GetValues() =>
+            [_e0];
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override bool ContainsCore(char value) =>
+            value == _e0;
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        internal override int IndexOfAny(ReadOnlySpan<char> span) =>
+            PackedSpanHelpers.IndexOf(ref MemoryMarshal.GetReference(span), _e0, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        internal override int IndexOfAnyExcept(ReadOnlySpan<char> span) =>
+            PackedSpanHelpers.IndexOfAnyExcept(ref MemoryMarshal.GetReference(span), _e0, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
+            span.LastIndexOf(_e0);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
+            span.LastIndexOfAnyExcept(_e0);
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any1SearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any1SearchValues.cs
new file mode 100644
index 000000000000..9a52511b9875
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any1SearchValues.cs
@@ -0,0 +1,52 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+#pragma warning disable 8500 // address of managed types
+
+namespace System.Buffers
+{
+    internal sealed class Any1SearchValues<T, TImpl> : SearchValues<T>
+        where T : struct, IEquatable<T>
+        where TImpl : struct, INumber<TImpl>
+    {
+        private readonly TImpl _e0;
+
+        public Any1SearchValues(ReadOnlySpan<TImpl> values)
+        {
+            Debug.Assert(Unsafe.SizeOf<T>() == Unsafe.SizeOf<TImpl>());
+            Debug.Assert(values.Length == 1);
+            _e0 = values[0];
+        }
+
+        internal override unsafe T[] GetValues()
+        {
+            TImpl e0 = _e0;
+            return new[] { *(T*)&e0 };
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override unsafe bool ContainsCore(T value) =>
+            *(TImpl*)&value == _e0;
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int IndexOfAny(ReadOnlySpan<T> span) =>
+            SpanHelpers.NonPackedIndexOfValueType<TImpl, SpanHelpers.DontNegate<TImpl>>(ref Unsafe.As<T, TImpl>(ref MemoryMarshal.GetReference(span)), _e0, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int IndexOfAnyExcept(ReadOnlySpan<T> span) =>
+            SpanHelpers.NonPackedIndexOfValueType<TImpl, SpanHelpers.Negate<TImpl>>(ref Unsafe.As<T, TImpl>(ref MemoryMarshal.GetReference(span)), _e0, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAny(ReadOnlySpan<T> span) =>
+            SpanHelpers.LastIndexOfValueType(ref Unsafe.As<T, TImpl>(ref MemoryMarshal.GetReference(span)), _e0, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAnyExcept(ReadOnlySpan<T> span) =>
+            SpanHelpers.LastIndexOfAnyExceptValueType(ref Unsafe.As<T, TImpl>(ref MemoryMarshal.GetReference(span)), _e0, span.Length);
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2ByteSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2ByteSearchValues.cs
deleted file mode 100644
index 42f4acbfebf8..000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2ByteSearchValues.cs
+++ /dev/null
@@ -1,41 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-using System.Runtime.CompilerServices;
-
-namespace System.Buffers
-{
-    internal sealed class Any2ByteSearchValues : SearchValues<byte>
-    {
-        private readonly byte _e0, _e1;
-
-        public Any2ByteSearchValues(ReadOnlySpan<byte> values)
-        {
-            Debug.Assert(values.Length == 2);
-            (_e0, _e1) = (values[0], values[1]);
-        }
-
-        internal override byte[] GetValues() => new[] { _e0, _e1 };
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override bool ContainsCore(byte value) =>
-            value == _e0 || value == _e1;
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int IndexOfAny(ReadOnlySpan<byte> span) =>
-            span.IndexOfAny(_e0, _e1);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int IndexOfAnyExcept(ReadOnlySpan<byte> span) =>
-            span.IndexOfAnyExcept(_e0, _e1);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int LastIndexOfAny(ReadOnlySpan<byte> span) =>
-            span.LastIndexOfAny(_e0, _e1);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int LastIndexOfAnyExcept(ReadOnlySpan<byte> span) =>
-            span.LastIndexOfAnyExcept(_e0, _e1);
-    }
-}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedIgnoreCaseSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedIgnoreCaseSearchValues.cs
new file mode 100644
index 000000000000..1073fcf3c818
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedIgnoreCaseSearchValues.cs
@@ -0,0 +1,67 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.Wasm;
+using System.Runtime.Intrinsics.X86;
+
+namespace System.Buffers
+{
+    internal sealed class Any2CharPackedIgnoreCaseSearchValues : SearchValues<char>
+    {
+        // While this most commonly applies to ASCII letters, it also works for other values that differ by 0x20 (e.g. "[]{}" => "{}").
+        // _e0 and _e1 are therefore not necessarily lower case ASCII letters, but just the higher values (the ones with the 0x20 bit set).
+        private readonly char _e0, _e1;
+        private readonly uint _uint0, _uint1;
+        private IndexOfAnyAsciiSearcher.AsciiState _state;
+
+        public Any2CharPackedIgnoreCaseSearchValues(char value0, char value1)
+        {
+            Debug.Assert((value0 | 0x20) == value0 && char.IsAscii(value0));
+            Debug.Assert((value1 | 0x20) == value1 && char.IsAscii(value1));
+
+            (_e0, _e1) = (value0, value1);
+            (_uint0, _uint1) = (value0, value1);
+            IndexOfAnyAsciiSearcher.ComputeAsciiState([(char)(_e0 & ~0x20), _e0, (char)(_e1 & ~0x20), _e1], out _state);
+        }
+
+        internal override char[] GetValues() =>
+            [(char)(_e0 & ~0x20), _e0, (char)(_e1 & ~0x20), _e1];
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override bool ContainsCore(char value)
+        {
+            uint lowerCase = (uint)(value | 0x20);
+            return lowerCase == _uint0 || lowerCase == _uint1;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        internal override int IndexOfAny(ReadOnlySpan<char> span) =>
+            PackedSpanHelpers.IndexOfAnyIgnoreCase(ref MemoryMarshal.GetReference(span), _e0, _e1, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        internal override int IndexOfAnyExcept(ReadOnlySpan<char> span) =>
+            PackedSpanHelpers.IndexOfAnyExceptIgnoreCase(ref MemoryMarshal.GetReference(span), _e0, _e1, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Ssse3))]
+        [CompExactlyDependsOn(typeof(AdvSimd))]
+        [CompExactlyDependsOn(typeof(PackedSimd))]
+        internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
+            IndexOfAnyAsciiSearcher.LastIndexOfAny<IndexOfAnyAsciiSearcher.DontNegate, IndexOfAnyAsciiSearcher.Default>(
+                ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)), span.Length, ref _state);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Ssse3))]
+        [CompExactlyDependsOn(typeof(AdvSimd))]
+        [CompExactlyDependsOn(typeof(PackedSimd))]
+        internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
+            IndexOfAnyAsciiSearcher.LastIndexOfAny<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Default>(
+                ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)), span.Length, ref _state);
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedSearchValues.cs
new file mode 100644
index 000000000000..d951b8e2375e
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedSearchValues.cs
@@ -0,0 +1,42 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+
+namespace System.Buffers
+{
+    internal sealed class Any2CharPackedSearchValues : SearchValues<char>
+    {
+        private readonly char _e0, _e1;
+
+        public Any2CharPackedSearchValues(char value0, char value1) =>
+            (_e0, _e1) = (value0, value1);
+
+        internal override char[] GetValues() =>
+            [_e0, _e1];
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override bool ContainsCore(char value) =>
+            value == _e0 || value == _e1;
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        internal override int IndexOfAny(ReadOnlySpan<char> span) =>
+            PackedSpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), _e0, _e1, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        internal override int IndexOfAnyExcept(ReadOnlySpan<char> span) =>
+            PackedSpanHelpers.IndexOfAnyExcept(ref MemoryMarshal.GetReference(span), _e0, _e1, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
+            span.LastIndexOfAny(_e0, _e1);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
+            span.LastIndexOfAnyExcept(_e0, _e1);
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharSearchValues.cs
deleted file mode 100644
index a2bfde7f6fab..000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharSearchValues.cs
+++ /dev/null
@@ -1,51 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-namespace System.Buffers
-{
-    internal sealed class Any2CharSearchValues<TShouldUsePacked> : SearchValues<char>
-        where TShouldUsePacked : struct, SearchValues.IRuntimeConst
-    {
-        private char _e0, _e1;
-
-        public Any2CharSearchValues(char value0, char value1) =>
-            (_e0, _e1) = (value0, value1);
-
-        internal override char[] GetValues() => new[] { _e0, _e1 };
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override bool ContainsCore(char value) =>
-            value == _e0 || value == _e1;
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int IndexOfAny(ReadOnlySpan<char> span) =>
-            (PackedSpanHelpers.PackedIndexOfIsSupported && TShouldUsePacked.Value)
-                ? PackedSpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), _e0, _e1, span.Length)
-                : SpanHelpers.NonPackedIndexOfAnyValueType<short, SpanHelpers.DontNegate<short>>(
-                    ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
-                    Unsafe.As<char, short>(ref _e0),
-                    Unsafe.As<char, short>(ref _e1),
-                    span.Length);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int IndexOfAnyExcept(ReadOnlySpan<char> span) =>
-            (PackedSpanHelpers.PackedIndexOfIsSupported && TShouldUsePacked.Value)
-                ? PackedSpanHelpers.IndexOfAnyExcept(ref MemoryMarshal.GetReference(span), _e0, _e1, span.Length)
-                : SpanHelpers.NonPackedIndexOfAnyValueType<short, SpanHelpers.Negate<short>>(
-                    ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
-                    Unsafe.As<char, short>(ref _e0),
-                    Unsafe.As<char, short>(ref _e1),
-                    span.Length);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
-            span.LastIndexOfAny(_e0, _e1);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
-            span.LastIndexOfAnyExcept(_e0, _e1);
-    }
-}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2SearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2SearchValues.cs
new file mode 100644
index 000000000000..640a15f05c8f
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2SearchValues.cs
@@ -0,0 +1,52 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+#pragma warning disable 8500 // address of managed types
+
+namespace System.Buffers
+{
+    internal sealed class Any2SearchValues<T, TImpl> : SearchValues<T>
+        where T : struct, IEquatable<T>
+        where TImpl : struct, INumber<TImpl>
+    {
+        private readonly TImpl _e0, _e1;
+
+        public Any2SearchValues(ReadOnlySpan<TImpl> values)
+        {
+            Debug.Assert(Unsafe.SizeOf<T>() == Unsafe.SizeOf<TImpl>());
+            Debug.Assert(values.Length == 2);
+            (_e0, _e1) = (values[0], values[1]);
+        }
+
+        internal override unsafe T[] GetValues()
+        {
+            TImpl e0 = _e0, e1 = _e1;
+            return new[] { *(T*)&e0, *(T*)&e1 };
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override unsafe bool ContainsCore(T value) =>
+            *(TImpl*)&value == _e0 || *(TImpl*)&value == _e1;
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int IndexOfAny(ReadOnlySpan<T> span) =>
+            SpanHelpers.NonPackedIndexOfAnyValueType<TImpl, SpanHelpers.DontNegate<TImpl>>(ref Unsafe.As<T, TImpl>(ref MemoryMarshal.GetReference(span)), _e0, _e1, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int IndexOfAnyExcept(ReadOnlySpan<T> span) =>
+            SpanHelpers.NonPackedIndexOfAnyValueType<TImpl, SpanHelpers.Negate<TImpl>>(ref Unsafe.As<T, TImpl>(ref MemoryMarshal.GetReference(span)), _e0, _e1, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAny(ReadOnlySpan<T> span) =>
+            SpanHelpers.LastIndexOfAnyValueType(ref Unsafe.As<T, TImpl>(ref MemoryMarshal.GetReference(span)), _e0, _e1, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAnyExcept(ReadOnlySpan<T> span) =>
+            SpanHelpers.LastIndexOfAnyExceptValueType(ref Unsafe.As<T, TImpl>(ref MemoryMarshal.GetReference(span)), _e0, _e1, span.Length);
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any3ByteSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any3ByteSearchValues.cs
deleted file mode 100644
index d7208dea4350..000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any3ByteSearchValues.cs
+++ /dev/null
@@ -1,41 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-using System.Runtime.CompilerServices;
-
-namespace System.Buffers
-{
-    internal sealed class Any3ByteSearchValues : SearchValues<byte>
-    {
-        private readonly byte _e0, _e1, _e2;
-
-        public Any3ByteSearchValues(ReadOnlySpan<byte> values)
-        {
-            Debug.Assert(values.Length == 3);
-            (_e0, _e1, _e2) = (values[0], values[1], values[2]);
-        }
-
-        internal override byte[] GetValues() => new[] { _e0, _e1, _e2 };
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override bool ContainsCore(byte value) =>
-            value == _e0 || value == _e1 || value == _e2;
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int IndexOfAny(ReadOnlySpan<byte> span) =>
-            span.IndexOfAny(_e0, _e1, _e2);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int IndexOfAnyExcept(ReadOnlySpan<byte> span) =>
-            span.IndexOfAnyExcept(_e0, _e1, _e2);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int LastIndexOfAny(ReadOnlySpan<byte> span) =>
-            span.LastIndexOfAny(_e0, _e1, _e2);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int LastIndexOfAnyExcept(ReadOnlySpan<byte> span) =>
-            span.LastIndexOfAnyExcept(_e0, _e1, _e2);
-    }
-}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any3CharPackedSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any3CharPackedSearchValues.cs
new file mode 100644
index 000000000000..cebb695aa3fb
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any3CharPackedSearchValues.cs
@@ -0,0 +1,42 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+
+namespace System.Buffers
+{
+    internal sealed class Any3CharPackedSearchValues : SearchValues<char>
+    {
+        private readonly char _e0, _e1, _e2;
+
+        public Any3CharPackedSearchValues(char value0, char value1, char value2) =>
+            (_e0, _e1, _e2) = (value0, value1, value2);
+
+        internal override char[] GetValues() =>
+            [_e0, _e1, _e2];
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override bool ContainsCore(char value) =>
+            value == _e0 || value == _e1 || value == _e2;
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        internal override int IndexOfAny(ReadOnlySpan<char> span) =>
+            PackedSpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), _e0, _e1, _e2, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        internal override int IndexOfAnyExcept(ReadOnlySpan<char> span) =>
+            PackedSpanHelpers.IndexOfAnyExcept(ref MemoryMarshal.GetReference(span), _e0, _e1, _e2, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
+            span.LastIndexOfAny(_e0, _e1, _e2);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
+            span.LastIndexOfAnyExcept(_e0, _e1, _e2);
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any3CharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any3CharSearchValues.cs
deleted file mode 100644
index c69dbff8d0a4..000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any3CharSearchValues.cs
+++ /dev/null
@@ -1,53 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-namespace System.Buffers
-{
-    internal sealed class Any3CharSearchValues<TShouldUsePacked> : SearchValues<char>
-        where TShouldUsePacked : struct, SearchValues.IRuntimeConst
-    {
-        private char _e0, _e1, _e2;
-
-        public Any3CharSearchValues(char value0, char value1, char value2) =>
-            (_e0, _e1, _e2) = (value0, value1, value2);
-
-        internal override char[] GetValues() => new[] { _e0, _e1, _e2 };
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override bool ContainsCore(char value) =>
-            value == _e0 || value == _e1 || value == _e2;
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int IndexOfAny(ReadOnlySpan<char> span) =>
-            (PackedSpanHelpers.PackedIndexOfIsSupported && TShouldUsePacked.Value)
-                ? PackedSpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), _e0, _e1, _e2, span.Length)
-                : SpanHelpers.NonPackedIndexOfAnyValueType<short, SpanHelpers.DontNegate<short>>(
-                    ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
-                    Unsafe.As<char, short>(ref _e0),
-                    Unsafe.As<char, short>(ref _e1),
-                    Unsafe.As<char, short>(ref _e2),
-                    span.Length);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int IndexOfAnyExcept(ReadOnlySpan<char> span) =>
-            (PackedSpanHelpers.PackedIndexOfIsSupported && TShouldUsePacked.Value)
-                ? PackedSpanHelpers.IndexOfAnyExcept(ref MemoryMarshal.GetReference(span), _e0, _e1, _e2, span.Length)
-                : SpanHelpers.NonPackedIndexOfAnyValueType<short, SpanHelpers.Negate<short>>(
-                    ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
-                    Unsafe.As<char, short>(ref _e0),
-                    Unsafe.As<char, short>(ref _e1),
-                    Unsafe.As<char, short>(ref _e2),
-                    span.Length);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
-            span.LastIndexOfAny(_e0, _e1, _e2);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
-            span.LastIndexOfAnyExcept(_e0, _e1, _e2);
-    }
-}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any3SearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any3SearchValues.cs
new file mode 100644
index 000000000000..f166aef42703
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any3SearchValues.cs
@@ -0,0 +1,52 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+#pragma warning disable 8500 // address of managed types
+
+namespace System.Buffers
+{
+    internal sealed class Any3SearchValues<T, TImpl> : SearchValues<T>
+        where T : struct, IEquatable<T>
+        where TImpl : struct, INumber<TImpl>
+    {
+        private readonly TImpl _e0, _e1, _e2;
+
+        public Any3SearchValues(ReadOnlySpan<TImpl> values)
+        {
+            Debug.Assert(Unsafe.SizeOf<T>() == Unsafe.SizeOf<TImpl>());
+            Debug.Assert(values.Length == 3);
+            (_e0, _e1, _e2) = (values[0], values[1], values[2]);
+        }
+
+        internal override unsafe T[] GetValues()
+        {
+            TImpl e0 = _e0, e1 = _e1, e2 = _e2;
+            return new[] { *(T*)&e0, *(T*)&e1, *(T*)&e2 };
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override unsafe bool ContainsCore(T value) =>
+            *(TImpl*)&value == _e0 || *(TImpl*)&value == _e1 || *(TImpl*)&value == _e2;
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int IndexOfAny(ReadOnlySpan<T> span) =>
+            SpanHelpers.NonPackedIndexOfAnyValueType<TImpl, SpanHelpers.DontNegate<TImpl>>(ref Unsafe.As<T, TImpl>(ref MemoryMarshal.GetReference(span)), _e0, _e1, _e2, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int IndexOfAnyExcept(ReadOnlySpan<T> span) =>
+            SpanHelpers.NonPackedIndexOfAnyValueType<TImpl, SpanHelpers.Negate<TImpl>>(ref Unsafe.As<T, TImpl>(ref MemoryMarshal.GetReference(span)), _e0, _e1, _e2, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAny(ReadOnlySpan<T> span) =>
+            SpanHelpers.LastIndexOfAnyValueType(ref Unsafe.As<T, TImpl>(ref MemoryMarshal.GetReference(span)), _e0, _e1, _e2, span.Length);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal override int LastIndexOfAnyExcept(ReadOnlySpan<T> span) =>
+            SpanHelpers.LastIndexOfAnyExceptValueType(ref Unsafe.As<T, TImpl>(ref MemoryMarshal.GetReference(span)), _e0, _e1, _e2, span.Length);
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs
index bc8b3fd0c6c8..deccdddb8fec 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs
@@ -10,7 +10,6 @@
 using System.Runtime.Intrinsics.X86;
 
 #pragma warning disable 8500 // sizeof of managed types
-#pragma warning disable IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
 
 namespace System.Buffers
 {
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs
index 150372914d8b..076340bebeba 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs
@@ -10,7 +10,6 @@
 using System.Runtime.Intrinsics.Wasm;
 using System.Runtime.Intrinsics.X86;
 
-#pragma warning disable IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
 
 namespace System.Buffers
 {
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs
index 13c6779a7d98..3c1805afdf61 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs
@@ -34,7 +34,7 @@ public static SearchValues<byte> Create(ReadOnlySpan<byte> values)
 
             if (values.Length == 1)
             {
-                return new SingleByteSearchValues(values);
+                return new Any1SearchValues<byte, byte>(values);
             }
 
             // RangeByteSearchValues is slower than SingleByteSearchValues, but faster than Any2ByteSearchValues
@@ -48,8 +48,8 @@ public static SearchValues<byte> Create(ReadOnlySpan<byte> values)
                 Debug.Assert(values.Length is 2 or 3 or 4 or 5);
                 return values.Length switch
                 {
-                    2 => new Any2ByteSearchValues(values),
-                    3 => new Any3ByteSearchValues(values),
+                    2 => new Any2SearchValues<byte, byte>(values),
+                    3 => new Any3SearchValues<byte, byte>(values),
                     4 => new Any4SearchValues<byte, byte>(values),
                     _ => new Any5SearchValues<byte, byte>(values),
                 };
@@ -75,12 +75,16 @@ public static SearchValues<char> Create(ReadOnlySpan<char> values)
                 return new EmptySearchValues<char>();
             }
 
+            // Vector128<char> isn't valid. Treat the values as shorts instead.
+            ReadOnlySpan<short> shortValues = MemoryMarshal.Cast<char, short>(values);
+
             if (values.Length == 1)
             {
                 char value = values[0];
+
                 return PackedSpanHelpers.PackedIndexOfIsSupported && PackedSpanHelpers.CanUsePackedIndexOf(value)
-                    ? new SingleCharSearchValues<TrueConst>(value)
-                    : new SingleCharSearchValues<FalseConst>(value);
+                    ? new Any1CharPackedSearchValues(value)
+                    : new Any1SearchValues<char, short>(shortValues);
             }
 
             // RangeCharSearchValues is slower than SingleCharSearchValues, but faster than Any2CharSearchValues
@@ -95,9 +99,18 @@ public static SearchValues<char> Create(ReadOnlySpan<char> values)
             {
                 char value0 = values[0];
                 char value1 = values[1];
-                return PackedSpanHelpers.PackedIndexOfIsSupported && PackedSpanHelpers.CanUsePackedIndexOf(value0) && PackedSpanHelpers.CanUsePackedIndexOf(value1)
-                    ? new Any2CharSearchValues<TrueConst>(value0, value1)
-                    : new Any2CharSearchValues<FalseConst>(value0, value1);
+
+                if (PackedSpanHelpers.PackedIndexOfIsSupported && PackedSpanHelpers.CanUsePackedIndexOf(value0) && PackedSpanHelpers.CanUsePackedIndexOf(value1))
+                {
+                    // If the two values are the same ASCII letter with both cases, we can use an approach that
+                    // reduces the number of comparisons by masking off the bit that differs between lower and upper case (0x20).
+                    // While this most commonly applies to ASCII letters, it also works for other values that differ by 0x20 (e.g. "[{" => "{").
+                    return (value0 ^ value1) == 0x20
+                        ? new Any1CharPackedIgnoreCaseSearchValues((char)Math.Max(value0, value1))
+                        : new Any2CharPackedSearchValues(value0, value1);
+                }
+
+                return new Any2SearchValues<char, short>(shortValues);
             }
 
             if (values.Length == 3)
@@ -105,24 +118,37 @@ public static SearchValues<char> Create(ReadOnlySpan<char> values)
                 char value0 = values[0];
                 char value1 = values[1];
                 char value2 = values[2];
+
                 return PackedSpanHelpers.PackedIndexOfIsSupported && PackedSpanHelpers.CanUsePackedIndexOf(value0) && PackedSpanHelpers.CanUsePackedIndexOf(value1) && PackedSpanHelpers.CanUsePackedIndexOf(value2)
-                    ? new Any3CharSearchValues<TrueConst>(value0, value1, value2)
-                    : new Any3CharSearchValues<FalseConst>(value0, value1, value2);
+                    ? new Any3CharPackedSearchValues(value0, value1, value2)
+                    : new Any3SearchValues<char, short>(shortValues);
             }
 
             // IndexOfAnyAsciiSearcher for chars is slower than Any3CharSearchValues, but faster than Any4SearchValues
             if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && maxInclusive < 128)
             {
+                // If the values are sets of 2 ASCII letters with both cases, we can use an approach that
+                // reduces the number of comparisons by masking off the bit that differs between lower and upper case (0x20).
+                // While this most commonly applies to ASCII letters, it also works for other values that differ by 0x20 (e.g. "[]{}" => "{}").
+                if (PackedSpanHelpers.PackedIndexOfIsSupported && values.Length == 4 && minInclusive > 0)
+                {
+                    Span<char> copy = stackalloc char[4];
+                    values.CopyTo(copy);
+                    copy.Sort();
+
+                    if ((copy[0] ^ copy[2]) == 0x20 &&
+                        (copy[1] ^ copy[3]) == 0x20)
+                    {
+                        // We pick the higher two values (with the 0x20 bit set). "AaBb" => 'a', 'b'
+                        return new Any2CharPackedIgnoreCaseSearchValues(copy[2], copy[3]);
+                    }
+                }
+
                 return (Ssse3.IsSupported || PackedSimd.IsSupported) && minInclusive == 0
                     ? new AsciiCharSearchValues<IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(values)
                     : new AsciiCharSearchValues<IndexOfAnyAsciiSearcher.Default>(values);
             }
 
-            // Vector128<char> isn't valid. Treat the values as shorts instead.
-            ReadOnlySpan<short> shortValues = MemoryMarshal.CreateReadOnlySpan(
-                ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(values)),
-                values.Length);
-
             if (values.Length == 4)
             {
                 return new Any4SearchValues<char, short>(shortValues);
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SingleByteSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SingleByteSearchValues.cs
deleted file mode 100644
index b768c2541562..000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SingleByteSearchValues.cs
+++ /dev/null
@@ -1,41 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-using System.Runtime.CompilerServices;
-
-namespace System.Buffers
-{
-    internal sealed class SingleByteSearchValues : SearchValues<byte>
-    {
-        private readonly byte _e0;
-
-        public SingleByteSearchValues(ReadOnlySpan<byte> values)
-        {
-            Debug.Assert(values.Length == 1);
-            _e0 = values[0];
-        }
-
-        internal override byte[] GetValues() => new[] { _e0 };
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override bool ContainsCore(byte value) =>
-            value == _e0;
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int IndexOfAny(ReadOnlySpan<byte> span) =>
-            span.IndexOf(_e0);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int IndexOfAnyExcept(ReadOnlySpan<byte> span) =>
-            span.IndexOfAnyExcept(_e0);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int LastIndexOfAny(ReadOnlySpan<byte> span) =>
-            span.LastIndexOf(_e0);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int LastIndexOfAnyExcept(ReadOnlySpan<byte> span) =>
-            span.LastIndexOfAnyExcept(_e0);
-    }
-}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SingleCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SingleCharSearchValues.cs
deleted file mode 100644
index b1348e3859b0..000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SingleCharSearchValues.cs
+++ /dev/null
@@ -1,49 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-namespace System.Buffers
-{
-    internal sealed class SingleCharSearchValues<TShouldUsePacked> : SearchValues<char>
-        where TShouldUsePacked : struct, SearchValues.IRuntimeConst
-    {
-        private char _e0;
-
-        public SingleCharSearchValues(char value) =>
-            _e0 = value;
-
-        internal override char[] GetValues() => new[] { _e0 };
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override bool ContainsCore(char value) =>
-            value == _e0;
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int IndexOfAny(ReadOnlySpan<char> span) =>
-            (PackedSpanHelpers.PackedIndexOfIsSupported && TShouldUsePacked.Value)
-                ? PackedSpanHelpers.IndexOf(ref MemoryMarshal.GetReference(span), _e0, span.Length)
-                : SpanHelpers.NonPackedIndexOfValueType<short, SpanHelpers.DontNegate<short>>(
-                    ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
-                    Unsafe.As<char, short>(ref _e0),
-                    span.Length);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int IndexOfAnyExcept(ReadOnlySpan<char> span) =>
-            (PackedSpanHelpers.PackedIndexOfIsSupported && TShouldUsePacked.Value)
-                ? PackedSpanHelpers.IndexOfAnyExcept(ref MemoryMarshal.GetReference(span), _e0, span.Length)
-                : SpanHelpers.NonPackedIndexOfValueType<short, SpanHelpers.Negate<short>>(
-                    ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
-                    Unsafe.As<char, short>(ref _e0),
-                    span.Length);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int LastIndexOfAny(ReadOnlySpan<char> span) =>
-            span.LastIndexOf(_e0);
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span) =>
-            span.LastIndexOfAnyExcept(_e0);
-    }
-}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs
index e465aae605fb..df6c8a90ac47 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs
@@ -109,7 +109,7 @@ internal abstract class AsciiStringSearchValuesTeddyBase<TBucketized, TStartCase
         // We may have up to 8 buckets.
         // If we have <= 8 strings, the buckets will be the strings themselves, and TBucketized.Value will be false.
         // If we have more than 8, the buckets will be string[], and TBucketized.Value will be true.
-        private readonly EightPackedReferences _buckets;
+        private readonly EightObjects _buckets;
 
         private readonly Vector512<byte>
             _n0Low, _n0High,
@@ -121,9 +121,7 @@ protected AsciiStringSearchValuesTeddyBase(ReadOnlySpan<string> values, HashSet<
             Debug.Assert(!TBucketized.Value);
             Debug.Assert(n is 2 or 3);
 
-            _buckets = new EightPackedReferences(MemoryMarshal.CreateReadOnlySpan(
-                ref Unsafe.As<string, object>(ref MemoryMarshal.GetReference(values)),
-                values.Length));
+            ReadOnlySpan<object?>.CastUp(values).CopyTo(_buckets);
 
             (_n0Low, _n0High) = TeddyBucketizer.GenerateNonBucketizedFingerprint(values, offset: 0);
             (_n1Low, _n1High) = TeddyBucketizer.GenerateNonBucketizedFingerprint(values, offset: 1);
@@ -139,7 +137,7 @@ protected AsciiStringSearchValuesTeddyBase(string[][] buckets, ReadOnlySpan<stri
             Debug.Assert(TBucketized.Value);
             Debug.Assert(n is 2 or 3);
 
-            _buckets = new EightPackedReferences(buckets);
+            ((ReadOnlySpan<object?>)buckets).CopyTo(_buckets);
 
             (_n0Low, _n0High) = TeddyBucketizer.GenerateBucketizedFingerprint(buckets, offset: 0);
             (_n1Low, _n1High) = TeddyBucketizer.GenerateBucketizedFingerprint(buckets, offset: 1);
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/EightPackedReferences.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/EightPackedReferences.cs
deleted file mode 100644
index b85a7e145c51..000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/EightPackedReferences.cs
+++ /dev/null
@@ -1,23 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-using System.Runtime.CompilerServices;
-
-namespace System.Buffers
-{
-    [InlineArray(8)]
-    internal struct EightPackedReferences
-    {
-#pragma warning disable CA1823 // Unused field -- https://github.com/dotnet/roslyn-analyzers/issues/6788
-        private object? _ref0;
-#pragma warning restore CA1823
-
-        public EightPackedReferences(ReadOnlySpan<object> values)
-        {
-            Debug.Assert(values.Length is > 0 and <= 8, $"Got {values.Length} values");
-
-            values.CopyTo(this!);
-        }
-    }
-}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/RabinKarp.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/RabinKarp.cs
index d420c70d16c1..d8970655cb31 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/RabinKarp.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/RabinKarp.cs
@@ -64,6 +64,12 @@ public RabinKarp(ReadOnlySpan<string> values)
 
             foreach (string value in values)
             {
+                if (value.Length > MaxInputLength)
+                {
+                    // This value can never match. There's no point in including it in the buckets.
+                    continue;
+                }
+
                 nuint hash = 0;
                 for (int i = 0; i < minimumLength; i++)
                 {
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs
index 86a13dd04b9b..e2ae3c61b044 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs
@@ -3,6 +3,7 @@
 
 using System.Collections.Generic;
 using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
 using System.Globalization;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.Arm;
@@ -14,11 +15,13 @@ namespace System.Buffers
 {
     internal static class StringSearchValues
     {
+        private const int TeddyBucketCount = 8;
+
         private static readonly SearchValues<char> s_asciiLetters =
             SearchValues.Create("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
 
         private static readonly SearchValues<char> s_allAsciiExceptLowercase =
-            SearchValues.Create("\0\u0001\u0002\u0003\u0004\u0005\u0006\a\b\t\n\v\f\r\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`{|}~\u007F");
+            SearchValues.Create("\0\u0001\u0002\u0003\u0004\u0005\u0006\a\b\t\n\v\f\r\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\e\u001C\u001D\u001E\u001F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`{|}~\u007F");
 
         public static SearchValues<string> Create(ReadOnlySpan<string> values, bool ignoreCase)
         {
@@ -248,6 +251,18 @@ static SearchValues<string> PickAhoCorasickImplementation<TCaseSensitivity>(AhoC
 
             Debug.Assert(!(asciiStartLettersOnly && asciiStartUnaffectedByCaseConversion));
 
+            // If we still have empty buckets we could use and we're ignoring case, we may be able to
+            // generate all possible permutations of the first N characters and switch to case-sensitive searching.
+            // E.g. ["ab", "c!"] => ["ab", "Ab" "aB", "AB", "c!", "C!"].
+            // This won't apply to inputs with many letters (e.g. "abc" => 8 permutations on its own).
+            if (!asciiStartUnaffectedByCaseConversion &&
+                values.Length < TeddyBucketCount &&
+                TryGenerateAllCasePermutationsForPrefixes(values, n, TeddyBucketCount, out string[]? newValues))
+            {
+                asciiStartUnaffectedByCaseConversion = true;
+                values = newValues;
+            }
+
             if (asciiStartUnaffectedByCaseConversion)
             {
                 return nonAsciiAffectedByCaseConversion
@@ -278,9 +293,9 @@ private static SearchValues<string> PickTeddyImplementation<TStartCaseSensitivit
             Debug.Assert(values.Length > 1);
             Debug.Assert(n is 2 or 3);
 
-            if (values.Length > 8)
+            if (values.Length > TeddyBucketCount)
             {
-                string[][] buckets = TeddyBucketizer.Bucketize(values, bucketCount: 8, n);
+                string[][] buckets = TeddyBucketizer.Bucketize(values, TeddyBucketCount, n);
 
                 // Potential optimization: We don't have to pick the first N characters for the fingerprint.
                 // Different offset selection can noticeably improve throughput (e.g. 2x).
@@ -297,6 +312,68 @@ private static SearchValues<string> PickTeddyImplementation<TStartCaseSensitivit
             }
         }
 
+        private static bool TryGenerateAllCasePermutationsForPrefixes(ReadOnlySpan<string> values, int n, int maxValues, [NotNullWhen(true)] out string[]? newValues)
+        {
+            Debug.Assert(n is 2 or 3);
+            Debug.Assert(values.Length < maxValues);
+
+            // Count how many possible permutations there are.
+            int newValuesCount = 0;
+
+            foreach (string value in values)
+            {
+                int permutations = 1;
+
+                foreach (char c in value.AsSpan(0, n))
+                {
+                    Debug.Assert(char.IsAscii(c));
+
+                    if (char.IsAsciiLetter(c))
+                    {
+                        permutations *= 2;
+                    }
+                }
+
+                newValuesCount += permutations;
+            }
+
+            Debug.Assert(newValuesCount > values.Length, "Shouldn't have been called if there were no letters present");
+
+            if (newValuesCount > maxValues)
+            {
+                newValues = null;
+                return false;
+            }
+
+            // Generate the permutations.
+            newValues = new string[newValuesCount];
+            newValuesCount = 0;
+
+            foreach (string value in values)
+            {
+                int start = newValuesCount;
+
+                newValues[newValuesCount++] = value;
+
+                for (int i = 0; i < n; i++)
+                {
+                    char c = value[i];
+
+                    if (char.IsAsciiLetter(c))
+                    {
+                        // Copy all the previous permutations of this value but change the casing of the i-th character.
+                        foreach (string previous in newValues.AsSpan(start, newValuesCount - start))
+                        {
+                            newValues[newValuesCount++] = $"{previous.AsSpan(0, i)}{(char)(c ^ 0x20)}{previous.AsSpan(i + 1)}";
+                        }
+                    }
+                }
+            }
+
+            Debug.Assert(newValuesCount == newValues.Length);
+            return true;
+        }
+
         private static SearchValues<string> CreateForSingleValue(
             string value,
             HashSet<string>? uniqueValues,
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Security/DynamicSecurityMethodAttribute.cs b/src/libraries/System.Private.CoreLib/src/System/Security/DynamicSecurityMethodAttribute.cs
similarity index 100%
rename from src/coreclr/System.Private.CoreLib/src/System/Security/DynamicSecurityMethodAttribute.cs
rename to src/libraries/System.Private.CoreLib/src/System/Security/DynamicSecurityMethodAttribute.cs
diff --git a/src/libraries/System.Private.CoreLib/src/System/Span.cs b/src/libraries/System.Private.CoreLib/src/System/Span.cs
index aaf3763d81b7..38b94e872b0c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Span.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Span.cs
@@ -126,7 +126,6 @@ public Span(ref T reference)
             _length = 1;
         }
 
-#pragma warning disable IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
         // Constructor for internal use only. It is not safe to expose publicly, and is instead exposed via the unsafe MemoryMarshal.CreateSpan.
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal Span(ref T reference, int length)
@@ -136,7 +135,6 @@ internal Span(ref T reference, int length)
             _reference = ref reference;
             _length = length;
         }
-#pragma warning restore IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
 
         /// <summary>
         /// Returns a reference to specified element of the Span.
@@ -300,19 +298,7 @@ public unsafe void Clear()
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public unsafe void Fill(T value)
         {
-            if (sizeof(T) == 1)
-            {
-                // Special-case single-byte types like byte / sbyte / bool.
-                // The runtime eventually calls memset, which can efficiently support large buffers.
-                // We don't need to check IsReferenceOrContainsReferences because no references
-                // can ever be stored in types this small.
-                Unsafe.InitBlockUnaligned(ref Unsafe.As<T, byte>(ref _reference), *(byte*)&value, (uint)_length);
-            }
-            else
-            {
-                // Call our optimized workhorse method for all other types.
-                SpanHelpers.Fill(ref _reference, (uint)_length, value);
-            }
+            SpanHelpers.Fill(ref _reference, (uint)_length, value);
         }
 
         /// <summary>
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
new file mode 100644
index 000000000000..b4aa563b2774
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
@@ -0,0 +1,537 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#if TARGET_AMD64 || TARGET_ARM64 || (TARGET_32BIT && !TARGET_ARM) || TARGET_LOONGARCH64
+// JIT is guaranteed to unroll blocks up to 64 bytes in size
+#define HAS_CUSTOM_BLOCKS
+#endif
+
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace System
+{
+    internal static partial class SpanHelpers // .ByteMemOps
+    {
+#if TARGET_ARM64 || TARGET_LOONGARCH64
+        private const ulong MemmoveNativeThreshold = ulong.MaxValue;
+#elif TARGET_ARM
+        private const nuint MemmoveNativeThreshold = 512;
+#else
+        private const nuint MemmoveNativeThreshold = 2048;
+#endif
+        private const nuint ZeroMemoryNativeThreshold = 1024;
+
+
+#if HAS_CUSTOM_BLOCKS
+        [StructLayout(LayoutKind.Sequential, Size = 16)]
+        private struct Block16 {}
+
+        [StructLayout(LayoutKind.Sequential, Size = 64)]
+        private struct Block64 {}
+#endif // HAS_CUSTOM_BLOCKS
+
+#if NATIVEAOT
+        [System.Runtime.RuntimeExport("RhSpanHelpers_MemCopy")]
+#endif
+        [Intrinsic] // Unrolled for small constant lengths
+        internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len)
+        {
+            // P/Invoke into the native version when the buffers are overlapping.
+            if ((nuint)Unsafe.ByteOffset(ref src, ref dest) < len ||
+                (nuint)Unsafe.ByteOffset(ref dest, ref src) < len)
+            {
+                goto BuffersOverlap;
+            }
+
+            ref byte srcEnd = ref Unsafe.Add(ref src, len);
+            ref byte destEnd = ref Unsafe.Add(ref dest, len);
+
+            if (len <= 16)
+                goto MCPY02;
+            if (len > 64)
+                goto MCPY05;
+
+        MCPY00:
+            // Copy bytes which are multiples of 16 and leave the remainder for MCPY01 to handle.
+            Debug.Assert(len > 16 && len <= 64);
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.As<byte, Block16>(ref dest) = Unsafe.As<byte, Block16>(ref src); // [0,16]
+#elif TARGET_64BIT
+            Unsafe.As<byte, long>(ref dest) = Unsafe.As<byte, long>(ref src);
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 8)); // [0,16]
+#else
+            Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 4));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 8));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 12)); // [0,16]
+#endif
+            if (len <= 32)
+                goto MCPY01;
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.As<byte, Block16>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, Block16>(ref Unsafe.Add(ref src, 16)); // [0,32]
+#elif TARGET_64BIT
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 16));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 24)); // [0,32]
+#else
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 16));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 20)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 20));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 24));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 28)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 28)); // [0,32]
+#endif
+            if (len <= 48)
+                goto MCPY01;
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.As<byte, Block16>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, Block16>(ref Unsafe.Add(ref src, 32)); // [0,48]
+#elif TARGET_64BIT
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 32));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 40)); // [0,48]
+#else
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 32));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 36)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 36));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 40));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 44)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 44)); // [0,48]
+#endif
+
+        MCPY01:
+            // Unconditionally copy the last 16 bytes using destEnd and srcEnd and return.
+            Debug.Assert(len > 16 && len <= 64);
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.As<byte, Block16>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, Block16>(ref Unsafe.Add(ref srcEnd, -16));
+#elif TARGET_64BIT
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -16));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -8));
+#else
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -16));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -12));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -8));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
+#endif
+            return;
+
+        MCPY02:
+            // Copy the first 8 bytes and then unconditionally copy the last 8 bytes and return.
+            if ((len & 24) == 0)
+                goto MCPY03;
+            Debug.Assert(len >= 8 && len <= 16);
+#if TARGET_64BIT
+            Unsafe.As<byte, long>(ref dest) = Unsafe.As<byte, long>(ref src);
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -8));
+#else
+            Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 4));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -8));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
+#endif
+            return;
+
+        MCPY03:
+            // Copy the first 4 bytes and then unconditionally copy the last 4 bytes and return.
+            if ((len & 4) == 0)
+                goto MCPY04;
+            Debug.Assert(len >= 4 && len < 8);
+            Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
+            return;
+
+        MCPY04:
+            // Copy the first byte. For pending bytes, do an unconditionally copy of the last 2 bytes and return.
+            Debug.Assert(len < 4);
+            if (len == 0)
+                return;
+            dest = src;
+            if ((len & 2) == 0)
+                return;
+            Unsafe.As<byte, short>(ref Unsafe.Add(ref destEnd, -2)) = Unsafe.As<byte, short>(ref Unsafe.Add(ref srcEnd, -2));
+            return;
+
+        MCPY05:
+            // PInvoke to the native version when the copy length exceeds the threshold.
+            if (len > MemmoveNativeThreshold)
+            {
+                goto PInvoke;
+            }
+
+#if HAS_CUSTOM_BLOCKS
+            if (len >= 256)
+            {
+                // Try to opportunistically align the destination below. The input isn't pinned, so the GC
+                // is free to move the references. We're therefore assuming that reads may still be unaligned.
+                //
+                // dest is more important to align than src because an unaligned store is more expensive
+                // than an unaligned load.
+                nuint misalignedElements = 64 - Unsafe.OpportunisticMisalignment(ref dest, 64);
+                Unsafe.As<byte, Block64>(ref dest) = Unsafe.As<byte, Block64>(ref src);
+                src = ref Unsafe.Add(ref src, misalignedElements);
+                dest = ref Unsafe.Add(ref dest, misalignedElements);
+                len -= misalignedElements;
+            }
+#endif
+
+            // Copy 64-bytes at a time until the remainder is less than 64.
+            // If remainder is greater than 16 bytes, then jump to MCPY00. Otherwise, unconditionally copy the last 16 bytes and return.
+            Debug.Assert(len > 64 && len <= MemmoveNativeThreshold);
+            nuint n = len >> 6;
+
+        MCPY06:
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.As<byte, Block64>(ref dest) = Unsafe.As<byte, Block64>(ref src);
+#elif TARGET_64BIT
+            Unsafe.As<byte, long>(ref dest) = Unsafe.As<byte, long>(ref src);
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 8));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 16));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 24));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 32));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 40));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 48)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 48));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 56)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 56));
+#else
+            Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 4));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 8));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 12));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 16));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 20)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 20));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 24));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 28)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 28));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 32));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 36)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 36));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 40));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 44)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 44));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 48)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 48));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 52)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 52));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 56)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 56));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 60)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 60));
+#endif
+            dest = ref Unsafe.Add(ref dest, 64);
+            src = ref Unsafe.Add(ref src, 64);
+            n--;
+            if (n != 0)
+                goto MCPY06;
+
+            len %= 64;
+            if (len > 16)
+                goto MCPY00;
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.As<byte, Block16>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, Block16>(ref Unsafe.Add(ref srcEnd, -16));
+#elif TARGET_64BIT
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -16));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -8));
+#else
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -16));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -12));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -8));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
+#endif
+            return;
+
+        BuffersOverlap:
+            Debug.Assert(len > 0);
+            // If the buffers overlap perfectly, there's no point to copying the data.
+            if (Unsafe.AreSame(ref dest, ref src))
+            {
+                // Both could be null with a non-zero length, perform an implicit null check.
+                _ = Unsafe.ReadUnaligned<byte>(ref dest);
+                return;
+            }
+
+        PInvoke:
+            // Implicit nullchecks
+            Debug.Assert(len > 0);
+            _ = Unsafe.ReadUnaligned<byte>(ref dest);
+            _ = Unsafe.ReadUnaligned<byte>(ref src);
+            Buffer._Memmove(ref dest, ref src, len);
+        }
+
+#if NATIVEAOT
+        [System.Runtime.RuntimeExport("RhSpanHelpers_MemZero")]
+#endif
+        [Intrinsic] // Unrolled for small sizes
+        public static unsafe void ClearWithoutReferences(ref byte dest, nuint len)
+        {
+            if (len == 0)
+                return;
+
+            ref byte destEnd = ref Unsafe.Add(ref dest, len);
+
+            if (len <= 16)
+                goto MZER02;
+            if (len > 64)
+                goto MZER05;
+
+        MZER00:
+            // Clear bytes which are multiples of 16 and leave the remainder for MZER01 to handle.
+            Debug.Assert(len > 16 && len <= 64);
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.WriteUnaligned<Block16>(ref dest, default);
+#elif TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref dest, 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref dest, 8), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref dest, 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 4), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 8), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 12), 0);
+#endif
+            if (len <= 32)
+                goto MZER01;
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.WriteUnaligned<Block16>(ref Unsafe.Add(ref dest, 16), default);
+#elif TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref dest, 16), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref dest, 24), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 16), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 20), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 24), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 28), 0);
+#endif
+            if (len <= 48)
+                goto MZER01;
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.WriteUnaligned<Block16>(ref Unsafe.Add(ref dest, 32), default);
+#elif TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref dest, 32), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref dest, 40), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 32), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 36), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 40), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 44), 0);
+#endif
+
+        MZER01:
+            // Unconditionally clear the last 16 bytes using destEnd and return.
+            Debug.Assert(len > 16 && len <= 64);
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.WriteUnaligned<Block16>(ref Unsafe.Add(ref destEnd, -16), default);
+#elif TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref destEnd, -16), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref destEnd, -8), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref destEnd, -16), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref destEnd, -12), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref destEnd, -8), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref destEnd, -4), 0);
+#endif
+            return;
+
+        MZER02:
+            // Clear the first 8 bytes and then unconditionally clear the last 8 bytes and return.
+            if ((len & 24) == 0)
+                goto MZER03;
+            Debug.Assert(len >= 8 && len <= 16);
+#if TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref dest, 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref destEnd, -8), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref dest, 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 4), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref destEnd, -8), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref destEnd, -4), 0);
+#endif
+            return;
+
+        MZER03:
+            // Clear the first 4 bytes and then unconditionally clear the last 4 bytes and return.
+            if ((len & 4) == 0)
+                goto MZER04;
+            Debug.Assert(len >= 4 && len < 8);
+            Unsafe.WriteUnaligned<int>(ref dest, 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref destEnd, -4), 0);
+            return;
+
+        MZER04:
+            // Clear the first byte. For pending bytes, do an unconditionally clear of the last 2 bytes and return.
+            Debug.Assert(len < 4);
+            if (len == 0)
+                return;
+            dest = 0;
+            if ((len & 2) == 0)
+                return;
+            Unsafe.WriteUnaligned<short>(ref Unsafe.Add(ref destEnd, -2), 0);
+            return;
+
+        MZER05:
+            // PInvoke to the native version when the clear length exceeds the threshold.
+            if (len > ZeroMemoryNativeThreshold)
+            {
+                goto PInvoke;
+            }
+
+#if HAS_CUSTOM_BLOCKS
+            if (len >= 256)
+            {
+                // Try to opportunistically align the destination below. The input isn't pinned, so the GC
+                // is free to move the references. We're therefore assuming that reads may still be unaligned.
+                nuint misalignedElements = 64 - Unsafe.OpportunisticMisalignment(ref dest, 64);
+                Unsafe.WriteUnaligned<Block64>(ref dest, default);
+                dest = ref Unsafe.Add(ref dest, misalignedElements);
+                len -= misalignedElements;
+            }
+#endif
+            // Clear 64-bytes at a time until the remainder is less than 64.
+            // If remainder is greater than 16 bytes, then jump to MZER00. Otherwise, unconditionally clear the last 16 bytes and return.
+            Debug.Assert(len > 64 && len <= ZeroMemoryNativeThreshold);
+            nuint n = len >> 6;
+
+        MZER06:
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.WriteUnaligned<Block64>(ref dest, default);
+#elif TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref dest, 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref dest, 8), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref dest, 16), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref dest, 24), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref dest, 32), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref dest, 40), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref dest, 48), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref dest, 56), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref dest, 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 4), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 8), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 12), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 16), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 20), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 24), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 28), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 32), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 36), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 40), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 44), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 48), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 52), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 56), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref dest, 60), 0);
+#endif
+            dest = ref Unsafe.Add(ref dest, 64);
+            n--;
+            if (n != 0)
+                goto MZER06;
+
+            len %= 64;
+            if (len > 16)
+                goto MZER00;
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.WriteUnaligned<Block16>(ref Unsafe.Add(ref destEnd, -16), default);
+#elif TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref destEnd, -16), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref destEnd, -8), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref destEnd, -16), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref destEnd, -12), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref destEnd, -8), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref destEnd, -4), 0);
+#endif
+            return;
+
+        PInvoke:
+            // Implicit nullchecks
+            _ = Unsafe.ReadUnaligned<byte>(ref dest);
+            Buffer._ZeroMemory(ref dest, len);
+        }
+
+#if NATIVEAOT
+        [System.Runtime.RuntimeExport("RhSpanHelpers_MemSet")]
+#endif
+        internal static void Fill(ref byte dest, byte value, nuint len)
+        {
+            if (!Vector.IsHardwareAccelerated)
+            {
+                goto CannotVectorize;
+            }
+
+            if (len >= (nuint)Vector<byte>.Count)
+            {
+                // We have enough data for at least one vectorized write.
+                Vector<byte> vector = new (value);
+                nuint stopLoopAtOffset = len & (nuint)(nint)(2 * (int)-Vector<byte>.Count); // intentional sign extension carries the negative bit
+                nuint offset = 0;
+
+                // Loop, writing 2 vectors at a time.
+                // Compare 'numElements' rather than 'stopLoopAtOffset' because we don't want a dependency
+                // on the very recently calculated 'stopLoopAtOffset' value.
+                if (len >= (uint)(2 * Vector<byte>.Count))
+                {
+                    do
+                    {
+                        Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset), vector);
+                        Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset + (nuint)Vector<byte>.Count), vector);
+                        offset += (uint)(2 * Vector<byte>.Count);
+                    } while (offset < stopLoopAtOffset);
+                }
+
+                // At this point, if any data remains to be written, it's strictly less than
+                // 2 * sizeof(Vector) bytes. The loop above had us write an even number of vectors.
+                // If the total byte length instead involves us writing an odd number of vectors, write
+                // one additional vector now. The bit check below tells us if we're in an "odd vector
+                // count" situation.
+                if ((len & (nuint)Vector<byte>.Count) != 0)
+                {
+                    Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset), vector);
+                }
+
+                // It's possible that some small buffer remains to be populated - something that won't
+                // fit an entire vector's worth of data. Instead of falling back to a loop, we'll write
+                // a vector at the very end of the buffer. This may involve overwriting previously
+                // populated data, which is fine since we're splatting the same value for all entries.
+                // There's no need to perform a length check here because we already performed this
+                // check before entering the vectorized code path.
+                Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, len - (nuint)Vector<byte>.Count), vector);
+
+                // And we're done!
+                return;
+            }
+
+        CannotVectorize:
+
+            // If we reached this point, we cannot vectorize this T, or there are too few
+            // elements for us to vectorize. Fall back to an unrolled loop.
+            nuint i = 0;
+
+            // Write 8 elements at a time
+            if (len >= 8)
+            {
+                nuint stopLoopAtOffset = len & ~(nuint)7;
+                do
+                {
+                    Unsafe.Add(ref dest, (nint)i + 0) = value;
+                    Unsafe.Add(ref dest, (nint)i + 1) = value;
+                    Unsafe.Add(ref dest, (nint)i + 2) = value;
+                    Unsafe.Add(ref dest, (nint)i + 3) = value;
+                    Unsafe.Add(ref dest, (nint)i + 4) = value;
+                    Unsafe.Add(ref dest, (nint)i + 5) = value;
+                    Unsafe.Add(ref dest, (nint)i + 6) = value;
+                    Unsafe.Add(ref dest, (nint)i + 7) = value;
+                } while ((i += 8) < stopLoopAtOffset);
+            }
+
+            // Write next 4 elements if needed
+            if ((len & 4) != 0)
+            {
+                Unsafe.Add(ref dest, (nint)i + 0) = value;
+                Unsafe.Add(ref dest, (nint)i + 1) = value;
+                Unsafe.Add(ref dest, (nint)i + 2) = value;
+                Unsafe.Add(ref dest, (nint)i + 3) = value;
+                i += 4;
+            }
+
+            // Write next 2 elements if needed
+            if ((len & 2) != 0)
+            {
+                Unsafe.Add(ref dest, (nint)i + 0) = value;
+                Unsafe.Add(ref dest, (nint)i + 1) = value;
+                i += 2;
+            }
+
+            // Write final element if needed
+            if ((len & 1) != 0)
+            {
+                Unsafe.Add(ref dest, (nint)i) = value;
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs
index fca176fe4381..37f90c695090 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs
@@ -1,15 +1,12 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System.Buffers.Binary;
 using System.Diagnostics;
 using System.Numerics;
-using System.Runtime;
 using System.Runtime.CompilerServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
 
-#pragma warning disable IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
 
 #pragma warning disable 8500 // sizeof of managed types
 
@@ -37,22 +34,22 @@ public static unsafe bool CanUsePackedIndexOf<T>(T value)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CompExactlyDependsOn(typeof(Sse2))]
         public static int IndexOf(ref char searchSpace, char value, int length) =>
-            IndexOf<SpanHelpers.DontNegate<short>>(ref Unsafe.As<char, short>(ref searchSpace), (short)value, length);
+            IndexOf<SpanHelpers.DontNegate<short>, NopTransform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value, length);
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CompExactlyDependsOn(typeof(Sse2))]
         public static int IndexOfAnyExcept(ref char searchSpace, char value, int length) =>
-            IndexOf<SpanHelpers.Negate<short>>(ref Unsafe.As<char, short>(ref searchSpace), (short)value, length);
+            IndexOf<SpanHelpers.Negate<short>, NopTransform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value, length);
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CompExactlyDependsOn(typeof(Sse2))]
         public static int IndexOfAny(ref char searchSpace, char value0, char value1, int length) =>
-            IndexOfAny<SpanHelpers.DontNegate<short>>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, length);
+            IndexOfAny<SpanHelpers.DontNegate<short>, NopTransform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, length);
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CompExactlyDependsOn(typeof(Sse2))]
         public static int IndexOfAnyExcept(ref char searchSpace, char value0, char value1, int length) =>
-            IndexOfAny<SpanHelpers.Negate<short>>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, length);
+            IndexOfAny<SpanHelpers.Negate<short>, NopTransform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, length);
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CompExactlyDependsOn(typeof(Sse2))]
@@ -64,6 +61,44 @@ public static int IndexOfAny(ref char searchSpace, char value0, char value1, cha
         public static int IndexOfAnyExcept(ref char searchSpace, char value0, char value1, char value2, int length) =>
             IndexOfAny<SpanHelpers.Negate<short>>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, (short)value2, length);
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        public static int IndexOfAnyIgnoreCase(ref char searchSpace, char value, int length)
+        {
+            Debug.Assert((value | 0x20) == value);
+
+            return IndexOf<SpanHelpers.DontNegate<short>, Or20Transform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value, length);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        public static int IndexOfAnyExceptIgnoreCase(ref char searchSpace, char value, int length)
+        {
+            Debug.Assert((value | 0x20) == value);
+
+            return IndexOf<SpanHelpers.Negate<short>, Or20Transform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value, length);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        public static int IndexOfAnyIgnoreCase(ref char searchSpace, char value0, char value1, int length)
+        {
+            Debug.Assert((value0 | 0x20) == value0);
+            Debug.Assert((value1 | 0x20) == value1);
+
+            return IndexOfAny<SpanHelpers.DontNegate<short>, Or20Transform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, length);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(Sse2))]
+        public static int IndexOfAnyExceptIgnoreCase(ref char searchSpace, char value0, char value1, int length)
+        {
+            Debug.Assert((value0 | 0x20) == value0);
+            Debug.Assert((value1 | 0x20) == value1);
+
+            return IndexOfAny<SpanHelpers.Negate<short>, Or20Transform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, length);
+        }
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CompExactlyDependsOn(typeof(Sse2))]
         public static int IndexOfAnyInRange(ref char searchSpace, char lowInclusive, char rangeInclusive, int length) =>
@@ -277,8 +312,9 @@ public static bool Contains(ref short searchSpace, short value, int length)
         }
 
         [CompExactlyDependsOn(typeof(Sse2))]
-        private static int IndexOf<TNegator>(ref short searchSpace, short value, int length)
+        private static int IndexOf<TNegator, TTransform>(ref short searchSpace, short value, int length)
             where TNegator : struct, SpanHelpers.INegator<short>
+            where TTransform : struct, ITransform
         {
             Debug.Assert(CanUsePackedIndexOf(value));
 
@@ -290,10 +326,10 @@ private static int IndexOf<TNegator>(ref short searchSpace, short value, int len
                 {
                     length -= 4;
 
-                    if (TNegator.NegateIfNeeded(searchSpace == value)) return 0;
-                    if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, 1) == value)) return 1;
-                    if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, 2) == value)) return 2;
-                    if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, 3) == value)) return 3;
+                    if (TNegator.NegateIfNeeded(TTransform.TransformInput(searchSpace) == value)) return 0;
+                    if (TNegator.NegateIfNeeded(TTransform.TransformInput(Unsafe.Add(ref searchSpace, 1)) == value)) return 1;
+                    if (TNegator.NegateIfNeeded(TTransform.TransformInput(Unsafe.Add(ref searchSpace, 2)) == value)) return 2;
+                    if (TNegator.NegateIfNeeded(TTransform.TransformInput(Unsafe.Add(ref searchSpace, 3)) == value)) return 3;
 
                     offset = 4;
                 }
@@ -302,7 +338,7 @@ private static int IndexOf<TNegator>(ref short searchSpace, short value, int len
                 {
                     length -= 1;
 
-                    if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset;
+                    if (TNegator.NegateIfNeeded(TTransform.TransformInput(Unsafe.Add(ref searchSpace, offset)) == value)) return (int)offset;
 
                     offset += 1;
                 }
@@ -329,7 +365,7 @@ private static int IndexOf<TNegator>(ref short searchSpace, short value, int len
                         {
                             Vector512<short> source0 = Vector512.LoadUnsafe(ref currentSearchSpace);
                             Vector512<short> source1 = Vector512.LoadUnsafe(ref currentSearchSpace, (nuint)Vector512<short>.Count);
-                            Vector512<byte> packedSource = PackSources(source0, source1);
+                            Vector512<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
 
                             if (HasMatch<TNegator>(packedValue, packedSource))
                             {
@@ -352,7 +388,7 @@ private static int IndexOf<TNegator>(ref short searchSpace, short value, int len
 
                         Vector512<short> source0 = Vector512.LoadUnsafe(ref firstVector);
                         Vector512<short> source1 = Vector512.LoadUnsafe(ref oneVectorAwayFromEnd);
-                        Vector512<byte> packedSource = PackSources(source0, source1);
+                        Vector512<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
 
                         if (HasMatch<TNegator>(packedValue, packedSource))
                         {
@@ -378,7 +414,7 @@ private static int IndexOf<TNegator>(ref short searchSpace, short value, int len
                         {
                             Vector256<short> source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
                             Vector256<short> source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256<short>.Count);
-                            Vector256<byte> packedSource = PackSources(source0, source1);
+                            Vector256<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
                             Vector256<byte> result = Vector256.Equals(packedValue, packedSource);
                             result = NegateIfNeeded<TNegator>(result);
 
@@ -403,7 +439,7 @@ private static int IndexOf<TNegator>(ref short searchSpace, short value, int len
 
                         Vector256<short> source0 = Vector256.LoadUnsafe(ref firstVector);
                         Vector256<short> source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
-                        Vector256<byte> packedSource = PackSources(source0, source1);
+                        Vector256<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
                         Vector256<byte> result = Vector256.Equals(packedValue, packedSource);
                         result = NegateIfNeeded<TNegator>(result);
 
@@ -438,7 +474,7 @@ private static int IndexOf<TNegator>(ref short searchSpace, short value, int len
                         {
                             Vector128<short> source0 = Vector128.LoadUnsafe(ref currentSearchSpace);
                             Vector128<short> source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128<short>.Count);
-                            Vector128<byte> packedSource = PackSources(source0, source1);
+                            Vector128<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
                             Vector128<byte> result = Vector128.Equals(packedValue, packedSource);
                             result = NegateIfNeeded<TNegator>(result);
 
@@ -463,7 +499,7 @@ private static int IndexOf<TNegator>(ref short searchSpace, short value, int len
 
                         Vector128<short> source0 = Vector128.LoadUnsafe(ref firstVector);
                         Vector128<short> source1 = Vector128.LoadUnsafe(ref oneVectorAwayFromEnd);
-                        Vector128<byte> packedSource = PackSources(source0, source1);
+                        Vector128<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
                         Vector128<byte> result = Vector128.Equals(packedValue, packedSource);
                         result = NegateIfNeeded<TNegator>(result);
 
@@ -479,8 +515,9 @@ private static int IndexOf<TNegator>(ref short searchSpace, short value, int len
         }
 
         [CompExactlyDependsOn(typeof(Sse2))]
-        private static int IndexOfAny<TNegator>(ref short searchSpace, short value0, short value1, int length)
+        private static int IndexOfAny<TNegator, TTransform>(ref short searchSpace, short value0, short value1, int length)
             where TNegator : struct, SpanHelpers.INegator<short>
+            where TTransform : struct, ITransform
         {
             Debug.Assert(CanUsePackedIndexOf(value0));
             Debug.Assert(CanUsePackedIndexOf(value1));
@@ -494,13 +531,13 @@ private static int IndexOfAny<TNegator>(ref short searchSpace, short value0, sho
                 {
                     length -= 4;
 
-                    lookUp = searchSpace;
+                    lookUp = TTransform.TransformInput(searchSpace);
                     if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return 0;
-                    lookUp = Unsafe.Add(ref searchSpace, 1);
+                    lookUp = TTransform.TransformInput(Unsafe.Add(ref searchSpace, 1));
                     if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return 1;
-                    lookUp = Unsafe.Add(ref searchSpace, 2);
+                    lookUp = TTransform.TransformInput(Unsafe.Add(ref searchSpace, 2));
                     if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return 2;
-                    lookUp = Unsafe.Add(ref searchSpace, 3);
+                    lookUp = TTransform.TransformInput(Unsafe.Add(ref searchSpace, 3));
                     if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return 3;
 
                     offset = 4;
@@ -510,7 +547,7 @@ private static int IndexOfAny<TNegator>(ref short searchSpace, short value0, sho
                 {
                     length -= 1;
 
-                    lookUp = Unsafe.Add(ref searchSpace, offset);
+                    lookUp = TTransform.TransformInput(Unsafe.Add(ref searchSpace, offset));
                     if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset;
 
                     offset += 1;
@@ -538,7 +575,7 @@ private static int IndexOfAny<TNegator>(ref short searchSpace, short value0, sho
                         {
                             Vector512<short> source0 = Vector512.LoadUnsafe(ref currentSearchSpace);
                             Vector512<short> source1 = Vector512.LoadUnsafe(ref currentSearchSpace, (nuint)Vector512<short>.Count);
-                            Vector512<byte> packedSource = PackSources(source0, source1);
+                            Vector512<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
                             Vector512<byte> result = NegateIfNeeded<TNegator>(Vector512.Equals(packedValue0, packedSource) | Vector512.Equals(packedValue1, packedSource));
 
                             if (result != Vector512<byte>.Zero)
@@ -562,7 +599,7 @@ private static int IndexOfAny<TNegator>(ref short searchSpace, short value0, sho
 
                         Vector512<short> source0 = Vector512.LoadUnsafe(ref firstVector);
                         Vector512<short> source1 = Vector512.LoadUnsafe(ref oneVectorAwayFromEnd);
-                        Vector512<byte> packedSource = PackSources(source0, source1);
+                        Vector512<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
                         Vector512<byte> result = NegateIfNeeded<TNegator>(Vector512.Equals(packedValue0, packedSource) | Vector512.Equals(packedValue1, packedSource));
 
                         if (result != Vector512<byte>.Zero)
@@ -590,7 +627,7 @@ private static int IndexOfAny<TNegator>(ref short searchSpace, short value0, sho
                         {
                             Vector256<short> source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
                             Vector256<short> source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256<short>.Count);
-                            Vector256<byte> packedSource = PackSources(source0, source1);
+                            Vector256<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
                             Vector256<byte> result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource);
                             result = NegateIfNeeded<TNegator>(result);
 
@@ -615,7 +652,7 @@ private static int IndexOfAny<TNegator>(ref short searchSpace, short value0, sho
 
                         Vector256<short> source0 = Vector256.LoadUnsafe(ref firstVector);
                         Vector256<short> source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
-                        Vector256<byte> packedSource = PackSources(source0, source1);
+                        Vector256<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
                         Vector256<byte> result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource);
                         result = NegateIfNeeded<TNegator>(result);
 
@@ -651,7 +688,7 @@ private static int IndexOfAny<TNegator>(ref short searchSpace, short value0, sho
                         {
                             Vector128<short> source0 = Vector128.LoadUnsafe(ref currentSearchSpace);
                             Vector128<short> source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128<short>.Count);
-                            Vector128<byte> packedSource = PackSources(source0, source1);
+                            Vector128<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
                             Vector128<byte> result = Vector128.Equals(packedValue0, packedSource) | Vector128.Equals(packedValue1, packedSource);
                             result = NegateIfNeeded<TNegator>(result);
 
@@ -676,7 +713,7 @@ private static int IndexOfAny<TNegator>(ref short searchSpace, short value0, sho
 
                         Vector128<short> source0 = Vector128.LoadUnsafe(ref firstVector);
                         Vector128<short> source1 = Vector128.LoadUnsafe(ref oneVectorAwayFromEnd);
-                        Vector128<byte> packedSource = PackSources(source0, source1);
+                        Vector128<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
                         Vector128<byte> result = Vector128.Equals(packedValue0, packedSource) | Vector128.Equals(packedValue1, packedSource);
                         result = NegateIfNeeded<TNegator>(result);
 
@@ -1283,5 +1320,29 @@ internal static Vector512<byte> FixUpPackedVector512Result(Vector512<byte> resul
             // We want to preserve the order of the two input vectors, so we deinterleave the packed value.
             return Avx512F.PermuteVar8x64(result.AsInt64(), Vector512.Create(0, 2, 4, 6, 1, 3, 5, 7)).AsByte();
         }
+
+        private interface ITransform
+        {
+            static abstract short TransformInput(short input);
+            static abstract Vector128<byte> TransformInput(Vector128<byte> input);
+            static abstract Vector256<byte> TransformInput(Vector256<byte> input);
+            static abstract Vector512<byte> TransformInput(Vector512<byte> input);
+        }
+
+        private readonly struct NopTransform : ITransform
+        {
+            public static short TransformInput(short input) => input;
+            public static Vector128<byte> TransformInput(Vector128<byte> input) => input;
+            public static Vector256<byte> TransformInput(Vector256<byte> input) => input;
+            public static Vector512<byte> TransformInput(Vector512<byte> input) => input;
+        }
+
+        private readonly struct Or20Transform : ITransform
+        {
+            public static short TransformInput(short input) => (short)(input | 0x20);
+            public static Vector128<byte> TransformInput(Vector128<byte> input) => input | Vector128.Create((byte)0x20);
+            public static Vector256<byte> TransformInput(Vector256<byte> input) => input | Vector256.Create((byte)0x20);
+            public static Vector512<byte> TransformInput(Vector512<byte> input) => input | Vector512.Create((byte)0x20);
+        }
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs
index 2cc6dc440564..ee378b7646b5 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs
@@ -8,7 +8,6 @@
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 
-#pragma warning disable IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
 
 #pragma warning disable 8500 // sizeof of managed types
 
@@ -16,6 +15,7 @@ namespace System
 {
     internal static partial class SpanHelpers // .T
     {
+        [Intrinsic] // Unrolled for small sizes
         public static unsafe void Fill<T>(ref T refData, nuint numElements, T value)
         {
             // Early checks to see if it's even possible to vectorize - JIT will turn these checks into consts.
@@ -1672,6 +1672,15 @@ private static unsafe int IndexOfAnyValueType<TValue, TNegator>(ref TValue searc
         {
             if (PackedSpanHelpers.PackedIndexOfIsSupported && typeof(TValue) == typeof(short) && PackedSpanHelpers.CanUsePackedIndexOf(value0) && PackedSpanHelpers.CanUsePackedIndexOf(value1))
             {
+                if ((*(char*)&value0 ^ *(char*)&value1) == 0x20)
+                {
+                    char lowerCase = (char)Math.Max(*(char*)&value0, *(char*)&value1);
+
+                    return typeof(TNegator) == typeof(DontNegate<short>)
+                        ? PackedSpanHelpers.IndexOfAnyIgnoreCase(ref Unsafe.As<TValue, char>(ref searchSpace), lowerCase, length)
+                        : PackedSpanHelpers.IndexOfAnyExceptIgnoreCase(ref Unsafe.As<TValue, char>(ref searchSpace), lowerCase, length);
+                }
+
                 return typeof(TNegator) == typeof(DontNegate<short>)
                     ? PackedSpanHelpers.IndexOfAny(ref Unsafe.As<TValue, char>(ref searchSpace), *(char*)&value0, *(char*)&value1, length)
                     : PackedSpanHelpers.IndexOfAnyExcept(ref Unsafe.As<TValue, char>(ref searchSpace), *(char*)&value0, *(char*)&value1, length);
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
index a7e5f48d6318..7776024aaeb1 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
@@ -12,330 +12,9 @@ namespace System
 {
     internal static partial class SpanHelpers
     {
-        public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength)
-        {
-            if (byteLength == 0)
-                return;
-
-#if TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64
-            // The exact matrix on when ZeroMemory is faster than InitBlockUnaligned is very complex. The factors to consider include
-            // type of hardware and memory alignment. This threshold was chosen as a good balance across different configurations.
-            if (byteLength > 768)
-                goto PInvoke;
-            Unsafe.InitBlockUnaligned(ref b, 0, (uint)byteLength);
-            return;
-#else
-            // TODO: Optimize other platforms to be on par with AMD64 CoreCLR
-            // Note: It's important that this switch handles lengths at least up to 22.
-            // See notes below near the main loop for why.
-
-            // The switch will be very fast since it can be implemented using a jump
-            // table in assembly. See http://stackoverflow.com/a/449297/4077294 for more info.
-
-            switch (byteLength)
-            {
-                case 1:
-                    b = 0;
-                    return;
-                case 2:
-                    Unsafe.As<byte, short>(ref b) = 0;
-                    return;
-                case 3:
-                    Unsafe.As<byte, short>(ref b) = 0;
-                    Unsafe.Add(ref b, 2) = 0;
-                    return;
-                case 4:
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    return;
-                case 5:
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.Add(ref b, 4) = 0;
-                    return;
-                case 6:
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, short>(ref Unsafe.Add(ref b, 4)) = 0;
-                    return;
-                case 7:
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, short>(ref Unsafe.Add(ref b, 4)) = 0;
-                    Unsafe.Add(ref b, 6) = 0;
-                    return;
-                case 8:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-#endif
-                    return;
-                case 9:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-#endif
-                    Unsafe.Add(ref b, 8) = 0;
-                    return;
-                case 10:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-#endif
-                    Unsafe.As<byte, short>(ref Unsafe.Add(ref b, 8)) = 0;
-                    return;
-                case 11:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-#endif
-                    Unsafe.As<byte, short>(ref Unsafe.Add(ref b, 8)) = 0;
-                    Unsafe.Add(ref b, 10) = 0;
-                    return;
-                case 12:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-#endif
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 8)) = 0;
-                    return;
-                case 13:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-#endif
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 8)) = 0;
-                    Unsafe.Add(ref b, 12) = 0;
-                    return;
-                case 14:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-#endif
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 8)) = 0;
-                    Unsafe.As<byte, short>(ref Unsafe.Add(ref b, 12)) = 0;
-                    return;
-                case 15:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-#endif
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 8)) = 0;
-                    Unsafe.As<byte, short>(ref Unsafe.Add(ref b, 12)) = 0;
-                    Unsafe.Add(ref b, 14) = 0;
-                    return;
-                case 16:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 8)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 12)) = 0;
-#endif
-                    return;
-                case 17:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 8)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 12)) = 0;
-#endif
-                    Unsafe.Add(ref b, 16) = 0;
-                    return;
-                case 18:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 8)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 12)) = 0;
-#endif
-                    Unsafe.As<byte, short>(ref Unsafe.Add(ref b, 16)) = 0;
-                    return;
-                case 19:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 8)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 12)) = 0;
-#endif
-                    Unsafe.As<byte, short>(ref Unsafe.Add(ref b, 16)) = 0;
-                    Unsafe.Add(ref b, 18) = 0;
-                    return;
-                case 20:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 8)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 12)) = 0;
-#endif
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 16)) = 0;
-                    return;
-                case 21:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 8)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 12)) = 0;
-#endif
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 16)) = 0;
-                    Unsafe.Add(ref b, 20) = 0;
-                    return;
-                case 22:
-#if TARGET_64BIT
-                    Unsafe.As<byte, long>(ref b) = 0;
-                    Unsafe.As<byte, long>(ref Unsafe.Add<byte>(ref b, 8)) = 0;
-#else
-                    Unsafe.As<byte, int>(ref b) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 4)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 8)) = 0;
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 12)) = 0;
-#endif
-                    Unsafe.As<byte, int>(ref Unsafe.Add(ref b, 16)) = 0;
-                    Unsafe.As<byte, short>(ref Unsafe.Add(ref b, 20)) = 0;
-                    return;
-            }
-
-            // P/Invoke into the native version for large lengths
-            if (byteLength >= 512) goto PInvoke;
-
-            nuint i = 0; // byte offset at which we're copying
-
-            if (((nuint)Unsafe.AsPointer(ref b) & 3) != 0)
-            {
-                if (((nuint)Unsafe.AsPointer(ref b) & 1) != 0)
-                {
-                    b = 0;
-                    i += 1;
-                    if (((nuint)Unsafe.AsPointer(ref b) & 2) != 0)
-                        goto IntAligned;
-                }
-                Unsafe.As<byte, short>(ref Unsafe.AddByteOffset(ref b, i)) = 0;
-                i += 2;
-            }
-
-            IntAligned:
-
-            // On 64-bit IntPtr.Size == 8, so we want to advance to the next 8-aligned address. If
-            // (int)b % 8 is 0, 5, 6, or 7, we will already have advanced by 0, 3, 2, or 1
-            // bytes to the next aligned address (respectively), so do nothing. On the other hand,
-            // if it is 1, 2, 3, or 4 we will want to copy-and-advance another 4 bytes until
-            // we're aligned.
-            // The thing 1, 2, 3, and 4 have in common that the others don't is that if you
-            // subtract one from them, their 3rd lsb will not be set. Hence, the below check.
-
-            if ((((nuint)Unsafe.AsPointer(ref b) - 1) & 4) == 0)
-            {
-                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset(ref b, i)) = 0;
-                i += 4;
-            }
-
-            nuint end = byteLength - 16;
-            byteLength -= i; // lower 4 bits of byteLength represent how many bytes are left *after* the unrolled loop
-
-            // We know due to the above switch-case that this loop will always run 1 iteration; max
-            // bytes we clear before checking is 23 (7 to align the pointers, 16 for 1 iteration) so
-            // the switch handles lengths 0-22.
-            Debug.Assert(end >= 7 && i <= end);
-
-            // This is separated out into a different variable, so the i + 16 addition can be
-            // performed at the start of the pipeline and the loop condition does not have
-            // a dependency on the writes.
-            nuint counter;
-
-            do
-            {
-                counter = i + 16;
-
-                // This loop looks very costly since there appear to be a bunch of temporary values
-                // being created with the adds, but the jit (for x86 anyways) will convert each of
-                // these to use memory addressing operands.
-
-                // So the only cost is a bit of code size, which is made up for by the fact that
-                // we save on writes to b.
-
-#if TARGET_64BIT
-                Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
-                Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i + 8)) = 0;
-#else
-                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset(ref b, i)) = 0;
-                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset(ref b, i + 4)) = 0;
-                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset(ref b, i + 8)) = 0;
-                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset(ref b, i + 12)) = 0;
-#endif
-
-                i = counter;
-
-                // See notes above for why this wasn't used instead
-                // i += 16;
-            }
-            while (counter <= end);
-
-            if ((byteLength & 8) != 0)
-            {
-#if TARGET_64BIT
-                Unsafe.As<byte, long>(ref Unsafe.AddByteOffset<byte>(ref b, i)) = 0;
-#else
-                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset(ref b, i)) = 0;
-                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset(ref b, i + 4)) = 0;
-#endif
-                i += 8;
-            }
-            if ((byteLength & 4) != 0)
-            {
-                Unsafe.As<byte, int>(ref Unsafe.AddByteOffset(ref b, i)) = 0;
-                i += 4;
-            }
-            if ((byteLength & 2) != 0)
-            {
-                Unsafe.As<byte, short>(ref Unsafe.AddByteOffset(ref b, i)) = 0;
-                i += 2;
-            }
-            if ((byteLength & 1) != 0)
-            {
-                Unsafe.AddByteOffset(ref b, i) = 0;
-                // We're not using i after this, so not needed
-                // i += 1;
-            }
-
-            return;
-#endif
-
-        PInvoke:
-            Buffer._ZeroMemory(ref b, byteLength);
-        }
-
         public static unsafe void ClearWithReferences(ref IntPtr ip, nuint pointerSizeLength)
         {
-            Debug.Assert((int)Unsafe.AsPointer(ref ip) % sizeof(IntPtr) == 0, "Should've been aligned on natural word boundary.");
+            Debug.Assert(Unsafe.IsOpportunisticallyAligned(ref ip, (uint)sizeof(IntPtr)), "Should've been aligned on natural word boundary.");
 
             // First write backward 8 natural words at a time.
             // Writing backward allows us to get away with only simple modifications to the
@@ -650,7 +329,6 @@ public static unsafe void Reverse<T>(ref T elements, nuint length)
             ReverseInner(ref elements, length);
         }
 
-#pragma warning disable IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static void ReverseInner<T>(ref T elements, nuint length)
         {
@@ -667,6 +345,5 @@ private static void ReverseInner<T>(ref T elements, nuint length)
                 last = ref Unsafe.Subtract(ref last, 1);
             } while (Unsafe.IsAddressLessThan(ref first, ref last));
         }
-#pragma warning restore IDE0060 // https://github.com/dotnet/roslyn-analyzers/issues/6228
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs
index 66b9432dfe24..356903063d15 100644
--- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs
@@ -533,6 +533,7 @@ public bool EndsWith(string value)
             return EndsWith(value, StringComparison.CurrentCulture);
         }
 
+        [Intrinsic] // Unrolled and vectorized for half-constant input (Ordinal)
         public bool EndsWith(string value, StringComparison comparisonType)
         {
             ArgumentNullException.ThrowIfNull(value);
diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs
index d568696836d7..7f4f642526d4 100644
--- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs
@@ -427,13 +427,13 @@ public static string Format([StringSyntax(StringSyntaxAttribute.CompositeFormat)
         public static string Format([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0, object? arg1)
         {
             TwoObjects two = new TwoObjects(arg0, arg1);
-            return FormatHelper(null, format, MemoryMarshal.CreateReadOnlySpan(ref two.Arg0, 2));
+            return FormatHelper(null, format, two);
         }
 
         public static string Format([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0, object? arg1, object? arg2)
         {
             ThreeObjects three = new ThreeObjects(arg0, arg1, arg2);
-            return FormatHelper(null, format, MemoryMarshal.CreateReadOnlySpan(ref three.Arg0, 3));
+            return FormatHelper(null, format, three);
         }
 
         public static string Format([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, params object?[] args)
@@ -456,13 +456,13 @@ public static string Format(IFormatProvider? provider, [StringSyntax(StringSynta
         public static string Format(IFormatProvider? provider, [StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0, object? arg1)
         {
             TwoObjects two = new TwoObjects(arg0, arg1);
-            return FormatHelper(provider, format, MemoryMarshal.CreateReadOnlySpan(ref two.Arg0, 2));
+            return FormatHelper(provider, format, two);
         }
 
         public static string Format(IFormatProvider? provider, [StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0, object? arg1, object? arg2)
         {
             ThreeObjects three = new ThreeObjects(arg0, arg1, arg2);
-            return FormatHelper(provider, format, MemoryMarshal.CreateReadOnlySpan(ref three.Arg0, 3));
+            return FormatHelper(provider, format, three);
         }
 
         public static string Format(IFormatProvider? provider, [StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, params object?[] args)
diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs
index 2affdb008575..56d471168632 100644
--- a/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/String.Searching.cs
@@ -84,10 +84,10 @@ private int IndexOfCharOrdinalIgnoreCase(char value)
 
             if (char.IsAsciiLetter(value))
             {
-                char valueUc = (char)(value | 0x20);
-                char valueLc = (char)(value & ~0x20);
+                char valueLc = (char)(value | 0x20);
+                char valueUc = (char)(value & ~0x20);
                 return PackedSpanHelpers.PackedIndexOfIsSupported
-                    ? PackedSpanHelpers.IndexOfAny(ref _firstChar, valueLc, valueUc, Length)
+                    ? PackedSpanHelpers.IndexOfAnyIgnoreCase(ref _firstChar, valueLc, Length)
                     : SpanHelpers.IndexOfAnyChar(ref _firstChar, valueLc, valueUc, Length);
             }
 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs
index 85801e101a17..5507aed9c770 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs
@@ -141,7 +141,7 @@ private static unsafe bool IsValidCore<T>(ref T searchSpace, int length) where T
                     // Try to opportunistically align the reads below. The input isn't pinned, so the GC
                     // is free to move the references. We're therefore assuming that reads may still be unaligned.
                     // They may also be unaligned if the input chars aren't 2-byte aligned.
-                    nuint misalignedElements = ((nuint)Unsafe.AsPointer(ref searchSpace) & (nuint)(Vector256<byte>.Count - 1)) / (nuint)sizeof(T);
+                    nuint misalignedElements = Unsafe.OpportunisticMisalignment(ref searchSpace, (uint)Vector256<byte>.Count) / (nuint)sizeof(T);
                     i -= misalignedElements;
                     Debug.Assert((int)i > 3 * Vector256<T>.Count);
 
@@ -193,7 +193,7 @@ private static unsafe bool IsValidCore<T>(ref T searchSpace, int length) where T
                     // Try to opportunistically align the reads below. The input isn't pinned, so the GC
                     // is free to move the references. We're therefore assuming that reads may still be unaligned.
                     // They may also be unaligned if the input chars aren't 2-byte aligned.
-                    nuint misalignedElements = ((nuint)Unsafe.AsPointer(ref searchSpace) & (nuint)(Vector128<byte>.Count - 1)) / (nuint)sizeof(T);
+                    nuint misalignedElements = Unsafe.OpportunisticMisalignment(ref searchSpace, (uint)Vector128<byte>.Count) / (nuint)sizeof(T);
                     i -= misalignedElements;
                     Debug.Assert((int)i > 3 * Vector128<T>.Count);
 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs
index 5e4e6c696739..95ebfdc7737e 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs
@@ -1297,7 +1297,7 @@ internal unsafe bool AddChar(char ch, int numBytes)
                     {
                         // Throw maybe
                         _bytes -= numBytes;                                        // Didn't encode these bytes
-                        _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);    // Throw?
+                        _enc.ThrowCharsOverflow(_decoder, _chars == _charStart);    // Throw?
                         return false;                                           // No throw, but no store either
                     }
 
@@ -1316,7 +1316,7 @@ internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
                 {
                     // Throw maybe
                     _bytes -= numBytes;                                        // Didn't encode these bytes
-                    _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);    // Throw?
+                    _enc.ThrowCharsOverflow(_decoder, _chars == _charStart);    // Throw?
                     return false;                                           // No throw, but no store either
                 }
                 return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/StringBuilder.cs b/src/libraries/System.Private.CoreLib/src/System/Text/StringBuilder.cs
index c1f86a1ca2d1..6f7b0abfa9b9 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Text/StringBuilder.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/StringBuilder.cs
@@ -1366,13 +1366,13 @@ public StringBuilder AppendFormat([StringSyntax(StringSyntaxAttribute.CompositeF
         public StringBuilder AppendFormat([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0, object? arg1)
         {
             TwoObjects two = new TwoObjects(arg0, arg1);
-            return AppendFormatHelper(null, format, MemoryMarshal.CreateReadOnlySpan(ref two.Arg0, 2));
+            return AppendFormatHelper(null, format, two);
         }
 
         public StringBuilder AppendFormat([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0, object? arg1, object? arg2)
         {
             ThreeObjects three = new ThreeObjects(arg0, arg1, arg2);
-            return AppendFormatHelper(null, format, MemoryMarshal.CreateReadOnlySpan(ref three.Arg0, 3));
+            return AppendFormatHelper(null, format, three);
         }
 
         public StringBuilder AppendFormat([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, params object?[] args)
@@ -1395,13 +1395,13 @@ public StringBuilder AppendFormat(IFormatProvider? provider, [StringSyntax(Strin
         public StringBuilder AppendFormat(IFormatProvider? provider, [StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0, object? arg1)
         {
             TwoObjects two = new TwoObjects(arg0, arg1);
-            return AppendFormatHelper(provider, format, MemoryMarshal.CreateReadOnlySpan(ref two.Arg0, 2));
+            return AppendFormatHelper(provider, format, two);
         }
 
         public StringBuilder AppendFormat(IFormatProvider? provider, [StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, object? arg0, object? arg1, object? arg2)
         {
             ThreeObjects three = new ThreeObjects(arg0, arg1, arg2);
-            return AppendFormatHelper(provider, format, MemoryMarshal.CreateReadOnlySpan(ref three.Arg0, 3));
+            return AppendFormatHelper(provider, format, three);
         }
 
         public StringBuilder AppendFormat(IFormatProvider? provider, [StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format, params object?[] args)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/UTF32Encoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/UTF32Encoding.cs
index 83d4e2ee6daa..022c87f070fe 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Text/UTF32Encoding.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/UTF32Encoding.cs
@@ -934,7 +934,7 @@ internal override unsafe int GetChars(byte* bytes, int byteCount,
                 if (iChar >= 0x10000)
                 {
                     // Surrogates take 2
-                    if (chars >= charEnd - 1)
+                    if (charEnd - chars < 2)
                     {
                         // Throwing or stopping
                         // We either read enough bytes for bytes-=4 to work, or we're
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/UnicodeEncoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/UnicodeEncoding.cs
index 6bee6e9d6aae..00fe83247e44 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Text/UnicodeEncoding.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/UnicodeEncoding.cs
@@ -1535,7 +1535,7 @@ internal sealed override unsafe int GetChars(
                     }
 
                     // Valid surrogate pair, add our lastChar (will need 2 chars)
-                    if (chars >= charEnd - 1)
+                    if (charEnd - chars < 2)
                     {
                         // couldn't find room for this surrogate pair
                         // We either advanced bytes or chars should == charStart and throw below
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs
index 31e10ce2b11e..a5fb311877a3 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Diagnostics;
 using System.Runtime.CompilerServices;
 
 namespace System.Threading
@@ -67,9 +68,85 @@ public static sbyte Exchange(ref sbyte location1, sbyte value) =>
         /// <exception cref="NullReferenceException">The address of location1 is a null pointer.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static short Exchange(ref short location1, short value) =>
+            (short)Exchange(ref Unsafe.As<short, ushort>(ref location1), (ushort)value);
+
+        /// <summary>Sets a 8-bit unsigned integer to a specified value and returns the original value, as an atomic operation.</summary>
+        /// <param name="location1">The variable to set to the specified value.</param>
+        /// <param name="value">The value to which the <paramref name="location1"/> parameter is set.</param>
+        /// <returns>The original value of <paramref name="location1"/>.</returns>
+        /// <exception cref="NullReferenceException">The address of location1 is a null pointer.</exception>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe byte Exchange(ref byte location1, byte value)
+        {
+#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_WASM)
+            return Exchange(ref location1, value); // Must expand intrinsic
+#else
+            // this relies on GC keeping 4B alignment for refs and on subtracting to such alignment being in the same object
+            nuint offset = Unsafe.OpportunisticMisalignment(ref location1, sizeof(uint));
+            ref uint alignedRef = ref Unsafe.As<byte, uint>(ref Unsafe.SubtractByteOffset(ref location1, offset));
+            int bitOffset =
+                (int)((BitConverter.IsLittleEndian ? offset : sizeof(uint) - offset - sizeof(byte)) * 8); // to bit offset
+            Debug.Assert(bitOffset is 0 or 8 or 16 or 24);
+            uint mask = ~((uint)byte.MaxValue << bitOffset);
+            uint shiftedValue = (uint)value << bitOffset;
+
+            // this doesn't need to be volatile since CompareExchange will update stale values
+            uint originalValue = alignedRef;
+            uint newValue;
+            do
+            {
+                // make sure the ref is still aligned
+                Debug.Assert(Unsafe.IsOpportunisticallyAligned(ref alignedRef, sizeof(uint)));
+                newValue = originalValue & mask | shiftedValue;
+            } while (originalValue !=
+                     (originalValue = CompareExchange(ref alignedRef, newValue, originalValue)));
+
+            // verify the GC hasn't broken the ref
+            Debug.Assert((nuint)Unsafe.ByteOffset(ref Unsafe.As<uint, byte>(ref alignedRef), ref location1) == offset);
+            return (byte)(originalValue >> bitOffset);
+#endif
+        }
+
+        /// <summary>Sets a 16-bit signed integer to a specified value and returns the original value, as an atomic operation.</summary>
+        /// <param name="location1">The variable to set to the specified value.</param>
+        /// <param name="value">The value to which the <paramref name="location1"/> parameter is set.</param>
+        /// <returns>The original value of <paramref name="location1"/>.</returns>
+        /// <exception cref="NullReferenceException">The address of location1 is a null pointer.</exception>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
-        public static ushort Exchange(ref ushort location1, ushort value) =>
-            (ushort)Exchange(ref Unsafe.As<ushort, short>(ref location1), (short)value);
+        public static unsafe ushort Exchange(ref ushort location1, ushort value)
+        {
+#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_WASM)
+            return Exchange(ref location1, value); // Must expand intrinsic
+#else
+            // this relies on GC keeping 4B alignment for refs and on subtracting to such alignment being in the same object
+            nuint offset = Unsafe.OpportunisticMisalignment(ref location1, sizeof(uint));
+            ref uint alignedRef = ref Unsafe.As<ushort, uint>(ref Unsafe.SubtractByteOffset(ref location1, offset));
+            int bitOffset =
+                (int)((BitConverter.IsLittleEndian ? offset : sizeof(uint) - offset - sizeof(byte)) * 8); // to bit offset
+            Debug.Assert(bitOffset is 0 or 16);
+            uint mask = ~((uint)ushort.MaxValue << bitOffset);
+            uint shiftedValue = (uint)value << bitOffset;
+
+            // this doesn't need to be volatile since CompareExchange will update stale values
+            uint originalValue = alignedRef;
+            uint newValue;
+            do
+            {
+                // make sure the ref is still aligned
+                Debug.Assert(Unsafe.IsOpportunisticallyAligned(ref alignedRef, sizeof(uint)));
+                newValue = originalValue & mask | shiftedValue;
+            } while (originalValue !=
+                     (originalValue = CompareExchange(ref alignedRef, newValue, originalValue)));
+
+            // verify the GC hasn't broken the ref
+            Debug.Assert((nuint)Unsafe.ByteOffset(ref Unsafe.As<uint, ushort>(ref alignedRef), ref location1) == offset);
+            return (ushort)(originalValue >> bitOffset);
+#endif
+        }
 
         /// <summary>Sets a 32-bit unsigned integer to a specified value and returns the original value, as an atomic operation.</summary>
         /// <param name="location1">The variable to set to the specified value.</param>
@@ -168,9 +245,93 @@ public static sbyte CompareExchange(ref sbyte location1, sbyte value, sbyte comp
         /// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static short CompareExchange(ref short location1, short value, short comparand) =>
+            (short)CompareExchange(ref Unsafe.As<short, ushort>(ref location1), (ushort)value, (ushort)comparand);
+
+        /// <summary>Compares two 8-bit unsigned integers for equality and, if they are equal, replaces the first value.</summary>
+        /// <param name="location1">The destination, whose value is compared with <paramref name="comparand"/> and possibly replaced.</param>
+        /// <param name="value">The value that replaces the destination value if the comparison results in equality.</param>
+        /// <param name="comparand">The value that is compared to the value at <paramref name="location1"/>.</param>
+        /// <returns>The original value in <paramref name="location1"/>.</returns>
+        /// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe byte CompareExchange(ref byte location1, byte value, byte comparand)
+        {
+#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_WASM)
+            return CompareExchange(ref location1, value, comparand); // Must expand intrinsic
+#else
+            // this relies on GC keeping 4B alignment for refs and on subtracting to such alignment being in the same object
+            nuint offset = Unsafe.OpportunisticMisalignment(ref location1, sizeof(uint));
+            ref uint alignedRef = ref Unsafe.As<byte, uint>(ref Unsafe.SubtractByteOffset(ref location1, offset));
+            int bitOffset =
+                (int)((BitConverter.IsLittleEndian ? offset : sizeof(uint) - offset - sizeof(byte)) * 8); // to bit offset
+            Debug.Assert(bitOffset is 0 or 8 or 16 or 24);
+            uint mask = ~((uint)byte.MaxValue << bitOffset);
+            uint shiftedValue = (uint)value << bitOffset;
+            uint shiftedComparand = (uint)comparand << bitOffset;
+
+            // this doesn't need to be volatile since CompareExchange will update stale values
+            uint originalValue = alignedRef;
+            uint fullComparand, newValue;
+            do
+            {
+                // make sure the ref is still aligned
+                Debug.Assert(Unsafe.IsOpportunisticallyAligned(ref alignedRef, sizeof(uint)));
+                uint otherMemory = originalValue & mask;
+                fullComparand = otherMemory | shiftedComparand;
+                newValue = otherMemory | shiftedValue;
+            } while (originalValue !=
+                     (originalValue = CompareExchange(ref alignedRef, newValue, fullComparand)));
+
+            // verify the GC hasn't broken the ref
+            Debug.Assert((nuint)Unsafe.ByteOffset(ref Unsafe.As<uint, byte>(ref alignedRef), ref location1) == offset);
+            return (byte)(originalValue >> bitOffset);
+#endif
+        }
+
+        /// <summary>Compares two 16-bit signed integers for equality and, if they are equal, replaces the first value.</summary>
+        /// <param name="location1">The destination, whose value is compared with <paramref name="comparand"/> and possibly replaced.</param>
+        /// <param name="value">The value that replaces the destination value if the comparison results in equality.</param>
+        /// <param name="comparand">The value that is compared to the value at <paramref name="location1"/>.</param>
+        /// <returns>The original value in <paramref name="location1"/>.</returns>
+        /// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
-        public static ushort CompareExchange(ref ushort location1, ushort value, ushort comparand) =>
-            (ushort)CompareExchange(ref Unsafe.As<ushort, short>(ref location1), (short)value, (short)comparand);
+        public static unsafe ushort CompareExchange(ref ushort location1, ushort value, ushort comparand)
+        {
+#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_WASM)
+            return CompareExchange(ref location1, value, comparand); // Must expand intrinsic
+#else
+            // this relies on GC keeping 4B alignment for refs and on subtracting to such alignment being in the same object
+            nuint offset = Unsafe.OpportunisticMisalignment(ref location1, sizeof(uint));
+            ref uint alignedRef = ref Unsafe.As<ushort, uint>(ref Unsafe.SubtractByteOffset(ref location1, offset));
+            int bitOffset =
+                (int)((BitConverter.IsLittleEndian ? offset : sizeof(uint) - offset - sizeof(byte)) * 8); // to bit offset
+            Debug.Assert(bitOffset is 0 or 16);
+            uint mask = ~((uint)ushort.MaxValue << bitOffset);
+            uint shiftedValue = (uint)value << bitOffset;
+            uint shiftedComparand = (uint)comparand << bitOffset;
+
+            // this doesn't need to be volatile since CompareExchange will update stale values
+            uint originalValue = alignedRef;
+            uint fullComparand, newValue;
+            do
+            {
+                // make sure the ref is still aligned
+                Debug.Assert(Unsafe.IsOpportunisticallyAligned(ref alignedRef, sizeof(uint)));
+                uint otherMemory = originalValue & mask;
+                fullComparand = otherMemory | shiftedComparand;
+                newValue = otherMemory | shiftedValue;
+            } while (originalValue !=
+                     (originalValue = CompareExchange(ref alignedRef, newValue, fullComparand)));
+
+            // verify the GC hasn't broken the ref
+            Debug.Assert((nuint)Unsafe.ByteOffset(ref Unsafe.As<uint, ushort>(ref alignedRef), ref location1) == offset);
+            return (ushort)(originalValue >> bitOffset);
+#endif
+        }
 
         /// <summary>Compares two 32-bit unsigned integers for equality and, if they are equal, replaces the first value.</summary>
         /// <param name="location1">The destination, whose value is compared with <paramref name="comparand"/> and possibly replaced.</param>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/Lock.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/Lock.cs
index 66cf6a03f607..b7961869eac5 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/Lock.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/Lock.cs
@@ -38,10 +38,7 @@ public sealed partial class Lock
         private uint _state; // see State for layout
         private uint _recursionCount;
         private short _spinCount;
-
-        // The lowest bit is a flag, when set it indicates that the lock should use trivial waits
-        private ushort _waiterStartTimeMsAndFlags;
-
+        private ushort _waiterStartTimeMs;
         private AutoResetEvent? _waitEvent;
 
 #if NATIVEAOT // The method needs to be public in NativeAOT so that other private libraries can access it
@@ -51,10 +48,7 @@ internal Lock(bool useTrivialWaits)
 #endif
             : this()
         {
-            if (useTrivialWaits)
-            {
-                _waiterStartTimeMsAndFlags = 1;
-            }
+            State.InitializeUseTrivialWaits(this, useTrivialWaits);
         }
 
         /// <summary>
@@ -136,7 +130,7 @@ internal Scope(Lock lockObj, ThreadId currentThreadId)
             public void Dispose()
             {
                 Lock? lockObj = _lockObj;
-                if (lockObj != null)
+                if (lockObj is not null)
                 {
                     _lockObj = null;
                     lockObj.Exit(_currentThreadId);
@@ -488,7 +482,7 @@ private ThreadId TryEnterSlow(int timeoutMs, ThreadId currentThreadId)
                 int remainingTimeoutMs = timeoutMs;
                 while (true)
                 {
-                    if (!waitEvent.WaitOneNoCheck(remainingTimeoutMs, UseTrivialWaits))
+                    if (!waitEvent.WaitOneNoCheck(remainingTimeoutMs, new State(this).UseTrivialWaits))
                     {
                         break;
                     }
@@ -567,19 +561,7 @@ private ThreadId TryEnterSlow(int timeoutMs, ThreadId currentThreadId)
             return new ThreadId(0);
         }
 
-        // Trivial waits are:
-        // - Not interruptible by Thread.Interrupt
-        // - Don't allow reentrance through APCs or message pumping
-        // - Not forwarded to SynchronizationContext wait overrides
-        private bool UseTrivialWaits => (_waiterStartTimeMsAndFlags & 1) != 0;
-
-        private ushort WaiterStartTimeMs
-        {
-            get => (ushort)(_waiterStartTimeMsAndFlags >> 1);
-            set => _waiterStartTimeMsAndFlags = (ushort)((value << 1) | (_waiterStartTimeMsAndFlags & 1));
-        }
-
-        private void ResetWaiterStartTime() => WaiterStartTimeMs = 0;
+        private void ResetWaiterStartTime() => _waiterStartTimeMs = 0;
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private void RecordWaiterStartTime()
@@ -590,7 +572,7 @@ private void RecordWaiterStartTime()
                 // Don't record zero, that value is reserved for indicating that a time is not recorded
                 currentTimeMs--;
             }
-            WaiterStartTimeMs = currentTimeMs;
+            _waiterStartTimeMs = currentTimeMs;
         }
 
         private bool ShouldStopPreemptingWaiters
@@ -599,10 +581,10 @@ private bool ShouldStopPreemptingWaiters
             get
             {
                 // If the recorded time is zero, a time has not been recorded yet
-                ushort waiterStartTimeMs = WaiterStartTimeMs;
+                ushort waiterStartTimeMs = _waiterStartTimeMs;
                 return
                     waiterStartTimeMs != 0 &&
-                    (ushort)Environment.TickCount - waiterStartTimeMs >= MaxDurationMsForPreemptingWaiters;
+                    (ushort)(Environment.TickCount - waiterStartTimeMs) >= MaxDurationMsForPreemptingWaiters;
             }
         }
 
@@ -664,15 +646,6 @@ internal nint LockIdForEvents
             }
         }
 
-        internal unsafe nint ObjectIdForEvents
-        {
-            get
-            {
-                Lock lockObj = this;
-                return *(nint*)Unsafe.AsPointer(ref lockObj);
-            }
-        }
-
         internal ulong OwningThreadId => _owningThreadId;
 
         private static short DetermineMaxSpinCount() =>
@@ -707,8 +680,8 @@ private struct State : IEquatable<State>
             private const uint SpinnerCountIncrement = (uint)1 << 2; // bits 2-4
             private const uint SpinnerCountMask = (uint)0x7 << 2;
             private const uint IsWaiterSignaledToWakeMask = (uint)1 << 5; // bit 5
-            private const byte WaiterCountShift = 6;
-            private const uint WaiterCountIncrement = (uint)1 << WaiterCountShift; // bits 6-31
+            private const uint UseTrivialWaitsMask = (uint)1 << 6; // bit 6
+            private const uint WaiterCountIncrement = (uint)1 << 7; // bits 7-31
 
             private uint _state;
 
@@ -787,6 +760,22 @@ private void ClearIsWaiterSignaledToWake()
                 _state -= IsWaiterSignaledToWakeMask;
             }
 
+            // Trivial waits are:
+            // - Not interruptible by Thread.Interrupt
+            // - Don't allow reentrance through APCs or message pumping
+            // - Not forwarded to SynchronizationContext wait overrides
+            public bool UseTrivialWaits => (_state & UseTrivialWaitsMask) != 0;
+
+            public static void InitializeUseTrivialWaits(Lock lockObj, bool useTrivialWaits)
+            {
+                Debug.Assert(lockObj._state == 0);
+
+                if (useTrivialWaits)
+                {
+                    lockObj._state = UseTrivialWaitsMask;
+                }
+            }
+
             public bool HasAnyWaiters => _state >= WaiterCountIncrement;
 
             private bool TryIncrementWaiterCount()
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Windows.cs
index c9d5582e7275..cc64808ab9ca 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Windows.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Windows.cs
@@ -50,7 +50,7 @@ public bool WaitCore(int timeoutMs)
             return success;
         }
 
-        protected override void ReleaseCore(int count)
+        private void ReleaseCore(int count)
         {
             Debug.Assert(count > 0);
 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.cs
index 6975cfa6356f..39233c87c15c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.cs
@@ -11,13 +11,29 @@ namespace System.Threading
     /// A LIFO semaphore.
     /// Waits on this semaphore are uninterruptible.
     /// </summary>
-    internal sealed partial class LowLevelLifoSemaphore : LowLevelLifoSemaphoreBase, IDisposable
+    internal sealed partial class LowLevelLifoSemaphore : IDisposable
     {
+        private CacheLineSeparatedCounts _separated;
+
+        private readonly int _maximumSignalCount;
+        private readonly int _spinCount;
+        private readonly Action _onWait;
+
         private const int SpinSleep0Threshold = 10;
 
         public LowLevelLifoSemaphore(int initialSignalCount, int maximumSignalCount, int spinCount, Action onWait)
-            : base(initialSignalCount, maximumSignalCount, spinCount, onWait)
         {
+            Debug.Assert(initialSignalCount >= 0);
+            Debug.Assert(initialSignalCount <= maximumSignalCount);
+            Debug.Assert(maximumSignalCount > 0);
+            Debug.Assert(spinCount >= 0);
+
+            _separated = default;
+            _separated._counts.SignalCount = (uint)initialSignalCount;
+            _maximumSignalCount = maximumSignalCount;
+            _spinCount = spinCount;
+            _onWait = onWait;
+
             Create(maximumSignalCount);
         }
 
@@ -25,6 +41,10 @@ public bool Wait(int timeoutMs, bool spinWait)
         {
             Debug.Assert(timeoutMs >= -1);
 
+#if FEATURE_WASM_MANAGED_THREADS
+            Thread.AssureBlockingPossible();
+#endif
+
             int spinCount = spinWait ? _spinCount : 0;
 
             // Try to acquire the semaphore or
@@ -185,5 +205,178 @@ private bool WaitForSignal(int timeoutMs)
                 }
             }
         }
+
+        public void Release(int releaseCount)
+        {
+            Debug.Assert(releaseCount > 0);
+            Debug.Assert(releaseCount <= _maximumSignalCount);
+
+            int countOfWaitersToWake;
+            Counts counts = _separated._counts;
+            while (true)
+            {
+                Counts newCounts = counts;
+
+                // Increase the signal count. The addition doesn't overflow because of the limit on the max signal count in constructor.
+                newCounts.AddSignalCount((uint)releaseCount);
+
+                // Determine how many waiters to wake, taking into account how many spinners and waiters there are and how many waiters
+                // have previously been signaled to wake but have not yet woken
+                countOfWaitersToWake =
+                    (int)Math.Min(newCounts.SignalCount, (uint)counts.WaiterCount + counts.SpinnerCount) -
+                    counts.SpinnerCount -
+                    counts.CountOfWaitersSignaledToWake;
+                if (countOfWaitersToWake > 0)
+                {
+                    // Ideally, limiting to a maximum of releaseCount would not be necessary and could be an assert instead, but since
+                    // WaitForSignal() does not have enough information to tell whether a woken thread was signaled, and due to the cap
+                    // below, it's possible for countOfWaitersSignaledToWake to be less than the number of threads that have actually
+                    // been signaled to wake.
+                    if (countOfWaitersToWake > releaseCount)
+                    {
+                        countOfWaitersToWake = releaseCount;
+                    }
+
+                    // Cap countOfWaitersSignaledToWake to its max value. It's ok to ignore some woken threads in this count, it just
+                    // means some more threads will be woken next time. Typically, it won't reach the max anyway.
+                    newCounts.AddUpToMaxCountOfWaitersSignaledToWake((uint)countOfWaitersToWake);
+                }
+
+                Counts countsBeforeUpdate = _separated._counts.InterlockedCompareExchange(newCounts, counts);
+                if (countsBeforeUpdate == counts)
+                {
+                    Debug.Assert(releaseCount <= _maximumSignalCount - counts.SignalCount);
+                    if (countOfWaitersToWake > 0)
+                        ReleaseCore(countOfWaitersToWake);
+                    return;
+                }
+
+                counts = countsBeforeUpdate;
+            }
+        }
+
+        private struct Counts : IEquatable<Counts>
+        {
+            private const byte SignalCountShift = 0;
+            private const byte WaiterCountShift = 32;
+            private const byte SpinnerCountShift = 48;
+            private const byte CountOfWaitersSignaledToWakeShift = 56;
+
+            private ulong _data;
+
+            private Counts(ulong data) => _data = data;
+
+            private uint GetUInt32Value(byte shift) => (uint)(_data >> shift);
+            private void SetUInt32Value(uint value, byte shift) =>
+                _data = (_data & ~((ulong)uint.MaxValue << shift)) | ((ulong)value << shift);
+            private ushort GetUInt16Value(byte shift) => (ushort)(_data >> shift);
+            private void SetUInt16Value(ushort value, byte shift) =>
+                _data = (_data & ~((ulong)ushort.MaxValue << shift)) | ((ulong)value << shift);
+            private byte GetByteValue(byte shift) => (byte)(_data >> shift);
+            private void SetByteValue(byte value, byte shift) =>
+                _data = (_data & ~((ulong)byte.MaxValue << shift)) | ((ulong)value << shift);
+
+            public uint SignalCount
+            {
+                get => GetUInt32Value(SignalCountShift);
+                set => SetUInt32Value(value, SignalCountShift);
+            }
+
+            public void AddSignalCount(uint value)
+            {
+                Debug.Assert(value <= uint.MaxValue - SignalCount);
+                _data += (ulong)value << SignalCountShift;
+            }
+
+            public void IncrementSignalCount() => AddSignalCount(1);
+
+            public void DecrementSignalCount()
+            {
+                Debug.Assert(SignalCount != 0);
+                _data -= (ulong)1 << SignalCountShift;
+            }
+
+            public ushort WaiterCount
+            {
+                get => GetUInt16Value(WaiterCountShift);
+                set => SetUInt16Value(value, WaiterCountShift);
+            }
+
+            public void IncrementWaiterCount()
+            {
+                Debug.Assert(WaiterCount < ushort.MaxValue);
+                _data += (ulong)1 << WaiterCountShift;
+            }
+
+            public void DecrementWaiterCount()
+            {
+                Debug.Assert(WaiterCount != 0);
+                _data -= (ulong)1 << WaiterCountShift;
+            }
+
+            public void InterlockedDecrementWaiterCount()
+            {
+                var countsAfterUpdate = new Counts(Interlocked.Add(ref _data, unchecked((ulong)-1) << WaiterCountShift));
+                Debug.Assert(countsAfterUpdate.WaiterCount != ushort.MaxValue); // underflow check
+            }
+
+            public byte SpinnerCount
+            {
+                get => GetByteValue(SpinnerCountShift);
+                set => SetByteValue(value, SpinnerCountShift);
+            }
+
+            public void IncrementSpinnerCount()
+            {
+                Debug.Assert(SpinnerCount < byte.MaxValue);
+                _data += (ulong)1 << SpinnerCountShift;
+            }
+
+            public void DecrementSpinnerCount()
+            {
+                Debug.Assert(SpinnerCount != 0);
+                _data -= (ulong)1 << SpinnerCountShift;
+            }
+
+            public byte CountOfWaitersSignaledToWake
+            {
+                get => GetByteValue(CountOfWaitersSignaledToWakeShift);
+                set => SetByteValue(value, CountOfWaitersSignaledToWakeShift);
+            }
+
+            public void AddUpToMaxCountOfWaitersSignaledToWake(uint value)
+            {
+                uint availableCount = (uint)(byte.MaxValue - CountOfWaitersSignaledToWake);
+                if (value > availableCount)
+                {
+                    value = availableCount;
+                }
+                _data += (ulong)value << CountOfWaitersSignaledToWakeShift;
+            }
+
+            public void DecrementCountOfWaitersSignaledToWake()
+            {
+                Debug.Assert(CountOfWaitersSignaledToWake != 0);
+                _data -= (ulong)1 << CountOfWaitersSignaledToWakeShift;
+            }
+
+            public Counts InterlockedCompareExchange(Counts newCounts, Counts oldCounts) =>
+                new Counts(Interlocked.CompareExchange(ref _data, newCounts._data, oldCounts._data));
+
+            public static bool operator ==(Counts lhs, Counts rhs) => lhs.Equals(rhs);
+            public static bool operator !=(Counts lhs, Counts rhs) => !lhs.Equals(rhs);
+
+            public override bool Equals([NotNullWhen(true)] object? obj) => obj is Counts other && Equals(other);
+            public bool Equals(Counts other) => _data == other._data;
+            public override int GetHashCode() => (int)_data + (int)(_data >> 32);
+        }
+
+        [StructLayout(LayoutKind.Sequential)]
+        private struct CacheLineSeparatedCounts
+        {
+            private readonly Internal.PaddingFor32 _pad1;
+            public Counts _counts;
+            private readonly Internal.PaddingFor32 _pad2;
+        }
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphoreBase.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphoreBase.cs
deleted file mode 100644
index cb4df549b080..000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphoreBase.cs
+++ /dev/null
@@ -1,211 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-using System.Diagnostics.CodeAnalysis;
-using System.Runtime.InteropServices;
-
-namespace System.Threading
-{
-    /// <summary>
-    /// A LIFO semaphore.
-    /// Waits on this semaphore are uninterruptible.
-    /// </summary>
-    internal abstract class LowLevelLifoSemaphoreBase
-    {
-        protected CacheLineSeparatedCounts _separated;
-
-        protected readonly int _maximumSignalCount;
-        protected readonly int _spinCount;
-        protected readonly Action _onWait;
-
-        public LowLevelLifoSemaphoreBase(int initialSignalCount, int maximumSignalCount, int spinCount, Action onWait)
-        {
-            Debug.Assert(initialSignalCount >= 0);
-            Debug.Assert(initialSignalCount <= maximumSignalCount);
-            Debug.Assert(maximumSignalCount > 0);
-            Debug.Assert(spinCount >= 0);
-
-            _separated = default;
-            _separated._counts.SignalCount = (uint)initialSignalCount;
-            _maximumSignalCount = maximumSignalCount;
-            _spinCount = spinCount;
-            _onWait = onWait;
-        }
-
-        protected abstract void ReleaseCore(int count);
-
-        public void Release(int releaseCount)
-        {
-            Debug.Assert(releaseCount > 0);
-            Debug.Assert(releaseCount <= _maximumSignalCount);
-
-            int countOfWaitersToWake;
-            Counts counts = _separated._counts;
-            while (true)
-            {
-                Counts newCounts = counts;
-
-                // Increase the signal count. The addition doesn't overflow because of the limit on the max signal count in constructor.
-                newCounts.AddSignalCount((uint)releaseCount);
-
-                // Determine how many waiters to wake, taking into account how many spinners and waiters there are and how many waiters
-                // have previously been signaled to wake but have not yet woken
-                countOfWaitersToWake =
-                    (int)Math.Min(newCounts.SignalCount, (uint)counts.WaiterCount + counts.SpinnerCount) -
-                    counts.SpinnerCount -
-                    counts.CountOfWaitersSignaledToWake;
-                if (countOfWaitersToWake > 0)
-                {
-                    // Ideally, limiting to a maximum of releaseCount would not be necessary and could be an assert instead, but since
-                    // WaitForSignal() does not have enough information to tell whether a woken thread was signaled, and due to the cap
-                    // below, it's possible for countOfWaitersSignaledToWake to be less than the number of threads that have actually
-                    // been signaled to wake.
-                    if (countOfWaitersToWake > releaseCount)
-                    {
-                        countOfWaitersToWake = releaseCount;
-                    }
-
-                    // Cap countOfWaitersSignaledToWake to its max value. It's ok to ignore some woken threads in this count, it just
-                    // means some more threads will be woken next time. Typically, it won't reach the max anyway.
-                    newCounts.AddUpToMaxCountOfWaitersSignaledToWake((uint)countOfWaitersToWake);
-                }
-
-                Counts countsBeforeUpdate = _separated._counts.InterlockedCompareExchange(newCounts, counts);
-                if (countsBeforeUpdate == counts)
-                {
-                    Debug.Assert(releaseCount <= _maximumSignalCount - counts.SignalCount);
-                    if (countOfWaitersToWake > 0)
-                        ReleaseCore(countOfWaitersToWake);
-                    return;
-                }
-
-                counts = countsBeforeUpdate;
-            }
-        }
-
-        protected struct Counts : IEquatable<Counts>
-        {
-            private const byte SignalCountShift = 0;
-            private const byte WaiterCountShift = 32;
-            private const byte SpinnerCountShift = 48;
-            private const byte CountOfWaitersSignaledToWakeShift = 56;
-
-            private ulong _data;
-
-            private Counts(ulong data) => _data = data;
-
-            private uint GetUInt32Value(byte shift) => (uint)(_data >> shift);
-            private void SetUInt32Value(uint value, byte shift) =>
-                _data = (_data & ~((ulong)uint.MaxValue << shift)) | ((ulong)value << shift);
-            private ushort GetUInt16Value(byte shift) => (ushort)(_data >> shift);
-            private void SetUInt16Value(ushort value, byte shift) =>
-                _data = (_data & ~((ulong)ushort.MaxValue << shift)) | ((ulong)value << shift);
-            private byte GetByteValue(byte shift) => (byte)(_data >> shift);
-            private void SetByteValue(byte value, byte shift) =>
-                _data = (_data & ~((ulong)byte.MaxValue << shift)) | ((ulong)value << shift);
-
-            public uint SignalCount
-            {
-                get => GetUInt32Value(SignalCountShift);
-                set => SetUInt32Value(value, SignalCountShift);
-            }
-
-            public void AddSignalCount(uint value)
-            {
-                Debug.Assert(value <= uint.MaxValue - SignalCount);
-                _data += (ulong)value << SignalCountShift;
-            }
-
-            public void IncrementSignalCount() => AddSignalCount(1);
-
-            public void DecrementSignalCount()
-            {
-                Debug.Assert(SignalCount != 0);
-                _data -= (ulong)1 << SignalCountShift;
-            }
-
-            public ushort WaiterCount
-            {
-                get => GetUInt16Value(WaiterCountShift);
-                set => SetUInt16Value(value, WaiterCountShift);
-            }
-
-            public void IncrementWaiterCount()
-            {
-                Debug.Assert(WaiterCount < ushort.MaxValue);
-                _data += (ulong)1 << WaiterCountShift;
-            }
-
-            public void DecrementWaiterCount()
-            {
-                Debug.Assert(WaiterCount != 0);
-                _data -= (ulong)1 << WaiterCountShift;
-            }
-
-            public void InterlockedDecrementWaiterCount()
-            {
-                var countsAfterUpdate = new Counts(Interlocked.Add(ref _data, unchecked((ulong)-1) << WaiterCountShift));
-                Debug.Assert(countsAfterUpdate.WaiterCount != ushort.MaxValue); // underflow check
-            }
-
-            public byte SpinnerCount
-            {
-                get => GetByteValue(SpinnerCountShift);
-                set => SetByteValue(value, SpinnerCountShift);
-            }
-
-            public void IncrementSpinnerCount()
-            {
-                Debug.Assert(SpinnerCount < byte.MaxValue);
-                _data += (ulong)1 << SpinnerCountShift;
-            }
-
-            public void DecrementSpinnerCount()
-            {
-                Debug.Assert(SpinnerCount != 0);
-                _data -= (ulong)1 << SpinnerCountShift;
-            }
-
-            public byte CountOfWaitersSignaledToWake
-            {
-                get => GetByteValue(CountOfWaitersSignaledToWakeShift);
-                set => SetByteValue(value, CountOfWaitersSignaledToWakeShift);
-            }
-
-            public void AddUpToMaxCountOfWaitersSignaledToWake(uint value)
-            {
-                uint availableCount = (uint)(byte.MaxValue - CountOfWaitersSignaledToWake);
-                if (value > availableCount)
-                {
-                    value = availableCount;
-                }
-                _data += (ulong)value << CountOfWaitersSignaledToWakeShift;
-            }
-
-            public void DecrementCountOfWaitersSignaledToWake()
-            {
-                Debug.Assert(CountOfWaitersSignaledToWake != 0);
-                _data -= (ulong)1 << CountOfWaitersSignaledToWakeShift;
-            }
-
-            public Counts InterlockedCompareExchange(Counts newCounts, Counts oldCounts) =>
-                new Counts(Interlocked.CompareExchange(ref _data, newCounts._data, oldCounts._data));
-
-            public static bool operator ==(Counts lhs, Counts rhs) => lhs.Equals(rhs);
-            public static bool operator !=(Counts lhs, Counts rhs) => !lhs.Equals(rhs);
-
-            public override bool Equals([NotNullWhen(true)] object? obj) => obj is Counts other && Equals(other);
-            public bool Equals(Counts other) => _data == other._data;
-            public override int GetHashCode() => (int)_data + (int)(_data >> 32);
-        }
-
-        [StructLayout(LayoutKind.Sequential)]
-        protected struct CacheLineSeparatedCounts
-        {
-            private readonly Internal.PaddingFor32 _pad1;
-            public Counts _counts;
-            private readonly Internal.PaddingFor32 _pad2;
-        }
-    }
-}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/ManualResetEventSlim.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/ManualResetEventSlim.cs
index a385543f9174..516fb42bf0a5 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/ManualResetEventSlim.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/ManualResetEventSlim.cs
@@ -485,6 +485,10 @@ public bool Wait(int millisecondsTimeout, CancellationToken cancellationToken)
 
             ArgumentOutOfRangeException.ThrowIfLessThan(millisecondsTimeout, -1);
 
+#if FEATURE_WASM_MANAGED_THREADS
+            Thread.AssureBlockingPossible();
+#endif
+
             if (!IsSet)
             {
                 if (millisecondsTimeout == 0)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/PortableThreadPool.WorkerThread.NonBrowser.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/PortableThreadPool.WorkerThread.NonBrowser.cs
deleted file mode 100644
index c3b278019f6d..000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/PortableThreadPool.WorkerThread.NonBrowser.cs
+++ /dev/null
@@ -1,124 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics.Tracing;
-
-namespace System.Threading
-{
-    internal sealed partial class PortableThreadPool
-    {
-        private int _numThreadsBeingKeptAlive;
-
-        /// <summary>
-        /// The worker thread infastructure for the CLR thread pool.
-        /// </summary>
-        private static partial class WorkerThread
-        {
-            private static readonly short ThreadsToKeepAlive = DetermineThreadsToKeepAlive();
-
-            private static short DetermineThreadsToKeepAlive()
-            {
-                const short DefaultThreadsToKeepAlive = 0;
-
-                // The number of worker threads to keep alive after they are created. Set to -1 to keep all created worker
-                // threads alive. When the ThreadTimeoutMs config value is also set, for worker threads the timeout applies to
-                // worker threads that are in excess of the number configured for ThreadsToKeepAlive.
-                short threadsToKeepAlive =
-                    AppContextConfigHelper.GetInt16Config(
-                        "System.Threading.ThreadPool.ThreadsToKeepAlive",
-                        "DOTNET_ThreadPool_ThreadsToKeepAlive",
-                        DefaultThreadsToKeepAlive);
-                return threadsToKeepAlive >= -1 ? threadsToKeepAlive : DefaultThreadsToKeepAlive;
-            }
-
-            /// <summary>
-            /// Semaphore for controlling how many threads are currently working.
-            /// </summary>
-            private static readonly LowLevelLifoSemaphore s_semaphore =
-                new LowLevelLifoSemaphore(
-                    0,
-                    MaxPossibleThreadCount,
-                    AppContextConfigHelper.GetInt32Config(
-                        "System.Threading.ThreadPool.UnfairSemaphoreSpinLimit",
-                        SemaphoreSpinCountDefault,
-                        false),
-                    onWait: () =>
-                    {
-                        if (NativeRuntimeEventSource.Log.IsEnabled())
-                        {
-                            NativeRuntimeEventSource.Log.ThreadPoolWorkerThreadWait(
-                                (uint)ThreadPoolInstance._separated.counts.VolatileRead().NumExistingThreads);
-                        }
-                    });
-
-            private static readonly ThreadStart s_workerThreadStart = WorkerThreadStart;
-
-            private static void WorkerThreadStart()
-            {
-                Thread.CurrentThread.SetThreadPoolWorkerThreadName();
-
-                PortableThreadPool threadPoolInstance = ThreadPoolInstance;
-
-                if (NativeRuntimeEventSource.Log.IsEnabled())
-                {
-                    NativeRuntimeEventSource.Log.ThreadPoolWorkerThreadStart(
-                        (uint)threadPoolInstance._separated.counts.VolatileRead().NumExistingThreads);
-                }
-
-                LowLevelLock threadAdjustmentLock = threadPoolInstance._threadAdjustmentLock;
-                LowLevelLifoSemaphore semaphore = s_semaphore;
-
-                // Determine the idle timeout to use for this thread. Some threads may always be kept alive based on config.
-                int timeoutMs = ThreadPoolThreadTimeoutMs;
-                if (ThreadsToKeepAlive != 0)
-                {
-                    if (ThreadsToKeepAlive < 0)
-                    {
-                        timeoutMs = Timeout.Infinite;
-                    }
-                    else
-                    {
-                        int count = threadPoolInstance._numThreadsBeingKeptAlive;
-                        while (count < ThreadsToKeepAlive)
-                        {
-                            int countBeforeUpdate =
-                                Interlocked.CompareExchange(ref threadPoolInstance._numThreadsBeingKeptAlive, count + 1, count);
-                            if (countBeforeUpdate == count)
-                            {
-                                timeoutMs = Timeout.Infinite;
-                                break;
-                            }
-
-                            count = countBeforeUpdate;
-                        }
-                    }
-                }
-
-                while (true)
-                {
-                    bool spinWait = true;
-                    while (semaphore.Wait(timeoutMs, spinWait))
-                    {
-                        WorkerDoWork(threadPoolInstance, ref spinWait);
-                    }
-
-                    if (ShouldExitWorker(threadPoolInstance, threadAdjustmentLock))
-                    {
-                        break;
-                    }
-                }
-            }
-
-            private static void CreateWorkerThread()
-            {
-                // Thread pool threads must start in the default execution context without transferring the context, so
-                // using UnsafeStart() instead of Start()
-                Thread workerThread = new Thread(s_workerThreadStart);
-                workerThread.IsThreadPoolThread = true;
-                workerThread.IsBackground = true;
-                // thread name will be set in thread proc
-                workerThread.UnsafeStart();
-            }
-        }
-    }
-}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/PortableThreadPool.WorkerThread.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/PortableThreadPool.WorkerThread.cs
index e776d05ff0ee..40c14ae102c4 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/PortableThreadPool.WorkerThread.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/PortableThreadPool.WorkerThread.cs
@@ -13,6 +13,8 @@ internal sealed partial class PortableThreadPool
         /// </summary>
         private static partial class WorkerThread
         {
+            private static readonly short ThreadsToKeepAlive = DetermineThreadsToKeepAlive();
+
             private const int SemaphoreSpinCountDefaultBaseline = 70;
 #if !TARGET_ARM64 && !TARGET_ARM && !TARGET_LOONGARCH64
             private const int SemaphoreSpinCountDefault = SemaphoreSpinCountDefaultBaseline;
@@ -29,6 +31,110 @@ private static partial class WorkerThread
             // preexisting threads from running out of memory when using new stack space in low-memory situations.
             public const int EstimatedAdditionalStackUsagePerThreadBytes = 64 << 10; // 64 KB
 
+            private static short DetermineThreadsToKeepAlive()
+            {
+                const short DefaultThreadsToKeepAlive = 0;
+
+                // The number of worker threads to keep alive after they are created. Set to -1 to keep all created worker
+                // threads alive. When the ThreadTimeoutMs config value is also set, for worker threads the timeout applies to
+                // worker threads that are in excess of the number configured for ThreadsToKeepAlive.
+                short threadsToKeepAlive =
+                    AppContextConfigHelper.GetInt16Config(
+                        "System.Threading.ThreadPool.ThreadsToKeepAlive",
+                        "DOTNET_ThreadPool_ThreadsToKeepAlive",
+                        DefaultThreadsToKeepAlive);
+                return threadsToKeepAlive >= -1 ? threadsToKeepAlive : DefaultThreadsToKeepAlive;
+            }
+
+            /// <summary>
+            /// Semaphore for controlling how many threads are currently working.
+            /// </summary>
+            private static readonly LowLevelLifoSemaphore s_semaphore =
+                new LowLevelLifoSemaphore(
+                    0,
+                    MaxPossibleThreadCount,
+                    AppContextConfigHelper.GetInt32Config(
+                        "System.Threading.ThreadPool.UnfairSemaphoreSpinLimit",
+                        SemaphoreSpinCountDefault,
+                        false),
+                    onWait: () =>
+                    {
+                        if (NativeRuntimeEventSource.Log.IsEnabled())
+                        {
+                            NativeRuntimeEventSource.Log.ThreadPoolWorkerThreadWait(
+                                (uint)ThreadPoolInstance._separated.counts.VolatileRead().NumExistingThreads);
+                        }
+                    });
+
+            private static readonly ThreadStart s_workerThreadStart = WorkerThreadStart;
+
+            private static void CreateWorkerThread()
+            {
+                // Thread pool threads must start in the default execution context without transferring the context, so
+                // using UnsafeStart() instead of Start()
+                Thread workerThread = new Thread(s_workerThreadStart);
+                workerThread.IsThreadPoolThread = true;
+                workerThread.IsBackground = true;
+                // thread name will be set in thread proc
+                workerThread.UnsafeStart();
+            }
+
+            private static void WorkerThreadStart()
+            {
+                Thread.CurrentThread.SetThreadPoolWorkerThreadName();
+
+                PortableThreadPool threadPoolInstance = ThreadPoolInstance;
+
+                if (NativeRuntimeEventSource.Log.IsEnabled())
+                {
+                    NativeRuntimeEventSource.Log.ThreadPoolWorkerThreadStart(
+                        (uint)threadPoolInstance._separated.counts.VolatileRead().NumExistingThreads);
+                }
+
+                LowLevelLock threadAdjustmentLock = threadPoolInstance._threadAdjustmentLock;
+                LowLevelLifoSemaphore semaphore = s_semaphore;
+
+                // Determine the idle timeout to use for this thread. Some threads may always be kept alive based on config.
+                int timeoutMs = ThreadPoolThreadTimeoutMs;
+                if (ThreadsToKeepAlive != 0)
+                {
+                    if (ThreadsToKeepAlive < 0)
+                    {
+                        timeoutMs = Timeout.Infinite;
+                    }
+                    else
+                    {
+                        int count = threadPoolInstance._numThreadsBeingKeptAlive;
+                        while (count < ThreadsToKeepAlive)
+                        {
+                            int countBeforeUpdate =
+                                Interlocked.CompareExchange(ref threadPoolInstance._numThreadsBeingKeptAlive, count + 1, count);
+                            if (countBeforeUpdate == count)
+                            {
+                                timeoutMs = Timeout.Infinite;
+                                break;
+                            }
+
+                            count = countBeforeUpdate;
+                        }
+                    }
+                }
+
+                while (true)
+                {
+                    bool spinWait = true;
+                    while (semaphore.Wait(timeoutMs, spinWait))
+                    {
+                        WorkerDoWork(threadPoolInstance, ref spinWait);
+                    }
+
+                    if (ShouldExitWorker(threadPoolInstance, threadAdjustmentLock))
+                    {
+                        break;
+                    }
+                }
+            }
+
             [MethodImpl(MethodImplOptions.AggressiveInlining)]
             private static void WorkerDoWork(PortableThreadPool threadPoolInstance, ref bool spinWait)
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/PortableThreadPool.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/PortableThreadPool.cs
index 51142edb03ce..2523213110ba 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/PortableThreadPool.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/PortableThreadPool.cs
@@ -72,6 +72,7 @@ private static int DetermineThreadPoolThreadTimeoutMs()
         private short _maxThreads;
         private short _legacy_minIOCompletionThreads;
         private short _legacy_maxIOCompletionThreads;
+        private int _numThreadsBeingKeptAlive;
 
         [StructLayout(LayoutKind.Explicit, Size = Internal.PaddingHelpers.CACHE_LINE_SIZE * 6)]
         private struct CacheLineSeparated
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/Future.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/Future.cs
index 827d6915dd32..ce4f18b6099b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/Future.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/Future.cs
@@ -66,17 +66,6 @@ public class Task<TResult> : Task
         // The value itself, if set.
         internal TResult? m_result;
 
-        // Extract rarely used helper for a static method in a separate type so that the Func<Task<Task>, Task<TResult>>
-        // generic instantiations don't contribute to all Task instantiations, but only those where WhenAny is used.
-        internal static class TaskWhenAnyCast
-        {
-            // Delegate used by:
-            //     public static Task<Task<TResult>> WhenAny<TResult>(IEnumerable<Task<TResult>> tasks);
-            //     public static Task<Task<TResult>> WhenAny<TResult>(params Task<TResult>[] tasks);
-            // Used to "cast" from Task<Task> to Task<Task<TResult>>.
-            internal static readonly Func<Task<Task>, Task<TResult>> Value = completed => (Task<TResult>)completed.Result;
-        }
-
         // Construct a promise-style task without any options.
         internal Task()
         {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/Task.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/Task.cs
index 09eb579f4edd..b8a2196ec69b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/Task.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/Task.cs
@@ -17,6 +17,7 @@
 using System.Runtime.ExceptionServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Versioning;
+using System.Threading.Tasks.Sources;
 
 namespace System.Threading.Tasks
 {
@@ -3473,15 +3474,16 @@ private void RunContinuations(object continuationObject) // separated out of Fin
             }
 
             // Not a single; it must be a list.
-            List<object?> continuations = (List<object?>)continuationObject;
+            List<object?> list = (List<object?>)continuationObject;
 
             //
             // Begin processing of continuation list
             //
 
             // Wait for any concurrent adds or removes to be retired
-            lock (continuations) { }
-            int continuationCount = continuations.Count;
+            Monitor.Enter(list);
+            Monitor.Exit(list);
+            Span<object?> continuations = CollectionsMarshal.AsSpan(list);
 
             // Fire the asynchronous continuations first. However, if we're not able to run any continuations synchronously,
             // then we can skip this first pass, since the second pass that tries to run everything synchronously will instead
@@ -3489,7 +3491,7 @@ private void RunContinuations(object continuationObject) // separated out of Fin
             if (canInlineContinuations)
             {
                 bool forceContinuationsAsync = false;
-                for (int i = 0; i < continuationCount; i++)
+                for (int i = 0; i < continuations.Length; i++)
                 {
                     // For StandardTaskContinuations, we respect the TaskContinuationOptions.ExecuteSynchronously option,
                     // as the developer needs to explicitly opt-into running the continuation synchronously, and if they do,
@@ -3543,7 +3545,7 @@ private void RunContinuations(object continuationObject) // separated out of Fin
             }
 
             // ... and then fire the synchronous continuations (if there are any).
-            for (int i = 0; i < continuationCount; i++)
+            for (int i = 0; i < continuations.Length; i++)
             {
                 object? currentContinuation = continuations[i];
                 if (currentContinuation == null)
@@ -4510,62 +4512,79 @@ internal void AddCompletionAction(ITaskCompletionAction action, bool addBeforeOt
 
         // Support method for AddTaskContinuation that takes care of multi-continuation logic.
         // Returns true if and only if the continuation was successfully queued.
-        // THIS METHOD ASSUMES THAT m_continuationObject IS NOT NULL.  That case was taken
-        // care of in the calling method, AddTaskContinuation().
         private bool AddTaskContinuationComplex(object tc, bool addBeforeOthers)
         {
             Debug.Assert(tc != null, "Expected non-null tc object in AddTaskContinuationComplex");
 
             object? oldValue = m_continuationObject;
+            Debug.Assert(oldValue is not null, "Expected non-null m_continuationObject object");
+            if (oldValue == s_taskCompletionSentinel)
+            {
+                return false;
+            }
 
             // Logic for the case where we were previously storing a single continuation
-            if ((oldValue != s_taskCompletionSentinel) && (!(oldValue is List<object?>)))
+            List<object?>? list = oldValue as List<object?>;
+            if (list is null)
             {
                 // Construct a new TaskContinuation list and CAS it in.
-                Interlocked.CompareExchange(ref m_continuationObject, new List<object?> { oldValue }, oldValue);
+                list = new List<object?>();
+                if (addBeforeOthers)
+                {
+                    list.Add(tc);
+                    list.Add(oldValue);
+                }
+                else
+                {
+                    list.Add(oldValue);
+                    list.Add(tc);
+                }
+
+                object? expected = oldValue;
+                oldValue = Interlocked.CompareExchange(ref m_continuationObject, list, expected);
+                if (oldValue == expected)
+                {
+                    // We successfully stored the new list with both continuations in it, so we're done.
+                    return true;
+                }
 
                 // We might be racing against another thread converting the single into
-                // a list, or we might be racing against task completion, so resample "list"
-                // below.
+                // a list, or we might be racing against task completion, so recheck for list again.
+                list = oldValue as List<object?>;
+                if (list is null)
+                {
+                    Debug.Assert(oldValue == s_taskCompletionSentinel, "Expected m_continuationObject to be list or sentinel");
+                    return false;
+                }
             }
 
-            // m_continuationObject is guaranteed at this point to be either a List or
-            // s_taskCompletionSentinel.
-            List<object?>? list = m_continuationObject as List<object?>;
-            Debug.Assert((list != null) || (m_continuationObject == s_taskCompletionSentinel),
-                "Expected m_continuationObject to be list or sentinel");
-
-            // If list is null, it can only mean that s_taskCompletionSentinel has been exchanged
-            // into m_continuationObject.  Thus, the task has completed and we should return false
-            // from this method, as we will not be queuing up the continuation.
-            if (list != null)
+            lock (list)
             {
-                lock (list)
+                // It is possible for the task to complete right after we snap the copy of
+                // the list.  If so, then return false without queuing the continuation.
+                if (m_continuationObject == s_taskCompletionSentinel)
                 {
-                    // It is possible for the task to complete right after we snap the copy of
-                    // the list.  If so, then fall through and return false without queuing the
-                    // continuation.
-                    if (m_continuationObject != s_taskCompletionSentinel)
-                    {
-                        // Before growing the list we remove possible null entries that are the
-                        // result from RemoveContinuations()
-                        if (list.Count == list.Capacity)
-                        {
-                            list.RemoveAll(l => l == null);
-                        }
+                    return false;
+                }
 
-                        if (addBeforeOthers)
-                            list.Insert(0, tc);
-                        else
-                            list.Add(tc);
+                // Before growing the list we remove possible null entries that are the
+                // result from RemoveContinuations()
+                if (list.Count == list.Capacity)
+                {
+                    list.RemoveAll(l => l == null);
+                }
 
-                        return true; // continuation successfully queued, so return true.
-                    }
+                if (addBeforeOthers)
+                {
+                    list.Insert(0, tc);
+                }
+                else
+                {
+                    list.Add(tc);
                 }
             }
 
-            // We didn't succeed in queuing the continuation, so return false.
-            return false;
+            return true; // continuation successfully queued, so return true.
         }
 
         // Record a continuation task or action.
@@ -4603,12 +4622,15 @@ internal void RemoveContinuation(object continuationObject) // could be TaskCont
             {
                 // This is not a list. If we have a single object (the one we want to remove) we try to replace it with an empty list.
                 // Note we cannot go back to a null state, since it will mess up the AddTaskContinuation logic.
-                if (Interlocked.CompareExchange(ref m_continuationObject, new List<object?>(), continuationObject) != continuationObject)
+                continuationsLocalRef = Interlocked.CompareExchange(ref m_continuationObject, new List<object?>(), continuationObject);
+                if (continuationsLocalRef != continuationObject)
                 {
                     // If we fail it means that either AddContinuationComplex won the race condition and m_continuationObject is now a List
                     // that contains the element we want to remove. Or FinishContinuations set the s_taskCompletionSentinel.
-                    // So we should try to get a list one more time
-                    continuationsLocalListRef = m_continuationObject as List<object?>;
+                    // So we should try to get a list one more time and if it's null then there is nothing else to do.
+                    continuationsLocalListRef = continuationsLocalRef as List<object?>;
+                    if (continuationsLocalListRef is null)
+                        return;
                 }
                 else
                 {
@@ -4617,24 +4639,20 @@ internal void RemoveContinuation(object continuationObject) // could be TaskCont
                 }
             }
 
-            // if continuationsLocalRef == null it means s_taskCompletionSentinel has been set already and there is nothing else to do.
-            if (continuationsLocalListRef != null)
+            lock (continuationsLocalListRef)
             {
-                lock (continuationsLocalListRef)
-                {
-                    // There is a small chance that this task completed since we took a local snapshot into
-                    // continuationsLocalRef.  In that case, just return; we don't want to be manipulating the
-                    // continuation list as it is being processed.
-                    if (m_continuationObject == s_taskCompletionSentinel) return;
+                // There is a small chance that this task completed since we took a local snapshot into
+                // continuationsLocalRef.  In that case, just return; we don't want to be manipulating the
+                // continuation list as it is being processed.
+                if (m_continuationObject == s_taskCompletionSentinel) return;
 
-                    // Find continuationObject in the continuation list
-                    int index = continuationsLocalListRef.IndexOf(continuationObject);
+                // Find continuationObject in the continuation list
+                int index = continuationsLocalListRef.IndexOf(continuationObject);
 
-                    if (index >= 0)
-                    {
-                        // null out that TaskContinuation entry, which will be interpreted as "to be cleaned up"
-                        continuationsLocalListRef[index] = null;
-                    }
+                if (index >= 0)
+                {
+                    // null out that TaskContinuation entry, which will be interpreted as "to be cleaned up"
+                    continuationsLocalListRef[index] = null;
                 }
             }
         }
@@ -5902,14 +5920,14 @@ internal static Task WhenAll(ReadOnlySpan<Task> tasks) => // TODO https://github
         /// <summary>A Task that gets completed when all of its constituent tasks complete.</summary>
         private sealed class WhenAllPromise : Task, ITaskCompletionAction
         {
-            /// <summary>Either a single faulted/canceled task, or a list of faulted/canceled tasks.</summary>
-            private object? _failedOrCanceled;
             /// <summary>The number of tasks remaining to complete.</summary>
             private int _remainingToComplete;
 
             internal WhenAllPromise(ReadOnlySpan<Task> tasks)
             {
                 Debug.Assert(tasks.Length != 0, "Expected a non-zero length task array");
+                Debug.Assert(m_stateObject is null, "Expected to be able to use the state object field for faulted/canceled tasks.");
+                m_stateFlags |= (int)InternalTaskOptions.HiddenState;
 
                 // Throw if any of the provided tasks is null. This is best effort to inform the caller
                 // they've made a mistake.  If between the time we check for nulls and the time we hook
@@ -5966,16 +5984,14 @@ public void Invoke(Task? completedTask)
                     if (!completedTask.IsCompletedSuccessfully)
                     {
                         // Try to store the completed task as the first that's failed or faulted.
-                        if (Interlocked.CompareExchange(ref _failedOrCanceled, completedTask, null) != null)
+                        object? failedOrCanceled = Interlocked.CompareExchange(ref m_stateObject, completedTask, null);
+                        if (failedOrCanceled != null)
                         {
                             // There was already something there.
                             while (true)
                             {
-                                object? failedOrCanceled = _failedOrCanceled;
-                                Debug.Assert(failedOrCanceled is not null);
-
                                 // If it was a list, add it to the list.
-                                if (_failedOrCanceled is List<Task> list)
+                                if (failedOrCanceled is List<Task> list)
                                 {
                                     lock (list)
                                     {
@@ -5986,13 +6002,15 @@ public void Invoke(Task? completedTask)
 
                                 // Otherwise, it was a Task. Create a new list containing that task and this one, and store it in.
                                 Debug.Assert(failedOrCanceled is Task, $"Expected Task, got {failedOrCanceled}");
-                                if (Interlocked.CompareExchange(ref _failedOrCanceled, new List<Task> { (Task)failedOrCanceled, completedTask }, failedOrCanceled) == failedOrCanceled)
+                                Task first = (Task)failedOrCanceled;
+                                failedOrCanceled = Interlocked.CompareExchange(ref m_stateObject, new List<Task> { first, completedTask }, first);
+                                if (failedOrCanceled == first)
                                 {
                                     break;
                                 }
 
                                 // We lost the race, which means we should loop around one more time and it'll be a list.
-                                Debug.Assert(_failedOrCanceled is List<Task>);
+                                Debug.Assert(failedOrCanceled is List<Task>);
                             }
                         }
                     }
@@ -6001,7 +6019,7 @@ public void Invoke(Task? completedTask)
                 // Decrement the count, and only continue to complete the promise if we're the last one.
                 if (Interlocked.Decrement(ref _remainingToComplete) == 0)
                 {
-                    object? failedOrCanceled = _failedOrCanceled;
+                    object? failedOrCanceled = m_stateObject;
                     if (failedOrCanceled is null)
                     {
                         if (TplEventSource.Log.IsEnabled())
@@ -6642,6 +6660,191 @@ public static Task<Task<TResult>> WhenAny<TResult>(IEnumerable<Task<TResult>> ta
             WhenAny<Task<TResult>>(tasks);
         #endregion
 
+        #region WhenEach
+        /// <summary>Creates an <see cref="IAsyncEnumerable{T}"/> that will yield the supplied tasks as those tasks complete.</summary>
+        /// <param name="tasks">The task to iterate through when completed.</param>
+        /// <returns>An <see cref="IAsyncEnumerable{T}"/> for iterating through the supplied tasks.</returns>
+        /// <remarks>
+        /// The supplied tasks will become available to be output via the enumerable once they've completed. The exact order
+        /// in which the tasks will become available is not defined.
+        /// </remarks>
+        /// <exception cref="ArgumentNullException"><paramref name="tasks"/> is null.</exception>
+        /// <exception cref="ArgumentException"><paramref name="tasks"/> contains a null.</exception>
+        public static IAsyncEnumerable<Task> WhenEach(params Task[] tasks)
+        {
+            ArgumentNullException.ThrowIfNull(tasks);
+            return WhenEach((ReadOnlySpan<Task>)tasks);
+        }
+
+        /// <inheritdoc cref="WhenEach(Task[])"/>
+        public static IAsyncEnumerable<Task> WhenEach(ReadOnlySpan<Task> tasks) => // TODO https://github.com/dotnet/runtime/issues/77873: Add params
+            WhenEachState.Iterate<Task>(WhenEachState.Create(tasks));
+
+        /// <inheritdoc cref="WhenEach(Task[])"/>
+        public static IAsyncEnumerable<Task> WhenEach(IEnumerable<Task> tasks) =>
+            WhenEachState.Iterate<Task>(WhenEachState.Create(tasks));
+
+        /// <inheritdoc cref="WhenEach(Task[])"/>
+        public static IAsyncEnumerable<Task<TResult>> WhenEach<TResult>(params Task<TResult>[] tasks)
+        {
+            ArgumentNullException.ThrowIfNull(tasks);
+            return WhenEach((ReadOnlySpan<Task<TResult>>)tasks);
+        }
+
+        /// <inheritdoc cref="WhenEach(Task[])"/>
+        public static IAsyncEnumerable<Task<TResult>> WhenEach<TResult>(ReadOnlySpan<Task<TResult>> tasks) => // TODO https://github.com/dotnet/runtime/issues/77873: Add params
+            WhenEachState.Iterate<Task<TResult>>(WhenEachState.Create(ReadOnlySpan<Task>.CastUp(tasks)));
+
+        /// <inheritdoc cref="WhenEach(Task[])"/>
+        public static IAsyncEnumerable<Task<TResult>> WhenEach<TResult>(IEnumerable<Task<TResult>> tasks) =>
+            WhenEachState.Iterate<Task<TResult>>(WhenEachState.Create(tasks));
+
+        /// <summary>Object used by <see cref="Iterate"/> to store its state.</summary>
+        private sealed class WhenEachState : Queue<Task>, IValueTaskSource, ITaskCompletionAction
+        {
+            /// <summary>Implementation backing the ValueTask used to wait for the next task to be available.</summary>
+            /// <remarks>This is a mutable struct. Do not make it readonly.</remarks>
+            private ManualResetValueTaskSourceCore<bool> _waitForNextCompletedTask = new() { RunContinuationsAsynchronously = true }; // _waitForNextCompletedTask.Set is called while holding a lock
+            /// <summary>0 if this has never been used in an iteration; 1 if it has.</summary>
+            /// <remarks>This is used to ensure we only ever iterate through the tasks once.</remarks>
+            private int _enumerated;
+
+            /// <summary>Called at the beginning of the iterator to assume ownership of the state.</summary>
+            /// <returns>true if the caller owns the state; false if the caller should end immediately.</returns>
+            public bool TryStart() => Interlocked.Exchange(ref _enumerated, 1) == 0;
+
+            /// <summary>Gets or sets the number of tasks that haven't yet been yielded.</summary>
+            public int Remaining { get; set; }
+
+            void ITaskCompletionAction.Invoke(Task completingTask)
+            {
+                lock (this)
+                {
+                    // Enqueue the task into the queue. If the Count is now 1, we transitioned from
+                    // empty to non-empty, which means we need to signal the MRVTSC, as the consumer
+                    // could be waiting on a ValueTask representing a completed task being available.
+                    Enqueue(completingTask);
+                    if (Count == 1)
+                    {
+                        Debug.Assert(_waitForNextCompletedTask.GetStatus(_waitForNextCompletedTask.Version) == ValueTaskSourceStatus.Pending);
+                        _waitForNextCompletedTask.SetResult(default);
+                    }
+                }
+            }
+            bool ITaskCompletionAction.InvokeMayRunArbitraryCode => false;
+
+            // Delegate to _waitForNextCompletedTask for IValueTaskSource implementation.
+            void IValueTaskSource.GetResult(short token) => _waitForNextCompletedTask.GetResult(token);
+            ValueTaskSourceStatus IValueTaskSource.GetStatus(short token) => _waitForNextCompletedTask.GetStatus(token);
+            void IValueTaskSource.OnCompleted(Action<object?> continuation, object? state, short token, ValueTaskSourceOnCompletedFlags flags) =>
+                _waitForNextCompletedTask.OnCompleted(continuation, state, token, flags);
+
+            /// <summary>Creates a <see cref="WhenEachState"/> from the specified tasks.</summary>
+            public static WhenEachState? Create(ReadOnlySpan<Task> tasks)
+            {
+                WhenEachState? waiter = null;
+
+                if (tasks.Length != 0)
+                {
+                    waiter = new();
+                    foreach (Task task in tasks)
+                    {
+                        if (task is null)
+                        {
+                            ThrowHelper.ThrowArgumentException(ExceptionResource.Task_MultiTaskContinuation_NullTask, ExceptionArgument.tasks);
+                        }
+
+                        waiter.Remaining++;
+                        task.AddCompletionAction(waiter);
+                    }
+                }
+
+                return waiter;
+            }
+
+            /// <inheritdoc cref="Create(ReadOnlySpan{Task})"/>
+            public static WhenEachState? Create(IEnumerable<Task> tasks)
+            {
+                ArgumentNullException.ThrowIfNull(tasks);
+
+                WhenEachState? waiter = null;
+
+                IEnumerator<Task> e = tasks.GetEnumerator();
+                if (e.MoveNext())
+                {
+                    waiter = new();
+                    do
+                    {
+                        Task task = e.Current;
+                        if (task is null)
+                        {
+                            ThrowHelper.ThrowArgumentException(ExceptionResource.Task_MultiTaskContinuation_NullTask, ExceptionArgument.tasks);
+                        }
+
+                        waiter.Remaining++;
+                        task.AddCompletionAction(waiter);
+                    }
+                    while (e.MoveNext());
+                }
+
+                return waiter;
+            }
+
+            /// <summary>Iterates through the tasks represented by the provided waiter.</summary>
+            public static async IAsyncEnumerable<T> Iterate<T>(WhenEachState? waiter, [EnumeratorCancellation] CancellationToken cancellationToken = default) where T : Task
+            {
+                // The enumerable could have GetAsyncEnumerator called on it multiple times. As we're dealing with Tasks that
+                // only ever transition from non-completed to completed, re-enumeration doesn't have much benefit, so we take
+                // advantage of the optimizations possible by not supporting that and simply have the semantics that, no matter
+                // how many times the enumerable is enumerated, every task is yielded only once. The original GetAsyncEnumerator
+                // call will give back all the tasks, and all subsequent iterations will be empty.
+                if (waiter?.TryStart() is not true)
+                {
+                    yield break;
+                }
+
+                // Loop until we've yielded all tasks.
+                while (waiter.Remaining > 0)
+                {
+                    // Either get the next completed task from the queue, or get a
+                    // ValueTask with which to wait for the next task to complete.
+                    Task? next;
+                    ValueTask waitTask = default;
+                    lock (waiter)
+                    {
+                        // Reset the MRVTSC if it was signaled, then try to dequeue a task and
+                        // either return one we got or return a ValueTask that will be signaled
+                        // when the next completed task is available.
+                        waiter._waitForNextCompletedTask.Reset();
+                        if (!waiter.TryDequeue(out next))
+                        {
+                            waitTask = new(waiter, waiter._waitForNextCompletedTask.Version);
+                        }
+                    }
+
+                    // If we got a completed Task, yield it.
+                    if (next is not null)
+                    {
+                        cancellationToken.ThrowIfCancellationRequested();
+                        waiter.Remaining--;
+                        yield return (T)next;
+                        continue;
+                    }
+
+                    // If we have a cancellation token and the ValueTask isn't already completed,
+                    // get a Task from the ValueTask so we can use WaitAsync to make the wait cancelable.
+                    // Otherwise, just await the ValueTask directly. We don't need to be concerned
+                    // about suppressing exceptions, as the ValueTask is only ever completed successfully.
+                    if (cancellationToken.CanBeCanceled && !waitTask.IsCompleted)
+                    {
+                        waitTask = new ValueTask(waitTask.AsTask().WaitAsync(cancellationToken));
+                    }
+                    await waitTask.ConfigureAwait(false);
+                }
+            }
+        }
+        #endregion
+
         internal static Task<TResult> CreateUnwrapPromise<TResult>(Task outerTask, bool lookForOce)
         {
             Debug.Assert(outerTask != null);
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/TplEventSource.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/TplEventSource.cs
index c80e3f300e5d..048306cdf507 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/TplEventSource.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/Tasks/TplEventSource.cs
@@ -502,7 +502,7 @@ public void TraceSynchronousWorkEnd(CausalitySynchronousWork Work)
         }
 
         [NonEvent]
-        public unsafe void RunningContinuation(int TaskID, object Object) { RunningContinuation(TaskID, (long)*((void**)Unsafe.AsPointer(ref Object))); }
+        public unsafe void RunningContinuation(int TaskID, object Object) => RunningContinuation(TaskID, ObjectIDForEvents(Object));
         [Event(20, Keywords = Keywords.Debug)]
         private void RunningContinuation(int TaskID, long Object)
         {
@@ -511,7 +511,7 @@ private void RunningContinuation(int TaskID, long Object)
         }
 
         [NonEvent]
-        public unsafe void RunningContinuationList(int TaskID, int Index, object Object) { RunningContinuationList(TaskID, Index, (long)*((void**)Unsafe.AsPointer(ref Object))); }
+        public unsafe void RunningContinuationList(int TaskID, int Index, object Object) => RunningContinuationList(TaskID, Index, ObjectIDForEvents(Object));
 
         [Event(21, Keywords = Keywords.Debug)]
         public void RunningContinuationList(int TaskID, int Index, long Object)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/Thread.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/Thread.cs
index 1e400eec097b..3ef77076a019 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/Thread.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/Thread.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
+using System.ComponentModel;
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Globalization;
@@ -542,41 +543,93 @@ public void SetCompressedStack(CompressedStack stack)
         public static void MemoryBarrier() => Interlocked.MemoryBarrier();
         public static void Sleep(TimeSpan timeout) => Sleep(WaitHandle.ToTimeoutMilliseconds(timeout));
 
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static byte VolatileRead(ref byte address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static double VolatileRead(ref double address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static short VolatileRead(ref short address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static int VolatileRead(ref int address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static long VolatileRead(ref long address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static IntPtr VolatileRead(ref IntPtr address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         [return: NotNullIfNotNull(nameof(address))]
         public static object? VolatileRead([NotNullIfNotNull(nameof(address))] ref object? address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         [CLSCompliant(false)]
         public static sbyte VolatileRead(ref sbyte address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static float VolatileRead(ref float address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         [CLSCompliant(false)]
         public static ushort VolatileRead(ref ushort address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         [CLSCompliant(false)]
         public static uint VolatileRead(ref uint address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         [CLSCompliant(false)]
         public static ulong VolatileRead(ref ulong address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         [CLSCompliant(false)]
         public static UIntPtr VolatileRead(ref UIntPtr address) => Volatile.Read(ref address);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static void VolatileWrite(ref byte address, byte value) => Volatile.Write(ref address, value);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static void VolatileWrite(ref double address, double value) => Volatile.Write(ref address, value);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static void VolatileWrite(ref short address, short value) => Volatile.Write(ref address, value);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static void VolatileWrite(ref int address, int value) => Volatile.Write(ref address, value);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static void VolatileWrite(ref long address, long value) => Volatile.Write(ref address, value);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static void VolatileWrite(ref IntPtr address, IntPtr value) => Volatile.Write(ref address, value);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static void VolatileWrite([NotNullIfNotNull(nameof(value))] ref object? address, object? value) => Volatile.Write(ref address, value);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         [CLSCompliant(false)]
         public static void VolatileWrite(ref sbyte address, sbyte value) => Volatile.Write(ref address, value);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         public static void VolatileWrite(ref float address, float value) => Volatile.Write(ref address, value);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         [CLSCompliant(false)]
         public static void VolatileWrite(ref ushort address, ushort value) => Volatile.Write(ref address, value);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         [CLSCompliant(false)]
         public static void VolatileWrite(ref uint address, uint value) => Volatile.Write(ref address, value);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         [CLSCompliant(false)]
         public static void VolatileWrite(ref ulong address, ulong value) => Volatile.Write(ref address, value);
+        [Obsolete(Obsoletions.ThreadVolatileReadWriteMessage, DiagnosticId = Obsoletions.ThreadVolatileReadWriteDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
+        [EditorBrowsable(EditorBrowsableState.Never)]
         [CLSCompliant(false)]
         public static void VolatileWrite(ref UIntPtr address, UIntPtr value) => Volatile.Write(ref address, value);
 
@@ -676,26 +729,46 @@ public static int GetCurrentProcessorId()
         [ThreadStatic]
         public static bool ThrowOnBlockingWaitOnJSInteropThread;
 
-        public static void AssureBlockingPossible()
+        [ThreadStatic]
+        public static bool WarnOnBlockingWaitOnJSInteropThread;
+
+#pragma warning disable CS3001
+        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        private static extern unsafe void WarnAboutBlockingWait(char* stack, int length);
+
+        public static unsafe void AssureBlockingPossible()
         {
             if (ThrowOnBlockingWaitOnJSInteropThread)
             {
                 throw new PlatformNotSupportedException(SR.WasmThreads_BlockingWaitNotSupportedOnJSInterop);
             }
+            else if (WarnOnBlockingWaitOnJSInteropThread)
+            {
+                var st = $"Blocking the thread with JS interop is dangerous and could lead to deadlock. ManagedThreadId: {Environment.CurrentManagedThreadId}\n{Environment.StackTrace}";
+                fixed (char* stack = st)
+                {
+                    WarnAboutBlockingWait(stack, st.Length);
+                }
+            }
         }
 
+#pragma warning restore CS3001
+
         public static void ForceBlockingWait(Action<object?> action, object? state = null)
         {
             var flag = ThrowOnBlockingWaitOnJSInteropThread;
+            var wflag = WarnOnBlockingWaitOnJSInteropThread;
             try
             {
                 ThrowOnBlockingWaitOnJSInteropThread = false;
+                WarnOnBlockingWaitOnJSInteropThread = false;
 
                 action(state);
             }
             finally
             {
                 ThrowOnBlockingWaitOnJSInteropThread = flag;
+                WarnOnBlockingWaitOnJSInteropThread = wflag;
             }
         }
 #endif
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/WaitHandle.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/WaitHandle.cs
index 21920bc39b75..d215a82cd323 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/WaitHandle.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/WaitHandle.cs
@@ -117,6 +117,10 @@ internal bool WaitOneNoCheck(
             SafeWaitHandle? waitHandle = _waitHandle;
             ObjectDisposedException.ThrowIf(waitHandle is null, this);
 
+#if FEATURE_WASM_MANAGED_THREADS
+            Thread.AssureBlockingPossible();
+#endif
+
             bool success = false;
             try
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/WaitSubsystem.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/WaitSubsystem.Unix.cs
index d8cb5da15cd9..4a349d8b3031 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Threading/WaitSubsystem.Unix.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Threading/WaitSubsystem.Unix.cs
@@ -344,7 +344,6 @@ public static int Wait(
             bool waitForAll,
             int timeoutMilliseconds)
         {
-            Debug.Assert(waitHandles != null);
             Debug.Assert(waitHandles.Length > 0);
             Debug.Assert(waitHandles.Length <= WaitHandle.MaxWaitHandles);
             Debug.Assert(timeoutMilliseconds >= -1);
diff --git a/src/libraries/System.Private.CoreLib/src/System/ThrowHelper.cs b/src/libraries/System.Private.CoreLib/src/System/ThrowHelper.cs
index b158acb9c7cf..eb9b5b7eb026 100644
--- a/src/libraries/System.Private.CoreLib/src/System/ThrowHelper.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/ThrowHelper.cs
@@ -53,6 +53,12 @@ namespace System
     [StackTraceHidden]
     internal static class ThrowHelper
     {
+        [DoesNotReturn]
+        internal static void ThrowArithmeticException(string message)
+        {
+            throw new ArithmeticException(message);
+        }
+
         [DoesNotReturn]
         internal static void ThrowAccessViolationException()
         {
diff --git a/src/libraries/System.Private.CoreLib/src/System/TimeSpan.cs b/src/libraries/System.Private.CoreLib/src/System/TimeSpan.cs
index ed952a89499d..98d6b9523f22 100644
--- a/src/libraries/System.Private.CoreLib/src/System/TimeSpan.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/TimeSpan.cs
@@ -308,6 +308,195 @@ public TimeSpan Duration()
 
         public override int GetHashCode() => _ticks.GetHashCode();
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static TimeSpan FromUnits(long units, long ticksPerUnit, long minUnits, long maxUnits)
+        {
+            System.Diagnostics.Debug.Assert(minUnits < 0);
+            System.Diagnostics.Debug.Assert(maxUnits > 0);
+
+            if (units > maxUnits || units < minUnits)
+            {
+                ThrowHelper.ThrowArgumentOutOfRange_TimeSpanTooLong();
+            }
+            return TimeSpan.FromTicks(units * ticksPerUnit);
+        }
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="TimeSpan"/> structure to a specified number of
+        /// days.
+        /// </summary>
+        /// <param name="days">Number of days.</param>
+        /// <returns>Returns a <see cref="TimeSpan"/> that represents a specified number of days.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The parameters specify a <see cref="TimeSpan"/> value less than <see cref="MinValue"/> or greater than <see cref="MaxValue"/>
+        /// </exception>
+        public static TimeSpan FromDays(int days) => FromUnits(days, TicksPerDay, MinDays, MaxDays);
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="TimeSpan"/> structure to a specified number of
+        /// days, hours, minutes, seconds, milliseconds, and microseconds.
+        /// </summary>
+        /// <param name="days">Number of days.</param>
+        /// <param name="hours">Number of hours.</param>
+        /// <param name="minutes">Number of minutes.</param>
+        /// <param name="seconds">Number of seconds.</param>
+        /// <param name="milliseconds">Number of milliseconds.</param>
+        /// <param name="microseconds">Number of microseconds.</param>
+        /// <returns>Returns a <see cref="TimeSpan"/> that represents a specified number of days, hours, minutes, seconds, milliseconds, and microseconds.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The parameters specify a <see cref="TimeSpan"/> value less than <see cref="MinValue"/> or greater than <see cref="MaxValue"/>
+        /// </exception>
+        public static TimeSpan FromDays(int days, int hours = 0, long minutes = 0, long seconds = 0, long milliseconds = 0, long microseconds = 0)
+        {
+            Int128 totalMicroseconds = Math.BigMul(days, MicrosecondsPerDay)
+                                     + Math.BigMul(hours, MicrosecondsPerHour)
+                                     + Math.BigMul(minutes, MicrosecondsPerMinute)
+                                     + Math.BigMul(seconds, MicrosecondsPerSecond)
+                                     + Math.BigMul(milliseconds, MicrosecondsPerMillisecond)
+                                     + microseconds;
+
+            return FromMicroseconds(totalMicroseconds);
+        }
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="TimeSpan"/> structure to a specified number of
+        /// hours.
+        /// </summary>
+        /// <param name="hours">Number of hours.</param>
+        /// <returns>Returns a <see cref="TimeSpan"/> that represents a specified number of hours.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The parameters specify a <see cref="TimeSpan"/> value less than <see cref="MinValue"/> or greater than <see cref="MaxValue"/>
+        /// </exception>
+        public static TimeSpan FromHours(int hours) => FromUnits(hours, TicksPerHour, MinHours, MaxHours);
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="TimeSpan"/> structure to a specified number of
+        /// hours, minutes, seconds, milliseconds, and microseconds.
+        /// </summary>
+        /// <param name="hours">Number of hours.</param>
+        /// <param name="minutes">Number of minutes.</param>
+        /// <param name="seconds">Number of seconds.</param>
+        /// <param name="milliseconds">Number of milliseconds.</param>
+        /// <param name="microseconds">Number of microseconds.</param>
+        /// <returns>Returns a <see cref="TimeSpan"/> that represents a specified number of hours, minutes, seconds, milliseconds, and microseconds.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The parameters specify a <see cref="TimeSpan"/> value less than <see cref="MinValue"/> or greater than <see cref="MaxValue"/>
+        /// </exception>
+        public static TimeSpan FromHours(int hours, long minutes = 0, long seconds = 0, long milliseconds = 0, long microseconds = 0)
+        {
+            Int128 totalMicroseconds = Math.BigMul(hours, MicrosecondsPerHour)
+                                     + Math.BigMul(minutes, MicrosecondsPerMinute)
+                                     + Math.BigMul(seconds, MicrosecondsPerSecond)
+                                     + Math.BigMul(milliseconds, MicrosecondsPerMillisecond)
+                                     + microseconds;
+
+            return FromMicroseconds(totalMicroseconds);
+        }
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="TimeSpan"/> structure to a specified number of
+        /// minutes.
+        /// </summary>
+        /// <param name="minutes">Number of minutes.</param>
+        /// <returns>Returns a <see cref="TimeSpan"/> that represents a specified number of minutes.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The parameters specify a <see cref="TimeSpan"/> value less than <see cref="MinValue"/> or greater than <see cref="MaxValue"/>
+        /// </exception>
+        public static TimeSpan FromMinutes(long minutes) => FromUnits(minutes, TicksPerMinute, MinMinutes, MaxMinutes);
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="TimeSpan"/> structure to a specified number of
+        /// minutes, seconds, milliseconds, and microseconds.
+        /// </summary>
+        /// <param name="minutes">Number of minutes.</param>
+        /// <param name="seconds">Number of seconds.</param>
+        /// <param name="milliseconds">Number of milliseconds.</param>
+        /// <param name="microseconds">Number of microseconds.</param>
+        /// <returns>Returns a <see cref="TimeSpan"/> that represents a specified number of minutes, seconds, milliseconds, and microseconds.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The parameters specify a <see cref="TimeSpan"/> value less than <see cref="MinValue"/> or greater than <see cref="MaxValue"/>
+        /// </exception>
+        public static TimeSpan FromMinutes(long minutes, long seconds = 0, long milliseconds = 0, long microseconds = 0)
+        {
+            Int128 totalMicroseconds = Math.BigMul(minutes, MicrosecondsPerMinute)
+                                     + Math.BigMul(seconds, MicrosecondsPerSecond)
+                                     + Math.BigMul(milliseconds, MicrosecondsPerMillisecond)
+                                     + microseconds;
+
+            return FromMicroseconds(totalMicroseconds);
+        }
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="TimeSpan"/> structure to a specified number of
+        /// seconds.
+        /// </summary>
+        /// <param name="seconds">Number of seconds.</param>
+        /// <returns>Returns a <see cref="TimeSpan"/> that represents a specified number of seconds.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The parameters specify a <see cref="TimeSpan"/> value less than <see cref="MinValue"/> or greater than <see cref="MaxValue"/>
+        /// </exception>
+        public static TimeSpan FromSeconds(long seconds) => FromUnits(seconds, TicksPerSecond, MinSeconds, MaxSeconds);
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="TimeSpan"/> structure to a specified number of
+        /// seconds, milliseconds, and microseconds.
+        /// </summary>
+        /// <param name="seconds">Number of seconds.</param>
+        /// <param name="milliseconds">Number of milliseconds.</param>
+        /// <param name="microseconds">Number of microseconds.</param>
+        /// <returns>Returns a <see cref="TimeSpan"/> that represents a specified number of seconds, milliseconds, and microseconds.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The parameters specify a <see cref="TimeSpan"/> value less than <see cref="MinValue"/> or greater than <see cref="MaxValue"/>
+        /// </exception>
+        public static TimeSpan FromSeconds(long seconds, long milliseconds = 0, long microseconds = 0)
+        {
+            Int128 totalMicroseconds = Math.BigMul(seconds, MicrosecondsPerSecond)
+                                     + Math.BigMul(milliseconds, MicrosecondsPerMillisecond)
+                                     + microseconds;
+
+            return FromMicroseconds(totalMicroseconds);
+        }
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="TimeSpan"/> structure to a specified number of
+        /// milliseconds, and microseconds.
+        /// </summary>
+        /// <param name="milliseconds">Number of milliseconds.</param>
+        /// <param name="microseconds">Number of microseconds.</param>
+        /// <returns>Returns a <see cref="TimeSpan"/> that represents a specified number of milliseconds, and microseconds.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The parameters specify a <see cref="TimeSpan"/> value less than <see cref="MinValue"/> or greater than <see cref="MaxValue"/>
+        /// </exception>
+        public static TimeSpan FromMilliseconds(long milliseconds, long microseconds = 0)
+        {
+            Int128 totalMicroseconds = Math.BigMul(milliseconds, MicrosecondsPerMillisecond)
+                                     + microseconds;
+
+            return FromMicroseconds(totalMicroseconds);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static TimeSpan FromMicroseconds(Int128 microseconds)
+        {
+            if ((microseconds > MaxMicroseconds) || (microseconds < MinMicroseconds))
+            {
+                ThrowHelper.ThrowArgumentOutOfRange_TimeSpanTooLong();
+            }
+            long ticks = (long)microseconds * TicksPerMicrosecond;
+            return TimeSpan.FromTicks(ticks);
+        }
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="TimeSpan"/> structure to a specified number of
+        /// microseconds.
+        /// </summary>
+        /// <param name="microseconds">Number of microseconds.</param>
+        /// <returns>Returns a <see cref="TimeSpan"/> that represents a specified number of microseconds.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The parameters specify a <see cref="TimeSpan"/> value less than <see cref="MinValue"/> or greater than <see cref="MaxValue"/>
+        /// </exception>
+        public static TimeSpan FromMicroseconds(long microseconds) => FromUnits(microseconds, TicksPerMicrosecond, MinMicroseconds, MaxMicroseconds);
+
         public static TimeSpan FromHours(double value) => Interval(value, TicksPerHour);
 
         private static TimeSpan Interval(double value, double scale)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Type.cs b/src/libraries/System.Private.CoreLib/src/System/Type.cs
index 17e4636296c1..24be352778c8 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Type.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Type.cs
@@ -135,7 +135,15 @@ public bool IsPrimitive
             get => IsPrimitiveImpl();
         }
         protected abstract bool IsPrimitiveImpl();
-        public bool IsValueType { [Intrinsic] get => IsValueTypeImpl(); }
+        public bool IsValueType
+        {
+#if NATIVEAOT
+            // https://github.com/dotnet/runtime/issues/97272
+            [MethodImpl(MethodImplOptions.NoOptimization)]
+#endif
+            [Intrinsic]
+            get => IsValueTypeImpl();
+        }
         protected virtual bool IsValueTypeImpl() => IsSubclassOf(typeof(ValueType));
 
         [Intrinsic]
diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContract.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContract.cs
index bcf5c1064e31..79bfc1a51f2a 100644
--- a/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContract.cs
+++ b/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContract.cs
@@ -1516,8 +1516,8 @@ private static string GetDefaultDataContractNamespace(Type type)
         {
             string? clrNs = type.Namespace ?? string.Empty;
             string? ns =
-                GetGlobalDataContractNamespace(clrNs, type.Module.GetCustomAttributes(typeof(ContractNamespaceAttribute)).ToArray()) ??
-                GetGlobalDataContractNamespace(clrNs, type.Assembly.GetCustomAttributes(typeof(ContractNamespaceAttribute)).ToArray());
+                GetGlobalDataContractNamespace(clrNs, type.Module.GetCustomAttributes<ContractNamespaceAttribute>().ToArray()) ??
+                GetGlobalDataContractNamespace(clrNs, type.Assembly.GetCustomAttributes<ContractNamespaceAttribute>().ToArray());
 
             if (ns == null)
             {
@@ -2228,7 +2228,7 @@ private static bool IsMemberVisibleInSerializationModule(MemberInfo member)
         /// </SecurityNote>
         internal static bool IsAssemblyFriendOfSerialization(Assembly assembly)
         {
-            InternalsVisibleToAttribute[] internalsVisibleAttributes = (InternalsVisibleToAttribute[])assembly.GetCustomAttributes(typeof(InternalsVisibleToAttribute));
+            InternalsVisibleToAttribute[] internalsVisibleAttributes = (InternalsVisibleToAttribute[])assembly.GetCustomAttributes<InternalsVisibleToAttribute>();
             foreach (InternalsVisibleToAttribute internalsVisibleAttribute in internalsVisibleAttributes)
             {
                 string internalsVisibleAttributeAssemblyName = internalsVisibleAttribute.AssemblyName;
diff --git a/src/libraries/System.Private.Uri/src/System/UriExt.cs b/src/libraries/System.Private.Uri/src/System/UriExt.cs
index 1aedf02d9299..6f2f61c76721 100644
--- a/src/libraries/System.Private.Uri/src/System/UriExt.cs
+++ b/src/libraries/System.Private.Uri/src/System/UriExt.cs
@@ -1,9 +1,11 @@
-// Licensed to the .NET Foundation under one or more agreements.
+﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Globalization;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 using System.Text;
 
 namespace System
@@ -552,28 +554,122 @@ internal unsafe bool InternalIsWellFormedOriginalString()
             return true;
         }
 
+        /// <summary>Converts a string to its unescaped representation.</summary>
+        /// <param name="stringToUnescape">The string to unescape.</param>
+        /// <returns>The unescaped representation of <paramref name="stringToUnescape"/>.</returns>
         public static string UnescapeDataString(string stringToUnescape)
         {
             ArgumentNullException.ThrowIfNull(stringToUnescape);
 
-            if (stringToUnescape.Length == 0)
-                return string.Empty;
+            return UnescapeDataString(stringToUnescape, stringToUnescape);
+        }
 
-            int position = stringToUnescape.IndexOf('%');
-            if (position == -1)
-                return stringToUnescape;
+        /// <summary>Converts a span to its unescaped representation.</summary>
+        /// <param name="charsToUnescape">The span to unescape.</param>
+        /// <returns>The unescaped representation of <paramref name="charsToUnescape"/>.</returns>
+        public static string UnescapeDataString(ReadOnlySpan<char> charsToUnescape)
+        {
+            return UnescapeDataString(charsToUnescape, backingString: null);
+        }
+
+        private static string UnescapeDataString(ReadOnlySpan<char> charsToUnescape, string? backingString = null)
+        {
+            Debug.Assert(backingString is null || backingString.Length == charsToUnescape.Length);
+
+            int indexOfFirstToUnescape = charsToUnescape.IndexOf('%');
+            if (indexOfFirstToUnescape < 0)
+            {
+                // Nothing to unescape, just return the original value.
+                return backingString ?? charsToUnescape.ToString();
+            }
 
             var vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]);
-            vsb.EnsureCapacity(stringToUnescape.Length);
 
-            vsb.Append(stringToUnescape.AsSpan(0, position));
+            // We may throw for very large inputs (when growing the ValueStringBuilder).
+            vsb.EnsureCapacity(charsToUnescape.Length - indexOfFirstToUnescape);
+
             UriHelper.UnescapeString(
-                stringToUnescape, position, stringToUnescape.Length, ref vsb,
+                charsToUnescape.Slice(indexOfFirstToUnescape), ref vsb,
                 c_DummyChar, c_DummyChar, c_DummyChar,
                 UnescapeMode.Unescape | UnescapeMode.UnescapeAll,
                 syntax: null, isQuery: false);
 
-            return vsb.ToString();
+            string result = string.Concat(charsToUnescape.Slice(0, indexOfFirstToUnescape), vsb.AsSpan());
+            vsb.Dispose();
+            return result;
+        }
+
+        /// <summary>Attempts to convert a span to its unescaped representation.</summary>
+        /// <param name="charsToUnescape">The span to unescape.</param>
+        /// <param name="destination">The output span that contains the unescaped result of the operation.</param>
+        /// <param name="charsWritten">When this method returns, contains the number of chars that were written into <paramref name="destination"/>.</param>
+        /// <returns><see langword="true"/> if the <paramref name="destination"/> was large enough to hold the entire result; otherwise, <see langword="false"/>.</returns>
+        public static bool TryUnescapeDataString(ReadOnlySpan<char> charsToUnescape, Span<char> destination, out int charsWritten)
+        {
+            int indexOfFirstToUnescape = charsToUnescape.IndexOf('%');
+            if (indexOfFirstToUnescape < 0)
+            {
+                // Nothing to unescape, just copy the original chars.
+                if (charsToUnescape.TryCopyTo(destination))
+                {
+                    charsWritten = charsToUnescape.Length;
+                    return true;
+                }
+
+                charsWritten = 0;
+                return false;
+            }
+
+            // We may throw for very large inputs (when growing the ValueStringBuilder).
+            scoped ValueStringBuilder vsb;
+
+            // If the input and destination buffers overlap, we must take care not to overwrite parts of the input before we've processed it.
+            // If the buffers start at the same location, we can still use the destination as the output length is strictly <= input length.
+            bool overlapped = charsToUnescape.Overlaps(destination) &&
+                !Unsafe.AreSame(ref MemoryMarshal.GetReference(charsToUnescape), ref MemoryMarshal.GetReference(destination));
+
+            if (overlapped)
+            {
+                vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]);
+                vsb.EnsureCapacity(charsToUnescape.Length - indexOfFirstToUnescape);
+            }
+            else
+            {
+                vsb = new ValueStringBuilder(destination.Slice(indexOfFirstToUnescape));
+            }
+
+            UriHelper.UnescapeString(
+                charsToUnescape.Slice(indexOfFirstToUnescape), ref vsb,
+                c_DummyChar, c_DummyChar, c_DummyChar,
+                UnescapeMode.Unescape | UnescapeMode.UnescapeAll,
+                syntax: null, isQuery: false);
+
+            int newLength = indexOfFirstToUnescape + vsb.Length;
+            Debug.Assert(newLength <= charsToUnescape.Length);
+
+            if (destination.Length >= newLength)
+            {
+                charsToUnescape.Slice(0, indexOfFirstToUnescape).CopyTo(destination);
+
+                if (overlapped)
+                {
+                    vsb.AsSpan().CopyTo(destination.Slice(indexOfFirstToUnescape));
+                    vsb.Dispose();
+                }
+                else
+                {
+                    // We are expecting the builder not to grow if the original span was large enough.
+                    // This means that we MUST NOT over allocate anywhere in UnescapeString (e.g. append and then decrease the length).
+                    Debug.Assert(vsb.RawChars.Overlaps(destination));
+                }
+
+                charsWritten = newLength;
+                return true;
+            }
+
+            vsb.Dispose();
+            charsWritten = 0;
+            return false;
         }
 
         // Where stringToEscape is intended to be a completely unescaped URI string.
@@ -584,9 +680,27 @@ public static string EscapeUriString(string stringToEscape) =>
 
         // Where stringToEscape is intended to be URI data, but not an entire URI.
         // This method will escape any character that is not an unreserved character, including percent signs.
+
+        /// <summary>Converts a string to its escaped representation.</summary>
+        /// <param name="stringToEscape">The string to escape.</param>
+        /// <returns>The escaped representation of <paramref name="stringToEscape"/>.</returns>
         public static string EscapeDataString(string stringToEscape) =>
             UriHelper.EscapeString(stringToEscape, checkExistingEscaped: false, UriHelper.Unreserved);
 
+        /// <summary>Converts a span to its escaped representation.</summary>
+        /// <param name="charsToEscape">The span to escape.</param>
+        /// <returns>The escaped representation of <paramref name="charsToEscape"/>.</returns>
+        public static string EscapeDataString(ReadOnlySpan<char> charsToEscape) =>
+            UriHelper.EscapeString(charsToEscape, checkExistingEscaped: false, UriHelper.Unreserved, backingString: null);
+
+        /// <summary>Attempts to convert a span to its escaped representation.</summary>
+        /// <param name="charsToEscape">The span to escape.</param>
+        /// <param name="destination">The output span that contains the escaped result of the operation.</param>
+        /// <param name="charsWritten">When this method returns, contains the number of chars that were written into <paramref name="destination"/>.</param>
+        /// <returns><see langword="true"/> if the <paramref name="destination"/> was large enough to hold the entire result; otherwise, <see langword="false"/>.</returns>
+        public static bool TryEscapeDataString(ReadOnlySpan<char> charsToEscape, Span<char> destination, out int charsWritten) =>
+            UriHelper.TryEscapeDataString(charsToEscape, destination, out charsWritten);
+
         //
         // Cleans up the specified component according to Iri rules
         // a) Chars allowed by iri in a component are unescaped if found escaped
diff --git a/src/libraries/System.Private.Uri/src/System/UriHelper.cs b/src/libraries/System.Private.Uri/src/System/UriHelper.cs
index 8577ed88c84d..fbdfd0314a3e 100644
--- a/src/libraries/System.Private.Uri/src/System/UriHelper.cs
+++ b/src/libraries/System.Private.Uri/src/System/UriHelper.cs
@@ -1,4 +1,4 @@
-// Licensed to the .NET Foundation under one or more agreements.
+﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Buffers;
@@ -118,26 +118,100 @@ internal static unsafe bool TestForSubPath(char* selfPtr, int selfLength, char*
             return true;
         }
 
-        internal static string EscapeString(string stringToEscape, bool checkExistingEscaped, SearchValues<char> noEscape)
+        public static bool TryEscapeDataString(ReadOnlySpan<char> charsToEscape, Span<char> destination, out int charsWritten)
+        {
+            if (destination.Length < charsToEscape.Length)
+            {
+                charsWritten = 0;
+                return false;
+            }
+
+            int indexOfFirstToEscape = charsToEscape.IndexOfAnyExcept(Unreserved);
+            if (indexOfFirstToEscape < 0)
+            {
+                // Nothing to escape, just copy the original chars.
+                charsToEscape.CopyTo(destination);
+                charsWritten = charsToEscape.Length;
+                return true;
+            }
+
+            // We may throw for very large inputs (when growing the ValueStringBuilder).
+            scoped ValueStringBuilder vsb;
+
+            // If the input and destination buffers overlap, we must take care not to overwrite parts of the input before we've processed it.
+            bool overlapped = charsToEscape.Overlaps(destination);
+
+            if (overlapped)
+            {
+                vsb = new ValueStringBuilder(stackalloc char[Uri.StackallocThreshold]);
+                vsb.EnsureCapacity(charsToEscape.Length);
+            }
+            else
+            {
+                vsb = new ValueStringBuilder(destination.Slice(indexOfFirstToEscape));
+            }
+
+            EscapeStringToBuilder(charsToEscape.Slice(indexOfFirstToEscape), ref vsb, Unreserved, checkExistingEscaped: false);
+
+            int newLength = checked(indexOfFirstToEscape + vsb.Length);
+            Debug.Assert(newLength > charsToEscape.Length);
+
+            if (destination.Length >= newLength)
+            {
+                charsToEscape.Slice(0, indexOfFirstToEscape).CopyTo(destination);
+
+                if (overlapped)
+                {
+                    vsb.AsSpan().CopyTo(destination.Slice(indexOfFirstToEscape));
+                    vsb.Dispose();
+                }
+                else
+                {
+                    // We are expecting the builder not to grow if the original span was large enough.
+                    // This means that we MUST NOT over allocate anywhere in EscapeStringToBuilder (e.g. append and then decrease the length).
+                    Debug.Assert(vsb.RawChars.Overlaps(destination));
+                }
+
+                charsWritten = newLength;
+                return true;
+            }
+
+            vsb.Dispose();
+            charsWritten = 0;
+            return false;
+        }
+
+        public static string EscapeString(string stringToEscape, bool checkExistingEscaped, SearchValues<char> noEscape)
         {
             ArgumentNullException.ThrowIfNull(stringToEscape);
 
+            return EscapeString(stringToEscape, checkExistingEscaped, noEscape, stringToEscape);
+        }
+
+        public static string EscapeString(ReadOnlySpan<char> charsToEscape, bool checkExistingEscaped, SearchValues<char> noEscape, string? backingString)
+        {
             Debug.Assert(!noEscape.Contains('%'), "Need to treat % specially; it should be part of any escaped set");
+            Debug.Assert(backingString is null || backingString.Length == charsToEscape.Length);
 
-            int indexOfFirstToEscape = stringToEscape.AsSpan().IndexOfAnyExcept(noEscape);
+            int indexOfFirstToEscape = charsToEscape.IndexOfAnyExcept(noEscape);
             if (indexOfFirstToEscape < 0)
             {
-                // Nothing to escape, just return the original string.
-                return stringToEscape;
+                // Nothing to escape, just return the original value.
+                return backingString ?? charsToEscape.ToString();
             }
 
             // Otherwise, create a ValueStringBuilder to store the escaped data into,
-            // append to it all of the noEscape chars we already iterated through,
-            // escape the rest, and return the result as a string.
+            // escape the rest, and concat the result with the characters we skipped above.
             var vsb = new ValueStringBuilder(stackalloc char[Uri.StackallocThreshold]);
-            vsb.Append(stringToEscape.AsSpan(0, indexOfFirstToEscape));
-            EscapeStringToBuilder(stringToEscape.AsSpan(indexOfFirstToEscape), ref vsb, noEscape, checkExistingEscaped);
-            return vsb.ToString();
+
+            // We may throw for very large inputs (when growing the ValueStringBuilder).
+            vsb.EnsureCapacity(charsToEscape.Length);
+
+            EscapeStringToBuilder(charsToEscape.Slice(indexOfFirstToEscape), ref vsb, noEscape, checkExistingEscaped);
+
+            string result = string.Concat(charsToEscape.Slice(0, indexOfFirstToEscape), vsb.AsSpan());
+            vsb.Dispose();
+            return result;
         }
 
         internal static unsafe void EscapeString(scoped ReadOnlySpan<char> stringToEscape, ref ValueStringBuilder dest,
diff --git a/src/libraries/System.Private.Uri/src/System/ValueStringBuilderExtensions.cs b/src/libraries/System.Private.Uri/src/System/ValueStringBuilderExtensions.cs
index c93ee77fda6a..37a32dbebbcb 100644
--- a/src/libraries/System.Private.Uri/src/System/ValueStringBuilderExtensions.cs
+++ b/src/libraries/System.Private.Uri/src/System/ValueStringBuilderExtensions.cs
@@ -35,8 +35,15 @@ public void Append(Rune rune)
         [MethodImpl(MethodImplOptions.NoInlining)]
         private void GrowAndAppend(Rune rune)
         {
-            Grow(2);
-            Append(rune);
+            if (rune.Value <= 0xFFFF)
+            {
+                Append((char)rune.Value);
+            }
+            else
+            {
+                Grow(2);
+                Append(rune);
+            }
         }
     }
 }
diff --git a/src/libraries/System.Private.Uri/tests/FunctionalTests/PercentEncodingHelperTests.cs b/src/libraries/System.Private.Uri/tests/FunctionalTests/PercentEncodingHelperTests.cs
deleted file mode 100644
index 54161fd38afe..000000000000
--- a/src/libraries/System.Private.Uri/tests/FunctionalTests/PercentEncodingHelperTests.cs
+++ /dev/null
@@ -1,82 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Collections.Generic;
-using Xunit;
-
-namespace System.PrivateUri.Tests
-{
-    public class PercentEncodingHelperTests
-    {
-        private const string OneByteUtf8 = "%41";           // A
-        private const string TwoByteUtf8 = "%C3%BC";        // \u00FC
-        private const string ThreeByteUtf8 = "%E8%AF%B6";   // \u8BF6
-        private const string FourByteUtf8 = "%F0%9F%98%80"; // \uD83D\uDE00
-
-        private const string InvalidOneByteUtf8 = "%FF";
-        private const string OverlongTwoByteUtf8 = "%C1%81";        // A
-        private const string OverlongThreeByteUtf8 = "%E0%83%BC";   // \u00FC
-        private const string OverlongFourByteUtf8 = "%F0%88%AF%B6"; // \u8BF6;
-
-        public static IEnumerable<object[]> PercentEncodedAndDecodedUTF8Sequences()
-        {
-            static object[] Pair(string s1, string s2) => new object[] { s1, s2 };
-
-            yield return Pair(OneByteUtf8, "A");
-            yield return Pair(TwoByteUtf8, "\u00FC");
-            yield return Pair(ThreeByteUtf8, "\u8BF6");
-            yield return Pair(FourByteUtf8, "\uD83D\uDE00");
-
-            yield return Pair(OneByteUtf8 + OneByteUtf8, "AA");
-            yield return Pair(TwoByteUtf8 + TwoByteUtf8, "\u00FC\u00FC");
-            yield return Pair(ThreeByteUtf8 + ThreeByteUtf8, "\u8BF6\u8BF6");
-            yield return Pair(FourByteUtf8 + FourByteUtf8, "\uD83D\uDE00\uD83D\uDE00");
-
-            yield return Pair(OneByteUtf8 + TwoByteUtf8 + OneByteUtf8, "A\u00FCA");
-            yield return Pair(TwoByteUtf8 + ThreeByteUtf8 + TwoByteUtf8, "\u00FC\u8BF6\u00FC");
-
-            yield return Pair(InvalidOneByteUtf8 + OneByteUtf8, InvalidOneByteUtf8 + "A");
-            yield return Pair(OverlongTwoByteUtf8 + TwoByteUtf8, OverlongTwoByteUtf8 + "\u00FC");
-            yield return Pair(OverlongThreeByteUtf8 + ThreeByteUtf8, OverlongThreeByteUtf8 + "\u8BF6");
-            yield return Pair(OverlongFourByteUtf8 + FourByteUtf8, OverlongFourByteUtf8 + "\uD83D\uDE00");
-
-            yield return Pair(InvalidOneByteUtf8, InvalidOneByteUtf8);
-            yield return Pair(InvalidOneByteUtf8 + InvalidOneByteUtf8, InvalidOneByteUtf8 + InvalidOneByteUtf8);
-            yield return Pair(InvalidOneByteUtf8 + InvalidOneByteUtf8 + InvalidOneByteUtf8, InvalidOneByteUtf8 + InvalidOneByteUtf8 + InvalidOneByteUtf8);
-
-            // 11001010 11100100 10001000 10110010 - 2-byte marker followed by 3-byte sequence
-            yield return Pair("%CA" + "%E4%88%B2", "%CA" + '\u4232');
-
-            // 4 valid UTF8 bytes followed by 5 invalid UTF8 bytes
-            yield return Pair("%F4%80%80%BA" + "%FD%80%80%BA%CD", "\U0010003A" + "%FD%80%80%BA%CD");
-
-            // BIDI char
-            yield return Pair("%E2%80%8E", "\u200E");
-
-            // Char Block: 3400..4DBF-CJK Unified Ideographs Extension A
-            yield return Pair("%E4%88%B2", "\u4232");
-
-            // BIDI char followed by a valid 3-byte UTF8 sequence (\u30AF)
-            yield return Pair("%E2%80%8E" + "%E3%82%AF", "\u200E" + "\u30AF");
-
-            // BIDI char followed by invalid UTF8 bytes
-            yield return Pair("%E2%80%8E" + "%F0%90%90", "\u200E" + "%F0%90%90");
-
-            // Input string:                %98%C8%D4%F3 %D4%A8 %7A %CF%DE %41 %16
-            // Valid Unicode sequences:                  %D4%A8 %7A        %41 %16
-            yield return Pair("%98%C8%D4%F3" + "%D4%A8" + "%7A" + "%CF%DE" + "%41" + "%16",
-                "%98%C8%D4%F3" + '\u0528' + 'z' + "%CF%DE" + 'A' + '\x16');
-
-            // 2-byte marker, valid 4-byte sequence, continuation byte
-            yield return Pair("%C6" + "%F3%BC%A1%B8" + "%B5",
-                "%C6" + "\U000FC878" + "%B5");
-        }
-
-        [Theory]
-        [MemberData(nameof(PercentEncodedAndDecodedUTF8Sequences))]
-        public static void UnescapeDataString_UnescapesUtf8Sequences(string stringToUnescape, string expected)
-        {
-            Assert.Equal(expected, Uri.UnescapeDataString(stringToUnescape));
-        }
-    }
-}
diff --git a/src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj b/src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj
index 151547aba2ce..980804ded97c 100644
--- a/src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj
+++ b/src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj
@@ -13,7 +13,6 @@
     <Compile Include="IriRelativeFileResolutionTest.cs" />
     <Compile Include="IriTest.cs" />
     <Compile Include="UncTest.cs" />
-    <Compile Include="PercentEncodingHelperTests.cs" />
     <Compile Include="UriBuilderParameterTest.cs" />
     <Compile Include="UriBuilderRefreshTest.cs" />
     <Compile Include="UriBuilderTests.cs" />
diff --git a/src/libraries/System.Private.Uri/tests/FunctionalTests/UriEscapingTest.cs b/src/libraries/System.Private.Uri/tests/FunctionalTests/UriEscapingTest.cs
index 3e9385206bfe..883f48a420e9 100644
--- a/src/libraries/System.Private.Uri/tests/FunctionalTests/UriEscapingTest.cs
+++ b/src/libraries/System.Private.Uri/tests/FunctionalTests/UriEscapingTest.cs
@@ -24,193 +24,304 @@ public class UriEscapingTest
             "\u6570\u636E eq '\uD840\uDC00\uD840\uDC01\uD840\uDC02\uD840\uDC03\uD869\uDED1\uD869\uDED2\uD869\uDED3"
             + "\uD869\uDED4\uD869\uDED5\uD869\uDED6'";
 
-        #region EscapeDataString
+        #region EscapeUnescapeDataString
 
         [Fact]
-        public void UriEscapingDataString_JustAlphaNumeric_NothingEscaped()
+        public void EscapeUnescapeDataString_NullArgument()
         {
-            string output = Uri.EscapeDataString(AlphaNumeric);
-            Assert.Equal(AlphaNumeric, output);
+            AssertExtensions.Throws<ArgumentNullException>("stringToEscape", () => Uri.EscapeDataString(null));
+            AssertExtensions.Throws<ArgumentNullException>("stringToUnescape", () => Uri.UnescapeDataString(null));
         }
 
-        [Fact]
-        public void UriEscapingDataString_RFC2396Reserved_Escaped()
+        private static IEnumerable<(string Unescaped, string Escaped)> CombinationsWithDifferentSections(string unescaped, string escaped)
         {
-            string input = RFC2396Reserved;
-            string output = Uri.EscapeDataString(input);
-            Assert.Equal(Escape(RFC2396Reserved), output);
-        }
+            yield return (unescaped, escaped);
+            yield return (unescaped + unescaped, escaped + escaped);
 
-        [Fact]
-        public void UriEscapingDataString_RFC3986Unreserved_NothingEscaped()
-        {
-            string input = RFC3986Unreserved;
-            string output = Uri.EscapeDataString(input);
-            Assert.Equal(input, output);
+            foreach ((string padding, string escapedPadding) in new[]
+            {
+                (" ", "%20"), ("abc", "abc"), ("a b%", "a%20b%25"), ("\u00FC", "%C3%BC"), ("\uD83C\uDF49", "%F0%9F%8D%89")
+            })
+            {
+                yield return ($"{padding}{unescaped}", $"{escapedPadding}{escaped}");
+                yield return ($"{unescaped}{padding}", $"{escaped}{escapedPadding}");
+                yield return ($"{padding}{unescaped}{padding}", $"{escapedPadding}{escaped}{escapedPadding}");
+                yield return ($"{unescaped}{padding}{unescaped}", $"{escaped}{escapedPadding}{escaped}");
+                yield return ($"{padding}{unescaped}{padding}{unescaped}{padding}", $"{escapedPadding}{escaped}{escapedPadding}{escaped}{escapedPadding}");
+            }
         }
 
-        [Fact]
-        public void UriEscapingDataString_RFC3986Reserved_Escaped()
+        private static IEnumerable<(string Unescaped, string Escaped)> UriEscapeUnescapeDataStringTestInputs()
         {
-            string input = RFC3986Reserved;
-            string output = Uri.EscapeDataString(input);
-            Assert.Equal(Escape(RFC3986Reserved), output);
-        }
+            yield return ("", "");
+            yield return ("He\\l/lo", "He%5Cl%2Flo");
 
-        [Fact]
-        public void UriEscapingDataString_RFC3986ReservedWithIRI_Escaped()
-        {
-            // Note that \ and % are not officialy reserved, but we treat it as reserved.
-            string input = RFC3986Reserved;
-            string output = Uri.EscapeDataString(input);
-            Assert.Equal(Escape(RFC3986Reserved), output);
-        }
+            yield return (AlphaNumeric, AlphaNumeric);
+            yield return (RFC3986Unreserved, RFC3986Unreserved);
 
-        [Fact]
-        public void UriEscapingDataString_Unicode_Escaped()
-        {
-            string input = "\u30AF";
-            string output = Uri.EscapeDataString(input);
-            Assert.Equal("%E3%82%AF", output);
-        }
+            yield return (RFC2396Reserved, EscapeAscii(RFC2396Reserved));
+            yield return (RFC3986Reserved, EscapeAscii(RFC3986Reserved));
 
-        [Fact]
-        public void UriEscapingDataString_UnicodeWithIRI_Escaped()
-        {
-            string input = "\u30AF";
+            // Note that \ and % are not officially reserved, but we treat it as reserved.
+            yield return (RFC3986Reserved + "\\%", EscapeAscii(RFC3986Reserved + "\\%"));
 
-            string output = Uri.EscapeDataString(input);
-            Assert.Equal("%E3%82%AF", output);
+            yield return ("\u30AF", "%E3%82%AF");
+            yield return (GB18030CertificationString1, "%E6%95%B0%E6%8D%AE%20eq%20%27%F0%A0%80%80%F0%A0%80%81%F0%A0%80%82%F0%A0%80%83%F0%AA%9B%91%F0%AA%9B%92%F0%AA%9B%93%F0%AA%9B%94%F0%AA%9B%95%F0%AA%9B%96%27");
 
-            using (new ThreadCultureChange("zh-cn"))
+            // Test all ASCII that should be escaped
+            for (int i = 0; i < 128; i++)
             {
-                Assert.Equal(output, Uri.EscapeDataString(input)); //, "Same normalized result expected in different locales."
+                if (!RFC3986Unreserved.Contains((char)i))
+                {
+                    string s = new string((char)i, 42);
+                    yield return (s, EscapeAscii(s));
+                }
             }
+
+            // Valid surrogate pairs
+            yield return ("\uD800\uDC00", "%F0%90%80%80");
+            yield return ("\uD83C\uDF49", "%F0%9F%8D%89");
         }
 
-        [Fact]
-        public void UriEscapingDataString_Unicode_SurrogatePair()
+        public static IEnumerable<object[]> UriEscapeDataString_MemberData()
         {
-            string output = Uri.EscapeDataString(GB18030CertificationString1);
-            Assert.Equal(
-                 @"%E6%95%B0%E6%8D%AE%20eq" +
-                "%20%27%F0%A0%80%80%F0%A0%80%81%F0%A0%80%82%F0%A0%80%83%F0%AA%9B%91" +
-                "%F0%AA%9B%92%F0%AA%9B%93%F0%AA%9B%94%F0%AA%9B%95%F0%AA%9B%96%27",
-                output);
+            (string Unescaped, string Escaped)[] pairs =
+            [
+                .. UriEscapeUnescapeDataStringTestInputs(),
 
-            using (new ThreadCultureChange("zh-cn"))
-            {
-                Assert.Equal(output, Uri.EscapeDataString(GB18030CertificationString1)); //"Same normalized result expected in different locales."
-            }
+                // Invalid surrogate pairs
+                ("\uD800", "%EF%BF%BD"),
+                ("abc\uD800", "abc%EF%BF%BD"),
+                ("abc\uD800\uD800abc", "abc%EF%BF%BD%EF%BF%BDabc"),
+                ("\xD800\xD800\xDFFF", "%EF%BF%BD%F0%90%8F%BF"),
+            ];
+
+            return pairs
+                .SelectMany(p => CombinationsWithDifferentSections(p.Unescaped, p.Escaped))
+                .Select(p => new[] { p.Unescaped, p.Escaped });
         }
 
-        public static IEnumerable<object[]> UriEscapeUnescapeDataString_Roundtrip_MemberData()
+        public static IEnumerable<object[]> UriUnescapeDataString_MemberData()
         {
-            // Test the no-longer-existing "c_MaxUriBufferSize" limit of 0xFFF0,
-            // as well as lengths longer than the max Uri length of ushort.MaxValue.
-            foreach (int length in new[] { 1, 0xFFF0, 0xFFF1, ushort.MaxValue + 10 })
-            {
-                yield return new object[] { new string('s', length), string.Concat(Enumerable.Repeat("s", length)) };
-                yield return new object[] { new string('/', length), string.Concat(Enumerable.Repeat("%2F", length)) };
-            }
+            const string OneByteUtf8 = "%41";           // A
+            const string TwoByteUtf8 = "%C3%BC";        // \u00FC
+            const string ThreeByteUtf8 = "%E8%AF%B6";   // \u8BF6
+            const string FourByteUtf8 = "%F0%9F%98%80"; // \uD83D\uDE00
+
+            const string InvalidOneByteUtf8 = "%FF";
+            const string OverlongTwoByteUtf8 = "%C1%81";        // A
+            const string OverlongThreeByteUtf8 = "%E0%83%BC";   // \u00FC
+            const string OverlongFourByteUtf8 = "%F0%88%AF%B6"; // \u8BF6;
+
+            (string Unescaped, string Escaped)[] pairs =
+            [
+                .. UriEscapeUnescapeDataStringTestInputs(),
+
+                // Many combinations that include non-ASCII to test the PercentEncodingHelper
+                ("A", OneByteUtf8),
+                ("\u00FC", TwoByteUtf8),
+                ("\u8BF6", ThreeByteUtf8),
+                ("\uD83D\uDE00", FourByteUtf8),
+
+                ("AA", OneByteUtf8 + OneByteUtf8),
+                ("\u00FC\u00FC", TwoByteUtf8 + TwoByteUtf8),
+                ("\u8BF6\u8BF6", ThreeByteUtf8 + ThreeByteUtf8),
+                ("\uD83D\uDE00\uD83D\uDE00", FourByteUtf8 + FourByteUtf8),
+
+                ("A\u00FCA", OneByteUtf8 + TwoByteUtf8 + OneByteUtf8),
+                ("\u00FC\u8BF6\u00FC", TwoByteUtf8 + ThreeByteUtf8 + TwoByteUtf8),
+
+                (InvalidOneByteUtf8 + "A", InvalidOneByteUtf8 + OneByteUtf8),
+                (OverlongTwoByteUtf8 + "\u00FC", OverlongTwoByteUtf8 + TwoByteUtf8),
+                (OverlongThreeByteUtf8 + "\u8BF6", OverlongThreeByteUtf8 + ThreeByteUtf8),
+                (OverlongFourByteUtf8 + "\uD83D\uDE00", OverlongFourByteUtf8 + FourByteUtf8),
+
+                (InvalidOneByteUtf8, InvalidOneByteUtf8),
+                (InvalidOneByteUtf8 + InvalidOneByteUtf8, InvalidOneByteUtf8 + InvalidOneByteUtf8),
+                (InvalidOneByteUtf8 + InvalidOneByteUtf8 + InvalidOneByteUtf8, InvalidOneByteUtf8 + InvalidOneByteUtf8 + InvalidOneByteUtf8),
+
+                // 11001010 11100100 10001000 10110010 - 2-byte marker followed by 3-byte sequence
+                ("%CA" + '\u4232', "%CA" + "%E4%88%B2"),
+
+                // 4 valid UTF8 bytes followed by 5 invalid UTF8 bytes
+                ("\U0010003A" + "%FD%80%80%BA%CD", "%F4%80%80%BA" + "%FD%80%80%BA%CD"),
+
+                // BIDI char
+                ("\u200E", "%E2%80%8E"),
+
+                // Char Block: 3400..4DBF-CJK Unified Ideographs Extension A
+                ("\u4232", "%E4%88%B2"),
+
+                // BIDI char followed by a valid 3-byte UTF8 sequence (\u30AF)
+                ("\u200E" + "\u30AF", "%E2%80%8E" + "%E3%82%AF"),
+
+                // BIDI char followed by invalid UTF8 bytes
+                ("\u200E" + "%F0%90%90", "%E2%80%8E" + "%F0%90%90"),
+
+                // Input string:                %98%C8%D4%F3 %D4%A8 %7A %CF%DE %41 %16
+                // Valid Unicode sequences:                  %D4%A8 %7A        %41 %16
+                ("%98%C8%D4%F3" + '\u0528' + 'z' + "%CF%DE" + 'A' + '\x16', "%98%C8%D4%F3" + "%D4%A8" + "%7A" + "%CF%DE" + "%41" + "%16"),
+
+                // 2-byte marker, valid 4-byte sequence, continuation byte
+                ("%C6" + "\U000FC878" + "%B5", "%C6" + "%F3%BC%A1%B8" + "%B5"),
+            ];
+
+            return pairs
+                .SelectMany(p => CombinationsWithDifferentSections(p.Unescaped, p.Escaped))
+                .Select(p => new[] { p.Unescaped, p.Escaped });
         }
 
         [Theory]
-        [MemberData(nameof(UriEscapeUnescapeDataString_Roundtrip_MemberData))]
-        public void UriEscapeUnescapeDataString_Roundtrip(string input, string expectedEscaped)
+        [MemberData(nameof(UriEscapeDataString_MemberData))]
+        public void UriEscapeDataString(string unescaped, string escaped)
         {
-            string output = Uri.EscapeDataString(input);
-            Assert.Equal(expectedEscaped, output);
-            Assert.Equal(input, Uri.UnescapeDataString(output));
-        }
+            ValidateEscape(unescaped, escaped);
 
-        #endregion EscapeDataString
+            using (new ThreadCultureChange("zh-cn"))
+            {
+                // Same result expected in different locales.
+                ValidateEscape(unescaped, escaped);
+            }
 
-        #region UnescapeDataString
+            static void ValidateEscape(string input, string expectedOutput)
+            {
+                Assert.True(input.Length <= expectedOutput.Length);
 
-        [Fact]
-        public void UriUnescapingDataString_JustAlphaNumeric_Unescaped()
-        {
-            string output = Uri.UnescapeDataString(Escape(AlphaNumeric));
-            Assert.Equal(AlphaNumeric, output);
-        }
+                // String overload
+                string output = Uri.EscapeDataString(input);
+                Assert.Equal(expectedOutput, output);
 
-        [Fact]
-        public void UriUnescapingDataString_RFC2396Unreserved_Unescaped()
-        {
-            string input = RFC2396Unreserved;
-            string output = Uri.UnescapeDataString(Escape(input));
-            Assert.Equal(input, output);
-        }
+                if (input == expectedOutput)
+                {
+                    Assert.Same(input, output);
+                }
 
-        [Fact]
-        public void UriUnescapingDataString_RFC2396Reserved_Unescaped()
-        {
-            string input = RFC2396Reserved;
-            string output = Uri.UnescapeDataString(Escape(input));
-            Assert.Equal(input, output);
-        }
+                // Span overload
+                output = Uri.EscapeDataString(input.AsSpan());
+                Assert.Equal(expectedOutput, output);
 
-        [Fact]
-        public void UriUnescapingDataString_RFC3986Unreserved_Unescaped()
-        {
-            string input = RFC3986Unreserved;
-            string output = Uri.UnescapeDataString(Escape(input));
-            Assert.Equal(input, output);
-        }
+                char[] destination = new char[expectedOutput.Length + 2];
 
-        [Fact]
-        public void UriUnescapingDataString_RFC3986Reserved_Unescaped()
-        {
-            string input = RFC3986Reserved;
-            string output = Uri.UnescapeDataString(Escape(input));
-            Assert.Equal(input, output);
-        }
+                // Exact destination size
+                Assert.True(Uri.TryEscapeDataString(input, destination.AsSpan(0, expectedOutput.Length), out int charsWritten));
+                Assert.Equal(expectedOutput.Length, charsWritten);
+                Assert.Equal(expectedOutput, destination.AsSpan(0, charsWritten));
 
-        [Fact]
-        public void UriUnescapingDataString_RFC3986ReservedWithIRI_Unescaped()
-        {
-            // Note that \ and % are not officialy reserved, but we treat it as reserved.
-            string input = RFC3986Reserved;
-            string output = Uri.UnescapeDataString(Escape(input));
-            Assert.Equal(input, output);
-        }
+                // Larger destination
+                Assert.True(Uri.TryEscapeDataString(input, destination.AsSpan(1), out charsWritten));
+                Assert.Equal(expectedOutput.Length, charsWritten);
+                Assert.Equal(expectedOutput, destination.AsSpan(1, charsWritten));
 
-        [Fact]
-        public void UriUnescapingDataString_Unicode_Unescaped()
-        {
-            string input = @"\u30AF";
-            string output = Uri.UnescapeDataString(Escape(input));
-            Assert.Equal(input, output);
+                // Destination too small
+                if (expectedOutput.Length > 0)
+                {
+                    Assert.False(Uri.TryEscapeDataString(input, destination.AsSpan(0, expectedOutput.Length - 1), out charsWritten));
+                    Assert.Equal(0, charsWritten);
+                }
+
+                // Overlapped source/destination
+                input.CopyTo(destination);
+                Assert.True(Uri.TryEscapeDataString(destination.AsSpan(0, input.Length), destination, out charsWritten));
+                Assert.Equal(expectedOutput.Length, charsWritten);
+                Assert.Equal(expectedOutput, destination.AsSpan(0, charsWritten));
+
+                // Overlapped source/destination with different starts
+                input.CopyTo(destination.AsSpan(1));
+                Assert.True(Uri.TryEscapeDataString(destination.AsSpan(1, input.Length), destination, out charsWritten));
+                Assert.Equal(expectedOutput.Length, charsWritten);
+                Assert.Equal(expectedOutput, destination.AsSpan(0, charsWritten));
+
+                input.CopyTo(destination);
+                Assert.True(Uri.TryEscapeDataString(destination.AsSpan(0, input.Length), destination.AsSpan(1), out charsWritten));
+                Assert.Equal(expectedOutput.Length, charsWritten);
+                Assert.Equal(expectedOutput, destination.AsSpan(1, charsWritten));
+            }
         }
 
-        [Fact]
-        public void UriUnescapingDataString_UnicodeWithIRI_Unescaped()
+        [Theory]
+        [MemberData(nameof(UriUnescapeDataString_MemberData))]
+        public void UriUnescapeDataString(string unescaped, string escaped)
         {
-            string input = @"\u30AF";
-            string output = Uri.UnescapeDataString(Escape(input));
-            Assert.Equal(input, output);
+            ValidateUnescape(escaped, unescaped);
 
             using (new ThreadCultureChange("zh-cn"))
             {
-                Assert.Equal(output, Uri.UnescapeDataString(Escape(input))); // Same normalized result expected in different locales.
+                // Same result expected in different locales.
+                ValidateUnescape(escaped, unescaped);
+            }
+
+            static void ValidateUnescape(string input, string expectedOutput)
+            {
+                Assert.True(input.Length >= expectedOutput.Length);
+
+                // String overload
+                string output = Uri.UnescapeDataString(input);
+                Assert.Equal(expectedOutput, output);
+
+                // Span overload
+                output = Uri.UnescapeDataString(input.AsSpan());
+                Assert.Equal(expectedOutput, output);
+
+                char[] destination = new char[input.Length + 2];
+
+                // Exact destination size
+                Assert.True(Uri.TryUnescapeDataString(input, destination.AsSpan(0, expectedOutput.Length), out int charsWritten));
+                Assert.Equal(expectedOutput.Length, charsWritten);
+                Assert.Equal(expectedOutput, destination.AsSpan(0, charsWritten));
+
+                // Larger destination
+                Assert.True(Uri.TryUnescapeDataString(input, destination.AsSpan(1), out charsWritten));
+                Assert.Equal(expectedOutput.Length, charsWritten);
+                Assert.Equal(expectedOutput, destination.AsSpan(1, charsWritten));
+
+                // Destination too small
+                if (expectedOutput.Length > 0)
+                {
+                    Assert.False(Uri.TryUnescapeDataString(input, destination.AsSpan(0, expectedOutput.Length - 1), out charsWritten));
+                    Assert.Equal(0, charsWritten);
+                }
+
+                // Overlapped source/destination
+                input.CopyTo(destination);
+                Assert.True(Uri.TryUnescapeDataString(destination.AsSpan(0, input.Length), destination, out charsWritten));
+                Assert.Equal(expectedOutput.Length, charsWritten);
+                Assert.Equal(expectedOutput, destination.AsSpan(0, charsWritten));
+
+                // Overlapped source/destination with different starts
+                input.CopyTo(destination.AsSpan(1));
+                Assert.True(Uri.TryUnescapeDataString(destination.AsSpan(1, input.Length), destination, out charsWritten));
+                Assert.Equal(expectedOutput.Length, charsWritten);
+                Assert.Equal(expectedOutput, destination.AsSpan(0, charsWritten));
+
+                input.CopyTo(destination);
+                Assert.True(Uri.TryUnescapeDataString(destination.AsSpan(0, input.Length), destination.AsSpan(1), out charsWritten));
+                Assert.Equal(expectedOutput.Length, charsWritten);
+                Assert.Equal(expectedOutput, destination.AsSpan(1, charsWritten));
             }
         }
 
         [Fact]
-        public void UriUnescapingDataString_Unicode_SurrogatePair()
+        public void UriEscapeUnescapeDataString_LongInputs()
         {
-            string escapedInput = Uri.EscapeDataString(GB18030CertificationString1);
-            string output = Uri.UnescapeDataString(escapedInput);
-            Assert.Equal(GB18030CertificationString1, output);
-
-            using (new ThreadCultureChange("zh-cn"))
+            // Test the no-longer-existing "c_MaxUriBufferSize" limit of 0xFFF0,
+            // as well as lengths longer than the max Uri length of ushort.MaxValue.
+            foreach (int length in new[] { 1, 0xFFF0, 0xFFF1, ushort.MaxValue + 10 })
             {
-                Assert.Equal(output, Uri.UnescapeDataString(escapedInput)); // Same normalized result expected in different locales.
+                string unescaped = new string('s', length);
+                string escaped = unescaped;
+
+                Assert.Equal(Uri.EscapeDataString(unescaped), escaped);
+                Assert.Equal(Uri.UnescapeDataString(escaped), unescaped);
+
+                unescaped = new string('/', length);
+                escaped = EscapeAscii(unescaped);
+
+                Assert.Equal(Uri.EscapeDataString(unescaped), escaped);
+                Assert.Equal(Uri.UnescapeDataString(escaped), unescaped);
             }
         }
 
-        #endregion UnescapeDataString
+        #endregion EscapeUnescapeDataString
 
         #region EscapeUriString
 
@@ -332,7 +443,7 @@ public void UriAbsoluteEscaping_AlphaNumeric_NoEscaping()
         [Fact]
         public void UriAbsoluteUnEscaping_AlphaNumericEscapedIriOn_UnEscaping()
         {
-            string escapedAlphaNum = Escape(AlphaNumeric);
+            string escapedAlphaNum = EscapeAscii(AlphaNumeric);
             string input = "http://" + AlphaNumeric.ToLowerInvariant() + "/" + escapedAlphaNum
                 + "?" + escapedAlphaNum + "#" + escapedAlphaNum;
             string expectedOutput = "http://" + AlphaNumeric.ToLowerInvariant() + "/" + AlphaNumeric
@@ -353,7 +464,7 @@ public void UriAbsoluteEscaping_RFC2396Unreserved_NoEscaping()
         [Fact]
         public void UriAbsoluteUnEscaping_RFC3986UnreservedEscaped_AllUnescaped()
         {
-            string escaped = Escape(RFC3986Unreserved);
+            string escaped = EscapeAscii(RFC3986Unreserved);
             string input = "http://" + AlphaNumeric.ToLowerInvariant() + "/" + escaped
                 + "?" + escaped + "#" + escaped;
             string expectedOutput = "http://" + AlphaNumeric.ToLowerInvariant() + "/" + RFC3986Unreserved
@@ -375,7 +486,7 @@ public void UriAbsoluteEscaping_RFC2396Reserved_NoEscaping()
         [Fact]
         public void UriAbsoluteUnEscaping_RFC2396ReservedEscaped_NoUnEscaping()
         {
-            string escaped = Escape(RFC2396Reserved);
+            string escaped = EscapeAscii(RFC2396Reserved);
             string input = "http://host/" + escaped + "?" + escaped + "#" + escaped;
 
             Uri testUri = new Uri(input);
@@ -404,7 +515,7 @@ public void UriAbsoluteEscaping_RFC3986Reserved_NothingEscaped()
         [Fact]
         public void UriAbsoluteUnEscaping_RFC3986ReservedEscaped_NothingUnescaped()
         {
-            string escaped = Escape(RFC3986Reserved);
+            string escaped = EscapeAscii(RFC3986Reserved);
             string input = "http://host/" + escaped + "?" + escaped + "#" + escaped;
 
             Uri testUri = new Uri(input);
@@ -724,28 +835,12 @@ public void UriUnescapeInvalid_ValidUtf8IncompleteUtf8AsciiIriOn_InvalidUtf8Left
 
         #region Helpers
 
-        private static readonly char[] s_hexUpperChars = {
-                                   '0', '1', '2', '3', '4', '5', '6', '7',
-                                   '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
-                                   };
-
         // Percent encode every character
-        private static string Escape(string input)
+        private static string EscapeAscii(string input)
         {
-            byte[] bytes = new byte[4];
-            StringBuilder output = new StringBuilder();
-            for (int index = 0; index < input.Length; index++)
-            {
-                // Non-Ascii is escaped as UTF-8
-                int byteCount = Encoding.UTF8.GetBytes(input, index, 1, bytes, 0);
-                for (int byteIndex = 0; byteIndex < byteCount; byteIndex++)
-                {
-                    output.Append("%");
-                    output.Append(s_hexUpperChars[(bytes[byteIndex] & 0xf0) >> 4]);
-                    output.Append(s_hexUpperChars[bytes[byteIndex] & 0xf]);
-                }
-            }
-            return output.ToString();
+            Assert.True(Ascii.IsValid(input));
+
+            return string.Concat(input.Select(c => $"%{(int)c:X2}"));
         }
 
         #endregion Helpers
diff --git a/src/libraries/System.Private.Xml/src/System/Xml/Schema/XsdDateTime.cs b/src/libraries/System.Private.Xml/src/System/Xml/Schema/XsdDateTime.cs
index 10b78d608233..7e75cc55afad 100644
--- a/src/libraries/System.Private.Xml/src/System/Xml/Schema/XsdDateTime.cs
+++ b/src/libraries/System.Private.Xml/src/System/Xml/Schema/XsdDateTime.cs
@@ -396,6 +396,7 @@ public static implicit operator DateTime(XsdDateTime xdt)
             {
                 case DateTimeTypeCode.GMonth:
                 case DateTimeTypeCode.GDay:
+                    // codeql[cs/leap-year/unsafe-date-construction-from-two-elements] - The XML specification does not explicitly define this behavior for parsing in a non-leap year. We intentionally throw here. Altering this behavior to be more resilient, producing dates like 2/28 or 3/1, could introduce unintended consequences and may not be desirable for user.
                     result = new DateTime(DateTime.Now.Year, xdt.Month, xdt.Day);
                     break;
                 case DateTimeTypeCode.Time:
diff --git a/src/libraries/System.Private.Xml/src/System/Xml/Serialization/ReflectionXmlSerializationReader.cs b/src/libraries/System.Private.Xml/src/System/Xml/Serialization/ReflectionXmlSerializationReader.cs
index e9e2fdfe39bc..ba4c82f89539 100644
--- a/src/libraries/System.Private.Xml/src/System/Xml/Serialization/ReflectionXmlSerializationReader.cs
+++ b/src/libraries/System.Private.Xml/src/System/Xml/Serialization/ReflectionXmlSerializationReader.cs
@@ -651,7 +651,7 @@ private static ReflectionXmlSerializationReaderHelper.SetMemberValueDelegate Get
 
                         MethodInfo getSetMemberValueDelegateWithTypeGenericMi = typeof(ReflectionXmlSerializationReaderHelper).GetMethod("GetSetMemberValueDelegateWithType", BindingFlags.Static | BindingFlags.Public)!;
                         MethodInfo getSetMemberValueDelegateWithTypeMi = getSetMemberValueDelegateWithTypeGenericMi.MakeGenericMethod(o.GetType(), memberType);
-                        var getSetMemberValueDelegateWithType = (Func<MemberInfo, ReflectionXmlSerializationReaderHelper.SetMemberValueDelegate>)getSetMemberValueDelegateWithTypeMi.CreateDelegate(typeof(Func<MemberInfo, ReflectionXmlSerializationReaderHelper.SetMemberValueDelegate>));
+                        var getSetMemberValueDelegateWithType = getSetMemberValueDelegateWithTypeMi.CreateDelegate<Func<MemberInfo, ReflectionXmlSerializationReaderHelper.SetMemberValueDelegate>>();
                         result = getSetMemberValueDelegateWithType(memberInfo);
                         delegateCacheForType[memberName] = result;
                     }
@@ -2121,7 +2121,7 @@ public static SetMemberValueDelegate GetSetMemberValueDelegateWithType<TObj, TPa
                         return propInfo.SetValue;
                     }
 
-                    setTypedDelegate = (Action<TObj, TParam>)setMethod.CreateDelegate(typeof(Action<TObj, TParam>));
+                    setTypedDelegate = setMethod.CreateDelegate<Action<TObj, TParam>>();
                 }
                 else if (memberInfo is FieldInfo fieldInfo)
                 {
diff --git a/src/libraries/System.Private.Xml/src/System/Xml/Xsl/QIL/QilXmlWriter.cs b/src/libraries/System.Private.Xml/src/System/Xml/Xsl/QIL/QilXmlWriter.cs
index 574901728e1e..7480892901b5 100644
--- a/src/libraries/System.Private.Xml/src/System/Xml/Xsl/QIL/QilXmlWriter.cs
+++ b/src/libraries/System.Private.Xml/src/System/Xml/Xsl/QIL/QilXmlWriter.cs
@@ -206,7 +206,7 @@ protected override QilNode VisitQilExpression(QilExpression qil)
                 foreach (QilNode n in fdecls)
                 {
                     // i.e. <Function id="$a"/>
-                    this.writer.WriteStartElement(Enum.GetName(typeof(QilNodeType), n.NodeType)!);
+                    this.writer.WriteStartElement(Enum.GetName(n.NodeType)!);
                     this.writer.WriteAttributeString("id", _ngen.NameOf(n));
                     WriteXmlType(n);
 
@@ -277,7 +277,7 @@ protected override void BeforeVisit(QilNode node)
                 WriteAnnotations(node.Annotation);
 
             // Call WriteStartElement
-            this.writer.WriteStartElement("", Enum.GetName(typeof(QilNodeType), node.NodeType)!, "");
+            this.writer.WriteStartElement("", Enum.GetName(node.NodeType)!, "");
 
             // Write common attributes
 #if QIL_TRACE_NODE_CREATION
diff --git a/src/libraries/System.Private.Xml/src/System/Xml/Xslt/XslCompiledTransform.cs b/src/libraries/System.Private.Xml/src/System/Xml/Xslt/XslCompiledTransform.cs
index 7c5ca1d4ca35..9b2f875ddb6c 100644
--- a/src/libraries/System.Private.Xml/src/System/Xml/Xslt/XslCompiledTransform.cs
+++ b/src/libraries/System.Private.Xml/src/System/Xml/Xslt/XslCompiledTransform.cs
@@ -229,7 +229,7 @@ public void Load(MethodInfo executeMethod, byte[] queryData, Type[]? earlyBoundT
 
             Delegate delExec = executeMethod is DynamicMethod dm
                 ? dm.CreateDelegate(typeof(ExecuteDelegate))
-                : executeMethod.CreateDelegate(typeof(ExecuteDelegate));
+                : executeMethod.CreateDelegate<ExecuteDelegate>();
 
             _command = new XmlILCommand((ExecuteDelegate)delExec, new XmlQueryStaticData(queryData, earlyBoundTypes));
             OutputSettings = _command.StaticData.DefaultWriterSettings;
diff --git a/src/libraries/System.Private.Xml/tests/System.Private.Xml.Tests.csproj b/src/libraries/System.Private.Xml/tests/System.Private.Xml.Tests.csproj
index 3983ac03f420..30fe5d7a261a 100644
--- a/src/libraries/System.Private.Xml/tests/System.Private.Xml.Tests.csproj
+++ b/src/libraries/System.Private.Xml/tests/System.Private.Xml.Tests.csproj
@@ -14,7 +14,7 @@
     <TestArchiveTestsRoot>$(TestArchiveRoot)browserornodejs/</TestArchiveTestsRoot>
     <TestArchiveTestsDir>$(TestArchiveTestsRoot)$(OSPlatformConfig)/</TestArchiveTestsDir>
     <DefineConstants>$(DefineConstants);TARGET_BROWSER</DefineConstants>
-    <WasmXHarnessMonoArgs>--setenv=XHARNESS_LOG_TEST_START=true</WasmXHarnessMonoArgs>
+    <XunitShowProgress>true</XunitShowProgress>
   </PropertyGroup>
 
   <ItemGroup>
diff --git a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_AllowXmlAttributes.cs b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_AllowXmlAttributes.cs
index 2bed5cd37c19..2d5ffe75de7f 100644
--- a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_AllowXmlAttributes.cs
+++ b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_AllowXmlAttributes.cs
@@ -273,7 +273,7 @@ public void v1(string xmlFile, string xsdFile, bool allowXmlAttributes, int expe
             if (xsdFile != null)
                 xss.Add(null, Path.Combine(testData, xsdFile));
 
-            XmlReader vr = CreateReader(Path.Combine(testData, xmlFile), xss, allowXmlAttributes);
+            using XmlReader vr = CreateReader(Path.Combine(testData, xmlFile), xss, allowXmlAttributes);
             while (vr.Read()) ;
 
             Assert.Equal(warningCount, expectedWarningCount);
diff --git a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_EnableUpaCheck.cs b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_EnableUpaCheck.cs
index 5de2beac167d..9576efe83784 100644
--- a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_EnableUpaCheck.cs
+++ b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_EnableUpaCheck.cs
@@ -146,7 +146,7 @@ public void v1(object param0, object param1, object param2, int[] expectedErrorL
             xss.ValidationEventHandler += new ValidationEventHandler(ValidationCallback);
             xss.Add(null, Path.Combine(testData, xsdFile));
 
-            XmlReader vr = CreateReader(Path.Combine(testData, xmlFile), xss, false);
+            using XmlReader vr = CreateReader(Path.Combine(testData, xmlFile), xss, false);
             while (vr.Read()) ;
 
             CError.Compare(errorCount, expectedErrorCount, "Error Count mismatch");
diff --git a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_Misc.cs b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_Misc.cs
index 0cec7918b378..cd8de8da2835 100644
--- a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_Misc.cs
+++ b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_Misc.cs
@@ -89,7 +89,7 @@ public void v2()
                                        XmlSchemaValidationFlags.ProcessInlineSchema;
             settings.ValidationEventHandler += new ValidationEventHandler(ValidationCallback);
             settings.Schemas.Add(ss);
-            XmlReader vr = XmlReader.Create(Path.Combine(TestData._Root, "bug115049.xml"), settings);
+            using XmlReader vr = XmlReader.Create(Path.Combine(TestData._Root, "bug115049.xml"), settings);
             while (vr.Read()) ;
             CError.Compare(errorCount, 1, "Error Count mismatch!");
             return;
@@ -108,7 +108,7 @@ public void v4()
                                        XmlSchemaValidationFlags.ProcessSchemaLocation |
                                        XmlSchemaValidationFlags.ProcessInlineSchema;
             settings.ValidationEventHandler += new ValidationEventHandler(ValidationCallback);
-            XmlReader vr = XmlReader.Create(new StringReader(xml), settings, (string)null);
+            using XmlReader vr = XmlReader.Create(new StringReader(xml), settings, (string)null);
             while (vr.Read()) ;
             CError.Compare(errorCount, 0, "Error Count mismatch!");
             CError.Compare(warningCount, 1, "Warning Count mismatch!");
@@ -531,7 +531,7 @@ public void v106()
 #pragma warning disable 0618
             settings.ProhibitDtd = false;
 #pragma warning restore 0618
-            XmlReader r = XmlReader.Create(Path.Combine(TestData._Root, "XMLSchema.xsd"), settings);
+            using XmlReader r = XmlReader.Create(Path.Combine(TestData._Root, "XMLSchema.xsd"), settings);
             ss1.Add(null, r);
             ss1.Compile();
 
@@ -568,7 +568,7 @@ public void v107()
             settings.Schemas.Add(schemaSet);
             settings.ValidationEventHandler += new ValidationEventHandler(ValidationCallback);
             settings.ValidationType = ValidationType.Schema;
-            XmlReader vr = XmlReader.Create(new StringReader(strXml), settings);
+            using XmlReader vr = XmlReader.Create(new StringReader(strXml), settings);
 
             while (vr.Read()) ;
 
@@ -742,7 +742,7 @@ public void v112()
             XmlSchema mainSchema = set.Add(null, Path.Combine(TestData._Root, "bug382035a.xsd"));
             set.Compile();
 
-            XmlReader r = XmlReader.Create(Path.Combine(TestData._Root, "bug382035a1.xsd"));
+            using XmlReader r = XmlReader.Create(Path.Combine(TestData._Root, "bug382035a1.xsd"));
             XmlSchema reParsedInclude = XmlSchema.Read(r, new ValidationEventHandler(ValidationCallback));
 
             ((XmlSchemaExternal)mainSchema.Includes[0]).Schema = reParsedInclude;
@@ -766,7 +766,7 @@ public void v113()
             settings.ValidationFlags |= XmlSchemaValidationFlags.ReportValidationWarnings | XmlSchemaValidationFlags.ProcessSchemaLocation;
             settings.ValidationEventHandler += new ValidationEventHandler(ValidationCallback);
             settings.ValidationType = ValidationType.Schema;
-            XmlReader vr = XmlReader.Create(new StringReader(strXml), settings);
+            using XmlReader vr = XmlReader.Create(new StringReader(strXml), settings);
 
             while (vr.Read()) ;
 
@@ -1056,7 +1056,7 @@ public void Dev10_40509()
             string xsd = Path.Combine(TestData._Root, "bug511217.xsd");
             XmlSchemaSet s = new XmlSchemaSet();
             s.XmlResolver = new XmlUrlResolver();
-            XmlReader r = XmlReader.Create(xsd);
+            using XmlReader r = XmlReader.Create(xsd);
             s.Add(null, r);
             s.Compile();
             XmlReaderSettings rs = new XmlReaderSettings();
diff --git a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_NmTokens.cs b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_NmTokens.cs
index c9844ef69306..86d8c82d1e33 100644
--- a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_NmTokens.cs
+++ b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_NmTokens.cs
@@ -30,7 +30,7 @@ public void TestSchemaCompile(string fileName, bool negative)
                 Assert.True(negative, args.Message);
                 numevents++;
             };            
-            XmlReader r = XmlReader.Create(xsd);
+            using XmlReader r = XmlReader.Create(xsd);
             s.Add(null, r);
             s.Compile();            
             Assert.False(negative && numevents != 1);
diff --git a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_ProhibitDTD.cs b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_ProhibitDTD.cs
index c1737a3395d0..497253ceb83c 100644
--- a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_ProhibitDTD.cs
+++ b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_ProhibitDTD.cs
@@ -149,7 +149,7 @@ public void v2()
             Initialize();
             XmlSchemaSet xss = new XmlSchemaSet();
             xss.ValidationEventHandler += ValidationCallback;
-            XmlReader r = CreateReader(Path.Combine(TestData._Root, "bug356711_a.xsd"));
+            using XmlReader r = CreateReader(Path.Combine(TestData._Root, "bug356711_a.xsd"));
             try
             {
                 xss.Add(null, r);
@@ -190,7 +190,7 @@ public void v4()
             XmlSchemaSet xss = new XmlSchemaSet();
             xss.XmlResolver = new XmlUrlResolver();
             xss.ValidationEventHandler += ValidationCallback;
-            XmlReader r = CreateReader(Path.Combine(TestData._Root, "bug356711.xsd"));
+            using XmlReader r = CreateReader(Path.Combine(TestData._Root, "bug356711.xsd"));
             try
             {
                 xss.Add(null, r);
@@ -314,7 +314,7 @@ public void v10(object param0)
             xss.XmlResolver = new XmlUrlResolver();
             xss.ValidationEventHandler += ValidationCallback;
 
-            XmlReader r = CreateReader(Path.Combine(TestData._Root, param0.ToString()), false);
+            using XmlReader r = CreateReader(Path.Combine(TestData._Root, param0.ToString()), false);
             try
             {
                 xss.Add(null, r);
@@ -363,8 +363,8 @@ public void v12(object param0)
             XmlSchemaSet xss = new XmlSchemaSet();
             xss.ValidationEventHandler += ValidationCallback;
 
-            XmlReader r = CreateReader(Path.Combine(TestData._Root, param0.ToString()), false);
-            XmlReader r2 = CreateReader(r, true);
+            using XmlReader r = CreateReader(Path.Combine(TestData._Root, param0.ToString()), false);
+            using XmlReader r2 = CreateReader(r, true);
             try
             {
                 xss.Add(null, r2);
@@ -387,8 +387,8 @@ public void v13(object param0)
             xss.XmlResolver = new XmlUrlResolver();
             xss.ValidationEventHandler += ValidationCallback;
 
-            XmlReader r = CreateReader(Path.Combine(TestData._Root, param0.ToString()), false);
-            XmlReader r2 = CreateReader(r, true);
+            using XmlReader r = CreateReader(Path.Combine(TestData._Root, param0.ToString()), false);
+            using XmlReader r2 = CreateReader(r, true);
 
             try
             {
@@ -413,7 +413,7 @@ public void v14()
             xss.XmlResolver = new XmlUrlResolver();
             xss.ValidationEventHandler += ValidationCallback;
 
-            XmlReader r = CreateReader(Path.Combine(TestData._Root, "bug356711.xsd"), false);
+            using XmlReader r = CreateReader(Path.Combine(TestData._Root, "bug356711.xsd"), false);
 
             try
             {
@@ -437,8 +437,8 @@ public void v15()
             XmlSchemaSet xss = new XmlSchemaSet();
             xss.ValidationEventHandler += ValidationCallback;
 
-            XmlReader r1 = CreateReader(Path.Combine(TestData._Root, "bug356711_a.xsd"));
-            XmlReader r2 = CreateReader(Path.Combine(TestData._Root, "bug356711_b.xsd"), false);
+            using XmlReader r1 = CreateReader(Path.Combine(TestData._Root, "bug356711_a.xsd"));
+            using XmlReader r2 = CreateReader(Path.Combine(TestData._Root, "bug356711_b.xsd"), false);
 
             try
             {
@@ -482,7 +482,7 @@ public void v20(object param0)
 
             try
             {
-                XmlReader reader = CreateReader(Path.Combine(TestData._Root, param0.ToString()), xss, true);
+                using XmlReader reader = CreateReader(Path.Combine(TestData._Root, param0.ToString()), xss, true);
                 while (reader.Read()) ;
             }
             catch (XmlException)
@@ -539,7 +539,7 @@ public void v22(object param0)
 
             try
             {
-                XmlReader reader = CreateReader(Path.Combine(TestData._Root, param0.ToString()), xss, false);
+                using XmlReader reader = CreateReader(Path.Combine(TestData._Root, param0.ToString()), xss, false);
                 while (reader.Read()) ;
             }
             catch (XmlException)
@@ -561,8 +561,8 @@ public void v23()
 
             try
             {
-                XmlReader r1 = CreateReader(Path.Combine(TestData._Root, "bug356711_1.xml"), true);
-                XmlReader r2 = CreateReader(r1, xss, false);
+                using XmlReader r1 = CreateReader(Path.Combine(TestData._Root, "bug356711_1.xml"), true);
+                using XmlReader r2 = CreateReader(r1, xss, false);
                 while (r2.Read()) ;
             }
             catch (XmlException)
diff --git a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_Reprocess.cs b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_Reprocess.cs
index 9aa0e6042237..894569f9e3fd 100644
--- a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_Reprocess.cs
+++ b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaSet/TC_SchemaSet_Reprocess.cs
@@ -560,7 +560,7 @@ public void v51(object param0, object param1, object param2, object param3)
             settings.ValidationEventHandler += new ValidationEventHandler(ValidationCallback);
             settings.ValidationType = ValidationType.Schema;
             settings.Schemas = set;
-            XmlReader reader = XmlReader.Create(xmlFile, settings);
+            using XmlReader reader = XmlReader.Create(xmlFile, settings);
             while (reader.Read()) { }
 
             CError.Compare(bWarningCallback, false, "Warning count mismatch");
@@ -581,8 +581,8 @@ public void v51(object param0, object param1, object param2, object param3)
             bErrorCallback = false;
             _output.WriteLine("Second validation ***************");
             settings.Schemas = set;
-            reader = XmlReader.Create(xmlFile, settings);
-            while (reader.Read()) { }
+            using XmlReader reader2 = XmlReader.Create(xmlFile, settings);
+            while (reader2.Read()) { }
 
             CError.Compare(bWarningCallback, false, "Warning count mismatch");
             CError.Compare(bErrorCallback, false, "Error count mismatch");
@@ -606,8 +606,8 @@ public void v51(object param0, object param1, object param2, object param3)
 
             _output.WriteLine("Third validation, Expecting errors ***************");
             settings.Schemas = set;
-            reader = XmlReader.Create(xmlFile, settings);
-            while (reader.Read()) { }
+            using XmlReader reader3 = XmlReader.Create(xmlFile, settings);
+            while (reader3.Read()) { }
 
             CError.Compare(bWarningCallback, false, "Warning count mismatch");
             CError.Compare(bErrorCallback, true, "Error count mismatch");
@@ -623,7 +623,7 @@ public XmlSchema LoadSchema(string path, string baseuri)
             _output.WriteLine("Correct uri: " + correctUri);
             using (Stream s = new FileStream(Path.GetFullPath(path), FileMode.Open, FileAccess.Read, FileShare.Read, 1))
             {
-                XmlReader r = XmlReader.Create(s, new XmlReaderSettings(), includeUri);
+                using XmlReader r = XmlReader.Create(s, new XmlReaderSettings(), includeUri);
                 _output.WriteLine("Reader uri: " + r.BaseURI);
                 using (r)
                 {
diff --git a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaValidatorApi/PropertiesTests.cs b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaValidatorApi/PropertiesTests.cs
index 70fc7f882ce0..0675045b014e 100644
--- a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaValidatorApi/PropertiesTests.cs
+++ b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaValidatorApi/PropertiesTests.cs
@@ -306,7 +306,7 @@ public void XmlReaderAsALineInfoProvider()
             XmlSchemaInfo info = new XmlSchemaInfo();
 
             XmlSchemaValidator val = CreateValidator(CreateSchemaSetFromXml(xmlSrc));
-            XmlReader r = XmlReader.Create(new StringReader(xmlSrc));
+            using XmlReader r = XmlReader.Create(new StringReader(xmlSrc));
 
             val.LineInfoProvider = (r as IXmlLineInfo);
 
diff --git a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaValidatorApi/ValidateMisc.cs b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaValidatorApi/ValidateMisc.cs
index 1e9fb3a25599..25cb496ed38a 100644
--- a/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaValidatorApi/ValidateMisc.cs
+++ b/src/libraries/System.Private.Xml/tests/XmlSchema/XmlSchemaValidatorApi/ValidateMisc.cs
@@ -901,7 +901,7 @@ public void XSDValidationGeneratesInvalidError_1()
                 // TempDirectory path must end with a DirectorySeratorChar, otherwise it will throw in the Xml validation.
                 settings.Schemas.Add("mainschema", XmlReader.Create(new StringReader(xsd), null, EnsureTrailingSlash(tempDirectory.Path)));
                 settings.ValidationType = ValidationType.Schema;
-                XmlReader reader = XmlReader.Create(new StringReader(xml), settings);
+                using XmlReader reader = XmlReader.Create(new StringReader(xml), settings);
                 XmlDocument doc = new XmlDocument();
 
                 doc.Load(reader);
@@ -926,7 +926,7 @@ public void XSDValidationGeneratesInvalidError_2()
                 // TempDirectory path must end with a DirectorySeratorChar, otherwise it will throw in the Xml validation.
                 settings.Schemas.Add("mainschema", XmlReader.Create(new StringReader(xsd), null, EnsureTrailingSlash(tempDirectory.Path)));
                 settings.ValidationType = ValidationType.Schema;
-                XmlReader reader = XmlReader.Create(new StringReader(xml), settings);
+                using XmlReader reader = XmlReader.Create(new StringReader(xml), settings);
                 XmlDocument doc = new XmlDocument();
 
                 doc.Load(reader);
diff --git a/src/libraries/System.Private.Xml/tests/Xslt/XslCompiledTransformApi/Errata4.cs b/src/libraries/System.Private.Xml/tests/Xslt/XslCompiledTransformApi/Errata4.cs
index 68e0964199f1..0b5c13f8d95c 100644
--- a/src/libraries/System.Private.Xml/tests/Xslt/XslCompiledTransformApi/Errata4.cs
+++ b/src/libraries/System.Private.Xml/tests/Xslt/XslCompiledTransformApi/Errata4.cs
@@ -22,7 +22,7 @@ public Errata4(ITestOutputHelper output) : base(output)
             _output = output;
         }
 
-        private Random _rand = new Random(unchecked((int)DateTime.Now.Ticks));
+        private Random _rand = new Random(12345678);
 
         #region private const string xmlDocTemplate = ...
 
diff --git a/src/libraries/System.Reflection.Emit/ref/System.Reflection.Emit.cs b/src/libraries/System.Reflection.Emit/ref/System.Reflection.Emit.cs
index 1cd9aa71da74..3690bf72a4c8 100644
--- a/src/libraries/System.Reflection.Emit/ref/System.Reflection.Emit.cs
+++ b/src/libraries/System.Reflection.Emit/ref/System.Reflection.Emit.cs
@@ -24,7 +24,6 @@ protected AssemblyBuilder() { }
         public static System.Reflection.Emit.AssemblyBuilder DefineDynamicAssembly(System.Reflection.AssemblyName name, System.Reflection.Emit.AssemblyBuilderAccess access) { throw null; }
         [System.Diagnostics.CodeAnalysis.RequiresDynamicCode("Defining a dynamic assembly requires dynamic code.")]
         public static System.Reflection.Emit.AssemblyBuilder DefineDynamicAssembly(System.Reflection.AssemblyName name, System.Reflection.Emit.AssemblyBuilderAccess access, System.Collections.Generic.IEnumerable<System.Reflection.Emit.CustomAttributeBuilder>? assemblyAttributes) { throw null; }
-        public static System.Reflection.Emit.AssemblyBuilder DefinePersistedAssembly(System.Reflection.AssemblyName name, System.Reflection.Assembly coreAssembly, System.Collections.Generic.IEnumerable<System.Reflection.Emit.CustomAttributeBuilder>? assemblyAttributes = null) { throw null; }
         public System.Reflection.Emit.ModuleBuilder DefineDynamicModule(string name) { throw null; }
         protected abstract System.Reflection.Emit.ModuleBuilder DefineDynamicModuleCore(string name);
         public override bool Equals(object? obj) { throw null; }
@@ -55,9 +54,6 @@ protected AssemblyBuilder() { }
         [System.Diagnostics.CodeAnalysis.RequiresUnreferencedCodeAttribute("Types might be removed by trimming. If the type name is a string literal, consider using Type.GetType instead.")]
         public override System.Type? GetType(string name, bool throwOnError, bool ignoreCase) { throw null; }
         public override bool IsDefined(System.Type attributeType, bool inherit) { throw null; }
-        public void Save(string assemblyFileName) { throw null; }
-        public void Save(System.IO.Stream stream) { throw null; }
-        protected virtual void SaveCore(System.IO.Stream stream) { }
         public void SetCustomAttribute(System.Reflection.ConstructorInfo con, byte[] binaryAttribute) { }
         public void SetCustomAttribute(System.Reflection.Emit.CustomAttributeBuilder customBuilder) { }
         protected abstract void SetCustomAttributeCore(System.Reflection.ConstructorInfo con, System.ReadOnlySpan<byte> binaryAttribute);
@@ -474,6 +470,24 @@ public void SetCustomAttribute(System.Reflection.ConstructorInfo con, byte[] bin
         public void SetCustomAttribute(System.Reflection.Emit.CustomAttributeBuilder customBuilder) { }
         protected abstract void SetCustomAttributeCore(System.Reflection.ConstructorInfo con, System.ReadOnlySpan<byte> binaryAttribute);
     }
+#if !BUILDING_CORELIB_REFERENCE
+    public sealed class PersistedAssemblyBuilder : System.Reflection.Emit.AssemblyBuilder
+    {
+        public PersistedAssemblyBuilder(System.Reflection.AssemblyName name, System.Reflection.Assembly coreAssembly, System.Collections.Generic.IEnumerable<System.Reflection.Emit.CustomAttributeBuilder>? assemblyAttributes = null) { }
+        public override string? FullName { get { throw null; } }
+        public override bool IsDynamic { get { throw null; } }
+        public override System.Reflection.Module ManifestModule { get { throw null; } }
+        [System.Diagnostics.CodeAnalysis.RequiresDynamicCode("Defining a dynamic assembly requires dynamic code.")]
+        protected override System.Reflection.Emit.ModuleBuilder DefineDynamicModuleCore(string name) { throw null; }
+        protected override System.Reflection.Emit.ModuleBuilder? GetDynamicModuleCore(string name) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public System.Reflection.Metadata.Ecma335.MetadataBuilder GenerateMetadata(out System.Reflection.Metadata.BlobBuilder ilStream, out System.Reflection.Metadata.BlobBuilder mappedFieldData) { throw null; }
+        public override System.Reflection.AssemblyName GetName(bool copiedName) { throw null; }
+        public void Save(string assemblyFileName) { throw null; }
+        public void Save(System.IO.Stream stream) { throw null; }
+        protected override void SetCustomAttributeCore(System.Reflection.ConstructorInfo con, System.ReadOnlySpan<byte> binaryAttribute) { throw null; }
+    }
+#endif
     public abstract partial class PropertyBuilder : System.Reflection.PropertyInfo
     {
         protected PropertyBuilder() { }
diff --git a/src/libraries/System.Reflection.Emit/ref/System.Reflection.Emit.csproj b/src/libraries/System.Reflection.Emit/ref/System.Reflection.Emit.csproj
index 4872ff7f5b6b..44f0feefe67d 100644
--- a/src/libraries/System.Reflection.Emit/ref/System.Reflection.Emit.csproj
+++ b/src/libraries/System.Reflection.Emit/ref/System.Reflection.Emit.csproj
@@ -10,5 +10,6 @@
     <ProjectReference Include="..\..\System.Runtime.InteropServices\ref\System.Runtime.InteropServices.csproj" />
     <ProjectReference Include="..\..\System.Reflection.Primitives\ref\System.Reflection.Primitives.csproj" />
     <ProjectReference Include="..\..\System.Reflection.Emit.ILGeneration\ref\System.Reflection.Emit.ILGeneration.csproj" />
+    <ProjectReference Include="..\..\System.Reflection.Metadata\ref\System.Reflection.Metadata.csproj" />
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/src/libraries/System.Reflection.Emit/src/ILLink/ILLink.Descriptors.LibraryBuild.xml b/src/libraries/System.Reflection.Emit/src/ILLink/ILLink.Descriptors.LibraryBuild.xml
deleted file mode 100644
index 8cf11ab4c01f..000000000000
--- a/src/libraries/System.Reflection.Emit/src/ILLink/ILLink.Descriptors.LibraryBuild.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-<linker>
-  <assembly fullname="System.Reflection.Emit">
-    <type fullname="System.Reflection.Emit.AssemblyBuilderImpl">
-      <!-- Internal API called through Reflection by another assembly. -->
-      <method name=".ctor" />
-    </type>
-  </assembly>
-</linker>
diff --git a/src/libraries/System.Reflection.Emit/src/Resources/Strings.resx b/src/libraries/System.Reflection.Emit/src/Resources/Strings.resx
index 068855157322..4ab50a83684c 100644
--- a/src/libraries/System.Reflection.Emit/src/Resources/Strings.resx
+++ b/src/libraries/System.Reflection.Emit/src/Resources/Strings.resx
@@ -117,8 +117,8 @@
   <resheader name="writer">
     <value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
   </resheader>
-  <data name="InvalidOperation_CannotSaveMultipleTimes" xml:space="preserve">
-    <value>Cannot save an assembly multiple times.</value>
+  <data name="InvalidOperation_CannotPopulateMultipleTimes" xml:space="preserve">
+    <value>Cannot populate assembly metadata multiple times.</value>
   </data>
   <data name="Argument_CannotSetParentToInterface" xml:space="preserve">
     <value>Cannot set parent to an interface.</value>
@@ -249,9 +249,6 @@
   <data name="Argument_MustBeInterface" xml:space="preserve">
     <value>Type passed must be an interface.</value>
   </data>
-  <data name="TypeLoad_MissingMethod" xml:space="preserve">
-    <value>Abstract method '{0}' in type '{1}' does not have an implementation.</value>
-  </data>
   <data name="InvalidOperation_BadMethodBody" xml:space="preserve">
     <value>Method '{0}' cannot have a method body</value>
   </data>
diff --git a/src/libraries/System.Reflection.Emit/src/System.Reflection.Emit.csproj b/src/libraries/System.Reflection.Emit/src/System.Reflection.Emit.csproj
index 3b2f29ec3672..0c34ef45f448 100644
--- a/src/libraries/System.Reflection.Emit/src/System.Reflection.Emit.csproj
+++ b/src/libraries/System.Reflection.Emit/src/System.Reflection.Emit.csproj
@@ -14,7 +14,6 @@
     <Compile Include="System\Reflection\Emit\ArrayMethod.cs" />
     <Compile Include="System\Reflection\Emit\ConstructorBuilderImpl.cs" />
     <Compile Include="System\Reflection\Emit\CustomAttributeWrapper.cs" />
-    <Compile Include="System\Reflection\Emit\AssemblyBuilderImpl.cs" />
     <Compile Include="System\Reflection\Emit\EnumBuilderImpl.cs" />
     <Compile Include="System\Reflection\Emit\EventBuilderImpl.cs" />
     <Compile Include="System\Reflection\Emit\FieldBuilderImpl.cs" />
@@ -24,6 +23,7 @@
     <Compile Include="System\Reflection\Emit\MethodBuilderImpl.cs" />
     <Compile Include="System\Reflection\Emit\ModuleBuilderImpl.cs" />
     <Compile Include="System\Reflection\Emit\ParameterBuilderImpl.cs" />
+    <Compile Include="System\Reflection\Emit\PersistedAssemblyBuilder.cs" />
     <Compile Include="System\Reflection\Emit\PropertyBuilderImpl.cs" />
     <Compile Include="System\Reflection\Emit\PseudoCustomAttributesData.cs" />
     <Compile Include="System\Reflection\Emit\SignatureHelper.cs" />
diff --git a/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/MethodBuilderImpl.cs b/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/MethodBuilderImpl.cs
index fe6b7c5e3f1b..5881f753eb8f 100644
--- a/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/MethodBuilderImpl.cs
+++ b/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/MethodBuilderImpl.cs
@@ -252,7 +252,21 @@ protected override void SetSignatureCore(Type? returnType, Type[]? returnTypeReq
         public override int MetadataToken => _handle == default ? 0 : MetadataTokens.GetToken(_handle);
         public override RuntimeMethodHandle MethodHandle => throw new NotSupportedException(SR.NotSupported_DynamicModule);
         public override Type? ReflectedType => DeclaringType;
-        public override ParameterInfo ReturnParameter { get => throw new NotImplementedException(); }
+        public override ParameterInfo ReturnParameter
+        {
+            get
+            {
+                if (_parameterBuilders == null || _parameterBuilders[0] == null)
+                {
+                    return new ParameterInfoWrapper(new ParameterBuilderImpl(this, 0, ParameterAttributes.Retval, null), _returnType);
+                }
+                else
+                {
+                    return new ParameterInfoWrapper(_parameterBuilders[0], _returnType);
+                }
+            }
+        }
+
         public override Type ReturnType => _returnType;
         public override ICustomAttributeProvider ReturnTypeCustomAttributes { get => throw new NotImplementedException(); }
 
diff --git a/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/ModuleBuilderImpl.cs b/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/ModuleBuilderImpl.cs
index de6cb832bb96..0349186b717f 100644
--- a/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/ModuleBuilderImpl.cs
+++ b/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/ModuleBuilderImpl.cs
@@ -16,7 +16,7 @@ internal sealed class ModuleBuilderImpl : ModuleBuilder
         private readonly Assembly _coreAssembly;
         private readonly string _name;
         private readonly MetadataBuilder _metadataBuilder;
-        private readonly AssemblyBuilderImpl _assemblyBuilder;
+        private readonly PersistedAssemblyBuilder _assemblyBuilder;
         private readonly TypeBuilderImpl _globalTypeBuilder;
         private readonly Dictionary<Assembly, AssemblyReferenceHandle> _assemblyReferences = new();
         private readonly Dictionary<Type, EntityHandle> _typeReferences = new();
@@ -37,7 +37,7 @@ internal sealed class ModuleBuilderImpl : ModuleBuilder
         private static readonly Type[] s_coreTypes = { typeof(void), typeof(object), typeof(bool), typeof(char), typeof(sbyte), typeof(byte), typeof(short), typeof(ushort), typeof(int),
                                                        typeof(uint), typeof(long), typeof(ulong), typeof(float), typeof(double), typeof(string), typeof(nint), typeof(nuint), typeof(TypedReference) };
 
-        internal ModuleBuilderImpl(string name, Assembly coreAssembly, MetadataBuilder builder, AssemblyBuilderImpl assemblyBuilder)
+        internal ModuleBuilderImpl(string name, Assembly coreAssembly, MetadataBuilder builder, PersistedAssemblyBuilder assemblyBuilder)
         {
             _coreAssembly = coreAssembly;
             _name = name;
@@ -538,6 +538,7 @@ private EntityHandle GetMemberReferenceHandle(MemberInfo memberInfo)
                             MetadataSignatureHelper.GetFieldSignature(field.FieldType, field.GetRequiredCustomModifiers(), field.GetOptionalCustomModifiers(), this));
                         break;
                     case ConstructorInfo ctor:
+                        ctor = (ConstructorInfo)GetOriginalMemberIfConstructedType(ctor);
                         memberHandle = AddMemberReference(ctor.Name, GetTypeHandle(memberInfo.DeclaringType!), MetadataSignatureHelper.GetConstructorSignature(ctor.GetParameters(), this));
                         break;
                     case MethodInfo method:
@@ -606,8 +607,9 @@ internal static SignatureCallingConvention GetSignatureConvention(CallingConvent
         private static MemberInfo GetOriginalMemberIfConstructedType(MethodBase methodBase)
         {
             Type declaringType = methodBase.DeclaringType!;
-            if (declaringType.IsConstructedGenericType && !methodBase.ContainsGenericParameters &&
-                declaringType.GetGenericTypeDefinition() is not TypeBuilderImpl)
+            if (declaringType.IsConstructedGenericType &&
+                declaringType.GetGenericTypeDefinition() is not TypeBuilderImpl &&
+                !ContainsTypeBuilder(declaringType.GetGenericArguments()))
             {
                 return declaringType.GetGenericTypeDefinition().GetMemberWithSameMetadataDefinitionAs(methodBase);
             }
@@ -881,7 +883,7 @@ private static int GetTokenForHandle(EntityHandle handle)
 
         private EntityHandle GetHandleForMember(MemberInfo member)
         {
-            if (IsConstructedFromNotBakedTypeBuilder(member.DeclaringType!))
+            if (IsConstructedFromTypeBuilder(member.DeclaringType!))
             {
                 return default;
             }
@@ -889,20 +891,20 @@ private EntityHandle GetHandleForMember(MemberInfo member)
             return GetMemberReferenceHandle(member);
         }
 
-        private static bool IsConstructedFromNotBakedTypeBuilder(Type type) => type.IsConstructedGenericType &&
-            (type.GetGenericTypeDefinition() is TypeBuilderImpl tb && tb._handle == default ||
-             ContainsNotBakedTypeBuilder(type.GetGenericArguments()));
+        private static bool IsConstructedFromTypeBuilder(Type type) => type.IsConstructedGenericType &&
+            (type.GetGenericTypeDefinition() is TypeBuilderImpl ||
+             ContainsTypeBuilder(type.GetGenericArguments()));
 
-        private static bool ContainsNotBakedTypeBuilder(Type[] genericArguments)
+        internal static bool ContainsTypeBuilder(Type[] genericArguments)
         {
             foreach (Type type in genericArguments)
             {
-                if (type is TypeBuilderImpl tb && tb._handle == default)
+                if (type is TypeBuilderImpl || type is GenericTypeParameterBuilderImpl)
                 {
                     return true;
                 }
 
-                if (IsConstructedFromNotBakedTypeBuilder(type))
+                if (IsConstructedFromTypeBuilder(type))
                 {
                     return true;
                 }
@@ -923,7 +925,7 @@ internal EntityHandle TryGetTypeHandle(Type type)
                 return eb._typeBuilder._handle;
             }
 
-            if (IsConstructedFromNotBakedTypeBuilder(type))
+            if (IsConstructedFromTypeBuilder(type))
             {
                 return default;
             }
@@ -952,8 +954,8 @@ internal EntityHandle TryGetMethodHandle(MethodInfo method)
                 return mb._handle;
             }
 
-            if (IsConstructedMethodFromNotBakedMethodBuilder(method) ||
-                IsArrayMethodFromNotBakedTypeBuilder(method))
+            if (IsConstructedFromMethodBuilderOrTypeBuilder(method) ||
+                IsArrayMethodTypeIsTypeBuilder(method))
             {
                 return default;
             }
@@ -961,11 +963,11 @@ internal EntityHandle TryGetMethodHandle(MethodInfo method)
             return GetHandleForMember(method);
         }
 
-        private static bool IsArrayMethodFromNotBakedTypeBuilder(MethodInfo method) => method is ArrayMethod arrayMethod &&
-            arrayMethod.DeclaringType!.GetElementType() is TypeBuilderImpl tb && tb._handle == default;
+        private static bool IsArrayMethodTypeIsTypeBuilder(MethodInfo method) => method is ArrayMethod arrayMethod &&
+            arrayMethod.DeclaringType!.GetElementType() is TypeBuilderImpl;
 
-        private static bool IsConstructedMethodFromNotBakedMethodBuilder(MethodInfo method) =>
-            method.IsConstructedGenericMethod && method.GetGenericMethodDefinition() is MethodBuilderImpl mb && mb._handle == default;
+        private static bool IsConstructedFromMethodBuilderOrTypeBuilder(MethodInfo method) => method.IsConstructedGenericMethod &&
+            (method.GetGenericMethodDefinition() is MethodBuilderImpl || ContainsTypeBuilder(method.GetGenericArguments()));
 
         internal EntityHandle TryGetMethodHandle(MethodInfo method, Type[] optionalParameterTypes)
         {
@@ -980,7 +982,7 @@ internal EntityHandle TryGetMethodHandle(MethodInfo method, Type[] optionalParam
                 return mb._handle;
             }
 
-            if (IsConstructedMethodFromNotBakedMethodBuilder(method))
+            if (IsConstructedFromMethodBuilderOrTypeBuilder(method))
             {
                 return default;
             }
diff --git a/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/AssemblyBuilderImpl.cs b/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/PersistedAssemblyBuilder.cs
similarity index 50%
rename from src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/AssemblyBuilderImpl.cs
rename to src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/PersistedAssemblyBuilder.cs
index b52bec9f11c4..12fbf4704b7b 100644
--- a/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/AssemblyBuilderImpl.cs
+++ b/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/PersistedAssemblyBuilder.cs
@@ -9,18 +9,34 @@
 
 namespace System.Reflection.Emit
 {
-    internal sealed class AssemblyBuilderImpl : AssemblyBuilder
+    /// <summary>
+    /// AssemblyBuilder implementation that can persist assembly to a disk or stream.
+    /// </summary>
+    public sealed class PersistedAssemblyBuilder : AssemblyBuilder
     {
         private readonly AssemblyName _assemblyName;
         private readonly Assembly _coreAssembly;
         private readonly MetadataBuilder _metadataBuilder;
         private ModuleBuilderImpl? _module;
-        private bool _previouslySaved;
+        private bool _isMetadataPopulated;
 
         internal List<CustomAttributeWrapper>? _customAttributes;
 
-        internal AssemblyBuilderImpl(AssemblyName name, Assembly coreAssembly, IEnumerable<CustomAttributeBuilder>? assemblyAttributes = null)
+        /// <summary>
+        /// Creates a <see cref="PersistedAssemblyBuilder"/> instance that can be saved to a file or stream.
+        /// </summary>
+        /// <param name="name">The name of the assembly.</param>
+        /// <param name="coreAssembly">The assembly that denotes the "system assembly" that houses the well-known types such as <see cref="object"/></param>
+        /// <param name="assemblyAttributes">A collection that contains the attributes of the assembly.</param>
+        /// <returns>An <see cref="PersistedAssemblyBuilder"/> that can be persisted.</returns>
+        /// <exception cref="ArgumentNullException">The <paramref name="name"/> or <paramref name="name.Name"/> or <paramref name="coreAssembly"/> is null.</exception>
+        /// <remarks>Currently the persisted assembly doesn't support running, need to save it and load back to run.</remarks>
+        public PersistedAssemblyBuilder(AssemblyName name, Assembly coreAssembly, IEnumerable<CustomAttributeBuilder>? assemblyAttributes = null)
         {
+            ArgumentNullException.ThrowIfNull(name);
+            ArgumentException.ThrowIfNullOrEmpty(name.Name, "AssemblyName.Name");
+            ArgumentNullException.ThrowIfNull(coreAssembly);
+
             _assemblyName = (AssemblyName)name.Clone();
             _coreAssembly = coreAssembly;
             _metadataBuilder = new MetadataBuilder();
@@ -54,20 +70,70 @@ private void WritePEImage(Stream peStream, BlobBuilder ilBuilder, BlobBuilder fi
             peBlob.WriteContentTo(peStream);
         }
 
-        protected override void SaveCore(Stream stream)
+        /// <summary>
+        /// Serializes the assembly to <see cref="Stream"/>.
+        /// </summary>
+        /// <param name="stream">The <see cref="Stream"/> to which the assembly serialized.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
+        /// <exception cref="InvalidOperationException">A module not defined for the assembly.</exception>
+        /// <exception cref="InvalidOperationException">The metadata already populated for the assembly before.</exception>
+        public void Save(Stream stream) => SaveInternal(stream);
+
+        /// <summary>
+        /// Saves the assembly to disk.
+        /// </summary>
+        /// <param name="assemblyFileName">The file name of the assembly.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="assemblyFileName"/> is null.</exception>
+        /// <exception cref="InvalidOperationException">A module not defined for the assembly.</exception>
+        /// <exception cref="InvalidOperationException">The metadata already populated for the assembly before.</exception>
+        public void Save(string assemblyFileName)
+        {
+            ArgumentNullException.ThrowIfNull(assemblyFileName);
+
+            using var peStream = new FileStream(assemblyFileName, FileMode.Create, FileAccess.Write);
+            SaveInternal(peStream);
+        }
+
+        private void SaveInternal(Stream stream)
         {
             ArgumentNullException.ThrowIfNull(stream);
 
+            PopulateAssemblyMetadata(out BlobBuilder ilStream, out BlobBuilder fieldData);
+            WritePEImage(stream, ilStream, fieldData);
+        }
+
+
+        /// <summary>
+        /// Generates the metadata for the <see cref="PersistedAssemblyBuilder"/>.
+        /// </summary>
+        /// <param name="ilStream">Outputs <see cref="BlobBuilder"/> bytes that includes all method's IL (body) emitted.</param>
+        /// <param name="mappedFieldData">Outputs <see cref="BlobBuilder"/> bytes that includes all field RVA data defined in the assembly.</param>
+        /// <returns>A <see cref="MetadataBuilder"/> that includes all members defined in the Assembly.</returns>
+        /// <exception cref="InvalidOperationException">A module not defined for the assembly.</exception>
+        /// <exception cref="InvalidOperationException">The metadata already populated for the assembly before.</exception>
+        [CLSCompliant(false)]
+        public MetadataBuilder GenerateMetadata(out BlobBuilder ilStream, out BlobBuilder mappedFieldData)
+        {
+            PopulateAssemblyMetadata(out ilStream, out mappedFieldData);
+
+            return _metadataBuilder;
+        }
+
+        private void PopulateAssemblyMetadata(out BlobBuilder ilStream, out BlobBuilder fieldData)
+        {
             if (_module == null)
             {
                 throw new InvalidOperationException(SR.InvalidOperation_AModuleRequired);
             }
 
-            if (_previouslySaved) // Cannot save an assembly multiple times. This is consistent with Save() in .Net Framework.
+            if (_isMetadataPopulated) // Cannot populate assembly metadata multiple times. This is consistent with Save() in .Net Framework.
             {
-                throw new InvalidOperationException(SR.InvalidOperation_CannotSaveMultipleTimes);
+                throw new InvalidOperationException(SR.InvalidOperation_CannotPopulateMultipleTimes);
             }
 
+            ilStream = new BlobBuilder();
+            fieldData = new BlobBuilder();
+
             // Add assembly metadata
             AssemblyDefinitionHandle assemblyHandle = _metadataBuilder.AddAssembly(
                _metadataBuilder.GetOrAddString(value: _assemblyName.Name!),
@@ -79,15 +145,10 @@ protected override void SaveCore(Stream stream)
                hashAlgorithm: (AssemblyHashAlgorithm)_assemblyName.HashAlgorithm
 #pragma warning restore SYSLIB0037
                );
-            _module.WriteCustomAttributes(_customAttributes, assemblyHandle);
 
-            var ilBuilder = new BlobBuilder();
-            var fieldDataBuilder = new BlobBuilder();
-            MethodBodyStreamEncoder methodBodyEncoder = new MethodBodyStreamEncoder(ilBuilder);
-            _module.AppendMetadata(methodBodyEncoder, fieldDataBuilder);
-
-            WritePEImage(stream, ilBuilder, fieldDataBuilder);
-            _previouslySaved = true;
+            _module.WriteCustomAttributes(_customAttributes, assemblyHandle);
+            _module.AppendMetadata(new MethodBodyStreamEncoder(ilStream), fieldData);
+            _isMetadataPopulated = true;
         }
 
         private static AssemblyFlags AddContentType(AssemblyFlags flags, AssemblyContentType contentType)
diff --git a/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/TypeBuilderImpl.cs b/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/TypeBuilderImpl.cs
index 4aea4ab3ee2c..a698d744212a 100644
--- a/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/TypeBuilderImpl.cs
+++ b/src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/TypeBuilderImpl.cs
@@ -211,9 +211,11 @@ private ConstructorBuilderImpl DefineDefaultConstructorInternal(MethodAttributes
         {
             // Get the parent class's default constructor and add it to the IL
             ConstructorInfo? con;
-            if (_typeParent!.IsConstructedGenericType && _typeParent.GetGenericTypeDefinition() is TypeBuilderImpl typeBuilder)
+            if (_typeParent!.IsConstructedGenericType &&
+                (_typeParent.GetGenericTypeDefinition() is TypeBuilderImpl || ModuleBuilderImpl.ContainsTypeBuilder(_typeParent.GetGenericArguments())))
             {
-                con = GetConstructor(_typeParent, typeBuilder.GetConstructor(
+                // When TypeBuilder involved need to construct the parent constructor using TypeBuilder.GetConstructor() static method
+                con = GetConstructor(_typeParent, _typeParent.GetGenericTypeDefinition().GetConstructor(
                     BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic, null, EmptyTypes, null)!);
             }
             else
diff --git a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveAssemblyBuilder.cs b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveAssemblyBuilderTests.cs
similarity index 85%
rename from src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveAssemblyBuilder.cs
rename to src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveAssemblyBuilderTests.cs
index 5dbabe683e07..45f926b1a67b 100644
--- a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveAssemblyBuilder.cs
+++ b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveAssemblyBuilderTests.cs
@@ -5,14 +5,19 @@
 using System.Globalization;
 using System.IO;
 using System.Linq;
+using System.Reflection.Metadata;
+using System.Reflection.Metadata.Ecma335;
+using System.Reflection.PortableExecutable;
 using System.Runtime.InteropServices;
+using System.Runtime.Loader;
 using Xunit;
 
 namespace System.Reflection.Emit.Tests
 {
     [ConditionalClass(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))]
-    public class AssemblySaveAssemblyBuilder
+    public class AssemblySaveAssemblyBuilderTests
     {
+        private readonly AssemblyName _assemblyName = new AssemblyName("MyAssembly");
         public class Outer
         {
             public class Inner
@@ -24,6 +29,82 @@ void DoNothing () { }
             }
         }
 
+        [Fact]
+        public void PersistedAssemblyBuilder_ConstructorValidations()
+        {
+            Assert.Throws<ArgumentNullException>("name", () => new PersistedAssemblyBuilder(null, typeof(object).Assembly));
+            Assert.Throws<ArgumentNullException>("coreAssembly", () => new PersistedAssemblyBuilder(_assemblyName, null));
+            Assert.Throws<ArgumentNullException>("AssemblyName.Name", () => AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName()));
+        }
+
+        [Fact]
+        public void PersistedAssemblyBuilder_SaveValidations()
+        {
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(_assemblyName);
+
+            Assert.Throws<ArgumentNullException>("assemblyFileName", () => ab.Save(assemblyFileName: null));
+            Assert.Throws<ArgumentNullException>("stream", () => ab.Save(stream: null));
+            Assert.Throws<InvalidOperationException>(() => ab.Save(assemblyFileName: "File")); // no module defined
+        }
+
+        [Fact]
+        public void PersistedAssemblyBuilder_GenerateMetadataValidation()
+        {
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(_assemblyName);
+            Assert.Throws<InvalidOperationException>(() => ab.GenerateMetadata(out var _, out var _)); // no module defined
+            ab.DefineDynamicModule("MyModule");
+            MetadataBuilder metadata = ab.GenerateMetadata(out var ilStream, out var mappedFieldData);
+            Assert.NotNull(metadata);
+            Assert.NotNull(ilStream);
+            Assert.NotNull(mappedFieldData);
+            Assert.Throws<InvalidOperationException>(() => ab.GenerateMetadata(out var _, out var _)); // cannot re-generate metadata
+        }
+
+        [Fact]
+        public void PersistedAssemblyBuilder_GenerateMetadataWithEntryPoint()
+        {
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(_assemblyName);
+            TypeBuilder tb = ab.DefineDynamicModule("MyModule").DefineType("MyType", TypeAttributes.Public | TypeAttributes.Class);
+            MethodBuilder mb1 = tb.DefineMethod("SumMethod", MethodAttributes.Public | MethodAttributes.Static, typeof(int), [typeof(int), typeof(int)]);
+            ILGenerator il = mb1.GetILGenerator();
+            il.Emit(OpCodes.Ldarg_0);
+            il.Emit(OpCodes.Ldarg_1);
+            il.Emit(OpCodes.Add);
+            il.Emit(OpCodes.Ret);
+            MethodBuilder entryPoint = tb.DefineMethod("Main", MethodAttributes.HideBySig | MethodAttributes.Public | MethodAttributes.Static, typeof(int), null);
+            ILGenerator il2 = entryPoint.GetILGenerator();
+            il2.Emit(OpCodes.Ldc_I4_S, 10);
+            il2.Emit(OpCodes.Ldc_I4_2);
+            il2.Emit(OpCodes.Call, mb1);
+            il2.Emit(OpCodes.Ret);
+            tb.CreateType();
+
+            MetadataBuilder metadataBuilder = ab.GenerateMetadata(out BlobBuilder ilStream, out BlobBuilder fieldData);
+            PEHeaderBuilder peHeaderBuilder = new PEHeaderBuilder(
+                            imageCharacteristics: Characteristics.ExecutableImage,
+                            subsystem: Subsystem.WindowsCui);
+
+            ManagedPEBuilder peBuilder = new ManagedPEBuilder(
+                            header: peHeaderBuilder,
+                            metadataRootBuilder: new MetadataRootBuilder(metadataBuilder),
+                            ilStream: ilStream,
+                            mappedFieldData: fieldData,
+                            entryPoint: MetadataTokens.MethodDefinitionHandle(entryPoint.MetadataToken));
+
+            BlobBuilder peBlob = new BlobBuilder();
+            peBuilder.Serialize(peBlob);
+
+            // in case saving to a file:
+            using var stream = new MemoryStream();
+            peBlob.WriteContentTo(stream);
+
+            stream.Seek(0, SeekOrigin.Begin);
+            Assembly assembly = AssemblyLoadContext.Default.LoadFromStream(stream);
+            MethodInfo method = assembly.EntryPoint;
+            Assert.Equal("Main", method.Name);
+            Assert.Equal(12, method.Invoke(null, null));
+        }
+
         [Fact]
         public void AssemblyWithDifferentTypes()
         {
@@ -34,7 +115,7 @@ public void AssemblyWithDifferentTypes()
                 aName.CultureInfo = new CultureInfo("en");
                 aName.Flags = AssemblyNameFlags.Retargetable;
 
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(aName);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(aName);
 
                 ab.SetCustomAttribute(new CustomAttributeBuilder(typeof(AssemblyDelaySignAttribute).GetConstructor([typeof(bool)]), [true]));
 
diff --git a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveConstructorBuilderTests.cs b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveConstructorBuilderTests.cs
similarity index 94%
rename from src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveConstructorBuilderTests.cs
rename to src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveConstructorBuilderTests.cs
index 618b01b2a9ae..34fe5c2aec32 100644
--- a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveConstructorBuilderTests.cs
+++ b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveConstructorBuilderTests.cs
@@ -15,7 +15,7 @@ public void DefineConstructorsTest()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 ConstructorBuilder constructor = type.DefineDefaultConstructor(MethodAttributes.Public);
                 ConstructorBuilder constructor2 = type.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, [typeof(int)]);
                 constructor2.DefineParameter(1, ParameterAttributes.None, "parameter1");
@@ -51,7 +51,7 @@ public void DefineConstructorsTest()
         [Fact]
         public void DefineDefaultConstructor_WithTypeBuilderParent()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             type.CreateType();
             TypeBuilder child = ab.GetDynamicModule("MyModule").DefineType("ChildType", TypeAttributes.Public | TypeAttributes.Class);
             child.SetParent(type);
@@ -69,7 +69,7 @@ public void DefineDefaultConstructor_TypesWithGenericParents()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 type.DefineGenericParameters("T");
                 ConstructorBuilder constructor = type.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, Type.EmptyTypes);
                 FieldBuilder field = type.DefineField("TestField", typeof(bool), FieldAttributes.Public | FieldAttributes.Static);
@@ -112,7 +112,7 @@ public void DefineDefaultConstructor_TypesWithGenericParents()
         [Fact]
         public void DefineDefaultConstructor_Interface_ThrowsInvalidOperationException()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
             TypeBuilder type = ab.DefineDynamicModule("MyModule").DefineType("MyType", TypeAttributes.Public | TypeAttributes.Interface | TypeAttributes.Abstract);
             Assert.Throws<InvalidOperationException>(() => type.DefineDefaultConstructor(MethodAttributes.Public));
         }
@@ -120,7 +120,7 @@ public void DefineDefaultConstructor_Interface_ThrowsInvalidOperationException()
         [Fact]
         public void DefineDefaultConstructor_ThrowsNotSupportedException_IfParentNotCreated()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             TypeBuilder  child = ab.GetDynamicModule("MyModule").DefineType("MyType", TypeAttributes.Public);
             child.SetParent(type);
             Assert.Throws<NotSupportedException>(() => child.DefineDefaultConstructor(MethodAttributes.Public));
@@ -136,7 +136,7 @@ public void DefineDefaultConstructor_StaticVirtual_ThrowsArgumentException()
         [Fact]
         public void DefineDefaultConstructor_ParentNoDefaultConstructor_ThrowsNotSupportedException()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             FieldBuilder field = type.DefineField("TestField", typeof(int), FieldAttributes.Family);
 
             ConstructorBuilder constructor = type.DefineConstructor(MethodAttributes.Public, CallingConventions.HasThis, new[] { typeof(int) });
diff --git a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveCustomAttributeTests.cs b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveCustomAttributeTests.cs
similarity index 98%
rename from src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveCustomAttributeTests.cs
rename to src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveCustomAttributeTests.cs
index 35f04f3776cc..1fd2d5318836 100644
--- a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveCustomAttributeTests.cs
+++ b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveCustomAttributeTests.cs
@@ -125,7 +125,7 @@ private static void WriteAssemblyToDisk(AssemblyName assemblyName, Type[] types,
             List<CustomAttributeBuilder>? moduleAttributes = null, List<CustomAttributeBuilder>? typeAttributes = null,
             List<CustomAttributeBuilder>? methodAttributes = null, List<CustomAttributeBuilder>? fieldAttributes = null)
         {
-            AssemblyBuilder assemblyBuilder = AssemblySaveTools.PopulateAssemblyBuilder(assemblyName, assemblyAttributes);
+            PersistedAssemblyBuilder assemblyBuilder = AssemblySaveTools.PopulateAssemblyBuilder(assemblyName, assemblyAttributes);
             ModuleBuilder mb = assemblyBuilder.DefineDynamicModule(assemblyName.Name);
             PopulateMembersForModule(mb, types, moduleAttributes, typeAttributes, methodAttributes, fieldAttributes);
             assemblyBuilder.Save(fileLocation);
@@ -194,7 +194,7 @@ public void CreateStructWithPseudoCustomAttributesTest()
                                                               new CustomAttributeBuilder(typeof(SpecialNameAttribute).GetConstructor(Type.EmptyTypes), new object[] { })
                                                             };
 
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(PopulateAssemblyName());
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(PopulateAssemblyName());
                 TypeBuilder tb = ab.DefineDynamicModule("Module").DefineType(type.FullName, type.Attributes, type.BaseType);
                 DefineFieldsAndSetAttributes(fieldAttributes.ToList(), type.GetFields(), tb);
                 typeAttributes.ForEach(tb.SetCustomAttribute);
@@ -286,7 +286,7 @@ public void InterfacesWithPseudoCustomAttributes()
                         new CustomAttributeBuilder(marshalAsEnumCtor, new object[] { UnmanagedType.CustomMarshaler },
                                 new FieldInfo[] { typeof(MarshalAsAttribute).GetField("MarshalType")}, new object[] { typeof(EmptyTestClass).AssemblyQualifiedName })};
 
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(PopulateAssemblyName());
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(PopulateAssemblyName());
                 TypeBuilder tb = ab.DefineDynamicModule("Module").DefineType(type.FullName, type.Attributes);
                 typeAttributes.ForEach(tb.SetCustomAttribute);
                 DefineMethodsAndSetAttributes(methodAttributes, tb, type.GetMethods(), parameterAttributes);
@@ -439,7 +439,7 @@ public void MarshalAsPseudoCustomAttributesTest(CustomAttributeBuilder attribute
             using (TempFile file = TempFile.Create())
             {
                 Type type = typeof(StructWithFields);
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(PopulateAssemblyName());
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(PopulateAssemblyName());
                 TypeBuilder tb = ab.DefineDynamicModule("Module").DefineType(type.FullName, type.Attributes, type.BaseType);
                 FieldInfo stringField = type.GetFields()[1];
                 FieldBuilder fb = tb.DefineField(stringField.Name, stringField.FieldType, stringField.Attributes);
@@ -477,7 +477,7 @@ public void EnumBuilderSetCustomAttributesTest()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(PopulateAssemblyName());
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(PopulateAssemblyName());
                 EnumBuilder enumBuilder = ab.DefineDynamicModule("Module").DefineEnum("TestEnum", TypeAttributes.Public, typeof(int));
 
                 ConstructorInfo attributeConstructor = typeof(BoolAttribute).GetConstructor(new Type[] { typeof(bool) });
diff --git a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveEnumBuilderTests.cs b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveEnumBuilderTests.cs
similarity index 95%
rename from src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveEnumBuilderTests.cs
rename to src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveEnumBuilderTests.cs
index 2711ba48db97..29a94bacd15d 100644
--- a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveEnumBuilderTests.cs
+++ b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveEnumBuilderTests.cs
@@ -64,7 +64,7 @@ public void DefineLiteral(Type underlyingType, object literalValue)
         {
             using (TempFile file = TempFile.Create())
             {
-                EnumBuilder enumBuilder = CreateAssemblyAndDefineEnum(out AssemblyBuilder assemblyBuilder, out TypeBuilder type, underlyingType);
+                EnumBuilder enumBuilder = CreateAssemblyAndDefineEnum(out PersistedAssemblyBuilder assemblyBuilder, out TypeBuilder type, underlyingType);
                 FieldBuilder literal = enumBuilder.DefineLiteral("FieldOne", literalValue);
                 enumBuilder.CreateTypeInfo();
                 type.CreateTypeInfo();
@@ -97,7 +97,7 @@ public void SaveArrayTypeSignature(int rank, string name)
         {
             using (TempFile file = TempFile.Create())
             {
-                EnumBuilder enumBuilder = CreateAssemblyAndDefineEnum(out AssemblyBuilder ab, out TypeBuilder tb);
+                EnumBuilder enumBuilder = CreateAssemblyAndDefineEnum(out PersistedAssemblyBuilder ab, out TypeBuilder tb);
                 Type arrayType = rank == 0 ? enumBuilder.MakeArrayType() : enumBuilder.MakeArrayType(rank);
                 MethodBuilder mb = tb.DefineMethod("TestMethod", MethodAttributes.Public);
                 mb.SetReturnType(arrayType);
@@ -118,7 +118,7 @@ public void SaveArrayTypeSignature(int rank, string name)
             }
         }
 
-        private EnumBuilder CreateAssemblyAndDefineEnum(out AssemblyBuilder assemblyBuilder,
+        private EnumBuilder CreateAssemblyAndDefineEnum(out PersistedAssemblyBuilder assemblyBuilder,
             out TypeBuilder type, Type? underlyingType = null)
         {
             assemblyBuilder = AssemblySaveTools.PopulateAssemblyBuilder(PopulateAssemblyName());
@@ -140,7 +140,7 @@ public void SaveByRefTypeSignature()
         {
             using (TempFile file = TempFile.Create())
             {
-                EnumBuilder eb = CreateAssemblyAndDefineEnum(out AssemblyBuilder assemblyBuilder, out TypeBuilder tb);
+                EnumBuilder eb = CreateAssemblyAndDefineEnum(out PersistedAssemblyBuilder assemblyBuilder, out TypeBuilder tb);
                 Type byrefType = eb.MakeByRefType();
                 MethodBuilder mb = tb.DefineMethod("TestMethod", MethodAttributes.Public);
                 mb.SetReturnType(byrefType);
@@ -173,7 +173,7 @@ public void SavePointerTypeSignature()
         {
             using (TempFile file = TempFile.Create())
             {
-                EnumBuilder eb = CreateAssemblyAndDefineEnum(out AssemblyBuilder assemblyBuilder, out TypeBuilder tb);
+                EnumBuilder eb = CreateAssemblyAndDefineEnum(out PersistedAssemblyBuilder assemblyBuilder, out TypeBuilder tb);
                 Type pointerType = eb.MakePointerType();
                 MethodBuilder mb = tb.DefineMethod("TestMethod", MethodAttributes.Public);
                 mb.SetReturnType(pointerType);
@@ -213,7 +213,7 @@ public void EnumTypeField_DefaultValueShouldMatchUnderlyingType()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder assemblyBuilder = AssemblySaveTools.PopulateAssemblyBuilder(PopulateAssemblyName());
+                PersistedAssemblyBuilder assemblyBuilder = AssemblySaveTools.PopulateAssemblyBuilder(PopulateAssemblyName());
                 ModuleBuilder mb = assemblyBuilder.DefineDynamicModule("My Module");
                 TypeBuilder tb = mb.DefineType("TestType", TypeAttributes.Class | TypeAttributes.Public);
                 EnumBuilder eb =  mb.DefineEnum("TestEnum", TypeAttributes.Public, typeof(int));
diff --git a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveEventBuilderTests.cs b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveEventBuilderTests.cs
similarity index 57%
rename from src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveEventBuilderTests.cs
rename to src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveEventBuilderTests.cs
index d7c03fa49fc2..f4692afe1949 100644
--- a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveEventBuilderTests.cs
+++ b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveEventBuilderTests.cs
@@ -4,6 +4,7 @@
 using System.Collections.Generic;
 using System.IO;
 using System.Linq;
+using System.Threading;
 using Xunit;
 
 namespace System.Reflection.Emit.Tests
@@ -16,7 +17,7 @@ public void DefineEventAndItsAccessors()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 EventBuilder eventType = type.DefineEvent("TestEvent", EventAttributes.SpecialName, typeof(int));
                 MethodBuilder addMethod = type.DefineMethod("AddMethod", MethodAttributes.Public | MethodAttributes.SpecialName);
                 MethodBuilder addMethod2 = type.DefineMethod("AddMethod2", MethodAttributes.Public | MethodAttributes.HideBySig, typeof(int), Type.EmptyTypes);
@@ -95,5 +96,68 @@ public void Set_WhenTypeAlreadyCreated_ThrowsInvalidOperationException()
             Assert.Throws<InvalidOperationException>(() => eventBuilder.AddOtherMethod(method));
             Assert.Throws<InvalidOperationException>(() => eventBuilder.SetCustomAttribute(customAttrBuilder));
         }
+
+        [Fact]
+        public void ReferenceEventInIL()
+        {
+            using (TempFile file = TempFile.Create())
+            {
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                TypeBuilder delegateType = ab.GetDynamicModule("MyModule").DefineType("OnMissingString", TypeAttributes.Public | TypeAttributes.Sealed, typeof(MulticastDelegate));
+                delegateType.DefineMethod("Invoke", MethodAttributes.Public | MethodAttributes.HideBySig | MethodAttributes.NewSlot | MethodAttributes.Virtual,
+                    typeof(void), [typeof(string)]).SetImplementationFlags(MethodImplAttributes.Runtime);
+                delegateType.DefineMethod("BeginInvoke", MethodAttributes.Public | MethodAttributes.HideBySig | MethodAttributes.NewSlot | MethodAttributes.Virtual,
+                    typeof(IAsyncResult), [typeof(string), typeof(AsyncCallback), typeof(object)]).SetImplementationFlags(MethodImplAttributes.Runtime);
+                delegateType.DefineMethod("EndInvoke", MethodAttributes.Public | MethodAttributes.HideBySig | MethodAttributes.NewSlot | MethodAttributes.Virtual,
+                    typeof(void), [typeof(IAsyncResult)]).SetImplementationFlags(MethodImplAttributes.Runtime);
+                delegateType.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, [typeof(object), typeof(IntPtr)]).
+                    SetImplementationFlags(MethodImplAttributes.Runtime);
+                MethodInfo combineMethod = typeof(Delegate).GetMethod("Combine", [typeof(Delegate), typeof(Delegate)]);
+                MethodInfo interlockedGenericMethod = typeof(Interlocked).GetMethods(BindingFlags.Public | BindingFlags.Static).
+                    Where(m => m.Name == "CompareExchange" && m.IsGenericMethodDefinition && m.GetGenericArguments().Length == 1).First().MakeGenericMethod(delegateType);
+                EventBuilder eventBuilder = type.DefineEvent("MissingString", EventAttributes.SpecialName, delegateType);
+                FieldBuilder field = type.DefineField("MissingString", delegateType, FieldAttributes.Private);
+                MethodBuilder addMethod = type.DefineMethod("add_MissingString", MethodAttributes.Public | MethodAttributes.SpecialName | MethodAttributes.HideBySig, null, [delegateType]);
+                ILGenerator addIL = addMethod.GetILGenerator();
+                addIL.DeclareLocal(delegateType);
+                addIL.DeclareLocal(delegateType);
+                addIL.DeclareLocal(delegateType);
+                Label loop = addIL.DefineLabel();
+                addIL.Emit(OpCodes.Ldarg_0);
+                addIL.Emit(OpCodes.Ldfld, field);
+                addIL.Emit(OpCodes.Stloc_0);
+                addIL.MarkLabel(loop);
+                addIL.Emit(OpCodes.Ldloc_0);
+                addIL.Emit(OpCodes.Stloc_1);
+                addIL.Emit(OpCodes.Ldloc_1);
+                addIL.Emit(OpCodes.Ldarg_1);
+                addIL.Emit(OpCodes.Call, combineMethod);
+                addIL.Emit(OpCodes.Castclass, delegateType);
+                addIL.Emit(OpCodes.Stloc_2);
+                addIL.Emit(OpCodes.Ldarg_0);
+                addIL.Emit(OpCodes.Ldflda, field);
+                addIL.Emit(OpCodes.Ldloc_2);
+                addIL.Emit(OpCodes.Ldloc_1);
+                addIL.Emit(OpCodes.Call, interlockedGenericMethod);
+                addIL.Emit(OpCodes.Stloc_0);
+                addIL.Emit(OpCodes.Ldloc_0);
+                addIL.Emit(OpCodes.Ldloc_1);
+                addIL.Emit(OpCodes.Bne_Un_S, loop);
+                addIL.Emit(OpCodes.Ret);
+                eventBuilder.SetAddOnMethod(addMethod);
+
+                delegateType.CreateType();
+                type.CreateType();
+                ab.Save(file.Path);
+
+                using (MetadataLoadContext mlc = new MetadataLoadContext(new CoreMetadataAssemblyResolver()))
+                {
+                    Assembly assemblyFromDisk = mlc.LoadFromAssemblyPath(file.Path);
+                    Type typeFromDisk = assemblyFromDisk.Modules.First().GetType("MyType");
+                    EventInfo eventFromDisk = typeFromDisk.GetEvent("MissingString");
+                    Assert.Equal(addMethod.Name, eventFromDisk.AddMethod.Name);
+                    Assert.Equal(delegateType.FullName, eventFromDisk.EventHandlerType.FullName);                }
+            }
+        }
     }
 }
diff --git a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveILGeneratorTests.cs b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveILGeneratorTests.cs
similarity index 95%
rename from src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveILGeneratorTests.cs
rename to src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveILGeneratorTests.cs
index c2fd512b85e8..faded2adaa8b 100644
--- a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveILGeneratorTests.cs
+++ b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveILGeneratorTests.cs
@@ -17,7 +17,7 @@ public void MethodWithEmptyBody()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 MethodBuilder methodBuilder = type.DefineMethod("EmptyMethod", MethodAttributes.Public, typeof(void), [typeof(Version)]);
                 ILGenerator il = methodBuilder.GetILGenerator();
                 il.Emit(OpCodes.Ret);
@@ -46,7 +46,7 @@ public void MethodReturning_Int(int size)
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 MethodBuilder method = type.DefineMethod("TestMethod", MethodAttributes.Public | MethodAttributes.Static, typeof(int), Type.EmptyTypes);
 
                 ILGenerator ilGenerator = method.GetILGenerator(size);
@@ -77,7 +77,7 @@ public void TypeWithTwoMethod_ReferenceMethodArguments(int multiplier)
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 MethodBuilder multiplyMethod = type.DefineMethod("MultiplyMethod", MethodAttributes.Public | MethodAttributes.Static, typeof(int), [typeof(int)]);
                 multiplyMethod.DefineParameter(1, ParameterAttributes.None, "myParam");
                 MethodBuilder addMethod = type.DefineMethod("AddMethod", MethodAttributes.Public | MethodAttributes.Static, typeof(int), [typeof(int), typeof(int)]);
@@ -123,7 +123,7 @@ public void MultipleTypesWithMultipleMethods()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 MethodBuilder multiplyMethod = type.DefineMethod("MultiplyMethod", MethodAttributes.Public, typeof(short), [typeof(short)]);
                 MethodBuilder addMethod = type.DefineMethod("AddMethod", MethodAttributes.Public | MethodAttributes.Static, typeof(double), [typeof(double)]);
 
@@ -187,7 +187,7 @@ public void MultipleTypesWithMultipleMethods()
         [Fact]
         public void ILOffset_Test()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             MethodBuilder method = type.DefineMethod("Method1", MethodAttributes.Public | MethodAttributes.Static, typeof(Type), Type.EmptyTypes);
             ILGenerator ilGenerator = method.GetILGenerator();
 
@@ -201,7 +201,7 @@ public void ILMaxStack_Test()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 MethodBuilder method1 = type.DefineMethod("Method1", MethodAttributes.Public, typeof(long), [typeof(int), typeof(long), typeof(short), typeof(byte)]);
                 ILGenerator il1 = method1.GetILGenerator();
 
@@ -275,7 +275,7 @@ public void Label_ConditionalBranching()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 MethodBuilder methodBuilder = type.DefineMethod("Method1", MethodAttributes.Public, typeof(int), [typeof(int), typeof(int)]);
                 ILGenerator il = methodBuilder.GetILGenerator();
                 Label failed = il.DefineLabel();
@@ -329,7 +329,7 @@ public void Label_SwitchCase()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 MethodBuilder methodBuilder = type.DefineMethod("Method1", MethodAttributes.Public, typeof(string), [typeof(int)]);
                 ILGenerator il = methodBuilder.GetILGenerator();
                 Label defaultCase = il.DefineLabel();
@@ -397,7 +397,7 @@ public void LocalBuilderMultipleLocalsUsage()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 MethodBuilder methodBuilder = type.DefineMethod("Method1", MethodAttributes.Public | MethodAttributes.Static, typeof(int), [typeof(int), typeof(string)]);
                 ILGenerator il = methodBuilder.GetILGenerator();
                 LocalBuilder intLocal = il.DeclareLocal(typeof(int));
@@ -481,7 +481,7 @@ public void LocalBuilderMultipleTypesWithMultipleMethodsWithLocals()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 MethodBuilder methodBuilder = type.DefineMethod("Method1", MethodAttributes.Public | MethodAttributes.Static, typeof(string), [typeof(int), typeof(string)]);
                 ILGenerator il = methodBuilder.GetILGenerator();
                 LocalBuilder intLocal = il.DeclareLocal(typeof(int));
@@ -569,7 +569,7 @@ public void LocalBuilderMultipleTypesWithMultipleMethodsWithLocals()
         [Fact]
         public void LocalBuilderExceptions()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             ILGenerator il = type.DefineMethod("Method1", MethodAttributes.Public).GetILGenerator();
             ILGenerator anotherIL = type.DefineMethod("AnotherMethod", MethodAttributes.Public).GetILGenerator();
             LocalBuilder stringLocal = il.DeclareLocal(typeof(string));
@@ -585,7 +585,7 @@ public void ReferenceFieldInIL()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder methodBuilder = tb.DefineMethod("Method1", MethodAttributes.Public, typeof(int), [typeof(int)]);
                 FieldBuilder fbNumber = tb.DefineField("_number", typeof(int), FieldAttributes.Private);
                 Assert.Equal(0, fbNumber.MetadataToken);
@@ -621,7 +621,7 @@ public void ReferenceFieldAndMethodsInIL()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder methodMain = tb.DefineMethod("Main", MethodAttributes.Public, typeof(void), [typeof(int)]);
                 FieldBuilder field = tb.DefineField("_field", typeof(int), FieldAttributes.Private);
                 MethodInfo writeLineString = typeof(Console).GetMethod("WriteLine", [typeof(string)]);
@@ -699,7 +699,7 @@ public void ReferenceConstructedGenericMethod()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 ConstructorBuilder ctor = type.DefineDefaultConstructor(MethodAttributes.Public);
                 MethodBuilder genericMethod = type.DefineMethod("GM", MethodAttributes.Public | MethodAttributes.Static);
                 GenericTypeParameterBuilder[] methodParams = genericMethod.DefineGenericParameters("U");
@@ -738,7 +738,7 @@ public void ReferenceConstructedGenericMethodFieldOfConstructedType()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 GenericTypeParameterBuilder[] typeParams = type.DefineGenericParameters(["T"]);
                 ConstructorBuilder ctor = type.DefineDefaultConstructor(MethodAttributes.PrivateScope | MethodAttributes.Public |
                     MethodAttributes.HideBySig | MethodAttributes.SpecialName | MethodAttributes.RTSpecialName);
@@ -833,7 +833,7 @@ public void EmitWriteLineMacroTest()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type1);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type1);
                 MethodBuilder method = type1.DefineMethod("meth", MethodAttributes.Public, typeof(int), Type.EmptyTypes);
                 FieldBuilder field = type1.DefineField("field", typeof(int), FieldAttributes.Public | FieldAttributes.Static);
                 ILGenerator ilGenerator = method.GetILGenerator();
@@ -881,7 +881,7 @@ public void ReferenceStaticFieldAndMethodsInIL()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder methodMain = tb.DefineMethod("Main", MethodAttributes.Public, typeof(int), [typeof(int)]);
                 TypeBuilder anotherType = ab.GetDynamicModule("MyModule").DefineType("AnotherType", TypeAttributes.Public);
                 FieldBuilder field = anotherType.DefineField("StaticField", typeof(int), FieldAttributes.Public | FieldAttributes.Static);
@@ -934,7 +934,7 @@ public void ReferenceConstructorInIL()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder methodBuilder = tb.DefineMethod("Method1", MethodAttributes.Public, typeof(Version), [typeof(int), typeof(int)]);
                 ConstructorInfo ctor = typeof(Version).GetConstructor([typeof(int), typeof(int)]);
 
@@ -964,7 +964,7 @@ public void ReferenceAType()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder method = tb.DefineMethod("meth1", MethodAttributes.Public | MethodAttributes.Static, typeof(bool), Type.EmptyTypes);
                 ILGenerator ilGenerator = method.GetILGenerator();
                 LocalBuilder lb0 = ilGenerator.DeclareLocal(typeof(ValueTuple));
@@ -993,7 +993,7 @@ public void ReferenceAType()
         [Fact]
         public void MemberReferenceExceptions()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             MethodBuilder method = type.DefineMethod("Method1", MethodAttributes.Public);
             ILGenerator il = method.GetILGenerator();
             MethodInfo nullMethod = null;
@@ -1026,7 +1026,7 @@ public void SimpleTryCatchBlock()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder method = tb.DefineMethod("Method", MethodAttributes.Public | MethodAttributes.Static, typeof(float), [typeof(int), typeof(int)]);
                 Type dBZException = typeof(DivideByZeroException);
                 ILGenerator ilGenerator = method.GetILGenerator();
@@ -1080,7 +1080,7 @@ public void TryMultipleCatchBlocks()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder method = tb.DefineMethod("Method", MethodAttributes.Public | MethodAttributes.Static, typeof(float), [typeof(int), typeof(int)]);
                 Type dBZException = typeof(DivideByZeroException);
                 Type exception = typeof(Exception);
@@ -1156,7 +1156,7 @@ public void TryFilterCatchBlock()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder method = tb.DefineMethod("Method", MethodAttributes.Public | MethodAttributes.Static, typeof(float), [typeof(int), typeof(int)]);
                 Type dBZException = typeof(DivideByZeroException);
                 Type exception = typeof(Exception);
@@ -1219,7 +1219,7 @@ public void TryCatchFilterCatchBlock()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder method = tb.DefineMethod("Method", MethodAttributes.Public | MethodAttributes.Static, typeof(float), [typeof(int), typeof(int)]);
                 Type dBZException = typeof(DivideByZeroException);
                 Type overflowException = typeof(OverflowException);
@@ -1294,7 +1294,7 @@ public void TryFinallyBlock()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder method = tb.DefineMethod("Method", MethodAttributes.Public | MethodAttributes.Static, typeof(float), [typeof(int), typeof(int)]);
                 ILGenerator ilGenerator = method.GetILGenerator();
                 LocalBuilder local = ilGenerator.DeclareLocal(typeof(float));
@@ -1341,7 +1341,7 @@ public void TryCatchFinallyBlock()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder method = tb.DefineMethod("Method", MethodAttributes.Public | MethodAttributes.Static, typeof(void), [typeof(int), typeof(int)]);
                 Type exception = typeof(Exception);
                 ILGenerator ilGenerator = method.GetILGenerator();
@@ -1379,7 +1379,7 @@ public void TryFilterCatchFinallyBlock()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder method = tb.DefineMethod("Method", MethodAttributes.Public | MethodAttributes.Static, typeof(int), [typeof(int), typeof(int)]);
                 Type overflowEType = typeof(OverflowException);
                 ConstructorInfo myConstructorInfo = overflowEType.GetConstructor([typeof(string)]);
@@ -1452,7 +1452,7 @@ public void TryFaultBlock()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder method = tb.DefineMethod("Method", MethodAttributes.Public | MethodAttributes.Static, typeof(float), [typeof(int), typeof(int)]);
                 ILGenerator ilGenerator = method.GetILGenerator();
                 Label exBlock = ilGenerator.BeginExceptionBlock();
@@ -1502,7 +1502,7 @@ public void NestedTryCatchBlocks()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder method = tb.DefineMethod("Method", MethodAttributes.Public | MethodAttributes.Static, typeof(void), [typeof(int), typeof(int)]);
                 Type exception = typeof(Exception);
                 ILGenerator ilGenerator = method.GetILGenerator();
@@ -1572,7 +1572,7 @@ public void DeeperNestedTryCatchFilterFinallyBlocks()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder method = tb.DefineMethod("Method", MethodAttributes.Public | MethodAttributes.Static, typeof(int), [typeof(int), typeof(int)]);
                 Type exception = typeof(Exception);
                 ILGenerator ilGenerator = method.GetILGenerator();
@@ -1678,7 +1678,7 @@ public void EmitCalliBlittable()
             int a = 1, b = 1, result = 2;
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("EmitCalliBlittable"));
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("EmitCalliBlittable"));
                 TypeBuilder tb = ab.DefineDynamicModule("MyModule").DefineType("MyType", TypeAttributes.Public | TypeAttributes.Class);
                 Type returnType = typeof(int);
                 MethodBuilder methodBuilder = tb.DefineMethod("F", MethodAttributes.Public | MethodAttributes.Static, returnType, [typeof(IntPtr), typeof(int), typeof(int)]);
@@ -1712,7 +1712,7 @@ public void EmitCalliManagedBlittable()
             int a = 1, b = 1, result = 2;
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("EmitCalliManagedBlittable"));
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("EmitCalliManagedBlittable"));
                 TypeBuilder tb = ab.DefineDynamicModule("MyModule").DefineType("MyType", TypeAttributes.Public | TypeAttributes.Class);
                 Type returnType = typeof(int);
                 MethodBuilder methodBuilder = tb.DefineMethod("F", MethodAttributes.Public | MethodAttributes.Static, returnType, [typeof(IntPtr), typeof(int), typeof(int)]);
@@ -1750,7 +1750,7 @@ public void EmitCalliNonBlittable()
                 string input = "Test string!", result = "!gnirts tseT";
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("EmitCalliNonBlittable"));
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("EmitCalliNonBlittable"));
                 TypeBuilder tb = ab.DefineDynamicModule("MyModule").DefineType("MyType", TypeAttributes.Public | TypeAttributes.Class);
                 Type returnType = typeof(string);
                 MethodBuilder methodBuilder = tb.DefineMethod("F", MethodAttributes.Public | MethodAttributes.Static, returnType, [typeof(IntPtr), typeof(string)]);
@@ -1783,7 +1783,7 @@ public void EmitCall_VarArgsMethodInIL()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder mb1 = tb.DefineMethod("VarArgMethod", MethodAttributes.Public | MethodAttributes.Static, CallingConventions.VarArgs, null, [typeof(string)]);
                 ILGenerator il1 = mb1.GetILGenerator();
                 LocalBuilder locAi = il1.DeclareLocal(typeof(ArgIterator));
@@ -1872,7 +1872,7 @@ public void Emit_CallBySignature()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder mb1 = tb.DefineMethod("VarArgMethod", MethodAttributes.Public | MethodAttributes.Static, CallingConventions.VarArgs, null, [typeof(string)]);
                 ILGenerator il1 = mb1.GetILGenerator();
                 FieldInfo maxStack = GetMaxStackDepthAndCurrentStackDepthField(out FieldInfo currentStack);
@@ -2191,7 +2191,7 @@ public void SimpleForLoopTest()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder mb2 = tb.DefineMethod("SumMethod", MethodAttributes.Public | MethodAttributes.Static, typeof(int), [typeof(int)]);
                 ILGenerator il = mb2.GetILGenerator();
                 LocalBuilder sum = il.DeclareLocal(typeof(int));
@@ -2238,7 +2238,7 @@ public void RecursiveSumTest()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("RecursiveSumTest"));
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("RecursiveSumTest"));
                 TypeBuilder tb = ab.DefineDynamicModule("MyModule").DefineType("MyType", TypeAttributes.Public | TypeAttributes.Class);
                 MethodBuilder mb2 = tb.DefineMethod("RecursiveMethod", MethodAttributes.Public | MethodAttributes.Static, typeof(int), [typeof(int)]);
                 ILGenerator il = mb2.GetILGenerator();
@@ -2277,7 +2277,7 @@ public void CallOpenGenericMembersFromConstructedGenericType()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 MethodBuilder method = type.DefineMethod("M1", MethodAttributes.Public, typeof(string), null);
 
                 ILGenerator ilGenerator = method.GetILGenerator();
@@ -2312,7 +2312,7 @@ public void ReferenceMethodsOfDictionaryFieldInGenericTypeWorks()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 TypeBuilder tb = ab.GetDynamicModule("MyModule").DefineType("EnumNameCache", TypeAttributes.NotPublic);
                 GenericTypeParameterBuilder[] param = tb.DefineGenericParameters(["TEnum"]);
                 Type fieldType = typeof(Dictionary<,>).MakeGenericType(param[0], typeof(string));
@@ -2368,7 +2368,7 @@ public void ANestedTypeUsedAsGenericArgumentWorks()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 TypeBuilder nested = type.DefineNestedType("Nested", TypeAttributes.NestedPrivate);
                 Type nestedFType = typeof(Dictionary<,>).MakeGenericType(typeof(Type), nested);
                 FieldBuilder nestedField = nested.DefineField("Helpers", nestedFType, FieldAttributes.Static | FieldAttributes.Private);
@@ -2425,7 +2425,7 @@ public void ReferenceNestedGenericCollectionsWithTypeBuilderParameterInIL()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 TypeBuilder nestedType = type.DefineNestedType("NestedType", TypeAttributes.NestedPublic);
 
                 Type returnType = typeof(List<>).MakeGenericType(typeof(Dictionary<,>).MakeGenericType(nestedType, typeof(bool)));
@@ -2458,7 +2458,7 @@ public void ReferenceNestedGenericTypeWithConstructedTypeBuilderParameterInIL()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 string[] genParams = new string[] { "T" };
                 GenericTypeParameterBuilder[] param = type.DefineGenericParameters(genParams);
                 TypeBuilder nestedItem = type.DefineNestedType("ItemInfo", TypeAttributes.NestedPublic);
@@ -2503,5 +2503,34 @@ public void ReferenceNestedGenericTypeWithConstructedTypeBuilderParameterInIL()
                 }
             }
         }
+
+        [Fact]
+        public void ConstructorOfGenericTypeReferencedCorrectly()
+        {
+            using (TempFile file = TempFile.Create())
+            {
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                FieldBuilder field = type.DefineField("Field", typeof(int?), FieldAttributes.Public);
+                ConstructorBuilder ctor = type.DefineConstructor(MethodAttributes.Public, CallingConventions.HasThis, Type.EmptyTypes);
+                ILGenerator ctorIL = ctor.GetILGenerator();
+                ctorIL.Emit(OpCodes.Ldarg_0);
+                ctorIL.Emit(OpCodes.Call, typeof(object).GetConstructor(Type.EmptyTypes));
+                ctorIL.Emit(OpCodes.Ldarg_0);
+                ctorIL.Emit(OpCodes.Ldc_I4_1);
+                ctorIL.Emit(OpCodes.Newobj, typeof(int?).GetConstructor([typeof(int)]));
+                ctorIL.Emit(OpCodes.Stfld, field);
+                ctorIL.Emit(OpCodes.Ret);
+                type.CreateType();
+                ab.Save(file.Path);
+
+                TestAssemblyLoadContext tlc = new TestAssemblyLoadContext();
+                Type typeFromDisk = tlc.LoadFromAssemblyPath(file.Path).GetType("MyType");
+                FieldInfo fieldFromDisk = typeFromDisk.GetField("Field");
+                object obj = Activator.CreateInstance(typeFromDisk);
+                Assert.Equal(typeof(int?), fieldFromDisk.FieldType);
+                Assert.Equal(1, fieldFromDisk.GetValue(obj));
+                tlc.Unload();
+            }
+        }
     }
 }
diff --git a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveModuleBuilderTests.cs b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveModuleBuilderTests.cs
similarity index 94%
rename from src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveModuleBuilderTests.cs
rename to src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveModuleBuilderTests.cs
index 7a009e638f58..959ea0022d5c 100644
--- a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveModuleBuilderTests.cs
+++ b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveModuleBuilderTests.cs
@@ -15,7 +15,7 @@ public void DefineGlobalMethodAndCreateGlobalFunctionsTest()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
                 ModuleBuilder module = ab.DefineDynamicModule("MyModule");
                 MethodBuilder method = module.DefineGlobalMethod("TestMethod", MethodAttributes.Static | MethodAttributes.Public, null, null);
                 ILGenerator ilGenerator = method.GetILGenerator();
@@ -63,7 +63,7 @@ public void DefineGlobalMethodAndCreateGlobalFunctionsTest()
         [Fact]
         public void DefineGlobalMethodAndCreateGlobalFunctions_Validations()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
             ModuleBuilder module = ab.DefineDynamicModule("MyModule");
             Assert.Throws<ArgumentException>(() => module.DefineGlobalMethod("TestMethod", MethodAttributes.Public, null, null)); // must be static
             MethodBuilder method = module.DefineGlobalMethod("TestMethod", MethodAttributes.Static | MethodAttributes.Public, null, null);
@@ -85,7 +85,7 @@ public static void DefinePInvokeMethodTest()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
                 DpmParams p = new DpmParams() { MethodName = "A2", LibName = "Foo2.dll", EntrypointName = "Wha2", ReturnType = typeof(int),
                     ParameterTypes = [typeof(int)], NativeCallConv = CallingConvention.Cdecl };
 
@@ -117,7 +117,7 @@ public static void DefinePInvokeMethodTest()
         [InlineData(FieldAttributes.Assembly | FieldAttributes.SpecialName)]
         public void DefineUninitializedDataTest(FieldAttributes attributes)
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
             ModuleBuilder module = ab.DefineDynamicModule("MyModule");
             foreach (int size in new int[] { 1, 2, 0x003f0000 - 1 })
             {
@@ -133,7 +133,7 @@ public void DefineUninitializedDataTest(FieldAttributes attributes)
         [Fact]
         public void DefineUninitializedData_Validations()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
             ModuleBuilder module = ab.DefineDynamicModule("MyModule");
 
             AssertExtensions.Throws<ArgumentNullException>("name", () => module.DefineUninitializedData(null, 1, FieldAttributes.Family));
@@ -154,7 +154,7 @@ public void DefineUninitializedData_Validations()
         [InlineData(FieldAttributes.Private)]
         public void DefineInitializedDataTest(FieldAttributes attributes)
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
             ModuleBuilder module = ab.DefineDynamicModule("MyModule");
             FieldBuilder field = module.DefineInitializedData("MyField", [01, 00, 01], attributes);
 
@@ -167,7 +167,7 @@ public void DefineInitializedDataTest(FieldAttributes attributes)
         [Fact]
         public void DefineInitializedData_Validations()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
             ModuleBuilder module = ab.DefineDynamicModule("MyModule");
 
             AssertExtensions.Throws<ArgumentNullException>("name", () => module.DefineInitializedData(null, [1, 0, 1], FieldAttributes.Public));
@@ -190,7 +190,7 @@ public void DefineInitializedData_EnsureAlignmentIsMinimumNeededForUseOfCreateSp
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
                 ModuleBuilder module = ab.DefineDynamicModule("MyModule");
                 TypeBuilder tb = module.DefineType("MyType", TypeAttributes.Public);
                 // Create static field data in a variety of orders that requires the runtime to actively apply alignment
@@ -265,7 +265,7 @@ void CheckMethod(string name, int minAlignmentRequired, byte[] dataToVerify)
         [ActiveIssue("https://github.com/dotnet/runtime/issues/96389", TestRuntimes.Mono)]
         public void GetABCMetadataToken_Validations()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
             ModuleBuilder module = ab.DefineDynamicModule("MyModule");
             TypeBuilder type = module.DefineType("MyType", TypeAttributes.Public);
             MethodBuilder method = type.DefineMethod("TestMethod", MethodAttributes.Static | MethodAttributes.Public);
@@ -294,7 +294,7 @@ public static void GetArrayMethodTest()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("MyAssembly"));
 
                 ModuleBuilder mb = ab.DefineDynamicModule("MyModule");
                 TypeBuilder tb = mb.DefineType("TestClass", TypeAttributes.Public);
diff --git a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySavePropertyBuilderTests.cs b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySavePropertyBuilderTests.cs
similarity index 98%
rename from src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySavePropertyBuilderTests.cs
rename to src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySavePropertyBuilderTests.cs
index c18eb017c142..06caf8dbb6a7 100644
--- a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySavePropertyBuilderTests.cs
+++ b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySavePropertyBuilderTests.cs
@@ -18,7 +18,7 @@ public void SetPropertyAccessorsAndOtherValues()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 FieldBuilder field = type.DefineField("TestField", typeof(int), FieldAttributes.Private);
                 PropertyBuilder property = type.DefineProperty("TestProperty", PropertyAttributes.SpecialName | PropertyAttributes.HasDefault, typeof(int), null);
                 MethodBuilder getMethod = type.DefineMethod("GetMethod", MethodAttributes.Public | MethodAttributes.HideBySig, typeof(int), null);
@@ -84,7 +84,7 @@ public void SetVariousCustomAttributes_ForProperty()
                 PropertyInfo prop = typeof(CustomAttributeBuilder).GetProperty("Data", BindingFlags.NonPublic | BindingFlags.Instance);
                 byte[] binaryData = (byte[])prop.GetValue(customAttrBuilder, null);
 
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 PropertyBuilder property = type.DefineProperty("TestProperty", PropertyAttributes.HasDefault, typeof(int), null);
                 property.SetCustomAttribute(con, binaryData);
                 property.SetCustomAttribute(new CustomAttributeBuilder(typeof(SpecialNameAttribute).GetConstructor(Type.EmptyTypes), []));
diff --git a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveTools.cs b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveTools.cs
similarity index 93%
rename from src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveTools.cs
rename to src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveTools.cs
index dc083e521815..6c2d65fb7ad4 100644
--- a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveTools.cs
+++ b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveTools.cs
@@ -31,7 +31,7 @@ internal static class AssemblySaveTools
 
         internal static void WriteAssemblyToDisk(AssemblyName assemblyName, Type[] types, string fileLocation)
         {
-            AssemblyBuilder assemblyBuilder = PopulateAssemblyBuilder(assemblyName);
+            PersistedAssemblyBuilder assemblyBuilder = PopulateAssemblyBuilder(assemblyName);
 
             ModuleBuilder mb = assemblyBuilder.DefineDynamicModule(assemblyName.Name);
             PopulateMembersForModule(mb, types);
@@ -67,7 +67,7 @@ private static void PopulateMembersForModule(ModuleBuilder mb, Type[] types)
 
         internal static void WriteAssemblyToStream(AssemblyName assemblyName, Type[] types, Stream stream)
         {
-            AssemblyBuilder assemblyBuilder = PopulateAssemblyBuilder(assemblyName);
+            PersistedAssemblyBuilder assemblyBuilder = PopulateAssemblyBuilder(assemblyName);
 
             ModuleBuilder mb = assemblyBuilder.DefineDynamicModule(assemblyName.Name);
             PopulateMembersForModule(mb, types);
@@ -75,15 +75,15 @@ internal static void WriteAssemblyToStream(AssemblyName assemblyName, Type[] typ
             assemblyBuilder.Save(stream);
         }
 
-        internal static AssemblyBuilder PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder typeBuilder)
+        internal static PersistedAssemblyBuilder PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder typeBuilder)
         {
-            AssemblyBuilder ab = PopulateAssemblyBuilder(s_assemblyName, null);
+            PersistedAssemblyBuilder ab = PopulateAssemblyBuilder(s_assemblyName, null);
             typeBuilder = ab.DefineDynamicModule("MyModule").DefineType("MyType", TypeAttributes.Public | TypeAttributes.Class);
             return ab;
         }
 
-        internal static AssemblyBuilder PopulateAssemblyBuilder(AssemblyName assemblyName, List<CustomAttributeBuilder>? assemblyAttributes = null) =>
-            AssemblyBuilder.DefinePersistedAssembly(assemblyName, CoreMetadataAssemblyResolver.s_coreAssembly, assemblyAttributes);
+        internal static PersistedAssemblyBuilder PopulateAssemblyBuilder(AssemblyName assemblyName, List<CustomAttributeBuilder>? assemblyAttributes = null) =>
+            new PersistedAssemblyBuilder(assemblyName, CoreMetadataAssemblyResolver.s_coreAssembly, assemblyAttributes);
 
         internal static void AssertAssemblyNameAndModule(AssemblyName sourceAName, AssemblyName aNameFromDisk, Module moduleFromDisk)
         {
diff --git a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveTypeBuilderAPIsTests.cs b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveTypeBuilderAPIsTests.cs
similarity index 93%
rename from src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveTypeBuilderAPIsTests.cs
rename to src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveTypeBuilderAPIsTests.cs
index 4b04b03acd2a..e533a2344662 100644
--- a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveTypeBuilderAPIsTests.cs
+++ b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveTypeBuilderAPIsTests.cs
@@ -17,7 +17,7 @@ public void DefineMethodOverride_InterfaceMethod()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 MethodBuilder method = type.DefineMethod("MImpl", MethodAttributes.Public | MethodAttributes.Virtual, typeof(int), null);
                 ILGenerator ilGenerator = method.GetILGenerator();
                 ilGenerator.Emit(OpCodes.Ldc_I4, 2);
@@ -49,7 +49,7 @@ public void DefineMethodOverride_BaseTypeImplementation()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 type.SetParent(typeof(DefineMethodOverrideClass));
                 MethodBuilder method = type.DefineMethod("M2", MethodAttributes.Public | MethodAttributes.Virtual, typeof(int), null);
                 ILGenerator ilGenerator = method.GetILGenerator();
@@ -73,7 +73,7 @@ public void DefineMethodOverride_GenericInterface_Succeeds()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 type.AddInterfaceImplementation(typeof(GenericInterface<string>));
                 MethodBuilder method = type.DefineMethod("Method", MethodAttributes.Public | MethodAttributes.Virtual, typeof(string), Type.EmptyTypes);
                 ILGenerator ilGenerator = method.GetILGenerator();
@@ -219,7 +219,7 @@ public interface InterfaceWithMethod
         [Fact]
         public void DefineMethodOverride_StaticVirtualInterfaceMethodWorks()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             ModuleBuilder module = ab.GetDynamicModule("MyModule");
 
             TypeBuilder interfaceType = module.DefineType("InterfaceType", TypeAttributes.Public | TypeAttributes.Interface | TypeAttributes.Abstract, parent: null);
@@ -249,7 +249,7 @@ public abstract class Impl : InterfaceWithMethod
         [Fact]
         public void DefineMethodOverride_InterfaceImplementationWithByRefArrayTypes()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             ModuleBuilder module = ab.GetDynamicModule("MyModule");
 
             TypeBuilder interfaceType = module.DefineType("InterfaceType", TypeAttributes.Public | TypeAttributes.Interface | TypeAttributes.Abstract);
@@ -279,7 +279,7 @@ public void DefineMethodOverride_InterfaceImplementationWithByRefArrayTypes()
         [Fact]
         public void TypeBuilderImplementsGenericInterfaceWithTypeBuilderGenericConstraint()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             ModuleBuilder module = ab.GetDynamicModule("MyModule");
             TypeBuilder ifaceType = module.DefineType("InterfaceType", TypeAttributes.Public | TypeAttributes.Interface | TypeAttributes.Abstract);
             TypeBuilder implType = module.DefineType("ImplType", TypeAttributes.Public);
@@ -302,7 +302,7 @@ public void TypeBuilderImplementsGenericInterfaceWithTypeBuilderGenericConstrain
         [Fact]
         public void TypeBuilderImplementsGenericInterfaceWithTypeBuilderArgument()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             ModuleBuilder module = ab.GetDynamicModule("MyModule");
             Type constructedGenericInterface = typeof(IComparable<>).MakeGenericType(type);
 
@@ -320,7 +320,7 @@ public void TypeBuilderImplementsGenericInterfaceWithTypeBuilderArgument()
         [Fact]
         public void TypeBuilderImplementsGenericInterface()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             ModuleBuilder module = ab.GetDynamicModule("MyModule");
             TypeBuilder implType = module.DefineType("ImplType", TypeAttributes.Public);
 
@@ -341,7 +341,7 @@ public void TypeBuilderImplementsGenericInterface()
         [Fact]
         public void TypeBuilderImplementsConstructedGenericInterface()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             ModuleBuilder module = ab.GetDynamicModule("MyModule");
 
             TypeBuilder implType = module.DefineType("ImplType", TypeAttributes.Public, parent: typeof(object), [typeof(IComparable<string>)]);
@@ -358,7 +358,7 @@ public void TypeBuilderImplementsConstructedGenericInterface()
         [Fact]
         public void GetInterfaceMap_WithImplicitOverride_DefineMethodOverride()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
             ModuleBuilder module = ab.GetDynamicModule("MyModule");
 
             TypeBuilder interfaceType = module.DefineType("InterfaceType", TypeAttributes.Public | TypeAttributes.Interface | TypeAttributes.Abstract, parent: null);
@@ -448,7 +448,7 @@ public interface IStaticAbstract
         [Fact]
         public void CreateType_ValidateMethods()
         {
-            AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder concreteTypeWithAbstractMethod);
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder concreteTypeWithAbstractMethod);
             concreteTypeWithAbstractMethod.DefineMethod("AbstractMethod", MethodAttributes.Public | MethodAttributes.Abstract);
             Assert.Throws<InvalidOperationException>(() => concreteTypeWithAbstractMethod.CreateType()); // Type must be declared abstract if any of its methods are abstract.
 
@@ -514,7 +514,7 @@ public void ReturnTypeAndParameterRequiredOptionalCustomModifiers()
                 Type[] cmodsReq2 = [typeof(uint)];
                 Type[] cmodsOpt1 = [typeof(int)];
                 Type[] cmodsOpt2 = [typeof(long), typeof(byte), typeof(bool)];
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 MethodBuilder methodAll = type.DefineMethod("AllModifiers", MethodAttributes.Public | MethodAttributes.Static, CallingConventions.Standard,
                     typeof(string), [typeof(int), typeof(short)], [typeof(Version)], [typeof(int), typeof(long)], [cmodsReq1, cmodsReq2], [cmodsOpt1, cmodsOpt2]);
                 ILGenerator ilGenerator = methodAll.GetILGenerator();
@@ -555,7 +555,7 @@ public static void DefinePInvokeMethodExecution_Windows()
 
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("DefinePInvokeMethodExecution_Windows"));
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilder(new AssemblyName("DefinePInvokeMethodExecution_Windows"));
                 TypeBuilder tb = ab.DefineDynamicModule("MyModule").DefineType("MyType", TypeAttributes.Public | TypeAttributes.Class);
                 MethodBuilder mb = tb.DefinePInvokeMethod(
                     "GetEnvironmentVariableW",
@@ -624,7 +624,7 @@ public static void TestDefinePInvokeMethod(DpmParams p)
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 MethodBuilder mb = tb.DefinePInvokeMethod(p.MethodName, p.LibName, p.EntrypointName, p.Attributes, p.ManagedCallConv, p.ReturnType,
                     p.ReturnTypeReqMods, p.ReturnTypeOptMods, p.ParameterTypes, p.ParameterTypeReqMods, p.ParameterTypeOptMods, p.NativeCallConv, p.Charset);
                 mb.SetImplementationFlags(mb.GetMethodImplementationFlags() | MethodImplAttributes.PreserveSig);
@@ -756,7 +756,7 @@ public void DefineTypeInitializer()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 FieldBuilder greetingField = tb.DefineField("Greeting", typeof(string), FieldAttributes.Private | FieldAttributes.Static);
                 ConstructorBuilder constructor = tb.DefineTypeInitializer();
                 ILGenerator constructorIlGenerator = constructor.GetILGenerator();
@@ -780,7 +780,7 @@ public static void DefineUninitializedDataTest()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
                 FieldBuilder myFieldBuilder = tb.DefineUninitializedData("MyGreeting", 4, FieldAttributes.Public);
                 var loadAddressMethod = tb.DefineMethod("LoadAddress", MethodAttributes.Public | MethodAttributes.Static, typeof(IntPtr), null);
                 var methodIL = loadAddressMethod.GetILGenerator();
@@ -870,7 +870,7 @@ public void AbstractBaseMethodImplementationReturnsDifferentType()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+                PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
                 TypeBuilder baseType = ab.GetDynamicModule("MyModule").DefineType("Base", TypeAttributes.Public | TypeAttributes.Abstract);
                 MethodBuilder getBase = baseType.DefineMethod("Get", MethodAttributes.Public | MethodAttributes.Abstract | MethodAttributes.Virtual, baseType, null);
                 type.SetParent(baseType);
@@ -892,5 +892,22 @@ public void AbstractBaseMethodImplementationReturnsDifferentType()
                 Assert.IsType(typeFromDisk, obj);
             }
         }
+
+        [Fact]
+        public void TestContainsGenericParametersOnMethodCtorOfConstructedGenericType()
+        {
+            AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder tb);
+            GenericTypeParameterBuilder[] typeParameters = tb.DefineGenericParameters("T");
+            ConstructorBuilder constructorBuilder = tb.DefineDefaultConstructor(MethodAttributes.Public);
+            MethodBuilder methodBuilder = tb.DefineMethod("Method", MethodAttributes.Public);
+
+            Type instantiatedTypeBuilder1 = tb.MakeGenericType(typeof(List<>).GetGenericArguments()[0]);
+            Assert.True(TypeBuilder.GetConstructor(instantiatedTypeBuilder1, constructorBuilder).ContainsGenericParameters);
+            Assert.True(TypeBuilder.GetMethod(instantiatedTypeBuilder1, methodBuilder).ContainsGenericParameters);
+
+            Type instantiatedTypeBuilder2 = tb.MakeGenericType(typeof(int));
+            Assert.False(TypeBuilder.GetConstructor(instantiatedTypeBuilder2, constructorBuilder).ContainsGenericParameters);
+            Assert.False(TypeBuilder.GetMethod(instantiatedTypeBuilder2, methodBuilder).ContainsGenericParameters);
+        }
     }
 }
diff --git a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveTypeBuilderTests.cs b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveTypeBuilderTests.cs
similarity index 89%
rename from src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveTypeBuilderTests.cs
rename to src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveTypeBuilderTests.cs
index 6489fe2715fa..31536bc266c0 100644
--- a/src/libraries/System.Reflection.Emit/tests/PersistableAssemblyBuilder/AssemblySaveTypeBuilderTests.cs
+++ b/src/libraries/System.Reflection.Emit/tests/PersistedAssemblyBuilder/AssemblySaveTypeBuilderTests.cs
@@ -100,7 +100,7 @@ public void CreateMembersThatUsesTypeLoadedFromCoreAssemblyTest()
         {
             using (TempFile file = TempFile.Create())
             {
-                TypeBuilder tb = CreateAssemblyAndDefineType(out AssemblyBuilder assemblyBuilder);
+                TypeBuilder tb = CreateAssemblyAndDefineType(out PersistedAssemblyBuilder assemblyBuilder);
                 tb.DefineMethod("TestMethod", MethodAttributes.Public).GetILGenerator().Emit(OpCodes.Ret);
                 tb.CreateType();
                 assemblyBuilder.Save(file.Path);
@@ -124,7 +124,7 @@ public void CreateMembersThatUsesTypeLoadedFromCoreAssemblyTest()
             }
         }
 
-        private static TypeBuilder CreateAssemblyAndDefineType(out AssemblyBuilder assemblyBuilder)
+        private static TypeBuilder CreateAssemblyAndDefineType(out PersistedAssemblyBuilder assemblyBuilder)
         {
             assemblyBuilder = AssemblySaveTools.PopulateAssemblyBuilder(s_assemblyName);
             return assemblyBuilder.DefineDynamicModule("MyModule")
@@ -136,7 +136,7 @@ public void AddInterfaceImplementationTest()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder assemblyBuilder = AssemblySaveTools.PopulateAssemblyBuilder(s_assemblyName);
+                PersistedAssemblyBuilder assemblyBuilder = AssemblySaveTools.PopulateAssemblyBuilder(s_assemblyName);
                 ModuleBuilder mb = assemblyBuilder.DefineDynamicModule("My Module");
                 TypeBuilder tb = mb.DefineType("TestInterface", TypeAttributes.Interface | TypeAttributes.Abstract, null, [typeof(IOneMethod)]);
                 tb.AddInterfaceImplementation(typeof(INoMethod));
@@ -176,7 +176,7 @@ public void SaveGenericTypeParametersForAType(string[] typeParamNames)
         {
             using (TempFile file = TempFile.Create())
             {
-                TypeBuilder tb = CreateAssemblyAndDefineType(out AssemblyBuilder assemblyBuilder);
+                TypeBuilder tb = CreateAssemblyAndDefineType(out PersistedAssemblyBuilder assemblyBuilder);
                 MethodBuilder method = tb.DefineMethod("TestMethod", MethodAttributes.Public);
                 method.GetILGenerator().Emit(OpCodes.Ldarg_0);
                 GenericTypeParameterBuilder[] typeParams = tb.DefineGenericParameters(typeParamNames);
@@ -245,7 +245,7 @@ public void SaveGenericTypeParametersForAMethod(string[] typeParamNames)
         {
             using (TempFile file = TempFile.Create())
             {
-                TypeBuilder tb = CreateAssemblyAndDefineType(out AssemblyBuilder assemblyBuilder);
+                TypeBuilder tb = CreateAssemblyAndDefineType(out PersistedAssemblyBuilder assemblyBuilder);
                 MethodBuilder method = tb.DefineMethod("TestMethod", MethodAttributes.Public);
                 GenericTypeParameterBuilder[] typeParams = method.DefineGenericParameters(typeParamNames);
                 method.GetILGenerator().Emit(OpCodes.Ldarg_0);
@@ -282,7 +282,7 @@ public void SaveArrayTypeSignature(int rank, string name)
         {
             using (TempFile file = TempFile.Create())
             {
-                TypeBuilder tb = CreateAssemblyAndDefineType(out AssemblyBuilder assemblyBuilder);
+                TypeBuilder tb = CreateAssemblyAndDefineType(out PersistedAssemblyBuilder assemblyBuilder);
                 Type arrayType = rank == 0 ? tb.MakeArrayType() : tb.MakeArrayType(rank);
                 MethodBuilder mb = tb.DefineMethod("TestMethod", MethodAttributes.Public);
                 mb.SetReturnType(arrayType);
@@ -320,7 +320,7 @@ public void SaveByRefTypeSignature()
         {
             using (TempFile file = TempFile.Create())
             {
-                TypeBuilder tb = CreateAssemblyAndDefineType(out AssemblyBuilder assemblyBuilder);
+                TypeBuilder tb = CreateAssemblyAndDefineType(out PersistedAssemblyBuilder assemblyBuilder);
                 Type byrefType = tb.MakeByRefType();
                 MethodBuilder mb = tb.DefineMethod("TestMethod", MethodAttributes.Public);
                 mb.SetReturnType(byrefType);
@@ -352,7 +352,7 @@ public void SavePointerTypeSignature()
         {
             using (TempFile file = TempFile.Create())
             {
-                TypeBuilder tb = CreateAssemblyAndDefineType(out AssemblyBuilder assemblyBuilder);
+                TypeBuilder tb = CreateAssemblyAndDefineType(out PersistedAssemblyBuilder assemblyBuilder);
                 Type pointerType = tb.MakePointerType();
                 MethodBuilder mb = tb.DefineMethod("TestMethod", MethodAttributes.Public);
                 mb.SetReturnType(pointerType);
@@ -423,7 +423,7 @@ public void SaveGenericTypeSignature(string[] genericParams, Type[] typeArgument
         {
             using (TempFile file = TempFile.Create())
             {
-                TypeBuilder tb = CreateAssemblyAndDefineType(out AssemblyBuilder assemblyBuilder);
+                TypeBuilder tb = CreateAssemblyAndDefineType(out PersistedAssemblyBuilder assemblyBuilder);
                 GenericTypeParameterBuilder[] typeGenParam = tb.DefineGenericParameters(genericParams);
                 Type genericType = tb.MakeGenericType(typeArguments);
                 MethodBuilder mb = tb.DefineMethod("TestMethod", MethodAttributes.Public);
@@ -504,7 +504,7 @@ public void SaveGenericTypeSignatureWithGenericParameter()
         {
             using (TempFile file = TempFile.Create())
             {
-                TypeBuilder tb = CreateAssemblyAndDefineType(out AssemblyBuilder assemblyBuilder);
+                TypeBuilder tb = CreateAssemblyAndDefineType(out PersistedAssemblyBuilder assemblyBuilder);
                 GenericTypeParameterBuilder[] typeParams = tb.DefineGenericParameters(["U", "T", "P"]);
                 MethodBuilder mb = tb.DefineMethod("TestMethod", MethodAttributes.Public);
                 GenericTypeParameterBuilder[] methodParams = mb.DefineGenericParameters(["M", "N"]);
@@ -545,7 +545,7 @@ public void SaveMultipleGenericTypeParametersToEnsureSortingWorks()
         {
             using (TempFile file = TempFile.Create())
             {
-                AssemblyBuilder assemblyBuilder = AssemblySaveTools.PopulateAssemblyBuilder(s_assemblyName);
+                PersistedAssemblyBuilder assemblyBuilder = AssemblySaveTools.PopulateAssemblyBuilder(s_assemblyName);
                 ModuleBuilder mb = assemblyBuilder.DefineDynamicModule("My Module");
                 TypeBuilder tb = mb.DefineType("TestInterface1", TypeAttributes.Interface | TypeAttributes.Abstract);
                 GenericTypeParameterBuilder[] typeParams = tb.DefineGenericParameters(["U", "T"]);
@@ -606,6 +606,66 @@ public void SaveMultipleGenericTypeParametersToEnsureSortingWorks()
                 }
             }
         }
+
+        [Fact]
+        public void MethodBuilderGetParametersReturnParameterTest()
+        {
+            PersistedAssemblyBuilder assemblyBuilder = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder type);
+            MethodBuilder method1 = type.DefineMethod("Method1", MethodAttributes.Public, typeof(long), [typeof(int), typeof(string)]);
+            MethodBuilder method2 = type.DefineMethod("Method2", MethodAttributes.Static);
+            MethodBuilder method3 = type.DefineMethod("Method1", MethodAttributes.Public, typeof(int), [typeof(string)]);
+            method1.DefineParameter(0, ParameterAttributes.Retval, null);
+            method1.DefineParameter(1, ParameterAttributes.None, "index");
+            method1.DefineParameter(2, ParameterAttributes.Out, "outParam");
+            ParameterBuilder pb = method3.DefineParameter(1, ParameterAttributes.Optional, "name");
+            pb.SetConstant("defaultName");
+            //type.CreateType();
+
+            ParameterInfo[] params1 = method1.GetParameters();
+            Assert.Equal(2, params1.Length);
+            Assert.Equal("index", params1[0].Name);
+            Assert.Equal(typeof(int), params1[0].ParameterType);
+            Assert.Equal("outParam", params1[1].Name);
+            Assert.Equal(typeof(string), params1[1].ParameterType);
+            Assert.Equal(ParameterAttributes.Out, params1[1].Attributes);
+            Assert.True(params1[1].IsOut);
+            Assert.Equal(typeof(long), method1.ReturnParameter.ParameterType);
+            Assert.Null(method1.ReturnParameter.Name);
+            Assert.True(method1.ReturnParameter.IsRetval);
+
+            Assert.Empty(method2.GetParameters());
+            Assert.Equal(typeof(void), method2.ReturnParameter.ParameterType);
+            Assert.Null(method2.ReturnParameter.Name);
+            Assert.True(method2.ReturnParameter.IsRetval);
+
+            ParameterInfo[] params3 = method3.GetParameters();
+            Assert.Equal(1, params3.Length);
+            Assert.Equal("name", params3[0].Name);
+            Assert.Equal(typeof(string), params3[0].ParameterType);
+            Assert.True(params3[0].HasDefaultValue);
+            Assert.Equal("defaultName", params3[0].DefaultValue);
+
+            Assert.Equal(typeof(int), method3.ReturnParameter.ParameterType);
+            Assert.Null(method3.ReturnParameter.Name);
+            Assert.True(method3.ReturnParameter.IsRetval);
+        }
+
+        public class BaseType<T> { }
+
+        [Fact]
+        public void GenericTypeWithTypeBuilderGenericParameter_UsedAsParent()
+        {
+            PersistedAssemblyBuilder ab = AssemblySaveTools.PopulateAssemblyBuilderAndTypeBuilder(out TypeBuilder typeBuilder);
+
+            Type type = typeBuilder.CreateType();
+            var baseType = typeof(BaseType<>).GetGenericTypeDefinition().MakeGenericType(type);
+
+            var typeBuilder2 = ab.GetDynamicModule("MyModule")
+                .DefineType("TestService", TypeAttributes.Public | TypeAttributes.Class, baseType);
+            typeBuilder2.CreateType();
+
+            Assert.NotNull(type.GetConstructor(Type.EmptyTypes)); // Default constructor created
+        }
     }
 
     // Test Types
diff --git a/src/libraries/System.Reflection.Emit/tests/System.Reflection.Emit.Tests.csproj b/src/libraries/System.Reflection.Emit/tests/System.Reflection.Emit.Tests.csproj
index f70ec0e3d751..5317b25145a5 100644
--- a/src/libraries/System.Reflection.Emit/tests/System.Reflection.Emit.Tests.csproj
+++ b/src/libraries/System.Reflection.Emit/tests/System.Reflection.Emit.Tests.csproj
@@ -62,17 +62,17 @@
     <Compile Include="ModuleBuilder\ModuleBuilderGetArrayMethod.cs" />
     <Compile Include="ModuleBuilder\ModuleBuilderSetCustomAttribute.cs" />
     <Compile Include="ParameterBuilder\ParameterBuilderSetConstant.cs" />
-    <Compile Include="PersistableAssemblyBuilder\AssemblySaveAssemblyBuilder.cs" />
-    <Compile Include="PersistableAssemblyBuilder\AssemblySaveCustomAttributeTests.cs" />
-    <Compile Include="PersistableAssemblyBuilder\AssemblySaveConstructorBuilderTests.cs" />
-    <Compile Include="PersistableAssemblyBuilder\AssemblySaveEnumBuilderTests.cs" />
-    <Compile Include="PersistableAssemblyBuilder\AssemblySaveEventBuilderTests.cs" />
-    <Compile Include="PersistableAssemblyBuilder\AssemblySaveILGeneratorTests.cs" />
-    <Compile Include="PersistableAssemblyBuilder\AssemblySaveTypeBuilderAPIsTests.cs" />
-    <Compile Include="PersistableAssemblyBuilder\AssemblySaveModuleBuilderTests.cs" />
-    <Compile Include="PersistableAssemblyBuilder\AssemblySavePropertyBuilderTests.cs" />
-    <Compile Include="PersistableAssemblyBuilder\AssemblySaveTools.cs" />
-    <Compile Include="PersistableAssemblyBuilder\AssemblySaveTypeBuilderTests.cs" />
+    <Compile Include="PersistedAssemblyBuilder\AssemblySaveAssemblyBuilderTests.cs" />
+    <Compile Include="PersistedAssemblyBuilder\AssemblySaveCustomAttributeTests.cs" />
+    <Compile Include="PersistedAssemblyBuilder\AssemblySaveConstructorBuilderTests.cs" />
+    <Compile Include="PersistedAssemblyBuilder\AssemblySaveEnumBuilderTests.cs" />
+    <Compile Include="PersistedAssemblyBuilder\AssemblySaveEventBuilderTests.cs" />
+    <Compile Include="PersistedAssemblyBuilder\AssemblySaveILGeneratorTests.cs" />
+    <Compile Include="PersistedAssemblyBuilder\AssemblySaveTypeBuilderAPIsTests.cs" />
+    <Compile Include="PersistedAssemblyBuilder\AssemblySaveModuleBuilderTests.cs" />
+    <Compile Include="PersistedAssemblyBuilder\AssemblySavePropertyBuilderTests.cs" />
+    <Compile Include="PersistedAssemblyBuilder\AssemblySaveTools.cs" />
+    <Compile Include="PersistedAssemblyBuilder\AssemblySaveTypeBuilderTests.cs" />
     <Compile Include="PropertyBuilder\PropertyBuilderAddOtherMethod.cs" />
     <Compile Include="PropertyBuilder\PropertyBuilderAttributes.cs" />
     <Compile Include="PropertyBuilder\PropertyBuilderCanRead.cs" />
diff --git a/src/libraries/System.Reflection.Metadata/src/System/Reflection/Metadata/BlobBuilder.cs b/src/libraries/System.Reflection.Metadata/src/System/Reflection/Metadata/BlobBuilder.cs
index 093f60cf7197..97967bba0800 100644
--- a/src/libraries/System.Reflection.Metadata/src/System/Reflection/Metadata/BlobBuilder.cs
+++ b/src/libraries/System.Reflection.Metadata/src/System/Reflection/Metadata/BlobBuilder.cs
@@ -47,7 +47,7 @@ public partial class BlobBuilder
         private uint _length;
 
         private const uint IsFrozenMask = 0x80000000;
-        private bool IsHead => (_length & IsFrozenMask) == 0;
+        internal bool IsHead => (_length & IsFrozenMask) == 0;
         private int Length => (int)(_length & ~IsFrozenMask);
         private uint FrozenLength => _length | IsFrozenMask;
         private Span<byte> Span => _buffer.AsSpan(0, Length);
@@ -97,8 +97,7 @@ public void Clear()
             {
                 if (chunk != this)
                 {
-                    chunk.ClearChunk();
-                    chunk.FreeChunk();
+                    chunk.ClearAndFreeChunk();
                 }
             }
 
@@ -396,6 +395,7 @@ public void LinkPrefix(BlobBuilder prefix)
             // avoid chaining empty chunks:
             if (prefix.Count == 0)
             {
+                prefix.ClearAndFreeChunk();
                 return;
             }
 
@@ -456,6 +456,7 @@ public void LinkSuffix(BlobBuilder suffix)
             // avoid chaining empty chunks:
             if (suffix.Count == 0)
             {
+                suffix.ClearAndFreeChunk();
                 return;
             }
 
@@ -1177,5 +1178,11 @@ private static string Display(byte[] bytes, int length)
                 BitConverter.ToString(bytes, 0, length) :
                 BitConverter.ToString(bytes, 0, MaxDisplaySize / 2) + "-...-" + BitConverter.ToString(bytes, length - MaxDisplaySize / 2, MaxDisplaySize / 2);
         }
+
+        private void ClearAndFreeChunk()
+        {
+            ClearChunk();
+            FreeChunk();
+        }
     }
 }
diff --git a/src/libraries/System.Reflection.Metadata/src/System/Reflection/PortableExecutable/PEReader.cs b/src/libraries/System.Reflection.Metadata/src/System/Reflection/PortableExecutable/PEReader.cs
index 6b179a54c1cc..37533d8c3c7c 100644
--- a/src/libraries/System.Reflection.Metadata/src/System/Reflection/PortableExecutable/PEReader.cs
+++ b/src/libraries/System.Reflection.Metadata/src/System/Reflection/PortableExecutable/PEReader.cs
@@ -201,7 +201,11 @@ public unsafe PEReader(Stream peStream, PEStreamOptions options, int size)
                     {
                         // The peImage is left null, but the lazyMetadataBlock is initialized up front.
                         _lazyPEHeaders = new PEHeaders(peStream, actualSize, IsLoadedImage);
-                        _lazyMetadataBlock = StreamMemoryBlockProvider.ReadMemoryBlockNoLock(peStream, _lazyPEHeaders.MetadataStartOffset, _lazyPEHeaders.MetadataSize);
+
+                        if (_lazyPEHeaders.MetadataStartOffset != -1)
+                        {
+                            _lazyMetadataBlock = StreamMemoryBlockProvider.ReadMemoryBlockNoLock(peStream, _lazyPEHeaders.MetadataStartOffset, _lazyPEHeaders.MetadataSize);
+                        }
                     }
                     // We read all we need, the stream is going to be closed.
                 }
diff --git a/src/libraries/System.Reflection.Metadata/tests/Metadata/BlobTests.cs b/src/libraries/System.Reflection.Metadata/tests/Metadata/BlobTests.cs
index 6c05f36d046c..a04d2b6cf19e 100644
--- a/src/libraries/System.Reflection.Metadata/tests/Metadata/BlobTests.cs
+++ b/src/libraries/System.Reflection.Metadata/tests/Metadata/BlobTests.cs
@@ -1090,5 +1090,31 @@ public void PrematureEndOfStream()
 
             AssertEx.Equal(sourceArray, builder.ToArray());
         }
+
+        [Fact]
+        public void LinkEmptySuffixAndPrefixShouldFreeThem()
+        {
+            var b1 = PooledBlobBuilder.GetInstance();
+            var b2 = PooledBlobBuilder.GetInstance();
+            var b3 = PooledBlobBuilder.GetInstance();
+            var b4 = PooledBlobBuilder.GetInstance();
+            var b5 = PooledBlobBuilder.GetInstance();
+
+            b1.WriteBytes(1, 1);
+            b2.WriteBytes(1, 1);
+            b3.WriteBytes(1, 1);
+
+            b1.LinkSuffix(b2);
+            Assert.False(b2.IsHead);
+
+            b1.LinkPrefix(b3);
+            Assert.False(b3.IsHead);
+
+            b1.LinkSuffix(b4);
+            Assert.True(b4.IsHead);
+
+            b1.LinkPrefix(b5);
+            Assert.True(b4.IsHead);
+        }
     }
 }
diff --git a/src/libraries/System.Reflection.Metadata/tests/PortableExecutable/PEReaderTests.cs b/src/libraries/System.Reflection.Metadata/tests/PortableExecutable/PEReaderTests.cs
index 20477b5a44b9..80312c66a442 100644
--- a/src/libraries/System.Reflection.Metadata/tests/PortableExecutable/PEReaderTests.cs
+++ b/src/libraries/System.Reflection.Metadata/tests/PortableExecutable/PEReaderTests.cs
@@ -870,5 +870,15 @@ public unsafe void InvokeCtorWithIsLoadedImageAndPrefetchMetadataOptions2()
                 }
             }
         }
+
+        [Fact]
+        public void HasMetadataShouldReturnFalseWhenPrefetchingMetadataOfImageWithoutMetadata()
+        {
+            using (var fileStream = new MemoryStream(Misc.KeyPair))
+            using (var peReader = new PEReader(fileStream, PEStreamOptions.PrefetchMetadata | PEStreamOptions.LeaveOpen))
+            {
+                Assert.False(peReader.HasMetadata);
+            }
+        }
     }
 }
diff --git a/src/libraries/System.Reflection.MetadataLoadContext/src/System.Reflection.MetadataLoadContext.csproj b/src/libraries/System.Reflection.MetadataLoadContext/src/System.Reflection.MetadataLoadContext.csproj
index ca859bf2f459..26bfe897c08d 100644
--- a/src/libraries/System.Reflection.MetadataLoadContext/src/System.Reflection.MetadataLoadContext.csproj
+++ b/src/libraries/System.Reflection.MetadataLoadContext/src/System.Reflection.MetadataLoadContext.csproj
@@ -7,6 +7,7 @@
     <EnableAOTAnalyzer>false</EnableAOTAnalyzer>
     <UseCompilerGeneratedDocXmlFile>false</UseCompilerGeneratedDocXmlFile>
     <IsPackable>true</IsPackable>
+    <NoWarn>$(NoWarn);CA1865</NoWarn>
     <PackageDescription>Provides read-only reflection on assemblies in an isolated context with support for assemblies that target different processor architectures and runtimes. Using MetadataLoadContext enables you to inspect assemblies without loading them into the main execution context. Assemblies in MetadataLoadContext are treated only as metadata, that is, you can read information about their members, but cannot execute any code contained in them.</PackageDescription>
   </PropertyGroup>
 
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/Constants.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/Constants.cs
index 45f0b1be382a..9421db62ded6 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/Constants.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/Constants.cs
@@ -18,6 +18,7 @@ internal static class Constants
         public const string CompilerGeneratedAttributeGlobal = "global::System.Runtime.CompilerServices.CompilerGeneratedAttribute";
         public const string DynamicDependencyAttributeGlobal = "global::System.Diagnostics.CodeAnalysis.DynamicDependencyAttribute";
         public const string UnmanagedCallersOnlyAttributeGlobal = "global::System.Runtime.InteropServices.UnmanagedCallersOnlyAttribute";
+        public const string DynamicallyAccessedMemberTypesGlobal = "global::System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes";
         public const string ThreadStaticGlobal = "global::System.ThreadStaticAttribute";
         public const string TaskGlobal = "global::System.Threading.Tasks.Task";
         public const string SpanGlobal = "global::System.Span";
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/JSExportGenerator.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/JSExportGenerator.cs
index 8c31172d32af..b950f7bf172d 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/JSExportGenerator.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/JSExportGenerator.cs
@@ -282,7 +282,7 @@ private static NamespaceDeclarationSyntax GenerateRegSource(
             const string generatedNamespace = "System.Runtime.InteropServices.JavaScript";
             const string initializerClass = "__GeneratedInitializer";
             const string initializerName = "__Register_";
-            const string selfInitName = "__Net7SelfInit_";
+            const string trimmingPreserveName = "__TrimmingPreserve_";
 
             if (methods.IsEmpty) return NamespaceDeclaration(IdentifierName(generatedNamespace));
 
@@ -343,22 +343,42 @@ private static NamespaceDeclarationSyntax GenerateRegSource(
                 )
             );
 
-            // when we are running code generated by .NET8 on .NET7 runtime we need to auto initialize the assembly, because .NET7 doesn't call the registration from JS
-            // this also keeps the code protected from trimming
-            MemberDeclarationSyntax initializerMethod = MethodDeclaration(PredefinedType(Token(SyntaxKind.VoidKeyword)), Identifier(selfInitName))
-                            .WithAttributeLists(List(new[]{
-                                    AttributeList(SingletonSeparatedList(Attribute(IdentifierName(Constants.ModuleInitializerAttributeGlobal)))),
-                                }))
+            // HACK: protect the code from trimming with DynamicDependency attached to a ModuleInitializer
+            MemberDeclarationSyntax initializerMethod = MethodDeclaration(PredefinedType(Token(SyntaxKind.VoidKeyword)), Identifier(trimmingPreserveName))
+                            .WithAttributeLists(
+                                SingletonList<AttributeListSyntax>(
+                                    AttributeList(
+                                        SeparatedList<AttributeSyntax>(
+                                            new SyntaxNodeOrToken[]{
+                                                Attribute(
+                                                    IdentifierName(Constants.ModuleInitializerAttributeGlobal)),
+                                                Token(SyntaxKind.CommaToken),
+                                                Attribute(
+                                                    IdentifierName(Constants.DynamicDependencyAttributeGlobal))
+                                                .WithArgumentList(
+                                                    AttributeArgumentList(
+                                                        SeparatedList<AttributeArgumentSyntax>(
+                                                            new SyntaxNodeOrToken[]{
+                                                                AttributeArgument(
+                                                                    BinaryExpression(
+                                                                        SyntaxKind.BitwiseOrExpression,
+                                                                        MemberAccessExpression(
+                                                                            SyntaxKind.SimpleMemberAccessExpression,
+                                                                            IdentifierName(Constants.DynamicallyAccessedMemberTypesGlobal),
+                                                                            IdentifierName("PublicMethods")),
+                                                                        MemberAccessExpression(
+                                                                            SyntaxKind.SimpleMemberAccessExpression,
+                                                                            IdentifierName(Constants.DynamicallyAccessedMemberTypesGlobal),
+                                                                            IdentifierName("NonPublicMethods")))),
+                                                                Token(SyntaxKind.CommaToken),
+                                                                AttributeArgument(
+                                                                    TypeOfExpression(
+                                                                        IdentifierName(initializerClass)))})))}))))
                             .WithModifiers(TokenList(new[] {
                                 Token(SyntaxKind.StaticKeyword),
                                 Token(SyntaxKind.InternalKeyword)
                             }))
-                            .WithBody(Block(
-                                IfStatement(BinaryExpression(SyntaxKind.EqualsExpression,
-                                    IdentifierName("Environment.Version.Major"),
-                                    LiteralExpression(SyntaxKind.NumericLiteralExpression, Literal(7))),
-                                    Block(SingletonList<StatementSyntax>(
-                                        ExpressionStatement(InvocationExpression(IdentifierName(initializerName))))))));
+                            .WithBody(Block());
 
             var ns = NamespaceDeclaration(IdentifierName(generatedNamespace))
                         .WithMembers(
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/JSGeneratorFactory.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/JSGeneratorFactory.cs
index a1cbebb3fda5..16f2dda426a1 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/JSGeneratorFactory.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/JSGeneratorFactory.cs
@@ -42,6 +42,8 @@ ResolvedGenerator fail(string failReason)
                     return ResolvedGenerator.NotSupported(new(info, context));
 
                 // void
+                case { TypeInfo: JSSimpleTypeInfo(KnownManagedType.Void), JSType: JSTypeFlags.DiscardNoWait }:
+                    return ResolvedGenerator.Resolved(new VoidGenerator(MarshalerType.DiscardNoWait));
                 case { TypeInfo: JSSimpleTypeInfo(KnownManagedType.Void), JSType: JSTypeFlags.Discard }:
                 case { TypeInfo: JSSimpleTypeInfo(KnownManagedType.Void), JSType: JSTypeFlags.Void }:
                 case { TypeInfo: JSSimpleTypeInfo(KnownManagedType.Void), JSType: JSTypeFlags.None }:
@@ -52,6 +54,10 @@ ResolvedGenerator fail(string failReason)
                 case { JSType: JSTypeFlags.Discard }:
                     return fail(SR.DiscardOnlyVoid);
 
+                // oneway no void
+                case { JSType: JSTypeFlags.DiscardNoWait }:
+                    return fail(SR.DiscardNoWaitOnlyVoid);
+
                 // primitive
                 case { TypeInfo: JSSimpleTypeInfo simple }:
                     return Create(info, isToJs, simple.KnownType, Array.Empty<KnownManagedType>(), jsMarshalingInfo.JSType, Array.Empty<JSTypeFlags>(), fail);
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/JSTypeFlags.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/JSTypeFlags.cs
index ab498c62d55f..5b34284f2225 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/JSTypeFlags.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/JSTypeFlags.cs
@@ -22,6 +22,7 @@ internal enum JSTypeFlags : int
         MemoryView = 0x800,
         Any = 0x1000,
         Discard = 0x2000,
+        DiscardNoWait = 0x4000,
         Missing = 0x4000_0000,
     }
 }
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/Resources/Strings.resx b/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/Resources/Strings.resx
index 1c6e47ef214f..6aa6b8b9bb33 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/Resources/Strings.resx
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/gen/JSImportGenerator/Resources/Strings.resx
@@ -184,6 +184,9 @@
   <data name="DiscardOnlyVoid" xml:space="preserve">
     <value>'JSType.Discard' could be only used with void return argument.</value>
   </data>
+  <data name="DiscardNoWaitOnlyVoid" xml:space="preserve">
+    <value>'JSType.DiscardNoWait' could be only used with void returning method.</value>
+  </data>
   <data name="FuncArgumentNotSupported" xml:space="preserve">
     <value>Type {0} is not supported as argument of marshaled function.</value>
     <comment>{0} is a type of the argument</comment>
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/CompatibilitySuppressions.xml b/src/libraries/System.Runtime.InteropServices.JavaScript/src/CompatibilitySuppressions.xml
index 93694b543159..07a5ec1d2531 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/CompatibilitySuppressions.xml
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/CompatibilitySuppressions.xml
@@ -12,4 +12,16 @@
     <Left>ref/net9.0/System.Runtime.InteropServices.JavaScript.dll</Left>
     <Right>runtimes/browser/lib/net9.0/System.Runtime.InteropServices.JavaScript.dll</Right>
   </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0001</DiagnosticId>
+    <Target>T:System.Runtime.InteropServices.JavaScript.JSType.DiscardNoWait</Target>
+    <Left>ref/net9.0/System.Runtime.InteropServices.JavaScript.dll</Left>
+    <Right>runtimes/browser/lib/net9.0/System.Runtime.InteropServices.JavaScript.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0002</DiagnosticId>
+    <Target>M:System.Runtime.InteropServices.JavaScript.JSMarshalerType.get_DiscardNoWait</Target>
+    <Left>ref/net9.0/System.Runtime.InteropServices.JavaScript.dll</Left>
+    <Right>runtimes/browser/lib/net9.0/System.Runtime.InteropServices.JavaScript.dll</Right>
+  </Suppression>
 </Suppressions>
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System.Runtime.InteropServices.JavaScript.csproj b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System.Runtime.InteropServices.JavaScript.csproj
index d600543391c0..053ba1721909 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System.Runtime.InteropServices.JavaScript.csproj
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System.Runtime.InteropServices.JavaScript.csproj
@@ -20,6 +20,10 @@
     <EmitCompilerGeneratedFiles Condition="'$(Configuration)' == 'Debug' and '$(TargetPlatformIdentifier)' == 'browser'">true</EmitCompilerGeneratedFiles>
   </PropertyGroup>
 
+  <!-- TODO-LLVM: This is not upstreamable because it makes the build runtime-specific. -->
+  <PropertyGroup>
+    <DefineConstants Condition="'$(RuntimeFlavor)' == 'CoreCLR'">$(DefineConstants);NATIVE_AOT</DefineConstants>
+  </PropertyGroup>
   <ItemGroup>
     <Compile Include="System.Runtime.InteropServices.JavaScript.SupportedOSPlatform.cs" />
   </ItemGroup>
@@ -71,6 +75,7 @@
   <ItemGroup Condition="'$(TargetPlatformIdentifier)' == 'browser' and '$(FeatureWasmManagedThreads)' == 'true'">
     <Compile Include="System\Runtime\InteropServices\JavaScript\JSWebWorker.cs" />
     <Compile Include="System\Runtime\InteropServices\JavaScript\JSSynchronizationContext.cs" />
+    <Compile Include="System\Runtime\InteropServices\JavaScript\JSAsyncTaskScheduler.cs" />
   </ItemGroup>
   <ItemGroup Condition="'$(WasmEnableThreads)' == 'true'">
     <ApiCompatSuppressionFile Include="CompatibilitySuppressions.xml;CompatibilitySuppressions.WasmThreads.xml" />
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/CancelablePromise.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/CancelablePromise.cs
index f96e0f4f0cf0..361e5febe4ff 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/CancelablePromise.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/CancelablePromise.cs
@@ -9,7 +9,7 @@ namespace System.Runtime.InteropServices.JavaScript
 {
     public static partial class CancelablePromise
     {
-        public static void CancelPromise(Task promise)
+        public static unsafe void CancelPromise(Task promise)
         {
             // this check makes sure that promiseGCHandle is still valid handle
             if (promise.IsCompleted)
@@ -24,7 +24,6 @@ public static void CancelPromise(Task promise)
             {
                 return;
             }
-            holder.IsCanceling = true;
             Interop.Runtime.CancelPromise(holder.GCHandle);
 #else
 
@@ -34,7 +33,11 @@ public static void CancelPromise(Task promise)
                 {
                     return;
                 }
-                holder.IsCanceling = true;
+
+                if (Interlocked.CompareExchange(ref (*holder.State).IsResolving, 1, 0) != 0)
+                {
+                    return;
+                }
 
                 if (holder.ProxyContext.IsCurrentThread())
                 {
@@ -42,10 +45,7 @@ public static void CancelPromise(Task promise)
                 }
                 else
                 {
-                    // FIXME: race condition
-                    // we know that holder.GCHandle is still valid because we hold the ProxyContext lock
-                    // but the message may arrive to the target thread after it was resolved, making GCHandle invalid
-                    Interop.Runtime.CancelPromisePost(holder.ProxyContext.NativeTID, holder.GCHandle);
+                    Interop.Runtime.CancelPromisePost(holder.ProxyContext.JSNativeTID, holder.GCHandle);
                 }
             }
 #endif
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Interop/JavaScriptExports.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Interop/JavaScriptExports.cs
index 74a67b477648..c7bb4a81d3bd 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Interop/JavaScriptExports.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Interop/JavaScriptExports.cs
@@ -2,106 +2,54 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
-using System.IO;
 using System.Reflection;
 using System.Runtime.CompilerServices;
 using System.Threading;
 using System.Threading.Tasks;
-using static System.Runtime.InteropServices.JavaScript.JSHostImplementation;
 
 namespace System.Runtime.InteropServices.JavaScript
 {
     // this maps to src\mono\browser\runtime\managed-exports.ts
     // the public methods are protected from trimming by DynamicDependency on JSFunctionBinding.BindJSFunction
-    // TODO: all the calls here should be running on deputy or TP in MT, not in UI thread
+    // TODO: change all of these to [UnmanagedCallersOnly] and drop the reflection in mono_wasm_invoke_jsexport
     internal static unsafe partial class JavaScriptExports
     {
-        // the marshaled signature is:
-        // Task<int>? CallEntrypoint(MonoMethod* entrypointPtr, string[] args)
+        // the marshaled signature is: Task<int>? CallEntrypoint(char* assemblyNamePtr, string[] args)
         public static void CallEntrypoint(JSMarshalerArgument* arguments_buffer)
         {
             ref JSMarshalerArgument arg_exc = ref arguments_buffer[0]; // initialized by caller in alloc_stack_frame()
-            ref JSMarshalerArgument arg_result = ref arguments_buffer[1]; // initialized by caller in alloc_stack_frame()
+            ref JSMarshalerArgument arg_res = ref arguments_buffer[1]; // initialized by caller in alloc_stack_frame()
             ref JSMarshalerArgument arg_1 = ref arguments_buffer[2]; // initialized and set by caller
             ref JSMarshalerArgument arg_2 = ref arguments_buffer[3]; // initialized and set by caller
+            ref JSMarshalerArgument arg_3 = ref arguments_buffer[4]; // initialized and set by caller
             try
             {
 #if FEATURE_WASM_MANAGED_THREADS
                 // when we arrive here, we are on the thread which owns the proxies
                 arg_exc.AssertCurrentThreadContext();
+                Debug.Assert(arg_res.slot.Type == MarshalerType.TaskPreCreated);
 #endif
 
-                arg_1.ToManaged(out IntPtr entrypointPtr);
-                if (entrypointPtr == IntPtr.Zero)
-                {
-                    throw new MissingMethodException(SR.MissingManagedEntrypointHandle);
-                }
+                arg_1.ToManaged(out IntPtr assemblyNamePtr);
+                arg_2.ToManaged(out string?[]? args);
+                arg_3.ToManaged(out bool waitForDebugger);
 
-                RuntimeMethodHandle methodHandle = GetMethodHandleFromIntPtr(entrypointPtr);
-                // this would not work for generic types. But Main() could not be generic, so we are fine.
-                MethodInfo? method = MethodBase.GetMethodFromHandle(methodHandle) as MethodInfo;
-                if (method == null)
-                {
-                    throw new InvalidOperationException(SR.CannotResolveManagedEntrypointHandle);
-                }
+                Task<int>? result = JSHostImplementation.CallEntrypoint(assemblyNamePtr, args, waitForDebugger);
 
-                arg_2.ToManaged(out string?[]? args);
-                object[] argsToPass = System.Array.Empty<object>();
-                Task<int>? result = null;
-                var parameterInfos = method.GetParameters();
-                if (parameterInfos.Length > 0 && parameterInfos[0].ParameterType == typeof(string[]))
-                {
-                    argsToPass = new object[] { args ?? System.Array.Empty<string>() };
-                }
-                if (method.ReturnType == typeof(void))
-                {
-                    method.Invoke(null, argsToPass);
-                }
-                else if (method.ReturnType == typeof(int))
-                {
-                    int intResult = (int)method.Invoke(null, argsToPass)!;
-                    result = Task.FromResult(intResult);
-                }
-                else if (method.ReturnType == typeof(Task))
-                {
-                    Task methodResult = (Task)method.Invoke(null, argsToPass)!;
-                    TaskCompletionSource<int> tcs = new TaskCompletionSource<int>();
-                    result = tcs.Task;
-                    methodResult.ContinueWith((t) =>
-                    {
-                        if (t.IsFaulted)
-                        {
-                            tcs.SetException(t.Exception!);
-                        }
-                        else
-                        {
-                            tcs.SetResult(0);
-                        }
-                    }, TaskScheduler.Default);
-                }
-                else if (method.ReturnType == typeof(Task<int>))
-                {
-                    result = (Task<int>)method.Invoke(null, argsToPass)!;
-                }
-                else
-                {
-                    throw new InvalidOperationException(SR.Format(SR.ReturnTypeNotSupportedForMain, method.ReturnType.FullName));
-                }
-                arg_result.ToJS(result, (ref JSMarshalerArgument arg, int value) =>
+                arg_res.ToJS(result, (ref JSMarshalerArgument arg, int value) =>
                 {
                     arg.ToJS(value);
                 });
             }
             catch (Exception ex)
             {
-                if (ex is TargetInvocationException refEx && refEx.InnerException != null)
-                    ex = refEx.InnerException;
-
-                arg_exc.ToJS(ex);
+                Environment.FailFast($"CallEntrypoint: Unexpected synchronous failure (ManagedThreadId {Environment.CurrentManagedThreadId}): " + ex);
             }
         }
 
+        // the marshaled signature is: void LoadLazyAssembly(byte[] dll, byte[] pdb)
         public static void LoadLazyAssembly(JSMarshalerArgument* arguments_buffer)
         {
             ref JSMarshalerArgument arg_exc = ref arguments_buffer[0];
@@ -125,6 +73,7 @@ public static void LoadLazyAssembly(JSMarshalerArgument* arguments_buffer)
             }
         }
 
+        // the marshaled signature is: void LoadSatelliteAssembly(byte[] dll)
         public static void LoadSatelliteAssembly(JSMarshalerArgument* arguments_buffer)
         {
             ref JSMarshalerArgument arg_exc = ref arguments_buffer[0];
@@ -146,10 +95,8 @@ public static void LoadSatelliteAssembly(JSMarshalerArgument* arguments_buffer)
             }
         }
 
-        // The JS layer invokes this method when the JS wrapper for a JS owned object
-        //  has been collected by the JS garbage collector
-        // the marshaled signature is:
-        // void ReleaseJSOwnedObjectByGCHandle(GCHandle gcHandle)
+        // The JS layer invokes this method when the JS wrapper for a JS owned object has been collected by the JS garbage collector
+        // the marshaled signature is: void ReleaseJSOwnedObjectByGCHandle(GCHandle gcHandle)
         public static void ReleaseJSOwnedObjectByGCHandle(JSMarshalerArgument* arguments_buffer)
         {
             ref JSMarshalerArgument arg_exc = ref arguments_buffer[0]; // initialized by caller in alloc_stack_frame()
@@ -157,18 +104,17 @@ public static void ReleaseJSOwnedObjectByGCHandle(JSMarshalerArgument* arguments
 
             try
             {
-                // when we arrive here, we are on the thread which owns the proxies
-                var ctx = arg_exc.AssertCurrentThreadContext();
+                // when we arrive here, we are on the thread which owns the proxies or on IO thread
+                var ctx = arg_exc.ToManagedContext;
                 ctx.ReleaseJSOwnedObjectByGCHandle(arg_1.slot.GCHandle);
             }
             catch (Exception ex)
             {
-                arg_exc.ToJS(ex);
+                Environment.FailFast($"ReleaseJSOwnedObjectByGCHandle: Unexpected synchronous failure (ManagedThreadId {Environment.CurrentManagedThreadId}): " + ex);
             }
         }
 
-        // the marshaled signature is:
-        // TRes? CallDelegate<T1,T2,T3TRes>(GCHandle callback, T1? arg1, T2? arg2, T3? arg3)
+        // the marshaled signature is: TRes? CallDelegate<T1,T2,T3TRes>(GCHandle callback, T1? arg1, T2? arg2, T3? arg3)
         public static void CallDelegate(JSMarshalerArgument* arguments_buffer)
         {
             ref JSMarshalerArgument arg_exc = ref arguments_buffer[0]; // initialized by JS caller in alloc_stack_frame()
@@ -177,18 +123,30 @@ public static void CallDelegate(JSMarshalerArgument* arguments_buffer)
             // arg_2 set by JS caller when there are arguments
             // arg_3 set by JS caller when there are arguments
             // arg_4 set by JS caller when there are arguments
+#if !FEATURE_WASM_MANAGED_THREADS
             try
             {
-#if FEATURE_WASM_MANAGED_THREADS
-                // when we arrive here, we are on the thread which owns the proxies
-                // if we need to dispatch the call to another thread in the future
-                // we may need to consider how to solve blocking of the synchronous call
-                // see also https://github.com/dotnet/runtime/issues/76958#issuecomment-1921418290
-                arg_exc.AssertCurrentThreadContext();
+#else
+            // when we arrive here, we are on the thread which owns the proxies
+            var ctx = arg_exc.AssertCurrentThreadContext();
+
+            try
+            {
+                if (ctx.IsMainThread)
+                {
+                    if (JSProxyContext.ThreadBlockingMode == JSHostImplementation.JSThreadBlockingMode.ThrowWhenBlockingWait)
+                    {
+                        Thread.ThrowOnBlockingWaitOnJSInteropThread = true;
+                    }
+                    else if (JSProxyContext.ThreadBlockingMode == JSHostImplementation.JSThreadBlockingMode.WarnWhenBlockingWait)
+                    {
+                        Thread.WarnOnBlockingWaitOnJSInteropThread = true;
+                    }
+                }
 #endif
 
                 GCHandle callback_gc_handle = (GCHandle)arg_1.slot.GCHandle;
-                if (callback_gc_handle.Target is ToManagedCallback callback)
+                if (callback_gc_handle.Target is JSHostImplementation.ToManagedCallback callback)
                 {
                     // arg_2, arg_3, arg_4, arg_res are processed by the callback
                     callback(arguments_buffer);
@@ -202,10 +160,24 @@ public static void CallDelegate(JSMarshalerArgument* arguments_buffer)
             {
                 arg_exc.ToJS(ex);
             }
+#if FEATURE_WASM_MANAGED_THREADS
+            finally
+            {
+                if (ctx.IsMainThread)
+                {
+                    if (JSProxyContext.ThreadBlockingMode == JSHostImplementation.JSThreadBlockingMode.ThrowWhenBlockingWait)
+                    {
+                        Thread.ThrowOnBlockingWaitOnJSInteropThread = false;
+                    }
+                    else if (JSProxyContext.ThreadBlockingMode == JSHostImplementation.JSThreadBlockingMode.WarnWhenBlockingWait)
+                    {
+                        Thread.WarnOnBlockingWaitOnJSInteropThread = false;
+                    }
+                }
+            }
+#endif
         }
-
-        // the marshaled signature is:
-        // void CompleteTask<T>(GCHandle holder, Exception? exceptionResult, T? result)
+        // the marshaled signature is: void CompleteTask<T>(GCHandle holder, Exception? exceptionResult, T? result)
         public static void CompleteTask(JSMarshalerArgument* arguments_buffer)
         {
             ref JSMarshalerArgument arg_exc = ref arguments_buffer[0]; // initialized by caller in alloc_stack_frame()
@@ -216,10 +188,10 @@ public static void CompleteTask(JSMarshalerArgument* arguments_buffer)
 
             try
             {
-                // when we arrive here, we are on the thread which owns the proxies
-                var ctx = arg_exc.AssertCurrentThreadContext();
+                // when we arrive here, we are on the thread which owns the proxies or on IO thread
+                var ctx = arg_exc.ToManagedContext;
                 var holder = ctx.GetPromiseHolder(arg_1.slot.GCHandle);
-                ToManagedCallback callback;
+                JSHostImplementation.ToManagedCallback callback;
 
 #if FEATURE_WASM_MANAGED_THREADS
                 lock (ctx)
@@ -231,23 +203,14 @@ public static void CompleteTask(JSMarshalerArgument* arguments_buffer)
                     }
                 }
 
-                if (holder.CallbackReady != null)
-                {
-#pragma warning disable CA1416 // Validate platform compatibility
-                    Thread.ForceBlockingWait(static (callbackReady) => ((ManualResetEventSlim)callbackReady!).Wait(), holder.CallbackReady);
-#pragma warning restore CA1416 // Validate platform compatibility
-                }
+                // this is always running on I/O thread, so it will not throw PNSE
+                // it's also OK to block here, because we know we will only block shortly, as this is just race with the other thread.
+                holder.CallbackReady?.Wait();
 
                 lock (ctx)
                 {
                     callback = holder.Callback!;
-                    // if Interop.Runtime.CancelPromisePost is in flight, we can't free the GCHandle, because it's needed in JS
-                    var isOutOfOrderCancellation = holder.IsCanceling && arg_res.slot.Type != MarshalerType.Discard;
-                    // FIXME: when it happens we are leaking GCHandle + holder
-                    if (!isOutOfOrderCancellation)
-                    {
-                        ctx.ReleasePromiseHolder(arg_1.slot.GCHandle);
-                    }
+                    ctx.ReleasePromiseHolder(arg_1.slot.GCHandle);
                 }
 #else
                 callback = holder.Callback!;
@@ -260,16 +223,15 @@ public static void CompleteTask(JSMarshalerArgument* arguments_buffer)
             }
             catch (Exception ex)
             {
-                arg_exc.ToJS(ex);
+                Environment.FailFast($"CompleteTask: Unexpected synchronous failure (ManagedThreadId {Environment.CurrentManagedThreadId}): " + ex);
             }
         }
 
-        // the marshaled signature is:
-        // string GetManagedStackTrace(GCHandle exception)
+        // the marshaled signature is: string GetManagedStackTrace(GCHandle exception)
         public static void GetManagedStackTrace(JSMarshalerArgument* arguments_buffer)
         {
             ref JSMarshalerArgument arg_exc = ref arguments_buffer[0]; // initialized by caller in alloc_stack_frame()
-            ref JSMarshalerArgument arg_return = ref arguments_buffer[1]; // used as return value
+            ref JSMarshalerArgument arg_res = ref arguments_buffer[1]; // used as return value
             ref JSMarshalerArgument arg_1 = ref arguments_buffer[2];// initialized and set by caller
             try
             {
@@ -279,7 +241,7 @@ public static void GetManagedStackTrace(JSMarshalerArgument* arguments_buffer)
                 GCHandle exception_gc_handle = (GCHandle)arg_1.slot.GCHandle;
                 if (exception_gc_handle.Target is Exception exception)
                 {
-                    arg_return.ToJS(exception.StackTrace);
+                    arg_res.ToJS(exception.StackTrace);
                 }
                 else
                 {
@@ -296,15 +258,110 @@ public static void GetManagedStackTrace(JSMarshalerArgument* arguments_buffer)
 
         // this is here temporarily, until JSWebWorker becomes public API
         [DynamicDependency(DynamicallyAccessedMemberTypes.NonPublicMethods, "System.Runtime.InteropServices.JavaScript.JSWebWorker", "System.Runtime.InteropServices.JavaScript")]
-        // the marshaled signature is:
-        // void InstallMainSynchronizationContext()
-        public static void InstallMainSynchronizationContext()
+        // the marshaled signature is: GCHandle InstallMainSynchronizationContext(nint jsNativeTID, JSThreadBlockingMode jsThreadBlockingMode)
+        public static void InstallMainSynchronizationContext(JSMarshalerArgument* arguments_buffer)
+        {
+            ref JSMarshalerArgument arg_exc = ref arguments_buffer[0]; // initialized by caller in alloc_stack_frame()
+            ref JSMarshalerArgument arg_res = ref arguments_buffer[1];// initialized and set by caller
+            ref JSMarshalerArgument arg_1 = ref arguments_buffer[2];// initialized and set by caller
+            ref JSMarshalerArgument arg_2 = ref arguments_buffer[3];// initialized and set by caller
+
+            try
+            {
+                JSProxyContext.ThreadBlockingMode = (JSHostImplementation.JSThreadBlockingMode)arg_2.slot.Int32Value;
+                var jsSynchronizationContext = JSSynchronizationContext.InstallWebWorkerInterop(true, CancellationToken.None);
+                jsSynchronizationContext.ProxyContext.JSNativeTID = arg_1.slot.IntPtrValue;
+                arg_res.slot.GCHandle = jsSynchronizationContext.ProxyContext.ContextHandle;
+            }
+            catch (Exception ex)
+            {
+                arg_exc.ToJS(ex);
+            }
+        }
+
+#pragma warning disable CS3016 // Arrays as attribute arguments is not CLS-compliant
+        [UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvCdecl) })]
+#pragma warning restore CS3016
+        // TODO ideally this would be public API callable from generated C# code for JSExport
+        public static void BeforeSyncJSExport(JSMarshalerArgument* arguments_buffer)
+        {
+            ref JSMarshalerArgument arg_exc = ref arguments_buffer[0];
+            try
+            {
+                var ctx = arg_exc.AssertCurrentThreadContext();
+                ctx.IsPendingSynchronousCall = true;
+                if (ctx.IsMainThread)
+                {
+                    if (JSProxyContext.ThreadBlockingMode == JSHostImplementation.JSThreadBlockingMode.ThrowWhenBlockingWait)
+                    {
+                        Thread.ThrowOnBlockingWaitOnJSInteropThread = true;
+                    }
+                    else if (JSProxyContext.ThreadBlockingMode == JSHostImplementation.JSThreadBlockingMode.WarnWhenBlockingWait)
+                    {
+                        Thread.WarnOnBlockingWaitOnJSInteropThread = true;
+                    }
+                }
+            }
+            catch (Exception ex)
+            {
+                Environment.FailFast($"BeforeSyncJSExport: Unexpected synchronous failure (ManagedThreadId {Environment.CurrentManagedThreadId}): " + ex);
+            }
+        }
+
+#pragma warning disable CS3016 // Arrays as attribute arguments is not CLS-compliant
+        [UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvCdecl) })]
+#pragma warning restore CS3016
+        // TODO ideally this would be public API callable from generated C# code for JSExport
+        public static void AfterSyncJSExport(JSMarshalerArgument* arguments_buffer)
         {
-            JSSynchronizationContext.InstallWebWorkerInterop(true, CancellationToken.None);
+            ref JSMarshalerArgument arg_exc = ref arguments_buffer[0];
+            try
+            {
+                var ctx = arg_exc.AssertCurrentThreadContext();
+                ctx.IsPendingSynchronousCall = false;
+                if (ctx.IsMainThread)
+                {
+                    if (JSProxyContext.ThreadBlockingMode == JSHostImplementation.JSThreadBlockingMode.ThrowWhenBlockingWait)
+                    {
+                        Thread.ThrowOnBlockingWaitOnJSInteropThread = false;
+                    }
+                    else if (JSProxyContext.ThreadBlockingMode == JSHostImplementation.JSThreadBlockingMode.WarnWhenBlockingWait)
+                    {
+                        Thread.WarnOnBlockingWaitOnJSInteropThread = false;
+                    }
+                }
+            }
+            catch (Exception ex)
+            {
+                Environment.FailFast($"AfterSyncJSExport: Unexpected synchronous failure (ManagedThreadId {Environment.CurrentManagedThreadId}): " + ex);
+            }
         }
 
 #endif
 
+        // the marshaled signature is: Task BindAssemblyExports(string assemblyName)
+        public static void BindAssemblyExports(JSMarshalerArgument* arguments_buffer)
+        {
+            ref JSMarshalerArgument arg_exc = ref arguments_buffer[0]; // initialized by caller in alloc_stack_frame()
+            ref JSMarshalerArgument arg_res = ref arguments_buffer[1]; // used as return value
+            ref JSMarshalerArgument arg_1 = ref arguments_buffer[2];// initialized and set by caller
+            try
+            {
+                string? assemblyName;
+                // when we arrive here, we are on the thread which owns the proxies
+                arg_exc.AssertCurrentThreadContext();
+                arg_1.ToManaged(out assemblyName);
+
+                var result = JSHostImplementation.BindAssemblyExports(assemblyName);
+
+                arg_res.ToJS(result);
+            }
+            catch (Exception ex)
+            {
+                Environment.FailFast($"BindAssemblyExports: Unexpected synchronous failure (ManagedThreadId {Environment.CurrentManagedThreadId}): " + ex);
+            }
+        }
+
         [MethodImpl(MethodImplOptions.NoInlining)] // profiler needs to find it executed under this name
         public static void StopProfile()
         {
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Interop/JavaScriptImports.Generated.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Interop/JavaScriptImports.Generated.cs
index 8c5c3782a2f3..e7ba1f9aadd6 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Interop/JavaScriptImports.Generated.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Interop/JavaScriptImports.Generated.cs
@@ -43,16 +43,14 @@ internal static unsafe partial class JavaScriptImports
         [JSImport("INTERNAL.get_dotnet_instance")]
         public static partial JSObject GetDotnetInstance();
         [JSImport("INTERNAL.dynamic_import")]
-        // TODO: the continuation should be running on deputy or TP in MT
         public static partial Task<JSObject> DynamicImport(string moduleName, string moduleUrl);
-#if FEATURE_WASM_MANAGED_THREADS
-        [JSImport("INTERNAL.thread_available")]
-        // TODO: the continuation should be running on deputy or TP in MT
-        public static partial Task ThreadAvailable();
-#endif
+
+        [JSImport("INTERNAL.mono_wasm_bind_cs_function")]
+        public static partial void BindCSFunction(IntPtr monoMethod, string assemblyName, string namespaceName, string shortClassName, string methodName, int signatureHash, IntPtr signature);
 
 #if DEBUG
         [JSImport("globalThis.console.log")]
+        [return: JSMarshalAs<JSType.DiscardNoWait>] // this means that the message will arrive out of order, especially across threads.
         public static partial void Log([JSMarshalAs<JSType.String>] string message);
 #endif
     }
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSAsyncTaskScheduler.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSAsyncTaskScheduler.cs
new file mode 100644
index 000000000000..423843af3301
--- /dev/null
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSAsyncTaskScheduler.cs
@@ -0,0 +1,43 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Threading.Tasks;
+
+namespace System.Runtime.InteropServices.JavaScript
+{
+    // executes all tasks thru queue, never inline
+    internal sealed class JSAsyncTaskScheduler : TaskScheduler
+    {
+        private readonly JSSynchronizationContext m_synchronizationContext;
+
+        internal JSAsyncTaskScheduler(JSSynchronizationContext synchronizationContext)
+        {
+            m_synchronizationContext = synchronizationContext;
+        }
+
+        protected override void QueueTask(Task task)
+        {
+            m_synchronizationContext.Post((_) =>
+            {
+                if (!TryExecuteTask(task))
+                {
+                    Environment.FailFast("Unexpected failure in JSAsyncTaskScheduler" + Environment.CurrentManagedThreadId);
+                }
+            }, null);
+        }
+
+        // this is the main difference from the SynchronizationContextTaskScheduler
+        protected override bool TryExecuteTaskInline(Task task, bool taskWasPreviouslyQueued)
+        {
+            return false;
+        }
+
+        protected override IEnumerable<Task>? GetScheduledTasks()
+        {
+            return null;
+        }
+
+        public override int MaximumConcurrencyLevel => 1;
+    }
+}
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSFunctionBinding.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSFunctionBinding.cs
index 5761fe0010bb..f2d908d94707 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSFunctionBinding.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSFunctionBinding.cs
@@ -30,53 +30,51 @@ internal JSFunctionBinding() { }
         internal static volatile uint nextImportHandle = 1;
         internal int ImportHandle;
         internal bool IsAsync;
+        internal bool IsDiscardNoWait;
 #if DEBUG
         internal string? FunctionName;
 #endif
 
-        [StructLayout(LayoutKind.Sequential, Pack = 4)]
+        // keep in sync with JSBindingHeaderOffsets in marshal.ts
+        [StructLayout(LayoutKind.Explicit, Pack = 4)]
         internal struct JSBindingHeader
         {
             internal const int JSMarshalerSignatureHeaderSize = 4 * 8; // without Exception and Result
 
+            [FieldOffset(0)]
             public int Version;
+            [FieldOffset(4)]
             public int ArgumentCount;
+            [FieldOffset(8)]
             public int ImportHandle;
-            public int _Reserved;
+            [FieldOffset(16)]
             public int FunctionNameOffset;
+            [FieldOffset(20)]
             public int FunctionNameLength;
+            [FieldOffset(24)]
             public int ModuleNameOffset;
+            [FieldOffset(28)]
             public int ModuleNameLength;
+            [FieldOffset(32)]
             public JSBindingType Exception;
+            [FieldOffset(64)]
             public JSBindingType Result;
         }
 
-        [StructLayout(LayoutKind.Sequential, Pack = 4, Size = 32)]
+        // keep in sync with JSBindingTypeOffsets in marshal.ts
+        [StructLayout(LayoutKind.Explicit, Pack = 4, Size = 32)]
         internal struct JSBindingType
         {
+            [FieldOffset(0)]
             internal MarshalerType Type;
-            internal MarshalerType __ReservedB1;
-            internal MarshalerType __ReservedB2;
-            internal MarshalerType __ReservedB3;
-            internal IntPtr __Reserved;
-            internal IntPtr JSCustomMarshallerCode;
-            internal int JSCustomMarshallerCodeLength;
+            [FieldOffset(16)]
             internal MarshalerType ResultMarshalerType;
-            internal MarshalerType __ReservedB4;
-            internal MarshalerType __ReservedB5;
-            internal MarshalerType __ReservedB6;
+            [FieldOffset(20)]
             internal MarshalerType Arg1MarshalerType;
-            internal MarshalerType __ReservedB7;
-            internal MarshalerType __ReservedB8;
-            internal MarshalerType __ReservedB9;
+            [FieldOffset(24)]
             internal MarshalerType Arg2MarshalerType;
-            internal MarshalerType __ReservedB10;
-            internal MarshalerType __ReservedB11;
-            internal MarshalerType __ReservedB12;
+            [FieldOffset(28)]
             internal MarshalerType Arg3MarshalerType;
-            internal MarshalerType __ReservedB13;
-            internal MarshalerType __ReservedB14;
-            internal MarshalerType __ReservedB15;
         }
 
         internal unsafe int ArgumentCount
@@ -153,7 +151,9 @@ internal unsafe JSBindingType this[int position]
         /// </summary>
         /// <param name="signature">Generated metadata about the method signature used for marshaling.</param>
         /// <param name="arguments">The intermediate buffer with marshalled arguments.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public static void InvokeJS(JSFunctionBinding signature, Span<JSMarshalerArgument> arguments)
         {
             InvokeJSImportImpl(signature, arguments);
@@ -191,10 +191,11 @@ public static JSFunctionBinding BindManagedFunction(string fullyQualifiedName, i
         {
             if (RuntimeInformation.OSArchitecture != Architecture.Wasm)
                 throw new PlatformNotSupportedException();
-#if FEATURE_WASM_MANAGED_THREADS
-            JSProxyContext.AssertIsInteropThread();
-#endif
-            return BindManagedFunctionImpl(fullyQualifiedName, signatureHash, signatures);
+
+            // this could be called by assembly module initializer from Net7 code-gen
+            // on wrong thread, in which case we will bind it to UI thread
+
+            return JSHostImplementation.BindManagedFunction(fullyQualifiedName, signatureHash, signatures);
         }
 
 #if !DEBUG
@@ -205,6 +206,9 @@ internal static unsafe void InvokeJSFunction(JSObject jsFunction, Span<JSMarshal
             jsFunction.AssertNotDisposed();
 
 #if FEATURE_WASM_MANAGED_THREADS
+            ref JSMarshalerArgument exc = ref arguments[0];
+            exc.slot.CallerNativeTID = JSProxyContext.GetNativeThreadId();
+
             // if we are on correct thread already, just call it
             if (jsFunction.ProxyContext.IsCurrentThread())
             {
@@ -225,6 +229,13 @@ internal static unsafe void InvokeJSFunction(JSObject jsFunction, Span<JSMarshal
 #endif
         internal static unsafe void InvokeJSFunctionCurrent(JSObject jsFunction, Span<JSMarshalerArgument> arguments)
         {
+#if FEATURE_WASM_MANAGED_THREADS
+            if (jsFunction.ProxyContext.IsPendingSynchronousCall && jsFunction.ProxyContext.IsMainThread)
+            {
+                throw new PlatformNotSupportedException("Cannot call synchronous JS function from inside a synchronous call to a C# method.");
+            }
+#endif
+
             var functionHandle = (int)jsFunction.JSHandle;
             fixed (JSMarshalerArgument* ptr = arguments)
             {
@@ -244,16 +255,20 @@ internal static unsafe void InvokeJSFunctionCurrent(JSObject jsFunction, Span<JS
 #endif
         internal static unsafe void DispatchJSFunctionSync(JSObject jsFunction, Span<JSMarshalerArgument> arguments)
         {
+#if FEATURE_WASM_MANAGED_THREADS
+            if (jsFunction.ProxyContext.IsPendingSynchronousCall && jsFunction.ProxyContext.IsMainThread)
+            {
+                throw new PlatformNotSupportedException("Cannot call synchronous JS function from inside a synchronous call to a C# method.");
+            }
+#endif
             var args = (nint)Unsafe.AsPointer(ref arguments[0]);
             var functionHandle = jsFunction.JSHandle;
 
             // we already know that we are not on the right thread
             // this will be blocking until resolved by that thread
-            // we don't have to disable ThrowOnBlockingWaitOnJSInteropThread, because this is lock in native code
-            // we also don't throw PNSE here, because we know that the target has JS interop installed and that it could not block
+            // we know that the target has JS interop installed and that it could not block
             // so it could take some time, while target is CPU busy, but not forever
-            // see also https://github.com/dotnet/runtime/issues/76958#issuecomment-1921418290
-            Interop.Runtime.InvokeJSFunctionSend(jsFunction.ProxyContext.NativeTID, functionHandle, args);
+            Interop.Runtime.InvokeJSFunctionSend(jsFunction.ProxyContext.JSNativeTID, functionHandle, args);
 
             ref JSMarshalerArgument exceptionArg = ref arguments[0];
             if (exceptionArg.slot.Type != MarshalerType.None)
@@ -268,10 +283,13 @@ internal static unsafe void DispatchJSFunctionSync(JSObject jsFunction, Span<JSM
 #endif
         internal static unsafe void InvokeJSImportImpl(JSFunctionBinding signature, Span<JSMarshalerArgument> arguments)
         {
+            ref JSMarshalerArgument exc = ref arguments[0];
+            ref JSMarshalerArgument res = ref arguments[1];
 #if FEATURE_WASM_MANAGED_THREADS
             var targetContext = JSProxyContext.SealJSImportCapturing();
-            arguments[0].slot.ContextHandle = targetContext.ContextHandle;
-            arguments[1].slot.ContextHandle = targetContext.ContextHandle;
+            exc.slot.CallerNativeTID = JSProxyContext.GetNativeThreadId();
+            exc.slot.ContextHandle = targetContext.ContextHandle;
+            res.slot.ContextHandle = targetContext.ContextHandle;
 #else
             var targetContext = JSProxyContext.MainThreadContext;
 #endif
@@ -280,8 +298,25 @@ internal static unsafe void InvokeJSImportImpl(JSFunctionBinding signature, Span
             {
                 // pre-allocate the result handle and Task
                 var holder = targetContext.CreatePromiseHolder();
-                arguments[1].slot.Type = MarshalerType.TaskPreCreated;
-                arguments[1].slot.GCHandle = holder.GCHandle;
+                res.slot.Type = MarshalerType.TaskPreCreated;
+                res.slot.GCHandle = holder.GCHandle;
+#if FEATURE_WASM_MANAGED_THREADS
+                res.slot.IntPtrValue = (IntPtr)holder.State;
+#endif
+            }
+#if FEATURE_WASM_MANAGED_THREADS
+            else
+            {
+                if (targetContext.IsPendingSynchronousCall && targetContext.IsMainThread)
+                {
+                    throw new PlatformNotSupportedException("Cannot call synchronous JS function from inside a synchronous call to a C# method.");
+                }
+            }
+#endif
+
+            if (signature.IsDiscardNoWait)
+            {
+                arguments[1].slot.Type = MarshalerType.DiscardNoWait;
             }
 
 #if FEATURE_WASM_MANAGED_THREADS
@@ -298,15 +333,15 @@ internal static unsafe void InvokeJSImportImpl(JSFunctionBinding signature, Span
 #endif
 
             }
-            else if (!signature.IsAsync)
+            else if (signature.IsAsync || signature.IsDiscardNoWait)
             {
-                //sync
-                DispatchJSImportSyncSend(signature, targetContext, arguments);
+                //async
+                DispatchJSImportAsyncPost(signature, targetContext, arguments);
             }
             else
             {
-                //async
-                DispatchJSImportAsyncPost(signature, targetContext, arguments);
+                //sync
+                DispatchJSImportSyncSend(signature, targetContext, arguments);
             }
 #else
             InvokeJSImportCurrent(signature, arguments);
@@ -331,9 +366,9 @@ internal static unsafe void InvokeJSImportCurrent(JSFunctionBinding signature, S
             fixed (JSMarshalerArgument* args = arguments)
             {
 #if FEATURE_WASM_MANAGED_THREADS
-                Interop.Runtime.InvokeJSImportSync((nint)args, (nint)signature.Header);
+                Interop.Runtime.InvokeJSImportSync((nint)signature.Header, (nint)args);
 #else
-                Interop.Runtime.InvokeJSImport(signature.ImportHandle, (nint)args);
+                Interop.Runtime.InvokeJSImportST(signature.ImportHandle, (nint)args);
 #endif
             }
 
@@ -354,18 +389,15 @@ internal static unsafe void DispatchJSImportSyncSend(JSFunctionBinding signature
             var args = (nint)Unsafe.AsPointer(ref arguments[0]);
             var sig = (nint)signature.Header;
 
+            ref JSMarshalerArgument exc = ref arguments[0];
+
             // we already know that we are not on the right thread
             // this will be blocking until resolved by that thread
-            // we don't have to disable ThrowOnBlockingWaitOnJSInteropThread, because this is lock in native code
-            // we also don't throw PNSE here, because we know that the target has JS interop installed and that it could not block
-            // so it could take some time, while target is CPU busy, but not forever
-            // see also https://github.com/dotnet/runtime/issues/76958#issuecomment-1921418290
-            Interop.Runtime.InvokeJSImportSyncSend(targetContext.NativeTID, args, sig);
+            Interop.Runtime.InvokeJSImportSyncSend(targetContext.JSNativeTID, sig, args);
 
-            ref JSMarshalerArgument exceptionArg = ref arguments[0];
-            if (exceptionArg.slot.Type != MarshalerType.None)
+            if (exc.slot.Type != MarshalerType.None)
             {
-                JSHostImplementation.ThrowException(ref exceptionArg);
+                JSHostImplementation.ThrowException(ref exc);
             }
         }
 
@@ -374,7 +406,10 @@ internal static unsafe void DispatchJSImportSyncSend(JSFunctionBinding signature
 #endif
         internal static unsafe void DispatchJSImportAsyncPost(JSFunctionBinding signature, JSProxyContext targetContext, Span<JSMarshalerArgument> arguments)
         {
-            // this copy is freed in mono_wasm_invoke_import_async
+            // meaning JS side needs to dispose it
+            ref JSMarshalerArgument exc = ref arguments[0];
+            exc.slot.ReceiverShouldFree = true;
+
             var bytes = sizeof(JSMarshalerArgument) * arguments.Length;
             void* cpy = (void*)Marshal.AllocHGlobal(bytes);
             void* src = Unsafe.AsPointer(ref arguments[0]);
@@ -384,7 +419,7 @@ internal static unsafe void DispatchJSImportAsyncPost(JSFunctionBinding signatur
             // we already know that we are not on the right thread
             // this will return quickly after sending the message
             // async
-            Interop.Runtime.InvokeJSImportAsyncPost(targetContext.NativeTID, (nint)cpy, sig);
+            Interop.Runtime.InvokeJSImportAsyncPost(targetContext.JSNativeTID, sig, (nint)cpy);
 
         }
 
@@ -396,29 +431,18 @@ internal static unsafe JSFunctionBinding BindJSImportImpl(string functionName, s
 
 #if !FEATURE_WASM_MANAGED_THREADS
 
-            Interop.Runtime.BindJSImport(signature.Header, out int isException, out object exceptionMessage);
-            if (isException != 0)
-                throw new JSException((string)exceptionMessage);
-
-            JSHostImplementation.FreeMethodSignatureBuffer(signature);
-
-#endif
-
-            return signature;
-        }
-
-        internal static unsafe JSFunctionBinding BindManagedFunctionImpl(string fullyQualifiedName, int signatureHash, ReadOnlySpan<JSMarshalerType> signatures)
-        {
-            var signature = JSHostImplementation.GetMethodSignature(signatures, null, null);
-
-            Interop.Runtime.BindCSFunction(fullyQualifiedName, signatureHash, signature.Header, out int isException, out object exceptionMessage);
-            if (isException != 0)
+            nint exceptionPtr = Interop.Runtime.BindJSImportST(signature.Header);
+            if (exceptionPtr != IntPtr.Zero)
             {
-                throw new JSException((string)exceptionMessage);
+                var message = Marshal.PtrToStringUni(exceptionPtr)!;
+                Marshal.FreeHGlobal(exceptionPtr);
+                throw new JSException(message);
             }
 
             JSHostImplementation.FreeMethodSignatureBuffer(signature);
 
+#endif
+
             return signature;
         }
 
@@ -427,17 +451,19 @@ internal static unsafe JSFunctionBinding BindManagedFunctionImpl(string fullyQua
 #endif
         internal static unsafe void ResolveOrRejectPromise(JSProxyContext targetContext, Span<JSMarshalerArgument> arguments)
         {
+            ref JSMarshalerArgument exc = ref arguments[0];
 #if FEATURE_WASM_MANAGED_THREADS
+            exc.slot.CallerNativeTID = JSProxyContext.GetNativeThreadId();
+
             if (targetContext.IsCurrentThread())
 #endif
             {
                 fixed (JSMarshalerArgument* ptr = arguments)
                 {
                     Interop.Runtime.ResolveOrRejectPromise((nint)ptr);
-                    ref JSMarshalerArgument exceptionArg = ref arguments[0];
-                    if (exceptionArg.slot.Type != MarshalerType.None)
+                    if (exc.slot.Type != MarshalerType.None)
                     {
-                        JSHostImplementation.ThrowException(ref exceptionArg);
+                        JSHostImplementation.ThrowException(ref exc);
                     }
                 }
             }
@@ -445,8 +471,7 @@ internal static unsafe void ResolveOrRejectPromise(JSProxyContext targetContext,
             else
             {
                 // meaning JS side needs to dispose it
-                ref JSMarshalerArgument res = ref arguments[1];
-                res.slot.BooleanValue = true;
+                exc.slot.ReceiverShouldFree = true;
 
                 // this copy is freed in mono_wasm_resolve_or_reject_promise
                 var bytes = sizeof(JSMarshalerArgument) * arguments.Length;
@@ -455,7 +480,7 @@ internal static unsafe void ResolveOrRejectPromise(JSProxyContext targetContext,
                 Unsafe.CopyBlock(cpy, src, (uint)bytes);
 
                 // async
-                Interop.Runtime.ResolveOrRejectPromisePost(targetContext.NativeTID, (nint)cpy);
+                Interop.Runtime.ResolveOrRejectPromisePost(targetContext.JSNativeTID, (nint)cpy);
 
                 // this never throws directly
             }
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSHost.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSHost.cs
index 0a685e996882..20570bbab0ff 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSHost.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSHost.cs
@@ -44,7 +44,9 @@ public static JSObject DotnetInstance
         /// <param name="moduleUrl">The location of the module file.</param>
         /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
         /// <returns>A proxy for the JavaScript object that contains the module's exports.</returns>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public static Task<JSObject> ImportAsync(string moduleName, string moduleUrl, CancellationToken cancellationToken = default)
         {
             return JSHostImplementation.ImportAsync(moduleName, moduleUrl, cancellationToken);
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSHostImplementation.Types.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSHostImplementation.Types.cs
index 2aa59d1814d4..d65421592248 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSHostImplementation.Types.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSHostImplementation.Types.cs
@@ -10,30 +10,46 @@ internal static partial class JSHostImplementation
     {
         internal unsafe delegate void ToManagedCallback(JSMarshalerArgument* arguments_buffer);
 
-        public sealed class PromiseHolder
+        public sealed unsafe class PromiseHolder
         {
+            public bool IsDisposed;
             public readonly nint GCHandle; // could be also virtual GCVHandle
             public ToManagedCallback? Callback;
             public JSProxyContext ProxyContext;
-            public bool IsDisposed;
-            public bool IsCanceling;
 #if FEATURE_WASM_MANAGED_THREADS
             public ManualResetEventSlim? CallbackReady;
+            public PromiseHolderState* State;
 #endif
 
             public PromiseHolder(JSProxyContext targetContext)
             {
                 GCHandle = (IntPtr)InteropServices.GCHandle.Alloc(this, GCHandleType.Normal);
                 ProxyContext = targetContext;
+#if FEATURE_WASM_MANAGED_THREADS
+                State = (PromiseHolderState*)Marshal.AllocHGlobal(sizeof(PromiseHolderState));
+                Interlocked.Exchange(ref (*State).IsResolving, 0);
+#endif
             }
 
             public PromiseHolder(JSProxyContext targetContext, nint gcvHandle)
             {
                 GCHandle = gcvHandle;
                 ProxyContext = targetContext;
+#if FEATURE_WASM_MANAGED_THREADS
+                State = (PromiseHolderState*)Marshal.AllocHGlobal(sizeof(PromiseHolderState));
+                Interlocked.Exchange(ref (*State).IsResolving, 0);
+#endif
             }
         }
 
+        // NOTE: layout has to match PromiseHolderState in marshal-to-cs.ts
+        [StructLayout(LayoutKind.Explicit)]
+        public struct PromiseHolderState
+        {
+            [FieldOffset(0)]
+            public volatile int IsResolving;
+        }
+
         [StructLayout(LayoutKind.Explicit)]
         public struct IntPtrAndHandle
         {
@@ -46,5 +62,14 @@ public struct IntPtrAndHandle
             [FieldOffset(0)]
             internal RuntimeTypeHandle typeHandle;
         }
+
+        // keep in sync with types\internal.ts
+        public enum JSThreadBlockingMode : int
+        {
+            PreventSynchronousJSExport = 0,
+            ThrowWhenBlockingWait = 1,
+            WarnWhenBlockingWait = 2,
+            DangerousAllowBlockingWait = 100,
+        }
     }
 }
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSHostImplementation.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSHostImplementation.cs
index 3035781eb730..7e24adb6e1e6 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSHostImplementation.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSHostImplementation.cs
@@ -33,14 +33,6 @@ public static bool GetTaskResultDynamic(Task task, out object? value)
             throw new InvalidOperationException();
         }
 
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static RuntimeMethodHandle GetMethodHandleFromIntPtr(IntPtr ptr)
-        {
-            var temp = new IntPtrAndHandle { ptr = ptr };
-            return temp.methodHandle;
-        }
-
         /// <summary>
         /// Gets the MethodInfo for the Task{T}.Result property getter.
         /// </summary>
@@ -72,7 +64,9 @@ public static MethodInfo GetTaskResultMethodInfo(Type taskType)
             throw new InvalidOperationException();
         }
 
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public static void ThrowException(ref JSMarshalerArgument arg)
         {
             arg.ToManaged(out Exception? ex);
@@ -93,7 +87,9 @@ public static async Task<JSObject> ImportAsync(string moduleName, string moduleU
                 ConfigureAwaitOptions.ForceYielding); // this helps to finish the import before we bind the module in [JSImport]
         }
 
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public static async Task<JSObject> CancellationHelper(Task<JSObject> jsTask, CancellationToken cancellationToken)
         {
             if (jsTask.IsCompletedSuccessfully)
@@ -105,7 +101,7 @@ public static async Task<JSObject> CancellationHelper(Task<JSObject> jsTask, Can
                 CancelablePromise.CancelPromise((Task<JSObject>)s!);
             }, jsTask))
             {
-                return await jsTask.ConfigureAwait(true);
+                return await jsTask.ConfigureAwait(false);
             }
         }
 
@@ -161,6 +157,7 @@ public static unsafe JSFunctionBinding GetMethodSignature(ReadOnlySpan<JSMarshal
                 var type = signature.Sigs[i] = types[i + 1]._signatureType;
             }
             signature.IsAsync = types[0]._signatureType.Type == MarshalerType.Task;
+            signature.IsDiscardNoWait = types[0]._signatureType.Type == MarshalerType.DiscardNoWait;
 
             signature.Header[0].ImportHandle = signature.ImportHandle;
             signature.Header[0].FunctionNameLength = functionNameBytes;
@@ -208,6 +205,120 @@ public static void LoadSatelliteAssembly(byte[] dllBytes)
             AssemblyLoadContext.Default.LoadFromStream(new MemoryStream(dllBytes));
         }
 
+        public static unsafe Task<int>? CallEntrypoint(IntPtr assemblyNamePtr, string?[]? args, bool waitForDebugger)
+        {
+            try
+            {
+                void* ptr;
+                Interop.Runtime.AssemblyGetEntryPoint(assemblyNamePtr, waitForDebugger ? 1 : 0, &ptr);
+                RuntimeMethodHandle methodHandle = GetMethodHandleFromIntPtr((IntPtr)ptr);
+                // this would not work for generic types. But Main() could not be generic, so we are fine.
+                MethodInfo? method = MethodBase.GetMethodFromHandle(methodHandle) as MethodInfo;
+                if (method == null)
+                {
+                    throw new InvalidOperationException(SR.CannotResolveManagedEntrypointHandle);
+                }
+
+                object[] argsToPass = System.Array.Empty<object>();
+                Task<int>? result = null;
+                var parameterInfos = method.GetParameters();
+                if (parameterInfos.Length > 0 && parameterInfos[0].ParameterType == typeof(string[]))
+                {
+                    argsToPass = new object[] { args ?? System.Array.Empty<string>() };
+                }
+                if (method.ReturnType == typeof(void))
+                {
+                    method.Invoke(null, argsToPass);
+#if FEATURE_WASM_MANAGED_THREADS
+                    result = Task.FromResult(0);
+#endif
+                }
+                else if (method.ReturnType == typeof(int))
+                {
+                    int intResult = (int)method.Invoke(null, argsToPass)!;
+                    result = Task.FromResult(intResult);
+                }
+                else if (method.ReturnType == typeof(Task))
+                {
+                    Task methodResult = (Task)method.Invoke(null, argsToPass)!;
+                    TaskCompletionSource<int> tcs = new TaskCompletionSource<int>();
+                    result = tcs.Task;
+                    methodResult.ContinueWith((t) =>
+                    {
+                        if (t.IsFaulted)
+                        {
+                            tcs.SetException(t.Exception!);
+                        }
+                        else
+                        {
+                            tcs.SetResult(0);
+                        }
+                    }, TaskScheduler.Default);
+                }
+                else if (method.ReturnType == typeof(Task<int>))
+                {
+                    result = (Task<int>)method.Invoke(null, argsToPass)!;
+                }
+                else
+                {
+                    throw new InvalidOperationException(SR.Format(SR.ReturnTypeNotSupportedForMain, method.ReturnType.FullName));
+                }
+                return result;
+            }
+            catch (Exception ex)
+            {
+                if (ex is TargetInvocationException refEx && refEx.InnerException != null)
+                    ex = refEx.InnerException;
+                return Task.FromException<int>(ex);
+            }
+        }
+
+        public static unsafe Task BindAssemblyExports(string? assemblyName)
+        {
+            Interop.Runtime.BindAssemblyExports(Marshal.StringToCoTaskMemUTF8(assemblyName));
+            return Task.CompletedTask;
+        }
+
+        public static unsafe JSFunctionBinding BindManagedFunction(string fullyQualifiedName, int signatureHash, ReadOnlySpan<JSMarshalerType> signatures)
+        {
+#if NATIVE_AOT
+            var signature = GetMethodSignature(signatures, null, null);
+            Interop.Runtime.BindCSFunction(fullyQualifiedName, signatureHash, signature.Header, out int isException, out object exceptionMessage);
+            if (isException != 0)
+            {
+                throw new JSException((string)exceptionMessage);
+            }
+            FreeMethodSignatureBuffer(signature);
+            return signature;
+#else
+            var (assemblyName, nameSpace, shortClassName, methodName) = ParseFQN(fullyQualifiedName);
+            var wrapper_name = $"__Wrapper_{methodName}_{signatureHash}";
+            var dllName = assemblyName + ".dll";
+
+            IntPtr monoMethod;
+            Interop.Runtime.GetAssemblyExport(
+                Marshal.StringToCoTaskMemUTF8(dllName),
+                Marshal.StringToCoTaskMemUTF8(nameSpace),
+                Marshal.StringToCoTaskMemUTF8(shortClassName),
+                Marshal.StringToCoTaskMemUTF8(wrapper_name),
+                &monoMethod);
+
+            if (monoMethod == IntPtr.Zero)
+            {
+                Environment.FailFast($"Can't find {nameSpace}{shortClassName}{methodName} in {assemblyName}.dll");
+            }
+
+            var signature = GetMethodSignature(signatures, null, null);
+
+            // this will hit JS side possibly on another thread, depending on JSProxyContext.CurrentThreadContext
+            JavaScriptImports.BindCSFunction(monoMethod, assemblyName, nameSpace, shortClassName, methodName, signatureHash, (IntPtr)signature.Header);
+
+            FreeMethodSignatureBuffer(signature);
+
+            return signature;
+#endif
+        }
+
 #if FEATURE_WASM_MANAGED_THREADS
         [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "external_eventloop")]
         private static extern ref bool GetThreadExternalEventloop(Thread @this);
@@ -218,5 +329,38 @@ public static void SetHasExternalEventLoop(Thread thread)
         }
 #endif
 
+#if !DEBUG
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
+        public static RuntimeMethodHandle GetMethodHandleFromIntPtr(IntPtr ptr)
+        {
+            var temp = new IntPtrAndHandle { ptr = ptr };
+            return temp.methodHandle;
+        }
+
+        public static (string assemblyName, string nameSpace, string shortClassName, string methodName) ParseFQN(string fqn)
+        {
+            var assembly = fqn.Substring(fqn.IndexOf('[') + 1, fqn.IndexOf(']') - 1).Trim();
+            fqn = fqn.Substring(fqn.IndexOf(']') + 1).Trim();
+            var methodName = fqn.Substring(fqn.IndexOf(':') + 1);
+            var className = fqn.Substring(0, fqn.IndexOf(':')).Trim();
+
+            var nameSpace = "";
+            var shortClassName = className;
+            var idx = fqn.LastIndexOf('.');
+            if (idx != -1)
+            {
+                nameSpace = fqn.Substring(0, idx);
+                shortClassName = className.Substring(idx + 1);
+            }
+
+            if (string.IsNullOrEmpty(assembly))
+                throw new InvalidOperationException("No assembly name specified " + fqn);
+            if (string.IsNullOrEmpty(className))
+                throw new InvalidOperationException("No class name specified " + fqn);
+            if (string.IsNullOrEmpty(methodName))
+                throw new InvalidOperationException("No method name specified " + fqn);
+            return (assembly, nameSpace, shortClassName, methodName);
+        }
     }
 }
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSMarshalerArgument.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSMarshalerArgument.cs
index eecd01128007..4312973a388a 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSMarshalerArgument.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSMarshalerArgument.cs
@@ -20,11 +20,12 @@ public partial struct JSMarshalerArgument
     {
         internal JSMarshalerArgumentImpl slot;
 
+        // keep in sync with JSMarshalerArgumentOffsets in marshal.ts
         [StructLayout(LayoutKind.Explicit, Pack = 32, Size = 32)]
         internal struct JSMarshalerArgumentImpl
         {
             [FieldOffset(0)]
-            internal bool BooleanValue;
+            internal bool BooleanValue;  // note this is 1 byte
             [FieldOffset(0)]
             internal byte ByteValue;
             [FieldOffset(0)]
@@ -58,14 +59,27 @@ internal struct JSMarshalerArgumentImpl
             [FieldOffset(13)]
             internal MarshalerType ElementType;
 
+#if FEATURE_WASM_MANAGED_THREADS
             [FieldOffset(16)]
             internal IntPtr ContextHandle;
+
+            [FieldOffset(20)]
+            internal bool ReceiverShouldFree; // note this is 1 byte
+
+            [FieldOffset(24)]
+            internal IntPtr CallerNativeTID;
+
+            [FieldOffset(28)]
+            internal IntPtr SyncDoneSemaphorePtr;
+#endif
         }
 
         /// <summary>
         /// This API supports JSImport infrastructure and is not intended to be used directly from your code.
         /// </summary>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void Initialize()
         {
             slot.Type = MarshalerType.None;
@@ -74,15 +88,19 @@ public unsafe void Initialize()
             // also this is called multiple times
             JSProxyContext.JSImportWithUnknownContext();
             slot.ContextHandle = IntPtr.Zero;
+            slot.ReceiverShouldFree = false;
 #endif
         }
 
 #if FEATURE_WASM_MANAGED_THREADS
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         internal unsafe void InitializeWithContext(JSProxyContext knownProxyContext)
         {
             slot.Type = MarshalerType.None;
             slot.ContextHandle = knownProxyContext.ContextHandle;
+            slot.ReceiverShouldFree = false;
         }
 #endif
         // this is always called from ToManaged() marshaler
@@ -103,11 +121,11 @@ internal JSProxyContext ToManagedContext
                 // during JSExport, this is marshaling parameters and it would be set by:
                 //    - alloc_stack_frame
                 //    - set_js_handle/set_gc_handle
-                var proxyContextGCHandle = (GCHandle)slot.ContextHandle;
-                if (proxyContextGCHandle == default)
+                if (slot.ContextHandle == IntPtr.Zero)
                 {
-                    Environment.FailFast($"ContextHandle not set, ManagedThreadId: {Environment.CurrentManagedThreadId}. {Environment.NewLine} {Environment.StackTrace}");
+                    Environment.FailFast($"ContextHandle not set (ManagedThreadId {Environment.CurrentManagedThreadId}): {Environment.NewLine} {Environment.StackTrace}");
                 }
+                var proxyContextGCHandle = (GCHandle)slot.ContextHandle;
                 var argumentContext = (JSProxyContext)proxyContextGCHandle.Target!;
                 return argumentContext;
 #endif
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSMarshalerType.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSMarshalerType.cs
index b76ffdf9d615..7a8eecf4cc2c 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSMarshalerType.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSMarshalerType.cs
@@ -44,6 +44,15 @@ private JSMarshalerType(JSFunctionBinding.JSBindingType signatureType)
             Type = MarshalerType.Discard
         });
 
+        /// <summary>
+        /// Dispatches the call asynchronously and doesn't wait for result.
+        /// </summary>
+        /// <returns>The marshaler metadata.</returns>
+        public static JSMarshalerType DiscardNoWait { get; } = new JSMarshalerType(new JSFunctionBinding.JSBindingType
+        {
+            Type = MarshalerType.DiscardNoWait
+        });
+
         /// <summary>
         /// Marshal as JavaScript <see href="https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean">Boolean</see> type.
         /// </summary>
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSObject.References.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSObject.References.cs
index 35424deea557..48c6cbc32862 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSObject.References.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSObject.References.cs
@@ -42,7 +42,9 @@ internal JSObject(IntPtr jsHandle, JSProxyContext ctx)
         /// <inheritdoc />
         public override string ToString() => $"(js-obj js '{JSHandle}')";
 
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         internal void AssertNotDisposed()
         {
             lock (ProxyContext)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSProxyContext.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSProxyContext.cs
index 375b3082bdf0..e8ce55892f74 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSProxyContext.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSProxyContext.cs
@@ -28,17 +28,29 @@ internal sealed class JSProxyContext : IDisposable
         private JSProxyContext()
         {
         }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#pragma warning disable CA1822 // Mark members as static
+        public bool IsCurrentThread() => true;
+#pragma warning restore CA1822 // Mark members as static
 #else
         public nint ContextHandle;
-        public nint NativeTID;
-        public int ManagedTID;
+        public nint JSNativeTID; // target thread where JavaScript is running
+        public nint NativeTID; // current pthread id
+        public int ManagedTID; // current managed thread id
         public bool IsMainThread;
         public JSSynchronizationContext SynchronizationContext;
+        public JSAsyncTaskScheduler? AsyncTaskScheduler;
 
+        public static JSThreadBlockingMode ThreadBlockingMode = JSThreadBlockingMode.PreventSynchronousJSExport;
+        public bool IsPendingSynchronousCall;
+
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public bool IsCurrentThread()
         {
-            return ManagedTID == Environment.CurrentManagedThreadId;
+            return ManagedTID == Environment.CurrentManagedThreadId && !IsMainThread;
         }
 
         [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "thread_id")]
@@ -53,7 +65,7 @@ public static IntPtr GetNativeThreadId()
         public JSProxyContext(bool isMainThread, JSSynchronizationContext synchronizationContext)
         {
             SynchronizationContext = synchronizationContext;
-            NativeTID = GetNativeThreadId();
+            NativeTID = JSNativeTID = GetNativeThreadId();
             ManagedTID = Environment.CurrentManagedThreadId;
             IsMainThread = isMainThread;
             ContextHandle = (nint)GCHandle.Alloc(this, GCHandleType.Normal);
@@ -226,7 +238,9 @@ public static JSProxyContext CurrentOperationContext
 
 #endif
 
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public static JSProxyContext AssertIsInteropThread()
         {
 #if FEATURE_WASM_MANAGED_THREADS
@@ -265,16 +279,14 @@ public nint AllocJSVHandle()
         {
             lock (this)
             {
+                ObjectDisposedException.ThrowIf(_isDisposed, this);
+
                 if (JSVHandleFreeList.Count > 0)
                 {
                     var jsvHandle = JSVHandleFreeList[JSVHandleFreeList.Count - 1];
                     JSVHandleFreeList.RemoveAt(JSVHandleFreeList.Count - 1);
                     return jsvHandle;
                 }
-                if (NextJSVHandle == IntPtr.Zero)
-                {
-                    NextJSVHandle = -2;
-                }
                 return NextJSVHandle--;
             }
         }
@@ -371,6 +383,10 @@ public unsafe void ReleasePromiseHolder(nint holderGCHandle)
                     holder.IsDisposed = true;
                     handle.Free();
                 }
+#if FEATURE_WASM_MANAGED_THREADS
+                Marshal.FreeHGlobal((IntPtr)holder.State);
+                holder.State = null;
+#endif
             }
         }
 
@@ -408,6 +424,10 @@ public unsafe void ReleaseJSOwnedObjectByGCHandle(nint gcHandle)
                 {
                     holderCallback = holder.Callback;
                     holder.IsDisposed = true;
+#if FEATURE_WASM_MANAGED_THREADS
+                    Marshal.FreeHGlobal((IntPtr)holder.State);
+                    holder.State = null;
+#endif
                 }
             }
             holderCallback?.Invoke(null);
@@ -462,12 +482,12 @@ public static void ReleaseCSOwnedObject(JSObject jso, bool skipJS)
                     {
                         if (IsJSVHandle(jsHandle))
                         {
-                            Environment.FailFast("TODO implement blocking ReleaseCSOwnedObjectSend to make sure the order of FreeJSVHandle is correct.");
+                            Environment.FailFast($"TODO implement blocking ReleaseCSOwnedObjectSend to make sure the order of FreeJSVHandle is correct, ManagedThreadId: {Environment.CurrentManagedThreadId}. {Environment.NewLine} {Environment.StackTrace}");
                         }
 
                         // this is async message, we need to call this as the last thing
                         // the same jsHandle would not be re-used until JS side considers it free
-                        Interop.Runtime.ReleaseCSOwnedObjectPost(ctx.NativeTID, jsHandle);
+                        Interop.Runtime.ReleaseCSOwnedObjectPost(ctx.JSNativeTID, jsHandle);
                     }
 #else
                     Interop.Runtime.ReleaseCSOwnedObject(jsHandle);
@@ -480,7 +500,7 @@ public static void ReleaseCSOwnedObject(JSObject jso, bool skipJS)
             }
         }
 
-#endregion
+        #endregion
 
         #region Dispose
 
@@ -495,7 +515,6 @@ private void Dispose(bool disposing)
                     {
                         Environment.FailFast($"JSProxyContext must be disposed on the thread which owns it, ManagedThreadId: {Environment.CurrentManagedThreadId}. {Environment.NewLine} {Environment.StackTrace}");
                     }
-                    ((GCHandle)ContextHandle).Free();
 #endif
 
                     List<WeakReference<JSObject>> copy = new(ThreadCsOwnedObjects.Values);
@@ -509,6 +528,7 @@ private void Dispose(bool disposing)
 
 #if FEATURE_WASM_MANAGED_THREADS
                     Interop.Runtime.UninstallWebWorkerInterop();
+                    ((GCHandle)ContextHandle).Free();
 #endif
 
                     foreach (var gch in ThreadJsOwnedObjects.Values)
@@ -522,6 +542,7 @@ private void Dispose(bool disposing)
                         {
                             holder.Callback!.Invoke(null);
                         }
+                        ((GCHandle)holder.GCHandle).Free();
                     }
 
                     ThreadCsOwnedObjects.Clear();
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSSynchronizationContext.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSSynchronizationContext.cs
index a1e6b6c93b09..e93b1774d687 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSSynchronizationContext.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSSynchronizationContext.cs
@@ -44,30 +44,28 @@ public WorkItem(SendOrPostCallback callback, object? data, ManualResetEventSlim?
         }
 
         // this need to be called from JSWebWorker or UI thread
-        public static JSSynchronizationContext InstallWebWorkerInterop(bool isMainThread, CancellationToken cancellationToken)
+        public static unsafe JSSynchronizationContext InstallWebWorkerInterop(bool isMainThread, CancellationToken cancellationToken)
         {
             var ctx = new JSSynchronizationContext(isMainThread, cancellationToken);
             ctx.previousSynchronizationContext = SynchronizationContext.Current;
             SynchronizationContext.SetSynchronizationContext(ctx);
 
-            // FIXME: make this configurable
-            // we could have 3 different modes of this
-            // 1) throwing on UI + JSWebWorker
-            // 2) throwing only on UI - small risk, more convenient.
-            // 3) not throwing at all - quite risky
-            // deadlock scenarios are:
-            // - .Wait for more than 5000ms and deadlock the GC suspend
-            // - .Wait on the Task from HTTP client, on the same thread as the HTTP client needs to resolve the Task/Promise. This could be also be a chain of promises.
-            // - try to create new pthread when UI thread is blocked and we run out of posix/emscripten pool of loaded workers.
-            // Things which lead to it are
-            // - Task.Wait, Signal.Wait etc
-            // - Monitor.Enter etc, if the lock is held by another thread for long time
-            // - synchronous [JSExport] into managed code, which would block
-            // - synchronous [JSImport] to another thread, which would block
-            // see also https://github.com/dotnet/runtime/issues/76958#issuecomment-1921418290
-            Thread.ThrowOnBlockingWaitOnJSInteropThread = true;
+            if (!isMainThread)
+            {
+                if (JSProxyContext.ThreadBlockingMode == JSHostImplementation.JSThreadBlockingMode.ThrowWhenBlockingWait)
+                {
+                    Thread.ThrowOnBlockingWaitOnJSInteropThread = true;
+                }
+                else if (JSProxyContext.ThreadBlockingMode == JSHostImplementation.JSThreadBlockingMode.WarnWhenBlockingWait
+                    || JSProxyContext.ThreadBlockingMode == JSHostImplementation.JSThreadBlockingMode.PreventSynchronousJSExport
+                    )
+                {
+                    Thread.WarnOnBlockingWaitOnJSInteropThread = true;
+                }
+            }
 
             var proxyContext = ctx.ProxyContext;
+            proxyContext.AsyncTaskScheduler = new JSAsyncTaskScheduler(ctx);
             JSProxyContext.CurrentThreadContext = proxyContext;
             JSProxyContext.ExecutionContext = proxyContext;
             if (isMainThread)
@@ -77,7 +75,10 @@ public static JSSynchronizationContext InstallWebWorkerInterop(bool isMainThread
 
             ctx.AwaitNewData();
 
-            Interop.Runtime.InstallWebWorkerInterop(proxyContext.ContextHandle);
+            Interop.Runtime.InstallWebWorkerInterop(proxyContext.ContextHandle,
+                (delegate* unmanaged[Cdecl]<JSMarshalerArgument*, void>)&JavaScriptExports.BeforeSyncJSExport,
+                (delegate* unmanaged[Cdecl]<JSMarshalerArgument*, void>)&JavaScriptExports.AfterSyncJSExport,
+                (delegate* unmanaged[Cdecl]<void>)&PumpHandler);
 
             return ctx;
         }
@@ -179,7 +180,7 @@ private unsafe void ScheduleJSPump()
         {
             // While we COULD pump here, we don't want to. We want the pump to happen on the next event loop turn.
             // Otherwise we could get a chain where a pump generates a new work item and that makes us pump again, forever.
-            TargetThreadScheduleBackgroundJob(ProxyContext.NativeTID, (void*)(delegate* unmanaged[Cdecl]<void>)&BackgroundJobHandler);
+            ScheduleSynchronizationContext(ProxyContext.NativeTID);
         }
 
         public override void Post(SendOrPostCallback d, object? state)
@@ -245,13 +246,13 @@ public override void Send(SendOrPostCallback d, object? state)
         }
 
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        internal static extern unsafe void TargetThreadScheduleBackgroundJob(IntPtr targetTID, void* callback);
+        internal static extern unsafe void ScheduleSynchronizationContext(IntPtr targetTID);
 
 #pragma warning disable CS3016 // Arrays as attribute arguments is not CLS-compliant
         [UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvCdecl) })]
 #pragma warning restore CS3016
         // this callback will arrive on the target thread, called from mono_background_exec
-        private static void BackgroundJobHandler()
+        private static void PumpHandler()
         {
             var ctx = JSProxyContext.AssertIsInteropThread();
             ctx.SynchronizationContext.Pump();
@@ -266,6 +267,10 @@ private void Pump()
             }
             try
             {
+                if (SynchronizationContext.Current == null)
+                {
+                    SetSynchronizationContext(this);
+                }
                 while (Queue.Reader.TryRead(out var item))
                 {
                     try
@@ -291,7 +296,7 @@ private void Pump()
             }
             catch (Exception e)
             {
-                Environment.FailFast($"JSSynchronizationContext.BackgroundJobHandler failed, ManagedThreadId: {Environment.CurrentManagedThreadId}. {Environment.NewLine} {e.StackTrace}");
+                Environment.FailFast($"JSSynchronizationContext.Pump failed, ManagedThreadId: {Environment.CurrentManagedThreadId}. {Environment.NewLine} {e.StackTrace}");
             }
         }
 
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSType.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSType.cs
index 1e2df7713719..7d86122eae7f 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSType.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSType.cs
@@ -28,6 +28,15 @@ public sealed class Discard : JSType
             internal Discard() { }
         }
 
+        /// <summary>
+        /// Could return immediately without waiting for the execution to finish, when dispatching the call to another thread.
+        /// Suppresses marshaling of the JavaScript function's return value.
+        /// </summary>
+        public sealed class DiscardNoWait : JSType
+        {
+            internal DiscardNoWait() { }
+        }
+
         /// <summary>
         /// Marshal as JavaScript <see href="https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean">Boolean</see> type.
         /// </summary>
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSWebWorker.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSWebWorker.cs
index 5721c67d5ea2..776f6dc3d5db 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSWebWorker.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/JSWebWorker.cs
@@ -66,6 +66,7 @@ public JSWebWorkerInstance(Func<Task<T>> body, CancellationToken cancellationTok
                 // TODO TaskCreationOptions.HideScheduler ?
                 _taskCompletionSource = new TaskCompletionSource<T>(TaskCreationOptions.RunContinuationsAsynchronously);
                 _thread = new Thread(ThreadMain);
+                _thread.Name = "JSWebWorker";
                 _resultTask = null;
                 _cancellationToken = cancellationToken;
                 _cancellationRegistration = null;
@@ -75,30 +76,7 @@ public JSWebWorkerInstance(Func<Task<T>> body, CancellationToken cancellationTok
 
             public Task<T> Start()
             {
-                if (JSProxyContext.MainThreadContext.IsCurrentThread())
-                {
-                    // give browser chance to load more threads
-                    // until there at least one thread loaded, it doesn't make sense to `Start`
-                    // because that would also hang, but in a way blocking the UI thread, much worse.
-                    JavaScriptImports.ThreadAvailable().ContinueWith(static (t, o) =>
-                    {
-                        var self = (JSWebWorkerInstance<T>)o!;
-                        if (t.IsCompletedSuccessfully)
-                        {
-                            self._thread.Start();
-                        }
-                        if (t.IsCanceled)
-                        {
-                            throw new OperationCanceledException("Cancelled while waiting for underlying WebWorker to become available.", self._cancellationToken);
-                        }
-                        throw t.Exception!;
-                        // ideally this will execute on UI thread quickly: ExecuteSynchronously
-                    }, this, _cancellationToken, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.FromCurrentSynchronizationContext());
-                }
-                else
-                {
-                    _thread.Start();
-                }
+                _thread.Start();
                 return _taskCompletionSource.Task;
             }
 
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/MarshalerType.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/MarshalerType.cs
index 25a52d82b274..ee9e4e247b25 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/MarshalerType.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/MarshalerType.cs
@@ -34,6 +34,7 @@ internal enum MarshalerType : byte
         Span,
         Action,
         Function,
+        DiscardNoWait,
 
 #if !JSIMPORTGENERATOR
         // only on runtime
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.BigInt64.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.BigInt64.cs
index a2bce80d13b8..87eaa7696603 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.BigInt64.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.BigInt64.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManagedBig(out long value)
         {
             if (slot.Type == MarshalerType.None)
@@ -28,7 +30,9 @@ public unsafe void ToManagedBig(out long value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJSBig(long value)
         {
             slot.Type = MarshalerType.BigInt64;
@@ -40,7 +44,9 @@ public void ToJSBig(long value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManagedBig(out long? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -56,7 +62,9 @@ public unsafe void ToManagedBig(out long? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJSBig(long? value)
         {
             if (value.HasValue)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Bool.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Bool.cs
index 375e4b97f4a5..51ae3b6aed04 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Bool.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Bool.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out bool value)
         {
             if (slot.Type == MarshalerType.None)
@@ -28,7 +30,9 @@ public unsafe void ToManaged(out bool value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(bool value)
         {
             slot.Type = MarshalerType.Boolean;
@@ -40,7 +44,9 @@ public void ToJS(bool value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out bool? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -56,7 +62,9 @@ public unsafe void ToManaged(out bool? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(bool? value)
         {
             if (value.HasValue)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Byte.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Byte.cs
index 5392fca48fae..113d4f1a0690 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Byte.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Byte.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out byte value)
         {
             if (slot.Type == MarshalerType.None)
@@ -28,7 +30,9 @@ public unsafe void ToManaged(out byte value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(byte value)
         {
             slot.Type = MarshalerType.Byte;
@@ -40,7 +44,9 @@ public void ToJS(byte value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out byte? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -56,7 +62,9 @@ public unsafe void ToManaged(out byte? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(byte? value)
         {
             if (value.HasValue)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Char.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Char.cs
index 7daddfb0fd44..d31a8d3cd35c 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Char.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Char.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out char value)
         {
             if (slot.Type == MarshalerType.None)
@@ -28,7 +30,9 @@ public unsafe void ToManaged(out char value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(char value)
         {
             slot.Type = MarshalerType.Char;
@@ -40,7 +44,9 @@ public void ToJS(char value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out char? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -56,7 +62,9 @@ public unsafe void ToManaged(out char? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(char? value)
         {
             if (value.HasValue)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.DateTime.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.DateTime.cs
index 6521ac0c54b9..7f63e5034c3b 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.DateTime.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.DateTime.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out DateTimeOffset value)
         {
             if (slot.Type == MarshalerType.None)
@@ -28,7 +30,9 @@ public unsafe void ToManaged(out DateTimeOffset value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(DateTimeOffset value)
         {
             slot.Type = MarshalerType.DateTimeOffset;
@@ -40,7 +44,9 @@ public void ToJS(DateTimeOffset value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out DateTimeOffset? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -56,7 +62,9 @@ public unsafe void ToManaged(out DateTimeOffset? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(DateTimeOffset? value)
         {
             if (value.HasValue)
@@ -75,7 +83,9 @@ public void ToJS(DateTimeOffset? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out DateTime value)
         {
             if (slot.Type == MarshalerType.None)
@@ -91,7 +101,9 @@ public unsafe void ToManaged(out DateTime value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(DateTime value)
         {
             slot.Type = MarshalerType.DateTime;
@@ -103,7 +115,9 @@ public void ToJS(DateTime value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out DateTime? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -119,7 +133,9 @@ public unsafe void ToManaged(out DateTime? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(DateTime? value)
         {
             if (value.HasValue)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Double.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Double.cs
index 9b7f48ed4b3a..c83930bda825 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Double.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Double.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out double value)
         {
             if (slot.Type == MarshalerType.None)
@@ -28,7 +30,9 @@ public unsafe void ToManaged(out double value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(double value)
         {
             slot.Type = MarshalerType.Double;
@@ -40,7 +44,9 @@ public void ToJS(double value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out double? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -56,7 +62,9 @@ public unsafe void ToManaged(out double? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(double? value)
         {
             if (value.HasValue)
@@ -75,7 +83,9 @@ public void ToJS(double? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out double[]? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -93,7 +103,9 @@ public unsafe void ToManaged(out double[]? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(double[] value)
         {
             if (value == null)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Exception.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Exception.cs
index e526fe4b52c8..86a57b48b345 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Exception.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Exception.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out Exception? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -48,7 +50,9 @@ public unsafe void ToManaged(out Exception? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToJS(Exception? value)
         {
             if (value == null)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Func.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Func.cs
index 271acab66bae..d53c92400755 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Func.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Func.cs
@@ -20,7 +20,7 @@ public void InvokeJS()
                 // and would also allow the JS function to be collected
 
 
-                Span<JSMarshalerArgument> arguments = stackalloc JSMarshalerArgument[4];
+                Span<JSMarshalerArgument> arguments = stackalloc JSMarshalerArgument[2];
                 ref JSMarshalerArgument args_exception = ref arguments[0];
                 ref JSMarshalerArgument args_return = ref arguments[1];
 #if FEATURE_WASM_MANAGED_THREADS
@@ -51,7 +51,7 @@ public ActionJS(JSObject holder, ArgumentToJSCallback<T> arg1Marshaler)
             public void InvokeJS(T arg1)
             {
 
-                Span<JSMarshalerArgument> arguments = stackalloc JSMarshalerArgument[4];
+                Span<JSMarshalerArgument> arguments = stackalloc JSMarshalerArgument[3];
                 ref JSMarshalerArgument args_exception = ref arguments[0];
                 ref JSMarshalerArgument args_return = ref arguments[1];
                 ref JSMarshalerArgument args_arg1 = ref arguments[2];
@@ -258,7 +258,7 @@ public TResult InvokeJS()
                 // JSObject (held by this lambda) would be collected by GC after the lambda is collected
                 // and would also allow the JS function to be collected
 
-                Span<JSMarshalerArgument> arguments = stackalloc JSMarshalerArgument[4];
+                Span<JSMarshalerArgument> arguments = stackalloc JSMarshalerArgument[2];
                 ref JSMarshalerArgument args_exception = ref arguments[0];
                 ref JSMarshalerArgument args_return = ref arguments[1];
 #if FEATURE_WASM_MANAGED_THREADS
@@ -295,7 +295,7 @@ public FuncJS(JSObject holder, ArgumentToJSCallback<T> arg1Marshaler, ArgumentTo
             public TResult InvokeJS(T arg1)
             {
 
-                Span<JSMarshalerArgument> arguments = stackalloc JSMarshalerArgument[4];
+                Span<JSMarshalerArgument> arguments = stackalloc JSMarshalerArgument[3];
                 ref JSMarshalerArgument args_exception = ref arguments[0];
                 ref JSMarshalerArgument args_return = ref arguments[1];
                 ref JSMarshalerArgument args_arg1 = ref arguments[2];
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Int16.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Int16.cs
index 6a2fec5e0f2f..54ef3ee53c24 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Int16.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Int16.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out short value)
         {
             if (slot.Type == MarshalerType.None)
@@ -28,7 +30,9 @@ public unsafe void ToManaged(out short value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(short value)
         {
             slot.Type = MarshalerType.Int16;
@@ -40,7 +44,9 @@ public void ToJS(short value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out short? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -56,7 +62,9 @@ public unsafe void ToManaged(out short? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(short? value)
         {
             if (value.HasValue)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Int32.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Int32.cs
index 501484af3ab4..a6990113c8ff 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Int32.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Int32.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out int value)
         {
             if (slot.Type == MarshalerType.None)
@@ -28,7 +30,9 @@ public unsafe void ToManaged(out int value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(int value)
         {
             slot.Type = MarshalerType.Int32;
@@ -40,7 +44,9 @@ public void ToJS(int value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out int? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -56,7 +62,9 @@ public unsafe void ToManaged(out int? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(int? value)
         {
             if (value.HasValue)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Int52.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Int52.cs
index 4893f32f5f20..6b639ae77ef8 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Int52.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Int52.cs
@@ -15,7 +15,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out long value)
         {
             if (slot.Type == MarshalerType.None)
@@ -31,7 +33,9 @@ public unsafe void ToManaged(out long value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(long value)
         {
             if (value < I52_MIN_VALUE || value > I52_MAX_VALUE)
@@ -48,7 +52,9 @@ public void ToJS(long value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out long? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -64,7 +70,9 @@ public unsafe void ToManaged(out long? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(long? value)
         {
             if (value.HasValue)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.IntPtr.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.IntPtr.cs
index 251db1621512..2737005542df 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.IntPtr.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.IntPtr.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out IntPtr value)
         {
             if (slot.Type == MarshalerType.None)
@@ -28,7 +30,9 @@ public unsafe void ToManaged(out IntPtr value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(IntPtr value)
         {
             slot.Type = MarshalerType.IntPtr;
@@ -40,7 +44,9 @@ public void ToJS(IntPtr value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out IntPtr? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -56,7 +62,9 @@ public unsafe void ToManaged(out IntPtr? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(IntPtr? value)
         {
             if (value.HasValue)
@@ -75,7 +83,9 @@ public void ToJS(IntPtr? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out void* value)
         {
             if (slot.Type == MarshalerType.None)
@@ -91,7 +101,9 @@ public unsafe void ToManaged(out void* value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToJS(void* value)
         {
             slot.Type = MarshalerType.IntPtr;
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.JSObject.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.JSObject.cs
index 7eb4440c565d..76f8ad8fd9c9 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.JSObject.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.JSObject.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out JSObject? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -29,7 +31,9 @@ public unsafe void ToManaged(out JSObject? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(JSObject? value)
         {
             if (value == null)
@@ -64,7 +68,9 @@ public void ToJS(JSObject? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out JSObject?[]? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -90,7 +96,9 @@ public unsafe void ToManaged(out JSObject?[]? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToJS(JSObject?[] value)
         {
             if (value == null)
@@ -99,7 +107,7 @@ public unsafe void ToJS(JSObject?[] value)
                 return;
             }
             slot.Length = value.Length;
-            int bytes = value.Length * Marshal.SizeOf(typeof(JSMarshalerArgument));
+            int bytes = value.Length * sizeof(JSMarshalerArgument);
             slot.Type = MarshalerType.Array;
             slot.ElementType = MarshalerType.JSObject;
             JSMarshalerArgument* payload = (JSMarshalerArgument*)Marshal.AllocHGlobal(bytes);
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Object.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Object.cs
index 3ce627aeff21..d9ed7cd42858 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Object.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Object.cs
@@ -18,7 +18,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out object? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -110,7 +112,9 @@ public unsafe void ToManaged(out object? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(object? value)
         {
             if (value == null)
@@ -327,7 +331,9 @@ public void ToJS(object? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out object?[]? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -356,7 +362,9 @@ public unsafe void ToManaged(out object?[]? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToJS(object?[] value)
         {
             if (value == null)
@@ -365,7 +373,7 @@ public unsafe void ToJS(object?[] value)
                 return;
             }
             slot.Length = value.Length;
-            int bytes = value.Length * Marshal.SizeOf(typeof(JSMarshalerArgument));
+            int bytes = value.Length * sizeof(JSMarshalerArgument);
             slot.Type = MarshalerType.Array;
             JSMarshalerArgument* payload = (JSMarshalerArgument*)Marshal.AllocHGlobal(bytes);
             Unsafe.InitBlock(payload, 0, (uint)bytes);
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Single.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Single.cs
index c22d26c86520..696cc9a60089 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Single.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Single.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out float value)
         {
             if (slot.Type == MarshalerType.None)
@@ -28,7 +30,9 @@ public unsafe void ToManaged(out float value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(float value)
         {
             slot.Type = MarshalerType.Single;
@@ -40,7 +44,9 @@ public void ToJS(float value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out float? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -56,7 +62,9 @@ public unsafe void ToManaged(out float? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public void ToJS(float? value)
         {
             if (value.HasValue)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.String.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.String.cs
index efe764cd837f..d0e2d9cb7f9f 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.String.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.String.cs
@@ -12,7 +12,9 @@ public partial struct JSMarshalerArgument
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out string? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -36,7 +38,9 @@ public unsafe void ToManaged(out string? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToJS(string? value)
         {
             if (value == null)
@@ -69,7 +73,9 @@ public unsafe void ToJS(string? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToManaged(out string?[]? value)
         {
             if (slot.Type == MarshalerType.None)
@@ -98,7 +104,9 @@ public unsafe void ToManaged(out string?[]? value)
         /// It's used by JSImport code generator and should not be used by developers in source code.
         /// </summary>
         /// <param name="value">The value to be marshaled.</param>
+#if !DEBUG
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public unsafe void ToJS(string?[] value)
         {
             if (value == null)
@@ -107,7 +115,7 @@ public unsafe void ToJS(string?[] value)
                 return;
             }
             slot.Length = value.Length;
-            int bytes = value.Length * Marshal.SizeOf(typeof(JSMarshalerArgument));
+            int bytes = value.Length * sizeof(JSMarshalerArgument);
             slot.Type = MarshalerType.Array;
             JSMarshalerArgument* payload = (JSMarshalerArgument*)Marshal.AllocHGlobal(bytes);
             Unsafe.InitBlock(payload, 0, (uint)bytes);
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Task.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Task.cs
index 4d8b10cfd452..cf636727ce62 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Task.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/src/System/Runtime/InteropServices/JavaScript/Marshaling/JSMarshalerArgument.Task.cs
@@ -7,6 +7,7 @@
 using System.ComponentModel;
 using System.Threading;
 using static System.Runtime.InteropServices.JavaScript.JSHostImplementation;
+using System.Runtime.CompilerServices;
 
 namespace System.Runtime.InteropServices.JavaScript
 {
@@ -49,14 +50,15 @@ public unsafe void ToManaged(out Task? value)
             lock (ctx)
             {
                 PromiseHolder holder = ctx.GetPromiseHolder(slot.GCHandle);
-                // we want to run the continuations on the original thread which called the JSImport, so RunContinuationsAsynchronously, rather than ExecuteSynchronously
-                // TODO TaskCreationOptions.RunContinuationsAsynchronously
-                TaskCompletionSource tcs = new TaskCompletionSource(holder);
+                TaskCompletionSource tcs = new TaskCompletionSource(holder, TaskCreationOptions.RunContinuationsAsynchronously);
                 ToManagedCallback callback = (JSMarshalerArgument* arguments_buffer) =>
                 {
                     if (arguments_buffer == null)
                     {
-                        tcs.TrySetException(new TaskCanceledException("WebWorker which is origin of the Promise is being terminated."));
+                        if (!tcs.TrySetException(new TaskCanceledException("WebWorker which is origin of the Promise is being terminated.")))
+                        {
+                            Environment.FailFast("Failed to set exception to TaskCompletionSource (arguments buffer is null)");
+                        }
                         return;
                     }
                     ref JSMarshalerArgument arg_2 = ref arguments_buffer[3]; // set by caller when this is SetException call
@@ -64,11 +66,17 @@ public unsafe void ToManaged(out Task? value)
                     if (arg_2.slot.Type != MarshalerType.None)
                     {
                         arg_2.ToManaged(out Exception? fail);
-                        tcs.TrySetException(fail!);
+                        if (!tcs.TrySetException(fail!))
+                        {
+                            Environment.FailFast("Failed to set exception to TaskCompletionSource (exception raised)");
+                        }
                     }
                     else
                     {
-                        tcs.TrySetResult();
+                        if (!tcs.TrySetResult())
+                        {
+                            Environment.FailFast("Failed to set result to TaskCompletionSource (marshaler type is none)");
+                        }
                     }
                     // eventual exception is handled by caller
                 };
@@ -100,14 +108,15 @@ public unsafe void ToManaged<T>(out Task<T>? value, ArgumentToManagedCallback<T>
             lock (ctx)
             {
                 var holder = ctx.GetPromiseHolder(slot.GCHandle);
-                // we want to run the continuations on the original thread which called the JSImport, so RunContinuationsAsynchronously, rather than ExecuteSynchronously
-                // TODO TaskCreationOptions.RunContinuationsAsynchronously
-                TaskCompletionSource<T> tcs = new TaskCompletionSource<T>(holder);
+                TaskCompletionSource<T> tcs = new TaskCompletionSource<T>(holder, TaskCreationOptions.RunContinuationsAsynchronously);
                 ToManagedCallback callback = (JSMarshalerArgument* arguments_buffer) =>
                 {
                     if (arguments_buffer == null)
                     {
-                        tcs.TrySetException(new TaskCanceledException("WebWorker which is origin of the Promise is being terminated."));
+                        if (!tcs.TrySetException(new TaskCanceledException("WebWorker which is origin of the Promise is being terminated.")))
+                        {
+                            Environment.FailFast("Failed to set exception to TaskCompletionSource (arguments buffer is null)");
+                        }
                         return;
                     }
 
@@ -117,12 +126,18 @@ public unsafe void ToManaged<T>(out Task<T>? value, ArgumentToManagedCallback<T>
                     {
                         arg_2.ToManaged(out Exception? fail);
                         if (fail == null) throw new InvalidOperationException(SR.FailedToMarshalException);
-                        tcs.TrySetException(fail);
+                        if (!tcs.TrySetException(fail))
+                        {
+                            Environment.FailFast("Failed to set exception to TaskCompletionSource (exception raised)");
+                        }
                     }
                     else
                     {
                         marshaler(ref arg_3, out T result);
-                        tcs.TrySetResult(result);
+                        if(!tcs.TrySetResult(result))
+                        {
+                            Environment.FailFast("Failed to set result to TaskCompletionSource (marshaler type is none)");
+                        }
                     }
                     // eventual exception is handled by caller
                 };
@@ -140,13 +155,20 @@ internal void ToJSDynamic(Task? value)
         {
             Task? task = value;
 
+            var ctx = ToJSContext;
+            var canMarshalTaskResultOnSameCall = CanMarshalTaskResultOnSameCall(ctx);
+
             if (task == null)
             {
+                if (!canMarshalTaskResultOnSameCall)
+                {
+                    Environment.FailFast("Marshalling null return Task to JS is not supported in MT");
+                }
                 slot.Type = MarshalerType.None;
                 return;
             }
 
-            if (task.IsCompleted)
+            if (canMarshalTaskResultOnSameCall && task.IsCompleted)
             {
                 if (task.Exception != null)
                 {
@@ -172,7 +194,6 @@ internal void ToJSDynamic(Task? value)
                 }
             }
 
-            var ctx = ToJSContext;
 
             if (slot.Type != MarshalerType.TaskPreCreated)
             {
@@ -189,7 +210,9 @@ internal void ToJSDynamic(Task? value)
             var taskHolder = ctx.CreateCSOwnedProxy(slot.JSHandle);
 
 #if FEATURE_WASM_MANAGED_THREADS
-            task.ContinueWith(Complete, taskHolder, TaskScheduler.FromCurrentSynchronizationContext());
+            // AsyncTaskScheduler will make sure that the resolve message is always sent after this call is completed
+            // that is: synchronous marshaling and eventually message to the target thread, which need to arrive before the resolve message
+            task.ContinueWith(Complete, taskHolder, ctx.AsyncTaskScheduler!);
 #else
             task.ContinueWith(Complete, taskHolder, TaskScheduler.Current);
 #endif
@@ -228,13 +251,19 @@ static void MarshalResult(ref JSMarshalerArgument arg, object? taskResult)
         public void ToJS(Task? value)
         {
             Task? task = value;
+            var ctx = ToJSContext;
+            var canMarshalTaskResultOnSameCall = CanMarshalTaskResultOnSameCall(ctx);
 
             if (task == null)
             {
+                if (!canMarshalTaskResultOnSameCall)
+                {
+                    Environment.FailFast("Marshalling null return Task to JS is not supported in MT");
+                }
                 slot.Type = MarshalerType.None;
                 return;
             }
-            if (task.IsCompleted)
+            if (canMarshalTaskResultOnSameCall && task.IsCompleted)
             {
                 if (task.Exception != null)
                 {
@@ -252,8 +281,6 @@ public void ToJS(Task? value)
                 }
             }
 
-            var ctx = ToJSContext;
-
             if (slot.Type != MarshalerType.TaskPreCreated)
             {
                 // this path should only happen when the Task is passed as argument of JSImport
@@ -269,7 +296,9 @@ public void ToJS(Task? value)
             var taskHolder = ctx.CreateCSOwnedProxy(slot.JSHandle);
 
 #if FEATURE_WASM_MANAGED_THREADS
-            task.ContinueWith(Complete, taskHolder, TaskScheduler.FromCurrentSynchronizationContext());
+            // AsyncTaskScheduler will make sure that the resolve message is always sent after this call is completed
+            // that is: synchronous marshaling and eventually message to the target thread, which need to arrive before the resolve message
+            task.ContinueWith(Complete, taskHolder, ctx.AsyncTaskScheduler!);
 #else
             task.ContinueWith(Complete, taskHolder, TaskScheduler.Current);
 #endif
@@ -298,14 +327,20 @@ static void Complete(Task task, object? th)
         public void ToJS<T>(Task<T>? value, ArgumentToJSCallback<T> marshaler)
         {
             Task<T>? task = value;
+            var ctx = ToJSContext;
+            var canMarshalTaskResultOnSameCall = CanMarshalTaskResultOnSameCall(ctx);
 
             if (task == null)
             {
+                if (!canMarshalTaskResultOnSameCall)
+                {
+                    Environment.FailFast("Marshalling null return Task to JS is not supported in MT");
+                }
                 slot.Type = MarshalerType.None;
                 return;
             }
 
-            if (task.IsCompleted)
+            if (canMarshalTaskResultOnSameCall && task.IsCompleted)
             {
                 if (task.Exception != null)
                 {
@@ -325,7 +360,6 @@ public void ToJS<T>(Task<T>? value, ArgumentToJSCallback<T> marshaler)
                 }
             }
 
-            var ctx = ToJSContext;
             if (slot.Type != MarshalerType.TaskPreCreated)
             {
                 // this path should only happen when the Task is passed as argument of JSImport
@@ -341,7 +375,9 @@ public void ToJS<T>(Task<T>? value, ArgumentToJSCallback<T> marshaler)
             var taskHolder = ctx.CreateCSOwnedProxy(slot.JSHandle);
 
 #if FEATURE_WASM_MANAGED_THREADS
-            task.ContinueWith(Complete, new HolderAndMarshaler<T>(taskHolder, marshaler), TaskScheduler.FromCurrentSynchronizationContext());
+            // AsyncTaskScheduler will make sure that the resolve message is always sent after this call is completed
+            // that is: synchronous marshaling and eventually message to the target thread, which need to arrive before the resolve message
+            task.ContinueWith(Complete, new HolderAndMarshaler<T>(taskHolder, marshaler), ctx.AsyncTaskScheduler!);
 #else
             task.ContinueWith(Complete, new HolderAndMarshaler<T>(taskHolder, marshaler), TaskScheduler.Current);
 #endif
@@ -361,6 +397,44 @@ static void Complete(Task<T> task, object? thm)
             }
         }
 
+#if !DEBUG
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
+#if FEATURE_WASM_MANAGED_THREADS
+        // We can't marshal resolved/rejected/null Task.Result directly into current argument when this is marshaling return of JSExport across threads
+        private bool CanMarshalTaskResultOnSameCall(JSProxyContext ctx)
+        {
+            if (slot.Type != MarshalerType.TaskPreCreated)
+            {
+                // this means that we are not in the return value of JSExport
+                // we are marshaling parameter of JSImport
+                return true;
+            }
+
+            if (ctx.IsCurrentThread())
+            {
+                // If the JS and Managed is running on the same thread we can use the args buffer,
+                // because the call is synchronous and the buffer will be processed.
+                // In that case the pre-allocated Promise would be discarded as necessary
+                // and the result will be marshaled by `try_marshal_sync_task_to_js`
+                return true;
+            }
+
+            // Otherwise this is JSExport return value and we can't use the args buffer, because the args buffer arrived in async message and nobody is reading after this.
+            // In such case the JS side already pre-created the Promise and we have to use it, to resolve it in separate call via `mono_wasm_resolve_or_reject_promise_post`
+            // there is JSVHandle in this arg
+            return false;
+        }
+#else
+#pragma warning disable CA1822 // Mark members as static
+        private bool CanMarshalTaskResultOnSameCall(JSProxyContext _)
+        {
+            // in ST build this is always synchronous and we can marshal the result directly
+            return true;
+        }
+#pragma warning restore CA1822 // Mark members as static
+#endif
+
         private sealed record HolderAndMarshaler<T>(JSObject TaskHolder, ArgumentToJSCallback<T> Marshaler);
 
         private static void RejectPromise(JSObject holder, Exception ex)
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System.Runtime.InteropServices.JavaScript.Tests.csproj b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System.Runtime.InteropServices.JavaScript.Tests.csproj
index f390f63deaeb..135b1b78297a 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System.Runtime.InteropServices.JavaScript.Tests.csproj
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System.Runtime.InteropServices.JavaScript.Tests.csproj
@@ -13,22 +13,26 @@
     <!-- Use following lines to write the generated files to disk. -->
     <EmitCompilerGeneratedFiles>true</EmitCompilerGeneratedFiles>
     <!-- to see timing and which test aborted the runtime -->
-    <WasmXHarnessMonoArgs>$(WasmXHarnessMonoArgs) --setenv=XHARNESS_LOG_TEST_START=true</WasmXHarnessMonoArgs>
+    <XunitShowProgress>true</XunitShowProgress>
+    <WasmXHarnessMaxParallelThreads>1</WasmXHarnessMaxParallelThreads>
   </PropertyGroup>
   <!-- Make debugging easier -->
   <PropertyGroup Condition="'$(Configuration)' == 'Debug'">
+    <WasmNativeDebugSymbols>true</WasmNativeDebugSymbols>
     <WasmNativeStrip>false</WasmNativeStrip>
   </PropertyGroup>
   <ItemGroup>
-    <Compile Include="System\Runtime\InteropServices\JavaScript\JavaScriptTestHelper.cs" />
-    <Compile Include="System\Runtime\InteropServices\JavaScript\JSImportExportTest.cs" />
-    <Compile Include="System\Runtime\InteropServices\JavaScript\Utils.cs" />
-    <None Include="System\Runtime\InteropServices\JavaScript\JavaScriptTestHelper.mjs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\JavaScriptTestHelper.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\JSImportTest.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\JSExportTest.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\JSInteropTestBase.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\Utils.cs" />
+    <None Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\JavaScriptTestHelper.mjs" />
     <None Include="$(CompilerGeneratedFilesOutputPath)\..\browser-wasm\generated\Microsoft.Interop.JavaScript.JSImportGenerator\Microsoft.Interop.JavaScript.JSImportGenerator\JSImports.g.cs" />
     <None Include="$(CompilerGeneratedFilesOutputPath)\..\browser-wasm\generated\Microsoft.Interop.JavaScript.JSImportGenerator\Microsoft.Interop.JavaScript.JsExportGenerator\JSExports.g.cs" />
-    <WasmExtraFilesToDeploy Include="System\Runtime\InteropServices\JavaScript\JavaScriptTestHelper.mjs" />
-    <WasmExtraFilesToDeploy Include="System\Runtime\InteropServices\JavaScript\SecondRuntimeTest.js" />
-    <WasmExtraFilesToDeploy Include="System\Runtime\InteropServices\JavaScript\timers.mjs" />
+    <WasmExtraFilesToDeploy Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\JavaScriptTestHelper.mjs" />
+    <WasmExtraFilesToDeploy Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\SecondRuntimeTest.js" />
+    <WasmExtraFilesToDeploy Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\timers.mjs" />
     <ProjectReference Include="$(CoreLibProject)" />
     <ProjectReference Include="$(LibrariesProjectRoot)System.Private.Uri\src\System.Private.Uri.csproj" PrivateAssets="all" />
     <ProjectReference Include="$(LibrariesProjectRoot)System.Collections\src\System.Collections.csproj" />
@@ -39,19 +43,19 @@
     <ProjectReference Include="$(LibrariesProjectRoot)System.Runtime.InteropServices.JavaScript\src\System.Runtime.InteropServices.JavaScript.csproj" SkipUseReferenceAssembly="true" />
   </ItemGroup>
   <ItemGroup Condition="'$(FeatureWasmManagedThreads)' != 'true'">
-    <Compile Include="System\Runtime\InteropServices\JavaScript\SecondRuntimeTest.cs" />
-    <Compile Include="System\Runtime\InteropServices\JavaScript\HttpRequestMessageTest.cs" />
-    <Compile Include="System\Runtime\InteropServices\JavaScript\TimerTests.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\SecondRuntimeTest.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\HttpRequestMessageTest.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\TimerTests.cs" />
   </ItemGroup>
   <ItemGroup Condition="'$(FeatureWasmManagedThreads)' == 'true'">
-    <Compile Include="System\Runtime\InteropServices\JavaScript\WebWorkerTestBase.cs" />
-    <Compile Include="System\Runtime\InteropServices\JavaScript\WebWorkerTest.cs" />
-    <Compile Include="System\Runtime\InteropServices\JavaScript\WebWorkerTest.Http.cs" />
-    <Compile Include="System\Runtime\InteropServices\JavaScript\WebWorkerTest.WebSocket.cs" />
-    <Compile Include="System\Runtime\InteropServices\JavaScript\WebWorkerTestHelper.cs" />
-    <None Include="System\Runtime\InteropServices\JavaScript\WebWorkerTestHelper.mjs" />
-    <None Include="System\Runtime\InteropServices\JavaScript\test.json" />
-    <WasmExtraFilesToDeploy Include="System\Runtime\InteropServices\JavaScript\WebWorkerTestHelper.mjs" />
-    <WasmExtraFilesToDeploy Include="System\Runtime\InteropServices\JavaScript\test.json" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\WebWorkerTestBase.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\WebWorkerTest.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\WebWorkerTest.Http.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\WebWorkerTest.WebSocket.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\WebWorkerTestHelper.cs" />
+    <None Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\WebWorkerTestHelper.mjs" />
+    <None Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\test.json" />
+    <WasmExtraFilesToDeploy Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\WebWorkerTestHelper.mjs" />
+    <WasmExtraFilesToDeploy Include="$(MSBuildThisFileDirectory)System\Runtime\InteropServices\JavaScript\test.json" />
   </ItemGroup>
 </Project>
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JSExportTest.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JSExportTest.cs
new file mode 100644
index 000000000000..170f393deb7c
--- /dev/null
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JSExportTest.cs
@@ -0,0 +1,436 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.IO;
+using System.Runtime.CompilerServices;
+using System.Threading.Tasks;
+using System.Threading;
+using Xunit;
+using System.Diagnostics.CodeAnalysis;
+#pragma warning disable xUnit1026 // Theory methods should use all of their parameters
+
+namespace System.Runtime.InteropServices.JavaScript.Tests
+{
+    public class JSExportAsyncTest : JSInteropTestBase, IAsyncLifetime
+    {
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedNotBrowserBackgroundExec))]
+        public void SyncJsImportJsExportThrows()
+        {
+            var ex = Assert.Throws<JSException>(()=>JavaScriptTestHelper.invoke1_Boolean(true, nameof(JavaScriptTestHelper.EchoBoolean)));
+            Assert.Contains("Cannot call synchronous C# method", ex.Message);
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalBooleanCases))]
+        public async Task JsExportBooleanAsync(bool value)
+        {
+            await JsExportTestAsync(value,
+                JavaScriptTestHelper.invoke1_BooleanAsync,
+                nameof(JavaScriptTestHelper.EchoBoolean),
+                "boolean");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalInt32Cases))]
+        public async Task JsExportInt32DiscardNoWait(int value)
+        {
+            JavaScriptTestHelper.optimizedReached=0;
+            JavaScriptTestHelper.invoke1O(value);
+            await JavaScriptTestHelper.Delay(50);
+            Assert.Equal(value, JavaScriptTestHelper.optimizedReached);
+        }
+
+        private async Task JsExportTestAsync<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.Interfaces)] T>(T value
+        , Func<T, string, Task<T>> invoke, string echoName, string jsType, string? jsClass = null)
+        {
+            T res;
+            res = await invoke(value, echoName);
+            Assert.Equal<T>(value, res);
+        }
+    }
+
+    [ConditionalClass(typeof(PlatformDetection), nameof(PlatformDetection.IsWasmBackgroundExecOrSingleThread))]
+    public class JSExportTest : JSInteropTestBase, IAsyncLifetime
+    {
+        [Theory]
+        [MemberData(nameof(MarshalBooleanCases))]
+        public void JsExportBoolean(bool value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_Boolean,
+                nameof(JavaScriptTestHelper.EchoBoolean),
+                "boolean");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalCharCases))]
+        public void JsExportChar(char value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_Char,
+                nameof(JavaScriptTestHelper.EchoChar),
+                "number");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalByteCases))]
+        public void JsExportByte(byte value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_Byte,
+                nameof(JavaScriptTestHelper.EchoByte),
+                "number");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalInt16Cases))]
+        public void JsExportInt16(short value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_Int16,
+                nameof(JavaScriptTestHelper.EchoInt16),
+                "number");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalInt32Cases))]
+        public void JsExportInt32(int value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_Int32,
+                nameof(JavaScriptTestHelper.EchoInt32),
+                "number");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalInt52Cases))]
+        public void JsExportInt52(long value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_Int52,
+                nameof(JavaScriptTestHelper.EchoInt52),
+                "number");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalBigInt64Cases))]
+        public void JsExportBigInt64(long value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_BigInt64,
+                nameof(JavaScriptTestHelper.EchoBigInt64),
+                "bigint");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalDoubleCases))]
+        public void JsExportDouble(double value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_Double,
+                nameof(JavaScriptTestHelper.EchoDouble),
+                "number");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalSingleCases))]
+        public void JsExportSingle(float value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_Single,
+                nameof(JavaScriptTestHelper.EchoSingle),
+                "number");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalIntPtrCases))]
+        public void JsExportIntPtr(IntPtr value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_IntPtr,
+                nameof(JavaScriptTestHelper.EchoIntPtr),
+                "number");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalIntPtrCases))]
+        public unsafe void JsExportVoidPtr(IntPtr xvalue)
+        {
+            JavaScriptTestHelper.AssertWasmBackgroundExec();
+            void* value = (void*)xvalue;
+            void* res = JavaScriptTestHelper.invoke1_VoidPtr(value, nameof(JavaScriptTestHelper.EchoVoidPtr));
+            Assert.True(value == res);
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalDateTimeCases))]
+        public void JsExportDateTime(DateTime value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_DateTime,
+                nameof(JavaScriptTestHelper.EchoDateTime),
+                "object", "Date");
+        }
+
+
+        [Theory]
+        [MemberData(nameof(MarshalDateTimeOffsetCases))]
+        public void JsExportDateTimeOffset(DateTimeOffset value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_DateTimeOffset,
+                nameof(JavaScriptTestHelper.EchoDateTimeOffset),
+                "object", "Date");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalNullableBooleanCases))]
+        public void JsExportNullableBoolean(bool? value)
+        {
+            JavaScriptTestHelper.AssertWasmBackgroundExec();
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_NullableBoolean,
+                nameof(JavaScriptTestHelper.EchoNullableBoolean),
+                "boolean");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalNullableInt32Cases))]
+        public void JsExportNullableInt32(int? value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_NullableInt32,
+                nameof(JavaScriptTestHelper.EchoNullableInt32),
+                "number");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalNullableBigInt64Cases))]
+        public void JsExportNullableBigInt64(long? value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_NullableBigInt64,
+                nameof(JavaScriptTestHelper.EchoNullableBigInt64),
+                "bigint");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalNullableIntPtrCases))]
+        public void JsExportNullableIntPtr(IntPtr? value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_NullableIntPtr,
+                nameof(JavaScriptTestHelper.EchoNullableIntPtr),
+                "number");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalNullableDoubleCases))]
+        public void JsExportNullableDouble(double? value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_NullableDouble,
+                nameof(JavaScriptTestHelper.EchoNullableDouble),
+                "number");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalNullableDateTimeCases))]
+        public void JsExportNullableDateTime(DateTime? value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_NullableDateTime,
+                nameof(JavaScriptTestHelper.EchoNullableDateTime),
+                "object");
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalStringCases))]
+        public void JsExportString(string value)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_String,
+                nameof(JavaScriptTestHelper.EchoString),
+                "string");
+        }
+
+        [Fact]
+        public void JsExportStringNoNs()
+        {
+            var actual = JavaScriptTestHelper.invoke2_String("test", nameof(JavaScriptTestHelperNoNamespace.EchoString));
+            Assert.Equal("test51", actual);
+        }
+
+        [Fact]
+        public void JsExportStructClassRecords()
+        {
+            var actual = JavaScriptTestHelper.invokeStructClassRecords("test");
+            Assert.Equal(48, actual.Length);
+            Assert.Equal("test11", actual[0]);
+            Assert.Equal("test12", actual[1]);
+            Assert.Equal("test13", actual[2]);
+            Assert.Equal("test14", actual[3]);
+            Assert.Equal("test15", actual[4]);
+            Assert.Equal("test16", actual[5]);
+            Assert.Equal("test17", actual[6]);
+            Assert.Equal("test18", actual[7]);
+            Assert.Equal("test19", actual[8]);
+            Assert.Equal("test21", actual[9]);
+            Assert.Equal("test22", actual[10]);
+            Assert.Equal("test23", actual[11]);
+            Assert.Equal("test24", actual[12]);
+            Assert.Equal("test25", actual[13]);
+            Assert.Equal("test31", actual[14]);
+            Assert.Equal("test32", actual[15]);
+            Assert.Equal("test33", actual[16]);
+            Assert.Equal("test34", actual[17]);
+            Assert.Equal("test35", actual[18]);
+            Assert.Equal("test41", actual[19]);
+            Assert.Equal("test42", actual[20]);
+            Assert.Equal("test43", actual[21]);
+            Assert.Equal("test44", actual[22]);
+            Assert.Equal("test45", actual[23]);
+            Assert.Equal("test51", actual[24]);
+            Assert.Equal("test52", actual[25]);
+            Assert.Equal("test53", actual[26]);
+            Assert.Equal("test54", actual[27]);
+            Assert.Equal("test55", actual[28]);
+            Assert.Equal("test56", actual[29]);
+            Assert.Equal("test57", actual[30]);
+            Assert.Equal("test58", actual[31]);
+            Assert.Equal("test59", actual[32]);
+            Assert.Equal("test61", actual[33]);
+            Assert.Equal("test62", actual[34]);
+            Assert.Equal("test63", actual[35]);
+            Assert.Equal("test64", actual[36]);
+            Assert.Equal("test65", actual[37]);
+            Assert.Equal("test71", actual[38]);
+            Assert.Equal("test72", actual[39]);
+            Assert.Equal("test73", actual[40]);
+            Assert.Equal("test74", actual[41]);
+            Assert.Equal("test75", actual[42]);
+            Assert.Equal("test81", actual[43]);
+            Assert.Equal("test82", actual[44]);
+            Assert.Equal("test83", actual[45]);
+            Assert.Equal("test84", actual[46]);
+            Assert.Equal("test85", actual[47]);
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalObjectCases))]
+        public void JsExportObject(object value, string clazz)
+        {
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_Object,
+                nameof(JavaScriptTestHelper.EchoObject),
+                "object", clazz);
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalExceptionCases))]
+        public void JsExportException(Exception value, string clazz)
+        {
+            if (clazz == "JSTestError")
+            {
+                value = JavaScriptTestHelper.createException("!CreateEx!");
+            }
+
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_Exception,
+                nameof(JavaScriptTestHelper.EchoException),
+                "object", clazz);
+        }
+
+        [Fact]
+        public void JsExportCatchToString()
+        {
+            var toString = JavaScriptTestHelper.catch1toString("-t-e-s-t-", nameof(JavaScriptTestHelper.ThrowFromJSExport));
+            Assert.DoesNotContain("Unexpected error", toString);
+            Assert.Contains("-t-e-s-t-", toString);
+            Assert.DoesNotContain(nameof(JavaScriptTestHelper.ThrowFromJSExport), toString);
+        }
+
+        [Fact]
+        public void JsExportCatchStack()
+        {
+            var stack = JavaScriptTestHelper.catch1stack("-t-e-s-t-", nameof(JavaScriptTestHelper.ThrowFromJSExport));
+            Assert.Contains(nameof(JavaScriptTestHelper.ThrowFromJSExport), stack);
+            if (PlatformDetection.IsBrowserDomSupportedOrNodeJS)
+            {
+                Assert.Contains("catch1stack", stack);
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalIJSObjectCases))]
+        public void JsExportIJSObject(JSObject value, string clazz)
+        {
+            if (clazz == "JSData")
+            {
+                value = JavaScriptTestHelper.createData("!CreateJS!");
+            }
+
+            JsExportTest(value,
+                JavaScriptTestHelper.invoke1_JSObject,
+                nameof(JavaScriptTestHelper.EchoIJSObject),
+                "object", clazz);
+        }
+
+        [Theory]
+        [MemberData(nameof(MarshalInt32Cases))]
+        public async Task JsExportTaskOfInt(int value)
+        {
+            TaskCompletionSource<int> tcs = new TaskCompletionSource<int>();
+
+            var res = JavaScriptTestHelper.invoke1_TaskOfInt(tcs.Task, nameof(JavaScriptTestHelper.AwaitTaskOfObject));
+            tcs.SetResult(value);
+            await Task.Yield();
+            var rr = await res;
+            await Task.Yield();
+            Assert.Equal(value, rr);
+            //GC.Collect();
+        }
+
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
+        public void JsExportCallback_FunctionIntInt()
+        {
+            int called = -1;
+            var chain = JavaScriptTestHelper.invoke1_FuncOfIntInt((int a) =>
+            {
+                called = a;
+                return a;
+            }, nameof(JavaScriptTestHelper.BackFuncOfIntInt));
+
+            Assert.Equal(-1, called);
+            var actual = chain(42);
+            Assert.Equal(42, actual);
+            Assert.Equal(42, called);
+        }
+
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
+        public void JsExportCallback_FunctionIntIntThrow()
+        {
+            int called = -1;
+            var expected = new Exception("test!!");
+            var chain = JavaScriptTestHelper.invoke1_FuncOfIntInt((int a) =>
+            {
+                called = a;
+                throw expected;
+            }, nameof(JavaScriptTestHelper.BackFuncOfIntInt));
+
+            Assert.Equal(-1, called);
+            var actual = Assert.Throws<Exception>(() => chain(42));
+            Assert.Equal(42, called);
+            Assert.Same(expected, actual);
+        }
+
+        private void JsExportTest<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.Interfaces)] T>(T value
+        , Func<T, string, T> invoke, string echoName, string jsType, string? jsClass = null)
+        {
+            T res;
+            res = invoke(value, echoName);
+            Assert.Equal<T>(value, res);
+        }
+    }
+}
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JSImportExportTest.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JSImportTest.cs
similarity index 65%
rename from src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JSImportExportTest.cs
rename to src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JSImportTest.cs
index bd76d9e80f8f..b4c220519da0 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JSImportExportTest.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JSImportTest.cs
@@ -12,7 +12,7 @@
 
 namespace System.Runtime.InteropServices.JavaScript.Tests
 {
-    public class JSImportExportTest : IAsyncLifetime
+    public class JSImportTest : JSInteropTestBase, IAsyncLifetime
     {
         [Fact]
         public unsafe void StructSize()
@@ -36,6 +36,20 @@ public async Task MultipleImportAsync()
             instance1.Dispose();
         }
 
+        [Fact]
+        public void MissingImport()
+        {
+            var ex = Assert.Throws<JSException>(() => JavaScriptTestHelper.IntentionallyMissingImport());
+            Assert.Contains("intentionallyMissingImport must be a Function but was undefined", ex.Message);
+        }
+
+        [Fact]
+        public async Task MissingImportAsync()
+        {
+            var ex = await Assert.ThrowsAsync<JSException>(() => JavaScriptTestHelper.IntentionallyMissingImportAsync());
+            Assert.Contains("intentionallyMissingImportAsync must be a Function but was undefined", ex.Message);
+        }
+
 #if !FEATURE_WASM_MANAGED_THREADS // because in MT JSHost.ImportAsync is really async, it will finish before the caller could cancel it
         [Fact]
         public async Task CancelableImportAsync()
@@ -113,9 +127,10 @@ public unsafe void OutOfRange()
             Assert.Contains("Overflow: value 9007199254740991 is out of -2147483648 2147483647 range", ex.Message);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsWasmBackgroundExecOrSingleThread))]
         public unsafe void OptimizedPaths()
         {
+            JavaScriptTestHelper.AssertWasmBackgroundExec();
             JavaScriptTestHelper.optimizedReached = 0;
             JavaScriptTestHelper.invoke0V();
             Assert.Equal(1, JavaScriptTestHelper.optimizedReached);
@@ -211,13 +226,6 @@ public unsafe void CreateFunctionInternal()
 
         #region Arrays
 
-        public static IEnumerable<object[]> MarshalByteArrayCases()
-        {
-            yield return new object[] { new byte[] { 1, 2, 3, byte.MaxValue, byte.MinValue } };
-            yield return new object[] { new byte[] { } };
-            yield return new object[] { null };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalByteArrayCases))]
         public unsafe void JsImportByteArray(byte[]? expected)
@@ -231,13 +239,6 @@ public unsafe void JsImportByteArray(byte[]? expected)
                 }
         }
 
-        public static IEnumerable<object[]> MarshalIntArrayCases()
-        {
-            yield return new object[] { new int[] { 1, 2, 3, int.MaxValue, int.MinValue } };
-            yield return new object[] { new int[] { } };
-            yield return new object[] { null };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalIntArrayCases))]
         public unsafe void JsImportIntArray(int[]? expected)
@@ -251,13 +252,6 @@ public unsafe void JsImportIntArray(int[]? expected)
                 }
         }
 
-        public static IEnumerable<object[]> MarshalDoubleArrayCases()
-        {
-            yield return new object[] { new double[] { 1, 2, 3, double.MaxValue, double.MinValue, double.Pi, double.NegativeInfinity, double.PositiveInfinity, double.NaN } };
-            yield return new object[] { new double[] { } };
-            yield return new object[] { null };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalDoubleArrayCases))]
         public unsafe void JsImportDoubleArray(double[]? expected)
@@ -271,14 +265,6 @@ public unsafe void JsImportDoubleArray(double[]? expected)
                 }
         }
 
-        public static IEnumerable<object[]> MarshalStringArrayCases()
-        {
-            yield return new object[] { new string[] { "\u0050\u0159\u00ed\u006c\u0069\u0161", "\u017e\u006c\u0075\u0165\u006f\u0075\u010d\u006b\u00fd" } };
-            yield return new object[] { new string[] { string.Intern("hello"), string.Empty, null } };
-            yield return new object[] { new string[] { } };
-            yield return new object[] { null };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalStringArrayCases))]
         public unsafe void JsImportStringArray(string[]? expected)
@@ -293,28 +279,6 @@ public unsafe void JsImportStringArray(string[]? expected)
                 }
         }
 
-        public class SomethingRef
-        {
-        }
-
-        public class SomethingStruct
-        {
-        }
-
-        public static IEnumerable<object[]> MarshalObjectArrayCases()
-        {
-            yield return new object[] { new object[] { string.Intern("hello"), string.Empty } };
-            yield return new object[] { new object[] { 1.1d, new DateTime(2022, 5, 8, 14, 55, 01, DateTimeKind.Utc), false, true } };
-            yield return new object[] { new object[] { new double?(1.1d), new DateTime?(new DateTime(2022, 5, 8, 14, 55, 01, DateTimeKind.Utc)), new bool?(false), new bool?(true) } };
-            yield return new object[] { new object[] { null, new object(), new SomethingRef(), new SomethingStruct(), new Exception("test") } };
-            yield return new object[] { new object[] { "JSData" } }; // special cased, so we call createData in the test itself
-            yield return new object[] { new object[] { new byte[] { }, new int[] { }, new double[] { }, new string[] { }, new object[] { } } };
-            yield return new object[] { new object[] { new byte[] { 1, 2, 3 }, new int[] { 1, 2, 3 }, new double[] { 1, 2, 3 }, new string[] { "a", "b", "c" }, new object[] { } } };
-            yield return new object[] { new object[] { new object[] { new byte[] { 1, 2, 3 }, new int[] { 1, 2, 3 }, new double[] { 1, 2, 3 }, new string[] { "a", "b", "c" }, new object(), new SomethingRef(), new SomethingStruct(), new Exception("test") } } };
-            yield return new object[] { new object[] { } };
-            yield return new object[] { null };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalObjectArrayCases))]
         public unsafe void JsImportObjectArray(object[]? expected)
@@ -327,19 +291,10 @@ public unsafe void JsImportObjectArray(object[]? expected)
             Assert.Equal(expected, actual);
 
             if (expected != null) for (int i = 0; i < expected.Length; i++)
-            {
-                var actualI = JavaScriptTestHelper.store_ObjectArray(expected, i);
-                Assert.Equal(expected[i], actualI);
-            }
-        }
-
-        public static IEnumerable<object[]> MarshalObjectArrayCasesToDouble()
-        {
-            yield return new object[] { new object[] { (byte)42 } };
-            yield return new object[] { new object[] { (short)42 } };
-            yield return new object[] { new object[] { 42 } };
-            yield return new object[] { new object[] { 3.14f } };
-            yield return new object[] { new object[] { 'A' } };
+                {
+                    var actualI = JavaScriptTestHelper.store_ObjectArray(expected, i);
+                    Assert.Equal(expected[i], actualI);
+                }
         }
 
         [Theory]
@@ -360,25 +315,6 @@ public unsafe void JsImportObjectArrayToDouble(object[]? expected)
                 }
         }
 
-        public static IEnumerable<object[]> MarshalObjectArrayCasesThrow()
-        {
-            yield return new object[] { new object[] { () => { } } };
-            yield return new object[] { new object[] { (int a) => { } } };
-            yield return new object[] { new object[] { (int a) => { return a; } } };
-            yield return new object[] { new object[] { (dummyDelegate)dummyDelegateA } };
-            yield return new object[] { new object[] { 0L } };
-            yield return new object[] { new object[] { 0UL } };
-            yield return new object[] { new object[] { (sbyte)0 } };
-            yield return new object[] { new object[] { (ushort)0 } };
-            yield return new object[] { new object[] { new SomethingStruct[] { } } };
-            yield return new object[] { new object[] { new SomethingRef[] { }, } };
-            yield return new object[] { new object[] { new ArraySegment<byte>(new byte[] { 11 }), } };
-        }
-        delegate void dummyDelegate();
-        static void dummyDelegateA()
-        {
-        }
-
         [Theory]
         [MemberData(nameof(MarshalObjectArrayCasesThrow))]
         public void JsImportObjectArrayThrows(object[]? expected)
@@ -532,11 +468,6 @@ public unsafe void JsImportArraySegmentOfDouble()
         #endregion
 
         #region Boolean
-        public static IEnumerable<object[]> MarshalBooleanCases()
-        {
-            yield return new object[] { true };
-            yield return new object[] { false };
-        }
 
         [Theory]
         [MemberData(nameof(MarshalBooleanCases))]
@@ -551,28 +482,9 @@ public void JsImportBoolean(bool value)
                 "boolean");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalBooleanCases))]
-        public void JsExportBoolean(bool value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_Boolean,
-                nameof(JavaScriptTestHelper.EchoBoolean),
-                "boolean");
-        }
         #endregion Boolean
 
         #region Char
-        public static IEnumerable<object[]> MarshalCharCases()
-        {
-            yield return new object[] { (char)42 };
-            yield return new object[] { (char)1 };
-            yield return new object[] { '\u017D' };
-            yield return new object[] { '\u2661' };
-            yield return new object[] { char.MaxValue };
-            yield return new object[] { char.MinValue };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalCharCases))]
         public void JsImportChar(char value)
@@ -586,26 +498,9 @@ public void JsImportChar(char value)
                 "number");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalCharCases))]
-        public void JsExportChar(char value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_Char,
-                nameof(JavaScriptTestHelper.EchoChar),
-                "number");
-        }
         #endregion Char
 
         #region Byte
-        public static IEnumerable<object[]> MarshalByteCases()
-        {
-            yield return new object[] { (byte)42 };
-            yield return new object[] { (byte)1 };
-            yield return new object[] { byte.MaxValue };
-            yield return new object[] { byte.MinValue };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalByteCases))]
         public void JsImportByte(byte value)
@@ -619,16 +514,6 @@ public void JsImportByte(byte value)
                 "number");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalByteCases))]
-        public void JsExportByte(byte value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_Byte,
-                nameof(JavaScriptTestHelper.EchoByte),
-                "number");
-        }
-
         [Theory]
         [MemberData(nameof(OutOfRangeCases))]
         public void ByteOutOfRange(double value, string message)
@@ -641,16 +526,6 @@ public void ByteOutOfRange(double value, string message)
         #endregion Byte
 
         #region Int16
-        public static IEnumerable<object[]> MarshalInt16Cases()
-        {
-            yield return new object[] { 42 };
-            yield return new object[] { 0 };
-            yield return new object[] { 1 };
-            yield return new object[] { -1 };
-            yield return new object[] { short.MaxValue };
-            yield return new object[] { short.MinValue };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalInt16Cases))]
         public void JsImportInt16(short value)
@@ -664,36 +539,16 @@ public void JsImportInt16(short value)
                 "number");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalInt16Cases))]
-        public void JsExportInt16(short value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_Int16,
-                nameof(JavaScriptTestHelper.EchoInt16),
-                "number");
-        }
         #endregion Int16
 
         #region Int32
-        public static IEnumerable<object[]> MarshalInt32Cases()
-        {
-            yield return new object[] { 42 };
-            yield return new object[] { 0 };
-            yield return new object[] { 1 };
-            yield return new object[] { -1 };
-            yield return new object[] { int.MaxValue };
-            yield return new object[] { int.MinValue };
-        }
-
-        public static IEnumerable<object[]> OutOfRangeCases()
+        [Theory]
+        [MemberData(nameof(MarshalInt32Cases))]
+        public async Task JsImportInt32DiscardNoWait(int value)
         {
-            yield return new object[] { double.MaxValue, "Value is not an integer" };
-            yield return new object[] { double.MinValue, "Value is not an integer" };
-            yield return new object[] { double.NaN, "Value is not an integer" };
-            yield return new object[] { double.NegativeInfinity, "Value is not an integer" };
-            yield return new object[] { double.PositiveInfinity, "Value is not an integer" };
-            yield return new object[] { (double)MAX_SAFE_INTEGER, "Overflow" };
+            JavaScriptTestHelper.store1DiscardNoWait_Int32(value);
+            await JavaScriptTestHelper.Delay(0);
+            Assert.Equal(value, JavaScriptTestHelper.retrieve1_Int32());
         }
 
         [Theory]
@@ -709,16 +564,6 @@ public void JsImportInt32(int value)
                 "number");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalInt32Cases))]
-        public void JsExportInt32(int value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_Int32,
-                nameof(JavaScriptTestHelper.EchoInt32),
-                "number");
-        }
-
         [Theory]
         [MemberData(nameof(OutOfRangeCases))]
         public void Int32OutOfRange(double value, string message)
@@ -731,17 +576,6 @@ public void Int32OutOfRange(double value, string message)
         #endregion Int32
 
         #region Int52
-        const long MAX_SAFE_INTEGER = 9007199254740991L;// Number.MAX_SAFE_INTEGER
-        const long MIN_SAFE_INTEGER = -9007199254740991L;// Number.MIN_SAFE_INTEGER
-        public static IEnumerable<object[]> MarshalInt52Cases()
-        {
-            yield return new object[] { -1 };
-            yield return new object[] { 42 };
-            yield return new object[] { 0 };
-            yield return new object[] { 1 };
-            yield return new object[] { MAX_SAFE_INTEGER };
-            yield return new object[] { MIN_SAFE_INTEGER };
-        }
 
         [Theory]
         [MemberData(nameof(MarshalInt52Cases))]
@@ -756,30 +590,9 @@ public void JsImportInt52(long value)
                 "number");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalInt52Cases))]
-        public void JsExportInt52(long value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_Int52,
-                nameof(JavaScriptTestHelper.EchoInt52),
-                "number");
-        }
         #endregion Int52
 
         #region BigInt64
-        public static IEnumerable<object[]> MarshalBigInt64Cases()
-        {
-            yield return new object[] { -1 };
-            yield return new object[] { 42 };
-            yield return new object[] { 0 };
-            yield return new object[] { 1 };
-            yield return new object[] { MAX_SAFE_INTEGER };
-            yield return new object[] { MIN_SAFE_INTEGER };
-            yield return new object[] { long.MinValue };
-            yield return new object[] { long.MaxValue };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalBigInt64Cases))]
         public void JsImportBigInt64(long value)
@@ -793,29 +606,9 @@ public void JsImportBigInt64(long value)
                 "bigint");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalBigInt64Cases))]
-        public void JsExportBigInt64(long value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_BigInt64,
-                nameof(JavaScriptTestHelper.EchoBigInt64),
-                "bigint");
-        }
         #endregion BigInt64
 
         #region Double
-        public static IEnumerable<object[]> MarshalDoubleCases()
-        {
-            yield return new object[] { Math.PI };
-            yield return new object[] { 0.0 };
-            yield return new object[] { double.MaxValue };
-            yield return new object[] { double.MinValue };
-            yield return new object[] { double.NegativeInfinity };
-            yield return new object[] { double.PositiveInfinity };
-            yield return new object[] { double.NaN };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalDoubleCases))]
         public void JsImportDouble(double value)
@@ -829,29 +622,9 @@ public void JsImportDouble(double value)
                 "number");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalDoubleCases))]
-        public void JsExportDouble(double value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_Double,
-                nameof(JavaScriptTestHelper.EchoDouble),
-                "number");
-        }
         #endregion Double
 
         #region Single
-        public static IEnumerable<object[]> MarshalSingleCases()
-        {
-            yield return new object[] { (float)Math.PI };
-            yield return new object[] { 0.0f };
-            yield return new object[] { float.MaxValue };
-            yield return new object[] { float.MinValue };
-            yield return new object[] { float.NegativeInfinity };
-            yield return new object[] { float.PositiveInfinity };
-            yield return new object[] { float.NaN };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalSingleCases))]
         public void JsImportSingle(float value)
@@ -865,28 +638,9 @@ public void JsImportSingle(float value)
                 "number");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalSingleCases))]
-        public void JsExportSingle(float value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_Single,
-                nameof(JavaScriptTestHelper.EchoSingle),
-                "number");
-        }
         #endregion Single
 
         #region IntPtr
-        public static IEnumerable<object[]> MarshalIntPtrCases()
-        {
-            yield return new object[] { (IntPtr)42 };
-            yield return new object[] { IntPtr.Zero };
-            yield return new object[] { (IntPtr)1 };
-            yield return new object[] { (IntPtr)(-1) };
-            yield return new object[] { IntPtr.MaxValue };
-            yield return new object[] { IntPtr.MinValue };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalIntPtrCases))]
         public void JsImportIntPtr(IntPtr value)
@@ -900,15 +654,6 @@ public void JsImportIntPtr(IntPtr value)
                 "number");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalIntPtrCases))]
-        public void JsExportIntPtr(IntPtr value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_IntPtr,
-                nameof(JavaScriptTestHelper.EchoIntPtr),
-                "number");
-        }
         #endregion IntPtr
 
         #region VoidPtr
@@ -929,24 +674,9 @@ public unsafe void JsImportVoidPtr(IntPtr xvalue)
             Assert.Equal("number", actualJsType);
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalIntPtrCases))]
-        public unsafe void JsExportVoidPtr(IntPtr xvalue)
-        {
-            void* value = (void*)xvalue;
-            void* res = JavaScriptTestHelper.invoke1_VoidPtr(value, nameof(JavaScriptTestHelper.EchoVoidPtr));
-            Assert.True(value == res);
-        }
         #endregion VoidPtr
 
         #region Datetime
-        public static IEnumerable<object[]> MarshalDateTimeCases()
-        {
-            yield return new object[] { new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc) };
-            yield return new object[] { TrimNano(DateTime.UtcNow) };
-            yield return new object[] { TrimNano(DateTime.MaxValue) };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalDateTimeCases))]
         public void JSImportDateTime(DateTime value)
@@ -960,25 +690,9 @@ public void JSImportDateTime(DateTime value)
                 "object", "Date");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalDateTimeCases))]
-        public void JsExportDateTime(DateTime value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_DateTime,
-                nameof(JavaScriptTestHelper.EchoDateTime),
-                "object", "Date");
-        }
         #endregion Datetime
 
         #region DateTimeOffset
-        public static IEnumerable<object[]> MarshalDateTimeOffsetCases()
-        {
-            yield return new object[] { DateTimeOffset.FromUnixTimeSeconds(0) };
-            yield return new object[] { TrimNano(DateTimeOffset.UtcNow) };
-            yield return new object[] { TrimNano(DateTimeOffset.MaxValue) };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalDateTimeOffsetCases))]
         public void JSImportDateTimeOffset(DateTimeOffset value)
@@ -992,25 +706,9 @@ public void JSImportDateTimeOffset(DateTimeOffset value)
                 "object", "Date");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalDateTimeOffsetCases))]
-        public void JsExportDateTimeOffset(DateTimeOffset value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_DateTimeOffset,
-                nameof(JavaScriptTestHelper.EchoDateTimeOffset),
-                "object", "Date");
-        }
         #endregion DateTimeOffset
 
         #region NullableBoolean
-        public static IEnumerable<object[]> MarshalNullableBooleanCases()
-        {
-            yield return new object[] { null };
-            yield return new object[] { true };
-            yield return new object[] { false };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalNullableBooleanCases))]
         public void JsImportNullableBoolean(bool? value)
@@ -1024,29 +722,9 @@ public void JsImportNullableBoolean(bool? value)
                 "boolean");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalNullableBooleanCases))]
-        public void JsExportNullableBoolean(bool? value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_NullableBoolean,
-                nameof(JavaScriptTestHelper.EchoNullableBoolean),
-                "boolean");
-        }
         #endregion NullableBoolean
 
         #region NullableInt32
-        public static IEnumerable<object[]> MarshalNullableInt32Cases()
-        {
-            yield return new object[] { null };
-            yield return new object[] { 42 };
-            yield return new object[] { 0 };
-            yield return new object[] { 1 };
-            yield return new object[] { -1 };
-            yield return new object[] { int.MaxValue };
-            yield return new object[] { int.MinValue };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalNullableInt32Cases))]
         public void JsImportNullableInt32(int? value)
@@ -1060,31 +738,9 @@ public void JsImportNullableInt32(int? value)
                 "number");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalNullableInt32Cases))]
-        public void JsExportNullableInt32(int? value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_NullableInt32,
-                nameof(JavaScriptTestHelper.EchoNullableInt32),
-                "number");
-        }
         #endregion NullableInt32
 
         #region NullableBigInt64
-        public static IEnumerable<object[]> MarshalNullableBigInt64Cases()
-        {
-            yield return new object[] { null };
-            yield return new object[] { 42L };
-            yield return new object[] { 0L };
-            yield return new object[] { 1L };
-            yield return new object[] { -1L };
-            yield return new object[] { MAX_SAFE_INTEGER };
-            yield return new object[] { MIN_SAFE_INTEGER };
-            yield return new object[] { long.MaxValue };
-            yield return new object[] { long.MinValue };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalNullableBigInt64Cases))]
         public void JsImportNullableBigInt64(long? value)
@@ -1098,29 +754,9 @@ public void JsImportNullableBigInt64(long? value)
                 "bigint");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalNullableBigInt64Cases))]
-        public void JsExportNullableBigInt64(long? value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_NullableBigInt64,
-                nameof(JavaScriptTestHelper.EchoNullableBigInt64),
-                "bigint");
-        }
         #endregion NullableBigInt64
 
         #region NullableIntPtr
-        public static IEnumerable<object[]> MarshalNullableIntPtrCases()
-        {
-            yield return new object[] { null };
-            yield return new object[] { (IntPtr)42 };
-            yield return new object[] { IntPtr.Zero };
-            yield return new object[] { (IntPtr)1 };
-            yield return new object[] { (IntPtr)(-1) };
-            yield return new object[] { IntPtr.MaxValue };
-            yield return new object[] { IntPtr.MinValue };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalNullableIntPtrCases))]
         public void JsImportNullableIntPtr(IntPtr? value)
@@ -1134,30 +770,9 @@ public void JsImportNullableIntPtr(IntPtr? value)
                 "number");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalNullableIntPtrCases))]
-        public void JsExportNullableIntPtr(IntPtr? value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_NullableIntPtr,
-                nameof(JavaScriptTestHelper.EchoNullableIntPtr),
-                "number");
-        }
         #endregion NullableIntPtr
 
         #region NullableDouble
-        public static IEnumerable<object[]> MarshalNullableDoubleCases()
-        {
-            yield return new object[] { null };
-            yield return new object[] { Math.PI };
-            yield return new object[] { 0.0 };
-            yield return new object[] { double.MaxValue };
-            yield return new object[] { double.MinValue };
-            yield return new object[] { double.NegativeInfinity };
-            yield return new object[] { double.PositiveInfinity };
-            yield return new object[] { double.NaN };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalNullableDoubleCases))]
         public void JsImportNullableDouble(double? value)
@@ -1171,26 +786,9 @@ public void JsImportNullableDouble(double? value)
                 "number");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalNullableDoubleCases))]
-        public void JsExportNullableDouble(double? value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_NullableDouble,
-                nameof(JavaScriptTestHelper.EchoNullableDouble),
-                "number");
-        }
         #endregion NullableDouble
 
         #region NullableDateTime
-        public static IEnumerable<object[]> MarshalNullableDateTimeCases()
-        {
-            yield return new object[] { null };
-            yield return new object[] { new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc) };
-            yield return new object[] { TrimNano(DateTime.UtcNow) };
-            yield return new object[] { TrimNano(DateTime.MaxValue) };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalNullableDateTimeCases))]
         public void JsImportNullableDateTime(DateTime? value)
@@ -1204,27 +802,9 @@ public void JsImportNullableDateTime(DateTime? value)
                 "object");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalNullableDateTimeCases))]
-        public void JsExportNullableDateTime(DateTime? value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_NullableDateTime,
-                nameof(JavaScriptTestHelper.EchoNullableDateTime),
-                "object");
-        }
         #endregion NullableDateTime
 
         #region String
-        public static IEnumerable<object[]> MarshalStringCases()
-        {
-            yield return new object[] { null };
-            yield return new object[] { string.Empty };
-            yield return new object[] { "Ahoj" + Random.Shared.Next() };// shorted than 256 -> check in JS interned
-            yield return new object[] { "Ahoj" + new string('!', 300) };// longer than 256 -> no check in JS interned
-            yield return new object[] { string.Intern("dotnet") };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalStringCases))]
         public void JsImportString(string value)
@@ -1238,78 +818,6 @@ public void JsImportString(string value)
                 , "string");
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalStringCases))]
-        public void JsExportString(string value)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_String,
-                nameof(JavaScriptTestHelper.EchoString),
-                "string");
-        }
-
-        [Fact]
-        public void JsExportStringNoNs()
-        {
-            var actual = JavaScriptTestHelper.invoke2_String("test", nameof(JavaScriptTestHelperNoNamespace.EchoString));
-            Assert.Equal("test51", actual);
-        }
-
-        [Fact]
-        public void JsExportStructClassRecords()
-        {
-            var actual = JavaScriptTestHelper.invokeStructClassRecords("test");
-            Assert.Equal(48, actual.Length);
-            Assert.Equal("test11", actual[0]);
-            Assert.Equal("test12", actual[1]);
-            Assert.Equal("test13", actual[2]);
-            Assert.Equal("test14", actual[3]);
-            Assert.Equal("test15", actual[4]);
-            Assert.Equal("test16", actual[5]);
-            Assert.Equal("test17", actual[6]);
-            Assert.Equal("test18", actual[7]);
-            Assert.Equal("test19", actual[8]);
-            Assert.Equal("test21", actual[9]);
-            Assert.Equal("test22", actual[10]);
-            Assert.Equal("test23", actual[11]);
-            Assert.Equal("test24", actual[12]);
-            Assert.Equal("test25", actual[13]);
-            Assert.Equal("test31", actual[14]);
-            Assert.Equal("test32", actual[15]);
-            Assert.Equal("test33", actual[16]);
-            Assert.Equal("test34", actual[17]);
-            Assert.Equal("test35", actual[18]);
-            Assert.Equal("test41", actual[19]);
-            Assert.Equal("test42", actual[20]);
-            Assert.Equal("test43", actual[21]);
-            Assert.Equal("test44", actual[22]);
-            Assert.Equal("test45", actual[23]);
-            Assert.Equal("test51", actual[24]);
-            Assert.Equal("test52", actual[25]);
-            Assert.Equal("test53", actual[26]);
-            Assert.Equal("test54", actual[27]);
-            Assert.Equal("test55", actual[28]);
-            Assert.Equal("test56", actual[29]);
-            Assert.Equal("test57", actual[30]);
-            Assert.Equal("test58", actual[31]);
-            Assert.Equal("test59", actual[32]);
-            Assert.Equal("test61", actual[33]);
-            Assert.Equal("test62", actual[34]);
-            Assert.Equal("test63", actual[35]);
-            Assert.Equal("test64", actual[36]);
-            Assert.Equal("test65", actual[37]);
-            Assert.Equal("test71", actual[38]);
-            Assert.Equal("test72", actual[39]);
-            Assert.Equal("test73", actual[40]);
-            Assert.Equal("test74", actual[41]);
-            Assert.Equal("test75", actual[42]);
-            Assert.Equal("test81", actual[43]);
-            Assert.Equal("test82", actual[44]);
-            Assert.Equal("test83", actual[45]);
-            Assert.Equal("test84", actual[46]);
-            Assert.Equal("test85", actual[47]);
-        }
-
         [Fact]
         public void JsImportNative()
         {
@@ -1337,12 +845,6 @@ public void JsImportReboundInstanceMember()
         #endregion String
 
         #region Object
-        public static IEnumerable<object[]> MarshalObjectCases()
-        {
-            yield return new object[] { new object(), "ManagedObject" };
-            yield return new object[] { null, null };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalObjectCases))]
         public void JSImportObject(object value, string clazz)
@@ -1356,25 +858,9 @@ public void JSImportObject(object value, string clazz)
                 "object", clazz);
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalObjectCases))]
-        public void JsExportObject(object value, string clazz)
-        {
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_Object,
-                nameof(JavaScriptTestHelper.EchoObject),
-                "object", clazz);
-        }
         #endregion Object
 
         #region Exception
-        public static IEnumerable<object[]> MarshalExceptionCases()
-        {
-            yield return new object[] { new Exception("Test"), "ManagedError" };
-            yield return new object[] { null, "JSTestError" };
-            yield return new object[] { null, null };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalExceptionCases))]
         public void JSImportException(Exception value, string clazz)
@@ -1393,29 +879,6 @@ public void JSImportException(Exception value, string clazz)
                 "object", clazz);
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalExceptionCases))]
-        public void JsExportException(Exception value, string clazz)
-        {
-            if (clazz == "JSTestError")
-            {
-                value = JavaScriptTestHelper.createException("!CreateEx!");
-            }
-
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_Exception,
-                nameof(JavaScriptTestHelper.EchoException),
-                "object", clazz);
-        }
-
-        [Fact]
-        public void JsExportThrows()
-        {
-            var ex = Assert.Throws<ArgumentException>(() => JavaScriptTestHelper.invoke1_String("-t-e-s-t-", nameof(JavaScriptTestHelper.ThrowFromJSExport)));
-            Assert.DoesNotContain("Unexpected error", ex.Message);
-            Assert.Contains("-t-e-s-t-", ex.Message);
-        }
-
         [Fact]
         public void JSImportReturnError()
         {
@@ -1424,35 +887,9 @@ public void JSImportReturnError()
             Assert.Contains("this-is-error", err.Message);
         }
 
-        [Fact]
-        public void JsExportCatchToString()
-        {
-            var toString = JavaScriptTestHelper.catch1toString("-t-e-s-t-", nameof(JavaScriptTestHelper.ThrowFromJSExport));
-            Assert.DoesNotContain("Unexpected error", toString);
-            Assert.Contains("-t-e-s-t-", toString);
-            Assert.DoesNotContain(nameof(JavaScriptTestHelper.ThrowFromJSExport), toString);
-        }
-
-        [Fact]
-        public void JsExportCatchStack()
-        {
-            var stack = JavaScriptTestHelper.catch1stack("-t-e-s-t-", nameof(JavaScriptTestHelper.ThrowFromJSExport));
-            Assert.Contains(nameof(JavaScriptTestHelper.ThrowFromJSExport), stack);
-            if (PlatformDetection.IsBrowserDomSupportedOrNodeJS)
-            {
-                Assert.Contains("catch1stack", stack);
-            }
-        }
-
         #endregion Exception
 
         #region JSObject
-        public static IEnumerable<object[]> MarshalIJSObjectCases()
-        {
-            yield return new object[] { null, "JSData" };
-            yield return new object[] { null, null };
-        }
-
         [Theory]
         [MemberData(nameof(MarshalIJSObjectCases))]
         public void JSImportIJSObject(JSObject value, string clazz)
@@ -1471,20 +908,6 @@ public void JSImportIJSObject(JSObject value, string clazz)
                 "object", clazz);
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalIJSObjectCases))]
-        public void JsExportIJSObject(JSObject value, string clazz)
-        {
-            if (clazz == "JSData")
-            {
-                value = JavaScriptTestHelper.createData("!CreateJS!");
-            }
-
-            JsExportTest(value,
-                JavaScriptTestHelper.invoke1_JSObject,
-                nameof(JavaScriptTestHelper.EchoIJSObject),
-                "object", clazz);
-        }
         #endregion JSObject
 
         #region ProxyOfProxy
@@ -1514,10 +937,10 @@ public async Task JsImportSleep()
             await JavaScriptTestHelper.sleep(100);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))] // slow
         public async Task JsImportTaskTypes()
         {
-            for(int i=0;i<100;i++)
+            for (int i = 0; i < 100; i++)
             {
                 object a = new object();
                 Exception e = new Exception();
@@ -1641,14 +1064,6 @@ public async Task JsImportTaskEchoPendingException()
             await Assert.ThrowsAsync<Exception>(async () => await task);
         }
 
-        public static IEnumerable<object[]> TaskCases()
-        {
-            yield return new object[] { Math.PI };
-            yield return new object[] { 0 };
-            yield return new object[] { "test" };
-            yield return new object[] { null };
-        }
-
         [Theory]
         [MemberData(nameof(TaskCases))]
         public async Task JsImportTaskAwaitPendingResult(object result)
@@ -1713,26 +1128,11 @@ public async Task JsImportTaskAwait()
             await task;
         }
 
-        [Theory]
-        [MemberData(nameof(MarshalInt32Cases))]
-        public async Task JsExportTaskOfInt(int value)
-        {
-            TaskCompletionSource<int> tcs = new TaskCompletionSource<int>();
-
-            var res = JavaScriptTestHelper.invoke1_TaskOfInt(tcs.Task, nameof(JavaScriptTestHelper.AwaitTaskOfObject));
-            tcs.SetResult(value);
-            await Task.Yield();
-            var rr = await res;
-            await Task.Yield();
-            Assert.Equal(value, rr);
-            //GC.Collect();
-        }
-
         #endregion
 
         #region Action
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
         public void JsImportCallback_EchoAction()
         {
             bool called = false;
@@ -1747,6 +1147,37 @@ public void JsImportCallback_EchoAction()
             Assert.True(called);
         }
 
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsWasmThreadingSupported))]
+        public void JsImportCallback_EchoActionThrows_MT()
+        {
+            bool called = false;
+            Action expected = () =>
+            {
+                called = true;
+            };
+            var actual = JavaScriptTestHelper.echo1_ActionAction(expected);
+            Assert.NotEqual(expected, actual);
+            Assert.False(called);
+            // with deputy thread, call back to C# from synchronous JS function is not allowed
+            Assert.Throws<JSException>(()=>actual());
+            Assert.False(called);
+        }
+
+        [Fact]
+        public async Task JsImportCallback_Async()
+        {
+            bool called = false;
+            var promise = JavaScriptTestHelper.backback_FuncIntIntFuncIntIntAsync((a,b) =>
+            {
+                called = true;
+                return a + b;
+            }, 123, 321);
+            Assert.False(called);
+            var actual = await promise;
+            Assert.True(called);
+            Assert.Equal(444, actual);
+        }
+
 
         [Fact]
         [OuterLoop]
@@ -1769,7 +1200,7 @@ public async Task JsImportCallback_EchoActionMany()
             }
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
         public void JsImportCallback_Action()
         {
             bool called = false;
@@ -1780,7 +1211,7 @@ public void JsImportCallback_Action()
             Assert.True(called);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
         public void JsImportEcho_ActionAction()
         {
             bool called = false;
@@ -1793,7 +1224,7 @@ public void JsImportEcho_ActionAction()
             Assert.True(called);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
         public void JsImportEcho_ActionIntActionInt()
         {
             int calledA = -1;
@@ -1806,7 +1237,7 @@ public void JsImportEcho_ActionIntActionInt()
             Assert.Equal(42, calledA);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
         public void JsImportCallback_ActionInt()
         {
             int called = -1;
@@ -1817,7 +1248,7 @@ public void JsImportCallback_ActionInt()
             Assert.Equal(42, called);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
         public void JsImportCallback_FunctionIntInt()
         {
             int called = -1;
@@ -1830,7 +1261,7 @@ public void JsImportCallback_FunctionIntInt()
             Assert.Equal(42, res);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
         public void JsImportBackCallback_FunctionIntInt()
         {
             int called = -1;
@@ -1845,7 +1276,7 @@ public void JsImportBackCallback_FunctionIntInt()
             Assert.Equal(84, called);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
         public void JsImportBackCallback_FunctionIntIntIntInt()
         {
             int calledA = -1;
@@ -1864,7 +1295,7 @@ public void JsImportBackCallback_FunctionIntIntIntInt()
             Assert.Equal(84, calledB);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
         public void JsImportCallback_ActionIntInt()
         {
             int calledA = -1;
@@ -1878,7 +1309,7 @@ public void JsImportCallback_ActionIntInt()
             Assert.Equal(43, calledB);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
         public void JsImportCallback_ActionLongLong()
         {
             long calledA = -1;
@@ -1892,7 +1323,7 @@ public void JsImportCallback_ActionLongLong()
             Assert.Equal(43, calledB);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
         public void JsImportCallback_ActionIntLong()
         {
             int calledA = -1;
@@ -1906,7 +1337,7 @@ public void JsImportCallback_ActionIntLong()
             Assert.Equal(43, calledB);
         }
 
-        [Fact]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotWasmThreadingSupported))]
         public void JsImportCallback_ActionIntThrow()
         {
             int called = -1;
@@ -1920,39 +1351,6 @@ public void JsImportCallback_ActionIntThrow()
             Assert.Same(expected, actual);
         }
 
-        [Fact]
-        public void JsExportCallback_FunctionIntInt()
-        {
-            int called = -1;
-            var chain = JavaScriptTestHelper.invoke1_FuncOfIntInt((int a) =>
-            {
-                called = a;
-                return a;
-            }, nameof(JavaScriptTestHelper.BackFuncOfIntInt));
-
-            Assert.Equal(-1, called);
-            var actual = chain(42);
-            Assert.Equal(42, actual);
-            Assert.Equal(42, called);
-        }
-
-        [Fact]
-        public void JsExportCallback_FunctionIntIntThrow()
-        {
-            int called = -1;
-            var expected = new Exception("test!!");
-            var chain = JavaScriptTestHelper.invoke1_FuncOfIntInt((int a) =>
-            {
-                called = a;
-                throw expected;
-            }, nameof(JavaScriptTestHelper.BackFuncOfIntInt));
-
-            Assert.Equal(-1, called);
-            var actual = Assert.Throws<Exception>(() => chain(42));
-            Assert.Equal(42, called);
-            Assert.Same(expected, actual);
-        }
-
         [Fact]
         public void JsImportMath()
         {
@@ -1962,14 +1360,6 @@ public void JsImportMath()
 
         #endregion
 
-        private void JsExportTest<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.Interfaces)] T>(T value
-        , Func<T, string, T> invoke, string echoName, string jsType, string? jsClass = null)
-        {
-            T res;
-            res = invoke(value, echoName);
-            Assert.Equal<T>(value, res);
-        }
-
         private void JsImportTest<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.Interfaces)] T>(T value
             , Action<T> store1
             , Func<T> retrieve1
@@ -2112,26 +1502,5 @@ public void JsImportMath()
                 Assert.Equal((Exception)(object)value, resEx);
             }
         }
-
-        public async Task InitializeAsync()
-        {
-            await JavaScriptTestHelper.InitializeAsync();
-        }
-
-        public async Task DisposeAsync()
-        {
-            await JavaScriptTestHelper.DisposeAsync();
-        }
-
-        // js Date doesn't have nanosecond precision
-        public static DateTime TrimNano(DateTime date)
-        {
-            return new DateTime(date.Ticks - (date.Ticks % TimeSpan.TicksPerMillisecond), DateTimeKind.Utc);
-        }
-
-        public static DateTimeOffset TrimNano(DateTimeOffset date)
-        {
-            return new DateTime(date.Ticks - (date.Ticks % TimeSpan.TicksPerMillisecond), DateTimeKind.Utc);
-        }
     }
 }
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JSInteropTestBase.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JSInteropTestBase.cs
new file mode 100644
index 000000000000..288ad2b227d7
--- /dev/null
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JSInteropTestBase.cs
@@ -0,0 +1,340 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.IO;
+using System.Runtime.CompilerServices;
+using System.Threading.Tasks;
+using System.Threading;
+using Xunit;
+using System.Diagnostics.CodeAnalysis;
+#pragma warning disable xUnit1026 // Theory methods should use all of their parameters
+
+namespace System.Runtime.InteropServices.JavaScript.Tests
+{
+    public class JSInteropTestBase
+    {
+        public static IEnumerable<object[]> MarshalCharCases()
+        {
+            yield return new object[] { (char)42 };
+            yield return new object[] { (char)1 };
+            yield return new object[] { '\u017D' };
+            yield return new object[] { '\u2661' };
+            yield return new object[] { char.MaxValue };
+            yield return new object[] { char.MinValue };
+        }
+
+        public static IEnumerable<object[]> MarshalByteCases()
+        {
+            yield return new object[] { (byte)42 };
+            yield return new object[] { (byte)1 };
+            yield return new object[] { byte.MaxValue };
+            yield return new object[] { byte.MinValue };
+        }
+
+        public static IEnumerable<object[]> MarshalInt16Cases()
+        {
+            yield return new object[] { 42 };
+            yield return new object[] { 0 };
+            yield return new object[] { 1 };
+            yield return new object[] { -1 };
+            yield return new object[] { short.MaxValue };
+            yield return new object[] { short.MinValue };
+        }
+
+        public static IEnumerable<object[]> MarshalInt32Cases()
+        {
+            yield return new object[] { 42 };
+            yield return new object[] { 0 };
+            yield return new object[] { 1 };
+            yield return new object[] { -1 };
+            yield return new object[] { int.MaxValue };
+            yield return new object[] { int.MinValue };
+        }
+
+        public static IEnumerable<object[]> OutOfRangeCases()
+        {
+            yield return new object[] { double.MaxValue, "Value is not an integer" };
+            yield return new object[] { double.MinValue, "Value is not an integer" };
+            yield return new object[] { double.NaN, "Value is not an integer" };
+            yield return new object[] { double.NegativeInfinity, "Value is not an integer" };
+            yield return new object[] { double.PositiveInfinity, "Value is not an integer" };
+            yield return new object[] { (double)MAX_SAFE_INTEGER, "Overflow" };
+        }
+
+        const long MAX_SAFE_INTEGER = 9007199254740991L;// Number.MAX_SAFE_INTEGER
+        const long MIN_SAFE_INTEGER = -9007199254740991L;// Number.MIN_SAFE_INTEGER
+        public static IEnumerable<object[]> MarshalInt52Cases()
+        {
+            yield return new object[] { -1 };
+            yield return new object[] { 42 };
+            yield return new object[] { 0 };
+            yield return new object[] { 1 };
+            yield return new object[] { MAX_SAFE_INTEGER };
+            yield return new object[] { MIN_SAFE_INTEGER };
+        }
+
+        public static IEnumerable<object[]> MarshalBigInt64Cases()
+        {
+            yield return new object[] { -1 };
+            yield return new object[] { 42 };
+            yield return new object[] { 0 };
+            yield return new object[] { 1 };
+            yield return new object[] { MAX_SAFE_INTEGER };
+            yield return new object[] { MIN_SAFE_INTEGER };
+            yield return new object[] { long.MinValue };
+            yield return new object[] { long.MaxValue };
+        }
+
+        public static IEnumerable<object[]> MarshalDoubleCases()
+        {
+            yield return new object[] { Math.PI };
+            yield return new object[] { 0.0 };
+            yield return new object[] { double.MaxValue };
+            yield return new object[] { double.MinValue };
+            yield return new object[] { double.NegativeInfinity };
+            yield return new object[] { double.PositiveInfinity };
+            yield return new object[] { double.NaN };
+        }
+
+        public static IEnumerable<object[]> MarshalSingleCases()
+        {
+            yield return new object[] { (float)Math.PI };
+            yield return new object[] { 0.0f };
+            yield return new object[] { float.MaxValue };
+            yield return new object[] { float.MinValue };
+            yield return new object[] { float.NegativeInfinity };
+            yield return new object[] { float.PositiveInfinity };
+            yield return new object[] { float.NaN };
+        }
+
+        public static IEnumerable<object[]> MarshalIntPtrCases()
+        {
+            yield return new object[] { (IntPtr)42 };
+            yield return new object[] { IntPtr.Zero };
+            yield return new object[] { (IntPtr)1 };
+            yield return new object[] { (IntPtr)(-1) };
+            yield return new object[] { IntPtr.MaxValue };
+            yield return new object[] { IntPtr.MinValue };
+        }
+
+        public static IEnumerable<object[]> MarshalDateTimeCases()
+        {
+            yield return new object[] { new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc) };
+            yield return new object[] { TrimNano(DateTime.UtcNow) };
+            yield return new object[] { TrimNano(DateTime.MaxValue) };
+        }
+
+        public static IEnumerable<object[]> MarshalDateTimeOffsetCases()
+        {
+            yield return new object[] { DateTimeOffset.FromUnixTimeSeconds(0) };
+            yield return new object[] { TrimNano(DateTimeOffset.UtcNow) };
+            yield return new object[] { TrimNano(DateTimeOffset.MaxValue) };
+        }
+
+        public static IEnumerable<object[]> MarshalByteArrayCases()
+        {
+            yield return new object[] { new byte[] { 1, 2, 3, byte.MaxValue, byte.MinValue } };
+            yield return new object[] { new byte[] { } };
+            yield return new object[] { null };
+        }
+
+        public static IEnumerable<object[]> MarshalIntArrayCases()
+        {
+            yield return new object[] { new int[] { 1, 2, 3, int.MaxValue, int.MinValue } };
+            yield return new object[] { new int[] { } };
+            yield return new object[] { null };
+        }
+
+        public static IEnumerable<object[]> MarshalDoubleArrayCases()
+        {
+            yield return new object[] { new double[] { 1, 2, 3, double.MaxValue, double.MinValue, double.Pi, double.NegativeInfinity, double.PositiveInfinity, double.NaN } };
+            yield return new object[] { new double[] { } };
+            yield return new object[] { null };
+        }
+
+        public static IEnumerable<object[]> MarshalStringArrayCases()
+        {
+            yield return new object[] { new string[] { "\u0050\u0159\u00ed\u006c\u0069\u0161", "\u017e\u006c\u0075\u0165\u006f\u0075\u010d\u006b\u00fd" } };
+            yield return new object[] { new string[] { string.Intern("hello"), string.Empty, null } };
+            yield return new object[] { new string[] { } };
+            yield return new object[] { null };
+        }
+
+        public static IEnumerable<object[]> MarshalBooleanCases()
+        {
+            yield return new object[] { true };
+            yield return new object[] { false };
+        }
+
+        public static IEnumerable<object[]> MarshalObjectArrayCasesToDouble()
+        {
+            yield return new object[] { new object[] { (byte)42 } };
+            yield return new object[] { new object[] { (short)42 } };
+            yield return new object[] { new object[] { 42 } };
+            yield return new object[] { new object[] { 3.14f } };
+            yield return new object[] { new object[] { 'A' } };
+        }
+
+        protected delegate void dummyDelegate();
+        protected static void dummyDelegateA()
+        {
+        }
+
+        public class SomethingRef
+        {
+        }
+
+        public class SomethingStruct
+        {
+        }
+
+        public static IEnumerable<object[]> MarshalObjectArrayCasesThrow()
+        {
+            yield return new object[] { new object[] { () => { } } };
+            yield return new object[] { new object[] { (int a) => { } } };
+            yield return new object[] { new object[] { (int a) => { return a; } } };
+            yield return new object[] { new object[] { (dummyDelegate)dummyDelegateA } };
+            yield return new object[] { new object[] { 0L } };
+            yield return new object[] { new object[] { 0UL } };
+            yield return new object[] { new object[] { (sbyte)0 } };
+            yield return new object[] { new object[] { (ushort)0 } };
+            yield return new object[] { new object[] { new SomethingStruct[] { } } };
+            yield return new object[] { new object[] { new SomethingRef[] { }, } };
+            yield return new object[] { new object[] { new ArraySegment<byte>(new byte[] { 11 }), } };
+        }
+
+        public static IEnumerable<object[]> MarshalObjectArrayCases()
+        {
+            yield return new object[] { new object[] { string.Intern("hello"), string.Empty } };
+            yield return new object[] { new object[] { 1.1d, new DateTime(2022, 5, 8, 14, 55, 01, DateTimeKind.Utc), false, true } };
+            yield return new object[] { new object[] { new double?(1.1d), new DateTime?(new DateTime(2022, 5, 8, 14, 55, 01, DateTimeKind.Utc)), new bool?(false), new bool?(true) } };
+            yield return new object[] { new object[] { null, new object(), new SomethingRef(), new SomethingStruct(), new Exception("test") } };
+            yield return new object[] { new object[] { "JSData" } }; // special cased, so we call createData in the test itself
+            yield return new object[] { new object[] { new byte[] { }, new int[] { }, new double[] { }, new string[] { }, new object[] { } } };
+            yield return new object[] { new object[] { new byte[] { 1, 2, 3 }, new int[] { 1, 2, 3 }, new double[] { 1, 2, 3 }, new string[] { "a", "b", "c" }, new object[] { } } };
+            yield return new object[] { new object[] { new object[] { new byte[] { 1, 2, 3 }, new int[] { 1, 2, 3 }, new double[] { 1, 2, 3 }, new string[] { "a", "b", "c" }, new object(), new SomethingRef(), new SomethingStruct(), new Exception("test") } } };
+            yield return new object[] { new object[] { } };
+            yield return new object[] { null };
+        }
+
+        public static IEnumerable<object[]> MarshalNullableBooleanCases()
+        {
+            yield return new object[] { null };
+            yield return new object[] { true };
+            yield return new object[] { false };
+        }
+
+        public static IEnumerable<object[]> MarshalNullableInt32Cases()
+        {
+            yield return new object[] { null };
+            yield return new object[] { 42 };
+            yield return new object[] { 0 };
+            yield return new object[] { 1 };
+            yield return new object[] { -1 };
+            yield return new object[] { int.MaxValue };
+            yield return new object[] { int.MinValue };
+        }
+
+        public static IEnumerable<object[]> MarshalNullableBigInt64Cases()
+        {
+            yield return new object[] { null };
+            yield return new object[] { 42L };
+            yield return new object[] { 0L };
+            yield return new object[] { 1L };
+            yield return new object[] { -1L };
+            yield return new object[] { MAX_SAFE_INTEGER };
+            yield return new object[] { MIN_SAFE_INTEGER };
+            yield return new object[] { long.MaxValue };
+            yield return new object[] { long.MinValue };
+        }
+
+        public static IEnumerable<object[]> MarshalNullableIntPtrCases()
+        {
+            yield return new object[] { null };
+            yield return new object[] { (IntPtr)42 };
+            yield return new object[] { IntPtr.Zero };
+            yield return new object[] { (IntPtr)1 };
+            yield return new object[] { (IntPtr)(-1) };
+            yield return new object[] { IntPtr.MaxValue };
+            yield return new object[] { IntPtr.MinValue };
+        }
+
+        public static IEnumerable<object[]> MarshalNullableDoubleCases()
+        {
+            yield return new object[] { null };
+            yield return new object[] { Math.PI };
+            yield return new object[] { 0.0 };
+            yield return new object[] { double.MaxValue };
+            yield return new object[] { double.MinValue };
+            yield return new object[] { double.NegativeInfinity };
+            yield return new object[] { double.PositiveInfinity };
+            yield return new object[] { double.NaN };
+        }
+
+        public static IEnumerable<object[]> MarshalNullableDateTimeCases()
+        {
+            yield return new object[] { null };
+            yield return new object[] { new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc) };
+            yield return new object[] { TrimNano(DateTime.UtcNow) };
+            yield return new object[] { TrimNano(DateTime.MaxValue) };
+        }
+
+        public static IEnumerable<object[]> MarshalStringCases()
+        {
+            yield return new object[] { null };
+            yield return new object[] { string.Empty };
+            yield return new object[] { "Ahoj" + Random.Shared.Next() };// shorted than 256 -> check in JS interned
+            yield return new object[] { "Ahoj" + new string('!', 300) };// longer than 256 -> no check in JS interned
+            yield return new object[] { string.Intern("dotnet") };
+        }
+
+        public static IEnumerable<object[]> MarshalObjectCases()
+        {
+            yield return new object[] { new object(), "ManagedObject" };
+            yield return new object[] { null, null };
+        }
+
+        public static IEnumerable<object[]> MarshalExceptionCases()
+        {
+            yield return new object[] { new Exception("Test"), "ManagedError" };
+            yield return new object[] { null, "JSTestError" };
+            yield return new object[] { null, null };
+        }
+
+        public static IEnumerable<object[]> MarshalIJSObjectCases()
+        {
+            yield return new object[] { null, "JSData" };
+            yield return new object[] { null, null };
+        }
+
+        public static IEnumerable<object[]> TaskCases()
+        {
+            yield return new object[] { Math.PI };
+            yield return new object[] { 0 };
+            yield return new object[] { "test" };
+            yield return new object[] { null };
+        }
+
+        public async Task InitializeAsync()
+        {
+            await JavaScriptTestHelper.InitializeAsync();
+        }
+
+        public async Task DisposeAsync()
+        {
+            await JavaScriptTestHelper.DisposeAsync();
+        }
+
+        // js Date doesn't have nanosecond precision
+        public static DateTime TrimNano(DateTime date)
+        {
+            return new DateTime(date.Ticks - (date.Ticks % TimeSpan.TicksPerMillisecond), DateTimeKind.Utc);
+        }
+
+        public static DateTimeOffset TrimNano(DateTimeOffset date)
+        {
+            return new DateTime(date.Ticks - (date.Ticks % TimeSpan.TicksPerMillisecond), DateTimeKind.Utc);
+        }
+    }
+}
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JavaScriptTestHelper.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JavaScriptTestHelper.cs
index bfd645039b56..b2127558a152 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JavaScriptTestHelper.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JavaScriptTestHelper.cs
@@ -15,6 +15,7 @@ namespace System.Runtime.InteropServices.JavaScript.Tests
     public partial class JavaScriptTestHelper
     {
         [JSImport("globalThis.console.log")]
+        [return: JSMarshalAs<JSType.DiscardNoWait>]
         public static partial void Log([JSMarshalAs<JSType.String>] string message);
 
         [JSImport("globalThis.window.location.toString")]
@@ -27,11 +28,21 @@ public partial class JavaScriptTestHelper
         public static partial string ReboundMemberEcho(string message);
 
         [JSExport]
+        [return: JSMarshalAs<JSType.DiscardNoWait>] // this means that the message will arrive out of order, especially across threads.
         public static void ConsoleWriteLine([JSMarshalAs<JSType.String>] string message)
         {
             Console.WriteLine(message);
         }
 
+        [JSImport("delay", "JavaScriptTestHelper")]
+        public static partial Task Delay(int ms);
+
+        [JSImport("intentionallyMissingImport", "JavaScriptTestHelper")]
+        public static partial void IntentionallyMissingImport();
+
+        [JSImport("intentionallyMissingImportAsync", "JavaScriptTestHelper")]
+        public static partial Task IntentionallyMissingImportAsync();
+
         [JSImport("catch1toString", "JavaScriptTestHelper")]
         public static partial string catch1toString(string message, string functionName);
 
@@ -73,6 +84,15 @@ public static void Optimized1V(int a1)
         [JSImport("invoke1V", "JavaScriptTestHelper")]
         public static partial void invoke1V(int a1);
 
+        [JSExport]
+        [return: JSMarshalAs<JSType.DiscardNoWait>] // this means that the message will arrive out of order, especially across threads.
+        public static void Optimized1O(int a1)
+        {
+            optimizedReached += a1;
+        }
+        [JSImport("invoke1O", "JavaScriptTestHelper")]
+        public static partial void invoke1O(int a1);
+
         [JSExport]
         public static int Optimized1R(int a1)
         {
@@ -261,6 +281,11 @@ internal static partial void Relaxed(string a1, Exception ex,
         [JSImport("store1", "JavaScriptTestHelper")]
         [return: JSMarshalAs<JSType.Void>]
         internal static partial void store1_Int32([JSMarshalAs<JSType.Number>] int value);
+
+        [JSImport("store1", "JavaScriptTestHelper")]
+        [return: JSMarshalAs<JSType.DiscardNoWait>] // this means that the message will arrive out of order, especially across threads.
+        internal static partial void store1DiscardNoWait_Int32([JSMarshalAs<JSType.Number>] int value);
+
         [JSImport("retrieve1", "JavaScriptTestHelper")]
         [return: JSMarshalAs<JSType.Number>]
         internal static partial int retrieve1_Int32();
@@ -449,6 +474,9 @@ public static async Task<object> AwaitTaskOfObject([JSMarshalAs<JSType.Promise<J
         [return: JSMarshalAs<JSType.Function<JSType.Number, JSType.Number, JSType.Number>>]
         internal static partial Func<int, int, int> backback_FuncIntIntFuncIntInt([JSMarshalAs<JSType.Function<JSType.Number, JSType.Number, JSType.Number>>] Func<int, int, int> fun, [JSMarshalAs<JSType.Number>] int a, [JSMarshalAs<JSType.Number>] int b);
 
+        [JSImport("backbackAsync", "JavaScriptTestHelper")]
+        internal static partial Task<int> backback_FuncIntIntFuncIntIntAsync([JSMarshalAs<JSType.Function<JSType.Number, JSType.Number, JSType.Number>>] Func<int, int, int> fun, [JSMarshalAs<JSType.Number>] int a, [JSMarshalAs<JSType.Number>] int b);
+
         [JSImport("back3", "JavaScriptTestHelper")]
         internal static partial void back3_ActionInt([JSMarshalAs<JSType.Function<JSType.Number>>] Action<int>? action, [JSMarshalAs<JSType.Number>] int a);
 
@@ -500,6 +528,10 @@ public static Func<int, int> BackFuncOfIntInt([JSMarshalAs<JSType.Function<JSTyp
         [JSImport("invoke1", "JavaScriptTestHelper")]
         [return: JSMarshalAs<JSType.Boolean>]
         internal static partial bool invoke1_Boolean([JSMarshalAs<JSType.Boolean>] bool value, [JSMarshalAs<JSType.String>] string name);
+
+        [JSImport("invoke1Async", "JavaScriptTestHelper")]
+        internal static partial Task<bool> invoke1_BooleanAsync(bool value, string name);
+
         [JSExport]
         [return: JSMarshalAs<JSType.Boolean>]
         public static bool EchoBoolean([JSMarshalAs<JSType.Boolean>] bool arg1)
@@ -994,14 +1026,24 @@ public static JSObject EchoIJSObject([JSMarshalAs<JSType.Object>] JSObject arg1)
         [JSImport("INTERNAL.forceDisposeProxies")]
         internal static partial void ForceDisposeProxies(bool disposeMethods, bool verbose);
 
+        public static void AssertWasmBackgroundExec()
+        {
+            if (PlatformDetection.IsWasmBackgroundExec && Environment.CurrentManagedThreadId == 1)
+            {
+                throw new Exception("With WasmBackgroundExec we are expecting to run tests on the thread pool");
+            }
+        }
+
         static JSObject _module;
         public static async Task InitializeAsync()
         {
+            AssertWasmBackgroundExec();
             if (_module == null)
             {
                 _module = await JSHost.ImportAsync("JavaScriptTestHelper", "../JavaScriptTestHelper.mjs"); ;
                 await Setup();
             }
+            AssertWasmBackgroundExec();
 
 #if FEATURE_WASM_MANAGED_THREADS
             // are we in the UI thread ?
@@ -1011,6 +1053,7 @@ public static async Task InitializeAsync()
                 // this gives browser chance to serve UI thread event loop before every test
                 await Task.Yield();
             }
+            AssertWasmBackgroundExec();
         }
 
         public static Task DisposeAsync()
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JavaScriptTestHelper.mjs b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JavaScriptTestHelper.mjs
index baec1cb231c2..e0cf11376d8d 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JavaScriptTestHelper.mjs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/JavaScriptTestHelper.mjs
@@ -186,6 +186,12 @@ export function invoke1V(arg1) {
     fn(arg1);
 }
 
+export function invoke1O(arg1) {
+    const JavaScriptTestHelper = dllExports.System.Runtime.InteropServices.JavaScript.Tests.JavaScriptTestHelper;
+    const fn = JavaScriptTestHelper['Optimized1O'];
+    fn(arg1);
+}
+
 export function invoke1R(arg1) {
     const JavaScriptTestHelper = dllExports.System.Runtime.InteropServices.JavaScript.Tests.JavaScriptTestHelper;
     const fn = JavaScriptTestHelper['Optimized1R'];
@@ -214,6 +220,24 @@ export function invoke1(arg1, name) {
     return res;
 }
 
+export async function invoke1Async(arg1, name) {
+    if (globalThis.gc) {
+        // console.log('globalThis.gc');
+        globalThis.gc();
+    }
+    // console.log(`invoke1: ${name}(arg1:${arg1 !== null ? typeof arg1 : '<null>'})`)
+    const JavaScriptTestHelper = dllExports.System.Runtime.InteropServices.JavaScript.Tests.JavaScriptTestHelper;
+    const fn = JavaScriptTestHelper[name];
+
+    await delay(10);
+
+    // console.log("invoke1:" + typeof fn);
+    // console.log("invoke1:" + fn.toString());
+    const res = fn(arg1);
+    // console.log(`invoke1: res ${res !== null ? typeof res : '<null>'}`)
+    return res;
+}
+
 export function invoke2(arg1, name) {
     const fn = dllExports.JavaScriptTestHelperNoNamespace[name];
     //console.log("invoke1:" + fn.toString());
@@ -379,6 +403,15 @@ export function backback(arg1, arg2, arg3) {
     }
 }
 
+export async function backbackAsync(arg1, arg2, arg3) {
+    if (globalThis.gc) {
+        // console.log('globalThis.gc');
+        globalThis.gc();
+    }
+    await delay(10);
+    return arg1(arg2, arg3);
+}
+
 export const instance = {}
 
 globalThis.javaScriptTestHelper = instance;
@@ -396,3 +429,7 @@ export async function setup() {
 }
 
 // console.log('JavaScriptTestHelper:' Object.keys(globalThis.JavaScriptTestHelper));
+
+export function delay(ms) {
+    return new Promise(resolve => globalThis.setTimeout(resolve, ms));
+}
\ No newline at end of file
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTest.Http.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTest.Http.cs
index 8939cca759b1..489120ee0e47 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTest.Http.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTest.Http.cs
@@ -82,8 +82,10 @@ await Assert.ThrowsAsync<TaskCanceledException>(async () =>
                 {
                     CancellationTokenSource cts = new CancellationTokenSource();
                     var promise = response.Content.ReadAsStringAsync(cts.Token);
+                    WebWorkerTestHelper.Log("HttpClient_CancelInDifferentThread: ManagedThreadId: " + Environment.CurrentManagedThreadId + " NativeThreadId: " + WebWorkerTestHelper.NativeThreadId);
                     cts.Cancel();
-                    await promise;
+                    var res = await promise;
+                    throw new Exception("This should be unreachable: " + res);
                 });
             });
         }
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTest.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTest.cs
index c88d32277da8..0a2ae44142dc 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTest.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTest.cs
@@ -11,15 +11,9 @@ namespace System.Runtime.InteropServices.JavaScript.Tests
 
     // TODO test:
     // JSExport 2x
-    // JSExport async
-    // lock
-    // thread allocation, many threads
     // ProxyContext flow, child thread, child task
     // use JSObject after JSWebWorker finished, especially HTTP
-    // WS on JSWebWorker
-    // HTTP continue on TP
     // event pipe
-    // FS
     // JS setTimeout till after JSWebWorker close
     // synchronous .Wait for JS setTimeout on the same thread -> deadlock problem **7)**
 
@@ -68,6 +62,34 @@ public async Task JSDelay_Cancellation(Executor executor)
             await Assert.ThrowsAnyAsync<OperationCanceledException>(() => canceledTask);
         }
 
+        [Theory, MemberData(nameof(GetBlockingFriendlyTargetThreads))]
+        public async Task JSDelay_Blocking_Wait(Executor executor)
+        {
+            var cts = new CancellationTokenSource();
+            var blockedTask = executor.Execute(async () =>
+            {
+                await executor.StickyAwait(WebWorkerTestHelper.CreateDelay(), cts.Token);
+                var promise = WebWorkerTestHelper.JSDelay(100);
+                promise.Wait();
+            }, cts.Token);
+
+            await blockedTask;
+        }
+
+        [Theory, MemberData(nameof(GetBlockingFriendlyTargetThreads))]
+        public async Task JSDelay_Blocking_GetResult(Executor executor)
+        {
+            var cts = new CancellationTokenSource();
+            var blockedTask = executor.Execute(async () =>
+            {
+                await executor.StickyAwait(WebWorkerTestHelper.CreateDelay(), cts.Token);
+                var promise = WebWorkerTestHelper.JSDelay(100);
+                promise.GetAwaiter().GetResult();
+            }, cts.Token);
+
+            await blockedTask;
+        }
+
         [Fact]
         public async Task JSSynchronizationContext_Send_Post_Items_Cancellation()
         {
@@ -131,7 +153,7 @@ public async Task JSSynchronizationContext_Send_Post_Items_Cancellation()
                 }
                 catch (Exception ex)
                 {
-                    Console.WriteLine("Unexpected exception " + ex);
+                    WebWorkerTestHelper.Log("Unexpected exception " + ex);
                     postReady.SetException(ex);
                     return Task.FromException(ex);
                 }
@@ -316,7 +338,7 @@ public async Task ManagedConsole(Executor executor)
             using var cts = CreateTestCaseTimeoutSource();
             await executor.Execute(() =>
             {
-                Console.WriteLine("C# Hello from ManagedThreadId: " + Environment.CurrentManagedThreadId);
+                WebWorkerTestHelper.Log("C# Hello from ManagedThreadId: " + Environment.CurrentManagedThreadId);
                 Console.Clear();
                 return Task.CompletedTask;
             }, cts.Token);
@@ -343,7 +365,7 @@ await executor.Execute(async () =>
 
                 var jsTid = WebWorkerTestHelper.GetTid();
                 var csTid = WebWorkerTestHelper.NativeThreadId;
-                if (executor.Type == ExecutorType.Main || executor.Type == ExecutorType.JSWebWorker)
+                if (executor.Type == ExecutorType.JSWebWorker)
                 {
                     Assert.Equal(jsTid, csTid);
                 }
@@ -364,23 +386,25 @@ public async Task ThreadingTimer(Executor executor)
             await executor.Execute(async () =>
             {
                 TaskCompletionSource tcs = new TaskCompletionSource();
+                WebWorkerTestHelper.Log("ThreadingTimer: Start Time: " + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff") + " ManagedThreadId: " + Environment.CurrentManagedThreadId + " NativeThreadId: " + WebWorkerTestHelper.NativeThreadId);
 
                 using var timer = new Timer(_ =>
                 {
                     Assert.NotEqual(1, Environment.CurrentManagedThreadId);
                     Assert.True(Thread.CurrentThread.IsThreadPoolThread);
-                    tcs.SetResult();
                     hit = true;
+                    tcs.SetResult();
                 }, null, 100, Timeout.Infinite);
 
                 await tcs.Task;
             }, cts.Token);
 
+            WebWorkerTestHelper.Log("ThreadingTimer: End Time: " + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff") + " ManagedThreadId: " + Environment.CurrentManagedThreadId + " NativeThreadId: " + WebWorkerTestHelper.NativeThreadId);
             Assert.True(hit);
         }
 
         [Theory, MemberData(nameof(GetTargetThreads))]
-        public async Task JSDelay_ContinueWith(Executor executor)
+        public async Task JSDelay_ContinueWith_Async(Executor executor)
         {
             using var cts = CreateTestCaseTimeoutSource();
             await executor.Execute(async () =>
@@ -389,9 +413,23 @@ await executor.Execute(async () =>
 
                 await WebWorkerTestHelper.JSDelay(10).ContinueWith(_ =>
                 {
-                    // continue on the context of the target JS interop
-                    executor.AssertInteropThread();
-                }, TaskContinuationOptions.ExecuteSynchronously);
+                    Assert.True(Thread.CurrentThread.IsThreadPoolThread);
+                }, TaskContinuationOptions.RunContinuationsAsynchronously);
+            }, cts.Token);
+        }
+
+        [Theory, MemberData(nameof(GetTargetThreads))]
+        public async Task JSDelay_ContinueWith_Sync(Executor executor)
+        {
+            using var cts = CreateTestCaseTimeoutSource();
+            await executor.Execute(async () =>
+            {
+                await executor.StickyAwait(WebWorkerTestHelper.CreateDelay(), cts.Token);
+
+                await WebWorkerTestHelper.JSDelay(10).ContinueWith(_ =>
+                {
+                    Assert.True(Thread.CurrentThread.IsThreadPoolThread);
+                }, TaskContinuationOptions.ExecuteSynchronously); // ExecuteSynchronously is ignored
             }, cts.Token);
         }
 
@@ -409,6 +447,21 @@ await executor.Execute(async () =>
             }, cts.Token);
         }
 
+        [Theory, MemberData(nameof(GetTargetThreads))]
+        public async Task JSDelay_ConfigureAwait_False(Executor executor)
+        {
+            using var cts = CreateTestCaseTimeoutSource();
+            await executor.Execute(async () =>
+            {
+                await executor.StickyAwait(WebWorkerTestHelper.CreateDelay(), cts.Token);
+
+                await WebWorkerTestHelper.JSDelay(10).ConfigureAwait(false);
+
+                // resolve/reject on I/O thread -> thread pool
+                Assert.True(Thread.CurrentThread.IsThreadPoolThread);
+            }, cts.Token);
+        }
+
         [Theory, MemberData(nameof(GetTargetThreads))]
         public async Task ManagedDelay_ContinueWith(Executor executor)
         {
@@ -437,11 +490,13 @@ await executor.Execute(async () =>
         }
 
         [Theory, MemberData(nameof(GetTargetThreadsAndBlockingCalls))]
-        public async Task WaitAssertsOnJSInteropThreads(Executor executor, NamedCall method)
+        public async Task WaitInAsyncAssertsOnlyOnJSWebWorker(Executor executor, NamedCall method)
         {
-            var cts = CreateTestCaseTimeoutSource();
-            await executor.Execute(Task () =>
+            using var cts = CreateTestCaseTimeoutSource();
+            await executor.Execute(async () =>
             {
+                await executor.StickyAwait(WebWorkerTestHelper.InitializeAsync(), cts.Token);
+
                 Exception? exception = null;
                 try
                 {
@@ -452,9 +507,75 @@ await executor.Execute(Task () =>
                     exception = ex;
                 }
 
-                executor.AssertBlockingWait(exception);
+                if (method.IsBlocking && executor.Type == ExecutorType.JSWebWorker)
+                {
+                    Assert.NotNull(exception);
+                    Assert.IsType<PlatformNotSupportedException>(exception);
+                }
+                else
+                {
+                    Assert.Null(exception);
+                }
+            }, cts.Token);
+        }
 
-                return Task.CompletedTask;
+        [Theory, MemberData(nameof(GetTargetThreadsAndBlockingCalls))]
+        public async Task WaitAssertsOnSyncCallback(Executor executor, NamedCall method)
+        {
+            using var cts = CreateTestCaseTimeoutSource();
+            await executor.Execute(async () =>
+            {
+                await executor.StickyAwait(WebWorkerTestHelper.InitializeAsync(), cts.Token);
+
+                Exception? exception = null;
+                // the callback will hit Main or JSWebWorker, not the original executor thread
+                await WebWorkerTestHelper.CallMeBackSync(() =>
+                {
+                    // when we are inside of synchronous callback, all blocking .Wait is forbidden
+                    try
+                    {
+                        method.Call(cts.Token);
+                    }
+                    catch (Exception ex)
+                    {
+                        exception = ex;
+                    }
+                });
+
+                if (method.IsBlocking)
+                {
+                    Assert.NotNull(exception);
+                    Assert.IsType<PlatformNotSupportedException>(exception);
+                }
+                else
+                {
+                    Assert.Null(exception);
+                }
+            }, cts.Token);
+        }
+
+        [Theory, MemberData(nameof(GetTargetThreadsAndBlockingCalls))]
+        public async Task WaitAssertsOnSyncJSExport(Executor executor, NamedCall method)
+        {
+            using var cts = CreateTestCaseTimeoutSource();
+            await executor.Execute(async () =>
+            {
+                await executor.StickyAwait(WebWorkerTestHelper.InitializeAsync(), cts.Token);
+
+                WebWorkerTestHelper.CurrentCallback = method;
+                WebWorkerTestHelper.CurrentCancellationToken = cts.Token;
+                // the callback will hit Main or JSWebWorker, not the original executor thread
+                await WebWorkerTestHelper.CallExportBackSync(nameof(WebWorkerTestHelper.CallCurrentCallback));
+
+                if (method.IsBlocking)
+                {
+                    Assert.NotNull(WebWorkerTestHelper.LastException);
+                    Assert.IsType<PlatformNotSupportedException>(WebWorkerTestHelper.LastException);
+                }
+                else
+                {
+                    Assert.Null(WebWorkerTestHelper.LastException);
+                }
             }, cts.Token);
         }
 
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTestBase.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTestBase.cs
index 87f88745377b..77aef0857a8d 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTestBase.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTestBase.cs
@@ -1,6 +1,7 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.IO;
 using System.Threading.Tasks;
 using System.Threading;
 using Xunit;
@@ -35,7 +36,7 @@ protected CancellationTokenSource CreateTestCaseTimeoutSource([CallerMemberName]
             cts.Token.Register(() =>
             {
                 var end = DateTime.Now;
-                Console.WriteLine($"Unexpected test case {memberName} timeout after {end - start} ManagedThreadId:{Environment.CurrentManagedThreadId}");
+                WebWorkerTestHelper.Log($"Unexpected test case {memberName} timeout after {end - start} ManagedThreadId:{Environment.CurrentManagedThreadId}");
             });
             return cts;
         }
@@ -45,9 +46,17 @@ public static IEnumerable<object[]> GetTargetThreads()
             return Enum.GetValues<ExecutorType>().Select(type => new object[] { new Executor(type) });
         }
 
-        public static IEnumerable<object[]> GetSpecificTargetThreads()
+        public static IEnumerable<object[]> GetBlockingFriendlyTargetThreads()
         {
-            yield return new object[] { new Executor(ExecutorType.JSWebWorker), new Executor(ExecutorType.Main) };
+            yield return new object[] { new Executor(ExecutorType.Main) };
+            yield return new object[] { new Executor(ExecutorType.NewThread) };
+            yield return new object[] { new Executor(ExecutorType.ThreadPool) };
+            // JSWebWorker is missing here because JS can't resolve promises while blocked
+        }
+
+        public static IEnumerable<object[]> GetSpecificTargetThreads2x()
+        {
+            yield return new object[] { new Executor(ExecutorType.Main), new Executor(ExecutorType.Main) };
             yield break;
         }
 
@@ -82,7 +91,7 @@ async Task ActionsInDifferentThreads1()
                 }
                 catch (Exception ex)
                 {
-                    Console.WriteLine("ActionsInDifferentThreads1 failed\n" + ex);
+                    WebWorkerTestHelper.Log("ActionsInDifferentThreads1 failed\n" + ex);
                     job1ReadyTCS.SetResult(default);
                     e1Failed = true;
                     throw;
@@ -127,44 +136,76 @@ async Task ActionsInDifferentThreads2()
                 {
                     throw;
                 }
-                Console.WriteLine("ActionsInDifferentThreads failed with: \n" + ex);
                 if (!e1Done || !e2Done)
                 {
-                    Console.WriteLine("ActionsInDifferentThreads canceling!");
+                    WebWorkerTestHelper.Log("ActionsInDifferentThreads canceling because of unexpected fail: \n" + ex);
                     cts.Cancel();
                 }
+                else
+                {
+                    WebWorkerTestHelper.Log("ActionsInDifferentThreads failed with: \n" + ex);
+                }
                 throw;
             }
         }
 
-        public class NamedCall
+        static void LocalCtsIgnoringCall(Action<CancellationToken> action)
         {
-            public string Name { get; set; }
-            public delegate void Method(CancellationToken ct);
-            public Method Call { get; set; }
-
-            override public string ToString() => Name;
+            var cts = new CancellationTokenSource(8);
+            try
+            {
+                action(cts.Token);
+            }
+            catch (OperationCanceledException exception)
+            {
+                if (exception.CancellationToken != cts.Token)
+                {
+                    throw;
+                }
+                /* ignore the local one */
+            }
         }
 
         public static IEnumerable<NamedCall> BlockingCalls = new List<NamedCall>
         {
-                new NamedCall { Name = "Task.Wait", Call = delegate (CancellationToken ct) { Task.Delay(10, ct).Wait(ct); }},
-                new NamedCall { Name = "Task.WaitAll", Call = delegate (CancellationToken ct) { Task.WaitAll(Task.Delay(10, ct)); }},
-                new NamedCall { Name = "Task.WaitAny", Call = delegate (CancellationToken ct) { Task.WaitAny(Task.Delay(10, ct)); }},
-                new NamedCall { Name = "ManualResetEventSlim.Wait", Call = delegate (CancellationToken ct) {
-                    using var mr = new ManualResetEventSlim(false);
-                    using var cts = new CancellationTokenSource(8);
-                    try {
-                        mr.Wait(cts.Token);
-                    } catch (OperationCanceledException) { /* ignore */ }
-                }},
-                new NamedCall { Name = "SemaphoreSlim.Wait", Call = delegate (CancellationToken ct) {
-                    using var sem = new SemaphoreSlim(2);
-                    var cts = new CancellationTokenSource(8);
-                    try {
-                        sem.Wait(cts.Token);
-                    } catch (OperationCanceledException) { /* ignore */ }
-                }},
+            // things that should NOT throw PNSE
+            new NamedCall { IsBlocking = false, Name = "Console.WriteLine", Call = delegate (CancellationToken ct) { Console.WriteLine("Blocking"); }},
+            new NamedCall { IsBlocking = false, Name = "Directory.GetCurrentDirectory", Call = delegate (CancellationToken ct) { Directory.GetCurrentDirectory(); }},
+            new NamedCall { IsBlocking = false, Name = "CancellationTokenSource.ctor", Call = delegate (CancellationToken ct) {
+                using var cts = new CancellationTokenSource(8);
+            }},
+            new NamedCall { IsBlocking = false, Name = "Task.Delay", Call = delegate (CancellationToken ct) {
+                Task.Delay(30, ct);
+            }},
+            new NamedCall { IsBlocking = false, Name = "new Timer", Call = delegate (CancellationToken ct) {
+                new Timer((_) => { }, null, 1, -1);
+            }},
+
+            // things which should throw PNSE on sync JSExport and JSWebWorker
+            new NamedCall { IsBlocking = true, Name = "Task.Wait", Call = delegate (CancellationToken ct) { Task.Delay(30, ct).Wait(ct); }},
+            new NamedCall { IsBlocking = true, Name = "Task.WaitAll", Call = delegate (CancellationToken ct) { Task.WaitAll(Task.Delay(30, ct)); }},
+            new NamedCall { IsBlocking = true, Name = "Task.WaitAny", Call = delegate (CancellationToken ct) { Task.WaitAny(Task.Delay(30, ct)); }},
+            new NamedCall { IsBlocking = true, Name = "ManualResetEventSlim.Wait", Call = delegate (CancellationToken ct) {
+                using var mr = new ManualResetEventSlim(false);
+                LocalCtsIgnoringCall(mr.Wait);
+            }},
+            new NamedCall { IsBlocking = true, Name = "SemaphoreSlim.Wait", Call = delegate (CancellationToken ct) {
+                using var sem = new SemaphoreSlim(2);
+                LocalCtsIgnoringCall(sem.Wait);
+            }},
+            new NamedCall { IsBlocking = true, Name = "Mutex.WaitOne", Call = delegate (CancellationToken ct) {
+                using var mr = new ManualResetEventSlim(false);
+                var mutex = new Mutex();
+                var thread = new Thread(() => {
+                    mutex.WaitOne();
+                    mr.Set();
+                    Thread.Sleep(50);
+                    mutex.ReleaseMutex();
+                });
+                thread.Start();
+                Thread.ForceBlockingWait(static (b) => ((ManualResetEventSlim)b).Wait(), mr);
+                mutex.WaitOne();
+            }},
         };
 
         public static IEnumerable<object[]> GetTargetThreadsAndBlockingCalls()
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTestHelper.cs b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTestHelper.cs
index 35cdd8ff1858..fa83846f1492 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTestHelper.cs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTestHelper.cs
@@ -18,6 +18,7 @@ public partial class WebWorkerTestHelper
         public static readonly string LocalWsEcho = "ws://" + Environment.GetEnvironmentVariable("DOTNET_TEST_WEBSOCKETHOST") + "/WebSocket/EchoWebSocket.ashx";
 
         [JSImport("globalThis.console.log")]
+        [return: JSMarshalAs<JSType.DiscardNoWait>]
         public static partial void Log(string message);
 
         [JSImport("delay", "InlineTestHelper")]
@@ -38,6 +39,30 @@ public partial class WebWorkerTestHelper
         [JSImport("promiseValidateState", "WebWorkerTestHelper")]
         public static partial Task<bool> PromiseValidateState(JSObject state);
 
+        [JSImport("callMeBackSync", "WebWorkerTestHelper")]
+        public static partial Task CallMeBackSync([JSMarshalAs<JSType.Function>] Action syncCallback);
+
+        [JSImport("callExportBackSync", "WebWorkerTestHelper")]
+        public static partial Task CallExportBackSync(string syncExportName);
+
+        public static NamedCall CurrentCallback;
+        public static CancellationToken CurrentCancellationToken = CancellationToken.None;
+        public static Exception? LastException = null;
+
+        [JSExport]
+        public static void CallCurrentCallback()
+        {
+            LastException = null;
+            try
+            {
+                CurrentCallback.Call(CurrentCancellationToken);
+            }
+            catch (Exception ex)
+            {
+                LastException = ex;
+            }
+        }
+
         public static string GetOriginUrl()
         {
             using var globalThis = JSHost.GlobalThis;
@@ -121,7 +146,6 @@ public enum ExecutorType
     public class Executor
     {
         public int ExecutorTID;
-        public SynchronizationContext ExecutorSynchronizationContext;
         private static SynchronizationContext _mainSynchronizationContext;
         public static SynchronizationContext MainSynchronizationContext
         {
@@ -156,7 +180,6 @@ public Task Execute(Func<Task> job, CancellationToken cancellationToken)
             Task wrapExecute()
             {
                 ExecutorTID = Environment.CurrentManagedThreadId;
-                ExecutorSynchronizationContext = SynchronizationContext.Current ?? MainSynchronizationContext;
                 AssertTargetThread();
                 return job();
             }
@@ -194,6 +217,15 @@ public void AssertTargetThread()
             {
                 Assert.False(Thread.CurrentThread.IsThreadPoolThread, "IsThreadPoolThread:" + Thread.CurrentThread.IsThreadPoolThread + " Type " + Type);
             }
+            if (Type == ExecutorType.Main || Type == ExecutorType.JSWebWorker)
+            {
+                Assert.NotNull(SynchronizationContext.Current);
+                Assert.Equal("System.Runtime.InteropServices.JavaScript.JSSynchronizationContext", SynchronizationContext.Current.GetType().FullName);
+            }
+            else
+            {
+                Assert.Null(SynchronizationContext.Current);
+            }
         }
 
         public void AssertAwaitCapturedContext()
@@ -230,51 +262,6 @@ public void AssertAwaitCapturedContext()
             }
         }
 
-        public void AssertBlockingWait(Exception? exception)
-        {
-            switch (Type)
-            {
-                case ExecutorType.Main:
-                case ExecutorType.JSWebWorker:
-                    Assert.NotNull(exception);
-                    Assert.IsType<PlatformNotSupportedException>(exception);
-                    break;
-                case ExecutorType.NewThread:
-                case ExecutorType.ThreadPool:
-                    Assert.Null(exception);
-                    break;
-            }
-        }
-
-        public void AssertInteropThread()
-        {
-            switch (Type)
-            {
-                case ExecutorType.Main:
-                    Assert.Equal(1, Environment.CurrentManagedThreadId);
-                    Assert.Equal(ExecutorTID, Environment.CurrentManagedThreadId);
-                    Assert.False(Thread.CurrentThread.IsThreadPoolThread);
-                    break;
-                case ExecutorType.JSWebWorker:
-                    Assert.NotEqual(1, Environment.CurrentManagedThreadId);
-                    Assert.Equal(ExecutorTID, Environment.CurrentManagedThreadId);
-                    Assert.False(Thread.CurrentThread.IsThreadPoolThread);
-                    break;
-                case ExecutorType.NewThread:
-                    // it will synchronously continue on the UI thread
-                    Assert.Equal(1, Environment.CurrentManagedThreadId);
-                    Assert.NotEqual(ExecutorTID, Environment.CurrentManagedThreadId);
-                    Assert.False(Thread.CurrentThread.IsThreadPoolThread);
-                    break;
-                case ExecutorType.ThreadPool:
-                    // it will synchronously continue on the UI thread
-                    Assert.Equal(1, Environment.CurrentManagedThreadId);
-                    Assert.NotEqual(ExecutorTID, Environment.CurrentManagedThreadId);
-                    Assert.False(Thread.CurrentThread.IsThreadPoolThread);
-                    break;
-            }
-        }
-
         public override string ToString() => Type.ToString();
 
         // make sure we stay on the executor
@@ -346,7 +333,7 @@ public static Task RunOnNewThread(Func<Task> job, CancellationToken cancellation
                 }
                 catch (Exception ex)
                 {
-                    if(ex is AggregateException agg)
+                    if (ex is AggregateException agg)
                     {
                         tcs.TrySetException(agg.InnerException);
                     }
@@ -394,4 +381,14 @@ public static Task RunOnTargetAsync(SynchronizationContext ctx, Func<Task> job,
     }
 
     #endregion
+
+    public class NamedCall
+    {
+        public string Name { get; set; }
+        public bool IsBlocking { get; set; }
+        public delegate void Method(CancellationToken ct);
+        public Method Call { get; set; }
+
+        override public string ToString() => Name;
+    }
 }
diff --git a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTestHelper.mjs b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTestHelper.mjs
index 558fb181b47d..e2a8cfadfaea 100644
--- a/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTestHelper.mjs
+++ b/src/libraries/System.Runtime.InteropServices.JavaScript/tests/System.Runtime.InteropServices.JavaScript.UnitTests/System/Runtime/InteropServices/JavaScript/WebWorkerTestHelper.mjs
@@ -73,3 +73,13 @@ export function delay(ms) {
 export function getRndInteger(min, max) {
     return Math.floor(Math.random() * (max - min)) + min;
 }
+
+export async function callMeBackSync(syncCallback) {
+    syncCallback();
+}
+
+export async function callExportBackSync(syncExportName) {
+    const WebWorkerTestHelper = dllExports.System.Runtime.InteropServices.JavaScript.Tests.WebWorkerTestHelper;
+    const method = WebWorkerTestHelper[syncExportName]
+    method();
+}
\ No newline at end of file
diff --git a/src/libraries/System.Runtime.InteropServices/gen/ComInterfaceGenerator/Analyzers/ConvertComImportToGeneratedComInterfaceAnalyzer.cs b/src/libraries/System.Runtime.InteropServices/gen/ComInterfaceGenerator/Analyzers/ConvertComImportToGeneratedComInterfaceAnalyzer.cs
index 3035552c59dd..f9b96eb7e301 100644
--- a/src/libraries/System.Runtime.InteropServices/gen/ComInterfaceGenerator/Analyzers/ConvertComImportToGeneratedComInterfaceAnalyzer.cs
+++ b/src/libraries/System.Runtime.InteropServices/gen/ComInterfaceGenerator/Analyzers/ConvertComImportToGeneratedComInterfaceAnalyzer.cs
@@ -50,7 +50,7 @@ public override void Initialize(AnalysisContext context)
                     INamedTypeSymbol type = (INamedTypeSymbol)context.Symbol;
                     AttributeData? interfaceTypeAttributeData = type.GetAttributes().FirstOrDefault(a => a.AttributeClass.Equals(interfaceTypeAttribute, SymbolEqualityComparer.Default));
                     if (type is not { TypeKind: TypeKind.Interface, IsComImport: true }
-                        || interfaceTypeAttributeData.ConstructorArguments.Length == 1 && (int)interfaceTypeAttributeData.ConstructorArguments[0].Value != (int)ComInterfaceType.InterfaceIsIUnknown)
+                        || interfaceTypeAttributeData is not { ConstructorArguments: [{ Value: (int)ComInterfaceType.InterfaceIsIUnknown }] })
                     {
                         return;
                     }
diff --git a/src/libraries/System.Runtime.InteropServices/gen/ComInterfaceGenerator/ComInterfaceGenerator.cs b/src/libraries/System.Runtime.InteropServices/gen/ComInterfaceGenerator/ComInterfaceGenerator.cs
index dd04872d97b4..d3c5c631bb0b 100644
--- a/src/libraries/System.Runtime.InteropServices/gen/ComInterfaceGenerator/ComInterfaceGenerator.cs
+++ b/src/libraries/System.Runtime.InteropServices/gen/ComInterfaceGenerator/ComInterfaceGenerator.cs
@@ -637,24 +637,15 @@ private static ClassDeclarationSyntax GenerateInterfaceInformation(ComInterfaceI
 
             static ExpressionSyntax CreateEmbeddedDataBlobCreationStatement(ReadOnlySpan<byte> bytes)
             {
-                var literals = new LiteralExpressionSyntax[bytes.Length];
+                var literals = new CollectionElementSyntax[bytes.Length];
 
                 for (int i = 0; i < bytes.Length; i++)
                 {
-                    literals[i] = LiteralExpression(SyntaxKind.NumericLiteralExpression, Literal(bytes[i]));
+                    literals[i] = ExpressionElement(LiteralExpression(SyntaxKind.NumericLiteralExpression, Literal(bytes[i])));
                 }
 
-                // new System.ReadOnlySpan<byte>(new[] { <byte literals> } )
-                return ObjectCreationExpression(
-                    GenericName(TypeNames.System_ReadOnlySpan)
-                        .AddTypeArgumentListArguments(PredefinedType(Token(SyntaxKind.ByteKeyword))))
-                    .AddArgumentListArguments(
-                        Argument(
-                            ArrayCreationExpression(
-                                    ArrayType(PredefinedType(Token(SyntaxKind.ByteKeyword)), SingletonList(ArrayRankSpecifier())),
-                                    InitializerExpression(
-                                        SyntaxKind.ArrayInitializerExpression,
-                                        SeparatedList<ExpressionSyntax>(literals)))));
+                // [ <byte literals> ]
+                return CollectionExpression(SeparatedList(literals));
             }
         }
     }
diff --git a/src/libraries/System.Runtime.InteropServices/tests/ComInterfaceGenerator.Unit.Tests/CompileFails.cs b/src/libraries/System.Runtime.InteropServices/tests/ComInterfaceGenerator.Unit.Tests/CompileFails.cs
index a4fad790817c..7586edccb364 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/ComInterfaceGenerator.Unit.Tests/CompileFails.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/ComInterfaceGenerator.Unit.Tests/CompileFails.cs
@@ -41,7 +41,7 @@ public static IEnumerable<object[]> ComInterfaceGeneratorSnippetsToCompile()
             } };
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(ComInterfaceGeneratorSnippetsToCompile))]
         public async Task ValidateComInterfaceGeneratorSnippets(string id, string source, DiagnosticResult[] expectedDiagnostics)
         {
@@ -335,7 +335,7 @@ public static IEnumerable<object[]> InvalidManagedToUnmanagedCodeSnippetsToCompi
             yield return new[] { ID(), customStructMarshallingCodeSnippets.Stateful.ByValueOutParameter };
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(InvalidUnmanagedToManagedCodeSnippetsToCompile), GeneratorKind.ComInterfaceGenerator)]
         public async Task ValidateInvalidUnmanagedToManagedCodeSnippets(string id, string source, DiagnosticResult[] expectedDiagnostics)
         {
@@ -349,7 +349,7 @@ public async Task ValidateInvalidUnmanagedToManagedCodeSnippets(string id, strin
             await test.RunAsync();
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(InvalidManagedToUnmanagedCodeSnippetsToCompile), GeneratorKind.ComInterfaceGenerator)]
         public async Task ValidateInvalidManagedToUnmanagedCodeSnippets(string id, string source)
         {
@@ -361,7 +361,7 @@ public async Task ValidateInvalidManagedToUnmanagedCodeSnippets(string id, strin
             await VerifyComInterfaceGenerator.VerifySourceGeneratorAsync(source, expectedDiagnostic);
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(StringMarshallingCodeSnippets), GeneratorKind.ComInterfaceGenerator)]
         public async Task ValidateStringMarshallingDiagnostics(string id, string source, DiagnosticResult[] expectedDiagnostics)
         {
@@ -512,7 +512,7 @@ public static unsafe partial class Test {
             }
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(StringMarshallingCustomTypeVisibilities))]
         public async Task VerifyStringMarshallingCustomTypeWithLessVisibilityThanInterfaceWarns(string id, string source, DiagnosticResult[] diagnostics)
         {
@@ -520,7 +520,7 @@ public async Task VerifyStringMarshallingCustomTypeWithLessVisibilityThanInterfa
             await VerifyComInterfaceGenerator.VerifySourceGeneratorAsync(source, diagnostics);
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(InterfaceVisibilities))]
         public async Task VerifyInterfaceWithLessVisibilityThanInterfaceWarns(string id, string source, DiagnosticResult[] diagnostics)
         {
@@ -772,7 +772,7 @@ string Source(
             }
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(CountParameterIsOutSnippets))]
         public async Task ValidateSizeParameterRefKindDiagnostics(string ID, string source, params DiagnosticResult[] diagnostics)
         {
@@ -889,7 +889,7 @@ partial interface I
             }
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(IntAndEnumReturnTypeSnippets))]
         public async Task ValidateReturnTypeInfoDiagnostics(string id, string source, DiagnosticResult[] diagnostics)
         {
diff --git a/src/libraries/System.Runtime.InteropServices/tests/ComInterfaceGenerator.Unit.Tests/Compiles.cs b/src/libraries/System.Runtime.InteropServices/tests/ComInterfaceGenerator.Unit.Tests/Compiles.cs
index 421e989f870b..feb69cc01da9 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/ComInterfaceGenerator.Unit.Tests/Compiles.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/ComInterfaceGenerator.Unit.Tests/Compiles.cs
@@ -320,7 +320,7 @@ public static IEnumerable<object[]> CustomCollectionsManagedToUnmanaged(Generato
             yield return new[] { ID(), customCollectionMarshallingCodeSnippetsManagedToUnmanaged.Stateful.NonBlittableElementNativeToManagedOnlyReturnValue };
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(CodeSnippetsToCompile), GeneratorKind.VTableIndexStubGenerator)]
         [MemberData(nameof(ManagedToUnmanagedCodeSnippetsToCompile), GeneratorKind.VTableIndexStubGenerator)]
         [MemberData(nameof(UnmanagedToManagedCodeSnippetsToCompile), GeneratorKind.VTableIndexStubGenerator)]
@@ -348,7 +348,7 @@ public static IEnumerable<object[]> ManagedToUnmanagedComInterfaceSnippetsToComp
             yield return new[] { ID(), codeSnippets.MarshalAsParameterAndModifiers("object", System.Runtime.InteropServices.UnmanagedType.Struct) };
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(CodeSnippetsToCompile), GeneratorKind.ComInterfaceGenerator)]
         [MemberData(nameof(CustomCollections), GeneratorKind.ComInterfaceGenerator)]
         [MemberData(nameof(ManagedToUnmanagedCodeSnippetsToCompile), GeneratorKind.ComInterfaceGeneratorComObjectWrapper)]
diff --git a/src/libraries/System.Runtime.InteropServices/tests/ComInterfaceGenerator.Unit.Tests/ConvertToGeneratedComInterfaceTests.cs b/src/libraries/System.Runtime.InteropServices/tests/ComInterfaceGenerator.Unit.Tests/ConvertToGeneratedComInterfaceTests.cs
index 308cc66f48b9..7260a4d4d392 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/ComInterfaceGenerator.Unit.Tests/ConvertToGeneratedComInterfaceTests.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/ComInterfaceGenerator.Unit.Tests/ConvertToGeneratedComInterfaceTests.cs
@@ -370,5 +370,43 @@ public struct HResult
 
             await VerifyCS.VerifyCodeFixAsync(source, fixedSource);
         }
+
+        [Fact]
+        public async Task UnsupportedInterfaceTypes_DoesNotReportDiagnostic()
+        {
+            // This also tests the case where InterfaceType is missing (defaulting to ComInterfaceType.InterfaceIsDual).
+            string source = """
+                 using System.Runtime.InteropServices;
+
+                 [ComImport]
+                 [Guid("73EB4AF8-BE9C-4b49-B3A4-24F4FF657B26")]
+                 public interface IInterfaceIsDualMissingAttribute
+                 {
+                 }
+
+                 [ComImport]
+                 [Guid("5DA39CDF-DCAD-447A-836E-EA80DB34D81B")]
+                 [InterfaceType(ComInterfaceType.InterfaceIsDual)]
+                 public interface IInterfaceIsDual
+                 {
+                 }
+
+                 [ComImport]
+                 [Guid("F59AB2FE-523D-4B28-911C-21363808C51E")]
+                 [InterfaceType(ComInterfaceType.InterfaceIsIDispatch)]
+                 public interface IInterfaceIsIDispatch
+                 {
+                 }
+
+                 [ComImport]
+                 [Guid("DC1C5A9C-E88A-4dde-A5A1-60F82A20AEF7")]
+                 [InterfaceType(ComInterfaceType.InterfaceIsIInspectable)]
+                 public interface IInterfaceIsIInspectable
+                 {
+                 }
+                 """;
+
+            await VerifyCS.VerifyCodeFixAsync(source, source);
+        }
     }
 }
diff --git a/src/libraries/System.Runtime.InteropServices/tests/LibraryImportGenerator.UnitTests/CompileFails.cs b/src/libraries/System.Runtime.InteropServices/tests/LibraryImportGenerator.UnitTests/CompileFails.cs
index 676cd56e2e6a..ecbe1c3d19e0 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/LibraryImportGenerator.UnitTests/CompileFails.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/LibraryImportGenerator.UnitTests/CompileFails.cs
@@ -785,7 +785,7 @@ public static IEnumerable<object[]> CodeSnippetsToCompile()
             } };
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(CodeSnippetsToCompile))]
         public async Task ValidateSnippets(string id, string source, DiagnosticResult[] diagnostics)
         {
@@ -804,7 +804,7 @@ public static IEnumerable<object[]> CodeSnippetsToCompile_InvalidCode()
             yield return new[] { ID(), CodeSnippets.IncorrectAttributeFieldType };
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(CodeSnippetsToCompile_InvalidCode))]
         public async Task ValidateSnippets_InvalidCodeGracefulFailure(string id, string source)
         {
diff --git a/src/libraries/System.Runtime.InteropServices/tests/LibraryImportGenerator.UnitTests/Compiles.cs b/src/libraries/System.Runtime.InteropServices/tests/LibraryImportGenerator.UnitTests/Compiles.cs
index b78fbc8552f2..4c80ac61cc46 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/LibraryImportGenerator.UnitTests/Compiles.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/LibraryImportGenerator.UnitTests/Compiles.cs
@@ -440,7 +440,7 @@ public static IEnumerable<object[]> CustomCollections()
             yield return new[] { ID(), CodeSnippets.CollectionsOfCollectionsStress };
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(CodeSnippetsToCompile))]
         [MemberData(nameof(CustomCollections))]
         public async Task ValidateSnippets(string id, string source)
@@ -461,7 +461,7 @@ public static IEnumerable<object[]> CodeSnippetsToCompileWithPreprocessorSymbols
             yield return new object[] { ID(), CodeSnippets.PreprocessorIfAfterAttributeAroundFunctionAdditionalFunctionAfter("Foo"), new string[] { "Foo" } };
             yield return new object[] { ID(), CodeSnippets.PreprocessorIfAfterAttributeAroundFunctionAdditionalFunctionAfter("Foo"), Array.Empty<string>() };
         }
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(CodeSnippetsToCompileWithPreprocessorSymbols))]
         public async Task ValidateSnippetsWithPreprocessorDefinitions(string id, string source, IEnumerable<string> preprocessorSymbols)
         {
@@ -530,7 +530,7 @@ public static IEnumerable<object[]> CodeSnippetsToValidateFallbackForwarder()
             }
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(CodeSnippetsToValidateFallbackForwarder))]
         [OuterLoop("Uses the network for downlevel ref packs")]
         public async Task ValidateSnippetsFallbackForwarder(string id, string source, TestTargetFramework targetFramework, bool expectFallbackForwarder)
@@ -576,7 +576,7 @@ public static IEnumerable<object[]> FullyBlittableSnippetsToCompile()
             yield return new[] { ID(), CodeSnippets.BasicParameterByValue("int") };
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(FullyBlittableSnippetsToCompile))]
         public async Task ValidateSnippetsWithBlittableAutoForwarding(string id, string source)
         {
@@ -616,7 +616,7 @@ public static IEnumerable<object[]> SnippetsWithBlittableTypesButNonBlittableDat
             yield return new[] { ID(), CodeSnippets.SetLastErrorTrue<int>() };
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(SnippetsWithBlittableTypesButNonBlittableDataToCompile))]
         public async Task ValidateSnippetsWithBlittableTypesButNonBlittableMetadataDoNotAutoForward(string id, string source)
         {
@@ -689,7 +689,7 @@ public static IEnumerable<object[]> CodeSnippetsToCompileMultipleSources()
             yield return new object[] { ID(), new[] { CodeSnippets.BasicParameterByValue("int[]", CodeSnippets.DisableRuntimeMarshalling), CodeSnippets.BasicParameterWithByRefModifier("ref", "int") } };
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(CodeSnippetsToCompileMultipleSources))]
         public async Task ValidateSnippetsWithMultipleSources(string id, string[] sources)
         {
@@ -717,7 +717,7 @@ public class Basic { }
             yield return new object[] { ID(), source, TestTargetFramework.Net };
         }
 
-        [ParallelTheory]
+        [Theory]
         [MemberData(nameof(CodeSnippetsToVerifyNoTreesProduced))]
         public async Task ValidateNoGeneratedOuptutForNoImport(string id, string source, TestTargetFramework framework)
         {
diff --git a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/GetDelegateForFunctionPointerTests.cs b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/GetDelegateForFunctionPointerTests.cs
index abcf51482278..f0745272af90 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/GetDelegateForFunctionPointerTests.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/GetDelegateForFunctionPointerTests.cs
@@ -5,6 +5,7 @@
 using System.Linq;
 using System.Reflection;
 using System.Reflection.Emit;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices.Tests.Common;
 using Xunit;
 
@@ -153,6 +154,66 @@ public void GetDelegateForFunctionPointer_CantCast_ThrowsInvalidCastException()
             GC.KeepAlive(d);
         }
 
+        [Fact]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/99478", TestRuntimes.Mono)]
+        public void GetDelegateForFunctionPointer_Resurrection()
+        {
+            GCHandle handle = Alloc();
+
+            if (PlatformDetection.IsPreciseGcSupported)
+            {
+                while (!IsNullTarget(handle))
+                {
+                    GC.Collect();
+                    GC.WaitForPendingFinalizers();
+                }
+            }
+
+            handle.Free();
+
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static GCHandle Alloc()
+            {
+                GCHandle gcHandle = default;
+                gcHandle = GCHandle.Alloc(new FreachableObject(), GCHandleType.WeakTrackResurrection);
+                return gcHandle;
+            }
+
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static bool IsNullTarget(GCHandle handle)
+            {
+                return handle.Target is null;
+            }
+        }
+
+        private class FreachableObject
+        {
+            private readonly Action _del;
+            private readonly IntPtr _fnptr;
+            private int _count;
+
+            internal FreachableObject()
+            {
+                _del = new Action(SomeFunction);
+                _fnptr = Marshal.GetFunctionPointerForDelegate(_del);
+            }
+
+            // Note: This method cannot be replaced by a lambda for the test to trigger the delegate resurrection
+            private void SomeFunction()
+            {
+            }
+
+            ~FreachableObject()
+            {
+                Assert.Same(Marshal.GetDelegateForFunctionPointer<Action>(_fnptr), _del);
+
+                if (_count++ < 3)
+                {
+                    GC.ReRegisterForFinalize(this);
+                }
+            }
+        }
+
         public delegate void GenericDelegate<T>(T t);
         public delegate void NonGenericDelegate(string t);
         public delegate void OtherNonGenericDelegate(string t);
diff --git a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/GetNativeVariantForObjectTests.cs b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/GetNativeVariantForObjectTests.cs
index c25c59a205c9..108a842a3c8b 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/GetNativeVariantForObjectTests.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/GetNativeVariantForObjectTests.cs
@@ -167,6 +167,40 @@ public void GetNativeVariantForObject_String_Success(string obj)
             }
         }
 
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsBuiltInComEnabled))]
+        public unsafe void GetNativeVariantForObject_Guid_Success()
+        {
+            var guid = new Guid("0DD3E51B-3162-4D13-B906-030F402C5BA2");
+            var v = new Variant();
+            IntPtr pNative = Marshal.AllocHGlobal(Marshal.SizeOf(v));
+            try
+            {
+                if (PlatformDetection.IsWindowsNanoServer)
+                {
+                    Assert.Throws<NotSupportedException>(() => Marshal.GetNativeVariantForObject(guid, pNative));
+                }
+                else
+                {
+                    Marshal.GetNativeVariantForObject(guid, pNative);
+
+                    Variant result = Marshal.PtrToStructure<Variant>(pNative);
+                    Assert.Equal(VarEnum.VT_RECORD, (VarEnum)result.vt);
+                    Assert.NotEqual(nint.Zero, result.pRecInfo); // We should have an IRecordInfo instance.
+
+                    var expectedBytes = new ReadOnlySpan<byte>(guid.ToByteArray());
+                    var actualBytes = new ReadOnlySpan<byte>((void*)result.bstrVal, expectedBytes.Length);
+                    Assert.Equal(expectedBytes, actualBytes);
+
+                    object o = Marshal.GetObjectForNativeVariant(pNative);
+                    Assert.Equal(guid, o);
+                }
+            }
+            finally
+            {
+                Marshal.FreeHGlobal(pNative);
+            }
+        }
+
         [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsBuiltInComEnabled))]
         [InlineData(3.14)]
         public unsafe void GetNativeVariantForObject_Double_Success(double obj)
diff --git a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/GetObjectForNativeVariantTests.cs b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/GetObjectForNativeVariantTests.cs
index c0b68f5d899e..f4e01dc3d87e 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/GetObjectForNativeVariantTests.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/GetObjectForNativeVariantTests.cs
@@ -246,14 +246,38 @@ public void GetObjectForNativeVariant_InvalidDate_ThrowsArgumentException(double
         }
 
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsBuiltInComEnabled))]
-        public void GetObjectForNativeVariant_NoDataForRecord_ThrowsArgumentException()
+        public void GetObjectForNativeVariant_NoRecordInfo_ThrowsArgumentException()
         {
             Variant variant = CreateVariant(VT_RECORD, new UnionTypes { _record = new Record { _recordInfo = IntPtr.Zero } });
             AssertExtensions.Throws<ArgumentException>(null, () => GetObjectForNativeVariant(variant));
         }
 
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsBuiltInComEnabled))]
+        public void GetObjectForNativeVariant_NoRecordData_ReturnsNull()
+        {
+            var recordInfo = new RecordInfo();
+            IntPtr pRecordInfo = Marshal.GetComInterfaceForObject<RecordInfo, IRecordInfo>(recordInfo);
+            try
+            {
+                Variant variant = CreateVariant(VT_RECORD, new UnionTypes
+                {
+                    _record = new Record
+                    {
+                        _record = IntPtr.Zero,
+                        _recordInfo = pRecordInfo
+                    }
+                });
+                Assert.Null(GetObjectForNativeVariant(variant));
+            }
+            finally
+            {
+                Marshal.Release(pRecordInfo);
+            }
+        }
+
         public static IEnumerable<object[]> GetObjectForNativeVariant_NoSuchGuid_TestData()
         {
+            yield return new object[] { typeof(object).GUID };
             yield return new object[] { typeof(string).GUID };
             yield return new object[] { Guid.Empty };
         }
diff --git a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/SecureStringToCoTaskMemAnsiTests.cs b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/SecureStringToCoTaskMemAnsiTests.cs
index 7bc52efa15e0..6beaf2d3a67a 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/SecureStringToCoTaskMemAnsiTests.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/SecureStringToCoTaskMemAnsiTests.cs
@@ -33,9 +33,9 @@ public void SecureStringToCoTaskMemAnsi_InvokePtrToStringAnsi_Roundtrips(string
                 {
                     Assert.NotEqual(IntPtr.Zero, ptr);
 
-                    // Unix is incorrect with unicode chars.
+                    // The check is incorrect for UTF8 encoding of non-Ansi chars. Detect UTF8 encoding via SystemMaxDBCSCharSize.
                     bool containsNonAnsiChars = s.Any(c => c > 0xFF);
-                    if (!containsNonAnsiChars || PlatformDetection.IsWindows)
+                    if (!containsNonAnsiChars || Marshal.SystemMaxDBCSCharSize < 3)
                     {
                         // Make sure the native memory is correctly laid out.
                         for (int i = 0; i < s.Length; i++)
diff --git a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/SecureStringToGlobalAllocAnsiTests.cs b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/SecureStringToGlobalAllocAnsiTests.cs
index a97ea54fb3b1..5c1ca653c757 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/SecureStringToGlobalAllocAnsiTests.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/SecureStringToGlobalAllocAnsiTests.cs
@@ -33,9 +33,9 @@ public void SecureStringToGlobalAllocAnsi_InvokePtrToStringAnsi_Roundtrips(strin
                 {
                     Assert.NotEqual(IntPtr.Zero, ptr);
 
-                    // Unix uses UTF8 for Ansi marshalling.
+                    // The check is incorrect for UTF8 encoding of non-Ansi chars. Detect UTF8 encoding via SystemMaxDBCSCharSize.
                     bool containsNonAnsiChars = s.Any(c => c > 0xFF);
-                    if (!containsNonAnsiChars || PlatformDetection.IsWindows)
+                    if (!containsNonAnsiChars || Marshal.SystemMaxDBCSCharSize < 3)
                     {
                         // Make sure the native memory is correctly laid out.
                         for (int i = 0; i < s.Length; i++)
diff --git a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/StringToCoTaskMemAnsiTests.cs b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/StringToCoTaskMemAnsiTests.cs
index abb9666544c7..9375b6095e36 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/StringToCoTaskMemAnsiTests.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/StringToCoTaskMemAnsiTests.cs
@@ -30,9 +30,9 @@ public void StringToCoTaskMemAnsi_InvokePtrToStringAnsi_Roundtrips(string s)
             {
                 Assert.NotEqual(IntPtr.Zero, ptr);
 
-                // Unix uses UTF8 for Ansi marshalling.
+                // The check is incorrect for UTF8 encoding of non-Ansi chars. Detect UTF8 encoding via SystemMaxDBCSCharSize.
                 bool containsNonAnsiChars = s.Any(c => c > 0xFF);
-                if (!containsNonAnsiChars || PlatformDetection.IsWindows)
+                if (!containsNonAnsiChars || Marshal.SystemMaxDBCSCharSize < 3)
                 {
                     // Make sure the native memory is correctly laid out.
                     for (int i = 0; i < s.Length; i++)
diff --git a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/StringToHGlobalAnsiTests.cs b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/StringToHGlobalAnsiTests.cs
index 7802a4b99f2d..b089a5406788 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/StringToHGlobalAnsiTests.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/System.Runtime.InteropServices.UnitTests/System/Runtime/InteropServices/Marshal/StringToHGlobalAnsiTests.cs
@@ -30,9 +30,9 @@ public void StringToHGlobalAnsi_InvokePtrToStringAnsi_Roundtrips(string s)
             {
                 Assert.NotEqual(IntPtr.Zero, ptr);
 
-                // Unix uses UTF8 for Ansi marshalling.
+                // The check is incorrect for UTF8 encoding of non-Ansi chars. Detect UTF8 encoding via SystemMaxDBCSCharSize.
                 bool containsNonAnsiChars = s.Any(c => c > 0xFF);
-                if (!containsNonAnsiChars || PlatformDetection.IsWindows)
+                if (!containsNonAnsiChars || Marshal.SystemMaxDBCSCharSize < 3)
                 {
                     // Make sure the native memory is correctly laid out.
                     for (int i = 0; i < s.Length; i++)
diff --git a/src/libraries/System.Runtime.InteropServices/tests/TestAssets/SharedTypes/ComInterfaces/IStatefulPinnedMarshalling.cs b/src/libraries/System.Runtime.InteropServices/tests/TestAssets/SharedTypes/ComInterfaces/IStatefulPinnedMarshalling.cs
index eaac2608d818..1347a2858e37 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/TestAssets/SharedTypes/ComInterfaces/IStatefulPinnedMarshalling.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/TestAssets/SharedTypes/ComInterfaces/IStatefulPinnedMarshalling.cs
@@ -86,6 +86,7 @@ public ref StatefulPinnedNative GetPinnableReference()
                 _canFree = true;
                 if (_isPinned)
                 {
+                    // Unsafe.AsPointer is safe, because the result from GetPinnableReference is pinned
                     _refNativeStruct = new StatefulPinnedNative() { I = _managed.I };
                     return (StatefulPinnedNative*)Unsafe.AsPointer(ref _refNativeStruct);
                 }
diff --git a/src/libraries/System.Runtime.InteropServices/tests/TestAssets/SharedTypes/ComInterfaces/IStatelessCallerAllocateBufferMarshalling.cs b/src/libraries/System.Runtime.InteropServices/tests/TestAssets/SharedTypes/ComInterfaces/IStatelessCallerAllocateBufferMarshalling.cs
index f8b1174799b1..6388ab299011 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/TestAssets/SharedTypes/ComInterfaces/IStatelessCallerAllocateBufferMarshalling.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/TestAssets/SharedTypes/ComInterfaces/IStatelessCallerAllocateBufferMarshalling.cs
@@ -94,6 +94,7 @@ public static class ManagedToUnmanagedIn
             {
                 var unmanaged = new StatelessCallerAllocatedBufferNative() { I = managed.I };
                 MemoryMarshal.Write(buffer, in unmanaged);
+                // Unsafe.AsPointer is safe since buffer is pinned
                 return (StatelessCallerAllocatedBufferNative*)Unsafe.AsPointer(ref MemoryMarshal.AsRef<StatelessCallerAllocatedBufferNative>(buffer));
             }
 
diff --git a/src/libraries/System.Runtime.InteropServices/tests/TestAssets/SharedTypes/NonBlittable.cs b/src/libraries/System.Runtime.InteropServices/tests/TestAssets/SharedTypes/NonBlittable.cs
index 88e594d9f6a2..aa4df06e002f 100644
--- a/src/libraries/System.Runtime.InteropServices/tests/TestAssets/SharedTypes/NonBlittable.cs
+++ b/src/libraries/System.Runtime.InteropServices/tests/TestAssets/SharedTypes/NonBlittable.cs
@@ -84,6 +84,7 @@ public static unsafe class DoubleToBytesBigEndianMarshaller
 
         public static byte* ConvertToUnmanaged(double managed, Span<byte> buffer)
         {
+            // Unsafe.AsPointer is safe since buffer must be pinned
             BinaryPrimitives.WriteDoubleBigEndian(buffer, managed);
             return (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(buffer));
         }
@@ -305,6 +306,7 @@ public struct StatefulGetPinnableReference
             private IntWrapperWithoutGetPinnableReference _managed;
             public void FromManaged(IntWrapperWithoutGetPinnableReference managed) => _managed = managed;
 
+            // Unsafe.AsPointer is safe since buffer must be pinned
             public int* ToUnmanaged() => (int*)Unsafe.AsPointer(ref _managed.i);
 
             public ref int GetPinnableReference() => ref _managed.i;
@@ -463,6 +465,7 @@ public unsafe static class ListMarshallerWithBuffer<T, TUnmanagedElement> where
             if (spaceRequired > buffer.Length)
                 throw new InvalidOperationException();
 
+            // Unsafe.AsPointer is safe since buffer must be pinned
             return (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(buffer));
         }
 
@@ -520,6 +523,7 @@ public void FromManaged(List<T> managed, Span<TUnmanagedElement> buffer)
 
             public ref TUnmanagedElement GetPinnableReference() => ref MemoryMarshal.GetReference(_span);
 
+            // Unsafe.AsPointer is safe since buffer must be pinned
             public byte* ToUnmanaged() => (byte*)Unsafe.AsPointer(ref GetPinnableReference());
 
             public Span<T> GetManagedValuesDestination(int length)
diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
index 9eb01deebe0f..77fe06ddc5c0 100644
--- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
+++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
@@ -1916,8 +1916,6 @@ internal AdvSimd() { }
         public static unsafe System.Runtime.Intrinsics.Vector64<float> LoadAndInsertScalar(System.Runtime.Intrinsics.Vector64<float> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index, float* address) { throw null; }
         public static unsafe System.Runtime.Intrinsics.Vector64<ushort> LoadAndInsertScalar(System.Runtime.Intrinsics.Vector64<ushort> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(3))] byte index, ushort* address) { throw null; }
         public static unsafe System.Runtime.Intrinsics.Vector64<uint> LoadAndInsertScalar(System.Runtime.Intrinsics.Vector64<uint> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index, uint* address) { throw null; }
-#if false
-        // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
         public static unsafe (System.Runtime.Intrinsics.Vector64<byte> Value1, System.Runtime.Intrinsics.Vector64<byte> Value2) LoadAndInsertScalar((System.Runtime.Intrinsics.Vector64<byte>, System.Runtime.Intrinsics.Vector64<byte>) values, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(7))] byte index, byte* address) { throw null; }
         public static unsafe (System.Runtime.Intrinsics.Vector64<sbyte> Value1, System.Runtime.Intrinsics.Vector64<sbyte> Value2) LoadAndInsertScalar((System.Runtime.Intrinsics.Vector64<sbyte>, System.Runtime.Intrinsics.Vector64<sbyte>) values, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(7))] byte index, sbyte* address) { throw null; }
         public static unsafe (System.Runtime.Intrinsics.Vector64<short> Value1, System.Runtime.Intrinsics.Vector64<short> Value2) LoadAndInsertScalar((System.Runtime.Intrinsics.Vector64<short>, System.Runtime.Intrinsics.Vector64<short>) values, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(3))] byte index, short* address) { throw null; }
@@ -1939,7 +1937,6 @@ internal AdvSimd() { }
         public static unsafe (System.Runtime.Intrinsics.Vector64<int> Value1, System.Runtime.Intrinsics.Vector64<int> Value2, System.Runtime.Intrinsics.Vector64<int> Value3, System.Runtime.Intrinsics.Vector64<int> Value4) LoadAndInsertScalar((System.Runtime.Intrinsics.Vector64<int>, System.Runtime.Intrinsics.Vector64<int>, System.Runtime.Intrinsics.Vector64<int>, System.Runtime.Intrinsics.Vector64<int>) values, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index, int* address) { throw null; }
         public static unsafe (System.Runtime.Intrinsics.Vector64<uint> Value1, System.Runtime.Intrinsics.Vector64<uint> Value2, System.Runtime.Intrinsics.Vector64<uint> Value3, System.Runtime.Intrinsics.Vector64<uint> Value4) LoadAndInsertScalar((System.Runtime.Intrinsics.Vector64<uint>, System.Runtime.Intrinsics.Vector64<uint>, System.Runtime.Intrinsics.Vector64<uint>, System.Runtime.Intrinsics.Vector64<uint>) values, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index, uint* address) { throw null; }
         public static unsafe (System.Runtime.Intrinsics.Vector64<float> Value1, System.Runtime.Intrinsics.Vector64<float> Value2, System.Runtime.Intrinsics.Vector64<float> Value3, System.Runtime.Intrinsics.Vector64<float> Value4) LoadAndInsertScalar((System.Runtime.Intrinsics.Vector64<float>, System.Runtime.Intrinsics.Vector64<float>, System.Runtime.Intrinsics.Vector64<float>, System.Runtime.Intrinsics.Vector64<float>) values, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index, float* address) { throw null; }
-#endif
         public static unsafe System.Runtime.Intrinsics.Vector128<byte> LoadAndReplicateToVector128(byte* address) { throw null; }
         public static unsafe System.Runtime.Intrinsics.Vector128<short> LoadAndReplicateToVector128(short* address) { throw null; }
         public static unsafe System.Runtime.Intrinsics.Vector128<int> LoadAndReplicateToVector128(int* address) { throw null; }
@@ -1954,8 +1951,6 @@ internal AdvSimd() { }
         public static unsafe System.Runtime.Intrinsics.Vector64<float> LoadAndReplicateToVector64(float* address) { throw null; }
         public static unsafe System.Runtime.Intrinsics.Vector64<ushort> LoadAndReplicateToVector64(ushort* address) { throw null; }
         public static unsafe System.Runtime.Intrinsics.Vector64<uint> LoadAndReplicateToVector64(uint* address) { throw null; }
-#if false
-        // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
         public static unsafe (System.Runtime.Intrinsics.Vector64<byte> Value1, System.Runtime.Intrinsics.Vector64<byte> Value2) LoadAndReplicateToVector64x2(byte* address) { throw null; }
         public static unsafe (System.Runtime.Intrinsics.Vector64<sbyte> Value1, System.Runtime.Intrinsics.Vector64<sbyte> Value2) LoadAndReplicateToVector64x2(sbyte* address) { throw null; }
         public static unsafe (System.Runtime.Intrinsics.Vector64<short> Value1, System.Runtime.Intrinsics.Vector64<short> Value2) LoadAndReplicateToVector64x2(short* address) { throw null; }
@@ -1977,7 +1972,6 @@ internal AdvSimd() { }
         public static unsafe (System.Runtime.Intrinsics.Vector64<int> Value1, System.Runtime.Intrinsics.Vector64<int> Value2, System.Runtime.Intrinsics.Vector64<int> Value3, System.Runtime.Intrinsics.Vector64<int> Value4) LoadAndReplicateToVector64x4(int* address) { throw null; }
         public static unsafe (System.Runtime.Intrinsics.Vector64<uint> Value1, System.Runtime.Intrinsics.Vector64<uint> Value2, System.Runtime.Intrinsics.Vector64<uint> Value3, System.Runtime.Intrinsics.Vector64<uint> Value4) LoadAndReplicateToVector64x4(uint* address) { throw null; }
         public static unsafe (System.Runtime.Intrinsics.Vector64<float> Value1, System.Runtime.Intrinsics.Vector64<float> Value2, System.Runtime.Intrinsics.Vector64<float> Value3, System.Runtime.Intrinsics.Vector64<float> Value4) LoadAndReplicateToVector64x4(float* address) { throw null; }
-#endif
         public static unsafe System.Runtime.Intrinsics.Vector128<byte> LoadVector128(byte* address) { throw null; }
         public static unsafe System.Runtime.Intrinsics.Vector128<double> LoadVector128(double* address) { throw null; }
         public static unsafe System.Runtime.Intrinsics.Vector128<short> LoadVector128(short* address) { throw null; }
@@ -1998,8 +1992,6 @@ internal AdvSimd() { }
         public static unsafe System.Runtime.Intrinsics.Vector64<ushort> LoadVector64(ushort* address) { throw null; }
         public static unsafe System.Runtime.Intrinsics.Vector64<uint> LoadVector64(uint* address) { throw null; }
         public static unsafe System.Runtime.Intrinsics.Vector64<ulong> LoadVector64(ulong* address) { throw null; }
-#if false
-        // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
         public static unsafe (System.Runtime.Intrinsics.Vector64<byte> Value1, System.Runtime.Intrinsics.Vector64<byte> Value2) LoadVector64x2AndUnzip(byte* address) { throw null; }
         public static unsafe (System.Runtime.Intrinsics.Vector64<sbyte> Value1, System.Runtime.Intrinsics.Vector64<sbyte> Value2) LoadVector64x2AndUnzip(sbyte* address) { throw null; }
         public static unsafe (System.Runtime.Intrinsics.Vector64<short> Value1, System.Runtime.Intrinsics.Vector64<short> Value2) LoadVector64x2AndUnzip(short* address) { throw null; }
@@ -2042,7 +2034,6 @@ internal AdvSimd() { }
         public static unsafe (System.Runtime.Intrinsics.Vector64<int> Value1, System.Runtime.Intrinsics.Vector64<int> Value2, System.Runtime.Intrinsics.Vector64<int> Value3, System.Runtime.Intrinsics.Vector64<int> Value4) LoadVector64x4(int* address) { throw null; }
         public static unsafe (System.Runtime.Intrinsics.Vector64<uint> Value1, System.Runtime.Intrinsics.Vector64<uint> Value2, System.Runtime.Intrinsics.Vector64<uint> Value3, System.Runtime.Intrinsics.Vector64<uint> Value4) LoadVector64x4(uint* address) { throw null; }
         public static unsafe (System.Runtime.Intrinsics.Vector64<float> Value1, System.Runtime.Intrinsics.Vector64<float> Value2, System.Runtime.Intrinsics.Vector64<float> Value3, System.Runtime.Intrinsics.Vector64<float> Value4) LoadVector64x4(float* address) { throw null; }
-#endif
         public static System.Runtime.Intrinsics.Vector128<byte> Max(System.Runtime.Intrinsics.Vector128<byte> left, System.Runtime.Intrinsics.Vector128<byte> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<short> Max(System.Runtime.Intrinsics.Vector128<short> left, System.Runtime.Intrinsics.Vector128<short> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<int> Max(System.Runtime.Intrinsics.Vector128<int> left, System.Runtime.Intrinsics.Vector128<int> right) { throw null; }
@@ -2913,8 +2904,6 @@ public static unsafe void StoreSelectedScalar(ushort* address, System.Runtime.In
         public static unsafe void StoreSelectedScalar(uint* address, System.Runtime.Intrinsics.Vector128<uint> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(3))] byte index) { }
         public static unsafe void StoreSelectedScalar(uint* address, System.Runtime.Intrinsics.Vector64<uint> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index) { }
         public static unsafe void StoreSelectedScalar(ulong* address, System.Runtime.Intrinsics.Vector128<ulong> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index) { }
-#if false
-        // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
         public static unsafe void StoreSelectedScalar(byte* address, (System.Runtime.Intrinsics.Vector64<byte> value1, System.Runtime.Intrinsics.Vector64<byte> value2) value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(7))] byte index) { throw null; }
         public static unsafe void StoreSelectedScalar(sbyte* address, (System.Runtime.Intrinsics.Vector64<sbyte> value1, System.Runtime.Intrinsics.Vector64<sbyte> value2) value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(7))] byte index) { throw null; }
         public static unsafe void StoreSelectedScalar(short* address, (System.Runtime.Intrinsics.Vector64<short> value1, System.Runtime.Intrinsics.Vector64<short> value2) value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(3))] byte index) { throw null; }
@@ -2978,7 +2967,6 @@ public static unsafe void StoreSelectedScalar(ulong* address, System.Runtime.Int
         public static unsafe void StoreVector64x4(int* address, (System.Runtime.Intrinsics.Vector64<int> Value1, System.Runtime.Intrinsics.Vector64<int> Value2, System.Runtime.Intrinsics.Vector64<int> Value3, System.Runtime.Intrinsics.Vector64<int> Value4) value) { throw null; }
         public static unsafe void StoreVector64x4(uint* address, (System.Runtime.Intrinsics.Vector64<uint> Value1, System.Runtime.Intrinsics.Vector64<uint> Value2, System.Runtime.Intrinsics.Vector64<uint> Value3, System.Runtime.Intrinsics.Vector64<uint> Value4) value) { throw null; }
         public static unsafe void StoreVector64x4(float* address, (System.Runtime.Intrinsics.Vector64<float> Value1, System.Runtime.Intrinsics.Vector64<float> Value2, System.Runtime.Intrinsics.Vector64<float> Value3, System.Runtime.Intrinsics.Vector64<float> Value4) value) { throw null; }
-#endif
         public static System.Runtime.Intrinsics.Vector128<byte> Subtract(System.Runtime.Intrinsics.Vector128<byte> left, System.Runtime.Intrinsics.Vector128<byte> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<short> Subtract(System.Runtime.Intrinsics.Vector128<short> left, System.Runtime.Intrinsics.Vector128<short> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<int> Subtract(System.Runtime.Intrinsics.Vector128<int> left, System.Runtime.Intrinsics.Vector128<int> right) { throw null; }
@@ -3347,8 +3335,6 @@ internal Arm64() { }
             public static System.Runtime.Intrinsics.Vector64<ushort> InsertSelectedScalar(System.Runtime.Intrinsics.Vector64<ushort> result, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(3))] byte resultIndex, System.Runtime.Intrinsics.Vector64<ushort> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(3))] byte valueIndex) { throw null; }
             public static System.Runtime.Intrinsics.Vector64<uint> InsertSelectedScalar(System.Runtime.Intrinsics.Vector64<uint> result, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte resultIndex, System.Runtime.Intrinsics.Vector128<uint> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(3))] byte valueIndex) { throw null; }
             public static System.Runtime.Intrinsics.Vector64<uint> InsertSelectedScalar(System.Runtime.Intrinsics.Vector64<uint> result, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte resultIndex, System.Runtime.Intrinsics.Vector64<uint> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte valueIndex) { throw null; }
-#if false
-            // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
             public static unsafe (System.Runtime.Intrinsics.Vector128<byte> Value1, System.Runtime.Intrinsics.Vector128<byte> Value2) LoadAndInsertScalar((System.Runtime.Intrinsics.Vector128<byte>, System.Runtime.Intrinsics.Vector128<byte>) values, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(15))] byte index, byte* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector128<sbyte> Value1, System.Runtime.Intrinsics.Vector128<sbyte> Value2) LoadAndInsertScalar((System.Runtime.Intrinsics.Vector128<sbyte>, System.Runtime.Intrinsics.Vector128<sbyte>) values, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(15))] byte index, sbyte* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector128<short> Value1, System.Runtime.Intrinsics.Vector128<short> Value2) LoadAndInsertScalar((System.Runtime.Intrinsics.Vector128<short>, System.Runtime.Intrinsics.Vector128<short>) values, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(7))] byte index, short* address) { throw null; }
@@ -3379,12 +3365,9 @@ internal Arm64() { }
             public static unsafe (System.Runtime.Intrinsics.Vector128<ulong> Value1, System.Runtime.Intrinsics.Vector128<ulong> Value2, System.Runtime.Intrinsics.Vector128<ulong> Value3, System.Runtime.Intrinsics.Vector128<ulong> Value4) LoadAndInsertScalar((System.Runtime.Intrinsics.Vector128<ulong>, System.Runtime.Intrinsics.Vector128<ulong>, System.Runtime.Intrinsics.Vector128<ulong>, System.Runtime.Intrinsics.Vector128<ulong>) values, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index, ulong* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector128<float> Value1, System.Runtime.Intrinsics.Vector128<float> Value2, System.Runtime.Intrinsics.Vector128<float> Value3, System.Runtime.Intrinsics.Vector128<float> Value4) LoadAndInsertScalar((System.Runtime.Intrinsics.Vector128<float>, System.Runtime.Intrinsics.Vector128<float>, System.Runtime.Intrinsics.Vector128<float>, System.Runtime.Intrinsics.Vector128<float>) values, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(3))] byte index, float* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector128<double> Value1, System.Runtime.Intrinsics.Vector128<double> Value2, System.Runtime.Intrinsics.Vector128<double> Value3, System.Runtime.Intrinsics.Vector128<double> Value4) LoadAndInsertScalar((System.Runtime.Intrinsics.Vector128<double>, System.Runtime.Intrinsics.Vector128<double>, System.Runtime.Intrinsics.Vector128<double>, System.Runtime.Intrinsics.Vector128<double>) values, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index, double* address) { throw null; }
-#endif
             public static unsafe System.Runtime.Intrinsics.Vector128<double> LoadAndReplicateToVector128(double* address) { throw null; }
             public static unsafe System.Runtime.Intrinsics.Vector128<long> LoadAndReplicateToVector128(long* address) { throw null; }
             public static unsafe System.Runtime.Intrinsics.Vector128<ulong> LoadAndReplicateToVector128(ulong* address) { throw null; }
-#if false
-            // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
             public static unsafe (System.Runtime.Intrinsics.Vector128<byte> Value1, System.Runtime.Intrinsics.Vector128<byte> Value2) LoadAndReplicateToVector128x2(byte* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector128<sbyte> Value1, System.Runtime.Intrinsics.Vector128<sbyte> Value2) LoadAndReplicateToVector128x2(sbyte* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector128<short> Value1, System.Runtime.Intrinsics.Vector128<short> Value2) LoadAndReplicateToVector128x2(short* address) { throw null; }
@@ -3415,7 +3398,6 @@ internal Arm64() { }
             public static unsafe (System.Runtime.Intrinsics.Vector128<ulong> Value1, System.Runtime.Intrinsics.Vector128<ulong> Value2, System.Runtime.Intrinsics.Vector128<ulong> Value3, System.Runtime.Intrinsics.Vector128<ulong> Value4) LoadAndReplicateToVector128x4(ulong* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector128<float> Value1, System.Runtime.Intrinsics.Vector128<float> Value2, System.Runtime.Intrinsics.Vector128<float> Value3, System.Runtime.Intrinsics.Vector128<float> Value4) LoadAndReplicateToVector128x4(float* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector128<double> Value1, System.Runtime.Intrinsics.Vector128<double> Value2, System.Runtime.Intrinsics.Vector128<double> Value3, System.Runtime.Intrinsics.Vector128<double> Value4) LoadAndReplicateToVector128x4(double* address) { throw null; }
-#endif
             public static unsafe (System.Runtime.Intrinsics.Vector64<int> Value1, System.Runtime.Intrinsics.Vector64<int> Value2) LoadPairScalarVector64(int* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector64<float> Value1, System.Runtime.Intrinsics.Vector64<float> Value2) LoadPairScalarVector64(float* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector64<uint> Value1, System.Runtime.Intrinsics.Vector64<uint> Value2) LoadPairScalarVector64(uint* address) { throw null; }
@@ -3462,8 +3444,6 @@ internal Arm64() { }
             public static unsafe (System.Runtime.Intrinsics.Vector64<ushort> Value1, System.Runtime.Intrinsics.Vector64<ushort> Value2) LoadPairVector64NonTemporal(ushort* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector64<uint> Value1, System.Runtime.Intrinsics.Vector64<uint> Value2) LoadPairVector64NonTemporal(uint* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector64<ulong> Value1, System.Runtime.Intrinsics.Vector64<ulong> Value2) LoadPairVector64NonTemporal(ulong* address) { throw null; }
-#if false
-            // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
             public static unsafe (System.Runtime.Intrinsics.Vector128<byte> Value1, System.Runtime.Intrinsics.Vector128<byte> Value2) LoadVector128x2AndUnzip(byte* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector128<sbyte> Value1, System.Runtime.Intrinsics.Vector128<sbyte> Value2) LoadVector128x2AndUnzip(sbyte* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector128<short> Value1, System.Runtime.Intrinsics.Vector128<short> Value2) LoadVector128x2AndUnzip(short* address) { throw null; }
@@ -3524,7 +3504,6 @@ internal Arm64() { }
             public static unsafe (System.Runtime.Intrinsics.Vector128<ulong> Value1, System.Runtime.Intrinsics.Vector128<ulong> Value2, System.Runtime.Intrinsics.Vector128<ulong> Value3, System.Runtime.Intrinsics.Vector128<ulong> Value4) LoadVector128x4(ulong* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector128<float> Value1, System.Runtime.Intrinsics.Vector128<float> Value2, System.Runtime.Intrinsics.Vector128<float> Value3, System.Runtime.Intrinsics.Vector128<float> Value4) LoadVector128x4(float* address) { throw null; }
             public static unsafe (System.Runtime.Intrinsics.Vector128<double> Value1, System.Runtime.Intrinsics.Vector128<double> Value2, System.Runtime.Intrinsics.Vector128<double> Value3, System.Runtime.Intrinsics.Vector128<double> Value4) LoadVector128x4(double* address) { throw null; }
-#endif
             public static System.Runtime.Intrinsics.Vector128<double> Max(System.Runtime.Intrinsics.Vector128<double> left, System.Runtime.Intrinsics.Vector128<double> right) { throw null; }
             public static System.Runtime.Intrinsics.Vector64<byte> MaxAcross(System.Runtime.Intrinsics.Vector128<byte> value) { throw null; }
             public static System.Runtime.Intrinsics.Vector64<short> MaxAcross(System.Runtime.Intrinsics.Vector128<short> value) { throw null; }
@@ -3766,8 +3745,6 @@ public static unsafe void StorePairScalar(uint* address, System.Runtime.Intrinsi
             public static unsafe void StorePairScalarNonTemporal(int* address, System.Runtime.Intrinsics.Vector64<int> value1, System.Runtime.Intrinsics.Vector64<int> value2) { }
             public static unsafe void StorePairScalarNonTemporal(float* address, System.Runtime.Intrinsics.Vector64<float> value1, System.Runtime.Intrinsics.Vector64<float> value2) { }
             public static unsafe void StorePairScalarNonTemporal(uint* address, System.Runtime.Intrinsics.Vector64<uint> value1, System.Runtime.Intrinsics.Vector64<uint> value2) { }
-#if false
-            // Should be disabled until Mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
             public static unsafe void StoreSelectedScalar(byte* address, (System.Runtime.Intrinsics.Vector128<byte> value1, System.Runtime.Intrinsics.Vector128<byte> value2) value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(15))] byte index) { throw null; }
             public static unsafe void StoreSelectedScalar(sbyte* address, (System.Runtime.Intrinsics.Vector128<sbyte> value1, System.Runtime.Intrinsics.Vector128<sbyte> value2) value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(15))] byte index) { throw null; }
             public static unsafe void StoreSelectedScalar(short* address, (System.Runtime.Intrinsics.Vector128<short> value1, System.Runtime.Intrinsics.Vector128<short> value2) value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(7))] byte index) { throw null; }
@@ -3858,7 +3835,6 @@ public static unsafe void StorePairScalarNonTemporal(uint* address, System.Runti
             public static unsafe void StoreVector128x4(ulong* address, (System.Runtime.Intrinsics.Vector128<ulong> Value1, System.Runtime.Intrinsics.Vector128<ulong> Value2, System.Runtime.Intrinsics.Vector128<ulong> Value3, System.Runtime.Intrinsics.Vector128<ulong> Value4) value) { throw null; }
             public static unsafe void StoreVector128x4(float* address, (System.Runtime.Intrinsics.Vector128<float> Value1, System.Runtime.Intrinsics.Vector128<float> Value2, System.Runtime.Intrinsics.Vector128<float> Value3, System.Runtime.Intrinsics.Vector128<float> Value4) value) { throw null; }
             public static unsafe void StoreVector128x4(double* address, (System.Runtime.Intrinsics.Vector128<double> Value1, System.Runtime.Intrinsics.Vector128<double> Value2, System.Runtime.Intrinsics.Vector128<double> Value3, System.Runtime.Intrinsics.Vector128<double> Value4) value) { throw null; }
-#endif
             public static System.Runtime.Intrinsics.Vector128<double> Subtract(System.Runtime.Intrinsics.Vector128<double> left, System.Runtime.Intrinsics.Vector128<double> right) { throw null; }
             public static System.Runtime.Intrinsics.Vector64<byte> SubtractSaturateScalar(System.Runtime.Intrinsics.Vector64<byte> left, System.Runtime.Intrinsics.Vector64<byte> right) { throw null; }
             public static System.Runtime.Intrinsics.Vector64<short> SubtractSaturateScalar(System.Runtime.Intrinsics.Vector64<short> left, System.Runtime.Intrinsics.Vector64<short> right) { throw null; }
@@ -4162,7 +4138,51 @@ internal Sve() { }
             internal Arm64() { }
             public static new bool IsSupported { get { throw null; } }
         }
+
+        public static System.Numerics.Vector<byte> CreateTrueMaskByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; }
+        public static System.Numerics.Vector<double> CreateTrueMaskDouble([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; }
+        public static System.Numerics.Vector<short> CreateTrueMaskInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; }
+        public static System.Numerics.Vector<int> CreateTrueMaskInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; }
+        public static System.Numerics.Vector<long> CreateTrueMaskInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; }
+        public static System.Numerics.Vector<sbyte> CreateTrueMaskSByte([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; }
+        public static System.Numerics.Vector<float> CreateTrueMaskSingle([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; }
+        public static System.Numerics.Vector<ushort> CreateTrueMaskUInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; }
+        public static System.Numerics.Vector<uint> CreateTrueMaskUInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; }
+        public static System.Numerics.Vector<ulong> CreateTrueMaskUInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; }
+
+        public static unsafe System.Numerics.Vector<sbyte> LoadVector(System.Numerics.Vector<sbyte> mask, sbyte* address) { throw null; }
+        public static unsafe System.Numerics.Vector<short> LoadVector(System.Numerics.Vector<short> mask, short* address) { throw null; }
+        public static unsafe System.Numerics.Vector<int> LoadVector(System.Numerics.Vector<int> mask, int* address) { throw null; }
+        public static unsafe System.Numerics.Vector<long> LoadVector(System.Numerics.Vector<long> mask, long* address) { throw null; }
+        public static unsafe System.Numerics.Vector<byte> LoadVector(System.Numerics.Vector<byte> mask, byte* address) { throw null; }
+        public static unsafe System.Numerics.Vector<ushort> LoadVector(System.Numerics.Vector<ushort> mask, ushort* address) { throw null; }
+        public static unsafe System.Numerics.Vector<uint> LoadVector(System.Numerics.Vector<uint> mask, uint* address) { throw null; }
+        public static unsafe System.Numerics.Vector<ulong> LoadVector(System.Numerics.Vector<ulong> mask, ulong* address) { throw null; }
+        public static unsafe System.Numerics.Vector<float> LoadVector(System.Numerics.Vector<float> mask, float* address) { throw null; }
+        public static unsafe System.Numerics.Vector<double> LoadVector(System.Numerics.Vector<double> mask, double* address) { throw null; }
+
     }
+
+    public enum SveMaskPattern : byte
+    {
+        LargestPowerOf2 = 0,      // The largest power of 2.
+        VectorCount1 = 1,         // 1 element.
+        VectorCount2 = 2,         // 2 elements.
+        VectorCount3 = 3,         // 3 elements.
+        VectorCount4 = 4,         // 4 elements.
+        VectorCount5 = 5,         // 5 elements.
+        VectorCount6 = 6,         // 6 elements.
+        VectorCount7 = 7,         // 7 elements.
+        VectorCount8 = 8,         // 8 elements.
+        VectorCount16 = 9,        // 16 elements.
+        VectorCount32 = 10,       // 32 elements.
+        VectorCount64 = 11,       // 64 elements.
+        VectorCount128 = 12,      // 128 elements.
+        VectorCount256 = 13,      // 256 elements.
+        LargestMultipleOf4 = 29,  // The largest multiple of 4.
+        LargestMultipleOf3 = 30,  // The largest multiple of 3.
+        All  = 31                 // All available (implicitly a multiple of two).
+    };
 }
 namespace System.Runtime.Intrinsics.X86
 {
@@ -5119,10 +5139,18 @@ internal Avx512DQ() { }
         public static System.Runtime.Intrinsics.Vector512<double> ConvertToVector512Double(System.Runtime.Intrinsics.Vector512<ulong> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<long> ConvertToVector512Int64(System.Runtime.Intrinsics.Vector512<double> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<long> ConvertToVector512Int64(System.Runtime.Intrinsics.Vector256<float> value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<float> ConvertToVector256Single(System.Runtime.Intrinsics.Vector512<long> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<float> ConvertToVector256Single(System.Runtime.Intrinsics.Vector512<ulong> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> ConvertToVector512Double(System.Runtime.Intrinsics.Vector512<long> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> ConvertToVector512Double(System.Runtime.Intrinsics.Vector512<ulong> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<long> ConvertToVector512Int64(System.Runtime.Intrinsics.Vector512<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<long> ConvertToVector512Int64(System.Runtime.Intrinsics.Vector256<float> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<long> ConvertToVector512Int64WithTruncation(System.Runtime.Intrinsics.Vector512<double> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<long> ConvertToVector512Int64WithTruncation(System.Runtime.Intrinsics.Vector256<float> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<ulong> ConvertToVector512UInt64(System.Runtime.Intrinsics.Vector512<double> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<ulong> ConvertToVector512UInt64(System.Runtime.Intrinsics.Vector256<float> value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<ulong> ConvertToVector512UInt64(System.Runtime.Intrinsics.Vector512<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<ulong> ConvertToVector512UInt64(System.Runtime.Intrinsics.Vector256<float> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<ulong> ConvertToVector512UInt64WithTruncation(System.Runtime.Intrinsics.Vector512<double> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<ulong> ConvertToVector512UInt64WithTruncation(System.Runtime.Intrinsics.Vector256<float> value) { throw null; }
         public static new System.Runtime.Intrinsics.Vector128<double> ExtractVector128(System.Runtime.Intrinsics.Vector512<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte index) { throw null; }
@@ -5217,8 +5245,11 @@ internal Avx512F() { }
         public static System.Runtime.Intrinsics.Vector512<int> Add(System.Runtime.Intrinsics.Vector512<int> left, System.Runtime.Intrinsics.Vector512<int> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<long> Add(System.Runtime.Intrinsics.Vector512<long> left, System.Runtime.Intrinsics.Vector512<long> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> Add(System.Runtime.Intrinsics.Vector512<float> left, System.Runtime.Intrinsics.Vector512<float> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> Add(System.Runtime.Intrinsics.Vector512<float> left, System.Runtime.Intrinsics.Vector512<float> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<uint> Add(System.Runtime.Intrinsics.Vector512<uint> left, System.Runtime.Intrinsics.Vector512<uint> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<ulong> Add(System.Runtime.Intrinsics.Vector512<ulong> left, System.Runtime.Intrinsics.Vector512<ulong> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> AddScalar(System.Runtime.Intrinsics.Vector128<float> left, System.Runtime.Intrinsics.Vector128<float> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> AddScalar(System.Runtime.Intrinsics.Vector128<double> left, System.Runtime.Intrinsics.Vector128<double> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<int> AlignRight32(System.Runtime.Intrinsics.Vector512<int> left, System.Runtime.Intrinsics.Vector512<int> right, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte mask) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<uint> AlignRight32(System.Runtime.Intrinsics.Vector512<uint> left, System.Runtime.Intrinsics.Vector512<uint> right, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte mask) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<long> AlignRight64(System.Runtime.Intrinsics.Vector512<long> left, System.Runtime.Intrinsics.Vector512<long> right, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte mask) { throw null; }
@@ -5309,8 +5340,15 @@ internal Avx512F() { }
         public static System.Runtime.Intrinsics.Vector512<float> CompareUnordered(System.Runtime.Intrinsics.Vector512<float> left, System.Runtime.Intrinsics.Vector512<float> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<double> ConvertScalarToVector128Double(System.Runtime.Intrinsics.Vector128<double> upper, uint value) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, uint value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, int value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, uint value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, System.Runtime.Intrinsics.Vector128<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static int ConvertToInt32(System.Runtime.Intrinsics.Vector128<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static int ConvertToInt32(System.Runtime.Intrinsics.Vector128<float> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static uint ConvertToUInt32(System.Runtime.Intrinsics.Vector128<double> value) { throw null; }
         public static uint ConvertToUInt32(System.Runtime.Intrinsics.Vector128<float> value) { throw null; }
+        public static uint ConvertToUInt32(System.Runtime.Intrinsics.Vector128<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static uint ConvertToUInt32(System.Runtime.Intrinsics.Vector128<float> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static uint ConvertToUInt32WithTruncation(System.Runtime.Intrinsics.Vector128<double> value) { throw null; }
         public static uint ConvertToUInt32WithTruncation(System.Runtime.Intrinsics.Vector128<float> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<byte> ConvertToVector128Byte(System.Runtime.Intrinsics.Vector512<int> value) { throw null; }
@@ -5335,15 +5373,18 @@ internal Avx512F() { }
         public static System.Runtime.Intrinsics.Vector256<short> ConvertToVector256Int16(System.Runtime.Intrinsics.Vector512<uint> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<short> ConvertToVector256Int16WithSaturation(System.Runtime.Intrinsics.Vector512<int> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<int> ConvertToVector256Int32(System.Runtime.Intrinsics.Vector512<double> value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> ConvertToVector256Int32(System.Runtime.Intrinsics.Vector512<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<int> ConvertToVector256Int32(System.Runtime.Intrinsics.Vector512<long> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<int> ConvertToVector256Int32(System.Runtime.Intrinsics.Vector512<ulong> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<int> ConvertToVector256Int32WithSaturation(System.Runtime.Intrinsics.Vector512<long> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<int> ConvertToVector256Int32WithTruncation(System.Runtime.Intrinsics.Vector512<double> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<float> ConvertToVector256Single(System.Runtime.Intrinsics.Vector512<double> value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<float> ConvertToVector256Single(System.Runtime.Intrinsics.Vector512<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<ushort> ConvertToVector256UInt16(System.Runtime.Intrinsics.Vector512<int> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<ushort> ConvertToVector256UInt16(System.Runtime.Intrinsics.Vector512<uint> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<ushort> ConvertToVector256UInt16WithSaturation(System.Runtime.Intrinsics.Vector512<uint> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<uint> ConvertToVector256UInt32(System.Runtime.Intrinsics.Vector512<double> value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<uint> ConvertToVector256UInt32(System.Runtime.Intrinsics.Vector512<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<uint> ConvertToVector256UInt32(System.Runtime.Intrinsics.Vector512<long> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<uint> ConvertToVector256UInt32(System.Runtime.Intrinsics.Vector512<ulong> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<uint> ConvertToVector256UInt32WithSaturation(System.Runtime.Intrinsics.Vector512<ulong> value) { throw null; }
@@ -5355,6 +5396,7 @@ internal Avx512F() { }
         public static System.Runtime.Intrinsics.Vector512<int> ConvertToVector512Int32(System.Runtime.Intrinsics.Vector256<short> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<int> ConvertToVector512Int32(System.Runtime.Intrinsics.Vector128<sbyte> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<int> ConvertToVector512Int32(System.Runtime.Intrinsics.Vector512<float> value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<int> ConvertToVector512Int32(System.Runtime.Intrinsics.Vector512<float> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<int> ConvertToVector512Int32(System.Runtime.Intrinsics.Vector256<ushort> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<int> ConvertToVector512Int32WithTruncation(System.Runtime.Intrinsics.Vector512<float> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<long> ConvertToVector512Int64(System.Runtime.Intrinsics.Vector128<byte> value) { throw null; }
@@ -5364,11 +5406,14 @@ internal Avx512F() { }
         public static System.Runtime.Intrinsics.Vector512<long> ConvertToVector512Int64(System.Runtime.Intrinsics.Vector128<ushort> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<long> ConvertToVector512Int64(System.Runtime.Intrinsics.Vector256<uint> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> ConvertToVector512Single(System.Runtime.Intrinsics.Vector512<int> value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> ConvertToVector512Single(System.Runtime.Intrinsics.Vector512<int> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> ConvertToVector512Single(System.Runtime.Intrinsics.Vector512<uint> value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> ConvertToVector512Single(System.Runtime.Intrinsics.Vector512<uint> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<uint> ConvertToVector512UInt32(System.Runtime.Intrinsics.Vector128<byte> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<uint> ConvertToVector512UInt32(System.Runtime.Intrinsics.Vector256<short> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<uint> ConvertToVector512UInt32(System.Runtime.Intrinsics.Vector128<sbyte> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<uint> ConvertToVector512UInt32(System.Runtime.Intrinsics.Vector512<float> value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<uint> ConvertToVector512UInt32(System.Runtime.Intrinsics.Vector512<float> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<uint> ConvertToVector512UInt32(System.Runtime.Intrinsics.Vector256<ushort> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<uint> ConvertToVector512UInt32WithTruncation(System.Runtime.Intrinsics.Vector512<float> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<ulong> ConvertToVector512UInt64(System.Runtime.Intrinsics.Vector128<byte> value) { throw null; }
@@ -5379,6 +5424,10 @@ internal Avx512F() { }
         public static System.Runtime.Intrinsics.Vector512<ulong> ConvertToVector512UInt64(System.Runtime.Intrinsics.Vector256<uint> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> Divide(System.Runtime.Intrinsics.Vector512<float> left, System.Runtime.Intrinsics.Vector512<float> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> Divide(System.Runtime.Intrinsics.Vector512<double> left, System.Runtime.Intrinsics.Vector512<double> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> Divide(System.Runtime.Intrinsics.Vector512<float> left, System.Runtime.Intrinsics.Vector512<float> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> Divide(System.Runtime.Intrinsics.Vector512<double> left, System.Runtime.Intrinsics.Vector512<double> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> DivideScalar(System.Runtime.Intrinsics.Vector128<double> left, System.Runtime.Intrinsics.Vector128<double> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> DivideScalar(System.Runtime.Intrinsics.Vector128<float> left, System.Runtime.Intrinsics.Vector128<float> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> DuplicateEvenIndexed(System.Runtime.Intrinsics.Vector512<float> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> DuplicateEvenIndexed(System.Runtime.Intrinsics.Vector512<double> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> DuplicateOddIndexed(System.Runtime.Intrinsics.Vector512<float> value) { throw null; }
@@ -5407,17 +5456,37 @@ internal Avx512F() { }
         public static System.Runtime.Intrinsics.Vector128<double> FixupScalar(System.Runtime.Intrinsics.Vector128<double> left, System.Runtime.Intrinsics.Vector128<double> right, System.Runtime.Intrinsics.Vector128<long> table, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte control) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<float> FixupScalar(System.Runtime.Intrinsics.Vector128<float> left, System.Runtime.Intrinsics.Vector128<float> right, System.Runtime.Intrinsics.Vector128<int> table, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte control) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> FusedMultiplyAdd(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> FusedMultiplyAdd(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> FusedMultiplyAdd(System.Runtime.Intrinsics.Vector512<float> a, System.Runtime.Intrinsics.Vector512<float> b, System.Runtime.Intrinsics.Vector512<float> c) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> FusedMultiplyAdd(System.Runtime.Intrinsics.Vector512<float> a, System.Runtime.Intrinsics.Vector512<float> b, System.Runtime.Intrinsics.Vector512<float> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> FusedMultiplyAddScalar(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> FusedMultiplyAddScalar(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> FusedMultiplyAddNegated(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> FusedMultiplyAddNegated(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> FusedMultiplyAddNegated(System.Runtime.Intrinsics.Vector512<float> a, System.Runtime.Intrinsics.Vector512<float> b, System.Runtime.Intrinsics.Vector512<float> c) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> FusedMultiplyAddNegated(System.Runtime.Intrinsics.Vector512<float> a, System.Runtime.Intrinsics.Vector512<float> b, System.Runtime.Intrinsics.Vector512<float> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> FusedMultiplyAddNegatedScalar(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> FusedMultiplyAddNegatedScalar(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> FusedMultiplyAddSubtract(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> FusedMultiplyAddSubtract(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> FusedMultiplyAddSubtract(System.Runtime.Intrinsics.Vector512<float> a, System.Runtime.Intrinsics.Vector512<float> b, System.Runtime.Intrinsics.Vector512<float> c) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> FusedMultiplyAddSubtract(System.Runtime.Intrinsics.Vector512<float> a, System.Runtime.Intrinsics.Vector512<float> b, System.Runtime.Intrinsics.Vector512<float> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> FusedMultiplySubtract(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> FusedMultiplySubtract(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> FusedMultiplySubtract(System.Runtime.Intrinsics.Vector512<float> a, System.Runtime.Intrinsics.Vector512<float> b, System.Runtime.Intrinsics.Vector512<float> c) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> FusedMultiplySubtract(System.Runtime.Intrinsics.Vector512<float> a, System.Runtime.Intrinsics.Vector512<float> b, System.Runtime.Intrinsics.Vector512<float> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> FusedMultiplySubtractScalar(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> FusedMultiplySubtractScalar(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> FusedMultiplySubtractAdd(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> FusedMultiplySubtractAdd(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> FusedMultiplySubtractAdd(System.Runtime.Intrinsics.Vector512<float> a, System.Runtime.Intrinsics.Vector512<float> b, System.Runtime.Intrinsics.Vector512<float> c) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> FusedMultiplySubtractAdd(System.Runtime.Intrinsics.Vector512<float> a, System.Runtime.Intrinsics.Vector512<float> b, System.Runtime.Intrinsics.Vector512<float> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> FusedMultiplySubtractNegated(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> FusedMultiplySubtractNegated(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> FusedMultiplySubtractNegated(System.Runtime.Intrinsics.Vector512<float> a, System.Runtime.Intrinsics.Vector512<float> b, System.Runtime.Intrinsics.Vector512<float> c) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> FusedMultiplySubtractNegated(System.Runtime.Intrinsics.Vector512<float> a, System.Runtime.Intrinsics.Vector512<float> b, System.Runtime.Intrinsics.Vector512<float> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> FusedMultiplySubtractNegatedScalar(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> FusedMultiplySubtractNegatedScalar(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> GetExponent(System.Runtime.Intrinsics.Vector512<double> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> GetExponent(System.Runtime.Intrinsics.Vector512<float> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<double> GetExponentScalar(System.Runtime.Intrinsics.Vector128<double> value) { throw null; }
@@ -5494,6 +5563,10 @@ internal Avx512F() { }
         public static System.Runtime.Intrinsics.Vector512<ulong> Multiply(System.Runtime.Intrinsics.Vector512<uint> left, System.Runtime.Intrinsics.Vector512<uint> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> Multiply(System.Runtime.Intrinsics.Vector512<float> left, System.Runtime.Intrinsics.Vector512<float> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> Multiply(System.Runtime.Intrinsics.Vector512<double> left, System.Runtime.Intrinsics.Vector512<double> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> Multiply(System.Runtime.Intrinsics.Vector512<float> left, System.Runtime.Intrinsics.Vector512<float> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> Multiply(System.Runtime.Intrinsics.Vector512<double> left, System.Runtime.Intrinsics.Vector512<double> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> MultiplyScalar(System.Runtime.Intrinsics.Vector128<double> left, System.Runtime.Intrinsics.Vector128<double> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> MultiplyScalar(System.Runtime.Intrinsics.Vector128<float> left, System.Runtime.Intrinsics.Vector128<float> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<int> MultiplyLow(System.Runtime.Intrinsics.Vector512<int> left, System.Runtime.Intrinsics.Vector512<int> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<uint> MultiplyLow(System.Runtime.Intrinsics.Vector512<uint> left, System.Runtime.Intrinsics.Vector512<uint> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<byte> Or(System.Runtime.Intrinsics.Vector512<byte> left, System.Runtime.Intrinsics.Vector512<byte> right) { throw null; }
@@ -5559,8 +5632,12 @@ internal Avx512F() { }
         public static System.Runtime.Intrinsics.Vector128<float> RoundScaleScalar(System.Runtime.Intrinsics.Vector128<float> upper, System.Runtime.Intrinsics.Vector128<float> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte control) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> Scale(System.Runtime.Intrinsics.Vector512<double> left, System.Runtime.Intrinsics.Vector512<double> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> Scale(System.Runtime.Intrinsics.Vector512<float> left, System.Runtime.Intrinsics.Vector512<float> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> Scale(System.Runtime.Intrinsics.Vector512<double> left, System.Runtime.Intrinsics.Vector512<double> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> Scale(System.Runtime.Intrinsics.Vector512<float> left, System.Runtime.Intrinsics.Vector512<float> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<double> ScaleScalar(System.Runtime.Intrinsics.Vector128<double> left, System.Runtime.Intrinsics.Vector128<double> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<float> ScaleScalar(System.Runtime.Intrinsics.Vector128<float> left, System.Runtime.Intrinsics.Vector128<float> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> ScaleScalar(System.Runtime.Intrinsics.Vector128<double> left, System.Runtime.Intrinsics.Vector128<double> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> ScaleScalar(System.Runtime.Intrinsics.Vector128<float> left, System.Runtime.Intrinsics.Vector128<float> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<int> ShiftLeftLogical(System.Runtime.Intrinsics.Vector512<int> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte count) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<int> ShiftLeftLogical(System.Runtime.Intrinsics.Vector512<int> value, System.Runtime.Intrinsics.Vector128<int> count) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<long> ShiftLeftLogical(System.Runtime.Intrinsics.Vector512<long> value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte count) { throw null; }
@@ -5603,6 +5680,10 @@ internal Avx512F() { }
         public static System.Runtime.Intrinsics.Vector512<ulong> Shuffle4x128(System.Runtime.Intrinsics.Vector512<ulong> left, System.Runtime.Intrinsics.Vector512<ulong> right, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte control) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<float> Sqrt(System.Runtime.Intrinsics.Vector512<float> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> Sqrt(System.Runtime.Intrinsics.Vector512<double> value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> Sqrt(System.Runtime.Intrinsics.Vector512<float> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> Sqrt(System.Runtime.Intrinsics.Vector512<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> SqrtScalar(System.Runtime.Intrinsics.Vector128<double> upper, System.Runtime.Intrinsics.Vector128<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> SqrtScalar(System.Runtime.Intrinsics.Vector128<float> upper, System.Runtime.Intrinsics.Vector128<float> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static unsafe void Store(byte* address, System.Runtime.Intrinsics.Vector512<byte> source) { }
         public static unsafe void Store(double* address, System.Runtime.Intrinsics.Vector512<double> source) { }
         public static unsafe void Store(short* address, System.Runtime.Intrinsics.Vector512<short> source) { }
@@ -5639,6 +5720,10 @@ public static unsafe void StoreAlignedNonTemporal(ulong* address, System.Runtime
         public static System.Runtime.Intrinsics.Vector512<float> Subtract(System.Runtime.Intrinsics.Vector512<float> left, System.Runtime.Intrinsics.Vector512<float> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<uint> Subtract(System.Runtime.Intrinsics.Vector512<uint> left, System.Runtime.Intrinsics.Vector512<uint> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<ulong> Subtract(System.Runtime.Intrinsics.Vector512<ulong> left, System.Runtime.Intrinsics.Vector512<ulong> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> Subtract(System.Runtime.Intrinsics.Vector512<float> left, System.Runtime.Intrinsics.Vector512<float> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> Subtract(System.Runtime.Intrinsics.Vector512<double> left, System.Runtime.Intrinsics.Vector512<double> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> SubtractScalar(System.Runtime.Intrinsics.Vector128<float> left, System.Runtime.Intrinsics.Vector128<float> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> SubtractScalar(System.Runtime.Intrinsics.Vector128<double> left, System.Runtime.Intrinsics.Vector128<double> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<byte> TernaryLogic(System.Runtime.Intrinsics.Vector512<byte> a, System.Runtime.Intrinsics.Vector512<byte> b, System.Runtime.Intrinsics.Vector512<byte> c, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte control) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> TernaryLogic(System.Runtime.Intrinsics.Vector512<double> a, System.Runtime.Intrinsics.Vector512<double> b, System.Runtime.Intrinsics.Vector512<double> c, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte control) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<int> TernaryLogic(System.Runtime.Intrinsics.Vector512<int> a, System.Runtime.Intrinsics.Vector512<int> b, System.Runtime.Intrinsics.Vector512<int> c, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte control) { throw null; }
@@ -5912,9 +5997,17 @@ internal VL() { }
             internal X64() { }
             public static new bool IsSupported { get { throw null; } }
             public static System.Runtime.Intrinsics.Vector128<double> ConvertScalarToVector128Double(System.Runtime.Intrinsics.Vector128<double> upper, ulong value) { throw null; }
-            public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, ulong value) { throw null; }
+            public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, ulong value) { throw null; }            
+            public static System.Runtime.Intrinsics.Vector128<double> ConvertScalarToVector128Double(System.Runtime.Intrinsics.Vector128<double> upper, ulong value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+            public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, ulong value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+            public static System.Runtime.Intrinsics.Vector128<double> ConvertScalarToVector128Double(System.Runtime.Intrinsics.Vector128<double> upper, long value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+            public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, long value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+            public static long ConvertToInt64(System.Runtime.Intrinsics.Vector128<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+            public static long ConvertToInt64(System.Runtime.Intrinsics.Vector128<float> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
             public static ulong ConvertToUInt64(System.Runtime.Intrinsics.Vector128<double> value) { throw null; }
             public static ulong ConvertToUInt64(System.Runtime.Intrinsics.Vector128<float> value) { throw null; }
+            public static ulong ConvertToUInt64(System.Runtime.Intrinsics.Vector128<double> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
+            public static ulong ConvertToUInt64(System.Runtime.Intrinsics.Vector128<float> value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
             public static ulong ConvertToUInt64WithTruncation(System.Runtime.Intrinsics.Vector128<double> value) { throw null; }
             public static ulong ConvertToUInt64WithTruncation(System.Runtime.Intrinsics.Vector128<float> value) { throw null; }
         }
diff --git a/src/libraries/System.Runtime.Loader/tests/AssemblyExtensionsTest.cs b/src/libraries/System.Runtime.Loader/tests/AssemblyExtensionsTest.cs
new file mode 100644
index 000000000000..71a4cb434583
--- /dev/null
+++ b/src/libraries/System.Runtime.Loader/tests/AssemblyExtensionsTest.cs
@@ -0,0 +1,32 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Xunit;
+using System.Reflection.Metadata;
+using System.Reflection;
+
+namespace System.Runtime.Loader.Tests
+{
+    public unsafe class AssemblyExtensionsTest
+    {
+        [Fact]
+        public void TryGetRawMetadata()
+        {
+            bool supportsRawMetadata = PlatformDetection.IsNotMonoRuntime && PlatformDetection.IsNotNativeAot;
+
+            Assembly assembly = typeof(AssemblyExtensionsTest).Assembly;
+            bool hasMetadata = assembly.TryGetRawMetadata(out byte* blob, out int length);
+
+            Assert.Equal(supportsRawMetadata, hasMetadata);
+            Assert.Equal(supportsRawMetadata, blob != null);
+            Assert.Equal(supportsRawMetadata, length > 0);
+
+            if (supportsRawMetadata)
+            {
+                var metadataReader = new MetadataReader(blob, length);
+                string assemblyName = metadataReader.GetString(metadataReader.GetAssemblyDefinition().Name);
+                Assert.Equal(assembly.GetName().Name, assemblyName);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Runtime.Loader/tests/System.Runtime.Loader.Tests.csproj b/src/libraries/System.Runtime.Loader/tests/System.Runtime.Loader.Tests.csproj
index 0ad5f15337a6..1874724006ee 100644
--- a/src/libraries/System.Runtime.Loader/tests/System.Runtime.Loader.Tests.csproj
+++ b/src/libraries/System.Runtime.Loader/tests/System.Runtime.Loader.Tests.csproj
@@ -2,6 +2,7 @@
   <PropertyGroup>
     <RootNamespace>System.Runtime.Loader.Tests</RootNamespace>
     <TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <TestRuntime>true</TestRuntime>
     <IncludeRemoteExecutor>true</IncludeRemoteExecutor>
     <!-- Some tests rely on no deps.json file being present. -->
@@ -16,6 +17,7 @@
   <ItemGroup>
     <Compile Include="ApplyUpdateTest.cs" />
     <Compile Include="ApplyUpdateUtil.cs" />
+    <Compile Include="AssemblyExtensionsTest.cs" />
     <Compile Include="AssemblyLoadContextTest.cs" />
     <Compile Include="CollectibleAssemblyLoadContextTest.cs" />
     <Compile Include="ContextualReflection.cs" />
diff --git a/src/libraries/System.Runtime.Numerics/src/System.Runtime.Numerics.csproj b/src/libraries/System.Runtime.Numerics/src/System.Runtime.Numerics.csproj
index d6ec3b9ae50f..d2003757c60f 100644
--- a/src/libraries/System.Runtime.Numerics/src/System.Runtime.Numerics.csproj
+++ b/src/libraries/System.Runtime.Numerics/src/System.Runtime.Numerics.csproj
@@ -21,17 +21,18 @@
     <Compile Include="System\Number.Polyfill.cs" />
     <Compile Include="System\Numerics\NumericsHelpers.cs" />
     <Compile Include="System\Numerics\Complex.cs" />
-    <Compile Include="System\Globalization\FormatProvider.BigInteger.cs" />
-    <Compile Include="System\Globalization\FormatProvider.Number.cs" />
-    <Compile Include="System\Globalization\FormatProvider.NumberBuffer.cs" />
     <Compile Include="Properties\InternalsVisibleTo.cs" />
   </ItemGroup>
 
   <ItemGroup>
+    <Compile Include="$(CoreLibSharedDir)System\Collections\Generic\ValueListBuilder.cs"
+             Link="CoreLib\System\Collections\Generic\ValueListBuilder.cs" />
     <Compile Include="$(CommonPath)System\Text\ValueStringBuilder.cs"
              Link="CoreLib\System\Text\ValueStringBuilder.cs" />
     <Compile Include="$(CommonPath)System\HexConverter.cs"
              Link="Common\System\HexConverter.cs" />
+    <Compile Include="$(CommonPath)System\Number.Formatting.Common.cs"
+             Link="Common\System\Number.Formatting.Common.cs" />
     <Compile Include="$(CommonPath)System\Number.NumberBuffer.cs"
              Link="Common\System\Number.NumberBuffer.cs" />
     <Compile Include="$(CommonPath)System\Number.Parsing.Common.cs"
diff --git a/src/libraries/System.Runtime.Numerics/src/System/Globalization/FormatProvider.BigInteger.cs b/src/libraries/System.Runtime.Numerics/src/System/Globalization/FormatProvider.BigInteger.cs
deleted file mode 100644
index a81f40c844cc..000000000000
--- a/src/libraries/System.Runtime.Numerics/src/System/Globalization/FormatProvider.BigInteger.cs
+++ /dev/null
@@ -1,35 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Text;
-
-namespace System.Globalization
-{
-    internal static partial class FormatProvider
-    {
-        internal static void FormatBigInteger(ref ValueStringBuilder sb, int precision, int scale, bool sign, ReadOnlySpan<char> format, NumberFormatInfo numberFormatInfo, char[] digits, int startIndex)
-        {
-            unsafe
-            {
-                fixed (char* overrideDigits = digits)
-                {
-                    Number.NumberBuffer numberBuffer = default;
-                    numberBuffer.overrideDigits = overrideDigits + startIndex;
-                    numberBuffer.precision = precision;
-                    numberBuffer.scale = scale;
-                    numberBuffer.sign = sign;
-
-                    char fmt = Number.ParseFormatSpecifier(format, out int maxDigits);
-                    if (fmt != 0)
-                    {
-                        Number.NumberToString(ref sb, ref numberBuffer, fmt, maxDigits, numberFormatInfo, isDecimal: false);
-                    }
-                    else
-                    {
-                        Number.NumberToStringFormat(ref sb, ref numberBuffer, format, numberFormatInfo);
-                    }
-                }
-            }
-        }
-    }
-}
diff --git a/src/libraries/System.Runtime.Numerics/src/System/Globalization/FormatProvider.Number.cs b/src/libraries/System.Runtime.Numerics/src/System/Globalization/FormatProvider.Number.cs
deleted file mode 100644
index 49706430b574..000000000000
--- a/src/libraries/System.Runtime.Numerics/src/System/Globalization/FormatProvider.Number.cs
+++ /dev/null
@@ -1,1352 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-using System.Runtime.InteropServices;
-using System.Text;
-
-namespace System.Globalization
-{
-    internal partial class FormatProvider
-    {
-        // The Number class implements methods for formatting and parsing
-        // numeric values. To format and parse numeric values, applications should
-        // use the Format and Parse methods provided by the numeric
-        // classes (Byte, Int16, Int32, Int64,
-        // Single, Double, Currency, and Decimal). Those
-        // Format and Parse methods share a common implementation
-        // provided by this class, and are thus documented in detail here.
-        //
-        // Formatting
-        //
-        // The Format methods provided by the numeric classes are all of the
-        // form
-        //
-        //  public static String Format(XXX value, String format);
-        //  public static String Format(XXX value, String format, NumberFormatInfo info);
-        //
-        // where XXX is the name of the particular numeric class. The methods convert
-        // the numeric value to a string using the format string given by the
-        // format parameter. If the format parameter is null or
-        // an empty string, the number is formatted as if the string "G" (general
-        // format) was specified. The info parameter specifies the
-        // NumberFormatInfo instance to use when formatting the number. If the
-        // info parameter is null or omitted, the numeric formatting information
-        // is obtained from the current culture. The NumberFormatInfo supplies
-        // such information as the characters to use for decimal and thousand
-        // separators, and the spelling and placement of currency symbols in monetary
-        // values.
-        //
-        // Format strings fall into two categories: Standard format strings and
-        // user-defined format strings. A format string consisting of a single
-        // alphabetic character (A-Z or a-z), optionally followed by a sequence of
-        // digits (0-9), is a standard format string. All other format strings are
-        // used-defined format strings.
-        //
-        // A standard format string takes the form Axx, where A is an
-        // alphabetic character called the format specifier and xx is a
-        // sequence of digits called the precision specifier. The format
-        // specifier controls the type of formatting applied to the number and the
-        // precision specifier controls the number of significant digits or decimal
-        // places of the formatting operation. The following table describes the
-        // supported standard formats.
-        //
-        // C c - Currency format. The number is
-        // converted to a string that represents a currency amount. The conversion is
-        // controlled by the currency format information of the NumberFormatInfo
-        // used to format the number. The precision specifier indicates the desired
-        // number of decimal places. If the precision specifier is omitted, the default
-        // currency precision given by the NumberFormatInfo is used.
-        //
-        // D d - Decimal format. This format is
-        // supported for integral types only. The number is converted to a string of
-        // decimal digits, prefixed by a minus sign if the number is negative. The
-        // precision specifier indicates the minimum number of digits desired in the
-        // resulting string. If required, the number will be left-padded with zeros to
-        // produce the number of digits given by the precision specifier.
-        //
-        // E e Engineering (scientific) format.
-        // The number is converted to a string of the form
-        // "-d.ddd...E+ddd" or "-d.ddd...e+ddd", where each
-        // 'd' indicates a digit (0-9). The string starts with a minus sign if the
-        // number is negative, and one digit always precedes the decimal point. The
-        // precision specifier indicates the desired number of digits after the decimal
-        // point. If the precision specifier is omitted, a default of 6 digits after
-        // the decimal point is used. The format specifier indicates whether to prefix
-        // the exponent with an 'E' or an 'e'. The exponent is always consists of a
-        // plus or minus sign and three digits.
-        //
-        // F f Fixed point format. The number is
-        // converted to a string of the form "-ddd.ddd....", where each
-        // 'd' indicates a digit (0-9). The string starts with a minus sign if the
-        // number is negative. The precision specifier indicates the desired number of
-        // decimal places. If the precision specifier is omitted, the default numeric
-        // precision given by the NumberFormatInfo is used.
-        //
-        // G g - General format. The number is
-        // converted to the shortest possible decimal representation using fixed point
-        // or scientific format. The precision specifier determines the number of
-        // significant digits in the resulting string. If the precision specifier is
-        // omitted, the number of significant digits is determined by the type of the
-        // number being converted (10 for int, 19 for long, 7 for
-        // float, 15 for double, 19 for Currency, and 29 for
-        // Decimal). Trailing zeros after the decimal point are removed, and the
-        // resulting string contains a decimal point only if required. The resulting
-        // string uses fixed point format if the exponent of the number is less than
-        // the number of significant digits and greater than or equal to -4. Otherwise,
-        // the resulting string uses scientific format, and the case of the format
-        // specifier controls whether the exponent is prefixed with an 'E' or an
-        // 'e'.
-        //
-        // N n Number format. The number is
-        // converted to a string of the form "-d,ddd,ddd.ddd....", where
-        // each 'd' indicates a digit (0-9). The string starts with a minus sign if the
-        // number is negative. Thousand separators are inserted between each group of
-        // three digits to the left of the decimal point. The precision specifier
-        // indicates the desired number of decimal places. If the precision specifier
-        // is omitted, the default numeric precision given by the
-        // NumberFormatInfo is used.
-        //
-        // X x - Hexadecimal format. This format is
-        // supported for integral types only. The number is converted to a string of
-        // hexadecimal digits. The format specifier indicates whether to use upper or
-        // lower case characters for the hexadecimal digits above 9 ('X' for 'ABCDEF',
-        // and 'x' for 'abcdef'). The precision specifier indicates the minimum number
-        // of digits desired in the resulting string. If required, the number will be
-        // left-padded with zeros to produce the number of digits given by the
-        // precision specifier.
-        //
-        // Some examples of standard format strings and their results are shown in the
-        // table below. (The examples all assume a default NumberFormatInfo.)
-        //
-        // Value        Format  Result
-        // 12345.6789   C       $12,345.68
-        // -12345.6789  C       ($12,345.68)
-        // 12345        D       12345
-        // 12345        D8      00012345
-        // 12345.6789   E       1.234568E+004
-        // 12345.6789   E10     1.2345678900E+004
-        // 12345.6789   e4      1.2346e+004
-        // 12345.6789   F       12345.68
-        // 12345.6789   F0      12346
-        // 12345.6789   F6      12345.678900
-        // 12345.6789   G       12345.6789
-        // 12345.6789   G7      12345.68
-        // 123456789    G7      1.234568E8
-        // 12345.6789   N       12,345.68
-        // 123456789    N4      123,456,789.0000
-        // 0x2c45e      x       2c45e
-        // 0x2c45e      X       2C45E
-        // 0x2c45e      X8      0002C45E
-        //
-        // Format strings that do not start with an alphabetic character, or that start
-        // with an alphabetic character followed by a non-digit, are called
-        // user-defined format strings. The following table describes the formatting
-        // characters that are supported in user defined format strings.
-        //
-        //
-        // 0 - Digit placeholder. If the value being
-        // formatted has a digit in the position where the '0' appears in the format
-        // string, then that digit is copied to the output string. Otherwise, a '0' is
-        // stored in that position in the output string. The position of the leftmost
-        // '0' before the decimal point and the rightmost '0' after the decimal point
-        // determines the range of digits that are always present in the output
-        // string.
-        //
-        // # - Digit placeholder. If the value being
-        // formatted has a digit in the position where the '#' appears in the format
-        // string, then that digit is copied to the output string. Otherwise, nothing
-        // is stored in that position in the output string.
-        //
-        // . - Decimal point. The first '.' character
-        // in the format string determines the location of the decimal separator in the
-        // formatted value; any additional '.' characters are ignored. The actual
-        // character used as a the decimal separator in the output string is given by
-        // the NumberFormatInfo used to format the number.
-        //
-        // , - Thousand separator and number scaling.
-        // The ',' character serves two purposes. First, if the format string contains
-        // a ',' character between two digit placeholders (0 or #) and to the left of
-        // the decimal point if one is present, then the output will have thousand
-        // separators inserted between each group of three digits to the left of the
-        // decimal separator. The actual character used as a the decimal separator in
-        // the output string is given by the NumberFormatInfo used to format the
-        // number. Second, if the format string contains one or more ',' characters
-        // immediately to the left of the decimal point, or after the last digit
-        // placeholder if there is no decimal point, then the number will be divided by
-        // 1000 times the number of ',' characters before it is formatted. For example,
-        // the format string '0,,' will represent 100 million as just 100. Use of the
-        // ',' character to indicate scaling does not also cause the formatted number
-        // to have thousand separators. Thus, to scale a number by 1 million and insert
-        // thousand separators you would use the format string '#,##0,,'.
-        //
-        // % - Percentage placeholder. The presence of
-        // a '%' character in the format string causes the number to be multiplied by
-        // 100 before it is formatted. The '%' character itself is inserted in the
-        // output string where it appears in the format string.
-        //
-        // E+ E- e+ e-   - Scientific notation.
-        // If any of the strings 'E+', 'E-', 'e+', or 'e-' are present in the format
-        // string and are immediately followed by at least one '0' character, then the
-        // number is formatted using scientific notation with an 'E' or 'e' inserted
-        // between the number and the exponent. The number of '0' characters following
-        // the scientific notation indicator determines the minimum number of digits to
-        // output for the exponent. The 'E+' and 'e+' formats indicate that a sign
-        // character (plus or minus) should always precede the exponent. The 'E-' and
-        // 'e-' formats indicate that a sign character should only precede negative
-        // exponents.
-        //
-        // \ - Literal character. A backslash character
-        // causes the next character in the format string to be copied to the output
-        // string as-is. The backslash itself isn't copied, so to place a backslash
-        // character in the output string, use two backslashes (\\) in the format
-        // string.
-        //
-        // 'ABC' "ABC" - Literal string. Characters
-        // enclosed in single or double quotation marks are copied to the output string
-        // as-is and do not affect formatting.
-        //
-        // ; - Section separator. The ';' character is
-        // used to separate sections for positive, negative, and zero numbers in the
-        // format string.
-        //
-        // Other - All other characters are copied to
-        // the output string in the position they appear.
-        //
-        // For fixed point formats (formats not containing an 'E+', 'E-', 'e+', or
-        // 'e-'), the number is rounded to as many decimal places as there are digit
-        // placeholders to the right of the decimal point. If the format string does
-        // not contain a decimal point, the number is rounded to the nearest
-        // integer. If the number has more digits than there are digit placeholders to
-        // the left of the decimal point, the extra digits are copied to the output
-        // string immediately before the first digit placeholder.
-        //
-        // For scientific formats, the number is rounded to as many significant digits
-        // as there are digit placeholders in the format string.
-        //
-        // To allow for different formatting of positive, negative, and zero values, a
-        // user-defined format string may contain up to three sections separated by
-        // semicolons. The results of having one, two, or three sections in the format
-        // string are described in the table below.
-        //
-        // Sections:
-        //
-        // One - The format string applies to all values.
-        //
-        // Two - The first section applies to positive values
-        // and zeros, and the second section applies to negative values. If the number
-        // to be formatted is negative, but becomes zero after rounding according to
-        // the format in the second section, then the resulting zero is formatted
-        // according to the first section.
-        //
-        // Three - The first section applies to positive
-        // values, the second section applies to negative values, and the third section
-        // applies to zeros. The second section may be left empty (by having no
-        // characters between the semicolons), in which case the first section applies
-        // to all non-zero values. If the number to be formatted is non-zero, but
-        // becomes zero after rounding according to the format in the first or second
-        // section, then the resulting zero is formatted according to the third
-        // section.
-        //
-        // For both standard and user-defined formatting operations on values of type
-        // float and double, if the value being formatted is a NaN (Not
-        // a Number) or a positive or negative infinity, then regardless of the format
-        // string, the resulting string is given by the NaNSymbol,
-        // PositiveInfinitySymbol, or NegativeInfinitySymbol property of
-        // the NumberFormatInfo used to format the number.
-        //
-        // Parsing
-        //
-        // The Parse methods provided by the numeric classes are all of the form
-        //
-        //  public static XXX Parse(String s);
-        //  public static XXX Parse(String s, int style);
-        //  public static XXX Parse(String s, int style, NumberFormatInfo info);
-        //
-        // where XXX is the name of the particular numeric class. The methods convert a
-        // string to a numeric value. The optional style parameter specifies the
-        // permitted style of the numeric string. It must be a combination of bit flags
-        // from the NumberStyles enumeration. The optional info parameter
-        // specifies the NumberFormatInfo instance to use when parsing the
-        // string. If the info parameter is null or omitted, the numeric
-        // formatting information is obtained from the current culture.
-        //
-        // Numeric strings produced by the Format methods using the Currency,
-        // Decimal, Engineering, Fixed point, General, or Number standard formats
-        // (the C, D, E, F, G, and N format specifiers) are guaranteed to be parseable
-        // by the Parse methods if the NumberStyles.Any style is
-        // specified. Note, however, that the Parse methods do not accept
-        // NaNs or Infinities.
-        //
-        // This class contains only static members and does not need to be serializable
-
-        private static partial class Number
-        {
-            internal const int DECIMAL_PRECISION = 29; // Decimal.DecCalc also uses this value
-
-            // **********************************************************************************************************
-            //
-            // The remaining code in this module is an almost direct translation from the original unmanaged version in
-            // the CLR. The code uses NumberBuffer directly instead of an analog of the NUMBER unmanaged data structure
-            // but this causes next to no differences since we've modified NumberBuffer to take account of the changes (it
-            // has an inline array of digits and no need of a pack operation to prepare for use by the "unmanaged" code).
-            //
-            // Some minor cleanup has been done (e.g. taking advantage of StringBuilder instead of having to precompute
-            // string buffer sizes) but there's still plenty of opportunity to further C#'ize this code and potentially
-            // better unify it with the code above.
-            //
-
-            private static readonly string[] s_posCurrencyFormats =
-            {
-                "$#", "#$", "$ #", "# $"
-            };
-
-            private static readonly string[] s_negCurrencyFormats =
-            {
-                "($#)", "-$#", "$-#", "$#-",
-                "(#$)", "-#$", "#-$", "#$-",
-                "-# $", "-$ #", "# $-", "$ #-",
-                "$ -#", "#- $", "($ #)", "(# $)",
-                "$- #"
-            };
-
-            private static readonly string[] s_posPercentFormats =
-            {
-                "# %", "#%", "%#", "% #"
-            };
-
-            private static readonly string[] s_negPercentFormats =
-            {
-                "-# %", "-#%", "-%#",
-                "%-#", "%#-",
-                "#-%", "#%-",
-                "-% #", "# %-", "% #-",
-                "% -#", "#- %"
-            };
-
-            private static readonly string[] s_negNumberFormats =
-            {
-                "(#)", "-#", "- #", "#-", "# -",
-            };
-
-            private const string PosNumberFormat = "#";
-
-            internal static unsafe void Int32ToDecChars(char* buffer, ref int index, uint value, int digits)
-            {
-                while (--digits >= 0 || value != 0)
-                {
-                    buffer[--index] = (char)(value % 10 + '0');
-                    value /= 10;
-                }
-            }
-
-            internal static char ParseFormatSpecifier(ReadOnlySpan<char> format, out int digits)
-            {
-                char c = default;
-                if (format.Length > 0)
-                {
-                    // If the format begins with a symbol, see if it's a standard format
-                    // with or without a specified number of digits.
-                    c = format[0];
-                    if (char.IsAsciiLetter(c))
-                    {
-                        // Fast path for sole symbol, e.g. "D"
-                        if (format.Length == 1)
-                        {
-                            digits = -1;
-                            return c;
-                        }
-
-                        if (format.Length == 2)
-                        {
-                            // Fast path for symbol and single digit, e.g. "X4"
-                            int d = format[1] - '0';
-                            if ((uint)d < 10)
-                            {
-                                digits = d;
-                                return c;
-                            }
-                        }
-                        else if (format.Length == 3)
-                        {
-                            // Fast path for symbol and double digit, e.g. "F12"
-                            int d1 = format[1] - '0', d2 = format[2] - '0';
-                            if ((uint)d1 < 10 && (uint)d2 < 10)
-                            {
-                                digits = d1 * 10 + d2;
-                                return c;
-                            }
-                        }
-
-                        // Fallback for symbol and any length digits.  The digits value must be >= 0 && <= 999_999_999,
-                        // but it can begin with any number of 0s, and thus we may need to check more than 9
-                        // digits.  Further, for compat, we need to stop when we hit a null char.
-                        int n = 0;
-                        int i = 1;
-                        while ((uint)i < (uint)format.Length && char.IsAsciiDigit(format[i]))
-                        {
-                            // Check if we are about to overflow past our limit of 9 digits
-                            if (n >= 100_000_000)
-                            {
-                                throw new FormatException(SR.Argument_BadFormatSpecifier);
-                            }
-                            n = ((n * 10) + format[i++] - '0');
-                        }
-
-                        // If we're at the end of the digits rather than having stopped because we hit something
-                        // other than a digit or overflowed, return the standard format info.
-                        if (i == format.Length || format[i] == '\0')
-                        {
-                            digits = n;
-                            return c;
-                        }
-                    }
-                }
-
-                // Default empty format to be "G"; custom format is signified with '\0'.
-                digits = -1;
-                return format.Length == 0 || c == '\0' ? // For compat, treat '\0' as the end of the specifier, even if the specifier extends beyond it.
-                    'G' :
-                    '\0';
-            }
-
-            internal static unsafe void NumberToString(ref ValueStringBuilder sb, scoped ref NumberBuffer number, char format, int nMaxDigits, NumberFormatInfo info, bool isDecimal)
-            {
-                int nMinDigits = -1;
-
-                switch (format)
-                {
-                    case 'C':
-                    case 'c':
-                        {
-                            nMinDigits = nMaxDigits >= 0 ? nMaxDigits : info.CurrencyDecimalDigits;
-                            if (nMaxDigits < 0)
-                            {
-                                nMaxDigits = info.CurrencyDecimalDigits;
-                            }
-
-                            RoundNumber(ref number, number.scale + nMaxDigits); // Don't change this line to use digPos since digCount could have its sign changed.
-
-                            FormatCurrency(ref sb, ref number, nMinDigits, nMaxDigits, info);
-
-                            break;
-                        }
-
-                    case 'F':
-                    case 'f':
-                        {
-                            if (nMaxDigits < 0)
-                            {
-                                nMaxDigits = nMinDigits = info.NumberDecimalDigits;
-                            }
-                            else
-                            {
-                                nMinDigits = nMaxDigits;
-                            }
-
-                            RoundNumber(ref number, number.scale + nMaxDigits);
-
-                            if (number.sign)
-                            {
-                                sb.Append(info.NegativeSign);
-                            }
-
-                            FormatFixed(ref sb, ref number, nMinDigits, nMaxDigits, info, null, info.NumberDecimalSeparator, null);
-
-                            break;
-                        }
-
-                    case 'N':
-                    case 'n':
-                        {
-                            if (nMaxDigits < 0)
-                            {
-                                nMaxDigits = nMinDigits = info.NumberDecimalDigits; // Since we are using digits in our calculation
-                            }
-                            else
-                            {
-                                nMinDigits = nMaxDigits;
-                            }
-
-                            RoundNumber(ref number, number.scale + nMaxDigits);
-
-                            FormatNumber(ref sb, ref number, nMinDigits, nMaxDigits, info);
-
-                            break;
-                        }
-
-                    case 'E':
-                    case 'e':
-                        {
-                            if (nMaxDigits < 0)
-                            {
-                                nMaxDigits = nMinDigits = 6;
-                            }
-                            else
-                            {
-                                nMinDigits = nMaxDigits;
-                            }
-                            nMaxDigits++;
-
-                            RoundNumber(ref number, nMaxDigits);
-
-                            if (number.sign)
-                            {
-                                sb.Append(info.NegativeSign);
-                            }
-
-                            FormatScientific(ref sb, ref number, nMinDigits, nMaxDigits, info, format);
-
-                            break;
-                        }
-
-                    case 'G':
-                    case 'g':
-                        {
-                            bool enableRounding = true;
-                            if (nMaxDigits < 1)
-                            {
-                                if (isDecimal && (nMaxDigits == -1))
-                                {
-                                    // Default to 29 digits precision only for G formatting without a precision specifier
-                                    // This ensures that the PAL code pads out to the correct place even when we use the default precision
-                                    nMaxDigits = nMinDigits = DECIMAL_PRECISION;
-                                    enableRounding = false;  // Turn off rounding for ECMA compliance to output trailing 0's after decimal as significant
-                                }
-                                else
-                                {
-                                    // This ensures that the PAL code pads out to the correct place even when we use the default precision
-                                    nMaxDigits = nMinDigits = number.precision;
-                                }
-                            }
-                            else
-                                nMinDigits = nMaxDigits;
-
-                            if (enableRounding) // Don't round for G formatting without precision
-                            {
-                                RoundNumber(ref number, nMaxDigits); // This also fixes up the minus zero case
-                            }
-                            else
-                            {
-                                if (isDecimal && (number.digits[0] == 0))
-                                {
-                                    // Minus zero should be formatted as 0
-                                    number.sign = false;
-                                }
-                            }
-
-                            if (number.sign)
-                            {
-                                sb.Append(info.NegativeSign);
-                            }
-
-                            FormatGeneral(ref sb, ref number, nMinDigits, nMaxDigits, info, (char)(format - ('G' - 'E')), !enableRounding);
-
-                            break;
-                        }
-
-                    case 'P':
-                    case 'p':
-                        {
-                            if (nMaxDigits < 0)
-                            {
-                                nMaxDigits = nMinDigits = info.PercentDecimalDigits;
-                            }
-                            else
-                            {
-                                nMinDigits = nMaxDigits;
-                            }
-                            number.scale += 2;
-
-                            RoundNumber(ref number, number.scale + nMaxDigits);
-
-                            FormatPercent(ref sb, ref number, nMinDigits, nMaxDigits, info);
-
-                            break;
-                        }
-
-                    default:
-                        throw new FormatException(SR.Argument_BadFormatSpecifier);
-                }
-            }
-
-            private static void FormatCurrency(ref ValueStringBuilder sb, scoped ref NumberBuffer number, int nMinDigits, int nMaxDigits, NumberFormatInfo info)
-            {
-                string fmt = number.sign ?
-                    s_negCurrencyFormats[info.CurrencyNegativePattern] :
-                    s_posCurrencyFormats[info.CurrencyPositivePattern];
-
-                foreach (char ch in fmt)
-                {
-                    switch (ch)
-                    {
-                        case '#':
-                            FormatFixed(ref sb, ref number, nMinDigits, nMaxDigits, info, info.CurrencyGroupSizes, info.CurrencyDecimalSeparator, info.CurrencyGroupSeparator);
-                            break;
-                        case '-':
-                            sb.Append(info.NegativeSign);
-                            break;
-                        case '$':
-                            sb.Append(info.CurrencySymbol);
-                            break;
-                        default:
-                            sb.Append(ch);
-                            break;
-                    }
-                }
-            }
-
-            private static unsafe void FormatFixed(ref ValueStringBuilder sb, scoped ref NumberBuffer number, int nMinDigits, int nMaxDigits, NumberFormatInfo info, int[]? groupDigits, string sDecimal, string? sGroup)
-            {
-                Debug.Assert(sGroup != null || groupDigits == null);
-
-                int digPos = number.scale;
-                char* dig = number.digits;
-                int digLength = MemoryMarshal.CreateReadOnlySpanFromNullTerminated(dig).Length;
-
-                if (digPos > 0)
-                {
-                    if (groupDigits != null)
-                    {
-                        int groupSizeIndex = 0;                             // Index into the groupDigits array.
-                        int groupSizeCount = groupDigits[groupSizeIndex];   // The current total of group size.
-                        int groupSizeLen = groupDigits.Length;              // The length of groupDigits array.
-                        int bufferSize = digPos;                            // The length of the result buffer string.
-                        int groupSeparatorLen = sGroup!.Length;              // The length of the group separator string.
-                        int groupSize = 0;                                  // The current group size.
-
-                        // Find out the size of the string buffer for the result.
-                        if (groupSizeLen != 0) // You can pass in 0 length arrays
-                        {
-                            while (digPos > groupSizeCount)
-                            {
-                                groupSize = groupDigits[groupSizeIndex];
-                                if (groupSize == 0)
-                                {
-                                    break;
-                                }
-
-                                bufferSize += groupSeparatorLen;
-                                if (groupSizeIndex < groupSizeLen - 1)
-                                {
-                                    groupSizeIndex++;
-                                }
-
-                                groupSizeCount += groupDigits[groupSizeIndex];
-                                ArgumentOutOfRangeException.ThrowIfNegative(groupSizeCount); // If we overflow
-                                ArgumentOutOfRangeException.ThrowIfNegative(bufferSize);
-                            }
-
-                            if (groupSizeCount == 0) // If you passed in an array with one entry as 0, groupSizeCount == 0
-                            {
-                                groupSize = 0;
-                            }
-                            else
-                            {
-                                groupSize = groupDigits[0];
-                            }
-                        }
-
-                        char* tmpBuffer = stackalloc char[bufferSize];
-                        groupSizeIndex = 0;
-                        int digitCount = 0;
-                        int digStart;
-                        digStart = (digPos < digLength) ? digPos : digLength;
-                        char* p = tmpBuffer + bufferSize - 1;
-                        for (int i = digPos - 1; i >= 0; i--)
-                        {
-                            *(p--) = (i < digStart) ? dig[i] : '0';
-
-                            if (groupSize > 0)
-                            {
-                                digitCount++;
-                                if ((digitCount == groupSize) && (i != 0))
-                                {
-                                    for (int j = groupSeparatorLen - 1; j >= 0; j--)
-                                    {
-                                        *(p--) = sGroup[j];
-                                    }
-
-                                    if (groupSizeIndex < groupSizeLen - 1)
-                                    {
-                                        groupSizeIndex++;
-                                        groupSize = groupDigits[groupSizeIndex];
-                                    }
-                                    digitCount = 0;
-                                }
-                            }
-                        }
-
-                        sb.Append(tmpBuffer, bufferSize);
-                        dig += digStart;
-                    }
-                    else
-                    {
-                        int digits = Math.Min(digLength, digPos);
-                        sb.Append(dig, digits);
-                        dig += digits;
-                        if (digPos > digLength)
-                        {
-                            sb.Append('0', digPos - digLength);
-                        }
-                    }
-                }
-                else
-                {
-                    sb.Append('0');
-                }
-
-                if (nMaxDigits > 0)
-                {
-                    sb.Append(sDecimal);
-                    if ((digPos < 0) && (nMaxDigits > 0))
-                    {
-                        int zeroes = Math.Min(-digPos, nMaxDigits);
-                        sb.Append('0', zeroes);
-                        digPos += zeroes;
-                        nMaxDigits -= zeroes;
-                    }
-
-                    while (nMaxDigits > 0)
-                    {
-                        sb.Append((*dig != 0) ? *dig++ : '0');
-                        nMaxDigits--;
-                    }
-                }
-            }
-
-            private static void FormatNumber(ref ValueStringBuilder sb, scoped ref NumberBuffer number, int nMinDigits, int nMaxDigits, NumberFormatInfo info)
-            {
-                string fmt = number.sign ?
-                    s_negNumberFormats[info.NumberNegativePattern] :
-                    PosNumberFormat;
-
-                foreach (char ch in fmt)
-                {
-                    switch (ch)
-                    {
-                        case '#':
-                            FormatFixed(ref sb, ref number, nMinDigits, nMaxDigits, info, info.NumberGroupSizes, info.NumberDecimalSeparator, info.NumberGroupSeparator);
-                            break;
-                        case '-':
-                            sb.Append(info.NegativeSign);
-                            break;
-                        default:
-                            sb.Append(ch);
-                            break;
-                    }
-                }
-            }
-
-            private static unsafe void FormatScientific(ref ValueStringBuilder sb, scoped ref NumberBuffer number, int nMinDigits, int nMaxDigits, NumberFormatInfo info, char expChar)
-            {
-                char* dig = number.digits;
-
-                sb.Append((*dig != 0) ? *dig++ : '0');
-
-                if (nMaxDigits != 1) // For E0 we would like to suppress the decimal point
-                {
-                    sb.Append(info.NumberDecimalSeparator);
-                }
-
-                while (--nMaxDigits > 0)
-                {
-                    sb.Append((*dig != 0) ? *dig++ : '0');
-                }
-
-                int e = number.digits[0] == 0 ? 0 : number.scale - 1;
-                FormatExponent(ref sb, info, e, expChar, 3, true);
-            }
-
-            private static unsafe void FormatExponent(ref ValueStringBuilder sb, NumberFormatInfo info, int value, char expChar, int minDigits, bool positiveSign)
-            {
-                sb.Append(expChar);
-
-                if (value < 0)
-                {
-                    sb.Append(info.NegativeSign);
-                    value = -value;
-                }
-                else
-                {
-                    if (positiveSign)
-                    {
-                        sb.Append(info.PositiveSign);
-                    }
-                }
-
-                char* digits = stackalloc char[11];
-                int index = 10;
-                Int32ToDecChars(digits, ref index, (uint)value, minDigits);
-                int i = 10 - index;
-                while (--i >= 0)
-                {
-                    sb.Append(digits[index++]);
-                }
-            }
-
-            private static unsafe void FormatGeneral(ref ValueStringBuilder sb, scoped ref NumberBuffer number, int nMinDigits, int nMaxDigits, NumberFormatInfo info, char expChar, bool bSuppressScientific)
-            {
-                int digPos = number.scale;
-                bool scientific = false;
-
-                if (!bSuppressScientific)
-                {
-                    // Don't switch to scientific notation
-                    if (digPos > nMaxDigits || digPos < -3)
-                    {
-                        digPos = 1;
-                        scientific = true;
-                    }
-                }
-
-                char* dig = number.digits;
-
-                if (digPos > 0)
-                {
-                    do
-                    {
-                        sb.Append((*dig != 0) ? *dig++ : '0');
-                    } while (--digPos > 0);
-                }
-                else
-                {
-                    sb.Append('0');
-                }
-
-                if (*dig != 0 || digPos < 0)
-                {
-                    sb.Append(info.NumberDecimalSeparator);
-
-                    while (digPos < 0)
-                    {
-                        sb.Append('0');
-                        digPos++;
-                    }
-
-                    while (*dig != 0)
-                    {
-                        sb.Append(*dig++);
-                    }
-                }
-
-                if (scientific)
-                {
-                    FormatExponent(ref sb, info, number.scale - 1, expChar, 2, true);
-                }
-            }
-
-            private static void FormatPercent(ref ValueStringBuilder sb, scoped ref NumberBuffer number, int nMinDigits, int nMaxDigits, NumberFormatInfo info)
-            {
-                string fmt = number.sign ?
-                    s_negPercentFormats[info.PercentNegativePattern] :
-                    s_posPercentFormats[info.PercentPositivePattern];
-
-                foreach (char ch in fmt)
-                {
-                    switch (ch)
-                    {
-                        case '#':
-                            FormatFixed(ref sb, ref number, nMinDigits, nMaxDigits, info, info.PercentGroupSizes, info.PercentDecimalSeparator, info.PercentGroupSeparator);
-                            break;
-                        case '-':
-                            sb.Append(info.NegativeSign);
-                            break;
-                        case '%':
-                            sb.Append(info.PercentSymbol);
-                            break;
-                        default:
-                            sb.Append(ch);
-                            break;
-                    }
-                }
-            }
-
-            private static unsafe void RoundNumber(ref NumberBuffer number, int pos)
-            {
-                char* dig = number.digits;
-
-                int i = 0;
-                while (i < pos && dig[i] != 0)
-                {
-                    i++;
-                }
-
-                if (i == pos && dig[i] >= '5')
-                {
-                    while (i > 0 && dig[i - 1] == '9')
-                    {
-                        i--;
-                    }
-
-                    if (i > 0)
-                    {
-                        dig[i - 1]++;
-                    }
-                    else
-                    {
-                        number.scale++;
-                        dig[0] = '1';
-                        i = 1;
-                    }
-                }
-                else
-                {
-                    while (i > 0 && dig[i - 1] == '0')
-                    {
-                        i--;
-                    }
-                }
-
-                if (i == 0)
-                {
-                    number.scale = 0;
-                    number.sign = false;
-                }
-                dig[i] = '\0';
-            }
-
-            private static unsafe int FindSection(ReadOnlySpan<char> format, int section)
-            {
-                if (section == 0)
-                {
-                    return 0;
-                }
-
-                fixed (char* pFormat = &MemoryMarshal.GetReference(format))
-                {
-                    int src = 0;
-                    while (true)
-                    {
-                        if (src >= format.Length)
-                        {
-                            return 0;
-                        }
-
-                        char ch;
-                        switch (ch = pFormat[src++])
-                        {
-                            case '\'':
-                            case '"':
-                                while (src < format.Length && pFormat[src] != 0 && pFormat[src++] != ch)
-                                    ;
-                                break;
-                            case '\\':
-                                if (src < format.Length && pFormat[src] != 0)
-                                {
-                                    src++;
-                                }
-                                break;
-                            case ';':
-                                if (--section != 0)
-                                {
-                                    break;
-                                }
-
-                                if (src < format.Length && pFormat[src] != 0 && pFormat[src] != ';')
-                                {
-                                    return src;
-                                }
-
-                                goto case '\0';
-                            case '\0':
-                                return 0;
-                        }
-                    }
-                }
-            }
-
-            internal static unsafe void NumberToStringFormat(ref ValueStringBuilder sb, scoped ref NumberBuffer number, ReadOnlySpan<char> format, NumberFormatInfo info)
-            {
-                int digitCount;
-                int decimalPos;
-                int firstDigit;
-                int lastDigit;
-                int digPos;
-                bool scientific;
-                int thousandPos;
-                int thousandCount = 0;
-                bool thousandSeps;
-                int scaleAdjust;
-                int adjust;
-
-                int section;
-                int src;
-                char* dig = number.digits;
-                char ch;
-
-                section = FindSection(format, dig[0] == 0 ? 2 : number.sign ? 1 : 0);
-
-                while (true)
-                {
-                    digitCount = 0;
-                    decimalPos = -1;
-                    firstDigit = 0x7FFFFFFF;
-                    lastDigit = 0;
-                    scientific = false;
-                    thousandPos = -1;
-                    thousandSeps = false;
-                    scaleAdjust = 0;
-                    src = section;
-
-                    fixed (char* pFormat = &MemoryMarshal.GetReference(format))
-                    {
-                        while (src < format.Length && (ch = pFormat[src++]) != 0 && ch != ';')
-                        {
-                            switch (ch)
-                            {
-                                case '#':
-                                    digitCount++;
-                                    break;
-                                case '0':
-                                    if (firstDigit == 0x7FFFFFFF)
-                                    {
-                                        firstDigit = digitCount;
-                                    }
-                                    digitCount++;
-                                    lastDigit = digitCount;
-                                    break;
-                                case '.':
-                                    if (decimalPos < 0)
-                                    {
-                                        decimalPos = digitCount;
-                                    }
-                                    break;
-                                case ',':
-                                    if (digitCount > 0 && decimalPos < 0)
-                                    {
-                                        if (thousandPos >= 0)
-                                        {
-                                            if (thousandPos == digitCount)
-                                            {
-                                                thousandCount++;
-                                                break;
-                                            }
-                                            thousandSeps = true;
-                                        }
-                                        thousandPos = digitCount;
-                                        thousandCount = 1;
-                                    }
-                                    break;
-                                case '%':
-                                    scaleAdjust += 2;
-                                    break;
-                                case '\x2030':
-                                    scaleAdjust += 3;
-                                    break;
-                                case '\'':
-                                case '"':
-                                    while (src < format.Length && pFormat[src] != 0 && pFormat[src++] != ch)
-                                        ;
-                                    break;
-                                case '\\':
-                                    if (src < format.Length && pFormat[src] != 0)
-                                    {
-                                        src++;
-                                    }
-                                    break;
-                                case 'E':
-                                case 'e':
-                                    if ((src < format.Length && pFormat[src] == '0') ||
-                                        (src + 1 < format.Length && (pFormat[src] == '+' || pFormat[src] == '-') && pFormat[src + 1] == '0'))
-                                    {
-                                        while (++src < format.Length && pFormat[src] == '0');
-                                        scientific = true;
-                                    }
-                                    break;
-                            }
-                        }
-                    }
-
-                    if (decimalPos < 0)
-                    {
-                        decimalPos = digitCount;
-                    }
-
-                    if (thousandPos >= 0)
-                    {
-                        if (thousandPos == decimalPos)
-                        {
-                            scaleAdjust -= thousandCount * 3;
-                        }
-                        else
-                        {
-                            thousandSeps = true;
-                        }
-                    }
-
-                    if (dig[0] != 0)
-                    {
-                        number.scale += scaleAdjust;
-                        int pos = scientific ? digitCount : number.scale + digitCount - decimalPos;
-                        RoundNumber(ref number, pos);
-                        if (dig[0] == 0)
-                        {
-                            src = FindSection(format, 2);
-                            if (src != section)
-                            {
-                                section = src;
-                                continue;
-                            }
-                        }
-                    }
-                    else
-                    {
-                        number.sign = false;   // We need to format -0 without the sign set.
-                        number.scale = 0;      // Decimals with scale ('0.00') should be rounded.
-                    }
-
-                    break;
-                }
-
-                firstDigit = firstDigit < decimalPos ? decimalPos - firstDigit : 0;
-                lastDigit = lastDigit > decimalPos ? decimalPos - lastDigit : 0;
-                if (scientific)
-                {
-                    digPos = decimalPos;
-                    adjust = 0;
-                }
-                else
-                {
-                    digPos = number.scale > decimalPos ? number.scale : decimalPos;
-                    adjust = number.scale - decimalPos;
-                }
-                src = section;
-
-                // Adjust can be negative, so we make this an int instead of an unsigned int.
-                // Adjust represents the number of characters over the formatting e.g. format string is "0000" and you are trying to
-                // format 100000 (6 digits). Means adjust will be 2. On the other hand if you are trying to format 10 adjust will be
-                // -2 and we'll need to fixup these digits with 0 padding if we have 0 formatting as in this example.
-                Span<int> thousandsSepPos = stackalloc int[4];
-                int thousandsSepCtr = -1;
-
-                if (thousandSeps)
-                {
-                    // We need to precompute this outside the number formatting loop
-                    if (info.NumberGroupSeparator.Length > 0)
-                    {
-                        // We need this array to figure out where to insert the thousands separator. We would have to traverse the string
-                        // backwards. PIC formatting always traverses forwards. These indices are precomputed to tell us where to insert
-                        // the thousands separator so we can get away with traversing forwards. Note we only have to compute up to digPos.
-                        // The max is not bound since you can have formatting strings of the form "000,000..", and this
-                        // should handle that case too.
-
-                        int[] groupDigits = info.NumberGroupSizes;
-
-                        int groupSizeIndex = 0;     // Index into the groupDigits array.
-                        int groupTotalSizeCount = 0;
-                        int groupSizeLen = groupDigits.Length;    // The length of groupDigits array.
-                        if (groupSizeLen != 0)
-                        {
-                            groupTotalSizeCount = groupDigits[groupSizeIndex];   // The current running total of group size.
-                        }
-
-                        int groupSize = groupTotalSizeCount;
-
-                        int totalDigits = digPos + ((adjust < 0) ? adjust : 0); // Actual number of digits in o/p
-                        int numDigits = (firstDigit > totalDigits) ? firstDigit : totalDigits;
-                        while (numDigits > groupTotalSizeCount)
-                        {
-                            if (groupSize == 0)
-                            {
-                                break;
-                            }
-
-                            ++thousandsSepCtr;
-                            if (thousandsSepCtr >= thousandsSepPos.Length)
-                            {
-                                var newThousandsSepPos = new int[thousandsSepPos.Length * 2];
-                                thousandsSepPos.CopyTo(newThousandsSepPos);
-                                thousandsSepPos = newThousandsSepPos;
-                            }
-
-                            thousandsSepPos[thousandsSepCtr] = groupTotalSizeCount;
-                            if (groupSizeIndex < groupSizeLen - 1)
-                            {
-                                groupSizeIndex++;
-                                groupSize = groupDigits[groupSizeIndex];
-                            }
-                            groupTotalSizeCount += groupSize;
-                        }
-                    }
-                }
-
-                if (number.sign && section == 0)
-                {
-                    sb.Append(info.NegativeSign);
-                }
-
-                bool decimalWritten = false;
-
-                fixed (char* pFormat = &MemoryMarshal.GetReference(format))
-                {
-                    char* cur = dig;
-
-                    while (src < format.Length && (ch = pFormat[src++]) != 0 && ch != ';')
-                    {
-                        if (adjust > 0)
-                        {
-                            switch (ch)
-                            {
-                                case '#':
-                                case '0':
-                                case '.':
-                                    while (adjust > 0)
-                                    {
-                                        // digPos will be one greater than thousandsSepPos[thousandsSepCtr] since we are at
-                                        // the character after which the groupSeparator needs to be appended.
-                                        sb.Append(*cur != 0 ? *cur++ : '0');
-                                        if (thousandSeps && digPos > 1 && thousandsSepCtr >= 0)
-                                        {
-                                            if (digPos == thousandsSepPos[thousandsSepCtr] + 1)
-                                            {
-                                                sb.Append(info.NumberGroupSeparator);
-                                                thousandsSepCtr--;
-                                            }
-                                        }
-                                        digPos--;
-                                        adjust--;
-                                    }
-                                    break;
-                            }
-                        }
-
-                        switch (ch)
-                        {
-                            case '#':
-                            case '0':
-                                {
-                                    if (adjust < 0)
-                                    {
-                                        adjust++;
-                                        ch = digPos <= firstDigit ? '0' : '\0';
-                                    }
-                                    else
-                                    {
-                                        ch = *cur != 0 ? *cur++ : digPos > lastDigit ? '0' : '\0';
-                                    }
-                                    if (ch != 0)
-                                    {
-                                        sb.Append(ch);
-                                        if (thousandSeps && digPos > 1 && thousandsSepCtr >= 0)
-                                        {
-                                            if (digPos == thousandsSepPos[thousandsSepCtr] + 1)
-                                            {
-                                                sb.Append(info.NumberGroupSeparator);
-                                                thousandsSepCtr--;
-                                            }
-                                        }
-                                    }
-
-                                    digPos--;
-                                    break;
-                                }
-                            case '.':
-                                {
-                                    if (digPos != 0 || decimalWritten)
-                                    {
-                                        // For compatibility, don't echo repeated decimals
-                                        break;
-                                    }
-                                    // If the format has trailing zeros or the format has a decimal and digits remain
-                                    if (lastDigit < 0 || (decimalPos < digitCount && *cur != 0))
-                                    {
-                                        sb.Append(info.NumberDecimalSeparator);
-                                        decimalWritten = true;
-                                    }
-                                    break;
-                                }
-                            case '\x2030':
-                                sb.Append(info.PerMilleSymbol);
-                                break;
-                            case '%':
-                                sb.Append(info.PercentSymbol);
-                                break;
-                            case ',':
-                                break;
-                            case '\'':
-                            case '"':
-                                while (src < format.Length && pFormat[src] != 0 && pFormat[src] != ch)
-                                {
-                                    sb.Append(pFormat[src++]);
-                                }
-
-                                if (src < format.Length && pFormat[src] != 0)
-                                {
-                                    src++;
-                                }
-                                break;
-                            case '\\':
-                                if (src < format.Length && pFormat[src] != 0)
-                                {
-                                    sb.Append(pFormat[src++]);
-                                }
-                                break;
-                            case 'E':
-                            case 'e':
-                                {
-                                    bool positiveSign = false;
-                                    int i = 0;
-                                    if (scientific)
-                                    {
-                                        if (src < format.Length && pFormat[src] == '0')
-                                        {
-                                            // Handles E0, which should format the same as E-0
-                                            i++;
-                                        }
-                                        else if (src + 1 < format.Length && pFormat[src] == '+' && pFormat[src + 1] == '0')
-                                        {
-                                            // Handles E+0
-                                            positiveSign = true;
-                                        }
-                                        else if (src + 1 < format.Length && pFormat[src] == '-' && pFormat[src + 1] == '0')
-                                        {
-                                            // Handles E-0
-                                            // Do nothing, this is just a place holder s.t. we don't break out of the loop.
-                                        }
-                                        else
-                                        {
-                                            sb.Append(ch);
-                                            break;
-                                        }
-
-                                        while (++src < format.Length && pFormat[src] == '0')
-                                        {
-                                            i++;
-                                        }
-                                        if (i > 10)
-                                        {
-                                            i = 10;
-                                        }
-
-                                        int exp = dig[0] == 0 ? 0 : number.scale - decimalPos;
-                                        FormatExponent(ref sb, info, exp, ch, i, positiveSign);
-                                        scientific = false;
-                                    }
-                                    else
-                                    {
-                                        sb.Append(ch); // Copy E or e to output
-                                        if (src < format.Length)
-                                        {
-                                            if (pFormat[src] == '+' || pFormat[src] == '-')
-                                            {
-                                                sb.Append(pFormat[src++]);
-                                            }
-                                            while (src < format.Length && pFormat[src] == '0')
-                                            {
-                                                sb.Append(pFormat[src++]);
-                                            }
-                                        }
-                                    }
-                                    break;
-                                }
-                            default:
-                                sb.Append(ch);
-                                break;
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
diff --git a/src/libraries/System.Runtime.Numerics/src/System/Globalization/FormatProvider.NumberBuffer.cs b/src/libraries/System.Runtime.Numerics/src/System/Globalization/FormatProvider.NumberBuffer.cs
deleted file mode 100644
index 9f4f62cf64fd..000000000000
--- a/src/libraries/System.Runtime.Numerics/src/System/Globalization/FormatProvider.NumberBuffer.cs
+++ /dev/null
@@ -1,32 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Runtime.InteropServices;
-using System.Security;
-
-namespace System.Globalization
-{
-    internal static partial class FormatProvider
-    {
-        private static partial class Number
-        {
-            [StructLayout(LayoutKind.Sequential)]
-            internal unsafe struct NumberBuffer
-            {
-                public int precision;
-                public int scale;
-                public bool sign;
-
-                public char* digits
-                {
-                    get
-                    {
-                        return overrideDigits;
-                    }
-                }
-
-                public char* overrideDigits; // Used for BigNumber support which can't be limited to 32 characters.
-            }
-        }
-    }
-}
diff --git a/src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs b/src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs
index a5fc0e6f1f1d..0ade5d0aae43 100644
--- a/src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs
+++ b/src/libraries/System.Runtime.Numerics/src/System/Number.BigInteger.cs
@@ -1,276 +1,8 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-// The BigNumber class implements methods for formatting and parsing
-// big numeric values. To format and parse numeric values, applications should
-// use the Format and Parse methods provided by the numeric
-// classes (BigInteger). Those
-// Format and Parse methods share a common implementation
-// provided by this class, and are thus documented in detail here.
-//
-// Formatting
-//
-// The Format methods provided by the numeric classes are all of the
-// form
-//
-//  public static String Format(XXX value, String format);
-//  public static String Format(XXX value, String format, NumberFormatInfo info);
-//
-// where XXX is the name of the particular numeric class. The methods convert
-// the numeric value to a string using the format string given by the
-// format parameter. If the format parameter is null or
-// an empty string, the number is formatted as if the string "G" (general
-// format) was specified. The info parameter specifies the
-// NumberFormatInfo instance to use when formatting the number. If the
-// info parameter is null or omitted, the numeric formatting information
-// is obtained from the current culture. The NumberFormatInfo supplies
-// such information as the characters to use for decimal and thousand
-// separators, and the spelling and placement of currency symbols in monetary
-// values.
-//
-// Format strings fall into two categories: Standard format strings and
-// user-defined format strings. A format string consisting of a single
-// alphabetic character (A-Z or a-z), optionally followed by a sequence of
-// digits (0-9), is a standard format string. All other format strings are
-// used-defined format strings.
-//
-// A standard format string takes the form Axx, where A is an
-// alphabetic character called the format specifier and xx is a
-// sequence of digits called the precision specifier. The format
-// specifier controls the type of formatting applied to the number and the
-// precision specifier controls the number of significant digits or decimal
-// places of the formatting operation. The following table describes the
-// supported standard formats.
-//
-// C c - Currency format. The number is
-// converted to a string that represents a currency amount. The conversion is
-// controlled by the currency format information of the NumberFormatInfo
-// used to format the number. The precision specifier indicates the desired
-// number of decimal places. If the precision specifier is omitted, the default
-// currency precision given by the NumberFormatInfo is used.
-//
-// D d - Decimal format. This format is
-// supported for integral types only. The number is converted to a string of
-// decimal digits, prefixed by a minus sign if the number is negative. The
-// precision specifier indicates the minimum number of digits desired in the
-// resulting string. If required, the number will be left-padded with zeros to
-// produce the number of digits given by the precision specifier.
-//
-// E e Engineering (scientific) format.
-// The number is converted to a string of the form
-// "-d.ddd...E+ddd" or "-d.ddd...e+ddd", where each
-// 'd' indicates a digit (0-9). The string starts with a minus sign if the
-// number is negative, and one digit always precedes the decimal point. The
-// precision specifier indicates the desired number of digits after the decimal
-// point. If the precision specifier is omitted, a default of 6 digits after
-// the decimal point is used. The format specifier indicates whether to prefix
-// the exponent with an 'E' or an 'e'. The exponent is always consists of a
-// plus or minus sign and three digits.
-//
-// F f Fixed point format. The number is
-// converted to a string of the form "-ddd.ddd....", where each
-// 'd' indicates a digit (0-9). The string starts with a minus sign if the
-// number is negative. The precision specifier indicates the desired number of
-// decimal places. If the precision specifier is omitted, the default numeric
-// precision given by the NumberFormatInfo is used.
-//
-// G g - General format. The number is
-// converted to the shortest possible decimal representation using fixed point
-// or scientific format. The precision specifier determines the number of
-// significant digits in the resulting string. If the precision specifier is
-// omitted, the number of significant digits is determined by the type of the
-// number being converted (10 for int, 19 for long, 7 for
-// float, 15 for double, 19 for Currency, and 29 for
-// Decimal). Trailing zeros after the decimal point are removed, and the
-// resulting string contains a decimal point only if required. The resulting
-// string uses fixed point format if the exponent of the number is less than
-// the number of significant digits and greater than or equal to -4. Otherwise,
-// the resulting string uses scientific format, and the case of the format
-// specifier controls whether the exponent is prefixed with an 'E' or an
-// 'e'.
-//
-// N n Number format. The number is
-// converted to a string of the form "-d,ddd,ddd.ddd....", where
-// each 'd' indicates a digit (0-9). The string starts with a minus sign if the
-// number is negative. Thousand separators are inserted between each group of
-// three digits to the left of the decimal point. The precision specifier
-// indicates the desired number of decimal places. If the precision specifier
-// is omitted, the default numeric precision given by the
-// NumberFormatInfo is used.
-//
-// X x - Hexadecimal format. This format is
-// supported for integral types only. The number is converted to a string of
-// hexadecimal digits. The format specifier indicates whether to use upper or
-// lower case characters for the hexadecimal digits above 9 ('X' for 'ABCDEF',
-// and 'x' for 'abcdef'). The precision specifier indicates the minimum number
-// of digits desired in the resulting string. If required, the number will be
-// left-padded with zeros to produce the number of digits given by the
-// precision specifier.
-//
-// Some examples of standard format strings and their results are shown in the
-// table below. (The examples all assume a default NumberFormatInfo.)
-//
-// Value        Format  Result
-// 12345.6789   C       $12,345.68
-// -12345.6789  C       ($12,345.68)
-// 12345        D       12345
-// 12345        D8      00012345
-// 12345.6789   E       1.234568E+004
-// 12345.6789   E10     1.2345678900E+004
-// 12345.6789   e4      1.2346e+004
-// 12345.6789   F       12345.68
-// 12345.6789   F0      12346
-// 12345.6789   F6      12345.678900
-// 12345.6789   G       12345.6789
-// 12345.6789   G7      12345.68
-// 123456789    G7      1.234568E8
-// 12345.6789   N       12,345.68
-// 123456789    N4      123,456,789.0000
-// 0x2c45e      x       2c45e
-// 0x2c45e      X       2C45E
-// 0x2c45e      X8      0002C45E
-//
-// Format strings that do not start with an alphabetic character, or that start
-// with an alphabetic character followed by a non-digit, are called
-// user-defined format strings. The following table describes the formatting
-// characters that are supported in user defined format strings.
-//
-//
-// 0 - Digit placeholder. If the value being
-// formatted has a digit in the position where the '0' appears in the format
-// string, then that digit is copied to the output string. Otherwise, a '0' is
-// stored in that position in the output string. The position of the leftmost
-// '0' before the decimal point and the rightmost '0' after the decimal point
-// determines the range of digits that are always present in the output
-// string.
-//
-// # - Digit placeholder. If the value being
-// formatted has a digit in the position where the '#' appears in the format
-// string, then that digit is copied to the output string. Otherwise, nothing
-// is stored in that position in the output string.
-//
-// . - Decimal point. The first '.' character
-// in the format string determines the location of the decimal separator in the
-// formatted value; any additional '.' characters are ignored. The actual
-// character used as a the decimal separator in the output string is given by
-// the NumberFormatInfo used to format the number.
-//
-// , - Thousand separator and number scaling.
-// The ',' character serves two purposes. First, if the format string contains
-// a ',' character between two digit placeholders (0 or #) and to the left of
-// the decimal point if one is present, then the output will have thousand
-// separators inserted between each group of three digits to the left of the
-// decimal separator. The actual character used as a the decimal separator in
-// the output string is given by the NumberFormatInfo used to format the
-// number. Second, if the format string contains one or more ',' characters
-// immediately to the left of the decimal point, or after the last digit
-// placeholder if there is no decimal point, then the number will be divided by
-// 1000 times the number of ',' characters before it is formatted. For example,
-// the format string '0,,' will represent 100 million as just 100. Use of the
-// ',' character to indicate scaling does not also cause the formatted number
-// to have thousand separators. Thus, to scale a number by 1 million and insert
-// thousand separators you would use the format string '#,##0,,'.
-//
-// % - Percentage placeholder. The presence of
-// a '%' character in the format string causes the number to be multiplied by
-// 100 before it is formatted. The '%' character itself is inserted in the
-// output string where it appears in the format string.
-//
-// E+ E- e+ e-   - Scientific notation.
-// If any of the strings 'E+', 'E-', 'e+', or 'e-' are present in the format
-// string and are immediately followed by at least one '0' character, then the
-// number is formatted using scientific notation with an 'E' or 'e' inserted
-// between the number and the exponent. The number of '0' characters following
-// the scientific notation indicator determines the minimum number of digits to
-// output for the exponent. The 'E+' and 'e+' formats indicate that a sign
-// character (plus or minus) should always precede the exponent. The 'E-' and
-// 'e-' formats indicate that a sign character should only precede negative
-// exponents.
-//
-// \ - Literal character. A backslash character
-// causes the next character in the format string to be copied to the output
-// string as-is. The backslash itself isn't copied, so to place a backslash
-// character in the output string, use two backslashes (\\) in the format
-// string.
-//
-// 'ABC' "ABC" - Literal string. Characters
-// enclosed in single or double quotation marks are copied to the output string
-// as-is and do not affect formatting.
-//
-// ; - Section separator. The ';' character is
-// used to separate sections for positive, negative, and zero numbers in the
-// format string.
-//
-// Other - All other characters are copied to
-// the output string in the position they appear.
-//
-// For fixed point formats (formats not containing an 'E+', 'E-', 'e+', or
-// 'e-'), the number is rounded to as many decimal places as there are digit
-// placeholders to the right of the decimal point. If the format string does
-// not contain a decimal point, the number is rounded to the nearest
-// integer. If the number has more digits than there are digit placeholders to
-// the left of the decimal point, the extra digits are copied to the output
-// string immediately before the first digit placeholder.
-//
-// For scientific formats, the number is rounded to as many significant digits
-// as there are digit placeholders in the format string.
-//
-// To allow for different formatting of positive, negative, and zero values, a
-// user-defined format string may contain up to three sections separated by
-// semicolons. The results of having one, two, or three sections in the format
-// string are described in the table below.
-//
-// Sections:
-//
-// One - The format string applies to all values.
-//
-// Two - The first section applies to positive values
-// and zeros, and the second section applies to negative values. If the number
-// to be formatted is negative, but becomes zero after rounding according to
-// the format in the second section, then the resulting zero is formatted
-// according to the first section.
-//
-// Three - The first section applies to positive
-// values, the second section applies to negative values, and the third section
-// applies to zeros. The second section may be left empty (by having no
-// characters between the semicolons), in which case the first section applies
-// to all non-zero values. If the number to be formatted is non-zero, but
-// becomes zero after rounding according to the format in the first or second
-// section, then the resulting zero is formatted according to the third
-// section.
-//
-// For both standard and user-defined formatting operations on values of type
-// float and double, if the value being formatted is a NaN (Not
-// a Number) or a positive or negative infinity, then regardless of the format
-// string, the resulting string is given by the NaNSymbol,
-// PositiveInfinitySymbol, or NegativeInfinitySymbol property of
-// the NumberFormatInfo used to format the number.
-//
-// Parsing
-//
-// The Parse methods provided by the numeric classes are all of the form
-//
-//  public static XXX Parse(String s);
-//  public static XXX Parse(String s, int style);
-//  public static XXX Parse(String s, int style, NumberFormatInfo info);
-//
-// where XXX is the name of the particular numeric class. The methods convert a
-// string to a numeric value. The optional style parameter specifies the
-// permitted style of the numeric string. It must be a combination of bit flags
-// from the NumberStyles enumeration. The optional info parameter
-// specifies the NumberFormatInfo instance to use when parsing the
-// string. If the info parameter is null or omitted, the numeric
-// formatting information is obtained from the current culture.
-//
-// Numeric strings produced by the Format methods using the Currency,
-// Decimal, Engineering, Fixed point, General, or Number standard formats
-// (the C, D, E, F, G, and N format specifiers) are guaranteed to be parseable
-// by the Parse methods if the NumberStyles.Any style is
-// specified. Note, however, that the Parse methods do not accept
-// NaNs or Infinities.
-//
-
 using System.Buffers;
+using System.Collections.Generic;
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Globalization;
@@ -445,7 +177,7 @@ internal static ParsingStatus TryParseBigIntegerHexOrBinaryNumberStyle<TParser,
             int leadingBitsCount = value.Length % TParser.DigitsPerBlock;
 
             uint leading = signBits;
-            // First parse unanligned leading block if exists.
+            // First parse unaligned leading block if exists.
             if (leadingBitsCount != 0)
             {
                 if (!TParser.TryParseUnalignedBlock(value[0..leadingBitsCount], out leading))
@@ -484,8 +216,8 @@ internal static ParsingStatus TryParseBigIntegerHexOrBinaryNumberStyle<TParser,
                     // Require to store in _bits.
 
                     // Positive: sign=1, bits=[leading]
-                    // Negative: sign=-1, bits=[leading ^ -1 + 1]=[-leading]
-                    result = new BigInteger((int)signBits | 1, [leading ^ signBits - signBits]);
+                    // Negative: sign=-1, bits=[(leading ^ -1) + 1]=[-leading]
+                    result = new BigInteger((int)signBits | 1, [(leading ^ signBits) - signBits]);
                     return ParsingStatus.OK;
                 }
                 else
@@ -925,44 +657,6 @@ void MultiplyAdd(ref Span<uint> currentBuffer, uint multiplier, uint addValue)
             }
         }
 
-        internal static char ParseFormatSpecifier(ReadOnlySpan<char> format, out int digits)
-        {
-            digits = -1;
-            if (format.Length == 0)
-            {
-                return 'R';
-            }
-
-            int i = 0;
-            char ch = format[i];
-            if (char.IsAsciiLetter(ch))
-            {
-                // The digits value must be >= 0 && <= 999_999_999,
-                // but it can begin with any number of 0s, and thus we may need to check more than 9
-                // digits.  Further, for compat, we need to stop when we hit a null char.
-                i++;
-                int n = 0;
-                while ((uint)i < (uint)format.Length && char.IsAsciiDigit(format[i]))
-                {
-                    // Check if we are about to overflow past our limit of 9 digits
-                    if (n >= 100_000_000)
-                    {
-                        throw new FormatException(SR.Argument_BadFormatSpecifier);
-                    }
-                    n = ((n * 10) + format[i++] - '0');
-                }
-
-                // If we're at the end of the digits rather than having stopped because we hit something
-                // other than a digit or overflowed, return the standard format info.
-                if (i >= format.Length || format[i] == '\0')
-                {
-                    digits = n;
-                    return ch;
-                }
-            }
-            return (char)0; // Custom format
-        }
-
         private static string? FormatBigIntegerToHex(bool targetSpan, BigInteger value, char format, int digits, NumberFormatInfo info, Span<char> destination, out int charsWritten, out bool spanSuccess)
         {
             Debug.Assert(format == 'x' || format == 'X');
@@ -1156,7 +850,7 @@ internal static bool TryFormatBigInteger(BigInteger value, ReadOnlySpan<char> fo
             return spanSuccess;
         }
 
-        private static string? FormatBigInteger(
+        private static unsafe string? FormatBigInteger(
             bool targetSpan, BigInteger value,
             string? formatString, ReadOnlySpan<char> formatSpan,
             NumberFormatInfo info, Span<char> destination, out int charsWritten, out bool spanSuccess)
@@ -1285,23 +979,45 @@ internal static bool TryFormatBigInteger(BigInteger value, ReadOnlySpan<char> fo
             {
                 // sign = true for negative and false for 0 and positive values
                 bool sign = (value._sign < 0);
-                // The cut-off point to switch (G)eneral from (F)ixed-point to (E)xponential form
-                int precision = 29;
                 int scale = cchMax - ichDst;
 
-                var sb = new ValueStringBuilder(stackalloc char[128]); // arbitrary stack cut-off
-                FormatProvider.FormatBigInteger(ref sb, precision, scale, sign, formatSpan, info, rgch, ichDst);
-
-                if (targetSpan)
+                byte[]? buffer = ArrayPool<byte>.Shared.Rent(rgchBufSize + 1);
+                fixed (byte* ptr = buffer) // NumberBuffer expects pinned Digits
                 {
-                    spanSuccess = sb.TryCopyTo(destination, out charsWritten);
-                    return null;
-                }
-                else
-                {
-                    charsWritten = 0;
-                    spanSuccess = false;
-                    return sb.ToString();
+                    scoped NumberBuffer number = new NumberBuffer(NumberBufferKind.Integer, buffer);
+
+                    for (int i = 0; i < rgch.Length - ichDst; i++)
+                        number.Digits[i] = (byte)rgch[ichDst + i];
+                    number.Digits[rgch.Length - ichDst] = 0;
+                    number.DigitsCount = rgch.Length - ichDst - 1; // The cut-off point to switch (G)eneral from (F)ixed-point to (E)xponential form
+                    number.Scale = scale;
+                    number.IsNegative = sign;
+
+                    scoped var vlb = new ValueListBuilder<Utf16Char>(stackalloc Utf16Char[128]); // arbitrary stack cut-off
+
+                    if (fmt != 0)
+                    {
+                        NumberToString(ref vlb, ref number, fmt, digits, info);
+                    }
+                    else
+                    {
+                        NumberToStringFormat(ref vlb, ref number, formatSpan, info);
+                    }
+
+                    if (targetSpan)
+                    {
+                        spanSuccess = vlb.TryCopyTo(MemoryMarshal.Cast<char, Utf16Char>(destination), out charsWritten);
+                        vlb.Dispose();
+                        return null;
+                    }
+                    else
+                    {
+                        charsWritten = 0;
+                        spanSuccess = false;
+                        string result = MemoryMarshal.Cast<Utf16Char, char>(vlb.AsSpan()).ToString();
+                        vlb.Dispose();
+                        return result;
+                    }
                 }
             }
 
diff --git a/src/libraries/System.Runtime.Numerics/src/System/Number.Polyfill.cs b/src/libraries/System.Runtime.Numerics/src/System/Number.Polyfill.cs
index 8db22bd8a2d0..d5de46b5842e 100644
--- a/src/libraries/System.Runtime.Numerics/src/System/Number.Polyfill.cs
+++ b/src/libraries/System.Runtime.Numerics/src/System/Number.Polyfill.cs
@@ -86,6 +86,22 @@ internal static ReadOnlySpan<TChar> CurrencySymbolTChar<TChar>(this NumberFormat
             return MemoryMarshal.Cast<char, TChar>(info.CurrencySymbol);
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static ReadOnlySpan<TChar> PercentSymbolTChar<TChar>(this NumberFormatInfo info)
+            where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(typeof(TChar) == typeof(Utf16Char));
+            return MemoryMarshal.Cast<char, TChar>(info.PercentSymbol);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static ReadOnlySpan<TChar> PerMilleSymbolTChar<TChar>(this NumberFormatInfo info)
+            where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(typeof(TChar) == typeof(Utf16Char));
+            return MemoryMarshal.Cast<char, TChar>(info.PerMilleSymbol);
+        }
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal static ReadOnlySpan<TChar> CurrencyDecimalSeparatorTChar<TChar>(this NumberFormatInfo info)
             where TChar : unmanaged, IUtfChar<TChar>
@@ -117,5 +133,21 @@ internal static ReadOnlySpan<TChar> NumberGroupSeparatorTChar<TChar>(this Number
             Debug.Assert(typeof(TChar) == typeof(Utf16Char));
             return MemoryMarshal.Cast<char, TChar>(info.NumberGroupSeparator);
         }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static ReadOnlySpan<TChar> PercentDecimalSeparatorTChar<TChar>(this NumberFormatInfo info)
+            where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(typeof(TChar) == typeof(Utf16Char));
+            return MemoryMarshal.Cast<char, TChar>(info.PercentDecimalSeparator);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static ReadOnlySpan<TChar> PercentGroupSeparatorTChar<TChar>(this NumberFormatInfo info)
+            where TChar : unmanaged, IUtfChar<TChar>
+        {
+            Debug.Assert(typeof(TChar) == typeof(Utf16Char));
+            return MemoryMarshal.Cast<char, TChar>(info.PercentGroupSeparator);
+        }
     }
 }
diff --git a/src/libraries/System.Runtime.Numerics/src/System/ThrowHelper.cs b/src/libraries/System.Runtime.Numerics/src/System/ThrowHelper.cs
index 3a73f6e2c946..a81589e2526d 100644
--- a/src/libraries/System.Runtime.Numerics/src/System/ThrowHelper.cs
+++ b/src/libraries/System.Runtime.Numerics/src/System/ThrowHelper.cs
@@ -26,5 +26,11 @@ internal static void ThrowValueArgumentOutOfRange_NeedNonNegNumException()
         {
             throw new ArgumentOutOfRangeException("value", SR.ArgumentOutOfRange_NeedNonNegNum);
         }
+
+        [DoesNotReturn]
+        internal static void ThrowFormatException_BadFormatSpecifier()
+        {
+            throw new FormatException(SR.Argument_BadFormatSpecifier);
+        }
     }
 }
diff --git a/src/libraries/System.Runtime.Numerics/tests/BigInteger/parse.cs b/src/libraries/System.Runtime.Numerics/tests/BigInteger/parse.cs
index 86652ef6550a..9852ea93bf1a 100644
--- a/src/libraries/System.Runtime.Numerics/tests/BigInteger/parse.cs
+++ b/src/libraries/System.Runtime.Numerics/tests/BigInteger/parse.cs
@@ -143,6 +143,18 @@ public void Parse_Hex32Bits()
             Assert.True(BigInteger.TryParse("080000001", NumberStyles.HexNumber, null, out result));
             Assert.Equal(0x80000001u, result);
 
+            Assert.True(BigInteger.TryParse("F0000001", NumberStyles.HexNumber, null, out result));
+            Assert.Equal(-0xFFFFFFFL, result);
+
+            Assert.True(BigInteger.TryParse("0F0000001", NumberStyles.HexNumber, null, out result));
+            Assert.Equal(0xF0000001u, result);
+            
+            Assert.True(BigInteger.TryParse("F00000001", NumberStyles.HexNumber, null, out result));
+            Assert.Equal(-0xFFFFFFFFL, result);
+
+            Assert.True(BigInteger.TryParse("0F00000001", NumberStyles.HexNumber, null, out result));
+            Assert.Equal(0xF00000001u, result);
+
             // Regression test for: https://github.com/dotnet/runtime/issues/74758
             Assert.True(BigInteger.TryParse("FFFFFFFFE", NumberStyles.HexNumber, null, out result));
             Assert.Equal(new BigInteger(-2), result);
@@ -175,6 +187,7 @@ public void Parse_Hex32Bits()
         [InlineData("10000000000000000000000000000000", (long)int.MinValue)]
         [InlineData("010000000000000000000000000000001", 0x080000001L)]
         [InlineData("111111111111111111111111111111110", -2L)]
+        [InlineData("100000000000000000000000000000001", -0xFFFFFFFFL)]
         [InlineData("0111111111111111111111111111111111", 0x1FFFFFFFFL)]
         public void Parse_BinSpecialCases(string input, long expectedValue)
         {
diff --git a/src/libraries/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/Formatters/Binary/BinaryObjectInfo.cs b/src/libraries/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/Formatters/Binary/BinaryObjectInfo.cs
index 3a5476f069eb..10449accd7ce 100644
--- a/src/libraries/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/Formatters/Binary/BinaryObjectInfo.cs
+++ b/src/libraries/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/Formatters/Binary/BinaryObjectInfo.cs
@@ -709,7 +709,7 @@ private int Position(string? name)
                         // A field on the type isn't found. See if the field has OptionalFieldAttribute.  We only throw
                         // when the assembly format is set appropriately.
                         if (!_isSimpleAssembly &&
-                            _cache._memberInfos[i].GetCustomAttribute(typeof(OptionalFieldAttribute), inherit: false) == null)
+                            _cache._memberInfos[i].GetCustomAttribute<OptionalFieldAttribute>(inherit: false) == null)
                         {
                             Debug.Assert(_cache._memberNames != null);
                             throw new SerializationException(SR.Format(SR.Serialization_MissingMember, _cache._memberNames[i], objectType, typeof(OptionalFieldAttribute).FullName));
diff --git a/src/libraries/System.Runtime.Serialization.Schema/tests/System/Runtime/Serialization/Schema/Import/SurrogateTests.cs b/src/libraries/System.Runtime.Serialization.Schema/tests/System/Runtime/Serialization/Schema/Import/SurrogateTests.cs
index 35d64bde333b..0f7f015caa7c 100644
--- a/src/libraries/System.Runtime.Serialization.Schema/tests/System/Runtime/Serialization/Schema/Import/SurrogateTests.cs
+++ b/src/libraries/System.Runtime.Serialization.Schema/tests/System/Runtime/Serialization/Schema/Import/SurrogateTests.cs
@@ -59,7 +59,7 @@ public void DefaultScenario()
             Assert.Contains(@"public partial class SerializableSquare : object, System.Runtime.Serialization.IExtensibleDataObject", code);
 
             Assert.Contains(@"namespace System.Runtime.Serialization.Schema.Tests.DataContracts", code);
-            Assert.Matches(@"\[System.Runtime.Serialization.DataContractAttribute\(Name\s*=\s*""SerializableNode"", Namespace\s*=\s*""http://schemas.datacontract.org/2004/07/System.Runtime.Serialization.Schema.Tests""\s*\+\s*"".DataContracts""\)\]\s*public partial class SerializableNode : object, System.Runtime.Serialization.IExtensibleDataObject", code);
+            Assert.Matches(@"\[System.Runtime.Serialization.DataContractAttribute\(Name\s*=\s*""SerializableNode"", Namespace\s*=\s*\(""http://schemas.datacontract.org/2004/07/System.Runtime.Serialization.Schema.Tests""\s*\+\s*"".DataContracts""\)\)\]\s*public partial class SerializableNode : object, System.Runtime.Serialization.IExtensibleDataObject", code);
             Assert.Matches(@"\[System.Xml.Serialization.XmlSchemaProviderAttribute\(""ExportSchema""\)\]\s*\[System.Xml.Serialization.XmlRootAttribute\(ElementName\s*=\s*""XmlSerializerPersonElement"", Namespace\s*=\s*""""\)\]\s*public partial class XmlSerializerPerson : object, System.Xml.Serialization.IXmlSerializable", code);
         }
 
@@ -92,7 +92,7 @@ public void WithReferencedType()
             Assert.Matches(@"\[System.Runtime.Serialization.DataMemberAttribute\(\)\]\s*public System.Runtime.Serialization.Schema.Tests.DataContracts.SerializableCircle Circle", code);
 
             Assert.Contains(@"namespace System.Runtime.Serialization.Schema.Tests.DataContracts", code);
-            Assert.Matches(@"\[System.Runtime.Serialization.DataContractAttribute\(Name\s*=\s*""SerializableNode"", Namespace\s*=\s*""http://schemas.datacontract.org/2004/07/System.Runtime.Serialization.Schema.Tests""\s*\+\s*"".DataContracts""\)\]\s*public partial class SerializableNode : object, System.Runtime.Serialization.IExtensibleDataObject", code);
+            Assert.Matches(@"\[System.Runtime.Serialization.DataContractAttribute\(Name\s*=\s*""SerializableNode"", Namespace\s*=\s*\(""http://schemas.datacontract.org/2004/07/System.Runtime.Serialization.Schema.Tests""\s*\+\s*"".DataContracts""\)\)\]\s*public partial class SerializableNode : object, System.Runtime.Serialization.IExtensibleDataObject", code);
             Assert.Matches(@"\[System.Xml.Serialization.XmlSchemaProviderAttribute\(""ExportSchema""\)\]\s*\[System.Xml.Serialization.XmlRootAttribute\(ElementName\s*=\s*""XmlSerializerPersonElement"", Namespace\s*=\s*""""\)\]\s*public partial class XmlSerializerPerson : object, System.Xml.Serialization.IXmlSerializable", code);
             Assert.DoesNotContain(@"public partial class SerializableSquare : object, System.Runtime.Serialization.IExtensibleDataObject", code);
         }
diff --git a/src/libraries/System.Runtime.Serialization.Schema/tests/System/Runtime/Serialization/Schema/RoundTripTest.cs b/src/libraries/System.Runtime.Serialization.Schema/tests/System/Runtime/Serialization/Schema/RoundTripTest.cs
index eaad37ff1546..b8fe734b1a55 100644
--- a/src/libraries/System.Runtime.Serialization.Schema/tests/System/Runtime/Serialization/Schema/RoundTripTest.cs
+++ b/src/libraries/System.Runtime.Serialization.Schema/tests/System/Runtime/Serialization/Schema/RoundTripTest.cs
@@ -21,7 +21,6 @@ public RoundTripTest(ITestOutputHelper output)
 
         [Fact]
         [ActiveIssue("https://github.com/dotnet/runtime/issues/73961", typeof(PlatformDetection), nameof(PlatformDetection.IsBuiltWithAggressiveTrimming), nameof(PlatformDetection.IsBrowser))]
-        [ActiveIssue("https://github.com/dotnet/runtime/issues/95981", typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnBrowser))]
         public void RountTripTest()
         {
             // AppContext SetSwitch seems to be unreliable in the unit test case. So let's not rely on it
diff --git a/src/libraries/System.Runtime.Serialization.Xml/tests/DataContractSerializer.cs b/src/libraries/System.Runtime.Serialization.Xml/tests/DataContractSerializer.cs
index 8a7acf080059..bbf437b40207 100644
--- a/src/libraries/System.Runtime.Serialization.Xml/tests/DataContractSerializer.cs
+++ b/src/libraries/System.Runtime.Serialization.Xml/tests/DataContractSerializer.cs
@@ -4475,7 +4475,6 @@ public static void DCS_TypeWithPrimitiveKnownTypes()
         Assert.NotNull(actual);
     }
 
-    [ActiveIssue("https://github.com/dotnet/runtime/issues/1417", TestPlatforms.OSX)]
     [SkipOnPlatform(TestPlatforms.Browser, "Causes a stack overflow")]
     [Fact]
     public static void DCS_DeeplyLinkedData()
diff --git a/src/libraries/System.Runtime/System.Runtime.sln b/src/libraries/System.Runtime/System.Runtime.sln
index 6672fa99e730..8a8c28245564 100644
--- a/src/libraries/System.Runtime/System.Runtime.sln
+++ b/src/libraries/System.Runtime/System.Runtime.sln
@@ -1,4 +1,8 @@
-﻿Microsoft Visual Studio Solution File, Format Version 12.00
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.9.34414.90
+MinimumVisualStudioVersion = 10.0.40219.1
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.CoreLib", "..\..\coreclr\System.Private.CoreLib\System.Private.CoreLib.csproj", "{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}"
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "StreamConformanceTests", "..\Common\tests\StreamConformanceTests\StreamConformanceTests.csproj", "{F86D6534-1A96-489E-A807-C14E616686D6}"
@@ -225,55 +229,55 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Mono.Linker", "..\..\tools\
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{28140562-A65A-48E9-ABAB-53BA939084F0}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Buffers.Tests", "tests\System.Buffers.Tests", "{A8F66678-BCB4-4F6B-B25A-600FE36C8564}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Buffers.Tests", "System.Buffers.Tests", "{A8F66678-BCB4-4F6B-B25A-600FE36C8564}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Diagnostics.Debug.Tests", "tests\System.Diagnostics.Debug.Tests", "{21065BE2-2867-4CE8-A903-8B740CC64B78}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Diagnostics.Debug.Tests", "System.Diagnostics.Debug.Tests", "{21065BE2-2867-4CE8-A903-8B740CC64B78}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Diagnostics.Tools.Tests", "tests\System.Diagnostics.Tools.Tests", "{23D9BABE-C96D-44F2-A9F6-C26D5AD62AF9}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Diagnostics.Tools.Tests", "System.Diagnostics.Tools.Tests", "{23D9BABE-C96D-44F2-A9F6-C26D5AD62AF9}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Dynamic.Runtime.Tests", "tests\System.Dynamic.Runtime.Tests", "{3DFDB754-40F3-4D77-92DE-C5375D6C7A57}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Dynamic.Runtime.Tests", "System.Dynamic.Runtime.Tests", "{3DFDB754-40F3-4D77-92DE-C5375D6C7A57}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Globalization.Calendars.Tests", "tests\System.Globalization.Calendars.Tests", "{606124FE-6314-43D7-AA19-35334046FF29}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Globalization.Calendars.Tests", "System.Globalization.Calendars.Tests", "{606124FE-6314-43D7-AA19-35334046FF29}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Globalization.Extensions.Tests", "tests\System.Globalization.Extensions.Tests", "{BB82924E-1218-401C-85C2-F455FFA797F4}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Globalization.Extensions.Tests", "System.Globalization.Extensions.Tests", "{BB82924E-1218-401C-85C2-F455FFA797F4}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Globalization.Tests", "tests\System.Globalization.Tests", "{8518F031-3709-45D6-B577-356E32BB5FF7}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Globalization.Tests", "System.Globalization.Tests", "{8518F031-3709-45D6-B577-356E32BB5FF7}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.IO.FileSystem.Primitives.Tests", "tests\System.IO.FileSystem.Primitives.Tests", "{9DAF4ABC-9D97-4B12-9524-02DEEA8BF5B0}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.IO.FileSystem.Primitives.Tests", "System.IO.FileSystem.Primitives.Tests", "{9DAF4ABC-9D97-4B12-9524-02DEEA8BF5B0}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.IO.FileSystem.Tests", "tests\System.IO.FileSystem.Tests", "{242CC51B-8B02-4679-8899-77631E6B6AD0}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.IO.FileSystem.Tests", "System.IO.FileSystem.Tests", "{242CC51B-8B02-4679-8899-77631E6B6AD0}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.IO.Tests", "tests\System.IO.Tests", "{60ED493D-0A65-4052-9B5E-547451CB1E6B}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.IO.Tests", "System.IO.Tests", "{60ED493D-0A65-4052-9B5E-547451CB1E6B}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.IO.UnmanagedMemoryStream.Tests", "tests\System.IO.UnmanagedMemoryStream.Tests", "{34FC303C-B543-4E5B-BDA8-DE9B3C4BFEFA}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.IO.UnmanagedMemoryStream.Tests", "System.IO.UnmanagedMemoryStream.Tests", "{34FC303C-B543-4E5B-BDA8-DE9B3C4BFEFA}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Reflection.Tests", "tests\System.Reflection.Tests", "{0A2BFEE4-98EE-4AB5-8811-BB1B3207D013}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Reflection.Tests", "System.Reflection.Tests", "{0A2BFEE4-98EE-4AB5-8811-BB1B3207D013}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Resources.Reader.Tests", "tests\System.Resources.Reader.Tests", "{AAC70548-7854-4CEE-A06F-C148602C1993}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Resources.Reader.Tests", "System.Resources.Reader.Tests", "{AAC70548-7854-4CEE-A06F-C148602C1993}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Resources.ResourceManager.Tests", "tests\System.Resources.ResourceManager.Tests", "{BBB16624-A02E-443C-AFE2-A4A98F42B59C}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Resources.ResourceManager.Tests", "System.Resources.ResourceManager.Tests", "{BBB16624-A02E-443C-AFE2-A4A98F42B59C}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Runtime.CompilerServices.Unsafe.Tests", "tests\System.Runtime.CompilerServices.Unsafe.Tests", "{E0CBE95A-2098-4848-BB89-87D92AA9FE0D}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Runtime.CompilerServices.Unsafe.Tests", "System.Runtime.CompilerServices.Unsafe.Tests", "{E0CBE95A-2098-4848-BB89-87D92AA9FE0D}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Runtime.Extensions.Tests", "tests\System.Runtime.Extensions.Tests", "{E38C85C0-9609-4CAA-B55B-2B78B0BC70FC}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Runtime.Extensions.Tests", "System.Runtime.Extensions.Tests", "{E38C85C0-9609-4CAA-B55B-2B78B0BC70FC}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Runtime.Handles.Tests", "tests\System.Runtime.Handles.Tests", "{B7E9D0DA-FE45-4CB8-9CE1-D5E52917E3B3}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Runtime.Handles.Tests", "System.Runtime.Handles.Tests", "{B7E9D0DA-FE45-4CB8-9CE1-D5E52917E3B3}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Runtime.InteropServices.RuntimeInformation.Tests", "tests\System.Runtime.InteropServices.RuntimeInformation.Tests", "{AF750AE8-AB38-44E3-BECD-5267DD578957}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Runtime.InteropServices.RuntimeInformation.Tests", "System.Runtime.InteropServices.RuntimeInformation.Tests", "{AF750AE8-AB38-44E3-BECD-5267DD578957}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Runtime.Tests", "tests\System.Runtime.Tests", "{88C12684-74BD-4E2F-8F95-587EC30FE39B}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Runtime.Tests", "System.Runtime.Tests", "{88C12684-74BD-4E2F-8F95-587EC30FE39B}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Security.SecureString.Tests", "tests\System.Security.SecureString.Tests", "{6433F4E3-DFC6-4549-8632-797CB749D880}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Security.SecureString.Tests", "System.Security.SecureString.Tests", "{6433F4E3-DFC6-4549-8632-797CB749D880}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Text.Encoding.Tests", "tests\System.Text.Encoding.Tests", "{5961F488-A1DD-4B28-A610-F3D616ED6766}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Text.Encoding.Tests", "System.Text.Encoding.Tests", "{5961F488-A1DD-4B28-A610-F3D616ED6766}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Threading.Tasks.Extensions.Tests", "tests\System.Threading.Tasks.Extensions.Tests", "{87ED1E88-3518-4BAA-9DEA-3379A48DCFD4}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Threading.Tasks.Extensions.Tests", "System.Threading.Tasks.Extensions.Tests", "{87ED1E88-3518-4BAA-9DEA-3379A48DCFD4}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Threading.Tasks.Tests", "tests\System.Threading.Tasks.Tests", "{2B160F79-3016-4753-A009-00DC97F2730C}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Threading.Tasks.Tests", "System.Threading.Tasks.Tests", "{2B160F79-3016-4753-A009-00DC97F2730C}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Threading.Timer.Tests", "tests\System.Threading.Timer.Tests", "{0527FC3D-2997-4C7B-B63C-306F7A52BEBD}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.Threading.Timer.Tests", "System.Threading.Timer.Tests", "{0527FC3D-2997-4C7B-B63C-306F7A52BEBD}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.ValueTuple.Tests", "tests\System.ValueTuple.Tests", "{DBFE9843-8DB5-46BC-B243-B3A0B98326C9}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "System.ValueTuple.Tests", "System.ValueTuple.Tests", "{DBFE9843-8DB5-46BC-B243-B3A0B98326C9}"
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{FD72C125-C10D-457B-8AFC-6B4E5237AF6A}"
 EndProject
@@ -281,16 +285,21 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "ref", "{5B2B5E7E-A2F
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "gen", "{F362E63A-2B1A-445B-B198-3071D7DDE8CF}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "tools\gen", "{13818769-DC01-4715-9590-E000D03E42A9}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "gen", "{13818769-DC01-4715-9590-E000D03E42A9}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "tools\src", "{04D0E381-5B43-42C0-8E08-FADBFCECB353}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{04D0E381-5B43-42C0-8E08-FADBFCECB353}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "tools\ref", "{F65030D7-DDBD-4D4C-B6E3-D3C0DD7FD569}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "ref", "{F65030D7-DDBD-4D4C-B6E3-D3C0DD7FD569}"
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{67DCB1C2-0B95-40B6-ACE8-9812BF57EB19}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Checked|Any CPU = Checked|Any CPU
+		Checked|arm = Checked|arm
+		Checked|arm64 = Checked|arm64
+		Checked|x64 = Checked|x64
+		Checked|x86 = Checked|x86
 		Debug|Any CPU = Debug|Any CPU
 		Debug|arm = Debug|arm
 		Debug|arm64 = Debug|arm64
@@ -301,13 +310,18 @@ Global
 		Release|arm64 = Release|arm64
 		Release|x64 = Release|x64
 		Release|x86 = Release|x86
-		Checked|Any CPU = Checked|Any CPU
-		Checked|arm = Checked|arm
-		Checked|arm64 = Checked|arm64
-		Checked|x64 = Checked|x64
-		Checked|x86 = Checked|x86
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|Any CPU.ActiveCfg = Checked|x64
+		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|Any CPU.Build.0 = Checked|x64
+		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|arm.ActiveCfg = Checked|arm
+		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|arm.Build.0 = Checked|arm
+		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|arm64.ActiveCfg = Checked|arm64
+		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|arm64.Build.0 = Checked|arm64
+		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|x64.ActiveCfg = Checked|x64
+		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|x64.Build.0 = Checked|x64
+		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|x86.ActiveCfg = Checked|x86
+		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|x86.Build.0 = Checked|x86
 		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Debug|Any CPU.ActiveCfg = Debug|x64
 		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Debug|Any CPU.Build.0 = Debug|x64
 		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Debug|arm.ActiveCfg = Debug|arm
@@ -328,16 +342,11 @@ Global
 		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Release|x64.Build.0 = Release|x64
 		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Release|x86.ActiveCfg = Release|x86
 		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Release|x86.Build.0 = Release|x86
-		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|Any CPU.ActiveCfg = Checked|x64
-		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|Any CPU.Build.0 = Checked|x64
-		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|arm.ActiveCfg = Checked|arm
-		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|arm.Build.0 = Checked|arm
-		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|arm64.ActiveCfg = Checked|arm64
-		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|arm64.Build.0 = Checked|arm64
-		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|x64.ActiveCfg = Checked|x64
-		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|x64.Build.0 = Checked|x64
-		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|x86.ActiveCfg = Checked|x86
-		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9}.Checked|x86.Build.0 = Checked|x86
+		{F86D6534-1A96-489E-A807-C14E616686D6}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{F86D6534-1A96-489E-A807-C14E616686D6}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{F86D6534-1A96-489E-A807-C14E616686D6}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{F86D6534-1A96-489E-A807-C14E616686D6}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{F86D6534-1A96-489E-A807-C14E616686D6}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{F86D6534-1A96-489E-A807-C14E616686D6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{F86D6534-1A96-489E-A807-C14E616686D6}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{F86D6534-1A96-489E-A807-C14E616686D6}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -354,11 +363,11 @@ Global
 		{F86D6534-1A96-489E-A807-C14E616686D6}.Release|x64.Build.0 = Release|Any CPU
 		{F86D6534-1A96-489E-A807-C14E616686D6}.Release|x86.ActiveCfg = Release|Any CPU
 		{F86D6534-1A96-489E-A807-C14E616686D6}.Release|x86.Build.0 = Release|Any CPU
-		{F86D6534-1A96-489E-A807-C14E616686D6}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{F86D6534-1A96-489E-A807-C14E616686D6}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{F86D6534-1A96-489E-A807-C14E616686D6}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{F86D6534-1A96-489E-A807-C14E616686D6}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{F86D6534-1A96-489E-A807-C14E616686D6}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -375,11 +384,11 @@ Global
 		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Release|x64.Build.0 = Release|Any CPU
 		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Release|x86.ActiveCfg = Release|Any CPU
 		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Release|x86.Build.0 = Release|Any CPU
-		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -396,11 +405,11 @@ Global
 		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Release|x64.Build.0 = Release|Any CPU
 		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Release|x86.ActiveCfg = Release|Any CPU
 		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Release|x86.Build.0 = Release|Any CPU
-		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{FB17AC52-1633-4845-932B-9218DF895957}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{FB17AC52-1633-4845-932B-9218DF895957}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{FB17AC52-1633-4845-932B-9218DF895957}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{FB17AC52-1633-4845-932B-9218DF895957}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{FB17AC52-1633-4845-932B-9218DF895957}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{FB17AC52-1633-4845-932B-9218DF895957}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{FB17AC52-1633-4845-932B-9218DF895957}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{FB17AC52-1633-4845-932B-9218DF895957}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -417,11 +426,11 @@ Global
 		{FB17AC52-1633-4845-932B-9218DF895957}.Release|x64.Build.0 = Release|Any CPU
 		{FB17AC52-1633-4845-932B-9218DF895957}.Release|x86.ActiveCfg = Release|Any CPU
 		{FB17AC52-1633-4845-932B-9218DF895957}.Release|x86.Build.0 = Release|Any CPU
-		{FB17AC52-1633-4845-932B-9218DF895957}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{FB17AC52-1633-4845-932B-9218DF895957}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{FB17AC52-1633-4845-932B-9218DF895957}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{FB17AC52-1633-4845-932B-9218DF895957}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{FB17AC52-1633-4845-932B-9218DF895957}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -438,11 +447,11 @@ Global
 		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Release|x64.Build.0 = Release|Any CPU
 		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Release|x86.ActiveCfg = Release|Any CPU
 		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Release|x86.Build.0 = Release|Any CPU
-		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{21791340-49C4-4C07-97FD-CAA1B72D3256}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{86CF47B3-D607-4F59-896F-982FEA116086}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{86CF47B3-D607-4F59-896F-982FEA116086}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{86CF47B3-D607-4F59-896F-982FEA116086}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{86CF47B3-D607-4F59-896F-982FEA116086}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{86CF47B3-D607-4F59-896F-982FEA116086}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{86CF47B3-D607-4F59-896F-982FEA116086}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{86CF47B3-D607-4F59-896F-982FEA116086}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{86CF47B3-D607-4F59-896F-982FEA116086}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -459,11 +468,11 @@ Global
 		{86CF47B3-D607-4F59-896F-982FEA116086}.Release|x64.Build.0 = Release|Any CPU
 		{86CF47B3-D607-4F59-896F-982FEA116086}.Release|x86.ActiveCfg = Release|Any CPU
 		{86CF47B3-D607-4F59-896F-982FEA116086}.Release|x86.Build.0 = Release|Any CPU
-		{86CF47B3-D607-4F59-896F-982FEA116086}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{86CF47B3-D607-4F59-896F-982FEA116086}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{86CF47B3-D607-4F59-896F-982FEA116086}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{86CF47B3-D607-4F59-896F-982FEA116086}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{86CF47B3-D607-4F59-896F-982FEA116086}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{484B12B8-F027-4960-BAA9-14D646C80A28}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{484B12B8-F027-4960-BAA9-14D646C80A28}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{484B12B8-F027-4960-BAA9-14D646C80A28}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{484B12B8-F027-4960-BAA9-14D646C80A28}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{484B12B8-F027-4960-BAA9-14D646C80A28}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{484B12B8-F027-4960-BAA9-14D646C80A28}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{484B12B8-F027-4960-BAA9-14D646C80A28}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{484B12B8-F027-4960-BAA9-14D646C80A28}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -480,11 +489,11 @@ Global
 		{484B12B8-F027-4960-BAA9-14D646C80A28}.Release|x64.Build.0 = Release|Any CPU
 		{484B12B8-F027-4960-BAA9-14D646C80A28}.Release|x86.ActiveCfg = Release|Any CPU
 		{484B12B8-F027-4960-BAA9-14D646C80A28}.Release|x86.Build.0 = Release|Any CPU
-		{484B12B8-F027-4960-BAA9-14D646C80A28}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{484B12B8-F027-4960-BAA9-14D646C80A28}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{484B12B8-F027-4960-BAA9-14D646C80A28}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{484B12B8-F027-4960-BAA9-14D646C80A28}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{484B12B8-F027-4960-BAA9-14D646C80A28}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -501,11 +510,11 @@ Global
 		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Release|x64.Build.0 = Release|Any CPU
 		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Release|x86.ActiveCfg = Release|Any CPU
 		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Release|x86.Build.0 = Release|Any CPU
-		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{D16B3A49-5709-44CB-B6F8-8E3D585D236F}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -522,11 +531,11 @@ Global
 		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Release|x64.Build.0 = Release|Any CPU
 		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Release|x86.ActiveCfg = Release|Any CPU
 		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Release|x86.Build.0 = Release|Any CPU
-		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{F0BB4F76-7697-49A8-8204-FD4516EB325C}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{B876CC90-CB87-4B1B-B6F5-247990192578}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{B876CC90-CB87-4B1B-B6F5-247990192578}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{B876CC90-CB87-4B1B-B6F5-247990192578}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{B876CC90-CB87-4B1B-B6F5-247990192578}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{B876CC90-CB87-4B1B-B6F5-247990192578}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{B876CC90-CB87-4B1B-B6F5-247990192578}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{B876CC90-CB87-4B1B-B6F5-247990192578}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{B876CC90-CB87-4B1B-B6F5-247990192578}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -543,11 +552,11 @@ Global
 		{B876CC90-CB87-4B1B-B6F5-247990192578}.Release|x64.Build.0 = Release|Any CPU
 		{B876CC90-CB87-4B1B-B6F5-247990192578}.Release|x86.ActiveCfg = Release|Any CPU
 		{B876CC90-CB87-4B1B-B6F5-247990192578}.Release|x86.Build.0 = Release|Any CPU
-		{B876CC90-CB87-4B1B-B6F5-247990192578}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{B876CC90-CB87-4B1B-B6F5-247990192578}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{B876CC90-CB87-4B1B-B6F5-247990192578}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{B876CC90-CB87-4B1B-B6F5-247990192578}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{B876CC90-CB87-4B1B-B6F5-247990192578}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -564,11 +573,11 @@ Global
 		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Release|x64.Build.0 = Release|Any CPU
 		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Release|x86.ActiveCfg = Release|Any CPU
 		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Release|x86.Build.0 = Release|Any CPU
-		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{999B1A08-2C7F-43AD-BC50-5F950320BBFF}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{019A13D1-3493-4024-8223-FCB6763F80B4}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{019A13D1-3493-4024-8223-FCB6763F80B4}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{019A13D1-3493-4024-8223-FCB6763F80B4}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{019A13D1-3493-4024-8223-FCB6763F80B4}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{019A13D1-3493-4024-8223-FCB6763F80B4}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{019A13D1-3493-4024-8223-FCB6763F80B4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{019A13D1-3493-4024-8223-FCB6763F80B4}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{019A13D1-3493-4024-8223-FCB6763F80B4}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -585,11 +594,11 @@ Global
 		{019A13D1-3493-4024-8223-FCB6763F80B4}.Release|x64.Build.0 = Release|Any CPU
 		{019A13D1-3493-4024-8223-FCB6763F80B4}.Release|x86.ActiveCfg = Release|Any CPU
 		{019A13D1-3493-4024-8223-FCB6763F80B4}.Release|x86.Build.0 = Release|Any CPU
-		{019A13D1-3493-4024-8223-FCB6763F80B4}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{019A13D1-3493-4024-8223-FCB6763F80B4}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{019A13D1-3493-4024-8223-FCB6763F80B4}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{019A13D1-3493-4024-8223-FCB6763F80B4}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{019A13D1-3493-4024-8223-FCB6763F80B4}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -606,11 +615,11 @@ Global
 		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Release|x64.Build.0 = Release|Any CPU
 		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Release|x86.ActiveCfg = Release|Any CPU
 		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Release|x86.Build.0 = Release|Any CPU
-		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{9ECF9E5C-860F-49C3-95D0-501147F19548}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -627,11 +636,11 @@ Global
 		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Release|x64.Build.0 = Release|Any CPU
 		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Release|x86.ActiveCfg = Release|Any CPU
 		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Release|x86.Build.0 = Release|Any CPU
-		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{8F97C1DE-07F7-449F-AA22-84A6D6836D82}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -648,11 +657,11 @@ Global
 		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Release|x64.Build.0 = Release|Any CPU
 		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Release|x86.ActiveCfg = Release|Any CPU
 		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Release|x86.Build.0 = Release|Any CPU
-		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -669,11 +678,11 @@ Global
 		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Release|x64.Build.0 = Release|Any CPU
 		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Release|x86.ActiveCfg = Release|Any CPU
 		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Release|x86.Build.0 = Release|Any CPU
-		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -690,11 +699,11 @@ Global
 		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Release|x64.Build.0 = Release|Any CPU
 		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Release|x86.ActiveCfg = Release|Any CPU
 		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Release|x86.Build.0 = Release|Any CPU
-		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{E64D31D0-8F38-4FDF-B60D-F955D2475566}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -711,11 +720,11 @@ Global
 		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Release|x64.Build.0 = Release|Any CPU
 		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Release|x86.ActiveCfg = Release|Any CPU
 		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Release|x86.Build.0 = Release|Any CPU
-		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -732,11 +741,11 @@ Global
 		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Release|x64.Build.0 = Release|Any CPU
 		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Release|x86.ActiveCfg = Release|Any CPU
 		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Release|x86.Build.0 = Release|Any CPU
-		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{4367BB9C-7EC2-4238-82E2-643DE24CC23E}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{F977B04F-675C-4B78-8FCE-19D70504166D}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{F977B04F-675C-4B78-8FCE-19D70504166D}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{F977B04F-675C-4B78-8FCE-19D70504166D}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{F977B04F-675C-4B78-8FCE-19D70504166D}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{F977B04F-675C-4B78-8FCE-19D70504166D}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{F977B04F-675C-4B78-8FCE-19D70504166D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{F977B04F-675C-4B78-8FCE-19D70504166D}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{F977B04F-675C-4B78-8FCE-19D70504166D}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -753,11 +762,11 @@ Global
 		{F977B04F-675C-4B78-8FCE-19D70504166D}.Release|x64.Build.0 = Release|Any CPU
 		{F977B04F-675C-4B78-8FCE-19D70504166D}.Release|x86.ActiveCfg = Release|Any CPU
 		{F977B04F-675C-4B78-8FCE-19D70504166D}.Release|x86.Build.0 = Release|Any CPU
-		{F977B04F-675C-4B78-8FCE-19D70504166D}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{F977B04F-675C-4B78-8FCE-19D70504166D}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{F977B04F-675C-4B78-8FCE-19D70504166D}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{F977B04F-675C-4B78-8FCE-19D70504166D}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{F977B04F-675C-4B78-8FCE-19D70504166D}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -774,11 +783,11 @@ Global
 		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Release|x64.Build.0 = Release|Any CPU
 		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Release|x86.ActiveCfg = Release|Any CPU
 		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Release|x86.Build.0 = Release|Any CPU
-		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{379BC6E6-1900-44F8-8D8C-AA2968A70008}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -795,11 +804,11 @@ Global
 		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Release|x64.Build.0 = Release|Any CPU
 		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Release|x86.ActiveCfg = Release|Any CPU
 		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Release|x86.Build.0 = Release|Any CPU
-		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -816,11 +825,11 @@ Global
 		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Release|x64.Build.0 = Release|Any CPU
 		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Release|x86.ActiveCfg = Release|Any CPU
 		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Release|x86.Build.0 = Release|Any CPU
-		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -837,11 +846,11 @@ Global
 		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Release|x64.Build.0 = Release|Any CPU
 		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Release|x86.ActiveCfg = Release|Any CPU
 		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Release|x86.Build.0 = Release|Any CPU
-		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -858,11 +867,11 @@ Global
 		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Release|x64.Build.0 = Release|Any CPU
 		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Release|x86.ActiveCfg = Release|Any CPU
 		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Release|x86.Build.0 = Release|Any CPU
-		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{A83A8520-F5E2-49B4-83BC-0F82A412951D}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -879,11 +888,11 @@ Global
 		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Release|x64.Build.0 = Release|Any CPU
 		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Release|x86.ActiveCfg = Release|Any CPU
 		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Release|x86.Build.0 = Release|Any CPU
-		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{A3873DDB-47E7-4DB6-872C-4B46A779913A}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -900,11 +909,11 @@ Global
 		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Release|x64.Build.0 = Release|Any CPU
 		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Release|x86.ActiveCfg = Release|Any CPU
 		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Release|x86.Build.0 = Release|Any CPU
-		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{23D41678-453F-4F2A-85F1-167E63DA6D67}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{6790611D-ACE0-47C6-83E0-E404364B5210}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{6790611D-ACE0-47C6-83E0-E404364B5210}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{6790611D-ACE0-47C6-83E0-E404364B5210}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{6790611D-ACE0-47C6-83E0-E404364B5210}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{6790611D-ACE0-47C6-83E0-E404364B5210}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{6790611D-ACE0-47C6-83E0-E404364B5210}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{6790611D-ACE0-47C6-83E0-E404364B5210}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{6790611D-ACE0-47C6-83E0-E404364B5210}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -921,11 +930,11 @@ Global
 		{6790611D-ACE0-47C6-83E0-E404364B5210}.Release|x64.Build.0 = Release|Any CPU
 		{6790611D-ACE0-47C6-83E0-E404364B5210}.Release|x86.ActiveCfg = Release|Any CPU
 		{6790611D-ACE0-47C6-83E0-E404364B5210}.Release|x86.Build.0 = Release|Any CPU
-		{6790611D-ACE0-47C6-83E0-E404364B5210}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{6790611D-ACE0-47C6-83E0-E404364B5210}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{6790611D-ACE0-47C6-83E0-E404364B5210}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{6790611D-ACE0-47C6-83E0-E404364B5210}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{6790611D-ACE0-47C6-83E0-E404364B5210}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -942,11 +951,11 @@ Global
 		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Release|x64.Build.0 = Release|Any CPU
 		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Release|x86.ActiveCfg = Release|Any CPU
 		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Release|x86.Build.0 = Release|Any CPU
-		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -963,11 +972,11 @@ Global
 		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Release|x64.Build.0 = Release|Any CPU
 		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Release|x86.ActiveCfg = Release|Any CPU
 		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Release|x86.Build.0 = Release|Any CPU
-		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{B73090B8-20CB-4586-A586-B7F37C1A06FF}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -984,11 +993,11 @@ Global
 		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Release|x64.Build.0 = Release|Any CPU
 		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Release|x86.ActiveCfg = Release|Any CPU
 		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Release|x86.Build.0 = Release|Any CPU
-		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{4C1F2761-857C-40A4-8CDD-7139380DA4D7}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1005,11 +1014,11 @@ Global
 		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Release|x64.Build.0 = Release|Any CPU
 		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Release|x86.ActiveCfg = Release|Any CPU
 		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Release|x86.Build.0 = Release|Any CPU
-		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{70441C80-1F14-42F9-8225-A891E3C9A82A}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1026,11 +1035,11 @@ Global
 		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Release|x64.Build.0 = Release|Any CPU
 		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Release|x86.ActiveCfg = Release|Any CPU
 		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Release|x86.Build.0 = Release|Any CPU
-		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1047,11 +1056,11 @@ Global
 		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Release|x64.Build.0 = Release|Any CPU
 		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Release|x86.ActiveCfg = Release|Any CPU
 		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Release|x86.Build.0 = Release|Any CPU
-		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{1C44735B-C77E-479A-ABBB-8B6EB83299CF}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1068,11 +1077,11 @@ Global
 		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Release|x64.Build.0 = Release|Any CPU
 		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Release|x86.ActiveCfg = Release|Any CPU
 		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Release|x86.Build.0 = Release|Any CPU
-		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1089,11 +1098,11 @@ Global
 		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Release|x64.Build.0 = Release|Any CPU
 		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Release|x86.ActiveCfg = Release|Any CPU
 		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Release|x86.Build.0 = Release|Any CPU
-		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{C5F86889-E147-4424-9165-D2DF453741F2}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{C5F86889-E147-4424-9165-D2DF453741F2}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{C5F86889-E147-4424-9165-D2DF453741F2}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{C5F86889-E147-4424-9165-D2DF453741F2}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{C5F86889-E147-4424-9165-D2DF453741F2}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{C5F86889-E147-4424-9165-D2DF453741F2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{C5F86889-E147-4424-9165-D2DF453741F2}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{C5F86889-E147-4424-9165-D2DF453741F2}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1110,11 +1119,11 @@ Global
 		{C5F86889-E147-4424-9165-D2DF453741F2}.Release|x64.Build.0 = Release|Any CPU
 		{C5F86889-E147-4424-9165-D2DF453741F2}.Release|x86.ActiveCfg = Release|Any CPU
 		{C5F86889-E147-4424-9165-D2DF453741F2}.Release|x86.Build.0 = Release|Any CPU
-		{C5F86889-E147-4424-9165-D2DF453741F2}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{C5F86889-E147-4424-9165-D2DF453741F2}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{C5F86889-E147-4424-9165-D2DF453741F2}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{C5F86889-E147-4424-9165-D2DF453741F2}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{C5F86889-E147-4424-9165-D2DF453741F2}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1131,11 +1140,11 @@ Global
 		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Release|x64.Build.0 = Release|Any CPU
 		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Release|x86.ActiveCfg = Release|Any CPU
 		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Release|x86.Build.0 = Release|Any CPU
-		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1152,11 +1161,11 @@ Global
 		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Release|x64.Build.0 = Release|Any CPU
 		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Release|x86.ActiveCfg = Release|Any CPU
 		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Release|x86.Build.0 = Release|Any CPU
-		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1173,11 +1182,11 @@ Global
 		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Release|x64.Build.0 = Release|Any CPU
 		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Release|x86.ActiveCfg = Release|Any CPU
 		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Release|x86.Build.0 = Release|Any CPU
-		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{32247916-74DE-4A62-AE68-04976D2B0149}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{32247916-74DE-4A62-AE68-04976D2B0149}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{32247916-74DE-4A62-AE68-04976D2B0149}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{32247916-74DE-4A62-AE68-04976D2B0149}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{32247916-74DE-4A62-AE68-04976D2B0149}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{32247916-74DE-4A62-AE68-04976D2B0149}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{32247916-74DE-4A62-AE68-04976D2B0149}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{32247916-74DE-4A62-AE68-04976D2B0149}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1194,11 +1203,11 @@ Global
 		{32247916-74DE-4A62-AE68-04976D2B0149}.Release|x64.Build.0 = Release|Any CPU
 		{32247916-74DE-4A62-AE68-04976D2B0149}.Release|x86.ActiveCfg = Release|Any CPU
 		{32247916-74DE-4A62-AE68-04976D2B0149}.Release|x86.Build.0 = Release|Any CPU
-		{32247916-74DE-4A62-AE68-04976D2B0149}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{32247916-74DE-4A62-AE68-04976D2B0149}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{32247916-74DE-4A62-AE68-04976D2B0149}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{32247916-74DE-4A62-AE68-04976D2B0149}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{32247916-74DE-4A62-AE68-04976D2B0149}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{5090E2BE-4BC9-4027-BF67-452049996F43}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{5090E2BE-4BC9-4027-BF67-452049996F43}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{5090E2BE-4BC9-4027-BF67-452049996F43}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{5090E2BE-4BC9-4027-BF67-452049996F43}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{5090E2BE-4BC9-4027-BF67-452049996F43}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{5090E2BE-4BC9-4027-BF67-452049996F43}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{5090E2BE-4BC9-4027-BF67-452049996F43}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{5090E2BE-4BC9-4027-BF67-452049996F43}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1215,11 +1224,11 @@ Global
 		{5090E2BE-4BC9-4027-BF67-452049996F43}.Release|x64.Build.0 = Release|Any CPU
 		{5090E2BE-4BC9-4027-BF67-452049996F43}.Release|x86.ActiveCfg = Release|Any CPU
 		{5090E2BE-4BC9-4027-BF67-452049996F43}.Release|x86.Build.0 = Release|Any CPU
-		{5090E2BE-4BC9-4027-BF67-452049996F43}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{5090E2BE-4BC9-4027-BF67-452049996F43}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{5090E2BE-4BC9-4027-BF67-452049996F43}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{5090E2BE-4BC9-4027-BF67-452049996F43}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{5090E2BE-4BC9-4027-BF67-452049996F43}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1236,11 +1245,11 @@ Global
 		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Release|x64.Build.0 = Release|Any CPU
 		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Release|x86.ActiveCfg = Release|Any CPU
 		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Release|x86.Build.0 = Release|Any CPU
-		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{61164A2A-D90F-4122-AF5D-9704564E80E0}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1257,11 +1266,11 @@ Global
 		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Release|x64.Build.0 = Release|Any CPU
 		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Release|x86.ActiveCfg = Release|Any CPU
 		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Release|x86.Build.0 = Release|Any CPU
-		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1278,11 +1287,11 @@ Global
 		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Release|x64.Build.0 = Release|Any CPU
 		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Release|x86.ActiveCfg = Release|Any CPU
 		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Release|x86.Build.0 = Release|Any CPU
-		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{652F0921-C134-4882-A9D0-0CBB2F8D75B2}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1299,11 +1308,11 @@ Global
 		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Release|x64.Build.0 = Release|Any CPU
 		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Release|x86.ActiveCfg = Release|Any CPU
 		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Release|x86.Build.0 = Release|Any CPU
-		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1320,11 +1329,11 @@ Global
 		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Release|x64.Build.0 = Release|Any CPU
 		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Release|x86.ActiveCfg = Release|Any CPU
 		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Release|x86.Build.0 = Release|Any CPU
-		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1341,13 +1350,13 @@ Global
 		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Release|x64.Build.0 = Release|Any CPU
 		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Release|x86.ActiveCfg = Release|Any CPU
 		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Release|x86.Build.0 = Release|Any CPU
-		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B}.Checked|x86.ActiveCfg = Debug|Any CPU
-		{E73952E5-C929-4566-962A-B9AF65289871}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{E73952E5-C929-4566-962A-B9AF65289871}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{E73952E5-C929-4566-962A-B9AF65289871}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{E73952E5-C929-4566-962A-B9AF65289871}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{E73952E5-C929-4566-962A-B9AF65289871}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{E73952E5-C929-4566-962A-B9AF65289871}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{E73952E5-C929-4566-962A-B9AF65289871}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{E73952E5-C929-4566-962A-B9AF65289871}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{E73952E5-C929-4566-962A-B9AF65289871}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{E73952E5-C929-4566-962A-B9AF65289871}.Debug|arm.ActiveCfg = Debug|Any CPU
 		{E73952E5-C929-4566-962A-B9AF65289871}.Debug|arm64.ActiveCfg = Debug|Any CPU
 		{E73952E5-C929-4566-962A-B9AF65289871}.Debug|x64.ActiveCfg = Debug|Any CPU
@@ -1362,11 +1371,11 @@ Global
 		{E73952E5-C929-4566-962A-B9AF65289871}.Release|x64.Build.0 = Release|Any CPU
 		{E73952E5-C929-4566-962A-B9AF65289871}.Release|x86.ActiveCfg = Release|Any CPU
 		{E73952E5-C929-4566-962A-B9AF65289871}.Release|x86.Build.0 = Release|Any CPU
-		{E73952E5-C929-4566-962A-B9AF65289871}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{E73952E5-C929-4566-962A-B9AF65289871}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{E73952E5-C929-4566-962A-B9AF65289871}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{E73952E5-C929-4566-962A-B9AF65289871}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{E73952E5-C929-4566-962A-B9AF65289871}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1383,11 +1392,11 @@ Global
 		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Release|x64.Build.0 = Release|Any CPU
 		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Release|x86.ActiveCfg = Release|Any CPU
 		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Release|x86.Build.0 = Release|Any CPU
-		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{A6D86695-D570-43F9-99A3-6C7445362D53}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{A6D86695-D570-43F9-99A3-6C7445362D53}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{A6D86695-D570-43F9-99A3-6C7445362D53}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{A6D86695-D570-43F9-99A3-6C7445362D53}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{A6D86695-D570-43F9-99A3-6C7445362D53}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{A6D86695-D570-43F9-99A3-6C7445362D53}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{A6D86695-D570-43F9-99A3-6C7445362D53}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{A6D86695-D570-43F9-99A3-6C7445362D53}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1404,11 +1413,11 @@ Global
 		{A6D86695-D570-43F9-99A3-6C7445362D53}.Release|x64.Build.0 = Release|Any CPU
 		{A6D86695-D570-43F9-99A3-6C7445362D53}.Release|x86.ActiveCfg = Release|Any CPU
 		{A6D86695-D570-43F9-99A3-6C7445362D53}.Release|x86.Build.0 = Release|Any CPU
-		{A6D86695-D570-43F9-99A3-6C7445362D53}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{A6D86695-D570-43F9-99A3-6C7445362D53}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{A6D86695-D570-43F9-99A3-6C7445362D53}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{A6D86695-D570-43F9-99A3-6C7445362D53}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{A6D86695-D570-43F9-99A3-6C7445362D53}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1425,11 +1434,11 @@ Global
 		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Release|x64.Build.0 = Release|Any CPU
 		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Release|x86.ActiveCfg = Release|Any CPU
 		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Release|x86.Build.0 = Release|Any CPU
-		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{852EA6A6-CED9-467C-9B58-9983ADBEA89A}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1446,11 +1455,11 @@ Global
 		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Release|x64.Build.0 = Release|Any CPU
 		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Release|x86.ActiveCfg = Release|Any CPU
 		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Release|x86.Build.0 = Release|Any CPU
-		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{259CC89C-F5E0-4CF0-92FA-3075E0142589}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1467,11 +1476,11 @@ Global
 		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Release|x64.Build.0 = Release|Any CPU
 		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Release|x86.ActiveCfg = Release|Any CPU
 		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Release|x86.Build.0 = Release|Any CPU
-		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{E884B43D-FD9D-41C6-9C1D-5F49C472032D}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1488,11 +1497,11 @@ Global
 		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Release|x64.Build.0 = Release|Any CPU
 		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Release|x86.ActiveCfg = Release|Any CPU
 		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Release|x86.Build.0 = Release|Any CPU
-		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{8C5E25F3-C1ED-44DC-9019-D60E5CD24BDB}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1509,11 +1518,11 @@ Global
 		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Release|x64.Build.0 = Release|Any CPU
 		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Release|x86.ActiveCfg = Release|Any CPU
 		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Release|x86.Build.0 = Release|Any CPU
-		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{0B62ADE9-B3E6-4727-8F35-6C7A46B0DB1C}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1530,11 +1539,11 @@ Global
 		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Release|x64.Build.0 = Release|Any CPU
 		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Release|x86.ActiveCfg = Release|Any CPU
 		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Release|x86.Build.0 = Release|Any CPU
-		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{9DE2AE90-EB4B-45D6-84AD-A4C4FAEF5658}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1551,11 +1560,11 @@ Global
 		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Release|x64.Build.0 = Release|Any CPU
 		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Release|x86.ActiveCfg = Release|Any CPU
 		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Release|x86.Build.0 = Release|Any CPU
-		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{A5260207-E621-4792-A09D-9DF5DA16FFE6}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1572,11 +1581,11 @@ Global
 		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Release|x64.Build.0 = Release|Any CPU
 		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Release|x86.ActiveCfg = Release|Any CPU
 		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Release|x86.Build.0 = Release|Any CPU
-		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{55C65AC8-0FC0-4A3B-B342-61D4686ABB9A}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1593,11 +1602,11 @@ Global
 		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Release|x64.Build.0 = Release|Any CPU
 		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Release|x86.ActiveCfg = Release|Any CPU
 		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Release|x86.Build.0 = Release|Any CPU
-		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{D592CC73-099B-499C-80E6-FFBE5E4FA14A}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1614,11 +1623,11 @@ Global
 		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Release|x64.Build.0 = Release|Any CPU
 		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Release|x86.ActiveCfg = Release|Any CPU
 		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Release|x86.Build.0 = Release|Any CPU
-		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{3E97D9E1-4C2B-4FA2-98F7-977B593F8DB5}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1635,11 +1644,11 @@ Global
 		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Release|x64.Build.0 = Release|Any CPU
 		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Release|x86.ActiveCfg = Release|Any CPU
 		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Release|x86.Build.0 = Release|Any CPU
-		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{673597FC-FDC9-46A8-B503-D670FC9BD22E}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1656,11 +1665,11 @@ Global
 		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Release|x64.Build.0 = Release|Any CPU
 		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Release|x86.ActiveCfg = Release|Any CPU
 		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Release|x86.Build.0 = Release|Any CPU
-		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{F17096B7-5C4E-4FE0-BD5A-0180C0D34B6A}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1677,11 +1686,11 @@ Global
 		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Release|x64.Build.0 = Release|Any CPU
 		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Release|x86.ActiveCfg = Release|Any CPU
 		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Release|x86.Build.0 = Release|Any CPU
-		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{CC8D4A15-9101-4041-B992-B6AA4D5F3C64}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1698,11 +1707,11 @@ Global
 		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Release|x64.Build.0 = Release|Any CPU
 		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Release|x86.ActiveCfg = Release|Any CPU
 		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Release|x86.Build.0 = Release|Any CPU
-		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{049319F0-D438-404C-A6D4-4D1E99DAE647}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{691D460F-764F-48E7-9A3F-7D1A32388542}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{691D460F-764F-48E7-9A3F-7D1A32388542}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{691D460F-764F-48E7-9A3F-7D1A32388542}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{691D460F-764F-48E7-9A3F-7D1A32388542}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{691D460F-764F-48E7-9A3F-7D1A32388542}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{691D460F-764F-48E7-9A3F-7D1A32388542}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{691D460F-764F-48E7-9A3F-7D1A32388542}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{691D460F-764F-48E7-9A3F-7D1A32388542}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1719,11 +1728,11 @@ Global
 		{691D460F-764F-48E7-9A3F-7D1A32388542}.Release|x64.Build.0 = Release|Any CPU
 		{691D460F-764F-48E7-9A3F-7D1A32388542}.Release|x86.ActiveCfg = Release|Any CPU
 		{691D460F-764F-48E7-9A3F-7D1A32388542}.Release|x86.Build.0 = Release|Any CPU
-		{691D460F-764F-48E7-9A3F-7D1A32388542}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{691D460F-764F-48E7-9A3F-7D1A32388542}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{691D460F-764F-48E7-9A3F-7D1A32388542}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{691D460F-764F-48E7-9A3F-7D1A32388542}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{691D460F-764F-48E7-9A3F-7D1A32388542}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1740,11 +1749,11 @@ Global
 		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Release|x64.Build.0 = Release|Any CPU
 		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Release|x86.ActiveCfg = Release|Any CPU
 		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Release|x86.Build.0 = Release|Any CPU
-		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1761,11 +1770,11 @@ Global
 		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Release|x64.Build.0 = Release|Any CPU
 		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Release|x86.ActiveCfg = Release|Any CPU
 		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Release|x86.Build.0 = Release|Any CPU
-		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1782,11 +1791,11 @@ Global
 		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Release|x64.Build.0 = Release|Any CPU
 		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Release|x86.ActiveCfg = Release|Any CPU
 		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Release|x86.Build.0 = Release|Any CPU
-		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1803,11 +1812,11 @@ Global
 		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Release|x64.Build.0 = Release|Any CPU
 		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Release|x86.ActiveCfg = Release|Any CPU
 		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Release|x86.Build.0 = Release|Any CPU
-		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1824,11 +1833,11 @@ Global
 		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Release|x64.Build.0 = Release|Any CPU
 		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Release|x86.ActiveCfg = Release|Any CPU
 		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Release|x86.Build.0 = Release|Any CPU
-		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1845,11 +1854,11 @@ Global
 		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Release|x64.Build.0 = Release|Any CPU
 		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Release|x86.ActiveCfg = Release|Any CPU
 		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Release|x86.Build.0 = Release|Any CPU
-		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{E1847313-0072-49CA-A1E6-6C05CECAB77A}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1866,15 +1875,15 @@ Global
 		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Release|x64.Build.0 = Release|Any CPU
 		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Release|x86.ActiveCfg = Release|Any CPU
 		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Release|x86.Build.0 = Release|Any CPU
-		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{D86CC877-79A0-4AFA-9A76-7263B414614D}.Checked|x86.ActiveCfg = Debug|Any CPU
-		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Debug|arm.ActiveCfg = Debug|Any CPU
-		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Debug|arm64.ActiveCfg = Debug|Any CPU
+		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Debug|arm.ActiveCfg = Debug|Any CPU
+		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Debug|arm64.ActiveCfg = Debug|Any CPU
 		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Debug|x64.ActiveCfg = Debug|Any CPU
 		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Debug|x64.Build.0 = Debug|Any CPU
 		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Debug|x86.ActiveCfg = Debug|Any CPU
@@ -1887,11 +1896,11 @@ Global
 		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Release|x64.Build.0 = Release|Any CPU
 		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Release|x86.ActiveCfg = Release|Any CPU
 		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Release|x86.Build.0 = Release|Any CPU
-		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1908,11 +1917,11 @@ Global
 		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Release|x64.Build.0 = Release|Any CPU
 		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Release|x86.ActiveCfg = Release|Any CPU
 		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Release|x86.Build.0 = Release|Any CPU
-		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{59CD73F2-1310-46EE-B99A-594859FD8A37}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1929,11 +1938,11 @@ Global
 		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Release|x64.Build.0 = Release|Any CPU
 		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Release|x86.ActiveCfg = Release|Any CPU
 		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Release|x86.Build.0 = Release|Any CPU
-		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{3D58505D-F17F-49E0-9131-42F273E3F5B9}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1950,11 +1959,11 @@ Global
 		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Release|x64.Build.0 = Release|Any CPU
 		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Release|x86.ActiveCfg = Release|Any CPU
 		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Release|x86.Build.0 = Release|Any CPU
-		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{0BF9F165-888D-486A-B6FD-6F3029913D70}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1971,11 +1980,11 @@ Global
 		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Release|x64.Build.0 = Release|Any CPU
 		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Release|x86.ActiveCfg = Release|Any CPU
 		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Release|x86.Build.0 = Release|Any CPU
-		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{C485C170-2B88-4EA9-8826-7BC4C9BA2324}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -1992,11 +2001,11 @@ Global
 		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Release|x64.Build.0 = Release|Any CPU
 		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Release|x86.ActiveCfg = Release|Any CPU
 		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Release|x86.Build.0 = Release|Any CPU
-		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2013,11 +2022,11 @@ Global
 		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Release|x64.Build.0 = Release|Any CPU
 		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Release|x86.ActiveCfg = Release|Any CPU
 		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Release|x86.Build.0 = Release|Any CPU
-		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2034,11 +2043,11 @@ Global
 		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Release|x64.Build.0 = Release|Any CPU
 		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Release|x86.ActiveCfg = Release|Any CPU
 		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Release|x86.Build.0 = Release|Any CPU
-		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{4EE36055-AD7C-4779-B3F6-08687960DCC3}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2055,11 +2064,11 @@ Global
 		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Release|x64.Build.0 = Release|Any CPU
 		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Release|x86.ActiveCfg = Release|Any CPU
 		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Release|x86.Build.0 = Release|Any CPU
-		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{C3F25EEF-04B4-407A-960B-0C1CE9C04430}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2076,11 +2085,11 @@ Global
 		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Release|x64.Build.0 = Release|Any CPU
 		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Release|x86.ActiveCfg = Release|Any CPU
 		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Release|x86.Build.0 = Release|Any CPU
-		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{47E26787-7C27-4572-AD8B-868DE44E2C48}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2097,11 +2106,11 @@ Global
 		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Release|x64.Build.0 = Release|Any CPU
 		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Release|x86.ActiveCfg = Release|Any CPU
 		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Release|x86.Build.0 = Release|Any CPU
-		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{C230AC88-A377-4BEB-824F-AB174C14DC86}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2118,11 +2127,11 @@ Global
 		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Release|x64.Build.0 = Release|Any CPU
 		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Release|x86.ActiveCfg = Release|Any CPU
 		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Release|x86.Build.0 = Release|Any CPU
-		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2139,11 +2148,11 @@ Global
 		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Release|x64.Build.0 = Release|Any CPU
 		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Release|x86.ActiveCfg = Release|Any CPU
 		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Release|x86.Build.0 = Release|Any CPU
-		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2160,11 +2169,11 @@ Global
 		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Release|x64.Build.0 = Release|Any CPU
 		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Release|x86.ActiveCfg = Release|Any CPU
 		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Release|x86.Build.0 = Release|Any CPU
-		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2181,11 +2190,11 @@ Global
 		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Release|x64.Build.0 = Release|Any CPU
 		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Release|x86.ActiveCfg = Release|Any CPU
 		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Release|x86.Build.0 = Release|Any CPU
-		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{697C63A2-2517-4F85-8B88-C94E538BE407}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{697C63A2-2517-4F85-8B88-C94E538BE407}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{697C63A2-2517-4F85-8B88-C94E538BE407}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{697C63A2-2517-4F85-8B88-C94E538BE407}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{697C63A2-2517-4F85-8B88-C94E538BE407}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{697C63A2-2517-4F85-8B88-C94E538BE407}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{697C63A2-2517-4F85-8B88-C94E538BE407}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{697C63A2-2517-4F85-8B88-C94E538BE407}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2202,11 +2211,11 @@ Global
 		{697C63A2-2517-4F85-8B88-C94E538BE407}.Release|x64.Build.0 = Release|Any CPU
 		{697C63A2-2517-4F85-8B88-C94E538BE407}.Release|x86.ActiveCfg = Release|Any CPU
 		{697C63A2-2517-4F85-8B88-C94E538BE407}.Release|x86.Build.0 = Release|Any CPU
-		{697C63A2-2517-4F85-8B88-C94E538BE407}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{697C63A2-2517-4F85-8B88-C94E538BE407}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{697C63A2-2517-4F85-8B88-C94E538BE407}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{697C63A2-2517-4F85-8B88-C94E538BE407}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{697C63A2-2517-4F85-8B88-C94E538BE407}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2223,11 +2232,11 @@ Global
 		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Release|x64.Build.0 = Release|Any CPU
 		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Release|x86.ActiveCfg = Release|Any CPU
 		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Release|x86.Build.0 = Release|Any CPU
-		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2244,11 +2253,11 @@ Global
 		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Release|x64.Build.0 = Release|Any CPU
 		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Release|x86.ActiveCfg = Release|Any CPU
 		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Release|x86.Build.0 = Release|Any CPU
-		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{172F6EB9-6001-4657-8AE2-83DB23B371CA}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2265,11 +2274,11 @@ Global
 		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Release|x64.Build.0 = Release|Any CPU
 		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Release|x86.ActiveCfg = Release|Any CPU
 		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Release|x86.Build.0 = Release|Any CPU
-		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2286,11 +2295,11 @@ Global
 		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Release|x64.Build.0 = Release|Any CPU
 		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Release|x86.ActiveCfg = Release|Any CPU
 		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Release|x86.Build.0 = Release|Any CPU
-		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{BBC59E42-DC0B-4847-B336-13ACF4279F17}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2307,11 +2316,11 @@ Global
 		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Release|x64.Build.0 = Release|Any CPU
 		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Release|x86.ActiveCfg = Release|Any CPU
 		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Release|x86.Build.0 = Release|Any CPU
-		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{78C45C87-93B6-4FCE-B174-520756DE4E74}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2328,11 +2337,11 @@ Global
 		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Release|x64.Build.0 = Release|Any CPU
 		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Release|x86.ActiveCfg = Release|Any CPU
 		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Release|x86.Build.0 = Release|Any CPU
-		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{07197CBF-7C41-47B6-9E52-88A6D4485219}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2349,11 +2358,11 @@ Global
 		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Release|x64.Build.0 = Release|Any CPU
 		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Release|x86.ActiveCfg = Release|Any CPU
 		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Release|x86.Build.0 = Release|Any CPU
-		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2370,11 +2379,11 @@ Global
 		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Release|x64.Build.0 = Release|Any CPU
 		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Release|x86.ActiveCfg = Release|Any CPU
 		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Release|x86.Build.0 = Release|Any CPU
-		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{F6A8185B-07C6-401D-9B40-3C560239E05F}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2391,11 +2400,11 @@ Global
 		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Release|x64.Build.0 = Release|Any CPU
 		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Release|x86.ActiveCfg = Release|Any CPU
 		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Release|x86.Build.0 = Release|Any CPU
-		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2412,11 +2421,11 @@ Global
 		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Release|x64.Build.0 = Release|Any CPU
 		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Release|x86.ActiveCfg = Release|Any CPU
 		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Release|x86.Build.0 = Release|Any CPU
-		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{12E1EFEA-60DE-41D7-B148-AB0182594C1B}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2433,11 +2442,11 @@ Global
 		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Release|x64.Build.0 = Release|Any CPU
 		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Release|x86.ActiveCfg = Release|Any CPU
 		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Release|x86.Build.0 = Release|Any CPU
-		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{8C0E3201-1F0E-45A0-9897-A679C0C4F684}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2454,11 +2463,11 @@ Global
 		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Release|x64.Build.0 = Release|Any CPU
 		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Release|x86.ActiveCfg = Release|Any CPU
 		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Release|x86.Build.0 = Release|Any CPU
-		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{25E8AB9D-2D10-44F5-9F83-5A5134526771}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2475,11 +2484,11 @@ Global
 		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Release|x64.Build.0 = Release|Any CPU
 		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Release|x86.ActiveCfg = Release|Any CPU
 		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Release|x86.Build.0 = Release|Any CPU
-		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2496,11 +2505,11 @@ Global
 		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Release|x64.Build.0 = Release|Any CPU
 		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Release|x86.ActiveCfg = Release|Any CPU
 		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Release|x86.Build.0 = Release|Any CPU
-		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{50F1165C-5F71-472C-B317-35FFC14665EA}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{50F1165C-5F71-472C-B317-35FFC14665EA}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{50F1165C-5F71-472C-B317-35FFC14665EA}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{50F1165C-5F71-472C-B317-35FFC14665EA}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{50F1165C-5F71-472C-B317-35FFC14665EA}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{50F1165C-5F71-472C-B317-35FFC14665EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{50F1165C-5F71-472C-B317-35FFC14665EA}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{50F1165C-5F71-472C-B317-35FFC14665EA}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2517,11 +2526,11 @@ Global
 		{50F1165C-5F71-472C-B317-35FFC14665EA}.Release|x64.Build.0 = Release|Any CPU
 		{50F1165C-5F71-472C-B317-35FFC14665EA}.Release|x86.ActiveCfg = Release|Any CPU
 		{50F1165C-5F71-472C-B317-35FFC14665EA}.Release|x86.Build.0 = Release|Any CPU
-		{50F1165C-5F71-472C-B317-35FFC14665EA}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{50F1165C-5F71-472C-B317-35FFC14665EA}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{50F1165C-5F71-472C-B317-35FFC14665EA}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{50F1165C-5F71-472C-B317-35FFC14665EA}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{50F1165C-5F71-472C-B317-35FFC14665EA}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{82728202-1098-4E16-B598-5762EAF67D08}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{82728202-1098-4E16-B598-5762EAF67D08}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{82728202-1098-4E16-B598-5762EAF67D08}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{82728202-1098-4E16-B598-5762EAF67D08}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{82728202-1098-4E16-B598-5762EAF67D08}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{82728202-1098-4E16-B598-5762EAF67D08}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{82728202-1098-4E16-B598-5762EAF67D08}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{82728202-1098-4E16-B598-5762EAF67D08}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2538,11 +2547,11 @@ Global
 		{82728202-1098-4E16-B598-5762EAF67D08}.Release|x64.Build.0 = Release|Any CPU
 		{82728202-1098-4E16-B598-5762EAF67D08}.Release|x86.ActiveCfg = Release|Any CPU
 		{82728202-1098-4E16-B598-5762EAF67D08}.Release|x86.Build.0 = Release|Any CPU
-		{82728202-1098-4E16-B598-5762EAF67D08}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{82728202-1098-4E16-B598-5762EAF67D08}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{82728202-1098-4E16-B598-5762EAF67D08}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{82728202-1098-4E16-B598-5762EAF67D08}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{82728202-1098-4E16-B598-5762EAF67D08}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2559,11 +2568,11 @@ Global
 		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Release|x64.Build.0 = Release|Any CPU
 		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Release|x86.ActiveCfg = Release|Any CPU
 		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Release|x86.Build.0 = Release|Any CPU
-		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2580,11 +2589,11 @@ Global
 		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Release|x64.Build.0 = Release|Any CPU
 		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Release|x86.ActiveCfg = Release|Any CPU
 		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Release|x86.Build.0 = Release|Any CPU
-		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2601,11 +2610,11 @@ Global
 		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Release|x64.Build.0 = Release|Any CPU
 		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Release|x86.ActiveCfg = Release|Any CPU
 		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Release|x86.Build.0 = Release|Any CPU
-		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{4CBDF585-FD15-44E9-9795-1BED79BC4960}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2622,11 +2631,11 @@ Global
 		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Release|x64.Build.0 = Release|Any CPU
 		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Release|x86.ActiveCfg = Release|Any CPU
 		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Release|x86.Build.0 = Release|Any CPU
-		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2643,11 +2652,11 @@ Global
 		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Release|x64.Build.0 = Release|Any CPU
 		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Release|x86.ActiveCfg = Release|Any CPU
 		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Release|x86.Build.0 = Release|Any CPU
-		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{AF7CC240-B4D5-4C37-9B04-473CBCC52330}.Checked|x86.ActiveCfg = Debug|Any CPU
+		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
+		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Checked|arm.ActiveCfg = Debug|Any CPU
+		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Checked|arm64.ActiveCfg = Debug|Any CPU
+		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Checked|x64.ActiveCfg = Debug|Any CPU
+		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Checked|x86.ActiveCfg = Debug|Any CPU
 		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Debug|arm.ActiveCfg = Debug|Any CPU
@@ -2664,65 +2673,60 @@ Global
 		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Release|x64.Build.0 = Release|Any CPU
 		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Release|x86.ActiveCfg = Release|Any CPU
 		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Release|x86.Build.0 = Release|Any CPU
-		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Checked|Any CPU.ActiveCfg = Debug|Any CPU
-		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Checked|arm.ActiveCfg = Debug|Any CPU
-		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Checked|arm64.ActiveCfg = Debug|Any CPU
-		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Checked|x64.ActiveCfg = Debug|Any CPU
-		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744}.Checked|x86.ActiveCfg = Debug|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
 	EndGlobalSection
 	GlobalSection(NestedProjects) = preSolution
 		{71AB8240-F179-4B21-A8BE-8BE6CD774ED9} = {28140562-A65A-48E9-ABAB-53BA939084F0}
+		{F86D6534-1A96-489E-A807-C14E616686D6} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{FB17AC52-1633-4845-932B-9218DF895957} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{21791340-49C4-4C07-97FD-CAA1B72D3256} = {28140562-A65A-48E9-ABAB-53BA939084F0}
+		{86CF47B3-D607-4F59-896F-982FEA116086} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
+		{484B12B8-F027-4960-BAA9-14D646C80A28} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
 		{D16B3A49-5709-44CB-B6F8-8E3D585D236F} = {28140562-A65A-48E9-ABAB-53BA939084F0}
+		{F0BB4F76-7697-49A8-8204-FD4516EB325C} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
 		{B876CC90-CB87-4B1B-B6F5-247990192578} = {28140562-A65A-48E9-ABAB-53BA939084F0}
 		{999B1A08-2C7F-43AD-BC50-5F950320BBFF} = {28140562-A65A-48E9-ABAB-53BA939084F0}
+		{019A13D1-3493-4024-8223-FCB6763F80B4} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
 		{9ECF9E5C-860F-49C3-95D0-501147F19548} = {28140562-A65A-48E9-ABAB-53BA939084F0}
+		{8F97C1DE-07F7-449F-AA22-84A6D6836D82} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
 		{B11DD674-FFF7-4343-BA1B-F4C788B16DDA} = {28140562-A65A-48E9-ABAB-53BA939084F0}
+		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3} = {F362E63A-2B1A-445B-B198-3071D7DDE8CF}
+		{E64D31D0-8F38-4FDF-B60D-F955D2475566} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
 		{E7A05515-DABE-4C09-83CB-CE84EFDCD4CC} = {28140562-A65A-48E9-ABAB-53BA939084F0}
+		{4367BB9C-7EC2-4238-82E2-643DE24CC23E} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
 		{F977B04F-675C-4B78-8FCE-19D70504166D} = {28140562-A65A-48E9-ABAB-53BA939084F0}
+		{379BC6E6-1900-44F8-8D8C-AA2968A70008} = {F362E63A-2B1A-445B-B198-3071D7DDE8CF}
+		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9} = {F362E63A-2B1A-445B-B198-3071D7DDE8CF}
+		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37} = {F362E63A-2B1A-445B-B198-3071D7DDE8CF}
+		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
 		{A83A8520-F5E2-49B4-83BC-0F82A412951D} = {28140562-A65A-48E9-ABAB-53BA939084F0}
-		{F6A8185B-07C6-401D-9B40-3C560239E05F} = {28140562-A65A-48E9-ABAB-53BA939084F0}
-		{8C0E3201-1F0E-45A0-9897-A679C0C4F684} = {28140562-A65A-48E9-ABAB-53BA939084F0}
-		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13} = {28140562-A65A-48E9-ABAB-53BA939084F0}
-		{50F1165C-5F71-472C-B317-35FFC14665EA} = {28140562-A65A-48E9-ABAB-53BA939084F0}
-		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA} = {28140562-A65A-48E9-ABAB-53BA939084F0}
 		{A3873DDB-47E7-4DB6-872C-4B46A779913A} = {A8F66678-BCB4-4F6B-B25A-600FE36C8564}
-		{A8F66678-BCB4-4F6B-B25A-600FE36C8564} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{23D41678-453F-4F2A-85F1-167E63DA6D67} = {21065BE2-2867-4CE8-A903-8B740CC64B78}
-		{21065BE2-2867-4CE8-A903-8B740CC64B78} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{6790611D-ACE0-47C6-83E0-E404364B5210} = {23D9BABE-C96D-44F2-A9F6-C26D5AD62AF9}
-		{23D9BABE-C96D-44F2-A9F6-C26D5AD62AF9} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{68DE62F3-AB3A-4AC9-BCEB-CAD95B48D5F9} = {3DFDB754-40F3-4D77-92DE-C5375D6C7A57}
-		{3DFDB754-40F3-4D77-92DE-C5375D6C7A57} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{B73090B8-20CB-4586-A586-B7F37C1A06FF} = {606124FE-6314-43D7-AA19-35334046FF29}
 		{4C1F2761-857C-40A4-8CDD-7139380DA4D7} = {606124FE-6314-43D7-AA19-35334046FF29}
 		{70441C80-1F14-42F9-8225-A891E3C9A82A} = {606124FE-6314-43D7-AA19-35334046FF29}
 		{EA3FA657-060E-43A3-9CF0-45FCC8E7E5B6} = {606124FE-6314-43D7-AA19-35334046FF29}
-		{606124FE-6314-43D7-AA19-35334046FF29} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{1C44735B-C77E-479A-ABBB-8B6EB83299CF} = {BB82924E-1218-401C-85C2-F455FFA797F4}
 		{0AB5F4E7-A0D5-44C5-A1CF-37CDBDC2F531} = {BB82924E-1218-401C-85C2-F455FFA797F4}
 		{CA97D5F2-4D71-4448-8DEB-E18C237C76B3} = {BB82924E-1218-401C-85C2-F455FFA797F4}
-		{BB82924E-1218-401C-85C2-F455FFA797F4} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{C5F86889-E147-4424-9165-D2DF453741F2} = {8518F031-3709-45D6-B577-356E32BB5FF7}
 		{67D9B289-AA6D-4FD8-A99D-F8651A51BE7E} = {8518F031-3709-45D6-B577-356E32BB5FF7}
 		{B6F2F0D5-9275-4F00-A2C3-2048AF0CAF12} = {8518F031-3709-45D6-B577-356E32BB5FF7}
 		{EF24E79E-0E96-4228-9D8E-44E1C2D8BDA9} = {8518F031-3709-45D6-B577-356E32BB5FF7}
 		{32247916-74DE-4A62-AE68-04976D2B0149} = {8518F031-3709-45D6-B577-356E32BB5FF7}
 		{5090E2BE-4BC9-4027-BF67-452049996F43} = {8518F031-3709-45D6-B577-356E32BB5FF7}
-		{8518F031-3709-45D6-B577-356E32BB5FF7} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{61164A2A-D90F-4122-AF5D-9704564E80E0} = {9DAF4ABC-9D97-4B12-9524-02DEEA8BF5B0}
-		{9DAF4ABC-9D97-4B12-9524-02DEEA8BF5B0} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{FDAB957F-5C83-4946-ACF5-825B2B6DAFE6} = {242CC51B-8B02-4679-8899-77631E6B6AD0}
 		{652F0921-C134-4882-A9D0-0CBB2F8D75B2} = {242CC51B-8B02-4679-8899-77631E6B6AD0}
 		{C51DBBBF-3BBA-4345-AEAA-E6A21F9F7016} = {242CC51B-8B02-4679-8899-77631E6B6AD0}
-		{242CC51B-8B02-4679-8899-77631E6B6AD0} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{CFC724F4-18A2-401F-AED4-7D7A779CE3EA} = {60ED493D-0A65-4052-9B5E-547451CB1E6B}
-		{60ED493D-0A65-4052-9B5E-547451CB1E6B} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{9AA6AAD7-E09B-4F9E-B398-1734A5815B6B} = {34FC303C-B543-4E5B-BDA8-DE9B3C4BFEFA}
-		{34FC303C-B543-4E5B-BDA8-DE9B3C4BFEFA} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{E73952E5-C929-4566-962A-B9AF65289871} = {0A2BFEE4-98EE-4AB5-8811-BB1B3207D013}
 		{9299BE78-5A00-425A-A38F-7F1DC9C3F63E} = {0A2BFEE4-98EE-4AB5-8811-BB1B3207D013}
 		{A6D86695-D570-43F9-99A3-6C7445362D53} = {0A2BFEE4-98EE-4AB5-8811-BB1B3207D013}
@@ -2742,24 +2746,17 @@ Global
 		{049319F0-D438-404C-A6D4-4D1E99DAE647} = {0A2BFEE4-98EE-4AB5-8811-BB1B3207D013}
 		{691D460F-764F-48E7-9A3F-7D1A32388542} = {0A2BFEE4-98EE-4AB5-8811-BB1B3207D013}
 		{9F1AC402-BFAD-4EA2-AD31-BBCA73375953} = {0A2BFEE4-98EE-4AB5-8811-BB1B3207D013}
-		{0A2BFEE4-98EE-4AB5-8811-BB1B3207D013} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{DBDA55A9-4BB9-4FF8-9066-EE7157E627C1} = {AAC70548-7854-4CEE-A06F-C148602C1993}
-		{AAC70548-7854-4CEE-A06F-C148602C1993} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{00807FA3-A9B3-4AF4-86CD-CF10255E6E8C} = {BBB16624-A02E-443C-AFE2-A4A98F42B59C}
-		{BBB16624-A02E-443C-AFE2-A4A98F42B59C} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{E88FDBA9-3D1D-480D-8AB3-341C9E442D03} = {E0CBE95A-2098-4848-BB89-87D92AA9FE0D}
-		{E0CBE95A-2098-4848-BB89-87D92AA9FE0D} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{AD4767E9-57F6-47DD-ABD3-D3AFDF384703} = {E38C85C0-9609-4CAA-B55B-2B78B0BC70FC}
 		{E1847313-0072-49CA-A1E6-6C05CECAB77A} = {E38C85C0-9609-4CAA-B55B-2B78B0BC70FC}
 		{D86CC877-79A0-4AFA-9A76-7263B414614D} = {E38C85C0-9609-4CAA-B55B-2B78B0BC70FC}
 		{E5E5C278-EBB9-4704-B7BA-56D39A5A343C} = {E38C85C0-9609-4CAA-B55B-2B78B0BC70FC}
 		{59CD73F2-1310-46EE-B99A-594859FD8A37} = {E38C85C0-9609-4CAA-B55B-2B78B0BC70FC}
 		{3D58505D-F17F-49E0-9131-42F273E3F5B9} = {E38C85C0-9609-4CAA-B55B-2B78B0BC70FC}
-		{E38C85C0-9609-4CAA-B55B-2B78B0BC70FC} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{0BF9F165-888D-486A-B6FD-6F3029913D70} = {B7E9D0DA-FE45-4CB8-9CE1-D5E52917E3B3}
-		{B7E9D0DA-FE45-4CB8-9CE1-D5E52917E3B3} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{C485C170-2B88-4EA9-8826-7BC4C9BA2324} = {AF750AE8-AB38-44E3-BECD-5267DD578957}
-		{AF750AE8-AB38-44E3-BECD-5267DD578957} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{43C40A0B-0B0E-4D27-8534-11CD5A540F7C} = {88C12684-74BD-4E2F-8F95-587EC30FE39B}
 		{3B79DD71-8C2F-41BC-A1A7-86A490D6C726} = {88C12684-74BD-4E2F-8F95-587EC30FE39B}
 		{4EE36055-AD7C-4779-B3F6-08687960DCC3} = {88C12684-74BD-4E2F-8F95-587EC30FE39B}
@@ -2769,53 +2766,65 @@ Global
 		{1BCCD2F5-A561-4641-8A0B-51F3EDCA35DC} = {88C12684-74BD-4E2F-8F95-587EC30FE39B}
 		{0F83B07B-2E3F-4708-BE6D-7A8DA8168803} = {88C12684-74BD-4E2F-8F95-587EC30FE39B}
 		{833C1D45-9BBB-4A92-93B7-4EFFD9E945AD} = {88C12684-74BD-4E2F-8F95-587EC30FE39B}
-		{88C12684-74BD-4E2F-8F95-587EC30FE39B} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{26676FE3-DDF7-4A5E-9ABA-417E0C24CA7B} = {6433F4E3-DFC6-4549-8632-797CB749D880}
-		{6433F4E3-DFC6-4549-8632-797CB749D880} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{697C63A2-2517-4F85-8B88-C94E538BE407} = {5961F488-A1DD-4B28-A610-F3D616ED6766}
-		{5961F488-A1DD-4B28-A610-F3D616ED6766} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{90F8C08F-A6D0-4AA0-8615-9279E5E4FC19} = {87ED1E88-3518-4BAA-9DEA-3379A48DCFD4}
-		{87ED1E88-3518-4BAA-9DEA-3379A48DCFD4} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{172F6EB9-6001-4657-8AE2-83DB23B371CA} = {2B160F79-3016-4753-A009-00DC97F2730C}
-		{2B160F79-3016-4753-A009-00DC97F2730C} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{7E3B4C81-9010-4473-BD3C-5B90F9533CD7} = {0527FC3D-2997-4C7B-B63C-306F7A52BEBD}
-		{0527FC3D-2997-4C7B-B63C-306F7A52BEBD} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
 		{BBC59E42-DC0B-4847-B336-13ACF4279F17} = {DBFE9843-8DB5-46BC-B243-B3A0B98326C9}
-		{DBFE9843-8DB5-46BC-B243-B3A0B98326C9} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
-		{F86D6534-1A96-489E-A807-C14E616686D6} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
-		{AF7BA66D-EA0E-4755-8DA8-4CFE9B935F83} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
-		{9DF0247E-5B81-4EF3-82CA-3E70B3A56742} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
-		{FB17AC52-1633-4845-932B-9218DF895957} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
-		{86CF47B3-D607-4F59-896F-982FEA116086} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
-		{484B12B8-F027-4960-BAA9-14D646C80A28} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
-		{F0BB4F76-7697-49A8-8204-FD4516EB325C} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
-		{019A13D1-3493-4024-8223-FCB6763F80B4} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
-		{8F97C1DE-07F7-449F-AA22-84A6D6836D82} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
-		{E64D31D0-8F38-4FDF-B60D-F955D2475566} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
-		{4367BB9C-7EC2-4238-82E2-643DE24CC23E} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
-		{F39E2C7E-5FE1-460C-AC2C-7E2B50955F2C} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
 		{78C45C87-93B6-4FCE-B174-520756DE4E74} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
 		{07197CBF-7C41-47B6-9E52-88A6D4485219} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
 		{1B4552A4-91FD-4C6F-9EB4-3454C4BE428F} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
+		{F6A8185B-07C6-401D-9B40-3C560239E05F} = {28140562-A65A-48E9-ABAB-53BA939084F0}
 		{1E6C7D88-7584-444C-97CD-2FAAB5BEF465} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
 		{12E1EFEA-60DE-41D7-B148-AB0182594C1B} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
+		{8C0E3201-1F0E-45A0-9897-A679C0C4F684} = {28140562-A65A-48E9-ABAB-53BA939084F0}
 		{25E8AB9D-2D10-44F5-9F83-5A5134526771} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
+		{9CF6C6E6-0E9F-4A95-84B5-6083EAB6FA13} = {28140562-A65A-48E9-ABAB-53BA939084F0}
 		{17D73C46-97FF-40EE-B0D9-FB0EBA14B8D8} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
+		{50F1165C-5F71-472C-B317-35FFC14665EA} = {28140562-A65A-48E9-ABAB-53BA939084F0}
 		{82728202-1098-4E16-B598-5762EAF67D08} = {5B2B5E7E-A2FB-4095-9E79-404BF53E0133}
-		{CF79B5AE-38CB-4B80-BF92-CF634C0B7EC3} = {F362E63A-2B1A-445B-B198-3071D7DDE8CF}
-		{379BC6E6-1900-44F8-8D8C-AA2968A70008} = {F362E63A-2B1A-445B-B198-3071D7DDE8CF}
-		{4FA4A9A6-1D38-414B-96F0-3CFB63C687C9} = {F362E63A-2B1A-445B-B198-3071D7DDE8CF}
-		{A7B7DE04-7261-4D4C-AA78-9F2D9B5A1C37} = {F362E63A-2B1A-445B-B198-3071D7DDE8CF}
+		{069C2B51-069A-4FBB-BFE9-42D573F1CEEA} = {28140562-A65A-48E9-ABAB-53BA939084F0}
 		{CFAB1236-51C3-4A13-A57F-16022FD0A7EE} = {13818769-DC01-4715-9590-E000D03E42A9}
 		{4CBDF585-FD15-44E9-9795-1BED79BC4960} = {13818769-DC01-4715-9590-E000D03E42A9}
-		{13818769-DC01-4715-9590-E000D03E42A9} = {67DCB1C2-0B95-40B6-ACE8-9812BF57EB19}
 		{1CBBF9E2-4EBD-40F0-BB9E-66BCE3CA5AA8} = {04D0E381-5B43-42C0-8E08-FADBFCECB353}
 		{AF7CC240-B4D5-4C37-9B04-473CBCC52330} = {04D0E381-5B43-42C0-8E08-FADBFCECB353}
-		{04D0E381-5B43-42C0-8E08-FADBFCECB353} = {67DCB1C2-0B95-40B6-ACE8-9812BF57EB19}
 		{3F5ABC5D-42DE-44C4-BEFC-741F4974C744} = {F65030D7-DDBD-4D4C-B6E3-D3C0DD7FD569}
+		{A8F66678-BCB4-4F6B-B25A-600FE36C8564} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{21065BE2-2867-4CE8-A903-8B740CC64B78} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{23D9BABE-C96D-44F2-A9F6-C26D5AD62AF9} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{3DFDB754-40F3-4D77-92DE-C5375D6C7A57} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{606124FE-6314-43D7-AA19-35334046FF29} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{BB82924E-1218-401C-85C2-F455FFA797F4} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{8518F031-3709-45D6-B577-356E32BB5FF7} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{9DAF4ABC-9D97-4B12-9524-02DEEA8BF5B0} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{242CC51B-8B02-4679-8899-77631E6B6AD0} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{60ED493D-0A65-4052-9B5E-547451CB1E6B} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{34FC303C-B543-4E5B-BDA8-DE9B3C4BFEFA} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{0A2BFEE4-98EE-4AB5-8811-BB1B3207D013} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{AAC70548-7854-4CEE-A06F-C148602C1993} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{BBB16624-A02E-443C-AFE2-A4A98F42B59C} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{E0CBE95A-2098-4848-BB89-87D92AA9FE0D} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{E38C85C0-9609-4CAA-B55B-2B78B0BC70FC} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{B7E9D0DA-FE45-4CB8-9CE1-D5E52917E3B3} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{AF750AE8-AB38-44E3-BECD-5267DD578957} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{88C12684-74BD-4E2F-8F95-587EC30FE39B} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{6433F4E3-DFC6-4549-8632-797CB749D880} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{5961F488-A1DD-4B28-A610-F3D616ED6766} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{87ED1E88-3518-4BAA-9DEA-3379A48DCFD4} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{2B160F79-3016-4753-A009-00DC97F2730C} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{0527FC3D-2997-4C7B-B63C-306F7A52BEBD} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{DBFE9843-8DB5-46BC-B243-B3A0B98326C9} = {FD72C125-C10D-457B-8AFC-6B4E5237AF6A}
+		{13818769-DC01-4715-9590-E000D03E42A9} = {67DCB1C2-0B95-40B6-ACE8-9812BF57EB19}
+		{04D0E381-5B43-42C0-8E08-FADBFCECB353} = {67DCB1C2-0B95-40B6-ACE8-9812BF57EB19}
 		{F65030D7-DDBD-4D4C-B6E3-D3C0DD7FD569} = {67DCB1C2-0B95-40B6-ACE8-9812BF57EB19}
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {19706846-1F47-42ED-B649-B0982EE96E6B}
 	EndGlobalSection
+	GlobalSection(SharedMSBuildProjectFiles) = preSolution
+		..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{4cbdf585-fd15-44e9-9795-1bed79bc4960}*SharedItemsImports = 5
+		..\System.Private.CoreLib\src\System.Private.CoreLib.Shared.projitems*{71ab8240-f179-4b21-a8be-8be6cd774ed9}*SharedItemsImports = 5
+		..\..\tools\illink\src\ILLink.Shared\ILLink.Shared.projitems*{af7cc240-b4d5-4c37-9b04-473cbcc52330}*SharedItemsImports = 5
+	EndGlobalSection
 EndGlobal
diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs
index 4c6ffd326a68..0371e3037e26 100644
--- a/src/libraries/System.Runtime/ref/System.Runtime.cs
+++ b/src/libraries/System.Runtime/ref/System.Runtime.cs
@@ -4710,6 +4710,7 @@ public readonly ref partial struct ReadOnlySpan<T>
         public bool IsEmpty { get { throw null; } }
         public ref readonly T this[int index] { get { throw null; } }
         public int Length { get { throw null; } }
+        public static System.ReadOnlySpan<T> CastUp<TDerived>(System.ReadOnlySpan<TDerived> items) where TDerived : class?, T { throw null; }
         public void CopyTo(System.Span<T> destination) { }
         [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         [System.ObsoleteAttribute("Equals() on ReadOnlySpan will always throw an exception. Use the equality operator instead.")]
@@ -5613,6 +5614,16 @@ protected TimeProvider() { }
         public override bool Equals([System.Diagnostics.CodeAnalysis.NotNullWhenAttribute(true)] object? value) { throw null; }
         public bool Equals(System.TimeSpan obj) { throw null; }
         public static bool Equals(System.TimeSpan t1, System.TimeSpan t2) { throw null; }
+        public static System.TimeSpan FromDays(int days) { throw null; }
+        public static System.TimeSpan FromDays(int days, int hours = 0, long minutes = 0, long seconds = 0, long milliseconds = 0, long microseconds = 0) { throw null; }
+        public static System.TimeSpan FromHours(int hours) { throw null; }
+        public static System.TimeSpan FromHours(int hours, long minutes = 0, long seconds = 0, long milliseconds = 0, long microseconds = 0) { throw null; }
+        public static System.TimeSpan FromMinutes(long minutes) { throw null; }
+        public static System.TimeSpan FromMinutes(long minutes, long seconds = 0, long milliseconds = 0, long microseconds = 0) { throw null; }
+        public static System.TimeSpan FromSeconds(long seconds) { throw null; }
+        public static System.TimeSpan FromSeconds(long seconds, long milliseconds = 0, long microseconds = 0) { throw null; }
+        public static System.TimeSpan FromMilliseconds(long milliseconds, long microseconds = 0) { throw null; }
+        public static System.TimeSpan FromMicroseconds(long microseconds) { throw null; }
         public static System.TimeSpan FromDays(double value) { throw null; }
         public static System.TimeSpan FromHours(double value) { throw null; }
         public static System.TimeSpan FromMicroseconds(double value) { throw null; }
@@ -8417,6 +8428,18 @@ public ExperimentalAttribute(string diagnosticId) { }
         public string DiagnosticId { get { throw null; } }
         public string? UrlFormat { get { throw null; } set { } }
     }
+    [System.AttributeUsageAttribute(System.AttributeTargets.Property, Inherited = false, AllowMultiple = true)]
+    public sealed class FeatureGuardAttribute : System.Attribute
+    {
+        public FeatureGuardAttribute(System.Type featureType) { }
+        public System.Type FeatureType { get { throw null; } }
+    }
+    [System.AttributeUsage(System.AttributeTargets.Property, Inherited = false)]
+    public sealed class FeatureSwitchDefinitionAttribute : Attribute
+    {
+        public FeatureSwitchDefinitionAttribute(string switchName) { }
+        public string SwitchName { get { throw null; } }
+    }
     [System.AttributeUsageAttribute(System.AttributeTargets.Field | System.AttributeTargets.Parameter | System.AttributeTargets.Property | System.AttributeTargets.ReturnValue, Inherited=false)]
     public sealed partial class MaybeNullAttribute : System.Attribute
     {
@@ -11588,6 +11611,7 @@ public enum GenericParameterAttributes
         NotNullableValueTypeConstraint = 8,
         DefaultConstructorConstraint = 16,
         SpecialConstraintMask = 28,
+        AllowByRefLike = 32,
     }
     public partial interface ICustomAttributeProvider
     {
@@ -13035,6 +13059,11 @@ public sealed partial class NullablePublicOnlyAttribute : System.Attribute
         public readonly bool IncludesInternals;
         public NullablePublicOnlyAttribute(bool value) { }
     }
+    [System.AttributeUsageAttribute(System.AttributeTargets.Parameter, Inherited = true, AllowMultiple = false)]
+    public sealed partial class ParamCollectionAttribute : System.Attribute
+    {
+        public ParamCollectionAttribute() { }
+    }
     public partial struct PoolingAsyncValueTaskMethodBuilder
     {
         private object _dummy;
@@ -13102,13 +13131,16 @@ public RuntimeCompatibilityAttribute() { }
     public static partial class RuntimeFeature
     {
         public const string ByRefFields = "ByRefFields";
+        public const string ByRefLikeGenerics = "ByRefLikeGenerics";
         public const string CovariantReturnsOfClasses = "CovariantReturnsOfClasses";
         public const string DefaultImplementationsOfInterfaces = "DefaultImplementationsOfInterfaces";
         public const string NumericIntPtr = "NumericIntPtr";
         public const string PortablePdb = "PortablePdb";
         public const string UnmanagedSignatureCallingConvention = "UnmanagedSignatureCallingConvention";
         public const string VirtualStaticsInInterfaces = "VirtualStaticsInInterfaces";
+        [System.Diagnostics.CodeAnalysis.FeatureGuard(typeof(System.Diagnostics.CodeAnalysis.RequiresDynamicCodeAttribute))]
         public static bool IsDynamicCodeCompiled { get { throw null; } }
+        [System.Diagnostics.CodeAnalysis.FeatureSwitchDefinition("System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeSupported")]
         public static bool IsDynamicCodeSupported { get { throw null; } }
         public static bool IsSupported(string feature) { throw null; }
     }
@@ -13117,6 +13149,7 @@ public static partial class RuntimeHelpers
         [System.ObsoleteAttribute("OffsetToStringData has been deprecated. Use string.GetPinnableReference() instead.")]
         public static int OffsetToStringData { get { throw null; } }
         public static System.IntPtr AllocateTypeAssociatedMemory(System.Type type, int size) { throw null; }
+        public static object? Box(ref byte target, System.RuntimeTypeHandle type) { throw null; }
         public static System.ReadOnlySpan<T> CreateSpan<T>(System.RuntimeFieldHandle fldHandle) { throw null; }
         public static void EnsureSufficientExecutionStack() { }
         public static new bool Equals(object? o1, object? o2) { throw null; }
@@ -15303,6 +15336,12 @@ public static void WaitAll(System.Threading.Tasks.Task[] tasks, System.Threading
         public static System.Threading.Tasks.Task<System.Threading.Tasks.Task<TResult>> WhenAny<TResult>(System.Collections.Generic.IEnumerable<System.Threading.Tasks.Task<TResult>> tasks) { throw null; }
         public static System.Threading.Tasks.Task<System.Threading.Tasks.Task<TResult>> WhenAny<TResult>(System.Threading.Tasks.Task<TResult> task1, System.Threading.Tasks.Task<TResult> task2) { throw null; }
         public static System.Threading.Tasks.Task<System.Threading.Tasks.Task<TResult>> WhenAny<TResult>(params System.Threading.Tasks.Task<TResult>[] tasks) { throw null; }
+        public static System.Collections.Generic.IAsyncEnumerable<System.Threading.Tasks.Task> WhenEach(System.Collections.Generic.IEnumerable<System.Threading.Tasks.Task> tasks) { throw null; }
+        public static System.Collections.Generic.IAsyncEnumerable<System.Threading.Tasks.Task> WhenEach(params System.Threading.Tasks.Task[] tasks) { throw null; }
+        public static System.Collections.Generic.IAsyncEnumerable<System.Threading.Tasks.Task> WhenEach(System.ReadOnlySpan<System.Threading.Tasks.Task> tasks) { throw null; }
+        public static System.Collections.Generic.IAsyncEnumerable<System.Threading.Tasks.Task<TResult>> WhenEach<TResult>(System.Collections.Generic.IEnumerable<System.Threading.Tasks.Task<TResult>> tasks) { throw null; }
+        public static System.Collections.Generic.IAsyncEnumerable<System.Threading.Tasks.Task<TResult>> WhenEach<TResult>(params System.Threading.Tasks.Task<TResult>[] tasks) { throw null; }
+        public static System.Collections.Generic.IAsyncEnumerable<System.Threading.Tasks.Task<TResult>> WhenEach<TResult>(System.ReadOnlySpan<System.Threading.Tasks.Task<TResult>> tasks) { throw null; }
         public static System.Runtime.CompilerServices.YieldAwaitable Yield() { throw null; }
     }
     public static partial class TaskAsyncEnumerableExtensions
@@ -15836,6 +15875,8 @@ protected virtual void CheckSecurity() { }
         [System.ObsoleteAttribute("Uri.Escape has been deprecated and is not supported.")]
         protected virtual void Escape() { }
         public static string EscapeDataString(string stringToEscape) { throw null; }
+        public static string EscapeDataString(System.ReadOnlySpan<char> charsToEscape) { throw null; }
+        public static bool TryEscapeDataString(System.ReadOnlySpan<char> charsToEscape, System.Span<char> destination, out int charsWritten) { throw null; }
         [System.ObsoleteAttribute("Uri.EscapeString has been deprecated. Use GetComponents() or Uri.EscapeDataString to escape a Uri component or a string.")]
         protected static string EscapeString(string? str) { throw null; }
         [System.ObsoleteAttribute("Uri.EscapeUriString can corrupt the Uri string in some cases. Consider using Uri.EscapeDataString for query string components instead.", DiagnosticId="SYSLIB0013", UrlFormat="https://aka.ms/dotnet-warnings/{0}")]
@@ -15877,6 +15918,8 @@ void System.Runtime.Serialization.ISerializable.GetObjectData(System.Runtime.Ser
         [System.ObsoleteAttribute("Uri.Unescape has been deprecated. Use GetComponents() or Uri.UnescapeDataString() to unescape a Uri component or a string.")]
         protected virtual string Unescape(string path) { throw null; }
         public static string UnescapeDataString(string stringToUnescape) { throw null; }
+        public static string UnescapeDataString(System.ReadOnlySpan<char> charsToUnescape) { throw null; }
+        public static bool TryUnescapeDataString(System.ReadOnlySpan<char> charsToUnescape, System.Span<char> destination, out int charsWritten) { throw null; }
     }
     public partial class UriBuilder
     {
diff --git a/src/libraries/System.Runtime/tests/System.Buffers.Tests/ArrayPool/UnitTests.cs b/src/libraries/System.Runtime/tests/System.Buffers.Tests/ArrayPool/UnitTests.cs
index cc704c68fac1..d58a8838d90a 100644
--- a/src/libraries/System.Runtime/tests/System.Buffers.Tests/ArrayPool/UnitTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Buffers.Tests/ArrayPool/UnitTests.cs
@@ -272,7 +272,7 @@ public static void RentingReturningThenRentingABufferShouldNotAllocate()
             Assert.Equal(id, bt.GetHashCode());
         }
 
-        [Theory]
+        [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupportedOrBrowserBackgroundExec))]
         [MemberData(nameof(BytePoolInstances))]
         public static void CanRentManySizedBuffers(ArrayPool<byte> pool)
         {
diff --git a/src/libraries/System.Runtime/tests/System.Buffers.Tests/System.Buffers.Tests.csproj b/src/libraries/System.Runtime/tests/System.Buffers.Tests/System.Buffers.Tests.csproj
index 10b79281bf52..66ae6833a50e 100644
--- a/src/libraries/System.Runtime/tests/System.Buffers.Tests/System.Buffers.Tests.csproj
+++ b/src/libraries/System.Runtime/tests/System.Buffers.Tests/System.Buffers.Tests.csproj
@@ -5,6 +5,9 @@
     <IncludeRemoteExecutor>true</IncludeRemoteExecutor>
     <TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(TargetOS)' == 'browser'">
+    <WasmXHarnessMaxParallelThreads>1</WasmXHarnessMaxParallelThreads>
+  </PropertyGroup>
   <ItemGroup>
     <Compile Include="ArrayPool\ArrayPoolTest.cs" />
     <Compile Include="ArrayPool\CollectionTests.cs" />
diff --git a/src/libraries/System.Runtime/tests/System.Dynamic.Runtime.Tests/Dynamic.Context/Conformance.dynamic.context.operator.regclass.cs b/src/libraries/System.Runtime/tests/System.Dynamic.Runtime.Tests/Dynamic.Context/Conformance.dynamic.context.operator.regclass.cs
index 789077324c42..c5af627116a4 100644
--- a/src/libraries/System.Runtime/tests/System.Dynamic.Runtime.Tests/Dynamic.Context/Conformance.dynamic.context.operator.regclass.cs
+++ b/src/libraries/System.Runtime/tests/System.Dynamic.Runtime.Tests/Dynamic.Context/Conformance.dynamic.context.operator.regclass.cs
@@ -4426,6 +4426,10 @@ public static void Foo(string x)
         {
         }
 
+        public static void Foo(int x)
+        {
+        }
+
         public void Foo2()
         {
         }
diff --git a/src/libraries/System.Runtime/tests/System.Dynamic.Runtime.Tests/Dynamic.Declarations/Conformance.dynamic.declarations.returnType.indexers.cs b/src/libraries/System.Runtime/tests/System.Dynamic.Runtime.Tests/Dynamic.Declarations/Conformance.dynamic.declarations.returnType.indexers.cs
index 131cbebf009e..e8f7e2f9869f 100644
--- a/src/libraries/System.Runtime/tests/System.Dynamic.Runtime.Tests/Dynamic.Declarations/Conformance.dynamic.declarations.returnType.indexers.cs
+++ b/src/libraries/System.Runtime/tests/System.Dynamic.Runtime.Tests/Dynamic.Declarations/Conformance.dynamic.declarations.returnType.indexers.cs
@@ -262,6 +262,14 @@ public class MyClass
                     return d;
                 }
             }
+            public dynamic this[long x, dynamic d]
+            {
+                get
+                {
+                    MyClass.Status = 2;
+                    return d;
+                }
+            }
         }
 
         [Fact]
diff --git a/src/libraries/System.Runtime/tests/System.Dynamic.Runtime.Tests/Dynamic.NamedAndOptional/Conformance.dynamic.namedandoptional.usage.other.cs b/src/libraries/System.Runtime/tests/System.Dynamic.Runtime.Tests/Dynamic.NamedAndOptional/Conformance.dynamic.namedandoptional.usage.other.cs
index 983c44015b4d..3fa9d8c21946 100644
--- a/src/libraries/System.Runtime/tests/System.Dynamic.Runtime.Tests/Dynamic.NamedAndOptional/Conformance.dynamic.namedandoptional.usage.other.cs
+++ b/src/libraries/System.Runtime/tests/System.Dynamic.Runtime.Tests/Dynamic.NamedAndOptional/Conformance.dynamic.namedandoptional.usage.other.cs
@@ -2394,56 +2394,58 @@ public static int MainMethod()
 }
 
 
-
-namespace ManagedTests.DynamicCSharp.Conformance.dynamic.namedandoptional.usage.other.ref03a.ref03a
-{
-    // <Area>Declaration of Methods with Optional Parameters</Area>
-    // <Title>calling with a ref parameter </Title>
-    // <Description>Should be able to call a ref parameter</Description>
-    // <Expects status=success></Expects>
-    // <Code>
-    using System.Runtime.InteropServices;
-
-    public class Parent
-    {
-        public int Foo(
-        [Optional]
-        ref int x)
-        {
-            if (x == 2)
-                return 1;
-            return 1;
-        }
-    }
-
-    public class Test
-    {
-        [Fact]
-        public static void DynamicCSharpRunTest()
-        {
-            Assert.Equal(0, MainMethod());
-        }
-
-        public static int MainMethod()
-        {
-            Parent p = new Parent();
-            dynamic i = 2;
-            try
-            {
-                p.Foo(x: i);
-            }
-            catch (Microsoft.CSharp.RuntimeBinder.RuntimeBinderException e)
-            {
-                bool ret = ErrorVerifier.Verify(ErrorMessageId.BadArgTypes, e.Message, "Parent.Foo(ref int)");
-                if (ret)
-                    return 0;
-            }
-
-            return 1;
-        }
-    }
-    //</Code>
-}
+// This test is failing to compile now due to this C# breaking change:
+// https://github.com/dotnet/roslyn/blob/291255bc40c2ccd8ef1ca12ed580820c79f527cc/docs/compilers/CSharp/Compiler%20Breaking%20Changes%20-%20DotNet%208.md#ref-modifiers-of-dynamic-arguments-should-be-compatible-with-ref-modifiers-of-corresponding-parameters
+
+// namespace ManagedTests.DynamicCSharp.Conformance.dynamic.namedandoptional.usage.other.ref03a.ref03a
+// {
+//     // <Area>Declaration of Methods with Optional Parameters</Area>
+//     // <Title>calling with a ref parameter </Title>
+//     // <Description>Should be able to call a ref parameter</Description>
+//     // <Expects status=success></Expects>
+//     // <Code>
+//     using System.Runtime.InteropServices;
+
+//     public class Parent
+//     {
+//         public int Foo(
+//         [Optional]
+//         ref int x)
+//         {
+//             if (x == 2)
+//                 return 1;
+//             return 1;
+//         }
+//     }
+
+//     public class Test
+//     {
+//         [Fact]
+//         public static void DynamicCSharpRunTest()
+//         {
+//             Assert.Equal(0, MainMethod());
+//         }
+
+//         public static int MainMethod()
+//         {
+//             Parent p = new Parent();
+//             dynamic i = 2;
+//             try
+//             {
+//                 p.Foo(x: i);
+//             }
+//             catch (Microsoft.CSharp.RuntimeBinder.RuntimeBinderException e)
+//             {
+//                 bool ret = ErrorVerifier.Verify(ErrorMessageId.BadArgTypes, e.Message, "Parent.Foo(ref int)");
+//                 if (ret)
+//                     return 0;
+//             }
+
+//             return 1;
+//         }
+//     }
+//     //</Code>
+// }
 
 
 
@@ -2498,56 +2500,58 @@ public static int MainMethod()
 }
 
 
-
-namespace ManagedTests.DynamicCSharp.Conformance.dynamic.namedandoptional.usage.other.ref03c.ref03c
-{
-    // <Area>Declaration of Methods with Optional Parameters</Area>
-    // <Title>calling with a ref parameter </Title>
-    // <Description>Should be able to call a ref parameter</Description>
-    // <Expects status=success></Expects>
-    // <Code>
-    using System.Runtime.InteropServices;
-
-    public class Parent
-    {
-        public int Foo(
-        [Optional]
-        ref dynamic x)
-        {
-            if (x == 2)
-                return 1;
-            return 1;
-        }
-    }
-
-    public class Test
-    {
-        [Fact]
-        public static void DynamicCSharpRunTest()
-        {
-            Assert.Equal(0, MainMethod());
-        }
-
-        public static int MainMethod()
-        {
-            Parent p = new Parent();
-            dynamic i = 2;
-            try
-            {
-                p.Foo(x: i);
-            }
-            catch (Microsoft.CSharp.RuntimeBinder.RuntimeBinderException e)
-            {
-                bool ret = ErrorVerifier.Verify(ErrorMessageId.BadArgTypes, e.Message, "Parent.Foo(ref object)");
-                if (ret)
-                    return 0;
-            }
-
-            return 1;
-        }
-    }
-    //</Code>
-}
+// This test is failing to compile now due to this C# breaking change:
+// https://github.com/dotnet/roslyn/blob/291255bc40c2ccd8ef1ca12ed580820c79f527cc/docs/compilers/CSharp/Compiler%20Breaking%20Changes%20-%20DotNet%208.md#ref-modifiers-of-dynamic-arguments-should-be-compatible-with-ref-modifiers-of-corresponding-parameters
+
+// namespace ManagedTests.DynamicCSharp.Conformance.dynamic.namedandoptional.usage.other.ref03c.ref03c
+// {
+//     // <Area>Declaration of Methods with Optional Parameters</Area>
+//     // <Title>calling with a ref parameter </Title>
+//     // <Description>Should be able to call a ref parameter</Description>
+//     // <Expects status=success></Expects>
+//     // <Code>
+//     using System.Runtime.InteropServices;
+
+//     public class Parent
+//     {
+//         public int Foo(
+//         [Optional]
+//         ref dynamic x)
+//         {
+//             if (x == 2)
+//                 return 1;
+//             return 1;
+//         }
+//     }
+
+//     public class Test
+//     {
+//         [Fact]
+//         public static void DynamicCSharpRunTest()
+//         {
+//             Assert.Equal(0, MainMethod());
+//         }
+
+//         public static int MainMethod()
+//         {
+//             Parent p = new Parent();
+//             dynamic i = 2;
+//             try
+//             {
+//                 p.Foo(x: i);
+//             }
+//             catch (Microsoft.CSharp.RuntimeBinder.RuntimeBinderException e)
+//             {
+//                 bool ret = ErrorVerifier.Verify(ErrorMessageId.BadArgTypes, e.Message, "Parent.Foo(ref object)");
+//                 if (ret)
+//                     return 0;
+//             }
+
+//             return 1;
+//         }
+//     }
+//     //</Code>
+// }
 
 
 
diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CultureInfo/CultureInfoEnglishName.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CultureInfo/CultureInfoEnglishName.cs
index 02ee539981ad..0868869622a6 100644
--- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CultureInfo/CultureInfoEnglishName.cs
+++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CultureInfo/CultureInfoEnglishName.cs
@@ -9,13 +9,15 @@ namespace System.Globalization.Tests
     public class CultureInfoEnglishName
     {
         // Android has its own ICU, which doesn't 100% map to UsingLimitedCultures
-        public static bool SupportFullGlobalizationData => PlatformDetection.IsNotUsingLimitedCultures || PlatformDetection.IsAndroid;
+        // Browser uses JS to get the NativeName that is missing in ICU (in the singlethreaded runtime only)
+        public static bool SupportFullGlobalizationData =>
+            (!PlatformDetection.IsWasi || PlatformDetection.IsHybridGlobalizationOnApplePlatform) && !PlatformDetection.IsWasmThreadingSupported;
 
         public static IEnumerable<object[]> EnglishName_TestData()
         {
             yield return new object[] { CultureInfo.CurrentCulture.Name, CultureInfo.CurrentCulture.EnglishName };
 
-            if (SupportFullGlobalizationData || PlatformDetection.IsHybridGlobalizationOnApplePlatform)
+            if (SupportFullGlobalizationData)
             {
                 yield return new object[] { "en-US", "English (United States)" };
                 yield return new object[] { "fr-FR", "French (France)" };
@@ -23,7 +25,6 @@ public static IEnumerable<object[]> EnglishName_TestData()
             }
             else
             {
-                // Mobile / Browser ICU doesn't contain CultureInfo.EnglishName
                 yield return new object[] { "en-US", "en (US)" };
                 yield return new object[] { "fr-FR", "fr (FR)" };
             }
@@ -41,12 +42,12 @@ public void EnglishName(string name, string expected)
         public void ChineseNeutralEnglishName()
         {
             CultureInfo ci = new CultureInfo("zh-Hans");
-            Assert.True(ci.EnglishName == "Chinese (Simplified)" || ci.EnglishName == "Chinese, Simplified",
-                        $"'{ci.EnglishName}' not equal to `Chinese (Simplified)` nor `Chinese, Simplified`");
+            Assert.True(ci.EnglishName == "Chinese (Simplified)" || ci.EnglishName == "Chinese, Simplified" || ci.EnglishName == "Simplified Chinese",
+                        $"'{ci.EnglishName}' not equal to `Chinese (Simplified)` nor `Chinese, Simplified` nor `Simplified Chinese`");
 
             ci = new CultureInfo("zh-HanT");
-            Assert.True(ci.EnglishName == "Chinese (Traditional)" || ci.EnglishName == "Chinese, Traditional",
-                        $"'{ci.EnglishName}' not equal to `Chinese (Traditional)` nor `Chinese, Traditional`");
+            Assert.True(ci.EnglishName == "Chinese (Traditional)" || ci.EnglishName == "Chinese, Traditional" || ci.EnglishName == "Traditional Chinese",
+                        $"'{ci.EnglishName}' not equal to `Chinese (Traditional)` nor `Chinese, Traditional` nor `Traditional Chinese`");
         }
     }
 }
diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CultureInfo/CultureInfoNames.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CultureInfo/CultureInfoNames.cs
index d2c0828bc685..b4ba35adb85e 100644
--- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CultureInfo/CultureInfoNames.cs
+++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CultureInfo/CultureInfoNames.cs
@@ -10,19 +10,31 @@ namespace System.Globalization.Tests
 {
     public class CultureInfoNames
     {
-        private static bool SupportFullIcuResources => (PlatformDetection.IsNotMobile && PlatformDetection.IsIcuGlobalization) || PlatformDetection.IsHybridGlobalizationOnApplePlatform;
+        // Android has its own ICU, which doesn't 100% map to UsingLimitedCultures
+        // Browser uses JS to get the NativeName that is missing in ICU (in the singlethreaded runtime only)
+        private static bool SupportFullIcuResources =>
+            !PlatformDetection.IsWasi && !PlatformDetection.IsAndroid && PlatformDetection.IsIcuGlobalization && !PlatformDetection.IsWasmThreadingSupported;
+        
+        public static IEnumerable<object[]> SupportedCultures_TestData()
+        {
+            // Browser does not support all ICU locales but it uses JS to get the correct native name
+            if (!PlatformDetection.IsBrowser)
+            {
+                yield return new object[] { "aa", "aa", "Afar", "Afar" };
+                yield return new object[] { "aa-ER", "aa-ER", "Afar (Eritrea)", "Afar (Eritrea)" };
+            }
+            yield return new object[] { "en", "en", "English", "English" };
+            yield return new object[] { "en", "fr", "English", "anglais" };
+            yield return new object[] { "en-US", "en-US", "English (United States)", "English (United States)" };
+            yield return new object[] { "en-US", "fr-FR", "English (United States)", "anglais (\u00C9tats-Unis)" };
+            yield return new object[] { "en-US", "de-DE", "English (United States)", "Englisch (Vereinigte Staaten)" };
+            yield return new object[] { "", "en-US", "Invariant Language (Invariant Country)", "Invariant Language (Invariant Country)" };
+            yield return new object[] { "", "fr-FR", "Invariant Language (Invariant Country)", "Invariant Language (Invariant Country)" };
+            yield return new object[] { "", "", "Invariant Language (Invariant Country)", "Invariant Language (Invariant Country)" };
+        }
 
         [ConditionalTheory(nameof(SupportFullIcuResources))]
-        [InlineData("en", "en", "English", "English")]
-        [InlineData("en", "fr", "English", "anglais")]
-        [InlineData("aa", "aa", "Afar", "Afar")]
-        [InlineData("en-US", "en-US", "English (United States)", "English (United States)")]
-        [InlineData("en-US", "fr-FR", "English (United States)", "anglais (\u00C9tats-Unis)")]
-        [InlineData("en-US", "de-DE", "English (United States)", "Englisch (Vereinigte Staaten)")]
-        [InlineData("aa-ER", "aa-ER", "Afar (Eritrea)", "Afar (Eritrea)")]
-        [InlineData("", "en-US", "Invariant Language (Invariant Country)", "Invariant Language (Invariant Country)")]
-        [InlineData("", "fr-FR", "Invariant Language (Invariant Country)", "Invariant Language (Invariant Country)")]
-        [InlineData("", "", "Invariant Language (Invariant Country)", "Invariant Language (Invariant Country)")]
+        [MemberData(nameof(SupportedCultures_TestData))]
         public void TestDisplayName(string cultureName, string uiCultureName, string nativeName, string displayName)
         {
             using (new ThreadCultureChange(null, CultureInfo.GetCultureInfo(uiCultureName)))
diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CultureInfo/CultureInfoNativeName.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CultureInfo/CultureInfoNativeName.cs
index 9a834dbf2d1d..c61f8364e20c 100644
--- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CultureInfo/CultureInfoNativeName.cs
+++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CultureInfo/CultureInfoNativeName.cs
@@ -8,12 +8,16 @@ namespace System.Globalization.Tests
 {
     public class CultureInfoNativeName
     {
+        // Android has its own ICU, which doesn't 100% map to UsingLimitedCultures
+        // Browser uses JS to get the NativeName that is missing in ICU (in the singlethreaded runtime only)
+        private static bool SupportFullIcuResources =>
+            (!PlatformDetection.IsWasi || PlatformDetection.IsHybridGlobalizationOnApplePlatform) && !PlatformDetection.IsWasmThreadingSupported;
+
         public static IEnumerable<object[]> NativeName_TestData()
         {
             yield return new object[] { CultureInfo.CurrentCulture.Name, CultureInfo.CurrentCulture.NativeName };
 
-            // Android has its own ICU, which doesn't 100% map to UsingLimitedCultures
-            if (PlatformDetection.IsNotUsingLimitedCultures || PlatformDetection.IsAndroid || PlatformDetection.IsHybridGlobalizationOnApplePlatform)
+            if (SupportFullIcuResources)
             {
                 yield return new object[] { "en-US", "English (United States)" };
                 yield return new object[] { "en-CA", "English (Canada)" };
@@ -21,7 +25,6 @@ public static IEnumerable<object[]> NativeName_TestData()
             }
             else
             {
-                // Mobile / Browser ICU doesn't contain CultureInfo.NativeName
                 yield return new object[] { "en-US", "en (US)" };
                 yield return new object[] { "en-CA", "en (CA)" };
             }
diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/System/Globalization/RegionInfoTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/System/Globalization/RegionInfoTests.cs
index e05fd4e1a742..ee39b219adb3 100644
--- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/System/Globalization/RegionInfoTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/System/Globalization/RegionInfoTests.cs
@@ -12,6 +12,11 @@ namespace System.Globalization.Tests
 {
     public class RegionInfoPropertyTests
     {
+        // Android has its own ICU, which doesn't 100% map to UsingLimitedCultures
+        // Browser uses JS to get the NativeName that is missing in ICU (in the singlethreaded runtime only)
+        public static bool SupportFullGlobalizationData =>
+            (!PlatformDetection.IsWasi || PlatformDetection.IsHybridGlobalizationOnApplePlatform) && !PlatformDetection.IsWasmThreadingSupported;
+
         [Theory]
         [InlineData("US", "US", "US")]
         [InlineData("IT", "IT", "IT")]
@@ -100,7 +105,6 @@ public void ValidateUsingCasedRegionName(string regionName)
         [Theory]
         [InlineData("en-US", "United States")]
         [OuterLoop("May fail on machines with multiple language packs installed")] // see https://github.com/dotnet/runtime/issues/30132
-        [ActiveIssue("https://github.com/dotnet/runtime/issues/45951", TestPlatforms.Browser)]
         public void DisplayName(string name, string expected)
         {
             using (new ThreadCultureChange(null, new CultureInfo(name)))
@@ -111,8 +115,7 @@ public void DisplayName(string name, string expected)
 
         public static IEnumerable<object[]> NativeName_TestData()
         {
-            // Android has its own ICU, which doesn't 100% map to UsingLimitedCultures
-            if (PlatformDetection.IsNotUsingLimitedCultures || PlatformDetection.IsAndroid || PlatformDetection.IsHybridGlobalizationOnApplePlatform)
+            if (SupportFullGlobalizationData)
             {
                 yield return new object[] { "GB", "United Kingdom" };
                 yield return new object[] { "SE", "Sverige" };
@@ -120,7 +123,6 @@ public static IEnumerable<object[]> NativeName_TestData()
             }
             else
             {
-                // Browser's ICU doesn't contain RegionInfo.NativeName
                 yield return new object[] { "GB", "GB" };
                 yield return new object[] { "SE", "SE" };
                 yield return new object[] { "FR", "FR" };
@@ -136,8 +138,7 @@ public void NativeName(string name, string expected)
 
         public static IEnumerable<object[]> EnglishName_TestData()
         {
-            // Android has its own ICU, which doesn't 100% map to UsingLimitedCultures
-            if (PlatformDetection.IsNotUsingLimitedCultures || PlatformDetection.IsAndroid || PlatformDetection.IsHybridGlobalizationOnApplePlatform)
+            if (SupportFullGlobalizationData)
             {
                 yield return new object[] { "en-US", new string[] { "United States" } };
                 yield return new object[] { "US", new string[] { "United States" } };
@@ -146,7 +147,6 @@ public static IEnumerable<object[]> EnglishName_TestData()
             }
             else
             {
-                // Browser's ICU doesn't contain RegionInfo.EnglishName
                 yield return new object[] { "en-US", new string[] { "US" } };
                 yield return new object[] { "US", new string[] { "US" } };
                 yield return new object[] { "zh-CN", new string[] { "CN" }};
diff --git a/src/libraries/System.Runtime/tests/System.IO.FileSystem.Tests/DisabledFileLockingTests/System.IO.FileSystem.DisabledFileLocking.Tests.csproj b/src/libraries/System.Runtime/tests/System.IO.FileSystem.Tests/DisabledFileLockingTests/System.IO.FileSystem.DisabledFileLocking.Tests.csproj
index 8f474ca378fc..62425e583e91 100644
--- a/src/libraries/System.Runtime/tests/System.IO.FileSystem.Tests/DisabledFileLockingTests/System.IO.FileSystem.DisabledFileLocking.Tests.csproj
+++ b/src/libraries/System.Runtime/tests/System.IO.FileSystem.Tests/DisabledFileLockingTests/System.IO.FileSystem.DisabledFileLocking.Tests.csproj
@@ -7,6 +7,9 @@
 
     <WasmXHarnessMonoArgs>--working-dir=/test-dir</WasmXHarnessMonoArgs>
   </PropertyGroup>
+  <ItemGroup Condition="'$(TargetOS)' == 'browser'">
+    <WasmFilesToIncludeFromPublishDir Include="$(AssemblyName).dll" TargetDir="test-dir" />
+  </ItemGroup>
   <ItemGroup>
     <Compile Include="DisabledFileLockingSwitchTests.cs" />
     <Compile Include="..\FSAssert.cs" />
diff --git a/src/libraries/System.Runtime/tests/System.IO.FileSystem.Tests/File/ReadWriteAllLinesAsync.cs b/src/libraries/System.Runtime/tests/System.IO.FileSystem.Tests/File/ReadWriteAllLinesAsync.cs
index 441771f05675..0dc72cab3f42 100644
--- a/src/libraries/System.Runtime/tests/System.IO.FileSystem.Tests/File/ReadWriteAllLinesAsync.cs
+++ b/src/libraries/System.Runtime/tests/System.IO.FileSystem.Tests/File/ReadWriteAllLinesAsync.cs
@@ -98,8 +98,11 @@ public async Task OpenFile_ThrowsIOExceptionAsync()
 
             using (File.Create(path))
             {
-                await Assert.ThrowsAsync<IOException>(async () => await WriteAsync(path, lines));
-                await Assert.ThrowsAsync<IOException>(async () => await ReadAsync(path));
+                Task t = WriteAsync(path, lines);
+                await Assert.ThrowsAsync<IOException>(async () => await t);
+
+                t = ReadAsync(path);
+                await Assert.ThrowsAsync<IOException>(async () => await t);
             }
         }
 
diff --git a/src/libraries/System.Runtime/tests/System.IO.FileSystem.Tests/System.IO.FileSystem.Tests.csproj b/src/libraries/System.Runtime/tests/System.IO.FileSystem.Tests/System.IO.FileSystem.Tests.csproj
index 738960903f50..c2d3f67d0d55 100644
--- a/src/libraries/System.Runtime/tests/System.IO.FileSystem.Tests/System.IO.FileSystem.Tests.csproj
+++ b/src/libraries/System.Runtime/tests/System.IO.FileSystem.Tests/System.IO.FileSystem.Tests.csproj
@@ -6,6 +6,9 @@
 
     <WasmXHarnessMonoArgs>--working-dir=/test-dir</WasmXHarnessMonoArgs>
   </PropertyGroup>
+  <ItemGroup Condition="'$(TargetOS)' == 'browser'">
+    <WasmFilesToIncludeFromPublishDir Include="$(AssemblyName).dll" TargetDir="test-dir" />
+  </ItemGroup>
   <ItemGroup>
     <Compile Include="Base\BaseGetSetAttributes.cs" />
     <Compile Include="Base\BaseGetSetTimes.cs" />
diff --git a/src/libraries/System.Runtime/tests/System.IO.Tests/MemoryStream/MemoryStreamTests.cs b/src/libraries/System.Runtime/tests/System.IO.Tests/MemoryStream/MemoryStreamTests.cs
index 1e7eb491a369..baa158060cfc 100644
--- a/src/libraries/System.Runtime/tests/System.IO.Tests/MemoryStream/MemoryStreamTests.cs
+++ b/src/libraries/System.Runtime/tests/System.IO.Tests/MemoryStream/MemoryStreamTests.cs
@@ -103,6 +103,7 @@ from bufferContext in
         [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))]
         [MemberData(nameof(MemoryStream_PositionOverflow_Throws_MemberData))]
         [SkipOnPlatform(TestPlatforms.iOS | TestPlatforms.tvOS, "https://github.com/dotnet/runtime/issues/92467")]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/100225", typeof(PlatformDetection), nameof(PlatformDetection.IsMonoRuntime), nameof(PlatformDetection.IsWindows), nameof(PlatformDetection.IsX64Process))]
         public void MemoryStream_SeekOverflow_Throws(SeekMode mode, int bufferSize, int origin)
         {
             byte[] buffer = new byte[bufferSize];
diff --git a/src/libraries/System.Runtime/tests/System.IO.Tests/Stream/Stream.NullTests.cs b/src/libraries/System.Runtime/tests/System.IO.Tests/Stream/Stream.NullTests.cs
index efe700c1e9c7..86ebfba5efbe 100644
--- a/src/libraries/System.Runtime/tests/System.IO.Tests/Stream/Stream.NullTests.cs
+++ b/src/libraries/System.Runtime/tests/System.IO.Tests/Stream/Stream.NullTests.cs
@@ -2,7 +2,9 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
+using System.Globalization;
 using System.Linq;
+using System.Text;
 using System.Threading;
 using System.Threading.Tasks;
 using Xunit;
@@ -196,14 +198,93 @@ public static async Task TestCanceledNullTextReaderAsync(TextReader input)
         [MemberData(nameof(NullWriters))]
         public static void TextNullTextWriter(TextWriter output)
         {
-            output.Flush();
+            // Use every method to make sure they don't throw
+
+            output.Close();
             output.Dispose();
+            Assert.True(output.DisposeAsync().IsCompletedSuccessfully);
+
+            Assert.NotNull(output.Encoding);
+            Assert.Same(CultureInfo.InvariantCulture, output.FormatProvider);
+            Assert.Equal(Environment.NewLine, output.NewLine);
+            output.NewLine = "hello";
+            Assert.Equal(Environment.NewLine, output.NewLine);
 
-            output.WriteLine(decimal.MinValue);
-            output.WriteLine(Math.PI);
-            output.WriteLine();
             output.Flush();
-            output.Dispose();
+            Assert.True(output.FlushAsync().IsCompletedSuccessfully);
+            Assert.True(output.FlushAsync(CancellationToken.None).IsCompletedSuccessfully);
+            Assert.True(output.FlushAsync(new CancellationToken(true)).IsCompletedSuccessfully);
+
+            output.Write('a');
+            output.Write((char[])null);
+            output.Write(new char[] { 'b', 'c' });
+            output.Write(42m);
+            output.Write(43d);
+            output.Write(44f);
+            output.Write(45);
+            output.Write(46L);
+            output.Write(DayOfWeek.Monday);
+            output.Write((string)null);
+            output.Write("Tuesday");
+            output.Write((StringBuilder)null);
+            output.Write(new StringBuilder("Wednesday"));
+            output.Write(47u);
+            output.Write(48ul);
+            output.Write("Thursday".AsSpan());
+            output.Write(" {0} ", "Friday");
+            output.Write(" {0}{1} ", "Saturday", "Sunday");
+            output.Write(" {0} {1}  {2}", TimeSpan.FromSeconds(1), TimeSpan.FromMinutes(2), TimeSpan.FromDays(3));
+            output.Write(" {0} {1}  {2}    {3}", (Int128)4, (UInt128)5, (nint)6, (nuint)7);
+            output.WriteLine();
+            output.WriteLine(true);
+            output.WriteLine('a');
+            output.WriteLine((char[])null);
+            output.WriteLine(new char[] { 'b', 'c' });
+            output.WriteLine(42m);
+            output.WriteLine(43d);
+            output.WriteLine(44f);
+            output.WriteLine(45);
+            output.WriteLine(46L);
+            output.WriteLine(DayOfWeek.Monday);
+            output.WriteLine((string)null);
+            output.WriteLine("Tuesday");
+            output.WriteLine((StringBuilder)null);
+            output.WriteLine(new StringBuilder("Wednesday"));
+            output.WriteLine(47u);
+            output.WriteLine(48ul);
+            output.WriteLine("Thursday".AsSpan());
+            output.WriteLine(" {0} ", "Friday");
+            output.WriteLine(" {0}{1} ", "Saturday", "Sunday");
+            output.WriteLine(" {0} {1}  {2}", TimeSpan.FromSeconds(1), TimeSpan.FromMinutes(2), TimeSpan.FromDays(3));
+            output.WriteLine(" {0} {1}  {2}    {3}", (Int128)4, (UInt128)5, (nint)6, (nuint)7);
+            Assert.True(output.WriteAsync('a').IsCompletedSuccessfully);
+            Assert.True(output.WriteAsync((char[])null).IsCompletedSuccessfully);
+            Assert.True(output.WriteAsync(new char[] { 'b', 'c' }).IsCompletedSuccessfully);
+            Assert.True(output.WriteAsync((string)null).IsCompletedSuccessfully);
+            Assert.True(output.WriteAsync("Tuesday").IsCompletedSuccessfully);
+            Assert.True(output.WriteAsync((StringBuilder)null).IsCompletedSuccessfully);
+            Assert.True(output.WriteAsync(new StringBuilder("Wednesday")).IsCompletedSuccessfully);
+            Assert.True(output.WriteLineAsync().IsCompletedSuccessfully);
+            Assert.True(output.WriteLineAsync('a').IsCompletedSuccessfully);
+            Assert.True(output.WriteLineAsync((char[])null).IsCompletedSuccessfully);
+            Assert.True(output.WriteLineAsync(new char[] { 'b', 'c' }).IsCompletedSuccessfully);
+            Assert.True(output.WriteLineAsync((string)null).IsCompletedSuccessfully);
+            Assert.True(output.WriteLineAsync("Tuesday").IsCompletedSuccessfully);
+            Assert.True(output.WriteLineAsync((StringBuilder)null).IsCompletedSuccessfully);
+            Assert.True(output.WriteLineAsync(new StringBuilder("Wednesday")).IsCompletedSuccessfully);
+
+            if (output is StreamWriter sw)
+            {
+                Assert.False(sw.AutoFlush);
+                sw.AutoFlush = true;
+                Assert.False(sw.AutoFlush);
+
+                Assert.Same(Stream.Null, sw.BaseStream);
+            }
+
+            // Use some parallelism in an attempt to validate statelessness
+            string longLine = new string('#', 100_000);
+            Parallel.For(0, 25, i => output.WriteLine(longLine));
         }
 
         [Theory]
@@ -248,7 +329,6 @@ public static IEnumerable<object[]> NullReaders
             {
                 yield return new object[] { TextReader.Null };
                 yield return new object[] { StreamReader.Null };
-                yield return new object[] { StringReader.Null };
             }
         }
 
@@ -258,7 +338,6 @@ public static IEnumerable<object[]> NullWriters
             {
                 yield return new object[] { TextWriter.Null };
                 yield return new object[] { StreamWriter.Null };
-                yield return new object[] { StringWriter.Null };
             }
         }
 
diff --git a/src/libraries/System.Runtime/tests/System.Reflection.Tests/DefaultBinderTests.cs b/src/libraries/System.Runtime/tests/System.Reflection.Tests/DefaultBinderTests.cs
index 874e1f3d5864..d1c1f63d830e 100644
--- a/src/libraries/System.Runtime/tests/System.Reflection.Tests/DefaultBinderTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Reflection.Tests/DefaultBinderTests.cs
@@ -107,6 +107,18 @@ public static void DefaultBinderNamedParametersSkippedAndOutOfOrderTest()
             Assert.Equal("MethodMoreParameters", method.Name);
         }
 
+        [Fact]
+        public void InvokeWithIncorrectTargetTypeThrowsCorrectException()
+        {
+            Type t = typeof(Sample);
+            object incorrectInstance = Activator.CreateInstance(t);
+            MethodInvoker invoker = MethodInvoker.Create(typeof(Test).GetMethod(nameof(Test.TestMethod)));
+
+            TargetException ex = Assert.Throws<TargetException>(() => invoker.Invoke(obj: incorrectInstance, "NotAnInt"));
+            Assert.Contains(nameof(Test), ex.Message);
+            Assert.Contains(nameof(Sample), ex.Message);
+        }
+
         [Fact]
         public static void InvokeWithNamedParameters1st2ndTest()
         {
diff --git a/src/libraries/System.Runtime/tests/System.Reflection.Tests/FieldInfoTests.cs b/src/libraries/System.Runtime/tests/System.Reflection.Tests/FieldInfoTests.cs
index 5f107d9b44e5..d854e19c71ad 100644
--- a/src/libraries/System.Runtime/tests/System.Reflection.Tests/FieldInfoTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Reflection.Tests/FieldInfoTests.cs
@@ -2,6 +2,8 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
 using System.Linq;
 using Xunit;
 
@@ -29,7 +31,7 @@ public void SetValue_ConstantField_ThrowsFieldAccessException(string field, obje
         [Fact]
         public void SetValue_ReadonlyField()
         {
-            FieldInfo fieldInfo = typeof(FieldInfoTests).GetTypeInfo().GetDeclaredField("readonlyIntField");
+            FieldInfo fieldInfo = typeof(FieldInfoTests).GetTypeInfo().GetDeclaredField(nameof(readonlyIntField));
             FieldInfoTests myInstance = new FieldInfoTests();
 
             object current = fieldInfo.GetValue(myInstance);
@@ -55,11 +57,39 @@ public static void CustomAttributes(Type type, string expectedToString)
 
         public static IEnumerable<object[]> GetValue_TestData()
         {
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.s_intField), new FieldInfoTests(), 100 };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.s_intField), null, 100 };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.intField), new FieldInfoTests(), 101 };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.s_stringField), new FieldInfoTests(), "static" };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.stringField), new FieldInfoTests(), "non static" };
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_boolField), null, true };
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_intField), new FieldInfoTests(), 100 }; // Non-null 'obj' ignored.
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_intField), null, 100 };
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_stringField), null, "static" };
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_myStruct), null, new MyStruct() };
+
+            yield return new object[] { typeof(FieldInfoTests), nameof(boolField), new FieldInfoTests(), true };
+            yield return new object[] { typeof(FieldInfoTests), nameof(intField), new FieldInfoTests(), 101 };
+            yield return new object[] { typeof(FieldInfoTests), nameof(stringField), new FieldInfoTests(), "non static" };
+            yield return new object[] { typeof(FieldInfoTests), nameof(_myStruct), new FieldInfoTests(), new MyStruct() };
+
+            yield return new object[] { typeof(FieldInfoTests), nameof(shortEnumField), new FieldInfoTests(), default(ShortEnum) };
+            yield return new object[] { typeof(FieldInfoTests), nameof(intEnumField), new FieldInfoTests(), default(IntEnum) };
+            yield return new object[] { typeof(FieldInfoTests), nameof(longEnumField), new FieldInfoTests(), default(LongEnum) };
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_shortEnumField), null, default(ShortEnum) };
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_intEnumField), null, default(IntEnum) };
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_longEnumField), null, default(LongEnum) };
+
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_boolField), null, true };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_intField), null, 100 };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_intField), null, 100 };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_constIntField), null, 102 };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_stringField), null, "static" };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_readonlyStringField), null, "readonlyStatic" };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_objectField), null, MyStruct.s_objectField };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_intPtr), null, MyStruct.s_intPtrForComparison };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_rvaIntField), null, new int[] { 1, 2, 3 } };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.threadStatic_intField), null, 100 };
+
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.stringField), new MyStruct(), "non static" };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.intField), new MyStruct(), 101 };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.intPtr), new MyStruct(), MyStruct.intPtrForComparison };
+            yield return new object[] { typeof(MyStruct_OnlyPrimitiveTypes), nameof(MyStruct_OnlyPrimitiveTypes.intField), new MyStruct_OnlyPrimitiveTypes(), 101 };
         }
 
         [Theory]
@@ -68,6 +98,76 @@ public void GetValue(Type type, string name, object obj, object expected)
         {
             FieldInfo fieldInfo = GetField(type, name);
             Assert.Equal(expected, fieldInfo.GetValue(obj));
+
+            // Perform a second time to rule out cases of slow-path vs. fast-path.
+            Assert.Equal(expected, fieldInfo.GetValue(obj));
+        }
+
+        public static IEnumerable<object[]> GetValue_TestData_WithFunctionPointers()
+        {
+            yield return new object[] { typeof(MyStructWithFunctionPointers), nameof(MyStructWithFunctionPointers.s_fcnPtr), null, (IntPtr)45 };
+            yield return new object[] { typeof(MyStructWithFunctionPointers), nameof(MyStructWithFunctionPointers.fcnPtr), new MyStructWithFunctionPointers(), (IntPtr)44 };
+        }
+
+        [Theory]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/97833", typeof(PlatformDetection), nameof(PlatformDetection.IsNativeAot))]
+        [MemberData(nameof(GetValue_TestData_WithFunctionPointers))]
+        public void GetValueWithFunctionPointers(Type type, string name, object obj, object expected)
+        {
+            FieldInfo fieldInfo = GetField(type, name);
+            Assert.Equal(expected, fieldInfo.GetValue(obj));
+
+            // Perform a second time to rule out cases of slow-path vs. fast-path.
+            Assert.Equal(expected, fieldInfo.GetValue(obj));
+        }
+
+        [Fact]
+        public void GetAndSetValueTypeFromStatic()
+        {
+            FieldInfo fieldInfo = GetField(typeof(FieldInfoTests), nameof(s_myStruct_GetAndSet));
+            s_myStruct_GetAndSet.intField = 10;
+            object obj = fieldInfo.GetValue(null);
+            Assert.Equal(10, ((MyStruct)obj).intField);
+            s_myStruct_GetAndSet.intField = 11;
+
+            // Make sure the previously boxed value didn't change. The runtime boxes non-primitive value types internally.
+            Assert.Equal(10, ((MyStruct)obj).intField);
+
+            obj = fieldInfo.GetValue(null);
+            Assert.Equal(11, ((MyStruct)obj).intField);
+        }
+
+        [Fact]
+        public void ClassInitializerCalledOnceOnException()
+        {
+            TestThrows();
+            for (int j = 0; j < 100; j++) GC.Collect(); // Encourage the type to unload.
+            TestThrows();
+
+            for (int j = 0; j < 100; j++) GC.Collect(); // Encourage the type to unload.
+            InitializerNotCalledAfterThrow();
+
+            static void TestThrows()
+            {
+                FieldInfo fi = typeof(MyTypeThatThrowsInClassInitializer).GetField(nameof(MyTypeThatThrowsInClassInitializer.s_field));
+                for (int i = 0; i < 3; i++)
+                {
+                    // The actual exception may be TargetInvocationException or TypeInitializationException; there is no guarantee on when
+                    // exactly the class initializer is called (e.g. it could happen before the GetValue\SetValue operation actually runs).
+                    Assert.ThrowsAny<Exception>(() => fi.GetValue(null));
+                    Assert.ThrowsAny<Exception>(() => fi.SetValue(null, 100));
+                }
+            }
+
+            static void InitializerNotCalledAfterThrow()
+            {
+                // Setting this stops the class initializer's code from throwing, but the runtime caches the previous exception so it never runs.
+                SettingsForMyTypeThatThrowsInClassInitializer.s_shouldThrow = false;
+
+                FieldInfo fi = typeof(MyTypeThatThrowsInClassInitializer).GetField(nameof(MyTypeThatThrowsInClassInitializer.s_field));
+                Assert.ThrowsAny<Exception>(() => fi.GetValue(null));
+                Assert.ThrowsAny<Exception>(() => fi.SetValue(null, 100));
+            }
         }
 
         public static IEnumerable<object[]> GetValue_Invalid_TestData()
@@ -82,18 +182,42 @@ public void GetValue_Invalid(Type type, string name, object obj, Type exceptionT
         {
             FieldInfo fieldInfo = GetField(type, name);
             Assert.Throws(exceptionType, () => fieldInfo.GetValue(obj));
+
+            // Perform a second time to rule out cases of slow-path vs. fast-path.
+            Assert.Throws(exceptionType, () => fieldInfo.GetValue(obj));
         }
 
         public static IEnumerable<object[]> SetValue_TestData()
         {
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.s_intField), new FieldInfoTests(), 1000, 1000 };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.s_intField), null, 1000, 1000 };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.intField), new FieldInfoTests(), 1000, 1000 };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.s_stringField), new FieldInfoTests(), "new", "new" };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.stringField), new FieldInfoTests(), "new", "new" };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.shortEnumField), new FieldInfoTests(), (byte)1, (ShortEnum)1 };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.intEnumField), new FieldInfoTests(), (short)2, (IntEnum)2 };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.longEnumField), new FieldInfoTests(), (int)3, (LongEnum)3 };
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_boolField_Set), null, true, true };
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_boolField_Set), null, false, false };
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_intField_Set), new FieldInfoTests(), 1000, 1000 }; // Non-null 'obj' ignored.
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_intField_Set), null, 1001, 1001 };
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_stringField_Set), null, "new", "new" };
+            yield return new object[] { typeof(FieldInfoTests), nameof(s_myStruct_Set), null, s_myStruct_Set, s_myStruct_Set };
+
+            yield return new object[] { typeof(FieldInfoTests), nameof(boolField), new FieldInfoTests(), true, true };
+            yield return new object[] { typeof(FieldInfoTests), nameof(boolField), new FieldInfoTests(), false, false };
+            yield return new object[] { typeof(FieldInfoTests), nameof(stringField), new FieldInfoTests(), "new", "new" };
+            yield return new object[] { typeof(FieldInfoTests), nameof(shortEnumField), new FieldInfoTests(), (byte)1, (ShortEnum)1 };
+            yield return new object[] { typeof(FieldInfoTests), nameof(intEnumField), new FieldInfoTests(), (short)2, (IntEnum)2 };
+            yield return new object[] { typeof(FieldInfoTests), nameof(longEnumField), new FieldInfoTests(), (int)3, (LongEnum)3 };
+            yield return new object[] { typeof(FieldInfoTests), nameof(_myStruct), new FieldInfoTests(), s_myStruct_Set, s_myStruct_Set };
+
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_boolField_Set), null, true, true };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_boolField_Set), null, false, false };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_intField_Set), null, 1001, 1001 };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_stringField_Set), null, "new", "new" };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_objectField_Set), null, MyStruct.s_objectField, MyStruct.s_objectField };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_intPtr_Set), null, MyStruct.s_intPtrForComparison, MyStruct.s_intPtrForComparison };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.threadStatic_intField_Set), null, 100, 100 };
+
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.boolField), new MyStruct(), true, true };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.boolField), new MyStruct(), false, false };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.intField), new MyStruct(), 1002, 1002 };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.stringField), new MyStruct(), "new", "new" };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.objectField), new MyStruct(), MyStruct.s_objectField, MyStruct.s_objectField };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.intPtr), new MyStruct(), MyStruct.s_intPtrForComparison, MyStruct.s_intPtrForComparison };
         }
 
         [Theory]
@@ -106,6 +230,38 @@ public void SetValue(Type type, string name, object obj, object value, object ex
             {
                 fieldInfo.SetValue(obj, value);
                 Assert.Equal(expected, fieldInfo.GetValue(obj));
+
+                // Perform a second time to rule out cases of slow-path vs. fast-path.
+                fieldInfo.SetValue(obj, value);
+                Assert.Equal(expected, fieldInfo.GetValue(obj));
+            }
+            finally
+            {
+                fieldInfo.SetValue(obj, original);
+            }
+        }
+
+        public static IEnumerable<object[]> SetValue_TestData_FunctionPointers()
+        {
+            yield return new object[] { typeof(MyStructWithFunctionPointers), nameof(MyStructWithFunctionPointers.s_fcnPtr_Set), null, (IntPtr)201, (IntPtr)201 };
+            yield return new object[] { typeof(MyStructWithFunctionPointers), nameof(MyStructWithFunctionPointers.fcnPtr), new MyStructWithFunctionPointers(), (IntPtr)200, (IntPtr)200 };
+        }
+
+        [Theory]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/97833", typeof(PlatformDetection), nameof(PlatformDetection.IsNativeAot))]
+        [MemberData(nameof(SetValue_TestData_FunctionPointers))]
+        public void SetValueWithFunctionPointers(Type type, string name, object obj, object value, object expected)
+        {
+            FieldInfo fieldInfo = GetField(type, name);
+            object original = fieldInfo.GetValue(obj);
+            try
+            {
+                fieldInfo.SetValue(obj, value);
+                Assert.Equal(expected, fieldInfo.GetValue(obj));
+
+                // Perform a second time to rule out cases of slow-path vs. fast-path.
+                fieldInfo.SetValue(obj, value);
+                Assert.Equal(expected, fieldInfo.GetValue(obj));
             }
             finally
             {
@@ -115,17 +271,23 @@ public void SetValue(Type type, string name, object obj, object value, object ex
 
         public static IEnumerable<object[]> SetValue_Invalid_TestData()
         {
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.stringField), null, "new", typeof(TargetException) };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.stringField), new object(), "new", typeof(ArgumentException) };
-            yield return new object[] { typeof(FieldInfoTests), nameof(FieldInfoTests.stringField), new FieldInfoTests(), 100, typeof(ArgumentException) };
+            yield return new object[] { typeof(FieldInfoTests), nameof(stringField), null, "new", typeof(TargetException) };
+            yield return new object[] { typeof(FieldInfoTests), nameof(stringField), new object(), "new", typeof(ArgumentException) };
+            yield return new object[] { typeof(FieldInfoTests), nameof(stringField), new FieldInfoTests(), 100, typeof(ArgumentException) };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_constIntField), null, 100, typeof(FieldAccessException) };
+            yield return new object[] { typeof(MyStruct), nameof(MyStruct.s_rvaIntField), null, new int[] { 3, 4, 5 }, typeof(FieldAccessException) };
         }
 
         [Theory]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/97829", TestRuntimes.Mono)]
         [MemberData(nameof(SetValue_Invalid_TestData))]
         public void SetValue_Invalid(Type type, string name, object obj, object value, Type exceptionType)
         {
             FieldInfo fieldInfo = GetField(type, name);
             Assert.Throws(exceptionType, () => fieldInfo.SetValue(obj, value));
+
+            // Perform a second time to rule out cases of slow-path vs. fast-path.
+            Assert.Throws(exceptionType, () => fieldInfo.SetValue(obj, value));
         }
 
         [Theory]
@@ -239,6 +401,7 @@ public void IsPrivate(Type type, string name, bool expected)
 
         [Theory]
         [InlineData(typeof(FieldInfoTests), nameof(FieldInfoTests.readonlyIntField), true)]
+        [InlineData(typeof(FieldInfoTests), nameof(FieldInfoTests.s_readonlyIntField), true)]
         [InlineData(typeof(FieldInfoTests), nameof(FieldInfoTests.intField), false)]
         public void IsInitOnly(Type type, string name, bool expected)
         {
@@ -462,18 +625,32 @@ private static FieldInfo GetField(Type type, string name)
         public const long ConstInt64Field = 1000;
         public const byte ConstByteField = 0;
 
+        public static bool s_boolField = true;
+        public static bool s_boolField_Set = false;
         public static int s_intField = 100;
+        public static int s_intField_Set = 0;
         public static string s_stringField = "static";
+        public static string s_stringField_Set = "static";
+        public static readonly int s_readonlyIntField = 100;
 
+        public bool boolField = true;
         public int intField = 101;
         public string stringField = "non static";
 
-        public enum ShortEnum : short {}
-        public enum IntEnum {}
-        public enum LongEnum : long {}
+        public MyStruct _myStruct = new MyStruct();
+        public static MyStruct s_myStruct = new MyStruct();
+        public static MyStruct s_myStruct_Set = new MyStruct();
+        public static MyStruct s_myStruct_GetAndSet = new MyStruct();
+
+        public enum ShortEnum : short { }
+        public enum IntEnum { }
+        public enum LongEnum : long { }
         public ShortEnum shortEnumField;
         public IntEnum intEnumField;
         public LongEnum longEnumField;
+        public static ShortEnum s_shortEnumField;
+        public static IntEnum s_intEnumField;
+        public static LongEnum s_longEnumField;
 
         private int privateIntField = 1;
         private string privateStringField = "privateStringField";
@@ -586,5 +763,81 @@ public static void SetValueDirect_GetValueDirectRoundDataTest(object value)
 
             Assert.Equal(value, result);
         }
+
+
+        public struct MyStruct_OnlyPrimitiveTypes
+        {
+            public int intField = 101;
+
+            public MyStruct_OnlyPrimitiveTypes()
+            {
+            }
+        }
+
+        public struct MyStruct
+        {
+            public static bool s_boolField = true;
+            public static bool s_boolField_Set = false;
+            public static int s_intField = 100;
+            public static int s_intField_Set = 0;
+            [ThreadStatic] public static int threadStatic_intField = 100;
+            [ThreadStatic] public static int threadStatic_intField_Set = 0;
+            public static string s_stringField = "static";
+            public static readonly string s_readonlyStringField = "readonlyStatic";
+            public static string s_stringField_Set = null;
+            public static object s_objectField = new MyClass1();
+            public static object s_objectField_Set = null;
+
+            // This does not report FieldAttributes.HasFieldRVA since Roslyn wraps the .data with generated helper class.
+            public static readonly int[] s_rvaIntField = [1, 2, 3];
+
+            public unsafe static object intPtrForComparison = Pointer.Box((void*)42, typeof(int*));
+            public unsafe static int* s_intPtr = (int*)43;
+            public unsafe static int* s_intPtr_Set = (int*)0;
+            public unsafe static object s_intPtrForComparison = Pointer.Box((void*)43, typeof(int*));
+            public bool boolField = true;
+            public int intField = 101;
+            public object objectField = null;
+            public string stringField = "non static";
+            public const int s_constIntField = 102;
+            public unsafe int* intPtr = (int*)42;
+
+            public MyStruct() { }
+        }
+
+        public struct MyStructWithFunctionPointers
+        {
+            public unsafe static delegate*<void> s_fcnPtr = (delegate*<void>)45;
+            public unsafe static delegate*<void> s_fcnPtr_Set = (delegate*<void>)0;
+            public unsafe delegate*<void> fcnPtr = (delegate*<void>)44;
+
+            public MyStructWithFunctionPointers() { }
+        }
+
+        public class MyTypeThatThrowsInClassInitializer
+        {
+            public static int s_field;
+
+            static MyTypeThatThrowsInClassInitializer()
+            {
+                FieldInfo fi = typeof(MyTypeThatThrowsInClassInitializer).GetField(nameof(s_field));
+
+                // Ensure that the runtime doesn't treat this type has having been initialized due to successful GetValue().
+                for (int i = 0; i < 3; i++)
+                {
+                    fi.GetValue(null);
+                }
+
+                if (SettingsForMyTypeThatThrowsInClassInitializer.s_shouldThrow)
+                {
+                    throw new Exception();
+                }
+            }
+        }
+
+        public static class SettingsForMyTypeThatThrowsInClassInitializer
+        {
+            public static bool s_shouldThrow = true;
+        }
     }
 }
diff --git a/src/libraries/System.Runtime/tests/System.Reflection.Tests/GetTypeTests.cs b/src/libraries/System.Runtime/tests/System.Reflection.Tests/GetTypeTests.cs
index efa17feb0d2d..ba1cb8a203c4 100644
--- a/src/libraries/System.Runtime/tests/System.Reflection.Tests/GetTypeTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Reflection.Tests/GetTypeTests.cs
@@ -3,6 +3,8 @@
 
 using System.Collections.Generic;
 using System.IO;
+using System.Reflection.Emit;
+using System.Runtime.CompilerServices;
 using Xunit;
 
 namespace System.Reflection.Tests
@@ -297,6 +299,142 @@ public void TestTypeIdentifierAttribute()
             args = new object[1] { Activator.CreateInstance(otherEquivalentValueType) };
             Assert.Equal(42, mi.Invoke(null, args));
         }
+
+        [Fact]
+        public void IgnoreLeadingDotForTypeNamesWithoutNamespace()
+        {
+            Type typeWithNoNamespace = typeof(NoNamespace);
+
+            Assert.Equal(typeWithNoNamespace, Type.GetType($".{typeWithNoNamespace.AssemblyQualifiedName}"));
+            Assert.Equal(typeWithNoNamespace, Type.GetType(typeWithNoNamespace.AssemblyQualifiedName));
+
+            Assert.Equal(typeWithNoNamespace, typeWithNoNamespace.Assembly.GetType($".{typeWithNoNamespace.FullName}"));
+            Assert.Equal(typeWithNoNamespace, typeWithNoNamespace.Assembly.GetType(typeWithNoNamespace.FullName));
+
+            Assert.Equal(typeof(List<NoNamespace>), Type.GetType($"{typeof(List<>).FullName}[[{typeWithNoNamespace.AssemblyQualifiedName}]]"));
+            Assert.Equal(typeof(List<NoNamespace>), Type.GetType($"{typeof(List<>).FullName}[[.{typeWithNoNamespace.AssemblyQualifiedName}]]"));
+
+            Type typeWithNamespace = typeof(int);
+
+            Assert.Equal(typeWithNamespace, Type.GetType(typeWithNamespace.AssemblyQualifiedName));
+            Assert.Null(Type.GetType($".{typeWithNamespace.AssemblyQualifiedName}"));
+        }
+
+        public static IEnumerable<object[]> GetTypesThatRequireEscaping()
+        {
+            if (RuntimeFeature.IsDynamicCodeSupported)
+            {
+                AssemblyBuilder assembly = AssemblyBuilder.DefineDynamicAssembly(new AssemblyName("TypeNamesThatRequireEscaping"), AssemblyBuilderAccess.Run);
+                ModuleBuilder module = assembly.DefineDynamicModule("TypeNamesThatRequireEscapingModule");
+
+                yield return new object[] { module.DefineType("TypeNameWith+ThatIsNotNestedType").CreateType(), assembly };
+                yield return new object[] { module.DefineType("TypeNameWith\\TheEscapingCharacter").CreateType(), assembly };
+                yield return new object[] { module.DefineType("TypeNameWith&Ampersand").CreateType(), assembly };
+                yield return new object[] { module.DefineType("TypeNameWith*Asterisk").CreateType(), assembly };
+                yield return new object[] { module.DefineType("TypeNameWith[OpeningSquareBracket").CreateType(), assembly };
+                yield return new object[] { module.DefineType("TypeNameWith]ClosingSquareBracket").CreateType(), assembly };
+                yield return new object[] { module.DefineType("TypeNameWith[]BothSquareBrackets").CreateType(), assembly };
+                yield return new object[] { module.DefineType("TypeNameWith[[]]NestedSquareBrackets").CreateType(), assembly };
+                yield return new object[] { module.DefineType("TypeNameWith,Comma").CreateType(), assembly };
+                yield return new object[] { module.DefineType("TypeNameWith\\[]+*&,AllSpecialCharacters").CreateType(), assembly };
+
+                TypeBuilder containingType = module.DefineType("ContainingTypeWithA+Plus");
+                _ = containingType.CreateType(); // containing type must exist!
+                yield return new object[] { containingType.DefineNestedType("NoSpecialCharacters").CreateType(), assembly };
+                yield return new object[] { containingType.DefineNestedType("Contains+Plus").CreateType(), assembly };
+            }
+        }
+
+        [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsReflectionEmitSupported))]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/45033", typeof(PlatformDetection), nameof(PlatformDetection.IsMonoRuntime))]
+        [MemberData(nameof(GetTypesThatRequireEscaping))]
+        public void TypeNamesThatRequireEscaping(Type type, Assembly assembly)
+        {
+            Assert.Contains('\\', type.FullName);
+
+            Assert.Equal(type, assembly.GetType(type.FullName));
+            Assert.Equal(type, assembly.GetType(type.FullName.ToLower(), throwOnError: true, ignoreCase: true));
+            Assert.Equal(type, assembly.GetType(type.FullName.ToUpper(), throwOnError: true, ignoreCase: true));
+        }
+
+        [Fact]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/45033", typeof(PlatformDetection), nameof(PlatformDetection.IsMonoRuntime))]
+        public void EscapingCharacterThatDoesNotRequireEscapingIsTreatedAsError()
+        {
+            for (char character = (char)0; character <= 255; character++)
+            {
+                Func<Type> testCode = () => Type.GetType($"System.\\{character}", throwOnError: true);
+
+                if (character is '\\' or '[' or ']' or '+' or '*' or '&' or ',')
+                {
+                    Assert.Throws<TypeLoadException>(testCode); // such type does not exist
+                }
+                else
+                {
+                    Assert.Throws<ArgumentException>(testCode); // such name is invalid
+                }
+
+                Assert.Null(Type.GetType($"System.\\{character}", throwOnError: false));
+            }
+        }
+
+        public static IEnumerable<object[]> AllWhitespacesArguments()
+        {
+            // leading whitespaces are allowed for type names:
+            yield return new object[]
+            {
+                " \t\r\nSystem.Int32",
+                typeof(int)
+            };
+            yield return new object[]
+            {
+                $"System.Collections.Generic.List`1[\r\n\t [\t\r\n {typeof(int).AssemblyQualifiedName}]], {typeof(List<>).Assembly.FullName}",
+                typeof(List<int>)
+            };
+            yield return new object[]
+            {
+                $"System.Collections.Generic.List`1[\r\n\t{typeof(int).FullName}]",
+                typeof(List<int>)
+            };
+            // leading whitespaces are NOT allowed for modifiers:
+            yield return new object[]
+            {
+                "System.Int32\t\r\n []",
+                null
+            };
+            yield return new object[]
+            {
+                "System.Int32\r\n\t [,]",
+                null
+            };
+            yield return new object[]
+            {
+                "System.Int32 \r\n\t [*]",
+                null
+            };
+            yield return new object[]
+            {
+                "System.Int32 *",
+                null
+            };
+            yield return new object[]
+            {
+                "System.Int32\t&",
+                null
+            };
+            // trailing whitespaces are NOT allowed:
+            yield return new object[]
+            {
+                $"System.Int32 \t\r\n",
+                null
+            };
+        }
+
+        [Theory]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/45033", typeof(PlatformDetection), nameof(PlatformDetection.IsMonoRuntime))]
+        [MemberData(nameof(AllWhitespacesArguments))]
+        public void AllWhitespaces(string input, Type? expectedType)
+            => Assert.Equal(expectedType, Type.GetType(input));
     }
 
     namespace MyNamespace1
@@ -352,3 +490,8 @@ public class MyClass1 { }
 
     public class GenericClass<T> { }
 }
+
+public class NoNamespace
+{
+
+}
diff --git a/src/libraries/System.Runtime/tests/System.Reflection.Tests/MethodInfoTests.cs b/src/libraries/System.Runtime/tests/System.Reflection.Tests/MethodInfoTests.cs
index 341fb7fcb9de..c0f95b434255 100644
--- a/src/libraries/System.Runtime/tests/System.Reflection.Tests/MethodInfoTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Reflection.Tests/MethodInfoTests.cs
@@ -300,6 +300,12 @@ public void ContainsGenericParameters(Type type, string name, bool expected)
             Assert.Equal(expected, GetMethod(type, name).ContainsGenericParameters);
         }
 
+        [Fact]
+        public void InvokeUninstantiatedGenericMethod()
+        {
+            Assert.Throws<InvalidOperationException>(() => GetMethod(typeof(MI_SubClass), nameof(MI_SubClass.StaticGenericMethod)).Invoke(null, [null]));
+        }
+
         [Fact]
         public void GetHashCodeTest()
         {
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.CompilerServices.Unsafe.Tests/UnsafeTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.CompilerServices.Unsafe.Tests/UnsafeTests.cs
index 13488161781d..955bc189b433 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.CompilerServices.Unsafe.Tests/UnsafeTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.CompilerServices.Unsafe.Tests/UnsafeTests.cs
@@ -14,7 +14,7 @@ public class UnsafeTests
         public static unsafe void ReadInt32()
         {
             int expected = 10;
-            void* address = Unsafe.AsPointer(ref expected);
+            void* address = Unsafe.AsPointer(ref expected); // Unsafe.AsPointer is safe since expected is on stack
             int ret = Unsafe.Read<int>(address);
             Assert.Equal(expected, ret);
         }
@@ -23,7 +23,7 @@ public static unsafe void ReadInt32()
         public static unsafe void WriteInt32()
         {
             int value = 10;
-            int* address = (int*)Unsafe.AsPointer(ref value);
+            int* address = (int*)Unsafe.AsPointer(ref value); // Unsafe.AsPointer is safe since value is on stack
             int expected = 20;
             Unsafe.Write(address, expected);
 
@@ -36,7 +36,7 @@ public static unsafe void WriteInt32()
         public static unsafe void WriteBytesIntoInt32()
         {
             int value = 20;
-            int* intAddress = (int*)Unsafe.AsPointer(ref value);
+            int* intAddress = (int*)Unsafe.AsPointer(ref value); // Unsafe.AsPointer is safe since value is on stack
             byte* byteAddress = (byte*)intAddress;
             for (int i = 0; i < 4; i++)
             {
@@ -70,7 +70,7 @@ public static unsafe void WriteBytesIntoInt32()
         public static unsafe void LongIntoCompoundStruct()
         {
             long value = 1234567891011121314L;
-            long* longAddress = (long*)Unsafe.AsPointer(ref value);
+            long* longAddress = (long*)Unsafe.AsPointer(ref value); // Unsafe.AsPointer is safe since value is on stack
             Byte4Short2 b4s2 = Unsafe.Read<Byte4Short2>(longAddress);
             if (BitConverter.IsLittleEndian)
             {
@@ -117,7 +117,7 @@ public static unsafe void ReadWriteDoublePointer()
         {
             int value1 = 10;
             int value2 = 20;
-            int* valueAddress = (int*)Unsafe.AsPointer(ref value1);
+            int* valueAddress = (int*)Unsafe.AsPointer(ref value1); // Unsafe.AsPointer is safe since value1 is on stack
             int** valueAddressPtr = &valueAddress;
             Unsafe.Write(valueAddressPtr, new IntPtr(&value2));
 
@@ -132,7 +132,7 @@ public static unsafe void CopyToRef()
         {
             int value = 10;
             int destination = -1;
-            Unsafe.Copy(ref destination, Unsafe.AsPointer(ref value));
+            Unsafe.Copy(ref destination, Unsafe.AsPointer(ref value)); // Unsafe.AsPointer is safe since value is on stack
             Assert.Equal(10, destination);
             Assert.Equal(10, value);
 
@@ -147,7 +147,7 @@ public static unsafe void CopyToVoidPtr()
         {
             int value = 10;
             int destination = -1;
-            Unsafe.Copy(Unsafe.AsPointer(ref destination), ref value);
+            Unsafe.Copy(Unsafe.AsPointer(ref destination), ref value); // Unsafe.AsPointer is safe since destination is on stack
             Assert.Equal(10, destination);
             Assert.Equal(10, value);
 
@@ -163,7 +163,7 @@ public static unsafe void CopyToRefGenericStruct()
             Int32Generic<string> destination = default;
             Int32Generic<string> value = new() { Int32 = 5, Value = "a" };
 
-            Unsafe.Copy(ref destination, Unsafe.AsPointer(ref value));
+            Unsafe.Copy(ref destination, Unsafe.AsPointer(ref value)); // Unsafe.AsPointer is safe since value is on stack
 
             Assert.Equal(5, destination.Int32);
             Assert.Equal("a", destination.Value);
@@ -175,7 +175,7 @@ public static unsafe void CopyToVoidPtrGenericStruct()
             Int32Generic<string> destination = default;
             Int32Generic<string> value = new() { Int32 = 5, Value = "a" };
 
-            Unsafe.Copy(Unsafe.AsPointer(ref destination), ref value);
+            Unsafe.Copy(Unsafe.AsPointer(ref destination), ref value); // Unsafe.AsPointer is safe since destination is on stack
 
             Assert.Equal(5, destination.Int32);
             Assert.Equal("a", destination.Value);
@@ -462,6 +462,26 @@ public static void ByteOffsetStackByte4()
             Assert.Equal(new IntPtr(-3), Unsafe.ByteOffset(ref byte4.B3, ref byte4.B0));
         }
 
+        private static unsafe class StaticReadonlyHolder
+        {
+            public static readonly void* Pointer = (void*)RuntimeHelpers.AllocateTypeAssociatedMemory(typeof(StaticReadonlyHolder), 1);
+        }
+
+        [Fact]
+        public static unsafe void ByteOffsetConstantRef()
+        {
+            // https://github.com/dotnet/runtime/pull/99019
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static nint NullTest(ref byte origin) => Unsafe.ByteOffset(ref origin, ref Unsafe.NullRef<byte>());
+            Assert.Equal(0, NullTest(ref Unsafe.NullRef<byte>()));
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static ref byte GetStatic(ref byte x) => ref x;
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static nint StaticReadonlyTest(ref byte x) => Unsafe.ByteOffset(ref GetStatic(ref Unsafe.AsRef<byte>(StaticReadonlyHolder.Pointer)), ref x);
+            Assert.Equal(0, StaticReadonlyTest(ref Unsafe.AsRef<byte>(StaticReadonlyHolder.Pointer)));
+        }
+
         [Fact]
         public static unsafe void AsRef()
         {
@@ -597,7 +617,7 @@ public static void RefAddNuintByteOffset()
         }
 
         [Fact]
-        public static void RefSubtract()
+        public static unsafe void RefSubtract()
         {
             string[] a = new string[] { "abc", "def", "ghi", "jkl" };
 
@@ -609,6 +629,11 @@ public static void RefSubtract()
 
             ref string r3 = ref Unsafe.Subtract(ref r2, 3);
             Assert.Equal("abc", r3);
+
+            // https://github.com/dotnet/runtime/pull/99019
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static ref byte NullTest(nuint offset) => ref Unsafe.Subtract(ref Unsafe.NullRef<byte>(), offset);
+            Assert.True(Unsafe.IsNullRef(ref NullTest(0)));
         }
 
         [Fact]
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/IO/PathTests_Windows.cs b/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/IO/PathTests_Windows.cs
index f1b27f9fef81..46d7bf01c4ec 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/IO/PathTests_Windows.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/IO/PathTests_Windows.cs
@@ -563,6 +563,8 @@ public void GetFullPath_CommonDevice_Windows(string path, string basePath, strin
             { @"C:tmp", @"C:\git\runtime", @"C:\git\runtime\tmp" },
             { @"C:", @"C:\git\runtime", @"C:\git\runtime" },
             { @"C", @"C:\git\runtime", @"C:\git\runtime\C" },
+            { @"c:", @"C:\git\runtime", @"C:\git\runtime" },
+            { @"C:tmp", @"c:\git\runtime", @"c:\git\runtime\tmp" },
 
             { @"Z:tmp\foo\..", @"C:\git\runtime", @"Z:\tmp" },
             { @"Z:tmp\foo\.", @"C:\git\runtime", @"Z:\tmp\foo" },
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Math.cs b/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Math.cs
index 86ca388b1142..bff81bdbe385 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Math.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Math.cs
@@ -1497,6 +1497,7 @@ public static void Sin(double value, double expectedResult, double allowedVarian
 
         [Theory]
         [InlineData( double.NegativeInfinity,  double.NaN,           double.NaN,          0.0,                              0.0)]
+        [InlineData(-1e18,                     0.9929693207404051,   0.11837199021871073, 0.0002,                           0.002)]                             // https://github.com/dotnet/runtime/issues/98204
         [InlineData(-3.1415926535897932,      -0.0,                 -1.0,                 CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon * 10)]  // value: -(pi)
         [InlineData(-2.7182818284590452,      -0.41078129050290870, -0.91173391478696510, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(e)
         [InlineData(-2.3025850929940457,      -0.74398033695749319, -0.66820151019031295, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(ln(10))
@@ -1528,6 +1529,7 @@ public static void Sin(double value, double expectedResult, double allowedVarian
         [InlineData( 2.3025850929940457,       0.74398033695749319, -0.66820151019031295, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (ln(10))
         [InlineData( 2.7182818284590452,       0.41078129050290870, -0.91173391478696510, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (e)
         [InlineData( 3.1415926535897932,       0.0,                 -1.0,                 CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon * 10)]  // value:  (pi)
+        [InlineData( 1e18,                    -0.9929693207404051,   0.11837199021871073, 0.0002,                           0.002)]                             // https://github.com/dotnet/runtime/issues/98204
         [InlineData( double.PositiveInfinity,  double.NaN,           double.NaN,          0.0,                              0.0)]
         public static void SinCos(double value, double expectedResultSin, double expectedResultCos, double allowedVarianceSin, double allowedVarianceCos)
         {
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/MathF.cs b/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/MathF.cs
index 85e62a190ad7..59040cd799c4 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/MathF.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/MathF.cs
@@ -1677,6 +1677,7 @@ public static void Sin(float value, float expectedResult, float allowedVariance)
 
         [Theory]
         [InlineData( float.NegativeInfinity,  float.NaN,     float.NaN,    0.0f,                             0.0f)]
+        [InlineData(-1e8f,                   -0.931639,     -0.36338508,   CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // https://github.com/dotnet/runtime/issues/98204
         [InlineData(-3.14159265f,            -0.0f,         -1.0f,         CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon * 10)]  // value: -(pi)
         [InlineData(-2.71828183f,            -0.410781291f, -0.911733918f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(e)
         [InlineData(-2.30258509f,            -0.743980337f, -0.668201510f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(ln(10))
@@ -1708,6 +1709,7 @@ public static void Sin(float value, float expectedResult, float allowedVariance)
         [InlineData( 2.30258509f,             0.743980337f, -0.668201510f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (ln(10))
         [InlineData( 2.71828183f,             0.410781291f, -0.911733918f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (e)
         [InlineData( 3.14159265f,             0.0f,         -1.0f,         CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon * 10)]  // value:  (pi)
+        [InlineData( 1e8f,                    0.931639,     -0.36338508,   CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // https://github.com/dotnet/runtime/issues/98204
         [InlineData( float.PositiveInfinity,  float.NaN,     float.NaN,    0.0f,                             0.0f)]
         public static void SinCos(float value, float expectedResultSin, float expectedResultCos, float allowedVarianceSin, float allowedVarianceCos)
         {
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Random.cs b/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Random.cs
index 16c3a9a2353b..27cfa26920e0 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Random.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Random.cs
@@ -727,6 +727,16 @@ public static void Shuffle_Array_Seeded(bool emptyShuffle)
             AssertExtensions.SequenceEqual(stackalloc int[] { 1, 4, 3, 2 }, items);
         }
 
+        [Fact]
+        public static void Shuffle_Array_Covariance()
+        {
+            Random random = new Random(0x70636A61);
+			string[] items = ["", ""];
+			object[] array = items;
+            random.Shuffle(array);
+            AssertExtensions.SequenceEqual((ReadOnlySpan<string>)["", ""], items);
+        }
+
         [Fact]
         public static void Shuffle_Array_ArgValidation()
         {
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System.Runtime.Tests.csproj b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System.Runtime.Tests.csproj
index 5a210ace3a84..066a9994e31e 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System.Runtime.Tests.csproj
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System.Runtime.Tests.csproj
@@ -1,4 +1,4 @@
-﻿<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
     <TargetFrameworks>$(NetCoreAppCurrent)-windows;$(NetCoreAppCurrent)-unix;$(NetCoreAppCurrent)-browser</TargetFrameworks>
     <DefineConstants Condition="'$(TargetOS)' == 'browser'">$(DefineConstants);TARGET_BROWSER</DefineConstants>
@@ -203,6 +203,8 @@
     <Compile Include="System\Diagnostics\CodeAnalysis\ConstantExpectedAttributeTests.cs" />
     <Compile Include="System\Diagnostics\CodeAnalysis\DynamicallyAccessedMembersAttributeTests.cs" />
     <Compile Include="System\Diagnostics\CodeAnalysis\DynamicDependencyAttributeTests.cs" />
+    <Compile Include="System\Diagnostics\CodeAnalysis\FeatureGuardAttributeTests.cs" />
+    <Compile Include="System\Diagnostics\CodeAnalysis\FeatureSwitchDefinitionAttributeTests.cs" />
     <Compile Include="System\Diagnostics\CodeAnalysis\RequiresAssemblyFilesAttributeTests.cs" />
     <Compile Include="System\Diagnostics\CodeAnalysis\RequiresDynamicCodeAttributeTests.cs" />
     <Compile Include="System\Diagnostics\CodeAnalysis\RequiresUnreferencedCodeAttributeTests.cs" />
@@ -273,6 +275,7 @@
     <Compile Include="System\Runtime\CompilerServices\DefaultInterpolatedStringHandlerTests.cs" />
     <Compile Include="System\Runtime\CompilerServices\FormattableStringFactoryTests.cs" />
     <Compile Include="System\Runtime\CompilerServices\StrongBoxTests.cs" />
+    <Compile Include="System\Runtime\CompilerServices\ParamCollectionAttributeTests.cs" />
     <Compile Include="System\Runtime\CompilerServices\RuntimeHelpersTests.cs" />
     <Compile Include="System\Runtime\ConstrainedExecution\PrePrepareMethodAttributeTests.cs" />
     <Compile Include="System\Runtime\ExceptionServices\HandleProcessCorruptedStateExceptions.cs" />
@@ -299,6 +302,7 @@
 
   <ItemGroup>
     <Compile Include="$(CommonTestPath)System\GenericMathHelpers.cs" Link="Common\System\GenericMathHelpers.cs" />
+    <Compile Include="System\Numerics\DimTests.GenericMath.cs" />
     <Compile Include="System\Numerics\GenericMathDimHelpers.cs" />
     <Compile Include="System\Numerics\IBinaryNumberTests.cs" />
     <Compile Include="System\ByteTests.GenericMath.cs" />
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Diagnostics/CodeAnalysis/FeatureGuardAttributeTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Diagnostics/CodeAnalysis/FeatureGuardAttributeTests.cs
new file mode 100644
index 000000000000..3a3b88ff339d
--- /dev/null
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Diagnostics/CodeAnalysis/FeatureGuardAttributeTests.cs
@@ -0,0 +1,18 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics.CodeAnalysis;
+using Xunit;
+
+namespace System.Diagnostics.CodeAnalysis.Tests
+{
+    public class FeatureGuardAttributeTests
+    {
+        [Fact]
+        public void TestConstructor()
+        {
+            var attr = new FeatureGuardAttribute(typeof(RequiresUnreferencedCodeAttribute));
+            Assert.Equal(typeof(RequiresUnreferencedCodeAttribute), attr.FeatureType);
+        }
+    }
+}
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Diagnostics/CodeAnalysis/FeatureSwitchDefinitionAttributeTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Diagnostics/CodeAnalysis/FeatureSwitchDefinitionAttributeTests.cs
new file mode 100644
index 000000000000..9d67be3ec1ab
--- /dev/null
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Diagnostics/CodeAnalysis/FeatureSwitchDefinitionAttributeTests.cs
@@ -0,0 +1,17 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Xunit;
+
+namespace System.Diagnostics.CodeAnalysis.Tests
+{
+    public class FeatureSwitchDefinitionAttributeTests
+    {
+        [Fact]
+        public void TestConstructor()
+        {
+            var attr = new FeatureSwitchDefinitionAttribute("SwitchName");
+            Assert.Equal("SwitchName", attr.SwitchName);
+        }
+    }
+}
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Diagnostics/StackTraceHiddenAttributeTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Diagnostics/StackTraceHiddenAttributeTests.cs
index 5a1431b642eb..0cd295921045 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Diagnostics/StackTraceHiddenAttributeTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Diagnostics/StackTraceHiddenAttributeTests.cs
@@ -8,7 +8,6 @@
 namespace System.Tests
 {
     [ActiveIssue("https://github.com/dotnet/runtime/issues/50957", typeof(PlatformDetection), nameof(PlatformDetection.IsBrowser), nameof(PlatformDetection.IsMonoAOT))]
-    [ActiveIssue("https://github.com/dotnet/runtime/issues/95981", typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnBrowser))]
     public class StackTraceHiddenAttributeTests
     {
         [Fact]
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/GCTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/GCTests.cs
index 59cedd9a5cea..e20c3877b5c9 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/GCTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/GCTests.cs
@@ -1096,9 +1096,11 @@ private unsafe static void AllocateArrayPinned_ManagedValueType_CanRoundtripThro
             var rng = new Random(0xAF);
 
             EmbeddedValueType<string>[] array = uninitialized ? GC.AllocateUninitializedArray<EmbeddedValueType<string>>(length, pinned: true) : GC.AllocateArray<EmbeddedValueType<string>>(length, pinned: true);
-            byte* pointer = (byte*)Unsafe.AsPointer(ref array[0]);
+            byte* pointer = (byte*)Unsafe.AsPointer(ref array[0]); // Unsafe.AsPointer is safe since array is pinned
             var size = Unsafe.SizeOf<EmbeddedValueType<string>>();
 
+            GC.Collect();
+
             for(int i = 0; i < length; ++i)
             {
                 int idx = rng.Next(length);
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/HalfTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/HalfTests.cs
index 600326b39d85..cba358e484ac 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/HalfTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/HalfTests.cs
@@ -522,12 +522,16 @@ public static IEnumerable<object[]> ExplicitConversion_FromSingle_TestData()
                                   BitConverter.UInt16BitsToHalf(0b0_11001_0000000100)), // 1027.5 rounds to even
                 (BitConverter.Int32BitsToSingle(0b0_10001001_00000000110111111111111),
                                   BitConverter.UInt16BitsToHalf(0b0_11001_0000000011)), // 1027.5-ULP rounds down
+                (BitConverter.Int32BitsToSingle(0b0_10001001_00000000101000000000000),
+                                  BitConverter.UInt16BitsToHalf(0b0_11001_0000000010)), // 1026.5 rounds to even
                 (BitConverter.Int32BitsToSingle(unchecked((int)0b1_10001001_00000000110111111111111)),
                                                  BitConverter.UInt16BitsToHalf(0b1_11001_0000000011)), // -1027.5+ULP rounds towards zero
                 (BitConverter.Int32BitsToSingle(unchecked((int)0b1_10001001_00000000111000000000000)),
                                                  BitConverter.UInt16BitsToHalf(0b1_11001_0000000100)), // -1027.5 rounds to even
                 (BitConverter.Int32BitsToSingle(unchecked((int)0b1_10001001_00000000111000000000001)),
                                                  BitConverter.UInt16BitsToHalf(0b1_11001_0000000100)), // -1027.5-ULP rounds away from zero
+                (BitConverter.Int32BitsToSingle(unchecked((int)0b1_10001001_00000000101000000000000)),
+                                                 BitConverter.UInt16BitsToHalf(0b1_11001_0000000010)), // -1026.5 rounds to even
                 (BitConverter.Int32BitsToSingle(0b0_01110000_00000001110000000000001),
                                  BitConverter.UInt16BitsToHalf(0b0_00000_1000000100)), // subnormal + ULP rounds up
                 (BitConverter.Int32BitsToSingle(0b0_01110000_00000001110000000000000),
@@ -538,8 +542,8 @@ public static IEnumerable<object[]> ExplicitConversion_FromSingle_TestData()
                                                 BitConverter.UInt16BitsToHalf(0b1_00000_1000000011)), // neg subnormal + ULP rounds higher
                 (BitConverter.Int32BitsToSingle(unchecked((int)0b1_01110000_00000001110000000000000)),
                                                 BitConverter.UInt16BitsToHalf(0b1_00000_1000000100)), // neg subnormal rounds to even
-                (BitConverter.Int32BitsToSingle(unchecked((int)0b1_01110000_00000001101111111111111)),
-                                                BitConverter.UInt16BitsToHalf(0b1_00000_1000000011)), // neg subnormal - ULP rounds lower,
+                (BitConverter.Int32BitsToSingle(unchecked((int)0b1_01110000_00000001110000000000001)),
+                                                BitConverter.UInt16BitsToHalf(0b1_00000_1000000100)), // neg subnormal - ULP rounds lower,
                 (BitConverter.Int32BitsToSingle(0x33000000), BitConverter.UInt16BitsToHalf(0b0_00000_000000000)), // (half-precision minimum subnormal / 2) should underflow to zero
             };
 
@@ -616,12 +620,16 @@ public static IEnumerable<object[]> ExplicitConversion_FromDouble_TestData()
                     BitConverter.UInt16BitsToHalf(0b0_11001_0000000100)), // 1027.5 rounds to even
                 (BitConverter.Int64BitsToDouble(0x40900DFFFFFFFFFF),
                     BitConverter.UInt16BitsToHalf(0b0_11001_0000000011)), // 1027.5-ULP rounds down
+                (BitConverter.Int64BitsToDouble(0x40900A0000000000),
+                    BitConverter.UInt16BitsToHalf(0b0_11001_0000000010)), // 1026.5 rounds to even
                 (BitConverter.Int64BitsToDouble(unchecked((long)0xC0900DFFFFFFFFFF)),
                     BitConverter.UInt16BitsToHalf(0b1_11001_0000000011)), // -1027.5+ULP rounds towards zero
                 (BitConverter.Int64BitsToDouble(unchecked((long)0xC0900E0000000000)),
                     BitConverter.UInt16BitsToHalf(0b1_11001_0000000100)), // -1027.5 rounds to even
                 (BitConverter.Int64BitsToDouble(unchecked((long)0xC0900E0000000001)),
                     BitConverter.UInt16BitsToHalf(0b1_11001_0000000100)), // -1027.5-ULP rounds away from zero
+                (BitConverter.Int64BitsToDouble(unchecked((long)0xC0900A0000000000)),
+                    BitConverter.UInt16BitsToHalf(0b1_11001_0000000010)), // -1026.5 rounds to even
                 (BitConverter.Int64BitsToDouble(0x3F001C0000000001),
                     BitConverter.UInt16BitsToHalf(0b0_00000_1000000100)), // subnormal + ULP rounds up
                 (BitConverter.Int64BitsToDouble(0x3F001C0000000001),
@@ -1117,6 +1125,14 @@ public static void ToStringRoundtrip_R(object o_value)
             AssertExtensions.Equal((Half)value, result);
         }
 
+        [Fact] // https://github.com/dotnet/runtime/issues/98841
+        public static void ToString_MaxPrecision()
+        {
+            Half value = BitConverter.Int16BitsToHalf(0x07FF);
+            string str = value.ToString("F24");
+            Assert.Equal("0.000122010707855224609375", str);
+        }
+
         public static IEnumerable<object[]> RoundTripFloat_CornerCases()
         {
             // Magnitude smaller than 2^-24 maps to 0
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Int128Tests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Int128Tests.cs
index 8c1910e2fbd8..9b9103b56ec2 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Int128Tests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Int128Tests.cs
@@ -107,6 +107,51 @@ public static void EqualsTest(Int128 i1, object obj, bool expected)
             Assert.Equal(expected, i1.Equals(obj));
         }
 
+        [Fact]
+        public static void CheckedConvertToInt64()
+        {
+            Assert.Equal(123L, checked((long)new Int128(0, 123)));
+            Assert.Equal(-123L, checked((long)(Int128)(-123)));
+            Assert.Throws<OverflowException>(() => checked((long)new Int128(1, 1)));
+            Assert.Throws<OverflowException>(() => checked((long)new Int128(ulong.MaxValue, 42)));
+        }
+
+        [Fact]
+        public static void CheckedConvertToInt32()
+        {
+            Assert.Equal(123, checked((int)new Int128(0, 123)));
+            Assert.Equal(-123, checked((int)(Int128)(-123)));
+            Assert.Throws<OverflowException>(() => checked((int)new Int128(1, 1)));
+            Assert.Throws<OverflowException>(() => checked((int)new Int128(ulong.MaxValue, 42)));
+        }
+
+        [Fact]
+        public static void CheckedConvertToInt16()
+        {
+            Assert.Equal((short)123, checked((short)new Int128(0, 123)));
+            Assert.Equal((short)(-123), checked((short)(Int128)(-123)));
+            Assert.Throws<OverflowException>(() => checked((short)new Int128(1, 1)));
+            Assert.Throws<OverflowException>(() => checked((short)new Int128(ulong.MaxValue, 42)));
+        }
+
+        [Fact]
+        public static void CheckedConvertToSByte()
+        {
+            Assert.Equal((sbyte)123, checked((sbyte)new Int128(0, 123)));
+            Assert.Equal((sbyte)(-123), checked((sbyte)(Int128)(-123)));
+            Assert.Throws<OverflowException>(() => checked((sbyte)new Int128(1, 1)));
+            Assert.Throws<OverflowException>(() => checked((sbyte)new Int128(ulong.MaxValue, 42)));
+        }
+
+        [Fact]
+        public static void CheckedConvertToIntPtr()
+        {
+            Assert.Equal((nint)123, checked((nint)new Int128(0, 123)));
+            Assert.Equal((nint)(-123), checked((nint)(Int128)(-123)));
+            Assert.Throws<OverflowException>(() => checked((nint)new Int128(1, 1)));
+            Assert.Throws<OverflowException>(() => checked((nint)new Int128(ulong.MaxValue, 42)));
+        }
+
         public static IEnumerable<object[]> ToString_TestData()
         {
             foreach (NumberFormatInfo defaultFormat in new[] { null, NumberFormatInfo.CurrentInfo })
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/MulticastDelegateTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/MulticastDelegateTests.cs
index 354f997ed5c7..9994fa02f10f 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/MulticastDelegateTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/MulticastDelegateTests.cs
@@ -66,6 +66,21 @@ public static void EqualsTest()
             Assert.Equal(d1.GetHashCode(), d1.GetHashCode());
         }
 
+        [Fact]
+        public static void ArrayDelegates()
+        {
+            // Delegate implementation may use Delegate[] arrays as sentinels. Validate that
+            // the sentinels are not confused with user provided targets.
+
+            Action da = new Delegate[5].MyExtension;
+            Assert.True(da.HasSingleTarget);
+            Assert.Equal(1, da.GetInvocationList().Length);
+
+            Func<int, int> dd = new Delegate[10].GetLength;
+            Assert.True(dd.HasSingleTarget);
+            Assert.Equal(1, dd.GetInvocationList().Length);
+        }
+
         [Fact]
         public static void CombineReturn()
         {
@@ -199,7 +214,7 @@ private static void CheckInvokeList(D[] expected, D combo, Tracker target)
             {
                 CheckIsSingletonDelegate((D)(expected[i]), (D)(invokeList[i]), target);
             }
-            Assert.Same(combo.Target, expected[expected.Length - 1].Target);
+            Assert.Same(combo.Target, expected[^1].Target);
             Assert.Same(combo.Target, target);
             Assert.Equal(combo.HasSingleTarget, invokeList.Length == 1);
             int count = 0;
@@ -209,6 +224,7 @@ private static void CheckInvokeList(D[] expected, D combo, Tracker target)
                 count++;
             }
             Assert.Equal(count, invokeList.Length);
+            Assert.Equal(combo.Method, invokeList[^1].Method);
         }
 
         private static void CheckIsSingletonDelegate(D expected, D actual, Tracker target)
@@ -283,4 +299,11 @@ private class C
             public string Goo(int x) => new string('A', x);
         }
     }
+
+    static class MulticastDelegateTestsExtensions
+    {
+        public static void MyExtension(this Delegate[] delegates)
+        {
+        }
+    }
 }
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Numerics/DimTests.GenericMath.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Numerics/DimTests.GenericMath.cs
index dc6870e1f65a..0da59d597f7b 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Numerics/DimTests.GenericMath.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Numerics/DimTests.GenericMath.cs
@@ -9,6 +9,10 @@ namespace System.Numerics.Tests
 {
     public class DimTests_GenericMath
     {
+        private const float MinNormalSingle = 1.17549435E-38f;
+
+        private const float MaxSubnormalSingle = 1.17549421E-38f;
+
         //
         // IBinaryNumber
         //
@@ -20,6 +24,13 @@ public static void AllBitsSetInt32Test()
             Assert.Equal((BinaryIntegerWrapper<int>)0, ~BinaryNumberHelper<BinaryIntegerWrapper<int>>.AllBitsSet);
         }
 
+        [Fact]
+        public static void AllBitsSetSingleTest()
+        {
+            Assert.Equal(0xFFFF_FFFF, BitConverter.SingleToUInt32Bits(BinaryNumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.AllBitsSet.Value));
+            Assert.Equal(0U, ~BitConverter.SingleToUInt32Bits(BinaryNumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.AllBitsSet.Value));
+        }
+
         [Fact]
         public static void AllBitsSetUInt32Test()
         {
@@ -125,6 +136,26 @@ public static void ClampInt32Test()
             Assert.Equal((BinaryIntegerWrapper<int>)unchecked((int)0xFFFFFFFF), NumberHelper<BinaryIntegerWrapper<int>>.Clamp(unchecked((int)0xFFFFFFFF), unchecked((int)0xFFFFFFC0), 0x003F));
         }
 
+        [Fact]
+        public static void ClampSingleTest()
+        {
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(float.NegativeInfinity, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(float.MinValue, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(-1.0f, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(-MinNormalSingle, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(-MaxSubnormalSingle, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(-float.Epsilon, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(-0.0f, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.NaN, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(float.NaN, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(0.0f, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(float.Epsilon, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(MaxSubnormalSingle, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(MinNormalSingle, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(1.0f, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)63.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(float.MaxValue, 1.0f, 63.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)63.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Clamp(float.PositiveInfinity, 1.0f, 63.0f));
+        }
+
         [Fact]
         public static void ClampUInt32Test()
         {
@@ -145,6 +176,26 @@ public static void MaxInt32Test()
             Assert.Equal((BinaryIntegerWrapper<int>)0x00000001, NumberHelper<BinaryIntegerWrapper<int>>.Max(unchecked((int)0xFFFFFFFF), 1));
         }
 
+        [Fact]
+        public static void MaxSingleTest()
+        {
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(float.NegativeInfinity, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(float.MinValue, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(-1.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(-MinNormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(-MaxSubnormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(-float.Epsilon, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(-0.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.NaN, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(float.NaN, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(0.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(float.Epsilon, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(MaxSubnormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(MinNormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(1.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.MaxValue, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(float.MaxValue, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.PositiveInfinity, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Max(float.PositiveInfinity, 1.0f));
+        }
+
         [Fact]
         public static void MaxUInt32Test()
         {
@@ -165,6 +216,26 @@ public static void MaxNumberInt32Test()
             Assert.Equal((BinaryIntegerWrapper<int>)0x00000001, NumberHelper<BinaryIntegerWrapper<int>>.MaxNumber(unchecked((int)0xFFFFFFFF), 1));
         }
 
+        [Fact]
+        public static void MaxNumberSingleTest()
+        {
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(float.NegativeInfinity, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(float.MinValue, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(-1.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(-MinNormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(-MaxSubnormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(-float.Epsilon, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(-0.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(float.NaN, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(0.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(float.Epsilon, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(MaxSubnormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(MinNormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(1.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.MaxValue, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(float.MaxValue, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.PositiveInfinity, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MaxNumber(float.PositiveInfinity, 1.0f));
+        }
+
         [Fact]
         public static void MaxNumberUInt32Test()
         {
@@ -185,6 +256,26 @@ public static void MinInt32Test()
             Assert.Equal((BinaryIntegerWrapper<int>)unchecked((int)0xFFFFFFFF), NumberHelper<BinaryIntegerWrapper<int>>.Min(unchecked((int)0xFFFFFFFF), 1));
         }
 
+        [Fact]
+        public static void MinSingleTest()
+        {
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.NegativeInfinity, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(float.NegativeInfinity, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.MinValue, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(float.MinValue, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)(-1.0f), NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(-1.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)(-MinNormalSingle), NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(-MinNormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)(-MaxSubnormalSingle), NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(-MaxSubnormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)(-float.Epsilon), NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(-float.Epsilon, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)(-0.0f), NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(-0.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.NaN, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(float.NaN, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)0.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(0.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.Epsilon, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(float.Epsilon, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)MaxSubnormalSingle, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(MaxSubnormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)MinNormalSingle, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(MinNormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(1.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(float.MaxValue, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Min(float.PositiveInfinity, 1.0f));
+        }
+
         [Fact]
         public static void MinUInt32Test()
         {
@@ -205,6 +296,26 @@ public static void MinNumberInt32Test()
             Assert.Equal((BinaryIntegerWrapper<int>)unchecked((int)0xFFFFFFFF), NumberHelper<BinaryIntegerWrapper<int>>.MinNumber(unchecked((int)0xFFFFFFFF), 1));
         }
 
+        [Fact]
+        public static void MinNumberSingleTest()
+        {
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.NegativeInfinity, NumberHelper< BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(float.NegativeInfinity, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.MinValue, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(float.MinValue, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)(-1.0f), NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(-1.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)(-MinNormalSingle), NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(-MinNormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)(-MaxSubnormalSingle), NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(-MaxSubnormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)(-float.Epsilon), NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(-float.Epsilon, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)(-0.0f), NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(-0.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(float.NaN, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)0.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(0.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)float.Epsilon, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(float.Epsilon, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)MaxSubnormalSingle, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(MaxSubnormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)MinNormalSingle, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(MinNormalSingle, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(1.0f, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(float.MaxValue, 1.0f));
+            AssertBitwiseEqual((BinaryFloatingPointIeee754Wrapper<float>)1.0f, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.MinNumber(float.PositiveInfinity, 1.0f));
+        }
+
         [Fact]
         public static void MinNumberUInt32Test()
         {
@@ -225,6 +336,29 @@ public static void SignInt32Test()
             Assert.Equal(-1, NumberHelper<BinaryIntegerWrapper<int>>.Sign(unchecked((int)0xFFFFFFFF)));
         }
 
+        [Fact]
+        public static void SignSingleTest()
+        {
+            Assert.Equal(-1, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(float.NegativeInfinity));
+            Assert.Equal(-1, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(float.MinValue));
+            Assert.Equal(-1, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(-1.0f));
+            Assert.Equal(-1, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(-MinNormalSingle));
+            Assert.Equal(-1, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(-MaxSubnormalSingle));
+            Assert.Equal(-1, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(-float.Epsilon));
+
+            Assert.Equal(0, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(-0.0f));
+            Assert.Equal(0, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(0.0f));
+
+            Assert.Equal(1, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(float.Epsilon));
+            Assert.Equal(1, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(MaxSubnormalSingle));
+            Assert.Equal(1, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(MinNormalSingle));
+            Assert.Equal(1, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(1.0f));
+            Assert.Equal(1, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(float.MaxValue));
+            Assert.Equal(1, NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(float.PositiveInfinity));
+
+            Assert.Throws<ArithmeticException>(() => NumberHelper<BinaryFloatingPointIeee754Wrapper<float>>.Sign(float.NaN));
+        }
+
         [Fact]
         public static void SignUInt32Test()
         {
@@ -251,16 +385,16 @@ public BinaryIntegerWrapper(T value)
 
             // Required Generic Math Surface Area
 
+            public static BinaryIntegerWrapper<T> AdditiveIdentity => T.AdditiveIdentity;
+
+            public static BinaryIntegerWrapper<T> MultiplicativeIdentity => T.MultiplicativeIdentity;
+
             public static BinaryIntegerWrapper<T> One => T.One;
 
             public static int Radix => T.Radix;
 
             public static BinaryIntegerWrapper<T> Zero => T.Zero;
 
-            public static BinaryIntegerWrapper<T> AdditiveIdentity => T.AdditiveIdentity;
-
-            public static BinaryIntegerWrapper<T> MultiplicativeIdentity => T.MultiplicativeIdentity;
-
             public static BinaryIntegerWrapper<T> Abs(BinaryIntegerWrapper<T> value) => T.Abs(value);
             public static bool IsCanonical(BinaryIntegerWrapper<T> value) => T.IsCanonical(value);
             public static bool IsComplexNumber(BinaryIntegerWrapper<T> value) => T.IsComplexNumber(value);
@@ -349,35 +483,50 @@ public int CompareTo(object? obj)
             static bool INumberBase<BinaryIntegerWrapper<T>>.TryConvertFromChecked<TOther>(TOther value, out BinaryIntegerWrapper<T> result)
             {
                 bool succeeded = T.TryConvertFromChecked(value, out T actualResult);
+
+                if (!succeeded)
+                {
+                    succeeded = TOther.TryConvertToChecked(value, out actualResult);
+                }
+
                 result = actualResult;
                 return succeeded;
-
             }
             static bool INumberBase<BinaryIntegerWrapper<T>>.TryConvertFromSaturating<TOther>(TOther value, out BinaryIntegerWrapper<T> result)
             {
                 bool succeeded = T.TryConvertFromSaturating(value, out T actualResult);
+
+                if (!succeeded)
+                {
+                    succeeded = TOther.TryConvertToSaturating(value, out actualResult);
+                }
+
                 result = actualResult;
                 return succeeded;
-
             }
             static bool INumberBase<BinaryIntegerWrapper<T>>.TryConvertFromTruncating<TOther>(TOther value, out BinaryIntegerWrapper<T> result)
             {
                 bool succeeded = T.TryConvertFromTruncating(value, out T actualResult);
+
+                if (!succeeded)
+                {
+                    succeeded = TOther.TryConvertToTruncating(value, out actualResult);
+                }
+
                 result = actualResult;
                 return succeeded;
-
             }
-            static bool INumberBase<BinaryIntegerWrapper<T>>.TryConvertToChecked<TOther>(BinaryIntegerWrapper<T> value, out TOther result) => T.TryConvertToChecked(value, out result);
-            static bool INumberBase<BinaryIntegerWrapper<T>>.TryConvertToSaturating<TOther>(BinaryIntegerWrapper<T> value, out TOther result) => T.TryConvertToSaturating(value, out result);
-            static bool INumberBase<BinaryIntegerWrapper<T>>.TryConvertToTruncating<TOther>(BinaryIntegerWrapper<T> value, out TOther result) => T.TryConvertToTruncating(value, out result);
+            static bool INumberBase<BinaryIntegerWrapper<T>>.TryConvertToChecked<TOther>(BinaryIntegerWrapper<T> value, out TOther result) => T.TryConvertToChecked(value.Value, out result);
+            static bool INumberBase<BinaryIntegerWrapper<T>>.TryConvertToSaturating<TOther>(BinaryIntegerWrapper<T> value, out TOther result) => T.TryConvertToSaturating(value.Value, out result);
+            static bool INumberBase<BinaryIntegerWrapper<T>>.TryConvertToTruncating<TOther>(BinaryIntegerWrapper<T> value, out TOther result) => T.TryConvertToTruncating(value.Value, out result);
 
             public static BinaryIntegerWrapper<T> operator +(BinaryIntegerWrapper<T> value) => +value.Value;
             public static BinaryIntegerWrapper<T> operator +(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value + right.Value;
             public static BinaryIntegerWrapper<T> operator -(BinaryIntegerWrapper<T> value) => -value.Value;
             public static BinaryIntegerWrapper<T> operator -(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value - right.Value;
             public static BinaryIntegerWrapper<T> operator ~(BinaryIntegerWrapper<T> value) => ~value.Value;
-            public static BinaryIntegerWrapper<T> operator ++(BinaryIntegerWrapper<T> value) => value.Value++;
-            public static BinaryIntegerWrapper<T> operator --(BinaryIntegerWrapper<T> value) => value.Value--;
+            public static BinaryIntegerWrapper<T> operator ++(BinaryIntegerWrapper<T> value) => value.Value + T.One;
+            public static BinaryIntegerWrapper<T> operator --(BinaryIntegerWrapper<T> value) => value.Value - T.One;
             public static BinaryIntegerWrapper<T> operator *(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value * right.Value;
             public static BinaryIntegerWrapper<T> operator /(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value / right.Value;
             public static BinaryIntegerWrapper<T> operator %(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value % right.Value;
@@ -388,11 +537,250 @@ static bool INumberBase<BinaryIntegerWrapper<T>>.TryConvertFromTruncating<TOther
             public static BinaryIntegerWrapper<T> operator >>(BinaryIntegerWrapper<T> value, int shiftAmount) => value.Value >> shiftAmount;
             public static bool operator ==(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value == right.Value;
             public static bool operator !=(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value != right.Value;
-            public static bool operator <(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value > right.Value;
-            public static bool operator >(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value < right.Value;
+            public static bool operator <(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value < right.Value;
+            public static bool operator >(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value > right.Value;
             public static bool operator <=(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value <= right.Value;
             public static bool operator >=(BinaryIntegerWrapper<T> left, BinaryIntegerWrapper<T> right) => left.Value >= right.Value;
             public static BinaryIntegerWrapper<T> operator >>>(BinaryIntegerWrapper<T> value, int shiftAmount) => value.Value >>> shiftAmount;
         }
+
+        public struct BinaryFloatingPointIeee754Wrapper<T> : IBinaryFloatingPointIeee754<BinaryFloatingPointIeee754Wrapper<T>>
+            where T : IBinaryFloatingPointIeee754<T>
+        {
+            public T Value;
+
+            public BinaryFloatingPointIeee754Wrapper(T value)
+            {
+                Value = value;
+            }
+
+            public static implicit operator BinaryFloatingPointIeee754Wrapper<T>(T value) => new BinaryFloatingPointIeee754Wrapper<T>(value);
+
+            public static implicit operator T(BinaryFloatingPointIeee754Wrapper<T> value) => value.Value;
+
+            // Required Generic Math Surface Area
+
+            public static BinaryFloatingPointIeee754Wrapper<T> AdditiveIdentity => T.AdditiveIdentity;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> E => T.E;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> Epsilon => T.Epsilon;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> MultiplicativeIdentity => T.MultiplicativeIdentity;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> NaN => T.NaN;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> NegativeInfinity => T.NegativeInfinity;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> NegativeOne => T.NegativeOne;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> NegativeZero => T.NegativeZero;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> One => T.One;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> Pi => T.Pi;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> PositiveInfinity => T.PositiveInfinity;
+
+            public static int Radix => T.Radix;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> Tau => T.Tau;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> Zero => T.Zero;
+
+            public static BinaryFloatingPointIeee754Wrapper<T> Abs(BinaryFloatingPointIeee754Wrapper<T> value) => T.Abs(value);
+            public static BinaryFloatingPointIeee754Wrapper<T> Acos(BinaryFloatingPointIeee754Wrapper<T> x) => T.Acos(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Acosh(BinaryFloatingPointIeee754Wrapper<T> x) => T.Acosh(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> AcosPi(BinaryFloatingPointIeee754Wrapper<T> x) => T.AcosPi(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Asin(BinaryFloatingPointIeee754Wrapper<T> x) => T.Asin(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Asinh(BinaryFloatingPointIeee754Wrapper<T> x) => T.Asinh(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> AsinPi(BinaryFloatingPointIeee754Wrapper<T> x) => T.AsinPi(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Atan(BinaryFloatingPointIeee754Wrapper<T> x) => T.Atan(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Atan2(BinaryFloatingPointIeee754Wrapper<T> y, BinaryFloatingPointIeee754Wrapper<T> x) => T.Atan2(y, x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Atan2Pi(BinaryFloatingPointIeee754Wrapper<T> y, BinaryFloatingPointIeee754Wrapper<T> x) => T.Atan2Pi(y, x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Atanh(BinaryFloatingPointIeee754Wrapper<T> x) => T.Atanh(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> AtanPi(BinaryFloatingPointIeee754Wrapper<T> x) => T.AtanPi(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> BitDecrement(BinaryFloatingPointIeee754Wrapper<T> x) => T.BitDecrement(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> BitIncrement(BinaryFloatingPointIeee754Wrapper<T> x) => T.BitIncrement(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Cbrt(BinaryFloatingPointIeee754Wrapper<T> x) => T.Cbrt(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Cos(BinaryFloatingPointIeee754Wrapper<T> x) => T.Cos(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Cosh(BinaryFloatingPointIeee754Wrapper<T> x) => T.Cosh(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> CosPi(BinaryFloatingPointIeee754Wrapper<T> x) => T.CosPi(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Exp(BinaryFloatingPointIeee754Wrapper<T> x) => T.Exp(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Exp2(BinaryFloatingPointIeee754Wrapper<T> x) => T.Exp2(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Exp10(BinaryFloatingPointIeee754Wrapper<T> x) => T.Exp10(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> FusedMultiplyAdd(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right, BinaryFloatingPointIeee754Wrapper<T> addend) => T.FusedMultiplyAdd(left, right, addend);
+            public static BinaryFloatingPointIeee754Wrapper<T> Hypot(BinaryFloatingPointIeee754Wrapper<T> x, BinaryFloatingPointIeee754Wrapper<T> y) => T.Hypot(x, y);
+            public static BinaryFloatingPointIeee754Wrapper<T> Ieee754Remainder(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => T.Ieee754Remainder(left, right);
+            public static int ILogB(BinaryFloatingPointIeee754Wrapper<T> x) => T.ILogB(x);
+            public static bool IsCanonical(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsCanonical(value);
+            public static bool IsComplexNumber(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsComplexNumber(value);
+            public static bool IsEvenInteger(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsEvenInteger(value);
+            public static bool IsFinite(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsFinite(value);
+            public static bool IsImaginaryNumber(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsImaginaryNumber(value);
+            public static bool IsInfinity(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsInfinity(value);
+            public static bool IsInteger(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsInteger(value);
+            public static bool IsNaN(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsNaN(value);
+            public static bool IsNegative(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsNegative(value);
+            public static bool IsNegativeInfinity(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsNegativeInfinity(value);
+            public static bool IsNormal(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsNormal(value);
+            public static bool IsOddInteger(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsOddInteger(value);
+            public static bool IsPositive(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsPositive(value);
+            public static bool IsPositiveInfinity(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsPositiveInfinity(value);
+            public static bool IsPow2(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsPow2(value);
+            public static bool IsRealNumber(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsRealNumber(value);
+            public static bool IsSubnormal(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsSubnormal(value);
+            public static bool IsZero(BinaryFloatingPointIeee754Wrapper<T> value) => T.IsZero(value);
+            public static BinaryFloatingPointIeee754Wrapper<T> Log(BinaryFloatingPointIeee754Wrapper<T> x) => T.Log(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Log(BinaryFloatingPointIeee754Wrapper<T> x, BinaryFloatingPointIeee754Wrapper<T> newBase) => T.Log(x, newBase);
+            public static BinaryFloatingPointIeee754Wrapper<T> Log2(BinaryFloatingPointIeee754Wrapper<T> x) => BinaryNumberHelper<T>.Log2(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Log10(BinaryFloatingPointIeee754Wrapper<T> x) => T.Log10(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> MaxMagnitude(BinaryFloatingPointIeee754Wrapper<T> x, BinaryFloatingPointIeee754Wrapper<T> y) => T.MaxMagnitude(x, y);
+            public static BinaryFloatingPointIeee754Wrapper<T> MaxMagnitudeNumber(BinaryFloatingPointIeee754Wrapper<T> x, BinaryFloatingPointIeee754Wrapper<T> y) => T.MaxMagnitudeNumber(x, y);
+            public static BinaryFloatingPointIeee754Wrapper<T> MinMagnitude(BinaryFloatingPointIeee754Wrapper<T> x, BinaryFloatingPointIeee754Wrapper<T> y) => T.MinMagnitude(x, y);
+            public static BinaryFloatingPointIeee754Wrapper<T> MinMagnitudeNumber(BinaryFloatingPointIeee754Wrapper<T> x, BinaryFloatingPointIeee754Wrapper<T> y) => T.MinMagnitudeNumber(x, y);
+            public static BinaryFloatingPointIeee754Wrapper<T> Pow(BinaryFloatingPointIeee754Wrapper<T> x, BinaryFloatingPointIeee754Wrapper<T> y) => T.Pow(x, y);
+            public static BinaryFloatingPointIeee754Wrapper<T> Parse(ReadOnlySpan<char> s, NumberStyles style, IFormatProvider? provider) => T.Parse(s, style, provider);
+            public static BinaryFloatingPointIeee754Wrapper<T> Parse(string s, NumberStyles style, IFormatProvider? provider) => T.Parse(s, style, provider);
+            public static BinaryFloatingPointIeee754Wrapper<T> Parse(ReadOnlySpan<char> s, IFormatProvider? provider) => T.Parse(s, provider);
+            public static BinaryFloatingPointIeee754Wrapper<T> Parse(string s, IFormatProvider? provider) => T.Parse(s, provider);
+            public static BinaryFloatingPointIeee754Wrapper<T> RootN(BinaryFloatingPointIeee754Wrapper<T> x, int n) => T.RootN(x, n);
+            public static BinaryFloatingPointIeee754Wrapper<T> Round(BinaryFloatingPointIeee754Wrapper<T> x, int digits, MidpointRounding mode) => T.Round(x, digits, mode);
+            public static BinaryFloatingPointIeee754Wrapper<T> ScaleB(BinaryFloatingPointIeee754Wrapper<T> x, int n) => T.ScaleB(x, n);
+            public static BinaryFloatingPointIeee754Wrapper<T> Sin(BinaryFloatingPointIeee754Wrapper<T> x) => T.Sin(x);
+            public static (BinaryFloatingPointIeee754Wrapper<T> Sin, BinaryFloatingPointIeee754Wrapper<T> Cos) SinCos(BinaryFloatingPointIeee754Wrapper<T> x) => T.SinCos(x);
+            public static (BinaryFloatingPointIeee754Wrapper<T> SinPi, BinaryFloatingPointIeee754Wrapper<T> CosPi) SinCosPi(BinaryFloatingPointIeee754Wrapper<T> x) => T.SinCosPi(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Sinh(BinaryFloatingPointIeee754Wrapper<T> x) => T.Sinh(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> SinPi(BinaryFloatingPointIeee754Wrapper<T> x) => T.SinPi(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Sqrt(BinaryFloatingPointIeee754Wrapper<T> x) => T.Sqrt(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Tan(BinaryFloatingPointIeee754Wrapper<T> x) => T.Tan(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> Tanh(BinaryFloatingPointIeee754Wrapper<T> x) => T.Tanh(x);
+            public static BinaryFloatingPointIeee754Wrapper<T> TanPi(BinaryFloatingPointIeee754Wrapper<T> x) => T.TanPi(x);
+            public static bool TryParse(ReadOnlySpan<char> s, NumberStyles style, IFormatProvider? provider, [MaybeNullWhen(false)] out BinaryFloatingPointIeee754Wrapper<T> result)
+            {
+                var succeeded = T.TryParse(s, style, provider, out T actualResult);
+                result = actualResult;
+                return succeeded;
+            }
+            public static bool TryParse([NotNullWhen(true)] string? s, NumberStyles style, IFormatProvider? provider, [MaybeNullWhen(false)] out BinaryFloatingPointIeee754Wrapper<T> result)
+            {
+                var succeeded = T.TryParse(s, style, provider, out T actualResult);
+                result = actualResult;
+                return succeeded;
+            }
+            public static bool TryParse(ReadOnlySpan<char> s, IFormatProvider? provider, [MaybeNullWhen(false)] out BinaryFloatingPointIeee754Wrapper<T> result)
+            {
+                var succeeded = T.TryParse(s, provider, out T actualResult);
+                result = actualResult;
+                return succeeded;
+            }
+            public static bool TryParse([NotNullWhen(true)] string? s, IFormatProvider? provider, [MaybeNullWhen(false)] out BinaryFloatingPointIeee754Wrapper<T> result)
+            {
+                var succeeded = T.TryParse(s, provider, out T actualResult);
+                result = actualResult;
+                return succeeded;
+            }
+            public int CompareTo(object? obj)
+            {
+                if (obj is not BinaryFloatingPointIeee754Wrapper<T> other)
+                {
+                    return (obj is null) ? 1 : throw new ArgumentException();
+                }
+                return CompareTo(other);
+            }
+            public int CompareTo(BinaryFloatingPointIeee754Wrapper<T> other) => Value.CompareTo(other.Value);
+            public override bool Equals([NotNullWhen(true)] object? obj) => (obj is BinaryFloatingPointIeee754Wrapper<T> other) && Equals(other);
+            public bool Equals(BinaryFloatingPointIeee754Wrapper<T> other) => Value.Equals(other.Value);
+            public int GetExponentByteCount() => Value.GetExponentByteCount();
+            public int GetExponentShortestBitLength() => Value.GetExponentShortestBitLength();
+            public override int GetHashCode() => Value.GetHashCode();
+            public int GetSignificandBitLength() => Value.GetSignificandBitLength();
+            public int GetSignificandByteCount() => Value.GetSignificandByteCount();
+            public string ToString(string? format, IFormatProvider? formatProvider) => Value.ToString(format, formatProvider);
+            public bool TryFormat(Span<char> destination, out int charsWritten, ReadOnlySpan<char> format, IFormatProvider? provider) => Value.TryFormat(destination, out charsWritten, format, provider);
+            public bool TryWriteExponentBigEndian(Span<byte> destination, out int bytesWritten) => Value.TryWriteExponentBigEndian(destination, out bytesWritten);
+            public bool TryWriteExponentLittleEndian(Span<byte> destination, out int bytesWritten) => Value.TryWriteExponentLittleEndian(destination, out bytesWritten);
+            public bool TryWriteSignificandBigEndian(Span<byte> destination, out int bytesWritten) => Value.TryWriteSignificandBigEndian(destination, out bytesWritten);
+            public bool TryWriteSignificandLittleEndian(Span<byte> destination, out int bytesWritten) => Value.TryWriteSignificandLittleEndian(destination, out bytesWritten);
+
+            static bool INumberBase<BinaryFloatingPointIeee754Wrapper<T>>.TryConvertFromChecked<TOther>(TOther value, out BinaryFloatingPointIeee754Wrapper<T> result)
+            {
+                bool succeeded = T.TryConvertFromChecked(value, out T actualResult);
+
+                if (!succeeded)
+                {
+                    succeeded = TOther.TryConvertToChecked(value, out actualResult);
+                }
+
+                result = actualResult;
+                return succeeded;
+            }
+            static bool INumberBase<BinaryFloatingPointIeee754Wrapper<T>>.TryConvertFromSaturating<TOther>(TOther value, out BinaryFloatingPointIeee754Wrapper<T> result)
+            {
+                bool succeeded = T.TryConvertFromSaturating(value, out T actualResult);
+
+                if (!succeeded)
+                {
+                    succeeded = TOther.TryConvertToSaturating(value, out actualResult);
+                }
+
+                result = actualResult;
+                return succeeded;
+
+            }
+            static bool INumberBase<BinaryFloatingPointIeee754Wrapper<T>>.TryConvertFromTruncating<TOther>(TOther value, out BinaryFloatingPointIeee754Wrapper<T> result)
+            {
+                bool succeeded = T.TryConvertFromTruncating(value, out T actualResult);
+
+                if (!succeeded)
+                {
+                    succeeded = TOther.TryConvertToTruncating(value, out actualResult);
+                }
+
+                result = actualResult;
+                return succeeded;
+            }
+            static bool INumberBase<BinaryFloatingPointIeee754Wrapper<T>>.TryConvertToChecked<TOther>(BinaryFloatingPointIeee754Wrapper<T> value, out TOther result) => T.TryConvertToChecked(value.Value, out result);
+            static bool INumberBase<BinaryFloatingPointIeee754Wrapper<T>>.TryConvertToSaturating<TOther>(BinaryFloatingPointIeee754Wrapper<T> value, out TOther result) => T.TryConvertToSaturating(value.Value, out result);
+            static bool INumberBase<BinaryFloatingPointIeee754Wrapper<T>>.TryConvertToTruncating<TOther>(BinaryFloatingPointIeee754Wrapper<T> value, out TOther result) => T.TryConvertToTruncating(value.Value, out result);
+
+            public static BinaryFloatingPointIeee754Wrapper<T> operator +(BinaryFloatingPointIeee754Wrapper<T> value) => +value.Value;
+            public static BinaryFloatingPointIeee754Wrapper<T> operator +(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value + right.Value;
+            public static BinaryFloatingPointIeee754Wrapper<T> operator -(BinaryFloatingPointIeee754Wrapper<T> value) => -value.Value;
+            public static BinaryFloatingPointIeee754Wrapper<T> operator -(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value - right.Value;
+            public static BinaryFloatingPointIeee754Wrapper<T> operator ~(BinaryFloatingPointIeee754Wrapper<T> value) => ~value.Value;
+            public static BinaryFloatingPointIeee754Wrapper<T> operator ++(BinaryFloatingPointIeee754Wrapper<T> value) => value.Value + T.One;
+            public static BinaryFloatingPointIeee754Wrapper<T> operator --(BinaryFloatingPointIeee754Wrapper<T> value) => value.Value - T.One;
+            public static BinaryFloatingPointIeee754Wrapper<T> operator *(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value * right.Value;
+            public static BinaryFloatingPointIeee754Wrapper<T> operator /(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value / right.Value;
+            public static BinaryFloatingPointIeee754Wrapper<T> operator %(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value % right.Value;
+            public static BinaryFloatingPointIeee754Wrapper<T> operator &(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value & right.Value;
+            public static BinaryFloatingPointIeee754Wrapper<T> operator |(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value | right.Value;
+            public static BinaryFloatingPointIeee754Wrapper<T> operator ^(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value ^ right.Value;
+            public static bool operator ==(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value == right.Value;
+            public static bool operator !=(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value != right.Value;
+            public static bool operator <(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value < right.Value;
+            public static bool operator >(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value > right.Value;
+            public static bool operator <=(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value <= right.Value;
+            public static bool operator >=(BinaryFloatingPointIeee754Wrapper<T> left, BinaryFloatingPointIeee754Wrapper<T> right) => left.Value >= right.Value;
+        }
+
+        private static void AssertBitwiseEqual(BinaryFloatingPointIeee754Wrapper<float> expected, BinaryFloatingPointIeee754Wrapper<float> actual)
+        {
+            uint expectedBits = BitConverter.SingleToUInt32Bits(expected.Value);
+            uint actualBits = BitConverter.SingleToUInt32Bits(actual.Value);
+
+            if (expectedBits == actualBits)
+            {
+                return;
+            }
+
+            if (float.IsNaN(expected.Value) && float.IsNaN(actual.Value))
+            {
+                return;
+            }
+
+            throw Xunit.Sdk.EqualException.ForMismatchedValues(expected.Value, actual.Value);
+        }
     }
 }
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Runtime/CompilerServices/ParamCollectionAttributeTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Runtime/CompilerServices/ParamCollectionAttributeTests.cs
new file mode 100644
index 000000000000..25d2f5e87b4e
--- /dev/null
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Runtime/CompilerServices/ParamCollectionAttributeTests.cs
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Xunit;
+
+namespace System.Runtime.CompilerServices.Tests
+{
+    public static class ParamCollectionAttributeTests
+    {
+        [Fact]
+        public static void Ctor()
+        {
+            var attribute = new ParamCollectionAttribute();
+        }
+    }
+}
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Runtime/CompilerServices/RuntimeHelpersTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Runtime/CompilerServices/RuntimeHelpersTests.cs
index 7359e11283f7..afb72bbe0949 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Runtime/CompilerServices/RuntimeHelpersTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Runtime/CompilerServices/RuntimeHelpersTests.cs
@@ -68,6 +68,26 @@ public static unsafe void GetObjectValue()
             Assert.Equal(i, (int)iOV);
         }
 
+        [Fact]
+        public static void EqualsTest()
+        {
+            // Boolean RuntimeHelpers.Equals(Object, Object)
+
+            Assert.True(RuntimeHelpers.Equals(Guid.Empty, Guid.Empty));
+            Assert.False(RuntimeHelpers.Equals(Guid.Empty, Guid.NewGuid()));
+
+            // Reference equal
+            object o = new object();
+            Assert.True(RuntimeHelpers.Equals(o, o));
+
+            // Type mismatch
+            Assert.False(RuntimeHelpers.Equals(Guid.Empty, string.Empty));
+
+            // Non value types
+            Assert.False(RuntimeHelpers.Equals(new object(), new object()));
+            Assert.False(RuntimeHelpers.Equals(new int[] { 1, 2, 3 }, new int[] { 1, 2, 3 }));
+        }
+
         [Fact]
         public static void InitializeArray()
         {
@@ -374,7 +394,6 @@ public static void ArrayGetSubArrayCoVarianceTest()
         }
 
         [Fact]
-        [SkipOnMono("Not presently implemented on Mono")]
         public static void AllocateTypeAssociatedMemoryInvalidArguments()
         {
             Assert.Throws<ArgumentException>(() => { RuntimeHelpers.AllocateTypeAssociatedMemory(null, 10); });
@@ -382,7 +401,6 @@ public static void AllocateTypeAssociatedMemoryInvalidArguments()
         }
 
         [Fact]
-        [SkipOnMono("Not presently implemented on Mono")]
         public static unsafe void AllocateTypeAssociatedMemoryValidArguments()
         {
             IntPtr memory = RuntimeHelpers.AllocateTypeAssociatedMemory(typeof(RuntimeHelpersTests), 32);
@@ -419,6 +437,173 @@ public static void FixedAddressValueTypeTest()
 
             Assert.Equal(fixedPtr1, fixedPtr2);
         }
+
+        [Fact]
+        public static void BoxPrimitive()
+        {
+            int value = 4;
+            object result = RuntimeHelpers.Box(ref Unsafe.As<int, byte>(ref value), typeof(int).TypeHandle);
+            Assert.Equal(value, Assert.IsType<int>(result));
+        }
+
+        [Fact]
+        public static void BoxPointer()
+        {
+            Assert.Throws<ArgumentException>(() =>
+            {
+                nint value = 3;
+                object result = RuntimeHelpers.Box(ref Unsafe.As<nint, byte>(ref value), typeof(void*).TypeHandle);
+            });
+        }
+
+        [StructLayout(LayoutKind.Sequential)]
+        private ref struct ByRefLikeType
+        {
+            public int i;
+        }
+
+        [Fact]
+        public static void BoxByRefLike()
+        {
+            Assert.Throws<NotSupportedException>(() =>
+            {
+                int value = 3;
+                object result = RuntimeHelpers.Box(ref Unsafe.As<int, byte>(ref value), typeof(ByRefLikeType).TypeHandle);
+            });
+        }
+
+        [Fact]
+        public static void BoxStruct()
+        {
+            Span<int> buffer = [0, 42, int.MaxValue];
+            StructWithoutReferences expected = new()
+            {
+                a = buffer[0],
+                b = buffer[1],
+                c = buffer[2]
+            };
+            object result = RuntimeHelpers.Box(ref MemoryMarshal.AsBytes(buffer)[0], typeof(StructWithoutReferences).TypeHandle);
+
+            Assert.Equal(expected, Assert.IsType<StructWithoutReferences>(result));
+        }
+
+        [StructLayout(LayoutKind.Sequential)]
+        private struct GenericStruct<T>
+        {
+            public T data;
+        }
+
+        [Fact]
+        public static void BoxUnmanagedGenericStruct()
+        {
+            int value = 3;
+            object result = RuntimeHelpers.Box(ref Unsafe.As<int, byte>(ref value), typeof(GenericStruct<int>).TypeHandle);
+            
+            Assert.Equal(value, Assert.IsType<GenericStruct<int>>(result).data);
+        }
+
+        [Fact]
+        public static void BoxManagedGenericStruct()
+        {
+            object value = new();
+            object result = RuntimeHelpers.Box(ref Unsafe.As<object, byte>(ref value), typeof(GenericStruct<object>).TypeHandle);
+            
+            Assert.Same(value, Assert.IsType<GenericStruct<object>>(result).data);
+        }
+
+        [Fact]
+        public static void BoxNullable()
+        {
+            float? value = 3.14f;
+            object result = RuntimeHelpers.Box(ref Unsafe.As<float?, byte>(ref value), typeof(float?).TypeHandle);
+            Assert.Equal(value, Assert.IsType<float>(result));
+        }
+
+        [Fact]
+        public static void BoxNullNullable()
+        {
+            float? value = null;
+            object? result = RuntimeHelpers.Box(ref Unsafe.As<float?, byte>(ref value), typeof(float?).TypeHandle);
+            Assert.Null(result);
+        }
+
+        [Fact]
+        public static void NullBox()
+        {
+            Assert.Throws<NullReferenceException>(() => RuntimeHelpers.Box(ref Unsafe.NullRef<byte>(), typeof(byte).TypeHandle));
+        }
+
+        [Fact]
+        public static void BoxNullTypeHandle()
+        {
+            Assert.Throws<ArgumentNullException>(() =>
+            {
+                byte value = 3;
+                RuntimeHelpers.Box(ref value, default(RuntimeTypeHandle));
+            });
+        }
+
+        [Fact]
+        public static void BoxReferenceType()
+        {
+            string str = "ABC";
+            Assert.Same(str, RuntimeHelpers.Box(ref Unsafe.As<string, byte>(ref str), typeof(string).TypeHandle));
+        }
+
+        [Fact]
+        public static void BoxArrayType()
+        {
+            string[] arr = ["a", "b", "c"];
+            Assert.Same(arr, RuntimeHelpers.Box(ref Unsafe.As<string[], byte>(ref arr), typeof(string[]).TypeHandle));
+        }
+
+        // We can't even get a RuntimeTypeHandle for a generic parameter type on NativeAOT,
+        // so we don't even get to the method we're testing.
+        // So, let's not even waste time running this test on NativeAOT
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotNativeAot))]
+        public static void BoxGenericParameterType()
+        {
+            Type t = typeof(List<>).GetGenericArguments()[0];
+            Assert.Throws<ArgumentException>(() =>
+            {
+                byte value = 3;
+                RuntimeHelpers.Box(ref value, t.TypeHandle);
+            });
+        }
+
+        // We can't even get a RuntimeTypeHandle for a partially instantiated generic type on NativeAOT,
+        // so we don't even get to the method we're testing.
+        // So, let's not even waste time running this test on NativeAOT
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotNativeAot))]
+        public static void BoxPartiallyOpenGeneric()
+        {
+            Type t = typeof(Dictionary<,>).MakeGenericType(typeof(object), typeof(Dictionary<,>).GetGenericArguments()[1]);
+            Assert.Throws<ArgumentException>(() =>
+            {
+                byte value = 3;
+                RuntimeHelpers.Box(ref value, t.TypeHandle);
+            });
+        }
+
+        [Fact]
+        public static void BoxGenericTypeDefinition()
+        {
+            Assert.Throws<ArgumentException>(() =>
+            {
+                byte value = 3;
+                RuntimeHelpers.Box(ref value, typeof(List<>).TypeHandle);
+            });
+        }
+
+        [Fact]
+        public static void BoxVoid()
+        {
+            Assert.Throws<ArgumentException>(() =>
+            {
+                byte value = 3;
+                RuntimeHelpers.Box(ref value, typeof(void).TypeHandle);
+            });
+        }
     }
 
     public struct Age
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs
index dbd6bf7cbee5..9f59853b136d 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/StringTests.cs
@@ -999,6 +999,7 @@ public static unsafe void GetPinnableReference_ReturnsSameAsGCHandleAndLegacyFix
             GCHandle gcHandle = GCHandle.Alloc(input, GCHandleType.Pinned);
             try
             {
+                // Unsafe.AsPointer is safe since it's pinned by the gc handle
                 Assert.Equal((IntPtr)Unsafe.AsPointer(ref Unsafe.AsRef(in rChar)), gcHandle.AddrOfPinnedObject());
             }
             finally
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/TimeSpanTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/TimeSpanTests.cs
index de434b059fad..9085d957fa7e 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/TimeSpanTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/TimeSpanTests.cs
@@ -347,6 +347,412 @@ public static void EqualsTest(TimeSpan timeSpan1, object obj, bool expected)
             Assert.Equal(expected, timeSpan1.Equals(obj));
         }
 
+#region FromX_int_overloads
+        [Fact]
+        public static void FromDays_Int_Positive()
+        {
+            var expectedaaa = new TimeSpan(1, 2, 3, 4, 5, 6);
+            var actual = TimeSpan.FromDays(1, 2, 3, 4, 5, 6);
+            Assert.Equal(expectedaaa, actual);
+        }
+        [Fact]
+        public static void FromDays_Int_Negative()
+        {
+            var expectedaaa = new TimeSpan(-1, -2, -3, -4, -5, -6);
+            var actual = TimeSpan.FromDays(-1, -2, -3, -4, -5, -6);
+            Assert.Equal(expectedaaa, actual);
+        }
+        [Fact]
+        public static void FromDays_Int_Zero()
+        {
+            var expectedaaa = new TimeSpan(0, 0, 0, 0, 0, 0);
+            var actual = TimeSpan.FromDays(0, 0, 0, 0, 0, 0);
+            Assert.Equal(expectedaaa, actual);
+        }
+
+        [Fact]
+        public static void FromSeconds_Int_ShouldGiveResultWithPrecision()
+        {
+            // Given example of problem with double in: https://github.com/dotnet/runtime/issues/93890#issue-1957706751
+            Assert.Equal(new TimeSpan(0, 0, 0, 101, 832), TimeSpan.FromSeconds(101, 832));
+        }
+
+        [Fact]
+        public static void FromDays_Int_ShouldOverflow_WhenIntermediateCalculationCouldOverflowBackIntoValidRange()
+        {
+            // Given example of problematic day count in comment in abandoned pr https://github.com/dotnet/runtime/pull/95779/files#r1439772903
+            Assert.Throws<ArgumentOutOfRangeException>(() => TimeSpan.FromDays(1067519900));
+        }
+
+        [Fact]
+        public static void FromDays_Int_ShouldConstructMaxValueAproximation()
+        {
+            var expected = TimeSpan.MaxValue;
+            var actual = TimeSpan.FromDays(expected.Days, expected.Hours, expected.Minutes, expected.Seconds, expected.Milliseconds, expected.Microseconds);
+            // Should be within TicksPerMicrosecond (10) ticks of expected
+            var diffTicks = (expected - actual).Ticks;
+            Assert.True(Math.Abs(diffTicks) < 10, $"Diff ticks was {diffTicks}");
+        }
+        [Fact]
+        public static void FromDays_Int_ShouldConstructMinValueAproximation()
+        {
+            var expected = TimeSpan.MinValue;
+            var actual = TimeSpan.FromDays(expected.Days, expected.Hours, expected.Minutes, expected.Seconds, expected.Milliseconds, expected.Microseconds);
+            // Should be within TicksPerMicrosecond (10) ticks of expected
+            var diffTicks = (actual - expected).Ticks;
+            Assert.True(Math.Abs(diffTicks) < 10, $"Diff ticks was {diffTicks}");
+        }
+
+        // Consts copied from internal const in TimeSpan
+        // Max and Min are symmetrical
+        private const long maxMicroseconds = 922_337_203_685_477_580;
+        private const long maxMilliseconds = 922_337_203_685_477;
+        private const long maxSeconds = 922_337_203_685;
+        private const long maxMinutes = 15_372_286_728;
+        private const int maxHours = 256_204_778;
+        private const int maxDays = 10_675_199;
+        public static IEnumerable<object[]> FromDays_Int_ShouldOverflowOrUnderflow_Data()
+        {
+            long[] individualMaxValues = [ maxDays, maxHours, maxMinutes, maxSeconds, maxMilliseconds, maxMicroseconds ];
+            // Each possibility for individual property to overflow or underflow
+            for (var i = 0; i < individualMaxValues.Length; i++)
+            {
+                var iVal = individualMaxValues[i] + 1;
+               object[] resultPos = [ 0, 0, 0, 0, 0, 0 ];
+               resultPos[i] = iVal;
+               yield return resultPos;
+               object[] resultNeg = [ 0, 0, 0, 0, 0, 0 ];
+               resultNeg[i] = -iVal;
+               yield return resultNeg;
+            }
+            // Each possibility for 2 properties to overflow or underflow
+            // while neither of them individually overflow or underflow
+            for (var i = 0; i < individualMaxValues.Length; i++)
+            {
+                for (var j = i + 1; j < individualMaxValues.Length; j++)
+                {
+                    var iVal = individualMaxValues[i];
+                    var jVal = individualMaxValues[j];
+                    object[] resultPos = [ 0, 0, 0, 0, 0, 0 ];
+                    resultPos[i] = iVal;
+                    resultPos[j] = jVal;
+                    yield return resultPos;
+                    object[] resultNeg = [ 0, 0, 0, 0, 0, 0 ];
+                    resultNeg[i] = -iVal;
+                    resultNeg[j] = -jVal;
+                    yield return resultNeg;
+                }
+            }
+        }
+        [Theory]
+        [MemberData(nameof(FromDays_Int_ShouldOverflowOrUnderflow_Data))]
+        public static void FromDays_Int_ShouldOverflowOrUnderflow(int days, int hours, long minutes, long seconds, long milliseconds, long microseconds)
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() => TimeSpan.FromDays(days, hours, minutes, seconds, milliseconds, microseconds));
+        }
+
+        public static IEnumerable<object[]> FromDays_Int_ShouldNotOverflow_WhenOverflowingParamIsCounteredByOppositeSignParam_Data()
+        {
+            long[] individualMaxValues = [ maxDays, maxHours, maxMinutes, maxSeconds, maxMilliseconds, maxMicroseconds ];
+            for (var i = 0; i < individualMaxValues.Length; i++)
+            {
+                for (var j = 0; j < individualMaxValues.Length; j++)
+                {
+                    if (i == j)
+                    {
+                        continue;
+                    }
+                    var iVal = individualMaxValues[i] + 1;
+                    var jVal = individualMaxValues[j] + 1;
+                    object[] result = [ 0, 0, 0, 0, 0, 0 ];
+                    result[i] = iVal;
+                    result[j] = -jVal;
+                    yield return result;
+                }
+            }
+        }
+        [Theory]
+        [MemberData(nameof(FromDays_Int_ShouldNotOverflow_WhenOverflowingParamIsCounteredByOppositeSignParam_Data))]
+        public static void FromDays_Int_ShouldNotOverflow_WhenOverflowingParamIsCounteredByOppositeSignParam(int days, int hours, long minutes, long seconds, long milliseconds, long microseconds)
+        {
+            var actual = TimeSpan.FromDays(days, hours, minutes, seconds, milliseconds, microseconds);
+            // 2 individually overflowing or underflowing params with opposite sign should end up close to TimeSpan.FromDays(0)
+            // This is an implementation detail of the chosen test data, but a nice sanity check of expected result
+            Assert.True(actual > TimeSpan.FromDays(-1));
+            Assert.True(actual < TimeSpan.FromDays(1));
+        }
+        [Theory]
+        [InlineData(maxDays, maxHours, maxMinutes, maxSeconds, maxMilliseconds, maxMicroseconds)]
+        [InlineData(-maxDays, -maxHours, -maxMinutes, -maxSeconds, -maxMilliseconds, -maxMicroseconds)]
+        [InlineData(int.MaxValue, int.MaxValue, long.MaxValue, long.MaxValue, long.MaxValue, long.MaxValue)]
+        [InlineData(int.MinValue, int.MinValue, long.MinValue, long.MinValue, long.MinValue, long.MinValue)]
+        [InlineData(int.MaxValue, 0, 0, 0, 0, 0)]
+        [InlineData(int.MinValue, 0, 0, 0, 0, 0)]
+        [InlineData(0, int.MaxValue, 0, 0, 0, 0)]
+        [InlineData(0, int.MinValue, 0, 0, 0, 0)]
+        [InlineData(0, 0, long.MaxValue, 0, 0, 0)]
+        [InlineData(0, 0, long.MinValue, 0, 0, 0)]
+        [InlineData(0, 0, 0, long.MaxValue, 0, 0)]
+        [InlineData(0, 0, 0, long.MinValue, 0, 0)]
+        [InlineData(0, 0, 0, 0, long.MaxValue, 0)]
+        [InlineData(0, 0, 0, 0, long.MinValue, 0)]
+        [InlineData(0, 0, 0, 0, 0, long.MaxValue)]
+        [InlineData(0, 0, 0, 0, 0, long.MinValue)]
+        public static void FromDays_Int_ShouldOverflow(int days, int hours, long minutes, long seconds, long milliseconds, long microseconds)
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() => TimeSpan.FromDays(days, hours, minutes, seconds, milliseconds, microseconds));
+        }
+
+        [Theory]
+        [InlineData(0)]
+        [InlineData(1)]
+        [InlineData(-1)]
+        [InlineData(maxDays)]
+        [InlineData(-maxDays)]
+        public static void FromDays_Int_Single_ShouldCreate(int days)
+        {
+            Assert.Equal(new TimeSpan(days, 0, 0, 0), TimeSpan.FromDays(days));
+        }
+        [Theory]
+        [InlineData(maxDays + 1)]
+        [InlineData(-(maxDays + 1))]
+        [InlineData(int.MaxValue)]
+        [InlineData(int.MinValue)]
+        public static void FromDays_Int_Single_ShouldOverflow(int days)
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() => TimeSpan.FromDays(days));
+        }
+
+        [Theory]
+        [InlineData(0)]
+        [InlineData(1)]
+        [InlineData(-1)]
+        [InlineData(maxHours)]
+        [InlineData(-maxHours)]
+        public static void FromHours_Int_Single_ShouldCreate(int hours)
+        {
+            Assert.Equal(new TimeSpan(0, hours, 0, 0), TimeSpan.FromHours(hours));
+        }
+        [Theory]
+        [InlineData(maxHours + 1)]
+        [InlineData(-(maxHours + 1))]
+        [InlineData(int.MaxValue)]
+        [InlineData(int.MinValue)]
+        public static void FromHours_Int_Single_ShouldOverflow(int hours)
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() => TimeSpan.FromHours(hours));
+        }
+
+        [Theory]
+        [InlineData(0, 0, 0, 0, 0)]
+        [InlineData(1, 1, 1, 1, 1)]
+        [InlineData(-1, -1, -1, -1, -1)]
+        [InlineData(maxHours, 0, 0, 0, 0)]
+        [InlineData(-maxHours, 0, 0, 0, 0)]
+        [InlineData(0, maxMinutes, 0, 0, 0)]
+        [InlineData(0, -maxMinutes, 0, 0, 0)]
+        [InlineData(0, 0, maxSeconds, 0, 0)]
+        [InlineData(0, 0, -maxSeconds, 0, 0)]
+        [InlineData(0, 0, 0, maxMilliseconds, 0)]
+        [InlineData(0, 0, 0, -maxMilliseconds, 0)]
+        [InlineData(0, 0, 0, 0, maxMicroseconds)]
+        [InlineData(0, 0, 0, 0, -maxMicroseconds)]
+        public static void FromHours_Int_ShouldCreate(int hours, long minutes, long seconds, long milliseconds, long microseconds)
+        {
+            var ticksFromHours = hours * TimeSpan.TicksPerHour;
+            var ticksFromMinutes = minutes * TimeSpan.TicksPerMinute;
+            var ticksFromSeconds = seconds * TimeSpan.TicksPerSecond;
+            var ticksFromMilliseconds = milliseconds * TimeSpan.TicksPerMillisecond;
+            var ticksFromMicroseconds = microseconds * TimeSpan.TicksPerMicrosecond;
+            var expected = TimeSpan.FromTicks(ticksFromHours + ticksFromMinutes + ticksFromSeconds + ticksFromMilliseconds + ticksFromMicroseconds);
+            Assert.Equal(expected, TimeSpan.FromHours(hours, minutes, seconds, milliseconds, microseconds));
+        }
+        [Theory]
+        [InlineData(maxHours + 1, 0, 0, 0, 0)]
+        [InlineData(-(maxHours + 1), 0, 0, 0, 0)]
+        [InlineData(0, maxMinutes + 1, 0, 0, 0)]
+        [InlineData(0, -(maxMinutes + 1), 0, 0, 0)]
+        [InlineData(0, 0, maxSeconds + 1, 0, 0)]
+        [InlineData(0, 0, -(maxSeconds + 1), 0, 0)]
+        [InlineData(0, 0, 0, maxMilliseconds + 1, 0)]
+        [InlineData(0, 0, 0, -(maxMilliseconds + 1), 0)]
+        [InlineData(0, 0, 0, 0, maxMicroseconds + 1)]
+        [InlineData(0, 0, 0, 0, -(maxMicroseconds + 1))]
+        public static void FromHours_Int_ShouldOverflow(int hours, long minutes, long seconds, long milliseconds, long microseconds)
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() => TimeSpan.FromHours(hours, minutes, seconds, milliseconds, microseconds));
+        }
+
+        [Theory]
+        [InlineData(0)]
+        [InlineData(1)]
+        [InlineData(-1)]
+        [InlineData(maxMinutes)]
+        [InlineData(-maxMinutes)]
+        public static void FromMinutes_Int_Single_ShouldCreate(long minutes)
+        {
+            Assert.Equal(TimeSpan.FromDays(0, minutes: minutes), TimeSpan.FromMinutes(minutes));
+        }
+        [Theory]
+        [InlineData(maxMinutes + 1)]
+        [InlineData(-(maxMinutes + 1))]
+        [InlineData(long.MaxValue)]
+        [InlineData(long.MinValue)]
+        public static void FromMinutes_Int_Single_ShouldOverflow(long minutes)
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() => TimeSpan.FromMinutes(minutes));
+        }
+
+        [Theory]
+        [InlineData(0, 0, 0, 0)]
+        [InlineData(1, 1, 1, 1)]
+        [InlineData(-1, -1, -1, -1)]
+        [InlineData(maxMinutes, 0, 0, 0)]
+        [InlineData(-maxMinutes, 0, 0, 0)]
+        [InlineData(0, maxSeconds, 0, 0)]
+        [InlineData(0, -maxSeconds, 0, 0)]
+        [InlineData(0, 0, maxMilliseconds, 0)]
+        [InlineData(0, 0, -maxMilliseconds, 0)]
+        [InlineData(0, 0, 0, maxMicroseconds)]
+        [InlineData(0, 0, 0, -maxMicroseconds)]
+        public static void FromMinutes_Int_ShouldCreate(long minutes, long seconds, long milliseconds, long microseconds)
+        {
+            var ticksFromMinutes = minutes * TimeSpan.TicksPerMinute;
+            var ticksFromSeconds = seconds * TimeSpan.TicksPerSecond;
+            var ticksFromMilliseconds = milliseconds * TimeSpan.TicksPerMillisecond;
+            var ticksFromMicroseconds = microseconds * TimeSpan.TicksPerMicrosecond;
+            var expected = TimeSpan.FromTicks(ticksFromMinutes + ticksFromSeconds + ticksFromMilliseconds + ticksFromMicroseconds);
+            Assert.Equal(expected, TimeSpan.FromMinutes(minutes, seconds, milliseconds, microseconds));
+        }
+        [Theory]
+        [InlineData(maxMinutes + 1, 0, 0, 0)]
+        [InlineData(-(maxMinutes + 1), 0, 0, 0)]
+        [InlineData(0, maxSeconds + 1, 0, 0)]
+        [InlineData(0, -(maxSeconds + 1), 0, 0)]
+        [InlineData(0, 0, maxMilliseconds + 1, 0)]
+        [InlineData(0, 0, -(maxMilliseconds + 1), 0)]
+        [InlineData(0, 0, 0, maxMicroseconds + 1)]        
+        [InlineData(0, 0, 0, -(maxMicroseconds + 1))]
+        public static void FromMinutes_Int_ShouldOverflow(long minutes, long seconds, long milliseconds, long microseconds)
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() => TimeSpan.FromMinutes(minutes, seconds, milliseconds, microseconds));
+        }
+
+        [Theory]
+        [InlineData(0)]
+        [InlineData(1)]
+        [InlineData(-1)]
+        [InlineData(maxSeconds)]
+        [InlineData(-maxSeconds)]
+        public static void FromSeconds_Int_Single_ShouldCreate(long seconds)
+        {
+            Assert.Equal(TimeSpan.FromDays(0, seconds: seconds), TimeSpan.FromSeconds(seconds));
+        }
+        [Theory]
+        [InlineData(maxSeconds + 1)]
+        [InlineData(-(maxSeconds + 1))]
+        [InlineData(long.MaxValue)]
+        [InlineData(long.MinValue)]
+        public static void FromSeconds_Int_Single_ShouldOverflow(long seconds)
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() => TimeSpan.FromSeconds(seconds));
+        }
+
+        [Theory]
+        [InlineData(0, 0, 0)]
+        [InlineData(1, 1, 1)]
+        [InlineData(-1, -1, -1)]
+        [InlineData(maxSeconds, 0, 0)]
+        [InlineData(-maxSeconds, 0, 0)]
+        [InlineData(0, maxMilliseconds, 0)]
+        [InlineData(0, -maxMilliseconds, 0)]
+        [InlineData(0, 0, maxMicroseconds)]
+        [InlineData(0, 0, -maxMicroseconds)]
+        public static void FromSeconds_Int_ShouldCreate(long seconds, long milliseconds, long microseconds)
+        {
+            var ticksFromSeconds = seconds * TimeSpan.TicksPerSecond;
+            var ticksFromMilliseconds = milliseconds * TimeSpan.TicksPerMillisecond;
+            var ticksFromMicroseconds = microseconds * TimeSpan.TicksPerMicrosecond;
+            var expected = TimeSpan.FromTicks(ticksFromSeconds + ticksFromMilliseconds + ticksFromMicroseconds);
+            Assert.Equal(expected, TimeSpan.FromSeconds(seconds, milliseconds, microseconds));
+        }
+        [Theory]
+        [InlineData(maxSeconds + 1, 0, 0)]
+        [InlineData(-(maxSeconds + 1), 0, 0)]
+        [InlineData(0, maxMilliseconds + 1, 0)]
+        [InlineData(0, -(maxMilliseconds + 1), 0)]
+        [InlineData(0, 0, maxMicroseconds + 1)]
+        [InlineData(0, 0, -(maxMicroseconds + 1))]
+        [InlineData(long.MaxValue, 0, 0)]
+        [InlineData(long.MinValue, 0, 0)]
+        [InlineData(0, long.MaxValue, 0)]
+        [InlineData(0, long.MinValue, 0)]
+        [InlineData(0, 0, long.MaxValue)]
+        [InlineData(0, 0, long.MinValue)]
+        [InlineData(maxSeconds, maxMilliseconds, 0)]
+        [InlineData(0, maxMilliseconds, maxMicroseconds)]
+        [InlineData(maxSeconds, 0, maxMicroseconds)]
+        public static void FromSeconds_Int_ShouldOverflow(long seconds, long milliseconds, long microseconds)
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() => TimeSpan.FromSeconds(seconds, milliseconds, microseconds));
+        }
+
+        [Theory]
+        [InlineData(0, 0)]
+        [InlineData(1, 0)]
+        [InlineData(0, 1)]
+        [InlineData(-1, 0)]
+        [InlineData(0, -1)]
+        [InlineData(maxMilliseconds, 0)]
+        [InlineData(-maxMilliseconds, 0)]
+        [InlineData(0, maxMicroseconds)]
+        [InlineData(0, -maxMicroseconds)]
+        public static void FromMilliseconds_Int_ShouldCreate(long milliseconds, long microseconds)
+        {
+            long ticksFromMilliseconds = milliseconds * TimeSpan.TicksPerMillisecond;
+            long ticksFromMicroseconds = microseconds * TimeSpan.TicksPerMicrosecond;
+            var expected = TimeSpan.FromTicks(ticksFromMilliseconds + ticksFromMicroseconds);
+            Assert.Equal(expected, TimeSpan.FromMilliseconds(milliseconds, microseconds));
+        }
+        [Theory]
+        [InlineData(maxMilliseconds + 1, 0)]
+        [InlineData(-(maxMilliseconds + 1), 0)]
+        [InlineData(long.MaxValue, 0)]
+        [InlineData(long.MinValue, 0)]
+        [InlineData(0, maxMicroseconds + 1)]
+        [InlineData(0, -(maxMicroseconds + 1))]
+        [InlineData(0, long.MaxValue)]
+        [InlineData(0, long.MinValue)]
+        [InlineData(maxMilliseconds, 1000)]
+        [InlineData(-maxMilliseconds, -1000)]
+        [InlineData(1, maxMicroseconds)]
+        [InlineData(-1, -maxMicroseconds)]
+        public static void FromMilliseconds_Int_ShouldOverflow(long milliseconds, long microseconds)
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() => TimeSpan.FromMilliseconds(milliseconds, microseconds));
+        }
+
+        [Theory]
+        [InlineData(0)]
+        [InlineData(1)]
+        [InlineData(-1)]
+        [InlineData(maxMicroseconds)]
+        [InlineData(-maxMicroseconds)]
+        public static void FromMicroseconds_Int_Single_ShouldCreate(long microseconds)
+        {
+            Assert.Equal(TimeSpan.FromDays(0, microseconds: microseconds), TimeSpan.FromMicroseconds(microseconds));
+        }
+        [Theory]
+        [InlineData(maxMicroseconds + 1)]
+        [InlineData(-(maxMicroseconds + 1))]
+        [InlineData(long.MaxValue)]
+        [InlineData(long.MinValue)]
+        public static void FromMicroseconds_Int_Single_ShouldOverflow(long microseconds)
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() => TimeSpan.FromMicroseconds(microseconds));
+        }
+#endregion
+
         public static IEnumerable<object[]> FromDays_TestData()
         {
             yield return new object[] { 100.5, new TimeSpan(100, 12, 0, 0) };
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Type/TypeTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Type/TypeTests.cs
index d604b588621e..ab1d6c66c127 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Type/TypeTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Type/TypeTests.cs
@@ -513,6 +513,26 @@ public void GetTypeByName_ValidType_ReturnsExpected(string typeName, Type expect
             Assert.Equal(expectedType, Type.GetType(typeName.ToLower(), throwOnError: false, ignoreCase: true));
         }
 
+        public static IEnumerable<object[]> GetTypeByName_InvalidElementType()
+        {
+            Type expectedException = PlatformDetection.IsMonoRuntime
+                ? typeof(ArgumentException) // https://github.com/dotnet/runtime/issues/45033
+                : typeof(TypeLoadException);
+
+            yield return new object[] { "System.Int32&&", expectedException, true };
+            yield return new object[] { "System.Int32&*", expectedException, true };
+            yield return new object[] { "System.Int32&[]", expectedException, true };
+            yield return new object[] { "System.Int32&[*]", expectedException, true };
+            yield return new object[] { "System.Int32&[,]", expectedException, true };
+
+            // https://github.com/dotnet/runtime/issues/45033
+            if (!PlatformDetection.IsMonoRuntime)
+            {
+                yield return new object[] { "System.Void[]", expectedException, true };
+                yield return new object[] { "System.TypedReference[]", expectedException, true };
+            }
+        }
+
         [Theory]
         [InlineData("system.nullable`1[system.int32]", typeof(TypeLoadException), false)]
         [InlineData("System.NonExistingType", typeof(TypeLoadException), false)]
@@ -522,6 +542,7 @@ public void GetTypeByName_ValidType_ReturnsExpected(string typeName, Type expect
         [InlineData("Outside`1[System.Boolean, System.Int32]", typeof(ArgumentException), true)]
         [InlineData(".System.Int32", typeof(TypeLoadException), false)]
         [InlineData("..Outside`1", typeof(TypeLoadException), false)]
+        [MemberData(nameof(GetTypeByName_InvalidElementType))]
         public void GetTypeByName_Invalid(string typeName, Type expectedException, bool alwaysThrowsException)
         {
             if (!alwaysThrowsException)
@@ -1167,9 +1188,6 @@ public static IEnumerable<object[]> GetInterfaceMap_TestData()
                     }
                     else
                     {
-                        // [ActiveIssue("https://github.com/dotnet/runtime/issues/90863")]
-                        if (classType.Type == typeof(SIMs.C2Implicit<string>) && interfaceType.Type == typeof(SIMs.I1<string>)) continue;
-
                         // It's implemented implicitly by the level 2 interface
                         MTarget = interfaceType.Level2InterfaceType.GetMethod(interfaceType.MethodNamePrefix + "M", bindingFlags);
                         GTarget = interfaceType.Level2InterfaceType.GetMethod(interfaceType.MethodNamePrefix + "G", bindingFlags);
@@ -1294,7 +1312,7 @@ public void Method(string arg) { }
 
         static class DIMs
         {
-            
+
             internal interface I1
             {
                 void M() { throw new Exception("e"); }
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Uri.MethodsTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Uri.MethodsTests.cs
index 5a393231204e..cc71bf8d1d4a 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Uri.MethodsTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Uri.MethodsTests.cs
@@ -412,60 +412,6 @@ public void EqualsTest(Uri uri1, object obj, bool expected)
             }
         }
 
-        [Theory]
-        [InlineData("", "")]
-        [InlineData("Hello", "Hello")]
-        [InlineData("He\\l/lo", "He%5Cl%2Flo")]
-        [InlineData("\uD800\uDC00", "%F0%90%80%80")] // With surrogate pair
-        public void EscapeDataString(string stringToEscape, string expected)
-        {
-            Assert.Equal(expected, Uri.EscapeDataString(stringToEscape));
-        }
-
-        [Fact]
-        public void EscapeDataString_InvalidSurrogatePairs()
-        {
-            EscapeDataString("\uD800", "%EF%BF%BD");
-            EscapeDataString("abc\uD800", "abc%EF%BF%BD");
-            EscapeDataString("abc\uD800\uD800abc", "abc%EF%BF%BD%EF%BF%BDabc");
-            EscapeDataString("\xD800\xD800\xDFFF", "%EF%BF%BD%F0%90%8F%BF");
-        }
-
-        [Fact]
-        public void EscapeDataString_Long_Success()
-        {
-            string s;
-            const int LongCount = 65520 + 1;
-
-            s = new string('a', LongCount);
-            Assert.Equal(s, Uri.EscapeDataString(s));
-
-            s = new string('/', LongCount);
-            Assert.Equal(string.Concat(Enumerable.Repeat("%2F", LongCount)), Uri.EscapeDataString(s));
-        }
-
-        [Fact]
-        public void EscapeDataString_NullArgument()
-        {
-            AssertExtensions.Throws<ArgumentNullException>("stringToEscape", () => Uri.EscapeDataString(null));
-        }
-
-        [Theory]
-        [InlineData("", "")]
-        [InlineData("Hello", "Hello")]
-        [InlineData("He%5Cl/lo", "He\\l/lo")]
-        [InlineData("%F0%90%80%80", "\uD800\uDC00")] // Surrogate pair
-        public void UnescapeDataString(string stringToUnEscape, string expected)
-        {
-            Assert.Equal(expected, Uri.UnescapeDataString(stringToUnEscape));
-        }
-
-        [Fact]
-        public void UnescapedDataString_Null_ThrowsArgumentNullException()
-        {
-            AssertExtensions.Throws<ArgumentNullException>("stringToUnescape", () => Uri.UnescapeDataString(null)); // StringToUnescape is null
-        }
-
         [Theory]
         [InlineData("", "")]
         [InlineData("Hello", "Hello")]
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/ValueTypeTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/ValueTypeTests.cs
index 422f71e11c04..92c7000ed414 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/ValueTypeTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/ValueTypeTests.cs
@@ -299,6 +299,36 @@ public static void StructContainsPointerCompareTest()
             Assert.True(obj1.Equals(obj2));
             Assert.Equal(obj1.GetHashCode(), obj2.GetHashCode());
         }
+        
+        [Fact]
+        public static void StructContainsPointerNestedCompareTest()
+        {
+            StructContainsPointerNested obj1 = new StructContainsPointerNested();
+            obj1.o = null;
+            obj1.value.value = 1;
+
+            StructContainsPointerNested obj2 = new StructContainsPointerNested();
+            obj2.o = null;
+            obj2.value.value = 1;
+
+            Assert.True(obj1.Equals(obj2));
+            Assert.Equal(obj1.GetHashCode(), obj2.GetHashCode());
+        }
+
+        [Fact]
+        public static void StructWithNestedOverriddenNotBitwiseComparableTest()
+        {
+            StructWithNestedOverriddenNotBitwiseComparable obj1 = new StructWithNestedOverriddenNotBitwiseComparable();
+            obj1.value1.value = 1;
+            obj1.value2.value = 0;
+
+            StructWithNestedOverriddenNotBitwiseComparable obj2 = new StructWithNestedOverriddenNotBitwiseComparable();
+            obj2.value1.value = -1;
+            obj2.value2.value = 0;
+
+            Assert.True(obj1.Equals(obj2));
+            Assert.Equal(obj1.GetHashCode(), obj2.GetHashCode());
+        }
 
         public struct S
         {
@@ -392,5 +422,26 @@ public struct StructContainsPointer
             public double value1;
             public double value2;
         }
+
+        public struct StructContainsPointerNested
+        {
+            public object o;
+            public StructNonOverriddenEqualsOrGetHasCode value;
+        }
+
+        public struct StructOverriddenNotBitwiseComparable
+        {
+            public int value;
+
+            public override bool Equals(object obj) => obj is StructOverriddenNotBitwiseComparable other && (value == other.value || value == -other.value);
+
+            public override int GetHashCode() => value < 0 ? -value : value;
+        }
+
+        public struct StructWithNestedOverriddenNotBitwiseComparable
+        {
+            public StructOverriddenNotBitwiseComparable value1;
+            public StructOverriddenNotBitwiseComparable value2;
+        }
     }
 }
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/TrimmingTests/AggressiveAttributeTrimmingTest.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/TrimmingTests/AggressiveAttributeTrimmingTest.cs
new file mode 100644
index 000000000000..dfdce520e190
--- /dev/null
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/TrimmingTests/AggressiveAttributeTrimmingTest.cs
@@ -0,0 +1,80 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable enable
+
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+using System.Reflection;
+
+/// <summary>
+/// Ensures setting _AggressiveAttributeTrimming = true causes various attributes to be trimmed
+/// </summary>
+class Program
+{
+    [UnconditionalSuppressMessage ("ReflectionAnalysis", "IL2111", Justification = "Expected trim warning for reflection over annotated members.")]
+    [UnconditionalSuppressMessage ("ReflectionAnalysis", "IL2026", Justification = "Expected trim warning for reflection over annotated members.")]
+    static int Main(string[] args)
+    {
+        // Reference to IsDynamicCodeSupported (which has FeatureGuard(typeof(RequiresDynamicCodeAttribute)))
+        // should not produce a warning because both RequiresDynamicCodeAttribute and FeatureGuardAttribute are removed.
+        if (RuntimeFeature.IsDynamicCodeSupported)
+        {
+            UseDynamicCode();
+        }
+
+        // Check that a few attribute instances are indeed removed
+        CheckRemovedAttributes(typeof(MembersWithRemovedAttributes));
+
+        return 100;
+    }
+
+    [RequiresDynamicCode(nameof(UseDynamicCode))]
+    static void UseDynamicCode() { }
+
+    class MembersWithRemovedAttributes
+    {
+        static void DynamicallyAccessedMembers([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicMethods)] Type t) { }
+
+        [FeatureGuard(typeof(RequiresUnreferencedCodeAttribute))]
+        static bool FeatureGuard => throw null!;
+
+        [FeatureSwitchDefinition("Program.MembersWithRemovedAttributes.FeatureSwitchDefinition")]
+        static bool FeatureSwitchDefinition => throw null!;
+
+        [RequiresDynamicCode(nameof(RequiresDynamicCode))]
+        static void RequiresDynamicCode() { }
+
+        [RequiresUnreferencedCode(nameof(RequiresUnreferencedCode))]
+        static void RequiresUnreferencedCode() { }
+    }
+
+    static void CheckRemovedAttributes([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)] Type type)
+    {
+        Console.WriteLine($"Validating {type}");
+        foreach (var member in type.GetMembers(BindingFlags.Static | BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.DeclaredOnly))
+        {
+            CheckRemovedAttributes(member);
+            
+            if (member is MethodInfo method)
+            {
+                foreach (var parameter in method.GetParameters())
+                {
+                    CheckRemovedAttributes(parameter);
+                }
+            }
+        }
+    }
+
+    static void CheckRemovedAttributes(ICustomAttributeProvider provider)
+    {
+        foreach (var attribute in provider.GetCustomAttributes(false))
+        {
+            if (attribute is NullableContextAttribute)
+                continue;
+
+            throw new Exception($"Unexpected attribute {attribute.GetType()} on {provider}");
+        }
+    }
+}
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/TrimmingTests/System.Runtime.TrimmingTests.proj b/src/libraries/System.Runtime/tests/System.Runtime.Tests/TrimmingTests/System.Runtime.TrimmingTests.proj
index c786baba78e3..c9c42b2c2023 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/TrimmingTests/System.Runtime.TrimmingTests.proj
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/TrimmingTests/System.Runtime.TrimmingTests.proj
@@ -2,6 +2,10 @@
   <Import Project="$([MSBuild]::GetPathOfFileAbove(Directory.Build.props))" />
 
   <ItemGroup>
+    <TestConsoleAppSourceFiles Include="AggressiveAttributeTrimmingTest.cs">
+      <EnabledProperties>_AggressiveAttributeTrimming</EnabledProperties>
+      <DisabledProperties>SuppressTrimAnalysisWarnings;TrimmerSingleWarn</DisabledProperties>
+    </TestConsoleAppSourceFiles>
     <TestConsoleAppSourceFiles Include="AppDomainGetThreadGenericPrincipalTest.cs" />
     <TestConsoleAppSourceFiles Include="AppDomainGetThreadWindowsPrincipalTest.cs">
       <SkipOnTestRuntimes>osx-x64;linux-x64;browser-wasm</SkipOnTestRuntimes>
diff --git a/src/libraries/System.Runtime/tests/System.Text.Encoding.Tests/NegativeEncodingTests.cs b/src/libraries/System.Runtime/tests/System.Text.Encoding.Tests/NegativeEncodingTests.cs
index 3eb90f87286f..8ae05a1faa40 100644
--- a/src/libraries/System.Runtime/tests/System.Text.Encoding.Tests/NegativeEncodingTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Text.Encoding.Tests/NegativeEncodingTests.cs
@@ -623,6 +623,9 @@ void VerifyOutParams()
             // Chars does not have enough space
             AssertExtensions.Throws<ArgumentException>("chars", () => decoder.Convert(new byte[4], 0, 4, new char[0], 0, 0, flush, out charsUsed, out bytesUsed, out completed));
             VerifyOutParams();
+
+            AssertExtensions.Throws<ArgumentException>("chars", () => decoder.Convert(encoding.GetBytes("\uD800\uDC00".ToCharArray()).AsSpan(), new char[0].AsSpan(), flush, out charsUsed, out bytesUsed, out completed));
+            VerifyOutParams();
         }
 
         [Theory]
diff --git a/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/CancellationTokenTests.cs b/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/CancellationTokenTests.cs
index 72df26fa4c98..ba75f617bf0b 100644
--- a/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/CancellationTokenTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/CancellationTokenTests.cs
@@ -874,6 +874,7 @@ static void FinalizeHelper(DisposeTracker disposeTracker)
 
         // Several tests for deriving custom user types from CancellationTokenSource
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/99519", typeof(PlatformDetection), nameof(PlatformDetection.IsWasmThreadingSupported))]
         public static void DerivedCancellationTokenSource()
         {
             // Verify that a derived CTS is functional
diff --git a/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/MethodCoverage.cs b/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/MethodCoverage.cs
index 3509d10843bc..2634e8340ef9 100644
--- a/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/MethodCoverage.cs
+++ b/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/MethodCoverage.cs
@@ -279,6 +279,7 @@ public static async Task Task_WhenAny_TwoTasks_WakesOnFirstCompletion()
         }
 
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/99500", typeof(PlatformDetection), nameof(PlatformDetection.IsWasmThreadingSupported))]
         public static void CancellationTokenRegitration()
         {
             ManualResetEvent mre = new ManualResetEvent(false);
@@ -296,6 +297,7 @@ public static void CancellationTokenRegitration()
         /// verify that the taskawaiter.UnsafeOnCompleted is invoked
         /// </summary>
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/99519", typeof(PlatformDetection), nameof(PlatformDetection.IsWasmThreadingSupported))]
         public static void TaskAwaiter()
         {
             ManualResetEvent mre = new ManualResetEvent(false);
@@ -922,5 +924,118 @@ public static void Task_WhenAll_TwoTasks_WakesOnBothCompletionWithExceptionAndCa
                 Assert.Equal(e1, twa.Exception?.InnerException);
             }
         }
+
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        public void Task_WhenEach_NullsTriggerExceptions()
+        {
+            AssertExtensions.Throws<ArgumentNullException>("tasks", () => Task.WhenEach((Task[])null));
+            AssertExtensions.Throws<ArgumentNullException>("tasks", () => Task.WhenEach((Task<int>[])null));
+            AssertExtensions.Throws<ArgumentNullException>("tasks", () => Task.WhenEach((IEnumerable<Task>)null));
+            AssertExtensions.Throws<ArgumentNullException>("tasks", () => Task.WhenEach((IEnumerable<Task<int>>)null));
+
+            AssertExtensions.Throws<ArgumentException>("tasks", () => Task.WhenEach((Task[])[null]));
+            AssertExtensions.Throws<ArgumentException>("tasks", () => Task.WhenEach((ReadOnlySpan<Task>)[null]));
+            AssertExtensions.Throws<ArgumentException>("tasks", () => Task.WhenEach((IEnumerable<Task>)[null]));
+            AssertExtensions.Throws<ArgumentException>("tasks", () => Task.WhenEach((Task<int>[])[null]));
+            AssertExtensions.Throws<ArgumentException>("tasks", () => Task.WhenEach((ReadOnlySpan<Task<int>>)[null]));
+            AssertExtensions.Throws<ArgumentException>("tasks", () => Task.WhenEach((IEnumerable<Task<int>>)[null]));
+        }
+
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        public async Task Task_WhenEach_EmptyInputsCompleteImmediately()
+        {
+            Assert.False(await Task.WhenEach((Task[])[]).GetAsyncEnumerator().MoveNextAsync());
+            Assert.False(await Task.WhenEach((ReadOnlySpan<Task>)[]).GetAsyncEnumerator().MoveNextAsync());
+            Assert.False(await Task.WhenEach((IEnumerable<Task>)[]).GetAsyncEnumerator().MoveNextAsync());
+            Assert.False(await Task.WhenEach((Task<int>[])[]).GetAsyncEnumerator().MoveNextAsync());
+            Assert.False(await Task.WhenEach((ReadOnlySpan<Task<int>>)[]).GetAsyncEnumerator().MoveNextAsync());
+            Assert.False(await Task.WhenEach((IEnumerable<Task<int>>)[]).GetAsyncEnumerator().MoveNextAsync());
+        }
+
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        public async Task Task_WhenEach_TasksOnlyEnumerableOnce()
+        {
+            IAsyncEnumerable<Task>[] enumerables =
+            [
+                Task.WhenEach((Task[])[Task.CompletedTask, Task.CompletedTask]),
+                Task.WhenEach((ReadOnlySpan<Task>)[Task.CompletedTask, Task.CompletedTask]),
+                Task.WhenEach((IEnumerable<Task>)[Task.CompletedTask, Task.CompletedTask]),
+                Task.WhenEach((Task<int>[])[Task.FromResult(0), Task.FromResult(0)]),
+                Task.WhenEach((ReadOnlySpan<Task<int>>)[Task.FromResult(0), Task.FromResult(0)]),
+                Task.WhenEach((IEnumerable<Task<int>>)[Task.FromResult(0), Task.FromResult(0)]),
+            ];
+
+            foreach (IAsyncEnumerable<Task> e in enumerables)
+            {
+                IAsyncEnumerator<Task> e1 = e.GetAsyncEnumerator();
+                IAsyncEnumerator<Task> e2 = e.GetAsyncEnumerator();
+                IAsyncEnumerator<Task> e3 = e.GetAsyncEnumerator();
+
+                Assert.True(await e1.MoveNextAsync());
+                Assert.False(await e2.MoveNextAsync());
+                Assert.False(await e3.MoveNextAsync());
+
+                int count = 0;
+                do
+                {
+                    count++;
+                }
+                while (await e1.MoveNextAsync());
+                Assert.Equal(2, count);
+
+                Assert.False(await e.GetAsyncEnumerator().MoveNextAsync());
+            }
+        }
+
+        [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [InlineData(0)]
+        [InlineData(1)]
+        [InlineData(2)]
+        [InlineData(3)]
+        [InlineData(4)]
+        [InlineData(5)]
+        public async Task Task_WhenEach_IteratesThroughCompleteAndIncompleteTasks(int mode)
+        {
+            TaskCompletionSource<int> tcs1 = new(), tcs2 = new(), tcs3 = new();
+            Task<int>[] array = [Task.FromResult(1), tcs1.Task, Task.FromResult(2), tcs2.Task, Task.FromResult(3), tcs3.Task];
+
+            IAsyncEnumerable<Task> tasks = mode switch
+            {
+                0 => Task.WhenEach((ReadOnlySpan<Task>)array),
+                1 => Task.WhenEach((Task[])array),
+                2 => Task.WhenEach((IEnumerable<Task>)array),
+                3 => Task.WhenEach((ReadOnlySpan<Task<int>>)array),
+                4 => Task.WhenEach((Task<int>[])array),
+                _ => Task.WhenEach((IEnumerable<Task<int>>)array),
+            };
+
+            Assert.NotNull(tasks);
+
+            IAsyncEnumerator<Task> e = tasks.GetAsyncEnumerator();
+            Assert.NotNull(tasks);
+
+            ValueTask<bool> moveNext;
+
+            for (int i = 1; i <= 3; i++)
+            {
+                moveNext = e.MoveNextAsync();
+                Assert.True(moveNext.IsCompletedSuccessfully);
+                Assert.True(moveNext.Result);
+                Assert.Same(Task.FromResult(i), e.Current);
+            }
+
+            foreach (TaskCompletionSource<int> tcs in new[] { tcs2, tcs1, tcs3 })
+            {
+                moveNext = e.MoveNextAsync();
+                Assert.False(moveNext.IsCompleted);
+                tcs.SetResult(42);
+                Assert.True(await moveNext);
+                Assert.Same(tcs.Task, e.Current);
+            }
+
+            moveNext = e.MoveNextAsync();
+            Assert.True(moveNext.IsCompletedSuccessfully);
+            Assert.False(moveNext.Result);
+        }
     }
 }
diff --git a/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/System.Threading.Tasks.Tests.csproj b/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/System.Threading.Tasks.Tests.csproj
index eb1b13766e64..57ce44b8d41a 100644
--- a/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/System.Threading.Tasks.Tests.csproj
+++ b/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/System.Threading.Tasks.Tests.csproj
@@ -4,6 +4,10 @@
     <IncludeRemoteExecutor>true</IncludeRemoteExecutor>
     <TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(TargetOS)' == 'browser'">
+    <XunitShowProgress>true</XunitShowProgress>
+    <_WasmPThreadPoolUnusedSize>10</_WasmPThreadPoolUnusedSize>
+  </PropertyGroup>
   <ItemGroup>
     <Compile Include="Helpers.cs" />
     <Compile Include="Task\TaskArgumentValidationTests.cs" />
diff --git a/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/Task/AsyncEnumerableToBlockingEnumerableTests.cs b/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/Task/AsyncEnumerableToBlockingEnumerableTests.cs
index 0692aedb514f..786734ad8391 100644
--- a/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/Task/AsyncEnumerableToBlockingEnumerableTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/Task/AsyncEnumerableToBlockingEnumerableTests.cs
@@ -70,6 +70,7 @@ static async IAsyncEnumerable<int> CreateSourceEnumerable()
         }
 
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/99519", typeof(PlatformDetection), nameof(PlatformDetection.IsWasmThreadingSupported))]
         public static void AsyncEnumerableWithDelays()
         {
             var source = new InstrumentedAsyncEnumerable<int>(CreateSourceEnumerable());
@@ -104,6 +105,7 @@ static async IAsyncEnumerable<int> CreateSourceEnumerable()
         }
 
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/99519", typeof(PlatformDetection), nameof(PlatformDetection.IsWasmThreadingSupported))]
         public static void AsyncEnumerableWithException()
         {
             var source = new InstrumentedAsyncEnumerable<int>(CreateSourceEnumerable());
@@ -132,6 +134,7 @@ static async IAsyncEnumerable<int> CreateSourceEnumerable()
         }
 
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/99519", typeof(PlatformDetection), nameof(PlatformDetection.IsWasmThreadingSupported))]
         public static void AsyncEnumerableWithCancellation()
         {
             var source = new InstrumentedAsyncEnumerable<string>(CreateSourceEnumerable());
diff --git a/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/Task/TaskContinueWithTests.cs b/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/Task/TaskContinueWithTests.cs
index b426a576d982..76803afb0195 100644
--- a/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/Task/TaskContinueWithTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Threading.Tasks.Tests/Task/TaskContinueWithTests.cs
@@ -1076,6 +1076,7 @@ public static void RunContinuationCancelTest_State()
         }
 
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/99519", typeof(PlatformDetection), nameof(PlatformDetection.IsWasmThreadingSupported))]
         public static void TestNoDeadlockOnContinueWith()
         {
             Debug.WriteLine("TestNoDeadlockOnContinueWith:  shouldn't deadlock if it passes.");
@@ -1223,7 +1224,7 @@ public static void RunLazyCancellationTests_Negative()
         }
 
         [Fact]
-        [ActiveIssue("https://github.com/dotnet/runtime/issues/2084", TestRuntimes.Mono)]
+        [SkipOnPlatform(TestPlatforms.Browser, "Causes a stack overflow")]
         public static void LongContinuationChain_ContinueWith_DoesNotStackOverflow()
         {
             const int DiveDepth = 12_000;
@@ -1255,6 +1256,7 @@ public static void LongContinuationChain_Unwrap_DoesNotStackOverflow()
         }
 
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/99519", typeof(PlatformDetection), nameof(PlatformDetection.IsWasmThreadingSupported))]
         public static void LongContinuationChain_Await_DoesNotStackOverflow()
         {
             const int DiveDepth = 12_000;
diff --git a/src/libraries/System.Security.Cryptography.Pkcs/tests/SignedCms/SignedCmsTests.netcoreapp.cs b/src/libraries/System.Security.Cryptography.Pkcs/tests/SignedCms/SignedCmsTests.netcoreapp.cs
index e5ef61d996a8..2e1911ba11d3 100644
--- a/src/libraries/System.Security.Cryptography.Pkcs/tests/SignedCms/SignedCmsTests.netcoreapp.cs
+++ b/src/libraries/System.Security.Cryptography.Pkcs/tests/SignedCms/SignedCmsTests.netcoreapp.cs
@@ -399,7 +399,10 @@ public static void AddSigner_RSA_EphemeralKey()
                 {
                     ContentInfo content = new ContentInfo(new byte[] { 1, 2, 3 });
                     SignedCms cms = new SignedCms(content, false);
-                    CmsSigner signer = new CmsSigner(certWithEphemeralKey);
+                    CmsSigner signer = new CmsSigner(certWithEphemeralKey)
+                    {
+                        IncludeOption = X509IncludeOption.EndCertOnly
+                    };
                     cms.ComputeSignature(signer);
                 }
             }
@@ -429,7 +432,8 @@ public static void AddSigner_DSA_EphemeralKey()
                     SignedCms cms = new SignedCms(content, false);
                     CmsSigner signer = new CmsSigner(certWithEphemeralKey)
                     {
-                        DigestAlgorithm = new Oid(Oids.Sha1, Oids.Sha1)
+                        DigestAlgorithm = new Oid(Oids.Sha1, Oids.Sha1),
+                        IncludeOption = X509IncludeOption.EndCertOnly
                     };
                     cms.ComputeSignature(signer);
                 }
@@ -458,7 +462,10 @@ public static void AddSigner_ECDSA_EphemeralKey()
                 {
                     ContentInfo content = new ContentInfo(new byte[] { 1, 2, 3 });
                     SignedCms cms = new SignedCms(content, false);
-                    CmsSigner signer = new CmsSigner(certWithEphemeralKey);
+                    CmsSigner signer = new CmsSigner(certWithEphemeralKey)
+                    {
+                        IncludeOption = X509IncludeOption.EndCertOnly
+                    };
                     cms.ComputeSignature(signer);
                 }
             }
diff --git a/src/libraries/System.Security.Cryptography.Xml/src/System/Security/Cryptography/Xml/Reference.cs b/src/libraries/System.Security.Cryptography.Xml/src/System/Security/Cryptography/Xml/Reference.cs
index 001aaf26a21a..ec6e6cabc2cd 100644
--- a/src/libraries/System.Security.Cryptography.Xml/src/System/Security/Cryptography/Xml/Reference.cs
+++ b/src/libraries/System.Security.Cryptography.Xml/src/System/Security/Cryptography/Xml/Reference.cs
@@ -266,18 +266,31 @@ public void LoadXml(XmlElement value)
                         // let the transform read the children of the transformElement for data
                         transform.LoadInnerXml(transformElement.ChildNodes);
                         // Hack! this is done to get around the lack of here() function support in XPath
-                        if (transform is XmlDsigEnvelopedSignatureTransform
-                            && _uri != null && (_uri.Length == 0 || _uri[0] == '#'))
+                        if (transform is XmlDsigEnvelopedSignatureTransform)
                         {
                             // Walk back to the Signature tag. Find the nearest signature ancestor
                             // Signature-->SignedInfo-->Reference-->Transforms-->Transform
                             XmlNode? signatureTag = transformElement.SelectSingleNode("ancestor::ds:Signature[1]", nsm);
 
                             // Resolve the reference to get starting point for position calculation.
-                            XmlNode? referenceTarget =
-                                _uri.Length == 0
-                                ? transformElement.OwnerDocument
-                                : SignedXml!.GetIdElement(transformElement.OwnerDocument, Utils.GetIdFromLocalUri(_uri, out bool _));
+                            // This needs to match the way CalculateSignature resolves URI references.
+                            XmlNode? referenceTarget = null;
+                            if (_uri == null || _uri.Length == 0)
+                            {
+                                referenceTarget = transformElement.OwnerDocument;
+                            }
+                            else if (_uri[0] == '#')
+                            {
+                                string idref = Utils.ExtractIdFromLocalUri(_uri);
+                                if (idref == "xpointer(/)")
+                                {
+                                    referenceTarget = transformElement.OwnerDocument;
+                                }
+                                else
+                                {
+                                    referenceTarget = SignedXml!.GetIdElement(transformElement.OwnerDocument, idref);
+                                }
+                            }
 
                             XmlNodeList? signatureList = referenceTarget?.SelectNodes(".//ds:Signature", nsm);
                             if (signatureList != null)
diff --git a/src/libraries/System.Security.Cryptography.Xml/tests/SignedXmlTest.cs b/src/libraries/System.Security.Cryptography.Xml/tests/SignedXmlTest.cs
index 3db8c44aed82..bd22c5b835ee 100644
--- a/src/libraries/System.Security.Cryptography.Xml/tests/SignedXmlTest.cs
+++ b/src/libraries/System.Security.Cryptography.Xml/tests/SignedXmlTest.cs
@@ -9,6 +9,7 @@
 // (C) 2002, 2003 Motus Technologies Inc. (http://www.motus.com)
 // Copyright (C) 2004-2005, 2008 Novell, Inc (http://www.novell.com)
 
+using System.Collections.Generic;
 using System.Globalization;
 using System.IO;
 using System.Net;
@@ -1993,5 +1994,45 @@ public void CheckSignatureHandlesIncorrectOrTamperedReferenceWithMultipleEnvelop
 
             Assert.False(subject.CheckSignature());
         }
+
+        public static object[][] EnvelopedSignatureWithRootXpointerReference = new object[][]
+        {
+            new object[] { true,  """<?xml version="1.0" encoding="UTF-8"?><hello><world>Hi</world><Signature xmlns="http://www.w3.org/2000/09/xmldsig#"><SignedInfo><CanonicalizationMethod Algorithm="http://www.w3.org/2001/10/xml-exc-c14n#WithComments" /><SignatureMethod Algorithm="http://www.w3.org/2001/04/xmldsig-more#rsa-sha256" /><Reference URI="#xpointer(/)"><Transforms><Transform Algorithm="http://www.w3.org/2000/09/xmldsig#enveloped-signature" /><Transform Algorithm="http://www.w3.org/2001/10/xml-exc-c14n#WithComments" /></Transforms><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256" /><DigestValue>SVaCE5w9iLXTVYTKP1t/yjjmPXvWovMYpgljGgpgz2Y=</DigestValue></Reference></SignedInfo><SignatureValue>dqcBmS1ZvDJNhmCEgobpAb+A2XaiuB69dfGIhisZvqoxaWqAqv/0w49jp38+usJ5t3wcq3aMC631QE8iln+lHWrarojDMDWLa00isv3oE3q9UgOIV9e6MUSoRTTvQkmlK/LSYV9T/SKx6h03vLLcIkUMXaTkC/n2kthlJTGkLbU=</SignatureValue><KeyInfo><KeyValue><RSAKeyValue><Modulus>t6qV1iTlkCPoaIeOTvnDczQv5pytUxMoyNXws5vaMQYxfJMKos47dvmiLtfWUDLYXFX3Yf/JMC14plJw2JA5jLrlHLnZj/vCjRtXckmWW/wGYewXUqrgR1CytStUeQKj9mNsi76erukua10UhzIrWG+H6YQ/qS4AMMJZU6jBvO0=</Modulus><Exponent>AQAB</Exponent></RSAKeyValue></KeyValue></KeyInfo></Signature></hello>""" },
+            new object[] { false, """<?xml version="1.0" encoding="UTF-8"?><hello>Tempered world<Signature xmlns="http://www.w3.org/2000/09/xmldsig#"><SignedInfo><CanonicalizationMethod Algorithm="http://www.w3.org/2001/10/xml-exc-c14n#WithComments" /><SignatureMethod Algorithm="http://www.w3.org/2001/04/xmldsig-more#rsa-sha256" /><Reference URI="#xpointer(/)"><Transforms><Transform Algorithm="http://www.w3.org/2000/09/xmldsig#enveloped-signature" /><Transform Algorithm="http://www.w3.org/2001/10/xml-exc-c14n#WithComments" /></Transforms><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256" /><DigestValue>SVaCE5w9iLXTVYTKP1t/yjjmPXvWovMYpgljGgpgz2Y=</DigestValue></Reference></SignedInfo><SignatureValue>dqcBmS1ZvDJNhmCEgobpAb+A2XaiuB69dfGIhisZvqoxaWqAqv/0w49jp38+usJ5t3wcq3aMC631QE8iln+lHWrarojDMDWLa00isv3oE3q9UgOIV9e6MUSoRTTvQkmlK/LSYV9T/SKx6h03vLLcIkUMXaTkC/n2kthlJTGkLbU=</SignatureValue><KeyInfo><KeyValue><RSAKeyValue><Modulus>t6qV1iTlkCPoaIeOTvnDczQv5pytUxMoyNXws5vaMQYxfJMKos47dvmiLtfWUDLYXFX3Yf/JMC14plJw2JA5jLrlHLnZj/vCjRtXckmWW/wGYewXUqrgR1CytStUeQKj9mNsi76erukua10UhzIrWG+H6YQ/qS4AMMJZU6jBvO0=</Modulus><Exponent>AQAB</Exponent></RSAKeyValue></KeyValue></KeyInfo></Signature></hello>""" },
+        };
+
+        [Theory]
+        [MemberData(nameof(EnvelopedSignatureWithRootXpointerReference))]
+        public void CheckSignatureHandlesEnvelopedSignatureWithRootXpointerReference(bool isValid, string xml)
+        {
+            XmlDocument xmlDoc = new ();
+            xmlDoc.LoadXml(xml);
+            SignedXml signedXml = new (xmlDoc);
+            signedXml.LoadXml(xmlDoc.GetElementsByTagName("Signature", SignedXml.XmlDsigNamespaceUrl)[0] as XmlElement);
+
+            Assert.Equal(isValid, signedXml.CheckSignature());
+        }
+
+
+        public static object[][] EnvelopedSignatureWithEmptyReference = new object[][]
+        {
+            new object[] { true,  """<?xml version="1.0" encoding="UTF-8"?><hello><world>Hi</world><Signature xmlns="http://www.w3.org/2000/09/xmldsig#"><SignedInfo><CanonicalizationMethod Algorithm="http://www.w3.org/2001/10/xml-exc-c14n#WithComments" /><SignatureMethod Algorithm="http://www.w3.org/2001/04/xmldsig-more#rsa-sha256" /><Reference><Transforms><Transform Algorithm="http://www.w3.org/2000/09/xmldsig#enveloped-signature" /></Transforms><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256" /><DigestValue>SVaCE5w9iLXTVYTKP1t/yjjmPXvWovMYpgljGgpgz2Y=</DigestValue></Reference></SignedInfo><SignatureValue>CiB9jgIS7+Wq+lpyzCGsBZQcQ2BxqQuEU9VCvb3Li5jMtjwRV1bMO+4Wfnb4VWhEtEUq6NdiVGXhC1xvtVLnnLDX7CD/jG6NvM1Yd0/rf0UUceBhzYLFE9HLsopsBmmm3t8FO6ZtRr1QqKM0XDaQleGK9vYd2m2Jq8OR3r/w4OY=</SignatureValue><KeyInfo><KeyValue><RSAKeyValue><Modulus>vcM1wQVmLB9DwdnAym8l8nw63/HlTVzgTDhIwNzWPhsPE/qr2wlK4TEQ3rjU+RAdNytfFNCnuuh75ZVMjAWCV9h6VDlp0DOvBhb6GenhymtTAdJJKzBXKJP6mNPga9cPOP31IZ36Ui00G3fjBBPrHa7nStludgL9Wi0dBU28DjU=</Modulus><Exponent>AQAB</Exponent></RSAKeyValue></KeyValue></KeyInfo></Signature></hello>""" },
+            new object[] { false, """<?xml version="1.0" encoding="UTF-8"?><hello><WORLD>HI</WORLD><Signature xmlns="http://www.w3.org/2000/09/xmldsig#"><SignedInfo><CanonicalizationMethod Algorithm="http://www.w3.org/2001/10/xml-exc-c14n#WithComments" /><SignatureMethod Algorithm="http://www.w3.org/2001/04/xmldsig-more#rsa-sha256" /><Reference><Transforms><Transform Algorithm="http://www.w3.org/2000/09/xmldsig#enveloped-signature" /></Transforms><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256" /><DigestValue>SVaCE5w9iLXTVYTKP1t/yjjmPXvWovMYpgljGgpgz2Y=</DigestValue></Reference></SignedInfo><SignatureValue>CiB9jgIS7+Wq+lpyzCGsBZQcQ2BxqQuEU9VCvb3Li5jMtjwRV1bMO+4Wfnb4VWhEtEUq6NdiVGXhC1xvtVLnnLDX7CD/jG6NvM1Yd0/rf0UUceBhzYLFE9HLsopsBmmm3t8FO6ZtRr1QqKM0XDaQleGK9vYd2m2Jq8OR3r/w4OY=</SignatureValue><KeyInfo><KeyValue><RSAKeyValue><Modulus>vcM1wQVmLB9DwdnAym8l8nw63/HlTVzgTDhIwNzWPhsPE/qr2wlK4TEQ3rjU+RAdNytfFNCnuuh75ZVMjAWCV9h6VDlp0DOvBhb6GenhymtTAdJJKzBXKJP6mNPga9cPOP31IZ36Ui00G3fjBBPrHa7nStludgL9Wi0dBU28DjU=</Modulus><Exponent>AQAB</Exponent></RSAKeyValue></KeyValue></KeyInfo></Signature></hello>""" },
+        };
+
+        [Theory]
+        [MemberData(nameof(EnvelopedSignatureWithEmptyReference))]
+        public void CheckSignatureHandlesEnvelopedSignatureWithEmptyReference(bool isValid, string xml)
+        {
+            XmlDocument xmlDoc = new ();
+            xmlDoc.LoadXml(xml);
+            SignedXml signedXml = new (xmlDoc);
+            signedXml.LoadXml(xmlDoc.GetElementsByTagName("Signature", SignedXml.XmlDsigNamespaceUrl)[0] as XmlElement);
+
+            // without this, CheckSignature throws
+            ((Reference)signedXml.SignedInfo.References[0]).TransformChain[0].LoadInput(xmlDoc);
+
+            Assert.Equal(isValid, signedXml.CheckSignature());
+        }
     }
 }
diff --git a/src/libraries/System.Security.Cryptography/ref/System.Security.Cryptography.cs b/src/libraries/System.Security.Cryptography/ref/System.Security.Cryptography.cs
index 6220729b74a4..a6ee388c8cec 100644
--- a/src/libraries/System.Security.Cryptography/ref/System.Security.Cryptography.cs
+++ b/src/libraries/System.Security.Cryptography/ref/System.Security.Cryptography.cs
@@ -3612,6 +3612,12 @@ public enum X509SubjectKeyIdentifierHashAlgorithm
         Sha1 = 0,
         ShortSha1 = 1,
         CapiSha1 = 2,
+        Sha256 = 3,
+        Sha384 = 4,
+        Sha512 = 5,
+        ShortSha256 = 6,
+        ShortSha384 = 7,
+        ShortSha512 = 8,
     }
     [System.FlagsAttribute]
     public enum X509VerificationFlags
diff --git a/src/libraries/System.Security.Cryptography/src/Microsoft/Win32/SafeHandles/SafePasswordHandle.cs b/src/libraries/System.Security.Cryptography/src/Microsoft/Win32/SafeHandles/SafePasswordHandle.cs
index f53e582af6e2..ec3c1d6e995f 100644
--- a/src/libraries/System.Security.Cryptography/src/Microsoft/Win32/SafeHandles/SafePasswordHandle.cs
+++ b/src/libraries/System.Security.Cryptography/src/Microsoft/Win32/SafeHandles/SafePasswordHandle.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Security;
 
@@ -35,8 +36,8 @@ public SafePasswordHandle(string? password, bool passwordProvided)
         public SafePasswordHandle(ReadOnlySpan<char> password, bool passwordProvided)
             : base(ownsHandle: true)
         {
-            // "".AsSpan() is not default, so this is compat for "null tries NULL first".
-            if (password != default)
+            // "".AsSpan() does not contain a null ref, so this is compat for "null tries NULL first".
+            if (!Unsafe.IsNullRef(ref MemoryMarshal.GetReference(password)))
             {
                 int spanLen;
 
diff --git a/src/libraries/System.Security.Cryptography/src/Resources/Strings.resx b/src/libraries/System.Security.Cryptography/src/Resources/Strings.resx
index e447937b7b6c..a0a9e1e4afcf 100644
--- a/src/libraries/System.Security.Cryptography/src/Resources/Strings.resx
+++ b/src/libraries/System.Security.Cryptography/src/Resources/Strings.resx
@@ -465,6 +465,9 @@
   <data name="Cryptography_KeyBlobParsingError" xml:space="preserve">
     <value>Key Blob not in expected format.</value>
   </data>
+  <data name="Cryptography_KeyNotExtractable" xml:space="preserve">
+    <value>The key does not permit being exported.</value>
+  </data>
   <data name="Cryptography_KeyTooSmall" xml:space="preserve">
     <value>The key is too small for the requested operation.</value>
   </data>
@@ -672,6 +675,9 @@
   <data name="Cryptography_X509_CertificateCorrupted" xml:space="preserve">
     <value>Certificate '{0}' is corrupted.</value>
   </data>
+  <data name="Cryptography_X509_ChainBuildingFailed" xml:space="preserve">
+    <value>An unknown chain building error occurred.</value>
+  </data>
   <data name="Cryptography_X509_ExportFailed" xml:space="preserve">
     <value>The certificate export operation failed.</value>
   </data>
diff --git a/src/libraries/System.Security.Cryptography/src/System.Security.Cryptography.csproj b/src/libraries/System.Security.Cryptography/src/System.Security.Cryptography.csproj
index 06de21c43ea9..7e24c20a7fc4 100644
--- a/src/libraries/System.Security.Cryptography/src/System.Security.Cryptography.csproj
+++ b/src/libraries/System.Security.Cryptography/src/System.Security.Cryptography.csproj
@@ -403,6 +403,7 @@
     <Compile Include="System\Security\Cryptography\ECKeyXmlFormat.cs" />
     <Compile Include="System\Security\Cryptography\ECParameters.cs" />
     <Compile Include="System\Security\Cryptography\ECPoint.cs" />
+    <Compile Include="System\Security\Cryptography\FixedMemoryKeyBox.cs" />
     <Compile Include="System\Security\Cryptography\HashAlgorithm.cs" />
     <Compile Include="System\Security\Cryptography\HashAlgorithmName.cs" />
     <Compile Include="System\Security\Cryptography\HashAlgorithmNames.cs" />
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/AesCcm.Android.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/AesCcm.Android.cs
index 15fd39ef995d..0f7297af6aef 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/AesCcm.Android.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/AesCcm.Android.cs
@@ -9,16 +9,16 @@ namespace System.Security.Cryptography
 {
     public sealed partial class AesCcm
     {
-        private byte[]? _key;
+        private FixedMemoryKeyBox _keyBox;
 
         public static bool IsSupported => true;
 
-        [MemberNotNull(nameof(_key))]
+        [MemberNotNull(nameof(_keyBox))]
         private void ImportKey(ReadOnlySpan<byte> key)
         {
-            // Pin the array on the POH so that the GC doesn't move it around to allow zeroing to be more effective.
-            _key = GC.AllocateArray<byte>(key.Length, pinned: true);
-            key.CopyTo(_key);
+            // We should only be calling this in the constructor, so there shouldn't be a previous key.
+            Debug.Assert(_keyBox is null);
+            _keyBox = new FixedMemoryKeyBox(key);
         }
 
         private void EncryptCore(
@@ -28,82 +28,95 @@ private void EncryptCore(
             Span<byte> tag,
             ReadOnlySpan<byte> associatedData = default)
         {
-            CheckDisposed();
+            bool acquired = false;
 
-            // Convert key length to bits.
-            using (SafeEvpCipherCtxHandle ctx = Interop.Crypto.EvpCipherCreatePartial(GetCipher(_key.Length * 8)))
+            try
             {
-                if (ctx.IsInvalid)
-                {
-                    throw new CryptographicException();
-                }
-
-                if (!Interop.Crypto.CipherSetTagLength(ctx, tag.Length))
-                {
-                    throw new CryptographicException();
-                }
-
-                Interop.Crypto.CipherSetNonceLength(ctx, nonce.Length);
-                Interop.Crypto.EvpCipherSetKeyAndIV(ctx, _key, nonce, Interop.Crypto.EvpCipherDirection.Encrypt);
-
-                if (associatedData.Length != 0)
-                {
-                    Interop.Crypto.CipherUpdateAAD(ctx, associatedData);
-                }
+                _keyBox.DangerousAddRef(ref acquired);
+                ReadOnlySpan<byte> key = _keyBox.DangerousKeySpan;
 
-                byte[]? rented = null;
-                try
+                // Convert key length to bits.
+                using (SafeEvpCipherCtxHandle ctx = Interop.Crypto.EvpCipherCreatePartial(GetCipher(key.Length * 8)))
                 {
-                    scoped Span<byte> ciphertextAndTag;
-
-                    // Arbitrary limit.
-                    const int StackAllocMax = 128;
-                    if (checked(ciphertext.Length + tag.Length) <= StackAllocMax)
-                    {
-                        ciphertextAndTag = stackalloc byte[ciphertext.Length + tag.Length];
-                    }
-                    else
-                    {
-                        rented = CryptoPool.Rent(ciphertext.Length + tag.Length);
-                        ciphertextAndTag = new Span<byte>(rented, 0, ciphertext.Length + tag.Length);
-                    }
-
-                    if (!Interop.Crypto.EvpCipherUpdate(ctx, ciphertextAndTag, out int ciphertextBytesWritten, plaintext))
+                    if (ctx.IsInvalid)
                     {
                         throw new CryptographicException();
                     }
 
-                    if (!Interop.Crypto.EvpAeadCipherFinalEx(
-                        ctx,
-                        ciphertextAndTag.Slice(ciphertextBytesWritten),
-                        out int bytesWritten,
-                        out bool authTagMismatch))
+                    if (!Interop.Crypto.CipherSetTagLength(ctx, tag.Length))
                     {
-                        Debug.Assert(!authTagMismatch);
                         throw new CryptographicException();
                     }
 
-                    ciphertextBytesWritten += bytesWritten;
+                    Interop.Crypto.CipherSetNonceLength(ctx, nonce.Length);
+                    Interop.Crypto.EvpCipherSetKeyAndIV(ctx, key, nonce, Interop.Crypto.EvpCipherDirection.Encrypt);
 
-                    // NOTE: Android appends tag to the end of the ciphertext in case of CCM/GCM and "encryption" mode
-
-                    if (ciphertextBytesWritten != ciphertextAndTag.Length)
+                    if (associatedData.Length != 0)
                     {
-                        Debug.Fail($"CCM encrypt wrote {ciphertextBytesWritten} of {ciphertextAndTag.Length} bytes.");
-                        throw new CryptographicException();
+                        Interop.Crypto.CipherUpdateAAD(ctx, associatedData);
                     }
 
-                    ciphertextAndTag[..ciphertext.Length].CopyTo(ciphertext);
-                    ciphertextAndTag[ciphertext.Length..].CopyTo(tag);
-                }
-                finally
-                {
-                    if (rented != null)
+                    byte[]? rented = null;
+                    try
                     {
-                        CryptoPool.Return(rented, ciphertext.Length + tag.Length);
+                        scoped Span<byte> ciphertextAndTag;
+
+                        // Arbitrary limit.
+                        const int StackAllocMax = 128;
+                        if (checked(ciphertext.Length + tag.Length) <= StackAllocMax)
+                        {
+                            ciphertextAndTag = stackalloc byte[ciphertext.Length + tag.Length];
+                        }
+                        else
+                        {
+                            rented = CryptoPool.Rent(ciphertext.Length + tag.Length);
+                            ciphertextAndTag = new Span<byte>(rented, 0, ciphertext.Length + tag.Length);
+                        }
+
+                        if (!Interop.Crypto.EvpCipherUpdate(ctx, ciphertextAndTag, out int ciphertextBytesWritten, plaintext))
+                        {
+                            throw new CryptographicException();
+                        }
+
+                        if (!Interop.Crypto.EvpAeadCipherFinalEx(
+                            ctx,
+                            ciphertextAndTag.Slice(ciphertextBytesWritten),
+                            out int bytesWritten,
+                            out bool authTagMismatch))
+                        {
+                            Debug.Assert(!authTagMismatch);
+                            throw new CryptographicException();
+                        }
+
+                        ciphertextBytesWritten += bytesWritten;
+
+                        // NOTE: Android appends tag to the end of the ciphertext in case of CCM/GCM and "encryption" mode
+
+                        if (ciphertextBytesWritten != ciphertextAndTag.Length)
+                        {
+                            Debug.Fail($"CCM encrypt wrote {ciphertextBytesWritten} of {ciphertextAndTag.Length} bytes.");
+                            throw new CryptographicException();
+                        }
+
+                        ciphertextAndTag[..ciphertext.Length].CopyTo(ciphertext);
+                        ciphertextAndTag[ciphertext.Length..].CopyTo(tag);
+                    }
+                    finally
+                    {
+                        if (rented != null)
+                        {
+                            CryptoPool.Return(rented, ciphertext.Length + tag.Length);
+                        }
                     }
                 }
             }
+            finally
+            {
+                if (acquired)
+                {
+                    _keyBox.DangerousRelease();
+                }
+            }
         }
 
         private void DecryptCore(
@@ -113,64 +126,77 @@ private void DecryptCore(
             Span<byte> plaintext,
             ReadOnlySpan<byte> associatedData)
         {
-            CheckDisposed();
+            bool acquired = false;
 
-            using (SafeEvpCipherCtxHandle ctx = Interop.Crypto.EvpCipherCreatePartial(GetCipher(_key.Length * 8)))
+            try
             {
-                if (ctx.IsInvalid)
-                {
-                    throw new CryptographicException();
-                }
-                Interop.Crypto.CipherSetNonceLength(ctx, nonce.Length);
+                _keyBox.DangerousAddRef(ref acquired);
+                ReadOnlySpan<byte> key = _keyBox.DangerousKeySpan;
 
-                if (!Interop.Crypto.CipherSetTagLength(ctx, tag.Length))
+                using (SafeEvpCipherCtxHandle ctx = Interop.Crypto.EvpCipherCreatePartial(GetCipher(key.Length * 8)))
                 {
-                    throw new CryptographicException();
-                }
+                    if (ctx.IsInvalid)
+                    {
+                        throw new CryptographicException();
+                    }
+                    Interop.Crypto.CipherSetNonceLength(ctx, nonce.Length);
 
-                Interop.Crypto.EvpCipherSetKeyAndIV(ctx, _key, nonce, Interop.Crypto.EvpCipherDirection.Decrypt);
+                    if (!Interop.Crypto.CipherSetTagLength(ctx, tag.Length))
+                    {
+                        throw new CryptographicException();
+                    }
 
-                if (associatedData.Length != 0)
-                {
-                    Interop.Crypto.CipherUpdateAAD(ctx, associatedData);
-                }
+                    Interop.Crypto.EvpCipherSetKeyAndIV(ctx, key, nonce, Interop.Crypto.EvpCipherDirection.Decrypt);
 
-                if (!Interop.Crypto.EvpCipherUpdate(ctx, plaintext, out int plaintextBytesWritten, ciphertext))
-                {
-                    CryptographicOperations.ZeroMemory(plaintext);
-                    throw new CryptographicException();
-                }
+                    if (associatedData.Length != 0)
+                    {
+                        Interop.Crypto.CipherUpdateAAD(ctx, associatedData);
+                    }
 
-                if (!Interop.Crypto.EvpCipherUpdate(ctx, plaintext.Slice(plaintextBytesWritten), out int bytesWritten, tag))
-                {
-                    CryptographicOperations.ZeroMemory(plaintext);
-                    throw new CryptographicException();
-                }
+                    if (!Interop.Crypto.EvpCipherUpdate(ctx, plaintext, out int plaintextBytesWritten, ciphertext))
+                    {
+                        CryptographicOperations.ZeroMemory(plaintext);
+                        throw new CryptographicException();
+                    }
 
-                plaintextBytesWritten += bytesWritten;
+                    if (!Interop.Crypto.EvpCipherUpdate(ctx, plaintext.Slice(plaintextBytesWritten), out int bytesWritten, tag))
+                    {
+                        CryptographicOperations.ZeroMemory(plaintext);
+                        throw new CryptographicException();
+                    }
 
-                if (!Interop.Crypto.EvpAeadCipherFinalEx(
-                    ctx,
-                    plaintext.Slice(plaintextBytesWritten),
-                    out bytesWritten,
-                    out bool authTagMismatch))
-                {
-                    CryptographicOperations.ZeroMemory(plaintext);
+                    plaintextBytesWritten += bytesWritten;
 
-                    if (authTagMismatch)
+                    if (!Interop.Crypto.EvpAeadCipherFinalEx(
+                        ctx,
+                        plaintext.Slice(plaintextBytesWritten),
+                        out bytesWritten,
+                        out bool authTagMismatch))
                     {
-                        throw new AuthenticationTagMismatchException();
-                    }
+                        CryptographicOperations.ZeroMemory(plaintext);
 
-                    throw new CryptographicException(SR.Arg_CryptographyException);
-                }
+                        if (authTagMismatch)
+                        {
+                            throw new AuthenticationTagMismatchException();
+                        }
 
-                plaintextBytesWritten += bytesWritten;
+                        throw new CryptographicException(SR.Arg_CryptographyException);
+                    }
 
-                if (plaintextBytesWritten != plaintext.Length)
+                    plaintextBytesWritten += bytesWritten;
+
+                    if (plaintextBytesWritten != plaintext.Length)
+                    {
+                        Debug.Fail($"CCM decrypt wrote {plaintextBytesWritten} of {plaintext.Length} bytes.");
+                        throw new CryptographicException();
+                    }
+                }
+            }
+            finally
+            {
+                if (acquired)
                 {
-                    Debug.Fail($"CCM decrypt wrote {plaintextBytesWritten} of {plaintext.Length} bytes.");
-                    throw new CryptographicException();
+                    _keyBox.DangerousRelease();
                 }
             }
         }
@@ -186,16 +212,6 @@ private static IntPtr GetCipher(int keySizeInBits)
             };
         }
 
-        [MemberNotNull(nameof(_key))]
-        private void CheckDisposed()
-        {
-            ObjectDisposedException.ThrowIf(_key is null, this);
-        }
-
-        public void Dispose()
-        {
-            CryptographicOperations.ZeroMemory(_key);
-            _key = null;
-        }
+        public void Dispose() => _keyBox.Dispose();
     }
 }
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/AesCcm.OpenSsl.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/AesCcm.OpenSsl.cs
index 8f96682ca325..554d759acf40 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/AesCcm.OpenSsl.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/AesCcm.OpenSsl.cs
@@ -9,18 +9,18 @@ namespace System.Security.Cryptography
 {
     public sealed partial class AesCcm
     {
-        private byte[]? _key;
+        private FixedMemoryKeyBox _keyBox;
 
         public static bool IsSupported { get; } = Interop.OpenSslNoInit.OpenSslIsAvailable;
 
-        [MemberNotNull(nameof(_key))]
+        [MemberNotNull(nameof(_keyBox))]
         private void ImportKey(ReadOnlySpan<byte> key)
         {
             // OpenSSL does not allow setting nonce length after setting the key
             // we need to store it as bytes instead
-            // Pin the array on the POH so that the GC doesn't move it around to allow zeroing to be more effective.
-            _key = GC.AllocateArray<byte>(key.Length, pinned: true);
-            key.CopyTo(_key);
+            // We should only be calling this in the constructor, so there shouldn't be a previous key.
+            Debug.Assert(_keyBox is null);
+            _keyBox = new FixedMemoryKeyBox(key);
         }
 
         private void EncryptCore(
@@ -30,52 +30,65 @@ private void EncryptCore(
             Span<byte> tag,
             ReadOnlySpan<byte> associatedData = default)
         {
-            CheckDisposed();
+            bool acquired = false;
 
-            using (SafeEvpCipherCtxHandle ctx = Interop.Crypto.EvpCipherCreatePartial(GetCipher(_key.Length * 8)))
+            try
             {
-                Interop.Crypto.CheckValidOpenSslHandle(ctx);
+                _keyBox.DangerousAddRef(ref acquired);
+                ReadOnlySpan<byte> key = _keyBox.DangerousKeySpan;
 
-                // We need to set mode to encryption before setting the tag and nonce length
-                // otherwise older versions of OpenSSL (i.e. 1.0.1f which can be found on Ubuntu 14.04) will fail
-                Interop.Crypto.EvpCipherSetKeyAndIV(ctx, Span<byte>.Empty, Span<byte>.Empty, Interop.Crypto.EvpCipherDirection.Encrypt);
-                Interop.Crypto.EvpCipherSetCcmTagLength(ctx, tag.Length);
-                Interop.Crypto.EvpCipherSetCcmNonceLength(ctx, nonce.Length);
-                Interop.Crypto.EvpCipherSetKeyAndIV(ctx, _key, nonce, Interop.Crypto.EvpCipherDirection.NoChange);
-
-                if (associatedData.Length != 0)
+                using (SafeEvpCipherCtxHandle ctx = Interop.Crypto.EvpCipherCreatePartial(GetCipher(key.Length * 8)))
                 {
-                    // length needs to be known ahead of time in CCM mode
-                    Interop.Crypto.EvpCipherSetInputLength(ctx, plaintext.Length);
+                    Interop.Crypto.CheckValidOpenSslHandle(ctx);
+
+                    // We need to set mode to encryption before setting the tag and nonce length
+                    // otherwise older versions of OpenSSL (i.e. 1.0.1f which can be found on Ubuntu 14.04) will fail
+                    Interop.Crypto.EvpCipherSetKeyAndIV(ctx, Span<byte>.Empty, Span<byte>.Empty, Interop.Crypto.EvpCipherDirection.Encrypt);
+                    Interop.Crypto.EvpCipherSetCcmTagLength(ctx, tag.Length);
+                    Interop.Crypto.EvpCipherSetCcmNonceLength(ctx, nonce.Length);
+                    Interop.Crypto.EvpCipherSetKeyAndIV(ctx, key, nonce, Interop.Crypto.EvpCipherDirection.NoChange);
+
+                    if (associatedData.Length != 0)
+                    {
+                        // length needs to be known ahead of time in CCM mode
+                        Interop.Crypto.EvpCipherSetInputLength(ctx, plaintext.Length);
+
+                        if (!Interop.Crypto.EvpCipherUpdate(ctx, Span<byte>.Empty, out _, associatedData))
+                        {
+                            throw Interop.Crypto.CreateOpenSslCryptographicException();
+                        }
+                    }
 
-                    if (!Interop.Crypto.EvpCipherUpdate(ctx, Span<byte>.Empty, out _, associatedData))
+                    if (!Interop.Crypto.EvpCipherUpdate(ctx, ciphertext, out int ciphertextBytesWritten, plaintext))
                     {
                         throw Interop.Crypto.CreateOpenSslCryptographicException();
                     }
-                }
 
-                if (!Interop.Crypto.EvpCipherUpdate(ctx, ciphertext, out int ciphertextBytesWritten, plaintext))
-                {
-                    throw Interop.Crypto.CreateOpenSslCryptographicException();
-                }
+                    if (!Interop.Crypto.EvpCipherFinalEx(
+                        ctx,
+                        ciphertext.Slice(ciphertextBytesWritten),
+                        out int bytesWritten))
+                    {
+                        throw Interop.Crypto.CreateOpenSslCryptographicException();
+                    }
 
-                if (!Interop.Crypto.EvpCipherFinalEx(
-                    ctx,
-                    ciphertext.Slice(ciphertextBytesWritten),
-                    out int bytesWritten))
-                {
-                    throw Interop.Crypto.CreateOpenSslCryptographicException();
-                }
+                    ciphertextBytesWritten += bytesWritten;
 
-                ciphertextBytesWritten += bytesWritten;
+                    if (ciphertextBytesWritten != ciphertext.Length)
+                    {
+                        Debug.Fail($"CCM encrypt wrote {ciphertextBytesWritten} of {ciphertext.Length} bytes.");
+                        throw new CryptographicException();
+                    }
 
-                if (ciphertextBytesWritten != ciphertext.Length)
+                    Interop.Crypto.EvpCipherGetCcmTag(ctx, tag);
+                }
+            }
+            finally
+            {
+                if (acquired)
                 {
-                    Debug.Fail($"CCM encrypt wrote {ciphertextBytesWritten} of {ciphertext.Length} bytes.");
-                    throw new CryptographicException();
+                    _keyBox.DangerousRelease();
                 }
-
-                Interop.Crypto.EvpCipherGetCcmTag(ctx, tag);
             }
         }
 
@@ -86,41 +99,54 @@ private void DecryptCore(
             Span<byte> plaintext,
             ReadOnlySpan<byte> associatedData)
         {
-            CheckDisposed();
+            bool acquired = false;
 
-            using (SafeEvpCipherCtxHandle ctx = Interop.Crypto.EvpCipherCreatePartial(GetCipher(_key.Length * 8)))
+            try
             {
-                Interop.Crypto.CheckValidOpenSslHandle(ctx);
-                Interop.Crypto.EvpCipherSetCcmNonceLength(ctx, nonce.Length);
-                Interop.Crypto.EvpCipherSetCcmTag(ctx, tag);
-
-                Interop.Crypto.EvpCipherSetKeyAndIV(ctx, _key, nonce, Interop.Crypto.EvpCipherDirection.Decrypt);
+                _keyBox.DangerousAddRef(ref acquired);
+                ReadOnlySpan<byte> key = _keyBox.DangerousKeySpan;
 
-                if (associatedData.Length != 0)
+                using (SafeEvpCipherCtxHandle ctx = Interop.Crypto.EvpCipherCreatePartial(GetCipher(key.Length * 8)))
                 {
-                    // length needs to be known ahead of time in CCM mode
-                    Interop.Crypto.EvpCipherSetInputLength(ctx, ciphertext.Length);
+                    Interop.Crypto.CheckValidOpenSslHandle(ctx);
+                    Interop.Crypto.EvpCipherSetCcmNonceLength(ctx, nonce.Length);
+                    Interop.Crypto.EvpCipherSetCcmTag(ctx, tag);
 
-                    if (!Interop.Crypto.EvpCipherUpdate(ctx, Span<byte>.Empty, out _, associatedData))
+                    Interop.Crypto.EvpCipherSetKeyAndIV(ctx, key, nonce, Interop.Crypto.EvpCipherDirection.Decrypt);
+
+                    if (associatedData.Length != 0)
                     {
-                        throw Interop.Crypto.CreateOpenSslCryptographicException();
+                        // length needs to be known ahead of time in CCM mode
+                        Interop.Crypto.EvpCipherSetInputLength(ctx, ciphertext.Length);
+
+                        if (!Interop.Crypto.EvpCipherUpdate(ctx, Span<byte>.Empty, out _, associatedData))
+                        {
+                            throw Interop.Crypto.CreateOpenSslCryptographicException();
+                        }
                     }
-                }
 
-                if (!Interop.Crypto.EvpCipherUpdate(ctx, plaintext, out int plaintextBytesWritten, ciphertext))
-                {
-                    plaintext.Clear();
-                    throw new AuthenticationTagMismatchException();
-                }
+                    if (!Interop.Crypto.EvpCipherUpdate(ctx, plaintext, out int plaintextBytesWritten, ciphertext))
+                    {
+                        plaintext.Clear();
+                        throw new AuthenticationTagMismatchException();
+                    }
+
+                    if (plaintextBytesWritten != plaintext.Length)
+                    {
+                        Debug.Fail($"CCM decrypt wrote {plaintextBytesWritten} of {plaintext.Length} bytes.");
+                        throw new CryptographicException();
+                    }
 
-                if (plaintextBytesWritten != plaintext.Length)
+                    // The OpenSSL documentation says not to call EvpCipherFinalEx for CCM decryption, and calling it will report failure.
+                    // https://wiki.openssl.org/index.php/EVP_Authenticated_Encryption_and_Decryption#Authenticated_Decryption_using_CCM_mode
+                }
+            }
+            finally
+            {
+                if (acquired)
                 {
-                    Debug.Fail($"CCM decrypt wrote {plaintextBytesWritten} of {plaintext.Length} bytes.");
-                    throw new CryptographicException();
+                    _keyBox.DangerousRelease();
                 }
-
-                // The OpenSSL documentation says not to call EvpCipherFinalEx for CCM decryption, and calling it will report failure.
-                // https://wiki.openssl.org/index.php/EVP_Authenticated_Encryption_and_Decryption#Authenticated_Decryption_using_CCM_mode
             }
         }
 
@@ -137,16 +163,6 @@ private static IntPtr GetCipher(int keySizeInBits)
             }
         }
 
-        public void Dispose()
-        {
-            CryptographicOperations.ZeroMemory(_key);
-            _key = null;
-        }
-
-        [MemberNotNull(nameof(_key))]
-        private void CheckDisposed()
-        {
-            ObjectDisposedException.ThrowIf(_key is null, this);
-        }
+        public void Dispose() => _keyBox.Dispose();
     }
 }
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/AesGcm.macOS.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/AesGcm.macOS.cs
index 5b5b2638164f..3b528f496f4d 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/AesGcm.macOS.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/AesGcm.macOS.cs
@@ -9,7 +9,7 @@ namespace System.Security.Cryptography
 {
     public sealed partial class AesGcm
     {
-        private byte[]? _key;
+        private FixedMemoryKeyBox _keyBox;
 
         // CryptoKit added AES.GCM in macOS 10.15, which is our minimum target for macOS.
         public static bool IsSupported => true;
@@ -17,15 +17,12 @@ public sealed partial class AesGcm
         // CryptoKit only supports 16 byte tags.
         public static KeySizes TagByteSizes { get; } = new KeySizes(16, 16, 1);
 
-        [MemberNotNull(nameof(_key))]
+        [MemberNotNull(nameof(_keyBox))]
         private void ImportKey(ReadOnlySpan<byte> key)
         {
             // We should only be calling this in the constructor, so there shouldn't be a previous key.
-            Debug.Assert(_key is null);
-
-            // Pin the array on the POH so that the GC doesn't move it around to allow zeroing to be more effective.
-            _key = GC.AllocateArray<byte>(key.Length, pinned: true);
-            key.CopyTo(_key);
+            Debug.Assert(_keyBox is null);
+            _keyBox = new FixedMemoryKeyBox(key);
         }
 
         private void EncryptCore(
@@ -35,14 +32,26 @@ private void EncryptCore(
             Span<byte> tag,
             ReadOnlySpan<byte> associatedData)
         {
-            CheckDisposed();
-            Interop.AppleCrypto.AesGcmEncrypt(
-                _key,
-                nonce,
-                plaintext,
-                ciphertext,
-                tag,
-                associatedData);
+            bool acquired = false;
+
+            try
+            {
+                _keyBox.DangerousAddRef(ref acquired);
+                Interop.AppleCrypto.AesGcmEncrypt(
+                    _keyBox.DangerousKeySpan,
+                    nonce,
+                    plaintext,
+                    ciphertext,
+                    tag,
+                    associatedData);
+            }
+            finally
+            {
+                if (acquired)
+                {
+                    _keyBox.DangerousRelease();
+                }
+            }
         }
 
         private void DecryptCore(
@@ -52,26 +61,28 @@ private void DecryptCore(
             Span<byte> plaintext,
             ReadOnlySpan<byte> associatedData)
         {
-            CheckDisposed();
-            Interop.AppleCrypto.AesGcmDecrypt(
-                _key,
-                nonce,
-                ciphertext,
-                tag,
-                plaintext,
-                associatedData);
-        }
+            bool acquired = false;
 
-        public void Dispose()
-        {
-            CryptographicOperations.ZeroMemory(_key);
-            _key = null;
+            try
+            {
+                _keyBox.DangerousAddRef(ref acquired);
+                Interop.AppleCrypto.AesGcmDecrypt(
+                    _keyBox.DangerousKeySpan,
+                    nonce,
+                    ciphertext,
+                    tag,
+                    plaintext,
+                    associatedData);
+            }
+            finally
+            {
+                if (acquired)
+                {
+                    _keyBox.DangerousRelease();
+                }
+            }
         }
 
-        [MemberNotNull(nameof(_key))]
-        private void CheckDisposed()
-        {
-            ObjectDisposedException.ThrowIf(_key is null, this);
-        }
+        public void Dispose() => _keyBox.Dispose();
     }
 }
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/ChaCha20Poly1305.macOS.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/ChaCha20Poly1305.macOS.cs
index 82f1633c1882..aa2cba5f234b 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/ChaCha20Poly1305.macOS.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/ChaCha20Poly1305.macOS.cs
@@ -10,17 +10,14 @@ public sealed partial class ChaCha20Poly1305
     {
         // CryptoKit added ChaCha20Poly1305 in macOS 10.15, which is our minimum target for macOS.
         public static bool IsSupported => true;
-        private byte[]? _key;
+        private FixedMemoryKeyBox _keyBox;
 
-        [MemberNotNull(nameof(_key))]
+        [MemberNotNull(nameof(_keyBox))]
         private void ImportKey(ReadOnlySpan<byte> key)
         {
             // We should only be calling this in the constructor, so there shouldn't be a previous key.
-            Debug.Assert(_key is null);
-
-            // Pin the array on the POH so that the GC doesn't move it around to allow zeroing to be more effective.
-            _key = GC.AllocateArray<byte>(key.Length, pinned: true);
-            key.CopyTo(_key);
+            Debug.Assert(_keyBox is null);
+            _keyBox = new FixedMemoryKeyBox(key);
         }
 
         private void EncryptCore(
@@ -30,14 +27,26 @@ private void EncryptCore(
             Span<byte> tag,
             ReadOnlySpan<byte> associatedData = default)
         {
-            CheckDisposed();
-            Interop.AppleCrypto.ChaCha20Poly1305Encrypt(
-                _key,
-                nonce,
-                plaintext,
-                ciphertext,
-                tag,
-                associatedData);
+            bool acquired = false;
+
+            try
+            {
+                _keyBox.DangerousAddRef(ref acquired);
+                Interop.AppleCrypto.ChaCha20Poly1305Encrypt(
+                    _keyBox.DangerousKeySpan,
+                    nonce,
+                    plaintext,
+                    ciphertext,
+                    tag,
+                    associatedData);
+            }
+            finally
+            {
+                if (acquired)
+                {
+                    _keyBox.DangerousRelease();
+                }
+            }
         }
 
         private void DecryptCore(
@@ -47,26 +56,28 @@ private void DecryptCore(
             Span<byte> plaintext,
             ReadOnlySpan<byte> associatedData = default)
         {
-            CheckDisposed();
-            Interop.AppleCrypto.ChaCha20Poly1305Decrypt(
-                _key,
-                nonce,
-                ciphertext,
-                tag,
-                plaintext,
-                associatedData);
-        }
+            bool acquired = false;
 
-        public void Dispose()
-        {
-            CryptographicOperations.ZeroMemory(_key);
-            _key = null;
+            try
+            {
+                _keyBox.DangerousAddRef(ref acquired);
+                Interop.AppleCrypto.ChaCha20Poly1305Decrypt(
+                    _keyBox.DangerousKeySpan,
+                    nonce,
+                    ciphertext,
+                    tag,
+                    plaintext,
+                    associatedData);
+            }
+            finally
+            {
+                if (acquired)
+                {
+                    _keyBox.DangerousRelease();
+                }
+            }
         }
 
-        [MemberNotNull(nameof(_key))]
-        private void CheckDisposed()
-        {
-            ObjectDisposedException.ThrowIf(_key is null, this);
-        }
+        public void Dispose() => _keyBox.Dispose();
     }
 }
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/CngKey.StandardProperties.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/CngKey.StandardProperties.cs
index 51584566f560..ceb080af407a 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/CngKey.StandardProperties.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/CngKey.StandardProperties.cs
@@ -15,12 +15,14 @@ namespace System.Security.Cryptography
     /// </summary>
     public sealed partial class CngKey : IDisposable
     {
-        //
-        // Key properties
-        //
-
         private const int CachedKeySizeUninitializedSentinel = -1;
-        private int _cachedKeySize = CachedKeySizeUninitializedSentinel;
+        private volatile int _cachedKeySize = CachedKeySizeUninitializedSentinel;
+
+        private volatile CngAlgorithm? _cachedAlgorithm;
+        private volatile bool _hasCachedAlgorithmGroup;
+        private volatile CngAlgorithmGroup? _cachedAlgorithmGroup;
+        private volatile bool _hasCachedProvider;
+        private volatile CngProvider? _cachedProvider;
 
         /// <summary>
         ///     Algorithm group this key can be used with
@@ -29,25 +31,38 @@ public CngAlgorithm Algorithm
         {
             get
             {
-                string algorithm = _keyHandle.GetPropertyAsString(KeyPropertyName.Algorithm, CngPropertyOptions.None)!;
-                // .NET Framework compat: Don't check for null. Just let CngAlgorithm handle it.
-                return new CngAlgorithm(algorithm);
-            }
+                if (_cachedAlgorithm is null || _keyHandle.IsClosed)
+                {
+                    string algorithm = _keyHandle.GetPropertyAsString(KeyPropertyName.Algorithm, CngPropertyOptions.None)!;
 
+                    // .NET Framework compat: Don't check for null. Just let CngAlgorithm handle it.
+                    _cachedAlgorithm = new CngAlgorithm(algorithm);
+                }
+
+                return _cachedAlgorithm;
+            }
         }
 
         /// <summary>
         ///     Name of the algorithm this key can be used with
         /// </summary>
         public CngAlgorithmGroup? AlgorithmGroup
-
         {
             get
             {
-                string? algorithmGroup = _keyHandle.GetPropertyAsString(KeyPropertyName.AlgorithmGroup, CngPropertyOptions.None);
-                if (algorithmGroup == null)
-                    return null;
-                return new CngAlgorithmGroup(algorithmGroup);
+                if (!_hasCachedAlgorithmGroup || _keyHandle.IsClosed)
+                {
+                    string? algorithmGroup = _keyHandle.GetPropertyAsString(KeyPropertyName.AlgorithmGroup, CngPropertyOptions.None);
+
+                    if (algorithmGroup is not null)
+                    {
+                        _cachedAlgorithmGroup = new CngAlgorithmGroup(algorithmGroup);
+                    }
+
+                    _hasCachedAlgorithmGroup = true;
+                }
+
+                return _cachedAlgorithmGroup;
             }
         }
 
@@ -242,7 +257,6 @@ int ComputeKeySize()
         ///     Usage restrictions on the key
         /// </summary>
         public CngKeyUsages KeyUsage
-
         {
             get
             {
@@ -279,10 +293,19 @@ public CngProvider? Provider
         {
             get
             {
-                string? provider = _providerHandle.GetPropertyAsString(ProviderPropertyName.Name, CngPropertyOptions.None);
-                if (provider == null)
-                    return null;
-                return new CngProvider(provider);
+                if (!_hasCachedProvider || _providerHandle.IsClosed)
+                {
+                    string? provider = _providerHandle.GetPropertyAsString(ProviderPropertyName.Name, CngPropertyOptions.None);
+
+                    if (provider is not null)
+                    {
+                        _cachedProvider = new CngProvider(provider);
+                    }
+
+                    _hasCachedProvider = true;
+                }
+
+                return _cachedProvider;
             }
         }
 
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/FixedMemoryKeyBox.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/FixedMemoryKeyBox.cs
new file mode 100644
index 000000000000..7f6f1fce35dc
--- /dev/null
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/FixedMemoryKeyBox.cs
@@ -0,0 +1,31 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.InteropServices;
+
+namespace System.Security.Cryptography
+{
+    internal sealed unsafe class FixedMemoryKeyBox : SafeHandle
+    {
+        private readonly int _length;
+
+        internal FixedMemoryKeyBox(ReadOnlySpan<byte> key) : base(IntPtr.Zero, ownsHandle: true)
+        {
+            void* memory = NativeMemory.Alloc((nuint)key.Length);
+            key.CopyTo(new Span<byte>(memory, key.Length));
+            SetHandle((IntPtr)memory);
+            _length = key.Length;
+        }
+
+        internal ReadOnlySpan<byte> DangerousKeySpan => new ReadOnlySpan<byte>((void*)handle, _length);
+
+        protected override bool ReleaseHandle()
+        {
+            CryptographicOperations.ZeroMemory(new Span<byte>((void*)handle, _length));
+            NativeMemory.Free((void*)handle);
+            return true;
+        }
+
+        public override bool IsInvalid => handle == IntPtr.Zero;
+    }
+}
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/HashProviderCng.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/HashProviderCng.cs
index 831b846a7444..36f4767e9939 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/HashProviderCng.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/HashProviderCng.cs
@@ -39,7 +39,7 @@ internal HashProviderCng(string hashAlgId, ReadOnlySpan<byte> key, bool isHmac)
             // So keep hHash trapped in this scope to prevent (mis-)use of it.
             {
                 SafeBCryptHashHandle hHash;
-                NTSTATUS ntStatus = Interop.BCrypt.BCryptCreateHash(_hAlgorithm, out hHash, IntPtr.Zero, 0, key, key == null ? 0 : key.Length, BCryptCreateHashFlags.BCRYPT_HASH_REUSABLE_FLAG);
+                NTSTATUS ntStatus = Interop.BCrypt.BCryptCreateHash(_hAlgorithm, out hHash, IntPtr.Zero, 0, key, key.Length, BCryptCreateHashFlags.BCRYPT_HASH_REUSABLE_FLAG);
                 if (ntStatus == NTSTATUS.STATUS_INVALID_PARAMETER)
                 {
                     hHash.Dispose();
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/LiteHash.Apple.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/LiteHash.Apple.cs
index 57e63d8dc698..0b2fde720a20 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/LiteHash.Apple.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/LiteHash.Apple.cs
@@ -64,7 +64,7 @@ internal LiteHash(PAL_HashAlgorithm algorithm)
                 throw new PlatformNotSupportedException(
                     SR.Format(
                         SR.Cryptography_UnknownHashAlgorithm,
-                        Enum.GetName(typeof(PAL_HashAlgorithm), algorithm)));
+                        Enum.GetName(algorithm)));
             }
 
             if (_ctx.IsInvalid)
@@ -159,7 +159,7 @@ internal LiteHmac(PAL_HashAlgorithm algorithm, ReadOnlySpan<byte> key, bool prei
                 throw new PlatformNotSupportedException(
                     SR.Format(
                         SR.Cryptography_UnknownHashAlgorithm,
-                        Enum.GetName(typeof(Interop.AppleCrypto.PAL_HashAlgorithm), algorithm)));
+                        Enum.GetName(algorithm)));
             }
 
             if (_ctx.IsInvalid)
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/OidLookup.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/OidLookup.cs
index d2d07d42e572..e2de9d9bc5aa 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/OidLookup.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/OidLookup.cs
@@ -133,17 +133,14 @@ static OidLookup()
 
         private static void InitializeLookupDictionaries()
         {
-            static void AddEntry(string oid, string primaryFriendlyName, string[]? additionalFriendlyNames = null)
+            static void AddEntry(string oid, string primaryFriendlyName, ReadOnlySpan<string> additionalFriendlyNames = default)
             {
                 s_oidToFriendlyName.Add(oid, primaryFriendlyName);
                 s_friendlyNameToOid.Add(primaryFriendlyName, oid);
 
-                if (additionalFriendlyNames != null)
+                foreach (string additionalName in additionalFriendlyNames)
                 {
-                    foreach (var additionalName in additionalFriendlyNames)
-                    {
-                        s_friendlyNameToOid.Add(additionalName, oid);
-                    }
+                    s_friendlyNameToOid.Add(additionalName, oid);
                 }
             }
 
@@ -200,10 +197,9 @@ static void AddEntry(string oid, string primaryFriendlyName, string[]? additiona
             AddEntry("1.3.133.16.840.63.0.2", "ECDH_STD_SHA1_KDF");
             AddEntry("1.3.132.1.11.1", "ECDH_STD_SHA256_KDF");
             AddEntry("1.3.132.1.11.2", "ECDH_STD_SHA384_KDF");
-#pragma warning disable CA1861 // Avoid constant arrays as arguments. Loaded by static constructor
-            AddEntry("1.2.840.10045.3.1.7", "ECDSA_P256", new[] { "nistP256", "secP256r1", "x962P256v1", "ECDH_P256" });
-            AddEntry("1.3.132.0.34", "ECDSA_P384", new[] { "nistP384", "secP384r1", "ECDH_P384" });
-            AddEntry("1.3.132.0.35", "ECDSA_P521", new[] { "nistP521", "secP521r1", "ECDH_P521" });
+            AddEntry("1.2.840.10045.3.1.7", "ECDSA_P256", ["nistP256", "secP256r1", "x962P256v1", "ECDH_P256"]);
+            AddEntry("1.3.132.0.34", "ECDSA_P384", ["nistP384", "secP384r1", "ECDH_P384"]);
+            AddEntry("1.3.132.0.35", "ECDSA_P521", ["nistP521", "secP521r1", "ECDH_P521"]);
             AddEntry("1.2.840.113549.1.9.16.3.5", "ESDH");
             AddEntry("2.5.4.42", "G");
             AddEntry("2.5.4.43", "I");
@@ -230,8 +226,7 @@ static void AddEntry(string oid, string primaryFriendlyName, string[]? additiona
             AddEntry("1.2.840.113549.1.1.1", "RSA");
             AddEntry("1.2.840.113549.1.1.7", "RSAES_OAEP");
             AddEntry("1.2.840.113549.1.1.10", "RSASSA-PSS");
-            AddEntry("2.5.4.8", "S", new[] { "ST" });
-#pragma warning restore CA1861 // Avoid constant arrays as arguments
+            AddEntry("2.5.4.8", "S", ["ST"]);
             AddEntry("1.3.132.0.9", "secP160k1");
             AddEntry("1.3.132.0.8", "secP160r1");
             AddEntry("1.3.132.0.30", "secP160r2");
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/RSABCrypt.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/RSABCrypt.cs
index c5ac24ab029b..6e1fbbdb23d9 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/RSABCrypt.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/RSABCrypt.cs
@@ -20,6 +20,7 @@ internal sealed class RSABCrypt : RSA
 
         private SafeBCryptKeyHandle? _key;
         private int _lastKeySize;
+        private bool _publicOnly;
 
         internal RSABCrypt()
         {
@@ -51,11 +52,11 @@ private SafeBCryptKeyHandle GetKey()
 
             SafeBCryptKeyHandle newKey = Interop.BCrypt.BCryptGenerateKeyPair(s_algHandle, keySize);
             Interop.BCrypt.BCryptFinalizeKeyPair(newKey);
-            SetKey(newKey);
+            SetKey(newKey, publicOnly: false);
             return newKey;
         }
 
-        private void SetKey(SafeBCryptKeyHandle newKey)
+        private void SetKey(SafeBCryptKeyHandle newKey, bool publicOnly)
         {
             Debug.Assert(!newKey.IsInvalid);
 
@@ -65,6 +66,7 @@ private void SetKey(SafeBCryptKeyHandle newKey)
 
             SafeBCryptKeyHandle? oldKey = Interlocked.Exchange(ref _key, newKey);
             ForceSetKeySize(keySize);
+            _publicOnly = publicOnly;
             oldKey?.Dispose();
         }
 
@@ -112,7 +114,7 @@ public override void ImportParameters(RSAParameters parameters)
                 CryptoPool.Return(keyBlob);
             }
 
-            SetKey(newKey);
+            SetKey(newKey, publicOnly: parameters.D is null);
         }
 
         public override byte[] Encrypt(byte[] data, RSAEncryptionPadding padding)
@@ -190,6 +192,8 @@ public override bool TryDecrypt(
                 throw new CryptographicException(SR.Cryptography_RSA_DecryptWrongSize);
             }
 
+            ThrowIfPublicOnly();
+
             switch (padding.Mode)
             {
                 case RSAEncryptionPaddingMode.Pkcs1:
@@ -261,6 +265,7 @@ public override bool TrySignHash(
             string? hashAlgorithmName = hashAlgorithm.Name;
             ArgumentException.ThrowIfNullOrEmpty(hashAlgorithmName, nameof(hashAlgorithm));
             ArgumentNullException.ThrowIfNull(padding);
+            ThrowIfPublicOnly();
 
             SafeBCryptKeyHandle key = GetKey();
 
@@ -426,5 +431,13 @@ private void ThrowIfDisposed()
         {
             ObjectDisposedException.ThrowIf(_lastKeySize < 0, this);
         }
+
+        private void ThrowIfPublicOnly()
+        {
+            if (_publicOnly)
+            {
+                throw new CryptographicException(SR.Cryptography_CSP_NoPrivateKey);
+            }
+        }
     }
 }
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/SHA3_512.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/SHA3_512.cs
index 1e6cd209d76e..689b54c71224 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/SHA3_512.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/SHA3_512.cs
@@ -29,7 +29,7 @@ public abstract class SHA3_512 : HashAlgorithm
         public const int HashSizeInBytes = HashSizeInBits / 8;
 
         /// <summary>
-        /// Initializes a new instance of <see cref="SHA3_256" />.
+        /// Initializes a new instance of <see cref="SHA3_512" />.
         /// </summary>
         protected SHA3_512()
         {
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AndroidCertificatePal.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AndroidCertificatePal.cs
index f75879604e3f..30a6cdbce3c2 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AndroidCertificatePal.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AndroidCertificatePal.cs
@@ -118,8 +118,9 @@ ref MemoryMarshal.GetReference(rawData),
 
         private static AndroidCertificatePal ReadPkcs12(ReadOnlySpan<byte> rawData, SafePasswordHandle password, bool ephemeralSpecified)
         {
-            using (var reader = new AndroidPkcs12Reader(rawData))
+            using (var reader = new AndroidPkcs12Reader())
             {
+                reader.ParsePkcs12(rawData);
                 reader.Decrypt(password, ephemeralSpecified);
 
                 UnixPkcs12Reader.CertAndKey certAndKey = reader.GetSingleCert();
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AndroidPkcs12Reader.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AndroidPkcs12Reader.cs
index a22e15530798..10800a71d537 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AndroidPkcs12Reader.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AndroidPkcs12Reader.cs
@@ -11,17 +11,17 @@ namespace System.Security.Cryptography.X509Certificates
 {
     internal sealed class AndroidPkcs12Reader : UnixPkcs12Reader
     {
-        internal AndroidPkcs12Reader(ReadOnlySpan<byte> data)
+        internal AndroidPkcs12Reader()
         {
-            ParsePkcs12(data);
         }
 
         public static bool IsPkcs12(ReadOnlySpan<byte> data)
         {
             try
             {
-                using (var reader = new AndroidPkcs12Reader(data))
+                using (var reader = new AndroidPkcs12Reader())
                 {
+                    reader.ParsePkcs12(data);
                     return true;
                 }
             }
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AppleCertificatePal.Pkcs12.iOS.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AppleCertificatePal.Pkcs12.iOS.cs
index 26a1f569abe1..baa791d59f34 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AppleCertificatePal.Pkcs12.iOS.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AppleCertificatePal.Pkcs12.iOS.cs
@@ -14,8 +14,9 @@ private static AppleCertificatePal ImportPkcs12(
             SafePasswordHandle password,
             bool ephemeralSpecified)
         {
-            using (ApplePkcs12Reader reader = new ApplePkcs12Reader(rawData))
+            using (ApplePkcs12Reader reader = new ApplePkcs12Reader())
             {
+                reader.ParsePkcs12(rawData);
                 reader.Decrypt(password, ephemeralSpecified);
                 return ImportPkcs12(reader.GetSingleCert());
             }
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AppleCertificatePal.Pkcs12.macOS.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AppleCertificatePal.Pkcs12.macOS.cs
index 118f7067691e..6e329434278d 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AppleCertificatePal.Pkcs12.macOS.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/AppleCertificatePal.Pkcs12.macOS.cs
@@ -15,8 +15,9 @@ private static AppleCertificatePal ImportPkcs12(
             bool exportable,
             SafeKeychainHandle keychain)
         {
-            using (ApplePkcs12Reader reader = new ApplePkcs12Reader(rawData))
+            using (ApplePkcs12Reader reader = new ApplePkcs12Reader())
             {
+                reader.ParsePkcs12(rawData);
                 reader.Decrypt(password, ephemeralSpecified: false);
 
                 UnixPkcs12Reader.CertAndKey certAndKey = reader.GetSingleCert();
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/ApplePkcs12Reader.iOS.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/ApplePkcs12Reader.iOS.cs
index 5900a979ed83..e493436e01d7 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/ApplePkcs12Reader.iOS.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/ApplePkcs12Reader.iOS.cs
@@ -10,9 +10,8 @@ namespace System.Security.Cryptography.X509Certificates
 {
     internal sealed class ApplePkcs12Reader : UnixPkcs12Reader
     {
-        internal ApplePkcs12Reader(ReadOnlySpan<byte> data)
+        internal ApplePkcs12Reader()
         {
-            ParsePkcs12(data);
         }
 
         protected override ICertificatePalCore ReadX509Der(ReadOnlyMemory<byte> data)
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/ApplePkcs12Reader.macOS.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/ApplePkcs12Reader.macOS.cs
index 7c1121a6c86a..8f3274d15d23 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/ApplePkcs12Reader.macOS.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/ApplePkcs12Reader.macOS.cs
@@ -11,9 +11,8 @@ namespace System.Security.Cryptography.X509Certificates
 {
     internal sealed class ApplePkcs12Reader : UnixPkcs12Reader
     {
-        internal ApplePkcs12Reader(ReadOnlySpan<byte> data)
+        internal ApplePkcs12Reader()
         {
-            ParsePkcs12(data);
         }
 
         protected override ICertificatePalCore ReadX509Der(ReadOnlyMemory<byte> data)
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/CertificatePal.Windows.PrivateKey.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/CertificatePal.Windows.PrivateKey.cs
index e80641afc181..947d7bbee5e9 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/CertificatePal.Windows.PrivateKey.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/CertificatePal.Windows.PrivateKey.cs
@@ -62,9 +62,21 @@ public bool HasPrivateKey
 
         public ECDiffieHellman? GetECDiffieHellmanPrivateKey()
         {
+            static ECDiffieHellmanCng? FromCngKey(CngKey cngKey)
+            {
+                if (cngKey.AlgorithmGroup == CngAlgorithmGroup.ECDiffieHellman)
+                {
+                    return new ECDiffieHellmanCng(cngKey, transferOwnership: true);
+                }
+
+                // We might be getting an ECDSA key here. CNG allows ECDH to be either ECDH or ECDSA, however if
+                // the AlgorithmGroup is ECDSA, then it cannot be used for ECDH, even though both of them are ECC keys.
+                return null;
+            }
+
             return GetPrivateKey<ECDiffieHellman>(
                 csp => throw new NotSupportedException(SR.NotSupported_ECDiffieHellman_Csp),
-                cngKey => new ECDiffieHellmanCng(cngKey, transferOwnership: true)
+                FromCngKey
             );
         }
 
@@ -199,7 +211,7 @@ public ICertificatePal CopyWithPrivateKey(RSA rsa)
             }
         }
 
-        private T? GetPrivateKey<T>(Func<CspParameters, T> createCsp, Func<CngKey, T> createCng) where T : AsymmetricAlgorithm
+        private T? GetPrivateKey<T>(Func<CspParameters, T> createCsp, Func<CngKey, T?> createCng) where T : AsymmetricAlgorithm
         {
             using (SafeCertContextHandle certContext = GetCertContext())
             {
@@ -207,7 +219,15 @@ public ICertificatePal CopyWithPrivateKey(RSA rsa)
                 if (ncryptKey != null)
                 {
                     CngKey cngKey = CngKey.OpenNoDuplicate(ncryptKey, cngHandleOptions);
-                    return createCng(cngKey);
+                    T? result = createCng(cngKey);
+
+                    // Dispose of cngKey if its ownership did not transfer to the underlying algorithm.
+                    if (result is null)
+                    {
+                        cngKey.Dispose();
+                    }
+
+                    return result;
                 }
             }
 
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/CertificateRequest.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/CertificateRequest.cs
index c3084c964d7d..405d37188e37 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/CertificateRequest.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/CertificateRequest.cs
@@ -812,7 +812,7 @@ public X509Certificate2 Create(
 
             if (notAfter < notBefore)
                 throw new ArgumentException(SR.Cryptography_CertReq_DatesReversed);
-            if (serialNumber == null || serialNumber.Length < 1)
+            if (serialNumber.Length < 1)
                 throw new ArgumentException(SR.Arg_EmptyOrNullArray, nameof(serialNumber));
 
             byte[] signatureAlgorithm = generator.GetSignatureAlgorithmIdentifier(HashAlgorithm);
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/OpenSslPkcs12Reader.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/OpenSslPkcs12Reader.cs
index 7ce3eb5f3197..c0a4616273c0 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/OpenSslPkcs12Reader.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/OpenSslPkcs12Reader.cs
@@ -9,9 +9,8 @@ namespace System.Security.Cryptography.X509Certificates
 {
     internal sealed class OpenSslPkcs12Reader : UnixPkcs12Reader
     {
-        private OpenSslPkcs12Reader(ReadOnlySpan<byte> data)
+        private OpenSslPkcs12Reader()
         {
-            ParsePkcs12(data);
         }
 
         protected override ICertificatePalCore ReadX509Der(ReadOnlyMemory<byte> data)
@@ -89,7 +88,8 @@ private static bool TryRead(
 
             try
             {
-                pkcs12Reader = new OpenSslPkcs12Reader(data);
+                pkcs12Reader = new OpenSslPkcs12Reader();
+                pkcs12Reader.ParsePkcs12(data);
                 return true;
             }
             catch (CryptographicException e)
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/StorePal.Android.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/StorePal.Android.cs
index 0c3e92bfd94c..962287bc2630 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/StorePal.Android.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/StorePal.Android.cs
@@ -118,8 +118,9 @@ private static ICertificatePal[] ReadPkcs12Collection(
             SafePasswordHandle password,
             bool ephemeralSpecified)
         {
-            using (var reader = new AndroidPkcs12Reader(rawData))
+            using (var reader = new AndroidPkcs12Reader())
             {
+                reader.ParsePkcs12(rawData);
                 reader.Decrypt(password, ephemeralSpecified);
 
                 ICertificatePal[] certs = new ICertificatePal[reader.GetCertCount()];
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/StorePal.iOS.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/StorePal.iOS.cs
index ae90eabcf23a..edccc0b79e33 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/StorePal.iOS.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/StorePal.iOS.cs
@@ -46,10 +46,11 @@ private static ILoaderPal FromBlob(ReadOnlySpan<byte> rawData, SafePasswordHandl
             if (contentType == X509ContentType.Pkcs12)
             {
                 X509Certificate.EnforceIterationCountLimit(ref rawData, readingFromFile, password.PasswordProvided);
-                ApplePkcs12Reader reader = new ApplePkcs12Reader(rawData);
+                ApplePkcs12Reader reader = new ApplePkcs12Reader();
 
                 try
                 {
+                    reader.ParsePkcs12(rawData);
                     reader.Decrypt(password, ephemeralSpecified);
                     return new ApplePkcs12CertLoader(reader, password);
                 }
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/StorePal.macOS.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/StorePal.macOS.cs
index af87c145119b..b424e971b09e 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/StorePal.macOS.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/StorePal.macOS.cs
@@ -72,10 +72,11 @@ private static ApplePkcs12CertLoader ImportPkcs12(
             bool ephemeralSpecified,
             SafeKeychainHandle keychain)
         {
-            ApplePkcs12Reader reader = new ApplePkcs12Reader(rawData);
+            ApplePkcs12Reader reader = new ApplePkcs12Reader();
 
             try
             {
+                reader.ParsePkcs12(rawData);
                 reader.Decrypt(password, ephemeralSpecified);
                 return new ApplePkcs12CertLoader(reader, keychain, password, exportable);
             }
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/UnixPkcs12Reader.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/UnixPkcs12Reader.cs
index b4f39384cf0e..1f5a24fa15be 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/UnixPkcs12Reader.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/UnixPkcs12Reader.cs
@@ -5,6 +5,7 @@
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Formats.Asn1;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Security.Cryptography.Asn1;
 using System.Security.Cryptography.Asn1.Pkcs12;
@@ -30,7 +31,7 @@ internal abstract class UnixPkcs12Reader : IDisposable
         protected abstract ICertificatePalCore ReadX509Der(ReadOnlyMemory<byte> data);
         protected abstract AsymmetricAlgorithm LoadKey(ReadOnlyMemory<byte> safeBagBagValue);
 
-        protected void ParsePkcs12(ReadOnlySpan<byte> data)
+        internal void ParsePkcs12(ReadOnlySpan<byte> data)
         {
             try
             {
@@ -42,10 +43,19 @@ protected void ParsePkcs12(ReadOnlySpan<byte> data)
 
                 unsafe
                 {
-                    IntPtr tmpPtr = Marshal.AllocHGlobal(encodedData.Length);
-                    Span<byte> tmpSpan = new Span<byte>((byte*)tmpPtr, encodedData.Length);
-                    encodedData.CopyTo(tmpSpan);
-                    _tmpManager = new PointerMemoryManager<byte>((void*)tmpPtr, encodedData.Length);
+                    void* tmpPtr = NativeMemory.Alloc((uint)encodedData.Length);
+
+                    try
+                    {
+                        Span<byte> tmpSpan = new Span<byte>((byte*)tmpPtr, encodedData.Length);
+                        encodedData.CopyTo(tmpSpan);
+                        _tmpManager = new PointerMemoryManager<byte>(tmpPtr, encodedData.Length);
+                    }
+                    catch
+                    {
+                        NativeMemory.Free(tmpPtr);
+                        throw;
+                    }
                 }
 
                 ReadOnlyMemory<byte> tmpMemory = _tmpManager.Memory;
@@ -115,26 +125,27 @@ public void Dispose()
             // Generally, having a MemoryManager cleaned up in a Dispose is a bad practice.
             // In this case, the UnixPkcs12Reader is only ever created in a using statement,
             // never accessed by a second thread, and there isn't a manual call to Dispose
-            // mixed in anywhere.
-            if (_tmpManager != null)
+            // mixed in anywhere outside of an aborted allocation path.
+
+            PointerMemoryManager<byte>? manager = _tmpManager;
+            _tmpManager = null;
+
+            if (manager != null)
             {
                 unsafe
                 {
-                    Span<byte> tmp = _tmpManager.GetSpan();
+                    Span<byte> tmp = manager.GetSpan();
                     CryptographicOperations.ZeroMemory(tmp);
-
-                    fixed (byte* ptr = tmp)
-                    {
-                        Marshal.FreeHGlobal((IntPtr)ptr);
-                    }
+                    NativeMemory.Free(Unsafe.AsPointer(ref MemoryMarshal.GetReference(tmp)));
                 }
 
-                ((IDisposable)_tmpManager).Dispose();
-                _tmpManager = null;
+                ((IDisposable)manager).Dispose();
             }
 
-            ContentInfoAsn[]? rentedContents = Interlocked.Exchange(ref _safeContentsValues, null);
-            CertAndKey[]? rentedCerts = Interlocked.Exchange(ref _certs, null);
+            ContentInfoAsn[]? rentedContents = _safeContentsValues;
+            CertAndKey[]? rentedCerts = _certs;
+            _safeContentsValues = null;
+            _certs = null;
 
             if (rentedContents != null)
             {
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X500NameEncoder.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X500NameEncoder.cs
index d2509eca3c3d..865b18e09518 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X500NameEncoder.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X500NameEncoder.cs
@@ -444,7 +444,6 @@ private static List<byte[]> ParseDistinguishedName(
                 // then some whitespace.
                 case ParseState.MaybeEndQuote:
                 case ParseState.SeekComma:
-                    Debug.Assert(tagOid != null);
                     Debug.Assert(valueStart != -1);
                     Debug.Assert(valueEnd != -1);
 
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509Chain.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509Chain.cs
index 50eaba6eb91b..72707a421665 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509Chain.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509Chain.cs
@@ -137,26 +137,57 @@ internal bool Build(X509Certificate2 certificate, bool throwOnException)
                     chainPolicy.UrlRetrievalTimeout,
                     chainPolicy.DisableCertificateDownloads);
 
-                if (_pal == null)
-                    return false;
-
-                _chainElements = new X509ChainElementCollection(_pal.ChainElements!);
-
-                Exception? verificationException;
-                bool? verified = _pal.Verify(chainPolicy.VerificationFlags, out verificationException);
-                if (!verified.HasValue)
+                bool success = false;
+                if (_pal is not null)
                 {
-                    if (throwOnException)
-                    {
-                        throw verificationException!;
-                    }
-                    else
+                    _chainElements = new X509ChainElementCollection(_pal.ChainElements!);
+
+                    Exception? verificationException;
+                    bool? verified = _pal.Verify(chainPolicy.VerificationFlags, out verificationException);
+                    if (!verified.HasValue)
                     {
-                        verified = false;
+                        if (throwOnException)
+                        {
+                            throw verificationException!;
+                        }
+                        else
+                        {
+                            verified = false;
+                        }
                     }
+
+                    success = verified.Value;
+                }
+
+                // There are two reasons success might be false here.
+                //
+                // The most common reason is that we built the chain but the chain appears to run
+                // afoul of policy. This is represented by BuildChain returning a non-null object
+                // and storing potential policy violations in the chain structure. The public Build
+                // method returns false to the caller, and the caller can inspect the ChainStatus
+                // and ChainElements properties and evaluate the failure reason against app-level
+                // policies. If the caller does not care about these policy violations, they can
+                // choose to ignore them and to treat chain building as successful.
+                //
+                // The other type of failure is that BuildChain simply can't build the chain at all.
+                // Perhaps something within the certificate is not valid or is unsupported, or perhaps
+                // there's an internal failure within the OS layer we're invoking, etc. Whatever the
+                // reason, we're not meaningfully able to initialize the ChainStatus property, which
+                // means callers may observe a non-empty list of policy violations. Depending on the
+                // caller's logic, they might incorrectly interpret this as there being no policy
+                // violations at all, which means they might treat this condition as success.
+                //
+                // To avoid callers misinterpreting this latter condition as success, we'll throw an
+                // exception, which matches general .NET API behavior when a method cannot complete
+                // its objective. If throwOnException is false, it means the caller explicitly wants
+                // to suppress exceptions and normalize them to a false return value.
+
+                if (!success && throwOnException && _pal?.ChainStatus is not { Length: > 0 })
+                {
+                    throw new CryptographicException(SR.Cryptography_X509_ChainBuildingFailed);
                 }
 
-                return verified.Value;
+                return success;
             }
         }
 
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509Pal.macOS.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509Pal.macOS.cs
index c50806301ffe..d45cfe0b7aeb 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509Pal.macOS.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509Pal.macOS.cs
@@ -111,6 +111,7 @@ private static DSA DecodeDsaPublicKey(byte[] encodedKeyValue, byte[] encodedPara
             public X509ContentType GetCertContentType(ReadOnlySpan<byte> rawData)
             {
                 const int errSecUnknownFormat = -25257;
+
                 if (rawData.IsEmpty)
                 {
                     // Throw to match Windows and Unix behavior.
@@ -119,7 +120,7 @@ public X509ContentType GetCertContentType(ReadOnlySpan<byte> rawData)
 
                 X509ContentType contentType = Interop.AppleCrypto.X509GetContentType(rawData);
 
-                // Apple doesn't seem to recognize PFX files with no MAC, so try a quick maybe-it's-a-PFX test
+                // Apple's native check can't check for PKCS12, so do a quick decode test to see if it is PKCS12 / PFX.
                 if (contentType == X509ContentType.Unknown)
                 {
                     try
@@ -128,9 +129,11 @@ public X509ContentType GetCertContentType(ReadOnlySpan<byte> rawData)
                         {
                             fixed (byte* pin = rawData)
                             {
+                                AsnValueReader reader = new AsnValueReader(rawData, AsnEncodingRules.BER);
+
                                 using (var manager = new PointerMemoryManager<byte>(pin, rawData.Length))
                                 {
-                                    PfxAsn.Decode(manager.Memory, AsnEncodingRules.BER);
+                                    PfxAsn.Decode(ref reader, manager.Memory, out _);
                                 }
 
                                 contentType = X509ContentType.Pkcs12;
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509SubjectKeyIdentifierExtension.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509SubjectKeyIdentifierExtension.cs
index 52c4ad6f8b95..a97061a885f7 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509SubjectKeyIdentifierExtension.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509SubjectKeyIdentifierExtension.cs
@@ -186,11 +186,32 @@ private static byte[] GenerateSubjectKeyIdentifierFromPublicKey(PublicKey key, X
                 case X509SubjectKeyIdentifierHashAlgorithm.CapiSha1:
                     // CAPI SHA1 is the SHA-1 hash over the whole SubjectPublicKeyInfo
                     return HashSubjectPublicKeyInfo(key, HashAlgorithmName.SHA1);
+                case X509SubjectKeyIdentifierHashAlgorithm.Sha256:
+                    return HashSubjectPublicKeyInfo(key, HashAlgorithmName.SHA256);
+                case X509SubjectKeyIdentifierHashAlgorithm.Sha384:
+                    return HashSubjectPublicKeyInfo(key, HashAlgorithmName.SHA384);
+                case X509SubjectKeyIdentifierHashAlgorithm.Sha512:
+                    return HashSubjectPublicKeyInfo(key, HashAlgorithmName.SHA512);
+                case X509SubjectKeyIdentifierHashAlgorithm.ShortSha256:
+                    return HashSubjectPublicKeyLeft160Bits(key, HashAlgorithmName.SHA256);
+                case X509SubjectKeyIdentifierHashAlgorithm.ShortSha384:
+                    return HashSubjectPublicKeyLeft160Bits(key, HashAlgorithmName.SHA384);
+                case X509SubjectKeyIdentifierHashAlgorithm.ShortSha512:
+                    return HashSubjectPublicKeyLeft160Bits(key, HashAlgorithmName.SHA512);
                 default:
                     throw new ArgumentException(SR.Format(SR.Arg_EnumIllegalVal, algorithm), nameof(algorithm));
             }
         }
 
+        private static byte[] HashSubjectPublicKeyLeft160Bits(PublicKey key, HashAlgorithmName hashAlgorithmName)
+        {
+            const int TruncateSize = 160 / 8;
+            Span<byte> hash = stackalloc byte[512 / 8]; // Largest known hash is 512-bits.
+            int written = CryptographicOperations.HashData(hashAlgorithmName, key.EncodedKeyValue.RawData, hash);
+            Debug.Assert(written >= TruncateSize);
+            return hash.Slice(0, TruncateSize).ToArray();
+        }
+
         private static byte[] HashSubjectPublicKeyInfo(PublicKey key, HashAlgorithmName hashAlgorithmName)
         {
             Span<byte> hash = stackalloc byte[512 / 8]; // Largest known hash is 512-bits.
diff --git a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509SubjectKeyIdentifierHashAlgorithm.cs b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509SubjectKeyIdentifierHashAlgorithm.cs
index 5210a244455e..efe5777cc5b8 100644
--- a/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509SubjectKeyIdentifierHashAlgorithm.cs
+++ b/src/libraries/System.Security.Cryptography/src/System/Security/Cryptography/X509Certificates/X509SubjectKeyIdentifierHashAlgorithm.cs
@@ -11,5 +11,35 @@ public enum X509SubjectKeyIdentifierHashAlgorithm
         Sha1 = 0,
         ShortSha1 = 1,
         CapiSha1 = 2,
+
+        /// <summary>
+        /// The SHA-256 hash over the SubjectPublicKeyInfo as described in RFC 7093.
+        /// </summary>
+        Sha256 = 3,
+
+        /// <summary>
+        /// The SHA-384 hash over the SubjectPublicKeyInfo as described in RFC 7093.
+        /// </summary>
+        Sha384 = 4,
+
+        /// <summary>
+        /// The SHA-512 hash over the SubjectPublicKeyInfo as described in RFC 7093.
+        /// </summary>
+        Sha512 = 5,
+
+        /// <summary>
+        /// The SHA-256 hash over the subjectPublicKey truncated to the leftmost 160-bits as described in RFC 7093.
+        /// </summary>
+        ShortSha256 = 6,
+
+        /// <summary>
+        /// The SHA-384 hash over the subjectPublicKey truncated to the leftmost 160-bits as described in RFC 7093.
+        /// </summary>
+        ShortSha384 = 7,
+
+        /// <summary>
+        /// The SHA-512 hash over the subjectPublicKey truncated to the leftmost 160-bits as described in RFC 7093.
+        /// </summary>
+        ShortSha512 = 8,
     }
 }
diff --git a/src/libraries/System.Security.Cryptography/tests/KmacTestDriver.cs b/src/libraries/System.Security.Cryptography/tests/KmacTestDriver.cs
index 24df42b3d411..6a29bae1a17b 100644
--- a/src/libraries/System.Security.Cryptography/tests/KmacTestDriver.cs
+++ b/src/libraries/System.Security.Cryptography/tests/KmacTestDriver.cs
@@ -7,6 +7,7 @@
 using System.Text;
 using System.Threading;
 using System.Threading.Tasks;
+using Microsoft.DotNet.RemoteExecutor;
 using Microsoft.DotNet.XUnitExtensions;
 using Xunit;
 
@@ -1067,6 +1068,23 @@ public void IsSupported_AgreesWithPlatform()
             Assert.Equal(TKmacTrait.IsSupported, PlatformSupportsKmac());
         }
 
+        [ConditionalFact(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))]
+        public void IsSupported_InitializesCrypto()
+        {
+            if (!IsSupported)
+            {
+                throw new SkipTestException("Algorithm is not supported on current platform.");
+            }
+
+            // This ensures that KMAC is the first cryptographic algorithm touched in the process, which kicks off
+            // the initialization of the crypto layer on some platforms. Running in a remote executor ensures no other
+            // test has pre-initialized anything.
+            RemoteExecutor.Invoke(static () =>
+            {
+                return TKmacTrait.IsSupported ? RemoteExecutor.SuccessExitCode : 0;
+            }).Dispose();
+        }
+
         private static async Task AssertOneShotsThrowAnyAsync<TException>(
             int? keySize = null,
             int? customizationStringSize = null,
diff --git a/src/libraries/System.Security.Cryptography/tests/Resources/Strings.resx b/src/libraries/System.Security.Cryptography/tests/Resources/Strings.resx
index 684a789df83a..2588869aee80 100644
--- a/src/libraries/System.Security.Cryptography/tests/Resources/Strings.resx
+++ b/src/libraries/System.Security.Cryptography/tests/Resources/Strings.resx
@@ -75,6 +75,9 @@
   <data name="Cryptography_Invalid_IA5String" xml:space="preserve">
     <value>The string contains a character not in the 7 bit ASCII character set.</value>
   </data>
+  <data name="Cryptography_KeyNotExtractable" xml:space="preserve">
+    <value>The key does not permit being exported.</value>
+  </data>
   <data name="Cryptography_X509Store_WouldModifyUserTrust" xml:space="preserve">
     <value>Removing the requested certificate would modify user trust settings, and has been denied.</value>
   </data>
diff --git a/src/libraries/System.Security.Cryptography/tests/X509Certificates/CertTests.cs b/src/libraries/System.Security.Cryptography/tests/X509Certificates/CertTests.cs
index 74e10d31338c..a2479d92b718 100644
--- a/src/libraries/System.Security.Cryptography/tests/X509Certificates/CertTests.cs
+++ b/src/libraries/System.Security.Cryptography/tests/X509Certificates/CertTests.cs
@@ -27,6 +27,108 @@ public CertTests(ITestOutputHelper output)
             _log = output;
         }
 
+        [Fact]
+        public static void PrivateKey_FromCertificate_CanExportPrivate_ECDsa()
+        {
+            using (ECDsa ca = ECDsa.Create(ECCurve.NamedCurves.nistP256))
+            {
+                CertificateRequest req = new("CN=potatos", ca, HashAlgorithmName.SHA256);
+
+                using (X509Certificate2 cert = req.CreateSelfSigned(DateTimeOffset.Now, DateTimeOffset.Now.AddDays(3)))
+                using (ECDsa certKey = cert.GetECDsaPrivateKey())
+                {
+                    ECParameters certParameters = certKey.ExportParameters(true);
+                    ECParameters originalParameters = ca.ExportParameters(true);
+                    AssertExtensions.SequenceEqual(originalParameters.D, certParameters.D);
+                }
+            }
+        }
+
+        [Fact]
+        public static void PrivateKey_FromCertificate_CanExportPrivate_RSA()
+        {
+            using (RSA ca = RSA.Create(2048))
+            {
+                CertificateRequest req = new("CN=potatos", ca, HashAlgorithmName.SHA256, RSASignaturePadding.Pkcs1);
+
+                using (X509Certificate2 cert = req.CreateSelfSigned(DateTimeOffset.Now, DateTimeOffset.Now.AddDays(3)))
+                using (RSA certKey = cert.GetRSAPrivateKey())
+                {
+                    RSAParameters certParameters = certKey.ExportParameters(true);
+                    RSAParameters originalParameters = ca.ExportParameters(true);
+                    AssertExtensions.SequenceEqual(originalParameters.P, certParameters.P);
+                    AssertExtensions.SequenceEqual(originalParameters.Q, certParameters.Q);
+                }
+            }
+        }
+
+        [Fact]
+        [SkipOnPlatform(PlatformSupport.MobileAppleCrypto, "DSA is not available")]
+        public static void PrivateKey_FromCertificate_CanExportPrivate_DSA()
+        {
+            DSAParameters originalParameters = DSATestData.GetDSA1024Params();
+
+            using (DSA ca = DSA.Create())
+            {
+                ca.ImportParameters(originalParameters);
+                DSAX509SignatureGenerator gen = new DSAX509SignatureGenerator(ca);
+                X500DistinguishedName dn = new X500DistinguishedName("CN=potatos");
+
+                CertificateRequest req = new CertificateRequest(
+                    dn,
+                    gen.PublicKey,
+                    HashAlgorithmName.SHA1);
+
+                using (X509Certificate2 cert = req.Create(dn, gen, DateTimeOffset.Now, DateTimeOffset.Now.AddDays(3), [1, 2, 3]))
+                using (X509Certificate2 certWithKey = cert.CopyWithPrivateKey(ca))
+                using (DSA certKey = certWithKey.GetDSAPrivateKey())
+                {
+                    DSAParameters certParameters = certKey.ExportParameters(true);
+                    AssertExtensions.SequenceEqual(originalParameters.X, certParameters.X);
+                }
+            }
+        }
+
+        [Fact]
+        public static void PrivateKey_FromCertificate_CanExportPrivate_ECDiffieHellman()
+        {
+            using (ECDsa ca = ECDsa.Create(ECCurve.NamedCurves.nistP256))
+            using (ECDiffieHellman ecdh = ECDiffieHellman.Create(ECCurve.NamedCurves.nistP256))
+            {
+                CertificateRequest issuerRequest = new CertificateRequest(
+                    new X500DistinguishedName("CN=root"),
+                    ca,
+                    HashAlgorithmName.SHA256);
+
+                issuerRequest.CertificateExtensions.Add(
+                    new X509BasicConstraintsExtension(true, false, 0, true));
+
+                CertificateRequest request = new CertificateRequest(
+                    new X500DistinguishedName("CN=potato"),
+                    new PublicKey(ecdh),
+                    HashAlgorithmName.SHA256);
+
+                request.CertificateExtensions.Add(
+                    new X509BasicConstraintsExtension(false, false, 0, true));
+                request.CertificateExtensions.Add(
+                    new X509KeyUsageExtension(X509KeyUsageFlags.KeyAgreement, true));
+
+                DateTimeOffset notBefore = DateTimeOffset.UtcNow;
+                DateTimeOffset notAfter = notBefore.AddDays(30);
+                byte[] serial = [1, 2, 3, 4, 5, 6, 7, 8];
+
+                using (X509Certificate2 issuer = issuerRequest.CreateSelfSigned(notBefore, notAfter))
+                using (X509Certificate2 cert = request.Create(issuer, notBefore, notAfter, serial))
+                using (X509Certificate2 certWithKey = cert.CopyWithPrivateKey(ecdh))
+                using (ECDiffieHellman certKey = certWithKey.GetECDiffieHellmanPrivateKey())
+                {
+                    ECParameters certParameters = certKey.ExportParameters(true);
+                    ECParameters originalParameters = ecdh.ExportParameters(true);
+                    AssertExtensions.SequenceEqual(originalParameters.D, certParameters.D);
+                }
+            }
+        }
+
         [Fact]
         public static void PublicPrivateKey_IndependentLifetimes_ECDsa()
         {
@@ -591,6 +693,12 @@ public static void UseAfterDispose()
             }
         }
 
+        [Fact]
+        public static void EmptyPkcs7ThrowsException()
+        {
+            Assert.ThrowsAny<CryptographicException>(() => new X509Certificate2(TestData.EmptyPkcs7));
+        }
+
         [Fact]
         public static void ExportPublicKeyAsPkcs12()
         {
diff --git a/src/libraries/System.Security.Cryptography/tests/X509Certificates/ChainTests.cs b/src/libraries/System.Security.Cryptography/tests/X509Certificates/ChainTests.cs
index 60b16a3757f7..210f36dcf6d5 100644
--- a/src/libraries/System.Security.Cryptography/tests/X509Certificates/ChainTests.cs
+++ b/src/libraries/System.Security.Cryptography/tests/X509Certificates/ChainTests.cs
@@ -1269,6 +1269,60 @@ public static void BuildChainForSelfSignedSha3Certificate()
             }
         }
 
+        [Fact]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/100224", typeof(PlatformDetection), nameof(PlatformDetection.IsAndroid), nameof(PlatformDetection.IsArmOrArm64Process))]
+        public static void BuildChainForSelfSignedCertificate_WithSha256RsaSignature()
+        {
+            using (ChainHolder chainHolder = new ChainHolder())
+            using (X509Certificate2 cert = new X509Certificate2(TestData.SelfSignedCertSha256RsaBytes))
+            {
+                X509Chain chain = chainHolder.Chain;
+                chain.ChainPolicy.RevocationMode = X509RevocationMode.NoCheck;
+                chain.ChainPolicy.VerificationTime = cert.NotBefore.AddHours(2);
+
+                // No custom root of trust store means that this self-signed cert will at
+                // minimum be marked UntrustedRoot.
+
+                Assert.False(chain.Build(cert));
+                AssertExtensions.HasFlag(X509ChainStatusFlags.UntrustedRoot, chain.AllStatusFlags());
+            }
+        }
+
+        [Fact]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/100224", typeof(PlatformDetection), nameof(PlatformDetection.IsAndroid), nameof(PlatformDetection.IsArmOrArm64Process))]
+        public static void BuildChainForSelfSignedCertificate_WithUnknownOidSignature()
+        {
+            using (ChainHolder chainHolder = new ChainHolder())
+            using (X509Certificate2 cert = new X509Certificate2(TestData.SelfSignedCertDummyOidBytes))
+            {
+                X509Chain chain = chainHolder.Chain;
+                chain.ChainPolicy.RevocationMode = X509RevocationMode.NoCheck;
+                chain.ChainPolicy.VerificationTime = cert.NotBefore.AddHours(2);
+
+                // This tests a self-signed cert whose signature block contains a garbage signing alg OID.
+                // Some platforms return NotSignatureValid to indicate that they cannot understand the
+                // signature block. Other platforms return PartialChain to indicate that they think the
+                // bad signature block might correspond to some unknown, untrusted signer. Yet other
+                // platforms simply fail the operation; e.g., Windows's CertGetCertificateChain API returns
+                // NTE_BAD_ALGID, which we bubble up as CryptographicException.
+
+                if (PlatformDetection.UsesAppleCrypto)
+                {
+                    Assert.False(chain.Build(cert));
+                    AssertExtensions.HasFlag(X509ChainStatusFlags.PartialChain, chain.AllStatusFlags());
+                }
+                else if (PlatformDetection.IsOpenSslSupported)
+                {
+                    Assert.False(chain.Build(cert));
+                    AssertExtensions.HasFlag(X509ChainStatusFlags.NotSignatureValid, chain.AllStatusFlags());
+                }
+                else
+                {
+                    Assert.ThrowsAny<CryptographicException>(() => chain.Build(cert));
+                }
+            }
+        }
+
         internal static X509ChainStatusFlags AllStatusFlags(this X509Chain chain)
         {
             return chain.ChainStatus.Aggregate(
diff --git a/src/libraries/System.Security.Cryptography/tests/X509Certificates/ExtensionsTests/SubjectKeyIdentifierTests.cs b/src/libraries/System.Security.Cryptography/tests/X509Certificates/ExtensionsTests/SubjectKeyIdentifierTests.cs
index e4d743a00e01..583cac7b75c4 100644
--- a/src/libraries/System.Security.Cryptography/tests/X509Certificates/ExtensionsTests/SubjectKeyIdentifierTests.cs
+++ b/src/libraries/System.Security.Cryptography/tests/X509Certificates/ExtensionsTests/SubjectKeyIdentifierTests.cs
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Collections.Generic;
 using Test.Cryptography;
 using Xunit;
 
@@ -9,6 +10,10 @@ namespace System.Security.Cryptography.X509Certificates.Tests.ExtensionsTests
     [SkipOnPlatform(TestPlatforms.Browser, "Browser doesn't support X.509 certificates")]
     public static class SubjectKeyIdentifierTests
     {
+        private const string EcPublicKey = "1.2.840.10045.2.1";
+        private static ReadOnlySpan<byte> NistP256r1 => [0x06, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07];
+        private static ReadOnlySpan<byte> BrainpoolP256r1 => [0x06, 0x09, 0x2b, 0x24, 0x03, 0x03, 0x02, 0x08, 0x01, 0x01, 0x07];
+
         [Fact]
         public static void DefaultConstructor()
         {
@@ -148,6 +153,184 @@ public static void DecodeFromBER()
             Assert.Equal(skid, Convert.ToHexString(ext.SubjectKeyIdentifierBytes.Span));
         }
 
+        [Theory]
+        [MemberData(nameof(Rfc7093Examples))]
+        public static void EncodeDecode_Rfc7093_Examples(
+            byte[] subjectPublicKeyInfo,
+            X509SubjectKeyIdentifierHashAlgorithm algorithm,
+            string expectedDer,
+            string expectedIdentifier)
+        {
+            EncodeDecodeSubjectPublicKeyInfo(
+                subjectPublicKeyInfo,
+                algorithm,
+                false,
+                Convert.FromHexString(expectedDer),
+                expectedIdentifier);
+        }
+
+        [Theory]
+        [MemberData(nameof(Rfc7093Vectors))]
+        public static void EncodeDecode_Rfc7093_TestVectors(
+            byte[] key,
+            byte[] parameters,
+            X509SubjectKeyIdentifierHashAlgorithm algorithm,
+            string expectedDer,
+            string expectedIdentifier)
+        {
+            EncodeDecodePublicKey(
+                new PublicKey(new Oid("1.2.3.4"), new AsnEncodedData(parameters), new AsnEncodedData(key)),
+                algorithm,
+                false,
+                Convert.FromHexString(expectedDer),
+                expectedIdentifier);
+        }
+
+        [Theory]
+        [InlineData(X509SubjectKeyIdentifierHashAlgorithm.ShortSha256)]
+        [InlineData(X509SubjectKeyIdentifierHashAlgorithm.ShortSha384)]
+        [InlineData(X509SubjectKeyIdentifierHashAlgorithm.ShortSha512)]
+        public static void Rfc7093_Truncated_SubjectPublicKeyOnly(X509SubjectKeyIdentifierHashAlgorithm algorithm)
+        {
+            ReadOnlySpan<byte> ecKey =
+            [
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+            ];
+
+            PublicKey nistP256Key = new PublicKey(new Oid(EcPublicKey), new AsnEncodedData(NistP256r1), new AsnEncodedData(ecKey));
+            PublicKey brainboolP256Key = new PublicKey(new Oid(EcPublicKey), new AsnEncodedData(BrainpoolP256r1), new AsnEncodedData(ecKey));
+
+            X509SubjectKeyIdentifierExtension nistP256Extension = new(nistP256Key, algorithm, critical: false);
+            X509SubjectKeyIdentifierExtension brainpoolP256Extension = new(brainboolP256Key, algorithm, critical: false);
+
+            // Although the PublicKeys have different parameters by their curve, the key material is the same, so the
+            // hash should not differ.
+            AssertExtensions.SequenceEqual(
+                nistP256Extension.SubjectKeyIdentifierBytes.Span,
+                brainpoolP256Extension.SubjectKeyIdentifierBytes.Span);
+        }
+
+        [Theory]
+        [InlineData(X509SubjectKeyIdentifierHashAlgorithm.Sha256)]
+        [InlineData(X509SubjectKeyIdentifierHashAlgorithm.Sha384)]
+        [InlineData(X509SubjectKeyIdentifierHashAlgorithm.Sha512)]
+        public static void Rfc7093_SubjectPublicKeyInfo(X509SubjectKeyIdentifierHashAlgorithm algorithm)
+        {
+            ReadOnlySpan<byte> ecKey =
+            [
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+            ];
+
+            PublicKey nistP256Key = new PublicKey(new Oid(EcPublicKey), new AsnEncodedData(NistP256r1), new AsnEncodedData(ecKey));
+            PublicKey brainboolP256Key = new PublicKey(new Oid(EcPublicKey), new AsnEncodedData(BrainpoolP256r1), new AsnEncodedData(ecKey));
+
+            X509SubjectKeyIdentifierExtension nistP256Extension = new(nistP256Key, algorithm, critical: false);
+            X509SubjectKeyIdentifierExtension brainpoolP256Extension = new(brainboolP256Key, algorithm, critical: false);
+
+            // Although the PublicKeys have the same key, their parameters are different, thus should produce different
+            // hashes.
+            AssertExtensions.SequenceNotEqual(
+                nistP256Extension.SubjectKeyIdentifierBytes.Span,
+                brainpoolP256Extension.SubjectKeyIdentifierBytes.Span);
+        }
+
+        public static IEnumerable<object[]> Rfc7093Examples()
+        {
+            byte[] example =
+            [
+                0x30, 0x59,
+                    0x30, 0x13,
+                        0x06, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01,
+                        0x06, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07,
+                    0x03, 0x42, 0x00,
+                        0x04,
+                        0x7F, 0x7F, 0x35, 0xA7, 0x97, 0x94, 0xC9, 0x50, 0x06, 0x0B, 0x80, 0x29, 0xFC, 0x8F, 0x36, 0x3A,
+                        0x28, 0xF1, 0x11, 0x59, 0x69, 0x2D, 0x9D, 0x34, 0xE6, 0xAC, 0x94, 0x81, 0x90, 0x43, 0x47, 0x35,
+                        0xF8, 0x33, 0xB1, 0xA6, 0x66, 0x52, 0xDC, 0x51, 0x43, 0x37, 0xAF, 0xF7, 0xF5, 0xC9, 0xC7, 0x5D,
+                        0x67, 0x0C, 0x01, 0x9D, 0x95, 0xA5, 0xD6, 0x39, 0xB7, 0x27, 0x44, 0xC6, 0x4A, 0x91, 0x28, 0xBB,
+            ];
+
+            // Method 1 example from RFC 7093
+            yield return new object[]
+            {
+                example,
+                X509SubjectKeyIdentifierHashAlgorithm.ShortSha256,
+                "0414BF37B3E5808FD46D54B28E846311BCCE1CAD2E1A",
+                "BF37B3E5808FD46D54B28E846311BCCE1CAD2E1A",
+            };
+
+            // Method 4 example from RFC 7093
+            yield return new object[]
+            {
+                example,
+                X509SubjectKeyIdentifierHashAlgorithm.Sha256,
+                "04206D20896AB8BD833B6B66554BD59B20225D8A75A296088148399D7BF763D57405",
+                "6D20896AB8BD833B6B66554BD59B20225D8A75A296088148399D7BF763D57405",
+            };
+        }
+
+        public static IEnumerable<object[]> Rfc7093Vectors()
+        {
+            byte[] key = [1, 2, 3, 4];
+            byte[] parameters = [4, 4, 5, 6, 7, 8];
+
+            yield return new object[]
+            {
+                key,
+                parameters,
+                X509SubjectKeyIdentifierHashAlgorithm.Sha256,
+                "04200B710654AEB48CE6A1FF80C0F3E83FAD8DB63B7E1004BE8F3EEC10E95CF3C620",
+                "0B710654AEB48CE6A1FF80C0F3E83FAD8DB63B7E1004BE8F3EEC10E95CF3C620",
+            };
+
+            yield return new object[]
+            {
+                key,
+                parameters,
+                X509SubjectKeyIdentifierHashAlgorithm.Sha384,
+                "0430150E43FE7ACE471CDB3910809145AD44B5B7E641A0364D608A1C106C9AD47963BAFE05E431B7782D791DE1B7E25F69DA",
+                "150E43FE7ACE471CDB3910809145AD44B5B7E641A0364D608A1C106C9AD47963BAFE05E431B7782D791DE1B7E25F69DA",
+            };
+
+            yield return new object[]
+            {
+                key,
+                parameters,
+                X509SubjectKeyIdentifierHashAlgorithm.Sha512,
+                "0440493CF4FC4B5CF15C17D5FCF4F85CFD1CFBCF29BEA538B8063733922A43693FEECAE70A11BEE932E23C32350C1F624DB16962A6AE6EF4B29BB3BFAD838048006F",
+                "493CF4FC4B5CF15C17D5FCF4F85CFD1CFBCF29BEA538B8063733922A43693FEECAE70A11BEE932E23C32350C1F624DB16962A6AE6EF4B29BB3BFAD838048006F",
+            };
+
+            yield return new object[]
+            {
+                key,
+                parameters,
+                X509SubjectKeyIdentifierHashAlgorithm.ShortSha256,
+                "04149F64A747E1B97F131FABB6B447296C9B6F0201E7",
+                "9F64A747E1B97F131FABB6B447296C9B6F0201E7",
+            };
+
+            yield return new object[]
+            {
+                key,
+                parameters,
+                X509SubjectKeyIdentifierHashAlgorithm.ShortSha384,
+                "04145A667D62430A8C253EBAE433333904DC6E1D41DC",
+                "5A667D62430A8C253EBAE433333904DC6E1D41DC",
+            };
+
+            yield return new object[]
+            {
+                key,
+                parameters,
+                X509SubjectKeyIdentifierHashAlgorithm.ShortSha512,
+                "0414A7C976DB1723ADB41274178DC82E9B777941AB20",
+                "A7C976DB1723ADB41274178DC82E9B777941AB20",
+            };
+        }
+
         private static void EncodeDecode(
             byte[] certBytes,
             X509SubjectKeyIdentifierHashAlgorithm algorithm,
@@ -155,15 +338,30 @@ private static void EncodeDecode(
             byte[] expectedDer,
             string expectedIdentifier)
         {
-            PublicKey pk;
-
             using (var cert = new X509Certificate2(certBytes))
             {
-                pk = cert.PublicKey;
+                EncodeDecodePublicKey(cert.PublicKey, algorithm, critical, expectedDer, expectedIdentifier);
             }
+        }
 
-            X509SubjectKeyIdentifierExtension ext =
-                new X509SubjectKeyIdentifierExtension(pk, algorithm, critical);
+        private static void EncodeDecodeSubjectPublicKeyInfo(
+            byte[] spkiBytes,
+            X509SubjectKeyIdentifierHashAlgorithm algorithm,
+            bool critical,
+            byte[] expectedDer,
+            string expectedIdentifier)
+        {
+            PublicKey publicKey = PublicKey.CreateFromSubjectPublicKeyInfo(spkiBytes, out _);
+            EncodeDecodePublicKey(publicKey, algorithm, critical, expectedDer, expectedIdentifier);
+        }
+        private static void EncodeDecodePublicKey(
+            PublicKey publicKey,
+            X509SubjectKeyIdentifierHashAlgorithm algorithm,
+            bool critical,
+            byte[] expectedDer,
+            string expectedIdentifier)
+        {
+            X509SubjectKeyIdentifierExtension ext = new X509SubjectKeyIdentifierExtension(publicKey, algorithm, critical);
 
             byte[] rawData = ext.RawData;
             Assert.Equal(expectedDer, rawData);
diff --git a/src/libraries/System.Security.Cryptography/tests/X509Certificates/PfxIterationCountTests.X509Certificate2.cs b/src/libraries/System.Security.Cryptography/tests/X509Certificates/PfxIterationCountTests.X509Certificate2.cs
index 6e4697f40654..ce5c3be483da 100644
--- a/src/libraries/System.Security.Cryptography/tests/X509Certificates/PfxIterationCountTests.X509Certificate2.cs
+++ b/src/libraries/System.Security.Cryptography/tests/X509Certificates/PfxIterationCountTests.X509Certificate2.cs
@@ -1,8 +1,13 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using Microsoft.DotNet.RemoteExecutor;
+using Microsoft.DotNet.XUnitExtensions;
+using Xunit;
+
 namespace System.Security.Cryptography.X509Certificates.Tests
 {
+    [SkipOnPlatform(TestPlatforms.Browser, "Browser doesn't support X.509 certificates")]
     public class PfxIterationCountTests_X509Certificate2 : PfxIterationCountTests
     {
         internal override X509Certificate Import(byte[] blob)
@@ -22,5 +27,29 @@ internal override X509Certificate Import(string fileName, string password)
 
         internal override X509Certificate Import(string fileName, SecureString password)
             => new X509Certificate2(fileName, password);
+
+
+        [ConditionalFact(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))]
+        public static void Import_IterationCountLimitExceeded_ThrowsInAllottedTime()
+        {
+            const int AllottedTime = 5000;
+
+            if (!PfxTests.Pkcs12PBES2Supported)
+            {
+                throw new SkipTestException("Pkcs12NoPassword100MRounds uses PBES2, which is not supported on this version.");
+            }
+
+            RemoteInvokeOptions options = new()
+            {
+                TimeOut = AllottedTime
+            };
+
+            RemoteExecutor.Invoke(static () =>
+            {
+                byte[] blob = TestData.Pkcs12NoPassword100MRounds;
+                CryptographicException ce = Assert.Throws<CryptographicException>(() => new X509Certificate2(blob));
+                Assert.Contains(FwlinkId, ce.Message);
+            }, options).Dispose();
+        }
     }
 }
diff --git a/src/libraries/System.Security.Cryptography/tests/X509Certificates/PfxTests.cs b/src/libraries/System.Security.Cryptography/tests/X509Certificates/PfxTests.cs
index a8454f1cf3c3..c70260c9dc0d 100644
--- a/src/libraries/System.Security.Cryptography/tests/X509Certificates/PfxTests.cs
+++ b/src/libraries/System.Security.Cryptography/tests/X509Certificates/PfxTests.cs
@@ -250,6 +250,19 @@ public static void ECDHPrivateKeyProperty_WindowsPfx()
             }
         }
 
+        [Fact]
+        [PlatformSpecific(TestPlatforms.Windows)] // Only windows cares about the key usage attribute in the PKCS12
+        public static void ECDHPrivateKey_PfxKeyIsEcdsaConstrained()
+        {
+            // [SuppressMessage("Microsoft.Security", "CSCAN0220.DefaultPasswordContexts", Justification="Legacy Test Data")]
+            using (X509Certificate2 cert = new X509Certificate2(TestData.ECDsaP256_DigitalSignature_Pfx_Windows, "Test"))
+            {
+                    Assert.Null(cert.GetECDiffieHellmanPrivateKey());
+                    Assert.NotNull(cert.GetECDiffieHellmanPublicKey());
+                    Assert.NotNull(cert.GetECDsaPrivateKey());
+            }
+        }
+
         [Fact]
         [SkipOnPlatform(PlatformSupport.MobileAppleCrypto, "DSA is not available")]
         public static void DsaPrivateKeyProperty()
diff --git a/src/libraries/System.Security.Cryptography/tests/X509Certificates/TestData.cs b/src/libraries/System.Security.Cryptography/tests/X509Certificates/TestData.cs
index e3deb00b354d..0e0295ecf7f6 100644
--- a/src/libraries/System.Security.Cryptography/tests/X509Certificates/TestData.cs
+++ b/src/libraries/System.Security.Cryptography/tests/X509Certificates/TestData.cs
@@ -3437,6 +3437,26 @@ internal static DSAParameters GetDSA1024Params()
             "04020105000420AD0EB570ACFB8357A8E99B17672353CFBA69C76FFE5B6BC113" +
             "05577F12AE24040408D04E60444B79672302030927C1").HexToByteArray();
 
+        internal static readonly byte[] Pkcs12NoPassword100MRounds = Convert.FromBase64String(
+            "MIIDygIBAzCCA4QGCSqGSIb3DQEHAaCCA3UEggNxMIIDbTCCA2kGCSqGSIb3DQEHBqCCA1owggNW" +
+            "AgEAMIIDTwYJKoZIhvcNAQcBMF4GCSqGSIb3DQEFDTBRMDAGCSqGSIb3DQEFDDAjBBCNparJkj/3" +
+            "Uk8N7n0KCMeQAgEBMAwGCCqGSIb3DQILBQAwHQYJYIZIAWUDBAEqBBAcqpBrSDFcXYAWVWKcsEi9" +
+            "gIIC4P/ANdPYWI1vBH1U5sZGMIwLjY96pYaBelyZd0ZfKA8QfGHVNP9+E9hplBKGvRfIMiqmFutj" +
+            "RO4v7Ls8HZEk0hwBt9+6zXPWDJLxBDfSMHUd08+ZAH1yzEqq8aBMyIRVHOQkJFuFuCQJ9Ke5HzVi" +
+            "39S1rgHpnKYFvy+xZAhgI9OO1YxuFt4P9nhlEV/JCoyEQ/2iY99kKc3z7ArrV7BBFhfYGKhWQCBu" +
+            "kAmNBKweRldNWgDuW21WJEl5sByOmyDwpiK55Zxy1K1aIY8DYJTtIzzcX4CILaj6tClMH1G9w4jW" +
+            "BkQI2CG4vCsMl/28BbIP9EyH2C+gBAxvc1N32y3NSvO0/GPVenmQFF9KBMc4FVy4Z21syMKzUkBi" +
+            "PtIbDkcQbGAfyPgFk4SXCgn8OpIIvOOGI50/r+Hj14qex9VIrlwAAWCH8Y+YjwqFAQJYHQpb47zp" +
+            "B1fTwJFOrsXrBgLUzJLZKLR43yW2E9u6b8RsTuFHjh985naCHLuWPYOXS1zduBpHKpwoPUyCwD2r" +
+            "DAokCvA7RCsSXroUkpJarN4CAqsEB8COnzV1Dl2xcAYMerJxrTCKX6WIQUYo0/qeCoqTT38lDAlE" +
+            "7Ydjyx12iVM6eWejAdjORvlVtCQQtCxz8fZpdFGbMP8rf35A8hu++e4u0CLHnhTx38zPIm6H6YfN" +
+            "qj5h1Kz0xLzqnRfa7EGfDEERSHOy/DqNY4nUNG2DTjGOHy1QJelToG7Vo2L7CCZV+leX0nwLNExf" +
+            "hKEp+uQCiYSJe9iDm9fS9VymED79OJbr2bxdq3MggEGksLZv/H0ZT8Wsue0vq9jQ6J6YIEM+DKYr" +
+            "Zt2l4WgTBEKbpqmRvOqYRh9O8Sp+3IRNPzMC2ehzlYXqoPbtG4vxpoRsAMCM/W2x61jbsBSaNSFA" +
+            "eaUwcnKswRg30UonHUAIOJkqtadI57WE/Rat5eHVyya9f7ZN8bTFZjx0BQs6Bo8PK9yfqoidSN8w" +
+            "PTAfMAcGBSsOAwIaBBTt8zpgzygINykjoAwr2GKEywYFwgQUA+L1vfCVASwiE++gTfRgIScMGycC" +
+            "BAX14QA=");
+
         internal static readonly byte[] Pkcs12OpenSslOneCertDefaultEmptyPassword =
             ("308209CF0201033082098506092A864886F70D010701A0820976048209723082" +
             "096E308203E206092A864886F70D010706A08203D3308203CF020100308203C8" +
@@ -4224,5 +4244,56 @@ internal static DSAParameters GetDSA1024Params()
             "09463C6E50BCA36EB3F8BCB00D8A415D2D0DB5AE08303B301F300706052B0E03" +
             "021A0414A57105D833610A6D07EBFBE51E5486CD3F8BCE0D0414DB32290CC077" +
             "37E9D9446E37F104FA876C861C0102022710").HexToByteArray();
+
+        // Used for chain building tests (CN=Test chain building)
+        internal static readonly byte[] SelfSignedCertSha256RsaBytes = (
+            "308202BD308201A5A003020102020900EF79C10DFD657168300D06092A864886F70D0101" +
+            "0B0500301E311C301A060355040313135465737420636861696E206275696C64696E6730" +
+            "1E170D3231313031333231353735335A170D3232313031333231353735335A301E311C30" +
+            "1A060355040313135465737420636861696E206275696C64696E6730820122300D06092A" +
+            "864886F70D01010105000382010F003082010A0282010100E3B5BBF862313DEAA9172788" +
+            "278B26A3EAB61B9B0326F5CEA91B1A6C6DFD156836A2363BFAC5B0F4A78F4CFF5A11F35A" +
+            "831C6D7935D1DFD13DD81DA29AA0645CBA9F4D20BF991C625E6D61CF396C15914DEE41F6" +
+            "1190E97B52BFF7AE52B79FD0E2EEE3319EC23C30D27A52A2E8A963557B12BEC0664ADEF9" +
+            "3C520B587EC5DABFBC70980DB7473414B4B6BF982EA9AA0969F2A76AA085464AE78DFB2B" +
+            "F04BDE7192874679193119C2AABEC04D360F61925921660BF09A0489B7C53464F5FC35B8" +
+            "612F5B993D544475C20AC46CD350A34551FEA0ACBD138D8B72F79052BF0EB3BD794A426C" +
+            "0117CB77B4F311FFD1C628F8E438E5474509AD51FA035558771546310203010001300D06" +
+            "092A864886F70D01010B050003820101000A12CE2FC3DC854113D179725E9D9ADD013A42" +
+            "D66340CEA7A465D54EC357AD8FED1828862D8B5C32EB3D21FC8B26A7CFA9D9FB36F593CC" +
+            "6AD30C25C96E8100C3F07B1B51430245EE995864749C53B409260B4040705654710C236F" +
+            "D9B7DE3F3BE5E6E5047712C5E506419106A57C5290BB206A97F6A3FCC4B4C83E25C3FC6D" +
+            "2BAB03B941374086265EE08A90A8C72A63A4053044B9FA3ABD5ED5785CFDDB15A6A327BD" +
+            "C0CC2B115B9D33BD6E528E35670E5A6A8D9CF52199F8D693315C60D9ADAD54EF7FDCED36" +
+            "0C8C79E84D42AB5CB6355A70951B1ABF1F2B3FB8BEB7E3A8D6BA2293C0DB8C86B0BB060F" +
+            "0D6DB9939E88B998662A27F092634BBF21F58EEAAA").HexToByteArray();
+
+        // This is nearly identical to the cert in Pkcs7SelfSignedCertSha256RsaBytes,
+        // but we've replaced the OID (1.2.840.113549.1.1.11 sha256RSA) with a dummy OID
+        // 1.3.9999.1234.5678.1234. The cert should load properly into an X509Certificate2
+        // object but will cause chain building to fail.
+        internal static readonly byte[] SelfSignedCertDummyOidBytes = (
+            "308202BD308201A5A003020102020900EF79C10DFD657168300D06092A864886F70D0101" +
+            "0B0500301E311C301A060355040313135465737420636861696E206275696C64696E6730" +
+            "1E170D3231313031333231353735335A170D3232313031333231353735335A301E311C30" +
+            "1A060355040313135465737420636861696E206275696C64696E6730820122300D06092A" +
+            "864886F70D01010105000382010F003082010A0282010100E3B5BBF862313DEAA9172788" +
+            "278B26A3EAB61B9B0326F5CEA91B1A6C6DFD156836A2363BFAC5B0F4A78F4CFF5A11F35A" +
+            "831C6D7935D1DFD13DD81DA29AA0645CBA9F4D20BF991C625E6D61CF396C15914DEE41F6" +
+            "1190E97B52BFF7AE52B79FD0E2EEE3319EC23C30D27A52A2E8A963557B12BEC0664ADEF9" +
+            "3C520B587EC5DABFBC70980DB7473414B4B6BF982EA9AA0969F2A76AA085464AE78DFB2B" +
+            "F04BDE7192874679193119C2AABEC04D360F61925921660BF09A0489B7C53464F5FC35B8" +
+            "612F5B993D544475C20AC46CD350A34551FEA0ACBD138D8B72F79052BF0EB3BD794A426C" +
+            "0117CB77B4F311FFD1C628F8E438E5474509AD51FA035558771546310203010001300D06" +
+            "092BCE0F8952AC2E8952050003820101000A12CE2FC3DC854113D179725E9D9ADD013A42" +
+            "D66340CEA7A465D54EC357AD8FED1828862D8B5C32EB3D21FC8B26A7CFA9D9FB36F593CC" +
+            "6AD30C25C96E8100C3F07B1B51430245EE995864749C53B409260B4040705654710C236F" +
+            "D9B7DE3F3BE5E6E5047712C5E506419106A57C5290BB206A97F6A3FCC4B4C83E25C3FC6D" +
+            "2BAB03B941374086265EE08A90A8C72A63A4053044B9FA3ABD5ED5785CFDDB15A6A327BD" +
+            "C0CC2B115B9D33BD6E528E35670E5A6A8D9CF52199F8D693315C60D9ADAD54EF7FDCED36" +
+            "0C8C79E84D42AB5CB6355A70951B1ABF1F2B3FB8BEB7E3A8D6BA2293C0DB8C86B0BB060F" +
+            "0D6DB9939E88B998662A27F092634BBF21F58EEAAA").HexToByteArray();
+
+        internal static readonly byte[] EmptyPkcs7 = "300B06092A864886F70D010702".HexToByteArray();
     }
 }
diff --git a/src/libraries/System.ServiceModel.Syndication/src/System/ServiceModel/XmlBuffer.cs b/src/libraries/System.ServiceModel.Syndication/src/System/ServiceModel/XmlBuffer.cs
index f57dec531c28..17f3cfa2e1e7 100644
--- a/src/libraries/System.ServiceModel.Syndication/src/System/ServiceModel/XmlBuffer.cs
+++ b/src/libraries/System.ServiceModel.Syndication/src/System/ServiceModel/XmlBuffer.cs
@@ -86,7 +86,21 @@ public void Close()
             _bufferState = BufferState.Reading;
             _buffer = new byte[_stream.Length];
             _stream.Position = 0;
-            _stream.Read(_buffer, 0, _buffer.Length);
+
+#if NET7_0_OR_GREATER
+            _stream.ReadExactly(_buffer);
+#else
+            int totalRead = 0;
+            while (totalRead < _buffer.Length)
+            {
+                int bytesRead = _stream.Read(_buffer, totalRead, _buffer.Length - totalRead);
+                if (bytesRead <= 0)
+                {
+                    throw new EndOfStreamException();
+                }
+                totalRead += bytesRead;
+            }
+#endif
 
             _writer = null;
             _stream = null;
diff --git a/src/libraries/System.ServiceProcess.ServiceController/src/System.ServiceProcess.ServiceController.csproj b/src/libraries/System.ServiceProcess.ServiceController/src/System.ServiceProcess.ServiceController.csproj
index 91e3394f8534..f212430e588b 100644
--- a/src/libraries/System.ServiceProcess.ServiceController/src/System.ServiceProcess.ServiceController.csproj
+++ b/src/libraries/System.ServiceProcess.ServiceController/src/System.ServiceProcess.ServiceController.csproj
@@ -4,7 +4,7 @@
     <TargetFrameworks>$(NetCoreAppCurrent)-windows;$(NetCoreAppCurrent);$(NetCoreAppMinimum)-windows;$(NetCoreAppMinimum);netstandard2.0;$(NetFrameworkMinimum)</TargetFrameworks>
     <TargetFrameworks Condition="'$(NetCoreAppPrevious)' != ''">$(TargetFrameworks);$(NetCoreAppPrevious)-windows;$(NetCoreAppPrevious)</TargetFrameworks>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-    <NoWarn>$(NoWarn);CA2249</NoWarn>
+    <NoWarn>$(NoWarn);CA2249;CA1865</NoWarn>
     <UseCompilerGeneratedDocXmlFile>false</UseCompilerGeneratedDocXmlFile>
     <IsPackable>true</IsPackable>
     <PackageDescription>Provides the System.ServiceProcess.ServiceController class, which allows you to connect to a Windows service, manipulate it, or get information about it.
diff --git a/src/libraries/System.Speech/src/Internal/Synthesis/AudioBase.cs b/src/libraries/System.Speech/src/Internal/Synthesis/AudioBase.cs
index ff57d0987207..782cd59fb6c3 100644
--- a/src/libraries/System.Speech/src/Internal/Synthesis/AudioBase.cs
+++ b/src/libraries/System.Speech/src/Internal/Synthesis/AudioBase.cs
@@ -121,7 +121,22 @@ internal void PlayWaveFile(AudioData audio)
                     try
                     {
                         byte[] data = new byte[(int)audio._stream.Length];
-                        audio._stream.Read(data, 0, data.Length);
+
+#if NET7_0_OR_GREATER
+                        audio._stream.ReadExactly(data);
+#else
+                        int totalRead = 0;
+                        while (totalRead < data.Length)
+                        {
+                            int bytesRead = audio._stream.Read(data, totalRead, data.Length - totalRead);
+                            if (bytesRead <= 0)
+                            {
+                                throw new EndOfStreamException();
+                            }
+                            totalRead += bytesRead;
+                        }
+#endif
+
                         Play(data);
                     }
                     finally
diff --git a/src/libraries/System.Speech/src/Internal/Synthesis/EngineSite.cs b/src/libraries/System.Speech/src/Internal/Synthesis/EngineSite.cs
index e37766fc656b..a658f37ca618 100644
--- a/src/libraries/System.Speech/src/Internal/Synthesis/EngineSite.cs
+++ b/src/libraries/System.Speech/src/Internal/Synthesis/EngineSite.cs
@@ -174,7 +174,22 @@ public Stream LoadResource(Uri uri, string mediaType)
                     int cLen = (int)stream.Length;
                     MemoryStream memStream = new(cLen);
                     byte[] ab = new byte[cLen];
-                    stream.Read(ab, 0, ab.Length);
+
+#if NET7_0_OR_GREATER
+                    stream.ReadExactly(ab);
+#else
+                    int totalRead = 0;
+                    while (totalRead < cLen)
+                    {
+                        int bytesRead = stream.Read(ab, totalRead, cLen - totalRead);
+                        if (bytesRead <= 0)
+                        {
+                            throw new EndOfStreamException();
+                        }
+                        totalRead += bytesRead;
+                    }
+#endif
+
                     _resourceLoader.UnloadFile(localPath);
                     memStream.Write(ab, 0, cLen);
                     memStream.Position = 0;
diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingCharBuffer.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingCharBuffer.cs
index c65756dc3471..1ba281dc68af 100644
--- a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingCharBuffer.cs
+++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/EncodingCharBuffer.cs
@@ -53,7 +53,7 @@ internal unsafe bool AddChar(char ch, int numBytes)
                 {
                     // Throw maybe
                     _bytes -= numBytes;                                        // Didn't encode these bytes
-                    _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);    // Throw?
+                    _enc.ThrowCharsOverflow(_decoder, _chars == _charStart);    // Throw?
                     return false;                                           // No throw, but no store either
                 }
 
@@ -72,11 +72,11 @@ internal unsafe bool AddChar(char ch)
         internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
         {
             // Need room for 2 chars
-            if (_chars >= _charEnd - 1)
+            if (_charEnd - _chars < 2)
             {
                 // Throw maybe
                 _bytes -= numBytes;                                        // Didn't encode these bytes
-                _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);    // Throw?
+                _enc.ThrowCharsOverflow(_decoder, _chars == _charStart);    // Throw?
                 return false;                                           // No throw, but no store either
             }
             return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
diff --git a/src/libraries/System.Text.Encodings.Web/tools/GenDefinedCharList/Program.cs b/src/libraries/System.Text.Encodings.Web/tools/GenDefinedCharList/Program.cs
index 4168d6fc301b..ef9604ab3020 100644
--- a/src/libraries/System.Text.Encodings.Web/tools/GenDefinedCharList/Program.cs
+++ b/src/libraries/System.Text.Encodings.Web/tools/GenDefinedCharList/Program.cs
@@ -204,7 +204,7 @@ private static bool IsRangeDefinition(string rawName, out string rangeName, out
             // Ranges are represented within angle brackets, such as the following:
             // DC00;<Low Surrogate, First>;Cs;0;L;;;;;N;;;;;
             // DFFF;<Low Surrogate, Last>;Cs;0;L;;;;;N;;;;;
-            if (rawName.StartsWith("<", StringComparison.Ordinal))
+            if (rawName.StartsWith('<'))
             {
                 if (rawName.EndsWith(", First>", StringComparison.Ordinal))
                 {
diff --git a/src/libraries/System.Text.Json/gen/JsonSourceGenerator.Emitter.cs b/src/libraries/System.Text.Json/gen/JsonSourceGenerator.Emitter.cs
index c41234d54b7b..7dbc4de3a57f 100644
--- a/src/libraries/System.Text.Json/gen/JsonSourceGenerator.Emitter.cs
+++ b/src/libraries/System.Text.Json/gen/JsonSourceGenerator.Emitter.cs
@@ -6,7 +6,7 @@
 using System.Linq;
 using System.Reflection;
 using System.Text.Json.Serialization;
-using Microsoft.CodeAnalysis;
+using Microsoft.CodeAnalysis.CSharp;
 using Microsoft.CodeAnalysis.Text;
 using SourceGenerators;
 
@@ -638,16 +638,16 @@ private void GeneratePropMetadataInitFunc(SourceWriter writer, string propInitMe
                     writer.WriteLine($$"""
                         var {{InfoVarName}}{{i}} = new {{JsonPropertyInfoValuesTypeRef}}<{{propertyTypeFQN}}>
                         {
-                            IsProperty = {{FormatBool(property.IsProperty)}},
-                            IsPublic = {{FormatBool(property.IsPublic)}},
-                            IsVirtual = {{FormatBool(property.IsVirtual)}},
+                            IsProperty = {{FormatBoolLiteral(property.IsProperty)}},
+                            IsPublic = {{FormatBoolLiteral(property.IsPublic)}},
+                            IsVirtual = {{FormatBoolLiteral(property.IsVirtual)}},
                             DeclaringType = typeof({{property.DeclaringType.FullyQualifiedName}}),
                             Converter = {{converterInstantiationExpr ?? "null"}},
                             Getter = {{getterValue}},
                             Setter = {{setterValue}},
                             IgnoreCondition = {{ignoreConditionNamedArg}},
-                            HasJsonInclude = {{FormatBool(property.HasJsonInclude)}},
-                            IsExtensionData = {{FormatBool(property.IsExtensionData)}},
+                            HasJsonInclude = {{FormatBoolLiteral(property.HasJsonInclude)}},
+                            IsExtensionData = {{FormatBoolLiteral(property.IsExtensionData)}},
                             NumberHandling = {{FormatNumberHandling(property.NumberHandling)}},
                             PropertyName = {{FormatStringLiteral(property.MemberName)}},
                             JsonPropertyName = {{FormatStringLiteral(property.JsonPropertyName)}}
@@ -701,10 +701,10 @@ private static void GenerateCtorParamMetadataInitFunc(SourceWriter writer, strin
                     writer.WriteLine($$"""
                         {{parametersVarName}}[{{spec.ParameterIndex}}] = new()
                         {
-                            Name = "{{spec.Name}}",
+                            Name = {{FormatStringLiteral(spec.Name)}},
                             ParameterType = typeof({{spec.ParameterType.FullyQualifiedName}}),
                             Position = {{spec.ParameterIndex}},
-                            HasDefaultValue = {{FormatBool(spec.HasDefaultValue)}},
+                            HasDefaultValue = {{FormatBoolLiteral(spec.HasDefaultValue)}},
                             DefaultValue = {{CSharpSyntaxUtilities.FormatLiteral(spec.DefaultValue, spec.ParameterType)}}
                         };
 
@@ -721,7 +721,7 @@ private static void GenerateCtorParamMetadataInitFunc(SourceWriter writer, strin
                     writer.WriteLine($$"""
                         {{parametersVarName}}[{{spec.ParameterIndex}}] = new()
                         {
-                            Name = "{{spec.Name}}",
+                            Name = {{FormatStringLiteral(spec.Name)}},
                             ParameterType = typeof({{spec.ParameterType.FullyQualifiedName}}),
                             Position = {{spec.ParameterIndex}},
                         };
@@ -1106,10 +1106,10 @@ private static void GetLogicForDefaultSerializerOptionsInit(SourceGenerationOpti
                 writer.Indentation++;
 
                 if (optionsSpec.AllowOutOfOrderMetadataProperties is bool allowOutOfOrderMetadataProperties)
-                    writer.WriteLine($"AllowOutOfOrderMetadataProperties = {FormatBool(allowOutOfOrderMetadataProperties)},");
+                    writer.WriteLine($"AllowOutOfOrderMetadataProperties = {FormatBoolLiteral(allowOutOfOrderMetadataProperties)},");
 
                 if (optionsSpec.AllowTrailingCommas is bool allowTrailingCommas)
-                    writer.WriteLine($"AllowTrailingCommas = {FormatBool(allowTrailingCommas)},");
+                    writer.WriteLine($"AllowTrailingCommas = {FormatBoolLiteral(allowTrailingCommas)},");
 
                 if (optionsSpec.Converters is { Count: > 0 } converters)
                 {
@@ -1136,13 +1136,13 @@ private static void GetLogicForDefaultSerializerOptionsInit(SourceGenerationOpti
                     writer.WriteLine($"DictionaryKeyPolicy = {FormatNamingPolicy(dictionaryKeyPolicy)},");
 
                 if (optionsSpec.IgnoreReadOnlyFields is bool ignoreReadOnlyFields)
-                    writer.WriteLine($"IgnoreReadOnlyFields = {FormatBool(ignoreReadOnlyFields)},");
+                    writer.WriteLine($"IgnoreReadOnlyFields = {FormatBoolLiteral(ignoreReadOnlyFields)},");
 
                 if (optionsSpec.IgnoreReadOnlyProperties is bool ignoreReadOnlyProperties)
-                    writer.WriteLine($"IgnoreReadOnlyProperties = {FormatBool(ignoreReadOnlyProperties)},");
+                    writer.WriteLine($"IgnoreReadOnlyProperties = {FormatBoolLiteral(ignoreReadOnlyProperties)},");
 
                 if (optionsSpec.IncludeFields is bool includeFields)
-                    writer.WriteLine($"IncludeFields = {FormatBool(includeFields)},");
+                    writer.WriteLine($"IncludeFields = {FormatBoolLiteral(includeFields)},");
 
                 if (optionsSpec.MaxDepth is int maxDepth)
                     writer.WriteLine($"MaxDepth = {maxDepth},");
@@ -1154,7 +1154,7 @@ private static void GetLogicForDefaultSerializerOptionsInit(SourceGenerationOpti
                     writer.WriteLine($"PreferredObjectCreationHandling = {FormatObjectCreationHandling(preferredObjectCreationHandling)},");
 
                 if (optionsSpec.PropertyNameCaseInsensitive is bool propertyNameCaseInsensitive)
-                    writer.WriteLine($"PropertyNameCaseInsensitive = {FormatBool(propertyNameCaseInsensitive)},");
+                    writer.WriteLine($"PropertyNameCaseInsensitive = {FormatBoolLiteral(propertyNameCaseInsensitive)},");
 
                 if (optionsSpec.PropertyNamingPolicy is JsonKnownNamingPolicy propertyNamingPolicy)
                     writer.WriteLine($"PropertyNamingPolicy = {FormatNamingPolicy(propertyNamingPolicy)},");
@@ -1169,10 +1169,10 @@ private static void GetLogicForDefaultSerializerOptionsInit(SourceGenerationOpti
                     writer.WriteLine($"UnmappedMemberHandling = {FormatUnmappedMemberHandling(unmappedMemberHandling)},");
 
                 if (optionsSpec.WriteIndented is bool writeIndented)
-                    writer.WriteLine($"WriteIndented = {FormatBool(writeIndented)},");
+                    writer.WriteLine($"WriteIndented = {FormatBoolLiteral(writeIndented)},");
 
                 if (optionsSpec.IndentCharacter is char indentCharacter)
-                    writer.WriteLine($"IndentCharacter = {FormatIndentChar(indentCharacter)},");
+                    writer.WriteLine($"IndentCharacter = {FormatCharLiteral(indentCharacter)},");
 
                 if (optionsSpec.IndentSize is int indentSize)
                     writer.WriteLine($"IndentSize = {indentSize},");
@@ -1238,7 +1238,7 @@ private static void GenerateConverterHelpers(SourceWriter writer, bool emitGetCo
                         {
                             throw new {{InvalidOperationExceptionTypeRef}}(string.Format("{{ExceptionMessages.IncompatibleConverterType}}", converter.GetType(), type));
                         }
-                    
+
                         if (converter is {{JsonConverterFactoryTypeRef}} factory)
                         {
                             converter = factory.CreateConverter(type, options);
@@ -1247,7 +1247,7 @@ private static void GenerateConverterHelpers(SourceWriter writer, bool emitGetCo
                                 throw new {{InvalidOperationExceptionTypeRef}}(string.Format("{{ExceptionMessages.InvalidJsonConverterFactoryOutput}}", factory.GetType()));
                             }
                         }
-                    
+
                         return converter;
                     }
                     """);
@@ -1263,7 +1263,7 @@ private static void GenerateConverterHelpers(SourceWriter writer, bool emitGetCo
                             {
                                 return ({{JsonConverterTypeRef}}<{{TypeParameter}}?>){{ExpandConverterMethodName}}(typeof({{TypeParameter}}?), converter, options, validateCanConvert: false);
                             }
-                    
+
                             converter = {{ExpandConverterMethodName}}(typeof({{TypeParameter}}), converter, options);
                             {{JsonTypeInfoTypeRef}}<{{TypeParameter}}> typeInfo = {{JsonMetadataServicesTypeRef}}.{{CreateValueInfoMethodName}}<{{TypeParameter}}>(options, converter);
                             return {{JsonMetadataServicesTypeRef}}.GetNullableConverter<{{TypeParameter}}>(typeInfo);
@@ -1320,7 +1320,7 @@ private SourceText GetPropertyNameInitialization(ContextGenerationSpec contextSp
 
                 foreach (KeyValuePair<string, string> name_varName_pair in _propertyNames)
                 {
-                    writer.WriteLine($$"""private static readonly {{JsonEncodedTextTypeRef}} {{name_varName_pair.Value}} = {{JsonEncodedTextTypeRef}}.Encode("{{name_varName_pair.Key}}");""");
+                    writer.WriteLine($$"""private static readonly {{JsonEncodedTextTypeRef}} {{name_varName_pair.Value}} = {{JsonEncodedTextTypeRef}}.Encode({{FormatStringLiteral(name_varName_pair.Key)}});""");
                 }
 
                 return CompleteSourceFileAndReturnText(writer);
@@ -1351,9 +1351,9 @@ private static string FormatJsonSerializerDefaults(JsonSerializerDefaults defaul
 
             private static string GetCreateValueInfoMethodRef(string typeCompilableName) => $"{CreateValueInfoMethodName}<{typeCompilableName}>";
 
-            private static string FormatBool(bool value) => value ? "true" : "false";
-            private static string FormatStringLiteral(string? value) => value is null ? "null" : $"\"{value}\"";
-            private static string FormatIndentChar(char value) => value is '\t' ? "'\\t'" : $"'{value}'";
+            private static string FormatBoolLiteral(bool value) => value ? "true" : "false";
+            private static string FormatStringLiteral(string? value) => value is null ? "null" : SymbolDisplay.FormatLiteral(value, quote: true);
+            private static string FormatCharLiteral(char value) => SymbolDisplay.FormatLiteral(value, quote: true);
 
             /// <summary>
             /// Method used to generate JsonTypeInfo given options instance
diff --git a/src/libraries/System.Text.Json/ref/System.Text.Json.cs b/src/libraries/System.Text.Json/ref/System.Text.Json.cs
index b81b745471f1..c0f23522b137 100644
--- a/src/libraries/System.Text.Json/ref/System.Text.Json.cs
+++ b/src/libraries/System.Text.Json/ref/System.Text.Json.cs
@@ -978,7 +978,7 @@ public enum JsonKnownNamingPolicy
         KebabCaseLower = 4,
         KebabCaseUpper = 5,
     }
-    public sealed partial class JsonNumberEnumConverter<TEnum> : System.Text.Json.Serialization.JsonConverterFactory where TEnum : struct
+    public sealed partial class JsonNumberEnumConverter<TEnum> : System.Text.Json.Serialization.JsonConverterFactory where TEnum : struct, System.Enum
     {
         public JsonNumberEnumConverter() { }
         public override bool CanConvert(System.Type typeToConvert) { throw null; }
diff --git a/src/libraries/System.Text.Json/src/System.Text.Json.csproj b/src/libraries/System.Text.Json/src/System.Text.Json.csproj
index c95276f2db2b..34fb2d13a6f3 100644
--- a/src/libraries/System.Text.Json/src/System.Text.Json.csproj
+++ b/src/libraries/System.Text.Json/src/System.Text.Json.csproj
@@ -408,7 +408,7 @@ The System.Text.Json library is built-in as part of the shared framework in .NET
   </ItemGroup>
 
   <ItemGroup>
-    <ProjectReference Include="..\gen\System.Text.Json.SourceGeneration.Roslyn3.11.csproj" ReferenceOutputAssembly="false" PackAsAnalyzer="true" Condition="'$(DotNetBuildFromSource)' != 'true'" />
+    <ProjectReference Include="..\gen\System.Text.Json.SourceGeneration.Roslyn3.11.csproj" ReferenceOutputAssembly="false" PackAsAnalyzer="true" Condition="'$(DotNetBuildSourceOnly)' != 'true'" />
     <ProjectReference Include="..\gen\System.Text.Json.SourceGeneration.Roslyn4.0.csproj" ReferenceOutputAssembly="false" PackAsAnalyzer="true" />
     <ProjectReference Include="..\gen\System.Text.Json.SourceGeneration.Roslyn4.4.csproj" ReferenceOutputAssembly="false" PackAsAnalyzer="true" />
   </ItemGroup>
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonDocument.Parse.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonDocument.Parse.cs
index e5719888d80a..f9b1fffd4243 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonDocument.Parse.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonDocument.Parse.cs
@@ -165,8 +165,6 @@ internal static JsonDocument ParseValue(Stream utf8Json, JsonDocumentOptions opt
 
         internal static JsonDocument ParseValue(ReadOnlySpan<byte> utf8Json, JsonDocumentOptions options)
         {
-            Debug.Assert(utf8Json != null);
-
             byte[] owned = new byte[utf8Json.Length];
             utf8Json.CopyTo(owned);
 
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonElement.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonElement.cs
index c5d687892335..1ca7fff9f7e3 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonElement.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonElement.cs
@@ -5,6 +5,7 @@
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 
 namespace System.Text.Json
 {
@@ -1243,7 +1244,7 @@ public bool ValueEquals(ReadOnlySpan<byte> utf8Text)
             if (TokenType == JsonTokenType.Null)
             {
                 // This is different than Length == 0, in that it tests true for null, but false for ""
-                return utf8Text == default;
+                return Unsafe.IsNullRef(ref MemoryMarshal.GetReference(utf8Text));
             }
 
             return TextEqualsHelper(utf8Text, isPropertyName: false, shouldUnescape: true);
@@ -1271,7 +1272,7 @@ public bool ValueEquals(ReadOnlySpan<char> text)
             if (TokenType == JsonTokenType.Null)
             {
                 // This is different than Length == 0, in that it tests true for null, but false for ""
-                return text == default;
+                return Unsafe.IsNullRef(ref MemoryMarshal.GetReference(text));
             }
 
             return TextEqualsHelper(text, isPropertyName: false);
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/CharConverter.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/CharConverter.cs
index 8c19ce1474a6..82e7e2699a95 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/CharConverter.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/CharConverter.cs
@@ -37,7 +37,7 @@ public override void Write(Utf8JsonWriter writer, char value, JsonSerializerOpti
         {
             writer.WriteStringValue(
 #if NETCOREAPP
-                MemoryMarshal.CreateSpan(ref value, 1)
+                new ReadOnlySpan<char>(in value)
 #else
                 value.ToString()
 #endif
@@ -54,7 +54,7 @@ internal override void WriteAsPropertyNameCore(Utf8JsonWriter writer, char value
         {
             writer.WritePropertyName(
 #if NETCOREAPP
-                MemoryMarshal.CreateSpan(ref value, 1)
+                new ReadOnlySpan<char>(in value)
 #else
                 value.ToString()
 #endif
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializerOptionsUpdateHandler.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializerOptionsUpdateHandler.cs
index 9e1d5e4c0d95..3675f269d8e2 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializerOptionsUpdateHandler.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializerOptionsUpdateHandler.cs
@@ -2,9 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
-using System.Diagnostics.CodeAnalysis;
 using System.Reflection.Metadata;
-using System.Runtime.CompilerServices;
 using System.Text.Json;
 using System.Text.Json.Serialization.Metadata;
 
@@ -25,13 +23,7 @@ public static void ClearCache(Type[]? types)
                 options.Key.ClearCaches();
             }
 
-            if (RuntimeFeature.IsDynamicCodeSupported)
-            {
-                // Flush the dynamic method cache
-#pragma warning disable IL3050 // The analyzer doesn't understand runtime feature conditions: https://github.com/dotnet/linker/issues/2715
-                ReflectionEmitCachingMemberAccessor.Clear();
-#pragma warning restore IL3050
-            }
+            DefaultJsonTypeInfoResolver.ClearMemberAccessorCaches();
         }
     }
 }
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/DefaultJsonTypeInfoResolver.Helpers.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/DefaultJsonTypeInfoResolver.Helpers.cs
index 3f929d873784..3a39315eba51 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/DefaultJsonTypeInfoResolver.Helpers.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/DefaultJsonTypeInfoResolver.Helpers.cs
@@ -7,6 +7,7 @@
 using System.Reflection;
 using System.Runtime.CompilerServices;
 using System.Text.Json.Reflection;
+using System.Threading;
 
 namespace System.Text.Json.Serialization.Metadata
 {
@@ -14,23 +15,30 @@ public partial class DefaultJsonTypeInfoResolver
     {
         internal static MemberAccessor MemberAccessor
         {
+            [RequiresUnreferencedCode(JsonSerializer.SerializationRequiresDynamicCodeMessage)]
             [RequiresDynamicCode(JsonSerializer.SerializationRequiresDynamicCodeMessage)]
             get
             {
-                return s_memberAccessor ??=
+                return s_memberAccessor ?? Initialize();
+                static MemberAccessor Initialize()
+                {
+                    MemberAccessor value =
 #if NETCOREAPP
-                // if dynamic code isn't supported, fallback to reflection
-                RuntimeFeature.IsDynamicCodeSupported ?
-                    new ReflectionEmitCachingMemberAccessor() :
-                    new ReflectionMemberAccessor();
+                        // if dynamic code isn't supported, fallback to reflection
+                        RuntimeFeature.IsDynamicCodeSupported ?
+                            new ReflectionEmitCachingMemberAccessor() :
+                            new ReflectionMemberAccessor();
 #elif NETFRAMEWORK
-                    new ReflectionEmitCachingMemberAccessor();
+                            new ReflectionEmitCachingMemberAccessor();
 #else
-                    new ReflectionMemberAccessor();
+                            new ReflectionMemberAccessor();
 #endif
+                    return Interlocked.CompareExchange(ref s_memberAccessor, value, null) ?? value;
+                }
             }
         }
 
+        internal static void ClearMemberAccessorCaches() => s_memberAccessor?.Clear();
         private static MemberAccessor? s_memberAccessor;
 
         [RequiresUnreferencedCode(JsonSerializer.SerializationUnreferencedCodeMessage)]
@@ -113,6 +121,33 @@ private static void PopulateProperties(JsonTypeInfo typeInfo)
             }
         }
 
+        private const BindingFlags AllInstanceMembers =
+            BindingFlags.Instance |
+            BindingFlags.Public |
+            BindingFlags.NonPublic |
+            BindingFlags.DeclaredOnly;
+
+        /// <summary>
+        /// Looks up the type for a member matching the given name and member type.
+        /// </summary>
+        [RequiresUnreferencedCode(JsonSerializer.SerializationUnreferencedCodeMessage)]
+        internal static MemberInfo? LookupMemberInfo(Type type, MemberTypes memberType, string name)
+        {
+            Debug.Assert(memberType is MemberTypes.Field or MemberTypes.Property);
+
+            // Walk the type hierarchy starting from the current type up to the base type(s)
+            foreach (Type t in type.GetSortedTypeHierarchy())
+            {
+                MemberInfo[] members = t.GetMember(name, memberType, AllInstanceMembers);
+                if (members.Length > 0)
+                {
+                    return members[0];
+                }
+            }
+
+            return null;
+        }
+
         [RequiresUnreferencedCode(JsonSerializer.SerializationUnreferencedCodeMessage)]
         [RequiresDynamicCode(JsonSerializer.SerializationRequiresDynamicCodeMessage)]
         private static void AddMembersDeclaredBySuperType(
@@ -124,17 +159,11 @@ private static void AddMembersDeclaredBySuperType(
             Debug.Assert(!typeInfo.IsReadOnly);
             Debug.Assert(currentType.IsAssignableFrom(typeInfo.Type));
 
-            const BindingFlags BindingFlags =
-                BindingFlags.Instance |
-                BindingFlags.Public |
-                BindingFlags.NonPublic |
-                BindingFlags.DeclaredOnly;
-
             // Compiler adds RequiredMemberAttribute to type if any of the members are marked with 'required' keyword.
             bool shouldCheckMembersForRequiredMemberAttribute =
                 !constructorHasSetsRequiredMembersAttribute && currentType.HasRequiredMemberAttribute();
 
-            foreach (PropertyInfo propertyInfo in currentType.GetProperties(BindingFlags))
+            foreach (PropertyInfo propertyInfo in currentType.GetProperties(AllInstanceMembers))
             {
                 // Ignore indexers and virtual properties that have overrides that were [JsonIgnore]d.
                 if (propertyInfo.GetIndexParameters().Length > 0 ||
@@ -160,7 +189,7 @@ private static void AddMembersDeclaredBySuperType(
                 }
             }
 
-            foreach (FieldInfo fieldInfo in currentType.GetFields(BindingFlags))
+            foreach (FieldInfo fieldInfo in currentType.GetFields(AllInstanceMembers))
             {
                 bool hasJsonIncludeAtribute = fieldInfo.GetCustomAttribute<JsonIncludeAttribute>(inherit: false) != null;
                 if (hasJsonIncludeAtribute || (fieldInfo.IsPublic && typeInfo.Options.IncludeFields))
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/JsonMetadataServices.Helpers.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/JsonMetadataServices.Helpers.cs
index 965b4cea3957..5c1de5c199b1 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/JsonMetadataServices.Helpers.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/JsonMetadataServices.Helpers.cs
@@ -190,6 +190,7 @@ private static JsonPropertyInfo<T> CreatePropertyInfoCore<T>(JsonPropertyInfoVal
             propertyInfo.IgnoreCondition = propertyInfoValues.IgnoreCondition;
             propertyInfo.JsonTypeInfo = propertyInfoValues.PropertyTypeInfo;
             propertyInfo.NumberHandling = propertyInfoValues.NumberHandling;
+            propertyInfo.IsSourceGenerated = true;
 
             return propertyInfo;
         }
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/JsonPropertyInfo.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/JsonPropertyInfo.cs
index e2234093474e..959490b53f19 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/JsonPropertyInfo.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/JsonPropertyInfo.cs
@@ -5,6 +5,7 @@
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Reflection;
+using System.Threading;
 
 namespace System.Text.Json.Serialization.Metadata
 {
@@ -162,17 +163,39 @@ internal JsonIgnoreCondition? IgnoreCondition
         /// </remarks>
         public ICustomAttributeProvider? AttributeProvider
         {
-            get => _attributeProvider;
+            get
+            {
+                ICustomAttributeProvider attributeProvider = _attributeProvider ?? InitializeAttributeProvider();
+                return ReferenceEquals(attributeProvider, s_nullAttributeProvider) ? null : attributeProvider;
+            }
             set
             {
                 VerifyMutable();
 
-                _attributeProvider = value;
+                _attributeProvider = value ?? s_nullAttributeProvider;
             }
         }
 
-        private JsonObjectCreationHandling? _objectCreationHandling;
-        internal JsonObjectCreationHandling EffectiveObjectCreationHandling { get; private set; }
+        // Because the getter can initialize its own backing field, we want to avoid races between the getter and setter.
+        // This is done using CAS on the single _attributeProvider field which employs the following encoding:
+        // null: not initialized, s_nullAttributeProvider: null, otherwise: _attributeProvider
+        private ICustomAttributeProvider? _attributeProvider;
+        private static readonly ICustomAttributeProvider s_nullAttributeProvider = typeof(NullAttributeProviderPlaceholder);
+        private sealed class NullAttributeProviderPlaceholder;
+
+        [UnconditionalSuppressMessage("Trimming", "IL2026:RequiresUnreferencedCode",
+            Justification = "Looks up members that are already being referenced by the source generator.")]
+        private ICustomAttributeProvider InitializeAttributeProvider()
+        {
+            // If the property is source generated, perform a reflection lookup of its MemberInfo.
+            // Avoids overhead of reflection at startup and makes this method trimmable if unused.
+            ICustomAttributeProvider? provider = IsSourceGenerated && MemberName != null
+                ? DefaultJsonTypeInfoResolver.LookupMemberInfo(DeclaringType, MemberType, MemberName)
+                : null;
+
+            provider ??= s_nullAttributeProvider;
+            return Interlocked.CompareExchange(ref _attributeProvider, provider, null) ?? provider;
+        }
 
         /// <summary>
         /// Gets or sets a value indicating if the property or field should be replaced or populated during deserialization.
@@ -202,10 +225,13 @@ public JsonObjectCreationHandling? ObjectCreationHandling
             }
         }
 
-        private ICustomAttributeProvider? _attributeProvider;
+        private JsonObjectCreationHandling? _objectCreationHandling;
+        internal JsonObjectCreationHandling EffectiveObjectCreationHandling { get; private set; }
+
         internal string? MemberName { get; set; }
         internal MemberTypes MemberType { get; set; }
         internal bool IsVirtual { get; set; }
+        internal bool IsSourceGenerated { get; set; }
 
         /// <summary>
         /// Specifies whether the current property is a special extension data property.
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/MemberAccessor.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/MemberAccessor.cs
index ff6c442fa488..39605a2cff40 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/MemberAccessor.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/MemberAccessor.cs
@@ -9,23 +9,16 @@ namespace System.Text.Json.Serialization.Metadata
 {
     internal abstract class MemberAccessor
     {
-        public abstract Func<object>? CreateParameterlessConstructor(
-             [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] Type type,
-             ConstructorInfo? constructorInfo);
+        public abstract Func<object>? CreateParameterlessConstructor(Type type, ConstructorInfo? constructorInfo);
 
         public abstract Func<object[], T> CreateParameterizedConstructor<T>(ConstructorInfo constructor);
 
-        public abstract JsonTypeInfo.ParameterizedConstructorDelegate<T, TArg0, TArg1, TArg2, TArg3>?
-            CreateParameterizedConstructor<T, TArg0, TArg1, TArg2, TArg3>(ConstructorInfo constructor);
+        public abstract JsonTypeInfo.ParameterizedConstructorDelegate<T, TArg0, TArg1, TArg2, TArg3>? CreateParameterizedConstructor<T, TArg0, TArg1, TArg2, TArg3>(ConstructorInfo constructor);
 
         public abstract Action<TCollection, object?> CreateAddMethodDelegate<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicMethods)] TCollection>();
 
-        [RequiresUnreferencedCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
-        [RequiresDynamicCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
         public abstract Func<IEnumerable<TElement>, TCollection> CreateImmutableEnumerableCreateRangeDelegate<TCollection, TElement>();
 
-        [RequiresUnreferencedCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
-        [RequiresDynamicCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
         public abstract Func<IEnumerable<KeyValuePair<TKey, TValue>>, TCollection> CreateImmutableDictionaryCreateRangeDelegate<TCollection, TKey, TValue>();
 
         public abstract Func<object, TProperty> CreatePropertyGetter<TProperty>(PropertyInfo propertyInfo);
@@ -35,5 +28,7 @@ public abstract JsonTypeInfo.ParameterizedConstructorDelegate<T, TArg0, TArg1, T
         public abstract Func<object, TProperty> CreateFieldGetter<TProperty>(FieldInfo fieldInfo);
 
         public abstract Action<object, TProperty> CreateFieldSetter<TProperty>(FieldInfo fieldInfo);
+
+        public virtual void Clear() { }
     }
 }
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/ReflectionEmitCachingMemberAccessor.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/ReflectionEmitCachingMemberAccessor.cs
index e30f87d76da9..4efe2a3af47f 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/ReflectionEmitCachingMemberAccessor.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/ReflectionEmitCachingMemberAccessor.cs
@@ -8,54 +8,70 @@
 
 namespace System.Text.Json.Serialization.Metadata
 {
-    [RequiresDynamicCode(JsonSerializer.SerializationRequiresDynamicCodeMessage)]
     internal sealed partial class ReflectionEmitCachingMemberAccessor : MemberAccessor
     {
-        private static readonly ReflectionEmitMemberAccessor s_sourceAccessor = new();
-        private static readonly Cache<(string id, Type declaringType, MemberInfo? member)> s_cache =
-            new(slidingExpiration: TimeSpan.FromMilliseconds(1000), evictionInterval: TimeSpan.FromMilliseconds(200));
+        private readonly ReflectionEmitMemberAccessor _sourceAccessor;
+        private readonly Cache<(string id, Type declaringType, MemberInfo? member)> _cache;
 
-        public static void Clear() => s_cache.Clear();
+        [RequiresDynamicCode(JsonSerializer.SerializationRequiresDynamicCodeMessage)]
+        [RequiresUnreferencedCode(JsonSerializer.SerializationRequiresDynamicCodeMessage)]
+        public ReflectionEmitCachingMemberAccessor()
+        {
+            _sourceAccessor = new ReflectionEmitMemberAccessor();
+            _cache = new(slidingExpiration: TimeSpan.FromMilliseconds(1000), evictionInterval: TimeSpan.FromMilliseconds(200));
+        }
 
-        public override Action<TCollection, object?> CreateAddMethodDelegate<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicMethods)] TCollection>()
-            => s_cache.GetOrAdd((nameof(CreateAddMethodDelegate), typeof(TCollection), null),
-                static (_) => s_sourceAccessor.CreateAddMethodDelegate<TCollection>());
+        public override void Clear() => _cache.Clear();
 
-        public override Func<object>? CreateParameterlessConstructor([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] Type type, ConstructorInfo? ctorInfo)
-            => s_cache.GetOrAdd((nameof(CreateParameterlessConstructor), type, ctorInfo),
-                [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2077:UnrecognizedReflectionPattern",
-                    Justification = "Cannot apply DynamicallyAccessedMembersAttribute to tuple properties.")]
-#pragma warning disable IL2077 // The suppression doesn't work for the trim analyzer: https://github.com/dotnet/roslyn/issues/59746
-                static (key) => s_sourceAccessor.CreateParameterlessConstructor(key.declaringType, (ConstructorInfo?)key.member));
-#pragma warning restore IL2077
+        public override Action<TCollection, object?> CreateAddMethodDelegate<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicMethods)] TCollection>() =>
+            _cache.GetOrAdd(
+                key: (nameof(CreateAddMethodDelegate), typeof(TCollection), null),
+                _ => _sourceAccessor.CreateAddMethodDelegate<TCollection>());
 
-        public override Func<object, TProperty> CreateFieldGetter<TProperty>(FieldInfo fieldInfo)
-            => s_cache.GetOrAdd((nameof(CreateFieldGetter), typeof(TProperty), fieldInfo), static key => s_sourceAccessor.CreateFieldGetter<TProperty>((FieldInfo)key.member!));
+        public override Func<object>? CreateParameterlessConstructor(Type type, ConstructorInfo? ctorInfo) =>
+            _cache.GetOrAdd(
+                key: (nameof(CreateParameterlessConstructor), type, ctorInfo),
+                valueFactory: key => _sourceAccessor.CreateParameterlessConstructor(key.declaringType, (ConstructorInfo?)key.member));
 
-        public override Action<object, TProperty> CreateFieldSetter<TProperty>(FieldInfo fieldInfo)
-            => s_cache.GetOrAdd((nameof(CreateFieldSetter), typeof(TProperty), fieldInfo), static key => s_sourceAccessor.CreateFieldSetter<TProperty>((FieldInfo)key.member!));
+        public override Func<object, TProperty> CreateFieldGetter<TProperty>(FieldInfo fieldInfo) =>
+            _cache.GetOrAdd(
+                key: (nameof(CreateFieldGetter), typeof(TProperty), fieldInfo),
+                valueFactory: key => _sourceAccessor.CreateFieldGetter<TProperty>((FieldInfo)key.member!));
 
-        [RequiresUnreferencedCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
-        public override Func<IEnumerable<KeyValuePair<TKey, TValue>>, TCollection> CreateImmutableDictionaryCreateRangeDelegate<TCollection, TKey, TValue>()
-            => s_cache.GetOrAdd((nameof(CreateImmutableDictionaryCreateRangeDelegate), typeof((TCollection, TKey, TValue)), null),
-                static (_) => s_sourceAccessor.CreateImmutableDictionaryCreateRangeDelegate<TCollection, TKey, TValue>());
+        public override Action<object, TProperty> CreateFieldSetter<TProperty>(FieldInfo fieldInfo) =>
+            _cache.GetOrAdd(
+                key: (nameof(CreateFieldSetter), typeof(TProperty), fieldInfo),
+                valueFactory: key => _sourceAccessor.CreateFieldSetter<TProperty>((FieldInfo)key.member!));
 
-        [RequiresUnreferencedCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
-        public override Func<IEnumerable<TElement>, TCollection> CreateImmutableEnumerableCreateRangeDelegate<TCollection, TElement>()
-            => s_cache.GetOrAdd((nameof(CreateImmutableEnumerableCreateRangeDelegate), typeof((TCollection, TElement)), null),
-                static (_) => s_sourceAccessor.CreateImmutableEnumerableCreateRangeDelegate<TCollection, TElement>());
+        public override Func<IEnumerable<KeyValuePair<TKey, TValue>>, TCollection> CreateImmutableDictionaryCreateRangeDelegate<TCollection, TKey, TValue>() =>
+            _cache.GetOrAdd(
+                key: (nameof(CreateImmutableDictionaryCreateRangeDelegate), typeof((TCollection, TKey, TValue)), null),
+                valueFactory: _ => _sourceAccessor.CreateImmutableDictionaryCreateRangeDelegate<TCollection, TKey, TValue>());
 
-        public override Func<object[], T> CreateParameterizedConstructor<T>(ConstructorInfo constructor)
-            => s_cache.GetOrAdd((nameof(CreateParameterizedConstructor), typeof(T), constructor), static key => s_sourceAccessor.CreateParameterizedConstructor<T>((ConstructorInfo)key.member!));
+        public override Func<IEnumerable<TElement>, TCollection> CreateImmutableEnumerableCreateRangeDelegate<TCollection, TElement>() =>
+            _cache.GetOrAdd(
+                key: (nameof(CreateImmutableEnumerableCreateRangeDelegate), typeof((TCollection, TElement)), null),
+                valueFactory: _ => _sourceAccessor.CreateImmutableEnumerableCreateRangeDelegate<TCollection, TElement>());
 
-        public override JsonTypeInfo.ParameterizedConstructorDelegate<T, TArg0, TArg1, TArg2, TArg3>? CreateParameterizedConstructor<T, TArg0, TArg1, TArg2, TArg3>(ConstructorInfo constructor)
-            => s_cache.GetOrAdd((nameof(CreateParameterizedConstructor), typeof(T), constructor), static key => s_sourceAccessor.CreateParameterizedConstructor<T, TArg0, TArg1, TArg2, TArg3>((ConstructorInfo)key.member!));
+        public override Func<object[], T> CreateParameterizedConstructor<T>(ConstructorInfo constructor) =>
+            _cache.GetOrAdd(
+                key: (nameof(CreateParameterizedConstructor), typeof(T), constructor),
+                valueFactory: key => _sourceAccessor.CreateParameterizedConstructor<T>((ConstructorInfo)key.member!));
 
-        public override Func<object, TProperty> CreatePropertyGetter<TProperty>(PropertyInfo propertyInfo)
-            => s_cache.GetOrAdd((nameof(CreatePropertyGetter), typeof(TProperty), propertyInfo), static key => s_sourceAccessor.CreatePropertyGetter<TProperty>((PropertyInfo)key.member!));
+        public override JsonTypeInfo.ParameterizedConstructorDelegate<T, TArg0, TArg1, TArg2, TArg3>? CreateParameterizedConstructor<T, TArg0, TArg1, TArg2, TArg3>(ConstructorInfo constructor) =>
+            _cache.GetOrAdd(
+                key: (nameof(CreateParameterizedConstructor), typeof(T), constructor),
+                valueFactory: key => _sourceAccessor.CreateParameterizedConstructor<T, TArg0, TArg1, TArg2, TArg3>((ConstructorInfo)key.member!));
 
-        public override Action<object, TProperty> CreatePropertySetter<TProperty>(PropertyInfo propertyInfo)
-            => s_cache.GetOrAdd((nameof(CreatePropertySetter), typeof(TProperty), propertyInfo), static key => s_sourceAccessor.CreatePropertySetter<TProperty>((PropertyInfo)key.member!));
+        public override Func<object, TProperty> CreatePropertyGetter<TProperty>(PropertyInfo propertyInfo) =>
+            _cache.GetOrAdd(
+                key: (nameof(CreatePropertyGetter), typeof(TProperty), propertyInfo),
+                valueFactory: key => _sourceAccessor.CreatePropertyGetter<TProperty>((PropertyInfo)key.member!));
+
+        public override Action<object, TProperty> CreatePropertySetter<TProperty>(PropertyInfo propertyInfo) =>
+            _cache.GetOrAdd(
+                key: (nameof(CreatePropertySetter), typeof(TProperty), propertyInfo),
+                valueFactory: key => _sourceAccessor.CreatePropertySetter<TProperty>((PropertyInfo)key.member!));
     }
 }
 #endif
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/ReflectionEmitMemberAccessor.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/ReflectionEmitMemberAccessor.cs
index 4b0b426bdaa9..5e6f6986539e 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/ReflectionEmitMemberAccessor.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/ReflectionEmitMemberAccessor.cs
@@ -10,12 +10,17 @@
 
 namespace System.Text.Json.Serialization.Metadata
 {
-    [RequiresDynamicCode(JsonSerializer.SerializationRequiresDynamicCodeMessage)]
     internal sealed class ReflectionEmitMemberAccessor : MemberAccessor
     {
-        public override Func<object>? CreateParameterlessConstructor(
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] Type type,
-            ConstructorInfo? constructorInfo)
+        [RequiresDynamicCode(JsonSerializer.SerializationRequiresDynamicCodeMessage)]
+        [RequiresUnreferencedCode(JsonSerializer.SerializationRequiresDynamicCodeMessage)]
+        public ReflectionEmitMemberAccessor()
+        {
+        }
+
+        [SuppressMessage("AOT", "IL3050:Calling members annotated with 'RequiresDynamicCodeAttribute' may break functionality when AOT compiling.",
+            Justification = "The constructor has been marked RequiresDynamicCode")]
+        public override Func<object>? CreateParameterlessConstructor(Type type, ConstructorInfo? constructorInfo)
         {
             Debug.Assert(type != null);
             Debug.Assert(constructorInfo is null || constructorInfo.GetParameters().Length == 0);
@@ -68,6 +73,8 @@ internal sealed class ReflectionEmitMemberAccessor : MemberAccessor
         public override Func<object[], T> CreateParameterizedConstructor<T>(ConstructorInfo constructor) =>
             CreateDelegate<Func<object[], T>>(CreateParameterizedConstructor(constructor));
 
+        [UnconditionalSuppressMessage("AOT", "IL3050:Calling members annotated with 'RequiresDynamicCodeAttribute' may break functionality when AOT compiling.",
+            Justification = "The constructor has been marked RequiresDynamicCode")]
         private static DynamicMethod CreateParameterizedConstructor(ConstructorInfo constructor)
         {
             Type? type = constructor.DeclaringType;
@@ -109,6 +116,8 @@ public override JsonTypeInfo.ParameterizedConstructorDelegate<T, TArg0, TArg1, T
             CreateDelegate<JsonTypeInfo.ParameterizedConstructorDelegate<T, TArg0, TArg1, TArg2, TArg3>>(
                 CreateParameterizedConstructor(constructor, typeof(TArg0), typeof(TArg1), typeof(TArg2), typeof(TArg3)));
 
+        [UnconditionalSuppressMessage("AOT", "IL3050:Calling members annotated with 'RequiresDynamicCodeAttribute' may break functionality when AOT compiling.",
+            Justification = "The constructor has been marked RequiresDynamicCode")]
         private static DynamicMethod? CreateParameterizedConstructor(ConstructorInfo constructor, Type parameterType1, Type parameterType2, Type parameterType3, Type parameterType4)
         {
             Type? type = constructor.DeclaringType;
@@ -153,6 +162,8 @@ public override JsonTypeInfo.ParameterizedConstructorDelegate<T, TArg0, TArg1, T
         public override Action<TCollection, object?> CreateAddMethodDelegate<[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicMethods)] TCollection>() =>
             CreateDelegate<Action<TCollection, object?>>(CreateAddMethodDelegate(typeof(TCollection)));
 
+        [UnconditionalSuppressMessage("AOT", "IL3050:Calling members annotated with 'RequiresDynamicCodeAttribute' may break functionality when AOT compiling.",
+            Justification = "The constructor has been marked RequiresDynamicCode")]
         private static DynamicMethod CreateAddMethodDelegate(
             [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicMethods)] Type collectionType)
         {
@@ -176,13 +187,14 @@ private static DynamicMethod CreateAddMethodDelegate(
             return dynamicMethod;
         }
 
-        [RequiresUnreferencedCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
-        [RequiresDynamicCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
         public override Func<IEnumerable<TElement>, TCollection> CreateImmutableEnumerableCreateRangeDelegate<TCollection, TElement>() =>
             CreateDelegate<Func<IEnumerable<TElement>, TCollection>>(
                 CreateImmutableEnumerableCreateRangeDelegate(typeof(TCollection), typeof(TElement), typeof(IEnumerable<TElement>)));
 
-        [RequiresUnreferencedCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
+        [UnconditionalSuppressMessage("AOT", "IL3050:Calling members annotated with 'RequiresDynamicCodeAttribute' may break functionality when AOT compiling.",
+            Justification = "The constructor has been marked RequiresDynamicCode")]
+        [UnconditionalSuppressMessage("Trimming", "IL2026:Members annotated with 'RequiresUnreferencedCodeAttribute' require dynamic access otherwise can break functionality when trimming application code",
+            Justification = "The constructor has been marked RequiresUnreferencedCode")]
         private static DynamicMethod CreateImmutableEnumerableCreateRangeDelegate(Type collectionType, Type elementType, Type enumerableType)
         {
             MethodInfo realMethod = collectionType.GetImmutableEnumerableCreateRangeMethod(elementType);
@@ -203,13 +215,14 @@ private static DynamicMethod CreateImmutableEnumerableCreateRangeDelegate(Type c
             return dynamicMethod;
         }
 
-        [RequiresUnreferencedCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
-        [RequiresDynamicCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
         public override Func<IEnumerable<KeyValuePair<TKey, TValue>>, TCollection> CreateImmutableDictionaryCreateRangeDelegate<TCollection, TKey, TValue>() =>
             CreateDelegate<Func<IEnumerable<KeyValuePair<TKey, TValue>>, TCollection>>(
                 CreateImmutableDictionaryCreateRangeDelegate(typeof(TCollection), typeof(TKey), typeof(TValue), typeof(IEnumerable<KeyValuePair<TKey, TValue>>)));
 
-        [RequiresUnreferencedCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
+        [UnconditionalSuppressMessage("AOT", "IL3050:Calling members annotated with 'RequiresDynamicCodeAttribute' may break functionality when AOT compiling.",
+            Justification = "The constructor has been marked RequiresDynamicCode")]
+        [UnconditionalSuppressMessage("Trimming", "IL2026:Members annotated with 'RequiresUnreferencedCodeAttribute' require dynamic access otherwise can break functionality when trimming application code",
+            Justification = "The constructor has been marked RequiresUnreferencedCode")]
         private static DynamicMethod CreateImmutableDictionaryCreateRangeDelegate(Type collectionType, Type keyType, Type valueType, Type enumerableType)
         {
             MethodInfo realMethod = collectionType.GetImmutableDictionaryCreateRangeMethod(keyType, valueType);
@@ -379,6 +392,8 @@ private static DynamicMethod CreateFieldSetter(FieldInfo fieldInfo, Type runtime
             return dynamicMethod;
         }
 
+        [UnconditionalSuppressMessage("AOT", "IL3050:Calling members annotated with 'RequiresDynamicCodeAttribute' may break functionality when AOT compiling.",
+            Justification = "The constructor has been marked RequiresDynamicCode")]
         private static DynamicMethod CreateGetterMethod(string memberName, Type memberType) =>
             new DynamicMethod(
                 memberName + "Getter",
@@ -387,6 +402,8 @@ private static DynamicMethod CreateGetterMethod(string memberName, Type memberTy
                 typeof(ReflectionEmitMemberAccessor).Module,
                 skipVisibility: true);
 
+        [UnconditionalSuppressMessage("AOT", "IL3050:Calling members annotated with 'RequiresDynamicCodeAttribute' may break functionality when AOT compiling.",
+            Justification = "The constructor has been marked RequiresDynamicCode")]
         private static DynamicMethod CreateSetterMethod(string memberName, Type memberType) =>
             new DynamicMethod(
                 memberName + "Setter",
diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/ReflectionMemberAccessor.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/ReflectionMemberAccessor.cs
index 8627a24f3f49..cfc94496480a 100644
--- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/ReflectionMemberAccessor.cs
+++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Metadata/ReflectionMemberAccessor.cs
@@ -10,9 +10,15 @@ namespace System.Text.Json.Serialization.Metadata
 {
     internal sealed class ReflectionMemberAccessor : MemberAccessor
     {
-        public override Func<object>? CreateParameterlessConstructor(
-            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicConstructors)] Type type,
-            ConstructorInfo? ctorInfo)
+        [RequiresDynamicCode(JsonSerializer.SerializationRequiresDynamicCodeMessage)]
+        [RequiresUnreferencedCode(JsonSerializer.SerializationRequiresDynamicCodeMessage)]
+        public ReflectionMemberAccessor()
+        {
+        }
+
+        [UnconditionalSuppressMessage("Trimming", "IL2067:Target parameter argument does not satisfy 'DynamicallyAccessedMembersAttribute' in call to target method. The parameter of method does not have matching annotations.",
+            Justification = "The constructor has been marked RequiresUnreferencedCode")]
+        public override Func<object>? CreateParameterlessConstructor(Type type, ConstructorInfo? ctorInfo)
         {
             Debug.Assert(type != null);
             Debug.Assert(ctorInfo is null || ctorInfo.GetParameters().Length == 0);
@@ -122,8 +128,10 @@ public override JsonTypeInfo.ParameterizedConstructorDelegate<T, TArg0, TArg1, T
             };
         }
 
-        [RequiresUnreferencedCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
-        [RequiresDynamicCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
+        [UnconditionalSuppressMessage("Trimming", "IL2026:Members annotated with 'RequiresUnreferencedCodeAttribute' require dynamic access otherwise can break functionality when trimming application code",
+            Justification = "The constructor has been marked RequiresUnreferencedCode")]
+        [UnconditionalSuppressMessage("AOT", "IL3050:Calling members annotated with 'RequiresDynamicCodeAttribute' may break functionality when AOT compiling.",
+            Justification = "The constructor has been marked RequiresDynamicCode")]
         public override Func<IEnumerable<TElement>, TCollection> CreateImmutableEnumerableCreateRangeDelegate<TCollection, TElement>()
         {
             MethodInfo createRange = typeof(TCollection).GetImmutableEnumerableCreateRangeMethod(typeof(TElement));
@@ -131,8 +139,10 @@ public override Func<IEnumerable<TElement>, TCollection> CreateImmutableEnumerab
                 typeof(Func<IEnumerable<TElement>, TCollection>));
         }
 
-        [RequiresUnreferencedCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
-        [RequiresDynamicCode(IEnumerableConverterFactoryHelpers.ImmutableConvertersUnreferencedCodeMessage)]
+        [UnconditionalSuppressMessage("Trimming", "IL2026:Members annotated with 'RequiresUnreferencedCodeAttribute' require dynamic access otherwise can break functionality when trimming application code",
+            Justification = "The constructor has been marked RequiresUnreferencedCode")]
+        [UnconditionalSuppressMessage("AOT", "IL3050:Calling members annotated with 'RequiresDynamicCodeAttribute' may break functionality when AOT compiling.",
+            Justification = "The constructor has been marked RequiresDynamicCode")]
         public override Func<IEnumerable<KeyValuePair<TKey, TValue>>, TCollection> CreateImmutableDictionaryCreateRangeDelegate<TCollection, TKey, TValue>()
         {
             MethodInfo createRange = typeof(TCollection).GetImmutableDictionaryCreateRangeMethod(typeof(TKey), typeof(TValue));
diff --git a/src/libraries/System.Text.Json/tests/Common/PropertyNameTests.cs b/src/libraries/System.Text.Json/tests/Common/PropertyNameTests.cs
index 021481ae5a13..9e6066fc308c 100644
--- a/src/libraries/System.Text.Json/tests/Common/PropertyNameTests.cs
+++ b/src/libraries/System.Text.Json/tests/Common/PropertyNameTests.cs
@@ -33,7 +33,7 @@ public async Task BuiltInPolicyDeserializeMatch()
             await DeserializeAndAssert(JsonNamingPolicy.SnakeCaseLower, @"{""my_int16"":1}", 1);
             await DeserializeAndAssert(JsonNamingPolicy.SnakeCaseUpper, @"{""MY_INT16"":1}", 1);
             await DeserializeAndAssert(JsonNamingPolicy.KebabCaseLower, @"{""my-int16"":1}", 1);
-            await DeserializeAndAssert(JsonNamingPolicy.KebabCaseUpper, @"{""MY-INT16"":1}", 1);            
+            await DeserializeAndAssert(JsonNamingPolicy.KebabCaseUpper, @"{""MY-INT16"":1}", 1);
         }
 
         private async Task DeserializeAndAssert(JsonNamingPolicy policy, string json, short expected)
@@ -449,7 +449,8 @@ public async Task SpecialCharacters()
                 SmtpId = 3,
                 Emojies = 4,
                 \uA000 = 5,
-                YiIt_2 = 6
+                YiIt_2 = 6,
+                PropertyNameWithWhitespace = 7,
             };
 
             string json = await Serializer.SerializeWrapper(obj);
@@ -459,7 +460,8 @@ public async Task SpecialCharacters()
                 "\"smtp-id\":3," +
                 "\"\\uD83D\\uDE00\\uD83D\\uDE01\":4," +
                 "\"\\uA000\":5," +
-                "\"\\uA000_2\":6}", json);
+                "\"\\uA000_2\":6," +
+                "\"\\u0022ab \\n\\r\\t\\f\\bc\\u0022\":7}", json);
 
             obj = await Serializer.DeserializeWrapper<ClassWithSpecialCharacters>(json);
             Assert.Equal(1, obj.Baseline);
@@ -493,6 +495,10 @@ public class ClassWithSpecialCharacters
             [JsonPropertyOrder(6)]
             [JsonPropertyName("\uA000_2")] // Valid C# property name: \uA000_2
             public int YiIt_2 { get; set; }
+
+            [JsonPropertyOrder(7)]
+            [JsonPropertyName("\"ab \n\r\t\f\bc\"")] // Regression test for https://github.com/dotnet/runtime/issues/98638
+            public int PropertyNameWithWhitespace { get; set; }
         }
 
         [Theory]
diff --git a/src/libraries/System.Text.Json/tests/Common/PropertyVisibilityTests.cs b/src/libraries/System.Text.Json/tests/Common/PropertyVisibilityTests.cs
index 44c702025f5d..f957db15a96b 100644
--- a/src/libraries/System.Text.Json/tests/Common/PropertyVisibilityTests.cs
+++ b/src/libraries/System.Text.Json/tests/Common/PropertyVisibilityTests.cs
@@ -245,7 +245,7 @@ public async Task Ignore_BasePublicPropertyIgnored_ConflictWithDerivedPrivate()
         }
 
         [Fact]
-        public async void Ignore_BasePublicPropertyIgnored_ConflictWithDerivedPublicPropertyIgnored()
+        public async Task Ignore_BasePublicPropertyIgnored_ConflictWithDerivedPublicPropertyIgnored()
         {
             var obj = new ClassWithIgnoredPublicPropertyAndNewSlotPublicAndIgnoredToo();
 
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/ContextClasses.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/ContextClasses.cs
index ed29ed8fd4e1..182981002b74 100644
--- a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/ContextClasses.cs
+++ b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/ContextClasses.cs
@@ -58,6 +58,7 @@ public interface ITestContext
         public JsonTypeInfo<TypeWithDerivedAttribute> TypeWithDerivedAttribute { get; }
         public JsonTypeInfo<PolymorphicClass> PolymorphicClass { get; }
         public JsonTypeInfo<PocoWithNumberHandlingAttr> PocoWithNumberHandlingAttr { get; }
+        public JsonTypeInfo<PocoWithMixedVisibilityMembers> PocoWithMixedVisibilityMembers { get; }
     }
 
     internal partial class JsonContext : JsonSerializerContext
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/MetadataAndSerializationContextTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/MetadataAndSerializationContextTests.cs
index 4ff998bd5d6d..308a6b8a1046 100644
--- a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/MetadataAndSerializationContextTests.cs
+++ b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/MetadataAndSerializationContextTests.cs
@@ -54,6 +54,7 @@ namespace System.Text.Json.SourceGeneration.Tests
     [JsonSerializable(typeof(TypeWithDerivedAttribute))]
     [JsonSerializable(typeof(PolymorphicClass))]
     [JsonSerializable(typeof(PocoWithNumberHandlingAttr))]
+    [JsonSerializable(typeof(PocoWithMixedVisibilityMembers))]
     internal partial class MetadataAndSerializationContext : JsonSerializerContext, ITestContext
     {
         public JsonSourceGenerationMode JsonSourceGenerationMode => JsonSourceGenerationMode.Default;
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/MetadataContextTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/MetadataContextTests.cs
index 33683dfa09c6..10e394518cdf 100644
--- a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/MetadataContextTests.cs
+++ b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/MetadataContextTests.cs
@@ -53,6 +53,7 @@ namespace System.Text.Json.SourceGeneration.Tests
     [JsonSerializable(typeof(TypeWithDerivedAttribute), GenerationMode = JsonSourceGenerationMode.Metadata)]
     [JsonSerializable(typeof(PolymorphicClass), GenerationMode = JsonSourceGenerationMode.Metadata)]
     [JsonSerializable(typeof(PocoWithNumberHandlingAttr), GenerationMode = JsonSourceGenerationMode.Metadata)]
+    [JsonSerializable(typeof(PocoWithMixedVisibilityMembers), GenerationMode = JsonSourceGenerationMode.Metadata)]
     internal partial class MetadataWithPerTypeAttributeContext : JsonSerializerContext, ITestContext
     {
         public JsonSourceGenerationMode JsonSourceGenerationMode => JsonSourceGenerationMode.Metadata;
@@ -156,6 +157,7 @@ public override void EnsureFastPathGeneratedAsExpected()
     [JsonSerializable(typeof(TypeWithDerivedAttribute))]
     [JsonSerializable(typeof(PolymorphicClass))]
     [JsonSerializable(typeof(PocoWithNumberHandlingAttr))]
+    [JsonSerializable(typeof(PocoWithMixedVisibilityMembers))]
     internal partial class MetadataContext : JsonSerializerContext, ITestContext
     {
         public JsonSourceGenerationMode JsonSourceGenerationMode => JsonSourceGenerationMode.Metadata;
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/MixedModeContextTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/MixedModeContextTests.cs
index 72b9a9baff09..529bd598b6ea 100644
--- a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/MixedModeContextTests.cs
+++ b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/MixedModeContextTests.cs
@@ -54,6 +54,7 @@ namespace System.Text.Json.SourceGeneration.Tests
     [JsonSerializable(typeof(TypeWithDerivedAttribute), GenerationMode = JsonSourceGenerationMode.Metadata | JsonSourceGenerationMode.Serialization)]
     [JsonSerializable(typeof(PolymorphicClass), GenerationMode = JsonSourceGenerationMode.Metadata | JsonSourceGenerationMode.Serialization)]
     [JsonSerializable(typeof(PocoWithNumberHandlingAttr), GenerationMode = JsonSourceGenerationMode.Metadata | JsonSourceGenerationMode.Serialization)]
+    [JsonSerializable(typeof(PocoWithMixedVisibilityMembers), GenerationMode = JsonSourceGenerationMode.Metadata | JsonSourceGenerationMode.Serialization)]
     internal partial class MixedModeContext : JsonSerializerContext, ITestContext
     {
         public JsonSourceGenerationMode JsonSourceGenerationMode => JsonSourceGenerationMode.Metadata | JsonSourceGenerationMode.Serialization;
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/RealWorldContextTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/RealWorldContextTests.cs
index 42f75fcb019d..00ce5d49d451 100644
--- a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/RealWorldContextTests.cs
+++ b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/RealWorldContextTests.cs
@@ -4,6 +4,7 @@
 using System.Collections.Generic;
 using System.IO;
 using System.Linq;
+using System.Reflection;
 using System.Text.Json.Serialization;
 using System.Text.Json.Serialization.Metadata;
 using Xunit;
@@ -1111,5 +1112,36 @@ public void NumberHandlingHonoredOnPoco()
                 JsonTestHelper.AssertJsonEqual(@"{""Id"":""0""}", JsonSerializer.Serialize(new PocoWithNumberHandlingAttr(), DefaultContext.PocoWithNumberHandlingAttr));
             }
         }
+
+        [Theory]
+        [InlineData(MemberTypes.Property, nameof(PocoWithMixedVisibilityMembers.PublicProperty))]
+        [InlineData(MemberTypes.Field, nameof(PocoWithMixedVisibilityMembers.PublicField))]
+        [InlineData(MemberTypes.Property, nameof(PocoWithMixedVisibilityMembers.InternalProperty))]
+        [InlineData(MemberTypes.Field, nameof(PocoWithMixedVisibilityMembers.InternalField))]
+        [InlineData(MemberTypes.Property, nameof(PocoWithMixedVisibilityMembers.PropertyWithCustomName), "customProp")]
+        [InlineData(MemberTypes.Field, nameof(PocoWithMixedVisibilityMembers.FieldWithCustomName), "customField")]
+        [InlineData(MemberTypes.Property, nameof(PocoWithMixedVisibilityMembers.BaseProperty))]
+        [InlineData(MemberTypes.Property, nameof(PocoWithMixedVisibilityMembers.ShadowProperty))]
+        public void JsonPropertyInfo_PopulatesAttributeProvider(MemberTypes memberType, string propertyName, string? jsonPropertyName = null)
+        {
+            if (DefaultContext.JsonSourceGenerationMode is JsonSourceGenerationMode.Serialization)
+            {
+                return; // No metadata generated
+            }
+
+            JsonTypeInfo typeInfo = DefaultContext.PocoWithMixedVisibilityMembers;
+            string name = jsonPropertyName ?? propertyName;
+            JsonPropertyInfo prop = typeInfo.Properties.FirstOrDefault(prop => prop.Name == name);
+            Assert.NotNull(prop);
+
+            MemberInfo memberInfo = Assert.IsAssignableFrom<MemberInfo>(prop.AttributeProvider);
+            string? actualJsonPropertyName = memberInfo.GetCustomAttribute<JsonPropertyNameAttribute>()?.Name;
+
+            Assert.True(memberInfo.DeclaringType.IsAssignableFrom(typeInfo.Type));
+            Assert.Equal(memberType, memberInfo.MemberType);
+            Assert.Equal(prop.PropertyType, memberInfo is PropertyInfo p ? p.PropertyType : ((FieldInfo)memberInfo).FieldType);
+            Assert.Equal(propertyName, memberInfo.Name);
+            Assert.Equal(jsonPropertyName, actualJsonPropertyName);
+        }
     }
 }
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/SerializationContextTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/SerializationContextTests.cs
index 5a7776640b4e..162a8b12bd7e 100644
--- a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/SerializationContextTests.cs
+++ b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/SerializationContextTests.cs
@@ -55,6 +55,7 @@ namespace System.Text.Json.SourceGeneration.Tests
     [JsonSerializable(typeof(TypeWithDerivedAttribute))]
     [JsonSerializable(typeof(PolymorphicClass))]
     [JsonSerializable(typeof(PocoWithNumberHandlingAttr))]
+    [JsonSerializable(typeof(PocoWithMixedVisibilityMembers))]
     internal partial class SerializationContext : JsonSerializerContext, ITestContext
     {
         public JsonSourceGenerationMode JsonSourceGenerationMode => JsonSourceGenerationMode.Serialization;
@@ -109,6 +110,7 @@ internal partial class SerializationContext : JsonSerializerContext, ITestContex
     [JsonSerializable(typeof(TypeWithDerivedAttribute), GenerationMode = JsonSourceGenerationMode.Serialization)]
     [JsonSerializable(typeof(PolymorphicClass), GenerationMode = JsonSourceGenerationMode.Serialization)]
     [JsonSerializable(typeof(PocoWithNumberHandlingAttr), GenerationMode = JsonSourceGenerationMode.Serialization)]
+    [JsonSerializable(typeof(PocoWithMixedVisibilityMembers), GenerationMode = JsonSourceGenerationMode.Serialization)]
     internal partial class SerializationWithPerTypeAttributeContext : JsonSerializerContext, ITestContext
     {
         public JsonSourceGenerationMode JsonSourceGenerationMode => JsonSourceGenerationMode.Serialization;
@@ -162,6 +164,7 @@ internal partial class SerializationWithPerTypeAttributeContext : JsonSerializer
     [JsonSerializable(typeof(TypeWithDerivedAttribute), GenerationMode = JsonSourceGenerationMode.Serialization)]
     [JsonSerializable(typeof(PolymorphicClass), GenerationMode = JsonSourceGenerationMode.Serialization)]
     [JsonSerializable(typeof(PocoWithNumberHandlingAttr), GenerationMode = JsonSourceGenerationMode.Serialization)]
+    [JsonSerializable(typeof(PocoWithMixedVisibilityMembers), GenerationMode = JsonSourceGenerationMode.Serialization)]
     internal partial class SerializationContextWithCamelCase : JsonSerializerContext, ITestContext
     {
         public JsonSourceGenerationMode JsonSourceGenerationMode => JsonSourceGenerationMode.Serialization;
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/TestClasses.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/TestClasses.cs
index be241a0271d3..64a4f13818f0 100644
--- a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/TestClasses.cs
+++ b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Tests/TestClasses.cs
@@ -305,4 +305,32 @@ public class PocoWithNumberHandlingAttr
     {
         public int Id { get; set; }
     }
+
+    public class PocoWithMixedVisibilityMembersBase
+    {
+        public string BaseProperty { get; set; }
+        public string ShadowProperty { get; set; }
+    }
+
+    public class PocoWithMixedVisibilityMembers : PocoWithMixedVisibilityMembersBase
+    {
+        public string PublicProperty { get; set; }
+
+        [JsonInclude]
+        public string PublicField;
+
+        [JsonInclude]
+        internal int InternalProperty { get; set; }
+
+        [JsonInclude]
+        internal int InternalField;
+
+        [JsonPropertyName("customProp")]
+        public string PropertyWithCustomName { get; set; }
+
+        [JsonInclude, JsonPropertyName("customField")]
+        public string FieldWithCustomName;
+
+        public new int ShadowProperty { get; set; }
+    }
 }
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Unit.Tests/JsonSourceGeneratorDiagnosticsTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Unit.Tests/JsonSourceGeneratorDiagnosticsTests.cs
index a554d2681d43..5e5d83de2c09 100644
--- a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Unit.Tests/JsonSourceGeneratorDiagnosticsTests.cs
+++ b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Unit.Tests/JsonSourceGeneratorDiagnosticsTests.cs
@@ -11,6 +11,8 @@ namespace System.Text.Json.SourceGeneration.UnitTests
 {
     [ActiveIssue("https://github.com/dotnet/runtime/issues/58226", TestPlatforms.Browser)]
     [SkipOnCoreClr("https://github.com/dotnet/runtime/issues/71962", ~RuntimeConfiguration.Release)]
+    [SkipOnMono("https://github.com/dotnet/runtime/issues/92467")]
+    [ConditionalClass(typeof(PlatformDetection), nameof(PlatformDetection.IsNotX86Process))] // https://github.com/dotnet/runtime/issues/71962
     public class JsonSourceGeneratorDiagnosticsTests
     {
         /// <summary>
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Unit.Tests/JsonSourceGeneratorIncrementalTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Unit.Tests/JsonSourceGeneratorIncrementalTests.cs
index daa6498cbc9b..5bcb01a94bde 100644
--- a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Unit.Tests/JsonSourceGeneratorIncrementalTests.cs
+++ b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Unit.Tests/JsonSourceGeneratorIncrementalTests.cs
@@ -13,6 +13,8 @@ namespace System.Text.Json.SourceGeneration.UnitTests
 {
     [ActiveIssue("https://github.com/dotnet/runtime/issues/58226", TestPlatforms.Browser)]
     [SkipOnCoreClr("https://github.com/dotnet/runtime/issues/71962", ~RuntimeConfiguration.Release)]
+    [SkipOnMono("https://github.com/dotnet/runtime/issues/92467")]
+    [ConditionalClass(typeof(PlatformDetection), nameof(PlatformDetection.IsNotX86Process))] // https://github.com/dotnet/runtime/issues/71962
     public static class JsonSourceGeneratorIncrementalTests
     {
         [Theory]
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Unit.Tests/JsonSourceGeneratorTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Unit.Tests/JsonSourceGeneratorTests.cs
index eb6d0991585c..e2f08b988441 100644
--- a/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Unit.Tests/JsonSourceGeneratorTests.cs
+++ b/src/libraries/System.Text.Json/tests/System.Text.Json.SourceGeneration.Unit.Tests/JsonSourceGeneratorTests.cs
@@ -10,6 +10,8 @@ namespace System.Text.Json.SourceGeneration.UnitTests
 {
     [ActiveIssue("https://github.com/dotnet/runtime/issues/58226", TestPlatforms.Browser)]
     [SkipOnCoreClr("https://github.com/dotnet/runtime/issues/71962", ~RuntimeConfiguration.Release)]
+    [SkipOnMono("https://github.com/dotnet/runtime/issues/92467")]
+    [ConditionalClass(typeof(PlatformDetection), nameof(PlatformDetection.IsNotX86Process))] // https://github.com/dotnet/runtime/issues/71962
     public class GeneratorTests
     {
         [Fact]
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/System.Text.Json.Tests.csproj b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/System.Text.Json.Tests.csproj
index 8bec6eca8c7e..61bea23cac82 100644
--- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/System.Text.Json.Tests.csproj
+++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/System.Text.Json.Tests.csproj
@@ -18,9 +18,10 @@
     <IncludeRemoteExecutor>true</IncludeRemoteExecutor>
   </PropertyGroup>
   <PropertyGroup Condition="'$(TargetOS)' == 'browser'">
-    <WasmXHarnessMonoArgs>--setenv=XHARNESS_LOG_TEST_START=true</WasmXHarnessMonoArgs>
+    <XunitShowProgress>true</XunitShowProgress>
     <!-- This WASM test is problematic and slow right now. This sets the xharness timeout but there is also override in sendtohelix-browser.targets -->
     <WasmXHarnessTestsTimeout>01:15:00</WasmXHarnessTestsTimeout>
+    <WasmXHarnessMaxParallelThreads>1</WasmXHarnessMaxParallelThreads>
   </PropertyGroup>
   <ItemGroup Condition="'$(ContinuousIntegrationBuild)' == 'true'">
     <HighAotMemoryUsageAssembly Include="System.Text.Json.Tests.dll" />
diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
index 7e7fed6cab65..b2ab52c4c71f 100644
--- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
+++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
@@ -10,6 +10,7 @@
 using System.Globalization;
 using System.IO;
 using System.Linq;
+using System.Runtime.CompilerServices;
 using System.Security.Cryptography;
 using System.Threading;
 using Microsoft.CodeAnalysis;
@@ -390,11 +391,13 @@ private static void AddIsECMABoundaryHelper(Dictionary<string, string[]> require
         /// <summary>Adds a SearchValues instance declaration to the required helpers collection if the chars are ASCII.</summary>
         private static string EmitSearchValuesOrLiteral(ReadOnlySpan<char> chars, Dictionary<string, string[]> requiredHelpers)
         {
-            // SearchValues<char> is faster than a regular IndexOfAny("abcd") for sets of 4/5 values iff they are ASCII.
-            // Only emit SearchValues instances when we know they'll be faster to avoid increasing the startup cost too much.
-            Debug.Assert(chars.Length is 4 or 5);
+            Debug.Assert(chars.Length > 3);
 
-            return RegexCharClass.IsAscii(chars)
+            // IndexOfAny(SearchValues) is faster than a regular IndexOfAny("abcd") if:
+            // - There are more than 5 characters in the needle, or
+            // - There are only 4 or 5 characters in the needle and they're all ASCII.
+
+            return chars.Length > 5 || RegexCharClass.IsAscii(chars)
                 ? EmitSearchValues(chars.ToArray(), requiredHelpers)
                 : Literal(chars.ToString());
         }
@@ -415,7 +418,7 @@ private static string EmitSearchValues(char[] chars, Dictionary<string, string[]
                         bitmap[c >> 3] |= (byte)(1 << (c & 7));
                     }
 
-                    string hexBitmap = BitConverter.ToString(bitmap).Replace("-", string.Empty);
+                    string hexBitmap = ToHexStringNoDashes(bitmap);
 
                     fieldName = hexBitmap switch
                     {
@@ -732,6 +735,11 @@ private static void EmitTryFindNextPossibleStartingPosition(IndentedTextWriter w
                             EmitIndexOfString_RightToLeft();
                             break;
 
+                        case FindNextStartingPositionMode.LeadingStrings_LeftToRight:
+                        case FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight:
+                            EmitIndexOfStrings_LeftToRight();
+                            break;
+
                         case FindNextStartingPositionMode.LeadingSet_LeftToRight:
                         case FindNextStartingPositionMode.FixedDistanceSets_LeftToRight:
                             EmitFixedSet_LeftToRight();
@@ -1041,6 +1049,37 @@ UnicodeCategory.NonSpacingMark or
                 }
             }
 
+            // Emits a case-sensitive left-to-right search for any one of multiple leading prefixes.
+            void EmitIndexOfStrings_LeftToRight()
+            {
+                RegexFindOptimizations opts = regexTree.FindOptimizations;
+                Debug.Assert(opts.FindMode is FindNextStartingPositionMode.LeadingStrings_LeftToRight or FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight);
+
+                string prefixes = string.Join(", ", opts.LeadingPrefixes.Select(prefix => Literal(prefix)));
+                StringComparison stringComparison = opts.FindMode is FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight ?
+                    StringComparison.OrdinalIgnoreCase :
+                    StringComparison.Ordinal;
+                string fieldName = GetSHA256FieldName($"s_indexOfAnyStrings_{stringComparison}_", prefixes);
+
+                if (!requiredHelpers.ContainsKey(fieldName))
+                {
+                    requiredHelpers.Add(fieldName,
+                    [
+                        $"/// <summary>Supports searching for the specified strings.</summary>",
+                        $"internal static readonly SearchValues<string> {fieldName} = SearchValues.Create([{prefixes}], StringComparison.{stringComparison});", // explicitly using an array in case prefixes is large
+                    ]);
+                }
+
+                writer.WriteLine($"// The pattern has multiple strings that could begin the match. Search for any of them.");
+                writer.WriteLine($"// If none can be found, there's no match.");
+                writer.WriteLine($"int i = inputSpan.Slice(pos).IndexOfAny({HelpersTypeName}.{fieldName});");
+                using (EmitBlock(writer, "if (i >= 0)"))
+                {
+                    writer.WriteLine("base.runtextpos = pos + i;");
+                    writer.WriteLine("return true;");
+                }
+            }
+
             // Emits a case-sensitive right-to-left search for a substring.
             void EmitIndexOfString_RightToLeft()
             {
@@ -1416,6 +1455,16 @@ private static void EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, Reg
             HashSet<string> additionalDeclarations = new();
             Dictionary<string, string[]> additionalLocalFunctions = new();
 
+            // In debug builds, additional code is emitted to validate that the backtracking stack is being maintained appropriately.
+            // When state is pushed onto the backtracking stack, an additional known value is pushed, and when it's popped, it's
+            // the popped value is checked against that known value, throwing an exception if they don't match. This validation code
+            // is currently not part of RegexCompiler, though it could be added there in the future if desired.
+#if DEBUG
+#pragma warning disable RS1035 // Random isn't always deterministic, but this is only for debug builds, and we've seeded the Random with a constant
+            Random stackCookieGenerator = new(12345); // seed for deterministic behavior
+#pragma warning restore RS1035
+#endif
+
             // Declare some locals.
             string sliceSpan = "slice";
             writer.WriteLine("int pos = base.runtextpos;");
@@ -1618,7 +1667,7 @@ void EmitAlternation(RegexNode node)
                 }
 
                 // Detect whether every branch begins with one or more unique characters.
-                const int SetCharsSize = 5; // arbitrary limit (for IgnoreCase, we want this to be at least 3 to handle the vast majority of values)
+                const int SetCharsSize = 64; // arbitrary limit; we want it to be large enough to handle ignore-case of common sets, like hex, the latin alphabet, etc.
                 Span<char> setChars = stackalloc char[SetCharsSize];
                 if (useSwitchedBranches)
                 {
@@ -1628,8 +1677,10 @@ void EmitAlternation(RegexNode node)
                     var seenChars = new HashSet<char>();
                     for (int i = 0; i < childCount && useSwitchedBranches; i++)
                     {
-                        // If it's not a One, Multi, or Set, we can't apply this optimization.
-                        if (node.Child(i).FindBranchOneMultiOrSetStart() is not RegexNode oneMultiOrSet)
+                        // Look for the guaranteed starting node that's a one, multi, set,
+                        // or loop of one of those with at least one minimum iteration. We need to exclude notones.
+                        if (node.Child(i).FindStartingLiteralNode(allowZeroWidth: false) is not RegexNode startingLiteralNode ||
+                            startingLiteralNode.IsNotoneFamily)
                         {
                             useSwitchedBranches = false;
                             break;
@@ -1637,9 +1688,9 @@ void EmitAlternation(RegexNode node)
 
                         // If it's a One or a Multi, get the first character and add it to the set.
                         // If it was already in the set, we can't apply this optimization.
-                        if (oneMultiOrSet.Kind is RegexNodeKind.One or RegexNodeKind.Multi)
+                        if (startingLiteralNode.IsOneFamily || startingLiteralNode.Kind is RegexNodeKind.Multi)
                         {
-                            if (!seenChars.Add(oneMultiOrSet.FirstCharOfOneOrMulti()))
+                            if (!seenChars.Add(startingLiteralNode.FirstCharOfOneOrMulti()))
                             {
                                 useSwitchedBranches = false;
                                 break;
@@ -1649,10 +1700,10 @@ void EmitAlternation(RegexNode node)
                         {
                             // The branch begins with a set.  Make sure it's a set of only a few characters
                             // and get them.  If we can't, we can't apply this optimization.
-                            Debug.Assert(oneMultiOrSet.Kind is RegexNodeKind.Set);
+                            Debug.Assert(startingLiteralNode.IsSetFamily);
                             int numChars;
-                            if (RegexCharClass.IsNegated(oneMultiOrSet.Str!) ||
-                                (numChars = RegexCharClass.GetSetChars(oneMultiOrSet.Str!, setChars)) == 0)
+                            if (RegexCharClass.IsNegated(startingLiteralNode.Str!) ||
+                                (numChars = RegexCharClass.GetSetChars(startingLiteralNode.Str!, setChars)) == 0)
                             {
                                 useSwitchedBranches = false;
                                 break;
@@ -1694,7 +1745,7 @@ void EmitSwitchedBranches()
                     writer.WriteLine();
 
                     // Emit a switch statement on the first char of each branch.
-                    using (EmitBlock(writer, $"switch ({sliceSpan}[{sliceStaticPos++}])"))
+                    using (EmitBlock(writer, $"switch ({sliceSpan}[{sliceStaticPos}])"))
                     {
                         Span<char> setChars = stackalloc char[SetCharsSize]; // needs to be same size as detection check in caller
                         int startingSliceStaticPos = sliceStaticPos;
@@ -1704,56 +1755,74 @@ void EmitSwitchedBranches()
                         {
                             sliceStaticPos = startingSliceStaticPos;
 
-                            RegexNode child = node.Child(i);
-                            Debug.Assert(child.Kind is RegexNodeKind.One or RegexNodeKind.Multi or RegexNodeKind.Set or RegexNodeKind.Concatenate, DescribeNode(child, rm));
-                            Debug.Assert(child.Kind is not RegexNodeKind.Concatenate || (child.ChildCount() >= 2 && child.Child(0).Kind is RegexNodeKind.One or RegexNodeKind.Multi or RegexNodeKind.Set));
+                            // We know we're only in this code if every branch has a valid starting literal node. Get it.
+                            // We also get the immediate child. Ideally they're the same, in which case we might be able to
+                            // use the switch as the processing of that node, e.g. if the node is a One, then by matching the
+                            // literal via the switch, we've fully processed it. But there may be other cases in which it's not
+                            // sufficient, e.g. if that one was wrapped in a Capture, we still want to emit the capture code,
+                            // and for simplicity, we still end up emitting the re-evaluation of that character. It's still much
+                            // cheaper to do this than to emit the full alternation code.
 
-                            RegexNode? childStart = child.FindBranchOneMultiOrSetStart();
-                            Debug.Assert(childStart is not null, "Unexpectedly couldn't find the branch starting node.");
+                            RegexNode child = node.Child(i);
+                            RegexNode? startingLiteralNode = child.FindStartingLiteralNode(allowZeroWidth: false);
+                            Debug.Assert(startingLiteralNode is not null, "Unexpectedly couldn't find the branch starting node.");
 
-                            if (childStart.Kind is RegexNodeKind.Set)
+                            // Emit the case for this branch to match on the first character.
+                            if (startingLiteralNode.IsSetFamily)
                             {
-                                int numChars = RegexCharClass.GetSetChars(childStart.Str!, setChars);
+                                int numChars = RegexCharClass.GetSetChars(startingLiteralNode.Str!, setChars);
                                 Debug.Assert(numChars != 0);
                                 writer.WriteLine($"case {string.Join(" or ", setChars.Slice(0, numChars).ToArray().Select(Literal))}:");
                             }
                             else
                             {
-                                writer.WriteLine($"case {Literal(childStart.FirstCharOfOneOrMulti())}:");
+                                writer.WriteLine($"case {Literal(startingLiteralNode.FirstCharOfOneOrMulti())}:");
                             }
                             writer.Indent++;
 
                             // Emit the code for the branch, without the first character that was already matched in the switch.
+                            RegexNode? remainder = null;
+                            HandleChild:
                             switch (child.Kind)
                             {
+                                case RegexNodeKind.One:
+                                case RegexNodeKind.Set:
+                                    // The character was handled entirely by the switch. No additional matching is needed.
+                                    sliceStaticPos++;
+                                    break;
+
                                 case RegexNodeKind.Multi:
-                                    EmitNode(CloneMultiWithoutFirstChar(child));
+                                    // First character was handled by the switch. Emit matching code for the remainder of the multi string.
+                                    sliceStaticPos++;
+                                    EmitNode(child.Str!.Length == 2 ?
+                                        new RegexNode(RegexNodeKind.One, child.Options, child.Str![1]) :
+                                        new RegexNode(RegexNodeKind.Multi, child.Options, child.Str!.Substring(1)));
                                     writer.WriteLine();
                                     break;
 
-                                case RegexNodeKind.Concatenate:
-                                    var newConcat = new RegexNode(RegexNodeKind.Concatenate, child.Options);
-                                    if (childStart.Kind == RegexNodeKind.Multi)
-                                    {
-                                        newConcat.AddChild(CloneMultiWithoutFirstChar(childStart));
-                                    }
-                                    int concatChildCount = child.ChildCount();
-                                    for (int j = 1; j < concatChildCount; j++)
-                                    {
-                                        newConcat.AddChild(child.Child(j));
-                                    }
-                                    EmitNode(newConcat.Reduce());
-                                    writer.WriteLine();
+                                case RegexNodeKind.Concatenate when child.Child(0) == startingLiteralNode && (startingLiteralNode.Kind is RegexNodeKind.One or RegexNodeKind.Set or RegexNodeKind.Multi):
+                                    // This is a concatenation where its first node is the starting literal we found and that starting literal
+                                    // is one of the nodes above that we know how to handle completely. This is a common
+                                    // enough case that we want to special-case it to avoid duplicating the processing for that character
+                                    // unnecessarily. So, we'll shave off that first node from the concatenation and then handle the remainder.
+                                    // Note that it's critical startingLiteralNode is something we can fully handle above: if it's not,
+                                    // we'll end up losing some of the pattern due to overwriting `remainder`.
+                                    remainder = child;
+                                    child = child.Child(0);
+                                    remainder.ReplaceChild(0, new RegexNode(RegexNodeKind.Empty, remainder.Options));
+                                    goto HandleChild; // reprocess just the first node that was saved; the remainder will then be processed below
+
+                                default:
+                                    Debug.Assert(remainder is null);
+                                    remainder = child;
                                     break;
+                            }
 
-                                    static RegexNode CloneMultiWithoutFirstChar(RegexNode node)
-                                    {
-                                        Debug.Assert(node.Kind is RegexNodeKind.Multi);
-                                        Debug.Assert(node.Str!.Length >= 2);
-                                        return node.Str!.Length == 2 ?
-                                            new RegexNode(RegexNodeKind.One, node.Options, node.Str![1]) :
-                                            new RegexNode(RegexNodeKind.Multi, node.Options, node.Str!.Substring(1));
-                                    }
+                            if (remainder is not null)
+                            {
+                                // Emit a full match for whatever part of the child we haven't yet handled.
+                                EmitNode(remainder);
+                                writer.WriteLine();
                             }
 
                             // This is only ever used for atomic alternations, so we can simply reset the doneLabel
@@ -1857,6 +1926,7 @@ void EmitAllBranches()
                         additionalDeclarations.Add($"int {currentBranch} = 0;");
                     }
 
+                    int stackCookie = CreateStackCookie();
                     for (int i = 0; i < childCount; i++)
                     {
                         // If the alternation isn't atomic, backtracking may require our jump table jumping back
@@ -1896,7 +1966,7 @@ void EmitAllBranches()
                                 // the relevant state is stored in our locals.
                                 if (currentBranch is null)
                                 {
-                                    EmitStackPush(startingCapturePos is not null ?
+                                    EmitStackPush(stackCookie + i, startingCapturePos is not null ?
                                         [i.ToString(), startingPos, startingCapturePos] :
                                         [i.ToString(), startingPos]);
                                 }
@@ -1966,11 +2036,12 @@ void EmitAllBranches()
                         string switchClause;
                         if (currentBranch is null)
                         {
-                            // We're in a loop, so we use the backtracking stack to persist our state. Pop it off.
-                            EmitStackPop(startingCapturePos is not null ?
+                            // We're in a loop, so we use the backtracking stack to persist our state.
+                            // Pop it off and validate the stack position.
+                            EmitStackPop(0, startingCapturePos is not null ?
                                 [startingCapturePos, startingPos] :
                                 [startingPos]);
-                            switchClause = StackPop();
+                            switchClause = ValidateStackCookieWithAdditionAndReturnPoppedStack(stackCookie);
                         }
                         else
                         {
@@ -2070,6 +2141,7 @@ void EmitBackreferenceConditional(RegexNode node)
 
                 // We're branching in a complicated fashion.  Make sure sliceStaticPos is 0.
                 TransferSliceStaticPosToPos();
+                int stackCookie = CreateStackCookie();
 
                 // Get the capture number to test.
                 int capnum = RegexParser.MapCaptureNumber(node.M, rm.Tree.CaptureNumberSparseMapping);
@@ -2201,7 +2273,7 @@ void EmitBackreferenceConditional(RegexNode node)
                     // the local.
                     if (isInLoop)
                     {
-                        EmitStackPop(resumeAt);
+                        EmitStackPop(stackCookie, resumeAt);
                     }
                     using (EmitBlock(writer, $"switch ({resumeAt})"))
                     {
@@ -2230,7 +2302,7 @@ void EmitBackreferenceConditional(RegexNode node)
                     // so finish outputting our backtracking logic, which involves pushing onto the stack which
                     // branch to backtrack into.  If we're not in a loop, though, nothing else can overwrite this local
                     // in the interim, so we can avoid pushing it.
-                    EmitStackPush(resumeAt);
+                    EmitStackPush(stackCookie, resumeAt);
                 }
             }
 
@@ -2298,10 +2370,19 @@ void EmitExpressionConditional(RegexNode node)
                 writer.WriteLine();
                 int startingSliceStaticPos = sliceStaticPos;
 
-                // Emit the child. The condition expression is a zero-width assertion, which is atomic,
+                // Emit the condition. The condition expression is a zero-width assertion, which is atomic,
                 // so prevent backtracking into it.
                 writer.WriteLine("// Condition:");
-                EmitNode(condition);
+                if (rm.Analysis.MayBacktrack(condition))
+                {
+                    // Condition expressions are treated like positive lookarounds and thus are implicitly atomic,
+                    // so we need to emit the node as atomic if it might backtrack.
+                    EmitAtomic(node, null);
+                }
+                else
+                {
+                    EmitNode(condition);
+                }
                 writer.WriteLine();
                 doneLabel = originalDoneLabel;
 
@@ -2380,11 +2461,13 @@ void EmitExpressionConditional(RegexNode node)
                     doneLabel = backtrack;
                     MarkLabel(backtrack, emitSemicolon: false);
 
+                    int stackCookie = CreateStackCookie();
+
                     if (isInLoop)
                     {
                         // If we're not in a loop, the local will maintain its value until backtracking occurs.
                         // If we are in a loop, multiple iterations need their own value, so we need to use the stack.
-                        EmitStackPop(resumeAt);
+                        EmitStackPop(stackCookie, resumeAt);
                     }
 
                     using (EmitBlock(writer, $"switch ({resumeAt})"))
@@ -2405,7 +2488,7 @@ void EmitExpressionConditional(RegexNode node)
                     MarkLabel(endConditional, emitSemicolon: !isInLoop);
                     if (isInLoop)
                     {
-                        EmitStackPush(resumeAt);
+                        EmitStackPush(stackCookie, resumeAt);
                     }
                 }
             }
@@ -2477,12 +2560,13 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null)
                     // pushes/pops the starting position before falling through.
                     writer.WriteLine();
 
+                    int stackCookie = CreateStackCookie();
                     if (isInLoop)
                     {
                         // If we're in a loop, different iterations of the loop need their own
                         // starting position, so push it on to the stack.  If we're not in a loop,
                         // the local will maintain its value and will suffice.
-                        EmitStackPush(startingPos);
+                        EmitStackPush(stackCookie, startingPos);
                     }
 
                     // Skip past the backtracking section
@@ -2495,7 +2579,7 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null)
                     MarkLabel(backtrack, emitSemicolon: false);
                     if (isInLoop)
                     {
-                        EmitStackPop(startingPos);
+                        EmitStackPop(stackCookie, startingPos);
                     }
                     Goto(doneLabel);
                     writer.WriteLine();
@@ -2589,6 +2673,7 @@ void EmitNegativeLookaroundAssertion(RegexNode node)
                 RegexNode child = node.Child(0);
 
                 // Ensure we're able to uncapture anything captured by the child.
+                int stackCookie = CreateStackCookie();
                 bool isInLoop = false;
                 string? capturePos = null;
                 bool hasCaptures = rm.Analysis.MayContainCapture(child);
@@ -2599,7 +2684,7 @@ void EmitNegativeLookaroundAssertion(RegexNode node)
                     isInLoop = rm.Analysis.IsInLoop(node);
                     if (isInLoop)
                     {
-                        EmitStackPush("base.Crawlpos()");
+                        EmitStackPush(stackCookie, "base.Crawlpos()");
                     }
                     else
                     {
@@ -2623,6 +2708,12 @@ void EmitNegativeLookaroundAssertion(RegexNode node)
                 // If the generated code ends up here, it matched the lookaround, which actually
                 // means failure for a _negative_ lookaround, so we need to jump to the original done.
                 writer.WriteLine();
+                if (hasCaptures && isInLoop)
+                {
+                    // Pop the crawl position from the stack.
+                    writer.WriteLine("stackpos--;");
+                    EmitStackCookieValidate(stackCookie);
+                }
                 Goto(originalDoneLabel);
                 writer.WriteLine();
 
@@ -2637,7 +2728,15 @@ void EmitNegativeLookaroundAssertion(RegexNode node)
                 // And uncapture anything if necessary. Negative lookaround captures don't persist beyond the lookaround.
                 if (hasCaptures)
                 {
-                    EmitUncaptureUntil(isInLoop ? StackPop() : capturePos!);
+                    if (isInLoop)
+                    {
+                        EmitUncaptureUntil(StackPop());
+                        EmitStackCookieValidate(stackCookie);
+                    }
+                    else
+                    {
+                        EmitUncaptureUntil(capturePos!);
+                    }
                 }
 
                 doneLabel = originalDoneLabel;
@@ -2817,8 +2916,8 @@ void EmitNode(RegexNode node, RegexNode? subsequent = null, bool emitLengthCheck
             // Emits the node for an atomic.
             void EmitAtomic(RegexNode node, RegexNode? subsequent)
             {
-                Debug.Assert(node.Kind is RegexNodeKind.Atomic or RegexNodeKind.PositiveLookaround or RegexNodeKind.NegativeLookaround, $"Unexpected type: {node.Kind}");
-                Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}");
+                Debug.Assert(node.Kind is RegexNodeKind.Atomic or RegexNodeKind.PositiveLookaround or RegexNodeKind.NegativeLookaround or RegexNodeKind.ExpressionConditional, $"Unexpected type: {node.Kind}");
+                Debug.Assert(node.Kind is RegexNodeKind.ExpressionConditional ? node.ChildCount() >= 1 : node.ChildCount() == 1, $"Unexpected number of children: {node.ChildCount()}");
                 Debug.Assert(rm.Analysis.MayBacktrack(node.Child(0)), "Expected child to potentially backtrack");
 
                 // Grab the current done label and the current backtracking position.  The purpose of the atomic node
@@ -3227,6 +3326,7 @@ void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitL
                 // point we decrement the matched count as long as it's above the minimum
                 // required, and try again by flowing to everything that comes after this.
                 MarkLabel(backtrackingLabel, emitSemicolon: false);
+                int stackCookie = CreateStackCookie();
                 string? capturePos = null;
                 if (isInLoop)
                 {
@@ -3239,7 +3339,7 @@ void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitL
                     {
                         EmitUncaptureUntil(StackPop());
                     }
-                    EmitStackPop(endingPos, startingPos);
+                    EmitStackPop(stackCookie, endingPos, startingPos);
                 }
                 else if (expressionHasCaptures)
                 {
@@ -3294,7 +3394,7 @@ void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitL
                     // We're in a loop and thus can't rely on locals correctly holding the state we
                     // need (the locals could be overwritten by a subsequent iteration).  Push the state
                     // on to the backtracking stack.
-                    EmitStackPush(expressionHasCaptures ?
+                    EmitStackPush(stackCookie, expressionHasCaptures ?
                         [startingPos, endingPos, "base.Crawlpos()"] :
                         [startingPos, endingPos]);
                 }
@@ -3412,11 +3512,10 @@ void EmitSingleCharLazy(RegexNode node, RegexNode? subsequent = null, bool emitL
                 {
                     if (iterationCount is null &&
                         node.Kind is RegexNodeKind.Notonelazy &&
-                        subsequent?.FindStartingLiteral(4) is RegexNode.StartingLiteralData literal && // 5 == max efficiently optimized by IndexOfAny, and we need to reserve 1 for node.Ch
+                        subsequent?.FindStartingLiteral() is RegexNode.StartingLiteralData literal &&
                         !literal.Negated && // not negated; can't search for both the node.Ch and a negated subsequent char with an IndexOf* method
                         (literal.String is not null ||
                          literal.SetChars is not null ||
-                         (literal.AsciiChars is not null && node.Ch < 128) || // for ASCII sets, only allow when the target can be efficiently included in the set
                          literal.Range.LowInclusive == literal.Range.HighInclusive ||
                          (literal.Range.LowInclusive <= node.Ch && node.Ch <= literal.Range.HighInclusive))) // for ranges, only allow when the range overlaps with the target, since there's no accelerated way to search for the union
                     {
@@ -3448,18 +3547,6 @@ literal.SetChars is not null ||
                                 (false, _) => $"{startingPos} = {sliceSpan}.IndexOfAny({EmitSearchValuesOrLiteral($"{node.Ch}{literal.SetChars}".AsSpan(), requiredHelpers)});",
                             });
                         }
-                        else if (literal.AsciiChars is not null) // set of only ASCII characters
-                        {
-                            char[] asciiChars = literal.AsciiChars;
-                            overlap = asciiChars.Contains(node.Ch);
-                            if (!overlap)
-                            {
-                                Debug.Assert(node.Ch < 128);
-                                Array.Resize(ref asciiChars, asciiChars.Length + 1);
-                                asciiChars[asciiChars.Length - 1] = node.Ch;
-                            }
-                            writer.WriteLine($"{startingPos} = {sliceSpan}.IndexOfAny({EmitSearchValues(asciiChars, requiredHelpers)});");
-                        }
                         else if (literal.Range.LowInclusive == literal.Range.HighInclusive) // single char from a RegexNode.One
                         {
                             overlap = literal.Range.LowInclusive == node.Ch;
@@ -3535,9 +3622,10 @@ node.Kind is RegexNodeKind.Setlazy &&
                 if (isInLoop)
                 {
                     writer.WriteLine();
+                    int stackCookie = CreateStackCookie();
 
                     // Store the loop's state.
-                    EmitStackPush(
+                    EmitStackPush(stackCookie,
                         capturePos is not null && iterationCount is not null ? [startingPos, capturePos, iterationCount] :
                         capturePos is not null ? [startingPos, capturePos] :
                         iterationCount is not null ? [startingPos, iterationCount] :
@@ -3553,7 +3641,7 @@ node.Kind is RegexNodeKind.Setlazy &&
                     MarkLabel(backtrack, emitSemicolon: false);
 
                     // Restore the loop's state.
-                    EmitStackPop(
+                    EmitStackPop(stackCookie,
                         capturePos is not null && iterationCount is not null ? [iterationCount, capturePos, startingPos] :
                         capturePos is not null ? [capturePos, startingPos] :
                         iterationCount is not null ? [iterationCount, startingPos] :
@@ -3640,8 +3728,13 @@ void EmitLazy(RegexNode node)
                 // iterations, this state needs to be stored on to the backtracking stack.
                 if (!isAtomic)
                 {
-                    int entriesPerIteration = 1/*pos*/ + (iterationMayBeEmpty ? 2/*startingPos+sawEmpty*/ : 0) + (expressionHasCaptures ? 1/*Crawlpos*/ : 0);
-                    EmitStackPush(
+                    int stackCookie = CreateStackCookie();
+                    int entriesPerIteration =
+                        1/*pos*/ +
+                        (iterationMayBeEmpty ? 2/*startingPos+sawEmpty*/ : 0) +
+                        (expressionHasCaptures ? 1/*Crawlpos*/ : 0) +
+                        (stackCookie != 0 ? 1 : 0);
+                    EmitStackPush(stackCookie,
                         expressionHasCaptures && iterationMayBeEmpty ? ["pos", startingPos!, sawEmpty!, "base.Crawlpos()"] :
                         iterationMayBeEmpty ? ["pos", startingPos!, sawEmpty!] :
                         expressionHasCaptures ? ["pos", "base.Crawlpos()"] :
@@ -3721,7 +3814,7 @@ void EmitLazy(RegexNode node)
                         {
                             EmitUncaptureUntil(StackPop());
                         }
-                        EmitStackPop(iterationMayBeEmpty ?
+                        EmitStackPop(stackCookie, iterationMayBeEmpty ?
                             [sawEmpty!, startingPos!, "pos"] :
                             ["pos"]);
                         SliceInputSpan();
@@ -3778,7 +3871,8 @@ void EmitLazy(RegexNode node)
                     // of another loop, then any number of iterations might have such state that needs to be stored,
                     // and thus it needs to be pushed on to the backtracking stack.
                     bool isInLoop = rm.Analysis.IsInLoop(node);
-                    EmitStackPush(
+                    stackCookie = CreateStackCookie();
+                    EmitStackPush(stackCookie,
                         !isInLoop ? (expressionHasCaptures ? ["pos", "base.Crawlpos()"] : ["pos"]) :
                         iterationMayBeEmpty ? (expressionHasCaptures ? ["pos", iterationCount, startingPos!, sawEmpty!, "base.Crawlpos()"] : ["pos", iterationCount, startingPos!, sawEmpty!]) :
                         expressionHasCaptures ? ["pos", iterationCount, "base.Crawlpos()"] :
@@ -3800,7 +3894,7 @@ void EmitLazy(RegexNode node)
                     {
                         EmitUncaptureUntil(StackPop());
                     }
-                    EmitStackPop(
+                    EmitStackPop(stackCookie,
                         !isInLoop ? ["pos"] :
                         iterationMayBeEmpty ? [sawEmpty!, startingPos!, iterationCount, "pos"] :
                         [iterationCount, "pos"]);
@@ -4183,6 +4277,7 @@ void EmitLoop(RegexNode node)
 
                 int minIterations = node.M;
                 int maxIterations = node.N;
+                int stackCookie = CreateStackCookie();
 
                 // Special-case some repeaters.
                 if (minIterations == maxIterations)
@@ -4261,7 +4356,7 @@ void EmitLoop(RegexNode node)
                 // need to know where each iteration began so when backtracking we can jump back to that location.  This is
                 // true even if the loop is atomic, as we might need to backtrack within the loop in order to match the
                 // minimum iteration count.
-                EmitStackPush(
+                EmitStackPush(stackCookie,
                     expressionHasCaptures && iterationMayBeEmpty ? ["base.Crawlpos()", startingPos!, "pos"] :
                     expressionHasCaptures ? ["base.Crawlpos()", "pos"] :
                     iterationMayBeEmpty ? [startingPos!, "pos"] :
@@ -4371,13 +4466,14 @@ void EmitLoop(RegexNode node)
                     writer.WriteLine("// Unable to match the remainder of the expression after exhausting the loop.");
                     Goto(originalDoneLabel);
                 }
-                EmitStackPop(iterationMayBeEmpty ?
+                EmitStackPop(0, iterationMayBeEmpty ? // stack cookie handled is explicitly 0 to handle it below
                     ["pos", startingPos!] :
                     ["pos"]);
                 if (expressionHasCaptures)
                 {
                     EmitUncaptureUntil(StackPop());
                 }
+                EmitStackCookieValidate(stackCookie);
                 SliceInputSpan();
 
                 // If there's a required minimum iteration count, validate now that we've processed enough iterations.
@@ -4487,7 +4583,8 @@ void EmitLoop(RegexNode node)
                         writer.WriteLine();
 
                         // Store the loop's state
-                        EmitStackPush(
+                        stackCookie = CreateStackCookie();
+                        EmitStackPush(stackCookie,
                             startingPos is not null && startingStackpos is not null ? [startingPos, startingStackpos, iterationCount] :
                             startingPos is not null ? [startingPos, iterationCount] :
                             startingStackpos is not null ? [startingStackpos, iterationCount] :
@@ -4501,7 +4598,7 @@ void EmitLoop(RegexNode node)
                         // Emit a backtracking section that restores the loop's state and then jumps to the previous done label
                         string backtrack = ReserveName("LoopBacktrack");
                         MarkLabel(backtrack, emitSemicolon: false);
-                        EmitStackPop(
+                        EmitStackPop(stackCookie,
                             startingPos is not null && startingStackpos is not null ? [iterationCount, startingStackpos, startingPos] :
                             startingPos is not null ? [iterationCount, startingPos] :
                             startingStackpos is not null ? [iterationCount, startingStackpos] :
@@ -4552,7 +4649,7 @@ void EmitUncaptureUntil(string capturepos)
             }
 
             /// <summary>Pushes values on to the backtracking stack.</summary>
-            void EmitStackPush(params string[] args)
+            void EmitStackPush(int stackCookie, params string[] args)
             {
                 Debug.Assert(args.Length is >= 1);
 
@@ -4596,42 +4693,135 @@ void EmitStackPush(params string[] args)
                     requiredHelpers.Add(key, lines);
                 }
 
+                if (stackCookie != 0)
+                {
+                    EmitStackCookie(stackCookie);
+                }
                 writer.WriteLine($"{HelpersTypeName}.{MethodName}(ref base.runstack!, ref stackpos, {string.Join(", ", args)});");
             }
 
             /// <summary>Pops values from the backtracking stack into the specified locations.</summary>
-            void EmitStackPop(params string[] args)
+            void EmitStackPop(int stackCookie, params string[] args)
             {
                 Debug.Assert(args.Length is >= 1);
 
                 if (args.Length == 1)
                 {
                     writer.WriteLine($"{args[0]} = {StackPop()};");
-                    return;
                 }
-
-                const string MethodName = "StackPop";
-                string key = $"{MethodName}{args.Length}";
-
-                if (!requiredHelpers.ContainsKey(key))
+                else
                 {
-                    var lines = new string[5 + args.Length];
-                    lines[0] = $"/// <summary>Pops {args.Length} value{(args.Length == 1 ? "" : "s")} from the backtracking stack.</summary>";
-                    lines[1] = $"[MethodImpl(MethodImplOptions.AggressiveInlining)]";
-                    lines[2] = $"internal static void {MethodName}(int[] stack, ref int pos{FormatN(", out int arg{0}", args.Length)})";
-                    lines[3] = $"{{";
-                    for (int i = 0; i < args.Length; i++)
+                    const string MethodName = "StackPop";
+                    string key = $"{MethodName}{args.Length}";
+
+                    if (!requiredHelpers.ContainsKey(key))
                     {
-                        lines[4 + i] = $"    arg{i} = stack[--pos];";
+                        var lines = new string[5 + args.Length];
+                        lines[0] = $"/// <summary>Pops {args.Length} value{(args.Length == 1 ? "" : "s")} from the backtracking stack.</summary>";
+                        lines[1] = $"[MethodImpl(MethodImplOptions.AggressiveInlining)]";
+                        lines[2] = $"internal static void {MethodName}(int[] stack, ref int pos{FormatN(", out int arg{0}", args.Length)})";
+                        lines[3] = $"{{";
+                        for (int i = 0; i < args.Length; i++)
+                        {
+                            lines[4 + i] = $"    arg{i} = stack[--pos];";
+                        }
+                        lines[4 + args.Length] = $"}}";
+
+                        requiredHelpers.Add(key, lines);
                     }
-                    lines[4 + args.Length] = $"}}";
 
-                    requiredHelpers.Add(key, lines);
+                    writer.WriteLine($"{HelpersTypeName}.{MethodName}(base.runstack!, ref stackpos, out {string.Join(", out ", args)});");
+                }
+
+                if (stackCookie != 0)
+                {
+                    EmitStackCookieValidate(stackCookie);
                 }
+            }
+
+            /// <summary>Initializes a debug stack cookie for a new backtracking stack push.</summary>
+            int CreateStackCookie() =>
+#if DEBUG
+#pragma warning disable RS1035 // Random is banned from generators due to non-determinism, but this Random is seeded with a constant and it's only for debug builds
+                stackCookieGenerator.Next() + 1;
+#pragma warning restore RS1035
+#else
+                0;
+#endif
+
+            /// <summary>Emits a debug stack cookie for a new backtracking stack push.</summary>
+            void EmitStackCookie(int stackCookie)
+            {
+#if DEBUG
+                EmitStackPush(0, stackCookie.ToString());
+#endif
+            }
 
-                writer.WriteLine($"{HelpersTypeName}.{MethodName}(base.runstack!, ref stackpos, out {string.Join(", out ", args)});");
+            /// <summary>Emits validation for a debug stack cookie.</summary>
+            void EmitStackCookieValidate(int stackCookie)
+            {
+#if DEBUG
+                writer.WriteLine($"{StackCookieValidate(stackCookie)};");
+#endif
             }
 
+            /// <summary>
+            /// Returns an expression that:
+            /// In debug, pops item 1 from the backtracking stack, pops item 2 and validates it against the cookie, then evaluates to item1.
+            /// In release, pops and evaluates to an item from the backtracking stack.
+            /// </summary>
+            string ValidateStackCookieWithAdditionAndReturnPoppedStack(int stackCookie)
+            {
+#if DEBUG
+                const string MethodName = "ValidateStackCookieWithAdditionAndReturnPoppedStack";
+                if (!requiredHelpers.ContainsKey(MethodName))
+                {
+                    requiredHelpers.Add(MethodName,
+                    [
+                        $"/// <summary>Validates that a stack cookie popped off the backtracking stack holds the expected value. Debug only.</summary>",
+                        $"internal static int {MethodName}(int poppedStack, int expectedCookie, int actualCookie)",
+                        $"{{",
+                        $"    expectedCookie += poppedStack;",
+                        $"    if (expectedCookie != actualCookie)",
+                        $"    {{",
+                        $"          throw new Exception($\"Backtracking stack imbalance detected. Expected {{expectedCookie}}. Actual {{actualCookie}}.\");",
+                        $"    }}",
+                        $"    return poppedStack;",
+                        $"}}",
+                    ]);
+                }
+
+                return $"{HelpersTypeName}.{MethodName}({StackPop()}, {stackCookie}, {StackPop()})";
+#else
+                return StackPop();
+#endif
+            }
+
+#if DEBUG
+            /// <summary>Returns an expression that validates and returns a debug stack cookie.</summary>
+            string StackCookieValidate(int stackCookie)
+            {
+                const string MethodName = "ValidateStackCookie";
+                if (!requiredHelpers.ContainsKey(MethodName))
+                {
+                    requiredHelpers.Add(MethodName,
+                    [
+                        $"/// <summary>Validates that a stack cookie popped off the backtracking stack holds the expected value. Debug only.</summary>",
+                        $"internal static int {MethodName}(int expected, int actual)",
+                        $"{{",
+                        $"    if (expected != actual)",
+                        $"    {{",
+                        $"        throw new Exception($\"Backtracking stack imbalance detected. Expected {{expected}}. Actual {{actual}}.\");",
+                        $"    }}",
+                        $"    return actual;",
+                        $"}}",
+                    ]);
+                }
+
+                return $"{HelpersTypeName}.{MethodName}({stackCookie}, {StackPop()})";
+            }
+#endif
+
             /// <summary>Expression for popping the next item from the backtracking stack.</summary>
             string StackPop() => "base.runstack![--stackpos]";
 
@@ -4727,11 +4917,10 @@ private static bool TryEmitIndexOf(
             {
                 bool negated = RegexCharClass.IsNegated(node.Str) ^ negate;
 
-                Span<char> setChars = stackalloc char[5]; // current max that's vectorized
-                int setCharsCount = RegexCharClass.GetSetChars(node.Str, setChars);
-
-                // Prefer IndexOfAnyInRange over IndexOfAny for sets of 3-5 values that fit in a single range.
-                if (setCharsCount is not (1 or 2) && RegexCharClass.TryGetSingleRange(node.Str, out char lowInclusive, out char highInclusive))
+                // IndexOfAny{Except}InRange
+                // Prefer IndexOfAnyInRange over IndexOfAny, except for tiny ranges (1 or 2 items) that IndexOfAny handles more efficiently
+                if (RegexCharClass.TryGetSingleRange(node.Str, out char lowInclusive, out char highInclusive) &&
+                    (highInclusive - lowInclusive) > 1)
                 {
                     string indexOfAnyInRangeName = !negated ?
                         "IndexOfAnyInRange" :
@@ -4743,13 +4932,15 @@ private static bool TryEmitIndexOf(
                     return true;
                 }
 
-                if (setCharsCount > 0)
+                // IndexOfAny{Except}(ch1, ...)
+                Span<char> setChars = stackalloc char[128];
+                setChars = setChars.Slice(0, RegexCharClass.GetSetChars(node.Str, setChars));
+                if (!setChars.IsEmpty)
                 {
                     (string indexOfName, string indexOfAnyName) = !negated ?
                         ("IndexOf", "IndexOfAny") :
                         ("IndexOfAnyExcept", "IndexOfAnyExcept");
 
-                    setChars = setChars.Slice(0, setCharsCount);
                     indexOfExpr = setChars.Length switch
                     {
                         1 => $"{last}{indexOfName}({Literal(setChars[0])})",
@@ -4761,18 +4952,6 @@ private static bool TryEmitIndexOf(
                     literalLength = 1;
                     return true;
                 }
-
-                if (RegexCharClass.TryGetAsciiSetChars(node.Str, out char[]? asciiChars))
-                {
-                    string indexOfAnyName = !negated ?
-                        "IndexOfAny" :
-                        "IndexOfAnyExcept";
-
-                    indexOfExpr = $"{last}{indexOfAnyName}({EmitSearchValues(asciiChars, requiredHelpers)})";
-
-                    literalLength = 1;
-                    return true;
-                }
             }
 
             indexOfExpr = null;
@@ -5226,7 +5405,7 @@ private static string GetSHA256FieldName(string prefix, string toEncode)
         {
 #pragma warning disable CA1850 // SHA256.HashData isn't available on netstandard2.0
             using SHA256 sha = SHA256.Create();
-            return $"{prefix}{BitConverter.ToString(sha.ComputeHash(Encoding.UTF8.GetBytes(toEncode))).Replace("-", "")}";
+            return $"{prefix}{ToHexStringNoDashes(Encoding.UTF8.GetBytes(toEncode))}";
 #pragma warning restore CA1850
         }
 
@@ -5422,6 +5601,13 @@ private static string DescribeLoop(RegexNode node, RegexMethod rm)
             return style + bounds;
         }
 
+        private static string ToHexStringNoDashes(byte[] bytes) =>
+#if NETCOREAPP
+            Convert.ToHexString(bytes);
+#else
+            BitConverter.ToString(bytes).Replace("-", "");
+#endif
+
         private static FinishEmitBlock EmitBlock(IndentedTextWriter writer, string? clause, bool faux = false)
         {
             if (clause is not null)
diff --git a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj
index ec5b22d79229..7208f5a5185d 100644
--- a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj
+++ b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj
@@ -11,7 +11,7 @@
     <EnableDefaultEmbeddedResourceItems>false</EnableDefaultEmbeddedResourceItems>
     <UsingToolXliff>true</UsingToolXliff>
     <CLSCompliant>false</CLSCompliant>
-    <NoWarn>$(NoWarn);CS0436;CS0649</NoWarn>
+    <NoWarn>$(NoWarn);CS0436;CS0649;CA1872</NoWarn>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <AnalyzerLanguage>cs</AnalyzerLanguage>
   </PropertyGroup>
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexAssemblyCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexAssemblyCompiler.cs
index 2e5dc74f733d..662d8322e175 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexAssemblyCompiler.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexAssemblyCompiler.cs
@@ -25,7 +25,7 @@ internal sealed class RegexAssemblyCompiler : RegexCompiler
         /// <summary>Type count used to augment generated type names to create unique names.</summary>
         private static int s_typeCount;
 
-        private readonly AssemblyBuilder _assembly;
+        private readonly PersistedAssemblyBuilder _assembly;
         private readonly ModuleBuilder _module;
 
         internal RegexAssemblyCompiler(AssemblyName an, CustomAttributeBuilder[]? attribs, string? resourceFile)
@@ -36,7 +36,7 @@ internal RegexAssemblyCompiler(AssemblyName an, CustomAttributeBuilder[]? attrib
                 throw new PlatformNotSupportedException();
             }
 
-            _assembly = AssemblyBuilder.DefinePersistedAssembly(an, typeof(object).Assembly, attribs is not null ? new List<CustomAttributeBuilder>(attribs) : null) ??
+            _assembly = new PersistedAssemblyBuilder(an, typeof(object).Assembly, attribs is not null ? new List<CustomAttributeBuilder>(attribs) : null) ??
                 throw new InvalidOperationException("DefinePersistedAssembly returned null");
 
             _module = _assembly.DefineDynamicModule(an.Name + ".dll");
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCaseEquivalences.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCaseEquivalences.cs
index 4367da61026d..70587b19a6c0 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCaseEquivalences.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCaseEquivalences.cs
@@ -52,7 +52,7 @@ public static bool TryFindCaseEquivalencesForCharWithIBehavior(char c, CultureIn
                     // Default
                     _ => default
                 };
-                return equivalences != default;
+                return !equivalences.IsEmpty;
             }
             else
             {
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
index c56ad4b5b6e0..5666498347e4 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
@@ -815,17 +815,23 @@ public static bool TryGetDoubleRange(
         /// If 0 is returned, no assumptions can be made about the characters.
         /// </returns>
         /// <remarks>
-        /// Only considers character classes that only contain sets (no categories)
-        /// and no subtraction... just simple sets containing starting/ending pairs.
-        /// The returned characters may be negated: if IsNegated(set) is false, then
-        /// the returned characters are the only ones that match; if it returns true,
-        /// then the returned characters are the only ones that don't match.
+        /// Only considers character classes that only contain sets (no categories),
+        /// just simple sets containing starting/ending pairs (subtraction from those pairs
+        /// is factored in, however).The returned characters may be negated: if IsNegated(set)
+        /// is false, then the returned characters are the only ones that match; if it returns
+        /// true, then the returned characters are the only ones that don't match.
         /// </remarks>
         public static int GetSetChars(string set, Span<char> chars)
         {
             // We get the characters by enumerating the set portion, so we validate that it's
             // set up to enable that, e.g. no categories.
-            if (!CanEasilyEnumerateSetContents(set))
+            if (!CanEasilyEnumerateSetContents(set, out bool hasSubtraction))
+            {
+                return 0;
+            }
+
+            // Negation with subtraction is too cumbersome to reason about efficiently.
+            if (hasSubtraction && IsNegated(set))
             {
                 return 0;
             }
@@ -837,17 +843,30 @@ public static int GetSetChars(string set, Span<char> chars)
             // based on it a) complicating things, and b) it being really unlikely to
             // be part of a small set.
             int setLength = set[SetLengthIndex];
-            int count = 0;
+            int count = 0, evaluated = 0;
             for (int i = SetStartIndex; i < SetStartIndex + setLength; i += 2)
             {
                 int curSetEnd = set[i + 1];
                 for (int c = set[i]; c < curSetEnd; c++)
                 {
-                    if (count >= chars.Length)
+                    // Keep track of how many characters we've checked. This could work
+                    // just comparing count rather than evaluated, but we also want to
+                    // limit how much work is done here, which we can do by constraining
+                    // the number of checks to the size of the storage provided.
+                    if (++evaluated > chars.Length)
                     {
                         return 0;
                     }
 
+                    // If the set is all ranges but has a subtracted class,
+                    // validate the char is actually in the set prior to storing it:
+                    // it might be in the subtracted range.
+                    if (hasSubtraction && !CharInClass((char)c, set))
+                    {
+                        continue;
+                    }
+
+                    Debug.Assert(count <= evaluated);
                     chars[count++] = (char)c;
                 }
             }
@@ -855,22 +874,6 @@ public static int GetSetChars(string set, Span<char> chars)
             return count;
         }
 
-        public static bool TryGetAsciiSetChars(string set, [NotNullWhen(true)] out char[]? asciiChars)
-        {
-            Span<char> chars = stackalloc char[128];
-
-            chars = chars.Slice(0, GetSetChars(set, chars));
-
-            if (chars.IsEmpty || !IsAscii(chars))
-            {
-                asciiChars = null;
-                return false;
-            }
-
-            asciiChars = chars.ToArray();
-            return true;
-        }
-
         /// <summary>
         /// Determines whether two sets may overlap.
         /// </summary>
@@ -1054,6 +1057,21 @@ public static bool IsAscii(ReadOnlySpan<char> s)
 #endif
         }
 
+        /// <summary>Gets whether the set description string is for two ASCII letters that case to each other under OrdinalIgnoreCase rules.</summary>
+        public static bool SetContainsAsciiOrdinalIgnoreCaseCharacter(string set, Span<char> twoChars)
+        {
+            Debug.Assert(twoChars.Length >= 2);
+            return
+                !IsNegated(set) &&
+                GetSetChars(set, twoChars) == 2 &&
+                twoChars[0] < 128 &&
+                twoChars[1] < 128 &&
+                twoChars[0] != twoChars[1] &&
+                char.IsLetter(twoChars[0]) &&
+                char.IsLetter(twoChars[1]) &&
+                (twoChars[0] | 0x20) == (twoChars[1] | 0x20);
+        }
+
         /// <summary>Gets whether we can iterate through the set list pairs in order to completely enumerate the set's contents.</summary>
         /// <remarks>This may enumerate negated characters if the set is negated.  This will return false if the set has subtraction.</remarks>
         private static bool CanEasilyEnumerateSetContents(string set) =>
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
index 58abbb8b181e..082087939bcd 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
@@ -460,6 +460,8 @@ protected void EmitTryFindNextPossibleStartingPosition()
             {
                 case FindNextStartingPositionMode.LeadingString_LeftToRight:
                 case FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight:
+                case FindNextStartingPositionMode.LeadingStrings_LeftToRight:
+                case FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight:
                 case FindNextStartingPositionMode.FixedDistanceString_LeftToRight:
                     EmitIndexOfString_LeftToRight();
                     break;
@@ -745,15 +747,19 @@ bool EmitAnchors()
                 return false;
             }
 
-            // Emits a case-sensitive left-to-right search for a substring.
+            // Emits a case-sensitive left-to-right search for a substring or substrings.
             void EmitIndexOfString_LeftToRight()
             {
                 RegexFindOptimizations opts = _regexTree.FindOptimizations;
-                Debug.Assert(opts.FindMode is FindNextStartingPositionMode.LeadingString_LeftToRight or FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight or FindNextStartingPositionMode.FixedDistanceString_LeftToRight);
+                Debug.Assert(opts.FindMode is FindNextStartingPositionMode.LeadingString_LeftToRight or
+                                              FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight or
+                                              FindNextStartingPositionMode.FixedDistanceString_LeftToRight or
+                                              FindNextStartingPositionMode.LeadingStrings_LeftToRight or
+                                              FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight);
 
                 using RentedLocalBuilder i = RentInt32Local();
 
-                // int i = inputSpan.Slice(pos).IndexOf(prefix);
+                // int i = inputSpan.Slice(pos)...
                 Ldloca(inputSpan);
                 Ldloc(pos);
                 if (opts.FindMode is FindNextStartingPositionMode.FixedDistanceString_LeftToRight &&
@@ -763,11 +769,21 @@ void EmitIndexOfString_LeftToRight()
                     Add();
                 }
                 Call(s_spanSliceIntMethod);
-                string literalString = opts.FindMode is FindNextStartingPositionMode.LeadingString_LeftToRight or FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight ?
-                    opts.LeadingPrefix :
-                    opts.FixedDistanceLiteral.String!;
-                LoadSearchValues([literalString], opts.FindMode is FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal);
-                Call(s_spanIndexOfAnySearchValuesString);
+
+                // ...IndexOf(prefix);
+                if (opts.FindMode is FindNextStartingPositionMode.LeadingStrings_LeftToRight or FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight)
+                {
+                    LoadSearchValues(opts.LeadingPrefixes, opts.FindMode is FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal);
+                    Call(s_spanIndexOfAnySearchValuesString);
+                }
+                else
+                {
+                    string literalString = opts.FindMode is FindNextStartingPositionMode.LeadingString_LeftToRight or FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight ?
+                        opts.LeadingPrefix :
+                        opts.FixedDistanceLiteral.String!;
+                    LoadSearchValues([literalString], opts.FindMode is FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal);
+                    Call(s_spanIndexOfAnySearchValuesString);
+                }
                 Stloc(i);
 
                 // if (i < 0) goto ReturnFalse;
@@ -920,20 +936,10 @@ void EmitFixedSet_LeftToRight()
                                 Call(primarySet.Negated ? s_spanIndexOfAnyExceptCharCharChar : s_spanIndexOfAnyCharCharChar);
                                 break;
 
-                            case 4 or 5:
-                                // tmp = ...IndexOfAny("abcd");
-                                // Note that this case differs slightly from the source generator, where it might choose to use
-                                // SearchValues instead of a literal, but there's extra cost to doing so for RegexCompiler so
-                                // it just always uses IndexOfAny(span).
-                                Ldstr(new string(primarySet.Chars));
-                                Call(s_stringAsSpanMethod);
-                                Call(primarySet.Negated ? s_spanIndexOfAnyExceptSpan : s_spanIndexOfAnySpan);
-                                break;
-
                             default:
+                                // tmp = ...IndexOfAny(setChars);
                                 // tmp = ...IndexOfAny(s_searchValues);
-                                LoadSearchValues(primarySet.Chars);
-                                Call(primarySet.Negated ? s_spanIndexOfAnyExceptSearchValues : s_spanIndexOfAnySearchValues);
+                                EmitIndexOfAnyWithSearchValuesOrLiteral(primarySet.Chars, except: primarySet.Negated);
                                 break;
                         }
                     }
@@ -2233,9 +2239,18 @@ void EmitExpressionConditional(RegexNode node)
                 Stloc(startingPos);
                 int startingSliceStaticPos = sliceStaticPos;
 
-                // Emit the child. The condition expression is a zero-width assertion, which is atomic,
+                // Emit the condition. The condition expression is a zero-width assertion, which is atomic,
                 // so prevent backtracking into it.
-                EmitNode(condition);
+                if (analysis.MayBacktrack(condition))
+                {
+                    // Condition expressions are treated like positive lookarounds and thus are implicitly atomic,
+                    // so we need to emit the node as atomic if it might backtrack.
+                    EmitAtomic(node, null);
+                }
+                else
+                {
+                    EmitNode(condition);
+                }
                 doneLabel = originalDoneLabel;
 
                 // After the condition completes successfully, reset the text positions.
@@ -2625,6 +2640,15 @@ void EmitNegativeLookaroundAssertion(RegexNode node)
                 // If the generated code ends up here, it matched the lookaround, which actually
                 // means failure for a _negative_ lookaround, so we need to jump to the original done.
                 // goto originalDoneLabel;
+                if (capturePos is not null && isInLoop)
+                {
+                    // Pop the crawl position from the stack.
+                    // stackpos--;
+                    Ldloc(stackpos);
+                    Ldc(1);
+                    Sub();
+                    Stloc(stackpos);
+                }
                 BrFar(originalDoneLabel);
 
                 // Failures (success for a negative lookaround) jump here.
@@ -2803,8 +2827,8 @@ void EmitNode(RegexNode node, RegexNode? subsequent = null, bool emitLengthCheck
             // Emits the node for an atomic.
             void EmitAtomic(RegexNode node, RegexNode? subsequent)
             {
-                Debug.Assert(node.Kind is RegexNodeKind.Atomic or RegexNodeKind.PositiveLookaround or RegexNodeKind.NegativeLookaround, $"Unexpected type: {node.Kind}");
-                Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}");
+                Debug.Assert(node.Kind is RegexNodeKind.Atomic or RegexNodeKind.PositiveLookaround or RegexNodeKind.NegativeLookaround or RegexNodeKind.ExpressionConditional, $"Unexpected type: {node.Kind}");
+                Debug.Assert(node.Kind is RegexNodeKind.ExpressionConditional ? node.ChildCount() >= 1 : node.ChildCount() == 1, $"Unexpected number of children: {node.ChildCount()}");
 
                 RegexNode child = node.Child(0);
 
@@ -3563,11 +3587,10 @@ void EmitSingleCharLazy(RegexNode node, RegexNode? subsequent = null, bool emitL
                 if (!rtl &&
                     iterationCount is null &&
                     node.Kind is RegexNodeKind.Notonelazy &&
-                    subsequent?.FindStartingLiteral(4) is RegexNode.StartingLiteralData literal && // 5 == max optimized by IndexOfAny, and we need to reserve 1 for node.Ch
+                    subsequent?.FindStartingLiteral() is RegexNode.StartingLiteralData literal &&
                     !literal.Negated && // not negated; can't search for both the node.Ch and a negated subsequent char with an IndexOf* method
                     (literal.String is not null ||
                      literal.SetChars is not null ||
-                     (literal.AsciiChars is not null && node.Ch < 128) || // for ASCII sets, only allow when the target can be efficiently included in the set
                      literal.Range.LowInclusive == literal.Range.HighInclusive ||
                      (literal.Range.LowInclusive <= node.Ch && node.Ch <= literal.Range.HighInclusive))) // for ranges, only allow when the range overlaps with the target, since there's no accelerated way to search for the union
                 {
@@ -3619,9 +3642,7 @@ literal.SetChars is not null ||
 
                             case (true, _):
                                 // startingPos = slice.IndexOfAny(literal.SetChars);
-                                Ldstr(literal.SetChars);
-                                Call(s_stringAsSpanMethod);
-                                Call(s_spanIndexOfAnySpan);
+                                EmitIndexOfAnyWithSearchValuesOrLiteral(literal.SetChars);
                                 break;
 
                             case (false, 2):
@@ -3634,25 +3655,10 @@ literal.SetChars is not null ||
 
                             case (false, _):
                                 // startingPos = slice.IndexOfAny($"{node.Ch}{literal.SetChars}");
-                                Ldstr($"{node.Ch}{literal.SetChars}");
-                                Call(s_stringAsSpanMethod);
-                                Call(s_spanIndexOfAnySpan);
+                                EmitIndexOfAnyWithSearchValuesOrLiteral($"{node.Ch}{literal.SetChars}");
                                 break;
                         }
                     }
-                    else if (literal.AsciiChars is not null) // set of only ASCII characters
-                    {
-                        char[] asciiChars = literal.AsciiChars;
-                        overlap = asciiChars.AsSpan().Contains(node.Ch);
-                        if (!overlap)
-                        {
-                            Debug.Assert(node.Ch < 128);
-                            Array.Resize(ref asciiChars, asciiChars.Length + 1);
-                            asciiChars[^1] = node.Ch;
-                        }
-                        LoadSearchValues(asciiChars);
-                        Call(s_spanIndexOfAnySearchValues);
-                    }
                     else if (literal.Range.LowInclusive == literal.Range.HighInclusive) // single char from a RegexNode.One
                     {
                         overlap = literal.Range.LowInclusive == node.Ch;
@@ -5134,21 +5140,9 @@ bool CanEmitIndexOf(RegexNode node, out int literalLength)
 
                 if (node.IsSetFamily)
                 {
-                    Span<char> setChars = stackalloc char[5]; // current max that's vectorized
-                    int setCharsCount;
-                    if ((setCharsCount = RegexCharClass.GetSetChars(node.Str, setChars)) > 0)
-                    {
-                        literalLength = 1;
-                        return true;
-                    }
-
-                    if (RegexCharClass.TryGetSingleRange(node.Str, out char lowInclusive, out char highInclusive))
-                    {
-                        literalLength = 1;
-                        return true;
-                    }
-
-                    if (RegexCharClass.TryGetAsciiSetChars(node.Str, out _))
+                    Span<char> setChars = stackalloc char[128];
+                    if (RegexCharClass.TryGetSingleRange(node.Str, out _, out _) ||
+                        RegexCharClass.GetSetChars(node.Str, setChars) > 0)
                     {
                         literalLength = 1;
                         return true;
@@ -5199,26 +5193,11 @@ void EmitIndexOf(RegexNode node, bool useLast, bool negate)
                 {
                     bool negated = RegexCharClass.IsNegated(node.Str) ^ negate;
 
-                    Span<char> setChars = stackalloc char[5]; // current max that's vectorized
-                    int setCharsCount = RegexCharClass.GetSetChars(node.Str, setChars);
-
                     // IndexOfAny{Except}InRange
-                    // Prefer IndexOfAnyInRange over IndexOfAny for sets of 3-5 values that fit in a single range.
-                    if (setCharsCount is not (1 or 2) && RegexCharClass.TryGetSingleRange(node.Str, out char lowInclusive, out char highInclusive))
+                    // Prefer IndexOfAnyInRange over IndexOfAny, except for tiny ranges (1 or 2 items) that IndexOfAny handles more efficiently
+                    if (RegexCharClass.TryGetSingleRange(node.Str, out char lowInclusive, out char highInclusive) &&
+                        (highInclusive - lowInclusive) > 1)
                     {
-                        if (lowInclusive == highInclusive)
-                        {
-                            Ldc(lowInclusive);
-                            Call((useLast, negated) switch
-                            {
-                                (false, false) => s_spanIndexOfChar,
-                                (false, true) => s_spanIndexOfAnyExceptChar,
-                                (true, false) => s_spanLastIndexOfChar,
-                                (true, true) => s_spanLastIndexOfAnyExceptChar,
-                            });
-                            return;
-                        }
-
                         Ldc(lowInclusive);
                         Ldc(highInclusive);
                         Call((useLast, negated) switch
@@ -5232,6 +5211,8 @@ void EmitIndexOf(RegexNode node, bool useLast, bool negate)
                     }
 
                     // IndexOfAny{Except}(ch1, ...)
+                    Span<char> setChars = stackalloc char[128]; // arbitrary cut-off that accomodates all of ASCII and doesn't take too long to compute
+                    int setCharsCount = RegexCharClass.GetSetChars(node.Str, setChars);
                     if (setCharsCount > 0)
                     {
                         setChars = setChars.Slice(0, setCharsCount);
@@ -5274,32 +5255,10 @@ void EmitIndexOf(RegexNode node, bool useLast, bool negate)
                                 return;
 
                             default:
-                                Ldstr(setChars.ToString());
-                                Call(s_stringAsSpanMethod);
-                                Call((useLast, negated) switch
-                                {
-                                    (false, false) => s_spanIndexOfAnySpan,
-                                    (false, true) => s_spanIndexOfAnyExceptSpan,
-                                    (true, false) => s_spanLastIndexOfAnySpan,
-                                    (true, true) => s_spanLastIndexOfAnyExceptSpan,
-                                });
+                                EmitIndexOfAnyWithSearchValuesOrLiteral(setChars, last: useLast, except: negated);
                                 return;
                         }
                     }
-
-                    // IndexOfAny{Except}(SearchValues<char>)
-                    if (RegexCharClass.TryGetAsciiSetChars(node.Str, out char[]? asciiChars))
-                    {
-                        LoadSearchValues(asciiChars);
-                        Call((useLast, negated) switch
-                        {
-                            (false, false) => s_spanIndexOfAnySearchValues,
-                            (false, true) => s_spanIndexOfAnyExceptSearchValues,
-                            (true, false) => s_spanLastIndexOfAnySearchValues,
-                            (true, true) => s_spanLastIndexOfAnyExceptSearchValues,
-                        });
-                        return;
-                    }
                 }
 
                 Debug.Fail("We should never get here. This method should only be called if CanEmitIndexOf returned true, and all of the same cases should be covered.");
@@ -6192,6 +6151,38 @@ private void EmitTimeoutCheckIfNeeded()
             }
         }
 
+        /// <summary>Emits a call to either IndexOfAny("abcd") or IndexOfAny(SearchValues) depending on the <paramref name="chars"/>.</summary>
+        private void EmitIndexOfAnyWithSearchValuesOrLiteral(ReadOnlySpan<char> chars, bool last = false, bool except = false)
+        {
+            Debug.Assert(chars.Length > 3, $"chars.Length == {chars.Length}");
+
+            // SearchValues<char> is faster than a regular IndexOfAny("abcd") for sets of 4/5 values iff they are ASCII.
+            // Only emit SearchValues instances when we know they'll be faster to avoid increasing the startup cost too much.
+            if (chars.Length is 4 or 5 && !RegexCharClass.IsAscii(chars))
+            {
+                Ldstr(chars.ToString());
+                Call(s_stringAsSpanMethod);
+                Call((last, except) switch
+                {
+                    (false, false) => s_spanIndexOfAnySpan,
+                    (false, true) => s_spanIndexOfAnyExceptSpan,
+                    (true, false) => s_spanLastIndexOfAnySpan,
+                    (true, true) => s_spanLastIndexOfAnyExceptSpan,
+                });
+            }
+            else
+            {
+                LoadSearchValues(chars.ToArray());
+                Call((last, except) switch
+                {
+                    (false, false) => s_spanIndexOfAnySearchValues,
+                    (false, true) => s_spanIndexOfAnyExceptSearchValues,
+                    (true, false) => s_spanLastIndexOfAnySearchValues,
+                    (true, true) => s_spanLastIndexOfAnyExceptSearchValues,
+                });
+            }
+        }
+
         /// <summary>
         /// Adds an entry in <see cref="CompiledRegexRunner._searchValues"/> for the given <paramref name="values"/> and emits a load of that initialized value.
         /// </summary>
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs
index f40f48e35a6d..fa90486e7407 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs
@@ -94,7 +94,7 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
                 if (RegexPrefixAnalyzer.FindFirstCharClass(root) is string charClass)
                 {
                     // See if the set is limited to holding only a few characters.
-                    Span<char> scratch = stackalloc char[5]; // max efficiently optimized by IndexOfAny today
+                    Span<char> scratch = stackalloc char[5]; // max efficiently optimized by IndexOfAny today without SearchValues, which isn't used for RTL
                     int scratchCount;
                     char[]? chars = null;
                     if (!RegexCharClass.IsNegated(charClass) &&
@@ -137,7 +137,28 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
                 return;
             }
 
-            // We're now left-to-right only and looking for sets.
+            // We're now left-to-right only and looking for multiple prefixes and/or sets.
+
+            // If there are multiple leading strings, we can search for any of them.
+            if (compiled)
+            {
+                if (RegexPrefixAnalyzer.FindPrefixes(root, ignoreCase: true) is { Length: > 1 } caseInsensitivePrefixes)
+                {
+                    LeadingPrefixes = caseInsensitivePrefixes;
+                    FindMode = FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight;
+                    return;
+                }
+
+                // TODO: While some benchmarks benefit from this significantly, others regressed a bit (in particular those with few
+                //       matches). Before enabling this, we need to investigate the performance impact on real-world scenarios,
+                //       and see if there are ways to reduce the impact.
+                //if (RegexPrefixAnalyzer.FindPrefixes(root, ignoreCase: false) is { Length: > 1 } caseSensitivePrefixes)
+                //{
+                //    LeadingPrefixes = caseSensitivePrefixes;
+                //    FindMode = FindNextStartingPositionMode.LeadingStrings_LeftToRight;
+                //    return;
+                //}
+            }
 
             // Build up a list of all of the sets that are a fixed distance from the start of the expression.
             List<FixedDistanceSet>? fixedDistanceSets = RegexPrefixAnalyzer.FindFixedDistanceSets(root, thorough: !interpreter);
@@ -244,6 +265,9 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
         /// <summary>Gets the leading prefix.  May be an empty string.</summary>
         public string LeadingPrefix { get; } = string.Empty;
 
+        /// <summary>Gets the leading prefixes.  May be an empty array.</summary>
+        public string[] LeadingPrefixes { get; } = Array.Empty<string>();
+
         /// <summary>When in fixed distance literal mode, gets the literal and how far it is from the start of the pattern.</summary>
         public (char Char, string? String, int Distance) FixedDistanceLiteral { get; }
 
@@ -254,7 +278,6 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
         /// <summary>Data about a character class at a fixed offset from the start of any match to a pattern.</summary>
         public struct FixedDistanceSet(char[]? chars, string set, int distance)
         {
-
             /// <summary>The character class description.</summary>
             public string Set = set;
             /// <summary>Whether the <see cref="Set"/> is negated.</summary>
@@ -582,9 +605,9 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan<char> textSpan,
                 case FindNextStartingPositionMode.LeadingSet_LeftToRight:
                     {
                         FixedDistanceSet primarySet = FixedDistanceSets![0];
-                        char[]? chars = primarySet.Chars;
 
                         ReadOnlySpan<char> span = textSpan.Slice(pos);
+                        char[]? chars = primarySet.Chars;
                         if (chars is { Length: <= 5 }) // 5 == currently the max length efficiently handled by IndexOfAny{Except} without SearchValues
                         {
                             int i = primarySet.Negated ? span.IndexOfAnyExcept(chars) : span.IndexOfAny(chars);
@@ -594,6 +617,16 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan<char> textSpan,
                                 return true;
                             }
                         }
+                        else if (primarySet.Range is not null)
+                        {
+                            (char low, char high) = primarySet.Range.GetValueOrDefault();
+                            int i = primarySet.Negated ? span.IndexOfAnyExceptInRange(low, high) : span.IndexOfAnyInRange(low, high);
+                            if (i >= 0)
+                            {
+                                pos += i;
+                                return true;
+                            }
+                        }
                         else
                         {
                             ref uint[]? startingAsciiLookup = ref _asciiLookups![0];
@@ -767,10 +800,16 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan<char> textSpan,
                         return false;
                     }
 
+                // Not supported in the interpreter, but we could end up here for patterns so complex the compiler gave up on them.
+
+                case FindNextStartingPositionMode.LeadingStrings_LeftToRight:
+                case FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight:
+                    return true;
+
                 // Nothing special to look for.  Just return true indicating this is a valid position to try to match.
 
                 default:
-                    Debug.Assert(FindMode == FindNextStartingPositionMode.NoSearch);
+                    Debug.Assert(FindMode == FindNextStartingPositionMode.NoSearch, $"Unexpected FindMode {FindMode}");
                     return true;
             }
         }
@@ -810,6 +849,11 @@ internal enum FindNextStartingPositionMode
         /// <summary>A multi-character ordinal case-insensitive substring at the beginning of the pattern.</summary>
         LeadingString_OrdinalIgnoreCase_LeftToRight,
 
+        /// <summary>Multiple leading prefix strings</summary>
+        LeadingStrings_LeftToRight,
+        /// <summary>Multiple leading ordinal case-insensitive prefix strings</summary>
+        LeadingStrings_OrdinalIgnoreCase_LeftToRight,
+
         /// <summary>A set starting the pattern.</summary>
         LeadingSet_LeftToRight,
         /// <summary>A set starting the right-to-left pattern.</summary>
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
index 5445f696423e..4d9b7a0efdab 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
@@ -83,6 +83,24 @@ public RegexNode(RegexNodeKind kind, RegexOptions options, int m, int n)
             N = n;
         }
 
+        /// <summary>Creates a new node from an existing one/notone/setone {lazy/atomic} loop with one less iteration.</summary>
+        public RegexNode CloneCharLoopWithOneLessIteration()
+        {
+            Debug.Assert(Kind is RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or
+                                 RegexNodeKind.Notonelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or
+                                 RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic);
+            Debug.Assert(M > 0);
+
+            RegexNode newNode = IsSetFamily ?
+                new RegexNode(Kind, Options, Str!) :
+                new RegexNode(Kind, Options, Ch);
+
+            newNode.M = M - 1;
+            newNode.N = N == int.MaxValue ? int.MaxValue : N - 1;
+
+            return newNode;
+        }
+
         /// <summary>Creates a RegexNode representing a single character.</summary>
         /// <param name="ch">The character.</param>
         /// <param name="options">The node's options.</param>
@@ -1361,27 +1379,16 @@ static void ProcessOneOrMulti(RegexNode node, ReadOnlySpan<char> startingSpan)
             return branch.Kind is RegexNodeKind.One or RegexNodeKind.Multi ? branch : null;
         }
 
-        /// <summary>Same as <see cref="FindBranchOneOrMultiStart"/> but also for Sets.</summary>
-        public RegexNode? FindBranchOneMultiOrSetStart()
-        {
-            RegexNode branch = Kind == RegexNodeKind.Concatenate ? Child(0) : this;
-            return branch.Kind is RegexNodeKind.One or RegexNodeKind.Multi or RegexNodeKind.Set ? branch : null;
-        }
-
         /// <summary>Gets the character that begins a One or Multi.</summary>
         public char FirstCharOfOneOrMulti()
         {
-            Debug.Assert(Kind is RegexNodeKind.One or RegexNodeKind.Multi);
+            Debug.Assert(Kind is RegexNodeKind.One or RegexNodeKind.Multi || (IsOneFamily && M > 0));
             Debug.Assert((Options & RegexOptions.RightToLeft) == 0);
-            return Kind == RegexNodeKind.One ? Ch : Str![0];
+            return IsOneFamily ? Ch : Str![0];
         }
 
         /// <summary>Finds the guaranteed beginning literal(s) of the node, or null if none exists.</summary>
-        /// <returns>
-        /// A tuple of data about the literal: only one of the Char/String/SetChars fields is relevant.
-        /// The Negated value indicates whether the Char/SetChars should be considered exclusionary.
-        /// </returns>
-        public RegexNode? FindStartingLiteralNode()
+        public RegexNode? FindStartingLiteralNode(bool allowZeroWidth = true)
         {
             RegexNode? node = this;
             while (true)
@@ -1404,7 +1411,7 @@ public char FirstCharOfOneOrMulti()
                         case RegexNodeKind.Capture:
                         case RegexNodeKind.Group:
                         case RegexNodeKind.Loop or RegexNodeKind.Lazyloop when node.M > 0:
-                        case RegexNodeKind.PositiveLookaround:
+                        case RegexNodeKind.PositiveLookaround when allowZeroWidth:
                             node = node.Child(0);
                             continue;
                     }
@@ -1419,10 +1426,8 @@ public char FirstCharOfOneOrMulti()
         /// A tuple of data about the literal: only one of the Char/String/SetChars fields is relevant.
         /// The Negated value indicates whether the Char/SetChars should be considered exclusionary.
         /// </returns>
-        public StartingLiteralData? FindStartingLiteral(int maxSetCharacters = 5) // 5 is max efficiently optimized by IndexOfAny today
+        public StartingLiteralData? FindStartingLiteral()
         {
-            Debug.Assert(maxSetCharacters is >= 0 and <= 128, $"{nameof(maxSetCharacters)} == {maxSetCharacters} should be small enough to be stack allocated.");
-
             if (FindStartingLiteralNode() is RegexNode node)
             {
                 switch (node.Kind)
@@ -1434,23 +1439,18 @@ public char FirstCharOfOneOrMulti()
                         return new StartingLiteralData(range: (node.Ch, node.Ch), negated: true);
 
                     case RegexNodeKind.Set or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy:
-                        Span<char> setChars = stackalloc char[maxSetCharacters];
-                        int numChars;
-                        if ((numChars = RegexCharClass.GetSetChars(node.Str!, setChars)) != 0)
-                        {
-                            setChars = setChars.Slice(0, numChars);
-                            return new StartingLiteralData(setChars: setChars.ToString(), negated: RegexCharClass.IsNegated(node.Str!));
-                        }
-
-                        if (RegexCharClass.TryGetSingleRange(node.Str!, out char lowInclusive, out char highInclusive))
+                        if (RegexCharClass.TryGetSingleRange(node.Str!, out char lowInclusive, out char highInclusive) &&
+                            (highInclusive - lowInclusive) > 1) // prefer IndexOfAny for 1 or 2 elements as an optimization
                         {
                             Debug.Assert(lowInclusive < highInclusive);
                             return new StartingLiteralData(range: (lowInclusive, highInclusive), negated: RegexCharClass.IsNegated(node.Str!));
                         }
 
-                        if (RegexCharClass.TryGetAsciiSetChars(node.Str!, out char[]? asciiChars))
+                        Span<char> setChars = stackalloc char[128];
+                        int numChars;
+                        if ((numChars = RegexCharClass.GetSetChars(node.Str!, setChars)) != 0)
                         {
-                            return new StartingLiteralData(asciiChars: asciiChars, negated: RegexCharClass.IsNegated(node.Str!));
+                            return new StartingLiteralData(setChars: setChars.Slice(0, numChars).ToString(), negated: RegexCharClass.IsNegated(node.Str!));
                         }
                         break;
 
@@ -1468,7 +1468,6 @@ public readonly struct StartingLiteralData
             public readonly (char LowInclusive, char HighInclusive) Range;
             public readonly string? String;
             public readonly string? SetChars;
-            public readonly char[]? AsciiChars;
             public readonly bool Negated;
 
             public StartingLiteralData((char LowInclusive, char HighInclusive) range, bool negated)
@@ -1489,13 +1488,6 @@ public StartingLiteralData(string? setChars, bool negated)
                 SetChars = setChars;
                 Negated = negated;
             }
-
-            public StartingLiteralData(char[]? asciiChars, bool negated)
-            {
-                Debug.Assert(asciiChars is not null);
-                AsciiChars = asciiChars;
-                Negated = negated;
-            }
         }
 
         /// <summary>
@@ -2561,14 +2553,7 @@ public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChil
                 {
                     // In particular we want to look for sets that contain only the upper and lowercase variant
                     // of the same ASCII letter.
-                    if (RegexCharClass.IsNegated(child.Str!) ||
-                        RegexCharClass.GetSetChars(child.Str!, twoChars) != 2 ||
-                        twoChars[0] >= 128 ||
-                        twoChars[1] >= 128 ||
-                        twoChars[0] == twoChars[1] ||
-                        !char.IsLetter(twoChars[0]) ||
-                        !char.IsLetter(twoChars[1]) ||
-                        ((twoChars[0] | 0x20) != (twoChars[1] | 0x20)))
+                    if (!RegexCharClass.SetContainsAsciiOrdinalIgnoreCaseCharacter(child.Str!, twoChars))
                     {
                         break;
                     }
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs
index 6485f5e04659..fc6fa74e114b 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs
@@ -1592,7 +1592,7 @@ private char ScanCharEscape()
                 case 'b':
                     return '\b';
                 case 'e':
-                    return '\u001B';
+                    return '\e';
                 case 'f':
                     return '\f';
                 case 'n':
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs
index 1658e5bcdf2a..926a28339162 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs
@@ -11,6 +11,316 @@ namespace System.Text.RegularExpressions
     /// <summary>Detects various forms of prefixes in the regular expression that can help FindFirstChars optimize its search.</summary>
     internal static class RegexPrefixAnalyzer
     {
+        /// <summary>Finds an array of multiple prefixes that a node can begin with.</summary>
+        /// <param name="node">The node to search.</param>
+        /// <param name="ignoreCase">true to find ordinal ignore-case prefixes; false for case-sensitive.</param>
+        /// <returns>
+        /// If a fixed set of prefixes is found, such that a match for this node is guaranteed to begin
+        /// with one of those prefixes, an array of those prefixes is returned.  Otherwise, null.
+        /// </returns>
+        public static string[]? FindPrefixes(RegexNode node, bool ignoreCase)
+        {
+            // Minimum string length for prefixes to be useful. If any prefix has length 1,
+            // then we're generally better off just using IndexOfAny with chars.
+            const int MinPrefixLength = 2;
+
+            // Arbitrary string length limit (with some wiggle room) to avoid creating strings that are longer than is useful and consuming too much memory.
+            const int MaxPrefixLength = 8;
+
+            // Arbitrary limit on the number of prefixes to find. If we find more than this, we're likely to be spending too much time finding prefixes that won't be useful.
+            const int MaxPrefixes = 16;
+
+            // Analyze the node to find prefixes.
+            List<StringBuilder> results = [new StringBuilder()];
+            FindPrefixesCore(node, results, ignoreCase);
+
+            // If we found too many prefixes or if any found is too short, fail.
+            if (results.Count > MaxPrefixes || !results.TrueForAll(sb => sb.Length >= MinPrefixLength))
+            {
+                return null;
+            }
+
+            // Return the prefixes.
+            string[] resultStrings = new string[results.Count];
+            for (int i = 0; i < results.Count; i++)
+            {
+                resultStrings[i] = results[i].ToString();
+            }
+            return resultStrings;
+
+            // <summary>
+            // Updates the results list with found prefixes. All existing strings in the list are treated as existing
+            // discovered prefixes prior to the node being processed. The method returns true if subsequent nodes after
+            // this one should be examined, or returns false if they shouldn't be because the node wasn't guaranteed
+            // to be fully processed.
+            // </summary>
+            static bool FindPrefixesCore(RegexNode node, List<StringBuilder> results, bool ignoreCase)
+            {
+                // If we're too deep to analyze further, we can't trust what we've already computed, so stop iterating.
+                // Also bail if any of our results is already hitting the threshold, or if this node is RTL, which is
+                // not worth the complexity of handling.
+                if (!StackHelper.TryEnsureSufficientExecutionStack() ||
+                    !results.TrueForAll(sb => sb.Length < MaxPrefixLength) ||
+                    (node.Options & RegexOptions.RightToLeft) != 0)
+                {
+                    return false;
+                }
+
+                // These limits are approximations. We'll stop trying to make strings longer once we exceed the max length,
+                // and if we exceed the max number of prefixes by a non-trivial amount, we'll fail the operation.
+                Span<char> setChars = stackalloc char[MaxPrefixes]; // limit how many chars we get from a set based on the max prefixes we care about
+
+                // Loop down the left side of the tree, looking for a starting node we can handle. We only loop through
+                // atomic and capture nodes, as the child is guaranteed to execute once, as well as loops with a positive
+                // minimum and thus at least one guaranteed iteration.
+                while (true)
+                {
+                    switch (node.Kind)
+                    {
+                        // These nodes are all guaranteed to execute at least once, so we can just
+                        // skip through them to their child.
+                        case RegexNodeKind.Atomic:
+                        case RegexNodeKind.Capture:
+                            node = node.Child(0);
+                            continue;
+
+                        // Zero-width anchors and assertions don't impact a prefix and may be skipped over.
+                        case RegexNodeKind.Bol:
+                        case RegexNodeKind.Eol:
+                        case RegexNodeKind.Boundary:
+                        case RegexNodeKind.ECMABoundary:
+                        case RegexNodeKind.NonBoundary:
+                        case RegexNodeKind.NonECMABoundary:
+                        case RegexNodeKind.Beginning:
+                        case RegexNodeKind.Start:
+                        case RegexNodeKind.EndZ:
+                        case RegexNodeKind.End:
+                        case RegexNodeKind.Empty:
+                        case RegexNodeKind.UpdateBumpalong:
+                        case RegexNodeKind.PositiveLookaround:
+                        case RegexNodeKind.NegativeLookaround:
+                            return true;
+
+                        // If we hit a single character, we can just return that character.
+                        // This is only relevant for case-sensitive searches, as for case-insensitive we'd have sets for anything
+                        // that produces a different result when case-folded, or for strings composed entirely of characters that
+                        // don't participate in case conversion. Single character loops are handled the same as single characters
+                        // up to the min iteration limit. We can continue processing after them as well if they're repeaters such
+                        // that their min and max are the same.
+                        case RegexNodeKind.One or RegexNodeKind.Oneloop or RegexNodeKind.Onelazy or RegexNodeKind.Oneloopatomic when !ignoreCase || !RegexCharClass.ParticipatesInCaseConversion(node.Ch):
+                            {
+                                int reps = node.Kind is RegexNodeKind.One ? 1 : Math.Min(node.M, MaxPrefixLength);
+                                foreach (StringBuilder sb in results)
+                                {
+                                    sb.Append(node.Ch, reps);
+                                }
+                                return node.Kind is RegexNodeKind.One || reps == node.N;
+                            }
+
+                        // If we hit a string, we can just return that string.
+                        // As with One above, this is only relevant for case-sensitive searches.
+                        case RegexNodeKind.Multi:
+                            if (!ignoreCase)
+                            {
+                                foreach (StringBuilder sb in results)
+                                {
+                                    sb.Append(node.Str);
+                                }
+                            }
+                            else
+                            {
+                                // If we're ignoring case, then only append up through characters that don't participate in case conversion.
+                                // If there are any beyond that, we can't go further and need to stop with what we have.
+                                foreach (char c in node.Str!)
+                                {
+                                    if (RegexCharClass.ParticipatesInCaseConversion(c))
+                                    {
+                                        return false;
+                                    }
+
+                                    foreach (StringBuilder sb in results)
+                                    {
+                                        sb.Append(c);
+                                    }
+                                }
+                            }
+                            return true;
+
+                        // For case-sensitive,  try to extract the characters that comprise it, and if there are
+                        // any and there aren't more than the max number of prefixes, we can return
+                        // them each as a prefix. Effectively, this is an alternation of the characters
+                        // that comprise the set. For case-insensitive, we need the set to be two ASCII letters that case fold to the same thing.
+                        // As with One and loops, set loops are handled the same as sets up to the min iteration limit.
+                        case RegexNodeKind.Set or RegexNodeKind.Setloop or RegexNodeKind.Setlazy or RegexNodeKind.Setloopatomic when !RegexCharClass.IsNegated(node.Str!): // negated sets are too complex to analyze
+                            {
+                                int charCount = RegexCharClass.GetSetChars(node.Str!, setChars);
+                                if (charCount == 0)
+                                {
+                                    return false;
+                                }
+
+                                int reps = node.Kind is RegexNodeKind.Set ? 1 : Math.Min(node.M, MaxPrefixLength);
+                                if (!ignoreCase)
+                                {
+                                    int existingCount = results.Count;
+
+                                    // Duplicate all of the existing strings for all of the new suffixes, other than the first.
+                                    foreach (char suffix in setChars.Slice(1, charCount - 1))
+                                    {
+                                        for (int existing = 0; existing < existingCount; existing++)
+                                        {
+                                            StringBuilder newSb = new StringBuilder().Append(results[existing]);
+                                            newSb.Append(suffix, reps);
+                                            results.Add(newSb);
+                                        }
+                                    }
+
+                                    // Then append the first suffix to all of the existing strings.
+                                    for (int existing = 0; existing < existingCount; existing++)
+                                    {
+                                        results[existing].Append(setChars[0], reps);
+                                    }
+                                }
+                                else
+                                {
+                                    // For ignore-case, we currently only handle the simple (but common) case of a single
+                                    // ASCII character that case folds to the same char.
+                                    if (!RegexCharClass.SetContainsAsciiOrdinalIgnoreCaseCharacter(node.Str!, setChars))
+                                    {
+                                        return false;
+                                    }
+
+                                    // Append it to each.
+                                    foreach (StringBuilder sb in results)
+                                    {
+                                        sb.Append(setChars[1], reps);
+                                    }
+                                }
+
+                                return node.Kind is RegexNodeKind.Set || reps == node.N;
+                            }
+
+                        case RegexNodeKind.Concatenate:
+                            {
+                                int childCount = node.ChildCount();
+                                for (int i = 0; i < childCount; i++)
+                                {
+                                    if (!FindPrefixesCore(node.Child(i), results, ignoreCase))
+                                    {
+                                        return false;
+                                    }
+                                }
+                            }
+                            return true;
+
+                        // We can append any guaranteed iterations as if they were a concatenation.
+                        case RegexNodeKind.Loop or RegexNodeKind.Lazyloop when node.M > 0:
+                            {
+                                int limit = Math.Min(node.M, MaxPrefixLength); // MaxPrefixLength here is somewhat arbitrary, as a single loop iteration could yield multiple chars
+                                for (int i = 0; i < limit; i++)
+                                {
+                                    if (!FindPrefixesCore(node.Child(0), results, ignoreCase))
+                                    {
+                                        return false;
+                                    }
+                                }
+                                return limit == node.N;
+                            }
+
+                        // For alternations, we need to find a prefix for every branch; if we can't compute a
+                        // prefix for any one branch, we can't trust the results and need to give up, since we don't
+                        // know if our set of prefixes is complete.
+                        case RegexNodeKind.Alternate:
+                            {
+                                // If there are more children than our maximum, just give up immediately, as we
+                                // won't be able to get a prefix for every branch and have it be within our max.
+                                int childCount = node.ChildCount();
+                                Debug.Assert(childCount >= 2); // otherwise it would have been optimized out
+                                if (childCount > MaxPrefixes)
+                                {
+                                    return false;
+                                }
+
+                                // Build up the list of all prefixes across all branches.
+                                List<StringBuilder>? allBranchResults = null;
+                                List<StringBuilder>? alternateBranchResults = [new StringBuilder()];
+                                for (int i = 0; i < childCount; i++)
+                                {
+                                    _ = FindPrefixesCore(node.Child(i), alternateBranchResults, ignoreCase);
+
+                                    Debug.Assert(alternateBranchResults.Count > 0);
+                                    foreach (StringBuilder sb in alternateBranchResults)
+                                    {
+                                        // If a branch yields an empty prefix, then none of the other branches
+                                        // matter, e.g. if the pattern is abc(def|ghi|), then this would result
+                                        // in prefixes abcdef, abcghi, and abc, and since abc is a prefix of both
+                                        // abcdef and abcghi, the former two would never be used.
+                                        if (sb.Length == 0)
+                                        {
+                                            return false;
+                                        }
+                                    }
+
+                                    if (allBranchResults is null)
+                                    {
+                                        allBranchResults = alternateBranchResults;
+                                        alternateBranchResults = [new StringBuilder()];
+                                    }
+                                    else
+                                    {
+                                        allBranchResults.AddRange(alternateBranchResults);
+                                        alternateBranchResults.Clear();
+                                        alternateBranchResults.Add(new StringBuilder());
+                                    }
+                                }
+
+                                // At this point, we know we can successfully incorporate the alternation's results
+                                // into the main results.
+
+                                // If the results are currently empty (meaning a single empty StringBuilder), we can remove
+                                // that builder and just replace the results with the alternation's results. We would otherwise
+                                // be creating a dot product of every builder in the results with every branch's result, which
+                                // is logically the same thing.
+                                if (results.Count == 1 && results[0].Length == 0)
+                                {
+                                    results.Clear();
+                                    results.AddRange(allBranchResults!);
+                                }
+                                else
+                                {
+                                    // Duplicate all of the existing strings for all of the new suffixes, other than the first.
+                                    int existingCount = results.Count;
+                                    for (int i = 1; i < allBranchResults!.Count; i++)
+                                    {
+                                        StringBuilder suffix = allBranchResults[i];
+                                        for (int existing = 0; existing < existingCount; existing++)
+                                        {
+                                            StringBuilder newSb = new StringBuilder().Append(results[existing]);
+                                            newSb.Append(suffix);
+                                            results.Add(newSb);
+                                        }
+                                    }
+
+                                    // Then append the first suffix to all of the existing strings.
+                                    for (int existing = 0; existing < existingCount; existing++)
+                                    {
+                                        results[existing].Append(allBranchResults[0]);
+                                    }
+                                }
+                            }
+
+                            // We don't know that we fully processed every branch, so we can't iterate through what comes after this node.
+                            // The results were successfully updated, but return false to indicate that nothing after this node should be examined.
+                            return false;
+
+                        // Something else we don't recognize, so stop iterating.
+                        default:
+                            return false;
+                    }
+                }
+            }
+        }
+
         /// <summary>Computes the leading substring in <paramref name="node"/>; may be empty.</summary>
         public static string FindPrefix(RegexNode node)
         {
@@ -225,25 +535,24 @@ static bool Process(RegexNode node, ref ValueStringBuilder vsb)
 
             // For every entry, try to get the chars that make up the set, if there are few enough.
             // For any for which we couldn't get the small chars list, see if we can get other useful info.
-            Span<char> scratch = stackalloc char[128]; // limit based on what's currently efficiently handled by SearchValues
+            Span<char> scratch = stackalloc char[128];
             for (int i = 0; i < results.Count; i++)
             {
                 RegexFindOptimizations.FixedDistanceSet result = results[i];
                 result.Negated = RegexCharClass.IsNegated(result.Set);
 
-                int count = RegexCharClass.GetSetChars(result.Set, scratch);
-                if (count > 0)
+                if (RegexCharClass.TryGetSingleRange(result.Set, out char lowInclusive, out char highInclusive) &&
+                    (highInclusive - lowInclusive) > 1) // prefer IndexOfAny for tiny sets of 1 or 2 elements
                 {
-                    result.Chars = scratch.Slice(0, count).ToArray();
+                    result.Range = (lowInclusive, highInclusive);
                 }
-
-                // Prefer IndexOfAnyInRange over IndexOfAny for sets of 3-5 values that fit in a single range.
-                if (thorough &&
-                    (result.Chars is null || result.Chars.Length > 2) &&
-                    RegexCharClass.TryGetSingleRange(result.Set, out char lowInclusive, out char highInclusive))
+                else
                 {
-                    result.Chars = null;
-                    result.Range = (lowInclusive, highInclusive);
+                    int count = RegexCharClass.GetSetChars(result.Set, scratch);
+                    if (count > 0)
+                    {
+                        result.Chars = scratch.Slice(0, count).ToArray();
+                    }
                 }
 
                 results[i] = result;
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs
index 3b6259307fb9..49205f5ee264 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs
@@ -4,6 +4,7 @@
 using System.Collections;
 using System.Collections.Generic;
 using System.Diagnostics;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 
 #pragma warning disable CS8500 // takes address of managed type
@@ -39,7 +40,7 @@ public RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
 
             var vsb = new ValueStringBuilder(stackalloc char[256]);
             FourStackStrings stackStrings = default;
-            var strings = new ValueListBuilder<string>(MemoryMarshal.CreateSpan(ref stackStrings.Item1!, 4));
+            var strings = new ValueListBuilder<string>(stackStrings);
             var rules = new ValueListBuilder<int>(stackalloc int[64]);
 
             int childCount = concat.ChildCount();
@@ -96,13 +97,10 @@ public RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
         }
 
         /// <summary>Simple struct of four strings.</summary>
-        [StructLayout(LayoutKind.Sequential)]
+        [InlineArray(4)]
         private struct FourStackStrings // used to do the equivalent of: Span<string> strings = stackalloc string[4];
         {
-            public string Item1;
-            public string Item2;
-            public string Item3;
-            public string Item4;
+            private string _item1;
         }
 
         /// <summary>
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs
index c0b743d5f1b9..5284c09339bd 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs
@@ -28,8 +28,8 @@ internal ref struct RegexWriter
 #if DEBUG
         static RegexWriter()
         {
-            Debug.Assert(!Enum.IsDefined(typeof(RegexNodeKind), BeforeChild));
-            Debug.Assert(!Enum.IsDefined(typeof(RegexNodeKind), AfterChild));
+            Debug.Assert(!Enum.IsDefined(BeforeChild));
+            Debug.Assert(!Enum.IsDefined(AfterChild));
         }
 #endif
 
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/AttRegexTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/AttRegexTests.cs
index 829432b46630..8bc709229230 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/AttRegexTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/AttRegexTests.cs
@@ -45,7 +45,7 @@ public static IEnumerable<object[]> Test_MemberData()
             foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
             {
                 (RegexOptions Options, string Pattern, string Input, string Expected)[] cases = Match_MemberData_Cases(engine).ToArray();
-                Regex[] regexes = RegexHelpers.GetRegexesAsync(engine, cases.Select(c => (c.Pattern, (CultureInfo?)null, (RegexOptions?)c.Options, (TimeSpan?)null)).ToArray()).Result;
+                Regex[] regexes = RegexHelpers.GetRegexes(engine, cases.Select(c => (c.Pattern, (CultureInfo?)null, (RegexOptions?)c.Options, (TimeSpan?)null)).ToArray());
                 for (int i = 0; i < regexes.Length; i++)
                 {
                     yield return new object[] { regexes[i], cases[i].Input, cases[i].Expected };
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/MonoRegexTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/MonoRegexTests.cs
index 8aef06bae0cb..8865d17e05c9 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/MonoRegexTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/MonoRegexTests.cs
@@ -55,7 +55,7 @@ public static IEnumerable<object[]> ValidateRegex_MemberData()
             {
                 (string Pattern, RegexOptions Options, string Input, string Expected)[] allEngineCases = Cases(engine).ToArray();
 
-                Regex[] results = RegexHelpers.GetRegexesAsync(engine, allEngineCases.Select(c => (c.Pattern, (CultureInfo?)null, (RegexOptions?)c.Options, (TimeSpan?)null)).ToArray()).Result;
+                Regex[] results = RegexHelpers.GetRegexes(engine, allEngineCases.Select(c => (c.Pattern, (CultureInfo?)null, (RegexOptions?)c.Options, (TimeSpan?)null)).ToArray());
                 for (int i = 0; i < results.Length; i++)
                 {
                     string expected = allEngineCases[i].Expected;
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
index c3ff5b595caa..d3db90dfdfca 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
@@ -16,7 +16,7 @@ public static IEnumerable<object[]> Groups_MemberData()
             foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
             {
                 (CultureInfo Culture, string Pattern, string Input, RegexOptions Options, string[] Expected)[] cases = Groups_MemberData_Cases(engine).ToArray();
-                Regex[] regexes = RegexHelpers.GetRegexesAsync(engine, cases.Select(c => (c.Pattern, c.Culture, (RegexOptions?)c.Options, (TimeSpan?)null)).ToArray()).Result;
+                Regex[] regexes = RegexHelpers.GetRegexes(engine, cases.Select(c => (c.Pattern, c.Culture, (RegexOptions?)c.Options, (TimeSpan?)null)).ToArray());
                 for (int i = 0; i < regexes.Length; i++)
                 {
                     yield return new object[] { regexes[i], cases[i].Culture, cases[i].Input, cases[i].Expected };
@@ -289,7 +289,7 @@ public static IEnumerable<object[]> Groups_MemberData()
             yield return (enUS, @"(cat)([\u0041]*)(dog)", "catAAAdog", RegexOptions.None, new string[] { "catAAAdog", "cat", "AAA", "dog" });
             yield return (enUS, @"(cat)([\a]*)(dog)", "cat\a\a\adog", RegexOptions.None, new string[] { "cat\a\a\adog", "cat", "\a\a\a", "dog" });
             yield return (enUS, @"(cat)([\b]*)(dog)", "cat\b\b\bdog", RegexOptions.None, new string[] { "cat\b\b\bdog", "cat", "\b\b\b", "dog" });
-            yield return (enUS, @"(cat)([\e]*)(dog)", "cat\u001B\u001B\u001Bdog", RegexOptions.None, new string[] { "cat\u001B\u001B\u001Bdog", "cat", "\u001B\u001B\u001B", "dog" });
+            yield return (enUS, @"(cat)([\e]*)(dog)", "cat\e\e\edog", RegexOptions.None, new string[] { "cat\e\e\edog", "cat", "\e\e\e", "dog" });
             yield return (enUS, @"(cat)([\f]*)(dog)", "cat\f\f\fdog", RegexOptions.None, new string[] { "cat\f\f\fdog", "cat", "\f\f\f", "dog" });
             yield return (enUS, @"(cat)([\r]*)(dog)", "cat\r\r\rdog", RegexOptions.None, new string[] { "cat\r\r\rdog", "cat", "\r\r\r", "dog" });
             yield return (enUS, @"(cat)([\v]*)(dog)", "cat\v\v\vdog", RegexOptions.None, new string[] { "cat\v\v\vdog", "cat", "\v\v\v", "dog" });
@@ -433,7 +433,7 @@ public static IEnumerable<object[]> Groups_MemberData()
 
             if (!PlatformDetection.IsNetFramework) // `\c[` was not handled in .NET Framework. See https://github.com/dotnet/runtime/issues/24759.
             {
-                yield return (enUS, @"(cat)(\c[*)(dog)", "asdlkcat\u001bdogiwod", RegexOptions.None, new string[] { "cat\u001bdog", "cat", "\u001b", "dog" });
+                yield return (enUS, @"(cat)(\c[*)(dog)", "asdlkcat\edogiwod", RegexOptions.None, new string[] { "cat\edog", "cat", "\e", "dog" });
             }
 
             // Atomic Zero-Width Assertions \A \G ^ \Z \z \b \B
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.KnownPattern.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.KnownPattern.Tests.cs
index b34ede47dd2e..5748fd8df3c2 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.KnownPattern.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.KnownPattern.Tests.cs
@@ -1553,12 +1553,12 @@ public async Task PatternsDataSet_ConstructRegexForAll_NonBacktracking()
         [OuterLoop("Takes minutes to generate and compile thousands of expressions")]
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess), nameof(PlatformDetection.IsNotMobile), nameof(PlatformDetection.IsNotBrowser))] // consumes a lot of memory, doesn't work on mobile
         [ActiveIssue("https://github.com/dotnet/runtime/issues/80018", TestRuntimes.Mono)]
-        public void PatternsDataSet_ConstructRegexForAll_SourceGenerated()
+        public async Task PatternsDataSet_ConstructRegexForAll_SourceGenerated()
         {
-            Parallel.ForEach(s_patternsDataSet.Value.Chunk(50), chunk =>
+            await Parallel.ForEachAsync(s_patternsDataSet.Value.Chunk(50), async (chunk, ct) =>
             {
-                RegexHelpers.GetRegexesAsync(RegexEngine.SourceGenerated,
-                    chunk.Select(r => (r.Pattern, (CultureInfo?)null, (RegexOptions?)r.Options, (TimeSpan?)null)).ToArray()).GetAwaiter().GetResult();
+                await RegexHelpers.GetRegexesAsync(RegexEngine.SourceGenerated,
+                    chunk.Select(r => (r.Pattern, (CultureInfo?)null, (RegexOptions?)r.Options, (TimeSpan?)null)).ToArray());
             });
         }
 
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
index c003c03bba3a..1b06272fba64 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
@@ -21,7 +21,7 @@ public static IEnumerable<object[]> Match_MemberData()
             foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
             {
                 (string Pattern, string Input, RegexOptions Options, int Beginning, int Length, bool ExpectedSuccess, string ExpectedValue)[] cases = Match_MemberData_Cases(engine).ToArray();
-                Regex[] regexes = RegexHelpers.GetRegexesAsync(engine, cases.Select(c => (c.Pattern, (CultureInfo?)null, (RegexOptions?)c.Options, (TimeSpan?)null)).ToArray()).Result;
+                Regex[] regexes = RegexHelpers.GetRegexes(engine, cases.Select(c => (c.Pattern, (CultureInfo?)null, (RegexOptions?)c.Options, (TimeSpan?)null)).ToArray());
                 for (int i = 0; i < regexes.Length; i++)
                 {
                     yield return new object[] { engine, cases[i].Pattern, cases[i].Input, cases[i].Options, regexes[i], cases[i].Beginning, cases[i].Length, cases[i].ExpectedSuccess, cases[i].ExpectedValue };
@@ -92,6 +92,7 @@ public static IEnumerable<object[]> Match_MemberData()
                 yield return (@"(?:(?!(b)b)\1a)+", "babababa", RegexOptions.None, 0, 8, false, string.Empty);
                 yield return (@"(?:(?!(b)b)\1a)*", "babababa", RegexOptions.None, 0, 8, true, string.Empty);
                 yield return (@"(.*?)a(?!(a+)b\2c)\2(.*)", "baaabaac", RegexOptions.None, 0, 8, false, string.Empty);
+                yield return (@"(?!(abc))+\w\w\w", "abcdef", RegexOptions.None, 0, 6, true, "bcd");
 
                 // Zero-width positive lookbehind assertion
                 yield return (@"(\w){6}(?<=XXX)def", "abcXXXdef", RegexOptions.None, 0, 9, true, "abcXXXdef");
@@ -660,8 +661,15 @@ public static IEnumerable<object[]> Match_MemberData()
                 yield return (@$"^{aOptional}{{1,2}}?b", "aaab", RegexOptions.None, 0, 4, false, "");
                 yield return (@$"^{aOptional}{{2}}b", "aab", RegexOptions.None, 0, 3, true, "aab");
             }
+            yield return (@"(a+.|b+.)", "aaac", RegexOptions.None, 0, 4, true, "aaac");
+            yield return (@"(a+?.|b+?.)", "aaac", RegexOptions.None, 0, 4, true, "aa");
+            yield return (@"((a+?).|(b+?).)", "aaac", RegexOptions.None, 0, 4, true, "aa");
+            yield return (@"((a+?)+.|(b+?)+.)", "aaac", RegexOptions.None, 0, 4, true, "aaac");
+            yield return (@"((a+?)+?.|(b+?)+?.)", "aaac", RegexOptions.None, 0, 4, true, "aa");
             if (!RegexHelpers.IsNonBacktracking(engine))
             {
+                yield return (@"((?>a+).|(?>b+).)", "aaac", RegexOptions.None, 0, 4, true, "aaac");
+
                 yield return ("(?(dog2))", "dog2", RegexOptions.None, 0, 4, true, string.Empty);
                 yield return ("(?(a:b))", "a", RegexOptions.None, 0, 1, true, string.Empty);
                 yield return ("(?(a:))", "a", RegexOptions.None, 0, 1, true, string.Empty);
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Tests.Common.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Tests.Common.cs
index bb0b4b8f39d4..4d4f2252839a 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Tests.Common.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Tests.Common.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Globalization;
 using System.Linq;
@@ -123,6 +124,22 @@ public static async Task<Regex> GetRegexAsync(RegexEngine engine, [StringSyntax(
                 new Regex(pattern, options.Value | OptionsFromEngine(engine), matchTimeout.Value);
         }
 
+        public static Regex[] GetRegexes(RegexEngine engine, params (string pattern, CultureInfo? culture, RegexOptions? options, TimeSpan? matchTimeout)[] regexes)
+        {
+            if (engine == RegexEngine.SourceGenerated)
+            {
+                // Source generated regex creation can complete asynchronously, which is why GetRegexesAsync is async.
+                // xunit theory member data in xunit v2 may only be synchronous, and Roslyn's APIs are only asynchronous.
+                // As such, they need to block to get the results. But if they block on xunit's limited synchronization
+                // scheduler, they could deadlock, so escape the context by queueing the work to a thread pool thread.
+                return Task.Run(() => GetRegexesAsync(engine, regexes)).GetAwaiter().GetResult();
+            }
+
+            Task<Regex[]> t = GetRegexesAsync(engine, regexes);
+            Debug.Assert(t.IsCompleted);
+            return t.GetAwaiter().GetResult();
+        }
+
         public static async Task<Regex[]> GetRegexesAsync(RegexEngine engine, params (string pattern, CultureInfo? culture, RegexOptions? options, TimeSpan? matchTimeout)[] regexes)
         {
             if (engine == RegexEngine.SourceGenerated)
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs
index 181c978a3766..57d4232ee1ed 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs
@@ -2,15 +2,24 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.IO;
+using System.Linq;
+using System.Text.RegularExpressions.Generator;
 using System.Threading.Tasks;
 using Xunit;
 
 namespace System.Text.RegularExpressions.Tests
 {
-    [ConditionalClass(typeof(PlatformDetection), nameof(PlatformDetection.IsReflectionEmitSupported), nameof(PlatformDetection.IsNotMobile), nameof(PlatformDetection.IsNotBrowser))]
+    [ConditionalClass(typeof(RegexGeneratorOutputTests), nameof(GeneratorOutputTestsSupported))]
     public partial class RegexGeneratorOutputTests
     {
+        public static bool GeneratorOutputTestsSupported =>
+            PlatformDetection.IsReflectionEmitSupported &&
+            PlatformDetection.IsNotMobile &&
+            PlatformDetection.IsNotBrowser &&
+            typeof(RegexGenerator).Assembly.GetCustomAttributes(false).OfType<DebuggableAttribute>().Any(da => da.IsJITTrackingEnabled); // output differs between debug and release
+
         // This exists to ensure we're aware of any egregious breaks to formatting / readability.
         // Any updates that impact the generated code in these baselines will need to be updated
         // as changes are made to the code emitted by the generator.
@@ -258,6 +267,7 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)
                                         loop_iteration = 0;
 
                                         LoopBody:
+                                        Utilities.StackPush(ref base.runstack!, ref stackpos, 143337952);
                                         Utilities.StackPush(ref base.runstack!, ref stackpos, base.Crawlpos(), pos);
 
                                         loop_iteration++;
@@ -311,6 +321,7 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)
                                         }
                                         pos = base.runstack![--stackpos];
                                         UncaptureUntil(base.runstack![--stackpos]);
+                                        Utilities.ValidateStackCookie(143337952, base.runstack![--stackpos]);
                                         slice = inputSpan.Slice(pos);
                                         LoopEnd:;
                                     //}
@@ -381,6 +392,32 @@ internal static bool IsWordChar(char ch)
                                 (WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0;
                         }
 
+                        /// <summary>Pushes 1 value onto the backtracking stack.</summary>
+                        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                        internal static void StackPush(ref int[] stack, ref int pos, int arg0)
+                        {
+                            // If there's space available for the value, store it.
+                            int[] s = stack;
+                            int p = pos;
+                            if ((uint)p < (uint)s.Length)
+                            {
+                                s[p] = arg0;
+                                pos++;
+                                return;
+                            }
+
+                            // Otherwise, resize the stack to make room and try again.
+                            WithResize(ref stack, ref pos, arg0);
+
+                            // <summary>Resize the backtracking stack array and push 1 value onto the stack.</summary>
+                            [MethodImpl(MethodImplOptions.NoInlining)]
+                            static void WithResize(ref int[] stack, ref int pos, int arg0)
+                            {
+                                Array.Resize(ref stack, (pos + 0) * 2);
+                                StackPush(ref stack, ref pos, arg0);
+                            }
+                        }
+
                         /// <summary>Pushes 2 values onto the backtracking stack.</summary>
                         [MethodImpl(MethodImplOptions.AggressiveInlining)]
                         internal static void StackPush(ref int[] stack, ref int pos, int arg0, int arg1)
@@ -407,6 +444,16 @@ static void WithResize(ref int[] stack, ref int pos, int arg0, int arg1)
                                 StackPush(ref stack, ref pos, arg0, arg1);
                             }
                         }
+
+                        /// <summary>Validates that a stack cookie popped off the backtracking stack holds the expected value. Debug only.</summary>
+                        internal static int ValidateStackCookie(int expected, int actual)
+                        {
+                            if (expected != actual)
+                            {
+                                throw new Exception($"Backtracking stack imbalance detected. Expected {expected}. Actual {actual}.");
+                            }
+                            return actual;
+                        }
                     }
                 }
                 """
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexPcreTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexPcreTests.cs
index 0063cf802fdf..6ced2daba372 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexPcreTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexPcreTests.cs
@@ -19,7 +19,7 @@ public static IEnumerable<object[]> PcreTestData()
             foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
             {
                 (string pattern, RegexOptions options, string input, bool expectedSuccess)[] cases = PcreTestData_Cases(engine).ToArray();
-                Regex[] regexes = RegexHelpers.GetRegexesAsync(engine, cases.Select(c => (c.pattern, (CultureInfo?)null, (RegexOptions?)c.options, (TimeSpan?)null)).ToArray()).Result;
+                Regex[] regexes = RegexHelpers.GetRegexes(engine, cases.Select(c => (c.pattern, (CultureInfo?)null, (RegexOptions?)c.options, (TimeSpan?)null)).ToArray());
                 for (int i = 0; i < regexes.Length; i++)
                 {
                     yield return new object[] { regexes[i], cases[i].input, cases[i].expectedSuccess };
@@ -37,7 +37,7 @@ public static IEnumerable<object[]> PcreTestData()
             yield return ("The quick brown fox", RegexOptions.IgnoreCase, "The quick brown FOX", true);
             yield return ("The quick brown fox", RegexOptions.IgnoreCase, "What do you know about the quick brown fox?", true);
             yield return ("The quick brown fox", RegexOptions.IgnoreCase, "What do you know about THE QUICK BROWN FOX?", true);
-            yield return ("abcd\\t\\n\\r\\f\\a\\e\\071\\x3b\\$\\\\\\?caxyz", RegexOptions.None, "abcd\t\n\r\f\a\u001b9;$\\?caxyz", true);
+            yield return ("abcd\\t\\n\\r\\f\\a\\e\\071\\x3b\\$\\\\\\?caxyz", RegexOptions.None, "abcd\t\n\r\f\a\e9;$\\?caxyz", true);
             yield return ("a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz", RegexOptions.None, "abxyzpqrrrabbxyyyypqAzz", true);
             yield return ("a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz", RegexOptions.None, "aabxyzpqrrrabbxyyyypqAzz", true);
             yield return ("a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz", RegexOptions.None, "aaabxyzpqrrrabbxyyyypqAzz", true);
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexRustTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexRustTests.cs
index 1a6a807a57cf..be000266052f 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexRustTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexRustTests.cs
@@ -18,7 +18,7 @@ public static IEnumerable<object[]> MatchStartAndEndPositions_MemberData()
             foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
             {
                 (string Pattern, string Input, IEnumerable<(int, int)> MatchBoundaries)[] cases = MatchStartAndEndPositions_MemberData_Cases().ToArray();
-                Regex[] regexes = RegexHelpers.GetRegexesAsync(engine, cases.Select(c => (c.Pattern, (CultureInfo?)null, (RegexOptions?)RegexOptions.None, (TimeSpan?)null)).ToArray()).Result;
+                Regex[] regexes = RegexHelpers.GetRegexes(engine, cases.Select(c => (c.Pattern, (CultureInfo?)null, (RegexOptions?)RegexOptions.None, (TimeSpan?)null)).ToArray());
                 for (int i = 0; i < regexes.Length; i++)
                 {
                     yield return new object[] { regexes[i], cases[i].Input, cases[i].MatchBoundaries };
diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexPrefixAnalyzerTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexPrefixAnalyzerTests.cs
index 9c592d7c57f6..783b45e9d3c9 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexPrefixAnalyzerTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexPrefixAnalyzerTests.cs
@@ -70,6 +70,76 @@ public void FindFirstCharClass_StressDeep()
             FindFirstCharClass(string.Concat(Enumerable.Repeat($"(a?", nesting).Concat(Enumerable.Repeat(")*", nesting))), 0, null);
         }
 
+        [Theory]
+        // case-sensitive
+        [InlineData("abc", new[] { "abc" }, false)]
+        [InlineData("(abc+|bcd+)", new[] { "abc", "bcd" }, false)]
+        [InlineData("(ab+c|bcd+)", new[] { "ab", "bcd" }, false)]
+        [InlineData("(ab+c|bcd+)*", null, false)]
+        [InlineData("(ab+c|bcd+)+", new[] { "ab", "bcd" }, false)]
+        [InlineData("(ab+c|bcd+){3,5}", new[] { "ab", "bcd" }, false)]
+        [InlineData("abc|def", new[] { "abc", "def" }, false)]
+        [InlineData("ab{4}c|def{5}|g{2,4}h", new[] { "abbbbc", "defffff", "gg" }, false)]
+        [InlineData("abc|def|(ghi|jklm)", new[] { "abc", "def", "ghi", "jklm" }, false)]
+        [InlineData("abc[def]ghi", new[] { "abcdghi", "abceghi", "abcfghi" }, false)]
+        [InlineData("abc[def]ghi|[jkl]m", new[] { "abcdghi", "abceghi", "abcfghi", "jm", "km", "lm" }, false)]
+        [InlineData("agggtaaa|tttaccct", new[] { "agggtaaa", "tttaccct" }, false)]
+        [InlineData("[cgt]gggtaaa|tttaccc[acg]", new[] { "cgggtaaa", "ggggtaaa", "tgggtaaa", "tttaccca", "tttacccc", "tttacccg" }, false)]
+        [InlineData("a[act]ggtaaa|tttacc[agt]t", new[] { "aaggtaaa", "acggtaaa", "atggtaaa", "tttaccat", "tttaccgt", "tttacctt" }, false)]
+        [InlineData("ag[act]gtaaa|tttac[agt]ct", new[] { "agagtaaa", "agcgtaaa", "agtgtaaa", "tttacact", "tttacgct", "tttactct" }, false)]
+        [InlineData("agg[act]taaa|ttta[agt]cct", new[] { "aggataaa", "aggctaaa", "aggttaaa", "tttaacct", "tttagcct", "tttatcct" }, false)]
+        [InlineData(@"\b(abc|def)\b", new[] { "abc", "def" }, false)]
+        [InlineData("^(abc|def)$", new[] { "abc", "def" }, false)]
+        [InlineData("abcdefg|h", null, false)]
+        [InlineData("abc[def]ghi|[jkl]", null, false)]
+        [InlineData("[12][45][789]", new[] { "147", "148", "149", "157", "158", "159", "247", "248", "249", "257", "258", "259" }, false)]
+        [InlineData("[12]a[45]b[789]c", new[] { "1a4b7c", "1a4b8c", "1a4b9c", "1a5b7c", "1a5b8c", "1a5b9c", "2a4b7c", "2a4b8c", "2a4b9c", "2a5b7c", "2a5b8c", "2a5b9c" }, false)]
+        [InlineData("(abc){3}|(def){3}", new[] { "abcabcabc", "defdefdef" }, false)]
+        [InlineData("(abc){4,8}|(def){2,3}", new[] { "abcabcabc", "defdef" }, false)]
+        [InlineData("(abc){4,8}|(de+f){2,3}", new[] { "abcabcabc", "de" }, false)]
+        [InlineData("(ab{2}c){4,8}|(de+f){2,3}", new[] { "abbcabbc", "de" }, false)]
+        // case-insensitive
+        [InlineData("[Aa][Bb][Cc]", new[] { "abc" }, true)]
+        [InlineData("[Aa][Bbc][Cc]", null, true)]
+        [InlineData(":[Aa]![Bb]@", new[] { ":a!b@" }, true)]
+        [InlineData("(?i)abc", new[] { "abc" }, true)]
+        [InlineData("(?i)(abc+|bcd+)", new[] { "abc", "bcd" }, true)]
+        [InlineData("(?i)(ab+c|bcd+)", new[] { "ab", "bcd" }, true)]
+        [InlineData("(?i)(ab+c|bcd+)*", null, true)]
+        [InlineData("(?i)(ab+c|bcd+)+", new[] { "ab", "bcd" }, true)]
+        [InlineData("(?i)(ab+c|bcd+){3,5}", new[] { "ab", "bcd" }, true)]
+        [InlineData("(?i)abc|def", new[] { "abc", "def" }, true)]
+        [InlineData("(?i)ab{4}c|def{5}|g{2,4}h", new[] { "abbbbc", "defffff", "gg" }, true)]
+        [InlineData("(?i)(((?>abc)|(?>def)))", new[] { "abc", "def" }, true)]
+        [InlineData("(?i)(abc|def|(ghi|jklm))", null, true)]
+        [InlineData("(?i)(abc|def|(ghi|jlmn))", new[] { "abc", "def", "ghi", "jlmn" }, true)]
+        [InlineData("abc", null, true)]
+        [InlineData("abc|def", null, true)]
+        [InlineData("abc|def|(ghi|jklm)", null, true)]
+        [InlineData("://[Aa][Bb]|[Cc]@!", new[] { "://ab", "c@!" }, true)]
+        [InlineData("(?i)((abc){4,8}|(def){2,3})", new[] { "abcabcab", "defdef" }, true)]
+        [InlineData("(?i)((abc){4,8}|(de+f){2,3})", new[] { "abcabcab", "de" }, true)]
+        [InlineData("(?i)((ab{2}c){4,8}|(de+f){2,3})", new[] { "abbcabbc", "de" }, true)]
+        public void FindPrefixes(string pattern, string[] expectedSet, bool ignoreCase)
+        {
+            RegexTree tree = RegexParser.Parse(pattern, RegexOptions.None, CultureInfo.InvariantCulture);
+            string[] actual = RegexPrefixAnalyzer.FindPrefixes(tree.Root, ignoreCase);
+
+            if (expectedSet is null)
+            {
+                Assert.Null(actual);
+            }
+            else
+            {
+                Assert.NotNull(actual);
+
+                Array.Sort(actual, StringComparer.Ordinal);
+                Array.Sort(expectedSet, StringComparer.Ordinal);
+
+                Assert.Equal(expectedSet, actual);
+            }
+        }
+
         private static string FormatSet(string set)
         {
             if (set is null)
diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs
index cb28e55387b7..c10622548e28 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs
@@ -582,7 +582,7 @@ public void MinMaxLengthIsCorrect(string pattern, int options, int expectedMin,
 
             Assert.Equal(expectedMin, tree.FindOptimizations.MinRequiredLength);
 
-            if (!pattern.EndsWith("$", StringComparison.Ordinal) &&
+            if (!pattern.EndsWith('$') &&
                 !pattern.EndsWith(@"\Z", StringComparison.OrdinalIgnoreCase))
             {
                 // MaxPossibleLength is currently only computed/stored if there's a trailing End{Z} anchor as the max length is otherwise unused
diff --git a/src/libraries/System.Threading.RateLimiting/src/System/Threading/RateLimiting/DefaultPartitionedRateLimiter.cs b/src/libraries/System.Threading.RateLimiting/src/System/Threading/RateLimiting/DefaultPartitionedRateLimiter.cs
index fabf472ef2e5..5c58a1de5970 100644
--- a/src/libraries/System.Threading.RateLimiting/src/System/Threading/RateLimiting/DefaultPartitionedRateLimiter.cs
+++ b/src/libraries/System.Threading.RateLimiting/src/System/Threading/RateLimiting/DefaultPartitionedRateLimiter.cs
@@ -218,6 +218,7 @@ private async Task Heartbeat()
                 {
                     _cachedLimiters.Clear();
                     _cachedLimiters.AddRange(_limiters);
+                    _cacheInvalid = false;
                 }
             }
 
diff --git a/src/libraries/System.Threading.Tasks.Dataflow/src/PACKAGE.md b/src/libraries/System.Threading.Tasks.Dataflow/src/PACKAGE.md
index f15811be3267..7ed2a3f7ace7 100644
--- a/src/libraries/System.Threading.Tasks.Dataflow/src/PACKAGE.md
+++ b/src/libraries/System.Threading.Tasks.Dataflow/src/PACKAGE.md
@@ -2,43 +2,143 @@
 
 <!-- A description of the package and where one can find more documentation -->
 
-
+Provides dataflow components that are collectively referred to as the *TPL Dataflow Library*.
+This dataflow model promotes actor-based programming by providing in-process message passing for coarse-grained dataflow and pipelining tasks.
 
 ## Key Features
 
 <!-- The key features of this package -->
 
-*
-*
-*
+* Foundation for message passing and parallelizing CPU-intensive and I/O-intensive applications that have high throughput and low latency.
+* Provides multiple block types for various dataflow operations (e.g., `BufferBlock`, `ActionBlock`, `TransformBlock`).
+* Dataflow blocks support linking to form *networks*, allowing you to create complex processing topologies.
 
 ## How to Use
 
 <!-- A compelling example on how to use this package with code, as well as any specific guidelines for when to use the package -->
 
+This sample demonstrates a dataflow pipeline that downloads the book "The Iliad of Homer" from a website and searches the text to match individual words with words that reverse the first word's characters.
+
+```csharp
+using System.Net;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks.Dataflow;
+
+var nonLetterRegex = new Regex(@"\P{L}", RegexOptions.Compiled);
+var client = new HttpClient(new HttpClientHandler { AutomaticDecompression = DecompressionMethods.GZip });
+
+// Setup blocks
+
+// Downloads the requested resource as a string.
+TransformBlock<string, string> downloadString = new TransformBlock<string, string>(async uri =>
+{
+    Console.WriteLine("Downloading '{0}'...", uri);
+
+    return await client.GetStringAsync(uri);
+});
+
+// Separates the specified text into an array of words.
+TransformBlock<string, string[]> createWordList = new TransformBlock<string, string[]>(text =>
+{
+    Console.WriteLine("Creating word list...");
+
+    // Remove common punctuation by replacing all non-letter characters with a space character.
+    text = nonLetterRegex.Replace(text, " ");
+
+    // Separate the text into an array of words.
+    return text.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
+});
+
+// Removes short words.
+TransformBlock<string[], string[]> filterWordList = new TransformBlock<string[], string[]>(words =>
+{
+    Console.WriteLine("Filtering word list...");
+
+    return words
+       .Where(word => word.Length > 3)
+       .ToArray();
+});
+
+// Finds all words in the specified collection whose reverse also exists in the collection.
+TransformManyBlock<string[], string> findReversedWords = new TransformManyBlock<string[], string>(words =>
+{
+    Console.WriteLine("Finding reversed words...");
+
+    var wordsSet = new HashSet<string>(words);
+
+    return from word in wordsSet
+           let reverse = string.Concat(word.Reverse())
+           where word != reverse && wordsSet.Contains(reverse)
+           select word;
+});
+
+// Prints the provided reversed words to the console.
+ActionBlock<string> printReversedWords = new ActionBlock<string>(reversedWord =>
+{
+    Console.WriteLine("Found reversed words {0}/{1}", reversedWord, string.Concat(reversedWord.Reverse()));
+});
+
+
+// Connect the dataflow blocks to form a pipeline.
+var linkOptions = new DataflowLinkOptions { PropagateCompletion = true };
+
+downloadString.LinkTo(createWordList, linkOptions);
+createWordList.LinkTo(filterWordList, linkOptions);
+filterWordList.LinkTo(findReversedWords, linkOptions);
+findReversedWords.LinkTo(printReversedWords, linkOptions);
+
+// Post data to the pipeline, "The Iliad of Homer" by Homer.
+downloadString.Post("http://www.gutenberg.org/cache/epub/16452/pg16452.txt");
+
+// Mark the head of the pipeline as complete.
+downloadString.Complete();
+
+// Wait for the last block in the pipeline to process all messages.
+printReversedWords.Completion.Wait();
+
+// Output:
+// Downloading 'http://www.gutenberg.org/cache/epub/16452/pg16452.txt'...
+// Creating word list...
+// Filtering word list...
+// Finding reversed words...
+// Found reversed words parts/strap
+// Found reversed words deer/reed
+// Found reversed words deem/meed
+// Found reversed words flow/wolf
+// ...
+
+```
+
+More details can be found on [Dataflow (Task Parallel Library)](https://learn.microsoft.com/dotnet/standard/parallel-programming/dataflow-task-parallel-library) and [Walkthrough: Creating a Dataflow Pipeline](https://learn.microsoft.com/dotnet/standard/parallel-programming/walkthrough-creating-a-dataflow-pipeline) pages.
+
 ## Main Types
 
 <!-- The main types provided in this library -->
 
 The main types provided by this library are:
 
-* ``
-* ``
-* ``
+* `System.Threading.Tasks.Dataflow.ISourceBlock<TOutput>`
+* `System.Threading.Tasks.Dataflow.ITargetBlock<TInput>`
+* `System.Threading.Tasks.Dataflow.IPropagatorBlock<TInput,TOutput>`
+* `System.Threading.Tasks.Dataflow.ActionBlock<TInput>`
+* `System.Threading.Tasks.Dataflow.BatchBlock<T>`
+* `System.Threading.Tasks.Dataflow.BatchedJoinBlock<T1, T2>`
+* `System.Threading.Tasks.Dataflow.BroadcastBlock<T>`
+* `System.Threading.Tasks.Dataflow.BufferBlock<T>`
+* `System.Threading.Tasks.Dataflow.JoinBlock<T1, T2>`
+* `System.Threading.Tasks.Dataflow.TransformBlock<TInput, TOutput>`
+* `System.Threading.Tasks.Dataflow.TransformManyBlock<TInput, TOutput>`
+* `System.Threading.Tasks.Dataflow.WriteOnceBlock<T>`
 
 ## Additional Documentation
 
 <!-- Links to further documentation. Remove conceptual documentation if not available for the library. -->
 
-* [Conceptual documentation](https://learn.microsoft.com/dotnet/standard/serialization/**LIBRARYNAME**/overview)
-* [API documentation](https://learn.microsoft.com/dotnet/api/**LIBRARYNAME**)
-
-## Related Packages
-
-<!-- The related packages associated with this package -->
+* [Conceptual documentation](https://learn.microsoft.com/dotnet/standard/parallel-programming/dataflow-task-parallel-library)
+* [API documentation](https://learn.microsoft.com/dotnet/api/system.threading.tasks.dataflow)
 
 ## Feedback & Contributing
 
 <!-- How to provide feedback on this package and contribute to it -->
 
-System.Threading.Tasks.Dataflow is released as open source under the [MIT license](https://licenses.nuget.org/MIT). Bug reports and contributions are welcome at [the GitHub repository](https://github.com/dotnet/runtime).
\ No newline at end of file
+System.Threading.Tasks.Dataflow is released as open source under the [MIT license](https://licenses.nuget.org/MIT). Bug reports and contributions are welcome at [the GitHub repository](https://github.com/dotnet/runtime).
diff --git a/src/libraries/System.Threading.Thread/ref/System.Threading.Thread.cs b/src/libraries/System.Threading.Thread/ref/System.Threading.Thread.cs
index c4c038036ce3..200fe545e563 100644
--- a/src/libraries/System.Threading.Thread/ref/System.Threading.Thread.cs
+++ b/src/libraries/System.Threading.Thread/ref/System.Threading.Thread.cs
@@ -110,41 +110,93 @@ public void UnsafeStart() { }
         [System.Runtime.Versioning.UnsupportedOSPlatformAttribute("browser")]
 #endif
         public void UnsafeStart(object? parameter) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static byte VolatileRead(ref byte address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static double VolatileRead(ref double address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static short VolatileRead(ref short address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static int VolatileRead(ref int address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static long VolatileRead(ref long address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static System.IntPtr VolatileRead(ref System.IntPtr address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         [return: System.Diagnostics.CodeAnalysis.NotNullIfNotNullAttribute("address")]
         public static object? VolatileRead([System.Diagnostics.CodeAnalysis.NotNullIfNotNullAttribute("address")] ref object? address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         [System.CLSCompliantAttribute(false)]
         public static sbyte VolatileRead(ref sbyte address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static float VolatileRead(ref float address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         [System.CLSCompliantAttribute(false)]
         public static ushort VolatileRead(ref ushort address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         [System.CLSCompliantAttribute(false)]
         public static uint VolatileRead(ref uint address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         [System.CLSCompliantAttribute(false)]
         public static ulong VolatileRead(ref ulong address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         [System.CLSCompliantAttribute(false)]
         public static System.UIntPtr VolatileRead(ref System.UIntPtr address) { throw null; }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static void VolatileWrite(ref byte address, byte value) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static void VolatileWrite(ref double address, double value) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static void VolatileWrite(ref short address, short value) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static void VolatileWrite(ref int address, int value) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static void VolatileWrite(ref long address, long value) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static void VolatileWrite(ref System.IntPtr address, System.IntPtr value) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static void VolatileWrite([System.Diagnostics.CodeAnalysis.NotNullIfNotNullAttribute("value")] ref object? address, object? value) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         [System.CLSCompliantAttribute(false)]
         public static void VolatileWrite(ref sbyte address, sbyte value) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         public static void VolatileWrite(ref float address, float value) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         [System.CLSCompliantAttribute(false)]
         public static void VolatileWrite(ref ushort address, ushort value) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         [System.CLSCompliantAttribute(false)]
         public static void VolatileWrite(ref uint address, uint value) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         [System.CLSCompliantAttribute(false)]
         public static void VolatileWrite(ref ulong address, ulong value) { }
+        [System.ObsoleteAttribute("Thread.VolatileRead and Thread.VolatileWrite are obsolete. Use Volatile.Read or Volatile.Write respectively instead.", DiagnosticId = "SYSLIB0054", UrlFormat = "https://aka.ms/dotnet-warnings/{0}")]
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
         [System.CLSCompliantAttribute(false)]
         public static void VolatileWrite(ref System.UIntPtr address, System.UIntPtr value) { }
         public static bool Yield() { throw null; }
diff --git a/src/libraries/System.Threading.Thread/src/CompatibilitySuppressions.Threading.xml b/src/libraries/System.Threading.Thread/src/CompatibilitySuppressions.Threading.xml
index 5fc41e30ed44..8eb2f78e6a70 100644
--- a/src/libraries/System.Threading.Thread/src/CompatibilitySuppressions.Threading.xml
+++ b/src/libraries/System.Threading.Thread/src/CompatibilitySuppressions.Threading.xml
@@ -20,6 +20,10 @@
     <DiagnosticId>CP0002</DiagnosticId>
     <Target>F:System.Threading.Thread.ThrowOnBlockingWaitOnJSInteropThread</Target>
   </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0002</DiagnosticId>
+    <Target>F:System.Threading.Thread.WarnOnBlockingWaitOnJSInteropThread</Target>
+  </Suppression>
   <Suppression>
     <DiagnosticId>CP0002</DiagnosticId>
     <Target>M:System.Threading.Thread.AssureBlockingPossible</Target>
diff --git a/src/libraries/System.Threading.Thread/tests/System.Threading.Thread.Tests.csproj b/src/libraries/System.Threading.Thread/tests/System.Threading.Thread.Tests.csproj
index 6a9977dc6e11..ed29b66576ee 100644
--- a/src/libraries/System.Threading.Thread/tests/System.Threading.Thread.Tests.csproj
+++ b/src/libraries/System.Threading.Thread/tests/System.Threading.Thread.Tests.csproj
@@ -5,6 +5,10 @@
     <IncludeRemoteExecutor>true</IncludeRemoteExecutor>
     <TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(TargetOS)' == 'browser'">
+    <XunitShowProgress>true</XunitShowProgress>
+    <_WasmPThreadPoolUnusedSize>10</_WasmPThreadPoolUnusedSize>
+  </PropertyGroup>
   <ItemGroup>
     <Compile Include="CompressedStackTests.cs" />
     <Compile Include="ExceptionTests.cs" />
diff --git a/src/libraries/System.Threading.ThreadPool/tests/System.Threading.ThreadPool.Tests.csproj b/src/libraries/System.Threading.ThreadPool/tests/System.Threading.ThreadPool.Tests.csproj
index 0cb21c9d3849..ad3f9814e0ad 100644
--- a/src/libraries/System.Threading.ThreadPool/tests/System.Threading.ThreadPool.Tests.csproj
+++ b/src/libraries/System.Threading.ThreadPool/tests/System.Threading.ThreadPool.Tests.csproj
@@ -4,6 +4,10 @@
     <TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
     <TestRuntime>true</TestRuntime>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(TargetOS)' == 'browser'">
+    <XunitShowProgress>true</XunitShowProgress>
+    <_WasmPThreadPoolUnusedSize>10</_WasmPThreadPoolUnusedSize>
+  </PropertyGroup>
   <ItemGroup>
     <Compile Include="ThreadPoolTests.cs" />
     <Compile Include="RegisteredWaitTests.cs" />
diff --git a/src/libraries/System.Threading.ThreadPool/tests/ThreadPoolTests.cs b/src/libraries/System.Threading.ThreadPool/tests/ThreadPoolTests.cs
index 0b51d5e07a6e..c96ad22b47b5 100644
--- a/src/libraries/System.Threading.ThreadPool/tests/ThreadPoolTests.cs
+++ b/src/libraries/System.Threading.ThreadPool/tests/ThreadPoolTests.cs
@@ -6,6 +6,8 @@
 using System.Diagnostics.Tracing;
 using System.IO;
 using System.Linq;
+using System.Net.Sockets;
+using System.Net;
 using System.Reflection;
 using System.Threading.Tasks;
 using System.Threading.Tests;
@@ -1160,6 +1162,95 @@ public void ThreadPoolMinMaxThreadsEventTest()
             }).Dispose();
         }
 
+        private sealed class RuntimeEventListener : EventListener
+        {
+            private const string ClrProviderName = "Microsoft-Windows-DotNETRuntime";
+            private const EventKeywords ThreadingKeyword = (EventKeywords)0x10000;
+
+            public volatile int tpIOEnqueue = 0;
+            public volatile int tpIODequeue = 0;
+            public ManualResetEvent tpWaitIOEnqueueEvent = new ManualResetEvent(false);
+            public ManualResetEvent tpWaitIODequeueEvent = new ManualResetEvent(false);
+
+            protected override void OnEventSourceCreated(EventSource eventSource)
+            {
+                if (eventSource.Name.Equals(ClrProviderName))
+                {
+                    EnableEvents(eventSource, EventLevel.Verbose, ThreadingKeyword);
+                }
+
+                base.OnEventSourceCreated(eventSource);
+            }
+
+            protected override void OnEventWritten(EventWrittenEventArgs eventData)
+            {
+                if (eventData.EventName.Equals("ThreadPoolIOEnqueue"))
+                {
+                    Interlocked.Increment(ref tpIOEnqueue);
+                    tpWaitIOEnqueueEvent.Set();
+                }
+                else if (eventData.EventName.Equals("ThreadPoolIODequeue"))
+                {
+                    Interlocked.Increment(ref tpIODequeue);
+                    tpWaitIODequeueEvent.Set();
+                }
+            }
+        }
+
+        [ConditionalFact(nameof(IsThreadingAndRemoteExecutorSupported), nameof(UseWindowsThreadPool))]
+        public void ReadWriteAsyncTest()
+        {
+            RemoteExecutor.Invoke(async () =>
+            {
+                using (RuntimeEventListener eventListener = new RuntimeEventListener())
+                {
+                    TaskCompletionSource<int> portTcs = new TaskCompletionSource<int>();
+                    TaskCompletionSource<bool> readAsyncReadyTcs = new TaskCompletionSource<bool>();
+
+                    async Task StartListenerAsync()
+                    {
+                        using TcpListener listener = new TcpListener(IPAddress.Loopback, 0);
+                        listener.Start();
+                        int port = ((IPEndPoint)listener.LocalEndpoint).Port;
+                        portTcs.SetResult(port);
+                        using TcpClient client = await listener.AcceptTcpClientAsync();
+                        using (NetworkStream stream = client.GetStream())
+                        {
+                            byte[] buffer = new byte[1];
+                            Task readAsyncTask = stream.ReadAsync(buffer, 0, buffer.Length);
+                            readAsyncReadyTcs.SetResult(true);
+                            await readAsyncTask;
+                        }
+                        listener.Stop();
+                    }
+
+                    async Task StartClientAsync()
+                    {
+                        int port = await portTcs.Task;
+                        using (TcpClient client = new TcpClient(new IPEndPoint(IPAddress.Loopback, 0)))
+                        {
+                            await client.ConnectAsync(IPAddress.Loopback, port);
+                            using (NetworkStream stream = client.GetStream())
+                            {
+                                bool readAsyncReady = await readAsyncReadyTcs.Task;
+                                byte[] data = new byte[1];
+                                await stream.WriteAsync(data, 0, data.Length);
+                            }
+                        }
+                    }
+
+                    Task listenerTask = StartListenerAsync();
+                    Task clientTask = StartClientAsync();
+                    await Task.WhenAll(listenerTask, clientTask);
+                    ManualResetEvent[] waitEvents = [eventListener.tpWaitIOEnqueueEvent, eventListener.tpWaitIODequeueEvent];
+
+                    Assert.True(WaitHandle.WaitAll(waitEvents, TimeSpan.FromSeconds(15))); // Assert that there wasn't a timeout
+                    Assert.True(eventListener.tpIOEnqueue > 0);
+                    Assert.True(eventListener.tpIODequeue > 0);
+                }
+            }).Dispose();
+        }
+
         public static bool IsThreadingAndRemoteExecutorSupported =>
             PlatformDetection.IsThreadingSupported && RemoteExecutor.IsSupported;
 
@@ -1169,6 +1260,7 @@ private static bool GetUseWindowsThreadPool()
             return useWindowsThreadPool;
         }
 
-        private static bool UsePortableThreadPool { get; } = !GetUseWindowsThreadPool();
+        private static bool UseWindowsThreadPool { get; } = GetUseWindowsThreadPool();
+        private static bool UsePortableThreadPool { get; } = !UseWindowsThreadPool;
     }
 }
diff --git a/src/libraries/System.Threading/tests/SemaphoreSlimTests.cs b/src/libraries/System.Threading/tests/SemaphoreSlimTests.cs
index 7aabd01c39f1..dfc75fc2a89e 100644
--- a/src/libraries/System.Threading/tests/SemaphoreSlimTests.cs
+++ b/src/libraries/System.Threading/tests/SemaphoreSlimTests.cs
@@ -90,6 +90,7 @@ public static void RunSemaphoreSlimTest1_WaitAsync()
         }
 
         [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/99501", typeof(PlatformDetection), nameof(PlatformDetection.IsWasmThreadingSupported))]
         public static void RunSemaphoreSlimTest1_WaitAsync_NegativeCases()
         {
             // Invalid timeout
diff --git a/src/libraries/System.Threading/tests/System.Threading.Tests.csproj b/src/libraries/System.Threading/tests/System.Threading.Tests.csproj
index e938db9863da..768bb7b665d9 100644
--- a/src/libraries/System.Threading/tests/System.Threading.Tests.csproj
+++ b/src/libraries/System.Threading/tests/System.Threading.Tests.csproj
@@ -7,6 +7,10 @@
     <!-- CA2252: Opt in to preview features before using them (Lock) -->
     <EnablePreviewFeatures>true</EnablePreviewFeatures>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(TargetOS)' == 'browser'">
+    <XunitShowProgress>true</XunitShowProgress>
+    <_WasmPThreadPoolUnusedSize>10</_WasmPThreadPoolUnusedSize>
+  </PropertyGroup>
   <ItemGroup>
     <Compile Include="AsyncLocalTests.cs" />
     <Compile Include="AutoResetEventTests.cs" />
diff --git a/src/libraries/System.Web.HttpUtility/src/System/Web/HttpUtility.cs b/src/libraries/System.Web.HttpUtility/src/System/Web/HttpUtility.cs
index 5d0d503f526b..9405c39a75bd 100644
--- a/src/libraries/System.Web.HttpUtility/src/System/Web/HttpUtility.cs
+++ b/src/libraries/System.Web.HttpUtility/src/System/Web/HttpUtility.cs
@@ -67,7 +67,7 @@ public override string ToString()
                         {
                             if (!string.IsNullOrEmpty(key))
                             {
-                                sb.Append(key).Append('=');
+                                sb.Append(UrlEncode(key)).Append('=');
                             }
                             sb.Append(UrlEncode(value)).Append('&');
                         }
diff --git a/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs b/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs
index 82268b3b326b..b95c663e932d 100644
--- a/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs
+++ b/src/libraries/System.Web.HttpUtility/tests/HttpUtility/HttpUtilityTest.cs
@@ -792,6 +792,7 @@ public void UrlPathEncode(string decoded, string encoded)
         [InlineData("foo&bar")]
         [InlineData("foo&name=bar")]
         [InlineData("name=bar&foo&foo")]
+        [InlineData("_return_fields%2b=extattrs&name%3a=somename.somedomain.local")]
         public void ParseAndToStringMaintainAllKeyValuePairs(string input)
         {
             var values = HttpUtility.ParseQueryString(input);
diff --git a/src/libraries/System.Windows.Extensions/src/System/Media/SoundPlayer.cs b/src/libraries/System.Windows.Extensions/src/System/Media/SoundPlayer.cs
index 6e39549fd0d6..d77919070c24 100644
--- a/src/libraries/System.Windows.Extensions/src/System/Media/SoundPlayer.cs
+++ b/src/libraries/System.Windows.Extensions/src/System/Media/SoundPlayer.cs
@@ -582,9 +582,9 @@ private unsafe void ValidateSoundFile(string fileName)
                         if (waveFormat == null)
                         {
                             int dw = ck.cksize;
-                            if (dw < Marshal.SizeOf(typeof(Interop.WinMM.WAVEFORMATEX)))
+                            if (dw < Marshal.SizeOf<Interop.WinMM.WAVEFORMATEX>())
                             {
-                                dw = Marshal.SizeOf(typeof(Interop.WinMM.WAVEFORMATEX));
+                                dw = Marshal.SizeOf<Interop.WinMM.WAVEFORMATEX>();
                             }
 
                             waveFormat = new Interop.WinMM.WAVEFORMATEX();
diff --git a/src/libraries/apicompat/ApiCompatBaseline.NetCoreAppLatestStable.xml b/src/libraries/apicompat/ApiCompatBaseline.NetCoreAppLatestStable.xml
index 2f7dfb841c80..09cfe417e381 100644
--- a/src/libraries/apicompat/ApiCompatBaseline.NetCoreAppLatestStable.xml
+++ b/src/libraries/apicompat/ApiCompatBaseline.NetCoreAppLatestStable.xml
@@ -1,4 +1,472 @@
 ﻿<?xml version="1.0" encoding="utf-8"?>
 <!-- https://learn.microsoft.com/en-us/dotnet/fundamentals/package-validation/diagnostic-ids -->
 <Suppressions xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.String)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.String)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.Type,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.Type,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.String)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.String)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.Type,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.Type,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EnumConverter.#ctor(System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.DesignerAttribute.DesignerBaseTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.DesignerAttribute.DesignerTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.EditorAttribute.EditorBaseTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.EditorAttribute.EditorTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.EnumConverter.EnumType:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/netstandard.dll</Left>
+    <Right>net9.0/netstandard.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.String)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.String)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.Type,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.Type,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.String)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.String)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.Type,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.Type,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.DesignerAttribute.DesignerBaseTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.DesignerAttribute.DesignerTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.EditorAttribute.EditorBaseTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.EditorAttribute.EditorTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.Primitives.dll</Left>
+    <Right>net9.0/System.ComponentModel.Primitives.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.String)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.String)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.Type,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.Type,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.String)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.String)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.Type,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.Type,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EnumConverter.#ctor(System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.DesignerAttribute.DesignerBaseTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.DesignerAttribute.DesignerTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.EditorAttribute.EditorBaseTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.EditorAttribute.EditorTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.EnumConverter.EnumType:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.ComponentModel.TypeConverter.dll</Left>
+    <Right>net9.0/System.ComponentModel.TypeConverter.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.String)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.String)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.String)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.Type,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.Type,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.DesignerAttribute.#ctor(System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.String)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.String)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.String,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.Type,System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EditorAttribute.#ctor(System.Type,System.Type)$1:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>M:System.ComponentModel.EnumConverter.#ctor(System.Type)$0:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.DesignerAttribute.DesignerBaseTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.DesignerAttribute.DesignerTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.EditorAttribute.EditorBaseTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.EditorAttribute.EditorTypeName:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0014</DiagnosticId>
+    <Target>P:System.ComponentModel.EnumConverter.EnumType:[T:System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembersAttribute]</Target>
+    <Left>net8.0/System.dll</Left>
+    <Right>net9.0/System.dll</Right>
+  </Suppression>
 </Suppressions>
\ No newline at end of file
diff --git a/src/libraries/native-binplace.proj b/src/libraries/native-binplace.proj
index 484ba3175cbe..7237b6c8a9c4 100644
--- a/src/libraries/native-binplace.proj
+++ b/src/libraries/native-binplace.proj
@@ -13,15 +13,17 @@
   <!-- Ordering matters! Overriding GetBinPlaceItems and Build targets after the Sdk import. -->
   <Target Name="GetBinPlaceItems">
     <ItemGroup>
-      <BinPlaceItem Include="$(NativeBinDir)*.dll" />
+       <ExcludeNativeLibrariesRuntimeFiles Condition="'$(RuntimeFlavor)' != 'Mono'"
+                                          Include="$(NativeBinDir)System.Globalization.Native.dll;$(NativeBinDir)System.Globalization.Native.so;$(NativeBinDir)System.Globalization.Native.dylib" />
+      <BinPlaceItem Include="$(NativeBinDir)*.dll" Exclude="@(ExcludeNativeLibrariesRuntimeFiles)" />
       <BinPlaceItem Include="$(NativeBinDir)*.pdb" />
       <BinPlaceItem Include="$(NativeBinDir)*.lib" />
       <BinPlaceItem Include="$(NativeBinDir)*.lib.js" />
       <BinPlaceItem Include="$(NativeBinDir)*.a" />
       <BinPlaceItem Include="$(NativeBinDir)*.bc" />
-      <BinPlaceItem Include="$(NativeBinDir)*.so" />
+      <BinPlaceItem Include="$(NativeBinDir)*.so" Exclude="@(ExcludeNativeLibrariesRuntimeFiles)" />
       <BinPlaceItem Include="$(NativeBinDir)*.dbg" />
-      <BinPlaceItem Include="$(NativeBinDir)*.dylib" />
+      <BinPlaceItem Include="$(NativeBinDir)*.dylib" Exclude="@(ExcludeNativeLibrariesRuntimeFiles)" />
       <BinPlaceItem Include="$(NativeBinDir)*.dwarf" />
       <BinPlaceItem Include="$(NativeBinDir)*.dex" />
       <BinPlaceItem Include="$(NativeBinDir)*.jar" />
diff --git a/src/libraries/oob-all.proj b/src/libraries/oob-all.proj
index 07b063fab409..b8d20924522d 100644
--- a/src/libraries/oob-all.proj
+++ b/src/libraries/oob-all.proj
@@ -29,7 +29,7 @@
 
     <!-- Skip these projects during source-build as they rely on external prebuilts. -->
     <ProjectReference Remove="Microsoft.Extensions.DependencyInjection.Specification.Tests\src\Microsoft.Extensions.DependencyInjection.Specification.Tests.csproj"
-                      Condition="'$(DotNetBuildFromSource)' == 'true'" />
+                      Condition="'$(DotNetBuildSourceOnly)' == 'true'" />
   </ItemGroup>
 
 </Project>
diff --git a/src/libraries/oob-src.proj b/src/libraries/oob-src.proj
index bb571ebc35f1..48bc4897a267 100644
--- a/src/libraries/oob-src.proj
+++ b/src/libraries/oob-src.proj
@@ -37,7 +37,7 @@
 
     <!-- Skip these projects during source-build as they rely on external prebuilts. -->
     <ProjectReference Remove="Microsoft.Extensions.DependencyInjection.Specification.Tests\src\Microsoft.Extensions.DependencyInjection.Specification.Tests.csproj"
-                      Condition="'$(DotNetBuildFromSource)' == 'true'" />
+                      Condition="'$(DotNetBuildSourceOnly)' == 'true'" />
   </ItemGroup>
 
 </Project>
diff --git a/src/libraries/oob.proj b/src/libraries/oob.proj
index 34823cd1ba4f..d49efa6e6168 100644
--- a/src/libraries/oob.proj
+++ b/src/libraries/oob.proj
@@ -13,7 +13,7 @@
     <ProjectReference Include="oob-src.proj" OutputItemType="OOBAssembly" />
 
     <ProjectReference Include="apicompat\ApiCompat.proj"
-                      Condition="'$(DotNetBuildFromSource)' != 'true' and '$(ApiCompatValidateAssemblies)' != 'false'" />
+                      Condition="'$(DotNetBuildSourceOnly)' != 'true' and '$(ApiCompatValidateAssemblies)' != 'false'" />
   </ItemGroup>
 
   <ItemGroup Condition="'$(BuildTargetFramework)' != '$(NetCoreAppCurrent)' or '$(BuildTargetFramework)' == ''">
diff --git a/src/libraries/sendtohelix-superpmi-collect.targets b/src/libraries/sendtohelix-superpmi-collect.targets
index 5affc58dd36e..6c02e16db7c5 100644
--- a/src/libraries/sendtohelix-superpmi-collect.targets
+++ b/src/libraries/sendtohelix-superpmi-collect.targets
@@ -56,4 +56,9 @@
     <HelixPostCommand Include="$HELIX_PYTHONPATH $spmi_superpmi_py collect -log_level DEBUG -core_root $spmi_core_root --skip_cleanup --clean --ci --skip_collection_step --skip_toc_step -temp_dir $spmi_collect_dir -output_mch_path $spmi_finalmch -log_file $spmi_log_file" />
   </ItemGroup>
 
+  <ItemGroup>
+    <!-- Include the SuperPMI tools as a separate correlation payload -->
+    <HelixCorrelationPayload Include="$(TestArchiveRoot)coreclr.zip" Destination="coreclr" AsArchive="true" />
+  </ItemGroup>
+
 </Project>
diff --git a/src/libraries/sendtohelix.proj b/src/libraries/sendtohelix.proj
index 44a02cd5b828..17aa91b1597b 100644
--- a/src/libraries/sendtohelix.proj
+++ b/src/libraries/sendtohelix.proj
@@ -23,7 +23,7 @@
     <TestEnvFileName Condition=" '$(TargetOS)' != 'windows' ">SetStressModes_$(Scenario).sh</TestEnvFileName>
 
   </PropertyGroup>
-  
+
   <!-- The Helix correlation payload file -->
   <Target Name="_SetTestArchiveRuntimeFile"
           Condition="'$(BuildTargetFramework)' == '$(NetCoreAppCurrent)'">
@@ -201,7 +201,8 @@
   </Target>
 
   <Target Name="PrepareSuperPmiCollectCorrelationPayload"
-          Condition="'$(SuperPmiCollect)' == 'true'">
+          Condition="'$(SuperPmiCollect)' == 'true'"
+          DependsOnTargets="ResolveRuntimeFilesFromLocalBuild">
     <!-- Copy Files needed for SuperPMI collection into a directory that will be added to the single, compressed file used as the correlation payload by Helix. -->
     <Message Importance="High" Text="Preparing SuperPMI collection payload directory" />
 
@@ -220,12 +221,16 @@
          (superpmi, mcs, superpmi-shim-collector, clrjit), but it's easier to just copy everything.
     -->
 
-    <Message Importance="High" Text="Copying Runtime binaries from $(ArtifactsDir)transport/coreclr to $(NetCoreAppCurrentTestHostPath)coreclr" />
+    <Message Importance="High" Text="Copying Runtime binaries from $(CoreCLRArtifactsPath) to $(TestArchiveRoot)\coreclr" />
     <MakeDir Directories="$(NetCoreAppCurrentTestHostPath)coreclr" />
     <ItemGroup>
-      <SuperPMICoreClrFiles Include="$(ArtifactsDir)transport\coreclr\**\*.*"/>
+      <SuperPMICoreClrFiles Include="$(CoreCLRArtifactsPath)\**\*.*"/>
     </ItemGroup>
-    <Copy SourceFiles="@(SuperPMICoreClrFiles)" DestinationFolder="$(NetCoreAppCurrentTestHostPath)coreclr\%(RecursiveDir)" />
+    <Copy SourceFiles="@(SuperPMICoreClrFiles)" DestinationFolder="$(TestArchiveRoot)coreclr\%(RecursiveDir)" />
+
+    <ZipDirectory SourceDirectory="$(TestArchiveRoot)coreclr"
+                  DestinationFile="$(TestArchiveRoot)coreclr.zip"
+                  Overwrite="true" />
   </Target>
 
   <Target Name="CompressRuntimeDirectory"
diff --git a/src/libraries/sendtohelixhelp.proj b/src/libraries/sendtohelixhelp.proj
index 910cf4492609..7c668b034712 100644
--- a/src/libraries/sendtohelixhelp.proj
+++ b/src/libraries/sendtohelixhelp.proj
@@ -31,9 +31,9 @@
         '$(Scenario)' == 'gcstress0x3' or
         '$(Scenario)' == 'gcstress0xc' or
         '$(Scenario)' == 'heapverify1' or
-        '$(Scenario)' == 'gcstress0xc_zapdisable' or
-        '$(Scenario)' == 'gcstress0xc_zapdisable_jitstress2' or
-        '$(Scenario)' == 'gcstress0xc_zapdisable_heapverify1' or
+        '$(Scenario)' == 'gcstress0xc_disabler2r' or
+        '$(Scenario)' == 'gcstress0xc_disabler2r_jitstress2' or
+        '$(Scenario)' == 'gcstress0xc_disabler2r_heapverify1' or
         '$(Scenario)' == 'gcstress0xc_jitstress1' or
         '$(Scenario)' == 'gcstress0xc_jitstress2' or
         '$(Scenario)' == 'gcstress0xc_jitminopts_heapverify1'">06:00:00</_workItemTimeout>
@@ -240,9 +240,20 @@
     </ItemGroup>
   </Target>
 
+  <!-- XUnitLogChecker required configuration -->
   <ItemGroup Condition="Exists('$(XUnitLogCheckerLibrariesOutDir)')">
     <HelixCorrelationPayload Include="$(XUnitLogCheckerLibrariesOutDir)" />
-  </ItemGroup>  
+    <HelixCorrelationPayload Condition="'$(WindowsShell)' == 'true'" Include="dotnet-sos">
+      <Destination>sos</Destination>
+      <Uri>https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/flat2/dotnet-sos/$(DotnetSosVersion)/dotnet-sos.$(DotnetSosVersion).nupkg</Uri>
+    </HelixCorrelationPayload>
+  </ItemGroup>
+
+  <PropertyGroup Condition="'$(TargetOS)' == 'windows'">
+    <NtSymbolPathEnvVar>set _NT_SYMBOL_PATH=%25HELIX_CORRELATION_PAYLOAD%25%3B%25HELIX_CORRELATION_PAYLOAD%25\PDB%3B%25HELIX_CORRELATION_PAYLOAD%25\shared\$(MicrosoftNetCoreAppFrameworkName)\$(ProductVersion)</NtSymbolPathEnvVar>
+    <ExecuteDotNetSos>%25HELIX_CORRELATION_PAYLOAD%25\dotnet %25HELIX_CORRELATION_PAYLOAD%25\sos\tools\net$(DotnetSosTargetFrameworkVersion)\any\dotnet-sos.dll install --architecture $(TargetArchitecture)</ExecuteDotNetSos>
+    <HelixPreCommands>$(HelixPreCommands);$(NtSymbolPathEnvVar);$(ExecuteDotNetSos)</HelixPreCommands>
+  </PropertyGroup>
 
   <!--
     Create all the Helix data to start a set of jobs. Create a set of work items, one for each libraries
diff --git a/src/libraries/tests.proj b/src/libraries/tests.proj
index f7a6cb2d64f5..0e11b1da8933 100644
--- a/src/libraries/tests.proj
+++ b/src/libraries/tests.proj
@@ -99,8 +99,8 @@
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Transactions.Local\tests\System.Transactions.Local.Tests.csproj" />
   </ItemGroup>
 
-  <!-- Projects that won't work when DotNetBuildFromSource. -->
-  <ItemGroup Condition="'$(DotNetBuildFromSource)' == 'true'">
+  <!-- Projects that won't work when building source-only. -->
+  <ItemGroup Condition="'$(DotNetBuildSourceOnly)' == 'true'">
     <!-- Project targets an older target framework which does not get built. -->
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Numerics.Tensors\tests\Net8Tests\System.Numerics.Tensors.Net8.Tests.csproj" />
   </ItemGroup>
@@ -120,13 +120,6 @@
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Private.CoreLib\tests\IntrinsicsInSystemPrivatecoreLibAnalyzer.Tests\IntrinsicsInSystemPrivateCoreLib.Tests.csproj" />
   </ItemGroup>
 
-  <ItemGroup Condition="'$(TargetOS)' == 'windows' and '$(TargetArchitecture)' == 'arm'">
-    <!-- LibraryImportGenerator runtime tests depend on DNNE, which does not support Windows ARM32 as we don't officially support it. -->
-    <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime.InteropServices\tests\LibraryImportGenerator.Tests\LibraryImportGenerator.Tests.csproj" />
-    <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime.InteropServices.JavaScript\tests\JSImportGenerator.UnitTest\JSImportGenerator.Unit.Tests.csproj" />
-    <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime.InteropServices\tests\ComInterfaceGenerator.Tests\ComInterfaceGenerator.Tests.csproj" />
-  </ItemGroup>
-
   <ItemGroup Condition="'$(TargetArchitecture)' == 'armel'">
     <!-- LibraryImportGenerator runtime tests depend on DNNE, which does not support armel as we don't officially support it. -->
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime.InteropServices\tests\LibraryImportGenerator.Tests\LibraryImportGenerator.Tests.csproj" />
@@ -145,8 +138,6 @@
     <!-- LibraryImportGenerator runtime tests build depends pulling down a pre-built nethost binary, which is not available for s390x. -->
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime.InteropServices\tests\LibraryImportGenerator.Tests\LibraryImportGenerator.Tests.csproj" />
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime.InteropServices\tests\ComInterfaceGenerator.Tests\ComInterfaceGenerator.Tests.csproj" />
-    <!-- LibraryImportGenerator unit tests fail since NuGet 5.6.0 signature verification does not work on big-endian systems (needs >= 5.11.0). -->
-    <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime.InteropServices\tests\LibraryImportGenerator.UnitTests\LibraryImportGenerator.Unit.Tests.csproj" />
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime.InteropServices\tests\ComInterfaceGenerator.Unit.Tests\ComInterfaceGenerator.Unit.Tests.csproj" />
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime.InteropServices.JavaScript\tests\JSImportGenerator.UnitTest\JSImportGenerator.Unit.Tests.csproj" />
   </ItemGroup>
@@ -404,8 +395,6 @@
   </ItemGroup>
 
   <ItemGroup Condition="'$(TargetOS)' == 'browser' and '$(WasmEnableThreads)' == 'true' and '$(RunDisabledWasmTests)' != 'true'">
-    <!-- Until: https://github.com/dotnet/runtime/pull/97441 -->
-    <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime.InteropServices.JavaScript\tests\System.Runtime.InteropServices.JavaScript.UnitTests\System.Runtime.InteropServices.JavaScript.Tests.csproj" />
     <!-- Issue: https://github.com/dotnet/runtime/issues/95795 -->
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime\tests\System.Globalization.Tests\Hybrid\System.Globalization.Hybrid.WASM.Tests.csproj" />
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime\tests\System.Globalization.Calendars.Tests\Hybrid\System.Globalization.Calendars.Hybrid.WASM.Tests.csproj" />
@@ -452,6 +441,10 @@
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Text.Json\tests\System.Text.Json.SourceGeneration.Tests\System.Text.Json.SourceGeneration.Roslyn4.4.Tests.csproj"
                       Condition="'$(TargetOS)' == 'linux'" />
 
+    <!-- https://github.com/dotnet/runtime/issues/94653 -->
+    <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime\tests\System.Threading.Tasks.Tests\System.Threading.Tasks.Tests.csproj"
+                       Condition="'$(TargetsLinuxMusl)' == 'true'" />
+
     <!-- Not applicable to NativeAOT -->
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)Microsoft.Extensions.HostFactoryResolver\tests\Microsoft.Extensions.HostFactoryResolver.Tests.csproj" />
     <ProjectExclusions Include="$(MSBuildThisFileDirectory)System.Runtime.Loader\tests\DefaultContext\System.Runtime.Loader.DefaultContext.Tests.csproj" />
@@ -771,7 +764,7 @@
                       BuildInParallel="$(Samples_BuildInParallel)" />
   </ItemGroup>
 
-  <ItemGroup Condition="'$(ArchiveTests)' == 'true' and '$(BuildTargetFramework)' == '$(NetCoreAppCurrent)' and '$(IsXUnitLogCheckerSupported)' == 'true'">
+  <ItemGroup Condition="'$(ArchiveTests)' == 'true' and '$(TargetFrameworkIdentifier)' == '.NETCoreApp' and '$(IsXUnitLogCheckerSupported)' == 'true'">
     <ProjectReference
       Include="$(RepoRoot)src\tests\Common\XUnitLogChecker\XUnitLogChecker.csproj"
       AdditionalProperties="%(AdditionalProperties);Configuration=Release;OutDir=$(XUnitLogCheckerLibrariesOutDir)" />
diff --git a/src/mono/CMakeLists.txt b/src/mono/CMakeLists.txt
index 00f883aca07e..d46df3a77a3b 100644
--- a/src/mono/CMakeLists.txt
+++ b/src/mono/CMakeLists.txt
@@ -124,7 +124,6 @@ endif()
 
 if(NOT AOT_TARGET_TRIPLE STREQUAL "")
   set(MONO_CROSS_COMPILE 1)
-  add_definitions(-DNO_GLOBALIZATION_SHIM)
   if(NOT AOT_OFFSETS_FILE STREQUAL "")
     set(MONO_OFFSETS_FILE "${AOT_OFFSETS_FILE}")
   endif()
@@ -241,7 +240,6 @@ elseif(CLR_CMAKE_HOST_OS STREQUAL "android")
   # FIXME: Rest of the flags from configure.ac
 elseif(CLR_CMAKE_HOST_OS STREQUAL "emscripten")
   set(HOST_BROWSER 1)
-  add_definitions(-DNO_GLOBALIZATION_SHIM)
   add_definitions(-D_THREAD_SAFE)
   add_compile_options(-Wno-strict-prototypes)
   add_compile_options(-Wno-unused-but-set-variable)
@@ -255,7 +253,6 @@ elseif(CLR_CMAKE_HOST_OS STREQUAL "emscripten")
 elseif(CLR_CMAKE_HOST_OS STREQUAL "wasi")
   set(HOST_WASI 1)
   add_definitions(-D_WASI_EMULATED_PROCESS_CLOCKS -D_WASI_EMULATED_SIGNAL -D_WASI_EMULATED_MMAN -DHOST_WASI)
-  add_definitions(-DNO_GLOBALIZATION_SHIM)
   add_definitions(-D_THREAD_SAFE)
   add_definitions(-DDISABLE_SOCKET_TRANSPORT)
   add_definitions(-DDISABLE_EGD_SOCKET)
@@ -288,6 +285,7 @@ elseif(CLR_CMAKE_HOST_OS STREQUAL "windows")
     add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/Zi>) # enable debugging information
     add_link_options(/LTCG)    # link-time code generation
     add_link_options(/DEBUG)   # enable debugging information
+    add_link_options(/DEBUGTYPE:CV,FIXUP)   # enable fixup debug information
     add_link_options(/OPT:REF) # optimize: remove unreferenced functions & data
     add_link_options(/OPT:ICF) # optimize: enable COMDAT folding
     # the combination of /Zi compiler flag and /DEBUG /OPT:REF /OPT:ICF
@@ -710,79 +708,6 @@ if(LLVM_PREFIX)
   add_definitions(-DLLVM_API_VERSION=${llvm_api_version})
 endif()
 
-######################################
-# ICU CHECKS
-######################################
-if(HOST_OSX OR HOST_MACCAT OR HOST_IOS OR HOST_TVOS)
-  # FIXME: Handle errors
-  execute_process(COMMAND  brew --prefix OUTPUT_VARIABLE brew_prefix OUTPUT_STRIP_TRAILING_WHITESPACE)
-
-  if((HOST_MACCAT OR HOST_IOS OR HOST_TVOS) AND "${CMAKE_SHARED_LINKER_FLAGS}" MATCHES "${brew_prefix}/opt/icu4c/lib")
-    message(FATAL_ERROR "Linker flags contain the Homebrew version of ICU which conflicts with the iOS/tvOS/MacCatalyst version: ${CMAKE_SHARED_LINKER_FLAGS}")
-  endif()
-endif()
-
-set(ICU_SHIM_PATH "${CLR_SRC_NATIVE_DIR}/libs/System.Globalization.Native")
-if(MONO_CROSS_COMPILE)
-elseif(HOST_OSX AND NOT HOST_MACCAT)
-  include(FindPkgConfig)
-  set(ENV{PKG_CONFIG_PATH} "{$PKG_CONFIG_PATH}:${brew_prefix}/lib/pkgconfig:${brew_prefix}/opt/icu4c/lib/pkgconfig")
-  # Defines ICU_INCLUDEDIR/ICU_LIBDIR
-  pkg_check_modules(ICU icu-uc)
-  set(OSX_ICU_LIBRARY_PATH /usr/lib/libicucore.dylib)
-  set(ICU_FLAGS "-DTARGET_UNIX -DU_DISABLE_RENAMING -Wno-reserved-id-macro -Wno-documentation -Wno-documentation-unknown-command -Wno-switch-enum -Wno-covered-switch-default -Wno-extra-semi-stmt -Wno-unknown-warning-option -Wno-deprecated-declarations")
-  set(HAVE_SYS_ICU 1)
-elseif(HOST_WASI)
-  set(ICU_FLAGS "-DPALEXPORT=\"\" -DU_DISABLE_RENAMING -DHAVE_UDAT_STANDALONE_SHORTER_WEEKDAYS -DHAVE_SET_MAX_VARIABLE -DTARGET_UNIX -Wno-reserved-id-macro -Wno-documentation -Wno-documentation-unknown-command -Wno-switch-enum -Wno-covered-switch-default -Wno-extra-semi-stmt -Wno-unknown-warning-option")
-  set(HAVE_SYS_ICU 1)
-  set(STATIC_ICU 1)
-  set(ICU_LIBS "icucore")
-elseif(HOST_BROWSER)
-  set(ICU_FLAGS "-DPALEXPORT=\"\" -DU_DISABLE_RENAMING -DHAVE_UDAT_STANDALONE_SHORTER_WEEKDAYS -DHAVE_SET_MAX_VARIABLE -DTARGET_UNIX -Wno-reserved-id-macro -Wno-documentation -Wno-documentation-unknown-command -Wno-switch-enum -Wno-covered-switch-default -Wno-extra-semi-stmt -Wno-unknown-warning-option")
-  set(HAVE_SYS_ICU 1)
-  set(STATIC_ICU 1)
-  set(ICU_LIBS "icucore")
-elseif(HOST_IOS OR HOST_TVOS OR HOST_MACCAT)
-  set(ICU_FLAGS "-DTARGET_UNIX -DU_DISABLE_RENAMING -Wno-reserved-id-macro -Wno-documentation -Wno-documentation-unknown-command -Wno-switch-enum -Wno-covered-switch-default -Wno-extra-semi-stmt -Wno-unknown-warning-option -Wno-deprecated-declarations")
-  if(HOST_IOS)
-    string(APPEND ICU_FLAGS " -DTARGET_IOS")
-  elseif(HOST_TVOS)
-    string(APPEND ICU_FLAGS " -DTARGET_TVOS")
-  elseif(HOST_MACCAT)
-    string(APPEND ICU_FLAGS " -DTARGET_MACCATALYST")
-  endif()
-  string(APPEND ICU_FLAGS " -DAPPLE_HYBRID_GLOBALIZATION")
-  set(HAVE_SYS_ICU 1)
-  set(STATIC_ICU 1)
-elseif(HOST_ANDROID)
-  set(ICU_FLAGS "-DPALEXPORT=\"\" -DHAVE_UDAT_STANDALONE_SHORTER_WEEKDAYS -DHAVE_SET_MAX_VARIABLE -DTARGET_UNIX -DTARGET_ANDROID -Wno-reserved-id-macro -Wno-documentation -Wno-documentation-unknown-command -Wno-switch-enum -Wno-covered-switch-default -Wno-covered-switch-default -Wno-extra-semi-stmt -Wno-unknown-warning-option")
-  set(HAVE_SYS_ICU 1)
-elseif(HOST_LINUX)
-  include(FindPkgConfig)
-  if(CROSS_ROOTFS)
-    set(ENV{PKG_CONFIG_SYSROOT_DIR} "${CROSS_ROOTFS}")
-  endif(CROSS_ROOTFS)
-  pkg_check_modules(ICU icu-uc)
-  set(ICU_FLAGS "-DTARGET_UNIX -DU_DISABLE_RENAMING -Wno-reserved-id-macro -Wno-documentation -Wno-documentation-unknown-command -Wno-switch-enum -Wno-covered-switch-default -Wno-extra-semi-stmt -Wno-unknown-warning-option -Wno-deprecated-declarations")
-  set(HAVE_SYS_ICU 1)
-elseif(HOST_WIN32)
-  set(ICU_FLAGS "-DTARGET_WINDOWS -DPALEXPORT=EXTERN_C")
-  set(HAVE_SYS_ICU 1)
-elseif(HOST_SOLARIS)
-  set(ICU_FLAGS "-DPALEXPORT=\"\" -DTARGET_UNIX -Wno-reserved-id-macro -Wno-documentation -Wno-documentation-unknown-command -Wno-switch-enum -Wno-covered-switch-default -Wno-extra-semi-stmt -Wno-unknown-warning-option")
-  set(HAVE_SYS_ICU 1)
-elseif(TARGET_FREEBSD)
-  set(ICU_FLAGS "-DTARGET_UNIX -Wno-deprecated-declarations")
-  set(HAVE_SYS_ICU 1)
-  set(ICU_INCLUDEDIR "${CROSS_ROOTFS}/usr/local/include")
-  set(ICU_LIBDIR "${CROSS_ROOTFS}/usr/local/lib")
-elseif(HOST_HAIKU)
-  set(ICU_FLAGS "-DTARGET_UNIX -DU_DISABLE_RENAMING -Wno-reserved-id-macro -Wno-documentation -Wno-documentation-unknown-command -Wno-switch-enum -Wno-covered-switch-default -Wno-extra-semi-stmt -Wno-unknown-warning-option -Wno-deprecated-declarations")
-  set(HAVE_SYS_ICU 1)
-else()
-  message(FATAL_ERROR "Unknown host")
-endif()
-
 ######################################
 # GC CHECKS
 ######################################
diff --git a/src/mono/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/mono/System.Private.CoreLib/System.Private.CoreLib.csproj
index 1c085ed36e44..95f7f110cda5 100644
--- a/src/mono/System.Private.CoreLib/System.Private.CoreLib.csproj
+++ b/src/mono/System.Private.CoreLib/System.Private.CoreLib.csproj
@@ -246,7 +246,6 @@
       <Compile Include="$(BclSourcesRoot)\System\Reflection\Metadata\AssemblyExtensions.cs" />
       <Compile Include="$(BclSourcesRoot)\System\Reflection\Metadata\MetadataUpdater.cs" />
       <Compile Include="$(BclSourcesRoot)\System\Reflection\TypeNameParser.Mono.cs" />
-      <Compile Include="$(BclSourcesRoot)\System\Resources\ManifestBasedResourceGroveler.Mono.cs" />
       <Compile Include="$(BclSourcesRoot)\System\Runtime\ControlledExecution.Mono.cs" />
       <Compile Include="$(BclSourcesRoot)\System\Runtime\DependentHandle.cs" />
       <Compile Include="$(BclSourcesRoot)\System\Runtime\GCFrameRegistration.Mono.cs" />
@@ -264,7 +263,6 @@
                Condition="'$(FeatureObjCMarshal)' == 'true'"/>
       <Compile Include="$(BclSourcesRoot)\System\Runtime\Intrinsics\X86\X86Base.Mono.cs" />
       <Compile Include="$(BclSourcesRoot)\System\Runtime\Loader\AssemblyLoadContext.Mono.cs" />
-      <Compile Include="$(BclSourcesRoot)\System\Security\DynamicSecurityMethodAttribute.cs" />
       <Compile Include="$(BclSourcesRoot)\System\Threading\Interlocked.Mono.cs" />
       <Compile Include="$(BclSourcesRoot)\System\Threading\Monitor.Mono.cs" />
       <Compile Include="$(BclSourcesRoot)\System\Threading\ObjectHeader.Mono.cs" />
@@ -276,13 +274,10 @@
   <ItemGroup Condition="('$(TargetsUnix)' == 'true' or '$(TargetsBrowser)' == 'true' or '$(TargetsWasi)' == 'true') and '$(FeaturePortableThreadPool)' == 'true'">
       <Compile Include="$(BclSourcesRoot)\System\Threading\LowLevelLifoSemaphore.Unix.Mono.cs" />
   </ItemGroup>
-  <ItemGroup Condition="('$(TargetsBrowser)' == 'true'  or '$(TargetsWasi)' == 'true') and '$(FeatureWasmManagedThreads)' == 'true'">
+  <ItemGroup Condition="'$(TargetsBrowser)' == 'true' and '$(FeatureWasmManagedThreads)' == 'true'">
       <Compile Include="$(BclSourcesRoot)\System\Threading\ThreadPoolBoundHandle.Browser.Threads.Mono.cs" />
-      <Compile Include="$(BclSourcesRoot)\System\Threading\LowLevelLifoAsyncWaitSemaphore.Browser.Threads.Mono.cs" />
       <Compile Include="$(BclSourcesRoot)\System\Threading\PortableThreadPool.Browser.Threads.Mono.cs" />
-      <Compile Include="$(BclSourcesRoot)\System\Threading\PortableThreadPool.WorkerThread.Browser.Threads.Mono.cs" />
       <Compile Include="$(BclSourcesRoot)\System\Threading\ThreadPool.Browser.Threads.Mono.cs" />
-      <Compile Include="$(BclSourcesRoot)\System\Threading\WebWorkerEventLoop.Browser.Threads.Mono.cs" />
   </ItemGroup>
   <ItemGroup Condition="'$(TargetsBrowser)' == 'true' and '$(FeatureWasmManagedThreads)' != 'true'">
       <Compile Include="$(BclSourcesRoot)\System\Threading\ThreadPoolBoundHandle.Browser.Mono.cs" />
diff --git a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.iOS.xml b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.iOS.xml
index d0c319ec93aa..ed9ce57b2481 100644
--- a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.iOS.xml
+++ b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.iOS.xml
@@ -3,8 +3,5 @@
     <type fullname="System.Runtime.CompilerServices.RuntimeFeature">
       <method signature="System.Boolean get_IsDynamicCodeCompiled()" body="stub" value="false" />
     </type>
-    <type fullname="System.Runtime.CompilerServices.RuntimeFeature" feature="System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeSupported" featurevalue="false">
-        <method signature="System.Boolean get_IsDynamicCodeSupported()" body="stub" value="false" />
-    </type>
   </assembly>
 </linker>
diff --git a/src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs
index 8f45f602e6fb..bcb9b6b38f2e 100644
--- a/src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs
+++ b/src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs
@@ -23,7 +23,7 @@ internal static unsafe void Memmove<T>(ref T destination, ref T source, nuint el
             {
 #pragma warning disable 8500 // sizeof of managed types
                 // Blittable memmove
-                Memmove(
+                SpanHelpers.Memmove(
                     ref Unsafe.As<T, byte>(ref destination),
                     ref Unsafe.As<T, byte>(ref source),
                     elementCount * (nuint)sizeof(T));
diff --git a/src/mono/System.Private.CoreLib/src/System/Delegate.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Delegate.Mono.cs
index 9f71ff470f3e..98ade01313be 100644
--- a/src/mono/System.Private.CoreLib/src/System/Delegate.Mono.cs
+++ b/src/mono/System.Private.CoreLib/src/System/Delegate.Mono.cs
@@ -556,7 +556,7 @@ private DelegateData CreateDelegateData()
             return delegate_data;
         }
 
-        private static bool InternalEqualTypes(object source, object value)
+        internal static bool InternalEqualTypes(object source, object value)
         {
             return source.GetType() == value.GetType();
         }
diff --git a/src/mono/System.Private.CoreLib/src/System/Math.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Math.Mono.cs
index eb0024f983e6..c7f86a48b994 100644
--- a/src/mono/System.Private.CoreLib/src/System/Math.Mono.cs
+++ b/src/mono/System.Private.CoreLib/src/System/Math.Mono.cs
@@ -79,9 +79,6 @@ public partial class Math
         [MethodImpl(MethodImplOptions.InternalCall)]
         public static extern double Log2(double x);
 
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern double FMod(double x, double y);
-
         [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern unsafe double ModF(double x, double* intptr);
     }
diff --git a/src/mono/System.Private.CoreLib/src/System/MathF.Mono.cs b/src/mono/System.Private.CoreLib/src/System/MathF.Mono.cs
index 4fd4dc614458..6789856acae4 100644
--- a/src/mono/System.Private.CoreLib/src/System/MathF.Mono.cs
+++ b/src/mono/System.Private.CoreLib/src/System/MathF.Mono.cs
@@ -79,9 +79,6 @@ public partial class MathF
         [MethodImpl(MethodImplOptions.InternalCall)]
         public static extern float Log2(float x);
 
-        [MethodImpl(MethodImplOptions.InternalCall)]
-        private static extern float FMod(float x, float y);
-
         [MethodImpl(MethodImplOptions.InternalCall)]
         private static extern unsafe float ModF(float x, float* intptr);
     }
diff --git a/src/mono/System.Private.CoreLib/src/System/MulticastDelegate.Mono.cs b/src/mono/System.Private.CoreLib/src/System/MulticastDelegate.Mono.cs
index e634afd23448..73aacd9ca0e0 100644
--- a/src/mono/System.Private.CoreLib/src/System/MulticastDelegate.Mono.cs
+++ b/src/mono/System.Private.CoreLib/src/System/MulticastDelegate.Mono.cs
@@ -131,6 +131,10 @@ protected sealed override Delegate CombineImpl(Delegate? follow)
             if (follow == null)
                 return this;
 
+            // Verify that the types are the same...
+            if (!InternalEqualTypes(this, follow))
+                throw new ArgumentException(SR.Arg_DlgtTypeMis);
+
             MulticastDelegate other = (MulticastDelegate)follow;
 
             MulticastDelegate ret = AllocDelegateLike_internal(this);
diff --git a/src/mono/System.Private.CoreLib/src/System/Reflection/Assembly.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Reflection/Assembly.Mono.cs
index d47cbc107c83..6918e48eeebf 100644
--- a/src/mono/System.Private.CoreLib/src/System/Reflection/Assembly.Mono.cs
+++ b/src/mono/System.Private.CoreLib/src/System/Reflection/Assembly.Mono.cs
@@ -43,6 +43,7 @@ public static Assembly GetExecutingAssembly()
         internal static extern RuntimeAssembly GetExecutingAssembly(ref StackCrawlMark stackMark);
 
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        [System.Security.DynamicSecurityMethod] // Methods doing stack walks has to be marked DynamicSecurityMethod
         public static extern Assembly GetCallingAssembly();
 
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
diff --git a/src/mono/System.Private.CoreLib/src/System/Reflection/Emit/MethodOnTypeBuilderInstantiation.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Reflection/Emit/MethodOnTypeBuilderInstantiation.Mono.cs
index f0c730920df8..3219d0bedce5 100644
--- a/src/mono/System.Private.CoreLib/src/System/Reflection/Emit/MethodOnTypeBuilderInstantiation.Mono.cs
+++ b/src/mono/System.Private.CoreLib/src/System/Reflection/Emit/MethodOnTypeBuilderInstantiation.Mono.cs
@@ -66,25 +66,6 @@ public override Type[] GetGenericArguments()
             return result;
         }
 
-        public override bool ContainsGenericParameters
-        {
-            get
-            {
-                if (_method.ContainsGenericParameters)
-                    return true;
-                if (!_method.IsGenericMethodDefinition)
-                    throw new NotSupportedException();
-                if (_typeArguments == null)
-                    return true;
-                foreach (Type t in _typeArguments)
-                {
-                    if (t.ContainsGenericParameters)
-                        return true;
-                }
-                return false;
-            }
-        }
-
         public override bool IsGenericMethodDefinition => _method.IsGenericMethodDefinition && _typeArguments == null;
 
         public override MethodInfo GetGenericMethodDefinition() { return _genericMethodDefinition ?? _method; }
diff --git a/src/mono/System.Private.CoreLib/src/System/Reflection/Metadata/AssemblyExtensions.cs b/src/mono/System.Private.CoreLib/src/System/Reflection/Metadata/AssemblyExtensions.cs
index 9c677baa4bda..7eafc1a56096 100644
--- a/src/mono/System.Private.CoreLib/src/System/Reflection/Metadata/AssemblyExtensions.cs
+++ b/src/mono/System.Private.CoreLib/src/System/Reflection/Metadata/AssemblyExtensions.cs
@@ -8,6 +8,13 @@ namespace System.Reflection.Metadata
     public static class AssemblyExtensions
     {
         [CLSCompliant(false)]
-        public static unsafe bool TryGetRawMetadata(this Assembly assembly, out byte* blob, out int length) => throw new NotImplementedException();
+        public static unsafe bool TryGetRawMetadata(this Assembly assembly, out byte* blob, out int length)
+        {
+            ArgumentNullException.ThrowIfNull(assembly);
+
+            blob = null;
+            length = 0;
+            return false;
+        }
     }
 }
diff --git a/src/mono/System.Private.CoreLib/src/System/Reflection/RuntimeFieldInfo.cs b/src/mono/System.Private.CoreLib/src/System/Reflection/RuntimeFieldInfo.cs
index 33162acb5978..7811adcf631c 100644
--- a/src/mono/System.Private.CoreLib/src/System/Reflection/RuntimeFieldInfo.cs
+++ b/src/mono/System.Private.CoreLib/src/System/Reflection/RuntimeFieldInfo.cs
@@ -97,8 +97,8 @@ internal override void CheckConsistency(object target)
         [DebuggerHidden]
         internal override void UnsafeSetValue(object? obj, object? value, BindingFlags invokeAttr, Binder? binder, CultureInfo? culture)
         {
-            bool domainInitialized = false;
-            RuntimeFieldHandle.SetValue(this, obj, value, null, Attributes, null, ref domainInitialized);
+            bool isClassInitialized = false;
+            RuntimeFieldHandle.SetValue(this, obj, value, null, Attributes, null, ref isClassInitialized);
         }
 
         [DebuggerStepThrough]
diff --git a/src/mono/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.Mono.cs
deleted file mode 100644
index 54871091da0f..000000000000
--- a/src/mono/System.Private.CoreLib/src/System/Resources/ManifestBasedResourceGroveler.Mono.cs
+++ /dev/null
@@ -1,16 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Globalization;
-using System.Reflection;
-
-namespace System.Resources
-{
-    internal partial class ManifestBasedResourceGroveler
-    {
-        private static Assembly? InternalGetSatelliteAssembly(Assembly mainAssembly, CultureInfo culture, Version? version)
-        {
-            return (RuntimeAssembly.InternalGetSatelliteAssembly(mainAssembly, culture, version, throwOnFileNotFound: false));
-        }
-    }
-}
diff --git a/src/mono/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.Mono.cs
index 391bfaec6a2a..1e2241de12b3 100644
--- a/src/mono/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.Mono.cs
+++ b/src/mono/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.Mono.cs
@@ -4,6 +4,7 @@
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Diagnostics.Tracing;
+using System.Runtime.InteropServices;
 using System.Runtime.Serialization;
 
 namespace System.Runtime.CompilerServices
@@ -137,9 +138,15 @@ public static void RunModuleConstructor(ModuleHandle module)
             RunModuleConstructor(module.Value);
         }
 
-        public static IntPtr AllocateTypeAssociatedMemory(Type type, int size)
+        public static unsafe IntPtr AllocateTypeAssociatedMemory(Type type, int size)
         {
-            throw new PlatformNotSupportedException();
+            if (type is not RuntimeType)
+                throw new ArgumentException(SR.Arg_MustBeType, nameof(type));
+
+            ArgumentOutOfRangeException.ThrowIfNegative(size);
+
+            // We don't support unloading; the memory will never be freed.
+            return (IntPtr)NativeMemory.AllocZeroed((uint)size);
         }
 
         [Intrinsic]
@@ -204,5 +211,52 @@ private static extern unsafe IntPtr GetSpanDataFrom(
 
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         private static extern bool SufficientExecutionStack();
+
+        [MethodImplAttribute(MethodImplOptions.InternalCall)]
+        private static extern void InternalBox(QCallTypeHandle type, ref byte target, ObjectHandleOnStack result);
+
+        /// <summary>
+        /// Create a boxed object of the specified type from the data located at the target reference.
+        /// </summary>
+        /// <param name="target">The target data</param>
+        /// <param name="type">The type of box to create.</param>
+        /// <returns>A boxed object containing the specified data.</returns>
+        /// <exception cref="ArgumentNullException">The specified type handle is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException">The specified type cannot have a boxed instance of itself created.</exception>
+        /// <exception cref="NotSupportedException">The passed in type is a by-ref-like type.</exception>
+        /// <remarks>This returns an object that is equivalent to executing the IL box instruction with the provided target address and type.</remarks>
+        public static object? Box(ref byte target, RuntimeTypeHandle type)
+        {
+            if (type.Value is 0)
+                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.type);
+
+            // Compatibility with CoreCLR, throw on a null reference to the unboxed data.
+            if (Unsafe.IsNullRef(ref target))
+                throw new NullReferenceException();
+
+            RuntimeType rtType = (RuntimeType)Type.GetTypeFromHandle(type)!;
+
+            if (rtType.ContainsGenericParameters
+                || rtType.IsPointer
+                || rtType.IsFunctionPointer
+                || rtType.IsByRef
+                || rtType.IsGenericParameter
+                || rtType == typeof(void))
+            {
+                throw new ArgumentException(SR.Arg_TypeNotSupported);
+            }
+
+            if (!rtType.IsValueType)
+            {
+                return Unsafe.As<byte, object?>(ref target);
+            }
+
+            if (rtType.IsByRefLike)
+                throw new NotSupportedException(SR.NotSupported_ByRefLike);
+
+            object? result = null;
+            InternalBox(new QCallTypeHandle(ref rtType), ref target, ObjectHandleOnStack.Create(ref result));
+            return result;
+        }
     }
 }
diff --git a/src/mono/System.Private.CoreLib/src/System/Security/DynamicSecurityMethodAttribute.cs b/src/mono/System.Private.CoreLib/src/System/Security/DynamicSecurityMethodAttribute.cs
deleted file mode 100644
index e3dae854517e..000000000000
--- a/src/mono/System.Private.CoreLib/src/System/Security/DynamicSecurityMethodAttribute.cs
+++ /dev/null
@@ -1,18 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-namespace System.Security
-{
-    // DynamicSecurityMethodAttribute:
-    //  All methods that use StackCrawlMark should be marked with this attribute. This attribute
-    //  disables inlining of the calling method to allow stackwalking to find the exact caller.
-    //
-    //  This attribute used to indicate that the target method requires space for a security object
-    //  to be allocated on the callers stack. It is not used for this purpose anymore because of security
-    //  stackwalks are not ever done in CoreCLR.
-    [AttributeUsage(AttributeTargets.Method | AttributeTargets.Constructor, AllowMultiple = true, Inherited = false)]
-    internal sealed class DynamicSecurityMethodAttribute : Attribute
-    {
-        public DynamicSecurityMethodAttribute() { }
-    }
-}
diff --git a/src/mono/System.Private.CoreLib/src/System/String.Mono.cs b/src/mono/System.Private.CoreLib/src/System/String.Mono.cs
index 7314504aff9a..7dedf5a6e536 100644
--- a/src/mono/System.Private.CoreLib/src/System/String.Mono.cs
+++ b/src/mono/System.Private.CoreLib/src/System/String.Mono.cs
@@ -116,7 +116,7 @@ private static unsafe void memset(byte* dest, int val, int len)
 
         private static unsafe void memcpy(byte* dest, byte* src, int size)
         {
-            Buffer.Memmove(ref *dest, ref *src, (nuint)size);
+            SpanHelpers.Memmove(ref *dest, ref *src, (nuint)size);
         }
 
         /* Used by the runtime */
diff --git a/src/mono/System.Private.CoreLib/src/System/Threading/Interlocked.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Threading/Interlocked.Mono.cs
index 36adaa901e8b..79e193e70e09 100644
--- a/src/mono/System.Private.CoreLib/src/System/Threading/Interlocked.Mono.cs
+++ b/src/mono/System.Private.CoreLib/src/System/Threading/Interlocked.Mono.cs
@@ -8,14 +8,6 @@ namespace System.Threading
 {
     public static partial class Interlocked
     {
-        [Intrinsic]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        public static extern byte CompareExchange(ref byte location1, byte value, byte comparand);
-
-        [Intrinsic]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        public static extern short CompareExchange(ref short location1, short value, short comparand);
-
         [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         public static extern int CompareExchange(ref int location1, int value, int comparand);
@@ -61,14 +53,6 @@ public static partial class Interlocked
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         public static extern long Increment(ref long location);
 
-        [Intrinsic]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        public static extern byte Exchange(ref byte location1, byte value);
-
-        [Intrinsic]
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        public static extern short Exchange(ref short location1, short value);
-
         [Intrinsic]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         public static extern int Exchange(ref int location1, int value);
diff --git a/src/mono/System.Private.CoreLib/src/System/Threading/LowLevelLifoAsyncWaitSemaphore.Browser.Threads.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Threading/LowLevelLifoAsyncWaitSemaphore.Browser.Threads.Mono.cs
deleted file mode 100644
index 0bb2d90bba2d..000000000000
--- a/src/mono/System.Private.CoreLib/src/System/Threading/LowLevelLifoAsyncWaitSemaphore.Browser.Threads.Mono.cs
+++ /dev/null
@@ -1,223 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-using System.Diagnostics;
-using System.Diagnostics.CodeAnalysis;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-
-namespace System.Threading;
-
-// <summary>
-// This class provides a way for browser threads to asynchronously wait for a semaphore
-// from JS, without using the threadpool.  It is used to implement threadpool workers.
-// </summary>
-internal sealed partial class LowLevelLifoAsyncWaitSemaphore : LowLevelLifoSemaphoreBase, IDisposable
-{
-    private IntPtr lifo_semaphore;
-
-    [MethodImplAttribute(MethodImplOptions.InternalCall)]
-    private static extern IntPtr InitInternal();
-
-    public LowLevelLifoAsyncWaitSemaphore(int initialSignalCount, int maximumSignalCount, int spinCount, Action onWait)
-        : base (initialSignalCount, maximumSignalCount, spinCount, onWait)
-    {
-        CreateAsyncWait(maximumSignalCount);
-    }
-
-#pragma warning disable IDE0060
-    private void CreateAsyncWait(int maximumSignalCount)
-#pragma warning restore IDE0060
-    {
-        lifo_semaphore = InitInternal();
-    }
-
-    [MethodImplAttribute(MethodImplOptions.InternalCall)]
-    private static extern void DeleteInternal(IntPtr semaphore);
-
-    public void Dispose()
-    {
-        DeleteInternal(lifo_semaphore);
-        lifo_semaphore = IntPtr.Zero;
-    }
-
-    [MethodImplAttribute(MethodImplOptions.InternalCall)]
-    private static extern void ReleaseInternal(IntPtr semaphore, int count);
-
-    protected override void ReleaseCore(int count)
-    {
-        ReleaseInternal(lifo_semaphore, count);
-    }
-
-    private sealed record WaitEntry (LowLevelLifoAsyncWaitSemaphore Semaphore, Action<LowLevelLifoAsyncWaitSemaphore, object?> OnSuccess, Action<LowLevelLifoAsyncWaitSemaphore, object?> OnTimeout, object? State)
-    {
-        public int TimeoutMs {get; internal set;}
-        public int StartWaitTicks {get; internal set; }
-    }
-
-    public void PrepareAsyncWait(int timeoutMs, Action<LowLevelLifoAsyncWaitSemaphore, object?> onSuccess, Action<LowLevelLifoAsyncWaitSemaphore, object?> onTimeout, object? state)
-    {
-        Debug.Assert(timeoutMs >= -1);
-
-        // Try to acquire the semaphore or
-        // a) register as a waiter and timeoutMs > 0
-        // b) bail out if timeoutMs == 0 and return false
-        Counts counts = _separated._counts;
-        while (true)
-        {
-            Debug.Assert(counts.SignalCount <= _maximumSignalCount);
-            Counts newCounts = counts;
-            if (counts.SignalCount != 0)
-            {
-                newCounts.DecrementSignalCount();
-            }
-            else if (timeoutMs != 0)
-            {
-                // Maximum number of spinners reached, register as a waiter instead
-                newCounts.IncrementWaiterCount();
-            }
-
-            Counts countsBeforeUpdate = _separated._counts.InterlockedCompareExchange(newCounts, counts);
-            if (countsBeforeUpdate == counts)
-            {
-                if (counts.SignalCount != 0)
-                {
-                    onSuccess (this, state);
-                    return;
-                }
-                if (newCounts.WaiterCount != counts.WaiterCount)
-                {
-                    PrepareAsyncWaitForSignal(timeoutMs, onSuccess, onTimeout, state);
-                    return;
-                }
-                if (timeoutMs == 0)
-                {
-                    onTimeout (this, state);
-                    return;
-                }
-                break;
-            }
-
-            counts = countsBeforeUpdate;
-        }
-
-        Debug.Fail("unreachable");
-    }
-
-    private void PrepareAsyncWaitForSignal(int timeoutMs, Action<LowLevelLifoAsyncWaitSemaphore, object?> onSuccess, Action<LowLevelLifoAsyncWaitSemaphore, object?> onTimeout, object? state)
-    {
-        Debug.Assert(timeoutMs > 0 || timeoutMs == -1);
-
-        _onWait();
-
-        WaitEntry we = new WaitEntry(this, onSuccess, onTimeout, state)
-        {
-            TimeoutMs = timeoutMs,
-            StartWaitTicks = timeoutMs != -1 ? Environment.TickCount : 0,
-        };
-        PrepareAsyncWaitCore(we);
-        // on success calls InternalAsyncWaitSuccess, on timeout calls InternalAsyncWaitTimeout
-    }
-
-    private static void InternalAsyncWaitTimeout(LowLevelLifoAsyncWaitSemaphore self, WaitEntry internalWaitEntry)
-    {
-        WaitEntry we = internalWaitEntry!;
-        // Unregister the waiter. The wait subsystem used above guarantees that a thread that wakes due to a timeout does
-        // not observe a signal to the object being waited upon.
-        self._separated._counts.InterlockedDecrementWaiterCount();
-        we.OnTimeout(self, we.State);
-    }
-
-    private static void InternalAsyncWaitSuccess(LowLevelLifoAsyncWaitSemaphore self, WaitEntry internalWaitEntry)
-    {
-        WaitEntry we = internalWaitEntry!;
-        int endWaitTicks = we.TimeoutMs != -1 ? Environment.TickCount : 0;
-        // Unregister the waiter if this thread will not be waiting anymore, and try to acquire the semaphore
-        Counts counts = self._separated._counts;
-        while (true)
-        {
-            Debug.Assert(counts.WaiterCount != 0);
-            Counts newCounts = counts;
-            if (counts.SignalCount != 0)
-            {
-                newCounts.DecrementSignalCount();
-                newCounts.DecrementWaiterCount();
-            }
-
-            // This waiter has woken up and this needs to be reflected in the count of waiters signaled to wake
-            if (counts.CountOfWaitersSignaledToWake != 0)
-            {
-                newCounts.DecrementCountOfWaitersSignaledToWake();
-            }
-
-            Counts countsBeforeUpdate = self._separated._counts.InterlockedCompareExchange(newCounts, counts);
-            if (countsBeforeUpdate == counts)
-            {
-                if (counts.SignalCount != 0)
-                {
-                    we.OnSuccess(self, we.State);
-                    return;
-                }
-                break;
-            }
-
-            counts = countsBeforeUpdate;
-        }
-        // if we get here, we need to keep waiting because the SignalCount above was 0 after we did
-        // the CompareExchange - someone took the signal before us.
-
-        if (we.TimeoutMs != -1) {
-            int waitMs = endWaitTicks - we.StartWaitTicks;
-            if (waitMs >= 0 && waitMs < we.TimeoutMs)
-                we.TimeoutMs -= waitMs;
-            else
-                we.TimeoutMs = 0;
-            we.StartWaitTicks = endWaitTicks;
-        }
-        PrepareAsyncWaitCore (we);
-        // on success calls InternalAsyncWaitSuccess, on timeout calls InternalAsyncWaitTimeout
-    }
-
-    private static void PrepareAsyncWaitCore(WaitEntry internalWaitEntry)
-    {
-        int timeoutMs = internalWaitEntry.TimeoutMs;
-        LowLevelLifoAsyncWaitSemaphore semaphore = internalWaitEntry.Semaphore;
-        if (timeoutMs == 0) {
-            internalWaitEntry.OnTimeout (semaphore, internalWaitEntry.State);
-            return;
-        }
-        GCHandle gchandle = GCHandle.Alloc (internalWaitEntry);
-        unsafe {
-            delegate* unmanaged<IntPtr, IntPtr, void> successCallback = &SuccessCallback;
-            delegate* unmanaged<IntPtr, IntPtr, void> timeoutCallback = &TimeoutCallback;
-            PrepareAsyncWaitInternal (semaphore.lifo_semaphore, timeoutMs, successCallback, timeoutCallback, GCHandle.ToIntPtr(gchandle));
-        }
-    }
-
-    [MethodImpl(MethodImplOptions.InternalCall)]
-    private static extern unsafe void PrepareAsyncWaitInternal(IntPtr semaphore,
-                                                               int timeoutMs,
-                                                               /*delegate* unmanaged<IntPtr, IntPtr, void> successCallback*/ void* successCallback,
-                                                               /*delegate* unmanaged<IntPtr, IntPtr, void> timeoutCallback*/ void* timeoutCallback,
-                                                               IntPtr userData);
-
-    [UnmanagedCallersOnly]
-    private static void SuccessCallback(IntPtr lifoSemaphore, IntPtr userData)
-    {
-        GCHandle gchandle = GCHandle.FromIntPtr(userData);
-        WaitEntry internalWaitEntry = (WaitEntry)gchandle.Target!;
-        gchandle.Free();
-        InternalAsyncWaitSuccess(internalWaitEntry.Semaphore, internalWaitEntry);
-    }
-
-    [UnmanagedCallersOnly]
-    private static void TimeoutCallback(IntPtr lifoSemaphore, IntPtr userData)
-    {
-        GCHandle gchandle = GCHandle.FromIntPtr(userData);
-        WaitEntry internalWaitEntry = (WaitEntry)gchandle.Target!;
-        gchandle.Free();
-        InternalAsyncWaitTimeout(internalWaitEntry.Semaphore, internalWaitEntry);
-    }
-
-}
diff --git a/src/mono/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Unix.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Unix.Mono.cs
index 82365c61b8bb..477ee0f08c9c 100644
--- a/src/mono/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Unix.Mono.cs
+++ b/src/mono/System.Private.CoreLib/src/System/Threading/LowLevelLifoSemaphore.Unix.Mono.cs
@@ -5,7 +5,7 @@
 
 namespace System.Threading
 {
-    internal sealed unsafe partial class LowLevelLifoSemaphore : LowLevelLifoSemaphoreBase, IDisposable
+    internal sealed unsafe partial class LowLevelLifoSemaphore : IDisposable
     {
         private IntPtr lifo_semaphore;
 
@@ -39,7 +39,7 @@ private bool WaitCore(int timeoutMs)
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         private static extern void ReleaseInternal(IntPtr semaphore, int count);
 
-        protected override void ReleaseCore(int count)
+        private void ReleaseCore(int count)
         {
             ReleaseInternal(lifo_semaphore, count);
         }
diff --git a/src/mono/System.Private.CoreLib/src/System/Threading/PortableThreadPool.Browser.Threads.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Threading/PortableThreadPool.Browser.Threads.Mono.cs
index 632b0c934ee4..cc3f606fe627 100644
--- a/src/mono/System.Private.CoreLib/src/System/Threading/PortableThreadPool.Browser.Threads.Mono.cs
+++ b/src/mono/System.Private.CoreLib/src/System/Threading/PortableThreadPool.Browser.Threads.Mono.cs
@@ -7,7 +7,7 @@ internal sealed partial class PortableThreadPool
 {
     private static partial class WorkerThread
     {
-        private static bool IsIOPending => WebWorkerEventLoop.HasJavaScriptInteropDependents;
+        private static bool IsIOPending => false;
     }
 
     private struct CpuUtilizationReader
diff --git a/src/mono/System.Private.CoreLib/src/System/Threading/PortableThreadPool.WorkerThread.Browser.Threads.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Threading/PortableThreadPool.WorkerThread.Browser.Threads.Mono.cs
deleted file mode 100644
index b45dee7fa2fd..000000000000
--- a/src/mono/System.Private.CoreLib/src/System/Threading/PortableThreadPool.WorkerThread.Browser.Threads.Mono.cs
+++ /dev/null
@@ -1,122 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-using System.Diagnostics.CodeAnalysis;
-using System.Diagnostics.Tracing;
-using System.Runtime.CompilerServices;
-
-namespace System.Threading
-{
-    internal sealed partial class PortableThreadPool
-    {
-        /// <summary>
-        /// The worker thread infastructure for the CLR thread pool.
-        /// </summary>
-        private static partial class WorkerThread
-        {
-            /// <summary>
-            /// Semaphore for controlling how many threads are currently working.
-            /// </summary>
-            private static readonly LowLevelLifoAsyncWaitSemaphore s_semaphore =
-                new LowLevelLifoAsyncWaitSemaphore(
-                    0,
-                    MaxPossibleThreadCount,
-                    AppContextConfigHelper.GetInt32Config(
-                        "System.Threading.ThreadPool.UnfairSemaphoreSpinLimit",
-                        SemaphoreSpinCountDefault,
-                        false),
-                    onWait: () =>
-                    {
-                        if (NativeRuntimeEventSource.Log.IsEnabled())
-                        {
-                            NativeRuntimeEventSource.Log.ThreadPoolWorkerThreadWait(
-                                (uint)ThreadPoolInstance._separated.counts.VolatileRead().NumExistingThreads);
-                        }
-                    });
-
-            private static readonly ThreadStart s_workerThreadStart = WorkerThreadStart;
-
-            private sealed record SemaphoreWaitState(PortableThreadPool ThreadPoolInstance, LowLevelLock ThreadAdjustmentLock, WebWorkerEventLoop.KeepaliveToken KeepaliveToken)
-            {
-                public bool SpinWait = true;
-
-                public void ResetIteration() {
-                    SpinWait = true;
-                }
-            }
-
-            private static void WorkerThreadStart()
-            {
-                Thread.CurrentThread.SetThreadPoolWorkerThreadName();
-
-                PortableThreadPool threadPoolInstance = ThreadPoolInstance;
-
-                if (NativeRuntimeEventSource.Log.IsEnabled())
-                {
-                    NativeRuntimeEventSource.Log.ThreadPoolWorkerThreadStart(
-                        (uint)threadPoolInstance._separated.counts.VolatileRead().NumExistingThreads);
-                }
-
-                LowLevelLock threadAdjustmentLock = threadPoolInstance._threadAdjustmentLock;
-                var keepaliveToken = WebWorkerEventLoop.KeepalivePush();
-                SemaphoreWaitState state = new(threadPoolInstance, threadAdjustmentLock, keepaliveToken) { SpinWait = true };
-                // set up the callbacks for semaphore waits, tell
-                // emscripten to keep the thread alive, and return to
-                // the JS event loop.
-                WaitForWorkLoop(s_semaphore, state);
-                // return from thread start with keepalive - the thread will stay alive in the JS event loop
-            }
-
-            private static readonly Action<LowLevelLifoAsyncWaitSemaphore, object?> s_WorkLoopSemaphoreSuccess = new(WorkLoopSemaphoreSuccess);
-            private static readonly Action<LowLevelLifoAsyncWaitSemaphore, object?> s_WorkLoopSemaphoreTimedOut = new(WorkLoopSemaphoreTimedOut);
-
-            private static void WaitForWorkLoop(LowLevelLifoAsyncWaitSemaphore semaphore, SemaphoreWaitState state)
-            {
-                semaphore.PrepareAsyncWait(ThreadPoolThreadTimeoutMs, s_WorkLoopSemaphoreSuccess, s_WorkLoopSemaphoreTimedOut, state);
-                // thread should still be kept alive
-                Debug.Assert(state.KeepaliveToken.Valid);
-            }
-
-            private static void WorkLoopSemaphoreSuccess(LowLevelLifoAsyncWaitSemaphore semaphore, object? stateObject)
-            {
-                SemaphoreWaitState state = (SemaphoreWaitState)stateObject!;
-                WorkerDoWork(state.ThreadPoolInstance, ref state.SpinWait);
-                // Go around the loop one more time, keeping existing mutated state
-                WaitForWorkLoop(semaphore, state);
-            }
-
-            private static void WorkLoopSemaphoreTimedOut(LowLevelLifoAsyncWaitSemaphore semaphore, object? stateObject)
-            {
-                SemaphoreWaitState state = (SemaphoreWaitState)stateObject!;
-                if (ShouldExitWorker(state.ThreadPoolInstance, state.ThreadAdjustmentLock)) {
-                    // we're done, kill the thread.
-
-                    // we're wrapped in an emscripten eventloop handler which will consult the
-                    // keepalive count, destroy the thread and run the TLS dtor which will
-                    // unregister the thread from Mono
-                    state.KeepaliveToken.Pop();
-                    return;
-                } else {
-                    // more work showed up while we were shutting down, go around one more time
-                    state.ResetIteration();
-                    WaitForWorkLoop(semaphore, state);
-                }
-            }
-
-            private static void CreateWorkerThread()
-            {
-                // Thread pool threads must start in the default execution context without transferring the context, so
-                // using captureContext: false.
-                Thread workerThread = new Thread(s_workerThreadStart);
-                workerThread.IsThreadPoolThread = true;
-                workerThread.IsBackground = true;
-                // thread name will be set in thread proc
-
-                // This thread will return to the JS event loop - tell the runtime not to cleanup
-                // after the start function returns, if the Emscripten keepalive is non-zero.
-                WebWorkerEventLoop.StartExitable(workerThread, captureContext: false);
-            }
-        }
-    }
-}
diff --git a/src/mono/System.Private.CoreLib/src/System/Threading/WebWorkerEventLoop.Browser.Threads.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Threading/WebWorkerEventLoop.Browser.Threads.Mono.cs
deleted file mode 100644
index 73c2959293d5..000000000000
--- a/src/mono/System.Private.CoreLib/src/System/Threading/WebWorkerEventLoop.Browser.Threads.Mono.cs
+++ /dev/null
@@ -1,98 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics.CodeAnalysis;
-using System.Diagnostics.Tracing;
-using System.Runtime.CompilerServices;
-
-namespace System.Threading;
-
-/// <summary>
-///   Keep a pthread alive in its WebWorker after its pthread start function returns.
-/// </summary>
-internal static class WebWorkerEventLoop
-{
-    // FIXME: these keepalive calls could be qcalls with a SuppressGCTransitionAttribute
-    [MethodImpl(MethodImplOptions.InternalCall)]
-    private static extern void KeepalivePushInternal();
-    [MethodImpl(MethodImplOptions.InternalCall)]
-    private static extern void KeepalivePopInternal();
-
-    /// <summary>
-    /// A keepalive token prevents a thread from shutting down even if it returns to the JS event
-    /// loop.  A thread may want a keepalive token if it needs to allow JS code to run to settle JS
-    /// promises or execute JS timeout callbacks.
-    /// </summary>
-    internal sealed class KeepaliveToken
-    {
-        public bool Valid {get; private set; }
-
-        private KeepaliveToken() { Valid = true; }
-
-        /// <summary>
-        ///  Decrement the Emscripten keepalive count.  A thread with a zero keepalive count will
-        ///  terminate when it returns from its start function or from an async invocation from the
-        ///  JS event loop.
-        /// </summary>
-        internal void Pop() {
-            if (!Valid)
-                throw new InvalidOperationException();
-            Valid = false;
-            KeepalivePopInternal();
-        }
-
-        internal static KeepaliveToken Create()
-        {
-            KeepalivePushInternal();
-            return new KeepaliveToken();
-        }
-    }
-
-    /// <summary>
-    ///  Increment the Emscripten keepalive count.  A thread with a positive keepalive can return from its
-    ///  thread start function or a JS event loop invocation and continue running in the JS event
-    ///  loop.
-    /// </summary>
-    internal static KeepaliveToken KeepalivePush() => KeepaliveToken.Create();
-
-    /// <summary>
-    ///   Start a thread that may be kept alive on its webworker after the start function returns,
-    ///   if the emscripten keepalive count is positive.  Once the thread returns to the JS event
-    ///   loop it will be able to settle JS promises as well as run any queued managed async
-    ///   callbacks.
-    /// </summary>
-    internal static void StartExitable(Thread thread, bool captureContext)
-    {
-        // don't support captureContext == true, for now, since it's
-        // not needed by PortableThreadPool.WorkerThread
-        if (captureContext)
-            throw new InvalidOperationException();
-        // for now, threadpool threads are exitable, and nothing else is.
-        if (!thread.IsThreadPoolThread)
-            throw new InvalidOperationException();
-        thread.HasExternalEventLoop = true;
-        thread.UnsafeStart();
-    }
-
-    /// returns true if the current thread has unsettled JS Interop promises
-    private static bool HasUnsettledInteropPromises => HasUnsettledInteropPromisesNative();
-
-    // FIXME: this could be a qcall with a SuppressGCTransitionAttribute
-    [MethodImpl(MethodImplOptions.InternalCall)]
-    private static extern bool HasUnsettledInteropPromisesNative();
-
-    /// <summary>returns true if the current WebWorker has JavaScript objects that depend on the
-    /// current managed thread.</summary>
-    ///
-    /// <remarks>If this returns false, the runtime is allowed to allow the current managed thread
-    /// to exit and for the WebWorker to be recycled by Emscripten for another managed
-    /// thread.</remarks>
-    internal static bool HasJavaScriptInteropDependents
-    {
-        //
-        // FIXME:
-        // https://github.com/dotnet/runtime/issues/85052 - unsettled promises are not the only relevant
-        // reasons for keeping a worker thread alive. We will need to add other conditions here.
-        get => HasUnsettledInteropPromises;
-    }
-}
diff --git a/src/mono/browser/README.md b/src/mono/browser/README.md
index 5ee64fcd4724..4fb494a86b19 100644
--- a/src/mono/browser/README.md
+++ b/src/mono/browser/README.md
@@ -117,10 +117,24 @@ The wrapper script used to actually run these tests, accepts:
 
 ### Using a local build of xharness
 
+XHarness consists of two pieces for WASM
+
+#### 1. CLI/host
+
 * set `XHARNESS_CLI_PATH=/path/to/xharness/artifacts/bin/Microsoft.DotNet.XHarness.CLI/Debug/net7.0/Microsoft.DotNet.XHarness.CLI.dll`
 
 **Note:** Additional msbuild arguments can be passed with: `make ..  MSBUILD_ARGS="/p:a=b"`
 
+#### 2. Test runner running inside of the browser
+
+All library tests are hosted by `WasmTestRunner.csproj`. The project references XHarness nuget for running tests using Xunit. To make changes and iterate quickly
+
+- Add property `<RestoreAdditionalProjectSources>$(RestoreAdditionalProjectSources);LOCAL_CLONE_OF_XHARNESS\artifacts\packages\Debug\Shipping</RestoreAdditionalProjectSources>` in `WasmTestRunner.csproj`.
+- Set environment variable in your terminal `$env:NUGET_PACKAGES="$pwd\.nuget"` (so that nuget packages are restored to local folder `.nuget`)
+- Run "Pack" in the XHarness solution in Visual Studio on `Microsoft.DotNet.XHarness.TestRunners.Common` or `Microsoft.DotNet.XHarness.TestRunners.Xunit` based on your changes (it will generate a nuget package in `LOCAL_CLONE_OF_XHARNESS\artifacts\packages\Debug\Shipping`).
+- Build WasmTestRunner `.\dotnet.cmd build -c Debug .\src\libraries\Common\tests\WasmTestRunner\WasmTestRunner.csproj`.
+- If you need to iterate, delete Xunit or Common nuget cache `rm -r .\.nuget\microsoft.dotnet.xharness.testrunners.xunit\` or `rm -r .\.nuget\microsoft.dotnet.xharness.testrunners.common\`.
+
 ### Symbolicating traces
 
 Exceptions thrown after the runtime starts get symbolicating from js itself. Exceptions before that, like asserts containing native traces get symbolicated by xharness using `src/mono/wasm/symbolicator`.
diff --git a/src/mono/browser/browser.proj b/src/mono/browser/browser.proj
index 335fec08ead3..201e214592c7 100644
--- a/src/mono/browser/browser.proj
+++ b/src/mono/browser/browser.proj
@@ -270,7 +270,8 @@
       <_EmccExportedLibraryFunction>"[@(EmccExportedLibraryFunction -> '%27%(Identity)%27', ',')]"</_EmccExportedLibraryFunction>
       <_EmccExportedRuntimeMethods>"[@(EmccExportedRuntimeMethod -> '%27%(Identity)%27', ',')]"</_EmccExportedRuntimeMethods>
       <_EmccExportedFunctions>@(EmccExportedFunction -> '%(Identity)',',')</_EmccExportedFunctions>
-      <EmccInitialHeapSize>16777216</EmccInitialHeapSize>
+      <!-- reserve at least enough space to complete initializing sgen without growing the heap -->
+      <EmccInitialHeapSize>33554432</EmccInitialHeapSize>
       <EmccStackSize>5MB</EmccStackSize>
     </PropertyGroup>
     <ItemGroup>
diff --git a/src/mono/browser/build/BrowserWasmApp.targets b/src/mono/browser/build/BrowserWasmApp.targets
index ec2a81e1e066..aa231c45d8b7 100644
--- a/src/mono/browser/build/BrowserWasmApp.targets
+++ b/src/mono/browser/build/BrowserWasmApp.targets
@@ -64,7 +64,6 @@
 
     <_BoolPropertiesThatTriggerRelinking Include="WasmEnableSIMD" DefaultValueInRuntimePack="true" />
     <_BoolPropertiesThatTriggerRelinking Include="WasmEnableExceptionHandling" DefaultValueInRuntimePack="true" />
-    <_BoolPropertiesThatTriggerRelinking Include="WasmNativeStrip" DefaultValueInRuntimePack="true" />
   </ItemGroup>
 
   <Target Name="_GetWasmGenerateAppBundleDependencies">
@@ -121,7 +120,8 @@
 
     <PropertyGroup>
       <!-- TODO: set this from some user-facing property?  -1 means use the default baked into dotnet.native.js -->
-      <_WasmPThreadPoolSize Condition="'$(_WasmPThreadPoolSize)' == ''">-1</_WasmPThreadPoolSize>
+      <_WasmPThreadPoolInitialSize Condition="'$(_WasmPThreadPoolInitialSize)' == ''">-1</_WasmPThreadPoolInitialSize>
+      <_WasmPThreadPoolUnusedSize Condition="'$(_WasmPThreadPoolUnusedSize)' == ''">-1</_WasmPThreadPoolUnusedSize>
     </PropertyGroup>
 
     <ItemGroup>
@@ -148,7 +148,8 @@
       NativeAssets="@(WasmNativeAsset)"
       DebugLevel="$(WasmDebugLevel)"
       IncludeThreadsWorker="$(WasmEnableThreads)"
-      PThreadPoolSize="$(_WasmPThreadPoolSize)"
+      PThreadPoolInitialSize="$(_WasmPThreadPoolInitialSize)"
+      PThreadPoolUnusedSize="$(_WasmPThreadPoolUnusedSize)"
       UseWebcil="$(WasmEnableWebcil)"
       WasmIncludeFullIcuData="$(WasmIncludeFullIcuData)"
       WasmIcuDataFileName="$(WasmIcuDataFileName)"
diff --git a/src/mono/browser/debugger/BrowserDebugProxy/DebugStore.cs b/src/mono/browser/debugger/BrowserDebugProxy/DebugStore.cs
index 6f205766afb6..a6c80c89ac8d 100644
--- a/src/mono/browser/debugger/BrowserDebugProxy/DebugStore.cs
+++ b/src/mono/browser/debugger/BrowserDebugProxy/DebugStore.cs
@@ -1446,7 +1446,7 @@ private void GetSourceLinkUrl(string document, Dictionary<string, string> source
             {
                 string key = sourceLinkDocument.Key;
 
-                if (!key.EndsWith("*", StringComparison.OrdinalIgnoreCase))
+                if (!key.EndsWith('*'))
                 {
                     continue;
                 }
diff --git a/src/mono/browser/debugger/BrowserDebugProxy/MonoProxy.cs b/src/mono/browser/debugger/BrowserDebugProxy/MonoProxy.cs
index 4ae7fdd5e15d..503f196c17f0 100644
--- a/src/mono/browser/debugger/BrowserDebugProxy/MonoProxy.cs
+++ b/src/mono/browser/debugger/BrowserDebugProxy/MonoProxy.cs
@@ -911,16 +911,18 @@ protected async Task<bool> EvaluateCondition(SessionId sessionId, ExecutionConte
                     return true;
                 }
             }
-            catch (ReturnAsErrorException raee)
+            catch (ReturnAsErrorException ree)
             {
-                logger.LogDebug($"Unable to evaluate breakpoint condition '{condition}': {raee}");
-                SendLog(sessionId, $"Unable to evaluate breakpoint condition '{condition}': {raee.Message}", token, type: "error");
+                logger.LogDebug($"Unable to evaluate breakpoint condition '{condition}': {ree}");
+                SendLog(sessionId, $"Unable to evaluate breakpoint condition '{condition}': {ree.Message}", token, type: "error");
                 bp.ConditionAlreadyEvaluatedWithError = true;
+                SendExceptionToTelemetry(ree, "EvaluateCondition", sessionId, token);
             }
             catch (Exception e)
             {
                 Log("info", $"Unable to evaluate breakpoint condition '{condition}': {e}");
                 bp.ConditionAlreadyEvaluatedWithError = true;
+                SendExceptionToTelemetry(e, "EvaluateCondition", sessionId, token);
             }
             return false;
         }
@@ -1519,17 +1521,29 @@ private async Task<bool> OnEvaluateOnCallFrame(MessageId msg_id, int scopeId, st
             catch (ReturnAsErrorException ree)
             {
                 SendResponse(msg_id, AddCallStackInfoToException(ree.Error, context, scopeId), token);
+                SendExceptionToTelemetry(ree, "OnEvaluateOnCallFrame", msg_id, token);
             }
             catch (Exception e)
             {
                 logger.LogDebug($"Error in EvaluateOnCallFrame for expression '{expression}' with '{e}.");
-                var exc = new ReturnAsErrorException(e.Message, e.GetType().Name);
-                SendResponse(msg_id, AddCallStackInfoToException(exc.Error, context, scopeId), token);
+                var ree = new ReturnAsErrorException(e.Message, e.GetType().Name);
+                SendResponse(msg_id, AddCallStackInfoToException(ree.Error, context, scopeId), token);
+                SendExceptionToTelemetry(e, "OnEvaluateOnCallFrame", msg_id, token);
             }
 
             return true;
         }
 
+        private void SendExceptionToTelemetry(Exception exc, string callingFunction, SessionId msg_id, CancellationToken token)
+        {
+            JObject reportBlazorDebugError = JObject.FromObject(new
+            {
+                exceptionType = "uncaughtException",
+                error = $"{exc.Message} at {callingFunction}",
+            });
+            SendEvent(msg_id, "DotnetDebugger.reportBlazorDebugError", reportBlazorDebugError, token);
+        }
+
         internal async Task<GetMembersResult> GetScopeProperties(SessionId msg_id, int scopeId, CancellationToken token)
         {
             try
diff --git a/src/mono/browser/debugger/BrowserDebugProxy/MonoSDBHelper.cs b/src/mono/browser/debugger/BrowserDebugProxy/MonoSDBHelper.cs
index 69e1cd780c05..9d70f173db04 100644
--- a/src/mono/browser/debugger/BrowserDebugProxy/MonoSDBHelper.cs
+++ b/src/mono/browser/debugger/BrowserDebugProxy/MonoSDBHelper.cs
@@ -525,7 +525,9 @@ private unsafe T ReadBigEndian<T>() where T : struct
             {
                 data.Reverse();
             }
-            data.CopyTo(new Span<byte>(Unsafe.AsPointer(ref ret), data.Length));
+#pragma warning disable CS8500 // takes address of managed type
+            data.CopyTo(new Span<byte>(&ret, data.Length));
+#pragma warning restore CS8500
             return ret;
         }
     }
@@ -546,7 +548,9 @@ public override void Write(string val)
         private unsafe void WriteBigEndian<T>(T val) where T : struct
         {
             Span<byte> data = stackalloc byte[Unsafe.SizeOf<T>()];
-            new Span<byte>(Unsafe.AsPointer(ref val), data.Length).CopyTo(data);
+#pragma warning disable CS8500 // takes address of managed type
+            new Span<byte>(&val, data.Length).CopyTo(data);
+#pragma warning restore CS8500
             if (BitConverter.IsLittleEndian)
             {
                 data.Reverse();
@@ -1369,7 +1373,7 @@ public async Task<string> GetAssemblyFileNameFromId(int assemblyId, Cancellation
 
             using var retDebuggerCmdReader = await SendDebuggerAgentCommand(CmdAssembly.GetName, commandParamsWriter, token);
             var name = retDebuggerCmdReader.ReadString();
-            return name.Remove(name.IndexOf(",")) + ".dll";
+            return name.Remove(name.IndexOf(',')) + ".dll";
         }
 
         public async Task<string> GetMethodName(int methodId, CancellationToken token)
@@ -2181,7 +2185,7 @@ public async Task<JArray> GetHoistedLocalVariables(MethodInfoWithDebugInformatio
                         asyncLocal["name"] = match.Groups["varName"].Value;
                     }
                 }
-                else if (fieldName.StartsWith("$"))
+                else if (fieldName.StartsWith('$'))
                 {
                     continue;
                 }
diff --git a/src/mono/browser/debugger/DebuggerTestSuite/BreakpointTests.cs b/src/mono/browser/debugger/DebuggerTestSuite/BreakpointTests.cs
index 6dc3bc056230..b5e10eec917f 100644
--- a/src/mono/browser/debugger/DebuggerTestSuite/BreakpointTests.cs
+++ b/src/mono/browser/debugger/DebuggerTestSuite/BreakpointTests.cs
@@ -172,7 +172,7 @@ await EvaluateAndCheck(
 
                     foreach (var frame in pause_location["callFrames"])
                     {
-                        Assert.Equal(false, frame["url"].Value<string>().Contains(".wasm"));
+                        Assert.Equal(false, frame["url"].Value<string>().EndsWith(".wasm", StringComparison.Ordinal));
                         Assert.Equal(false, frame["url"].Value<string>().Contains("wasm://"));
                     }
                     return Task.CompletedTask;
diff --git a/src/mono/browser/debugger/DebuggerTestSuite/DebuggerTestSuite.csproj b/src/mono/browser/debugger/DebuggerTestSuite/DebuggerTestSuite.csproj
index 87cfb17bbc04..7283603a955e 100644
--- a/src/mono/browser/debugger/DebuggerTestSuite/DebuggerTestSuite.csproj
+++ b/src/mono/browser/debugger/DebuggerTestSuite/DebuggerTestSuite.csproj
@@ -81,7 +81,7 @@
           BeforeTargets="CopyTestZipForHelix"
           DependsOnTargets="_GenerateRunSettingsFile">
 
-    <Exec Command="&quot;$(DotNetTool)&quot; test --no-build -s $(RunSettingsFilePath) -t --nologo -v:q" ConsoleToMSBuild="true">
+    <Exec Command="&quot;$(DotNetTool)&quot; test --no-build -s $(RunSettingsFilePath) -t --nologo -v:q -p:VsTestUseMSBuildOutput=false" ConsoleToMSBuild="true">
       <Output TaskParameter="ConsoleOutput" ItemName="_ListOfTestsLines" />
     </Exec>
 
@@ -89,12 +89,15 @@
       <_Regex>^ *(DebuggerTests[^\($]+)</_Regex>
     </PropertyGroup>
     <ItemGroup>
-      <_TestLines0 Include="$([System.Text.RegularExpressions.Regex]::Match('%(_ListOfTestsLines.Identity)', $(_Regex)))" />
-      <TestClassName Include="$([System.IO.Path]::GetFileNameWithoutExtension(%(_TestLines0.Identity)))" />
+      <_TestLines0 Include="$([System.Text.RegularExpressions.Regex]::Match('%(_ListOfTestsLines.Identity)', '$(_Regex)'))" />
+      <TestClassName Include="$([System.IO.Path]::GetFileNameWithoutExtension('%(_TestLines0.Identity)'))" />
     </ItemGroup>
 
+    <Error Text="No DebuggerTests test classes found!" Condition="'@(TestClassName)' == ''" />
+
     <WriteLinesToFile File="$(TestArchiveTestsDir)$(MSBuildProjectName).tests.list"
-                      Lines="@(TestClassName->Distinct())" />
+                      Lines="@(TestClassName->Distinct())"
+                      Overwrite="true" />
   </Target>
 
   <!-- Copy of `GenerateRunSettingsFile` from eng/testing/runsettings.targets -->
diff --git a/src/mono/browser/debugger/DebuggerTestSuite/MiscTests.cs b/src/mono/browser/debugger/DebuggerTestSuite/MiscTests.cs
index 80345ecb16ad..3b0d795dfe5f 100644
--- a/src/mono/browser/debugger/DebuggerTestSuite/MiscTests.cs
+++ b/src/mono/browser/debugger/DebuggerTestSuite/MiscTests.cs
@@ -1173,6 +1173,7 @@ public static TheoryData<int> CountToTen()
             return data;
         }
 
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/98110")]
         [ConditionalTheory(nameof(WasmMultiThreaded))]
         [MemberData(nameof(CountToTen))]
         public async Task TestDebugUsingMultiThreadedRuntime(int _attempt)
@@ -1199,5 +1200,24 @@ public async Task TestDebugUsingMultiThreadedRuntime(int _attempt)
             Assert.Equal(locals[1]["value"]["type"], "number");
             Assert.Equal(locals[1]["name"], "currentThread");
         }
+
+        [Fact]
+        public async Task InspectSpanByte()
+        {
+            var expression = $"{{ invoke_static_method('[debugger-test] SpanByte:Run'); }}";
+
+            await EvaluateAndCheck(
+                "window.setTimeout(function() {" + expression + "; }, 1);",
+                "dotnet://debugger-test.dll/debugger-test.cs", 1684, 8,
+                "SpanByte.Run",
+                wait_for_event_fn: async (pause_location) =>
+                {
+                    var id = pause_location["callFrames"][0]["callFrameId"].Value<string>();
+                    await EvaluateOnCallFrameAndCheck(id,
+                        ("span", TObject("System.Span<byte>", null))
+                    );
+                }
+            );
+        }
     }
 }
diff --git a/src/mono/browser/debugger/tests/debugger-test/debugger-main.js b/src/mono/browser/debugger/tests/debugger-test/debugger-main.js
index 6849e490de36..fcac75081843 100644
--- a/src/mono/browser/debugger/tests/debugger-test/debugger-main.js
+++ b/src/mono/browser/debugger/tests/debugger-test/debugger-main.js
@@ -36,18 +36,18 @@ try {
         }
     }
 
-    // this is fake implementation of legacy `bind_static_method` which uses `mono_wasm_invoke_method_raw`
+    // this is fake implementation of legacy `bind_static_method` which uses `mono_wasm_invoke_jsexport`
     // We have unit tests that stop on unhandled managed exceptions.
-    // as opposed to [JSExport], the `mono_wasm_invoke_method_raw` doesn't handle managed exceptions.
+    // as opposed to [JSExport], the `mono_wasm_invoke_jsexport` doesn't handle managed exceptions.
     // Same way as old `bind_static_method` didn't
     App.bind_static_method_native = (method_name) => {
         try {
             const monoMethodPtr = App.exports.DebuggerTests.BindStaticMethod.GetMonoMethodPtr(method_name);
             // this is only implemented for void methods with no arguments
-            const invoker = runtime.Module.cwrap("mono_wasm_invoke_method_raw", "number", ["number", "number"]);
+            const invoker = runtime.Module.cwrap("mono_wasm_invoke_jsexport", "void", ["number", "number"]);
             return function () {
                 try {
-                    return invoker(monoMethodPtr);
+                    return invoker(monoMethodPtr, 0, 0);
                 }
                 catch (err) {
                     console.error(err);
diff --git a/src/mono/browser/debugger/tests/debugger-test/debugger-test.cs b/src/mono/browser/debugger/tests/debugger-test/debugger-test.cs
index c06a12a60dd8..f4132f82c4a8 100644
--- a/src/mono/browser/debugger/tests/debugger-test/debugger-test.cs
+++ b/src/mono/browser/debugger/tests/debugger-test/debugger-test.cs
@@ -1675,4 +1675,13 @@ public static void Run()
             BreakpointTestsClass bpTest = new();
         }
     }
+}
+
+public class SpanByte
+{
+    public static void Run()
+    {
+        System.Span<byte> span = new ();
+        System.Diagnostics.Debugger.Break();
+    }
 }
\ No newline at end of file
diff --git a/src/mono/browser/runtime/.eslintignore b/src/mono/browser/runtime/.eslintignore
index 682049e38a92..a0de5cad5c0c 100644
--- a/src/mono/browser/runtime/.eslintignore
+++ b/src/mono/browser/runtime/.eslintignore
@@ -1,2 +1,4 @@
 jiterpreter-opcodes.ts
 jiterpreter-tables.ts
+dotnet.d.ts
+diagnostics-mock.d.ts
diff --git a/src/mono/browser/runtime/.eslintrc.cjs b/src/mono/browser/runtime/.eslintrc.cjs
index 5cdee1555241..0885027b9630 100644
--- a/src/mono/browser/runtime/.eslintrc.cjs
+++ b/src/mono/browser/runtime/.eslintrc.cjs
@@ -22,6 +22,8 @@ module.exports = {
         "es6/*.js",
         "jiterpreter-opcodes.ts",
         "jiterpreter-tables.ts",
+        "dotnet.d.ts",
+        "diagnostics-mock.d.ts",
     ],
     "rules": {
         "@typescript-eslint/no-explicit-any": "off",
@@ -48,6 +50,17 @@ module.exports = {
         "semi": [
             "error",
             "always"
-        ]
+        ],
+        "brace-style": ["error"],
+        "eol-last": ["error"],
+        "space-before-blocks": ["error", { "functions": "always", "keywords": "always", "classes": "always" }],
+        "semi-spacing": ["error", { "before": false, "after": true }],
+        "keyword-spacing": ["error", { "before": true, "after": true, "overrides": { "this": { "before": false } } }],
+        "no-trailing-spaces": ["error"],
+        "object-curly-spacing": ["error", "always"],
+        "array-bracket-spacing": ["error"],
+        "space-infix-ops": ["error"],
+        "func-call-spacing": ["error", "never"],
+        "space-before-function-paren": ["error", "always"],
     }
 };
diff --git a/src/mono/browser/runtime/assets.ts b/src/mono/browser/runtime/assets.ts
index bd88949b6234..7cdff19d8a84 100644
--- a/src/mono/browser/runtime/assets.ts
+++ b/src/mono/browser/runtime/assets.ts
@@ -1,19 +1,20 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+import type { AssetEntryInternal } from "./types/internal";
+
 import cwraps from "./cwraps";
 import { mono_wasm_load_icu_data } from "./icu";
 import { Module, loaderHelpers, mono_assert, runtimeHelpers } from "./globals";
 import { mono_log_info, mono_log_debug, parseSymbolMapFile } from "./logging";
 import { mono_wasm_load_bytes_into_heap } from "./memory";
 import { endMeasure, MeasuredBlock, startMeasure } from "./profiler";
-import { AssetEntryInternal } from "./types/internal";
 import { AssetEntry } from "./types";
 import { VoidPtr } from "./types/emscripten";
 import { setSegmentationRulesFromJson } from "./hybrid-globalization/grapheme-segmenter";
 
 // this need to be run only after onRuntimeInitialized event, when the memory is ready
-export function instantiate_asset(asset: AssetEntry, url: string, bytes: Uint8Array): void {
+export function instantiate_asset (asset: AssetEntry, url: string, bytes: Uint8Array): void {
     mono_log_debug(`Loaded:${asset.name} as ${asset.behavior} size ${bytes.length} from ${url}`);
     const mark = startMeasure();
 
@@ -81,22 +82,19 @@ export function instantiate_asset(asset: AssetEntry, url: string, bytes: Uint8Ar
             const index = loaderHelpers._loaded_files.findIndex(element => element.file == virtualName);
             loaderHelpers._loaded_files.splice(index, 1);
         }
-    }
-    else if (asset.behavior === "pdb") {
+    } else if (asset.behavior === "pdb") {
         cwraps.mono_wasm_add_assembly(virtualName, offset!, bytes.length);
-    }
-    else if (asset.behavior === "icu") {
+    } else if (asset.behavior === "icu") {
         if (!mono_wasm_load_icu_data(offset!))
             Module.err(`Error loading ICU asset ${asset.name}`);
-    }
-    else if (asset.behavior === "resource") {
+    } else if (asset.behavior === "resource") {
         cwraps.mono_wasm_add_satellite_assembly(virtualName, asset.culture || "", offset!, bytes.length);
     }
     endMeasure(mark, MeasuredBlock.instantiateAsset, asset.name);
     ++loaderHelpers.actual_instantiated_assets_count;
 }
 
-export async function instantiate_symbols_asset(pendingAsset: AssetEntryInternal): Promise<void> {
+export async function instantiate_symbols_asset (pendingAsset: AssetEntryInternal): Promise<void> {
     try {
         const response = await pendingAsset.pendingDownloadInternal!.response;
         const text = await response.text();
@@ -106,7 +104,7 @@ export async function instantiate_symbols_asset(pendingAsset: AssetEntryInternal
     }
 }
 
-export async function instantiate_segmentation_rules_asset(pendingAsset: AssetEntryInternal): Promise<void> {
+export async function instantiate_segmentation_rules_asset (pendingAsset: AssetEntryInternal): Promise<void> {
     try {
         const response = await pendingAsset.pendingDownloadInternal!.response;
         const json = await response.json();
@@ -116,7 +114,7 @@ export async function instantiate_segmentation_rules_asset(pendingAsset: AssetEn
     }
 }
 
-export async function wait_for_all_assets() {
+export async function wait_for_all_assets () {
     // wait for all assets in memory
     await runtimeHelpers.allAssetsInMemory.promise;
     if (runtimeHelpers.config.assets) {
@@ -128,6 +126,6 @@ export async function wait_for_all_assets() {
 }
 
 // Used by the debugger to enumerate loaded dlls and pdbs
-export function mono_wasm_get_loaded_files(): string[] {
+export function mono_wasm_get_loaded_files (): string[] {
     return loaderHelpers.loadedFiles;
-}
\ No newline at end of file
+}
diff --git a/src/mono/browser/runtime/base64.ts b/src/mono/browser/runtime/base64.ts
index ac0664b78c20..28bf9970ed7f 100644
--- a/src/mono/browser/runtime/base64.ts
+++ b/src/mono/browser/runtime/base64.ts
@@ -5,7 +5,7 @@
 // https://github.com/sq/JSIL/blob/1d57d5427c87ab92ffa3ca4b82429cd7509796ba/JSIL.Libraries/Includes/Bootstrap/Core/Classes/System.Convert.js#L149
 // Thanks to Katelyn Gadd @kg
 
-export function toBase64StringImpl(inArray: Uint8Array, offset?: number, length?: number) : string{
+export function toBase64StringImpl (inArray: Uint8Array, offset?: number, length?: number) : string {
     const reader = _makeByteReader(inArray, offset, length);
     let result = "";
     let ch1: number | null = 0, ch2: number | null = 0, ch3: number | null = 0;
@@ -76,7 +76,7 @@ const _base64Table = [
     "+", "/"
 ];
 
-function _makeByteReader(bytes: Uint8Array, index?: number, count?: number): {
+function _makeByteReader (bytes: Uint8Array, index?: number, count?: number): {
     read: () => number | null
 } {
     let position = (typeof (index) === "number") ? index : 0;
diff --git a/src/mono/browser/runtime/cancelable-promise.ts b/src/mono/browser/runtime/cancelable-promise.ts
index 0d8d7be66842..c82cbdacb196 100644
--- a/src/mono/browser/runtime/cancelable-promise.ts
+++ b/src/mono/browser/runtime/cancelable-promise.ts
@@ -1,48 +1,186 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import { _lookup_js_owned_object } from "./gc-handles";
+import WasmEnableThreads from "consts:wasmEnableThreads";
+
+import { _lookup_js_owned_object, teardown_managed_proxy, upgrade_managed_proxy_to_strong_ref } from "./gc-handles";
 import { createPromiseController, loaderHelpers, mono_assert } from "./globals";
-import { mono_log_warn } from "./logging";
-import { PromiseHolder } from "./marshal-to-cs";
-import { ControllablePromise, GCHandle } from "./types/internal";
+import { ControllablePromise, GCHandle, MarshalerToCs } from "./types/internal";
+import { ManagedObject } from "./marshal";
+import { compareExchangeI32, forceThreadMemoryViewRefresh } from "./memory";
+import { mono_log_debug } from "./logging";
+import { complete_task } from "./managed-exports";
+import { marshal_cs_object_to_cs } from "./marshal-to-cs";
+import { invoke_later_when_on_ui_thread_async } from "./invoke-js";
 
 export const _are_promises_supported = ((typeof Promise === "object") || (typeof Promise === "function")) && (typeof Promise.resolve === "function");
 
-export function isThenable(js_obj: any): boolean {
+export function isThenable (js_obj: any): boolean {
     // When using an external Promise library like Bluebird the Promise.resolve may not be sufficient
     // to identify the object as a Promise.
     return Promise.resolve(js_obj) === js_obj ||
         ((typeof js_obj === "object" || typeof js_obj === "function") && typeof js_obj.then === "function");
 }
 
-export function wrap_as_cancelable_promise<T>(fn: () => Promise<T>): ControllablePromise<T> {
+export function wrap_as_cancelable_promise<T> (fn: () => Promise<T>): ControllablePromise<T> {
     const { promise, promise_control } = createPromiseController<T>();
     const inner = fn();
     inner.then((data) => promise_control.resolve(data)).catch((reason) => promise_control.reject(reason));
     return promise;
 }
 
-export function wrap_as_cancelable<T>(inner: Promise<T>): ControllablePromise<T> {
+export function wrap_as_cancelable<T> (inner: Promise<T>): ControllablePromise<T> {
     const { promise, promise_control } = createPromiseController<T>();
     inner.then((data) => promise_control.resolve(data)).catch((reason) => promise_control.reject(reason));
     return promise;
 }
 
-export function mono_wasm_cancel_promise(task_holder_gc_handle: GCHandle): void {
-    const holder = _lookup_js_owned_object(task_holder_gc_handle) as PromiseHolder;
-    mono_assert(!!holder, () => `Expected Promise for GCHandle ${task_holder_gc_handle}`);
+export function mono_wasm_cancel_promise (task_holder_gc_handle: GCHandle): void {
+    // cancelation should not arrive earlier than the promise created by marshaling in mono_wasm_invoke_jsimport_MT
+    invoke_later_when_on_ui_thread_async(() => mono_wasm_cancel_promise_impl(task_holder_gc_handle));
+}
 
-    const promise = holder.promise;
-    loaderHelpers.assertIsControllablePromise(promise);
-    const promise_control = loaderHelpers.getPromiseController(promise);
-    if (holder.isResolved) {
-        // FIXME: something needs to free the GCHandle
-        mono_log_warn("Canceling a promise that has already resolved.");
+export function mono_wasm_cancel_promise_impl (task_holder_gc_handle: GCHandle): void {
+    if (!loaderHelpers.is_runtime_running()) {
+        mono_log_debug("This promise can't be canceled, mono runtime already exited.");
         return;
     }
-    mono_assert(!holder.isCanceled, "This promise already canceled.");
-    holder.isCanceled = true;
-    promise_control.reject(new Error("OperationCanceledException"));
+    const holder = _lookup_js_owned_object(task_holder_gc_handle) as PromiseHolder;
+    mono_assert(!!holder, () => `Expected Promise for GCHandle ${task_holder_gc_handle}`);
+    holder.cancel();
 }
 
+// NOTE: layout has to match PromiseHolderState in JSHostImplementation.Types.cs
+const enum PromiseHolderState {
+    IsResolving = 0,
+}
+
+const promise_holder_symbol = Symbol.for("wasm promise_holder");
+
+export class PromiseHolder extends ManagedObject {
+    public isResolved = false;
+    public isPosted = false;
+    public isPostponed = false;
+    public data: any = null;
+    public reason: any = undefined;
+    public constructor (public promise: Promise<any>,
+        private gc_handle: GCHandle,
+        private promiseHolderPtr: number, // could be null for GCV_handle
+        private res_converter?: MarshalerToCs) {
+        super();
+    }
+
+    // returns false if the promise is being canceled by another thread in managed code
+    setIsResolving (): boolean {
+        if (!WasmEnableThreads || this.promiseHolderPtr === 0) {
+            return true;
+        }
+        forceThreadMemoryViewRefresh();
+        if (compareExchangeI32(this.promiseHolderPtr + PromiseHolderState.IsResolving, 1, 0) === 0) {
+            return true;
+        }
+        return false;
+    }
+
+    resolve (data: any) {
+        if (!loaderHelpers.is_runtime_running()) {
+            mono_log_debug("This promise resolution can't be propagated to managed code, mono runtime already exited.");
+            return;
+        }
+        mono_assert(!this.isResolved, "resolve could be called only once");
+        mono_assert(!this.isDisposed, "resolve is already disposed.");
+        if (WasmEnableThreads && !this.setIsResolving()) {
+            // we know that cancelation is in flight
+            // because we need to keep the GCHandle alive until until the cancelation arrives
+            // we skip the this resolve and let the cancelation to reject the Task
+            // we store the original data and use it later
+            this.data = data;
+            this.isPostponed = true;
+
+            // but after the promise is resolved, nothing holds the weak reference to the PromiseHolder anymore
+            // we know that cancelation is in flight, so we upgrade the weak reference to strong for the meantime
+            upgrade_managed_proxy_to_strong_ref(this, this.gc_handle);
+            return;
+        }
+        this.isResolved = true;
+        this.complete_task_wrapper(data, null);
+    }
+
+    reject (reason: any) {
+        if (!loaderHelpers.is_runtime_running()) {
+            mono_log_debug("This promise rejection can't be propagated to managed code, mono runtime already exited.");
+            return;
+        }
+        mono_assert(!this.isResolved, "reject could be called only once");
+        mono_assert(!this.isDisposed, "resolve is already disposed.");
+        const isCancelation = reason && reason[promise_holder_symbol] === this;
+        if (WasmEnableThreads && !isCancelation && !this.setIsResolving()) {
+            // we know that cancelation is in flight
+            // because we need to keep the GCHandle alive until until the cancelation arrives
+            // we skip the this reject and let the cancelation to reject the Task
+            // we store the original reason and use it later
+            this.reason = reason;
+            this.isPostponed = true;
+
+            // but after the promise is resolved, nothing holds the weak reference to the PromiseHolder anymore
+            // we know that cancelation is in flight, so we upgrade the weak reference to strong for the meantime
+            upgrade_managed_proxy_to_strong_ref(this, this.gc_handle);
+            return;
+        }
+        this.isResolved = true;
+        this.complete_task_wrapper(null, reason);
+    }
+
+    cancel () {
+        if (!loaderHelpers.is_runtime_running()) {
+            mono_log_debug("This promise cancelation can't be propagated to managed code, mono runtime already exited.");
+            return;
+        }
+        mono_assert(!this.isResolved, "cancel could be called only once");
+        mono_assert(!this.isDisposed, "resolve is already disposed.");
+
+        if (this.isPostponed) {
+            // there was racing resolve/reject which was postponed, to retain valid GCHandle
+            // in this case we just finish the original resolve/reject
+            // and we need to use the postponed data/reason
+            this.isResolved = true;
+            if (this.reason !== undefined) {
+                this.complete_task_wrapper(null, this.reason);
+            } else {
+                this.complete_task_wrapper(this.data, null);
+            }
+        } else {
+            // there is no racing resolve/reject, we can reject/cancel the promise
+            const promise = this.promise;
+            loaderHelpers.assertIsControllablePromise(promise);
+            const promise_control = loaderHelpers.getPromiseController(promise);
+
+            const reason = new Error("OperationCanceledException") as any;
+            reason[promise_holder_symbol] = this;
+            promise_control.reject(reason);
+        }
+    }
+
+    // we can do this just once, because it will be dispose the GCHandle
+    complete_task_wrapper (data: any, reason: any) {
+        try {
+            mono_assert(!this.isPosted, "Promise is already posted to managed.");
+            this.isPosted = true;
+            if (WasmEnableThreads) {
+                forceThreadMemoryViewRefresh();
+            }
+
+            // we can unregister the GC handle just on JS side
+            teardown_managed_proxy(this, this.gc_handle, /*skipManaged: */ true);
+            // order of operations with teardown_managed_proxy matters
+            // so that managed user code running in the continuation could allocate the same GCHandle number and the local registry would be already ok with that
+            complete_task(this.gc_handle, reason, data, this.res_converter || marshal_cs_object_to_cs);
+        } catch (ex) {
+            try {
+                loaderHelpers.mono_exit(1, ex);
+            } catch (ex2) {
+                // there is no point to propagate the exception into the unhandled promise rejection
+            }
+        }
+    }
+}
diff --git a/src/mono/browser/runtime/corebindings.c b/src/mono/browser/runtime/corebindings.c
index 2814b0da915d..485ba3189af6 100644
--- a/src/mono/browser/runtime/corebindings.c
+++ b/src/mono/browser/runtime/corebindings.c
@@ -12,6 +12,8 @@
 #include <mono/metadata/class.h>
 #include <mono/metadata/loader.h>
 #include <mono/metadata/object.h>
+#include <mono/metadata/reflection.h>
+#include <mono/metadata/assembly.h>
 #include <mono/jit/jit.h>
 
 #include "wasm-config.h"
@@ -19,34 +21,40 @@
 
 //JS funcs
 extern void mono_wasm_release_cs_owned_object (int js_handle);
-extern void mono_wasm_resolve_or_reject_promise (void *data);
+extern void mono_wasm_resolve_or_reject_promise (void *args);
 extern void mono_wasm_cancel_promise (int task_holder_gc_handle);
 extern void mono_wasm_console_clear ();
+extern void mono_wasm_set_entrypoint_breakpoint (int entry_point_metadata_token);
+extern void mono_wasm_trace_logger (const char *log_domain, const char *log_level, const char *message, mono_bool fatal, void *user_data);
+extern void mono_wasm_invoke_js_function (int function_js_handle, void *args);
+
+extern int mono_runtime_run_module_cctor (MonoImage *image, MonoError *error);
 
 typedef void (*background_job_cb)(void);
 
+void mono_wasm_bind_assembly_exports (char *assembly_name);
+void mono_wasm_assembly_get_entry_point (char *assembly_name, int auto_insert_breakpoint, MonoMethod **method_out);
+void mono_wasm_get_assembly_export (char *assembly_name, char *namespace, char *classname, char *methodname, MonoMethod **method_out);
+
 #ifndef DISABLE_THREADS
 void mono_wasm_release_cs_owned_object_post (pthread_t target_tid, int js_handle);
-void mono_wasm_resolve_or_reject_promise_post (pthread_t target_tid, void *data);
+void mono_wasm_resolve_or_reject_promise_post (pthread_t target_tid, void *args);
 void mono_wasm_cancel_promise_post (pthread_t target_tid, int task_holder_gc_handle);
 
 extern void mono_wasm_install_js_worker_interop (int context_gc_handle);
+void mono_wasm_install_js_worker_interop_wrapper (int context_gc_handle, void* beforeSyncJSImport, void* afterSyncJSImport, void* pumpHandler);
 extern void mono_wasm_uninstall_js_worker_interop ();
-extern void mono_wasm_bind_cs_function (MonoString **fully_qualified_name, int signature_hash, void* signatures, int *is_exception, MonoObject **result);
-extern void mono_wasm_invoke_import_async (void* args, void* signature);
-void mono_wasm_invoke_import_async_post (pthread_t target_tid, void* args, void* signature);
-extern void mono_wasm_invoke_import_sync (void* args, void* signature);
-void mono_wasm_invoke_import_sync_send (pthread_t target_tid, void* args, void* signature);
-extern void mono_wasm_invoke_js_function (int function_js_handle, void *args);
+extern void mono_wasm_invoke_jsimport_MT (void* signature, void* args);
+void mono_wasm_invoke_jsimport_async_post (pthread_t target_tid, void* signature, void* args);
+void mono_wasm_invoke_jsimport_sync_send (pthread_t target_tid, void* signature, void* args);
 void mono_wasm_invoke_js_function_send (pthread_t target_tid, int function_js_handle, void *args);
 extern void mono_threads_wasm_async_run_in_target_thread_vi (pthread_t target_thread, void (*func) (gpointer), gpointer user_data1);
 extern void mono_threads_wasm_async_run_in_target_thread_vii (pthread_t target_thread, void (*func) (gpointer, gpointer), gpointer user_data1, gpointer user_data2);
-extern void mono_threads_wasm_sync_run_in_target_thread_vii (pthread_t target_thread, void (*func) (gpointer, gpointer), gpointer user_data1, gpointer user_data2);
+extern void mono_threads_wasm_sync_run_in_target_thread_vii (pthread_t target_thread, void (*func) (gpointer, gpointer), gpointer user_data1, gpointer args);
+extern void mono_wasm_warn_about_blocking_wait (void* ptr, int32_t length);
 #else
-extern void mono_wasm_bind_cs_function (MonoString **fully_qualified_name, int signature_hash, void* signatures, int *is_exception, MonoObject **result);
-extern void mono_wasm_bind_js_import (void *signature, int *is_exception, MonoObject **result);
-extern void mono_wasm_invoke_js_import (int function_handle, void *args);
-extern void mono_wasm_invoke_js_function (int function_js_handle, void *args);
+extern void* mono_wasm_bind_js_import_ST (void *signature);
+extern void mono_wasm_invoke_jsimport_ST (int function_handle, void *args);
 #endif /* DISABLE_THREADS */
 
 // HybridGlobalization
@@ -57,6 +65,7 @@ extern mono_bool mono_wasm_starts_with (MonoString **culture, const uint16_t* st
 extern mono_bool mono_wasm_ends_with (MonoString **culture, const uint16_t* str1, int32_t str1Length, const uint16_t* str2, int32_t str2Length, int32_t options, int *is_exception, MonoObject** ex_result);
 extern int mono_wasm_index_of (MonoString **culture, const uint16_t* str1, int32_t str1Length, const uint16_t* str2, int32_t str2Length, int32_t options, mono_bool fromBeginning, int *is_exception, MonoObject** ex_result);
 extern int mono_wasm_get_calendar_info (MonoString **culture, int32_t calendarId, const uint16_t* result, int32_t resultLength, int *is_exception, MonoObject** ex_result);
+extern int mono_wasm_get_locale_info (MonoString **locale, MonoString **culture, const uint16_t* result, int32_t resultLength, int *is_exception, MonoObject** ex_result);
 extern int mono_wasm_get_culture_info (MonoString **culture, const uint16_t* result, int32_t resultLength, int *is_exception, MonoObject** ex_result);
 extern int mono_wasm_get_first_day_of_week (MonoString **culture, int *is_exception, MonoObject** ex_result);
 extern int mono_wasm_get_first_week_of_year (MonoString **culture, int *is_exception, MonoObject** ex_result);
@@ -69,29 +78,28 @@ void bindings_initialize_internals (void)
 #endif /* ENABLE_JS_INTEROP_BY_VALUE */
 
 #ifndef DISABLE_THREADS
-	mono_add_internal_call ("Interop/Runtime::ReleaseCSOwnedObject", mono_wasm_release_cs_owned_object);
 	mono_add_internal_call ("Interop/Runtime::ReleaseCSOwnedObjectPost", mono_wasm_release_cs_owned_object_post);
-	mono_add_internal_call ("Interop/Runtime::ResolveOrRejectPromise", mono_wasm_resolve_or_reject_promise);
 	mono_add_internal_call ("Interop/Runtime::ResolveOrRejectPromisePost", mono_wasm_resolve_or_reject_promise_post);
-	mono_add_internal_call ("Interop/Runtime::InstallWebWorkerInterop", mono_wasm_install_js_worker_interop);
+	mono_add_internal_call ("Interop/Runtime::InstallWebWorkerInterop", mono_wasm_install_js_worker_interop_wrapper);
 	mono_add_internal_call ("Interop/Runtime::UninstallWebWorkerInterop", mono_wasm_uninstall_js_worker_interop);
-	mono_add_internal_call ("Interop/Runtime::BindCSFunction", mono_wasm_bind_cs_function);
-	mono_add_internal_call ("Interop/Runtime::InvokeJSImportSync", mono_wasm_invoke_import_sync);
-	mono_add_internal_call ("Interop/Runtime::InvokeJSImportSyncSend", mono_wasm_invoke_import_sync_send);
-	mono_add_internal_call ("Interop/Runtime::InvokeJSImportAsyncPost", mono_wasm_invoke_import_async_post);
-	mono_add_internal_call ("Interop/Runtime::InvokeJSFunction", mono_wasm_invoke_js_function);
+	mono_add_internal_call ("Interop/Runtime::InvokeJSImportSync", mono_wasm_invoke_jsimport_MT);
+	mono_add_internal_call ("Interop/Runtime::InvokeJSImportSyncSend", mono_wasm_invoke_jsimport_sync_send);
+	mono_add_internal_call ("Interop/Runtime::InvokeJSImportAsyncPost", mono_wasm_invoke_jsimport_async_post);
 	mono_add_internal_call ("Interop/Runtime::InvokeJSFunctionSend", mono_wasm_invoke_js_function_send);
-	mono_add_internal_call ("Interop/Runtime::CancelPromise", mono_wasm_cancel_promise);
 	mono_add_internal_call ("Interop/Runtime::CancelPromisePost", mono_wasm_cancel_promise_post);
+	mono_add_internal_call ("System.Threading.Thread::WarnAboutBlockingWait", mono_wasm_warn_about_blocking_wait);
 #else
+	mono_add_internal_call ("Interop/Runtime::BindJSImportST", mono_wasm_bind_js_import_ST);
+	mono_add_internal_call ("Interop/Runtime::InvokeJSImportST", mono_wasm_invoke_jsimport_ST);
+#endif /* DISABLE_THREADS */
+
 	mono_add_internal_call ("Interop/Runtime::ReleaseCSOwnedObject", mono_wasm_release_cs_owned_object);
 	mono_add_internal_call ("Interop/Runtime::ResolveOrRejectPromise", mono_wasm_resolve_or_reject_promise);
-	mono_add_internal_call ("Interop/Runtime::BindCSFunction", mono_wasm_bind_cs_function);
-	mono_add_internal_call ("Interop/Runtime::BindJSImport", mono_wasm_bind_js_import);
-	mono_add_internal_call ("Interop/Runtime::InvokeJSImport", mono_wasm_invoke_js_import);
 	mono_add_internal_call ("Interop/Runtime::InvokeJSFunction", mono_wasm_invoke_js_function);
 	mono_add_internal_call ("Interop/Runtime::CancelPromise", mono_wasm_cancel_promise);
-#endif /* DISABLE_THREADS */
+	mono_add_internal_call ("Interop/Runtime::AssemblyGetEntryPoint", mono_wasm_assembly_get_entry_point);
+	mono_add_internal_call ("Interop/Runtime::BindAssemblyExports", mono_wasm_bind_assembly_exports);
+	mono_add_internal_call ("Interop/Runtime::GetAssemblyExport", mono_wasm_get_assembly_export);
 
 	mono_add_internal_call ("Interop/JsGlobalization::ChangeCaseInvariant", mono_wasm_change_case_invariant);
 	mono_add_internal_call ("Interop/JsGlobalization::ChangeCase", mono_wasm_change_case);
@@ -100,39 +108,199 @@ void bindings_initialize_internals (void)
 	mono_add_internal_call ("Interop/JsGlobalization::EndsWith", mono_wasm_ends_with);
 	mono_add_internal_call ("Interop/JsGlobalization::IndexOf", mono_wasm_index_of);
 	mono_add_internal_call ("Interop/JsGlobalization::GetCalendarInfo", mono_wasm_get_calendar_info);
+	mono_add_internal_call ("Interop/JsGlobalization::GetLocaleInfo", mono_wasm_get_locale_info);
 	mono_add_internal_call ("Interop/JsGlobalization::GetCultureInfo", mono_wasm_get_culture_info);
 	mono_add_internal_call ("Interop/JsGlobalization::GetFirstDayOfWeek", mono_wasm_get_first_day_of_week);
 	mono_add_internal_call ("Interop/JsGlobalization::GetFirstWeekOfYear", mono_wasm_get_first_week_of_year);
 	mono_add_internal_call ("System.ConsolePal::Clear", mono_wasm_console_clear);
 }
 
+static MonoAssembly* _mono_wasm_assembly_load (char *assembly_name)
+{
+	assert (assembly_name);
+	MonoImageOpenStatus status;
+	MonoAssemblyName* aname = mono_assembly_name_new (assembly_name);
+	assert (aname);
+
+	MonoAssembly *res = mono_assembly_load (aname, NULL, &status);
+	mono_assembly_name_free (aname);
+	free (assembly_name);
+
+	return res;
+}
+
+void mono_wasm_assembly_get_entry_point (char *assembly_name, int auto_insert_breakpoint, MonoMethod **method_out)
+{
+	assert (assembly_name);
+	*method_out = NULL;
+	MonoAssembly* assembly = _mono_wasm_assembly_load (assembly_name);
+	if(!assembly)
+		goto end;
+
+	MonoImage *image;
+	MonoMethod *method = NULL;
+
+	image = mono_assembly_get_image (assembly);
+	uint32_t entry = mono_image_get_entry_point (image);
+	if (!entry)
+		goto end;
+
+	mono_domain_ensure_entry_assembly (mono_get_root_domain (), assembly);
+	method = mono_get_method (image, entry, NULL);
+
+	/*
+	 * If the entry point looks like a compiler generated wrapper around
+	 * an async method in the form "<Name>" then try to look up the async methods
+	 * "<Name>$" and "Name" it could be wrapping.  We do this because the generated
+	 * sync wrapper will call task.GetAwaiter().GetResult() when we actually want
+	 * to yield to the host runtime.
+	 */
+	if (mono_method_get_flags (method, NULL) & 0x0800 /* METHOD_ATTRIBUTE_SPECIAL_NAME */) {
+		const char *name = mono_method_get_name (method);
+		int name_length = strlen (name);
+
+		if ((*name != '<') || (name [name_length - 1] != '>'))
+			goto end;
+
+		MonoClass *klass = mono_method_get_class (method);
+		assert(klass);
+		char *async_name = malloc (name_length + 2);
+		snprintf (async_name, name_length + 2, "%s$", name);
+
+		// look for "<Name>$"
+		MonoMethodSignature *sig = mono_method_get_signature (method, image, mono_method_get_token (method));
+		MonoMethod *async_method = mono_class_get_method_from_name (klass, async_name, mono_signature_get_param_count (sig));
+		if (async_method != NULL) {
+			free (async_name);
+			method = async_method;
+			goto end;
+		}
+
+		// look for "Name" by trimming the first and last character of "<Name>"
+		async_name [name_length - 1] = '\0';
+		async_method = mono_class_get_method_from_name (klass, async_name + 1, mono_signature_get_param_count (sig));
+
+		free (async_name);
+		if (async_method != NULL)
+			method = async_method;
+	}
+
+end:
+	if (auto_insert_breakpoint && method)
+	{
+		mono_wasm_set_entrypoint_breakpoint(mono_method_get_token (method));
+	}
+	*method_out = method;
+}
+
+void mono_wasm_bind_assembly_exports (char *assembly_name)
+{
+	MonoError error;
+	MonoAssembly* assembly;
+	MonoImage *image;
+	MonoClass *klass;
+	MonoMethod *method;
+	PVOLATILE(MonoObject) temp_exc = NULL;
+
+	assert (assembly_name);
+	assembly = _mono_wasm_assembly_load (assembly_name);
+	assert (assembly);
+	image = mono_assembly_get_image (assembly);
+	assert (image);
+
+	klass = mono_class_from_name (image, "System.Runtime.InteropServices.JavaScript", "__GeneratedInitializer");
+	if (klass) {
+		method = mono_class_get_method_from_name (klass, "__Register_", -1);
+		if (method) {
+			mono_runtime_invoke (method, NULL, NULL, (MonoObject **)&temp_exc);
+			if (temp_exc) {
+				PVOLATILE(MonoObject) exc2 = NULL;
+				store_volatile((MonoObject**)&temp_exc, (MonoObject*)mono_object_to_string ((MonoObject*)temp_exc, (MonoObject **)&exc2));
+				if (exc2) {
+					mono_wasm_trace_logger ("jsinterop", "critical", "mono_wasm_bind_assembly_exports unexpected double fault", 1, NULL);
+				} else {
+					mono_wasm_trace_logger ("jsinterop", "critical", mono_string_to_utf8((MonoString*)temp_exc), 1, NULL);
+				}
+				abort ();
+			}
+		}
+	}
+	else if (!mono_runtime_run_module_cctor(image, &error)) {
+		//g_print ("Failed to run module constructor due to %s\n", mono_error_get_message (error));
+	}
+}
+
+void mono_wasm_get_assembly_export (char *assembly_name, char *namespace, char *classname, char *methodname, MonoMethod **method_out)
+{
+	MonoError error;
+	MonoAssembly* assembly;
+	MonoImage *image;
+	MonoClass *klass;
+	MonoMethod *method=NULL;
+	*method_out = NULL;
+
+	assert (assembly_name);
+	assembly = _mono_wasm_assembly_load (assembly_name);
+	assert (assembly);
+	image = mono_assembly_get_image (assembly);
+	assert (image);
+
+	klass = mono_class_from_name (image, namespace, classname);
+	assert (klass);
+	method = mono_class_get_method_from_name (klass, methodname, -1);
+	assert (method);
+
+	*method_out = method;
+	free (namespace);
+	free (classname);
+	free (methodname);
+}
+
 #ifndef DISABLE_THREADS
 
+void* before_sync_js_import;
+void* after_sync_js_import;
+void* synchronization_context_pump_handler;
+
+void mono_wasm_install_js_worker_interop_wrapper (int context_gc_handle, void* beforeSyncJSImport, void* afterSyncJSImport, void* pumpHandler)
+{
+	before_sync_js_import = beforeSyncJSImport;
+	after_sync_js_import = afterSyncJSImport;
+	synchronization_context_pump_handler = pumpHandler;
+	mono_wasm_install_js_worker_interop (context_gc_handle);
+}
+
+// async
 void mono_wasm_release_cs_owned_object_post (pthread_t target_tid, int js_handle)
 {
 	mono_threads_wasm_async_run_in_target_thread_vi (target_tid, (void (*) (gpointer))mono_wasm_release_cs_owned_object, (gpointer)js_handle);
 }
 
+// async
 void mono_wasm_resolve_or_reject_promise_post (pthread_t target_tid, void* args)
 {
 	mono_threads_wasm_async_run_in_target_thread_vi (target_tid, (void (*) (gpointer))mono_wasm_resolve_or_reject_promise, (gpointer)args);
 }
 
+// async
 void mono_wasm_cancel_promise_post (pthread_t target_tid, int task_holder_gc_handle)
 {
 	mono_threads_wasm_async_run_in_target_thread_vi (target_tid, (void (*) (gpointer))mono_wasm_cancel_promise, (gpointer)task_holder_gc_handle);
 }
 
-void mono_wasm_invoke_import_async_post (pthread_t target_tid, void* args, void* signature)
+// async
+void mono_wasm_invoke_jsimport_async_post (pthread_t target_tid, void* signature, void* args)
 {
-	mono_threads_wasm_async_run_in_target_thread_vii (target_tid, (void (*) (gpointer, gpointer))mono_wasm_invoke_import_async, (gpointer)args, (gpointer)signature);
+	mono_threads_wasm_async_run_in_target_thread_vii (target_tid, (void (*) (gpointer, gpointer))mono_wasm_invoke_jsimport_MT, (gpointer)signature, (gpointer)args);
 }
 
-void mono_wasm_invoke_import_sync_send (pthread_t target_tid, void* args, void* signature)
+// sync
+void mono_wasm_invoke_jsimport_sync_send (pthread_t target_tid, void* signature, void* args)
 {
-	mono_threads_wasm_sync_run_in_target_thread_vii (target_tid, (void (*) (gpointer, gpointer))mono_wasm_invoke_import_sync, (gpointer)args, (gpointer)signature);
+	mono_threads_wasm_sync_run_in_target_thread_vii (target_tid, (void (*) (gpointer, gpointer))mono_wasm_invoke_jsimport_MT, (gpointer)signature, (gpointer)args);
 }
 
+// sync
 void mono_wasm_invoke_js_function_send (pthread_t target_tid, int function_js_handle, void *args)
 {
 	mono_threads_wasm_sync_run_in_target_thread_vii (target_tid, (void (*) (gpointer, gpointer))mono_wasm_invoke_js_function, (gpointer)function_js_handle, (gpointer)args);
diff --git a/src/mono/browser/runtime/crypto.ts b/src/mono/browser/runtime/crypto.ts
index f396b5d833b1..a6642d874ffc 100644
--- a/src/mono/browser/runtime/crypto.ts
+++ b/src/mono/browser/runtime/crypto.ts
@@ -5,7 +5,7 @@ import { isSharedArrayBuffer, localHeapViewU8 } from "./memory";
 // https://www.w3.org/TR/WebCryptoAPI/#Crypto-method-getRandomValues
 const batchedQuotaMax = 65536;
 
-export function mono_wasm_browser_entropy(bufferPtr: number, bufferLength: number): number {
+export function mono_wasm_browser_entropy (bufferPtr: number, bufferLength: number): number {
     if (!globalThis.crypto || !globalThis.crypto.getRandomValues) {
         return -1;
     }
diff --git a/src/mono/browser/runtime/cuint64.ts b/src/mono/browser/runtime/cuint64.ts
index 558c230265e2..20ab2c88ca42 100644
--- a/src/mono/browser/runtime/cuint64.ts
+++ b/src/mono/browser/runtime/cuint64.ts
@@ -6,11 +6,11 @@
 /// and 'import type { CUInt64 } from './cuint64';
 export type CUInt64 = readonly [number, number];
 
-export function toBigInt(x: CUInt64): bigint {
+export function toBigInt (x: CUInt64): bigint {
     return BigInt(x[0]) | BigInt(x[1]) << BigInt(32);
 }
 
-export function fromBigInt(x: bigint): CUInt64 {
+export function fromBigInt (x: bigint): CUInt64 {
     if (x < BigInt(0))
         throw new Error(`${x} is not a valid 64 bit integer`);
     if (x > BigInt(0xFFFFFFFFFFFFFFFF))
@@ -20,11 +20,11 @@ export function fromBigInt(x: bigint): CUInt64 {
     return [low, high];
 }
 
-export function dangerousToNumber(x: CUInt64): number {
+export function dangerousToNumber (x: CUInt64): number {
     return x[0] | x[1] << 32;
 }
 
-export function fromNumber(x: number): CUInt64 {
+export function fromNumber (x: number): CUInt64 {
     if (x < 0)
         throw new Error(`${x} is not a valid 64 bit integer`);
     if ((x >> 32) > 0xFFFFFFFF)
@@ -34,11 +34,11 @@ export function fromNumber(x: number): CUInt64 {
     return [x & 0xFFFFFFFF, x >> 32];
 }
 
-export function pack32(lo: number, hi: number): CUInt64 {
+export function pack32 (lo: number, hi: number): CUInt64 {
     return [lo, hi];
 }
 
-export function unpack32(x: CUInt64): [number, number] {
+export function unpack32 (x: CUInt64): [number, number] {
     return [x[0], x[1]];
 }
 
diff --git a/src/mono/browser/runtime/cwraps.ts b/src/mono/browser/runtime/cwraps.ts
index ba92cac4b1dc..c63065cd469b 100644
--- a/src/mono/browser/runtime/cwraps.ts
+++ b/src/mono/browser/runtime/cwraps.ts
@@ -8,7 +8,7 @@ import WasmEnableThreads from "consts:wasmEnableThreads";
 import type {
     MonoAssembly, MonoClass,
     MonoMethod, MonoObject,
-    MonoType, MonoObjectRef, MonoStringRef, JSMarshalerArguments
+    MonoType, MonoObjectRef, MonoStringRef, JSMarshalerArguments, PThreadPtr
 } from "./types/internal";
 import type { VoidPtr, CharPtrPtr, Int32Ptr, CharPtr, ManagedPointer } from "./types/emscripten";
 import { Module, runtimeHelpers } from "./globals";
@@ -27,6 +27,16 @@ const threading_cwraps: SigLine[] = WasmEnableThreads ? [
     [true, "mono_wasm_diagnostic_server_post_resume_runtime", "void", []],
     [true, "mono_wasm_diagnostic_server_create_stream", "number", []],
     [false, "mono_wasm_init_finalizer_thread", null, []],
+    [false, "mono_wasm_invoke_jsexport_async_post", "void", ["number", "number", "number"]],
+    [false, "mono_wasm_invoke_jsexport_sync_send", "void", ["number", "number", "number"]],
+    [false, "mono_wasm_invoke_jsexport_sync", "void", ["number", "number"]],
+    [true, "mono_wasm_create_deputy_thread", "number", []],
+    [true, "mono_wasm_create_io_thread", "number", []],
+    [true, "mono_wasm_register_ui_thread", "void", []],
+    [true, "mono_wasm_register_io_thread", "void", []],
+    [true, "mono_wasm_print_thread_dump", "void", []],
+    [true, "mono_wasm_synchronization_context_pump", "void", []],
+    [true, "mono_threads_wasm_sync_run_in_target_thread_done", "void", ["number"]],
 ] : [];
 
 // when the method is assigned/cached at usage, instead of being invoked directly from cwraps, it can't be marked lazy, because it would be re-bound on each call
@@ -50,12 +60,9 @@ const fn_signatures: SigLine[] = [
 
     [true, "mono_wasm_assembly_load", "number", ["string"]],
     [true, "mono_wasm_assembly_find_class", "number", ["number", "string", "string"]],
-    [true, "mono_wasm_runtime_run_module_cctor", "void", ["number"]],
     [true, "mono_wasm_assembly_find_method", "number", ["number", "string", "number"]],
-    [false, "mono_wasm_invoke_method_ref", "void", ["number", "number", "number", "number", "number"]],
     [true, "mono_wasm_string_from_utf16_ref", "void", ["number", "number", "number"]],
     [true, "mono_wasm_intern_string_ref", "void", ["number"]],
-    [true, "mono_wasm_assembly_get_entry_point", "number", ["number", "number"]],
 
     [false, "mono_wasm_exit", "void", ["number"]],
     [false, "mono_wasm_abort", "void", []],
@@ -66,8 +73,7 @@ const fn_signatures: SigLine[] = [
     [() => !runtimeHelpers.emscriptenBuildOptions.enableBrowserProfiler, "mono_wasm_profiler_init_aot", "void", ["string"]],
     [true, "mono_wasm_profiler_init_browser", "void", ["number"]],
     [false, "mono_wasm_exec_regression", "number", ["number", "string"]],
-    [false, "mono_wasm_invoke_method_bound", "number", ["number", "number", "number"]],
-    [false, "mono_wasm_invoke_method_raw", "number", ["number", "number"]],
+    [false, "mono_wasm_invoke_jsexport", "void", ["number", "number"]],
     [true, "mono_wasm_write_managed_pointer_unsafe", "void", ["number", "number"]],
     [true, "mono_wasm_copy_managed_pointer", "void", ["number", "number"]],
     [true, "mono_wasm_i52_to_f64", "number", ["number", "number"]],
@@ -146,6 +152,16 @@ export interface t_ThreadingCwraps {
     mono_wasm_diagnostic_server_post_resume_runtime(): void;
     mono_wasm_diagnostic_server_create_stream(): VoidPtr;
     mono_wasm_init_finalizer_thread(): void;
+    mono_wasm_invoke_jsexport_async_post(targetTID: PThreadPtr, method: MonoMethod, args: VoidPtr): void;
+    mono_wasm_invoke_jsexport_sync_send(targetTID: PThreadPtr, method: MonoMethod, args: VoidPtr): void;
+    mono_wasm_invoke_jsexport_sync(method: MonoMethod, args: VoidPtr): void;
+    mono_wasm_create_deputy_thread(): PThreadPtr;
+    mono_wasm_create_io_thread(): PThreadPtr;
+    mono_wasm_register_ui_thread(): void;
+    mono_wasm_register_io_thread(): void;
+    mono_wasm_print_thread_dump(): void;
+    mono_wasm_synchronization_context_pump(): void;
+    mono_threads_wasm_sync_run_in_target_thread_done(sem: VoidPtr): void;
 }
 
 export interface t_ProfilerCwraps {
@@ -168,15 +184,13 @@ export interface t_Cwraps {
     mono_wasm_load_icu_data(offset: VoidPtr): number;
     mono_wasm_add_assembly(name: string, data: VoidPtr, size: number): number;
     mono_wasm_add_satellite_assembly(name: string, culture: string, data: VoidPtr, size: number): void;
-    mono_wasm_load_runtime(unused: string, debugLevel: number): void;
+    mono_wasm_load_runtime(debugLevel: number): void;
     mono_wasm_change_debugger_log_level(value: number): void;
 
     mono_wasm_assembly_load(name: string): MonoAssembly;
     mono_wasm_assembly_find_class(assembly: MonoAssembly, namespace: string, name: string): MonoClass;
     mono_wasm_assembly_find_method(klass: MonoClass, name: string, args: number): MonoMethod;
-    mono_wasm_invoke_method_ref(method: MonoMethod, this_arg: MonoObjectRef, params: VoidPtr, out_exc: MonoObjectRef, out_result: MonoObjectRef): void;
     mono_wasm_string_from_utf16_ref(str: CharPtr, len: number, result: MonoObjectRef): void;
-    mono_wasm_assembly_get_entry_point(assembly: MonoAssembly, idx: number): MonoMethod;
     mono_wasm_intern_string_ref(strRef: MonoStringRef): void;
 
     mono_wasm_exit(exit_code: number): void;
@@ -184,15 +198,13 @@ export interface t_Cwraps {
     mono_wasm_getenv(name: string): CharPtr;
     mono_wasm_set_main_args(argc: number, argv: VoidPtr): void;
     mono_wasm_exec_regression(verbose_level: number, image: string): number;
-    mono_wasm_invoke_method_bound(method: MonoMethod, args: JSMarshalerArguments, fail: MonoStringRef): number;
-    mono_wasm_invoke_method_raw(method: MonoMethod, fail: MonoStringRef): number;
+    mono_wasm_invoke_jsexport(method: MonoMethod, args: JSMarshalerArguments): void;
     mono_wasm_write_managed_pointer_unsafe(destination: VoidPtr | MonoObjectRef, pointer: ManagedPointer): void;
     mono_wasm_copy_managed_pointer(destination: VoidPtr | MonoObjectRef, source: VoidPtr | MonoObjectRef): void;
     mono_wasm_i52_to_f64(source: VoidPtr, error: Int32Ptr): number;
     mono_wasm_u52_to_f64(source: VoidPtr, error: Int32Ptr): number;
     mono_wasm_f64_to_i52(destination: VoidPtr, value: number): I52Error;
     mono_wasm_f64_to_u52(destination: VoidPtr, value: number): I52Error;
-    mono_wasm_runtime_run_module_cctor(assembly: MonoAssembly): void;
     mono_wasm_method_get_name(method: MonoMethod): CharPtr;
     mono_wasm_method_get_full_name(method: MonoMethod): CharPtr;
     mono_wasm_gc_lock(): void;
@@ -275,7 +287,7 @@ export const enum I52Error {
 
 const fastCwrapTypes = ["void", "number", null];
 
-function cwrap(name: string, returnType: string | null, argTypes: string[] | undefined, opts: any): Function {
+function cwrap (name: string, returnType: string | null, argTypes: string[] | undefined, opts: any): Function {
     // Attempt to bypass emscripten's generated wrapper if it is safe to do so
     let fce =
         // Special cwrap options disable the fast path
@@ -306,7 +318,7 @@ function cwrap(name: string, returnType: string | null, argTypes: string[] | und
     return fce;
 }
 
-export function init_c_exports(): void {
+export function init_c_exports (): void {
     const fns = NativeAOT ? [] : [...fn_signatures];
 
     for (const sig of fns) {
diff --git a/src/mono/browser/runtime/debug.ts b/src/mono/browser/runtime/debug.ts
index 76562bc8f249..99963d964f00 100644
--- a/src/mono/browser/runtime/debug.ts
+++ b/src/mono/browser/runtime/debug.ts
@@ -1,8 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import BuildConfiguration from "consts:configuration";
-import { INTERNAL, Module, runtimeHelpers } from "./globals";
+import { INTERNAL, Module, loaderHelpers, runtimeHelpers } from "./globals";
 import { toBase64StringImpl } from "./base64";
 import cwraps from "./cwraps";
 import { VoidPtr, CharPtr } from "./types/emscripten";
@@ -10,7 +9,9 @@ import { mono_log_warn } from "./logging";
 import { forceThreadMemoryViewRefresh, localHeapViewU8 } from "./memory";
 import { utf8ToString } from "./strings";
 const commands_received: any = new Map<number, CommandResponse>();
-commands_received.remove = function (key: number): CommandResponse { const value = this.get(key); this.delete(key); return value; };
+commands_received.remove = function (key: number): CommandResponse {
+    const value = this.get(key); this.delete(key); return value;
+};
 let _call_function_res_cache: any = {};
 let _next_call_function_res_id = 0;
 let _debugger_buffer_len = -1;
@@ -18,7 +19,7 @@ let _debugger_buffer: VoidPtr;
 let _assembly_name_str: string; //keep this variable, it's used by BrowserDebugProxy
 let _entrypoint_method_token: number; //keep this variable, it's used by BrowserDebugProxy
 
-export function mono_wasm_runtime_ready(): void {
+export function mono_wasm_runtime_ready (): void {
     INTERNAL.mono_wasm_runtime_is_ready = runtimeHelpers.mono_wasm_runtime_is_ready = true;
 
     // FIXME: where should this go?
@@ -32,7 +33,7 @@ export function mono_wasm_runtime_ready(): void {
         debugger;
 }
 
-export function mono_wasm_fire_debugger_agent_message_with_data_to_pause(base64String: string): void {
+export function mono_wasm_fire_debugger_agent_message_with_data_to_pause (base64String: string): void {
     //keep this console.assert, otherwise optimization will remove the assignments
     // eslint-disable-next-line no-console
     console.assert(true, `mono_wasm_fire_debugger_agent_message_with_data ${base64String}`);
@@ -40,12 +41,12 @@ export function mono_wasm_fire_debugger_agent_message_with_data_to_pause(base64S
     debugger;
 }
 
-export function mono_wasm_fire_debugger_agent_message_with_data(data: number, len: number): void {
+export function mono_wasm_fire_debugger_agent_message_with_data (data: number, len: number): void {
     const base64String = toBase64StringImpl(new Uint8Array(localHeapViewU8().buffer, data, len));
     mono_wasm_fire_debugger_agent_message_with_data_to_pause(base64String);
 }
 
-export function mono_wasm_add_dbg_command_received(res_ok: boolean, id: number, buffer: number, buffer_len: number): void {
+export function mono_wasm_add_dbg_command_received (res_ok: boolean, id: number, buffer: number, buffer_len: number): void {
     const dbg_command = new Uint8Array(localHeapViewU8().buffer, buffer, buffer_len);
     const base64String = toBase64StringImpl(dbg_command);
     const buffer_obj = {
@@ -60,7 +61,7 @@ export function mono_wasm_add_dbg_command_received(res_ok: boolean, id: number,
     commands_received.set(id, buffer_obj);
 }
 
-function mono_wasm_malloc_and_set_debug_buffer(command_parameters: string) {
+function mono_wasm_malloc_and_set_debug_buffer (command_parameters: string) {
     if (command_parameters.length > _debugger_buffer_len) {
         if (_debugger_buffer)
             Module._free(_debugger_buffer);
@@ -74,7 +75,7 @@ function mono_wasm_malloc_and_set_debug_buffer(command_parameters: string) {
     }
 }
 
-export function mono_wasm_send_dbg_command_with_parms(id: number, command_set: number, command: number, command_parameters: string, length: number, valtype: number, newvalue: number): CommandResponseResult {
+export function mono_wasm_send_dbg_command_with_parms (id: number, command_set: number, command: number, command_parameters: string, length: number, valtype: number, newvalue: number): CommandResponseResult {
     forceThreadMemoryViewRefresh();
 
     mono_wasm_malloc_and_set_debug_buffer(command_parameters);
@@ -82,11 +83,11 @@ export function mono_wasm_send_dbg_command_with_parms(id: number, command_set: n
 
     const { res_ok, res } = commands_received.remove(id);
     if (!res_ok)
-        throw new Error("Failed on mono_wasm_invoke_method_debugger_agent_with_parms");
+        throw new Error("Failed on mono_wasm_send_dbg_command_with_parms");
     return res;
 }
 
-export function mono_wasm_send_dbg_command(id: number, command_set: number, command: number, command_parameters: string): CommandResponseResult {
+export function mono_wasm_send_dbg_command (id: number, command_set: number, command: number, command_parameters: string): CommandResponseResult {
     forceThreadMemoryViewRefresh();
 
     mono_wasm_malloc_and_set_debug_buffer(command_parameters);
@@ -100,7 +101,7 @@ export function mono_wasm_send_dbg_command(id: number, command_set: number, comm
 
 }
 
-export function mono_wasm_get_dbg_command_info(): CommandResponseResult {
+export function mono_wasm_get_dbg_command_info (): CommandResponseResult {
     const { res_ok, res } = commands_received.remove(0);
 
     if (!res_ok)
@@ -108,16 +109,16 @@ export function mono_wasm_get_dbg_command_info(): CommandResponseResult {
     return res;
 }
 
-export function mono_wasm_debugger_resume(): void {
+export function mono_wasm_debugger_resume (): void {
     forceThreadMemoryViewRefresh();
 }
 
-export function mono_wasm_detach_debugger(): void {
+export function mono_wasm_detach_debugger (): void {
     forceThreadMemoryViewRefresh();
     cwraps.mono_wasm_set_is_debugger_attached(false);
 }
 
-export function mono_wasm_change_debugger_log_level(level: number): void {
+export function mono_wasm_change_debugger_log_level (level: number): void {
     forceThreadMemoryViewRefresh();
     cwraps.mono_wasm_change_debugger_log_level(level);
 }
@@ -125,7 +126,7 @@ export function mono_wasm_change_debugger_log_level(level: number): void {
 /**
  * Raises an event for the debug proxy
  */
-export function mono_wasm_raise_debug_event(event: WasmEvent, args = {}): void {
+export function mono_wasm_raise_debug_event (event: WasmEvent, args = {}): void {
     if (typeof event !== "object")
         throw new Error(`event must be an object, but got ${JSON.stringify(event)}`);
 
@@ -139,7 +140,7 @@ export function mono_wasm_raise_debug_event(event: WasmEvent, args = {}): void {
     console.debug("mono_wasm_debug_event_raised:aef14bca-5519-4dfe-b35a-f867abc123ae", JSON.stringify(event), JSON.stringify(args));
 }
 
-export function mono_wasm_wait_for_debugger(): Promise<void> {
+export function mono_wasm_wait_for_debugger (): Promise<void> {
     return new Promise<void>((resolve) => {
         const interval = setInterval(() => {
             if (runtimeHelpers.waitForDebugger != 1) {
@@ -151,16 +152,16 @@ export function mono_wasm_wait_for_debugger(): Promise<void> {
     });
 }
 
-export function mono_wasm_debugger_attached(): void {
+export function mono_wasm_debugger_attached (): void {
     if (runtimeHelpers.waitForDebugger == -1)
         runtimeHelpers.waitForDebugger = 1;
     forceThreadMemoryViewRefresh();
     cwraps.mono_wasm_set_is_debugger_attached(true);
 }
 
-export function mono_wasm_set_entrypoint_breakpoint(assembly_name: CharPtr, entrypoint_method_token: number): void {
+export function mono_wasm_set_entrypoint_breakpoint (entrypoint_method_token: number): void {
     //keep these assignments, these values are used by BrowserDebugProxy
-    _assembly_name_str = utf8ToString(assembly_name).concat(".dll");
+    _assembly_name_str = loaderHelpers.config.mainAssemblyName + ".dll";
     _entrypoint_method_token = entrypoint_method_token;
     //keep this console.assert, otherwise optimization will remove the assignments
     // eslint-disable-next-line no-console
@@ -171,7 +172,7 @@ export function mono_wasm_set_entrypoint_breakpoint(assembly_name: CharPtr, entr
     forceThreadMemoryViewRefresh();
 }
 
-function _create_proxy_from_object_id(objectId: string, details: any) {
+function _create_proxy_from_object_id (objectId: string, details: any) {
     if (objectId.startsWith("dotnet:array:")) {
         let ret: Array<any>;
         if (details.items === undefined) {
@@ -191,7 +192,7 @@ function _create_proxy_from_object_id(objectId: string, details: any) {
             Object.defineProperty(proxy,
                 prop.name,
                 {
-                    get() {
+                    get () {
                         return mono_wasm_send_dbg_command(prop.get.id, prop.get.commandSet, prop.get.command, prop.get.buffer);
                     },
                     set: function (newValue) {
@@ -203,7 +204,7 @@ function _create_proxy_from_object_id(objectId: string, details: any) {
             Object.defineProperty(proxy,
                 prop.name,
                 {
-                    get() {
+                    get () {
                         return prop.value;
                     },
                     set: function (newValue) {
@@ -218,7 +219,7 @@ function _create_proxy_from_object_id(objectId: string, details: any) {
     return proxy;
 }
 
-export function mono_wasm_call_function_on(request: CallRequest): CFOResponse {
+export function mono_wasm_call_function_on (request: CallRequest): CFOResponse {
     forceThreadMemoryViewRefresh();
 
     if (request.arguments != undefined && !Array.isArray(request.arguments))
@@ -277,7 +278,7 @@ export function mono_wasm_call_function_on(request: CallRequest): CFOResponse {
     return { type: "object", className: "Object", description: "Object", objectId: fn_res_id };
 }
 
-function _get_cfo_res_details(objectId: string, args: any): ValueAsJsonString {
+function _get_cfo_res_details (objectId: string, args: any): ValueAsJsonString {
     if (!(objectId in _call_function_res_cache))
         throw new Error(`Could not find any object with id ${objectId}`);
 
@@ -339,23 +340,23 @@ type ValueAsJsonString = {
     __value_as_json_string__: string;
 }
 
-export function mono_wasm_get_details(objectId: string, args = {}): ValueAsJsonString {
+export function mono_wasm_get_details (objectId: string, args = {}): ValueAsJsonString {
     forceThreadMemoryViewRefresh();
     return _get_cfo_res_details(`dotnet:cfo_res:${objectId}`, args);
 }
 
-function _cache_call_function_res(obj: any) {
+function _cache_call_function_res (obj: any) {
     const id = `dotnet:cfo_res:${_next_call_function_res_id++}`;
     _call_function_res_cache[id] = obj;
     return id;
 }
 
-export function mono_wasm_release_object(objectId: string): void {
+export function mono_wasm_release_object (objectId: string): void {
     if (objectId in _call_function_res_cache)
         delete _call_function_res_cache[objectId];
 }
 
-export function mono_wasm_debugger_log(level: number, message_ptr: CharPtr): void {
+export function mono_wasm_debugger_log (level: number, message_ptr: CharPtr): void {
     forceThreadMemoryViewRefresh();
     const message = utf8ToString(message_ptr);
 
@@ -363,11 +364,6 @@ export function mono_wasm_debugger_log(level: number, message_ptr: CharPtr): voi
         INTERNAL.logging.debugger(level, message);
         return;
     }
-
-    if (BuildConfiguration === "Debug") {
-        // eslint-disable-next-line no-console
-        console.debug(`Debugger.Debug: ${message}`);
-    }
 }
 
 type CallDetails = {
diff --git a/src/mono/browser/runtime/diagnostics/browser/controller.ts b/src/mono/browser/runtime/diagnostics/browser/controller.ts
index 8cc60f7742c4..0119a9fda984 100644
--- a/src/mono/browser/runtime/diagnostics/browser/controller.ts
+++ b/src/mono/browser/runtime/diagnostics/browser/controller.ts
@@ -7,9 +7,10 @@ import { threads_c_functions as cwraps } from "../../cwraps";
 import { INTERNAL, mono_assert } from "../../globals";
 import { mono_log_info, mono_log_debug, mono_log_warn } from "../../logging";
 import { withStackAlloc, getI32 } from "../../memory";
-import { Thread, waitForThread } from "../../pthreads/browser";
+import { waitForThread } from "../../pthreads";
 import { isDiagnosticMessage, makeDiagnosticServerControlCommand } from "../shared/controller-commands";
 import monoDiagnosticsMock from "consts:monoDiagnosticsMock";
+import { PThreadPtr, Thread } from "../../types/internal";
 
 /// An object that can be used to control the diagnostic server.
 export interface ServerController {
@@ -17,23 +18,23 @@ export interface ServerController {
 }
 
 class ServerControllerImpl implements ServerController {
-    constructor(private server: Thread) {
+    constructor (private server: Thread) {
         server.port.addEventListener("message", this.onServerReply.bind(this));
     }
-    start(): void {
+    start (): void {
         mono_log_debug("signaling the diagnostic server to start");
         this.server.postMessageToWorker(makeDiagnosticServerControlCommand("start"));
     }
-    stop(): void {
+    stop (): void {
         mono_log_debug("signaling the diagnostic server to stop");
         this.server.postMessageToWorker(makeDiagnosticServerControlCommand("stop"));
     }
-    postServerAttachToRuntime(): void {
+    postServerAttachToRuntime (): void {
         mono_log_debug("signal the diagnostic server to attach to the runtime");
         this.server.postMessageToWorker(makeDiagnosticServerControlCommand("attach_to_runtime"));
     }
 
-    onServerReply(event: MessageEvent): void {
+    onServerReply (event: MessageEvent): void {
         const d = event.data;
         if (isDiagnosticMessage(d)) {
             switch (d.cmd) {
@@ -47,20 +48,20 @@ class ServerControllerImpl implements ServerController {
 
 let serverController: ServerController | null = null;
 
-export function getController(): ServerController {
+export function getController (): ServerController {
     if (serverController)
         return serverController;
     throw new Error("unexpected no server controller");
 }
 
-export async function startDiagnosticServer(websocket_url: string): Promise<ServerController | null> {
+export async function startDiagnosticServer (websocket_url: string): Promise<ServerController | null> {
     mono_assert(WasmEnableThreads, "The diagnostic server requires threads to be enabled during build time.");
     const sizeOfPthreadT = 4;
     mono_log_info(`starting the diagnostic server url: ${websocket_url}`);
-    const result: number | undefined = withStackAlloc(sizeOfPthreadT, (pthreadIdPtr) => {
+    const result: PThreadPtr | undefined = withStackAlloc(sizeOfPthreadT, (pthreadIdPtr) => {
         if (!cwraps.mono_wasm_diagnostic_server_create_thread(websocket_url, pthreadIdPtr))
             return undefined;
-        const pthreadId = getI32(pthreadIdPtr);
+        const pthreadId = getI32(pthreadIdPtr) as any as PThreadPtr;
         return pthreadId;
     });
     if (result === undefined) {
diff --git a/src/mono/browser/runtime/diagnostics/index.ts b/src/mono/browser/runtime/diagnostics/index.ts
index 474fd8a824fd..f8f93f6781a0 100644
--- a/src/mono/browser/runtime/diagnostics/index.ts
+++ b/src/mono/browser/runtime/diagnostics/index.ts
@@ -15,7 +15,7 @@ import { mono_assert, runtimeHelpers } from "../globals";
 
 
 // called from C on the main thread
-export function mono_wasm_event_pipe_early_startup_callback(): void {
+export function mono_wasm_event_pipe_early_startup_callback (): void {
     if (WasmEnableThreads) {
         return;
     }
@@ -36,13 +36,10 @@ let diagnosticsServerEnabled = false;
 
 let diagnosticsInitialized = false;
 
-export async function mono_wasm_init_diagnostics(): Promise<void> {
-    if (diagnosticsInitialized)
-        return;
-    if (!WasmEnableThreads) {
-        mono_log_warn("ignoring diagnostics options because this runtime does not support diagnostics");
-        return;
-    }
+export async function mono_wasm_init_diagnostics (): Promise<void> {
+    if (!WasmEnableThreads) return;
+    if (diagnosticsInitialized) return;
+
     const options = diagnostic_options_from_environment();
     if (!options)
         return;
@@ -63,7 +60,7 @@ export async function mono_wasm_init_diagnostics(): Promise<void> {
     }
 }
 
-function boolsyOption(x: string | boolean): boolean {
+function boolsyOption (x: string | boolean): boolean {
     if (x === true || x === false)
         return x;
     if (typeof x === "string") {
@@ -80,7 +77,7 @@ function boolsyOption(x: string | boolean): boolean {
 /// The environment variables are:
 ///  * DOTNET_DiagnosticPorts
 ///
-function diagnostic_options_from_environment(): DiagnosticOptions | null {
+function diagnostic_options_from_environment (): DiagnosticOptions | null {
     const val = runtimeHelpers.config.environmentVariables ? runtimeHelpers.config.environmentVariables["DOTNET_DiagnosticPorts"] : undefined;
     if (is_nullish(val))
         return null;
@@ -91,7 +88,7 @@ function diagnostic_options_from_environment(): DiagnosticOptions | null {
 
 /// Parse a DOTNET_DiagnosticPorts string and return a DiagnosticOptions object.
 /// See https://docs.microsoft.com/en-us/dotnet/core/diagnostics/diagnostic-port#configure-additional-diagnostic-ports
-function diagnostic_options_from_ports_spec(val: string): DiagnosticOptions | null {
+function diagnostic_options_from_ports_spec (val: string): DiagnosticOptions | null {
     if (val === "")
         return null;
     const ports = val.split(";");
@@ -143,7 +140,7 @@ function diagnostic_options_from_ports_spec(val: string): DiagnosticOptions | nu
 
 }
 
-export function mono_wasm_diagnostic_server_on_runtime_server_init(out_options: VoidPtr): void {
+export function mono_wasm_diagnostic_server_on_runtime_server_init (out_options: VoidPtr): void {
     mono_assert(WasmEnableThreads, "The diagnostic server requires threads to be enabled during build time.");
     if (diagnosticsServerEnabled) {
         /* called on the main thread when the runtime is sufficiently initialized */
diff --git a/src/mono/browser/runtime/diagnostics/mock/environment.ts b/src/mono/browser/runtime/diagnostics/mock/environment.ts
index bcbdf390a4df..7e2a8a591863 100644
--- a/src/mono/browser/runtime/diagnostics/mock/environment.ts
+++ b/src/mono/browser/runtime/diagnostics/mock/environment.ts
@@ -6,11 +6,11 @@ import type { FilterPredicate, MockEnvironment } from "./types";
 import Serializer from "../server_pthread/ipc-protocol/base-serializer";
 import { CommandSetId, EventPipeCommandId, ProcessCommandId } from "../server_pthread/ipc-protocol/types";
 import { assertNever } from "../../types/internal";
-import { pthread_self } from "../../pthreads/worker";
+import { pthread_self } from "../../pthreads";
 import { createPromiseController, mono_assert } from "../../globals";
 
 
-function expectAdvertise(data: ArrayBuffer): boolean {
+function expectAdvertise (data: ArrayBuffer): boolean {
     if (typeof (data) === "string") {
         assertNever(data);
     } else {
@@ -21,7 +21,7 @@ function expectAdvertise(data: ArrayBuffer): boolean {
     }
 }
 
-function expectOk(payloadLength?: number): FilterPredicate {
+function expectOk (payloadLength?: number): FilterPredicate {
     return (data) => {
         if (typeof (data) === "string") {
             assertNever(data);
@@ -33,7 +33,7 @@ function expectOk(payloadLength?: number): FilterPredicate {
     };
 }
 
-function extractOkSessionID(data: ArrayBuffer): number {
+function extractOkSessionID (data: ArrayBuffer): number {
     if (typeof (data) === "string") {
         assertNever(data);
     } else {
@@ -45,13 +45,13 @@ function extractOkSessionID(data: ArrayBuffer): number {
     }
 }
 
-function computeStringByteLength(s: string | null): number {
+function computeStringByteLength (s: string | null): number {
     if (s === undefined || s === null || s === "")
         return 4; // just length of zero
     return 4 + 2 * s.length + 2; // length + UTF16 + null
 }
 
-function computeCollectTracing2PayloadByteLength(payload: RemoveCommandSetAndId<EventPipeCommandCollectTracing2>): number {
+function computeCollectTracing2PayloadByteLength (payload: RemoveCommandSetAndId<EventPipeCommandCollectTracing2>): number {
     let len = 0;
     len += 4; // circularBufferMB
     len += 4; // format
@@ -66,7 +66,7 @@ function computeCollectTracing2PayloadByteLength(payload: RemoveCommandSetAndId<
     return len;
 }
 
-function makeEventPipeCollectTracing2(payload: RemoveCommandSetAndId<EventPipeCommandCollectTracing2>): Uint8Array {
+function makeEventPipeCollectTracing2 (payload: RemoveCommandSetAndId<EventPipeCommandCollectTracing2>): Uint8Array {
     const payloadLength = computeCollectTracing2PayloadByteLength(payload);
     const messageLength = Serializer.computeMessageByteLength(payloadLength);
     const buffer = new Uint8Array(messageLength);
@@ -85,7 +85,7 @@ function makeEventPipeCollectTracing2(payload: RemoveCommandSetAndId<EventPipeCo
     return buffer;
 }
 
-function makeEventPipeStopTracing(payload: RemoveCommandSetAndId<EventPipeCommandStopTracing>): Uint8Array {
+function makeEventPipeStopTracing (payload: RemoveCommandSetAndId<EventPipeCommandStopTracing>): Uint8Array {
     const payloadLength = 8;
     const messageLength = Serializer.computeMessageByteLength(payloadLength);
     const buffer = new Uint8Array(messageLength);
@@ -96,7 +96,7 @@ function makeEventPipeStopTracing(payload: RemoveCommandSetAndId<EventPipeComman
     return buffer;
 }
 
-function makeProcessResumeRuntime(): Uint8Array {
+function makeProcessResumeRuntime (): Uint8Array {
     const payloadLength = 0;
     const messageLength = Serializer.computeMessageByteLength(payloadLength);
     const buffer = new Uint8Array(messageLength);
@@ -105,20 +105,20 @@ function makeProcessResumeRuntime(): Uint8Array {
     return buffer;
 }
 
-function postMessageToBrowser(message: any, transferable?: Transferable[]): void {
+function postMessageToBrowser (message: any, transferable?: Transferable[]): void {
     pthread_self.postMessageToBrowser({
         type: "diagnostic_server_mock",
         ...message
     }, transferable);
 }
 
-function addEventListenerFromBrowser(cmd: string, listener: (data: any) => void) {
+function addEventListenerFromBrowser (cmd: string, listener: (data: any) => void) {
     pthread_self.addEventListenerFromBrowser((event) => {
         if (event.data.cmd === cmd) listener(event.data);
     });
 }
 
-export function createMockEnvironment(): MockEnvironment {
+export function createMockEnvironment (): MockEnvironment {
     const command = {
         makeEventPipeCollectTracing2,
         makeEventPipeStopTracing,
diff --git a/src/mono/browser/runtime/diagnostics/mock/index.ts b/src/mono/browser/runtime/diagnostics/mock/index.ts
index 813041540713..d320980e454d 100644
--- a/src/mono/browser/runtime/diagnostics/mock/index.ts
+++ b/src/mono/browser/runtime/diagnostics/mock/index.ts
@@ -25,12 +25,12 @@ type MockConnectionScript = (engine: MockScriptConnection) => Promise<void>;
 export type MockScript = (env: MockEnvironment) => MockConnectionScript[];
 
 let MockImplConstructor: new (script: MockScript) => Mock;
-export function mock(script: MockScript): Mock {
+export function mock (script: MockScript): Mock {
     if (monoDiagnosticsMock) {
         if (!MockImplConstructor) {
             class MockScriptEngineSocketImpl implements MockRemoteSocket {
-                constructor(private readonly engine: MockScriptEngineImpl) { }
-                send(data: string | ArrayBuffer): void {
+                constructor (private readonly engine: MockScriptEngineImpl) { }
+                send (data: string | ArrayBuffer): void {
                     mono_log_debug(`mock ${this.engine.ident} client sent: `, data);
                     let event: MessageEvent<string | ArrayBuffer> | null = null;
                     if (typeof data === "string") {
@@ -45,19 +45,19 @@ export function mock(script: MockScript): Mock {
                     this.engine.mockReplyEventTarget.dispatchEvent(event);
                 }
                 addEventListener<T extends keyof WebSocketEventMap>(event: T, listener: (event: WebSocketEventMap[T]) => any, options?: boolean | AddEventListenerOptions): void;
-                addEventListener(event: string, listener: EventListenerOrEventListenerObject, options?: boolean | AddEventListenerOptions): void {
+                addEventListener (event: string, listener: EventListenerOrEventListenerObject, options?: boolean | AddEventListenerOptions): void {
                     mono_log_debug(`mock ${this.engine.ident} client added listener for ${event}`);
                     this.engine.eventTarget.addEventListener(event, listener, options);
                 }
-                removeEventListener(event: string, listener: EventListenerOrEventListenerObject): void {
+                removeEventListener (event: string, listener: EventListenerOrEventListenerObject): void {
                     mono_log_debug(`mock ${this.engine.ident} client removed listener for ${event}`);
                     this.engine.eventTarget.removeEventListener(event, listener);
                 }
-                close(): void {
+                close (): void {
                     mono_log_debug(`mock ${this.engine.ident} client closed`);
                     this.engine.mockReplyEventTarget.dispatchEvent(new CloseEvent("close"));
                 }
-                dispatchEvent(ev: Event): boolean {
+                dispatchEvent (ev: Event): boolean {
                     return this.engine.eventTarget.dispatchEvent(ev);
                 }
             }
@@ -68,11 +68,11 @@ export function mock(script: MockScript): Mock {
                 readonly eventTarget: EventTarget = new EventTarget();
                 // eventTarget that the MockReplySocket with send() to
                 readonly mockReplyEventTarget: EventTarget = new EventTarget();
-                constructor(readonly ident: number) {
+                constructor (readonly ident: number) {
                     this.socket = new MockScriptEngineSocketImpl(this);
                 }
 
-                reply(data: ArrayBuffer | Uint8Array) {
+                reply (data: ArrayBuffer | Uint8Array) {
                     mono_log_debug(`mock ${this.ident} reply:`, data);
                     let sendData: ArrayBuffer;
                     if (typeof data === "object" && data instanceof ArrayBuffer) {
@@ -91,7 +91,7 @@ export function mock(script: MockScript): Mock {
                     this.eventTarget.dispatchEvent(new MessageEvent("message", { data: sendData }));
                 }
 
-                processSend(onMessage: (data: ArrayBuffer) => any): Promise<void> {
+                processSend (onMessage: (data: ArrayBuffer) => any): Promise<void> {
                     mono_log_debug(`mock ${this.ident} processSend`);
 
                     return new Promise<void>((resolve, reject) => {
@@ -112,7 +112,7 @@ export function mock(script: MockScript): Mock {
                     });
                 }
 
-                async waitForSend<T = void>(filter: (data: ArrayBuffer) => boolean, extract?: (data: ArrayBuffer) => T): Promise<T> {
+                async waitForSend<T = void> (filter: (data: ArrayBuffer) => boolean, extract?: (data: ArrayBuffer) => T): Promise<T> {
                     mono_log_debug(`mock ${this.ident} waitForSend`);
 
                     const data = await new Promise<ArrayBuffer>((resolve) => {
@@ -141,7 +141,7 @@ export function mock(script: MockScript): Mock {
                 openCount: number;
                 engines: MockScriptEngineImpl[];
                 connectionScripts: MockConnectionScript[];
-                constructor(public readonly mockScript: MockScript) {
+                constructor (public readonly mockScript: MockScript) {
                     const env: MockEnvironment = createMockEnvironment();
                     this.connectionScripts = mockScript(env);
                     this.openCount = 0;
@@ -151,13 +151,13 @@ export function mock(script: MockScript): Mock {
                         this.engines[i] = new MockScriptEngineImpl(i);
                     }
                 }
-                open(): MockRemoteSocket {
+                open (): MockRemoteSocket {
                     const i = this.openCount++;
                     mono_log_debug(`mock ${i} open`);
                     return this.engines[i].socket;
                 }
 
-                async run(): Promise<void> {
+                async run (): Promise<void> {
                     const scripts = this.connectionScripts;
                     await Promise.all(scripts.map((script, i) => script(this.engines[i])));
                 }
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/common-socket.ts b/src/mono/browser/runtime/diagnostics/server_pthread/common-socket.ts
index b6bb8084a268..6714af0bc4d2 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/common-socket.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/common-socket.ts
@@ -17,7 +17,7 @@ export interface CommonSocket {
 
 type AssignableTo<T, Q> = Q extends T ? true : false;
 
-function static_assert<Cond extends boolean>(x: Cond): asserts x is Cond { /*empty*/ }
+function static_assert<Cond extends boolean> (x: Cond): asserts x is Cond { /*empty*/ }
 
 {
     static_assert<AssignableTo<CommonSocket, WebSocket>>(true);
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/index.ts b/src/mono/browser/runtime/diagnostics/server_pthread/index.ts
index 1fbca276f3c8..cbc94347a7d2 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/index.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/index.ts
@@ -6,7 +6,7 @@ import WasmEnableThreads from "consts:wasmEnableThreads";
 import monoDiagnosticsMock from "consts:monoDiagnosticsMock";
 
 import { PromiseAndController, assertNever } from "../../types/internal";
-import { pthread_self } from "../../pthreads/worker";
+import { pthread_self } from "../../pthreads";
 import { createPromiseController, mono_assert } from "../../globals";
 import { threads_c_functions as cwraps } from "../../cwraps";
 import { EventPipeSessionIDImpl } from "../shared/types";
@@ -50,16 +50,20 @@ import {
 import { mono_log_error, mono_log_info, mono_log_debug, mono_log_warn } from "../../logging";
 import { utf8ToString } from "../../strings";
 
-function addOneShotProtocolCommandEventListener(src: EventTarget): Promise<ProtocolCommandEvent> {
+function addOneShotProtocolCommandEventListener (src: EventTarget): Promise<ProtocolCommandEvent> {
     return new Promise((resolve) => {
-        const listener = (event: Event) => { resolve(event as ProtocolCommandEvent); };
+        const listener = (event: Event) => {
+            resolve(event as ProtocolCommandEvent);
+        };
         src.addEventListener(dotnetDiagnosticsServerProtocolCommandEvent, listener, { once: true });
     });
 }
 
-function addOneShotOpenEventListenr(src: EventTarget): Promise<Event> {
+function addOneShotOpenEventListenr (src: EventTarget): Promise<Event> {
     return new Promise((resolve) => {
-        const listener = (event: Event) => { resolve(event); };
+        const listener = (event: Event) => {
+            resolve(event);
+        };
         src.addEventListener("open", listener, { once: true });
     });
 }
@@ -73,7 +77,7 @@ class DiagnosticServerImpl implements DiagnosticServer {
     readonly mocked: Promise<Mock> | undefined;
     runtimeResumed = false;
 
-    constructor(websocketUrl: string, mockPromise?: Promise<Mock>) {
+    constructor (websocketUrl: string, mockPromise?: Promise<Mock>) {
         this.websocketUrl = websocketUrl;
         pthread_self.addEventListenerFromBrowser(this.onMessageFromMainThread.bind(this));
         this.mocked = monoDiagnosticsMock ? mockPromise : undefined;
@@ -85,21 +89,21 @@ class DiagnosticServerImpl implements DiagnosticServer {
 
     private attachToRuntimeController = createPromiseController<void>().promise_control;
 
-    start(): void {
+    start (): void {
         mono_log_info(`starting diagnostic server with url: ${this.websocketUrl}`);
         this.startRequestedController.resolve();
     }
-    stop(): void {
+    stop (): void {
         this.stopRequested = true;
         this.stopRequestedController.resolve();
     }
 
-    attachToRuntime(): void {
+    attachToRuntime (): void {
         cwraps.mono_wasm_diagnostic_server_thread_attach_to_runtime();
         this.attachToRuntimeController.resolve();
     }
 
-    async serverLoop(this: DiagnosticServerImpl): Promise<void> {
+    async serverLoop (this: DiagnosticServerImpl): Promise<void> {
         await this.startRequestedController.promise;
         await this.attachToRuntimeController.promise; // can't start tracing until we've attached to the runtime
         while (!this.stopRequested) {
@@ -119,7 +123,7 @@ class DiagnosticServerImpl implements DiagnosticServer {
         }
     }
 
-    async openSocket(): Promise<CommonSocket> {
+    async openSocket (): Promise<CommonSocket> {
         if (monoDiagnosticsMock && this.mocked) {
             return (await this.mocked).open();
         } else {
@@ -132,7 +136,7 @@ class DiagnosticServerImpl implements DiagnosticServer {
 
     private openCount = 0;
 
-    async advertiseAndWaitForClient(): Promise<void> {
+    async advertiseAndWaitForClient (): Promise<void> {
         try {
             const connNum = this.openCount++;
             mono_log_debug("opening websocket and sending ADVR_V1", connNum);
@@ -148,7 +152,7 @@ class DiagnosticServerImpl implements DiagnosticServer {
         }
     }
 
-    async parseAndDispatchMessage(ws: CommonSocket, connNum: number, message: ProtocolCommandEvent): Promise<void> {
+    async parseAndDispatchMessage (ws: CommonSocket, connNum: number, message: ProtocolCommandEvent): Promise<void> {
         try {
             const cmd = this.parseCommand(message, connNum);
             if (cmd === null) {
@@ -167,7 +171,7 @@ class DiagnosticServerImpl implements DiagnosticServer {
         }
     }
 
-    sendAdvertise(ws: CommonSocket) {
+    sendAdvertise (ws: CommonSocket) {
         /* FIXME: don't use const fake guid and fake process id. In dotnet-dsrouter the pid is used
          * as a dictionary key,so if we ever supprt multiple runtimes, this might need to change.
         */
@@ -178,7 +182,7 @@ class DiagnosticServerImpl implements DiagnosticServer {
         ws.send(buf);
     }
 
-    parseCommand(message: ProtocolCommandEvent, connNum: number): ProtocolClientCommandBase | null {
+    parseCommand (message: ProtocolCommandEvent, connNum: number): ProtocolClientCommandBase | null {
         mono_log_debug("parsing byte command: ", message.data, connNum);
         const result = parseProtocolCommand(message.data);
         mono_log_debug("parsed byte command: ", result, connNum);
@@ -190,7 +194,7 @@ class DiagnosticServerImpl implements DiagnosticServer {
         }
     }
 
-    onMessageFromMainThread(this: DiagnosticServerImpl, event: MessageEvent<unknown>): void {
+    onMessageFromMainThread (this: DiagnosticServerImpl, event: MessageEvent<unknown>): void {
         const d = event.data;
         if (d && isDiagnosticMessage(d)) {
             this.controlCommandReceived(d as DiagnosticServerControlCommand);
@@ -198,7 +202,7 @@ class DiagnosticServerImpl implements DiagnosticServer {
     }
 
     /// dispatch commands received from the main thread
-    controlCommandReceived(cmd: DiagnosticServerControlCommand): void {
+    controlCommandReceived (cmd: DiagnosticServerControlCommand): void {
         switch (cmd.cmd) {
             case "start":
                 this.start();
@@ -216,7 +220,7 @@ class DiagnosticServerImpl implements DiagnosticServer {
     }
 
     // dispatch EventPipe commands received from the diagnostic client
-    async dispatchEventPipeCommand(ws: CommonSocket, cmd: EventPipeClientCommandBase): Promise<void> {
+    async dispatchEventPipeCommand (ws: CommonSocket, cmd: EventPipeClientCommandBase): Promise<void> {
         if (isEventPipeCommandCollectTracing2(cmd)) {
             await this.collectTracingEventPipe(ws, cmd);
         } else if (isEventPipeCommandStopTracing(cmd)) {
@@ -226,12 +230,12 @@ class DiagnosticServerImpl implements DiagnosticServer {
         }
     }
 
-    postClientReplyOK(ws: CommonSocket, payload?: Uint8Array): void {
+    postClientReplyOK (ws: CommonSocket, payload?: Uint8Array): void {
         // FIXME: send a binary response for non-mock sessions!
         ws.send(createBinaryCommandOKReply(payload));
     }
 
-    async stopEventPipe(ws: WebSocket | MockRemoteSocket, sessionID: EventPipeSessionIDImpl): Promise<void> {
+    async stopEventPipe (ws: WebSocket | MockRemoteSocket, sessionID: EventPipeSessionIDImpl): Promise<void> {
         mono_log_debug("stopEventPipe", sessionID);
         cwraps.mono_wasm_event_pipe_session_disable(sessionID);
         // we might send OK before the session is actually stopped since the websocket is async
@@ -239,7 +243,7 @@ class DiagnosticServerImpl implements DiagnosticServer {
         this.postClientReplyOK(ws);
     }
 
-    async collectTracingEventPipe(ws: WebSocket | MockRemoteSocket, cmd: EventPipeCommandCollectTracing2): Promise<void> {
+    async collectTracingEventPipe (ws: WebSocket | MockRemoteSocket, cmd: EventPipeCommandCollectTracing2): Promise<void> {
         const session = await makeEventPipeStreamingSession(ws, cmd);
         const sessionIDbuf = new Uint8Array(8); // 64 bit
         sessionIDbuf[0] = session.sessionID & 0xFF;
@@ -253,7 +257,7 @@ class DiagnosticServerImpl implements DiagnosticServer {
     }
 
     // dispatch Process commands received from the diagnostic client
-    async dispatchProcessCommand(ws: WebSocket | MockRemoteSocket, cmd: ProcessClientCommandBase): Promise<void> {
+    async dispatchProcessCommand (ws: WebSocket | MockRemoteSocket, cmd: ProcessClientCommandBase): Promise<void> {
         if (isProcessCommandResumeRuntime(cmd)) {
             this.processResumeRuntime(ws);
         } else {
@@ -261,12 +265,12 @@ class DiagnosticServerImpl implements DiagnosticServer {
         }
     }
 
-    processResumeRuntime(ws: WebSocket | MockRemoteSocket): void {
+    processResumeRuntime (ws: WebSocket | MockRemoteSocket): void {
         this.postClientReplyOK(ws);
         this.resumeRuntime();
     }
 
-    resumeRuntime(): void {
+    resumeRuntime (): void {
         if (!this.runtimeResumed) {
             mono_log_debug("resuming runtime startup");
             cwraps.mono_wasm_diagnostic_server_post_resume_runtime();
@@ -275,7 +279,7 @@ class DiagnosticServerImpl implements DiagnosticServer {
     }
 }
 
-function parseProtocolCommand(data: ArrayBuffer | BinaryProtocolCommand): ParseClientCommandResult<ProtocolClientCommandBase> {
+function parseProtocolCommand (data: ArrayBuffer | BinaryProtocolCommand): ParseClientCommandResult<ProtocolClientCommandBase> {
     if (isBinaryProtocolCommand(data)) {
         return parseBinaryProtocolCommand(data);
     } else {
@@ -284,7 +288,7 @@ function parseProtocolCommand(data: ArrayBuffer | BinaryProtocolCommand): ParseC
 }
 
 /// Called by the runtime  to initialize the diagnostic server workers
-export function mono_wasm_diagnostic_server_on_server_thread_created(websocketUrlPtr: CharPtr): void {
+export function mono_wasm_diagnostic_server_on_server_thread_created (websocketUrlPtr: CharPtr): void {
     mono_assert(WasmEnableThreads, "The diagnostic server requires threads to be enabled during build time.");
     const websocketUrl = utf8ToString(websocketUrlPtr);
     mono_log_debug(`mono_wasm_diagnostic_server_on_server_thread_created, url ${websocketUrl}`);
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/base-parser.ts b/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/base-parser.ts
index 2129fc76e601..ddd37a145c08 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/base-parser.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/base-parser.ts
@@ -4,12 +4,12 @@
 import Magic from "./magic";
 import { BinaryProtocolCommand } from "./types";
 
-function advancePos(pos: { pos: number }, offset: number): void {
+function advancePos (pos: { pos: number }, offset: number): void {
     pos.pos += offset;
 }
 
 const Parser = {
-    tryParseHeader(buf: Uint8Array, pos: { pos: number }): boolean {
+    tryParseHeader (buf: Uint8Array, pos: { pos: number }): boolean {
         let j = pos.pos;
         for (let i = 0; i < Magic.DOTNET_IPC_V1.length; i++) {
             if (buf[j++] !== Magic.DOTNET_IPC_V1[i]) {
@@ -19,10 +19,10 @@ const Parser = {
         advancePos(pos, Magic.DOTNET_IPC_V1.length);
         return true;
     },
-    tryParseSize(buf: Uint8Array, pos: { pos: number }): number | undefined {
+    tryParseSize (buf: Uint8Array, pos: { pos: number }): number | undefined {
         return Parser.tryParseUint16(buf, pos);
     },
-    tryParseCommand(buf: Uint8Array, pos: { pos: number }): BinaryProtocolCommand | undefined {
+    tryParseCommand (buf: Uint8Array, pos: { pos: number }): BinaryProtocolCommand | undefined {
         const commandSet = Parser.tryParseUint8(buf, pos);
         if (commandSet === undefined)
             return undefined;
@@ -39,7 +39,7 @@ const Parser = {
         };
         return result;
     },
-    tryParseReserved(buf: Uint8Array, pos: { pos: number }): true | undefined {
+    tryParseReserved (buf: Uint8Array, pos: { pos: number }): true | undefined {
         const reservedLength = 2; // 2 bytes reserved, must be 0
         for (let i = 0; i < reservedLength; i++) {
             const reserved = Parser.tryParseUint8(buf, pos);
@@ -49,7 +49,7 @@ const Parser = {
         }
         return true;
     },
-    tryParseUint8(buf: Uint8Array, pos: { pos: number }): number | undefined {
+    tryParseUint8 (buf: Uint8Array, pos: { pos: number }): number | undefined {
         const j = pos.pos;
         if (j >= buf.byteLength) {
             return undefined;
@@ -58,7 +58,7 @@ const Parser = {
         advancePos(pos, 1);
         return size;
     },
-    tryParseUint16(buf: Uint8Array, pos: { pos: number }): number | undefined {
+    tryParseUint16 (buf: Uint8Array, pos: { pos: number }): number | undefined {
         const j = pos.pos;
         if (j + 1 >= buf.byteLength) {
             return undefined;
@@ -67,7 +67,7 @@ const Parser = {
         advancePos(pos, 2);
         return size;
     },
-    tryParseUint32(buf: Uint8Array, pos: { pos: number }): number | undefined {
+    tryParseUint32 (buf: Uint8Array, pos: { pos: number }): number | undefined {
         const j = pos.pos;
         if (j + 3 >= buf.byteLength) {
             return undefined;
@@ -76,7 +76,7 @@ const Parser = {
         advancePos(pos, 4);
         return size;
     },
-    tryParseUint64(buf: Uint8Array, pos: { pos: number }): [number, number] | undefined {
+    tryParseUint64 (buf: Uint8Array, pos: { pos: number }): [number, number] | undefined {
         const lo = Parser.tryParseUint32(buf, pos);
         if (lo === undefined)
             return undefined;
@@ -85,22 +85,22 @@ const Parser = {
             return undefined;
         return [lo, hi];
     },
-    tryParseBool(buf: Uint8Array, pos: { pos: number }): boolean | undefined {
+    tryParseBool (buf: Uint8Array, pos: { pos: number }): boolean | undefined {
         const r = Parser.tryParseUint8(buf, pos);
         if (r === undefined)
             return undefined;
         return r !== 0;
     },
-    tryParseArraySize(buf: Uint8Array, pos: { pos: number }): number | undefined {
+    tryParseArraySize (buf: Uint8Array, pos: { pos: number }): number | undefined {
         const r = Parser.tryParseUint32(buf, pos);
         if (r === undefined)
             return undefined;
         return r;
     },
-    tryParseStringLength(buf: Uint8Array, pos: { pos: number }): number | undefined {
+    tryParseStringLength (buf: Uint8Array, pos: { pos: number }): number | undefined {
         return Parser.tryParseArraySize(buf, pos);
     },
-    tryParseUtf16String(buf: Uint8Array, pos: { pos: number }): string | undefined {
+    tryParseUtf16String (buf: Uint8Array, pos: { pos: number }): string | undefined {
         const length = Parser.tryParseStringLength(buf, pos);
         if (length === undefined)
             return undefined;
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/base-serializer.ts b/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/base-serializer.ts
index 09115d7245bc..11c846763b25 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/base-serializer.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/base-serializer.ts
@@ -4,7 +4,7 @@
 import { CommandSetId, ServerCommandId, EventPipeCommandId, ProcessCommandId } from "./types";
 import Magic from "./magic";
 
-function advancePos(pos: { pos: number }, count: number): void {
+function advancePos (pos: { pos: number }, count: number): void {
     pos.pos += count;
 }
 
@@ -12,7 +12,7 @@ function advancePos(pos: { pos: number }, count: number): void {
 function serializeHeader(buf: Uint8Array, pos: { pos: number }, commandSet: CommandSetId.EventPipe, command: EventPipeCommandId, len: number): void;
 function serializeHeader(buf: Uint8Array, pos: { pos: number }, commandSet: CommandSetId.Process, command: ProcessCommandId, len: number): void;
 function serializeHeader(buf: Uint8Array, pos: { pos: number }, commandSet: CommandSetId.Server, command: ServerCommandId, len: number): void;
-function serializeHeader(buf: Uint8Array, pos: { pos: number }, commandSet: CommandSetId, command: EventPipeCommandId | ProcessCommandId | ServerCommandId, len: number): void {
+function serializeHeader (buf: Uint8Array, pos: { pos: number }, commandSet: CommandSetId, command: EventPipeCommandId | ProcessCommandId | ServerCommandId, len: number): void {
     Serializer.serializeMagic(buf, pos);
     Serializer.serializeUint16(buf, pos, len);
     Serializer.serializeUint8(buf, pos, commandSet);
@@ -22,7 +22,7 @@ function serializeHeader(buf: Uint8Array, pos: { pos: number }, commandSet: Comm
 
 
 const Serializer = {
-    computeMessageByteLength(payload?: number | Uint8Array): number {
+    computeMessageByteLength (payload?: number | Uint8Array): number {
         const fullHeaderSize = Magic.MinimalHeaderSize // magic, len
             + 2 // commandSet, command
             + 2; // reserved ;
@@ -30,33 +30,33 @@ const Serializer = {
         const len = fullHeaderSize + payloadLength; // magic, size, commandSet, command, reserved
         return len;
     },
-    serializeMagic(buf: Uint8Array, pos: { pos: number }): void {
+    serializeMagic (buf: Uint8Array, pos: { pos: number }): void {
         buf.set(Magic.DOTNET_IPC_V1, pos.pos);
         advancePos(pos, Magic.DOTNET_IPC_V1.byteLength);
     },
-    serializeUint8(buf: Uint8Array, pos: { pos: number }, value: number): void {
+    serializeUint8 (buf: Uint8Array, pos: { pos: number }, value: number): void {
         buf[pos.pos++] = value;
     },
-    serializeUint16(buf: Uint8Array, pos: { pos: number }, value: number): void {
+    serializeUint16 (buf: Uint8Array, pos: { pos: number }, value: number): void {
         buf[pos.pos++] = value & 0xFF;
         buf[pos.pos++] = (value >> 8) & 0xFF;
     },
-    serializeUint32(buf: Uint8Array, pos: { pos: number }, value: number): void {
+    serializeUint32 (buf: Uint8Array, pos: { pos: number }, value: number): void {
         buf[pos.pos++] = value & 0xFF;
         buf[pos.pos++] = (value >> 8) & 0xFF;
         buf[pos.pos++] = (value >> 16) & 0xFF;
         buf[pos.pos++] = (value >> 24) & 0xFF;
     },
-    serializeUint64(buf: Uint8Array, pos: { pos: number }, value: [number, number]): void {
+    serializeUint64 (buf: Uint8Array, pos: { pos: number }, value: [number, number]): void {
         Serializer.serializeUint32(buf, pos, value[0]);
         Serializer.serializeUint32(buf, pos, value[1]);
     },
     serializeHeader,
-    serializePayload(buf: Uint8Array, pos: { pos: number }, payload: Uint8Array): void {
+    serializePayload (buf: Uint8Array, pos: { pos: number }, payload: Uint8Array): void {
         buf.set(payload, pos.pos);
         advancePos(pos, payload.byteLength);
     },
-    serializeString(buf: Uint8Array, pos: { pos: number }, s: string | null): void {
+    serializeString (buf: Uint8Array, pos: { pos: number }, s: string | null): void {
         if (s === null || s === undefined || s === "") {
             Serializer.serializeUint32(buf, pos, 0);
         } else {
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/magic.ts b/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/magic.ts
index e7f27b9c6ab1..9d98130bf84c 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/magic.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/magic.ts
@@ -3,7 +3,7 @@
 
 let magic_buf: Uint8Array = null!;
 const Magic = {
-    get DOTNET_IPC_V1(): Uint8Array {
+    get DOTNET_IPC_V1 (): Uint8Array {
         if (magic_buf === null) {
             const magic = "DOTNET_IPC_V1";
             const magic_len = magic.length + 1; // nul terminated
@@ -15,7 +15,7 @@ const Magic = {
         }
         return magic_buf;
     },
-    get MinimalHeaderSize(): number {
+    get MinimalHeaderSize (): number {
         // we just need to see the magic and the size
         const sizeOfSize = 2;
         return Magic.DOTNET_IPC_V1.byteLength + sizeOfSize;
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/parser.ts b/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/parser.ts
index 765e16718c95..045bb76a4c5e 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/parser.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/parser.ts
@@ -27,7 +27,7 @@ interface ParseClientCommandResultOk<C = ProtocolClientCommandBase> extends Pars
 
 export type ParseClientCommandResult<C = ProcessClientCommandBase> = ParseClientCommandResultOk<C> | ParseResultFail;
 
-export function parseBinaryProtocolCommand(cmd: BinaryProtocolCommand): ParseClientCommandResult<ProtocolClientCommandBase> {
+export function parseBinaryProtocolCommand (cmd: BinaryProtocolCommand): ParseClientCommandResult<ProtocolClientCommandBase> {
     switch (cmd.commandSet) {
         case CommandSetId.Reserved:
             throw new Error("unexpected reserved command_set command");
@@ -44,7 +44,7 @@ export function parseBinaryProtocolCommand(cmd: BinaryProtocolCommand): ParseCli
     }
 }
 
-function parseEventPipeCommand(cmd: BinaryProtocolCommand & { commandSet: CommandSetId.EventPipe }): ParseClientCommandResult<EventPipeClientCommandBase> {
+function parseEventPipeCommand (cmd: BinaryProtocolCommand & { commandSet: CommandSetId.EventPipe }): ParseClientCommandResult<EventPipeClientCommandBase> {
     switch (cmd.command) {
         case EventPipeCommandId.StopTracing:
             return parseEventPipeStopTracing(cmd);
@@ -58,7 +58,7 @@ function parseEventPipeCommand(cmd: BinaryProtocolCommand & { commandSet: Comman
     }
 }
 
-function parseEventPipeCollectTracing2(cmd: BinaryProtocolCommand & { commandSet: CommandSetId.EventPipe, command: EventPipeCommandId.CollectTracing2 }): ParseClientCommandResult<EventPipeCommandCollectTracing2> {
+function parseEventPipeCollectTracing2 (cmd: BinaryProtocolCommand & { commandSet: CommandSetId.EventPipe, command: EventPipeCommandId.CollectTracing2 }): ParseClientCommandResult<EventPipeCommandCollectTracing2> {
     const pos = { pos: 0 };
     const buf = cmd.payload;
     const circularBufferMB = Parser.tryParseUint32(buf, pos);
@@ -89,7 +89,7 @@ function parseEventPipeCollectTracing2(cmd: BinaryProtocolCommand & { commandSet
     return { success: true, result: command };
 }
 
-function parseEventPipeCollectTracingCommandProvider(buf: Uint8Array, pos: { pos: number }): ParseClientCommandResult<EventPipeCollectTracingCommandProvider> {
+function parseEventPipeCollectTracingCommandProvider (buf: Uint8Array, pos: { pos: number }): ParseClientCommandResult<EventPipeCollectTracingCommandProvider> {
     const keywords = Parser.tryParseUint64(buf, pos);
     if (keywords === undefined) {
         return { success: false, error: "failed to parse keywords in EventPipe CollectTracing provider" };
@@ -107,7 +107,7 @@ function parseEventPipeCollectTracingCommandProvider(buf: Uint8Array, pos: { pos
     return { success: true, result: provider };
 }
 
-function parseEventPipeStopTracing(cmd: BinaryProtocolCommand & { commandSet: CommandSetId.EventPipe, command: EventPipeCommandId.StopTracing }): ParseClientCommandResult<EventPipeCommandStopTracing> {
+function parseEventPipeStopTracing (cmd: BinaryProtocolCommand & { commandSet: CommandSetId.EventPipe, command: EventPipeCommandId.StopTracing }): ParseClientCommandResult<EventPipeCommandStopTracing> {
     const pos = { pos: 0 };
     const buf = cmd.payload;
     const sessionID = Parser.tryParseUint64(buf, pos);
@@ -122,7 +122,7 @@ function parseEventPipeStopTracing(cmd: BinaryProtocolCommand & { commandSet: Co
     return { success: true, result: command };
 }
 
-function parseProcessCommand(cmd: BinaryProtocolCommand & { commandSet: CommandSetId.Process }): ParseClientCommandResult<ProcessClientCommandBase> {
+function parseProcessCommand (cmd: BinaryProtocolCommand & { commandSet: CommandSetId.Process }): ParseClientCommandResult<ProcessClientCommandBase> {
     switch (cmd.command) {
         case ProcessCommandId.ProcessInfo:
             throw new Error("TODO");
@@ -138,7 +138,7 @@ function parseProcessCommand(cmd: BinaryProtocolCommand & { commandSet: CommandS
     }
 }
 
-function parseProcessResumeRuntime(cmd: BinaryProtocolCommand & { commandSet: CommandSetId.Process, command: ProcessCommandId.ResumeRuntime }): ParseClientCommandResult<ProcessCommandResumeRuntime> {
+function parseProcessResumeRuntime (cmd: BinaryProtocolCommand & { commandSet: CommandSetId.Process, command: ProcessCommandId.ResumeRuntime }): ParseClientCommandResult<ProcessCommandResumeRuntime> {
     const buf = cmd.payload;
     if (buf.byteLength !== 0) {
         return { success: false, error: "unexpected payload in Process ResumeRuntime command" };
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/serializer.ts b/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/serializer.ts
index 6b67488c5fda..558926d8533f 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/serializer.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/serializer.ts
@@ -5,7 +5,7 @@ import { mono_assert } from "../../../globals";
 import Serializer from "./base-serializer";
 import { CommandSetId, ServerCommandId } from "./types";
 
-export function createBinaryCommandOKReply(payload?: Uint8Array): Uint8Array {
+export function createBinaryCommandOKReply (payload?: Uint8Array): Uint8Array {
     const len = Serializer.computeMessageByteLength(payload);
     const buf = new Uint8Array(len);
     const pos = { pos: 0 };
@@ -16,7 +16,7 @@ export function createBinaryCommandOKReply(payload?: Uint8Array): Uint8Array {
     return buf;
 }
 
-function serializeGuid(buf: Uint8Array, pos: { pos: number }, guid: string): void {
+function serializeGuid (buf: Uint8Array, pos: { pos: number }, guid: string): void {
     guid.split("-").forEach((part) => {
         // FIXME: I'm sure the endianness is wrong here
         for (let i = 0; i < part.length; i += 2) {
@@ -26,7 +26,7 @@ function serializeGuid(buf: Uint8Array, pos: { pos: number }, guid: string): voi
     });
 }
 
-function serializeAsciiLiteralString(buf: Uint8Array, pos: { pos: number }, s: string): void {
+function serializeAsciiLiteralString (buf: Uint8Array, pos: { pos: number }, s: string): void {
     const len = s.length;
     const hasNul = s[len - 1] === "\0";
     for (let i = 0; i < len; i++) {
@@ -38,7 +38,7 @@ function serializeAsciiLiteralString(buf: Uint8Array, pos: { pos: number }, s: s
 }
 
 
-export function createAdvertise(guid: string, processId: [/*lo*/ number, /*hi*/number]): Uint8Array {
+export function createAdvertise (guid: string, processId: [/*lo*/ number, /*hi*/number]): Uint8Array {
     const BUF_LENGTH = 34;
     const buf = new Uint8Array(BUF_LENGTH);
     const pos = { pos: 0 };
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/types.ts b/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/types.ts
index 2031de3499bd..70794014fe8e 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/types.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/ipc-protocol/types.ts
@@ -8,7 +8,7 @@ export interface BinaryProtocolCommand {
     payload: Uint8Array;
 }
 
-export function isBinaryProtocolCommand(x: object): x is BinaryProtocolCommand {
+export function isBinaryProtocolCommand (x: object): x is BinaryProtocolCommand {
     return "commandSet" in x && "command" in x && "payload" in x;
 }
 
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/mock-remote.ts b/src/mono/browser/runtime/diagnostics/server_pthread/mock-remote.ts
index b3d07952de76..48ec5b1adbd9 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/mock-remote.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/mock-remote.ts
@@ -5,7 +5,7 @@ import monoDiagnosticsMock from "consts:monoDiagnosticsMock";
 import type { Mock } from "../mock";
 import { mock } from "../mock";
 
-export function importAndInstantiateMock(mockURL: string): Promise<Mock> {
+export function importAndInstantiateMock (mockURL: string): Promise<Mock> {
     if (monoDiagnosticsMock) {
         const mockPrefix = "mock:";
         const scriptURL = mockURL.substring(mockPrefix.length);
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/protocol-client-commands.ts b/src/mono/browser/runtime/diagnostics/server_pthread/protocol-client-commands.ts
index a255e65fe3c1..f0f7a054bc46 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/protocol-client-commands.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/protocol-client-commands.ts
@@ -51,32 +51,32 @@ export type RemoveCommandSetAndId<T extends ProtocolClientCommandBase> = Omit<T,
 
 export type ProtocolClientCommand = ProcessCommand | EventPipeCommand;
 
-export function isDiagnosticCommandBase(x: object): x is ProtocolClientCommandBase {
+export function isDiagnosticCommandBase (x: object): x is ProtocolClientCommandBase {
     return typeof x === "object" && "command_set" in x && "command" in x;
 }
 
-export function isProcessCommand(x: object): x is ProcessClientCommandBase {
+export function isProcessCommand (x: object): x is ProcessClientCommandBase {
     return isDiagnosticCommandBase(x) && x.command_set === "Process";
 }
 
-export function isEventPipeCommand(x: object): x is EventPipeClientCommandBase {
+export function isEventPipeCommand (x: object): x is EventPipeClientCommandBase {
     return isDiagnosticCommandBase(x) && x.command_set === "EventPipe";
 }
 
-export function isProcessCommandResumeRuntime(x: ProcessClientCommandBase): x is ProcessCommandResumeRuntime {
+export function isProcessCommandResumeRuntime (x: ProcessClientCommandBase): x is ProcessCommandResumeRuntime {
     return isProcessCommand(x) && x.command === "ResumeRuntime";
 }
 
-export function isEventPipeCollectTracingCommandProvider(x: object): x is EventPipeCollectTracingCommandProvider {
+export function isEventPipeCollectTracingCommandProvider (x: object): x is EventPipeCollectTracingCommandProvider {
     return typeof x === "object" && "keywords" in x && "logLevel" in x && "provider_name" in x && "filter_data" in x;
 }
 
-export function isEventPipeCommandCollectTracing2(x: object): x is EventPipeCommandCollectTracing2 {
+export function isEventPipeCommandCollectTracing2 (x: object): x is EventPipeCommandCollectTracing2 {
     return isEventPipeCommand(x) && x.command === "CollectTracing2" && "circularBufferMB" in x &&
         "format" in x && "requestRundown" in x && "providers" in x &&
         Array.isArray((<any>x).providers) && (<any>x).providers.every(isEventPipeCollectTracingCommandProvider);
 }
 
-export function isEventPipeCommandStopTracing(x: object): x is EventPipeCommandStopTracing {
+export function isEventPipeCommandStopTracing (x: object): x is EventPipeCommandStopTracing {
     return isEventPipeCommand(x) && x.command === "StopTracing" && "sessionID" in x;
 }
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/protocol-socket.ts b/src/mono/browser/runtime/diagnostics/server_pthread/protocol-socket.ts
index 7bd03e0d003a..049d1525cd32 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/protocol-socket.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/protocol-socket.ts
@@ -60,10 +60,10 @@ export type ParseResult = ParseResultBinaryCommandOk | ParseResultFail;
 class StatefulParser {
     private state: State = { state: InState.Idle };
 
-    constructor(private readonly emitCommandCallback: (command: BinaryProtocolCommand) => void) { }
+    constructor (private readonly emitCommandCallback: (command: BinaryProtocolCommand) => void) { }
 
     /// process the data in the given buffer and update the state.
-    receiveBuffer(buf: ArrayBuffer): void {
+    receiveBuffer (buf: ArrayBuffer): void {
         if (this.state.state == InState.Error) {
             return;
         }
@@ -87,7 +87,7 @@ class StatefulParser {
         }
     }
 
-    tryParseHeader(buf: Uint8Array): ParseResult {
+    tryParseHeader (buf: Uint8Array): ParseResult {
         const pos = { pos: 0 };
         if (buf.byteLength < Magic.MinimalHeaderSize) {
             // TODO: we need to see the magic and the size to make a partial commmand
@@ -110,14 +110,14 @@ class StatefulParser {
         return this.continueWithBuffer(partialState, buf.subarray(parsedSize));
     }
 
-    tryAppendBuffer(moreBuf: Uint8Array): ParseResult {
+    tryAppendBuffer (moreBuf: Uint8Array): ParseResult {
         if (this.state.state !== InState.PartialCommand) {
             return { success: false, error: "not in partial command state" };
         }
         return this.continueWithBuffer(this.state, moreBuf);
     }
 
-    continueWithBuffer(state: PartialCommandState, moreBuf: Uint8Array): ParseResult {
+    continueWithBuffer (state: PartialCommandState, moreBuf: Uint8Array): ParseResult {
         const buf = state.buf;
         let partialSize = state.size;
         let overflow: Uint8Array | null = null;
@@ -150,7 +150,7 @@ class StatefulParser {
         }
     }
 
-    tryParseCompletedBuffer(buf: Uint8Array, pos: { pos: number }): ParseResult {
+    tryParseCompletedBuffer (buf: Uint8Array, pos: { pos: number }): ParseResult {
         const command = Parser.tryParseCommand(buf, pos);
         if (!command) {
             this.setState({ state: InState.Error });
@@ -159,11 +159,11 @@ class StatefulParser {
         return { success: true, command, newState: { state: InState.Idle } };
     }
 
-    private setState(state: State) {
+    private setState (state: State) {
         this.state = state;
     }
 
-    reset() {
+    reset () {
         this.setState({ state: InState.Idle });
     }
 
@@ -173,9 +173,9 @@ class ProtocolSocketImpl implements ProtocolSocket {
     private readonly statefulParser = new StatefulParser(this.emitCommandCallback.bind(this));
     private protocolListeners = 0;
     private readonly messageListener: (this: CommonSocket, ev: MessageEvent) => void = this.onMessage.bind(this);
-    constructor(private readonly sock: CommonSocket) { }
+    constructor (private readonly sock: CommonSocket) { }
 
-    onMessage(this: ProtocolSocketImpl, ev: MessageEvent<ArrayBuffer | Blob | string>): void {
+    onMessage (this: ProtocolSocketImpl, ev: MessageEvent<ArrayBuffer | Blob | string>): void {
         const data = ev.data;
         mono_log_debug("protocol socket received message", ev.data);
         if (typeof data === "object" && data instanceof ArrayBuffer) {
@@ -190,17 +190,17 @@ class ProtocolSocketImpl implements ProtocolSocket {
         }
     }
 
-    dispatchEvent(evt: Event): boolean {
+    dispatchEvent (evt: Event): boolean {
         return this.sock.dispatchEvent(evt);
     }
 
-    onArrayBuffer(this: ProtocolSocketImpl, buf: ArrayBuffer) {
+    onArrayBuffer (this: ProtocolSocketImpl, buf: ArrayBuffer) {
         mono_log_debug("protocol-socket: parsing array buffer", buf);
         this.statefulParser.receiveBuffer(buf);
     }
 
     // called by the stateful parser when it has a complete command
-    emitCommandCallback(this: this, command: BinaryProtocolCommand): void {
+    emitCommandCallback (this: this, command: BinaryProtocolCommand): void {
         mono_log_debug("protocol-socket: queueing command", command);
         queueMicrotask(() => {
             mono_log_debug("dispatching protocol event with command", command);
@@ -209,14 +209,14 @@ class ProtocolSocketImpl implements ProtocolSocket {
     }
 
 
-    dispatchProtocolCommandEvent(cmd: BinaryProtocolCommand): void {
+    dispatchProtocolCommandEvent (cmd: BinaryProtocolCommand): void {
         const ev = new Event(dotnetDiagnosticsServerProtocolCommandEvent);
         (<any>ev).data = cmd; // FIXME: use a proper event subclass
         this.sock.dispatchEvent(ev);
     }
 
     addEventListener<K extends keyof ProtocolSocketEventMap>(type: K, listener: (this: ProtocolSocket, ev: ProtocolSocketEventMap[K]) => any, options?: boolean | AddEventListenerOptions | undefined): void;
-    addEventListener(type: string, listener: EventListenerOrEventListenerObject, options?: boolean | AddEventListenerOptions | undefined): void {
+    addEventListener (type: string, listener: EventListenerOrEventListenerObject, options?: boolean | AddEventListenerOptions | undefined): void {
         this.sock.addEventListener(type, listener, options);
         if (type === dotnetDiagnosticsServerProtocolCommandEvent) {
             if (this.protocolListeners === 0) {
@@ -228,7 +228,7 @@ class ProtocolSocketImpl implements ProtocolSocket {
     }
 
     removeEventListener<K extends keyof ProtocolSocketEventMap>(type: K, listener: (this: ProtocolSocket, ev: ProtocolSocketEventMap[K]) => any): void;
-    removeEventListener(type: string, listener: EventListenerOrEventListenerObject): void {
+    removeEventListener (type: string, listener: EventListenerOrEventListenerObject): void {
         if (type === dotnetDiagnosticsServerProtocolCommandEvent) {
             mono_log_debug("removing protocol listener and message chaser");
             this.protocolListeners--;
@@ -240,18 +240,18 @@ class ProtocolSocketImpl implements ProtocolSocket {
         this.sock.removeEventListener(type, listener);
     }
 
-    send(buf: Uint8Array) {
+    send (buf: Uint8Array) {
         this.sock.send(buf);
     }
 
-    close() {
+    close () {
         this.sock.close();
         this.statefulParser.reset();
     }
 
 }
 
-export function createProtocolSocket(socket: CommonSocket): ProtocolSocket {
+export function createProtocolSocket (socket: CommonSocket): ProtocolSocket {
     return new ProtocolSocketImpl(socket);
 }
 
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/socket-connection.ts b/src/mono/browser/runtime/diagnostics/server_pthread/socket-connection.ts
index d02ae16e3403..fec4a1fc32aa 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/socket-connection.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/socket-connection.ts
@@ -13,11 +13,11 @@ enum ListenerState {
 }
 
 class SocketGuts {
-    constructor(public readonly socket: CommonSocket) { }
-    close(): void {
+    constructor (public readonly socket: CommonSocket) { }
+    close (): void {
         this.socket.close();
     }
-    write(data: VoidPtr, size: number): void {
+    write (data: VoidPtr, size: number): void {
         const buf = new ArrayBuffer(size);
         const view = new Uint8Array(buf);
         // Can we avoid this copy?
@@ -33,12 +33,12 @@ class SocketGuts {
 export class EventPipeSocketConnection {
     private _state: ListenerState;
     readonly stream: SocketGuts;
-    constructor(socket: CommonSocket) {
+    constructor (socket: CommonSocket) {
         this._state = ListenerState.Sending;
         this.stream = new SocketGuts(socket);
     }
 
-    close(): void {
+    close (): void {
         mono_log_debug("EventPipe session stream closing websocket");
         switch (this._state) {
             case ListenerState.Error:
@@ -52,7 +52,7 @@ export class EventPipeSocketConnection {
         }
     }
 
-    write(ptr: VoidPtr, len: number): boolean {
+    write (ptr: VoidPtr, len: number): boolean {
         switch (this._state) {
             case ListenerState.Sending:
                 this.stream.write(ptr, len);
@@ -65,7 +65,7 @@ export class EventPipeSocketConnection {
         }
     }
 
-    private _onMessage(event: MessageEvent): void {
+    private _onMessage (event: MessageEvent): void {
         switch (this._state) {
             case ListenerState.Sending:
                 /* unexpected message */
@@ -85,7 +85,7 @@ export class EventPipeSocketConnection {
 
     }
 
-    private _onClose(/*event: CloseEvent*/) {
+    private _onClose (/*event: CloseEvent*/) {
         switch (this._state) {
             case ListenerState.Closed:
                 return; /* do nothing */
@@ -99,14 +99,14 @@ export class EventPipeSocketConnection {
         }
     }
 
-    private _onError(event: Event) {
+    private _onError (event: Event) {
         mono_log_debug("EventPipe session stream websocket error", event);
         this._state = ListenerState.Error;
         this.stream.close();
         // TODO: notify runtime that connection had an error
     }
 
-    addListeners(): void {
+    addListeners (): void {
         const socket = this.stream.socket;
         socket.addEventListener("message", this._onMessage.bind(this));
         addEventListener("close", this._onClose.bind(this));
@@ -116,7 +116,7 @@ export class EventPipeSocketConnection {
 
 /// Take over a WebSocket that was used by the diagnostic server to receive the StartCollecting command and
 /// use it for sending the event pipe data back to the host.
-export function takeOverSocket(socket: CommonSocket): EventPipeSocketConnection {
+export function takeOverSocket (socket: CommonSocket): EventPipeSocketConnection {
     const connection = new EventPipeSocketConnection(socket);
     connection.addListeners();
     return connection;
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/stream-queue.ts b/src/mono/browser/runtime/diagnostics/server_pthread/stream-queue.ts
index c7b9f13db7e3..693082b86b8a 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/stream-queue.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/stream-queue.ts
@@ -2,7 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 import { VoidPtr } from "../../types/emscripten";
-import * as Memory from "../../memory";
+import { getI32, notifyI32, setI32, storeI32 } from "../../memory";
 
 
 /// One-reader, one-writer, size 1 queue for messages from an EventPipe streaming thread to
@@ -38,71 +38,71 @@ export class StreamQueue {
     readonly workAvailable: EventTarget = new globalThis.EventTarget();
     readonly signalWorkAvailable = this.signalWorkAvailableImpl.bind(this);
 
-    constructor(readonly queue_addr: VoidPtr, readonly syncSendBuffer: SyncSendBuffer, readonly syncSendClose: SyncSendClose) {
+    constructor (readonly queue_addr: VoidPtr, readonly syncSendBuffer: SyncSendBuffer, readonly syncSendClose: SyncSendClose) {
         this.workAvailable.addEventListener("workAvailable", this.onWorkAvailable.bind(this));
     }
 
-    private get buf_addr(): VoidPtr {
+    private get buf_addr (): VoidPtr {
         return <any>this.queue_addr + BUF_OFFSET;
     }
-    private get count_addr(): VoidPtr {
+    private get count_addr (): VoidPtr {
         return <any>this.queue_addr + COUNT_OFFSET;
     }
-    private get buf_full_addr(): VoidPtr {
+    private get buf_full_addr (): VoidPtr {
         return <any>this.queue_addr + WRITE_DONE_OFFSET;
     }
 
     /// called from native code on the diagnostic thread when the streaming thread queues a call to notify the
     /// diagnostic thread that it can send the buffer.
-    wakeup(): void {
+    wakeup (): void {
         queueMicrotask(this.signalWorkAvailable);
     }
 
-    workAvailableNow(): void {
+    workAvailableNow (): void {
         // process the queue immediately, rather than waiting for the next event loop tick.
         this.onWorkAvailable();
     }
 
-    private signalWorkAvailableImpl(this: StreamQueue): void {
+    private signalWorkAvailableImpl (this: StreamQueue): void {
         this.workAvailable.dispatchEvent(new Event("workAvailable"));
     }
 
-    private onWorkAvailable(this: StreamQueue /*,event: Event */): void {
-        const buf = Memory.getI32(this.buf_addr) as unknown as VoidPtr;
+    private onWorkAvailable (this: StreamQueue /*,event: Event */): void {
+        const buf = getI32(this.buf_addr) as unknown as VoidPtr;
         const intptr_buf = buf as unknown as number;
         if (intptr_buf === STREAM_CLOSE_SENTINEL) {
             // special value signaling that the streaming thread closed the queue.
             this.syncSendClose();
         } else {
-            const count = Memory.getI32(this.count_addr);
-            Memory.setI32(this.buf_addr, 0);
+            const count = getI32(this.count_addr);
+            setI32(this.buf_addr, 0);
             if (count > 0) {
                 this.syncSendBuffer(buf, count);
             }
         }
         /* buffer is now not full */
-        Memory.Atomics.storeI32(this.buf_full_addr, 0);
+        storeI32(this.buf_full_addr, 0);
         /* wake up the writer thread */
-        Memory.Atomics.notifyI32(this.buf_full_addr, 1);
+        notifyI32(this.buf_full_addr, 1);
     }
 }
 
 // maps stream queue addresses to StreamQueue instances
 const streamQueueMap = new Map<VoidPtr, StreamQueue>();
 
-export function allocateQueue(nativeQueueAddr: VoidPtr, syncSendBuffer: SyncSendBuffer, syncSendClose: SyncSendClose): StreamQueue {
+export function allocateQueue (nativeQueueAddr: VoidPtr, syncSendBuffer: SyncSendBuffer, syncSendClose: SyncSendClose): StreamQueue {
     const queue = new StreamQueue(nativeQueueAddr, syncSendBuffer, syncSendClose);
     streamQueueMap.set(nativeQueueAddr, queue);
     return queue;
 }
 
-export function closeQueue(nativeQueueAddr: VoidPtr): void {
+export function closeQueue (nativeQueueAddr: VoidPtr): void {
     streamQueueMap.delete(nativeQueueAddr);
     // TODO: remove the event listener?
 }
 
 // called from native code on the diagnostic thread by queueing a call from the streaming thread.
-export function mono_wasm_diagnostic_server_stream_signal_work_available(nativeQueueAddr: VoidPtr, current_thread: number): void {
+export function mono_wasm_diagnostic_server_stream_signal_work_available (nativeQueueAddr: VoidPtr, current_thread: number): void {
     const queue = streamQueueMap.get(nativeQueueAddr);
     if (queue) {
         if (current_thread === 0) {
diff --git a/src/mono/browser/runtime/diagnostics/server_pthread/streaming-session.ts b/src/mono/browser/runtime/diagnostics/server_pthread/streaming-session.ts
index 6854261e4d00..53e30cc594a1 100644
--- a/src/mono/browser/runtime/diagnostics/server_pthread/streaming-session.ts
+++ b/src/mono/browser/runtime/diagnostics/server_pthread/streaming-session.ts
@@ -20,11 +20,11 @@ import { mono_assert } from "../../globals";
 ///  queue used by the EventPipe streaming thread to forward events to the diagnostic server thread,
 ///  and a wrapper around the WebSocket object used to send event data back to the host.
 export class EventPipeStreamingSession {
-    constructor(readonly sessionID: EventPipeSessionIDImpl,
+    constructor (readonly sessionID: EventPipeSessionIDImpl,
         readonly queue: StreamQueue, readonly connection: EventPipeSocketConnection) { }
 }
 
-export async function makeEventPipeStreamingSession(ws: WebSocket | MockRemoteSocket, cmd: EventPipeCommandCollectTracing2): Promise<EventPipeStreamingSession> {
+export async function makeEventPipeStreamingSession (ws: WebSocket | MockRemoteSocket, cmd: EventPipeCommandCollectTracing2): Promise<EventPipeStreamingSession> {
     mono_assert(WasmEnableThreads, "The diagnostic server requires threads to be enabled during build time.");
     // First, create the native IPC stream and get its queue.
     const ipcStreamAddr = cwraps.mono_wasm_diagnostic_server_create_stream(); // FIXME: this should be a wrapped in a JS object so we can free it when we're done.
@@ -46,17 +46,17 @@ export async function makeEventPipeStreamingSession(ws: WebSocket | MockRemoteSo
 }
 
 
-function providersStringFromObject(providers: EventPipeCollectTracingCommandProvider[]) {
+function providersStringFromObject (providers: EventPipeCollectTracingCommandProvider[]) {
     const providersString = providers.map(providerToString).join(",");
     return providersString;
 
-    function providerToString(provider: EventPipeCollectTracingCommandProvider): string {
+    function providerToString (provider: EventPipeCollectTracingCommandProvider): string {
         const keyword_str = provider.keywords[0] === 0 && provider.keywords[1] === 0 ? "" : keywordsToHexString(provider.keywords);
         const args_str = provider.filter_data === "" ? "" : ":" + provider.filter_data;
         return provider.provider_name + ":" + keyword_str + ":" + provider.logLevel + args_str;
     }
 
-    function keywordsToHexString(k: [number, number]): string {
+    function keywordsToHexString (k: [number, number]): string {
         const lo = k[0];
         const hi = k[1];
         const lo_hex = leftPad(lo.toString(16), "0", 8);
@@ -64,7 +64,7 @@ function providersStringFromObject(providers: EventPipeCollectTracingCommandProv
         return hi_hex + lo_hex;
     }
 
-    function leftPad(s: string, fill: string, width: number): string {
+    function leftPad (s: string, fill: string, width: number): string {
         if (s.length >= width)
             return s;
         const prefix = fill.repeat(width - s.length);
@@ -74,6 +74,6 @@ function providersStringFromObject(providers: EventPipeCollectTracingCommandProv
 
 
 const IPC_STREAM_QUEUE_OFFSET = 4; /* keep in sync with mono_wasm_diagnostic_server_create_stream() in C */
-function getQueueAddrFromStreamAddr(streamAddr: VoidPtr): VoidPtr {
+function getQueueAddrFromStreamAddr (streamAddr: VoidPtr): VoidPtr {
     return <any>streamAddr + IPC_STREAM_QUEUE_OFFSET;
 }
diff --git a/src/mono/browser/runtime/diagnostics/shared/controller-commands.ts b/src/mono/browser/runtime/diagnostics/shared/controller-commands.ts
index 5e08f56c627e..d4ed63049451 100644
--- a/src/mono/browser/runtime/diagnostics/shared/controller-commands.ts
+++ b/src/mono/browser/runtime/diagnostics/shared/controller-commands.ts
@@ -1,8 +1,8 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import type { MonoThreadMessage } from "../../pthreads/shared";
-import { isMonoThreadMessage } from "../../pthreads/shared";
+import { isMonoThreadMessage } from "../../pthreads";
+import type { MonoThreadMessage } from "../../types/internal";
 
 // Messages from the main thread to the diagnostic server thread
 export interface DiagnosticMessage extends MonoThreadMessage {
@@ -10,7 +10,7 @@ export interface DiagnosticMessage extends MonoThreadMessage {
     cmd: string;
 }
 
-export function isDiagnosticMessage(x: unknown): x is DiagnosticMessage {
+export function isDiagnosticMessage (x: unknown): x is DiagnosticMessage {
     return isMonoThreadMessage(x) && x.type === "diagnostic_server";
 }
 
@@ -29,7 +29,7 @@ export type DiagnosticServerControlCommandStart = DiagnosticServerControlCommand
 export type DiagnosticServerControlCommandStop = DiagnosticServerControlCommandSpecific<"stop">;
 export type DiagnosticServerControlCommandAttachToRuntime = DiagnosticServerControlCommandSpecific<"attach_to_runtime">;
 
-export function makeDiagnosticServerControlCommand<T extends DiagnosticServerControlCommand["cmd"]>(cmd: T): DiagnosticServerControlCommandSpecific<T> {
+export function makeDiagnosticServerControlCommand<T extends DiagnosticServerControlCommand["cmd"]> (cmd: T): DiagnosticServerControlCommandSpecific<T> {
     return {
         type: "diagnostic_server",
         cmd: cmd,
diff --git a/src/mono/browser/runtime/diagnostics/shared/create-session.ts b/src/mono/browser/runtime/diagnostics/shared/create-session.ts
index 17af0cbc5beb..866c0d129233 100644
--- a/src/mono/browser/runtime/diagnostics/shared/create-session.ts
+++ b/src/mono/browser/runtime/diagnostics/shared/create-session.ts
@@ -25,7 +25,7 @@ type SessionType =
     };
 
 
-function createSessionWithPtrCB(sessionIdOutPtr: VoidPtr, options: EventPipeCreateSessionOptions, sessionType: SessionType): false | EventPipeSessionIDImpl {
+function createSessionWithPtrCB (sessionIdOutPtr: VoidPtr, options: EventPipeCreateSessionOptions, sessionType: SessionType): false | EventPipeSessionIDImpl {
     memory.setI32(sessionIdOutPtr, 0);
     let tracePath: string | null;
     let ipcStreamAddr: VoidPtr;
@@ -43,10 +43,10 @@ function createSessionWithPtrCB(sessionIdOutPtr: VoidPtr, options: EventPipeCrea
     }
 }
 
-export function createEventPipeStreamingSession(ipcStreamAddr: VoidPtr, options: EventPipeCreateSessionOptions): EventPipeSessionIDImpl | false {
+export function createEventPipeStreamingSession (ipcStreamAddr: VoidPtr, options: EventPipeCreateSessionOptions): EventPipeSessionIDImpl | false {
     return memory.withStackAlloc(sizeOfInt32, createSessionWithPtrCB, options, { type: "stream", stream: ipcStreamAddr });
 }
 
-export function createEventPipeFileSession(tracePath: string, options: EventPipeCreateSessionOptions): EventPipeSessionIDImpl | false {
+export function createEventPipeFileSession (tracePath: string, options: EventPipeCreateSessionOptions): EventPipeSessionIDImpl | false {
     return memory.withStackAlloc(sizeOfInt32, createSessionWithPtrCB, options, { type: "file", filePath: tracePath });
 }
diff --git a/src/mono/browser/runtime/dotnet.d.ts b/src/mono/browser/runtime/dotnet.d.ts
index 23f55cacc455..e0d644558b46 100644
--- a/src/mono/browser/runtime/dotnet.d.ts
+++ b/src/mono/browser/runtime/dotnet.d.ts
@@ -189,7 +189,15 @@ type MonoConfig = {
     /**
      * initial number of workers to add to the emscripten pthread pool
      */
-    pthreadPoolSize?: number;
+    pthreadPoolInitialSize?: number;
+    /**
+     * number of unused workers kept in the emscripten pthread pool after startup
+     */
+    pthreadPoolUnusedSize?: number;
+    /**
+     * Delay in milliseconds before starting the finalizer thread
+     */
+    finalizerThreadStartDelayMs?: number;
     /**
      * If true, a list of the methods optimized by the interpreter will be saved and used for faster startup
      *  on future runs of the application
@@ -352,7 +360,15 @@ type SingleAssetBehaviors =
 /**
  * Typically blazor.boot.json
  */
- | "manifest";
+ | "manifest"
+/**
+ * The debugging symbols
+ */
+ | "symbols"
+/**
+ * Load segmentation rules file for Hybrid Globalization.
+ */
+ | "segmentation-rules";
 type AssetBehaviors = SingleAssetBehaviors | 
 /**
  * Load asset as a managed resource assembly.
@@ -381,15 +397,7 @@ type AssetBehaviors = SingleAssetBehaviors |
 /**
  * The javascript module that came from nuget package .
  */
- | "js-module-library-initializer"
-/**
- * The javascript module for threads.
- */
- | "symbols"
-/**
- * Load segmentation rules file for Hybrid Globalization.
- */
- | "segmentation-rules";
+ | "js-module-library-initializer";
 declare const enum GlobalizationMode {
     /**
      * Load sharded ICU data.
@@ -438,6 +446,13 @@ type APIType = {
      * @returns exit code of the Main() method.
      */
     runMainAndExit: (mainAssemblyName?: string, args?: string[]) => Promise<number>;
+    /**
+     * Exits the runtime.
+     * Note: after the runtime exits, it would reject all further calls to the API.
+     * @param code "process" exit code.
+     * @param reason could be a string or an Error object.
+     */
+    exit: (code: number, reason?: any) => void;
     /**
      * Sets the environment variable for the "process"
      * @param name
@@ -468,6 +483,10 @@ type APIType = {
      * Writes to the WASM linear memory
      */
     setHeapB32: (offset: NativePointer, value: number | boolean) => void;
+    /**
+     * Writes to the WASM linear memory
+     */
+    setHeapB8: (offset: NativePointer, value: number | boolean) => void;
     /**
      * Writes to the WASM linear memory
      */
@@ -516,6 +535,10 @@ type APIType = {
      * Reads from the WASM linear memory
      */
     getHeapB32: (offset: NativePointer) => boolean;
+    /**
+     * Reads from the WASM linear memory
+     */
+    getHeapB8: (offset: NativePointer) => boolean;
     /**
      * Reads from the WASM linear memory
      */
diff --git a/src/mono/browser/runtime/driver.c b/src/mono/browser/runtime/driver.c
index a7bd6f5966e0..f231a86119e7 100644
--- a/src/mono/browser/runtime/driver.c
+++ b/src/mono/browser/runtime/driver.c
@@ -181,7 +181,7 @@ cleanup_runtime_config (MonovmRuntimeConfigArguments *args, void *user_data)
 }
 
 EMSCRIPTEN_KEEPALIVE void
-mono_wasm_load_runtime (const char *unused, int debug_level)
+mono_wasm_load_runtime (int debug_level)
 {
 	const char *interp_opts = "";
 
@@ -227,115 +227,94 @@ mono_wasm_load_runtime (const char *unused, int debug_level)
 	bindings_initialize_internals();
 }
 
-EMSCRIPTEN_KEEPALIVE int
-mono_wasm_invoke_method_bound (MonoMethod *method, void* args /*JSMarshalerArguments*/, MonoString **out_exc)
+EMSCRIPTEN_KEEPALIVE void
+mono_wasm_invoke_jsexport (MonoMethod *method, void* args)
 {
 	PVOLATILE(MonoObject) temp_exc = NULL;
 
 	void *invoke_args[1] = { args };
-	int is_err = 0;
 
 	MONO_ENTER_GC_UNSAFE;
-	mono_runtime_invoke (method, NULL, invoke_args, (MonoObject **)&temp_exc);
+	mono_runtime_invoke (method, NULL, args ? invoke_args : NULL, (MonoObject **)&temp_exc);
 
 	// this failure is unlikely because it would be runtime error, not application exception.
 	// the application exception is passed inside JSMarshalerArguments `args`
-	if (temp_exc && out_exc) {
+	// so, if that happens, we should abort the runtime
+	if (temp_exc) {
 		PVOLATILE(MonoObject) exc2 = NULL;
-		store_volatile((MonoObject**)out_exc, (MonoObject*)mono_object_to_string ((MonoObject*)temp_exc, (MonoObject **)&exc2));
-		if (exc2)
-			store_volatile((MonoObject**)out_exc, (MonoObject*)mono_string_new (root_domain, "Exception Double Fault"));
-		is_err = 1;
+		store_volatile((MonoObject**)&temp_exc, (MonoObject*)mono_object_to_string ((MonoObject*)temp_exc, (MonoObject **)&exc2));
+		if (exc2) {
+			mono_wasm_trace_logger ("jsinterop", "critical", "mono_wasm_invoke_jsexport unexpected double fault", 1, NULL);
+		} else {
+			mono_wasm_trace_logger ("jsinterop", "critical", mono_string_to_utf8((MonoString*)temp_exc), 1, NULL);
+		}
+		abort ();
 	}
 	MONO_EXIT_GC_UNSAFE;
-	return is_err;
 }
 
-EMSCRIPTEN_KEEPALIVE int
-mono_wasm_invoke_method_raw (MonoMethod *method, MonoString **out_exc)
-{
-	PVOLATILE(MonoObject) temp_exc = NULL;
+#ifndef DISABLE_THREADS
 
-	int is_err = 0;
+extern void mono_threads_wasm_async_run_in_target_thread_vii (void* target_thread, void (*func) (gpointer, gpointer), gpointer user_data1, gpointer user_data2);
+extern void mono_threads_wasm_sync_run_in_target_thread_vii (void* target_thread, void (*func) (gpointer, gpointer), gpointer user_data1, gpointer args);
+extern void mono_print_thread_dump (void *sigctx);
 
-	MONO_ENTER_GC_UNSAFE;
-	mono_runtime_invoke (method, NULL, NULL, (MonoObject **)&temp_exc);
+EMSCRIPTEN_KEEPALIVE void
+mono_wasm_print_thread_dump (void)
+{
+	mono_print_thread_dump (NULL);
+}
 
-	if (temp_exc && out_exc) {
-		PVOLATILE(MonoObject) exc2 = NULL;
-		store_volatile((MonoObject**)out_exc, (MonoObject*)mono_object_to_string ((MonoObject*)temp_exc, (MonoObject **)&exc2));
-		if (exc2)
-			store_volatile((MonoObject**)out_exc, (MonoObject*)mono_string_new (root_domain, "Exception Double Fault"));
-		is_err = 1;
+// this is running on the target thread
+static void
+mono_wasm_invoke_jsexport_async_post_cb (MonoMethod *method, void* args)
+{
+	mono_wasm_invoke_jsexport (method, args);
+	if (args) {
+		MonoBoolean *is_receiver_should_free = (MonoBoolean *)(((char *) args) + 20/*JSMarshalerArgumentOffsets.ReceiverShouldFree*/);
+		if(*is_receiver_should_free != 0){
+			free (args);
+		}
 	}
-	MONO_EXIT_GC_UNSAFE;
-	return is_err;
 }
 
-EMSCRIPTEN_KEEPALIVE MonoMethod*
-mono_wasm_assembly_get_entry_point (MonoAssembly *assembly, int auto_insert_breakpoint)
+// async
+EMSCRIPTEN_KEEPALIVE void
+mono_wasm_invoke_jsexport_async_post (void* target_thread, MonoMethod *method, void* args /*JSMarshalerArguments*/)
 {
-	MonoImage *image;
-	MonoMethod *method;
+	mono_threads_wasm_async_run_in_target_thread_vii(target_thread, (void (*)(gpointer, gpointer))mono_wasm_invoke_jsexport_async_post_cb, method, args);
+}
 
-	MONO_ENTER_GC_UNSAFE;
-	image = mono_assembly_get_image (assembly);
-	uint32_t entry = mono_image_get_entry_point (image);
-	if (!entry)
-		goto end;
 
-	mono_domain_ensure_entry_assembly (root_domain, assembly);
-	method = mono_get_method (image, entry, NULL);
-
-	/*
-	 * If the entry point looks like a compiler generated wrapper around
-	 * an async method in the form "<Name>" then try to look up the async methods
-	 * "<Name>$" and "Name" it could be wrapping.  We do this because the generated
-	 * sync wrapper will call task.GetAwaiter().GetResult() when we actually want
-	 * to yield to the host runtime.
-	 */
-	if (mono_method_get_flags (method, NULL) & 0x0800 /* METHOD_ATTRIBUTE_SPECIAL_NAME */) {
-		const char *name = mono_method_get_name (method);
-		int name_length = strlen (name);
-
-		if ((*name != '<') || (name [name_length - 1] != '>'))
-			goto end;
-
-		MonoClass *klass = mono_method_get_class (method);
-		assert(klass);
-		char *async_name = malloc (name_length + 2);
-		snprintf (async_name, name_length + 2, "%s$", name);
-
-		// look for "<Name>$"
-		MonoMethodSignature *sig = mono_method_get_signature (method, image, mono_method_get_token (method));
-		MonoMethod *async_method = mono_class_get_method_from_name (klass, async_name, mono_signature_get_param_count (sig));
-		if (async_method != NULL) {
-			free (async_name);
-			method = async_method;
-			goto end;
-		}
+typedef void (*js_interop_event)(void* args);
+typedef void (*sync_context_pump)(void);
+extern js_interop_event before_sync_js_import;
+extern js_interop_event after_sync_js_import;
+extern sync_context_pump synchronization_context_pump_handler;
 
-		// look for "Name" by trimming the first and last character of "<Name>"
-		async_name [name_length - 1] = '\0';
-		async_method = mono_class_get_method_from_name (klass, async_name + 1, mono_signature_get_param_count (sig));
+// this is running on the target thread
+EMSCRIPTEN_KEEPALIVE void
+mono_wasm_invoke_jsexport_sync (MonoMethod *method, void* args)
+{
+	before_sync_js_import (args);
+	mono_wasm_invoke_jsexport (method, args);
+	after_sync_js_import (args);
+}
 
-		free (async_name);
-		if (async_method != NULL)
-			method = async_method;
-	}
+// sync
+EMSCRIPTEN_KEEPALIVE void
+mono_wasm_invoke_jsexport_sync_send (void* target_thread, MonoMethod *method, void* args /*JSMarshalerArguments*/)
+{
+	mono_threads_wasm_sync_run_in_target_thread_vii (target_thread, (void (*)(gpointer, gpointer))mono_wasm_invoke_jsexport_sync, method, args);
+}
 
-	end:
-	MONO_EXIT_GC_UNSAFE;
-	if (auto_insert_breakpoint)
-	{
-		MonoAssemblyName *aname = mono_assembly_get_name (assembly);
-		const char *name = mono_assembly_name_get_name (aname);
-		if (name != NULL)
-			mono_wasm_set_entrypoint_breakpoint(name, mono_method_get_token (method));
-	}
-	return method;
+EMSCRIPTEN_KEEPALIVE void mono_wasm_synchronization_context_pump (void)
+{
+	synchronization_context_pump_handler ();
 }
 
+#endif /* DISABLE_THREADS */
+
 EMSCRIPTEN_KEEPALIVE void
 mono_wasm_string_from_utf16_ref (const mono_unichar2 * chars, int length, MonoString **result)
 {
@@ -462,7 +441,9 @@ mono_wasm_init_finalizer_thread (void)
 {
 	// in the single threaded build, finalizers periodically run on the main thread instead.
 #ifndef DISABLE_THREADS
+	MONO_ENTER_GC_UNSAFE;
 	mono_gc_init_finalizer_thread ();
+	MONO_EXIT_GC_UNSAFE;
 #endif
 }
 
@@ -520,11 +501,19 @@ EMSCRIPTEN_KEEPALIVE int mono_wasm_f64_to_i52 (int64_t *destination, double valu
 
 // JS is responsible for freeing this
 EMSCRIPTEN_KEEPALIVE const char * mono_wasm_method_get_full_name (MonoMethod *method) {
-	return mono_method_get_full_name(method);
+	const char *res;
+	MONO_ENTER_GC_UNSAFE;
+	res = mono_method_get_full_name (method);
+	MONO_EXIT_GC_UNSAFE;
+	return res;
 }
 
 EMSCRIPTEN_KEEPALIVE const char * mono_wasm_method_get_name (MonoMethod *method) {
-	return mono_method_get_name(method);
+	const char *res;
+	MONO_ENTER_GC_UNSAFE;
+	res = mono_method_get_name (method);
+	MONO_EXIT_GC_UNSAFE;
+	return res;
 }
 
 EMSCRIPTEN_KEEPALIVE float mono_wasm_get_f32_unaligned (const float *src) {
diff --git a/src/mono/browser/runtime/export-api.ts b/src/mono/browser/runtime/export-api.ts
index 1b4b596bdb6e..3c8e6312c590 100644
--- a/src/mono/browser/runtime/export-api.ts
+++ b/src/mono/browser/runtime/export-api.ts
@@ -5,15 +5,16 @@ import type { MonoConfig, APIType } from "./types";
 
 import { mono_wasm_get_assembly_exports } from "./invoke-cs";
 import { mono_wasm_set_module_imports } from "./invoke-js";
-import { getB32, getF32, getF64, getI16, getI32, getI52, getI64Big, getI8, getU16, getU32, getU52, getU8, localHeapViewF32, localHeapViewF64, localHeapViewI16, localHeapViewI32, localHeapViewI64Big, localHeapViewI8, localHeapViewU16, localHeapViewU32, localHeapViewU8, setB32, setF32, setF64, setI16, setI32, setI52, setI64Big, setI8, setU16, setU32, setU52, setU8 } from "./memory";
+import { getB32, getB8, getF32, getF64, getI16, getI32, getI52, getI64Big, getI8, getU16, getU32, getU52, getU8, localHeapViewF32, localHeapViewF64, localHeapViewI16, localHeapViewI32, localHeapViewI64Big, localHeapViewI8, localHeapViewU16, localHeapViewU32, localHeapViewU8, setB32, setB8, setF32, setF64, setI16, setI32, setI52, setI64Big, setI8, setU16, setU32, setU52, setU8 } from "./memory";
 import { mono_run_main, mono_run_main_and_exit } from "./run";
 import { mono_wasm_setenv } from "./startup";
 import { loaderHelpers, runtimeHelpers } from "./globals";
 
-export function export_api(): any {
+export function export_api (): any {
     const api: APIType = {
         runMain: mono_run_main,
         runMainAndExit: mono_run_main_and_exit,
+        exit: loaderHelpers.mono_exit,
         setEnvironmentVariable: mono_wasm_setenv,
         getAssemblyExports: mono_wasm_get_assembly_exports,
         setModuleImports: mono_wasm_set_module_imports,
@@ -22,6 +23,7 @@ export function export_api(): any {
         },
         invokeLibraryInitializers: loaderHelpers.invokeLibraryInitializers,
         setHeapB32: setB32,
+        setHeapB8: setB8,
         setHeapU8: setU8,
         setHeapU16: setU16,
         setHeapU32: setU32,
@@ -34,6 +36,7 @@ export function export_api(): any {
         setHeapF32: setF32,
         setHeapF64: setF64,
         getHeapB32: getB32,
+        getHeapB8: getB8,
         getHeapU8: getU8,
         getHeapU16: getU16,
         getHeapU32: getU32,
diff --git a/src/mono/browser/runtime/exports-binding.ts b/src/mono/browser/runtime/exports-binding.ts
index cb286789879b..f5420a3a729e 100644
--- a/src/mono/browser/runtime/exports-binding.ts
+++ b/src/mono/browser/runtime/exports-binding.ts
@@ -5,14 +5,11 @@ import WasmEnableThreads from "consts:wasmEnableThreads";
 
 import { mono_wasm_debugger_log, mono_wasm_add_dbg_command_received, mono_wasm_set_entrypoint_breakpoint, mono_wasm_fire_debugger_agent_message_with_data, mono_wasm_fire_debugger_agent_message_with_data_to_pause } from "./debug";
 import { mono_wasm_release_cs_owned_object } from "./gc-handles";
-import { mono_wasm_bind_cs_function } from "./invoke-cs";
-import { mono_wasm_bind_js_import, mono_wasm_invoke_js_function, mono_wasm_invoke_import_async, mono_wasm_invoke_import_sync, mono_wasm_invoke_js_import } from "./invoke-js";
+import { mono_wasm_bind_js_import_ST, mono_wasm_invoke_js_function, mono_wasm_invoke_jsimport_MT, mono_wasm_invoke_jsimport_ST } from "./invoke-js";
 import { mono_interp_tier_prepare_jiterpreter, mono_jiterp_free_method_data_js } from "./jiterpreter";
 import { mono_interp_jit_wasm_entry_trampoline, mono_interp_record_interp_entry } from "./jiterpreter-interp-entry";
 import { mono_interp_jit_wasm_jit_call_trampoline, mono_interp_invoke_wasm_jit_call_trampoline, mono_interp_flush_jitcall_queue } from "./jiterpreter-jit-call";
 import { mono_wasm_resolve_or_reject_promise } from "./marshal-to-js";
-import { mono_wasm_eventloop_has_unsettled_interop_promises } from "./pthreads/shared/eventloop";
-import { mono_wasm_pthread_on_pthread_attached, mono_wasm_pthread_on_pthread_unregistered, mono_wasm_pthread_on_pthread_registered, mono_wasm_pthread_set_name } from "./pthreads/worker";
 import { mono_wasm_schedule_timer, schedule_background_exec } from "./scheduling";
 import { mono_wasm_asm_loaded } from "./startup";
 import { mono_wasm_diagnostic_server_on_server_thread_created } from "./diagnostics/server_pthread";
@@ -23,13 +20,20 @@ import { mono_wasm_profiler_leave, mono_wasm_profiler_enter } from "./profiler";
 import { mono_wasm_change_case, mono_wasm_change_case_invariant } from "./hybrid-globalization/change-case";
 import { mono_wasm_compare_string, mono_wasm_ends_with, mono_wasm_starts_with, mono_wasm_index_of } from "./hybrid-globalization/collations";
 import { mono_wasm_get_calendar_info } from "./hybrid-globalization/calendar";
-import { mono_wasm_install_js_worker_interop, mono_wasm_uninstall_js_worker_interop } from "./pthreads/shared";
 
 import { mono_wasm_get_culture_info } from "./hybrid-globalization/culture-info";
-import { mono_wasm_get_first_day_of_week, mono_wasm_get_first_week_of_year } from "./hybrid-globalization/locales";
+import { mono_wasm_get_locale_info, mono_wasm_get_first_day_of_week, mono_wasm_get_first_week_of_year } from "./hybrid-globalization/locales";
 import { mono_wasm_browser_entropy } from "./crypto";
 import { mono_wasm_cancel_promise } from "./cancelable-promise";
 
+import {
+    mono_wasm_start_deputy_thread_async,
+    mono_wasm_pthread_on_pthread_attached, mono_wasm_pthread_on_pthread_unregistered,
+    mono_wasm_pthread_on_pthread_registered, mono_wasm_pthread_set_name, mono_wasm_install_js_worker_interop, mono_wasm_uninstall_js_worker_interop, mono_wasm_start_io_thread_async, mono_wasm_warn_about_blocking_wait
+} from "./pthreads";
+import { mono_wasm_dump_threads } from "./pthreads/ui-thread";
+import { mono_wasm_schedule_synchronization_context } from "./pthreads/shared";
+
 // the JS methods would be visible to EMCC linker and become imports of the WASM module
 
 export const mono_wasm_threads_imports = !WasmEnableThreads ? [] : [
@@ -38,9 +42,12 @@ export const mono_wasm_threads_imports = !WasmEnableThreads ? [] : [
     mono_wasm_pthread_on_pthread_attached,
     mono_wasm_pthread_on_pthread_unregistered,
     mono_wasm_pthread_set_name,
+    mono_wasm_start_deputy_thread_async,
+    mono_wasm_start_io_thread_async,
+    mono_wasm_schedule_synchronization_context,
 
-    // threads.c
-    mono_wasm_eventloop_has_unsettled_interop_promises,
+    // mono-threads.c
+    mono_wasm_dump_threads,
     // diagnostics_server.c
     mono_wasm_diagnostic_server_on_server_thread_created,
     mono_wasm_diagnostic_server_on_runtime_server_init,
@@ -49,8 +56,8 @@ export const mono_wasm_threads_imports = !WasmEnableThreads ? [] : [
     // corebindings.c
     mono_wasm_install_js_worker_interop,
     mono_wasm_uninstall_js_worker_interop,
-    mono_wasm_invoke_import_async,
-    mono_wasm_invoke_import_sync,
+    mono_wasm_invoke_jsimport_MT,
+    mono_wasm_warn_about_blocking_wait,
 ];
 
 export const mono_wasm_imports = [
@@ -89,10 +96,9 @@ export const mono_wasm_imports = [
     // corebindings.c
     mono_wasm_console_clear,
     mono_wasm_release_cs_owned_object,
-    mono_wasm_bind_js_import,
+    mono_wasm_bind_js_import_ST,
     mono_wasm_invoke_js_function,
-    mono_wasm_invoke_js_import,
-    mono_wasm_bind_cs_function,
+    mono_wasm_invoke_jsimport_ST,
     mono_wasm_resolve_or_reject_promise,
     mono_wasm_cancel_promise,
     mono_wasm_change_case_invariant,
@@ -103,6 +109,7 @@ export const mono_wasm_imports = [
     mono_wasm_index_of,
     mono_wasm_get_calendar_info,
     mono_wasm_get_culture_info,
+    mono_wasm_get_locale_info,
     mono_wasm_get_first_day_of_week,
     mono_wasm_get_first_week_of_year,
 ];
@@ -113,7 +120,7 @@ const wasmImports: Function[] = [
     ...mono_wasm_threads_imports,
 ];
 
-export function replace_linker_placeholders(imports: WebAssembly.Imports) {
+export function replace_linker_placeholders (imports: WebAssembly.Imports) {
     // the output from emcc contains wrappers for these linker imports which add overhead,
     //  but now we have what we need to replace them with the actual functions
     // By default the imports all live inside of 'env', but emscripten minification could rename it to 'a'.
diff --git a/src/mono/browser/runtime/exports-internal.ts b/src/mono/browser/runtime/exports-internal.ts
index f431298cb1f5..bd5cfacafad6 100644
--- a/src/mono/browser/runtime/exports-internal.ts
+++ b/src/mono/browser/runtime/exports-internal.ts
@@ -1,13 +1,16 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import cwraps, { profiler_c_functions } from "./cwraps";
+import WasmEnableThreads from "consts:wasmEnableThreads";
+
+import { MonoObjectNull, type MonoObject } from "./types/internal";
+import cwraps, { profiler_c_functions, threads_c_functions as twraps } from "./cwraps";
 import { mono_wasm_send_dbg_command_with_parms, mono_wasm_send_dbg_command, mono_wasm_get_dbg_command_info, mono_wasm_get_details, mono_wasm_release_object, mono_wasm_call_function_on, mono_wasm_debugger_resume, mono_wasm_detach_debugger, mono_wasm_raise_debug_event, mono_wasm_change_debugger_log_level, mono_wasm_debugger_attached } from "./debug";
 import { http_wasm_supports_streaming_request, http_wasm_supports_streaming_response, http_wasm_create_controller, http_wasm_abort_request, http_wasm_abort_response, http_wasm_transform_stream_write, http_wasm_transform_stream_close, http_wasm_fetch, http_wasm_fetch_stream, http_wasm_fetch_bytes, http_wasm_get_response_header_names, http_wasm_get_response_header_values, http_wasm_get_response_bytes, http_wasm_get_response_length, http_wasm_get_streamed_response_bytes, http_wasm_get_response_type, http_wasm_get_response_status } from "./http";
 import { exportedRuntimeAPI, Module, runtimeHelpers } from "./globals";
 import { get_property, set_property, has_property, get_typeof_property, get_global_this, dynamic_import } from "./invoke-js";
 import { mono_wasm_stringify_as_error_with_stack } from "./logging";
-import { ws_wasm_create, ws_wasm_open, ws_wasm_send, ws_wasm_receive, ws_wasm_close, ws_wasm_abort } from "./web-socket";
+import { ws_wasm_create, ws_wasm_open, ws_wasm_send, ws_wasm_receive, ws_wasm_close, ws_wasm_abort, ws_get_state } from "./web-socket";
 import { mono_wasm_get_loaded_files } from "./assets";
 import { jiterpreter_dump_stats } from "./jiterpreter";
 import { interp_pgo_load_data, interp_pgo_save_data } from "./interp-pgo";
@@ -17,15 +20,19 @@ import { loadLazyAssembly } from "./lazyLoading";
 import { loadSatelliteAssemblies } from "./satelliteAssemblies";
 import { forceDisposeProxies } from "./gc-handles";
 import { mono_wasm_get_func_id_to_name_mappings } from "./logging";
-import { MonoObject, MonoObjectNull } from "./types/internal";
 import { monoStringToStringUnsafe } from "./strings";
-import { thread_available } from "./pthreads/browser";
+import { mono_wasm_bind_cs_function } from "./invoke-cs";
+
+import { mono_wasm_dump_threads } from "./pthreads";
 
-export function export_internal(): any {
+export function export_internal (): any {
     return {
         // tests
-        mono_wasm_exit: (exit_code: number) => { Module.err("early exit " + exit_code); },
+        mono_wasm_exit: (exit_code: number) => {
+            Module.err("early exit " + exit_code);
+        },
         forceDisposeProxies,
+        mono_wasm_dump_threads: WasmEnableThreads ? mono_wasm_dump_threads : undefined,
 
         // with mono_wasm_debugger_log and mono_wasm_trace_logger
         logging: undefined,
@@ -56,7 +63,7 @@ export function export_internal(): any {
         get_global_this,
         get_dotnet_instance: () => exportedRuntimeAPI,
         dynamic_import,
-        thread_available,
+        mono_wasm_bind_cs_function,
 
         // BrowserWebSocket
         ws_wasm_create,
@@ -65,6 +72,7 @@ export function export_internal(): any {
         ws_wasm_receive,
         ws_wasm_close,
         ws_wasm_abort,
+        ws_get_state,
 
         // BrowserHttpHandler
         http_wasm_supports_streaming_request,
@@ -107,17 +115,21 @@ export function export_internal(): any {
     };
 }
 
-export function cwraps_internal(internal: any): void {
+export function cwraps_internal (internal: any): void {
     Object.assign(internal, {
         mono_wasm_exit: cwraps.mono_wasm_exit,
         mono_wasm_profiler_init_aot: profiler_c_functions.mono_wasm_profiler_init_aot,
         mono_wasm_profiler_init_browser: profiler_c_functions.mono_wasm_profiler_init_browser,
         mono_wasm_exec_regression: cwraps.mono_wasm_exec_regression,
+        mono_wasm_print_thread_dump: WasmEnableThreads ? twraps.mono_wasm_print_thread_dump : undefined,
     });
 }
 
 /* @deprecated not GC safe, legacy support for Blazor */
-export function monoObjectAsBoolOrNullUnsafe(obj: MonoObject): boolean | null {
+export function monoObjectAsBoolOrNullUnsafe (obj: MonoObject): boolean | null {
+    // TODO https://github.com/dotnet/runtime/issues/100411
+    // after Blazor stops using monoObjectAsBoolOrNullUnsafe
+
     if (obj === MonoObjectNull) {
         return null;
     }
diff --git a/src/mono/browser/runtime/exports-linker.ts b/src/mono/browser/runtime/exports-linker.ts
index 84e39b446155..b1d5c0234f56 100644
--- a/src/mono/browser/runtime/exports-linker.ts
+++ b/src/mono/browser/runtime/exports-linker.ts
@@ -4,7 +4,7 @@
 import { mono_wasm_imports, mono_wasm_threads_imports } from "./exports-binding";
 import gitHash from "consts:gitHash";
 
-export function export_linker_indexes_as_code(): string {
+export function export_linker_indexes_as_code (): string {
     const indexByName: any = {
         mono_wasm_imports: {},
         mono_wasm_threads_imports: {},
@@ -25,5 +25,5 @@ export function export_linker_indexes_as_code(): string {
     `;
 }
 
-// this is running during runtime compile time inside rollup process. 
-(globalThis as any).export_linker_indexes_as_code = export_linker_indexes_as_code;
\ No newline at end of file
+// this is running during runtime compile time inside rollup process.
+(globalThis as any).export_linker_indexes_as_code = export_linker_indexes_as_code;
diff --git a/src/mono/browser/runtime/exports.ts b/src/mono/browser/runtime/exports.ts
index 2f3aa96a0ec6..d26e3c8b57ce 100644
--- a/src/mono/browser/runtime/exports.ts
+++ b/src/mono/browser/runtime/exports.ts
@@ -10,7 +10,7 @@ import WasmEnableExceptionHandling from "consts:wasmEnableExceptionHandling";
 import type { RuntimeAPI } from "./types";
 
 import { Module, exportedRuntimeAPI, loaderHelpers, passEmscriptenInternals, runtimeHelpers, setRuntimeGlobals, } from "./globals";
-import { GlobalObjects } from "./types/internal";
+import { GlobalObjects, RuntimeHelpers } from "./types/internal";
 import { configureEmscriptenStartup, configureRuntimeStartup, configureWorkerStartup } from "./startup";
 
 import { create_weak_ref } from "./weak-ref";
@@ -22,29 +22,29 @@ import { mono_wasm_stringify_as_error_with_stack } from "./logging";
 import { instantiate_asset, instantiate_symbols_asset, instantiate_segmentation_rules_asset } from "./assets";
 import { jiterpreter_dump_stats } from "./jiterpreter";
 import { forceDisposeProxies } from "./gc-handles";
-import { dumpThreads } from "./pthreads/browser";
+import { mono_wasm_dump_threads } from "./pthreads";
 
 export let runtimeList: RuntimeList;
 
-function initializeExports(globalObjects: GlobalObjects): RuntimeAPI {
+function initializeExports (globalObjects: GlobalObjects): RuntimeAPI {
     const module = Module;
     const globals = globalObjects;
     const globalThisAny = globalThis as any;
 
     Object.assign(globals.internal, export_internal());
-    Object.assign(runtimeHelpers, {
+    const rh: Partial<RuntimeHelpers> = {
         stringify_as_error_with_stack: mono_wasm_stringify_as_error_with_stack,
         instantiate_symbols_asset,
         instantiate_asset,
         jiterpreter_dump_stats,
         forceDisposeProxies,
         instantiate_segmentation_rules_asset,
-    });
+
+    };
     if (WasmEnableThreads) {
-        Object.assign(runtimeHelpers, {
-            dumpThreads,
-        });
+        rh.dumpThreads = mono_wasm_dump_threads;
     }
+    Object.assign(runtimeHelpers, rh);
 
     const API = export_api();
     Object.assign(exportedRuntimeAPI, {
@@ -65,8 +65,7 @@ function initializeExports(globalObjects: GlobalObjects): RuntimeAPI {
     if (!globalThisAny.getDotnetRuntime) {
         globalThisAny.getDotnetRuntime = (runtimeId: string) => globalThisAny.getDotnetRuntime.__list.getRuntime(runtimeId);
         globalThisAny.getDotnetRuntime.__list = runtimeList = new RuntimeList();
-    }
-    else {
+    } else {
         runtimeList = globalThisAny.getDotnetRuntime.__list;
     }
 
@@ -76,7 +75,7 @@ function initializeExports(globalObjects: GlobalObjects): RuntimeAPI {
 class RuntimeList {
     private list: { [runtimeId: number]: WeakRef<RuntimeAPI> } = {};
 
-    public registerRuntime(api: RuntimeAPI): number {
+    public registerRuntime (api: RuntimeAPI): number {
         if (api.runtimeId === undefined) {
             api.runtimeId = Object.keys(this.list).length;
         }
@@ -85,7 +84,7 @@ class RuntimeList {
         return api.runtimeId;
     }
 
-    public getRuntime(runtimeId: number): RuntimeAPI | undefined {
+    public getRuntime (runtimeId: number): RuntimeAPI | undefined {
         const wr = this.list[runtimeId];
         return wr ? wr.deref() : undefined;
     }
@@ -94,4 +93,4 @@ class RuntimeList {
 // export external API
 export {
     passEmscriptenInternals, initializeExports, initializeReplacements, configureRuntimeStartup, configureEmscriptenStartup, configureWorkerStartup, setRuntimeGlobals
-};
\ No newline at end of file
+};
diff --git a/src/mono/browser/runtime/gc-handles.ts b/src/mono/browser/runtime/gc-handles.ts
index c192891e5142..30e3f3027145 100644
--- a/src/mono/browser/runtime/gc-handles.ts
+++ b/src/mono/browser/runtime/gc-handles.ts
@@ -4,13 +4,14 @@
 import WasmEnableThreads from "consts:wasmEnableThreads";
 import BuildConfiguration from "consts:configuration";
 
-import { loaderHelpers, mono_assert, runtimeHelpers } from "./globals";
+import { loaderHelpers, mono_assert } from "./globals";
 import { assert_js_interop, js_import_wrapper_by_fn_handle } from "./invoke-js";
 import { mono_log_info, mono_log_warn } from "./logging";
 import { bound_cs_function_symbol, imported_js_function_symbol, proxy_debug_symbol } from "./marshal";
 import { GCHandle, GCHandleNull, JSHandle, WeakRefInternal } from "./types/internal";
-import { _use_weak_ref, create_weak_ref } from "./weak-ref";
+import { _use_weak_ref, create_strong_ref, create_weak_ref } from "./weak-ref";
 import { exportsByAssembly } from "./invoke-cs";
+import { release_js_owned_object_by_gc_handle } from "./managed-exports";
 
 const _use_finalization_registry = typeof globalThis.FinalizationRegistry === "function";
 let _js_owned_object_registry: FinalizationRegistry<any>;
@@ -30,24 +31,24 @@ let _next_gcv_handle = -2;
 // GCVHandle is like GCHandle, but it's not tracked and allocated by the mono GC, but just by JS.
 // It's used when we need to create GCHandle-like identity ahead of time, before calling Mono.
 // they have negative values, so that they don't collide with GCHandles.
-export function alloc_gcv_handle(): GCHandle {
+export function alloc_gcv_handle (): GCHandle {
     const gcv_handle = _gcv_handle_free_list.length ? _gcv_handle_free_list.pop() : _next_gcv_handle--;
     return gcv_handle as any;
 }
 
-export function free_gcv_handle(gcv_handle: GCHandle): void {
+export function free_gcv_handle (gcv_handle: GCHandle): void {
     _gcv_handle_free_list.push(gcv_handle);
 }
 
-export function is_jsv_handle(js_handle: JSHandle): boolean {
+export function is_jsv_handle (js_handle: JSHandle): boolean {
     return (js_handle as any) < -1;
 }
 
-export function is_js_handle(js_handle: JSHandle): boolean {
+export function is_js_handle (js_handle: JSHandle): boolean {
     return (js_handle as any) > 0;
 }
 
-export function is_gcv_handle(gc_handle: GCHandle): boolean {
+export function is_gcv_handle (gc_handle: GCHandle): boolean {
     return (gc_handle as any) < -1;
 }
 
@@ -61,7 +62,7 @@ export const cs_owned_js_handle_symbol = Symbol.for("wasm cs_owned_js_handle");
 export const do_not_force_dispose = Symbol.for("wasm do_not_force_dispose");
 
 
-export function mono_wasm_get_jsobj_from_js_handle(js_handle: JSHandle): any {
+export function mono_wasm_get_jsobj_from_js_handle (js_handle: JSHandle): any {
     if (is_js_handle(js_handle))
         return _cs_owned_objects_by_js_handle[<any>js_handle];
     if (is_jsv_handle(js_handle))
@@ -69,7 +70,7 @@ export function mono_wasm_get_jsobj_from_js_handle(js_handle: JSHandle): any {
     return null;
 }
 
-export function mono_wasm_get_js_handle(js_obj: any): JSHandle {
+export function mono_wasm_get_js_handle (js_obj: any): JSHandle {
     assert_js_interop();
     if (js_obj[cs_owned_js_handle_symbol]) {
         return js_obj[cs_owned_js_handle_symbol];
@@ -90,7 +91,7 @@ export function mono_wasm_get_js_handle(js_obj: any): JSHandle {
     return js_handle as JSHandle;
 }
 
-export function register_with_jsv_handle(js_obj: any, jsv_handle: JSHandle) {
+export function register_with_jsv_handle (js_obj: any, jsv_handle: JSHandle) {
     assert_js_interop();
     // note _cs_owned_objects_by_js_handle is list, not Map. That's why we maintain _js_handle_free_list.
     _cs_owned_objects_by_jsv_handle[0 - <any>jsv_handle] = js_obj;
@@ -101,14 +102,13 @@ export function register_with_jsv_handle(js_obj: any, jsv_handle: JSHandle) {
 }
 
 // note: in MT, this is called from locked JSProxyContext. Don't call anything that would need locking.
-export function mono_wasm_release_cs_owned_object(js_handle: JSHandle): void {
+export function mono_wasm_release_cs_owned_object (js_handle: JSHandle): void {
     let obj: any;
     if (is_js_handle(js_handle)) {
         obj = _cs_owned_objects_by_js_handle[<any>js_handle];
         _cs_owned_objects_by_js_handle[<any>js_handle] = undefined;
         _js_handle_free_list.push(js_handle);
-    }
-    else if (is_jsv_handle(js_handle)) {
+    } else if (is_jsv_handle(js_handle)) {
         obj = _cs_owned_objects_by_jsv_handle[0 - <any>js_handle];
         _cs_owned_objects_by_jsv_handle[0 - <any>js_handle] = undefined;
         // see free list in JSProxyContext.FreeJSVHandle
@@ -119,7 +119,7 @@ export function mono_wasm_release_cs_owned_object(js_handle: JSHandle): void {
     }
 }
 
-export function setup_managed_proxy(owner: any, gc_handle: GCHandle): void {
+export function setup_managed_proxy (owner: any, gc_handle: GCHandle): void {
     assert_js_interop();
     // keep the gc_handle so that we could easily convert it back to original C# object for roundtrip
     owner[js_owned_gc_handle_symbol] = gc_handle;
@@ -136,7 +136,15 @@ export function setup_managed_proxy(owner: any, gc_handle: GCHandle): void {
     _js_owned_object_table.set(gc_handle, wr);
 }
 
-export function teardown_managed_proxy(owner: any, gc_handle: GCHandle, skipManaged?: boolean): void {
+export function upgrade_managed_proxy_to_strong_ref (owner: any, gc_handle: GCHandle): void {
+    const sr = create_strong_ref(owner);
+    if (_use_finalization_registry) {
+        _js_owned_object_registry.unregister(owner);
+    }
+    _js_owned_object_table.set(gc_handle, sr);
+}
+
+export function teardown_managed_proxy (owner: any, gc_handle: GCHandle, skipManaged?: boolean): void {
     assert_js_interop();
     // The JS object associated with this gc_handle has been collected by the JS GC.
     // As such, it's not possible for this gc_handle to be invoked by JS anymore, so
@@ -151,8 +159,8 @@ export function teardown_managed_proxy(owner: any, gc_handle: GCHandle, skipMana
         }
     }
     if (gc_handle !== GCHandleNull && _js_owned_object_table.delete(gc_handle) && !skipManaged) {
-        if (loaderHelpers.is_runtime_running()) {
-            runtimeHelpers.javaScriptExports.release_js_owned_object_by_gc_handle(gc_handle);
+        if (loaderHelpers.is_runtime_running() && !force_dispose_proxies_in_progress) {
+            release_js_owned_object_by_gc_handle(gc_handle);
         }
     }
     if (is_gcv_handle(gc_handle)) {
@@ -160,13 +168,13 @@ export function teardown_managed_proxy(owner: any, gc_handle: GCHandle, skipMana
     }
 }
 
-export function assert_not_disposed(result: any): GCHandle {
+export function assert_not_disposed (result: any): GCHandle {
     const gc_handle = result[js_owned_gc_handle_symbol];
     mono_check(gc_handle != GCHandleNull, "ObjectDisposedException");
     return gc_handle;
 }
 
-function _js_owned_object_finalized(gc_handle: GCHandle): void {
+function _js_owned_object_finalized (gc_handle: GCHandle): void {
     if (!loaderHelpers.is_runtime_running()) {
         // We're shutting down, so don't bother doing anything else.
         return;
@@ -174,7 +182,7 @@ function _js_owned_object_finalized(gc_handle: GCHandle): void {
     teardown_managed_proxy(null, gc_handle);
 }
 
-export function _lookup_js_owned_object(gc_handle: GCHandle): any {
+export function _lookup_js_owned_object (gc_handle: GCHandle): any {
     if (!gc_handle)
         return null;
     const wr = _js_owned_object_table.get(gc_handle);
@@ -186,7 +194,7 @@ export function _lookup_js_owned_object(gc_handle: GCHandle): any {
     return null;
 }
 
-export function assertNoProxies(): void {
+export function assertNoProxies (): void {
     if (!WasmEnableThreads) return;
     mono_assert(_js_owned_object_table.size === 0, "There should be no proxies on this thread.");
     mono_assert(_cs_owned_objects_by_js_handle.length === 1, "There should be no proxies on this thread.");
@@ -195,11 +203,14 @@ export function assertNoProxies(): void {
     mono_assert(js_import_wrapper_by_fn_handle.length === 1, "There should be no imports on this thread.");
 }
 
+let force_dispose_proxies_in_progress = false;
+
 // when we arrive here from UninstallWebWorkerInterop, the C# will unregister the handles too.
 // when called from elsewhere, C# side could be unbalanced!!
-export function forceDisposeProxies(disposeMethods: boolean, verbose: boolean): void {
+export function forceDisposeProxies (disposeMethods: boolean, verbose: boolean): void {
     let keepSomeCsAlive = false;
     let keepSomeJsAlive = false;
+    force_dispose_proxies_in_progress = true;
 
     let doneImports = 0;
     let doneExports = 0;
@@ -324,4 +335,4 @@ export function forceDisposeProxies(disposeMethods: boolean, verbose: boolean):
         exportsByAssembly.clear();
     }
     mono_log_info(`forceDisposeProxies done: ${doneImports} imports, ${doneExports} exports, ${doneGCHandles} GCHandles, ${doneJSHandles} JSHandles.`);
-}
\ No newline at end of file
+}
diff --git a/src/mono/browser/runtime/gc-lock.ts b/src/mono/browser/runtime/gc-lock.ts
index 876e6a2eb586..f787c4710bab 100644
--- a/src/mono/browser/runtime/gc-lock.ts
+++ b/src/mono/browser/runtime/gc-lock.ts
@@ -1,11 +1,17 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
 import WasmEnableThreads from "consts:wasmEnableThreads";
 import { ENVIRONMENT_IS_PTHREAD } from "./globals";
 import cwraps from "./cwraps";
 
-let locked = false;
+export let gc_locked = false;
+
+// TODO https://github.com/dotnet/runtime/issues/100411
+// after Blazor stops using mono_wasm_gc_lock, mono_wasm_gc_unlock
 
-export function mono_wasm_gc_lock(): void {
-    if (locked) {
+export function mono_wasm_gc_lock (): void {
+    if (gc_locked) {
         throw new Error("GC is already locked");
     }
     if (WasmEnableThreads) {
@@ -14,11 +20,11 @@ export function mono_wasm_gc_lock(): void {
         }
         cwraps.mono_wasm_gc_lock();
     }
-    locked = true;
+    gc_locked = true;
 }
 
-export function mono_wasm_gc_unlock(): void {
-    if (!locked) {
+export function mono_wasm_gc_unlock (): void {
+    if (!gc_locked) {
         throw new Error("GC is not locked");
     }
     if (WasmEnableThreads) {
@@ -27,5 +33,5 @@ export function mono_wasm_gc_unlock(): void {
         }
         cwraps.mono_wasm_gc_unlock();
     }
-    locked = false;
+    gc_locked = false;
 }
diff --git a/src/mono/browser/runtime/globals.ts b/src/mono/browser/runtime/globals.ts
index be2ae8f86bee..047fcb60c950 100644
--- a/src/mono/browser/runtime/globals.ts
+++ b/src/mono/browser/runtime/globals.ts
@@ -9,7 +9,7 @@
 import gitHash from "consts:gitHash";
 
 import { RuntimeAPI } from "./types/index";
-import type { GlobalObjects, EmscriptenInternals, RuntimeHelpers, LoaderHelpers, DotnetModuleInternal, PromiseAndController, EmscriptenBuildOptions } from "./types/internal";
+import type { GlobalObjects, EmscriptenInternals, RuntimeHelpers, LoaderHelpers, DotnetModuleInternal, PromiseAndController, EmscriptenBuildOptions, GCHandle } from "./types/internal";
 import { mono_log_error } from "./logging";
 
 // these are our public API (except internal)
@@ -32,7 +32,7 @@ export let loaderHelpers: LoaderHelpers = null as any;
 
 export let _runtimeModuleLoaded = false; // please keep it in place also as rollup guard
 
-export function passEmscriptenInternals(internals: EmscriptenInternals, emscriptenBuildOptions: EmscriptenBuildOptions): void {
+export function passEmscriptenInternals (internals: EmscriptenInternals, emscriptenBuildOptions: EmscriptenBuildOptions): void {
     runtimeHelpers.emscriptenBuildOptions = emscriptenBuildOptions;
 
     ENVIRONMENT_IS_PTHREAD = internals.isPThread;
@@ -44,7 +44,7 @@ export function passEmscriptenInternals(internals: EmscriptenInternals, emscript
 }
 
 // NOTE: this is called AFTER the config is loaded
-export function setRuntimeGlobals(globalObjects: GlobalObjects) {
+export function setRuntimeGlobals (globalObjects: GlobalObjects) {
     if (_runtimeModuleLoaded) {
         throw new Error("Runtime module already loaded");
     }
@@ -55,7 +55,7 @@ export function setRuntimeGlobals(globalObjects: GlobalObjects) {
     loaderHelpers = globalObjects.loaderHelpers;
     exportedRuntimeAPI = globalObjects.api;
 
-    Object.assign(runtimeHelpers, {
+    const rh: Partial<RuntimeHelpers> = {
         gitHash,
         allAssetsInMemory: createPromiseController<void>(),
         dotnetReady: createPromiseController<any>(),
@@ -64,15 +64,18 @@ export function setRuntimeGlobals(globalObjects: GlobalObjects) {
         afterPreInit: createPromiseController<void>(),
         afterPreRun: createPromiseController<void>(),
         beforeOnRuntimeInitialized: createPromiseController<void>(),
+        afterMonoStarted: createPromiseController<GCHandle | undefined>(),
+        afterIOStarted: createPromiseController<void>(),
         afterOnRuntimeInitialized: createPromiseController<void>(),
         afterPostRun: createPromiseController<void>(),
-        mono_wasm_exit: () => {
-            throw new Error("Mono shutdown");
+        nativeAbort: (reason: any) => {
+            throw reason || new Error("abort");
         },
-        abort: (reason: any) => {
-            throw reason;
-        }
-    });
+        nativeExit: (code: number) => {
+            throw new Error("exit:" + code);
+        },
+    };
+    Object.assign(runtimeHelpers, rh);
 
     Object.assign(globalObjects.module.config!, {}) as any;
     Object.assign(globalObjects.api, {
@@ -83,14 +86,14 @@ export function setRuntimeGlobals(globalObjects: GlobalObjects) {
     });
 }
 
-export function createPromiseController<T>(afterResolve?: () => void, afterReject?: () => void): PromiseAndController<T> {
+export function createPromiseController<T> (afterResolve?: () => void, afterReject?: () => void): PromiseAndController<T> {
     return loaderHelpers.createPromiseController<T>(afterResolve, afterReject);
 }
 
 // this will abort the program if the condition is false
 // see src\mono\browser\runtime\rollup.config.js
 // we inline the condition, because the lambda could allocate closure on hot path otherwise
-export function mono_assert(condition: unknown, messageFactory: string | (() => string)): asserts condition {
+export function mono_assert (condition: unknown, messageFactory: string | (() => string)): asserts condition {
     if (condition) return;
     const message = "Assert failed: " + (typeof messageFactory === "function"
         ? messageFactory()
diff --git a/src/mono/browser/runtime/guarded-promise.ts b/src/mono/browser/runtime/guarded-promise.ts
index fe46ca43521b..8901403b758b 100644
--- a/src/mono/browser/runtime/guarded-promise.ts
+++ b/src/mono/browser/runtime/guarded-promise.ts
@@ -3,7 +3,7 @@
 
 /// A Promise<T> that guards against multiple-resolve, multiple-reject, reject-after-accept and accept-after-reject.
 class GuardedPromise<T> extends Promise<T> {
-    constructor(executor: (resolve: (value: T | PromiseLike<T>) => void, reject: (reason?: any) => void) => void) {
+    constructor (executor: (resolve: (value: T | PromiseLike<T>) => void, reject: (reason?: any) => void) => void) {
         super((resolve, reject) => {
             let resolved = false;
             let rejected = false;
diff --git a/src/mono/browser/runtime/http.ts b/src/mono/browser/runtime/http.ts
index 9f606f481d39..9591c3c86fed 100644
--- a/src/mono/browser/runtime/http.ts
+++ b/src/mono/browser/runtime/http.ts
@@ -11,7 +11,7 @@ import type { VoidPtr } from "./types/emscripten";
 import { ControllablePromise } from "./types/internal";
 
 
-function verifyEnvironment() {
+function verifyEnvironment () {
     if (typeof globalThis.fetch !== "function" || typeof globalThis.AbortController !== "function") {
         const message = ENVIRONMENT_IS_NODE
             ? "Please install `node-fetch` and `node-abort-controller` npm packages to enable HTTP client support. See also https://aka.ms/dotnet-wasm-features"
@@ -20,12 +20,16 @@ function verifyEnvironment() {
     }
 }
 
-function commonAsserts(controller: HttpController) {
+function commonAsserts (controller: HttpController) {
     assert_js_interop();
     mono_assert(controller, "expected controller");
 }
 
-export function http_wasm_supports_streaming_request(): boolean {
+let http_wasm_supports_streaming_request_cached: boolean | undefined;
+export function http_wasm_supports_streaming_request (): boolean {
+    if (http_wasm_supports_streaming_request_cached !== undefined) {
+        return http_wasm_supports_streaming_request_cached;
+    }
     // Detecting streaming request support works like this:
     // If the browser doesn't support a particular body type, it calls toString() on the object and uses the result as the body.
     // So, if the browser doesn't support request streams, the request body becomes the string "[object ReadableStream]".
@@ -38,21 +42,28 @@ export function http_wasm_supports_streaming_request(): boolean {
         const hasContentType = new Request("", {
             body: new ReadableStream(),
             method: "POST",
-            get duplex() {
+            get duplex () {
                 duplexAccessed = true;
                 return "half";
             },
         } as RequestInit /* https://github.com/microsoft/TypeScript-DOM-lib-generator/issues/1483 */).headers.has("Content-Type");
-        return duplexAccessed && !hasContentType;
+        http_wasm_supports_streaming_request_cached = duplexAccessed && !hasContentType;
+    } else {
+        http_wasm_supports_streaming_request_cached = false;
     }
-    return false;
+    return http_wasm_supports_streaming_request_cached;
 }
 
-export function http_wasm_supports_streaming_response(): boolean {
-    return typeof Response !== "undefined" && "body" in Response.prototype && typeof ReadableStream === "function";
+let http_wasm_supports_streaming_response_cached: boolean | undefined;
+export function http_wasm_supports_streaming_response (): boolean {
+    if (http_wasm_supports_streaming_response_cached !== undefined) {
+        return http_wasm_supports_streaming_response_cached;
+    }
+    http_wasm_supports_streaming_response_cached = typeof Response !== "undefined" && "body" in Response.prototype && typeof ReadableStream === "function";
+    return http_wasm_supports_streaming_response_cached;
 }
 
-export function http_wasm_create_controller(): HttpController {
+export function http_wasm_create_controller (): HttpController {
     verifyEnvironment();
     assert_js_interop();
     const controller: HttpController = {
@@ -61,21 +72,21 @@ export function http_wasm_create_controller(): HttpController {
     return controller;
 }
 
-export function http_wasm_abort_request(controller: HttpController): void {
+export function http_wasm_abort_request (controller: HttpController): void {
     try {
         if (controller.streamWriter) {
             controller.streamWriter.abort();
         }
-    }
-    catch (err) {
+    } catch (err) {
         // ignore
     }
     http_wasm_abort_response(controller);
 }
 
-export function http_wasm_abort_response(controller: HttpController): void {
+export function http_wasm_abort_response (controller: HttpController): void {
     if (BuildConfiguration === "Debug") commonAsserts(controller);
     try {
+        controller.isAborted = true;
         if (controller.streamReader) {
             controller.streamReader.cancel().catch((err) => {
                 if (err && err.name !== "AbortError") {
@@ -85,13 +96,12 @@ export function http_wasm_abort_response(controller: HttpController): void {
             });
         }
         controller.abortController.abort();
-    }
-    catch (err) {
+    } catch (err) {
         // ignore
     }
 }
 
-export function http_wasm_transform_stream_write(controller: HttpController, bufferPtr: VoidPtr, bufferLength: number): ControllablePromise<void> {
+export function http_wasm_transform_stream_write (controller: HttpController, bufferPtr: VoidPtr, bufferLength: number): ControllablePromise<void> {
     if (BuildConfiguration === "Debug") commonAsserts(controller);
     mono_assert(bufferLength > 0, "expected bufferLength > 0");
     // the bufferPtr is pinned by the caller
@@ -106,7 +116,7 @@ export function http_wasm_transform_stream_write(controller: HttpController, buf
     });
 }
 
-export function http_wasm_transform_stream_close(controller: HttpController): ControllablePromise<void> {
+export function http_wasm_transform_stream_close (controller: HttpController): ControllablePromise<void> {
     mono_assert(controller, "expected controller");
     return wrap_as_cancelable_promise(async () => {
         mono_assert(controller.streamWriter, "expected streamWriter");
@@ -117,7 +127,7 @@ export function http_wasm_transform_stream_close(controller: HttpController): Co
     });
 }
 
-export function http_wasm_fetch_stream(controller: HttpController, url: string, header_names: string[], header_values: string[], option_names: string[], option_values: any[]): ControllablePromise<void> {
+export function http_wasm_fetch_stream (controller: HttpController, url: string, header_names: string[], header_values: string[], option_names: string[], option_values: any[]): ControllablePromise<void> {
     if (BuildConfiguration === "Debug") commonAsserts(controller);
     const transformStream = new TransformStream<Uint8Array, Uint8Array>();
     controller.streamWriter = transformStream.writable.getWriter();
@@ -125,7 +135,7 @@ export function http_wasm_fetch_stream(controller: HttpController, url: string,
     return fetch_promise;
 }
 
-export function http_wasm_fetch_bytes(controller: HttpController, url: string, header_names: string[], header_values: string[], option_names: string[], option_values: any[], bodyPtr: VoidPtr, bodyLength: number): ControllablePromise<void> {
+export function http_wasm_fetch_bytes (controller: HttpController, url: string, header_names: string[], header_values: string[], option_names: string[], option_values: any[], bodyPtr: VoidPtr, bodyLength: number): ControllablePromise<void> {
     if (BuildConfiguration === "Debug") commonAsserts(controller);
     // the bodyPtr is pinned by the caller
     const view = new Span(bodyPtr, bodyLength, MemoryViewType.Byte);
@@ -133,7 +143,7 @@ export function http_wasm_fetch_bytes(controller: HttpController, url: string, h
     return http_wasm_fetch(controller, url, header_names, header_values, option_names, option_values, copy);
 }
 
-export function http_wasm_fetch(controller: HttpController, url: string, header_names: string[], header_values: string[], option_names: string[], option_values: any[], body: Uint8Array | ReadableStream | null): ControllablePromise<void> {
+export function http_wasm_fetch (controller: HttpController, url: string, header_names: string[], header_values: string[], option_names: string[], option_values: any[], body: Uint8Array | ReadableStream | null): ControllablePromise<void> {
     if (BuildConfiguration === "Debug") commonAsserts(controller);
     verifyEnvironment();
     assert_js_interop();
@@ -179,30 +189,30 @@ export function http_wasm_fetch(controller: HttpController, url: string, header_
     return controller.responsePromise;
 }
 
-export function http_wasm_get_response_type(controller: HttpController): string | undefined {
+export function http_wasm_get_response_type (controller: HttpController): string | undefined {
     if (BuildConfiguration === "Debug") commonAsserts(controller);
     return controller.response?.type;
 }
 
-export function http_wasm_get_response_status(controller: HttpController): number {
+export function http_wasm_get_response_status (controller: HttpController): number {
     if (BuildConfiguration === "Debug") commonAsserts(controller);
     return controller.response?.status ?? 0;
 }
 
 
-export function http_wasm_get_response_header_names(controller: HttpController): string[] {
+export function http_wasm_get_response_header_names (controller: HttpController): string[] {
     if (BuildConfiguration === "Debug") commonAsserts(controller);
     mono_assert(controller.responseHeaderNames, "expected responseHeaderNames");
     return controller.responseHeaderNames;
 }
 
-export function http_wasm_get_response_header_values(controller: HttpController): string[] {
+export function http_wasm_get_response_header_values (controller: HttpController): string[] {
     if (BuildConfiguration === "Debug") commonAsserts(controller);
     mono_assert(controller.responseHeaderValues, "expected responseHeaderValues");
     return controller.responseHeaderValues;
 }
 
-export function http_wasm_get_response_length(controller: HttpController): ControllablePromise<number> {
+export function http_wasm_get_response_length (controller: HttpController): ControllablePromise<number> {
     if (BuildConfiguration === "Debug") commonAsserts(controller);
     return wrap_as_cancelable_promise(async () => {
         const buffer = await controller.response!.arrayBuffer();
@@ -212,7 +222,7 @@ export function http_wasm_get_response_length(controller: HttpController): Contr
     });
 }
 
-export function http_wasm_get_response_bytes(controller: HttpController, view: Span): number {
+export function http_wasm_get_response_bytes (controller: HttpController, view: Span): number {
     mono_assert(controller, "expected controller");
     mono_assert(controller.responseBuffer, "expected resoved arrayBuffer");
     mono_assert(controller.currentBufferOffset != undefined, "expected currentBufferOffset");
@@ -226,7 +236,7 @@ export function http_wasm_get_response_bytes(controller: HttpController, view: S
     return bytes_read;
 }
 
-export function http_wasm_get_streamed_response_bytes(controller: HttpController, bufferPtr: VoidPtr, bufferLength: number): ControllablePromise<number> {
+export function http_wasm_get_streamed_response_bytes (controller: HttpController, bufferPtr: VoidPtr, bufferLength: number): ControllablePromise<number> {
     if (BuildConfiguration === "Debug") commonAsserts(controller);
     // the bufferPtr is pinned by the caller
     const view = new Span(bufferPtr, bufferLength, MemoryViewType.Byte);
@@ -240,6 +250,9 @@ export function http_wasm_get_streamed_response_bytes(controller: HttpController
             controller.currentBufferOffset = 0;
         }
         if (controller.currentStreamReaderChunk.done) {
+            if (controller.isAborted) {
+                throw new Error("OperationCanceledException");
+            }
             return 0;
         }
 
@@ -260,6 +273,7 @@ export function http_wasm_get_streamed_response_bytes(controller: HttpController
 
 interface HttpController {
     abortController: AbortController
+    isAborted?: boolean
 
     // streaming request
     streamReader?: ReadableStreamDefaultReader<Uint8Array>
diff --git a/src/mono/browser/runtime/hybrid-globalization/calendar.ts b/src/mono/browser/runtime/hybrid-globalization/calendar.ts
index 002c76da1be1..d3944f514ce8 100644
--- a/src/mono/browser/runtime/hybrid-globalization/calendar.ts
+++ b/src/mono/browser/runtime/hybrid-globalization/calendar.ts
@@ -6,7 +6,7 @@ import { mono_wasm_new_external_root } from "../roots";
 import { monoStringToString, stringToUTF16 } from "../strings";
 import { MonoObject, MonoObjectRef, MonoString, MonoStringRef } from "../types/internal";
 import { Int32Ptr } from "../types/emscripten";
-import { wrap_error_root, wrap_no_error_root } from "../invoke-js";
+import { wrap_error_root, wrap_no_error_root } from "./helpers";
 import { INNER_SEPARATOR, OUTER_SEPARATOR, normalizeSpaces } from "./helpers";
 
 const MONTH_CODE = "MMMM";
@@ -14,8 +14,7 @@ const YEAR_CODE = "yyyy";
 const DAY_CODE = "d";
 
 // this function joins all calendar info with OUTER_SEPARATOR into one string and returns it back to managed code
-export function mono_wasm_get_calendar_info(culture: MonoStringRef, calendarId: number, dst: number, dstLength: number, isException: Int32Ptr, exAddress: MonoObjectRef): number
-{
+export function mono_wasm_get_calendar_info (culture: MonoStringRef, calendarId: number, dst: number, dstLength: number, isException: Int32Ptr, exAddress: MonoObjectRef): number {
     const cultureRoot = mono_wasm_new_external_root<MonoString>(culture),
         exceptionRoot = mono_wasm_new_external_root<MonoObject>(exAddress);
     try {
@@ -57,56 +56,47 @@ export function mono_wasm_get_calendar_info(culture: MonoStringRef, calendarId:
         calendarInfo.AbbreviatedEraNames = eraNames.abbreviatedEraNames;
 
         const result = Object.values(calendarInfo).join(OUTER_SEPARATOR);
-        if (result.length > dstLength)
-        {
+        if (result.length > dstLength) {
             throw new Error(`Calendar info exceeds length of ${dstLength}.`);
         }
         stringToUTF16(dst, dst + 2 * result.length, result);
         wrap_no_error_root(isException, exceptionRoot);
         return result.length;
-    }
-    catch (ex: any) {
+    } catch (ex: any) {
         wrap_error_root(isException, ex, exceptionRoot);
         return -1;
-    }
-    finally {
+    } finally {
         cultureRoot.release();
         exceptionRoot.release();
     }
 }
 
-function getCalendarName(locale: any){
+function getCalendarName (locale: any) {
     const calendars = getCalendarInfo(locale);
     if (!calendars || calendars.length == 0)
         return "";
     return calendars[0];
 }
 
-function getCalendarInfo(locale: string)
-{
+function getCalendarInfo (locale: string) {
     try {
         // most tools have it implemented as a property
         return (new Intl.Locale(locale) as any).calendars;
-    }
-    catch {
+    } catch {
         try {
             // but a few use methods, which is the preferred way
             return (new Intl.Locale(locale) as any).getCalendars();
-        }
-        catch
-        {
+        } catch {
             return undefined;
         }
     }
 }
 
-function getMonthYearPattern(locale: string | undefined, date: Date): string
-{
+function getMonthYearPattern (locale: string | undefined, date: Date): string {
     let pattern = date.toLocaleDateString(locale, { year: "numeric", month: "long" }).toLowerCase();
     // pattern has month name as string or as number
     const monthName = date.toLocaleString(locale, { month: "long" }).toLowerCase().trim();
-    if (monthName.charAt(monthName.length - 1) == "\u6708")
-    {
+    if (monthName.charAt(monthName.length - 1) == "\u6708") {
         // Chineese-like patterns:
         return "yyyy\u5e74M\u6708";
     }
@@ -118,13 +108,11 @@ function getMonthYearPattern(locale: string | undefined, date: Date): string
     return pattern.replace(yearStr, YEAR_CODE);
 }
 
-function getMonthDayPattern(locale: string | undefined, date: Date): string
-{
-    let pattern = date.toLocaleDateString(locale, { month: "long", day: "numeric"}).toLowerCase();
+function getMonthDayPattern (locale: string | undefined, date: Date): string {
+    let pattern = date.toLocaleDateString(locale, { month: "long", day: "numeric" }).toLowerCase();
     // pattern has month name as string or as number
     const monthName = date.toLocaleString(locale, { month: "long" }).toLowerCase().trim();
-    if (monthName.charAt(monthName.length - 1) == "\u6708")
-    {
+    if (monthName.charAt(monthName.length - 1) == "\u6708") {
         // Chineese-like patterns:
         return "M\u6708d\u65e5";
     }
@@ -136,10 +124,8 @@ function getMonthDayPattern(locale: string | undefined, date: Date): string
     return pattern.replace(dayStr, DAY_CODE);
 }
 
-function getShortDatePattern(locale: string | undefined): string
-{
-    if (locale?.substring(0, 2) == "fa")
-    {
+function getShortDatePattern (locale: string | undefined): string {
+    if (locale?.substring(0, 2) == "fa") {
         // persian calendar is shifted and it has no lapping dates with
         // arabic and gregorian calendars, so that both day and month would be < 10
         return "yyyy/M/d";
@@ -154,18 +140,15 @@ function getShortDatePattern(locale: string | undefined): string
     const shortMonthStr = "1";
     const longDayStr = "02";
     const shortDayStr = "2";
-    let pattern = date.toLocaleDateString(locale, {dateStyle: "short"});
+    let pattern = date.toLocaleDateString(locale, { dateStyle: "short" });
     // each date part might be in localized numbers or standard arabic numbers
     // toLocaleDateString returns not compatible data,
     // e.g. { dateStyle: "short" } sometimes contains localized year number
     // while { year: "numeric" } contains non-localized year number and vice versa
-    if (pattern.includes(shortYearStr))
-    {
+    if (pattern.includes(shortYearStr)) {
         pattern = pattern.replace(longYearStr, YEAR_CODE);
         pattern = pattern.replace(shortYearStr, YEAR_CODE);
-    }
-    else
-    {
+    } else {
         const yearStr = date.toLocaleDateString(locale, { year: "numeric" });
         const yearStrShort = yearStr.substring(yearStr.length - 2, yearStr.length);
         pattern = pattern.replace(yearStr, YEAR_CODE);
@@ -173,25 +156,19 @@ function getShortDatePattern(locale: string | undefined): string
             pattern = pattern.replace(yearStrShort, YEAR_CODE);
     }
 
-    if (pattern.includes(shortMonthStr))
-    {
+    if (pattern.includes(shortMonthStr)) {
         pattern = pattern.replace(longMonthStr, "MM");
         pattern = pattern.replace(shortMonthStr, "M");
-    }
-    else
-    {
+    } else {
         const monthStr = date.toLocaleDateString(locale, { month: "numeric" });
         const localizedMonthCode = monthStr.length == 1 ? "M" : "MM";
         pattern = pattern.replace(monthStr, localizedMonthCode);
     }
 
-    if (pattern.includes(shortDayStr))
-    {
+    if (pattern.includes(shortDayStr)) {
         pattern = pattern.replace(longDayStr, "dd");
         pattern = pattern.replace(shortDayStr, "d");
-    }
-    else
-    {
+    } else {
         const dayStr = date.toLocaleDateString(locale, { day: "numeric" });
         const localizedDayCode = dayStr.length == 1 ? "d" : "dd";
         pattern = pattern.replace(dayStr, localizedDayCode);
@@ -199,27 +176,22 @@ function getShortDatePattern(locale: string | undefined): string
     return normalizeSpaces(pattern);
 }
 
-function getLongDatePattern(locale: string | undefined, date: Date): string
-{
-    if (locale == "th-TH")
-    {
+function getLongDatePattern (locale: string | undefined, date: Date): string {
+    if (locale == "th-TH") {
         // cannot be caught with regexes
         return "ddddที่ d MMMM g yyyy";
     }
-    let pattern = new Intl.DateTimeFormat(locale, { weekday: "long", year: "numeric", month: "long", day: "numeric"}).format(date).toLowerCase();
+    let pattern = new Intl.DateTimeFormat(locale, { weekday: "long", year: "numeric", month: "long", day: "numeric" }).format(date).toLowerCase();
     const monthName = date.toLocaleString(locale, { month: "long" }).trim().toLowerCase();
 
     // pattern has month name as string or as number
     const monthSuffix = monthName.charAt(monthName.length - 1);
-    if (monthSuffix == "\u6708" || monthSuffix == "\uc6d4")
-    {
+    if (monthSuffix == "\u6708" || monthSuffix == "\uc6d4") {
         // Asian-like patterns:
         const shortMonthName = date.toLocaleString(locale, { month: "short" });
         pattern = pattern.replace(shortMonthName, `M${monthSuffix}`);
-    }
-    else
-    {
-        const replacedMonthName = getGenitiveForName(date, pattern, monthName, new Intl.DateTimeFormat(locale, { weekday: "long", year: "numeric", day: "numeric"}));
+    } else {
+        const replacedMonthName = getGenitiveForName(date, pattern, monthName, new Intl.DateTimeFormat(locale, { weekday: "long", year: "numeric", day: "numeric" }));
         pattern = pattern.replace(replacedMonthName, MONTH_CODE);
     }
     pattern = pattern.replace("999", YEAR_CODE);
@@ -228,7 +200,7 @@ function getLongDatePattern(locale: string | undefined, date: Date): string
     const yearStr = date.toLocaleDateString(locale, { year: "numeric" });
     pattern = pattern.replace(yearStr, YEAR_CODE);
     const weekday = date.toLocaleDateString(locale, { weekday: "long" }).toLowerCase();
-    const replacedWeekday = getGenitiveForName(date, pattern, weekday, new Intl.DateTimeFormat(locale, { year: "numeric", month: "long", day: "numeric"}));
+    const replacedWeekday = getGenitiveForName(date, pattern, weekday, new Intl.DateTimeFormat(locale, { year: "numeric", month: "long", day: "numeric" }));
     pattern = pattern.replace(replacedWeekday, "dddd");
     pattern = pattern.replace("22", DAY_CODE);
     const dayStr = date.toLocaleDateString(locale, { day: "numeric" }); // should we replace it for localized digits?
@@ -236,14 +208,12 @@ function getLongDatePattern(locale: string | undefined, date: Date): string
     return pattern.replace(dayStr, DAY_CODE);
 }
 
-function getGenitiveForName(date: Date, pattern: string, name: string, formatWithoutName: Intl.DateTimeFormat)
-{
+function getGenitiveForName (date: Date, pattern: string, name: string, formatWithoutName: Intl.DateTimeFormat) {
     let genitiveName = name;
     const nameStart = pattern.indexOf(name);
     if (nameStart == -1 ||
         // genitive month name can include monthName and monthName can include spaces, e.g. "tháng 11":, so we cannot use pattern.includes() or pattern.split(" ").includes()
-        (nameStart != -1 && pattern.length > nameStart + name.length && pattern[nameStart + name.length] != " " && pattern[nameStart + name.length] != "," && pattern[nameStart + name.length] != "\u060c"))
-    {
+        (nameStart != -1 && pattern.length > nameStart + name.length && pattern[nameStart + name.length] != " " && pattern[nameStart + name.length] != "," && pattern[nameStart + name.length] != "\u060c")) {
         // needs to be in Genitive form to be useful
         // e.g.
         // pattern = '999 m. lapkričio 22 d., šeštadienis',
@@ -256,24 +226,21 @@ function getGenitiveForName(date: Date, pattern: string, name: string, formatWit
     return genitiveName;
 }
 
-function getDayNames(locale: string | undefined) : { long: string[], abbreviated: string[], shortest: string[] }
-{
+function getDayNames (locale: string | undefined) : { long: string[], abbreviated: string[], shortest: string[] } {
     const weekDay = new Date(2023, 5, 25); // Sunday
     const dayNames = [];
     const dayNamesAbb = [];
     const dayNamesSS = [];
-    for(let i=0; i<7; i++)
-    {
+    for (let i = 0; i < 7; i++) {
         dayNames[i] = weekDay.toLocaleDateString(locale, { weekday: "long" });
         dayNamesAbb[i] = weekDay.toLocaleDateString(locale, { weekday: "short" });
         dayNamesSS[i] = weekDay.toLocaleDateString(locale, { weekday: "narrow" });
         weekDay.setDate(weekDay.getDate() + 1);
     }
-    return {long: dayNames, abbreviated: dayNamesAbb, shortest: dayNamesSS };
+    return { long: dayNames, abbreviated: dayNamesAbb, shortest: dayNamesSS };
 }
 
-function getMonthNames(locale: string | undefined) : { long: string[], abbreviated: string[], longGenitive: string[], abbreviatedGenitive: string[] }
-{
+function getMonthNames (locale: string | undefined) : { long: string[], abbreviated: string[], longGenitive: string[], abbreviatedGenitive: string[] } {
     // some calendars have the first month on non-0 index in JS
     // first month: Muharram ("ar") or Farwardin ("fa") or January
     const localeLang = locale ? locale.split("-")[0] : "";
@@ -284,8 +251,7 @@ function getMonthNames(locale: string | undefined) : { long: string[], abbreviat
     const monthsGen: string[] = [];
     const monthsAbbGen: string[] = [];
     let isChineeseStyle, isShortFormBroken;
-    for(let i = firstMonthShift; i < 12 + firstMonthShift; i++)
-    {
+    for (let i = firstMonthShift; i < 12 + firstMonthShift; i++) {
         const monthCnt = i % 12;
         date.setMonth(monthCnt);
 
@@ -295,36 +261,32 @@ function getMonthNames(locale: string | undefined) : { long: string[], abbreviat
         monthsAbb[i - firstMonthShift] = monthNameShort;
         // for Genitive forms:
         isChineeseStyle = isChineeseStyle ?? monthNameLong.charAt(monthNameLong.length - 1) == "\u6708";
-        if (isChineeseStyle)
-        {
+        if (isChineeseStyle) {
             // for Chinese-like calendar's Genitive = Nominative
             monthsGen[i - firstMonthShift] = monthNameLong;
             monthsAbbGen[i - firstMonthShift] = monthNameShort;
             continue;
         }
         const formatWithoutMonthName = new Intl.DateTimeFormat(locale, { day: "numeric" });
-        const monthWithDayLong = date.toLocaleDateString(locale, { month: "long", day: "numeric"});
+        const monthWithDayLong = date.toLocaleDateString(locale, { month: "long", day: "numeric" });
         monthsGen[i - firstMonthShift] = getGenitiveForName(date, monthWithDayLong, monthNameLong, formatWithoutMonthName);
         isShortFormBroken = isShortFormBroken ?? /^\d+$/.test(monthNameShort);
-        if (isShortFormBroken)
-        {
+        if (isShortFormBroken) {
             // for buggy locales e.g. lt-LT, short month contains only number instead of string
             // we leave Genitive = Nominative
             monthsAbbGen[i - firstMonthShift] = monthNameShort;
             continue;
         }
-        const monthWithDayShort = date.toLocaleDateString(locale, { month: "short", day: "numeric"});
+        const monthWithDayShort = date.toLocaleDateString(locale, { month: "short", day: "numeric" });
         monthsAbbGen[i - firstMonthShift] = getGenitiveForName(date, monthWithDayShort, monthNameShort, formatWithoutMonthName);
     }
-    return {long: months, abbreviated: monthsAbb, longGenitive: monthsGen, abbreviatedGenitive: monthsAbbGen };
+    return { long: months, abbreviated: monthsAbb, longGenitive: monthsGen, abbreviatedGenitive: monthsAbbGen };
 }
 
 // .NET expects that only the Japanese calendars have more than 1 era.
 // So for other calendars, only return the latest era.
-function getEraNames(date: Date, locale: string | undefined, calendarId: number) : { eraNames: string, abbreviatedEraNames: string}
-{
-    if (shouldBePopulatedByManagedCode(calendarId))
-    {
+function getEraNames (date: Date, locale: string | undefined, calendarId: number) : { eraNames: string, abbreviatedEraNames: string} {
+    if (shouldBePopulatedByManagedCode(calendarId)) {
         // managed code already handles these calendars,
         // so empty strings will get overwritten in
         // InitializeEraNames/InitializeAbbreviatedEraNames
@@ -347,13 +309,11 @@ function getEraNames(date: Date, locale: string | undefined, calendarId: number)
         abbreviatedEraNames: getEraFromDateParts(eraDateParts.abbrEraDateParts, eraDateParts.ignoredPart)
     };
 
-    function shouldBePopulatedByManagedCode(calendarId: number)
-    {
+    function shouldBePopulatedByManagedCode (calendarId: number) {
         return (calendarId > 1 && calendarId < 15) || calendarId == 22 || calendarId == 23;
     }
 
-    function getEraFromDateParts(dateParts: string[], ignoredPart: string) : string
-    {
+    function getEraFromDateParts (dateParts: string[], ignoredPart: string) : string {
         const regex = new RegExp(`^((?!${ignoredPart}|[0-9]).)*$`);
         const filteredEra = dateParts.filter(part => regex.test(part));
         if (filteredEra.length == 0)
@@ -361,10 +321,8 @@ function getEraNames(date: Date, locale: string | undefined, calendarId: number)
         return filteredEra[0].trim();
     }
 
-    function getEraDateParts(yearStr: string)
-    {
-        if (eraDate.startsWith(yearStr) || eraDate.endsWith(yearStr))
-        {
+    function getEraDateParts (yearStr: string) {
+        if (eraDate.startsWith(yearStr) || eraDate.endsWith(yearStr)) {
             return {
                 eraDateParts: eraDate.split(dayStr),
                 abbrEraDateParts: shortEraDate.split(dayStr),
diff --git a/src/mono/browser/runtime/hybrid-globalization/change-case.ts b/src/mono/browser/runtime/hybrid-globalization/change-case.ts
index f9d227517b81..762eacc94c26 100644
--- a/src/mono/browser/runtime/hybrid-globalization/change-case.ts
+++ b/src/mono/browser/runtime/hybrid-globalization/change-case.ts
@@ -5,19 +5,18 @@ import { mono_wasm_new_external_root } from "../roots";
 import { monoStringToString, utf16ToStringLoop, stringToUTF16 } from "../strings";
 import { MonoObject, MonoObjectRef, MonoString, MonoStringRef } from "../types/internal";
 import { Int32Ptr } from "../types/emscripten";
-import { wrap_error_root, wrap_no_error_root } from "../invoke-js";
+import { wrap_error_root, wrap_no_error_root } from "./helpers";
 import { localHeapViewU16, setU16_local } from "../memory";
 import { isSurrogate } from "./helpers";
 
-export function mono_wasm_change_case_invariant(src: number, srcLength: number, dst: number, dstLength: number, toUpper: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): void {
+export function mono_wasm_change_case_invariant (src: number, srcLength: number, dst: number, dstLength: number, toUpper: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): void {
     const exceptionRoot = mono_wasm_new_external_root<MonoObject>(ex_address);
     try {
         const input = utf16ToStringLoop(src, src + 2 * srcLength);
         const result = toUpper ? input.toUpperCase() : input.toLowerCase();
         // Unicode defines some codepoints which expand into multiple codepoints,
         // originally we do not support this expansion
-        if (result.length <= dstLength)
-        {
+        if (result.length <= dstLength) {
             stringToUTF16(dst, dst + 2 * dstLength, result);
             wrap_no_error_root(is_exception, exceptionRoot);
             return;
@@ -26,62 +25,49 @@ export function mono_wasm_change_case_invariant(src: number, srcLength: number,
         // workaround to maintain the ICU-like behavior
         const heapI16 = localHeapViewU16();
         let jump = 1;
-        if (toUpper)
-        {
-            for (let i=0; i < input.length; i+=jump)
-            {
+        if (toUpper) {
+            for (let i = 0; i < input.length; i += jump) {
                 // surrogate parts have to enter ToUpper/ToLower together to give correct output
-                if (isSurrogate(input, i))
-                {
+                if (isSurrogate(input, i)) {
                     jump = 2;
-                    const surrogate = input.substring(i, i+2);
+                    const surrogate = input.substring(i, i + 2);
                     const upperSurrogate = surrogate.toUpperCase();
                     const appendedSurrogate = upperSurrogate.length > 2 ? surrogate : upperSurrogate;
                     appendSurrogateToMemory(heapI16, dst, appendedSurrogate, i);
 
-                }
-                else
-                {
+                } else {
                     jump = 1;
                     const upperChar = input[i].toUpperCase();
                     const appendedChar = upperChar.length > 1 ? input[i] : upperChar;
-                    setU16_local(heapI16, dst + i*2, appendedChar.charCodeAt(0));
+                    setU16_local(heapI16, dst + i * 2, appendedChar.charCodeAt(0));
                 }
             }
-        }
-        else
-        {
-            for (let i=0; i < input.length; i+=jump)
-            {
-                if (isSurrogate(input, i))
-                {
+        } else {
+            for (let i = 0; i < input.length; i += jump) {
+                if (isSurrogate(input, i)) {
                     jump = 2;
-                    const surrogate = input.substring(i, i+2);
+                    const surrogate = input.substring(i, i + 2);
                     const upperSurrogate = surrogate.toLowerCase();
                     const appendedSurrogate = upperSurrogate.length > 2 ? surrogate : upperSurrogate;
                     appendSurrogateToMemory(heapI16, dst, appendedSurrogate, i);
 
-                }
-                else
-                {
+                } else {
                     jump = 1;
                     const upperChar = input[i].toLowerCase();
                     const appendedChar = upperChar.length > 1 ? input[i] : upperChar;
-                    setU16_local(heapI16, dst + i*2, appendedChar.charCodeAt(0));
+                    setU16_local(heapI16, dst + i * 2, appendedChar.charCodeAt(0));
                 }
             }
         }
         wrap_no_error_root(is_exception, exceptionRoot);
-    }
-    catch (ex: any) {
+    } catch (ex: any) {
         wrap_error_root(is_exception, ex, exceptionRoot);
-    }
-    finally {
+    } finally {
         exceptionRoot.release();
     }
 }
 
-export function mono_wasm_change_case(culture: MonoStringRef, src: number, srcLength: number, dst: number, dstLength: number, toUpper: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): void {
+export function mono_wasm_change_case (culture: MonoStringRef, src: number, srcLength: number, dst: number, dstLength: number, toUpper: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): void {
     const cultureRoot = mono_wasm_new_external_root<MonoString>(culture),
         exceptionRoot = mono_wasm_new_external_root<MonoObject>(ex_address);
     try {
@@ -91,8 +77,7 @@ export function mono_wasm_change_case(culture: MonoStringRef, src: number, srcLe
         const input = utf16ToStringLoop(src, src + 2 * srcLength);
         const result = toUpper ? input.toLocaleUpperCase(cultureName) : input.toLocaleLowerCase(cultureName);
 
-        if (result.length <= input.length)
-        {
+        if (result.length <= input.length) {
             stringToUTF16(dst, dst + 2 * dstLength, result);
             wrap_no_error_root(is_exception, exceptionRoot);
             return;
@@ -100,64 +85,50 @@ export function mono_wasm_change_case(culture: MonoStringRef, src: number, srcLe
         // workaround to maintain the ICU-like behavior
         const heapI16 = localHeapViewU16();
         let jump = 1;
-        if (toUpper)
-        {
-            for (let i=0; i < input.length; i+=jump)
-            {
+        if (toUpper) {
+            for (let i = 0; i < input.length; i += jump) {
                 // surrogate parts have to enter ToUpper/ToLower together to give correct output
-                if (isSurrogate(input, i))
-                {
+                if (isSurrogate(input, i)) {
                     jump = 2;
-                    const surrogate = input.substring(i, i+2);
+                    const surrogate = input.substring(i, i + 2);
                     const upperSurrogate = surrogate.toLocaleUpperCase(cultureName);
                     const appendedSurrogate = upperSurrogate.length > 2 ? surrogate : upperSurrogate;
                     appendSurrogateToMemory(heapI16, dst, appendedSurrogate, i);
 
-                }
-                else
-                {
+                } else {
                     jump = 1;
                     const upperChar = input[i].toLocaleUpperCase(cultureName);
                     const appendedChar = upperChar.length > 1 ? input[i] : upperChar;
-                    setU16_local(heapI16, dst + i*2, appendedChar.charCodeAt(0));
+                    setU16_local(heapI16, dst + i * 2, appendedChar.charCodeAt(0));
                 }
             }
-        }
-        else
-        {
-            for (let i=0; i < input.length; i+=jump)
-            {
+        } else {
+            for (let i = 0; i < input.length; i += jump) {
                 // surrogate parts have to enter ToUpper/ToLower together to give correct output
-                if (isSurrogate(input, i))
-                {
+                if (isSurrogate(input, i)) {
                     jump = 2;
-                    const surrogate = input.substring(i, i+2);
+                    const surrogate = input.substring(i, i + 2);
                     const upperSurrogate = surrogate.toLocaleLowerCase(cultureName);
                     const appendedSurrogate = upperSurrogate.length > 2 ? surrogate : upperSurrogate;
                     appendSurrogateToMemory(heapI16, dst, appendedSurrogate, i);
-                }
-                else
-                {
+                } else {
                     jump = 1;
                     const lowerChar = input[i].toLocaleLowerCase(cultureName);
                     const appendedChar = lowerChar.length > 1 ? input[i] : lowerChar;
-                    setU16_local(heapI16, dst + i*2, appendedChar.charCodeAt(0));
+                    setU16_local(heapI16, dst + i * 2, appendedChar.charCodeAt(0));
                 }
             }
         }
         wrap_no_error_root(is_exception, exceptionRoot);
-    }
-    catch (ex: any) {
+    } catch (ex: any) {
         wrap_error_root(is_exception, ex, exceptionRoot);
-    }
-    finally {
+    } finally {
         cultureRoot.release();
         exceptionRoot.release();
     }
 }
 
-function appendSurrogateToMemory(heapI16: Uint16Array, dst: number, surrogate: string, idx: number)
-{
-    setU16_local(heapI16, dst + idx*2, surrogate.charCodeAt(0));
-    setU16_local(heapI16, dst + (idx+1)*2, surrogate.charCodeAt(1));
+function appendSurrogateToMemory (heapI16: Uint16Array, dst: number, surrogate: string, idx: number) {
+    setU16_local(heapI16, dst + idx * 2, surrogate.charCodeAt(0));
+    setU16_local(heapI16, dst + (idx + 1) * 2, surrogate.charCodeAt(1));
 }
diff --git a/src/mono/browser/runtime/hybrid-globalization/collations.ts b/src/mono/browser/runtime/hybrid-globalization/collations.ts
index 7c1bb5a03da0..523f63307e27 100644
--- a/src/mono/browser/runtime/hybrid-globalization/collations.ts
+++ b/src/mono/browser/runtime/hybrid-globalization/collations.ts
@@ -5,14 +5,14 @@ import { mono_wasm_new_external_root } from "../roots";
 import { monoStringToString, utf16ToString } from "../strings";
 import { MonoObject, MonoObjectRef, MonoString, MonoStringRef } from "../types/internal";
 import { Int32Ptr } from "../types/emscripten";
-import { wrap_error_root, wrap_no_error_root } from "../invoke-js";
+import { wrap_error_root, wrap_no_error_root } from "./helpers";
 import { GraphemeSegmenter } from "./grapheme-segmenter";
 
 const COMPARISON_ERROR = -2;
 const INDEXING_ERROR = -1;
 let graphemeSegmenterCached: GraphemeSegmenter | null;
 
-export function mono_wasm_compare_string(culture: MonoStringRef, str1: number, str1Length: number, str2: number, str2Length: number, options: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): number {
+export function mono_wasm_compare_string (culture: MonoStringRef, str1: number, str1Length: number, str2: number, str2Length: number, options: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): number {
     const cultureRoot = mono_wasm_new_external_root<MonoString>(culture),
         exceptionRoot = mono_wasm_new_external_root<MonoObject>(ex_address);
     try {
@@ -23,18 +23,16 @@ export function mono_wasm_compare_string(culture: MonoStringRef, str1: number, s
         const locale = cultureName ? cultureName : undefined;
         wrap_no_error_root(is_exception, exceptionRoot);
         return compareStrings(string1, string2, locale, casePicker);
-    }
-    catch (ex: any) {
+    } catch (ex: any) {
         wrap_error_root(is_exception, ex, exceptionRoot);
         return COMPARISON_ERROR;
-    }
-    finally {
+    } finally {
         cultureRoot.release();
         exceptionRoot.release();
     }
 }
 
-export function mono_wasm_starts_with(culture: MonoStringRef, str1: number, str1Length: number, str2: number, str2Length: number, options: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): number {
+export function mono_wasm_starts_with (culture: MonoStringRef, str1: number, str1Length: number, str2: number, str2Length: number, options: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): number {
     const cultureRoot = mono_wasm_new_external_root<MonoString>(culture),
         exceptionRoot = mono_wasm_new_external_root<MonoObject>(ex_address);
     try {
@@ -54,18 +52,16 @@ export function mono_wasm_starts_with(culture: MonoStringRef, str1: number, str1
         const result = compareStrings(sourceOfPrefixLength, prefix, locale, casePicker);
         wrap_no_error_root(is_exception, exceptionRoot);
         return result === 0 ? 1 : 0; // equals ? true : false
-    }
-    catch (ex: any) {
+    } catch (ex: any) {
         wrap_error_root(is_exception, ex, exceptionRoot);
         return INDEXING_ERROR;
-    }
-    finally {
+    } finally {
         cultureRoot.release();
         exceptionRoot.release();
     }
 }
 
-export function mono_wasm_ends_with(culture: MonoStringRef, str1: number, str1Length: number, str2: number, str2Length: number, options: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): number {
+export function mono_wasm_ends_with (culture: MonoStringRef, str1: number, str1Length: number, str2: number, str2Length: number, options: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): number {
     const cultureRoot = mono_wasm_new_external_root<MonoString>(culture),
         exceptionRoot = mono_wasm_new_external_root<MonoObject>(ex_address);
     try {
@@ -85,18 +81,16 @@ export function mono_wasm_ends_with(culture: MonoStringRef, str1: number, str1Le
         const result = compareStrings(sourceOfSuffixLength, suffix, locale, casePicker);
         wrap_no_error_root(is_exception, exceptionRoot);
         return result === 0 ? 1 : 0; // equals ? true : false
-    }
-    catch (ex: any) {
+    } catch (ex: any) {
         wrap_error_root(is_exception, ex, exceptionRoot);
         return INDEXING_ERROR;
-    }
-    finally {
+    } finally {
         cultureRoot.release();
         exceptionRoot.release();
     }
 }
 
-export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, needleLength: number, srcPtr: number, srcLength: number, options: number, fromBeginning: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): number {
+export function mono_wasm_index_of (culture: MonoStringRef, needlePtr: number, needleLength: number, srcPtr: number, srcLength: number, options: number, fromBeginning: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): number {
     const cultureRoot = mono_wasm_new_external_root<MonoString>(culture),
         exceptionRoot = mono_wasm_new_external_root<MonoObject>(ex_address);
     try {
@@ -156,22 +150,20 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne
         }
         wrap_no_error_root(is_exception, exceptionRoot);
         return result;
-    }
-    catch (ex: any) {
+    } catch (ex: any) {
         wrap_error_root(is_exception, ex, exceptionRoot);
         return INDEXING_ERROR;
-    }
-    finally {
+    } finally {
         cultureRoot.release();
         exceptionRoot.release();
     }
 
-    function checkMatchFound(str1: string, str2: string, locale: string | undefined, casePicker: number): boolean {
+    function checkMatchFound (str1: string, str2: string, locale: string | undefined, casePicker: number): boolean {
         return compareStrings(str1, str2, locale, casePicker) === 0;
     }
 }
 
-function compareStrings(string1: string, string2: string, locale: string | undefined, casePicker: number): number {
+function compareStrings (string1: string, string2: string, locale: string | undefined, casePicker: number): number {
     switch (casePicker) {
         case 0:
             // 0: None - default algorithm for the platform OR
@@ -263,12 +255,12 @@ function compareStrings(string1: string, string2: string, locale: string | undef
     }
 }
 
-function decodeToCleanString(strPtr: number, strLen: number) {
+function decodeToCleanString (strPtr: number, strLen: number) {
     const str = utf16ToString(<any>strPtr, <any>(strPtr + 2 * strLen));
     return cleanString(str);
 }
 
-function cleanString(str: string) {
+function cleanString (str: string) {
     const nStr = str.normalize();
     return nStr.replace(/[\u200B-\u200D\uFEFF\0]/g, "");
 }
diff --git a/src/mono/browser/runtime/hybrid-globalization/culture-info.ts b/src/mono/browser/runtime/hybrid-globalization/culture-info.ts
index 48c4e47828d0..03bebd9ec65e 100644
--- a/src/mono/browser/runtime/hybrid-globalization/culture-info.ts
+++ b/src/mono/browser/runtime/hybrid-globalization/culture-info.ts
@@ -1,15 +1,14 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import { wrap_error_root, wrap_no_error_root } from "../invoke-js";
+import { wrap_error_root, wrap_no_error_root } from "./helpers";
 import { mono_wasm_new_external_root } from "../roots";
 import { monoStringToString, stringToUTF16 } from "../strings";
 import { Int32Ptr } from "../types/emscripten";
 import { MonoObject, MonoObjectRef, MonoString, MonoStringRef } from "../types/internal";
 import { OUTER_SEPARATOR, normalizeLocale, normalizeSpaces } from "./helpers";
 
-export function mono_wasm_get_culture_info(culture: MonoStringRef, dst: number, dstLength: number, isException: Int32Ptr, exAddress: MonoObjectRef): number
-{
+export function mono_wasm_get_culture_info (culture: MonoStringRef, dst: number, dstLength: number, isException: Int32Ptr, exAddress: MonoObjectRef): number {
     const cultureRoot = mono_wasm_new_external_root<MonoString>(culture),
         exceptionRoot = mono_wasm_new_external_root<MonoObject>(exAddress);
     try {
@@ -27,26 +26,22 @@ export function mono_wasm_get_culture_info(culture: MonoStringRef, dst: number,
         cultureInfo.LongTimePattern = getLongTimePattern(canonicalLocale, designators);
         cultureInfo.ShortTimePattern = getShortTimePattern(cultureInfo.LongTimePattern);
         const result = Object.values(cultureInfo).join(OUTER_SEPARATOR);
-        if (result.length > dstLength)
-        {
+        if (result.length > dstLength) {
             throw new Error(`Culture info exceeds length of ${dstLength}.`);
         }
         stringToUTF16(dst, dst + 2 * result.length, result);
         wrap_no_error_root(isException, exceptionRoot);
         return result.length;
-    }
-    catch (ex: any) {
+    } catch (ex: any) {
         wrap_error_root(isException, ex, exceptionRoot);
         return -1;
-    }
-    finally {
+    } finally {
         cultureRoot.release();
         exceptionRoot.release();
     }
 }
 
-function getAmPmDesignators(locale: any)
-{
+function getAmPmDesignators (locale: any) {
     const pmTime = new Date("August 19, 1975 12:15:33"); // do not change, some PM hours result in hour digits change, e.g. 13 -> 01 or 1
     const amTime = new Date("August 19, 1975 11:15:33"); // do not change, some AM hours result in hour digits change, e.g. 9 -> 09
     const pmDesignator = getDesignator(pmTime, locale);
@@ -57,19 +52,17 @@ function getAmPmDesignators(locale: any)
     };
 }
 
-function getDesignator(time: Date, locale: string)
-{
-    let withDesignator = time.toLocaleTimeString(locale, { hourCycle: "h12"});
+function getDesignator (time: Date, locale: string) {
+    let withDesignator = time.toLocaleTimeString(locale, { hourCycle: "h12" });
     const localizedZero = (0).toLocaleString(locale);
-    if (withDesignator.includes(localizedZero))
-    {
+    if (withDesignator.includes(localizedZero)) {
         // in v8>=11.8 "12" changes to "0" for ja-JP
         const localizedTwelve = (12).toLocaleString(locale);
         withDesignator = withDesignator.replace(localizedZero, localizedTwelve);
     }
-    const withoutDesignator = time.toLocaleTimeString(locale, { hourCycle: "h24"});
+    const withoutDesignator = time.toLocaleTimeString(locale, { hourCycle: "h24" });
     const designator = withDesignator.replace(withoutDesignator, "").trim();
-    if (new RegExp("[0-9]$").test(designator)){
+    if (new RegExp("[0-9]$").test(designator)) {
         const designatorParts = withDesignator.split(" ").filter(part => new RegExp("^((?![0-9]).)*$").test(part));
         if (!designatorParts || designatorParts.length == 0)
             return "";
@@ -78,8 +71,7 @@ function getDesignator(time: Date, locale: string)
     return designator;
 }
 
-function getLongTimePattern(locale: string | undefined, designators: any) : string
-{
+function getLongTimePattern (locale: string | undefined, designators: any): string {
     const hourIn24Format = 18; // later hours than 18 have night designators in some locales (instead of AM designator)
     const hourIn12Format = 6;
     const localizedHour24 = (hourIn24Format).toLocaleString(locale); // not all locales use arabic numbers
@@ -97,14 +89,11 @@ function getLongTimePattern(locale: string | undefined, designators: any) : stri
     const amTime = new Date(`August 19, 1975 ${hourIn12Format}:15:30`);
     const h12Style = shortTime.format(amTime);
     let hourPattern;
-    if (isISOStyle) // 24h
-    {
+    if (isISOStyle) { // 24h
         const hasPrefix = h12Style.includes(hour12WithPrefix);
         hourPattern = hasPrefix ? "HH" : "H";
         pattern = pattern.replace(localizedHour24, hourPattern);
-    }
-    else // 12h
-    {
+    } else { // 12h
         const hasPrefix = h12Style.includes(hour12WithPrefix);
         hourPattern = hasPrefix ? "hh" : "h";
         pattern = pattern.replace(hasPrefix ? hour12WithPrefix : localizedHour12, hourPattern);
@@ -112,23 +101,18 @@ function getLongTimePattern(locale: string | undefined, designators: any) : stri
     return normalizeSpaces(pattern);
 }
 
-function getShortTimePattern(pattern: string) : string
-{
+function getShortTimePattern (pattern: string): string {
     // remove seconds:
     // short dotnet pattern does not contain seconds while JS's pattern always contains them
     const secondsIdx = pattern.indexOf("ss");
-    if (secondsIdx > 0)
-    {
+    if (secondsIdx > 0) {
         const secondsWithSeparator = `${pattern[secondsIdx - 1]}ss`;
         // en-US: 12:mm:ss tt -> 12:mm tt;
         // fr-CA: 12 h mm min ss s -> 12 h mm min s
         const shortPatternNoSecondsDigits = pattern.replace(secondsWithSeparator, "");
-        if (shortPatternNoSecondsDigits.length > secondsIdx && shortPatternNoSecondsDigits[shortPatternNoSecondsDigits.length - 1] != "t")
-        {
+        if (shortPatternNoSecondsDigits.length > secondsIdx && shortPatternNoSecondsDigits[shortPatternNoSecondsDigits.length - 1] != "t") {
             pattern = pattern.split(secondsWithSeparator)[0];
-        }
-        else
-        {
+        } else {
             pattern = shortPatternNoSecondsDigits;
         }
     }
diff --git a/src/mono/browser/runtime/hybrid-globalization/grapheme-segmenter.ts b/src/mono/browser/runtime/hybrid-globalization/grapheme-segmenter.ts
index 7322443a86cc..83bf8619affe 100644
--- a/src/mono/browser/runtime/hybrid-globalization/grapheme-segmenter.ts
+++ b/src/mono/browser/runtime/hybrid-globalization/grapheme-segmenter.ts
@@ -2,7 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 /**
- * This file is partially using code from FormatJS Intl.Segmenter implementation, reference: 
+ * This file is partially using code from FormatJS Intl.Segmenter implementation, reference:
  * https://github.com/formatjs/formatjs/blob/58d6a7b398d776ca3d2726d72ae1573b65cc3bef/packages/intl-segmenter/src/segmenter.ts
  * https://github.com/formatjs/formatjs/blob/58d6a7b398d776ca3d2726d72ae1573b65cc3bef/packages/intl-segmenter/src/segmentation-utils.ts
  */
@@ -21,7 +21,7 @@ type SegmentationRuleRaw = {
     before?: string
     after?: string
 }
-  
+
 type SegmentationTypeRaw = {
     variables: Record<string, string>
     rules: Record<string, SegmentationRuleRaw>
@@ -29,7 +29,7 @@ type SegmentationTypeRaw = {
 
 let segmentationRules: Record<string, SegmentationRule>;
 
-function replaceVariables(variables: Record<string, string>, input: string): string {
+function replaceVariables (variables: Record<string, string>, input: string): string {
     const findVarRegex = /\$[A-Za-z0-9_]+/gm;
     return input.replaceAll(findVarRegex, match => {
         if (!(match in variables)) {
@@ -43,11 +43,11 @@ function generateRegexRule (rule: string, variables: Record<string, string>, aft
     return new RegExp(`${after ? "^" : ""}${replaceVariables(variables, rule)}${after ? "" : "$"}`);
 }
 
-function isSegmentationTypeRaw(obj: any): obj is SegmentationTypeRaw {
+function isSegmentationTypeRaw (obj: any): obj is SegmentationTypeRaw {
     return obj.variables != null && obj.rules != null;
 }
 
-export function setSegmentationRulesFromJson(json: string) {
+export function setSegmentationRulesFromJson (json: string) {
     mono_assert(isSegmentationTypeRaw(json), "Provided grapheme segmentation rules are not valid");
     segmentationRules = GraphemeSegmenter.prepareSegmentationRules(json);
 }
@@ -56,7 +56,7 @@ export class GraphemeSegmenter {
     private readonly rules: Record<string, SegmentationRule>;
     private readonly ruleSortedKeys: string[];
 
-    public constructor() {  
+    public constructor () {
         this.rules = segmentationRules;
         this.ruleSortedKeys = Object.keys(this.rules).sort((a, b) => Number(a) - Number(b));
     }
@@ -67,25 +67,25 @@ export class GraphemeSegmenter {
      * @param startIndex - The starting index.
      * @returns The next grapheme.
      */
-    public nextGrapheme(str: string, startIndex: number): string {
+    public nextGrapheme (str: string, startIndex: number): string {
         const breakIdx = this.nextGraphemeBreak(str, startIndex);
         return str.substring(startIndex, breakIdx);
     }
 
     /**
      * Finds the index of the next grapheme break in a given string starting from a specified index.
-     * 
+     *
      * @param str - The input string.
      * @param startIndex - The index to start searching from.
      * @returns The index of the next grapheme break.
      */
-    public nextGraphemeBreak(str: string, startIndex: number): number {
+    public nextGraphemeBreak (str: string, startIndex: number): number {
         if (startIndex < 0)
             return 0;
-    
+
         if (startIndex >= str.length - 1)
             return str.length;
-    
+
         let prev = String.fromCodePoint(str.codePointAt(startIndex)!);
         for (let i = startIndex + 1; i < str.length; i++) {
             // Don't break surrogate pairs
@@ -96,16 +96,16 @@ export class GraphemeSegmenter {
             const curr = String.fromCodePoint(str.codePointAt(i)!);
             if (this.isGraphemeBreak(prev, curr))
                 return i;
-    
+
             prev = curr;
         }
-    
+
         return str.length;
     }
 
-    private isGraphemeBreak(previous: string, current: string): boolean {
+    private isGraphemeBreak (previous: string, current: string): boolean {
         for (const key of this.ruleSortedKeys) {
-            const {before, after, breaks} = this.rules[key];
+            const { before, after, breaks } = this.rules[key];
             // match before and after rules
             if (before && !before.test(previous)) {
                 continue;
@@ -121,20 +121,20 @@ export class GraphemeSegmenter {
         return true;
     }
 
-    public static prepareSegmentationRules(segmentationRules: SegmentationTypeRaw): Record<string, SegmentationRule> {
+    public static prepareSegmentationRules (segmentationRules: SegmentationTypeRaw): Record<string, SegmentationRule> {
         const preparedRules: Record<string, SegmentationRule> = {};
-    
+
         for (const key of Object.keys(segmentationRules.rules)) {
             const ruleValue = segmentationRules.rules[key];
             const preparedRule: SegmentationRule = { breaks: ruleValue.breaks, };
-    
+
             if ("before" in ruleValue && ruleValue.before) {
                 preparedRule.before = generateRegexRule(ruleValue.before, segmentationRules.variables, false);
             }
             if ("after" in ruleValue && ruleValue.after) {
                 preparedRule.after = generateRegexRule(ruleValue.after, segmentationRules.variables, true);
             }
-    
+
             preparedRules[key] = preparedRule;
         }
         return preparedRules;
diff --git a/src/mono/browser/runtime/hybrid-globalization/helpers.ts b/src/mono/browser/runtime/hybrid-globalization/helpers.ts
index 1590936d8eac..b58fc6779b25 100644
--- a/src/mono/browser/runtime/hybrid-globalization/helpers.ts
+++ b/src/mono/browser/runtime/hybrid-globalization/helpers.ts
@@ -1,6 +1,14 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+import NativeAOT from "consts:nativeAOT";
+
+import { normalize_exception } from "../invoke-js";
+import { receiveWorkerHeapViews, setI32_unchecked } from "../memory";
+import { stringToMonoStringRoot } from "../strings";
+import { Int32Ptr } from "../types/emscripten";
+import { MonoObject, WasmRoot } from "../types/internal";
+
 const SURROGATE_HIGHER_START = "\uD800";
 const SURROGATE_HIGHER_END = "\uDBFF";
 const SURROGATE_LOWER_START = "\uDC00";
@@ -9,30 +17,24 @@ const SURROGATE_LOWER_END = "\uDFFF";
 export const OUTER_SEPARATOR = "##";
 export const INNER_SEPARATOR = "||";
 
-export function normalizeLocale(locale: string | null)
-{
+export function normalizeLocale (locale: string | null) {
     if (!locale)
         return undefined;
-    try
-    {
+    try {
         locale = locale.toLocaleLowerCase();
-        if (locale.includes("zh"))
-        {
+        if (locale.includes("zh")) {
             // browser does not recognize "zh-chs" and "zh-cht" as equivalents of "zh-HANS" "zh-HANT", we are helping, otherwise
             // it would throw on getCanonicalLocales with "RangeError: Incorrect locale information provided"
             locale = locale.replace("chs", "HANS").replace("cht", "HANT");
         }
         const canonicalLocales = (Intl as any).getCanonicalLocales(locale.replace("_", "-"));
         return canonicalLocales.length > 0 ? canonicalLocales[0] : undefined;
-    }
-    catch(ex: any)
-    {
-        throw new Error(`Get culture info failed for culture = ${locale} with error: ${ex}`);
+    } catch {
+        return undefined;
     }
 }
 
-export function normalizeSpaces(pattern: string)
-{
+export function normalizeSpaces (pattern: string) {
     if (!pattern.includes("\u202F"))
         return pattern;
 
@@ -41,11 +43,39 @@ export function normalizeSpaces(pattern: string)
 }
 
 
-export function isSurrogate(str: string, startIdx: number) : boolean
-{
+export function isSurrogate (str: string, startIdx: number): boolean {
     return SURROGATE_HIGHER_START <= str[startIdx] &&
         str[startIdx] <= SURROGATE_HIGHER_END &&
-        startIdx+1 < str.length &&
-        SURROGATE_LOWER_START <= str[startIdx+1] &&
-        str[startIdx+1] <= SURROGATE_LOWER_END;
+        startIdx + 1 < str.length &&
+        SURROGATE_LOWER_START <= str[startIdx + 1] &&
+        str[startIdx + 1] <= SURROGATE_LOWER_END;
+}
+
+function _wrap_error_flag (is_exception: Int32Ptr | null, ex: any): string {
+    const res = normalize_exception(ex);
+    if (is_exception) {
+        receiveWorkerHeapViews();
+        setI32_unchecked(is_exception, 1);
+    }
+    return res;
+}
+
+export function wrap_error_root (is_exception: Int32Ptr | null, ex: any, result: WasmRoot<MonoObject>): void {
+    const res = _wrap_error_flag(is_exception, ex);
+    if (NativeAOT) {
+        return;
+    }
+    stringToMonoStringRoot(res, <any>result);
+}
+
+// TODO replace it with replace it with UTF16 char*, no GC root needed
+// https://github.com/dotnet/runtime/issues/98365
+export function wrap_no_error_root (is_exception: Int32Ptr | null, result?: WasmRoot<MonoObject>): void {
+    if (is_exception) {
+        receiveWorkerHeapViews();
+        setI32_unchecked(is_exception, 0);
+    }
+    if (result) {
+        result.clear();
+    }
 }
diff --git a/src/mono/browser/runtime/hybrid-globalization/locales.ts b/src/mono/browser/runtime/hybrid-globalization/locales.ts
index 4330d0977e36..252dfe1badf9 100644
--- a/src/mono/browser/runtime/hybrid-globalization/locales.ts
+++ b/src/mono/browser/runtime/hybrid-globalization/locales.ts
@@ -1,14 +1,88 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import { wrap_error_root, wrap_no_error_root } from "../invoke-js";
+import { wrap_error_root, wrap_no_error_root } from "./helpers";
 import { mono_wasm_new_external_root } from "../roots";
-import { monoStringToString } from "../strings";
+import { monoStringToString, stringToUTF16 } from "../strings";
 import { Int32Ptr } from "../types/emscripten";
 import { MonoObject, MonoObjectRef, MonoString, MonoStringRef } from "../types/internal";
-import { normalizeLocale } from "./helpers";
+import { OUTER_SEPARATOR, normalizeLocale } from "./helpers";
 
-export function mono_wasm_get_first_day_of_week(culture: MonoStringRef, isException: Int32Ptr, exAddress: MonoObjectRef): number{
+export function mono_wasm_get_locale_info (culture: MonoStringRef, locale: MonoStringRef, dst: number, dstLength: number, isException: Int32Ptr, exAddress: MonoObjectRef): number {
+    const localeRoot = mono_wasm_new_external_root<MonoString>(locale),
+        cultureRoot = mono_wasm_new_external_root<MonoString>(culture),
+        exceptionRoot = mono_wasm_new_external_root<MonoObject>(exAddress);
+    try {
+        const localeNameOriginal = monoStringToString(localeRoot);
+        const localeName = normalizeLocale(localeNameOriginal);
+        if (!localeName && localeNameOriginal) {
+            // handle non-standard or malformed locales by forwarding the locale code
+            stringToUTF16(dst, dst + 2 * localeNameOriginal.length, localeNameOriginal);
+            wrap_no_error_root(isException, exceptionRoot);
+            return localeNameOriginal.length;
+        }
+        const cultureNameOriginal = monoStringToString(cultureRoot);
+        const cultureName = normalizeLocale(cultureNameOriginal);
+
+        if (!localeName || !cultureName)
+            throw new Error(`Locale or culture name is null or empty. localeName=${localeName}, cultureName=${cultureName}`);
+
+        const localeParts = localeName.split("-");
+        // cultureName can be in a form of:
+        // 1) "language", e.g. "zh"
+        // 2) "language-region", e.g. "zn-CN"
+        // 3) "language-script-region", e.g. "zh-Hans-CN"
+        // 4) "language-script", e.g. "zh-Hans" (served in the catch block below)
+        let languageName, regionName;
+        try {
+            const region = localeParts.length > 1 ? localeParts.pop() : undefined;
+            // this line might fail if form 4 from the comment above is used:
+            regionName = region ? new Intl.DisplayNames([cultureName], { type: "region" }).of(region) : undefined;
+            const language = localeParts.join("-");
+            languageName = new Intl.DisplayNames([cultureName], { type: "language" }).of(language);
+        } catch (error) {
+            if (error instanceof RangeError && error.message === "invalid_argument") {
+                // if it failed from this reason then cultureName is in a form "language-script", without region
+                try {
+                    languageName = new Intl.DisplayNames([cultureName], { type: "language" }).of(localeName);
+                } catch (error) {
+                    if (error instanceof RangeError && error.message === "invalid_argument" && localeNameOriginal) {
+                        // handle non-standard or malformed locales by forwarding the locale code, e.g. "xx-u-xx"
+                        stringToUTF16(dst, dst + 2 * localeNameOriginal.length, localeNameOriginal);
+                        wrap_no_error_root(isException, exceptionRoot);
+                        return localeNameOriginal.length;
+                    }
+                    throw error;
+                }
+            } else {
+                throw error;
+            }
+        }
+        const localeInfo = {
+            LanguageName: languageName,
+            RegionName: regionName,
+        };
+        const result = Object.values(localeInfo).join(OUTER_SEPARATOR);
+
+        if (!result)
+            throw new Error(`Locale info for locale=${localeName} is null or empty.`);
+
+        if (result.length > dstLength)
+            throw new Error(`Locale info for locale=${localeName} exceeds length of ${dstLength}.`);
+
+        stringToUTF16(dst, dst + 2 * result.length, result);
+        wrap_no_error_root(isException, exceptionRoot);
+        return result.length;
+    } catch (ex: any) {
+        wrap_error_root(isException, ex, exceptionRoot);
+        return -1;
+    } finally {
+        cultureRoot.release();
+        exceptionRoot.release();
+    }
+}
+
+export function mono_wasm_get_first_day_of_week (culture: MonoStringRef, isException: Int32Ptr, exAddress: MonoObjectRef): number {
 
     const cultureRoot = mono_wasm_new_external_root<MonoString>(culture),
         exceptionRoot = mono_wasm_new_external_root<MonoObject>(exAddress);
@@ -17,18 +91,16 @@ export function mono_wasm_get_first_day_of_week(culture: MonoStringRef, isExcept
         const canonicalLocale = normalizeLocale(cultureName);
         wrap_no_error_root(isException, exceptionRoot);
         return getFirstDayOfWeek(canonicalLocale);
-    }
-    catch (ex: any) {
+    } catch (ex: any) {
         wrap_error_root(isException, ex, exceptionRoot);
         return -1;
-    }
-    finally {
+    } finally {
         cultureRoot.release();
         exceptionRoot.release();
     }
 }
 
-export function mono_wasm_get_first_week_of_year(culture: MonoStringRef, isException: Int32Ptr, exAddress: MonoObjectRef): number{
+export function mono_wasm_get_first_week_of_year (culture: MonoStringRef, isException: Int32Ptr, exAddress: MonoObjectRef): number {
 
     const cultureRoot = mono_wasm_new_external_root<MonoString>(culture),
         exceptionRoot = mono_wasm_new_external_root<MonoObject>(exAddress);
@@ -37,47 +109,39 @@ export function mono_wasm_get_first_week_of_year(culture: MonoStringRef, isExcep
         const canonicalLocale = normalizeLocale(cultureName);
         wrap_no_error_root(isException, exceptionRoot);
         return getFirstWeekOfYear(canonicalLocale);
-    }
-    catch (ex: any) {
+    } catch (ex: any) {
         wrap_error_root(isException, ex, exceptionRoot);
         return -1;
-    }
-    finally {
+    } finally {
         cultureRoot.release();
         exceptionRoot.release();
     }
 }
 
-function getFirstDayOfWeek(locale: string)
-{
+function getFirstDayOfWeek (locale: string) {
     const weekInfo = getWeekInfo(locale);
-    if (weekInfo)
-    {
+    if (weekInfo) {
         // JS's Sunday == 7 while dotnet's Sunday == 0
         return weekInfo.firstDay == 7 ? 0 : weekInfo.firstDay;
     }
     // Firefox does not support it rn but we can make a temporary workaround for it,
     // that should be removed when it starts being supported:
-    const saturdayLocales = [ "en-AE", "en-SD", "fa-IR" ];
-    if (saturdayLocales.includes(locale))
-    {
+    const saturdayLocales = ["en-AE", "en-SD", "fa-IR"];
+    if (saturdayLocales.includes(locale)) {
         return 6;
     }
-    const sundayLanguages = [ "zh", "th", "pt", "mr", "ml", "ko", "kn", "ja", "id", "hi", "he", "gu", "fil", "bn", "am", "ar" ];
-    const sundayLocales = [ "ta-SG", "ta-IN", "sw-KE", "ms-SG", "fr-CA", "es-MX", "en-US", "en-ZW", "en-ZA", "en-WS", "en-VI", "en-UM", "en-TT", "en-SG", "en-PR", "en-PK", "en-PH", "en-MT", "en-MO", "en-MH", "en-KE", "en-JM", "en-IN", "en-IL", "en-HK", "en-GU", "en-DM", "en-CA", "en-BZ", "en-BW", "en-BS", "en-AU", "en-AS", "en-AG" ];
+    const sundayLanguages = ["zh", "th", "pt", "mr", "ml", "ko", "kn", "ja", "id", "hi", "he", "gu", "fil", "bn", "am", "ar"];
+    const sundayLocales = ["ta-SG", "ta-IN", "sw-KE", "ms-SG", "fr-CA", "es-MX", "en-US", "en-ZW", "en-ZA", "en-WS", "en-VI", "en-UM", "en-TT", "en-SG", "en-PR", "en-PK", "en-PH", "en-MT", "en-MO", "en-MH", "en-KE", "en-JM", "en-IN", "en-IL", "en-HK", "en-GU", "en-DM", "en-CA", "en-BZ", "en-BW", "en-BS", "en-AU", "en-AS", "en-AG"];
     const localeLang = locale.split("-")[0];
-    if (sundayLanguages.includes(localeLang) || sundayLocales.includes(locale))
-    {
+    if (sundayLanguages.includes(localeLang) || sundayLocales.includes(locale)) {
         return 0;
     }
     return 1;
 }
 
-function getFirstWeekOfYear(locale: string)
-{
+function getFirstWeekOfYear (locale: string) {
     const weekInfo = getWeekInfo(locale);
-    if (weekInfo)
-    {
+    if (weekInfo) {
         // enum CalendarWeekRule
         // FirstDay = 0,           // when minimalDays < 4
         // FirstFullWeek = 1,      // when miminalDays == 7
@@ -87,30 +151,25 @@ function getFirstWeekOfYear(locale: string)
     }
     // Firefox does not support it rn but we can make a temporary workaround for it,
     // that should be removed when it starts being supported:
-    const firstFourDayWeekLocales = [ "pt-PT", "fr-CH", "fr-FR", "fr-BE", "es-ES", "en-SE", "en-NL", "en-JE", "en-IM", "en-IE", "en-GI", "en-GG", "en-GB", "en-FJ", "en-FI", "en-DK", "en-DE", "en-CH", "en-BE", "en-AT", "el-GR" ];
-    const firstFourDayWeekLanguages = [ "sv", "sk", "ru", "pl", "nl", "no", "lt", "it", "hu", "fi", "et", "de", "da", "cs", "ca", "bg" ];
+    const firstFourDayWeekLocales = ["pt-PT", "fr-CH", "fr-FR", "fr-BE", "es-ES", "en-SE", "en-NL", "en-JE", "en-IM", "en-IE", "en-GI", "en-GG", "en-GB", "en-FJ", "en-FI", "en-DK", "en-DE", "en-CH", "en-BE", "en-AT", "el-GR"];
+    const firstFourDayWeekLanguages = ["sv", "sk", "ru", "pl", "nl", "no", "lt", "it", "hu", "fi", "et", "de", "da", "cs", "ca", "bg"];
     const localeLang = locale.split("-")[0];
-    if (firstFourDayWeekLocales.includes(locale) || firstFourDayWeekLanguages.includes(localeLang))
-    {
+    if (firstFourDayWeekLocales.includes(locale) || firstFourDayWeekLanguages.includes(localeLang)) {
         return 2;
     }
     return 0;
 }
 
-function getWeekInfo(locale: string)
-{
+function getWeekInfo (locale: string) {
     try {
         // most tools have it implemented as property
         return (new Intl.Locale(locale) as any).weekInfo;
-    }
-    catch {
+    } catch {
         try {
             // but a few use methods, which is the preferred way
             return (new Intl.Locale(locale) as any).getWeekInfo();
-        }
-        catch
-        {
+        } catch {
             return undefined;
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/mono/browser/runtime/icu.ts b/src/mono/browser/runtime/icu.ts
index 28aa01ad727c..16b23871f987 100644
--- a/src/mono/browser/runtime/icu.ts
+++ b/src/mono/browser/runtime/icu.ts
@@ -6,6 +6,6 @@ import { VoidPtr } from "./types/emscripten";
 
 // @offset must be the address of an ICU data archive in the native heap.
 // returns true on success.
-export function mono_wasm_load_icu_data(offset: VoidPtr): boolean {
+export function mono_wasm_load_icu_data (offset: VoidPtr): boolean {
     return (cwraps.mono_wasm_load_icu_data(offset)) === 1;
 }
diff --git a/src/mono/browser/runtime/interp-pgo.ts b/src/mono/browser/runtime/interp-pgo.ts
index dfe894f56982..14697385800b 100644
--- a/src/mono/browser/runtime/interp-pgo.ts
+++ b/src/mono/browser/runtime/interp-pgo.ts
@@ -11,7 +11,7 @@ import { MonoConfigInternal } from "./types/internal";
 
 export const tablePrefix = "https://dotnet.generated.invalid/interp_pgo";
 
-export async function interp_pgo_save_data() {
+export async function interp_pgo_save_data () {
     if (!loaderHelpers.is_runtime_running()) {
         mono_log_info("Skipped saving interp_pgo table (already exited)");
         return;
@@ -53,7 +53,7 @@ export async function interp_pgo_save_data() {
     }
 }
 
-export async function interp_pgo_load_data() {
+export async function interp_pgo_load_data () {
     const cacheKey = await getCacheKey(tablePrefix);
     if (!cacheKey) {
         mono_log_error("Failed to create cache key for interp_pgo table");
@@ -76,7 +76,7 @@ export async function interp_pgo_load_data() {
     Module._free(pData);
 }
 
-async function openCache(): Promise<Cache | null> {
+async function openCache (): Promise<Cache | null> {
     // cache integrity is compromised if the first request has been served over http (except localhost)
     // in this case, we want to disable caching and integrity validation
     if (ENVIRONMENT_IS_WEB && globalThis.window.isSecureContext === false) {
@@ -113,7 +113,7 @@ async function openCache(): Promise<Cache | null> {
     }
 }
 
-export async function getCacheEntry(cacheKey: string): Promise<ArrayBuffer | undefined> {
+export async function getCacheEntry (cacheKey: string): Promise<ArrayBuffer | undefined> {
     try {
         const cache = await openCache();
         if (!cache) {
@@ -130,7 +130,7 @@ export async function getCacheEntry(cacheKey: string): Promise<ArrayBuffer | und
     }
 }
 
-export async function storeCacheEntry(cacheKey: string, memory: ArrayBuffer, mimeType: string): Promise<boolean> {
+export async function storeCacheEntry (cacheKey: string, memory: ArrayBuffer, mimeType: string): Promise<boolean> {
     try {
         const cache = await openCache();
         if (!cache) {
@@ -157,7 +157,7 @@ export async function storeCacheEntry(cacheKey: string, memory: ArrayBuffer, mim
     }
 }
 
-export async function cleanupCache(prefix: string, protectKey: string) {
+export async function cleanupCache (prefix: string, protectKey: string) {
     try {
         const cache = await openCache();
         if (!cache) {
@@ -175,7 +175,7 @@ export async function cleanupCache(prefix: string, protectKey: string) {
 }
 
 // calculate hash of things which affect config hash
-export async function getCacheKey(prefix: string): Promise<string | null> {
+export async function getCacheKey (prefix: string): Promise<string | null> {
     if (!runtimeHelpers.subtle) {
         return null;
     }
@@ -193,11 +193,12 @@ export async function getCacheKey(prefix: string): Promise<string | null> {
     delete inputs.forwardConsoleLogsToWS;
     delete inputs.diagnosticTracing;
     delete inputs.appendElementOnExit;
-    delete inputs.assertAfterExit;
     delete inputs.interopCleanupOnExit;
     delete inputs.dumpThreadsOnNonZeroExit;
     delete inputs.logExitCode;
-    delete inputs.pthreadPoolSize;
+    delete inputs.pthreadPoolInitialSize;
+    delete inputs.pthreadPoolUnusedSize;
+    delete inputs.finalizerThreadStartDelayMs;
     delete inputs.asyncFlushOnExit;
     delete inputs.remoteSources;
     delete inputs.ignorePdbLoadErrors;
@@ -205,6 +206,7 @@ export async function getCacheKey(prefix: string): Promise<string | null> {
     delete inputs.enableDownloadRetry;
     delete inputs.extensions;
     delete inputs.runtimeId;
+    delete inputs.jsThreadBlockingMode;
 
     inputs.GitHash = loaderHelpers.gitHash;
     inputs.ProductVersion = ProductVersion;
diff --git a/src/mono/browser/runtime/invoke-cs.ts b/src/mono/browser/runtime/invoke-cs.ts
index cac5551ca81a..be36c3f9e971 100644
--- a/src/mono/browser/runtime/invoke-cs.ts
+++ b/src/mono/browser/runtime/invoke-cs.ts
@@ -10,177 +10,153 @@ import WasmEnableThreads from "consts:wasmEnableThreads";
 
 import { Module, loaderHelpers, mono_assert, runtimeHelpers } from "./globals";
 import { bind_arg_marshal_to_cs } from "./marshal-to-cs";
-import { marshal_exception_to_js, bind_arg_marshal_to_js, end_marshal_task_to_js } from "./marshal-to-js";
+import { bind_arg_marshal_to_js, end_marshal_task_to_js } from "./marshal-to-js";
 import {
-    get_arg, get_sig, get_signature_argument_count, is_args_exception,
-    bound_cs_function_symbol, get_signature_version, alloc_stack_frame, get_signature_type, set_args_context,
+    get_sig, get_signature_argument_count,
+    bound_cs_function_symbol, get_signature_version, alloc_stack_frame, get_signature_type,
 } from "./marshal";
-import { mono_wasm_new_external_root, mono_wasm_new_root } from "./roots";
-import { monoStringToString } from "./strings";
-import { MonoObjectRef, MonoStringRef, MonoString, MonoObject, MonoMethod, JSMarshalerArguments, JSFunctionSignature, BoundMarshalerToCs, BoundMarshalerToJs, VoidPtrNull, MonoObjectRefNull, MonoObjectNull, MarshalerType, MonoAssembly } from "./types/internal";
-import { Int32Ptr } from "./types/emscripten";
-import cwraps from "./cwraps";
-import { assert_c_interop, assert_js_interop, wrap_error_root, wrap_no_error_root } from "./invoke-js";
+import { MonoMethod, JSFunctionSignature, BoundMarshalerToCs, BoundMarshalerToJs, MarshalerType } from "./types/internal";
+import { assert_js_interop } from "./invoke-js";
 import { startMeasure, MeasuredBlock, endMeasure } from "./profiler";
+import { bind_assembly_exports, invoke_async_jsexport, invoke_sync_jsexport } from "./managed-exports";
 import { mono_log_debug } from "./logging";
 
-const _assembly_cache_by_name = new Map<string, MonoAssembly>();
+const s_charsToReplace = [".", "-", "+"];
 
-// function mono_wasm_bind_cs_function_naot(fully_qualified_name: CharPtr, fully_qualified_name_length: number, signature_hash: number, signature: JSFunctionSignature, is_exception: Int32Ptr): void
-export function mono_wasm_bind_cs_function(fully_qualified_name: MonoStringRef, signature_hash: number, signature: JSFunctionSignature, is_exception: Int32Ptr, result_address: MonoObjectRef): void {
-    assert_js_interop();
-    const fqn_root = mono_wasm_new_external_root<MonoString>(fully_qualified_name), resultRoot = mono_wasm_new_external_root<MonoObject>(result_address);
-    const mark = startMeasure();
-    try {
-        if (NativeAOT) {
-            signature_hash = arguments[2];
-            signature = arguments[3];
-            is_exception = arguments[4];
+function fixupSymbolName (name: string) {
+    // Sync with JSExportGenerator.FixupSymbolName
+    let result = "";
+    for (let index = 0; index < name.length; index++) {
+        const b = name[index];
+        if ((b >= "0" && b <= "9") ||
+            (b >= "a" && b <= "z") ||
+            (b >= "A" && b <= "Z") ||
+            (b == "_")) {
+            result += b;
+        } else if (s_charsToReplace.includes(b)) {
+            result += "_";
+        } else {
+            result += `_${b.charCodeAt(0).toString(16).toUpperCase()}_`;
         }
-        const version = get_signature_version(signature);
-        mono_assert(version === 2, () => `Signature version ${version} mismatch.`);
+    }
 
-        const args_count = get_signature_argument_count(signature);
-        const js_fqn = NativeAOT ? utf16ToString(arguments[0], arguments[0] + 2 * arguments[1]) : monoStringToString(fqn_root)!;
-        mono_assert(js_fqn, "fully_qualified_name must be string");
+    return result;
+}
 
-        mono_log_debug(`Binding [JSExport] ${js_fqn}`);
+export function mono_wasm_bind_cs_function (method: MonoMethod, assemblyName: string, namespaceName: string, shortClassName: string, methodName: string, signatureHash: number, signature: JSFunctionSignature): void {
+    const fullyQualifiedName = `[${assemblyName}] ${namespaceName}.${shortClassName}:${methodName}`;
+    const mark = startMeasure();
+    mono_log_debug(`Binding [JSExport] ${namespaceName}.${shortClassName}:${methodName} from ${assemblyName} assembly`);
+    if (NativeAOT) {
+        signatureHash = arguments[2];
+        signature = arguments[3];
+
+        const js_fqn = utf16ToString(arguments[0], arguments[0] + 2 * arguments[1]);
+        const wrapper_name = fixupSymbolName(`${js_fqn}_${signatureHash}`);
+        method = (Module as any)["_" + wrapper_name];
+        if (!method)
+            throw new Error(`Could not find method: ${wrapper_name} in ${js_fqn}`);
+    }
+    const version = get_signature_version(signature);
+    mono_assert(version === 2, () => `Signature version ${version} mismatch.`);
+
+    const args_count = get_signature_argument_count(signature);
+
+    const arg_marshalers: (BoundMarshalerToCs)[] = new Array(args_count);
+    for (let index = 0; index < args_count; index++) {
+        const sig = get_sig(signature, index + 2);
+        const marshaler_type = get_signature_type(sig);
+        const arg_marshaler = bind_arg_marshal_to_cs(sig, marshaler_type, index + 2);
+        mono_assert(arg_marshaler, "ERR43: argument marshaler must be resolved");
+        arg_marshalers[index] = arg_marshaler;
+    }
 
-        const { assembly, namespace, classname, methodname } = parseFQN(js_fqn);
+    const res_sig = get_sig(signature, 1);
+    let res_marshaler_type = get_signature_type(res_sig);
 
-        let method = null;
-        if (NativeAOT) {
-            const wrapper_name = fixupSymbolName(`${js_fqn}_${signature_hash}`);
-            method = (Module as any)["_" + wrapper_name];
-            if (!method)
-                throw new Error(`Could not find method: ${wrapper_name} in ${js_fqn}`);
-        } else {
-            const asm = assembly_load(assembly);
-            if (!asm)
-                throw new Error("Could not find assembly: " + assembly);
-
-            const klass = cwraps.mono_wasm_assembly_find_class(asm, namespace, classname);
-            if (!klass)
-                throw new Error("Could not find class: " + namespace + ":" + classname + " in assembly " + assembly);
-
-            const wrapper_name = `__Wrapper_${methodname}_${signature_hash}`;
-            method = cwraps.mono_wasm_assembly_find_method(klass, wrapper_name, -1);
-            if (!method)
-                throw new Error(`Could not find method: ${wrapper_name} in ${klass} [${assembly}]`);
-        }
+    // hack until we have public API for JSType.DiscardNoWait
+    if (WasmEnableThreads && shortClassName === "DefaultWebAssemblyJSRuntime"
+        && namespaceName === "Microsoft.AspNetCore.Components.WebAssembly.Services"
+        && (methodName === "BeginInvokeDotNet" || methodName === "EndInvokeJS" || methodName === "ReceiveByteArrayFromJS")) {
+        res_marshaler_type = MarshalerType.DiscardNoWait;
+    }
 
-        const arg_marshalers: (BoundMarshalerToCs)[] = new Array(args_count);
-        for (let index = 0; index < args_count; index++) {
-            const sig = get_sig(signature, index + 2);
-            const marshaler_type = get_signature_type(sig);
-            const arg_marshaler = bind_arg_marshal_to_cs(sig, marshaler_type, index + 2);
-            mono_assert(arg_marshaler, "ERR43: argument marshaler must be resolved");
-            arg_marshalers[index] = arg_marshaler;
-        }
+    const is_async = res_marshaler_type == MarshalerType.Task;
+    const is_discard_no_wait = res_marshaler_type == MarshalerType.DiscardNoWait;
+    if (is_async) {
+        res_marshaler_type = MarshalerType.TaskPreCreated;
+    }
+    const res_converter = bind_arg_marshal_to_js(res_sig, res_marshaler_type, 1);
+
+    const closure: BindingClosure = {
+        method,
+        fullyQualifiedName,
+        args_count,
+        arg_marshalers,
+        res_converter,
+        is_async,
+        is_discard_no_wait,
+        isDisposed: false,
+    };
+    let bound_fn: Function;
 
-        const res_sig = get_sig(signature, 1);
-        let res_marshaler_type = get_signature_type(res_sig);
-        const is_async = res_marshaler_type == MarshalerType.Task;
-        if (is_async) {
-            res_marshaler_type = MarshalerType.TaskPreCreated;
+    if (is_async) {
+        if (args_count == 1 && res_converter) {
+            bound_fn = bind_fn_1RA(closure);
+        } else if (args_count == 2 && res_converter) {
+            bound_fn = bind_fn_2RA(closure);
+        } else {
+            bound_fn = bind_fn(closure);
         }
-        const res_converter = bind_arg_marshal_to_js(res_sig, res_marshaler_type, 1);
-
-        const closure: BindingClosure = {
-            method,
-            fqn: js_fqn,
-            args_count,
-            arg_marshalers,
-            res_converter,
-            is_async,
-            isDisposed: false,
-        };
-        let bound_fn: Function;
-        // void
+    } else if (is_discard_no_wait) {
+        bound_fn = bind_fn(closure);
+    } else {
         if (args_count == 0 && !res_converter) {
             bound_fn = bind_fn_0V(closure);
-        }
-        else if (args_count == 1 && !res_converter) {
+        } else if (args_count == 1 && !res_converter) {
             bound_fn = bind_fn_1V(closure);
-        }
-        else if (is_async && args_count == 1 && res_converter) {
-            bound_fn = bind_fn_1RA(closure);
-        }
-        else if (is_async && args_count == 2 && res_converter) {
-            bound_fn = bind_fn_2RA(closure);
-        }
-        else if (args_count == 1 && res_converter) {
+        } else if (args_count == 1 && res_converter) {
             bound_fn = bind_fn_1R(closure);
-        }
-        else if (args_count == 2 && res_converter) {
+        } else if (args_count == 2 && res_converter) {
             bound_fn = bind_fn_2R(closure);
-        }
-        else {
+        } else {
             bound_fn = bind_fn(closure);
         }
-
-        // this is just to make debugging easier. 
-        // It's not CSP compliant and possibly not performant, that's why it's only enabled in debug builds
-        // in Release configuration, it would be a trimmed by rollup
-        if (BuildConfiguration === "Debug" && !runtimeHelpers.cspPolicy) {
-            try {
-                bound_fn = new Function("fn", "return (function JSExport_" + methodname + "(){ return fn.apply(this, arguments)});")(bound_fn);
-            }
-            catch (ex) {
-                runtimeHelpers.cspPolicy = true;
-            }
-        }
-
-        (<any>bound_fn)[bound_cs_function_symbol] = closure;
-
-        _walk_exports_to_set_function(assembly, namespace, classname, methodname, signature_hash, bound_fn);
-        endMeasure(mark, MeasuredBlock.bindCsFunction, js_fqn);
-        wrap_no_error_root(is_exception, resultRoot);
     }
-    catch (ex: any) {
-        Module.err(ex.toString());
-        wrap_error_root(is_exception, ex, resultRoot);
-    } finally {
-        resultRoot.release();
-        fqn_root.release();
-    }
-}
 
-const s_charsToReplace = [".", "-", "+"];
-
-function fixupSymbolName(name: string) {
-    // Sync with JSExportGenerator.FixupSymbolName
-    let result = "";
-    for (let index = 0; index < name.length; index++) {
-        const b = name[index];
-        if ((b >= "0" && b <= "9") ||
-            (b >= "a" && b <= "z") ||
-            (b >= "A" && b <= "Z") ||
-            (b == "_")) {
-            result += b;
-        } else if( s_charsToReplace.includes(b)) {
-            result += "_";
-        } else {
-            result += `_${b.charCodeAt(0).toString(16).toUpperCase()}_`;
+    // this is just to make debugging easier.
+    // It's not CSP compliant and possibly not performant, that's why it's only enabled in debug builds
+    // in Release configuration, it would be a trimmed by rollup
+    if (BuildConfiguration === "Debug" && !runtimeHelpers.cspPolicy) {
+        try {
+            const url = `//# sourceURL=https://dotnet/JSExport/${methodName}`;
+            const body = `return (function JSExport_${methodName}(){ return fn.apply(this, arguments)});`;
+            bound_fn = new Function("fn", url + "\r\n" + body)(bound_fn);
+        } catch (ex) {
+            runtimeHelpers.cspPolicy = true;
         }
     }
 
-    return result;
+    (<any>bound_fn)[bound_cs_function_symbol] = closure;
+
+    _walk_exports_to_set_function(assemblyName, namespaceName, shortClassName, methodName, signatureHash, bound_fn);
+    endMeasure(mark, MeasuredBlock.bindCsFunction, fullyQualifiedName);
 }
 
-function bind_fn_0V(closure: BindingClosure) {
+function bind_fn_0V (closure: BindingClosure) {
     const method = closure.method;
-    const fqn = closure.fqn;
+    const fqn = closure.fullyQualifiedName;
     if (!WasmEnableThreads) (<any>closure) = null;
-    return function bound_fn_0V() {
+    return function bound_fn_0V () {
         const mark = startMeasure();
         loaderHelpers.assert_runtime_running();
         mono_assert(!WasmEnableThreads || !closure.isDisposed, "The function was already disposed");
         const sp = Module.stackSave();
         try {
-            const args = alloc_stack_frame(2);
+            const size = 2;
+            const args = alloc_stack_frame(size);
             // call C# side
-            invoke_method_and_handle_exception(method, args);
+            invoke_sync_jsexport(method, args);
         } finally {
             Module.stackRestore(sp);
             endMeasure(mark, MeasuredBlock.callCsFunction, fqn);
@@ -188,22 +164,23 @@ function bind_fn_0V(closure: BindingClosure) {
     };
 }
 
-function bind_fn_1V(closure: BindingClosure) {
+function bind_fn_1V (closure: BindingClosure) {
     const method = closure.method;
     const marshaler1 = closure.arg_marshalers[0]!;
-    const fqn = closure.fqn;
+    const fqn = closure.fullyQualifiedName;
     if (!WasmEnableThreads) (<any>closure) = null;
-    return function bound_fn_1V(arg1: any) {
+    return function bound_fn_1V (arg1: any) {
         const mark = startMeasure();
         loaderHelpers.assert_runtime_running();
         mono_assert(!WasmEnableThreads || !closure.isDisposed, "The function was already disposed");
         const sp = Module.stackSave();
         try {
-            const args = alloc_stack_frame(3);
+            const size = 3;
+            const args = alloc_stack_frame(size);
             marshaler1(args, arg1);
 
             // call C# side
-            invoke_method_and_handle_exception(method, args);
+            invoke_sync_jsexport(method, args);
         } finally {
             Module.stackRestore(sp);
             endMeasure(mark, MeasuredBlock.callCsFunction, fqn);
@@ -211,23 +188,24 @@ function bind_fn_1V(closure: BindingClosure) {
     };
 }
 
-function bind_fn_1R(closure: BindingClosure) {
+function bind_fn_1R (closure: BindingClosure) {
     const method = closure.method;
     const marshaler1 = closure.arg_marshalers[0]!;
     const res_converter = closure.res_converter!;
-    const fqn = closure.fqn;
+    const fqn = closure.fullyQualifiedName;
     if (!WasmEnableThreads) (<any>closure) = null;
-    return function bound_fn_1R(arg1: any) {
+    return function bound_fn_1R (arg1: any) {
         const mark = startMeasure();
         loaderHelpers.assert_runtime_running();
         mono_assert(!WasmEnableThreads || !closure.isDisposed, "The function was already disposed");
         const sp = Module.stackSave();
         try {
-            const args = alloc_stack_frame(3);
+            const size = 3;
+            const args = alloc_stack_frame(size);
             marshaler1(args, arg1);
 
             // call C# side
-            invoke_method_and_handle_exception(method, args);
+            invoke_sync_jsexport(method, args);
 
             const js_result = res_converter(args);
             return js_result;
@@ -238,26 +216,27 @@ function bind_fn_1R(closure: BindingClosure) {
     };
 }
 
-function bind_fn_1RA(closure: BindingClosure) {
+function bind_fn_1RA (closure: BindingClosure) {
     const method = closure.method;
     const marshaler1 = closure.arg_marshalers[0]!;
     const res_converter = closure.res_converter!;
-    const fqn = closure.fqn;
+    const fqn = closure.fullyQualifiedName;
     if (!WasmEnableThreads) (<any>closure) = null;
-    return function bound_fn_1R(arg1: any) {
+    return function bind_fn_1RA (arg1: any) {
         const mark = startMeasure();
         loaderHelpers.assert_runtime_running();
         mono_assert(!WasmEnableThreads || !closure.isDisposed, "The function was already disposed");
         const sp = Module.stackSave();
         try {
-            const args = alloc_stack_frame(3);
+            const size = 3;
+            const args = alloc_stack_frame(size);
             marshaler1(args, arg1);
 
             // pre-allocate the promise
             let promise = res_converter(args);
 
             // call C# side
-            invoke_method_and_handle_exception(method, args);
+            invoke_async_jsexport(runtimeHelpers.managedThreadTID, method, args, size);
 
             // in case the C# side returned synchronously
             promise = end_marshal_task_to_js(args, undefined, promise);
@@ -270,25 +249,26 @@ function bind_fn_1RA(closure: BindingClosure) {
     };
 }
 
-function bind_fn_2R(closure: BindingClosure) {
+function bind_fn_2R (closure: BindingClosure) {
     const method = closure.method;
     const marshaler1 = closure.arg_marshalers[0]!;
     const marshaler2 = closure.arg_marshalers[1]!;
     const res_converter = closure.res_converter!;
-    const fqn = closure.fqn;
+    const fqn = closure.fullyQualifiedName;
     if (!WasmEnableThreads) (<any>closure) = null;
-    return function bound_fn_2R(arg1: any, arg2: any) {
+    return function bound_fn_2R (arg1: any, arg2: any) {
         const mark = startMeasure();
         loaderHelpers.assert_runtime_running();
         mono_assert(!WasmEnableThreads || !closure.isDisposed, "The function was already disposed");
         const sp = Module.stackSave();
         try {
-            const args = alloc_stack_frame(4);
+            const size = 4;
+            const args = alloc_stack_frame(size);
             marshaler1(args, arg1);
             marshaler2(args, arg2);
 
             // call C# side
-            invoke_method_and_handle_exception(method, args);
+            invoke_sync_jsexport(method, args);
 
             const js_result = res_converter(args);
             return js_result;
@@ -299,20 +279,21 @@ function bind_fn_2R(closure: BindingClosure) {
     };
 }
 
-function bind_fn_2RA(closure: BindingClosure) {
+function bind_fn_2RA (closure: BindingClosure) {
     const method = closure.method;
     const marshaler1 = closure.arg_marshalers[0]!;
     const marshaler2 = closure.arg_marshalers[1]!;
     const res_converter = closure.res_converter!;
-    const fqn = closure.fqn;
+    const fqn = closure.fullyQualifiedName;
     if (!WasmEnableThreads) (<any>closure) = null;
-    return function bound_fn_2R(arg1: any, arg2: any) {
+    return function bind_fn_2RA (arg1: any, arg2: any) {
         const mark = startMeasure();
         loaderHelpers.assert_runtime_running();
         mono_assert(!WasmEnableThreads || !closure.isDisposed, "The function was already disposed");
         const sp = Module.stackSave();
         try {
-            const args = alloc_stack_frame(4);
+            const size = 4;
+            const args = alloc_stack_frame(size);
             marshaler1(args, arg1);
             marshaler2(args, arg2);
 
@@ -320,7 +301,7 @@ function bind_fn_2RA(closure: BindingClosure) {
             let promise = res_converter(args);
 
             // call C# side
-            invoke_method_and_handle_exception(method, args);
+            invoke_async_jsexport(runtimeHelpers.managedThreadTID, method, args, size);
 
             // in case the C# side returned synchronously
             promise = end_marshal_task_to_js(args, undefined, promise);
@@ -333,21 +314,23 @@ function bind_fn_2RA(closure: BindingClosure) {
     };
 }
 
-function bind_fn(closure: BindingClosure) {
+function bind_fn (closure: BindingClosure) {
     const args_count = closure.args_count;
     const arg_marshalers = closure.arg_marshalers;
     const res_converter = closure.res_converter;
     const method = closure.method;
-    const fqn = closure.fqn;
+    const fqn = closure.fullyQualifiedName;
     const is_async = closure.is_async;
+    const is_discard_no_wait = closure.is_discard_no_wait;
     if (!WasmEnableThreads) (<any>closure) = null;
-    return function bound_fn(...js_args: any[]) {
+    return function bound_fn (...js_args: any[]) {
         const mark = startMeasure();
         loaderHelpers.assert_runtime_running();
         mono_assert(!WasmEnableThreads || !closure.isDisposed, "The function was already disposed");
         const sp = Module.stackSave();
         try {
-            const args = alloc_stack_frame(2 + args_count);
+            const size = 2 + args_count;
+            const args = alloc_stack_frame(size);
             for (let index = 0; index < args_count; index++) {
                 const marshaler = arg_marshalers[index];
                 if (marshaler) {
@@ -362,13 +345,18 @@ function bind_fn(closure: BindingClosure) {
             }
 
             // call C# side
-            invoke_method_and_handle_exception(method, args);
             if (is_async) {
+                invoke_async_jsexport(runtimeHelpers.managedThreadTID, method, args, size);
                 // in case the C# side returned synchronously
                 js_result = end_marshal_task_to_js(args, undefined, js_result);
-            }
-            else if (res_converter) {
-                js_result = res_converter(args);
+            } else if (is_discard_no_wait) {
+                // call C# side, fire and forget
+                invoke_async_jsexport(runtimeHelpers.managedThreadTID, method, args, size);
+            } else {
+                invoke_sync_jsexport(method, args);
+                if (res_converter) {
+                    js_result = res_converter(args);
+                }
             }
             return js_result;
         } finally {
@@ -379,56 +367,18 @@ function bind_fn(closure: BindingClosure) {
 }
 
 type BindingClosure = {
-    fqn: string,
+    fullyQualifiedName: string,
     args_count: number,
     method: MonoMethod,
     arg_marshalers: (BoundMarshalerToCs)[],
     res_converter: BoundMarshalerToJs | undefined,
     is_async: boolean,
+    is_discard_no_wait: boolean,
     isDisposed: boolean,
 }
 
-export function invoke_method_and_handle_exception_mono(method: MonoMethod, args: JSMarshalerArguments): void {
-    assert_js_interop();
-    const fail_root = mono_wasm_new_root<MonoString>();
-    try {
-        set_args_context(args);
-        const fail = cwraps.mono_wasm_invoke_method_bound(method, args, fail_root.address);
-        if (fail) runtimeHelpers.nativeAbort("ERR24: Unexpected error: " + monoStringToString(fail_root));
-        if (is_args_exception(args)) {
-            const exc = get_arg(args, 0);
-            throw marshal_exception_to_js(exc);
-        }
-    }
-    finally {
-        fail_root.release();
-    }
-}
-
-function invoke_method_and_handle_exception_naot(method: Function, args: JSMarshalerArguments): void {
-    method(args);
-    if (is_args_exception(args)) {
-        const exc = get_arg(args, 0);
-        throw marshal_exception_to_js(exc);
-    }
-}
-
-export const invoke_method_and_handle_exception: (method: any, args: JSMarshalerArguments) => void = NativeAOT ? invoke_method_and_handle_exception_naot : invoke_method_and_handle_exception_mono;
-
-export function invoke_method_raw(method: MonoMethod): void {
-    assert_c_interop();
-    const fail_root = mono_wasm_new_root<MonoString>();
-    try {
-        const fail = cwraps.mono_wasm_invoke_method_raw(method, fail_root.address);
-        if (fail) runtimeHelpers.nativeAbort("ERR24: Unexpected error: " + monoStringToString(fail_root));
-    }
-    finally {
-        fail_root.release();
-    }
-}
-
 export const exportsByAssembly: Map<string, any> = new Map();
-function _walk_exports_to_set_function(assembly: string, namespace: string, classname: string, methodname: string, signature_hash: number, fn: Function): void {
+function _walk_exports_to_set_function (assembly: string, namespace: string, classname: string, methodname: string, signature_hash: number, fn: Function): void {
     const parts = `${namespace}.${classname}`.replace(/\//g, ".").split(".");
     let scope: any = undefined;
     let assemblyScope = exportsByAssembly.get(assembly);
@@ -457,97 +407,12 @@ function _walk_exports_to_set_function(assembly: string, namespace: string, clas
     scope[`${methodname}.${signature_hash}`] = fn;
 }
 
-async function mono_wasm_get_assembly_exports_mono(assembly: string): Promise<any> {
-    assert_js_interop();
-    const result = exportsByAssembly.get(assembly);
-    if (!result) {
-        const mark = startMeasure();
-        const asm = assembly_load(assembly);
-        if (!asm)
-            throw new Error("Could not find assembly: " + assembly);
-
-        const klass = cwraps.mono_wasm_assembly_find_class(asm, runtimeHelpers.runtime_interop_namespace, "__GeneratedInitializer");
-        if (klass) {
-            const method = cwraps.mono_wasm_assembly_find_method(klass, "__Register_", -1);
-            if (method) {
-                const outException = mono_wasm_new_root();
-                const outResult = mono_wasm_new_root<MonoString>();
-                try {
-                    cwraps.mono_wasm_invoke_method_ref(method, MonoObjectRefNull, VoidPtrNull, outException.address, outResult.address);
-                    if (outException.value !== MonoObjectNull) {
-                        const msg = monoStringToString(outResult)!;
-                        throw new Error(msg);
-                    }
-                }
-                finally {
-                    outException.release();
-                    outResult.release();
-                }
-            }
-        } else {
-            mono_assert(!WasmEnableThreads, () => `JSExport with multi-threading enabled is not supported with assembly ${assembly} as it was generated with the .NET 7 SDK`);
-            // this needs to stay here for compatibility with assemblies generated in Net7
-            // it doesn't have the __GeneratedInitializer class
-            cwraps.mono_wasm_runtime_run_module_cctor(asm);
-        }
-        endMeasure(mark, MeasuredBlock.getAssemblyExports, assembly);
-    }
-
-    return exportsByAssembly.get(assembly) || {};
-}
-
-async function mono_wasm_get_assembly_exports_naot(assembly: string): Promise<any> {
+export async function mono_wasm_get_assembly_exports (assembly: string): Promise<any> {
     assert_js_interop();
     const result = exportsByAssembly.get(assembly);
     if (!result) {
-        const mark = startMeasure();
-        
-        let assemblyWithoutExtension = assembly;
-        if (assemblyWithoutExtension.endsWith(".dll")) {
-            assemblyWithoutExtension = assemblyWithoutExtension.substring(0, assembly.length - 4);
-        }
-        const register = (Module as any)["_" + assemblyWithoutExtension + "__GeneratedInitializer" + "__Register_"];
-        mono_assert(register, `Missing wasm export for JSExport registration function in assembly ${assembly}`);
-        register();
-
-        endMeasure(mark, MeasuredBlock.getAssemblyExports, assembly);
+        await bind_assembly_exports(assembly);
     }
 
     return exportsByAssembly.get(assembly) || {};
 }
-
-export const mono_wasm_get_assembly_exports = NativeAOT ? mono_wasm_get_assembly_exports_naot : mono_wasm_get_assembly_exports_mono;
-
-export function parseFQN(fqn: string)
-    : { assembly: string, namespace: string, classname: string, methodname: string } {
-    const assembly = fqn.substring(fqn.indexOf("[") + 1, fqn.indexOf("]")).trim();
-    fqn = fqn.substring(fqn.indexOf("]") + 1).trim();
-
-    const methodname = fqn.substring(fqn.indexOf(":") + 1);
-    fqn = fqn.substring(0, fqn.indexOf(":")).trim();
-
-    let namespace = "";
-    let classname = fqn;
-    if (fqn.indexOf(".") != -1) {
-        const idx = fqn.lastIndexOf(".");
-        namespace = fqn.substring(0, idx);
-        classname = fqn.substring(idx + 1);
-    }
-
-    if (!assembly.trim())
-        throw new Error("No assembly name specified " + fqn);
-    if (!classname.trim())
-        throw new Error("No class name specified " + fqn);
-    if (!methodname.trim())
-        throw new Error("No method name specified " + fqn);
-    return { assembly, namespace, classname, methodname };
-}
-
-export function assembly_load(name: string): MonoAssembly {
-    if (_assembly_cache_by_name.has(name))
-        return <MonoAssembly>_assembly_cache_by_name.get(name);
-
-    const result = cwraps.mono_wasm_assembly_load(name);
-    _assembly_cache_by_name.set(name, result);
-    return result;
-}
diff --git a/src/mono/browser/runtime/invoke-js.ts b/src/mono/browser/runtime/invoke-js.ts
index 10e9ca9e601d..2dc7584ab2b6 100644
--- a/src/mono/browser/runtime/invoke-js.ts
+++ b/src/mono/browser/runtime/invoke-js.ts
@@ -2,44 +2,38 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 /* eslint-disable prefer-rest-params */
 
-import NativeAOT from "consts:nativeAOT";
-
 import WasmEnableThreads from "consts:wasmEnableThreads";
 import BuildConfiguration from "consts:configuration";
 
-import { marshal_exception_to_cs, bind_arg_marshal_to_cs } from "./marshal-to-cs";
-import { get_signature_argument_count, bound_js_function_symbol, get_sig, get_signature_version, get_signature_type, imported_js_function_symbol, get_signature_handle, get_signature_function_name, get_signature_module_name } from "./marshal";
-import { setI32_unchecked, receiveWorkerHeapViews } from "./memory";
-import { stringToMonoStringRoot } from "./strings";
-import { MonoObject, MonoObjectRef, JSFunctionSignature, JSMarshalerArguments, WasmRoot, BoundMarshalerToJs, JSFnHandle, BoundMarshalerToCs, JSHandle, MarshalerType } from "./types/internal";
-import { Int32Ptr } from "./types/emscripten";
-import { ENVIRONMENT_IS_WORKER, INTERNAL, Module, loaderHelpers, mono_assert, runtimeHelpers } from "./globals";
+import { marshal_exception_to_cs, bind_arg_marshal_to_cs, marshal_task_to_cs } from "./marshal-to-cs";
+import { get_signature_argument_count, bound_js_function_symbol, get_sig, get_signature_version, get_signature_type, imported_js_function_symbol, get_signature_handle, get_signature_function_name, get_signature_module_name, is_receiver_should_free, get_caller_native_tid, get_sync_done_semaphore_ptr, get_arg } from "./marshal";
+import { forceThreadMemoryViewRefresh } from "./memory";
+import { JSFunctionSignature, JSMarshalerArguments, BoundMarshalerToJs, JSFnHandle, BoundMarshalerToCs, JSHandle, MarshalerType, VoidPtrNull } from "./types/internal";
+import { VoidPtr } from "./types/emscripten";
+import { INTERNAL, Module, loaderHelpers, mono_assert, runtimeHelpers } from "./globals";
 import { bind_arg_marshal_to_js } from "./marshal-to-js";
-import { mono_wasm_new_external_root } from "./roots";
 import { mono_log_debug, mono_wasm_symbolicate_string } from "./logging";
 import { mono_wasm_get_jsobj_from_js_handle } from "./gc-handles";
 import { endMeasure, MeasuredBlock, startMeasure } from "./profiler";
 import { wrap_as_cancelable_promise } from "./cancelable-promise";
-import { is_thread_available } from "./pthreads/shared/emscripten-replacements";
+import { threads_c_functions as tcwraps } from "./cwraps";
+import { monoThreadInfo } from "./pthreads";
+import { stringToUTF16Ptr } from "./strings";
 
 export const js_import_wrapper_by_fn_handle: Function[] = <any>[null];// 0th slot is dummy, main thread we free them on shutdown. On web worker thread we free them when worker is detached.
 
-export function mono_wasm_bind_js_import(signature: JSFunctionSignature, is_exception: Int32Ptr, result_address: MonoObjectRef): void {
-    if (WasmEnableThreads) return;
+export function mono_wasm_bind_js_import_ST (signature: JSFunctionSignature): VoidPtr {
+    if (WasmEnableThreads) return VoidPtrNull;
     assert_js_interop();
-    const resultRoot = mono_wasm_new_external_root<MonoObject>(result_address);
     try {
         bind_js_import(signature);
-        wrap_no_error_root(is_exception, resultRoot);
+        return VoidPtrNull;
     } catch (ex: any) {
-        Module.err(ex.toString());
-        wrap_error_root(is_exception, ex, resultRoot);
-    } finally {
-        resultRoot.release();
+        return stringToUTF16Ptr(normalize_exception(ex));
     }
 }
 
-export function mono_wasm_invoke_import_async(args: JSMarshalerArguments, signature: JSFunctionSignature) {
+export function mono_wasm_invoke_jsimport_MT (signature: JSFunctionSignature, args: JSMarshalerArguments) {
     if (!WasmEnableThreads) return;
     assert_js_interop();
 
@@ -48,49 +42,44 @@ export function mono_wasm_invoke_import_async(args: JSMarshalerArguments, signat
     let bound_fn = js_import_wrapper_by_fn_handle[function_handle];
     if (bound_fn == undefined) {
         // it was not bound yet, let's do it now
-        bound_fn = bind_js_import(signature);
-    }
-    mono_assert(bound_fn, () => `Imported function handle expected ${function_handle}`);
-
-    let max_postpone_count = 10;
-    function postpone_invoke_import_async() {
-        if (max_postpone_count < 0 || is_thread_available()) {
-            bound_fn(args);
-            Module._free(args as any);
-        } else {
-            max_postpone_count--;
-            Module.safeSetTimeout(postpone_invoke_import_async, 10);
+        try {
+            bound_fn = bind_js_import(signature);
+        } catch (ex: any) {
+            // propagate the exception back to caller, which could be on different thread. Handle both sync and async signatures.
+            try {
+                const res_sig = get_sig(signature, 1);
+                const res_type = get_signature_type(res_sig);
+                if (res_type === MarshalerType.Task) {
+                    const res = get_arg(args, 1);
+                    marshal_task_to_cs(res, Promise.reject(ex));
+                } else {
+                    marshal_exception_to_cs(<any>args, ex);
+                    if (monoThreadInfo.isUI) {
+                        const done_semaphore = get_sync_done_semaphore_ptr(args);
+                        tcwraps.mono_threads_wasm_sync_run_in_target_thread_done(done_semaphore);
+                    }
+                }
+                return;
+            } catch (ex2: any) {
+                runtimeHelpers.nativeExit(ex2);
+                return;
+            }
         }
     }
+    mono_assert(bound_fn, () => `Imported function handle expected ${function_handle}`);
 
-    if (WasmEnableThreads && !ENVIRONMENT_IS_WORKER) {
-        // give thread chance to load before we run more synchronous code on UI thread
-        postpone_invoke_import_async();
-    }
-    else {
-        bound_fn(args);
-        // this works together with AllocHGlobal in JSFunctionBinding.DispatchJSImportAsync
-        Module._free(args as any);
-    }
+    bound_fn(args);
 }
 
-export function mono_wasm_invoke_import_sync(args: JSMarshalerArguments, signature: JSFunctionSignature) {
-    if (!WasmEnableThreads) return;
-    assert_js_interop();
-
-    const function_handle = get_signature_handle(signature);
-
-    let bound_fn = js_import_wrapper_by_fn_handle[function_handle];
-    if (bound_fn == undefined) {
-        // it was not bound yet, let's do it now
-        bound_fn = bind_js_import(signature);
-    }
+export function mono_wasm_invoke_jsimport_ST (function_handle: JSFnHandle, args: JSMarshalerArguments): void {
+    if (WasmEnableThreads) return;
+    loaderHelpers.assert_runtime_running();
+    const bound_fn = js_import_wrapper_by_fn_handle[<any>function_handle];
     mono_assert(bound_fn, () => `Imported function handle expected ${function_handle}`);
-
     bound_fn(args);
 }
 
-function bind_js_import(signature: JSFunctionSignature): Function {
+function bind_js_import (signature: JSFunctionSignature): Function {
     assert_js_interop();
     const mark = startMeasure();
 
@@ -128,6 +117,9 @@ function bind_js_import(signature: JSFunctionSignature): Function {
     const res_marshaler_type = get_signature_type(res_sig);
     const res_converter = bind_arg_marshal_to_cs(res_sig, res_marshaler_type, 1);
 
+    const is_discard_no_wait = res_marshaler_type == MarshalerType.DiscardNoWait;
+    const is_async = res_marshaler_type == MarshalerType.Task || res_marshaler_type == MarshalerType.TaskPreCreated;
+
     const closure: BindingClosure = {
         fn,
         fqn: js_module_name + ":" + js_function_name,
@@ -136,51 +128,94 @@ function bind_js_import(signature: JSFunctionSignature): Function {
         res_converter,
         has_cleanup,
         arg_cleanup,
+        is_discard_no_wait,
+        is_async,
         isDisposed: false,
     };
-    let bound_fn: Function;
-    if (args_count == 0 && !res_converter) {
-        bound_fn = bind_fn_0V(closure);
+    let bound_fn: WrappedJSFunction;
+    if (is_async || is_discard_no_wait || has_cleanup) {
+        bound_fn = bind_fn(closure);
+    } else {
+        if (args_count == 0 && !res_converter) {
+            bound_fn = bind_fn_0V(closure);
+        } else if (args_count == 1 && !res_converter) {
+            bound_fn = bind_fn_1V(closure);
+        } else if (args_count == 1 && res_converter) {
+            bound_fn = bind_fn_1R(closure);
+        } else if (args_count == 2 && res_converter) {
+            bound_fn = bind_fn_2R(closure);
+        } else {
+            bound_fn = bind_fn(closure);
+        }
     }
-    else if (args_count == 1 && !has_cleanup && !res_converter) {
-        bound_fn = bind_fn_1V(closure);
+
+    function async_bound_fn (args: JSMarshalerArguments): void {
+        forceThreadMemoryViewRefresh();
+        bound_fn(args);
+    }
+    function sync_bound_fn (args: JSMarshalerArguments): void {
+        const previous = runtimeHelpers.isPendingSynchronousCall;
+        try {
+            forceThreadMemoryViewRefresh();
+            const caller_tid = get_caller_native_tid(args);
+            runtimeHelpers.isPendingSynchronousCall = runtimeHelpers.managedThreadTID === caller_tid;
+            bound_fn(args);
+        } finally {
+            runtimeHelpers.isPendingSynchronousCall = previous;
+        }
     }
-    else if (args_count == 1 && !has_cleanup && res_converter) {
-        bound_fn = bind_fn_1R(closure);
+    function async_bound_fn_ui (args: JSMarshalerArguments): void {
+        invoke_later_when_on_ui_thread_async(() => async_bound_fn(args));
     }
-    else if (args_count == 2 && !has_cleanup && res_converter) {
-        bound_fn = bind_fn_2R(closure);
+    function sync_bound_fn_ui (args: JSMarshalerArguments): void {
+        invoke_later_when_on_ui_thread_sync(() => sync_bound_fn(args), args);
     }
-    else {
-        bound_fn = bind_fn(closure);
+
+    let wrapped_fn: WrappedJSFunction = bound_fn;
+    if (WasmEnableThreads) {
+        if (monoThreadInfo.isUI) {
+            if (is_async || is_discard_no_wait) {
+                wrapped_fn = async_bound_fn_ui;
+            } else {
+                wrapped_fn = sync_bound_fn_ui;
+            }
+        } else {
+            if (is_async || is_discard_no_wait) {
+                wrapped_fn = async_bound_fn;
+            } else {
+                wrapped_fn = sync_bound_fn;
+            }
+        }
     }
 
-    // this is just to make debugging easier. 
+    // this is just to make debugging easier by naming the function in the stack trace.
     // It's not CSP compliant and possibly not performant, that's why it's only enabled in debug builds
     // in Release configuration, it would be a trimmed by rollup
     if (BuildConfiguration === "Debug" && !runtimeHelpers.cspPolicy) {
         try {
-            bound_fn = new Function("fn", "return (function JSImport_" + js_function_name.replaceAll(".", "_") + "(){ return fn.apply(this, arguments)});")(bound_fn);
-        }
-        catch (ex) {
+            const fname = js_function_name.replaceAll(".", "_");
+            const url = `//# sourceURL=https://dotnet/JSImport/${fname}`;
+            const body = `return (function JSImport_${fname}(){ return fn.apply(this, arguments)});`;
+            wrapped_fn = new Function("fn", url + "\r\n" + body)(wrapped_fn);
+        } catch (ex) {
             runtimeHelpers.cspPolicy = true;
         }
     }
 
-    (<any>bound_fn)[imported_js_function_symbol] = closure;
+    (<any>wrapped_fn)[imported_js_function_symbol] = closure;
 
-    js_import_wrapper_by_fn_handle[function_handle] = bound_fn;
+    js_import_wrapper_by_fn_handle[function_handle] = wrapped_fn;
 
     endMeasure(mark, MeasuredBlock.bindJsFunction, js_function_name);
 
-    return bound_fn;
+    return wrapped_fn;
 }
 
-function bind_fn_0V(closure: BindingClosure) {
+function bind_fn_0V (closure: BindingClosure) {
     const fn = closure.fn;
     const fqn = closure.fqn;
     if (!WasmEnableThreads) (<any>closure) = null;
-    return function bound_fn_0V(args: JSMarshalerArguments) {
+    return function bound_fn_0V (args: JSMarshalerArguments) {
         const mark = startMeasure();
         try {
             mono_assert(!WasmEnableThreads || !closure.isDisposed, "The function was already disposed");
@@ -188,19 +223,18 @@ function bind_fn_0V(closure: BindingClosure) {
             fn();
         } catch (ex) {
             marshal_exception_to_cs(<any>args, ex);
-        }
-        finally {
+        } finally {
             endMeasure(mark, MeasuredBlock.callCsFunction, fqn);
         }
     };
 }
 
-function bind_fn_1V(closure: BindingClosure) {
+function bind_fn_1V (closure: BindingClosure) {
     const fn = closure.fn;
     const marshaler1 = closure.arg_marshalers[0]!;
     const fqn = closure.fqn;
     if (!WasmEnableThreads) (<any>closure) = null;
-    return function bound_fn_1V(args: JSMarshalerArguments) {
+    return function bound_fn_1V (args: JSMarshalerArguments) {
         const mark = startMeasure();
         try {
             mono_assert(!WasmEnableThreads || !closure.isDisposed, "The function was already disposed");
@@ -209,20 +243,19 @@ function bind_fn_1V(closure: BindingClosure) {
             fn(arg1);
         } catch (ex) {
             marshal_exception_to_cs(<any>args, ex);
-        }
-        finally {
+        } finally {
             endMeasure(mark, MeasuredBlock.callCsFunction, fqn);
         }
     };
 }
 
-function bind_fn_1R(closure: BindingClosure) {
+function bind_fn_1R (closure: BindingClosure) {
     const fn = closure.fn;
     const marshaler1 = closure.arg_marshalers[0]!;
     const res_converter = closure.res_converter!;
     const fqn = closure.fqn;
     if (!WasmEnableThreads) (<any>closure) = null;
-    return function bound_fn_1R(args: JSMarshalerArguments) {
+    return function bound_fn_1R (args: JSMarshalerArguments) {
         const mark = startMeasure();
         try {
             mono_assert(!WasmEnableThreads || !closure.isDisposed, "The function was already disposed");
@@ -232,21 +265,20 @@ function bind_fn_1R(closure: BindingClosure) {
             res_converter(args, js_result);
         } catch (ex) {
             marshal_exception_to_cs(<any>args, ex);
-        }
-        finally {
+        } finally {
             endMeasure(mark, MeasuredBlock.callCsFunction, fqn);
         }
     };
 }
 
-function bind_fn_2R(closure: BindingClosure) {
+function bind_fn_2R (closure: BindingClosure) {
     const fn = closure.fn;
     const marshaler1 = closure.arg_marshalers[0]!;
     const marshaler2 = closure.arg_marshalers[1]!;
     const res_converter = closure.res_converter!;
     const fqn = closure.fqn;
     if (!WasmEnableThreads) (<any>closure) = null;
-    return function bound_fn_2R(args: JSMarshalerArguments) {
+    return function bound_fn_2R (args: JSMarshalerArguments) {
         const mark = startMeasure();
         try {
             mono_assert(!WasmEnableThreads || !closure.isDisposed, "The function was already disposed");
@@ -257,14 +289,13 @@ function bind_fn_2R(closure: BindingClosure) {
             res_converter(args, js_result);
         } catch (ex) {
             marshal_exception_to_cs(<any>args, ex);
-        }
-        finally {
+        } finally {
             endMeasure(mark, MeasuredBlock.callCsFunction, fqn);
         }
     };
 }
 
-function bind_fn(closure: BindingClosure) {
+function bind_fn (closure: BindingClosure) {
     const args_count = closure.args_count;
     const arg_marshalers = closure.arg_marshalers;
     const res_converter = closure.res_converter;
@@ -273,7 +304,8 @@ function bind_fn(closure: BindingClosure) {
     const fn = closure.fn;
     const fqn = closure.fqn;
     if (!WasmEnableThreads) (<any>closure) = null;
-    return function bound_fn(args: JSMarshalerArguments) {
+    return function bound_fn (args: JSMarshalerArguments) {
+        const receiver_should_free = WasmEnableThreads && is_receiver_should_free(args);
         const mark = startMeasure();
         try {
             mono_assert(!WasmEnableThreads || !closure.isDisposed, "The function was already disposed");
@@ -301,13 +333,17 @@ function bind_fn(closure: BindingClosure) {
             }
         } catch (ex) {
             marshal_exception_to_cs(<any>args, ex);
-        }
-        finally {
+        } finally {
+            if (receiver_should_free) {
+                Module._free(args as any);
+            }
             endMeasure(mark, MeasuredBlock.callCsFunction, fqn);
         }
     };
 }
 
+type WrappedJSFunction = (args: JSMarshalerArguments) => void;
+
 type BindingClosure = {
     fn: Function,
     fqn: string,
@@ -316,27 +352,28 @@ type BindingClosure = {
     arg_marshalers: (BoundMarshalerToJs)[],
     res_converter: BoundMarshalerToCs | undefined,
     has_cleanup: boolean,
+    is_discard_no_wait: boolean,
+    is_async: boolean,
     arg_cleanup: (Function | undefined)[]
 }
 
-export function mono_wasm_invoke_js_function(bound_function_js_handle: JSHandle, args: JSMarshalerArguments): void {
-    const bound_fn = mono_wasm_get_jsobj_from_js_handle(bound_function_js_handle);
-    mono_assert(bound_fn && typeof (bound_fn) === "function" && bound_fn[bound_js_function_symbol], () => `Bound function handle expected ${bound_function_js_handle}`);
-    bound_fn(args);
+export function mono_wasm_invoke_js_function (bound_function_js_handle: JSHandle, args: JSMarshalerArguments): void {
+    invoke_later_when_on_ui_thread_sync(() => mono_wasm_invoke_js_function_impl(bound_function_js_handle, args), args);
 }
 
-export function mono_wasm_invoke_js_import(function_handle: JSFnHandle, args: JSMarshalerArguments): void {
-    const bound_fn = js_import_wrapper_by_fn_handle[<any>function_handle];
-    mono_assert(bound_fn, () => `Imported function handle expected ${function_handle}`);
+export function mono_wasm_invoke_js_function_impl (bound_function_js_handle: JSHandle, args: JSMarshalerArguments): void {
+    loaderHelpers.assert_runtime_running();
+    const bound_fn = mono_wasm_get_jsobj_from_js_handle(bound_function_js_handle);
+    mono_assert(bound_fn && typeof (bound_fn) === "function" && bound_fn[bound_js_function_symbol], () => `Bound function handle expected ${bound_function_js_handle}`);
     bound_fn(args);
 }
 
-export function mono_wasm_set_module_imports(module_name: string, moduleImports: any) {
+export function mono_wasm_set_module_imports (module_name: string, moduleImports: any) {
     importedModules.set(module_name, moduleImports);
     mono_log_debug(`added module imports '${module_name}'`);
 }
 
-function mono_wasm_lookup_js_import(function_name: string, js_module_name: string | null): Function {
+function mono_wasm_lookup_js_import (function_name: string, js_module_name: string | null): Function {
     mono_assert(function_name && typeof function_name === "string", "function_name must be string");
 
     let scope: any = {};
@@ -348,12 +385,10 @@ function mono_wasm_lookup_js_import(function_name: string, js_module_name: strin
         } else {
             mono_assert(scope, () => `ES6 module ${js_module_name} was not imported yet, please call JSHost.ImportAsync() first.`);
         }
-    }
-    else if (parts[0] === "INTERNAL") {
+    } else if (parts[0] === "INTERNAL") {
         scope = INTERNAL;
         parts.shift();
-    }
-    else if (parts[0] === "globalThis") {
+    } else if (parts[0] === "globalThis") {
         scope = globalThis;
         parts.shift();
     }
@@ -361,47 +396,51 @@ function mono_wasm_lookup_js_import(function_name: string, js_module_name: strin
     for (let i = 0; i < parts.length - 1; i++) {
         const part = parts[i];
         const newscope = scope[part];
-        mono_assert(newscope, () => `${part} not found while looking up ${function_name}`);
+        if (!newscope) {
+            throw new Error(`${part} not found while looking up ${function_name}`);
+        }
         scope = newscope;
     }
 
     const fname = parts[parts.length - 1];
     const fn = scope[fname];
 
-    mono_assert(typeof (fn) === "function", () => `${function_name} must be a Function but was ${typeof fn}`);
+    if (typeof (fn) !== "function") {
+        throw new Error(`${function_name} must be a Function but was ${typeof fn}`);
+    }
 
     // if the function was already bound to some object it would stay bound to original object. That's good.
     return fn.bind(scope);
 }
 
-export function set_property(self: any, name: string, value: any): void {
+export function set_property (self: any, name: string, value: any): void {
     mono_check(self, "Null reference");
     self[name] = value;
 }
 
-export function get_property(self: any, name: string): any {
+export function get_property (self: any, name: string): any {
     mono_check(self, "Null reference");
     return self[name];
 }
 
-export function has_property(self: any, name: string): boolean {
+export function has_property (self: any, name: string): boolean {
     mono_check(self, "Null reference");
     return name in self;
 }
 
-export function get_typeof_property(self: any, name: string): string {
+export function get_typeof_property (self: any, name: string): string {
     mono_check(self, "Null reference");
     return typeof self[name];
 }
 
-export function get_global_this(): any {
+export function get_global_this (): any {
     return globalThis;
 }
 
 export const importedModulesPromises: Map<string, Promise<any>> = new Map();
 export const importedModules: Map<string, Promise<any>> = new Map();
 
-export function dynamic_import(module_name: string, module_url: string): Promise<any> {
+export function dynamic_import (module_name: string, module_url: string): Promise<any> {
     assert_js_interop();
     mono_assert(module_name && typeof module_name === "string", "module_name must be string");
     mono_assert(module_url && typeof module_url === "string", "module_url must be string");
@@ -423,7 +462,7 @@ export function dynamic_import(module_name: string, module_url: string): Promise
     });
 }
 
-function _wrap_error_flag(is_exception: Int32Ptr | null, ex: any): string {
+export function normalize_exception (ex: any) {
     let res = "unknown exception";
     if (ex) {
         res = ex.toString();
@@ -439,43 +478,19 @@ function _wrap_error_flag(is_exception: Int32Ptr | null, ex: any): string {
 
         res = mono_wasm_symbolicate_string(res);
     }
-    if (is_exception) {
-        receiveWorkerHeapViews();
-        setI32_unchecked(is_exception, 1);
-    }
     return res;
 }
 
-export function wrap_error_root(is_exception: Int32Ptr | null, ex: any, result: WasmRoot<MonoObject>): void {
-    const res = _wrap_error_flag(is_exception, ex);
-    if (NativeAOT) {
-        return;
-    }
-
-    stringToMonoStringRoot(res, <any>result);
-}
-
-// to set out parameters of icalls
-export function wrap_no_error_root(is_exception: Int32Ptr | null, result?: WasmRoot<MonoObject>): void {
-    if (is_exception) {
-        receiveWorkerHeapViews();
-        setI32_unchecked(is_exception, 0);
-    }
-    if (result) {
-        result.clear();
-    }
-}
-
-export function assert_js_interop(): void {
+export function assert_js_interop (): void {
     loaderHelpers.assert_runtime_running();
     if (WasmEnableThreads) {
-        mono_assert(runtimeHelpers.mono_wasm_bindings_is_ready && runtimeHelpers.proxy_context_gc_handle, "Please use dedicated worker for working with JavaScript interop. See https://github.com/dotnet/runtime/blob/main/src/mono/wasm/threads.md#JS-interop-on-dedicated-threads");
+        mono_assert(runtimeHelpers.mono_wasm_bindings_is_ready && runtimeHelpers.proxyGCHandle, "Please use dedicated worker for working with JavaScript interop. See https://github.com/dotnet/runtime/blob/main/src/mono/wasm/threads.md#JS-interop-on-dedicated-threads");
     } else {
         mono_assert(runtimeHelpers.mono_wasm_bindings_is_ready, "The runtime must be initialized.");
     }
 }
 
-export function assert_c_interop(): void {
+export function assert_c_interop (): void {
     loaderHelpers.assert_runtime_running();
     if (WasmEnableThreads) {
         mono_assert(runtimeHelpers.mono_wasm_bindings_is_ready, "Please use dedicated worker for working with JavaScript interop. See https://github.com/dotnet/runtime/blob/main/src/mono/wasm/threads.md#JS-interop-on-dedicated-threads");
@@ -483,3 +498,29 @@ export function assert_c_interop(): void {
         mono_assert(runtimeHelpers.mono_wasm_bindings_is_ready, "The runtime must be initialized.");
     }
 }
+
+// make sure we are not blocking em_task_queue_execute up the call stack
+// so that when we call back to managed, the FS calls could still be processed by the UI thread
+// see also emscripten_yield which can process the FS calls inside the spin wait
+export function invoke_later_when_on_ui_thread_sync (fn: Function, args: JSMarshalerArguments) {
+    if (WasmEnableThreads && monoThreadInfo.isUI) {
+        Module.safeSetTimeout(() => {
+            fn();
+            // see also mono_threads_wasm_sync_run_in_target_thread_vii_cb
+            const done_semaphore = get_sync_done_semaphore_ptr(args);
+            tcwraps.mono_threads_wasm_sync_run_in_target_thread_done(done_semaphore);
+        }, 0);
+    } else {
+        fn();
+    }
+}
+
+// make sure we are not blocking em_task_queue_execute up the call stack
+// so that when we call back to managed, the FS calls could still be processed by the UI thread
+export function invoke_later_when_on_ui_thread_async (fn: Function) {
+    if (WasmEnableThreads && monoThreadInfo.isUI) {
+        Module.safeSetTimeout(fn, 0);
+    } else {
+        fn();
+    }
+}
diff --git a/src/mono/browser/runtime/jiterpreter-enums.ts b/src/mono/browser/runtime/jiterpreter-enums.ts
index 1e65f7b3cec3..07a3f815b831 100644
--- a/src/mono/browser/runtime/jiterpreter-enums.ts
+++ b/src/mono/browser/runtime/jiterpreter-enums.ts
@@ -44,6 +44,8 @@ export const enum JiterpMember {
     ClassRank,
     ClassElementClass,
     BoxedValueData,
+    BackwardBranchTaken,
+    BailoutOpcodeCount,
 }
 
 // keep in sync with jiterpreter.c, see mono_jiterp_write_number_unaligned
diff --git a/src/mono/browser/runtime/jiterpreter-interp-entry.ts b/src/mono/browser/runtime/jiterpreter-interp-entry.ts
index cedf54cdde93..b99917dd2bb0 100644
--- a/src/mono/browser/runtime/jiterpreter-interp-entry.ts
+++ b/src/mono/browser/runtime/jiterpreter-interp-entry.ts
@@ -68,7 +68,7 @@ const enum WasmReftype {
 }
 */
 
-function getTrampImports() {
+function getTrampImports () {
     if (trampImports)
         return trampImports;
 
@@ -98,7 +98,7 @@ class TrampolineInfo {
     result: number;
     hitCount: number;
 
-    constructor(
+    constructor (
         imethod: number, method: MonoMethod, argumentCount: number, pParamTypes: NativePointer,
         unbox: boolean, hasThisReference: boolean, hasReturnValue: boolean, name: string,
         defaultImplementation: number
@@ -137,12 +137,12 @@ let mostRecentOptions: JiterpreterOptions | undefined = undefined;
 // If a method is freed we need to remove its info (just in case another one gets
 //  allocated at that exact memory offset later) and more importantly, ensure it is
 //  not waiting in the jit queue
-export function mono_jiterp_free_method_data_interp_entry(imethod: number) {
+export function mono_jiterp_free_method_data_interp_entry (imethod: number) {
     delete infoTable[imethod];
 }
 
 // FIXME: move this counter into C and make it thread safe
-export function mono_interp_record_interp_entry(imethod: number) {
+export function mono_interp_record_interp_entry (imethod: number) {
     // clear the unbox bit
     imethod = imethod & ~0x1;
 
@@ -168,7 +168,7 @@ export function mono_interp_record_interp_entry(imethod: number) {
 }
 
 // returns function pointer
-export function mono_interp_jit_wasm_entry_trampoline(
+export function mono_interp_jit_wasm_entry_trampoline (
     imethod: number, method: MonoMethod, argumentCount: number, pParamTypes: NativePointer,
     unbox: boolean, hasThisReference: boolean, hasReturnValue: boolean, name: NativePointer,
     defaultImplementation: number
@@ -208,7 +208,7 @@ export function mono_interp_jit_wasm_entry_trampoline(
     return info.result;
 }
 
-function ensure_jit_is_scheduled() {
+function ensure_jit_is_scheduled () {
     if (jitQueueTimeout > 0)
         return;
 
@@ -227,7 +227,7 @@ function ensure_jit_is_scheduled() {
     }, queueFlushDelayMs);
 }
 
-function flush_wasm_entry_trampoline_jit_queue() {
+function flush_wasm_entry_trampoline_jit_queue () {
     const jitQueue : TrampolineInfo[] = [];
     let methodPtr = <MonoMethod><any>0;
     while ((methodPtr = <any>cwraps.mono_jiterp_tlqueue_next(JitQueue.InterpEntry)) != 0) {
@@ -447,7 +447,7 @@ function flush_wasm_entry_trampoline_jit_queue() {
     }
 }
 
-function append_stackval_from_data(
+function append_stackval_from_data (
     builder: WasmBuilder, imethod: number, type: MonoType, valueName: string, argIndex: number
 ) {
     const rawSize = cwraps.mono_jiterp_type_get_raw_value_size(type);
@@ -520,7 +520,7 @@ function append_stackval_from_data(
     }
 }
 
-function generate_wasm_body(
+function generate_wasm_body (
     builder: WasmBuilder, info: TrampolineInfo
 ): boolean {
     // FIXME: This is not thread-safe, but the alternative of alloca makes the trampoline
diff --git a/src/mono/browser/runtime/jiterpreter-jit-call.ts b/src/mono/browser/runtime/jiterpreter-jit-call.ts
index 09018413e24e..f8da716a5934 100644
--- a/src/mono/browser/runtime/jiterpreter-jit-call.ts
+++ b/src/mono/browser/runtime/jiterpreter-jit-call.ts
@@ -95,7 +95,7 @@ class TrampolineInfo {
     wasmNativeSignature: WasmValtype[];
     enableDirect: boolean;
 
-    constructor(
+    constructor (
         method: MonoMethod, rmethod: VoidPtr, cinfo: VoidPtr,
         arg_offsets: VoidPtr, catch_exceptions: boolean
     ) {
@@ -166,7 +166,7 @@ class TrampolineInfo {
 // this is cached replacements for Module.getWasmTableEntry();
 // we could add <EmccExportedLibraryFunction Include="$getWasmTableEntry" /> and <EmccExportedRuntimeMethod Include="getWasmTableEntry" />
 // if we need to export the original
-function getWasmTableEntry(index: number) {
+function getWasmTableEntry (index: number) {
     let result = fnCache[index];
     if (!result) {
         if (index >= fnCache.length)
@@ -179,7 +179,7 @@ function getWasmTableEntry(index: number) {
     return result;
 }
 
-export function mono_interp_invoke_wasm_jit_call_trampoline(
+export function mono_interp_invoke_wasm_jit_call_trampoline (
     thunkIndex: number, ret_sp: number, sp: number, ftndesc: number, thrown: NativePointer
 ) {
     const thunk = <Function>getWasmTableEntry(thunkIndex);
@@ -218,7 +218,7 @@ export function mono_interp_invoke_wasm_jit_call_trampoline(
 // If a method is freed we need to remove its info (just in case another one gets
 //  allocated at that exact memory offset later) and more importantly, ensure it is
 //  not waiting in the jit queue
-export function mono_jiterp_free_method_data_jit_call(method: MonoMethod) {
+export function mono_jiterp_free_method_data_jit_call (method: MonoMethod) {
     // FIXME
     const infoArray = infosByMethod[<any>method];
     if (!infoArray)
@@ -230,7 +230,7 @@ export function mono_jiterp_free_method_data_jit_call(method: MonoMethod) {
     delete infosByMethod[<any>method];
 }
 
-export function mono_interp_jit_wasm_jit_call_trampoline(
+export function mono_interp_jit_wasm_jit_call_trampoline (
     method: MonoMethod, rmethod: VoidPtr, cinfo: VoidPtr,
     arg_offsets: VoidPtr, catch_exceptions: number
 ): void {
@@ -276,7 +276,7 @@ export function mono_interp_jit_wasm_jit_call_trampoline(
         mono_interp_flush_jitcall_queue();
 }
 
-function getIsWasmEhSupported(): boolean {
+function getIsWasmEhSupported (): boolean {
     if (wasmEhSupported !== undefined)
         return wasmEhSupported;
 
@@ -288,7 +288,7 @@ function getIsWasmEhSupported(): boolean {
     return wasmEhSupported;
 }
 
-export function mono_interp_flush_jitcall_queue(): void {
+export function mono_interp_flush_jitcall_queue (): void {
     const jitQueue: TrampolineInfo[] = [];
     let methodPtr = <MonoMethod><any>0;
     while ((methodPtr = <any>cwraps.mono_jiterp_tlqueue_next(JitQueue.JitCall)) != 0) {
@@ -624,19 +624,19 @@ const wasmOpcodeFromCilOpcode = {
     [CilOpcodes.STIND_I]: WasmOpcode.i32_store,
 };
 
-function append_ldloc(builder: WasmBuilder, offsetBytes: number, opcode: WasmOpcode) {
+function append_ldloc (builder: WasmBuilder, offsetBytes: number, opcode: WasmOpcode) {
     builder.local("sp");
     builder.appendU8(opcode);
     builder.appendMemarg(offsetBytes, 0);
 }
 
-function append_ldloca(builder: WasmBuilder, offsetBytes: number) {
+function append_ldloca (builder: WasmBuilder, offsetBytes: number) {
     builder.local("sp");
     builder.i32_const(offsetBytes);
     builder.appendU8(WasmOpcode.i32_add);
 }
 
-function generate_wasm_body(
+function generate_wasm_body (
     builder: WasmBuilder, info: TrampolineInfo
 ): boolean {
     let stack_index = 0;
diff --git a/src/mono/browser/runtime/jiterpreter-opcodes.ts b/src/mono/browser/runtime/jiterpreter-opcodes.ts
index 9c046ebf8319..d535070df2ae 100644
--- a/src/mono/browser/runtime/jiterpreter-opcodes.ts
+++ b/src/mono/browser/runtime/jiterpreter-opcodes.ts
@@ -1,8 +1,6 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-// Keep this file in sync with mintops.def. The order and values need to match exactly.
-
 import cwraps from "./cwraps";
 import { utf8ToString } from "./strings";
 import { OpcodeInfoType } from "./jiterpreter-enums";
@@ -28,27 +26,29 @@ export type SimdInfoTable = {
     [argument_count: number]: SimdInfoSubtable
 }
 
+// Keep in sync with mintops.h
 export const enum MintOpArgType {
-    MintOpNoArgs = 0,
-    MintOpShortInt,
-    MintOpUShortInt,
-    MintOpInt,
-    MintOpLongInt,
-    MintOpFloat,
-    MintOpDouble,
-    MintOpBranch,
-    MintOpShortBranch,
-    MintOpSwitch,
-    MintOpMethodToken,
-    MintOpFieldToken,
-    MintOpClassToken,
-    MintOpTwoShorts,
-    MintOpTwoInts,
-    MintOpShortAndInt,
-    MintOpShortAndShortBranch,
-    MintOpPair2,
-    MintOpPair3,
-    MintOpPair4
+	MintOpNoArgs = 0,
+	MintOpShortInt,
+	MintOpUShortInt,
+	MintOpInt,
+	MintOpLongInt,
+	MintOpFloat,
+	MintOpDouble,
+	MintOpBranch,
+	MintOpShortBranch,
+	MintOpSwitch,
+	MintOpMethodToken,
+	MintOpFieldToken,
+	MintOpClassToken,
+	MintOpVTableToken,
+	MintOpTwoShorts,
+	MintOpTwoInts,
+	MintOpShortAndInt,
+	MintOpShortAndShortBranch,
+	MintOpPair2,
+	MintOpPair3,
+	MintOpPair4
 }
 
 // keep in sync with jiterpreter.c, see mono_jiterp_relop_fp
diff --git a/src/mono/browser/runtime/jiterpreter-support.ts b/src/mono/browser/runtime/jiterpreter-support.ts
index 34c4879526cf..c375b4f91e70 100644
--- a/src/mono/browser/runtime/jiterpreter-support.ts
+++ b/src/mono/browser/runtime/jiterpreter-support.ts
@@ -14,7 +14,7 @@ import { localHeapViewU8, localHeapViewU32 } from "./memory";
 import { utf8ToString } from "./strings";
 import {
     JiterpNumberMode, BailoutReason, JiterpreterTable,
-    JiterpCounter, JiterpMember
+    JiterpCounter, JiterpMember, OpcodeInfoType
 } from "./jiterpreter-enums";
 
 export const maxFailures = 2,
@@ -106,18 +106,19 @@ export class WasmBuilder {
     backBranchOffsets: Array<MintOpcodePtr> = [];
     callHandlerReturnAddresses: Array<MintOpcodePtr> = [];
     nextConstantSlot = 0;
+    backBranchTraceLevel = 0;
 
     compressImportNames = false;
     lockImports = false;
 
-    constructor(constantSlotCount: number) {
+    constructor (constantSlotCount: number) {
         this.stack = [new BlobBuilder()];
         this.clear(constantSlotCount);
         this.cfg = new Cfg(this);
         this.defineType("__cpp_exception", { "ptr": WasmValtype.i32 }, WasmValtype.void, true);
     }
 
-    clear(constantSlotCount: number) {
+    clear (constantSlotCount: number) {
         this.options = getOptions();
         this.stackSize = 1;
         this.inSection = false;
@@ -157,14 +158,14 @@ export class WasmBuilder {
         this.allowNullCheckOptimization = this.options.eliminateNullChecks;
     }
 
-    _push() {
+    _push () {
         this.stackSize++;
         if (this.stackSize >= this.stack.length)
             this.stack.push(new BlobBuilder());
         this.current.clear();
     }
 
-    _pop(writeToOutput: boolean) {
+    _pop (writeToOutput: boolean) {
         if (this.stackSize <= 1)
             throw new Error("Stack empty");
 
@@ -179,21 +180,21 @@ export class WasmBuilder {
             return current.getArrayView(false).slice(0, current.size);
     }
 
-    setImportFunction(name: string, value: Function) {
+    setImportFunction (name: string, value: Function) {
         const imp = this.importedFunctions[name];
         if (!imp)
             throw new Error("No import named " + name);
         imp.func = value;
     }
 
-    getExceptionTag(): any {
+    getExceptionTag (): any {
         const exceptionTag = (<any>Module)["asm"]["__cpp_exception"];
         if (typeof (exceptionTag) !== "undefined")
             mono_assert(exceptionTag instanceof (<any>WebAssembly).Tag, () => `expected __cpp_exception export from dotnet.wasm to be WebAssembly.Tag but was ${exceptionTag}`);
         return exceptionTag;
     }
 
-    getWasmImports(): WebAssembly.Imports {
+    getWasmImports (): WebAssembly.Imports {
         const memory = runtimeHelpers.getMemory();
         mono_assert(memory instanceof WebAssembly.Memory, () => `expected heap import to be WebAssembly.Memory but was ${memory}`);
 
@@ -226,7 +227,7 @@ export class WasmBuilder {
     // HACK: Approximate amount of space we need to generate the full module at present
     // FIXME: This does not take into account any other functions already generated if they weren't
     //  emitted into the module immediately
-    get bytesGeneratedSoFar() {
+    get bytesGeneratedSoFar () {
         const importSize = this.compressImportNames
             // mod (2 bytes) name (2-3 bytes) type (1 byte) typeidx (1-2 bytes)
             ? 8
@@ -243,74 +244,74 @@ export class WasmBuilder {
             this.estimatedExportBytes;
     }
 
-    get current() {
+    get current () {
         return this.stack[this.stackSize - 1];
     }
 
-    get size() {
+    get size () {
         return this.current.size;
     }
 
-    appendU8(value: number | WasmOpcode) {
+    appendU8 (value: number | WasmOpcode) {
         if ((value != value >>> 0) || (value > 255))
             throw new Error(`Byte out of range: ${value}`);
         return this.current.appendU8(value);
     }
 
-    appendSimd(value: WasmSimdOpcode, allowLoad?: boolean) {
+    appendSimd (value: WasmSimdOpcode, allowLoad?: boolean) {
         this.current.appendU8(WasmOpcode.PREFIX_simd);
         // Yes that's right. We're using LEB128 to encode 8-bit opcodes. Why? I don't know
         mono_assert(((value | 0) !== 0) || ((value === WasmSimdOpcode.v128_load) && (allowLoad === true)), "Expected non-v128_load simd opcode or allowLoad==true");
         return this.current.appendULeb(value);
     }
 
-    appendU32(value: number) {
+    appendU32 (value: number) {
         return this.current.appendU32(value);
     }
 
-    appendF32(value: number) {
+    appendF32 (value: number) {
         return this.current.appendF32(value);
     }
 
-    appendF64(value: number) {
+    appendF64 (value: number) {
         return this.current.appendF64(value);
     }
 
-    appendBoundaryValue(bits: number, sign: number) {
+    appendBoundaryValue (bits: number, sign: number) {
         return this.current.appendBoundaryValue(bits, sign);
     }
 
-    appendULeb(value: number | MintOpcodePtr) {
+    appendULeb (value: number | MintOpcodePtr) {
         return this.current.appendULeb(<any>value);
     }
 
-    appendLeb(value: number) {
+    appendLeb (value: number) {
         return this.current.appendLeb(value);
     }
 
-    appendLebRef(sourceAddress: VoidPtr, signed: boolean) {
+    appendLebRef (sourceAddress: VoidPtr, signed: boolean) {
         return this.current.appendLebRef(sourceAddress, signed);
     }
 
-    appendBytes(bytes: Uint8Array) {
+    appendBytes (bytes: Uint8Array) {
         return this.current.appendBytes(bytes);
     }
 
-    appendName(text: string) {
+    appendName (text: string) {
         return this.current.appendName(text);
     }
 
-    ret(ip: MintOpcodePtr) {
+    ret (ip: MintOpcodePtr) {
         this.ip_const(ip);
         this.appendU8(WasmOpcode.return_);
     }
 
-    i32_const(value: number | ManagedPointer | NativePointer) {
+    i32_const (value: number | ManagedPointer | NativePointer) {
         this.appendU8(WasmOpcode.i32_const);
         this.appendLeb(<any>value);
     }
 
-    ptr_const(pointer: number | ManagedPointer | NativePointer) {
+    ptr_const (pointer: number | ManagedPointer | NativePointer) {
         let idx = this.options.useConstants ? this.constantSlots.indexOf(<any>pointer) : -1;
         if (
             this.options.useConstants &&
@@ -329,17 +330,17 @@ export class WasmBuilder {
         }
     }
 
-    ip_const(value: MintOpcodePtr) {
+    ip_const (value: MintOpcodePtr) {
         this.appendU8(WasmOpcode.i32_const);
         this.appendLeb(<any>value - <any>this.base);
     }
 
-    i52_const(value: number) {
+    i52_const (value: number) {
         this.appendU8(WasmOpcode.i64_const);
         this.appendLeb(value);
     }
 
-    v128_const(value: 0 | Uint8Array) {
+    v128_const (value: 0 | Uint8Array) {
         if (value === 0) {
             // This encoding is much smaller than a v128_const
             // But v8 doesn't optimize it :-((((((
@@ -368,7 +369,7 @@ export class WasmBuilder {
         }
     }
 
-    defineType(
+    defineType (
         name: string, parameters: { [name: string]: WasmValtype }, returnType: WasmValtype,
         permanent: boolean
     ) {
@@ -417,7 +418,7 @@ export class WasmBuilder {
         return index;
     }
 
-    generateTypeSection() {
+    generateTypeSection () {
         this.beginSection(1);
         this.appendULeb(this.functionTypeCount);
         /*
@@ -443,7 +444,7 @@ export class WasmBuilder {
         this.endSection();
     }
 
-    getImportedFunctionTable(): any {
+    getImportedFunctionTable (): any {
         const imports: any = {};
         for (const k in this.importedFunctions) {
             const f = this.importedFunctions[k];
@@ -453,7 +454,7 @@ export class WasmBuilder {
         return imports;
     }
 
-    getCompressedName(ifi: ImportedFunctionInfo) {
+    getCompressedName (ifi: ImportedFunctionInfo) {
         if (!this.compressImportNames || typeof (ifi.index) !== "number")
             return ifi.name;
 
@@ -463,7 +464,7 @@ export class WasmBuilder {
         return result;
     }
 
-    getImportsToEmit() {
+    getImportsToEmit () {
         const result = [];
         for (const k in this.importedFunctions) {
             const v = this.importedFunctions[k];
@@ -476,7 +477,7 @@ export class WasmBuilder {
         return result;
     }
 
-    _generateImportSection(includeFunctionTable?: boolean) {
+    _generateImportSection (includeFunctionTable?: boolean) {
         const importsToEmit = this.getImportsToEmit();
         this.lockImports = true;
 
@@ -555,7 +556,7 @@ export class WasmBuilder {
         }
     }
 
-    defineImportedFunction(
+    defineImportedFunction (
         module: string, name: string, functionTypeName: string,
         permanent: boolean, func?: Function | number
     ): ImportedFunctionInfo {
@@ -584,7 +585,7 @@ export class WasmBuilder {
         return result;
     }
 
-    markImportAsUsed(name: string) {
+    markImportAsUsed (name: string) {
         const func = this.importedFunctions[name];
         if (!func)
             throw new Error("No imported function named " + name);
@@ -592,14 +593,14 @@ export class WasmBuilder {
             func.index = this.importedFunctionCount++;
     }
 
-    getTypeIndex(name: string) {
+    getTypeIndex (name: string) {
         const type = this.functionTypes[name];
         if (!type)
             throw new Error("No type named " + name);
         return type[0];
     }
 
-    defineFunction(
+    defineFunction (
         options: {
             type: string,
             name: string,
@@ -624,7 +625,7 @@ export class WasmBuilder {
         return rec;
     }
 
-    emitImportsAndFunctions(includeFunctionTable?: boolean) {
+    emitImportsAndFunctions (includeFunctionTable?: boolean) {
         let exportCount = 0;
         for (let i = 0; i < this.functions.length; i++) {
             const func = this.functions[i];
@@ -683,7 +684,7 @@ export class WasmBuilder {
         this.endSection();
     }
 
-    call_indirect(/* functionTypeName: string, tableIndex: number */) {
+    call_indirect (/* functionTypeName: string, tableIndex: number */) {
         throw new Error("call_indirect unavailable");
         /*
         const type = this.functionTypes[functionTypeName];
@@ -696,7 +697,7 @@ export class WasmBuilder {
         */
     }
 
-    callImport(name: string) {
+    callImport (name: string) {
         const func = this.importedFunctions[name];
         if (!func)
             throw new Error("No imported function named " + name);
@@ -709,7 +710,7 @@ export class WasmBuilder {
         this.appendULeb(func.index);
     }
 
-    beginSection(type: number) {
+    beginSection (type: number) {
         if (this.inSection)
             this._pop(true);
         this.appendU8(type);
@@ -717,7 +718,7 @@ export class WasmBuilder {
         this.inSection = true;
     }
 
-    endSection() {
+    endSection () {
         if (!this.inSection)
             throw new Error("Not in section");
         if (this.inFunction)
@@ -736,7 +737,7 @@ export class WasmBuilder {
         return result;
     };
 
-    _assignLocalIndices(
+    _assignLocalIndices (
         counts: any, locals: { [name: string]: WasmValtype },
         base: number, localGroupCount: number
     ) {
@@ -795,7 +796,7 @@ export class WasmBuilder {
         return localGroupCount;
     }
 
-    beginFunction(
+    beginFunction (
         type: string,
         locals?: { [name: string]: WasmValtype }
     ) {
@@ -839,7 +840,7 @@ export class WasmBuilder {
         this.inFunction = true;
     }
 
-    endFunction(writeToOutput: boolean) {
+    endFunction (writeToOutput: boolean) {
         if (!this.inFunction)
             throw new Error("Not in function");
         if (this.activeBlocks > 0)
@@ -849,7 +850,7 @@ export class WasmBuilder {
         return result;
     }
 
-    block(type?: WasmValtype, opcode?: WasmOpcode) {
+    block (type?: WasmValtype, opcode?: WasmOpcode) {
         const result = this.appendU8(opcode || WasmOpcode.block);
         if (type)
             this.appendU8(type);
@@ -859,14 +860,14 @@ export class WasmBuilder {
         return result;
     }
 
-    endBlock() {
+    endBlock () {
         if (this.activeBlocks <= 0)
             throw new Error("No blocks active");
         this.activeBlocks--;
         this.appendU8(WasmOpcode.end);
     }
 
-    arg(name: string | number, opcode?: WasmOpcode) {
+    arg (name: string | number, opcode?: WasmOpcode) {
         const index = typeof (name) === "string"
             ? (this.locals.has(name) ? this.locals.get(name)! : undefined)
             : name;
@@ -877,7 +878,7 @@ export class WasmBuilder {
         this.appendULeb(index);
     }
 
-    local(name: string | number, opcode?: WasmOpcode) {
+    local (name: string | number, opcode?: WasmOpcode) {
         const index = typeof (name) === "string"
             ? (this.locals.has(name) ? this.locals.get(name)! : undefined)
             : name + this.argumentCount;
@@ -890,7 +891,7 @@ export class WasmBuilder {
         this.appendULeb(index);
     }
 
-    appendMemarg(offset: number, alignPower: number) {
+    appendMemarg (offset: number, alignPower: number) {
         this.appendULeb(alignPower);
         this.appendULeb(offset);
     }
@@ -898,7 +899,7 @@ export class WasmBuilder {
     /*
         generates either (u32)get_local(ptr) + offset or (u32)ptr1 + offset
     */
-    lea(ptr1: string | number, offset: number) {
+    lea (ptr1: string | number, offset: number) {
         if (typeof (ptr1) === "string")
             this.local(ptr1);
         else
@@ -909,13 +910,13 @@ export class WasmBuilder {
         this.appendU8(WasmOpcode.i32_add);
     }
 
-    getArrayView(fullCapacity?: boolean) {
+    getArrayView (fullCapacity?: boolean) {
         if (this.stackSize > 1)
             throw new Error("Jiterpreter block stack not empty");
         return this.stack[0].getArrayView(fullCapacity);
     }
 
-    getConstants() {
+    getConstants () {
         const result: { [key: string]: number } = {};
         for (let i = 0; i < this.constantSlots.length; i++)
             result[i.toString(shortNameBase)] = this.constantSlots[i];
@@ -930,7 +931,7 @@ export class BlobBuilder {
     encoder?: TextEncoder;
     textBuf = new Uint8Array(1024);
 
-    constructor() {
+    constructor () {
         this.capacity = 16 * 1024;
         this.buffer = <any>Module._malloc(this.capacity);
         localHeapViewU8().fill(0, this.buffer, this.buffer + this.capacity);
@@ -940,11 +941,11 @@ export class BlobBuilder {
             this.encoder = new TextEncoder();
     }
 
-    clear() {
+    clear () {
         this.size = 0;
     }
 
-    appendU8(value: number | WasmOpcode) {
+    appendU8 (value: number | WasmOpcode) {
         if (this.size >= this.capacity)
             throw new Error("Buffer full");
 
@@ -953,35 +954,35 @@ export class BlobBuilder {
         return result;
     }
 
-    appendU32(value: number) {
+    appendU32 (value: number) {
         const result = this.size;
         cwraps.mono_jiterp_write_number_unaligned(<any>this.buffer + this.size, value, JiterpNumberMode.U32);
         this.size += 4;
         return result;
     }
 
-    appendI32(value: number) {
+    appendI32 (value: number) {
         const result = this.size;
         cwraps.mono_jiterp_write_number_unaligned(<any>this.buffer + this.size, value, JiterpNumberMode.I32);
         this.size += 4;
         return result;
     }
 
-    appendF32(value: number) {
+    appendF32 (value: number) {
         const result = this.size;
         cwraps.mono_jiterp_write_number_unaligned(<any>this.buffer + this.size, value, JiterpNumberMode.F32);
         this.size += 4;
         return result;
     }
 
-    appendF64(value: number) {
+    appendF64 (value: number) {
         const result = this.size;
         cwraps.mono_jiterp_write_number_unaligned(<any>this.buffer + this.size, value, JiterpNumberMode.F64);
         this.size += 8;
         return result;
     }
 
-    appendBoundaryValue(bits: number, sign: number) {
+    appendBoundaryValue (bits: number, sign: number) {
         if (this.size + 8 >= this.capacity)
             throw new Error("Buffer full");
 
@@ -992,7 +993,7 @@ export class BlobBuilder {
         return bytesWritten;
     }
 
-    appendULeb(value: number) {
+    appendULeb (value: number) {
         mono_assert(typeof (value) === "number", () => `appendULeb expected number but got ${value}`);
         mono_assert(value >= 0, "cannot pass negative value to appendULeb");
         if (value < 0x7F) {
@@ -1013,7 +1014,7 @@ export class BlobBuilder {
         return bytesWritten;
     }
 
-    appendLeb(value: number) {
+    appendLeb (value: number) {
         mono_assert(typeof (value) === "number", () => `appendLeb expected number but got ${value}`);
         if (this.size + 8 >= this.capacity)
             throw new Error("Buffer full");
@@ -1025,7 +1026,7 @@ export class BlobBuilder {
         return bytesWritten;
     }
 
-    appendLebRef(sourceAddress: VoidPtr, signed: boolean) {
+    appendLebRef (sourceAddress: VoidPtr, signed: boolean) {
         if (this.size + 8 >= this.capacity)
             throw new Error("Buffer full");
 
@@ -1036,7 +1037,7 @@ export class BlobBuilder {
         return bytesWritten;
     }
 
-    copyTo(destination: BlobBuilder, count?: number) {
+    copyTo (destination: BlobBuilder, count?: number) {
         if (typeof (count) !== "number")
             count = this.size;
 
@@ -1044,7 +1045,7 @@ export class BlobBuilder {
         destination.size += count;
     }
 
-    appendBytes(bytes: Uint8Array, count?: number) {
+    appendBytes (bytes: Uint8Array, count?: number) {
         const result = this.size;
         const heapU8 = localHeapViewU8();
         if (bytes.buffer === heapU8.buffer) {
@@ -1064,7 +1065,7 @@ export class BlobBuilder {
         return result;
     }
 
-    appendName(text: string) {
+    appendName (text: string) {
         let count = text.length;
         // TextEncoder overhead is significant for short strings, and lots of our strings
         //  are single-character import names, so add a fast path for single characters
@@ -1099,7 +1100,7 @@ export class BlobBuilder {
             this.appendBytes(this.textBuf, count);
     }
 
-    getArrayView(fullCapacity?: boolean) {
+    getArrayView (fullCapacity?: boolean) {
         return new Uint8Array(localHeapViewU8().buffer, this.buffer, fullCapacity ? this.capacity : this.size);
     }
 }
@@ -1141,8 +1142,11 @@ class Cfg {
     backBranchTargets: Uint16Array | null = null;
     base!: MintOpcodePtr;
     ip!: MintOpcodePtr;
+    // The address of the prepare point
     entryIp!: MintOpcodePtr;
     exitIp!: MintOpcodePtr;
+    // The address of the first actual opcode in the trace
+    firstOpcodeIp!: MintOpcodePtr;
     lastSegmentStartIp!: MintOpcodePtr;
     lastSegmentEnd = 0;
     overheadBytes = 0;
@@ -1150,31 +1154,34 @@ class Cfg {
     blockStack: Array<MintOpcodePtr> = [];
     backDispatchOffsets: Array<MintOpcodePtr> = [];
     dispatchTable = new Map<MintOpcodePtr, number>();
-    observedBranchTargets = new Set<MintOpcodePtr>();
+    observedBackBranchTargets = new Set<MintOpcodePtr>();
     trace = 0;
 
-    constructor(builder: WasmBuilder) {
+    constructor (builder: WasmBuilder) {
         this.builder = builder;
     }
 
-    initialize(startOfBody: MintOpcodePtr, backBranchTargets: Uint16Array | null, trace: number) {
+    initialize (startOfBody: MintOpcodePtr, backBranchTargets: Uint16Array | null, trace: number) {
         this.segments.length = 0;
         this.blockStack.length = 0;
         this.startOfBody = startOfBody;
         this.backBranchTargets = backBranchTargets;
         this.base = this.builder.base;
-        this.ip = this.lastSegmentStartIp = this.builder.base;
+        this.ip = this.lastSegmentStartIp = this.firstOpcodeIp = this.builder.base;
         this.lastSegmentEnd = 0;
         this.overheadBytes = 10; // epilogue
         this.dispatchTable.clear();
-        this.observedBranchTargets.clear();
+        this.observedBackBranchTargets.clear();
         this.trace = trace;
         this.backDispatchOffsets.length = 0;
     }
 
     // We have a header containing the table of locals and we need to preserve it
-    entry(ip: MintOpcodePtr) {
+    entry (ip: MintOpcodePtr) {
         this.entryIp = ip;
+        // Skip over the enter opcode
+        const enterSizeU16 = cwraps.mono_jiterp_get_opcode_info(MintOpcode.MINT_TIER_ENTER_JITERPRETER, OpcodeInfoType.Length);
+        this.firstOpcodeIp = ip + <any>(enterSizeU16 * 2);
         this.appendBlob();
         mono_assert(this.segments.length === 1, "expected 1 segment");
         mono_assert(this.segments[0].type === "blob", "expected blob");
@@ -1185,9 +1192,10 @@ class Cfg {
             this.overheadBytes += 20; // some extra padding for the dispatch br_table
             this.overheadBytes += this.backBranchTargets.length; // one byte for each target in the table
         }
+        return this.firstOpcodeIp;
     }
 
-    appendBlob() {
+    appendBlob () {
         if (this.builder.current.size === this.lastSegmentEnd)
             return;
 
@@ -1203,7 +1211,7 @@ class Cfg {
         this.overheadBytes += 2;
     }
 
-    startBranchBlock(ip: MintOpcodePtr, isBackBranchTarget: boolean) {
+    startBranchBlock (ip: MintOpcodePtr, isBackBranchTarget: boolean) {
         this.appendBlob();
         this.segments.push({
             type: "branch-block-header",
@@ -1213,8 +1221,10 @@ class Cfg {
         this.overheadBytes += 1; // each branch block just costs us an end
     }
 
-    branch(target: MintOpcodePtr, isBackward: boolean, branchType: CfgBranchType) {
-        this.observedBranchTargets.add(target);
+    branch (target: MintOpcodePtr, isBackward: boolean, branchType: CfgBranchType) {
+        if (isBackward)
+            this.observedBackBranchTargets.add(target);
+
         this.appendBlob();
         this.segments.push({
             type: "branch",
@@ -1226,31 +1236,31 @@ class Cfg {
         // some branches will generate bailouts instead so we allocate 4 bytes per branch
         //  to try and balance this out and avoid underestimating too much
         this.overheadBytes += 4; // forward branches are a constant br + depth (optimally 2 bytes)
+
         if (isBackward) {
-            // get_local <cinfo>
-            // i32_const 1
-            // i32_store 0 0
             // i32.const <n>
             // set_local <disp>
-            this.overheadBytes += 11;
+            this.overheadBytes += 4;
         }
 
-        // Account for the size of the safepoint
-        if (
-            (branchType === CfgBranchType.SafepointConditional) ||
-            (branchType === CfgBranchType.SafepointUnconditional)
-        ) {
-            this.overheadBytes += 17;
+        if (WasmEnableThreads) {
+            // Account for the size of the safepoint
+            if (
+                (branchType === CfgBranchType.SafepointConditional) ||
+                (branchType === CfgBranchType.SafepointUnconditional)
+            ) {
+                this.overheadBytes += 17;
+            }
         }
     }
 
-    emitBlob(segment: CfgBlob, source: Uint8Array) {
+    emitBlob (segment: CfgBlob, source: Uint8Array) {
         // mono_log_info(`segment @${(<any>segment.ip).toString(16)} ${segment.start}-${segment.start + segment.length}`);
         const view = source.subarray(segment.start, segment.start + segment.length);
         this.builder.appendBytes(view);
     }
 
-    generate(): Uint8Array {
+    generate (): Uint8Array {
         // HACK: Make sure any remaining bytes are inserted into a trailing segment
         this.appendBlob();
 
@@ -1268,8 +1278,9 @@ class Cfg {
         // We wrap the entire trace in a loop that starts with a dispatch br_table in order to support
         //  backwards branches.
         if (this.backBranchTargets) {
-            this.builder.i32_const(0);
-            this.builder.local("disp", WasmOpcode.set_local);
+            // unnecessary, the local is default initialized to zero
+            // this.builder.i32_const(0);
+            // this.builder.local("disp", WasmOpcode.set_local);
             this.builder.block(WasmValtype.void, WasmOpcode.loop);
         }
 
@@ -1300,7 +1311,7 @@ class Cfg {
                 const breakDepth = this.blockStack.indexOf(offset);
                 if (breakDepth < 0)
                     continue;
-                if (!this.observedBranchTargets.has(offset))
+                if (!this.observedBackBranchTargets.has(offset))
                     continue;
 
                 this.dispatchTable.set(offset, this.backDispatchOffsets.length + 1);
@@ -1318,17 +1329,22 @@ class Cfg {
                         mono_log_info(`Exactly one back dispatch offset and it was 0x${(<any>this.backDispatchOffsets[0]).toString(16)}`);
                 }
 
-                // if (disp) goto back_branch_target else fallthrough
+                // if (disp)
+                //    goto back_branch_target;
                 this.builder.local("disp");
                 this.builder.appendU8(WasmOpcode.br_if);
                 this.builder.appendULeb(this.blockStack.indexOf(this.backDispatchOffsets[0]));
             } else {
+                if (this.trace > 0)
+                    mono_log_info(`${this.backDispatchOffsets.length} back branch offsets after filtering.`);
+
                 // the loop needs to start with a br_table that performs dispatch based on the current value
                 //  of the dispatch index local
                 // br_table has to be surrounded by a block in order for a depth of 0 to be fallthrough
                 // We wrap it in an additional block so we can have a trap for unexpected disp values
                 this.builder.block(WasmValtype.void);
                 this.builder.block(WasmValtype.void);
+                // switch (disp) {
                 this.builder.local("disp");
                 this.builder.appendU8(WasmOpcode.br_table);
 
@@ -1378,23 +1394,16 @@ class Cfg {
                 case "branch": {
                     const lookupTarget = segment.isBackward ? dispatchIp : segment.target;
                     let indexInStack = this.blockStack.indexOf(lookupTarget),
-                        successfulBackBranch = false;
+                        successfulBackBranch = false,
+                        disp : number | undefined = undefined;
 
                     // Back branches will target the dispatcher loop so we need to update the dispatch index
                     //  which will be used by the loop dispatch br_table to jump to the correct location
                     if (segment.isBackward) {
                         if (this.dispatchTable.has(segment.target)) {
-                            const disp = this.dispatchTable.get(segment.target)!;
+                            disp = this.dispatchTable.get(segment.target)!;
                             if (this.trace > 1)
                                 mono_log_info(`backward br from ${(<any>segment.from).toString(16)} to ${(<any>segment.target).toString(16)}: disp=${disp}`);
-
-                            // Set the back branch taken flag local so it will get flushed on monitoring exit
-                            this.builder.i32_const(1);
-                            this.builder.local("backbranched", WasmOpcode.set_local);
-
-                            // set the dispatch index for the br_table
-                            this.builder.i32_const(disp);
-                            this.builder.local("disp", WasmOpcode.set_local);
                             successfulBackBranch = true;
                         } else {
                             if (this.trace > 0)
@@ -1408,20 +1417,40 @@ class Cfg {
                         switch (segment.branchType) {
                             case CfgBranchType.SafepointUnconditional:
                                 append_safepoint(this.builder, segment.from);
+                                if (disp !== undefined) {
+                                    this.builder.i32_const(disp);
+                                    this.builder.local("disp", WasmOpcode.set_local);
+                                }
                                 this.builder.appendU8(WasmOpcode.br);
                                 break;
                             case CfgBranchType.SafepointConditional:
                                 // Wrap the safepoint + branch in an if
                                 this.builder.block(WasmValtype.void, WasmOpcode.if_);
                                 append_safepoint(this.builder, segment.from);
+                                if (disp !== undefined) {
+                                    this.builder.i32_const(disp);
+                                    this.builder.local("disp", WasmOpcode.set_local);
+                                }
                                 this.builder.appendU8(WasmOpcode.br);
                                 offset = 1;
                                 break;
                             case CfgBranchType.Unconditional:
+                                if (disp !== undefined) {
+                                    this.builder.i32_const(disp);
+                                    this.builder.local("disp", WasmOpcode.set_local);
+                                }
                                 this.builder.appendU8(WasmOpcode.br);
                                 break;
                             case CfgBranchType.Conditional:
-                                this.builder.appendU8(WasmOpcode.br_if);
+                                if (disp !== undefined) {
+                                    this.builder.block(WasmValtype.void, WasmOpcode.if_);
+                                    this.builder.i32_const(disp);
+                                    this.builder.local("disp", WasmOpcode.set_local);
+                                    offset = 1;
+                                    this.builder.appendU8(WasmOpcode.br);
+                                } else {
+                                    this.builder.appendU8(WasmOpcode.br_if);
+                                }
                                 break;
                             default:
                                 throw new Error("Unimplemented branch type");
@@ -1490,7 +1519,10 @@ export const _now = (globalThis.performance && globalThis.performance.now)
 
 let scratchBuffer: NativePointer = <any>0;
 
-export function append_safepoint(builder: WasmBuilder, ip: MintOpcodePtr) {
+export function append_safepoint (builder: WasmBuilder, ip: MintOpcodePtr) {
+    // safepoints are never triggered in a single-threaded build
+    if (!WasmEnableThreads)
+        return;
     // Check whether a safepoint is required
     builder.ptr_const(cwraps.mono_jiterp_get_polling_required_address());
     builder.appendU8(WasmOpcode.i32_load);
@@ -1504,7 +1536,7 @@ export function append_safepoint(builder: WasmBuilder, ip: MintOpcodePtr) {
     builder.endBlock();
 }
 
-export function append_bailout(builder: WasmBuilder, ip: MintOpcodePtr, reason: BailoutReason) {
+export function append_bailout (builder: WasmBuilder, ip: MintOpcodePtr, reason: BailoutReason) {
     builder.ip_const(ip);
     if (builder.options.countBailouts) {
         builder.i32_const(builder.traceIndex);
@@ -1515,21 +1547,33 @@ export function append_bailout(builder: WasmBuilder, ip: MintOpcodePtr, reason:
 }
 
 // generate a bailout that is recorded for the monitoring phase as a possible early exit.
-export function append_exit(builder: WasmBuilder, ip: MintOpcodePtr, opcodeCounter: number, reason: BailoutReason) {
+export function append_exit (builder: WasmBuilder, ip: MintOpcodePtr, opcodeCounter: number, reason: BailoutReason) {
+    /*
+     * disp will always be nonzero once we've taken at least one backward branch.
+     * if (cinfo) {
+     *   cinfo->backward_branch_taken = disp;
+     *   if (opcodeCounter <= threshold)
+     *     cinfo->opcode_count = opcodeCounter;
+     * }
+     */
+
+    builder.local("cinfo");
+    builder.block(WasmValtype.void, WasmOpcode.if_);
+
+    builder.local("cinfo");
+    builder.local("disp");
+    builder.appendU8(WasmOpcode.i32_store);
+    builder.appendMemarg(getMemberOffset(JiterpMember.BackwardBranchTaken), 0);
+
     if (opcodeCounter <= (builder.options.monitoringLongDistance + 2)) {
         builder.local("cinfo");
         builder.i32_const(opcodeCounter);
         builder.appendU8(WasmOpcode.i32_store);
-        builder.appendMemarg(4, 0); // bailout_opcode_count
-        // flush the backward branch taken flag into the cinfo so that the monitoring phase
-        //  knows we took a backward branch. this is unfortunate but unavoidable overhead
-        // we just make it a flag instead of an increment to reduce the cost
-        builder.local("cinfo");
-        builder.local("backbranched");
-        builder.appendU8(WasmOpcode.i32_store);
-        builder.appendMemarg(0, 0); // JiterpreterCallInfo.backward_branch_taken
+        builder.appendMemarg(getMemberOffset(JiterpMember.BailoutOpcodeCount), 0);
     }
 
+    builder.endBlock();
+
     builder.ip_const(ip);
     if (builder.options.countBailouts) {
         builder.i32_const(builder.traceIndex);
@@ -1539,7 +1583,7 @@ export function append_exit(builder: WasmBuilder, ip: MintOpcodePtr, opcodeCount
     builder.appendU8(WasmOpcode.return_);
 }
 
-export function copyIntoScratchBuffer(src: NativePointer, size: number): NativePointer {
+export function copyIntoScratchBuffer (src: NativePointer, size: number): NativePointer {
     if (!scratchBuffer)
         scratchBuffer = Module._malloc(64);
     if (size > 64)
@@ -1549,7 +1593,7 @@ export function copyIntoScratchBuffer(src: NativePointer, size: number): NativeP
     return scratchBuffer;
 }
 
-export function getWasmFunctionTable() {
+export function getWasmFunctionTable () {
     if (!wasmTable)
         wasmTable = runtimeHelpers.getWasmIndirectFunctionTable();
     if (!wasmTable)
@@ -1557,7 +1601,7 @@ export function getWasmFunctionTable() {
     return wasmTable;
 }
 
-export function addWasmFunctionPointer(table: JiterpreterTable, f: Function) {
+export function addWasmFunctionPointer (table: JiterpreterTable, f: Function) {
     mono_assert(f, "Attempting to set null function into table");
 
     const index = cwraps.mono_jiterp_allocate_table_entry(table);
@@ -1571,7 +1615,7 @@ export function addWasmFunctionPointer(table: JiterpreterTable, f: Function) {
     return index;
 }
 
-export function try_append_memset_fast(builder: WasmBuilder, localOffset: number, value: number, count: number, destOnStack: boolean) {
+export function try_append_memset_fast (builder: WasmBuilder, localOffset: number, value: number, count: number, destOnStack: boolean) {
     if (count <= 0) {
         if (destOnStack)
             builder.appendU8(WasmOpcode.drop);
@@ -1642,7 +1686,7 @@ export function try_append_memset_fast(builder: WasmBuilder, localOffset: number
     return true;
 }
 
-export function append_memset_dest(builder: WasmBuilder, value: number, count: number) {
+export function append_memset_dest (builder: WasmBuilder, value: number, count: number) {
     // spec: pop n, pop val, pop d, fill from d[0] to d[n] with value val
     if (try_append_memset_fast(builder, 0, value, count, true))
         return;
@@ -1654,7 +1698,7 @@ export function append_memset_dest(builder: WasmBuilder, value: number, count: n
     builder.appendU8(0);
 }
 
-export function try_append_memmove_fast(
+export function try_append_memmove_fast (
     builder: WasmBuilder, destLocalOffset: number, srcLocalOffset: number,
     count: number, addressesOnStack: boolean, destLocal?: string, srcLocal?: string
 ) {
@@ -1754,7 +1798,7 @@ export function try_append_memmove_fast(
 }
 
 // expects dest then source to have been pushed onto wasm stack
-export function append_memmove_dest_src(builder: WasmBuilder, count: number) {
+export function append_memmove_dest_src (builder: WasmBuilder, count: number) {
     if (try_append_memmove_fast(builder, 0, 0, count, true))
         return true;
 
@@ -1768,7 +1812,7 @@ export function append_memmove_dest_src(builder: WasmBuilder, count: number) {
     return true;
 }
 
-export function recordFailure(): void {
+export function recordFailure (): void {
     const result = modifyCounter(JiterpCounter.Failures, 1);
     if (result >= maxFailures) {
         mono_log_info(`Disabling jiterpreter after ${result} failures`);
@@ -1782,7 +1826,7 @@ export function recordFailure(): void {
 
 const memberOffsets: { [index: number]: number } = {};
 
-export function getMemberOffset(member: JiterpMember) {
+export function getMemberOffset (member: JiterpMember) {
     const cached = memberOffsets[member];
     if (cached === undefined)
         return memberOffsets[member] = cwraps.mono_jiterp_get_member_offset(<any>member);
@@ -1790,7 +1834,7 @@ export function getMemberOffset(member: JiterpMember) {
         return cached;
 }
 
-export function getRawCwrap(name: string): Function {
+export function getRawCwrap (name: string): Function {
     const result = (<any>Module)["asm"][name];
     if (typeof (result) !== "function")
         throw new Error(`raw cwrap ${name} not found`);
@@ -1799,18 +1843,18 @@ export function getRawCwrap(name: string): Function {
 
 const opcodeTableCache: { [opcode: number]: number } = {};
 
-export function getOpcodeTableValue(opcode: MintOpcode) {
+export function getOpcodeTableValue (opcode: MintOpcode) {
     let result = opcodeTableCache[opcode];
     if (typeof (result) !== "number")
         result = opcodeTableCache[opcode] = cwraps.mono_jiterp_get_opcode_value_table_entry(<any>opcode);
     return result;
 }
 
-export function importDef(name: string, fn: Function): [string, string, Function] {
+export function importDef (name: string, fn: Function): [string, string, Function] {
     return [name, name, fn];
 }
 
-export function bytesFromHex(hex: string): Uint8Array {
+export function bytesFromHex (hex: string): Uint8Array {
     const bytes = new Uint8Array(hex.length / 2);
     for (let i = 0; i < hex.length; i += 2)
         bytes[i / 2] = parseInt(hex.substring(i, i + 2), 16);
@@ -1819,7 +1863,7 @@ export function bytesFromHex(hex: string): Uint8Array {
 
 let observedTaintedZeroPage: boolean | undefined;
 
-export function isZeroPageReserved(): boolean {
+export function isZeroPageReserved (): boolean {
     // FIXME: This check will always return true on worker threads.
     // Right now the jiterpreter is disabled when threading is active, so that's not an issue.
     if (WasmEnableThreads)
@@ -1860,6 +1904,7 @@ export type JiterpreterOptions = {
     enableWasmEh: boolean;
     enableSimd: boolean;
     zeroPageOptimization: boolean;
+    cprop: boolean;
     // For locations where the jiterpreter heuristic says we will be unable to generate
     //  a trace, insert an entry point opcode anyway. This enables collecting accurate
     //  stats for options like estimateHeat, but raises overhead.
@@ -1905,6 +1950,7 @@ const optionNames: { [jsName: string]: string } = {
     "enableWasmEh": "jiterpreter-wasm-eh-enabled",
     "enableSimd": "jiterpreter-simd-enabled",
     "zeroPageOptimization": "jiterpreter-zero-page-optimization",
+    "cprop": "jiterpreter-constant-propagation",
     "enableStats": "jiterpreter-stats-enabled",
     "disableHeuristic": "jiterpreter-disable-heuristic",
     "estimateHeat": "jiterpreter-estimate-heat",
@@ -1934,7 +1980,7 @@ let optionsVersion = -1;
 let optionTable: JiterpreterOptions = <any>{};
 
 // applies one or more jiterpreter options to change the current jiterpreter configuration.
-export function applyOptions(options: JiterpreterOptions) {
+export function applyOptions (options: JiterpreterOptions) {
     for (const k in options) {
         const info = optionNames[k];
         if (!info) {
@@ -1952,16 +1998,16 @@ export function applyOptions(options: JiterpreterOptions) {
     }
 }
 
-export function getCounter(counter: JiterpCounter): number {
+export function getCounter (counter: JiterpCounter): number {
     return cwraps.mono_jiterp_get_counter(counter);
 }
 
-export function modifyCounter(counter: JiterpCounter, delta: number): number {
+export function modifyCounter (counter: JiterpCounter, delta: number): number {
     return cwraps.mono_jiterp_modify_counter(counter, delta);
 }
 
 // returns the current jiterpreter configuration. do not mutate the return value!
-export function getOptions() {
+export function getOptions () {
     const currentVersion = cwraps.mono_jiterp_get_options_version();
     if (currentVersion !== optionsVersion) {
         updateOptions();
@@ -1970,7 +2016,7 @@ export function getOptions() {
     return optionTable;
 }
 
-function updateOptions() {
+function updateOptions () {
     const pJson = cwraps.mono_jiterp_get_options_as_json();
     const json = utf8ToString(<any>pJson);
     Module._free(<any>pJson);
@@ -1983,7 +2029,7 @@ function updateOptions() {
     }
 }
 
-function jiterpreter_allocate_table(type: JiterpreterTable, base: number, size: number, fillValue: Function) {
+function jiterpreter_allocate_table (type: JiterpreterTable, base: number, size: number, fillValue: Function) {
     const wasmTable = getWasmFunctionTable();
     const firstIndex = base, lastIndex = firstIndex + size - 1;
     mono_assert(lastIndex < wasmTable.length, () => `Last index out of range: ${lastIndex} >= ${wasmTable.length}`);
@@ -2007,10 +2053,10 @@ function jiterpreter_allocate_table(type: JiterpreterTable, base: number, size:
 // we need to ensure we only ever initialize tables once on each js worker.
 let jiterpreter_tables_allocated = false;
 
-
-export function jiterpreter_allocate_tables() {
+export function jiterpreter_allocate_tables () {
     if (NativeAOT)
         return;
+
     if (jiterpreter_tables_allocated)
         return;
     jiterpreter_tables_allocated = true;
diff --git a/src/mono/browser/runtime/jiterpreter-tables.ts b/src/mono/browser/runtime/jiterpreter-tables.ts
index 71a3eb86c9f0..a345844e9d90 100644
--- a/src/mono/browser/runtime/jiterpreter-tables.ts
+++ b/src/mono/browser/runtime/jiterpreter-tables.ts
@@ -1,3 +1,6 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
 import {
     WasmOpcode, WasmSimdOpcode, JiterpSpecialOpcode
 } from "./jiterpreter-opcodes";
@@ -6,16 +9,8 @@ import {
 } from "./mintops";
 
 export const ldcTable: { [opcode: number]: [WasmOpcode, number] } = {
-    [MintOpcode.MINT_LDC_I4_M1]: [WasmOpcode.i32_const, -1],
     [MintOpcode.MINT_LDC_I4_0]:  [WasmOpcode.i32_const,  0],
     [MintOpcode.MINT_LDC_I4_1]:  [WasmOpcode.i32_const,  1],
-    [MintOpcode.MINT_LDC_I4_2]:  [WasmOpcode.i32_const,  2],
-    [MintOpcode.MINT_LDC_I4_3]:  [WasmOpcode.i32_const,  3],
-    [MintOpcode.MINT_LDC_I4_4]:  [WasmOpcode.i32_const,  4],
-    [MintOpcode.MINT_LDC_I4_5]:  [WasmOpcode.i32_const,  5],
-    [MintOpcode.MINT_LDC_I4_6]:  [WasmOpcode.i32_const,  6],
-    [MintOpcode.MINT_LDC_I4_7]:  [WasmOpcode.i32_const,  7],
-    [MintOpcode.MINT_LDC_I4_8]:  [WasmOpcode.i32_const,  8],
 };
 
 // operator, loadOperator, storeOperator
@@ -90,6 +85,16 @@ export const unopTable: { [opcode: number]: OpRec3 | undefined } = {
     [MintOpcode.MINT_CLZ_I8]:        [WasmOpcode.i64_clz,    WasmOpcode.i64_load, WasmOpcode.i64_store],
     [MintOpcode.MINT_CTZ_I8]:        [WasmOpcode.i64_ctz,    WasmOpcode.i64_load, WasmOpcode.i64_store],
     [MintOpcode.MINT_POPCNT_I8]:     [WasmOpcode.i64_popcnt, WasmOpcode.i64_load, WasmOpcode.i64_store],
+
+    [MintOpcode.MINT_ADD_I4_IMM2]:   [WasmOpcode.i32_add, WasmOpcode.i32_load, WasmOpcode.i32_store],
+    [MintOpcode.MINT_MUL_I4_IMM2]:   [WasmOpcode.i32_mul, WasmOpcode.i32_load, WasmOpcode.i32_store],
+    [MintOpcode.MINT_ADD_I8_IMM2]:   [WasmOpcode.i64_add, WasmOpcode.i64_load, WasmOpcode.i64_store],
+    [MintOpcode.MINT_MUL_I8_IMM2]:   [WasmOpcode.i64_mul, WasmOpcode.i64_load, WasmOpcode.i64_store],
+
+    [MintOpcode.MINT_AND_I4_IMM]:    [WasmOpcode.i32_and, WasmOpcode.i32_load, WasmOpcode.i32_store],
+    [MintOpcode.MINT_AND_I4_IMM2]:   [WasmOpcode.i32_and, WasmOpcode.i32_load, WasmOpcode.i32_store],
+    [MintOpcode.MINT_OR_I4_IMM]:     [WasmOpcode.i32_or,  WasmOpcode.i32_load, WasmOpcode.i32_store],
+    [MintOpcode.MINT_OR_I4_IMM2]:    [WasmOpcode.i32_or,  WasmOpcode.i32_load, WasmOpcode.i32_store],
 };
 
 // HACK: Generating correct wasm for these is non-trivial so we hand them off to C.
diff --git a/src/mono/browser/runtime/jiterpreter-trace-generator.ts b/src/mono/browser/runtime/jiterpreter-trace-generator.ts
index 616792ffec25..3e9fba479e1b 100644
--- a/src/mono/browser/runtime/jiterpreter-trace-generator.ts
+++ b/src/mono/browser/runtime/jiterpreter-trace-generator.ts
@@ -9,7 +9,7 @@ import {
 } from "./memory";
 import {
     WasmOpcode, WasmSimdOpcode, WasmValtype,
-    getOpcodeName,
+    getOpcodeName, MintOpArgType
 } from "./jiterpreter-opcodes";
 import {
     MintOpcode, SimdInfo,
@@ -33,11 +33,11 @@ import {
 
     disabledOpcodes, countCallTargets,
     callTargetCounts,
-    trace, traceOnError, traceOnRuntimeError,
+    trace, traceOnError,
     emitPadding, traceBranchDisplacements,
     traceEip, nullCheckValidation,
     traceNullCheckOptimizations,
-    nullCheckCaching, traceBackBranches,
+    nullCheckCaching, defaultTraceBackBranches,
     maxCallHandlerReturnAddresses,
 
     mostRecentOptions,
@@ -59,56 +59,57 @@ import { mono_log_error, mono_log_info } from "./logging";
 import { mono_assert, runtimeHelpers } from "./globals";
 
 // indexPlusOne so that ip[1] in the interpreter becomes getArgU16(ip, 1)
-function getArgU16(ip: MintOpcodePtr, indexPlusOne: number) {
+function getArgU16 (ip: MintOpcodePtr, indexPlusOne: number) {
     return getU16(<any>ip + (2 * indexPlusOne));
 }
 
-function getArgI16(ip: MintOpcodePtr, indexPlusOne: number) {
+function getArgI16 (ip: MintOpcodePtr, indexPlusOne: number) {
     return getI16(<any>ip + (2 * indexPlusOne));
 }
 
-function getArgI32(ip: MintOpcodePtr, indexPlusOne: number) {
+function getArgI32 (ip: MintOpcodePtr, indexPlusOne: number) {
     const src = <any>ip + (2 * indexPlusOne);
     return getI32_unaligned(src);
 }
 
-function getArgF32(ip: MintOpcodePtr, indexPlusOne: number) {
+function getArgF32 (ip: MintOpcodePtr, indexPlusOne: number) {
     const src = <any>ip + (2 * indexPlusOne);
     return getF32_unaligned(src);
 }
 
-function getArgF64(ip: MintOpcodePtr, indexPlusOne: number) {
+function getArgF64 (ip: MintOpcodePtr, indexPlusOne: number) {
     const src = <any>ip + (2 * indexPlusOne);
     return getF64_unaligned(src);
 }
 
-function get_imethod(frame: NativePointer) {
+function get_imethod (frame: NativePointer) {
     // FIXME: Encoding this data directly into the trace will prevent trace reuse
     const iMethod = getU32_unaligned(<any>frame + getMemberOffset(JiterpMember.Imethod));
     return iMethod;
 }
 
-function get_imethod_data(frame: NativePointer, index: number) {
+function get_imethod_data (frame: NativePointer, index: number) {
     // FIXME: Encoding this data directly into the trace will prevent trace reuse
     const pData = getU32_unaligned(get_imethod(frame) + getMemberOffset(JiterpMember.DataItems));
     const dataOffset = pData + (index * sizeOfDataItem);
     return getU32_unaligned(dataOffset);
 }
 
-function get_imethod_clause_data_offset(frame: NativePointer, index: number) {
+function get_imethod_clause_data_offset (frame: NativePointer, index: number) {
     // FIXME: Encoding this data directly into the trace will prevent trace reuse
     const pData = getU32_unaligned(get_imethod(frame) + getMemberOffset(JiterpMember.ClauseDataOffsets));
     const dataOffset = pData + (index * sizeOfDataItem);
     return getU32_unaligned(dataOffset);
 }
 
-function is_backward_branch_target(
+function is_backward_branch_target (
     ip: MintOpcodePtr, startOfBody: MintOpcodePtr,
     backwardBranchTable: Uint16Array | null
 ) {
     if (!backwardBranchTable)
         return false;
 
+    // TODO: sort the table and exploit that for faster scan. Not important yet
     for (let i = 0; i < backwardBranchTable.length; i++) {
         const actualOffset = (backwardBranchTable[i] * 2) + <any>startOfBody;
         if (actualOffset === ip)
@@ -118,17 +119,124 @@ function is_backward_branch_target(
     return false;
 }
 
+interface KnownConstantI32 {
+    type: "i32";
+    value: number;
+}
+
+interface KnownConstantV128 {
+    type: "v128";
+    value: Uint8Array;
+}
+
+interface KnownConstantLdloca {
+    type: "ldloca";
+    offset: number;
+}
+
+type KnownConstant = KnownConstantI32 | KnownConstantV128 | KnownConstantLdloca;
 type KnownConstantValue = number | Uint8Array;
-const knownConstantValues = new Map<number, KnownConstantValue>();
+const knownConstants = new Map<number, KnownConstant>();
 
-function get_known_constant_value(builder: WasmBuilder, localOffset: number): KnownConstantValue | undefined {
+function get_known_constant (builder: WasmBuilder, localOffset: number): KnownConstant | undefined {
     if (isAddressTaken(builder, localOffset))
         return undefined;
 
-    return knownConstantValues.get(localOffset);
+    return knownConstants.get(localOffset);
+}
+
+function get_known_constant_value (builder: WasmBuilder, localOffset: number): KnownConstantValue | undefined {
+    const kc = get_known_constant(builder, localOffset);
+    if (kc === undefined)
+        return undefined;
+
+    switch (kc.type) {
+        case "i32":
+        case "v128":
+            return kc.value;
+    }
+
+    return undefined;
+}
+
+// Perform a quick scan through the opcodes potentially in this trace to build a table of
+//  backwards branch targets, compatible with the layout of the old one that was generated in C.
+// We do this here to match the exact way that the jiterp calculates branch targets, since
+//  there were previously corner cases where jiterp and interp disagreed.
+export function generateBackwardBranchTable (
+    ip: MintOpcodePtr, startOfBody: MintOpcodePtr, sizeOfBody: MintOpcodePtr,
+): Uint16Array | null {
+    const endOfBody = <any>startOfBody + <any>sizeOfBody;
+    // TODO: Cache this table object instance and reuse it to reduce gc pressure?
+    const table: number[] = [];
+    // IP of the start of the trace in U16s, relative to startOfBody.
+    const rbase16 = (<any>ip - <any>startOfBody) / 2;
+
+    // FIXME: This will potentially scan the entire method and record branches that won't
+    //  ever run since the trace compilation will end before we reach them.
+    while (ip < endOfBody) {
+        // IP of the current opcode in U16s, relative to startOfBody. This is what the back branch table uses
+        const rip16 = (<any>ip - <any>startOfBody) / 2;
+        const opcode = <MintOpcode>getU16(ip);
+        // HACK
+        if (opcode === MintOpcode.MINT_SWITCH)
+            break;
+
+        const opLengthU16 = cwraps.mono_jiterp_get_opcode_info(opcode, OpcodeInfoType.Length);
+        // Any opcode with a branch argtype will have a decoded displacement, even if we don't
+        //  implement the opcode. Everything else will return undefined here and be skipped
+        const displacement = getBranchDisplacement(ip, opcode);
+        if (typeof (displacement) !== "number") {
+            ip += <any>(opLengthU16 * 2);
+            continue;
+        }
+
+        // These checks shouldn't fail unless memory is corrupted or something is wrong with the decoder.
+        // We don't want to cause decoder bugs to make the application exit, though - graceful degradation.
+        if (displacement === 0) {
+            mono_log_info(`opcode @${ip} branch target is self. aborting backbranch table generation`);
+            break;
+        }
+
+        // Only record *backward* branches
+        // We will filter this down further in the Cfg because it takes note of which branches it sees,
+        //  but it is also beneficial to have a null table (further down) due to seeing no potential
+        //  back branch targets at all, as it allows the Cfg to skip additional code generation entirely
+        //  if it knows there will never be any backwards branches in a given trace
+        if (displacement < 0) {
+            const rtarget16 = rip16 + (displacement);
+            if (rtarget16 < 0) {
+                mono_log_info(`opcode @${ip}'s displacement of ${displacement} goes before body: ${rtarget16}. aborting backbranch table generation`);
+                break;
+            }
+
+            // If the relative target is before the start of the trace, don't record it.
+            // The trace will be unable to successfully branch to it so it would just make the table bigger.
+            if (rtarget16 >= rbase16)
+                table.push(rtarget16);
+        }
+
+        switch (opcode) {
+            case MintOpcode.MINT_CALL_HANDLER:
+            case MintOpcode.MINT_CALL_HANDLER_S:
+                // While this formally isn't a backward branch target, we want to record
+                //  the offset of its following instruction so that the jiterpreter knows
+                //  to generate the necessary dispatch code to enable branching back to it.
+                table.push(rip16 + opLengthU16);
+                break;
+        }
+
+        ip += <any>(opLengthU16 * 2);
+    }
+
+    if (table.length <= 0)
+        return null;
+    // Not important yet, so not doing it
+    // table.sort((a, b) => a - b);
+    return new Uint16Array(table);
 }
 
-export function generateWasmBody(
+export function generateWasmBody (
     frame: NativePointer, traceName: string, ip: MintOpcodePtr,
     startOfBody: MintOpcodePtr, endOfBody: MintOpcodePtr,
     builder: WasmBuilder, instrumentedTraceId: number,
@@ -141,14 +249,17 @@ export function generateWasmBody(
     let result = 0,
         prologueOpcodeCounter = 0,
         conditionalOpcodeCounter = 0;
+
     eraseInferredState();
 
-    // Skip over the enter opcode
-    const enterSizeU16 = cwraps.mono_jiterp_get_opcode_info(MintOpcode.MINT_TIER_ENTER_JITERPRETER, OpcodeInfoType.Length);
-    ip += <any>(enterSizeU16 * 2);
-    let rip = ip;
+    // If a trace is instrumented, also activate back branch tracing
+    builder.backBranchTraceLevel = instrumentedTraceId
+        ? 2
+        : defaultTraceBackBranches;
 
-    builder.cfg.entry(ip);
+    // Record the address of our prepare_jiterpreter opcode as the entry point, not the opcode after it.
+    // Some back-branches will target prepare_jiterpreter directly, and we need them to work.
+    let rip = builder.cfg.entry(ip);
 
     while (ip) {
         // This means some code went 'ip = abort; continue'
@@ -223,7 +334,7 @@ export function generateWasmBody(
         // We record the offset of each backward branch we encounter, so that later branch
         //  opcodes know that it's available by branching to the top of the dispatch loop
         if (isBackBranchTarget) {
-            if (traceBackBranches > 1)
+            if (builder.backBranchTraceLevel > 1)
                 mono_log_info(`${traceName} recording back branch target 0x${(<any>ip).toString(16)}`);
             builder.backBranchOffsets.push(ip);
         }
@@ -300,7 +411,20 @@ export function generateWasmBody(
                 builder.callImport("localloc");
                 break;
             }
-            case MintOpcode.MINT_INITOBJ: {
+            case MintOpcode.MINT_ZEROBLK: {
+                // dest
+                append_ldloc(builder, getArgU16(ip, 1), WasmOpcode.i32_load);
+                // value
+                builder.i32_const(0);
+                // count
+                append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load);
+                // memset
+                builder.appendU8(WasmOpcode.PREFIX_sat);
+                builder.appendU8(11);
+                builder.appendU8(0);
+                break;
+            }
+            case MintOpcode.MINT_ZEROBLK_IMM: {
                 append_ldloc(builder, getArgU16(ip, 1), WasmOpcode.i32_load);
                 append_memset_dest(builder, 0, getArgU16(ip, 2));
                 break;
@@ -459,11 +583,20 @@ export function generateWasmBody(
                 builder.local("pLocals");
                 // locals[ip[1]] = &locals[ip[2]]
                 const offset = getArgU16(ip, 2),
-                    flag = isAddressTaken(builder, offset);
+                    flag = isAddressTaken(builder, offset),
+                    destOffset = getArgU16(ip, 1);
                 if (!flag)
                     mono_log_error(`${traceName}: Expected local ${offset} to have address taken flag`);
                 append_ldloca(builder, offset);
-                append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
+                append_stloc_tail(builder, destOffset, WasmOpcode.i32_store);
+                // Record this ldloca as a known constant so that later uses of it turn into a lea,
+                //  and the wasm runtime can constant fold them with other constants. It's not uncommon
+                //  to have code that does '&x + c', which (if this optimization works) should
+                //  turn into '&locals + offsetof(x) + c' and get constant folded to have the same cost
+                //  as a regular ldloc
+                knownConstants.set(destOffset, { type: "ldloca", offset: offset });
+                // dreg invalidation would blow the known constant away, so disable it
+                skipDregInvalidation = true;
                 break;
             }
 
@@ -1098,18 +1231,6 @@ export function generateWasmBody(
                 break;
             }
 
-            case MintOpcode.MINT_NEWOBJ_VT_INLINED: {
-                const ret_size = getArgU16(ip, 3);
-                // memset (this_vt, 0, ret_size);
-                append_ldloca(builder, getArgU16(ip, 2), ret_size);
-                append_memset_dest(builder, 0, ret_size);
-                // LOCAL_VAR (ip [1], gpointer) = this_vt;
-                builder.local("pLocals");
-                append_ldloca(builder, getArgU16(ip, 2), ret_size);
-                append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
-                break;
-            }
-
             case MintOpcode.MINT_NEWOBJ:
             case MintOpcode.MINT_NEWOBJ_VT:
             case MintOpcode.MINT_CALLVIRT_FAST:
@@ -1446,7 +1567,7 @@ export function generateWasmBody(
                     } else
                         ip = abort;
                 } else if (
-                    (opcode >= MintOpcode.MINT_LDC_I4_M1) &&
+                    (opcode >= MintOpcode.MINT_LDC_I4_0) &&
                     (opcode <= MintOpcode.MINT_LDC_R8)
                 ) {
                     if (!emit_ldc(builder, ip, opcode))
@@ -1558,7 +1679,7 @@ export function generateWasmBody(
                 }
             }
 
-            if ((trace > 1) || traceOnError || traceOnRuntimeError || mostRecentOptions!.dumpTraces || instrumentedTraceId) {
+            if ((trace > 1) || traceOnError || mostRecentOptions!.dumpTraces || instrumentedTraceId) {
                 let stmtText = `${(<any>ip).toString(16)} ${opname} `;
                 const firstDreg = <any>ip + 2;
                 const firstSreg = firstDreg + (numDregs * 2);
@@ -1627,29 +1748,29 @@ export function generateWasmBody(
 const notNullSince: Map<number, number> = new Map();
 let cknullOffset = -1;
 
-function eraseInferredState() {
+function eraseInferredState () {
     cknullOffset = -1;
     notNullSince.clear();
-    knownConstantValues.clear();
+    knownConstants.clear();
 }
 
-function invalidate_local(offset: number) {
+function invalidate_local (offset: number) {
     if (cknullOffset === offset)
         cknullOffset = -1;
     notNullSince.delete(offset);
-    knownConstantValues.delete(offset);
+    knownConstants.delete(offset);
 }
 
-function invalidate_local_range(start: number, bytes: number) {
+function invalidate_local_range (start: number, bytes: number) {
     for (let i = 0; i < bytes; i += 1)
         invalidate_local(start + i);
 }
 
-function append_branch_target_block(builder: WasmBuilder, ip: MintOpcodePtr, isBackBranchTarget: boolean) {
+function append_branch_target_block (builder: WasmBuilder, ip: MintOpcodePtr, isBackBranchTarget: boolean) {
     builder.cfg.startBranchBlock(ip, isBackBranchTarget);
 }
 
-function computeMemoryAlignment(offset: number, opcodeOrPrefix: WasmOpcode, simdOpcode?: WasmSimdOpcode) {
+function computeMemoryAlignment (offset: number, opcodeOrPrefix: WasmOpcode, simdOpcode?: WasmSimdOpcode) {
     // First, compute the best possible alignment
     let alignment = 0;
     if (offset % 16 === 0)
@@ -1710,7 +1831,47 @@ function computeMemoryAlignment(offset: number, opcodeOrPrefix: WasmOpcode, simd
     return alignment;
 }
 
-function append_ldloc(builder: WasmBuilder, offset: number, opcodeOrPrefix: WasmOpcode, simdOpcode?: WasmSimdOpcode) {
+function try_append_ldloc_cprop (
+    builder: WasmBuilder, offset: number, opcodeOrPrefix: WasmOpcode,
+    dryRun: boolean, requireNonzero?: boolean
+) {
+    if (builder.options.cprop && (opcodeOrPrefix === WasmOpcode.i32_load)) {
+        // It's common to ldc.i4 or ldloca immediately before using the value
+        // in these cases the known constant analysis will work consistently, and we can skip the extra
+        //  memory load to read the constant we just wrote to a local. the resulting traces should be
+        //  both smaller and faster, while still correct since the ldc still writes to memory
+        // of course, if known constant analysis is broken, this will break too, but it's better to
+        //  learn immediately whether known constant analysis has been broken this whole time
+        // at least on x86 this will enable much better native code generation for the trace, since
+        //  operations like memory stores have forms that accept an immediate as rhs
+        const knownConstant = get_known_constant(builder, offset);
+        if (knownConstant) {
+            switch (knownConstant.type) {
+                case "i32":
+                    if (requireNonzero && (knownConstant.value === 0))
+                        return false;
+                    if (!dryRun)
+                        builder.i32_const(knownConstant.value);
+                    return true;
+                case "ldloca":
+                    // FIXME: Do we need to invalidate the local again? I don't think we do, we invalidated it
+                    //  when the ldloca operation originally happened, and we're just propagating that address
+                    //  constant forward to its point of use
+                    // requireNonzero is a no-op since ldloca always produces a nonzero result
+                    if (!dryRun)
+                        append_ldloca(builder, knownConstant.offset, 0);
+                    return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+function append_ldloc (builder: WasmBuilder, offset: number, opcodeOrPrefix: WasmOpcode, simdOpcode?: WasmSimdOpcode) {
+    if (try_append_ldloc_cprop(builder, offset, opcodeOrPrefix, false))
+        return;
+
     builder.local("pLocals");
     mono_assert(opcodeOrPrefix >= WasmOpcode.i32_load, () => `Expected load opcode but got ${opcodeOrPrefix}`);
     builder.appendU8(opcodeOrPrefix);
@@ -1729,7 +1890,7 @@ function append_ldloc(builder: WasmBuilder, offset: number, opcodeOrPrefix: Wasm
 //  where the offset+alignment pair is referred to as a 'memarg' by the spec.
 // The actual store operation is equivalent to `pBase[offset] = value` (alignment has no
 //  observable impact on behavior, other than causing compilation failures if out of range)
-function append_stloc_tail(builder: WasmBuilder, offset: number, opcodeOrPrefix: WasmOpcode, simdOpcode?: WasmSimdOpcode) {
+function append_stloc_tail (builder: WasmBuilder, offset: number, opcodeOrPrefix: WasmOpcode, simdOpcode?: WasmSimdOpcode) {
     mono_assert(opcodeOrPrefix >= WasmOpcode.i32_store, () => `Expected store opcode but got ${opcodeOrPrefix}`);
     builder.appendU8(opcodeOrPrefix);
     if (simdOpcode !== undefined) {
@@ -1746,9 +1907,7 @@ function append_stloc_tail(builder: WasmBuilder, offset: number, opcodeOrPrefix:
 
 // Pass bytesInvalidated=0 if you are reading from the local and the address will never be
 //  used for writes
-// Pass transient=true if the address will not persist after use (so it can't be used to later
-//  modify the contents of this local)
-function append_ldloca(builder: WasmBuilder, localOffset: number, bytesInvalidated?: number) {
+function append_ldloca (builder: WasmBuilder, localOffset: number, bytesInvalidated?: number) {
     if (typeof (bytesInvalidated) !== "number")
         bytesInvalidated = 512;
     // FIXME: We need to know how big this variable is so we can invalidate the whole space it occupies
@@ -1757,7 +1916,7 @@ function append_ldloca(builder: WasmBuilder, localOffset: number, bytesInvalidat
     builder.lea("pLocals", localOffset);
 }
 
-function append_memset_local(builder: WasmBuilder, localOffset: number, value: number, count: number) {
+function append_memset_local (builder: WasmBuilder, localOffset: number, value: number, count: number) {
     invalidate_local_range(localOffset, count);
 
     // spec: pop n, pop val, pop d, fill from d[0] to d[n] with value val
@@ -1769,7 +1928,7 @@ function append_memset_local(builder: WasmBuilder, localOffset: number, value: n
     append_memset_dest(builder, value, count);
 }
 
-function append_memmove_local_local(builder: WasmBuilder, destLocalOffset: number, sourceLocalOffset: number, count: number) {
+function append_memmove_local_local (builder: WasmBuilder, destLocalOffset: number, sourceLocalOffset: number, count: number) {
     invalidate_local_range(destLocalOffset, count);
 
     if (try_append_memmove_fast(builder, destLocalOffset, sourceLocalOffset, count, false))
@@ -1781,12 +1940,12 @@ function append_memmove_local_local(builder: WasmBuilder, destLocalOffset: numbe
     append_memmove_dest_src(builder, count);
 }
 
-function isAddressTaken(builder: WasmBuilder, localOffset: number) {
+function isAddressTaken (builder: WasmBuilder, localOffset: number) {
     return cwraps.mono_jiterp_is_imethod_var_address_taken(<any>get_imethod(builder.frame), localOffset) !== 0;
 }
 
 // Loads the specified i32 value and then bails out if it is null, leaving it in the cknull_ptr local.
-function append_ldloc_cknull(builder: WasmBuilder, localOffset: number, ip: MintOpcodePtr, leaveOnStack: boolean) {
+function append_ldloc_cknull (builder: WasmBuilder, localOffset: number, ip: MintOpcodePtr, leaveOnStack: boolean) {
     const optimize = builder.allowNullCheckOptimization &&
         notNullSince.has(localOffset) &&
         !isAddressTaken(builder, localOffset);
@@ -1838,7 +1997,7 @@ function append_ldloc_cknull(builder: WasmBuilder, localOffset: number, ip: Mint
         cknullOffset = -1;
 }
 
-function emit_ldc(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
+function emit_ldc (builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
     let storeType = WasmOpcode.i32_store;
     let value: number | undefined;
 
@@ -1903,14 +2062,14 @@ function emit_ldc(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode):
     invalidate_local(localOffset);
 
     if (typeof (value) === "number")
-        knownConstantValues.set(localOffset, value);
+        knownConstants.set(localOffset, { type: "i32", value: value });
     else
-        knownConstantValues.delete(localOffset);
+        knownConstants.delete(localOffset);
 
     return true;
 }
 
-function emit_mov(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
+function emit_mov (builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
     let loadOp = WasmOpcode.i32_load, storeOp = WasmOpcode.i32_store;
     switch (opcode) {
         case MintOpcode.MINT_MOV_I4_I1:
@@ -1973,7 +2132,7 @@ function emit_mov(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode):
     return true;
 }
 
-function append_vtable_initialize(builder: WasmBuilder, pVtable: NativePointer, ip: MintOpcodePtr) {
+function append_vtable_initialize (builder: WasmBuilder, pVtable: NativePointer, ip: MintOpcodePtr) {
     // TODO: Actually initialize the vtable instead of just checking and bailing out?
     builder.block();
     // FIXME: This will prevent us from reusing traces between runs since the vtables can move
@@ -1988,7 +2147,7 @@ function append_vtable_initialize(builder: WasmBuilder, pVtable: NativePointer,
     builder.endBlock();
 }
 
-function emit_fieldop(
+function emit_fieldop (
     builder: WasmBuilder, frame: NativePointer,
     ip: MintOpcodePtr, opcode: MintOpcode
 ): boolean {
@@ -2010,6 +2169,8 @@ function emit_fieldop(
         notNullSince.has(objectOffset) &&
         !isAddressTaken(builder, objectOffset);
 
+    // TODO: Figure out whether this is commonly used to access fields of structs that
+    //  live on the stack, and if so, whether we want to do cprop of the ldloca
     if (
         (opcode !== MintOpcode.MINT_LDFLDA_UNSAFE) &&
         (opcode !== MintOpcode.MINT_STFLD_O)
@@ -2182,7 +2343,7 @@ function emit_fieldop(
     }
 }
 
-function emit_sfieldop(
+function emit_sfieldop (
     builder: WasmBuilder, frame: NativePointer,
     ip: MintOpcodePtr, opcode: MintOpcode
 ): boolean {
@@ -2289,7 +2450,7 @@ function emit_sfieldop(
     }
 }
 
-function emit_binop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
+function emit_binop (builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
     // operands are popped right to left, which means you build the arg list left to right
     let lhsLoadOp: WasmOpcode, rhsLoadOp: WasmOpcode, storeOp: WasmOpcode,
         lhsVar = "math_lhs32", rhsVar = "math_rhs32",
@@ -2441,7 +2602,7 @@ function emit_binop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode)
     return true;
 }
 
-function emit_unop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
+function emit_unop (builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
     // operands are popped right to left, which means you build the arg list left to right
     const info = unopTable[<any>opcode];
     if (!info)
@@ -2531,6 +2692,8 @@ function emit_unop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode):
 
         case MintOpcode.MINT_ADD_I4_IMM:
         case MintOpcode.MINT_MUL_I4_IMM:
+        case MintOpcode.MINT_AND_I4_IMM:
+        case MintOpcode.MINT_OR_I4_IMM:
         case MintOpcode.MINT_SHL_I4_IMM:
         case MintOpcode.MINT_SHR_I4_IMM:
         case MintOpcode.MINT_SHR_UN_I4_IMM:
@@ -2540,6 +2703,14 @@ function emit_unop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode):
             builder.i32_const(getArgI16(ip, 3));
             break;
 
+        case MintOpcode.MINT_ADD_I4_IMM2:
+        case MintOpcode.MINT_MUL_I4_IMM2:
+        case MintOpcode.MINT_AND_I4_IMM2:
+        case MintOpcode.MINT_OR_I4_IMM2:
+            append_ldloc(builder, getArgU16(ip, 2), loadOp);
+            builder.i32_const(getArgI32(ip, 3));
+            break;
+
         case MintOpcode.MINT_ADD_I8_IMM:
         case MintOpcode.MINT_MUL_I8_IMM:
         case MintOpcode.MINT_SHL_I8_IMM:
@@ -2551,6 +2722,12 @@ function emit_unop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode):
             builder.i52_const(getArgI16(ip, 3));
             break;
 
+        case MintOpcode.MINT_ADD_I8_IMM2:
+        case MintOpcode.MINT_MUL_I8_IMM2:
+            append_ldloc(builder, getArgU16(ip, 2), loadOp);
+            builder.i52_const(getArgI32(ip, 3));
+            break;
+
         default:
             append_ldloc(builder, getArgU16(ip, 2), loadOp);
             break;
@@ -2564,7 +2741,7 @@ function emit_unop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode):
     return true;
 }
 
-function append_call_handler_store_ret_ip(
+function append_call_handler_store_ret_ip (
     builder: WasmBuilder, ip: MintOpcodePtr,
     frame: NativePointer, opcode: MintOpcode
 ) {
@@ -2584,13 +2761,45 @@ function append_call_handler_store_ret_ip(
     builder.callHandlerReturnAddresses.push(retIp);
 }
 
-function emit_branch(
+function getBranchDisplacement (
+    ip: MintOpcodePtr, opcode: MintOpcode
+): number | undefined {
+    const opArgType = cwraps.mono_jiterp_get_opcode_info(opcode, OpcodeInfoType.OpArgType),
+        payloadOffset = cwraps.mono_jiterp_get_opcode_info(opcode, OpcodeInfoType.Sregs),
+        payloadAddress = <any>ip + 2 + (payloadOffset * 2);
+
+    let result: number;
+    switch (opArgType) {
+        case MintOpArgType.MintOpBranch:
+            result = getI32_unaligned(payloadAddress);
+            break;
+        case MintOpArgType.MintOpShortBranch:
+            result = getI16(payloadAddress);
+            break;
+        case MintOpArgType.MintOpShortAndShortBranch:
+            result = getI16(payloadAddress + 2);
+            break;
+        default:
+            return undefined;
+    }
+
+    if (traceBranchDisplacements)
+        mono_log_info(`${getOpcodeName(opcode)} @${ip} displacement=${result}`);
+
+    return result;
+}
+
+function emit_branch (
     builder: WasmBuilder, ip: MintOpcodePtr,
-    frame: NativePointer, opcode: MintOpcode, displacement?: number
+    frame: NativePointer, opcode: MintOpcode
 ): boolean {
     const isSafepoint = (opcode >= MintOpcode.MINT_BRFALSE_I4_SP) &&
         (opcode <= MintOpcode.MINT_BLT_UN_I8_IMM_SP);
 
+    const displacement = getBranchDisplacement(ip, opcode);
+    if (typeof (displacement) !== "number")
+        return false;
+
     // If the branch is taken we bail out to allow the interpreter to do it.
     // So for brtrue, we want to do 'cond == 0' to produce a bailout only
     //  when the branch will be taken (by skipping the bailout in this block)
@@ -2604,15 +2813,7 @@ function emit_branch(
         case MintOpcode.MINT_BR_S: {
             const isCallHandler = (opcode === MintOpcode.MINT_CALL_HANDLER) ||
                 (opcode === MintOpcode.MINT_CALL_HANDLER_S);
-            displacement = (
-                (opcode === MintOpcode.MINT_BR) ||
-                (opcode === MintOpcode.MINT_CALL_HANDLER)
-            )
-                ? getArgI32(ip, 1)
-                : getArgI16(ip, 1);
 
-            if (traceBranchDisplacements)
-                mono_log_info(`br.s @${ip} displacement=${displacement}`);
             const destination = <any>ip + (displacement * 2);
 
             if (displacement <= 0) {
@@ -2620,8 +2821,8 @@ function emit_branch(
                     // We found a backward branch target we can branch to, so we branch out
                     //  to the top of the loop body
                     // append_safepoint(builder, ip);
-                    if (traceBackBranches > 1)
-                        mono_log_info(`performing backward branch to 0x${destination.toString(16)}`);
+                    if (builder.backBranchTraceLevel > 1)
+                        mono_log_info(`0x${(<any>ip).toString(16)} performing backward branch to 0x${destination.toString(16)}`);
                     if (isCallHandler)
                         append_call_handler_store_ret_ip(builder, ip, frame, opcode);
                     builder.cfg.branch(destination, true, CfgBranchType.Unconditional);
@@ -2629,9 +2830,9 @@ function emit_branch(
                     return true;
                 } else {
                     if (destination < builder.cfg.entryIp) {
-                        if ((traceBackBranches > 1) || (builder.cfg.trace > 1))
-                            mono_log_info(`${getOpcodeName(opcode)} target 0x${destination.toString(16)} before start of trace`);
-                    } else if ((traceBackBranches > 0) || (builder.cfg.trace > 0))
+                        if ((builder.backBranchTraceLevel > 1) || (builder.cfg.trace > 1))
+                            mono_log_info(`0x${(<any>ip).toString(16)} ${getOpcodeName(opcode)} target 0x${destination.toString(16)} before start of trace`);
+                    } else if ((builder.backBranchTraceLevel > 0) || (builder.cfg.trace > 0))
                         mono_log_info(`0x${(<any>ip).toString(16)} ${getOpcodeName(opcode)} target 0x${destination.toString(16)} not found in list ` +
                             builder.backBranchOffsets.map(bbo => "0x" + (<any>bbo).toString(16)).join(", ")
                         );
@@ -2665,7 +2866,6 @@ function emit_branch(
 
             // Load the condition
 
-            displacement = getArgI16(ip, 2);
             append_ldloc(builder, getArgU16(ip, 1), is64 ? WasmOpcode.i64_load : WasmOpcode.i32_load);
             if (
                 (opcode === MintOpcode.MINT_BRFALSE_I4_S) ||
@@ -2696,26 +2896,21 @@ function emit_branch(
         }
     }
 
-    if (!displacement)
-        throw new Error("Branch had no displacement");
-    else if (traceBranchDisplacements)
-        mono_log_info(`${getOpcodeName(opcode)} @${ip} displacement=${displacement}`);
-
     const destination = <any>ip + (displacement * 2);
 
     if (displacement < 0) {
         if (builder.backBranchOffsets.indexOf(destination) >= 0) {
             // We found a backwards branch target we can reach via our outer trace loop, so
             //  we update eip and branch out to the top of the loop block
-            if (traceBackBranches > 1)
-                mono_log_info(`performing conditional backward branch to 0x${destination.toString(16)}`);
+            if (builder.backBranchTraceLevel > 1)
+                mono_log_info(`0x${(<any>ip).toString(16)} performing conditional backward branch to 0x${destination.toString(16)}`);
             builder.cfg.branch(destination, true, isSafepoint ? CfgBranchType.SafepointConditional : CfgBranchType.Conditional);
             modifyCounter(JiterpCounter.BackBranchesEmitted, 1);
         } else {
             if (destination < builder.cfg.entryIp) {
-                if ((traceBackBranches > 1) || (builder.cfg.trace > 1))
-                    mono_log_info(`${getOpcodeName(opcode)} target 0x${destination.toString(16)} before start of trace`);
-            } else if ((traceBackBranches > 0) || (builder.cfg.trace > 0))
+                if ((builder.backBranchTraceLevel > 1) || (builder.cfg.trace > 1))
+                    mono_log_info(`0x${(<any>ip).toString(16)} ${getOpcodeName(opcode)} target 0x${destination.toString(16)} before start of trace`);
+            } else if ((builder.backBranchTraceLevel > 0) || (builder.cfg.trace > 0))
                 mono_log_info(`0x${(<any>ip).toString(16)} ${getOpcodeName(opcode)} target 0x${destination.toString(16)} not found in list ` +
                     builder.backBranchOffsets.map(bbo => "0x" + (<any>bbo).toString(16)).join(", ")
                 );
@@ -2735,7 +2930,7 @@ function emit_branch(
     return true;
 }
 
-function emit_relop_branch(
+function emit_relop_branch (
     builder: WasmBuilder, ip: MintOpcodePtr,
     frame: NativePointer, opcode: MintOpcode
 ): boolean {
@@ -2753,10 +2948,6 @@ function emit_relop_branch(
     if (!relopInfo && !intrinsicFpBinop)
         return false;
 
-    const displacement = getArgI16(ip, 3);
-    if (traceBranchDisplacements)
-        mono_log_info(`relop @${ip} displacement=${displacement}`);
-
     const operandLoadOp = relopInfo
         ? relopInfo[1]
         : (
@@ -2791,10 +2982,10 @@ function emit_relop_branch(
         builder.callImport("relop_fp");
     }
 
-    return emit_branch(builder, ip, frame, opcode, displacement);
+    return emit_branch(builder, ip, frame, opcode);
 }
 
-function emit_math_intrinsic(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
+function emit_math_intrinsic (builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
     let isUnary: boolean, isF32: boolean, name: string | undefined;
     let wasmOp: WasmOpcode | undefined;
     const destOffset = getArgU16(ip, 1),
@@ -2842,7 +3033,7 @@ function emit_math_intrinsic(builder: WasmBuilder, ip: MintOpcodePtr, opcode: Mi
     }
 }
 
-function emit_indirectop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
+function emit_indirectop (builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
     const isLoad = (opcode >= MintOpcode.MINT_LDIND_I1) &&
         (opcode <= MintOpcode.MINT_LDIND_OFFSET_ADD_MUL_IMM_I8);
     const isAddMul = (
@@ -2976,13 +3167,21 @@ function emit_indirectop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOp
             return false;
     }
 
-    append_ldloc_cknull(builder, addressVarIndex, ip, false);
+    // Check whether ldloc cprop is possible for the address var, if it is, skip doing the ldloc_cknull.
+    // We'll also skip loading cknull_ptr later.
+    const addressCprop = try_append_ldloc_cprop(builder, addressVarIndex, WasmOpcode.i32_load, true, true);
+    if (!addressCprop)
+        append_ldloc_cknull(builder, addressVarIndex, ip, false);
 
     if (isLoad) {
         // pre-load pLocals for the store operation
         builder.local("pLocals");
         // Load address
-        builder.local("cknull_ptr");
+        if (addressCprop)
+            mono_assert(try_append_ldloc_cprop(builder, addressVarIndex, WasmOpcode.i32_load, false, true), "Unknown jiterpreter cprop failure");
+        else
+            builder.local("cknull_ptr");
+
         // For ldind_offset we need to load an offset from another local
         //  and then add it to the null checked address
         if (isAddMul) {
@@ -3014,13 +3213,21 @@ function emit_indirectop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOp
         append_stloc_tail(builder, valueVarIndex, setter);
     } else if (opcode === MintOpcode.MINT_STIND_REF) {
         // Load destination address
-        builder.local("cknull_ptr");
+        if (addressCprop)
+            mono_assert(try_append_ldloc_cprop(builder, addressVarIndex, WasmOpcode.i32_load, false, true), "Unknown jiterpreter cprop failure");
+        else
+            builder.local("cknull_ptr");
+
         // Load address of value so that copy_managed_pointer can grab it
         append_ldloca(builder, valueVarIndex, 0);
         builder.callImport("copy_ptr");
     } else {
         // Pre-load address for the store operation
-        builder.local("cknull_ptr");
+        if (addressCprop)
+            mono_assert(try_append_ldloc_cprop(builder, addressVarIndex, WasmOpcode.i32_load, false, true), "Unknown jiterpreter cprop failure");
+        else
+            builder.local("cknull_ptr");
+
         // For ldind_offset we need to load an offset from another local
         //  and then add it to the null checked address
         if (isOffset && offsetVarIndex >= 0) {
@@ -3040,7 +3247,7 @@ function emit_indirectop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOp
     return true;
 }
 
-function append_getelema1(
+function append_getelema1 (
     builder: WasmBuilder, ip: MintOpcodePtr,
     objectOffset: number, indexOffset: number, elementSize: number
 ) {
@@ -3099,7 +3306,7 @@ function append_getelema1(
     // append_getelema1 leaves the address on the stack
 }
 
-function emit_arrayop(builder: WasmBuilder, frame: NativePointer, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
+function emit_arrayop (builder: WasmBuilder, frame: NativePointer, ip: MintOpcodePtr, opcode: MintOpcode): boolean {
     const isLoad = ((opcode <= MintOpcode.MINT_LDELEMA_TC) && (opcode >= MintOpcode.MINT_LDELEM_I1)) ||
         (opcode === MintOpcode.MINT_LDLEN),
         objectOffset = getArgU16(ip, isLoad ? 2 : 1),
@@ -3263,7 +3470,7 @@ function emit_arrayop(builder: WasmBuilder, frame: NativePointer, ip: MintOpcode
 
 let wasmSimdSupported: boolean | undefined;
 
-function getIsWasmSimdSupported(): boolean {
+function getIsWasmSimdSupported (): boolean {
     if (wasmSimdSupported !== undefined)
         return wasmSimdSupported;
 
@@ -3274,7 +3481,7 @@ function getIsWasmSimdSupported(): boolean {
     return wasmSimdSupported;
 }
 
-function get_import_name(
+function get_import_name (
     builder: WasmBuilder, typeName: string,
     functionPtr: number
 ): string {
@@ -3285,7 +3492,7 @@ function get_import_name(
     return name;
 }
 
-function emit_simd(
+function emit_simd (
     builder: WasmBuilder, ip: MintOpcodePtr,
     opcode: MintOpcode, opname: string,
     argCount: number, index: number
@@ -3317,7 +3524,7 @@ function emit_simd(
                 const view = localHeapViewU8().slice(<any>ip + 4, <any>ip + 4 + sizeOfV128);
                 builder.v128_const(view);
                 append_simd_store(builder, ip);
-                knownConstantValues.set(getArgU16(ip, 1), view);
+                knownConstants.set(getArgU16(ip, 1), { type: "v128", value: view });
             } else {
                 // dest
                 append_ldloca(builder, getArgU16(ip, 1), sizeOfV128);
@@ -3386,31 +3593,31 @@ function emit_simd(
     }
 }
 
-function append_simd_store(builder: WasmBuilder, ip: MintOpcodePtr) {
+function append_simd_store (builder: WasmBuilder, ip: MintOpcodePtr) {
     append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_store);
 }
 
-function append_simd_2_load(builder: WasmBuilder, ip: MintOpcodePtr, loadOp?: WasmSimdOpcode) {
+function append_simd_2_load (builder: WasmBuilder, ip: MintOpcodePtr, loadOp?: WasmSimdOpcode) {
     builder.local("pLocals");
     // This || is harmless since v128_load is 0
     append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, loadOp || WasmSimdOpcode.v128_load);
 }
 
-function append_simd_3_load(builder: WasmBuilder, ip: MintOpcodePtr) {
+function append_simd_3_load (builder: WasmBuilder, ip: MintOpcodePtr) {
     builder.local("pLocals");
     append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
     // FIXME: Can rhs be a scalar? We handle shifts separately already
     append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
 }
 
-function append_simd_4_load(builder: WasmBuilder, ip: MintOpcodePtr) {
+function append_simd_4_load (builder: WasmBuilder, ip: MintOpcodePtr) {
     builder.local("pLocals");
     append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
     append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
     append_ldloc(builder, getArgU16(ip, 4), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
 }
 
-function emit_simd_2(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic2): boolean {
+function emit_simd_2 (builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic2): boolean {
     const simple = <WasmSimdOpcode>cwraps.mono_jiterp_get_simd_opcode(1, index);
     if (simple >= 0) {
         if (simdLoadTable.has(index)) {
@@ -3477,7 +3684,7 @@ function emit_simd_2(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrins
     }
 }
 
-function emit_simd_3(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic3): boolean {
+function emit_simd_3 (builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic3): boolean {
     const simple = <WasmSimdOpcode>cwraps.mono_jiterp_get_simd_opcode(2, index);
     if (simple >= 0) {
         const isShift = simdShiftTable.has(index),
@@ -3598,7 +3805,7 @@ function emit_simd_3(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrins
 
 // implement i16 and i32 shuffles on top of wasm's only shuffle opcode by expanding the
 //  element shuffle indices into byte indices
-function emit_shuffle(builder: WasmBuilder, ip: MintOpcodePtr, elementCount: number): boolean {
+function emit_shuffle (builder: WasmBuilder, ip: MintOpcodePtr, elementCount: number): boolean {
     const elementSize = 16 / elementCount,
         indicesOffset = getArgU16(ip, 3),
         constantIndices = get_known_constant_value(builder, indicesOffset);
@@ -3659,7 +3866,7 @@ function emit_shuffle(builder: WasmBuilder, ip: MintOpcodePtr, elementCount: num
     return true;
 }
 
-function emit_simd_4(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic4): boolean {
+function emit_simd_4 (builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic4): boolean {
     const simple = <WasmSimdOpcode>cwraps.mono_jiterp_get_simd_opcode(3, index);
     if (simple >= 0) {
         // [lane count, value load opcode]
diff --git a/src/mono/browser/runtime/jiterpreter.ts b/src/mono/browser/runtime/jiterpreter.ts
index c2bf13f0b3d8..dbd963bec77c 100644
--- a/src/mono/browser/runtime/jiterpreter.ts
+++ b/src/mono/browser/runtime/jiterpreter.ts
@@ -5,7 +5,7 @@ import NativeAOT from "consts:nativeAOT";
 import { MonoMethod } from "./types/internal";
 import { NativePointer } from "./types/emscripten";
 import { Module, mono_assert, runtimeHelpers } from "./globals";
-import { getU16, getU32_unaligned, localHeapViewU8 } from "./memory";
+import { getU16 } from "./memory";
 import { WasmValtype, WasmOpcode, getOpcodeName } from "./jiterpreter-opcodes";
 import { MintOpcode } from "./mintops";
 import cwraps from "./cwraps";
@@ -13,15 +13,15 @@ import {
     MintOpcodePtr, WasmBuilder, addWasmFunctionPointer,
     _now, isZeroPageReserved,
     getRawCwrap, importDef, JiterpreterOptions, getOptions, recordFailure,
-    getMemberOffset, getCounter, modifyCounter,
+    getCounter, modifyCounter,
     simdFallbackCounters, getWasmFunctionTable
 } from "./jiterpreter-support";
 import {
-    JiterpMember, BailoutReasonNames, BailoutReason,
+    BailoutReasonNames, BailoutReason,
     JiterpreterTable, JiterpCounter,
 } from "./jiterpreter-enums";
 import {
-    generateWasmBody
+    generateWasmBody, generateBackwardBranchTable
 } from "./jiterpreter-trace-generator";
 import { mono_jiterp_free_method_data_interp_entry } from "./jiterpreter-interp-entry";
 import { mono_jiterp_free_method_data_jit_call } from "./jiterpreter-jit-call";
@@ -35,10 +35,6 @@ export const
     // Record a trace of all managed interpreter opcodes then dump it to console
     //  if an error occurs while compiling the output wasm
     traceOnError = false,
-    // Record trace but dump it when the trace has a runtime error instead
-    //  requires trapTraceErrors to work and will slow trace compilation +
-    //  increase memory usage
-    traceOnRuntimeError = false,
     // Trace the method name, location and reason for each abort
     traceAbortLocations = false,
     // Count the number of times a given method is seen as a call target, then
@@ -61,12 +57,7 @@ export const
     traceNullCheckOptimizations = false,
     // Print diagnostic information when generating backward branches
     // 1 = failures only, 2 = full detail
-    traceBackBranches = 0,
-    // If we encounter an enter opcode that looks like a loop body and it was already
-    //  jitted, we should abort the current trace since it's not worth continuing
-    // Unproductive if we have backward branches enabled because it can stop us from jitting
-    //  nested loops
-    abortAtJittedLoopBodies = true,
+    defaultTraceBackBranches = 0,
     // Enable generating conditional backward branches for ENDFINALLY opcodes if we saw some CALL_HANDLER
     //  opcodes previously, up to this many potential return addresses. If a trace contains more potential
     //  return addresses than this we will not emit code for the ENDFINALLY opcode
@@ -108,7 +99,7 @@ export class InstrumentedTraceState {
     operand1: number | undefined;
     operand2: number | undefined;
 
-    constructor(name: string) {
+    constructor (name: string) {
         this.name = name;
         this.eip = <any>0;
     }
@@ -124,13 +115,13 @@ export class TraceInfo {
     bailoutCount: number | undefined;
     isVerbose: boolean;
 
-    constructor(ip: MintOpcodePtr, index: number, isVerbose: number) {
+    constructor (ip: MintOpcodePtr, index: number, isVerbose: number) {
         this.ip = ip;
         this.index = index;
         this.isVerbose = !!isVerbose;
     }
 
-    get hitCount() {
+    get hitCount () {
         return cwraps.mono_jiterp_get_trace_hit_count(this.index);
     }
 }
@@ -235,7 +226,7 @@ const mathOps1d =
         "powf",
     ];
 
-function recordBailout(ip: number, traceIndex: number, reason: BailoutReason) {
+function recordBailout (ip: number, traceIndex: number, reason: BailoutReason) {
     cwraps.mono_jiterp_trace_bailout(reason);
     // Counting these is not meaningful and messes up the end of run statistics
     if (reason === BailoutReason.Return)
@@ -261,7 +252,7 @@ function recordBailout(ip: number, traceIndex: number, reason: BailoutReason) {
     return ip;
 }
 
-function getTraceImports() {
+function getTraceImports () {
     if (traceImports)
         return traceImports;
 
@@ -323,7 +314,7 @@ function getTraceImports() {
     return traceImports;
 }
 
-function initialize_builder(builder: WasmBuilder) {
+function initialize_builder (builder: WasmBuilder) {
     // Function type for compiled traces
     builder.defineType(
         "trace",
@@ -704,7 +695,7 @@ function initialize_builder(builder: WasmBuilder) {
     }
 }
 
-function assert_not_null(
+function assert_not_null (
     value: number, expectedValue: number, traceIndex: number, ip: MintOpcodePtr
 ) {
     if (value && (value === expectedValue))
@@ -714,7 +705,7 @@ function assert_not_null(
 }
 
 // returns function id
-function generate_wasm(
+function generate_wasm (
     frame: NativePointer, methodName: string, ip: MintOpcodePtr,
     startOfBody: MintOpcodePtr, sizeOfBody: MintOpcodePtr,
     traceIndex: number, methodFullName: string | undefined,
@@ -789,7 +780,6 @@ function generate_wasm(
             "math_rhs64": WasmValtype.i64,
             "temp_f32": WasmValtype.f32,
             "temp_f64": WasmValtype.f64,
-            "backbranched": WasmValtype.i32,
         };
         if (builder.options.enableSimd) {
             traceLocals["v128_zero"] = WasmValtype.v128;
@@ -901,7 +891,7 @@ function generate_wasm(
         //  suites or benchmarks if you've enabled stats
         const tracesCompiled = getCounter(JiterpCounter.TracesCompiled);
         if (builder.options.enableStats && tracesCompiled && (tracesCompiled % autoDumpInterval) === 0)
-            jiterpreter_dump_stats(false, true);
+            jiterpreter_dump_stats(true);
 
         return idx;
     } catch (exc: any) {
@@ -959,7 +949,7 @@ function generate_wasm(
     }
 }
 
-export function trace_current_ip(traceId: number, eip: MintOpcodePtr) {
+export function trace_current_ip (traceId: number, eip: MintOpcodePtr) {
     const tup = instrumentedTraces[traceId];
     if (!tup)
         throw new Error(`Unrecognized instrumented trace id ${traceId}`);
@@ -967,14 +957,14 @@ export function trace_current_ip(traceId: number, eip: MintOpcodePtr) {
     mostRecentTrace = tup;
 }
 
-export function trace_operands(a: number, b: number) {
+export function trace_operands (a: number, b: number) {
     if (!mostRecentTrace)
         throw new Error("No trace active");
     mostRecentTrace.operand1 = a >>> 0;
     mostRecentTrace.operand2 = b >>> 0;
 }
 
-export function record_abort(traceIndex: number, ip: MintOpcodePtr, traceName: string, reason: string | MintOpcode) {
+export function record_abort (traceIndex: number, ip: MintOpcodePtr, traceName: string, reason: string | MintOpcode) {
     if (typeof (reason) === "number") {
         cwraps.mono_jiterp_adjust_abort_count(reason, 1);
         reason = getOpcodeName(reason);
@@ -997,7 +987,7 @@ export function record_abort(traceIndex: number, ip: MintOpcodePtr, traceName: s
 const JITERPRETER_TRAINING = 0;
 const JITERPRETER_NOT_JITTED = 1;
 
-export function mono_interp_tier_prepare_jiterpreter(
+export function mono_interp_tier_prepare_jiterpreter (
     frame: NativePointer, method: MonoMethod, ip: MintOpcodePtr, index: number,
     startOfBody: MintOpcodePtr, sizeOfBody: MintOpcodePtr, isVerbose: number,
     presetFunctionPointer: number
@@ -1031,11 +1021,8 @@ export function mono_interp_tier_prepare_jiterpreter(
     const methodName = utf8ToString(cwraps.mono_wasm_method_get_name(method));
     info.name = methodFullName || methodName;
 
-    const imethod = getU32_unaligned(getMemberOffset(JiterpMember.Imethod) + <any>frame);
-    const backBranchCount = getU32_unaligned(getMemberOffset(JiterpMember.BackwardBranchOffsetsCount) + imethod);
-    const pBackBranches = getU32_unaligned(getMemberOffset(JiterpMember.BackwardBranchOffsets) + imethod);
-    let backwardBranchTable = backBranchCount
-        ? new Uint16Array(localHeapViewU8().buffer, pBackBranches, backBranchCount)
+    let backwardBranchTable = mostRecentOptions.noExitBackwardBranches
+        ? generateBackwardBranchTable(ip, startOfBody, sizeOfBody)
         : null;
 
     // If we're compiling a trace that doesn't start at the beginning of a method,
@@ -1045,7 +1032,7 @@ export function mono_interp_tier_prepare_jiterpreter(
         const threshold = (<any>ip - <any>startOfBody) / 2;
         let foundReachableBranchTarget = false;
         for (let i = 0; i < backwardBranchTable.length; i++) {
-            if (backwardBranchTable[i] > threshold) {
+            if (backwardBranchTable[i] >= threshold) {
                 foundReachableBranchTarget = true;
                 break;
             }
@@ -1075,7 +1062,7 @@ export function mono_interp_tier_prepare_jiterpreter(
 
 // NOTE: This will potentially be called once for every trace entry point
 //  in a given method, not just once per method
-export function mono_jiterp_free_method_data_js(
+export function mono_jiterp_free_method_data_js (
     method: MonoMethod, imethod: number, traceIndex: number
 ) {
     // TODO: Uninstall the trace function pointer from the function pointer table,
@@ -1087,17 +1074,17 @@ export function mono_jiterp_free_method_data_js(
     mono_jiterp_free_method_data_jit_call(method);
 }
 
-export function jiterpreter_dump_stats(b?: boolean, concise?: boolean) {
+export function jiterpreter_dump_stats (concise?: boolean): void {
     if (NativeAOT) {
         return;
     }
     if (!runtimeHelpers.runtimeReady) {
         return;
     }
-    if (!mostRecentOptions || (b !== undefined))
+    if (!mostRecentOptions)
         mostRecentOptions = getOptions();
 
-    if (!mostRecentOptions.enableStats && (b !== undefined))
+    if (!mostRecentOptions.enableStats)
         return;
 
     const backBranchesEmitted = getCounter(JiterpCounter.BackBranchesEmitted),
@@ -1259,10 +1246,4 @@ export function jiterpreter_dump_stats(b?: boolean, concise?: boolean) {
 
     for (const k in simdFallbackCounters)
         mono_log_info(`// simd ${k}: ${simdFallbackCounters[k]} fallback insn(s)`);
-
-    if ((typeof (globalThis.setTimeout) === "function") && (b !== undefined))
-        setTimeout(
-            () => jiterpreter_dump_stats(b),
-            15000
-        );
 }
diff --git a/src/mono/browser/runtime/lazyLoading.ts b/src/mono/browser/runtime/lazyLoading.ts
index 55bcfd67101e..f4da4521010a 100644
--- a/src/mono/browser/runtime/lazyLoading.ts
+++ b/src/mono/browser/runtime/lazyLoading.ts
@@ -1,10 +1,11 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import { loaderHelpers, runtimeHelpers } from "./globals";
+import { loaderHelpers } from "./globals";
+import { load_lazy_assembly } from "./managed-exports";
 import { AssetEntry } from "./types";
 
-export async function loadLazyAssembly(assemblyNameToLoad: string): Promise<boolean> {
+export async function loadLazyAssembly (assemblyNameToLoad: string): Promise<boolean> {
     const resources = loaderHelpers.config.resources!;
     const lazyAssemblies = resources.lazyAssembly;
     if (!lazyAssemblies) {
@@ -26,7 +27,7 @@ export async function loadLazyAssembly(assemblyNameToLoad: string): Promise<bool
     }
 
     const pdbNameToLoad = changeExtension(dllAsset.name, ".pdb");
-    const shouldLoadPdb = loaderHelpers.hasDebuggingEnabled(loaderHelpers.config) && Object.prototype.hasOwnProperty.call(lazyAssemblies, pdbNameToLoad);
+    const shouldLoadPdb = loaderHelpers.config.debugLevel != 0 && loaderHelpers.isDebuggingSupported() && Object.prototype.hasOwnProperty.call(lazyAssemblies, pdbNameToLoad);
 
     const dllBytesPromise = loaderHelpers.retrieve_asset_download(dllAsset);
 
@@ -51,15 +52,15 @@ export async function loadLazyAssembly(assemblyNameToLoad: string): Promise<bool
         pdb = null;
     }
 
-    runtimeHelpers.javaScriptExports.load_lazy_assembly(dll, pdb);
+    load_lazy_assembly(dll, pdb);
     return true;
 }
 
-function changeExtension(filename: string, newExtensionWithLeadingDot: string) {
+function changeExtension (filename: string, newExtensionWithLeadingDot: string) {
     const lastDotIndex = filename.lastIndexOf(".");
     if (lastDotIndex < 0) {
         throw new Error(`No extension to replace in '${filename}'`);
     }
 
     return filename.substring(0, lastDotIndex) + newExtensionWithLeadingDot;
-}
\ No newline at end of file
+}
diff --git a/src/mono/browser/runtime/loader/assets.ts b/src/mono/browser/runtime/loader/assets.ts
index 4a06d91a9798..3ef34458dd83 100644
--- a/src/mono/browser/runtime/loader/assets.ts
+++ b/src/mono/browser/runtime/loader/assets.ts
@@ -3,7 +3,7 @@
 
 import WasmEnableThreads from "consts:wasmEnableThreads";
 
-import type { AssetEntryInternal, PromiseAndController } from "../types/internal";
+import { PThreadPtrNull, type AssetEntryInternal, type PThreadWorker, type PromiseAndController } from "../types/internal";
 import type { AssetBehaviors, AssetEntry, LoadingResource, ResourceList, SingleAssetBehaviors as SingleAssetBehaviors, WebAssemblyBootResourceType } from "../types";
 import { ENVIRONMENT_IS_NODE, ENVIRONMENT_IS_SHELL, ENVIRONMENT_IS_WEB, loaderHelpers, mono_assert, runtimeHelpers } from "./globals";
 import { createPromiseController } from "./promise-controller";
@@ -20,6 +20,9 @@ let parallel_count = 0;
 const assetsToLoad: AssetEntryInternal[] = [];
 const singleAssets: Map<string, AssetEntryInternal> = new Map();
 
+// A duplicate in pthreads/shared.ts
+const worker_empty_prefix = "          -    ";
+
 const jsRuntimeModulesAssetTypes: {
     [k: string]: boolean
 } = {
@@ -80,11 +83,19 @@ const skipInstantiateByAssetTypes: {
     "segmentation-rules": true,
 };
 
-export function shouldLoadIcuAsset(asset: AssetEntryInternal): boolean {
+// load again for each worker
+const loadIntoWorker: {
+    [k: string]: boolean
+} = {
+    "symbols": true,
+    "segmentation-rules": true,
+};
+
+export function shouldLoadIcuAsset (asset: AssetEntryInternal): boolean {
     return !(asset.behavior == "icu" && asset.name != loaderHelpers.preferredIcuAsset);
 }
 
-function convert_single_asset(assetsCollection: AssetEntryInternal[], resource: ResourceList | undefined, behavior: SingleAssetBehaviors): AssetEntryInternal {
+function convert_single_asset (assetsCollection: AssetEntryInternal[], resource: ResourceList | undefined, behavior: SingleAssetBehaviors): AssetEntryInternal {
     const keys = Object.keys(resource || {});
     mono_assert(keys.length == 1, `Expect to have one ${behavior} asset in resources`);
 
@@ -103,20 +114,20 @@ function convert_single_asset(assetsCollection: AssetEntryInternal[], resource:
     return asset;
 }
 
-function set_single_asset(asset: AssetEntryInternal) {
+function set_single_asset (asset: AssetEntryInternal) {
     if (singleAssetTypes[asset.behavior]) {
         singleAssets.set(asset.behavior, asset);
     }
 }
 
-function get_single_asset(behavior: SingleAssetBehaviors): AssetEntryInternal {
+function get_single_asset (behavior: SingleAssetBehaviors): AssetEntryInternal {
     mono_assert(singleAssetTypes[behavior], `Unknown single asset behavior ${behavior}`);
     const asset = singleAssets.get(behavior);
     mono_assert(asset, `Single asset for ${behavior} not found`);
     return asset;
 }
 
-export function resolve_single_asset_path(behavior: SingleAssetBehaviors): AssetEntryInternal {
+export function resolve_single_asset_path (behavior: SingleAssetBehaviors): AssetEntryInternal {
     const asset = get_single_asset(behavior);
     if (!asset.resolvedUrl) {
         asset.resolvedUrl = loaderHelpers.locateFile(asset.name);
@@ -137,7 +148,7 @@ export function resolve_single_asset_path(behavior: SingleAssetBehaviors): Asset
     return asset;
 }
 
-export async function mono_download_assets(): Promise<void> {
+export async function mono_download_assets (): Promise<void> {
     mono_log_debug("mono_download_assets");
     try {
         const promises_of_assets: Promise<AssetEntryInternal>[] = [];
@@ -226,7 +237,7 @@ export async function mono_download_assets(): Promise<void> {
     }
 }
 
-export function prepareAssets() {
+export function prepareAssets () {
     const config = loaderHelpers.config;
     const modulesAssets: AssetEntryInternal[] = [];
 
@@ -266,7 +277,7 @@ export function prepareAssets() {
             }
         }
 
-        if (config.debugLevel != 0 && resources.pdb) {
+        if (config.debugLevel != 0 && loaderHelpers.isDebuggingSupported() && resources.pdb) {
             for (const name in resources.pdb) {
                 assetsToLoad.push({
                     name,
@@ -354,27 +365,30 @@ export function prepareAssets() {
     config.assets = [...assetsToLoad, ...modulesAssets];
 }
 
-export function prepareAssetsWorker() {
+export function prepareAssetsWorker () {
     const config = loaderHelpers.config;
     mono_assert(config.assets, "config.assets must be defined");
 
     for (const asset of config.assets) {
         set_single_asset(asset);
+        if (loadIntoWorker[asset.behavior]) {
+            assetsToLoad.push(asset);
+        }
     }
 }
 
-export function delay(ms: number): Promise<void> {
+export function delay (ms: number): Promise<void> {
     return new Promise(resolve => globalThis.setTimeout(resolve, ms));
 }
 
-export async function retrieve_asset_download(asset: AssetEntry): Promise<ArrayBuffer> {
+export async function retrieve_asset_download (asset: AssetEntry): Promise<ArrayBuffer> {
     const pendingAsset = await start_asset_download(asset);
     await pendingAsset.pendingDownloadInternal!.response;
     return pendingAsset.buffer!;
 }
 
 // FIXME: Connection reset is probably the only good one for which we should retry
-export async function start_asset_download(asset: AssetEntryInternal): Promise<AssetEntryInternal> {
+export async function start_asset_download (asset: AssetEntryInternal): Promise<AssetEntryInternal> {
     try {
         return await start_asset_download_with_throttle(asset);
     } catch (err: any) {
@@ -415,7 +429,7 @@ export async function start_asset_download(asset: AssetEntryInternal): Promise<A
     }
 }
 
-async function start_asset_download_with_throttle(asset: AssetEntryInternal): Promise<AssetEntryInternal> {
+async function start_asset_download_with_throttle (asset: AssetEntryInternal): Promise<AssetEntryInternal> {
     // we don't addRunDependency to allow download in parallel with onRuntimeInitialized event!
     while (throttlingPromise) {
         await throttlingPromise.promise;
@@ -438,8 +452,7 @@ async function start_asset_download_with_throttle(asset: AssetEntryInternal): Pr
         asset.buffer = await response.arrayBuffer();
         ++loaderHelpers.actual_downloaded_assets_count;
         return asset;
-    }
-    finally {
+    } finally {
         --parallel_count;
         if (throttlingPromise && parallel_count == loaderHelpers.maxParallelDownloads - 1) {
             mono_log_debug("Resuming more parallel downloads");
@@ -450,7 +463,7 @@ async function start_asset_download_with_throttle(asset: AssetEntryInternal): Pr
     }
 }
 
-async function start_asset_download_sources(asset: AssetEntryInternal): Promise<Response | undefined> {
+async function start_asset_download_sources (asset: AssetEntryInternal): Promise<Response | undefined> {
     // we don't addRunDependency to allow download in parallel with onRuntimeInitialized event!
     if (asset.pendingDownload) {
         asset.pendingDownloadInternal = asset.pendingDownload;
@@ -470,7 +483,9 @@ async function start_asset_download_sources(asset: AssetEntryInternal): Promise<
                 ok: true,
                 arrayBuffer: () => buffer,
                 json: () => JSON.parse(new TextDecoder("utf-8").decode(buffer)),
-                text: () => { throw new Error("NotImplementedException"); },
+                text: () => {
+                    throw new Error("NotImplementedException");
+                },
                 headers: {
                     get: () => undefined,
                 }
@@ -502,8 +517,7 @@ async function start_asset_download_sources(asset: AssetEntryInternal): Promise<
                 continue;// next source
             }
             return response;
-        }
-        catch (err) {
+        } catch (err) {
             if (!response) {
                 response = {
                     ok: false,
@@ -527,34 +541,31 @@ async function start_asset_download_sources(asset: AssetEntryInternal): Promise<
     }
 }
 
-function resolve_path(asset: AssetEntry, sourcePrefix: string): string {
+function resolve_path (asset: AssetEntry, sourcePrefix: string): string {
     mono_assert(sourcePrefix !== null && sourcePrefix !== undefined, () => `sourcePrefix must be provided for ${asset.name}`);
     let attemptUrl;
     if (!asset.resolvedUrl) {
         if (sourcePrefix === "") {
             if (asset.behavior === "assembly" || asset.behavior === "pdb") {
                 attemptUrl = asset.name;
-            }
-            else if (asset.behavior === "resource") {
+            } else if (asset.behavior === "resource") {
                 const path = asset.culture && asset.culture !== "" ? `${asset.culture}/${asset.name}` : asset.name;
                 attemptUrl = path;
-            }
-            else {
+            } else {
                 attemptUrl = asset.name;
             }
         } else {
             attemptUrl = sourcePrefix + asset.name;
         }
         attemptUrl = appendUniqueQuery(loaderHelpers.locateFile(attemptUrl), asset.behavior);
-    }
-    else {
+    } else {
         attemptUrl = asset.resolvedUrl;
     }
     mono_assert(attemptUrl && typeof attemptUrl == "string", "attemptUrl need to be path or url string");
     return attemptUrl;
 }
 
-export function appendUniqueQuery(attemptUrl: string, behavior: AssetBehaviors): string {
+export function appendUniqueQuery (attemptUrl: string, behavior: AssetBehaviors): string {
     // apply unique query to js modules to make the module state independent of the other runtime instances
     if (loaderHelpers.modulesUniqueQuery && appendQueryAssetTypes[behavior]) {
         attemptUrl = attemptUrl + loaderHelpers.modulesUniqueQuery;
@@ -566,7 +577,7 @@ export function appendUniqueQuery(attemptUrl: string, behavior: AssetBehaviors):
 let resourcesLoaded = 0;
 const totalResources = new Set<string>();
 
-function download_resource(asset: AssetEntryInternal): LoadingResource {
+function download_resource (asset: AssetEntryInternal): LoadingResource {
     try {
         mono_assert(asset.resolvedUrl, "Request's resolvedUrl must be set");
         const fetchResponse = download_resource_with_cache(asset);
@@ -589,8 +600,12 @@ function download_resource(asset: AssetEntryInternal): LoadingResource {
             url: asset.resolvedUrl,
             status: 500,
             statusText: "ERR29: " + err,
-            arrayBuffer: () => { throw err; },
-            json: () => { throw err; }
+            arrayBuffer: () => {
+                throw err;
+            },
+            json: () => {
+                throw err;
+            }
         };
         return {
             name: asset.name, url: asset.resolvedUrl!, response: Promise.resolve(response)
@@ -598,7 +613,7 @@ function download_resource(asset: AssetEntryInternal): LoadingResource {
     }
 }
 
-async function download_resource_with_cache(asset: AssetEntryInternal): Promise<Response> {
+async function download_resource_with_cache (asset: AssetEntryInternal): Promise<Response> {
     let response = await findCachedResponse(asset);
     if (!response) {
         response = await fetchResource(asset);
@@ -608,7 +623,7 @@ async function download_resource_with_cache(asset: AssetEntryInternal): Promise<
     return response;
 }
 
-function fetchResource(asset: AssetEntryInternal): Promise<Response> {
+function fetchResource (asset: AssetEntryInternal): Promise<Response> {
     // Allow developers to override how the resource is loaded
     let url = asset.resolvedUrl!;
     if (loaderHelpers.loadBootResource) {
@@ -632,7 +647,7 @@ function fetchResource(asset: AssetEntryInternal): Promise<Response> {
         // Include credentials so the server can allow download / provide user specific file
         fetchOptions.credentials = "include";
     } else {
-        // `disableIntegrityCheck` is to give developers an easy opt-out from the integrity check 
+        // `disableIntegrityCheck` is to give developers an easy opt-out from the integrity check
         if (!loaderHelpers.config.disableIntegrityCheck && asset.hash) {
             // Any other resource than configuration should provide integrity check
             fetchOptions.integrity = asset.hash;
@@ -656,7 +671,7 @@ const monoToBlazorAssetTypeMap: { [key: string]: WebAssemblyBootResourceType | u
     "js-module-threads": "dotnetjs"
 };
 
-function invokeLoadBootResource(asset: AssetEntryInternal): string | Promise<Response> | null | undefined {
+function invokeLoadBootResource (asset: AssetEntryInternal): string | Promise<Response> | null | undefined {
     if (loaderHelpers.loadBootResource) {
         const requestHash = asset.hash ?? "";
         const url = asset.resolvedUrl!;
@@ -674,7 +689,7 @@ function invokeLoadBootResource(asset: AssetEntryInternal): string | Promise<Res
     return undefined;
 }
 
-export function cleanupAsset(asset: AssetEntryInternal) {
+export function cleanupAsset (asset: AssetEntryInternal) {
     // give GC chance to collect resources
     asset.pendingDownloadInternal = null as any; // GC
     asset.pendingDownload = null as any; // GC
@@ -682,7 +697,7 @@ export function cleanupAsset(asset: AssetEntryInternal) {
     asset.moduleExports = null as any; // GC
 }
 
-function fileName(name: string) {
+function fileName (name: string) {
     let lastIndexOfSlash = name.lastIndexOf("/");
     if (lastIndexOfSlash >= 0) {
         lastIndexOfSlash++;
@@ -690,7 +705,7 @@ function fileName(name: string) {
     return name.substring(lastIndexOfSlash);
 }
 
-export async function streamingCompileWasm() {
+export async function streamingCompileWasm () {
     try {
         const wasmModuleAsset = resolve_single_asset_path("dotnetwasm");
         await start_asset_download(wasmModuleAsset);
@@ -718,8 +733,26 @@ export async function streamingCompileWasm() {
         wasmModuleAsset.buffer = null as any; // GC
         wasmModuleAsset.moduleExports = null as any; // GC
         loaderHelpers.wasmCompilePromise.promise_control.resolve(compiledModule);
-    }
-    catch (err) {
+    } catch (err) {
         loaderHelpers.wasmCompilePromise.promise_control.reject(err);
     }
-}
\ No newline at end of file
+}
+export function preloadWorkers () {
+    if (!WasmEnableThreads) return;
+    const jsModuleWorker = resolve_single_asset_path("js-module-threads");
+    for (let i = 0; i < loaderHelpers.config.pthreadPoolInitialSize!; i++) {
+        const workerNumber = loaderHelpers.workerNextNumber++;
+        const worker: Partial<PThreadWorker> = new Worker(jsModuleWorker.resolvedUrl!, {
+            name: "dotnet-worker-" + workerNumber.toString().padStart(3, "0"),
+        });
+        worker.info = {
+            workerNumber,
+            pthreadId: PThreadPtrNull,
+            reuseCount: 0,
+            updateCount: 0,
+            threadPrefix: worker_empty_prefix,
+            threadName: "emscripten-pool",
+        } as any;
+        loaderHelpers.loadingWorkers.push(worker as any);
+    }
+}
diff --git a/src/mono/browser/runtime/loader/assetsCache.ts b/src/mono/browser/runtime/loader/assetsCache.ts
index ec6bf96233b5..1be22cfdd1fd 100644
--- a/src/mono/browser/runtime/loader/assetsCache.ts
+++ b/src/mono/browser/runtime/loader/assetsCache.ts
@@ -10,7 +10,7 @@ const networkLoads: { [name: string]: LoadLogEntry } = {};
 const cacheLoads: { [name: string]: LoadLogEntry } = {};
 let cacheIfUsed: Cache | null;
 
-export function logDownloadStatsToConsole(): void {
+export function logDownloadStatsToConsole (): void {
     const cacheLoadsEntries = Object.values(cacheLoads);
     const networkLoadsEntries = Object.values(networkLoads);
     const cacheResponseBytes = countTotalBytes(cacheLoadsEntries);
@@ -51,7 +51,7 @@ export function logDownloadStatsToConsole(): void {
     console.groupEnd();
 }
 
-export async function purgeUnusedCacheEntriesAsync(): Promise<void> {
+export async function purgeUnusedCacheEntriesAsync (): Promise<void> {
     // We want to keep the cache small because, even though the browser will evict entries if it
     // gets too big, we don't want to be considered problematic by the end user viewing storage stats
     const cache = cacheIfUsed;
@@ -67,7 +67,7 @@ export async function purgeUnusedCacheEntriesAsync(): Promise<void> {
     }
 }
 
-export async function findCachedResponse(asset: AssetEntryInternal): Promise<Response | undefined> {
+export async function findCachedResponse (asset: AssetEntryInternal): Promise<Response | undefined> {
     const cache = cacheIfUsed;
     if (!cache || asset.noCache || !asset.hash || asset.hash.length === 0) {
         return undefined;
@@ -94,7 +94,7 @@ export async function findCachedResponse(asset: AssetEntryInternal): Promise<Res
     return cachedResponse;
 }
 
-export function addCachedReponse(asset: AssetEntryInternal, networkResponse: Response): void {
+export function addCachedReponse (asset: AssetEntryInternal, networkResponse: Response): void {
     const cache = cacheIfUsed;
     if (!cache || asset.noCache || !asset.hash || asset.hash.length === 0) {
         return;
@@ -108,11 +108,11 @@ export function addCachedReponse(asset: AssetEntryInternal, networkResponse: Res
     }, 0);
 }
 
-function getCacheKey(asset: AssetEntryInternal) {
+function getCacheKey (asset: AssetEntryInternal) {
     return `${asset.resolvedUrl}.${asset.hash}`;
 }
 
-async function addToCacheAsync(cache: Cache, name: string, cacheKey: string, clonedResponse: Response) {
+async function addToCacheAsync (cache: Cache, name: string, cacheKey: string, clonedResponse: Response) {
     // We have to clone in order to put this in the cache *and* not prevent other code from
     // reading the original response stream.
     const responseData = await clonedResponse.arrayBuffer();
@@ -142,11 +142,11 @@ async function addToCacheAsync(cache: Cache, name: string, cacheKey: string, clo
     }
 }
 
-export async function initCacheToUseIfEnabled(): Promise<void> {
+export async function initCacheToUseIfEnabled (): Promise<void> {
     cacheIfUsed = await getCacheToUseIfEnabled(loaderHelpers.config);
 }
 
-async function getCacheToUseIfEnabled(config: MonoConfig): Promise<Cache | null> {
+async function getCacheToUseIfEnabled (config: MonoConfig): Promise<Cache | null> {
     // caches will be undefined if we're running on an insecure origin (secure means https or localhost)
     if (!config.cacheBootResources || typeof globalThis.caches === "undefined" || typeof globalThis.document === "undefined") {
         return null;
@@ -180,15 +180,15 @@ async function getCacheToUseIfEnabled(config: MonoConfig): Promise<Cache | null>
     }
 }
 
-function countTotalBytes(loads: LoadLogEntry[]) {
+function countTotalBytes (loads: LoadLogEntry[]) {
     return loads.reduce((prev, item) => prev + (item.responseBytes || 0), 0);
 }
 
-function toDataSizeString(byteCount: number) {
+function toDataSizeString (byteCount: number) {
     return `${(byteCount / (1024 * 1024)).toFixed(2)} MB`;
 }
 
-function getPerformanceEntry(url: string): PerformanceResourceTiming | undefined {
+function getPerformanceEntry (url: string): PerformanceResourceTiming | undefined {
     if (typeof performance !== "undefined") {
         return performance.getEntriesByName(url)[0] as PerformanceResourceTiming;
     }
diff --git a/src/mono/browser/runtime/loader/config.ts b/src/mono/browser/runtime/loader/config.ts
index a8fb3b653f4a..5ff185827c10 100644
--- a/src/mono/browser/runtime/loader/config.ts
+++ b/src/mono/browser/runtime/loader/config.ts
@@ -4,16 +4,16 @@
 import BuildConfiguration from "consts:configuration";
 import WasmEnableThreads from "consts:wasmEnableThreads";
 
-import type { DotnetModuleInternal, MonoConfigInternal } from "../types/internal";
+import { type DotnetModuleInternal, type MonoConfigInternal, JSThreadBlockingMode } from "../types/internal";
 import type { DotnetModuleConfig, MonoConfig, ResourceGroups, ResourceList } from "../types";
-import { ENVIRONMENT_IS_WEB, exportedRuntimeAPI, loaderHelpers, runtimeHelpers } from "./globals";
+import { exportedRuntimeAPI, loaderHelpers, runtimeHelpers } from "./globals";
 import { mono_log_error, mono_log_debug } from "./logging";
 import { importLibraryInitializers, invokeLibraryInitializers } from "./libraryInitializers";
 import { mono_exit } from "./exit";
 import { makeURLAbsoluteWithApplicationBase } from "./polyfills";
 import { appendUniqueQuery } from "./assets";
 
-export function deep_merge_config(target: MonoConfigInternal, source: MonoConfigInternal): MonoConfigInternal {
+export function deep_merge_config (target: MonoConfigInternal, source: MonoConfigInternal): MonoConfigInternal {
     // no need to merge the same object
     if (target === source) return target;
 
@@ -39,7 +39,7 @@ export function deep_merge_config(target: MonoConfigInternal, source: MonoConfig
     return Object.assign(target, providedConfig);
 }
 
-export function deep_merge_module(target: DotnetModuleInternal, source: DotnetModuleConfig): DotnetModuleInternal {
+export function deep_merge_module (target: DotnetModuleInternal, source: DotnetModuleConfig): DotnetModuleInternal {
     // no need to merge the same object
     if (target === source) return target;
 
@@ -51,7 +51,7 @@ export function deep_merge_module(target: DotnetModuleInternal, source: DotnetMo
     return Object.assign(target, providedConfig);
 }
 
-function deep_merge_resources(target: ResourceGroups, source: ResourceGroups): ResourceGroups {
+function deep_merge_resources (target: ResourceGroups, source: ResourceGroups): ResourceGroups {
     // no need to merge the same object
     if (target === source) return target;
 
@@ -101,7 +101,7 @@ function deep_merge_resources(target: ResourceGroups, source: ResourceGroups): R
     return Object.assign(target, providedResources);
 }
 
-function deep_merge_dict(target: { [key: string]: ResourceList }, source: { [key: string]: ResourceList }) {
+function deep_merge_dict (target: { [key: string]: ResourceList }, source: { [key: string]: ResourceList }) {
     // no need to merge the same object
     if (target === source) return target;
 
@@ -112,7 +112,7 @@ function deep_merge_dict(target: { [key: string]: ResourceList }, source: { [key
 }
 
 // NOTE: this is called before setRuntimeGlobals
-export function normalizeConfig() {
+export function normalizeConfig () {
     // normalize
     const config = loaderHelpers.config;
 
@@ -177,8 +177,6 @@ export function normalizeConfig() {
         }
     }
 
-    loaderHelpers.assertAfterExit = config.assertAfterExit = config.assertAfterExit || !ENVIRONMENT_IS_WEB;
-
     if (config.debugLevel === undefined && BuildConfiguration === "Debug") {
         config.debugLevel = -1;
     }
@@ -187,9 +185,21 @@ export function normalizeConfig() {
         config.cachedResourcesPurgeDelay = 10000;
     }
 
-    if (WasmEnableThreads && !Number.isInteger(config.pthreadPoolSize)) {
-        // ActiveIssue https://github.com/dotnet/runtime/issues/75602
-        config.pthreadPoolSize = 7;
+    // ActiveIssue https://github.com/dotnet/runtime/issues/75602
+    if (WasmEnableThreads) {
+
+        if (!Number.isInteger(config.pthreadPoolInitialSize)) {
+            config.pthreadPoolInitialSize = 5;
+        }
+        if (!Number.isInteger(config.pthreadPoolUnusedSize)) {
+            config.pthreadPoolUnusedSize = 1;
+        }
+        if (!Number.isInteger(config.finalizerThreadStartDelayMs)) {
+            config.finalizerThreadStartDelayMs = 200;
+        }
+        if (config.jsThreadBlockingMode == undefined) {
+            config.jsThreadBlockingMode = JSThreadBlockingMode.PreventSynchronousJSExport;
+        }
     }
 
     // this is how long the Mono GC will try to wait for all threads to be suspended before it gives up and aborts the process
@@ -197,13 +207,6 @@ export function normalizeConfig() {
         config.environmentVariables["MONO_SLEEP_ABORT_LIMIT"] = "5000";
     }
 
-    // Default values (when WasmDebugLevel is not set)
-    // - Build   (debug)    => debugBuild=true  & debugLevel=-1 => -1
-    // - Build   (release)  => debugBuild=true  & debugLevel=0  => 0
-    // - Publish (debug)    => debugBuild=false & debugLevel=-1 => 0
-    // - Publish (release)  => debugBuild=false & debugLevel=0  => 0
-    config.debugLevel = hasDebuggingEnabled(config) ? config.debugLevel : 0;
-
     if (BuildConfiguration === "Debug" && config.diagnosticTracing === undefined) {
         config.diagnosticTracing = true;
     }
@@ -225,7 +228,7 @@ export function normalizeConfig() {
 }
 
 let configLoaded = false;
-export async function mono_wasm_load_config(module: DotnetModuleInternal): Promise<void> {
+export async function mono_wasm_load_config (module: DotnetModuleInternal): Promise<void> {
     const configFilePath = module.configSrc;
     if (configLoaded) {
         await loaderHelpers.afterConfigLoaded.promise;
@@ -248,8 +251,7 @@ export async function mono_wasm_load_config(module: DotnetModuleInternal): Promi
             try {
                 await module.onConfigLoaded(loaderHelpers.config, exportedRuntimeAPI);
                 normalizeConfig();
-            }
-            catch (err: any) {
+            } catch (err: any) {
                 mono_log_error("onConfigLoaded() failed", err);
                 throw err;
             }
@@ -264,17 +266,16 @@ export async function mono_wasm_load_config(module: DotnetModuleInternal): Promi
     }
 }
 
-export function hasDebuggingEnabled(config: MonoConfigInternal): boolean {
+export function isDebuggingSupported (): boolean {
     // Copied from blazor MonoDebugger.ts/attachDebuggerHotkey
     if (!globalThis.navigator) {
         return false;
     }
 
-    const hasReferencedPdbs = !!config.resources!.pdb;
-    return (hasReferencedPdbs || config.debugLevel != 0) && (loaderHelpers.isChromium || loaderHelpers.isFirefox);
+    return loaderHelpers.isChromium || loaderHelpers.isFirefox;
 }
 
-async function loadBootConfig(module: DotnetModuleInternal): Promise<void> {
+async function loadBootConfig (module: DotnetModuleInternal): Promise<void> {
     const defaultConfigSrc = loaderHelpers.locateFile(module.configSrc!);
 
     const loaderResponse = loaderHelpers.loadBootResource !== undefined ?
@@ -294,7 +295,7 @@ async function loadBootConfig(module: DotnetModuleInternal): Promise<void> {
     const loadedConfig: MonoConfig = await readBootConfigResponse(loadConfigResponse);
     deep_merge_config(loaderHelpers.config, loadedConfig);
 
-    function defaultLoadBootConfig(url: string): Promise<Response> {
+    function defaultLoadBootConfig (url: string): Promise<Response> {
         return loaderHelpers.fetch_like(url, {
             method: "GET",
             credentials: "include",
@@ -303,7 +304,7 @@ async function loadBootConfig(module: DotnetModuleInternal): Promise<void> {
     }
 }
 
-async function readBootConfigResponse(loadConfigResponse: Response): Promise<MonoConfig> {
+async function readBootConfigResponse (loadConfigResponse: Response): Promise<MonoConfig> {
     const config = loaderHelpers.config;
     const loadedConfig: MonoConfig = await loadConfigResponse.json();
 
@@ -327,4 +328,4 @@ async function readBootConfigResponse(loadConfigResponse: Response): Promise<Mon
     }
 
     return loadedConfig;
-}
\ No newline at end of file
+}
diff --git a/src/mono/browser/runtime/loader/exit.ts b/src/mono/browser/runtime/loader/exit.ts
index c6f7100564a6..2264cd086fbd 100644
--- a/src/mono/browser/runtime/loader/exit.ts
+++ b/src/mono/browser/runtime/loader/exit.ts
@@ -6,28 +6,25 @@ import WasmEnableThreads from "consts:wasmEnableThreads";
 import { ENVIRONMENT_IS_NODE, ENVIRONMENT_IS_WEB, ENVIRONMENT_IS_WORKER, INTERNAL, emscriptenModule, loaderHelpers, mono_assert, runtimeHelpers } from "./globals";
 import { mono_log_debug, mono_log_error, mono_log_info_no_prefix, mono_log_warn, teardown_proxy_console } from "./logging";
 
-export function is_exited() {
+export function is_exited () {
     return loaderHelpers.exitCode !== undefined;
 }
 
-export function is_runtime_running() {
+export function is_runtime_running () {
     return runtimeHelpers.runtimeReady && !is_exited();
 }
 
-export function assert_runtime_running() {
-    if (!is_exited()) {
-        if (WasmEnableThreads && ENVIRONMENT_IS_WORKER) {
-            mono_assert(runtimeHelpers.runtimeReady, "The WebWorker is not attached to the runtime. See https://github.com/dotnet/runtime/blob/main/src/mono/wasm/threads.md#JS-interop-on-dedicated-threads");
-        } else {
-            mono_assert(runtimeHelpers.runtimeReady, ".NET runtime didn't start yet. Please call dotnet.create() first.");
-        }
+export function assert_runtime_running () {
+    mono_assert(!is_exited(), () => `.NET runtime already exited with ${loaderHelpers.exitCode} ${loaderHelpers.exitReason}. You can use runtime.runMain() which doesn't exit the runtime.`);
+    if (WasmEnableThreads && ENVIRONMENT_IS_WORKER) {
+        mono_assert(runtimeHelpers.runtimeReady, "The WebWorker is not attached to the runtime. See https://github.com/dotnet/runtime/blob/main/src/mono/wasm/threads.md#JS-interop-on-dedicated-threads");
     } else {
-        mono_assert(!loaderHelpers.assertAfterExit, () => `.NET runtime already exited with ${loaderHelpers.exitCode} ${loaderHelpers.exitReason}. You can use runtime.runMain() which doesn't exit the runtime.`);
+        mono_assert(runtimeHelpers.runtimeReady, ".NET runtime didn't start yet. Please call dotnet.create() first.");
     }
 }
 
 
-export function installUnhandledErrorHandler() {
+export function installUnhandledErrorHandler () {
     // it seems that emscripten already does the right thing for NodeJs and that there is no good solution for V8 shell.
     if (ENVIRONMENT_IS_WEB) {
         globalThis.addEventListener("unhandledrejection", unhandledrejection_handler);
@@ -35,14 +32,14 @@ export function installUnhandledErrorHandler() {
     }
 }
 
-export function uninstallUnhandledErrorHandler() {
+export function uninstallUnhandledErrorHandler () {
     if (ENVIRONMENT_IS_WEB) {
         globalThis.removeEventListener("unhandledrejection", unhandledrejection_handler);
         globalThis.removeEventListener("error", error_handler);
     }
 }
 
-export function registerEmscriptenExitHandlers() {
+export function registerEmscriptenExitHandlers () {
     if (!emscriptenModule.onAbort) {
         emscriptenModule.onAbort = onAbort;
     }
@@ -51,7 +48,7 @@ export function registerEmscriptenExitHandlers() {
     }
 }
 
-function unregisterEmscriptenExitHandlers() {
+function unregisterEmscriptenExitHandlers () {
     if (emscriptenModule.onAbort == onAbort) {
         emscriptenModule.onAbort = undefined;
     }
@@ -59,22 +56,26 @@ function unregisterEmscriptenExitHandlers() {
         emscriptenModule.onExit = undefined;
     }
 }
-function onExit(code: number) {
+function onExit (code: number) {
     mono_exit(code, loaderHelpers.exitReason);
 }
 
-function onAbort(reason: any) {
+function onAbort (reason: any) {
     mono_exit(1, loaderHelpers.exitReason || reason);
 }
 
 // this will also call mono_wasm_exit if available, which will call exitJS -> _proc_exit -> terminateAllThreads
-export function mono_exit(exit_code: number, reason?: any): void {
+export function mono_exit (exit_code: number, reason?: any): void {
     unregisterEmscriptenExitHandlers();
     uninstallUnhandledErrorHandler();
 
     // unify shape of the reason object
     const is_object = reason && typeof reason === "object";
-    exit_code = (is_object && typeof reason.status === "number") ? reason.status : exit_code;
+    exit_code = (is_object && typeof reason.status === "number")
+        ? reason.status
+        : exit_code === undefined
+            ? -1
+            : exit_code;
     const message = (is_object && typeof reason.message === "string")
         ? reason.message
         : "" + reason;
@@ -112,8 +113,7 @@ export function mono_exit(exit_code: number, reason?: any): void {
                     runtimeHelpers.dumpThreads();
                 }
             }
-        }
-        catch (err) {
+        } catch (err) {
             mono_log_warn("mono_exit failed", err);
             // don't propagate any failures
         }
@@ -121,8 +121,7 @@ export function mono_exit(exit_code: number, reason?: any): void {
         try {
             logOnExit(exit_code, reason);
             appendElementOnExit(exit_code);
-        }
-        catch (err) {
+        } catch (err) {
             mono_log_warn("mono_exit failed", err);
             // don't propagate any failures
         }
@@ -140,8 +139,7 @@ export function mono_exit(exit_code: number, reason?: any): void {
         (async () => {
             try {
                 await flush_node_streams();
-            }
-            finally {
+            } finally {
                 set_exit_code_and_quit_now(exit_code, reason);
             }
         })();
@@ -153,7 +151,7 @@ export function mono_exit(exit_code: number, reason?: any): void {
     }
 }
 
-function set_exit_code_and_quit_now(exit_code: number, reason?: any): void {
+function set_exit_code_and_quit_now (exit_code: number, reason?: any): void {
     if (WasmEnableThreads && ENVIRONMENT_IS_WORKER && runtimeHelpers.runtimeReady && runtimeHelpers.nativeAbort) {
         // note that the reason is not passed to UI thread
         runtimeHelpers.runtimeReady = false;
@@ -165,8 +163,7 @@ function set_exit_code_and_quit_now(exit_code: number, reason?: any): void {
         runtimeHelpers.runtimeReady = false;
         try {
             runtimeHelpers.nativeExit(exit_code);
-        }
-        catch (error: any) {
+        } catch (error: any) {
             if (runtimeHelpers.ExitStatus && !(error instanceof runtimeHelpers.ExitStatus)) {
                 mono_log_warn("mono_wasm_exit failed: " + error.toString());
             }
@@ -176,15 +173,14 @@ function set_exit_code_and_quit_now(exit_code: number, reason?: any): void {
     if (exit_code !== 0 || !ENVIRONMENT_IS_WEB) {
         if (ENVIRONMENT_IS_NODE && INTERNAL.process) {
             INTERNAL.process.exit(exit_code);
-        }
-        else if (runtimeHelpers.quit) {
+        } else if (runtimeHelpers.quit) {
             runtimeHelpers.quit(exit_code, reason);
         }
         throw reason;
     }
 }
 
-async function flush_node_streams() {
+async function flush_node_streams () {
     try {
         // eslint-disable-next-line @typescript-eslint/ban-ts-comment
         // @ts-ignore:
@@ -208,7 +204,7 @@ async function flush_node_streams() {
     }
 }
 
-function abort_promises(reason: any) {
+function abort_promises (reason: any) {
     loaderHelpers.exitReason = reason;
     loaderHelpers.allDownloadsQueued.promise_control.reject(reason);
     loaderHelpers.afterConfigLoaded.promise_control.reject(reason);
@@ -226,18 +222,18 @@ function abort_promises(reason: any) {
     }
 }
 
-function appendElementOnExit(exit_code: number) {
-    if (ENVIRONMENT_IS_WEB && !ENVIRONMENT_IS_WORKER && loaderHelpers.config && loaderHelpers.config.appendElementOnExit) {
+function appendElementOnExit (exit_code: number) {
+    if (ENVIRONMENT_IS_WEB && !ENVIRONMENT_IS_WORKER && loaderHelpers.config && loaderHelpers.config.appendElementOnExit && document) {
         //Tell xharness WasmBrowserTestRunner what was the exit code
         const tests_done_elem = document.createElement("label");
         tests_done_elem.id = "tests_done";
-        if (exit_code) tests_done_elem.style.background = "red";
-        tests_done_elem.innerHTML = exit_code.toString();
+        if (exit_code !== 0) tests_done_elem.style.background = "red";
+        tests_done_elem.innerHTML = "" + exit_code;
         document.body.appendChild(tests_done_elem);
     }
 }
 
-function logOnExit(exit_code: number, reason: any) {
+function logOnExit (exit_code: number, reason: any) {
     if (exit_code !== 0 && reason) {
         // ExitStatus usually is not real JS error and so stack strace is not very useful.
         // We will use debug level for it, which will print only when diagnosticTracing is set.
@@ -246,8 +242,7 @@ function logOnExit(exit_code: number, reason: any) {
             : mono_log_error;
         if (typeof reason == "string") {
             mono_log(reason);
-        }
-        else {
+        } else {
             if (reason.stack === undefined) {
                 reason.stack = new Error().stack + "";
             }
@@ -256,8 +251,7 @@ function logOnExit(exit_code: number, reason: any) {
                     ? runtimeHelpers.stringify_as_error_with_stack(reason.message + "\n" + reason.stack)
                     : reason.message + "\n" + reason.stack;
                 mono_log(message);
-            }
-            else {
+            } else {
                 mono_log(JSON.stringify(reason));
             }
         }
@@ -269,21 +263,20 @@ function logOnExit(exit_code: number, reason: any) {
             } else {
                 mono_log_info_no_prefix("WASM EXIT " + exit_code);
             }
-        }
-        else if (loaderHelpers.config.forwardConsoleLogsToWS) {
+        } else if (loaderHelpers.config.forwardConsoleLogsToWS) {
             teardown_proxy_console();
         }
     }
 }
-function unhandledrejection_handler(event: any) {
+function unhandledrejection_handler (event: any) {
     fatal_handler(event, event.reason, "rejection");
 }
 
-function error_handler(event: any) {
+function error_handler (event: any) {
     fatal_handler(event, event.error, "error");
 }
 
-function fatal_handler(event: any, reason: any, type: string) {
+function fatal_handler (event: any, reason: any, type: string) {
     event.preventDefault();
     try {
         if (!reason) {
diff --git a/src/mono/browser/runtime/loader/globals.ts b/src/mono/browser/runtime/loader/globals.ts
index 1b77c5510c3c..5e76904d5525 100644
--- a/src/mono/browser/runtime/loader/globals.ts
+++ b/src/mono/browser/runtime/loader/globals.ts
@@ -15,7 +15,7 @@ import { assertIsControllablePromise, createPromiseController, getPromiseControl
 import { mono_download_assets, resolve_single_asset_path, retrieve_asset_download } from "./assets";
 import { mono_log_error, set_thread_prefix, setup_proxy_console } from "./logging";
 import { invokeLibraryInitializers } from "./libraryInitializers";
-import { deep_merge_config, hasDebuggingEnabled } from "./config";
+import { deep_merge_config, isDebuggingSupported } from "./config";
 import { logDownloadStatsToConsole, purgeUnusedCacheEntriesAsync } from "./assetsCache";
 
 // if we are the first script loaded in the web worker, we are expected to become the sidecar
@@ -53,7 +53,7 @@ export const globalObjectsRoot: GlobalObjects = {
 
 setLoaderGlobals(globalObjectsRoot);
 
-export function setLoaderGlobals(
+export function setLoaderGlobals (
     globalObjects: GlobalObjects,
 ) {
     if (_loaderModuleLoaded) {
@@ -74,11 +74,14 @@ export function setLoaderGlobals(
     });
     const rh: Partial<RuntimeHelpers> = {
         mono_wasm_bindings_is_ready: false,
-        javaScriptExports: {} as any,
         config: globalObjects.module.config,
         diagnosticTracing: false,
-        nativeAbort: (reason: any) => { throw reason; },
-        nativeExit: (code: number) => { throw new Error("exit:" + code); }
+        nativeAbort: (reason: any) => {
+            throw reason || new Error("abort");
+        },
+        nativeExit: (code: number) => {
+            throw new Error("exit:" + code);
+        }
     };
     const lh: Partial<LoaderHelpers> = {
         gitHash,
@@ -87,12 +90,13 @@ export function setLoaderGlobals(
 
         maxParallelDownloads: 16,
         enableDownloadRetry: true,
-        assertAfterExit: !ENVIRONMENT_IS_WEB,
 
         _loaded_files: [],
         loadedFiles: [],
         loadedAssemblies: [],
         libraryInitializers: [],
+        loadingWorkers: [],
+        workerNextNumber: 1,
         actual_downloaded_assets_count: 0,
         actual_instantiated_assets_count: 0,
         expected_downloaded_assets_count: 0,
@@ -118,9 +122,9 @@ export function setLoaderGlobals(
         purgeUnusedCacheEntriesAsync,
         installUnhandledErrorHandler,
 
-        hasDebuggingEnabled,
         retrieve_asset_download,
         invokeLibraryInitializers,
+        isDebuggingSupported,
 
         // from wasm-feature-detect npm package
         exceptions,
@@ -133,7 +137,7 @@ export function setLoaderGlobals(
 // this will abort the program if the condition is false
 // see src\mono\browser\runtime\rollup.config.js
 // we inline the condition, because the lambda could allocate closure on hot path otherwise
-export function mono_assert(condition: unknown, messageFactory: string | (() => string)): asserts condition {
+export function mono_assert (condition: unknown, messageFactory: string | (() => string)): asserts condition {
     if (condition) return;
     const message = "Assert failed: " + (typeof messageFactory === "function"
         ? messageFactory()
@@ -141,4 +145,4 @@ export function mono_assert(condition: unknown, messageFactory: string | (() =>
     const error = new Error(message);
     mono_log_error(message, error);
     runtimeHelpers.nativeAbort(error);
-}
\ No newline at end of file
+}
diff --git a/src/mono/browser/runtime/loader/icu.ts b/src/mono/browser/runtime/loader/icu.ts
index 46a75726702e..0a7823497aa8 100644
--- a/src/mono/browser/runtime/loader/icu.ts
+++ b/src/mono/browser/runtime/loader/icu.ts
@@ -5,16 +5,16 @@ import { GlobalizationMode, MonoConfig } from "../types";
 import { ENVIRONMENT_IS_WEB, loaderHelpers } from "./globals";
 import { mono_log_info, mono_log_debug } from "./logging";
 
-export function init_globalization() {
+export function init_globalization () {
     loaderHelpers.preferredIcuAsset = getIcuResourceName(loaderHelpers.config);
-    loaderHelpers.invariantMode = loaderHelpers.config.globalizationMode == GlobalizationMode.Invariant;
+    let invariantMode = loaderHelpers.config.globalizationMode == GlobalizationMode.Invariant;
 
-    if (!loaderHelpers.invariantMode) {
+    if (!invariantMode) {
         if (loaderHelpers.preferredIcuAsset) {
             mono_log_debug("ICU data archive(s) available, disabling invariant mode");
         } else if (loaderHelpers.config.globalizationMode !== GlobalizationMode.Custom && loaderHelpers.config.globalizationMode !== GlobalizationMode.All && loaderHelpers.config.globalizationMode !== GlobalizationMode.Sharded) {
             mono_log_debug("ICU data archive(s) not available, using invariant globalization mode");
-            loaderHelpers.invariantMode = true;
+            invariantMode = true;
             loaderHelpers.preferredIcuAsset = null;
         } else {
             const msg = "invariant globalization mode is inactive and no ICU data archives are available";
@@ -28,8 +28,7 @@ export function init_globalization() {
     const env_variables = loaderHelpers.config.environmentVariables!;
     if (env_variables[hybridEnv] === undefined && loaderHelpers.config.globalizationMode === GlobalizationMode.Hybrid) {
         env_variables[hybridEnv] = "1";
-    }
-    else if (env_variables[invariantEnv] === undefined && loaderHelpers.invariantMode) {
+    } else if (env_variables[invariantEnv] === undefined && invariantMode) {
         env_variables[invariantEnv] = "1";
     }
     if (env_variables["TZ"] === undefined) {
@@ -45,7 +44,7 @@ export function init_globalization() {
     }
 }
 
-export function getIcuResourceName(config: MonoConfig): string | null {
+export function getIcuResourceName (config: MonoConfig): string | null {
     if (config.resources?.icu && config.globalizationMode != GlobalizationMode.Invariant) {
         // TODO: when starting on sidecar, we should pass default culture from UI thread
         const culture = config.applicationCulture || (ENVIRONMENT_IS_WEB ? (globalThis.navigator && globalThis.navigator.languages && globalThis.navigator.languages[0]) : Intl.DateTimeFormat().resolvedOptions().locale);
@@ -74,7 +73,7 @@ export function getIcuResourceName(config: MonoConfig): string | null {
     return null;
 }
 
-function getShardedIcuResourceName(culture: string): string {
+function getShardedIcuResourceName (culture: string): string {
     const prefix = culture.split("-")[0];
     if (prefix === "en" || ["fr", "fr-FR", "it", "it-IT", "de", "de-DE", "es", "es-ES"].includes(culture)) {
         return "icudt_EFIGS.dat";
diff --git a/src/mono/browser/runtime/loader/libraryInitializers.ts b/src/mono/browser/runtime/loader/libraryInitializers.ts
index 216c7fe7180e..a8a16c543dbc 100644
--- a/src/mono/browser/runtime/loader/libraryInitializers.ts
+++ b/src/mono/browser/runtime/loader/libraryInitializers.ts
@@ -7,7 +7,7 @@ import { loaderHelpers } from "./globals";
 import { mono_exit } from "./exit";
 import { ResourceList } from "../types";
 
-export async function importLibraryInitializers(libraryInitializers: ResourceList | undefined): Promise<void> {
+export async function importLibraryInitializers (libraryInitializers: ResourceList | undefined): Promise<void> {
     if (!libraryInitializers) {
         return;
     }
@@ -15,7 +15,7 @@ export async function importLibraryInitializers(libraryInitializers: ResourceLis
     const initializerFiles = Object.keys(libraryInitializers);
     await Promise.all(initializerFiles.map(f => importInitializer(f)));
 
-    async function importInitializer(path: string): Promise<void> {
+    async function importInitializer (path: string): Promise<void> {
         try {
             const adjustedPath = appendUniqueQuery(loaderHelpers.locateFile(path), "js-module-library-initializer");
             mono_log_debug(`Attempting to import '${adjustedPath}' for ${path}`);
@@ -28,7 +28,7 @@ export async function importLibraryInitializers(libraryInitializers: ResourceLis
     }
 }
 
-export async function invokeLibraryInitializers(functionName: string, args: any[]) {
+export async function invokeLibraryInitializers (functionName: string, args: any[]) {
     if (!loaderHelpers.libraryInitializers) {
         return;
     }
@@ -44,7 +44,7 @@ export async function invokeLibraryInitializers(functionName: string, args: any[
     await Promise.all(promises);
 }
 
-async function abortStartupOnError(scriptName: string, methodName: string, callback: () => Promise<void> | undefined): Promise<void> {
+async function abortStartupOnError (scriptName: string, methodName: string, callback: () => Promise<void> | undefined): Promise<void> {
     try {
         await callback();
     } catch (err) {
@@ -52,4 +52,4 @@ async function abortStartupOnError(scriptName: string, methodName: string, callb
         mono_exit(1, err);
         throw err;
     }
-}
\ No newline at end of file
+}
diff --git a/src/mono/browser/runtime/loader/logging.ts b/src/mono/browser/runtime/loader/logging.ts
index 723e55201fcc..668d7e667b6d 100644
--- a/src/mono/browser/runtime/loader/logging.ts
+++ b/src/mono/browser/runtime/loader/logging.ts
@@ -14,29 +14,29 @@ let theConsoleApi: any;
 let originalConsoleMethods: any;
 let threadNamePrefix: string;
 
-export function set_thread_prefix(threadPrefix: string) {
+export function set_thread_prefix (threadPrefix: string) {
     threadNamePrefix = threadPrefix;
 }
 
-export function mono_log_debug(msg: string, ...data: any[]) {
+export function mono_log_debug (msg: string, ...data: any[]) {
     if (loaderHelpers.diagnosticTracing) {
         console.debug(prefix + msg, ...data);
     }
 }
 
-export function mono_log_info(msg: string, ...data: any) {
+export function mono_log_info (msg: string, ...data: any) {
     console.info(prefix + msg, ...data);
 }
 
-export function mono_log_info_no_prefix(msg: string, ...data: any) {
+export function mono_log_info_no_prefix (msg: string, ...data: any) {
     console.info(msg, ...data);
 }
 
-export function mono_log_warn(msg: string, ...data: any) {
+export function mono_log_warn (msg: string, ...data: any) {
     console.warn(prefix + msg, ...data);
 }
 
-export function mono_log_error(msg: string, ...data: any) {
+export function mono_log_error (msg: string, ...data: any) {
     if (data && data.length > 0 && data[0] && typeof data[0] === "object") {
         // don't log silent errors
         if (data[0].silent) {
@@ -54,7 +54,7 @@ export function mono_log_error(msg: string, ...data: any) {
 }
 let tick = "";
 let last = new Date().valueOf();
-function proxyConsoleMethod(prefix: string, func: any, asJson: boolean) {
+function proxyConsoleMethod (prefix: string, func: any, asJson: boolean) {
     return function (...args: any[]) {
         try {
             let payload = args[0];
@@ -101,7 +101,7 @@ function proxyConsoleMethod(prefix: string, func: any, asJson: boolean) {
     };
 }
 
-export function setup_proxy_console(id: string, console: Console, origin: string): void {
+export function setup_proxy_console (id: string, console: Console, origin: string): void {
     theConsoleApi = console as any;
     threadNamePrefix = id;
     originalConsoleMethods = {
@@ -117,14 +117,13 @@ export function setup_proxy_console(id: string, console: Console, origin: string
     setupWS();
 }
 
-export function teardown_proxy_console(message?: string) {
+export function teardown_proxy_console (message?: string) {
     const stop_when_ws_buffer_empty = () => {
         if (!consoleWebSocket) {
             if (message && originalConsoleMethods) {
                 originalConsoleMethods.log(message);
             }
-        }
-        else if (consoleWebSocket.bufferedAmount == 0) {
+        } else if (consoleWebSocket.bufferedAmount == 0) {
             if (message) {
                 // tell xharness WasmTestMessagesProcessor we are done.
                 // note this sends last few bytes into the same WS
@@ -136,38 +135,36 @@ export function teardown_proxy_console(message?: string) {
             consoleWebSocket.removeEventListener("close", logWSClose);
             consoleWebSocket.close(1000, message);
             (consoleWebSocket as any) = undefined;
-        }
-        else {
+        } else {
             globalThis.setTimeout(stop_when_ws_buffer_empty, 100);
         }
     };
     stop_when_ws_buffer_empty();
 }
 
-function send(msg: string) {
+function send (msg: string) {
     if (consoleWebSocket && consoleWebSocket.readyState === WebSocket.OPEN) {
         consoleWebSocket.send(msg);
-    }
-    else {
+    } else {
         originalConsoleMethods.log(msg);
     }
 }
 
-function logWSError(event: Event) {
+function logWSError (event: Event) {
     originalConsoleMethods.error(`[${threadNamePrefix}] proxy console websocket error: ${event}`, event);
 }
 
-function logWSClose(event: Event) {
+function logWSClose (event: Event) {
     originalConsoleMethods.debug(`[${threadNamePrefix}] proxy console websocket closed: ${event}`, event);
 }
 
-function setupWS() {
+function setupWS () {
     for (const m of methods) {
         theConsoleApi[m] = proxyConsoleMethod(`console.${m}`, send, true);
     }
 }
 
-function setupOriginal() {
+function setupOriginal () {
     for (const m of methods) {
         theConsoleApi[m] = proxyConsoleMethod(`console.${m}`, originalConsoleMethods.log, false);
     }
diff --git a/src/mono/browser/runtime/loader/polyfills.ts b/src/mono/browser/runtime/loader/polyfills.ts
index e60fb2111b8f..181323b0fe0a 100644
--- a/src/mono/browser/runtime/loader/polyfills.ts
+++ b/src/mono/browser/runtime/loader/polyfills.ts
@@ -10,15 +10,15 @@ let node_fs: any | undefined = undefined;
 let node_url: any | undefined = undefined;
 const URLPolyfill = class URL {
     private url;
-    constructor(url: string) {
+    constructor (url: string) {
         this.url = url;
     }
-    toString() {
+    toString () {
         return this.url;
     }
 };
 
-export function verifyEnvironment() {
+export function verifyEnvironment () {
     mono_assert(ENVIRONMENT_IS_SHELL || typeof globalThis.URL === "function", "This browser/engine doesn't support URL API. Please use a modern version. See also https://aka.ms/dotnet-wasm-features");
     mono_assert(typeof globalThis.BigInt64Array === "function", "This browser/engine doesn't support BigInt64Array API. Please use a modern version. See also https://aka.ms/dotnet-wasm-features");
     if (WasmEnableThreads) {
@@ -28,7 +28,7 @@ export function verifyEnvironment() {
     }
 }
 
-export async function detect_features_and_polyfill(module: DotnetModuleInternal): Promise<void> {
+export async function detect_features_and_polyfill (module: DotnetModuleInternal): Promise<void> {
     if (ENVIRONMENT_IS_NODE) {
         // eslint-disable-next-line @typescript-eslint/ban-ts-comment
         // @ts-ignore:
@@ -39,7 +39,7 @@ export async function detect_features_and_polyfill(module: DotnetModuleInternal)
         }
     }
 
-    const scriptUrlQuery =/*! webpackIgnore: true */import.meta.url;
+    const scriptUrlQuery = /*! webpackIgnore: true */import.meta.url;
     const queryIndex = scriptUrlQuery.indexOf("?");
     if (queryIndex > 0) {
         loaderHelpers.modulesUniqueQuery = scriptUrlQuery.substring(queryIndex);
@@ -66,8 +66,7 @@ export async function detect_features_and_polyfill(module: DotnetModuleInternal)
         const brands = navigator.userAgentData && navigator.userAgentData.brands;
         if (brands && brands.length > 0) {
             loaderHelpers.isChromium = brands.some((b: any) => b.brand === "Google Chrome" || b.brand === "Microsoft Edge" || b.brand === "Chromium");
-        }
-        else if (navigator.userAgent) {
+        } else if (navigator.userAgent) {
             loaderHelpers.isChromium = navigator.userAgent.includes("Chrome");
             loaderHelpers.isFirefox = navigator.userAgent.includes("Firefox");
         }
@@ -78,7 +77,9 @@ export async function detect_features_and_polyfill(module: DotnetModuleInternal)
         // @ts-ignore:
         INTERNAL.require = await import(/*! webpackIgnore: true */"module").then(mod => mod.createRequire(/*! webpackIgnore: true */import.meta.url));
     } else {
-        INTERNAL.require = Promise.resolve(() => { throw new Error("require not supported"); });
+        INTERNAL.require = Promise.resolve(() => {
+            throw new Error("require not supported");
+        });
     }
 
     if (typeof globalThis.URL === "undefined") {
@@ -86,7 +87,7 @@ export async function detect_features_and_polyfill(module: DotnetModuleInternal)
     }
 }
 
-export async function fetch_like(url: string, init?: RequestInit): Promise<Response> {
+export async function fetch_like (url: string, init?: RequestInit): Promise<Response> {
     try {
         // this need to be detected only after we import node modules in onConfigLoaded
         const hasFetch = typeof (globalThis.fetch) === "function";
@@ -113,13 +114,13 @@ export async function fetch_like(url: string, init?: RequestInit): Promise<Respo
                 url,
                 arrayBuffer: () => arrayBuffer,
                 json: () => JSON.parse(arrayBuffer),
-                text: () => { throw new Error("NotImplementedException"); }
+                text: () => {
+                    throw new Error("NotImplementedException");
+                }
             };
-        }
-        else if (hasFetch) {
+        } else if (hasFetch) {
             return globalThis.fetch(url, init || { credentials: "same-origin" });
-        }
-        else if (typeof (read) === "function") {
+        } else if (typeof (read) === "function") {
             // note that it can't open files with unicode names, like Stra<unicode char - Latin Small Letter Sharp S>e.xml
             // https://bugs.chromium.org/p/v8/issues/detail?id=12541
             return <Response><any>{
@@ -138,8 +139,7 @@ export async function fetch_like(url: string, init?: RequestInit): Promise<Respo
                 text: () => read(url, "utf8")
             };
         }
-    }
-    catch (e: any) {
+    } catch (e: any) {
         return <Response><any>{
             ok: false,
             url,
@@ -149,19 +149,25 @@ export async function fetch_like(url: string, init?: RequestInit): Promise<Respo
                 get: () => null
             },
             statusText: "ERR28: " + e,
-            arrayBuffer: () => { throw e; },
-            json: () => { throw e; },
-            text: () => { throw e; }
+            arrayBuffer: () => {
+                throw e;
+            },
+            json: () => {
+                throw e;
+            },
+            text: () => {
+                throw e;
+            }
         };
     }
     throw new Error("No fetch implementation available");
 }
 
-// context: the loadBootResource extension point can return URL/string which is unqualified. 
+// context: the loadBootResource extension point can return URL/string which is unqualified.
 // For example `xxx/a.js` and we have to make it absolute
 // For compatibility reasons, it's based of document.baseURI even for JS modules like `./xxx/a.js`, which normally use script directory of a caller of `import`
 // Script directory in general doesn't match document.baseURI
-export function makeURLAbsoluteWithApplicationBase(url: string) {
+export function makeURLAbsoluteWithApplicationBase (url: string) {
     mono_assert(typeof url === "string", "url must be a string");
     if (!isPathAbsolute(url) && url.indexOf("./") !== 0 && url.indexOf("../") !== 0 && globalThis.URL && globalThis.document && globalThis.document.baseURI) {
         url = (new URL(url, globalThis.document.baseURI)).toString();
@@ -169,19 +175,19 @@ export function makeURLAbsoluteWithApplicationBase(url: string) {
     return url;
 }
 
-function normalizeFileUrl(filename: string) {
+function normalizeFileUrl (filename: string) {
     // unix vs windows
     // remove query string
     return filename.replace(/\\/g, "/").replace(/[?#].*/, "");
 }
 
-function normalizeDirectoryUrl(dir: string) {
+function normalizeDirectoryUrl (dir: string) {
     return dir.slice(0, dir.lastIndexOf("/")) + "/";
 }
 
 const protocolRx = /^[a-zA-Z][a-zA-Z\d+\-.]*?:\/\//;
 const windowsAbsoluteRx = /[a-zA-Z]:[\\/]/;
-function isPathAbsolute(path: string): boolean {
+function isPathAbsolute (path: string): boolean {
     if (ENVIRONMENT_IS_NODE || ENVIRONMENT_IS_SHELL) {
         // unix /x.json
         // windows \x.json
diff --git a/src/mono/browser/runtime/loader/promise-controller.ts b/src/mono/browser/runtime/loader/promise-controller.ts
index b27c8216f4fc..64917aa385ab 100644
--- a/src/mono/browser/runtime/loader/promise-controller.ts
+++ b/src/mono/browser/runtime/loader/promise-controller.ts
@@ -9,7 +9,7 @@ export const promise_control_symbol = Symbol.for("wasm promise_control");
 
 /// Creates a new promise together with a controller that can be used to resolve or reject that promise.
 /// Optionally takes callbacks to be called immediately after a promise is resolved or rejected.
-export function createPromiseController<T>(afterResolve?: () => void, afterReject?: () => void): PromiseAndController<T> {
+export function createPromiseController<T> (afterResolve?: () => void, afterReject?: () => void): PromiseAndController<T> {
     let promise_control: PromiseController<T> = null as unknown as PromiseController<T>;
     const promise = new Promise<T>(function (resolve, reject) {
         promise_control = {
@@ -42,14 +42,14 @@ export function createPromiseController<T>(afterResolve?: () => void, afterRejec
 }
 
 export function getPromiseController<T>(promise: ControllablePromise<T>): PromiseController<T>;
-export function getPromiseController<T>(promise: Promise<T>): PromiseController<T> | undefined {
+export function getPromiseController<T> (promise: Promise<T>): PromiseController<T> | undefined {
     return (promise as any)[promise_control_symbol];
 }
 
-export function isControllablePromise<T>(promise: Promise<T>): promise is ControllablePromise<T> {
+export function isControllablePromise<T> (promise: Promise<T>): promise is ControllablePromise<T> {
     return (promise as any)[promise_control_symbol] !== undefined;
 }
 
-export function assertIsControllablePromise<T>(promise: Promise<T>): asserts promise is ControllablePromise<T> {
+export function assertIsControllablePromise<T> (promise: Promise<T>): asserts promise is ControllablePromise<T> {
     mono_assert(promise && isControllablePromise(promise), "Promise is not controllable");
 }
diff --git a/src/mono/browser/runtime/loader/run.ts b/src/mono/browser/runtime/loader/run.ts
index 3ab8e0e8a8ba..7aeb75d260cb 100644
--- a/src/mono/browser/runtime/loader/run.ts
+++ b/src/mono/browser/runtime/loader/run.ts
@@ -11,7 +11,7 @@ import { ENVIRONMENT_IS_WEB, ENVIRONMENT_IS_WORKER, emscriptenModule, exportedRu
 import { deep_merge_config, deep_merge_module, mono_wasm_load_config } from "./config";
 import { installUnhandledErrorHandler, mono_exit, registerEmscriptenExitHandlers } from "./exit";
 import { setup_proxy_console, mono_log_info, mono_log_debug } from "./logging";
-import { mono_download_assets, prepareAssets, prepareAssetsWorker, resolve_single_asset_path, streamingCompileWasm } from "./assets";
+import { mono_download_assets, preloadWorkers, prepareAssets, prepareAssetsWorker, resolve_single_asset_path, streamingCompileWasm } from "./assets";
 import { detect_features_and_polyfill } from "./polyfills";
 import { runtimeHelpers, loaderHelpers } from "./globals";
 import { init_globalization } from "./icu";
@@ -24,7 +24,7 @@ export class HostBuilder implements DotnetHostBuilder {
     private instance?: RuntimeAPI;
 
     // internal
-    withModuleConfig(moduleConfig: DotnetModuleConfig): DotnetHostBuilder {
+    withModuleConfig (moduleConfig: DotnetModuleConfig): DotnetHostBuilder {
         try {
             deep_merge_module(emscriptenModule, moduleConfig);
             return this;
@@ -35,7 +35,7 @@ export class HostBuilder implements DotnetHostBuilder {
     }
 
     // internal
-    withOnConfigLoaded(onConfigLoaded: (config: MonoConfig) => void | Promise<void>): DotnetHostBuilder {
+    withOnConfigLoaded (onConfigLoaded: (config: MonoConfig) => void | Promise<void>): DotnetHostBuilder {
         try {
             deep_merge_module(emscriptenModule, {
                 onConfigLoaded
@@ -48,7 +48,7 @@ export class HostBuilder implements DotnetHostBuilder {
     }
 
     // internal
-    withConsoleForwarding(): DotnetHostBuilder {
+    withConsoleForwarding (): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, {
                 forwardConsoleLogsToWS: true
@@ -61,7 +61,7 @@ export class HostBuilder implements DotnetHostBuilder {
     }
 
     // internal
-    withExitOnUnhandledError(): DotnetHostBuilder {
+    withExitOnUnhandledError (): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, {
                 exitOnUnhandledError: true
@@ -75,7 +75,7 @@ export class HostBuilder implements DotnetHostBuilder {
     }
 
     // internal
-    withAsyncFlushOnExit(): DotnetHostBuilder {
+    withAsyncFlushOnExit (): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, {
                 asyncFlushOnExit: true
@@ -88,7 +88,7 @@ export class HostBuilder implements DotnetHostBuilder {
     }
 
     // internal
-    withExitCodeLogging(): DotnetHostBuilder {
+    withExitCodeLogging (): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, {
                 logExitCode: true
@@ -101,7 +101,7 @@ export class HostBuilder implements DotnetHostBuilder {
     }
 
     // internal
-    withElementOnExit(): DotnetHostBuilder {
+    withElementOnExit (): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, {
                 appendElementOnExit: true
@@ -114,7 +114,7 @@ export class HostBuilder implements DotnetHostBuilder {
     }
 
     // internal
-    withInteropCleanupOnExit(): DotnetHostBuilder {
+    withInteropCleanupOnExit (): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, {
                 interopCleanupOnExit: true
@@ -127,7 +127,7 @@ export class HostBuilder implements DotnetHostBuilder {
     }
 
     // internal
-    withDumpThreadsOnNonZeroExit(): DotnetHostBuilder {
+    withDumpThreadsOnNonZeroExit (): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, {
                 dumpThreadsOnNonZeroExit: true
@@ -139,22 +139,9 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    // internal
-    withAssertAfterExit(): DotnetHostBuilder {
-        try {
-            deep_merge_config(monoConfig, {
-                assertAfterExit: true
-            });
-            return this;
-        } catch (err) {
-            mono_exit(1, err);
-            throw err;
-        }
-    }
-
     // internal
     //  todo fallback later by debugLevel
-    withWaitingForDebugger(level: number): DotnetHostBuilder {
+    withWaitingForDebugger (level: number): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, {
                 waitForDebugger: level
@@ -166,7 +153,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withInterpreterPgo(value: boolean, autoSaveDelay?: number): DotnetHostBuilder {
+    withInterpreterPgo (value: boolean, autoSaveDelay?: number): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, {
                 interpreterPgo: value,
@@ -179,7 +166,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withConfig(config: MonoConfig): DotnetHostBuilder {
+    withConfig (config: MonoConfig): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, config);
             return this;
@@ -189,7 +176,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withConfigSrc(configSrc: string): DotnetHostBuilder {
+    withConfigSrc (configSrc: string): DotnetHostBuilder {
         try {
             mono_assert(configSrc && typeof configSrc === "string", "must be file path or URL");
             deep_merge_module(emscriptenModule, { configSrc });
@@ -200,7 +187,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withVirtualWorkingDirectory(vfsPath: string): DotnetHostBuilder {
+    withVirtualWorkingDirectory (vfsPath: string): DotnetHostBuilder {
         try {
             mono_assert(vfsPath && typeof vfsPath === "string", "must be directory path");
             deep_merge_config(monoConfig, {
@@ -213,7 +200,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withEnvironmentVariable(name: string, value: string): DotnetHostBuilder {
+    withEnvironmentVariable (name: string, value: string): DotnetHostBuilder {
         try {
             const environmentVariables: { [key: string]: string } = {};
             environmentVariables[name] = value;
@@ -227,7 +214,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withEnvironmentVariables(variables: { [i: string]: string; }): DotnetHostBuilder {
+    withEnvironmentVariables (variables: { [i: string]: string; }): DotnetHostBuilder {
         try {
             mono_assert(variables && typeof variables === "object", "must be dictionary object");
             deep_merge_config(monoConfig, {
@@ -240,7 +227,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withDiagnosticTracing(enabled: boolean): DotnetHostBuilder {
+    withDiagnosticTracing (enabled: boolean): DotnetHostBuilder {
         try {
             mono_assert(typeof enabled === "boolean", "must be boolean");
             deep_merge_config(monoConfig, {
@@ -253,7 +240,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withDebugging(level: number): DotnetHostBuilder {
+    withDebugging (level: number): DotnetHostBuilder {
         try {
             mono_assert(level !== undefined && level !== null && typeof level === "number", "must be number");
             deep_merge_config(monoConfig, {
@@ -266,7 +253,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withApplicationArguments(...args: string[]): DotnetHostBuilder {
+    withApplicationArguments (...args: string[]): DotnetHostBuilder {
         try {
             mono_assert(args && Array.isArray(args), "must be array of strings");
             deep_merge_config(monoConfig, {
@@ -279,7 +266,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withRuntimeOptions(runtimeOptions: string[]): DotnetHostBuilder {
+    withRuntimeOptions (runtimeOptions: string[]): DotnetHostBuilder {
         try {
             mono_assert(runtimeOptions && Array.isArray(runtimeOptions), "must be array of strings");
             deep_merge_config(monoConfig, {
@@ -292,7 +279,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withMainAssembly(mainAssemblyName: string): DotnetHostBuilder {
+    withMainAssembly (mainAssemblyName: string): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, {
                 mainAssemblyName
@@ -304,7 +291,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withApplicationArgumentsFromQuery(): DotnetHostBuilder {
+    withApplicationArgumentsFromQuery (): DotnetHostBuilder {
         try {
             if (!globalThis.window) {
                 throw new Error("Missing window to the query parameters from");
@@ -323,7 +310,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withApplicationEnvironment(applicationEnvironment?: string): DotnetHostBuilder {
+    withApplicationEnvironment (applicationEnvironment?: string): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, {
                 applicationEnvironment,
@@ -335,7 +322,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withApplicationCulture(applicationCulture?: string): DotnetHostBuilder {
+    withApplicationCulture (applicationCulture?: string): DotnetHostBuilder {
         try {
             deep_merge_config(monoConfig, {
                 applicationCulture,
@@ -347,7 +334,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    withResourceLoader(loadBootResource?: LoadBootResourceCallback): DotnetHostBuilder {
+    withResourceLoader (loadBootResource?: LoadBootResourceCallback): DotnetHostBuilder {
         try {
             loaderHelpers.loadBootResource = loadBootResource;
             return this;
@@ -357,7 +344,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    async create(): Promise<RuntimeAPI> {
+    async create (): Promise<RuntimeAPI> {
         try {
             if (!this.instance) {
                 this.instance = await createApi();
@@ -369,7 +356,7 @@ export class HostBuilder implements DotnetHostBuilder {
         }
     }
 
-    async run(): Promise<number> {
+    async run (): Promise<number> {
         try {
             mono_assert(emscriptenModule.config, "Null moduleConfig.config");
             if (!this.instance) {
@@ -383,7 +370,7 @@ export class HostBuilder implements DotnetHostBuilder {
     }
 }
 
-export async function createApi(): Promise<RuntimeAPI> {
+export async function createApi (): Promise<RuntimeAPI> {
     if (ENVIRONMENT_IS_WEB && loaderHelpers.config.forwardConsoleLogsToWS && typeof globalThis.WebSocket != "undefined") {
         setup_proxy_console("main", globalThis.console, globalThis.location.origin);
     }
@@ -393,7 +380,7 @@ export async function createApi(): Promise<RuntimeAPI> {
     return globalObjectsRoot.api;
 }
 
-export async function createEmscripten(moduleFactory: DotnetModuleConfig | ((api: RuntimeAPI) => DotnetModuleConfig)): Promise<RuntimeAPI | EmscriptenModuleInternal> {
+export async function createEmscripten (moduleFactory: DotnetModuleConfig | ((api: RuntimeAPI) => DotnetModuleConfig)): Promise<RuntimeAPI | EmscriptenModuleInternal> {
     // extract ModuleConfig
     if (typeof moduleFactory === "function") {
         const extension = moduleFactory(globalObjectsRoot.api) as any;
@@ -402,11 +389,9 @@ export async function createEmscripten(moduleFactory: DotnetModuleConfig | ((api
         }
         Object.assign(emscriptenModule, extension);
         deep_merge_module(emscriptenModule, extension);
-    }
-    else if (typeof moduleFactory === "object") {
+    } else if (typeof moduleFactory === "object") {
         deep_merge_module(emscriptenModule, moduleFactory);
-    }
-    else {
+    } else {
         throw new Error("Can't use moduleFactory callback of createDotnetRuntime function.");
     }
 
@@ -424,7 +409,7 @@ export async function createEmscripten(moduleFactory: DotnetModuleConfig | ((api
 }
 
 // in the future we can use feature detection to load different flavors
-function importModules() {
+function importModules () {
     const jsModuleRuntimeAsset = resolve_single_asset_path("js-module-runtime");
     const jsModuleNativeAsset = resolve_single_asset_path("js-module-native");
 
@@ -448,7 +433,7 @@ function importModules() {
     return [jsModuleRuntimePromise, jsModuleNativePromise];
 }
 
-async function initializeModules(es6Modules: [RuntimeModuleExportsInternal, NativeModuleExportsInternal]) {
+async function initializeModules (es6Modules: [RuntimeModuleExportsInternal, NativeModuleExportsInternal]) {
     const { initializeExports, initializeReplacements, configureRuntimeStartup, configureEmscriptenStartup, configureWorkerStartup, setRuntimeGlobals, passEmscriptenInternals } = es6Modules[0];
     const { default: emscriptenFactory } = es6Modules[1];
     setRuntimeGlobals(globalObjectsRoot);
@@ -468,7 +453,7 @@ async function initializeModules(es6Modules: [RuntimeModuleExportsInternal, Nati
     });
 }
 
-async function createEmscriptenMain(): Promise<RuntimeAPI> {
+async function createEmscriptenMain (): Promise<RuntimeAPI> {
     if (NativeAOT && !loaderHelpers.config?.resources) {
         if (!loaderHelpers.config) {
             loaderHelpers.config = {};
@@ -504,9 +489,9 @@ async function createEmscriptenMain(): Promise<RuntimeAPI> {
     setTimeout(async () => {
         try {
             init_globalization();
+            preloadWorkers();
             await mono_download_assets();
-        }
-        catch (err) {
+        } catch (err) {
             mono_exit(1, err);
         }
     }, 0);
@@ -523,13 +508,22 @@ async function createEmscriptenMain(): Promise<RuntimeAPI> {
     return exportedRuntimeAPI;
 }
 
-async function createEmscriptenWorker(): Promise<EmscriptenModuleInternal> {
+async function createEmscriptenWorker (): Promise<EmscriptenModuleInternal> {
     setupPreloadChannelToMainThread();
 
     await loaderHelpers.afterConfigLoaded.promise;
 
     prepareAssetsWorker();
 
+    setTimeout(async () => {
+        try {
+            // load subset which is on JS heap rather than in WASM linear memory
+            await mono_download_assets();
+        } catch (err) {
+            mono_exit(1, err);
+        }
+    }, 0);
+
     const promises = importModules();
     const es6Modules = await Promise.all(promises);
     await initializeModules(es6Modules as any);
diff --git a/src/mono/browser/runtime/loader/worker.ts b/src/mono/browser/runtime/loader/worker.ts
index 81a9cecad6a7..d14d862c05aa 100644
--- a/src/mono/browser/runtime/loader/worker.ts
+++ b/src/mono/browser/runtime/loader/worker.ts
@@ -1,19 +1,20 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import { MonoConfigInternal, WorkerToMainMessageType, monoMessageSymbol } from "../types/internal";
+import { MonoConfigInternal, PThreadInfo, WorkerToMainMessageType, monoMessageSymbol } from "../types/internal";
 import { MonoConfig } from "../types";
 import { deep_merge_config, normalizeConfig } from "./config";
-import { ENVIRONMENT_IS_WEB, loaderHelpers } from "./globals";
+import { ENVIRONMENT_IS_WEB, loaderHelpers, runtimeHelpers } from "./globals";
 import { mono_log_debug } from "./logging";
 
-export function setupPreloadChannelToMainThread() {
+export function setupPreloadChannelToMainThread () {
     const channel = new MessageChannel();
     const workerPort = channel.port1;
     const mainPort = channel.port2;
     workerPort.addEventListener("message", (event) => {
         const config = JSON.parse(event.data.config) as MonoConfig;
-        onMonoConfigReceived(config);
+        const monoThreadInfo = JSON.parse(event.data.monoThreadInfo) as PThreadInfo;
+        onMonoConfigReceived(config, monoThreadInfo);
         workerPort.close();
         mainPort.close();
     }, { once: true });
@@ -30,13 +31,13 @@ export function setupPreloadChannelToMainThread() {
 let workerMonoConfigReceived = false;
 
 // called when the main thread sends us the mono config
-function onMonoConfigReceived(config: MonoConfigInternal): void {
+function onMonoConfigReceived (config: MonoConfigInternal, monoThreadInfo: PThreadInfo): void {
     if (workerMonoConfigReceived) {
         mono_log_debug("mono config already received");
         return;
     }
-
     deep_merge_config(loaderHelpers.config, config);
+    runtimeHelpers.monoThreadInfo = monoThreadInfo;
     normalizeConfig();
     mono_log_debug("mono config received");
     workerMonoConfigReceived = true;
diff --git a/src/mono/browser/runtime/logging.ts b/src/mono/browser/runtime/logging.ts
index 4e9229796138..1d91a870d52f 100644
--- a/src/mono/browser/runtime/logging.ts
+++ b/src/mono/browser/runtime/logging.ts
@@ -2,31 +2,31 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 /* eslint-disable no-console */
-import { INTERNAL, runtimeHelpers } from "./globals";
+import { INTERNAL, runtimeHelpers, mono_assert } from "./globals";
 import { utf8ToString } from "./strings";
 import { CharPtr, VoidPtr } from "./types/emscripten";
 
 let prefix = "MONO_WASM: ";
 
-export function set_thread_prefix(threadPrefix: string) {
+export function set_thread_prefix (threadPrefix: string) {
     prefix = `[${threadPrefix}] MONO_WASM: `;
 }
 
-export function mono_log_debug(msg: string, ...data: any) {
+export function mono_log_debug (msg: string, ...data: any) {
     if (runtimeHelpers.diagnosticTracing) {
         console.debug(prefix + msg, ...data);
     }
 }
 
-export function mono_log_info(msg: string, ...data: any) {
+export function mono_log_info (msg: string, ...data: any) {
     console.info(prefix + msg, ...data);
 }
 
-export function mono_log_warn(msg: string, ...data: any) {
+export function mono_log_warn (msg: string, ...data: any) {
     console.warn(prefix + msg, ...data);
 }
 
-export function mono_log_error(msg: string, ...data: any) {
+export function mono_log_error (msg: string, ...data: any) {
     if (data && data.length > 0 && data[0] && typeof data[0] === "object" && data[0].silent) {
         // don't log silent errors
         return;
@@ -35,6 +35,7 @@ export function mono_log_error(msg: string, ...data: any) {
 }
 
 export const wasm_func_map = new Map<number, string>();
+let wasm_pending_symbol_table: string | undefined;
 const regexes: any[] = [];
 
 // V8
@@ -52,8 +53,10 @@ regexes.push(/(?<replaceSection>[a-z]+:\/\/[^ )]*:wasm-function\[(?<funcNum>\d+)
 //# <?>.wasm-function[8962]
 regexes.push(/(?<replaceSection><[^ >]+>[.:]wasm-function\[(?<funcNum>[0-9]+)\])/);
 
-export function mono_wasm_symbolicate_string(message: string): string {
+export function mono_wasm_symbolicate_string (message: string): string {
     try {
+        performDeferredSymbolMapParsing();
+
         if (wasm_func_map.size == 0)
             return message;
 
@@ -89,12 +92,11 @@ export function mono_wasm_symbolicate_string(message: string): string {
     }
 }
 
-export function mono_wasm_stringify_as_error_with_stack(reason: any): string {
+export function mono_wasm_stringify_as_error_with_stack (reason: any): string {
     let stack: string;
     if (typeof reason === "string") {
         stack = reason;
-    }
-    else if (reason === undefined || reason === null || reason.stack === undefined) {
+    } else if (reason === undefined || reason === null || reason.stack === undefined) {
         stack = new Error().stack + "";
     } else {
         stack = reason.stack + "";
@@ -104,7 +106,7 @@ export function mono_wasm_stringify_as_error_with_stack(reason: any): string {
     return mono_wasm_symbolicate_string(stack);
 }
 
-export function mono_wasm_trace_logger(log_domain_ptr: CharPtr, log_level_ptr: CharPtr, message_ptr: CharPtr, fatal: number, user_data: VoidPtr): void {
+export function mono_wasm_trace_logger (log_domain_ptr: CharPtr, log_level_ptr: CharPtr, message_ptr: CharPtr, fatal: number, user_data: VoidPtr): void {
     const origMessage = utf8ToString(message_ptr);
     const isFatal = !!fatal;
     const domain = utf8ToString(log_domain_ptr);
@@ -142,23 +144,41 @@ export function mono_wasm_trace_logger(log_domain_ptr: CharPtr, log_level_ptr: C
 }
 
 
-export function parseSymbolMapFile(text: string) {
-    text.split(/[\r\n]/).forEach((line: string) => {
-        const parts: string[] = line.split(/:/);
-        if (parts.length < 2)
-            return;
+export function parseSymbolMapFile (text: string) {
+    // Symbol map parsing is very expensive, so doing it during startup is wasteful
+    //  instead, we defer it until the first time the symbol map is needed - which
+    //  may be never
+    mono_assert(!wasm_pending_symbol_table, "Another symbol map was already loaded");
+    wasm_pending_symbol_table = text;
+    mono_log_debug(`Deferred loading of ${text.length}ch symbol map`);
+}
 
-        parts[1] = parts.splice(1).join(":");
-        wasm_func_map.set(Number(parts[0]), parts[1]);
-    });
+function performDeferredSymbolMapParsing () {
+    if (!wasm_pending_symbol_table)
+        return;
 
-    mono_log_debug(`Loaded ${wasm_func_map.size} symbols`);
+    const text = wasm_pending_symbol_table!;
+    wasm_pending_symbol_table = undefined;
+    try {
+        text.split(/[\r\n]/).forEach((line: string) => {
+            const parts: string[] = line.split(/:/);
+            if (parts.length < 2)
+                return;
+
+            parts[1] = parts.splice(1).join(":");
+            wasm_func_map.set(Number(parts[0]), parts[1]);
+        });
+        mono_log_debug(`Loaded ${wasm_func_map.size} symbols`);
+    } catch (exc) {
+        mono_log_warn(`Failed to load symbol map: ${exc}`);
+    }
 }
 
-export function mono_wasm_get_func_id_to_name_mappings() {
+export function mono_wasm_get_func_id_to_name_mappings () {
+    performDeferredSymbolMapParsing();
     return [...wasm_func_map.values()];
 }
 
-export function mono_wasm_console_clear() {
+export function mono_wasm_console_clear () {
     console.clear();
-}
\ No newline at end of file
+}
diff --git a/src/mono/browser/runtime/managed-exports.ts b/src/mono/browser/runtime/managed-exports.ts
index 2e39a7aed6da..16232e086c54 100644
--- a/src/mono/browser/runtime/managed-exports.ts
+++ b/src/mono/browser/runtime/managed-exports.ts
@@ -5,207 +5,411 @@ import NativeAOT from "consts:nativeAOT";
 
 import WasmEnableThreads from "consts:wasmEnableThreads";
 
-import { GCHandle, MarshalerToCs, MarshalerToJs, MarshalerType, MonoMethod } from "./types/internal";
-import cwraps from "./cwraps";
+import { GCHandle, GCHandleNull, JSMarshalerArguments, JSThreadBlockingMode, MarshalerToCs, MarshalerToJs, MarshalerType, MonoMethod, PThreadPtr } from "./types/internal";
+import cwraps, { threads_c_functions as twraps } from "./cwraps";
 import { runtimeHelpers, Module, loaderHelpers, mono_assert } from "./globals";
-import { alloc_stack_frame, get_arg, set_arg_type, set_gc_handle } from "./marshal";
-import { invoke_method_and_handle_exception, invoke_method_raw } from "./invoke-cs";
-import { marshal_array_to_cs, marshal_array_to_cs_impl, marshal_exception_to_cs, marshal_intptr_to_cs } from "./marshal-to-cs";
-import { marshal_int32_to_js, end_marshal_task_to_js, marshal_string_to_js, begin_marshal_task_to_js } from "./marshal-to-js";
-import { do_not_force_dispose } from "./gc-handles";
+import { JavaScriptMarshalerArgSize, alloc_stack_frame, get_arg, get_arg_gc_handle, is_args_exception, set_arg_i32, set_arg_intptr, set_arg_type, set_gc_handle, set_receiver_should_free } from "./marshal";
+import { marshal_array_to_cs, marshal_array_to_cs_impl, marshal_bool_to_cs, marshal_exception_to_cs, marshal_intptr_to_cs, marshal_string_to_cs } from "./marshal-to-cs";
+import { marshal_int32_to_js, end_marshal_task_to_js, marshal_string_to_js, begin_marshal_task_to_js, marshal_exception_to_js } from "./marshal-to-js";
+import { do_not_force_dispose, is_gcv_handle } from "./gc-handles";
+import { assert_c_interop, assert_js_interop } from "./invoke-js";
+import { monoThreadInfo, mono_wasm_main_thread_ptr } from "./pthreads";
+import { _zero_region, copyBytes } from "./memory";
+import { stringToUTF8Ptr } from "./strings";
+import { mono_log_debug } from "./logging";
 
-export function init_managed_exports(): void {
+const managedExports: ManagedExports = {} as any;
+
+export function init_managed_exports (): void {
     if (NativeAOT) {
         return;
     }
     const exports_fqn_asm = "System.Runtime.InteropServices.JavaScript";
+    // TODO https://github.com/dotnet/runtime/issues/98366
     runtimeHelpers.runtime_interop_module = cwraps.mono_wasm_assembly_load(exports_fqn_asm);
     if (!runtimeHelpers.runtime_interop_module)
         throw "Can't find bindings module assembly: " + exports_fqn_asm;
 
-    runtimeHelpers.runtime_interop_namespace = "System.Runtime.InteropServices.JavaScript";
+    runtimeHelpers.runtime_interop_namespace = exports_fqn_asm;
     runtimeHelpers.runtime_interop_exports_classname = "JavaScriptExports";
+    // TODO https://github.com/dotnet/runtime/issues/98366
     runtimeHelpers.runtime_interop_exports_class = cwraps.mono_wasm_assembly_find_class(runtimeHelpers.runtime_interop_module, runtimeHelpers.runtime_interop_namespace, runtimeHelpers.runtime_interop_exports_classname);
     if (!runtimeHelpers.runtime_interop_exports_class)
         throw "Can't find " + runtimeHelpers.runtime_interop_namespace + "." + runtimeHelpers.runtime_interop_exports_classname + " class";
 
-    const install_main_synchronization_context = WasmEnableThreads ? get_method("InstallMainSynchronizationContext") : undefined;
-    mono_assert(!WasmEnableThreads || install_main_synchronization_context, "Can't find InstallMainSynchronizationContext method");
-    const call_entry_point = get_method("CallEntrypoint");
-    mono_assert(call_entry_point, "Can't find CallEntrypoint method");
-    const release_js_owned_object_by_gc_handle_method = get_method("ReleaseJSOwnedObjectByGCHandle");
-    mono_assert(release_js_owned_object_by_gc_handle_method, "Can't find ReleaseJSOwnedObjectByGCHandle method");
-    const complete_task_method = get_method("CompleteTask");
-    mono_assert(complete_task_method, "Can't find CompleteTask method");
-    const call_delegate_method = get_method("CallDelegate");
-    mono_assert(call_delegate_method, "Can't find CallDelegate method");
-    const get_managed_stack_trace_method = get_method("GetManagedStackTrace");
-    mono_assert(get_managed_stack_trace_method, "Can't find GetManagedStackTrace method");
-    const load_satellite_assembly_method = get_method("LoadSatelliteAssembly");
-    mono_assert(load_satellite_assembly_method, "Can't find LoadSatelliteAssembly method");
-    const load_lazy_assembly_method = get_method("LoadLazyAssembly");
-    mono_assert(load_lazy_assembly_method, "Can't find LoadLazyAssembly method");
-
-    runtimeHelpers.javaScriptExports.call_entry_point = async (entry_point: MonoMethod, program_args?: string[]): Promise<number> => {
-        loaderHelpers.assert_runtime_running();
-        const sp = Module.stackSave();
-        try {
-            Module.runtimeKeepalivePush();
-            const args = alloc_stack_frame(4);
-            const res = get_arg(args, 1);
-            const arg1 = get_arg(args, 2);
-            const arg2 = get_arg(args, 3);
-            marshal_intptr_to_cs(arg1, entry_point);
-            if (program_args && program_args.length == 0) {
-                program_args = undefined;
-            }
-            marshal_array_to_cs_impl(arg2, program_args, MarshalerType.String);
+    managedExports.InstallMainSynchronizationContext = WasmEnableThreads ? get_method("InstallMainSynchronizationContext") : undefined;
+    managedExports.CallEntrypoint = get_method("CallEntrypoint");
+    managedExports.BindAssemblyExports = get_method("BindAssemblyExports");
+    managedExports.ReleaseJSOwnedObjectByGCHandle = get_method("ReleaseJSOwnedObjectByGCHandle");
+    managedExports.CompleteTask = get_method("CompleteTask");
+    managedExports.CallDelegate = get_method("CallDelegate");
+    managedExports.GetManagedStackTrace = get_method("GetManagedStackTrace");
+    managedExports.LoadSatelliteAssembly = get_method("LoadSatelliteAssembly");
+    managedExports.LoadLazyAssembly = get_method("LoadLazyAssembly");
+}
+
+// the marshaled signature is: Task<int>? CallEntrypoint(char* mainAssemblyName, string[] args)
+export function call_entry_point (main_assembly_name: string, program_args: string[] | undefined, waitForDebugger: boolean): Promise<number> {
+    loaderHelpers.assert_runtime_running();
+    const sp = Module.stackSave();
+    try {
+        const size = 5;
+        const args = alloc_stack_frame(size);
+        const res = get_arg(args, 1);
+        const arg1 = get_arg(args, 2);
+        const arg2 = get_arg(args, 3);
+        const arg3 = get_arg(args, 4);
+        const main_assembly_name_ptr = stringToUTF8Ptr(main_assembly_name);
+        marshal_intptr_to_cs(arg1, main_assembly_name_ptr);
+        marshal_array_to_cs_impl(arg2, program_args && !program_args.length ? undefined : program_args, MarshalerType.String);
+        marshal_bool_to_cs(arg3, waitForDebugger);
+
+        // because this is async, we could pre-allocate the promise
+        let promise = begin_marshal_task_to_js(res, MarshalerType.TaskPreCreated, marshal_int32_to_js);
+
+        invoke_async_jsexport(runtimeHelpers.managedThreadTID, managedExports.CallEntrypoint, args, size);
+
+        // in case the C# side returned synchronously
+        promise = end_marshal_task_to_js(args, marshal_int32_to_js, promise);
+
+        if (promise === null || promise === undefined) {
+            promise = Promise.resolve(0);
+        }
+        (promise as any)[do_not_force_dispose] = true; // prevent disposing the task in forceDisposeProxies()
 
-            // because this is async, we could pre-allocate the promise
-            let promise = begin_marshal_task_to_js(res, MarshalerType.TaskPreCreated, marshal_int32_to_js);
+        return promise;
+    } finally {
+        Module.stackRestore(sp); // synchronously
+    }
+}
 
-            // NOTE: at the moment this is synchronous call on the same thread and therefore we could marshal (null) result synchronously
-            invoke_method_and_handle_exception(call_entry_point, args);
+// the marshaled signature is: void LoadSatelliteAssembly(byte[] dll)
+export function load_satellite_assembly (dll: Uint8Array): void {
+    loaderHelpers.assert_runtime_running();
+    const sp = Module.stackSave();
+    try {
+        const size = 3;
+        const args = alloc_stack_frame(size);
+        const arg1 = get_arg(args, 2);
+        set_arg_type(arg1, MarshalerType.Array);
+        marshal_array_to_cs(arg1, dll, MarshalerType.Byte);
+        invoke_sync_jsexport(managedExports.LoadSatelliteAssembly, args);
+    } finally {
+        Module.stackRestore(sp);
+    }
+}
 
-            // in case the C# side returned synchronously
-            promise = end_marshal_task_to_js(args, marshal_int32_to_js, promise);
+// the marshaled signature is: void LoadLazyAssembly(byte[] dll, byte[] pdb)
+export function load_lazy_assembly (dll: Uint8Array, pdb: Uint8Array | null): void {
+    loaderHelpers.assert_runtime_running();
+    const sp = Module.stackSave();
+    try {
+        const size = 4;
+        const args = alloc_stack_frame(size);
+        const arg1 = get_arg(args, 2);
+        const arg2 = get_arg(args, 3);
+        set_arg_type(arg1, MarshalerType.Array);
+        set_arg_type(arg2, MarshalerType.Array);
+        marshal_array_to_cs(arg1, dll, MarshalerType.Byte);
+        marshal_array_to_cs(arg2, pdb, MarshalerType.Byte);
+        invoke_sync_jsexport(managedExports.LoadLazyAssembly, args);
+    } finally {
+        Module.stackRestore(sp);
+    }
+}
 
-            if (promise === null || promise === undefined) {
-                promise = Promise.resolve(0);
+// the marshaled signature is: void ReleaseJSOwnedObjectByGCHandle(GCHandle gcHandle)
+export function release_js_owned_object_by_gc_handle (gc_handle: GCHandle) {
+    mono_assert(gc_handle, "Must be valid gc_handle");
+    loaderHelpers.assert_runtime_running();
+    const sp = Module.stackSave();
+    try {
+        const size = 3;
+        const args = alloc_stack_frame(size);
+        const arg1 = get_arg(args, 2);
+        set_arg_type(arg1, MarshalerType.Object);
+        set_gc_handle(arg1, gc_handle);
+        if (!WasmEnableThreads || is_gcv_handle(gc_handle) || !monoThreadInfo.isUI) {
+            // this must stay synchronous for free_gcv_handle sake, to not use-after-free
+            // also on JSWebWorker, because the message could arrive after the worker is terminated and the GCHandle of JSProxyContext is already freed
+            invoke_sync_jsexport(managedExports.ReleaseJSOwnedObjectByGCHandle, args);
+        } else {
+            invoke_async_jsexport(runtimeHelpers.ioThreadTID, managedExports.ReleaseJSOwnedObjectByGCHandle, args, size);
+        }
+    } finally {
+        Module.stackRestore(sp);
+    }
+}
+
+// the marshaled signature is: void CompleteTask<T>(GCHandle holder, Exception? exceptionResult, T? result)
+export function complete_task (holder_gc_handle: GCHandle, error?: any, data?: any, res_converter?: MarshalerToCs) {
+    loaderHelpers.assert_runtime_running();
+    const sp = Module.stackSave();
+    try {
+        const size = 5;
+        const args = alloc_stack_frame(size);
+        const arg1 = get_arg(args, 2);
+        set_arg_type(arg1, MarshalerType.Object);
+        set_gc_handle(arg1, holder_gc_handle);
+        const arg2 = get_arg(args, 3);
+        if (error) {
+            marshal_exception_to_cs(arg2, error);
+        } else {
+            set_arg_type(arg2, MarshalerType.None);
+            const arg3 = get_arg(args, 4);
+            mono_assert(res_converter, "res_converter missing");
+            res_converter(arg3, data);
+        }
+        invoke_async_jsexport(runtimeHelpers.ioThreadTID, managedExports.CompleteTask, args, size);
+    } finally {
+        Module.stackRestore(sp);
+    }
+}
+
+// the marshaled signature is: TRes? CallDelegate<T1,T2,T3,TRes>(GCHandle callback, T1? arg1, T2? arg2, T3? arg3)
+export function call_delegate (callback_gc_handle: GCHandle, arg1_js: any, arg2_js: any, arg3_js: any, res_converter?: MarshalerToJs, arg1_converter?: MarshalerToCs, arg2_converter?: MarshalerToCs, arg3_converter?: MarshalerToCs) {
+    loaderHelpers.assert_runtime_running();
+    if (WasmEnableThreads) {
+        if (monoThreadInfo.isUI) {
+            if (runtimeHelpers.config.jsThreadBlockingMode == JSThreadBlockingMode.PreventSynchronousJSExport) {
+                throw new Error("Cannot call synchronous C# methods.");
+            } else if (runtimeHelpers.isPendingSynchronousCall) {
+                throw new Error("Cannot call synchronous C# method from inside a synchronous call to a JS method.");
             }
-            (promise as any)[do_not_force_dispose] = true; // prevent disposing the task in forceDisposeProxies()
-            return await promise;
-        } finally {
-            Module.runtimeKeepalivePop();// after await promise !
-            Module.stackRestore(sp);
-        }
-    };
-    runtimeHelpers.javaScriptExports.load_satellite_assembly = (dll: Uint8Array): void => {
-        const sp = Module.stackSave();
-        try {
-            const args = alloc_stack_frame(3);
-            const arg1 = get_arg(args, 2);
-            set_arg_type(arg1, MarshalerType.Array);
-            marshal_array_to_cs(arg1, dll, MarshalerType.Byte);
-            invoke_method_and_handle_exception(load_satellite_assembly_method, args);
-        } finally {
-            Module.stackRestore(sp);
-        }
-    };
-    runtimeHelpers.javaScriptExports.load_lazy_assembly = (dll: Uint8Array, pdb: Uint8Array | null): void => {
-        const sp = Module.stackSave();
-        try {
-            const args = alloc_stack_frame(4);
-            const arg1 = get_arg(args, 2);
+        }
+    }
+    const sp = Module.stackSave();
+    try {
+        const size = 6;
+        const args = alloc_stack_frame(size);
+
+        const arg1 = get_arg(args, 2);
+        set_arg_type(arg1, MarshalerType.Object);
+        set_gc_handle(arg1, callback_gc_handle);
+        // payload arg numbers are shifted by one, the real first is a gc handle of the callback
+
+        if (arg1_converter) {
             const arg2 = get_arg(args, 3);
-            set_arg_type(arg1, MarshalerType.Array);
-            set_arg_type(arg2, MarshalerType.Array);
-            marshal_array_to_cs(arg1, dll, MarshalerType.Byte);
-            marshal_array_to_cs(arg2, pdb, MarshalerType.Byte);
-            invoke_method_and_handle_exception(load_lazy_assembly_method, args);
-        } finally {
-            Module.stackRestore(sp);
-        }
-    };
-    runtimeHelpers.javaScriptExports.release_js_owned_object_by_gc_handle = (gc_handle: GCHandle) => {
-        mono_assert(gc_handle, "Must be valid gc_handle");
-        loaderHelpers.assert_runtime_running();
-        const sp = Module.stackSave();
-        try {
-            const args = alloc_stack_frame(3);
-            const arg1 = get_arg(args, 2);
-            set_arg_type(arg1, MarshalerType.Object);
-            set_gc_handle(arg1, gc_handle);
-            invoke_method_and_handle_exception(release_js_owned_object_by_gc_handle_method, args);
-        } finally {
-            Module.stackRestore(sp);
-        }
-    };
-    runtimeHelpers.javaScriptExports.complete_task = (holder_gc_handle: GCHandle, isCanceling: boolean, error?: any, data?: any, res_converter?: MarshalerToCs) => {
-        loaderHelpers.assert_runtime_running();
-        const sp = Module.stackSave();
-        try {
-            const args = alloc_stack_frame(5);
+            arg1_converter(arg2, arg1_js);
+        }
+        if (arg2_converter) {
+            const arg3 = get_arg(args, 4);
+            arg2_converter(arg3, arg2_js);
+        }
+        if (arg3_converter) {
+            const arg4 = get_arg(args, 5);
+            arg3_converter(arg4, arg3_js);
+        }
+
+        invoke_sync_jsexport(managedExports.CallDelegate, args);
+
+        if (res_converter) {
             const res = get_arg(args, 1);
-            const arg1 = get_arg(args, 2);
-            set_arg_type(arg1, MarshalerType.Object);
-            set_gc_handle(arg1, holder_gc_handle);
-            const arg2 = get_arg(args, 3);
-            if (error) {
-                marshal_exception_to_cs(arg2, error);
-                if (isCanceling) {
-                    set_arg_type(res, MarshalerType.Discard);
-                }
-            } else {
-                set_arg_type(arg2, MarshalerType.None);
-                const arg3 = get_arg(args, 4);
-                mono_assert(res_converter, "res_converter missing");
-                res_converter(arg3, data);
-            }
-            invoke_method_and_handle_exception(complete_task_method, args);
-        } finally {
-            Module.stackRestore(sp);
-        }
-    };
-    runtimeHelpers.javaScriptExports.call_delegate = (callback_gc_handle: GCHandle, arg1_js: any, arg2_js: any, arg3_js: any, res_converter?: MarshalerToJs, arg1_converter?: MarshalerToCs, arg2_converter?: MarshalerToCs, arg3_converter?: MarshalerToCs) => {
-        loaderHelpers.assert_runtime_running();
-        const sp = Module.stackSave();
-        try {
-            const args = alloc_stack_frame(6);
-
-            const arg1 = get_arg(args, 2);
-            set_arg_type(arg1, MarshalerType.Object);
-            set_gc_handle(arg1, callback_gc_handle);
-            // payload arg numbers are shifted by one, the real first is a gc handle of the callback
-
-            if (arg1_converter) {
-                const arg2 = get_arg(args, 3);
-                arg1_converter(arg2, arg1_js);
-            }
-            if (arg2_converter) {
-                const arg3 = get_arg(args, 4);
-                arg2_converter(arg3, arg2_js);
-            }
-            if (arg3_converter) {
-                const arg4 = get_arg(args, 5);
-                arg3_converter(arg4, arg3_js);
-            }
+            return res_converter(res);
+        }
+    } finally {
+        Module.stackRestore(sp);
+    }
+}
+
+// the marshaled signature is: string GetManagedStackTrace(GCHandle exception)
+export function get_managed_stack_trace (exception_gc_handle: GCHandle) {
+    loaderHelpers.assert_runtime_running();
+    const sp = Module.stackSave();
+    try {
+        const size = 3;
+        const args = alloc_stack_frame(size);
 
-            invoke_method_and_handle_exception(call_delegate_method, args);
+        const arg1 = get_arg(args, 2);
+        set_arg_type(arg1, MarshalerType.Exception);
+        set_gc_handle(arg1, exception_gc_handle);
 
-            if (res_converter) {
-                const res = get_arg(args, 1);
-                return res_converter(res);
+        invoke_sync_jsexport(managedExports.GetManagedStackTrace, args);
+        const res = get_arg(args, 1);
+        return marshal_string_to_js(res);
+    } finally {
+        Module.stackRestore(sp);
+    }
+}
+
+// GCHandle InstallMainSynchronizationContext(nint jsNativeTID, JSThreadBlockingMode jsThreadBlockingMode)
+export function install_main_synchronization_context (jsThreadBlockingMode: JSThreadBlockingMode): GCHandle {
+    if (!WasmEnableThreads) return GCHandleNull;
+    assert_c_interop();
+
+    try {
+        // this block is like alloc_stack_frame() but without set_args_context()
+        const bytes = JavaScriptMarshalerArgSize * 4;
+        const args = Module.stackAlloc(bytes) as any;
+        _zero_region(args, bytes);
+
+        const res = get_arg(args, 1);
+        const arg1 = get_arg(args, 2);
+        const arg2 = get_arg(args, 3);
+        set_arg_intptr(arg1, mono_wasm_main_thread_ptr() as any);
+
+        // sync with JSHostImplementation.Types.cs
+        switch (jsThreadBlockingMode) {
+            case JSThreadBlockingMode.PreventSynchronousJSExport:
+                set_arg_i32(arg2, 0);
+                break;
+            case JSThreadBlockingMode.ThrowWhenBlockingWait:
+                set_arg_i32(arg2, 1);
+                break;
+            case JSThreadBlockingMode.WarnWhenBlockingWait:
+                set_arg_i32(arg2, 2);
+                break;
+            case JSThreadBlockingMode.DangerousAllowBlockingWait:
+                set_arg_i32(arg2, 100);
+                break;
+            default:
+                throw new Error("Invalid jsThreadBlockingMode");
+        }
+
+        // this block is like invoke_sync_jsexport() but without assert_js_interop()
+        cwraps.mono_wasm_invoke_jsexport(managedExports.InstallMainSynchronizationContext!, args);
+        if (is_args_exception(args)) {
+            const exc = get_arg(args, 0);
+            throw marshal_exception_to_js(exc);
+        }
+        return get_arg_gc_handle(res) as any;
+    } catch (e) {
+        mono_log_debug("install_main_synchronization_context failed", e);
+        throw e;
+    }
+}
+
+function invoke_async_jsexport_mono (managedTID: PThreadPtr, method: MonoMethod, args: JSMarshalerArguments, size: number): void {
+    assert_js_interop();
+    if (!WasmEnableThreads || runtimeHelpers.isManagedRunningOnCurrentThread) {
+        cwraps.mono_wasm_invoke_jsexport(method, args as any);
+        if (is_args_exception(args)) {
+            const exc = get_arg(args, 0);
+            throw marshal_exception_to_js(exc);
+        }
+    } else {
+        set_receiver_should_free(args);
+        const bytes = JavaScriptMarshalerArgSize * size;
+        const cpy = Module._malloc(bytes) as any;
+        copyBytes(args as any, cpy, bytes);
+        twraps.mono_wasm_invoke_jsexport_async_post(managedTID, method, cpy);
+    }
+}
+
+function invoke_async_jsexport_naot (_managedTID: PThreadPtr, method: Function, args: JSMarshalerArguments): void {
+    if (!WasmEnableThreads || runtimeHelpers.isManagedRunningOnCurrentThread) {
+        method(args);
+        if (is_args_exception(args)) {
+            const exc = get_arg(args, 0);
+            throw marshal_exception_to_js(exc);
+        }
+    } else {
+        throw new Error("Not supported in NAOT");
+    }
+}
+
+export const invoke_async_jsexport: (managedTID: PThreadPtr, method: any, args: JSMarshalerArguments, size: number) => void = NativeAOT ? invoke_async_jsexport_naot : invoke_async_jsexport_mono;
+
+function invoke_sync_jsexport_mono (method: MonoMethod, args: JSMarshalerArguments): void {
+    assert_js_interop();
+    if (!WasmEnableThreads) {
+        cwraps.mono_wasm_invoke_jsexport(method, args as any);
+    } else {
+        if (monoThreadInfo.isUI) {
+            if (runtimeHelpers.config.jsThreadBlockingMode == JSThreadBlockingMode.PreventSynchronousJSExport) {
+                throw new Error("Cannot call synchronous C# methods.");
+            } else if (runtimeHelpers.isPendingSynchronousCall) {
+                throw new Error("Cannot call synchronous C# method from inside a synchronous call to a JS method.");
             }
-        } finally {
-            Module.stackRestore(sp);
         }
-    };
-    runtimeHelpers.javaScriptExports.get_managed_stack_trace = (exception_gc_handle: GCHandle) => {
-        loaderHelpers.assert_runtime_running();
-        const sp = Module.stackSave();
-        try {
-            const args = alloc_stack_frame(3);
+        if (runtimeHelpers.isManagedRunningOnCurrentThread) {
+            twraps.mono_wasm_invoke_jsexport_sync(method, args as any);
+        } else {
+            // this is blocking too
+            twraps.mono_wasm_invoke_jsexport_sync_send(runtimeHelpers.managedThreadTID, method, args as any);
+        }
+    }
 
-            const arg1 = get_arg(args, 2);
-            set_arg_type(arg1, MarshalerType.Exception);
-            set_gc_handle(arg1, exception_gc_handle);
+    if (is_args_exception(args)) {
+        const exc = get_arg(args, 0);
+        throw marshal_exception_to_js(exc);
+    }
+}
 
-            invoke_method_and_handle_exception(get_managed_stack_trace_method, args);
-            const res = get_arg(args, 1);
-            return marshal_string_to_js(res);
-        } finally {
-            Module.stackRestore(sp);
+function invoke_sync_jsexport_naot (method: Function, args: JSMarshalerArguments): void {
+    method(args);
+    if (is_args_exception(args)) {
+        const exc = get_arg(args, 0);
+        throw marshal_exception_to_js(exc);
+    }
+}
+
+export const invoke_sync_jsexport: (method: any, args: JSMarshalerArguments) => void = NativeAOT ? invoke_sync_jsexport_naot : invoke_sync_jsexport_mono;
+
+// the marshaled signature is: Task BindAssemblyExports(string assemblyName)
+function bind_assembly_exports_mono (assemblyName: string): Promise<void> {
+    loaderHelpers.assert_runtime_running();
+    const sp = Module.stackSave();
+    try {
+        const size = 3;
+        const args = alloc_stack_frame(size);
+        const res = get_arg(args, 1);
+        const arg1 = get_arg(args, 2);
+        marshal_string_to_cs(arg1, assemblyName);
+
+        // because this is async, we could pre-allocate the promise
+        let promise = begin_marshal_task_to_js(res, MarshalerType.TaskPreCreated);
+
+        invoke_async_jsexport(runtimeHelpers.managedThreadTID, managedExports.BindAssemblyExports, args, size);
+
+        // in case the C# side returned synchronously
+        promise = end_marshal_task_to_js(args, marshal_int32_to_js, promise);
+
+        if (promise === null || promise === undefined) {
+            promise = Promise.resolve();
+        }
+        return promise;
+    } finally {
+        Module.stackRestore(sp); // synchronously
+    }
+}
+
+export const exportsByAssembly: Map<string, any> = new Map();
+function bind_assembly_exports_naot (assembly: string) {
+    assert_js_interop();
+    const result = exportsByAssembly.get(assembly);
+    if (!result) {
+        let assemblyWithoutExtension = assembly;
+        if (assemblyWithoutExtension.endsWith(".dll")) {
+            assemblyWithoutExtension = assemblyWithoutExtension.substring(0, assembly.length - 4);
         }
-    };
-    if (WasmEnableThreads && install_main_synchronization_context) {
-        runtimeHelpers.javaScriptExports.install_main_synchronization_context = () => invoke_method_raw(install_main_synchronization_context);
+        const register = (Module as any)["_" + assemblyWithoutExtension + "__GeneratedInitializer" + "__Register_"];
+        mono_assert(register, `Missing wasm export for JSExport registration function in assembly ${assembly}`);
+        register();
     }
+
+    return exportsByAssembly.get(assembly) || {};
 }
 
-export function get_method(method_name: string): MonoMethod {
+export const bind_assembly_exports: (assemblyName: string) => Promise<void> = NativeAOT ? bind_assembly_exports_naot : bind_assembly_exports_mono;
+
+function get_method (method_name: string): MonoMethod {
+    // TODO https://github.com/dotnet/runtime/issues/98366
     const res = cwraps.mono_wasm_assembly_find_method(runtimeHelpers.runtime_interop_exports_class, method_name, -1);
     if (!res)
         throw "Can't find method " + runtimeHelpers.runtime_interop_namespace + "." + runtimeHelpers.runtime_interop_exports_classname + "." + method_name;
     return res;
 }
+
+type ManagedExports = {
+    InstallMainSynchronizationContext: MonoMethod | undefined,
+    entry_point: MonoMethod,
+    CallEntrypoint: MonoMethod,
+    BindAssemblyExports: MonoMethod,
+    ReleaseJSOwnedObjectByGCHandle: MonoMethod,
+    CompleteTask: MonoMethod,
+    CallDelegate: MonoMethod,
+    GetManagedStackTrace: MonoMethod,
+    LoadSatelliteAssembly: MonoMethod,
+    LoadLazyAssembly: MonoMethod,
+}
diff --git a/src/mono/browser/runtime/marshal-to-cs.ts b/src/mono/browser/runtime/marshal-to-cs.ts
index e1baec112ab3..360cefec509c 100644
--- a/src/mono/browser/runtime/marshal-to-cs.ts
+++ b/src/mono/browser/runtime/marshal-to-cs.ts
@@ -5,34 +5,32 @@ import WasmEnableThreads from "consts:wasmEnableThreads";
 import BuildConfiguration from "consts:configuration";
 import WasmEnableJsInteropByValue from "consts:wasmEnableJsInteropByValue";
 
-import { isThenable } from "./cancelable-promise";
+import { PromiseHolder, isThenable } from "./cancelable-promise";
 import cwraps from "./cwraps";
-import { alloc_gcv_handle, assert_not_disposed, cs_owned_js_handle_symbol, js_owned_gc_handle_symbol, mono_wasm_get_js_handle, setup_managed_proxy, teardown_managed_proxy } from "./gc-handles";
-import { Module, loaderHelpers, mono_assert, runtimeHelpers } from "./globals";
+import { alloc_gcv_handle, assert_not_disposed, cs_owned_js_handle_symbol, js_owned_gc_handle_symbol, mono_wasm_get_js_handle, setup_managed_proxy } from "./gc-handles";
+import { Module, mono_assert, runtimeHelpers } from "./globals";
 import {
     ManagedError,
-    set_gc_handle, set_js_handle, set_arg_type, set_arg_i32, set_arg_f64, set_arg_i52, set_arg_f32, set_arg_i16, set_arg_u8, set_arg_b8, set_arg_date,
+    set_gc_handle, set_js_handle, set_arg_type, set_arg_i32, set_arg_f64, set_arg_i52, set_arg_f32, set_arg_i16, set_arg_u8, set_arg_bool, set_arg_date,
     set_arg_length, get_arg, get_signature_arg1_type, get_signature_arg2_type, js_to_cs_marshalers,
     get_signature_res_type, bound_js_function_symbol, set_arg_u16, array_element_size,
     get_string_root, Span, ArraySegment, MemoryViewType, get_signature_arg3_type, set_arg_i64_big, set_arg_intptr,
-    set_arg_element_type, ManagedObject, JavaScriptMarshalerArgSize, proxy_debug_symbol, get_arg_gc_handle, get_arg_type
+    set_arg_element_type, ManagedObject, JavaScriptMarshalerArgSize, proxy_debug_symbol, get_arg_gc_handle, get_arg_type, set_arg_proxy_context, get_arg_intptr
 } from "./marshal";
 import { get_marshaler_to_js_by_type } from "./marshal-to-js";
 import { _zero_region, localHeapViewF64, localHeapViewI32, localHeapViewU8 } from "./memory";
 import { stringToMonoStringRoot, stringToUTF16 } from "./strings";
 import { JSMarshalerArgument, JSMarshalerArguments, JSMarshalerType, MarshalerToCs, MarshalerToJs, BoundMarshalerToCs, MarshalerType } from "./types/internal";
 import { TypedArray } from "./types/emscripten";
-import { addUnsettledPromise, settleUnsettledPromise } from "./pthreads/shared/eventloop";
-import { mono_log_debug } from "./logging";
 
 export const jsinteropDoc = "For more information see https://aka.ms/dotnet-wasm-jsinterop";
 
-export function initialize_marshalers_to_cs(): void {
+export function initialize_marshalers_to_cs (): void {
     if (js_to_cs_marshalers.size == 0) {
         js_to_cs_marshalers.set(MarshalerType.Array, marshal_array_to_cs);
         js_to_cs_marshalers.set(MarshalerType.Span, _marshal_span_to_cs);
         js_to_cs_marshalers.set(MarshalerType.ArraySegment, _marshal_array_segment_to_cs);
-        js_to_cs_marshalers.set(MarshalerType.Boolean, _marshal_bool_to_cs);
+        js_to_cs_marshalers.set(MarshalerType.Boolean, marshal_bool_to_cs);
         js_to_cs_marshalers.set(MarshalerType.Byte, _marshal_byte_to_cs);
         js_to_cs_marshalers.set(MarshalerType.Char, _marshal_char_to_cs);
         js_to_cs_marshalers.set(MarshalerType.Int16, _marshal_int16_to_cs);
@@ -44,24 +42,25 @@ export function initialize_marshalers_to_cs(): void {
         js_to_cs_marshalers.set(MarshalerType.IntPtr, marshal_intptr_to_cs);
         js_to_cs_marshalers.set(MarshalerType.DateTime, _marshal_date_time_to_cs);
         js_to_cs_marshalers.set(MarshalerType.DateTimeOffset, _marshal_date_time_offset_to_cs);
-        js_to_cs_marshalers.set(MarshalerType.String, _marshal_string_to_cs);
+        js_to_cs_marshalers.set(MarshalerType.String, marshal_string_to_cs);
         js_to_cs_marshalers.set(MarshalerType.Exception, marshal_exception_to_cs);
         js_to_cs_marshalers.set(MarshalerType.JSException, marshal_exception_to_cs);
         js_to_cs_marshalers.set(MarshalerType.JSObject, marshal_js_object_to_cs);
-        js_to_cs_marshalers.set(MarshalerType.Object, _marshal_cs_object_to_cs);
-        js_to_cs_marshalers.set(MarshalerType.Task, _marshal_task_to_cs);
-        js_to_cs_marshalers.set(MarshalerType.TaskResolved, _marshal_task_to_cs);
-        js_to_cs_marshalers.set(MarshalerType.TaskRejected, _marshal_task_to_cs);
+        js_to_cs_marshalers.set(MarshalerType.Object, marshal_cs_object_to_cs);
+        js_to_cs_marshalers.set(MarshalerType.Task, marshal_task_to_cs);
+        js_to_cs_marshalers.set(MarshalerType.TaskResolved, marshal_task_to_cs);
+        js_to_cs_marshalers.set(MarshalerType.TaskRejected, marshal_task_to_cs);
         js_to_cs_marshalers.set(MarshalerType.Action, _marshal_function_to_cs);
         js_to_cs_marshalers.set(MarshalerType.Function, _marshal_function_to_cs);
         js_to_cs_marshalers.set(MarshalerType.None, _marshal_null_to_cs);// also void
         js_to_cs_marshalers.set(MarshalerType.Discard, _marshal_null_to_cs);// also void
         js_to_cs_marshalers.set(MarshalerType.Void, _marshal_null_to_cs);// also void
+        js_to_cs_marshalers.set(MarshalerType.DiscardNoWait, _marshal_null_to_cs);// also void
     }
 }
 
-export function bind_arg_marshal_to_cs(sig: JSMarshalerType, marshaler_type: MarshalerType, index: number): BoundMarshalerToCs | undefined {
-    if (marshaler_type === MarshalerType.None || marshaler_type === MarshalerType.Void) {
+export function bind_arg_marshal_to_cs (sig: JSMarshalerType, marshaler_type: MarshalerType, index: number): BoundMarshalerToCs | undefined {
+    if (marshaler_type === MarshalerType.None || marshaler_type === MarshalerType.Void || marshaler_type === MarshalerType.Discard || marshaler_type === MarshalerType.DiscardNoWait) {
         return undefined;
     }
     let res_marshaler: MarshalerToCs | undefined = undefined;
@@ -87,7 +86,7 @@ export function bind_arg_marshal_to_cs(sig: JSMarshalerType, marshaler_type: Mar
     };
 }
 
-export function get_marshaler_to_cs_by_type(marshaler_type: MarshalerType): MarshalerToCs | undefined {
+export function get_marshaler_to_cs_by_type (marshaler_type: MarshalerType): MarshalerToCs | undefined {
     if (marshaler_type === MarshalerType.None || marshaler_type === MarshalerType.Void) {
         return undefined;
     }
@@ -96,162 +95,149 @@ export function get_marshaler_to_cs_by_type(marshaler_type: MarshalerType): Mars
     return converter;
 }
 
-function _marshal_bool_to_cs(arg: JSMarshalerArgument, value: any): void {
+export function marshal_bool_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         set_arg_type(arg, MarshalerType.Boolean);
-        set_arg_b8(arg, value);
+        set_arg_bool(arg, value);
     }
 }
 
-function _marshal_byte_to_cs(arg: JSMarshalerArgument, value: any): void {
+function _marshal_byte_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         set_arg_type(arg, MarshalerType.Byte);
         set_arg_u8(arg, value);
     }
 }
 
-function _marshal_char_to_cs(arg: JSMarshalerArgument, value: any): void {
+function _marshal_char_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         set_arg_type(arg, MarshalerType.Char);
         set_arg_u16(arg, value);
     }
 }
 
-function _marshal_int16_to_cs(arg: JSMarshalerArgument, value: any): void {
+function _marshal_int16_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         set_arg_type(arg, MarshalerType.Int16);
         set_arg_i16(arg, value);
     }
 }
 
-function _marshal_int32_to_cs(arg: JSMarshalerArgument, value: any): void {
+function _marshal_int32_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         set_arg_type(arg, MarshalerType.Int32);
         set_arg_i32(arg, value);
     }
 }
 
-function _marshal_int52_to_cs(arg: JSMarshalerArgument, value: any): void {
+function _marshal_int52_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         set_arg_type(arg, MarshalerType.Int52);
         set_arg_i52(arg, value);
     }
 }
 
-function _marshal_bigint64_to_cs(arg: JSMarshalerArgument, value: any): void {
+function _marshal_bigint64_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         set_arg_type(arg, MarshalerType.BigInt64);
         set_arg_i64_big(arg, value);
     }
 }
 
-function _marshal_double_to_cs(arg: JSMarshalerArgument, value: any): void {
+function _marshal_double_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         set_arg_type(arg, MarshalerType.Double);
         set_arg_f64(arg, value);
     }
 }
 
-function _marshal_float_to_cs(arg: JSMarshalerArgument, value: any): void {
+function _marshal_float_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         set_arg_type(arg, MarshalerType.Single);
         set_arg_f32(arg, value);
     }
 }
 
-export function marshal_intptr_to_cs(arg: JSMarshalerArgument, value: any): void {
+export function marshal_intptr_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         set_arg_type(arg, MarshalerType.IntPtr);
         set_arg_intptr(arg, value);
     }
 }
 
-function _marshal_date_time_to_cs(arg: JSMarshalerArgument, value: Date): void {
+function _marshal_date_time_to_cs (arg: JSMarshalerArgument, value: Date): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         mono_check(value instanceof Date, "Value is not a Date");
         set_arg_type(arg, MarshalerType.DateTime);
         set_arg_date(arg, value);
     }
 }
 
-function _marshal_date_time_offset_to_cs(arg: JSMarshalerArgument, value: Date): void {
+function _marshal_date_time_offset_to_cs (arg: JSMarshalerArgument, value: Date): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         mono_check(value instanceof Date, "Value is not a Date");
         set_arg_type(arg, MarshalerType.DateTimeOffset);
         set_arg_date(arg, value);
     }
 }
 
-function _marshal_string_to_cs(arg: JSMarshalerArgument, value: string) {
+export function marshal_string_to_cs (arg: JSMarshalerArgument, value: string) {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         set_arg_type(arg, MarshalerType.String);
         mono_check(typeof value === "string", "Value is not a String");
         _marshal_string_to_cs_impl(arg, value);
     }
 }
 
-function _marshal_string_to_cs_impl(arg: JSMarshalerArgument, value: string) {
+function _marshal_string_to_cs_impl (arg: JSMarshalerArgument, value: string) {
     if (WasmEnableJsInteropByValue) {
         const bufferLen = value.length * 2;
-        const buffer = Module._malloc(bufferLen);
+        const buffer = Module._malloc(bufferLen);// together with Marshal.FreeHGlobal
         stringToUTF16(buffer as any, buffer as any + bufferLen, value);
         set_arg_intptr(arg, buffer);
         set_arg_length(arg, value.length);
     } else {
+        mono_assert(!WasmEnableThreads, "Marshaling strings by reference is not supported in multithreaded mode");
         const root = get_string_root(arg);
         try {
             stringToMonoStringRoot(value, root);
-        }
-        finally {
+        } finally {
             root.release();
         }
     }
 }
 
-function _marshal_null_to_cs(arg: JSMarshalerArgument) {
+function _marshal_null_to_cs (arg: JSMarshalerArgument) {
     set_arg_type(arg, MarshalerType.None);
 }
 
-function _marshal_function_to_cs(arg: JSMarshalerArgument, value: Function, _?: MarshalerType, res_converter?: MarshalerToCs, arg1_converter?: MarshalerToJs, arg2_converter?: MarshalerToJs, arg3_converter?: MarshalerToJs): void {
+function _marshal_function_to_cs (arg: JSMarshalerArgument, value: Function, _?: MarshalerType, res_converter?: MarshalerToCs, arg1_converter?: MarshalerToJs, arg2_converter?: MarshalerToJs, arg3_converter?: MarshalerToJs): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
         return;
@@ -259,13 +245,14 @@ function _marshal_function_to_cs(arg: JSMarshalerArgument, value: Function, _?:
     mono_check(value && value instanceof Function, "Value is not a Function");
 
     // TODO: we could try to cache value -> existing JSHandle
-    const wrapper: any = (args: JSMarshalerArguments) => {
+    const wrapper: any = function delegate_wrapper (args: JSMarshalerArguments) {
         const exc = get_arg(args, 0);
         const res = get_arg(args, 1);
         const arg1 = get_arg(args, 2);
         const arg2 = get_arg(args, 3);
         const arg3 = get_arg(args, 4);
 
+        const previousPendingSynchronousCall = runtimeHelpers.isPendingSynchronousCall;
         try {
             mono_assert(!WasmEnableThreads || !wrapper.isDisposed, "Function is disposed and should not be invoked anymore.");
 
@@ -281,6 +268,7 @@ function _marshal_function_to_cs(arg: JSMarshalerArgument, value: Function, _?:
             if (arg3_converter) {
                 arg3_js = arg3_converter(arg3);
             }
+            runtimeHelpers.isPendingSynchronousCall = true; // this is always synchronous call for now
             const res_js = value(arg1_js, arg2_js, arg3_js);
             if (res_converter) {
                 res_converter(res, res_js);
@@ -288,12 +276,16 @@ function _marshal_function_to_cs(arg: JSMarshalerArgument, value: Function, _?:
 
         } catch (ex) {
             marshal_exception_to_cs(exc, ex);
+        } finally {
+            runtimeHelpers.isPendingSynchronousCall = previousPendingSynchronousCall;
         }
     };
 
     wrapper[bound_js_function_symbol] = true;
     wrapper.isDisposed = false;
-    wrapper.dispose = () => { wrapper.isDisposed = true; };
+    wrapper.dispose = () => {
+        wrapper.isDisposed = true;
+    };
     const bound_function_handle = mono_wasm_get_js_handle(wrapper)!;
     if (BuildConfiguration === "Debug") {
         wrapper[proxy_debug_symbol] = `Proxy of JS Function with JSHandle ${bound_function_handle}: ${value.toString()}`;
@@ -302,15 +294,8 @@ function _marshal_function_to_cs(arg: JSMarshalerArgument, value: Function, _?:
     set_arg_type(arg, MarshalerType.Function);//TODO or action ?
 }
 
-export class PromiseHolder extends ManagedObject {
-    public isResolved = false;
-    public isCanceled = false;
-    public constructor(public promise: Promise<any>) {
-        super();
-    }
-}
 
-function _marshal_task_to_cs(arg: JSMarshalerArgument, value: Promise<any>, _?: MarshalerType, res_converter?: MarshalerToCs) {
+export function marshal_task_to_cs (arg: JSMarshalerArgument, value: Promise<any>, _?: MarshalerType, res_converter?: MarshalerToCs) {
     const handleIsPreallocated = get_arg_type(arg) == MarshalerType.TaskPreCreated;
     if (value === null || value === undefined) {
         if (WasmEnableThreads && handleIsPreallocated) {
@@ -328,89 +313,31 @@ function _marshal_task_to_cs(arg: JSMarshalerArgument, value: Promise<any>, _?:
     mono_check(isThenable(value), "Value is not a Promise");
 
     const gc_handle = handleIsPreallocated ? get_arg_gc_handle(arg) : alloc_gcv_handle();
+    const promiseHolderPtr = WasmEnableThreads && handleIsPreallocated ? get_arg_intptr(arg) : 0;
     if (!handleIsPreallocated) {
         set_gc_handle(arg, gc_handle);
         set_arg_type(arg, MarshalerType.Task);
     }
-    const holder = new PromiseHolder(value);
+
+    const holder = new PromiseHolder(value, gc_handle, promiseHolderPtr, res_converter);
     setup_managed_proxy(holder, gc_handle);
+
     if (BuildConfiguration === "Debug") {
         (holder as any)[proxy_debug_symbol] = `PromiseHolder with GCHandle ${gc_handle}`;
     }
 
-    if (WasmEnableThreads)
-        addUnsettledPromise();
-
-    function resolve(data: any) {
-        if (!loaderHelpers.is_runtime_running()) {
-            mono_log_debug("This promise can't be propagated to managed code, mono runtime already exited.");
-            return;
-        }
-        try {
-            mono_assert(!holder.isDisposed, "This promise can't be propagated to managed code, because the Task was already freed.");
-            mono_assert(!holder.isResolved, "This promise already resolved.");
-            mono_assert(!holder.isCanceled, "This promise already canceled.");
-            holder.isResolved = true;
-            if (WasmEnableThreads) {
-                settleUnsettledPromise();
-            }
-            // we can unregister the GC handle just on JS side
-            teardown_managed_proxy(holder, gc_handle, /*skipManaged: */ true);
-            // order of operations with teardown_managed_proxy matters
-            // so that managed user code running in the continuation could allocate the same GCHandle number and the local registry would be already ok with that
-            runtimeHelpers.javaScriptExports.complete_task(gc_handle, false, null, data, res_converter || _marshal_cs_object_to_cs);
-        }
-        catch (ex) {
-            try {
-                loaderHelpers.mono_exit(1, ex);
-            }
-            catch (ex2) {
-                // there is no point to propagate the exception into the unhandled promise rejection
-            }
-        }
-    }
-
-    function reject(reason: any) {
-        if (!loaderHelpers.is_runtime_running()) {
-            mono_log_debug("This promise can't be propagated to managed code, mono runtime already exited.", reason);
-            return;
-        }
-        try {
-            mono_assert(!holder.isDisposed, "This promise can't be propagated to managed code, because the Task was already freed.");
-            mono_assert(!holder.isResolved, "This promise already resolved.");
-            holder.isResolved = true;
-            if (WasmEnableThreads) {
-                settleUnsettledPromise();
-            }
-            // we can unregister the GC handle just on JS side
-            teardown_managed_proxy(holder, gc_handle, /*skipManaged: */ true);
-            // order of operations with teardown_managed_proxy matters
-            runtimeHelpers.javaScriptExports.complete_task(gc_handle, holder.isCanceled, reason, null, undefined);
-        }
-        catch (ex) {
-            try {
-                loaderHelpers.mono_exit(1, ex);
-            }
-            catch (ex2) {
-                // there is no point to propagate the exception into the unhandled promise rejection
-            }
-        }
-    }
-
-    value.then(resolve).catch(reject);
+    value.then(data => holder.resolve(data), reason => holder.reject(reason));
 }
 
-export function marshal_exception_to_cs(arg: JSMarshalerArgument, value: any): void {
+export function marshal_exception_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else if (value instanceof ManagedError) {
+    } else if (value instanceof ManagedError) {
         set_arg_type(arg, MarshalerType.Exception);
         // this is managed exception round-trip
         const gc_handle = assert_not_disposed(value);
         set_gc_handle(arg, gc_handle);
-    }
-    else {
+    } else {
         mono_check(typeof value === "object" || typeof value === "string", () => `Value is not an Error ${typeof value}`);
         set_arg_type(arg, MarshalerType.JSException);
         const message = value.toString();
@@ -418,8 +345,7 @@ export function marshal_exception_to_cs(arg: JSMarshalerArgument, value: any): v
         const known_js_handle = value[cs_owned_js_handle_symbol];
         if (known_js_handle) {
             set_js_handle(arg, known_js_handle);
-        }
-        else {
+        } else {
             const js_handle = mono_wasm_get_js_handle(value)!;
             if (BuildConfiguration === "Debug" && Object.isExtensible(value)) {
                 value[proxy_debug_symbol] = `JS Error with JSHandle ${js_handle}`;
@@ -429,11 +355,11 @@ export function marshal_exception_to_cs(arg: JSMarshalerArgument, value: any): v
     }
 }
 
-export function marshal_js_object_to_cs(arg: JSMarshalerArgument, value: any): void {
+export function marshal_js_object_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === undefined || value === null) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+        set_arg_proxy_context(arg);
+    } else {
         // if value was ManagedObject, it would be double proxied, but the C# signature requires that
         mono_check(value[js_owned_gc_handle_symbol] === undefined, () => `JSObject proxy of ManagedObject proxy is not supported. ${jsinteropDoc}`);
         mono_check(typeof value === "function" || typeof value === "object", () => `JSObject proxy of ${typeof value} is not supported`);
@@ -447,50 +373,40 @@ export function marshal_js_object_to_cs(arg: JSMarshalerArgument, value: any): v
     }
 }
 
-function _marshal_cs_object_to_cs(arg: JSMarshalerArgument, value: any): void {
+export function marshal_cs_object_to_cs (arg: JSMarshalerArgument, value: any): void {
     if (value === undefined || value === null) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+        set_arg_proxy_context(arg);
+    } else {
         const gc_handle = value[js_owned_gc_handle_symbol];
         const js_type = typeof (value);
         if (gc_handle === undefined) {
             if (js_type === "string" || js_type === "symbol") {
                 set_arg_type(arg, MarshalerType.String);
                 _marshal_string_to_cs_impl(arg, value);
-            }
-            else if (js_type === "number") {
+            } else if (js_type === "number") {
                 set_arg_type(arg, MarshalerType.Double);
                 set_arg_f64(arg, value);
-            }
-            else if (js_type === "bigint") {
+            } else if (js_type === "bigint") {
                 // we do it because not all bigint values could fit into Int64
                 throw new Error("NotImplementedException: bigint");
-            }
-            else if (js_type === "boolean") {
+            } else if (js_type === "boolean") {
                 set_arg_type(arg, MarshalerType.Boolean);
-                set_arg_b8(arg, value);
-            }
-            else if (value instanceof Date) {
+                set_arg_bool(arg, value);
+            } else if (value instanceof Date) {
                 set_arg_type(arg, MarshalerType.DateTime);
                 set_arg_date(arg, value);
-            }
-            else if (value instanceof Error) {
+            } else if (value instanceof Error) {
                 marshal_exception_to_cs(arg, value);
-            }
-            else if (value instanceof Uint8Array) {
+            } else if (value instanceof Uint8Array) {
                 marshal_array_to_cs_impl(arg, value, MarshalerType.Byte);
-            }
-            else if (value instanceof Float64Array) {
+            } else if (value instanceof Float64Array) {
                 marshal_array_to_cs_impl(arg, value, MarshalerType.Double);
-            }
-            else if (value instanceof Int32Array) {
+            } else if (value instanceof Int32Array) {
                 marshal_array_to_cs_impl(arg, value, MarshalerType.Int32);
-            }
-            else if (Array.isArray(value)) {
+            } else if (Array.isArray(value)) {
                 marshal_array_to_cs_impl(arg, value, MarshalerType.Object);
-            }
-            else if (value instanceof Int16Array
+            } else if (value instanceof Int16Array
                 || value instanceof Int8Array
                 || value instanceof Uint8ClampedArray
                 || value instanceof Uint16Array
@@ -498,35 +414,28 @@ function _marshal_cs_object_to_cs(arg: JSMarshalerArgument, value: any): void {
                 || value instanceof Float32Array
             ) {
                 throw new Error("NotImplementedException: TypedArray");
-            }
-            else if (isThenable(value)) {
-                _marshal_task_to_cs(arg, value);
-            }
-            else if (value instanceof Span) {
+            } else if (isThenable(value)) {
+                marshal_task_to_cs(arg, value);
+            } else if (value instanceof Span) {
                 throw new Error("NotImplementedException: Span");
-            }
-            else if (js_type == "object") {
+            } else if (js_type == "object") {
                 const js_handle = mono_wasm_get_js_handle(value);
                 set_arg_type(arg, MarshalerType.JSObject);
                 if (BuildConfiguration === "Debug" && Object.isExtensible(value)) {
                     value[proxy_debug_symbol] = `JS Object with JSHandle ${js_handle}`;
                 }
                 set_js_handle(arg, js_handle);
-            }
-            else {
+            } else {
                 throw new Error(`JSObject proxy is not supported for ${js_type} ${value}`);
             }
-        }
-        else {
+        } else {
             assert_not_disposed(value);
             if (value instanceof ArraySegment) {
                 throw new Error("NotImplementedException: ArraySegment. " + jsinteropDoc);
-            }
-            else if (value instanceof ManagedError) {
+            } else if (value instanceof ManagedError) {
                 set_arg_type(arg, MarshalerType.Exception);
                 set_gc_handle(arg, gc_handle);
-            }
-            else if (value instanceof ManagedObject) {
+            } else if (value instanceof ManagedObject) {
                 set_arg_type(arg, MarshalerType.Object);
                 set_gc_handle(arg, gc_handle);
             } else {
@@ -536,16 +445,15 @@ function _marshal_cs_object_to_cs(arg: JSMarshalerArgument, value: any): void {
     }
 }
 
-export function marshal_array_to_cs(arg: JSMarshalerArgument, value: Array<any> | TypedArray | undefined | null, element_type?: MarshalerType): void {
+export function marshal_array_to_cs (arg: JSMarshalerArgument, value: Array<any> | TypedArray | undefined | null, element_type?: MarshalerType): void {
     mono_assert(!!element_type, "Expected valid element_type parameter");
     marshal_array_to_cs_impl(arg, value, element_type);
 }
 
-export function marshal_array_to_cs_impl(arg: JSMarshalerArgument, value: Array<any> | TypedArray | undefined | null, element_type: MarshalerType): void {
+export function marshal_array_to_cs_impl (arg: JSMarshalerArgument, value: Array<any> | TypedArray | undefined | null, element_type: MarshalerType): void {
     if (value === null || value === undefined) {
         set_arg_type(arg, MarshalerType.None);
-    }
-    else {
+    } else {
         const element_size = array_element_size(element_type);
         mono_assert(element_size != -1, () => `Element type ${MarshalerType[element_type]} not supported`);
         const length = value.length;
@@ -555,48 +463,44 @@ export function marshal_array_to_cs_impl(arg: JSMarshalerArgument, value: Array<
             mono_check(Array.isArray(value), "Value is not an Array");
             _zero_region(buffer_ptr, buffer_length);
             if (!WasmEnableJsInteropByValue) {
+                mono_assert(!WasmEnableThreads, "Marshaling strings by reference is not supported in multithreaded mode");
                 cwraps.mono_wasm_register_root(buffer_ptr, buffer_length, "marshal_array_to_cs");
             }
             for (let index = 0; index < length; index++) {
                 const element_arg = get_arg(<any>buffer_ptr, index);
-                _marshal_string_to_cs(element_arg, value[index]);
+                marshal_string_to_cs(element_arg, value[index]);
             }
-        }
-        else if (element_type == MarshalerType.Object) {
+        } else if (element_type == MarshalerType.Object) {
             mono_check(Array.isArray(value), "Value is not an Array");
             _zero_region(buffer_ptr, buffer_length);
             if (!WasmEnableJsInteropByValue) {
+                mono_assert(!WasmEnableThreads, "Marshaling objects by reference is not supported in multithreaded mode");
                 cwraps.mono_wasm_register_root(buffer_ptr, buffer_length, "marshal_array_to_cs");
             }
             for (let index = 0; index < length; index++) {
                 const element_arg = get_arg(<any>buffer_ptr, index);
-                _marshal_cs_object_to_cs(element_arg, value[index]);
+                marshal_cs_object_to_cs(element_arg, value[index]);
             }
-        }
-        else if (element_type == MarshalerType.JSObject) {
+        } else if (element_type == MarshalerType.JSObject) {
             mono_check(Array.isArray(value), "Value is not an Array");
             _zero_region(buffer_ptr, buffer_length);
             for (let index = 0; index < length; index++) {
                 const element_arg = get_arg(buffer_ptr, index);
                 marshal_js_object_to_cs(element_arg, value[index]);
             }
-        }
-        else if (element_type == MarshalerType.Byte) {
+        } else if (element_type == MarshalerType.Byte) {
             mono_check(Array.isArray(value) || value instanceof Uint8Array, "Value is not an Array or Uint8Array");
             const targetView = localHeapViewU8().subarray(<any>buffer_ptr, buffer_ptr + length);
             targetView.set(value);
-        }
-        else if (element_type == MarshalerType.Int32) {
+        } else if (element_type == MarshalerType.Int32) {
             mono_check(Array.isArray(value) || value instanceof Int32Array, "Value is not an Array or Int32Array");
             const targetView = localHeapViewI32().subarray(<any>buffer_ptr >> 2, (buffer_ptr >> 2) + length);
             targetView.set(value);
-        }
-        else if (element_type == MarshalerType.Double) {
+        } else if (element_type == MarshalerType.Double) {
             mono_check(Array.isArray(value) || value instanceof Float64Array, "Value is not an Array or Float64Array");
             const targetView = localHeapViewF64().subarray(<any>buffer_ptr >> 3, (buffer_ptr >> 3) + length);
             targetView.set(value);
-        }
-        else {
+        } else {
             throw new Error("not implemented");
         }
         set_arg_intptr(arg, buffer_ptr);
@@ -606,7 +510,7 @@ export function marshal_array_to_cs_impl(arg: JSMarshalerArgument, value: Array<
     }
 }
 
-function _marshal_span_to_cs(arg: JSMarshalerArgument, value: Span, element_type?: MarshalerType): void {
+function _marshal_span_to_cs (arg: JSMarshalerArgument, value: Span, element_type?: MarshalerType): void {
     mono_assert(!!element_type, "Expected valid element_type parameter");
     mono_check(!value.isDisposed, "ObjectDisposedException");
     checkViewType(element_type, value._viewType);
@@ -617,7 +521,7 @@ function _marshal_span_to_cs(arg: JSMarshalerArgument, value: Span, element_type
 }
 
 // this only supports round-trip
-function _marshal_array_segment_to_cs(arg: JSMarshalerArgument, value: ArraySegment, element_type?: MarshalerType): void {
+function _marshal_array_segment_to_cs (arg: JSMarshalerArgument, value: ArraySegment, element_type?: MarshalerType): void {
     mono_assert(!!element_type, "Expected valid element_type parameter");
     const gc_handle = assert_not_disposed(value);
     mono_assert(gc_handle, "Only roundtrip of ArraySegment instance created by C#");
@@ -628,17 +532,14 @@ function _marshal_array_segment_to_cs(arg: JSMarshalerArgument, value: ArraySegm
     set_gc_handle(arg, gc_handle);
 }
 
-function checkViewType(element_type: MarshalerType, viewType: MemoryViewType) {
+function checkViewType (element_type: MarshalerType, viewType: MemoryViewType) {
     if (element_type == MarshalerType.Byte) {
         mono_check(MemoryViewType.Byte == viewType, "Expected MemoryViewType.Byte");
-    }
-    else if (element_type == MarshalerType.Int32) {
+    } else if (element_type == MarshalerType.Int32) {
         mono_check(MemoryViewType.Int32 == viewType, "Expected MemoryViewType.Int32");
-    }
-    else if (element_type == MarshalerType.Double) {
+    } else if (element_type == MarshalerType.Double) {
         mono_check(MemoryViewType.Double == viewType, "Expected MemoryViewType.Double");
-    }
-    else {
+    } else {
         throw new Error(`NotImplementedException ${MarshalerType[element_type]} `);
     }
 }
diff --git a/src/mono/browser/runtime/marshal-to-js.ts b/src/mono/browser/runtime/marshal-to-js.ts
index 51a91644096d..eb12f9b542d5 100644
--- a/src/mono/browser/runtime/marshal-to-js.ts
+++ b/src/mono/browser/runtime/marshal-to-js.ts
@@ -9,22 +9,25 @@ import WasmEnableJsInteropByValue from "consts:wasmEnableJsInteropByValue";
 
 import cwraps from "./cwraps";
 import { _lookup_js_owned_object, mono_wasm_get_js_handle, mono_wasm_get_jsobj_from_js_handle, mono_wasm_release_cs_owned_object, register_with_jsv_handle, setup_managed_proxy, teardown_managed_proxy } from "./gc-handles";
-import { Module, loaderHelpers, mono_assert, runtimeHelpers } from "./globals";
+import { Module, loaderHelpers, mono_assert } from "./globals";
 import {
     ManagedObject, ManagedError,
     get_arg_gc_handle, get_arg_js_handle, get_arg_type, get_arg_i32, get_arg_f64, get_arg_i52, get_arg_i16, get_arg_u8, get_arg_f32,
-    get_arg_b8, get_arg_date, get_arg_length, get_arg, set_arg_type,
+    get_arg_bool, get_arg_date, get_arg_length, get_arg, set_arg_type,
     get_signature_arg2_type, get_signature_arg1_type, cs_to_js_marshalers,
     get_signature_res_type, get_arg_u16, array_element_size, get_string_root,
-    ArraySegment, Span, MemoryViewType, get_signature_arg3_type, get_arg_i64_big, get_arg_intptr, get_arg_element_type, JavaScriptMarshalerArgSize, proxy_debug_symbol, set_js_handle
+    ArraySegment, Span, MemoryViewType, get_signature_arg3_type, get_arg_i64_big, get_arg_intptr, get_arg_element_type, JavaScriptMarshalerArgSize, proxy_debug_symbol, set_js_handle, is_receiver_should_free
 } from "./marshal";
 import { monoStringToString, utf16ToString } from "./strings";
 import { GCHandleNull, JSMarshalerArgument, JSMarshalerArguments, JSMarshalerType, MarshalerToCs, MarshalerToJs, BoundMarshalerToJs, MarshalerType, JSHandle } from "./types/internal";
 import { TypedArray } from "./types/emscripten";
 import { get_marshaler_to_cs_by_type, jsinteropDoc, marshal_exception_to_cs } from "./marshal-to-cs";
 import { localHeapViewF64, localHeapViewI32, localHeapViewU8 } from "./memory";
+import { call_delegate } from "./managed-exports";
+import { mono_log_debug } from "./logging";
+import { invoke_later_when_on_ui_thread_async } from "./invoke-js";
 
-export function initialize_marshalers_to_js(): void {
+export function initialize_marshalers_to_js (): void {
     if (cs_to_js_marshalers.size == 0) {
         cs_to_js_marshalers.set(MarshalerType.Array, _marshal_array_to_js);
         cs_to_js_marshalers.set(MarshalerType.Span, _marshal_span_to_js);
@@ -55,11 +58,12 @@ export function initialize_marshalers_to_js(): void {
         cs_to_js_marshalers.set(MarshalerType.None, _marshal_null_to_js);
         cs_to_js_marshalers.set(MarshalerType.Void, _marshal_null_to_js);
         cs_to_js_marshalers.set(MarshalerType.Discard, _marshal_null_to_js);
+        cs_to_js_marshalers.set(MarshalerType.DiscardNoWait, _marshal_null_to_js);
     }
 }
 
-export function bind_arg_marshal_to_js(sig: JSMarshalerType, marshaler_type: MarshalerType, index: number): BoundMarshalerToJs | undefined {
-    if (marshaler_type === MarshalerType.None || marshaler_type === MarshalerType.Void) {
+export function bind_arg_marshal_to_js (sig: JSMarshalerType, marshaler_type: MarshalerType, index: number): BoundMarshalerToJs | undefined {
+    if (marshaler_type === MarshalerType.None || marshaler_type === MarshalerType.Void || marshaler_type === MarshalerType.Discard || marshaler_type === MarshalerType.DiscardNoWait) {
         return undefined;
     }
 
@@ -86,7 +90,7 @@ export function bind_arg_marshal_to_js(sig: JSMarshalerType, marshaler_type: Mar
     };
 }
 
-export function get_marshaler_to_js_by_type(marshaler_type: MarshalerType): MarshalerToJs | undefined {
+export function get_marshaler_to_js_by_type (marshaler_type: MarshalerType): MarshalerToJs | undefined {
     if (marshaler_type === MarshalerType.None || marshaler_type === MarshalerType.Void) {
         return undefined;
     }
@@ -95,15 +99,15 @@ export function get_marshaler_to_js_by_type(marshaler_type: MarshalerType): Mars
     return converter;
 }
 
-function _marshal_bool_to_js(arg: JSMarshalerArgument): boolean | null {
+function _marshal_bool_to_js (arg: JSMarshalerArgument): boolean | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
     }
-    return get_arg_b8(arg);
+    return get_arg_bool(arg);
 }
 
-function _marshal_byte_to_js(arg: JSMarshalerArgument): number | null {
+function _marshal_byte_to_js (arg: JSMarshalerArgument): number | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -111,7 +115,7 @@ function _marshal_byte_to_js(arg: JSMarshalerArgument): number | null {
     return get_arg_u8(arg);
 }
 
-function _marshal_char_to_js(arg: JSMarshalerArgument): number | null {
+function _marshal_char_to_js (arg: JSMarshalerArgument): number | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -119,7 +123,7 @@ function _marshal_char_to_js(arg: JSMarshalerArgument): number | null {
     return get_arg_u16(arg);
 }
 
-function _marshal_int16_to_js(arg: JSMarshalerArgument): number | null {
+function _marshal_int16_to_js (arg: JSMarshalerArgument): number | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -127,7 +131,7 @@ function _marshal_int16_to_js(arg: JSMarshalerArgument): number | null {
     return get_arg_i16(arg);
 }
 
-export function marshal_int32_to_js(arg: JSMarshalerArgument): number | null {
+export function marshal_int32_to_js (arg: JSMarshalerArgument): number | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -135,7 +139,7 @@ export function marshal_int32_to_js(arg: JSMarshalerArgument): number | null {
     return get_arg_i32(arg);
 }
 
-function _marshal_int52_to_js(arg: JSMarshalerArgument): number | null {
+function _marshal_int52_to_js (arg: JSMarshalerArgument): number | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -143,7 +147,7 @@ function _marshal_int52_to_js(arg: JSMarshalerArgument): number | null {
     return get_arg_i52(arg);
 }
 
-function _marshal_bigint64_to_js(arg: JSMarshalerArgument): bigint | null {
+function _marshal_bigint64_to_js (arg: JSMarshalerArgument): bigint | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -151,7 +155,7 @@ function _marshal_bigint64_to_js(arg: JSMarshalerArgument): bigint | null {
     return get_arg_i64_big(arg);
 }
 
-function _marshal_float_to_js(arg: JSMarshalerArgument): number | null {
+function _marshal_float_to_js (arg: JSMarshalerArgument): number | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -159,7 +163,7 @@ function _marshal_float_to_js(arg: JSMarshalerArgument): number | null {
     return get_arg_f32(arg);
 }
 
-function _marshal_double_to_js(arg: JSMarshalerArgument): number | null {
+function _marshal_double_to_js (arg: JSMarshalerArgument): number | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -167,7 +171,7 @@ function _marshal_double_to_js(arg: JSMarshalerArgument): number | null {
     return get_arg_f64(arg);
 }
 
-function _marshal_intptr_to_js(arg: JSMarshalerArgument): number | null {
+function _marshal_intptr_to_js (arg: JSMarshalerArgument): number | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -175,11 +179,11 @@ function _marshal_intptr_to_js(arg: JSMarshalerArgument): number | null {
     return get_arg_intptr(arg);
 }
 
-function _marshal_null_to_js(): null {
+function _marshal_null_to_js (): null {
     return null;
 }
 
-function _marshal_datetime_to_js(arg: JSMarshalerArgument): Date | null {
+function _marshal_datetime_to_js (arg: JSMarshalerArgument): Date | null {
     const type = get_arg_type(arg);
     if (type === MarshalerType.None) {
         return null;
@@ -188,7 +192,7 @@ function _marshal_datetime_to_js(arg: JSMarshalerArgument): Date | null {
 }
 
 // NOTE: at the moment, this can't dispatch async calls (with Task/Promise return type). Therefore we don't have to worry about pre-created Task.
-function _marshal_delegate_to_js(arg: JSMarshalerArgument, _?: MarshalerType, res_converter?: MarshalerToJs, arg1_converter?: MarshalerToCs, arg2_converter?: MarshalerToCs, arg3_converter?: MarshalerToCs): Function | null {
+function _marshal_delegate_to_js (arg: JSMarshalerArgument, _?: MarshalerType, res_converter?: MarshalerToJs, arg1_converter?: MarshalerToCs, arg2_converter?: MarshalerToCs, arg3_converter?: MarshalerToCs): Function | null {
     const type = get_arg_type(arg);
     if (type === MarshalerType.None) {
         return null;
@@ -201,7 +205,7 @@ function _marshal_delegate_to_js(arg: JSMarshalerArgument, _?: MarshalerType, re
         result = (arg1_js: any, arg2_js: any, arg3_js: any): any => {
             mono_assert(!WasmEnableThreads || !result.isDisposed, "Delegate is disposed and should not be invoked anymore.");
             // arg numbers are shifted by one, the real first is a gc handle of the callback
-            return runtimeHelpers.javaScriptExports.call_delegate(gc_handle, arg1_js, arg2_js, arg3_js, res_converter, arg1_converter, arg2_converter, arg3_converter);
+            return call_delegate(gc_handle, arg1_js, arg2_js, arg3_js, res_converter, arg1_converter, arg2_converter, arg3_converter);
         };
         result.dispose = () => {
             if (!result.isDisposed) {
@@ -220,11 +224,11 @@ function _marshal_delegate_to_js(arg: JSMarshalerArgument, _?: MarshalerType, re
 }
 
 export class TaskHolder {
-    constructor(public promise: Promise<any>, public resolve_or_reject: (type: MarshalerType, js_handle: JSHandle, argInner: JSMarshalerArgument) => void) {
+    constructor (public promise: Promise<any>, public resolve_or_reject: (type: MarshalerType, js_handle: JSHandle, argInner: JSMarshalerArgument) => void) {
     }
 }
 
-export function marshal_task_to_js(arg: JSMarshalerArgument, _?: MarshalerType, res_converter?: MarshalerToJs): Promise<any> | null {
+export function marshal_task_to_js (arg: JSMarshalerArgument, _?: MarshalerType, res_converter?: MarshalerToJs): Promise<any> | null {
     const type = get_arg_type(arg);
     // this path is used only when Task is passed as argument to JSImport and virtual JSHandle would be used
     mono_assert(type != MarshalerType.TaskPreCreated, "Unexpected Task type: TaskPreCreated");
@@ -245,7 +249,7 @@ export function marshal_task_to_js(arg: JSMarshalerArgument, _?: MarshalerType,
     return holder.promise;
 }
 
-export function begin_marshal_task_to_js(arg: JSMarshalerArgument, _?: MarshalerType, res_converter?: MarshalerToJs): Promise<any> | null {
+export function begin_marshal_task_to_js (arg: JSMarshalerArgument, _?: MarshalerType, res_converter?: MarshalerToJs): Promise<any> | null {
     // this path is used when Task is returned from JSExport/call_entry_point
     const holder = create_task_holder(res_converter);
     const js_handle = mono_wasm_get_js_handle(holder);
@@ -257,7 +261,7 @@ export function begin_marshal_task_to_js(arg: JSMarshalerArgument, _?: Marshaler
     return holder.promise;
 }
 
-export function end_marshal_task_to_js(args: JSMarshalerArguments, res_converter: MarshalerToJs | undefined, eagerPromise: Promise<any> | null) {
+export function end_marshal_task_to_js (args: JSMarshalerArguments, res_converter: MarshalerToJs | undefined, eagerPromise: Promise<any> | null) {
     // this path is used when Task is returned from JSExport/call_entry_point
     const res = get_arg(args, 1);
     const type = get_arg_type(res);
@@ -280,7 +284,7 @@ export function end_marshal_task_to_js(args: JSMarshalerArguments, res_converter
     return promise;
 }
 
-function try_marshal_sync_task_to_js(arg: JSMarshalerArgument, type: MarshalerType, res_converter?: MarshalerToJs): Promise<any> | null | false {
+function try_marshal_sync_task_to_js (arg: JSMarshalerArgument, type: MarshalerType, res_converter?: MarshalerToJs): Promise<any> | null | false {
     if (type === MarshalerType.None) {
         return null;
     }
@@ -306,7 +310,7 @@ function try_marshal_sync_task_to_js(arg: JSMarshalerArgument, type: MarshalerTy
     return false;
 }
 
-function create_task_holder(res_converter?: MarshalerToJs) {
+function create_task_holder (res_converter?: MarshalerToJs) {
     const { promise, promise_control } = loaderHelpers.createPromiseController<any>();
     const holder = new TaskHolder(promise, (type, js_handle, argInner) => {
         if (type === MarshalerType.TaskRejected) {
@@ -326,8 +330,7 @@ function create_task_holder(res_converter?: MarshalerToJs) {
                 const js_value = res_converter!(argInner);
                 promise_control.resolve(js_value);
             }
-        }
-        else {
+        } else {
             mono_assert(false, () => `Unexpected type ${MarshalerType[type]}`);
         }
         mono_wasm_release_cs_owned_object(js_handle);
@@ -335,8 +338,17 @@ function create_task_holder(res_converter?: MarshalerToJs) {
     return holder;
 }
 
-export function mono_wasm_resolve_or_reject_promise(args: JSMarshalerArguments): void {
+export function mono_wasm_resolve_or_reject_promise (args: JSMarshalerArguments): void {
+    // rejection/resolution should not arrive earlier than the promise created by marshaling in mono_wasm_invoke_jsimport_MT
+    invoke_later_when_on_ui_thread_async(() => mono_wasm_resolve_or_reject_promise_impl(args));
+}
+export function mono_wasm_resolve_or_reject_promise_impl (args: JSMarshalerArguments): void {
+    if (!loaderHelpers.is_runtime_running()) {
+        mono_log_debug("This promise resolution/rejection can't be propagated to managed code, mono runtime already exited.");
+        return;
+    }
     const exc = get_arg(args, 0);
+    const receiver_should_free = WasmEnableThreads && is_receiver_should_free(args);
     try {
         loaderHelpers.assert_runtime_running();
 
@@ -351,24 +363,23 @@ export function mono_wasm_resolve_or_reject_promise(args: JSMarshalerArguments):
         mono_assert(holder, () => `Cannot find Promise for JSHandle ${js_handle}`);
 
         holder.resolve_or_reject(type, js_handle, arg_value);
-        if (WasmEnableThreads && get_arg_b8(res)) {
+        if (receiver_should_free) {
             // this works together with AllocHGlobal in JSFunctionBinding.ResolveOrRejectPromise
             Module._free(args as any);
-        }
-        else {
+        } else {
             set_arg_type(res, MarshalerType.Void);
             set_arg_type(exc, MarshalerType.None);
         }
 
     } catch (ex: any) {
-        if (WasmEnableThreads) {
+        if (receiver_should_free) {
             mono_assert(false, () => `Failed to resolve or reject promise ${ex}`);
         }
         marshal_exception_to_cs(exc, ex);
     }
 }
 
-export function marshal_string_to_js(arg: JSMarshalerArgument): string | null {
+export function marshal_string_to_js (arg: JSMarshalerArgument): string | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -379,8 +390,8 @@ export function marshal_string_to_js(arg: JSMarshalerArgument): string | null {
         const value = utf16ToString(<any>buffer, <any>buffer + len);
         Module._free(buffer as any);
         return value;
-    }
-    else {
+    } else {
+        mono_assert(!WasmEnableThreads, "Marshaling strings by reference is not supported in multithreaded mode");
         const root = get_string_root(arg);
         try {
             const value = monoStringToString(root);
@@ -391,7 +402,7 @@ export function marshal_string_to_js(arg: JSMarshalerArgument): string | null {
     }
 }
 
-export function marshal_exception_to_js(arg: JSMarshalerArgument): Error | null {
+export function marshal_exception_to_js (arg: JSMarshalerArgument): Error | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -422,7 +433,7 @@ export function marshal_exception_to_js(arg: JSMarshalerArgument): Error | null
     return result;
 }
 
-function _marshal_js_object_to_js(arg: JSMarshalerArgument): any {
+function _marshal_js_object_to_js (arg: JSMarshalerArgument): any {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -433,7 +444,7 @@ function _marshal_js_object_to_js(arg: JSMarshalerArgument): any {
     return js_obj;
 }
 
-function _marshal_cs_object_to_js(arg: JSMarshalerArgument): any {
+function _marshal_cs_object_to_js (arg: JSMarshalerArgument): any {
     const marshaler_type = get_arg_type(arg);
     if (marshaler_type == MarshalerType.None) {
         return null;
@@ -476,12 +487,12 @@ function _marshal_cs_object_to_js(arg: JSMarshalerArgument): any {
     return converter(arg);
 }
 
-function _marshal_array_to_js(arg: JSMarshalerArgument, element_type?: MarshalerType): Array<any> | TypedArray | null {
+function _marshal_array_to_js (arg: JSMarshalerArgument, element_type?: MarshalerType): Array<any> | TypedArray | null {
     mono_assert(!!element_type, "Expected valid element_type parameter");
     return _marshal_array_to_js_impl(arg, element_type);
 }
 
-function _marshal_array_to_js_impl(arg: JSMarshalerArgument, element_type: MarshalerType): Array<any> | TypedArray | null {
+function _marshal_array_to_js_impl (arg: JSMarshalerArgument, element_type: MarshalerType): Array<any> | TypedArray | null {
     const type = get_arg_type(arg);
     if (type == MarshalerType.None) {
         return null;
@@ -498,46 +509,42 @@ function _marshal_array_to_js_impl(arg: JSMarshalerArgument, element_type: Marsh
             result[index] = marshal_string_to_js(element_arg);
         }
         if (!WasmEnableJsInteropByValue) {
+            mono_assert(!WasmEnableThreads, "Marshaling string by reference is not supported in multithreaded mode");
             cwraps.mono_wasm_deregister_root(<any>buffer_ptr);
         }
-    }
-    else if (element_type == MarshalerType.Object) {
+    } else if (element_type == MarshalerType.Object) {
         result = new Array(length);
         for (let index = 0; index < length; index++) {
             const element_arg = get_arg(<any>buffer_ptr, index);
             result[index] = _marshal_cs_object_to_js(element_arg);
         }
         if (!WasmEnableJsInteropByValue) {
+            mono_assert(!WasmEnableThreads, "Marshaling objects by reference is not supported in multithreaded mode");
             cwraps.mono_wasm_deregister_root(<any>buffer_ptr);
         }
-    }
-    else if (element_type == MarshalerType.JSObject) {
+    } else if (element_type == MarshalerType.JSObject) {
         result = new Array(length);
         for (let index = 0; index < length; index++) {
             const element_arg = get_arg(<any>buffer_ptr, index);
             result[index] = _marshal_js_object_to_js(element_arg);
         }
-    }
-    else if (element_type == MarshalerType.Byte) {
+    } else if (element_type == MarshalerType.Byte) {
         const sourceView = localHeapViewU8().subarray(<any>buffer_ptr, buffer_ptr + length);
         result = sourceView.slice();//copy
-    }
-    else if (element_type == MarshalerType.Int32) {
+    } else if (element_type == MarshalerType.Int32) {
         const sourceView = localHeapViewI32().subarray(buffer_ptr >> 2, (buffer_ptr >> 2) + length);
         result = sourceView.slice();//copy
-    }
-    else if (element_type == MarshalerType.Double) {
+    } else if (element_type == MarshalerType.Double) {
         const sourceView = localHeapViewF64().subarray(buffer_ptr >> 3, (buffer_ptr >> 3) + length);
         result = sourceView.slice();//copy
-    }
-    else {
+    } else {
         throw new Error(`NotImplementedException ${MarshalerType[element_type]}. ${jsinteropDoc}`);
     }
     Module._free(<any>buffer_ptr);
     return result;
 }
 
-function _marshal_span_to_js(arg: JSMarshalerArgument, element_type?: MarshalerType): Span {
+function _marshal_span_to_js (arg: JSMarshalerArgument, element_type?: MarshalerType): Span {
     mono_assert(!!element_type, "Expected valid element_type parameter");
 
     const buffer_ptr = get_arg_intptr(arg);
@@ -545,20 +552,17 @@ function _marshal_span_to_js(arg: JSMarshalerArgument, element_type?: MarshalerT
     let result: Span | null = null;
     if (element_type == MarshalerType.Byte) {
         result = new Span(<any>buffer_ptr, length, MemoryViewType.Byte);
-    }
-    else if (element_type == MarshalerType.Int32) {
+    } else if (element_type == MarshalerType.Int32) {
         result = new Span(<any>buffer_ptr, length, MemoryViewType.Int32);
-    }
-    else if (element_type == MarshalerType.Double) {
+    } else if (element_type == MarshalerType.Double) {
         result = new Span(<any>buffer_ptr, length, MemoryViewType.Double);
-    }
-    else {
+    } else {
         throw new Error(`NotImplementedException ${MarshalerType[element_type]}. ${jsinteropDoc}`);
     }
     return result;
 }
 
-function _marshal_array_segment_to_js(arg: JSMarshalerArgument, element_type?: MarshalerType): ArraySegment {
+function _marshal_array_segment_to_js (arg: JSMarshalerArgument, element_type?: MarshalerType): ArraySegment {
     mono_assert(!!element_type, "Expected valid element_type parameter");
 
     const buffer_ptr = get_arg_intptr(arg);
@@ -566,14 +570,11 @@ function _marshal_array_segment_to_js(arg: JSMarshalerArgument, element_type?: M
     let result: ArraySegment | null = null;
     if (element_type == MarshalerType.Byte) {
         result = new ArraySegment(<any>buffer_ptr, length, MemoryViewType.Byte);
-    }
-    else if (element_type == MarshalerType.Int32) {
+    } else if (element_type == MarshalerType.Int32) {
         result = new ArraySegment(<any>buffer_ptr, length, MemoryViewType.Int32);
-    }
-    else if (element_type == MarshalerType.Double) {
+    } else if (element_type == MarshalerType.Double) {
         result = new ArraySegment(<any>buffer_ptr, length, MemoryViewType.Double);
-    }
-    else {
+    } else {
         throw new Error(`NotImplementedException ${MarshalerType[element_type]}. ${jsinteropDoc}`);
     }
     const gc_handle = get_arg_gc_handle(arg);
diff --git a/src/mono/browser/runtime/marshal.ts b/src/mono/browser/runtime/marshal.ts
index 7af38977bff1..c0ece106f53b 100644
--- a/src/mono/browser/runtime/marshal.ts
+++ b/src/mono/browser/runtime/marshal.ts
@@ -5,11 +5,12 @@ import WasmEnableThreads from "consts:wasmEnableThreads";
 
 import { js_owned_gc_handle_symbol, teardown_managed_proxy } from "./gc-handles";
 import { Module, loaderHelpers, mono_assert, runtimeHelpers } from "./globals";
-import { getF32, getF64, getI16, getI32, getI64Big, getU16, getU32, getU8, setF32, setF64, setI16, setI32, setI64Big, setU16, setU32, setU8, localHeapViewF64, localHeapViewI32, localHeapViewU8, _zero_region } from "./memory";
+import { getF32, getF64, getI16, getI32, getI64Big, getU16, getU32, getU8, setF32, setF64, setI16, setI32, setI64Big, setU16, setU32, setU8, localHeapViewF64, localHeapViewI32, localHeapViewU8, _zero_region, forceThreadMemoryViewRefresh, setB8, getB8 } from "./memory";
 import { mono_wasm_new_external_root } from "./roots";
-import { GCHandle, JSHandle, MonoObject, MonoString, GCHandleNull, JSMarshalerArguments, JSFunctionSignature, JSMarshalerType, JSMarshalerArgument, MarshalerToJs, MarshalerToCs, WasmRoot, MarshalerType } from "./types/internal";
+import { GCHandle, JSHandle, MonoObject, MonoString, GCHandleNull, JSMarshalerArguments, JSFunctionSignature, JSMarshalerType, JSMarshalerArgument, MarshalerToJs, MarshalerToCs, WasmRoot, MarshalerType, PThreadPtr, PThreadPtrNull, VoidPtrNull } from "./types/internal";
 import { TypedArray, VoidPtr } from "./types/emscripten";
 import { utf16ToString } from "./strings";
+import { get_managed_stack_trace } from "./managed-exports";
 
 export const cs_to_js_marshalers = new Map<MarshalerType, MarshalerToJs>();
 export const js_to_cs_marshalers = new Map<MarshalerType, MarshalerToCs>();
@@ -19,10 +20,54 @@ export const imported_js_function_symbol = Symbol.for("wasm imported_js_function
 export const proxy_debug_symbol = Symbol.for("wasm proxy_debug");
 
 export const JavaScriptMarshalerArgSize = 32;
+// keep in sync with JSMarshalerArgumentImpl offsets
+const enum JSMarshalerArgumentOffsets {
+    BooleanValue = 0,
+    ByteValue = 0,
+    CharValue = 0,
+    Int16Value = 0,
+    Int32Value = 0,
+    Int64Value = 0,
+    SingleValue = 0,
+    DoubleValue = 0,
+    IntPtrValue = 0,
+    JSHandle = 4,
+    GCHandle = 4,
+    Length = 8,
+    Type = 12,
+    ElementType = 13,
+    ContextHandle = 16,
+    ReceiverShouldFree = 20,
+    CallerNativeTID = 24,
+    SyncDoneSemaphorePtr = 28,
+}
 export const JSMarshalerTypeSize = 32;
+// keep in sync with JSFunctionBinding.JSBindingType
+const enum JSBindingTypeOffsets {
+    Type = 0,
+    ResultMarshalerType = 16,
+    Arg1MarshalerType = 20,
+    Arg2MarshalerType = 24,
+    Arg3MarshalerType = 28,
+}
 export const JSMarshalerSignatureHeaderSize = 4 * 8; // without Exception and Result
-
-export function alloc_stack_frame(size: number): JSMarshalerArguments {
+// keep in sync with JSFunctionBinding.JSBindingHeader
+const enum JSBindingHeaderOffsets {
+    Version = 0,
+    ArgumentCount = 4,
+    ImportHandle = 8,
+    FunctionNameOffset = 16,
+    FunctionNameLength = 20,
+    ModuleNameOffset = 24,
+    ModuleNameLength = 28,
+    Exception = 32,
+    Result = 64,
+}
+
+export function alloc_stack_frame (size: number): JSMarshalerArguments {
+    if (WasmEnableThreads) {
+        forceThreadMemoryViewRefresh();
+    }
     const bytes = JavaScriptMarshalerArgSize * size;
     const args = Module.stackAlloc(bytes) as any;
     _zero_region(args, bytes);
@@ -30,18 +75,41 @@ export function alloc_stack_frame(size: number): JSMarshalerArguments {
     return args;
 }
 
-export function get_arg(args: JSMarshalerArguments, index: number): JSMarshalerArgument {
+export function get_arg (args: JSMarshalerArguments, index: number): JSMarshalerArgument {
     mono_assert(args, "Null args");
     return <any>args + (index * JavaScriptMarshalerArgSize);
 }
 
-export function is_args_exception(args: JSMarshalerArguments): boolean {
+export function is_args_exception (args: JSMarshalerArguments): boolean {
     mono_assert(args, "Null args");
     const exceptionType = get_arg_type(<any>args);
     return exceptionType !== MarshalerType.None;
 }
 
-export function set_args_context(args: JSMarshalerArguments): void {
+export function is_receiver_should_free (args: JSMarshalerArguments): boolean {
+    if (!WasmEnableThreads) return false;
+    mono_assert(args, "Null args");
+    return getB8(<any>args + JSMarshalerArgumentOffsets.ReceiverShouldFree);
+}
+
+export function get_sync_done_semaphore_ptr (args: JSMarshalerArguments): VoidPtr {
+    if (!WasmEnableThreads) return VoidPtrNull;
+    mono_assert(args, "Null args");
+    return getI32(<any>args + JSMarshalerArgumentOffsets.SyncDoneSemaphorePtr) as any;
+}
+
+export function get_caller_native_tid (args: JSMarshalerArguments): PThreadPtr {
+    if (!WasmEnableThreads) return PThreadPtrNull;
+    mono_assert(args, "Null args");
+    return getI32(<any>args + JSMarshalerArgumentOffsets.CallerNativeTID) as any;
+}
+
+export function set_receiver_should_free (args: JSMarshalerArguments): void {
+    mono_assert(args, "Null args");
+    setB8(<any>args + JSMarshalerArgumentOffsets.ReceiverShouldFree, true);
+}
+
+export function set_args_context (args: JSMarshalerArguments): void {
     if (!WasmEnableThreads) return;
     mono_assert(args, "Null args");
     const exc = get_arg(args, 0);
@@ -50,257 +118,257 @@ export function set_args_context(args: JSMarshalerArguments): void {
     set_arg_proxy_context(res);
 }
 
-export function get_sig(signature: JSFunctionSignature, index: number): JSMarshalerType {
+export function get_sig (signature: JSFunctionSignature, index: number): JSMarshalerType {
     mono_assert(signature, "Null signatures");
     return <any>signature + (index * JSMarshalerTypeSize) + JSMarshalerSignatureHeaderSize;
 }
 
-export function get_signature_type(sig: JSMarshalerType): MarshalerType {
+export function get_signature_type (sig: JSMarshalerType): MarshalerType {
     mono_assert(sig, "Null sig");
-    return <any>getU8(sig);
+    return <any>getU8(<any>sig + JSBindingTypeOffsets.Type);
 }
 
-export function get_signature_res_type(sig: JSMarshalerType): MarshalerType {
+export function get_signature_res_type (sig: JSMarshalerType): MarshalerType {
     mono_assert(sig, "Null sig");
-    return <any>getU8(<any>sig + 16);
+    return <any>getU8(<any>sig + JSBindingTypeOffsets.ResultMarshalerType);
 }
 
-export function get_signature_arg1_type(sig: JSMarshalerType): MarshalerType {
+export function get_signature_arg1_type (sig: JSMarshalerType): MarshalerType {
     mono_assert(sig, "Null sig");
-    return <any>getU8(<any>sig + 20);
+    return <any>getU8(<any>sig + JSBindingTypeOffsets.Arg1MarshalerType);
 }
 
-export function get_signature_arg2_type(sig: JSMarshalerType): MarshalerType {
+export function get_signature_arg2_type (sig: JSMarshalerType): MarshalerType {
     mono_assert(sig, "Null sig");
-    return <any>getU8(<any>sig + 24);
+    return <any>getU8(<any>sig + JSBindingTypeOffsets.Arg2MarshalerType);
 }
 
-export function get_signature_arg3_type(sig: JSMarshalerType): MarshalerType {
+export function get_signature_arg3_type (sig: JSMarshalerType): MarshalerType {
     mono_assert(sig, "Null sig");
-    return <any>getU8(<any>sig + 28);
+    return <any>getU8(<any>sig + JSBindingTypeOffsets.Arg2MarshalerType);
 }
 
-export function get_signature_argument_count(signature: JSFunctionSignature): number {
+export function get_signature_argument_count (signature: JSFunctionSignature): number {
     mono_assert(signature, "Null signatures");
-    return <any>getI32(<any>signature + 4);
+    return <any>getI32(<any>signature + JSBindingHeaderOffsets.ArgumentCount);
 }
 
-export function get_signature_version(signature: JSFunctionSignature): number {
+export function get_signature_version (signature: JSFunctionSignature): number {
     mono_assert(signature, "Null signatures");
-    return <any>getI32(signature);
+    return <any>getI32(<any>signature + JSBindingHeaderOffsets.Version);
 }
 
-export function get_signature_handle(signature: JSFunctionSignature): number {
+export function get_signature_handle (signature: JSFunctionSignature): number {
     mono_assert(signature, "Null signatures");
-    return <any>getI32(<any>signature + 8);
+    return <any>getI32(<any>signature + JSBindingHeaderOffsets.ImportHandle);
 }
 
-export function get_signature_function_name(signature: JSFunctionSignature): string | null {
+export function get_signature_function_name (signature: JSFunctionSignature): string | null {
     mono_assert(signature, "Null signatures");
-    const functionNameOffset = <any>getI32(<any>signature + 16);
+    const functionNameOffset = <any>getI32(<any>signature + JSBindingHeaderOffsets.FunctionNameOffset);
     if (functionNameOffset === 0) return null;
-    const functionNameLength = <any>getI32(<any>signature + 20);
+    const functionNameLength = <any>getI32(<any>signature + JSBindingHeaderOffsets.FunctionNameLength);
     mono_assert(functionNameOffset, "Null name");
     return utf16ToString(<any>signature + functionNameOffset, <any>signature + functionNameOffset + functionNameLength);
 }
 
-export function get_signature_module_name(signature: JSFunctionSignature): string | null {
+export function get_signature_module_name (signature: JSFunctionSignature): string | null {
     mono_assert(signature, "Null signatures");
-    const moduleNameOffset = <any>getI32(<any>signature + 24);
+    const moduleNameOffset = <any>getI32(<any>signature + JSBindingHeaderOffsets.ModuleNameOffset);
     if (moduleNameOffset === 0) return null;
-    const moduleNameLength = <any>getI32(<any>signature + 28);
+    const moduleNameLength = <any>getI32(<any>signature + JSBindingHeaderOffsets.ModuleNameLength);
     return utf16ToString(<any>signature + moduleNameOffset, <any>signature + moduleNameOffset + moduleNameLength);
 }
 
-export function get_sig_type(sig: JSMarshalerType): MarshalerType {
+export function get_sig_type (sig: JSMarshalerType): MarshalerType {
     mono_assert(sig, "Null signatures");
     return <any>getU8(sig);
 }
 
-export function get_arg_type(arg: JSMarshalerArgument): MarshalerType {
+export function get_arg_type (arg: JSMarshalerArgument): MarshalerType {
     mono_assert(arg, "Null arg");
-    const type = getU8(<any>arg + 12);
+    const type = getU8(<any>arg + JSMarshalerArgumentOffsets.Type);
     return <any>type;
 }
 
-export function get_arg_element_type(arg: JSMarshalerArgument): MarshalerType {
+export function get_arg_element_type (arg: JSMarshalerArgument): MarshalerType {
     mono_assert(arg, "Null arg");
-    const type = getU8(<any>arg + 13);
+    const type = getU8(<any>arg + JSMarshalerArgumentOffsets.ElementType);
     return <any>type;
 }
 
-export function set_arg_type(arg: JSMarshalerArgument, type: MarshalerType): void {
+export function set_arg_type (arg: JSMarshalerArgument, type: MarshalerType): void {
     mono_assert(arg, "Null arg");
-    setU8(<any>arg + 12, type);
+    setU8(<any>arg + JSMarshalerArgumentOffsets.Type, type);
 }
 
-export function set_arg_element_type(arg: JSMarshalerArgument, type: MarshalerType): void {
+export function set_arg_element_type (arg: JSMarshalerArgument, type: MarshalerType): void {
     mono_assert(arg, "Null arg");
-    setU8(<any>arg + 13, type);
+    setU8(<any>arg + JSMarshalerArgumentOffsets.ElementType, type);
 }
 
-export function get_arg_b8(arg: JSMarshalerArgument): boolean {
+export function get_arg_bool (arg: JSMarshalerArgument): boolean {
     mono_assert(arg, "Null arg");
-    return !!getU8(<any>arg);
+    return getB8(<any>arg);
 }
 
-export function get_arg_u8(arg: JSMarshalerArgument): number {
+export function get_arg_u8 (arg: JSMarshalerArgument): number {
     mono_assert(arg, "Null arg");
     return getU8(<any>arg);
 }
 
-export function get_arg_u16(arg: JSMarshalerArgument): number {
+export function get_arg_u16 (arg: JSMarshalerArgument): number {
     mono_assert(arg, "Null arg");
     return getU16(<any>arg);
 }
 
-export function get_arg_i16(arg: JSMarshalerArgument): number {
+export function get_arg_i16 (arg: JSMarshalerArgument): number {
     mono_assert(arg, "Null arg");
     return getI16(<any>arg);
 }
 
-export function get_arg_i32(arg: JSMarshalerArgument): number {
+export function get_arg_i32 (arg: JSMarshalerArgument): number {
     mono_assert(arg, "Null arg");
     return getI32(<any>arg);
 }
 
-export function get_arg_intptr(arg: JSMarshalerArgument): number {
+export function get_arg_intptr (arg: JSMarshalerArgument): number {
     mono_assert(arg, "Null arg");
     return getI32(<any>arg);
 }
 
-export function get_arg_i52(arg: JSMarshalerArgument): number {
+export function get_arg_i52 (arg: JSMarshalerArgument): number {
     mono_assert(arg, "Null arg");
     // we know that the range check and conversion from Int64 was be done on C# side
     return getF64(<any>arg);
 }
 
-export function get_arg_i64_big(arg: JSMarshalerArgument): bigint {
+export function get_arg_i64_big (arg: JSMarshalerArgument): bigint {
     mono_assert(arg, "Null arg");
     return getI64Big(<any>arg);
 }
 
-export function get_arg_date(arg: JSMarshalerArgument): Date {
+export function get_arg_date (arg: JSMarshalerArgument): Date {
     mono_assert(arg, "Null arg");
     const unixTime = getF64(<any>arg);
     const date = new Date(unixTime);
     return date;
 }
 
-export function get_arg_f32(arg: JSMarshalerArgument): number {
+export function get_arg_f32 (arg: JSMarshalerArgument): number {
     mono_assert(arg, "Null arg");
     return getF32(<any>arg);
 }
 
-export function get_arg_f64(arg: JSMarshalerArgument): number {
+export function get_arg_f64 (arg: JSMarshalerArgument): number {
     mono_assert(arg, "Null arg");
     return getF64(<any>arg);
 }
 
-export function set_arg_b8(arg: JSMarshalerArgument, value: boolean): void {
+export function set_arg_bool (arg: JSMarshalerArgument, value: boolean): void {
     mono_assert(arg, "Null arg");
     mono_check(typeof value === "boolean", () => `Value is not a Boolean: ${value} (${typeof (value)})`);
-    setU8(<any>arg, value ? 1 : 0);
+    setB8(<any>arg, value);
 }
 
-export function set_arg_u8(arg: JSMarshalerArgument, value: number): void {
+export function set_arg_u8 (arg: JSMarshalerArgument, value: number): void {
     mono_assert(arg, "Null arg");
     setU8(<any>arg, value);
 }
 
-export function set_arg_u16(arg: JSMarshalerArgument, value: number): void {
+export function set_arg_u16 (arg: JSMarshalerArgument, value: number): void {
     mono_assert(arg, "Null arg");
     setU16(<any>arg, value);
 }
 
-export function set_arg_i16(arg: JSMarshalerArgument, value: number): void {
+export function set_arg_i16 (arg: JSMarshalerArgument, value: number): void {
     mono_assert(arg, "Null arg");
     setI16(<any>arg, value);
 }
 
-export function set_arg_i32(arg: JSMarshalerArgument, value: number): void {
+export function set_arg_i32 (arg: JSMarshalerArgument, value: number): void {
     mono_assert(arg, "Null arg");
     setI32(<any>arg, value);
 }
 
-export function set_arg_intptr(arg: JSMarshalerArgument, value: VoidPtr): void {
+export function set_arg_intptr (arg: JSMarshalerArgument, value: VoidPtr): void {
     mono_assert(arg, "Null arg");
     setI32(<any>arg, <any>value);
 }
 
-export function set_arg_i52(arg: JSMarshalerArgument, value: number): void {
+export function set_arg_i52 (arg: JSMarshalerArgument, value: number): void {
     mono_assert(arg, "Null arg");
     mono_check(Number.isSafeInteger(value), () => `Value is not an integer: ${value} (${typeof (value)})`);
     // we know that conversion to Int64 would be done on C# side
     setF64(<any>arg, value);
 }
 
-export function set_arg_i64_big(arg: JSMarshalerArgument, value: bigint): void {
+export function set_arg_i64_big (arg: JSMarshalerArgument, value: bigint): void {
     mono_assert(arg, "Null arg");
     setI64Big(<any>arg, value);
 }
 
-export function set_arg_date(arg: JSMarshalerArgument, value: Date): void {
+export function set_arg_date (arg: JSMarshalerArgument, value: Date): void {
     mono_assert(arg, "Null arg");
     // getTime() is always UTC
     const unixTime = value.getTime();
     setF64(<any>arg, unixTime);
 }
 
-export function set_arg_f64(arg: JSMarshalerArgument, value: number): void {
+export function set_arg_f64 (arg: JSMarshalerArgument, value: number): void {
     mono_assert(arg, "Null arg");
     setF64(<any>arg, value);
 }
 
-export function set_arg_f32(arg: JSMarshalerArgument, value: number): void {
+export function set_arg_f32 (arg: JSMarshalerArgument, value: number): void {
     mono_assert(arg, "Null arg");
     setF32(<any>arg, value);
 }
 
-export function get_arg_js_handle(arg: JSMarshalerArgument): JSHandle {
+export function get_arg_js_handle (arg: JSMarshalerArgument): JSHandle {
     mono_assert(arg, "Null arg");
-    return <any>getI32(<any>arg + 4);
+    return <any>getI32(<any>arg + JSMarshalerArgumentOffsets.JSHandle);
 }
 
-export function set_arg_proxy_context(arg: JSMarshalerArgument): void {
+export function set_arg_proxy_context (arg: JSMarshalerArgument): void {
     if (!WasmEnableThreads) return;
     mono_assert(arg, "Null arg");
-    setI32(<any>arg + 16, <any>runtimeHelpers.proxy_context_gc_handle);
+    setI32(<any>arg + JSMarshalerArgumentOffsets.ContextHandle, <any>runtimeHelpers.proxyGCHandle);
 }
 
-export function set_js_handle(arg: JSMarshalerArgument, jsHandle: JSHandle): void {
+export function set_js_handle (arg: JSMarshalerArgument, jsHandle: JSHandle): void {
     mono_assert(arg, "Null arg");
-    setI32(<any>arg + 4, <any>jsHandle);
+    setI32(<any>arg + JSMarshalerArgumentOffsets.JSHandle, <any>jsHandle);
     set_arg_proxy_context(arg);
 }
 
-export function get_arg_gc_handle(arg: JSMarshalerArgument): GCHandle {
+export function get_arg_gc_handle (arg: JSMarshalerArgument): GCHandle {
     mono_assert(arg, "Null arg");
-    return <any>getI32(<any>arg + 4);
+    return <any>getI32(<any>arg + JSMarshalerArgumentOffsets.GCHandle);
 }
 
-export function set_gc_handle(arg: JSMarshalerArgument, gcHandle: GCHandle): void {
+export function set_gc_handle (arg: JSMarshalerArgument, gcHandle: GCHandle): void {
     mono_assert(arg, "Null arg");
-    setI32(<any>arg + 4, <any>gcHandle);
+    setI32(<any>arg + JSMarshalerArgumentOffsets.GCHandle, <any>gcHandle);
     set_arg_proxy_context(arg);
 }
 
-export function get_string_root(arg: JSMarshalerArgument): WasmRoot<MonoString> {
+export function get_string_root (arg: JSMarshalerArgument): WasmRoot<MonoString> {
     mono_assert(arg, "Null arg");
     return mono_wasm_new_external_root<MonoString>(<any>arg);
 }
 
-export function get_arg_length(arg: JSMarshalerArgument): number {
+export function get_arg_length (arg: JSMarshalerArgument): number {
     mono_assert(arg, "Null arg");
-    return <any>getI32(<any>arg + 8);
+    return <any>getI32(<any>arg + JSMarshalerArgumentOffsets.Length);
 }
 
-export function set_arg_length(arg: JSMarshalerArgument, size: number): void {
+export function set_arg_length (arg: JSMarshalerArgument, size: number): void {
     mono_assert(arg, "Null arg");
-    setI32(<any>arg + 8, size);
+    setI32(<any>arg + JSMarshalerArgumentOffsets.Length, size);
 }
 
-export function set_root(arg: JSMarshalerArgument, root: WasmRoot<MonoObject>): void {
+export function set_root (arg: JSMarshalerArgument, root: WasmRoot<MonoObject>): void {
     mono_assert(arg, "Null arg");
     setU32(<any>arg + 0, root.get_address());
 }
@@ -311,15 +379,15 @@ export interface IDisposable {
 }
 
 export class ManagedObject implements IDisposable {
-    dispose(): void {
+    dispose (): void {
         teardown_managed_proxy(this, GCHandleNull);
     }
 
-    get isDisposed(): boolean {
+    get isDisposed (): boolean {
         return (<any>this)[js_owned_gc_handle_symbol] === GCHandleNull;
     }
 
-    toString(): string {
+    toString (): string {
         return `CsObject(gc_handle: ${(<any>this)[js_owned_gc_handle_symbol]})`;
     }
 }
@@ -327,7 +395,7 @@ export class ManagedObject implements IDisposable {
 export class ManagedError extends Error implements IDisposable {
     private superStack: any;
     private managed_stack: any;
-    constructor(message: string) {
+    constructor (message: string) {
         super(message);
         this.superStack = Object.getOwnPropertyDescriptor(this, "stack"); // this works on Chrome
         Object.defineProperty(this, "stack", {
@@ -335,7 +403,7 @@ export class ManagedError extends Error implements IDisposable {
         });
     }
 
-    getSuperStack() {
+    getSuperStack () {
         if (this.superStack) {
             if (this.superStack.value !== undefined)
                 return this.superStack.value;
@@ -345,7 +413,7 @@ export class ManagedError extends Error implements IDisposable {
         return super.stack; // this works on FF
     }
 
-    getManageStack() {
+    getManageStack () {
         if (this.managed_stack) {
             return this.managed_stack;
         }
@@ -353,10 +421,10 @@ export class ManagedError extends Error implements IDisposable {
             this.managed_stack = "... omitted managed stack trace.\n" + this.getSuperStack();
             return this.managed_stack;
         }
-        if (!WasmEnableThreads || runtimeHelpers.proxy_context_gc_handle) {
+        if (!WasmEnableThreads || runtimeHelpers.proxyGCHandle) {
             const gc_handle = (<any>this)[js_owned_gc_handle_symbol];
             if (gc_handle !== GCHandleNull) {
-                const managed_stack = runtimeHelpers.javaScriptExports.get_managed_stack_trace(gc_handle);
+                const managed_stack = get_managed_stack_trace(gc_handle);
                 if (managed_stack) {
                     this.managed_stack = managed_stack + "\n" + this.getSuperStack();
                     return this.managed_stack;
@@ -366,23 +434,23 @@ export class ManagedError extends Error implements IDisposable {
         return this.getSuperStack();
     }
 
-    dispose(): void {
+    dispose (): void {
         teardown_managed_proxy(this, GCHandleNull);
     }
 
-    get isDisposed(): boolean {
+    get isDisposed (): boolean {
         return (<any>this)[js_owned_gc_handle_symbol] === GCHandleNull;
     }
 }
 
-export function get_signature_marshaler(signature: JSFunctionSignature, index: number): JSHandle {
+export function get_signature_marshaler (signature: JSFunctionSignature, index: number): JSHandle {
     mono_assert(signature, "Null signatures");
     const sig = get_sig(signature, index);
-    return <any>getU32(<any>sig + 8);
+    return <any>getU32(<any>sig + JSBindingHeaderOffsets.ImportHandle);
 }
 
 
-export function array_element_size(element_type: MarshalerType): number {
+export function array_element_size (element_type: MarshalerType): number {
     return element_type == MarshalerType.Byte ? 1
         : element_type == MarshalerType.Int32 ? 4
             : element_type == MarshalerType.Int52 ? 8
@@ -400,13 +468,13 @@ export const enum MemoryViewType {
 }
 
 abstract class MemoryView implements IMemoryView {
-    protected constructor(public _pointer: VoidPtr, public _length: number, public _viewType: MemoryViewType) {
+    protected constructor (public _pointer: VoidPtr, public _length: number, public _viewType: MemoryViewType) {
     }
 
     abstract dispose(): void;
     abstract get isDisposed(): boolean;
 
-    _unsafe_create_view(): TypedArray {
+    _unsafe_create_view (): TypedArray {
         // this view must be short lived so that it doesn't fail after wasm memory growth
         // for that reason we also don't give the view out to end user and provide set/slice/copyTo API instead
         const view = this._viewType == MemoryViewType.Byte ? new Uint8Array(localHeapViewU8().buffer, <any>this._pointer, this._length)
@@ -417,7 +485,7 @@ abstract class MemoryView implements IMemoryView {
         return view;
     }
 
-    set(source: TypedArray, targetOffset?: number): void {
+    set (source: TypedArray, targetOffset?: number): void {
         mono_check(!this.isDisposed, "ObjectDisposedException");
         const targetView = this._unsafe_create_view();
         mono_check(source && targetView && source.constructor === targetView.constructor, () => `Expected ${targetView.constructor}`);
@@ -425,7 +493,7 @@ abstract class MemoryView implements IMemoryView {
         // TODO consider memory write barrier
     }
 
-    copyTo(target: TypedArray, sourceOffset?: number): void {
+    copyTo (target: TypedArray, sourceOffset?: number): void {
         mono_check(!this.isDisposed, "ObjectDisposedException");
         const sourceView = this._unsafe_create_view();
         mono_check(target && sourceView && target.constructor === sourceView.constructor, () => `Expected ${sourceView.constructor}`);
@@ -434,19 +502,19 @@ abstract class MemoryView implements IMemoryView {
         target.set(trimmedSource);
     }
 
-    slice(start?: number, end?: number): TypedArray {
+    slice (start?: number, end?: number): TypedArray {
         mono_check(!this.isDisposed, "ObjectDisposedException");
         const sourceView = this._unsafe_create_view();
         // TODO consider memory read barrier
         return sourceView.slice(start, end);
     }
 
-    get length(): number {
+    get length (): number {
         mono_check(!this.isDisposed, "ObjectDisposedException");
         return this._length;
     }
 
-    get byteLength(): number {
+    get byteLength (): number {
         mono_check(!this.isDisposed, "ObjectDisposedException");
         return this._viewType == MemoryViewType.Byte ? this._length
             : this._viewType == MemoryViewType.Int32 ? this._length << 2
@@ -478,27 +546,27 @@ export interface IMemoryView extends IDisposable {
 
 export class Span extends MemoryView {
     private is_disposed = false;
-    public constructor(pointer: VoidPtr, length: number, viewType: MemoryViewType) {
+    public constructor (pointer: VoidPtr, length: number, viewType: MemoryViewType) {
         super(pointer, length, viewType);
     }
-    dispose(): void {
+    dispose (): void {
         this.is_disposed = true;
     }
-    get isDisposed(): boolean {
+    get isDisposed (): boolean {
         return this.is_disposed;
     }
 }
 
 export class ArraySegment extends MemoryView {
-    public constructor(pointer: VoidPtr, length: number, viewType: MemoryViewType) {
+    public constructor (pointer: VoidPtr, length: number, viewType: MemoryViewType) {
         super(pointer, length, viewType);
     }
 
-    dispose(): void {
+    dispose (): void {
         teardown_managed_proxy(this, GCHandleNull);
     }
 
-    get isDisposed(): boolean {
+    get isDisposed (): boolean {
         return (<any>this)[js_owned_gc_handle_symbol] === GCHandleNull;
     }
 }
diff --git a/src/mono/browser/runtime/memory.ts b/src/mono/browser/runtime/memory.ts
index 0ae04672797c..8205e6fd8098 100644
--- a/src/mono/browser/runtime/memory.ts
+++ b/src/mono/browser/runtime/memory.ts
@@ -6,14 +6,15 @@ import WasmEnableThreads from "consts:wasmEnableThreads";
 import { MemOffset, NumberOrPointer } from "./types/internal";
 import { VoidPtr, CharPtr } from "./types/emscripten";
 import cwraps, { I52Error } from "./cwraps";
-import { Module, runtimeHelpers } from "./globals";
+import { Module, mono_assert, runtimeHelpers } from "./globals";
 import { utf8ToString } from "./strings";
+import { mono_log_warn } from "./logging";
 
 const alloca_stack: Array<VoidPtr> = [];
 const alloca_buffer_size = 32 * 1024;
 let alloca_base: VoidPtr, alloca_offset: VoidPtr, alloca_limit: VoidPtr;
 
-function _ensure_allocated(): void {
+function _ensure_allocated (): void {
     if (alloca_base)
         return;
     alloca_base = Module._malloc(alloca_buffer_size);
@@ -24,7 +25,7 @@ function _ensure_allocated(): void {
 const max_int64_big = BigInt("9223372036854775807");
 const min_int64_big = BigInt("-9223372036854775808");
 
-export function temp_malloc(size: number): VoidPtr {
+export function temp_malloc (size: number): VoidPtr {
     _ensure_allocated();
     if (!alloca_stack.length)
         throw new Error("No temp frames have been created at this point");
@@ -36,12 +37,12 @@ export function temp_malloc(size: number): VoidPtr {
     return result;
 }
 
-export function _create_temp_frame(): void {
+export function _create_temp_frame (): void {
     _ensure_allocated();
     alloca_stack.push(alloca_offset);
 }
 
-export function _release_temp_frame(): void {
+export function _release_temp_frame (): void {
     if (!alloca_stack.length)
         throw new Error("No temp frames have been created at this point");
 
@@ -49,16 +50,17 @@ export function _release_temp_frame(): void {
 }
 
 
-function assert_int_in_range(value: Number, min: Number, max: Number) {
+function assert_int_in_range (value: Number, min: Number, max: Number) {
     mono_check(Number.isSafeInteger(value), () => `Value is not an integer: ${value} (${typeof (value)})`);
     mono_check(value >= min && value <= max, () => `Overflow: value ${value} is out of ${min} ${max} range`);
 }
 
-export function _zero_region(byteOffset: VoidPtr, sizeBytes: number): void {
+export function _zero_region (byteOffset: VoidPtr, sizeBytes: number): void {
     localHeapViewU8().fill(0, <any>byteOffset, <any>byteOffset + sizeBytes);
 }
 
-export function setB32(offset: MemOffset, value: number | boolean): void {
+/** note: MonoBoolean is 8 bits not 32 bits when inside a structure or array */
+export function setB32 (offset: MemOffset, value: number | boolean): void {
     receiveWorkerHeapViews();
     const boolValue = !!value;
     if (typeof (value) === "number")
@@ -66,64 +68,72 @@ export function setB32(offset: MemOffset, value: number | boolean): void {
     Module.HEAP32[<any>offset >>> 2] = boolValue ? 1 : 0;
 }
 
-export function setU8(offset: MemOffset, value: number): void {
+export function setB8 (offset: MemOffset, value: number | boolean): void {
+    const boolValue = !!value;
+    if (typeof (value) === "number")
+        assert_int_in_range(value, 0, 1);
+    receiveWorkerHeapViews();
+    Module.HEAPU8[<any>offset] = boolValue ? 1 : 0;
+}
+
+export function setU8 (offset: MemOffset, value: number): void {
     assert_int_in_range(value, 0, 0xFF);
     receiveWorkerHeapViews();
     Module.HEAPU8[<any>offset] = value;
 }
 
-export function setU16(offset: MemOffset, value: number): void {
+export function setU16 (offset: MemOffset, value: number): void {
     assert_int_in_range(value, 0, 0xFFFF);
     receiveWorkerHeapViews();
     Module.HEAPU16[<any>offset >>> 1] = value;
 }
 
 // does not check for growable heap
-export function setU16_local(localView: Uint16Array, offset: MemOffset, value: number): void {
+export function setU16_local (localView: Uint16Array, offset: MemOffset, value: number): void {
     assert_int_in_range(value, 0, 0xFFFF);
     localView[<any>offset >>> 1] = value;
 }
 
 // does not check for overflow nor growable heap
-export function setU16_unchecked(offset: MemOffset, value: number): void {
+export function setU16_unchecked (offset: MemOffset, value: number): void {
     Module.HEAPU16[<any>offset >>> 1] = value;
 }
 
 // does not check for overflow nor growable heap
-export function setU32_unchecked(offset: MemOffset, value: NumberOrPointer): void {
+export function setU32_unchecked (offset: MemOffset, value: NumberOrPointer): void {
     Module.HEAPU32[<any>offset >>> 2] = <number><any>value;
 }
 
-export function setU32(offset: MemOffset, value: NumberOrPointer): void {
+export function setU32 (offset: MemOffset, value: NumberOrPointer): void {
     assert_int_in_range(<any>value, 0, 0xFFFF_FFFF);
     receiveWorkerHeapViews();
     Module.HEAPU32[<any>offset >>> 2] = <number><any>value;
 }
 
-export function setI8(offset: MemOffset, value: number): void {
+export function setI8 (offset: MemOffset, value: number): void {
     assert_int_in_range(value, -0x80, 0x7F);
     receiveWorkerHeapViews();
     Module.HEAP8[<any>offset] = value;
 }
 
-export function setI16(offset: MemOffset, value: number): void {
+export function setI16 (offset: MemOffset, value: number): void {
     assert_int_in_range(value, -0x8000, 0x7FFF);
     receiveWorkerHeapViews();
     Module.HEAP16[<any>offset >>> 1] = value;
 }
 
-export function setI32_unchecked(offset: MemOffset, value: number): void {
+export function setI32_unchecked (offset: MemOffset, value: number): void {
     receiveWorkerHeapViews();
     Module.HEAP32[<any>offset >>> 2] = value;
 }
 
-export function setI32(offset: MemOffset, value: number): void {
+export function setI32 (offset: MemOffset, value: number): void {
     assert_int_in_range(<any>value, -0x8000_0000, 0x7FFF_FFFF);
     receiveWorkerHeapViews();
     Module.HEAP32[<any>offset >>> 2] = value;
 }
 
-function autoThrowI52(error: I52Error) {
+function autoThrowI52 (error: I52Error) {
     if (error === I52Error.NONE)
         return;
 
@@ -140,7 +150,7 @@ function autoThrowI52(error: I52Error) {
 /**
  * Throws for values which are not 52 bit integer. See Number.isSafeInteger()
  */
-export function setI52(offset: MemOffset, value: number): void {
+export function setI52 (offset: MemOffset, value: number): void {
     mono_check(Number.isSafeInteger(value), () => `Value is not a safe integer: ${value} (${typeof (value)})`);
     receiveWorkerHeapViews();
     const error = cwraps.mono_wasm_f64_to_i52(<any>offset, value);
@@ -150,7 +160,7 @@ export function setI52(offset: MemOffset, value: number): void {
 /**
  * Throws for values which are not 52 bit integer or are negative. See Number.isSafeInteger().
  */
-export function setU52(offset: MemOffset, value: number): void {
+export function setU52 (offset: MemOffset, value: number): void {
     mono_check(Number.isSafeInteger(value), () => `Value is not a safe integer: ${value} (${typeof (value)})`);
     mono_check(value >= 0, "Can't convert negative Number into UInt64");
     receiveWorkerHeapViews();
@@ -158,101 +168,112 @@ export function setU52(offset: MemOffset, value: number): void {
     autoThrowI52(error);
 }
 
-export function setI64Big(offset: MemOffset, value: bigint): void {
+export function setI64Big (offset: MemOffset, value: bigint): void {
     mono_check(typeof value === "bigint", () => `Value is not an bigint: ${value} (${typeof (value)})`);
     mono_check(value >= min_int64_big && value <= max_int64_big, () => `Overflow: value ${value} is out of ${min_int64_big} ${max_int64_big} range`);
 
     Module.HEAP64[<any>offset >>> 3] = value;
 }
 
-export function setF32(offset: MemOffset, value: number): void {
+export function setF32 (offset: MemOffset, value: number): void {
     mono_check(typeof value === "number", () => `Value is not a Number: ${value} (${typeof (value)})`);
     receiveWorkerHeapViews();
     Module.HEAPF32[<any>offset >>> 2] = value;
 }
 
-export function setF64(offset: MemOffset, value: number): void {
+export function setF64 (offset: MemOffset, value: number): void {
     mono_check(typeof value === "number", () => `Value is not a Number: ${value} (${typeof (value)})`);
     receiveWorkerHeapViews();
     Module.HEAPF64[<any>offset >>> 3] = value;
 }
 
+let warnDirtyBool = true;
+
+export function getB32 (offset: MemOffset): boolean {
+    receiveWorkerHeapViews();
+    const value = (Module.HEAPU32[<any>offset >>> 2]);
+    if (value > 1 && warnDirtyBool) {
+        warnDirtyBool = false;
+        mono_log_warn(`getB32: value at ${offset} is not a boolean, but a number: ${value}`);
+    }
+    return !!value;
+}
 
-export function getB32(offset: MemOffset): boolean {
+export function getB8 (offset: MemOffset): boolean {
     receiveWorkerHeapViews();
-    return !!(Module.HEAP32[<any>offset >>> 2]);
+    return !!(Module.HEAPU8[<any>offset]);
 }
 
-export function getU8(offset: MemOffset): number {
+export function getU8 (offset: MemOffset): number {
     receiveWorkerHeapViews();
     return Module.HEAPU8[<any>offset];
 }
 
-export function getU16(offset: MemOffset): number {
+export function getU16 (offset: MemOffset): number {
     receiveWorkerHeapViews();
     return Module.HEAPU16[<any>offset >>> 1];
 }
 
 // does not check for growable heap
-export function getU16_local(localView: Uint16Array, offset: MemOffset): number {
+export function getU16_local (localView: Uint16Array, offset: MemOffset): number {
     return localView[<any>offset >>> 1];
 }
 
-export function getU32(offset: MemOffset): number {
+export function getU32 (offset: MemOffset): number {
     receiveWorkerHeapViews();
     return Module.HEAPU32[<any>offset >>> 2];
 }
 
 // does not check for growable heap
-export function getU32_local(localView: Uint32Array, offset: MemOffset): number {
+export function getU32_local (localView: Uint32Array, offset: MemOffset): number {
     return localView[<any>offset >>> 2];
 }
 
-export function getI32_unaligned(offset: MemOffset): number {
+export function getI32_unaligned (offset: MemOffset): number {
     return cwraps.mono_wasm_get_i32_unaligned(<any>offset);
 }
 
-export function getU32_unaligned(offset: MemOffset): number {
+export function getU32_unaligned (offset: MemOffset): number {
     return cwraps.mono_wasm_get_i32_unaligned(<any>offset) >>> 0;
 }
 
-export function getF32_unaligned(offset: MemOffset): number {
+export function getF32_unaligned (offset: MemOffset): number {
     return cwraps.mono_wasm_get_f32_unaligned(<any>offset);
 }
 
-export function getF64_unaligned(offset: MemOffset): number {
+export function getF64_unaligned (offset: MemOffset): number {
     return cwraps.mono_wasm_get_f64_unaligned(<any>offset);
 }
 
-export function getI8(offset: MemOffset): number {
+export function getI8 (offset: MemOffset): number {
     receiveWorkerHeapViews();
     return Module.HEAP8[<any>offset];
 }
 
-export function getI16(offset: MemOffset): number {
+export function getI16 (offset: MemOffset): number {
     receiveWorkerHeapViews();
     return Module.HEAP16[<any>offset >>> 1];
 }
 
 // does not check for growable heap
-export function getI16_local(localView: Int16Array, offset: MemOffset): number {
+export function getI16_local (localView: Int16Array, offset: MemOffset): number {
     return localView[<any>offset >>> 1];
 }
 
-export function getI32(offset: MemOffset): number {
+export function getI32 (offset: MemOffset): number {
     receiveWorkerHeapViews();
     return Module.HEAP32[<any>offset >>> 2];
 }
 
 // does not check for growable heap
-export function getI32_local(localView: Int32Array, offset: MemOffset): number {
+export function getI32_local (localView: Int32Array, offset: MemOffset): number {
     return localView[<any>offset >>> 2];
 }
 
 /**
  * Throws for Number.MIN_SAFE_INTEGER > value > Number.MAX_SAFE_INTEGER
  */
-export function getI52(offset: MemOffset): number {
+export function getI52 (offset: MemOffset): number {
     const result = cwraps.mono_wasm_i52_to_f64(<any>offset, runtimeHelpers._i52_error_scratch_buffer);
     const error = getI32(runtimeHelpers._i52_error_scratch_buffer);
     autoThrowI52(error);
@@ -262,24 +283,24 @@ export function getI52(offset: MemOffset): number {
 /**
  * Throws for 0 > value > Number.MAX_SAFE_INTEGER
  */
-export function getU52(offset: MemOffset): number {
+export function getU52 (offset: MemOffset): number {
     const result = cwraps.mono_wasm_u52_to_f64(<any>offset, runtimeHelpers._i52_error_scratch_buffer);
     const error = getI32(runtimeHelpers._i52_error_scratch_buffer);
     autoThrowI52(error);
     return result;
 }
 
-export function getI64Big(offset: MemOffset): bigint {
+export function getI64Big (offset: MemOffset): bigint {
     receiveWorkerHeapViews();
     return Module.HEAP64[<any>offset >>> 3];
 }
 
-export function getF32(offset: MemOffset): number {
+export function getF32 (offset: MemOffset): number {
     receiveWorkerHeapViews();
     return Module.HEAPF32[<any>offset >>> 2];
 }
 
-export function getF64(offset: MemOffset): number {
+export function getF64 (offset: MemOffset): number {
     receiveWorkerHeapViews();
     return Module.HEAPF64[<any>offset >>> 3];
 }
@@ -291,7 +312,7 @@ export function withStackAlloc<TResult>(bytesWanted: number, f: (ptr: VoidPtr) =
 export function withStackAlloc<T1, TResult>(bytesWanted: number, f: (ptr: VoidPtr, ud1: T1) => TResult, ud1: T1): TResult;
 export function withStackAlloc<T1, T2, TResult>(bytesWanted: number, f: (ptr: VoidPtr, ud1: T1, ud2: T2) => TResult, ud1: T1, ud2: T2): TResult;
 export function withStackAlloc<T1, T2, T3, TResult>(bytesWanted: number, f: (ptr: VoidPtr, ud1: T1, ud2: T2, ud3: T3) => TResult, ud1: T1, ud2: T2, ud3: T3): TResult;
-export function withStackAlloc<T1, T2, T3, TResult>(bytesWanted: number, f: (ptr: VoidPtr, ud1?: T1, ud2?: T2, ud3?: T3) => TResult, ud1?: T1, ud2?: T2, ud3?: T3): TResult {
+export function withStackAlloc<T1, T2, T3, TResult> (bytesWanted: number, f: (ptr: VoidPtr, ud1?: T1, ud2?: T2, ud3?: T3) => TResult, ud1?: T1, ud2?: T2, ud3?: T3): TResult {
     const sp = Module.stackSave();
     const ptr = Module.stackAlloc(bytesWanted);
     try {
@@ -303,14 +324,14 @@ export function withStackAlloc<T1, T2, T3, TResult>(bytesWanted: number, f: (ptr
 
 // @bytes must be a typed array. space is allocated for it in the native heap
 //  and it is copied to that location. returns the address of the allocation.
-export function mono_wasm_load_bytes_into_heap(bytes: Uint8Array): VoidPtr {
+export function mono_wasm_load_bytes_into_heap (bytes: Uint8Array): VoidPtr {
     const memoryOffset = Module._malloc(bytes.length);
     const heapBytes = new Uint8Array(localHeapViewU8().buffer, <any>memoryOffset, bytes.length);
     heapBytes.set(bytes);
     return memoryOffset;
 }
 
-export function getEnv(name: string): string | null {
+export function getEnv (name: string): string | null {
     let charPtr: CharPtr = <any>0;
     try {
         charPtr = cwraps.mono_wasm_getenv(name);
@@ -322,77 +343,92 @@ export function getEnv(name: string): string | null {
     }
 }
 
-const BuiltinAtomics = globalThis.Atomics;
-
-export const Atomics = WasmEnableThreads ? {
-    storeI32(offset: MemOffset, value: number): void {
-        BuiltinAtomics.store(localHeapViewI32(), <any>offset >>> 2, value);
-    },
-    notifyI32(offset: MemOffset, count: number): void {
-        BuiltinAtomics.notify(localHeapViewI32(), <any>offset >>> 2, count);
+export function compareExchangeI32 (offset: MemOffset, value: number, expected: number): number {
+    mono_assert((<any>offset & 3) === 0, () => `compareExchangeI32: offset must be 4-byte aligned, got ${offset}`);
+    if (!WasmEnableThreads) {
+        const actual = getI32(offset);
+        if (actual === expected) {
+            setI32(offset, value);
+        }
+        return actual;
     }
-} : {
-    storeI32: setI32,
-    notifyI32: () => { /*empty*/ }
-};
+    return globalThis.Atomics.compareExchange(localHeapViewI32(), <any>offset >>> 2, expected, value);
+}
+
+export function storeI32 (offset: MemOffset, value: number): void {
+    mono_assert((<any>offset & 3) === 0, () => `storeI32: offset must be 4-byte aligned, got ${offset}`);
+    if (!WasmEnableThreads) return setI32(offset, value);
+    globalThis.Atomics.store(localHeapViewI32(), <any>offset >>> 2, value);
+}
+
+export function notifyI32 (offset: MemOffset, count: number): void {
+    mono_assert((<any>offset & 3) === 0, () => `notifyI32: offset must be 4-byte aligned, got ${offset}`);
+    if (!WasmEnableThreads) return;
+    globalThis.Atomics.notify(localHeapViewI32(), <any>offset >>> 2, count);
+}
 
 // returns memory view which is valid within current synchronous call stack
-export function localHeapViewI8(): Int8Array {
+export function localHeapViewI8 (): Int8Array {
     receiveWorkerHeapViews();
     return Module.HEAP8;
 }
 
 // returns memory view which is valid within current synchronous call stack
-export function localHeapViewI16(): Int16Array {
+export function localHeapViewI16 (): Int16Array {
     receiveWorkerHeapViews();
     return Module.HEAP16;
 }
 
 // returns memory view which is valid within current synchronous call stack
-export function localHeapViewI32(): Int32Array {
+export function localHeapViewI32 (): Int32Array {
     receiveWorkerHeapViews();
     return Module.HEAP32;
 }
 
 // returns memory view which is valid within current synchronous call stack
-export function localHeapViewI64Big(): BigInt64Array {
+export function localHeapViewI64Big (): BigInt64Array {
     receiveWorkerHeapViews();
     return Module.HEAP64;
 }
 
 // returns memory view which is valid within current synchronous call stack
-export function localHeapViewU8(): Uint8Array {
+export function localHeapViewU8 (): Uint8Array {
     receiveWorkerHeapViews();
     return Module.HEAPU8;
 }
 
 // returns memory view which is valid within current synchronous call stack
-export function localHeapViewU16(): Uint16Array {
+export function localHeapViewU16 (): Uint16Array {
     receiveWorkerHeapViews();
     return Module.HEAPU16;
 }
 
 // returns memory view which is valid within current synchronous call stack
-export function localHeapViewU32(): Uint32Array {
+export function localHeapViewU32 (): Uint32Array {
     receiveWorkerHeapViews();
     return Module.HEAPU32;
 }
 
 // returns memory view which is valid within current synchronous call stack
-export function localHeapViewF32(): Float32Array {
+export function localHeapViewF32 (): Float32Array {
     receiveWorkerHeapViews();
     return Module.HEAPF32;
 }
 
 // returns memory view which is valid within current synchronous call stack
-export function localHeapViewF64(): Float64Array {
+export function localHeapViewF64 (): Float64Array {
     receiveWorkerHeapViews();
     return Module.HEAPF64;
 }
 
+export function copyBytes (srcPtr: VoidPtr, dstPtr: VoidPtr, bytes: number): void {
+    const heap = localHeapViewU8();
+    heap.copyWithin(dstPtr as any, srcPtr as any, srcPtr as any + bytes);
+}
+
 // when we run with multithreading enabled, we need to make sure that the memory views are updated on each worker
 // on non-MT build, this will be a no-op trimmed by rollup
-export function receiveWorkerHeapViews() {
+export function receiveWorkerHeapViews () {
     if (!WasmEnableThreads) return;
     const memory = runtimeHelpers.getMemory();
     if (memory.buffer !== Module.HEAPU8.buffer) {
@@ -401,7 +437,7 @@ export function receiveWorkerHeapViews() {
 }
 
 const sharedArrayBufferDefined = typeof SharedArrayBuffer !== "undefined";
-export function isSharedArrayBuffer(buffer: any): buffer is SharedArrayBuffer {
+export function isSharedArrayBuffer (buffer: any): buffer is SharedArrayBuffer {
     // this condition should be eliminated by rollup on non-threading builds
     if (!WasmEnableThreads) return false;
     // BEWARE: In some cases, `instanceof SharedArrayBuffer` returns false even though buffer is an SAB.
@@ -411,23 +447,23 @@ export function isSharedArrayBuffer(buffer: any): buffer is SharedArrayBuffer {
 }
 
 /*
-Problem: When WebWorker is suspended in the browser, the other running threads could `grow` the linear memory in the meantime. 
-After the thread is un-suspended C code may to try to de-reference pointer which is beyond it's known view. 
+Problem: When WebWorker is suspended in the browser, the other running threads could `grow` the linear memory in the meantime.
+After the thread is un-suspended C code may to try to de-reference pointer which is beyond it's known view.
 This is likely V8 bug. We don't have direct evidence, just failed debugger unit tests with MT runtime.
 */
-export function forceThreadMemoryViewRefresh() {
+export function forceThreadMemoryViewRefresh () {
     // this condition should be eliminated by rollup on non-threading builds and it would become empty method.
     if (!WasmEnableThreads) return;
 
     const wasmMemory = runtimeHelpers.getMemory();
 
     /*
-    Normally when wasm memory grows in v8, this size change is broadcast to other web workers via an 'interrupt', which works by setting a thread-local flag that needs to be checked. 
-    It's possible that at this point in execution the flag has not been checked yet (because this worker was suspended by the debugger in an unknown location), 
-    which means the size change has not taken effect in this worker. 
-    wasmMemory.grow's implementation in v8 checks to see whether other workers have already grown the buffer, 
-    and will update the current worker's knowledge of the buffer's size. 
-    After that we should be able to safely updateMemoryViews and get a correctly sized view. 
+    Normally when wasm memory grows in v8, this size change is broadcast to other web workers via an 'interrupt', which works by setting a thread-local flag that needs to be checked.
+    It's possible that at this point in execution the flag has not been checked yet (because this worker was suspended by the debugger in an unknown location),
+    which means the size change has not taken effect in this worker.
+    wasmMemory.grow's implementation in v8 checks to see whether other workers have already grown the buffer,
+    and will update the current worker's knowledge of the buffer's size.
+    After that we should be able to safely updateMemoryViews and get a correctly sized view.
     This only works because their implementation does not skip doing work even when you ask to grow by 0 pages.
     */
     wasmMemory.grow(0);
diff --git a/src/mono/browser/runtime/polyfills.ts b/src/mono/browser/runtime/polyfills.ts
index 7aae92566b34..8998dc37c32e 100644
--- a/src/mono/browser/runtime/polyfills.ts
+++ b/src/mono/browser/runtime/polyfills.ts
@@ -5,7 +5,8 @@ import WasmEnableThreads from "consts:wasmEnableThreads";
 import type { EmscriptenReplacements } from "./types/internal";
 import type { TypedArray } from "./types/emscripten";
 import { ENVIRONMENT_IS_NODE, ENVIRONMENT_IS_WORKER, INTERNAL, Module, loaderHelpers, runtimeHelpers } from "./globals";
-import { replaceEmscriptenPThreadLibrary } from "./pthreads/shared/emscripten-replacements";
+import { replaceEmscriptenTLSInit } from "./pthreads";
+import { replaceEmscriptenPThreadUI } from "./pthreads";
 
 const dummyPerformance = {
     now: function () {
@@ -13,7 +14,7 @@ const dummyPerformance = {
     }
 };
 
-export function initializeReplacements(replacements: EmscriptenReplacements): void {
+export function initializeReplacements (replacements: EmscriptenReplacements): void {
     // performance.now() is used by emscripten and doesn't work in JSC
     if (typeof globalThis.performance === "undefined") {
         globalThis.performance = dummyPerformance as any;
@@ -34,16 +35,20 @@ export function initializeReplacements(replacements: EmscriptenReplacements): vo
 
     // threads
     if (WasmEnableThreads && replacements.modulePThread) {
-        replaceEmscriptenPThreadLibrary(replacements.modulePThread);
+        if (ENVIRONMENT_IS_WORKER) {
+            replaceEmscriptenTLSInit(replacements.modulePThread);
+        } else {
+            replaceEmscriptenPThreadUI(replacements.modulePThread);
+        }
     }
 }
 
-export async function init_polyfills_async(): Promise<void> {
+export async function init_polyfills_async (): Promise<void> {
     // v8 shell doesn't have Event and EventTarget
     if (WasmEnableThreads && typeof globalThis.Event === "undefined") {
         globalThis.Event = class Event {
             readonly type: string;
-            constructor(type: string) {
+            constructor (type: string) {
                 this.type = type;
             }
         } as any;
@@ -51,7 +56,7 @@ export async function init_polyfills_async(): Promise<void> {
     if (WasmEnableThreads && typeof globalThis.EventTarget === "undefined") {
         globalThis.EventTarget = class EventTarget {
             private subscribers = new Map<string, Array<{ listener: EventListenerOrEventListenerObject, oneShot: boolean }>>();
-            addEventListener(type: string, listener: EventListenerOrEventListenerObject | null, options?: boolean | AddEventListenerOptions) {
+            addEventListener (type: string, listener: EventListenerOrEventListenerObject | null, options?: boolean | AddEventListenerOptions) {
                 if (listener === undefined || listener == null)
                     return;
                 let oneShot = false;
@@ -73,7 +78,7 @@ export async function init_polyfills_async(): Promise<void> {
                 }
                 listeners.push({ listener, oneShot });
             }
-            removeEventListener(type: string, listener: EventListenerOrEventListenerObject | null, options?: boolean | EventListenerOptions) {
+            removeEventListener (type: string, listener: EventListenerOrEventListenerObject | null, options?: boolean | EventListenerOptions) {
                 if (listener === undefined || listener == null)
                     return;
                 if (options !== undefined) {
@@ -97,7 +102,7 @@ export async function init_polyfills_async(): Promise<void> {
                     subscribers.splice(index, 1);
                 }
             }
-            dispatchEvent(event: Event) {
+            dispatchEvent (event: Event) {
                 if (!this.subscribers.has(event.type)) {
                     return true;
                 }
diff --git a/src/mono/browser/runtime/profiler.ts b/src/mono/browser/runtime/profiler.ts
index 8dce6e31fcba..a223a7fdbf4a 100644
--- a/src/mono/browser/runtime/profiler.ts
+++ b/src/mono/browser/runtime/profiler.ts
@@ -14,7 +14,7 @@ import { utf8ToString } from "./strings";
 // sendTo defaults to 'WebAssembly.Runtime::DumpAotProfileData'.
 // DumpAotProfileData stores the data into INTERNAL.aotProfileData.
 //
-export function mono_wasm_init_aot_profiler(options: AOTProfilerOptions): void {
+export function mono_wasm_init_aot_profiler (options: AOTProfilerOptions): void {
     mono_assert(runtimeHelpers.emscriptenBuildOptions.enableAotProfiler, "AOT profiler is not enabled, please use <WasmProfilers>aot;</WasmProfilers> in your project file.");
     if (options == null)
         options = {};
@@ -26,7 +26,7 @@ export function mono_wasm_init_aot_profiler(options: AOTProfilerOptions): void {
     cwraps.mono_wasm_profiler_init_aot(arg);
 }
 
-export function mono_wasm_init_browser_profiler(options: BrowserProfilerOptions): void {
+export function mono_wasm_init_browser_profiler (options: BrowserProfilerOptions): void {
     mono_assert(runtimeHelpers.emscriptenBuildOptions.enableBrowserProfiler, "Browser profiler is not enabled, please use <WasmProfilers>browser;</WasmProfilers> in your project file.");
     if (options == null)
         options = {};
@@ -59,14 +59,14 @@ export type TimeStamp = {
     __brand: "TimeStamp"
 }
 
-export function startMeasure(): TimeStamp {
+export function startMeasure (): TimeStamp {
     if (runtimeHelpers.enablePerfMeasure) {
         return globalThis.performance.now() as any;
     }
     return undefined as any;
 }
 
-export function endMeasure(start: TimeStamp, block: string, id?: string) {
+export function endMeasure (start: TimeStamp, block: string, id?: string) {
     if (runtimeHelpers.enablePerfMeasure && start) {
         const options = ENVIRONMENT_IS_WEB
             ? { start: start as any }
@@ -77,14 +77,14 @@ export function endMeasure(start: TimeStamp, block: string, id?: string) {
 }
 
 const stackFrames: number[] = [];
-export function mono_wasm_profiler_enter(): void {
+export function mono_wasm_profiler_enter (): void {
     if (runtimeHelpers.enablePerfMeasure) {
         stackFrames.push(globalThis.performance.now());
     }
 }
 
 const methodNames: Map<number, string> = new Map();
-export function mono_wasm_profiler_leave(method: MonoMethod): void {
+export function mono_wasm_profiler_leave (method: MonoMethod): void {
     if (runtimeHelpers.enablePerfMeasure) {
         const start = stackFrames.pop();
         const options = ENVIRONMENT_IS_WEB
diff --git a/src/mono/browser/runtime/pthreads/README.md b/src/mono/browser/runtime/pthreads/README.md
index 34f3508988cc..757cc73a85e4 100644
--- a/src/mono/browser/runtime/pthreads/README.md
+++ b/src/mono/browser/runtime/pthreads/README.md
@@ -17,11 +17,11 @@ On the other hand, pthreads in native code have a peer relationship: any two thr
 
 ## Main thread API
 
-In the main thread, `pthreads/browser` provides a `getThread` function that returns a `{ pthread_ptr: pthread_ptr, worker: Worker, port: MessagePort }` object that can be used to communicate with the worker thread.
+In the main thread, `pthreads/ui-thread` provides a `getThread` function that returns a `{ pthread_ptr: pthread_ptr, worker: Worker, port: MessagePort }` object that can be used to communicate with the worker thread.
 
 ## Worker thread API
 
-In the worker threads, `pthread/worker` provides `currentWorkerThreadEvents` which is an [`EventTarget`](https://developer.mozilla.org/en-US/docs/Web/API/EventTarget) that fires `'dotnet:pthread:created'` and `'dotnet:pthread:attached'` events when a pthread is started on the worker, and when that pthread attaches to the Mono runtime. A good place to add event listeners is in `mono_wasm_pthread_worker_init` in `startup.ts`.
+In the worker threads, `pthread/worker-*` provides `currentWorkerThreadEvents` which is an [`EventTarget`](https://developer.mozilla.org/en-US/docs/Web/API/EventTarget) that fires `'dotnet:pthread:created'` and `'dotnet:pthread:attached'` events when a pthread is started on the worker, and when that pthread attaches to the Mono runtime. A good place to add event listeners is in `mono_wasm_pthread_worker_init` in `startup.ts`.
 The events have a `portToMain` property which is a dotnet-specific `MessagePort` for posting messages to the main thread and for listening for messages from the main thread.
 
 ## Implementation
diff --git a/src/mono/browser/runtime/pthreads/browser/index.ts b/src/mono/browser/runtime/pthreads/browser/index.ts
deleted file mode 100644
index e1ea4961a846..000000000000
--- a/src/mono/browser/runtime/pthreads/browser/index.ts
+++ /dev/null
@@ -1,206 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-import WasmEnableThreads from "consts:wasmEnableThreads";
-
-import { MonoWorkerToMainMessage, PThreadInfo, pthreadPtr } from "../shared/types";
-import { MonoThreadMessage } from "../shared";
-import { PThreadWorker, allocateUnusedWorker, getRunningWorkers, getUnusedWorkerPool, getWorker, loadWasmModuleToWorker } from "../shared/emscripten-internals";
-import { createPromiseController, mono_assert, runtimeHelpers } from "../../globals";
-import { MainToWorkerMessageType, PromiseAndController, PromiseController, WorkerToMainMessageType, monoMessageSymbol } from "../../types/internal";
-import { mono_log_info } from "../../logging";
-import { monoThreadInfo } from "../worker";
-
-const threadPromises: Map<pthreadPtr, PromiseController<Thread>[]> = new Map();
-
-export interface Thread {
-    readonly pthreadPtr: pthreadPtr;
-    readonly port: MessagePort;
-    postMessageToWorker<T extends MonoThreadMessage>(message: T): void;
-}
-
-class ThreadImpl implements Thread {
-    constructor(readonly pthreadPtr: pthreadPtr, readonly worker: Worker, readonly port: MessagePort) { }
-    postMessageToWorker<T extends MonoThreadMessage>(message: T): void {
-        this.port.postMessage(message);
-    }
-}
-
-/// wait until the thread with the given id has set up a message port to the runtime
-export function waitForThread(pthreadPtr: pthreadPtr): Promise<Thread> {
-    if (!WasmEnableThreads) return null as any;
-    const worker = getWorker(pthreadPtr);
-    if (worker?.thread) {
-        return Promise.resolve(worker?.thread);
-    }
-    const promiseAndController = createPromiseController<Thread>();
-    const arr = threadPromises.get(pthreadPtr);
-    if (arr === undefined) {
-        threadPromises.set(pthreadPtr, [promiseAndController.promise_control]);
-    } else {
-        arr.push(promiseAndController.promise_control);
-    }
-    return promiseAndController.promise;
-}
-
-export function resolveThreadPromises(pthreadPtr: pthreadPtr, thread?: Thread): void {
-    if (!WasmEnableThreads) return;
-    const arr = threadPromises.get(pthreadPtr);
-    if (arr !== undefined) {
-        arr.forEach((controller) => {
-            if (thread) {
-                controller.resolve(thread);
-            } else {
-                controller.reject();
-            }
-        });
-        threadPromises.delete(pthreadPtr);
-    }
-}
-
-// handler that runs in the main thread when a message is received from a pthread worker
-function monoWorkerMessageHandler(worker: PThreadWorker, ev: MessageEvent<any>): void {
-    if (!WasmEnableThreads) return;
-    let pthreadId: pthreadPtr;
-    // this is emscripten message
-    if (ev.data.cmd === "killThread") {
-        pthreadId = ev.data["thread"];
-        mono_assert(pthreadId == worker.info.pthreadId, "expected pthreadId to match");
-        worker.info.isRunning = false;
-        worker.info.pthreadId = 0;
-        return;
-    }
-
-    const message = ev.data[monoMessageSymbol] as MonoWorkerToMainMessage;
-    if (message === undefined) {
-        /// N.B. important to ignore messages we don't recognize - Emscripten uses the message event to send internal messages
-        return;
-    }
-
-    let port: MessagePort;
-    let thread: Thread;
-    pthreadId = message.info?.pthreadId ?? 0;
-
-    switch (message.monoCmd) {
-        case WorkerToMainMessageType.preload:
-            // this one shot port from setupPreloadChannelToMainThread
-            port = message.port!;
-            port.postMessage({
-                type: "pthread",
-                cmd: MainToWorkerMessageType.applyConfig,
-                config: JSON.stringify(runtimeHelpers.config)
-            });
-            port.close();
-            break;
-        case WorkerToMainMessageType.pthreadCreated:
-            port = message.port!;
-            thread = new ThreadImpl(pthreadId, worker, port);
-            worker.thread = thread;
-            worker.info.isRunning = true;
-            resolveThreadPromises(pthreadId, thread);
-        // fall through
-        case WorkerToMainMessageType.monoRegistered:
-        case WorkerToMainMessageType.monoAttached:
-        case WorkerToMainMessageType.enabledInterop:
-        case WorkerToMainMessageType.monoUnRegistered:
-        case WorkerToMainMessageType.updateInfo:
-            worker.info = Object.assign(worker.info!, message.info, {});
-            break;
-        default:
-            throw new Error(`Unhandled message from worker: ${message.monoCmd}`);
-    }
-}
-
-let pendingWorkerLoad: PromiseAndController<void> | undefined;
-
-/// Called by Emscripten internals on the browser thread when a new pthread worker is created and added to the pthread worker pool.
-/// At this point the worker doesn't have any pthread assigned to it, yet.
-export function onWorkerLoadInitiated(worker: PThreadWorker, loaded: Promise<Worker>): void {
-    if (!WasmEnableThreads) return;
-    worker.addEventListener("message", (ev) => monoWorkerMessageHandler(worker, ev));
-    if (pendingWorkerLoad == undefined) {
-        pendingWorkerLoad = createPromiseController<void>();
-    }
-    loaded.then(() => {
-        worker.info.isLoaded = true;
-        if (pendingWorkerLoad != undefined) {
-            pendingWorkerLoad.promise_control.resolve();
-            pendingWorkerLoad = undefined;
-        }
-    });
-}
-
-export function thread_available(): Promise<void> {
-    if (!WasmEnableThreads) return null as any;
-    if (pendingWorkerLoad == undefined) {
-        return Promise.resolve();
-    }
-    return pendingWorkerLoad.promise;
-}
-
-/// We call on the main thread this during startup to pre-allocate a pool of pthread workers.
-/// At this point asset resolution needs to be working (ie we loaded MonoConfig).
-/// This is used instead of the Emscripten PThread.initMainThread because we call it later.
-export function preAllocatePThreadWorkerPool(pthreadPoolSize: number): void {
-    if (!WasmEnableThreads) return;
-    for (let i = 0; i < pthreadPoolSize; i++) {
-        allocateUnusedWorker();
-    }
-}
-
-/// We call this on the main thread during startup once we fetched WasmModule.
-/// This sends a message to each pre-allocated worker to load the WasmModule and dotnet.js and to set up
-/// message handling.
-/// This is used instead of the Emscripten "receiveInstance" in "createWasm" because that code is
-/// conditioned on a non-zero PTHREAD_POOL_SIZE (but we set it to 0 to avoid early worker allocation).
-export async function instantiateWasmPThreadWorkerPool(): Promise<void> {
-    if (!WasmEnableThreads) return null as any;
-    // this is largely copied from emscripten's "receiveInstance" in "createWasm" in "src/preamble.js"
-    const workers = getUnusedWorkerPool();
-    if (workers.length > 0) {
-        const promises = workers.map(loadWasmModuleToWorker);
-        await Promise.all(promises);
-    }
-}
-
-// when we create threads with browser event loop, it's not able to be joined by mono's thread join during shutdown and blocks process exit
-export function cancelThreads() {
-    const workers: PThreadWorker[] = getRunningWorkers();
-    for (const worker of workers) {
-        if (worker.info.isExternalEventLoop) {
-            worker.postMessage({ cmd: "cancel" });
-        }
-    }
-}
-
-export function dumpThreads(): void {
-    if (!WasmEnableThreads) return;
-    mono_log_info("Dumping web worker info as seen by UI thread, it could be stale: ");
-    const emptyInfo = {
-        pthreadId: 0,
-        threadPrefix: "          -    ",
-        threadName: "????",
-        isRunning: false,
-        isAttached: false,
-        isExternalEventLoop: false,
-        reuseCount: 0,
-    };
-    const threadInfos: PThreadInfo[] = [
-        Object.assign({}, emptyInfo, monoThreadInfo), // UI thread
-    ];
-    for (const worker of getRunningWorkers()) {
-        threadInfos.push(Object.assign({}, emptyInfo, worker.info));
-    }
-    for (const worker of getUnusedWorkerPool()) {
-        threadInfos.push(Object.assign({}, emptyInfo, worker.info));
-    }
-    threadInfos.forEach((info, i) => {
-        const idx = (i + "").padStart(2, "0");
-        const isRunning = (info.isRunning + "").padStart(5, " ");
-        const isAttached = (info.isAttached + "").padStart(5, " ");
-        const isEventLoop = (info.isExternalEventLoop + "").padStart(5, " ");
-        const reuseCount = (info.reuseCount + "").padStart(3, " ");
-        // eslint-disable-next-line no-console
-        console.info(`${idx} | ${info.threadPrefix}: isRunning:${isRunning} isAttached:${isAttached} isEventLoop:${isEventLoop} reuseCount:${reuseCount} - ${info.threadName}`);
-    });
-}
diff --git a/src/mono/browser/runtime/pthreads/deputy-thread.ts b/src/mono/browser/runtime/pthreads/deputy-thread.ts
new file mode 100644
index 000000000000..4b514b28a4ae
--- /dev/null
+++ b/src/mono/browser/runtime/pthreads/deputy-thread.ts
@@ -0,0 +1,60 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+import WasmEnableThreads from "consts:wasmEnableThreads";
+import BuildConfiguration from "consts:configuration";
+
+import { mono_log_error, mono_log_info } from "../logging";
+import { monoThreadInfo, postMessageToMain, update_thread_info } from "./shared";
+import { Module, loaderHelpers, runtimeHelpers } from "../globals";
+import { start_runtime } from "../startup";
+import { WorkerToMainMessageType } from "../types/internal";
+import { forceThreadMemoryViewRefresh } from "../memory";
+
+export function mono_wasm_start_deputy_thread_async () {
+    if (!WasmEnableThreads) return;
+
+    if (BuildConfiguration === "Debug" && globalThis.setInterval) globalThis.setInterval(() => {
+        mono_log_info("Deputy thread is alive!");
+    }, 3000);
+
+    try {
+        monoThreadInfo.isDeputy = true;
+        monoThreadInfo.threadName = "Managed Main Deputy";
+        update_thread_info();
+        postMessageToMain({
+            monoCmd: WorkerToMainMessageType.deputyCreated,
+            info: monoThreadInfo,
+        });
+        Module.runtimeKeepalivePush();
+        Module.safeSetTimeout(async () => {
+            try {
+                forceThreadMemoryViewRefresh();
+
+                await start_runtime();
+
+                postMessageToMain({
+                    monoCmd: WorkerToMainMessageType.deputyStarted,
+                    info: monoThreadInfo,
+                    deputyProxyGCHandle: runtimeHelpers.proxyGCHandle,
+                });
+            } catch (err) {
+                postMessageToMain({
+                    monoCmd: WorkerToMainMessageType.deputyFailed,
+                    info: monoThreadInfo,
+                    error: "mono_wasm_start_deputy_thread_async() failed" + err,
+                });
+                mono_log_error("mono_wasm_start_deputy_thread_async() failed", err);
+                loaderHelpers.mono_exit(1, err);
+                throw err;
+            }
+        }, 0);
+    } catch (err) {
+        mono_log_error("mono_wasm_start_deputy_thread_async() failed", err);
+        loaderHelpers.mono_exit(1, err);
+        throw err;
+    }
+
+    // same as emscripten_exit_with_live_runtime()
+    throw "unwind";
+}
diff --git a/src/mono/browser/runtime/pthreads/index.ts b/src/mono/browser/runtime/pthreads/index.ts
new file mode 100644
index 000000000000..3678d709d105
--- /dev/null
+++ b/src/mono/browser/runtime/pthreads/index.ts
@@ -0,0 +1,28 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+import { mono_log_warn } from "../logging";
+import { utf16ToString } from "../strings";
+
+export {
+    mono_wasm_main_thread_ptr, mono_wasm_install_js_worker_interop, mono_wasm_uninstall_js_worker_interop,
+    mono_wasm_pthread_ptr, update_thread_info, isMonoThreadMessage, monoThreadInfo,
+} from "./shared";
+export {
+    mono_wasm_dump_threads, cancelThreads, is_thread_available,
+    populateEmscriptenPool, mono_wasm_init_threads, init_finalizer_thread,
+    waitForThread, replaceEmscriptenPThreadUI
+} from "./ui-thread";
+export {
+    mono_wasm_pthread_on_pthread_attached, mono_wasm_pthread_on_pthread_unregistered,
+    mono_wasm_pthread_on_pthread_registered, mono_wasm_pthread_set_name, currentWorkerThreadEvents,
+    dotnetPthreadCreated, initWorkerThreadEvents, replaceEmscriptenTLSInit, pthread_self
+} from "./worker-thread";
+
+export { mono_wasm_start_deputy_thread_async } from "./deputy-thread";
+export { mono_wasm_start_io_thread_async } from "./io-thread";
+
+export function mono_wasm_warn_about_blocking_wait (ptr: number, length: number) {
+    const warning = utf16ToString(ptr, ptr + (length * 2));
+    mono_log_warn(warning);
+}
diff --git a/src/mono/browser/runtime/pthreads/io-thread.ts b/src/mono/browser/runtime/pthreads/io-thread.ts
new file mode 100644
index 000000000000..011ac7b304fc
--- /dev/null
+++ b/src/mono/browser/runtime/pthreads/io-thread.ts
@@ -0,0 +1,39 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+import WasmEnableThreads from "consts:wasmEnableThreads";
+import BuildConfiguration from "consts:configuration";
+
+import { mono_log_error, mono_log_info } from "../logging";
+import { monoThreadInfo, postMessageToMain, update_thread_info } from "./shared";
+import { Module, loaderHelpers } from "../globals";
+import { WorkerToMainMessageType } from "../types/internal";
+import { threads_c_functions as tcwraps } from "../cwraps";
+
+export function mono_wasm_start_io_thread_async () {
+    if (!WasmEnableThreads) return;
+
+
+    if (BuildConfiguration === "Debug" && globalThis.setInterval) globalThis.setInterval(() => {
+        mono_log_info("I/O thread is alive!");
+    }, 3000);
+
+    try {
+        monoThreadInfo.isIo = true;
+        monoThreadInfo.threadName = "JS I/O Thread";
+        update_thread_info();
+        tcwraps.mono_wasm_register_io_thread();
+        postMessageToMain({
+            monoCmd: WorkerToMainMessageType.ioStarted,
+            info: monoThreadInfo,
+        });
+        Module.runtimeKeepalivePush();
+    } catch (err) {
+        mono_log_error("mono_wasm_start_io_thread_async() failed", err);
+        loaderHelpers.mono_exit(1, err);
+        throw err;
+    }
+
+    // same as emscripten_exit_with_live_runtime()
+    throw "unwind";
+}
diff --git a/src/mono/browser/runtime/pthreads/shared.ts b/src/mono/browser/runtime/pthreads/shared.ts
new file mode 100644
index 000000000000..f72804fbcf87
--- /dev/null
+++ b/src/mono/browser/runtime/pthreads/shared.ts
@@ -0,0 +1,153 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+import WasmEnableThreads from "consts:wasmEnableThreads";
+import BuildConfiguration from "consts:configuration";
+
+import type { GCHandle, MonoThreadMessage, PThreadInfo, PThreadPtr } from "../types/internal";
+
+import { ENVIRONMENT_IS_PTHREAD, Module, loaderHelpers, mono_assert, runtimeHelpers } from "../globals";
+import { set_thread_prefix } from "../logging";
+import { bindings_init } from "../startup";
+import { forceDisposeProxies } from "../gc-handles";
+import { monoMessageSymbol, GCHandleNull, PThreadPtrNull, WorkerToMainMessageType } from "../types/internal";
+import { threads_c_functions as tcwraps } from "../cwraps";
+import { forceThreadMemoryViewRefresh } from "../memory";
+
+// A duplicate in loader/assets.ts
+export const worker_empty_prefix = "          -    ";
+
+const monoThreadInfoPartial: Partial<PThreadInfo> = {
+    pthreadId: PThreadPtrNull,
+    reuseCount: 0,
+    updateCount: 0,
+    threadPrefix: worker_empty_prefix,
+    threadName: "emscripten-loaded",
+};
+export const monoThreadInfo: PThreadInfo = monoThreadInfoPartial as PThreadInfo;
+
+export function isMonoThreadMessage (x: unknown): x is MonoThreadMessage {
+    if (typeof (x) !== "object" || x === null) {
+        return false;
+    }
+    const xmsg = x as MonoThreadMessage;
+    return typeof (xmsg.type) === "string" && typeof (xmsg.cmd) === "string";
+}
+
+export function mono_wasm_install_js_worker_interop (context_gc_handle: GCHandle): void {
+    if (!WasmEnableThreads) return;
+    bindings_init();
+    mono_assert(!runtimeHelpers.proxyGCHandle, "JS interop should not be already installed on this worker.");
+    runtimeHelpers.proxyGCHandle = context_gc_handle;
+    if (ENVIRONMENT_IS_PTHREAD) {
+        runtimeHelpers.managedThreadTID = runtimeHelpers.currentThreadTID;
+        runtimeHelpers.isManagedRunningOnCurrentThread = true;
+    }
+    Module.runtimeKeepalivePush();
+    monoThreadInfo.isDirtyBecauseOfInterop = true;
+    update_thread_info();
+    if (ENVIRONMENT_IS_PTHREAD) {
+        postMessageToMain({
+            monoCmd: WorkerToMainMessageType.enabledInterop,
+            info: monoThreadInfo,
+        });
+    }
+}
+
+export function mono_wasm_uninstall_js_worker_interop (): void {
+    if (!WasmEnableThreads) return;
+    mono_assert(runtimeHelpers.mono_wasm_bindings_is_ready, "JS interop is not installed on this worker.");
+    mono_assert(runtimeHelpers.proxyGCHandle, "JSSynchronizationContext is not installed on this worker.");
+
+    forceDisposeProxies(true, runtimeHelpers.diagnosticTracing);
+    Module.runtimeKeepalivePop();
+
+    runtimeHelpers.proxyGCHandle = GCHandleNull;
+    runtimeHelpers.mono_wasm_bindings_is_ready = false;
+    update_thread_info();
+}
+
+// this is just for Debug build of the runtime, making it easier to debug worker threads
+export function update_thread_info (): void {
+    if (!WasmEnableThreads) return;
+    const threadType = !monoThreadInfo.isRegistered ? "emsc"
+        : monoThreadInfo.isUI ? "-UI-"
+            : monoThreadInfo.isDeputy ? "dpty"
+                : monoThreadInfo.isIo ? "-IO-"
+                    : monoThreadInfo.isTimer ? "timr"
+                        : monoThreadInfo.isLongRunning ? "long"
+                            : monoThreadInfo.isThreadPoolGate ? "gate"
+                                : monoThreadInfo.isDebugger ? "dbgr"
+                                    : monoThreadInfo.isThreadPoolWorker ? "pool"
+                                        : monoThreadInfo.isExternalEventLoop ? "jsww"
+                                            : monoThreadInfo.isBackground ? "back"
+                                                : "norm";
+    const hexPtr = (monoThreadInfo.pthreadId as any).toString(16).padStart(8, "0");
+    const hexPrefix = monoThreadInfo.isRegistered ? "0x" : "--";
+    monoThreadInfo.threadPrefix = `${hexPrefix}${hexPtr}-${threadType}`;
+
+    loaderHelpers.set_thread_prefix(monoThreadInfo.threadPrefix!);
+    if (!loaderHelpers.config.forwardConsoleLogsToWS) {
+        set_thread_prefix(monoThreadInfo.threadPrefix!);
+    }
+
+    // this is just to make debugging easier by naming the thread debugger window.
+    // It's not CSP compliant and possibly not performant, that's why it's only enabled in debug builds
+    // in Release configuration, it would be a trimmed by rollup
+    if (WasmEnableThreads && BuildConfiguration === "Debug" && !runtimeHelpers.cspPolicy) {
+        monoThreadInfo.updateCount++;
+        try {
+            const url = `//# sourceURL=https://dotnet/thread/${monoThreadInfo.updateCount}-${monoThreadInfo.threadPrefix}`;
+            const infoJson = JSON.stringify(monoThreadInfo, null, 2);
+            const body = `const monoThreadInfo=${infoJson};\r\nconsole.log(monoThreadInfo);`;
+            (globalThis as any).monoThreadInfoFn = new Function(body + "\r\n" + url);
+        } catch (ex) {
+            runtimeHelpers.cspPolicy = true;
+        }
+    }
+}
+
+export function exec_synchronization_context_pump (): void {
+    if (!loaderHelpers.is_runtime_running()) {
+        return;
+    }
+    forceThreadMemoryViewRefresh();
+    tcwraps.mono_wasm_synchronization_context_pump();
+}
+
+export function mono_wasm_schedule_synchronization_context (): void {
+    if (!WasmEnableThreads) return;
+    Module.safeSetTimeout(exec_synchronization_context_pump, 0);
+}
+
+export function mono_wasm_pthread_ptr (): PThreadPtr {
+    if (!WasmEnableThreads) return PThreadPtrNull;
+    return (<any>Module)["_pthread_self"]();
+}
+
+export function mono_wasm_main_thread_ptr (): PThreadPtr {
+    if (!WasmEnableThreads) return PThreadPtrNull;
+    return (<any>Module)["_emscripten_main_runtime_thread_id"]();
+}
+
+export function postMessageToMain (message: MonoWorkerToMainMessage, transfer?: Transferable[]) {
+    self.postMessage({
+        [monoMessageSymbol]: message
+    }, transfer ? transfer : []);
+}
+
+export interface MonoWorkerToMainMessage {
+    monoCmd: WorkerToMainMessageType;
+    info: PThreadInfo;
+    port?: MessagePort;
+    error?: string;
+    deputyProxyGCHandle?: GCHandle;
+}
+
+/// Identification of the current thread executing on a worker
+export interface PThreadSelf {
+    info: PThreadInfo;
+    portToBrowser: MessagePort;
+    postMessageToBrowser: <T extends MonoThreadMessage>(message: T, transfer?: Transferable[]) => void;
+    addEventListenerFromBrowser: (listener: <T extends MonoThreadMessage>(event: MessageEvent<T>) => void) => void;
+}
diff --git a/src/mono/browser/runtime/pthreads/shared/emscripten-internals.ts b/src/mono/browser/runtime/pthreads/shared/emscripten-internals.ts
deleted file mode 100644
index 5516f1a0f813..000000000000
--- a/src/mono/browser/runtime/pthreads/shared/emscripten-internals.ts
+++ /dev/null
@@ -1,65 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-import { Module } from "../../globals";
-import { Thread } from "../browser";
-import { PThreadInfo, pthreadPtr } from "./types";
-
-/** @module emscripten-internals accessors to the functions in the emscripten PThreads library, including
- * the low-level representations of {@linkcode pthreadPtr} thread info structs, etc.
- * Additionally, note that some of these functions are replaced by {@linkcode file://./emscripten-replacements.ts}.
- * These have a hard dependency on the version of Emscripten that we are using and may need to be kept in sync with
- *    {@linkcode file://./../../../emsdk/upstream/emscripten/src/library_pthread.js}
- */
-
-// This is what we know about the Emscripten PThread library
-export interface PThreadLibrary {
-    unusedWorkers: PThreadWorker[];
-    runningWorkers: PThreadWorker[];
-    pthreads: PThreadInfoMap;
-    allocateUnusedWorker: () => void;
-    loadWasmModuleToWorker: (worker: PThreadWorker) => Promise<PThreadWorker>;
-    threadInitTLS: () => void,
-    getNewWorker: () => PThreadWorker,
-    returnWorkerToPool: (worker: PThreadWorker) => void,
-}
-
-
-/// N.B. emscripten deletes the `pthread` property from the worker when it is not actively running a pthread
-export interface PThreadWorker extends Worker {
-    pthread_ptr: pthreadPtr;
-    loaded: boolean;
-    // this info is updated via async messages from the worker, it could be stale
-    info: PThreadInfo;
-    thread?: Thread;
-}
-
-interface PThreadInfoMap {
-    [key: pthreadPtr]: PThreadWorker;
-}
-
-
-export function getWorker(pthreadPtr: pthreadPtr): PThreadWorker | undefined {
-    return getModulePThread().pthreads[pthreadPtr];
-}
-
-export function allocateUnusedWorker(): void {
-    /// See library_pthread.js in Emscripten.
-    /// This function allocates a new worker and adds it to the pool of workers.
-    /// It's called when the pool of workers is empty and a new thread is created.
-    getModulePThread().allocateUnusedWorker();
-}
-export function getUnusedWorkerPool(): PThreadWorker[] {
-    return getModulePThread().unusedWorkers;
-}
-export function getRunningWorkers(): PThreadWorker[] {
-    return getModulePThread().runningWorkers;
-}
-
-export function loadWasmModuleToWorker(worker: PThreadWorker): Promise<PThreadWorker> {
-    return getModulePThread().loadWasmModuleToWorker(worker);
-}
-
-export function getModulePThread(): PThreadLibrary {
-    return (<any>Module).PThread as PThreadLibrary;
-}
diff --git a/src/mono/browser/runtime/pthreads/shared/emscripten-replacements.ts b/src/mono/browser/runtime/pthreads/shared/emscripten-replacements.ts
deleted file mode 100644
index f0bbf80d6636..000000000000
--- a/src/mono/browser/runtime/pthreads/shared/emscripten-replacements.ts
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-import WasmEnableThreads from "consts:wasmEnableThreads";
-import BuildConfiguration from "consts:configuration";
-
-import { dumpThreads, onWorkerLoadInitiated, resolveThreadPromises } from "../browser";
-import { mono_wasm_pthread_on_pthread_created } from "../worker";
-import { PThreadLibrary, PThreadWorker, getModulePThread, getUnusedWorkerPool } from "./emscripten-internals";
-import { loaderHelpers, mono_assert } from "../../globals";
-import { mono_log_warn } from "../../logging";
-
-/** @module emscripten-replacements Replacements for individual functions in the emscripten PThreads library.
- * These have a hard dependency on the version of Emscripten that we are using and may need to be kept in sync with
- *    {@linkcode file://./../../../emsdk/upstream/emscripten/src/library_pthread.js}
- */
-
-export function replaceEmscriptenPThreadLibrary(modulePThread: PThreadLibrary): void {
-    if (!WasmEnableThreads) return;
-
-    const originalLoadWasmModuleToWorker = modulePThread.loadWasmModuleToWorker;
-    const originalThreadInitTLS = modulePThread.threadInitTLS;
-    const originalReturnWorkerToPool = modulePThread.returnWorkerToPool;
-
-    modulePThread.loadWasmModuleToWorker = (worker: PThreadWorker): Promise<PThreadWorker> => {
-        const afterLoaded = originalLoadWasmModuleToWorker(worker);
-        afterLoaded.then(() => {
-            availableThreadCount++;
-        });
-        onWorkerLoadInitiated(worker, afterLoaded);
-        if (loaderHelpers.config.exitOnUnhandledError) {
-            worker.onerror = (e) => {
-                loaderHelpers.mono_exit(1, e);
-            };
-        }
-        return afterLoaded;
-    };
-    modulePThread.threadInitTLS = (): void => {
-        originalThreadInitTLS();
-        mono_wasm_pthread_on_pthread_created();
-    };
-    modulePThread.allocateUnusedWorker = allocateUnusedWorker;
-    modulePThread.getNewWorker = () => getNewWorker(modulePThread);
-    modulePThread.returnWorkerToPool = (worker: PThreadWorker) => {
-        // when JS interop is installed on JSWebWorker
-        // we can't reuse the worker, because user code could leave the worker JS globals in a dirty state
-        worker.info.isRunning = false;
-        resolveThreadPromises(worker.pthread_ptr, undefined);
-        worker.info.pthreadId = 0;
-        if (worker.thread?.port) {
-            worker.thread.port.close();
-        }
-        worker.thread = undefined;
-        if (worker.info && worker.info.isDirtyBecauseOfInterop) {
-            // we are on UI thread, invoke the handler directly to destroy the dirty worker
-            worker.onmessage!(new MessageEvent("message", {
-                data: {
-                    "cmd": "killThread",
-                    thread: worker.pthread_ptr
-                }
-            }));
-        } else {
-            availableThreadCount++;
-            originalReturnWorkerToPool(worker);
-        }
-    };
-    if (BuildConfiguration === "Debug") {
-        (globalThis as any).dumpThreads = dumpThreads;
-        (globalThis as any).getModulePThread = getModulePThread;
-    }
-}
-
-let availableThreadCount = 0;
-export function is_thread_available() {
-    return availableThreadCount > 0;
-}
-
-function getNewWorker(modulePThread: PThreadLibrary): PThreadWorker {
-    if (!WasmEnableThreads) return null as any;
-
-    if (modulePThread.unusedWorkers.length == 0) {
-        mono_log_warn(`Failed to find unused WebWorker, this may deadlock. Please increase the pthreadPoolSize. Running threads ${modulePThread.runningWorkers.length}. Loading workers: ${modulePThread.unusedWorkers.length}`);
-        const worker = allocateUnusedWorker();
-        modulePThread.loadWasmModuleToWorker(worker);
-        availableThreadCount--;
-        return worker;
-    }
-
-    // keep them pre-allocated all the time, not just during startup
-    if (loaderHelpers.config.pthreadPoolSize && modulePThread.unusedWorkers.length <= loaderHelpers.config.pthreadPoolSize) {
-        const worker = allocateUnusedWorker();
-        modulePThread.loadWasmModuleToWorker(worker);
-    }
-
-    for (let i = 0; i < modulePThread.unusedWorkers.length; i++) {
-        const worker = modulePThread.unusedWorkers[i];
-        if (worker.loaded) {
-            modulePThread.unusedWorkers.splice(i, 1);
-            availableThreadCount--;
-            return worker;
-        }
-    }
-    mono_log_warn(`Failed to find loaded WebWorker, this may deadlock. Please increase the pthreadPoolSize. Running threads ${modulePThread.runningWorkers.length}. Loading workers: ${modulePThread.unusedWorkers.length}`);
-    availableThreadCount--; // negative value
-    return modulePThread.unusedWorkers.pop()!;
-}
-
-/// We replace Module["PThreads"].allocateUnusedWorker with this version that knows about assets
-function allocateUnusedWorker(): PThreadWorker {
-    if (!WasmEnableThreads) return null as any;
-
-    const asset = loaderHelpers.resolve_single_asset_path("js-module-threads");
-    const uri = asset.resolvedUrl;
-    mono_assert(uri !== undefined, "could not resolve the uri for the js-module-threads asset");
-    const worker = new Worker(uri) as PThreadWorker;
-    getUnusedWorkerPool().push(worker);
-    worker.loaded = false;
-    worker.info = {
-        pthreadId: 0,
-        reuseCount: 0,
-        updateCount: 0,
-        threadPrefix: "          -    ",
-        threadName: "emscripten-pool",
-    };
-    return worker;
-}
-
-
diff --git a/src/mono/browser/runtime/pthreads/shared/eventloop.ts b/src/mono/browser/runtime/pthreads/shared/eventloop.ts
deleted file mode 100644
index 602c3fb221fb..000000000000
--- a/src/mono/browser/runtime/pthreads/shared/eventloop.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-
-let perThreadUnsettledPromiseCount = 0;
-
-export function addUnsettledPromise() {
-    perThreadUnsettledPromiseCount++;
-}
-
-export function settleUnsettledPromise() {
-    perThreadUnsettledPromiseCount--;
-}
-
-/// Called from the C# threadpool worker loop to find out if there are any
-/// unsettled JS promises that need to keep the worker alive
-export function mono_wasm_eventloop_has_unsettled_interop_promises(): boolean {
-    return perThreadUnsettledPromiseCount > 0;
-}
diff --git a/src/mono/browser/runtime/pthreads/shared/index.ts b/src/mono/browser/runtime/pthreads/shared/index.ts
deleted file mode 100644
index d91efc577517..000000000000
--- a/src/mono/browser/runtime/pthreads/shared/index.ts
+++ /dev/null
@@ -1,109 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-import WasmEnableThreads from "consts:wasmEnableThreads";
-import BuildConfiguration from "consts:configuration";
-
-import { ENVIRONMENT_IS_PTHREAD, Module, loaderHelpers, mono_assert, runtimeHelpers } from "../../globals";
-import { mono_log_debug, set_thread_prefix } from "../../logging";
-import { bindings_init } from "../../startup";
-import { forceDisposeProxies } from "../../gc-handles";
-import { GCHandle, GCHandleNull, WorkerToMainMessageType, monoMessageSymbol } from "../../types/internal";
-import { MonoWorkerToMainMessage } from "./types";
-import { monoThreadInfo } from "../worker";
-
-/// Messages sent on the dedicated mono channel between a pthread and the browser thread
-
-// We use a namespacing scheme to avoid collisions: type/command should be unique.
-export interface MonoThreadMessage {
-    // Type of message.  Generally a subsystem like "diagnostic_server", or "event_pipe", "debugger", etc.
-    type: string;
-    // A particular kind of message. For example, "started", "stopped", "stopped_with_error", etc.
-    cmd: string;
-}
-
-export function isMonoThreadMessage(x: unknown): x is MonoThreadMessage {
-    if (typeof (x) !== "object" || x === null) {
-        return false;
-    }
-    const xmsg = x as MonoThreadMessage;
-    return typeof (xmsg.type) === "string" && typeof (xmsg.cmd) === "string";
-}
-
-export function mono_wasm_install_js_worker_interop(context_gc_handle: GCHandle): void {
-    if (!WasmEnableThreads) return;
-    bindings_init();
-    if (!runtimeHelpers.proxy_context_gc_handle) {
-        runtimeHelpers.proxy_context_gc_handle = context_gc_handle;
-        mono_log_debug("Installed JSSynchronizationContext");
-    }
-    Module.runtimeKeepalivePush();
-    monoThreadInfo.isDirtyBecauseOfInterop = true;
-    update_thread_info();
-    if (ENVIRONMENT_IS_PTHREAD) {
-        postMessageToMain({
-            monoCmd: WorkerToMainMessageType.enabledInterop,
-            info: monoThreadInfo,
-        });
-    }
-}
-
-export function mono_wasm_uninstall_js_worker_interop(): void {
-    if (!WasmEnableThreads) return;
-    mono_assert(runtimeHelpers.mono_wasm_bindings_is_ready, "JS interop is not installed on this worker.");
-    mono_assert(runtimeHelpers.proxy_context_gc_handle, "JSSynchronizationContext is not installed on this worker.");
-
-    forceDisposeProxies(true, runtimeHelpers.diagnosticTracing);
-    Module.runtimeKeepalivePop();
-
-    runtimeHelpers.proxy_context_gc_handle = GCHandleNull;
-    runtimeHelpers.mono_wasm_bindings_is_ready = false;
-    update_thread_info();
-}
-
-// this is just for Debug build of the runtime, making it easier to debug worker threads
-export function update_thread_info(): void {
-    const threadType = monoThreadInfo.isUI ? "main"
-        : !monoThreadInfo.isAttached ? "emsc"
-            : monoThreadInfo.isTimer ? "timr"
-                : monoThreadInfo.isLongRunning ? "long"
-                    : monoThreadInfo.isThreadPoolGate ? "gate"
-                        : monoThreadInfo.isDebugger ? "dbgr"
-                            : monoThreadInfo.isThreadPoolWorker ? "pool"
-                                : monoThreadInfo.isExternalEventLoop ? "jsww"
-                                    : monoThreadInfo.isBackground ? "back"
-                                        : "norm";
-    monoThreadInfo.threadPrefix = `0x${monoThreadInfo.pthreadId.toString(16).padStart(8, "0")}-${threadType}`;
-
-    loaderHelpers.set_thread_prefix(monoThreadInfo.threadPrefix!);
-    if (!loaderHelpers.config.forwardConsoleLogsToWS) {
-        set_thread_prefix(monoThreadInfo.threadPrefix!);
-    }
-
-    (globalThis as any).monoThreadInfo = monoThreadInfo;
-    if (WasmEnableThreads && BuildConfiguration === "Debug" && !runtimeHelpers.cspPolicy) {
-        monoThreadInfo.updateCount++;
-        try {
-            (globalThis as any).monoThreadInfoFn = new Function(`//# sourceURL=https://${monoThreadInfo.updateCount}WorkerInfo${monoThreadInfo.isAttached ? monoThreadInfo.threadPrefix : ""}/\r\nconsole.log("${JSON.stringify(monoThreadInfo)}");`);
-        }
-        catch (ex) {
-            runtimeHelpers.cspPolicy = true;
-        }
-    }
-}
-
-export function mono_wasm_pthread_ptr(): number {
-    if (!WasmEnableThreads) return 0;
-    return (<any>Module)["_pthread_self"]();
-}
-
-export function mono_wasm_main_thread_ptr(): number {
-    if (!WasmEnableThreads) return 0;
-    return (<any>Module)["_emscripten_main_runtime_thread_id"]();
-}
-
-export function postMessageToMain(message: MonoWorkerToMainMessage, transfer?: Transferable[]) {
-    self.postMessage({
-        [monoMessageSymbol]: message
-    }, transfer ? transfer : []);
-}
\ No newline at end of file
diff --git a/src/mono/browser/runtime/pthreads/shared/tsconfig.json b/src/mono/browser/runtime/pthreads/shared/tsconfig.json
deleted file mode 100644
index 8986477dd8fc..000000000000
--- a/src/mono/browser/runtime/pthreads/shared/tsconfig.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-    "extends": "../../tsconfig.shared.json",
-    "include": [
-        "../../**/*.ts",
-        "../../**/*.d.ts"
-    ]
-
-}
diff --git a/src/mono/browser/runtime/pthreads/shared/types.ts b/src/mono/browser/runtime/pthreads/shared/types.ts
deleted file mode 100644
index 11716137a87e..000000000000
--- a/src/mono/browser/runtime/pthreads/shared/types.ts
+++ /dev/null
@@ -1,41 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-import type { WorkerToMainMessageType } from "../../types/internal";
-
-/// pthread_t in C
-export type pthreadPtr = number;
-
-export interface PThreadInfo {
-    pthreadId: pthreadPtr;
-
-    reuseCount: number,
-    updateCount: number,
-
-    threadName: string,
-    threadPrefix: string,
-
-    isLoaded?: boolean,
-    isRegistered?: boolean,
-    isRunning?: boolean,
-    isAttached?: boolean,
-    isExternalEventLoop?: boolean,
-    isUI?: boolean;
-    isBackground?: boolean,
-    isDebugger?: boolean,
-    isThreadPoolWorker?: boolean,
-    isTimer?: boolean,
-    isLongRunning?: boolean,
-    isThreadPoolGate?: boolean,
-    isFinalizer?: boolean,
-    isDirtyBecauseOfInterop?: boolean,
-}
-
-/// Messages sent from the main thread using Worker.postMessage or from the worker using DedicatedWorkerGlobalScope.postMessage
-/// should use this interface.  The message event is also used by emscripten internals (and possibly by 3rd party libraries targeting Emscripten).
-/// We should just use this to establish a dedicated MessagePort for Mono's uses.
-export interface MonoWorkerToMainMessage {
-    monoCmd: WorkerToMainMessageType;
-    info: PThreadInfo;
-    port?: MessagePort;
-}
diff --git a/src/mono/browser/runtime/pthreads/ui-thread.ts b/src/mono/browser/runtime/pthreads/ui-thread.ts
new file mode 100644
index 000000000000..14a7c9353b2f
--- /dev/null
+++ b/src/mono/browser/runtime/pthreads/ui-thread.ts
@@ -0,0 +1,349 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+import WasmEnableThreads from "consts:wasmEnableThreads";
+import BuildConfiguration from "consts:configuration";
+
+import { } from "../globals";
+import { MonoWorkerToMainMessage, monoThreadInfo, mono_wasm_pthread_ptr, update_thread_info, worker_empty_prefix } from "./shared";
+import { Module, ENVIRONMENT_IS_WORKER, createPromiseController, loaderHelpers, mono_assert, runtimeHelpers } from "../globals";
+import { PThreadLibrary, MainToWorkerMessageType, MonoThreadMessage, PThreadInfo, PThreadPtr, PThreadPtrNull, PThreadWorker, PromiseController, Thread, WorkerToMainMessageType, monoMessageSymbol } from "../types/internal";
+import { mono_log_error, mono_log_info, mono_log_debug } from "../logging";
+import { threads_c_functions as cwraps } from "../cwraps";
+
+const threadPromises: Map<PThreadPtr, PromiseController<Thread>[]> = new Map();
+
+class ThreadImpl implements Thread {
+    constructor (readonly pthreadPtr: PThreadPtr, readonly worker: Worker, readonly port: MessagePort) { }
+    postMessageToWorker<T extends MonoThreadMessage> (message: T): void {
+        this.port.postMessage(message);
+    }
+}
+
+/// wait until the thread with the given id has set up a message port to the runtime
+export function waitForThread (pthreadPtr: PThreadPtr): Promise<Thread> {
+    if (!WasmEnableThreads) return null as any;
+    mono_assert(!ENVIRONMENT_IS_WORKER, "waitForThread should only be called from the UI thread");
+    const worker = getWorker(pthreadPtr);
+    if (worker?.thread) {
+        return Promise.resolve(worker?.thread);
+    }
+    const promiseAndController = createPromiseController<Thread>();
+    const arr = threadPromises.get(pthreadPtr);
+    if (arr === undefined) {
+        threadPromises.set(pthreadPtr, [promiseAndController.promise_control]);
+    } else {
+        arr.push(promiseAndController.promise_control);
+    }
+    return promiseAndController.promise;
+}
+
+export function resolveThreadPromises (pthreadPtr: PThreadPtr, thread?: Thread): void {
+    if (!WasmEnableThreads) return;
+    const arr = threadPromises.get(pthreadPtr);
+    if (arr !== undefined) {
+        arr.forEach((controller) => {
+            if (thread) {
+                controller.resolve(thread);
+            } else {
+                controller.reject();
+            }
+        });
+        threadPromises.delete(pthreadPtr);
+    }
+}
+
+// handler that runs in the main thread when a message is received from a pthread worker
+function monoWorkerMessageHandler (worker: PThreadWorker, ev: MessageEvent<any>): void {
+    if (!WasmEnableThreads) return;
+    let pthreadId: PThreadPtr;
+    // this is emscripten message
+    if (ev.data.cmd === "killThread") {
+        pthreadId = ev.data["thread"];
+        mono_assert(pthreadId == worker.info.pthreadId, "expected pthreadId to match");
+        worker.info.isRunning = false;
+        worker.info.pthreadId = PThreadPtrNull;
+        return;
+    }
+
+    const message = ev.data[monoMessageSymbol] as MonoWorkerToMainMessage;
+    if (message === undefined) {
+        /// N.B. important to ignore messages we don't recognize - Emscripten uses the message event to send internal messages
+        return;
+    }
+
+    let port: MessagePort;
+    let thread: Thread;
+    pthreadId = message.info?.pthreadId ?? 0;
+    worker.info = Object.assign({}, worker.info, message.info);
+    switch (message.monoCmd) {
+        case WorkerToMainMessageType.preload:
+            // this one shot port from setupPreloadChannelToMainThread
+            port = message.port!;
+            port.postMessage({
+                type: "pthread",
+                cmd: MainToWorkerMessageType.applyConfig,
+                config: JSON.stringify(runtimeHelpers.config),
+                monoThreadInfo: JSON.stringify(worker.info),
+            });
+            port.close();
+            break;
+        case WorkerToMainMessageType.pthreadCreated:
+            port = message.port!;
+            thread = new ThreadImpl(pthreadId, worker, port);
+            worker.thread = thread;
+            worker.info.isRunning = true;
+            resolveThreadPromises(pthreadId, thread);
+            worker.info = Object.assign(worker.info!, message.info, {});
+            break;
+        case WorkerToMainMessageType.deputyStarted:
+            runtimeHelpers.afterMonoStarted.promise_control.resolve(message.deputyProxyGCHandle);
+            break;
+        case WorkerToMainMessageType.ioStarted:
+            runtimeHelpers.afterIOStarted.promise_control.resolve();
+            break;
+        case WorkerToMainMessageType.deputyFailed:
+            runtimeHelpers.afterMonoStarted.promise_control.reject(new Error(message.error));
+            break;
+        case WorkerToMainMessageType.monoRegistered:
+        case WorkerToMainMessageType.monoAttached:
+        case WorkerToMainMessageType.enabledInterop:
+        case WorkerToMainMessageType.monoUnRegistered:
+        case WorkerToMainMessageType.updateInfo:
+        case WorkerToMainMessageType.deputyCreated:
+            // just worker.info updates above
+            break;
+        default:
+            throw new Error(`Unhandled message from worker: ${message.monoCmd}`);
+    }
+}
+
+/// Called by Emscripten internals on the browser thread when a new pthread worker is created and added to the pthread worker pool.
+/// At this point the worker doesn't have any pthread assigned to it, yet.
+export function onWorkerLoadInitiated (worker: PThreadWorker, loaded: Promise<Worker>): void {
+    if (!WasmEnableThreads) return;
+    worker.addEventListener("message", (ev) => monoWorkerMessageHandler(worker, ev));
+    loaded.then(() => {
+        worker.info.isLoaded = true;
+    });
+}
+
+
+export function populateEmscriptenPool (): void {
+    if (!WasmEnableThreads) return;
+    const unused = getUnusedWorkerPool();
+    for (const worker of loaderHelpers.loadingWorkers) {
+        unused.push(worker);
+    }
+    loaderHelpers.loadingWorkers = [];
+}
+
+export async function mono_wasm_init_threads () {
+    if (!WasmEnableThreads) return;
+
+    // setup the UI thread
+    runtimeHelpers.currentThreadTID = monoThreadInfo.pthreadId = mono_wasm_pthread_ptr();
+    monoThreadInfo.threadName = "UI Thread";
+    monoThreadInfo.isUI = true;
+    monoThreadInfo.isRunning = true;
+    monoThreadInfo.workerNumber = 0;
+    update_thread_info();
+
+    // wait until all workers in the pool are loaded - ready to be used as pthread synchronously
+    const workers = getUnusedWorkerPool();
+    if (workers.length > 0) {
+        const promises = workers.map(loadWasmModuleToWorker);
+        await Promise.all(promises);
+    }
+}
+
+// when we create threads with browser event loop, it's not able to be joined by mono's thread join during shutdown and blocks process exit
+export function cancelThreads () {
+    if (!WasmEnableThreads) return;
+    const workers: PThreadWorker[] = getRunningWorkers();
+    for (const worker of workers) {
+        if (worker.info.isExternalEventLoop) {
+            worker.postMessage({ cmd: "cancel" });
+        }
+    }
+}
+
+export function mono_wasm_dump_threads (): void {
+    if (!WasmEnableThreads) return;
+    mono_log_info("Dumping web worker info as seen by UI thread, it could be stale: ");
+    const emptyInfo: PThreadInfo = {
+        workerNumber: -1,
+        pthreadId: PThreadPtrNull,
+        threadPrefix: worker_empty_prefix,
+        threadName: "????",
+        isRunning: false,
+        isAttached: false,
+        isExternalEventLoop: false,
+        reuseCount: 0,
+        updateCount: 0,
+    };
+    const threadInfos: PThreadInfo[] = [
+        Object.assign({}, emptyInfo, monoThreadInfo), // UI thread
+    ];
+    for (const worker of getRunningWorkers()) {
+        threadInfos.push(Object.assign({}, emptyInfo, worker.info));
+    }
+    for (const worker of getUnusedWorkerPool()) {
+        threadInfos.push(Object.assign({}, emptyInfo, worker.info));
+    }
+    threadInfos.forEach((info) => {
+        const idx = info.workerNumber.toString().padStart(3, "0");
+        const isRunning = (info.isRunning + "").padStart(5, " ");
+        const isAttached = (info.isAttached + "").padStart(5, " ");
+        const isEventLoop = (info.isExternalEventLoop + "").padStart(5, " ");
+        const reuseCount = (info.reuseCount + "").padStart(3, " ");
+        // eslint-disable-next-line no-console
+        console.info(`${idx} | ${info.threadPrefix}: isRunning:${isRunning} isAttached:${isAttached} isEventLoop:${isEventLoop} reuseCount:${reuseCount} - ${info.threadName}`);
+    });
+}
+
+export function init_finalizer_thread () {
+    // we don't need it immediately, so we can wait a bit, to keep CPU working on normal startup
+    setTimeout(() => {
+        try {
+            if (loaderHelpers.is_runtime_running()) {
+                cwraps.mono_wasm_init_finalizer_thread();
+            } else {
+                mono_log_debug("init_finalizer_thread skipped");
+            }
+        } catch (err) {
+            mono_log_error("init_finalizer_thread() failed", err);
+            loaderHelpers.mono_exit(1, err);
+        }
+    }, loaderHelpers.config.finalizerThreadStartDelayMs);
+}
+
+export function replaceEmscriptenPThreadUI (modulePThread: PThreadLibrary): void {
+    if (!WasmEnableThreads) return;
+
+    const originalLoadWasmModuleToWorker = modulePThread.loadWasmModuleToWorker;
+    const originalReturnWorkerToPool = modulePThread.returnWorkerToPool;
+
+    modulePThread.loadWasmModuleToWorker = (worker: PThreadWorker): Promise<PThreadWorker> => {
+        const afterLoaded = originalLoadWasmModuleToWorker(worker);
+        afterLoaded.then(() => {
+            availableThreadCount++;
+        });
+        onWorkerLoadInitiated(worker, afterLoaded);
+        if (loaderHelpers.config.exitOnUnhandledError) {
+            worker.onerror = (e) => {
+                loaderHelpers.mono_exit(1, e);
+            };
+        }
+        return afterLoaded;
+    };
+    modulePThread.allocateUnusedWorker = allocateUnusedWorker;
+    modulePThread.getNewWorker = () => getNewWorker(modulePThread);
+    modulePThread.returnWorkerToPool = (worker: PThreadWorker) => {
+        // when JS interop is installed on JSWebWorker
+        // we can't reuse the worker, because user code could leave the worker JS globals in a dirty state
+        worker.info.isRunning = false;
+        resolveThreadPromises(worker.pthread_ptr, undefined);
+        worker.info.pthreadId = PThreadPtrNull;
+        if (worker.thread?.port) {
+            worker.thread.port.close();
+        }
+        worker.thread = undefined;
+        if (worker.info && worker.info.isDirtyBecauseOfInterop) {
+            // we are on UI thread, invoke the handler directly to destroy the dirty worker
+            worker.onmessage!(new MessageEvent("message", {
+                data: {
+                    "cmd": "killThread",
+                    thread: worker.pthread_ptr
+                }
+            }));
+        } else {
+            availableThreadCount++;
+            originalReturnWorkerToPool(worker);
+        }
+    };
+    if (BuildConfiguration === "Debug") {
+        (globalThis as any).dumpThreads = mono_wasm_dump_threads;
+        (globalThis as any).getModulePThread = getModulePThread;
+    }
+}
+
+let availableThreadCount = 0;
+export function is_thread_available () {
+    if (!WasmEnableThreads) return true;
+    return availableThreadCount > 0;
+}
+
+function getNewWorker (modulePThread: PThreadLibrary): PThreadWorker {
+    if (!WasmEnableThreads) return null as any;
+
+    if (modulePThread.unusedWorkers.length == 0) {
+        mono_log_debug(`Failed to find unused WebWorker, this may deadlock. Please increase the pthreadPoolReady. Running threads ${modulePThread.runningWorkers.length}. Loading workers: ${modulePThread.unusedWorkers.length}`);
+        const worker = allocateUnusedWorker();
+        modulePThread.loadWasmModuleToWorker(worker);
+        availableThreadCount--;
+        return worker;
+    }
+
+    // keep them pre-allocated all the time, not just during startup
+    if (modulePThread.unusedWorkers.length <= loaderHelpers.config.pthreadPoolUnusedSize!) {
+        const worker = allocateUnusedWorker();
+        modulePThread.loadWasmModuleToWorker(worker);
+    }
+
+    for (let i = 0; i < modulePThread.unusedWorkers.length; i++) {
+        const worker = modulePThread.unusedWorkers[i];
+        if (worker.loaded) {
+            modulePThread.unusedWorkers.splice(i, 1);
+            availableThreadCount--;
+            return worker;
+        }
+    }
+    mono_log_debug(`Failed to find loaded WebWorker, this may deadlock. Please increase the pthreadPoolReady. Running threads ${modulePThread.runningWorkers.length}. Loading workers: ${modulePThread.unusedWorkers.length}`);
+    availableThreadCount--; // negative value
+    return modulePThread.unusedWorkers.pop()!;
+}
+
+/// We replace Module["PThreads"].allocateUnusedWorker with this version that knows about assets
+function allocateUnusedWorker (): PThreadWorker {
+    if (!WasmEnableThreads) return null as any;
+
+    const asset = loaderHelpers.resolve_single_asset_path("js-module-threads");
+    const uri = asset.resolvedUrl;
+    mono_assert(uri !== undefined, "could not resolve the uri for the js-module-threads asset");
+    const workerNumber = loaderHelpers.workerNextNumber++;
+    const worker = new Worker(uri, {
+        name: "dotnet-worker-" + workerNumber.toString().padStart(3, "0"),
+    }) as PThreadWorker;
+    getUnusedWorkerPool().push(worker);
+    worker.loaded = false;
+    worker.info = {
+        workerNumber,
+        pthreadId: PThreadPtrNull,
+        reuseCount: 0,
+        updateCount: 0,
+        threadPrefix: worker_empty_prefix,
+        threadName: "emscripten-pool",
+    };
+    return worker;
+}
+
+export function getWorker (pthreadPtr: PThreadPtr): PThreadWorker | undefined {
+    return getModulePThread().pthreads[pthreadPtr as any];
+}
+
+export function getUnusedWorkerPool (): PThreadWorker[] {
+    return getModulePThread().unusedWorkers;
+}
+
+export function getRunningWorkers (): PThreadWorker[] {
+    return getModulePThread().runningWorkers;
+}
+
+export function loadWasmModuleToWorker (worker: PThreadWorker): Promise<PThreadWorker> {
+    return getModulePThread().loadWasmModuleToWorker(worker);
+}
+
+export function getModulePThread (): PThreadLibrary {
+    return (<any>Module).PThread as PThreadLibrary;
+}
diff --git a/src/mono/browser/runtime/pthreads/worker/events.ts b/src/mono/browser/runtime/pthreads/worker-events.ts
similarity index 90%
rename from src/mono/browser/runtime/pthreads/worker/events.ts
rename to src/mono/browser/runtime/pthreads/worker-events.ts
index ace256459d43..63c108b01f84 100644
--- a/src/mono/browser/runtime/pthreads/worker/events.ts
+++ b/src/mono/browser/runtime/pthreads/worker-events.ts
@@ -2,7 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 import WasmEnableThreads from "consts:wasmEnableThreads";
-import { PThreadSelf } from "./index";
+import { PThreadSelf } from "./shared";
 
 export const dotnetPthreadCreated = "dotnet:pthread:created" as const;
 export const dotnetPthreadAttached = "dotnet:pthread:attached" as const;
@@ -30,10 +30,12 @@ export interface WorkerThreadEventTarget extends EventTarget {
 
 let WorkerThreadEventClassConstructor: new (type: keyof WorkerThreadEventMap, pthread_self: PThreadSelf) => WorkerThreadEvent;
 export const makeWorkerThreadEvent: (type: keyof WorkerThreadEventMap, pthread_self: PThreadSelf) => WorkerThreadEvent = !WasmEnableThreads
-    ? (() => { throw new Error("threads support disabled"); })
+    ? (() => {
+        throw new Error("threads support disabled");
+    })
     : ((type: keyof WorkerThreadEventMap, pthread_self: PThreadSelf) => {
         if (!WorkerThreadEventClassConstructor) WorkerThreadEventClassConstructor = class WorkerThreadEventImpl extends Event implements WorkerThreadEvent {
-            constructor(type: keyof WorkerThreadEventMap, readonly pthread_self: PThreadSelf) {
+            constructor (type: keyof WorkerThreadEventMap, readonly pthread_self: PThreadSelf) {
                 super(type);
             }
         };
diff --git a/src/mono/browser/runtime/pthreads/worker/index.ts b/src/mono/browser/runtime/pthreads/worker-thread.ts
similarity index 65%
rename from src/mono/browser/runtime/pthreads/worker/index.ts
rename to src/mono/browser/runtime/pthreads/worker-thread.ts
index 0ea6e82de3d9..fa1269c300cb 100644
--- a/src/mono/browser/runtime/pthreads/worker/index.ts
+++ b/src/mono/browser/runtime/pthreads/worker-thread.ts
@@ -5,21 +5,20 @@
 
 import WasmEnableThreads from "consts:wasmEnableThreads";
 
-import { ENVIRONMENT_IS_PTHREAD, loaderHelpers, mono_assert } from "../../globals";
-import { mono_wasm_pthread_ptr, postMessageToMain, update_thread_info } from "../shared";
-import { PThreadInfo } from "../shared/types";
-import { WorkerToMainMessageType, is_nullish } from "../../types/internal";
-import { MonoThreadMessage } from "../shared";
+import { ENVIRONMENT_IS_PTHREAD, Module, loaderHelpers, mono_assert, runtimeHelpers } from "../globals";
+import { PThreadSelf, monoThreadInfo, mono_wasm_pthread_ptr, postMessageToMain, update_thread_info } from "./shared";
+import { PThreadLibrary, MonoThreadMessage, PThreadInfo, PThreadPtr, WorkerToMainMessageType } from "../types/internal";
 import {
     makeWorkerThreadEvent,
     dotnetPthreadCreated,
     dotnetPthreadAttached,
     WorkerThreadEventTarget
-} from "./events";
-import { postRunWorker, preRunWorker } from "../../startup";
-import { mono_log_debug, mono_log_error } from "../../logging";
-import { CharPtr } from "../../types/emscripten";
-import { utf8ToString } from "../../strings";
+} from "./worker-events";
+import { postRunWorker, preRunWorker } from "../startup";
+import { mono_log_debug, mono_log_error } from "../logging";
+import { CharPtr } from "../types/emscripten";
+import { utf8ToString } from "../strings";
+import { forceThreadMemoryViewRefresh } from "../memory";
 
 // re-export some of the events types
 export {
@@ -28,43 +27,26 @@ export {
     dotnetPthreadCreated,
     WorkerThreadEvent,
     WorkerThreadEventTarget,
-} from "./events";
-
-/// Identification of the current thread executing on a worker
-export interface PThreadSelf {
-    info: PThreadInfo;
-    portToBrowser: MessagePort;
-    postMessageToBrowser: <T extends MonoThreadMessage>(message: T, transfer?: Transferable[]) => void;
-    addEventListenerFromBrowser: (listener: <T extends MonoThreadMessage>(event: MessageEvent<T>) => void) => void;
-}
+} from "./worker-events";
+
+export let pthread_self: PThreadSelf = null as any as PThreadSelf;
 
 class WorkerSelf implements PThreadSelf {
-    constructor(public info: PThreadInfo, public portToBrowser: MessagePort) {
+    constructor (public info: PThreadInfo, public portToBrowser: MessagePort) {
     }
 
-    postMessageToBrowser(message: MonoThreadMessage, transfer?: Transferable[]) {
+    postMessageToBrowser (message: MonoThreadMessage, transfer?: Transferable[]) {
         if (transfer) {
             this.portToBrowser.postMessage(message, transfer);
         } else {
             this.portToBrowser.postMessage(message);
         }
     }
-    addEventListenerFromBrowser(listener: (event: MessageEvent<MonoThreadMessage>) => void) {
+    addEventListenerFromBrowser (listener: (event: MessageEvent<MonoThreadMessage>) => void) {
         this.portToBrowser.addEventListener("message", listener);
     }
 }
 
-// we are lying that this is never null, but afterThreadInit should be the first time we get to run any code
-// in the worker, so this becomes non-null very early.
-export let pthread_self: PThreadSelf = null as any as PThreadSelf;
-export const monoThreadInfo: PThreadInfo = {
-    pthreadId: 0,
-    reuseCount: 0,
-    updateCount: 0,
-    threadPrefix: "          -    ",
-    threadName: "emscripten-loaded",
-};
-
 /// This is the "public internal" API for runtime subsystems that wish to be notified about
 /// pthreads that are running on the current worker.
 /// Example:
@@ -74,27 +56,33 @@ export const monoThreadInfo: PThreadInfo = {
 export let currentWorkerThreadEvents: WorkerThreadEventTarget = undefined as any;
 
 // this is very very early in the worker startup
-export function initWorkerThreadEvents() {
+export function initWorkerThreadEvents () {
     // treeshake if threads are disabled
     currentWorkerThreadEvents = WasmEnableThreads ? new globalThis.EventTarget() : null as any as WorkerThreadEventTarget;
+    Object.assign(monoThreadInfo, runtimeHelpers.monoThreadInfo);
 }
 
 // this is the message handler for the worker that receives messages from the main thread
 // extend this with new cases as needed
-function monoDedicatedChannelMessageFromMainToWorker(event: MessageEvent<string>): void {
+function monoDedicatedChannelMessageFromMainToWorker (event: MessageEvent<string>): void {
     mono_log_debug("got message from main on the dedicated channel", event.data);
 }
 
+export function on_emscripten_thread_init (pthread_ptr: PThreadPtr) {
+    runtimeHelpers.currentThreadTID = monoThreadInfo.pthreadId = pthread_ptr;
+    forceThreadMemoryViewRefresh();
+}
 
 /// Called by emscripten when a pthread is setup to run on a worker.  Can be called multiple times
 /// for the same webworker, since emscripten can reuse workers.
 /// This is an implementation detail, that shouldn't be used directly.
-export function mono_wasm_pthread_on_pthread_created(): void {
+export function mono_wasm_pthread_on_pthread_created (): void {
     if (!WasmEnableThreads) return;
     try {
+        forceThreadMemoryViewRefresh();
         const pthread_id = mono_wasm_pthread_ptr();
-        mono_assert(!is_nullish(pthread_id), "pthread_self() returned null");
-        monoThreadInfo.pthreadId = pthread_id;
+        mono_assert(pthread_id == monoThreadInfo.pthreadId, `needs to match (mono_wasm_pthread_ptr ${pthread_id}, threadId from thread info ${monoThreadInfo.pthreadId})`);
+
         monoThreadInfo.reuseCount++;
         monoThreadInfo.updateCount++;
         monoThreadInfo.threadName = "pthread-assigned";
@@ -122,8 +110,7 @@ export function mono_wasm_pthread_on_pthread_created(): void {
             info: monoThreadInfo,
             port: mainPort,
         }, [mainPort]);
-    }
-    catch (err) {
+    } catch (err) {
         mono_log_error("mono_wasm_pthread_on_pthread_created () failed", err);
         loaderHelpers.mono_exit(1, err);
         throw err;
@@ -131,17 +118,18 @@ export function mono_wasm_pthread_on_pthread_created(): void {
 }
 
 /// Called in the worker thread (not main thread) from mono when a pthread becomes registered to the mono runtime.
-export function mono_wasm_pthread_on_pthread_registered(pthread_id: number): void {
+export function mono_wasm_pthread_on_pthread_registered (pthread_id: PThreadPtr): void {
     if (!WasmEnableThreads) return;
     try {
         mono_assert(monoThreadInfo !== null && monoThreadInfo.pthreadId == pthread_id, "expected monoThreadInfo to be set already when registering");
+        monoThreadInfo.isRegistered = true;
+        update_thread_info();
         postMessageToMain({
             monoCmd: WorkerToMainMessageType.monoRegistered,
             info: monoThreadInfo,
         });
         preRunWorker();
-    }
-    catch (err) {
+    } catch (err) {
         mono_log_error("mono_wasm_pthread_on_pthread_registered () failed", err);
         loaderHelpers.mono_exit(1, err);
         throw err;
@@ -149,7 +137,7 @@ export function mono_wasm_pthread_on_pthread_registered(pthread_id: number): voi
 }
 
 /// Called in the worker thread (not main thread) from mono when a pthread becomes attached to the mono runtime.
-export function mono_wasm_pthread_on_pthread_attached(pthread_id: number, thread_name: CharPtr, background_thread: number, threadpool_thread: number, external_eventloop: number, debugger_thread: number): void {
+export function mono_wasm_pthread_on_pthread_attached (pthread_id: PThreadPtr, thread_name: CharPtr, background_thread: number, threadpool_thread: number, external_eventloop: number, debugger_thread: number): void {
     if (!WasmEnableThreads) return;
     try {
         mono_assert(monoThreadInfo !== null && monoThreadInfo.pthreadId == pthread_id, "expected monoThreadInfo to be set already when attaching");
@@ -172,15 +160,14 @@ export function mono_wasm_pthread_on_pthread_attached(pthread_id: number, thread
             monoCmd: WorkerToMainMessageType.monoAttached,
             info: monoThreadInfo,
         });
-    }
-    catch (err) {
+    } catch (err) {
         mono_log_error("mono_wasm_pthread_on_pthread_attached () failed", err);
         loaderHelpers.mono_exit(1, err);
         throw err;
     }
 }
 
-export function mono_wasm_pthread_set_name(name: CharPtr): void {
+export function mono_wasm_pthread_set_name (name: CharPtr): void {
     if (!WasmEnableThreads) return;
     if (!ENVIRONMENT_IS_PTHREAD) return;
     monoThreadInfo.threadName = utf8ToString(name);
@@ -192,21 +179,44 @@ export function mono_wasm_pthread_set_name(name: CharPtr): void {
 }
 
 /// Called in the worker thread (not main thread) from mono when a pthread becomes detached from the mono runtime.
-export function mono_wasm_pthread_on_pthread_unregistered(pthread_id: number): void {
+export function mono_wasm_pthread_on_pthread_unregistered (pthread_id: PThreadPtr): void {
     if (!WasmEnableThreads) return;
     try {
         mono_assert(pthread_id === monoThreadInfo.pthreadId, "expected pthread_id to match when un-registering");
         postRunWorker();
         monoThreadInfo.isAttached = false;
+        monoThreadInfo.isRegistered = false;
+        monoThreadInfo.threadName = "unregistered:(" + monoThreadInfo.threadName + ")";
         update_thread_info();
         postMessageToMain({
             monoCmd: WorkerToMainMessageType.monoUnRegistered,
             info: monoThreadInfo,
         });
-    }
-    catch (err) {
+    } catch (err) {
         mono_log_error("mono_wasm_pthread_on_pthread_unregistered () failed", err);
         loaderHelpers.mono_exit(1, err);
         throw err;
     }
 }
+
+export function replaceEmscriptenTLSInit (modulePThread: PThreadLibrary): void {
+    if (!WasmEnableThreads) return;
+
+    const originalThreadInitTLS = modulePThread.threadInitTLS;
+
+    modulePThread.threadInitTLS = (): void => {
+        originalThreadInitTLS();
+        mono_wasm_pthread_on_pthread_created();
+    };
+}
+
+export function replaceEmscriptenPThreadInit (): void {
+    const original_emscripten_thread_init = Module["__emscripten_thread_init"];
+    function emscripten_thread_init_wrapper (pthread_ptr: PThreadPtr, isMainBrowserThread: number, isMainRuntimeThread: number, canBlock: number) {
+        on_emscripten_thread_init(pthread_ptr);
+        original_emscripten_thread_init(pthread_ptr, isMainBrowserThread, isMainRuntimeThread, canBlock);
+        // re-install self
+        Module["__emscripten_thread_init"] = emscripten_thread_init_wrapper;
+    }
+    Module["__emscripten_thread_init"] = emscripten_thread_init_wrapper;
+}
diff --git a/src/mono/browser/runtime/pthreads/worker/tsconfig.json b/src/mono/browser/runtime/pthreads/worker/tsconfig.json
deleted file mode 100644
index 071a4d824c62..000000000000
--- a/src/mono/browser/runtime/pthreads/worker/tsconfig.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-    "extends": "../../tsconfig.worker.json",
-    "include": [
-        "../../**/*.ts",
-        "../../**/*.d.ts"
-    ]
-}
diff --git a/src/mono/browser/runtime/queue.ts b/src/mono/browser/runtime/queue.ts
index b31b4790e86c..a6ccd00615ca 100644
--- a/src/mono/browser/runtime/queue.ts
+++ b/src/mono/browser/runtime/queue.ts
@@ -6,19 +6,19 @@ export class Queue<T> {
     private queue: T[];
     private offset: number;
 
-    constructor() {
+    constructor () {
         this.queue = [];
         this.offset = 0;
     }
     // initialise the queue and offset
 
     // Returns the length of the queue.
-    getLength(): number {
+    getLength (): number {
         return (this.queue.length - this.offset);
     }
 
     // Returns true if the queue is empty, and false otherwise.
-    isEmpty(): boolean {
+    isEmpty (): boolean {
         return (this.queue.length == 0);
     }
 
@@ -26,14 +26,14 @@ export class Queue<T> {
     *
     * item - the item to enqueue
     */
-    enqueue(item: T): void {
+    enqueue (item: T): void {
         this.queue.push(item);
     }
 
     /* Dequeues an item and returns it. If the queue is empty, the value
     * 'undefined' is returned.
     */
-    dequeue(): T | undefined {
+    dequeue (): T | undefined {
 
         // if the queue is empty, return immediately
         if (this.queue.length === 0) return undefined;
@@ -57,14 +57,14 @@ export class Queue<T> {
     /* Returns the item at the front of the queue (without dequeuing it). If the
      * queue is empty then undefined is returned.
      */
-    peek(): T | undefined {
+    peek (): T | undefined {
         return (this.queue.length > 0 ? this.queue[this.offset] : undefined);
     }
 
-    drain(onEach: (item: T) => void): void {
+    drain (onEach: (item: T) => void): void {
         while (this.getLength()) {
             const item = this.dequeue()!;
             onEach(item);
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/mono/browser/runtime/rollup.config.js b/src/mono/browser/runtime/rollup.config.js
index 8821c3c1a2a9..cf7b9dfcbea1 100644
--- a/src/mono/browser/runtime/rollup.config.js
+++ b/src/mono/browser/runtime/rollup.config.js
@@ -109,7 +109,7 @@ const envConstants = {
 };
 
 const locationCache = {};
-function sourcemapPathTransform(relativeSourcePath, sourcemapPath) {
+function sourcemapPathTransform (relativeSourcePath, sourcemapPath) {
     let res = locationCache[relativeSourcePath];
     if (res === undefined) {
         if (!isContinuousIntegrationBuild) {
@@ -127,7 +127,7 @@ function sourcemapPathTransform(relativeSourcePath, sourcemapPath) {
     return res;
 }
 
-function consts(dict) {
+function consts (dict) {
     // implement rollup-plugin-const in terms of @rollup/plugin-virtual
     // It's basically the same thing except "consts" names all its modules with a "consts:" prefix,
     // and the virtual module always exports a single default binding (the const value).
@@ -247,7 +247,7 @@ if (isDebug) {
 }
 
 /* Web Workers */
-function makeWorkerConfig(workerName, workerInputSourcePath) {
+function makeWorkerConfig (workerName, workerInputSourcePath) {
     const workerConfig = {
         input: workerInputSourcePath,
         output: [
@@ -275,14 +275,14 @@ const allConfigs = [
     .concat(diagnosticMockTypesConfig ? [diagnosticMockTypesConfig] : []);
 export default defineConfig(allConfigs);
 
-function evalCodePlugin() {
+function evalCodePlugin () {
     return {
         name: "evalCode",
         generateBundle: evalCode
     };
 }
 
-async function evalCode(options, bundle) {
+async function evalCode (options, bundle) {
     try {
         const name = Object.keys(bundle)[0];
         const asset = bundle[name];
@@ -298,7 +298,7 @@ async function evalCode(options, bundle) {
 
 
 // this would create .sha256 file next to the output file, so that we do not touch datetime of the file if it's same -> faster incremental build.
-function writeOnChangePlugin() {
+function writeOnChangePlugin () {
     return {
         name: "writeOnChange",
         generateBundle: writeWhenChanged
@@ -306,7 +306,7 @@ function writeOnChangePlugin() {
 }
 
 // force always unix line ending
-function alwaysLF() {
+function alwaysLF () {
     return {
         name: "writeOnChange",
         generateBundle: (options, bundle) => {
@@ -318,7 +318,7 @@ function alwaysLF() {
     };
 }
 
-async function writeWhenChanged(options, bundle) {
+async function writeWhenChanged (options, bundle) {
     try {
         const name = Object.keys(bundle)[0];
         const asset = bundle[name];
@@ -349,31 +349,31 @@ async function writeWhenChanged(options, bundle) {
     }
 }
 
-function checkFileExists(file) {
+function checkFileExists (file) {
     return fs.promises.access(file, fs.constants.F_OK)
         .then(() => true)
         .catch(() => false);
 }
 
-function regexCheck(checks = []) {
+function regexCheck (checks = []) {
     const filter = createFilter("**/*.ts");
 
     return {
         name: "regexCheck",
 
-        renderChunk(code, chunk) {
+        renderChunk (code, chunk) {
             const id = chunk.fileName;
             if (!filter(id)) return null;
             return executeCheck(this, code, id);
         },
 
-        transform(code, id) {
+        transform (code, id) {
             if (!filter(id)) return null;
             return executeCheck(this, code, id);
         }
     };
 
-    function executeCheck(self, code, id) {
+    function executeCheck (self, code, id) {
         // self.warn("executeCheck" + id);
         for (const rep of checks) {
             const { pattern, failure } = rep;
@@ -389,25 +389,25 @@ function regexCheck(checks = []) {
 }
 
 
-function regexReplace(replacements = []) {
+function regexReplace (replacements = []) {
     const filter = createFilter("**/*.ts");
 
     return {
         name: "regexReplace",
 
-        renderChunk(code, chunk) {
+        renderChunk (code, chunk) {
             const id = chunk.fileName;
             if (!filter(id)) return null;
             return executeReplacement(this, code, id);
         },
 
-        transform(code, id) {
+        transform (code, id) {
             if (!filter(id)) return null;
             return executeReplacement(this, code, id);
         }
     };
 
-    function executeReplacement(_, code, id) {
+    function executeReplacement (_, code, id) {
         const magicString = new MagicString(code);
         if (!codeHasReplacements(code, id, magicString)) {
             return null;
@@ -418,7 +418,7 @@ function regexReplace(replacements = []) {
         return result;
     }
 
-    function codeHasReplacements(code, id, magicString) {
+    function codeHasReplacements (code, id, magicString) {
         let result = false;
         let match;
         for (const rep of replacements) {
@@ -443,7 +443,7 @@ function regexReplace(replacements = []) {
 // Returns an array of objects {"workerName": "foo", "path": "/path/dotnet-foo-worker.ts"}
 //
 // A file looks like a webworker toplevel input if it's `dotnet-{name}-worker.ts` or `.js`
-function findWebWorkerInputs(basePath) {
+function findWebWorkerInputs (basePath) {
     const glob = "dotnet-*-worker.[tj]s";
     const files = fast_glob.sync(glob, { cwd: basePath });
     if (files.length == 0) {
@@ -460,7 +460,7 @@ function findWebWorkerInputs(basePath) {
     return results;
 }
 
-function onwarn(warning) {
+function onwarn (warning) {
     if (warning.code === "CIRCULAR_DEPENDENCY") {
         return;
     }
diff --git a/src/mono/browser/runtime/roots.ts b/src/mono/browser/runtime/roots.ts
index e34e56f4c1b1..12efdddee8dd 100644
--- a/src/mono/browser/runtime/roots.ts
+++ b/src/mono/browser/runtime/roots.ts
@@ -2,11 +2,14 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 import NativeAOT from "consts:nativeAOT";
+import WasmEnableThreads from "consts:wasmEnableThreads";
+
 import cwraps from "./cwraps";
-import { Module } from "./globals";
+import { Module, mono_assert, runtimeHelpers } from "./globals";
 import { VoidPtr, ManagedPointer, NativePointer } from "./types/emscripten";
 import { MonoObjectRef, MonoObjectRefNull, MonoObject, is_nullish, WasmRoot, WasmRootBuffer } from "./types/internal";
 import { _zero_region, localHeapViewU32 } from "./memory";
+import { gc_locked } from "./gc-lock";
 
 const maxScratchRoots = 8192;
 let _scratch_root_buffer: WasmRootBuffer | null = null;
@@ -21,7 +24,8 @@ const _external_root_free_instances: WasmExternalRoot<any>[] = [];
  * Once you are done using the root buffer, you must call its release() method.
  * For small numbers of roots, it is preferable to use the mono_wasm_new_root and mono_wasm_new_roots APIs instead.
  */
-export function mono_wasm_new_root_buffer(capacity: number, name?: string): WasmRootBuffer {
+export function mono_wasm_new_root_buffer (capacity: number, name?: string): WasmRootBuffer {
+    if (WasmEnableThreads && runtimeHelpers.disableManagedTransition) throw new Error("External roots are not supported when threads are enabled");
     if (capacity <= 0)
         throw new Error("capacity >= 1");
 
@@ -37,30 +41,11 @@ export function mono_wasm_new_root_buffer(capacity: number, name?: string): Wasm
     return new WasmRootBufferImpl(offset, capacity, true, name);
 }
 
-/**
- * Creates a root buffer object representing an existing allocation in the native heap and registers
- *  the allocation with the GC. The caller is responsible for managing the lifetime of the allocation.
- */
-export function mono_wasm_new_root_buffer_from_pointer(offset: VoidPtr, capacity: number, name?: string): WasmRootBuffer {
-    if (capacity <= 0)
-        throw new Error("capacity >= 1");
-
-    capacity = capacity | 0;
-
-    const capacityBytes = capacity * 4;
-    if ((<any>offset % 4) !== 0)
-        throw new Error("Unaligned offset");
-
-    _zero_region(offset, capacityBytes);
-
-    return new WasmRootBufferImpl(offset, capacity, false, name);
-}
-
 /**
  * Allocates a WasmRoot pointing to a root provided and controlled by external code. Typicaly on managed stack.
  * Releasing this root will not de-allocate the root space. You still need to call .release().
  */
-export function mono_wasm_new_external_root<T extends MonoObject>(address: VoidPtr | MonoObjectRef): WasmRoot<T> {
+export function mono_wasm_new_external_root<T extends MonoObject> (address: VoidPtr | MonoObjectRef): WasmRoot<T> {
     if (NativeAOT) {
         return {
             // eslint-disable-next-line @typescript-eslint/no-empty-function
@@ -70,6 +55,7 @@ export function mono_wasm_new_external_root<T extends MonoObject>(address: VoidP
         } as unknown as WasmRoot<T>;
     }
 
+    if (WasmEnableThreads && runtimeHelpers.disableManagedTransition) throw new Error("External roots are not supported in multithreaded mode");
     let result: WasmExternalRoot<T>;
 
     if (!address)
@@ -92,7 +78,8 @@ export function mono_wasm_new_external_root<T extends MonoObject>(address: VoidP
  * The result object has get() and set(value) methods, along with a .value property.
  * When you are done using the root you must call its .release() method.
  */
-export function mono_wasm_new_root<T extends MonoObject>(value: T | undefined = undefined): WasmRoot<T> {
+export function mono_wasm_new_root<T extends MonoObject> (value: T | undefined = undefined): WasmRoot<T> {
+    if (WasmEnableThreads && runtimeHelpers.disableManagedTransition) throw new Error("External roots are not supported in multithreaded mode");
     let result: WasmRoot<T>;
 
     if (_scratch_root_free_instances.length > 0) {
@@ -122,7 +109,7 @@ export function mono_wasm_new_root<T extends MonoObject>(value: T | undefined =
  * mono_wasm_new_roots([a, b, ...]) returns an array of new roots initialized with each element.
  * Each root must be released with its release method, or using the mono_wasm_release_roots API.
  */
-export function mono_wasm_new_roots<T extends MonoObject>(count_or_values: number | T[]): WasmRoot<T>[] {
+export function mono_wasm_new_roots<T extends MonoObject> (count_or_values: number | T[]): WasmRoot<T>[] {
     let result;
 
     if (Array.isArray(count_or_values)) {
@@ -147,7 +134,7 @@ export function mono_wasm_new_roots<T extends MonoObject>(count_or_values: numbe
  *  even if you are not sure all of your roots have been created yet.
  * @param {... WasmRoot} roots
  */
-export function mono_wasm_release_roots(...args: WasmRoot<any>[]): void {
+export function mono_wasm_release_roots (...args: WasmRoot<any>[]): void {
     for (let i = 0; i < args.length; i++) {
         if (is_nullish(args[i]))
             continue;
@@ -156,7 +143,7 @@ export function mono_wasm_release_roots(...args: WasmRoot<any>[]): void {
     }
 }
 
-function _mono_wasm_release_scratch_index(index: number) {
+function _mono_wasm_release_scratch_index (index: number) {
     if (index === undefined)
         return;
 
@@ -165,7 +152,7 @@ function _mono_wasm_release_scratch_index(index: number) {
     _scratch_root_free_indices_count++;
 }
 
-function _mono_wasm_claim_scratch_index() {
+function _mono_wasm_claim_scratch_index () {
     if (is_nullish(_scratch_root_buffer) || !_scratch_root_free_indices) {
         _scratch_root_buffer = mono_wasm_new_root_buffer(maxScratchRoots, "js roots");
 
@@ -191,32 +178,33 @@ export class WasmRootBufferImpl implements WasmRootBuffer {
     private __handle: number;
     private __ownsAllocation: boolean;
 
-    constructor(offset: VoidPtr, capacity: number, ownsAllocation: boolean, name?: string) {
+    constructor (offset: VoidPtr, capacity: number, ownsAllocation: boolean, name?: string) {
         const capacityBytes = capacity * 4;
 
         this.__offset = offset;
         this.__offset32 = <number><any>offset >>> 2;
         this.__count = capacity;
         this.length = capacity;
+        mono_assert(!WasmEnableThreads || !gc_locked, "GC must not be locked when creating a GC root");
         this.__handle = cwraps.mono_wasm_register_root(offset, capacityBytes, name || "noname");
         this.__ownsAllocation = ownsAllocation;
     }
 
-    _throw_index_out_of_range(): void {
+    _throw_index_out_of_range (): void {
         throw new Error("index out of range");
     }
 
-    _check_in_range(index: number): void {
+    _check_in_range (index: number): void {
         if ((index >= this.__count) || (index < 0))
             this._throw_index_out_of_range();
     }
 
-    get_address(index: number): MonoObjectRef {
+    get_address (index: number): MonoObjectRef {
         this._check_in_range(index);
         return <any>this.__offset + (index * 4);
     }
 
-    get_address_32(index: number): number {
+    get_address_32 (index: number): number {
         this._check_in_range(index);
         return this.__offset32 + index;
     }
@@ -224,39 +212,40 @@ export class WasmRootBufferImpl implements WasmRootBuffer {
     // NOTE: These functions do not use the helpers from memory.ts because WasmRoot.get and WasmRoot.set
     //  are hot-spots when you profile any application that uses the bindings extensively.
 
-    get(index: number): ManagedPointer {
+    get (index: number): ManagedPointer {
         this._check_in_range(index);
         const offset = this.get_address_32(index);
         return <any>localHeapViewU32()[offset];
     }
 
-    set(index: number, value: ManagedPointer): ManagedPointer {
+    set (index: number, value: ManagedPointer): ManagedPointer {
         const address = this.get_address(index);
         cwraps.mono_wasm_write_managed_pointer_unsafe(address, value);
         return value;
     }
 
-    copy_value_from_address(index: number, sourceAddress: MonoObjectRef): void {
+    copy_value_from_address (index: number, sourceAddress: MonoObjectRef): void {
         const destinationAddress = this.get_address(index);
         cwraps.mono_wasm_copy_managed_pointer(destinationAddress, sourceAddress);
     }
 
-    _unsafe_get(index: number): number {
+    _unsafe_get (index: number): number {
         return localHeapViewU32()[this.__offset32 + index];
     }
 
-    _unsafe_set(index: number, value: ManagedPointer | NativePointer): void {
+    _unsafe_set (index: number, value: ManagedPointer | NativePointer): void {
         const address = <any>this.__offset + index;
         cwraps.mono_wasm_write_managed_pointer_unsafe(<VoidPtr><any>address, <ManagedPointer>value);
     }
 
-    clear(): void {
+    clear (): void {
         if (this.__offset)
             _zero_region(this.__offset, this.__count * 4);
     }
 
-    release(): void {
+    release (): void {
         if (this.__offset && this.__ownsAllocation) {
+            mono_assert(!WasmEnableThreads || !gc_locked, "GC must not be locked when disposing a GC root");
             cwraps.mono_wasm_deregister_root(this.__offset);
             _zero_region(this.__offset, this.__count * 4);
             Module._free(this.__offset);
@@ -265,7 +254,7 @@ export class WasmRootBufferImpl implements WasmRootBuffer {
         this.__handle = (<any>this.__offset) = this.__count = this.__offset32 = 0;
     }
 
-    toString(): string {
+    toString (): string {
         return `[root buffer @${this.get_address(0)}, size ${this.__count} ]`;
     }
 }
@@ -274,76 +263,76 @@ class WasmJsOwnedRoot<T extends MonoObject> implements WasmRoot<T> {
     private __buffer: WasmRootBuffer;
     private __index: number;
 
-    constructor(buffer: WasmRootBuffer, index: number) {
+    constructor (buffer: WasmRootBuffer, index: number) {
         this.__buffer = buffer;//TODO
         this.__index = index;
     }
 
-    get_address(): MonoObjectRef {
+    get_address (): MonoObjectRef {
         return this.__buffer.get_address(this.__index);
     }
 
-    get_address_32(): number {
+    get_address_32 (): number {
         return this.__buffer.get_address_32(this.__index);
     }
 
-    get address(): MonoObjectRef {
+    get address (): MonoObjectRef {
         return this.__buffer.get_address(this.__index);
     }
 
-    get(): T {
+    get (): T {
         const result = (<WasmRootBufferImpl>this.__buffer)._unsafe_get(this.__index);
         return <any>result;
     }
 
-    set(value: T): T {
+    set (value: T): T {
         const destinationAddress = this.__buffer.get_address(this.__index);
         cwraps.mono_wasm_write_managed_pointer_unsafe(destinationAddress, <ManagedPointer>value);
         return value;
     }
 
-    copy_from(source: WasmRoot<T>): void {
+    copy_from (source: WasmRoot<T>): void {
         const sourceAddress = source.address;
         const destinationAddress = this.address;
         cwraps.mono_wasm_copy_managed_pointer(destinationAddress, sourceAddress);
     }
 
-    copy_to(destination: WasmRoot<T>): void {
+    copy_to (destination: WasmRoot<T>): void {
         const sourceAddress = this.address;
         const destinationAddress = destination.address;
         cwraps.mono_wasm_copy_managed_pointer(destinationAddress, sourceAddress);
     }
 
-    copy_from_address(source: MonoObjectRef): void {
+    copy_from_address (source: MonoObjectRef): void {
         const destinationAddress = this.address;
         cwraps.mono_wasm_copy_managed_pointer(destinationAddress, source);
     }
 
-    copy_to_address(destination: MonoObjectRef): void {
+    copy_to_address (destination: MonoObjectRef): void {
         const sourceAddress = this.address;
         cwraps.mono_wasm_copy_managed_pointer(destination, sourceAddress);
     }
 
-    get value(): T {
+    get value (): T {
         return this.get();
     }
 
-    set value(value: T) {
+    set value (value: T) {
         this.set(value);
     }
 
-    valueOf(): T {
+    valueOf (): T {
         throw new Error("Implicit conversion of roots to pointers is no longer supported. Use .value or .address as appropriate");
     }
 
-    clear(): void {
+    clear (): void {
         // .set performs an expensive write barrier, and that is not necessary in most cases
         //  for clear since clearing a root cannot cause new objects to survive a GC
         const address32 = this.__buffer.get_address_32(this.__index);
         localHeapViewU32()[address32] = 0;
     }
 
-    release(): void {
+    release (): void {
         if (!this.__buffer)
             throw new Error("No buffer");
 
@@ -358,7 +347,7 @@ class WasmJsOwnedRoot<T extends MonoObject> implements WasmRoot<T> {
         }
     }
 
-    toString(): string {
+    toString (): string {
         return `[root @${this.address}]`;
     }
 }
@@ -367,84 +356,84 @@ class WasmExternalRoot<T extends MonoObject> implements WasmRoot<T> {
     private __external_address: MonoObjectRef = MonoObjectRefNull;
     private __external_address_32: number = <any>0;
 
-    constructor(address: NativePointer | ManagedPointer) {
+    constructor (address: NativePointer | ManagedPointer) {
         this._set_address(address);
     }
 
-    _set_address(address: NativePointer | ManagedPointer): void {
+    _set_address (address: NativePointer | ManagedPointer): void {
         this.__external_address = <MonoObjectRef><any>address;
         this.__external_address_32 = <number><any>address >>> 2;
     }
 
-    get address(): MonoObjectRef {
+    get address (): MonoObjectRef {
         return <MonoObjectRef><any>this.__external_address;
     }
 
-    get_address(): MonoObjectRef {
+    get_address (): MonoObjectRef {
         return <MonoObjectRef><any>this.__external_address;
     }
 
-    get_address_32(): number {
+    get_address_32 (): number {
         return this.__external_address_32;
     }
 
-    get(): T {
+    get (): T {
         const result = localHeapViewU32()[this.__external_address_32];
         return <any>result;
     }
 
-    set(value: T): T {
+    set (value: T): T {
         cwraps.mono_wasm_write_managed_pointer_unsafe(this.__external_address, <ManagedPointer>value);
         return value;
     }
 
-    copy_from(source: WasmRoot<T>): void {
+    copy_from (source: WasmRoot<T>): void {
         const sourceAddress = source.address;
         const destinationAddress = this.__external_address;
         cwraps.mono_wasm_copy_managed_pointer(destinationAddress, sourceAddress);
     }
 
-    copy_to(destination: WasmRoot<T>): void {
+    copy_to (destination: WasmRoot<T>): void {
         const sourceAddress = this.__external_address;
         const destinationAddress = destination.address;
         cwraps.mono_wasm_copy_managed_pointer(destinationAddress, sourceAddress);
     }
 
-    copy_from_address(source: MonoObjectRef): void {
+    copy_from_address (source: MonoObjectRef): void {
         const destinationAddress = this.__external_address;
         cwraps.mono_wasm_copy_managed_pointer(destinationAddress, source);
     }
 
-    copy_to_address(destination: MonoObjectRef): void {
+    copy_to_address (destination: MonoObjectRef): void {
         const sourceAddress = this.__external_address;
         cwraps.mono_wasm_copy_managed_pointer(destination, sourceAddress);
     }
 
-    get value(): T {
+    get value (): T {
         return this.get();
     }
 
-    set value(value: T) {
+    set value (value: T) {
         this.set(value);
     }
 
-    valueOf(): T {
+    valueOf (): T {
         throw new Error("Implicit conversion of roots to pointers is no longer supported. Use .value or .address as appropriate");
     }
 
-    clear(): void {
+    clear (): void {
         // .set performs an expensive write barrier, and that is not necessary in most cases
         //  for clear since clearing a root cannot cause new objects to survive a GC
         localHeapViewU32()[<any>this.__external_address >>> 2] = 0;
     }
 
-    release(): void {
+    release (): void {
         const maxPooledInstances = 128;
         if (_external_root_free_instances.length < maxPooledInstances)
             _external_root_free_instances.push(this);
     }
 
-    toString(): string {
+    toString (): string {
         return `[external root @${this.address}]`;
     }
 }
diff --git a/src/mono/browser/runtime/run.ts b/src/mono/browser/runtime/run.ts
index deece7dca320..0a16ce45d533 100644
--- a/src/mono/browser/runtime/run.ts
+++ b/src/mono/browser/runtime/run.ts
@@ -2,23 +2,21 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 import NativeAOT from "consts:nativeAOT";
-import { Module } from "./globals";
 
 import WasmEnableThreads from "consts:wasmEnableThreads";
 
-import { ENVIRONMENT_IS_NODE, loaderHelpers, mono_assert, runtimeHelpers } from "./globals";
+import { ENVIRONMENT_IS_NODE, Module, loaderHelpers, mono_assert, runtimeHelpers } from "./globals";
 import { mono_wasm_wait_for_debugger } from "./debug";
 import { mono_wasm_set_main_args } from "./startup";
 import cwraps from "./cwraps";
 import { mono_log_info } from "./logging";
-import { assert_js_interop } from "./invoke-js";
-import { assembly_load } from "./invoke-cs";
-import { cancelThreads } from "./pthreads/browser";
+import { cancelThreads } from "./pthreads";
+import { call_entry_point } from "./managed-exports";
 
 /**
  * Possible signatures are described here  https://docs.microsoft.com/en-us/dotnet/csharp/fundamentals/program-structure/main-command-line
  */
-export async function mono_run_main_and_exit(main_assembly_name?: string, args?: string[]): Promise<number> {
+export async function mono_run_main_and_exit (main_assembly_name?: string, args?: string[]): Promise<number> {
     try {
         const result = await mono_run_main(main_assembly_name, args);
         loaderHelpers.mono_exit(result);
@@ -26,8 +24,7 @@ export async function mono_run_main_and_exit(main_assembly_name?: string, args?:
     } catch (error: any) {
         try {
             loaderHelpers.mono_exit(1, error);
-        }
-        catch (e) {
+        } catch (e) {
             // ignore
         }
         if (error && typeof error.status === "number") {
@@ -40,7 +37,7 @@ export async function mono_run_main_and_exit(main_assembly_name?: string, args?:
 /**
  * Possible signatures are described here  https://docs.microsoft.com/en-us/dotnet/csharp/fundamentals/program-structure/main-command-line
  */
-export async function mono_run_main(main_assembly_name?: string, args?: string[]): Promise<number> {
+export async function mono_run_main (main_assembly_name?: string, args?: string[]): Promise<number> {
     if (main_assembly_name === undefined || main_assembly_name === null || main_assembly_name === "") {
         main_assembly_name = loaderHelpers.config.mainAssemblyName;
         mono_assert(main_assembly_name, "Null or empty config.mainAssemblyName");
@@ -58,51 +55,41 @@ export async function mono_run_main(main_assembly_name?: string, args?: string[]
             args = [];
         }
     }
-    
+
     if (NativeAOT) {
         return (Module as any)["callMain"](args);
     }
-    
+
     mono_wasm_set_main_args(main_assembly_name, args);
+    loaderHelpers.config.mainAssemblyName = main_assembly_name;
+
     if (runtimeHelpers.waitForDebugger == -1) {
         mono_log_info("waiting for debugger...");
         await mono_wasm_wait_for_debugger();
     }
-    const method = find_entry_point(main_assembly_name);
 
-    const res = await runtimeHelpers.javaScriptExports.call_entry_point(method, args);
+    try {
+        Module.runtimeKeepalivePush();
 
-    // one more timer loop before we return, so that any remaining queued calls could run
-    await new Promise(resolve => globalThis.setTimeout(resolve, 0));
+        // one more timer loop before we return, so that any remaining queued calls could run
+        await new Promise(resolve => globalThis.setTimeout(resolve, 0));
 
-    return res;
+        return await call_entry_point(main_assembly_name, args, runtimeHelpers.waitForDebugger == 1);
+    } finally {
+        Module.runtimeKeepalivePop();// after await promise !
+    }
 }
 
-export function find_entry_point(assembly: string) {
-    loaderHelpers.assert_runtime_running();
-    assert_js_interop();
-    const asm = assembly_load(assembly);
-    if (!asm)
-        throw new Error("Could not find assembly: " + assembly);
 
-    let auto_set_breakpoint = 0;
-    if (runtimeHelpers.waitForDebugger == 1)
-        auto_set_breakpoint = 1;
-
-    const method = cwraps.mono_wasm_assembly_get_entry_point(asm, auto_set_breakpoint);
-    if (!method)
-        throw new Error("Could not find entry point for assembly: " + assembly);
-    return method;
-}
 
-export function nativeExit(code: number) {
+export function nativeExit (code: number) {
     if (WasmEnableThreads) {
         cancelThreads();
     }
     cwraps.mono_wasm_exit(code);
 }
 
-export function nativeAbort(reason: any) {
+export function nativeAbort (reason: any) {
     loaderHelpers.exitReason = reason;
     if (!loaderHelpers.is_exited()) {
         cwraps.mono_wasm_abort();
diff --git a/src/mono/browser/runtime/runtime.c b/src/mono/browser/runtime/runtime.c
index 73e64f0a8a3f..7dbdc02c2599 100644
--- a/src/mono/browser/runtime/runtime.c
+++ b/src/mono/browser/runtime/runtime.c
@@ -76,6 +76,7 @@ int monoeg_g_setenv(const char *variable, const char *value, int overwrite);
 int32_t mini_parse_debug_option (const char *option);
 char *mono_method_get_full_name (MonoMethod *method);
 void mono_trace_init (void);
+MonoMethod *mono_marshal_get_managed_wrapper (MonoMethod *method, MonoClass *delegate_klass, MonoGCHandle target_handle, MonoError *error);
 
 /* Not part of public headers */
 #define MONO_ICALL_TABLE_CALLBACKS_VERSION 3
@@ -318,19 +319,22 @@ mono_wasm_load_runtime_common (int debug_level, MonoLogCallback log_callback, co
 	return domain;
 }
 
+// TODO https://github.com/dotnet/runtime/issues/98366
 EMSCRIPTEN_KEEPALIVE MonoAssembly*
 mono_wasm_assembly_load (const char *name)
 {
+	MonoAssembly *res;
 	assert (name);
 	MonoImageOpenStatus status;
+	MONO_ENTER_GC_UNSAFE;
 	MonoAssemblyName* aname = mono_assembly_name_new (name);
-
-	MonoAssembly *res = mono_assembly_load (aname, NULL, &status);
+	res = mono_assembly_load (aname, NULL, &status);
 	mono_assembly_name_free (aname);
-
+	MONO_EXIT_GC_UNSAFE;
 	return res;
 }
 
+// TODO https://github.com/dotnet/runtime/issues/98366
 EMSCRIPTEN_KEEPALIVE MonoClass*
 mono_wasm_assembly_find_class (MonoAssembly *assembly, const char *namespace, const char *name)
 {
@@ -342,21 +346,7 @@ mono_wasm_assembly_find_class (MonoAssembly *assembly, const char *namespace, co
 	return result;
 }
 
-extern int mono_runtime_run_module_cctor (MonoImage *image, MonoError *error);
-
-EMSCRIPTEN_KEEPALIVE void
-mono_wasm_runtime_run_module_cctor (MonoAssembly *assembly)
-{
-	assert (assembly);
-	MonoError error;
-	MONO_ENTER_GC_UNSAFE;
-	MonoImage *image = mono_assembly_get_image (assembly);
-    if (!mono_runtime_run_module_cctor(image, &error)) {
-        //g_print ("Failed to run module constructor due to %s\n", mono_error_get_message (error));
-    }
-	MONO_EXIT_GC_UNSAFE;
-}
-
+// TODO https://github.com/dotnet/runtime/issues/98366
 EMSCRIPTEN_KEEPALIVE MonoMethod*
 mono_wasm_assembly_find_method (MonoClass *klass, const char *name, int arguments)
 {
@@ -368,30 +358,24 @@ mono_wasm_assembly_find_method (MonoClass *klass, const char *name, int argument
 	return result;
 }
 
-EMSCRIPTEN_KEEPALIVE void
-mono_wasm_invoke_method_ref (MonoMethod *method, MonoObject **this_arg_in, void *params[], MonoObject **_out_exc, MonoObject **out_result)
+/*
+ * mono_wasm_marshal_get_managed_wrapper:
+ * Creates a wrapper for a function pointer to a method marked with
+ * UnamangedCallersOnlyAttribute.
+ * This wrapper ensures that the interpreter initializes the pointers.
+ */
+void
+mono_wasm_marshal_get_managed_wrapper (const char* assemblyName, const char* typeName, const char* methodName, int num_params)
 {
-	PPVOLATILE(MonoObject) out_exc = _out_exc;
-	PVOLATILE(MonoObject) temp_exc = NULL;
-	if (out_exc)
-		*out_exc = NULL;
-	else
-		out_exc = &temp_exc;
-
-	MONO_ENTER_GC_UNSAFE;
-	if (out_result) {
-		*out_result = NULL;
-		PVOLATILE(MonoObject) invoke_result = mono_runtime_invoke (method, this_arg_in ? *this_arg_in : NULL, params, (MonoObject **)out_exc);
-		store_volatile(out_result, invoke_result);
-	} else {
-		mono_runtime_invoke (method, this_arg_in ? *this_arg_in : NULL, params, (MonoObject **)out_exc);
-	}
-
-	if (*out_exc && out_result) {
-		PVOLATILE(MonoObject) exc2 = NULL;
-		store_volatile(out_result, (MonoObject*)mono_object_to_string (*out_exc, (MonoObject **)&exc2));
-		if (exc2)
-			store_volatile(out_result, (MonoObject*)mono_string_new (mono_get_root_domain (), "Exception Double Fault"));
-	}
-	MONO_EXIT_GC_UNSAFE;
+	MonoError error;
+	mono_error_init (&error);
+	MonoAssembly* assembly = mono_wasm_assembly_load (assemblyName);
+	assert (assembly);
+	MonoClass* class = mono_wasm_assembly_find_class (assembly, "", typeName);
+	assert (class);
+	MonoMethod* method = mono_wasm_assembly_find_method (class, methodName, num_params);
+	assert (method);
+	MonoMethod *managedWrapper = mono_marshal_get_managed_wrapper (method, NULL, 0, &error);
+	assert (managedWrapper);
+	mono_compile_method (managedWrapper);
 }
diff --git a/src/mono/browser/runtime/runtime.h b/src/mono/browser/runtime/runtime.h
index 8718fdb59672..0ad4d2abd451 100644
--- a/src/mono/browser/runtime/runtime.h
+++ b/src/mono/browser/runtime/runtime.h
@@ -18,5 +18,7 @@ extern int mono_wasm_enable_gc;
 MonoDomain *mono_wasm_load_runtime_common (int debug_level, MonoLogCallback log_callback, const char *interp_opts);
 MonoAssembly *mono_wasm_assembly_load (const char *name);
 MonoClass *mono_wasm_assembly_find_class (MonoAssembly *assembly, const char *namespace, const char *name);
+MonoMethod *mono_wasm_assembly_find_method (MonoClass *klass, const char *name, int arguments);
+void mono_wasm_marshal_get_managed_wrapper (const char* assemblyName, const char* typeName, const char* methodName, int num_params);
 
 #endif
diff --git a/src/mono/browser/runtime/satelliteAssemblies.ts b/src/mono/browser/runtime/satelliteAssemblies.ts
index 100af0696aae..713cfb94161f 100644
--- a/src/mono/browser/runtime/satelliteAssemblies.ts
+++ b/src/mono/browser/runtime/satelliteAssemblies.ts
@@ -1,10 +1,11 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-import { loaderHelpers, runtimeHelpers } from "./globals";
+import { loaderHelpers } from "./globals";
+import { load_satellite_assembly } from "./managed-exports";
 import { AssetEntry } from "./types";
 
-export async function loadSatelliteAssemblies(culturesToLoad: string[]): Promise<void> {
+export async function loadSatelliteAssemblies (culturesToLoad: string[]): Promise<void> {
     const satelliteResources = loaderHelpers.config.resources!.satelliteResources;
     if (!satelliteResources) {
         return;
@@ -30,6 +31,6 @@ export async function loadSatelliteAssemblies(culturesToLoad: string[]): Promise
         .reduce((previous, next) => previous.concat(next), new Array<Promise<ArrayBuffer>>())
         .map(async bytesPromise => {
             const bytes = await bytesPromise;
-            runtimeHelpers.javaScriptExports.load_satellite_assembly(new Uint8Array(bytes));
+            load_satellite_assembly(new Uint8Array(bytes));
         }));
-}
\ No newline at end of file
+}
diff --git a/src/mono/browser/runtime/scheduling.ts b/src/mono/browser/runtime/scheduling.ts
index 69412b794732..35552123845a 100644
--- a/src/mono/browser/runtime/scheduling.ts
+++ b/src/mono/browser/runtime/scheduling.ts
@@ -4,13 +4,14 @@
 import WasmEnableThreads from "consts:wasmEnableThreads";
 
 import cwraps from "./cwraps";
-import { ENVIRONMENT_IS_WORKER, Module, loaderHelpers } from "./globals";
-import { is_thread_available } from "./pthreads/shared/emscripten-replacements";
+import { Module, loaderHelpers } from "./globals";
+import { forceThreadMemoryViewRefresh } from "./memory";
 
 let spread_timers_maximum = 0;
 let pump_count = 0;
 
-export function prevent_timer_throttling(): void {
+export function prevent_timer_throttling (): void {
+    if (WasmEnableThreads) return;
     if (!loaderHelpers.isChromium) {
         return;
     }
@@ -28,7 +29,8 @@ export function prevent_timer_throttling(): void {
     spread_timers_maximum = desired_reach_time;
 }
 
-function prevent_timer_throttling_tick() {
+function prevent_timer_throttling_tick () {
+    if (WasmEnableThreads) return;
     Module.maybeExit();
     if (!loaderHelpers.is_runtime_running()) {
         return;
@@ -38,7 +40,8 @@ function prevent_timer_throttling_tick() {
     mono_background_exec_until_done();
 }
 
-function mono_background_exec_until_done() {
+function mono_background_exec_until_done () {
+    if (WasmEnableThreads) return;
     Module.maybeExit();
     if (!loaderHelpers.is_runtime_running()) {
         return;
@@ -49,42 +52,28 @@ function mono_background_exec_until_done() {
     }
 }
 
-export function schedule_background_exec(): void {
+export function schedule_background_exec (): void {
+    if (WasmEnableThreads) return;
     ++pump_count;
-    let max_postpone_count = 10;
-    function postpone_schedule_background() {
-        if (max_postpone_count < 0 || is_thread_available()) {
-            Module.safeSetTimeout(mono_background_exec_until_done, 0);
-        } else {
-            max_postpone_count--;
-            Module.safeSetTimeout(postpone_schedule_background, 10);
-        }
-    }
-
-    if (WasmEnableThreads && !ENVIRONMENT_IS_WORKER) {
-        // give threads chance to load before we run more synchronous code on UI thread
-        postpone_schedule_background();
-    }
-    else {
-        Module.safeSetTimeout(mono_background_exec_until_done, 0);
-    }
+    Module.safeSetTimeout(mono_background_exec_until_done, 0);
 }
 
 let lastScheduledTimeoutId: any = undefined;
-export function mono_wasm_schedule_timer(shortestDueTimeMs: number): void {
+export function mono_wasm_schedule_timer (shortestDueTimeMs: number): void {
+    if (WasmEnableThreads) return;
     if (lastScheduledTimeoutId) {
         globalThis.clearTimeout(lastScheduledTimeoutId);
         lastScheduledTimeoutId = undefined;
-        // NOTE: Multi-threaded Module.safeSetTimeout() does the runtimeKeepalivePush() 
-        // and non-Multi-threaded Module.safeSetTimeout does not runtimeKeepalivePush() 
-        // but clearTimeout does not runtimeKeepalivePop() so we need to do it here in MT only.
-        if (WasmEnableThreads) Module.runtimeKeepalivePop();
     }
     lastScheduledTimeoutId = Module.safeSetTimeout(mono_wasm_schedule_timer_tick, shortestDueTimeMs);
 }
 
-function mono_wasm_schedule_timer_tick() {
+function mono_wasm_schedule_timer_tick () {
+    if (WasmEnableThreads) return;
     Module.maybeExit();
+    if (WasmEnableThreads) {
+        forceThreadMemoryViewRefresh();
+    }
     if (!loaderHelpers.is_runtime_running()) {
         return;
     }
diff --git a/src/mono/browser/runtime/startup.ts b/src/mono/browser/runtime/startup.ts
index 117090fcc74f..abe9e9f78d2b 100644
--- a/src/mono/browser/runtime/startup.ts
+++ b/src/mono/browser/runtime/startup.ts
@@ -4,9 +4,10 @@
 import NativeAOT from "consts:nativeAOT";
 
 import WasmEnableThreads from "consts:wasmEnableThreads";
+import BuildConfiguration from "consts:configuration";
 
 import { DotnetModuleInternal, CharPtrNull } from "./types/internal";
-import { ENVIRONMENT_IS_NODE, exportedRuntimeAPI, INTERNAL, loaderHelpers, Module, runtimeHelpers, createPromiseController, mono_assert, ENVIRONMENT_IS_WORKER } from "./globals";
+import { exportedRuntimeAPI, INTERNAL, loaderHelpers, Module, runtimeHelpers, createPromiseController, mono_assert } from "./globals";
 import cwraps, { init_c_exports, threads_c_functions as tcwraps } from "./cwraps";
 import { mono_wasm_raise_debug_event, mono_wasm_runtime_ready } from "./debug";
 import { toBase64StringImpl } from "./base64";
@@ -15,33 +16,34 @@ import { initialize_marshalers_to_cs } from "./marshal-to-cs";
 import { initialize_marshalers_to_js } from "./marshal-to-js";
 import { init_polyfills_async } from "./polyfills";
 import { strings_init, utf8ToString } from "./strings";
-import { init_managed_exports } from "./managed-exports";
+import { init_managed_exports, install_main_synchronization_context } from "./managed-exports";
 import { cwraps_internal } from "./exports-internal";
 import { CharPtr, InstantiateWasmCallBack, InstantiateWasmSuccessCallback } from "./types/emscripten";
 import { wait_for_all_assets } from "./assets";
-import { mono_wasm_init_diagnostics } from "./diagnostics";
 import { replace_linker_placeholders } from "./exports-binding";
 import { endMeasure, MeasuredBlock, startMeasure } from "./profiler";
 import { interp_pgo_load_data, interp_pgo_save_data } from "./interp-pgo";
-import { mono_log_debug, mono_log_error, mono_log_warn } from "./logging";
+import { mono_log_debug, mono_log_error, mono_log_info, mono_log_warn } from "./logging";
 
 // threads
-import { preAllocatePThreadWorkerPool, instantiateWasmPThreadWorkerPool } from "./pthreads/browser";
-import { currentWorkerThreadEvents, dotnetPthreadCreated, initWorkerThreadEvents, monoThreadInfo } from "./pthreads/worker";
-import { mono_wasm_main_thread_ptr, mono_wasm_pthread_ptr } from "./pthreads/shared";
+import { populateEmscriptenPool, mono_wasm_init_threads, init_finalizer_thread } from "./pthreads";
+import { currentWorkerThreadEvents, dotnetPthreadCreated, initWorkerThreadEvents, monoThreadInfo } from "./pthreads";
+import { mono_wasm_pthread_ptr, update_thread_info } from "./pthreads";
 import { jiterpreter_allocate_tables } from "./jiterpreter-support";
 import { localHeapViewU8 } from "./memory";
 import { assertNoProxies } from "./gc-handles";
 import { runtimeList } from "./exports";
 import { nativeAbort, nativeExit } from "./run";
+import { mono_wasm_init_diagnostics } from "./diagnostics";
+import { replaceEmscriptenPThreadInit } from "./pthreads/worker-thread";
 
-export async function configureRuntimeStartup(): Promise<void> {
+export async function configureRuntimeStartup (): Promise<void> {
     await init_polyfills_async();
 }
 
 // we are making emscripten startup async friendly
 // emscripten is executing the events without awaiting it and so we need to block progress via PromiseControllers above
-export function configureEmscriptenStartup(module: DotnetModuleInternal): void {
+export function configureEmscriptenStartup (module: DotnetModuleInternal): void {
     const mark = startMeasure();
 
     if (!module.locateFile) {
@@ -98,7 +100,7 @@ export function configureEmscriptenStartup(module: DotnetModuleInternal): void {
     module.ready = runtimeHelpers.dotnetReady.promise;
 }
 
-function instantiateWasm(
+function instantiateWasm (
     imports: WebAssembly.Imports,
     successCallback: InstantiateWasmSuccessCallback,
     userInstantiateWasm?: InstantiateWasmCallBack): any[] {
@@ -118,7 +120,7 @@ function instantiateWasm(
     return []; // No exports
 }
 
-async function instantiateWasmWorker(
+async function instantiateWasmWorker (
     imports: WebAssembly.Imports,
     successCallback: InstantiateWasmSuccessCallback
 ): Promise<void> {
@@ -127,6 +129,7 @@ async function instantiateWasmWorker(
     await loaderHelpers.afterConfigLoaded.promise;
 
     replace_linker_placeholders(imports);
+    replaceEmscriptenPThreadInit();
 
     // Instantiate from the module posted from the main thread.
     // We can just use sync instantiation in the worker.
@@ -135,7 +138,7 @@ async function instantiateWasmWorker(
     Module.wasmModule = null;
 }
 
-function preInit(userPreInit: (() => void)[]) {
+function preInit (userPreInit: (() => void)[]) {
     Module.addRunDependency("mono_pre_init");
     const mark = startMeasure();
     try {
@@ -168,7 +171,7 @@ function preInit(userPreInit: (() => void)[]) {
     })();
 }
 
-async function preInitWorkerAsync() {
+async function preInitWorkerAsync () {
     if (!WasmEnableThreads) return;
     const mark = startMeasure();
     try {
@@ -192,7 +195,7 @@ async function preInitWorkerAsync() {
 }
 
 // runs for each re-attached worker
-export function preRunWorker() {
+export function preRunWorker () {
     if (!WasmEnableThreads) return;
     const mark = startMeasure();
     try {
@@ -208,7 +211,7 @@ export function preRunWorker() {
     }
 }
 
-async function preRunAsync(userPreRun: (() => void)[]) {
+async function preRunAsync (userPreRun: (() => void)[]) {
     Module.addRunDependency("mono_pre_run_async");
     // wait for previous stages
     try {
@@ -229,7 +232,7 @@ async function preRunAsync(userPreRun: (() => void)[]) {
     Module.removeRunDependency("mono_pre_run_async");
 }
 
-async function onRuntimeInitializedAsync(userOnRuntimeInitialized: () => void) {
+async function onRuntimeInitializedAsync (userOnRuntimeInitialized: () => void) {
     try {
         // wait for previous stage
         await runtimeHelpers.afterPreRun.promise;
@@ -242,22 +245,12 @@ async function onRuntimeInitializedAsync(userOnRuntimeInitialized: () => void) {
         // signal this stage, this will allow pending assets to allocate memory
         runtimeHelpers.beforeOnRuntimeInitialized.promise_control.resolve();
 
-        await wait_for_all_assets();
-
+        let threadsReady: Promise<void> | undefined;
         if (WasmEnableThreads) {
-            await mono_wasm_init_threads();
-        }
-
-        // load runtime and apply environment settings (if necessary)
-        await start_runtime();
-
-        if (runtimeHelpers.config.interpreterPgo) {
-            await interp_pgo_load_data();
+            threadsReady = mono_wasm_init_threads();
         }
 
-        if (!ENVIRONMENT_IS_WORKER) {
-            Module.runtimeKeepalivePush();
-        }
+        await wait_for_all_assets();
 
         if (runtimeHelpers.config.virtualWorkingDirectory) {
             const FS = Module.FS;
@@ -270,20 +263,49 @@ async function onRuntimeInitializedAsync(userOnRuntimeInitialized: () => void) {
             FS.chdir(cwd);
         }
 
-        bindings_init();
-        jiterpreter_allocate_tables();
-
-        if (ENVIRONMENT_IS_NODE && !ENVIRONMENT_IS_WORKER) {
-            Module.runtimeKeepalivePush();
+        if (WasmEnableThreads && threadsReady) {
+            await threadsReady;
         }
 
-        runtimeHelpers.runtimeReady = true;
-        runtimeList.registerRuntime(exportedRuntimeAPI);
+        if (runtimeHelpers.config.interpreterPgo)
+            setTimeout(maybeSaveInterpPgoTable, (runtimeHelpers.config.interpreterPgoSaveDelay || 15) * 1000);
+
+
+        Module.runtimeKeepalivePush();
+        if (WasmEnableThreads && BuildConfiguration === "Debug" && globalThis.setInterval) globalThis.setInterval(() => {
+            mono_log_info("UI thread is alive!");
+        }, 3000);
+
+        if (WasmEnableThreads) {
+            // this will create thread and call start_runtime() on it
+            runtimeHelpers.monoThreadInfo = monoThreadInfo;
+            runtimeHelpers.isManagedRunningOnCurrentThread = false;
+            update_thread_info();
+            runtimeHelpers.managedThreadTID = tcwraps.mono_wasm_create_deputy_thread();
+            runtimeHelpers.proxyGCHandle = await runtimeHelpers.afterMonoStarted.promise;
+            runtimeHelpers.ioThreadTID = tcwraps.mono_wasm_create_io_thread();
+
+            // TODO make UI thread not managed/attached https://github.com/dotnet/runtime/issues/100411
+            tcwraps.mono_wasm_register_ui_thread();
+            monoThreadInfo.isAttached = true;
+            monoThreadInfo.isRegistered = true;
+
+            runtimeHelpers.runtimeReady = true;
+            update_thread_info();
+            bindings_init();
+
+            runtimeHelpers.disableManagedTransition = true;
+        } else {
+            // load mono runtime and apply environment settings (if necessary)
+            await start_runtime();
+        }
 
         if (WasmEnableThreads) {
-            runtimeHelpers.javaScriptExports.install_main_synchronization_context();
+            await runtimeHelpers.afterIOStarted.promise;
         }
 
+        runtimeList.registerRuntime(exportedRuntimeAPI);
+
         if (!runtimeHelpers.mono_wasm_runtime_is_ready) mono_wasm_runtime_ready();
 
         if (loaderHelpers.config.debugLevel !== 0 && loaderHelpers.config.cacheBootResources) {
@@ -297,8 +319,7 @@ async function onRuntimeInitializedAsync(userOnRuntimeInitialized: () => void) {
         // call user code
         try {
             userOnRuntimeInitialized();
-        }
-        catch (err: any) {
+        } catch (err: any) {
             mono_log_error("user callback onRuntimeInitialized() failed", err);
             throw err;
         }
@@ -306,6 +327,7 @@ async function onRuntimeInitializedAsync(userOnRuntimeInitialized: () => void) {
         await mono_wasm_after_user_runtime_initialized();
         endMeasure(mark, MeasuredBlock.onRuntimeInitialized);
     } catch (err) {
+        Module.runtimeKeepalivePop();
         mono_log_error("onRuntimeInitializedAsync() failed", err);
         loaderHelpers.mono_exit(1, err);
         throw err;
@@ -314,7 +336,7 @@ async function onRuntimeInitializedAsync(userOnRuntimeInitialized: () => void) {
     runtimeHelpers.afterOnRuntimeInitialized.promise_control.resolve();
 }
 
-async function postRunAsync(userpostRun: (() => void)[]) {
+async function postRunAsync (userpostRun: (() => void)[]) {
     // wait for previous stage
     try {
         await runtimeHelpers.afterOnRuntimeInitialized.promise;
@@ -325,10 +347,6 @@ async function postRunAsync(userpostRun: (() => void)[]) {
         Module["FS_createPath"]("/", "usr", true, true);
         Module["FS_createPath"]("/", "usr/share", true, true);
 
-        if (WasmEnableThreads) {
-            tcwraps.mono_wasm_init_finalizer_thread();
-        }
-
         // all user Module.postRun callbacks
         userpostRun.map(fn => fn());
         endMeasure(mark, MeasuredBlock.postRun);
@@ -342,13 +360,12 @@ async function postRunAsync(userpostRun: (() => void)[]) {
 }
 
 // runs for each re-detached worker
-export function postRunWorker() {
+export function postRunWorker () {
     if (!WasmEnableThreads) return;
     const mark = startMeasure();
     try {
-        if (runtimeHelpers.proxy_context_gc_handle) {
-            const pthread_ptr = mono_wasm_pthread_ptr();
-            mono_log_warn(`JSSynchronizationContext is still installed on worker 0x${pthread_ptr.toString(16)}.`);
+        if (runtimeHelpers.proxyGCHandle) {
+            mono_log_warn("JSSynchronizationContext is still installed on worker.");
         } else {
             assertNoProxies();
         }
@@ -364,20 +381,7 @@ export function postRunWorker() {
     }
 }
 
-async function mono_wasm_init_threads() {
-    if (!WasmEnableThreads) return;
-
-    const threadPrefix = `0x${mono_wasm_main_thread_ptr().toString(16)}-main`;
-    monoThreadInfo.threadPrefix = threadPrefix;
-    monoThreadInfo.threadName = "UI Thread";
-    monoThreadInfo.isUI = true;
-    monoThreadInfo.isAttached = true;
-    loaderHelpers.set_thread_prefix(threadPrefix);
-    await instantiateWasmPThreadWorkerPool();
-    await mono_wasm_init_diagnostics();
-}
-
-function mono_wasm_pre_init_essential(isWorker: boolean): void {
+function mono_wasm_pre_init_essential (isWorker: boolean): void {
     if (!isWorker)
         Module.addRunDependency("mono_wasm_pre_init_essential");
 
@@ -404,27 +408,24 @@ function mono_wasm_pre_init_essential(isWorker: boolean): void {
         Module.removeRunDependency("mono_wasm_pre_init_essential");
 }
 
-async function mono_wasm_pre_init_essential_async(): Promise<void> {
+async function mono_wasm_pre_init_essential_async (): Promise<void> {
     mono_log_debug("mono_wasm_pre_init_essential_async");
     Module.addRunDependency("mono_wasm_pre_init_essential_async");
 
     if (WasmEnableThreads) {
-        preAllocatePThreadWorkerPool(runtimeHelpers.config.pthreadPoolSize!);
+        populateEmscriptenPool();
     }
 
     Module.removeRunDependency("mono_wasm_pre_init_essential_async");
 }
 
-async function mono_wasm_after_user_runtime_initialized(): Promise<void> {
+async function mono_wasm_after_user_runtime_initialized (): Promise<void> {
     mono_log_debug("mono_wasm_after_user_runtime_initialized");
     try {
-        mono_log_debug("Initializing mono runtime");
-
         if (Module.onDotnetReady) {
             try {
                 await Module.onDotnetReady();
-            }
-            catch (err: any) {
+            } catch (err: any) {
                 mono_log_error("onDotnetReady () failed", err);
                 throw err;
             }
@@ -437,15 +438,14 @@ async function mono_wasm_after_user_runtime_initialized(): Promise<void> {
 
 // Set environment variable NAME to VALUE
 // Should be called before mono_load_runtime_and_bcl () in most cases
-export function mono_wasm_setenv(name: string, value: string): void {
+export function mono_wasm_setenv (name: string, value: string): void {
     cwraps.mono_wasm_setenv(name, value);
 }
 
-export function mono_wasm_set_runtime_options(options: string[]): void {
+export function mono_wasm_set_runtime_options (options: string[]): void {
     if (NativeAOT) {
         return;
     }
-
     if (!Array.isArray(options))
         throw new Error("Expected runtimeOptions to be an array of strings");
 
@@ -461,7 +461,7 @@ export function mono_wasm_set_runtime_options(options: string[]): void {
     cwraps.mono_wasm_parse_runtime_options(options.length, argv);
 }
 
-async function instantiate_wasm_module(
+async function instantiate_wasm_module (
     imports: WebAssembly.Imports,
     successCallback: InstantiateWasmSuccessCallback,
 ): Promise<void> {
@@ -491,7 +491,7 @@ async function instantiate_wasm_module(
     Module.removeRunDependency("instantiate_wasm_module");
 }
 
-async function ensureUsedWasmFeatures() {
+async function ensureUsedWasmFeatures () {
     runtimeHelpers.featureWasmSimd = await loaderHelpers.simd();
     runtimeHelpers.featureWasmEh = await loaderHelpers.exceptions();
     if (runtimeHelpers.emscriptenBuildOptions.wasmEnableSIMD) {
@@ -502,38 +502,72 @@ async function ensureUsedWasmFeatures() {
     }
 }
 
-async function start_runtime() {
-    const mark = startMeasure();
+export async function start_runtime () {
+    try {
+        const mark = startMeasure();
+        mono_log_debug("Initializing mono runtime");
+        if (NativeAOT) {
+            runtimeHelpers.config.environmentVariables = {};
+        }
 
-    if (NativeAOT) {
-        runtimeHelpers.config.environmentVariables = {};
-    }
+        for (const k in runtimeHelpers.config.environmentVariables) {
+            const v = runtimeHelpers.config.environmentVariables![k];
+            if (typeof (v) === "string")
+                mono_wasm_setenv(k, v);
+            else
+                throw new Error(`Expected environment variable '${k}' to be a string but it was ${typeof v}: '${v}'`);
+        }
+        if (runtimeHelpers.config.runtimeOptions)
+            mono_wasm_set_runtime_options(runtimeHelpers.config.runtimeOptions);
 
-    for (const k in runtimeHelpers.config.environmentVariables) {
-        const v = runtimeHelpers.config.environmentVariables![k];
-        if (typeof (v) === "string")
-            mono_wasm_setenv(k, v);
-        else
-            throw new Error(`Expected environment variable '${k}' to be a string but it was ${typeof v}: '${v}'`);
-    }
-    if (runtimeHelpers.config.runtimeOptions)
-        mono_wasm_set_runtime_options(runtimeHelpers.config.runtimeOptions);
+        if (runtimeHelpers.config.aotProfilerOptions)
+            mono_wasm_init_aot_profiler(runtimeHelpers.config.aotProfilerOptions);
 
-    if (runtimeHelpers.config.aotProfilerOptions)
-        mono_wasm_init_aot_profiler(runtimeHelpers.config.aotProfilerOptions);
+        if (runtimeHelpers.config.browserProfilerOptions)
+            mono_wasm_init_browser_profiler(runtimeHelpers.config.browserProfilerOptions);
+
+        if (WasmEnableThreads) {
+            // this is not mono-attached thread, so we can start it earlier
+            await mono_wasm_init_diagnostics();
+        }
 
-    if (runtimeHelpers.config.browserProfilerOptions)
-        mono_wasm_init_browser_profiler(runtimeHelpers.config.browserProfilerOptions);
+        mono_wasm_load_runtime();
 
-    mono_wasm_load_runtime("unused", runtimeHelpers.config.debugLevel);
+        jiterpreter_allocate_tables();
 
-    if (runtimeHelpers.config.interpreterPgo)
-        setTimeout(maybeSaveInterpPgoTable, (runtimeHelpers.config.interpreterPgoSaveDelay || 15) * 1000);
+        bindings_init();
 
-    endMeasure(mark, MeasuredBlock.startRuntime);
+        runtimeHelpers.runtimeReady = true;
+
+        if (WasmEnableThreads) {
+            monoThreadInfo.isAttached = true;
+            monoThreadInfo.isRunning = true;
+            monoThreadInfo.isRegistered = true;
+            runtimeHelpers.currentThreadTID = monoThreadInfo.pthreadId = runtimeHelpers.managedThreadTID = mono_wasm_pthread_ptr();
+            update_thread_info();
+            runtimeHelpers.proxyGCHandle = install_main_synchronization_context(runtimeHelpers.config.jsThreadBlockingMode!);
+            runtimeHelpers.isManagedRunningOnCurrentThread = true;
+
+            // start finalizer thread, lazy
+            init_finalizer_thread();
+        }
+
+        // get GCHandle of the ctx
+        runtimeHelpers.afterMonoStarted.promise_control.resolve(runtimeHelpers.proxyGCHandle);
+
+        if (runtimeHelpers.config.interpreterPgo) {
+            await interp_pgo_load_data();
+        }
+
+        endMeasure(mark, MeasuredBlock.startRuntime);
+    } catch (err) {
+        mono_log_error("start_runtime() failed", err);
+        loaderHelpers.mono_exit(1, err);
+        throw err;
+    }
 }
 
-async function maybeSaveInterpPgoTable() {
+async function maybeSaveInterpPgoTable () {
     // If the application exited abnormally, don't save the table. It probably doesn't contain useful data,
     //  and saving would overwrite any existing table from a previous successful run.
     // We treat exiting with a code of 0 as equivalent to if the app is still running - it's perfectly fine
@@ -544,20 +578,24 @@ async function maybeSaveInterpPgoTable() {
     await interp_pgo_save_data();
 }
 
-export function mono_wasm_load_runtime(unused?: string, debugLevel?: number): void {
+export function mono_wasm_load_runtime (): void {
     if (NativeAOT) {
         return;
     }
     mono_log_debug("mono_wasm_load_runtime");
     try {
         const mark = startMeasure();
+        let debugLevel = runtimeHelpers.config.debugLevel;
         if (debugLevel == undefined) {
             debugLevel = 0;
             if (runtimeHelpers.config.debugLevel) {
                 debugLevel = 0 + debugLevel;
             }
         }
-        cwraps.mono_wasm_load_runtime(unused || "unused", debugLevel);
+        if (!loaderHelpers.isDebuggingSupported() || !runtimeHelpers.config.resources!.pdb) {
+            debugLevel = 0;
+        }
+        cwraps.mono_wasm_load_runtime(debugLevel);
         endMeasure(mark, MeasuredBlock.loadRuntime);
 
     } catch (err: any) {
@@ -567,7 +605,7 @@ export function mono_wasm_load_runtime(unused?: string, debugLevel?: number): vo
     }
 }
 
-export function bindings_init(): void {
+export function bindings_init (): void {
     if (runtimeHelpers.mono_wasm_bindings_is_ready) {
         return;
     }
@@ -575,8 +613,10 @@ export function bindings_init(): void {
     runtimeHelpers.mono_wasm_bindings_is_ready = true;
     try {
         const mark = startMeasure();
-        strings_init();
-        init_managed_exports();
+        if (!NativeAOT) {
+            strings_init();
+            init_managed_exports();
+        }
         initialize_marshalers_to_js();
         initialize_marshalers_to_cs();
         runtimeHelpers._i52_error_scratch_buffer = <any>Module._malloc(4);
@@ -588,7 +628,7 @@ export function bindings_init(): void {
 }
 
 
-export function mono_wasm_asm_loaded(assembly_name: CharPtr, assembly_ptr: number, assembly_len: number, pdb_ptr: number, pdb_len: number): void {
+export function mono_wasm_asm_loaded (assembly_name: CharPtr, assembly_ptr: number, assembly_len: number, pdb_ptr: number, pdb_len: number): void {
     // Only trigger this codepath for assemblies loaded after app is ready
     if (runtimeHelpers.mono_wasm_runtime_is_ready !== true)
         return;
@@ -611,7 +651,7 @@ export function mono_wasm_asm_loaded(assembly_name: CharPtr, assembly_ptr: numbe
     });
 }
 
-export function mono_wasm_set_main_args(name: string, allRuntimeArguments: string[]): void {
+export function mono_wasm_set_main_args (name: string, allRuntimeArguments: string[]): void {
     const main_argc = allRuntimeArguments.length + 1;
     const main_argv = <any>Module._malloc(main_argc * 4);
     let aindex = 0;
@@ -631,7 +671,7 @@ export function mono_wasm_set_main_args(name: string, allRuntimeArguments: strin
 /// 1. Emscripten skips a lot of initialization on the pthread workers, Module may not have everything you expect.
 /// 2. Emscripten does not run any event but preInit in the workers.
 /// 3. At the point when this executes there is no pthread assigned to the worker yet.
-export async function configureWorkerStartup(module: DotnetModuleInternal): Promise<void> {
+export async function configureWorkerStartup (module: DotnetModuleInternal): Promise<void> {
     if (!WasmEnableThreads) return;
 
     initWorkerThreadEvents();
diff --git a/src/mono/browser/runtime/strings.ts b/src/mono/browser/runtime/strings.ts
index 9058f29d0bf2..a596b4e35ef5 100644
--- a/src/mono/browser/runtime/strings.ts
+++ b/src/mono/browser/runtime/strings.ts
@@ -1,12 +1,14 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+import WasmEnableThreads from "consts:wasmEnableThreads";
+
 import { mono_wasm_new_root, mono_wasm_new_root_buffer } from "./roots";
 import { MonoString, MonoStringNull, WasmRoot, WasmRootBuffer } from "./types/internal";
 import { Module } from "./globals";
 import cwraps from "./cwraps";
-import { isSharedArrayBuffer, localHeapViewU8, getU32_local, setU16_local, localHeapViewU32, getU16_local, localHeapViewU16 } from "./memory";
-import { NativePointer, CharPtr } from "./types/emscripten";
+import { isSharedArrayBuffer, localHeapViewU8, getU32_local, setU16_local, localHeapViewU32, getU16_local, localHeapViewU16, _zero_region } from "./memory";
+import { NativePointer, CharPtr, VoidPtr } from "./types/emscripten";
 
 export const interned_js_string_table = new Map<string, MonoString>();
 export const mono_wasm_empty_string = "";
@@ -21,7 +23,7 @@ let _text_decoder_utf8_relaxed: TextDecoder | undefined = undefined;
 let _text_decoder_utf8_validating: TextDecoder | undefined = undefined;
 let _text_encoder_utf8: TextEncoder | undefined = undefined;
 
-export function strings_init(): void {
+export function strings_init (): void {
     if (!mono_wasm_string_decoder_buffer) {
         if (typeof TextDecoder !== "undefined") {
             _text_decoder_utf16 = new TextDecoder("utf-16le");
@@ -31,9 +33,11 @@ export function strings_init(): void {
         }
         mono_wasm_string_decoder_buffer = Module._malloc(12);
     }
+    if (!mono_wasm_string_root)
+        mono_wasm_string_root = mono_wasm_new_root();
 }
 
-export function stringToUTF8(str: string): Uint8Array {
+export function stringToUTF8 (str: string): Uint8Array {
     if (_text_encoder_utf8 === undefined) {
         const buffer = new Uint8Array(str.length * 2);
         Module.stringToUTF8Array(str, buffer, 0, str.length * 2);
@@ -42,19 +46,28 @@ export function stringToUTF8(str: string): Uint8Array {
     return _text_encoder_utf8.encode(str);
 }
 
-export function utf8ToStringRelaxed(buffer: Uint8Array): string {
+export function stringToUTF8Ptr (str: string): CharPtr {
+    const bytes = (str.length + 1) * 2;
+    const ptr = Module._malloc(bytes) as any;
+    _zero_region(ptr, str.length * 2);
+    const buffer = localHeapViewU8().subarray(ptr, ptr + bytes);
+    buffer.set(stringToUTF8(str));
+    return ptr;
+}
+
+export function utf8ToStringRelaxed (buffer: Uint8Array): string {
     if (_text_decoder_utf8_relaxed === undefined) {
         return Module.UTF8ArrayToString(buffer, 0, buffer.byteLength);
     }
     return _text_decoder_utf8_relaxed.decode(buffer);
 }
 
-export function utf8ToString(ptr: CharPtr): string {
+export function utf8ToString (ptr: CharPtr): string {
     const heapU8 = localHeapViewU8();
     return utf8BufferToString(heapU8, ptr as any, heapU8.length - (ptr as any));
 }
 
-export function utf8BufferToString(heapOrArray: Uint8Array, idx: number, maxBytesToRead: number): string {
+export function utf8BufferToString (heapOrArray: Uint8Array, idx: number, maxBytesToRead: number): string {
     const endIdx = idx + maxBytesToRead;
     let endPtr = idx;
     while (heapOrArray[endPtr] && !(endPtr >= endIdx)) ++endPtr;
@@ -68,7 +81,7 @@ export function utf8BufferToString(heapOrArray: Uint8Array, idx: number, maxByte
     return _text_decoder_utf8_validating.decode(view);
 }
 
-export function utf16ToString(startPtr: number, endPtr: number): string {
+export function utf16ToString (startPtr: number, endPtr: number): string {
     if (_text_decoder_utf16) {
         const subArray = viewOrCopy(localHeapViewU8(), startPtr as any, endPtr as any);
         return _text_decoder_utf16.decode(subArray);
@@ -77,7 +90,7 @@ export function utf16ToString(startPtr: number, endPtr: number): string {
     }
 }
 
-export function utf16ToStringLoop(startPtr: number, endPtr: number): string {
+export function utf16ToStringLoop (startPtr: number, endPtr: number): string {
     let str = "";
     const heapU16 = localHeapViewU16();
     for (let i = startPtr; i < endPtr; i += 2) {
@@ -87,7 +100,7 @@ export function utf16ToStringLoop(startPtr: number, endPtr: number): string {
     return str;
 }
 
-export function stringToUTF16(dstPtr: number, endPtr: number, text: string) {
+export function stringToUTF16 (dstPtr: number, endPtr: number, text: string) {
     const heapI16 = localHeapViewU16();
     const len = text.length;
     for (let i = 0; i < len; i++) {
@@ -97,7 +110,20 @@ export function stringToUTF16(dstPtr: number, endPtr: number, text: string) {
     }
 }
 
-export function monoStringToString(root: WasmRoot<MonoString>): string | null {
+export function stringToUTF16Ptr (str: string): VoidPtr {
+    const bytes = (str.length + 1) * 2;
+    const ptr = Module._malloc(bytes) as any;
+    _zero_region(ptr, str.length * 2);
+    stringToUTF16(ptr, ptr + bytes, str);
+    return ptr;
+
+}
+
+export function monoStringToString (root: WasmRoot<MonoString>): string | null {
+    // TODO https://github.com/dotnet/runtime/issues/100411
+    // after Blazor stops using monoStringToStringUnsafe
+    // mono_assert(!WasmEnableThreads, "Marshaling strings by reference is not supported in multithreaded mode");
+
     if (root.value === MonoStringNull)
         return null;
 
@@ -131,7 +157,8 @@ export function monoStringToString(root: WasmRoot<MonoString>): string | null {
     return result;
 }
 
-export function stringToMonoStringRoot(string: string, result: WasmRoot<MonoString>): void {
+export function stringToMonoStringRoot (string: string, result: WasmRoot<MonoString>): void {
+    if (WasmEnableThreads) return;
     result.clear();
 
     if (string === null)
@@ -160,7 +187,7 @@ export function stringToMonoStringRoot(string: string, result: WasmRoot<MonoStri
     }
 }
 
-export function stringToInternedMonoStringRoot(string: string | symbol, result: WasmRoot<MonoString>): void {
+function stringToInternedMonoStringRoot (string: string | symbol, result: WasmRoot<MonoString>): void {
     let text: string | undefined;
     if (typeof (string) === "symbol") {
         text = string.description;
@@ -193,7 +220,7 @@ export function stringToInternedMonoStringRoot(string: string | symbol, result:
     storeStringInInternTable(text, result, true);
 }
 
-function storeStringInInternTable(string: string, root: WasmRoot<MonoString>, internIt: boolean): void {
+function storeStringInInternTable (string: string, root: WasmRoot<MonoString>, internIt: boolean): void {
     if (!root.value)
         throw new Error("null pointer passed to _store_string_in_intern_table");
 
@@ -231,8 +258,11 @@ function storeStringInInternTable(string: string, root: WasmRoot<MonoString>, in
     rootBuffer.copy_value_from_address(index, root.address);
 }
 
-function stringToMonoStringNewRoot(string: string, result: WasmRoot<MonoString>): void {
+function stringToMonoStringNewRoot (string: string, result: WasmRoot<MonoString>): void {
     const bufferLen = (string.length + 1) * 2;
+    // TODO this could be stack allocated for small strings
+    // or temp_malloc/alloca for large strings
+    // or skip the scratch buffer entirely, and make a new MonoString of size string.length, pin it, and then call stringToUTF16 to write directly into the MonoString's chars
     const buffer = Module._malloc(bufferLen);
     stringToUTF16(buffer as any, buffer as any + bufferLen, string);
     cwraps.mono_wasm_string_from_utf16_ref(<any>buffer, string.length, result.address);
@@ -242,7 +272,7 @@ function stringToMonoStringNewRoot(string: string, result: WasmRoot<MonoString>)
 // When threading is enabled, TextDecoder does not accept a view of a
 // SharedArrayBuffer, we must make a copy of the array first.
 // See https://github.com/whatwg/encoding/issues/172
-export function viewOrCopy(view: Uint8Array, start: CharPtr, end: CharPtr): Uint8Array {
+export function viewOrCopy (view: Uint8Array, start: CharPtr, end: CharPtr): Uint8Array {
     // this condition should be eliminated by rollup on non-threading builds
     const needsCopy = isSharedArrayBuffer(view.buffer);
     return needsCopy
@@ -254,11 +284,9 @@ export function viewOrCopy(view: Uint8Array, start: CharPtr, end: CharPtr): Uint
 let mono_wasm_string_root: any;
 
 /* @deprecated not GC safe, use monoStringToString */
-export function monoStringToStringUnsafe(mono_string: MonoString): string | null {
+export function monoStringToStringUnsafe (mono_string: MonoString): string | null {
     if (mono_string === MonoStringNull)
         return null;
-    if (!mono_wasm_string_root)
-        mono_wasm_string_root = mono_wasm_new_root();
 
     mono_wasm_string_root.value = mono_string;
     const result = monoStringToString(mono_wasm_string_root);
diff --git a/src/mono/browser/runtime/types/index.ts b/src/mono/browser/runtime/types/index.ts
index 0e382399d92d..73b9f9a2dee6 100644
--- a/src/mono/browser/runtime/types/index.ts
+++ b/src/mono/browser/runtime/types/index.ts
@@ -143,7 +143,15 @@ export type MonoConfig = {
     /**
      * initial number of workers to add to the emscripten pthread pool
      */
-    pthreadPoolSize?: number,
+    pthreadPoolInitialSize?: number,
+    /**
+     * number of unused workers kept in the emscripten pthread pool after startup
+     */
+    pthreadPoolUnusedSize?: number,
+    /**
+     * Delay in milliseconds before starting the finalizer thread
+     */
+    finalizerThreadStartDelayMs?: number,
     /**
      * If true, a list of the methods optimized by the interpreter will be saved and used for faster startup
      *  on future runs of the application
@@ -316,7 +324,15 @@ export type SingleAssetBehaviors =
     /**
      * Typically blazor.boot.json
      */
-    | "manifest";
+    | "manifest"
+    /**
+     * The debugging symbols
+     */
+    | "symbols"
+    /**
+     * Load segmentation rules file for Hybrid Globalization.
+     */
+    | "segmentation-rules";
 
 export type AssetBehaviors = SingleAssetBehaviors |
     /**
@@ -347,14 +363,6 @@ export type AssetBehaviors = SingleAssetBehaviors |
      * The javascript module that came from nuget package .
      */
     | "js-module-library-initializer"
-    /**
-     * The javascript module for threads.
-     */
-    | "symbols"
-    /**
-     * Load segmentation rules file for Hybrid Globalization.
-     */
-    | "segmentation-rules"
 
 export const enum GlobalizationMode {
     /**
@@ -407,6 +415,13 @@ export type APIType = {
      * @returns exit code of the Main() method.
      */
     runMainAndExit: (mainAssemblyName?: string, args?: string[]) => Promise<number>;
+    /**
+     * Exits the runtime.
+     * Note: after the runtime exits, it would reject all further calls to the API.
+     * @param code "process" exit code.
+     * @param reason could be a string or an Error object.
+     */
+    exit: (code: number, reason?: any) => void;
     /**
      * Sets the environment variable for the "process"
      * @param name
@@ -437,6 +452,10 @@ export type APIType = {
      * Writes to the WASM linear memory
      */
     setHeapB32: (offset: NativePointer, value: number | boolean) => void;
+    /**
+     * Writes to the WASM linear memory
+     */
+    setHeapB8: (offset: NativePointer, value: number | boolean) => void;
     /**
      * Writes to the WASM linear memory
      */
@@ -485,6 +504,10 @@ export type APIType = {
      * Reads from the WASM linear memory
      */
     getHeapB32: (offset: NativePointer) => boolean;
+    /**
+     * Reads from the WASM linear memory
+     */
+    getHeapB8: (offset: NativePointer) => boolean;
     /**
      * Reads from the WASM linear memory
      */
diff --git a/src/mono/browser/runtime/types/internal.ts b/src/mono/browser/runtime/types/internal.ts
index 9b5af8831e43..0e564f8c8728 100644
--- a/src/mono/browser/runtime/types/internal.ts
+++ b/src/mono/browser/runtime/types/internal.ts
@@ -2,7 +2,6 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 import type { AssetEntry, DotnetModuleConfig, LoadBootResourceCallback, LoadingResource, MonoConfig, RuntimeAPI, SingleAssetBehaviors } from ".";
-import type { PThreadLibrary } from "../pthreads/shared/emscripten-internals";
 import type { CharPtr, EmscriptenModule, ManagedPointer, NativePointer, VoidPtr, Int32Ptr } from "./emscripten";
 
 export type GCHandle = {
@@ -14,6 +13,9 @@ export type JSHandle = {
 export type JSFnHandle = {
     __brand: "JSFnHandle"
 }
+export type PThreadPtr = {
+    __brand: "PThreadPtr" // like pthread_t in C
+}
 export interface MonoObject extends ManagedPointer {
     __brandMonoObject: "MonoObject"
 }
@@ -56,11 +58,13 @@ export const MonoStringRefNull: MonoStringRef = <MonoStringRef><any>0;
 export const JSHandleDisposed: JSHandle = <JSHandle><any>-1;
 export const JSHandleNull: JSHandle = <JSHandle><any>0;
 export const GCHandleNull: GCHandle = <GCHandle><any>0;
+export const GCHandleInvalid: GCHandle = <GCHandle><any>-1;
 export const VoidPtrNull: VoidPtr = <VoidPtr><any>0;
 export const CharPtrNull: CharPtr = <CharPtr><any>0;
 export const NativePointerNull: NativePointer = <NativePointer><any>0;
+export const PThreadPtrNull: PThreadPtr = <PThreadPtr><any>0;
 
-export function coerceNull<T extends ManagedPointer | NativePointer>(ptr: T | null | undefined): T {
+export function coerceNull<T extends ManagedPointer | NativePointer> (ptr: T | null | undefined): T {
     if ((ptr === null) || (ptr === undefined))
         return (0 as any) as T;
     else
@@ -76,7 +80,6 @@ export type MonoConfigInternal = MonoConfig & {
     browserProfilerOptions?: BrowserProfilerOptions, // dictionary-style Object. If omitted, browser profiler will not be initialized.
     waitForDebugger?: number,
     appendElementOnExit?: boolean
-    assertAfterExit?: boolean // default true for shell/nodeJS
     interopCleanupOnExit?: boolean
     dumpThreadsOnNonZeroExit?: boolean
     logExitCode?: boolean
@@ -91,6 +94,8 @@ export type MonoConfigInternal = MonoConfig & {
     resourcesHash?: string,
     GitHash?: string,
     ProductVersion?: string,
+
+    jsThreadBlockingMode?: JSThreadBlockingMode,
 };
 
 export type RunArguments = {
@@ -115,7 +120,6 @@ export type LoaderHelpers = {
 
     maxParallelDownloads: number;
     enableDownloadRetry: boolean;
-    assertAfterExit: boolean;
 
     exitCode: number | undefined;
     exitReason: any;
@@ -127,7 +131,8 @@ export type LoaderHelpers = {
     scriptUrl: string
     modulesUniqueQuery?: string
     preferredIcuAsset?: string | null,
-    invariantMode: boolean,
+    loadingWorkers: PThreadWorker[],
+    workerNextNumber: number,
 
     actual_downloaded_assets_count: number,
     actual_instantiated_assets_count: number,
@@ -155,7 +160,6 @@ export type LoaderHelpers = {
     out(message: string): void;
     err(message: string): void;
 
-    hasDebuggingEnabled(config: MonoConfig): boolean,
     retrieve_asset_download(asset: AssetEntry): Promise<ArrayBuffer>;
     onDownloadResourceProgress?: (resourcesLoaded: number, totalResources: number) => void;
     logDownloadStatsToConsole: () => void;
@@ -166,6 +170,7 @@ export type LoaderHelpers = {
     invokeLibraryInitializers: (functionName: string, args: any[]) => Promise<void>,
     libraryInitializers?: { scriptName: string, exports: any }[];
 
+    isDebuggingSupported(): boolean,
     isChromium: boolean,
     isFirefox: boolean
 
@@ -194,13 +199,19 @@ export type RuntimeHelpers = {
     quit: Function,
     nativeExit: (code: number) => void,
     nativeAbort: (reason: any) => void,
-    javaScriptExports: JavaScriptExports,
     subtle: SubtleCrypto | null,
     updateMemoryViews: () => void
     getMemory(): WebAssembly.Memory,
     getWasmIndirectFunctionTable(): WebAssembly.Table,
     runtimeReady: boolean,
-    proxy_context_gc_handle: GCHandle,
+    disableManagedTransition: boolean,
+    monoThreadInfo: PThreadInfo,
+    proxyGCHandle: GCHandle | undefined,
+    managedThreadTID: PThreadPtr,
+    ioThreadTID: PThreadPtr,
+    currentThreadTID: PThreadPtr,
+    isManagedRunningOnCurrentThread: boolean,
+    isPendingSynchronousCall: boolean, // true when we are in the middle of a synchronous call from managed code from same thread
     cspPolicy: boolean,
 
     allAssetsInMemory: PromiseAndController<void>,
@@ -210,6 +221,8 @@ export type RuntimeHelpers = {
     afterPreInit: PromiseAndController<void>,
     afterPreRun: PromiseAndController<void>,
     beforeOnRuntimeInitialized: PromiseAndController<void>,
+    afterMonoStarted: PromiseAndController<GCHandle | undefined>,
+    afterIOStarted: PromiseAndController<void>,
     afterOnRuntimeInitialized: PromiseAndController<void>,
     afterPostRun: PromiseAndController<void>,
 
@@ -221,7 +234,7 @@ export type RuntimeHelpers = {
     instantiate_asset: (asset: AssetEntry, url: string, bytes: Uint8Array) => void,
     instantiate_symbols_asset: (pendingAsset: AssetEntryInternal) => Promise<void>,
     instantiate_segmentation_rules_asset: (pendingAsset: AssetEntryInternal) => Promise<void>,
-    jiterpreter_dump_stats?: (x: boolean) => string,
+    jiterpreter_dump_stats?: (concise?: boolean) => void,
     forceDisposeProxies: (disposeMethods: boolean, verbose: boolean) => void,
     dumpThreads: () => void,
 }
@@ -240,7 +253,7 @@ export type DotnetModuleInternal = EmscriptenModule & DotnetModuleConfig & Emscr
 
 // Evaluates whether a value is nullish (same definition used as the ?? operator,
 //  https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Nullish_coalescing_operator)
-export function is_nullish<T>(value: T | null | undefined): value is null | undefined {
+export function is_nullish<T> (value: T | null | undefined): value is null | undefined {
     return (value === undefined) || (value === null);
 }
 
@@ -285,14 +298,14 @@ export interface ExitStatusError {
 
 /// Always throws. Used to handle unreachable switch branches when TypeScript refines the type of a variable
 /// to 'never' after you handle all the cases it knows about.
-export function assertNever(x: never): never {
+export function assertNever (x: never): never {
     throw new Error("Unexpected value: " + x);
 }
 
 /// returns true if the given value is not Thenable
 ///
 /// Useful if some function returns a value or a promise of a value.
-export function notThenable<T>(x: T | PromiseLike<T>): x is T {
+export function notThenable<T> (x: T | PromiseLike<T>): x is T {
     return typeof x !== "object" || typeof ((<PromiseLike<T>>x).then) !== "function";
 }
 
@@ -300,35 +313,6 @@ export function notThenable<T>(x: T | PromiseLike<T>): x is T {
 /// Primarily intended for debugging purposes.
 export type EventPipeSessionID = bigint;
 
-// in all the exported internals methods, we use the same data structures for stack frame as normal full blow interop
-// see src\libraries\System.Runtime.InteropServices.JavaScript\src\System\Runtime\InteropServices\JavaScript\Interop\JavaScriptExports.cs
-export interface JavaScriptExports {
-    // the marshaled signature is: void ReleaseJSOwnedObjectByGCHandle(GCHandle gcHandle)
-    release_js_owned_object_by_gc_handle(gc_handle: GCHandle): void;
-
-    // the marshaled signature is: void CompleteTask<T>(GCHandle holder, Exception? exceptionResult, T? result)
-    complete_task(holder_gc_handle: GCHandle, isCanceling: boolean, error?: any, data?: any, res_converter?: MarshalerToCs): void;
-
-    // the marshaled signature is: TRes? CallDelegate<T1,T2,T3TRes>(GCHandle callback, T1? arg1, T2? arg2, T3? arg3)
-    call_delegate(callback_gc_handle: GCHandle, arg1_js: any, arg2_js: any, arg3_js: any,
-        res_converter?: MarshalerToJs, arg1_converter?: MarshalerToCs, arg2_converter?: MarshalerToCs, arg3_converter?: MarshalerToCs): any;
-
-    // the marshaled signature is: Task<int>? CallEntrypoint(MonoMethod* entrypointPtr, string[] args)
-    call_entry_point(entry_point: MonoMethod, args?: string[]): Promise<number>;
-
-    // the marshaled signature is: void InstallMainSynchronizationContext()
-    install_main_synchronization_context(): void;
-
-    // the marshaled signature is: string GetManagedStackTrace(GCHandle exception)
-    get_managed_stack_trace(exception_gc_handle: GCHandle): string | null
-
-    // the marshaled signature is: void LoadSatelliteAssembly(byte[] dll)
-    load_satellite_assembly(dll: Uint8Array): void;
-
-    // the marshaled signature is: void LoadLazyAssembly(byte[] dll, byte[] pdb)
-    load_lazy_assembly(dll: Uint8Array, pdb: Uint8Array | null): void;
-}
-
 export type MarshalerToJs = (arg: JSMarshalerArgument, element_type?: MarshalerType, res_converter?: MarshalerToJs, arg1_converter?: MarshalerToCs, arg2_converter?: MarshalerToCs, arg3_converter?: MarshalerToCs) => any;
 export type MarshalerToCs = (arg: JSMarshalerArgument, value: any, element_type?: MarshalerType, res_converter?: MarshalerToCs, arg1_converter?: MarshalerToJs, arg2_converter?: MarshalerToJs, arg3_converter?: MarshalerToJs) => void;
 export type BoundMarshalerToJs = (args: JSMarshalerArguments) => any;
@@ -362,6 +346,7 @@ export enum MarshalerType {
     Span,
     Action,
     Function,
+    DiscardNoWait,
 
     // only on runtime
     JSException,
@@ -445,6 +430,7 @@ export declare interface EmscriptenModuleInternal {
     runtimeKeepalivePush(): void;
     runtimeKeepalivePop(): void;
     maybeExit(): void;
+    __emscripten_thread_init(pthread_ptr: PThreadPtr, isMainBrowserThread: number, isMainRuntimeThread: number, canBlock: number): void;
 }
 
 /// A PromiseController encapsulates a Promise together with easy access to its resolve and reject functions.
@@ -472,6 +458,7 @@ export type passEmscriptenInternalsType = (internals: EmscriptenInternals, emscr
 export type setGlobalObjectsType = (globalObjects: GlobalObjects) => void;
 export type initializeExportsType = (globalObjects: GlobalObjects) => RuntimeAPI;
 export type initializeReplacementsType = (replacements: EmscriptenReplacements) => void;
+export type afterInitializeType = (module: EmscriptenModuleInternal) => void;
 export type configureEmscriptenStartupType = (module: DotnetModuleInternal) => void;
 export type configureRuntimeStartupType = () => Promise<void>;
 export type configureWorkerStartupType = (module: DotnetModuleInternal) => Promise<void>
@@ -506,9 +493,112 @@ export const enum WorkerToMainMessageType {
     enabledInterop = "notify_enabled_interop",
     monoUnRegistered = "monoUnRegistered",
     pthreadCreated = "pthreadCreated",
+    deputyCreated = "createdDeputy",
+    deputyFailed = "deputyFailed",
+    deputyStarted = "monoStarted",
+    ioStarted = "ioStarted",
     preload = "preload",
 }
 
 export const enum MainToWorkerMessageType {
     applyConfig = "apply_mono_config",
 }
+
+export interface PThreadWorker extends Worker {
+    pthread_ptr: PThreadPtr;
+    loaded: boolean;
+    // this info is updated via async messages from the worker, it could be stale
+    info: PThreadInfo;
+    thread?: Thread;
+}
+
+export interface PThreadInfo {
+    pthreadId: PThreadPtr;
+
+    workerNumber: number,
+    reuseCount: number,
+    updateCount: number,
+
+    threadName: string,
+    threadPrefix: string,
+
+    isLoaded?: boolean,
+    isRegistered?: boolean,
+    isRunning?: boolean,
+    isAttached?: boolean,
+    isDeputy?: boolean,
+    isIo?: boolean,
+    isExternalEventLoop?: boolean,
+    isUI?: boolean;
+    isBackground?: boolean,
+    isDebugger?: boolean,
+    isThreadPoolWorker?: boolean,
+    isTimer?: boolean,
+    isLongRunning?: boolean,
+    isThreadPoolGate?: boolean,
+    isFinalizer?: boolean,
+    isDirtyBecauseOfInterop?: boolean,
+}
+
+export interface PThreadLibrary {
+    unusedWorkers: PThreadWorker[];
+    runningWorkers: PThreadWorker[];
+    pthreads: PThreadInfoMap;
+    allocateUnusedWorker: () => void;
+    loadWasmModuleToWorker: (worker: PThreadWorker) => Promise<PThreadWorker>;
+    threadInitTLS: () => void,
+    getNewWorker: () => PThreadWorker,
+    returnWorkerToPool: (worker: PThreadWorker) => void,
+}
+
+export interface PThreadInfoMap {
+    [key: number]: PThreadWorker;
+}
+
+export interface Thread {
+    readonly pthreadPtr: PThreadPtr;
+    readonly port: MessagePort;
+    postMessageToWorker<T extends MonoThreadMessage>(message: T): void;
+}
+
+export interface MonoThreadMessage {
+    // Type of message.  Generally a subsystem like "diagnostic_server", or "event_pipe", "debugger", etc.
+    type: string;
+    // A particular kind of message. For example, "started", "stopped", "stopped_with_error", etc.
+    cmd: string;
+}
+
+// keep in sync with JSHostImplementation.Types.cs
+export const enum JSThreadBlockingMode {
+    /**
+     * Prevents synchronous JSExport from being called from JavaScript code in UI thread.
+     * On JSWebWorker synchronous JSExport always works.
+     * On JSWebWorker blocking .Wait always warns.
+     * This is the default mode.
+     */
+    PreventSynchronousJSExport = "PreventSynchronousJSExport",
+    /**
+     * Allows synchronous JSExport to be called from JavaScript code also in UI thread.
+     * Inside of that call blocking .Wait throws PNSE.
+     * Inside of that call nested call back to synchronous JSImport throws PNSE (because it would deadlock otherwise in 100% cases).
+     * On JSWebWorker synchronous JSExport always works.
+     * On JSWebWorker blocking .Wait always throws PNSE.
+     */
+    ThrowWhenBlockingWait = "ThrowWhenBlockingWait",
+    /**
+     * Allows synchronous JSExport to be called from JavaScript code also in UI thread.
+     * Inside of that call blocking .Wait warns.
+     * Inside of that call nested call back to synchronous JSImport throws PNSE (because it would deadlock otherwise in 100% cases).
+     * On JSWebWorker synchronous JSExport always works.
+     * On JSWebWorker blocking .Wait always warns.
+     */
+    WarnWhenBlockingWait = "WarnWhenBlockingWait",
+    /**
+     * Allows synchronous JSExport to be called from JavaScript code, and allows managed code to use blocking .Wait
+     * .Wait on Promise/Task chains could lead to deadlock because JS event loop is not processed and it can't resolve JS promises.
+     * This mode is dangerous and not supported.
+     * Allows synchronous JSExport to be called from JavaScript code also in Main thread.
+     * Inside of that call nested call back to synchronous JSImport throws PNSE (because it would deadlock otherwise in 100% cases).
+     */
+    DangerousAllowBlockingWait = "DangerousAllowBlockingWait",
+}
diff --git a/src/mono/browser/runtime/types/node.d.ts b/src/mono/browser/runtime/types/node.d.ts
index 6c548d2404ee..e42f6034ead4 100644
--- a/src/mono/browser/runtime/types/node.d.ts
+++ b/src/mono/browser/runtime/types/node.d.ts
@@ -5,4 +5,4 @@
 declare const __filename: string;
 declare const __dirname: string;
 declare type Buffer = {}
-declare const process: any;
\ No newline at end of file
+declare const process: any;
diff --git a/src/mono/browser/runtime/types/sidecar.d.ts b/src/mono/browser/runtime/types/sidecar.d.ts
index bc50b63f1fb3..b4fd0d3950ea 100644
--- a/src/mono/browser/runtime/types/sidecar.d.ts
+++ b/src/mono/browser/runtime/types/sidecar.d.ts
@@ -1,4 +1,4 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-declare let dotnetSidecar: boolean | undefined;
\ No newline at end of file
+declare let dotnetSidecar: boolean | undefined;
diff --git a/src/mono/browser/runtime/types/v8.d.ts b/src/mono/browser/runtime/types/v8.d.ts
index db11d5ef8552..95093e37c3e9 100644
--- a/src/mono/browser/runtime/types/v8.d.ts
+++ b/src/mono/browser/runtime/types/v8.d.ts
@@ -2,4 +2,4 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 // read is a v8 debugger command
-declare function read(name: string, mode?: string): any;
\ No newline at end of file
+declare function read(name: string, mode?: string): any;
diff --git a/src/mono/browser/runtime/weak-ref.ts b/src/mono/browser/runtime/weak-ref.ts
index ebb4ab7f8e08..c9b0e15e9821 100644
--- a/src/mono/browser/runtime/weak-ref.ts
+++ b/src/mono/browser/runtime/weak-ref.ts
@@ -5,19 +5,22 @@ import { WeakRefInternal } from "./types/internal";
 
 export const _use_weak_ref = typeof globalThis.WeakRef === "function";
 
-export function create_weak_ref<T extends object>(js_obj: T): WeakRefInternal<T> {
+export function create_weak_ref<T extends object> (js_obj: T): WeakRefInternal<T> {
     if (_use_weak_ref) {
         return new WeakRef(js_obj);
-    }
-    else {
+    } else {
         // this is trivial WeakRef replacement, which holds strong refrence, instead of weak one, when the browser doesn't support it
-        return <any>{
-            deref: () => {
-                return js_obj;
-            },
-            dispose: () => {
-                js_obj = null!;
-            }
-        };
+        return create_strong_ref(js_obj);
     }
 }
+
+export function create_strong_ref<T extends object> (js_obj: T): WeakRefInternal<T> {
+    return <any>{
+        deref: () => {
+            return js_obj;
+        },
+        dispose: () => {
+            js_obj = null!;
+        }
+    };
+}
diff --git a/src/mono/browser/runtime/web-socket.ts b/src/mono/browser/runtime/web-socket.ts
index b40e9e5daf48..d97cc76cefe4 100644
--- a/src/mono/browser/runtime/web-socket.ts
+++ b/src/mono/browser/runtime/web-socket.ts
@@ -6,7 +6,7 @@ import WasmEnableThreads from "consts:wasmEnableThreads";
 import { prevent_timer_throttling } from "./scheduling";
 import { Queue } from "./queue";
 import { ENVIRONMENT_IS_NODE, ENVIRONMENT_IS_SHELL, createPromiseController, loaderHelpers, mono_assert } from "./globals";
-import { setI32, localHeapViewU8 } from "./memory";
+import { setI32, localHeapViewU8, forceThreadMemoryViewRefresh } from "./memory";
 import { VoidPtr } from "./types/emscripten";
 import { PromiseController } from "./types/internal";
 import { mono_log_warn } from "./logging";
@@ -32,7 +32,7 @@ const wasm_ws_receive_status_ptr = Symbol.for("wasm ws_receive_status_ptr");
 const ws_send_buffer_blocking_threshold = 65536;
 const emptyBuffer = new Uint8Array();
 
-function verifyEnvironment() {
+function verifyEnvironment () {
     if (ENVIRONMENT_IS_SHELL) {
         throw new Error("WebSockets are not supported in shell JS engine.");
     }
@@ -44,15 +44,24 @@ function verifyEnvironment() {
     }
 }
 
-export function ws_wasm_create(uri: string, sub_protocols: string[] | null, receive_status_ptr: VoidPtr): WebSocketExtension {
+export function ws_get_state (ws: WebSocketExtension): number {
+    if (ws.readyState != WebSocket.CLOSED)
+        return ws.readyState ?? -1;
+    const receive_event_queue = ws[wasm_ws_pending_receive_event_queue];
+    const queued_events_count = receive_event_queue.getLength();
+    if (queued_events_count == 0)
+        return ws.readyState ?? -1;
+    return WebSocket.OPEN;
+}
+
+export function ws_wasm_create (uri: string, sub_protocols: string[] | null, receive_status_ptr: VoidPtr): WebSocketExtension {
     verifyEnvironment();
     assert_js_interop();
     mono_assert(uri && typeof uri === "string", () => `ERR12: Invalid uri ${typeof uri}`);
     let ws: WebSocketExtension;
     try {
         ws = new globalThis.WebSocket(uri, sub_protocols || undefined) as WebSocketExtension;
-    }
-    catch (error: any) {
+    } catch (error: any) {
         mono_log_warn("WebSocket error in ws_wasm_create: " + error.toString());
         throw error;
     }
@@ -69,6 +78,9 @@ export function ws_wasm_create(uri: string, sub_protocols: string[] | null, rece
         try {
             if (ws[wasm_ws_is_aborted]) return;
             if (!loaderHelpers.is_runtime_running()) return;
+            if (WasmEnableThreads) {
+                forceThreadMemoryViewRefresh();
+            }
             open_promise_control.resolve(ws);
             prevent_timer_throttling();
         } catch (error: any) {
@@ -79,6 +91,9 @@ export function ws_wasm_create(uri: string, sub_protocols: string[] | null, rece
         try {
             if (ws[wasm_ws_is_aborted]) return;
             if (!loaderHelpers.is_runtime_running()) return;
+            if (WasmEnableThreads) {
+                forceThreadMemoryViewRefresh();
+            }
             web_socket_on_message(ws, ev);
             prevent_timer_throttling();
         } catch (error: any) {
@@ -90,6 +105,9 @@ export function ws_wasm_create(uri: string, sub_protocols: string[] | null, rece
             ws.removeEventListener("message", local_on_message);
             if (ws[wasm_ws_is_aborted]) return;
             if (!loaderHelpers.is_runtime_running()) return;
+            if (WasmEnableThreads) {
+                forceThreadMemoryViewRefresh();
+            }
 
             ws[wasm_ws_close_received] = true;
             ws["close_status"] = ev.code;
@@ -119,6 +137,9 @@ export function ws_wasm_create(uri: string, sub_protocols: string[] | null, rece
         try {
             if (ws[wasm_ws_is_aborted]) return;
             if (!loaderHelpers.is_runtime_running()) return;
+            if (WasmEnableThreads) {
+                forceThreadMemoryViewRefresh();
+            }
             ws.removeEventListener("message", local_on_message);
             const message = ev.message
                 ? "WebSocket error: " + ev.message
@@ -145,7 +166,7 @@ export function ws_wasm_create(uri: string, sub_protocols: string[] | null, rece
     return ws;
 }
 
-export function ws_wasm_open(ws: WebSocketExtension): Promise<WebSocketExtension> | null {
+export function ws_wasm_open (ws: WebSocketExtension): Promise<WebSocketExtension> | null {
     mono_assert(!!ws, "ERR17: expected ws instance");
     if (ws[wasm_ws_pending_error]) {
         return rejectedPromise(ws[wasm_ws_pending_error]);
@@ -155,7 +176,7 @@ export function ws_wasm_open(ws: WebSocketExtension): Promise<WebSocketExtension
     return open_promise_control.promise;
 }
 
-export function ws_wasm_send(ws: WebSocketExtension, buffer_ptr: VoidPtr, buffer_length: number, message_type: number, end_of_message: boolean): Promise<void> | null {
+export function ws_wasm_send (ws: WebSocketExtension, buffer_ptr: VoidPtr, buffer_length: number, message_type: number, end_of_message: boolean): Promise<void> | null {
     mono_assert(!!ws, "ERR17: expected ws instance");
 
     if (ws[wasm_ws_pending_error]) {
@@ -180,7 +201,7 @@ export function ws_wasm_send(ws: WebSocketExtension, buffer_ptr: VoidPtr, buffer
     return web_socket_send_and_wait(ws, whole_buffer);
 }
 
-export function ws_wasm_receive(ws: WebSocketExtension, buffer_ptr: VoidPtr, buffer_length: number): Promise<void> | null {
+export function ws_wasm_receive (ws: WebSocketExtension, buffer_ptr: VoidPtr, buffer_length: number): Promise<void> | null {
     mono_assert(!!ws, "ERR18: expected ws instance");
 
     if (ws[wasm_ws_pending_error]) {
@@ -207,8 +228,7 @@ export function ws_wasm_receive(ws: WebSocketExtension, buffer_ptr: VoidPtr, buf
         return resolvedPromise();
     }
 
-    const readyState = ws.readyState;
-    if (readyState == WebSocket.CLOSED) {
+    if (ws[wasm_ws_close_received]) {
         const receive_status_ptr = ws[wasm_ws_receive_status_ptr];
         setI32(receive_status_ptr, 0); // count
         setI32(<any>receive_status_ptr + 4, 2); // type:close
@@ -225,7 +245,7 @@ export function ws_wasm_receive(ws: WebSocketExtension, buffer_ptr: VoidPtr, buf
     return promise;
 }
 
-export function ws_wasm_close(ws: WebSocketExtension, code: number, reason: string | null, wait_for_close_received: boolean): Promise<void> | null {
+export function ws_wasm_close (ws: WebSocketExtension, code: number, reason: string | null, wait_for_close_received: boolean): Promise<void> | null {
     mono_assert(!!ws, "ERR19: expected ws instance");
 
     if (ws[wasm_ws_is_aborted] || ws[wasm_ws_close_sent] || ws.readyState == WebSocket.CLOSED) {
@@ -245,8 +265,7 @@ export function ws_wasm_close(ws: WebSocketExtension, code: number, reason: stri
             ws.close(code);
         }
         return promise;
-    }
-    else {
+    } else {
         if (typeof reason === "string") {
             ws.close(code, reason);
         } else {
@@ -256,7 +275,7 @@ export function ws_wasm_close(ws: WebSocketExtension, code: number, reason: stri
     }
 }
 
-export function ws_wasm_abort(ws: WebSocketExtension): void {
+export function ws_wasm_abort (ws: WebSocketExtension): void {
     mono_assert(!!ws, "ERR18: expected ws instance");
 
     if (ws[wasm_ws_is_aborted] || ws[wasm_ws_close_sent]) {
@@ -274,7 +293,7 @@ export function ws_wasm_abort(ws: WebSocketExtension): void {
     }
 }
 
-function reject_promises(ws: WebSocketExtension, error: Error) {
+function reject_promises (ws: WebSocketExtension, error: Error) {
     const open_promise_control = ws[wasm_ws_pending_open_promise];
     const open_promise_used = ws[wasm_ws_pending_open_promise_used];
 
@@ -297,7 +316,7 @@ function reject_promises(ws: WebSocketExtension, error: Error) {
 }
 
 // send and return promise
-function web_socket_send_and_wait(ws: WebSocketExtension, buffer_view: Uint8Array | string): Promise<void> | null {
+function web_socket_send_and_wait (ws: WebSocketExtension, buffer_view: Uint8Array | string): Promise<void> | null {
     ws.send(buffer_view);
     ws[wasm_ws_pending_send_buffer] = null;
 
@@ -319,15 +338,13 @@ function web_socket_send_and_wait(ws: WebSocketExtension, buffer_view: Uint8Arra
             // was it all sent yet ?
             if (ws.bufferedAmount === 0) {
                 promise_control.resolve();
-            }
-            else {
+            } else {
                 const readyState = ws.readyState;
                 if (readyState != WebSocket.OPEN && readyState != WebSocket.CLOSING) {
                     // only reject if the data were not sent
                     // bufferedAmount does not reset to zero once the connection closes
                     promise_control.reject(new Error(`InvalidState: ${readyState} The WebSocket is not connected.`));
-                }
-                else if (!promise_control.isDone) {
+                } else if (!promise_control.isDone) {
                     globalThis.setTimeout(polling_check, nextDelay);
                     // exponentially longer delays, up to 1000ms
                     nextDelay = Math.min(nextDelay * 1.5, 1000);
@@ -339,8 +356,7 @@ function web_socket_send_and_wait(ws: WebSocketExtension, buffer_view: Uint8Arra
             if (index > -1) {
                 pending.splice(index, 1);
             }
-        }
-        catch (error: any) {
+        } catch (error: any) {
             mono_log_warn("WebSocket error in web_socket_send_and_wait: " + error.toString());
             promise_control.reject(error);
         }
@@ -351,7 +367,7 @@ function web_socket_send_and_wait(ws: WebSocketExtension, buffer_view: Uint8Arra
     return promise;
 }
 
-function web_socket_on_message(ws: WebSocketExtension, event: MessageEvent) {
+function web_socket_on_message (ws: WebSocketExtension, event: MessageEvent) {
     const event_queue = ws[wasm_ws_pending_receive_event_queue];
     const promise_queue = ws[wasm_ws_pending_receive_promise_queue];
 
@@ -364,8 +380,7 @@ function web_socket_on_message(ws: WebSocketExtension, event: MessageEvent) {
             data: stringToUTF8(event.data),
             offset: 0
         });
-    }
-    else {
+    } else {
         if (event.data.constructor.name !== "ArrayBuffer") {
             throw new Error("ERR19: WebSocket receive expected ArrayBuffer");
         }
@@ -387,7 +402,7 @@ function web_socket_on_message(ws: WebSocketExtension, event: MessageEvent) {
     prevent_timer_throttling();
 }
 
-function web_socket_receive_buffering(ws: WebSocketExtension, event_queue: Queue<any>, buffer_ptr: VoidPtr, buffer_length: number) {
+function web_socket_receive_buffering (ws: WebSocketExtension, event_queue: Queue<any>, buffer_ptr: VoidPtr, buffer_length: number) {
     const event = event_queue.peek();
 
     const count = Math.min(buffer_length, event.data.length - event.offset);
@@ -407,7 +422,7 @@ function web_socket_receive_buffering(ws: WebSocketExtension, event_queue: Queue
     setI32(<any>response_ptr + 8, end_of_message);
 }
 
-function web_socket_send_buffering(ws: WebSocketExtension, buffer_view: Uint8Array, message_type: number, end_of_message: boolean): Uint8Array | string | null {
+function web_socket_send_buffering (ws: WebSocketExtension, buffer_view: Uint8Array, message_type: number, end_of_message: boolean): Uint8Array | string | null {
     let buffer = ws[wasm_ws_pending_send_buffer];
     let offset = 0;
     const length = buffer_view.byteLength;
@@ -423,15 +438,13 @@ function web_socket_send_buffering(ws: WebSocketExtension, buffer_view: Uint8Arr
                 newbuffer.set(buffer, 0);// copy previous buffer
                 newbuffer.subarray(offset).set(buffer_view);// append copy at the end
                 ws[wasm_ws_pending_send_buffer] = buffer = newbuffer;
-            }
-            else {
+            } else {
                 buffer.subarray(offset).set(buffer_view);// append copy at the end
             }
             offset += length;
             ws[wasm_ws_pending_send_buffer_offset] = offset;
         }
-    }
-    else if (!end_of_message) {
+    } else if (!end_of_message) {
         // create new buffer
         if (length !== 0) {
             buffer = <Uint8Array>buffer_view.slice(); // copy
@@ -440,8 +453,7 @@ function web_socket_send_buffering(ws: WebSocketExtension, buffer_view: Uint8Arr
             ws[wasm_ws_pending_send_buffer] = buffer;
         }
         ws[wasm_ws_pending_send_buffer_type] = message_type;
-    }
-    else {
+    } else {
         if (length !== 0) {
             // we could use the un-pinned view, because it will be immediately used in ws.send()
             if (WasmEnableThreads) {
@@ -502,7 +514,7 @@ type Message = {
     offset: number
 }
 
-function resolvedPromise(): Promise<void> | null {
+function resolvedPromise (): Promise<void> | null {
     if (!WasmEnableThreads) {
         // signal that we are finished synchronously
         // this is optimization, which doesn't allocate and doesn't require to marshal resolve() call to C# side.
@@ -517,7 +529,7 @@ function resolvedPromise(): Promise<void> | null {
     }
 }
 
-function rejectedPromise(message: string): Promise<any> | null {
+function rejectedPromise (message: string): Promise<any> | null {
     const resolved = Promise.reject(new Error(message));
     return wrap_as_cancelable<void>(resolved);
 }
diff --git a/src/mono/browser/test-main.js b/src/mono/browser/test-main.js
index 78c697de1996..1feb21ef2f79 100644
--- a/src/mono/browser/test-main.js
+++ b/src/mono/browser/test-main.js
@@ -250,10 +250,10 @@ function configureRuntime(dotnet, runArgs) {
         .withExitCodeLogging()
         .withElementOnExit()
         .withInteropCleanupOnExit()
-        .withAssertAfterExit()
         .withDumpThreadsOnNonZeroExit()
         .withConfig({
-            loadAllSatelliteResources: true
+            loadAllSatelliteResources: true,
+            jsThreadBlockingMode: "ThrowWhenBlockingWait",
         });
 
     if (ENVIRONMENT_IS_NODE) {
diff --git a/src/mono/cmake/config.h.in b/src/mono/cmake/config.h.in
index e484a802aed9..1fd699e76107 100644
--- a/src/mono/cmake/config.h.in
+++ b/src/mono/cmake/config.h.in
@@ -5,9 +5,9 @@
 
 #include <SDKDDKVer.h>
 
-#if _WIN32_WINNT < 0x0601
-#error "Mono requires Windows 7 or later."
-#endif /* _WIN32_WINNT < 0x0601 */
+#if _WIN32_WINNT < 0x0602
+#error "Mono requires Windows 8 or later."
+#endif /* _WIN32_WINNT < 0x0602 */
 
 #ifndef HAVE_WINAPI_FAMILY_SUPPORT
 
@@ -183,9 +183,6 @@
 /* Define to 1 if you have the <unwind.h> header file. */
 #cmakedefine HAVE_UNWIND_H 1
 
-/* Use static ICU */
-#cmakedefine STATIC_ICU 1
-
 /* Use in-tree zlib */
 #cmakedefine INTERNAL_ZLIB 1
 
diff --git a/src/mono/dlls/mscordbi/CMakeLists.txt b/src/mono/dlls/mscordbi/CMakeLists.txt
index 52b4e62ad341..e39aeac5ac6d 100644
--- a/src/mono/dlls/mscordbi/CMakeLists.txt
+++ b/src/mono/dlls/mscordbi/CMakeLists.txt
@@ -110,7 +110,6 @@ if (CLR_CMAKE_HOST_UNIX)
   add_subdirectory(${CLR_DIR}/pal pal)
 
   include_directories(${CLR_DIR}/pal/inc/rt/cpp)
-  add_compile_options(-nostdinc)
 endif (CLR_CMAKE_HOST_UNIX)
 
 if (CLR_CMAKE_HOST_UNIX)
diff --git a/src/mono/mono.proj b/src/mono/mono.proj
index 3b0ade6bd89c..8b57badb1a2e 100644
--- a/src/mono/mono.proj
+++ b/src/mono/mono.proj
@@ -80,7 +80,6 @@
   </PropertyGroup>
 
   <ItemGroup Condition="'$(TargetsBrowser)' == 'true' or '$(TargetsWasi)' == 'true' or '$(TargetsiOS)' == 'true' or '$(TargetstvOS)' == 'true' or '$(TargetsMacCatalyst)' == 'true'">
-    <PackageReference Include="Microsoft.NETCore.Runtime.ICU.Transport" PrivateAssets="all" Version="$(MicrosoftNETCoreRuntimeICUTransportVersion)" GeneratePathProperty="true" />
     <PackageReference Condition="'$(HostOS)' == 'windows'"
                       Include="Microsoft.NET.Runtime.Emscripten.3.1.34.Python.win-x64"
                       PrivateAssets="all"
@@ -97,7 +96,7 @@
   </PropertyGroup>
 
   <!-- CI specific build options -->
-  <ItemGroup Condition="'$(ContinuousIntegrationBuild)' == 'true' and ('$(TargetsOSX)' == 'true' or '$(TargetsMacCatalyst)' == 'true' or '$(TargetsBrowser)' == 'true' or '$(TargetsWasi)' == 'true' or '$(Targetsillumos)' == 'true')">
+  <ItemGroup Condition="'$(ContinuousIntegrationBuild)' == 'true' and '$(DotNetBuildOrchestrator)' != 'true' and ('$(TargetsOSX)' == 'true' or '$(TargetsMacCatalyst)' == 'true' or '$(TargetsBrowser)' == 'true' or '$(TargetsWasi)' == 'true' or '$(Targetsillumos)' == 'true')">
     <_MonoCMakeArgs Include="-DENABLE_WERROR=1"/>
   </ItemGroup>
 
@@ -297,9 +296,6 @@
       <_MonoBuildEnv Condition="'$(Platform)' == 'arm64'" Include="TARGET_BUILD_ARCH=arm64" />
       <_MonoBuildEnv Condition="'$(Platform)' == 'arm'" Include="TARGET_BUILD_ARCH=arm" />
       <_MonoBuildEnv Condition="'$(Platform)' == 'armv6'" Include="TARGET_BUILD_ARCH=armv6" />
-      <_MonoBuildEnv Condition="'$(Platform)' == 'arm64'" Include="PKG_CONFIG_PATH=$(MonoCrossDir)/usr/lib/aarch64-linux-gnu/pkgconfig" />
-      <_MonoBuildEnv Condition="'$(Platform)' == 'arm'" Include="PKG_CONFIG_PATH=$(MonoCrossDir)/usr/lib/arm-linux-gnueabihf/pkgconfig" />
-      <_MonoBuildEnv Condition="'$(Platform)' == 'armv6'" Include="PKG_CONFIG_PATH=$(MonoCrossDir)/usr/lib/arm-linux-gnueabihf/pkgconfig" />
       <_MonoCFLAGS Condition="'$(TargetArchitecture)' == 'armv6'" Include="-march=armv6zk" />
       <_MonoCFLAGS Condition="'$(TargetArchitecture)' == 'armv6'" Include="-mcpu=arm1176jzf-s" />
       <_MonoCFLAGS Condition="'$(TargetArchitecture)' == 'armv6'" Include="-mfpu=vfp" />
@@ -314,49 +310,42 @@
     <ItemGroup Condition="'$(Targetsillumos)' == 'true' and '$(MonoCrossDir)' != ''">
       <_MonoCMakeArgs Include="-DCMAKE_TOOLCHAIN_FILE=$(CrossToolchainFile)" />
       <_MonoBuildEnv Include="TARGET_BUILD_ARCH=x64" />
-      <_MonoBuildEnv Include="PKG_CONFIG_PATH=$(MonoCrossDir)/lib/pkgconfig" />
     </ItemGroup>
 
     <!-- x64 Haiku cross build options -->
     <ItemGroup Condition="'$(TargetsHaiku)' == 'true' and '$(MonoCrossDir)' != ''">
       <_MonoCMakeArgs Include="-DCMAKE_TOOLCHAIN_FILE=$(CrossToolchainFile)" />
       <_MonoBuildEnv Include="TARGET_BUILD_ARCH=x64" />
-      <_MonoBuildEnv Include="PKG_CONFIG_PATH=$(MonoCrossDir)/boot/system/lib/pkgconfig" />
     </ItemGroup>
 
     <!-- x64 linux cross build options -->
     <ItemGroup Condition="'$(TargetsLinux)' == 'true' and '$(MonoCrossDir)' != '' and '$(TargetArchitecture)' == 'x64'">
       <_MonoCMakeArgs Include="-DCMAKE_TOOLCHAIN_FILE=$(CrossToolchainFile)" />
       <_MonoBuildEnv Include="TARGET_BUILD_ARCH=x64" />
-      <_MonoBuildEnv Include="PKG_CONFIG_PATH=$(MonoCrossDir)/usr/lib/pkgconfig" />
     </ItemGroup>
 
     <!-- x86 linux cross build options -->
     <ItemGroup Condition="'$(TargetsLinux)' == 'true' and '$(MonoCrossDir)' != '' and '$(TargetArchitecture)' == 'x86'">
       <_MonoCMakeArgs Include="-DCMAKE_TOOLCHAIN_FILE=$(CrossToolchainFile)" />
       <_MonoBuildEnv Include="TARGET_BUILD_ARCH=x86" />
-      <_MonoBuildEnv Include="PKG_CONFIG_PATH=$(MonoCrossDir)/usr/lib/pkgconfig" />
     </ItemGroup>
 
     <!-- riscv64 Linux cross build options -->
     <ItemGroup Condition="'$(MonoCrossDir)' != '' and '$(TargetArchitecture)' == 'riscv64'">
       <_MonoCMakeArgs Include="-DCMAKE_TOOLCHAIN_FILE=$(CrossToolchainFile)" />
       <_MonoBuildEnv Include="TARGET_BUILD_ARCH=riscv64" />
-      <_MonoBuildEnv Include="PKG_CONFIG_PATH=$(MonoCrossDir)/usr/lib/riscv64-linux-gnu/pkgconfig" />
     </ItemGroup>
 
     <!-- s390x Linux cross build options -->
     <ItemGroup Condition="'$(MonoCrossDir)' != '' and '$(TargetArchitecture)' == 's390x'">
       <_MonoCMakeArgs Include="-DCMAKE_TOOLCHAIN_FILE=$(CrossToolchainFile)" />
       <_MonoBuildEnv Include="TARGET_BUILD_ARCH=s390x" />
-      <_MonoBuildEnv Include="PKG_CONFIG_PATH=$(MonoCrossDir)/usr/lib/s390x-linux-gnu/pkgconfig" />
     </ItemGroup>
 
     <!-- ppc64le Linux cross build options -->
     <ItemGroup Condition="'$(MonoCrossDir)' != '' and '$(TargetArchitecture)' == 'ppc64le'">
       <_MonoCMakeArgs Include="-DCMAKE_TOOLCHAIN_FILE=$(CrossToolchainFile)" />
       <_MonoBuildEnv Include="TARGET_BUILD_ARCH=ppc64le" />
-      <_MonoBuildEnv Include="PKG_CONFIG_PATH=$(MonoCrossDir)/usr/lib/powerpc64le-linux-gnu/pkgconfig" />
     </ItemGroup>
 
     <!-- x64 FreeBSD cross build options -->
@@ -385,9 +374,8 @@
       <_MonoCPPFLAGS Include="-D_CRT_NONSTDC_NO_DEPRECATE" />
       <!--<_MonoCPPFLAGS Include="-DGC_NOT_DLL" />--> <!-- only used for Boehm -->
       <_MonoCPPFLAGS Include="-DWIN32_THREADS" />
-      <_MonoCPPFLAGS Include="-DWINVER=0x0601" />
-      <_MonoCPPFLAGS Include="-D_WIN32_WINNT=0x0601" />
-      <_MonoCPPFLAGS Include="-D_WIN32_IE=0x0501" />
+      <_MonoCPPFLAGS Include="-DWINVER=0x0602" />
+      <_MonoCPPFLAGS Include="-D_WIN32_WINNT=0x0602" />
       <_MonoCPPFLAGS Include="-D_UNICODE" />
       <_MonoCPPFLAGS Include="-DUNICODE" />
       <_MonoCPPFLAGS Include="-DFD_SETSIZE=1024" />
@@ -444,9 +432,6 @@
     <ItemGroup Condition="'$(TargetsBrowser)' == 'true'">
       <_MonoCMakeArgs Include="-DENABLE_LLVM_RUNTIME=1"/>
       <_MonoCMakeArgs Include="-DEMSCRIPTEN_SYSTEM_PROCESSOR=wasm"/>
-      <_MonoCFLAGS Condition="'$(WasmEnableThreads)' == 'true'" Include="$(EscapedQuoteW)-I$([MSBuild]::NormalizePath('$(PkgMicrosoft_NETCore_Runtime_ICU_Transport)', 'runtimes', 'browser-wasm-threads', 'native', 'include'))$(EscapedQuoteW)"/>
-
-      <_MonoCFLAGS Condition="'$(WasmEnableThreads)' != 'true'" Include="$(EscapedQuoteW)-I$([MSBuild]::NormalizePath('$(PkgMicrosoft_NETCore_Runtime_ICU_Transport)', 'runtimes', 'browser-wasm', 'native', 'include'))$(EscapedQuoteW)"/>
     </ItemGroup>
     <!-- WASI specific options -->
     <ItemGroup Condition="'$(TargetsWasi)' == 'true'">
@@ -461,8 +446,6 @@
       <_MonoCFLAGS Include="-D_WASI_EMULATED_PROCESS_CLOCKS"/>
       <_MonoCFLAGS Include="-D_WASI_EMULATED_SIGNAL"/>
       <_MonoCFLAGS Include="-D_WASI_EMULATED_MMAN"/>
-      <_MonoCFLAGS Condition="'$(WasmEnableThreads)' == 'true'" Include="$(EscapedQuoteW)-I$([MSBuild]::NormalizePath('$(PkgMicrosoft_NETCore_Runtime_ICU_Transport)', 'runtimes', 'wasi-wasm-threads', 'native', 'include').Replace('\','/'))$(EscapedQuoteW)"/>
-      <_MonoCFLAGS Condition="'$(WasmEnableThreads)' != 'true'" Include="$(EscapedQuoteW)-I$([MSBuild]::NormalizePath('$(PkgMicrosoft_NETCore_Runtime_ICU_Transport)', 'runtimes', 'wasi-wasm', 'native', 'include').Replace('\','/'))$(EscapedQuoteW)"/>
     </ItemGroup>
 
     <!-- iOS/tvOS specific options -->
@@ -765,9 +748,8 @@
       <_MonoAOTCPPFLAGS Include="-D_CRT_NONSTDC_NO_DEPRECATE" />
       <!--<_MonoAOTCPPFLAGS Include="-DGC_NOT_DLL" />--> <!-- only used for Boehm -->
       <_MonoAOTCPPFLAGS Include="-DWIN32_THREADS" />
-      <_MonoAOTCPPFLAGS Include="-DWINVER=0x0601" />
-      <_MonoAOTCPPFLAGS Include="-D_WIN32_WINNT=0x0601" />
-      <_MonoAOTCPPFLAGS Include="-D_WIN32_IE=0x0501" />
+      <_MonoAOTCPPFLAGS Include="-DWINVER=0x0602" />
+      <_MonoAOTCPPFLAGS Include="-D_WIN32_WINNT=0x0602" />
       <_MonoAOTCPPFLAGS Include="-D_UNICODE" />
       <_MonoAOTCPPFLAGS Include="-DUNICODE" />
       <_MonoAOTCPPFLAGS Include="-DFD_SETSIZE=1024" />
@@ -834,21 +816,6 @@
       <_MonoAotBuildEnv Include="TARGET_BUILD_ARCH=$(AotHostArchitecture)" />
     </ItemGroup>
 
-    <!-- x64 Linux cross build options -->
-    <ItemGroup Condition="'$(AotHostArchitecture)' == 'x64' and '$(AotHostOS)' == 'linux'">
-      <_MonoAotBuildEnv Include="PKG_CONFIG_PATH=$(MonoCrossDir)/usr/lib/x86_64-linux-gnu/pkgconfig" />
-    </ItemGroup>
-
-    <!-- ARM Linux cross build options on CI -->
-    <ItemGroup Condition="'$(AotHostArchitecture)' == 'arm64' and '$(AotHostOS)' == 'linux'">
-      <_MonoAotBuildEnv Include="PKG_CONFIG_PATH=$(MonoCrossDir)/usr/lib/aarch64-linux-gnu/pkgconfig" />
-    </ItemGroup>
-
-    <!-- ARM Linux cross build options on CI -->
-    <ItemGroup Condition="'$(AotHostOS)' == 'linux-musl'">
-      <_MonoAotBuildEnv Include="PKG_CONFIG_PATH=$(MonoCrossDir)/usr/lib/pkgconfig" />
-    </ItemGroup>
-
     <PropertyGroup>
       <_MonoAOTCFLAGSOption>-DCMAKE_C_FLAGS=&quot;@(_MonoAOTCPPFLAGS, ' ') @(_MonoAOTCFLAGS, ' ')&quot;</_MonoAOTCFLAGSOption>
       <_MonoAOTCXXFLAGSOption>-DCMAKE_CXX_FLAGS=&quot;@(_MonoAOTCPPFLAGS, ' ') @(_MonoAOTCXXFLAGS, ' ')&quot;</_MonoAOTCXXFLAGSOption>
diff --git a/src/mono/mono/component/debugger-agent.c b/src/mono/mono/component/debugger-agent.c
index f6ceb01b1561..7c0a93093e34 100644
--- a/src/mono/mono/component/debugger-agent.c
+++ b/src/mono/mono/component/debugger-agent.c
@@ -4124,7 +4124,7 @@ jit_end (MonoProfiler *prof, MonoMethod *method, MonoJitInfo *jinfo)
 		if (assembly) {
 			DebuggerTlsData *tls;
 			tls = (DebuggerTlsData *)mono_native_tls_get_value (debugger_tls_id);
-			if (tls->invoke == NULL) {
+			if (!CHECK_ICORDBG (TRUE) || tls->invoke == NULL) {
 				process_profiler_event (EVENT_KIND_ASSEMBLY_LOAD, assembly);
 			} else {
 				assembly_load(prof, assembly); //send later
@@ -5581,6 +5581,7 @@ decode_value_compute_size (MonoType *t, int type, MonoDomain *domain, guint8 *bu
 	if (type != t->type && !MONO_TYPE_IS_REFERENCE (t) &&
 		!(t->type == MONO_TYPE_I && type == MONO_TYPE_VALUETYPE) &&
 		!(type == VALUE_TYPE_ID_FIXED_ARRAY) &&
+		!(type == MDBGPROT_VALUE_TYPE_ID_NULL) &&
 		!(t->type == MONO_TYPE_U && type == MONO_TYPE_VALUETYPE) &&
 		!(t->type == MONO_TYPE_PTR && type == MONO_TYPE_I8) &&
 		!(t->type == MONO_TYPE_FNPTR && type == MONO_TYPE_I8) &&
diff --git a/src/mono/mono/component/diagnostics_server.c b/src/mono/mono/component/diagnostics_server.c
index 4bea3d722625..02179f785eb3 100644
--- a/src/mono/mono/component/diagnostics_server.c
+++ b/src/mono/mono/component/diagnostics_server.c
@@ -250,6 +250,7 @@ queue_push_sync (WasmIpcStreamQueue *q, const uint8_t *buf, uint32_t buf_size, u
 		gboolean is_browser_thread = FALSE;
 		while (mono_atomic_load_i32 (&q->buf_full) != 0) {
 			if (G_UNLIKELY (!is_browser_thread_inited)) {
+					// FIXME for deputy
 					is_browser_thread = mono_threads_wasm_is_ui_thread ();
 					is_browser_thread_inited = TRUE;
 			}
diff --git a/src/mono/mono/component/hot_reload.c b/src/mono/mono/component/hot_reload.c
index 8d14eca73157..85d1029e490d 100644
--- a/src/mono/mono/component/hot_reload.c
+++ b/src/mono/mono/component/hot_reload.c
@@ -933,6 +933,7 @@ delta_info_initialize_mutants (const MonoImage *base, const BaselineInfo *base_i
 			tbl->row_size = prev_table->row_size;
 			tbl->size_bitfield = prev_table->size_bitfield;
 		}
+		mono_metadata_compute_column_offsets (tbl);
 		tbl->rows_ = rows;
 		g_assert (tbl->rows_ > 0 && tbl->row_size != 0);
 
@@ -1890,7 +1891,7 @@ apply_enclog_pass2 (Pass2Context *ctx, MonoImage *image_base, BaselineInfo *base
 		gboolean is_addition = token_index-1 >= delta_info->count[token_table].prev_gen_rows ;
 
 		*should_invalidate_transformed_code |= table_should_invalidate_transformed_code (token_table);
-		
+
 		mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_METADATA_UPDATE, "enclog i=%d: token=0x%08x (table=%s): %d:\t%s", i, log_token, mono_meta_table_name (token_table), func_code, (is_addition ? "ADD" : "UPDATE"));
 
 
@@ -2934,7 +2935,7 @@ add_property_to_existing_class (MonoImage *image_base, BaselineInfo *base_info,
 	parent_info->generation = generation;
 
 	return prop;
-	
+
 }
 
 MonoClassMetadataUpdateEvent *
@@ -2962,7 +2963,7 @@ add_event_to_existing_class (MonoImage *image_base, BaselineInfo *base_info, uin
 	parent_info->generation = generation;
 
 	return evt;
-	
+
 }
 
 
@@ -3017,7 +3018,7 @@ add_semantic_method_to_existing_event (MonoImage *image_base, BaselineInfo *base
 	g_assert (m_event_is_from_update (evt));
 
 	MonoMethod **dest = NULL;
-	
+
 	switch (semantics) {
 	case METHOD_SEMANTIC_ADD_ON:
 		dest = &evt->add;
@@ -3425,7 +3426,7 @@ recompute_ginst_props (MonoClass *ginst, MonoClassMetadataUpdateInfo *info,
 	for (GSList *ptr = gtd_info->added_props; ptr; ptr = ptr->next) {
 		MonoClassMetadataUpdateProperty *gtd_added_prop = (MonoClassMetadataUpdateProperty *)ptr->data;
 		MonoClassMetadataUpdateProperty *added_prop = mono_class_new0 (ginst, MonoClassMetadataUpdateProperty, 1);
-		
+
 		added_prop->prop = gtd_added_prop->prop;
 		added_prop->token = gtd_added_prop->token;
 
@@ -3453,7 +3454,7 @@ recompute_ginst_events (MonoClass *ginst, MonoClassMetadataUpdateInfo *info,
 	for (GSList *ptr = gtd_info->added_events; ptr; ptr = ptr->next) {
 		MonoClassMetadataUpdateEvent *gtd_added_event = (MonoClassMetadataUpdateEvent *)ptr->data;
 		MonoClassMetadataUpdateEvent *added_event = mono_class_new0 (ginst, MonoClassMetadataUpdateEvent, 1);
-		
+
 		added_event->evt = gtd_added_event->evt;
 
 		if (added_event->evt.add)
@@ -3466,7 +3467,7 @@ recompute_ginst_events (MonoClass *ginst, MonoClassMetadataUpdateInfo *info,
 			added_event->evt.raise = mono_class_inflate_generic_method_full_checked (
 				added_event->evt.raise, ginst, mono_class_get_context (ginst), error);
 		mono_error_assert_ok (error); /*FIXME proper error handling*/
-		
+
 		added_event->evt.parent = ginst;
 
 		info->added_events = g_slist_prepend_mem_manager (m_class_get_mem_manager (ginst), info->added_events, (gpointer)added_event);
@@ -3506,7 +3507,7 @@ recompute_ginst_update_info(MonoClass *ginst, MonoClass *gtd, MonoClassMetadataU
 {
 	// if ginst has a `MonoClassMetadataUpdateInfo`, use it to start with, otherwise, allocate a new one
 	MonoClassMetadataUpdateInfo *info = mono_class_get_or_add_metadata_update_info (ginst);
-  
+
 	if (!info)
 		info = mono_class_new0 (ginst, MonoClassMetadataUpdateInfo, 1);
 
@@ -3517,13 +3518,13 @@ recompute_ginst_update_info(MonoClass *ginst, MonoClass *gtd, MonoClassMetadataU
 
 	recompute_ginst_events (ginst, info, gtd, gtd_info, error);
 	mono_error_assert_ok (error);
-	
+
 	recompute_ginst_fields (ginst, info, gtd, gtd_info, error);
 	mono_error_assert_ok (error);
 
 	// finally, update the generation of the ginst info to the same one as the gtd
 	info->generation = gtd_info->generation;
-	// we're done info is now up to date    
+	// we're done info is now up to date
 }
 
 static MonoProperty *
diff --git a/src/mono/mono/component/marshal-ilgen.c b/src/mono/mono/component/marshal-ilgen.c
index ee5c60bd4195..c554dab5600d 100644
--- a/src/mono/mono/component/marshal-ilgen.c
+++ b/src/mono/mono/component/marshal-ilgen.c
@@ -2247,6 +2247,12 @@ emit_marshal_object_ilgen (EmitMarshalContext *m, int argnum, MonoType *t,
 			encoding = cb_to_mono->get_string_encoding (m->piinfo, spec);
 			conv = cb_to_mono->get_ptr_to_stringbuilder_conv (m->piinfo, spec, &need_free);
 
+			if (conv == MONO_MARSHAL_CONV_INVALID) {
+				char *msg = g_strdup_printf ("stringbuilder marshalling conversion %d not implemented", encoding);
+				cb_to_mono->methodBuilder.emit_exception_marshal_directive (mb, msg);
+				break;
+			}
+
 			g_assert (encoding != -1);
 
 			if (m_type_is_byref (t)) {
diff --git a/src/mono/mono/eglib/gfile-posix.c b/src/mono/mono/eglib/gfile-posix.c
index 53414c659d78..f38db70c6a7b 100644
--- a/src/mono/mono/eglib/gfile-posix.c
+++ b/src/mono/mono/eglib/gfile-posix.c
@@ -142,6 +142,7 @@ g_file_open_tmp (const gchar *tmpl, gchar **name_used, GError **gerror)
 	}
 
 	t = g_build_filename (g_get_tmp_dir (), tmpl, (const char*)NULL);
+	g_assert (t);
 
 	#ifdef HOST_WASI
 	g_critical ("g_file_open_tmp is not implemented for WASI");
diff --git a/src/mono/mono/eglib/gfile.c b/src/mono/mono/eglib/gfile.c
index 4d5390c19329..5720a54a7126 100644
--- a/src/mono/mono/eglib/gfile.c
+++ b/src/mono/mono/eglib/gfile.c
@@ -112,6 +112,7 @@ is_ascii_string (const gchar *str)
 	while (*str) {
 		if (!g_isascii (*str))
 			return FALSE;
+		str ++;
 	}
 	return TRUE;
 }
diff --git a/src/mono/mono/eglib/glib.h b/src/mono/mono/eglib/glib.h
index 7ec9dcde9c8e..d6c1e2e59a9a 100644
--- a/src/mono/mono/eglib/glib.h
+++ b/src/mono/mono/eglib/glib.h
@@ -69,6 +69,18 @@
 #error Mono requires _Noreturn (C11 or newer)
 #endif
 
+G_ATTR_NORETURN
+static inline void eg_unreachable (void) {
+#if defined(_MSC_VER)
+	 __assume(0);
+#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 5)))
+	__builtin_unreachable();
+#else
+	for (;;)
+	;
+#endif
+}
+
 #ifdef __cplusplus
 
 #define g_cast monoeg_g_cast // in case not inlined (see eglib-remap.h)
@@ -735,14 +747,13 @@ G_ATTR_NORETURN void
 const char *   g_get_assertion_message (void);
 
 #ifndef DISABLE_ASSERT_MESSAGES
-/* The for (;;) tells gc thats g_error () doesn't return, avoiding warnings */
-#define g_error(...) do { g_log (G_LOG_DOMAIN, G_LOG_LEVEL_ERROR, __VA_ARGS__); for (;;); } while (0)
+#define g_error(...) do { g_log (G_LOG_DOMAIN, G_LOG_LEVEL_ERROR, __VA_ARGS__); eg_unreachable (); } while (0)
 #define g_critical(...) g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL, __VA_ARGS__)
 #define g_warning(...)  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_WARNING, __VA_ARGS__)
 #define g_message(...)  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_MESSAGE, __VA_ARGS__)
 #define g_debug(...)    g_log (G_LOG_DOMAIN, G_LOG_LEVEL_DEBUG, __VA_ARGS__)
 #else
-#define g_error(...)    do { g_log_disabled (G_LOG_DOMAIN, G_LOG_LEVEL_ERROR, __FILE__, __LINE__); for (;;); } while (0)
+#define g_error(...)    do { g_log_disabled (G_LOG_DOMAIN, G_LOG_LEVEL_ERROR, __FILE__, __LINE__); eg_unreachable (); } while (0)
 #define g_critical(...) g_log_disabled (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL, __FILE__, __LINE__)
 #define g_warning(...)  g_log_disabled (G_LOG_DOMAIN, G_LOG_LEVEL_WARNING, __FILE__, __LINE__)
 #define g_message(...)  g_log_disabled (G_LOG_DOMAIN, G_LOG_LEVEL_MESSAGE, __FILE__, __LINE__)
@@ -785,14 +796,6 @@ gpointer g_convert_error_quark(void);
 #define G_UNLIKELY(x) (x)
 #endif
 
-#if defined(_MSC_VER)
-#define  eg_unreachable() __assume(0)
-#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 5)))
-#define  eg_unreachable() __builtin_unreachable()
-#else
-#define  eg_unreachable()
-#endif
-
 /* g_assert is a boolean expression; the precise value is not preserved, just true or false. */
 #ifdef DISABLE_ASSERT_MESSAGES
 // This is smaller than the equivalent mono_assertion_message (..."disabled");
diff --git a/src/mono/mono/eventpipe/ep-rt-mono-runtime-provider.c b/src/mono/mono/eventpipe/ep-rt-mono-runtime-provider.c
index 381164d8f940..a62ddf3aec9b 100644
--- a/src/mono/mono/eventpipe/ep-rt-mono-runtime-provider.c
+++ b/src/mono/mono/eventpipe/ep-rt-mono-runtime-provider.c
@@ -779,6 +779,8 @@ include_method (MonoMethod *method)
 		return false;
 	} else if (!m_method_is_wrapper (method)) {
 		return true;
+	} else if (method->wrapper_type == MONO_WRAPPER_DYNAMIC_METHOD){
+		return true;
 	} else {
 		WrapperInfo *wrapper = mono_marshal_get_wrapper_info (method);
 		return (wrapper && wrapper->subtype == WRAPPER_SUBTYPE_PINVOKE) ? true : false;
diff --git a/src/mono/mono/eventpipe/ep-rt-mono.c b/src/mono/mono/eventpipe/ep-rt-mono.c
index 9b9f9e34c4bc..0aa3518df7af 100644
--- a/src/mono/mono/eventpipe/ep-rt-mono.c
+++ b/src/mono/mono/eventpipe/ep-rt-mono.c
@@ -852,6 +852,11 @@ ep_rt_mono_init_finish (void)
 void
 ep_rt_mono_fini (void)
 {
+	// Avoid cleaning up resources to prevent cleaning up out from under running
+	// threads.
+	if (!mono_runtime_is_shutting_down ())
+		return;
+
 	ep_rt_mono_runtime_provider_fini ();
 	ep_rt_mono_profiler_provider_fini ();
 
diff --git a/src/mono/mono/eventpipe/test/CMakeLists.txt b/src/mono/mono/eventpipe/test/CMakeLists.txt
index d867685fb2da..d313e630adaa 100644
--- a/src/mono/mono/eventpipe/test/CMakeLists.txt
+++ b/src/mono/mono/eventpipe/test/CMakeLists.txt
@@ -39,13 +39,7 @@ if(ENABLE_PERFTRACING)
         set(CMAKE_SKIP_RPATH 1)
         add_executable(ep-test ${EVENTPIPE_TEST_SOURCES} ${EVENTPIPE_TEST_HEADERS})
         target_sources(ep-test PRIVATE "${mono-components-objects}")
-        target_link_libraries(ep-test PRIVATE eglib_api monosgen-static ${OS_LIBS} ${LLVM_LIBS} ${ICU_LIBS} ${Z_LIBS} monoapi)
-        if(HAVE_SYS_ICU)
-            target_link_libraries(ep-test PRIVATE icu_shim_objects)
-        endif()
-        if(ICU_LDFLAGS)
-            set_target_properties(ep-test PROPERTIES LINK_FLAGS ${ICU_LDFLAGS})
-        endif()
+        target_link_libraries(ep-test PRIVATE eglib_api monosgen-static ${OS_LIBS} ${LLVM_LIBS} ${Z_LIBS} monoapi)
         install_with_stripped_symbols(ep-test TARGETS bin)
     else(ENABLE_EVENTPIPE_TEST AND STATIC_COMPONENTS AND (NOT DISABLE_COMPONENTS) AND (NOT DISABLE_LIBS) AND (NOT DISABLE_EXECUTABLES))
         message(VERBOSE "Skip building native EventPipe library test runner.")
diff --git a/src/mono/mono/metadata/appdomain.c b/src/mono/mono/metadata/appdomain.c
index 3ed266c09798..e41b4fb1ca8b 100644
--- a/src/mono/mono/metadata/appdomain.c
+++ b/src/mono/mono/metadata/appdomain.c
@@ -612,6 +612,7 @@ try_load_from (MonoAssembly **assembly,
 
 	*assembly = NULL;
 	fullpath = g_build_filename (path1, path2, path3, path4, (const char*)NULL);
+	g_assert (fullpath);
 
 	found = g_file_test (fullpath, G_FILE_TEST_IS_REGULAR);
 
diff --git a/src/mono/mono/metadata/assembly.c b/src/mono/mono/metadata/assembly.c
index 63677c87b6f3..49eca9772f48 100644
--- a/src/mono/mono/metadata/assembly.c
+++ b/src/mono/mono/metadata/assembly.c
@@ -336,6 +336,7 @@ load_in_path (const char *basename, const char** search_path, const MonoAssembly
 
 	for (i = 0; search_path [i]; ++i) {
 		fullpath = g_build_filename (search_path [i], basename, (const char*)NULL);
+		g_assert (fullpath);
 		result = mono_assembly_request_open (fullpath, req, status);
 		g_free (fullpath);
 		if (result)
@@ -1407,6 +1408,7 @@ absolute_dir (const gchar *filename)
 
 	cwd = g_get_current_dir ();
 	mixed = g_build_filename (cwd, filename, (const char*)NULL);
+	g_assert (mixed);
 	parts = g_strsplit (mixed, G_DIR_SEPARATOR_S, 0);
 	g_free (mixed);
 	g_free (cwd);
diff --git a/src/mono/mono/metadata/class-getters.h b/src/mono/mono/metadata/class-getters.h
index eb69558a1d01..57ff9afefceb 100644
--- a/src/mono/mono/metadata/class-getters.h
+++ b/src/mono/mono/metadata/class-getters.h
@@ -39,6 +39,7 @@ MONO_CLASS_GETTER(m_class_is_delegate, gboolean, , MonoClass, delegate)
 MONO_CLASS_GETTER(m_class_is_gc_descr_inited, gboolean, , MonoClass, gc_descr_inited)
 MONO_CLASS_GETTER(m_class_has_cctor, gboolean,  , MonoClass, has_cctor)
 MONO_CLASS_GETTER(m_class_has_references, gboolean, , MonoClass, has_references)
+MONO_CLASS_GETTER(m_class_has_ref_fields, gboolean, , MonoClass, has_ref_fields)
 MONO_CLASS_GETTER(m_class_has_static_refs, gboolean, , MonoClass, has_static_refs)
 MONO_CLASS_GETTER(m_class_has_no_special_static_fields, gboolean, , MonoClass, no_special_static_fields)
 MONO_CLASS_GETTER(m_class_is_nested_classes_inited, gboolean, , MonoClass, nested_classes_inited)
diff --git a/src/mono/mono/metadata/class-init.c b/src/mono/mono/metadata/class-init.c
index 484eedb9038f..d9cfd751c819 100644
--- a/src/mono/mono/metadata/class-init.c
+++ b/src/mono/mono/metadata/class-init.c
@@ -2272,6 +2272,7 @@ mono_class_layout_fields (MonoClass *klass, int base_instance_size, int packing_
 				}
 
 				size = mono_type_size (field->type, &align);
+				// keep in sync with marshal.c mono_marshal_load_type_info
 				if (m_class_is_inlinearray (klass)) {
 					// Limit the max size of array instance to 1MiB
 					const guint32 struct_max_size = 1024 * 1024;
@@ -2572,6 +2573,10 @@ mono_class_layout_fields (MonoClass *klass, int base_instance_size, int packing_
 		case MONO_TYPE_VALUETYPE:
 		case MONO_TYPE_GENERICINST:
 			field_class = mono_class_from_mono_type_internal (field->type);
+			if (mono_class_is_ginst (field_class) && !mono_verifier_class_is_valid_generic_instantiation (field_class)) {
+				mono_class_set_type_load_failure (klass, "Field '%s' is an invalid generic instantiation of type %s", field->name, mono_type_get_full_name (field_class));
+				return;
+			}
 			break;
 		default:
 			break;
@@ -2952,7 +2957,7 @@ mono_class_init_internal (MonoClass *klass)
 	if (klass->inited || mono_class_has_failure (klass))
 		return !mono_class_has_failure (klass);
 
-	/*g_print ("Init class %s\n", mono_type_get_full_name (klass));*/
+	// g_print ("Init class %s\n", mono_type_get_full_name (klass));
 
 	/*
 	 * This function can recursively call itself.
@@ -3878,7 +3883,7 @@ mono_class_setup_interface_id (MonoClass *klass)
 /*
  * mono_class_setup_interfaces:
  *
- *   Initialize klass->interfaces/interfaces_count.
+ *   Initialize klass->interfaces/interface_count.
  * LOCKING: Acquires the loader lock.
  * This function can fail the type.
  */
diff --git a/src/mono/mono/metadata/class.c b/src/mono/mono/metadata/class.c
index c5fcd2a8d7a1..05da1332e301 100644
--- a/src/mono/mono/metadata/class.c
+++ b/src/mono/mono/metadata/class.c
@@ -640,10 +640,7 @@ mono_type_is_valid_generic_argument (MonoType *type)
 {
 	switch (type->type) {
 	case MONO_TYPE_VOID:
-	case MONO_TYPE_TYPEDBYREF:
 		return FALSE;
-	case MONO_TYPE_VALUETYPE:
-		return !m_class_is_byreflike (type->data.klass);
 	default:
 		return TRUE;
 	}
@@ -6788,10 +6785,13 @@ mono_method_get_base_method (MonoMethod *method, gboolean definition, MonoError
 			if (mono_class_is_open_constructed_type (m_class_get_byval_arg (parent))) {
 				parent = mono_class_inflate_generic_class_checked (parent, generic_inst, error);
 				return_val_if_nok  (error, NULL);
+				g_assert (parent);
 			}
+
 			if (mono_class_is_ginst (parent)) {
 				parent_inst = mono_class_get_context (parent);
 				parent = mono_class_get_generic_class (parent)->container_class;
+				g_assert (parent);
 			}
 
 			mono_class_setup_vtable (parent);
@@ -6811,6 +6811,7 @@ mono_method_get_base_method (MonoMethod *method, gboolean definition, MonoError
 		if (mono_class_is_open_constructed_type (m_class_get_byval_arg (klass))) {
 			klass = mono_class_inflate_generic_class_checked (klass, generic_inst, error);
 			return_val_if_nok (error, NULL);
+			g_assert (klass);
 
 			generic_inst = NULL;
 		}
@@ -6824,6 +6825,7 @@ mono_method_get_base_method (MonoMethod *method, gboolean definition, MonoError
 	if (generic_inst) {
 		klass = mono_class_inflate_generic_class_checked (klass, generic_inst, error);
 		return_val_if_nok (error, NULL);
+		g_assert (klass);
 		generic_inst = NULL;
 	}
 
@@ -6912,7 +6914,7 @@ mono_class_has_default_constructor (MonoClass *klass, gboolean public_only)
  * \param klass class in which the failure was detected
  * \param fmt \c printf -style error message string.
  *
- * Sets a deferred failure in the class and prints a warning message. 
+ * Sets a deferred failure in the class and prints a warning message.
  * The deferred failure allows the runtime to attempt setting up the class layout at runtime.
  *
  * LOCKING: Acquires the loader lock.
diff --git a/src/mono/mono/metadata/icall-decl.h b/src/mono/mono/metadata/icall-decl.h
index 1f4ad944a326..ebce337af7b1 100644
--- a/src/mono/mono/metadata/icall-decl.h
+++ b/src/mono/mono/metadata/icall-decl.h
@@ -80,7 +80,6 @@ ICALL_EXPORT double ves_icall_System_Math_Ceiling (double);
 ICALL_EXPORT double ves_icall_System_Math_Cos (double);
 ICALL_EXPORT double ves_icall_System_Math_Cosh (double);
 ICALL_EXPORT double ves_icall_System_Math_Exp (double);
-ICALL_EXPORT double ves_icall_System_Math_FMod (double, double);
 ICALL_EXPORT double ves_icall_System_Math_Floor (double);
 ICALL_EXPORT double ves_icall_System_Math_Log (double);
 ICALL_EXPORT double ves_icall_System_Math_Log10 (double);
@@ -104,7 +103,6 @@ ICALL_EXPORT float ves_icall_System_MathF_Ceiling (float);
 ICALL_EXPORT float ves_icall_System_MathF_Cos (float);
 ICALL_EXPORT float ves_icall_System_MathF_Cosh (float);
 ICALL_EXPORT float ves_icall_System_MathF_Exp (float);
-ICALL_EXPORT float ves_icall_System_MathF_FMod (float, float);
 ICALL_EXPORT float ves_icall_System_MathF_Floor (float);
 ICALL_EXPORT float ves_icall_System_MathF_Log (float);
 ICALL_EXPORT float ves_icall_System_MathF_Log10 (float);
@@ -189,16 +187,6 @@ ICALL_EXPORT void     ves_icall_System_Threading_LowLevelLifoSemaphore_DeleteInt
 ICALL_EXPORT gint32   ves_icall_System_Threading_LowLevelLifoSemaphore_TimedWaitInternal (gpointer sem_ptr, gint32 timeout_ms);
 ICALL_EXPORT void     ves_icall_System_Threading_LowLevelLifoSemaphore_ReleaseInternal (gpointer sem_ptr, gint32 count);
 
-/* include these declarations if we're in the threaded wasm runtime, or if we're building a wasm-targeting cross compiler and we need to support --print-icall-table */
-#if (defined(HOST_BROWSER) && !defined(DISABLE_THREADS)) || (defined(TARGET_WASM) && defined(ENABLE_ICALL_SYMBOL_MAP))
-ICALL_EXPORT gpointer ves_icall_System_Threading_LowLevelLifoAsyncWaitSemaphore_InitInternal (void);
-ICALL_EXPORT void   ves_icall_System_Threading_LowLevelLifoAsyncWaitSemaphore_PrepareAsyncWaitInternal (gpointer sem_ptr, gint32 timeout_ms, gpointer success_cb, gpointer timeout_cb, intptr_t user_data);
-
-ICALL_EXPORT MonoBoolean ves_icall_System_Threading_WebWorkerEventLoop_HasUnsettledInteropPromisesNative(void);
-ICALL_EXPORT void ves_icall_System_Threading_WebWorkerEventLoop_KeepalivePushInternal (void);
-ICALL_EXPORT void ves_icall_System_Threading_WebWorkerEventLoop_KeepalivePopInternal (void);
-#endif
-
 #ifdef TARGET_AMD64
 ICALL_EXPORT void ves_icall_System_Runtime_Intrinsics_X86_X86Base___cpuidex (int abcd[4], int function_id, int subfunction_id);
 #endif
diff --git a/src/mono/mono/metadata/icall-def.h b/src/mono/mono/metadata/icall-def.h
index 06f3ab888b72..766754d52008 100644
--- a/src/mono/mono/metadata/icall-def.h
+++ b/src/mono/mono/metadata/icall-def.h
@@ -243,7 +243,6 @@ NOHANDLES_FLAGS(ICALL(MATH_21, "Ceiling", ves_icall_System_Math_Ceiling), MONO_I
 NOHANDLES_FLAGS(ICALL(MATH_5, "Cos", ves_icall_System_Math_Cos), MONO_ICALL_FLAGS_NO_EXCEPTION)
 NOHANDLES_FLAGS(ICALL(MATH_6, "Cosh", ves_icall_System_Math_Cosh), MONO_ICALL_FLAGS_NO_EXCEPTION)
 NOHANDLES_FLAGS(ICALL(MATH_7, "Exp", ves_icall_System_Math_Exp), MONO_ICALL_FLAGS_NO_EXCEPTION)
-NOHANDLES_FLAGS(ICALL(MATH_7a, "FMod", ves_icall_System_Math_FMod), MONO_ICALL_FLAGS_NO_EXCEPTION)
 NOHANDLES_FLAGS(ICALL(MATH_8, "Floor", ves_icall_System_Math_Floor), MONO_ICALL_FLAGS_NO_EXCEPTION)
 NOHANDLES_FLAGS(ICALL(MATH_22, "FusedMultiplyAdd", ves_icall_System_Math_FusedMultiplyAdd), MONO_ICALL_FLAGS_NO_EXCEPTION)
 NOHANDLES_FLAGS(ICALL(MATH_9, "Log", ves_icall_System_Math_Log), MONO_ICALL_FLAGS_NO_EXCEPTION)
@@ -271,7 +270,6 @@ NOHANDLES_FLAGS(ICALL(MATHF_9, "Ceiling", ves_icall_System_MathF_Ceiling), MONO_
 NOHANDLES_FLAGS(ICALL(MATHF_10, "Cos", ves_icall_System_MathF_Cos), MONO_ICALL_FLAGS_NO_EXCEPTION)
 NOHANDLES_FLAGS(ICALL(MATHF_11, "Cosh", ves_icall_System_MathF_Cosh), MONO_ICALL_FLAGS_NO_EXCEPTION)
 NOHANDLES_FLAGS(ICALL(MATHF_12, "Exp", ves_icall_System_MathF_Exp), MONO_ICALL_FLAGS_NO_EXCEPTION)
-NOHANDLES_FLAGS(ICALL(MATHF_22, "FMod", ves_icall_System_MathF_FMod), MONO_ICALL_FLAGS_NO_EXCEPTION)
 NOHANDLES_FLAGS(ICALL(MATHF_13, "Floor", ves_icall_System_MathF_Floor), MONO_ICALL_FLAGS_NO_EXCEPTION)
 NOHANDLES_FLAGS(ICALL(MATHF_24, "FusedMultiplyAdd", ves_icall_System_MathF_FusedMultiplyAdd), MONO_ICALL_FLAGS_NO_EXCEPTION)
 NOHANDLES_FLAGS(ICALL(MATHF_14, "Log", ves_icall_System_MathF_Log), MONO_ICALL_FLAGS_NO_EXCEPTION)
@@ -437,6 +435,7 @@ HANDLES(RUNH_1, "GetObjectValue", ves_icall_System_Runtime_CompilerServices_Runt
 HANDLES(RUNH_6, "GetSpanDataFrom", ves_icall_System_Runtime_CompilerServices_RuntimeHelpers_GetSpanDataFrom, gpointer, 3, (MonoClassField_ptr, MonoType_ptr, gpointer))
 HANDLES(RUNH_2, "GetUninitializedObjectInternal", ves_icall_System_Runtime_CompilerServices_RuntimeHelpers_GetUninitializedObjectInternal, MonoObject, 1, (MonoType_ptr))
 HANDLES(RUNH_3, "InitializeArray", ves_icall_System_Runtime_CompilerServices_RuntimeHelpers_InitializeArray, void, 2, (MonoArray, MonoClassField_ptr))
+HANDLES(RUNH_8, "InternalBox", ves_icall_System_Runtime_CompilerServices_RuntimeHelpers_InternalBox, void, 3, (MonoQCallTypeHandle, char_ref, MonoObjectHandleOnStack))
 HANDLES(RUNH_7, "InternalGetHashCode", ves_icall_System_Runtime_CompilerServices_RuntimeHelpers_InternalGetHashCode, int, 1, (MonoObject))
 HANDLES(RUNH_3a, "PrepareMethod", ves_icall_System_Runtime_CompilerServices_RuntimeHelpers_PrepareMethod, void, 3, (MonoMethod_ptr, gpointer, int))
 HANDLES(RUNH_4, "RunClassConstructor", ves_icall_System_Runtime_CompilerServices_RuntimeHelpers_RunClassConstructor, void, 1, (MonoType_ptr))
@@ -556,17 +555,13 @@ HANDLES(STRING_11, "InternalIsInterned", ves_icall_System_String_InternalIsInter
 ICALL_TYPE(ILOCK, "System.Threading.Interlocked", ILOCK_1)
 NOHANDLES(ICALL(ILOCK_1, "Add(int&,int)", ves_icall_System_Threading_Interlocked_Add_Int))
 NOHANDLES(ICALL(ILOCK_2, "Add(long&,long)", ves_icall_System_Threading_Interlocked_Add_Long))
-NOHANDLES(ICALL(ILOCK_24, "CompareExchange(byte&,byte,byte)", ves_icall_System_Threading_Interlocked_CompareExchange_Byte))
 NOHANDLES(ICALL(ILOCK_5, "CompareExchange(int&,int,int)", ves_icall_System_Threading_Interlocked_CompareExchange_Int))
 NOHANDLES(ICALL(ILOCK_6, "CompareExchange(int&,int,int,bool&)", ves_icall_System_Threading_Interlocked_CompareExchange_Int_Success))
-NOHANDLES(ICALL(ILOCK_25, "CompareExchange(int16&,int16,int16)", ves_icall_System_Threading_Interlocked_CompareExchange_Short))
 NOHANDLES(ICALL(ILOCK_8, "CompareExchange(long&,long,long)", ves_icall_System_Threading_Interlocked_CompareExchange_Long))
 NOHANDLES(ICALL(ILOCK_9, "CompareExchange(object&,object&,object&,object&)", ves_icall_System_Threading_Interlocked_CompareExchange_Object))
 NOHANDLES(ICALL(ILOCK_11, "Decrement(int&)", ves_icall_System_Threading_Interlocked_Decrement_Int))
 NOHANDLES(ICALL(ILOCK_12, "Decrement(long&)", ves_icall_System_Threading_Interlocked_Decrement_Long))
-NOHANDLES(ICALL(ILOCK_26, "Exchange(byte&,byte)", ves_icall_System_Threading_Interlocked_Exchange_Byte))
 NOHANDLES(ICALL(ILOCK_15, "Exchange(int&,int)", ves_icall_System_Threading_Interlocked_Exchange_Int))
-NOHANDLES(ICALL(ILOCK_27, "Exchange(int16&,int16)", ves_icall_System_Threading_Interlocked_Exchange_Short))
 NOHANDLES(ICALL(ILOCK_17, "Exchange(long&,long)", ves_icall_System_Threading_Interlocked_Exchange_Long))
 NOHANDLES(ICALL(ILOCK_18, "Exchange(object&,object&,object&)", ves_icall_System_Threading_Interlocked_Exchange_Object))
 NOHANDLES(ICALL(ILOCK_20, "Increment(int&)", ves_icall_System_Threading_Interlocked_Increment_Int))
@@ -574,16 +569,6 @@ NOHANDLES(ICALL(ILOCK_21, "Increment(long&)", ves_icall_System_Threading_Interlo
 NOHANDLES(ICALL(ILOCK_22, "MemoryBarrierProcessWide", ves_icall_System_Threading_Interlocked_MemoryBarrierProcessWide))
 NOHANDLES(ICALL(ILOCK_23, "Read(long&)", ves_icall_System_Threading_Interlocked_Read_Long))
 
-/* include these icalls if we're in the threaded wasm runtime, or if we're building a wasm-targeting cross compiler and we need to support --print-icall-table */
-#if (defined(HOST_BROWSER) && !defined(DISABLE_THREADS)) || (defined(TARGET_WASM) && defined(ENABLE_ICALL_SYMBOL_MAP))
-ICALL_TYPE(LIFOASYNCSEM, "System.Threading.LowLevelLifoAsyncWaitSemaphore", LIFOASYNCSEM_1)
-NOHANDLES(ICALL(LIFOASYNCSEM_1, "DeleteInternal", ves_icall_System_Threading_LowLevelLifoSemaphore_DeleteInternal))
-NOHANDLES(ICALL(LIFOASYNCSEM_2, "InitInternal", ves_icall_System_Threading_LowLevelLifoAsyncWaitSemaphore_InitInternal))
-NOHANDLES(ICALL(LIFOASYNCSEM_3, "PrepareAsyncWaitInternal", ves_icall_System_Threading_LowLevelLifoAsyncWaitSemaphore_PrepareAsyncWaitInternal))
-NOHANDLES(ICALL(LIFOASYNCSEM_4, "ReleaseInternal", ves_icall_System_Threading_LowLevelLifoSemaphore_ReleaseInternal))
-#endif
-
-
 ICALL_TYPE(LIFOSEM, "System.Threading.LowLevelLifoSemaphore", LIFOSEM_1)
 NOHANDLES(ICALL(LIFOSEM_1, "DeleteInternal", ves_icall_System_Threading_LowLevelLifoSemaphore_DeleteInternal))
 NOHANDLES(ICALL(LIFOSEM_2, "InitInternal", ves_icall_System_Threading_LowLevelLifoSemaphore_InitInternal))
@@ -615,14 +600,6 @@ HANDLES(THREAD_10, "SetState", ves_icall_System_Threading_Thread_SetState, void,
 HANDLES(THREAD_13, "StartInternal", ves_icall_System_Threading_Thread_StartInternal, void, 2, (MonoThreadObject, gint32))
 NOHANDLES(ICALL(THREAD_14, "YieldInternal", ves_icall_System_Threading_Thread_YieldInternal))
 
-/* include these icalls if we're in the threaded wasm runtime, or if we're building a wasm-targeting cross compiler and we need to support --print-icall-table */
-#if (defined(HOST_BROWSER) && !defined(DISABLE_THREADS)) || (defined(TARGET_WASM) && defined(ENABLE_ICALL_SYMBOL_MAP))
-ICALL_TYPE(WEBWORKERLOOP, "System.Threading.WebWorkerEventLoop", WEBWORKERLOOP_1)
-NOHANDLES(ICALL(WEBWORKERLOOP_1, "HasUnsettledInteropPromisesNative", ves_icall_System_Threading_WebWorkerEventLoop_HasUnsettledInteropPromisesNative))
-NOHANDLES(ICALL(WEBWORKERLOOP_2, "KeepalivePopInternal", ves_icall_System_Threading_WebWorkerEventLoop_KeepalivePopInternal))
-NOHANDLES(ICALL(WEBWORKERLOOP_3, "KeepalivePushInternal", ves_icall_System_Threading_WebWorkerEventLoop_KeepalivePushInternal))
-#endif
-
 ICALL_TYPE(TYPE, "System.Type", TYPE_1)
 HANDLES(TYPE_1, "internal_from_handle", ves_icall_System_Type_internal_from_handle, MonoReflectionType, 1, (MonoType_ref))
 
diff --git a/src/mono/mono/metadata/icall-table.h b/src/mono/mono/metadata/icall-table.h
index 1336ed368e9d..6af12c82e4f4 100644
--- a/src/mono/mono/metadata/icall-table.h
+++ b/src/mono/mono/metadata/icall-table.h
@@ -71,6 +71,7 @@ typedef mono_unichar2 *mono_unichar2_ptr;
 typedef mono_unichar4 *mono_unichar4_ptr;
 typedef MonoSpanOfObjects *MonoSpanOfObjects_ref;
 
+typedef char    *char_ref;
 typedef char **char_ptr_ref;
 typedef gint32  *gint32_ref;
 typedef gint64  *gint64_ref;
@@ -173,6 +174,7 @@ typedef MonoStringHandle MonoStringOutHandle;
 #define MONO_HANDLE_TYPE_WRAP_int_ref  			ICALL_HANDLES_WRAP_VALUETYPE_REF
 #define MONO_HANDLE_TYPE_WRAP_gint32_ref  			ICALL_HANDLES_WRAP_VALUETYPE_REF
 #define MONO_HANDLE_TYPE_WRAP_int_ptr_ref  		ICALL_HANDLES_WRAP_VALUETYPE_REF
+#define MONO_HANDLE_TYPE_WRAP_char_ref		ICALL_HANDLES_WRAP_VALUETYPE_REF
 #define MONO_HANDLE_TYPE_WRAP_char_ptr_ref		ICALL_HANDLES_WRAP_VALUETYPE_REF
 #define MONO_HANDLE_TYPE_WRAP_guint8_ptr_ref		ICALL_HANDLES_WRAP_VALUETYPE_REF
 #define MONO_HANDLE_TYPE_WRAP_MonoResolveTokenError_ref	ICALL_HANDLES_WRAP_VALUETYPE_REF
diff --git a/src/mono/mono/metadata/icall.c b/src/mono/mono/metadata/icall.c
index 9c3ea1171106..54cae6cef38d 100644
--- a/src/mono/mono/metadata/icall.c
+++ b/src/mono/mono/metadata/icall.c
@@ -1210,6 +1210,26 @@ ves_icall_System_Runtime_CompilerServices_RuntimeHelpers_PrepareMethod (MonoMeth
 	// FIXME: Implement
 }
 
+void
+ves_icall_System_Runtime_CompilerServices_RuntimeHelpers_InternalBox (MonoQCallTypeHandle type_handle, char* data, MonoObjectHandleOnStack obj, MonoError *error)
+{
+	MonoType *type = type_handle.type;
+	MonoClass *klass = mono_class_from_mono_type_internal (type);
+
+	g_assert (m_class_is_valuetype (klass));
+
+	mono_class_init_checked (klass, error);
+	goto_if_nok (error, error_ret);
+
+	MonoObject* raw_obj = mono_value_box_checked (klass, data, error);
+	goto_if_nok (error, error_ret);
+
+	HANDLE_ON_STACK_SET(obj, raw_obj);
+	return;
+error_ret:
+	HANDLE_ON_STACK_SET (obj, NULL);
+}
+
 MonoObjectHandle
 ves_icall_System_Object_MemberwiseClone (MonoObjectHandle this_obj, MonoError *error)
 {
@@ -3028,7 +3048,7 @@ ves_icall_RuntimeType_GetNamespace (MonoQCallTypeHandle type_handle, MonoObjectH
 
 	MonoClass *klass = mono_class_from_mono_type_internal (type);
 	MonoClass *elem;
-	while (!m_class_is_enumtype (klass) && 
+	while (!m_class_is_enumtype (klass) &&
 		!mono_class_is_nullable (klass) &&
 		(klass != (elem = m_class_get_element_class (klass))))
 		klass = elem;
@@ -4564,6 +4584,7 @@ ves_icall_System_Reflection_RuntimeAssembly_GetInfo (MonoQCallAssemblyHandle ass
 		else
 			absolute = g_build_filename (assembly->basedir, filename, (const char*)NULL);
 
+		g_assert (absolute);
 		mono_icall_make_platform_path (absolute);
 
 		const gchar *prepend = mono_icall_get_file_path_prefix (absolute);
diff --git a/src/mono/mono/metadata/image.c b/src/mono/mono/metadata/image.c
index dbca4e24f48e..23333b1b2a97 100644
--- a/src/mono/mono/metadata/image.c
+++ b/src/mono/mono/metadata/image.c
@@ -2511,6 +2511,7 @@ mono_image_load_file_for_image_checked (MonoImage *image, uint32_t fileidx, Mono
 	fname = mono_metadata_string_heap (image, fname_id);
 	base_dir = g_path_get_dirname (image->name);
 	name = g_build_filename (base_dir, fname, (const char*)NULL);
+	g_assert (name);
 	res = mono_image_open (name, NULL);
 	if (!res)
 		goto done;
diff --git a/src/mono/mono/metadata/loader.c b/src/mono/mono/metadata/loader.c
index 67c613b9631a..5f9e5c0e7d1f 100644
--- a/src/mono/mono/metadata/loader.c
+++ b/src/mono/mono/metadata/loader.c
@@ -823,7 +823,6 @@ mono_method_search_in_array_class (MonoClass *klass, const char *name, MonoMetho
 	int i;
 
 	mono_class_setup_methods (klass);
-	g_assert (!mono_class_has_failure (klass)); /*FIXME this should not fail, right?*/
 	int mcount = mono_class_get_method_count (klass);
 	MonoMethod **klass_methods = m_class_get_methods (klass);
 	for (i = 0; i < mcount; ++i) {
diff --git a/src/mono/mono/metadata/marshal.c b/src/mono/mono/metadata/marshal.c
index 1e10d82ee408..09fddd573c0f 100644
--- a/src/mono/mono/metadata/marshal.c
+++ b/src/mono/mono/metadata/marshal.c
@@ -3294,7 +3294,7 @@ mono_emit_marshal (EmitMarshalContext *m, int argnum, MonoType *t,
 		return mono_emit_disabled_marshal (m, argnum, t, spec, conv_arg, conv_arg_type, action);
 
 	return mono_component_marshal_ilgen()->emit_marshal_ilgen(m, argnum, t, spec, conv_arg, conv_arg_type, action, get_marshal_cb());
-} 
+}
 
 static void
 mono_marshal_set_callconv_for_type(MonoType *type, MonoMethodSignature *csig, gboolean *skip_gc_trans /*out*/)
@@ -3577,7 +3577,7 @@ mono_marshal_get_native_wrapper (MonoMethod *method, gboolean check_exceptions,
 
 	if (G_UNLIKELY (pinvoke && mono_method_has_unmanaged_callers_only_attribute (method))) {
 		/*
-		 * In AOT mode and embedding scenarios, it is possible that the icall is not registered in the runtime doing the AOT compilation. 
+		 * In AOT mode and embedding scenarios, it is possible that the icall is not registered in the runtime doing the AOT compilation.
 		 * Emit a wrapper that throws a NotSupportedException.
 		 */
 		get_marshal_cb ()->mb_emit_exception (mb, "System", "NotSupportedException", "Method canot be marked with both DllImportAttribute and UnmanagedCallersOnlyAttribute");
@@ -3711,9 +3711,9 @@ mono_marshal_get_native_wrapper (MonoMethod *method, gboolean check_exceptions,
 						swift_error_args++;
 					} else if (param_klass == swift_self) {
 						swift_self_args++;
-					} else if (!m_class_is_blittable (param_klass) && m_class_get_this_arg (param_klass)->type != MONO_TYPE_FNPTR) {
+					} else if (!m_class_is_blittable (param_klass) || m_class_is_simd_type (param_klass)) {
 						swift_error_args = swift_self_args = 0;
-						mono_error_set_generic_error (emitted_error, "System", "InvalidProgramException", "Passing non-primitive value types to a P/Invoke with the Swift calling convention is unsupported.");
+						mono_error_set_generic_error (emitted_error, "System", "InvalidProgramException", "Passing non-blittable types to a P/Invoke with the Swift calling convention is unsupported.");
 						break;
 					}
 				}
@@ -3757,7 +3757,7 @@ mono_marshal_get_native_wrapper (MonoMethod *method, gboolean check_exceptions,
 	}
 
 	goto leave;
-	
+
 	emit_exception_for_error:
 		mono_error_cleanup (emitted_error);
 		info = mono_wrapper_info_create (mb, WRAPPER_SUBTYPE_NONE);
@@ -3922,14 +3922,14 @@ mono_marshal_get_native_func_wrapper_indirect (MonoClass *caller_class, MonoMeth
 	caller_class = mono_class_get_generic_type_definition (caller_class);
 	MonoImage *image = m_class_get_image (caller_class);
 	g_assert (sig->pinvoke);
-	g_assert (!sig->hasthis && ! sig->explicit_this);
+	g_assert (!sig->hasthis && !sig->explicit_this);
 	g_assert (!sig->has_type_parameters);
 
 #if 0
 	/*
 	 * Since calli sigs are already part of ECMA-335, they were already used by C++/CLI, which
 	 * allowed non-blittable types.  So the C# function pointers spec doesn't restrict this to
-	 * blittable tyhpes only.
+	 * blittable types only.
 	 */
 	g_assertf (type_is_blittable (sig->ret), "sig return type %s is not blittable\n", mono_type_full_name (sig->ret));
 
@@ -5231,7 +5231,7 @@ mono_marshal_get_unsafe_accessor_wrapper (MonoMethod *accessor_method, MonoUnsaf
 
 	if (member_name == NULL && kind != MONO_UNSAFE_ACCESSOR_CTOR)
 		member_name = accessor_method->name;
-	
+
 	/*
 	 * Check cache
 	 */
@@ -5827,11 +5827,31 @@ mono_marshal_load_type_info (MonoClass* klass)
 			continue;
 		}
 
+		size = mono_marshal_type_size (field->type, info->fields [j].mspec,
+						&align, TRUE, m_class_is_unicode (klass));
+
+		// Keep in sync with class-init.c mono_class_layout_fields
+		if (m_class_is_inlinearray (klass)) {
+			// Limit the max size of array instance to 1MiB
+			const int struct_max_size = 1024 * 1024;
+			guint32 initial_size = size;
+			size *= m_class_inlinearray_value (klass);
+			if(size == 0 || size > struct_max_size) {
+				if (mono_get_runtime_callbacks ()->mono_class_set_deferred_type_load_failure_callback) {
+					if (mono_get_runtime_callbacks ()->mono_class_set_deferred_type_load_failure_callback (klass, "Inline array struct size out of bounds, abnormally large."))
+						break;
+					else
+						size = initial_size; // failure occured during AOT compilation, continue execution
+				} else {
+					mono_class_set_type_load_failure (klass, "Inline array struct size out of bounds, abnormally large.");
+					break;
+				}
+			}
+		}
+
 		switch (layout) {
 		case TYPE_ATTRIBUTE_AUTO_LAYOUT:
 		case TYPE_ATTRIBUTE_SEQUENTIAL_LAYOUT:
-			size = mono_marshal_type_size (field->type, info->fields [j].mspec,
-						       &align, TRUE, m_class_is_unicode (klass));
 			align = m_class_get_packing_size (klass) ? MIN (m_class_get_packing_size (klass), align): align;
 			min_align = MAX (align, min_align);
 			info->fields [j].offset = info->native_size;
@@ -5840,8 +5860,6 @@ mono_marshal_load_type_info (MonoClass* klass)
 			info->native_size = info->fields [j].offset + size;
 			break;
 		case TYPE_ATTRIBUTE_EXPLICIT_LAYOUT:
-			size = mono_marshal_type_size (field->type, info->fields [j].mspec,
-						       &align, TRUE, m_class_is_unicode (klass));
 			min_align = MAX (align, min_align);
 			info->fields [j].offset = m_field_get_offset (field) - MONO_ABI_SIZEOF (MonoObject);
 			info->native_size = MAX (info->native_size, info->fields [j].offset + size);
@@ -6573,3 +6591,293 @@ mono_wrapper_caches_free (MonoWrapperCaches *cache)
 	free_hash (cache->thunk_invoke_cache);
 	free_hash (cache->unsafe_accessor_cache);
 }
+
+typedef enum {
+	SWIFT_EMPTY = 0,
+	SWIFT_OPAQUE,
+	SWIFT_INT64,
+	SWIFT_FLOAT,
+	SWIFT_DOUBLE,
+} SwiftPhysicalLoweringKind;
+
+static int get_swift_lowering_alignment (SwiftPhysicalLoweringKind kind) {
+	switch (kind) {
+	case SWIFT_INT64:
+	case SWIFT_DOUBLE:
+		return 8;
+	case SWIFT_FLOAT:
+		return 4;
+	default:
+		return 1;
+	}
+}
+
+static void set_lowering_range(guint8* lowered_bytes, guint32 offset, guint32 size, SwiftPhysicalLoweringKind kind) {
+	bool force_opaque = false;
+	
+	if (offset != ALIGN_TO(offset, get_swift_lowering_alignment(kind))) {
+		// If the start of the range is not aligned, we need to force the entire range to be opaque.
+		force_opaque = true;
+	}
+	
+        // Check if any of the range is non-empty.
+        // If so, we need to force this range to be opaque
+        // and extend the range to the existing tag's range and mark as opaque in addition to the requested range.
+	
+	for (guint32 i = 0; i < size; ++i) {
+		SwiftPhysicalLoweringKind current = (SwiftPhysicalLoweringKind)lowered_bytes[offset + i];
+		if (current != SWIFT_EMPTY && current != kind) {
+			force_opaque = true;
+			offset = ALIGN_DOWN_TO(offset, get_swift_lowering_alignment(current));
+			size = ALIGN_TO(size + offset, get_swift_lowering_alignment(current)) - offset;
+			break;
+		}
+	}
+
+	if (force_opaque) {
+		kind = SWIFT_OPAQUE;
+	}
+
+	memset(lowered_bytes + offset, kind, size);
+}
+
+static void record_struct_field_physical_lowering (guint8* lowered_bytes, MonoType* type, guint32 offset);
+
+static void record_inlinearray_struct_physical_lowering (guint8* lowered_bytes, MonoClass* klass, guint32 offset) {
+	// Get the first field and record its physical lowering N times
+	MonoClassField* field = mono_class_get_fields_internal (klass, NULL);
+	MonoType* fieldType = field->type;
+	for (int i = 0; i < m_class_inlinearray_value(klass); ++i) {
+		record_struct_field_physical_lowering(lowered_bytes, fieldType, offset + m_field_get_offset(field) + i * mono_type_size(fieldType, NULL));
+	}
+}
+
+static void record_struct_physical_lowering (guint8* lowered_bytes, MonoClass* klass, guint32 offset)
+{
+	if (m_class_is_inlinearray(klass)) {
+		record_inlinearray_struct_physical_lowering(lowered_bytes, klass, offset);
+		return;
+	}
+
+	// For each field, we need to record the physical lowering of it.
+	gpointer iter = NULL;
+	MonoClassField* field;
+	while ((field = mono_class_get_fields_internal (klass, &iter))) {
+		if (field->type->attrs & FIELD_ATTRIBUTE_STATIC)
+			continue;
+		if (mono_field_is_deleted (field))
+			continue;
+
+		record_struct_field_physical_lowering(lowered_bytes, field->type, offset + m_field_get_offset(field));
+	}
+}
+
+static void record_struct_field_physical_lowering (guint8* lowered_bytes, MonoType* type, guint32 offset) {
+	// Normalize pointer types to IntPtr and resolve generic classes.
+	// We don't need to care about specific pointer types at this ABI level.
+	if (type->type == MONO_TYPE_PTR || type->type == MONO_TYPE_FNPTR) {
+		type = m_class_get_byval_arg (mono_defaults.int_class);
+	}
+	if (type->type == MONO_TYPE_VALUETYPE || (type->type == MONO_TYPE_GENERICINST && mono_type_generic_inst_is_valuetype (type))) {
+		// If a struct type is encountered, we need to record the physical lowering for each field of that struct recursively
+		record_struct_physical_lowering(lowered_bytes, mono_class_from_mono_type_internal(type), offset);
+	} else {
+		SwiftPhysicalLoweringKind kind = SWIFT_OPAQUE;
+		// The only types that are non-opaque are 64-bit integers, floats, doubles, and vector types.
+		// We currently don't support vector types, so we'll only handle the first three.
+		if (type->type == MONO_TYPE_I8 || type->type == MONO_TYPE_U8) {
+			kind = SWIFT_INT64;
+		}
+#if TARGET_SIZEOF_VOID_P == 8
+		else if (type->type == MONO_TYPE_PTR || type->type == MONO_TYPE_FNPTR
+			|| type->type == MONO_TYPE_I || type->type == MONO_TYPE_U) {
+			kind = SWIFT_INT64;
+		} 
+#endif
+		else if (type->type == MONO_TYPE_R4) {
+			kind = SWIFT_FLOAT;
+		} else if (type->type == MONO_TYPE_R8) {
+			kind = SWIFT_DOUBLE;
+		}
+
+		set_lowering_range(lowered_bytes, offset, mono_type_size(type, NULL), kind);
+	}
+}
+
+SwiftPhysicalLowering
+mono_marshal_get_swift_physical_lowering (MonoType *type, gboolean native_layout)
+{
+	// TODO: Add support for the native type layout.
+	g_assert (!native_layout);
+	SwiftPhysicalLowering lowering = { 0 };
+
+	// Normalize pointer types to IntPtr and resolve generic classes.
+	// We don't need to care about specific pointer types at this ABI level.
+	if (type->type == MONO_TYPE_PTR || type->type == MONO_TYPE_FNPTR) {
+		type = m_class_get_byval_arg (mono_defaults.int_class);
+	}
+
+	// Non-value types are illegal at the interop boundary.
+	if (type->type == MONO_TYPE_GENERICINST && !mono_type_generic_inst_is_valuetype (type)) {
+		lowering.by_reference = TRUE;
+		return lowering;
+	} else if (type->type != MONO_TYPE_VALUETYPE && !mono_type_is_primitive(type)) {
+		lowering.by_reference = TRUE;
+		return lowering;
+	}
+
+	MonoClass *klass = mono_class_from_mono_type_internal (type);
+
+	// TODO: We currently don't support vector types, so we can say that the maximum size of a non-by_reference struct
+	// is 4 * PointerSize.
+	// Strictly, this is inaccurate in the case where a struct has a fully-empty 8 bytes of padding using explicit layout,
+	// but that's not possible in the Swift layout algorithm.
+
+	if (m_class_get_instance_size(klass) > 4 * TARGET_SIZEOF_VOID_P) {
+		lowering.by_reference = TRUE;
+		return lowering;
+	}
+
+	guint8 lowered_bytes[TARGET_SIZEOF_VOID_P * 4] = { 0 };
+	
+	// Loop through all fields and get the physical lowering for each field
+	record_struct_physical_lowering(lowered_bytes, klass, 0);
+
+	struct _SwiftInterval {
+		guint32 start;
+		guint32 size;
+		SwiftPhysicalLoweringKind kind;
+	};
+
+	GArray* intervals = g_array_new(FALSE, TRUE, sizeof(struct _SwiftInterval));
+
+	// Now we'll build the intervals from the lowered_bytes array
+	int instance_size = m_class_get_instance_size(klass);
+	for (int i = 0; i < instance_size; ++i) {
+        	// Don't create an interval for empty bytes
+		if (lowered_bytes[i] == SWIFT_EMPTY) {
+			continue;
+		}
+
+		SwiftPhysicalLoweringKind current = (SwiftPhysicalLoweringKind)lowered_bytes[i];
+
+		bool start_new_interval =
+			// We're at the start of the type
+			i == 0
+			// We're starting a new float (as we're aligned)
+			|| (i == ALIGN_TO(i, 4) && current == SWIFT_FLOAT)
+			// We're starting a new double or int64_t (as we're aligned)
+			|| (i == ALIGN_TO(i, 8) && (current == SWIFT_DOUBLE || current == SWIFT_INT64))
+			// We've changed interval types
+			|| current != lowered_bytes[i - 1];
+		
+		if (start_new_interval) {
+			struct _SwiftInterval interval = { i, 1, current };
+			g_array_append_val(intervals, interval);
+		} else {
+			// Extend the current interval
+			(g_array_index(intervals, struct _SwiftInterval, intervals->len - 1)).size++;
+		}
+	}
+
+	// Merge opaque intervals that are in the same pointer-sized block
+	for (int i = 0; i < intervals->len - 1; ++i) {
+		struct _SwiftInterval current = g_array_index(intervals, struct _SwiftInterval, i);
+		struct _SwiftInterval next = g_array_index(intervals, struct _SwiftInterval, i + 1);
+
+		if (current.kind == SWIFT_OPAQUE && next.kind == SWIFT_OPAQUE && current.start / TARGET_SIZEOF_VOID_P == next.start / TARGET_SIZEOF_VOID_P) {
+			current.size = next.start + next.size - current.start;
+			g_array_remove_index(intervals, i + 1);
+			i--;
+		}
+	}
+
+	// Now that we have the intervals, we can calculate the lowering
+	MonoTypeEnum lowered_types[4];
+	guint32 offsets[4];
+	guint32 num_lowered_types = 0;
+	
+	for (int i = 0; i < intervals->len; ++i) {
+		if (num_lowered_types == 4) {
+			// We can't handle more than 4 fields
+			lowering.by_reference = TRUE;
+			g_array_free(intervals, TRUE);
+			return lowering;
+		}
+
+		struct _SwiftInterval interval = g_array_index(intervals, struct _SwiftInterval, i);
+		
+		offsets[num_lowered_types] = interval.start;
+
+		switch (interval.kind) {
+			case SWIFT_INT64:
+				lowered_types[num_lowered_types++] = MONO_TYPE_I8;
+				break;
+			case SWIFT_FLOAT:
+				lowered_types[num_lowered_types++] = MONO_TYPE_R4;
+				break;
+			case SWIFT_DOUBLE:
+				lowered_types[num_lowered_types++] = MONO_TYPE_R8;
+				break;
+			case SWIFT_OPAQUE:
+			{
+				// We need to split the opaque ranges into integer parameters.
+				// As part of this splitting, we must ensure that we don't introduce alignment padding.
+				// This lowering algorithm should produce a lowered type sequence that would have the same padding for
+				// a naturally-aligned struct with the lowered fields as the original type has.
+				// This algorithm intends to split the opaque range into the least number of lowered elements that covers the entire range.
+				// The lowered range is allowed to extend past the end of the opaque range (including past the end of the struct),
+				// but not into the next non-empty interval.
+				// However, due to the properties of the lowering (the only non-8 byte elements of the lowering are 4-byte floats),
+				// we'll never encounter a scenario where we need would need to account for a correctly-aligned
+				// opaque range of > 4 bytes that we must not pad to 8 bytes.
+
+
+				// As long as we need to fill more than 4 bytes and the sequence is currently 8-byte aligned, we'll split into 8-byte integers.
+				// If we have more than 2 bytes but less than 4 and the sequence is 4-byte aligned, we'll use a 4-byte integer to represent the rest of the parameters.
+				// If we have 2 bytes and the sequence is 2-byte aligned, we'll use a 2-byte integer to represent the rest of the parameters.
+				// If we have 1 byte, we'll use a 1-byte integer to represent the rest of the parameters.
+				guint32 opaque_interval_start = interval.start;
+				// The remaining size here may become negative, so use a signed type.
+				gint32 remaining_interval_size = (gint32)interval.size;
+				while (remaining_interval_size > 0) {
+					if (num_lowered_types == 4) {
+						// We can't handle more than 4 fields
+						lowering.by_reference = TRUE;
+						g_array_free(intervals, TRUE);
+						return lowering;
+					}
+
+					offsets[num_lowered_types] = opaque_interval_start;
+
+					if (remaining_interval_size > 8 && (opaque_interval_start % 8 == 0)) {
+						lowered_types[num_lowered_types] = MONO_TYPE_I8;
+						remaining_interval_size -= 8;
+						opaque_interval_start += 8;
+					} else if (remaining_interval_size > 4 && (opaque_interval_start % 4 == 0)) {
+						lowered_types[num_lowered_types] = MONO_TYPE_I4;
+						remaining_interval_size -= 4;
+						opaque_interval_start += 4;
+					} else if (remaining_interval_size > 2 && (opaque_interval_start % 2 == 0)) {
+						lowered_types[num_lowered_types] = MONO_TYPE_I2;
+						remaining_interval_size -= 2;
+						opaque_interval_start += 2;
+					} else {
+						lowered_types[num_lowered_types] = MONO_TYPE_U1;
+						remaining_interval_size -= 1;
+						opaque_interval_start += 1;
+					}
+
+					num_lowered_types++;
+				}
+			}
+		}
+	}
+
+	memcpy(lowering.lowered_elements, lowered_types, num_lowered_types * sizeof(MonoTypeEnum));
+	memcpy(lowering.offsets, offsets, num_lowered_types * sizeof(guint32));
+	lowering.num_lowered_elements = num_lowered_types;
+	lowering.by_reference = FALSE;
+
+	return lowering;
+}
diff --git a/src/mono/mono/metadata/marshal.h b/src/mono/mono/metadata/marshal.h
index 87561efe4673..8d545fcc25de 100644
--- a/src/mono/mono/metadata/marshal.h
+++ b/src/mono/mono/metadata/marshal.h
@@ -742,4 +742,14 @@ mono_marshal_get_mono_callbacks_for_ilgen (void);
 GENERATE_TRY_GET_CLASS_WITH_CACHE_DECL (swift_self)
 GENERATE_TRY_GET_CLASS_WITH_CACHE_DECL (swift_error)
 
+typedef struct {
+	gboolean by_reference;
+	int num_lowered_elements;
+	MonoTypeEnum lowered_elements[4];
+	uint32_t offsets[4];
+} SwiftPhysicalLowering;
+
+SwiftPhysicalLowering
+mono_marshal_get_swift_physical_lowering (MonoType *type, gboolean native_layout);
+
 #endif /* __MONO_MARSHAL_H__ */
diff --git a/src/mono/mono/metadata/metadata-internals.h b/src/mono/mono/metadata/metadata-internals.h
index 05934225cbd9..67c6d1350473 100644
--- a/src/mono/mono/metadata/metadata-internals.h
+++ b/src/mono/mono/metadata/metadata-internals.h
@@ -217,6 +217,8 @@ typedef struct {
 	guint32  size;
 } MonoStreamHeader;
 
+#define MONO_TABLE_INFO_MAX_COLUMNS 9
+
 struct _MonoTableInfo {
 	const char *base;
 	guint       rows_     : 24;	/* don't access directly, use table_info_get_rows */
@@ -234,6 +236,12 @@ struct _MonoTableInfo {
 	 * we only need 4, but 8 is aligned no shift required.
 	 */
 	guint32   size_bitfield;
+
+	/*
+	 * optimize out the loop in mono_metadata_decode_row_col_raw.
+	 * 4 * 9 easily fits in a uint8
+	 */
+	guint8    column_offsets[MONO_TABLE_INFO_MAX_COLUMNS];
 };
 
 #define REFERENCE_MISSING ((gpointer) -1)
@@ -888,6 +896,8 @@ mono_metadata_table_bounds_check (MonoImage *image, int table_index, int token_i
 MONO_COMPONENT_API
 const char *   mono_meta_table_name              (int table);
 void           mono_metadata_compute_table_bases (MonoImage *meta);
+MONO_COMPONENT_API
+void           mono_metadata_compute_column_offsets (MonoTableInfo *table);
 
 gboolean
 mono_metadata_interfaces_from_typedef_full  (MonoImage             *image,
diff --git a/src/mono/mono/metadata/metadata.c b/src/mono/mono/metadata/metadata.c
index f71602922329..d005bfdb6b6b 100644
--- a/src/mono/mono/metadata/metadata.c
+++ b/src/mono/mono/metadata/metadata.c
@@ -1003,6 +1003,18 @@ mono_metadata_table_bounds_check_slow (MonoImage *image, int table_index, int to
         return mono_metadata_update_table_bounds_check (image, table_index, token_index);
 }
 
+void
+mono_metadata_compute_column_offsets (MonoTableInfo *table)
+{
+	int offset = 0, c = mono_metadata_table_count (table->size_bitfield);
+	memset(table->column_offsets, 0, MONO_TABLE_INFO_MAX_COLUMNS);
+	for (int i = 0; i < c; i++) {
+		int size = mono_metadata_table_size (table->size_bitfield, i);
+		table->column_offsets[i] = (guint8)offset;
+		offset += size;
+	}
+}
+
 /**
  * mono_metadata_compute_table_bases:
  * \param meta metadata context to compute table values
@@ -1022,6 +1034,7 @@ mono_metadata_compute_table_bases (MonoImage *meta)
 			continue;
 
 		table->row_size = mono_metadata_compute_size (meta, i, &table->size_bitfield);
+		mono_metadata_compute_column_offsets (table);
 		table->base = base;
 		base += table_info_get_rows (table) * table->row_size;
 	}
@@ -1470,18 +1483,12 @@ mono_metadata_decode_row_col_raw (const MonoTableInfo *t, int idx, guint col)
 {
 	const char *data;
 	int n;
-
 	guint32 bitfield = t->size_bitfield;
 
 	g_assert (GINT_TO_UINT32(idx) < table_info_get_rows (t));
 	g_assert (col < mono_metadata_table_count (bitfield));
-	data = t->base + idx * t->row_size;
-
-	n = mono_metadata_table_size (bitfield, 0);
-	for (guint i = 0; i < col; ++i) {
-		data += n;
-		n = mono_metadata_table_size (bitfield, i + 1);
-	}
+	data = t->base + idx * t->row_size + t->column_offsets [col];
+	n = mono_metadata_table_size (bitfield, col);
 	switch (n) {
 	case 1:
 		return *data;
@@ -3942,7 +3949,7 @@ compare_type_literals (MonoImage *image, int class_type, int type_type, MonoErro
 		if (class_type == MONO_TYPE_STRING || class_type == MONO_TYPE_OBJECT)
 			return TRUE;
 		//XXX stringify this argument
-		mono_error_set_bad_image (error, image, "Expected reference type but got type kind %d", class_type);
+		mono_error_set_type_load_name (error, NULL, NULL, "Expected reference type but got type kind %d", class_type);
 		return FALSE;
 	}
 
@@ -3966,7 +3973,7 @@ compare_type_literals (MonoImage *image, int class_type, int type_type, MonoErro
 		return TRUE;
 	default:
 		//XXX stringify this argument
-		mono_error_set_bad_image (error, image, "Expected value type but got type kind %d", class_type);
+		mono_error_set_type_load_name (error, NULL, NULL, "Expected value type but got type kind %d", class_type);
 		return FALSE;
 	}
 }
@@ -6009,7 +6016,7 @@ signature_equiv_vararg (MonoMethodSignature *sig1, MonoMethodSignature *sig2, in
 	if (sig1->hasthis != sig2->hasthis ||
 	    sig1->sentinelpos != sig2->sentinelpos)
 		return FALSE;
-	
+
 	int flag = MONO_TYPE_EQ_FLAGS_SIG_ONLY | (((equiv_flags & SIG_EQUIV_FLAG_IGNORE_CMODS) != 0) ? MONO_TYPE_EQ_FLAG_IGNORE_CMODS : 0);
 
 	for (i = 0; i < sig1->sentinelpos; i++) {
diff --git a/src/mono/mono/metadata/native-library-qcall.c b/src/mono/mono/metadata/native-library-qcall.c
index 5b17173c6661..77ead5fbd487 100644
--- a/src/mono/mono/metadata/native-library-qcall.c
+++ b/src/mono/mono/metadata/native-library-qcall.c
@@ -3,7 +3,7 @@
 
 static Entry mono_qcalls[] =
 {
-	DllImportEntry(NULL) // This NULL entry can be removed when a QCall is added to Mono (and added to this array)
+	{"NULL", NULL}, // This NULL entry can be removed when a QCall is added to Mono (and added to this array)
 };
 
 gpointer
diff --git a/src/mono/mono/metadata/native-library.c b/src/mono/mono/metadata/native-library.c
index 669273625133..f26ab2e9152a 100644
--- a/src/mono/mono/metadata/native-library.c
+++ b/src/mono/mono/metadata/native-library.c
@@ -47,10 +47,6 @@ static GHashTable *native_library_module_map;
  */
 static GHashTable *native_library_module_blocklist;
 
-#ifndef NO_GLOBALIZATION_SHIM
-extern const void *GlobalizationResolveDllImport (const char *name);
-#endif
-
 static GHashTable *global_module_map; // should only be accessed with the global loader data lock
 
 static MonoDl *internal_module; // used when pinvoking `__Internal`
@@ -786,26 +782,6 @@ get_dllimportsearchpath_flags (MonoCustomAttrInfo *cinfo)
 	return flags;
 }
 
-#ifndef NO_GLOBALIZATION_SHIM
-#ifdef HOST_WIN32
-#define GLOBALIZATION_DLL_NAME "System.Globalization.Native"
-#else
-#define GLOBALIZATION_DLL_NAME "libSystem.Globalization.Native"
-#endif
-
-static gpointer
-default_resolve_dllimport (const char *dll, const char *func)
-{
-	if (strcmp (dll, GLOBALIZATION_DLL_NAME) == 0) {
-		const void *method_impl = GlobalizationResolveDllImport (func);
-		if (method_impl)
-			return (gpointer)method_impl;
-	}
-
-	return NULL;
-}
-#endif // NO_GLOBALIZATION_SHIM
-
 gpointer
 lookup_pinvoke_call_impl (MonoMethod *method, MonoLookupPInvokeStatus *status_out)
 {
@@ -881,12 +857,6 @@ lookup_pinvoke_call_impl (MonoMethod *method, MonoLookupPInvokeStatus *status_ou
 	}
 #endif
 
-#ifndef NO_GLOBALIZATION_SHIM
-	addr = default_resolve_dllimport (new_scope, new_import);
-	if (addr)
-		goto exit;
-#endif
-
 	if (pinvoke_override) {
 		addr = pinvoke_override (new_scope, new_import);
 		if (addr)
diff --git a/src/mono/mono/metadata/object-internals.h b/src/mono/mono/metadata/object-internals.h
index 257d06a915da..daaba307f17d 100644
--- a/src/mono/mono/metadata/object-internals.h
+++ b/src/mono/mono/metadata/object-internals.h
@@ -1439,16 +1439,17 @@ typedef struct {
 
 /* Keep in sync with System.GenericParameterAttributes */
 typedef enum {
-	GENERIC_PARAMETER_ATTRIBUTE_NON_VARIANT		= 0,
-	GENERIC_PARAMETER_ATTRIBUTE_COVARIANT		= 1,
-	GENERIC_PARAMETER_ATTRIBUTE_CONTRAVARIANT	= 2,
-	GENERIC_PARAMETER_ATTRIBUTE_VARIANCE_MASK	= 3,
-
-	GENERIC_PARAMETER_ATTRIBUTE_NO_SPECIAL_CONSTRAINT	= 0,
-	GENERIC_PARAMETER_ATTRIBUTE_REFERENCE_TYPE_CONSTRAINT	= 4,
-	GENERIC_PARAMETER_ATTRIBUTE_VALUE_TYPE_CONSTRAINT	= 8,
-	GENERIC_PARAMETER_ATTRIBUTE_CONSTRUCTOR_CONSTRAINT	= 16,
-	GENERIC_PARAMETER_ATTRIBUTE_SPECIAL_CONSTRAINTS_MASK	= 28
+	GENERIC_PARAMETER_ATTRIBUTE_NON_VARIANT		= 0x0000,
+	GENERIC_PARAMETER_ATTRIBUTE_COVARIANT		= 0x0001,
+	GENERIC_PARAMETER_ATTRIBUTE_CONTRAVARIANT	= 0x0002,
+	GENERIC_PARAMETER_ATTRIBUTE_VARIANCE_MASK	= 0x0003,
+
+	GENERIC_PARAMETER_ATTRIBUTE_NO_SPECIAL_CONSTRAINT			= 0x0000,
+	GENERIC_PARAMETER_ATTRIBUTE_REFERENCE_TYPE_CONSTRAINT		= 0x0004,
+	GENERIC_PARAMETER_ATTRIBUTE_VALUE_TYPE_CONSTRAINT			= 0x0008,
+	GENERIC_PARAMETER_ATTRIBUTE_CONSTRUCTOR_CONSTRAINT			= 0x0010,
+	GENERIC_PARAMETER_ATTRIBUTE_ACCEPT_BYREFLIKE_CONSTRAINTS	= 0x0020,	// type argument can be ByRefLike
+	GENERIC_PARAMETER_ATTRIBUTE_SPECIAL_CONSTRAINTS_MASK		= 0x003c
 } GenericParameterAttributes;
 
 typedef struct {
diff --git a/src/mono/mono/metadata/object.c b/src/mono/mono/metadata/object.c
index d6ddb751fe3a..4b14d889b9df 100644
--- a/src/mono/mono/metadata/object.c
+++ b/src/mono/mono/metadata/object.c
@@ -4243,6 +4243,8 @@ prepare_run_main (MonoMethod *method, int argc, char *argv[])
 						    basename,
 						    (const char*)NULL);
 
+		g_assert (fullpath);
+
 		utf8_fullpath = utf8_from_external (fullpath);
 		if(utf8_fullpath == NULL) {
 			/* Printing the arg text will cause glib to
@@ -5355,7 +5357,7 @@ MonoObjectHandle
 mono_object_new_handle (MonoClass *klass, MonoError *error)
 {
 	MONO_REQ_GC_UNSAFE_MODE;
-	
+
 	if (MONO_CLASS_IS_IMPORT(klass)) {
 		mono_error_set_not_supported (error, "Built-in COM interop is not supported on Mono.");
 		return MONO_HANDLE_NEW (MonoObject, NULL);
diff --git a/src/mono/mono/metadata/sgen-tarjan-bridge.c b/src/mono/mono/metadata/sgen-tarjan-bridge.c
index 6f1fb1ec10bd..b0c9cf1f83ba 100644
--- a/src/mono/mono/metadata/sgen-tarjan-bridge.c
+++ b/src/mono/mono/metadata/sgen-tarjan-bridge.c
@@ -819,8 +819,10 @@ create_scc (ScanData *data)
 			g_error ("Invalid state when building SCC %d", other->state);
 		}
 
-		if (other->is_bridge)
+		if (other->is_bridge) {
+			g_assert (color_data);
 			dyn_array_ptr_add (&color_data->bridges, other->obj);
+		}
 
 		// Maybe we should make sure we are not adding duplicates here. It is not really a problem
 		// since we will get rid of duplicates before submitting the SCCs to the client in gather_xrefs
diff --git a/src/mono/mono/metadata/sysmath.c b/src/mono/mono/metadata/sysmath.c
index b7c6006da692..afcbf447ad72 100644
--- a/src/mono/mono/metadata/sysmath.c
+++ b/src/mono/mono/metadata/sysmath.c
@@ -43,12 +43,6 @@ ves_icall_System_Math_Round (gdouble x)
 	return mono_round_to_even (x);
 }
 
-gdouble
-ves_icall_System_Math_FMod (gdouble x, gdouble y)
-{
-	return fmod (x, y);
-}
-
 gdouble
 ves_icall_System_Math_ModF (gdouble x, gdouble *d)
 {
@@ -313,12 +307,6 @@ ves_icall_System_MathF_Tanh (float x)
 	return tanhf (x);
 }
 
-float
-ves_icall_System_MathF_FMod (float x, float y)
-{
-	return fmodf (x, y);
-}
-
 float
 ves_icall_System_MathF_ModF (float x, float *d)
 {
diff --git a/src/mono/mono/metadata/threads-types.h b/src/mono/mono/metadata/threads-types.h
index 576cdcb25ca9..c92e61497dba 100644
--- a/src/mono/mono/metadata/threads-types.h
+++ b/src/mono/mono/metadata/threads-types.h
@@ -136,12 +136,6 @@ gint32 ves_icall_System_Threading_Interlocked_Decrement_Int(gint32 *location);
 ICALL_EXPORT
 gint64 ves_icall_System_Threading_Interlocked_Decrement_Long(gint64 * location);
 
-ICALL_EXPORT
-guint8 ves_icall_System_Threading_Interlocked_Exchange_Byte(guint8 *location, guint8 value);
-
-ICALL_EXPORT
-gint16 ves_icall_System_Threading_Interlocked_Exchange_Short(gint16 *location, gint16 value);
-
 ICALL_EXPORT
 gint32 ves_icall_System_Threading_Interlocked_Exchange_Int(gint32 *location, gint32 value);
 
@@ -151,12 +145,6 @@ gint64 ves_icall_System_Threading_Interlocked_Exchange_Long(gint64 *location, gi
 ICALL_EXPORT
 void ves_icall_System_Threading_Interlocked_Exchange_Object (MonoObject *volatile*location, MonoObject *volatile*value, MonoObject *volatile*res);
 
-ICALL_EXPORT
-guint8 ves_icall_System_Threading_Interlocked_CompareExchange_Byte(guint8 *location, guint8 value, guint8 comparand);
-
-ICALL_EXPORT
-gint16 ves_icall_System_Threading_Interlocked_CompareExchange_Short(gint16 *location, gint16 value, gint16 comparand);
-
 ICALL_EXPORT
 gint32 ves_icall_System_Threading_Interlocked_CompareExchange_Int(gint32 *location, gint32 value, gint32 comparand);
 
diff --git a/src/mono/mono/metadata/threads.c b/src/mono/mono/metadata/threads.c
index 0fe313db637c..22a7760450e8 100644
--- a/src/mono/mono/metadata/threads.c
+++ b/src/mono/mono/metadata/threads.c
@@ -1242,8 +1242,7 @@ start_wrapper_internal (StartInfo *start_info, gsize *stack_ptr)
 
 	if (G_UNLIKELY (external_eventloop)) {
 		/* if the thread wants to stay alive in an external eventloop, don't clean up after it */
-		if (mono_thread_platform_external_eventloop_keepalive_check ())
-			return 0; // MONO_ENTER_GC_SAFE_UNBALANCED is done in start_wrapper
+		return 0; // MONO_ENTER_GC_SAFE_UNBALANCED is done in start_wrapper
 	}
 
 	/* Do any cleanup needed for apartment state. This
@@ -1278,11 +1277,9 @@ start_wrapper (gpointer data)
 
 	if (G_UNLIKELY (external_eventloop)) {
 		/* if the thread wants to stay alive, don't clean up after it */
-		if (mono_thread_platform_external_eventloop_keepalive_check ()) {
-			/* while we wait in the external eventloop, we're GC safe */
-			MONO_ENTER_GC_SAFE_UNBALANCED;
-			return 0;
-		}
+		/* while we wait in the external eventloop, we're GC safe */
+		MONO_ENTER_GC_SAFE_UNBALANCED;
+		return 0;
 	}
 
 	mono_thread_info_exit (res);
@@ -2146,22 +2143,6 @@ gint64 ves_icall_System_Threading_Interlocked_Decrement_Long (gint64 * location)
 	return mono_atomic_dec_i64 (location);
 }
 
-guint8 ves_icall_System_Threading_Interlocked_Exchange_Byte (guint8 *location, guint8 value)
-{
-	if (G_UNLIKELY (!location))
-		return (guint8)set_pending_null_reference_exception ();
-
-	return mono_atomic_xchg_u8(location, value);
-}
-
-gint16 ves_icall_System_Threading_Interlocked_Exchange_Short (gint16 *location, gint16 value)
-{
-	if (G_UNLIKELY (!location))
-		return (gint16)set_pending_null_reference_exception ();
-
-	return mono_atomic_xchg_i16(location, value);
-}
-
 gint32 ves_icall_System_Threading_Interlocked_Exchange_Int (gint32 *location, gint32 value)
 {
 	if (G_UNLIKELY (!location))
@@ -2210,22 +2191,6 @@ ves_icall_System_Threading_Interlocked_Exchange_Long (gint64 *location, gint64 v
 	return mono_atomic_xchg_i64 (location, value);
 }
 
-guint8 ves_icall_System_Threading_Interlocked_CompareExchange_Byte(guint8 *location, guint8 value, guint8 comparand)
-{
-	if (G_UNLIKELY (!location))
-		return (guint8)set_pending_null_reference_exception ();
-
-	return mono_atomic_cas_u8(location, value, comparand);
-}
-
-gint16 ves_icall_System_Threading_Interlocked_CompareExchange_Short(gint16 *location, gint16 value, gint16 comparand)
-{
-	if (G_UNLIKELY (!location))
-		return (gint16)set_pending_null_reference_exception ();
-
-	return mono_atomic_cas_i16(location, value, comparand);
-}
-
 gint32 ves_icall_System_Threading_Interlocked_CompareExchange_Int(gint32 *location, gint32 value, gint32 comparand)
 {
 	if (G_UNLIKELY (!location))
@@ -4940,115 +4905,20 @@ ves_icall_System_Threading_LowLevelLifoSemaphore_InitInternal (void)
 void
 ves_icall_System_Threading_LowLevelLifoSemaphore_DeleteInternal (gpointer sem_ptr)
 {
-	LifoSemaphoreBase *sem = (LifoSemaphoreBase *)sem_ptr;
-	switch (sem->kind) {
-	case LIFO_SEMAPHORE_NORMAL:
-		mono_lifo_semaphore_delete ((LifoSemaphore*)sem);
-		break;
-#if defined(HOST_BROWSER) && !defined(DISABLE_THREADS)
-	case LIFO_SEMAPHORE_ASYNCWAIT:
-		mono_lifo_semaphore_asyncwait_delete ((LifoSemaphoreAsyncWait*)sem);
-		break;
-#endif
-	default:
-		g_assert_not_reached();
-	}
+	LifoSemaphore *sem = (LifoSemaphore *)sem_ptr;
+	mono_lifo_semaphore_delete (sem);
 }
 
 gint32
 ves_icall_System_Threading_LowLevelLifoSemaphore_TimedWaitInternal (gpointer sem_ptr, gint32 timeout_ms)
 {
 	LifoSemaphore *sem = (LifoSemaphore *)sem_ptr;
-	g_assert (sem->base.kind == LIFO_SEMAPHORE_NORMAL);
 	return mono_lifo_semaphore_timed_wait (sem, timeout_ms);
 }
 
 void
 ves_icall_System_Threading_LowLevelLifoSemaphore_ReleaseInternal (gpointer sem_ptr, gint32 count)
 {
-	LifoSemaphoreBase *sem = (LifoSemaphoreBase *)sem_ptr;
-	switch (sem->kind) {
-	case LIFO_SEMAPHORE_NORMAL:
-		mono_lifo_semaphore_release ((LifoSemaphore*)sem, count);
-		break;
-#if defined(HOST_BROWSER) && !defined(DISABLE_THREADS)
-	case LIFO_SEMAPHORE_ASYNCWAIT:
-		mono_lifo_semaphore_asyncwait_release ((LifoSemaphoreAsyncWait*)sem, count);
-		break;
-#endif
-	default:
-		g_assert_not_reached();
-	}
-}
-
-#if defined(HOST_BROWSER) && !defined(DISABLE_THREADS)
-gpointer
-ves_icall_System_Threading_LowLevelLifoAsyncWaitSemaphore_InitInternal (void)
-{
-	return (gpointer)mono_lifo_semaphore_asyncwait_init ();
-}
-
-void
-ves_icall_System_Threading_LowLevelLifoAsyncWaitSemaphore_PrepareAsyncWaitInternal (gpointer sem_ptr, gint32 timeout_ms, gpointer success_cb, gpointer timedout_cb, intptr_t user_data)
-{
-	LifoSemaphoreAsyncWait *sem = (LifoSemaphoreAsyncWait *)sem_ptr;
-	g_assert (sem->base.kind == LIFO_SEMAPHORE_ASYNCWAIT);
-	mono_lifo_semaphore_asyncwait_prepare_wait (sem, timeout_ms, (LifoSemaphoreAsyncWaitCallbackFn)success_cb, (LifoSemaphoreAsyncWaitCallbackFn)timedout_cb, user_data);
-}
-
-void
-ves_icall_System_Threading_WebWorkerEventLoop_KeepalivePushInternal (void)
-{
-	emscripten_runtime_keepalive_push();
-}
-
-void
-ves_icall_System_Threading_WebWorkerEventLoop_KeepalivePopInternal (void)
-{
-	emscripten_runtime_keepalive_pop();
-}
-
-extern int mono_wasm_eventloop_has_unsettled_interop_promises(void);
-
-MonoBoolean
-ves_icall_System_Threading_WebWorkerEventLoop_HasUnsettledInteropPromisesNative(void)
-{
-	return !!mono_wasm_eventloop_has_unsettled_interop_promises();
-}
-
-#endif /* HOST_BROWSER && !DISABLE_THREADS */
-
-/* for the AOT cross compiler with --print-icall-table these don't need to be callable, they just
- * need to be defined */
-#if defined(TARGET_WASM) && defined(ENABLE_ICALL_SYMBOL_MAP)
-gpointer
-ves_icall_System_Threading_LowLevelLifoAsyncWaitSemaphore_InitInternal (void)
-{
-	g_assert_not_reached ();
-}
-
-void
-ves_icall_System_Threading_LowLevelLifoAsyncWaitSemaphore_PrepareAsyncWaitInternal (gpointer sem_ptr, gint32 timeout_ms, gpointer success_cb, gpointer timedout_cb, intptr_t user_data)
-{
-	g_assert_not_reached ();
-}
-
-void
-ves_icall_System_Threading_WebWorkerEventLoop_KeepalivePushInternal (void)
-{
-	g_assert_not_reached();
-}
-
-void
-ves_icall_System_Threading_WebWorkerEventLoop_KeepalivePopInternal (void)
-{
-	g_assert_not_reached();
-}
-
-MonoBoolean
-ves_icall_System_Threading_WebWorkerEventLoop_HasUnsettledInteropPromisesNative(void)
-{
-	g_assert_not_reached();
+	LifoSemaphore *sem = (LifoSemaphore *)sem_ptr;
+	mono_lifo_semaphore_release (sem, count);
 }
-#endif /* defined(TARGET_WASM) && defined(ENABLE_ICALL_SYMBOL_MAP) */
-
diff --git a/src/mono/mono/metadata/verify.c b/src/mono/mono/metadata/verify.c
index 660f6226eb00..621599fa4eee 100644
--- a/src/mono/mono/metadata/verify.c
+++ b/src/mono/mono/metadata/verify.c
@@ -87,6 +87,9 @@ is_valid_generic_instantiation (MonoGenericContainer *gc, MonoGenericContext *co
 				return FALSE;
 		}
 
+		if (m_class_is_byreflike (paramClass) && (param_info->flags & GENERIC_PARAMETER_ATTRIBUTE_ACCEPT_BYREFLIKE_CONSTRAINTS) == 0)
+			return FALSE;
+
 		if (!param_info->constraints && !(param_info->flags & GENERIC_PARAMETER_ATTRIBUTE_SPECIAL_CONSTRAINTS_MASK))
 			continue;
 
diff --git a/src/mono/mono/mini/CMakeLists.txt b/src/mono/mono/mini/CMakeLists.txt
index 5e6ebe9ce49d..e81afefecfc2 100644
--- a/src/mono/mono/mini/CMakeLists.txt
+++ b/src/mono/mono/mini/CMakeLists.txt
@@ -1,6 +1,6 @@
 project(mini C)
 
-if(ENABLE_LLVM OR ENABLE_LLVM_RUNTIME OR HOST_BROWSER OR (HAVE_SYS_ICU AND NOT HOST_WASI))
+if(ENABLE_LLVM OR ENABLE_LLVM_RUNTIME OR HOST_BROWSER)
   enable_language(CXX)
 endif()
 
@@ -45,78 +45,6 @@ if(HOST_WIN32)
   add_link_options(/OPT:NOICF)
 endif()
 
-# ICU
-if(HAVE_SYS_ICU)
-  if(STATIC_ICU)
-    set(pal_icushim_sources_base
-        pal_icushim_static.c)
-    add_definitions(-DSTATIC_ICU=1)
-  else()
-    set(pal_icushim_sources_base
-        pal_icushim.c)
-  endif()
-
-  set(icu_shim_sources_base
-      pal_idna.c
-      pal_common.c
-      entrypoints.c
-      ${pal_icushim_sources_base})
-
-if (NOT CLR_CMAKE_TARGET_MACCATALYST AND NOT CLR_CMAKE_TARGET_IOS AND NOT CLR_CMAKE_TARGET_TVOS)
-    set(icu_shim_sources_base
-      ${icu_shim_sources_base}
-      pal_calendarData.c
-      pal_casing.c
-      pal_collation.c
-      pal_locale.c
-      pal_localeNumberData.c
-      pal_localeStringData.c
-      pal_normalization.c
-      pal_timeZoneInfo.c
-    )
-endif()
-
-if (CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET_IOS OR CLR_CMAKE_TARGET_TVOS)
-    set(icu_shim_sources_base
-      ${icu_shim_sources_base}
-      pal_placeholders.c
-    )
-endif()
-
-  addprefix(icu_shim_sources "${ICU_SHIM_PATH}" "${icu_shim_sources_base}")
-
-  if (TARGET_DARWIN)
-    set(icu_shim_darwin_sources_base
-        pal_locale.m
-        pal_collation.m
-        pal_casing.m
-        pal_calendarData.m
-        pal_normalization.m
-        pal_timeZoneInfo.m)
-
-    addprefix(icu_shim_darwin_sources "${ICU_SHIM_PATH}" "${icu_shim_darwin_sources_base}")
-    set(icu_shim_sources ${icu_shim_sources} ${icu_shim_darwin_sources})
-  endif()
-
-  set_source_files_properties(${icu_shim_sources} PROPERTIES COMPILE_DEFINITIONS OSX_ICU_LIBRARY_PATH="${OSX_ICU_LIBRARY_PATH}")
-  set_source_files_properties(${icu_shim_sources} PROPERTIES COMPILE_FLAGS "-I\"${ICU_INCLUDEDIR}\" -I\"${CLR_SRC_NATIVE_DIR}/libs/System.Globalization.Native/\" -I\"${CLR_SRC_NATIVE_DIR}/libs/Common/\" ${ICU_FLAGS}")
-  if(TARGET_DARWIN)
-    set_property(SOURCE ${icu_shim_darwin_sources} APPEND_STRING PROPERTY COMPILE_FLAGS " -fobjc-arc ${CLR_CMAKE_COMMON_OBJC_FLAGS}")
-  endif()
-  if(TARGET_WIN32)
-      set_source_files_properties(${icu_shim_sources} PROPERTIES LANGUAGE CXX)
-  endif()
-  if(ICU_LIBDIR)
-    set(ICU_LDFLAGS "-L${ICU_LIBDIR}")
-  endif()
-  if(CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET_IOS OR CLR_CMAKE_TARGET_TVOS)
-    set(ICU_LDFLAGS "-licucore")
-  endif()
-
-  add_library(icu_shim_objects OBJECT "${icu_shim_sources}")
-  set(HAVE_ICU_SHIM 1)
-endif()
-
 #
 # MINI
 #
@@ -434,9 +362,6 @@ if(NOT DISABLE_SHARED_LIBS)
   else()
     target_link_libraries(monosgen-shared PRIVATE monoapi eglib_objects utils_objects sgen_objects metadata_objects)
   endif()
-  if(HAVE_ICU_SHIM)
-    target_link_libraries(monosgen-shared PRIVATE icu_shim_objects)
-  endif()
   target_include_directories (monosgen-shared PRIVATE monoapi)
   if(TARGET_WIN32)
     # on Windows the import library for the shared mono library will have the same name as the static library,
@@ -446,14 +371,8 @@ if(NOT DISABLE_SHARED_LIBS)
   if(CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET_IOS OR CLR_CMAKE_TARGET_TVOS)
     target_link_libraries(monosgen-shared PRIVATE ${OS_LIBS} ${LLVM_LIBS} ${Z_LIBS})
   else()
-    target_link_libraries(monosgen-shared PRIVATE ${OS_LIBS} ${LLVM_LIBS} ${ICU_LIBS} ${Z_LIBS})
-  endif()
-  if(ICU_LDFLAGS)
-    set_property(TARGET monosgen-shared APPEND_STRING PROPERTY LINK_FLAGS " ${ICU_LDFLAGS}")
+    target_link_libraries(monosgen-shared PRIVATE ${OS_LIBS} ${LLVM_LIBS} ${Z_LIBS})
   endif()
-  if(NOT TARGET_WASM AND STATIC_ICU)
-    set_property(TARGET monosgen-shared APPEND_STRING PROPERTY LINKER_LANGUAGE CXX)
-  endif ()
   if(TARGET_DARWIN)
     set_property(TARGET monosgen-shared APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-compatibility_version -Wl,2.0 -Wl,-current_version -Wl,2.0")
   endif()
@@ -498,17 +417,8 @@ if(NOT DISABLE_SHARED_LIBS)
         add_library(${frameworkconfig} SHARED $<TARGET_OBJECTS:monosgen-objects>)
         target_compile_definitions(${frameworkconfig} PRIVATE -DMONO_DLL_EXPORT)
         target_link_libraries(${frameworkconfig} PRIVATE monoapi eglib_objects utils_objects sgen_objects metadata_objects)
-        if(HAVE_ICU_SHIM)
-          target_link_libraries(${frameworkconfig} PRIVATE icu_shim_objects)
-        endif()
-        target_link_libraries(${frameworkconfig} PRIVATE ${OS_LIBS} ${LLVM_LIBS} ${ICU_LIBS} ${Z_LIBS})
+        target_link_libraries(${frameworkconfig} PRIVATE ${OS_LIBS} ${LLVM_LIBS} ${Z_LIBS})
 
-        if(ICU_LDFLAGS)
-          set_property(TARGET ${frameworkconfig} APPEND_STRING PROPERTY LINK_FLAGS " ${ICU_LDFLAGS}")
-        endif()
-        if(STATIC_ICU)
-          set_property(TARGET ${frameworkconfig} APPEND_STRING PROPERTY LINKER_LANGUAGE CXX)
-        endif ()
         set_property(TARGET ${frameworkconfig} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-compatibility_version -Wl,2.0 -Wl,-current_version -Wl,2.0")
         string(REPLACE "*" ";" componentsobjects-whitespace "${componentsobjects}")
         target_sources(${frameworkconfig} PRIVATE "${componentsobjects-whitespace}")
@@ -627,10 +537,7 @@ if(NOT DISABLE_EXECUTABLES)
   if (HOST_WASM)
     target_link_libraries(mono-sgen PRIVATE mono-wasm-nosimd)
   endif()
-  if(HAVE_ICU_SHIM)
-    target_link_libraries(mono-sgen PRIVATE icu_shim_objects)
-  endif()
-  target_link_libraries(mono-sgen PRIVATE ${OS_LIBS} ${LLVM_LIBS} ${ICU_LIBS} ${Z_LIBS})
+  target_link_libraries(mono-sgen PRIVATE ${OS_LIBS} ${LLVM_LIBS} ${Z_LIBS})
   # musl-libc implements ucontext in a different library on s390x
   if(CLR_CMAKE_TARGET_LINUX_MUSL AND TARGET_S390X)
     target_link_libraries(mono-sgen PRIVATE ucontext)
@@ -649,9 +556,6 @@ if(NOT DISABLE_EXECUTABLES)
     # if components are disabled, link the fallback stubs into the runtime
     # fallback stubs already provided in monosgen-static when components are disabled
   endif()
-  if(ICU_LDFLAGS)
-    set_property(TARGET mono-sgen APPEND_STRING PROPERTY LINK_FLAGS " ${ICU_LDFLAGS}")
-  endif()
 
   if(CMAKE_BUILD_TYPE STREQUAL "Debug")
     target_sources(mono-sgen PRIVATE ${mono_validate_apis_source})
diff --git a/src/mono/mono/mini/aot-compiler.c b/src/mono/mono/mini/aot-compiler.c
index d8f80b0bc6a9..bda04415e2d6 100644
--- a/src/mono/mono/mini/aot-compiler.c
+++ b/src/mono/mono/mini/aot-compiler.c
@@ -238,10 +238,15 @@ typedef struct MonoAotOptions {
 	gboolean verbose;
 	gboolean deterministic;
 	gboolean allow_errors;
+	gboolean compile_in_child;
+	gboolean child;
 	char *tool_prefix;
 	char *as_prefix;
+	char *as_name;
+	char *as_options;
 	char *ld_flags;
 	char *ld_name;
+	char *ld_options;
 	char *mtriple;
 	char *llvm_path;
 	char *temp_path;
@@ -258,6 +263,8 @@ typedef struct MonoAotOptions {
 	char *clangxx;
 	char *depfile;
 	char *runtime_init_callback;
+	GPtrArray *runtime_args;
+	const char *aot_options;
 } MonoAotOptions;
 
 typedef enum {
@@ -366,10 +373,12 @@ typedef struct MonoAotCompile {
 	MonoDwarfWriter *dwarf;
 	FILE *fp;
 	char *tmpbasename;
-	char *tmpfname;
+	char *asm_fname;
 	char *temp_dir_to_delete;
 	char *llvm_sfile;
 	char *llvm_ofile;
+	char *bc_fname;
+	char *optbc_fname;
 	GSList *cie_program;
 	GHashTable *unwind_info_offsets;
 	GPtrArray *unwind_ops;
@@ -448,6 +457,7 @@ static MonoAotCompile *llvm_acfg;
 static MonoAotCompile *current_acfg;
 static MonoAssembly *dedup_assembly;
 static GHashTable *dedup_methods;
+static GPtrArray *dedup_methods_list;
 
 /* Cache of decoded method external icall symbol names. */
 /* Owned by acfg, but kept in this static as well since it is */
@@ -481,9 +491,15 @@ get_patch_name (int info)
 static int
 aot_assembly (MonoAssembly *ass, guint32 jit_opts, MonoAotOptions *aot_options);
 
+static void
+set_paths (MonoAotCompile *acfg);
+
 static int
 emit_aot_image (MonoAotCompile *acfg);
 
+static int
+assemble_link (MonoAotCompile *acfg);
+
 static guint32
 get_unwind_info_offset (MonoAotCompile *acfg, guint8 *encoded, guint32 encoded_len);
 
@@ -508,6 +524,9 @@ is_direct_pinvoke_specified_for_method (MonoAotCompile *acfg, MonoMethod *method
 static inline const char*
 lookup_direct_pinvoke_symbol_name_aot (MonoAotCompile *acfg, MonoMethod *method);
 
+static int
+compile_assemblies_in_child (MonoAotOptions *aot_opts, MonoAssembly **assemblies, int nassemblies, GPtrArray *runtime_args, const char *aot_options);
+
 static gboolean
 mono_aot_mode_is_full (MonoAotOptions *opts)
 {
@@ -4351,11 +4370,13 @@ collect_dedup_method (MonoAotCompile *acfg, MonoMethod *method)
 			return TRUE;
 		// Remember for later
 		g_assert (acfg->dedup_phase == DEDUP_COLLECT);
-		if (!g_hash_table_lookup (dedup_methods, method))
+		if (!g_hash_table_lookup (dedup_methods, method)) {
 			g_hash_table_insert (dedup_methods, method, method);
-		else
+			g_ptr_array_add (dedup_methods_list, method);
+		} else {
 			// Already processed when compiling another assembly
 			return TRUE;
+		}
 	}
 	return FALSE;
 }
@@ -5490,10 +5511,10 @@ MONO_RESTORE_WARNING
 					if (decoded_args->named_args_info [j].field && !strcmp (decoded_args->named_args_info [j].field->name, "EntryPoint")) {
 						named = (const char *)decoded_args->named_args[j]->value.primitive;
 						slen = mono_metadata_decode_value (named, &named);
-						
+
 						int prefix_len = (int)strlen (acfg->user_symbol_prefix);
 						g_assert (prefix_len < 2);
-						
+
 						export_name = (char *)g_malloc (prefix_len + slen + 1);
 						if (prefix_len == 1)
 							export_name[0] = *acfg->user_symbol_prefix;
@@ -5833,12 +5854,14 @@ add_generic_class_with_depth (MonoAotCompile *acfg, MonoClass *klass, int depth,
 
 		icomparable_inst = mono_class_inflate_generic_class_checked (icomparable, &ctx, error);
 		mono_error_assert_ok (error); /* FIXME don't swallow the error */
+		g_assert (icomparable_inst);
 
 		if (mono_class_is_assignable_from_internal (icomparable_inst, tclass)) {
 			MonoClass *gcomparer_inst;
 			gcomparer = mono_class_load_from_name (mono_defaults.corlib, "System.Collections.Generic", "GenericComparer`1");
 			gcomparer_inst = mono_class_inflate_generic_class_checked (gcomparer, &ctx, error);
 			mono_error_assert_ok (error); /* FIXME don't swallow the error */
+			g_assert (gcomparer_inst);
 
 			add_generic_class (acfg, gcomparer_inst, FALSE, "Comparer<T>");
 		}
@@ -5860,6 +5883,7 @@ add_generic_class_with_depth (MonoAotCompile *acfg, MonoClass *klass, int depth,
 
 		iface_inst = mono_class_inflate_generic_class_checked (iface, &ctx, error);
 		mono_error_assert_ok (error); /* FIXME don't swallow the error */
+		g_assert (iface_inst);
 
 		if (mono_class_is_assignable_from_internal (iface_inst, tclass)) {
 			MonoClass *gcomparer_inst;
@@ -5867,6 +5891,7 @@ add_generic_class_with_depth (MonoAotCompile *acfg, MonoClass *klass, int depth,
 			gcomparer = mono_class_load_from_name (mono_defaults.corlib, "System.Collections.Generic", "GenericEqualityComparer`1");
 			gcomparer_inst = mono_class_inflate_generic_class_checked (gcomparer, &ctx, error);
 			mono_error_assert_ok (error); /* FIXME don't swallow the error */
+			g_assert (gcomparer_inst);
 			add_generic_class (acfg, gcomparer_inst, FALSE, "EqualityComparer<T>");
 		}
 	}
@@ -5888,6 +5913,7 @@ add_generic_class_with_depth (MonoAotCompile *acfg, MonoClass *klass, int depth,
 			enum_comparer = mono_class_load_from_name (mono_defaults.corlib, "System.Collections.Generic", "EnumEqualityComparer`1");
 			enum_comparer_inst = mono_class_inflate_generic_class_checked (enum_comparer, &ctx, error);
 			mono_error_assert_ok (error); /* FIXME don't swallow the error */
+			g_assert (enum_comparer_inst);
 			add_generic_class (acfg, enum_comparer_inst, FALSE, "EqualityComparer<T>");
 		}
 	}
@@ -5909,6 +5935,7 @@ add_generic_class_with_depth (MonoAotCompile *acfg, MonoClass *klass, int depth,
 			comparer = mono_class_load_from_name (mono_defaults.corlib, "System.Collections.Generic", "ObjectComparer`1");
 			comparer_inst = mono_class_inflate_generic_class_checked (comparer, &ctx, error);
 			mono_error_assert_ok (error); /* FIXME don't swallow the error */
+			g_assert (comparer_inst);
 			add_generic_class (acfg, comparer_inst, FALSE, "Comparer<T>");
 		}
 	}
@@ -5932,6 +5959,7 @@ add_instances_of (MonoAotCompile *acfg, MonoClass *klass, MonoType **insts, int
 		ctx.class_inst = mono_metadata_get_generic_inst (1, args);
 		generic_inst = mono_class_inflate_generic_class_checked (klass, &ctx, error);
 		mono_error_assert_ok (error); /* FIXME don't swallow the error */
+		g_assert (generic_inst);
 		add_generic_class (acfg, generic_inst, force, "");
 	}
 }
@@ -8915,10 +8943,16 @@ mono_aot_parse_options (const char *aot_options, MonoAotOptions *opts)
 			opts->tool_prefix = g_strdup (arg + strlen ("tool-prefix="));
 		} else if (str_begins_with (arg, "as-prefix=")) {
 			opts->as_prefix = g_strdup (arg + strlen ("as-prefix="));
+		} else if (str_begins_with (arg, "as-name=")) {
+			opts->as_name = g_strdup (arg + strlen ("as-name="));
+		} else if (str_begins_with (arg, "as-options=")) {
+			opts->as_options = g_strdup (arg + strlen ("as-options="));
 		} else if (str_begins_with (arg, "ld-flags=")) {
 			opts->ld_flags = g_strdup (arg + strlen ("ld-flags="));
 		} else if (str_begins_with (arg, "ld-name=")) {
 			opts->ld_name = g_strdup (arg + strlen ("ld-name="));
+		} else if (str_begins_with (arg, "ld-options=")) {
+			opts->ld_options = g_strdup (arg + strlen ("ld-options="));
 		} else if (str_begins_with (arg, "soft-debug")) {
 			opts->soft_debug = TRUE;
 		// Intentionally undocumented x2-- deprecated
@@ -9070,6 +9104,10 @@ mono_aot_parse_options (const char *aot_options, MonoAotOptions *opts)
 		// direct pinvokes (managed-to-native wrappers) and fallbacks to JIT for majority of managed methods.
 		} else if (str_begins_with (arg, "wrappers-only")) {
 			opts->wrappers_only = TRUE;
+		} else if (!strcmp (arg, "compile-in-child")) {
+			opts->compile_in_child = TRUE;
+		} else if (!strcmp (arg, "_child")) {
+			opts->child = TRUE;
 		} else if (str_begins_with (arg, "help") || str_begins_with (arg, "?")) {
 			printf ("Supported options for --aot:\n");
 			printf ("    asmonly                              - \n");
@@ -9120,6 +9158,7 @@ mono_aot_parse_options (const char *aot_options, MonoAotOptions *opts)
 			printf ("    verbose                              - \n");
 			printf ("    allow-errors                         - \n");
 			printf ("    no-opt                               - \n");
+			printf ("    compile-in-child                     - \n");
 			printf ("    llvmopts=<value>                     - \n");
 			printf ("    llvmllc=<value>                      - \n");
 			printf ("    clangxx=<value>                      - \n");
@@ -9518,6 +9557,23 @@ compile_method (MonoAotCompile *acfg, MonoMethod *method)
 	cfg = mini_method_compile (method, acfg->jit_opts, flags, 0, index);
 	mono_time_track_end (&mono_jit_stats.jit_time, jit_time_start);
 
+	if (cfg->prefer_instances) {
+		/*
+		 * Compile the original specific instances in addition to the gshared method
+		 * for performance reasons, since gshared methods cannot implement some
+		 * features like static virtual methods efficiently.
+		 */
+		/* Instances encountered later will be handled in add_extra_method_full () */
+		g_hash_table_insert (acfg->prefer_instances, method, method);
+		GPtrArray *instances = g_hash_table_lookup (acfg->gshared_instances, method);
+		if (instances) {
+			for (guint i = 0; i < instances->len; ++i) {
+				MonoMethod *instance = (MonoMethod*)g_ptr_array_index (instances, i);
+				add_extra_method_full (acfg, instance, FALSE, 0);
+			}
+		}
+	}
+
 	if (cfg->exception_type == MONO_EXCEPTION_GENERIC_SHARING_FAILED) {
 		if (acfg->aot_opts.print_skipped_methods)
 			printf ("Skip (gshared failure): %s (%s)\n", mono_method_get_full_name (method), cfg->exception_message);
@@ -9603,23 +9659,6 @@ compile_method (MonoAotCompile *acfg, MonoMethod *method)
 			printf ("%s ### %d\n", mono_method_get_full_name (method), cfg->code_size);
 	}
 
-	if (cfg->prefer_instances) {
-		/*
-		 * Compile the original specific instances in addition to the gshared method
-		 * for performance reasons, since gshared methods cannot implement some
-		 * features like static virtual methods efficiently.
-		 */
-		/* Instances encountered later will be handled in add_extra_method_full () */
-		g_hash_table_insert (acfg->prefer_instances, method, method);
-		GPtrArray *instances = g_hash_table_lookup (acfg->gshared_instances, method);
-		if (instances) {
-			for (guint i = 0; i < instances->len; ++i) {
-				MonoMethod *instance = (MonoMethod*)g_ptr_array_index (instances, i);
-				add_extra_method_full (acfg, instance, FALSE, 0);
-			}
-		}
-	}
-
 	/* Adds generic instances referenced by this method */
 	/*
 	 * The depth is used to avoid infinite loops when generic virtual recursion is
@@ -10649,32 +10688,21 @@ execute_system (const char * command)
 #endif
 
 /*
- * emit_llvm_file:
+ * compile_llvm_file:
  *
- *   Emit the LLVM code into an LLVM bytecode file, and compile it using the LLVM
- * tools.
+ *  Compile the llvm bitcode file using the LLVM tools.
  */
 static gboolean
-emit_llvm_file (MonoAotCompile *acfg)
+compile_llvm_file (MonoAotCompile *acfg)
 {
 	char *command, *opts, *tempbc, *optbc, *output_fname;
 
-	if (acfg->aot_opts.llvm_only && acfg->aot_opts.asm_only) {
-		if (acfg->aot_opts.no_opt)
-			tempbc = g_strdup (acfg->aot_opts.llvm_outfile);
-		else
-			tempbc = g_strdup_printf ("%s.bc", acfg->tmpbasename);
-		optbc = g_strdup (acfg->aot_opts.llvm_outfile);
-	} else {
-		tempbc = g_strdup_printf ("%s.bc", acfg->tmpbasename);
-		optbc = g_strdup_printf ("%s.opt.bc", acfg->tmpbasename);
-	}
-
-	mono_llvm_emit_aot_module (tempbc, g_path_get_basename (acfg->image->name));
-
 	if (acfg->aot_opts.no_opt)
 		return TRUE;
 
+	tempbc = acfg->bc_fname;
+	optbc = acfg->optbc_fname;
+
 #if (defined(TARGET_X86) || defined(TARGET_AMD64))
 	if (acfg->aot_opts.llvm_cpu_attr && strstr (acfg->aot_opts.llvm_cpu_attr, "sse4.2"))
 		/*
@@ -10797,7 +10825,7 @@ emit_llvm_file (MonoAotCompile *acfg)
 #if ( defined(TARGET_MACH) && defined(TARGET_ARM) ) || defined(TARGET_ORBIS) || defined(TARGET_X86_64_WIN32_MSVC) || defined(TARGET_ANDROID)
 	g_string_append_printf (acfg->llc_args, " -relocation-model=pic");
 #else
-	if (llvm_acfg->aot_opts.static_link)
+	if (acfg->aot_opts.static_link)
 		g_string_append_printf (acfg->llc_args, " -relocation-model=static");
 	else
 		g_string_append_printf (acfg->llc_args, " -relocation-model=pic");
@@ -11554,6 +11582,9 @@ emit_exception_info (MonoAotCompile *acfg)
 		char *aot_file = g_strdup_printf("%s%s", image_basename, SEQ_POINT_AOT_EXT);
 		char *aot_file_path = g_build_filename (dir, aot_file, (const char*)NULL);
 
+		g_assert (dir);
+		g_assert (aot_file_path);
+
 		if (g_ensure_directory_exists (aot_file_path) == FALSE) {
 			fprintf (stderr, "AOT : failed to create msym directory: %s\n", aot_file_path);
 			exit (1);
@@ -13180,7 +13211,7 @@ compile_asm (MonoAotCompile *acfg)
 #endif
 
 	if (acfg->aot_opts.asm_only) {
-		aot_printf (acfg, "Output file: '%s'.\n", acfg->tmpfname);
+		aot_printf (acfg, "Output file: '%s'.\n", acfg->asm_fname);
 		if (acfg->aot_opts.static_link)
 			aot_printf (acfg, "Linking symbol: '%s'.\n", acfg->static_linking_symbol);
 		if (acfg->llvm)
@@ -13194,16 +13225,24 @@ compile_asm (MonoAotCompile *acfg)
 		else
 			objfile = g_strdup_printf ("%s." AS_OBJECT_FILE_SUFFIX, acfg->image->name);
 	} else {
-		objfile = g_strdup_printf ("%s." AS_OBJECT_FILE_SUFFIX, acfg->tmpfname);
+		objfile = g_strdup_printf ("%s." AS_OBJECT_FILE_SUFFIX, acfg->asm_fname);
 	}
 
 #ifdef TARGET_OSX
 	g_string_append (acfg->as_args, "-c -x assembler ");
 #endif
+	const char *as_binary_name = acfg->aot_opts.as_name;
+	if (as_binary_name == NULL) {
+		as_binary_name = AS_NAME;
+	}
+	const char *as_options = acfg->aot_opts.as_options;
+	if (as_options == NULL) {
+		as_options = AS_OPTIONS;
+	}
 
-	command = g_strdup_printf ("\"%s%s\" %s %s -o %s %s", as_prefix, AS_NAME, AS_OPTIONS,
+	command = g_strdup_printf ("\"%s%s\" %s %s -o %s %s", as_prefix, as_binary_name, as_options,
 			acfg->as_args ? acfg->as_args->str : "",
-			wrap_path (objfile), wrap_path (acfg->tmpfname));
+			wrap_path (objfile), wrap_path (acfg->asm_fname));
 	aot_printf (acfg, "Executing the native assembler: %s\n", command);
 	if (execute_system (command) != 0) {
 		g_free (command);
@@ -13212,7 +13251,7 @@ compile_asm (MonoAotCompile *acfg)
 	}
 
 	if (acfg->llvm && !acfg->llvm_owriter) {
-		command = g_strdup_printf ("\"%s%s\" %s %s -o %s %s", as_prefix, AS_NAME, AS_OPTIONS,
+		command = g_strdup_printf ("\"%s%s\" %s %s -o %s %s", as_prefix, as_binary_name, as_options,
 			acfg->as_args ? acfg->as_args->str : "",
 			wrap_path (acfg->llvm_ofile), wrap_path (acfg->llvm_sfile));
 		aot_printf (acfg, "Executing the native assembler: %s\n", command);
@@ -13261,16 +13300,21 @@ compile_asm (MonoAotCompile *acfg)
 
 	str = g_string_new ("");
 	const char *ld_binary_name = acfg->aot_opts.ld_name;
+
+	const char *ld_options = acfg->aot_opts.ld_options;
+	if (ld_options == NULL) {
+		ld_options = LD_OPTIONS;
+	}
 #if defined(LD_NAME)
 	if (ld_binary_name == NULL) {
 		ld_binary_name = LD_NAME;
 	}
 	if (acfg->aot_opts.tool_prefix)
-		g_string_append_printf (str, "\"%s%s\" %s", tool_prefix, ld_binary_name, LD_OPTIONS);
+		g_string_append_printf (str, "\"%s%s\" %s", tool_prefix, ld_binary_name, ld_options);
 	else if (acfg->aot_opts.llvm_only)
 		g_string_append_printf (str, "%s", acfg->aot_opts.clangxx);
 	else
-		g_string_append_printf (str, "\"%s%s\" %s", tool_prefix, ld_binary_name, LD_OPTIONS);
+		g_string_append_printf (str, "\"%s%s\" %s", tool_prefix, ld_binary_name, ld_options);
 #else
 	if (ld_binary_name == NULL) {
 		ld_binary_name = "ld";
@@ -13279,7 +13323,7 @@ compile_asm (MonoAotCompile *acfg)
 	// Default (linux)
 	if (acfg->aot_opts.tool_prefix)
 		/* Cross compiling */
-		g_string_append_printf (str, "\"%s%s\" %s", tool_prefix, ld_binary_name, LD_OPTIONS);
+		g_string_append_printf (str, "\"%s%s\" %s", tool_prefix, ld_binary_name, ld_options);
 	else if (acfg->aot_opts.llvm_only)
 		g_string_append_printf (str, "%s", acfg->aot_opts.clangxx);
 	else
@@ -13288,7 +13332,7 @@ compile_asm (MonoAotCompile *acfg)
 #endif
 	g_string_append_printf (str, " -o %s %s %s %s",
 							wrap_path (tmp_outfile_name), wrap_path (llvm_ofile),
-							wrap_path (g_strdup_printf ("%s." AS_OBJECT_FILE_SUFFIX, acfg->tmpfname)), ld_flags);
+							wrap_path (g_strdup_printf ("%s." AS_OBJECT_FILE_SUFFIX, acfg->asm_fname)), ld_flags);
 
 #if defined(TARGET_MACH)
 	g_string_append_printf (str, " \"-Wl,-install_name,%s%s\"", g_path_get_basename (acfg->image->name), MONO_SOLIB_EXT);
@@ -13355,7 +13399,7 @@ compile_asm (MonoAotCompile *acfg)
 	if (acfg->aot_opts.save_temps)
 		aot_printf (acfg, "Retained input file.\n");
 	else
-		g_unlink (acfg->tmpfname);
+		g_unlink (acfg->asm_fname);
 
 	return 0;
 }
@@ -14210,8 +14254,11 @@ aot_opts_free (MonoAotOptions *aot_opts)
 	g_free (aot_opts->dedup_include);
 	g_free (aot_opts->tool_prefix);
 	g_free (aot_opts->as_prefix);
+	g_free (aot_opts->as_name);
+	g_free (aot_opts->as_options);
 	g_free (aot_opts->ld_flags);
 	g_free (aot_opts->ld_name);
+	g_free (aot_opts->ld_options);
 	g_free (aot_opts->mtriple);
 	g_free (aot_opts->llvm_path);
 	g_free (aot_opts->temp_path);
@@ -14888,14 +14935,14 @@ aot_assembly (MonoAssembly *ass, guint32 jit_opts, MonoAotOptions *aot_options)
 	if (acfg->jit_opts & MONO_OPT_GSHAREDVT)
 		mono_set_generic_sharing_vt_supported (TRUE);
 
-	if (acfg->dedup_phase != DEDUP_COLLECT)
+	if (acfg->dedup_phase != DEDUP_COLLECT && !acfg->aot_opts.child)
 		aot_printf (acfg, "Mono Ahead of Time compiler - compiling assembly %s\n", image->name);
 
 	if (!acfg->aot_opts.deterministic)
 		generate_aotid ((guint8*) &acfg->image->aotid);
 
 	char *aotid = mono_guid_to_string (acfg->image->aotid);
-	if (acfg->dedup_phase != DEDUP_COLLECT && !acfg->aot_opts.deterministic)
+	if (acfg->dedup_phase != DEDUP_COLLECT && !acfg->aot_opts.deterministic && !acfg->aot_opts.child)
 		aot_printf (acfg, "AOTID %s\n", aotid);
 	g_free (aotid);
 
@@ -15050,6 +15097,7 @@ aot_assembly (MonoAssembly *ass, guint32 jit_opts, MonoAotOptions *aot_options)
 	arch_init (acfg);
 
 	if (mono_use_llvm || acfg->aot_opts.llvm) {
+		acfg->llvm = TRUE;
 		/*
 		 * Emit all LLVM code into a separate assembly/object file and link with it
 		 * normally.
@@ -15064,6 +15112,8 @@ aot_assembly (MonoAssembly *ass, guint32 jit_opts, MonoAotOptions *aot_options)
 		}
 	}
 
+	set_paths (acfg);
+
 	if (acfg->llvm && acfg->thumb_mixed)
 		acfg->flags = (MonoAotFileFlags)(acfg->flags | MONO_AOT_FILE_FLAG_LLVM_THUMB);
 	if (acfg->aot_opts.llvm_only)
@@ -15084,6 +15134,21 @@ aot_assembly (MonoAssembly *ass, guint32 jit_opts, MonoAotOptions *aot_options)
 		acfg->llvm_eh_frame_symbol = g_strdup_printf ("%s_eh_frame", acfg->global_prefix);
 	}
 
+	if (acfg->aot_opts.compile_in_child) {
+		if (acfg->aot_opts.dedup_include) {
+			if (acfg->image->assembly == dedup_assembly)
+				return assemble_link (acfg);
+			else
+				return 0;
+		} else {
+			res = compile_assemblies_in_child (&acfg->aot_opts, &acfg->image->assembly, 1, acfg->aot_opts.runtime_args, acfg->aot_opts.aot_options);
+			if (res)
+				return res;
+
+			return assemble_link (acfg);
+		}
+	}
+
 	acfg->method_index = 1;
 
 	if (mono_aot_mode_is_full (&acfg->aot_opts) || mono_aot_mode_is_hybrid (&acfg->aot_opts))
@@ -15099,13 +15164,10 @@ aot_assembly (MonoAssembly *ass, guint32 jit_opts, MonoAotOptions *aot_options)
 		/* Add collected dedup-able methods */
 		aot_printf (acfg, "Adding %d dedup-ed methods.\n", g_hash_table_size (dedup_methods));
 
-		GHashTableIter iter;
-		MonoMethod *key;
-		MonoMethod *method;
-
-		g_hash_table_iter_init (&iter, dedup_methods);
-		while (g_hash_table_iter_next (&iter, (gpointer *)&key, (gpointer *)&method))
+		for (guint i = 0; i < dedup_methods_list->len; ++i) {
+			MonoMethod *method = (MonoMethod*)g_ptr_array_index (dedup_methods_list, i);
 			add_method_full (acfg, method, TRUE, 0);
+		}
 	}
 
 	{
@@ -15288,24 +15350,18 @@ create_depfile (MonoAotCompile *acfg)
 	fclose (depfile);
 }
 
-static int
-emit_aot_image (MonoAotCompile *acfg)
+static void
+set_paths (MonoAotCompile *acfg)
 {
-	int res;
-	TV_DECLARE (atv);
-	TV_DECLARE (btv);
-
-	TV_GETTIME (atv);
-
 #ifdef ENABLE_LLVM
 	if (acfg->llvm) {
 		if (acfg->aot_opts.asm_only) {
 			if (acfg->aot_opts.outfile) {
-				acfg->tmpfname = g_strdup_printf ("%s", acfg->aot_opts.outfile);
-				acfg->tmpbasename = g_strdup (acfg->tmpfname);
+				acfg->asm_fname = g_strdup_printf ("%s", acfg->aot_opts.outfile);
+				acfg->tmpbasename = g_strdup (acfg->asm_fname);
 			} else {
 				acfg->tmpbasename = g_strdup_printf ("%s", acfg->image->name);
-				acfg->tmpfname = g_strdup_printf ("%s.s", acfg->tmpbasename);
+				acfg->asm_fname = g_strdup_printf ("%s.s", acfg->tmpbasename);
 			}
 			g_assert (acfg->aot_opts.llvm_outfile);
 			acfg->llvm_sfile = g_strdup (acfg->aot_opts.llvm_outfile);
@@ -15324,7 +15380,9 @@ emit_aot_image (MonoAotCompile *acfg)
 			}
 
 			acfg->tmpbasename = g_build_filename (temp_path, "temp", (const char*)NULL);
-			acfg->tmpfname = g_strdup_printf ("%s.s", acfg->tmpbasename);
+			g_assert (acfg->tmpbasename);
+
+			acfg->asm_fname = g_strdup_printf ("%s.s", acfg->tmpbasename);
 			acfg->llvm_sfile = g_strdup_printf ("%s-llvm.s", acfg->tmpbasename);
 
 			if (acfg->aot_opts.static_link)
@@ -15334,26 +15392,90 @@ emit_aot_image (MonoAotCompile *acfg)
 
 			g_free (temp_path);
 		}
+
+		if (acfg->aot_opts.llvm_only && acfg->aot_opts.asm_only) {
+			if (acfg->aot_opts.no_opt)
+				acfg->bc_fname = g_strdup (acfg->aot_opts.llvm_outfile);
+			else
+				acfg->bc_fname = g_strdup_printf ("%s.bc", acfg->tmpbasename);
+			acfg->optbc_fname = g_strdup (acfg->aot_opts.llvm_outfile);
+		} else {
+			acfg->bc_fname = g_strdup_printf ("%s.bc", acfg->tmpbasename);
+			acfg->optbc_fname = g_strdup_printf ("%s.opt.bc", acfg->tmpbasename);
+		}
 	}
 #endif
 
 	if (acfg->aot_opts.asm_only && !acfg->aot_opts.llvm_only) {
 		if (acfg->aot_opts.outfile)
-			acfg->tmpfname = g_strdup_printf ("%s", acfg->aot_opts.outfile);
+			acfg->asm_fname = g_strdup_printf ("%s", acfg->aot_opts.outfile);
 		else
-			acfg->tmpfname = g_strdup_printf ("%s.s", acfg->image->name);
-		acfg->fp = g_fopen (acfg->tmpfname, "w+");
+			acfg->asm_fname = g_strdup_printf ("%s.s", acfg->image->name);
 	} else {
 		if (strcmp (acfg->aot_opts.temp_path, "") == 0) {
-			acfg->fp = fdopen (g_file_open_tmp ("mono_aot_XXXXXX", &acfg->tmpfname, NULL), "w+");
+			/* Done later */
 		} else {
 			acfg->tmpbasename = g_build_filename (acfg->aot_opts.temp_path, "temp", (const char*)NULL);
-			acfg->tmpfname = g_strdup_printf ("%s.s", acfg->tmpbasename);
-			acfg->fp = g_fopen (acfg->tmpfname, "w+");
+			g_assert (acfg->tmpbasename);
+
+			acfg->asm_fname = g_strdup_printf ("%s.s", acfg->tmpbasename);
+		}
+	}
+}
+
+/* Run external tools to assemble/link the aot image */
+static int
+assemble_link (MonoAotCompile *acfg)
+{
+	int res;
+	TV_DECLARE (atv);
+	TV_DECLARE (btv);
+
+	TV_GETTIME (atv);
+
+#ifdef ENABLE_LLVM
+	if (acfg->llvm) {
+		gboolean emit_res;
+
+		emit_res = compile_llvm_file (acfg);
+		if (!emit_res)
+			return 1;
+	}
+#endif
+
+	if (!acfg->aot_opts.llvm_only) {
+		res = compile_asm (acfg);
+		if (res != 0) {
+			acfg_free (acfg);
+			return res;
+		}
+	}
+	TV_GETTIME (btv);
+	acfg->stats.link_time = GINT64_TO_INT (TV_ELAPSED (atv, btv));
+
+	return 0;
+}
+
+static int
+emit_aot_image (MonoAotCompile *acfg)
+{
+	int res;
+	TV_DECLARE (atv);
+	TV_DECLARE (btv);
+
+	TV_GETTIME (atv);
+
+	if (acfg->aot_opts.asm_only && !acfg->aot_opts.llvm_only) {
+		acfg->fp = g_fopen (acfg->asm_fname, "w+");
+	} else {
+		if (strcmp (acfg->aot_opts.temp_path, "") == 0) {
+			acfg->fp = fdopen (g_file_open_tmp ("mono_aot_XXXXXX", &acfg->asm_fname, NULL), "w+");
+		} else {
+			acfg->fp = g_fopen (acfg->asm_fname, "w+");
 		}
 	}
 	if (acfg->fp == 0 && !acfg->aot_opts.llvm_only) {
-		aot_printerrf (acfg, "Unable to open file '%s': %s\n", acfg->tmpfname, strerror (errno));
+		aot_printerrf (acfg, "Unable to open file '%s': %s\n", acfg->asm_fname, strerror (errno));
 		return 1;
 	}
 	if (acfg->fp)
@@ -15471,13 +15593,8 @@ emit_aot_image (MonoAotCompile *acfg)
 		fclose (acfg->data_outfile);
 
 #ifdef ENABLE_LLVM
-	if (acfg->llvm) {
-		gboolean emit_res;
-
-		emit_res = emit_llvm_file (acfg);
-		if (!emit_res)
-			return 1;
-	}
+	if (acfg->llvm)
+		mono_llvm_emit_aot_module (acfg->bc_fname, g_path_get_basename (acfg->image->name));
 #endif
 
 	emit_library_info (acfg);
@@ -15489,26 +15606,13 @@ emit_aot_image (MonoAotCompile *acfg)
 	if (!acfg->aot_opts.stats)
 		aot_printf (acfg, "Compiled: %d/%d\n", acfg->stats.ccount, acfg->stats.mcount);
 
-	TV_GETTIME (atv);
 	if (acfg->w) {
 		res = mono_img_writer_emit_writeout (acfg->w);
 		if (res != 0) {
 			acfg_free (acfg);
 			return res;
 		}
-		res = compile_asm (acfg);
-		if (res != 0) {
-			acfg_free (acfg);
-			return res;
-		}
 	}
-	TV_GETTIME (btv);
-	acfg->stats.link_time = GINT64_TO_INT (TV_ELAPSED (atv, btv));
-
-	if (acfg->aot_opts.stats)
-		print_stats (acfg);
-
-	aot_printf (acfg, "JIT time: %d ms, Generation time: %d ms, Assembly+Link time: %d ms.\n", acfg->stats.jit_time / 1000, acfg->stats.gen_time / 1000, acfg->stats.link_time / 1000);
 
 	if (acfg->aot_opts.depfile)
 		create_depfile (acfg);
@@ -15516,6 +15620,19 @@ emit_aot_image (MonoAotCompile *acfg)
 	if (acfg->aot_opts.dump_json)
 		aot_dump (acfg);
 
+	if (acfg->aot_opts.child)
+		/* The rest is done in the parent */
+		return 0;
+
+	res = assemble_link (acfg);
+	if (res)
+		return res;
+
+	if (acfg->aot_opts.stats)
+		print_stats (acfg);
+
+	aot_printf (acfg, "JIT time: %d ms, Generation time: %d ms, Assembly+Link time: %d ms.\n", acfg->stats.jit_time / 1000, acfg->stats.gen_time / 1000, acfg->stats.link_time / 1000);
+
 	if (!acfg->aot_opts.save_temps && acfg->temp_dir_to_delete) {
 		char *command = g_strdup_printf ("rm -r %s", acfg->temp_dir_to_delete);
 		execute_system (command);
@@ -15527,14 +15644,96 @@ emit_aot_image (MonoAotCompile *acfg)
 	return 0;
 }
 
+static int
+compile_assemblies_in_child (MonoAotOptions *aot_opts, MonoAssembly **assemblies, int nassemblies, GPtrArray *runtime_args, const char *aot_options)
+{
+	FILE *response = NULL;
+	char *response_fname = NULL;
+
+	/* Find --aot argument */
+	int aot_index = -1;
+	for (guint32 i = 1; i < runtime_args->len; ++i) {
+		const char *arg = (const char*)g_ptr_array_index (runtime_args, i);
+		if (strncmp (arg, "--aot=", strlen ("--aot=")) == 0) {
+			aot_index = i;
+			break;
+		}
+	}
+	g_assert (aot_index != -1);
+
+#ifdef HOST_WIN32
+	response_fname = g_build_filename (aot_opts->temp_path, "temp.rsp", (const char*)NULL);
+	g_assert (response_fname);
+	response = fopen (response_fname, "w");
+	g_assert (response);
+#endif
+
+	GString *command;
+
+	command = g_string_new ("");
+
+	g_string_append_printf (command, "%s", (const char*)g_ptr_array_index (runtime_args, 0));
+
+	for (guint32 i = 1; i < runtime_args->len; ++i) {
+		const char *arg = (const char*)g_ptr_array_index (runtime_args, i);
+		if (strncmp (arg, "--response=", strlen ("--response=")) == 0)
+			/* Already expanded */
+			continue;
+		if (i != aot_index) {
+			if (response)
+				fprintf (response, "%s\n", arg);
+			else
+				g_string_append_printf (command, " \"%s\"", arg);
+		}
+	}
+
+	/* Pass '_child' instead of 'compile-in-child' */
+	GPtrArray *aot_split_args = mono_aot_split_options (aot_options);
+	GString *new_aot_args = g_string_new ("");
+	for (guint32 i = 0; i < aot_split_args->len; ++i) {
+		const char *aot_arg = (const char*)g_ptr_array_index (aot_split_args, i);
+		if (!strcmp (aot_arg, "compile-in-child"))
+			aot_arg = "_child";
+		if (i > 0)
+			g_string_append_printf (new_aot_args, ",");
+		g_string_append_printf (new_aot_args, "%s", aot_arg);
+	}
+
+	if (response)
+		fprintf (response, "\"--aot=%s\"\n", g_string_free (new_aot_args, FALSE));
+	else
+		g_string_append_printf (command, " \"--aot=%s\"", g_string_free (new_aot_args, FALSE));
+
+	for (int i = 0; i < nassemblies; ++i) {
+		if (response)
+			fprintf (response, "\"%s\"\n", assemblies [i]->image->name);
+		else
+			g_string_append_printf (command, " \"%s\"", assemblies [i]->image->name);
+	}
+
+	if (response)
+		fclose (response);
+
+	if (response)
+		g_string_append_printf (command, " \"--response=%s\"", response_fname);
+	char *cmd = g_string_free (command, FALSE);
+	printf ("Executing: %s\n", cmd);
+	int res = execute_system (cmd);
+	g_free (cmd);
+
+	return res;
+}
+
 int
-mono_aot_assemblies (MonoAssembly **assemblies, int nassemblies, guint32 jit_opts, const char *aot_options)
+mono_aot_assemblies (MonoAssembly **assemblies, int nassemblies, guint32 jit_opts, GPtrArray *runtime_args, const char *aot_options)
 {
 	int res = 0;
 	MonoAotOptions aot_opts;
 
 	init_options (&aot_opts);
 	mono_aot_parse_options (aot_options, &aot_opts);
+	aot_opts.runtime_args = runtime_args;
+	aot_opts.aot_options = aot_options;
 	if (aot_opts.direct_extern_calls && !(aot_opts.llvm && aot_opts.static_link)) {
 		fprintf (stderr, "The 'direct-extern-calls' option requires the 'llvm' and 'static' options.\n");
 		res = 1;
@@ -15547,6 +15746,22 @@ mono_aot_assemblies (MonoAssembly **assemblies, int nassemblies, guint32 jit_opt
 		goto early_exit;
 	}
 
+	if (aot_opts.compile_in_child) {
+		if (aot_opts.temp_path [0] == '\0') {
+			fprintf (stderr, "The 'compile-in-child' option requires the 'temp-path=' option.\n");
+			res = 1;
+			goto early_exit;
+		}
+		if (nassemblies > 1 && !aot_opts.dedup_include)
+			aot_opts.compile_in_child = FALSE;
+	}
+
+	if (aot_opts.dedup_include && aot_opts.compile_in_child) {
+		res = compile_assemblies_in_child (&aot_opts, assemblies, nassemblies, aot_opts.runtime_args, aot_opts.aot_options);
+		if (res)
+			return res;
+	}
+
 	if (aot_opts.dedup_include) {
 		/* Find the assembly which will contain the dedup-ed code */
 		int dedup_aindex = -1;
@@ -15570,6 +15785,7 @@ mono_aot_assemblies (MonoAssembly **assemblies, int nassemblies, guint32 jit_opt
 		assemblies [dedup_aindex] = atmp;
 
 		dedup_methods = g_hash_table_new (NULL, NULL);
+		dedup_methods_list = g_ptr_array_new ();
 	}
 
 	if (aot_opts.trimming_eligible_methods_outfile) {
@@ -15636,7 +15852,7 @@ mono_aot_get_method_index (MonoMethod *method)
 }
 
 int
-mono_aot_assemblies (MonoAssembly **assemblies, int nassemblies, guint32 opts, const char *aot_options)
+mono_aot_assemblies (MonoAssembly **assemblies, int nassemblies, guint32 jit_opts, GPtrArray *runtime_args, const char *aot_options)
 {
 	return 0;
 }
diff --git a/src/mono/mono/mini/aot-compiler.h b/src/mono/mono/mini/aot-compiler.h
index 7aaf862f2e92..5856134b8eff 100644
--- a/src/mono/mono/mini/aot-compiler.h
+++ b/src/mono/mono/mini/aot-compiler.h
@@ -7,7 +7,7 @@
 
 #include "mini.h"
 
-int mono_aot_assemblies (MonoAssembly **assemblies, int nassemblies, guint32 opts, const char *aot_options);
+int mono_aot_assemblies (MonoAssembly **assemblies, int nassemblies, guint32 opts, GPtrArray *runtime_args, const char *aot_options);
 void* mono_aot_readonly_field_override (MonoClassField *field);
 gboolean mono_aot_direct_icalls_enabled_for_method (MonoCompile *cfg, MonoMethod *method);
 gboolean mono_aot_is_shared_got_offset (int offset);
diff --git a/src/mono/mono/mini/aot-runtime-wasm.c b/src/mono/mono/mini/aot-runtime-wasm.c
index cf1ab0239293..30fde73c155b 100644
--- a/src/mono/mono/mini/aot-runtime-wasm.c
+++ b/src/mono/mono/mini/aot-runtime-wasm.c
@@ -15,8 +15,12 @@
 #ifdef HOST_WASM
 
 static char
-type_to_c (MonoType *t)
+type_to_c (MonoType *t, gboolean *is_byref_return)
 {
+	g_assert (t);
+
+	if (is_byref_return)
+		*is_byref_return = 0;
 	if (m_type_is_byref (t))
 		return 'I';
 
@@ -48,7 +52,7 @@ type_to_c (MonoType *t)
 		return 'L';
 	case MONO_TYPE_VOID:
 		return 'V';
-	case MONO_TYPE_VALUETYPE:
+	case MONO_TYPE_VALUETYPE: {
 		if (m_class_is_enumtype (t->data.klass)) {
 			t = mono_class_enum_basetype_internal (t->data.klass);
 			goto handle_enum;
@@ -60,13 +64,27 @@ type_to_c (MonoType *t)
 		// FIXME: Handle the scenario where there are fields of struct types that contain no members
 		MonoType *scalar_vtype;
 		if (mini_wasm_is_scalar_vtype (t, &scalar_vtype))
-			return type_to_c (scalar_vtype);
+			return type_to_c (scalar_vtype, NULL);
+
+		if (is_byref_return)
+			*is_byref_return = 1;
 
 		return 'I';
-	case MONO_TYPE_GENERICINST:
-		if (m_class_is_valuetype (t->data.klass))
+	}
+	case MONO_TYPE_GENERICINST: {
+		if (m_class_is_valuetype (t->data.klass)) {
+			MonoType *scalar_vtype;
+			if (mini_wasm_is_scalar_vtype (t, &scalar_vtype))
+				return type_to_c (scalar_vtype, NULL);
+
+			if (is_byref_return)
+				*is_byref_return = 1;
+
 			return 'S';
+		}
+
 		return 'I';
+	}
 	default:
 		g_warning ("CANT TRANSLATE %s", mono_type_full_name (t));
 		return 'X';
@@ -140,18 +158,29 @@ gpointer
 mono_wasm_get_interp_to_native_trampoline (MonoMethodSignature *sig)
 {
 	char cookie [32];
-	int c_count;
+	int c_count, offset = 1;
+	gboolean is_byref_return = 0;
+
+	memset (cookie, 0, 32);
+	cookie [0] = type_to_c (sig->ret, &is_byref_return);
 
-	c_count = sig->param_count + sig->hasthis + 1;
+	c_count = sig->param_count + sig->hasthis + is_byref_return + 1;
 	g_assert (c_count < sizeof (cookie)); //ensure we don't overflow the local
 
-	cookie [0] = type_to_c (sig->ret);
-	if (sig->hasthis)
-		cookie [1] = 'I';
+	if (is_byref_return) {
+		cookie[0] = 'V';
+		// return value address goes in arg0
+		cookie[1] = 'I';
+		offset += 1;
+	}
+	if (sig->hasthis) {
+		// thisptr goes in arg0/arg1 depending on return type
+		cookie [offset] = 'I';
+		offset += 1;
+	}
 	for (int i = 0; i < sig->param_count; ++i) {
-		cookie [1 + sig->hasthis + i] = type_to_c (sig->params [i]);
+		cookie [offset + i] = type_to_c (sig->params [i], NULL);
 	}
-	cookie [c_count] = 0;
 
 	void *p = mono_wasm_interp_to_native_callback (cookie);
 	if (!p)
diff --git a/src/mono/mono/mini/cpu-riscv64.mdesc b/src/mono/mono/mini/cpu-riscv64.mdesc
index eba087a9b590..6adf5a6755d9 100644
--- a/src/mono/mono/mini/cpu-riscv64.mdesc
+++ b/src/mono/mono/mini/cpu-riscv64.mdesc
@@ -35,17 +35,18 @@ dummy_use: src1:i len:0
 il_seq_point: len:0
 seq_point: len:0
 
+arglist: src1:i len:12
 check_this: src1:b len:4
 get_ex_obj: dest:i len:4
 gc_safe_point: src1:i len:12 clob:c
-start_handler: len:16 clob:c
+start_handler: len:36 clob:c
 call_handler: len:4 clob:c
 endfinally: len:32
 endfilter: src1:i len:32
 localloc: dest:i src1:i len:52
 localloc_imm: dest:i len:28
 generic_class_init: src1:a len:12 clob:c
-ckfinite: dest:f src1:f len:28
+ckfinite: dest:f src1:f len:32
 break: len:4
 
 throw: src1:i len:4
@@ -54,21 +55,21 @@ rethrow: src1:i len:4
 br: len:4
 br_reg: src1:i len:4
 jump_table: dest:i len:16
-call: dest:a len:4 clob:c
-call_reg: dest:a src1:i len:4 clob:c
-call_membase: dest:a src1:b len:20 clob:c
-voidcall: len:4 clob:c
-voidcall_reg: src1:i len:4 clob:c
-voidcall_membase: src1:b len:20 clob:c
-vcall2: len:16 clob:c
+call: dest:a len:28 clob:c
+call_reg: dest:a src1:i len:16 clob:c
+call_membase: dest:a src1:b len:32 clob:c
+voidcall: len:28 clob:c
+voidcall_reg: src1:i len:16 clob:c
+voidcall_membase: src1:b len:32 clob:c
+vcall2: len:28 clob:c
 vcall2_reg: src1:i len:16 clob:c
-vcall2_membase: src1:b len:28 clob:c
-fcall: dest:f len:8 clob:c
-fcall_reg: dest:f src1:i len:8 clob:c
-fcall_membase: dest:f src1:b len:12 clob:c
-rcall: dest:f len:8 clob:c
-rcall_reg: dest:f src1:i len:8 clob:c
-rcall_membase: dest:f src1:b len:12 clob:c
+vcall2_membase: src1:b len:32 clob:c
+fcall: dest:f len:28 clob:c
+fcall_reg: dest:f src1:i len:16 clob:c
+fcall_membase: dest:f src1:b len:32 clob:c
+rcall: dest:f len:28 clob:c
+rcall_reg: dest:f src1:i len:16 clob:c
+rcall_membase: dest:f src1:b len:32 clob:c
 
 # Note: in RV32, it shoule be 
 # lcall: dest:l ...
@@ -101,8 +102,8 @@ loadr4_membase: dest:f src1:b len:24
 loadr8_membase: dest:f src1:b len:24
 
 memory_barrier: len:4
-atomic_add_i4: dest:i src1:i src2:i len:4
-atomic_add_i8: dest:i src1:i src2:i len:4
+atomic_add_i4: dest:i src1:i src2:i len:8
+atomic_add_i8: dest:i src1:i src2:i len:8
 atomic_store_i1: dest:b src1:i len:8
 atomic_store_u1: dest:b src1:i len:8
 atomic_store_i2: dest:b src1:i len:8
@@ -113,7 +114,7 @@ atomic_store_i8: dest:b src1:i len:8
 atomic_store_u8: dest:b src1:i len:8
 atomic_load_i1: dest:b src1:i len:12
 atomic_load_u1: dest:b src1:i len:12
-atomic_load_i2: dest:b src1:i len:12
+atomic_load_i2: dest:b src1:i len:24
 atomic_load_u2: dest:b src1:i len:12
 atomic_load_i4: dest:b src1:i len:12
 atomic_load_u4: dest:b src1:i len:12
@@ -137,18 +138,18 @@ iconst: dest:i len:16
 int_add: dest:i src1:i src2:i len:4
 int_sub: dest:i src1:i src2:i len:4
 int_mul: dest:i src1:i src2:i len:4
-int_div: dest:i src1:i src2:i len:32
+int_div: dest:i src1:i src2:i len:80
 int_div_un: dest:i src1:i src2:i len:32
-int_rem: dest:i src1:i src2:i len:32
+int_rem: dest:i src1:i src2:i len:80
 int_rem_un: dest:i src1:i src2:i len:32
 
 i8const: dest:i len:16
 long_add: dest:i src1:i src2:i len:4
 long_sub: dest:i src1:i src2:i len:4
 long_mul: dest:i src1:i src2:i len:4
-long_div: dest:i src1:i src2:i len:32
+long_div: dest:i src1:i src2:i len:80
 long_div_un: dest:i src1:i src2:i len:32
-long_rem: dest:i src1:i src2:i len:32
+long_rem: dest:i src1:i src2:i len:80
 long_rem_un: dest:i src1:i src2:i len:32
 
 r8const: dest:f len:16
diff --git a/src/mono/mono/mini/driver.c b/src/mono/mono/mini/driver.c
index 2077f30d412d..56e20eeb5eb0 100644
--- a/src/mono/mono/mini/driver.c
+++ b/src/mono/mono/mini/driver.c
@@ -1386,6 +1386,7 @@ typedef struct
 	char **argv;
 	guint32 opts;
 	char *aot_options;
+	GPtrArray *runtime_args;
 } MainThreadArgs;
 
 static void
@@ -1421,7 +1422,7 @@ main_thread_handler (gpointer user_data)
 			assemblies [i] = assembly;
 		}
 
-		res = mono_aot_assemblies (assemblies, main_args->argc, main_args->opts, main_args->aot_options);
+		res = mono_aot_assemblies (assemblies, main_args->argc, main_args->opts, main_args->runtime_args, main_args->aot_options);
 		if (res)
 			exit (1);
 		return;
@@ -1767,7 +1768,7 @@ mono_jit_parse_options (int argc, char * argv[])
 	memcpy (copy_argv, argv, sizeof (char*) * argc);
 	argv = copy_argv;
 
-	mono_options_parse_options ((const char**)argv, argc, &argc, error);
+	mono_options_parse_options ((const char**)argv, argc, &argc, NULL, error);
 	if (!is_ok (error)) {
 		g_printerr ("%s", mono_error_get_message (error));
 		mono_error_cleanup (error);
@@ -2067,6 +2068,7 @@ mono_main (int argc, char* argv[])
 	GPtrArray *agents = NULL;
 	char *extra_bindings_config_file = NULL;
 	GList *paths = NULL;
+	GPtrArray *args;
 #ifdef MONO_JIT_INFO_TABLE_TEST
 	int test_jit_info_table = FALSE;
 #endif
@@ -2095,7 +2097,9 @@ mono_main (int argc, char* argv[])
 
 	enable_debugging = TRUE;
 
-	mono_options_parse_options ((const char**)argv + 1, argc - 1, &argc, error);
+	args = g_ptr_array_new ();
+
+	mono_options_parse_options ((const char**)argv + 1, argc - 1, &argc, args, error);
 	argc ++;
 	if (!is_ok (error)) {
 		g_printerr ("%s", mono_error_get_message (error));
@@ -2103,9 +2107,11 @@ mono_main (int argc, char* argv[])
 		return 1;
 	}
 
+	g_ptr_array_add (args, argv [0]);
 	for (i = 1; i < argc; ++i) {
 		if (argv [i] [0] != '-')
 			break;
+		g_ptr_array_add (args, argv [i]);
 		if (strcmp (argv [i], "--regression") == 0) {
 			action = DO_REGRESSION;
 		} else if (strncmp (argv [i], "--single-method=", 16) == 0) {
@@ -2464,7 +2470,7 @@ mono_main (int argc, char* argv[])
 
 			/* Parse newly added options */
 			int n = argc;
-			mono_options_parse_options ((const char**)(argv + orig_argc), argc - orig_argc, &n, error);
+			mono_options_parse_options ((const char**)(argv + orig_argc), argc - orig_argc, &n, args, error);
 			if (!is_ok (error)) {
 				g_printerr ("%s", mono_error_get_message (error));
 				mono_error_cleanup (error);
@@ -2649,6 +2655,7 @@ mono_main (int argc, char* argv[])
 		main_args.argv = argv + i;
 		main_args.opts = opt;
 		main_args.aot_options = aot_options;
+		main_args.runtime_args = args;
 		main_thread_handler (&main_args);
 		mono_thread_manage_internal ();
 
diff --git a/src/mono/mono/mini/exceptions-ppc.c b/src/mono/mono/mini/exceptions-ppc.c
index 146fece23692..c7537a258acf 100644
--- a/src/mono/mono/mini/exceptions-ppc.c
+++ b/src/mono/mono/mini/exceptions-ppc.c
@@ -838,5 +838,8 @@ mono_arch_setup_resume_sighandler_ctx (MonoContext *ctx, gpointer func)
 	ctx->regs[2] = (gulong)handler_ftnptr->toc;
 #else
 	MONO_CONTEXT_SET_IP(ctx, (unsigned long) func);
+#ifdef TARGET_POWERPC64
+        ctx->regs[12] = (gulong)func;
+#endif
 #endif
 }
diff --git a/src/mono/mono/mini/exceptions-s390x.c b/src/mono/mono/mini/exceptions-s390x.c
index 6c40214a13f7..a5c92c01ec4c 100644
--- a/src/mono/mono/mini/exceptions-s390x.c
+++ b/src/mono/mono/mini/exceptions-s390x.c
@@ -515,7 +515,6 @@ mono_arch_unwind_frame (MonoJitTlsData *jit_tls,
 	*new_ctx = *ctx;
 
 	if (ji != NULL) {
-		uintptr_t address;
 		guint8 *cfa;
 		guint32 unwind_info_len;
 		guint8 *unwind_info;
@@ -528,8 +527,6 @@ mono_arch_unwind_frame (MonoJitTlsData *jit_tls,
 
 		unwind_info = mono_jinfo_get_unwind_info (ji, &unwind_info_len);
 
-		address = (char *)ip - (char *)ji->code_start;
-
 		if (ji->has_arch_eh_info)
 			epilog = (guint8*)ji->code_start + ji->code_size - mono_jinfo_get_epilog_size (ji);
 
diff --git a/src/mono/mono/mini/interp/interp-internals.h b/src/mono/mono/mini/interp/interp-internals.h
index 72ef097eac22..a815f39f9c8e 100644
--- a/src/mono/mono/mini/interp/interp-internals.h
+++ b/src/mono/mono/mini/interp/interp-internals.h
@@ -145,6 +145,7 @@ struct InterpMethod {
 	MonoFtnDesc *ftndesc_unbox;
 	MonoDelegateTrampInfo *del_info;
 
+	/* locals_size is equal to the offset of the param_area */
 	guint32 locals_size;
 	guint32 alloca_size;
 	int num_clauses; // clauses
@@ -153,6 +154,7 @@ struct InterpMethod {
 	unsigned int hasthis; // boolean
 	MonoProfilerCallInstrumentationFlags prof_flags;
 	InterpMethodCodeType code_type;
+	MonoBitSet *ref_slots;
 #ifdef ENABLE_EXPERIMENT_TIERED
 	MiniTieredCounter tiered_counter;
 #endif
@@ -181,8 +183,6 @@ struct InterpMethod {
 	unsigned int is_verbose : 1;
 #if HOST_BROWSER
 	unsigned int contains_traces : 1;
-	guint16 *backward_branch_offsets;
-	unsigned int backward_branch_offsets_count;
 	MonoBitSet *address_taken_bits;
 #endif
 #if PROFILE_INTERP
@@ -270,11 +270,20 @@ typedef struct {
 	guchar *stack_pointer;
 	/* Used for allocation of localloc regions */
 	FrameDataAllocator data_stack;
+	/* If bit n is set, it means that the n-th stack slot (pointer sized) from stack_start doesn't contain any refs */
+	guint8 *no_ref_slots;
 } ThreadContext;
 
 typedef struct {
 	gint64 transform_time;
 	gint64 methods_transformed;
+	gint64 optimize_time;
+	gint64 ssa_compute_time;
+	gint64 ssa_compute_dominance_time;
+	gint64 ssa_compute_global_vars_time;
+	gint64 ssa_compute_pruned_liveness_time;
+	gint64 ssa_rename_vars_time;
+	gint64 optimize_bblocks_time;
 	gint64 cprop_time;
 	gint64 super_instructions_time;
 	gint32 emitted_instructions;
diff --git a/src/mono/mono/mini/interp/interp-intrins.c b/src/mono/mono/mini/interp/interp-intrins.c
index c9e7b5ba2a7e..1489ddfa09c4 100644
--- a/src/mono/mono/mini/interp/interp-intrins.c
+++ b/src/mono/mono/mini/interp/interp-intrins.c
@@ -16,7 +16,7 @@ rotate_left (guint32 value, int offset)
 }
 
 void
-interp_intrins_marvin_block (guint32 *pp0, guint32 *pp1)
+interp_intrins_marvin_block (guint32 *pp0, guint32 *pp1, guint32 *dest0, guint32 *dest1)
 {
 	// Marvin.Block
 	guint32 p0 = *pp0;
@@ -34,8 +34,8 @@ interp_intrins_marvin_block (guint32 *pp0, guint32 *pp1)
 	p0 += p1;
 	p1 = rotate_left (p1, 19);
 
-	*pp0 = p0;
-	*pp1 = p1;
+	*dest0 = p0;
+	*dest1 = p1;
 }
 
 guint32
diff --git a/src/mono/mono/mini/interp/interp-intrins.h b/src/mono/mono/mini/interp/interp-intrins.h
index 1e3b218af927..98229f26d413 100644
--- a/src/mono/mono/mini/interp/interp-intrins.h
+++ b/src/mono/mono/mini/interp/interp-intrins.h
@@ -124,7 +124,7 @@ interp_intrins_popcount_i8 (guint64 val)
 #endif
 
 void
-interp_intrins_marvin_block (guint32 *pp0, guint32 *pp1);
+interp_intrins_marvin_block (guint32 *pp0, guint32 *pp1, guint32 *dest0, guint32 *dest1);
 
 guint32
 interp_intrins_ascii_chars_to_uppercase (guint32 val);
diff --git a/src/mono/mono/mini/interp/interp-simd.c b/src/mono/mono/mini/interp/interp-simd.c
index edaa7615652e..cdc6a951c5f9 100644
--- a/src/mono/mono/mini/interp/interp-simd.c
+++ b/src/mono/mono/mini/interp/interp-simd.c
@@ -315,47 +315,15 @@ interp_v128_u2_widen_upper (gpointer res, gpointer v1)
 static void
 interp_v128_u1_narrow (gpointer res, gpointer v1, gpointer v2)
 {
-	guint8 *res_typed = (guint8*)res;
+	guint8 res_typed [SIZEOF_V128];
 	guint16 *v1_typed = (guint16*)v1;
 	guint16 *v2_typed = (guint16*)v2;
 
-	if (res != v2) {
-		res_typed [0] = v1_typed [0];
-		res_typed [1] = v1_typed [1];
-		res_typed [2] = v1_typed [2];
-		res_typed [3] = v1_typed [3];
-		res_typed [4] = v1_typed [4];
-		res_typed [5] = v1_typed [5];
-		res_typed [6] = v1_typed [6];
-		res_typed [7] = v1_typed [7];
-
-		res_typed [8] = v2_typed [0];
-		res_typed [9] = v2_typed [1];
-		res_typed [10] = v2_typed [2];
-		res_typed [11] = v2_typed [3];
-		res_typed [12] = v2_typed [4];
-		res_typed [13] = v2_typed [5];
-		res_typed [14] = v2_typed [6];
-		res_typed [15] = v2_typed [7];
-	} else {
-		res_typed [15] = v2_typed [7];
-		res_typed [14] = v2_typed [6];
-		res_typed [13] = v2_typed [5];
-		res_typed [12] = v2_typed [4];
-		res_typed [11] = v2_typed [3];
-		res_typed [10] = v2_typed [2];
-		res_typed [9] = v2_typed [1];
-		res_typed [8] = v2_typed [0];
-
-		res_typed [0] = v1_typed [0];
-		res_typed [1] = v1_typed [1];
-		res_typed [2] = v1_typed [2];
-		res_typed [3] = v1_typed [3];
-		res_typed [4] = v1_typed [4];
-		res_typed [5] = v1_typed [5];
-		res_typed [6] = v1_typed [6];
-		res_typed [7] = v1_typed [7];
-	}
+	for (int i = 0; i < 8; i++)
+		res_typed [i] = v1_typed [i];
+	for (int i = 0; i < 8; i++)
+		res_typed [i + 8] = v2_typed [i];
+	memcpy (res, res_typed, SIZEOF_V128);
 }
 
 // GreaterThan
diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c
index 59f7c09e5306..f8e2ad02ad95 100644
--- a/src/mono/mono/mini/interp/interp.c
+++ b/src/mono/mono/mini/interp/interp.c
@@ -412,6 +412,9 @@ get_context (void)
 	if (context == NULL) {
 		context = g_new0 (ThreadContext, 1);
 		context->stack_start = (guchar*)mono_valloc_aligned (INTERP_STACK_SIZE, MINT_STACK_ALIGNMENT, MONO_MMAP_READ | MONO_MMAP_WRITE, MONO_MEM_ACCOUNT_INTERP_STACK);
+		// A bit for every pointer sized slot in the stack. FIXME don't allocate whole bit array
+		if (mono_interp_opt & INTERP_OPT_PRECISE_GC)
+			context->no_ref_slots = (guchar*)mono_valloc (NULL, INTERP_STACK_SIZE / (8 * sizeof (gpointer)), MONO_MMAP_READ | MONO_MMAP_WRITE, MONO_MEM_ACCOUNT_INTERP_STACK);
 		context->stack_end = context->stack_start + INTERP_STACK_SIZE - INTERP_REDZONE_SIZE;
 		context->stack_real_end = context->stack_start + INTERP_STACK_SIZE;
 		/* We reserve a stack slot at the top of the interp stack to make temp objects visible to GC */
@@ -1340,7 +1343,10 @@ typedef enum {
 	PINVOKE_ARG_R8 = 3,
 	PINVOKE_ARG_R4 = 4,
 	PINVOKE_ARG_VTYPE = 5,
-	PINVOKE_ARG_SCALAR_VTYPE = 6
+	PINVOKE_ARG_SCALAR_VTYPE = 6,
+	// This isn't ifdefed so it's easier to write code that handles it without sprinkling
+	//  800 ifdefs in this file
+	PINVOKE_ARG_WASM_VALUETYPE_RESULT = 7,
 } PInvokeArgType;
 
 typedef struct {
@@ -1436,6 +1442,7 @@ get_build_args_from_sig_info (MonoMemoryManager *mem_manager, MonoMethodSignatur
 			ilen++;
 			break;
 		case MONO_TYPE_GENERICINST: {
+			// FIXME: Should mini_wasm_is_scalar_vtype stuff go in here?
 			MonoClass *container_class = type->data.generic_class->container_class;
 			type = m_class_get_byval_arg (container_class);
 			goto retry;
@@ -1473,11 +1480,32 @@ get_build_args_from_sig_info (MonoMemoryManager *mem_manager, MonoMethodSignatur
 		case MONO_TYPE_CLASS:
 		case MONO_TYPE_OBJECT:
 		case MONO_TYPE_STRING:
+			info->ret_pinvoke_type = PINVOKE_ARG_INT;
+			break;
+#if SIZEOF_VOID_P == 8
+		case MONO_TYPE_I8:
+		case MONO_TYPE_U8:
+#endif
+			info->ret_pinvoke_type = PINVOKE_ARG_INT;
+			break;
+#if SIZEOF_VOID_P == 4
 		case MONO_TYPE_I8:
 		case MONO_TYPE_U8:
+			info->ret_pinvoke_type = PINVOKE_ARG_INT;
+			break;
+#endif
 		case MONO_TYPE_VALUETYPE:
 		case MONO_TYPE_GENERICINST:
 			info->ret_pinvoke_type = PINVOKE_ARG_INT;
+#ifdef HOST_WASM
+			// This ISSTRUCT check is important, because the type could be an enum
+			if (MONO_TYPE_ISSTRUCT (info->ret_mono_type)) {
+				// The return type was already filtered previously, so if we get here
+				//  we're returning a struct byref instead of as a scalar
+				info->ret_pinvoke_type = PINVOKE_ARG_WASM_VALUETYPE_RESULT;
+				info->ilen++;
+			}
+#endif
 			break;
 		case MONO_TYPE_R4:
 		case MONO_TYPE_R8:
@@ -1503,6 +1531,15 @@ build_args_from_sig (InterpMethodArguments *margs, MonoMethodSignature *sig, Bui
 	margs->ilen = info->ilen;
 	margs->flen = info->flen;
 
+	size_t int_i = 0;
+	size_t int_f = 0;
+
+	if (info->ret_pinvoke_type == PINVOKE_ARG_WASM_VALUETYPE_RESULT) {
+		// Allocate an empty arg0 for the address of the return value
+		// info->ilen was already increased earlier
+		int_i++;
+	}
+
 	if (margs->ilen > 0) {
 		if (margs->ilen <= 8)
 			margs->iargs = margs->iargs_buf;
@@ -1517,9 +1554,6 @@ build_args_from_sig (InterpMethodArguments *margs, MonoMethodSignature *sig, Bui
 			margs->fargs = g_malloc0 (sizeof (double) * margs->flen);
 	}
 
-	size_t int_i = 0;
-	size_t int_f = 0;
-
 	for (int i = 0; i < sig->param_count; i++) {
 		guint32 offset = get_arg_offset (frame->imethod, sig, i);
 		stackval *sp_arg = STACK_ADD_BYTES (frame->stack, offset);
@@ -1578,6 +1612,15 @@ build_args_from_sig (InterpMethodArguments *margs, MonoMethodSignature *sig, Bui
 	}
 
 	switch (info->ret_pinvoke_type) {
+	case PINVOKE_ARG_WASM_VALUETYPE_RESULT:
+		// We pass the return value address in arg0 so fill it in, we already
+		//  reserved space for it earlier.
+		g_assert (frame->retval);
+		margs->iargs[0] = (gpointer*)frame->retval;
+		// The return type is void so retval should be NULL
+		margs->retval = NULL;
+		margs->is_float_ret = 0;
+		break;
 	case PINVOKE_ARG_INT:
 		margs->retval = (gpointer*)frame->retval;
 		margs->is_float_ret = 0;
@@ -1795,8 +1838,10 @@ ves_pinvoke_method (
 	g_free (ccontext.stack);
 #else
 	// Only the vt address has been returned, we need to copy the entire content on interp stack
-	if (!context->has_resume_state && MONO_TYPE_ISSTRUCT (call_info->ret_mono_type))
-		stackval_from_data (call_info->ret_mono_type, frame.retval, (char*)frame.retval->data.p, sig->pinvoke && !sig->marshalling_disabled);
+	if (!context->has_resume_state && MONO_TYPE_ISSTRUCT (call_info->ret_mono_type)) {
+		if (call_info->ret_pinvoke_type != PINVOKE_ARG_WASM_VALUETYPE_RESULT)
+			stackval_from_data (call_info->ret_mono_type, frame.retval, (char*)frame.retval->data.p, sig->pinvoke && !sig->marshalling_disabled);
+	}
 
 	if (margs.iargs != margs.iargs_buf)
 		g_free (margs.iargs);
@@ -3436,9 +3481,6 @@ interp_create_method_pointer (MonoMethod *method, gboolean compile, MonoError *e
 		return (gpointer)no_llvmonly_interp_method_pointer;
 	}
 
-	if (method->wrapper_type && method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE)
-		return imethod;
-
 #ifndef MONO_ARCH_HAVE_FTNPTR_ARG_TRAMPOLINE
 	/*
 	 * Interp in wrappers get the argument in the rgctx register. If
@@ -3821,11 +3863,6 @@ max_d (double lhs, double rhs)
 		return fmax (lhs, rhs);
 }
 
-#if HOST_BROWSER
-// Dummy call info used outside of monitoring phase. We don't care what's in it
-static JiterpreterCallInfo jiterpreter_call_info = { 0 };
-#endif
-
 /*
  * If CLAUSE_ARGS is non-null, start executing from it.
  * The ERROR argument is used to avoid declaring an error object for every interp frame, its not used
@@ -3955,36 +3992,13 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
 			MINT_IN_BREAK;
 		}
 
-#define LDC(n) do { LOCAL_VAR (ip [1], gint32) = (n); ip += 2; } while (0)
-		MINT_IN_CASE(MINT_LDC_I4_M1)
-			LDC(-1);
-			MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_LDC_I4_0)
-			LDC(0);
+			LOCAL_VAR (ip [1], gint32) = 0;
+			ip += 2;
 			MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_LDC_I4_1)
-			LDC(1);
-			MINT_IN_BREAK;
-		MINT_IN_CASE(MINT_LDC_I4_2)
-			LDC(2);
-			MINT_IN_BREAK;
-		MINT_IN_CASE(MINT_LDC_I4_3)
-			LDC(3);
-			MINT_IN_BREAK;
-		MINT_IN_CASE(MINT_LDC_I4_4)
-			LDC(4);
-			MINT_IN_BREAK;
-		MINT_IN_CASE(MINT_LDC_I4_5)
-			LDC(5);
-			MINT_IN_BREAK;
-		MINT_IN_CASE(MINT_LDC_I4_6)
-			LDC(6);
-			MINT_IN_BREAK;
-		MINT_IN_CASE(MINT_LDC_I4_7)
-			LDC(7);
-			MINT_IN_BREAK;
-		MINT_IN_CASE(MINT_LDC_I4_8)
-			LDC(8);
+			LOCAL_VAR (ip [1], gint32) = 1;
+			ip += 2;
 			MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_LDC_I4_S)
 			LOCAL_VAR (ip [1], gint32) = (short)ip [2];
@@ -4255,6 +4269,7 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
 			call_args_offset = ip [2];
 
 			this_arg = LOCAL_VAR (call_args_offset, MonoObject*);
+			NULL_CHECK (this_arg);
 
 			slot = (gint16)ip [4];
 			ip += 5;
@@ -5266,6 +5281,10 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) + (gint16)ip [3];
 			ip += 4;
 			MINT_IN_BREAK;
+		MINT_IN_CASE(MINT_ADD_I4_IMM2)
+			LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) + (gint32)READ32 (ip + 3);
+			ip += 5;
+			MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_ADD1_I8)
 			LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) + 1;
 			ip += 3;
@@ -5274,6 +5293,10 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) + (gint16)ip [3];
 			ip += 4;
 			MINT_IN_BREAK;
+		MINT_IN_CASE(MINT_ADD_I8_IMM2)
+			LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) + (gint32)READ32 (ip + 3);
+			ip += 5;
+			MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_SUB_I4)
 			BINOP(gint32, -);
 			MINT_IN_BREAK;
@@ -5304,10 +5327,18 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) * (gint16)ip [3];
 			ip += 4;
 			MINT_IN_BREAK;
+		MINT_IN_CASE(MINT_MUL_I4_IMM2)
+			LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) * (gint32)READ32 (ip + 3);
+			ip += 5;
+			MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_MUL_I8_IMM)
 			LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) * (gint16)ip [3];
 			ip += 4;
 			MINT_IN_BREAK;
+		MINT_IN_CASE(MINT_MUL_I8_IMM2)
+			LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) * (gint32)READ32 (ip + 3);
+			ip += 5;
+			MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_ADD_MUL_I4_IMM)
 			LOCAL_VAR (ip [1], gint32) = (LOCAL_VAR (ip [2], gint32) + (gint16)ip [3]) * (gint16)ip [4];
 			ip += 5;
@@ -5415,12 +5446,28 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_AND_I4)
 			BINOP(gint32, &);
 			MINT_IN_BREAK;
+		MINT_IN_CASE(MINT_AND_I4_IMM)
+			LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) & (gint16)ip [3];
+			ip += 4;
+			MINT_IN_BREAK;
+		MINT_IN_CASE(MINT_AND_I4_IMM2)
+			LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) & READ32 (ip + 3);
+			ip += 5;
+			MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_AND_I8)
 			BINOP(gint64, &);
 			MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_OR_I4)
 			BINOP(gint32, |);
 			MINT_IN_BREAK;
+		MINT_IN_CASE(MINT_OR_I4_IMM)
+			LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) | (gint16)ip [3];
+			ip += 4;
+			MINT_IN_BREAK;
+		MINT_IN_CASE(MINT_OR_I4_IMM2)
+			LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) | READ32 (ip + 3);
+			ip += 5;
+			MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_OR_I8)
 			BINOP(gint64, |);
 			MINT_IN_BREAK;
@@ -5813,15 +5860,6 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			cmethod = (InterpMethod*)frame->imethod->data_items [imethod_index];
 			goto jit_call;
 		}
-		MINT_IN_CASE(MINT_NEWOBJ_VT_INLINED) {
-			guint16 ret_size = ip [3];
-			gpointer this_vt = locals + ip [2];
-
-			memset (this_vt, 0, ret_size);
-			LOCAL_VAR (ip [1], gpointer) = this_vt;
-			ip += 4;
-			MINT_IN_BREAK;
-		}
 		MINT_IN_CASE(MINT_NEWOBJ_SLOW) {
 			guint32 const token = ip [3];
 			return_offset = ip [1];
@@ -5964,8 +6002,13 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			MINT_IN_BREAK;
 		}
 		MINT_IN_CASE(MINT_INTRINS_MARVIN_BLOCK) {
-			interp_intrins_marvin_block ((guint32*)(locals + ip [1]), (guint32*)(locals + ip [2]));
-			ip += 3;
+			guint32 *pp0 = (guint32*)(locals + ip [1]);
+			guint32 *pp1 = (guint32*)(locals + ip [2]);
+			guint32 *dest0 = (guint32*)(locals + ip [3]);
+			guint32 *dest1 = (guint32*)(locals + ip [4]);
+
+			interp_intrins_marvin_block (pp0, pp1, dest0, dest1);
+			ip += 5;
 			MINT_IN_BREAK;
 		}
 		MINT_IN_CASE(MINT_INTRINS_ASCII_CHARS_TO_UPPERCASE) {
@@ -6378,6 +6421,13 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			MonoVTable *vtable = (MonoVTable*)frame->imethod->data_items [ip [3]];
 			MonoClass *c = vtable->klass;
 
+			if (G_UNLIKELY (m_class_is_byreflike (c))) {
+				char *str = g_strdup_printf ("Cannot box IsByRefLike type '%s.%s'", m_class_get_name_space (c), m_class_get_name (c));
+				MonoException *ex = mono_exception_from_name_msg (mono_defaults.corlib, "System", "InvalidProgramException", str);
+				g_free (str);
+				THROW_EX (ex, ip);
+			}
+
 			// FIXME push/pop LMF
 			MonoObject *o = mono_gc_alloc_obj (vtable, m_class_get_instance_size (c));
 			SET_TEMP_POINTER(o);
@@ -7580,7 +7630,11 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			/* top of stack is result of filter */
 			frame->retval->data.i = LOCAL_VAR (ip [1], gint32);
 			goto exit_clause;
-		MINT_IN_CASE(MINT_INITOBJ)
+		MINT_IN_CASE(MINT_ZEROBLK)
+			memset (LOCAL_VAR (ip [1], gpointer), 0, LOCAL_VAR (ip [2], gsize));
+			ip += 3;
+			MINT_IN_BREAK;
+		MINT_IN_CASE(MINT_ZEROBLK_IMM)
 			memset (LOCAL_VAR (ip [1], gpointer), 0, ip [2]);
 			ip += 3;
 			MINT_IN_BREAK;
@@ -7813,7 +7867,7 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 						// now execute the trace
 						// this isn't important for performance, but it makes it easier to use the
 						//  jiterpreter early in automated tests where code only runs once
-						offset = prepare_result (frame, locals, &jiterpreter_call_info, ip);
+						offset = prepare_result (frame, locals, NULL, ip);
 						ip = (guint16*) (((guint8*)ip) + offset);
 						break;
 				}
@@ -7836,7 +7890,7 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 		MINT_IN_CASE(MINT_TIER_ENTER_JITERPRETER) {
 			// The fn ptr is encoded in a guint16 relative to the index of the first trace fn ptr, so compute the actual ptr
 			JiterpreterThunk thunk = (JiterpreterThunk)(void *)(((JiterpreterOpcode *)ip)->relative_fn_ptr + mono_jiterp_first_trace_fn_ptr);
-			ptrdiff_t offset = thunk (frame, locals, &jiterpreter_call_info, ip);
+			ptrdiff_t offset = thunk (frame, locals, NULL, ip);
 			ip = (guint16*) (((guint8*)ip) + offset);
 			MINT_IN_BREAK;
 		}
@@ -7958,6 +8012,10 @@ interp_parse_options (const char *options)
 			else if (strncmp (arg, "jiterp", 6) == 0)
 				opt = INTERP_OPT_JITERPRETER;
 #endif
+			else if (strncmp (arg, "ssa", 3) == 0)
+				opt = INTERP_OPT_SSA;
+			else if (strncmp (arg, "precise", 7) == 0)
+				opt = INTERP_OPT_PRECISE_GC;
 			else if (strncmp (arg, "all", 3) == 0)
 				opt = ~INTERP_OPT_NONE;
 
@@ -8420,6 +8478,57 @@ interp_stop_single_stepping (void)
 	ss_enabled = FALSE;
 }
 
+
+static void
+interp_mark_frame_no_ref_slots (ThreadContext *context, InterpFrame *frame, gpointer *top_limit)
+{
+	InterpMethod *imethod = frame->imethod;
+	gpointer *frame_stack = (gpointer*)frame->stack;
+	gpointer *frame_stack_end = (gpointer*)((guchar*)frame->stack + imethod->alloca_size);
+	// The way interpreter implements calls is by moving arguments to the param area, at the
+	// top of the stack and then proceed with the call. Up to the moment of the call these slots
+	// are owned by the calling frame. Once we do the call, the stack pointer of the called
+	// frame will point inside the param area of the calling frame.
+	//
+	// We mark no ref slots from top to bottom and we use the top limit to ignore slots
+	// that were already handled in the called frame.
+	if (top_limit && top_limit < frame_stack_end)
+		frame_stack_end = top_limit;
+
+	for (gpointer *current = frame_stack; current < frame_stack_end; current++) {
+		gsize slot_index = current - frame_stack;
+		if (!mono_bitset_test_fast (imethod->ref_slots, slot_index)) {
+			gsize global_slot_index = current - (gpointer*)context->stack_start;
+			gsize table_index = global_slot_index / 8;
+			int bit_index = global_slot_index % 8;
+			context->no_ref_slots [table_index] |= 1 << bit_index;
+		}
+	}
+}
+
+static void
+interp_mark_no_ref_slots (ThreadContext *context, MonoLMF* lmf)
+{
+	memset (context->no_ref_slots, 0, (context->stack_pointer - context->stack_start) / (8 * sizeof (gpointer)) + 1);
+	while (lmf) {
+		if ((gsize)lmf->previous_lmf & 2) {
+			MonoLMFExt *lmf_ext = (MonoLMFExt*) lmf;
+			if (lmf_ext->kind == MONO_LMFEXT_INTERP_EXIT || lmf_ext->kind == MONO_LMFEXT_INTERP_EXIT_WITH_CTX) {
+				InterpFrame *frame = (InterpFrame*)lmf_ext->interp_exit_data;
+				gpointer *top_limit = NULL;
+				while (frame) {
+					if (frame->imethod) {
+						interp_mark_frame_no_ref_slots (context, frame, top_limit);
+						top_limit = (gpointer*)frame->stack;
+					}
+					frame = frame->parent;
+				}
+			}
+		}
+		lmf = (MonoLMF*)((gsize)lmf->previous_lmf & ~3);
+	}
+}
+
 /*
  * interp_mark_stack:
  *
@@ -8452,9 +8561,20 @@ interp_mark_stack (gpointer thread_data, GcScanFunc func, gpointer gc_data, gboo
 	if (!context || !context->stack_start)
 		return;
 
-	// FIXME: Scan the whole area with 1 call
-	for (gpointer *p = (gpointer*)context->stack_start; p < (gpointer*)context->stack_pointer; p++)
-		func (p, gc_data);
+	if (mono_interp_opt & INTERP_OPT_PRECISE_GC) {
+		MonoLMF **lmf_addr = (MonoLMF**)info->tls [TLS_KEY_LMF_ADDR];
+		if (lmf_addr)
+			interp_mark_no_ref_slots (context, *lmf_addr);
+	}
+
+	int slot_index = 0;
+	for (gpointer *p = (gpointer*)context->stack_start; p < (gpointer*)context->stack_pointer; p++) {
+		if (context->no_ref_slots && (context->no_ref_slots [slot_index / 8] & (1 << (slot_index % 8))))
+			;// This slot is marked as no ref, we don't scan it
+		else
+			func (p, gc_data);
+		slot_index++;
+	}
 
 	FrameDataFragment *frag;
 	for (frag = context->data_stack.first; frag; frag = frag->next) {
@@ -8709,19 +8829,6 @@ interp_cleanup (void)
 #endif
 }
 
-static void
-register_interp_stats (void)
-{
-	mono_counters_init ();
-	mono_counters_register ("Total transform time", MONO_COUNTER_INTERP | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &mono_interp_stats.transform_time);
-	mono_counters_register ("Methods transformed", MONO_COUNTER_INTERP | MONO_COUNTER_LONG, &mono_interp_stats.methods_transformed);
-	mono_counters_register ("Total cprop time", MONO_COUNTER_INTERP | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &mono_interp_stats.cprop_time);
-	mono_counters_register ("Total super instructions time", MONO_COUNTER_INTERP | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &mono_interp_stats.super_instructions_time);
-	mono_counters_register ("Emitted instructions", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.emitted_instructions);
-	mono_counters_register ("Methods inlined", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.inlined_methods);
-	mono_counters_register ("Inline failures", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.inline_failures);
-}
-
 #undef MONO_EE_CALLBACK
 #define MONO_EE_CALLBACK(ret, name, sig) interp_ ## name,
 
@@ -8750,8 +8857,6 @@ mono_ee_interp_init (const char *opts)
 
 	mini_install_interp_callbacks (&mono_interp_callbacks);
 
-	register_interp_stats ();
-
 #ifdef HOST_WASI
 	debugger_enabled = mini_get_debug_options ()->mdb_optimizations;
 #endif
diff --git a/src/mono/mono/mini/interp/interp.h b/src/mono/mono/mini/interp/interp.h
index 137b40e1dd49..a09111c490be 100644
--- a/src/mono/mono/mini/interp/interp.h
+++ b/src/mono/mono/mini/interp/interp.h
@@ -41,7 +41,9 @@ enum {
 #if HOST_BROWSER
 	INTERP_OPT_JITERPRETER = 64,
 #endif
-	INTERP_OPT_DEFAULT = INTERP_OPT_INLINE | INTERP_OPT_CPROP | INTERP_OPT_SUPER_INSTRUCTIONS | INTERP_OPT_BBLOCKS | INTERP_OPT_TIERING | INTERP_OPT_SIMD
+	INTERP_OPT_SSA = 128,
+	INTERP_OPT_PRECISE_GC = 256,
+	INTERP_OPT_DEFAULT = INTERP_OPT_INLINE | INTERP_OPT_CPROP | INTERP_OPT_SUPER_INSTRUCTIONS | INTERP_OPT_BBLOCKS | INTERP_OPT_TIERING | INTERP_OPT_SIMD | INTERP_OPT_SSA | INTERP_OPT_PRECISE_GC
 #if HOST_BROWSER
 		| INTERP_OPT_JITERPRETER
 #endif
diff --git a/src/mono/mono/mini/interp/jiterpreter-opcode-values.h b/src/mono/mono/mini/interp/jiterpreter-opcode-values.h
index d109c70ba25b..c3926ccb4c47 100644
--- a/src/mono/mono/mini/interp/jiterpreter-opcode-values.h
+++ b/src/mono/mono/mini/interp/jiterpreter-opcode-values.h
@@ -35,7 +35,7 @@ OPRANGE(MINT_RET_I4_IMM, MINT_RET_I8_IMM, ABORT_OUTSIDE_BRANCH_BLOCK_NONE)
 
 // High value because interp has to do a memory load for the immediate
 //  but we can inline it into the trace
-OPRANGE(MINT_LDC_I4_M1, MINT_LDC_R8, HIGH)
+OPRANGE(MINT_LDC_I4_0, MINT_LDC_R8, HIGH)
 
 OPRANGE(MINT_MOV_I4_I1, MINT_MOV_4, NORMAL)
 // High value for large/complex moves
@@ -43,8 +43,10 @@ OPRANGE(MINT_MOV_8, MINT_MOV_8_4, HIGH)
 
 // Binops. Assume most of them are not any faster in jiterp
 OPRANGE(MINT_ADD_I4, MINT_CLT_UN_R8, NORMAL)
-// Unops and some superinsns. Most will not be faster in jiterp.
-OPRANGE(MINT_ADD1_I4, MINT_SHR_I8_IMM, NORMAL)
+// Unops. Most will not be faster in jiterp.
+OPRANGE(MINT_ADD1_I4, MINT_CEQ0_I4, NORMAL)
+// Some superinsns that will be faster in jiterp due to inline constants
+OPRANGE(MINT_ADD_I4_IMM, MINT_ADD_MUL_I8_IMM, HIGH)
 // Math intrinsics. We implement most of these by calling libc or using wasm opcodes
 OPRANGE(MINT_ASIN, MINT_MAXF, NORMAL)
 // Field operations. Null check optimization makes these more efficient than interp
@@ -69,6 +71,8 @@ OP(MINT_TIER_MONITOR_JITERPRETER, NONE)
 OP(MINT_TIER_ENTER_JITERPRETER, NONE)
 OP(MINT_NOP, NONE)
 OP(MINT_DEF, NONE)
+OP(MINT_DEF_ARG, NONE)
+OP(MINT_DEF_TIER_VAR, NONE)
 OP(MINT_DUMMY_USE, NONE)
 OP(MINT_IL_SEQ_POINT, NONE)
 OP(MINT_TIER_PATCHPOINT_DATA, NONE)
@@ -130,12 +134,12 @@ OP(MINT_INTRINS_MEMORYMARSHAL_GETARRAYDATAREF, HIGH)
 OP(MINT_INITLOCAL, MASSIVE)
 OP(MINT_INITLOCALS, MASSIVE)
 OP(MINT_LOCALLOC, NORMAL)
-OP(MINT_INITOBJ, MASSIVE)
+OP(MINT_ZEROBLK, MASSIVE)
+OP(MINT_ZEROBLK_IMM, HIGH)
 OP(MINT_INTRINS_RUNTIMEHELPERS_OBJECT_HAS_COMPONENT_SIZE, HIGH)
 OP(MINT_INTRINS_ENUM_HASFLAG, HIGH)
 OP(MINT_INTRINS_ORDINAL_IGNORE_CASE_ASCII, HIGH)
 OP(MINT_NEWOBJ_INLINED, HIGH)
-OP(MINT_NEWOBJ_VT_INLINED, MASSIVE)
 OP(MINT_CPBLK, HIGH)
 OP(MINT_INITBLK, HIGH)
 OP(MINT_ROL_I4_IMM, HIGH)
diff --git a/src/mono/mono/mini/interp/jiterpreter.c b/src/mono/mono/mini/interp/jiterpreter.c
index 0d4e17bf346f..12678e10e828 100644
--- a/src/mono/mono/mini/interp/jiterpreter.c
+++ b/src/mono/mono/mini/interp/jiterpreter.c
@@ -617,16 +617,17 @@ jiterp_get_opcode_value (InterpInst *ins, gboolean *inside_branch_block)
 		initialize_opcode_value_table ();
 
 	guint16 opcode = ins->opcode;
-	g_assert(opcode < MINT_LASTOP);
+	g_assert (opcode < MINT_LASTOP);
 	int table_value = opcode_value_table[opcode];
 
-	if (table_value == VALUE_ABORT_OUTSIDE_BRANCH_BLOCK) {
-		return *inside_branch_block ? VALUE_LOW : VALUE_ABORT;
-	} else if (table_value == VALUE_ABORT_OUTSIDE_BRANCH_BLOCK) {
-		return *inside_branch_block ? VALUE_NONE : VALUE_ABORT;
-	} else if (table_value == VALUE_BEGIN_BRANCH_BLOCK) {
-		*inside_branch_block = TRUE;
-		return VALUE_NORMAL;
+	switch (table_value) {
+		case VALUE_ABORT_OUTSIDE_BRANCH_BLOCK:
+			return *inside_branch_block ? VALUE_LOW : VALUE_ABORT;
+		case VALUE_ABORT_OUTSIDE_BRANCH_BLOCK_NONE:
+			return *inside_branch_block ? VALUE_NONE : VALUE_ABORT;
+		case VALUE_BEGIN_BRANCH_BLOCK:
+			*inside_branch_block = TRUE;
+			return VALUE_NORMAL;
 	}
 
 	switch (opcode) {
@@ -884,7 +885,10 @@ jiterp_insert_entry_points (void *_imethod, void *_td)
 		// Increase the instruction counter. If we inserted an entry point at the top of this bb,
 		//  the new instruction counter will be the number of instructions in the block, so if
 		//  it's big enough we'll be able to insert another entry point right away.
-		instruction_count += bb->in_count;
+		for (InterpInst * ins = bb->first_ins; ins != NULL; ins = ins->next) {
+			if (!MINT_IS_EMIT_NOP (ins->opcode))
+				instruction_count++;
+		}
 
 		build_address_taken_bitset (td, bb, bitset_size);
 	}
@@ -999,7 +1003,7 @@ mono_jiterp_parse_option (const char *option)
 
 	const char *arr[2] = { option, NULL };
 	int temp;
-	mono_options_parse_options (arr, 1, &temp, NULL);
+	mono_options_parse_options (arr, 1, &temp, NULL, NULL);
 	return TRUE;
 }
 
@@ -1165,7 +1169,9 @@ enum {
 	JITERP_MEMBER_SPAN_LENGTH,
 	JITERP_MEMBER_SPAN_DATA,
 	JITERP_MEMBER_ARRAY_LENGTH,
+	// Kept as-is but no longer implemented
 	JITERP_MEMBER_BACKWARD_BRANCH_OFFSETS,
+	// Ditto
 	JITERP_MEMBER_BACKWARD_BRANCH_OFFSETS_COUNT,
 	JITERP_MEMBER_CLAUSE_DATA_OFFSETS,
 	JITERP_MEMBER_PARAMS_COUNT,
@@ -1173,7 +1179,9 @@ enum {
 	JITERP_MEMBER_VTABLE_KLASS,
 	JITERP_MEMBER_CLASS_RANK,
 	JITERP_MEMBER_CLASS_ELEMENT_CLASS,
-	JITERP_MEMBER_BOXED_VALUE_DATA
+	JITERP_MEMBER_BOXED_VALUE_DATA,
+	JITERP_MEMBER_BACKWARD_BRANCH_TAKEN,
+	JITERP_MEMBER_BAILOUT_OPCODE_COUNT,
 };
 
 
@@ -1195,10 +1203,6 @@ mono_jiterp_get_member_offset (int member) {
 			return offsetof (InterpFrame, imethod);
 		case JITERP_MEMBER_DATA_ITEMS:
 			return offsetof (InterpMethod, data_items);
-		case JITERP_MEMBER_BACKWARD_BRANCH_OFFSETS:
-			return offsetof (InterpMethod, backward_branch_offsets);
-		case JITERP_MEMBER_BACKWARD_BRANCH_OFFSETS_COUNT:
-			return offsetof (InterpMethod, backward_branch_offsets_count);
 		case JITERP_MEMBER_CLAUSE_DATA_OFFSETS:
 			return offsetof (InterpMethod, clause_data_offsets);
 		case JITERP_MEMBER_RMETHOD:
@@ -1220,6 +1224,10 @@ mono_jiterp_get_member_offset (int member) {
 		// see mono_object_get_data
 		case JITERP_MEMBER_BOXED_VALUE_DATA:
 			return MONO_ABI_SIZEOF (MonoObject);
+		case JITERP_MEMBER_BACKWARD_BRANCH_TAKEN:
+			return offsetof (JiterpreterCallInfo, backward_branch_taken);
+		case JITERP_MEMBER_BAILOUT_OPCODE_COUNT:
+			return offsetof (JiterpreterCallInfo, bailout_opcode_count);
 		default:
 			g_assert_not_reached();
 	}
@@ -1675,7 +1683,20 @@ mono_jiterp_tlqueue_clear (int queue) {
 
 // HACK: fix C4206
 EMSCRIPTEN_KEEPALIVE
+#else
+int
+mono_jiterp_is_enabled (void);
 #endif // HOST_BROWSER
 
-void jiterp_preserve_module (void) {
+int
+mono_jiterp_is_enabled (void) {
+#if HOST_BROWSER
+	return mono_opt_jiterpreter_traces_enabled;
+#else
+	return 0;
+#endif
+}
+
+void
+jiterp_preserve_module (void) {
 }
diff --git a/src/mono/mono/mini/interp/jiterpreter.h b/src/mono/mono/mini/interp/jiterpreter.h
index 26b05f64a0c8..ed57b0a0e17b 100644
--- a/src/mono/mono/mini/interp/jiterpreter.h
+++ b/src/mono/mono/mini/interp/jiterpreter.h
@@ -239,4 +239,7 @@ mono_jiterp_tlqueue_purge_all (gpointer item);
 
 #endif // HOST_BROWSER
 
+int
+mono_jiterp_is_enabled (void);
+
 #endif // __MONO_MINI_JITERPRETER_H__
diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def
index 7dd9d914fa32..d79d5cd30aca 100644
--- a/src/mono/mono/mini/interp/mintops.def
+++ b/src/mono/mono/mini/interp/mintops.def
@@ -42,16 +42,8 @@ OPDEF(MINT_RET_U1, "ret.u1", 2, 0, 1, MintOpNoArgs)
 OPDEF(MINT_RET_I2, "ret.i2", 2, 0, 1, MintOpNoArgs)
 OPDEF(MINT_RET_U2, "ret.u2", 2, 0, 1, MintOpNoArgs)
 
-OPDEF(MINT_LDC_I4_M1, "ldc.i4.m1", 2, 1, 0, MintOpNoArgs)
 OPDEF(MINT_LDC_I4_0, "ldc.i4.0", 2, 1, 0, MintOpNoArgs)
 OPDEF(MINT_LDC_I4_1, "ldc.i4.1", 2, 1, 0, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_2, "ldc.i4.2", 2, 1, 0, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_3, "ldc.i4.3", 2, 1, 0, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_4, "ldc.i4.4", 2, 1, 0, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_5, "ldc.i4.5", 2, 1, 0, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_6, "ldc.i4.6", 2, 1, 0, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_7, "ldc.i4.7", 2, 1, 0, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_8, "ldc.i4.8", 2, 1, 0, MintOpNoArgs)
 OPDEF(MINT_LDC_I4_S, "ldc.i4.s", 3, 1, 0, MintOpShortInt)
 OPDEF(MINT_LDC_I4, "ldc.i4", 4, 1, 0, MintOpInt)
 
@@ -363,8 +355,8 @@ OPDEF(MINT_NEWOBJ_STRING, "newobj_string", 4, 1, 1, MintOpMethodToken)
 OPDEF(MINT_NEWOBJ, "newobj", 5, 1, 1, MintOpMethodToken)
 OPDEF(MINT_NEWOBJ_INLINED, "newobj_inlined", 3, 1, 0, MintOpVTableToken)
 OPDEF(MINT_NEWOBJ_VT, "newobj_vt", 5, 1, 1, MintOpMethodToken)
-OPDEF(MINT_NEWOBJ_VT_INLINED, "newobj_vt_inlined", 4, 1, 1, MintOpShortInt)
-OPDEF(MINT_INITOBJ, "initobj", 3, 0, 1, MintOpShortInt)
+OPDEF(MINT_ZEROBLK, "zeroblk", 3, 0, 2, MintOpNoArgs)
+OPDEF(MINT_ZEROBLK_IMM, "zeroblk_imm", 3, 0, 1, MintOpShortInt)
 OPDEF(MINT_CASTCLASS, "castclass", 4, 1, 1, MintOpClassToken)
 OPDEF(MINT_ISINST, "isinst", 4, 1, 1, MintOpClassToken)
 OPDEF(MINT_CASTCLASS_INTERFACE, "castclass.interface", 4, 1, 1, MintOpClassToken)
@@ -654,13 +646,20 @@ OPDEF(MINT_RET_I4_IMM, "ret.i4.imm", 2, 0, 0, MintOpShortInt)
 OPDEF(MINT_RET_I8_IMM, "ret.i8.imm", 2, 0, 0, MintOpShortInt)
 
 OPDEF(MINT_ADD_I4_IMM, "add.i4.imm", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_ADD_I4_IMM2, "add.i4.imm2", 5, 1, 1, MintOpInt)
 OPDEF(MINT_ADD_I8_IMM, "add.i8.imm", 4, 1, 1, MintOpShortInt)
-
-OPDEF(MINT_ADD_MUL_I4_IMM, "add.mul.i4.imm", 5, 1, 1, MintOpTwoShorts)
-OPDEF(MINT_ADD_MUL_I8_IMM, "add.mul.i8.imm", 5, 1, 1, MintOpTwoShorts)
+OPDEF(MINT_ADD_I8_IMM2, "add.i8.imm2", 5, 1, 1, MintOpInt)
 
 OPDEF(MINT_MUL_I4_IMM, "mul.i4.imm", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_MUL_I4_IMM2, "mul.i4.imm2", 5, 1, 1, MintOpInt)
 OPDEF(MINT_MUL_I8_IMM, "mul.i8.imm", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_MUL_I8_IMM2, "mul.i8.imm2", 5, 1, 1, MintOpInt)
+
+OPDEF(MINT_AND_I4_IMM, "and.i4.imm", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_AND_I4_IMM2, "and.i4.imm2", 5, 1, 1, MintOpInt)
+
+OPDEF(MINT_OR_I4_IMM, "or.i4.imm", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_OR_I4_IMM2, "or.i4.imm2", 5, 1, 1, MintOpInt)
 
 OPDEF(MINT_SHR_UN_I4_IMM, "shr.un.i4.imm", 4, 1, 1, MintOpShortInt)
 OPDEF(MINT_SHR_UN_I8_IMM, "shr.un.i8.imm", 4, 1, 1, MintOpShortInt)
@@ -669,6 +668,9 @@ OPDEF(MINT_SHL_I8_IMM, "shl.i8.imm", 4, 1, 1, MintOpShortInt)
 OPDEF(MINT_SHR_I4_IMM, "shr.i4.imm", 4, 1, 1, MintOpShortInt)
 OPDEF(MINT_SHR_I8_IMM, "shr.i8.imm", 4, 1, 1, MintOpShortInt)
 
+OPDEF(MINT_ADD_MUL_I4_IMM, "add.mul.i4.imm", 5, 1, 1, MintOpTwoShorts)
+OPDEF(MINT_ADD_MUL_I8_IMM, "add.mul.i8.imm", 5, 1, 1, MintOpTwoShorts)
+
 OPDEF(MINT_SHL_AND_I4, "shl.i4.and", 4, 1, 2, MintOpNoArgs)
 OPDEF(MINT_SHL_AND_I8, "shl.i8.and", 4, 1, 2, MintOpNoArgs)
 
@@ -815,7 +817,8 @@ OPDEF(MINT_INTRINS_GET_TYPE, "intrins_get_type", 3, 1, 1, MintOpNoArgs)
 OPDEF(MINT_INTRINS_SPAN_CTOR, "intrins_span_ctor", 4, 1, 2, MintOpNoArgs)
 OPDEF(MINT_INTRINS_RUNTIMEHELPERS_OBJECT_HAS_COMPONENT_SIZE, "intrins_runtimehelpers_object_has_component_size", 3, 1, 1, MintOpNoArgs)
 OPDEF(MINT_INTRINS_CLEAR_WITH_REFERENCES, "intrin_clear_with_references", 3, 0, 2, MintOpNoArgs)
-OPDEF(MINT_INTRINS_MARVIN_BLOCK, "intrins_marvin_block", 3, 0, 2, MintOpNoArgs)
+// This actually has 2 dregs and 2 sregs. Dregs are displayed as the metadata
+OPDEF(MINT_INTRINS_MARVIN_BLOCK, "intrins_marvin_block", 5, 0, 2, MintOpTwoShorts)
 OPDEF(MINT_INTRINS_ASCII_CHARS_TO_UPPERCASE, "intrins_ascii_chars_to_uppercase", 3, 1, 1, MintOpNoArgs)
 OPDEF(MINT_INTRINS_MEMORYMARSHAL_GETARRAYDATAREF, "intrins_memorymarshal_getarraydataref", 3, 1, 1, MintOpNoArgs)
 OPDEF(MINT_INTRINS_ORDINAL_IGNORE_CASE_ASCII, "intrins_ordinal_ignore_case_ascii", 4, 1, 2, MintOpNoArgs)
@@ -834,12 +837,18 @@ OPDEF(MINT_TIER_MONITOR_JITERPRETER, "tier_monitor_jiterpreter", 4, 0, 0, MintOp
 
 IROPDEF(MINT_NOP, "nop", 1, 0, 0, MintOpNoArgs)
 IROPDEF(MINT_DEF, "def", 2, 1, 0, MintOpNoArgs)
+IROPDEF(MINT_DEF_ARG, "def_arg", 2, 1, 0, MintOpNoArgs)
+IROPDEF(MINT_DEF_TIER_VAR, "def_tier_var", 3, 1, 1, MintOpNoArgs)
 IROPDEF(MINT_IL_SEQ_POINT, "il_seq_point", 1, 0, 0, MintOpNoArgs)
 IROPDEF(MINT_DUMMY_USE, "dummy_use", 2, 0, 1, MintOpNoArgs)
 IROPDEF(MINT_TIER_PATCHPOINT_DATA, "tier_patchpoint_data", 2, 0, 0, MintOpShortInt)
 // These two opcodes are resolved to a normal MINT_MOV when emitting compacted instructions
 IROPDEF(MINT_MOV_SRC_OFF, "mov.src.off", 6, 1, 1, MintOpTwoShorts)
-IROPDEF(MINT_MOV_DST_OFF, "mov.dst.off", 6, 1, 1, MintOpTwoShorts)
+IROPDEF(MINT_MOV_DST_OFF, "mov.dst.off", 8, 1, 2, MintOpTwoShorts)
+IROPDEF(MINT_PHI, "phi", 2, 1, 0, MintOpNoArgs)
+IROPDEF(MINT_DEAD_PHI, "dead_phi", 1, 0, 0, MintOpNoArgs)
+IROPDEF(MINT_INTRINS_MARVIN_BLOCK_SSA1, "intrins_marvin_block_ssa1", 4, 1, 2, MintOpNoArgs)
+IROPDEF(MINT_INTRINS_MARVIN_BLOCK_SSA2, "intrins_marvin_block_ssa2", 4, 1, 2, MintOpNoArgs)
 
 #ifdef __DEFINED_IROPDEF__
 #undef IROPDEF
diff --git a/src/mono/mono/mini/interp/mintops.h b/src/mono/mono/mini/interp/mintops.h
index ec8e5f050c67..5acc555b3a4e 100644
--- a/src/mono/mono/mini/interp/mintops.h
+++ b/src/mono/mono/mini/interp/mintops.h
@@ -8,6 +8,7 @@
 #include <config.h>
 #include <glib.h>
 
+// If you change this, update jiterpreter-opcodes.ts.
 typedef enum
 {
 	MintOpNoArgs,
@@ -208,7 +209,8 @@ typedef enum {
 
 #define MINT_SWITCH_LEN(n) (4 + (n) * 2)
 
-#define MINT_IS_NOP(op) ((op) == MINT_NOP || (op) == MINT_DEF || (op) == MINT_DUMMY_USE || (op) == MINT_IL_SEQ_POINT)
+#define MINT_IS_NOP(op) ((op) == MINT_NOP || (op) == MINT_DEF || (op) == MINT_DEF_ARG || (op) == MINT_DUMMY_USE || (op) == MINT_IL_SEQ_POINT)
+#define MINT_IS_EMIT_NOP(op) ((op) == MINT_NOP || (op) == MINT_DEF || (op) == MINT_DEF_ARG || (op) == MINT_DEF_TIER_VAR || (op) == MINT_DUMMY_USE)
 #define MINT_IS_MOV(op) ((op) >= MINT_MOV_I4_I1 && (op) <= MINT_MOV_VT)
 #define MINT_IS_UNCONDITIONAL_BRANCH(op) ((op) >= MINT_BR && (op) <= MINT_CALL_HANDLER_S)
 #define MINT_IS_CONDITIONAL_BRANCH(op) ((op) >= MINT_BRFALSE_I4 && (op) <= MINT_BLT_UN_R8_S)
@@ -218,10 +220,11 @@ typedef enum {
 #define MINT_IS_SUPER_BRANCH(op) ((op) >= MINT_BRFALSE_I4_SP && (op) <= MINT_BLT_UN_I8_IMM_SP)
 #define MINT_IS_CALL(op) ((op) >= MINT_CALL && (op) <= MINT_JIT_CALL)
 #define MINT_IS_PATCHABLE_CALL(op) ((op) >= MINT_CALL && (op) <= MINT_VCALL)
-#define MINT_IS_LDC_I4(op) ((op) >= MINT_LDC_I4_M1 && (op) <= MINT_LDC_I4)
+#define MINT_IS_LDC_I4(op) ((op) >= MINT_LDC_I4_0 && (op) <= MINT_LDC_I4)
 #define MINT_IS_LDC_I8(op) ((op) >= MINT_LDC_I8_0 && (op) <= MINT_LDC_I8)
 #define MINT_IS_UNOP(op) ((op) >= MINT_ADD1_I4 && (op) <= MINT_CEQ0_I4)
 #define MINT_IS_BINOP(op) ((op) >= MINT_ADD_I4 && (op) <= MINT_CLT_UN_R8)
+#define MINT_IS_BINOP_IMM(op) ((op) >= MINT_ADD_I4_IMM && (op) <= MINT_SHR_I8_IMM)
 #define MINT_IS_BINOP_SHIFT(op) ((op) >= MINT_SHR_UN_I4 && (op) <= MINT_SHR_I8)
 #define MINT_IS_LDFLD(op) ((op) >= MINT_LDFLD_I1 && (op) <= MINT_LDFLD_O)
 #define MINT_IS_STFLD(op) ((op) >= MINT_STFLD_I1 && (op) <= MINT_STFLD_O)
@@ -232,9 +235,10 @@ typedef enum {
 #define MINT_IS_LDIND_OFFSET(op) ((op) >= MINT_LDIND_OFFSET_I1 && (op) <= MINT_LDIND_OFFSET_I8)
 #define MINT_IS_SIMD_CREATE(op) ((op) >= MINT_SIMD_V128_I1_CREATE && (op) <= MINT_SIMD_V128_I8_CREATE)
 #define MINT_IS_RETURN(op) (((op) >= MINT_RET && (op) <= MINT_RET_U2) || (op) == MINT_RET_I4_IMM || (op) == MINT_RET_I8_IMM)
+#define MINT_IS_BOX(op) ((op) == MINT_BOX || (op) == MINT_BOX_VT || (op) == MINT_BOX_PTR || (op) == MINT_BOX_NULLABLE_PTR)
 
 // TODO Add more
-#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_LDC_R4 || op == MINT_LDC_R8 || op == MINT_LDPTR || op == MINT_BOX)
+#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_LDC_R4 || op == MINT_LDC_R8 || op == MINT_LDPTR || MINT_IS_BOX(op) || op == MINT_INITLOCAL)
 
 #define MINT_CALL_ARGS 2
 #define MINT_CALL_ARGS_SREG -2
diff --git a/src/mono/mono/mini/interp/transform-opt.c b/src/mono/mono/mini/interp/transform-opt.c
index c6fc3a069c04..4ee96b7a541d 100644
--- a/src/mono/mono/mini/interp/transform-opt.c
+++ b/src/mono/mono/mini/interp/transform-opt.c
@@ -4,6 +4,11 @@
 
 #include "mintops.h"
 #include "transform.h"
+#include "interp-intrins.h"
+
+/*
+ * VAR OFFSET ALLOCATOR
+ */
 
 // Allocates var at the offset that tos points to, also updating it.
 static int
@@ -27,7 +32,9 @@ alloc_var_offset (TransformData *td, int local, gint32 *ptos)
 int
 interp_alloc_global_var_offset (TransformData *td, int var)
 {
-	return alloc_var_offset (td, var, &td->total_locals_size);
+	int offset = alloc_var_offset (td, var, &td->total_locals_size);
+	interp_mark_ref_slots_for_var (td, var);
+	return offset;
 }
 
 static void
@@ -215,7 +222,6 @@ end_active_call (TransformData *td, ActiveCalls *ac, InterpInst *call)
 }
 
 // Data structure used for offset allocation of local vars
-
 typedef struct {
 	int var;
 	gboolean is_alive;
@@ -342,7 +348,7 @@ interp_alloc_offsets (TransformData *td)
 
 					while (var != -1) {
 						if (td->vars [var].global ||
-								!td->local_ref_count || td->local_ref_count [var] > 1 ||
+								!td->var_values || td->var_values [var].ref_count > 1 ||
 								td->vars [var].no_call_args) {
 							// Some vars can't be allocated on the call args stack, since the constraint is that
 							// call args vars die after the call. This isn't necessarily true for global vars or
@@ -460,6 +466,8 @@ interp_alloc_offsets (TransformData *td)
 					add_active_call (td, &ac, td->vars [var].call);
 				} else if (!td->vars [var].global && td->vars [var].offset == -1) {
 					alloc_var_offset (td, var, &current_offset);
+					interp_mark_ref_slots_for_var (td, var);
+
 					if (current_offset > final_total_locals_size)
 						final_total_locals_size = current_offset;
 
@@ -488,12 +496,17 @@ interp_alloc_offsets (TransformData *td)
 		// These are allocated separately at the end of the stack
 		if (td->vars [i].call_args) {
 			td->vars [i].offset += td->param_area_offset;
+			interp_mark_ref_slots_for_var (td, i);
 			final_total_locals_size = MAX (td->vars [i].offset + td->vars [i].size, final_total_locals_size);
 		}
 	}
 	td->total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_ALIGNMENT);
 }
 
+/*
+ * DOMINANCE COMPUTATION
+ */
+
 static GString*
 interp_get_bb_links (InterpBasicBlock *bb)
 {
@@ -520,6 +533,1029 @@ interp_get_bb_links (InterpBasicBlock *bb)
 	return str;
 }
 
+static int
+dfs_visit (TransformData *td)
+{
+	int dfs_index = 0;
+	int next_stack_index = 0;
+	td->bblocks = (InterpBasicBlock**)mono_mempool_alloc0 (td->opt_mempool, sizeof (InterpBasicBlock*) * td->bb_count);
+	InterpBasicBlock **stack = (InterpBasicBlock**)g_malloc0 (sizeof (InterpBasicBlock*) * td->bb_count);
+
+	g_assert (!td->entry_bb->in_count);
+	stack [next_stack_index++] = td->entry_bb;
+
+	while (next_stack_index > 0) {
+		// Pop last added element
+		next_stack_index--;
+		InterpBasicBlock *bb = stack [next_stack_index];
+
+		// Process current bblock
+		td->bblocks [dfs_index] = bb;
+		bb->dfs_index = dfs_index++;
+
+		// Push all nodes to process next
+		for (int i = 0; i < bb->out_count; i++) {
+			InterpBasicBlock *out_bb = bb->out_bb [i];
+			if (out_bb->dfs_index == -1) {
+				stack [next_stack_index++] = out_bb;
+				// Mark node as gray so it is not pushed again
+				out_bb->dfs_index = -2;
+			}
+		}
+	}
+
+	g_free (stack);
+	return dfs_index;
+}
+
+static void
+interp_compute_dfs_indexes (TransformData *td)
+{
+	for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb)
+		bb->dfs_index = -1;
+	// Sort bblocks in reverse postorder
+	int dfs_index = dfs_visit (td);
+	td->bblocks_count_no_eh = dfs_index;
+
+	// Visit also bblocks reachable from eh handlers. These bblocks are not linked
+	// to the main cfg (where we do dominator computation, ssa transformation etc)
+	if (td->header->num_clauses > 0) {
+		InterpBasicBlock *current = td->entry_bb;
+		while (current != NULL) {
+			if (current->reachable && current->dfs_index == -1) {
+				current->dfs_index = dfs_index;
+				td->bblocks [dfs_index] = current;
+				dfs_index++;
+			}
+			current = current->next_bb;
+		}
+	}
+	td->bblocks_count_eh = dfs_index;
+
+	if (td->verbose_level) {
+		InterpBasicBlock *bb;
+		g_print ("\nBASIC BLOCK GRAPH:\n");
+		for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
+			GString* bb_info = interp_get_bb_links (bb);
+			g_print ("BB%d: DFS%s(%d), %s\n", bb->index, (bb->dfs_index >= td->bblocks_count_no_eh) ? "_EH" : "" , bb->dfs_index, bb_info->str);
+			g_string_free (bb_info, TRUE);
+		}
+	}
+}
+
+static InterpBasicBlock*
+dom_intersect (InterpBasicBlock **idoms, InterpBasicBlock *bb1, InterpBasicBlock *bb2)
+{
+	while (bb1 != bb2) {
+		while (bb1->dfs_index < bb2->dfs_index)
+			bb2 = idoms [bb2->dfs_index];
+		while (bb2->dfs_index < bb1->dfs_index)
+			bb1 = idoms [bb1->dfs_index];
+	}
+	return bb1;
+}
+
+static gboolean
+is_bblock_ssa_cfg (TransformData *td, InterpBasicBlock *bb)
+{
+	// bblocks with uninitialized dfs_index are unreachable
+	if (bb->dfs_index == -1)
+		return FALSE;
+	if (bb->dfs_index < td->bblocks_count_no_eh)
+		return TRUE;
+	return FALSE;
+}
+
+static void
+interp_compute_dominators (TransformData *td)
+{
+	InterpBasicBlock **idoms = (InterpBasicBlock**)mono_mempool_alloc0 (td->opt_mempool, sizeof (InterpBasicBlock*) * td->bblocks_count_no_eh);
+
+	idoms [0] = td->entry_bb;
+	gboolean changed = TRUE;
+	while (changed) {
+		changed = FALSE;
+		// all bblocks in reverse post order except entry
+		for (int i = 1; i < td->bblocks_count_no_eh; i++) {
+			InterpBasicBlock *bb = td->bblocks [i];
+			InterpBasicBlock *new_idom = NULL;
+			// pick candidate idom from first processed predecessor of it
+			int j;
+			for (j = 0; j < bb->in_count; j++) {
+                                InterpBasicBlock *in_bb = bb->in_bb [j];
+                                if (is_bblock_ssa_cfg (td, in_bb) && idoms [in_bb->dfs_index]) {
+                                        new_idom = in_bb;
+                                        break;
+                                }
+                        }
+
+			// intersect new_idom with dominators from the other predecessors
+			for (; j < bb->in_count; j++) {
+				InterpBasicBlock *in_bb = bb->in_bb [j];
+				if (is_bblock_ssa_cfg (td, in_bb) && idoms [in_bb->dfs_index])
+					new_idom = dom_intersect (idoms, in_bb, new_idom);
+			}
+
+			// check if we obtained new idom
+			if (idoms [i] != new_idom) {
+				idoms [i] = new_idom;
+				changed = TRUE;
+			}
+		}
+	}
+
+	td->idoms = idoms;
+
+	// Build `dominated` bblock list for each bblock
+	for (int i = 1; i < td->bblocks_count_no_eh; i++) {
+		InterpBasicBlock *bb = td->bblocks [i];
+		InterpBasicBlock *idom = td->idoms [i];
+		if (idom)
+			idom->dominated = g_slist_prepend (idom->dominated, bb);
+	}
+
+	if (td->verbose_level) {
+		InterpBasicBlock *bb;
+		g_print ("\nBASIC BLOCK IDOMS:\n");
+		for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
+			if (!is_bblock_ssa_cfg (td, bb))
+				continue;
+			g_print ("IDOM (BB%d) = BB%d\n", bb->index, td->idoms [bb->dfs_index]->index);
+		}
+
+		g_print ("\nBASIC BLOCK DOMINATED:\n");
+		for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
+			if (!is_bblock_ssa_cfg (td, bb))
+				continue;
+			if (bb->dominated) {
+				g_print ("DOMINATED (BB%d)  = {", bb->index);
+				GSList *dominated = bb->dominated;
+				while (dominated) {
+					InterpBasicBlock *dominated_bb = (InterpBasicBlock*)dominated->data;
+					g_print (" BB%d", dominated_bb->index);
+					dominated = dominated->next;
+				}
+				g_print (" }\n");
+			}
+		}
+	}
+}
+
+static void
+interp_compute_dominance_frontier (TransformData *td)
+{
+	int bitsize = mono_bitset_alloc_size (td->bblocks_count_no_eh, 0);
+	char *mem = (char *)mono_mempool_alloc0 (td->opt_mempool, bitsize * td->bblocks_count_no_eh);
+
+	for (int i = 0; i < td->bblocks_count_no_eh; i++) {
+		td->bblocks [i]->dfrontier = mono_bitset_mem_new (mem, td->bblocks_count_no_eh, 0);
+		mem += bitsize;
+	}
+
+	for (int i = 0; i < td->bblocks_count_no_eh; i++) {
+		InterpBasicBlock *bb = td->bblocks [i];
+
+		if (bb->in_count > 1) {
+			for (int j = 0; j < bb->in_count; ++j) {
+				InterpBasicBlock *p = bb->in_bb [j];
+				if (!is_bblock_ssa_cfg (td, p))
+					continue;
+
+				g_assert (p->dfs_index || p == td->entry_bb);
+
+				while (p != td->idoms [bb->dfs_index]) {
+					g_assert (bb->dfs_index < td->bblocks_count_no_eh);
+					mono_bitset_set_fast (p->dfrontier, bb->dfs_index);
+					p = td->idoms [p->dfs_index];
+				}
+			}
+		}
+	}
+
+	if (td->verbose_level) {
+		InterpBasicBlock *bb;
+		g_print ("\nBASIC BLOCK DFRONTIERS:\n");
+		for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
+			if (!is_bblock_ssa_cfg (td, bb))
+				continue;
+			g_print ("DFRONTIER (BB%d) = {", bb->index);
+			int i;
+			mono_bitset_foreach_bit (bb->dfrontier, i, td->bb_count) {
+				g_print (" BB%d", td->bblocks [i]->index);
+			}
+			g_print (" }\n");
+		}
+	}
+}
+
+static void
+interp_compute_dominance (TransformData *td)
+{
+	/*
+	 * A dominator for a bblock n, is a bblock that is reached on every path to n. Dominance is transitive.
+	 * An immediate dominator for a bblock n, is the bblock that dominates n, but doesn't dominate any other
+	 * dominators of n, meaning it is the closest dominator to n. The dominance frontier of a node V is the set
+	 * of nodes where the dominance stops. This means that it is the set of nodes where node V doesn't dominate
+	 * it, but it does dominate a predecessor of it (including if the predecessor is V itself).
+	 *
+	 * The dominance frontier is relevant for SSA computation since, for a var defined in a bblock, the DF of bblock
+	 * represents the set of bblocks where we need to add a PHI opcode for that variable.
+	 */
+	interp_compute_dfs_indexes (td);
+
+	interp_compute_dominators (td);
+
+	interp_compute_dominance_frontier (td);
+}
+
+/*
+ * SSA TRANSFORMATION
+ */
+static void
+compute_eh_var_cb (TransformData *td, int *pvar, gpointer data)
+{
+	int var = *pvar;
+	td->vars [var].eh_var = TRUE;
+}
+
+static void
+interp_compute_eh_vars (TransformData *td)
+{
+	// FIXME we can now remove EH bblocks. This means some vars can stop being EH vars
+
+	// EH bblocks are stored separately and are not reachable from the non-EF control flow
+	// path. Any var reachable from EH bblocks will not be in SSA form.
+	for (int i = td->bblocks_count_no_eh; i < td->bblocks_count_eh; i++) {
+		InterpBasicBlock *bb = td->bblocks [i];
+		for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) {
+			if (ins->opcode == MINT_LDLOCA_S)
+				td->vars [ins->sregs [0]].eh_var = TRUE;
+			interp_foreach_ins_var (td, ins, bb, compute_eh_var_cb);
+		}
+	}
+
+	// If we have a try block that might catch exceptions, then we can't do any propagation
+	// of the values defined in the block since an exception could interrupt the normal control
+	// flow. All vars defined in this block will not be in SSA form.
+	for (unsigned int i = 0; i < td->header->num_clauses; i++) {
+		MonoExceptionClause *c = &td->header->clauses [i];
+		if (c->flags == MONO_EXCEPTION_CLAUSE_NONE ||
+				c->flags == MONO_EXCEPTION_CLAUSE_FILTER) {
+			InterpBasicBlock *bb = td->offset_to_bb [c->try_offset];
+			int try_end = c->try_offset + c->try_len;
+			g_assert (bb);
+			while (bb->il_offset != -1 && bb->il_offset < try_end) {
+				for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) {
+					if (mono_interp_op_dregs [ins->opcode])
+						td->vars [ins->dreg].eh_var = TRUE;
+				}
+				bb = bb->next_bb;
+			}
+		}
+	}
+
+	td->eh_vars_computed = TRUE;
+}
+
+static void
+interp_compute_ssa_vars (TransformData *td)
+{
+	if (!td->eh_vars_computed)
+		interp_compute_eh_vars (td);
+
+	for (unsigned int i = 0; i < td->vars_size; i++) {
+		if (td->vars [i].indirects > 0) {
+			td->vars [i].no_ssa = TRUE;
+			td->vars [i].has_indirects = TRUE;
+		} else {
+			td->vars [i].has_indirects = FALSE;
+			if (td->vars [i].eh_var)
+				td->vars [i].no_ssa = TRUE;
+			else
+				td->vars [i].no_ssa = FALSE;
+		}
+	}
+}
+
+static gboolean
+var_is_ssa_form (TransformData *td, int var)
+{
+	if (td->vars [var].no_ssa)
+		return FALSE;
+
+	return TRUE;
+}
+
+static gboolean
+var_has_indirects (TransformData *td, int var)
+{
+	if (td->vars [var].has_indirects)
+		return TRUE;
+
+	return FALSE;
+}
+
+static InterpVarValue*
+get_var_value (TransformData *td, int var)
+{
+	if (var_is_ssa_form (td, var))
+		return &td->var_values [var];
+
+	if (var_has_indirects (td, var))
+		return NULL;
+
+	// No ssa var, check if we have a def set for the current bblock
+	if (td->var_values [var].def) {
+		if (td->var_values [var].liveness.bb_dfs_index == td->cbb->dfs_index)
+			return &td->var_values [var];
+	}
+	return NULL;
+
+}
+
+static InterpInst*
+get_var_value_def (TransformData *td, int var)
+{
+	InterpVarValue *val = get_var_value (td, var);
+	if (val)
+		return val->def;
+	return NULL;
+}
+
+static int
+get_var_value_type (TransformData *td, int var)
+{
+	InterpVarValue *val = get_var_value (td, var);
+	if (val)
+		return val->type;
+	return VAR_VALUE_NONE;
+}
+
+static void
+compute_global_var_cb (TransformData *td, int *pvar, gpointer data)
+{
+	int var = *pvar;
+	InterpBasicBlock *bb = (InterpBasicBlock*)data;
+	InterpVar *var_data = &td->vars [var];
+	if (!var_is_ssa_form (td, var) || td->vars [var].ext_index == -1)
+		return;
+	// If var is used in another block than the one that it is declared then mark it as global
+	if (var_data->declare_bbs && var_data->declare_bbs->data != bb) {
+		int ext_index = td->vars [var].ext_index;
+		td->renamable_vars [ext_index].ssa_global = TRUE;
+	}
+}
+
+// We obtain the list of global vars, as well as the list of bblocks where each one of the global vars is declared.
+static void
+interp_compute_global_vars (TransformData *td)
+{
+	InterpBasicBlock *bb;
+	for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
+		if (!is_bblock_ssa_cfg (td, bb))
+			continue;
+		InterpInst *ins;
+		for (ins = bb->first_ins; ins != NULL; ins = ins->next) {
+			if (mono_interp_op_dregs [ins->opcode] && var_is_ssa_form (td, ins->dreg)) {
+				// Save the list of bblocks where a global var is defined in
+				InterpVar *var_data = &td->vars [ins->dreg];
+				if (!var_data->declare_bbs) {
+					var_data->declare_bbs = g_slist_prepend (NULL, bb);
+				} else {
+					int ext_index = interp_make_var_renamable (td, ins->dreg);
+					if (!g_slist_find (var_data->declare_bbs, bb)) {
+						// Var defined in multiple bblocks, it is ssa global
+						var_data->declare_bbs = g_slist_prepend (var_data->declare_bbs, bb);
+						td->renamable_vars [ext_index].ssa_global = TRUE;
+					}
+				}
+			}
+		}
+	}
+
+	for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
+		if (!is_bblock_ssa_cfg (td, bb))
+			continue;
+		InterpInst *ins;
+		for (ins = bb->first_ins; ins != NULL; ins = ins->next)
+			interp_foreach_ins_svar (td, ins, bb, compute_global_var_cb);
+	}
+
+	if (td->verbose_level) {
+		g_print ("\nSSA GLOBALS:\n");
+		for (unsigned int i = 0; i < td->renamable_vars_size; i++) {
+			if (td->renamable_vars [i].ssa_global) {
+				int var = td->renamable_vars [i].var_index;
+				g_print ("DECLARE_BB (%d) = {", var);
+				GSList *l = td->vars [var].declare_bbs;
+				while (l) {
+					g_print (" BB%d", ((InterpBasicBlock*)l->data)->index);
+					l = l->next;
+				}
+				g_print (" }\n");
+			}
+		}
+	}
+}
+
+static void
+compute_gen_set_cb (TransformData *td, int *pvar, gpointer data)
+{
+	int var = *pvar;
+	InterpBasicBlock *bb = (InterpBasicBlock*)data;
+
+	int ext_index = td->vars [var].ext_index;
+	if (ext_index == -1)
+		return;
+
+	if (!td->renamable_vars [ext_index].ssa_global)
+		return;
+
+	if (!mono_bitset_test_fast (bb->kill_set, ext_index))
+		mono_bitset_set_fast (bb->gen_set, ext_index);
+}
+
+// For each bblock, computes the kill set (the set of vars defined by the bblock)
+// and gen set (the set of vars used by the bblock, with the definition not being
+// in the bblock).
+static void
+compute_gen_kill_sets (TransformData *td)
+{
+	int bitsize = mono_bitset_alloc_size (td->renamable_vars_size, 0);
+	char *mem = (char *)mono_mempool_alloc0 (td->opt_mempool, bitsize * td->bblocks_count_no_eh * 4);
+
+	for (int i = 0; i < td->bblocks_count_no_eh; i++) {
+		InterpBasicBlock *bb = td->bblocks [i];
+
+		bb->gen_set = mono_bitset_mem_new (mem, td->renamable_vars_size, 0);
+		mem += bitsize;
+		bb->kill_set = mono_bitset_mem_new (mem, td->renamable_vars_size, 0);
+		mem += bitsize;
+		bb->live_in_set = mono_bitset_mem_new (mem, td->renamable_vars_size, 0);
+		mem += bitsize;
+		bb->live_out_set = mono_bitset_mem_new (mem, td->renamable_vars_size, 0);
+		mem += bitsize;
+
+		for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) {
+			interp_foreach_ins_svar (td, ins, bb, compute_gen_set_cb);
+			if (mono_interp_op_dregs [ins->opcode]) {
+				int ext_index = td->vars [ins->dreg].ext_index;
+				if (ext_index != -1 && td->renamable_vars [ext_index].ssa_global)
+					mono_bitset_set_fast (bb->kill_set, ext_index);
+			}
+		}
+	}
+}
+
+// Compute live_in and live_out sets
+// For a bblock, live_in contains all vars that are live at exit of bblock and not redefined,
+// together with all vars used in the bblock without being defined. For a bblock, live_out set
+// contains all vars that are live_in any successor. This computation starts with empty sets
+// (starting to generate live vars from the gen sets) and it is run iteratively until the
+// computation converges.
+static void
+recompute_live_out (TransformData *td, InterpBasicBlock *bb)
+{
+	for (int i = 0; i < bb->out_count; i++) {
+		InterpBasicBlock *sbb = bb->out_bb [i];
+
+		// Recompute live_in_set for each successor of bb
+		mono_bitset_copyto_fast (sbb->live_out_set, sbb->live_in_set);
+		mono_bitset_sub_fast (sbb->live_in_set, sbb->kill_set);
+		mono_bitset_union_fast (sbb->live_in_set, sbb->gen_set);
+
+		// Recompute live_out_set of bb, by adding the live_in_set of each successor
+		mono_bitset_union_fast (bb->live_out_set, sbb->live_in_set);
+	}
+}
+
+// For each bblock, compute LiveIn, LiveOut sets tracking liveness for the previously computed global vars
+static void
+interp_compute_pruned_ssa_liveness (TransformData *td)
+{
+	compute_gen_kill_sets (td);
+
+	gboolean changed = TRUE;
+	while (changed) {
+		changed = FALSE;
+		for (int i = 0; i < td->bblocks_count_no_eh; i++) {
+			InterpBasicBlock *bb = td->bblocks [i];
+			guint32 prev_count = mono_bitset_count (bb->live_out_set);
+			recompute_live_out (td, bb);
+			if (prev_count != mono_bitset_count (bb->live_out_set))
+				changed = TRUE;
+		}
+	}
+
+	if (td->verbose_level) {
+		InterpBasicBlock *bb;
+		g_print ("\nBASIC BLOCK LIVENESS:\n");
+		for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
+			unsigned int i;
+			if (!is_bblock_ssa_cfg (td, bb))
+				continue;
+			g_print ("BB%d\n\tLIVE_IN = {", bb->index);
+			mono_bitset_foreach_bit (bb->live_in_set, i, td->renamable_vars_size) {
+				g_print (" %d", td->renamable_vars [i].var_index);
+			}
+			g_print (" }\n\tLIVE_OUT = {", bb->index);
+			mono_bitset_foreach_bit (bb->live_out_set, i, td->renamable_vars_size) {
+				g_print (" %d", td->renamable_vars [i].var_index);
+			}
+			g_print (" }\n");
+		}
+	}
+}
+
+static gboolean
+bb_has_phi (TransformData *td, InterpBasicBlock *bb, int var)
+{
+	InterpInst *ins = bb->first_ins;
+	while (ins) {
+		if (ins->opcode == MINT_PHI) {
+			if (ins->dreg == var)
+				return TRUE;
+		} else if (ins->opcode == MINT_DEAD_PHI) {
+			MonoBitSet *bitset = ins->info.dead_phi_vars;
+			int ext_index = td->vars [var].ext_index;
+			if (mono_bitset_test_fast (bitset, ext_index))
+				return TRUE;
+		} else {
+			// if we have a phi it is at the start of the bb
+			return FALSE;
+		}
+		ins = ins->next;
+	}
+	return FALSE;
+}
+
+static void
+bb_insert_phi (TransformData *td, InterpBasicBlock *bb, int var)
+{
+	InterpInst *first_ins = NULL;
+	// We keep dead phi as first instruction so we can find it quickly
+	if (bb->first_ins && bb->first_ins->opcode == MINT_DEAD_PHI)
+		first_ins = bb->first_ins;
+	InterpInst *phi = interp_insert_ins_bb (td, bb, first_ins, MINT_PHI);
+	if (td->verbose_level)
+		g_print ("BB%d NEW_PHI %d\n", bb->index, var);
+
+	phi->dreg = var;
+	phi->info.args = (int*)mono_mempool_alloc (td->opt_mempool, (bb->in_count + 1) * sizeof (int));
+	int i;
+	for (i = 0; i < bb->in_count; i++)
+		phi->info.args [i] = var;
+	phi->info.args [i] = -1;
+}
+
+static void
+bb_insert_dead_phi (TransformData *td, InterpBasicBlock *bb, int var)
+{
+	MonoBitSet *bitset;
+	if (bb->first_ins && bb->first_ins->opcode == MINT_DEAD_PHI) {
+		bitset = bb->first_ins->info.dead_phi_vars;
+	} else {
+		InterpInst *phi = interp_insert_ins_bb (td, bb, NULL, MINT_DEAD_PHI);
+		gpointer mem = mono_mempool_alloc0 (td->opt_mempool, mono_bitset_alloc_size (td->renamable_vars_size, 0));
+		phi->info.dead_phi_vars = bitset = mono_bitset_mem_new (mem, td->renamable_vars_size, 0);
+	}
+	int ext_index = td->vars [var].ext_index;
+	mono_bitset_set_fast (bitset, ext_index);
+	if (td->verbose_level)
+		g_print ("BB%d NEW_DEAD_PHI %d\n", bb->index, var);
+
+}
+
+static void
+insert_phi_nodes (TransformData *td)
+{
+	if (td->verbose_level)
+		g_print ("\nINSERT PHI NODES:\n");
+	for (unsigned int i = 0; i < td->renamable_vars_size; i++) {
+		if (!td->renamable_vars [i].ssa_global)
+			continue;
+
+		// For every definition of this var, we add a phi node at the start of
+		// all bblocks in the dominance frontier of the defining bblock.
+		int var = td->renamable_vars [i].var_index;
+		GSList *workset = g_slist_copy (td->vars [var].declare_bbs);
+		while (workset) {
+			GSList *old_head = workset;
+			InterpBasicBlock *bb = (InterpBasicBlock*)workset->data;
+			workset = workset->next;
+			g_free (old_head);
+			g_assert (is_bblock_ssa_cfg (td, bb));
+			int j;
+			mono_bitset_foreach_bit (bb->dfrontier, j, td->bb_count) {
+				InterpBasicBlock *bd = td->bblocks [j];
+				g_assert (is_bblock_ssa_cfg (td, bb));
+				if (!bb_has_phi (td, bd, var)) {
+					if (mono_bitset_test_fast (bd->live_in_set, i)) {
+						td->renamable_vars [i].ssa_fixed = TRUE;
+						bb_insert_phi (td, bd, var);
+					} else {
+						// We need this only for vars that are ssa fixed, but it is not clear
+						// if the current var is fixed or not. We will ignore these opcodes if
+						// the var is not actually ssa fixed.
+						bb_insert_dead_phi (td, bd, var);
+					}
+					if (!g_slist_find (workset, bd))
+						workset = g_slist_prepend (workset, bd);
+				}
+			}
+		}
+	}
+}
+
+// Additional fixed vars, in addition to vars that are args to phi nodes
+static void
+insert_tiering_defs (TransformData *td)
+{
+	for (int i = 0; i < td->bblocks_count_no_eh; i++) {
+		InterpBasicBlock *bb = td->bblocks [i];
+		if (!bb->patchpoint_bb)
+			continue;
+
+		// All IL locals live at entry to this bb have to be fixed
+		for (unsigned int k = 0; k < td->renamable_vars_size; k++) {
+			int var_index = td->renamable_vars [k].var_index;
+			if (td->vars [var_index].il_global && mono_bitset_test_fast (bb->live_in_set, k)) {
+				td->renamable_vars [k].ssa_fixed = TRUE;
+
+				// Patchpoints introduce some complications since some variables have to be
+				// accessed from same offset between unoptimized and optimized methods.
+				//
+				// Consider the following scenario
+				// BB0 -> BB2       BB0: TMP <- def; IL_VAR <- TMP
+				// | ^              BB1: Use IL_VAR
+				// v |              BB2: Use IL_VAR
+				// BB1
+				//
+				//     BB1 is a basic block containing a patchpoint, BB0 dominates both BB1 and BB2.
+				// IL_VAR is used both in BB1 and BB2. In BB1, in optimized code, we could normally
+				// replace use of IL_VAR with use of TMP. However, this is incorrect, because TMP
+				// can be allocated at a different offset from IL_VAR and, if we enter the method
+				// from the patchpoint in BB1, the data at var TMP would not be initialized since
+				// we only copy the IL var space.
+				//     Even if we prevent the copy propagation in BB1, then tiering is still broken.
+				// In BB2 we could replace use of IL_VAR with TMP, and we end up hitting the same problem.
+				// Optimized code will attempt to access value of IL_VAR from the offset of TMP_VAR,
+				// which is not initialized if we enter from the patchpoint in BB1.
+				//     We solve these issues by inserting a MINT_DEF_TIER_VAR in BB1. This instruction
+				// prevents cprop of the IL_VAR in the patchpoint bblock since MINT_DEF_TIER_VAR is seen
+				// as a redefinition. In addition to that, in BB2 we now have 2 reaching definitions for
+				// IL_VAR, the original one from BB0 and the one from patchpoint bblock from BB1. This
+				// will force a phi definition in BB2 and we will once again be force to access IL_VAR
+				// from the original offset that is equal to the one in unoptimized method.
+				InterpInst *def = interp_insert_ins_bb (td, bb, NULL, MINT_DEF_TIER_VAR);
+				def->sregs [0] = var_index;
+				def->dreg = var_index;
+				InterpVar *var_data = &td->vars [var_index];
+				// Record the new declaration for this var. Phi nodes insertion phase will account for this
+				if (!g_slist_find (var_data->declare_bbs, bb))
+					var_data->declare_bbs = g_slist_prepend (var_data->declare_bbs, bb);
+				if (td->verbose_level) {
+					g_print ("insert patchpoint var define in BB%d:\n\t", bb->index);
+					interp_dump_ins (def, td->data_items);
+				}
+			}
+		}
+	}
+}
+
+static int
+get_renamed_var (TransformData *td, int var, gboolean def_arg)
+{
+	int ext_index = td->vars [var].ext_index;
+	g_assert (ext_index != -1);
+	int renamed_var = interp_create_var (td, td->vars [var].type);
+	td->vars [renamed_var].def_arg = def_arg;
+
+	if (td->renamable_vars [ext_index].ssa_fixed) {
+		td->vars [renamed_var].renamed_ssa_fixed = TRUE;
+		interp_create_renamed_fixed_var (td, renamed_var, var);
+	} else {
+		// Renamed var reference the orignal var through the ext_index
+		td->vars [renamed_var].ext_index = ext_index;
+	}
+	td->renamable_vars [ext_index].ssa_stack = g_slist_prepend (td->renamable_vars [ext_index].ssa_stack, (gpointer)(gsize)renamed_var);
+	return renamed_var;
+}
+
+static void
+rename_ins_var_cb (TransformData *td, int *pvar, gpointer data)
+{
+	int var = *pvar;
+	int ext_index = td->vars [var].ext_index;
+	if (ext_index != -1) {
+		int renamed_var = (int)(gsize)td->renamable_vars [ext_index].ssa_stack->data;
+		g_assert (renamed_var != -1);
+		*pvar = renamed_var;
+	}
+}
+
+static void
+rename_phi_args_in_out_bbs (TransformData *td, InterpBasicBlock *bb)
+{
+        for (int i = 0; i < bb->out_count; i++) {
+                InterpBasicBlock *bb_out = bb->out_bb [i];
+
+		int aindex;
+                for (aindex = 0; aindex < bb_out->in_count; aindex++)
+                        if (bb_out->in_bb [aindex] == bb)
+                                break;
+
+		for (InterpInst *ins = bb_out->first_ins; ins != NULL; ins = ins->next) {
+			if (ins->opcode == MINT_PHI) {
+				int var = ins->info.args [aindex];
+				int ext_index = td->vars [var].ext_index;
+				GSList *stack = td->renamable_vars [ext_index].ssa_stack;
+				ins->info.args [aindex] = (int)(gsize)stack->data;
+			} else if (ins->opcode == MINT_DEAD_PHI) {
+				continue;
+			} else if (ins->opcode != MINT_NOP) {
+				break;
+			}
+                }
+        }
+}
+
+static void
+rename_vars_in_bb_start (TransformData *td, InterpBasicBlock *bb)
+{
+	InterpInst *ins;
+
+	// Rename vars defined with MINT_PHI
+	for (ins = bb->first_ins; ins != NULL; ins = ins->next) {
+		if (ins->opcode == MINT_PHI) {
+			ins->dreg = get_renamed_var (td, ins->dreg, FALSE);
+		} else if (ins->opcode == MINT_DEAD_PHI) {
+			unsigned int ext_index;
+			mono_bitset_foreach_bit (ins->info.dead_phi_vars, ext_index, td->renamable_vars_size) {
+				if (td->renamable_vars [ext_index].ssa_fixed) {
+					// we push an invalid var that will be just a marker for marking var live limits
+					td->renamable_vars [ext_index].ssa_stack = g_slist_prepend (td->renamable_vars [ext_index].ssa_stack, (gpointer)(gsize)-1);
+				}
+			}
+		} else {
+			break;
+		}
+	}
+
+	InterpLivenessPosition current_liveness;
+	current_liveness.bb_dfs_index = bb->dfs_index;
+	current_liveness.ins_index = 0;
+
+	// Use renamed definition for sources
+	for (; ins != NULL; ins = ins->next) {
+		if (interp_ins_is_nop (ins) || ins->opcode == MINT_DEAD_PHI)
+			continue;
+		ins->flags |= INTERP_INST_FLAG_LIVENESS_MARKER;
+		current_liveness.ins_index++;
+
+		interp_foreach_ins_svar (td, ins, NULL, rename_ins_var_cb);
+		if (!mono_interp_op_dregs [ins->opcode] || td->vars [ins->dreg].ext_index == -1)
+			continue;
+
+		if (ins->opcode == MINT_DEF_ARG) {
+			ins->dreg = get_renamed_var (td, ins->dreg, TRUE);
+		} else if (mono_interp_op_dregs [ins->opcode]) {
+			g_assert (!td->vars [ins->dreg].renamed_ssa_fixed);
+			int renamable_ext_index = td->vars [ins->dreg].ext_index;
+			if (td->renamable_vars [renamable_ext_index].ssa_fixed &&
+					td->renamable_vars [renamable_ext_index].ssa_stack) {
+				// Mark the exact liveness end limit for the ssa fixed var that is overwritten (the old entry on the stack)
+				int renamed_var = (int)(gsize)td->renamable_vars [renamable_ext_index].ssa_stack->data;
+				if (renamed_var != -1) {
+					g_assert (td->vars [renamed_var].renamed_ssa_fixed);
+					int renamed_var_ext = td->vars [renamed_var].ext_index;
+					InterpLivenessPosition *liveness_ptr = (InterpLivenessPosition*)mono_mempool_alloc (td->opt_mempool, sizeof (InterpLivenessPosition));
+					*liveness_ptr = current_liveness;
+					td->renamed_fixed_vars [renamed_var_ext].live_limit_bblocks = g_slist_prepend (td->renamed_fixed_vars [renamed_var_ext].live_limit_bblocks, liveness_ptr);
+				}
+			}
+			ins->dreg = get_renamed_var (td, ins->dreg, FALSE);
+		}
+	}
+
+	rename_phi_args_in_out_bbs (td, bb);
+}
+
+static void
+rename_vars_in_bb_end (TransformData *td, InterpBasicBlock *bb)
+{
+	InterpInst *ins;
+
+	// All vars currently on the ssa stack are live until the end of the bblock
+	for (unsigned int i = 0; i < td->renamable_vars_size; i++) {
+		if (td->renamable_vars [i].ssa_fixed && td->renamable_vars [i].ssa_stack) {
+			int renamed_var = (int)(gsize)td->renamable_vars [i].ssa_stack->data;
+			if (renamed_var != -1) {
+				g_assert (td->vars [renamed_var].renamed_ssa_fixed);
+				int renamed_var_ext = td->vars [renamed_var].ext_index;
+				if (!td->renamed_fixed_vars [renamed_var_ext].live_out_bblocks) {
+					gpointer mem = mono_mempool_alloc0 (td->opt_mempool, mono_bitset_alloc_size (td->bblocks_count_no_eh, 0));
+					td->renamed_fixed_vars [renamed_var_ext].live_out_bblocks = mono_bitset_mem_new (mem, td->bblocks_count_no_eh, 0);
+				}
+
+				mono_bitset_set_fast (td->renamed_fixed_vars [renamed_var_ext].live_out_bblocks, bb->dfs_index);
+			}
+		}
+	}
+
+	// Pop from the stack any new vars defined in this bblock
+	for (ins = bb->first_ins; ins != NULL; ins = ins->next) {
+		if (mono_interp_op_dregs [ins->opcode]) {
+			int ext_index = td->vars [ins->dreg].ext_index;
+			if (ext_index == -1)
+				continue;
+			if (td->vars [ins->dreg].renamed_ssa_fixed)
+				ext_index = td->renamed_fixed_vars [ext_index].renamable_var_ext_index;
+			GSList *prev_head = td->renamable_vars [ext_index].ssa_stack;
+			td->renamable_vars [ext_index].ssa_stack = prev_head->next;
+			g_free (prev_head);
+		} else if (ins->opcode == MINT_DEAD_PHI) {
+			unsigned int ext_index;
+			mono_bitset_foreach_bit (ins->info.dead_phi_vars, ext_index, td->renamable_vars_size) {
+				if (td->renamable_vars [ext_index].ssa_fixed) {
+					GSList *prev_head = td->renamable_vars [ext_index].ssa_stack;
+					td->renamable_vars [ext_index].ssa_stack = prev_head->next;
+					g_free (prev_head);
+				}
+			}
+			interp_clear_ins (ins);
+		}
+	}
+
+}
+
+static void
+rename_vars (TransformData *td)
+{
+	int next_stack_index = 0;
+	InterpBasicBlock **stack = (InterpBasicBlock**)g_malloc0 (sizeof (InterpBasicBlock*) * td->bblocks_count_no_eh);
+	gboolean *bb_status = (gboolean*)g_malloc0 (sizeof (InterpBasicBlock*) * td->bblocks_count_no_eh);
+
+	stack [next_stack_index++] = td->entry_bb;
+
+	while (next_stack_index > 0) {
+		next_stack_index--;
+		InterpBasicBlock *bb = stack [next_stack_index];
+
+		if (!bb_status [bb->dfs_index]) {
+			rename_vars_in_bb_start (td, bb);
+			bb_status [bb->dfs_index] = TRUE;
+			stack [next_stack_index++] = bb;
+
+			// Rename recursively every successor of bb in the dominator tree
+			GSList *dominated = bb->dominated;
+			while (dominated) {
+				InterpBasicBlock *dominated_bb = (InterpBasicBlock*)dominated->data;
+				g_assert (!bb_status [dominated_bb->dfs_index]);
+				stack [next_stack_index++] = dominated_bb;
+				dominated = dominated->next;
+			}
+		} else {
+			// We reach this entry after all the successors have been processed
+			rename_vars_in_bb_end (td, bb);
+		}
+	}
+
+	g_free (stack);
+	g_free (bb_status);
+
+	if (td->verbose_level) {
+		g_print ("\nFIXED SSA VARS LIVENESS LIMIT:\n");
+		for (unsigned int i = 0; i < td->renamed_fixed_vars_size; i++) {
+			g_print ("FIXED VAR %d\n\tNO LIVE LIMIT BBLOCKS: {", td->renamed_fixed_vars [i].var_index);
+			MonoBitSet *live_out_bblocks = td->renamed_fixed_vars [i].live_out_bblocks;
+			if (live_out_bblocks) {
+				int j;
+				mono_bitset_foreach_bit (live_out_bblocks, j, td->bblocks_count_no_eh) {
+					g_print (" BB%d", td->bblocks [j]->index);
+				}
+			}
+			g_print (" }\n");
+			g_print ("\tLIVE LIMIT BBLOCKS: {");
+			GSList *live_limit_bblocks = td->renamed_fixed_vars [i].live_limit_bblocks;
+			while (live_limit_bblocks) {
+				InterpLivenessPosition *live_limit = (InterpLivenessPosition*)live_limit_bblocks->data;
+				g_print (" (BB%d, %d)", td->bblocks [live_limit->bb_dfs_index]->index, live_limit->ins_index);
+				live_limit_bblocks = live_limit_bblocks->next;
+			}
+			g_print (" }\n");
+		}
+	}
+}
+
+static void
+interp_compute_ssa (TransformData *td)
+{
+	if (td->verbose_level) {
+		g_print ("\nIR before SSA compute:\n");
+		mono_interp_print_td_code (td);
+	}
+
+	MONO_TIME_TRACK (mono_interp_stats.ssa_compute_dominance_time, interp_compute_dominance (td));
+
+	interp_compute_ssa_vars (td);
+
+	MONO_TIME_TRACK (mono_interp_stats.ssa_compute_global_vars_time, interp_compute_global_vars (td));
+
+	MONO_TIME_TRACK (mono_interp_stats.ssa_compute_pruned_liveness_time, interp_compute_pruned_ssa_liveness (td));
+
+	insert_tiering_defs (td);
+
+	insert_phi_nodes (td);
+
+	MONO_TIME_TRACK (mono_interp_stats.ssa_rename_vars_time, rename_vars (td));
+
+	if (td->verbose_level) {
+		g_print ("\nIR after SSA compute:\n");
+		mono_interp_print_td_code (td);
+	}
+}
+
+static void
+revert_ssa_rename_cb (TransformData *td, int *pvar, gpointer data)
+{
+	int var = *pvar;
+	int ext_index = td->vars [var].ext_index;
+	if (ext_index == -1)
+		return;
+
+	int new_var = -1;
+	if (td->vars [var].renamed_ssa_fixed) {
+		int renamable_var_ext_index = td->renamed_fixed_vars [ext_index].renamable_var_ext_index;
+		new_var = td->renamable_vars [renamable_var_ext_index].var_index;
+	} else if (td->vars [var].def_arg) {
+		new_var = td->renamable_vars [ext_index].var_index;
+	}
+
+	if (new_var != -1) {
+		*pvar = new_var;
+		// Offset allocator checks ref_count to detect single use vars. Keep it updated
+		td->var_values [new_var].ref_count += td->var_values [var].ref_count;
+	}
+}
+
+static void
+interp_exit_ssa (TransformData *td)
+{
+	// Remove all MINT_PHI opcodes and revert ssa renaming
+	for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
+		InterpInst *ins;
+		for (ins = bb->first_ins; ins != NULL; ins = ins->next) {
+			if (ins->opcode == MINT_PHI || ins->opcode == MINT_DEAD_PHI)
+				ins->opcode = MINT_NOP;
+			else
+				interp_foreach_ins_var (td, ins, NULL, revert_ssa_rename_cb);
+
+			ins->flags &= ~INTERP_INST_FLAG_LIVENESS_MARKER;
+		}
+	}
+
+	// Free memory and restore state
+	for (unsigned int i = 0; i < td->vars_size; i++) {
+		if (td->vars [i].declare_bbs) {
+			g_slist_free (td->vars [i].declare_bbs);
+			td->vars [i].declare_bbs = NULL;
+		}
+		td->vars [i].ext_index = -1;
+	}
+
+	for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb)	{
+		if (bb->dominated) {
+			g_slist_free (bb->dominated);
+			bb->dominated = NULL;
+		}
+		bb->gen_set = NULL;
+		bb->kill_set = NULL;
+		bb->live_in_set = NULL;
+		bb->live_out_set = NULL;
+	}
+
+	for (unsigned int i = 0; i < td->renamable_vars_size; i++) {
+		if (td->renamable_vars [i].ssa_stack) {
+			g_slist_free (td->renamable_vars [i].ssa_stack);
+			td->renamable_vars [i].ssa_stack = NULL;
+		}
+	}
+	td->renamable_vars_size = 0;
+
+	for (unsigned int i = 0; i < td->renamed_fixed_vars_size; i++) {
+		if (td->renamed_fixed_vars [i].live_limit_bblocks) {
+			g_slist_free (td->renamed_fixed_vars [i].live_limit_bblocks);
+			td->renamed_fixed_vars [i].live_limit_bblocks = NULL;
+		}
+	}
+	td->renamed_fixed_vars_size = 0;
+}
+
+/*
+ * BASIC BLOCK OPTIMIZATION
+ */
+
 static void
 mark_bb_as_dead (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock *replace_bb)
 {
@@ -544,6 +1580,8 @@ mark_bb_as_dead (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock *repl
 			break;
 	}
 
+	if (bb->dominated)
+		g_slist_free (bb->dominated);
 	bb->dead = TRUE;
 	// bb should never be used/referenced after this
 }
@@ -592,6 +1630,14 @@ interp_merge_bblocks (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock
 		}
 	}
 
+#if defined(TARGET_WASM)
+	// Copy jiterpreter data
+	if (bbadd->backwards_branch_target)
+		bb->backwards_branch_target = TRUE;
+	if (bbadd->contains_call_instruction)
+		bb->contains_call_instruction = TRUE;
+#endif
+
 	mark_bb_as_dead (td, bbadd, bb);
 }
 
@@ -618,28 +1664,45 @@ interp_unlink_bblocks (InterpBasicBlock *from, InterpBasicBlock *to)
 	to->in_count--;
 }
 
-static gboolean
-interp_remove_bblock (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock *prev_bb)
+static void
+interp_handle_unreachable_bblock (TransformData *td, InterpBasicBlock *bb)
 {
-	gboolean needs_cprop = FALSE;
-
 	for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) {
 		if (ins->opcode == MINT_LDLOCA_S) {
 			td->vars [ins->sregs [0]].indirects--;
 			if (!td->vars [ins->sregs [0]].indirects) {
-				// We can do cprop now through this local. Run cprop again.
-				needs_cprop = TRUE;
+				if (td->verbose_level)
+					g_print ("Remove bblock %d, var %d no longer indirect\n", bb->index, ins->sregs [0]);
+				td->need_optimization_retry = TRUE;
 			}
 		}
+
+		// If preserve is set, even if we know this bblock is unreachable, we still have to keep
+		// it alive (for now at least). We just remove all instructions from it in this case.
+		if (bb->preserve)
+			interp_clear_ins (ins);
 	}
+}
+
+static void
+interp_remove_bblock (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock *prev_bb)
+{
 	while (bb->in_count)
 		interp_unlink_bblocks (bb->in_bb [0], bb);
 	while (bb->out_count)
 		interp_unlink_bblocks (bb, bb->out_bb [0]);
 	prev_bb->next_bb = bb->next_bb;
 	mark_bb_as_dead (td, bb, bb->next_bb);
+}
 
-	return needs_cprop;
+static int
+get_bb_links_capacity (int links)
+{
+	if (links <= 2)
+		return links;
+	// Return the next power of 2 bigger or equal to links
+	int leading_zero = interp_intrins_clz_i4 (links - 1);
+	return 1 << (32 - leading_zero);
 }
 
 void
@@ -655,12 +1718,15 @@ interp_link_bblocks (TransformData *td, InterpBasicBlock *from, InterpBasicBlock
 		}
 	}
 	if (!found) {
-		InterpBasicBlock **newa = (InterpBasicBlock**)mono_mempool_alloc (td->mempool, sizeof (InterpBasicBlock*) * (from->out_count + 1));
-		for (i = 0; i < from->out_count; ++i)
-			newa [i] = from->out_bb [i];
-		newa [i] = to;
+		int prev_capacity = get_bb_links_capacity (from->out_count);
+		int new_capacity = get_bb_links_capacity (from->out_count + 1);
+		if (new_capacity > prev_capacity) {
+			InterpBasicBlock **newa = (InterpBasicBlock**)mono_mempool_alloc (td->mempool, new_capacity * sizeof (InterpBasicBlock*));
+			memcpy (newa, from->out_bb, from->out_count * sizeof (InterpBasicBlock*));
+			from->out_bb = newa;
+		}
+		from->out_bb [from->out_count] = to;
 		from->out_count++;
-		from->out_bb = newa;
 	}
 
 	found = FALSE;
@@ -671,38 +1737,36 @@ interp_link_bblocks (TransformData *td, InterpBasicBlock *from, InterpBasicBlock
 		}
 	}
 	if (!found) {
-		InterpBasicBlock **newa = (InterpBasicBlock**)mono_mempool_alloc (td->mempool, sizeof (InterpBasicBlock*) * (to->in_count + 1));
-		for (i = 0; i < to->in_count; ++i)
-			newa [i] = to->in_bb [i];
-		newa [i] = from;
+		int prev_capacity = get_bb_links_capacity (to->in_count);
+		int new_capacity = get_bb_links_capacity (to->in_count + 1);
+		if (new_capacity > prev_capacity) {
+			InterpBasicBlock **newa = (InterpBasicBlock**)mono_mempool_alloc (td->mempool, new_capacity * sizeof (InterpBasicBlock*));
+			memcpy (newa, to->in_bb, to->in_count * sizeof (InterpBasicBlock*));
+			to->in_bb = newa;
+		}
+		to->in_bb [to->in_count] = from;
 		to->in_count++;
-		to->in_bb = newa;
 	}
 }
 
 static void
 interp_mark_reachable_bblocks (TransformData *td)
 {
-	InterpBasicBlock **queue = mono_mempool_alloc0 (td->mempool, td->bb_count * sizeof (InterpBasicBlock*));
+	InterpBasicBlock **queue = g_malloc0 (td->bb_count * sizeof (InterpBasicBlock*));
 	InterpBasicBlock *current;
 	int cur_index = 0;
 	int next_position = 0;
 
-	// FIXME There is no need to force eh bblocks to remain alive
 	current = td->entry_bb;
 	while (current != NULL) {
-		if (current->eh_block || current->patchpoint_data) {
-			queue [next_position++] = current;
-			current->reachable = TRUE;
-		} else {
-			current->reachable = FALSE;
-		}
+		current->reachable = FALSE;
 		current = current->next_bb;
 	}
 
 	queue [next_position++] = td->entry_bb;
 	td->entry_bb->reachable = TRUE;
 
+retry:
 	// We have the roots, traverse everything else
 	while (cur_index < next_position) {
 		current = queue [cur_index++];
@@ -714,6 +1778,25 @@ interp_mark_reachable_bblocks (TransformData *td)
 			}
 		}
 	}
+
+	if (td->header->num_clauses) {
+		gboolean needs_retry = FALSE;
+		current = td->entry_bb;
+		while (current != NULL) {
+			if (current->try_bblock && !current->reachable && current->try_bblock->reachable) {
+				// Try bblock is reachable and the handler is not yet marked
+				queue [next_position++] = current;
+				current->reachable = TRUE;
+				needs_retry = TRUE;
+			}
+			current = current->next_bb;
+		}
+
+		if (needs_retry)
+			goto retry;
+	}
+
+	g_free (queue);
 }
 
 /**
@@ -804,7 +1887,13 @@ interp_reorder_bblocks (TransformData *td)
 {
 	InterpBasicBlock *bb;
 	for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
-		if (bb->eh_block)
+		if (bb->preserve)
+			continue;
+		// We do optimizations below where we reduce the in count of bb, but it is ideal to have
+		// this bblock remain alive so we can correctly resolve mapping from unoptimized method.
+		// We could in theory address this and attempt to remove bb, but this scenario is extremely
+		// rare and doesn't seem worth the investment.
+		if (bb->patchpoint_data)
 			continue;
 		InterpInst *first = interp_first_ins (bb);
 		if (!first)
@@ -883,7 +1972,8 @@ interp_reorder_bblocks (TransformData *td)
 				InterpInst *last_ins = interp_last_ins (in_bb);
 				if (last_ins && (MINT_IS_CONDITIONAL_BRANCH (last_ins->opcode) ||
 						MINT_IS_UNCONDITIONAL_BRANCH (last_ins->opcode)) &&
-						last_ins->info.target_bb == bb) {
+						last_ins->info.target_bb == bb &&
+						in_bb != bb) {
 					InterpBasicBlock *target_bb = first->info.target_bb;
 					last_ins->info.target_bb = target_bb;
 					interp_unlink_bblocks (in_bb, bb);
@@ -910,11 +2000,10 @@ interp_reorder_bblocks (TransformData *td)
 }
 
 // Traverse the list of basic blocks and merge adjacent blocks
-static gboolean
+static void
 interp_optimize_bblocks (TransformData *td)
 {
 	InterpBasicBlock *bb = td->entry_bb;
-	gboolean needs_cprop = FALSE;
 
 	interp_reorder_bblocks (td);
 
@@ -925,82 +2014,79 @@ interp_optimize_bblocks (TransformData *td)
 		if (!next_bb)
 			break;
 		if (!next_bb->reachable) {
-			if (td->verbose_level)
-				g_print ("Removed BB%d\n", next_bb->index);
-			needs_cprop |= interp_remove_bblock (td, next_bb, bb);
-			continue;
-		} else if (bb->out_count == 1 && bb->out_bb [0] == next_bb && next_bb->in_count == 1 && !next_bb->eh_block && !next_bb->patchpoint_data) {
+			interp_handle_unreachable_bblock (td, next_bb);
+			if (next_bb->preserve) {
+				if (td->verbose_level)
+					g_print ("Removed BB%d, cleared instructions only\n", next_bb->index);
+			} else {
+				if (td->verbose_level)
+					g_print ("Removed BB%d\n", next_bb->index);
+				interp_remove_bblock (td, next_bb, bb);
+				continue;
+			}
+		} else if (bb->out_count == 1 && bb->out_bb [0] == next_bb && next_bb->in_count == 1 && !next_bb->preserve && !next_bb->patchpoint_data) {
 			g_assert (next_bb->in_bb [0] == bb);
 			interp_merge_bblocks (td, bb, next_bb);
 			if (td->verbose_level)
 				g_print ("Merged BB%d and BB%d\n", bb->index, next_bb->index);
-			needs_cprop = TRUE;
 			continue;
 		}
 
 		bb = next_bb;
 	}
-	return needs_cprop;
 }
 
-static gboolean
-interp_local_deadce (TransformData *td)
+static void
+decrement_ref_count (TransformData *td, int *varp, gpointer data)
 {
-	int *local_ref_count = td->local_ref_count;
-	gboolean needs_dce = FALSE;
-	gboolean needs_cprop = FALSE;
+	int var = *varp;
+	td->var_values [var].ref_count--;
+	// FIXME we could clear recursively
+	if (!td->var_values [var].ref_count)
+		*(gboolean*)data = TRUE;
+}
 
-	for (unsigned int i = 0; i < td->vars_size; i++) {
-		g_assert (local_ref_count [i] >= 0);
-		g_assert (td->vars [i].indirects >= 0);
-		if (td->vars [i].indirects || td->vars [i].dead)
-			continue;
-		if (!local_ref_count [i]) {
-			needs_dce = TRUE;
-			td->vars [i].dead = TRUE;
-		} else if (!td->vars [i].unknown_use) {
-			if (!td->vars [i].local_only) {
-				// The value of this var is not passed between multiple basic blocks
-				td->vars [i].local_only = TRUE;
-				if (td->verbose_level)
-					g_print ("Var %d is local only\n", i);
-				needs_cprop = TRUE;
-			}
-		}
-		td->vars [i].unknown_use = FALSE;
-	}
+static void
+interp_var_deadce (TransformData *td)
+{
+	gboolean need_retry;
 
-	// Return early if all locals are alive
-	if (!needs_dce)
-		return needs_cprop;
+retry:
+	need_retry = FALSE;
 
-	// Kill instructions that don't use stack and are storing into dead locals
+	// Kill instructions that are storing into unreferenced vars
 	for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
 		for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) {
 			if (MINT_NO_SIDE_EFFECTS (ins->opcode) ||
 					ins->opcode == MINT_LDLOCA_S) {
 				int dreg = ins->dreg;
-				if (td->vars [dreg].dead) {
+				if (var_has_indirects (td, dreg))
+					continue;
+
+				if (!td->var_values [dreg].ref_count) {
 					if (td->verbose_level) {
 						g_print ("kill dead ins:\n\t");
 						interp_dump_ins (ins, td->data_items);
 					}
-
 					if (ins->opcode == MINT_LDLOCA_S) {
 						td->vars [ins->sregs [0]].indirects--;
 						if (!td->vars [ins->sregs [0]].indirects) {
-							// We can do cprop now through this local. Run cprop again.
-							needs_cprop = TRUE;
+							if (td->verbose_level)
+								g_print ("Kill ldloca, var %d no longer indirect\n", ins->sregs [0]);
+							td->need_optimization_retry = TRUE;
 						}
 					}
+
+					interp_foreach_ins_svar (td, ins, &need_retry, decrement_ref_count);
+
 					interp_clear_ins (ins);
-					// FIXME This is lazy. We should update the ref count for the sregs and redo deadce.
-					needs_cprop = TRUE;
 				}
 			}
 		}
 	}
-	return needs_cprop;
+
+	if (need_retry)
+		goto retry;
 }
 
 static InterpInst*
@@ -1071,15 +2157,16 @@ interp_get_mt_for_ldind (int ldind_op)
 		break;
 
 static InterpInst*
-interp_fold_unop (TransformData *td, InterpVarValue *local_defs, InterpInst *ins)
+interp_fold_unop (TransformData *td, InterpInst *ins)
 {
-	int *local_ref_count = td->local_ref_count;
 	// ins should be an unop, therefore it should have a single dreg and a single sreg
 	int dreg = ins->dreg;
 	int sreg = ins->sregs [0];
-	InterpVarValue *val = &local_defs [sreg];
+	InterpVarValue *val = get_var_value (td, sreg);
 	InterpVarValue result;
 
+	if (!val)
+		return ins;
 	if (val->type != VAR_VALUE_I4 && val->type != VAR_VALUE_I8)
 		return ins;
 
@@ -1151,10 +2238,10 @@ interp_fold_unop (TransformData *td, InterpVarValue *local_defs, InterpInst *ins
 		interp_dump_ins (ins, td->data_items);
 	}
 
-	local_ref_count [sreg]--;
-	result.ins = ins;
-	result.ref_count = 0;
-	local_defs [dreg] = result;
+	td->var_values [sreg].ref_count--;
+	result.def = ins;
+	result.ref_count = td->var_values [dreg].ref_count; // preserve ref count
+	td->var_values [dreg] = result;
 
 	return ins;
 }
@@ -1174,13 +2261,14 @@ interp_fold_unop (TransformData *td, InterpVarValue *local_defs, InterpInst *ins
 		break;
 
 static InterpInst*
-interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpVarValue *local_defs, InterpInst *ins)
+interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpInst *ins)
 {
-	int *local_ref_count = td->local_ref_count;
 	// ins should be an unop conditional branch, therefore it should have a single sreg
 	int sreg = ins->sregs [0];
-	InterpVarValue *val = &local_defs [sreg];
+	InterpVarValue *val = get_var_value (td, sreg);
 
+	if (!val)
+		return ins;
 	if (val->type != VAR_VALUE_I4 && val->type != VAR_VALUE_I8 && val->type != VAR_VALUE_NON_NULL)
 		return ins;
 
@@ -1212,7 +2300,7 @@ interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpVarVal
 		interp_dump_ins (ins, td->data_items);
 	}
 
-	local_ref_count [sreg]--;
+	td->var_values [sreg].ref_count--;
 	return ins;
 }
 
@@ -1243,19 +2331,65 @@ interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpVarVal
 
 
 static InterpInst*
-interp_fold_binop (TransformData *td, InterpVarValue *local_defs, InterpInst *ins, gboolean *folded)
+interp_fold_binop (TransformData *td, InterpInst *ins, gboolean *folded)
 {
-	int *local_ref_count = td->local_ref_count;
 	// ins should be a binop, therefore it should have a single dreg and two sregs
 	int dreg = ins->dreg;
 	int sreg1 = ins->sregs [0];
 	int sreg2 = ins->sregs [1];
-	InterpVarValue *val1 = &local_defs [sreg1];
-	InterpVarValue *val2 = &local_defs [sreg2];
+	InterpVarValue *val1 = get_var_value (td, sreg1);
+	InterpVarValue *val2 = get_var_value (td, sreg2);
 	InterpVarValue result;
 
 	*folded = FALSE;
 
+	if (!val1 || !val2)
+		return ins;
+
+	if ((val1->type == VAR_VALUE_I4 || val1->type == VAR_VALUE_I8) && val2->type == VAR_VALUE_NON_NULL) {
+		gint64 imm = (val1->type == VAR_VALUE_I4) ? (gint64)val1->i : val1->l;
+		if (imm == 0) {
+			result.type = VAR_VALUE_NONE;
+			switch (ins->opcode) {
+				case MINT_CEQ_I4:
+				case MINT_CEQ_I8:
+				case MINT_CGT_UN_I4:
+				case MINT_CGT_UN_I8:
+					result.type = VAR_VALUE_I4;
+					result.i = 0;
+					goto fold_ok;
+				case MINT_CNE_I4:
+				case MINT_CNE_I8:
+				case MINT_CLT_UN_I4:
+				case MINT_CLT_UN_I8:
+					result.type = VAR_VALUE_I4;
+					result.i = 1;
+					goto fold_ok;
+			}
+		}
+	} else if (val1->type == VAR_VALUE_NON_NULL && (val2->type == VAR_VALUE_I4 || val2->type == VAR_VALUE_I8)) {
+		gint64 imm = (val2->type == VAR_VALUE_I4) ? (gint64)val2->i : val2->l;
+		if (imm == 0) {
+			result.type = VAR_VALUE_NONE;
+			switch (ins->opcode) {
+				case MINT_CNE_I4:
+				case MINT_CNE_I8:
+				case MINT_CGT_UN_I4:
+				case MINT_CGT_UN_I8:
+					result.type = VAR_VALUE_I4;
+					result.i = 1;
+					goto fold_ok;
+				case MINT_CEQ_I4:
+				case MINT_CEQ_I8:
+				case MINT_CLT_UN_I4:
+				case MINT_CLT_UN_I8:
+					result.type = VAR_VALUE_I4;
+					result.i = 0;
+					goto fold_ok;
+			}
+		}
+	}
+
 	if (val1->type != VAR_VALUE_I4 && val1->type != VAR_VALUE_I8)
 		return ins;
 	if (val2->type != VAR_VALUE_I4 && val2->type != VAR_VALUE_I8)
@@ -1323,6 +2457,7 @@ interp_fold_binop (TransformData *td, InterpVarValue *local_defs, InterpInst *in
 			return ins;
 	}
 
+fold_ok:
 	// We were able to compute the result of the ins instruction. We replace the binop
 	// with a LDC of the constant. We leave alone the sregs of this instruction, for
 	// deadce to kill the instructions initializing them.
@@ -1339,11 +2474,12 @@ interp_fold_binop (TransformData *td, InterpVarValue *local_defs, InterpInst *in
 		interp_dump_ins (ins, td->data_items);
 	}
 
-	local_ref_count [sreg1]--;
-	local_ref_count [sreg2]--;
-	result.ins = ins;
-	result.ref_count = 0;
-	local_defs [dreg] = result;
+	td->var_values [sreg1].ref_count--;
+	td->var_values [sreg2].ref_count--;
+	result.def = ins;
+	result.ref_count = td->var_values [dreg].ref_count; // preserve ref count
+	td->var_values [dreg] = result;
+
 	return ins;
 }
 
@@ -1366,15 +2502,16 @@ interp_fold_binop (TransformData *td, InterpVarValue *local_defs, InterpInst *in
 		break;
 
 static InterpInst*
-interp_fold_binop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpVarValue *local_defs, InterpInst *ins)
+interp_fold_binop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpInst *ins)
 {
-	int *local_ref_count = td->local_ref_count;
 	// ins should be a conditional binop, therefore it should have only two sregs
 	int sreg1 = ins->sregs [0];
 	int sreg2 = ins->sregs [1];
-	InterpVarValue *val1 = &local_defs [sreg1];
-	InterpVarValue *val2 = &local_defs [sreg2];
+	InterpVarValue *val1 = get_var_value (td, sreg1);
+	InterpVarValue *val2 = get_var_value (td, sreg2);
 
+	if (!val1 || !val2)
+		return ins;
 	if (val1->type != VAR_VALUE_I4 && val1->type != VAR_VALUE_I8)
 		return ins;
 	if (val2->type != VAR_VALUE_I4 && val2->type != VAR_VALUE_I8)
@@ -1411,8 +2548,8 @@ interp_fold_binop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpVarVa
 		interp_dump_ins (ins, td->data_items);
 	}
 
-	local_ref_count [sreg1]--;
-	local_ref_count [sreg2]--;
+	td->var_values [sreg1].ref_count--;
+	td->var_values [sreg2].ref_count--;
 	return ins;
 }
 
@@ -1432,15 +2569,15 @@ write_v128_element (gpointer v128_addr, InterpVarValue *val, int index, int el_s
 }
 
 static InterpInst*
-interp_fold_simd_create (TransformData *td, InterpBasicBlock *cbb, InterpVarValue *local_defs, InterpInst *ins)
+interp_fold_simd_create (TransformData *td, InterpBasicBlock *cbb, InterpInst *ins)
 {
-	int *local_ref_count = td->local_ref_count;
-
 	int *args = ins->info.call_info->call_args;
 	int index = 0;
 	int var = args [index];
 	while (var != -1) {
-		InterpVarValue *val = &local_defs [var];
+		InterpVarValue *val = get_var_value (td, var);
+		if (!val)
+			return ins;
 		if (val->type != VAR_VALUE_I4 && val->type != VAR_VALUE_I8 && val->type != VAR_VALUE_R4)
 			return ins;
 		index++;
@@ -1461,10 +2598,9 @@ interp_fold_simd_create (TransformData *td, InterpBasicBlock *cbb, InterpVarValu
 	index = 0;
 	var = args [index];
 	while (var != -1) {
-		InterpVarValue *val = &local_defs [var];
+		InterpVarValue *val = &td->var_values [var];
 		write_v128_element (v128_addr, val, index, el_size);
 		val->ref_count--;
-		local_ref_count [var]--;
 		index++;
 		var = args [index];
 	}
@@ -1474,106 +2610,200 @@ interp_fold_simd_create (TransformData *td, InterpBasicBlock *cbb, InterpVarValu
 		interp_dump_ins (ins, td->data_items);
 	}
 
-	local_defs [dreg].ins = ins;
-	local_defs [dreg].type = VAR_VALUE_NONE;
+	td->var_values [dreg].def = ins;
+	td->var_values [dreg].type = VAR_VALUE_NONE;
 
 	return ins;
 }
 
-static void
-cprop_sreg (TransformData *td, InterpInst *ins, int *psreg, InterpVarValue *local_defs)
+static gboolean
+can_extend_ssa_var_liveness (TransformData *td, int var, InterpLivenessPosition cur_liveness)
 {
-	int *local_ref_count = td->local_ref_count;
-	int sreg = *psreg;
+	if (!td->vars [var].renamed_ssa_fixed)
+		return TRUE;
+
+	InterpRenamedFixedVar *fixed_var_ext = &td->renamed_fixed_vars [td->vars [var].ext_index];
+
+	// If var was already live at the end of this bblocks, there is no liveness extension happening
+	if (fixed_var_ext->live_out_bblocks && mono_bitset_test_fast (fixed_var_ext->live_out_bblocks, cur_liveness.bb_dfs_index))
+		return TRUE;
+
+	GSList *bb_liveness = fixed_var_ext->live_limit_bblocks;
+	while (bb_liveness) {
+		InterpLivenessPosition *liveness_limit = (InterpLivenessPosition*)bb_liveness->data;
+		if (cur_liveness.bb_dfs_index == liveness_limit->bb_dfs_index) {
+			if (cur_liveness.ins_index <= liveness_limit->ins_index)
+				return TRUE;
+			else
+				return FALSE;
+		} else {
+			bb_liveness = bb_liveness->next;
+		}
+	}
 
-	local_ref_count [sreg]++;
-	local_defs [sreg].ref_count++;
-	if (local_defs [sreg].type == VAR_VALUE_OTHER_VAR) {
-		int cprop_local = local_defs [sreg].var;
+	return FALSE;
+}
 
-		// We are trying to replace sregs [i] with its def local (cprop_local), but cprop_local has since been
-		// modified, so we can't use it.
-		if (local_defs [cprop_local].ins != NULL && local_defs [cprop_local].def_index > local_defs [sreg].def_index)
-			return;
+// We are attempting to extend liveness of var to cur_liveness (propagate its use).
+// We know that var was still alive at the point of original_liveness.
+// cur_liveness is in td->cbb
+static gboolean
+can_extend_var_liveness (TransformData *td, int var, InterpLivenessPosition original_liveness, InterpLivenessPosition cur_liveness)
+{
+	if (var_is_ssa_form (td, var)) {
+		// If var is fixed ssa, we can extend liveness if it doesn't overlap with other renamed
+		// vars. If var is normal ssa, we can extend its liveness with no constraints
+		return can_extend_ssa_var_liveness (td, var, cur_liveness);
+	} else {
+		gboolean original_in_curbb = original_liveness.bb_dfs_index == td->cbb->dfs_index;
+		if (!original_in_curbb) {
+			// var is not in ssa form and we only track its value within a single bblock.
+			// The original liveness information is not in cbb and, by the time we get to cbb,
+			// its value could be different so we can't use it.
+			return FALSE;
+		} else {
+			InterpVarValue *var_val = get_var_value (td, var);
+			if (!var_val) {
+				// We know that var is alive at original_liveness, which is in cbb, and that
+				// the var has not been defined yet in cbb, meaning its value was not overwritten
+				// and we can use it.
+				return TRUE;
+			} else {
+				// We know that var is alive at original_liveness, which is in cbb, and that
+				// the var has been redefined in cbb. We can extend its liveness to cur_liveness,
+				// only if it hasn't been redefined between original and cur liveness.
+				g_assert (var_val->liveness.bb_dfs_index == original_liveness.bb_dfs_index);
+				return var_val->liveness.ins_index < original_liveness.ins_index;
+			}
+		}
+	}
+}
 
+static void
+replace_svar_use (TransformData *td, int *pvar, gpointer data)
+{
+	int *var_pair = (int*)data;
+	int old_var = var_pair [0];
+	if (*pvar == old_var) {
+		int new_var = var_pair [1];
+		td->var_values [old_var].ref_count--;
+		td->var_values [new_var].ref_count++;
+		*pvar = new_var;
 		if (td->verbose_level)
-			g_print ("cprop %d -> %d:\n\t", sreg, cprop_local);
-		local_ref_count [sreg]--;
-		*psreg = cprop_local;
-		local_ref_count [cprop_local]++;
-		if (td->verbose_level)
-			interp_dump_ins (ins, td->data_items);
-	} else if (!local_defs [sreg].ins) {
-		td->vars [sreg].unknown_use = TRUE;
+			g_print ("\treplace svar use: %d -> %d\n", old_var, new_var);
 	}
 }
 
 static void
-clear_local_defs (TransformData *td, int *pvar, void *data)
+replace_svar_uses (TransformData *td, InterpInst *first, InterpInst *last, int old_var, int new_var)
 {
-	int var = *pvar;
-	InterpVarValue *local_defs = (InterpVarValue*) data;
-	local_defs [var].type = VAR_VALUE_NONE;
-	local_defs [var].ins = NULL;
-	local_defs [var].ref_count = 0;
+	int *var_pair = alloca (2 * sizeof (int));
+	var_pair [0] = old_var;
+	var_pair [1] = new_var;
+	for (InterpInst *ins = first; ins != last; ins = ins->next)
+		interp_foreach_ins_svar (td, ins, var_pair, replace_svar_use);
 }
 
 static void
-clear_unused_defs (TransformData *td, int *pvar, void *data)
+cprop_svar (TransformData *td, InterpInst *ins, int *pvar, InterpLivenessPosition current_liveness)
 {
 	int var = *pvar;
-	if (!td->vars [var].local_only)
-		return;
-	if (td->vars [var].indirects)
+	if (var_has_indirects (td, var))
 		return;
 
-	InterpVarValue *local_def = &((InterpVarValue*) data) [var];
-	InterpInst *def_ins = local_def->ins;
-	if (!def_ins)
-		return;
-	if (local_def->ref_count)
-		return;
+	InterpVarValue *val = get_var_value (td, var);
+	if (val && val->type == VAR_VALUE_OTHER_VAR) {
+		// var <- cprop_var;
+		// ....
+		// use var;
+		int cprop_var = val->var;
+		if (td->vars [var].renamed_ssa_fixed && !td->vars [cprop_var].renamed_ssa_fixed) {
+			// ssa fixed vars are likely to live, keep using them
+			val->ref_count++;
+		} else if (can_extend_var_liveness (td, cprop_var, val->liveness, current_liveness)) {
+			if (td->verbose_level)
+				g_print ("cprop %d -> %d:\n\t", var, cprop_var);
+			td->var_values [cprop_var].ref_count++;
+			*pvar = cprop_var;
+			if (td->verbose_level)
+				interp_dump_ins (ins, td->data_items);
+		} else {
+			val->ref_count++;
+		}
+	} else {
+		td->var_values [var].ref_count++;
+	}
 
-	// This is a local only var that is defined in this bblock and its value is not used
-	// at all in this bblock. Clear the definition
-	if (MINT_NO_SIDE_EFFECTS (def_ins->opcode)) {
-		for (int i = 0; i < mono_interp_op_sregs [def_ins->opcode]; i++)
-			td->local_ref_count [def_ins->sregs [i]]--;
-		if (td->verbose_level) {
-			g_print ("kill unused local def:\n\t");
-			interp_dump_ins (def_ins, td->data_items);
+	// Mark the last use for a renamable fixed var
+	var = *pvar;
+	if (td->vars [var].renamed_ssa_fixed) {
+		int ext_index = td->renamed_fixed_vars [td->vars [var].ext_index].renamable_var_ext_index;
+		td->renamable_vars [ext_index].last_use_liveness = current_liveness;
+	}
+}
+
+static gboolean
+can_cprop_dreg (TransformData *td, InterpInst *mov_ins)
+{
+	int dreg = mov_ins->dreg;
+	int sreg = mov_ins->sregs [0];
+
+	// sreg = def
+	// mov sreg -> dreg
+
+	InterpVarValue *sreg_val = get_var_value (td, sreg);
+	if (!sreg_val)
+		return FALSE;
+	// We only apply this optimization if the definition is in the same bblock as this use
+	if (sreg_val->liveness.bb_dfs_index != td->cbb->dfs_index)
+		return FALSE;
+	if (td->var_values [sreg].def->opcode == MINT_DEF_ARG)
+		return FALSE;
+	if (sreg_val->def->flags & INTERP_INST_FLAG_PROTECTED_NEWOBJ)
+		return FALSE;
+	// reordering moves might break conversions
+	if (td->vars [dreg].mt != td->vars [sreg].mt)
+		return FALSE;
+
+	if (var_is_ssa_form (td, sreg)) {
+		// check if dreg is a renamed ssa fixed var (likely to remain alive)
+		if (td->vars [dreg].renamed_ssa_fixed && !td->vars [sreg].renamed_ssa_fixed) {
+			InterpLivenessPosition last_use_liveness = td->renamable_vars [td->renamed_fixed_vars [td->vars [dreg].ext_index].renamable_var_ext_index].last_use_liveness;
+			if (last_use_liveness.bb_dfs_index != td->cbb->dfs_index ||
+					sreg_val->liveness.ins_index >= last_use_liveness.ins_index) {
+				// No other conflicting renamed fixed vars (of dreg) are used in this bblock, or their
+				// last use predates the definition. This means we can tweak def of sreg to store directly
+				// into dreg and patch all intermediary instructions to use dreg instead.
+				return TRUE;
+			}
 		}
-		interp_clear_ins (def_ins);
+	} else if (!var_is_ssa_form (td, dreg)) {
+		// Neither sreg nor dreg are in SSA form. IL globals are likely to remain alive
+		// We ensure that stores to no SSA vars, that are il globals, are not reordered.
+		// For simplicity, we apply the optimization only if the def and move are adjacent.
+		if (td->vars [dreg].il_global && !td->vars [sreg].il_global && mov_ins == interp_next_ins (sreg_val->def))
+			return TRUE;
 	}
+
+	return FALSE;
 }
 
 static void
 interp_cprop (TransformData *td)
 {
-	InterpVarValue *local_defs = (InterpVarValue*) g_malloc (td->vars_size * sizeof (InterpVarValue));
-	int *local_ref_count = (int*) g_malloc (td->vars_size * sizeof (int));
-	InterpBasicBlock *bb;
-	gboolean needs_retry;
-	int ins_index;
-	int iteration_count = 0;
-
-	td->local_ref_count = local_ref_count;
-retry:
-	needs_retry = FALSE;
-	memset (local_ref_count, 0, td->vars_size * sizeof (int));
-
 	if (td->verbose_level)
-		g_print ("\ncprop iteration %d\n", iteration_count++);
-
-	for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
-		InterpInst *ins;
-		ins_index = 0;
+		g_print ("\nCPROP:\n");
 
-		// Set cbb since we do some instruction inserting below
-		td->cbb = bb;
+	// FIXME
+	// There is no need to zero, if we pay attention to phi args vars. They
+	// can be used before the definition.
+	td->var_values = (InterpVarValue*) g_malloc0 (td->vars_size * sizeof (InterpVarValue));
 
-		for (ins = bb->first_ins; ins != NULL; ins = ins->next)
-			interp_foreach_ins_var (td, ins, local_defs, clear_local_defs);
+	// Traverse in dfs order. This guarantees that we always reach the definition first before the
+	// use of the var. Exception is only for phi nodes, where we don't care about the definition
+	// anyway.
+	for (int bb_dfs_index = 0; bb_dfs_index < td->bblocks_count_eh; bb_dfs_index++) {
+		InterpBasicBlock *bb = td->bblocks [bb_dfs_index];
 
 		if (td->verbose_level) {
 			GString* bb_info = interp_get_bb_links (bb);
@@ -1581,58 +2811,69 @@ interp_cprop (TransformData *td)
 			g_string_free (bb_info, TRUE);
 		}
 
-		for (ins = bb->first_ins; ins != NULL; ins = ins->next) {
-			int opcode = ins->opcode;
-
-			if (opcode == MINT_NOP)
+		InterpLivenessPosition current_liveness;
+		current_liveness.bb_dfs_index = bb->dfs_index;
+		current_liveness.ins_index = 0;
+		// Set cbb since we do some instruction inserting below
+		td->cbb = bb;
+		for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) {
+			int opcode, num_sregs, num_dregs;
+			gint32 *sregs;
+			gint32 dreg;
+			// LIVENESS_MARKER is set only for non-eh bblocks
+			if (bb->dfs_index >= td->bblocks_count_no_eh || bb->dfs_index == -1 || (ins->flags & INTERP_INST_FLAG_LIVENESS_MARKER))
+				current_liveness.ins_index++;
+
+			if (interp_ins_is_nop (ins))
 				continue;
 
-			int num_sregs = mono_interp_op_sregs [opcode];
-			int num_dregs = mono_interp_op_dregs [opcode];
-			gint32 *sregs = &ins->sregs [0];
-			gint32 dreg = ins->dreg;
+retry_instruction:
+			opcode = ins->opcode;
+			num_sregs = mono_interp_op_sregs [opcode];
+			num_dregs = mono_interp_op_dregs [opcode];
+			sregs = &ins->sregs [0];
+			dreg = ins->dreg;
 
-			if (td->verbose_level && ins->opcode != MINT_NOP && ins->opcode != MINT_IL_SEQ_POINT)
+			if (td->verbose_level)
 				interp_dump_ins (ins, td->data_items);
 
-			for (int i = 0; i < num_sregs; i++) {
-				if (sregs [i] == MINT_CALL_ARGS_SREG) {
-					if (ins->info.call_info && ins->info.call_info->call_args) {
-						int *call_args = ins->info.call_info->call_args;
-						while (*call_args != -1) {
-							cprop_sreg (td, ins, call_args, local_defs);
-							call_args++;
+			if (opcode == MINT_DEF_TIER_VAR) {
+				// We can't do any var propagation into this instruction since it will be deleted
+				// dreg and sreg should always be identical, a ssa fixed var.
+				td->var_values [sregs [0]].ref_count++;
+			} else if (num_sregs) {
+				for (int i = 0; i < num_sregs; i++) {
+					if (sregs [i] == MINT_CALL_ARGS_SREG) {
+						if (ins->info.call_info && ins->info.call_info->call_args) {
+							int *call_args = ins->info.call_info->call_args;
+							while (*call_args != -1) {
+								cprop_svar (td, ins, call_args, current_liveness);
+								call_args++;
+							}
 						}
+					} else {
+						cprop_svar (td, ins, &sregs [i], current_liveness);
 					}
-				} else {
-					cprop_sreg (td, ins, &sregs [i], local_defs);
+				}
+			} else if (opcode == MINT_PHI) {
+				// no cprop but add ref counts
+				int *args = ins->info.args;
+				while (*args != -1) {
+					td->var_values [*args].ref_count++;
+					args++;
 				}
 			}
 
 			if (num_dregs) {
-				// Check if the previous definition of this var was used at all.
-				// If it wasn't we can just clear the instruction
-				//
-				// MINT_MOV_DST_OFF doesn't fully write to the var, so we special case it here
-				if (local_defs [dreg].ins != NULL &&
-						local_defs [dreg].ref_count == 0 &&
-						!td->vars [dreg].indirects &&
-						opcode != MINT_MOV_DST_OFF) {
-					InterpInst *prev_def = local_defs [dreg].ins;
-					if (MINT_NO_SIDE_EFFECTS (prev_def->opcode)) {
-						for (int i = 0; i < mono_interp_op_sregs [prev_def->opcode]; i++)
-							local_ref_count [prev_def->sregs [i]]--;
-						interp_clear_ins (prev_def);
-					}
-				}
-				local_defs [dreg].type = VAR_VALUE_NONE;
-				local_defs [dreg].ins = ins;
-				local_defs [dreg].def_index = ins_index;
+				InterpVarValue *dval = &td->var_values [dreg];
+				dval->type = VAR_VALUE_NONE;
+				dval->def = ins;
+				dval->liveness = current_liveness;
 			}
 
 			// We always store to the full i4, except as part of STIND opcodes. These opcodes can be
 			// applied to a local var only if that var has LDLOCA applied to it
-			if ((opcode >= MINT_MOV_I4_I1 && opcode <= MINT_MOV_I4_U2) && !td->vars [sregs [0]].indirects) {
+			if ((opcode >= MINT_MOV_I4_I1 && opcode <= MINT_MOV_I4_U2) && !var_has_indirects (td, sregs [0])) {
 				ins->opcode = MINT_MOV_4;
 				opcode = MINT_MOV_4;
 			}
@@ -1643,140 +2884,138 @@ interp_cprop (TransformData *td)
 					if (td->verbose_level)
 						g_print ("clear redundant mov\n");
 					interp_clear_ins (ins);
-					local_ref_count [sreg]--;
-				} else if (td->vars [sreg].indirects || td->vars [dreg].indirects) {
+					td->var_values [sreg].ref_count--;
+				} else if (var_has_indirects (td, sreg) || var_has_indirects (td, dreg)) {
 					// Don't bother with indirect locals
-				} else if (local_defs [sreg].type == VAR_VALUE_I4 || local_defs [sreg].type == VAR_VALUE_I8) {
+				} else if (get_var_value_type (td, sreg) == VAR_VALUE_I4 || get_var_value_type (td, sreg) == VAR_VALUE_I8) {
 					// Replace mov with ldc
-					gboolean is_i4 = local_defs [sreg].type == VAR_VALUE_I4;
-					g_assert (!td->vars [sreg].indirects);
-					local_defs [dreg].type = local_defs [sreg].type;
+					gboolean is_i4 = td->var_values [sreg].type == VAR_VALUE_I4;
+					td->var_values [dreg].type = td->var_values [sreg].type;
 					if (is_i4) {
-						int ct = local_defs [sreg].i;
+						int ct = td->var_values [sreg].i;
 						ins = interp_get_ldc_i4_from_const (td, ins, ct, dreg);
-						local_defs [dreg].i = ct;
+						td->var_values [dreg].i = ct;
 					} else {
-						gint64 ct = local_defs [sreg].l;
+						gint64 ct = td->var_values [sreg].l;
 						ins = interp_inst_replace_with_i8_const (td, ins, ct);
-						local_defs [dreg].l = ct;
+						td->var_values [dreg].l = ct;
 					}
-					local_defs [dreg].ins = ins;
-					local_ref_count [sreg]--;
+					td->var_values [dreg].def = ins;
+					td->var_values [sreg].ref_count--;
 					if (td->verbose_level) {
 						g_print ("cprop loc %d -> ct :\n\t", sreg);
 						interp_dump_ins (ins, td->data_items);
 					}
-				} else if (local_defs [sreg].ins != NULL &&
-						td->vars [sreg].execution_stack &&
-						!td->vars [dreg].execution_stack &&
-						interp_prev_ins (ins) == local_defs [sreg].ins &&
-						!(interp_prev_ins (ins)->flags & INTERP_INST_FLAG_PROTECTED_NEWOBJ)) {
-					// hackish temporary optimization that won't be necessary in the future
-					// We replace `local1 <- ?, local2 <- local1` with `local2 <- ?, local1 <- local2`
-					// if local1 is execution stack local and local2 is normal global local. This makes
-					// it more likely for `local1 <- local2` to be killed, while before we always needed
-					// to store to the global local, which is likely accessed by other instructions.
-					InterpInst *def = local_defs [sreg].ins;
-					int original_dreg = def->dreg;
-
-					def->dreg = dreg;
-					ins->dreg = original_dreg;
-					sregs [0] = dreg;
-
-					local_defs [dreg].type = VAR_VALUE_NONE;
-					local_defs [dreg].ins = def;
-					local_defs [dreg].def_index = local_defs [original_dreg].def_index;
-					local_defs [dreg].ref_count++;
-					local_defs [original_dreg].type = VAR_VALUE_OTHER_VAR;
-					local_defs [original_dreg].ins = ins;
-					local_defs [original_dreg].var = dreg;
-					local_defs [original_dreg].def_index = ins_index;
-					local_defs [original_dreg].ref_count--;
-
-					local_ref_count [original_dreg]--;
-					local_ref_count [dreg]++;
+				} else if (can_cprop_dreg (td, ins)) {
+					int dreg_ref_count = td->var_values [dreg].ref_count;
+					td->var_values [dreg] = td->var_values [sreg];
+					td->var_values [dreg].ref_count = dreg_ref_count;
+					td->var_values [dreg].def->dreg = dreg;
 
 					if (td->verbose_level) {
-						g_print ("cprop dreg:\n\t");
-						interp_dump_ins (def, td->data_items);
+						g_print ("cprop fixed dreg %d:\n\t", dreg);
+						interp_dump_ins (td->var_values [dreg].def, td->data_items);
+					}
+					// Overwrite all uses of sreg with dreg up to this point
+					replace_svar_uses (td, td->var_values [dreg].def->next, ins, sreg, dreg);
+
+					// Transform `mov dreg <- sreg` into `mov sreg <- dreg` in case sreg is still used
+					ins->dreg = sreg;
+					ins->sregs [0] = dreg;
+					td->var_values [dreg].ref_count++;
+					td->var_values [sreg].ref_count--;
+
+					td->var_values [sreg].def = ins;
+					td->var_values [sreg].type = VAR_VALUE_OTHER_VAR;
+					td->var_values [sreg].var = dreg;
+					td->var_values [sreg].liveness = current_liveness;
+					if (td->verbose_level) {
 						g_print ("\t");
 						interp_dump_ins (ins, td->data_items);
 					}
 				} else {
 					if (td->verbose_level)
 						g_print ("local copy %d <- %d\n", dreg, sreg);
-					local_defs [dreg].type = VAR_VALUE_OTHER_VAR;
-					local_defs [dreg].var = sreg;
+					td->var_values [dreg].type = VAR_VALUE_OTHER_VAR;
+					td->var_values [dreg].var = sreg;
 				}
 			} else if (opcode == MINT_LDLOCA_S) {
 				// The local that we are taking the address of is not a sreg but still referenced
-				local_ref_count [ins->sregs [0]]++;
+				td->var_values [ins->sregs [0]].ref_count++;
 			} else if (MINT_IS_LDC_I4 (opcode)) {
-				local_defs [dreg].type = VAR_VALUE_I4;
-				local_defs [dreg].i = interp_get_const_from_ldc_i4 (ins);
+				td->var_values [dreg].type = VAR_VALUE_I4;
+				td->var_values [dreg].i = interp_get_const_from_ldc_i4 (ins);
 			} else if (MINT_IS_LDC_I8 (opcode)) {
-				local_defs [dreg].type = VAR_VALUE_I8;
-				local_defs [dreg].l = interp_get_const_from_ldc_i8 (ins);
+				td->var_values [dreg].type = VAR_VALUE_I8;
+				td->var_values [dreg].l = interp_get_const_from_ldc_i8 (ins);
 			} else if (opcode == MINT_LDC_R4) {
 				guint32 val_u = READ32 (&ins->data [0]);
 				float f = *(float*)(&val_u);
-				local_defs [dreg].type = VAR_VALUE_R4;
-				local_defs [dreg].f = f;
+				td->var_values [dreg].type = VAR_VALUE_R4;
+				td->var_values [dreg].f = f;
 			} else if (ins->opcode == MINT_LDPTR) {
 #if SIZEOF_VOID_P == 8
-				local_defs [dreg].type = VAR_VALUE_I8;
-				local_defs [dreg].l = (gint64)td->data_items [ins->data [0]];
+				td->var_values [dreg].type = VAR_VALUE_I8;
+				td->var_values [dreg].l = (gint64)td->data_items [ins->data [0]];
 #else
-				local_defs [dreg].type = VAR_VALUE_I4;
-				local_defs [dreg].i = (gint32)td->data_items [ins->data [0]];
+				td->var_values [dreg].type = VAR_VALUE_I4;
+				td->var_values [dreg].i = (gint32)td->data_items [ins->data [0]];
 #endif
 			} else if (MINT_IS_UNOP (opcode)) {
-				ins = interp_fold_unop (td, local_defs, ins);
+				ins = interp_fold_unop (td, ins);
 			} else if (MINT_IS_UNOP_CONDITIONAL_BRANCH (opcode)) {
-				ins = interp_fold_unop_cond_br (td, bb, local_defs, ins);
+				ins = interp_fold_unop_cond_br (td, bb, ins);
 			} else if (MINT_IS_SIMD_CREATE (opcode)) {
-				ins = interp_fold_simd_create (td, bb, local_defs, ins);
+				ins = interp_fold_simd_create (td, bb, ins);
 			} else if (MINT_IS_BINOP (opcode)) {
 				gboolean folded;
-				ins = interp_fold_binop (td, local_defs, ins, &folded);
+				ins = interp_fold_binop (td, ins, &folded);
 				if (!folded) {
 					int sreg = -1;
 					guint16 mov_op = 0;
-					if ((opcode == MINT_MUL_I4 || opcode == MINT_DIV_I4) &&
-							local_defs [ins->sregs [1]].type == VAR_VALUE_I4 &&
-							local_defs [ins->sregs [1]].i == 1) {
-						sreg = ins->sregs [0];
-						mov_op = MINT_MOV_4;
-					} else if ((opcode == MINT_MUL_I8 || opcode == MINT_DIV_I8) &&
-							local_defs [ins->sregs [1]].type == VAR_VALUE_I8 &&
-							local_defs [ins->sregs [1]].l == 1) {
-						sreg = ins->sregs [0];
-						mov_op = MINT_MOV_8;
-					} else if (opcode == MINT_MUL_I4 &&
-							local_defs [ins->sregs [0]].type == VAR_VALUE_I4 &&
-							local_defs [ins->sregs [0]].i == 1) {
-						sreg = ins->sregs [1];
-						mov_op = MINT_MOV_4;
-					} else if (opcode == MINT_MUL_I8 &&
-							local_defs [ins->sregs [0]].type == VAR_VALUE_I8 &&
-							local_defs [ins->sregs [0]].l == 1) {
-						sreg = ins->sregs [1];
-						mov_op = MINT_MOV_8;
+					InterpVarValue *vv0 = get_var_value (td, ins->sregs [0]);
+					InterpVarValue *vv1 = get_var_value (td, ins->sregs [1]);
+					if (vv1) {
+						if ((opcode == MINT_MUL_I4 || opcode == MINT_DIV_I4) &&
+								vv1->type == VAR_VALUE_I4 &&
+								vv1->i == 1) {
+							sreg = ins->sregs [0];
+							mov_op = MINT_MOV_4;
+						} else if ((opcode == MINT_MUL_I8 || opcode == MINT_DIV_I8) &&
+								vv1->type == VAR_VALUE_I8 &&
+								vv1->l == 1) {
+							sreg = ins->sregs [0];
+							mov_op = MINT_MOV_8;
+						}
+					} else if (vv0) {
+						if (opcode == MINT_MUL_I4 &&
+								vv0->type == VAR_VALUE_I4 &&
+								vv0->i == 1) {
+							sreg = ins->sregs [1];
+							mov_op = MINT_MOV_4;
+						} else if (opcode == MINT_MUL_I8 &&
+								vv0->type == VAR_VALUE_I8 &&
+								vv0->l == 1) {
+							sreg = ins->sregs [1];
+							mov_op = MINT_MOV_8;
+						}
 					}
 					if (sreg != -1) {
+						td->var_values [ins->sregs [0]].ref_count--;
+						td->var_values [ins->sregs [1]].ref_count--;
 						ins->opcode = mov_op;
 						ins->sregs [0] = sreg;
 						if (td->verbose_level) {
 							g_print ("Replace idempotent binop :\n\t");
 							interp_dump_ins (ins, td->data_items);
 						}
-						needs_retry = TRUE;
+						goto retry_instruction;
 					}
 				}
 			} else if (MINT_IS_BINOP_CONDITIONAL_BRANCH (opcode)) {
-				ins = interp_fold_binop_cond_br (td, bb, local_defs, ins);
+				ins = interp_fold_binop_cond_br (td, bb, ins);
 			} else if (MINT_IS_LDIND (opcode)) {
-				InterpInst *ldloca = local_defs [sregs [0]].ins;
+				InterpInst *ldloca = get_var_value_def (td, sregs [0]);
 				if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) {
 					int local = ldloca->sregs [0];
 					int mt = td->vars [local].mt;
@@ -1794,23 +3033,22 @@ interp_cprop (TransformData *td)
 								ins->opcode = GINT_TO_OPCODE (interp_get_mov_for_type (ldind_mt, FALSE));
 								break;
 						}
-						local_ref_count [sregs [0]]--;
+						td->var_values [sregs [0]].ref_count--;
 						interp_ins_set_sreg (ins, local);
 
 						if (td->verbose_level) {
 							g_print ("Replace ldloca/ldind pair :\n\t");
 							interp_dump_ins (ins, td->data_items);
 						}
-						needs_retry = TRUE;
 					}
 				}
 			} else if (MINT_IS_LDFLD (opcode)) {
-				InterpInst *ldloca = local_defs [sregs [0]].ins;
+				InterpInst *ldloca = get_var_value_def (td, sregs [0]);
 				if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) {
 					int mt = ins->opcode - MINT_LDFLD_I1;
 					int local = ldloca->sregs [0];
 					// Allow ldloca instruction to be killed
-					local_ref_count [sregs [0]]--;
+					td->var_values [sregs [0]].ref_count--;
 					if (td->vars [local].mt == (ins->opcode - MINT_LDFLD_I1) && ins->data [0] == 0) {
 						// Replace LDLOCA + LDFLD with LDLOC, when the loading field represents
 						// the entire local. This is the case with loading the only field of an
@@ -1835,47 +3073,45 @@ interp_cprop (TransformData *td)
 							ins->data [2] = ldsize;
 
 						interp_clear_ins (ins->prev);
+						td->var_values [ins->dreg].def = ins;
 					}
 
 					if (td->verbose_level) {
 						g_print ("Replace ldloca/ldfld pair :\n\t");
 						interp_dump_ins (ins, td->data_items);
 					}
-					needs_retry = TRUE;
 				}
-			} else if (opcode == MINT_INITOBJ) {
-				InterpInst *ldloca = local_defs [sregs [0]].ins;
+			} else if (opcode == MINT_ZEROBLK_IMM) {
+				InterpInst *ldloca = get_var_value_def (td, sregs [0]);
 				if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) {
 					int size = ins->data [0];
 					int local = ldloca->sregs [0];
-					// Replace LDLOCA + INITOBJ with or LDC
+					// Replace LDLOCA + ZEROBLK_IMM with or LDC
 					if (size <= 4)
 						ins->opcode = MINT_LDC_I4_0;
 					else if (size <= 8)
 						ins->opcode = MINT_LDC_I8_0;
 					else
 						ins->opcode = MINT_INITLOCAL;
-					local_ref_count [sregs [0]]--;
+					td->var_values [sregs [0]].ref_count--;
 					ins->dreg = local;
 
 					if (td->verbose_level) {
-						g_print ("Replace ldloca/initobj pair :\n\t");
+						g_print ("Replace ldloca/zeroblk pair :\n\t");
 						interp_dump_ins (ins, td->data_items);
 					}
-					needs_retry = TRUE;
 				}
 			} else if (opcode == MINT_LDOBJ_VT) {
-				InterpInst *ldloca = local_defs [sregs [0]].ins;
+				InterpInst *ldloca = get_var_value_def (td, sregs [0]);
 				if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) {
 					int ldsize = ins->data [0];
 					int local = ldloca->sregs [0];
-					local_ref_count [sregs [0]]--;
+					td->var_values [sregs [0]].ref_count--;
 
 					if (ldsize == td->vars [local].size) {
 						// Replace LDLOCA + LDOBJ_VT with MOV_VT
 						ins->opcode = MINT_MOV_VT;
 						sregs [0] = local;
-						needs_retry = TRUE;
 					} else {
 						// This loads just a part of the local valuetype
 						ins = interp_insert_ins (td, ins, MINT_MOV_SRC_OFF);
@@ -1893,18 +3129,17 @@ interp_cprop (TransformData *td)
 					}
 				}
 			} else if (opcode == MINT_STOBJ_VT || opcode == MINT_STOBJ_VT_NOREF) {
-				InterpInst *ldloca = local_defs [sregs [0]].ins;
+				InterpInst *ldloca = get_var_value_def (td, sregs [0]);
 				if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) {
 					int stsize = ins->data [0];
 					int local = ldloca->sregs [0];
 
 					if (stsize == td->vars [local].size) {
 						// Replace LDLOCA + STOBJ_VT with MOV_VT
-						local_ref_count [sregs [0]]--;
+						td->var_values [sregs [0]].ref_count--;
 						ins->opcode = MINT_MOV_VT;
 						sregs [0] = sregs [1];
 						ins->dreg = local;
-						needs_retry = TRUE;
 
 						if (td->verbose_level) {
 							g_print ("Replace ldloca/stobj_vt pair :\n\t");
@@ -1913,13 +3148,13 @@ interp_cprop (TransformData *td)
 					}
 				}
 			} else if (MINT_IS_STIND (opcode)) {
-				InterpInst *ldloca = local_defs [sregs [0]].ins;
+				InterpInst *ldloca = get_var_value_def (td, sregs [0]);
 				if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) {
 					int local = ldloca->sregs [0];
 					int mt = td->vars [local].mt;
 					if (mt != MINT_TYPE_VT) {
 						// We have an 8 byte local, just replace the stind with a mov
-						local_ref_count [sregs [0]]--;
+						td->var_values [sregs [0]].ref_count--;
 						// We make the assumption that the STIND matches the local type
 						ins->opcode = GINT_TO_OPCODE (interp_get_mov_for_type (mt, TRUE));
 						interp_ins_set_dreg (ins, local);
@@ -1929,15 +3164,14 @@ interp_cprop (TransformData *td)
 							g_print ("Replace ldloca/stind pair :\n\t");
 							interp_dump_ins (ins, td->data_items);
 						}
-						needs_retry = TRUE;
 					}
 				}
 			} else if (MINT_IS_STFLD (opcode)) {
-				InterpInst *ldloca = local_defs [sregs [0]].ins;
+				InterpInst *ldloca = get_var_value_def (td, sregs [0]);
 				if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) {
 					int mt = ins->opcode - MINT_STFLD_I1;
 					int local = ldloca->sregs [0];
-					local_ref_count [sregs [0]]--;
+					td->var_values [sregs [0]].ref_count--;
 					// Allow ldloca instruction to be killed
 					if (td->vars [local].mt == (ins->opcode - MINT_STFLD_I1) && ins->data [0] == 0) {
 						ins->opcode = GINT_TO_OPCODE (interp_get_mov_for_type (mt, FALSE));
@@ -1962,57 +3196,49 @@ interp_cprop (TransformData *td)
 						// This stores just to part of the dest valuetype
 						ins = interp_insert_ins (td, ins, MINT_MOV_DST_OFF);
 						interp_ins_set_dreg (ins, local);
-						interp_ins_set_sreg (ins, sregs [1]);
+						interp_ins_set_sregs2 (ins, sregs [1], local);
 						ins->data [0] = GINT_TO_UINT16 (foffset);
 						ins->data [1] = GINT_TO_UINT16 (mt);
 						ins->data [2] = vtsize;
 
 						interp_clear_ins (ins->prev);
+
+						// MINT_MOV_DST_OFF doesn't work if dreg is allocated at the same location as the
+						// field value to be stored, because its behavior is not atomic in nature. We first
+						// copy the original whole vt, potentially overwritting the new field value.
+						ins = interp_insert_ins (td, ins, MINT_DUMMY_USE);
+						interp_ins_set_sreg (ins, sregs [1]);
+						td->var_values [sregs [1]].ref_count++;
 					}
 					if (td->verbose_level) {
 						g_print ("Replace ldloca/stfld pair (off %p) :\n\t", (void *)(uintptr_t) ldloca->il_offset);
 						interp_dump_ins (ins, td->data_items);
 					}
-					needs_retry = TRUE;
 				}
 			} else if (opcode == MINT_GETITEM_SPAN) {
-				InterpInst *ldloca = local_defs [sregs [0]].ins;
+				InterpInst *ldloca = get_var_value_def (td, sregs [0]);
 				if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) {
 					int local = ldloca->sregs [0];
 					// Allow ldloca instruction to be killed
-					local_ref_count [sregs [0]]--;
+					td->var_values [sregs [0]].ref_count--;
 					// Instead of loading from the indirect pointer pass directly the vt var
 					ins->opcode = MINT_GETITEM_LOCALSPAN;
 					sregs [0] = local;
-					needs_retry = TRUE;
 				}
 			} else if (opcode == MINT_CKNULL) {
-				InterpInst *def = local_defs [sregs [0]].ins;
+				InterpInst *def = get_var_value_def (td, sregs [0]);
 				if (def && def->opcode == MINT_LDLOCA_S) {
 					// CKNULL on LDLOCA is a NOP
 					ins->opcode = MINT_MOV_P;
-					needs_retry = TRUE;
+					td->var_values [ins->sregs [0]].ref_count--;
+					goto retry_instruction;
 				}
-			} else if (opcode == MINT_BOX) {
+			} else if (MINT_IS_BOX (opcode)) {
 				// TODO Add more relevant opcodes
-				local_defs [dreg].type = VAR_VALUE_NON_NULL;
+				td->var_values [dreg].type = VAR_VALUE_NON_NULL;
 			}
-
-			ins_index++;
 		}
-
-		for (ins = bb->first_ins; ins != NULL; ins = ins->next)
-			interp_foreach_ins_var (td, ins, local_defs, clear_unused_defs);
 	}
-
-	needs_retry |= interp_local_deadce (td);
-	if (mono_interp_opt & INTERP_OPT_BBLOCKS)
-		needs_retry |= interp_optimize_bblocks (td);
-
-	if (needs_retry)
-		goto retry;
-
-	g_free (local_defs);
 }
 
 void
@@ -2021,11 +3247,18 @@ mono_test_interp_cprop (TransformData *td)
 	interp_cprop (td);
 }
 
+// If sreg is constant, it returns the value in `imm` and the smallest
+// containing type for it in `imm_mt`.
 static gboolean
-get_sreg_imm (TransformData *td, int sreg, gint16 *imm, int result_mt)
+get_sreg_imm (TransformData *td, int sreg, gint32 *imm, int *imm_mt)
 {
-	InterpInst *def = td->vars [sreg].def;
-	if (def != NULL && td->local_ref_count [sreg] == 1) {
+	if (var_has_indirects (td, sreg))
+		return FALSE;
+	InterpInst *def = get_var_value_def (td, sreg);
+	if (!def)
+		return FALSE;
+	InterpVarValue *sreg_val = &td->var_values [sreg];
+	if (sreg_val->ref_count == 1) {
 		gint64 ct;
 		if (MINT_IS_LDC_I4 (def->opcode))
 			ct = interp_get_const_from_ldc_i4 (def);
@@ -2033,32 +3266,15 @@ get_sreg_imm (TransformData *td, int sreg, gint16 *imm, int result_mt)
 			ct = interp_get_const_from_ldc_i8 (def);
 		else
 			return FALSE;
-		gint64 min_val, max_val;
-		// We only propagate the immediate only if it fits into the desired type,
-		// so we don't accidentaly handle conversions wrong
-		switch (result_mt) {
-			case MINT_TYPE_I1:
-				min_val = G_MININT8;
-				max_val = G_MAXINT8;
-				break;
-			case MINT_TYPE_I2:
-				min_val = G_MININT16;
-				max_val = G_MAXINT16;
-				break;
-			case MINT_TYPE_U1:
-				min_val = 0;
-				max_val = G_MAXUINT8;
-				break;
-			case MINT_TYPE_U2:
-				min_val = 0;
-				max_val = G_MAXINT16;
-				break;
-			default:
-				g_assert_not_reached ();
-
-		}
-		if (ct >= min_val && ct <= max_val) {
+		if (ct >= G_MININT16 && ct <= G_MAXINT16) {
 			*imm = (gint16)ct;
+			if (imm_mt)
+				*imm_mt = MINT_TYPE_I2;
+			return TRUE;
+		} else if (ct >= G_MININT32 && ct <= G_MAXINT32) {
+			*imm = (gint32)ct;
+			if (imm_mt)
+				*imm_mt = MINT_TYPE_I4;
 			return TRUE;
 		}
 	}
@@ -2133,75 +3349,135 @@ get_unop_condbr_sp (int opcode)
 	}
 }
 
+// We have the pattern of:
+//
+// var <- def (v1, v2, ..)
+// ...
+// use var
+//
+// We want to optimize out `var <- def` and replace `use var` with `use v1, v2, ...` in a super instruction.
+// This can be done only if var is used only once (otherwise `var <- def` will remain alive and in the
+// superinstruction we duplicate the calculation of var) and v1, v2, .. can have their liveness extended
+// to the current liveness
+static gboolean
+can_propagate_var_def (TransformData *td, int var, InterpLivenessPosition cur_liveness)
+{
+	InterpVarValue *val = get_var_value (td, var);
+	if (!val)
+		return FALSE;
+	if (val->ref_count != 1)
+		return FALSE;
+
+	InterpInst *def = val->def;
+	int num_sregs = mono_interp_op_sregs [def->opcode];
+
+	for (int i = 0; i < num_sregs; i++) {
+		int svar = def->sregs [i];
+		if (svar == MINT_CALL_ARGS_SREG)
+			return FALSE; // We don't care for these in super instructions
+
+		if (!can_extend_var_liveness (td, svar, val->liveness, cur_liveness))
+			return FALSE;
+	}
+	return TRUE;
+}
+
 static void
 interp_super_instructions (TransformData *td)
 {
-	InterpBasicBlock *bb;
-	int *local_ref_count = td->local_ref_count;
-
 	interp_compute_native_offset_estimates (td);
 
 	// Add some actual super instructions
-	for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
-		InterpInst *ins;
-		int noe;
+	for (int bb_dfs_index = 0; bb_dfs_index < td->bblocks_count_eh; bb_dfs_index++) {
+		InterpBasicBlock *bb = td->bblocks [bb_dfs_index];
 
 		// Set cbb since we do some instruction inserting below
 		td->cbb = bb;
-		noe = bb->native_offset_estimate;
-		for (ins = bb->first_ins; ins != NULL; ins = ins->next) {
+		int noe = bb->native_offset_estimate;
+		InterpLivenessPosition current_liveness;
+		current_liveness.bb_dfs_index = bb->dfs_index;
+		current_liveness.ins_index = 0;
+		for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) {
 			int opcode = ins->opcode;
+			if (bb->dfs_index >= td->bblocks_count_no_eh || bb->dfs_index == -1 || (ins->flags & INTERP_INST_FLAG_LIVENESS_MARKER))
+				current_liveness.ins_index++;
 			if (MINT_IS_NOP (opcode))
 				continue;
-			if (mono_interp_op_dregs [opcode] && !td->vars [ins->dreg].global)
-				td->vars [ins->dreg].def = ins;
 
+			if (mono_interp_op_dregs [opcode] && !var_is_ssa_form (td, ins->dreg) && !var_has_indirects (td, ins->dreg)) {
+				InterpVarValue *dval = &td->var_values [ins->dreg];
+				dval->type = VAR_VALUE_NONE;
+				dval->def = ins;
+				dval->liveness = current_liveness;
+			}
 			if (opcode == MINT_RET || (opcode >= MINT_RET_I1 && opcode <= MINT_RET_U2)) {
 				// ldc + ret -> ret.imm
 				int sreg = ins->sregs [0];
-				gint16 imm;
-				if (get_sreg_imm (td, sreg, &imm, (opcode == MINT_RET) ? MINT_TYPE_I2 : opcode - MINT_RET_I1)) {
-					InterpInst *def = td->vars [sreg].def;
-					int ret_op = MINT_IS_LDC_I4 (def->opcode) ? MINT_RET_I4_IMM : MINT_RET_I8_IMM;
-					InterpInst *new_inst = interp_insert_ins (td, ins, ret_op);
-					new_inst->data [0] = imm;
-					interp_clear_ins (def);
-					interp_clear_ins (ins);
-					local_ref_count [sreg]--;
-
-					if (td->verbose_level) {
-						g_print ("superins: ");
-						interp_dump_ins (new_inst, td->data_items);
+				gint32 imm;
+				if (get_sreg_imm (td, sreg, &imm, NULL)) {
+					// compute the casting as done by the ret opcode
+					int ret_mt = (opcode == MINT_RET) ? MINT_TYPE_I8 : opcode - MINT_RET_I1;
+					if (ret_mt == MINT_TYPE_I1)
+						imm = (gint8)imm;
+					else if (ret_mt == MINT_TYPE_U1)
+						imm = (guint8)imm;
+					else if (ret_mt == MINT_TYPE_I2)
+						imm = (gint16)imm;
+					else if (ret_mt == MINT_TYPE_U2)
+						imm = (guint16)imm;
+
+					if (imm >= G_MININT16 && imm <= G_MAXINT16) {
+						InterpInst *def = td->var_values [sreg].def;
+						int ret_op = MINT_IS_LDC_I4 (def->opcode) ? MINT_RET_I4_IMM : MINT_RET_I8_IMM;
+						InterpInst *new_inst = interp_insert_ins (td, ins, ret_op);
+						new_inst->data [0] = (gint16)imm;
+						interp_clear_ins (def);
+						interp_clear_ins (ins);
+						td->var_values [sreg].ref_count--; // 0
+						if (td->verbose_level) {
+							g_print ("superins: ");
+							interp_dump_ins (new_inst, td->data_items);
+						}
 					}
 				}
 			} else if (opcode == MINT_ADD_I4 || opcode == MINT_ADD_I8 ||
-					opcode == MINT_MUL_I4 || opcode == MINT_MUL_I8) {
+					opcode == MINT_MUL_I4 || opcode == MINT_MUL_I8 ||
+					opcode == MINT_OR_I4 || opcode == MINT_AND_I4) {
 				int sreg = -1;
 				int sreg_imm = -1;
-				gint16 imm;
-				if (get_sreg_imm (td, ins->sregs [0], &imm, MINT_TYPE_I2)) {
+				int imm_mt;
+				gint32 imm;
+				if (get_sreg_imm (td, ins->sregs [0], &imm, &imm_mt)) {
 					sreg = ins->sregs [1];
 					sreg_imm = ins->sregs [0];
-				} else if (get_sreg_imm (td, ins->sregs [1], &imm, MINT_TYPE_I2)) {
+				} else if (get_sreg_imm (td, ins->sregs [1], &imm, &imm_mt)) {
 					sreg = ins->sregs [0];
 					sreg_imm = ins->sregs [1];
 				}
 				if (sreg != -1) {
 					int binop;
 					switch (opcode) {
-						case MINT_ADD_I4: binop = MINT_ADD_I4_IMM; break;
-						case MINT_ADD_I8: binop = MINT_ADD_I8_IMM; break;
-						case MINT_MUL_I4: binop = MINT_MUL_I4_IMM; break;
-						case MINT_MUL_I8: binop = MINT_MUL_I8_IMM; break;
+						case MINT_ADD_I4: binop = (imm_mt == MINT_TYPE_I2) ? MINT_ADD_I4_IMM : MINT_ADD_I4_IMM2; break;
+						case MINT_ADD_I8: binop = (imm_mt == MINT_TYPE_I2) ? MINT_ADD_I8_IMM : MINT_ADD_I8_IMM2; break;
+						case MINT_MUL_I4: binop = (imm_mt == MINT_TYPE_I2) ? MINT_MUL_I4_IMM : MINT_MUL_I4_IMM2; break;
+						case MINT_MUL_I8: binop = (imm_mt == MINT_TYPE_I2) ? MINT_MUL_I8_IMM : MINT_MUL_I8_IMM2; break;
+						case MINT_OR_I4: binop = (imm_mt == MINT_TYPE_I2) ? MINT_OR_I4_IMM : MINT_OR_I4_IMM2; break;
+						case MINT_AND_I4: binop = (imm_mt == MINT_TYPE_I2) ? MINT_AND_I4_IMM : MINT_AND_I4_IMM2; break;
 						default: g_assert_not_reached ();
 					}
 					InterpInst *new_inst = interp_insert_ins (td, ins, binop);
 					new_inst->dreg = ins->dreg;
 					new_inst->sregs [0] = sreg;
-					new_inst->data [0] = imm;
-					interp_clear_ins (td->vars [sreg_imm].def);
+					if (imm_mt == MINT_TYPE_I2)
+						new_inst->data [0] = (gint16)imm;
+					else if (imm_mt == MINT_TYPE_I4)
+						WRITE32_INS (new_inst, 0, &imm);
+					else
+						g_assert_not_reached ();
+					interp_clear_ins (td->var_values [sreg_imm].def);
 					interp_clear_ins (ins);
-					local_ref_count [sreg_imm]--;
+					td->var_values [sreg_imm].ref_count--; // 0
+					td->var_values [new_inst->dreg].def = new_inst;
 					if (td->verbose_level) {
 						g_print ("superins: ");
 						interp_dump_ins (new_inst, td->data_items);
@@ -2209,17 +3485,19 @@ interp_super_instructions (TransformData *td)
 				}
 			} else if (opcode == MINT_SUB_I4 || opcode == MINT_SUB_I8) {
 				// ldc + sub -> add.-imm
-				gint16 imm;
+				gint32 imm;
+				int imm_mt;
 				int sreg_imm = ins->sregs [1];
-				if (get_sreg_imm (td, sreg_imm, &imm, MINT_TYPE_I2) && imm != G_MININT16) {
+				if (get_sreg_imm (td, sreg_imm, &imm, &imm_mt) && imm_mt == MINT_TYPE_I2 && imm != G_MININT16) {
 					int add_op = opcode == MINT_SUB_I4 ? MINT_ADD_I4_IMM : MINT_ADD_I8_IMM;
 					InterpInst *new_inst = interp_insert_ins (td, ins, add_op);
 					new_inst->dreg = ins->dreg;
 					new_inst->sregs [0] = ins->sregs [0];
-					new_inst->data [0] = -imm;
-					interp_clear_ins (td->vars [sreg_imm].def);
+					new_inst->data [0] = (gint16)-imm;
+					interp_clear_ins (td->var_values [sreg_imm].def);
 					interp_clear_ins (ins);
-					local_ref_count [sreg_imm]--;
+					td->var_values [sreg_imm].ref_count--; // 0
+					td->var_values [new_inst->dreg].def = new_inst;
 					if (td->verbose_level) {
 						g_print ("superins: ");
 						interp_dump_ins (new_inst, td->data_items);
@@ -2227,8 +3505,8 @@ interp_super_instructions (TransformData *td)
 				}
 			} else if (opcode == MINT_MUL_I4_IMM || opcode == MINT_MUL_I8_IMM) {
 				int sreg = ins->sregs [0];
-				InterpInst *def = td->vars [sreg].def;
-				if (def != NULL && td->local_ref_count [sreg] == 1) {
+				if (can_propagate_var_def (td, sreg, current_liveness)) {
+					InterpInst *def = get_var_value_def (td, sreg);
 					gboolean is_i4 = opcode == MINT_MUL_I4_IMM;
 					if ((is_i4 && def->opcode == MINT_ADD_I4_IMM) ||
 							(!is_i4 && def->opcode == MINT_ADD_I8_IMM)) {
@@ -2239,7 +3517,8 @@ interp_super_instructions (TransformData *td)
 						new_inst->data [1] = ins->data [0];
 						interp_clear_ins (def);
 						interp_clear_ins (ins);
-						local_ref_count [sreg]--;
+						td->var_values [sreg].ref_count--; // 0
+						td->var_values [new_inst->dreg].def = new_inst;
 						if (td->verbose_level) {
 							g_print ("superins: ");
 							interp_dump_ins (new_inst, td->data_items);
@@ -2247,51 +3526,49 @@ interp_super_instructions (TransformData *td)
 					}
 				}
 			} else if (MINT_IS_BINOP_SHIFT (opcode)) {
-				gint16 imm;
+				gint32 imm;
+				int imm_mt;
 				int sreg_imm = ins->sregs [1];
-				if (get_sreg_imm (td, sreg_imm, &imm, MINT_TYPE_I2)) {
+				if (get_sreg_imm (td, sreg_imm, &imm, &imm_mt) && imm_mt == MINT_TYPE_I2) {
 					// ldc + sh -> sh.imm
 					int shift_op = MINT_SHR_UN_I4_IMM + (opcode - MINT_SHR_UN_I4);
 					InterpInst *new_inst = interp_insert_ins (td, ins, shift_op);
 					new_inst->dreg = ins->dreg;
 					new_inst->sregs [0] = ins->sregs [0];
-					new_inst->data [0] = imm;
-					interp_clear_ins (td->vars [sreg_imm].def);
+					new_inst->data [0] = (gint16)imm;
+					interp_clear_ins (td->var_values [sreg_imm].def);
 					interp_clear_ins (ins);
-					local_ref_count [sreg_imm]--;
+					td->var_values [sreg_imm].ref_count--; // 0
+					td->var_values [new_inst->dreg].def = new_inst;
 					if (td->verbose_level) {
 						g_print ("superins: ");
 						interp_dump_ins (new_inst, td->data_items);
 					}
 				} else if (opcode == MINT_SHL_I4 || opcode == MINT_SHL_I8) {
 					int amount_var = ins->sregs [1];
-					InterpInst *amount_def = td->vars [amount_var].def;
-					if (amount_def != NULL && td->local_ref_count [amount_var] == 1 && amount_def->opcode == MINT_AND_I4) {
-						int mask_var = amount_def->sregs [1];
-						if (get_sreg_imm (td, mask_var, &imm, MINT_TYPE_I2)) {
-							// ldc + and + shl -> shl_and_imm
-							int new_opcode = -1;
-							if (opcode == MINT_SHL_I4 && imm == 31)
-								new_opcode = MINT_SHL_AND_I4;
-							else if (opcode == MINT_SHL_I8 && imm == 63)
-								new_opcode = MINT_SHL_AND_I8;
-
-							if (new_opcode != -1) {
-								InterpInst *new_inst = interp_insert_ins (td, ins, new_opcode);
-								new_inst->dreg = ins->dreg;
-								new_inst->sregs [0] = ins->sregs [0];
-								new_inst->sregs [1] = amount_def->sregs [0];
-
-								local_ref_count [amount_var]--;
-								local_ref_count [mask_var]--;
-
-								interp_clear_ins (td->vars [mask_var].def);
-								interp_clear_ins (amount_def);
-								interp_clear_ins (ins);
-								if (td->verbose_level) {
-									g_print ("superins: ");
-									interp_dump_ins (new_inst, td->data_items);
-								}
+					InterpInst *amount_def = get_var_value_def (td, amount_var);
+					if (amount_def != NULL && td->var_values [amount_var].ref_count == 1 && amount_def->opcode == MINT_AND_I4_IMM) {
+						// and_imm + shl -> shl_and_imm
+						int new_opcode = -1;
+						if (opcode == MINT_SHL_I4 && amount_def->data [0] == 31)
+							new_opcode = MINT_SHL_AND_I4;
+						else if (opcode == MINT_SHL_I8 && amount_def->data [0] == 63)
+							new_opcode = MINT_SHL_AND_I8;
+
+						if (new_opcode != -1) {
+							InterpInst *new_inst = interp_insert_ins (td, ins, new_opcode);
+							new_inst->dreg = ins->dreg;
+							new_inst->sregs [0] = ins->sregs [0];
+							new_inst->sregs [1] = amount_def->sregs [0];
+
+							td->var_values [amount_var].ref_count--; // 0
+							td->var_values [new_inst->dreg].def = new_inst;
+
+							interp_clear_ins (amount_def);
+							interp_clear_ins (ins);
+							if (td->verbose_level) {
+								g_print ("superins: ");
+								interp_dump_ins (new_inst, td->data_items);
 							}
 						}
 					}
@@ -2299,8 +3576,8 @@ interp_super_instructions (TransformData *td)
 			} else if (opcode == MINT_DIV_UN_I4 || opcode == MINT_DIV_UN_I8) {
 				// ldc + div.un -> shr.imm
 				int sreg_imm = ins->sregs [1];
-				InterpInst *def = td->vars [sreg_imm].def;
-				if (def != NULL && td->local_ref_count [sreg_imm] == 1) {
+				InterpInst *def = get_var_value_def (td, sreg_imm);
+				if (def != NULL && td->var_values [sreg_imm].ref_count == 1) {
 					int power2 = -1;
 					if (MINT_IS_LDC_I4 (def->opcode)) {
 						guint32 ct = interp_get_const_from_ldc_i4 (def);
@@ -2322,7 +3599,8 @@ interp_super_instructions (TransformData *td)
 
 						interp_clear_ins (def);
 						interp_clear_ins (ins);
-						local_ref_count [sreg_imm]--;
+						td->var_values [sreg_imm].ref_count--;
+						td->var_values [new_inst->dreg].def = new_inst;
 						if (td->verbose_level) {
 							g_print ("lower div.un: ");
 							interp_dump_ins (new_inst, td->data_items);
@@ -2331,8 +3609,8 @@ interp_super_instructions (TransformData *td)
 				}
 			} else if (MINT_IS_LDIND_INT (opcode)) {
 				int sreg_base = ins->sregs [0];
-				InterpInst *def = td->vars [sreg_base].def;
-				if (def != NULL && td->local_ref_count [sreg_base] == 1) {
+				if (can_propagate_var_def (td, sreg_base, current_liveness)) {
+					InterpInst *def = get_var_value_def (td, sreg_base);
 					InterpInst *new_inst = NULL;
 					if (def->opcode == MINT_ADD_P) {
 						int ldind_offset_op = MINT_LDIND_OFFSET_I1 + (opcode - MINT_LDIND_I1);
@@ -2350,7 +3628,8 @@ interp_super_instructions (TransformData *td)
 					if (new_inst) {
 						interp_clear_ins (def);
 						interp_clear_ins (ins);
-						local_ref_count [sreg_base]--;
+						td->var_values [sreg_base].ref_count--;
+						td->var_values [new_inst->dreg].def = new_inst;
 						if (td->verbose_level) {
 							g_print ("superins: ");
 							interp_dump_ins (new_inst, td->data_items);
@@ -2359,8 +3638,8 @@ interp_super_instructions (TransformData *td)
 				}
 			} else if (MINT_IS_LDIND_OFFSET (opcode)) {
 				int sreg_off = ins->sregs [1];
-				InterpInst *def = td->vars [sreg_off].def;
-				if (def != NULL && td->local_ref_count [sreg_off] == 1) {
+				if (can_propagate_var_def (td, sreg_off, current_liveness)) {
+					InterpInst *def = get_var_value_def (td, sreg_off);
 					if (def->opcode == MINT_MUL_P_IMM || def->opcode == MINT_ADD_P_IMM || def->opcode == MINT_ADD_MUL_P_IMM) {
 						int ldind_offset_op = MINT_LDIND_OFFSET_ADD_MUL_IMM_I1 + (opcode - MINT_LDIND_OFFSET_I1);
 						InterpInst *new_inst = interp_insert_ins (td, ins, ldind_offset_op);
@@ -2386,17 +3665,18 @@ interp_super_instructions (TransformData *td)
 
 						interp_clear_ins (def);
 						interp_clear_ins (ins);
-						local_ref_count [sreg_off]--;
+						td->var_values [sreg_off].ref_count--; // 0
+						td->var_values [new_inst->dreg].def = new_inst;
 						if (td->verbose_level) {
-							g_print ("method %s:%s, superins: ", m_class_get_name (td->method->klass), td->method->name);
+							g_print ("superins: ");
 							interp_dump_ins (new_inst, td->data_items);
 						}
 					}
 				}
 			} else if (MINT_IS_STIND_INT (opcode)) {
 				int sreg_base = ins->sregs [0];
-				InterpInst *def = td->vars [sreg_base].def;
-				if (def != NULL && td->local_ref_count [sreg_base] == 1) {
+				if (can_propagate_var_def (td, sreg_base, current_liveness)) {
+					InterpInst *def = get_var_value_def (td, sreg_base);
 					InterpInst *new_inst = NULL;
 					if (def->opcode == MINT_ADD_P) {
 						int stind_offset_op = MINT_STIND_OFFSET_I1 + (opcode - MINT_STIND_I1);
@@ -2414,7 +3694,7 @@ interp_super_instructions (TransformData *td)
 					if (new_inst) {
 						interp_clear_ins (def);
 						interp_clear_ins (ins);
-						local_ref_count [sreg_base]--;
+						td->var_values [sreg_base].ref_count--;
 						if (td->verbose_level) {
 							g_print ("superins: ");
 							interp_dump_ins (new_inst, td->data_items);
@@ -2427,21 +3707,22 @@ interp_super_instructions (TransformData *td)
 				// when inlining property accessors. We should have more advanced cknull removal
 				// optimzations, so we can catch cases where instructions are not next to each other.
 				int obj_sreg = ins->sregs [0];
-				InterpInst *def = td->vars [obj_sreg].def;
+				InterpInst *def = get_var_value_def (td, obj_sreg);
 				if (def != NULL && def->opcode == MINT_CKNULL && interp_prev_ins (ins) == def &&
-						def->dreg == obj_sreg && local_ref_count [obj_sreg] == 1) {
+						def->dreg == obj_sreg && td->var_values [obj_sreg].ref_count == 1) {
 					if (td->verbose_level) {
-						g_print ("remove redundant cknull (%s): ", td->method->name);
+						g_print ("remove redundant cknull: ");
 						interp_dump_ins (def, td->data_items);
 					}
 					ins->sregs [0] = def->sregs [0];
 					interp_clear_ins (def);
-					local_ref_count [obj_sreg]--;
+					td->var_values [obj_sreg].ref_count--;
 				}
 			} else if (MINT_IS_BINOP_CONDITIONAL_BRANCH (opcode) && interp_is_short_offset (noe, ins->info.target_bb->native_offset_estimate)) {
-				gint16 imm;
+				gint32 imm;
+				int imm_mt;
 				int sreg_imm = ins->sregs [1];
-				if (get_sreg_imm (td, sreg_imm, &imm, MINT_TYPE_I2)) {
+				if (get_sreg_imm (td, sreg_imm, &imm, &imm_mt) && imm_mt == MINT_TYPE_I2) {
 					int condbr_op = get_binop_condbr_imm_sp (opcode);
 					if (condbr_op != MINT_NOP) {
 						InterpInst *prev_ins = interp_prev_ins (ins);
@@ -2450,11 +3731,11 @@ interp_super_instructions (TransformData *td)
 							interp_clear_ins (prev_ins);
 						InterpInst *new_ins = interp_insert_ins (td, ins, condbr_op);
 						new_ins->sregs [0] = ins->sregs [0];
-						new_ins->data [0] = imm;
+						new_ins->data [0] = (gint16)imm;
 						new_ins->info.target_bb = ins->info.target_bb;
-						interp_clear_ins (td->vars [sreg_imm].def);
+						interp_clear_ins (td->var_values [sreg_imm].def);
 						interp_clear_ins (ins);
-						local_ref_count [sreg_imm]--;
+						td->var_values [sreg_imm].ref_count--; // 0
 						if (td->verbose_level) {
 							g_print ("superins: ");
 							interp_dump_ins (new_ins, td->data_items);
@@ -2478,8 +3759,8 @@ interp_super_instructions (TransformData *td)
 				if (opcode == MINT_BRFALSE_I4 || opcode == MINT_BRTRUE_I4) {
 					gboolean negate = opcode == MINT_BRFALSE_I4;
 					int cond_sreg = ins->sregs [0];
-					InterpInst *def = td->vars [cond_sreg].def;
-					if (def != NULL && local_ref_count [cond_sreg] == 1) {
+					if (can_propagate_var_def (td, cond_sreg, current_liveness)) {
+						InterpInst *def = get_var_value_def (td, cond_sreg);
 						int replace_opcode = -1;
 						switch (def->opcode) {
 							case MINT_CEQ_I4: replace_opcode = negate ? MINT_BNE_UN_I4 : MINT_BEQ_I4; break;
@@ -2513,7 +3794,7 @@ interp_super_instructions (TransformData *td)
 							if (def->opcode != MINT_CEQ0_I4)
 								ins->sregs [1] = def->sregs [1];
 							interp_clear_ins (def);
-							local_ref_count [cond_sreg]--;
+							td->var_values [cond_sreg].ref_count--;
 							if (td->verbose_level) {
 								g_print ("superins: ");
 								interp_dump_ins (ins, td->data_items);
@@ -2538,8 +3819,8 @@ interp_super_instructions (TransformData *td)
 				}
 			} else if (opcode == MINT_STOBJ_VT_NOREF) {
 				int sreg_src = ins->sregs [1];
-				InterpInst *def = td->vars [sreg_src].def;
-				if (def != NULL && interp_prev_ins (ins) == def && def->opcode == MINT_LDOBJ_VT && ins->data [0] == def->data [0] && td->local_ref_count [sreg_src] == 1) {
+				InterpInst *def = get_var_value_def (td, sreg_src);
+				if (def != NULL && interp_prev_ins (ins) == def && def->opcode == MINT_LDOBJ_VT && ins->data [0] == def->data [0] && td->var_values [sreg_src].ref_count == 1) {
 					InterpInst *new_inst = interp_insert_ins (td, ins, MINT_CPOBJ_VT_NOREF);
 					new_inst->sregs [0] = ins->sregs [0]; // dst
 					new_inst->sregs [1] = def->sregs [0]; // src
@@ -2547,34 +3828,182 @@ interp_super_instructions (TransformData *td)
 
 					interp_clear_ins (def);
 					interp_clear_ins (ins);
-					local_ref_count [sreg_src]--;
+					td->var_values [sreg_src].ref_count--;
 					if (td->verbose_level) {
 						g_print ("superins: ");
 						interp_dump_ins (new_inst, td->data_items);
 					}
 				}
+			} else if (opcode == MINT_MOV_4 || opcode == MINT_MOV_8 || opcode == MINT_MOV_VT) {
+				int sreg = ins->sregs [0];
+				InterpInst *def = get_var_value_def (td, sreg);
+				if (def && td->var_values [sreg].ref_count == 1) {
+					// The svar is used only for this mov. Try to get the definition to store directly instead
+					if (def->opcode != MINT_DEF_ARG && def->opcode != MINT_PHI && def->opcode != MINT_DEF_TIER_VAR &&
+							!(def->flags & INTERP_INST_FLAG_PROTECTED_NEWOBJ)) {
+						int dreg = ins->dreg;
+						// if var is not ssa or it is a renamed fixed, then we can't replace the dreg
+						// since there can be conflicting liveness, unless the instructions are adjacent
+						if ((var_is_ssa_form (td, dreg) && !td->vars [dreg].renamed_ssa_fixed) ||
+								interp_prev_ins (ins) == def) {
+							def->dreg = dreg;
+
+							// Copy var value, while keeping the ref count intact
+							int dreg_ref_count = td->var_values [dreg].ref_count;
+							td->var_values [dreg] = td->var_values [sreg];
+							td->var_values [dreg].ref_count = dreg_ref_count;
+
+							// clear the move
+							td->var_values [sreg].ref_count--; // 0
+							interp_clear_ins (ins);
+
+							if (td->verbose_level) {
+								g_print ("forward dreg: ");
+								interp_dump_ins (def, td->data_items);
+							}
+						}
+					}
+				}
 			}
 			noe += interp_get_ins_length (ins);
 		}
 	}
 }
 
+static void
+interp_prepare_no_ssa_opt (TransformData *td)
+{
+	for (unsigned int i = 0; i < td->vars_size; i++) {
+		td->vars [i].no_ssa = TRUE;
+		td->vars [i].has_indirects = (td->vars [i].indirects > 0) ? TRUE : FALSE;
+	}
+
+	td->bblocks = (InterpBasicBlock**)mono_mempool_alloc0 (td->opt_mempool, sizeof (InterpBasicBlock*) * td->bb_count);
+
+	int i = 0;
+	for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
+		td->bblocks [i] = bb;
+		bb->dfs_index = i;
+		i++;
+	}
+	td->bblocks_count_no_eh = 0;
+	td->bblocks_count_eh = i;
+}
+
+static void
+interp_remove_ins (InterpBasicBlock *bb, InterpInst *ins)
+{
+	if (ins->next)
+		ins->next->prev = ins->prev;
+	else
+		bb->last_ins = ins->prev;
+
+	if (ins->prev)
+		ins->prev->next = ins->next;
+	else
+		bb->first_ins = ins->next;
+}
+
+static void
+interp_remove_nops (TransformData *td)
+{
+	InterpBasicBlock *bb;
+	for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
+		InterpInst *ins;
+		for (ins = bb->first_ins; ins != NULL; ins = ins->next) {
+			if (ins->opcode == MINT_NOP && ins->prev &&
+					(ins->il_offset == -1 ||
+					ins->prev->il_offset == ins->il_offset)) {
+				// This is a NOP instruction that has no relevant il_offset, actually remove it
+				interp_remove_ins (bb, ins);
+			}
+
+		}
+	}
+}
+
 void
 interp_optimize_code (TransformData *td)
 {
 	if (mono_interp_opt & INTERP_OPT_BBLOCKS)
-		interp_optimize_bblocks (td);
+		MONO_TIME_TRACK (mono_interp_stats.optimize_bblocks_time, interp_optimize_bblocks (td));
 
-	if (mono_interp_opt & INTERP_OPT_CPROP)
-		MONO_TIME_TRACK (mono_interp_stats.cprop_time, interp_cprop (td));
+	// Nothing to optimize if we don't have cprop enabled
+	if (!(mono_interp_opt & INTERP_OPT_CPROP))
+		return;
 
-	// After this point control optimizations on control flow can no longer happen, so we can determine
-	// which vars are global. This helps speed up the super instructions pass, which only operates on
-	// single def, single use local vars.
-	initialize_global_vars (td);
+	if (!(mono_interp_opt & INTERP_OPT_SSA))
+		td->disable_ssa = TRUE;
+
+	gboolean ssa_enabled_retry = FALSE;
+
+	if (!td->disable_ssa && td->bb_count > 1000) {
+		// We have ssa enabled but we are compiling a huge method. Do the first iteration
+		// in ssa disabled mode. This should greatly simplify the CFG and the code, so the
+		// following iteration with SSA transformation enabled is much faster. In general,
+		// for huge methods we end up doing multiple optimization iterations anyway.
+		ssa_enabled_retry = TRUE;
+		td->disable_ssa = TRUE;
+		if (td->verbose_level)
+			g_print ("Huge method. SSA disabled for first iteration\n");
+	}
+optimization_retry:
+	if (td->opt_mempool != NULL)
+		mono_mempool_destroy (td->opt_mempool);
+	if (td->var_values != NULL) {
+		g_free (td->var_values);
+		td->var_values = NULL;
+	}
+	td->opt_mempool = mono_mempool_new ();
+
+	td->need_optimization_retry = FALSE;
+
+	if (td->disable_ssa)
+		interp_prepare_no_ssa_opt (td);
+	else
+		MONO_TIME_TRACK (mono_interp_stats.ssa_compute_time, interp_compute_ssa (td));
+
+	MONO_TIME_TRACK (mono_interp_stats.cprop_time, interp_cprop (td));
+
+	interp_var_deadce (td);
+
+	// We run this after var deadce to detect more single use vars. This pass will clear
+	// unnecessary instruction on the fly so deadce is no longer needed to run.
+	if (mono_interp_opt & INTERP_OPT_SUPER_INSTRUCTIONS) {
+		// This pass is enough to be called only once, after all cprop and other optimizations
+		// are done. The problem is that currently it needs to run over code in SSA form, so we
+		// can't just run it at the very end of optimization cycles. Also bblock optimization can
+		// lead to another optimization iteration, so we can still end up running it multiple times.
+		// Basic block optimization currently needs to run after we exited SSA.
+		if (!ssa_enabled_retry && !td->need_optimization_retry)
+			MONO_TIME_TRACK (mono_interp_stats.super_instructions_time, interp_super_instructions (td));
+	}
+
+	if (!td->disable_ssa)
+		interp_exit_ssa (td);
+
+	interp_remove_nops (td);
+
+	if (mono_interp_opt & INTERP_OPT_BBLOCKS)
+		MONO_TIME_TRACK (mono_interp_stats.optimize_bblocks_time, interp_optimize_bblocks (td));
+
+	if (ssa_enabled_retry) {
+		ssa_enabled_retry = FALSE;
+		td->disable_ssa = FALSE;
+		if (td->verbose_level)
+			g_print ("Retry optimization with SSA enabled\n");
+		goto optimization_retry;
+	} else if (td->need_optimization_retry) {
+		if (td->verbose_level)
+			g_print ("Retry optimization\n");
+		goto optimization_retry;
+	}
+
+	mono_mempool_destroy (td->opt_mempool);
 
-	if ((mono_interp_opt & INTERP_OPT_SUPER_INSTRUCTIONS) &&
-			(mono_interp_opt & INTERP_OPT_CPROP))
-		MONO_TIME_TRACK (mono_interp_stats.super_instructions_time, interp_super_instructions (td));
+	if (td->verbose_level) {
+		g_print ("\nOptimized IR:\n");
+		mono_interp_print_td_code (td);
+	}
 }
 
diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c
index dd195a29a469..70773797a47f 100644
--- a/src/mono/mono/mini/interp/transform.c
+++ b/src/mono/mono/mini/interp/transform.c
@@ -349,6 +349,64 @@ mono_mint_type (MonoType *type)
 	return -1;
 }
 
+// This marks the var as renamable, allocating space for additional data.
+// The original var data (InterpVar) will have an index that points to this
+// additional data.
+int
+interp_make_var_renamable (TransformData *td, int var)
+{
+	// Check if already allocated
+	if (td->vars [var].ext_index != -1)
+		return td->vars [var].ext_index;
+
+	if (td->renamable_vars_size == td->renamable_vars_capacity) {
+		td->renamable_vars_capacity *= 2;
+		if (td->renamable_vars_capacity == 0)
+			td->renamable_vars_capacity = 2;
+		td->renamable_vars = (InterpRenamableVar*) g_realloc (td->renamable_vars, td->renamable_vars_capacity * sizeof (InterpRenamableVar));
+	}
+
+	int ext_index = td->renamable_vars_size;
+	InterpRenamableVar *ext = &td->renamable_vars [ext_index];
+	memset (ext, 0, sizeof (InterpRenamableVar));
+	ext->var_index = var;
+
+	td->vars [var].ext_index = ext_index;
+
+	td->renamable_vars_size++;
+
+	return ext_index;
+}
+
+// This doesn't allocate a new var, rather additional information for fixed renamed vars
+int
+interp_create_renamed_fixed_var (TransformData *td, int var_index, int renamable_var_index)
+{
+	g_assert (td->vars [renamable_var_index].ext_index != -1);
+	g_assert (td->vars [var_index].ext_index == -1);
+	g_assert (td->vars [var_index].renamed_ssa_fixed);
+
+	if (td->renamed_fixed_vars_size == td->renamed_fixed_vars_capacity) {
+		td->renamed_fixed_vars_capacity *= 2;
+		if (td->renamed_fixed_vars_capacity == 0)
+			td->renamed_fixed_vars_capacity = 2;
+		td->renamed_fixed_vars = (InterpRenamedFixedVar*) g_realloc (td->renamed_fixed_vars, td->renamed_fixed_vars_capacity * sizeof (InterpRenamedFixedVar));
+	}
+
+	int ext_index = td->renamed_fixed_vars_size;
+	InterpRenamedFixedVar *ext = &td->renamed_fixed_vars [ext_index];
+
+	ext->var_index = var_index;
+	ext->renamable_var_ext_index = td->vars [renamable_var_index].ext_index;
+	ext->live_out_bblocks = NULL;
+	ext->live_limit_bblocks = NULL;
+
+	td->vars [var_index].ext_index = ext_index;
+
+	td->renamed_fixed_vars_size++;
+
+	return ext_index;
+}
 
 /*
  * These are additional locals that can be allocated as we transform the code.
@@ -377,7 +435,7 @@ interp_create_var_explicit (TransformData *td, MonoType *type, int size)
 	local->size = size;
 	local->live_start = -1;
 	local->bb_index = -1;
-	local->def = NULL;
+	local->ext_index = -1;
 
 	td->vars_size++;
 	return td->vars_size - 1;
@@ -493,6 +551,22 @@ set_simple_type_and_var (TransformData *td, StackInfo *sp, int type)
 	set_type_and_var (td, sp, type, NULL);
 }
 
+static void
+push_mono_type (TransformData *td, MonoType *type, int mt, MonoClass *k)
+{
+	if (mt == -1)
+		mt = mono_mint_type (type);
+	if (!k)
+		k = mono_class_from_mono_type_internal (type);
+
+	g_assert (mt != MINT_TYPE_VT);
+
+	if (m_type_is_byref (type))
+		push_type_explicit (td, STACK_TYPE_MP, k, MINT_STACK_SLOT_SIZE);
+	else
+		push_type_explicit (td, stack_type [mt], k, MINT_STACK_SLOT_SIZE);
+}
+
 static void
 push_type (TransformData *td, int type, MonoClass *k)
 {
@@ -659,8 +733,7 @@ handle_branch (TransformData *td, int long_op, int offset)
 	if (offset < 0 && td->sp == td->stack && !td->inlined_method) {
 		// Backwards branch inside unoptimized method where the IL stack is empty
 		// This is candidate for a patchpoint
-		if (!td->optimized)
-			target_bb->emit_patchpoint = TRUE;
+		target_bb->patchpoint_bb = TRUE;
 		if (mono_interp_tiering_enabled () && !target_bb->patchpoint_data && td->optimized) {
 			// The optimized imethod will store mapping from bb index to native offset so it
 			// can resume execution in the optimized method, once we tier up in patchpoint
@@ -673,9 +746,12 @@ handle_branch (TransformData *td, int long_op, int offset)
 	if (offset > 0)
 		init_bb_stack_state (td, target_bb);
 
-	if (td->cbb->no_inlining && long_op != MINT_CALL_HANDLER)
-		target_bb->jump_targets--;
-	interp_link_bblocks (td, td->cbb, target_bb);
+	if (long_op != MINT_CALL_HANDLER) {
+		if (td->cbb->no_inlining)
+			target_bb->jump_targets--;
+		// We don't link finally blocks into the cfg (or other handler blocks for that matter)
+		interp_link_bblocks (td, td->cbb, target_bb);
+	}
 
 	interp_add_ins (td, long_op);
 	td->last_ins->info.target_bb = target_bb;
@@ -946,7 +1022,7 @@ load_arg(TransformData *td, int n)
 		if (hasthis && n == 0) {
 			mt = MINT_TYPE_I;
 			klass = NULL;
-			push_type (td, stack_type [mt], klass);
+			push_type (td, STACK_TYPE_MP, klass);
 		} else {
 			g_assert (size < G_MAXUINT16);
 			push_type_vt (td, klass, size);
@@ -960,7 +1036,7 @@ load_arg(TransformData *td, int n)
 			if (mt == MINT_TYPE_O)
 				klass = mono_class_from_mono_type_internal (type);
 		}
-		push_type (td, stack_type [mt], klass);
+		push_mono_type (td, type, mt, klass);
 	}
 	interp_add_ins (td, interp_get_mov_for_type (mt, TRUE));
 	interp_ins_set_sreg (td->last_ins, n);
@@ -1009,7 +1085,7 @@ load_local (TransformData *td, int local)
 		MonoClass *klass = NULL;
 		if (mt == MINT_TYPE_O)
 			klass = mono_class_from_mono_type_internal (type);
-		push_type (td, stack_type [mt], klass);
+		push_mono_type (td, type, mt, klass);
 	}
 	interp_add_ins (td, interp_get_mov_for_type (mt, TRUE));
 	interp_ins_set_sreg (td->last_ins, local);
@@ -1023,6 +1099,7 @@ store_local (TransformData *td, int local)
 {
 	int mt = td->vars [local].mt;
 	CHECK_STACK_RET_VOID (td, 1);
+
 #if SIZEOF_VOID_P == 8
 	// nint and int32 can be used interchangeably. Add implicit conversions.
 	if (td->sp [-1].type == STACK_TYPE_I4 && stack_type [mt] == STACK_TYPE_I8)
@@ -1441,7 +1518,14 @@ interp_dump_ins (InterpInst *ins, gpointer *data_items)
 	else
 		g_string_append_printf (str, " [nil <-");
 
-	if (mono_interp_op_sregs [opcode] > 0) {
+	if (opcode == MINT_PHI) {
+		int *args = ins->info.args;
+		while (*args != -1) {
+			g_string_append_printf (str, " %d", *args);
+			args++;
+		}
+		g_string_append_printf (str, "],");
+	} else if (mono_interp_op_sregs [opcode] > 0) {
 		for (int i = 0; i < mono_interp_op_sregs [opcode]; i++) {
 			if (ins->sregs [i] == MINT_CALL_ARGS_SREG) {
 				g_string_append_printf (str, " c:");
@@ -1507,7 +1591,6 @@ mono_interp_print_code (InterpMethod *imethod)
 void
 mono_interp_print_td_code (TransformData *td)
 {
-	g_print ("Unoptimized IR:\n");
 	for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb)
 		interp_dump_bb (bb, td->data_items);
 }
@@ -1537,24 +1620,16 @@ interp_ip_in_cbb (TransformData *td, int il_offset)
 static gboolean
 interp_ins_is_ldc (InterpInst *ins)
 {
-	return ins->opcode >= MINT_LDC_I4_M1 && ins->opcode <= MINT_LDC_I8;
+	return ins->opcode >= MINT_LDC_I4_0 && ins->opcode <= MINT_LDC_I8;
 }
 
 gint32
 interp_get_const_from_ldc_i4 (InterpInst *ins)
 {
 	switch (ins->opcode) {
-	case MINT_LDC_I4_M1: return -1;
 	case MINT_LDC_I4_0: return 0;
 	case MINT_LDC_I4_1: return 1;
-	case MINT_LDC_I4_2: return 2;
-	case MINT_LDC_I4_3: return 3;
-	case MINT_LDC_I4_4: return 4;
-	case MINT_LDC_I4_5: return 5;
-	case MINT_LDC_I4_6: return 6;
-	case MINT_LDC_I4_7: return 7;
-	case MINT_LDC_I4_8: return 8;
-	case MINT_LDC_I4_S: return (gint32)(gint8)ins->data [0];
+	case MINT_LDC_I4_S: return (gint32)(gint16)ins->data [0];
 	case MINT_LDC_I4: return READ32 (&ins->data [0]);
 	default:
 		g_assert_not_reached ();
@@ -1566,24 +1641,14 @@ InterpInst*
 interp_get_ldc_i4_from_const (TransformData *td, InterpInst *ins, gint32 ct, int dreg)
 {
 	guint16 opcode;
-	switch (ct) {
-	case -1: opcode = MINT_LDC_I4_M1; break;
-	case 0: opcode = MINT_LDC_I4_0; break;
-	case 1: opcode = MINT_LDC_I4_1; break;
-	case 2: opcode = MINT_LDC_I4_2; break;
-	case 3: opcode = MINT_LDC_I4_3; break;
-	case 4: opcode = MINT_LDC_I4_4; break;
-	case 5: opcode = MINT_LDC_I4_5; break;
-	case 6: opcode = MINT_LDC_I4_6; break;
-	case 7: opcode = MINT_LDC_I4_7; break;
-	case 8: opcode = MINT_LDC_I4_8; break;
-	default:
-		if (ct >= -128 && ct <= 127)
-			opcode = MINT_LDC_I4_S;
-		else
-			opcode = MINT_LDC_I4;
-		break;
-	}
+	if (!ct)
+		opcode = MINT_LDC_I4_0;
+	else if (ct == 1)
+		opcode = MINT_LDC_I4_1;
+	else if (ct >= G_MININT16 && ct <= G_MAXINT16)
+		opcode = MINT_LDC_I4_S;
+	else
+		opcode = MINT_LDC_I4;
 
 	int new_size = mono_interp_oplen [opcode];
 
@@ -1601,7 +1666,7 @@ interp_get_ldc_i4_from_const (TransformData *td, InterpInst *ins, gint32 ct, int
 	interp_ins_set_dreg (ins, dreg);
 
 	if (new_size == 3)
-		ins->data [0] = (gint8)ct;
+		ins->data [0] = (gint16)ct;
 	else if (new_size == 4)
 		WRITE32_INS (ins, 0, &ct);
 
@@ -1871,32 +1936,62 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
 		}
 	} else if (in_corlib &&
 			!strcmp (klass_name_space, "System") &&
-			!strcmp (klass_name, "SpanHelpers") &&
-			!strcmp (tm, "ClearWithReferences")) {
-		*op = MINT_INTRINS_CLEAR_WITH_REFERENCES;
+			!strcmp (klass_name, "SpanHelpers")) {
+		if (!strcmp (tm, "ClearWithReferences")) {
+			*op = MINT_INTRINS_CLEAR_WITH_REFERENCES;
+		} else if (!strcmp (tm, "ClearWithoutReferences")) {
+			*op = MINT_ZEROBLK;
+		} else if (!strcmp (tm, "Fill") && csignature->param_count == 3) {
+			int align;
+			if (mono_type_size (csignature->params [2], &align) == 1) {
+				interp_add_ins (td, MINT_INITBLK);
+				td->sp -= 3;
+				interp_ins_set_sregs3 (td->last_ins, td->sp [0].var, td->sp [2].var, td->sp [1].var);
+				td->ip += 5;
+				return TRUE;
+			}
+		}
 	} else if (in_corlib && !strcmp (klass_name_space, "System") && !strcmp (klass_name, "Marvin")) {
 		if (!strcmp (tm, "Block")) {
 			InterpInst *ldloca2 = td->last_ins;
 			if (ldloca2 != NULL && ldloca2->opcode == MINT_LDLOCA_S) {
 				InterpInst *ldloca1 = interp_prev_ins (ldloca2);
 				if (ldloca1 != NULL && ldloca1->opcode == MINT_LDLOCA_S) {
-					interp_add_ins (td, MINT_INTRINS_MARVIN_BLOCK);
-					td->last_ins->sregs [0] = ldloca1->sregs [0];
-					td->last_ins->sregs [1] = ldloca2->sregs [0];
-
-					// This intrinsic would normally receive two local refs, however, we try optimizing
-					// away both ldlocas for better codegen. This means that this intrinsic will instead
-					// modify the values of both sregs. In order to not overcomplicate the optimization
-					// passes and offset allocator with support for modifiable sregs or multi dregs, we
-					// just redefine both sregs after the intrinsic.
-					interp_add_ins (td, MINT_DEF);
-					td->last_ins->dreg = ldloca1->sregs [0];
-					interp_add_ins (td, MINT_DEF);
-					td->last_ins->dreg = ldloca2->sregs [0];
+					int var1 = ldloca1->sregs [0];
+					int var2 = ldloca2->sregs [0];
+					if (!td->optimized) {
+						interp_add_ins (td, MINT_INTRINS_MARVIN_BLOCK);
+						td->last_ins->sregs [0] = var1;
+						td->last_ins->sregs [1] = var2;
+						td->last_ins->data [0] = GINT_TO_UINT16 (var1);
+						td->last_ins->data [1] = GINT_TO_UINT16 (var2);
+					} else {
+						// Convert this instruction to SSA form by splitting it into 2 different
+						// single dreg instructions. When we generate final code, we will couple them
+						// together.
+						int result1 = interp_create_var (td, m_class_get_byval_arg (mono_defaults.uint32_class));
+						int result2 = interp_create_var (td, m_class_get_byval_arg (mono_defaults.uint32_class));
+						interp_add_ins (td, MINT_INTRINS_MARVIN_BLOCK_SSA1);
+						td->last_ins->sregs [0] = var1;
+						td->last_ins->sregs [1] = var2;
+						td->last_ins->dreg = result1;
+
+						interp_add_ins (td, MINT_INTRINS_MARVIN_BLOCK_SSA2);
+						td->last_ins->sregs [0] = var1;
+						td->last_ins->sregs [1] = var2;
+						td->last_ins->dreg = result2;
+
+						interp_add_ins (td, MINT_MOV_4);
+						td->last_ins->sregs [0] = result1;
+						td->last_ins->dreg = var1;
+						interp_add_ins (td, MINT_MOV_4);
+						td->last_ins->sregs [0] = result2;
+						td->last_ins->dreg = var2;
+					}
 
 					// Remove the ldlocas
-					td->vars [ldloca1->sregs [0]].indirects--;
-					td->vars [ldloca2->sregs [0]].indirects--;
+					td->vars [var1].indirects--;
+					td->vars [var2].indirects--;
 					interp_clear_ins (ldloca1);
 					interp_clear_ins (ldloca2);
 					td->sp -= 2;
@@ -2734,6 +2829,11 @@ interp_method_check_inlining (TransformData *td, MonoMethod *method, MonoMethodS
 	if (td->cbb->no_inlining)
 		return FALSE;
 
+	// Exception handlers are always uncommon, with the exception of finally.
+	int inner_clause = td->clause_indexes [td->current_il_offset];
+	if (inner_clause != -1 && td->header->clauses [inner_clause].flags != MONO_EXCEPTION_CLAUSE_FINALLY)
+		return FALSE;
+
 	if (method->flags & METHOD_ATTRIBUTE_REQSECOBJ)
 		/* Used to mark methods containing StackCrawlMark locals */
 		return FALSE;
@@ -2783,6 +2883,14 @@ interp_method_check_inlining (TransformData *td, MonoMethod *method, MonoMethodS
 	if (td->prof_coverage)
 		return FALSE;
 
+	/*
+	 * doesnotreturn methods are not profitable to inline, since they almost certainly will not
+	 * actually run during normal execution, and if they do they will only run once, so the
+	 * upside to inlining them is effectively zero, and we'd waste time doing the inline
+	 */
+	if (has_doesnotreturn_attribute (method))
+		return FALSE;
+
 	if (!is_metadata_update_disabled () && mono_metadata_update_no_inline (td->method, method))
 		return FALSE;
 
@@ -2962,11 +3070,9 @@ interp_inline_newobj (TransformData *td, MonoMethod *target_method, MonoMethodSi
 
 		dreg = interp_create_var (td, get_type_from_stack (stack_type [ret_mt], klass));
 
-		// For valuetypes, we need to control the lifetime of the valuetype.
-		// MINT_NEWOBJ_VT_INLINED takes the address of this reg and we should keep
-		// the vt alive until the inlining is completed.
-		interp_add_ins (td, MINT_DEF);
+		interp_add_ins (td, MINT_INITLOCAL);
 		interp_ins_set_dreg (td->last_ins, dreg);
+		td->last_ins->data [0] = GINT_TO_UINT16 (vtsize);
 	} else {
 		dreg = interp_create_var (td, get_type_from_stack (stack_type [ret_mt], klass));
 	}
@@ -2985,11 +3091,10 @@ interp_inline_newobj (TransformData *td, MonoMethod *target_method, MonoMethodSi
 	td->sp += csignature->param_count;
 
 	if (is_vt) {
-		// Receives the valuetype allocated with MINT_DEF, and returns its address
-		newobj_fast = interp_add_ins (td, MINT_NEWOBJ_VT_INLINED);
+		newobj_fast = interp_add_ins (td, MINT_LDLOCA_S);
 		interp_ins_set_dreg (newobj_fast, this_reg);
 		interp_ins_set_sreg (newobj_fast, dreg);
-		newobj_fast->data [0] = GUINTPTR_TO_UINT16 (ALIGN_TO (vtsize, MINT_STACK_SLOT_SIZE));
+		td->vars [dreg].indirects++;
 	} else {
 		MonoVTable *vtable = mono_class_vtable_checked (klass, error);
 		goto_if_nok (error, fail);
@@ -3007,11 +3112,6 @@ interp_inline_newobj (TransformData *td, MonoMethod *target_method, MonoMethodSi
 	if (!interp_inline_method (td, target_method, mheader, error))
 		goto fail;
 
-	if (is_vt) {
-		interp_add_ins (td, MINT_DUMMY_USE);
-		interp_ins_set_sreg (td->last_ins, dreg);
-	}
-
 	push_var (td, dreg);
 	return TRUE;
 fail:
@@ -3220,6 +3320,7 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
 	int need_null_check = is_virtual;
 	int fp_sreg = -1, first_sreg = -1, dreg = -1;
 	gboolean is_delegate_invoke = FALSE;
+	InterpInst *null_check = NULL;
 
 	guint32 token = read32 (td->ip + 1);
 
@@ -3472,6 +3573,9 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
 		interp_ins_set_sreg (td->last_ins, sp->var);
 		set_type_and_var (td, sp, sp->type, sp->klass);
 		interp_ins_set_dreg (td->last_ins, sp->var);
+		// If the call instruction will do a null check, then this instruction
+		// will be transformed into a simple MOV, so it can be optimized out
+		null_check = td->last_ins;
 	}
 
 	/* Offset the function pointer when emitting convert instructions */
@@ -3611,7 +3715,7 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
 				return FALSE;
 			}
 		} else {
-			push_type (td, stack_type[mt], klass);
+			push_mono_type (td, csignature->ret, mt, klass);
 		}
 		dreg = td->sp [-1].var;
 	} else {
@@ -3737,6 +3841,7 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
 			} else if (is_virtual) {
 				interp_add_ins (td, MINT_CALLVIRT_FAST);
 				td->last_ins->data [1] = get_virt_method_slot (target_method);
+				null_check->opcode = MINT_MOV_P;
 			} else {
 				interp_add_ins (td, MINT_CALL);
 			}
@@ -4229,6 +4334,10 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
 	td->vars = (InterpVar*)g_malloc0 (num_locals * sizeof (InterpVar));
 	td->vars_size = num_locals;
 	td->vars_capacity = td->vars_size;
+
+	td->renamable_vars = (InterpRenamableVar*)g_malloc (num_locals * sizeof (InterpRenamableVar));
+	td->renamable_vars_size = 0;
+	td->renamable_vars_capacity = num_locals;
 	offset = 0;
 
 	/*
@@ -4245,13 +4354,15 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
 		int mt = mono_mint_type (type);
 		td->vars [i].type = type;
 		td->vars [i].global = TRUE;
+		td->vars [i].il_global = TRUE;
 		td->vars [i].indirects = 0;
 		td->vars [i].mt = mt;
-		td->vars [i].def = NULL;
+		td->vars [i].ext_index = -1;
 		size = mono_interp_type_size (type, mt, &align);
 		td->vars [i].size = size;
 		offset = ALIGN_TO (offset, align);
 		td->vars [i].offset = offset;
+		interp_mark_ref_slots_for_var (td, i);
 		offset += size;
 	}
 	offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT);
@@ -4272,10 +4383,12 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
 		td->vars [index].type = header->locals [i];
 		td->vars [index].offset = offset;
 		td->vars [index].global = TRUE;
+		td->vars [index].il_global = TRUE;
 		td->vars [index].indirects = 0;
 		td->vars [index].mt = mono_mint_type (header->locals [i]);
-		td->vars [index].def = NULL;
+		td->vars [index].ext_index = -1;
 		td->vars [index].size = size;
+		interp_mark_ref_slots_for_var (td, index);
 		// Every local takes a MINT_STACK_SLOT_SIZE so IL locals have same behavior as execution locals
 		offset += size;
 	}
@@ -4473,7 +4586,7 @@ interp_emit_sfld_access (TransformData *td, MonoClassField *field, MonoClass *fi
 				interp_add_ins (td, interp_get_ldind_for_mt (mt));
 				interp_ins_set_sreg (td->last_ins, td->sp [-1].var);
 				td->sp--;
-				push_type (td, stack_type [mt], field_class);
+				push_mono_type (td, ftype, mt, field_class);
 				interp_ins_set_dreg (td->last_ins, td->sp [-1].var);
 			}
 		} else {
@@ -4500,14 +4613,14 @@ interp_emit_sfld_access (TransformData *td, MonoClassField *field, MonoClass *fi
 				if (mt == MINT_TYPE_VT) {
 					push_type_vt (td, field_class, size);
 				} else {
-					push_type (td, stack_type [mt], field_class);
+					push_mono_type (td, ftype, mt, field_class);
 				}
 			} else if (mt == MINT_TYPE_VT) {
 				interp_add_ins (td, MINT_LDSFLD_VT);
 				push_type_vt (td, field_class, size);
 			} else {
 				interp_add_ins (td, MINT_LDSFLD_I1 + mt - MINT_TYPE_I1);
-				push_type (td, stack_type [mt], field_class);
+				push_mono_type (td, ftype, mt, field_class);
 			}
 			interp_ins_set_dreg (td->last_ins, td->sp [-1].var);
 		} else {
@@ -4533,6 +4646,33 @@ interp_emit_sfld_access (TransformData *td, MonoClassField *field, MonoClass *fi
 	}
 }
 
+static gboolean
+interp_handle_box_patterns (TransformData *td, MonoClass *box_class, const unsigned char *end, MonoImage *image, MonoGenericContext *generic_context, MonoError *error)
+{
+	const unsigned char *next_ip = td->ip + 5;
+	if (next_ip >= end || !interp_ip_in_cbb (td, GPTRDIFF_TO_INT (next_ip - td->il_code)))
+		return FALSE;
+	MonoMethod *method = td->inlined_method ? td->inlined_method : td->method;
+	MonoMethod *cmethod;
+	if (*next_ip == CEE_CALL &&
+			(cmethod = interp_get_method (method, read32 (next_ip + 1), image, generic_context, error)) &&
+			(cmethod->klass == mono_defaults.object_class) &&
+			(strcmp (cmethod->name, "GetType") == 0)) {
+		MonoType *klass_type = m_class_get_byval_arg (box_class);
+		MonoReflectionType* reflection_type = mono_type_get_object_checked (klass_type, error);
+		return_val_if_nok (error, FALSE);
+
+		td->sp--;
+		interp_add_ins (td, MINT_LDPTR);
+		push_type (td, STACK_TYPE_O, mono_defaults.runtimetype_class);
+		interp_ins_set_dreg (td->last_ins, td->sp [-1].var);
+		td->last_ins->data [0] = get_data_item_index (td, reflection_type);
+		td->ip = next_ip + 5;
+		return TRUE;
+	}
+	return FALSE;
+}
+
 static void
 initialize_clause_bblocks (TransformData *td)
 {
@@ -4543,21 +4683,22 @@ initialize_clause_bblocks (TransformData *td)
 
 	for (guint i = 0; i < header->num_clauses; i++) {
 		MonoExceptionClause *c = header->clauses + i;
-		InterpBasicBlock *bb;
+		InterpBasicBlock *try_bb, *bb;
 
 		for (uint32_t j = c->handler_offset; j < c->handler_offset + c->handler_len; j++) {
 			if (td->clause_indexes [j] == -1)
 				td->clause_indexes [j] = i;
 		}
 
-		bb = td->offset_to_bb [c->try_offset];
-		g_assert (bb);
-		bb->eh_block = TRUE;
+		try_bb = td->offset_to_bb [c->try_offset];
+		g_assert (try_bb);
+		try_bb->preserve = TRUE;
 
 		/* We never inline methods with clauses, so we can hard code stack heights */
 		bb = td->offset_to_bb [c->handler_offset];
 		g_assert (bb);
-		bb->eh_block = TRUE;
+		bb->preserve = TRUE;
+		bb->try_bblock = try_bb;
 
 		if (c->flags == MONO_EXCEPTION_CLAUSE_FINALLY) {
 			bb->stack_height = 0;
@@ -4573,7 +4714,9 @@ initialize_clause_bblocks (TransformData *td)
 		if (c->flags == MONO_EXCEPTION_CLAUSE_FILTER) {
 			bb = td->offset_to_bb [c->data.filter_offset];
 			g_assert (bb);
-			bb->eh_block = TRUE;
+			bb->preserve = TRUE;
+			bb->try_bblock = try_bb;
+
 			bb->stack_height = 1;
 			bb->stack_state = (StackInfo*) mono_mempool_alloc0 (td->mempool, sizeof (StackInfo));
 			bb->stack_state [0].type = STACK_TYPE_O;
@@ -4714,20 +4857,16 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 	td->in_start = td->ip = header->code;
 	end = td->ip + header->code_size;
 
-	td->cbb = td->entry_bb = (InterpBasicBlock*)mono_mempool_alloc0 (td->mempool, sizeof (InterpBasicBlock));
+	td->cbb = td->entry_bb = interp_alloc_bb (td);
 	if (td->gen_sdb_seq_points)
 		td->basic_blocks = g_list_prepend_mempool (td->mempool, td->basic_blocks, td->cbb);
 
-	td->cbb->index = td->bb_count++;
-	td->cbb->native_offset = -1;
 	td->cbb->stack_height = GPTRDIFF_TO_INT (td->sp - td->stack);
 
-	if (inlining) {
-		exit_bb = (InterpBasicBlock*)mono_mempool_alloc0 (td->mempool, sizeof (InterpBasicBlock));
-		exit_bb->index = td->bb_count++;
-		exit_bb->native_offset = -1;
-		exit_bb->stack_height = -1;
-	}
+	if (inlining)
+		exit_bb = interp_alloc_bb (td);
+	else
+		td->entry_bb->il_offset = 0;
 
 	il_targets = mono_bitset_mem_new (
 		mono_mempool_alloc0 (td->mempool, mono_bitset_alloc_size (header->code_size, 0)),
@@ -4799,6 +4938,14 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 			g_free (name);
 		}
 
+		if (td->optimized && !td->disable_ssa) {
+			// Add arg defining instructions for SSA machinery
+			for (int i = 0; i < num_args; i++) {
+				interp_add_ins (td, MINT_DEF_ARG);
+				interp_ins_set_dreg (td->last_ins, i);
+			}
+		}
+
 		if (rtm->vararg) {
 			// vararg calls are identical to normal calls on the call site. However, the
 			// first instruction in a vararg method needs to copy the variable arguments
@@ -4814,23 +4961,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 			td->has_localloc = TRUE;
 		}
 
-		/*
-		 * We initialize the locals regardless of the presence of the init_locals
-		 * flag. Locals holding references need to be zeroed so we don't risk
-		 * crashing the GC if they end up being stored in an object.
-		 *
-		 * FIXME
-		 * Track values of locals over multiple basic blocks. This would enable
-		 * us to kill the MINT_INITLOCALS instruction if all locals are initialized
-		 * before use. We also don't need this instruction if the init locals flag
-		 * is not set and there are no locals holding references.
-		 */
-		if (header->num_locals) {
-			interp_add_ins (td, MINT_INITLOCALS);
-			td->last_ins->data [0] = GUINT_TO_UINT16 (td->il_locals_offset);
-			td->last_ins->data [1] = GUINT_TO_UINT16 (td->il_locals_size);
-		}
-
 		guint16 enter_profiling = 0;
 		if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
 			enter_profiling |= TRACING_FLAG;
@@ -4874,11 +5004,32 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 		}
 
 		local_locals = (guint32*) g_malloc (header->num_locals * sizeof (guint32));
-		/* Allocate locals to store inlined method args from stack */
 		for (int i = 0; i < header->num_locals; i++)
 			local_locals [i] = interp_create_var (td, header->locals [i]);
 	}
 
+	/*
+	 * We initialize the locals regardless of the presence of the init_locals
+	 * flag. Locals holding references need to be zeroed so we don't risk
+	 * crashing the GC if they end up being stored in an object.
+	 */
+	if (header->num_locals) {
+		if (td->optimized) {
+			// Add individual initlocal for each IL local. These should
+			// all be optimized out by SSA cprop/deadce optimizations.
+			for (int i = 0; i < header->num_locals; i++) {
+				interp_add_ins (td, MINT_INITLOCAL);
+				int local_var = inlining ? local_locals [i] : (num_args + i);
+				td->last_ins->dreg = local_var;
+				td->last_ins->data [0] = GINT_TO_UINT16 (td->vars [local_var].size);
+			}
+		} else {
+			interp_add_ins (td, MINT_INITLOCALS);
+			td->last_ins->data [0] = GUINT_TO_UINT16 (td->il_locals_offset);
+			td->last_ins->data [1] = GUINT_TO_UINT16 (td->il_locals_size);
+		}
+	}
+
 	td->dont_inline = g_list_prepend (td->dont_inline, method);
 	while (td->ip < end) {
 		// Check here for every opcode to avoid code bloat
@@ -5121,7 +5272,8 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 			++td->ip;
 			break;
 		case CEE_LDC_I4_M1:
-			interp_add_ins (td, MINT_LDC_I4_M1);
+			interp_add_ins (td, MINT_LDC_I4_S);
+			td->last_ins->data [0] = (guint16)-1;
 			push_simple_type (td, STACK_TYPE_I4);
 			interp_ins_set_dreg (td->last_ins, td->sp [-1].var);
 			++td->ip;
@@ -5165,7 +5317,8 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 		case CEE_LDC_I4_6:
 		case CEE_LDC_I4_7:
 		case CEE_LDC_I4_8:
-			interp_add_ins (td, (*td->ip - CEE_LDC_I4_0) + MINT_LDC_I4_0);
+			interp_add_ins (td, MINT_LDC_I4_S);
+			td->last_ins->data [0] = *td->ip - CEE_LDC_I4_0;
 			push_simple_type (td, STACK_TYPE_I4);
 			interp_ins_set_dreg (td->last_ins, td->sp [-1].var);
 			++td->ip;
@@ -6136,6 +6289,11 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 
 					// First arg is dummy var, it is null when passed to the ctor
 					call_args [0] = interp_create_var (td, get_type_from_stack (stack_type [ret_mt], NULL));
+					if (!td->disable_ssa) {
+						// Make sure this arg is defined for SSA optimizations
+						interp_add_ins (td, MINT_DEF);
+					}
+					td->last_ins->dreg = call_args [0];
 					for (int i = 0; i < csignature->param_count; i++) {
 						call_args [i + 1] = td->sp [i].var;
 					}
@@ -6276,6 +6434,9 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 					push_type (td, stack_type [ret_mt], klass);
 					push_type (td, stack_type [ret_mt], klass);
 				}
+				// Make sure this arg is defined for SSA optimizations
+				interp_add_ins (td, MINT_DEF);
+				td->last_ins->dreg = td->sp [-1].var;
 				int dreg = td->sp [-2].var;
 
 				// Push back the params to top of stack. The original vars are maintained.
@@ -6321,7 +6482,26 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 			token = read32 (td->ip + 1);
 			klass = mini_get_class (method, token, generic_context);
 			CHECK_TYPELOAD (klass);
-			interp_handle_isinst (td, klass, isinst_instr);
+
+			if (isinst_instr && td->last_ins && MINT_IS_BOX (td->last_ins->opcode)) {
+				MonoClass *box_class;
+				if (td->last_ins->opcode == MINT_BOX_NULLABLE_PTR)
+					box_class = (MonoClass*)td->data_items [td->last_ins->data [0]];
+				else
+					box_class = ((MonoVTable*)td->data_items [td->last_ins->data [0]])->klass;
+				gboolean isinst = mono_class_is_assignable_from_internal (klass, box_class);
+				if (isinst) {
+					// We just leave boxed instance on the stack, nothing to do
+				} else {
+					td->sp--;
+					interp_add_ins (td, MINT_LDNULL);
+					push_type (td, STACK_TYPE_O, NULL);
+					interp_ins_set_dreg (td->last_ins, td->sp [-1].var);
+				}
+				td->ip += 5;
+			} else {
+				interp_handle_isinst (td, klass, isinst_instr);
+			}
 			break;
 		}
 		case CEE_CONV_R_UN:
@@ -6441,6 +6621,16 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 		case CEE_THROW:
 			if (!td->aggressive_inlining)
 				INLINE_FAILURE;
+			if (!inlining) {
+				guint32 il_offset = GINT_TO_UINT32(td->current_il_offset);
+				for (unsigned int i = 0; i < td->header->num_clauses; i++) {
+					MonoExceptionClause *clause = &td->header->clauses [i];
+					// If we throw during try and then catch we don't have the bblocks
+					// properly linked, just disable ssa for now
+					if (clause->flags == MONO_EXCEPTION_CLAUSE_NONE && (clause->try_offset <= il_offset) && (il_offset < (clause->try_offset + clause->try_len)))
+						td->disable_ssa = TRUE;
+				}
+			}
 			CHECK_STACK (td, 1);
 			interp_add_ins (td, MINT_THROW);
 			interp_ins_set_sreg (td->last_ins, td->sp [-1].var);
@@ -6537,7 +6727,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 					if (mt == MINT_TYPE_VT)
 						push_type_vt (td, field_klass, field_size);
 					else
-						push_type (td, stack_type [mt], field_klass);
+						push_mono_type (td, ftype, mt, field_klass);
 					interp_ins_set_dreg (td->last_ins, td->sp [-1].var);
 				} else {
 					if (G_UNLIKELY (m_field_is_from_update (field))) {
@@ -6567,7 +6757,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 					if (mt == MINT_TYPE_VT)
 						push_type_vt (td, field_klass, field_size);
 					else
-						push_type (td, stack_type [mt], field_klass);
+						push_mono_type (td, ftype, mt, field_klass);
 					interp_ins_set_dreg (td->last_ins, td->sp [-1].var);
 				}
 			}
@@ -6768,6 +6958,10 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 				klass = mini_get_class (method, token, generic_context);
 			CHECK_TYPELOAD (klass);
 
+			if (interp_handle_box_patterns (td, klass, end, image, generic_context, error))
+				break;
+			goto_if_nok (error, exit);
+
 			if (mono_class_is_nullable (klass)) {
 				MonoMethod *target_method = mono_class_get_method_from_name_checked (klass, "Box", 1, 0, error);
 				goto_if_nok (error, exit);
@@ -6778,8 +6972,11 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 				/* already boxed, do nothing. */
 				td->ip += 5;
 			} else {
-				if (G_UNLIKELY (m_class_is_byreflike (klass))) {
-					mono_error_set_bad_image (error, image, "Cannot box IsByRefLike type '%s.%s'", m_class_get_name_space (klass), m_class_get_name (klass));
+				if (G_UNLIKELY (m_class_is_byreflike (klass)) && !td->optimized) {
+					if (td->verbose_level)
+						g_print ("Box byreflike detected. Retry compilation with full optimization.\n");
+					td->retry_compilation = TRUE;
+					td->retry_with_inlining = TRUE;
 					goto exit;
 				}
 
@@ -7446,7 +7643,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 			} else {
 				handle_branch (td, MINT_BR, target_offset);
 			}
-			td->last_ins->info.target_bb->eh_block = TRUE;
+			td->last_ins->info.target_bb->preserve = TRUE;
 
 			if (*td->ip == CEE_LEAVE)
 				td->ip += 5;
@@ -7516,8 +7713,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 					int param_offset = get_tos_offset (td);
 
 					if (!MONO_TYPE_IS_VOID (info->sig->ret)) {
-						mt = mono_mint_type (info->sig->ret);
-						push_simple_type (td, stack_type [mt]);
+						push_mono_type (td, info->sig->ret, -1, NULL);
 						dreg = td->sp [-1].var;
 					} else {
 						// dummy dreg
@@ -8008,7 +8204,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 				CHECK_TYPELOAD (klass);
 				if (m_class_is_valuetype (klass)) {
 					--td->sp;
-					interp_add_ins (td, MINT_INITOBJ);
+					interp_add_ins (td, MINT_ZEROBLK_IMM);
 					interp_ins_set_sreg (td->last_ins, td->sp [0].var);
 					i32 = mono_class_value_size (klass, NULL);
 					g_assert (i32 < G_MAXUINT16);
@@ -8266,13 +8462,21 @@ interp_compute_native_offset_estimates (TransformData *td)
 	for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
 		InterpInst *ins;
 		bb->native_offset_estimate = noe;
-		if (bb->emit_patchpoint)
+		if (!td->optimized && bb->patchpoint_bb)
 			noe += 2;
 
+#if HOST_BROWSER
+		// We don't know in advance whether a bb will have a trace entry point,
+		//  but we know that it will only ever have one trace entry point, so
+		//  reserve space for it so we can correctly insert one later
+		if (mono_jiterp_is_enabled ())
+			noe += 4;
+#endif
+
 		for (ins = bb->first_ins; ins != NULL; ins = ins->next) {
 			int opcode = ins->opcode;
 			// Skip dummy opcodes for more precise offset computation
-			if (MINT_IS_NOP (opcode))
+			if (MINT_IS_EMIT_NOP (opcode))
 				continue;
 			noe += interp_get_ins_length (ins);
 			if (!td->optimized)
@@ -8320,11 +8524,83 @@ get_short_brop (int opcode)
 	return opcode;
 }
 
+static void
+interp_mark_ref_slots_for_vt (TransformData *td, int base_offset, MonoClass *klass)
+{
+	if (!m_class_has_references (klass) && !m_class_has_ref_fields (klass))
+		return;
+
+	gpointer iter = NULL;
+	MonoClassField *field;
+	while ((field = mono_class_get_fields_internal (klass, &iter))) {
+		MonoType *ftype = mono_field_get_type_internal (field);
+		if (ftype->attrs & FIELD_ATTRIBUTE_STATIC)
+			continue;
+		int offset = base_offset + m_field_get_offset (field) - MONO_ABI_SIZEOF (MonoObject);
+retry:
+		if (mini_type_is_reference (ftype) || ftype->type == MONO_TYPE_I || ftype->type == MONO_TYPE_U || m_type_is_byref (ftype)) {
+			int index = offset / sizeof (gpointer);
+			mono_bitset_set_fast (td->ref_slots, index);
+			if (td->verbose_level)
+				g_print ("Stack ref slot vt field at off %d\n", offset);
+		} else if (ftype->type == MONO_TYPE_VALUETYPE || ftype->type == MONO_TYPE_GENERICINST) {
+			interp_mark_ref_slots_for_vt (td, offset, mono_class_from_mono_type_internal (ftype));
+		}
+
+		if (m_class_is_inlinearray (klass)) {
+			int max_offset = base_offset + m_class_get_instance_size (klass) - MONO_ABI_SIZEOF (MonoObject);
+	                int align;
+			int field_size = mono_type_size (ftype, &align);
+			offset += field_size;
+			offset = ALIGN_TO (offset, align);
+			if (offset < max_offset)
+				goto retry;
+		}
+	}
+}
+
+void
+interp_mark_ref_slots_for_var (TransformData *td, int var)
+{
+	if (!(mono_interp_opt & INTERP_OPT_PRECISE_GC))
+		return;
+
+	g_assert (td->vars [var].offset != -1);
+
+	gsize max_index = (td->vars [var].offset + td->vars [var].size) / sizeof (gpointer);
+
+	if (!td->ref_slots || max_index >= td->ref_slots->size) {
+		guint32 old_size = td->ref_slots ? (guint32)td->ref_slots->size : 0;
+		guint32 new_size = old_size ? old_size * 2 : 32;
+
+		gpointer mem = mono_mempool_alloc0 (td->mempool, mono_bitset_alloc_size (new_size, 0));
+		MonoBitSet *new_ref_slots = mono_bitset_mem_new (mem, new_size, 0);
+
+		if (old_size)
+			memcpy (&new_ref_slots->data, &td->ref_slots->data, old_size / 8);
+		td->ref_slots = new_ref_slots;
+	}
+
+	MonoType *type = td->vars [var].type;
+	if (td->vars [var].mt == MINT_TYPE_VT) {
+		MonoClass *klass = mono_class_from_mono_type_internal (type);
+		interp_mark_ref_slots_for_vt (td, td->vars [var].offset, klass);
+	} else {
+		// Managed pointers in interp are normally MONO_TYPE_I
+		if (mini_type_is_reference (type) || type->type == MONO_TYPE_I || type->type == MONO_TYPE_U || m_type_is_byref (type)) {
+			int index = td->vars [var].offset / sizeof (gpointer);
+			mono_bitset_set_fast (td->ref_slots, index);
+			if (td->verbose_level)
+				g_print ("Stack ref slot at off %d for var %d\n", index * sizeof (gpointer), var);
+		}
+	}
+}
+
 static int
-get_local_offset (TransformData *td, int local)
+get_var_offset (TransformData *td, int var)
 {
-	if (td->vars [local].offset != -1)
-		return td->vars [local].offset;
+	if (td->vars [var].offset != -1)
+		return td->vars [var].offset;
 
 	// FIXME Some vars might end up with unitialized offset because they are not declared at all in the code.
 	// This can happen if the bblock declaring the var gets removed, while other unreachable bblocks, that access
@@ -8336,10 +8612,11 @@ get_local_offset (TransformData *td, int local)
 	// If we use the optimized offset allocator, all locals should have had their offsets already allocated
 	g_assert (!td->optimized);
 	// The only remaining locals to allocate are the ones from the execution stack
-	g_assert (td->vars [local].execution_stack);
+	g_assert (td->vars [var].execution_stack);
 
-	td->vars [local].offset = td->total_locals_size + td->vars [local].stack_offset;
-	return td->vars [local].offset;
+	td->vars [var].offset = td->total_locals_size + td->vars [var].stack_offset;
+	interp_mark_ref_slots_for_var (td, var);
+	return td->vars [var].offset;
 }
 
 static guint16*
@@ -8360,13 +8637,13 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
 		g_array_append_val (td->line_numbers, lne);
 	}
 
-	if (opcode == MINT_NOP || opcode == MINT_DEF || opcode == MINT_DUMMY_USE)
+	if (MINT_IS_EMIT_NOP (opcode))
 		return ip;
 
 	*ip++ = opcode;
 	if (opcode == MINT_SWITCH) {
 		int labels = READ32 (&ins->data [0]);
-		*ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [0]));
+		*ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->sregs [0]));
 		// Write number of switch labels
 		*ip++ = ins->data [0];
 		*ip++ = ins->data [1];
@@ -8394,7 +8671,7 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
 		const int br_offset = GPTRDIFF_TO_INT (start_ip - td->new_code);
 		gboolean has_imm = opcode >= MINT_BEQ_I4_IMM_SP && opcode <= MINT_BLT_UN_I8_IMM_SP;
 		for (int i = 0; i < mono_interp_op_sregs [opcode]; i++)
-			*ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [i]));
+			*ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->sregs [i]));
 		if (has_imm)
 			*ip++ = ins->data [0];
 
@@ -8413,9 +8690,21 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
 			ip--;
 		} else {
 			// If the estimate offset is short, then surely the real offset is short
-			gboolean is_short = interp_is_short_offset (br_offset, ins->info.target_bb->native_offset_estimate);
+			// otherwise we conservatively have to use long branch opcodes
+			int cur_estimation_error = td->cbb->native_offset_estimate - td->cbb->native_offset;
+			int target_bb_estimated_offset = ins->info.target_bb->native_offset_estimate - cur_estimation_error;
+			gboolean is_short = interp_is_short_offset (br_offset, target_bb_estimated_offset);
 			if (is_short)
 				*start_ip = GINT_TO_OPCODE (get_short_brop (opcode));
+			else if (MINT_IS_SUPER_BRANCH (opcode)) {
+				g_printf (
+					"long superbranch detected with opcode %d (%s) in method %s.%s\n",
+					opcode, mono_interp_opname (opcode),
+					m_class_get_name (td->method->klass), td->method->name
+				);
+				// FIXME missing handling for long branch
+				g_assert (FALSE);
+			}
 
 			// We don't know the in_offset of the target, add a reloc
 			Reloc *reloc = (Reloc*)mono_mempool_alloc0 (td->mempool, sizeof (Reloc));
@@ -8461,9 +8750,18 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
 		guint16 foff = ins->data [0];
 		guint16 mt = ins->data [1];
 		guint16 fsize = ins->data [2];
+		ip--;
+
+		if (opcode == MINT_MOV_DST_OFF && get_var_offset (td, ins->dreg) != get_var_offset (td, ins->sregs [1])) {
+			// We are no longer storing a field into the same valuetype. Copy also the whole vt.
+			*ip++ = MINT_MOV_VT;
+			*ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->dreg));
+			*ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->sregs [1]));
+			*ip++ = GINT_TO_UINT16 (td->vars [ins->dreg].size);
+		}
 
-		int dest_off = get_local_offset (td, ins->dreg);
-		int src_off = get_local_offset (td, ins->sregs [0]);
+		int dest_off = get_var_offset (td, ins->dreg);
+		int src_off = get_var_offset (td, ins->sregs [0]);
 		if (opcode == MINT_MOV_SRC_OFF)
 			src_off += foff;
 		else
@@ -8487,8 +8785,7 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
 				}
 			}
 		}
-		// Replace MINT_MOV_OFF with the real instruction
-		ip [-1] = opcode;
+		*ip++ = opcode;
 		*ip++ = GINT_TO_UINT16 (dest_off);
 		*ip++ = GINT_TO_UINT16 (src_off);
 		if (opcode == MINT_MOV_VT)
@@ -8522,17 +8819,54 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
 		// actually vars. Resolve their offset
 		int num_vars = mono_interp_oplen [opcode] - 1;
 		for (int i = 0; i < num_vars; i++)
-			*ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->data [i]));
+			*ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->data [i]));
 	} else if (opcode == MINT_MOV_STACK_UNOPT) {
 		g_assert (!td->optimized);
 		// ins->data [0] represents the stack offset of the call args (within the execution stack)
 		*ip++ = GINT_TO_UINT16 (td->param_area_offset + ins->data [0]);
 		*ip++ = GINT_TO_UINT16 (ins->data [1]);
 		*ip++ = GINT_TO_UINT16 (ins->data [2]);
+	} else if (opcode == MINT_INTRINS_MARVIN_BLOCK) {
+		// Generated only in unoptimized code
+		int var0 = ins->sregs [0];
+		int var1 = ins->sregs [1];
+		g_assert (var0 == ins->data [0]);
+		g_assert (var1 == ins->data [1]);
+
+		*ip++ = GINT_TO_UINT16 (get_var_offset (td, var0));
+		*ip++ = GINT_TO_UINT16 (get_var_offset (td, var1));
+		*ip++ = GINT_TO_UINT16 (get_var_offset (td, var0));
+		*ip++ = GINT_TO_UINT16 (get_var_offset (td, var1));
+	} else if (opcode == MINT_INTRINS_MARVIN_BLOCK_SSA1) {
+		int var0 = ins->sregs [0];
+		int var1 = ins->sregs [1];
+		g_assert (ins->next->opcode == MINT_INTRINS_MARVIN_BLOCK_SSA2);
+		g_assert (var0 == ins->next->sregs [0]);
+		g_assert (var1 == ins->next->sregs [1]);
+		int dvar0 = ins->dreg;
+		int dvar1 = ins->next->dreg;
+		ip [-1] = MINT_INTRINS_MARVIN_BLOCK;
+		*ip++ = GINT_TO_UINT16 (get_var_offset (td, var0));
+		*ip++ = GINT_TO_UINT16 (get_var_offset (td, var1));
+		*ip++ = GINT_TO_UINT16 (get_var_offset (td, dvar0));
+		*ip++ = GINT_TO_UINT16 (get_var_offset (td, dvar1));
+
+		ins->next->opcode = MINT_NOP;
+		InterpInst *next = interp_next_ins (ins);
+		// We ensure that next->sregs [0] is not used again, it will no longer be set by intrinsic
+		if (next->opcode == MINT_MOV_4 && td->var_values && td->var_values [next->sregs [0]].ref_count == 1) {
+			if (next->sregs [0] == dvar0) {
+				ip [-2] = GINT_TO_UINT16 (get_var_offset (td, next->dreg));
+				next->opcode = MINT_NOP;
+			} else if (next->sregs [0] == dvar1) {
+				ip [-1] = GINT_TO_UINT16 (get_var_offset (td, next->dreg));
+				next->opcode = MINT_NOP;
+			}
+		}
 	} else {
 opcode_emit:
 		if (mono_interp_op_dregs [opcode])
-			*ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->dreg));
+			*ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->dreg));
 
 		if (mono_interp_op_sregs [opcode]) {
 			for (int i = 0; i < mono_interp_op_sregs [opcode]; i++) {
@@ -8540,12 +8874,12 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
 					int offset = td->param_area_offset + ins->info.call_info->call_offset;
 					*ip++ = GINT_TO_UINT16 (offset);
 				} else {
-					*ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [i]));
+					*ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->sregs [i]));
 				}
 			}
 		} else if (opcode == MINT_LDLOCA_S) {
 			// This opcode receives a local but it is not viewed as a sreg since we don't load the value
-			*ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [0]));
+			*ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->sregs [0]));
 		}
 
 		int left = interp_get_ins_length (ins) - GPTRDIFF_TO_INT(ip - start_ip);
@@ -8579,11 +8913,6 @@ generate_compacted_code (InterpMethod *rtm, TransformData *td)
 	int patchpoint_data_index = 0;
 	td->relocs = g_ptr_array_new ();
 	InterpBasicBlock *bb;
-#if HOST_BROWSER
-	#define BACKWARD_BRANCH_OFFSETS_SIZE 64
-	unsigned int backward_branch_offsets_count = 0;
-	guint16 backward_branch_offsets[BACKWARD_BRANCH_OFFSETS_SIZE] = { 0 };
-#endif
 
 	// This iteration could be avoided at the cost of less precise size result, following
 	// super instruction pass
@@ -8601,18 +8930,12 @@ generate_compacted_code (InterpMethod *rtm, TransformData *td)
 	for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
 		InterpInst *ins = bb->first_ins;
 		bb->native_offset = GPTRDIFF_TO_INT (ip - td->new_code);
+		g_assert (bb->native_offset <= bb->native_offset_estimate);
 		td->cbb = bb;
 
-#if HOST_BROWSER
-		if (bb->backwards_branch_target && rtm->contains_traces) {
-			if (backward_branch_offsets_count < BACKWARD_BRANCH_OFFSETS_SIZE)
-				backward_branch_offsets[backward_branch_offsets_count++] = ip - td->new_code;
-		}
-#endif
-
 		if (bb->patchpoint_data)
 			patchpoint_data_index = add_patchpoint_data (td, patchpoint_data_index, bb->native_offset, bb->index);
-		if (bb->emit_patchpoint) {
+		if (!td->optimized && bb->patchpoint_bb) {
 			// Add patchpoint in unoptimized method
 			*ip++ = MINT_TIER_PATCHPOINT;
 			*ip++ = (guint16)bb->index;
@@ -8622,17 +8945,6 @@ generate_compacted_code (InterpMethod *rtm, TransformData *td)
 			if (ins->opcode == MINT_TIER_PATCHPOINT_DATA) {
 				int native_offset = (int)(ip - td->new_code);
 				patchpoint_data_index = add_patchpoint_data (td, patchpoint_data_index, native_offset, -ins->data [0]);
-#if HOST_BROWSER
-			} else if (rtm->contains_traces && (
-				(ins->opcode == MINT_CALL_HANDLER_S) || (ins->opcode == MINT_CALL_HANDLER)
-			)) {
-				// While this formally isn't a backward branch target, we want to record
-				//  the offset of its following instruction so that the jiterpreter knows
-				//  to generate the necessary dispatch code to enable branching back to it.
-				ip = emit_compacted_instruction (td, ip, ins);
-				if (backward_branch_offsets_count < BACKWARD_BRANCH_OFFSETS_SIZE)
-					backward_branch_offsets[backward_branch_offsets_count++] = ip - td->new_code;
-#endif
 			} else {
 				ip = emit_compacted_instruction (td, ip, ins);
 			}
@@ -8647,16 +8959,6 @@ generate_compacted_code (InterpMethod *rtm, TransformData *td)
 	handle_relocations (td);
 
 	g_ptr_array_free (td->relocs, TRUE);
-
-#if HOST_BROWSER
-	if (backward_branch_offsets_count > 0) {
-		rtm->backward_branch_offsets = imethod_alloc0 (td, backward_branch_offsets_count * sizeof(guint16));
-		rtm->backward_branch_offsets_count = backward_branch_offsets_count;
-		memcpy(rtm->backward_branch_offsets, backward_branch_offsets, backward_branch_offsets_count * sizeof(guint16));
-	}
-
-	#undef BACKWARD_BRANCH_OFFSETS_SIZE
-#endif
 }
 
 /*
@@ -8703,6 +9005,40 @@ interp_fix_localloc_ret (TransformData *td)
 	}
 }
 
+static void
+interp_squash_initlocals (TransformData *td)
+{
+	InterpInst *last_initlocal = NULL;
+	int last_start = 0, last_end = 0;
+
+	for (InterpInst *ins = td->entry_bb->first_ins; ins != NULL; ins = ins->next) {
+		// Once we reach the real method code, we are finished with this pass
+		if (ins->il_offset != -1)
+			break;
+		if (ins->opcode == MINT_INITLOCAL) {
+			if (!last_initlocal) {
+				last_initlocal = ins;
+				last_start = get_var_offset (td, ins->dreg);
+				last_end = last_start + (int)ins->data [0];
+			} else {
+				int new_start = get_var_offset (td, ins->dreg);
+				// We allow a maximum of 64 bytes of redundant memset when squashing initlocals
+				if (new_start >= last_end && new_start <= (last_end + 64)) {
+					last_initlocal->opcode = MINT_INITLOCALS;
+					last_initlocal->data [0] = GINT_TO_UINT16 (last_start);
+					last_end = new_start + ins->data [0];
+					last_initlocal->data [1] = GINT_TO_UINT16 (last_end - last_start);
+					interp_clear_ins (ins);
+				} else {
+					last_initlocal = ins;
+					last_start = get_var_offset (td, ins->dreg);
+					last_end = last_start + ins->data [0];
+				}
+			}
+		}
+	}
+}
+
 static int
 get_native_offset (TransformData *td, int il_offset)
 {
@@ -8722,9 +9058,10 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
 {
 	TransformData transform_data;
 	TransformData *td;
-	gboolean retry_compilation = FALSE;
 	static gboolean verbose_method_inited;
 	static char* verbose_method_name;
+	gboolean retry_compilation = FALSE;
+	gboolean retry_with_inlining = FALSE;
 
 	if (!verbose_method_inited) {
 		verbose_method_name = g_getenv ("MONO_VERBOSE_METHOD");
@@ -8762,7 +9099,8 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
 		// Optimizing the method can lead to deadce and better var offset allocation
 		// reducing the likelihood of local space overflow.
 		td->optimized = rtm->optimized = TRUE;
-		td->disable_inlining = TRUE;
+		if (!retry_with_inlining)
+			td->disable_inlining = TRUE;
 	} else {
 		td->optimized = rtm->optimized;
 		td->disable_inlining = !td->optimized;
@@ -8799,7 +9137,8 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
 	td->line_numbers = g_array_new (FALSE, TRUE, sizeof (MonoDebugLineNumberEntry));
 	td->current_il_offset = -1;
 
-	generate_code (td, method, header, generic_context, error);
+	if (!generate_code (td, method, header, generic_context, error))
+		goto exit;
 	goto_if_nok (error, exit);
 
 	// Any newly created instructions will have undefined il_offset
@@ -8812,12 +9151,15 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
 	if (td->has_localloc)
 		interp_fix_localloc_ret (td);
 
-	if (td->verbose_level)
+	if (td->verbose_level) {
+		g_print ("\nUnoptimized IR:\n");
 		mono_interp_print_td_code (td);
+	}
 
 	if (td->optimized) {
-		interp_optimize_code (td);
+		MONO_TIME_TRACK (mono_interp_stats.optimize_time, interp_optimize_code (td));
 		interp_alloc_offsets (td);
+		interp_squash_initlocals (td);
 #if HOST_BROWSER
 		if (mono_interp_opt & INTERP_OPT_JITERPRETER)
 			jiterp_insert_entry_points (rtm, td);
@@ -8826,6 +9168,15 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
 
 	generate_compacted_code (rtm, td);
 
+	if (td->optimized) {
+		// Offset allocator and compacted code generation use computed ref counts
+		// from var values. We have to free this table later here.
+		if (td->var_values != NULL) {
+			g_free (td->var_values);
+			td->var_values = NULL;
+		}
+	}
+
 	if (td->total_locals_size >= G_MAXUINT16) {
 		if (td->disable_inlining && td->optimized) {
 			char *name = mono_method_get_full_name (method);
@@ -8833,17 +9184,18 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
 			g_free (name);
 			mono_error_set_generic_error (error, "System", "InvalidProgramException", "%s", msg);
 			g_free (msg);
-			retry_compilation = FALSE;
+			td->retry_compilation = FALSE;
 			goto exit;
 		} else {
 			// We give the method another chance to compile with inlining disabled and optimization enabled
 			if (td->verbose_level)
 				g_print ("Local space overflow. Retrying compilation\n");
-			retry_compilation = TRUE;
+			td->retry_compilation = TRUE;
+			td->retry_with_inlining = FALSE;
 			goto exit;
 		}
 	} else {
-		retry_compilation = FALSE;
+		td->retry_compilation = FALSE;
 	}
 
 	if (td->verbose_level) {
@@ -8893,6 +9245,21 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
 	mono_interp_register_imethod_data_items (rtm->data_items, td->imethod_items);
 	rtm->patchpoint_data = td->patchpoint_data;
 
+	if (td->ref_slots) {
+		gpointer ref_slots_mem = mono_mem_manager_alloc0 (td->mem_manager, mono_bitset_alloc_size (rtm->alloca_size / sizeof (gpointer), 0));
+		rtm->ref_slots = mono_bitset_mem_new (ref_slots_mem, rtm->alloca_size / sizeof (gpointer), 0);
+		gsize copy_size = rtm->ref_slots->size;
+		if (td->ref_slots->size < copy_size)
+			copy_size = td->ref_slots->size;
+		memcpy (&rtm->ref_slots->data, &td->ref_slots->data, copy_size / 8);
+		if (!td->optimized) {
+			// Unoptimized code can have some stack slot moving patterns as part of calls.
+			// Just conservatively mark all these slots as potentially containing refs.
+			for (guint32 offset = rtm->locals_size; offset < rtm->alloca_size; offset += sizeof (gpointer))
+				mono_bitset_set (rtm->ref_slots, offset / sizeof (gpointer));
+		}
+	}
+
 	/* Save debug info */
 	interp_save_debug_info (rtm, header, td, td->line_numbers);
 
@@ -8945,8 +9312,11 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
 	g_slist_free (td->imethod_items);
 	mono_mempool_destroy (td->mempool);
 	mono_interp_pgo_generate_end ();
-	if (retry_compilation)
+	if (td->retry_compilation) {
+		retry_compilation = TRUE;
+		retry_with_inlining = td->retry_with_inlining;
 		goto retry;
+	}
 }
 
 gboolean
diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h
index 1949698ca4a7..1e6185f8089c 100644
--- a/src/mono/mono/mini/interp/transform.h
+++ b/src/mono/mono/mini/interp/transform.h
@@ -14,6 +14,9 @@
 #define INTERP_INST_FLAG_ACTIVE_CALL 64
 // This instruction is protected by a clause
 #define INTERP_INST_FLAG_PROTECTED_NEWOBJ 128
+// This instruction bumps the liveness index. Enables liveness checks as new instructions
+// are added in the code, since new instructions won't have this flag set.
+#define INTERP_INST_FLAG_LIVENESS_MARKER 256
 
 typedef struct _InterpInst InterpInst;
 typedef struct _InterpBasicBlock InterpBasicBlock;
@@ -41,11 +44,15 @@ typedef struct
 #define VAR_VALUE_I8 3
 #define VAR_VALUE_R4 4
 #define VAR_VALUE_NON_NULL 5
+#define VAR_VALUE_COUNT 6
 
-// LocalValue contains data to construct an InterpInst that is equivalent with the contents
-// of the stack slot / local / argument.
 typedef struct {
-	// Indicates the type of the stored information. It can be another local or a constant
+	guint32 bb_dfs_index;
+	guint32 ins_index;
+} InterpLivenessPosition;
+
+typedef struct {
+	// Indicates the type of the stored information. It can be another var or a constant
 	int type;
 	// Holds the local index or the actual constant value
 	union {
@@ -55,9 +62,11 @@ typedef struct {
 		float f;
 	};
 	// The instruction that writes this local.
-	InterpInst *ins;
-	int def_index;
-	// ref count for ins->dreg
+	InterpInst *def;
+	// Liveness marker of the definition
+	InterpLivenessPosition liveness;
+	// The number of times this var is referenced. After optimizations
+	// this can become 0, in which case we can clear the def instruction.
 	int ref_count;
 } InterpVarValue;
 
@@ -78,6 +87,8 @@ struct _InterpInst {
 		InterpBasicBlock *target_bb;
 		InterpBasicBlock **target_bb_table;
 		InterpCallInfo *call_info;
+		int *args; // for variable number of args, used only for phi
+		MonoBitSet *dead_phi_vars; // only for MINT_DEAD_PHI
 	} info;
 	// Variable data immediately following the dreg/sreg information. This is represented exactly
 	// in the final code stream as in this array.
@@ -98,6 +109,21 @@ struct _InterpBasicBlock {
 	gint16 out_count;
 	InterpBasicBlock **out_bb;
 
+	/* Index into td->bblocks */
+	int dfs_index;
+
+	/* Dominance frontier for this bblock */
+	MonoBitSet *dfrontier;
+
+	/* List of bblocks that are immediately dominated by this bblock */
+	GSList *dominated;
+
+	/* Live variable analysis, for vars in locals_ext */
+	MonoBitSet *gen_set;
+	MonoBitSet *kill_set;
+	MonoBitSet *live_in_set;
+	MonoBitSet *live_out_set;
+
 	/* The real native offset of this bblock, computed when emitting the instructions in the code stream */
 	int native_offset;
 	/*
@@ -119,13 +145,15 @@ struct _InterpBasicBlock {
 	int index;
 	int jump_targets;
 
+	InterpBasicBlock *try_bblock;
+
 	// This will hold a list of last sequence points of incoming basic blocks
 	SeqPoint **pred_seq_points;
 	guint num_pred_seq_points;
 
 	guint reachable : 1;
 	// This block has special semantics and it shouldn't be optimized away
-	guint eh_block : 1;
+	guint preserve : 1;
 	guint dead: 1;
 	// This bblock is detectead early as being dead, we don't inline into it
 	guint no_inlining: 1;
@@ -133,7 +161,7 @@ struct _InterpBasicBlock {
 	// InterpMethod. In the unoptimized method we will map from native offset to the bb_index while in the
 	// optimized method we will map the bb_index to the corresponding native offset.
 	guint patchpoint_data: 1;
-	guint emit_patchpoint: 1;
+	guint patchpoint_bb: 1;
 	// used by jiterpreter
 	guint backwards_branch_target: 1;
 	guint contains_call_instruction: 1;
@@ -174,6 +202,8 @@ typedef struct {
 	int indirects;
 	int offset;
 	int size;
+	int ext_index;
+	GSList *declare_bbs;
 	union {
 		// live_start and live_end are used by the offset allocator for optimized code
 		int live_start;
@@ -187,21 +217,50 @@ typedef struct {
 		// If var is INTERP_LOCAL_FLAG_CALL_ARGS, this is the call instruction using it.
 		// Only used during var offset allocator
 		InterpInst *call;
-		// For local vars, this represents the instruction declaring it.
-		// Only used during super instruction pass.
-		InterpInst *def;
 	};
 
-	guint dead : 1;
 	guint execution_stack : 1;
 	guint call_args : 1;
 	guint global : 1;
 	guint no_call_args : 1;
-	guint unknown_use : 1;
-	guint local_only : 1;
 	guint simd : 1; // We use this flag to avoid addition of align field in InterpVar, for now
+	guint eh_var : 1; // This var is used inside a clause handler. It will not be in ssa form.
+	guint no_ssa : 1; // Var is not in ssa form, not subject to all optimizations
+	guint has_indirects : 1; // Var had ldloca applied to it, not subject to optimizations
+	guint il_global : 1; // Args and IL locals
+	guint renamed_ssa_fixed : 1; // If true, ext_index points to InterpRenamedVar, otherwise to InterpRenamableVar
+	guint def_arg : 1; // Var is a result of MINT_DEF_ARG. This var will have to be renamed back to the original arg var
 } InterpVar;
 
+typedef struct {
+	int var_index;
+	GSList *ssa_stack;
+	// This liveness is bblock only. It is used during cprop to determine whether we
+	// can move the definition of a renamed fixed var earlier (if there are no conflicts with
+	// other renamed vars from the same var)
+	InterpLivenessPosition last_use_liveness;
+
+	// Var that is global and might take part in phi opcodes
+	guint ssa_global : 1;
+	// IL locals/args. Vars included in phi opcodes. All renamed vars are allocated
+	// to the same offset. Optimizations need to ensure there is no overlapping liveness
+	guint ssa_fixed : 1;
+} InterpRenamableVar;
+
+// In addition to InterpRenamableVar information, this stores liveness information that enables us
+// to ensure that the liveness of the corresponding var is not overlapping with the other renamed vars,
+// after optimization.
+typedef struct {
+	int var_index;
+	int renamable_var_ext_index;
+	// Bit set of bblocks where the renamed var is live at the bb end
+	// This means that within these bblocks we can freely increase the var liveness
+	MonoBitSet *live_out_bblocks;
+	// This is a list of InterpLivenessPosition*, that indicates that in bblock with
+	// index bb_index, the var can have its liveness extended to at most inst_index
+	GSList *live_limit_bblocks;
+} InterpRenamedFixedVar;
+
 typedef struct
 {
 	MonoMethod *method;
@@ -235,6 +294,19 @@ typedef struct
 	unsigned int vars_size;
 	unsigned int vars_capacity;
 
+	// Additional information for vars that are renamable
+	InterpRenamableVar *renamable_vars;
+	unsigned int renamable_vars_size;
+	unsigned int renamable_vars_capacity;
+
+	// Newly created, renamed vars of fixed vars. We compute liveness on this subset
+	// of vars so we ensure we don't have conflicting liveness.
+	unsigned int renamed_fixed_vars_size;
+	unsigned int renamed_fixed_vars_capacity;
+	InterpRenamedFixedVar *renamed_fixed_vars;
+
+	InterpVarValue *var_values;
+
 	int n_data_items;
 	int max_data_items;
 	void **data_items;
@@ -250,8 +322,13 @@ typedef struct
 	GPtrArray *seq_points;
 	InterpBasicBlock **offset_to_bb;
 	InterpBasicBlock *entry_bb, *cbb;
+	InterpBasicBlock **bblocks; // ordering of bblocks in reverse postorder dfs
+	int bblocks_count_no_eh;
+	int bblocks_count_eh;
+	InterpBasicBlock **idoms; // immediate dominator for each bblock, index from reverse postorder dfs
 	int bb_count;
 	MonoMemPool     *mempool;
+	MonoMemPool     *opt_mempool;
 	MonoMemoryManager *mem_manager;
 	GList *basic_blocks;
 	GPtrArray *relocs;
@@ -263,6 +340,8 @@ typedef struct
 	int inline_depth;
 	int patchpoint_data_n;
 	int *patchpoint_data;
+	// This marks each stack slot offset that might contain refs throughout the execution of this method
+	MonoBitSet *ref_slots;
 	guint has_localloc : 1;
 	// If method compilation fails due to certain limits being exceeded, we disable inlining
 	// and retry compilation.
@@ -273,6 +352,11 @@ typedef struct
 	guint optimized : 1;
 	guint has_invalid_code : 1;
 	guint has_inlined_one_call : 1;
+	guint need_optimization_retry : 1;
+	guint disable_ssa : 1;
+	guint eh_vars_computed : 1;
+	guint retry_compilation : 1;
+	guint retry_with_inlining : 1;
 } TransformData;
 
 #define STACK_TYPE_I4 0
@@ -449,12 +533,20 @@ interp_alloc_global_var_offset (TransformData *td, int var);
 int
 interp_create_var (TransformData *td, MonoType *type);
 
+int
+interp_make_var_renamable (TransformData *td, int var);
+
+int
+interp_create_renamed_fixed_var (TransformData *td, int var_index, int renamable_var_index);
+
 void
 interp_foreach_ins_var (TransformData *td, InterpInst *ins, gpointer data, void (*callback)(TransformData*, int*, gpointer));
 
 void
 interp_foreach_ins_svar (TransformData *td, InterpInst *ins, gpointer data, void (*callback)(TransformData*, int*, gpointer));
 
+void
+interp_mark_ref_slots_for_var (TransformData *td, int var);
 
 /* Forward definitions for simd methods */
 static gboolean
diff --git a/src/mono/mono/mini/intrinsics.c b/src/mono/mono/mini/intrinsics.c
index 178da508ce2e..4835a84e2c91 100644
--- a/src/mono/mono/mini/intrinsics.c
+++ b/src/mono/mono/mini/intrinsics.c
@@ -82,11 +82,9 @@ mini_emit_inst_for_ctor (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignat
 	if (!(cfg->opt & MONO_OPT_INTRINS))
 		return ins;
 
-	if (cfg->opt & MONO_OPT_SIMD) {
-		ins = mono_emit_simd_intrinsics (cfg, cmethod, fsig, args);
-		if (ins)
-			return ins;
-	}
+	ins = mono_emit_simd_intrinsics (cfg, cmethod, fsig, args);
+	if (ins)
+		return ins;
 
 	ins = mono_emit_common_intrinsics (cfg, cmethod, fsig, args);
 	if (ins)
@@ -2093,11 +2091,9 @@ mini_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign
 		}
 	}
 
-	if (cfg->opt & MONO_OPT_SIMD) {
-		ins = mono_emit_simd_intrinsics (cfg, cmethod, fsig, args);
-		if (ins)
-			return ins;
-	}
+	ins = mono_emit_simd_intrinsics (cfg, cmethod, fsig, args);
+	if (ins)
+		return ins;
 
 	ins = mono_emit_common_intrinsics (cfg, cmethod, fsig, args);
 	if (ins)
diff --git a/src/mono/mono/mini/llvm-intrinsics.h b/src/mono/mono/mini/llvm-intrinsics.h
index 100b00d3d1ed..d4c7deea9ee8 100644
--- a/src/mono/mono/mini/llvm-intrinsics.h
+++ b/src/mono/mono/mini/llvm-intrinsics.h
@@ -26,6 +26,7 @@
 #define WidenAcross INTRIN_kind_widen_across
 #define Across INTRIN_kind_across
 #define Arm64DotProd INTRIN_kind_arm64_dot_prod
+#define AddPointer INTRIN_kind_add_pointer
 #if !defined(Generic)
 #define Generic
 #endif
@@ -361,6 +362,48 @@ INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_SADDV, aarch64_neon_saddv, Arm64, Across,
 INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_UADDV, aarch64_neon_uaddv, Arm64, Across, V64 | V128 | I1 | I2 | I4 | I8)
 INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_FADDV, aarch64_neon_faddv, Arm64, Across, V64 | V128 | R4 | R8)
 
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD1X2_V64, aarch64_neon_ld1x2, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD1X3_V64, aarch64_neon_ld1x3, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD1X4_V64, aarch64_neon_ld1x4, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD1X2_V128, aarch64_neon_ld1x2, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD1X3_V128, aarch64_neon_ld1x3, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD1X4_V128, aarch64_neon_ld1x4, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD2_V64, aarch64_neon_ld2, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD3_V64, aarch64_neon_ld3, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD4_V64, aarch64_neon_ld4, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD2_V128, aarch64_neon_ld2, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD3_V128, aarch64_neon_ld3, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD4_V128, aarch64_neon_ld4, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD2R_V64, aarch64_neon_ld2r, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD3R_V64, aarch64_neon_ld3r, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD4R_V64, aarch64_neon_ld4r, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD2R_V128, aarch64_neon_ld2r, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD3R_V128, aarch64_neon_ld3r, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD4R_V128, aarch64_neon_ld4r, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD2LANE_V64, aarch64_neon_ld2lane, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD3LANE_V64, aarch64_neon_ld3lane, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD4LANE_V64, aarch64_neon_ld4lane, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD2LANE_V128, aarch64_neon_ld2lane, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD3LANE_V128, aarch64_neon_ld3lane, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD4LANE_V128, aarch64_neon_ld4lane, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST2LANE_V64, aarch64_neon_st2lane, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST3LANE_V64, aarch64_neon_st3lane, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST4LANE_V64, aarch64_neon_st4lane, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST2LANE_V128, aarch64_neon_st2lane, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST3LANE_V128, aarch64_neon_st3lane, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST4LANE_V128, aarch64_neon_st4lane, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST1X2_V64, aarch64_neon_st1x2, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST1X3_V64, aarch64_neon_st1x3, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST1X4_V64, aarch64_neon_st1x4, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST1X2_V128, aarch64_neon_st1x2, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST1X3_V128, aarch64_neon_st1x3, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST1X4_V128, aarch64_neon_st1x4, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST2_V64, aarch64_neon_st2, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST3_V64, aarch64_neon_st3, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST4_V64, aarch64_neon_st4, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST2_V128, aarch64_neon_st2, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST3_V128, aarch64_neon_st3, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
+INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST4_V128, aarch64_neon_st4, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
 INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_SMAXV, aarch64_neon_smaxv, Arm64, Across, V64 | V128 | I1 | I2 | I4)
 INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_UMAXV, aarch64_neon_umaxv, Arm64, Across, V64 | V128 | I1 | I2 | I4)
 INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_SMINV, aarch64_neon_sminv, Arm64, Across, V64 | V128 | I1 | I2 | I4)
diff --git a/src/mono/mono/mini/method-to-ir.c b/src/mono/mono/mini/method-to-ir.c
index 86c4eb291587..8d569eae92a3 100644
--- a/src/mono/mono/mini/method-to-ir.c
+++ b/src/mono/mono/mini/method-to-ir.c
@@ -2320,16 +2320,13 @@ emit_type_load_failure (MonoCompile* cfg, MonoClass* klass)
 }
 
 static void
-emit_invalid_program_with_msg (MonoCompile *cfg, MonoError *error_msg, MonoMethod *caller, MonoMethod *callee)
+emit_invalid_program_with_msg (MonoCompile *cfg, char *error_msg)
 {
-	g_assert (!is_ok (error_msg));
-
-	char *str = mono_mem_manager_strdup (cfg->mem_manager, mono_error_get_message (error_msg));
 	MonoInst *iargs[1];
 	if (cfg->compile_aot)
-		EMIT_NEW_LDSTRLITCONST (cfg, iargs [0], str);
+		EMIT_NEW_LDSTRLITCONST (cfg, iargs [0], error_msg);
 	else
-		EMIT_NEW_PCONST (cfg, iargs [0], str);
+		EMIT_NEW_PCONST (cfg, iargs [0], error_msg);
 	mono_emit_jit_icall (cfg, mono_throw_invalid_program, iargs);
 }
 
@@ -3416,8 +3413,8 @@ mini_emit_box (MonoCompile *cfg, MonoInst *val, MonoClass *klass, int context_us
 	MonoInst *alloc, *ins;
 
 	if (G_UNLIKELY (m_class_is_byreflike (klass))) {
-		mono_error_set_bad_image (cfg->error, m_class_get_image (cfg->method->klass), "Cannot box IsByRefLike type '%s.%s'", m_class_get_name_space (klass), m_class_get_name (klass));
-		mono_cfg_set_exception (cfg, MONO_EXCEPTION_MONO_ERROR);
+		mono_error_set_invalid_program (cfg->error, "Cannot box IsByRefLike type '%s.%s'", m_class_get_name_space (klass), m_class_get_name (klass));
+		mono_cfg_set_exception (cfg, MONO_EXCEPTION_INVALID_PROGRAM);
 		return NULL;
 	}
 
@@ -3547,23 +3544,11 @@ method_needs_stack_walk (MonoCompile *cfg, MonoMethod *cmethod)
 	}
 
 	/*
-	 * In corelib code, methods which need to do a stack walk declare a StackCrawlMark local and pass it as an
-	 * arguments until it reaches an icall. Its hard to detect which methods do that especially with
-	 * StackCrawlMark.LookForMyCallersCaller, so for now, just hardcode the classes which contain the public
-	 * methods whose caller is needed.
+	 * Methods which do stack walks are marked with [System.Security.DynamicSecurityMethod] in the bcl.
+	 * This check won't work for StackCrawlMark.LookForMyCallersCaller, but thats not currently by the
+	 * stack walk code anyway.
 	 */
-	if (mono_is_corlib_image (m_class_get_image (cmethod->klass))) {
-		const char *cname = m_class_get_name (cmethod->klass);
-		if (!strcmp (cname, "Assembly") ||
-			!strcmp (cname, "AssemblyLoadContext") ||
-			(!strcmp (cname, "Activator"))) {
-			if (!strcmp (cmethod->name, "op_Equality"))
-				return FALSE;
-			return TRUE;
-		}
-	}
-
-	return FALSE;
+	return (cmethod->flags & METHOD_ATTRIBUTE_REQSECOBJ) != 0;
 }
 
 G_GNUC_UNUSED MonoInst*
@@ -4747,11 +4732,11 @@ mini_inline_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *
 }
 
 static gboolean
-aggressive_inline_method (MonoMethod *cmethod)
+aggressive_inline_method (MonoCompile *cfg, MonoMethod *cmethod)
 {
 	gboolean aggressive_inline = m_method_is_aggressive_inlining (cmethod);
 	if (aggressive_inline)
-		aggressive_inline = !mono_simd_unsupported_aggressive_inline_intrinsic_type (cmethod);
+		aggressive_inline = !mono_simd_unsupported_aggressive_inline_intrinsic_type (cfg, cmethod);
 	return aggressive_inline;
 }
 
@@ -4880,7 +4865,7 @@ inline_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig,
 	cfg->disable_inline = prev_disable_inline;
 	cfg->inline_depth --;
 
-	if ((costs >= 0 && costs < 60) || inline_always || (costs >= 0 && aggressive_inline_method (cmethod))) {
+	if ((costs >= 0 && costs < 60) || inline_always || (costs >= 0 && aggressive_inline_method (cfg, cmethod))) {
 		if (cfg->verbose_level > 2)
 			printf ("INLINE END %s -> %s\n", mono_method_full_name (cfg->method, TRUE), mono_method_full_name (cmethod, TRUE));
 
@@ -7560,6 +7545,11 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b
 					if (cfg->compile_aot)
 						cfg->pinvoke_calli_signatures = g_slist_prepend_mempool (cfg->mempool, cfg->pinvoke_calli_signatures, fsig);
 
+					if (fsig->has_type_parameters) {
+						cfg->prefer_instances = TRUE;
+						GENERIC_SHARING_FAILURE (CEE_CALLI);
+					}
+
 					/* Call the wrapper that will do the GC transition instead */
 					MonoMethod *wrapper = mono_marshal_get_native_func_wrapper_indirect (method->klass, fsig, cfg->compile_aot);
 
@@ -8942,6 +8932,11 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b
 			ins->sreg2 = sp [1]->dreg;
 			type_from_op (cfg, ins, sp [0], sp [1]);
 			CHECK_TYPE (ins);
+
+			if (((sp [0]->type == STACK_R4 && sp [1]->type == STACK_R8) ||
+			     (sp [0]->type == STACK_R8 && sp [1]->type == STACK_R4)))
+				add_widen_op (cfg, ins, &sp [0], &sp [1]);
+
 			ins->dreg = alloc_dreg ((cfg), (MonoStackType)(ins)->type);
 
 			/* Use the immediate opcodes if possible */
@@ -9284,7 +9279,7 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b
 
 			mono_save_token_info (cfg, image, token, cmethod);
 
-			if (!mono_class_init_internal (cmethod->klass))
+			if (mono_class_has_failure (cmethod->klass) || !mono_class_init_internal (cmethod->klass))
 				TYPE_LOAD_ERROR (cmethod->klass);
 
 			context_used = mini_method_check_context_used (cfg, cmethod);
@@ -9997,8 +9992,8 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b
 		case MONO_CEE_STSFLD: {
 			MonoClassField *field;
 			guint foffset;
-			gboolean is_instance;
 			gpointer addr = NULL;
+			gboolean is_instance;
 			gboolean is_special_static;
 			MonoType *ftype;
 			MonoInst *store_val = NULL;
@@ -10083,6 +10078,7 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b
 				is_instance = FALSE;
 			}
 
+
 			context_used = mini_class_check_context_used (cfg, klass);
 
 			if (il_op == MONO_CEE_LDSFLD) {
@@ -10565,7 +10561,6 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b
 
 			context_used = mini_class_check_context_used (cfg, klass);
 
-#ifndef TARGET_S390X
 			if (sp [0]->type == STACK_I8 && TARGET_SIZEOF_VOID_P == 4) {
 				MONO_INST_NEW (cfg, ins, OP_LCONV_TO_OVF_U4);
 				ins->sreg1 = sp [0]->dreg;
@@ -10574,7 +10569,8 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b
 				MONO_ADD_INS (cfg->cbb, ins);
 				*sp = mono_decompose_opcode (cfg, ins);
 			}
-#else
+
+#if defined(TARGET_S390X) || defined(TARGET_POWERPC64)
 			/* The array allocator expects a 64-bit input, and we cannot rely
 			   on the high bits of a 32-bit result, so we have to extend.  */
 			if (sp [0]->type == STACK_I4 && TARGET_SIZEOF_VOID_P == 8) {
@@ -11847,7 +11843,8 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b
 					/* if we couldn't create a wrapper because cmethod isn't supposed to have an
 					UnmanagedCallersOnly attribute, follow CoreCLR behavior and throw when the
 					method with the ldftn is executing, not when it is being compiled. */
-					emit_invalid_program_with_msg (cfg, wrapper_error, method, cmethod);
+					char *err_msg = mono_mem_manager_strdup (cfg->mem_manager, mono_error_get_message (wrapper_error));
+					emit_invalid_program_with_msg (cfg, err_msg);
 					mono_error_cleanup (wrapper_error);
 					EMIT_NEW_PCONST (cfg, ins, NULL);
 					*sp++ = ins;
diff --git a/src/mono/mono/mini/mini-generic-sharing.c b/src/mono/mono/mini/mini-generic-sharing.c
index 58440dcdc359..90e724bf417b 100644
--- a/src/mono/mono/mini/mini-generic-sharing.c
+++ b/src/mono/mono/mini/mini-generic-sharing.c
@@ -584,12 +584,14 @@ inflate_info (MonoMemoryManager *mem_manager, MonoRuntimeGenericContextInfoTempl
 
 		if (m_class_get_byval_arg (inflated_class)->type == MONO_TYPE_ARRAY ||
 			m_class_get_byval_arg (inflated_class)->type == MONO_TYPE_SZARRAY) {
+			g_assert (!mono_class_has_failure (inflated_class));
 			inflated_method = mono_method_search_in_array_class (inflated_class,
 				method->name, method->signature);
 		} else {
 			inflated_method = mono_class_inflate_generic_method_checked (method, context, error);
 			g_assert (is_ok (error)); /* FIXME don't swallow the error */
 		}
+		g_assert (inflated_method);
 		mono_class_init_internal (inflated_method->klass);
 		g_assert (inflated_method->klass == inflated_class);
 		return inflated_method;
@@ -648,12 +650,14 @@ inflate_info (MonoMemoryManager *mem_manager, MonoRuntimeGenericContextInfoTempl
 
 		if (m_class_get_byval_arg (inflated_class)->type == MONO_TYPE_ARRAY ||
 			m_class_get_byval_arg (inflated_class)->type == MONO_TYPE_SZARRAY) {
+			g_assert (!mono_class_has_failure (inflated_class));
 			inflated_method = mono_method_search_in_array_class (inflated_class,
 				method->name, method->signature);
 		} else {
 			inflated_method = mono_class_inflate_generic_method_checked (method, context, error);
 			g_assert (is_ok (error)); /* FIXME don't swallow the error */
 		}
+		g_assert (inflated_method);
 		mono_class_init_internal (inflated_method->klass);
 		g_assert (inflated_method->klass == inflated_class);
 
@@ -1308,6 +1312,7 @@ get_wrapper_shared_vtype (MonoType *t)
 
 	MonoClass *tuple_inst = mono_class_inflate_generic_class_checked (tuple_class, &ctx, error);
 	mono_error_assert_ok (error);
+	g_assert (tuple_inst);
 
 	//printf ("T: %s\n", mono_class_full_name (tuple_inst));
 
@@ -1407,6 +1412,7 @@ get_wrapper_shared_type_full (MonoType *t, gboolean is_field)
 		}
 		klass = mono_class_inflate_generic_class_checked (mono_class_get_generic_class (klass)->container_class, &ctx, error);
 		mono_error_assert_ok (error); /* FIXME don't swallow the error */
+		g_assert (klass);
 
 		t = m_class_get_byval_arg (klass);
 		MonoType *shared_type = get_wrapper_shared_vtype (t);
@@ -4345,6 +4351,7 @@ get_shared_type (MonoType *t, MonoType *type)
 
 		k = mono_class_inflate_generic_class_checked (gclass->container_class, &context, error);
 		mono_error_assert_ok (error); /* FIXME don't swallow the error */
+		g_assert (k);
 
 		return mini_get_shared_gparam (t, m_class_get_byval_arg (k));
 	} else if (MONO_TYPE_ISSTRUCT (type)) {
diff --git a/src/mono/mono/mini/mini-llvm-cpp.cpp b/src/mono/mono/mini/mini-llvm-cpp.cpp
index 21bd0119c046..beb9a8b8a09a 100644
--- a/src/mono/mono/mini/mini-llvm-cpp.cpp
+++ b/src/mono/mono/mini/mini-llvm-cpp.cpp
@@ -726,7 +726,7 @@ mono_llvm_register_intrinsic (LLVMModuleRef module, IntrinsicId id, LLVMTypeRef
 }
 
 /*
- * mono_llvm_register_intrinsic:
+ * mono_llvm_register_overloaded_intrinsic:
  *
  *   Register an overloaded LLVM intrinsic identified by ID using the supplied types.
  */
diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c
index 9a01d6248f26..f7e59ef5acbb 100644
--- a/src/mono/mono/mini/mini-llvm.c
+++ b/src/mono/mono/mini/mini-llvm.c
@@ -399,7 +399,7 @@ static const llvm_ovr_tag_t intrin_arm64_ovr [] = {
 	#define INTRINS_OVR_2_ARG(sym, ...) 0,
 	#define INTRINS_OVR_3_ARG(sym, ...) 0,
 	#define INTRINS_OVR_TAG(sym, _, arch, spec) spec,
-	#define INTRINS_OVR_TAG_KIND(sym, _, kind, arch, spec) spec,
+	#define INTRINS_OVR_TAG_KIND(sym, _, arch, kind, spec) spec,
 	#include "llvm-intrinsics.h"
 };
 
@@ -409,6 +409,7 @@ enum {
 	INTRIN_kind_widen_across,
 	INTRIN_kind_across,
 	INTRIN_kind_arm64_dot_prod,
+	INTRIN_kind_add_pointer,
 };
 
 static const uint8_t intrin_kind [] = {
@@ -658,6 +659,8 @@ get_vtype_size_align (MonoType *t)
 	return ret;
 }
 
+static LLVMTypeRef simd_valuetuple_to_llvm_type (EmitContext *ctx, MonoClass *klass);
+
 /*
  * simd_class_to_llvm_type:
  *
@@ -666,25 +669,28 @@ get_vtype_size_align (MonoType *t)
 static LLVMTypeRef
 simd_class_to_llvm_type (EmitContext *ctx, MonoClass *klass)
 {
-	guint32 nelems;
-	MonoTypeEnum type = mini_get_simd_type_info (klass, &nelems);
-
-	return LLVMVectorType (primitive_type_to_llvm_type (type), nelems);
+	const char *klass_name = m_class_get_name (klass);
+	if (strstr (klass_name, "ValueTuple") != NULL) {
+		return simd_valuetuple_to_llvm_type (ctx, klass);
+	} else {
+		guint32 nelems;
+		MonoTypeEnum type = mini_get_simd_type_info (klass, &nelems);
+		return LLVMVectorType (primitive_type_to_llvm_type (type), nelems);
+	}
+	g_assert_not_reached ();
 }
 
 static LLVMTypeRef
 simd_valuetuple_to_llvm_type (EmitContext *ctx, MonoClass *klass)
 {
 	const char *klass_name = m_class_get_name (klass);
-	if (!strcmp (klass_name, "ValueTuple`2")) {
-		MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0];
-		if (etype->type != MONO_TYPE_GENERICINST)
-			g_assert_not_reached ();
-		MonoClass *eklass = etype->data.generic_class->cached_class;
-		LLVMTypeRef ltype = simd_class_to_llvm_type (ctx, eklass);
-		return LLVMArrayType (ltype, 2);
-	}
-	g_assert_not_reached ();
+	g_assert (strstr (klass_name, "ValueTuple") != NULL);
+	MonoGenericInst *class_inst = mono_class_get_generic_class (klass)->context.class_inst;
+	MonoType *etype = class_inst->type_argv [0];
+	g_assert (etype->type == MONO_TYPE_GENERICINST);
+	MonoClass *eklass = etype->data.generic_class->cached_class;
+	LLVMTypeRef ltype = simd_class_to_llvm_type (ctx, eklass);
+	return LLVMArrayType (ltype, class_inst->type_argc);
 }
 
 /* Return the 128 bit SIMD type corresponding to the mono type TYPE */
@@ -5467,7 +5473,9 @@ immediate_unroll_begin (
 	LLVMBasicBlockRef continuation = gen_bb (ctx, name);
 	LLVMValueRef switch_ins = LLVMBuildSwitch (ctx->builder, switch_index, default_case, max_cases);
 	LLVMPositionBuilderAtEnd (ctx->builder, continuation);
-	LLVMValueRef phi = LLVMBuildPhi (ctx->builder, return_type, name);
+	LLVMValueRef phi = NULL;
+	if (return_type != LLVMVoidType ())
+		phi = LLVMBuildPhi (ctx->builder, return_type, name);
 	ImmediateUnrollCtx ictx = { 0 };
 	ictx.ctx = ctx;
 	ictx.bb = bb;
@@ -5498,7 +5506,8 @@ immediate_unroll_commit (ImmediateUnrollCtx *ictx, int switch_const, LLVMValueRe
 {
 	LLVMBuildBr (ictx->ctx->builder, ictx->continuation);
 	LLVMAddCase (ictx->switch_ins, LLVMConstInt (ictx->switch_index_type, switch_const, FALSE), ictx->tmp_block);
-	LLVMAddIncoming (ictx->phi, &value, &ictx->tmp_block, 1);
+	if (ictx->phi)
+		LLVMAddIncoming (ictx->phi, &value, &ictx->tmp_block, 1);
 }
 
 static void
@@ -5511,7 +5520,8 @@ static void
 immediate_unroll_commit_default (ImmediateUnrollCtx *ictx, LLVMValueRef value)
 {
 	LLVMBuildBr (ictx->ctx->builder, ictx->continuation);
-	LLVMAddIncoming (ictx->phi, &value, &ictx->default_case, 1);
+	if (ictx->phi)
+		LLVMAddIncoming (ictx->phi, &value, &ictx->default_case, 1);
 }
 
 static void
@@ -11573,6 +11583,93 @@ MONO_RESTORE_WARNING
 			values [ins->dreg] = result;
 			break;
 		}
+		case OP_ARM64_LDM_INSERT: {
+			LLVMTypeRef ret_t = simd_class_to_llvm_type (ctx, ins->klass);
+			LLVMTypeRef vec_t = LLVMGetElementType (ret_t);
+			if (!addresses [ins->dreg])
+				addresses [ins->dreg] = create_address (ctx, build_named_alloca (ctx, m_class_get_byval_arg (ins->klass), "arm64_ld_insert"), ret_t);
+			unsigned int n_elem_tuple = LLVMGetArrayLength (ret_t);
+			unsigned int n_elem_vector = LLVMGetVectorSize (vec_t);
+			LLVMTypeRef elem_t = LLVMGetElementType (vec_t);
+			unsigned int elem_bits = mono_llvm_get_prim_size_bits (elem_t);
+			unsigned int vector_size = n_elem_vector * elem_bits;
+			IntrinsicId iid;
+			switch (vector_size) {
+			case 64: {
+				switch (n_elem_tuple) {
+				case 2:
+					iid = INTRINS_AARCH64_ADV_SIMD_LD2LANE_V64;
+					break;
+				case 3:
+					iid = INTRINS_AARCH64_ADV_SIMD_LD3LANE_V64;
+					break;
+				case 4:
+					iid = INTRINS_AARCH64_ADV_SIMD_LD4LANE_V64;
+					break;
+				default:
+					g_assert_not_reached ();
+					break;
+				}
+				break;
+			}
+			case 128: {
+				switch (n_elem_tuple) {
+				case 2:
+					iid = INTRINS_AARCH64_ADV_SIMD_LD2LANE_V128;
+					break;
+				case 3:
+					iid = INTRINS_AARCH64_ADV_SIMD_LD3LANE_V128;
+					break;
+				case 4:
+					iid = INTRINS_AARCH64_ADV_SIMD_LD4LANE_V128;
+					break;
+				default:
+					g_assert_not_reached ();
+					break;
+				}
+				break;
+			}
+			default:
+				g_assert_not_reached ();
+				break;
+			
+			}
+
+			lhs = LLVMBuildLoad2 (builder, ret_t, addresses [ins->sreg1]->value, "");
+
+			LLVMValueRef *args = g_newa0(LLVMValueRef, n_elem_tuple + 2);
+			unsigned int idx = 0;
+			for ( ; idx < n_elem_tuple; idx++) {
+				args [idx] = LLVMBuildExtractValue (builder, lhs, idx, "extract_elem");
+			}
+			args [idx++] = rhs;
+			args [idx] = arg3;
+
+			llvm_ovr_tag_t ovr_tag = ovr_tag_from_llvm_type (vec_t);
+
+			// convert rhs to a constant
+			ImmediateUnrollCtx ictx = immediate_unroll_begin (ctx, bb, 16, rhs, ret_t, "");
+			int i = 0;
+			while (immediate_unroll_next (&ictx, &i)) {
+				LLVMValueRef retval = LLVMGetUndef (ret_t);
+				args [idx - 1] = const_int64 (i);
+				LLVMValueRef result_loaded = call_overloaded_intrins (ctx, iid, ovr_tag, args, "");
+				for (unsigned int j = 0; j < n_elem_tuple; j++) {
+					LLVMValueRef elem = LLVMBuildExtractValue (builder, result_loaded,  j, "extract_elem");
+					retval = LLVMBuildInsertValue (builder, retval, elem, j, "insert_val");
+				}
+				immediate_unroll_commit (&ictx, i, retval);
+			}
+			immediate_unroll_default (&ictx);
+			immediate_unroll_commit_default (&ictx, LLVMConstNull (ret_t));
+			LLVMValueRef result = immediate_unroll_end (&ictx, &cbb);
+
+			LLVMTypeRef retptr_t = pointer_type (ret_t);
+			LLVMValueRef dst = convert (ctx, addresses [ins->dreg]->value, retptr_t);
+			LLVMBuildStore (builder, result, dst);
+			values [ins->dreg] = result;
+			break;
+		}
 		case OP_ARM64_LD1R:
 		case OP_ARM64_LD1: {
 			gboolean replicate = ins->opcode == OP_ARM64_LD1R;
@@ -11612,7 +11709,7 @@ MONO_RESTORE_WARNING
 				LLVMTypeRef etype = type_to_llvm_type (ctx, m_class_get_byval_arg (ins->klass));
 				addresses [ins->dreg] = create_address (ctx, build_named_alloca (ctx, m_class_get_byval_arg (ins->klass), oname), etype);
 			}
-			LLVMTypeRef ret_t = simd_valuetuple_to_llvm_type (ctx, ins->klass);
+			LLVMTypeRef ret_t = simd_class_to_llvm_type (ctx, ins->klass);
 			LLVMTypeRef vec_t = LLVMGetElementType (ret_t);
 			LLVMValueRef ix = const_int32 (1);
 			LLVMTypeRef e_t = scalar ? LLVMGetElementType (vec_t) : vec_t;
@@ -11641,6 +11738,43 @@ MONO_RESTORE_WARNING
 			values [ins->dreg] = vec_sz == 64 ? val : NULL;
 			break;
 		}
+		case OP_ARM64_LDM: {
+			const char *oname = "arm64_ldm";
+			LLVMTypeRef ret_t = simd_class_to_llvm_type (ctx, ins->klass);
+			if (!addresses [ins->dreg])
+				addresses [ins->dreg] = create_address (ctx, build_named_alloca (ctx, m_class_get_byval_arg (ins->klass), oname), ret_t);
+			LLVMTypeRef vec_t = LLVMGetElementType (ret_t);
+			IntrinsicId iid = (IntrinsicId) ins->inst_c0;
+			llvm_ovr_tag_t ovr_tag = ovr_tag_from_llvm_type (vec_t);
+			LLVMValueRef result = call_overloaded_intrins (ctx, iid, ovr_tag, &lhs, oname);
+			LLVMTypeRef retptr_t = pointer_type (ret_t);
+			LLVMValueRef dst = convert (ctx, addresses [ins->dreg]->value, retptr_t);
+			LLVMBuildStore (builder, result, dst);
+			values [ins->dreg] = result;
+			break;
+		}
+		case OP_ARM64_STM: {
+			LLVMTypeRef tuple_t = simd_class_to_llvm_type (ctx, ins->klass);
+			LLVMTypeRef vec_t = LLVMGetElementType (tuple_t);
+
+			IntrinsicId iid = (IntrinsicId) ins->inst_c0;
+			llvm_ovr_tag_t ovr_tag = ovr_tag_from_llvm_type (vec_t);
+
+			LLVMValueRef value_tuple = LLVMBuildLoad2 (builder, tuple_t, addresses [ins->sreg2]->value, "load_param");
+
+			int len = LLVMGetArrayLength (tuple_t);
+
+			LLVMValueRef *args = g_alloca ((len + 1) * sizeof (LLVMValueRef));
+
+			for (int i = 0; i < len; i++) {
+				LLVMValueRef elem = LLVMBuildExtractValue (builder, value_tuple, i, "extract_elem");
+				args [i] = elem;
+			}
+			args [len] = lhs;
+
+			call_overloaded_intrins (ctx, iid, ovr_tag, args, "");
+			break;
+		}
 		case OP_ARM64_ST1: {
 			LLVMTypeRef t = LLVMTypeOf (rhs);
 			LLVMValueRef address = convert (ctx, lhs, pointer_type (t));
@@ -11656,6 +11790,82 @@ MONO_RESTORE_WARNING
 			mono_llvm_build_aligned_store (builder, val, address, FALSE, alignment);
 			break;
 		}
+		case OP_ARM64_STM_SCALAR: {
+			LLVMTypeRef tuple_t = simd_class_to_llvm_type (ctx, ins->klass);
+			LLVMTypeRef vec_t = LLVMGetElementType (tuple_t);
+			unsigned int n_elem_tuple = LLVMGetArrayLength (tuple_t);
+			unsigned int n_elem_vector = LLVMGetVectorSize (vec_t);
+			LLVMTypeRef elem_t = LLVMGetElementType (vec_t);
+			unsigned int elem_bits = mono_llvm_get_prim_size_bits (elem_t);
+			unsigned int vector_size = n_elem_vector * elem_bits;
+			IntrinsicId iid;
+			switch (vector_size) {
+			case 64: {
+				switch (n_elem_tuple) {
+				case 2:
+					iid = INTRINS_AARCH64_ADV_SIMD_ST2LANE_V64;
+					break;
+				case 3:
+					iid = INTRINS_AARCH64_ADV_SIMD_ST3LANE_V64;
+					break;
+				case 4:
+					iid = INTRINS_AARCH64_ADV_SIMD_ST4LANE_V64;
+					break;
+				default:
+					g_assert_not_reached ();
+					break;
+				}
+				break;
+			}
+			case 128: {
+				switch (n_elem_tuple) {
+				case 2:
+					iid = INTRINS_AARCH64_ADV_SIMD_ST2LANE_V128;
+					break;
+				case 3:
+					iid = INTRINS_AARCH64_ADV_SIMD_ST3LANE_V128;
+					break;
+				case 4:
+					iid = INTRINS_AARCH64_ADV_SIMD_ST4LANE_V128;
+					break;
+				default:
+					g_assert_not_reached ();
+					break;
+				}
+				break;
+			}
+			default:
+				g_assert_not_reached ();
+				break;
+			
+			}
+
+			rhs = LLVMBuildLoad2 (builder, tuple_t, addresses [ins->sreg2]->value, "");
+
+			LLVMValueRef *args = g_newa0(LLVMValueRef, n_elem_tuple + 2);
+			unsigned int idx = 0;
+			for ( ; idx < n_elem_tuple; idx++) {
+				args [idx] = LLVMBuildExtractValue (builder, rhs, idx, "extract_elem");
+			}
+			args [idx++] = arg3;
+			args [idx] = lhs;
+
+			llvm_ovr_tag_t ovr_tag = ovr_tag_from_llvm_type (vec_t);
+
+			// convert arg3 to a constant
+			LLVMTypeRef ret_t = LLVMVoidType ();
+			ImmediateUnrollCtx ictx = immediate_unroll_begin (ctx, bb, 16, arg3, ret_t, "");
+			int i = 0;
+			while (immediate_unroll_next (&ictx, &i)) {
+				args [idx - 1] = const_int64 (i);
+				call_overloaded_intrins (ctx, iid, ovr_tag, args, "");
+				immediate_unroll_commit (&ictx, i, NULL);
+			}
+			immediate_unroll_default (&ictx);
+			immediate_unroll_commit_default (&ictx, NULL);
+			immediate_unroll_end (&ictx, &cbb);
+			break;
+		}
 		case OP_ARM64_ADDHN:
 		case OP_ARM64_ADDHN2:
 		case OP_ARM64_SUBHN:
@@ -13580,6 +13790,10 @@ add_intrinsic (EmitContext *ctx, int id)
 						 */
 						 LLVMTypeRef associated_type = intrin_types [vw][0];
 						 intrins = add_intrins2 (module, id, distinguishing_type, associated_type, &intrins_type);
+					} else if (kind == INTRIN_kind_add_pointer) {
+						LLVMTypeRef elem_type = LLVMGetElementType (distinguishing_type);
+						LLVMTypeRef src_t = pointer_type (elem_type);
+						intrins = add_intrins2 (module, id, distinguishing_type, src_t, &intrins_type);
 					} else
 						intrins = add_intrins1 (module, id, distinguishing_type, &intrins_type);
 					int key = key_from_id_and_tag (id, test);
diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h
index ccdc1726a302..ec82dd5de8a8 100644
--- a/src/mono/mono/mini/mini-ops.h
+++ b/src/mono/mono/mini/mini-ops.h
@@ -804,8 +804,6 @@ MINI_OP(OP_LDTOKEN_FIELD, "ldtoken_field", VREG, VREG, NONE)
 
 /* SIMD opcodes. */
 
-#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_ARM64)
-
 MINI_OP(OP_ICONV_TO_R4_RAW, "iconv_to_r4_raw", FREG, IREG, NONE)
 
 /* Extract an element from a vector with a constant lane index.
@@ -853,8 +851,6 @@ MINI_OP(OP_EXPAND_R4, "expand_r4", XREG, FREG, NONE)
 MINI_OP(OP_EXPAND_I8, "expand_i8", XREG, IREG, NONE)
 MINI_OP(OP_EXPAND_R8, "expand_r8", XREG, FREG, NONE)
 
-#endif
-
 // wasm specific SIMD v128
 
 #if defined(TARGET_WASM)
@@ -1629,7 +1625,9 @@ MINI_OP(OP_LSCNT64, "lscnt64", LREG, LREG, NONE)
 
 MINI_OP(OP_ARM64_CLZ, "arm64_clz", XREG, XREG, NONE)
 
-MINI_OP3(OP_ARM64_LD1_INSERT, "arm64_ld1_insert", XREG, IREG, XREG, IREG)
+MINI_OP3(OP_ARM64_LD1_INSERT, "arm64_ld1_insert", XREG, XREG, IREG, IREG)
+MINI_OP3(OP_ARM64_LDM_INSERT, "arm64_ldm_insert", VREG, VREG, IREG, IREG)
+
 MINI_OP(OP_ARM64_LD1, "arm64_ld1", XREG, IREG, NONE)
 MINI_OP(OP_ARM64_LD1R, "arm64_ld1r", XREG, IREG, NONE)
 
@@ -1642,10 +1640,14 @@ MINI_OP(OP_ARM64_LDNP_SCALAR, "arm64_ldnp_scalar", VREG, IREG, NONE)
 MINI_OP(OP_ARM64_LDP, "arm64_ldp", VREG, IREG, NONE)
 MINI_OP(OP_ARM64_LDP_SCALAR, "arm64_ldp_scalar", VREG, IREG, NONE)
 
+MINI_OP(OP_ARM64_LDM, "arm64_ldm", VREG, IREG, NONE)
+
 MINI_OP(OP_ARM64_ST1, "arm64_st1", NONE, IREG, XREG)
 MINI_OP(OP_ARM64_SXTL, "arm64_sxtl", XREG, XREG, NONE)
 MINI_OP(OP_ARM64_SXTL2, "arm64_sxtl2", XREG, XREG, NONE)
 
+MINI_OP(OP_ARM64_STM, "arm64_stm", NONE, IREG, VREG)
+
 MINI_OP(OP_ARM64_SMULH, "arm64_smulh", LREG, LREG, LREG)
 MINI_OP(OP_ARM64_SQRT_SCALAR, "arm64_sqrt_scalar", XREG, XREG, NONE)
 MINI_OP(OP_ARM64_TRN1, "arm64_trn1", XREG, XREG, XREG)
@@ -1658,6 +1660,7 @@ MINI_OP(OP_ARM64_UZP2, "arm64_uzp2", XREG, XREG, XREG)
 MINI_OP(OP_ARM64_ZIP1, "arm64_zip1", XREG, XREG, XREG)
 MINI_OP(OP_ARM64_ZIP2, "arm64_zip2", XREG, XREG, XREG)
 MINI_OP3(OP_ARM64_ST1_SCALAR, "arm64_st1_scalar", NONE, IREG, XREG, IREG)
+MINI_OP3(OP_ARM64_STM_SCALAR, "arm64_stm_scalar", NONE, IREG, VREG, IREG)
 MINI_OP3(OP_ARM64_STNP, "arm64_stnp", NONE, IREG, XREG, XREG)
 MINI_OP3(OP_ARM64_STNP_SCALAR, "arm64_stnp_scalar", NONE, IREG, XREG, XREG)
 MINI_OP3(OP_ARM64_STP, "arm64_stp", NONE, IREG, XREG, XREG)
diff --git a/src/mono/mono/mini/mini-riscv.c b/src/mono/mono/mini/mini-riscv.c
index afaa6838dd27..4f25347b37e6 100644
--- a/src/mono/mono/mini/mini-riscv.c
+++ b/src/mono/mono/mini/mini-riscv.c
@@ -9,6 +9,7 @@
 #include "ir-emit.h"
 
 #include <mono/metadata/tokentype.h>
+#include <mono/metadata/marshal-shared.h>
 
 #ifdef TARGET_RISCV64
 #include "cpu-riscv64.h"
@@ -157,9 +158,9 @@ get_delegate_invoke_impl (gboolean has_target, gboolean param_count, guint32 *co
 
 	if (has_target) {
 		start = code = mono_global_codeman_reserve (4 * 3);
-		code = mono_riscv_emit_load (code, RISCV_T0, RISCV_A0, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr), 0);
+		code = mono_riscv_emit_load (code, RISCV_T1, RISCV_A0, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr), 0);
 		code = mono_riscv_emit_load (code, RISCV_A0, RISCV_A0, MONO_STRUCT_OFFSET (MonoDelegate, target), 0);
-		riscv_jalr (code, RISCV_ZERO, RISCV_T0, 0);
+		riscv_jalr (code, RISCV_ZERO, RISCV_T1, 0);
 
 		g_assert ((code - start) <= 4 * 3);
 	} else {
@@ -168,12 +169,12 @@ get_delegate_invoke_impl (gboolean has_target, gboolean param_count, guint32 *co
 		size = 8 + param_count * 4;
 		start = code = mono_global_codeman_reserve (size);
 
-		code = mono_riscv_emit_load (code, RISCV_T0, RISCV_A0, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr), 0);
+		code = mono_riscv_emit_load (code, RISCV_T1, RISCV_A0, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr), 0);
 		/* slide down the arguments */
 		for (i = 0; i < param_count; ++i)
 			riscv_addi (code, RISCV_A0 + i, RISCV_A0 + i + 1, 0);
 
-		riscv_jalr (code, RISCV_ZERO, RISCV_T0, 0);
+		riscv_jalr (code, RISCV_ZERO, RISCV_T1, 0);
 		g_assert ((code - start) <= size);
 	}
 
@@ -196,8 +197,8 @@ get_delegate_virtual_invoke_impl (MonoTrampInfo **info, gboolean load_imt_reg, i
 	GSList *unwind_ops;
 
 	if (offset / (int)sizeof (target_mgreg_t) > MAX_VIRTUAL_DELEGATE_OFFSET)
-		NOT_IMPLEMENTED;
-	
+		return NULL;
+
 	MINI_BEGIN_CODEGEN ();
 
 	start = code = mono_global_codeman_reserve (size);
@@ -213,10 +214,10 @@ get_delegate_virtual_invoke_impl (MonoTrampInfo **info, gboolean load_imt_reg, i
 		g_assert_not_reached ();
 
 	/* Load this->vtable [offset] */
-	code = mono_riscv_emit_load (code, RISCV_T0, RISCV_A0, MONO_STRUCT_OFFSET (MonoObject, vtable), 0);
-	code = mono_riscv_emit_load (code, RISCV_T0, RISCV_T0, offset, 0);
+	code = mono_riscv_emit_load (code, RISCV_T1, RISCV_A0, MONO_STRUCT_OFFSET (MonoObject, vtable), 0);
+	code = mono_riscv_emit_load (code, RISCV_T1, RISCV_T1, offset, 0);
 
-	riscv_jalr (code, RISCV_ZERO, RISCV_T0, 0);
+	riscv_jalr (code, RISCV_ZERO, RISCV_T1, 0);
 
 	g_assert ((code - start) <= size);
 
@@ -796,22 +797,129 @@ add_farg (CallInfo *cinfo, ArgInfo *ainfo, gboolean single)
 		NOT_IMPLEMENTED;
 #endif
 	} else {
-		ainfo->storage = single ? ArgOnStackR4 : ArgOnStackR8;
-		ainfo->slot_size = size;
-		ainfo->offset = cinfo->stack_usage;
-		cinfo->stack_usage += size;
+		// As ABI specifed, if there is ireg avaliable, store it into ireg
+		if (cinfo->next_arg <= RISCV_A7) {
+			ainfo->storage = single ? ArgR4InIReg : ArgR8InIReg;
+			ainfo->reg = cinfo->next_arg;
+			cinfo->next_arg++;
+		} else {
+			ainfo->storage = single ? ArgOnStackR4 : ArgOnStackR8;
+			ainfo->slot_size = size;
+			ainfo->offset = cinfo->stack_usage;
+			cinfo->stack_usage += size;
+		}
+	}
+}
+
+static gboolean
+is_hfa (MonoType *t, int *out_nfields, int *out_esize, int *field_offsets)
+{
+	MonoClass *klass;
+	gpointer iter;
+	MonoClassField *field;
+	MonoType *ftype, *prev_ftype = NULL;
+	int nfields = 0;
+
+	klass = mono_class_from_mono_type_internal (t);
+	iter = NULL;
+	while ((field = mono_class_get_fields_internal (klass, &iter))) {
+		if (field->type->attrs & FIELD_ATTRIBUTE_STATIC)
+			continue;
+		ftype = mono_field_get_type_internal (field);
+		ftype = mini_get_underlying_type (ftype);
+
+		if (MONO_TYPE_ISSTRUCT (ftype)) {
+			int nested_nfields, nested_esize;
+			int nested_field_offsets [16];
+
+			MonoType *fixed_etype;
+			int fixed_len;
+			if (mono_marshal_shared_get_fixed_buffer_attr (field, &fixed_etype, &fixed_len)) {
+				if (fixed_etype->type != MONO_TYPE_R4 && fixed_etype->type != MONO_TYPE_R8)
+					return FALSE;
+				if (fixed_len > 16)
+					return FALSE;
+				nested_nfields = fixed_len;
+				nested_esize = fixed_etype->type == MONO_TYPE_R4 ? 4 : 8;
+				for (int i = 0; i < nested_nfields; ++i)
+					nested_field_offsets [i] = i * nested_esize;
+			} else {
+				if (!is_hfa (ftype, &nested_nfields, &nested_esize, nested_field_offsets))
+					return FALSE;
+			}
+
+			if (nested_esize == 4)
+				ftype = m_class_get_byval_arg (mono_defaults.single_class);
+			else
+				ftype = m_class_get_byval_arg (mono_defaults.double_class);
+
+			if (prev_ftype && prev_ftype->type != ftype->type)
+				return FALSE;
+			prev_ftype = ftype;
+			for (int i = 0; i < nested_nfields; ++i) {
+				if (nfields + i < 4)
+					field_offsets [nfields + i] =
+					    field->offset - MONO_ABI_SIZEOF (MonoObject) + nested_field_offsets [i];
+			}
+			nfields += nested_nfields;
+		} else {
+			if (!(!m_type_is_byref (ftype) && (ftype->type == MONO_TYPE_R4 || ftype->type == MONO_TYPE_R8)))
+				return FALSE;
+			if (prev_ftype && prev_ftype->type != ftype->type)
+				return FALSE;
+			prev_ftype = ftype;
+			if (nfields < 4)
+				field_offsets [nfields] = field->offset - MONO_ABI_SIZEOF (MonoObject);
+			nfields++;
+		}
 	}
+	if (nfields == 0 || nfields > 2)
+		return FALSE;
+	*out_nfields = nfields;
+	*out_esize = prev_ftype->type == MONO_TYPE_R4 ? 4 : 8;
+	return TRUE;
 }
 
 static void
 add_valuetype (CallInfo *cinfo, ArgInfo *ainfo, MonoType *t)
 {
-	int size, aligned_size;
+	int size, aligned_size, nfields, esize;
 	guint32 align;
+	int field_offsets [16];
 
 	size = mini_type_stack_size_full (t, &align, cinfo->pinvoke);
 	aligned_size = ALIGN_TO (size, align);
 
+	if (is_hfa (t, &nfields, &esize, field_offsets)) {
+		/*
+		 * The struct might include nested float structs aligned at 8,
+		 * so need to keep track of the offsets of the individual fields.
+		 */
+		if (cinfo->next_farg + nfields - 1 <= RISCV_FA7) {
+			ainfo->storage = ArgHFA;
+			ainfo->reg = cinfo->next_farg;
+			ainfo->nregs = nfields;
+			ainfo->size = size;
+			ainfo->esize = esize;
+			for (int i = 0; i < nfields; ++i)
+				ainfo->foffsets [i] = GINT_TO_UINT8 (field_offsets [i]);
+			cinfo->next_farg += ainfo->nregs;
+		} else {
+			ainfo->nfregs_to_skip = cinfo->next_farg <= RISCV_FA7 ? RISCV_FA7 - cinfo->next_farg + 1 : 0;
+			cinfo->next_farg = RISCV_FA7 + 1;
+
+			ainfo->offset = cinfo->stack_usage;
+			ainfo->storage = ArgVtypeOnStack;
+			cinfo->stack_usage += aligned_size;
+			ainfo->slot_size = aligned_size;
+
+			ainfo->hfa = TRUE;
+			ainfo->nregs = nfields;
+			ainfo->esize = esize;
+		}
+		return;
+	}
+
 	// Scalars wider than 2×XLEN bits are passed by reference
 	if (aligned_size > sizeof (host_mgreg_t) * 2) {
 		if (cinfo->next_arg > RISCV_A7) {
@@ -823,8 +931,8 @@ add_valuetype (CallInfo *cinfo, ArgInfo *ainfo, MonoType *t)
 			ainfo->storage = ArgVtypeByRef;
 			ainfo->reg = cinfo->next_arg;
 			ainfo->size = sizeof (host_mgreg_t);
-			ainfo->is_regpair = FALSE;
-			cinfo->next_arg += 1;
+			ainfo->nregs = 1;
+			cinfo->next_arg += ainfo->nregs;
 		}
 	}
 	// Scalars that are 2×XLEN bits wide are passed in a pair of argument registers
@@ -846,9 +954,8 @@ add_valuetype (CallInfo *cinfo, ArgInfo *ainfo, MonoType *t)
 
 			ainfo->reg = cinfo->next_arg;
 			ainfo->size = sizeof (host_mgreg_t);
-			ainfo->is_regpair = FALSE;
-
-			cinfo->next_arg += 1;
+			ainfo->nregs = 1;
+			cinfo->next_arg += ainfo->nregs;
 		}
 		// Scalars that are 2×XLEN bits wide are passed in a pair of argument
 		// registers, with the low-order XLEN bits in the lower-numbered register
@@ -857,9 +964,8 @@ add_valuetype (CallInfo *cinfo, ArgInfo *ainfo, MonoType *t)
 			ainfo->storage = ArgVtypeInIReg;
 			ainfo->reg = cinfo->next_arg;
 			ainfo->size = sizeof (host_mgreg_t) * 2;
-			ainfo->is_regpair = TRUE;
-
-			cinfo->next_arg += 2;
+			ainfo->nregs = 2;
+			cinfo->next_arg += ainfo->nregs;
 		}
 	}
 	// Scalars that are at most XLEN bits wide are passed in a single argument register
@@ -873,9 +979,8 @@ add_valuetype (CallInfo *cinfo, ArgInfo *ainfo, MonoType *t)
 			ainfo->storage = ArgVtypeInIReg;
 			ainfo->reg = cinfo->next_arg;
 			ainfo->size = sizeof (host_mgreg_t);
-			ainfo->is_regpair = FALSE;
-
-			cinfo->next_arg += 1;
+			ainfo->nregs = 1;
+			cinfo->next_arg += ainfo->nregs;
 		}
 	}
 }
@@ -1028,15 +1133,23 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
 		ArgInfo *ainfo = cinfo->args + sig->hasthis + pindex;
 
 		// process the variable parameter sig->sentinelpos mark the first VARARG
-		if ((sig->call_convention == MONO_CALL_VARARG) && (pindex == sig->sentinelpos))
-			NOT_IMPLEMENTED;
+		if ((sig->call_convention == MONO_CALL_VARARG) && (pindex == sig->sentinelpos)) {
+			cinfo->next_arg = RISCV_A7 + 1;
+			cinfo->next_farg = RISCV_FA7 + 1;
+			/* Emit the signature cookie just before the implicit arguments */
+			add_param (cinfo, &cinfo->sig_cookie, mono_get_int_type ());
+		}
 
 		add_param (cinfo, ainfo, sig->params [pindex]);
 	}
 
 	/* Handle the case where there are no implicit arguments */
-	if ((sig->call_convention == MONO_CALL_VARARG) && (pindex == sig->sentinelpos))
-		NOT_IMPLEMENTED;
+	if ((sig->call_convention == MONO_CALL_VARARG) && (pindex == sig->sentinelpos)) {
+		cinfo->next_arg = RISCV_A7 + 1;
+		cinfo->next_farg = RISCV_FA7 + 1;
+		/* Emit the signature cookie just before the implicit arguments */
+		add_param (cinfo, &cinfo->sig_cookie, mono_get_int_type ());
+	}
 
 	cinfo->stack_usage = ALIGN_TO (cinfo->stack_usage, MONO_ARCH_FRAME_ALIGNMENT);
 
@@ -1059,6 +1172,7 @@ arg_get_storage (CallContext *ccontext, ArgInfo *ainfo)
 	case ArgVtypeInIReg:
 		return &ccontext->gregs [ainfo->reg];
 	case ArgInFReg:
+	case ArgHFA:
 		return &ccontext->fregs [ainfo->reg];
 	case ArgOnStack:
 	case ArgVtypeOnStack:
@@ -1331,21 +1445,20 @@ mono_arch_is_inst_imm (int opcode, int imm_opcode, gint64 imm)
 
 gint static mono_arch_get_memory_ordering (int memory_barrier_kind)
 {
-	gint ordering;
 	switch (memory_barrier_kind) {
 	case MONO_MEMORY_BARRIER_ACQ:
-		ordering = RISCV_ORDER_AQ;
+		return RISCV_ORDER_AQ;
 		break;
 	case MONO_MEMORY_BARRIER_REL:
-		ordering = RISCV_ORDER_RL;
+		return RISCV_ORDER_RL;
 		break;
 	case MONO_MEMORY_BARRIER_SEQ:
-		ordering = RISCV_ORDER_ALL;
+		return RISCV_ORDER_ALL;
+		break;
 	default:
-		ordering = RISCV_ORDER_NONE;
+		return RISCV_ORDER_NONE;
 		break;
 	}
-	return ordering;
 }
 
 GList *
@@ -1489,6 +1602,24 @@ add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, ArgStorage storage, int re
 		MONO_ADD_INS (cfg->cbb, ins);
 		mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE);
 		break;
+	case ArgR4InIReg:
+		MONO_INST_NEW (cfg, ins, OP_MOVE_F_TO_I4);
+		ins->dreg = mono_alloc_ireg (cfg);
+		ins->sreg1 = arg->dreg;
+		MONO_ADD_INS (cfg->cbb, ins);
+		mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, FALSE);
+		break;
+	case ArgR8InIReg:
+#ifdef TARGET_RISCV64
+		MONO_INST_NEW (cfg, ins, OP_MOVE_F_TO_I8);
+		ins->dreg = mono_alloc_ireg (cfg);
+		ins->sreg1 = arg->dreg;
+		MONO_ADD_INS (cfg->cbb, ins);
+		mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, FALSE);
+#else
+		NOT_IMPLEMENTED;
+#endif
+		break;
 	}
 }
 
@@ -1501,7 +1632,6 @@ add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, ArgStorage storage, int re
 static void
 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
 {
-	NOT_IMPLEMENTED;
 	MonoMethodSignature *tmp_sig;
 	int sig_reg;
 
@@ -1549,6 +1679,7 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
 	/* Emit the inst of return at mono_arch_emit_setret() */
 	switch (cinfo->ret.storage) {
 	case ArgVtypeInIReg:
+	case ArgHFA:
 		if (MONO_IS_TAILCALL_OPCODE (call))
 			break;
 		/*
@@ -1612,6 +1743,8 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
 		case ArgInIReg:
 		case ArgInFReg:
 		case ArgInFRegR4:
+		case ArgR4InIReg:
+		case ArgR8InIReg:
 			add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, arg);
 			break;
 		case ArgOnStack: {
@@ -1644,6 +1777,9 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
 			}
 			break;
 		}
+		case ArgOnStackR4:
+			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, RISCV_SP, ainfo->offset, arg->dreg);
+			break;
 		case ArgOnStackR8:
 			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, RISCV_SP, ainfo->offset, arg->dreg);
 			break;
@@ -1651,7 +1787,8 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
 		case ArgVtypeByRef:
 		case ArgVtypeOnStack:
 		case ArgVtypeInMixed:
-		case ArgVtypeByRefOnStack: {
+		case ArgVtypeByRefOnStack:
+		case ArgHFA: {
 			MonoInst *ins;
 			guint32 align;
 			guint32 size;
@@ -1692,6 +1829,19 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
 	if (ins->backend.size == 0)
 		return;
 	switch (ainfo->storage) {
+	case ArgHFA:
+		for (int i = 0; i < ainfo->nregs; ++i) {
+			if (ainfo->esize == 4)
+				MONO_INST_NEW (cfg, load, OP_LOADR4_MEMBASE);
+			else
+				MONO_INST_NEW (cfg, load, OP_LOADR8_MEMBASE);
+			load->dreg = mono_alloc_freg (cfg);
+			load->inst_basereg = src->dreg;
+			load->inst_offset = ainfo->foffsets [i];
+			MONO_ADD_INS (cfg->cbb, load);
+			add_outarg_reg (cfg, call, ainfo->esize == 4 ? ArgInFRegR4 : ArgInFReg, ainfo->reg + i, load);
+		}
+		break;
 	case ArgVtypeInIReg:
 		MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
 		load->dreg = mono_alloc_ireg (cfg);
@@ -1873,7 +2023,6 @@ mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
 	case OP_ICONV_TO_I4:
 	case OP_ICONV_TO_OVF_I4:
 	case OP_ICONV_TO_OVF_I4_UN:
-	case OP_ICONV_TO_U4:
 	case OP_ICONV_TO_I8:
 	case OP_ICONV_TO_U8:
 	case OP_LCONV_TO_U:
@@ -1987,7 +2136,6 @@ mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
 	case OP_ICONV_TO_OVF_U2_UN:
 	case OP_ICONV_TO_OVF_I8:
 	case OP_ICONV_TO_OVF_I8_UN:
-	case OP_ICONV_TO_OVF_U4:
 	case OP_ICONV_TO_OVF_U4_UN:
 	case OP_ICONV_TO_OVF_U8:
 	case OP_ICONV_TO_OVF_U8_UN:
@@ -1995,6 +2143,17 @@ mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
 	case OP_ICONV_TO_OVF_U_UN:
 
 		break;
+
+	case OP_ICONV_TO_U4:
+		ins->opcode = OP_ZEXT_I4;
+		break;
+	case OP_ICONV_TO_OVF_U4:
+		MONO_EMIT_NEW_UNALU (cfg, OP_SEXT_I4, ins->dreg, ins->sreg1);
+		MONO_EMIT_NEW_ICOMPARE_IMM (cfg, ins->dreg, 0);
+		MONO_EMIT_NEW_COND_EXC (cfg, ILT, "OverflowException");
+		MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ins->dreg, ins->sreg1);
+		NULLIFY_INS (ins);
+		break;
 	default:
 		g_print ("Can't decompose the OP %s\n", mono_inst_name (ins->opcode));
 		NOT_IMPLEMENTED;
@@ -2064,13 +2223,13 @@ mono_arch_allocate_vars (MonoCompile *cfg)
 			cfg->ret->inst_c0 = cinfo->ret.reg;
 			cfg->ret->dreg = cinfo->ret.reg;
 			break;
+		case ArgHFA:
 		case ArgVtypeInIReg:
 			/* Allocate a local to hold the result, the epilog will copy it to the correct place */
 			cfg->ret->opcode = OP_REGOFFSET;
 			cfg->ret->inst_basereg = cfg->frame_reg;
-			if (cinfo->ret.is_regpair)
-				offset += sizeof (host_mgreg_t);
-			offset += sizeof (host_mgreg_t);
+			g_assert (cinfo->ret.nregs > 0);
+			offset += cinfo->ret.nregs * sizeof (host_mgreg_t);
 			cfg->ret->inst_offset = -offset;
 			break;
 		case ArgVtypeByRef:
@@ -2108,23 +2267,27 @@ mono_arch_allocate_vars (MonoCompile *cfg)
 		case ArgInIReg:
 		case ArgInFReg:
 		case ArgInFRegR4:
+		case ArgR4InIReg:
+		case ArgR8InIReg:
 			offset += sizeof (host_mgreg_t);
 			ins->inst_offset = -offset;
 			break;
 		case ArgOnStack:
+		case ArgOnStackR4:
+		case ArgOnStackR8:
 		case ArgVtypeOnStack:
 			/* These are in the parent frame */
 			g_assert (ainfo->offset >= 0);
 			ins->inst_basereg = RISCV_FP;
 			ins->inst_offset = ainfo->offset;
 			break;
+		case ArgHFA:
 		case ArgVtypeInIReg:
 			ins->opcode = OP_REGOFFSET;
 			ins->inst_basereg = cfg->frame_reg;
 			/* These arguments are saved to the stack in the prolog */
-			if (ainfo->is_regpair)
-				offset += sizeof (host_mgreg_t);
-			offset += sizeof (host_mgreg_t);
+			g_assert (ainfo->nregs > 0);
+			offset += ainfo->nregs * sizeof (host_mgreg_t);
 			ins->inst_offset = -offset;
 			break;
 		case ArgVtypeInMixed:
@@ -2275,6 +2438,7 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 	{
 	loop_start:
 		switch (ins->opcode) {
+		case OP_ARGLIST:
 		case OP_CKFINITE:
 		case OP_BREAK:
 		case OP_IL_SEQ_POINT:
@@ -2636,11 +2800,14 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 					next_ins->sreg1 = ins->dreg;
 					next_ins->sreg2 = RISCV_ZERO;
 				} else {
-					g_print ("Unhandaled op %s following after OP_RCOMPARE\n", mono_inst_name (next_ins->opcode));
-					NOT_IMPLEMENTED;
+					if (cfg->verbose_level > 1) {
+						g_print ("Unhandaled op %s following after OP_RCOMPARE\n", mono_inst_name (next_ins->opcode));
+					}
+					NULLIFY_INS (ins);
 				}
 			} else {
-				g_assert_not_reached ();
+				NULLIFY_INS (ins);
+				// g_assert_not_reached ();
 			}
 			break;
 		}
@@ -2716,14 +2883,15 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 					next_ins->opcode = OP_RISCV_BNE;
 					next_ins->sreg1 = ins->dreg;
 					next_ins->sreg2 = RISCV_ZERO;
-				} else if (next_ins->opcode == OP_BR) {
-					NULLIFY_INS (ins);
 				} else {
-					g_print ("Unhandaled op %s following after OP_FCOMPARE\n", mono_inst_name (next_ins->opcode));
-					NOT_IMPLEMENTED;
+					if (cfg->verbose_level > 1) {
+						g_print ("Unhandaled op %s following after OP_FCOMPARE\n", mono_inst_name (next_ins->opcode));
+					}
+					NULLIFY_INS (ins);
 				}
 			} else {
-				g_assert_not_reached ();
+				NULLIFY_INS (ins);
+				// g_assert_not_reached ();
 			}
 			break;
 		}
@@ -2924,7 +3092,7 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 						break;
 					}
 				} else if (next_ins->opcode == OP_LCGT_UN || next_ins->opcode == OP_ICGT_UN) {
-					if (RISCV_VALID_I_IMM (ins->inst_imm + 1)) {
+					if ((ins->inst_imm != -1) && RISCV_VALID_I_IMM (ins->inst_imm + 1)) {
 						// compare rs1, imm; lcgt_un rd => sltiu rd, rs1, imm; xori rd, rd, 1
 						ins->opcode = OP_RISCV_SLTIU;
 						ins->dreg = next_ins->dreg;
@@ -2936,6 +3104,14 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 						next_ins->sreg1 = ins->dreg;
 						next_ins->inst_imm = 1;
 						break;
+					} else if ((ins->inst_imm == -1)) {
+						// rs1 will never greater than -1
+						next_ins->opcode = OP_ADD_IMM;
+						next_ins->sreg1 = RISCV_ZERO;
+						next_ins->inst_imm = 0;
+
+						NULLIFY_INS (ins);
+						break;
 					}
 				} else if (next_ins->opcode == OP_LCGT || next_ins->opcode == OP_ICGT) {
 					if (RISCV_VALID_I_IMM (ins->inst_imm + 1)) {
@@ -3116,28 +3292,24 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 					next_ins->sreg1 = ins->sreg2;
 					next_ins->sreg2 = ins->sreg1;
 					NULLIFY_INS (ins);
-				} else if (next_ins->opcode == OP_IL_SEQ_POINT || next_ins->opcode == OP_MOVE ||
-				           next_ins->opcode == OP_LOAD_MEMBASE || next_ins->opcode == OP_NOP ||
-				           next_ins->opcode == OP_LOADI4_MEMBASE || next_ins->opcode == OP_BR ||
-				           next_ins->opcode == OP_LOADI8_MEMBASE || next_ins->opcode == OP_ICONST ||
-				           next_ins->opcode == OP_I8CONST || next_ins->opcode == OP_ADD_IMM) {
+				} else {
 					/**
 					 * there is compare without branch OP followed
 					 *
 					 *  icompare_imm R226
-					 *  il_seq_point il: 0xc6
+					 *  call
 					 *
 					 * what should I do?
 					 */
+					if (cfg->verbose_level > 1) {
+						g_print ("Unhandaled op %s following after OP_{I|L}COMPARE{|_IMM}\n",
+						         mono_inst_name (next_ins->opcode));
+					}
 					NULLIFY_INS (ins);
 					break;
-				} else {
-					g_print ("Unhandaled op %s following after OP_{I|L}COMPARE{|_IMM}\n",
-					         mono_inst_name (next_ins->opcode));
-					NOT_IMPLEMENTED;
 				}
 			} else
-				g_assert_not_reached ();
+				NULLIFY_INS (ins);
 			break;
 		}
 
@@ -3291,6 +3463,19 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 			g_assert (mono_op_imm_to_op (ins->opcode) == OP_LDIV);
 #else
 			g_assert (mono_op_imm_to_op (ins->opcode) == OP_IDIV);
+#endif
+			ins->opcode = mono_op_imm_to_op (ins->opcode);
+			ins->sreg2 = temp->dreg;
+			break;
+		case OP_DIV_UN_IMM:
+			NEW_INS_BEFORE (cfg, ins, temp, OP_I8CONST);
+			temp->inst_l = ins->inst_imm;
+			temp->dreg = mono_alloc_ireg (cfg);
+
+#ifdef TARGET_RISCV64
+			g_assert (mono_op_imm_to_op (ins->opcode) == OP_LDIV_UN);
+#else
+			g_assert (mono_op_imm_to_op (ins->opcode) == OP_IDIV_UN);
 #endif
 			ins->opcode = mono_op_imm_to_op (ins->opcode);
 			ins->sreg2 = temp->dreg;
@@ -3298,6 +3483,7 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 		case OP_IMUL_IMM:
 		case OP_LMUL_IMM:
 		case OP_IDIV_IMM:
+		case OP_IDIV_UN_IMM:
 		case OP_IREM_IMM:
 		case OP_LREM_IMM:
 		case OP_IREM_UN_IMM:
@@ -3553,6 +3739,7 @@ guint8 *
 mono_riscv_emit_load (guint8 *code, int rd, int rs1, target_mgreg_t imm, int length)
 {
 	if (!RISCV_VALID_I_IMM (imm)) {
+		g_assert (rs1 != RISCV_T0);
 		code = mono_riscv_emit_imm (code, RISCV_T0, imm);
 		riscv_add (code, RISCV_T0, rs1, RISCV_T0);
 		rs1 = RISCV_T0;
@@ -3593,6 +3780,7 @@ guint8 *
 mono_riscv_emit_loadu (guint8 *code, int rd, int rs1, target_mgreg_t imm, int length)
 {
 	if (!RISCV_VALID_I_IMM (imm)) {
+		g_assert (rs1 != RISCV_T0);
 		code = mono_riscv_emit_imm (code, RISCV_T0, imm);
 		riscv_add (code, RISCV_T0, rs1, RISCV_T0);
 		rs1 = RISCV_T0;
@@ -3624,6 +3812,7 @@ mono_riscv_emit_fload (guint8 *code, int rd, int rs1, target_mgreg_t imm, gboole
 {
 	g_assert (riscv_stdext_d || (isSingle && riscv_stdext_f));
 	if (!RISCV_VALID_I_IMM (imm)) {
+		g_assert (rs1 != RISCV_T0);
 		code = mono_riscv_emit_imm (code, RISCV_T0, imm);
 		riscv_add (code, RISCV_T0, rs1, RISCV_T0);
 		rs1 = RISCV_T0;
@@ -3644,6 +3833,7 @@ guint8 *
 mono_riscv_emit_store (guint8 *code, int rs2, int rs1, target_mgreg_t imm, int length)
 {
 	if (!RISCV_VALID_S_IMM (imm)) {
+		g_assert (rs1 != RISCV_T0 && rs2 != RISCV_T0);
 		code = mono_riscv_emit_imm (code, RISCV_T0, imm);
 		riscv_add (code, RISCV_T0, rs1, RISCV_T0);
 		rs1 = RISCV_T0;
@@ -3685,6 +3875,7 @@ mono_riscv_emit_fstore (guint8 *code, int rs2, int rs1, target_mgreg_t imm, gboo
 {
 	g_assert (riscv_stdext_d || (isSingle && riscv_stdext_f));
 	if (!RISCV_VALID_I_IMM (imm)) {
+		g_assert (rs1 != RISCV_T0);
 		code = mono_riscv_emit_imm (code, RISCV_T0, imm);
 		riscv_add (code, RISCV_T0, rs1, RISCV_T0);
 		rs1 = RISCV_T0;
@@ -3879,12 +4070,14 @@ emit_setup_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset)
 	 * need to be restored during EH.
 	 */
 	g_assert (lmf_offset <= 0);
-	/* pc */
-	code = mono_riscv_emit_store (code, RISCV_RA, RISCV_FP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, pc), 0);
 	/* callee saved gregs + sp */
 	code = emit_store_regarray_cfa (cfg, code, MONO_ARCH_LMF_REGS, RISCV_FP,
 	                                lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, gregs), (1 << RISCV_SP | 1 << RISCV_FP));
 
+	/* pc */
+	riscv_auipc (code, RISCV_RA, 0);
+	code = mono_riscv_emit_store (code, RISCV_RA, RISCV_FP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, pc), 0);
+
 	return code;
 }
 
@@ -3926,7 +4119,8 @@ emit_move_args (MonoCompile *cfg, guint8 *code)
 
 			switch (ainfo->storage) {
 			case ArgInIReg:
-				g_assert (ainfo->is_regpair == FALSE);
+			case ArgR4InIReg:
+			case ArgR8InIReg:
 				code = mono_riscv_emit_store (code, ainfo->reg, ins->inst_basereg, ins->inst_offset, 0);
 				if (i == 0 && sig->hasthis) {
 					mono_add_var_location (cfg, ins, TRUE, ainfo->reg, 0, 0, code - cfg->native_code);
@@ -3940,14 +4134,15 @@ emit_move_args (MonoCompile *cfg, guint8 *code)
 				                               ainfo->storage == ArgInFRegR4);
 				break;
 			case ArgVtypeInIReg:
-				if (ainfo->is_regpair)
+				g_assert (ainfo->nregs <= 2);
+				if (ainfo->nregs == 2)
 					code = mono_riscv_emit_store (code, ainfo->reg + 1, ins->inst_basereg,
 					                              ins->inst_offset + sizeof (host_mgreg_t), 0);
 				code = mono_riscv_emit_store (code, ainfo->reg, ins->inst_basereg, ins->inst_offset, 0);
 				break;
 			case ArgVtypeInMixed:
-				code = mono_riscv_emit_load (code, RISCV_T0, RISCV_S0, 0, 0);
-				code = mono_riscv_emit_store (code, RISCV_T0, ins->inst_basereg,
+				code = mono_riscv_emit_load (code, RISCV_T1, RISCV_S0, 0, 0);
+				code = mono_riscv_emit_store (code, RISCV_T1, ins->inst_basereg,
 				                              ins->inst_offset + sizeof (host_mgreg_t), 0);
 				code = mono_riscv_emit_store (code, ainfo->reg, ins->inst_basereg, ins->inst_offset, 0);
 				break;
@@ -3962,7 +4157,20 @@ emit_move_args (MonoCompile *cfg, guint8 *code)
 				                              ins->inst_left->inst_offset, 0);
 				// }
 				break;
+			case ArgHFA:
+				for (int part = 0; part < ainfo->nregs; part++) {
+					if (ainfo->esize == 4)
+						code = mono_riscv_emit_fstore (code, ainfo->reg + part, ins->inst_basereg,
+						                               GTMREG_TO_INT (ins->inst_offset + ainfo->foffsets [part]), TRUE);
+					else
+						code =
+						    mono_riscv_emit_fstore (code, ainfo->reg + part, ins->inst_basereg,
+						                            GTMREG_TO_INT (ins->inst_offset + ainfo->foffsets [part]), FALSE);
+				}
+				break;
 			case ArgOnStack:
+			case ArgOnStackR4:
+			case ArgOnStackR8:
 			case ArgVtypeOnStack:
 			case ArgVtypeByRefOnStack:
 				break;
@@ -4013,10 +4221,26 @@ emit_move_return_value (MonoCompile *cfg, guint8 *code, MonoInst *ins)
 
 		/* Load the destination address */
 		g_assert (loc && loc->opcode == OP_REGOFFSET);
-		code = mono_riscv_emit_load (code, RISCV_T0, loc->inst_basereg, loc->inst_offset, 0);
-		code = mono_riscv_emit_store (code, cinfo->ret.reg, RISCV_T0, 0, 0);
-		if (cinfo->ret.is_regpair) {
-			code = mono_riscv_emit_store (code, cinfo->ret.reg + 1, RISCV_T0, sizeof (host_mgreg_t), 0);
+		code = mono_riscv_emit_load (code, RISCV_T1, loc->inst_basereg, loc->inst_offset, 0);
+		code = mono_riscv_emit_store (code, cinfo->ret.reg, RISCV_T1, 0, 0);
+		g_assert (cinfo->ret.nregs <= 2);
+		if (cinfo->ret.nregs == 2) {
+			code = mono_riscv_emit_store (code, cinfo->ret.reg + 1, RISCV_T1, sizeof (host_mgreg_t), 0);
+		}
+		break;
+	}
+	case ArgHFA: {
+		MonoInst *loc = cfg->arch.vret_addr_loc;
+		int i;
+
+		/* Load the destination address */
+		g_assert (loc && loc->opcode == OP_REGOFFSET);
+		code = mono_riscv_emit_load (code, RISCV_T1, loc->inst_basereg, GTMREG_TO_INT (loc->inst_offset), 0);
+		for (i = 0; i < cinfo->ret.nregs; ++i) {
+			if (cinfo->ret.esize == 4)
+				code = mono_riscv_emit_fstore (code, cinfo->ret.reg + i, RISCV_T1, cinfo->ret.foffsets [i], TRUE);
+			else
+				code = mono_riscv_emit_fstore (code, cinfo->ret.reg + i, RISCV_T1, cinfo->ret.foffsets [i], FALSE);
 		}
 		break;
 	}
@@ -4103,6 +4327,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 	 * - Setup frame
 	 */
 	int stack_size = 0;
+	mono_emit_unwind_op_def_cfa (cfg, code, RISCV_SP, 0);
 
 	/* Setup frame */
 	if (RISCV_VALID_I_IMM (-cfg->stack_offset)) {
@@ -4125,7 +4350,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 		code = mono_riscv_emit_imm (code, RISCV_T0, cfg->stack_offset);
 		// calculate SP
 		riscv_sub (code, RISCV_SP, RISCV_SP, RISCV_T0);
-		mono_emit_unwind_op_def_cfa (cfg, code, RISCV_SP, cfg->stack_offset);
+		mono_emit_unwind_op_def_cfa_offset (cfg, code, cfg->stack_offset);
 
 		// save return value
 		stack_size += sizeof (target_mgreg_t);
@@ -4192,15 +4417,15 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 			ins = cfg->arch.ss_tramp_var;
 			g_assert (ins->opcode == OP_REGOFFSET);
 
-			code = mono_riscv_emit_imm (code, RISCV_T0, (guint64)&ss_trampoline);
-			code = mono_riscv_emit_store (code, RISCV_T0, ins->inst_basereg, ins->inst_offset, 0);
+			code = mono_riscv_emit_imm (code, RISCV_T1, (guint64)&ss_trampoline);
+			code = mono_riscv_emit_store (code, RISCV_T1, ins->inst_basereg, ins->inst_offset, 0);
 		}
 		if (cfg->arch.bp_tramp_var) {
 			/* Initialize bp_tramp_var */
 			ins = cfg->arch.bp_tramp_var;
 			g_assert (ins->opcode == OP_REGOFFSET);
-			code = mono_riscv_emit_imm (code, RISCV_T0, (guint64)bp_trampoline);
-			code = mono_riscv_emit_store (code, RISCV_T0, ins->inst_basereg, ins->inst_offset, 0);
+			code = mono_riscv_emit_imm (code, RISCV_T1, (guint64)bp_trampoline);
+			code = mono_riscv_emit_store (code, RISCV_T1, ins->inst_basereg, ins->inst_offset, 0);
 		}
 	}
 
@@ -4239,12 +4464,26 @@ mono_arch_emit_epilog (MonoCompile *cfg)
 	case ArgVtypeInIReg: {
 		MonoInst *ins = cfg->ret;
 
-		if (cinfo->ret.is_regpair)
+		g_assert (cinfo->ret.nregs <= 2);
+		if (cinfo->ret.nregs == 2)
 			code = mono_riscv_emit_load (code, cinfo->ret.reg + 1, ins->inst_basereg,
 			                             ins->inst_offset + sizeof (host_mgreg_t), 0);
 		code = mono_riscv_emit_load (code, cinfo->ret.reg, ins->inst_basereg, ins->inst_offset, 0);
 		break;
 	}
+	case ArgHFA: {
+		MonoInst *ins = cfg->ret;
+
+		for (int i = 0; i < cinfo->ret.nregs; ++i) {
+			if (cinfo->ret.esize == 4)
+				code = mono_riscv_emit_fload (code, cinfo->ret.reg + i, ins->inst_basereg,
+				                              GTMREG_TO_INT (ins->inst_offset + cinfo->ret.foffsets [i]), TRUE);
+			else
+				code = mono_riscv_emit_fload (code, cinfo->ret.reg + i, ins->inst_basereg,
+				                              GTMREG_TO_INT (ins->inst_offset + cinfo->ret.foffsets [i]), FALSE);
+		}
+		break;
+	}
 	default:
 		g_print ("Unable process returned storage %d(0x%x)\n", cinfo->ret.storage, cinfo->ret.storage);
 		NOT_IMPLEMENTED;
@@ -4317,13 +4556,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 				g_assert (var->opcode == OP_REGOFFSET);
 				/* Load ss_tramp_var */
 				/* This is equal to &ss_trampoline */
-				code = mono_riscv_emit_load (code, RISCV_T0, var->inst_basereg, var->inst_offset, 0);
+				code = mono_riscv_emit_load (code, RISCV_T1, var->inst_basereg, var->inst_offset, 0);
 				/* Load the trampoline address */
-				code = mono_riscv_emit_load (code, RISCV_T0, RISCV_T0, 0, 0);
+				code = mono_riscv_emit_load (code, RISCV_T1, RISCV_T1, 0, 0);
 				/* Call it if it is non-null */
 				// In riscv, we use jalr to jump
-				riscv_beq (code, RISCV_ZERO, RISCV_T0, 8);
-				riscv_jalr (code, RISCV_ZERO, RISCV_T0, 0);
+				riscv_beq (code, RISCV_ZERO, RISCV_T1, 8);
+				riscv_jalr (code, RISCV_ZERO, RISCV_T1, 0);
 			}
 			mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
 
@@ -4333,8 +4572,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 				MonoInst *var = cfg->arch.bp_tramp_var;
 				g_assert (var);
 				g_assert (var->opcode == OP_REGOFFSET);
-				/* Load the address of the bp trampoline into IP0 */
-				code = mono_riscv_emit_load (code, RISCV_T0, var->inst_basereg, var->inst_offset, 0);
+				/* Load the address of the bp trampoline into T1 */
+				code = mono_riscv_emit_load (code, RISCV_T1, var->inst_basereg, var->inst_offset, 0);
 				/*
 				 * A placeholder for a possible breakpoint inserted by
 				 * mono_arch_set_breakpoint ().
@@ -4363,9 +4602,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 			riscv_addi (code, RISCV_T0, RISCV_SP, 0);
 			loop_start = code;
 			riscv_beq (code, RISCV_T0, RISCV_T1, 0);
-			code = mono_riscv_emit_store (code, RISCV_ZERO, RISCV_T0, 0, 0);
-			code = mono_riscv_emit_store (code, RISCV_ZERO, RISCV_T0, sizeof (host_mgreg_t), 0);
+			riscv_sd (code, RISCV_ZERO, RISCV_T0, 0);
+			riscv_sd (code, RISCV_ZERO, RISCV_T0, sizeof (host_mgreg_t));
 #ifdef TARGET_RISCV32
+			NOT_IMPLEMENTED;
 			code = mono_riscv_emit_store (code, RISCV_ZERO, RISCV_T0, sizeof (host_mgreg_t) * 2, 0);
 			code = mono_riscv_emit_store (code, RISCV_ZERO, RISCV_T0, sizeof (host_mgreg_t) * 3, 0);
 #endif
@@ -4417,9 +4657,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 			byte_offset = MONO_STRUCT_OFFSET (MonoVTable, initialized);
 
 			/* Load vtable->initialized */
-			code = mono_riscv_emit_load (code, RISCV_T0, ins->sreg1, byte_offset, 1);
+			code = mono_riscv_emit_load (code, RISCV_T1, ins->sreg1, byte_offset, 1);
 			branch_label = code;
-			riscv_bne (code, RISCV_ZERO, RISCV_T0, 0);
+			riscv_bne (code, RISCV_ZERO, RISCV_T1, 0);
 
 			/* Slowpath */
 			g_assert (ins->sreg1 == RISCV_A0);
@@ -4440,6 +4680,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 			riscv_andi (code, RISCV_T0, RISCV_T0, ~(RISCV_FCLASS_INF | RISCV_FCLASS_NAN));
 			code =
 			    mono_riscv_emit_branch_exc (cfg, code, OP_RISCV_EXC_BEQ, RISCV_T0, RISCV_ZERO, "ArithmeticException");
+			if (ins->dreg != ins->sreg1) {
+				if (riscv_stdext_d)
+					riscv_fsgnj_d (code, ins->dreg, ins->sreg1, ins->sreg1);
+				else
+					riscv_fsgnj_s (code, ins->dreg, ins->sreg1, ins->sreg1);
+			}
 		}
 		case OP_BREAK:
 			/*
@@ -4450,6 +4696,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 			code = mono_riscv_emit_call (cfg, code, MONO_PATCH_INFO_JIT_ICALL_ID,
 			                             GUINT_TO_POINTER (MONO_JIT_ICALL_mono_break));
 			break;
+		case OP_ARGLIST:
+			g_assert (cfg->arch.cinfo);
+			riscv_addi (code, RISCV_T1, RISCV_FP, cfg->arch.cinfo->sig_cookie.offset);
+			code = mono_riscv_emit_store (code, RISCV_T1, ins->sreg1, 0, 0);
+			break;
 
 		case OP_NOP:
 		case OP_RELAXED_NOP:
@@ -4635,40 +4886,77 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 				riscv_fdiv_s (code, RISCV_ROUND_DY, ins->dreg, ins->sreg1, ins->sreg2);
 			}
 			break;
-		case OP_IDIV:
-		case OP_LDIV:
+		case OP_IREM:
+		case OP_IDIV: {
 			g_assert (riscv_stdext_m);
+			/* Check for zero */
 			code = mono_riscv_emit_branch_exc (cfg, code, OP_RISCV_EXC_BEQ, ins->sreg2, RISCV_ZERO,
 			                                   "DivideByZeroException");
-			riscv_div (code, ins->dreg, ins->sreg1, ins->sreg2);
-			break;
-		case OP_IDIV_UN:
+			/* Check for INT64_MIN/-1 */
 #ifdef TARGET_RISCV64
-			g_assert (riscv_stdext_m);
-			code = mono_riscv_emit_branch_exc (cfg, code, OP_RISCV_EXC_BEQ, ins->sreg2, RISCV_ZERO,
-			                                   "DivideByZeroException");
-			riscv_divuw (code, ins->dreg, ins->sreg1, ins->sreg2);
-			break;
+			code = mono_riscv_emit_imm (code, RISCV_T0, 0xffffffff80000000);
+#else
+			code = mono_riscv_emit_imm (code, RISCV_T0, 0x80000000);
 #endif
-		case OP_LDIV_UN:
+			// compare t0, rs1; ceq rd => xor t0, t0, rs1; sltiu t0, t0, 1
+			riscv_xor (code, RISCV_T0, RISCV_T0, ins->sreg1);
+			riscv_sltiu (code, RISCV_T1, RISCV_T0, 1);
+#ifdef TARGET_RISCV64
+			code = mono_riscv_emit_imm (code, RISCV_T0, 0xffffffffffffffff);
+#else
+			code = mono_riscv_emit_imm (code, RISCV_T0, 0xffffffff);
+#endif
+			riscv_xor (code, RISCV_T0, RISCV_T0, ins->sreg2);
+			riscv_sltiu (code, RISCV_T0, RISCV_T0, 1);
+			riscv_and (code, RISCV_T0, RISCV_T0, RISCV_T1);
+			riscv_addi (code, RISCV_T0, RISCV_T0, -1);
+			code = mono_riscv_emit_branch_exc (cfg, code, OP_RISCV_EXC_BEQ, RISCV_T0, RISCV_ZERO, "OverflowException");
+			if (ins->opcode == OP_IREM)
+#ifdef TARGET_RISCV64
+				riscv_remw (code, ins->dreg, ins->sreg1, ins->sreg2);
+#else
+				riscv_rem (code, ins->dreg, ins->sreg1, ins->sreg2);
+#endif
+			else
+				riscv_div (code, ins->dreg, ins->sreg1, ins->sreg2);
+			break;
+		}
+		case OP_LREM:
+		case OP_LDIV: {
 			g_assert (riscv_stdext_m);
+			/* Check for zero */
 			code = mono_riscv_emit_branch_exc (cfg, code, OP_RISCV_EXC_BEQ, ins->sreg2, RISCV_ZERO,
 			                                   "DivideByZeroException");
-			riscv_divu (code, ins->dreg, ins->sreg1, ins->sreg2);
+			/* Check for INT64_MIN/-1 */
+			code = mono_riscv_emit_imm (code, RISCV_T0, 0x8000000000000000);
+			// compare t0, rs1; ceq rd => xor t0, t0, rs1; sltiu t0, t0, 1
+			riscv_xor (code, RISCV_T0, RISCV_T0, ins->sreg1);
+			riscv_sltiu (code, RISCV_T1, RISCV_T0, 1);
+			code = mono_riscv_emit_imm (code, RISCV_T0, 0xffffffffffffffff);
+			riscv_xor (code, RISCV_T0, RISCV_T0, ins->sreg2);
+			riscv_sltiu (code, RISCV_T0, RISCV_T0, 1);
+			riscv_and (code, RISCV_T0, RISCV_T0, RISCV_T1);
+			riscv_addi (code, RISCV_T0, RISCV_T0, -1);
+			code = mono_riscv_emit_branch_exc (cfg, code, OP_RISCV_EXC_BEQ, RISCV_T0, RISCV_ZERO, "OverflowException");
+			if (ins->opcode == OP_LREM)
+				riscv_rem (code, ins->dreg, ins->sreg1, ins->sreg2);
+			else
+				riscv_div (code, ins->dreg, ins->sreg1, ins->sreg2);
 			break;
-		case OP_IREM:
+		}
+		case OP_IDIV_UN:
 #ifdef TARGET_RISCV64
 			g_assert (riscv_stdext_m);
 			code = mono_riscv_emit_branch_exc (cfg, code, OP_RISCV_EXC_BEQ, ins->sreg2, RISCV_ZERO,
 			                                   "DivideByZeroException");
-			riscv_remw (code, ins->dreg, ins->sreg1, ins->sreg2);
+			riscv_divuw (code, ins->dreg, ins->sreg1, ins->sreg2);
 			break;
 #endif
-		case OP_LREM:
+		case OP_LDIV_UN:
 			g_assert (riscv_stdext_m);
 			code = mono_riscv_emit_branch_exc (cfg, code, OP_RISCV_EXC_BEQ, ins->sreg2, RISCV_ZERO,
 			                                   "DivideByZeroException");
-			riscv_rem (code, ins->dreg, ins->sreg1, ins->sreg2);
+			riscv_divu (code, ins->dreg, ins->sreg1, ins->sreg2);
 			break;
 		case OP_IREM_UN:
 #ifdef TARGET_RISCV64
@@ -4787,40 +5075,67 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
 		/* Atomic */
 		case OP_MEMORY_BARRIER:
-			riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_MEM);
-			break;
+			riscv_fence (code, mono_arch_get_memory_ordering (ins->backend.memory_barrier_kind),
+			             mono_arch_get_memory_ordering (ins->backend.memory_barrier_kind));
+			break;
+		/**
+		 * OP_ATOMIC_ADD_I4 rd, rs1, rs2
+		 * this instriuction increase the value of address rs1 by rs2
+		 * and store the **new** value to rd.
+		 * But in RISC-V amoadd rd, rs2(rs1) increase the value of address rs1 by rs2
+		 * and store the **old** value to rd,  store the result value to address rs1.
+		 * So we need more add rd, rd, rs2 to fix the rd as the **new** value.
+		 */
 		case OP_ATOMIC_ADD_I4:
-			riscv_amoadd_w (code, RISCV_ORDER_ALL, ins->dreg, ins->sreg2, ins->sreg1);
+			riscv_amoadd_w (code, RISCV_ORDER_ALL, RISCV_T0, ins->sreg2, ins->sreg1);
+			riscv_addw (code, ins->dreg, RISCV_T0, ins->sreg2);
 			break;
 		case OP_ATOMIC_ADD_I8:
-			riscv_amoadd_d (code, RISCV_ORDER_ALL, ins->dreg, ins->sreg2, ins->sreg1);
+			riscv_amoadd_d (code, RISCV_ORDER_ALL, RISCV_T0, ins->sreg2, ins->sreg1);
+			riscv_add (code, ins->dreg, RISCV_T0, ins->sreg2);
 			break;
-		case OP_ATOMIC_LOAD_I1:
-		case OP_ATOMIC_LOAD_U1: {
+		case OP_ATOMIC_LOAD_I1: {
 			riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_MEM);
 			code = mono_riscv_emit_load (code, ins->dreg, ins->sreg1, ins->inst_offset, 1);
 			riscv_fence (code, RISCV_FENCE_R, RISCV_FENCE_MEM);
 			break;
 		}
-		case OP_ATOMIC_LOAD_U2:
+		case OP_ATOMIC_LOAD_U1: {
+			riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_MEM);
+			code = mono_riscv_emit_loadu (code, ins->dreg, ins->sreg1, ins->inst_offset, 1);
+			riscv_fence (code, RISCV_FENCE_R, RISCV_FENCE_MEM);
+			break;
+		}
+		case OP_ATOMIC_LOAD_U2: {
+			riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_MEM);
+			code = mono_riscv_emit_loadu (code, ins->dreg, ins->sreg1, ins->inst_offset, 2);
+			riscv_fence (code, RISCV_FENCE_R, RISCV_FENCE_MEM);
+			break;
+		}
 		case OP_ATOMIC_LOAD_I2: {
 			riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_MEM);
 			code = mono_riscv_emit_load (code, ins->dreg, ins->sreg1, ins->inst_offset, 2);
 			riscv_fence (code, RISCV_FENCE_R, RISCV_FENCE_MEM);
+			break;
 		}
-		case OP_ATOMIC_LOAD_I4:
-		case OP_ATOMIC_LOAD_U4: {
+		case OP_ATOMIC_LOAD_I4: {
 			riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_MEM);
 			code = mono_riscv_emit_load (code, ins->dreg, ins->sreg1, ins->inst_offset, 4);
 			riscv_fence (code, RISCV_FENCE_R, RISCV_FENCE_MEM);
 			break;
 		}
+		case OP_ATOMIC_LOAD_U4: {
+			riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_MEM);
+			code = mono_riscv_emit_loadu (code, ins->dreg, ins->sreg1, ins->inst_offset, 4);
+			riscv_fence (code, RISCV_FENCE_R, RISCV_FENCE_MEM);
+			break;
+		}
 		case OP_ATOMIC_STORE_I1:
 		case OP_ATOMIC_STORE_U1: {
 			riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_W);
 			code = mono_riscv_emit_store (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset, 1);
 			if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
-				riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_MEM);
+				riscv_fence (code, RISCV_FENCE_ALL, RISCV_FENCE_ALL);
 			break;
 		}
 		case OP_ATOMIC_STORE_I2:
@@ -4828,7 +5143,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 			riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_W);
 			code = mono_riscv_emit_store (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset, 2);
 			if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
-				riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_MEM);
+				riscv_fence (code, RISCV_FENCE_ALL, RISCV_FENCE_ALL);
 			break;
 		}
 		case OP_ATOMIC_STORE_I4:
@@ -4836,14 +5151,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 			riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_W);
 			code = mono_riscv_emit_store (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset, 4);
 			if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
-				riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_MEM);
+				riscv_fence (code, RISCV_FENCE_ALL, RISCV_FENCE_ALL);
 			break;
 		}
 		case OP_ATOMIC_CAS_I4: {
 			g_assert (riscv_stdext_a);
 			/**
 			 * loop_start:
-			 * 	lr.w	t0, rs1
+			 * 	lr.w.aqrl	t0, rs1
 			 * 	bne		t0, rs3, loop_end
 			 * 	sc.w.rl t1, rs2, rs1
 			 * 	bnez t1, loop_start
@@ -4855,7 +5170,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 			guint8 *loop_start, *branch_label;
 			/* sreg2 is the value, sreg3 is the comparand */
 			loop_start = code;
-			riscv_lr_w (code, RISCV_ORDER_NONE, RISCV_T0, ins->sreg1);
+			riscv_lr_w (code, RISCV_ORDER_ALL, RISCV_T0, ins->sreg1);
 			branch_label = code;
 			riscv_bne (code, RISCV_T0, ins->sreg3, 0);
 			riscv_sc_w (code, RISCV_ORDER_RL, RISCV_T1, ins->sreg2, ins->sreg1);
@@ -4878,7 +5193,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 			riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_W);
 			code = mono_riscv_emit_store (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset, 8);
 			if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
-				riscv_fence (code, RISCV_FENCE_MEM, RISCV_FENCE_MEM);
+				riscv_fence (code, RISCV_FENCE_ALL, RISCV_FENCE_ALL);
 			break;
 		}
 		case OP_ATOMIC_LOAD_I8:
@@ -4894,7 +5209,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 			guint8 *loop_start, *branch_label;
 			/* sreg2 is the value, sreg3 is the comparand */
 			loop_start = code;
-			riscv_lr_d (code, RISCV_ORDER_NONE, RISCV_T0, ins->sreg1);
+			riscv_lr_d (code, RISCV_ORDER_ALL, RISCV_T0, ins->sreg1);
 			branch_label = code;
 			riscv_bne (code, RISCV_T0, ins->sreg3, 0);
 			riscv_sc_d (code, RISCV_ORDER_RL, RISCV_T1, ins->sreg2, ins->sreg1);
@@ -4988,7 +5303,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 		case OP_FCONV_TO_R4:
 		case OP_RISCV_SETFREG_R4: {
 			g_assert (riscv_stdext_d);
-			riscv_fcvt_s_d (code, RISCV_ROUND_TZ, ins->dreg, ins->sreg1);
+			riscv_fcvt_s_d (code, RISCV_ROUND_DY, ins->dreg, ins->sreg1);
 			break;
 		}
 		case OP_RDIV: {
@@ -5168,8 +5483,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 		case OP_FCALL_MEMBASE:
 		case OP_VCALL2_MEMBASE:
 		case OP_VOIDCALL_MEMBASE:
-			code = mono_riscv_emit_load (code, RISCV_T0, ins->inst_basereg, ins->inst_offset, 0);
-			riscv_jalr (code, RISCV_RA, RISCV_T0, 0);
+			code = mono_riscv_emit_load (code, RISCV_T1, ins->inst_basereg, ins->inst_offset, 0);
+			riscv_jalr (code, RISCV_RA, RISCV_T1, 0);
 			code = emit_move_return_value (cfg, code, ins);
 			break;
 
diff --git a/src/mono/mono/mini/mini-riscv.h b/src/mono/mono/mini/mini-riscv.h
index 36b092c122bb..d19fc88da5be 100644
--- a/src/mono/mono/mini/mini-riscv.h
+++ b/src/mono/mono/mini/mini-riscv.h
@@ -236,12 +236,15 @@ typedef enum {
 	ArgNone, // only in void return type
 	ArgInIReg = 0x01,
 	ArgInFReg,
+	ArgR4InIReg,
+	ArgR8InIReg,
 #ifdef TARGET_RISCV64
 	ArgInFRegR4,
 #endif
 	ArgOnStack,
 	ArgOnStackR4,
 	ArgOnStackR8,
+	ArgHFA,
 
 	/*
 	 * Vtype passed in consecutive int registers.
@@ -258,11 +261,18 @@ typedef struct {
 	/* ArgVtypeInIRegs */
 	guint8 reg;
 	int size;
-	guint8 is_regpair;
+	/* ArgVtypeInIRegs/ArgHFA */
+	guint8 nregs;
 	/* ArgOnStack */
 	int slot_size;
 	gint32 offset;
 	guint8 is_signed : 1;
+	/* ArgHFA */
+	int esize;
+	/* The offsets of the float values inside the arg */
+	guint16 foffsets [4];
+	int nfregs_to_skip;
+	gboolean hfa;
 } ArgInfo;
 
 struct CallInfo {
diff --git a/src/mono/mono/mini/mini-runtime.c b/src/mono/mono/mini/mini-runtime.c
index 82a9a5a0a9ef..90fe8ddc799e 100644
--- a/src/mono/mono/mini/mini-runtime.c
+++ b/src/mono/mono/mini/mini-runtime.c
@@ -404,14 +404,17 @@ void *(mono_global_codeman_reserve) (int size)
 			global_codeman = mono_code_manager_new ();
 		else
 			global_codeman = mono_code_manager_new_aot ();
-		return mono_code_manager_reserve (global_codeman, size);
+		ptr = mono_code_manager_reserve (global_codeman, size);
 	}
 	else {
 		mono_jit_lock ();
 		ptr = mono_code_manager_reserve (global_codeman, size);
 		mono_jit_unlock ();
-		return ptr;
 	}
+
+	/* Virtually all call sites for this API assume it can't return NULL. */
+	g_assert (ptr);
+	return ptr;
 }
 
 /* The callback shouldn't take any locks */
@@ -2167,7 +2170,7 @@ mono_emit_jit_dump (MonoJitInfo *jinfo, gpointer code)
 	int i;
 
 	memset (&rec, 0, sizeof (rec));
-	
+
 	// populating info relating debug methods
 	dmji = mono_debug_find_method (jinfo->d.method, NULL);
 
@@ -2561,7 +2564,18 @@ compile_special (MonoMethod *method, MonoError *error)
 		} else {
 			MonoMethod *nm = mono_marshal_get_native_wrapper (method, TRUE, mono_aot_only);
 			compiled_method = mono_jit_compile_method_jit_only (nm, error);
-			return_val_if_nok (error, NULL);
+			if (!compiled_method && mono_aot_only && mono_use_interpreter) {
+				// We failed to find wrapper in aot images, try interpreting it instead
+				mono_error_cleanup (error);
+				error_init_reuse (error);
+				nm = mono_marshal_get_native_wrapper (method, TRUE, FALSE);
+				compiled_method = mono_jit_compile_method (nm, error);
+				return_val_if_nok (error, NULL);
+				code = mono_get_addr_from_ftnptr (compiled_method);
+				return code;
+			} else {
+				return_val_if_nok (error, NULL);
+			}
 		}
 
 		code = mono_get_addr_from_ftnptr (compiled_method);
@@ -4540,20 +4554,20 @@ mini_llvm_init (void)
 }
 
 #ifdef ENSURE_PRIMARY_STACK_SIZE
-/*++ 
- Function: 
-   EnsureStackSize 
-  
- Abstract: 
-   This fixes a problem on MUSL where the initial stack size reported by the 
-   pthread_attr_getstack is about 128kB, but this limit is not fixed and 
-   the stack can grow dynamically. The problem is that it makes the 
-   functions ReflectionInvocation::[Try]EnsureSufficientExecutionStack 
-   to fail for real life scenarios like e.g. compilation of corefx. 
-   Since there is no real fixed limit for the stack, the code below 
-   ensures moving the stack limit to a value that makes reasonable 
-   real life scenarios work. 
-  
+/*++
+ Function:
+   EnsureStackSize
+
+ Abstract:
+   This fixes a problem on MUSL where the initial stack size reported by the
+   pthread_attr_getstack is about 128kB, but this limit is not fixed and
+   the stack can grow dynamically. The problem is that it makes the
+   functions ReflectionInvocation::[Try]EnsureSufficientExecutionStack
+   to fail for real life scenarios like e.g. compilation of corefx.
+   Since there is no real fixed limit for the stack, the code below
+   ensures moving the stack limit to a value that makes reasonable
+   real life scenarios work.
+
  --*/
 static MONO_NO_OPTIMIZATION MONO_NEVER_INLINE void
 ensure_stack_size (size_t size)
@@ -4737,7 +4751,7 @@ mini_init (const char *filename)
 	mono_w32handle_init ();
 #endif
 
-#ifdef ENSURE_PRIMARY_STACK_SIZE 
+#ifdef ENSURE_PRIMARY_STACK_SIZE
 	ensure_stack_size (5 * 1024 * 1024);
 #endif // ENSURE_PRIMARY_STACK_SIZE
 
diff --git a/src/mono/mono/mini/mini-s390x.c b/src/mono/mono/mini/mini-s390x.c
index 5c13a13420f7..a5f228ea20f1 100644
--- a/src/mono/mono/mini/mini-s390x.c
+++ b/src/mono/mono/mini/mini-s390x.c
@@ -3400,13 +3400,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 			break;
 		case OP_AOTCONST: {
 			mono_add_patch_info (cfg, code - cfg->native_code,
-				(MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
+				(MonoJumpInfoType)(gsize)ins->inst_i1, ins->inst_p0);
 			S390_LOAD_TEMPLATE (code, ins->dreg);
 		}
 			break;
 		case OP_JUMP_TABLE: {
 			mono_add_patch_info (cfg, code - cfg->native_code,
-				(MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
+				(MonoJumpInfoType)(gsize)ins->inst_i1, ins->inst_p0);
 			S390_LOAD_TEMPLATE (code, ins->dreg);
 		}
 			break;
diff --git a/src/mono/mono/mini/mini-wasm.c b/src/mono/mono/mini/mini-wasm.c
index af943e24adec..db7c8b2de39a 100644
--- a/src/mono/mono/mini/mini-wasm.c
+++ b/src/mono/mono/mini/mini-wasm.c
@@ -75,17 +75,23 @@ get_storage (MonoType *type, MonoType **etype, gboolean is_return)
 	case MONO_TYPE_R8:
 		return ArgOnStack;
 
-	case MONO_TYPE_GENERICINST:
+	case MONO_TYPE_GENERICINST: {
 		if (!mono_type_generic_inst_is_valuetype (type))
 			return ArgOnStack;
 
 		if (mini_is_gsharedvt_variable_type (type))
 			return ArgGsharedVTOnStack;
-		/* fall through */
+
+		if (mini_wasm_is_scalar_vtype (type, etype))
+			return ArgVtypeAsScalar;
+
+		return is_return ? ArgValuetypeAddrInIReg : ArgValuetypeAddrOnStack;
+	}
 	case MONO_TYPE_VALUETYPE:
 	case MONO_TYPE_TYPEDBYREF: {
 		if (mini_wasm_is_scalar_vtype (type, etype))
 			return ArgVtypeAsScalar;
+
 		return is_return ? ArgValuetypeAddrInIReg : ArgValuetypeAddrOnStack;
 	}
 	case MONO_TYPE_VAR:
@@ -438,14 +444,17 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
 
 //functions exported to be used by JS
 G_BEGIN_DECLS
-EMSCRIPTEN_KEEPALIVE void mono_wasm_execute_timer (void);
 
 //JS functions imported that we use
+#ifdef DISABLE_THREADS
+EMSCRIPTEN_KEEPALIVE void mono_wasm_execute_timer (void);
+EMSCRIPTEN_KEEPALIVE void mono_background_exec (void);
 extern void mono_wasm_schedule_timer (int shortestDueTimeMs);
+#else
+extern void mono_target_thread_schedule_synchronization_context(MonoNativeThreadId target_thread);
+#endif // DISABLE_THREADS
 G_END_DECLS
 
-void mono_background_exec (void);
-
 #endif // HOST_BROWSER
 
 gpointer
@@ -582,6 +591,8 @@ mono_thread_state_init_from_handle (MonoThreadUnwindState *tctx, MonoThreadInfo
 	return FALSE;
 }
 
+#ifdef DISABLE_THREADS
+
 // this points to System.Threading.TimerQueue.TimerHandler C# method
 static void *timer_handler;
 
@@ -594,10 +605,11 @@ mono_wasm_execute_timer (void)
 	}
 
 	background_job_cb cb = timer_handler;
+	MONO_ENTER_GC_UNSAFE;
 	cb ();
+	MONO_EXIT_GC_UNSAFE;
 }
 
-#ifdef DISABLE_THREADS
 void
 mono_wasm_main_thread_schedule_timer (void *timerHandler, int shortestDueTimeMs)
 {
@@ -618,7 +630,7 @@ mono_arch_register_icall (void)
 	mono_add_internal_call_internal ("System.Threading.TimerQueue::MainThreadScheduleTimer", mono_wasm_main_thread_schedule_timer);
 	mono_add_internal_call_internal ("System.Threading.ThreadPool::MainThreadScheduleBackgroundJob", mono_main_thread_schedule_background_job);
 #else
-	mono_add_internal_call_internal ("System.Runtime.InteropServices.JavaScript.JSSynchronizationContext::TargetThreadScheduleBackgroundJob", mono_target_thread_schedule_background_job);
+	mono_add_internal_call_internal ("System.Runtime.InteropServices.JavaScript.JSSynchronizationContext::ScheduleSynchronizationContext", mono_target_thread_schedule_synchronization_context);
 #endif /* DISABLE_THREADS */
 #endif /* HOST_BROWSER */
 }
@@ -769,7 +781,12 @@ mini_wasm_is_scalar_vtype (MonoType *type, MonoType **etype)
 		if (nfields > 1)
 			return FALSE;
 		MonoType *t = mini_get_underlying_type (field->type);
-		if (MONO_TYPE_ISSTRUCT (t)) {
+		int align, field_size = mono_type_size (t, &align);
+		// inlinearray and fixed both work by having a single field that is bigger than its element type.
+		// we also don't want to scalarize a struct that has padding in its metadata, even if it would fit.
+		if (field_size != size) {
+			return FALSE;
+		} else if (MONO_TYPE_ISSTRUCT (t)) {
 			if (!mini_wasm_is_scalar_vtype (t, etype))
 				return FALSE;
 		} else if (!((MONO_TYPE_IS_PRIMITIVE (t) || MONO_TYPE_IS_REFERENCE (t) || MONO_TYPE_IS_POINTER (t)))) {
diff --git a/src/mono/mono/mini/mini-wasm.h b/src/mono/mono/mini/mini-wasm.h
index 77c7f3a78fa5..95bafda3336c 100644
--- a/src/mono/mono/mini/mini-wasm.h
+++ b/src/mono/mono/mini/mini-wasm.h
@@ -100,11 +100,18 @@ typedef struct {
 // sdks/wasm/driver.c is C and uses this
 G_EXTERN_C void mono_wasm_enable_debugging (int log_level);
 
+#ifdef HOST_BROWSER
+
+//JS functions imported that we use
 #ifdef DISABLE_THREADS
+void mono_wasm_execute_timer (void);
 void mono_wasm_main_thread_schedule_timer (void *timerHandler, int shortestDueTimeMs);
 #endif // DISABLE_THREADS
 
 void mono_wasm_print_stack_trace (void);
+#endif // HOST_BROWSER
+
+
 
 gboolean
 mini_wasm_is_scalar_vtype (MonoType *type, MonoType **etype);
diff --git a/src/mono/mono/mini/mini.c b/src/mono/mono/mini/mini.c
index eb174000eabe..92bf21887c59 100644
--- a/src/mono/mono/mini/mini.c
+++ b/src/mono/mono/mini/mini.c
@@ -3037,6 +3037,9 @@ is_simd_supported (MonoCompile *cfg)
 {
 #ifdef DISABLE_SIMD
     return FALSE;
+#endif
+#ifndef MONO_ARCH_SIMD_INTRINSICS
+	return FALSE;
 #endif
 	// FIXME: Clean this up
 #ifdef TARGET_WASM
@@ -4319,6 +4322,7 @@ mini_handle_call_res_devirt (MonoMethod *cmethod)
 
 		inst = mono_class_inflate_generic_class_checked (mono_class_get_iequatable_class (), &ctx, error);
 		mono_error_assert_ok (error);
+		g_assert (inst);
 
 		// EqualityComparer<T>.Default returns specific types depending on T
 		// FIXME: Special case more types: byte, string, nullable, enum ?
diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h
index 9982afc22e3f..781cb54b0492 100644
--- a/src/mono/mono/mini/mini.h
+++ b/src/mono/mono/mini/mini.h
@@ -2811,12 +2811,20 @@ void mono_cfg_add_try_hole (MonoCompile *cfg, MonoExceptionClause *clause, guint
 void mono_cfg_set_exception (MonoCompile *cfg, MonoExceptionType type);
 void mono_cfg_set_exception_invalid_program (MonoCompile *cfg, const char *msg);
 
+#if defined(HOST_WASM)
+#define MONO_TIME_TRACK(a, phase) \
+	{ \
+		(phase) ; \
+		a = 0; \
+	}
+#else
 #define MONO_TIME_TRACK(a, phase) \
 	{ \
 		gint64 start = mono_time_track_start (); \
 		(phase) ; \
 		mono_time_track_end (&(a), start); \
 	}
+#endif // HOST_WASM
 
 gint64 mono_time_track_start (void);
 void mono_time_track_end (gint64 *time, gint64 start);
@@ -2962,7 +2970,7 @@ MonoInst*   mono_emit_common_intrinsics (MonoCompile *cfg, MonoMethod *cmethod,
 MonoInst*   mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args);
 MonoInst*   mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr);
 void        mono_simd_intrinsics_init (void);
-gboolean    mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod *cmethod);
+gboolean    mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoCompile *cfg, MonoMethod *cmethod);
 
 MonoMethod*
 mini_method_to_shared (MonoMethod *method); // null if not shared
diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c
index 73e5d88f3b50..e003d1247892 100644
--- a/src/mono/mono/mini/simd-intrinsics.c
+++ b/src/mono/mono/mini/simd-intrinsics.c
@@ -19,8 +19,6 @@
 #include <mono/metadata/reflection-internals.h>
 #include <mono/utils/mono-hwcap.h>
 
-#if defined (MONO_ARCH_SIMD_INTRINSICS)
-
 #if defined(DISABLE_JIT)
 
 void
@@ -176,7 +174,7 @@ has_intrinsic_cattr (MonoMethod *method)
 }
 
 static gboolean
-is_SIMD_feature_supported(MonoCompile *cfg, MonoCPUFeatures feature) 
+is_SIMD_feature_supported(MonoCompile *cfg, MonoCPUFeatures feature)
 {
 	return mini_get_cpu_features (cfg) & feature;
 }
@@ -319,7 +317,7 @@ emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSigna
 	if (id == SN_BitwiseAnd || id == SN_BitwiseOr || id == SN_Xor ||
 		id == SN_op_BitwiseAnd || id == SN_op_BitwiseOr || id == SN_op_ExclusiveOr) {
 		op = OP_XBINOP_FORCEINT;
-	
+
 		switch (id) {
 		case SN_BitwiseAnd:
 		case SN_op_BitwiseAnd:
@@ -421,7 +419,7 @@ emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSigna
 				if (!COMPILE_LLVM (cfg))
 					return NULL;
 #endif
-				if (fsig->params [1]->type != MONO_TYPE_GENERICINST) 
+				if (fsig->params [1]->type != MONO_TYPE_GENERICINST)
 					return handle_mul_div_by_scalar (cfg, klass, arg_type, args [1]->dreg, args [0]->dreg, OP_IMUL);
 				else if (fsig->params [0]->type != MONO_TYPE_GENERICINST)
 					return handle_mul_div_by_scalar (cfg, klass, arg_type, args [0]->dreg, args [1]->dreg, OP_IMUL);
@@ -566,7 +564,7 @@ emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum element_type, Mono
 	} else {
 		return emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg);
 	}
-#else	
+#else
 	MonoInst *ins = emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg);
 	if (!COMPILE_LLVM (cfg))
 		ins->inst_c1 = mono_class_get_context (klass)->class_inst->type_argv [0]->type;
@@ -634,6 +632,7 @@ emit_xconst_v128 (MonoCompile *cfg, MonoClass *klass, guint8 value[16])
 	ins->type = STACK_VTYPE;
 	ins->dreg = alloc_xreg (cfg);
 	ins->inst_p0 = mono_mem_manager_alloc (cfg->mem_manager, size);
+	ins->klass = klass;
 	MONO_ADD_INS (cfg->cbb, ins);
 
 	memcpy (ins->inst_p0, &value[0], size);
@@ -650,12 +649,12 @@ emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_t
 	MonoClass *vector_class = mono_class_from_mono_type_internal (vector_type);
 	int vector_size = mono_class_value_size (vector_class, NULL);
 	int element_size;
-	
+
 	guint32 nelems;
  	mini_get_simd_type_info (vector_class, &nelems);
 
 	// Override nelems for Vector3, with actual number of elements, instead of treating it as a 4-element vector (three elements + zero).
-	const char *klass_name = m_class_get_name (vector_class); 
+	const char *klass_name = m_class_get_name (vector_class);
 	if (!strcmp (klass_name, "Vector3"))
 		nelems = 3;
 
@@ -727,7 +726,7 @@ emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_t
 	case MONO_TYPE_U1:
 		// byte, sbyte not supported yet
 		return NULL;
-	case MONO_TYPE_I2: 
+	case MONO_TYPE_I2:
 	case MONO_TYPE_U2:
 		instc0 = INTRINS_SSE_PHADDW;
 		break;
@@ -759,12 +758,12 @@ emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_t
 	default: {
 		return NULL;
 	}
-	}	
-	
+	}
+
 	// Check if necessary SIMD intrinsics are supported on the current machine
 	MonoCPUFeatures feature = type_enum_is_float (element_type) ? MONO_CPU_X86_SSE3 : MONO_CPU_X86_SSSE3;
 	if (!is_SIMD_feature_supported (cfg, feature))
-		return NULL;	
+		return NULL;
 
 	int vector_size = mono_class_value_size (vector_class, NULL);
 	MonoType *etype = mono_class_get_context (vector_class)->class_inst->type_argv [0];
@@ -1063,7 +1062,7 @@ emit_hardware_intrinsics (
 
 static MonoInst*
 emit_vector_insert_element (
-	MonoCompile* cfg, MonoClass* vklass, MonoInst* ins, MonoTypeEnum type, MonoInst* element, 
+	MonoCompile* cfg, MonoClass* vklass, MonoInst* ins, MonoTypeEnum type, MonoInst* element,
 	int index, gboolean is_zero_inited)
 {
 	int op = type_to_insert_op (type);
@@ -1071,7 +1070,7 @@ emit_vector_insert_element (
 	if (is_zero_inited && is_zero_const (element)) {
 			// element already set to zero
 #ifdef TARGET_ARM64
-	} else if (!COMPILE_LLVM (cfg) && element->opcode == type_to_extract_op (type) && 
+	} else if (!COMPILE_LLVM (cfg) && element->opcode == type_to_extract_op (type) &&
 		(type == MONO_TYPE_R4 || type == MONO_TYPE_R8)) {
 		// OP_INSERT_Ix inserts from GP reg, not SIMD. Cannot optimize for int types.
 		ins = emit_simd_ins (cfg, vklass, op, ins->dreg, element->sreg1);
@@ -1100,8 +1099,6 @@ emit_vector_create_elementwise (
 	return ins;
 }
 
-#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_WASM)
-
 static int
 type_to_xinsert_op (MonoTypeEnum type)
 {
@@ -1178,20 +1175,6 @@ create_class_instance (const char* name_space, const char *name, MonoType *param
 	return ivector_inst;
 }
 
-static gboolean
-is_supported_vector_primitive_type (MonoType *type)
-{
-	gboolean constrained_generic_param = (type->type == MONO_TYPE_VAR || type->type == MONO_TYPE_MVAR);
-
-	if (constrained_generic_param && type->data.generic_param->gshared_constraint && MONO_TYPE_IS_VECTOR_PRIMITIVE (type->data.generic_param->gshared_constraint))
-		return TRUE;
-
-	if (MONO_TYPE_IS_VECTOR_PRIMITIVE (type))
-		return TRUE;
-
-	return FALSE;
-}
-
 static guint16 sri_vector_methods [] = {
 	SN_Abs,
 	SN_Add,
@@ -1408,6 +1391,76 @@ emit_msb_shift_vector_constant (MonoCompile *cfg, MonoClass *arg_class, MonoType
 }
 #endif
 
+static MonoInst*
+emit_dot (MonoCompile *cfg, MonoClass *klass, MonoType *vector_type, MonoTypeEnum arg0_type, int sreg1, int sreg2) {
+	if (!is_element_type_primitive (vector_type))
+		return NULL;
+#if defined(TARGET_WASM)
+	if (!COMPILE_LLVM (cfg) && (arg0_type == MONO_TYPE_I8 || arg0_type == MONO_TYPE_U8))
+		return NULL;
+#elif defined(TARGET_ARM64)
+	if (!COMPILE_LLVM (cfg) && (arg0_type == MONO_TYPE_I8 || arg0_type == MONO_TYPE_U8 || arg0_type == MONO_TYPE_I || arg0_type == MONO_TYPE_U))
+		return NULL;
+#endif
+
+#if defined(TARGET_ARM64) || defined(TARGET_WASM)
+	MonoInst *pairwise_multiply = emit_simd_ins (cfg, klass, OP_XBINOP, sreg1, sreg2);
+	pairwise_multiply->inst_c0 = type_enum_is_float (arg0_type) ? OP_FMUL : OP_IMUL;
+	pairwise_multiply->inst_c1 = arg0_type;
+	return emit_sum_vector (cfg, vector_type, arg0_type, pairwise_multiply);
+#elif defined(TARGET_AMD64)
+	int instc =-1;
+	if (type_enum_is_float (arg0_type)) {
+		if (is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSE41)) {
+			int mask_val = -1;
+			switch (arg0_type) {
+			case MONO_TYPE_R4:
+				instc = COMPILE_LLVM (cfg) ? OP_SSE41_DPPS : OP_SSE41_DPPS_IMM;
+				mask_val = 0xf1; // 0xf1 ... 0b11110001
+				break;
+			case MONO_TYPE_R8:
+				instc = COMPILE_LLVM (cfg) ? OP_SSE41_DPPD : OP_SSE41_DPPD_IMM;
+				mask_val = 0x31; // 0x31 ... 0b00110001
+				break;
+			default:
+				return NULL;
+			}
+
+			MonoInst *dot;
+			if (COMPILE_LLVM (cfg)) {
+				int mask_reg = alloc_ireg (cfg);
+				MONO_EMIT_NEW_ICONST (cfg, mask_reg, mask_val);
+
+				dot = emit_simd_ins (cfg, klass, instc, sreg1, sreg2);
+				dot->sreg3 = mask_reg;
+			} else {
+				dot = emit_simd_ins (cfg, klass, instc, sreg1, sreg2);
+				dot->inst_c0 = mask_val;
+			}
+			return extract_first_element (cfg, klass, arg0_type, dot->dreg);
+		} else {
+			instc = OP_FMUL;
+		}
+	} else {
+		if (arg0_type == MONO_TYPE_I1 || arg0_type == MONO_TYPE_U1)
+			return NULL; 	// We don't support sum vector for byte, sbyte types yet
+
+		// FIXME:
+		if (!COMPILE_LLVM (cfg))
+			return NULL;
+
+		instc = OP_IMUL;
+	}
+	MonoInst *pairwise_multiply = emit_simd_ins (cfg, klass, OP_XBINOP, sreg1, sreg2);
+	pairwise_multiply->inst_c0 = type_enum_is_float (arg0_type) ? OP_FMUL : OP_IMUL;
+	pairwise_multiply->inst_c1 = arg0_type;
+
+	return emit_sum_vector (cfg, vector_type, arg0_type, pairwise_multiply);
+#else
+	return NULL;
+#endif
+}
+
 /*
  * Emit intrinsics in System.Numerics.Vector and System.Runtime.Intrinsics.Vector64/128/256/512.
  * If the intrinsic is not supported for some reasons, return NULL, and fall back to the c#
@@ -1415,7 +1468,7 @@ emit_msb_shift_vector_constant (MonoCompile *cfg, MonoClass *arg_class, MonoType
  */
 static MonoInst*
 emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
-{	
+{
 	int id = lookup_intrins (sri_vector_methods, sizeof (sri_vector_methods), cmethod);
 	if (id == -1) {
 		//check_no_intrinsic_cattr (cmethod);
@@ -1439,6 +1492,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 	if (vector_size == 256 || vector_size == 512)
 		return NULL;
 
+	if (!(cfg->opt & MONO_OPT_SIMD))
+		return NULL;
+
 // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64
 #ifdef TARGET_ARM64
 	if (!COMPILE_LLVM (cfg)) {
@@ -1501,9 +1557,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		} else {
 			if (COMPILE_LLVM (cfg))
 				return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args);
-			
+
 			// SSSE3 does not support i64
-			if (is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSSE3) && 
+			if (is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSSE3) &&
 				!(arg0_type == MONO_TYPE_I8 || (TARGET_SIZEOF_VOID_P == 8 && arg0_type == MONO_TYPE_I)))
 				return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args);
 
@@ -1511,7 +1567,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 			MonoInst *neg = emit_simd_ins (cfg, klass, OP_XBINOP, zero->dreg, args [0]->dreg);
 			neg->inst_c0 = OP_ISUB;
 			neg->inst_c1 = arg0_type;
-			
+
 			MonoInst *ins = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, neg->dreg);
 			ins->inst_c0 = OP_IMAX;
 			ins->inst_c1 = arg0_type;
@@ -1540,7 +1596,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 			return NULL;
 		return emit_simd_ins_for_binary_op (cfg, klass, fsig, args, arg0_type, id);
 	case SN_AndNot: {
-		if (!is_element_type_primitive (fsig->params [0])) 
+		if (!is_element_type_primitive (fsig->params [0]))
 			return NULL;
 #ifdef TARGET_ARM64
 		return emit_simd_ins_for_sig (cfg, klass, OP_ARM64_BIC, -1, arg0_type, fsig, args);
@@ -1615,8 +1671,8 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 			return NULL;
 #if defined(TARGET_ARM64)
 		if (!COMPILE_LLVM (cfg)) {
-			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP, 
-				arg0_type == MONO_TYPE_I8 ? OP_CVT_SI_FP : OP_CVT_UI_FP, 
+			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP,
+				arg0_type == MONO_TYPE_I8 ? OP_CVT_SI_FP : OP_CVT_UI_FP,
 				MONO_TYPE_R8, fsig, args);
 		}
 #endif
@@ -1640,15 +1696,15 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		return NULL;
 #endif
 	}
-	case SN_ConvertToInt32: 
+	case SN_ConvertToInt32:
 	case SN_ConvertToUInt32: {
 		if (arg0_type != MONO_TYPE_R4)
 			return NULL;
 #if defined(TARGET_ARM64)
 		if (!COMPILE_LLVM (cfg)) {
-			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP, 
-				id == SN_ConvertToInt32 ? OP_CVT_FP_SI : OP_CVT_FP_UI, 
-				id == SN_ConvertToInt32 ? MONO_TYPE_I4 : MONO_TYPE_U4, 
+			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP,
+				id == SN_ConvertToInt32 ? OP_CVT_FP_SI : OP_CVT_FP_UI,
+				id == SN_ConvertToInt32 ? MONO_TYPE_I4 : MONO_TYPE_U4,
 				fsig, args);
 		}
 #endif
@@ -1672,9 +1728,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 			return NULL;
 #if defined(TARGET_ARM64)
 		if (!COMPILE_LLVM (cfg)) {
-			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP, 
-				id == SN_ConvertToInt64 ? OP_CVT_FP_SI : OP_CVT_FP_UI, 
-				id == SN_ConvertToInt64 ? MONO_TYPE_I8 : MONO_TYPE_U8, 
+			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP,
+				id == SN_ConvertToInt64 ? OP_CVT_FP_SI : OP_CVT_FP_UI,
+				id == SN_ConvertToInt64 ? MONO_TYPE_I8 : MONO_TYPE_U8,
 				fsig, args);
 		}
 #endif
@@ -1703,8 +1759,8 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 			return NULL;
 #if defined(TARGET_ARM64)
 		if (!COMPILE_LLVM (cfg)) {
-			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP, 
-				arg0_type == MONO_TYPE_I4 ? OP_CVT_SI_FP : OP_CVT_UI_FP, 
+			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP,
+				arg0_type == MONO_TYPE_I4 ? OP_CVT_SI_FP : OP_CVT_UI_FP,
 				MONO_TYPE_R4, fsig, args);
 		}
 #endif
@@ -1783,70 +1839,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		}
 	}
 	case SN_Dot: {
-		if (!is_element_type_primitive (fsig->params [0]))
-			return NULL;
-#if defined(TARGET_WASM)
-		if (!COMPILE_LLVM (cfg) && (arg0_type == MONO_TYPE_I8 || arg0_type == MONO_TYPE_U8))
-			return NULL;
-#elif defined(TARGET_ARM64)
-		if (!COMPILE_LLVM (cfg) && (arg0_type == MONO_TYPE_I8 || arg0_type == MONO_TYPE_U8 || arg0_type == MONO_TYPE_I || arg0_type == MONO_TYPE_U))
-			return NULL;
-#endif
-
-#if defined(TARGET_ARM64) || defined(TARGET_WASM)
-		int instc0 = type_enum_is_float (arg0_type) ? OP_FMUL : OP_IMUL;
-		MonoInst *pairwise_multiply = emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, instc0, arg0_type, fsig, args);
-		return emit_sum_vector (cfg, fsig->params [0], arg0_type, pairwise_multiply);
-#elif defined(TARGET_AMD64)
-		int instc =-1;
-		if (type_enum_is_float (arg0_type)) {
-			if (is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSE41)) {
-				int mask_val = -1;
-				switch (arg0_type) {
-				case MONO_TYPE_R4:
-					instc = COMPILE_LLVM (cfg) ? OP_SSE41_DPPS : OP_SSE41_DPPS_IMM;
-					mask_val = 0xf1; // 0xf1 ... 0b11110001
-					break;
-				case MONO_TYPE_R8:
-					instc = COMPILE_LLVM (cfg) ? OP_SSE41_DPPD : OP_SSE41_DPPD_IMM;
-					mask_val = 0x31; // 0x31 ... 0b00110001
-					break;
-				default:
-					return NULL;
-				}	
-
-				MonoInst *dot;
-				if (COMPILE_LLVM (cfg)) {
-					int mask_reg = alloc_ireg (cfg);
-					MONO_EMIT_NEW_ICONST (cfg, mask_reg, mask_val);
-
-					dot = emit_simd_ins (cfg, klass, instc, args [0]->dreg, args [1]->dreg);
-					dot->sreg3 = mask_reg;
-				} else {
-					dot = emit_simd_ins (cfg, klass, instc, args [0]->dreg, args [1]->dreg);
-					dot->inst_c0 = mask_val;
-				}
-
-				return extract_first_element (cfg, klass, arg0_type, dot->dreg);
-			} else {
-				instc = OP_FMUL;
-			}	
-		} else {
-			if (arg0_type == MONO_TYPE_I1 || arg0_type == MONO_TYPE_U1)
-				return NULL; 	// We don't support sum vector for byte, sbyte types yet
-
-			// FIXME:
-			if (!COMPILE_LLVM (cfg))
-				return NULL;
-
-			instc = OP_IMUL;
-		}
-		MonoInst *pairwise_multiply = emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, instc, arg0_type, fsig, args);
-
-		return emit_sum_vector (cfg, fsig->params [0], arg0_type, pairwise_multiply);
-#else
-		return NULL;
-#endif
+		return emit_dot (cfg, klass, fsig->params [0], arg0_type, args [0]->dreg, args [1]->dreg);
 	}
 	case SN_Equals:
 	case SN_EqualsAll:
@@ -1899,7 +1892,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		} else {
 			arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
 		}
-		
+
 		// FIXME: Add support for Vector64 on arm64 https://github.com/dotnet/runtime/issues/90402
 		int size = mono_class_value_size (arg_class, NULL);
 		if (size != 16)
@@ -1923,10 +1916,10 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 
 			MonoInst* ext_low_vec = emit_simd_ins_for_sig (cfg, arg_class, OP_XLOWER, 8, arg0_type, fsig, &shift_res_vec);
 			MonoInst* sum_low_vec = emit_sum_vector (cfg, fsig->params [0], arg0_type, ext_low_vec);
-			
+
 			MonoInst* ext_high_vec = emit_simd_ins_for_sig (cfg, arg_class, OP_XUPPER, 8, arg0_type, fsig, &shift_res_vec);
-			MonoInst* sum_high_vec = emit_sum_vector (cfg, fsig->params [0], arg0_type, ext_high_vec);			
-			
+			MonoInst* sum_high_vec = emit_sum_vector (cfg, fsig->params [0], arg0_type, ext_high_vec);
+
 			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHL_IMM, sum_high_vec->dreg, sum_high_vec->dreg, 8);
 			EMIT_NEW_BIALU (cfg, result_ins, OP_IOR, sum_high_vec->dreg, sum_high_vec->dreg, sum_low_vec->dreg);
 		} else {
@@ -1939,9 +1932,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		switch (arg0_type) {
 			case MONO_TYPE_U2:
 			case MONO_TYPE_I2: {
-				if (!is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSSE3)) 
+				if (!is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSSE3))
 					return NULL;
-					
+
 				type = type_enum_is_unsigned (arg0_type) ? MONO_TYPE_U1 : MONO_TYPE_I1;
 				MonoClass* arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
 
@@ -2009,7 +2002,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 			if (index < 0 || index >= elems) {
 				MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
 				MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
-			} 
+			}
 
 			// Bounds check is elided if we know the index is safe.
 			int extract_op = type_to_extract_op (arg0_type);
@@ -2095,7 +2088,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		case SN_GreaterThanAll:
 		case SN_GreaterThanOrEqualAll:
 		case SN_LessThanAll:
-		case SN_LessThanOrEqualAll: 
+		case SN_LessThanOrEqualAll:
 			is_all = TRUE;
 			break;
 		}
@@ -2257,7 +2250,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		if (!is_element_type_primitive (fsig->params [0]))
 			return NULL;
 		return emit_simd_ins_for_unary_op (cfg, klass, fsig, args, arg0_type, id);
-	} 
+	}
 	case SN_Shuffle: {
 		if (!is_element_type_primitive (fsig->params [0]))
 			return NULL;
@@ -2317,7 +2310,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 	}
 	case SN_WithElement: {
 		int elems;
-		
+
 		if (!is_element_type_primitive (fsig->params [0]))
 			return NULL;
 
@@ -2345,7 +2338,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 			}
 
 			return emit_vector_insert_element (cfg, klass, args [0], arg0_type, args [2], index, FALSE);
-		} 
+		}
 
 		if (!COMPILE_LLVM (cfg) && fsig->params [0]->type != MONO_TYPE_GENERICINST)
 			return NULL;
@@ -2407,7 +2400,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 				subop = is_upper ? OP_ARM64_UXTL2 : OP_ARM64_UXTL;
 			else
 				subop = is_upper ? OP_ARM64_SXTL2 : OP_ARM64_SXTL;
-			
+
 			MonoInst* ins = emit_simd_ins (cfg, klass, OP_XUNOP, args [0]->dreg, -1);
 			ins->inst_c0 = subop;
 			ins->inst_c1 = arg0_type;
@@ -2491,12 +2484,6 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
 		g_free (name);
 	}
 
-	if (id == SN_get_IsSupported) {
-		MonoInst *ins;
-		EMIT_NEW_ICONST (cfg, ins, is_supported_vector_primitive_type (etype) ? 1 : 0);
-		return ins;
-	}
-
 	// Apart from filtering out non-primitive types this also filters out shared generic instance types like: T_BYTE which cannot be intrinsified
 	if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype)) {
 		// Happens often in gshared code
@@ -2540,6 +2527,9 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
 		return ins;
 	}
 
+	if (!(cfg->opt & MONO_OPT_SIMD))
+		return NULL;
+
 	/* Vector256/Vector512 */
 	if (size == 32 || size == 64)
 		return NULL;
@@ -2660,7 +2650,7 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
 			return NULL;
 		arg0_type = fsig->param_count > 0 ? get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID;
 		return emit_simd_ins_for_binary_op (cfg, klass, fsig, args, arg0_type, id);
-		
+
 	}
 	case SN_op_Equality:
 	case SN_op_Inequality: {
@@ -2764,13 +2754,15 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
 		return NULL;
 #endif
 
+	if (!(cfg->opt & MONO_OPT_SIMD))
+		return NULL;
+
 	if (cfg->verbose_level > 1) {
 		char *name = mono_method_full_name (cmethod, TRUE);
 		printf ("  SIMD intrinsic %s\n", name);
 		g_free (name);
 	}
 
-	// Similar to the cases in emit_sys_numerics_vector_t ()
 	switch (id) {
 	case SN_ctor:
 		if (is_elementwise_ctor (fsig, etype)) {
@@ -2804,11 +2796,11 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
 				ins->klass = klass;
 			}
 			return ins;
-		} 
-// FIXME: Support Vector2 and Vector3 for WASM
-#ifndef TARGET_WASM 
-		else if (len == 3 && fsig->param_count == 2 && fsig->params [0]->type == MONO_TYPE_VALUETYPE && fsig->params [1]->type == etype->type) {
+		} else if (len == 3 && fsig->param_count == 2 && fsig->params [0]->type == MONO_TYPE_VALUETYPE && fsig->params [1]->type == etype->type) {
 			/* Vector3 (Vector2, float) */
+			if (!mini_class_is_simd (cfg, mono_class_from_mono_type_internal (fsig->params [0])))
+				// FIXME: Support Vector2 and Vector3 for WASM and AMD64
+				return NULL;
 			int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
 			MonoInst* vec_ins = args [1];
 			if (COMPILE_LLVM (cfg)) {
@@ -2820,12 +2812,18 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
 			return ins;
 		} else if (len == 4 && fsig->param_count == 2 && fsig->params [0]->type == MONO_TYPE_VALUETYPE && fsig->params [1]->type == etype->type) {
 			/* Vector4 (Vector3, float) */
+			if (!mini_class_is_simd (cfg, mono_class_from_mono_type_internal (fsig->params [0])))
+				// FIXME: Support Vector2 and Vector3 for WASM and AMD64
+				return NULL;
 			int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
 			ins = emit_vector_insert_element (cfg, klass, args [1], MONO_TYPE_R4, args [2], 3, FALSE);
 			ins->dreg = dreg;
 			return ins;
 		} else if (len == 4 && fsig->param_count == 3 && fsig->params [0]->type == MONO_TYPE_VALUETYPE && fsig->params [1]->type == etype->type && fsig->params [2]->type == etype->type) {
 			/* Vector4 (Vector2, float, float) */
+			if (!mini_class_is_simd (cfg, mono_class_from_mono_type_internal (fsig->params [0])))
+				// FIXME: Support Vector2 and Vector3 for WASM and AMD64
+				return NULL;
 			int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
 			MonoInst* vec_ins = args [1];
 			if (COMPILE_LLVM (cfg)) {
@@ -2837,7 +2835,6 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
 			ins->dreg = dreg;
 			return ins;
 		}
-#endif
 		break;
 	case SN_get_Item: {
 		// GetElement is marked as Intrinsic, but handling this in get_Item leads to better code
@@ -2921,6 +2918,8 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
 		value [1] = 1.0f;
 		value [2] = 1.0f;
 		value [3] = 1.0f;
+		if (len == 3)
+			value [3] = 0.0f;
 		return emit_xconst_v128 (cfg, klass, (guint8*)value);
 	}
 	case SN_set_Item: {
@@ -2999,28 +2998,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
 		return emit_simd_ins_for_binary_op (cfg, klass, fsig, args, MONO_TYPE_R4, id);
 	}
 	case SN_Dot: {
-#if defined(TARGET_ARM64) || defined(TARGET_WASM)
-		MonoInst *pairwise_multiply = emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, OP_FMUL, MONO_TYPE_R4, fsig, args);
-		return emit_sum_vector (cfg, fsig->params [0], MONO_TYPE_R4, pairwise_multiply);
-#elif defined(TARGET_AMD64)
-		if (!(mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE41))
-			return NULL;
-
-		int mask_reg = alloc_ireg (cfg);
-		MONO_EMIT_NEW_ICONST (cfg, mask_reg, 0xf1);
-		MonoInst *dot = emit_simd_ins (cfg, klass, OP_SSE41_DPPS, args [0]->dreg, args [1]->dreg);
-		dot->sreg3 = mask_reg;
-
-		MONO_INST_NEW (cfg, ins, OP_EXTRACT_R4);
-		ins->dreg = alloc_freg (cfg);
-		ins->sreg1 = dot->dreg;
-		ins->inst_c0 = 0;
-		ins->inst_c1 = MONO_TYPE_R4;
-		MONO_ADD_INS (cfg->cbb, ins);
-		return ins;
-#else
-		return NULL;
-#endif
+		return emit_dot (cfg, klass, fsig->params [0], MONO_TYPE_R4, args [0]->dreg, args [1]->dreg);
 	}
 	case SN_Negate:
 	case SN_op_UnaryNegation: {
@@ -3072,7 +3050,6 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
 #endif
 	}
 	case SN_CopyTo:
-		// FIXME: https://github.com/dotnet/runtime/issues/91394
 		return NULL;
 	case SN_Clamp: {
 		if (!(!fsig->hasthis && fsig->param_count == 3 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type) && mono_metadata_type_equal (fsig->params [2], type)))
@@ -3088,15 +3065,133 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
 
 		return min;
 	}
-	case SN_Conjugate:
-	case SN_Distance:
-	case SN_DistanceSquared:
+	case SN_Distance: 
+	case SN_DistanceSquared: {
+#if defined(TARGET_ARM64)
+		MonoInst *diffs = emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, OP_FSUB, MONO_TYPE_R4, fsig, args);
+		MonoInst *dot = emit_dot(cfg, klass, fsig->params [0], MONO_TYPE_R4, diffs->dreg, diffs->dreg);
+
+		switch (id) {
+		case SN_Distance: {
+			dot = emit_simd_ins (cfg, klass, OP_EXPAND_R4, dot->dreg, -1);
+			dot->inst_c1 = MONO_TYPE_R4;
+
+			MonoInst *sqrt = emit_simd_ins (cfg, klass, OP_XOP_OVR_X_X, dot->dreg, -1);
+			sqrt->inst_c0 = INTRINS_AARCH64_ADV_SIMD_FSQRT;
+			sqrt->inst_c1 = MONO_TYPE_R4;
+			
+			MonoInst *distance = emit_simd_ins (cfg, klass, OP_EXTRACT_R4, sqrt->dreg, -1);
+			distance->inst_c0 = 0;
+			distance->inst_c1 = MONO_TYPE_R4;
+			return distance;
+		}
+		case SN_DistanceSquared:
+			return dot;
+		default:
+			g_assert_not_reached ();
+		}
+#else
+		return NULL;
+#endif
+	}
 	case SN_Length:
-	case SN_LengthSquared:
-	case SN_Lerp:
+	case SN_LengthSquared: {
+#if defined (TARGET_ARM64)
+		int src1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
+		MonoInst *dot = emit_dot(cfg, klass, type, MONO_TYPE_R4, src1, src1);
+
+		switch (id) {
+		case SN_Length: {
+			dot = emit_simd_ins (cfg, klass, OP_EXPAND_R4, dot->dreg, -1);
+			dot->inst_c1 = MONO_TYPE_R4;
+
+			MonoInst *sqrt = emit_simd_ins (cfg, klass, OP_XOP_OVR_X_X, dot->dreg, -1);
+			sqrt->inst_c0 = INTRINS_AARCH64_ADV_SIMD_FSQRT;
+			sqrt->inst_c1 = MONO_TYPE_R4;
+			
+			MonoInst *length = emit_simd_ins (cfg, klass, OP_EXTRACT_R4, sqrt->dreg, -1);
+			length->inst_c0 = 0;
+			length->inst_c1 = MONO_TYPE_R4;
+			return length;
+		}
+		case SN_LengthSquared:
+			return dot;
+		default:
+			g_assert_not_reached ();
+		}
+#else
+		return NULL;
+#endif
+	}
+	case SN_Lerp: {
+#if defined (TARGET_ARM64)
+		MonoInst* v1 = args [1];
+		if (!strcmp ("Quaternion", m_class_get_name (klass))) 
+			return NULL;
+		
+
+		MonoInst *diffs = emit_simd_ins (cfg, klass, OP_XBINOP, v1->dreg, args [0]->dreg);
+		diffs->inst_c0 = OP_FSUB;
+		diffs->inst_c1 = MONO_TYPE_R4;
+
+		MonoInst *scaled_diffs = handle_mul_div_by_scalar (cfg, klass, MONO_TYPE_R4, args [2]->dreg, diffs->dreg, OP_FMUL);
+		
+		MonoInst *result = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, scaled_diffs->dreg);
+		result->inst_c0 = OP_FADD;
+		result->inst_c1 = MONO_TYPE_R4;
+
+		return result;
+#else
+		return NULL;
+#endif
+	}
 	case SN_Normalize: {
-		// FIXME: https://github.com/dotnet/runtime/issues/91394
+#if defined (TARGET_ARM64)
+	MonoInst* vec = args[0];
+	const char *class_name = m_class_get_name (klass);
+	if (!strcmp ("Plane", class_name)) {
+		static float r4_0 = 0;
+		MonoInst *zero;
+		int zero_dreg = alloc_freg (cfg);
+		MONO_INST_NEW (cfg, zero, OP_R4CONST);
+		zero->inst_p0 = (void*)&r4_0;
+		zero->dreg = zero_dreg;
+		MONO_ADD_INS (cfg->cbb, zero);
+		vec = emit_vector_insert_element (cfg, klass, vec, MONO_TYPE_R4, zero, 3, FALSE);
+	}
+
+	MonoInst *dot = emit_dot(cfg, klass, type, MONO_TYPE_R4, vec->dreg, vec->dreg);
+	dot = emit_simd_ins (cfg, klass, OP_EXPAND_R4, dot->dreg, -1);
+	dot->inst_c1 = MONO_TYPE_R4;
+	
+	MonoInst *sqrt_vec = emit_simd_ins (cfg, klass, OP_XOP_OVR_X_X, dot->dreg, -1);
+	sqrt_vec->inst_c0 = INTRINS_AARCH64_ADV_SIMD_FSQRT;
+	sqrt_vec->inst_c1 = MONO_TYPE_R4;
+
+	MonoInst *normalized_vec = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, sqrt_vec->dreg);
+	normalized_vec->inst_c0 = OP_FDIV;
+	normalized_vec->inst_c1 = MONO_TYPE_R4;
+	
+	return normalized_vec;
+#else
 		return NULL;
+#endif
+	}
+	case SN_Conjugate: {
+#if defined (TARGET_ARM64)
+		float value[4];
+		value [0] = -1.0f;
+		value [1] = -1.0f;
+		value [2] = -1.0f;
+		value [3] = 1.0f;
+		MonoInst* r = emit_xconst_v128 (cfg, klass, (guint8*)value);
+		MonoInst* result = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, r->dreg);
+		result->inst_c0 = OP_FMUL;
+		result->inst_c1 = MONO_TYPE_R4;
+		return result;
+#else
+		return NULL;
+#endif
 	}
 	default:
 		g_assert_not_reached ();
@@ -3105,488 +3200,56 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
 	return NULL;
 }
 
-#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_WASM)
+#ifdef TARGET_ARM64
 
-#ifdef TARGET_AMD64
+static SimdIntrinsic armbase_methods [] = {
+	{SN_LeadingSignCount},
+	{SN_LeadingZeroCount},
+	{SN_MultiplyHigh},
+	{SN_ReverseElementBits},
+	{SN_Yield},
+	{SN_get_IsSupported},
+};
 
-static guint16 vector_methods [] = {
-	SN_ConvertToDouble,
-	SN_ConvertToInt32,
-	SN_ConvertToInt64,
-	SN_ConvertToSingle,
-	SN_ConvertToUInt32,
-	SN_ConvertToUInt64,
-	SN_Narrow,
-	SN_Widen,
-	SN_get_IsHardwareAccelerated,
+static SimdIntrinsic crc32_methods [] = {
+	{SN_ComputeCrc32},
+	{SN_ComputeCrc32C},
+	{SN_get_IsSupported}
 };
 
-static MonoInst*
-emit_sys_numerics_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
-{
-	MonoInst *ins;
-	int id;
-	MonoType *etype;
+static SimdIntrinsic crypto_aes_methods [] = {
+	{SN_Decrypt, OP_XOP_X_X_X, INTRINS_AARCH64_AESD},
+	{SN_Encrypt, OP_XOP_X_X_X, INTRINS_AARCH64_AESE},
+	{SN_InverseMixColumns, OP_XOP_X_X, INTRINS_AARCH64_AESIMC},
+	{SN_MixColumns, OP_XOP_X_X, INTRINS_AARCH64_AESMC},
+	{SN_PolynomialMultiplyWideningLower},
+	{SN_PolynomialMultiplyWideningUpper},
+	{SN_get_IsSupported},
+};
 
-	id = lookup_intrins (vector_methods, sizeof (vector_methods), cmethod);
-	if (id == -1) {
-		//check_no_intrinsic_cattr (cmethod);
-		return NULL;
-	}
+static SimdIntrinsic sha1_methods [] = {
+	{SN_FixedRotate, OP_XOP_X_X, INTRINS_AARCH64_SHA1H},
+	{SN_HashUpdateChoose, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA1C},
+	{SN_HashUpdateMajority, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA1M},
+	{SN_HashUpdateParity, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA1P},
+	{SN_ScheduleUpdate0, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA1SU0},
+	{SN_ScheduleUpdate1, OP_XOP_X_X_X, INTRINS_AARCH64_SHA1SU1},
+	{SN_get_IsSupported}
+};
 
-	//printf ("%s\n", mono_method_full_name (cmethod, 1));
+static SimdIntrinsic sha256_methods [] = {
+	{SN_HashUpdate1, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA256H},
+	{SN_HashUpdate2, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA256H2},
+	{SN_ScheduleUpdate0, OP_XOP_X_X_X, INTRINS_AARCH64_SHA256SU0},
+	{SN_ScheduleUpdate1, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA256SU1},
+	{SN_get_IsSupported}
+};
 
-	if (cfg->verbose_level > 1) {
-		char *name = mono_method_full_name (cmethod, TRUE);
-		printf ("  SIMD intrinsic %s\n", name);
-		g_free (name);
-	}
-
-	switch (id) {
-	case SN_get_IsHardwareAccelerated:
-		EMIT_NEW_ICONST (cfg, ins, 1);
-		ins->type = STACK_I4;
-		return ins;
-	case SN_ConvertToInt32:
-		etype = get_vector_t_elem_type (fsig->params [0]);
-		g_assert (etype->type == MONO_TYPE_R4);
-		return emit_simd_ins (cfg, mono_class_from_mono_type_internal (fsig->ret), OP_CVTPS2DQ, args [0]->dreg, -1);
-	case SN_ConvertToSingle:
-		etype = get_vector_t_elem_type (fsig->params [0]);
-		g_assert (etype->type == MONO_TYPE_I4 || etype->type == MONO_TYPE_U4);
-		// FIXME:
-		if (etype->type == MONO_TYPE_U4)
-			return NULL;
-		return emit_simd_ins (cfg, mono_class_from_mono_type_internal (fsig->ret), OP_CVTDQ2PS, args [0]->dreg, -1);
-	case SN_ConvertToDouble:
-	case SN_ConvertToInt64:
-	case SN_ConvertToUInt32:
-	case SN_ConvertToUInt64:
-	case SN_Narrow:
-	case SN_Widen:
-		// FIXME:
-		break;
-	default:
-		break;
-	}
-
-	return NULL;
-}
-
-static guint16 vector_t_methods [] = {
-	SN_ctor,
-	SN_CopyTo,
-	SN_GreaterThan,
-	SN_GreaterThanOrEqual,
-	SN_LessThan,
-	SN_LessThanOrEqual,
-	SN_Max,
-	SN_Min,
-	SN_get_AllBitsSet,
-	SN_get_Count,
-	SN_get_IsSupported,
-	SN_get_Item,
-	SN_get_One,
-	SN_get_Zero,
-	SN_op_Addition,
-	SN_op_BitwiseAnd,
-	SN_op_BitwiseOr,
-	SN_op_Division,
-	SN_op_Equality,
-	SN_op_ExclusiveOr,
-	SN_op_Explicit,
-	SN_op_Inequality,
-	SN_op_Multiply,
-	SN_op_Subtraction
-};
-
-static MonoInst*
-emit_sys_numerics_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
-{
-	MonoInst *ins;
-	MonoType *type, *etype;
-	MonoClass *klass;
-	int size, len, id;
-	gboolean is_unsigned;
-
-	id = lookup_intrins (vector_t_methods, sizeof (vector_t_methods), cmethod);
-	if (id == -1) {
-		//check_no_intrinsic_cattr (cmethod);
-		return NULL;
-	}
-
-	klass = cmethod->klass;
-	type = m_class_get_byval_arg (klass);
-	etype = mono_class_get_context (klass)->class_inst->type_argv [0];
-
-	if (id == SN_get_IsSupported) {
-		EMIT_NEW_ICONST (cfg, ins, is_supported_vector_primitive_type (etype) ? 1 : 0);
-		return ins;
-	}
-
-	if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype))
-		return NULL;
-
-	size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
-	g_assert (size);
-	len = register_size / size;
-
-
-	if (cfg->verbose_level > 1) {
-		char *name = mono_method_full_name (cmethod, TRUE);
-		printf ("  SIMD intrinsic %s\n", name);
-		g_free (name);
-	}
-
-	switch (id) {
-	case SN_get_IsSupported: {
-		EMIT_NEW_ICONST (cfg, ins, 1);
-		return ins;
-	}
-	case SN_get_Count:
-		if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
-			break;
-		EMIT_NEW_ICONST (cfg, ins, len);
-		return ins;
-	case SN_get_Zero:
-		g_assert (fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type));
-		return emit_xzero (cfg, klass);
-	case SN_get_One: {
-		g_assert (fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type));
-		g_assert (register_size == 16);
-
-		switch (etype->type) {
-		case MONO_TYPE_I1:
-		case MONO_TYPE_U1: {
-			guint8 value[16];
-
-			for (int i = 0; i < len; ++i) {
-				value [i] = 1;
-			}
-
-			return emit_xconst_v128 (cfg, klass, value);
-		}
-		case MONO_TYPE_I2:
-		case MONO_TYPE_U2: {
-			guint16 value[8];
-
-			for (int i = 0; i < len; ++i) {
-				value [i] = 1;
-			}
-
-			return emit_xconst_v128 (cfg, klass, (guint8*)value);
-		}
-#if TARGET_SIZEOF_VOID_P == 4
-		case MONO_TYPE_I:
-		case MONO_TYPE_U:
-#endif
-		case MONO_TYPE_I4:
-		case MONO_TYPE_U4: {
-			guint32 value[4];
-
-			for (int i = 0; i < len; ++i) {
-				value [i] = 1;
-			}
-
-			return emit_xconst_v128 (cfg, klass, (guint8*)value);
-		}
-#if TARGET_SIZEOF_VOID_P == 8
-		case MONO_TYPE_I:
-		case MONO_TYPE_U:
-#endif
-		case MONO_TYPE_I8:
-		case MONO_TYPE_U8: {
-			guint64 value[2];
-
-			for (int i = 0; i < len; ++i) {
-				value [i] = 1;
-			}
-
-			return emit_xconst_v128 (cfg, klass, (guint8*)value);
-		}
-		case MONO_TYPE_R4: {
-			float value[4];
-
-			for (int i = 0; i < len; ++i) {
-				value [i] = 1.0f;
-			}
-
-			return emit_xconst_v128 (cfg, klass, (guint8*)value);
-		}
-		case MONO_TYPE_R8: {
-			double value[2];
-
-			for (int i = 0; i < len; ++i) {
-				value [i] = 1.0;
-			}
-
-			return emit_xconst_v128 (cfg, klass, (guint8*)value);
-		}
-		default:
-			g_assert_not_reached ();
-		}
-	}
-	case SN_get_AllBitsSet: {
-		return emit_xones (cfg, klass);
-	}
-	case SN_get_Item: {
-		if (!COMPILE_LLVM (cfg))
-			return NULL;
-		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
-		MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
-		MonoTypeEnum ty = etype->type;
-		int opcode = type_to_xextract_op (ty);
-		int src1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
-		ins = emit_simd_ins (cfg, klass, opcode, src1, args [1]->dreg);
-		ins->inst_c1 = ty;
-		return ins;
-	}
-	case SN_ctor:
-		if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) {
-			int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
-
-			int opcode = type_to_expand_op (etype->type);
-			ins = emit_simd_ins (cfg, klass, opcode, args [1]->dreg, -1);
-			ins->dreg = dreg;
-			return ins;
-		}
-		if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
-			MonoInst *array_ins = args [1];
-			MonoInst *index_ins;
-			MonoInst *ldelema_ins;
-			MonoInst *var;
-			int end_index_reg;
-
-			if (args [0]->opcode != OP_LDADDR)
-				return NULL;
-
-			/* .ctor (T[]) or .ctor (T[], index) */
-
-			if (fsig->param_count == 2) {
-				index_ins = args [2];
-			} else {
-				EMIT_NEW_ICONST (cfg, index_ins, 0);
-			}
-
-			/* Emit bounds check for the index (index >= 0) */
-			mini_emit_bounds_check_offset (cfg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), index_ins->dreg, "ArgumentOutOfRangeException", FALSE);
-
-			/* Emit bounds check for the end (index + len - 1 < array length) */
-			end_index_reg = alloc_ireg (cfg);
-			EMIT_NEW_BIALU_IMM (cfg, ins, OP_IADD_IMM, end_index_reg, index_ins->dreg, len - 1);
-			mini_emit_bounds_check_offset (cfg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), end_index_reg, "ArgumentOutOfRangeException", FALSE);
-
-			/* Load the array slice into the simd reg */
-			ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, FALSE, FALSE);
-			g_assert (args [0]->opcode == OP_LDADDR);
-			var = (MonoInst*)args [0]->inst_p0;
-			EMIT_NEW_LOAD_MEMBASE (cfg, ins, OP_LOADX_MEMBASE, var->dreg, ldelema_ins->dreg, 0);
-			ins->klass = cmethod->klass;
-			return args [0];
-		}
-		break;
-	case SN_CopyTo:
-		if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
-			MonoInst *array_ins = args [1];
-			MonoInst *index_ins;
-			MonoInst *ldelema_ins;
-			int val_vreg, end_index_reg;
-
-			val_vreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
-
-			/* CopyTo (T[]) or CopyTo (T[], index) */
-
-			if (fsig->param_count == 2) {
-				index_ins = args [2];
-			} else {
-				EMIT_NEW_ICONST (cfg, index_ins, 0);
-			}
-
-			/* CopyTo () does complicated argument checks */
-			mini_emit_bounds_check_offset (cfg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), index_ins->dreg, "ArgumentOutOfRangeException", FALSE);
-			end_index_reg = alloc_ireg (cfg);
-			int len_reg = alloc_ireg (cfg);
-			MONO_EMIT_NEW_LOAD_MEMBASE_OP_FLAGS (cfg, OP_LOADI4_MEMBASE, len_reg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), MONO_INST_INVARIANT_LOAD);
-			EMIT_NEW_BIALU (cfg, ins, OP_ISUB, end_index_reg, len_reg, index_ins->dreg);
-			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, end_index_reg, len);
-			MONO_EMIT_NEW_COND_EXC (cfg, LT, "ArgumentException");
-
-			/* Load the array slice into the simd reg */
-			ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, FALSE, FALSE);
-			EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_MEMBASE, ldelema_ins->dreg, 0, val_vreg);
-			ins->klass = cmethod->klass;
-			return ins;
-		}
-		break;
-	case SN_op_Equality:
-	case SN_op_Inequality:
-		g_assert (fsig->param_count == 2 && fsig->ret->type == MONO_TYPE_BOOLEAN &&
-				  mono_metadata_type_equal (fsig->params [0], type) &&
-				  mono_metadata_type_equal (fsig->params [1], type));
-		switch (id) {
-			case SN_op_Equality: return emit_xequal (cfg, klass, etype->type, args [0], args [1]);
-			case SN_op_Inequality: return emit_not_xequal (cfg, klass, etype->type, args [0], args [1]);
-			default: g_assert_not_reached ();
-		}
-	case SN_GreaterThan:
-	case SN_GreaterThanOrEqual:
-	case SN_LessThan:
-	case SN_LessThanOrEqual:
-		g_assert (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type));
-		is_unsigned = etype->type == MONO_TYPE_U1 || etype->type == MONO_TYPE_U2 || etype->type == MONO_TYPE_U4 || etype->type == MONO_TYPE_U8 || etype->type == MONO_TYPE_U;
-		ins = emit_xcompare (cfg, klass, etype->type, args [0], args [1]);
-		switch (id) {
-		case SN_GreaterThan:
-			ins->inst_c0 = is_unsigned ? CMP_GT_UN : CMP_GT;
-			break;
-		case SN_GreaterThanOrEqual:
-			ins->inst_c0 = is_unsigned ? CMP_GE_UN : CMP_GE;
-			break;
-		case SN_LessThan:
-			ins->inst_c0 = is_unsigned ? CMP_LT_UN : CMP_LT;
-			break;
-		case SN_LessThanOrEqual:
-			ins->inst_c0 = is_unsigned ? CMP_LE_UN : CMP_LE;
-			break;
-		default:
-			g_assert_not_reached ();
-		}
-		return ins;
-	case SN_op_Explicit:
-		return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1);
-	case SN_op_Addition:
-	case SN_op_Subtraction:
-	case SN_op_Division:
-	case SN_op_Multiply:
-	case SN_op_BitwiseAnd:
-	case SN_op_BitwiseOr:
-	case SN_op_ExclusiveOr:
-	case SN_Max:
-	case SN_Min:
-		if (!(fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)))
-			return NULL;
-		ins = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, args [1]->dreg);
-		ins->inst_c1 = etype->type;
-
-		if (type_enum_is_float (etype->type)) {
-			switch (id) {
-			case SN_op_Addition:
-				ins->inst_c0 = OP_FADD;
-				break;
-			case SN_op_Subtraction:
-				ins->inst_c0 = OP_FSUB;
-				break;
-			case SN_op_Multiply:
-				ins->inst_c0 = OP_FMUL;
-				break;
-			case SN_op_Division:
-				ins->inst_c0 = OP_FDIV;
-				break;
-			case SN_Max:
-				ins->inst_c0 = OP_FMAX;
-				break;
-			case SN_Min:
-				ins->inst_c0 = OP_FMIN;
-				break;
-			default:
-				NULLIFY_INS (ins);
-				return NULL;
-			}
-		} else {
-			switch (id) {
-			case SN_op_Addition:
-				ins->inst_c0 = OP_IADD;
-				break;
-			case SN_op_Subtraction:
-				ins->inst_c0 = OP_ISUB;
-				break;
-				/*
-			case SN_op_Division:
-				ins->inst_c0 = OP_IDIV;
-				break;
-			case SN_op_Multiply:
-				ins->inst_c0 = OP_IMUL;
-				break;
-				*/
-			case SN_op_BitwiseAnd:
-				ins->inst_c0 = OP_IAND;
-				break;
-			case SN_op_BitwiseOr:
-				ins->inst_c0 = OP_IOR;
-				break;
-			case SN_op_ExclusiveOr:
-				ins->inst_c0 = OP_IXOR;
-				break;
-			case SN_Max:
-				ins->inst_c0 = OP_IMAX;
-				break;
-			case SN_Min:
-				ins->inst_c0 = OP_IMIN;
-				break;
-			default:
-				NULLIFY_INS (ins);
-				return NULL;
-			}
-		}
-		return ins;
-	default:
-		break;
-	}
-
-	return NULL;
-}
-#endif // TARGET_AMD64
-
-#ifdef TARGET_ARM64
-
-static SimdIntrinsic armbase_methods [] = {
-	{SN_LeadingSignCount},
-	{SN_LeadingZeroCount},
-	{SN_MultiplyHigh},
-	{SN_ReverseElementBits},
-	{SN_Yield},
-	{SN_get_IsSupported},
-};
-
-static SimdIntrinsic crc32_methods [] = {
-	{SN_ComputeCrc32},
-	{SN_ComputeCrc32C},
-	{SN_get_IsSupported}
-};
-
-static SimdIntrinsic crypto_aes_methods [] = {
-	{SN_Decrypt, OP_XOP_X_X_X, INTRINS_AARCH64_AESD},
-	{SN_Encrypt, OP_XOP_X_X_X, INTRINS_AARCH64_AESE},
-	{SN_InverseMixColumns, OP_XOP_X_X, INTRINS_AARCH64_AESIMC},
-	{SN_MixColumns, OP_XOP_X_X, INTRINS_AARCH64_AESMC},
-	{SN_PolynomialMultiplyWideningLower},
-	{SN_PolynomialMultiplyWideningUpper},
-	{SN_get_IsSupported},
-};
-
-static SimdIntrinsic sha1_methods [] = {
-	{SN_FixedRotate, OP_XOP_X_X, INTRINS_AARCH64_SHA1H},
-	{SN_HashUpdateChoose, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA1C},
-	{SN_HashUpdateMajority, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA1M},
-	{SN_HashUpdateParity, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA1P},
-	{SN_ScheduleUpdate0, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA1SU0},
-	{SN_ScheduleUpdate1, OP_XOP_X_X_X, INTRINS_AARCH64_SHA1SU1},
-	{SN_get_IsSupported}
-};
-
-static SimdIntrinsic sha256_methods [] = {
-	{SN_HashUpdate1, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA256H},
-	{SN_HashUpdate2, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA256H2},
-	{SN_ScheduleUpdate0, OP_XOP_X_X_X, INTRINS_AARCH64_SHA256SU0},
-	{SN_ScheduleUpdate1, OP_XOP_X_X_X_X, INTRINS_AARCH64_SHA256SU1},
-	{SN_get_IsSupported}
-};
-
-// This table must be kept in sorted order. ASCII } is sorted after alphanumeric
-// characters, so blind use of your editor's "sort lines" facility will
-// mis-order the lines.
-//
-// In Vim you can use `sort /.*{[0-9A-z]*/ r` to sort this table.
+// This table must be kept in sorted order. ASCII } is sorted after alphanumeric
+// characters, so blind use of your editor's "sort lines" facility will
+// mis-order the lines.
+//
+// In Vim you can use `sort /.*{[0-9A-z]*/ r` to sort this table.
 
 static SimdIntrinsic advsimd_methods [] = {
 	{SN_Abs, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_ABS, None, None, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FABS},
@@ -3733,7 +3396,13 @@ static SimdIntrinsic advsimd_methods [] = {
 	{SN_LeadingZeroCount, OP_ARM64_CLZ},
 	{SN_LoadAndInsertScalar},
 	{SN_LoadAndReplicateToVector128, OP_ARM64_LD1R},
+	{SN_LoadAndReplicateToVector128x2, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD2R_V128},
+	{SN_LoadAndReplicateToVector128x3, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD3R_V128},
+	{SN_LoadAndReplicateToVector128x4, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD4R_V128},
 	{SN_LoadAndReplicateToVector64, OP_ARM64_LD1R},
+	{SN_LoadAndReplicateToVector64x2, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD2R_V64},
+	{SN_LoadAndReplicateToVector64x3, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD3R_V64},
+	{SN_LoadAndReplicateToVector64x4, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD4R_V64},
 	{SN_LoadPairScalarVector64, OP_ARM64_LDP_SCALAR},
 	{SN_LoadPairScalarVector64NonTemporal, OP_ARM64_LDNP_SCALAR},
 	{SN_LoadPairVector128, OP_ARM64_LDP},
@@ -3741,7 +3410,19 @@ static SimdIntrinsic advsimd_methods [] = {
 	{SN_LoadPairVector64, OP_ARM64_LDP},
 	{SN_LoadPairVector64NonTemporal, OP_ARM64_LDNP},
 	{SN_LoadVector128, OP_ARM64_LD1},
+	{SN_LoadVector128x2, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD1X2_V128},
+	{SN_LoadVector128x2AndUnzip, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD2_V128},
+	{SN_LoadVector128x3, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD1X3_V128},
+	{SN_LoadVector128x3AndUnzip, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD3_V128},
+	{SN_LoadVector128x4, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD1X4_V128},
+	{SN_LoadVector128x4AndUnzip, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD4_V128},
 	{SN_LoadVector64, OP_ARM64_LD1},
+	{SN_LoadVector64x2, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD1X2_V64},
+	{SN_LoadVector64x2AndUnzip, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD2_V64},
+	{SN_LoadVector64x3, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD1X3_V64},
+	{SN_LoadVector64x3AndUnzip, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD3_V64},
+	{SN_LoadVector64x4, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD1X4_V64},
+	{SN_LoadVector64x4AndUnzip, OP_ARM64_LDM, INTRINS_AARCH64_ADV_SIMD_LD4_V64},
 	{SN_Max, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_SMAX, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_UMAX, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_FMAX},
 	{SN_MaxAcross, OP_ARM64_XHORIZ, INTRINS_AARCH64_ADV_SIMD_SMAXV, OP_ARM64_XHORIZ, INTRINS_AARCH64_ADV_SIMD_UMAXV, OP_ARM64_XHORIZ, INTRINS_AARCH64_ADV_SIMD_FMAXV},
 	{SN_MaxNumber, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_FMAXNM},
@@ -3934,6 +3615,18 @@ static SimdIntrinsic advsimd_methods [] = {
 	{SN_StorePairScalar, OP_ARM64_STP_SCALAR},
 	{SN_StorePairScalarNonTemporal, OP_ARM64_STNP_SCALAR},
 	{SN_StoreSelectedScalar},
+	{SN_StoreVector128x2},
+	{SN_StoreVector128x2AndZip},
+	{SN_StoreVector128x3},
+	{SN_StoreVector128x3AndZip},
+	{SN_StoreVector128x4},
+	{SN_StoreVector128x4AndZip},
+	{SN_StoreVector64x2},
+	{SN_StoreVector64x2AndZip},
+	{SN_StoreVector64x3},
+	{SN_StoreVector64x3AndZip},
+	{SN_StoreVector64x4},
+	{SN_StoreVector64x4AndZip},
 	{SN_Subtract, OP_XBINOP, OP_ISUB, None, None, OP_XBINOP, OP_FSUB},
 	{SN_SubtractHighNarrowingLower, OP_ARM64_SUBHN},
 	{SN_SubtractHighNarrowingUpper, OP_ARM64_SUBHN2},
@@ -4023,7 +3716,7 @@ emit_arm64_intrinsics (
 			MONO_ADD_INS (cfg->cbb, ins);
 			return ins;
 		}
-			
+
 		default:
 			g_assert_not_reached (); // if a new API is added we need to either implement it or change IsSupported to false
 		}
@@ -4123,10 +3816,14 @@ emit_arm64_intrinsics (
 			ins->inst_c1 = arg0_type;
 			return ins;
 		}
-		case SN_LoadAndInsertScalar:
-			if (!is_intrinsics_vector_type (fsig->params [0]))
-				return NULL;
-			return emit_simd_ins_for_sig (cfg, klass, OP_ARM64_LD1_INSERT, 0, arg0_type, fsig, args);
+		case SN_LoadAndInsertScalar: {
+			int load_op;
+			if (is_intrinsics_vector_type (fsig->params [0]))
+				load_op = OP_ARM64_LD1_INSERT;
+			else
+				load_op = OP_ARM64_LDM_INSERT;
+			return emit_simd_ins_for_sig (cfg, klass, load_op, 0, arg0_type, fsig, args);
+		}
 		case SN_InsertSelectedScalar:
 		case SN_InsertScalar:
 		case SN_Insert: {
@@ -4170,9 +3867,13 @@ emit_arm64_intrinsics (
 			return ret;
 		}
 		case SN_StoreSelectedScalar: {
-			if (!is_intrinsics_vector_type (fsig->params [1]))
-				return NULL;
-			return emit_simd_ins_for_sig (cfg, klass, OP_ARM64_ST1_SCALAR, 0, arg0_type, fsig, args);
+			int store_op;
+			if (is_intrinsics_vector_type (fsig->params [1]))
+				store_op = OP_ARM64_ST1_SCALAR;
+			else
+				store_op = OP_ARM64_STM_SCALAR;
+			MonoClass* klass_tuple_var = mono_class_from_mono_type_internal (fsig->params [1]);
+			return emit_simd_ins_for_sig (cfg, klass_tuple_var, store_op, 0, arg0_type, fsig, args);
 		}
 		case SN_MultiplyRoundedDoublingBySelectedScalarSaturateHigh:
 		case SN_MultiplyRoundedDoublingScalarBySelectedScalarSaturateHigh:
@@ -4328,6 +4029,38 @@ emit_arm64_intrinsics (
 			MONO_ADD_INS (cfg->cbb, ins);
 			return ins;
 		}
+		case SN_StoreVector128x2:
+		case SN_StoreVector128x3:
+		case SN_StoreVector128x4:
+		case SN_StoreVector64x2:
+		case SN_StoreVector64x3:
+		case SN_StoreVector64x4:
+		case SN_StoreVector128x2AndZip:
+		case SN_StoreVector128x3AndZip:
+		case SN_StoreVector128x4AndZip:
+		case SN_StoreVector64x2AndZip:
+		case SN_StoreVector64x3AndZip:
+		case SN_StoreVector64x4AndZip: {
+			int iid = 0;
+			switch (id) {
+			case SN_StoreVector128x2: iid = INTRINS_AARCH64_ADV_SIMD_ST1X2_V128; break;
+			case SN_StoreVector128x3: iid = INTRINS_AARCH64_ADV_SIMD_ST1X3_V128; break;
+			case SN_StoreVector128x4: iid = INTRINS_AARCH64_ADV_SIMD_ST1X4_V128; break;
+			case SN_StoreVector64x2: iid = INTRINS_AARCH64_ADV_SIMD_ST1X2_V64; break;
+			case SN_StoreVector64x3: iid = INTRINS_AARCH64_ADV_SIMD_ST1X3_V64; break;
+			case SN_StoreVector64x4: iid = INTRINS_AARCH64_ADV_SIMD_ST1X4_V64; break;
+			case SN_StoreVector128x2AndZip: iid = INTRINS_AARCH64_ADV_SIMD_ST2_V128; break;
+			case SN_StoreVector128x3AndZip: iid = INTRINS_AARCH64_ADV_SIMD_ST3_V128; break;
+			case SN_StoreVector128x4AndZip: iid = INTRINS_AARCH64_ADV_SIMD_ST4_V128; break;
+			case SN_StoreVector64x2AndZip: iid = INTRINS_AARCH64_ADV_SIMD_ST2_V64; break;
+			case SN_StoreVector64x3AndZip: iid = INTRINS_AARCH64_ADV_SIMD_ST3_V64; break;
+			case SN_StoreVector64x4AndZip: iid = INTRINS_AARCH64_ADV_SIMD_ST4_V64; break;
+			default: g_assert_not_reached ();
+			}
+
+			MonoClass* klass_tuple_var = mono_class_from_mono_type_internal (fsig->params [1]);
+			return emit_simd_ins_for_sig (cfg, klass_tuple_var, OP_ARM64_STM, iid, arg0_type, fsig, args);
+		}
 		default:
 			g_assert_not_reached ();
 		}
@@ -5383,15 +5116,15 @@ emit_x86_intrinsics (
 			MONO_ADD_INS (cfg->cbb, ins);
 			return ins;
 		case SN_DivRem: {
-			g_assert (!(TARGET_SIZEOF_VOID_P == 4 && is_64bit)); // x86(no -64) cannot do divisions with 64-bit regs 
+			g_assert (!(TARGET_SIZEOF_VOID_P == 4 && is_64bit)); // x86(no -64) cannot do divisions with 64-bit regs
 			const MonoStackType divtype = is_64bit ? STACK_I8 : STACK_I4;
 			const int storetype = is_64bit ? OP_STOREI8_MEMBASE_REG : OP_STOREI4_MEMBASE_REG;
 			const int obj_size = MONO_ABI_SIZEOF (MonoObject);
 
-			// We must decide by the second argument, the first is always unsigned here	
+			// We must decide by the second argument, the first is always unsigned here
 			MonoTypeEnum arg1_type = fsig->param_count > 1 ? get_underlying_type (fsig->params [1]) : MONO_TYPE_VOID;
 			MonoInst* div;
-			MonoInst* div2; 
+			MonoInst* div2;
 
 			if (type_enum_is_unsigned (arg1_type)) {
 				MONO_INST_NEW (cfg, div, is_64bit ? OP_X86_LDIVREMU : OP_X86_IDIVREMU);
@@ -5412,7 +5145,7 @@ emit_x86_intrinsics (
 			div2->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
 			div2->type = divtype;
 			MONO_ADD_INS (cfg->cbb, div2);
-			
+
 			// TODO: Can the creation of tuple be elided? (e.g. if deconstruction is used)
 			MonoInst* tuple = mono_compile_create_var (cfg, fsig->ret, OP_LOCAL);
 			MonoInst* tuple_addr;
@@ -5915,13 +5648,6 @@ arch_emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCom
 			emit_arm64_intrinsics);
 	}
 
-	if (!strcmp (class_ns, "System.Numerics")) {
-		if (!strcmp (class_name, "Vector"))
-			return emit_sri_vector (cfg, cmethod, fsig, args);
-		if (!strcmp (class_name, "Vector`1"))
-			return emit_sri_vector_t (cfg, cmethod, fsig, args);
-	}
-
 	return NULL;
 }
 #elif defined(TARGET_AMD64)
@@ -5936,15 +5662,6 @@ arch_emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCom
 			emit_x86_intrinsics);
 	}
 
-	if (!strcmp (class_ns, "System.Numerics")) {
-		// FIXME: Shouldn't this call emit_sri_vector () ?
-		if (!strcmp (class_name, "Vector"))
-			return emit_sys_numerics_vector (cfg, cmethod, fsig, args);
-		// FIXME: Shouldn't this call emit_sri_vector_t () ?
-		if (!strcmp (class_name, "Vector`1"))
-			return emit_sys_numerics_vector_t (cfg, cmethod, fsig, args);
-	}
-
 	return NULL;
 }
 #elif defined(TARGET_WASM)
@@ -5958,13 +5675,6 @@ arch_emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCom
 			emit_wasm_supported_intrinsics);
 	}
 
-	if (!strcmp (class_ns, "System.Numerics")) {
-		if (!strcmp (class_name, "Vector"))
-			return emit_sri_vector (cfg, cmethod, fsig, args);
-		if (!strcmp (class_name, "Vector`1"))
-			return emit_sri_vector_t (cfg, cmethod, fsig, args);
-	}
-
 	return NULL;
 }
 #else
@@ -6007,9 +5717,11 @@ emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCompile
 {
 	MonoInst *ins;
 
-	ins = arch_emit_simd_intrinsics (class_ns, class_name, cfg, cmethod, fsig, args);
-	if (ins)
-		return ins;
+	if (cfg->opt & MONO_OPT_SIMD) {
+		ins = arch_emit_simd_intrinsics (class_ns, class_name, cfg, cmethod, fsig, args);
+		if (ins)
+			return ins;
+	}
 
 	if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
 		if (!strcmp (class_name, "Vector64") || !strcmp (class_name, "Vector128") || !strcmp (class_name, "Vector256") || !strcmp (class_name, "Vector512"))
@@ -6019,6 +5731,10 @@ emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCompile
 	}
 
 	if (!strcmp (class_ns, "System.Numerics")) {
+		if (!strcmp (class_name, "Vector"))
+			return emit_sri_vector (cfg, cmethod, fsig, args);
+		if (!strcmp (class_name, "Vector`1"))
+			return emit_sri_vector_t (cfg, cmethod, fsig, args);
 		if (!strcmp (class_name, "Vector2") || !strcmp (class_name, "Vector3") || !strcmp (class_name, "Vector4") ||
 			!strcmp (class_name, "Quaternion") || !strcmp (class_name, "Plane"))
 			return emit_vector_2_3_4 (cfg, cmethod, fsig, args);
@@ -6144,8 +5860,17 @@ mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *i
 	}
 }
 
+#else
+
+void
+mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
+{
+}
+
+#endif /*defined(TARGET_WIN32) && defined(TARGET_AMD64)*/
+
 gboolean
-mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod *cmethod)
+mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoCompile *cfg, MonoMethod *cmethod)
 {
 	/*
 	* If a method has been marked with aggressive inlining, check if we support
@@ -6156,66 +5881,18 @@ mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod *cmethod)
 	if (!strcmp (m_class_get_name_space (cmethod->klass), "System.Runtime.Intrinsics")) {
 		if (!strncmp(m_class_get_name (cmethod->klass), "Vector", 6)) {
 			const char *vector_type = m_class_get_name (cmethod->klass) + 6;
-			if (!strcmp(vector_type, "256`1") || !strcmp(vector_type, "512`1"))
+			if (!strcmp(vector_type, "256`1") || !strcmp(vector_type, "512`1") || !strcmp(vector_type, "256") || !strcmp(vector_type, "512"))
+				return TRUE;
+			if (!(cfg->opt & MONO_OPT_SIMD) && (!strcmp (vector_type, "128`1") || !strcmp (vector_type, "128") || !strcmp (vector_type, "64`1") || !strcmp (vector_type, "64")))
 				return TRUE;
 		}
 	}
-	return FALSE;
-}
-#else
-void
-mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
-{
-}
 
-gboolean
-mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod* cmethod)
-{
 	return FALSE;
 }
 
-#endif /*defined(TARGET_WIN32) && defined(TARGET_AMD64)*/
-
 #endif /* DISABLE_JIT */
 
-#else /* MONO_ARCH_SIMD_INTRINSICS */
-
-void
-mono_simd_intrinsics_init (void)
-{
-}
-
-MonoInst*
-mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr)
-{
-	return NULL;
-}
-
-MonoInst*
-mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
-{
-	return NULL;
-}
-
-MonoInst*
-mono_emit_common_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
-{
-	return NULL;
-}
-
-void
-mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
-{
-}
-
-gboolean
-mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod* cmethod)
-{
-	return FALSE;
-}
-
-#endif /* MONO_ARCH_SIMD_INTRINSICS */
-
 #if defined(TARGET_AMD64)
 void
 ves_icall_System_Runtime_Intrinsics_X86_X86Base___cpuidex (int abcd[4], int function_id, int subfunction_id)
diff --git a/src/mono/mono/mini/simd-methods.h b/src/mono/mono/mini/simd-methods.h
index e8f307419550..70138b552685 100644
--- a/src/mono/mono/mini/simd-methods.h
+++ b/src/mono/mono/mini/simd-methods.h
@@ -165,6 +165,12 @@ METHOD(Extract)
 METHOD(LoadHigh)
 METHOD(LoadLow)
 METHOD(LoadVector128)
+METHOD(LoadVector128x2)
+METHOD(LoadVector128x3)
+METHOD(LoadVector128x4)
+METHOD(LoadVector128x2AndUnzip)
+METHOD(LoadVector128x3AndUnzip)
+METHOD(LoadVector128x4AndUnzip)
 METHOD(LoadScalarVector128)
 METHOD(MoveHighToLow)
 METHOD(MoveLowToHigh)
@@ -428,7 +434,13 @@ METHOD(InsertScalar)
 METHOD(InsertSelectedScalar)
 METHOD(LoadAndInsertScalar)
 METHOD(LoadAndReplicateToVector128)
+METHOD(LoadAndReplicateToVector128x2)
+METHOD(LoadAndReplicateToVector128x3)
+METHOD(LoadAndReplicateToVector128x4)
 METHOD(LoadAndReplicateToVector64)
+METHOD(LoadAndReplicateToVector64x2)
+METHOD(LoadAndReplicateToVector64x3)
+METHOD(LoadAndReplicateToVector64x4)
 METHOD(LoadPairScalarVector64)
 METHOD(LoadPairScalarVector64NonTemporal)
 METHOD(LoadPairVector128)
@@ -436,6 +448,12 @@ METHOD(LoadPairVector128NonTemporal)
 METHOD(LoadPairVector64)
 METHOD(LoadPairVector64NonTemporal)
 METHOD(LoadVector64)
+METHOD(LoadVector64x2)
+METHOD(LoadVector64x3)
+METHOD(LoadVector64x4)
+METHOD(LoadVector64x2AndUnzip)
+METHOD(LoadVector64x3AndUnzip)
+METHOD(LoadVector64x4AndUnzip)
 METHOD(MaxAcross)
 METHOD(MaxNumber)
 METHOD(MaxNumberAcross)
@@ -606,6 +624,18 @@ METHOD(StorePairNonTemporal)
 METHOD(StorePairScalar)
 METHOD(StorePairScalarNonTemporal)
 METHOD(StoreSelectedScalar)
+METHOD(StoreVector128x2)
+METHOD(StoreVector128x3)
+METHOD(StoreVector128x4)
+METHOD(StoreVector128x2AndZip)
+METHOD(StoreVector128x3AndZip)
+METHOD(StoreVector128x4AndZip)
+METHOD(StoreVector64x2)
+METHOD(StoreVector64x3)
+METHOD(StoreVector64x4)
+METHOD(StoreVector64x2AndZip)
+METHOD(StoreVector64x3AndZip)
+METHOD(StoreVector64x4AndZip)
 METHOD(SubtractHighNarrowingLower)
 METHOD(SubtractHighNarrowingUpper)
 METHOD(SubtractRoundedHighNarrowingLower)
diff --git a/src/mono/mono/tools/offsets-tool/offsets-tool.py b/src/mono/mono/tools/offsets-tool/offsets-tool.py
index 008bf3c5aa48..ad450989afed 100644
--- a/src/mono/mono/tools/offsets-tool/offsets-tool.py
+++ b/src/mono/mono/tools/offsets-tool/offsets-tool.py
@@ -89,7 +89,7 @@ def require_emscipten_path (args):
 
 		if "wasm" in args.abi:
 			if args.wasi_path != None:
-				self.sys_includes = [args.wasi_path + "/share/wasi-sysroot/include", args.wasi_path + "/lib/clang/16/include", args.mono_path + "/wasi/mono-include"]
+				self.sys_includes = [args.wasi_path + "/share/wasi-sysroot/include", args.wasi_path + "/lib/clang/17/include", args.mono_path + "/wasi/mono-include"]
 				self.target = Target ("TARGET_WASI", None, ["TARGET_WASM"] + WASI_DEFINES)
 				self.target_args += ["-target", args.abi]
 			else:
diff --git a/src/mono/mono/utils/atomic.h b/src/mono/mono/utils/atomic.h
index 83a835da1895..7c7c684ab94e 100644
--- a/src/mono/mono/utils/atomic.h
+++ b/src/mono/mono/utils/atomic.h
@@ -95,22 +95,6 @@ Apple targets have historically being problematic, xcode 4.6 would miscompile th
 
 #include <stdatomic.h>
 
-static inline guint8
-mono_atomic_cas_u8 (volatile guint8 *dest, guint8 exch, guint8 comp)
-{
-	g_static_assert (sizeof (atomic_uchar) == sizeof (*dest) && ATOMIC_CHAR_LOCK_FREE == 2);
-	(void)atomic_compare_exchange_strong ((volatile atomic_uchar *)dest, &comp, exch);
-	return comp;
-}
-
-static inline gint16
-mono_atomic_cas_i16 (volatile gint16 *dest, gint16 exch, gint16 comp)
-{
-	g_static_assert (sizeof (atomic_short) == sizeof (*dest) && ATOMIC_SHORT_LOCK_FREE == 2);
-	(void)atomic_compare_exchange_strong ((volatile atomic_short *)dest, &comp, exch);
-	return comp;
-}
-
 static inline gint32
 mono_atomic_cas_i32 (volatile gint32 *dest, gint32 exch, gint32 comp)
 {
@@ -187,20 +171,6 @@ mono_atomic_dec_i64 (volatile gint64 *dest)
 	return mono_atomic_add_i64 (dest, -1);
 }
 
-static inline guint8
-mono_atomic_xchg_u8 (volatile guint8 *dest, guint8 exch)
-{
-	g_static_assert (sizeof (atomic_uchar) == sizeof (*dest) && ATOMIC_CHAR_LOCK_FREE == 2);
-	return atomic_exchange ((volatile atomic_uchar *)dest, exch);
-}
-
-static inline gint16
-mono_atomic_xchg_i16 (volatile gint16 *dest, gint16 exch)
-{
-	g_static_assert (sizeof (atomic_short) == sizeof (*dest) && ATOMIC_SHORT_LOCK_FREE == 2);
-	return atomic_exchange ((volatile atomic_short *)dest, exch);
-}
-
 static inline gint32
 mono_atomic_xchg_i32 (volatile gint32 *dest, gint32 exch)
 {
@@ -341,18 +311,6 @@ mono_atomic_store_ptr (volatile gpointer *dst, gpointer val)
 #include <windows.h>
 #include <intrin.h>
 
-static inline guint8
-mono_atomic_cas_u8 (volatile guint8 *dest, guint8 exch, guint8 comp)
-{
-	return _InterlockedCompareExchange8 ((CHAR volatile *)dest, (CHAR)exch, (CHAR)comp);
-}
-
-static inline gint16
-mono_atomic_cas_i16 (volatile gint16 *dest, gint16 exch, gint16 comp)
-{
-	return _InterlockedCompareExchange16 ((SHORT volatile *)dest, (SHORT)exch, (SHORT)comp);
-}
-
 static inline gint32
 mono_atomic_cas_i32 (volatile gint32 *dest, gint32 exch, gint32 comp)
 {
@@ -407,18 +365,6 @@ mono_atomic_dec_i64 (volatile gint64 *dest)
 	return InterlockedDecrement64 ((LONG64 volatile *)dest);
 }
 
-static inline guint8
-mono_atomic_xchg_u8 (volatile guint8 *dest, guint8 exch)
-{
-	return _InterlockedExchange8 ((CHAR volatile *)dest, (CHAR)exch);
-}
-
-static inline gint16
-mono_atomic_xchg_i16 (volatile gint16 *dest, gint16 exch)
-{
-	return _InterlockedExchange16 ((SHORT volatile *)dest, (SHORT)exch);
-}
-
 static inline gint32
 mono_atomic_xchg_i32 (volatile gint32 *dest, gint32 exch)
 {
@@ -562,18 +508,6 @@ mono_atomic_store_ptr (volatile gpointer *dst, gpointer val)
 #define gcc_sync_fetch_and_add(a, b) __sync_fetch_and_add (a, b)
 #endif
 
-static inline guint8 mono_atomic_cas_u8(volatile guint8 *dest,
-						guint8 exch, guint8 comp)
-{
-	return gcc_sync_val_compare_and_swap (dest, comp, exch);
-}
-
-static inline gint16 mono_atomic_cas_i16(volatile gint16 *dest,
-						gint16 exch, gint16 comp)
-{
-	return gcc_sync_val_compare_and_swap (dest, comp, exch);
-}
-
 static inline gint32 mono_atomic_cas_i32(volatile gint32 *dest,
 						gint32 exch, gint32 comp)
 {
@@ -600,24 +534,6 @@ static inline gint32 mono_atomic_dec_i32(volatile gint32 *val)
 	return gcc_sync_sub_and_fetch (val, 1);
 }
 
-static inline guint8 mono_atomic_xchg_u8(volatile guint8 *val, guint8 new_val)
-{
-	guint8 old_val;
-	do {
-		old_val = *val;
-	} while (gcc_sync_val_compare_and_swap (val, old_val, new_val) != old_val);
-	return old_val;
-}
-
-static inline gint16 mono_atomic_xchg_i16(volatile gint16 *val, gint16 new_val)
-{
-	gint16 old_val;
-	do {
-		old_val = *val;
-	} while (gcc_sync_val_compare_and_swap (val, old_val, new_val) != old_val);
-	return old_val;
-}
-
 static inline gint32 mono_atomic_xchg_i32(volatile gint32 *val, gint32 new_val)
 {
 	gint32 old_val;
@@ -809,12 +725,7 @@ static inline void mono_atomic_store_i64(volatile gint64 *dst, gint64 val)
 
 #define WAPI_NO_ATOMIC_ASM
 
-/* Fallbacks seem to not be used anymore, they should be removed
- * or small type ones should be added in case we find a platform that still needs them.
- * extern guint8 mono_atomic_cas_u8(volatile guint8 *dest, guint8 exch, guint8 comp);
- * extern gint16 mono_atomic_cas_i16(volatile gint16 *dest, gint16 exch, gint16 comp);
- * extern guint8 mono_atomic_xchg_u8(volatile guint8 *dest, guint8 exch);
- * extern gint16 mono_atomic_xchg_i16(volatile gint16 *dest, gint16 exch); */
+/* Fallbacks seem to not be used anymore, they should be removed. */
 extern gint32 mono_atomic_cas_i32(volatile gint32 *dest, gint32 exch, gint32 comp);
 extern gint64 mono_atomic_cas_i64(volatile gint64 *dest, gint64 exch, gint64 comp);
 extern gpointer mono_atomic_cas_ptr(volatile gpointer *dest, gpointer exch, gpointer comp);
diff --git a/src/mono/mono/utils/lifo-semaphore.c b/src/mono/mono/utils/lifo-semaphore.c
index dce67c48e8b3..1f3f6c4410b9 100644
--- a/src/mono/mono/utils/lifo-semaphore.c
+++ b/src/mono/mono/utils/lifo-semaphore.c
@@ -11,11 +11,10 @@ LifoSemaphore *
 mono_lifo_semaphore_init (void)
 {
 	LifoSemaphore *semaphore = g_new0 (LifoSemaphore, 1);
-	semaphore->base.kind = LIFO_SEMAPHORE_NORMAL;
 	if (semaphore == NULL)
 		return NULL;
 
-	mono_coop_mutex_init (&semaphore->base.mutex);
+	mono_coop_mutex_init (&semaphore->mutex);
 
 	return semaphore;
 }
@@ -24,7 +23,7 @@ void
 mono_lifo_semaphore_delete (LifoSemaphore *semaphore)
 {
 	g_assert (semaphore->head == NULL);
-	mono_coop_mutex_destroy (&semaphore->base.mutex);
+	mono_coop_mutex_destroy (&semaphore->mutex);
 	g_free (semaphore);
 }
 
@@ -34,12 +33,12 @@ mono_lifo_semaphore_timed_wait (LifoSemaphore *semaphore, int32_t timeout_ms)
 	LifoSemaphoreWaitEntry wait_entry = {0};
 
 	mono_coop_cond_init (&wait_entry.condition);
-	mono_coop_mutex_lock (&semaphore->base.mutex);
+	mono_coop_mutex_lock (&semaphore->mutex);
 
-	if (semaphore->base.pending_signals > 0) {
-		--semaphore->base.pending_signals;
+	if (semaphore->pending_signals > 0) {
+		--semaphore->pending_signals;
 		mono_coop_cond_destroy (&wait_entry.condition);
-		mono_coop_mutex_unlock (&semaphore->base.mutex);
+		mono_coop_mutex_unlock (&semaphore->mutex);
 		return 1;
 	}
 
@@ -53,7 +52,7 @@ mono_lifo_semaphore_timed_wait (LifoSemaphore *semaphore, int32_t timeout_ms)
 	// Wait for a signal or timeout
 	int wait_error = 0;
 	do {
-		wait_error = mono_coop_cond_timedwait (&wait_entry.condition, &semaphore->base.mutex, timeout_ms);
+		wait_error = mono_coop_cond_timedwait (&wait_entry.condition, &semaphore->mutex, timeout_ms);
 	} while (wait_error == 0 && !wait_entry.signaled);
 
 	if (wait_error == -1) {
@@ -66,7 +65,7 @@ mono_lifo_semaphore_timed_wait (LifoSemaphore *semaphore, int32_t timeout_ms)
 	}
 
 	mono_coop_cond_destroy (&wait_entry.condition);
-	mono_coop_mutex_unlock (&semaphore->base.mutex);
+	mono_coop_mutex_unlock (&semaphore->mutex);
 
 	return wait_entry.signaled;
 }
@@ -74,7 +73,7 @@ mono_lifo_semaphore_timed_wait (LifoSemaphore *semaphore, int32_t timeout_ms)
 void
 mono_lifo_semaphore_release (LifoSemaphore *semaphore, uint32_t count)
 {
-	mono_coop_mutex_lock (&semaphore->base.mutex);
+	mono_coop_mutex_lock (&semaphore->mutex);
 
 	while (count > 0) {
 		LifoSemaphoreWaitEntry *wait_entry = semaphore->head;
@@ -88,243 +87,10 @@ mono_lifo_semaphore_release (LifoSemaphore *semaphore, uint32_t count)
 			mono_coop_cond_signal (&wait_entry->condition);
 			--count;
 		} else {
-			semaphore->base.pending_signals += count;
+			semaphore->pending_signals += count;
 			count = 0;
 		}
 	}
 
-	mono_coop_mutex_unlock (&semaphore->base.mutex);
+	mono_coop_mutex_unlock (&semaphore->mutex);
 }
-
-#if defined(HOST_BROWSER) && !defined(DISABLE_THREADS)
-
-LifoSemaphoreAsyncWait *
-mono_lifo_semaphore_asyncwait_init (void)
-{
-	LifoSemaphoreAsyncWait *sem = g_new0 (LifoSemaphoreAsyncWait, 1);
-	if (sem == NULL)
-		return NULL;
-	sem->base.kind = LIFO_SEMAPHORE_ASYNCWAIT;
-
-	mono_coop_mutex_init (&sem->base.mutex);
-
-	return sem;
-}
-
-void
-mono_lifo_semaphore_asyncwait_delete (LifoSemaphoreAsyncWait *sem)
-{
-	/* FIXME: this is probably hard to guarantee - in-flight signaled semaphores still have wait entries */
-	g_assert (sem->head == NULL);
-	mono_coop_mutex_destroy (&sem->base.mutex);
-	g_free (sem);
-}
-
-enum {
-	LIFO_JS_WAITING = 0,
-	LIFO_JS_SIGNALED = 1,
-	LIFO_JS_SIGNALED_TIMEOUT_IGNORED = 2,
-
-};
-
-static void
-lifo_js_wait_entry_on_timeout (void *wait_entry_as_user_data);
-static void
-lifo_js_wait_entry_on_success (void *wait_entry_as_user_data);
-
-
-static void
-lifo_js_wait_entry_push (LifoSemaphoreAsyncWaitWaitEntry **head,
-			 LifoSemaphoreAsyncWaitWaitEntry *entry)
-{
-	LifoSemaphoreAsyncWaitWaitEntry *next = *head;
-	*head = entry;
-	entry->next = next;
-	if (next)
-		next->previous = entry;
-}
-
-static void
-lifo_js_wait_entry_unlink (LifoSemaphoreAsyncWaitWaitEntry **head,
-			   LifoSemaphoreAsyncWaitWaitEntry *entry)
-{
-	if (*head == entry) {
-		*head = entry->next;
-	}
-	if (entry->previous) {
-		entry->previous->next = entry->next;
-	}
-	if (entry->next) {
-		entry->next->previous = entry->previous;
-	}
-}
-
-/* LOCKING: assumes semaphore is locked */
-static LifoSemaphoreAsyncWaitWaitEntry *
-lifo_js_find_waiter (LifoSemaphoreAsyncWaitWaitEntry *entry)
-{
-	while (entry) {
-		if (entry->state == LIFO_JS_WAITING)
-			return entry;
-		entry = entry->next;
-	}
-	return NULL;
-}
-
-static gboolean
-lifo_js_wait_entry_no_thread (LifoSemaphoreAsyncWaitWaitEntry *entry,
-			     pthread_t cur)
-{
-	while (entry) {
-		if (pthread_equal (entry->thread, cur))
-			return FALSE;
-		entry = entry->next;
-	}
-	return TRUE;
-}
-
-void
-mono_lifo_semaphore_asyncwait_prepare_wait (LifoSemaphoreAsyncWait *sem,
-				     int32_t timeout_ms,
-				     LifoSemaphoreAsyncWaitCallbackFn success_cb,
-				     LifoSemaphoreAsyncWaitCallbackFn timeout_cb,
-				     intptr_t user_data)
-{
-	mono_coop_mutex_lock (&sem->base.mutex);
-	if (sem->base.pending_signals > 0) {
-		sem->base.pending_signals--;
-		mono_coop_mutex_unlock (&sem->base.mutex);
-		success_cb (sem, user_data); // FIXME: queue microtask
-		return;
-	}
-
-	pthread_t cur = pthread_self ();
-
-	/* Don't allow the current thread to wait multiple times.
-	 * No particular reason for it, except that it makes reasoning a bit easier.
-	 * This can probably be relaxed if there's a need.
-	 */
-	g_assert (lifo_js_wait_entry_no_thread(sem->head, cur));
-
-	LifoSemaphoreAsyncWaitWaitEntry *wait_entry = g_new0 (LifoSemaphoreAsyncWaitWaitEntry, 1);
-	wait_entry->success_cb = success_cb;
-	wait_entry->timeout_cb = timeout_cb;
-	wait_entry->sem = sem;
-	wait_entry->user_data = user_data;
-	wait_entry->thread = pthread_self();
-	wait_entry->state = LIFO_JS_WAITING;
-        wait_entry->refcount = 1; // timeout owns the wait entry
-	wait_entry->js_timeout_id = emscripten_set_timeout (lifo_js_wait_entry_on_timeout, (double)timeout_ms, wait_entry);
-	lifo_js_wait_entry_push (&sem->head, wait_entry);
-	mono_coop_mutex_unlock (&sem->base.mutex);
-	return;
-}
-
-void
-mono_lifo_semaphore_asyncwait_release (LifoSemaphoreAsyncWait *sem,
-				uint32_t count)
-{
-	mono_coop_mutex_lock (&sem->base.mutex);
-
-	while (count > 0) {
-		LifoSemaphoreAsyncWaitWaitEntry *wait_entry = lifo_js_find_waiter (sem->head);
-		if (wait_entry != NULL) {
-			/* found one.  set its status and queue some work to run on the signaled thread */
-			pthread_t target = wait_entry->thread;
-			wait_entry->state = LIFO_JS_SIGNALED;
-			wait_entry->refcount++;
-			// we're under the mutex - if we got here the timeout hasn't fired yet
-			g_assert (wait_entry->refcount == 2); 
-			--count;
-			/* if we're on the same thread, don't run the callback while holding the lock */
-			emscripten_dispatch_to_thread_async (target, EM_FUNC_SIG_VI, lifo_js_wait_entry_on_success, NULL, wait_entry);
-		} else {
-			sem->base.pending_signals += count;
-			count = 0;
-		}
-	}
-
-	mono_coop_mutex_unlock (&sem->base.mutex);
-}
-
-static void
-lifo_js_wait_entry_on_timeout (void *wait_entry_as_user_data)
-{
-	LifoSemaphoreAsyncWaitWaitEntry *wait_entry = (LifoSemaphoreAsyncWaitWaitEntry *)wait_entry_as_user_data;
-	g_assert (pthread_equal (wait_entry->thread, pthread_self()));
-	g_assert (wait_entry->sem != NULL);
-	LifoSemaphoreAsyncWait *sem = wait_entry->sem;
-	gboolean call_timeout_cb = FALSE;
-	LifoSemaphoreAsyncWaitCallbackFn timeout_cb = NULL;
-	intptr_t user_data = 0;
-	MONO_ENTER_GC_UNSAFE;
-	mono_coop_mutex_lock (&sem->base.mutex);
-	switch (wait_entry->state) {
-	case LIFO_JS_WAITING:
-		/* semaphore timed out before a Release. */
-		g_assert (wait_entry->refcount == 1);
-		/* unlink and free the wait entry, run the user timeout_cb. */
-		lifo_js_wait_entry_unlink (&sem->head, wait_entry);
-		timeout_cb = wait_entry->timeout_cb;
-		user_data = wait_entry->user_data;
-		g_free (wait_entry);
-		call_timeout_cb = TRUE;
-		break;
-	case LIFO_JS_SIGNALED:
-		/* seamphore was signaled, but the timeout callback ran before the success callback arrived */
-		g_assert (wait_entry->refcount == 2);
-		/* set state to LIFO_JS_SIGNALED_TIMEOUT_IGNORED, decrement refcount, return */
-		wait_entry->state = LIFO_JS_SIGNALED_TIMEOUT_IGNORED;
-		wait_entry->refcount--;		
-		break;
-	case LIFO_JS_SIGNALED_TIMEOUT_IGNORED:
-	default:
-		g_assert_not_reached();
-	}
-	mono_coop_mutex_unlock (&sem->base.mutex);
-	if (call_timeout_cb) {
-		timeout_cb (sem, user_data);
-	}
-	MONO_EXIT_GC_UNSAFE;
-}
-
-static void
-lifo_js_wait_entry_on_success (void *wait_entry_as_user_data)
-{
-	LifoSemaphoreAsyncWaitWaitEntry *wait_entry = (LifoSemaphoreAsyncWaitWaitEntry *)wait_entry_as_user_data;
-	g_assert (pthread_equal (wait_entry->thread, pthread_self()));
-	g_assert (wait_entry->sem != NULL);
-	LifoSemaphoreAsyncWait *sem = wait_entry->sem;
-	gboolean call_success_cb = FALSE;
-	LifoSemaphoreAsyncWaitCallbackFn success_cb = NULL;
-	intptr_t user_data = 0;
-	MONO_ENTER_GC_UNSAFE;
-	mono_coop_mutex_lock (&sem->base.mutex);
-	switch (wait_entry->state) {
-	case LIFO_JS_SIGNALED:
-		g_assert (wait_entry->refcount == 2);
-		emscripten_clear_timeout (wait_entry->js_timeout_id);
-		/* emscripten safeSetTimeout calls keepalive push which is popped by the timeout
-		 * callback. If we cancel the timeout, we have to pop the keepalive ourselves. */
-		emscripten_runtime_keepalive_pop();
-		wait_entry->refcount--;
-		/* fallthru */
-	case LIFO_JS_SIGNALED_TIMEOUT_IGNORED:
-		g_assert (wait_entry->refcount == 1);
-		lifo_js_wait_entry_unlink (&sem->head, wait_entry);
-		success_cb = wait_entry->success_cb;
-		user_data = wait_entry->user_data;
-		g_free (wait_entry);
-		call_success_cb = TRUE;
-		break;
-	case LIFO_JS_WAITING:
-	default:
-		g_assert_not_reached();
-	}
-	mono_coop_mutex_unlock (&sem->base.mutex);
-	g_assert (call_success_cb);
-	success_cb (sem, user_data);
-	MONO_EXIT_GC_UNSAFE;
-}
-
-#endif /* HOST_BROWSER && !DISABLE_THREADS */
diff --git a/src/mono/mono/utils/lifo-semaphore.h b/src/mono/mono/utils/lifo-semaphore.h
index 1a91a6f4d7c3..ad0492c6defb 100644
--- a/src/mono/mono/utils/lifo-semaphore.h
+++ b/src/mono/mono/utils/lifo-semaphore.h
@@ -3,22 +3,6 @@
 
 #include <mono/utils/mono-coop-mutex.h>
 
-typedef struct _LifoSemaphoreBase LifoSemaphoreBase;
-
-struct _LifoSemaphoreBase
-{
-	MonoCoopMutex mutex;
-	uint32_t pending_signals;
-	uint8_t       kind;
-};
-
-enum {
-	LIFO_SEMAPHORE_NORMAL = 1,
-#if defined(HOST_BROWSER) && !defined(DISABLE_THREADS)
-	LIFO_SEMAPHORE_ASYNCWAIT,
-#endif
-};
-	
 typedef struct _LifoSemaphore LifoSemaphore;
 typedef struct _LifoSemaphoreWaitEntry LifoSemaphoreWaitEntry;
 
@@ -30,7 +14,8 @@ struct _LifoSemaphoreWaitEntry {
 };
 
 struct _LifoSemaphore {
-	LifoSemaphoreBase base;
+	MonoCoopMutex mutex;
+	uint32_t pending_signals;
 	LifoSemaphoreWaitEntry *head;
 };
 
@@ -46,91 +31,4 @@ mono_lifo_semaphore_timed_wait (LifoSemaphore *semaphore, int32_t timeout_ms);
 void
 mono_lifo_semaphore_release (LifoSemaphore *semaphore, uint32_t count);
 
-#if defined(HOST_BROWSER) && !defined(DISABLE_THREADS)
-/* A type of lifo semaphore that can be waited from the JS event loop.
- *
- * Instead of a blocking timed_wait function, it uses a pair of callbacks: a success callback and a
- * timeout callback.  The wait function returns immediately and the callbacks will fire on the JS
- * event loop when the semaphore is released or the timeout expires.
- */
-typedef struct _LifoSemaphoreAsyncWait LifoSemaphoreAsyncWait;
-/*
- * Because the callbacks are asynchronous, it's possible for the same thread to attempt to wait
- * multiple times for the same semaphore.  For simplicity of reasoning, we dissallow that and
- * assert.  In principle we could support it, but we haven't implemented that.
- */
-typedef struct _LifoSemaphoreAsyncWaitWaitEntry LifoSemaphoreAsyncWaitWaitEntry;
-
-typedef void (*LifoSemaphoreAsyncWaitCallbackFn)(LifoSemaphoreAsyncWait *semaphore, intptr_t user_data);
-
-struct _LifoSemaphoreAsyncWaitWaitEntry {
-	LifoSemaphoreAsyncWaitWaitEntry *previous;
-	LifoSemaphoreAsyncWaitWaitEntry *next;
-	LifoSemaphoreAsyncWaitCallbackFn success_cb;
-	LifoSemaphoreAsyncWaitCallbackFn timeout_cb;
-	LifoSemaphoreAsyncWait *sem;
-	intptr_t user_data;
-	pthread_t thread;
-	int32_t js_timeout_id; // only valid to access from the waiting thread
-	/* state and refcount are protected by the semaphore mutex */
-	uint16_t state; /* 0 waiting, 1 signaled, 2 signaled - timeout ignored */
-	uint16_t refcount; /* 1 if waiting, 2 if signaled, 1 if timeout fired while signaled and we're ignoring the timeout */
-};
-	
-struct _LifoSemaphoreAsyncWait {
-	LifoSemaphoreBase base;
-	LifoSemaphoreAsyncWaitWaitEntry *head;
-};
-
-LifoSemaphoreAsyncWait *
-mono_lifo_semaphore_asyncwait_init (void);
-
-/* what to do with waiters?
- * might be kind of academic - we don't expect to destroy these
- */
-void
-mono_lifo_semaphore_asyncwait_delete (LifoSemaphoreAsyncWait *semaphore);
-
-/*
- * the timeout_cb is triggered by a JS setTimeout callback
- *
- * the success_cb is triggered using Emscripten's capability to push async work from one thread to
- * another.  That means the main thread will need to be able to process JS events (in order to
- * assist threads in pushing work from one thread to another) in order for success callbacks to
- * function.  Emscripten also pumps the async work queues in other circumstances (during sleeps) but
- * the main thread still needs to participate.
- *
- * There's a potential race the implementation needs to be careful about:
- *   when one thread releases a semaphore and queues the success callback to run,
- *   while the success callback is in flight, the timeout callback can fire.
- *   It is important that the called back functions don't destroy the wait entry until either both
- *   callbacks have fired, or the success callback has a chance to cancel the timeout callback.
- * 
- * We use a refcount to delimit the lifetime of the wait entry. When the wait is created, the
- * refcount is 1 and it is notionally owned by the timeout callback.  When a sempahore is released,
- * the refcount goes to 2.  When a continuation fires, it decreases the refcount.  If the timeout
- * callback fires first if it sees a refcount of 2 it can decrement and return - we know a success
- * continuation is in flight and we can allow it to complete. If the refcount is 1 we need to take the semaphore's mutex and remove the wait entry. (With double check locking - the refcount could go up).
- *
- * When the success continuation fires,it will examine the refcount. If the refcount is 1 at the
- * outset, then the cancelation already tried to fire while we were in flight.  If the refcount is 2
- * at the outset, then the success contination fired before the timeout, so we can cancel the
- * timeout.  In either case we can remove the wait entry.
- *
- * Both the success and timeout code only calls the user provided callbacks after the wait entry is
- * destroyed.
- *
- * FIXME: should we just always use the mutex to protect the wait entry status+refcount?
- */
-void
-mono_lifo_semaphore_asyncwait_prepare_wait (LifoSemaphoreAsyncWait *semaphore, int32_t timeout_ms,
-				     LifoSemaphoreAsyncWaitCallbackFn success_cb,
-				     LifoSemaphoreAsyncWaitCallbackFn timeout_cb,
-				     intptr_t user_data);
-
-void
-mono_lifo_semaphore_asyncwait_release (LifoSemaphoreAsyncWait *semaphore, uint32_t count);
-
-#endif /* HOST_BROWSER && !DISABLE_THREADS */
-
 #endif // __MONO_LIFO_SEMAPHORE_H__
diff --git a/src/mono/mono/utils/mono-cgroup.c b/src/mono/mono/utils/mono-cgroup.c
index 0a6c96678877..4c58f53257b9 100644
--- a/src/mono/mono/utils/mono-cgroup.c
+++ b/src/mono/mono/utils/mono-cgroup.c
@@ -45,7 +45,6 @@ Module Name:
 #endif
 
 #define CGROUP2_SUPER_MAGIC 0x63677270
-#define TMPFS_MAGIC 0x01021994
 
 #define PROC_MOUNTINFO_FILENAME "/proc/self/mountinfo"
 #define PROC_CGROUP_FILENAME "/proc/self/cgroup"
@@ -219,10 +218,13 @@ findCGroupVersion(void)
 	if (result != 0)
 		return 0;
 
-	switch (stats.f_type) {
-	case TMPFS_MAGIC: return 1;
-	case CGROUP2_SUPER_MAGIC: return 2;
-	default: return 0;
+	if (stats.f_type == CGROUP2_SUPER_MAGIC) {
+		return 2;
+	} else {
+		// Assume that if /sys/fs/cgroup exists and the file system type is not cgroup2fs,
+		// it is cgroup v1. Typically the file system type is tmpfs, but other values have
+		// been seen in the wild.
+		return 1;
 	}
 }
 
diff --git a/src/mono/mono/utils/mono-path.c b/src/mono/mono/utils/mono-path.c
index 4632a74556c3..616fa183d5a5 100644
--- a/src/mono/mono/utils/mono-path.c
+++ b/src/mono/mono/utils/mono-path.c
@@ -44,6 +44,7 @@ mono_path_canonicalize (const char *path)
 	} else {
 		gchar *tmpdir = g_get_current_dir ();
 		abspath = g_build_filename (tmpdir, path, (const char*)NULL);
+		g_assert (abspath);
 		g_free (tmpdir);
 	}
 
@@ -128,6 +129,7 @@ resolve_symlink (const char *path)
 		if (!g_path_is_absolute (buffer)) {
 			dir = g_path_get_dirname (p);
 			concat = g_build_filename (dir, buffer, (const char*)NULL);
+			g_assert (concat);
 			g_free (dir);
 		} else {
 			concat = g_strdup (buffer);
diff --git a/src/mono/mono/utils/mono-threads-posix.c b/src/mono/mono/utils/mono-threads-posix.c
index 23c1e4f3056a..ea4cf7f90fb5 100644
--- a/src/mono/mono/utils/mono-threads-posix.c
+++ b/src/mono/mono/utils/mono-threads-posix.c
@@ -133,15 +133,6 @@ mono_threads_platform_exit (gsize exit_code)
 	pthread_exit ((gpointer) exit_code);
 }
 
-gboolean
-mono_thread_platform_external_eventloop_keepalive_check (void)
-{
-	/* vanilla POSIX thread creation doesn't support an external eventloop: when the thread main
-	   function returns, the thread is done.
-	*/
-	return FALSE;
-}
-
 #if HOST_FUCHSIA
 int
 mono_thread_info_get_system_max_stack_size (void)
diff --git a/src/mono/mono/utils/mono-threads-wasm.c b/src/mono/mono/utils/mono-threads-wasm.c
index ea8faf904c25..597592a7966c 100644
--- a/src/mono/mono/utils/mono-threads-wasm.c
+++ b/src/mono/mono/utils/mono-threads-wasm.c
@@ -45,7 +45,7 @@ wasm_get_stack_size (void)
 	return (guint8*)emscripten_stack_get_base () - (guint8*)emscripten_stack_get_end ();
 }
 
-#else /* WASI */
+#else /* HOST_BROWSER -> WASI */
 
 // TODO after https://github.com/llvm/llvm-project/commit/1532be98f99384990544bd5289ba339bca61e15b
 // use __stack_low && __stack_high
@@ -79,7 +79,7 @@ wasm_get_stack_base (void)
 	// this will need further change for multithreading as the stack will allocated be per thread at different addresses
 }
 
-#endif
+#endif /* HOST_BROWSER */
 
 int
 mono_thread_info_get_system_max_stack_size (void)
@@ -176,6 +176,7 @@ mono_native_thread_set_name (MonoNativeThreadId tid, const char *name)
 {
 #ifndef DISABLE_THREADS
 	// note there is also emscripten_set_thread_name, but it only changes the name for emscripten profiler
+	// this only sets the name for the current thread
 	mono_wasm_pthread_set_name (name);
 #endif
 }
@@ -314,21 +315,6 @@ mono_thread_platform_create_thread (MonoThreadStart thread_fn, gpointer thread_d
 #endif
 }
 
-gboolean
-mono_thread_platform_external_eventloop_keepalive_check (void)
-{
-#if defined(HOST_BROWSER) && !defined(DISABLE_THREADS)
-	MONO_REQ_GC_SAFE_MODE;
-	/* if someone called emscripten_runtime_keepalive_push (), the
-	 * thread will stay alive in the JS event loop after returning
-	 * from the thread's main function.
-	 */
-	return emscripten_runtime_keepalive_check ();
-#else
-	return FALSE;
-#endif
-}
-
 void mono_threads_platform_init (void)
 {
 }
@@ -360,7 +346,6 @@ G_EXTERN_C
 extern void schedule_background_exec (void);
 
 // when this is called from ThreadPool, the cb would be System.Threading.ThreadPool.BackgroundJobHandler
-// when this is called from JSSynchronizationContext, the cb would be System.Runtime.InteropServices.JavaScript.JSSynchronizationContext.BackgroundJobHandler
 // when this is called from sgen it would be wrapper of sgen_perform_collection_inner
 // when this is called from gc, it would be mono_runtime_do_background_work
 #ifdef DISABLE_THREADS
@@ -368,77 +353,24 @@ void
 mono_main_thread_schedule_background_job (background_job_cb cb)
 {
 	g_assert (cb);
-	THREADS_DEBUG ("mono_main_thread_schedule_background_job2: thread %p queued job %p to current thread\n", (gpointer)pthread_self(), (gpointer) cb);
-	mono_current_thread_schedule_background_job (cb);
-}
-#endif /*DISABLE_THREADS*/
-
-#ifndef DISABLE_THREADS
-MonoNativeTlsKey jobs_key;
-#else /* DISABLE_THREADS */
-GSList *jobs;
-#endif /* DISABLE_THREADS */
-
-void
-mono_current_thread_schedule_background_job (background_job_cb cb)
-{
-	g_assert (cb);
-#ifdef DISABLE_THREADS
-
-	if (!jobs)
-		schedule_background_exec ();
+	THREADS_DEBUG ("mono_main_thread_schedule_background_job: thread %p queued job %p to current thread\n", (gpointer)pthread_self(), (gpointer) cb);
 
-	if (!g_slist_find (jobs, (gconstpointer)cb))
-		jobs = g_slist_prepend (jobs, (gpointer)cb);
-
-#else /*DISABLE_THREADS*/
-
-	GSList *jobs = mono_native_tls_get_value (jobs_key);
-	THREADS_DEBUG ("mono_current_thread_schedule_background_job1: thread %p queuing job %p into %p\n", (gpointer)pthread_self(), (gpointer) cb, (gpointer) jobs);
 	if (!jobs)
-	{
-		THREADS_DEBUG ("mono_current_thread_schedule_background_job2: thread %p calling schedule_background_exec before job %p\n", (gpointer)pthread_self(), (gpointer) cb);
 		schedule_background_exec ();
-	}
 
 	if (!g_slist_find (jobs, (gconstpointer)cb))
-	{
 		jobs = g_slist_prepend (jobs, (gpointer)cb);
-		mono_native_tls_set_value (jobs_key, jobs);
-		THREADS_DEBUG ("mono_current_thread_schedule_background_job3: thread %p queued job %p\n", (gpointer)pthread_self(), (gpointer) cb);
-	}
-
-#endif /*DISABLE_THREADS*/
-}
-
-#ifndef DISABLE_THREADS
-void
-mono_target_thread_schedule_background_job (MonoNativeThreadId target_thread, background_job_cb cb)
-{
-	THREADS_DEBUG ("worker %p queued job %p to worker %p \n", (gpointer)pthread_self(), (gpointer) cb, (gpointer) target_thread);
-	// NOTE: here the cb is [UnmanagedCallersOnly] which wraps it with MONO_ENTER_GC_UNSAFE/MONO_EXIT_GC_UNSAFE
-	mono_threads_wasm_async_run_in_target_thread_vi ((pthread_t) target_thread, (void*)mono_current_thread_schedule_background_job, (gpointer)cb);
 }
-#endif /*DISABLE_THREADS*/
 
-G_EXTERN_C
-EMSCRIPTEN_KEEPALIVE void
-mono_background_exec (void);
+GSList *jobs;
 
 G_EXTERN_C
 EMSCRIPTEN_KEEPALIVE void
 mono_background_exec (void)
 {
 	MONO_ENTER_GC_UNSAFE;
-#ifdef DISABLE_THREADS
 	GSList *j = jobs, *cur;
 	jobs = NULL;
-#else /* DISABLE_THREADS */
-	THREADS_DEBUG ("mono_background_exec on thread %p started\n", (gpointer)pthread_self());
-	GSList *jobs = mono_native_tls_get_value (jobs_key);
-	GSList *j = jobs, *cur;
-	mono_native_tls_set_value (jobs_key, NULL);
-#endif /* DISABLE_THREADS */
 
 	for (cur = j; cur; cur = cur->next) {
 		background_job_cb cb = (background_job_cb)cur->data;
@@ -451,13 +383,24 @@ mono_background_exec (void)
 	MONO_EXIT_GC_UNSAFE;
 }
 
+#else /*DISABLE_THREADS*/
+
+extern void mono_wasm_schedule_synchronization_context ();
+
+void mono_target_thread_schedule_synchronization_context(MonoNativeThreadId target_thread)
+{
+	emscripten_dispatch_to_thread_async ((pthread_t) target_thread, EM_FUNC_SIG_V, mono_wasm_schedule_synchronization_context, NULL);
+}
+
+#endif /*DISABLE_THREADS*/
+
 gboolean
 mono_threads_platform_is_main_thread (void)
 {
 #ifdef DISABLE_THREADS
 	return TRUE;
 #else
-	return emscripten_is_main_runtime_thread ();
+	return mono_threads_wasm_is_deputy_thread ();
 #endif
 }
 
@@ -495,7 +438,7 @@ mono_threads_wasm_on_thread_attached (pthread_t tid, const char* thread_name, gb
 #else
 	if (mono_threads_wasm_is_ui_thread ()) {
 		// FIXME: we should not be attaching UI thread with deputy design
-		// but right now we do, because mono_wasm_load_runtime is running in UI thread
+		// but right now we do
 		// g_assert(!mono_threads_wasm_is_ui_thread ());
 		return;
 	}
@@ -540,22 +483,105 @@ mono_threads_wasm_on_thread_registered (void)
 }
 
 #ifndef DISABLE_THREADS
-void
-mono_threads_wasm_async_run_in_ui_thread (void (*func) (void))
+static pthread_t deputy_thread_tid;
+static pthread_t io_thread_tid;
+extern void mono_wasm_start_deputy_thread_async (void);
+extern void mono_wasm_start_io_thread_async (void);
+extern void mono_wasm_trace_logger (const char *log_domain, const char *log_level, const char *message, mono_bool fatal, void *user_data);
+extern void mono_wasm_dump_threads (void);
+
+void mono_wasm_dump_threads_async (void)
 {
-	emscripten_async_run_in_main_runtime_thread (EM_FUNC_SIG_V, func);
+	mono_threads_wasm_async_run_in_target_thread (mono_threads_wasm_ui_thread_tid (), mono_wasm_dump_threads);
 }
 
-void
-mono_threads_wasm_async_run_in_ui_thread_vi (void (*func) (gpointer), gpointer user_data)
+gboolean
+mono_threads_wasm_is_deputy_thread (void)
 {
-	emscripten_async_run_in_main_runtime_thread (EM_FUNC_SIG_VI, func, user_data);
+	return pthread_self () == deputy_thread_tid;
 }
 
-void
-mono_threads_wasm_async_run_in_ui_thread_vii (void (*func) (gpointer, gpointer), gpointer user_data1, gpointer user_data2)
+MonoNativeThreadId
+mono_threads_wasm_deputy_thread_tid (void)
+{
+	return (MonoNativeThreadId) deputy_thread_tid;
+}
+
+// this is running in deputy thread
+static gsize
+deputy_thread_fn (void* unused_arg G_GNUC_UNUSED)
+{
+	deputy_thread_tid = pthread_self ();
+
+	// this will throw JS "unwind"
+	mono_wasm_start_deputy_thread_async();
+	
+	return 0;// never reached
+}
+
+EMSCRIPTEN_KEEPALIVE MonoNativeThreadId
+mono_wasm_create_deputy_thread (void)
+{
+	pthread_create (&deputy_thread_tid, NULL, (void *(*)(void *)) deputy_thread_fn, NULL);
+	return deputy_thread_tid;
+}
+
+gboolean
+mono_threads_wasm_is_io_thread (void)
+{
+	return pthread_self () == io_thread_tid;
+}
+
+MonoNativeThreadId
+mono_threads_wasm_io_thread_tid (void)
+{
+	return (MonoNativeThreadId) io_thread_tid;
+}
+
+// this is running in io thread
+static gsize
+io_thread_fn (void* unused_arg G_GNUC_UNUSED)
+{
+	io_thread_tid = pthread_self ();
+
+	// this will throw JS "unwind"
+	mono_wasm_start_io_thread_async();
+	
+	return 0;// never reached
+}
+
+EMSCRIPTEN_KEEPALIVE MonoNativeThreadId
+mono_wasm_create_io_thread (void)
+{
+	pthread_create (&io_thread_tid, NULL, (void *(*)(void *)) io_thread_fn, NULL);
+	return io_thread_tid;
+}
+
+// TODO ideally we should not need to have UI thread registered as managed
+EMSCRIPTEN_KEEPALIVE void
+mono_wasm_register_ui_thread (void)
+{
+	MonoThread *thread = mono_thread_internal_attach (mono_get_root_domain ());
+	mono_thread_set_state (thread, ThreadState_Background);
+	mono_thread_info_set_flags (MONO_THREAD_INFO_FLAGS_NONE);
+
+	MonoThreadInfo *info = mono_thread_info_current_unchecked ();
+	g_assert (info);
+	info->runtime_thread = TRUE;
+	MONO_ENTER_GC_SAFE_UNBALANCED;
+}
+
+EMSCRIPTEN_KEEPALIVE void
+mono_wasm_register_io_thread (void)
 {
-	emscripten_async_run_in_main_runtime_thread (EM_FUNC_SIG_VII, func, user_data1, user_data2);
+	MonoThread *thread = mono_thread_internal_attach (mono_get_root_domain ());
+	mono_thread_set_state (thread, ThreadState_Background);
+	mono_thread_info_set_flags (MONO_THREAD_INFO_FLAGS_NONE);
+
+	MonoThreadInfo *info = mono_thread_info_current_unchecked ();
+	g_assert (info);
+	info->runtime_thread = TRUE;
+	MONO_ENTER_GC_SAFE_UNBALANCED;
 }
 
 void
@@ -576,18 +602,33 @@ mono_threads_wasm_async_run_in_target_thread_vii (pthread_t target_thread, void
 	emscripten_dispatch_to_thread_async (target_thread, EM_FUNC_SIG_VII, func, NULL, user_data1, user_data2);
 }
 
-static void mono_threads_wasm_sync_run_in_target_thread_vii_cb (MonoCoopSem *done, void (*func) (gpointer, gpointer), gpointer user_data1, gpointer user_data2)
+static void mono_threads_wasm_sync_run_in_target_thread_vii_cb (MonoCoopSem *done, void (*func) (gpointer, gpointer), gpointer user_data1, void* args)
+{
+	// in UI thread we postpone the execution via safeSetTimeout so that emscripten_proxy_execute_queue is not blocked by this call
+	// see invoke_later_on_ui_thread
+	if (mono_threads_wasm_is_ui_thread()) {
+		MonoCoopSem **semPtrPtr = (MonoCoopSem **)(((char *) args) + 28/*JSMarshalerArgumentOffsets.SyncDoneSemaphorePtr*/);
+		*semPtrPtr = done;
+		func (user_data1, args);
+	}
+	else {
+		func (user_data1, args);
+		mono_coop_sem_post (done);
+	}
+}
+
+EMSCRIPTEN_KEEPALIVE void 
+mono_threads_wasm_sync_run_in_target_thread_done (MonoCoopSem *sem)
 {
-	func (user_data1, user_data2);
-	mono_coop_sem_post (done);
+	mono_coop_sem_post (sem);
 }
 
 void
-mono_threads_wasm_sync_run_in_target_thread_vii (pthread_t target_thread, void (*func) (gpointer, gpointer), gpointer user_data1, gpointer user_data2)
+mono_threads_wasm_sync_run_in_target_thread_vii (pthread_t target_thread, void (*func) (gpointer, gpointer), gpointer user_data1, gpointer args)
 {
 	MonoCoopSem sem;
 	mono_coop_sem_init (&sem, 0);
-	emscripten_dispatch_to_thread_async (target_thread, EM_FUNC_SIG_VIIII, mono_threads_wasm_sync_run_in_target_thread_vii_cb, NULL, &sem, func, user_data1, user_data2);
+	emscripten_dispatch_to_thread_async (target_thread, EM_FUNC_SIG_VIIII, mono_threads_wasm_sync_run_in_target_thread_vii_cb, NULL, &sem, func, user_data1, args);
 
 	MONO_ENTER_GC_UNSAFE;
 	mono_coop_sem_wait (&sem, MONO_SEM_FLAGS_NONE);
diff --git a/src/mono/mono/utils/mono-threads-wasm.h b/src/mono/mono/utils/mono-threads-wasm.h
index 1c4934c2f9b4..927c5b0eb0ea 100644
--- a/src/mono/mono/utils/mono-threads-wasm.h
+++ b/src/mono/mono/utils/mono-threads-wasm.h
@@ -27,21 +27,33 @@ MonoNativeThreadId
 mono_threads_wasm_ui_thread_tid (void);
 
 #ifndef DISABLE_THREADS
-/**
- * Runs the given function asynchronously on the main thread.
- * See emscripten/threading.h emscripten_async_run_in_main_runtime_thread
- */
-void
-mono_threads_wasm_async_run_in_ui_thread (void (*func) (void));
 
-/*
- * Variant that takes an argument. Add more variants as needed.
- */
+void 
+mono_wasm_dump_threads_async (void);
+
+gboolean
+mono_threads_wasm_is_deputy_thread (void);
+
+gboolean
+mono_threads_wasm_is_io_thread (void);
+
+MonoNativeThreadId
+mono_threads_wasm_deputy_thread_tid (void);
+
+MonoNativeThreadId
+mono_threads_wasm_io_thread_tid (void);
+
+MonoNativeThreadId
+mono_wasm_create_deputy_thread (void);
+
+MonoNativeThreadId
+mono_wasm_create_io_thread (void);
+
 void
-mono_threads_wasm_async_run_in_ui_thread_vi (void (*func)(gpointer), gpointer user_data);
+mono_wasm_register_ui_thread (void);
 
 void
-mono_threads_wasm_async_run_in_ui_thread_vii (void (*func)(gpointer, gpointer), gpointer user_data1, gpointer user_data2);
+mono_wasm_register_io_thread (void);
 
 void
 mono_threads_wasm_async_run_in_target_thread (pthread_t target_thread, void (*func) (void));
@@ -53,7 +65,10 @@ void
 mono_threads_wasm_async_run_in_target_thread_vii (pthread_t target_thread, void (*func) (gpointer, gpointer), gpointer user_data1, gpointer user_data2);
 
 void
-mono_threads_wasm_sync_run_in_target_thread_vii (pthread_t target_thread, void (*func) (gpointer, gpointer), gpointer user_data1, gpointer user_data2);
+mono_threads_wasm_sync_run_in_target_thread_vii (pthread_t target_thread, void (*func) (gpointer, gpointer), gpointer user_data1, gpointer args);
+
+void 
+mono_threads_wasm_sync_run_in_target_thread_done (MonoCoopSem *sem);
 
 static inline
 int32_t
@@ -72,9 +87,9 @@ mono_wasm_atomic_wait_i32 (volatile int32_t *addr, int32_t expected, int32_t tim
 	return __builtin_wasm_memory_atomic_wait32((int32_t*)addr, expected, timeout_ns);
 }
 
-extern MonoNativeTlsKey jobs_key;
 #else /* DISABLE_THREADS */
 extern GSList *jobs;
+void mono_background_exec (void);
 #endif /* DISABLE_THREADS */
 
 void
diff --git a/src/mono/mono/utils/mono-threads-windows.c b/src/mono/mono/utils/mono-threads-windows.c
index 169449b831e8..3e56205c0ab8 100644
--- a/src/mono/mono/utils/mono-threads-windows.c
+++ b/src/mono/mono/utils/mono-threads-windows.c
@@ -501,15 +501,6 @@ typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
 static gboolean is_wow64 = FALSE;
 #endif
 
-gboolean
-mono_thread_platform_external_eventloop_keepalive_check (void)
-{
-	/* We don't support thread creation with an external eventloop on WIN32: when the thread start
-	   function returns, the thread is done.
-	*/
-	return FALSE;
-}
-
 /* We do this at init time to avoid potential races with module opening */
 void
 mono_threads_platform_init (void)
diff --git a/src/mono/mono/utils/mono-threads.c b/src/mono/mono/utils/mono-threads.c
index 515cde6eebad..144feb388787 100644
--- a/src/mono/mono/utils/mono-threads.c
+++ b/src/mono/mono/utils/mono-threads.c
@@ -281,6 +281,12 @@ mono_threads_end_global_suspend (void)
 static void
 dump_threads (void)
 {
+#ifdef HOST_BROWSER
+#ifndef DISABLE_THREADS
+	mono_wasm_dump_threads_async ();
+#endif
+#endif
+
 	MonoThreadInfo *cur = mono_thread_info_current ();
 
 	g_async_safe_printf ("STATE CUE CARD: (? means a positive number, usually 1 or 2, * means any number)\n");
@@ -517,12 +523,6 @@ register_thread (MonoThreadInfo *info)
 	g_assert (staddr);
 #endif /* TARGET_WASM */
 
-#ifdef HOST_WASM
-#ifndef DISABLE_THREADS
-	mono_native_tls_set_value (jobs_key, NULL);
-#endif /* DISABLE_THREADS */
-#endif /* HOST_WASM */
-
 	g_assert (stsize);
 	info->stack_start_limit = staddr;
 	info->stack_end = staddr + stsize;
@@ -973,12 +973,6 @@ mono_thread_info_init (size_t info_size)
 
 	mono_threads_suspend_policy_init ();
 
-#ifdef HOST_WASM
-#ifndef DISABLE_THREADS
-	res = mono_native_tls_alloc (&jobs_key, NULL);
-#endif /* DISABLE_THREADS */
-#endif /* HOST_BROWSER */
-
 #ifdef HOST_WIN32
 	res = mono_native_tls_alloc (&thread_info_key, NULL);
 	res = mono_native_tls_alloc (&thread_exited_key, NULL);
diff --git a/src/mono/mono/utils/mono-threads.h b/src/mono/mono/utils/mono-threads.h
index a738460f5895..8410e43ef930 100644
--- a/src/mono/mono/utils/mono-threads.h
+++ b/src/mono/mono/utils/mono-threads.h
@@ -634,9 +634,6 @@ gboolean mono_threads_platform_in_critical_region (THREAD_INFO_TYPE *info);
 gboolean mono_threads_platform_yield (void);
 void mono_threads_platform_exit (gsize exit_code);
 
-gboolean
-mono_thread_platform_external_eventloop_keepalive_check (void);
-
 void mono_threads_coop_begin_global_suspend (void);
 void mono_threads_coop_end_global_suspend (void);
 
@@ -850,9 +847,9 @@ void mono_threads_join_unlock (void);
 typedef void (*background_job_cb)(void);
 #ifdef DISABLE_THREADS
 void mono_main_thread_schedule_background_job (background_job_cb cb);
+#else
+void mono_target_thread_schedule_synchronization_context(MonoNativeThreadId target_thread);
 #endif // DISABLE_THREADS
-void mono_current_thread_schedule_background_job (background_job_cb cb);
-void mono_target_thread_schedule_background_job (MonoNativeThreadId target_thread, background_job_cb cb);
 #endif
 
 #ifdef USE_WINDOWS_BACKEND
diff --git a/src/mono/mono/utils/options-def.h b/src/mono/mono/utils/options-def.h
index 8f3ec6d47c4e..8cdbc942e1b8 100644
--- a/src/mono/mono/utils/options-def.h
+++ b/src/mono/mono/utils/options-def.h
@@ -58,13 +58,13 @@ DEFINE_BOOL_READONLY(readonly_flag, "readonly-flag", FALSE, "Example")
 */
 
 DEFINE_BOOL(wasm_exceptions, "wasm-exceptions", FALSE, "Enable codegen for WASM exceptions")
-DEFINE_BOOL(wasm_gc_safepoints, "wasm-gc-safepoints", FALSE, "Use GC safepoints on WASM")
 DEFINE_BOOL(aot_lazy_assembly_load, "aot-lazy-assembly-load", FALSE, "Load assemblies referenced by AOT images lazily")
 
 #if HOST_BROWSER
 DEFINE_BOOL(interp_pgo_recording, "interp-pgo-recording", TRUE, "Record interpreter tiering information for automatic PGO")
 #else
 DEFINE_BOOL(interp_pgo_recording, "interp-pgo-recording", FALSE, "Record interpreter tiering information for automatic PGO")
+DEFINE_BOOL(wasm_gc_safepoints, "wasm-gc-safepoints", FALSE, "Use GC safepoints on WASM")
 #endif
 DEFINE_BOOL(interp_pgo_logging, "interp-pgo-logging", FALSE, "Log messages when interpreter PGO optimizes a method or updates its table")
 DEFINE_BOOL(interp_codegen_timing, "interp-codegen-timing", FALSE, "Measure time spent generating interpreter code and log it periodically")
@@ -79,13 +79,17 @@ DEFINE_BOOL(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", TRUE, "JIT
 DEFINE_BOOL(jiterpreter_interp_entry_enabled, "jiterpreter-interp-entry-enabled", TRUE, "JIT specialized WASM interp_entry wrappers")
 // jit_call_enabled controls whether do_jit_call will use specialized trampolines for hot call sites
 DEFINE_BOOL(jiterpreter_jit_call_enabled, "jiterpreter-jit-call-enabled", TRUE, "JIT specialized WASM do_jit_call trampolines")
+
+DEFINE_BOOL(wasm_gc_safepoints, "wasm-gc-safepoints", FALSE, "Use GC safepoints on WASM")
 #else
 // traces_enabled controls whether the jiterpreter will JIT individual interpreter opcode traces
-DEFINE_BOOL(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", TRUE, "JIT interpreter opcode traces into WASM")
+DEFINE_BOOL_READONLY(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", FALSE, "JIT interpreter opcode traces into WASM")
 // interp_entry_enabled controls whether specialized interp_entry wrappers will be jitted
 DEFINE_BOOL_READONLY(jiterpreter_interp_entry_enabled, "jiterpreter-interp-entry-enabled", FALSE, "JIT specialized WASM interp_entry wrappers")
 // jit_call_enabled controls whether do_jit_call will use specialized trampolines for hot call sites
 DEFINE_BOOL_READONLY(jiterpreter_jit_call_enabled, "jiterpreter-jit-call-enabled", FALSE, "JIT specialized WASM do_jit_call trampolines")
+
+DEFINE_BOOL_READONLY(wasm_gc_safepoints, "wasm-gc-safepoints", TRUE, "Use GC safepoints on WASM")
 #endif // DISABLE_THREADS
 
 // enables using WASM try/catch_all instructions where appropriate (currently only do_jit_call),
@@ -121,6 +125,10 @@ DEFINE_BOOL(jiterpreter_backward_branches_enabled, "jiterpreter-backward-branche
 DEFINE_BOOL(jiterpreter_enable_simd, "jiterpreter-simd-enabled", TRUE, "Attempt to use WebAssembly SIMD support")
 // Since the zero page is unallocated, loading array/string/span lengths from null ptrs will yield zero
 DEFINE_BOOL(jiterpreter_zero_page_optimization, "jiterpreter-zero-page-optimization", TRUE, "Exploit the zero page being unallocated to optimize out null checks")
+// We can produce higher quality code by embedding known constants directly into traces instead of loading
+//  the constant from its storage location in the interpreter's locals in memory, even if we can't skip
+//  the write of the constant into memory.
+DEFINE_BOOL(jiterpreter_constant_propagation, "jiterpreter-constant-propagation", TRUE, "Propagate ldc.i4 and ldloca expressions forward to locations where those constants are used")
 // When compiling a jit_call wrapper, bypass sharedvt wrappers if possible by inlining their
 //  logic into the compiled wrapper and calling the target AOTed function with native call convention
 DEFINE_BOOL(jiterpreter_direct_jit_call, "jiterpreter-direct-jit-calls", TRUE, "Bypass gsharedvt wrappers when compiling JIT call wrappers")
diff --git a/src/mono/mono/utils/options.c b/src/mono/mono/utils/options.c
index bf092372828c..2a8dc6f60480 100644
--- a/src/mono/mono/utils/options.c
+++ b/src/mono/mono/utils/options.c
@@ -121,12 +121,13 @@ get_option_hash (void)
  *   Set options based on the command line arguments in ARGV/ARGC.
  * Remove processed arguments from ARGV and set *OUT_ARGC to the
  * number of remaining arguments.
+ * If PROCESSED is != NULL, add the processed arguments to it.
  *
  * NOTE: This only sets the variables, the caller might need to do
  * additional processing based on the new values of the variables.
  */
 void
-mono_options_parse_options (const char **argv, int argc, int *out_argc, MonoError *error)
+mono_options_parse_options (const char **argv, int argc, int *out_argc, GPtrArray *processed, MonoError *error)
 {
 	int aindex = 0;
 	GHashTable *option_hash = NULL;
@@ -187,6 +188,8 @@ mono_options_parse_options (const char **argv, int argc, int *out_argc, MonoErro
 				break;
 			}
 			*(gboolean*)option->addr = negate ? FALSE : TRUE;
+			if (processed)
+				g_ptr_array_add (processed, (gpointer)argv [aindex]);
 			argv [aindex] = NULL;
 			break;
 		}
@@ -202,12 +205,18 @@ mono_options_parse_options (const char **argv, int argc, int *out_argc, MonoErro
 					break;
 				}
 				value = argv [aindex + 1];
+				if (processed) {
+					g_ptr_array_add (processed, (gpointer)argv [aindex]);
+					g_ptr_array_add (processed, (gpointer)argv [aindex + 1]);
+				}
 				argv [aindex] = NULL;
 				argv [aindex + 1] = NULL;
 				aindex ++;
 			} else if (equals_sign_index != -1) {
 				// option=value
 				value = arg + equals_sign_index + 1;
+				if (processed)
+					g_ptr_array_add (processed, (gpointer)argv [aindex]);
 				argv [aindex] = NULL;
 			} else {
 				g_assert_not_reached ();
diff --git a/src/mono/mono/utils/options.h b/src/mono/mono/utils/options.h
index 41090e3897ca..e7f2906eeb05 100644
--- a/src/mono/mono/utils/options.h
+++ b/src/mono/mono/utils/options.h
@@ -26,7 +26,7 @@ extern int mono_options_version;
 
 void mono_options_print_usage (void);
 
-void mono_options_parse_options (const char **args, int argc, int *out_argc, MonoError *error);
+void mono_options_parse_options (const char **args, int argc, int *out_argc, GPtrArray *processed, MonoError *error);
 
 /* returns a json blob representing the current values of all options */
 char * mono_options_get_as_json (void);
diff --git a/src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/README.md b/src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/README.md
index c8a46cb59461..a611a20f7cd0 100644
--- a/src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/README.md
+++ b/src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/README.md
@@ -5,11 +5,34 @@ Tasks, and targets to support workload testing in `dotnet` repositories.
 - `$(InstallWorkloadForTesting)` - required
 - `$(BuiltNuGetsDir)` - required
 - `$(DotNetInstallArgumentsForWorkloadsTesting)` - required
+- `$(TemplateNuGetConfigPathForWorkloadTesting)` - required
+
 - `$(TestUsingWorkloads)` - optional
 - `$(SkipTempDirectoryCleanup)` - optional
 - `$(VersionBandForManifestPackages)` - optional
 - `$(ExtraWorkloadInstallCommandArguments)` - optional
+- `$(WorkloadInstallCommandOutputImportance)` - optional, defaults to `Normal`
+
+## `$(PackageSourceNameForBuiltPackages)` - optional
+
+`<add key="<$sourceName>" value="file:///..." />`
+
+Defaults to `nuget-local`.
+
+## `$(NuGetConfigPackageSourceMappingsForWorkloadTesting)` - optional
+
+For a value of `*Aspire*;Foo*`, a package source mapping will be added to the local nuget source
+added for built nugets:
+
+```xml
+  <packageSourceMapping>
+    <packageSource key="nuget-local">
+      <package pattern="*Aspire*" />
+      <package pattern="Foo*" />
+    </packageSource>
+    ...
+```
 
-## items
+# items
 
 - `@(DefaultPropertiesForNuGetBuild)`
diff --git a/src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/Sdk/WorkloadTesting.Core.targets b/src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/Sdk/WorkloadTesting.Core.targets
index dca02e054376..d853cb881eb8 100755
--- a/src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/Sdk/WorkloadTesting.Core.targets
+++ b/src/mono/nuget/Microsoft.NET.Runtime.WorkloadTesting.Internal/Sdk/WorkloadTesting.Core.targets
@@ -12,6 +12,9 @@
 
     <SdkWithWorkloadStampPath>$(SdkWithWorkloadForTestingPath)version-$(SdkVersionForWorkloadTesting).stamp</SdkWithWorkloadStampPath>
     <SdkWithWorkload_WorkloadStampPath>$(SdkWithWorkloadForTestingPath)workload.stamp</SdkWithWorkload_WorkloadStampPath>
+
+    <TemplateNuGetConfigPathForWorkloadTesting Condition="'$(TemplateNuGetConfigPathForWorkloadTesting)' == '' and '$(RepoRoot)' != ''">$(RepoRoot)NuGet.config</TemplateNuGetConfigPathForWorkloadTesting>
+    <InstallWorkloadUsingArtifactsAfterThisTarget Condition="'$(InstallWorkloadUsingArtifactsAfterThisTarget)' == ''">ArchiveTests</InstallWorkloadUsingArtifactsAfterThisTarget>
   </PropertyGroup>
 
   <PropertyGroup Condition="'$(InstallWorkloadForTesting)' == 'true'">
@@ -25,12 +28,12 @@
     <PreparePackagesForWorkloadInstall Condition="'$(PreparePackagesForWorkloadInstall)' == ''">true</PreparePackagesForWorkloadInstall>
     <InstallWorkloadUsingArtifactsDependsOn>
       $(InstallWorkloadUsingArtifactsDependsOn);
+      GetNuGetsToBuildForWorkloadTesting;
+      _PreparePackagesForWorkloadInstall;
       ProvisionDotNetForWorkloadTesting;
       _GetDotNetVersion;
       _FirstDotNetRun;
       _SetPackageVersionForWorkloadsTesting;
-      GetNuGetsToBuildForWorkloadTesting;
-      _PreparePackagesForWorkloadInstall;
       GetWorkloadInputs;
       _InstallWorkloads
     </InstallWorkloadUsingArtifactsDependsOn>
@@ -159,8 +162,12 @@
              TaskName="Microsoft.Workload.Build.Tasks.InstallWorkloadFromArtifacts"
              AssemblyFile="$(WorkloadBuildTasksAssemblyPath)" />
 
+  <UsingTask Condition="'$(WorkloadBuildTasksAssemblyPath)' != ''"
+             TaskName="Microsoft.Workload.Build.Tasks.PatchNuGetConfig"
+             AssemblyFile="$(WorkloadBuildTasksAssemblyPath)" />
+
   <Target Name="InstallWorkloadUsingArtifacts"
-          AfterTargets="ArchiveTests"
+          AfterTargets="$(InstallWorkloadUsingArtifactsAfterThisTarget)"
           DependsOnTargets="$(InstallWorkloadUsingArtifactsDependsOn)"
           Condition="'$(InstallWorkloadForTesting)' == 'true'" />
 
@@ -198,8 +205,11 @@
                      VersionBandForManifestPackages="$(VersionBandForManifestPackages)"
                      LocalNuGetsPath="$(BuiltNugetsDir)"
                      ExtraWorkloadInstallCommandArguments="$(ExtraWorkloadInstallCommandArguments)"
-                     TemplateNuGetConfigPath="$(RepoRoot)NuGet.config"
+                     PackageSourceNameForBuiltPackages="$(PackageSourceNameForBuiltPackages)"
+                     TemplateNuGetConfigPath="$(TemplateNuGetConfigPathForWorkloadTesting)"
+                     NuGetConfigPackageSourceMappings="$(NuGetConfigPackageSourceMappingsForWorkloadTesting)"
                      SdkWithNoWorkloadInstalledPath="$(_SdkWithNoWorkloadPath)"
+                     WorkloadInstallCommandOutputImportance="$(WorkloadInstallCommandOutputImportance)"
                      IntermediateOutputPath="$(ArtifactsObjDir)"
                      SkipTempDirectoryCleanup="$(SkipTempDirectoryCleanup)"
       />
diff --git a/src/mono/nuget/Microsoft.NET.Sdk.WebAssembly.Pack/build/Microsoft.NET.Sdk.WebAssembly.Browser.targets b/src/mono/nuget/Microsoft.NET.Sdk.WebAssembly.Pack/build/Microsoft.NET.Sdk.WebAssembly.Browser.targets
index 28517e9c58ba..fcf283375f82 100644
--- a/src/mono/nuget/Microsoft.NET.Sdk.WebAssembly.Pack/build/Microsoft.NET.Sdk.WebAssembly.Browser.targets
+++ b/src/mono/nuget/Microsoft.NET.Sdk.WebAssembly.Pack/build/Microsoft.NET.Sdk.WebAssembly.Browser.targets
@@ -73,10 +73,6 @@ Copyright (c) .NET Foundation. All rights reserved.
     <PreserveCompilationReferences>false</PreserveCompilationReferences>
     <IsWebConfigTransformDisabled>true</IsWebConfigTransformDisabled>
 
-    <!-- JS Modules -->
-    <!-- We disable the manifest generation because we are going to inline the modules in the blazor.boot.json manifest -->
-    <GenerateJSModuleManifest>false</GenerateJSModuleManifest>
-
     <EnableDefaultWasmAssembliesToBundle>false</EnableDefaultWasmAssembliesToBundle>
     <WasmNestedPublishAppDependsOn>_GatherWasmFilesToPublish;$(WasmNestedPublishAppDependsOn)</WasmNestedPublishAppDependsOn>
     <_WasmNestedPublishAppPreTarget>ComputeFilesToPublish</_WasmNestedPublishAppPreTarget>
@@ -95,10 +91,6 @@ Copyright (c) .NET Foundation. All rights reserved.
       $(ResolveStaticWebAssetsInputsDependsOn);
       _AddWasmStaticWebAssets;
     </ResolveStaticWebAssetsInputsDependsOn>
-    <StaticWebAssetsPrepareForRunDependsOn>
-      _GenerateBuildWasmBootJson;
-      $(StaticWebAssetsPrepareForRunDependsOn)
-    </StaticWebAssetsPrepareForRunDependsOn>
     <ResolvePublishStaticWebAssetsDependsOn Condition="'$(WasmBuildingForNestedPublish)' != 'true'">
       $(ResolvePublishStaticWebAssetsDependsOn);
       ProcessPublishFilesForWasm;
@@ -114,19 +106,17 @@ Copyright (c) .NET Foundation. All rights reserved.
     <ResolveCompressedFilesDependsOn>
       $(ResolveCompressedFilesDependsOn);
       ResolveWasmOutputs;
+      _GenerateBuildWasmBootJson;
       _AddWasmStaticWebAssets;
     </ResolveCompressedFilesDependsOn>
-    <ResolveCompressedFilesForPublishDependsOn>
+    <ResolveCompressedFilesForPublishDependsOn Condition="'$(WasmBuildingForNestedPublish)' != 'true'">
       $(ResolveCompressedFilesForPublishDependsOn);
       ProcessPublishFilesForWasm;
       ComputeWasmExtensions;
+      GeneratePublishWasmBootJson;
       _AddPublishWasmBootJsonToStaticWebAssets;
     </ResolveCompressedFilesForPublishDependsOn>
-    <CompressFilesDependsOn>
-      $(CompressFilesDependsOn)
-      _GenerateBuildWasmBootJson;
-    </CompressFilesDependsOn>
-    <CompressFilesForPublishDependsOn>
+    <CompressFilesForPublishDependsOn Condition="'$(WasmBuildingForNestedPublish)' != 'true'">
       $(CompressFilesForPublishDependsOn);
       GeneratePublishWasmBootJson;
     </CompressFilesForPublishDependsOn>
@@ -138,11 +128,8 @@ Copyright (c) .NET Foundation. All rights reserved.
     </AddWasmStaticWebAssetsDependsOn>
     <GenerateBuildWasmBootJsonDependsOn>
       $(GenerateBuildWasmBootJsonDependsOn);
-      ResolveStaticWebAssetsInputs;
+      ResolveWasmOutputs;
     </GenerateBuildWasmBootJsonDependsOn>
-    <GeneratePublishWasmBootJsonDependsOn>
-      $(GeneratePublishWasmBootJsonDependsOn);
-    </GeneratePublishWasmBootJsonDependsOn>
   </PropertyGroup>
 
   <Target Name="_WasmNativeForBuild" DependsOnTargets="_GatherWasmFilesToBuild;WasmBuildApp" Condition="'$(UsingBrowserRuntimeWorkload)' == 'true'" />
@@ -203,9 +190,6 @@ Copyright (c) .NET Foundation. All rights reserved.
       <_BlazorWebAssemblyStartupMemoryCache>$(BlazorWebAssemblyStartupMemoryCache)</_BlazorWebAssemblyStartupMemoryCache>
       <_BlazorWebAssemblyJiterpreter>$(BlazorWebAssemblyJiterpreter)</_BlazorWebAssemblyJiterpreter>
       <_BlazorWebAssemblyRuntimeOptions>$(BlazorWebAssemblyRuntimeOptions)</_BlazorWebAssemblyRuntimeOptions>
-      <_WasmDebugLevel>$(WasmDebugLevel)</_WasmDebugLevel>
-      <_WasmDebugLevel Condition="'$(_WasmDebugLevel)' == ''">0</_WasmDebugLevel>
-      <_WasmDebugLevel Condition="'$(_WasmDebugLevel)' == '0' and ('$(DebuggerSupport)' == 'true' or '$(Configuration)' == 'Debug')">-1</_WasmDebugLevel>
 
       <!-- Workaround for https://github.com/dotnet/sdk/issues/12114-->
       <PublishDir Condition="'$(AppendRuntimeIdentifierToOutputPath)' != 'true' AND '$(PublishDir)' == '$(OutputPath)$(RuntimeIdentifier)\$(PublishDirName)\'">$(OutputPath)$(PublishDirName)\</PublishDir>
@@ -223,12 +207,15 @@ Copyright (c) .NET Foundation. All rights reserved.
   </Target>
 
   <Target Name="_ResolveWasmOutputs" DependsOnTargets="ResolveReferences;PrepareResourceNames;ComputeIntermediateSatelliteAssemblies;_ResolveWasmConfiguration;_WasmNativeForBuild;_GetWasmRuntimePackVersion">
+    <PropertyGroup>
+      <_WasmNativeAssetFileNames>;@(WasmNativeAsset->'%(FileName)%(Extension)');</_WasmNativeAssetFileNames>
+    </PropertyGroup>
+
     <ItemGroup>
       <_WasmConfigFileCandidates Include="@(StaticWebAsset)" Condition="'%(SourceType)' == 'Discovered'" />
 
       <!-- Remove dotnet.js/wasm from runtime pack, in favor of the relinked ones in @(WasmNativeAsset) -->
-      <ReferenceCopyLocalPaths Remove="@(ReferenceCopyLocalPaths)"
-                               Condition="@(WasmNativeAsset->Count()) > 0 and ( '%(FileName)' == 'dotnet' or '%(FileName)' == 'dotnet.native' ) and ('%(Extension)' == '.wasm' or '%(Extension)' == '.js')" />
+      <ReferenceCopyLocalPaths Remove="@(ReferenceCopyLocalPaths)" Condition="$(_WasmNativeAssetFileNames.Contains(';%(FileName)%(Extension);'))" />
     </ItemGroup>
 
     <PropertyGroup>
@@ -299,31 +286,6 @@ Copyright (c) .NET Foundation. All rights reserved.
       <ReferenceCopyLocalPaths Remove="@(_WasmBuildFilesToRemove)" />
     </ItemGroup>
 
-    <PropertyGroup>
-      <_WasmBuildBootJsonPath>$(IntermediateOutputPath)blazor.boot.json</_WasmBuildBootJsonPath>
-    </PropertyGroup>
-
-    <ItemGroup>
-      <_BuildWasmBootJson
-        Include="$(_WasmBuildBootJsonPath)"
-        RelativePath="_framework/blazor.boot.json" />
-    </ItemGroup>
-
-    <DefineStaticWebAssets
-      CandidateAssets="@(_BuildWasmBootJson)"
-      SourceId="$(PackageId)"
-      SourceType="Computed"
-      AssetKind="Build"
-      AssetRole="Primary"
-      AssetTraitName="WasmResource"
-      AssetTraitValue="manifest"
-      CopyToOutputDirectory="PreserveNewest"
-      CopyToPublishDirectory="Never"
-      ContentRoot="$(OutDir)wwwroot"
-      BasePath="$(StaticWebAssetBasePath)"
-    >
-      <Output TaskParameter="Assets" ItemName="_BuildWasmBootJsonStaticWebAsset" />
-    </DefineStaticWebAssets>
   </Target>
 
   <Target Name="_AddWasmStaticWebAssets" DependsOnTargets="$(AddWasmStaticWebAssetsDependsOn)">
@@ -364,7 +326,7 @@ Copyright (c) .NET Foundation. All rights reserved.
       AssemblyPath="@(IntermediateAssembly)"
       Resources="@(_WasmOutputWithHash)"
       DebugBuild="true"
-      DebugLevel="$(_WasmDebugLevel)"
+      DebugLevel="$(WasmDebugLevel)"
       LinkerEnabled="false"
       CacheBootResources="$(BlazorCacheBootResources)"
       OutputPath="$(_WasmBuildBootJsonPath)"
@@ -380,11 +342,39 @@ Copyright (c) .NET Foundation. All rights reserved.
       Extensions="@(WasmBootConfigExtension)"
       TargetFrameworkVersion="$(TargetFrameworkVersion)"
       ModuleAfterConfigLoaded="@(WasmModuleAfterConfigLoaded)"
-      ModuleAfterRuntimeReady="@(WasmModuleAfterRuntimeReady)" />
+      ModuleAfterRuntimeReady="@(WasmModuleAfterRuntimeReady)"
+      IsPublish="false" />
 
     <ItemGroup>
       <FileWrites Include="$(_WasmBuildBootJsonPath)" />
     </ItemGroup>
+
+    <PropertyGroup>
+      <_WasmBuildBootJsonPath>$(IntermediateOutputPath)blazor.boot.json</_WasmBuildBootJsonPath>
+    </PropertyGroup>
+
+    <ItemGroup>
+      <_BuildWasmBootJson
+        Include="$(_WasmBuildBootJsonPath)"
+        RelativePath="_framework/blazor.boot.json" />
+    </ItemGroup>
+
+    <DefineStaticWebAssets
+      CandidateAssets="@(_BuildWasmBootJson)"
+      SourceId="$(PackageId)"
+      SourceType="Computed"
+      AssetKind="Build"
+      AssetRole="Primary"
+      AssetTraitName="WasmResource"
+      AssetTraitValue="manifest"
+      CopyToOutputDirectory="PreserveNewest"
+      CopyToPublishDirectory="Never"
+      ContentRoot="$(OutDir)wwwroot"
+      BasePath="$(StaticWebAssetBasePath)"
+    >
+      <Output TaskParameter="Assets" ItemName="_BuildWasmBootJsonStaticWebAsset" />
+    </DefineStaticWebAssets>
+
   </Target>
 
   <!-- Publish starts here -->
@@ -452,10 +442,23 @@ Copyright (c) .NET Foundation. All rights reserved.
     </PropertyGroup>
 
     <ConvertDllsToWebCil Candidates="@(_NewWasmPublishStaticWebAssets)" IntermediateOutputPath="$(_WasmPublishTmpWebCilPath)" OutputPath="$(_WasmPublishWebCilPath)" IsEnabled="$(_WasmEnableWebcil)">
-      <Output TaskParameter="WebCilCandidates" ItemName="_NewWebCilPublishStaticWebAssets" />
+      <Output TaskParameter="WebCilCandidates" ItemName="_NewWebCilPublishStaticWebAssetsCandidates" />
       <Output TaskParameter="FileWrites" ItemName="FileWrites" />
     </ConvertDllsToWebCil>
 
+    <!-- _NewWebCilPublishStaticWebAssetsCandidates contain the `Fingerprint` and the `Integrity` from the old assets.
+         Remove them and call DefineStaticWebAssets so that they can get re-computed appropriately.
+    -->
+    <ItemGroup>    
+      <_NewWebCilPublishStaticWebAssetsCandidatesNoMetadata 
+        Include="@(_NewWebCilPublishStaticWebAssetsCandidates)"
+        RemoveMetadata="Integrity;Fingerprint" />
+    </ItemGroup>
+
+    <DefineStaticWebAssets CandidateAssets="@(_NewWebCilPublishStaticWebAssetsCandidatesNoMetadata)">
+      <Output TaskParameter="Assets" ItemName="_NewWebCilPublishStaticWebAssets" />
+    </DefineStaticWebAssets>
+
     <ItemGroup>
       <ResolvedFileToPublish Remove="@(_PublishResolvedFilesToRemove)" />
       <StaticWebAsset Include="@(_NewWebCilPublishStaticWebAssets)" />
@@ -533,7 +536,6 @@ Copyright (c) .NET Foundation. All rights reserved.
   </Target>
 
   <Target Name="GeneratePublishWasmBootJson" DependsOnTargets="$(GeneratePublishWasmBootJsonDependsOn)">
-
     <ItemGroup>
       <_WasmPublishAsset
         Include="@(StaticWebAsset)"
@@ -549,7 +551,6 @@ Copyright (c) .NET Foundation. All rights reserved.
 
       <!-- We remove the extensions since they are added to the list of static web assets but we need to compute the target path for them -->
       <_WasmPublishAsset Remove="@(_BlazorExtensionsCandidatesForPublish)" />
-
     </ItemGroup>
 
     <ComputeStaticWebAssetsTargetPaths
@@ -568,7 +569,7 @@ Copyright (c) .NET Foundation. All rights reserved.
       AssemblyPath="@(IntermediateAssembly)"
       Resources="@(_WasmPublishBootResourceWithHash)"
       DebugBuild="false"
-      DebugLevel="$(_WasmDebugLevel)"
+      DebugLevel="$(WasmDebugLevel)"
       LinkerEnabled="$(PublishTrimmed)"
       CacheBootResources="$(BlazorCacheBootResources)"
       OutputPath="$(IntermediateOutputPath)blazor.publish.boot.json"
@@ -584,7 +585,8 @@ Copyright (c) .NET Foundation. All rights reserved.
       Extensions="@(WasmBootConfigExtension)"
       TargetFrameworkVersion="$(TargetFrameworkVersion)"
       ModuleAfterConfigLoaded="@(WasmModuleAfterConfigLoaded)"
-      ModuleAfterRuntimeReady="@(WasmModuleAfterRuntimeReady)" />
+      ModuleAfterRuntimeReady="@(WasmModuleAfterRuntimeReady)"
+      IsPublish="true" />
 
     <ItemGroup>
       <FileWrites Include="$(IntermediateOutputPath)blazor.publish.boot.json" />
diff --git a/src/mono/nuget/mono-packages.proj b/src/mono/nuget/mono-packages.proj
index 025c6666db71..10add92c17ef 100644
--- a/src/mono/nuget/mono-packages.proj
+++ b/src/mono/nuget/mono-packages.proj
@@ -30,7 +30,7 @@
     <ProjectReference Include="Microsoft.NET.Runtime.LibraryBuilder.Sdk\Microsoft.NET.Runtime.LibraryBuilder.Sdk.pkgproj" />
   </ItemGroup>
 
-  <ItemGroup Condition="'$(DotNetBuildFromSource)' != 'true'">
+  <ItemGroup Condition="'$(DotNetBuildSourceOnly)' != 'true'">
     <ProjectReference Include="Microsoft.NET.Runtime.MonoTargets.Sdk\Microsoft.NET.Runtime.MonoTargets.Sdk.pkgproj" />
   </ItemGroup>
 </Project>
diff --git a/src/mono/sample/wasi/native/Program.cs b/src/mono/sample/wasi/native/Program.cs
index cb2fd0f36caf..d8d480869a8d 100644
--- a/src/mono/sample/wasi/native/Program.cs
+++ b/src/mono/sample/wasi/native/Program.cs
@@ -20,11 +20,6 @@ public static int MyExport(int number)
     public unsafe static int Main(string[] args)
     {
         Console.WriteLine($"main: {args.Length}");
-        // workaround to force the interpreter to initialize wasm_native_to_interp_ftndesc for MyExport
-        if (args.Length > 10000) {
-            ((IntPtr)(delegate* unmanaged<int,int>)&MyExport).ToString();
-        }
-
         MyImport();
         return 0;
     }
diff --git a/src/mono/sample/wasm/Directory.Build.props b/src/mono/sample/wasm/Directory.Build.props
index 93717b5fbb4f..ef6268ac4245 100644
--- a/src/mono/sample/wasm/Directory.Build.props
+++ b/src/mono/sample/wasm/Directory.Build.props
@@ -17,6 +17,7 @@
     <WasmAppDir>$(MSBuildProjectDirectory)\bin\$(Configuration)\AppBundle\</WasmAppDir>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)' == 'Debug'">
+    <WasmNativeDebugSymbols>true</WasmNativeDebugSymbols>
     <WasmNativeStrip>false</WasmNativeStrip>
   </PropertyGroup>
 
diff --git a/src/mono/sample/wasm/blazor-frame/blazor-frame.diff b/src/mono/sample/wasm/blazor-frame/blazor-frame.diff
index f72b283d4f1d..c57ea7d76a04 100644
--- a/src/mono/sample/wasm/blazor-frame/blazor-frame.diff
+++ b/src/mono/sample/wasm/blazor-frame/blazor-frame.diff
@@ -1,3 +1,22 @@
+diff -ruw blazor/Pages/Home.razor blazor/Pages/Home.razor
+--- a/blazor/Pages/Home.razor	2024-01-23 14:30:05
++++ b/blazor/Pages/Home.razor	2024-02-12 17:59:24
+@@ -1,7 +1,15 @@
+ ﻿@page "/"
++@inject IJSRuntime JSRuntime
+ 
+ <PageTitle>Home</PageTitle>
+ 
+ <h1>Hello, world!</h1>
+ 
+ Welcome to your new app.
++
++@code {
++    protected override void OnAfterRender(bool firstRender)
++    {
++        BenchmarkEvent.Send(JSRuntime, "Rendered Index.razor");
++    }
++}
 diff -urw blazor/Program.cs blazor/Program.cs
 --- a/blazor/Program.cs	2024-01-22 16:01:30
 +++ b/blazor/Program.cs	2023-09-28 13:12:14
diff --git a/src/mono/sample/wasm/blazor-frame/blazor.csproj b/src/mono/sample/wasm/blazor-frame/blazor.csproj
index 5592718e0714..3061f77e83ac 100644
--- a/src/mono/sample/wasm/blazor-frame/blazor.csproj
+++ b/src/mono/sample/wasm/blazor-frame/blazor.csproj
@@ -11,8 +11,8 @@
 
   <ItemGroup>
     <!-- TODO un-pin this when it's possible -->
-    <PackageReference Include="Microsoft.AspNetCore.Components.WebAssembly" Version="9.0.0-alpha.1.24061.8" />
-    <PackageReference Include="Microsoft.AspNetCore.Components.WebAssembly.DevServer" Version="9.0.0-alpha.1.24061.8" PrivateAssets="all" />
+    <PackageReference Include="Microsoft.AspNetCore.Components.WebAssembly" Version="9.0.0-preview.1.24081.5" />
+    <PackageReference Include="Microsoft.AspNetCore.Components.WebAssembly.DevServer" Version="9.0.0-preview.1.24081.5" PrivateAssets="all" />
   </ItemGroup>
 
 </Project>
diff --git a/src/mono/sample/wasm/browser-bench/Console/Wasm.Console.Bench.Sample.csproj b/src/mono/sample/wasm/browser-bench/Console/Wasm.Console.Bench.Sample.csproj
index 6e36786bb6fb..924e253ddeaa 100644
--- a/src/mono/sample/wasm/browser-bench/Console/Wasm.Console.Bench.Sample.csproj
+++ b/src/mono/sample/wasm/browser-bench/Console/Wasm.Console.Bench.Sample.csproj
@@ -5,7 +5,6 @@
     <WasmMainJSPath>$(BrowserProjectRoot)\test-main.js</WasmMainJSPath>
     <WasmGenerateRunV8Script>true</WasmGenerateRunV8Script>
     <SuppressTrimAnalysisWarnings>true</SuppressTrimAnalysisWarnings>
-    <WasmNativeStrip>false</WasmNativeStrip>
   </PropertyGroup>
 
   <PropertyGroup>
diff --git a/src/mono/sample/wasm/browser-bench/Wasm.Browser.Bench.Sample.csproj b/src/mono/sample/wasm/browser-bench/Wasm.Browser.Bench.Sample.csproj
index f8c7b1de29b4..b4d32416389a 100644
--- a/src/mono/sample/wasm/browser-bench/Wasm.Browser.Bench.Sample.csproj
+++ b/src/mono/sample/wasm/browser-bench/Wasm.Browser.Bench.Sample.csproj
@@ -40,6 +40,7 @@
   &lt;/disabledPackageSources&gt;
 &lt;/configuration&gt;
     </NugetConfigContent>
+    <NugetPackagesPath>$(MSBuildThisFileDirectory)nugetPackages</NugetPackagesPath>
   </PropertyGroup>
 
   <Target Name="BuildWBT">
@@ -50,6 +51,8 @@
     </PropertyGroup>
 
     <Exec IgnoreExitCode="true" Command="$(_Dotnet) build $(MonoProjectRoot)wasm/Wasm.Build.Tests/Wasm.Build.Tests.csproj -c $(Configuration) -t:InstallWorkloadUsingArtifacts -p:TargetOS=browser -p:TargetArchitecture=wasm $(BuildAdditionalArgs)" />
+    <RemoveDir Directories="$(NugetPackagesPath)" />
+    <MakeDir Directories="$(NugetPackagesPath)" />
   </Target>
 
   <Target Name="PrepareBlazorTemplate" Condition="!Exists('$(MonoProjectRoot)sample/wasm/blazor-frame/blazor')">
@@ -68,15 +71,15 @@
         Overwrite="true"
         Lines="$(NugetConfigContent)" />
 
-    <Exec EnvironmentVariables="MSBuildSDKsPath=;DOTNET_ROOT=$(ArtifactsDir)bin/dotnet-latest;PATH=$(ArtifactsDir)bin/dotnet-latest:$(PATH)" WorkingDirectory="$(MSBuildThisFileDirectory)../blazor-frame/blazor" Command="dotnet new blazorwasm" />
+    <Exec EnvironmentVariables="MSBuildSDKsPath=;NUGET_PACKAGES=$(NugetPackagesPath);DOTNET_ROOT=$(ArtifactsDir)bin/dotnet-latest;PATH=$(ArtifactsDir)bin/dotnet-latest:$(PATH)" WorkingDirectory="$(MSBuildThisFileDirectory)../blazor-frame/blazor" Command="dotnet new blazorwasm" />
     <Exec WorkingDirectory="$(MSBuildThisFileDirectory)../blazor-frame" Command="git apply blazor-frame.diff" />
   </Target>
 
   <Target Name="BuildBlazorFrame" AfterTargets="BuildSampleInTree" Condition="'$(BlazorStartup)' == 'true'" DependsOnTargets="BuildWBT;PrepareBlazorTemplate">
-    <Exec EnvironmentVariables="MSBuildSDKsPath=;DOTNET_ROOT=$(ArtifactsDir)bin/dotnet-latest;PATH=$(ArtifactsDir)bin/dotnet-latest:$(PATH)" WorkingDirectory="$(MSBuildThisFileDirectory)../blazor-frame/blazor" Command="dotnet publish blazor.csproj -c $(Configuration) -p:WBTOverrideRuntimePack=true -p:TargetOS=browser -p:TargetArchitecture=wasm $(BuildAdditionalArgs)" />
+    <Exec EnvironmentVariables="MSBuildSDKsPath=;NUGET_PACKAGES=$(NugetPackagesPath);DOTNET_ROOT=$(ArtifactsDir)bin/dotnet-latest;PATH=$(ArtifactsDir)bin/dotnet-latest:$(PATH)" WorkingDirectory="$(MSBuildThisFileDirectory)../blazor-frame/blazor" Command="dotnet publish blazor.csproj -c $(Configuration) -p:WBTOverrideRuntimePack=true -p:TargetOS=browser -p:TargetArchitecture=wasm $(BuildAdditionalArgs)" />
 
     <ItemGroup>
-        <BlazorSourceFiles Include="$(MSBuildThisFileDirectory)../blazor-frame/blazor/bin/$(Configuration)/net9.0/publish/wwwroot/blazor-template/**/*.*"/>
+        <BlazorSourceFiles Include="$(MSBuildThisFileDirectory)../blazor-frame/blazor/bin/$(Configuration)/$(NetCoreAppCurrent)/publish/wwwroot/blazor-template/**/*.*"/>
     </ItemGroup>
 
     <Copy
@@ -84,25 +87,36 @@
         DestinationFolder="$(MSBuildThisFileDirectory)/bin/$(Configuration)/AppBundle/blazor-template/%(RecursiveDir)" />
   </Target>
 
-  <Target Name="BuildBrowserFrame" AfterTargets="BuildSampleInTree" Condition="'$(BrowserStartup)' == 'true'" DependsOnTargets="BuildWBT">
+  <Target Name="PrepareBrowserTemplate" Condition="!Exists('$(MonoProjectRoot)sample/wasm/browser-frame/browser-frame')">
     <ItemGroup>
         <OverrideFiles Include="$(MonoProjectRoot)wasm/Wasm.Build.Tests/data/WasmOverridePacks.targets" />
         <OverrideFiles Include="$(MonoProjectRoot)wasm/Wasm.Build.Tests/data/Blazor.Directory.Build.targets" />
-        <OverrideDestFiles Include="$(MonoProjectRoot)sample/wasm/browser-frame/WasmOverridePacks.targets" />
-        <OverrideDestFiles Include="$(MonoProjectRoot)sample/wasm/browser-frame/Directory.Build.targets" />
+        <OverrideFiles Include="$(MonoProjectRoot)sample/wasm/browser-frame/AppStart.cs" />
+        <OverrideFiles Include="$(MonoProjectRoot)sample/wasm/browser-frame/frame.js" />
+        <OverrideDestFiles Include="$(MonoProjectRoot)sample/wasm/browser-frame/browser-frame/WasmOverridePacks.targets" />
+        <OverrideDestFiles Include="$(MonoProjectRoot)sample/wasm/browser-frame/browser-frame/Directory.Build.targets" />
+        <OverrideDestFiles Include="$(MonoProjectRoot)sample/wasm/browser-frame/browser-frame/AppStart.cs" />
+        <OverrideDestFiles Include="$(MonoProjectRoot)sample/wasm/browser-frame/browser-frame/wwwroot/frame.js" />
     </ItemGroup>
 
+    <MakeDir Directories="$(MonoProjectRoot)sample/wasm/blazor-frame/browser-frame"/>
     <Copy SourceFiles="@(OverrideFiles)" DestinationFiles="@(OverrideDestFiles)" UseSymbolicLinksIfPossible="true" OverwriteReadOnlyFiles="true" />
 
     <WriteLinesToFile
-        File="$(MSBuildThisFileDirectory)../browser-frame/nuget.config"
+        File="$(MSBuildThisFileDirectory)../browser-frame/browser-frame/nuget.config"
         Overwrite="true"
         Lines="$(NugetConfigContent)" />
 
-    <Exec EnvironmentVariables="MSBuildSDKsPath=;DOTNET_ROOT=$(ArtifactsDir)bin/dotnet-latest;PATH=$(ArtifactsDir)bin/dotnet-latest:$(PATH)" WorkingDirectory="$(MSBuildThisFileDirectory)../browser-frame" Command="dotnet publish browser-frame.csproj -c $(Configuration) -p:WBTOverrideRuntimePack=true -p:TargetOS=browser -p:TargetArchitecture=wasm $(BuildAdditionalArgs)" />
+    <Exec EnvironmentVariables="MSBuildSDKsPath=;NUGET_PACKAGES=$(NugetPackagesPath);DOTNET_ROOT=$(ArtifactsDir)bin/dotnet-latest;PATH=$(ArtifactsDir)bin/dotnet-latest:$(PATH)" WorkingDirectory="$(MSBuildThisFileDirectory)../browser-frame/browser-frame/" Command="dotnet new wasmbrowser" />
+    <Exec WorkingDirectory="$(MSBuildThisFileDirectory)../browser-frame" Command="git apply browser-frame.diff" />
+  </Target>
+
+  <Target Name="BuildBrowserFrame" AfterTargets="BuildSampleInTree" Condition="'$(BrowserStartup)' == 'true'" DependsOnTargets="BuildWBT;PrepareBrowserTemplate">
+
+    <Exec EnvironmentVariables="MSBuildSDKsPath=;NUGET_PACKAGES=$(NugetPackagesPath);DOTNET_ROOT=$(ArtifactsDir)bin/dotnet-latest;PATH=$(ArtifactsDir)bin/dotnet-latest:$(PATH)" WorkingDirectory="$(MSBuildThisFileDirectory)../browser-frame/browser-frame" Command="dotnet publish browser-frame.csproj -c $(Configuration) -p:WBTOverrideRuntimePack=true -p:TargetOS=browser -p:TargetArchitecture=wasm $(BuildAdditionalArgs)" />
 
     <ItemGroup>
-        <BrowserSourceFiles Include="$(MSBuildThisFileDirectory)../browser-frame/bin/$(Configuration)/net8.0/publish/wwwroot/**/*.*"/>
+        <BrowserSourceFiles Include="$(MSBuildThisFileDirectory)../browser-frame/browser-frame/bin/$(Configuration)/$(NetCoreAppCurrent)/publish/wwwroot/**/*.*"/>
     </ItemGroup>
 
     <Copy
diff --git a/src/mono/sample/wasm/browser-bench/main.js b/src/mono/sample/wasm/browser-bench/main.js
index da1d7255e739..0f1a65304912 100644
--- a/src/mono/sample/wasm/browser-bench/main.js
+++ b/src/mono/sample/wasm/browser-bench/main.js
@@ -205,7 +205,9 @@ class MainApp {
     }
 
     removeFrame() {
-        this._frame.contentWindow.muteErrors();
+        if (this._frame.contentWindow.muteErrors !== undefined)
+            this._frame.contentWindow.muteErrors();
+
         document.body.removeChild(this._frame);
     }
 }
diff --git a/src/mono/sample/wasm/browser-frame/Program.cs b/src/mono/sample/wasm/browser-frame/Program.cs
deleted file mode 100644
index a5999f79c548..000000000000
--- a/src/mono/sample/wasm/browser-frame/Program.cs
+++ /dev/null
@@ -1,20 +0,0 @@
-using System;
-using System.Runtime.InteropServices.JavaScript;
-
-BrowserBench.FrameApp.ReachedManaged();
-
-Console.WriteLine("Hello, Browser!");
-
-public partial class MyClass
-{
-    [JSExport]
-    internal static string Greeting()
-    {
-        var text = $"Hello, World! Greetings from {GetHRef()}";
-        Console.WriteLine(text);
-        return text;
-    }
-
-    [JSImport("window.location.href", "main.js")]
-    internal static partial string GetHRef();
-}
diff --git a/src/mono/sample/wasm/browser-frame/browser-frame.diff b/src/mono/sample/wasm/browser-frame/browser-frame.diff
new file mode 100644
index 000000000000..72795bed4394
--- /dev/null
+++ b/src/mono/sample/wasm/browser-frame/browser-frame.diff
@@ -0,0 +1,23 @@
+diff -ru browser-frame/Program.cs browser-frame/Program.cs
+--- a/browser-frame/Program.cs	2024-03-07 09:00:37
++++ b/browser-frame/Program.cs	2024-03-05 15:38:42
+@@ -1,6 +1,8 @@
+ using System;
+ using System.Runtime.InteropServices.JavaScript;
+ 
++BrowserBench.FrameApp.ReachedManaged();
++
+ Console.WriteLine("Hello, Browser!");
+ 
+ public partial class MyClass
+diff -ru browser-frame/wwwroot/index.html browser-frame/wwwroot/index.html
+--- a/browser-frame/wwwroot/index.html	2024-03-07 09:00:37
++++ b/browser-frame/wwwroot/index.html	2024-03-05 15:38:42
+@@ -8,6 +8,7 @@
+   <meta charset="UTF-8">
+   <meta name="viewport" content="width=device-width, initial-scale=1.0">
+   <script type='module' src="./main.js"></script>
++  <script type="module" src="./frame.js"></script>
+ </head>
+ 
+ <body>
diff --git a/src/mono/sample/wasm/browser-frame/wwwroot/frame.js b/src/mono/sample/wasm/browser-frame/frame.js
similarity index 100%
rename from src/mono/sample/wasm/browser-frame/wwwroot/frame.js
rename to src/mono/sample/wasm/browser-frame/frame.js
diff --git a/src/mono/sample/wasm/browser-frame/runtimeconfig.template.json b/src/mono/sample/wasm/browser-frame/runtimeconfig.template.json
deleted file mode 100644
index b96a94320ba5..000000000000
--- a/src/mono/sample/wasm/browser-frame/runtimeconfig.template.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-    "wasmHostProperties": {
-        "perHostConfig": [
-            {
-                "name": "browser",
-                "host": "browser"
-            }
-        ]
-    }
-}
\ No newline at end of file
diff --git a/src/mono/sample/wasm/browser-frame/wwwroot/index.html b/src/mono/sample/wasm/browser-frame/wwwroot/index.html
deleted file mode 100644
index 8b8df7572c2b..000000000000
--- a/src/mono/sample/wasm/browser-frame/wwwroot/index.html
+++ /dev/null
@@ -1,18 +0,0 @@
-<!DOCTYPE html>
-<!--  Licensed to the .NET Foundation under one or more agreements. -->
-<!-- The .NET Foundation licenses this file to you under the MIT license. -->
-<html>
-
-<head>
-  <title>browser-frame</title>
-  <meta charset="UTF-8">
-  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-  <script type='module' src="./main.js"></script>
-  <script type="module" src="./frame.js"></script>
-</head>
-
-<body>
-  <span id="out"></span>
-</body>
-
-</html>
diff --git a/src/mono/sample/wasm/browser-frame/wwwroot/main.js b/src/mono/sample/wasm/browser-frame/wwwroot/main.js
deleted file mode 100644
index a073fc9cf703..000000000000
--- a/src/mono/sample/wasm/browser-frame/wwwroot/main.js
+++ /dev/null
@@ -1,25 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-import { dotnet } from './_framework/dotnet.js'
-
-const { setModuleImports, getAssemblyExports, getConfig } = await dotnet
-    .withDiagnosticTracing(false)
-    .withApplicationArgumentsFromQuery()
-    .create();
-
-setModuleImports('main.js', {
-    window: {
-        location: {
-            href: () => globalThis.window.location.href
-        }
-    }
-});
-
-const config = getConfig();
-const exports = await getAssemblyExports(config.mainAssemblyName);
-const text = exports.MyClass.Greeting();
-console.log(text);
-
-document.getElementById('out').innerHTML = text;
-await dotnet.run();
\ No newline at end of file
diff --git a/src/mono/sample/wasm/browser-frame/wwwroot/start.html b/src/mono/sample/wasm/browser-frame/wwwroot/start.html
deleted file mode 100644
index 9dad19ec0a1c..000000000000
--- a/src/mono/sample/wasm/browser-frame/wwwroot/start.html
+++ /dev/null
@@ -1,18 +0,0 @@
-<!DOCTYPE html>
-<!--  Licensed to the .NET Foundation under one or more agreements. -->
-<!-- The .NET Foundation licenses this file to you under the MIT license. -->
-<html>
-
-<head>
-  <title>browser-frame</title>
-  <meta charset="UTF-8">
-  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-  <script type='module' src="./start.js"></script>
-  <script type="module" src="./frame.js"></script>
-</head>
-
-<body>
-  <span id="out"></span>
-</body>
-
-</html>
diff --git a/src/mono/sample/wasm/browser-frame/wwwroot/start.js b/src/mono/sample/wasm/browser-frame/wwwroot/start.js
deleted file mode 100644
index a073fc9cf703..000000000000
--- a/src/mono/sample/wasm/browser-frame/wwwroot/start.js
+++ /dev/null
@@ -1,25 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-import { dotnet } from './_framework/dotnet.js'
-
-const { setModuleImports, getAssemblyExports, getConfig } = await dotnet
-    .withDiagnosticTracing(false)
-    .withApplicationArgumentsFromQuery()
-    .create();
-
-setModuleImports('main.js', {
-    window: {
-        location: {
-            href: () => globalThis.window.location.href
-        }
-    }
-});
-
-const config = getConfig();
-const exports = await getAssemblyExports(config.mainAssemblyName);
-const text = exports.MyClass.Greeting();
-console.log(text);
-
-document.getElementById('out').innerHTML = text;
-await dotnet.run();
\ No newline at end of file
diff --git a/src/mono/sample/wasm/browser-profile/Wasm.BrowserProfile.Sample.csproj b/src/mono/sample/wasm/browser-profile/Wasm.BrowserProfile.Sample.csproj
index ddf26a063036..81f03cdc9878 100644
--- a/src/mono/sample/wasm/browser-profile/Wasm.BrowserProfile.Sample.csproj
+++ b/src/mono/sample/wasm/browser-profile/Wasm.BrowserProfile.Sample.csproj
@@ -2,8 +2,6 @@
   <PropertyGroup>
     <WasmCopyAppZipToHelixTestDir Condition="'$(ArchiveTests)' == 'true'">true</WasmCopyAppZipToHelixTestDir>
     <WasmProfilers>aot;</WasmProfilers>
-    <WasmBuildNative>true</WasmBuildNative>
-    <WasmNativeStrip>false</WasmNativeStrip>
   </PropertyGroup>
 
   <ItemGroup>
diff --git a/src/mono/sample/wasm/browser-shutdown/main.js b/src/mono/sample/wasm/browser-shutdown/main.js
index d727f7932a10..e3141c08976c 100644
--- a/src/mono/sample/wasm/browser-shutdown/main.js
+++ b/src/mono/sample/wasm/browser-shutdown/main.js
@@ -14,7 +14,6 @@ try {
         .withExitOnUnhandledError()
         .withExitCodeLogging()
         .withElementOnExit()
-        .withAssertAfterExit()
         .withOnConfigLoaded(() => {
             // you can test abort of the startup by opening http://localhost:8000/?throwError=true
             const params = new URLSearchParams(location.search);
diff --git a/src/mono/sample/wasm/browser-threads/Program.cs b/src/mono/sample/wasm/browser-threads/Program.cs
index 148a002dfe1e..8783ace18be7 100644
--- a/src/mono/sample/wasm/browser-threads/Program.cs
+++ b/src/mono/sample/wasm/browser-threads/Program.cs
@@ -2,159 +2,97 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.IO;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices.JavaScript;
 using System.Threading;
 using System.Threading.Tasks;
 using System.Collections.Generic;
 
-namespace Sample
-{
-    public partial class Test
-    {
-        public static int Main(string[] args)
-        {
-            Console.WriteLine("Hello, World!");
-            return 0;
-        }
+namespace Sample;
 
-        [JSImport("globalThis.console.log")]
-        public static partial void ConsoleLog(string status);
+public partial class Test
+{
+    private static int _animationCounter = 0;
+    private static int _callCounter = 0;
+    private static bool _isRunning = false;
+    private static readonly IReadOnlyList<string> _animations = new string[] { "\u2680", "\u2681", "\u2682", "\u2683", "\u2684", "\u2685" };
 
-        [JSImport("Sample.Test.updateProgress", "main.js")]
-        static partial void updateProgress(string status);
+    public static async Task<int> Main(string[] args)
+    {
+        Console.WriteLine("Hello, World!");
+        await updateProgress2();
+        return 0;
+    }
 
-        internal static void UpdateProgress(string status) => updateProgress(status);
+    [JSImport("globalThis.console.log")]
+    public static partial void ConsoleLog(string status);
 
-        static Demo _demo = null;
+    [JSImport("Sample.Test.updateProgress", "main.js")]
+    private static partial Task updateProgress(string status);
 
-        [JSExport]
-        public static void Start(int n)
-        {
-            var comp = new ExpensiveComputation(n);
-            comp.Start();
-            #pragma warning disable CS4014
-            WaitForCompletion(comp);
-            _demo = new Demo(UpdateProgress, comp);
-        }
+    [JSImport("Sample.Test.updateProgress2", "main.js")]
+    private static partial Task updateProgress2();
 
-        public static async Task WaitForCompletion (ExpensiveComputation comp) {
-            Console.WriteLine($"WaitForCompletion started on thread {Thread.CurrentThread.ManagedThreadId}");
-            await comp.Completion;
-            Console.WriteLine($"WaitForCompletion completed on thread {Thread.CurrentThread.ManagedThreadId}");
-            UpdateProgress("\u270C\uFE0E");
-        }
+    [JSExport]
+    public static void Progress2()
+    {
+        // both calls here are sync POSIX calls dispatched to UI thread, which is already blocked because this is synchronous method on deputy thread
+        // it should not deadlock anyway, see also invoke_later_when_on_ui_thread_sync and emscripten_yield
+        var cwd = Directory.GetCurrentDirectory();
+        Console.WriteLine("Progress! "+ cwd); 
+
+        // below is blocking call, which means that UI will spin-lock little longer
+        // it will warn about blocking wait because of jsThreadBlockingMode: "WarnWhenBlockingWait"
+        // but it will not deadlock because underlying task chain is not JS promise
+        Task.Delay(10).Wait();
+    }
 
-        [JSExport]
-        public static int Progress()
+    [JSExport]
+    public static bool Progress()
+    {
+        updateProgress(""+_animations[_animationCounter++]);
+        if (_animationCounter >= _animations.Count)
         {
-            if (_demo.Progress())
-                return 0; /* done */
-            else
-                return 1; /* continue */
+            _animationCounter = 0;
         }
-
-        [JSExport]
-        public static int GetAnswer() { return _demo.Result; }
+        return _isRunning;
     }
 
-}
-
-public class ExpensiveComputation
-{
-    private readonly TaskCompletionSource<int> _tcs = new();
-    private readonly int UpTo;
-    public ExpensiveComputation(int n) { UpTo = n; }
-    public long CallCounter { get; private set; }
-    public Task<int> Completion => _tcs.Task;
-
-    public void Start()
+    [JSExport]
+    [return: JSMarshalAs<JSType.Promise<JSType.Number>>]
+    public static Task<long> Fib(int n)
     {
-        new Thread((o) => ((ExpensiveComputation)o).Run()).Start(this);
+        return Task.Run(()=>{
+            _isRunning = true;
+            var res = FibImpl(n);
+            _isRunning = false;
+            return Task.FromResult(res);
+        });
     }
 
-    public void Run()
+    private static long FibImpl(int n)
     {
-        Console.WriteLine("Hello from ManagedThreadId " + Thread.CurrentThread.ManagedThreadId);
-        long result = Fib(UpTo);
-        if (result < (long)int.MaxValue)
-            _tcs.SetResult((int)result);
-        else
-            _tcs.SetException(new Exception("Fibonacci computation exceeded Int32.MaxValue"));
-    }
-    public long Fib(int n)
-    {
-        CallCounter++;
+        _callCounter++;
         // make some garbage every 1000 calls
-        if (CallCounter % 1000 == 0)
+        if (_callCounter % 1000 == 0)
         {
             AllocateGarbage();
         }
         // and collect it every once in a while
-        if (CallCounter % 500000 == 0)
+        if (_callCounter % 500000 == 0)
             GC.Collect();
 
         if (n < 2)
             return n;
-        return Fib(n - 1) + Fib(n - 2);
+        return FibImpl(n - 1) + FibImpl(n - 2);
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
-    private void AllocateGarbage()
+    private static void AllocateGarbage()
     {
         object[] garbage = new object[200];
         garbage[12] = new object();
         garbage[197] = garbage;
     }
-
-}
-
-public class Demo
-{
-    public class Animation
-    {
-        private readonly Action<string> _updateProgress;
-        private int _counter = 0;
-
-        private readonly IReadOnlyList<string> _animations = new string[] { "\u2680", "\u2681", "\u2682", "\u2683", "\u2684", "\u2685" };
-
-        public void Step(string suffix = "")
-        {
-            _updateProgress(_animations[_counter++] + suffix);
-            if (_counter >= _animations.Count)
-            {
-                _counter = 0;
-            }
-        }
-
-        public Animation(Action<string> updateProgress)
-        {
-            _updateProgress = updateProgress;
-        }
-
-
-    }
-
-    private readonly Action<string> _updateProgress;
-    private readonly Animation _animation;
-    private readonly ExpensiveComputation _expensiveComputation;
-
-    public Demo(Action<string> updateProgress, ExpensiveComputation comp)
-    {
-        _updateProgress = updateProgress;
-        _animation = new Animation(updateProgress);
-        _expensiveComputation = comp;
-    }
-
-    public bool Progress()
-    {
-        if (!_expensiveComputation.Completion.IsCompleted)
-        {
-            _animation.Step($"{_expensiveComputation.CallCounter} calls");
-        }
-
-        return _expensiveComputation.Completion.IsCompleted;
-    }
-
-    public int Result => _expensiveComputation.Completion.Result;
 }
diff --git a/src/mono/sample/wasm/browser-threads/main.js b/src/mono/sample/wasm/browser-threads/main.js
index d53e59304e10..ea97a5ce200c 100644
--- a/src/mono/sample/wasm/browser-threads/main.js
+++ b/src/mono/sample/wasm/browser-threads/main.js
@@ -4,77 +4,83 @@
 import { dotnet, exit } from './_framework/dotnet.js'
 
 let progressElement = null;
-
-function updateProgress(status) {
-    if (progressElement) {
-        progressElement.innerText = status;
-    } else {
-        console.log("Progress: " + status);
-    }
-}
-
+let inputElement = null;
+let exports = null;
 const assemblyName = "Wasm.Browser.Threads.Sample.dll";
 
-function delay(ms) {
-    return new Promise(resolve => setTimeout(resolve, ms));
-}
-
-async function Run(exports, N) {
-    while (true) {
-        await delay(50);
-        const p = exports.Sample.Test.Progress();
-        if (p === 0)
-            break;
-    }
-    const answer = exports.Sample.Test.GetAnswer();
-    document.getElementById("out").innerText = `Fib(${N}) =  ${answer}`;
-}
-
-async function doMathSlowly(exports) {
+try {
     progressElement = document.getElementById("progressElement");
-    const N = parseInt(document.getElementById("inputN").value);
-    exports.Sample.Test.Start(N);
-    await Run(exports, N);
-}
+    inputElement = document.getElementById("inputN");
 
-function setEditable(inputElement, isEditable) {
-    inputElement.disabled = !isEditable;
-}
-
-function onInputValueChanged(exports, inputElement) {
-    async function handler() {
-        setEditable(inputElement, false);
-        await doMathSlowly(exports);
-        setEditable(inputElement, true);
-    }
-    return handler;
-}
-
-try {
-    const inputElement = document.getElementById("inputN");
     const { setModuleImports, getAssemblyExports, runMain } = await dotnet
         .withEnvironmentVariable("MONO_LOG_LEVEL", "debug")
         .withElementOnExit()
         .withExitCodeLogging()
         .withExitOnUnhandledError()
+        .withConfig({
+            jsThreadBlockingMode: "WarnWhenBlockingWait",
+        })
         .create();
 
     setModuleImports("main.js", {
         Sample: {
             Test: {
-                updateProgress
+                updateProgress,
+                updateProgress2
             }
         }
     });
 
-    const exports = await getAssemblyExports(assemblyName);
+    exports = await getAssemblyExports(assemblyName);
 
-    await doMathSlowly(exports);
-    setEditable(inputElement, true);
-    inputElement.addEventListener("change", onInputValueChanged(exports, inputElement));
+    await doSlowMath();
+    setEditable(true);
+    inputElement.addEventListener("change", onInputValueChanged);
 
     let exit_code = await runMain(assemblyName, []);
+    // comment out the following line for interactive testing, otherwise further call would be rejected by runtime
     exit(exit_code);
 } catch (err) {
     exit(2, err);
 }
+
+async function doSlowMath() {
+    const N = parseInt(document.getElementById("inputN").value);
+    const resultPromise = exports.Sample.Test.Fib(N);
+
+    while (true) {
+        await delay(50);
+        const isRunning = exports.Sample.Test.Progress();
+        if (!isRunning)
+            break;
+    }
+    const answer = await resultPromise;
+    document.getElementById("out").innerText = `Fib(${N}) =  ${answer}`;
+
+}
+
+export async function updateProgress(status) {
+    if (progressElement) {
+        progressElement.innerText = status;
+    } else {
+        console.log("Progress: " + status);
+    }
+}
+
+export async function updateProgress2() {
+    exports.Sample.Test.Progress2();
+}
+
+function delay(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+}
+
+function setEditable(isEditable) {
+    inputElement.disabled = !isEditable;
+}
+
+async function onInputValueChanged() {
+    setEditable(false);
+    await doSlowMath(exports);
+    setEditable(true);
+}
diff --git a/src/mono/sample/wasm/simple-raytracer/Program.cs b/src/mono/sample/wasm/simple-raytracer/Program.cs
index f356394f0de1..5ee605e8ef68 100644
--- a/src/mono/sample/wasm/simple-raytracer/Program.cs
+++ b/src/mono/sample/wasm/simple-raytracer/Program.cs
@@ -195,7 +195,7 @@ private static void renderPixel (int i, int j, ref Vec3f light, Intersector inte
             if (didHitZ && (hitZ > sphere.Center.z))
                 continue;
 
-            if (intersector.Intersect(ref pos, ref dir, Unsafe.AsPointer(ref sphere), ref intersection_normal)) {
+            if (intersector.Intersect(ref pos, ref dir, &sphere, ref intersection_normal)) {
                 sampleEnv(ref intersection_normal, ref color);
 
                 const float ambientScale = 0.2f;
diff --git a/src/mono/wasi/README.md b/src/mono/wasi/README.md
index 106c65322dc3..9cf3f8757afa 100644
--- a/src/mono/wasi/README.md
+++ b/src/mono/wasi/README.md
@@ -2,6 +2,46 @@
 
 This directory contains a build configuration for WASI support, plus a basic sample. This is not intended for production use, nor is it currently supported. This is a step towards possible future support.
 
+## Try it out
+
+Here is a quick overview of how to consume published artifacts. Assuming .NET SDK is already installed, you should run:
+
+```
+dotnet workload install wasi-experimental
+```
+
+This will install workload for building .NET based WASI apps + basic template.
+Now you can create a new .NET application that targets WASI
+
+```
+dotnet new wasiconsole
+```
+
+And run it with
+
+```
+dotnet run
+```
+
+The `runtimeconfig.template.json` contains `perHostConfig` section where wasm hosts can be configured
+
+### Wasi SDK
+
+The workload for the time being doesn't include Wasi SDK, which is responsible for native compilation.
+If you don't need to modify runtime configuration, you can omit this step. In case you get:
+
+```
+error : Could not find wasi-sdk. Either set $(WASI_SDK_PATH), or use workloads to get the sdk. SDK is required for building native files.
+```
+
+you will need to separately download a WASI SDK from https://github.com/WebAssembly/wasi-sdk and point an environment variable `WASI_SDK_PATH` or MSBuild property `WasiSdkRoot` to a location where you extract it.
+
+### Optional build flags
+
+- `WasmSingleFileBundle` - bundle all assets into the `.wasm`. The output file name will match the project name.
+- `InvariantGlobalization` - remove globalization support, decrease the publish size.
+- More details can be found at https://github.com/dotnet/runtime/blob/main/src/mono/wasm/build/WasmApp.Common.targets and https://github.com/dotnet/runtime/blob/main/src/mono/wasi/build/WasiApp.targets
+
 ## How it works
 
 The mechanism for executing .NET code in a WASI runtime environment is equivalent to how `dotnet.wasm` executes .NET code in a browser environment. That is, it runs the Mono interpreter to execute .NET bytecode that has been built in the normal way. It should also work with AOT but this is not yet attempted.
@@ -65,4 +105,4 @@ Download the Mono Debug extension and configure a launch.json like this:
         }
     ]
 }
-```
\ No newline at end of file
+```
diff --git a/src/mono/wasi/Wasi.Build.Tests/PInvokeTableGeneratorTests.cs b/src/mono/wasi/Wasi.Build.Tests/PInvokeTableGeneratorTests.cs
new file mode 100644
index 000000000000..6a47c0364658
--- /dev/null
+++ b/src/mono/wasi/Wasi.Build.Tests/PInvokeTableGeneratorTests.cs
@@ -0,0 +1,87 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.IO;
+using Xunit;
+using Xunit.Abstractions;
+using Wasm.Build.Tests;
+
+#nullable enable
+
+namespace Wasi.Build.Tests;
+
+public class PInvokeTableGeneratorTests : BuildTestBase
+{
+    public PInvokeTableGeneratorTests(ITestOutputHelper output, SharedBuildPerTestClassFixture buildContext)
+        : base(output, buildContext)
+    {
+    }
+
+    [Fact]
+    public void InteropSupportForUnmanagedEntryPointWithoutDelegate()
+    {
+        string config = "Release";
+        string id = $"{config}_{GetRandomId()}";
+        string projectFile = CreateWasmTemplateProject(id, "wasiconsole");
+        string code =
+                """
+                using System;
+                using System.Runtime.InteropServices;
+                public unsafe class Test
+                {
+                    [UnmanagedCallersOnly(EntryPoint = "ManagedFunc")]
+                    public static int MyExport(int number)
+                    {
+                        // called from MyImport aka UnmanagedFunc
+                        Console.WriteLine($"MyExport({number}) -> 42");
+                        return 42;
+                    }
+
+                    [DllImport("*", EntryPoint = "UnmanagedFunc")]
+                    public static extern void MyImport(); // calls ManagedFunc aka MyExport
+
+                    public unsafe static int Main(string[] args)
+                    {
+                        Console.WriteLine($"main: {args.Length}");
+                        MyImport();
+                        return 0;
+                    }
+                }
+                """;
+        string cCode =
+                """
+                #include <stdio.h>
+
+                int ManagedFunc(int number);
+
+                void UnmanagedFunc()
+                {
+                    int ret = 0;
+                    printf("UnmanagedFunc calling ManagedFunc\n");
+                    ret = ManagedFunc(123);
+                    printf("ManagedFunc returned %d\n", ret);
+                }
+                """;
+        File.WriteAllText(Path.Combine(_projectDir!, "Program.cs"), code);
+        File.WriteAllText(Path.Combine(_projectDir!, "local.c"), cCode);
+        string extraProperties = @"<WasmNativeStrip>false</WasmNativeStrip>
+                                   <AllowUnsafeBlocks>true</AllowUnsafeBlocks>";
+        AddItemsPropertiesToProject(projectFile, extraProperties: extraProperties, extraItems: @"<NativeFileReference Include=""local.c"" />");
+        string projectName = Path.GetFileNameWithoutExtension(projectFile);
+        var buildArgs = new BuildArgs(projectName, config, AOT: true, ProjectFileContents: id, ExtraBuildArgs: null);
+        buildArgs = ExpandBuildArgs(buildArgs);
+        BuildProject(buildArgs,
+                    id: id,
+                    new BuildProjectOptions(
+                        DotnetWasmFromRuntimePack: false,
+                        CreateProject: false,
+                        Publish: true
+                        ));
+
+        CommandResult res = new RunCommand(s_buildEnv, _testOutput)
+                                    .WithWorkingDirectory(_projectDir!)
+                                    .ExecuteWithCapturedOutput($"run --no-silent --no-build -c {config}")
+                                    .EnsureSuccessful();
+        Assert.Contains("MyExport(123) -> 42", res.Output);
+    }
+}
diff --git a/src/mono/wasi/build/WasiApp.targets b/src/mono/wasi/build/WasiApp.targets
index e54fbe4f89ac..b229d76dd71f 100644
--- a/src/mono/wasi/build/WasiApp.targets
+++ b/src/mono/wasi/build/WasiApp.targets
@@ -39,7 +39,6 @@
   <ItemGroup>
     <_BoolPropertiesThatTriggerRelinking Include="WasmEnableSIMD" DefaultValueInRuntimePack="false" />
     <!--<_BoolPropertiesThatTriggerRelinking Include="WasmEnableExceptionHandling" DefaultValueInRuntimePack="true" />-->
-    <_BoolPropertiesThatTriggerRelinking Include="WasmNativeStrip" DefaultValueInRuntimePack="true" />
   </ItemGroup>
 
   <Target Name="_GetWasiGenerateAppBundleDependencies">
@@ -66,6 +65,11 @@
           Outputs="$(WasmAppDir)\.stamp"
           Condition="'$(WasmGenerateAppBundle)' == 'true'">
 
+    <PropertyGroup>
+      <_WasmOutputSymbolsToAppBundle Condition="'$(CopyOutputSymbolsToPublishDirectory)' == 'true' and '$(_IsPublishing)' == 'true'">true</_WasmOutputSymbolsToAppBundle>
+      <_WasmOutputSymbolsToAppBundle Condition="'$(_WasmOutputSymbolsToAppBundle)' == ''">false</_WasmOutputSymbolsToAppBundle>
+    </PropertyGroup>
+
     <WasiAppBuilder
       AppDir="$(WasmAppDir)"
       Assemblies="@(_WasmAssembliesInternal)"
@@ -80,7 +84,7 @@
       FilesToIncludeInFileSystem="@(WasmFilesToIncludeInFileSystem)"
       ExtraFilesToDeploy="@(WasmExtraFilesToDeploy)"
       NativeAssets="@(WasmNativeAsset)"
-      DebugLevel="$(WasmDebugLevel)"
+      OutputSymbolsToAppBundle="$(_WasmOutputSymbolsToAppBundle)"
       RuntimeConfigJsonPath="$(_WasmRuntimeConfigFilePath)"
       />
   </Target>
diff --git a/src/mono/wasi/mono-include/driver.h b/src/mono/wasi/mono-include/driver.h
index 47c72f146f7c..8931972755a6 100644
--- a/src/mono/wasi/mono-include/driver.h
+++ b/src/mono/wasi/mono-include/driver.h
@@ -11,7 +11,6 @@ MonoAssembly* mono_wasm_assembly_load(const char *name);
 MonoMethod* mono_wasi_assembly_get_entry_point (MonoAssembly *assembly);
 MonoClass* mono_wasm_assembly_find_class (MonoAssembly *assembly, const char *namespace, const char *name);
 MonoMethod* mono_wasm_assembly_find_method (MonoClass *klass, const char *name, int arguments);
-MonoObject* mono_wasm_invoke_method (MonoMethod *method, MonoObject *this_arg, void *params[], MonoObject **out_exc);
 int mono_unbox_int (MonoObject *obj);
 void add_assembly(const char* base_dir, const char *name);
 
diff --git a/src/mono/wasi/wasi-sdk-version.txt b/src/mono/wasi/wasi-sdk-version.txt
index 209e3ef4b624..aabe6ec3909c 100644
--- a/src/mono/wasi/wasi-sdk-version.txt
+++ b/src/mono/wasi/wasi-sdk-version.txt
@@ -1 +1 @@
-20
+21
diff --git a/src/mono/wasm/Wasm.Build.Tests/Blazor/AppsettingsTests.cs b/src/mono/wasm/Wasm.Build.Tests/Blazor/AppsettingsTests.cs
index 8069ef424f18..db0607d226a8 100644
--- a/src/mono/wasm/Wasm.Build.Tests/Blazor/AppsettingsTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/Blazor/AppsettingsTests.cs
@@ -46,7 +46,7 @@ var builder
         await BlazorRunForBuildWithDotnetRun(new BlazorRunOptions()
         {
             Config = "debug",
-            OnConsoleMessage = msg =>
+            OnConsoleMessage = (_, msg) =>
             {
                 if (msg.Text.Contains("appSettings Exists 'True'"))
                     existsChecked = true;
diff --git a/src/mono/wasm/Wasm.Build.Tests/Blazor/BlazorRunOptions.cs b/src/mono/wasm/Wasm.Build.Tests/Blazor/BlazorRunOptions.cs
index 683524eac322..c0e2a2e60cce 100644
--- a/src/mono/wasm/Wasm.Build.Tests/Blazor/BlazorRunOptions.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/Blazor/BlazorRunOptions.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Collections.Generic;
 using System.Threading.Tasks;
 using Microsoft.Playwright;
 
@@ -13,11 +14,14 @@ public record BlazorRunOptions
     BlazorRunHost Host = BlazorRunHost.DotnetRun,
     bool DetectRuntimeFailures = true,
     bool CheckCounter = true,
+    Dictionary<string, string>? ServerEnvironment = null,
     Func<IPage, Task>? Test = null,
-    Action<IConsoleMessage>? OnConsoleMessage = null,
+    Action<IPage, IConsoleMessage>? OnConsoleMessage = null,
+    Action<string>? OnServerMessage = null,
     Action<string>? OnErrorMessage = null,
     string Config = "Debug",
     string? ExtraArgs = null,
+    string BrowserPath = "",
     string QueryString = ""
 );
 
diff --git a/src/mono/wasm/Wasm.Build.Tests/Blazor/BlazorWasmTestBase.cs b/src/mono/wasm/Wasm.Build.Tests/Blazor/BlazorWasmTestBase.cs
index a5d01c2d8838..da9c7764f2d1 100644
--- a/src/mono/wasm/Wasm.Build.Tests/Blazor/BlazorWasmTestBase.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/Blazor/BlazorWasmTestBase.cs
@@ -63,7 +63,9 @@ public string CreateBlazorWasmTemplateProject(string id)
         (CommandResult res, string logPath) = BlazorBuildInternal(options.Id, options.Config, publish: false, setWasmDevel: false, expectSuccess: options.ExpectSuccess, extraArgs);
 
         if (options.ExpectSuccess && options.AssertAppBundle)
+        {
             AssertBundle(res.Output, options with { IsPublish = false });
+        }
 
         return (res, logPath);
     }
@@ -77,6 +79,10 @@ public string CreateBlazorWasmTemplateProject(string id)
 
         if (options.ExpectSuccess && options.AssertAppBundle)
         {
+            // Because we do relink in Release publish by default
+            if (options.Config == "Release")
+                options = options with { ExpectedFileType = NativeFilesType.Relinked };
+
             AssertBundle(res.Output, options with { IsPublish = true });
         }
 
@@ -185,14 +191,24 @@ public async Task BlazorRunTest(string runArgs,
     {
         if (!string.IsNullOrEmpty(runOptions.ExtraArgs))
             runArgs += $" {runOptions.ExtraArgs}";
+
+        runOptions.ServerEnvironment?.ToList().ForEach(
+            kv => s_buildEnv.EnvVars[kv.Key] = kv.Value);
+
         using var runCommand = new RunCommand(s_buildEnv, _testOutput)
                                     .WithWorkingDirectory(workingDirectory);
 
         await using var runner = new BrowserRunner(_testOutput);
-        var page = await runner.RunAsync(runCommand, runArgs, onConsoleMessage: OnConsoleMessage, onError: OnErrorMessage, modifyBrowserUrl: browserUrl => browserUrl + runOptions.QueryString);
+        var page = await runner.RunAsync(
+            runCommand,
+            runArgs,
+            onConsoleMessage: OnConsoleMessage,
+            onServerMessage: runOptions.OnServerMessage,
+            onError: OnErrorMessage,
+            modifyBrowserUrl: browserUrl => browserUrl + runOptions.BrowserPath + runOptions.QueryString);
 
         _testOutput.WriteLine("Waiting for page to load");
-        await page.WaitForLoadStateAsync(LoadState.DOMContentLoaded);
+        await page.WaitForLoadStateAsync(LoadState.DOMContentLoaded, new () { Timeout = 1 * 60 * 1000 });
 
         if (runOptions.CheckCounter)
         {
@@ -201,6 +217,7 @@ public async Task BlazorRunTest(string runArgs,
             Assert.Equal("Current count: 0", txt);
 
             await page.Locator("text=\"Click me\"").ClickAsync();
+            await Task.Delay(300);
             txt = await page.Locator("p[role='status']").InnerHTMLAsync();
             Assert.Equal("Current count: 1", txt);
         }
@@ -211,11 +228,11 @@ public async Task BlazorRunTest(string runArgs,
         _testOutput.WriteLine($"Waiting for additional 10secs to see if any errors are reported");
         await Task.Delay(10_000);
 
-        void OnConsoleMessage(IConsoleMessage msg)
+        void OnConsoleMessage(IPage page, IConsoleMessage msg)
         {
             _testOutput.WriteLine($"[{msg.Type}] {msg.Text}");
 
-            runOptions.OnConsoleMessage?.Invoke(msg);
+            runOptions.OnConsoleMessage?.Invoke(page, msg);
 
             if (runOptions.DetectRuntimeFailures)
             {
@@ -231,6 +248,12 @@ void OnErrorMessage(string msg)
         }
     }
 
-    public string FindBlazorBinFrameworkDir(string config, bool forPublish, string framework = DefaultTargetFrameworkForBlazor)
-        => _provider.FindBinFrameworkDir(config: config, forPublish: forPublish, framework: framework);
+    public string FindBlazorBinFrameworkDir(string config, bool forPublish, string framework = DefaultTargetFrameworkForBlazor, string? projectDir = null)
+        => _provider.FindBinFrameworkDir(config: config, forPublish: forPublish, framework: framework, projectDir: projectDir);
+
+    public string FindBlazorHostedBinFrameworkDir(string config, bool forPublish, string clientDirRelativeToProjectDir, string framework = DefaultTargetFrameworkForBlazor)
+    {
+        string? clientProjectDir = _projectDir == null ? null : Path.Combine(_projectDir, clientDirRelativeToProjectDir);
+        return _provider.FindBinFrameworkDir(config: config, forPublish: forPublish, framework: framework, projectDir: clientProjectDir);
+    }
 }
diff --git a/src/mono/wasm/Wasm.Build.Tests/Blazor/SimpleMultiThreadedTests.cs b/src/mono/wasm/Wasm.Build.Tests/Blazor/SimpleMultiThreadedTests.cs
index c28f18a3c574..556d40d42a40 100644
--- a/src/mono/wasm/Wasm.Build.Tests/Blazor/SimpleMultiThreadedTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/Blazor/SimpleMultiThreadedTests.cs
@@ -38,7 +38,8 @@ public SimpleMultiThreadedTests(ITestOutputHelper output, SharedBuildPerTestClas
     // }
 
     [ConditionalTheory(typeof(BuildTestBase), nameof(IsWorkloadWithMultiThreadingForDefaultFramework))]
-    [InlineData("Debug", false)]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/100373")] // to be fixed by: "https://github.com/dotnet/aspnetcore/issues/54365"
+    // [InlineData("Debug", false)] // ActiveIssue https://github.com/dotnet/runtime/issues/98758
     // [InlineData("Debug", true)]
     [InlineData("Release", false)]
     // [InlineData("Release", true)]
@@ -48,7 +49,17 @@ public async Task BlazorPublishRunTest(string config, bool aot)
         string projectFile = CreateWasmTemplateProject(id, "blazorwasm");
         AddItemsPropertiesToProject(projectFile, "<WasmEnableThreads>true</WasmEnableThreads>");
         // if (aot)
-            // AddItemsPropertiesToProject(projectFile, "<RunAOTCompilation>true</RunAOTCompilation>");
+        // AddItemsPropertiesToProject(projectFile, "<RunAOTCompilation>true</RunAOTCompilation>");
+
+        File.WriteAllText(
+            Path.Combine(Path.GetDirectoryName(projectFile)!, "wwwroot", id + ".lib.module.js"),
+            """
+            export function onRuntimeReady({ runtimeBuildInfo }) {
+                console.log('Runtime is ready: ' + JSON.stringify(runtimeBuildInfo));
+                console.log(`WasmEnableThreads=${runtimeBuildInfo.wasmEnableThreads}`);
+            }
+            """
+        );
 
         BlazorPublish(new BlazorBuildOptions(
             id,
@@ -57,13 +68,17 @@ public async Task BlazorPublishRunTest(string config, bool aot)
                 : (config == "Release" ? NativeFilesType.Relinked : NativeFilesType.FromRuntimePack),
             RuntimeType: RuntimeVariant.MultiThreaded));
 
+        bool hasEmittedWasmEnableThreads = false;
         StringBuilder errorOutput = new();
         await BlazorRunForPublishWithWebServer(
                 runOptions: new BlazorRunOptions(
                     Config: config,
                     ExtraArgs: "--web-server-use-cors --web-server-use-cop",
-                    OnConsoleMessage: (message) =>
+                    OnConsoleMessage: (_, message) =>
                     {
+                        if (message.Text.Contains("WasmEnableThreads=true"))
+                            hasEmittedWasmEnableThreads = true;
+
                         if (message.Type == "error")
                             errorOutput.AppendLine(message.Text);
                     },
@@ -74,5 +89,8 @@ await BlazorRunForPublishWithWebServer(
 
         if (errorOutput.Length > 0)
             throw new XunitException($"Errors found in browser console output:\n{errorOutput}");
+
+        if (!hasEmittedWasmEnableThreads)
+            throw new XunitException($"The test didn't emit expected message 'WasmEnableThreads=true'");
     }
 }
diff --git a/src/mono/wasm/Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs b/src/mono/wasm/Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs
index deb8ad5def8e..a735e2af15e4 100644
--- a/src/mono/wasm/Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs
@@ -120,7 +120,7 @@ await BlazorRunTest(new BlazorRunOptions()
         {
             Config = config,
             Host = publish ? BlazorRunHost.WebServer : BlazorRunHost.DotnetRun,
-            OnConsoleMessage = msg =>
+            OnConsoleMessage = (_, msg) =>
             {
                 sbOutput.AppendLine(msg.Text);
             }
@@ -144,7 +144,8 @@ await BlazorRunTest(new BlazorRunOptions()
             Assert.DoesNotContain("Could not create es-ES culture", output);
             Assert.DoesNotContain("invalid culture", output);
             Assert.DoesNotContain("CurrentCulture.NativeName: Invariant Language (Invariant Country)", output);
-            Assert.Contains("es-ES: Is-LCID-InvariantCulture: False, NativeName: es (ES)", output);
+            Assert.Contains("es-ES: Is-LCID-InvariantCulture: False", output);
+            Assert.Contains("NativeName: espa\u00F1ol (Espa\u00F1a)", output);
 
             // ignoring the last line of the output which prints the current culture
         }
diff --git a/src/mono/wasm/Wasm.Build.Tests/BrowserRunner.cs b/src/mono/wasm/Wasm.Build.Tests/BrowserRunner.cs
index 4aad869d9c17..c29233c69844 100644
--- a/src/mono/wasm/Wasm.Build.Tests/BrowserRunner.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/BrowserRunner.cs
@@ -36,7 +36,8 @@ internal class BrowserRunner : IAsyncDisposable
 
     public async Task<string> StartServerAndGetUrlAsync(
         ToolCommand cmd,
-        string args
+        string args,
+        Action<string>? onServerMessage = null
     ) {
         TaskCompletionSource<string> urlAvailable = new();
         Action<string?> outputHandler = msg =>
@@ -44,8 +45,12 @@ string args
             if (string.IsNullOrEmpty(msg))
                 return;
 
+            onServerMessage?.Invoke(msg);
+
             lock (OutputLines)
+            {
                 OutputLines.Add(msg);
+            }
 
             Match m = s_appHostUrlRegex.Match(msg);
             if (!m.Success)
@@ -70,7 +75,17 @@ string args
         cmd.WithErrorDataReceived(outputHandler).WithOutputDataReceived(outputHandler);
         var runTask = cmd.ExecuteAsync(args);
 
-        await Task.WhenAny(runTask, urlAvailable.Task, Task.Delay(TimeSpan.FromSeconds(30)));
+        var delayTask = Task.Delay(TimeSpan.FromSeconds(30));
+
+        await Task.WhenAny(runTask, urlAvailable.Task, delayTask);
+        if (delayTask.IsCompleted)
+        {
+            _testOutput.WriteLine("First 30s delay reached, scheduling next one");
+
+            delayTask = Task.Delay(TimeSpan.FromSeconds(30));
+            await Task.WhenAny(runTask, urlAvailable.Task, delayTask);
+        }
+
         if (runTask.IsCompleted)
         {
             var res = await runTask;
@@ -91,7 +106,8 @@ public async Task<IBrowser> SpawnBrowserAsync(
     ) {
         var url = new Uri(browserUrl);
         Playwright = await Microsoft.Playwright.Playwright.CreateAsync();
-        string[] chromeArgs = new[] { $"--explicitly-allowed-ports={url.Port}" };
+        // codespaces: ignore certificate error -> Microsoft.Playwright.PlaywrightException : net::ERR_CERT_AUTHORITY_INVALID
+        string[] chromeArgs = new[] { $"--explicitly-allowed-ports={url.Port}", "--ignore-certificate-errors" };
         _testOutput.WriteLine($"Launching chrome ('{s_chromePath.Value}') via playwright with args = {string.Join(',', chromeArgs)}");
         return Browser = await Playwright.Chromium.LaunchAsync(new BrowserTypeLaunchOptions{
             ExecutablePath = s_chromePath.Value,
@@ -105,11 +121,12 @@ public async Task<IPage> RunAsync(
         ToolCommand cmd,
         string args,
         bool headless = true,
-        Action<IConsoleMessage>? onConsoleMessage = null,
+        Action<IPage, IConsoleMessage>? onConsoleMessage = null,
+        Action<string>? onServerMessage = null,
         Action<string>? onError = null,
         Func<string, string>? modifyBrowserUrl = null)
     {
-        var urlString = await StartServerAndGetUrlAsync(cmd, args);
+        var urlString = await StartServerAndGetUrlAsync(cmd, args, onServerMessage);
         var browser = await SpawnBrowserAsync(urlString, headless);
         var context = await browser.NewContextAsync();
         return await RunAsync(context, urlString, headless, onConsoleMessage, onError, modifyBrowserUrl);
@@ -119,7 +136,7 @@ public async Task<IPage> RunAsync(
         IBrowserContext context,
         string browserUrl,
         bool headless = true,
-        Action<IConsoleMessage>? onConsoleMessage = null,
+        Action<IPage, IConsoleMessage>? onConsoleMessage = null,
         Action<string>? onError = null,
         Func<string, string>? modifyBrowserUrl = null,
         bool resetExitedState = false
@@ -131,8 +148,9 @@ public async Task<IPage> RunAsync(
             browserUrl = modifyBrowserUrl(browserUrl);
 
         IPage page = await context.NewPageAsync();
+
         if (onConsoleMessage is not null)
-            page.Console += (_, msg) => onConsoleMessage(msg);
+            page.Console += (_, msg) => onConsoleMessage(page, msg);
 
         onError ??= _testOutput.WriteLine;
         if (onError is not null)
diff --git a/src/mono/wasm/Wasm.Build.Tests/Common/EnvironmentVariables.cs b/src/mono/wasm/Wasm.Build.Tests/Common/EnvironmentVariables.cs
index 841aa6a9b345..50bf882d072d 100644
--- a/src/mono/wasm/Wasm.Build.Tests/Common/EnvironmentVariables.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/Common/EnvironmentVariables.cs
@@ -18,10 +18,10 @@ internal static class EnvironmentVariables
         internal static readonly string? BuiltNuGetsPath           = Environment.GetEnvironmentVariable("BUILT_NUGETS_PATH");
         internal static readonly string? BrowserPathForTests       = Environment.GetEnvironmentVariable("BROWSER_PATH_FOR_TESTS");
         internal static readonly string? V8PathForTests            = Environment.GetEnvironmentVariable("V8_PATH_FOR_TESTS");
-        internal static readonly bool    ShowBuildOutput           = Environment.GetEnvironmentVariable("SHOW_BUILD_OUTPUT") is not null;
+        internal static readonly bool    IsRunningOnCI             = Environment.GetEnvironmentVariable("IS_RUNNING_ON_CI") is "true";
+        internal static readonly bool    ShowBuildOutput           = IsRunningOnCI || Environment.GetEnvironmentVariable("SHOW_BUILD_OUTPUT") is not null;
         internal static readonly bool UseWebcil                    = Environment.GetEnvironmentVariable("USE_WEBCIL_FOR_TESTS") is "true";
         internal static readonly string? SdkDirName                = Environment.GetEnvironmentVariable("SDK_DIR_NAME");
         internal static readonly string? WasiSdkPath               = Environment.GetEnvironmentVariable("WASI_SDK_PATH");
-        internal static readonly bool    IsRunningOnCI             = Environment.GetEnvironmentVariable("IS_RUNNING_ON_CI") is "true";
     }
 }
diff --git a/src/mono/wasm/Wasm.Build.Tests/Common/TestOutputWrapper.cs b/src/mono/wasm/Wasm.Build.Tests/Common/TestOutputWrapper.cs
index a28657fa7bf0..03bef6c6ccb3 100644
--- a/src/mono/wasm/Wasm.Build.Tests/Common/TestOutputWrapper.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/Common/TestOutputWrapper.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Text;
 using Xunit.Abstractions;
 
 #nullable enable
@@ -10,9 +11,12 @@ namespace Wasm.Build.Tests;
 
 public class TestOutputWrapper(ITestOutputHelper baseOutput) : ITestOutputHelper
 {
+    private readonly StringBuilder _outputBuffer = new StringBuilder();
+
     public void WriteLine(string message)
     {
         baseOutput.WriteLine(message);
+        _outputBuffer.AppendLine(message);
         if (EnvironmentVariables.ShowBuildOutput)
             Console.WriteLine(message);
     }
@@ -20,7 +24,10 @@ public void WriteLine(string message)
     public void WriteLine(string format, params object[] args)
     {
         baseOutput.WriteLine(format, args);
+        _outputBuffer.AppendFormat(format, args).AppendLine();
         if (EnvironmentVariables.ShowBuildOutput)
             Console.WriteLine(format, args);
     }
+
+    public override string ToString() => _outputBuffer.ToString();
 }
diff --git a/src/mono/wasm/Wasm.Build.Tests/Common/TestUtils.cs b/src/mono/wasm/Wasm.Build.Tests/Common/TestUtils.cs
index d9acfdaa3765..97aa019d4543 100644
--- a/src/mono/wasm/Wasm.Build.Tests/Common/TestUtils.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/Common/TestUtils.cs
@@ -52,7 +52,6 @@ public static void AssertFile(string file0, string file1, string? label = null,
         if (!same && finfo0.Length == finfo1.Length)
             throw new XunitException($"{label}:{Environment.NewLine}  File sizes should not match for {file0} ({finfo0.Length}), and {file1} ({finfo1.Length})");
     }
-
     public static string FindSubDirIgnoringCase(string parentDir, string dirName)
     {
         IEnumerable<string> matchingDirs = Directory.EnumerateDirectories(parentDir,
diff --git a/src/mono/wasm/Wasm.Build.Tests/InvariantGlobalizationTests.cs b/src/mono/wasm/Wasm.Build.Tests/InvariantGlobalizationTests.cs
index 25f16fca67af..816f08866673 100644
--- a/src/mono/wasm/Wasm.Build.Tests/InvariantGlobalizationTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/InvariantGlobalizationTests.cs
@@ -69,8 +69,8 @@ private void TestInvariantGlobalization(BuildArgs buildArgs, bool? invariantGlob
             }
             else
             {
-                string output = RunAndTestWasmApp(buildArgs, expectedExitCode: 42, host: host, id: id);
-                Assert.Contains("es-ES: Is Invariant LCID: False, NativeName: es (ES)", output);
+                string output = RunAndTestWasmApp(buildArgs, expectedExitCode: 42, host: host, id: id, args: "nativename=\"espa\u00F1ol (Espa\u00F1a)\"");
+                Assert.Contains("es-ES: Is Invariant LCID: False", output);
 
                 // ignoring the last line of the output which prints the current culture
             }
diff --git a/src/mono/wasm/Wasm.Build.Tests/MainWithArgsTests.cs b/src/mono/wasm/Wasm.Build.Tests/MainWithArgsTests.cs
index eb9829438f75..fc6099fb7279 100644
--- a/src/mono/wasm/Wasm.Build.Tests/MainWithArgsTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/MainWithArgsTests.cs
@@ -39,8 +39,8 @@ public static async System.Threading.Tasks.Task<int> Main(string[] args)
                 buildArgs, args, host, id);
 
         [Theory]
-        [MemberData(nameof(MainWithArgsTestData), parameters: new object[] { /*aot*/ false, RunHost.All })]
-        [MemberData(nameof(MainWithArgsTestData), parameters: new object[] { /*aot*/ true, RunHost.All })]
+        [MemberData(nameof(MainWithArgsTestData), parameters: new object[] { /*aot*/ false, RunHost.NodeJS })]
+        //[MemberData(nameof(MainWithArgsTestData), parameters: new object[] { /*aot*/ true, RunHost.All })]
         public void TopLevelWithArgs(BuildArgs buildArgs, string[] args, RunHost host, string id)
             => TestMainWithArgs("top_level_args",
                                 @"##CODE## return await System.Threading.Tasks.Task.FromResult(42 + count);",
@@ -89,12 +89,24 @@ void TestMainWithArgs(string projectNamePrefix,
                                 InitProject: () => File.WriteAllText(Path.Combine(_projectDir!, "Program.cs"), programText),
                                 DotnetWasmFromRuntimePack: dotnetWasmFromRuntimePack));
 
-            RunAndTestWasmApp(buildArgs, buildDir: _projectDir, expectedExitCode: 42 + args.Length, args: string.Join(' ', args),
+            // Because we get extra "-verbosity", "Debug" from XHarness
+            int argsCount = args.Length;
+            bool isBrowser = host == RunHost.Chrome || host == RunHost.Firefox || host == RunHost.Safari;
+            if (isBrowser)
+                argsCount += 2;
+
+            RunAndTestWasmApp(buildArgs, buildDir: _projectDir, expectedExitCode: 42 + argsCount, args: string.Join(' ', args),
                 test: output =>
                 {
-                    Assert.Contains($"args#: {args.Length}", output);
+                    Assert.Contains($"args#: {argsCount}", output);
                     foreach (var arg in args)
                         Assert.Contains($"arg: {arg}", output);
+
+                    if (isBrowser)
+                    {
+                        Assert.Contains($"arg: -verbosity", output);
+                        Assert.Contains($"arg: Debug", output);
+                    }
                 }, host: host, id: id);
         }
     }
diff --git a/src/mono/wasm/Wasm.Build.Tests/NativeBuildTests.cs b/src/mono/wasm/Wasm.Build.Tests/NativeBuildTests.cs
index ac8b567761f3..f7c7483a949d 100644
--- a/src/mono/wasm/Wasm.Build.Tests/NativeBuildTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/NativeBuildTests.cs
@@ -93,5 +93,24 @@ public void IntermediateBitcodeToObjectFilesAreNotLLVMIR(BuildArgs buildArgs, st
                                             + " It might fail if it was incorrectly compiled to a bitcode file, instead of wasm.");
         }
 
+        [Theory]
+        [BuildAndRun(host: RunHost.None, aot: true)]
+        public void NativeBuildIsRequired(BuildArgs buildArgs, string id)
+        {
+            string projectName = $"native_build_{buildArgs.Config}_{buildArgs.AOT}";
+
+            buildArgs = buildArgs with { ProjectName = projectName, ExtraBuildArgs = "-p:WasmBuildNative=false -p:WasmSingleFileBundle=true" };
+            buildArgs = ExpandBuildArgs(buildArgs);
+
+            (_, string output) = BuildProject(
+                                    buildArgs,
+                                    id: id,
+                                    new BuildProjectOptions(
+                                        InitProject: () => File.WriteAllText(Path.Combine(_projectDir!, "Program.cs"), s_mainReturns42),
+                                        DotnetWasmFromRuntimePack: false,
+                                        ExpectSuccess: false));
+
+            Assert.Contains("WasmBuildNative is required", output);
+        }
     }
 }
diff --git a/src/mono/wasm/Wasm.Build.Tests/PInvokeTableGeneratorTests.cs b/src/mono/wasm/Wasm.Build.Tests/PInvokeTableGeneratorTests.cs
index f553f10ddeb3..8c0442a1b0d6 100644
--- a/src/mono/wasm/Wasm.Build.Tests/PInvokeTableGeneratorTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/PInvokeTableGeneratorTests.cs
@@ -32,7 +32,7 @@ public class Test
                     public static int Main(string[] args)
                     {
                         Console.WriteLine($""Main running"");
-                        if (args.Length > 0)
+                        if (args.Length > 2)
                         {
                             // We don't want to run this, because we can't call variadic functions
                             Console.WriteLine($""sum_three: {sum_three(7, 14, 21)}"");
@@ -739,6 +739,16 @@ public struct Nested1 {
                 public struct SingleI64Struct {
                     public Int64 Value;
                 }
+                public struct PairStruct {
+                    public int A, B;
+                }
+                public unsafe struct MyFixedArray {
+                    public fixed int elements[2];
+                }
+                [System.Runtime.CompilerServices.InlineArray(2)]
+                public struct MyInlineArray {
+                    public int element0;
+                }
 
                 public class Test
                 {
@@ -765,9 +775,35 @@ public static unsafe int Main(string[] argv)
                         var res = indirect(sds);
                         Console.WriteLine(""s (s)="" + res.Value);
 
+                        var pair = new PairStruct { A = 1, B = 2 };
+                        var paires = accept_and_return_pair(pair);
+                        Console.WriteLine(""paires.B="" + paires.B);
+
+                        // This test is split into methods to simplify debugging issues with it
+                        var ia = InlineArrayTest1();
+                        var iares = InlineArrayTest2(ia);
+                        Console.WriteLine($""iares[0]={iares[0]} iares[1]={iares[1]}"");
+
+                        MyFixedArray fa = new ();
+                        for (int i = 0; i < 2; i++)
+                            fa.elements[i] = i;
+                        var fares = accept_and_return_fixedarray(fa);
+                        Console.WriteLine(""fares.elements[1]="" + fares.elements[1]);
+
                         return (int)res.Value;
                     }
 
+                    public static unsafe MyInlineArray InlineArrayTest1 () {
+                        MyInlineArray ia = new ();
+                        for (int i = 0; i < 2; i++)
+                            ia[i] = i;
+                        return ia;
+                    }
+
+                    public static unsafe MyInlineArray InlineArrayTest2 (MyInlineArray ia) {
+                        return accept_and_return_inlinearray(ia);
+                    }
+
                     [DllImport(""wasm-abi"", EntryPoint=""accept_double_struct_and_return_float_struct"")]
                     public static extern SingleFloatStruct indirect(SingleDoubleStruct arg);
 
@@ -782,9 +818,18 @@ public static unsafe int Main(string[] argv)
 
                     [DllImport(""wasm-abi"", EntryPoint=""accept_and_return_i64_struct"")]
                     public static extern Int64 direct64(Int64 arg);
+
+                    [DllImport(""wasm-abi"", EntryPoint=""accept_and_return_pair"")]
+                    public static extern PairStruct accept_and_return_pair(PairStruct arg);
+
+                    [DllImport(""wasm-abi"", EntryPoint=""accept_and_return_fixedarray"")]
+                    public static extern MyFixedArray accept_and_return_fixedarray(MyFixedArray arg);
+
+                    [DllImport(""wasm-abi"", EntryPoint=""accept_and_return_inlinearray"")]
+                    public static extern MyInlineArray accept_and_return_inlinearray(MyInlineArray arg);
                 }";
 
-            var extraProperties = "<AllowUnsafeBlocks>true</AllowUnsafeBlocks><_WasmDevel>true</_WasmDevel>";
+            var extraProperties = "<AllowUnsafeBlocks>true</AllowUnsafeBlocks><_WasmDevel>false</_WasmDevel><WasmNativeStrip>false</WasmNativeStrip>";
             var extraItems = @"<NativeFileReference Include=""wasm-abi.c"" />";
 
             buildArgs = ExpandBuildArgs(buildArgs,
@@ -824,6 +869,10 @@ public static unsafe int Main(string[] argv)
             Assert.Contains("f (d)=3.14", runOutput);
             Assert.Contains("f (s)=3.14", runOutput);
             Assert.Contains("s (s)=3.14", runOutput);
+            Assert.Contains("paires.B=4", runOutput);
+            Assert.Contains("iares[0]=32", runOutput);
+            Assert.Contains("iares[1]=2", runOutput);
+            Assert.Contains("fares.elements[1]=2", runOutput);
         }
 
         [Theory]
diff --git a/src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs b/src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs
index 81c6a4894f12..581a187270ae 100644
--- a/src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/ProjectProviderBase.cs
@@ -490,10 +490,10 @@ private void AssertFileNames(IEnumerable<string> expected, IEnumerable<string> a
         Assert.Equal(expected, actualFileNames);
     }
 
-    public virtual string FindBinFrameworkDir(string config, bool forPublish, string framework, string? bundleDirName = null)
+    public virtual string FindBinFrameworkDir(string config, bool forPublish, string framework, string? bundleDirName = null, string? projectDir = null)
     {
         EnsureProjectDirIsSet();
-        string basePath = Path.Combine(ProjectDir!, "bin", config, framework);
+        string basePath = Path.Combine(projectDir ?? ProjectDir!, "bin", config, framework);
         if (forPublish)
             basePath = FindSubDirIgnoringCase(basePath, "publish");
 
diff --git a/src/mono/wasm/Wasm.Build.Tests/Templates/InterpPgoTests.cs b/src/mono/wasm/Wasm.Build.Tests/Templates/InterpPgoTests.cs
index 203ad80fc4dc..4fe88ee8daca 100644
--- a/src/mono/wasm/Wasm.Build.Tests/Templates/InterpPgoTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/Templates/InterpPgoTests.cs
@@ -34,7 +34,7 @@ public async Task FirstRunGeneratesTableAndSecondRunLoadsIt(string config)
 
         string id = $"browser_{config}_{GetRandomId()}";
         _testOutput.WriteLine("/// Creating project");
-        string projectFile = CreateWasmTemplateProject(id, "wasmbrowser");
+        string projectFile = CreateWasmTemplateProject(id, "wasmbrowser", extraProperties: "<WasmDebugLevel>0</WasmDebugLevel>");
 
         _testOutput.WriteLine("/// Updating JS");
         UpdateBrowserMainJs((js) => {
@@ -53,6 +53,7 @@ public async Task FirstRunGeneratesTableAndSecondRunLoadsIt(string config)
             //  then call INTERNAL.interp_pgo_save_data() to save the interp PGO table
             js = js.Replace(
                 "const text = exports.MyClass.Greeting();",
+                "console.log(`WASM debug level ${getConfig().debugLevel}`);\n" + 
                 "let text = '';\n" +
                 $"for (let i = 0; i < {iterationCount}; i++) {{ text = exports.MyClass.Greeting(); }};\n" +
                 "await INTERNAL.interp_pgo_save_data();"
diff --git a/src/mono/wasm/Wasm.Build.Tests/Templates/WasmTemplateTests.cs b/src/mono/wasm/Wasm.Build.Tests/Templates/WasmTemplateTests.cs
index cd7e770503b4..d3e2e506b477 100644
--- a/src/mono/wasm/Wasm.Build.Tests/Templates/WasmTemplateTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/Templates/WasmTemplateTests.cs
@@ -13,7 +13,7 @@
 
 namespace Wasm.Build.Tests
 {
-    public class WasmTemplateTests : WasmTemplateTestBase
+    public class WasmTemplateTests : BlazorWasmTestBase
     {
         public WasmTemplateTests(ITestOutputHelper output, SharedBuildPerTestClassFixture buildContext)
             : base(output, buildContext)
@@ -603,5 +603,29 @@ internal static void TestWasmStripILAfterAOTOutput(string objBuildDir, string fr
                 }
             }
         }
+
+        [Theory]
+        [InlineData(false)]
+        [InlineData(true)]
+        public void PublishPdb(bool copyOutputSymbolsToPublishDirectory)
+        {
+            string config = "Release";
+            string id = $"publishpdb_{copyOutputSymbolsToPublishDirectory.ToString().ToLower()}_{GetRandomId()}";
+            CreateWasmTemplateProject(id, "wasmbrowser");
+
+            (CommandResult result, _) = BlazorPublish(new BlazorBuildOptions(id, config), $"-p:CopyOutputSymbolsToPublishDirectory={copyOutputSymbolsToPublishDirectory.ToString().ToLower()}");
+            result.EnsureSuccessful();
+
+            string publishFrameworkPath = Path.GetFullPath(FindBlazorBinFrameworkDir(config, forPublish: true));
+            AssertFile(".pdb");
+            AssertFile(".pdb.gz");
+            AssertFile(".pdb.br");
+
+            void AssertFile(string suffix)
+            {
+                var fileName = $"{id}{suffix}";
+                Assert.True(copyOutputSymbolsToPublishDirectory == File.Exists(Path.Combine(publishFrameworkPath, fileName)), $"The {fileName} file {(copyOutputSymbolsToPublishDirectory ? "should" : "shouldn't")} exist in publish folder");
+            }
+        }
     }
 }
diff --git a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/AppSettingsTests.cs b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/AppSettingsTests.cs
index 96f2c4ebd6a1..5d028cc23890 100644
--- a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/AppSettingsTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/AppSettingsTests.cs
@@ -28,7 +28,7 @@ public async Task LoadAppSettingsBasedOnApplicationEnvironment(string applicatio
         CopyTestAsset("WasmBasicTestApp", "AppSettingsTests");
         PublishProject("Debug");
 
-        var result = await RunSdkStyleApp(new(
+        var result = await RunSdkStyleAppForPublish(new(
             Configuration: "Debug",
             TestScenario: "AppSettingsTest",
             BrowserQueryString: new Dictionary<string, string> { ["applicationEnvironment"] = applicationEnvironment }
diff --git a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/AppTestBase.cs b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/AppTestBase.cs
index dc6fb9b490e7..01a1afe96c0b 100644
--- a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/AppTestBase.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/AppTestBase.cs
@@ -31,20 +31,54 @@ protected void CopyTestAsset(string assetName, string generatedProjectNamePrefix
         LogPath = Path.Combine(s_buildEnv.LogRootPath, Id);
         Utils.DirectoryCopy(Path.Combine(BuildEnvironment.TestAssetsPath, assetName), Path.Combine(_projectDir!));
 
-        // WasmBasicTestApp consists of App + Library projects
-        if (assetName == "WasmBasicTestApp")
-            _projectDir = Path.Combine(_projectDir!, "App");
+        switch(assetName)
+        {
+            case "WasmBasicTestApp":
+                // WasmBasicTestApp consists of App + Library projects
+                _projectDir = Path.Combine(_projectDir!, "App");
+                break;
+            case "BlazorHostedApp":
+                // BlazorHostedApp consists of BlazorHosted.Client and BlazorHosted.Server projects
+                _projectDir = Path.Combine(_projectDir!, "BlazorHosted.Server");
+                break;
+        }
+    }
+
+    protected void BlazorHostedBuild(
+        string config,
+        string assetName,
+        string clientDirRelativeToProjectDir = "",
+        string? generatedProjectNamePrefix = null,
+        RuntimeVariant runtimeType = RuntimeVariant.SingleThreaded)
+    {
+        CopyTestAsset(assetName, generatedProjectNamePrefix);
+        string frameworkDir = FindBlazorHostedBinFrameworkDir(config,
+            forPublish: false,
+            clientDirRelativeToProjectDir: clientDirRelativeToProjectDir);
+        BuildProject(configuration: config,
+            binFrameworkDir: frameworkDir,
+            runtimeType: runtimeType);
     }
 
-    protected void BuildProject(string configuration)
+    protected void BuildProject(
+        string configuration,
+        string? binFrameworkDir = null,
+        RuntimeVariant runtimeType = RuntimeVariant.SingleThreaded,
+        bool assertAppBundle = true,
+        params string[] extraArgs)
     {
-        (CommandResult result, _) = BlazorBuild(new BlazorBuildOptions(Id, configuration));
+        (CommandResult result, _) = BlazorBuild(new BlazorBuildOptions(
+            Id: Id,
+            Config: configuration,
+            BinFrameworkDir: binFrameworkDir,
+            RuntimeType: runtimeType,
+            AssertAppBundle: assertAppBundle), extraArgs);
         result.EnsureSuccessful();
     }
 
-    protected void PublishProject(string configuration)
+    protected void PublishProject(string configuration, params string[] extraArgs)
     {
-        (CommandResult result, _) = BlazorPublish(new BlazorBuildOptions(Id, configuration));
+        (CommandResult result, _) = BlazorPublish(new BlazorBuildOptions(Id, configuration), extraArgs);
         result.EnsureSuccessful();
     }
 
@@ -52,32 +86,43 @@ protected void PublishProject(string configuration)
         .WithWorkingDirectory(_projectDir!)
         .WithEnvironmentVariable("NUGET_PACKAGES", _nugetPackagesDir);
 
-    protected async Task<RunResult> RunSdkStyleApp(RunOptions options)
+    protected Task<RunResult> RunSdkStyleAppForBuild(RunOptions options)
+        => RunSdkStyleApp(options, BlazorRunHost.DotnetRun);
+
+    protected Task<RunResult> RunSdkStyleAppForPublish(RunOptions options)
+        => RunSdkStyleApp(options, BlazorRunHost.WebServer);
+
+    private async Task<RunResult> RunSdkStyleApp(RunOptions options, BlazorRunHost host = BlazorRunHost.DotnetRun)
     {
-        string queryString = "?test=" + options.TestScenario;
-        if (options.BrowserQueryString != null)
-            queryString += "&" + string.Join("&", options.BrowserQueryString.Select(kvp => $"{kvp.Key}={kvp.Value}"));
+        var query = options.BrowserQueryString ?? new Dictionary<string, string>();
+        if (!string.IsNullOrEmpty(options.TestScenario))
+            query.Add("test", options.TestScenario);
+
+        var queryString = query.Any() ? "?" + string.Join("&", query.Select(kvp => $"{kvp.Key}={kvp.Value}")) : "";
 
         var tcs = new TaskCompletionSource<int>();
         List<string> testOutput = new();
         List<string> consoleOutput = new();
-        Regex exitRegex = new Regex("WASM EXIT (?<exitCode>[0-9]+)$");
+        List<string> serverOutput = new();
+        Regex exitRegex = new Regex("(WASM EXIT (?<exitCode>[0-9]+)$)|(Program terminated with exit\\((?<exitCode>[0-9]+)\\))");
 
         BlazorRunOptions blazorRunOptions = new(
                 CheckCounter: false,
                 Config: options.Configuration,
+                ServerEnvironment: options.ServerEnvironment,
                 OnConsoleMessage: OnConsoleMessage,
-                QueryString: queryString);
+                OnServerMessage: OnServerMessage,
+                BrowserPath: options.BrowserPath,
+                QueryString: queryString,
+                Host: host);
 
-        await BlazorRunForBuildWithDotnetRun(blazorRunOptions);
+        await BlazorRunTest(blazorRunOptions);
 
-        void OnConsoleMessage(IConsoleMessage msg)
+        void OnConsoleMessage(IPage page, IConsoleMessage msg)
         {
             consoleOutput.Add(msg.Text);
 
-            const string testOutputPrefix = "TestOutput -> ";
-            if (msg.Text.StartsWith(testOutputPrefix))
-                testOutput.Add(msg.Text.Substring(testOutputPrefix.Length));
+            OnTestOutput(msg.Text);
 
             var exitMatch = exitRegex.Match(msg.Text);
             if (exitMatch.Success)
@@ -87,7 +132,23 @@ void OnConsoleMessage(IConsoleMessage msg)
                 throw new Exception(msg.Text);
 
             if (options.OnConsoleMessage != null)
-                options.OnConsoleMessage(msg);
+                options.OnConsoleMessage(page, msg);
+        }
+
+        void OnServerMessage(string msg)
+        {
+            serverOutput.Add(msg);
+            OnTestOutput(msg);
+
+            if (options.OnServerMessage != null)
+                options.OnServerMessage(msg);
+        }
+
+        void OnTestOutput(string msg)
+        {
+            const string testOutputPrefix = "TestOutput -> ";
+            if (msg.StartsWith(testOutputPrefix))
+                testOutput.Add(msg.Substring(testOutputPrefix.Length));
         }
 
         //TimeSpan timeout = TimeSpan.FromMinutes(2);
@@ -99,20 +160,24 @@ void OnConsoleMessage(IConsoleMessage msg)
         if (options.ExpectedExitCode != null && wasmExitCode != options.ExpectedExitCode)
             throw new Exception($"Expected exit code {options.ExpectedExitCode} but got {wasmExitCode}");
 
-        return new(wasmExitCode, testOutput, consoleOutput);
+        return new(wasmExitCode, testOutput, consoleOutput, serverOutput);
     }
 
     protected record RunOptions(
         string Configuration,
-        string TestScenario,
+        string BrowserPath = "",
+        string? TestScenario = null,
         Dictionary<string, string> BrowserQueryString = null,
-        Action<IConsoleMessage> OnConsoleMessage = null,
+        Dictionary<string, string> ServerEnvironment = null,
+        Action<IPage, IConsoleMessage> OnConsoleMessage = null,
+        Action<string> OnServerMessage = null,
         int? ExpectedExitCode = 0
     );
 
     protected record RunResult(
         int ExitCode,
         IReadOnlyCollection<string> TestOutput,
-        IReadOnlyCollection<string> ConsoleOutput
+        IReadOnlyCollection<string> ConsoleOutput,
+        IReadOnlyCollection<string> ServerOutput
     );
 }
diff --git a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/DebugLevelTests.cs b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/DebugLevelTests.cs
new file mode 100644
index 000000000000..1bbe8691d80d
--- /dev/null
+++ b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/DebugLevelTests.cs
@@ -0,0 +1,109 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Threading.Tasks;
+using Xunit;
+using Xunit.Abstractions;
+
+#nullable enable
+
+namespace Wasm.Build.Tests.TestAppScenarios;
+
+public class DebugLevelTests : AppTestBase
+{
+    public DebugLevelTests(ITestOutputHelper output, SharedBuildPerTestClassFixture buildContext)
+        : base(output, buildContext)
+    {
+    }
+
+    private void AssertDebugLevel(RunResult result, int value)
+    {
+        Assert.Collection(
+            result.TestOutput,
+            m => Assert.Equal($"WasmDebugLevel: {value}", m)
+        );
+    }
+
+    [Theory]
+    [InlineData("Debug")]
+    [InlineData("Release")]
+    public async Task BuildWithDefaultLevel(string configuration)
+    {
+        CopyTestAsset("WasmBasicTestApp", $"DebugLevelTests_BuildWithDefaultLevel_{configuration}");
+        BuildProject(configuration);
+
+        var result = await RunSdkStyleAppForBuild(new(
+            Configuration: configuration,
+            TestScenario: "DebugLevelTest"
+        ));
+        AssertDebugLevel(result, -1);
+    }
+
+    [Theory]
+    [InlineData("Debug", 1)]
+    [InlineData("Release", 1)]
+    [InlineData("Debug", 0)]
+    [InlineData("Release", 0)]
+    public async Task BuildWithExplicitValue(string configuration, int debugLevel)
+    {
+        CopyTestAsset("WasmBasicTestApp", $"DebugLevelTests_BuildWithExplicitValue_{configuration}");
+        BuildProject(configuration: configuration, extraArgs: $"-p:WasmDebugLevel={debugLevel}");
+
+        var result = await RunSdkStyleAppForBuild(new(
+            Configuration: configuration,
+            TestScenario: "DebugLevelTest"
+        ));
+        AssertDebugLevel(result, debugLevel);
+    }
+
+    [Theory]
+    [InlineData("Debug")]
+    [InlineData("Release")]
+    public async Task PublishWithDefaultLevel(string configuration)
+    {
+        CopyTestAsset("WasmBasicTestApp", $"DebugLevelTests_PublishWithDefaultLevel_{configuration}");
+        PublishProject(configuration);
+
+        var result = await RunSdkStyleAppForPublish(new(
+            Configuration: configuration,
+            TestScenario: "DebugLevelTest"
+        ));
+        AssertDebugLevel(result, 0);
+    }
+
+    [Theory]
+    [InlineData("Debug", 1)]
+    [InlineData("Release", 1)]
+    [InlineData("Debug", -1)]
+    [InlineData("Release", -1)]
+    public async Task PublishWithExplicitValue(string configuration, int debugLevel)
+    {
+        CopyTestAsset("WasmBasicTestApp", $"DebugLevelTests_PublishWithExplicitValue_{configuration}");
+        PublishProject(configuration, $"-p:WasmDebugLevel={debugLevel}");
+
+        var result = await RunSdkStyleAppForPublish(new(
+            Configuration: configuration,
+            TestScenario: "DebugLevelTest"
+        ));
+        AssertDebugLevel(result, debugLevel);
+    }
+
+    [Theory]
+    [InlineData("Debug")]
+    [InlineData("Release")]
+    public async Task PublishWithDefaultLevelAndPdbs(string configuration)
+    {
+        CopyTestAsset("WasmBasicTestApp", $"DebugLevelTests_PublishWithDefaultLevelAndPdbs_{configuration}");
+        PublishProject(configuration, $"-p:CopyOutputSymbolsToPublishDirectory=true");
+
+        var result = await RunSdkStyleAppForPublish(new(
+            Configuration: configuration,
+            TestScenario: "DebugLevelTest"
+        ));
+        AssertDebugLevel(result, -1);
+    }
+}
diff --git a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/DownloadResourceProgressTests.cs b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/DownloadResourceProgressTests.cs
index 70f9b4f1507d..7cc55ebd07ae 100644
--- a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/DownloadResourceProgressTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/DownloadResourceProgressTests.cs
@@ -28,7 +28,7 @@ public async Task DownloadProgressFinishes(bool failAssemblyDownload)
         CopyTestAsset("WasmBasicTestApp", $"DownloadResourceProgressTests_{failAssemblyDownload}");
         PublishProject("Debug");
 
-        var result = await RunSdkStyleApp(new(
+        var result = await RunSdkStyleAppForPublish(new(
             Configuration: "Debug",
             TestScenario: "DownloadResourceProgressTest",
             BrowserQueryString: new Dictionary<string, string> { ["failAssemblyDownload"] = failAssemblyDownload.ToString().ToLowerInvariant() }
diff --git a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/LazyLoadingTests.cs b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/LazyLoadingTests.cs
index 8f37a47e1886..cf16a0536a38 100644
--- a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/LazyLoadingTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/LazyLoadingTests.cs
@@ -26,7 +26,7 @@ public async Task LoadLazyAssemblyBeforeItIsNeeded()
         CopyTestAsset("WasmBasicTestApp", "LazyLoadingTests");
         PublishProject("Debug");
 
-        var result = await RunSdkStyleApp(new(Configuration: "Debug", TestScenario: "LazyLoadingTest"));
+        var result = await RunSdkStyleAppForPublish(new(Configuration: "Debug", TestScenario: "LazyLoadingTest"));
         Assert.True(result.TestOutput.Any(m => m.Contains("FirstName")), "The lazy loading test didn't emit expected message with JSON");
     }
 
@@ -36,7 +36,7 @@ public async Task FailOnMissingLazyAssembly()
         CopyTestAsset("WasmBasicTestApp", "LazyLoadingTests");
         PublishProject("Debug");
 
-        var result = await RunSdkStyleApp(new(
+        var result = await RunSdkStyleAppForPublish(new(
             Configuration: "Debug",
             TestScenario: "LazyLoadingTest",
             BrowserQueryString: new Dictionary<string, string> { ["loadRequiredAssembly"] = "false" },
diff --git a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/LibraryInitializerTests.cs b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/LibraryInitializerTests.cs
index 6f68a96ad1d6..e985ad23d89a 100644
--- a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/LibraryInitializerTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/LibraryInitializerTests.cs
@@ -29,7 +29,7 @@ public async Task LoadLibraryInitializer()
         CopyTestAsset("WasmBasicTestApp", "LibraryInitializerTests_LoadLibraryInitializer");
         PublishProject("Debug");
 
-        var result = await RunSdkStyleApp(new(Configuration: "Debug", TestScenario: "LibraryInitializerTest"));
+        var result = await RunSdkStyleAppForPublish(new(Configuration: "Debug", TestScenario: "LibraryInitializerTest"));
         Assert.Collection(
             result.TestOutput,
             m => Assert.Equal("LIBRARY_INITIALIZER_TEST = 1", m)
@@ -42,7 +42,7 @@ public async Task AbortStartupOnError()
         CopyTestAsset("WasmBasicTestApp", "LibraryInitializerTests_AbortStartupOnError");
         PublishProject("Debug");
 
-        var result = await RunSdkStyleApp(new(
+        var result = await RunSdkStyleAppForPublish(new(
             Configuration: "Debug",
             TestScenario: "LibraryInitializerTest",
             BrowserQueryString: new Dictionary<string, string> { ["throwError"] = "true" },
diff --git a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/SatelliteLoadingTests.cs b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/SatelliteLoadingTests.cs
index 31dcb6558286..2088e1522ad7 100644
--- a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/SatelliteLoadingTests.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/SatelliteLoadingTests.cs
@@ -29,7 +29,7 @@ public async Task LoadSatelliteAssembly()
         CopyTestAsset("WasmBasicTestApp", "SatelliteLoadingTests");
         BuildProject("Debug");
 
-        var result = await RunSdkStyleApp(new(Configuration: "Debug", TestScenario: "SatelliteAssembliesTest"));
+        var result = await RunSdkStyleAppForBuild(new(Configuration: "Debug", TestScenario: "SatelliteAssembliesTest"));
         Assert.Collection(
             result.TestOutput,
             m => Assert.Equal("default: hello", m),
diff --git a/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/SignalRClientTests.cs b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/SignalRClientTests.cs
new file mode 100644
index 000000000000..1b09272b4879
--- /dev/null
+++ b/src/mono/wasm/Wasm.Build.Tests/TestAppScenarios/SignalRClientTests.cs
@@ -0,0 +1,101 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
+using Microsoft.Playwright;
+using Xunit.Abstractions;
+using Xunit;
+
+#nullable enable
+
+namespace Wasm.Build.Tests.TestAppScenarios;
+
+public class SignalRClientTests : AppTestBase
+{
+    public SignalRClientTests(ITestOutputHelper output, SharedBuildPerTestClassFixture buildContext)
+        : base(output, buildContext)
+    {
+    }
+
+    [ConditionalTheory(typeof(BuildTestBase), nameof(IsWorkloadWithMultiThreadingForDefaultFramework))]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/100445")] // to be fixed by: "https://github.com/dotnet/aspnetcore/issues/54365"
+    [InlineData("Debug", "LongPolling")]
+    [InlineData("Release", "LongPolling")]
+    [InlineData("Debug", "WebSockets")]
+    [InlineData("Release", "WebSockets")]
+    public async Task SignalRPassMessages(string config, string transport)
+    {
+        BlazorHostedBuild(config,
+            assetName: "BlazorHostedApp",
+            clientDirRelativeToProjectDir: "../BlazorHosted.Client",
+            generatedProjectNamePrefix: "SignalRClientTests",
+            runtimeType: RuntimeVariant.MultiThreaded);
+
+        List<string> consoleOutput = new();
+        List<string> serverOutput = new();
+
+        var result = await RunSdkStyleAppForBuild(new(
+            Configuration: config,
+            // We are using build (not publish),
+            // we need to instruct static web assets to use manifest file,
+            // because wwwroot in bin doesn't contain all files (for build)
+            ServerEnvironment: new Dictionary<string, string> { ["ASPNETCORE_ENVIRONMENT"] = "Development" },
+            BrowserPath: "/chat",
+            BrowserQueryString: new Dictionary<string, string> { ["transport"] = transport, ["message"] = "ping" },
+            OnServerMessage: (msg) => serverOutput.Add(msg),
+            OnConsoleMessage: async (page, msg) =>
+            {
+                consoleOutput.Add(msg.Text);
+                if (msg.Text.Contains("TestOutput ->"))
+                    _testOutput.WriteLine(msg.Text);
+
+                // prevent timeouts with [Long Running Test] on error
+                if (msg.Text.ToLowerInvariant().Contains("error"))
+                {
+                    Console.WriteLine(msg.Text);
+                    Console.WriteLine(_testOutput);
+                    throw new Exception(msg.Text);
+                }
+
+                if (msg.Text.Contains("Finished GetQueryParameters"))
+                    await SaveClickButtonAsync(page, "button#connectButton");
+
+                if (msg.Text.Contains("SignalR connected"))
+                    await SaveClickButtonAsync(page, "button#subscribeButton");
+
+                if (msg.Text.Contains("Subscribed to ReceiveMessage"))
+                    await SaveClickButtonAsync(page, "button#sendMessageButton");
+
+                if (msg.Text.Contains("ReceiveMessage from server"))
+                    await SaveClickButtonAsync(page, "button#exitProgramButton");
+            }
+        ));
+
+        string output = _testOutput.ToString() ?? "";
+        Assert.NotEmpty(output);
+        // check sending and receiving threadId
+        string threadIdUsedForSending = GetThreadOfAction(output, @"SignalRPassMessages was sent by CurrentManagedThreadId=(\d+)", "signalR message was sent");
+        string threadIdUsedForReceiving = GetThreadOfAction(output, @"ReceiveMessage from server on CurrentManagedThreadId=(\d+)", "signalR message was received");
+        Assert.True("1" != threadIdUsedForSending || "1" != threadIdUsedForReceiving,
+            $"Expected to send/receive with signalR in non-UI threads, instead only CurrentManagedThreadId=1 was used. TestOutput: {output}.");
+    }
+
+    private string GetThreadOfAction(string testOutput, string pattern, string actionDescription)
+    {
+        Match match = Regex.Match(testOutput, pattern);
+        Assert.True(match.Success, $"Expected to find a log that {actionDescription}. TestOutput: {testOutput}.");
+        return match.Groups[1].Value ?? "";
+    }
+
+    private async Task SaveClickButtonAsync(IPage page, string selector)
+    {
+        await page.WaitForSelectorAsync(selector);
+        await page.ClickAsync(selector);
+    }
+}
diff --git a/src/mono/wasm/Wasm.Build.Tests/TestMainJsProjectProvider.cs b/src/mono/wasm/Wasm.Build.Tests/TestMainJsProjectProvider.cs
index 7042c4855549..89a1ddc8c132 100644
--- a/src/mono/wasm/Wasm.Build.Tests/TestMainJsProjectProvider.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/TestMainJsProjectProvider.cs
@@ -108,9 +108,9 @@ public void AssertBundle(BuildArgs buildArgs, BuildProjectOptions buildProjectOp
         AssertBundle(assertOptions);
     }
 
-    public override string FindBinFrameworkDir(string config, bool forPublish, string framework, string? bundleDirName = null)
+    public override string FindBinFrameworkDir(string config, bool forPublish, string framework, string? bundleDirName = null, string? projectDir = null)
     {
         EnsureProjectDirIsSet();
-        return Path.Combine(ProjectDir!, "bin", config, framework, "browser-wasm", bundleDirName ?? this.BundleDirName, "_framework");
+        return Path.Combine(projectDir ?? ProjectDir!, "bin", config, framework, "browser-wasm", bundleDirName ?? this.BundleDirName, "_framework");
     }
 }
diff --git a/src/mono/wasm/Wasm.Build.Tests/WasmSdkBasedProjectProvider.cs b/src/mono/wasm/Wasm.Build.Tests/WasmSdkBasedProjectProvider.cs
index 0480473d249a..9cfbcca73843 100644
--- a/src/mono/wasm/Wasm.Build.Tests/WasmSdkBasedProjectProvider.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/WasmSdkBasedProjectProvider.cs
@@ -105,12 +105,14 @@ public void AssertBundle(AssertWasmSdkBundleOptions assertOptions)
             _ => throw new ArgumentOutOfRangeException(nameof(assertOptions.ExpectedFileType))
         };
         string buildType = assertOptions.IsPublish ? "publish" : "build";
-        foreach (string nativeFilename in new[] { "dotnet.native.wasm", "dotnet.native.js" })
+        var nativeFilesToCheck = new List<string>() { "dotnet.native.wasm", "dotnet.native.js" };
+        if (assertOptions.RuntimeType == RuntimeVariant.MultiThreaded)
+            nativeFilesToCheck.Add("dotnet.native.worker.js");
+        foreach (string nativeFilename in nativeFilesToCheck)
         {
             if (!actualDotnetFiles.TryGetValue(nativeFilename, out DotNetFileName? dotnetFile))
             {
                 throw new XunitException($"Could not find {nativeFilename}. Actual files on disk: {string.Join($"{Environment.NewLine}  ", actualDotnetFiles.Values.Select(a => a.ActualPath).Order())}");
-
             }
             // For any *type*, check against the expected path
             TestUtils.AssertSameFile(Path.Combine(srcDirForNativeFileToCompareAgainst, nativeFilename),
@@ -119,6 +121,11 @@ public void AssertBundle(AssertWasmSdkBundleOptions assertOptions)
 
             if (assertOptions.ExpectedFileType != NativeFilesType.FromRuntimePack)
             {
+                if (nativeFilename == "dotnet.native.worker.js")
+                {
+                    Console.WriteLine($"Skipping the verification whether {nativeFilename} is from the runtime pack. The check wouldn't be meaningful as the runtime pack file has the same size as the relinked file");
+                    continue;
+                }
                 // Confirm that it doesn't match the file from the runtime pack
                 TestUtils.AssertNotSameFile(Path.Combine(runtimeNativeDir, nativeFilename),
                                    actualDotnetFiles[nativeFilename].ActualPath,
diff --git a/src/mono/wasm/Wasm.Build.Tests/WasmTemplateTestBase.cs b/src/mono/wasm/Wasm.Build.Tests/WasmTemplateTestBase.cs
index 38037f7d96f1..9c9237992215 100644
--- a/src/mono/wasm/Wasm.Build.Tests/WasmTemplateTestBase.cs
+++ b/src/mono/wasm/Wasm.Build.Tests/WasmTemplateTestBase.cs
@@ -22,7 +22,7 @@ protected WasmTemplateTestBase(ITestOutputHelper output, SharedBuildPerTestClass
         _provider.BundleDirName = "AppBundle";
     }
 
-    public string CreateWasmTemplateProject(string id, string template = "wasmbrowser", string extraArgs = "", bool runAnalyzers = true, bool addFrameworkArg = false)
+    public string CreateWasmTemplateProject(string id, string template = "wasmbrowser", string extraArgs = "", bool runAnalyzers = true, bool addFrameworkArg = false, string? extraProperties = null)
     {
         InitPaths(id);
         InitProjectDir(_projectDir, addNuGetSourceForLocalPackages: true);
@@ -49,7 +49,10 @@ public string CreateWasmTemplateProject(string id, string template = "wasmbrowse
                 .EnsureSuccessful();
 
         string projectfile = Path.Combine(_projectDir!, $"{id}.csproj");
-        string extraProperties = string.Empty;
+
+        if (extraProperties == null)
+            extraProperties = string.Empty;
+
         extraProperties += "<TreatWarningsAsErrors>true</TreatWarningsAsErrors>";
         if (runAnalyzers)
             extraProperties += "<RunAnalyzers>true</RunAnalyzers>";
diff --git a/src/mono/wasm/build/WasmApp.Common.targets b/src/mono/wasm/build/WasmApp.Common.targets
index 3f3a97fff9b3..5c7e4353fee0 100644
--- a/src/mono/wasm/build/WasmApp.Common.targets
+++ b/src/mono/wasm/build/WasmApp.Common.targets
@@ -50,7 +50,7 @@
       - $(EmccExtraCFlags)                  - Extra emcc flags for compiling native files
       - $(EmccEnableAssertions)             - Corresponds to `ASSERTIONS` arg for emcc. Default false.
       - $(EmccEnvironment)                  - Corresponds to `ENVIRONMENT` arg for emcc. Default is `web,webview,worker,node,shell`.
-      - $(WasmInitialHeapSize)              - Initial heap size specified with `emcc`. Default value: 16777216 or size of the DLLs, whichever is larger.
+      - $(WasmInitialHeapSize)              - Initial heap size specified with `emcc`. Default value: 33554432 or size of the DLLs, whichever is larger.
                                               Corresponds to `-s INITIAL_MEMORY=...` emcc arg.
                                               (previously named EmccTotalMemory, which is still kept as an alias)
       - $(EmccMaximumHeapSize)              - Maximum heap size specified with `emcc`. Default value: 2147483648 or size of the DLLs, whichever is larger.
@@ -111,6 +111,13 @@
   <UsingTask TaskName="MonoTargetsTasks.MarshalingPInvokeScanner" AssemblyFile="$(MonoTargetsTasksAssemblyPath)" />
   <UsingTask TaskName="EmitBundleObjectFiles" AssemblyFile="$(MonoTargetsTasksAssemblyPath)" />
 
+  <ItemGroup>
+      <_BoolPropertiesThatTriggerRelinking Include="InvariantTimezone" DefaultValueInRuntimePack="false" />
+      <_BoolPropertiesThatTriggerRelinking Include="InvariantGlobalization" DefaultValueInRuntimePack="false" />
+      <_BoolPropertiesThatTriggerRelinking Include="WasmNativeStrip" DefaultValueInRuntimePack="true" />
+      <_BoolPropertiesThatTriggerRelinking Include="WasmSingleFileBundle" DefaultValueInRuntimePack="false" />
+  </ItemGroup>
+
   <PropertyGroup>
     <PrepareInputsForWasmBuildDependsOn>
       $(PrepareInputsForWasmBuildDependsOn);
@@ -172,9 +179,6 @@
     -->
     <UseAppHost>false</UseAppHost>
 
-    <!-- if DebuggerSupport==true, then ensure that WasmDebugLevel isn't disabling debugging -->
-    <WasmDebugLevel Condition="('$(WasmDebugLevel)' == '' or '$(WasmDebugLevel)' == '0') and ('$(DebuggerSupport)' == 'true' or '$(Configuration)' == 'Debug')">-1</WasmDebugLevel>
-
     <!-- Post Wasm MVP features -->
     <WasmEnableExceptionHandling Condition="'$(WasmEnableExceptionHandling)' == ''">false</WasmEnableExceptionHandling>
     <WasmEnableSIMD Condition="'$(WasmEnableSIMD)' == ''">false</WasmEnableSIMD>
@@ -497,14 +501,18 @@
            Text="$(_ToolchainMissingErrorMessage) SDK is required for AOT'ing assemblies." />
 
     <ItemGroup>
-      <_BoolPropertiesThatTriggerRelinking Include="InvariantTimezone" DefaultValueInRuntimePack="false" />
-      <_BoolPropertiesThatTriggerRelinking Include="InvariantGlobalization" DefaultValueInRuntimePack="false" />
+      <_ChangedBoolPropertiesThatTriggerRelinking Include="%(_BoolPropertiesThatTriggerRelinking.Identity)" Condition="'$(%(_BoolPropertiesThatTriggerRelinking.Identity))' != '' and
+                                                                                                            '$(%(_BoolPropertiesThatTriggerRelinking.Identity))' != '%(_BoolPropertiesThatTriggerRelinking.DefaultValueInRuntimePack)'" />
     </ItemGroup>
+    <PropertyGroup>
+      <_WasmBuildNativeRequired Condition="@(_ChangedBoolPropertiesThatTriggerRelinking->Count()) > 0">true</_WasmBuildNativeRequired>
+    </PropertyGroup>
+
+    <Error Condition="'$(WasmBuildNative)' == 'false' and '$(_WasmBuildNativeRequired)' == 'true'"
+           Text="WasmBuildNative is required because %(_ChangedBoolPropertiesThatTriggerRelinking.Identity)=$(%(_ChangedBoolPropertiesThatTriggerRelinking.Identity)), but WasmBuildNative is already set to 'false'." />
 
     <PropertyGroup>
-      <WasmBuildNative Condition="'$(WasmBuildNative)' == '' and
-                          '$(%(_BoolPropertiesThatTriggerRelinking.Identity))' != '' and
-                          '$(%(_BoolPropertiesThatTriggerRelinking.Identity))' != '%(_BoolPropertiesThatTriggerRelinking.DefaultValueInRuntimePack)'">true</WasmBuildNative>
+      <WasmBuildNative Condition="'$(WasmBuildNative)' == '' and '$(_WasmBuildNativeRequired)' == 'true'">true</WasmBuildNative>
     </PropertyGroup>
 
     <!-- When Building -->
@@ -513,7 +521,6 @@
       <WasmBuildNative Condition="'$(RunAOTCompilation)' == 'true' and '$(RunAOTCompilationAfterBuild)' == 'true'">true</WasmBuildNative>
 
       <WasmBuildNative Condition="'$(WasmBuildNative)' == '' and @(NativeFileReference->Count()) > 0" >true</WasmBuildNative>
-      <WasmBuildNative Condition="'$(WasmBuildNative)' == '' and '$(WasmSingleFileBundle)' == 'true'">true</WasmBuildNative>
     </PropertyGroup>
 
     <!-- When Publishing -->
@@ -521,7 +528,6 @@
       <!-- AOT==true overrides WasmBuildNative -->
       <WasmBuildNative Condition="'$(RunAOTCompilation)' == 'true'">true</WasmBuildNative>
       <WasmBuildNative Condition="'$(WasmBuildNative)' == '' and @(NativeFileReference->Count()) > 0" >true</WasmBuildNative>
-      <WasmBuildNative Condition="'$(WasmBuildNative)' == '' and '$(WasmSingleFileBundle)' == 'true'">true</WasmBuildNative>
 
       <!-- not aot, not trimmed app, no reason to relink -->
       <WasmBuildNative Condition="'$(WasmBuildNative)' == '' and '$(PublishTrimmed)' != 'true'">false</WasmBuildNative>
@@ -578,8 +584,8 @@
       <Output TaskParameter="InitialHeapSize" PropertyName="_WasmCalculatedInitialHeapSize" />
     </WasmCalculateInitialHeapSize>
     <PropertyGroup>
-      <WasmInitialHeapSize Condition="'$(WasmInitialHeapSize)' == '' and '$(_WasmCalculatedInitialHeapSize)' != '' and $(_WasmCalculatedInitialHeapSize) > 16777216">$(_WasmCalculatedInitialHeapSize)</WasmInitialHeapSize>
-      <WasmInitialHeapSize Condition="'$(WasmInitialHeapSize)' == ''">16777216</WasmInitialHeapSize>
+      <WasmInitialHeapSize Condition="'$(WasmInitialHeapSize)' == '' and '$(_WasmCalculatedInitialHeapSize)' != '' and $(_WasmCalculatedInitialHeapSize) > 33554432">$(_WasmCalculatedInitialHeapSize)</WasmInitialHeapSize>
+      <WasmInitialHeapSize Condition="'$(WasmInitialHeapSize)' == ''">33554432</WasmInitialHeapSize>
     </PropertyGroup>
   </Target>
 
@@ -618,6 +624,7 @@
       <MonoAOTCompilerDefaultAotArguments Include="static" />
       <MonoAOTCompilerDefaultAotArguments Include="direct-icalls" />
       <MonoAOTCompilerDefaultAotArguments Include="deterministic" />
+      <MonoAOTCompilerDefaultAotArguments Include="compile-in-child" Condition="!$([MSBuild]::IsOSPlatform('windows'))" />
       <MonoAOTCompilerDefaultAotArguments Include="mattr=simd" Condition="'$(WasmEnableSIMD)' == 'true'" />
       <MonoAOTCompilerDefaultProcessArguments Include="-v" Condition="'$(WasmAOTCompilerVerbose)' == 'true'" />
       <MonoAOTCompilerDefaultProcessArguments Include="--wasm-exceptions" Condition="'$(WasmEnableExceptionHandling)' == 'true'" />
diff --git a/src/mono/wasm/features.md b/src/mono/wasm/features.md
index 90c0744a8586..53995e656a1a 100644
--- a/src/mono/wasm/features.md
+++ b/src/mono/wasm/features.md
@@ -402,8 +402,8 @@ In Blazor, you can customize the startup in your index.html
 <script src="_framework/blazor.webassembly.js" autostart="false"></script>
 <script>
 Blazor.start({
-    configureRuntime: function (builder) {
-        builder.withConfig({
+    configureRuntime: function (dotnet) {
+        dotnet.withConfig({
             browserProfilerOptions: {}
         });
     }
diff --git a/src/mono/wasm/host/Options.cs b/src/mono/wasm/host/Options.cs
index 78c22be9382d..9743297cfd0a 100644
--- a/src/mono/wasm/host/Options.cs
+++ b/src/mono/wasm/host/Options.cs
@@ -746,7 +746,7 @@ public override string Description
 
         public override bool GetArguments(string value, out IEnumerable<string> replacement)
         {
-            if (string.IsNullOrEmpty(value) || !value.StartsWith("@"))
+            if (string.IsNullOrEmpty(value) || !value.StartsWith('@'))
             {
                 replacement = null;
                 return false;
diff --git a/src/mono/wasm/host/WasmAppHost.csproj b/src/mono/wasm/host/WasmAppHost.csproj
index 0fea53c84cf5..b6c8fa42fc25 100644
--- a/src/mono/wasm/host/WasmAppHost.csproj
+++ b/src/mono/wasm/host/WasmAppHost.csproj
@@ -3,7 +3,7 @@
   <PropertyGroup>
     <TargetFramework>$(AspNetCoreAppCurrent)</TargetFramework>
     <CopyLocalLockFileAssemblies>true</CopyLocalLockFileAssemblies>
-    <NoWarn>$(NoWarn),CA2007</NoWarn>
+    <NoWarn>$(NoWarn);CA2007</NoWarn>
     <Nullable>enable</Nullable>
     <UseAppHost>false</UseAppHost>
     <RollForward>LatestMajor</RollForward>
diff --git a/src/mono/wasm/sln/WasmBuild.sln b/src/mono/wasm/sln/WasmBuild.sln
index da3a82af643f..b84f2d7ba8c8 100755
--- a/src/mono/wasm/sln/WasmBuild.sln
+++ b/src/mono/wasm/sln/WasmBuild.sln
@@ -25,7 +25,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ApplyUpdateReferencedAssemb
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.NET.Sdk.WebAssembly.Pack.Tasks", "..\..\..\tasks\Microsoft.NET.Sdk.WebAssembly.Pack.Tasks\Microsoft.NET.Sdk.WebAssembly.Pack.Tasks.csproj", "{5EEC2925-2021-4830-B7E9-72BB8B2C283D}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Wasi.Build.Tests", "..\..\wasi\Wasi.Build.Tests\Wasi.Build.Tests.csproj", "{3A3AEAE5-0110-45D3-89B0-B82AC430535C}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Wasi.Build.Tests", "..\..\wasi\Wasi.Build.Tests\Wasi.Build.Tests.csproj", "{3A3AEAE5-0110-45D3-89B0-B82AC430535C}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "WasmTestRunner", "..\..\..\libraries\Common\tests\WasmTestRunner\WasmTestRunner.csproj", "{2BBE4AA8-5424-44AB-933C-66554B688872}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -81,6 +83,10 @@ Global
 		{3A3AEAE5-0110-45D3-89B0-B82AC430535C}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{3A3AEAE5-0110-45D3-89B0-B82AC430535C}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{3A3AEAE5-0110-45D3-89B0-B82AC430535C}.Release|Any CPU.Build.0 = Release|Any CPU
+		{2BBE4AA8-5424-44AB-933C-66554B688872}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{2BBE4AA8-5424-44AB-933C-66554B688872}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{2BBE4AA8-5424-44AB-933C-66554B688872}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{2BBE4AA8-5424-44AB-933C-66554B688872}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/App.razor b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/App.razor
new file mode 100644
index 000000000000..6fd3ed1b5a3b
--- /dev/null
+++ b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/App.razor
@@ -0,0 +1,12 @@
+﻿<Router AppAssembly="@typeof(App).Assembly">
+    <Found Context="routeData">
+        <RouteView RouteData="@routeData" DefaultLayout="@typeof(MainLayout)" />
+        <FocusOnNavigate RouteData="@routeData" Selector="h1" />
+    </Found>
+    <NotFound>
+        <PageTitle>Not found</PageTitle>
+        <LayoutView Layout="@typeof(MainLayout)">
+            <p role="alert">Sorry, there's nothing at this address.</p>
+        </LayoutView>
+    </NotFound>
+</Router>
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/BlazorHosted.Client.csproj b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/BlazorHosted.Client.csproj
new file mode 100644
index 000000000000..314f3453a08e
--- /dev/null
+++ b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/BlazorHosted.Client.csproj
@@ -0,0 +1,19 @@
+﻿<Project Sdk="Microsoft.NET.Sdk.BlazorWebAssembly">
+
+  <PropertyGroup>
+    <TargetFramework>net9.0</TargetFramework>
+    <Nullable>enable</Nullable>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <WasmEnableThreads>true</WasmEnableThreads>
+    <!-- nullablility warning, async warning -->
+    <NoWarn>CS8604;CS4014</NoWarn>
+  </PropertyGroup>
+
+  <!-- versions are pinned but when run from WBT level, it's taking in-tree runtime -->
+  <ItemGroup>
+    <PackageReference Include="Microsoft.AspNetCore.Components.WebAssembly" Version="9.0.0-preview.1.24081.5" />
+    <PackageReference Include="Microsoft.AspNetCore.Components.WebAssembly.DevServer" Version="9.0.0-preview.1.24081.5" PrivateAssets="all" />
+    <PackageReference Include="Microsoft.AspNetCore.SignalR.Client" Version="8.0.1" />
+    <PackageReference Include="System.Net.Http.Json" Version="8.0.0" />
+  </ItemGroup>
+</Project>
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/Helper.cs b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/Helper.cs
new file mode 100644
index 000000000000..38ead1438099
--- /dev/null
+++ b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/Helper.cs
@@ -0,0 +1,42 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Specialized;
+using Microsoft.AspNetCore.Http.Connections;
+
+namespace BlazorHosted.Client;
+
+public static class Helper
+{
+    public static string GetValue(NameValueCollection parameters, string key)
+    {
+        var values = parameters.GetValues(key);
+        if (values == null || values.Length == 0)
+        {
+            throw new Exception($"Parameter '{key}' is required in the query string");
+        }
+        if (values.Length > 1)
+        {
+            throw new Exception($"Parameter '{key}' should be unique in the query string");
+        }
+        return values[0];
+    }
+
+    public static HttpTransportType StringToTransportType(string transport)
+    {
+        switch (transport.ToLowerInvariant())
+        {
+            case "longpolling":
+                return HttpTransportType.LongPolling;
+            case "websockets":
+                return HttpTransportType.WebSockets;
+            default:
+                throw new Exception($"{transport} is invalid transport type");
+        }
+    }
+
+    public static void TestOutputWriteLine(string message)
+    {
+        Console.WriteLine("TestOutput -> " + message);
+    }
+}
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/Layout/MainLayout.razor b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/Layout/MainLayout.razor
new file mode 100644
index 000000000000..f4d8cbb8e66e
--- /dev/null
+++ b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/Layout/MainLayout.razor
@@ -0,0 +1,13 @@
+﻿@inherits LayoutComponentBase
+
+<div class="page">
+    <main>
+        <div class="top-row px-4">
+            <a href="https://learn.microsoft.com/aspnet/core/" target="_blank">About</a>
+        </div>
+
+        <article class="content px-4">
+            @Body
+        </article>
+    </main>
+</div>
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/Pages/Chat.razor b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/Pages/Chat.razor
new file mode 100644
index 000000000000..f90aa96c87b9
--- /dev/null
+++ b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/Pages/Chat.razor
@@ -0,0 +1,116 @@
+﻿@page "/chat"
+@using Microsoft.AspNetCore.SignalR
+@using Microsoft.AspNetCore.SignalR.Client
+@using Microsoft.AspNetCore.Http.Connections;
+@using System.Web;
+@inject NavigationManager NavigationManager
+@inject IJSRuntime JSRuntime
+
+<h1>Chat Room</h1>
+<button id="connectButton" @onclick="Connect">Connect SignalR</button>
+<button id="subscribeButton" @onclick="Subscribe">Subscribe to server messages</button>
+<button id="sendMessageButton" @onclick="SignalRPassMessages">Send message from query</button>
+<button id="exitProgramButton" @onclick="SendExitSignal">Send exit signal to server</button>
+<button id="disconnectButton" @onclick="DisposeHubConnection">Disconnect SignalR</button>
+<div>
+    @foreach (var chatMessage in chatMessages)
+    {
+        <p>@chatMessage</p>
+    }
+</div>
+
+@code {
+    private string _hubUrl = string.Empty;
+    private HubConnection? _hubConnection;
+    private string message = string.Empty;
+    private string transport = string.Empty;
+    private List<string> chatMessages = new List<string>();
+    private string wrongQueryError = "Query string with parameters 'message' and 'transport' are required";
+
+    // remove when https://github.com/dotnet/runtime/issues/96546 is fixed
+    // log that rendering is about to start in case we hit the issue before OnAfterRender is called
+    protected override bool ShouldRender()
+    {
+        bool shouldRender = base.ShouldRender();
+        Helper.TestOutputWriteLine($"ShouldRender = {shouldRender}");
+        return shouldRender;
+    }
+
+    protected override void OnAfterRender(bool firstRender)
+    {
+        if (firstRender)
+        {
+            Helper.TestOutputWriteLine($"OnAfterRender on CurrentManagedThreadId={Environment.CurrentManagedThreadId}");
+            GetQueryParameters();
+        }
+        base.OnAfterRender(firstRender);
+    }
+
+    private void GetQueryParameters()
+    {
+        var uri = new Uri(NavigationManager.Uri);
+        if (string.IsNullOrEmpty(uri.Query))
+        {
+            throw new Exception(wrongQueryError);
+        }
+        var parameters = HttpUtility.ParseQueryString(uri.Query);
+        if (parameters == null)
+        {
+            throw new Exception(wrongQueryError);
+        }
+        transport = Helper.GetValue(parameters, "transport");
+        message = $"{transport} {Helper.GetValue(parameters, "message")}" ;
+        Helper.TestOutputWriteLine($"Finished GetQueryParameters on CurrentManagedThreadId={Environment.CurrentManagedThreadId}.");
+    }
+
+    private async Task Connect()
+    {
+        _hubUrl = NavigationManager.BaseUri + "chathub";
+        HttpTransportType httpTransportType = Helper.StringToTransportType(transport);
+        _hubConnection = new HubConnectionBuilder()
+            .WithUrl(_hubUrl, options =>
+                {
+                    options.Transports = httpTransportType;
+                })
+            .Build();
+
+        await _hubConnection.StartAsync();
+        Helper.TestOutputWriteLine($"SignalR connected by CurrentManagedThreadId={Environment.CurrentManagedThreadId}");
+    }
+
+    private void Subscribe()
+    {
+        _hubConnection.On<string>("ReceiveMessage", (message) =>
+        {
+            Helper.TestOutputWriteLine($"Message = [{message}]. ReceiveMessage from server on CurrentManagedThreadId={Environment.CurrentManagedThreadId}");
+            chatMessages.Add(message);
+        });
+        Helper.TestOutputWriteLine($"Subscribed to ReceiveMessage by CurrentManagedThreadId={Environment.CurrentManagedThreadId}");
+    }
+
+    private async Task SignalRPassMessages() =>
+        await Task.Run(async () =>
+            {
+                await _hubConnection.SendAsync( "SendMessage", message, Environment.CurrentManagedThreadId);
+                Helper.TestOutputWriteLine($"SignalRPassMessages was sent by CurrentManagedThreadId={Environment.CurrentManagedThreadId}");
+            });
+
+    private async Task SendExitSignal()
+    {
+        await DisposeHubConnection();
+        // exit the client
+        Helper.TestOutputWriteLine($"SendExitSignal by CurrentManagedThreadId={Environment.CurrentManagedThreadId}");
+        await JSRuntime.InvokeVoidAsync("eval", "setTimeout(() => { getDotnetRuntime(0).exit(0); }, 50);");
+    }
+
+    private async Task DisposeHubConnection()
+    {
+        if (_hubConnection != null)
+        {
+            _hubConnection.Remove("ReceiveMessage");
+            await _hubConnection.DisposeAsync();
+            _hubConnection = null;
+        }
+        Helper.TestOutputWriteLine($"SignalR disconnected by CurrentManagedThreadId={Environment.CurrentManagedThreadId}");
+    }
+}
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/Program.cs b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/Program.cs
new file mode 100644
index 000000000000..67a2fb06d6a1
--- /dev/null
+++ b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/Program.cs
@@ -0,0 +1,13 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using BlazorHosted.Client;
+using Microsoft.AspNetCore.Components.Web;
+using Microsoft.AspNetCore.Components.WebAssembly.Hosting;
+
+var builder = WebAssemblyHostBuilder.CreateDefault(args);
+builder.RootComponents.Add<App>("#app");
+builder.RootComponents.Add<HeadOutlet>("head::after");
+builder.Services.AddScoped(sp => new HttpClient { BaseAddress = new Uri(builder.HostEnvironment.BaseAddress) });
+
+await builder.Build().RunAsync().ConfigureAwait(false);
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/_Imports.razor b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/_Imports.razor
new file mode 100644
index 000000000000..d39afd384f89
--- /dev/null
+++ b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/_Imports.razor
@@ -0,0 +1,6 @@
+@using Microsoft.AspNetCore.Components.Forms
+@using Microsoft.AspNetCore.Components.Routing
+@using Microsoft.AspNetCore.Components.Web
+@using Microsoft.JSInterop
+@using BlazorHosted.Client
+@using BlazorHosted.Client.Layout
\ No newline at end of file
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/wwwroot/favicon.ico b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/wwwroot/favicon.ico
new file mode 100644
index 000000000000..63e859b476ef
Binary files /dev/null and b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/wwwroot/favicon.ico differ
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/wwwroot/index.html b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/wwwroot/index.html
new file mode 100644
index 000000000000..56dd2027fdf8
--- /dev/null
+++ b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Client/wwwroot/index.html
@@ -0,0 +1,29 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
+    <title>BlazorHosted</title>
+    <base href="/" />
+</head>
+
+<body>
+    <div id="app">Loading...</div>
+
+    <div id="blazor-error-ui">
+        An unhandled error has occurred.
+        <a href="" class="reload">Reload</a>
+        <a class="dismiss">🗙</a>
+    </div>
+    <script src="_framework/blazor.webassembly.js" autostart="false"></script>
+    <script>
+        Blazor.start({
+            configureRuntime: function (dotnet) {
+                dotnet.withExitCodeLogging();
+            }
+        });
+    </script>
+</body>
+
+</html>
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Server/BlazorHosted.Server.csproj b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Server/BlazorHosted.Server.csproj
new file mode 100644
index 000000000000..cc3ac1aae891
--- /dev/null
+++ b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Server/BlazorHosted.Server.csproj
@@ -0,0 +1,19 @@
+<Project Sdk="Microsoft.NET.Sdk.Web">
+
+  <PropertyGroup>
+    <TargetFramework>net9.0</TargetFramework>
+    <Nullable>enable</Nullable>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <!-- configure await warnings -->
+    <NoWarn>CA2007</NoWarn>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Microsoft.AspNetCore.Components.WebAssembly.Server" Version="9.0.0-preview.1.24081.5" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\BlazorHosted.Client\BlazorHosted.Client.csproj" />
+  </ItemGroup>
+
+</Project>
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Server/ChatHub.cs b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Server/ChatHub.cs
new file mode 100644
index 000000000000..8b2e77807c6f
--- /dev/null
+++ b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Server/ChatHub.cs
@@ -0,0 +1,15 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Microsoft.AspNetCore.SignalR;
+
+namespace BlazorHosted.Server.Hubs;
+public class ChatHub : Hub
+{
+    public async Task SendMessage(string message, int sendingThreadId)
+    {
+        Console.WriteLine($"Server: receives Message=[{message}] sent by treadID = {sendingThreadId} and sends it back.");
+        string changedMessage = $"{message}-pong";
+        await Clients.All.SendAsync("ReceiveMessage", changedMessage).ConfigureAwait(false);
+    }
+}
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Server/Program.cs b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Server/Program.cs
new file mode 100644
index 000000000000..fea18a9250cc
--- /dev/null
+++ b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Server/Program.cs
@@ -0,0 +1,52 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Microsoft.Extensions.Configuration;
+using System;
+using Microsoft.Extensions.Logging;
+using BlazorHosted.Server.Hubs;
+
+var builder = WebApplication.CreateBuilder(args);
+
+builder.Services.AddControllersWithViews();
+builder.Services.AddRazorPages();
+builder.Services.AddSignalR(options =>
+{
+    options.KeepAliveInterval = TimeSpan.Zero; // minimize keep-alive messages
+});
+
+var app = builder.Build();
+
+// Configure the HTTP request pipeline.
+if (app.Environment.IsDevelopment())
+{
+    app.UseWebAssemblyDebugging();
+}
+else
+{
+    app.UseExceptionHandler("/Error");
+    // The default HSTS value is 30 days. You may want to change this for production scenarios, see https://aka.ms/aspnetcore-hsts.
+    app.UseHsts();
+}
+
+// Add headers to enable SharedArrayBuffer
+app.Use(async (context, next) =>
+{
+    var response = context.Response;
+    response.Headers.Append("Cross-Origin-Opener-Policy", "same-origin");
+    response.Headers.Append("Cross-Origin-Embedder-Policy", "require-corp");
+
+    await next();
+});
+app.UseBlazorFrameworkFiles();
+app.UseStaticFiles();
+
+app.UseRouting();
+
+app.MapRazorPages();
+app.MapControllers();
+app.MapFallbackToFile("index.html");
+
+app.MapHub<ChatHub>("/chathub");
+
+app.Run();
diff --git a/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Server/appsettings.json b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Server/appsettings.json
new file mode 100644
index 000000000000..75b7c2aa1ece
--- /dev/null
+++ b/src/mono/wasm/testassets/BlazorHostedApp/BlazorHosted.Server/appsettings.json
@@ -0,0 +1,8 @@
+{
+  "Logging": {
+    "LogLevel": {
+      "Default": "Information",
+        "Microsoft.AspNetCore": "Warning"
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/mono/wasm/testassets/Wasm.Buid.Tests.Programs/InvariantGlobalization.cs b/src/mono/wasm/testassets/Wasm.Buid.Tests.Programs/InvariantGlobalization.cs
index 9237110cbc49..c7b1219b6aab 100644
--- a/src/mono/wasm/testassets/Wasm.Buid.Tests.Programs/InvariantGlobalization.cs
+++ b/src/mono/wasm/testassets/Wasm.Buid.Tests.Programs/InvariantGlobalization.cs
@@ -1,11 +1,20 @@
 using System;
 using System.Globalization;
+using System.Linq;
 
 // https://github.com/dotnet/runtime/blob/main/docs/design/features/globalization-invariant-mode.md#cultures-and-culture-data
 try
 {
     CultureInfo culture = new ("es-ES", false);
-    Console.WriteLine($"es-ES: Is Invariant LCID: {culture.LCID == CultureInfo.InvariantCulture.LCID}, NativeName: {culture.NativeName}");
+    Console.WriteLine($"es-ES: Is Invariant LCID: {culture.LCID == CultureInfo.InvariantCulture.LCID}");
+    
+    var nativeNameArg = args.FirstOrDefault(arg => arg.StartsWith("nativename="));
+    if (nativeNameArg == null)
+        throw new ArgumentException($"When not in invariant mode, InvariantGlobalization.cs expects nativename argument with expected es-ES NativeName.");
+    string expectedNativeName = nativeNameArg.Substring(11).Trim('"'); // skip nativename=
+    string nativeName = culture.NativeName;
+    if (nativeName != expectedNativeName)
+        throw new ArgumentException($"Expected es-ES NativeName: {expectedNativeName}, but got: {nativeName}");
 }
 catch (CultureNotFoundException cnfe)
 {
diff --git a/src/mono/wasm/testassets/WasmBasicTestApp/App/wwwroot/README.md b/src/mono/wasm/testassets/WasmBasicTestApp/App/wwwroot/README.md
new file mode 100644
index 000000000000..996992663189
--- /dev/null
+++ b/src/mono/wasm/testassets/WasmBasicTestApp/App/wwwroot/README.md
@@ -0,0 +1,15 @@
+## WasmBasicTestApp
+
+This is a test application used by various Wasm.Build.Tests. The idea is to share a common behavior (so that we don't have to maintain many test apps) and tweak it for the test case.
+It typically suits scenario where you need more than a plain template app. If the test case is too different, feel free to create another app.
+
+### Usage
+
+The app reads `test` query parameter and uses it to switch between test cases. Entrypoint is `main.js`.
+There is common unit, then switch based on test case for modifying app startup, then app starts and executes next switch based on test case for actually running code.
+
+Some test cases passes additional parameters to differentiate behavior, see `src/mono/wasm/Wasm.Build.Tests/TestAppScenarios`.
+
+### Running out side of WBT
+
+One of the benefits is that you can copy the app out of intree and run the app without running Wasm.Build.Tests with just `dotnet run`.
\ No newline at end of file
diff --git a/src/mono/wasm/testassets/WasmBasicTestApp/App/wwwroot/main.js b/src/mono/wasm/testassets/WasmBasicTestApp/App/wwwroot/main.js
index 076f37a62a6d..3a01053875c0 100644
--- a/src/mono/wasm/testassets/WasmBasicTestApp/App/wwwroot/main.js
+++ b/src/mono/wasm/testassets/WasmBasicTestApp/App/wwwroot/main.js
@@ -33,6 +33,8 @@ switch (testCase) {
                 Math.floor(Math.random() * 5) + 5,
                 Math.floor(Math.random() * 5) + 10
             ];
+            console.log(`Failing test at assembly indexes [${failAtAssemblyNumbers.join(", ")}]`);
+            let alreadyFailed = [];
             dotnet.withDiagnosticTracing(true).withResourceLoader((type, name, defaultUri, integrity, behavior) => {
                 if (type === "dotnetjs") {
                     // loadBootResource could return string with unqualified name of resource. 
@@ -44,10 +46,11 @@ switch (testCase) {
                     return defaultUri;
                 }
 
-                assemblyCounter++;
-                if (!failAtAssemblyNumbers.includes(assemblyCounter))
+                const currentCounter = assemblyCounter++;
+                if (!failAtAssemblyNumbers.includes(currentCounter) || alreadyFailed.includes(defaultUri))
                     return defaultUri;
 
+                alreadyFailed.push(defaultUri);
                 testOutput("Throw error instead of downloading resource");
                 const error = new Error("Simulating a failed fetch");
                 error.silent = true;
@@ -94,6 +97,10 @@ try {
         case "DownloadResourceProgressTest":
             exit(0);
             break;
+        case "DebugLevelTest":
+            testOutput("WasmDebugLevel: " + config.debugLevel);
+            exit(0);
+            break;
         default:
             console.error(`Unknown test case: ${testCase}`);
             exit(3);
diff --git a/src/mono/wasm/testassets/native-libs/wasm-abi.c b/src/mono/wasm/testassets/native-libs/wasm-abi.c
index 0ace2037daf2..083bce6abe0c 100644
--- a/src/mono/wasm/testassets/native-libs/wasm-abi.c
+++ b/src/mono/wasm/testassets/native-libs/wasm-abi.c
@@ -1,5 +1,7 @@
 #include <stdio.h>
 
+#define TRACING 0
+
 typedef struct {
     float value;
 } TRes;
@@ -7,10 +9,12 @@ typedef struct {
 TRes accept_double_struct_and_return_float_struct (
     struct { struct { double value; } value; } arg
 ) {
+#if TRACING
     printf (
         "&arg=%x (ulonglong)arg=%llx arg.value.value=%lf\n",
         (unsigned int)&arg, *(unsigned long long*)&arg, (double)arg.value.value
     );
+#endif
     TRes result = { arg.value.value };
     return result;
 }
@@ -20,10 +24,48 @@ typedef struct {
 } TResI64;
 
 TResI64 accept_and_return_i64_struct (TResI64 arg) {
+#if TRACING
     printf (
         "&arg=%x (ulonglong)arg=%llx\n",
         (unsigned int)&arg, *(unsigned long long*)&arg
     );
+#endif
     TResI64 result = { ~arg.value };
     return result;
 }
+
+typedef struct {
+    int A, B;
+} PairStruct;
+
+PairStruct accept_and_return_pair (PairStruct arg) {
+#if TRACING
+    printf (
+        "&arg=%d arg.A=%d arg.B=%d\n",
+        (unsigned int)&arg, arg.A, arg.B
+    );
+#endif
+    arg.A = 32;
+    arg.B *= 2;
+    return arg;
+}
+
+typedef struct {
+    int elements[2];
+} MyInlineArray;
+
+MyInlineArray accept_and_return_inlinearray (MyInlineArray arg) {
+#if TRACING
+    printf (
+        "&arg=%d arg.elements[0]=%d arg.elements[1]=%d\n",
+        (unsigned int)&arg, arg.elements[0], arg.elements[1]
+    );
+#endif
+    arg.elements[0] = 32;
+    arg.elements[1] *= 2;
+    return arg;
+}
+
+MyInlineArray accept_and_return_fixedarray (MyInlineArray arg) {
+    return accept_and_return_inlinearray (arg);
+}
diff --git a/src/native/corehost/apphost/static/singlefilehost.def b/src/native/corehost/apphost/static/singlefilehost.def
index 6052b832b0b0..e1208056b832 100644
--- a/src/native/corehost/apphost/static/singlefilehost.def
+++ b/src/native/corehost/apphost/static/singlefilehost.def
@@ -13,5 +13,8 @@ CLRJitAttachState                                   @3 data
 ; needed by SOS, WinDBG, and Watson. This must remain ordinal 4.
 DotNetRuntimeInfo                                   @4 data
 
+; DAC table export
+g_dacTable = s_dacGlobals
+
 ; Used by profilers
 MetaDataGetDispenser
diff --git a/src/native/corehost/fxr/fx_resolver.cpp b/src/native/corehost/fxr/fx_resolver.cpp
index ec6e4f5ed16d..1340af625423 100644
--- a/src/native/corehost/fxr/fx_resolver.cpp
+++ b/src/native/corehost/fxr/fx_resolver.cpp
@@ -307,25 +307,6 @@ namespace
     }
 }
 
-StatusCode fx_resolver_t::reconcile_fx_references_helper(
-    const fx_reference_t& lower_fx_ref,
-    const fx_reference_t& higher_fx_ref,
-    /*out*/ fx_reference_t& effective_fx_ref)
-{
-    if (!lower_fx_ref.is_compatible_with_higher_version(higher_fx_ref.get_fx_version_number()))
-    {
-        // Error condition - not compatible with the other reference
-        display_incompatible_framework_error(higher_fx_ref.get_fx_version(), lower_fx_ref);
-        return StatusCode::FrameworkCompatFailure;
-    }
-
-    effective_fx_ref = fx_reference_t(higher_fx_ref); // copy
-    effective_fx_ref.merge_roll_forward_settings_from(lower_fx_ref);
-
-    display_compatible_framework_trace(higher_fx_ref.get_fx_version(), lower_fx_ref);
-    return StatusCode::Success;
-}
-
 // Reconciles two framework references into a new effective framework reference
 // This process is sometimes also called "soft roll forward" (soft as in no IO)
 // - fx_ref_a - one of the framework references to reconcile
@@ -341,16 +322,24 @@ StatusCode fx_resolver_t::reconcile_fx_references(
     const fx_reference_t& fx_ref_b,
     /*out*/ fx_reference_t& effective_fx_ref)
 {
-    // The function is split into the helper because the various tracing messages
+    // Determine which framework reference is higher to do the compat check. The various tracing messages
     // make more sense if they're always written with higher/lower versions ordered in particular way.
-    if (fx_ref_a.get_fx_version_number() >= fx_ref_b.get_fx_version_number())
-    {
-        return reconcile_fx_references_helper(fx_ref_b, fx_ref_a, effective_fx_ref);
-    }
-    else
+    bool is_a_higher_than_b = fx_ref_a.get_fx_version_number() >= fx_ref_b.get_fx_version_number();
+    const fx_reference_t& lower_fx_ref = is_a_higher_than_b ? fx_ref_b : fx_ref_a;
+    const fx_reference_t& higher_fx_ref = is_a_higher_than_b ? fx_ref_a : fx_ref_b;
+
+    if (!lower_fx_ref.is_compatible_with_higher_version(higher_fx_ref.get_fx_version_number()))
     {
-        return reconcile_fx_references_helper(fx_ref_a, fx_ref_b, effective_fx_ref);
+        // Error condition - not compatible with the other reference
+        display_incompatible_framework_error(higher_fx_ref.get_fx_version(), lower_fx_ref);
+        return StatusCode::FrameworkCompatFailure;
     }
+
+    effective_fx_ref = fx_reference_t(higher_fx_ref); // copy
+    effective_fx_ref.merge_roll_forward_settings_from(lower_fx_ref);
+
+    display_compatible_framework_trace(higher_fx_ref.get_fx_version(), lower_fx_ref);
+    return StatusCode::Success;
 }
 
 void fx_resolver_t::update_newest_references(
@@ -415,7 +404,7 @@ StatusCode fx_resolver_t::read_framework(
     // This reconciles duplicate references to minimize the number of resolve retries.
     update_newest_references(config);
 
-    StatusCode rc = StatusCode::Success;
+    StatusCode rc;
 
     // Loop through each reference and resolve the framework
     for (const fx_reference_t& original_fx_ref : config.get_frameworks())
@@ -432,23 +421,20 @@ StatusCode fx_resolver_t::read_framework(
         const fx_reference_t& current_effective_fx_ref = m_effective_fx_references[fx_name];
         fx_reference_t new_effective_fx_ref;
 
+        // Reconcile the framework reference with the most up to date so far we have for the framework.
+        // This does not read any physical framework folders yet.
+        rc = reconcile_fx_references(fx_ref, current_effective_fx_ref, new_effective_fx_ref);
+        if (rc != StatusCode::Success)
+            return rc;
+
         auto existing_framework = std::find_if(
             fx_definitions.begin(),
             fx_definitions.end(),
             [&](const std::unique_ptr<fx_definition_t> & fx) { return fx_name == fx->get_name(); });
-
         if (existing_framework == fx_definitions.end())
         {
-            // Reconcile the framework reference with the most up to date so far we have for the framework.
-            // This does not read any physical framework folders yet.
             // Since we didn't find the framework in the resolved list yet, it's OK to update the effective reference
             // as we haven't processed it yet.
-            rc = reconcile_fx_references(fx_ref, current_effective_fx_ref, new_effective_fx_ref);
-            if (rc)
-            {
-                break; // Error case
-            }
-
             m_effective_fx_references[fx_name] = new_effective_fx_ref;
 
             // Resolve the effective framework reference against the existing physical framework folders
@@ -463,7 +449,7 @@ StatusCode fx_resolver_t::read_framework(
                     app_display_name != nullptr ? app_display_name : host_info.host_path.c_str(),
                     get_current_arch_name());
                 display_missing_framework_error(fx_name, new_effective_fx_ref.get_fx_version(), pal::string_t(), host_info.dotnet_root, disable_multilevel_lookup);
-                return FrameworkMissingFailure;
+                return StatusCode::FrameworkMissingFailure;
             }
 
             // Do NOT update the effective reference to have the same version as the resolved framework.
@@ -492,23 +478,13 @@ StatusCode fx_resolver_t::read_framework(
             }
 
             rc = read_framework(host_info, disable_multilevel_lookup, override_settings, new_config, &new_effective_fx_ref, fx_definitions, app_display_name);
-            if (rc)
-            {
-                break; // Error case
-            }
+            if (rc != StatusCode::Success)
+                return rc;
         }
         else
         {
-            // Reconcile the framework reference with the most up to date so far we have for the framework.
-            // Note that since we found the framework in the already resolved frameworks
-            // any update to the effective framework reference needs to restart the resolution process
-            // so that we re-resolve the framework against disk.
-            rc = reconcile_fx_references(fx_ref, current_effective_fx_ref, new_effective_fx_ref);
-            if (rc)
-            {
-                break; // Error case
-            }
-
+            // Since we found the framework in the already resolved frameworks, any update to the effective framework
+            // reference needs to restart the resolution process so that we re-resolve the framework against disk.
             if (new_effective_fx_ref != current_effective_fx_ref)
             {
                 display_retry_framework_trace(current_effective_fx_ref, fx_ref);
@@ -522,11 +498,7 @@ StatusCode fx_resolver_t::read_framework(
         }
     }
 
-    return rc;
-}
-
-fx_resolver_t::fx_resolver_t()
-{
+    return StatusCode::Success;
 }
 
 StatusCode fx_resolver_t::resolve_frameworks_for_app(
diff --git a/src/native/corehost/fxr/fx_resolver.h b/src/native/corehost/fxr/fx_resolver.h
index 35c6fd250af5..018294148f30 100644
--- a/src/native/corehost/fxr/fx_resolver.h
+++ b/src/native/corehost/fxr/fx_resolver.h
@@ -27,7 +27,7 @@ class fx_resolver_t
         const std::unordered_map<pal::string_t, const fx_ver_t> &existing_framework_versions_by_name);
 
 private:
-    fx_resolver_t();
+    fx_resolver_t() = default;
 
     void update_newest_references(
         const runtime_config_t& config);
@@ -40,10 +40,6 @@ class fx_resolver_t
         fx_definition_vector_t& fx_definitions,
         const pal::char_t* app_display_name);
 
-    static StatusCode reconcile_fx_references_helper(
-        const fx_reference_t& lower_fx_ref,
-        const fx_reference_t& higher_fx_ref,
-        /*out*/ fx_reference_t& effective_fx_ref);
     static StatusCode reconcile_fx_references(
         const fx_reference_t& fx_ref_a,
         const fx_reference_t& fx_ref_b,
diff --git a/src/native/corehost/json_parser.h b/src/native/corehost/json_parser.h
index 2c2845aac46b..d7393b0ae678 100644
--- a/src/native/corehost/json_parser.h
+++ b/src/native/corehost/json_parser.h
@@ -8,12 +8,22 @@
 // https://github.com/Tencent/rapidjson/issues/1596#issuecomment-548774663
 #define RAPIDJSON_48BITPOINTER_OPTIMIZATION 0
 
+// see https://github.com/Tencent/rapidjson/issues/1448
+// including windows.h on purpose to provoke a compile time problem as GetObject is a 
+// macro that gets defined when windows.h is included
+#ifdef _WIN32
+#define NOMINMAX
+#include <windows.h>
+#endif
+
 #include "pal.h"
 #include <external/rapidjson/document.h>
 #include <external/rapidjson/fwd.h>
 #include <vector>
 #include "bundle/info.h"
 
+#undef GetObject
+
 class json_parser_t {
     public:
 #ifdef _WIN32
diff --git a/src/native/eventpipe/ep-event-source.c b/src/native/eventpipe/ep-event-source.c
index 26900d11f0bd..982f75a3c531 100644
--- a/src/native/eventpipe/ep-event-source.c
+++ b/src/native/eventpipe/ep-event-source.c
@@ -48,6 +48,8 @@ const ep_char8_t* _ep_arch_info = "s390x";
 const ep_char8_t* _ep_arch_info = "loongarch64";
 #elif defined(TARGET_POWERPC64)
 const ep_char8_t* _ep_arch_info = "ppc64le";
+#elif defined(TARGET_RISCV64)
+const ep_char8_t* _ep_arch_info = "riscv64";
 #else
 const ep_char8_t* _ep_arch_info = "Unknown";
 #endif
diff --git a/src/native/external/cgmanifest.json b/src/native/external/cgmanifest.json
index 4f6264b3c7ab..e45d9d900296 100644
--- a/src/native/external/cgmanifest.json
+++ b/src/native/external/cgmanifest.json
@@ -46,7 +46,7 @@
         "Type": "git",
         "Git": {
           "RepositoryUrl": "https://github.com/madler/zlib",
-          "CommitHash": "04f42ceca40f73e2978b50e93806c2a18c1281fc"
+          "CommitHash": "51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf"
         }
       },
       "DevelopmentDependency": false
diff --git a/src/native/external/libunwind-version.txt b/src/native/external/libunwind-version.txt
index 776043575539..fd56a772b51c 100644
--- a/src/native/external/libunwind-version.txt
+++ b/src/native/external/libunwind-version.txt
@@ -8,3 +8,4 @@ Apply https://github.com/libunwind/libunwind/pull/704
 Revert https://github.com/libunwind/libunwind/pull/503 # issue: https://github.com/libunwind/libunwind/issues/702
 Apply https://github.com/libunwind/libunwind/pull/714
 Revert https://github.com/libunwind/libunwind/commit/ec03043244082b8f552881ba9fb790aa49c85468 and follow up changes in the same file # issue: https://github.com/libunwind/libunwind/issues/715
+Apply https://github.com/libunwind/libunwind/pull/734
diff --git a/src/native/external/libunwind/include/dwarf_i.h b/src/native/external/libunwind/include/dwarf_i.h
index 0f47082adbb7..624021faed4a 100644
--- a/src/native/external/libunwind/include/dwarf_i.h
+++ b/src/native/external/libunwind/include/dwarf_i.h
@@ -280,7 +280,7 @@ dwarf_readw (unw_addr_space_t as, unw_accessors_t *a, unw_word_t *addr,
       ret = dwarf_readu64 (as, a, addr, &u64, arg);
       if (ret < 0)
         return ret;
-      *val = u64;
+      *val = (unw_word_t) u64;
       return ret;
 
     default:
@@ -398,7 +398,7 @@ dwarf_read_encoded_pointer_inlined (unw_addr_space_t as, unw_accessors_t *a,
     case DW_EH_PE_udata8:
       if ((ret = dwarf_readu64 (as, a, addr, &uval64, arg)) < 0)
         return ret;
-      val = uval64;
+      val = (unw_word_t) uval64;
       break;
 
     case DW_EH_PE_sleb128:
@@ -421,7 +421,7 @@ dwarf_read_encoded_pointer_inlined (unw_addr_space_t as, unw_accessors_t *a,
     case DW_EH_PE_sdata8:
       if ((ret = dwarf_reads64 (as, a, addr, &sval64, arg)) < 0)
         return ret;
-      val = sval64;
+      val = (unw_word_t) sval64;
       break;
 
     default:
diff --git a/src/native/external/libunwind/include/libunwind_i.h b/src/native/external/libunwind/include/libunwind_i.h
index 1dbcb6a86d0f..4140d88a10c6 100644
--- a/src/native/external/libunwind/include/libunwind_i.h
+++ b/src/native/external/libunwind/include/libunwind_i.h
@@ -333,7 +333,7 @@ static inline void _unw_debug(int level, char const * const fname, char const *
 
       if (level > 16) level = 16;
       int bcount = snprintf (buf, buf_size, "%*c>%s: ", level, ' ', fname);
-      int res = write(STDERR_FILENO, buf, bcount);
+      ssize_t res = write(STDERR_FILENO, buf, bcount);
 
       va_list ap;
       va_start(ap, fmt);
@@ -350,7 +350,7 @@ static inline void _unw_debug(int level, char const * const fname, char const *
 # define Dprintf( /* format */ ...)
 #endif /* defined(UNW_DEBUG) */
 
-static ALWAYS_INLINE int
+static ALWAYS_INLINE ssize_t
 print_error (const char *string)
 {
   return write (2, string, strlen (string));
@@ -419,6 +419,9 @@ static inline void invalidate_edi (struct elf_dyn_info *edi)
 # define PT_ARM_EXIDX           0x70000001      /* ARM unwind segment */
 #endif /* !PT_ARM_EXIDX */
 
+#define DWARF_GET_MEM_LOC(l)    DWARF_GET_LOC(l)
+#define DWARF_GET_REG_LOC(l)    ((unw_regnum_t) DWARF_GET_LOC(l))
+
 #include "tdep/libunwind_i.h"
 
 #ifndef TDEP_DWARF_SP
diff --git a/src/native/external/libunwind/include/tdep-aarch64/libunwind_i.h b/src/native/external/libunwind/include/tdep-aarch64/libunwind_i.h
index ec1a2e91afd6..fd5554946712 100644
--- a/src/native/external/libunwind/include/tdep-aarch64/libunwind_i.h
+++ b/src/native/external/libunwind/include/tdep-aarch64/libunwind_i.h
@@ -197,10 +197,10 @@ dwarf_getfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t *val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        val, 0, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, (unw_word_t *) valp,
                                        0, c->as_arg)) < 0)
     return ret;
@@ -220,10 +220,10 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        &val, 1, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, (unw_word_t *) valp,
                                        1, c->as_arg)) < 0)
     return ret;
@@ -245,10 +245,10 @@ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                      0, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                      0, c->as_arg);
 }
 
@@ -265,10 +265,10 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), &val,
                                      1, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), &val,
                                      1, c->as_arg);
 }
 
diff --git a/src/native/external/libunwind/include/tdep-arm/ex_tables.h b/src/native/external/libunwind/include/tdep-arm/ex_tables.h
index 9df5e0a9fa4b..90a023d49554 100644
--- a/src/native/external/libunwind/include/tdep-arm/ex_tables.h
+++ b/src/native/external/libunwind/include/tdep-arm/ex_tables.h
@@ -49,7 +49,7 @@ struct arm_exbuf_data
 #define arm_exidx_apply_cmd     UNW_OBJ(arm_exidx_apply_cmd)
 
 int arm_exidx_extract (struct dwarf_cursor *c, uint8_t *buf);
-int arm_exidx_decode (const uint8_t *buf, uint8_t len, struct dwarf_cursor *c);
+int arm_exidx_decode (const uint8_t *buf, int len, struct dwarf_cursor *c);
 int arm_exidx_apply_cmd (struct arm_exbuf_data *edata, struct dwarf_cursor *c);
 
 #endif // ARM_EX_TABLES_H
diff --git a/src/native/external/libunwind/include/tdep-arm/libunwind_i.h b/src/native/external/libunwind/include/tdep-arm/libunwind_i.h
index 35b13c79fbac..0f55dd04ba09 100644
--- a/src/native/external/libunwind/include/tdep-arm/libunwind_i.h
+++ b/src/native/external/libunwind/include/tdep-arm/libunwind_i.h
@@ -178,7 +178,7 @@ dwarf_getfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t *val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        val, 0, c->as_arg);
 
   addr = DWARF_GET_LOC (loc);
@@ -201,7 +201,7 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        &val, 1, c->as_arg);
 
   addr = DWARF_GET_LOC (loc);
@@ -226,10 +226,10 @@ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                      0, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                      0, c->as_arg);
 }
 
@@ -246,10 +246,10 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), &val,
                                      1, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), &val,
                                      1, c->as_arg);
 }
 
diff --git a/src/native/external/libunwind/include/tdep-hppa/libunwind_i.h b/src/native/external/libunwind/include/tdep-hppa/libunwind_i.h
index 1b6757fb1361..ce60dcf14b67 100644
--- a/src/native/external/libunwind/include/tdep-hppa/libunwind_i.h
+++ b/src/native/external/libunwind/include/tdep-hppa/libunwind_i.h
@@ -146,10 +146,10 @@ dwarf_getfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t *val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        val, 0, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, (unw_word_t *) valp,
                                        0, c->as_arg)) < 0)
     return ret;
@@ -169,10 +169,10 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        &val, 1, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, (unw_word_t *) valp,
                                        1, c->as_arg)) < 0)
     return ret;
@@ -194,10 +194,10 @@ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                      0, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                      0, c->as_arg);
 }
 
@@ -214,10 +214,10 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), &val,
                                      1, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), &val,
                                      1, c->as_arg);
 }
 
diff --git a/src/native/external/libunwind/include/tdep-loongarch64/libunwind_i.h b/src/native/external/libunwind/include/tdep-loongarch64/libunwind_i.h
index d21c9229766e..11fe95d6f110 100644
--- a/src/native/external/libunwind/include/tdep-loongarch64/libunwind_i.h
+++ b/src/native/external/libunwind/include/tdep-loongarch64/libunwind_i.h
@@ -167,10 +167,10 @@ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                      0, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                      0, c->as_arg);
 }
 
diff --git a/src/native/external/libunwind/include/tdep-mips/libunwind_i.h b/src/native/external/libunwind/include/tdep-mips/libunwind_i.h
index b0e623499d05..a3bd4479ae39 100644
--- a/src/native/external/libunwind/include/tdep-mips/libunwind_i.h
+++ b/src/native/external/libunwind/include/tdep-mips/libunwind_i.h
@@ -195,10 +195,10 @@ dwarf_getfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t *val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        val, 0, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, (unw_word_t *) valp,
                                        0, c->as_arg)) < 0)
     return ret;
@@ -218,10 +218,10 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        &val, 1, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, (unw_word_t *) valp,
                                        1, c->as_arg)) < 0)
     return ret;
@@ -243,20 +243,20 @@ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                      0, c->as_arg);
   else if (c->as->abi == UNW_MIPS_ABI_O32)
-    return read_s32 (c, DWARF_GET_LOC (loc), val);
+    return read_s32 (c, DWARF_GET_MEM_LOC (loc), val);
   else if (c->as->abi == UNW_MIPS_ABI_N32) {
     if (tdep_big_endian(c->as))
-      return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc) + 4, val,
+      return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc) + 4, val,
                                        0, c->as_arg);
     else
-      return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+      return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                        0, c->as_arg);
   }
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                      0, c->as_arg);
 }
 
@@ -273,12 +273,12 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), &val,
                                      1, c->as_arg);
   else if (c->as->abi == UNW_MIPS_ABI_O32)
-    return write_s32 (c, DWARF_GET_LOC (loc), &val);
+    return write_s32 (c, DWARF_GET_MEM_LOC (loc), &val);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), &val,
                                      1, c->as_arg);
 }
 
diff --git a/src/native/external/libunwind/include/tdep-ppc32/libunwind_i.h b/src/native/external/libunwind/include/tdep-ppc32/libunwind_i.h
index 46d4f5a8ed9d..469e02f24b8c 100644
--- a/src/native/external/libunwind/include/tdep-ppc32/libunwind_i.h
+++ b/src/native/external/libunwind/include/tdep-ppc32/libunwind_i.h
@@ -130,10 +130,10 @@ dwarf_getvr (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t * val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                       val, 0, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
 
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, valp,
                                        0, c->as_arg)) < 0)
@@ -156,10 +156,10 @@ dwarf_putvr (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                       &val, 1, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, valp,
                                        1, c->as_arg)) < 0)
     return ret;
@@ -180,10 +180,10 @@ dwarf_getfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t * val)
   assert (!DWARF_IS_V_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        val, 0, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   return (*c->as->acc.access_mem) (c->as, addr + 0, valp, 0, c->as_arg);
 
 }
@@ -201,10 +201,10 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
   assert (!DWARF_IS_V_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        &val, 1, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
 
   return (*c->as->acc.access_mem) (c->as, addr + 0, valp, 1, c->as_arg);
 }
@@ -223,10 +223,10 @@ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t * val)
   assert (!DWARF_IS_V_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                      0, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                      0, c->as_arg);
 }
 
@@ -244,10 +244,10 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
   assert (!DWARF_IS_V_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), &val,
                                      1, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), &val,
                                      1, c->as_arg);
 }
 
diff --git a/src/native/external/libunwind/include/tdep-ppc64/libunwind_i.h b/src/native/external/libunwind/include/tdep-ppc64/libunwind_i.h
index a93d56931843..0767706956b8 100644
--- a/src/native/external/libunwind/include/tdep-ppc64/libunwind_i.h
+++ b/src/native/external/libunwind/include/tdep-ppc64/libunwind_i.h
@@ -183,10 +183,10 @@ dwarf_getvr (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t * val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                       val, 0, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
 
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, valp,
                                        0, c->as_arg)) < 0)
@@ -209,10 +209,10 @@ dwarf_putvr (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                       &val, 1, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, valp,
                                        1, c->as_arg)) < 0)
     return ret;
@@ -233,12 +233,11 @@ dwarf_getfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t * val)
   assert (!DWARF_IS_V_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        val, 0, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   return (*c->as->acc.access_mem) (c->as, addr + 0, valp, 0, c->as_arg);
-
 }
 
 static inline int
@@ -254,11 +253,10 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
   assert (!DWARF_IS_V_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        &val, 1, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
-
+  addr = DWARF_GET_MEM_LOC (loc);
   return (*c->as->acc.access_mem) (c->as, addr + 0, valp, 1, c->as_arg);
 }
 
@@ -276,10 +274,10 @@ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t * val)
   assert (!DWARF_IS_V_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                      0, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                      0, c->as_arg);
 }
 
@@ -297,10 +295,10 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
   assert (!DWARF_IS_V_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), &val,
                                      1, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), &val,
                                      1, c->as_arg);
 }
 
diff --git a/src/native/external/libunwind/include/tdep-riscv/libunwind_i.h b/src/native/external/libunwind/include/tdep-riscv/libunwind_i.h
index 951de12a0bc9..b0aebc35801b 100644
--- a/src/native/external/libunwind/include/tdep-riscv/libunwind_i.h
+++ b/src/native/external/libunwind/include/tdep-riscv/libunwind_i.h
@@ -169,11 +169,11 @@ dwarf_getfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t *val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        val, 0, c->as_arg);
 
   /* FIXME: unw_word_t may not be equal to FLEN */
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
 #if __riscv_xlen == __riscv_flen
   return (*c->as->acc.access_mem) (c->as, addr, (unw_word_t *) valp,
                                        0, c->as_arg);
@@ -192,11 +192,11 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        &val, 1, c->as_arg);
 
   /* FIXME: unw_word_t may not be equal to FLEN */
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
 #if __riscv_xlen == __riscv_flen
   return (*c->as->acc.access_mem) (c->as, addr, (unw_word_t *) valp,
                                        1, c->as_arg);
@@ -218,10 +218,10 @@ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                      0, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                      0, c->as_arg);
 }
 
@@ -238,10 +238,10 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), &val,
                                      1, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), &val,
                                      1, c->as_arg);
 }
 
diff --git a/src/native/external/libunwind/include/tdep-s390x/libunwind_i.h b/src/native/external/libunwind/include/tdep-s390x/libunwind_i.h
index a6af60c9c61d..70605a3f8b50 100644
--- a/src/native/external/libunwind/include/tdep-s390x/libunwind_i.h
+++ b/src/native/external/libunwind/include/tdep-s390x/libunwind_i.h
@@ -123,17 +123,17 @@ dwarf_getfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t *val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_FP_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                        0, c->as_arg);
   /* FPRs may be saved in GPRs */
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), (unw_word_t*)val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), (unw_word_t*)val,
                                      0, c->as_arg);
   if (DWARF_IS_MEM_LOC (loc))
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), (unw_word_t*)val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), (unw_word_t*)val,
                                      0, c->as_arg);
   assert(DWARF_IS_VAL_LOC (loc));
-  *val = *(unw_fpreg_t*) DWARF_GET_LOC (loc);
+  *val = *(unw_fpreg_t*) DWARF_GET_MEM_LOC (loc);
   return 0;
 }
 
@@ -147,15 +147,15 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_FP_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc), &val,
                                        1, c->as_arg);
   /* FPRs may be saved in GPRs */
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), (unw_word_t*) &val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), (unw_word_t*) &val,
                                      1, c->as_arg);
 
   assert(DWARF_IS_MEM_LOC (loc));
-  return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), (unw_word_t*) &val,
+  return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), (unw_word_t*) &val,
                                    1, c->as_arg);
 }
 
@@ -169,13 +169,13 @@ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
 
   /* GPRs may be saved in FPRs */
   if (DWARF_IS_FP_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc), (unw_fpreg_t*)val,
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc), (unw_fpreg_t*)val,
                                        0, c->as_arg);
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                      0, c->as_arg);
   if (DWARF_IS_MEM_LOC (loc))
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                      0, c->as_arg);
   assert(DWARF_IS_VAL_LOC (loc));
   *val = DWARF_GET_LOC (loc);
@@ -193,14 +193,14 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
 
   /* GPRs may be saved in FPRs */
   if (DWARF_IS_FP_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc), (unw_fpreg_t*) &val,
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc), (unw_fpreg_t*) &val,
                                        1, c->as_arg);
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), &val,
                                      1, c->as_arg);
 
   assert(DWARF_IS_MEM_LOC (loc));
-  return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
+  return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), &val,
                                    1, c->as_arg);
 }
 
diff --git a/src/native/external/libunwind/include/tdep-sh/libunwind_i.h b/src/native/external/libunwind/include/tdep-sh/libunwind_i.h
index 4f4a5cdd0675..e5b048235124 100644
--- a/src/native/external/libunwind/include/tdep-sh/libunwind_i.h
+++ b/src/native/external/libunwind/include/tdep-sh/libunwind_i.h
@@ -147,10 +147,10 @@ dwarf_getfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t *val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        val, 0, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, (unw_word_t *) valp,
                                        0, c->as_arg)) < 0)
     return ret;
@@ -170,10 +170,10 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        &val, 1, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, (unw_word_t *) valp,
                                        1, c->as_arg)) < 0)
     return ret;
@@ -195,10 +195,10 @@ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                      0, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                      0, c->as_arg);
 }
 
@@ -215,10 +215,10 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
   assert (!DWARF_IS_FP_LOC (loc));
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), &val,
                                      1, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), &val,
                                      1, c->as_arg);
 }
 
diff --git a/src/native/external/libunwind/include/tdep-x86/libunwind_i.h b/src/native/external/libunwind/include/tdep-x86/libunwind_i.h
index 58e583c3b3f2..1f4f07abf779 100644
--- a/src/native/external/libunwind/include/tdep-x86/libunwind_i.h
+++ b/src/native/external/libunwind/include/tdep-x86/libunwind_i.h
@@ -144,10 +144,10 @@ dwarf_getfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t *val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        val, 0, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, (unw_word_t *) valp,
                                        0, c->as_arg)) < 0)
     return ret;
@@ -167,10 +167,10 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_LOC (loc),
+    return (*c->as->acc.access_fpreg) (c->as, DWARF_GET_REG_LOC (loc),
                                        &val, 1, c->as_arg);
 
-  addr = DWARF_GET_LOC (loc);
+  addr = DWARF_GET_MEM_LOC (loc);
   if ((ret = (*c->as->acc.access_mem) (c->as, addr + 0, (unw_word_t *) valp,
                                        1, c->as_arg)) < 0)
     return ret;
@@ -188,10 +188,10 @@ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                      0, c->as_arg);
   if (DWARF_IS_MEM_LOC (loc))
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                      0, c->as_arg);
   assert(DWARF_IS_VAL_LOC (loc));
   *val = DWARF_GET_LOC (loc);
@@ -207,10 +207,10 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), &val,
                                      1, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), &val,
                                      1, c->as_arg);
 }
 
diff --git a/src/native/external/libunwind/include/tdep-x86_64/libunwind_i.h b/src/native/external/libunwind/include/tdep-x86_64/libunwind_i.h
index 7ec16aafdcdc..683b397f8bbb 100644
--- a/src/native/external/libunwind/include/tdep-x86_64/libunwind_i.h
+++ b/src/native/external/libunwind/include/tdep-x86_64/libunwind_i.h
@@ -199,10 +199,10 @@ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), val,
                                      0, c->as_arg);
   if (DWARF_IS_MEM_LOC (loc))
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), val,
                                      0, c->as_arg);
   assert(DWARF_IS_VAL_LOC (loc));
   *val = DWARF_GET_LOC (loc);
@@ -218,10 +218,10 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
     return -UNW_EBADREG;
 
   if (DWARF_IS_REG_LOC (loc))
-    return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_reg) (c->as, DWARF_GET_REG_LOC (loc), &val,
                                      1, c->as_arg);
   else
-    return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
+    return (*c->as->acc.access_mem) (c->as, DWARF_GET_MEM_LOC (loc), &val,
                                      1, c->as_arg);
 }
 
diff --git a/src/native/external/libunwind/src/aarch64/Gget_save_loc.c b/src/native/external/libunwind/src/aarch64/Gget_save_loc.c
index 86bbbd03d11b..9fbef2488127 100644
--- a/src/native/external/libunwind/src/aarch64/Gget_save_loc.c
+++ b/src/native/external/libunwind/src/aarch64/Gget_save_loc.c
@@ -88,13 +88,13 @@ unw_get_save_loc (unw_cursor_t *cursor, int reg, unw_save_loc_t *sloc)
   if (DWARF_IS_REG_LOC (loc))
     {
       sloc->type = UNW_SLT_REG;
-      sloc->u.regnum = DWARF_GET_LOC (loc);
+      sloc->u.regnum = DWARF_GET_REG_LOC (loc);
     }
   else
 #endif
     {
       sloc->type = UNW_SLT_MEMORY;
-      sloc->u.addr = DWARF_GET_LOC (loc);
+      sloc->u.addr = DWARF_GET_MEM_LOC (loc);
     }
   return 0;
 }
diff --git a/src/native/external/libunwind/src/aarch64/Gstash_frame.c b/src/native/external/libunwind/src/aarch64/Gstash_frame.c
index c6f370a44285..7eb317d92690 100644
--- a/src/native/external/libunwind/src/aarch64/Gstash_frame.c
+++ b/src/native/external/libunwind/src/aarch64/Gstash_frame.c
@@ -54,25 +54,25 @@ tdep_stash_frame (struct dwarf_cursor *d, struct dwarf_reg_state *rs)
       && (rs->reg.where[DWARF_CFA_REG_COLUMN] == DWARF_WHERE_REG)
       && (rs->reg.val[DWARF_CFA_REG_COLUMN] == FP
           || rs->reg.val[DWARF_CFA_REG_COLUMN] == SP)
-      && labs(rs->reg.val[DWARF_CFA_OFF_COLUMN]) < (1 << 29)
+      && labs((long)rs->reg.val[DWARF_CFA_OFF_COLUMN]) < (1 << 29)
       && rs->ret_addr_column == LR
       && (rs->reg.where[FP] == DWARF_WHERE_UNDEF
           || rs->reg.where[FP] == DWARF_WHERE_SAME
           || rs->reg.where[FP] == DWARF_WHERE_CFA
           || (rs->reg.where[FP] == DWARF_WHERE_CFAREL
-              && labs(rs->reg.val[FP]) < (1 << 29)
+              && labs((long)rs->reg.val[FP]) < (1 << 29)
               && rs->reg.val[FP]+1 != 0))
       && (rs->reg.where[LR] == DWARF_WHERE_UNDEF
           || rs->reg.where[LR] == DWARF_WHERE_SAME
           || rs->reg.where[LR] == DWARF_WHERE_CFA
           || (rs->reg.where[LR] == DWARF_WHERE_CFAREL
-              && labs(rs->reg.val[LR]) < (1 << 29)
+              && labs((long)rs->reg.val[LR]) < (1 << 29)
               && rs->reg.val[LR]+1 != 0))
       && (rs->reg.where[SP] == DWARF_WHERE_UNDEF
           || rs->reg.where[SP] == DWARF_WHERE_SAME
           || rs->reg.where[SP] == DWARF_WHERE_CFA
           || (rs->reg.where[SP] == DWARF_WHERE_CFAREL
-              && labs(rs->reg.val[SP]) < (1 << 29)
+              && labs((long)rs->reg.val[SP]) < (1 << 29)
               && rs->reg.val[SP]+1 != 0)))
   {
     /* Save information for a standard frame. */
diff --git a/src/native/external/libunwind/src/arm/Gex_tables.c b/src/native/external/libunwind/src/arm/Gex_tables.c
index 56bbd0d07666..1d93e1d8c929 100644
--- a/src/native/external/libunwind/src/arm/Gex_tables.c
+++ b/src/native/external/libunwind/src/arm/Gex_tables.c
@@ -26,7 +26,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 specific unwind information.  Documentation about the exception handling
 ABI for the ARM architecture can be found at:
 http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf
-*/ 
+*/
 
 #include "libunwind_i.h"
 
@@ -151,7 +151,7 @@ arm_exidx_apply_cmd (struct arm_exbuf_data *edata, struct dwarf_cursor *c)
  * arm_exidx_apply_cmd that applies the command onto the dwarf_cursor.
  */
 HIDDEN int
-arm_exidx_decode (const uint8_t *buf, uint8_t len, struct dwarf_cursor *c)
+arm_exidx_decode (const uint8_t *buf, int len, struct dwarf_cursor *c)
 {
 #define READ_OP() *buf++
   assert(buf != NULL);
@@ -284,7 +284,7 @@ arm_exidx_decode (const uint8_t *buf, uint8_t len, struct dwarf_cursor *c)
 
 /**
  * Reads the entry from the given cursor and extracts the unwind instructions
- * into buf.  Returns the number of the extracted unwind insns or 
+ * into buf.  Returns the number of the extracted unwind insns or
  * -UNW_ESTOPUNWIND if the special bit pattern ARM_EXIDX_CANT_UNWIND (0x1) was
  * found.
  */
@@ -297,7 +297,7 @@ arm_exidx_extract (struct dwarf_cursor *c, uint8_t *buf)
   uint32_t data;
 
   /* An ARM unwind entry consists of a prel31 offset to the start of a
-     function followed by 31bits of data: 
+     function followed by 31bits of data:
        * if set to 0x1: the function cannot be unwound (EXIDX_CANTUNWIND)
        * if bit 31 is one: this is a table entry itself (ARM_EXIDX_COMPACT)
        * if bit 31 is zero: this is a prel31 offset of the start of the
@@ -317,9 +317,9 @@ arm_exidx_extract (struct dwarf_cursor *c, uint8_t *buf)
     {
       Debug (2, "%p compact model %d [%8.8x]\n", (void *)addr,
              (data >> 24) & 0x7f, data);
-      buf[nbuf++] = data >> 16;
-      buf[nbuf++] = data >> 8;
-      buf[nbuf++] = data;
+      buf[nbuf++] = (uint8_t) (data >> 16);
+      buf[nbuf++] = (uint8_t) (data >> 8);
+      buf[nbuf++] = (uint8_t) data;
     }
   else
     {
@@ -342,9 +342,11 @@ arm_exidx_extract (struct dwarf_cursor *c, uint8_t *buf)
               extbl_data += 4;
             }
           else
-            buf[nbuf++] = data >> 16;
-          buf[nbuf++] = data >> 8;
-          buf[nbuf++] = data;
+            {
+              buf[nbuf++] = (uint8_t) (data >> 16);
+            }
+          buf[nbuf++] = (uint8_t) (data >> 8);
+          buf[nbuf++] = (uint8_t) data;
         }
       else
         {
@@ -357,9 +359,9 @@ arm_exidx_extract (struct dwarf_cursor *c, uint8_t *buf)
                                        c->as_arg) < 0)
             return -UNW_EINVAL;
           n_table_words = data >> 24;
-          buf[nbuf++] = data >> 16;
-          buf[nbuf++] = data >> 8;
-          buf[nbuf++] = data;
+          buf[nbuf++] = (uint8_t) (data >> 16);
+          buf[nbuf++] = (uint8_t) (data >> 8);
+          buf[nbuf++] = (uint8_t) data;
           extbl_data += 8;
         }
       assert (n_table_words <= 5);
@@ -370,10 +372,10 @@ arm_exidx_extract (struct dwarf_cursor *c, uint8_t *buf)
                                        c->as_arg) < 0)
             return -UNW_EINVAL;
           extbl_data += 4;
-          buf[nbuf++] = data >> 24;
-          buf[nbuf++] = data >> 16;
-          buf[nbuf++] = data >> 8;
-          buf[nbuf++] = data >> 0;
+          buf[nbuf++] = (uint8_t) (data >> 24);
+          buf[nbuf++] = (uint8_t) (data >> 16);
+          buf[nbuf++] = (uint8_t) (data >> 8);
+          buf[nbuf++] = (uint8_t) data;
         }
     }
 
@@ -458,7 +460,7 @@ tdep_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
            && di->format != UNW_INFO_FORMAT_ARM_EXIDX)
     return dwarf_search_unwind_table (as, ip, di, pi, need_unwind_info, arg);
 
-  return -UNW_ENOINFO; 
+  return -UNW_ENOINFO;
 }
 
 #ifndef UNW_REMOTE_ONLY
diff --git a/src/native/external/libunwind/src/arm/Gget_save_loc.c b/src/native/external/libunwind/src/arm/Gget_save_loc.c
index 906c5b180d0d..e9b43fc6dd2d 100644
--- a/src/native/external/libunwind/src/arm/Gget_save_loc.c
+++ b/src/native/external/libunwind/src/arm/Gget_save_loc.c
@@ -88,13 +88,13 @@ unw_get_save_loc (unw_cursor_t *cursor, int reg, unw_save_loc_t *sloc)
   if (DWARF_IS_REG_LOC (loc))
     {
       sloc->type = UNW_SLT_REG;
-      sloc->u.regnum = DWARF_GET_LOC (loc);
+      sloc->u.regnum = DWARF_GET_REG_LOC (loc);
     }
   else
 #endif
     {
       sloc->type = UNW_SLT_MEMORY;
-      sloc->u.addr = DWARF_GET_LOC (loc);
+      sloc->u.addr = DWARF_GET_MEM_LOC (loc);
     }
   return 0;
 }
diff --git a/src/native/external/libunwind/src/dwarf/Gexpr.c b/src/native/external/libunwind/src/dwarf/Gexpr.c
index 4a8da2ce1bb3..ddaeb7b0b780 100644
--- a/src/native/external/libunwind/src/dwarf/Gexpr.c
+++ b/src/native/external/libunwind/src/dwarf/Gexpr.c
@@ -122,7 +122,7 @@ sword (unw_addr_space_t as UNUSED, unw_word_t val)
     }
 }
 
-static inline unw_word_t
+static inline int
 read_operand (unw_addr_space_t as, unw_accessors_t *a,
               unw_word_t *addr, int operand_type, unw_word_t *val, void *arg)
 {
@@ -169,7 +169,7 @@ read_operand (unw_addr_space_t as, unw_accessors_t *a,
       ret = dwarf_readu64 (as, a, addr, &u64, arg);
       if (ret < 0)
         return ret;
-      *val = u64;
+      *val = (unw_word_t) u64;
       break;
 
     case ULEB128:
@@ -366,7 +366,7 @@ if (stackerror)                                 \
           Debug (15, "OP_bregx(r%d,0x%lx)\n",
                  (int) operand1, (unsigned long) operand2);
           if ((ret = unw_get_reg (dwarf_to_cursor (c),
-                                  dwarf_to_unw_regnum (operand1), &tmp1)) < 0)
+                                  dwarf_to_unw_regnum ((int) operand1), &tmp1)) < 0)
             return ret;
           push (tmp1 + operand2);
           break;
@@ -475,7 +475,7 @@ if (stackerror)                                 \
             case 8:
               if ((ret = dwarf_readu64 (as, a, &tmp1, &u64, arg)) < 0)
                 return ret;
-              tmp2 = u64;
+              tmp2 = (unw_word_t) u64;
               if (operand1 != 8)
                 {
                   if (dwarf_is_big_endian (as))
diff --git a/src/native/external/libunwind/src/dwarf/Gfde.c b/src/native/external/libunwind/src/dwarf/Gfde.c
index 3847d0a03c07..3752e0a9d3ee 100644
--- a/src/native/external/libunwind/src/dwarf/Gfde.c
+++ b/src/native/external/libunwind/src/dwarf/Gfde.c
@@ -102,7 +102,7 @@ parse_cie (unw_addr_space_t as, unw_accessors_t *a, unw_word_t addr,
 
       if ((ret = dwarf_readu64 (as, a, &addr, &u64val, arg)) < 0)
         return ret;
-      len = u64val;
+      len = (unw_word_t) u64val;
       cie_end_addr = addr + len;
       if ((ret = dwarf_readu64 (as, a, &addr, &cie_id, arg)) < 0)
         return ret;
@@ -237,7 +237,8 @@ dwarf_extract_proc_info_from_fde (unw_addr_space_t as, unw_accessors_t *a,
 {
   unw_word_t fde_end_addr, cie_addr, cie_offset_addr, aug_end_addr = 0;
   unw_word_t start_ip, ip_range, aug_size, addr = *addrp;
-  int ret, ip_range_encoding;
+  int ret;
+  uint8_t ip_range_encoding;
   struct dwarf_cie_info dci;
   uint64_t u64val;
   uint32_t u32val;
@@ -288,18 +289,18 @@ dwarf_extract_proc_info_from_fde (unw_addr_space_t as, unw_accessors_t *a,
       if ((ret = dwarf_readu64 (as, a, &addr, &u64val, arg)) < 0)
         return ret;
 
-      *addrp = fde_end_addr = addr + u64val;
+      *addrp = fde_end_addr = (unw_word_t) (addr + u64val);
       cie_offset_addr = addr;
 
       if ((ret = dwarf_reads64 (as, a, &addr, &cie_offset, arg)) < 0)
         return ret;
 
-      if (is_cie_id (cie_offset, is_debug_frame))
+      if (is_cie_id ((unw_word_t) cie_offset, is_debug_frame))
         /* ignore CIEs (happens during linear searches) */
         return 0;
 
       if (is_debug_frame)
-        cie_addr = base + cie_offset;
+        cie_addr = (unw_word_t) (base + cie_offset);
       else
         /* DWARF says that the CIE_pointer in the FDE is a
            .debug_frame-relative offset, but the GCC-generated .eh_frame
diff --git a/src/native/external/libunwind/src/dwarf/Gfind_proc_info-lsb.c b/src/native/external/libunwind/src/dwarf/Gfind_proc_info-lsb.c
index c11345e88383..7e681477da48 100644
--- a/src/native/external/libunwind/src/dwarf/Gfind_proc_info-lsb.c
+++ b/src/native/external/libunwind/src/dwarf/Gfind_proc_info-lsb.c
@@ -975,10 +975,10 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
 #endif
     {
 #ifndef UNW_LOCAL_ONLY
-      int32_t last_ip_offset = di->end_ip - ip_base - di->load_offset;
+      int32_t last_ip_offset = (int32_t) (di->end_ip - ip_base - di->load_offset);
       segbase = di->u.rti.segbase;
       if ((ret = remote_lookup (as, (uintptr_t) table, table_len,
-                                ip - ip_base, &ent, &last_ip_offset, arg)) < 0)
+                                (int32_t) (ip - ip_base), &ent, &last_ip_offset, arg)) < 0)
         return ret;
       if (ret)
 	{
diff --git a/src/native/external/libunwind/src/dwarf/Gparser.c b/src/native/external/libunwind/src/dwarf/Gparser.c
index 7a5d7e1f0ff3..0616b2359a9b 100644
--- a/src/native/external/libunwind/src/dwarf/Gparser.c
+++ b/src/native/external/libunwind/src/dwarf/Gparser.c
@@ -666,7 +666,7 @@ hash (unw_word_t ip, unsigned short log_size)
   /* based on (sqrt(5)/2-1)*2^64 */
 # define magic  ((unw_word_t) 0x9e3779b97f4a7c16ULL)
 
-  return ip * magic >> ((sizeof(unw_word_t) * 8) - (log_size + 1));
+  return (unw_hash_index_t) (ip * magic >> ((sizeof(unw_word_t) * 8) - (log_size + 1)));
 }
 
 static inline long
@@ -730,7 +730,7 @@ rs_new (struct dwarf_rs_cache *cache, struct dwarf_cursor * c)
 
   cache->links[head].ip = c->ip;
   cache->links[head].valid = 1;
-  cache->links[head].signal_frame = tdep_cache_frame(c);
+  cache->links[head].signal_frame = tdep_cache_frame(c) ? 1 : 0;
   return cache->buckets + head;
 }
 
@@ -841,7 +841,8 @@ aarch64_get_ra_sign_state(struct dwarf_reg_state *rs)
 static int
 apply_reg_state (struct dwarf_cursor *c, struct dwarf_reg_state *rs)
 {
-  unw_word_t regnum, addr, cfa, ip;
+  unw_regnum_t regnum;
+  unw_word_t addr, cfa, ip;
   unw_word_t prev_ip, prev_cfa;
   unw_addr_space_t as;
   dwarf_loc_t cfa_loc;
@@ -881,7 +882,7 @@ apply_reg_state (struct dwarf_cursor *c, struct dwarf_reg_state *rs)
           cfa = c->cfa;
       else
         {
-          regnum = dwarf_to_unw_regnum (rs->reg.val[DWARF_CFA_REG_COLUMN]);
+          regnum = dwarf_to_unw_regnum ((unw_regnum_t) rs->reg.val[DWARF_CFA_REG_COLUMN]);
           if ((ret = unw_get_reg (dwarf_to_cursor(c), regnum, &cfa)) < 0)
             return ret;
         }
@@ -1015,7 +1016,7 @@ find_reg_state (struct dwarf_cursor *c, dwarf_state_record_t *sr)
       (rs = rs_lookup(cache, c)))
     {
       /* update hint; no locking needed: single-word writes are atomic */
-      unsigned short index = rs - cache->buckets;
+      unsigned short index = (unsigned short) (rs - cache->buckets);
       c->use_prev_instr = ! cache->links[index].signal_frame;
       memcpy (&sr->rs_current, rs, sizeof (*rs));
     }
@@ -1047,7 +1048,7 @@ find_reg_state (struct dwarf_cursor *c, dwarf_state_record_t *sr)
     {
       if (rs)
 	{
-	  index = rs - cache->buckets;
+	  index = (unsigned short) (rs - cache->buckets);
 	  c->hint = cache->links[index].hint;
 	  cache->links[c->prev_rs].hint = index + 1;
 	  c->prev_rs = index;
diff --git a/src/native/external/libunwind/src/hppa/Gget_save_loc.c b/src/native/external/libunwind/src/hppa/Gget_save_loc.c
index 02dfa3084f91..fa4088da85b6 100644
--- a/src/native/external/libunwind/src/hppa/Gget_save_loc.c
+++ b/src/native/external/libunwind/src/hppa/Gget_save_loc.c
@@ -47,13 +47,13 @@ unw_get_save_loc (unw_cursor_t *cursor, int reg, unw_save_loc_t *sloc)
   if (DWARF_IS_REG_LOC (loc))
     {
       sloc->type = UNW_SLT_REG;
-      sloc->u.regnum = DWARF_GET_LOC (loc);
+      sloc->u.regnum = DWARF_GET_REG_LOC (loc);
     }
   else
 #endif
     {
       sloc->type = UNW_SLT_MEMORY;
-      sloc->u.addr = DWARF_GET_LOC (loc);
+      sloc->u.addr = DWARF_GET_MEM_LOC (loc);
     }
   return 0;
 }
diff --git a/src/native/external/libunwind/src/loongarch64/Gget_save_loc.c b/src/native/external/libunwind/src/loongarch64/Gget_save_loc.c
index edc765744ad9..13ab43d42efb 100644
--- a/src/native/external/libunwind/src/loongarch64/Gget_save_loc.c
+++ b/src/native/external/libunwind/src/loongarch64/Gget_save_loc.c
@@ -89,13 +89,13 @@ unw_get_save_loc (unw_cursor_t *cursor, int reg, unw_save_loc_t *sloc)
   if (DWARF_IS_REG_LOC (loc))
     {
       sloc->type = UNW_SLT_REG;
-      sloc->u.regnum = DWARF_GET_LOC (loc);
+      sloc->u.regnum = DWARF_GET_REG_LOC (loc);
     }
   else
 #endif
     {
       sloc->type = UNW_SLT_MEMORY;
-      sloc->u.addr = DWARF_GET_LOC (loc);
+      sloc->u.addr = DWARF_GET_MEM_LOC (loc);
     }
   return 0;
 }
diff --git a/src/native/external/libunwind/src/mi/Gdyn-remote.c b/src/native/external/libunwind/src/mi/Gdyn-remote.c
index 6d4ec1ecf869..ec2667e216d6 100644
--- a/src/native/external/libunwind/src/mi/Gdyn-remote.c
+++ b/src/native/external/libunwind/src/mi/Gdyn-remote.c
@@ -101,7 +101,7 @@ intern_array (unw_addr_space_t as, unw_accessors_t *a,
               unw_word_t *addr, unw_word_t table_len, unw_word_t **table_data,
               void *arg)
 {
-  unw_word_t i, *data = calloc (table_len, WSIZE);
+  unw_word_t i, *data = calloc ((size_t) table_len, WSIZE);
   int ret = 0;
 
   if (!data)
diff --git a/src/native/external/libunwind/src/mips/Gget_save_loc.c b/src/native/external/libunwind/src/mips/Gget_save_loc.c
index c21f9b06d060..ca8adbd2a4c9 100644
--- a/src/native/external/libunwind/src/mips/Gget_save_loc.c
+++ b/src/native/external/libunwind/src/mips/Gget_save_loc.c
@@ -88,13 +88,13 @@ unw_get_save_loc (unw_cursor_t *cursor, int reg, unw_save_loc_t *sloc)
   if (DWARF_IS_REG_LOC (loc))
     {
       sloc->type = UNW_SLT_REG;
-      sloc->u.regnum = DWARF_GET_LOC (loc);
+      sloc->u.regnum = DWARF_GET_REG_LOC (loc);
     }
   else
 #endif
     {
       sloc->type = UNW_SLT_MEMORY;
-      sloc->u.addr = DWARF_GET_LOC (loc);
+      sloc->u.addr = DWARF_GET_MEM_LOC (loc);
     }
   return 0;
 }
diff --git a/src/native/external/libunwind/src/riscv/Gget_save_loc.c b/src/native/external/libunwind/src/riscv/Gget_save_loc.c
index 342f8654fbc6..11aed5c0044f 100644
--- a/src/native/external/libunwind/src/riscv/Gget_save_loc.c
+++ b/src/native/external/libunwind/src/riscv/Gget_save_loc.c
@@ -85,13 +85,13 @@ unw_get_save_loc (unw_cursor_t *cursor, int reg, unw_save_loc_t *sloc)
   if (DWARF_IS_REG_LOC (loc))
     {
       sloc->type = UNW_SLT_REG;
-      sloc->u.regnum = DWARF_GET_LOC (loc);
+      sloc->u.regnum = DWARF_GET_REG_LOC (loc);
     }
   else
 #endif
     {
       sloc->type = UNW_SLT_MEMORY;
-      sloc->u.addr = DWARF_GET_LOC (loc);
+      sloc->u.addr = DWARF_GET_MEM_LOC (loc);
     }
   return 0;
 }
diff --git a/src/native/external/libunwind/src/s390x/Gget_save_loc.c b/src/native/external/libunwind/src/s390x/Gget_save_loc.c
index dc462c966e56..40d2f0e54e4f 100644
--- a/src/native/external/libunwind/src/s390x/Gget_save_loc.c
+++ b/src/native/external/libunwind/src/s390x/Gget_save_loc.c
@@ -74,13 +74,13 @@ unw_get_save_loc (unw_cursor_t *cursor, int reg, unw_save_loc_t *sloc)
   if (DWARF_IS_REG_LOC (loc))
     {
       sloc->type = UNW_SLT_REG;
-      sloc->u.regnum = DWARF_GET_LOC (loc);
+      sloc->u.regnum = DWARF_GET_REG_LOC (loc);
     }
   else
 #endif
     {
       sloc->type = UNW_SLT_MEMORY;
-      sloc->u.addr = DWARF_GET_LOC (loc);
+      sloc->u.addr = DWARF_GET_MEM_LOC (loc);
     }
   return 0;
 }
diff --git a/src/native/external/libunwind/src/sh/Gget_save_loc.c b/src/native/external/libunwind/src/sh/Gget_save_loc.c
index 24d9f63bc329..a9a884570514 100644
--- a/src/native/external/libunwind/src/sh/Gget_save_loc.c
+++ b/src/native/external/libunwind/src/sh/Gget_save_loc.c
@@ -71,13 +71,13 @@ unw_get_save_loc (unw_cursor_t *cursor, int reg, unw_save_loc_t *sloc)
   if (DWARF_IS_REG_LOC (loc))
     {
       sloc->type = UNW_SLT_REG;
-      sloc->u.regnum = DWARF_GET_LOC (loc);
+      sloc->u.regnum = DWARF_GET_REG_LOC (loc);
     }
   else
 #endif
     {
       sloc->type = UNW_SLT_MEMORY;
-      sloc->u.addr = DWARF_GET_LOC (loc);
+      sloc->u.addr = DWARF_GET_MEM_LOC (loc);
     }
   return 0;
 }
diff --git a/src/native/external/libunwind/src/x86/Gget_save_loc.c b/src/native/external/libunwind/src/x86/Gget_save_loc.c
index e459382f6d3c..849f1cd8bf89 100644
--- a/src/native/external/libunwind/src/x86/Gget_save_loc.c
+++ b/src/native/external/libunwind/src/x86/Gget_save_loc.c
@@ -121,13 +121,13 @@ unw_get_save_loc (unw_cursor_t *cursor, int reg, unw_save_loc_t *sloc)
   if (DWARF_IS_REG_LOC (loc))
     {
       sloc->type = UNW_SLT_REG;
-      sloc->u.regnum = DWARF_GET_LOC (loc);
+      sloc->u.regnum = DWARF_GET_REG_LOC (loc);
     }
   else
 #endif
     {
       sloc->type = UNW_SLT_MEMORY;
-      sloc->u.addr = DWARF_GET_LOC (loc);
+      sloc->u.addr = DWARF_GET_MEM_LOC (loc);
     }
   return 0;
 }
diff --git a/src/native/external/libunwind/src/x86_64/Gget_save_loc.c b/src/native/external/libunwind/src/x86_64/Gget_save_loc.c
index 40568700e0e4..9d51185220f5 100644
--- a/src/native/external/libunwind/src/x86_64/Gget_save_loc.c
+++ b/src/native/external/libunwind/src/x86_64/Gget_save_loc.c
@@ -62,13 +62,13 @@ unw_get_save_loc (unw_cursor_t *cursor, int reg, unw_save_loc_t *sloc)
   if (DWARF_IS_REG_LOC (loc))
     {
       sloc->type = UNW_SLT_REG;
-      sloc->u.regnum = DWARF_GET_LOC (loc);
+      sloc->u.regnum = DWARF_GET_REG_LOC (loc);
     }
   else
 #endif
     {
       sloc->type = UNW_SLT_MEMORY;
-      sloc->u.addr = DWARF_GET_LOC (loc);
+      sloc->u.addr = DWARF_GET_MEM_LOC (loc);
     }
   return 0;
 }
diff --git a/src/native/external/libunwind_extras/CMakeLists.txt b/src/native/external/libunwind_extras/CMakeLists.txt
index 07a4bb64db94..2bfd2194c969 100644
--- a/src/native/external/libunwind_extras/CMakeLists.txt
+++ b/src/native/external/libunwind_extras/CMakeLists.txt
@@ -140,7 +140,6 @@ if(CLR_CMAKE_HOST_WIN32)
 
     # Warnings in release builds
     add_compile_options(-wd4068) # ignore unknown pragma warnings (gcc pragmas)
-    add_compile_options(-wd4244) # possible loss of data
     add_compile_options(-wd4334) # 32-bit shift implicitly converted to 64 bits
 
     # Disable warning due to incorrect format specifier in debugging printf via the Debug macro
diff --git a/src/native/external/rapidjson-version.txt b/src/native/external/rapidjson-version.txt
index 0ccc08a3c223..b6f5f9532a7d 100644
--- a/src/native/external/rapidjson-version.txt
+++ b/src/native/external/rapidjson-version.txt
@@ -1,6 +1,6 @@
-d87b698d0fcc10a5f632ecbc80a9cb2a8fa094a5
+3f73edae00aba5b0112a80b4d41e6f1ff7d92a3d
 
-https://github.com/Tencent/rapidjson/commit/d87b698d0fcc10a5f632ecbc80a9cb2a8fa094a5
+https://github.com/Tencent/rapidjson/commit/3f73edae00aba5b0112a80b4d41e6f1ff7d92a3d
 
 Note: This library is not using a proper release lifecycle. v1.1.0 was the last version released in 2016.
-      Therefore, we are pointing to a random commit from 2019 rather than a version tag.
+      Therefore, we are pointing to a random commit from 2024 rather than a version tag.
diff --git a/src/native/external/rapidjson/README.TXT b/src/native/external/rapidjson/README.TXT
index bc0a70382f4a..9eff509a934d 100644
--- a/src/native/external/rapidjson/README.TXT
+++ b/src/native/external/rapidjson/README.TXT
@@ -1,2 +1,2 @@
-This directory contains the contents of `include/rapidjson` from 
-<https://github.com/tencent/rapidjson>, commit hash d87b698d0fcc10.
+This directory contains selective files from
+https://github.com/Tencent/rapidjson/tree/3f73edae00aba5b0112a80b4d41e6f1ff7d92a3d/include/rapidjson
diff --git a/src/native/external/rapidjson/allocators.h b/src/native/external/rapidjson/allocators.h
index cc67c8971323..275417bd8b37 100644
--- a/src/native/external/rapidjson/allocators.h
+++ b/src/native/external/rapidjson/allocators.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
@@ -16,6 +16,14 @@
 #define RAPIDJSON_ALLOCATORS_H_
 
 #include "rapidjson.h"
+#include "internal/meta.h"
+
+#include <memory>
+#include <limits>
+
+#if RAPIDJSON_HAS_CXX11
+#include <type_traits>
+#endif
 
 RAPIDJSON_NAMESPACE_BEGIN
 
@@ -77,19 +85,26 @@ class CrtAllocator {
     static const bool kNeedFree = true;
     void* Malloc(size_t size) { 
         if (size) //  behavior of malloc(0) is implementation defined.
-            return std::malloc(size);
+            return RAPIDJSON_MALLOC(size);
         else
             return NULL; // standardize to returning NULL.
     }
     void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
         (void)originalSize;
         if (newSize == 0) {
-            std::free(originalPtr);
+            RAPIDJSON_FREE(originalPtr);
             return NULL;
         }
-        return std::realloc(originalPtr, newSize);
+        return RAPIDJSON_REALLOC(originalPtr, newSize);
+    }
+    static void Free(void *ptr) RAPIDJSON_NOEXCEPT { RAPIDJSON_FREE(ptr); }
+
+    bool operator==(const CrtAllocator&) const RAPIDJSON_NOEXCEPT {
+        return true;
+    }
+    bool operator!=(const CrtAllocator&) const RAPIDJSON_NOEXCEPT {
+        return false;
     }
-    static void Free(void *ptr) { std::free(ptr); }
 };
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -113,16 +128,64 @@ class CrtAllocator {
 */
 template <typename BaseAllocator = CrtAllocator>
 class MemoryPoolAllocator {
+    //! Chunk header for perpending to each chunk.
+    /*! Chunks are stored as a singly linked list.
+    */
+    struct ChunkHeader {
+        size_t capacity;    //!< Capacity of the chunk in bytes (excluding the header itself).
+        size_t size;        //!< Current size of allocated memory in bytes.
+        ChunkHeader *next;  //!< Next chunk in the linked list.
+    };
+
+    struct SharedData {
+        ChunkHeader *chunkHead;  //!< Head of the chunk linked-list. Only the head chunk serves allocation.
+        BaseAllocator* ownBaseAllocator; //!< base allocator created by this object.
+        size_t refcount;
+        bool ownBuffer;
+    };
+
+    static const size_t SIZEOF_SHARED_DATA = RAPIDJSON_ALIGN(sizeof(SharedData));
+    static const size_t SIZEOF_CHUNK_HEADER = RAPIDJSON_ALIGN(sizeof(ChunkHeader));
+
+    static inline ChunkHeader *GetChunkHead(SharedData *shared)
+    {
+        return reinterpret_cast<ChunkHeader*>(reinterpret_cast<uint8_t*>(shared) + SIZEOF_SHARED_DATA);
+    }
+    static inline uint8_t *GetChunkBuffer(SharedData *shared)
+    {
+        return reinterpret_cast<uint8_t*>(shared->chunkHead) + SIZEOF_CHUNK_HEADER;
+    }
+
+    static const size_t kDefaultChunkCapacity = RAPIDJSON_ALLOCATOR_DEFAULT_CHUNK_CAPACITY; //!< Default chunk capacity.
+
 public:
     static const bool kNeedFree = false;    //!< Tell users that no need to call Free() with this allocator. (concept Allocator)
+    static const bool kRefCounted = true;   //!< Tell users that this allocator is reference counted on copy
 
     //! Constructor with chunkSize.
     /*! \param chunkSize The size of memory chunk. The default is kDefaultChunkSize.
         \param baseAllocator The allocator for allocating memory chunks.
     */
+    explicit
     MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : 
-        chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0)
+        chunk_capacity_(chunkSize),
+        baseAllocator_(baseAllocator ? baseAllocator : RAPIDJSON_NEW(BaseAllocator)()),
+        shared_(static_cast<SharedData*>(baseAllocator_ ? baseAllocator_->Malloc(SIZEOF_SHARED_DATA + SIZEOF_CHUNK_HEADER) : 0))
     {
+        RAPIDJSON_ASSERT(baseAllocator_ != 0);
+        RAPIDJSON_ASSERT(shared_ != 0);
+        if (baseAllocator) {
+            shared_->ownBaseAllocator = 0;
+        }
+        else {
+            shared_->ownBaseAllocator = baseAllocator_;
+        }
+        shared_->chunkHead = GetChunkHead(shared_);
+        shared_->chunkHead->capacity = 0;
+        shared_->chunkHead->size = 0;
+        shared_->chunkHead->next = 0;
+        shared_->ownBuffer = true;
+        shared_->refcount = 1;
     }
 
     //! Constructor with user-supplied buffer.
@@ -136,41 +199,101 @@ class MemoryPoolAllocator {
         \param baseAllocator The allocator for allocating memory chunks.
     */
     MemoryPoolAllocator(void *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) :
-        chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0)
+        chunk_capacity_(chunkSize),
+        baseAllocator_(baseAllocator),
+        shared_(static_cast<SharedData*>(AlignBuffer(buffer, size)))
+    {
+        RAPIDJSON_ASSERT(size >= SIZEOF_SHARED_DATA + SIZEOF_CHUNK_HEADER);
+        shared_->chunkHead = GetChunkHead(shared_);
+        shared_->chunkHead->capacity = size - SIZEOF_SHARED_DATA - SIZEOF_CHUNK_HEADER;
+        shared_->chunkHead->size = 0;
+        shared_->chunkHead->next = 0;
+        shared_->ownBaseAllocator = 0;
+        shared_->ownBuffer = false;
+        shared_->refcount = 1;
+    }
+
+    MemoryPoolAllocator(const MemoryPoolAllocator& rhs) RAPIDJSON_NOEXCEPT :
+        chunk_capacity_(rhs.chunk_capacity_),
+        baseAllocator_(rhs.baseAllocator_),
+        shared_(rhs.shared_)
+    {
+        RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
+        ++shared_->refcount;
+    }
+    MemoryPoolAllocator& operator=(const MemoryPoolAllocator& rhs) RAPIDJSON_NOEXCEPT
     {
-        RAPIDJSON_ASSERT(buffer != 0);
-        RAPIDJSON_ASSERT(size > sizeof(ChunkHeader));
-        chunkHead_ = reinterpret_cast<ChunkHeader*>(buffer);
-        chunkHead_->capacity = size - sizeof(ChunkHeader);
-        chunkHead_->size = 0;
-        chunkHead_->next = 0;
+        RAPIDJSON_NOEXCEPT_ASSERT(rhs.shared_->refcount > 0);
+        ++rhs.shared_->refcount;
+        this->~MemoryPoolAllocator();
+        baseAllocator_ = rhs.baseAllocator_;
+        chunk_capacity_ = rhs.chunk_capacity_;
+        shared_ = rhs.shared_;
+        return *this;
     }
 
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    MemoryPoolAllocator(MemoryPoolAllocator&& rhs) RAPIDJSON_NOEXCEPT :
+        chunk_capacity_(rhs.chunk_capacity_),
+        baseAllocator_(rhs.baseAllocator_),
+        shared_(rhs.shared_)
+    {
+        RAPIDJSON_NOEXCEPT_ASSERT(rhs.shared_->refcount > 0);
+        rhs.shared_ = 0;
+    }
+    MemoryPoolAllocator& operator=(MemoryPoolAllocator&& rhs) RAPIDJSON_NOEXCEPT
+    {
+        RAPIDJSON_NOEXCEPT_ASSERT(rhs.shared_->refcount > 0);
+        this->~MemoryPoolAllocator();
+        baseAllocator_ = rhs.baseAllocator_;
+        chunk_capacity_ = rhs.chunk_capacity_;
+        shared_ = rhs.shared_;
+        rhs.shared_ = 0;
+        return *this;
+    }
+#endif
+
     //! Destructor.
     /*! This deallocates all memory chunks, excluding the user-supplied buffer.
     */
-    ~MemoryPoolAllocator() {
+    ~MemoryPoolAllocator() RAPIDJSON_NOEXCEPT {
+        if (!shared_) {
+            // do nothing if moved
+            return;
+        }
+        if (shared_->refcount > 1) {
+            --shared_->refcount;
+            return;
+        }
         Clear();
-        RAPIDJSON_DELETE(ownBaseAllocator_);
+        BaseAllocator *a = shared_->ownBaseAllocator;
+        if (shared_->ownBuffer) {
+            baseAllocator_->Free(shared_);
+        }
+        RAPIDJSON_DELETE(a);
     }
 
-    //! Deallocates all memory chunks, excluding the user-supplied buffer.
-    void Clear() {
-        while (chunkHead_ && chunkHead_ != userBuffer_) {
-            ChunkHeader* next = chunkHead_->next;
-            baseAllocator_->Free(chunkHead_);
-            chunkHead_ = next;
+    //! Deallocates all memory chunks, excluding the first/user one.
+    void Clear() RAPIDJSON_NOEXCEPT {
+        RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
+        for (;;) {
+            ChunkHeader* c = shared_->chunkHead;
+            if (!c->next) {
+                break;
+            }
+            shared_->chunkHead = c->next;
+            baseAllocator_->Free(c);
         }
-        if (chunkHead_ && chunkHead_ == userBuffer_)
-            chunkHead_->size = 0; // Clear user buffer
+        shared_->chunkHead->size = 0;
     }
 
     //! Computes the total capacity of allocated memory chunks.
     /*! \return total capacity in bytes.
     */
-    size_t Capacity() const {
+    size_t Capacity() const RAPIDJSON_NOEXCEPT {
+        RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
         size_t capacity = 0;
-        for (ChunkHeader* c = chunkHead_; c != 0; c = c->next)
+        for (ChunkHeader* c = shared_->chunkHead; c != 0; c = c->next)
             capacity += c->capacity;
         return capacity;
     }
@@ -178,25 +301,35 @@ class MemoryPoolAllocator {
     //! Computes the memory blocks allocated.
     /*! \return total used bytes.
     */
-    size_t Size() const {
+    size_t Size() const RAPIDJSON_NOEXCEPT {
+        RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
         size_t size = 0;
-        for (ChunkHeader* c = chunkHead_; c != 0; c = c->next)
+        for (ChunkHeader* c = shared_->chunkHead; c != 0; c = c->next)
             size += c->size;
         return size;
     }
 
+    //! Whether the allocator is shared.
+    /*! \return true or false.
+    */
+    bool Shared() const RAPIDJSON_NOEXCEPT {
+        RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
+        return shared_->refcount > 1;
+    }
+
     //! Allocates a memory block. (concept Allocator)
     void* Malloc(size_t size) {
+        RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
         if (!size)
             return NULL;
 
         size = RAPIDJSON_ALIGN(size);
-        if (chunkHead_ == 0 || chunkHead_->size + size > chunkHead_->capacity)
+        if (RAPIDJSON_UNLIKELY(shared_->chunkHead->size + size > shared_->chunkHead->capacity))
             if (!AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size))
                 return NULL;
 
-        void *buffer = reinterpret_cast<char *>(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size;
-        chunkHead_->size += size;
+        void *buffer = GetChunkBuffer(shared_) + shared_->chunkHead->size;
+        shared_->chunkHead->size += size;
         return buffer;
     }
 
@@ -205,6 +338,7 @@ class MemoryPoolAllocator {
         if (originalPtr == 0)
             return Malloc(newSize);
 
+        RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
         if (newSize == 0)
             return NULL;
 
@@ -216,10 +350,10 @@ class MemoryPoolAllocator {
             return originalPtr;
 
         // Simply expand it if it is the last allocation and there is sufficient space
-        if (originalPtr == reinterpret_cast<char *>(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size - originalSize) {
+        if (originalPtr == GetChunkBuffer(shared_) + shared_->chunkHead->size - originalSize) {
             size_t increment = static_cast<size_t>(newSize - originalSize);
-            if (chunkHead_->size + increment <= chunkHead_->capacity) {
-                chunkHead_->size += increment;
+            if (shared_->chunkHead->size + increment <= shared_->chunkHead->capacity) {
+                shared_->chunkHead->size += increment;
                 return originalPtr;
             }
         }
@@ -235,50 +369,325 @@ class MemoryPoolAllocator {
     }
 
     //! Frees a memory block (concept Allocator)
-    static void Free(void *ptr) { (void)ptr; } // Do nothing
+    static void Free(void *ptr) RAPIDJSON_NOEXCEPT { (void)ptr; } // Do nothing
 
-private:
-    //! Copy constructor is not permitted.
-    MemoryPoolAllocator(const MemoryPoolAllocator& rhs) /* = delete */;
-    //! Copy assignment operator is not permitted.
-    MemoryPoolAllocator& operator=(const MemoryPoolAllocator& rhs) /* = delete */;
+    //! Compare (equality) with another MemoryPoolAllocator
+    bool operator==(const MemoryPoolAllocator& rhs) const RAPIDJSON_NOEXCEPT {
+        RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
+        RAPIDJSON_NOEXCEPT_ASSERT(rhs.shared_->refcount > 0);
+        return shared_ == rhs.shared_;
+    }
+    //! Compare (inequality) with another MemoryPoolAllocator
+    bool operator!=(const MemoryPoolAllocator& rhs) const RAPIDJSON_NOEXCEPT {
+        return !operator==(rhs);
+    }
 
+private:
     //! Creates a new chunk.
     /*! \param capacity Capacity of the chunk in bytes.
         \return true if success.
     */
     bool AddChunk(size_t capacity) {
         if (!baseAllocator_)
-            ownBaseAllocator_ = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator)();
-        if (ChunkHeader* chunk = reinterpret_cast<ChunkHeader*>(baseAllocator_->Malloc(RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + capacity))) {
+            shared_->ownBaseAllocator = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator)();
+        if (ChunkHeader* chunk = static_cast<ChunkHeader*>(baseAllocator_->Malloc(SIZEOF_CHUNK_HEADER + capacity))) {
             chunk->capacity = capacity;
             chunk->size = 0;
-            chunk->next = chunkHead_;
-            chunkHead_ =  chunk;
+            chunk->next = shared_->chunkHead;
+            shared_->chunkHead = chunk;
             return true;
         }
         else
             return false;
     }
 
-    static const int kDefaultChunkCapacity = RAPIDJSON_ALLOCATOR_DEFAULT_CHUNK_CAPACITY; //!< Default chunk capacity.
-
-    //! Chunk header for perpending to each chunk.
-    /*! Chunks are stored as a singly linked list.
-    */
-    struct ChunkHeader {
-        size_t capacity;    //!< Capacity of the chunk in bytes (excluding the header itself).
-        size_t size;        //!< Current size of allocated memory in bytes.
-        ChunkHeader *next;  //!< Next chunk in the linked list.
-    };
+    static inline void* AlignBuffer(void* buf, size_t &size)
+    {
+        RAPIDJSON_NOEXCEPT_ASSERT(buf != 0);
+        const uintptr_t mask = sizeof(void*) - 1;
+        const uintptr_t ubuf = reinterpret_cast<uintptr_t>(buf);
+        if (RAPIDJSON_UNLIKELY(ubuf & mask)) {
+            const uintptr_t abuf = (ubuf + mask) & ~mask;
+            RAPIDJSON_ASSERT(size >= abuf - ubuf);
+            buf = reinterpret_cast<void*>(abuf);
+            size -= abuf - ubuf;
+        }
+        return buf;
+    }
 
-    ChunkHeader *chunkHead_;    //!< Head of the chunk linked-list. Only the head chunk serves allocation.
     size_t chunk_capacity_;     //!< The minimum capacity of chunk when they are allocated.
-    void *userBuffer_;          //!< User supplied buffer.
     BaseAllocator* baseAllocator_;  //!< base allocator for allocating memory chunks.
-    BaseAllocator* ownBaseAllocator_;   //!< base allocator created by this object.
+    SharedData *shared_;        //!< The shared data of the allocator
 };
 
+namespace internal {
+    template<typename, typename = void>
+    struct IsRefCounted :
+        public FalseType
+    { };
+    template<typename T>
+    struct IsRefCounted<T, typename internal::EnableIfCond<T::kRefCounted>::Type> :
+        public TrueType
+    { };
+}
+
+template<typename T, typename A>
+inline T* Realloc(A& a, T* old_p, size_t old_n, size_t new_n)
+{
+    RAPIDJSON_NOEXCEPT_ASSERT(old_n <= (std::numeric_limits<size_t>::max)() / sizeof(T) && new_n <= (std::numeric_limits<size_t>::max)() / sizeof(T));
+    return static_cast<T*>(a.Realloc(old_p, old_n * sizeof(T), new_n * sizeof(T)));
+}
+
+template<typename T, typename A>
+inline T *Malloc(A& a, size_t n = 1)
+{
+    return Realloc<T, A>(a, NULL, 0, n);
+}
+
+template<typename T, typename A>
+inline void Free(A& a, T *p, size_t n = 1)
+{
+    static_cast<void>(Realloc<T, A>(a, p, n, 0));
+}
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++) // std::allocator can safely be inherited
+#endif
+
+template <typename T, typename BaseAllocator = CrtAllocator>
+class StdAllocator :
+    public std::allocator<T>
+{
+    typedef std::allocator<T> allocator_type;
+#if RAPIDJSON_HAS_CXX11
+    typedef std::allocator_traits<allocator_type> traits_type;
+#else
+    typedef allocator_type traits_type;
+#endif
+
+public:
+    typedef BaseAllocator BaseAllocatorType;
+
+    StdAllocator() RAPIDJSON_NOEXCEPT :
+        allocator_type(),
+        baseAllocator_()
+    { }
+
+    StdAllocator(const StdAllocator& rhs) RAPIDJSON_NOEXCEPT :
+        allocator_type(rhs),
+        baseAllocator_(rhs.baseAllocator_)
+    { }
+
+    template<typename U>
+    StdAllocator(const StdAllocator<U, BaseAllocator>& rhs) RAPIDJSON_NOEXCEPT :
+        allocator_type(rhs),
+        baseAllocator_(rhs.baseAllocator_)
+    { }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    StdAllocator(StdAllocator&& rhs) RAPIDJSON_NOEXCEPT :
+        allocator_type(std::move(rhs)),
+        baseAllocator_(std::move(rhs.baseAllocator_))
+    { }
+#endif
+#if RAPIDJSON_HAS_CXX11
+    using propagate_on_container_move_assignment = std::true_type;
+    using propagate_on_container_swap = std::true_type;
+#endif
+
+    /* implicit */
+    StdAllocator(const BaseAllocator& baseAllocator) RAPIDJSON_NOEXCEPT :
+        allocator_type(),
+        baseAllocator_(baseAllocator)
+    { }
+
+    ~StdAllocator() RAPIDJSON_NOEXCEPT
+    { }
+
+    template<typename U>
+    struct rebind {
+        typedef StdAllocator<U, BaseAllocator> other;
+    };
+
+    typedef typename traits_type::size_type         size_type;
+    typedef typename traits_type::difference_type   difference_type;
+
+    typedef typename traits_type::value_type        value_type;
+    typedef typename traits_type::pointer           pointer;
+    typedef typename traits_type::const_pointer     const_pointer;
+
+#if RAPIDJSON_HAS_CXX11
+
+    typedef typename std::add_lvalue_reference<value_type>::type &reference;
+    typedef typename std::add_lvalue_reference<typename std::add_const<value_type>::type>::type &const_reference;
+
+    pointer address(reference r) const RAPIDJSON_NOEXCEPT
+    {
+        return std::addressof(r);
+    }
+    const_pointer address(const_reference r) const RAPIDJSON_NOEXCEPT
+    {
+        return std::addressof(r);
+    }
+
+    size_type max_size() const RAPIDJSON_NOEXCEPT
+    {
+        return traits_type::max_size(*this);
+    }
+
+    template <typename ...Args>
+    void construct(pointer p, Args&&... args)
+    {
+        traits_type::construct(*this, p, std::forward<Args>(args)...);
+    }
+    void destroy(pointer p)
+    {
+        traits_type::destroy(*this, p);
+    }
+
+#else // !RAPIDJSON_HAS_CXX11
+
+    typedef typename allocator_type::reference       reference;
+    typedef typename allocator_type::const_reference const_reference;
+
+    pointer address(reference r) const RAPIDJSON_NOEXCEPT
+    {
+        return allocator_type::address(r);
+    }
+    const_pointer address(const_reference r) const RAPIDJSON_NOEXCEPT
+    {
+        return allocator_type::address(r);
+    }
+
+    size_type max_size() const RAPIDJSON_NOEXCEPT
+    {
+        return allocator_type::max_size();
+    }
+
+    void construct(pointer p, const_reference r)
+    {
+        allocator_type::construct(p, r);
+    }
+    void destroy(pointer p)
+    {
+        allocator_type::destroy(p);
+    }
+
+#endif // !RAPIDJSON_HAS_CXX11
+
+    template <typename U>
+    U* allocate(size_type n = 1, const void* = 0)
+    {
+        return RAPIDJSON_NAMESPACE::Malloc<U>(baseAllocator_, n);
+    }
+    template <typename U>
+    void deallocate(U* p, size_type n = 1)
+    {
+        RAPIDJSON_NAMESPACE::Free<U>(baseAllocator_, p, n);
+    }
+
+    pointer allocate(size_type n = 1, const void* = 0)
+    {
+        return allocate<value_type>(n);
+    }
+    void deallocate(pointer p, size_type n = 1)
+    {
+        deallocate<value_type>(p, n);
+    }
+
+#if RAPIDJSON_HAS_CXX11
+    using is_always_equal = std::is_empty<BaseAllocator>;
+#endif
+
+    template<typename U>
+    bool operator==(const StdAllocator<U, BaseAllocator>& rhs) const RAPIDJSON_NOEXCEPT
+    {
+        return baseAllocator_ == rhs.baseAllocator_;
+    }
+    template<typename U>
+    bool operator!=(const StdAllocator<U, BaseAllocator>& rhs) const RAPIDJSON_NOEXCEPT
+    {
+        return !operator==(rhs);
+    }
+
+    //! rapidjson Allocator concept
+    static const bool kNeedFree = BaseAllocator::kNeedFree;
+    static const bool kRefCounted = internal::IsRefCounted<BaseAllocator>::Value;
+    void* Malloc(size_t size)
+    {
+        return baseAllocator_.Malloc(size);
+    }
+    void* Realloc(void* originalPtr, size_t originalSize, size_t newSize)
+    {
+        return baseAllocator_.Realloc(originalPtr, originalSize, newSize);
+    }
+    static void Free(void *ptr) RAPIDJSON_NOEXCEPT
+    {
+        BaseAllocator::Free(ptr);
+    }
+
+private:
+    template <typename, typename>
+    friend class StdAllocator; // access to StdAllocator<!T>.*
+
+    BaseAllocator baseAllocator_;
+};
+
+#if !RAPIDJSON_HAS_CXX17 // std::allocator<void> deprecated in C++17
+template <typename BaseAllocator>
+class StdAllocator<void, BaseAllocator> :
+    public std::allocator<void>
+{
+    typedef std::allocator<void> allocator_type;
+
+public:
+    typedef BaseAllocator BaseAllocatorType;
+
+    StdAllocator() RAPIDJSON_NOEXCEPT :
+        allocator_type(),
+        baseAllocator_()
+    { }
+
+    StdAllocator(const StdAllocator& rhs) RAPIDJSON_NOEXCEPT :
+        allocator_type(rhs),
+        baseAllocator_(rhs.baseAllocator_)
+    { }
+
+    template<typename U>
+    StdAllocator(const StdAllocator<U, BaseAllocator>& rhs) RAPIDJSON_NOEXCEPT :
+        allocator_type(rhs),
+        baseAllocator_(rhs.baseAllocator_)
+    { }
+
+    /* implicit */
+    StdAllocator(const BaseAllocator& baseAllocator) RAPIDJSON_NOEXCEPT :
+        allocator_type(),
+        baseAllocator_(baseAllocator)
+    { }
+
+    ~StdAllocator() RAPIDJSON_NOEXCEPT
+    { }
+
+    template<typename U>
+    struct rebind {
+        typedef StdAllocator<U, BaseAllocator> other;
+    };
+
+    typedef typename allocator_type::value_type value_type;
+
+private:
+    template <typename, typename>
+    friend class StdAllocator; // access to StdAllocator<!T>.*
+
+    BaseAllocator baseAllocator_;
+};
+#endif
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_POP
+#endif
+
 RAPIDJSON_NAMESPACE_END
 
 #endif // RAPIDJSON_ENCODINGS_H_
diff --git a/src/native/external/rapidjson/cursorstreamwrapper.h b/src/native/external/rapidjson/cursorstreamwrapper.h
deleted file mode 100644
index 52c11a7c01d7..000000000000
--- a/src/native/external/rapidjson/cursorstreamwrapper.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// Tencent is pleased to support the open source community by making RapidJSON available.
-//
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
-//
-// Licensed under the MIT License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// http://opensource.org/licenses/MIT
-//
-// Unless required by applicable law or agreed to in writing, software distributed
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations under the License.
-
-#ifndef RAPIDJSON_CURSORSTREAMWRAPPER_H_
-#define RAPIDJSON_CURSORSTREAMWRAPPER_H_
-
-#include "stream.h"
-
-#if defined(__GNUC__)
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(effc++)
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER <= 1800
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(4702)  // unreachable code
-RAPIDJSON_DIAG_OFF(4512)  // assignment operator could not be generated
-#endif
-
-RAPIDJSON_NAMESPACE_BEGIN
-
-
-//! Cursor stream wrapper for counting line and column number if error exists.
-/*!
-    \tparam InputStream     Any stream that implements Stream Concept
-*/
-template <typename InputStream, typename Encoding = UTF8<> >
-class CursorStreamWrapper : public GenericStreamWrapper<InputStream, Encoding> {
-public:
-    typedef typename Encoding::Ch Ch;
-
-    CursorStreamWrapper(InputStream& is):
-        GenericStreamWrapper<InputStream, Encoding>(is), line_(1), col_(0) {}
-
-    // counting line and column number
-    Ch Take() {
-        Ch ch = this->is_.Take();
-        if(ch == '\n') {
-            line_ ++;
-            col_ = 0;
-        } else {
-            col_ ++;
-        }
-        return ch;
-    }
-
-    //! Get the error line number, if error exists.
-    size_t GetLine() const { return line_; }
-    //! Get the error column number, if error exists.
-    size_t GetColumn() const { return col_; }
-
-private:
-    size_t line_;   //!< Current Line
-    size_t col_;    //!< Current Column
-};
-
-#if defined(_MSC_VER) && _MSC_VER <= 1800
-RAPIDJSON_DIAG_POP
-#endif
-
-#if defined(__GNUC__)
-RAPIDJSON_DIAG_POP
-#endif
-
-RAPIDJSON_NAMESPACE_END
-
-#endif // RAPIDJSON_CURSORSTREAMWRAPPER_H_
diff --git a/src/native/external/rapidjson/document.h b/src/native/external/rapidjson/document.h
index 74666e3423ee..2cd9a70a6003 100644
--- a/src/native/external/rapidjson/document.h
+++ b/src/native/external/rapidjson/document.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
@@ -24,6 +24,9 @@
 #include "encodedstream.h"
 #include <new>      // placement new
 #include <limits>
+#ifdef __cpp_lib_three_way_comparison
+#include <compare>
+#endif
 
 RAPIDJSON_DIAG_PUSH
 #ifdef __clang__
@@ -39,12 +42,21 @@ RAPIDJSON_DIAG_OFF(4244) // conversion from kXxxFlags to 'uint16_t', possible lo
 RAPIDJSON_DIAG_OFF(effc++)
 #endif // __GNUC__
 
+#ifdef GetObject
+// see https://github.com/Tencent/rapidjson/issues/1448
+// a former included windows.h might have defined a macro called GetObject, which affects
+// GetObject defined here. This ensures the macro does not get applied
+#pragma push_macro("GetObject")
+#define RAPIDJSON_WINDOWS_GETOBJECT_WORKAROUND_APPLIED
+#undef GetObject
+#endif
+
 #ifndef RAPIDJSON_NOMEMBERITERATORCLASS
 #include <iterator> // std::random_access_iterator_tag
 #endif
 
-#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
-#include <utility> // std::move
+#if RAPIDJSON_USE_MEMBERSMAP
+#include <map> // std::multimap
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -56,6 +68,48 @@ class GenericValue;
 template <typename Encoding, typename Allocator, typename StackAllocator>
 class GenericDocument;
 
+/*! \def RAPIDJSON_DEFAULT_ALLOCATOR
+    \ingroup RAPIDJSON_CONFIG
+    \brief Allows to choose default allocator.
+
+    User can define this to use CrtAllocator or MemoryPoolAllocator.
+*/
+#ifndef RAPIDJSON_DEFAULT_ALLOCATOR
+#define RAPIDJSON_DEFAULT_ALLOCATOR ::RAPIDJSON_NAMESPACE::MemoryPoolAllocator<::RAPIDJSON_NAMESPACE::CrtAllocator>
+#endif
+
+/*! \def RAPIDJSON_DEFAULT_STACK_ALLOCATOR
+    \ingroup RAPIDJSON_CONFIG
+    \brief Allows to choose default stack allocator for Document.
+
+    User can define this to use CrtAllocator or MemoryPoolAllocator.
+*/
+#ifndef RAPIDJSON_DEFAULT_STACK_ALLOCATOR
+#define RAPIDJSON_DEFAULT_STACK_ALLOCATOR ::RAPIDJSON_NAMESPACE::CrtAllocator
+#endif
+
+/*! \def RAPIDJSON_VALUE_DEFAULT_OBJECT_CAPACITY
+    \ingroup RAPIDJSON_CONFIG
+    \brief User defined kDefaultObjectCapacity value.
+
+    User can define this as any natural number.
+*/
+#ifndef RAPIDJSON_VALUE_DEFAULT_OBJECT_CAPACITY
+// number of objects that rapidjson::Value allocates memory for by default
+#define RAPIDJSON_VALUE_DEFAULT_OBJECT_CAPACITY 16
+#endif
+
+/*! \def RAPIDJSON_VALUE_DEFAULT_ARRAY_CAPACITY
+    \ingroup RAPIDJSON_CONFIG
+    \brief User defined kDefaultArrayCapacity value.
+
+    User can define this as any natural number.
+*/
+#ifndef RAPIDJSON_VALUE_DEFAULT_ARRAY_CAPACITY
+// number of array elements that rapidjson::Value allocates memory for by default
+#define RAPIDJSON_VALUE_DEFAULT_ARRAY_CAPACITY 16
+#endif
+
 //! Name-value pair in a JSON object value.
 /*!
     This class was internal to GenericValue. It used to be a inner struct.
@@ -63,15 +117,45 @@ class GenericDocument;
     https://code.google.com/p/rapidjson/issues/detail?id=64
 */
 template <typename Encoding, typename Allocator> 
-struct GenericMember { 
+class GenericMember {
+public:
     GenericValue<Encoding, Allocator> name;     //!< name of member (must be a string)
     GenericValue<Encoding, Allocator> value;    //!< value of member.
 
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    //! Move constructor in C++11
+    GenericMember(GenericMember&& rhs) RAPIDJSON_NOEXCEPT
+        : name(std::move(rhs.name)),
+          value(std::move(rhs.value))
+    {
+    }
+
+    //! Move assignment in C++11
+    GenericMember& operator=(GenericMember&& rhs) RAPIDJSON_NOEXCEPT {
+        return *this = static_cast<GenericMember&>(rhs);
+    }
+#endif
+
+    //! Assignment with move semantics.
+    /*! \param rhs Source of the assignment. Its name and value will become a null value after assignment.
+    */
+    GenericMember& operator=(GenericMember& rhs) RAPIDJSON_NOEXCEPT {
+        if (RAPIDJSON_LIKELY(this != &rhs)) {
+            name = rhs.name;
+            value = rhs.value;
+        }
+        return *this;
+    }
+
     // swap() for std::sort() and other potential use in STL.
     friend inline void swap(GenericMember& a, GenericMember& b) RAPIDJSON_NOEXCEPT {
         a.name.Swap(b.name);
         a.value.Swap(b.value);
     }
+
+private:
+    //! Copy constructor is not permitted.
+    GenericMember(const GenericMember& rhs);
 };
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -175,12 +259,16 @@ class GenericMemberIterator {
 
     //! @name relations
     //@{
-    bool operator==(ConstIterator that) const { return ptr_ == that.ptr_; }
-    bool operator!=(ConstIterator that) const { return ptr_ != that.ptr_; }
-    bool operator<=(ConstIterator that) const { return ptr_ <= that.ptr_; }
-    bool operator>=(ConstIterator that) const { return ptr_ >= that.ptr_; }
-    bool operator< (ConstIterator that) const { return ptr_ < that.ptr_; }
-    bool operator> (ConstIterator that) const { return ptr_ > that.ptr_; }
+    template <bool Const_> bool operator==(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ == that.ptr_; }
+    template <bool Const_> bool operator!=(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ != that.ptr_; }
+    template <bool Const_> bool operator<=(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ <= that.ptr_; }
+    template <bool Const_> bool operator>=(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ >= that.ptr_; }
+    template <bool Const_> bool operator< (const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ < that.ptr_; }
+    template <bool Const_> bool operator> (const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ > that.ptr_; }
+
+#ifdef __cpp_lib_three_way_comparison
+    template <bool Const_> std::strong_ordering operator<=>(const GenericMemberIterator<Const_, Encoding, Allocator>& that) const { return ptr_ <=> that.ptr_; }
+#endif
     //@}
 
     //! @name dereference
@@ -210,12 +298,14 @@ class GenericMemberIterator;
 //! non-const GenericMemberIterator
 template <typename Encoding, typename Allocator>
 class GenericMemberIterator<false,Encoding,Allocator> {
+public:
     //! use plain pointer as iterator type
     typedef GenericMember<Encoding,Allocator>* Iterator;
 };
 //! const GenericMemberIterator
 template <typename Encoding, typename Allocator>
 class GenericMemberIterator<true,Encoding,Allocator> {
+public:
     //! use plain const pointer as iterator type
     typedef const GenericMember<Encoding,Allocator>* Iterator;
 };
@@ -574,7 +664,7 @@ template <bool, typename> class GenericObject;
     \tparam Encoding    Encoding of the value. (Even non-string values need to have the same encoding in a document)
     \tparam Allocator   Allocator type for allocating memory of object, array and string.
 */
-template <typename Encoding, typename Allocator = MemoryPoolAllocator<> > 
+template <typename Encoding, typename Allocator = RAPIDJSON_DEFAULT_ALLOCATOR >
 class GenericValue {
 public:
     //! Name-value pair in an object.
@@ -651,18 +741,8 @@ class GenericValue {
     template <typename SourceAllocator>
     GenericValue(const GenericValue<Encoding,SourceAllocator>& rhs, Allocator& allocator, bool copyConstStrings = false) {
         switch (rhs.GetType()) {
-        case kObjectType: {
-                SizeType count = rhs.data_.o.size;
-                Member* lm = reinterpret_cast<Member*>(allocator.Malloc(count * sizeof(Member)));
-                const typename GenericValue<Encoding,SourceAllocator>::Member* rm = rhs.GetMembersPointer();
-                for (SizeType i = 0; i < count; i++) {
-                    new (&lm[i].name) GenericValue(rm[i].name, allocator, copyConstStrings);
-                    new (&lm[i].value) GenericValue(rm[i].value, allocator, copyConstStrings);
-                }
-                data_.f.flags = kObjectFlag;
-                data_.o.size = data_.o.capacity = count;
-                SetMembersPointer(lm);
-            }
+        case kObjectType:
+            DoCopyMembers(rhs, allocator, copyConstStrings);
             break;
         case kArrayType: {
                 SizeType count = rhs.data_.a.size;
@@ -798,25 +878,30 @@ class GenericValue {
     /*! Need to destruct elements of array, members of object, or copy-string.
     */
     ~GenericValue() {
-        if (Allocator::kNeedFree) { // Shortcut by Allocator's trait
+        // With RAPIDJSON_USE_MEMBERSMAP, the maps need to be destroyed to release
+        // their Allocator if it's refcounted (e.g. MemoryPoolAllocator).
+        if (Allocator::kNeedFree || (RAPIDJSON_USE_MEMBERSMAP+0 &&
+                                     internal::IsRefCounted<Allocator>::Value)) {
             switch(data_.f.flags) {
             case kArrayFlag:
                 {
                     GenericValue* e = GetElementsPointer();
                     for (GenericValue* v = e; v != e + data_.a.size; ++v)
                         v->~GenericValue();
-                    Allocator::Free(e);
+                    if (Allocator::kNeedFree) { // Shortcut by Allocator's trait
+                        Allocator::Free(e);
+                    }
                 }
                 break;
 
             case kObjectFlag:
-                for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
-                    m->~Member();
-                Allocator::Free(GetMembersPointer());
+                DoFreeMembers();
                 break;
 
             case kCopyStringFlag:
-                Allocator::Free(const_cast<Ch*>(GetStringPointer()));
+                if (Allocator::kNeedFree) { // Shortcut by Allocator's trait
+                    Allocator::Free(const_cast<Ch*>(GetStringPointer()));
+                }
                 break;
 
             default:
@@ -835,8 +920,13 @@ class GenericValue {
     */
     GenericValue& operator=(GenericValue& rhs) RAPIDJSON_NOEXCEPT {
         if (RAPIDJSON_LIKELY(this != &rhs)) {
+            // Can't destroy "this" before assigning "rhs", otherwise "rhs"
+            // could be used after free if it's an sub-Value of "this",
+            // hence the temporary danse.
+            GenericValue temp;
+            temp.RawAssign(rhs);
             this->~GenericValue();
-            RawAssign(rhs);
+            RawAssign(temp);
         }
         return *this;
     }
@@ -988,6 +1078,7 @@ class GenericValue {
     */
     template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>,internal::IsGenericValue<T> >), (bool)) operator==(const T& rhs) const { return *this == GenericValue(rhs); }
 
+#ifndef __cpp_impl_three_way_comparison
     //! Not-equal-to operator
     /*! \return !(*this == rhs)
      */
@@ -1012,6 +1103,7 @@ class GenericValue {
      */
     template <typename T> friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator!=(const T& lhs, const GenericValue& rhs) { return !(rhs == lhs); }
     //@}
+#endif
 
     //!@name Type
     //@{
@@ -1138,13 +1230,28 @@ class GenericValue {
         else {
             RAPIDJSON_ASSERT(false);    // see above note
 
-            // This will generate -Wexit-time-destructors in clang
-            // static GenericValue NullValue;
-            // return NullValue;
-
-            // Use static buffer and placement-new to prevent destruction
-            static char buffer[sizeof(GenericValue)];
+#if RAPIDJSON_HAS_CXX11
+            // Use thread-local storage to prevent races between threads.
+            // Use static buffer and placement-new to prevent destruction, with
+            // alignas() to ensure proper alignment.
+            alignas(GenericValue) thread_local static char buffer[sizeof(GenericValue)];
+            return *new (buffer) GenericValue();
+#elif defined(_MSC_VER) && _MSC_VER < 1900
+            // There's no way to solve both thread locality and proper alignment
+            // simultaneously.
+            __declspec(thread) static char buffer[sizeof(GenericValue)];
             return *new (buffer) GenericValue();
+#elif defined(__GNUC__) || defined(__clang__)
+            // This will generate -Wexit-time-destructors in clang, but that's
+            // better than having under-alignment.
+            __thread static GenericValue buffer;
+            return buffer;
+#else
+            // Don't know what compiler this is, so don't know how to ensure
+            // thread-locality.
+            static GenericValue buffer;
+            return buffer;
+#endif
         }
     }
     template <typename SourceAllocator>
@@ -1177,10 +1284,7 @@ class GenericValue {
     */
     GenericValue& MemberReserve(SizeType newCapacity, Allocator &allocator) {
         RAPIDJSON_ASSERT(IsObject());
-        if (newCapacity > data_.o.capacity) {
-            SetMembersPointer(reinterpret_cast<Member*>(allocator.Realloc(GetMembersPointer(), data_.o.capacity * sizeof(Member), newCapacity * sizeof(Member))));
-            data_.o.capacity = newCapacity;
-        }
+        DoReserveMembers(newCapacity, allocator);
         return *this;
     }
 
@@ -1254,11 +1358,7 @@ class GenericValue {
     MemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) {
         RAPIDJSON_ASSERT(IsObject());
         RAPIDJSON_ASSERT(name.IsString());
-        MemberIterator member = MemberBegin();
-        for ( ; member != MemberEnd(); ++member)
-            if (name.StringEqual(member->name))
-                break;
-        return member;
+        return DoFindMember(name);
     }
     template <typename SourceAllocator> ConstMemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) const { return const_cast<GenericValue&>(*this).FindMember(name); }
 
@@ -1287,14 +1387,7 @@ class GenericValue {
     GenericValue& AddMember(GenericValue& name, GenericValue& value, Allocator& allocator) {
         RAPIDJSON_ASSERT(IsObject());
         RAPIDJSON_ASSERT(name.IsString());
-
-        ObjectData& o = data_.o;
-        if (o.size >= o.capacity)
-            MemberReserve(o.capacity == 0 ? kDefaultObjectCapacity : (o.capacity + (o.capacity + 1) / 2), allocator);
-        Member* members = GetMembersPointer();
-        members[o.size].name.RawAssign(name);
-        members[o.size].value.RawAssign(value);
-        o.size++;
+        DoAddMember(name, value, allocator);
         return *this;
     }
 
@@ -1428,9 +1521,7 @@ class GenericValue {
     */
     void RemoveAllMembers() {
         RAPIDJSON_ASSERT(IsObject()); 
-        for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
-            m->~Member();
-        data_.o.size = 0;
+        DoClearMembers();
     }
 
     //! Remove a member in object by its name.
@@ -1474,14 +1565,7 @@ class GenericValue {
         RAPIDJSON_ASSERT(data_.o.size > 0);
         RAPIDJSON_ASSERT(GetMembersPointer() != 0);
         RAPIDJSON_ASSERT(m >= MemberBegin() && m < MemberEnd());
-
-        MemberIterator last(GetMembersPointer() + (data_.o.size - 1));
-        if (data_.o.size > 1 && m != last)
-            *m = *last; // Move the last one to this place
-        else
-            m->~Member(); // Only one left, just destroy
-        --data_.o.size;
-        return m;
+        return DoRemoveMember(m);
     }
 
     //! Remove a member from an object by iterator.
@@ -1513,13 +1597,7 @@ class GenericValue {
         RAPIDJSON_ASSERT(first >= MemberBegin());
         RAPIDJSON_ASSERT(first <= last);
         RAPIDJSON_ASSERT(last <= MemberEnd());
-
-        MemberIterator pos = MemberBegin() + (first - MemberBegin());
-        for (MemberIterator itr = pos; itr != last; ++itr)
-            itr->~Member();
-        std::memmove(static_cast<void*>(&*pos), &*last, static_cast<size_t>(MemberEnd() - last) * sizeof(Member));
-        data_.o.size -= static_cast<SizeType>(last - first);
-        return pos;
+        return DoEraseMembers(first, last);
     }
 
     //! Erase a member in object by its name.
@@ -1548,7 +1626,9 @@ class GenericValue {
     }
 
     Object GetObject() { RAPIDJSON_ASSERT(IsObject()); return Object(*this); }
+    Object GetObj() { RAPIDJSON_ASSERT(IsObject()); return Object(*this); }
     ConstObject GetObject() const { RAPIDJSON_ASSERT(IsObject()); return ConstObject(*this); }
+    ConstObject GetObj() const { RAPIDJSON_ASSERT(IsObject()); return ConstObject(*this); }
 
     //@}
 
@@ -1770,12 +1850,12 @@ class GenericValue {
     //!@name String
     //@{
 
-    const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return (data_.f.flags & kInlineStrFlag) ? data_.ss.str : GetStringPointer(); }
+    const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return DataString(data_); }
 
     //! Get the length of string.
     /*! Since rapidjson permits "\\u0000" in the json string, strlen(v.GetString()) may not equal to v.GetStringLength().
     */
-    SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return ((data_.f.flags & kInlineStrFlag) ? (data_.ss.GetLength()) : data_.s.length); }
+    SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return DataStringLength(data_); }
 
     //! Set this value as a string without copying source string.
     /*! This version has better performance with supplied length, and also support string containing null character.
@@ -1886,7 +1966,7 @@ class GenericValue {
         case kArrayType:
             if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
                 return false;
-            for (const GenericValue* v = Begin(); v != End(); ++v)
+            for (ConstValueIterator v = Begin(); v != End(); ++v)
                 if (RAPIDJSON_UNLIKELY(!v->Accept(handler)))
                     return false;
             return handler.EndArray(data_.a.size);
@@ -1922,25 +2002,26 @@ class GenericValue {
 
         // Initial flags of different types.
         kNullFlag = kNullType,
-        kTrueFlag = kTrueType | kBoolFlag,
-        kFalseFlag = kFalseType | kBoolFlag,
-        kNumberIntFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag,
-        kNumberUintFlag = kNumberType | kNumberFlag | kUintFlag | kUint64Flag | kInt64Flag,
-        kNumberInt64Flag = kNumberType | kNumberFlag | kInt64Flag,
-        kNumberUint64Flag = kNumberType | kNumberFlag | kUint64Flag,
-        kNumberDoubleFlag = kNumberType | kNumberFlag | kDoubleFlag,
-        kNumberAnyFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag | kUintFlag | kUint64Flag | kDoubleFlag,
-        kConstStringFlag = kStringType | kStringFlag,
-        kCopyStringFlag = kStringType | kStringFlag | kCopyFlag,
-        kShortStringFlag = kStringType | kStringFlag | kCopyFlag | kInlineStrFlag,
+        // These casts are added to suppress the warning on MSVC about bitwise operations between enums of different types.
+        kTrueFlag = static_cast<int>(kTrueType) | static_cast<int>(kBoolFlag),
+        kFalseFlag = static_cast<int>(kFalseType) | static_cast<int>(kBoolFlag),
+        kNumberIntFlag = static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kIntFlag | kInt64Flag),
+        kNumberUintFlag = static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kUintFlag | kUint64Flag | kInt64Flag),
+        kNumberInt64Flag = static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kInt64Flag),
+        kNumberUint64Flag = static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kUint64Flag),
+        kNumberDoubleFlag = static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kDoubleFlag),
+        kNumberAnyFlag = static_cast<int>(kNumberType) | static_cast<int>(kNumberFlag | kIntFlag | kInt64Flag | kUintFlag | kUint64Flag | kDoubleFlag),
+        kConstStringFlag = static_cast<int>(kStringType) | static_cast<int>(kStringFlag),
+        kCopyStringFlag = static_cast<int>(kStringType) | static_cast<int>(kStringFlag | kCopyFlag),
+        kShortStringFlag = static_cast<int>(kStringType) | static_cast<int>(kStringFlag | kCopyFlag | kInlineStrFlag),
         kObjectFlag = kObjectType,
         kArrayFlag = kArrayType,
 
         kTypeMask = 0x07
     };
 
-    static const SizeType kDefaultArrayCapacity = 16;
-    static const SizeType kDefaultObjectCapacity = 16;
+    static const SizeType kDefaultArrayCapacity = RAPIDJSON_VALUE_DEFAULT_ARRAY_CAPACITY;
+    static const SizeType kDefaultObjectCapacity = RAPIDJSON_VALUE_DEFAULT_OBJECT_CAPACITY;
 
     struct Flag {
 #if RAPIDJSON_48BITPOINTER_OPTIMIZATION
@@ -2023,6 +2104,13 @@ class GenericValue {
         Flag f;
     };  // 16 bytes in 32-bit mode, 24 bytes in 64-bit mode, 16 bytes in 64-bit with RAPIDJSON_48BITPOINTER_OPTIMIZATION
 
+    static RAPIDJSON_FORCEINLINE const Ch* DataString(const Data& data) {
+        return (data.f.flags & kInlineStrFlag) ? data.ss.str : RAPIDJSON_GETPOINTER(Ch, data.s.str);
+    }
+    static RAPIDJSON_FORCEINLINE SizeType DataStringLength(const Data& data) {
+        return (data.f.flags & kInlineStrFlag) ? data.ss.GetLength() : data.s.length;
+    }
+
     RAPIDJSON_FORCEINLINE const Ch* GetStringPointer() const { return RAPIDJSON_GETPOINTER(Ch, data_.s.str); }
     RAPIDJSON_FORCEINLINE const Ch* SetStringPointer(const Ch* str) { return RAPIDJSON_SETPOINTER(Ch, data_.s.str, str); }
     RAPIDJSON_FORCEINLINE GenericValue* GetElementsPointer() const { return RAPIDJSON_GETPOINTER(GenericValue, data_.a.elements); }
@@ -2030,6 +2118,286 @@ class GenericValue {
     RAPIDJSON_FORCEINLINE Member* GetMembersPointer() const { return RAPIDJSON_GETPOINTER(Member, data_.o.members); }
     RAPIDJSON_FORCEINLINE Member* SetMembersPointer(Member* members) { return RAPIDJSON_SETPOINTER(Member, data_.o.members, members); }
 
+#if RAPIDJSON_USE_MEMBERSMAP
+
+    struct MapTraits {
+        struct Less {
+            bool operator()(const Data& s1, const Data& s2) const {
+                SizeType n1 = DataStringLength(s1), n2 = DataStringLength(s2);
+                int cmp = std::memcmp(DataString(s1), DataString(s2), sizeof(Ch) * (n1 < n2 ? n1 : n2));
+                return cmp < 0 || (cmp == 0 && n1 < n2);
+            }
+        };
+        typedef std::pair<const Data, SizeType> Pair;
+        typedef std::multimap<Data, SizeType, Less, StdAllocator<Pair, Allocator> > Map;
+        typedef typename Map::iterator Iterator;
+    };
+    typedef typename MapTraits::Map         Map;
+    typedef typename MapTraits::Less        MapLess;
+    typedef typename MapTraits::Pair        MapPair;
+    typedef typename MapTraits::Iterator    MapIterator;
+
+    //
+    // Layout of the members' map/array, re(al)located according to the needed capacity:
+    //
+    //    {Map*}<>{capacity}<>{Member[capacity]}<>{MapIterator[capacity]}
+    //
+    // (where <> stands for the RAPIDJSON_ALIGN-ment, if needed)
+    //
+
+    static RAPIDJSON_FORCEINLINE size_t GetMapLayoutSize(SizeType capacity) {
+        return RAPIDJSON_ALIGN(sizeof(Map*)) +
+               RAPIDJSON_ALIGN(sizeof(SizeType)) +
+               RAPIDJSON_ALIGN(capacity * sizeof(Member)) +
+               capacity * sizeof(MapIterator);
+    }
+
+    static RAPIDJSON_FORCEINLINE SizeType &GetMapCapacity(Map* &map) {
+        return *reinterpret_cast<SizeType*>(reinterpret_cast<uintptr_t>(&map) +
+                                            RAPIDJSON_ALIGN(sizeof(Map*)));
+    }
+
+    static RAPIDJSON_FORCEINLINE Member* GetMapMembers(Map* &map) {
+        return reinterpret_cast<Member*>(reinterpret_cast<uintptr_t>(&map) +
+                                         RAPIDJSON_ALIGN(sizeof(Map*)) +
+                                         RAPIDJSON_ALIGN(sizeof(SizeType)));
+    }
+
+    static RAPIDJSON_FORCEINLINE MapIterator* GetMapIterators(Map* &map) {
+        return reinterpret_cast<MapIterator*>(reinterpret_cast<uintptr_t>(&map) +
+                                              RAPIDJSON_ALIGN(sizeof(Map*)) +
+                                              RAPIDJSON_ALIGN(sizeof(SizeType)) +
+                                              RAPIDJSON_ALIGN(GetMapCapacity(map) * sizeof(Member)));
+    }
+
+    static RAPIDJSON_FORCEINLINE Map* &GetMap(Member* members) {
+        RAPIDJSON_ASSERT(members != 0);
+        return *reinterpret_cast<Map**>(reinterpret_cast<uintptr_t>(members) -
+                                        RAPIDJSON_ALIGN(sizeof(SizeType)) -
+                                        RAPIDJSON_ALIGN(sizeof(Map*)));
+    }
+
+    // Some compilers' debug mechanisms want all iterators to be destroyed, for their accounting..
+    RAPIDJSON_FORCEINLINE MapIterator DropMapIterator(MapIterator& rhs) {
+#if RAPIDJSON_HAS_CXX11
+        MapIterator ret = std::move(rhs);
+#else
+        MapIterator ret = rhs;
+#endif
+        rhs.~MapIterator();
+        return ret;
+    }
+
+    Map* &DoReallocMap(Map** oldMap, SizeType newCapacity, Allocator& allocator) {
+        Map **newMap = static_cast<Map**>(allocator.Malloc(GetMapLayoutSize(newCapacity)));
+        GetMapCapacity(*newMap) = newCapacity;
+        if (!oldMap) {
+            *newMap = new (allocator.Malloc(sizeof(Map))) Map(MapLess(), allocator);
+        }
+        else {
+            *newMap = *oldMap;
+            size_t count = (*oldMap)->size();
+            std::memcpy(static_cast<void*>(GetMapMembers(*newMap)),
+                        static_cast<void*>(GetMapMembers(*oldMap)),
+                        count * sizeof(Member));
+            MapIterator *oldIt = GetMapIterators(*oldMap),
+                        *newIt = GetMapIterators(*newMap);
+            while (count--) {
+                new (&newIt[count]) MapIterator(DropMapIterator(oldIt[count]));
+            }
+            Allocator::Free(oldMap);
+        }
+        return *newMap;
+    }
+
+    RAPIDJSON_FORCEINLINE Member* DoAllocMembers(SizeType capacity, Allocator& allocator) {
+        return GetMapMembers(DoReallocMap(0, capacity, allocator));
+    }
+
+    void DoReserveMembers(SizeType newCapacity, Allocator& allocator) {
+        ObjectData& o = data_.o;
+        if (newCapacity > o.capacity) {
+            Member* oldMembers = GetMembersPointer();
+            Map **oldMap = oldMembers ? &GetMap(oldMembers) : 0,
+                *&newMap = DoReallocMap(oldMap, newCapacity, allocator);
+            RAPIDJSON_SETPOINTER(Member, o.members, GetMapMembers(newMap));
+            o.capacity = newCapacity;
+        }
+    }
+
+    template <typename SourceAllocator>
+    MemberIterator DoFindMember(const GenericValue<Encoding, SourceAllocator>& name) {
+        if (Member* members = GetMembersPointer()) {
+            Map* &map = GetMap(members);
+            MapIterator mit = map->find(reinterpret_cast<const Data&>(name.data_));
+            if (mit != map->end()) {
+                return MemberIterator(&members[mit->second]);
+            }
+        }
+        return MemberEnd();
+    }
+
+    void DoClearMembers() {
+        if (Member* members = GetMembersPointer()) {
+            Map* &map = GetMap(members);
+            MapIterator* mit = GetMapIterators(map);
+            for (SizeType i = 0; i < data_.o.size; i++) {
+                map->erase(DropMapIterator(mit[i]));
+                members[i].~Member();
+            }
+            data_.o.size = 0;
+        }
+    }
+
+    void DoFreeMembers() {
+        if (Member* members = GetMembersPointer()) {
+            GetMap(members)->~Map();
+            for (SizeType i = 0; i < data_.o.size; i++) {
+                members[i].~Member();
+            }
+            if (Allocator::kNeedFree) { // Shortcut by Allocator's trait
+                Map** map = &GetMap(members);
+                Allocator::Free(*map);
+                Allocator::Free(map);
+            }
+        }
+    }
+
+#else // !RAPIDJSON_USE_MEMBERSMAP
+
+    RAPIDJSON_FORCEINLINE Member* DoAllocMembers(SizeType capacity, Allocator& allocator) {
+        return Malloc<Member>(allocator, capacity);
+    }
+
+    void DoReserveMembers(SizeType newCapacity, Allocator& allocator) {
+        ObjectData& o = data_.o;
+        if (newCapacity > o.capacity) {
+            Member* newMembers = Realloc<Member>(allocator, GetMembersPointer(), o.capacity, newCapacity);
+            RAPIDJSON_SETPOINTER(Member, o.members, newMembers);
+            o.capacity = newCapacity;
+        }
+    }
+
+    template <typename SourceAllocator>
+    MemberIterator DoFindMember(const GenericValue<Encoding, SourceAllocator>& name) {
+        MemberIterator member = MemberBegin();
+        for ( ; member != MemberEnd(); ++member)
+            if (name.StringEqual(member->name))
+                break;
+        return member;
+    }
+
+    void DoClearMembers() {
+        for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
+            m->~Member();
+        data_.o.size = 0;
+    }
+
+    void DoFreeMembers() {
+        for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
+            m->~Member();
+        Allocator::Free(GetMembersPointer());
+    }
+
+#endif // !RAPIDJSON_USE_MEMBERSMAP
+
+    void DoAddMember(GenericValue& name, GenericValue& value, Allocator& allocator) {
+        ObjectData& o = data_.o;
+        if (o.size >= o.capacity)
+            DoReserveMembers(o.capacity ? (o.capacity + (o.capacity + 1) / 2) : kDefaultObjectCapacity, allocator);
+        Member* members = GetMembersPointer();
+        Member* m = members + o.size;
+        m->name.RawAssign(name);
+        m->value.RawAssign(value);
+#if RAPIDJSON_USE_MEMBERSMAP
+        Map* &map = GetMap(members);
+        MapIterator* mit = GetMapIterators(map);
+        new (&mit[o.size]) MapIterator(map->insert(MapPair(m->name.data_, o.size)));
+#endif
+        ++o.size;
+    }
+
+    MemberIterator DoRemoveMember(MemberIterator m) {
+        ObjectData& o = data_.o;
+        Member* members = GetMembersPointer();
+#if RAPIDJSON_USE_MEMBERSMAP
+        Map* &map = GetMap(members);
+        MapIterator* mit = GetMapIterators(map);
+        SizeType mpos = static_cast<SizeType>(&*m - members);
+        map->erase(DropMapIterator(mit[mpos]));
+#endif
+        MemberIterator last(members + (o.size - 1));
+        if (o.size > 1 && m != last) {
+#if RAPIDJSON_USE_MEMBERSMAP
+            new (&mit[mpos]) MapIterator(DropMapIterator(mit[&*last - members]));
+            mit[mpos]->second = mpos;
+#endif
+            *m = *last; // Move the last one to this place
+        }
+        else {
+            m->~Member(); // Only one left, just destroy
+        }
+        --o.size;
+        return m;
+    }
+
+    MemberIterator DoEraseMembers(ConstMemberIterator first, ConstMemberIterator last) {
+        ObjectData& o = data_.o;
+        MemberIterator beg = MemberBegin(),
+                       pos = beg + (first - beg),
+                       end = MemberEnd();
+#if RAPIDJSON_USE_MEMBERSMAP
+        Map* &map = GetMap(GetMembersPointer());
+        MapIterator* mit = GetMapIterators(map);
+#endif
+        for (MemberIterator itr = pos; itr != last; ++itr) {
+#if RAPIDJSON_USE_MEMBERSMAP
+            map->erase(DropMapIterator(mit[itr - beg]));
+#endif
+            itr->~Member();
+        }
+#if RAPIDJSON_USE_MEMBERSMAP
+        if (first != last) {
+            // Move remaining members/iterators
+            MemberIterator next = pos + (last - first);
+            for (MemberIterator itr = pos; next != end; ++itr, ++next) {
+                std::memcpy(static_cast<void*>(&*itr), &*next, sizeof(Member));
+                SizeType mpos = static_cast<SizeType>(itr - beg);
+                new (&mit[mpos]) MapIterator(DropMapIterator(mit[next - beg]));
+                mit[mpos]->second = mpos;
+            }
+        }
+#else
+        std::memmove(static_cast<void*>(&*pos), &*last,
+                     static_cast<size_t>(end - last) * sizeof(Member));
+#endif
+        o.size -= static_cast<SizeType>(last - first);
+        return pos;
+    }
+
+    template <typename SourceAllocator>
+    void DoCopyMembers(const GenericValue<Encoding,SourceAllocator>& rhs, Allocator& allocator, bool copyConstStrings) {
+        RAPIDJSON_ASSERT(rhs.GetType() == kObjectType);
+
+        data_.f.flags = kObjectFlag;
+        SizeType count = rhs.data_.o.size;
+        Member* lm = DoAllocMembers(count, allocator);
+        const typename GenericValue<Encoding,SourceAllocator>::Member* rm = rhs.GetMembersPointer();
+#if RAPIDJSON_USE_MEMBERSMAP
+        Map* &map = GetMap(lm);
+        MapIterator* mit = GetMapIterators(map);
+#endif
+        for (SizeType i = 0; i < count; i++) {
+            new (&lm[i].name) GenericValue(rm[i].name, allocator, copyConstStrings);
+            new (&lm[i].value) GenericValue(rm[i].value, allocator, copyConstStrings);
+#if RAPIDJSON_USE_MEMBERSMAP
+            new (&mit[i]) MapIterator(map->insert(MapPair(lm[i].name.data_, i)));
+#endif
+        }
+        data_.o.size = data_.o.capacity = count;
+        SetMembersPointer(lm);
+    }
+
     // Initialize this value as array with initial data, without calling destructor.
     void SetArrayRaw(GenericValue* values, SizeType count, Allocator& allocator) {
         data_.f.flags = kArrayFlag;
@@ -2047,9 +2415,16 @@ class GenericValue {
     void SetObjectRaw(Member* members, SizeType count, Allocator& allocator) {
         data_.f.flags = kObjectFlag;
         if (count) {
-            Member* m = static_cast<Member*>(allocator.Malloc(count * sizeof(Member)));
+            Member* m = DoAllocMembers(count, allocator);
             SetMembersPointer(m);
             std::memcpy(static_cast<void*>(m), members, count * sizeof(Member));
+#if RAPIDJSON_USE_MEMBERSMAP
+            Map* &map = GetMap(m);
+            MapIterator* mit = GetMapIterators(map);
+            for (SizeType i = 0; i < count; i++) {
+                new (&mit[i]) MapIterator(map->insert(MapPair(m[i].name.data_, i)));
+            }
+#endif
         }
         else
             SetMembersPointer(0);
@@ -2094,11 +2469,11 @@ class GenericValue {
 
         const SizeType len1 = GetStringLength();
         const SizeType len2 = rhs.GetStringLength();
-        if (len1 != len2) { return false; }
+        if(len1 != len2) { return false; }
 
         const Ch* const str1 = GetString();
         const Ch* const str2 = rhs.GetString();
-        if (str1 == str2) { return true; } // fast path for constant string
+        if(str1 == str2) { return true; } // fast path for constant string
 
         return (std::memcmp(str1, str2, sizeof(Ch) * len1) == 0);
     }
@@ -2120,12 +2495,13 @@ typedef GenericValue<UTF8<> > Value;
     \tparam StackAllocator Allocator for allocating memory for stack during parsing.
     \warning Although GenericDocument inherits from GenericValue, the API does \b not provide any virtual functions, especially no virtual destructor.  To avoid memory leaks, do not \c delete a GenericDocument object via a pointer to a GenericValue.
 */
-template <typename Encoding, typename Allocator = MemoryPoolAllocator<>, typename StackAllocator = CrtAllocator>
+template <typename Encoding, typename Allocator = RAPIDJSON_DEFAULT_ALLOCATOR, typename StackAllocator = RAPIDJSON_DEFAULT_STACK_ALLOCATOR >
 class GenericDocument : public GenericValue<Encoding, Allocator> {
 public:
     typedef typename Encoding::Ch Ch;                       //!< Character type derived from Encoding.
     typedef GenericValue<Encoding, Allocator> ValueType;    //!< Value type of the document.
     typedef Allocator AllocatorType;                        //!< Allocator type from template parameter.
+    typedef StackAllocator StackAllocatorType;              //!< StackAllocator type from template parameter.
 
     //! Constructor
     /*! Creates an empty document of specified type.
@@ -2170,6 +2546,13 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
 #endif
 
     ~GenericDocument() {
+        // Clear the ::ValueType before ownAllocator is destroyed, ~ValueType()
+        // runs last and may access its elements or members which would be freed
+        // with an allocator like MemoryPoolAllocator (CrtAllocator does not
+        // free its data when destroyed, but MemoryPoolAllocator does).
+        if (ownAllocator_) {
+            ValueType::SetNull();
+        }
         Destroy();
     }
 
@@ -2505,6 +2888,7 @@ class GenericDocument : public GenericValue<Encoding, Allocator> {
 //! GenericDocument with UTF8 encoding
 typedef GenericDocument<UTF8<> > Document;
 
+
 //! Helper class for accessing Value of array type.
 /*!
     Instance of this helper class is obtained by \c GenericValue::GetArray().
@@ -2529,6 +2913,7 @@ class GenericArray {
     GenericArray& operator=(const GenericArray& rhs) { value_ = rhs.value_; return *this; }
     ~GenericArray() {}
 
+    operator ValueType&() const { return value_; }
     SizeType Size() const { return value_.Size(); }
     SizeType Capacity() const { return value_.Capacity(); }
     bool Empty() const { return value_.Empty(); }
@@ -2584,6 +2969,7 @@ class GenericObject {
     GenericObject& operator=(const GenericObject& rhs) { value_ = rhs.value_; return *this; }
     ~GenericObject() {}
 
+    operator ValueType&() const { return value_; }
     SizeType MemberCount() const { return value_.MemberCount(); }
     SizeType MemberCapacity() const { return value_.MemberCapacity(); }
     bool ObjectEmpty() const { return value_.ObjectEmpty(); }
@@ -2649,4 +3035,9 @@ class GenericObject {
 RAPIDJSON_NAMESPACE_END
 RAPIDJSON_DIAG_POP
 
+#ifdef RAPIDJSON_WINDOWS_GETOBJECT_WORKAROUND_APPLIED
+#pragma pop_macro("GetObject")
+#undef RAPIDJSON_WINDOWS_GETOBJECT_WORKAROUND_APPLIED
+#endif
+
 #endif // RAPIDJSON_DOCUMENT_H_
diff --git a/src/native/external/rapidjson/encodedstream.h b/src/native/external/rapidjson/encodedstream.h
index 223601c0599b..cf046b89235f 100644
--- a/src/native/external/rapidjson/encodedstream.h
+++ b/src/native/external/rapidjson/encodedstream.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
diff --git a/src/native/external/rapidjson/encodings.h b/src/native/external/rapidjson/encodings.h
index 0b2446795015..50ad18bdc08c 100644
--- a/src/native/external/rapidjson/encodings.h
+++ b/src/native/external/rapidjson/encodings.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
diff --git a/src/native/external/rapidjson/error/en.h b/src/native/external/rapidjson/error/en.h
index 2db838bff239..c87b04eb133e 100644
--- a/src/native/external/rapidjson/error/en.h
+++ b/src/native/external/rapidjson/error/en.h
@@ -1,15 +1,15 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_ERROR_EN_H_
@@ -39,13 +39,13 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro
 
         case kParseErrorDocumentEmpty:                  return RAPIDJSON_ERROR_STRING("The document is empty.");
         case kParseErrorDocumentRootNotSingular:        return RAPIDJSON_ERROR_STRING("The document root must not be followed by other values.");
-    
+
         case kParseErrorValueInvalid:                   return RAPIDJSON_ERROR_STRING("Invalid value.");
-    
+
         case kParseErrorObjectMissName:                 return RAPIDJSON_ERROR_STRING("Missing a name for object member.");
         case kParseErrorObjectMissColon:                return RAPIDJSON_ERROR_STRING("Missing a colon after a name of object member.");
         case kParseErrorObjectMissCommaOrCurlyBracket:  return RAPIDJSON_ERROR_STRING("Missing a comma or '}' after an object member.");
-    
+
         case kParseErrorArrayMissCommaOrSquareBracket:  return RAPIDJSON_ERROR_STRING("Missing a comma or ']' after an array element.");
 
         case kParseErrorStringUnicodeEscapeInvalidHex:  return RAPIDJSON_ERROR_STRING("Incorrect hex digit after \\u escape in string.");
@@ -65,6 +65,108 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro
     }
 }
 
+//! Maps error code of validation into error message.
+/*!
+    \ingroup RAPIDJSON_ERRORS
+    \param validateErrorCode Error code obtained from validator.
+    \return the error message.
+    \note User can make a copy of this function for localization.
+        Using switch-case is safer for future modification of error codes.
+*/
+inline const RAPIDJSON_ERROR_CHARTYPE* GetValidateError_En(ValidateErrorCode validateErrorCode) {
+    switch (validateErrorCode) {
+        case kValidateErrors:                           return RAPIDJSON_ERROR_STRING("One or more validation errors have occurred");
+        case kValidateErrorNone:                        return RAPIDJSON_ERROR_STRING("No error.");
+
+        case kValidateErrorMultipleOf:                  return RAPIDJSON_ERROR_STRING("Number '%actual' is not a multiple of the 'multipleOf' value '%expected'.");
+        case kValidateErrorMaximum:                     return RAPIDJSON_ERROR_STRING("Number '%actual' is greater than the 'maximum' value '%expected'.");
+        case kValidateErrorExclusiveMaximum:            return RAPIDJSON_ERROR_STRING("Number '%actual' is greater than or equal to the 'exclusiveMaximum' value '%expected'.");
+        case kValidateErrorMinimum:                     return RAPIDJSON_ERROR_STRING("Number '%actual' is less than the 'minimum' value '%expected'.");
+        case kValidateErrorExclusiveMinimum:            return RAPIDJSON_ERROR_STRING("Number '%actual' is less than or equal to the 'exclusiveMinimum' value '%expected'.");
+
+        case kValidateErrorMaxLength:                   return RAPIDJSON_ERROR_STRING("String '%actual' is longer than the 'maxLength' value '%expected'.");
+        case kValidateErrorMinLength:                   return RAPIDJSON_ERROR_STRING("String '%actual' is shorter than the 'minLength' value '%expected'.");
+        case kValidateErrorPattern:                     return RAPIDJSON_ERROR_STRING("String '%actual' does not match the 'pattern' regular expression.");
+
+        case kValidateErrorMaxItems:                    return RAPIDJSON_ERROR_STRING("Array of length '%actual' is longer than the 'maxItems' value '%expected'.");
+        case kValidateErrorMinItems:                    return RAPIDJSON_ERROR_STRING("Array of length '%actual' is shorter than the 'minItems' value '%expected'.");
+        case kValidateErrorUniqueItems:                 return RAPIDJSON_ERROR_STRING("Array has duplicate items at indices '%duplicates' but 'uniqueItems' is true.");
+        case kValidateErrorAdditionalItems:             return RAPIDJSON_ERROR_STRING("Array has an additional item at index '%disallowed' that is not allowed by the schema.");
+
+        case kValidateErrorMaxProperties:               return RAPIDJSON_ERROR_STRING("Object has '%actual' members which is more than 'maxProperties' value '%expected'.");
+        case kValidateErrorMinProperties:               return RAPIDJSON_ERROR_STRING("Object has '%actual' members which is less than 'minProperties' value '%expected'.");
+        case kValidateErrorRequired:                    return RAPIDJSON_ERROR_STRING("Object is missing the following members required by the schema: '%missing'.");
+        case kValidateErrorAdditionalProperties:        return RAPIDJSON_ERROR_STRING("Object has an additional member '%disallowed' that is not allowed by the schema.");
+        case kValidateErrorPatternProperties:           return RAPIDJSON_ERROR_STRING("Object has 'patternProperties' that are not allowed by the schema.");
+        case kValidateErrorDependencies:                return RAPIDJSON_ERROR_STRING("Object has missing property or schema dependencies, refer to following errors.");
+
+        case kValidateErrorEnum:                        return RAPIDJSON_ERROR_STRING("Property has a value that is not one of its allowed enumerated values.");
+        case kValidateErrorType:                        return RAPIDJSON_ERROR_STRING("Property has a type '%actual' that is not in the following list: '%expected'.");
+
+        case kValidateErrorOneOf:                       return RAPIDJSON_ERROR_STRING("Property did not match any of the sub-schemas specified by 'oneOf', refer to following errors.");
+        case kValidateErrorOneOfMatch:                  return RAPIDJSON_ERROR_STRING("Property matched more than one of the sub-schemas specified by 'oneOf', indices '%matches'.");
+        case kValidateErrorAllOf:                       return RAPIDJSON_ERROR_STRING("Property did not match all of the sub-schemas specified by 'allOf', refer to following errors.");
+        case kValidateErrorAnyOf:                       return RAPIDJSON_ERROR_STRING("Property did not match any of the sub-schemas specified by 'anyOf', refer to following errors.");
+        case kValidateErrorNot:                         return RAPIDJSON_ERROR_STRING("Property matched the sub-schema specified by 'not'.");
+
+        case kValidateErrorReadOnly:                    return RAPIDJSON_ERROR_STRING("Property is read-only but has been provided when validation is for writing.");
+        case kValidateErrorWriteOnly:                   return RAPIDJSON_ERROR_STRING("Property is write-only but has been provided when validation is for reading.");
+
+        default:                                        return RAPIDJSON_ERROR_STRING("Unknown error.");
+    }
+}
+
+//! Maps error code of schema document compilation into error message.
+/*!
+    \ingroup RAPIDJSON_ERRORS
+    \param schemaErrorCode Error code obtained from compiling the schema document.
+    \return the error message.
+    \note User can make a copy of this function for localization.
+        Using switch-case is safer for future modification of error codes.
+*/
+  inline const RAPIDJSON_ERROR_CHARTYPE* GetSchemaError_En(SchemaErrorCode schemaErrorCode) {
+      switch (schemaErrorCode) {
+          case kSchemaErrorNone:                        return RAPIDJSON_ERROR_STRING("No error.");
+
+          case kSchemaErrorStartUnknown:                return RAPIDJSON_ERROR_STRING("Pointer '%value' to start of schema does not resolve to a location in the document.");
+          case kSchemaErrorRefPlainName:                return RAPIDJSON_ERROR_STRING("$ref fragment '%value' must be a JSON pointer.");
+          case kSchemaErrorRefInvalid:                  return RAPIDJSON_ERROR_STRING("$ref must not be an empty string.");
+          case kSchemaErrorRefPointerInvalid:           return RAPIDJSON_ERROR_STRING("$ref fragment '%value' is not a valid JSON pointer at offset '%offset'.");
+          case kSchemaErrorRefUnknown:                  return RAPIDJSON_ERROR_STRING("$ref '%value' does not resolve to a location in the target document.");
+          case kSchemaErrorRefCyclical:                 return RAPIDJSON_ERROR_STRING("$ref '%value' is cyclical.");
+          case kSchemaErrorRefNoRemoteProvider:         return RAPIDJSON_ERROR_STRING("$ref is remote but there is no remote provider.");
+          case kSchemaErrorRefNoRemoteSchema:           return RAPIDJSON_ERROR_STRING("$ref '%value' is remote but the remote provider did not return a schema.");
+          case kSchemaErrorRegexInvalid:                return RAPIDJSON_ERROR_STRING("Invalid regular expression '%value' in 'pattern' or 'patternProperties'.");
+          case kSchemaErrorSpecUnknown:                 return RAPIDJSON_ERROR_STRING("JSON schema draft or OpenAPI version is not recognized.");
+          case kSchemaErrorSpecUnsupported:             return RAPIDJSON_ERROR_STRING("JSON schema draft or OpenAPI version is not supported.");
+          case kSchemaErrorSpecIllegal:                 return RAPIDJSON_ERROR_STRING("Both JSON schema draft and OpenAPI version found in document.");
+          case kSchemaErrorReadOnlyAndWriteOnly:        return RAPIDJSON_ERROR_STRING("Property must not be both 'readOnly' and 'writeOnly'.");
+
+          default:                                      return RAPIDJSON_ERROR_STRING("Unknown error.");
+    }
+  }
+
+//! Maps error code of pointer parse into error message.
+/*!
+    \ingroup RAPIDJSON_ERRORS
+    \param pointerParseErrorCode Error code obtained from pointer parse.
+    \return the error message.
+    \note User can make a copy of this function for localization.
+        Using switch-case is safer for future modification of error codes.
+*/
+inline const RAPIDJSON_ERROR_CHARTYPE* GetPointerParseError_En(PointerParseErrorCode pointerParseErrorCode) {
+    switch (pointerParseErrorCode) {
+        case kPointerParseErrorNone:                       return RAPIDJSON_ERROR_STRING("No error.");
+
+        case kPointerParseErrorTokenMustBeginWithSolidus:  return RAPIDJSON_ERROR_STRING("A token must begin with a '/'.");
+        case kPointerParseErrorInvalidEscape:              return RAPIDJSON_ERROR_STRING("Invalid escape.");
+        case kPointerParseErrorInvalidPercentEncoding:     return RAPIDJSON_ERROR_STRING("Invalid percent encoding in URI fragment.");
+        case kPointerParseErrorCharacterMustPercentEncode: return RAPIDJSON_ERROR_STRING("A character must be percent encoded in a URI fragment.");
+
+        default:                                           return RAPIDJSON_ERROR_STRING("Unknown error.");
+    }
+}
+
 RAPIDJSON_NAMESPACE_END
 
 #ifdef __clang__
diff --git a/src/native/external/rapidjson/error/error.h b/src/native/external/rapidjson/error/error.h
index 9311d2f03bff..cae345db36d2 100644
--- a/src/native/external/rapidjson/error/error.h
+++ b/src/native/external/rapidjson/error/error.h
@@ -1,15 +1,15 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_ERROR_ERROR_H_
@@ -42,7 +42,7 @@ RAPIDJSON_DIAG_OFF(padded)
 ///////////////////////////////////////////////////////////////////////////////
 // RAPIDJSON_ERROR_STRING
 
-//! Macro for converting string literial to \ref RAPIDJSON_ERROR_CHARTYPE[].
+//! Macro for converting string literal to \ref RAPIDJSON_ERROR_CHARTYPE[].
 /*! \ingroup RAPIDJSON_ERRORS
     By default this conversion macro does nothing.
     On Windows, user can define this macro as \c _T(x) for supporting both
@@ -152,6 +152,130 @@ struct ParseResult {
 */
 typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetParseErrorFunc)(ParseErrorCode);
 
+///////////////////////////////////////////////////////////////////////////////
+// ValidateErrorCode
+
+//! Error codes when validating.
+/*! \ingroup RAPIDJSON_ERRORS
+    \see GenericSchemaValidator
+*/
+enum ValidateErrorCode {
+    kValidateErrors    = -1,                   //!< Top level error code when kValidateContinueOnErrorsFlag set.
+    kValidateErrorNone = 0,                    //!< No error.
+
+    kValidateErrorMultipleOf,                  //!< Number is not a multiple of the 'multipleOf' value.
+    kValidateErrorMaximum,                     //!< Number is greater than the 'maximum' value.
+    kValidateErrorExclusiveMaximum,            //!< Number is greater than or equal to the 'maximum' value.
+    kValidateErrorMinimum,                     //!< Number is less than the 'minimum' value.
+    kValidateErrorExclusiveMinimum,            //!< Number is less than or equal to the 'minimum' value.
+
+    kValidateErrorMaxLength,                   //!< String is longer than the 'maxLength' value.
+    kValidateErrorMinLength,                   //!< String is longer than the 'maxLength' value.
+    kValidateErrorPattern,                     //!< String does not match the 'pattern' regular expression.
+
+    kValidateErrorMaxItems,                    //!< Array is longer than the 'maxItems' value.
+    kValidateErrorMinItems,                    //!< Array is shorter than the 'minItems' value.
+    kValidateErrorUniqueItems,                 //!< Array has duplicate items but 'uniqueItems' is true.
+    kValidateErrorAdditionalItems,             //!< Array has additional items that are not allowed by the schema.
+
+    kValidateErrorMaxProperties,               //!< Object has more members than 'maxProperties' value.
+    kValidateErrorMinProperties,               //!< Object has less members than 'minProperties' value.
+    kValidateErrorRequired,                    //!< Object is missing one or more members required by the schema.
+    kValidateErrorAdditionalProperties,        //!< Object has additional members that are not allowed by the schema.
+    kValidateErrorPatternProperties,           //!< See other errors.
+    kValidateErrorDependencies,                //!< Object has missing property or schema dependencies.
+
+    kValidateErrorEnum,                        //!< Property has a value that is not one of its allowed enumerated values.
+    kValidateErrorType,                        //!< Property has a type that is not allowed by the schema.
+
+    kValidateErrorOneOf,                       //!< Property did not match any of the sub-schemas specified by 'oneOf'.
+    kValidateErrorOneOfMatch,                  //!< Property matched more than one of the sub-schemas specified by 'oneOf'.
+    kValidateErrorAllOf,                       //!< Property did not match all of the sub-schemas specified by 'allOf'.
+    kValidateErrorAnyOf,                       //!< Property did not match any of the sub-schemas specified by 'anyOf'.
+    kValidateErrorNot,                         //!< Property matched the sub-schema specified by 'not'.
+
+    kValidateErrorReadOnly,                    //!< Property is read-only but has been provided when validation is for writing
+    kValidateErrorWriteOnly                    //!< Property is write-only but has been provided when validation is for reading
+};
+
+//! Function pointer type of GetValidateError().
+/*! \ingroup RAPIDJSON_ERRORS
+
+    This is the prototype for \c GetValidateError_X(), where \c X is a locale.
+    User can dynamically change locale in runtime, e.g.:
+\code
+    GetValidateErrorFunc GetValidateError = GetValidateError_En; // or whatever
+    const RAPIDJSON_ERROR_CHARTYPE* s = GetValidateError(validator.GetInvalidSchemaCode());
+\endcode
+*/
+typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetValidateErrorFunc)(ValidateErrorCode);
+
+///////////////////////////////////////////////////////////////////////////////
+// SchemaErrorCode
+
+//! Error codes when validating.
+/*! \ingroup RAPIDJSON_ERRORS
+    \see GenericSchemaValidator
+*/
+enum SchemaErrorCode {
+    kSchemaErrorNone = 0,                      //!< No error.
+
+    kSchemaErrorStartUnknown,                  //!< Pointer to start of schema does not resolve to a location in the document
+    kSchemaErrorRefPlainName,                  //!< $ref fragment must be a JSON pointer
+    kSchemaErrorRefInvalid,                    //!< $ref must not be an empty string
+    kSchemaErrorRefPointerInvalid,             //!< $ref fragment is not a valid JSON pointer at offset
+    kSchemaErrorRefUnknown,                    //!< $ref does not resolve to a location in the target document
+    kSchemaErrorRefCyclical,                   //!< $ref is cyclical
+    kSchemaErrorRefNoRemoteProvider,           //!< $ref is remote but there is no remote provider
+    kSchemaErrorRefNoRemoteSchema,             //!< $ref is remote but the remote provider did not return a schema
+    kSchemaErrorRegexInvalid,                  //!< Invalid regular expression in 'pattern' or 'patternProperties'
+    kSchemaErrorSpecUnknown,                   //!< JSON schema draft or OpenAPI version is not recognized
+    kSchemaErrorSpecUnsupported,               //!< JSON schema draft or OpenAPI version is not supported
+    kSchemaErrorSpecIllegal,                   //!< Both JSON schema draft and OpenAPI version found in document
+    kSchemaErrorReadOnlyAndWriteOnly           //!< Property must not be both 'readOnly' and 'writeOnly'
+};
+
+//! Function pointer type of GetSchemaError().
+/*! \ingroup RAPIDJSON_ERRORS
+
+    This is the prototype for \c GetSchemaError_X(), where \c X is a locale.
+    User can dynamically change locale in runtime, e.g.:
+\code
+    GetSchemaErrorFunc GetSchemaError = GetSchemaError_En; // or whatever
+    const RAPIDJSON_ERROR_CHARTYPE* s = GetSchemaError(validator.GetInvalidSchemaCode());
+\endcode
+*/
+typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetSchemaErrorFunc)(SchemaErrorCode);
+
+///////////////////////////////////////////////////////////////////////////////
+// PointerParseErrorCode
+
+//! Error code of JSON pointer parsing.
+/*! \ingroup RAPIDJSON_ERRORS
+    \see GenericPointer::GenericPointer, GenericPointer::GetParseErrorCode
+*/
+enum PointerParseErrorCode {
+    kPointerParseErrorNone = 0,                     //!< The parse is successful
+
+    kPointerParseErrorTokenMustBeginWithSolidus,    //!< A token must begin with a '/'
+    kPointerParseErrorInvalidEscape,                //!< Invalid escape
+    kPointerParseErrorInvalidPercentEncoding,       //!< Invalid percent encoding in URI fragment
+    kPointerParseErrorCharacterMustPercentEncode    //!< A character must percent encoded in URI fragment
+};
+
+//! Function pointer type of GetPointerParseError().
+/*! \ingroup RAPIDJSON_ERRORS
+
+    This is the prototype for \c GetPointerParseError_X(), where \c X is a locale.
+    User can dynamically change locale in runtime, e.g.:
+\code
+    GetPointerParseErrorFunc GetPointerParseError = GetPointerParseError_En; // or whatever
+    const RAPIDJSON_ERROR_CHARTYPE* s = GetPointerParseError(pointer.GetParseErrorCode());
+\endcode
+*/
+typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetPointerParseErrorFunc)(PointerParseErrorCode);
+
+
 RAPIDJSON_NAMESPACE_END
 
 #ifdef __clang__
diff --git a/src/native/external/rapidjson/filereadstream.h b/src/native/external/rapidjson/filereadstream.h
deleted file mode 100644
index 6b343707ade0..000000000000
--- a/src/native/external/rapidjson/filereadstream.h
+++ /dev/null
@@ -1,99 +0,0 @@
-// Tencent is pleased to support the open source community by making RapidJSON available.
-// 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
-//
-// Licensed under the MIT License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// http://opensource.org/licenses/MIT
-//
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
-// specific language governing permissions and limitations under the License.
-
-#ifndef RAPIDJSON_FILEREADSTREAM_H_
-#define RAPIDJSON_FILEREADSTREAM_H_
-
-#include "stream.h"
-#include <cstdio>
-
-#ifdef __clang__
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(padded)
-RAPIDJSON_DIAG_OFF(unreachable-code)
-RAPIDJSON_DIAG_OFF(missing-noreturn)
-#endif
-
-RAPIDJSON_NAMESPACE_BEGIN
-
-//! File byte stream for input using fread().
-/*!
-    \note implements Stream concept
-*/
-class FileReadStream {
-public:
-    typedef char Ch;    //!< Character type (byte).
-
-    //! Constructor.
-    /*!
-        \param fp File pointer opened for read.
-        \param buffer user-supplied buffer.
-        \param bufferSize size of buffer in bytes. Must >=4 bytes.
-    */
-    FileReadStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferSize_(bufferSize), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) { 
-        RAPIDJSON_ASSERT(fp_ != 0);
-        RAPIDJSON_ASSERT(bufferSize >= 4);
-        Read();
-    }
-
-    Ch Peek() const { return *current_; }
-    Ch Take() { Ch c = *current_; Read(); return c; }
-    size_t Tell() const { return count_ + static_cast<size_t>(current_ - buffer_); }
-
-    // Not implemented
-    void Put(Ch) { RAPIDJSON_ASSERT(false); }
-    void Flush() { RAPIDJSON_ASSERT(false); } 
-    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
-
-    // For encoding detection only.
-    const Ch* Peek4() const {
-        return (current_ + 4 - !eof_ <= bufferLast_) ? current_ : 0;
-    }
-
-private:
-    void Read() {
-        if (current_ < bufferLast_)
-            ++current_;
-        else if (!eof_) {
-            count_ += readCount_;
-            readCount_ = std::fread(buffer_, 1, bufferSize_, fp_);
-            bufferLast_ = buffer_ + readCount_ - 1;
-            current_ = buffer_;
-
-            if (readCount_ < bufferSize_) {
-                buffer_[readCount_] = '\0';
-                ++bufferLast_;
-                eof_ = true;
-            }
-        }
-    }
-
-    std::FILE* fp_;
-    Ch *buffer_;
-    size_t bufferSize_;
-    Ch *bufferLast_;
-    Ch *current_;
-    size_t readCount_;
-    size_t count_;  //!< Number of characters read
-    bool eof_;
-};
-
-RAPIDJSON_NAMESPACE_END
-
-#ifdef __clang__
-RAPIDJSON_DIAG_POP
-#endif
-
-#endif // RAPIDJSON_FILESTREAM_H_
diff --git a/src/native/external/rapidjson/filewritestream.h b/src/native/external/rapidjson/filewritestream.h
deleted file mode 100644
index 8b48fee197c4..000000000000
--- a/src/native/external/rapidjson/filewritestream.h
+++ /dev/null
@@ -1,104 +0,0 @@
-// Tencent is pleased to support the open source community by making RapidJSON available.
-// 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
-//
-// Licensed under the MIT License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// http://opensource.org/licenses/MIT
-//
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
-// specific language governing permissions and limitations under the License.
-
-#ifndef RAPIDJSON_FILEWRITESTREAM_H_
-#define RAPIDJSON_FILEWRITESTREAM_H_
-
-#include "stream.h"
-#include <cstdio>
-
-#ifdef __clang__
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(unreachable-code)
-#endif
-
-RAPIDJSON_NAMESPACE_BEGIN
-
-//! Wrapper of C file stream for output using fwrite().
-/*!
-    \note implements Stream concept
-*/
-class FileWriteStream {
-public:
-    typedef char Ch;    //!< Character type. Only support char.
-
-    FileWriteStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferEnd_(buffer + bufferSize), current_(buffer_) { 
-        RAPIDJSON_ASSERT(fp_ != 0);
-    }
-
-    void Put(char c) { 
-        if (current_ >= bufferEnd_)
-            Flush();
-
-        *current_++ = c;
-    }
-
-    void PutN(char c, size_t n) {
-        size_t avail = static_cast<size_t>(bufferEnd_ - current_);
-        while (n > avail) {
-            std::memset(current_, c, avail);
-            current_ += avail;
-            Flush();
-            n -= avail;
-            avail = static_cast<size_t>(bufferEnd_ - current_);
-        }
-
-        if (n > 0) {
-            std::memset(current_, c, n);
-            current_ += n;
-        }
-    }
-
-    void Flush() {
-        if (current_ != buffer_) {
-            size_t result = std::fwrite(buffer_, 1, static_cast<size_t>(current_ - buffer_), fp_);
-            if (result < static_cast<size_t>(current_ - buffer_)) {
-                // failure deliberately ignored at this time
-                // added to avoid warn_unused_result build errors
-            }
-            current_ = buffer_;
-        }
-    }
-
-    // Not implemented
-    char Peek() const { RAPIDJSON_ASSERT(false); return 0; }
-    char Take() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
-    char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; }
-
-private:
-    // Prohibit copy constructor & assignment operator.
-    FileWriteStream(const FileWriteStream&);
-    FileWriteStream& operator=(const FileWriteStream&);
-
-    std::FILE* fp_;
-    char *buffer_;
-    char *bufferEnd_;
-    char *current_;
-};
-
-//! Implement specialized version of PutN() with memset() for better performance.
-template<>
-inline void PutN(FileWriteStream& stream, char c, size_t n) {
-    stream.PutN(c, n);
-}
-
-RAPIDJSON_NAMESPACE_END
-
-#ifdef __clang__
-RAPIDJSON_DIAG_POP
-#endif
-
-#endif // RAPIDJSON_FILESTREAM_H_
diff --git a/src/native/external/rapidjson/fwd.h b/src/native/external/rapidjson/fwd.h
index e8104e841bcd..d62f77f0ecfa 100644
--- a/src/native/external/rapidjson/fwd.h
+++ b/src/native/external/rapidjson/fwd.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
@@ -102,7 +102,7 @@ class PrettyWriter;
 // document.h
 
 template <typename Encoding, typename Allocator> 
-struct GenericMember;
+class GenericMember;
 
 template <bool Const, typename Encoding, typename Allocator>
 class GenericMemberIterator;
diff --git a/src/native/external/rapidjson/internal/biginteger.h b/src/native/external/rapidjson/internal/biginteger.h
index a31c8a88d6eb..4930043dc7c5 100644
--- a/src/native/external/rapidjson/internal/biginteger.h
+++ b/src/native/external/rapidjson/internal/biginteger.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
@@ -17,9 +17,13 @@
 
 #include "../rapidjson.h"
 
-#if defined(_MSC_VER) && !__INTEL_COMPILER && defined(_M_AMD64)
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && defined(_M_AMD64)
 #include <intrin.h> // for _umul128
+#if !defined(_ARM64EC_)
 #pragma intrinsic(_umul128)
+#else
+#pragma comment(lib,"softintrin")
+#endif
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -37,7 +41,8 @@ class BigInteger {
         digits_[0] = u;
     }
 
-    BigInteger(const char* decimals, size_t length) : count_(1) {
+    template<typename Ch>
+    BigInteger(const Ch* decimals, size_t length) : count_(1) {
         RAPIDJSON_ASSERT(length > 0);
         digits_[0] = 0;
         size_t i = 0;
@@ -221,7 +226,8 @@ class BigInteger {
     bool IsZero() const { return count_ == 1 && digits_[0] == 0; }
 
 private:
-    void AppendDecimal64(const char* begin, const char* end) {
+    template<typename Ch>
+    void AppendDecimal64(const Ch* begin, const Ch* end) {
         uint64_t u = ParseUint64(begin, end);
         if (IsZero())
             *this = u;
@@ -236,11 +242,12 @@ class BigInteger {
         digits_[count_++] = digit;
     }
 
-    static uint64_t ParseUint64(const char* begin, const char* end) {
+    template<typename Ch>
+    static uint64_t ParseUint64(const Ch* begin, const Ch* end) {
         uint64_t r = 0;
-        for (const char* p = begin; p != end; ++p) {
-            RAPIDJSON_ASSERT(*p >= '0' && *p <= '9');
-            r = r * 10u + static_cast<unsigned>(*p - '0');
+        for (const Ch* p = begin; p != end; ++p) {
+            RAPIDJSON_ASSERT(*p >= Ch('0') && *p <= Ch('9'));
+            r = r * 10u + static_cast<unsigned>(*p - Ch('0'));
         }
         return r;
     }
@@ -252,7 +259,7 @@ class BigInteger {
         if (low < k)
             (*outHigh)++;
         return low;
-#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
+#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
         __extension__ typedef unsigned __int128 uint128;
         uint128 p = static_cast<uint128>(a) * static_cast<uint128>(b);
         p += k;
diff --git a/src/native/external/rapidjson/internal/clzll.h b/src/native/external/rapidjson/internal/clzll.h
new file mode 100644
index 000000000000..8fc5118aa47b
--- /dev/null
+++ b/src/native/external/rapidjson/internal/clzll.h
@@ -0,0 +1,71 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_CLZLL_H_
+#define RAPIDJSON_CLZLL_H_
+
+#include "../rapidjson.h"
+
+#if defined(_MSC_VER) && !defined(UNDER_CE)
+#include <intrin.h>
+#if defined(_WIN64)
+#pragma intrinsic(_BitScanReverse64)
+#else
+#pragma intrinsic(_BitScanReverse)
+#endif
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+inline uint32_t clzll(uint64_t x) {
+    // Passing 0 to __builtin_clzll is UB in GCC and results in an
+    // infinite loop in the software implementation.
+    RAPIDJSON_ASSERT(x != 0);
+
+#if defined(_MSC_VER) && !defined(UNDER_CE)
+    unsigned long r = 0;
+#if defined(_WIN64)
+    _BitScanReverse64(&r, x);
+#else
+    // Scan the high 32 bits.
+    if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32)))
+        return 63 - (r + 32);
+
+    // Scan the low 32 bits.
+    _BitScanReverse(&r, static_cast<uint32_t>(x & 0xFFFFFFFF));
+#endif // _WIN64
+
+    return 63 - r;
+#elif (defined(__GNUC__) && __GNUC__ >= 4) || RAPIDJSON_HAS_BUILTIN(__builtin_clzll)
+    // __builtin_clzll wrapper
+    return static_cast<uint32_t>(__builtin_clzll(x));
+#else
+    // naive version
+    uint32_t r = 0;
+    while (!(x & (static_cast<uint64_t>(1) << 63))) {
+        x <<= 1;
+        ++r;
+    }
+
+    return r;
+#endif // _MSC_VER
+}
+
+#define RAPIDJSON_CLZLL RAPIDJSON_NAMESPACE::internal::clzll
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_CLZLL_H_
diff --git a/src/native/external/rapidjson/internal/diyfp.h b/src/native/external/rapidjson/internal/diyfp.h
index b6c2cf5618d4..1f60fb60ca04 100644
--- a/src/native/external/rapidjson/internal/diyfp.h
+++ b/src/native/external/rapidjson/internal/diyfp.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 //
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
@@ -20,12 +20,16 @@
 #define RAPIDJSON_DIYFP_H_
 
 #include "../rapidjson.h"
+#include "clzll.h"
 #include <limits>
 
 #if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
 #include <intrin.h>
-#pragma intrinsic(_BitScanReverse64)
+#if !defined(_ARM64EC_)
 #pragma intrinsic(_umul128)
+#else
+#pragma comment(lib,"softintrin")
+#endif
 #endif
 
 RAPIDJSON_NAMESPACE_BEGIN
@@ -75,7 +79,7 @@ struct DiyFp {
         if (l & (uint64_t(1) << 63)) // rounding
             h++;
         return DiyFp(h, e + rhs.e + 64);
-#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
+#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
         __extension__ typedef unsigned __int128 uint128;
         uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
         uint64_t h = static_cast<uint64_t>(p >> 64);
@@ -100,22 +104,8 @@ struct DiyFp {
     }
 
     DiyFp Normalize() const {
-        RAPIDJSON_ASSERT(f != 0); // https://stackoverflow.com/a/26809183/291737
-#if defined(_MSC_VER) && defined(_M_AMD64)
-        unsigned long index;
-        _BitScanReverse64(&index, f);
-        return DiyFp(f << (63 - index), e - (63 - index));
-#elif defined(__GNUC__) && __GNUC__ >= 4
-        int s = __builtin_clzll(f);
+        int s = static_cast<int>(clzll(f));
         return DiyFp(f << s, e - s);
-#else
-        DiyFp res = *this;
-        while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
-            res.f <<= 1;
-            res.e--;
-        }
-        return res;
-#endif
     }
 
     DiyFp NormalizeBoundary() const {
diff --git a/src/native/external/rapidjson/internal/dtoa.h b/src/native/external/rapidjson/internal/dtoa.h
index bf2e9b2e59a4..cd456721a71c 100644
--- a/src/native/external/rapidjson/internal/dtoa.h
+++ b/src/native/external/rapidjson/internal/dtoa.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
@@ -58,7 +58,11 @@ inline int CountDecimalDigit32(uint32_t n) {
 }
 
 inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) {
-    static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
+    static const uint64_t kPow10[] = { 1ULL, 10ULL, 100ULL, 1000ULL, 10000ULL, 100000ULL, 1000000ULL, 10000000ULL, 100000000ULL,
+                                       1000000000ULL, 10000000000ULL, 100000000000ULL, 1000000000000ULL,
+                                       10000000000000ULL, 100000000000000ULL, 1000000000000000ULL,
+                                       10000000000000000ULL, 100000000000000000ULL, 1000000000000000000ULL,
+                                       10000000000000000000ULL };
     const DiyFp one(uint64_t(1) << -Mp.e, Mp.e);
     const DiyFp wp_w = Mp - W;
     uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e);
@@ -86,7 +90,7 @@ inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buff
         uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2;
         if (tmp <= delta) {
             *K += kappa;
-            GrisuRound(buffer, *len, delta, tmp, static_cast<uint64_t>(kPow10[kappa]) << -one.e, wp_w.f);
+            GrisuRound(buffer, *len, delta, tmp, kPow10[kappa] << -one.e, wp_w.f);
             return;
         }
     }
@@ -103,7 +107,7 @@ inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buff
         if (p2 < delta) {
             *K += kappa;
             int index = -kappa;
-            GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 9 ? kPow10[index] : 0));
+            GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 20 ? kPow10[index] : 0));
             return;
         }
     }
diff --git a/src/native/external/rapidjson/internal/ieee754.h b/src/native/external/rapidjson/internal/ieee754.h
index c2684ba2a35f..68c9e96649b8 100644
--- a/src/native/external/rapidjson/internal/ieee754.h
+++ b/src/native/external/rapidjson/internal/ieee754.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
diff --git a/src/native/external/rapidjson/internal/itoa.h b/src/native/external/rapidjson/internal/itoa.h
index 9b1c45cc1b4a..9fe8c932ffa6 100644
--- a/src/native/external/rapidjson/internal/itoa.h
+++ b/src/native/external/rapidjson/internal/itoa.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 //
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
diff --git a/src/native/external/rapidjson/internal/meta.h b/src/native/external/rapidjson/internal/meta.h
index d401edf85150..27092dc0d69c 100644
--- a/src/native/external/rapidjson/internal/meta.h
+++ b/src/native/external/rapidjson/internal/meta.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
diff --git a/src/native/external/rapidjson/internal/pow10.h b/src/native/external/rapidjson/internal/pow10.h
index 02f475d705fc..eae1a43ed1a0 100644
--- a/src/native/external/rapidjson/internal/pow10.h
+++ b/src/native/external/rapidjson/internal/pow10.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
diff --git a/src/native/external/rapidjson/internal/regex.h b/src/native/external/rapidjson/internal/regex.h
deleted file mode 100644
index 16e355921f88..000000000000
--- a/src/native/external/rapidjson/internal/regex.h
+++ /dev/null
@@ -1,740 +0,0 @@
-// Tencent is pleased to support the open source community by making RapidJSON available.
-// 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
-//
-// Licensed under the MIT License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// http://opensource.org/licenses/MIT
-//
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
-// specific language governing permissions and limitations under the License.
-
-#ifndef RAPIDJSON_INTERNAL_REGEX_H_
-#define RAPIDJSON_INTERNAL_REGEX_H_
-
-#include "../allocators.h"
-#include "../stream.h"
-#include "stack.h"
-
-#ifdef __clang__
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(padded)
-RAPIDJSON_DIAG_OFF(switch-enum)
-RAPIDJSON_DIAG_OFF(implicit-fallthrough)
-#elif defined(_MSC_VER)
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
-#endif
-
-#ifdef __GNUC__
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(effc++)
-#if __GNUC__ >= 7
-RAPIDJSON_DIAG_OFF(implicit-fallthrough)
-#endif
-#endif
-
-#ifndef RAPIDJSON_REGEX_VERBOSE
-#define RAPIDJSON_REGEX_VERBOSE 0
-#endif
-
-RAPIDJSON_NAMESPACE_BEGIN
-namespace internal {
-
-///////////////////////////////////////////////////////////////////////////////
-// DecodedStream
-
-template <typename SourceStream, typename Encoding>
-class DecodedStream {
-public:
-    DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }
-    unsigned Peek() { return codepoint_; }
-    unsigned Take() {
-        unsigned c = codepoint_;
-        if (c) // No further decoding when '\0'
-            Decode();
-        return c;
-    }
-
-private:
-    void Decode() {
-        if (!Encoding::Decode(ss_, &codepoint_))
-            codepoint_ = 0;
-    }
-
-    SourceStream& ss_;
-    unsigned codepoint_;
-};
-
-///////////////////////////////////////////////////////////////////////////////
-// GenericRegex
-
-static const SizeType kRegexInvalidState = ~SizeType(0);  //!< Represents an invalid index in GenericRegex::State::out, out1
-static const SizeType kRegexInvalidRange = ~SizeType(0);
-
-template <typename Encoding, typename Allocator>
-class GenericRegexSearch;
-
-//! Regular expression engine with subset of ECMAscript grammar.
-/*!
-    Supported regular expression syntax:
-    - \c ab     Concatenation
-    - \c a|b    Alternation
-    - \c a?     Zero or one
-    - \c a*     Zero or more
-    - \c a+     One or more
-    - \c a{3}   Exactly 3 times
-    - \c a{3,}  At least 3 times
-    - \c a{3,5} 3 to 5 times
-    - \c (ab)   Grouping
-    - \c ^a     At the beginning
-    - \c a$     At the end
-    - \c .      Any character
-    - \c [abc]  Character classes
-    - \c [a-c]  Character class range
-    - \c [a-z0-9_] Character class combination
-    - \c [^abc] Negated character classes
-    - \c [^a-c] Negated character class range
-    - \c [\b]   Backspace (U+0008)
-    - \c \\| \\\\ ...  Escape characters
-    - \c \\f Form feed (U+000C)
-    - \c \\n Line feed (U+000A)
-    - \c \\r Carriage return (U+000D)
-    - \c \\t Tab (U+0009)
-    - \c \\v Vertical tab (U+000B)
-
-    \note This is a Thompson NFA engine, implemented with reference to 
-        Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).", 
-        https://swtch.com/~rsc/regexp/regexp1.html 
-*/
-template <typename Encoding, typename Allocator = CrtAllocator>
-class GenericRegex {
-public:
-    typedef Encoding EncodingType;
-    typedef typename Encoding::Ch Ch;
-    template <typename, typename> friend class GenericRegexSearch;
-
-    GenericRegex(const Ch* source, Allocator* allocator = 0) : 
-        ownAllocator_(allocator ? 0 : RAPIDJSON_NEW(Allocator)()), allocator_(allocator ? allocator : ownAllocator_), 
-        states_(allocator_, 256), ranges_(allocator_, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), 
-        anchorBegin_(), anchorEnd_()
-    {
-        GenericStringStream<Encoding> ss(source);
-        DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss);
-        Parse(ds);
-    }
-
-    ~GenericRegex()
-    {
-        RAPIDJSON_DELETE(ownAllocator_);
-    }
-
-    bool IsValid() const {
-        return root_ != kRegexInvalidState;
-    }
-
-private:
-    enum Operator {
-        kZeroOrOne,
-        kZeroOrMore,
-        kOneOrMore,
-        kConcatenation,
-        kAlternation,
-        kLeftParenthesis
-    };
-
-    static const unsigned kAnyCharacterClass = 0xFFFFFFFF;   //!< For '.'
-    static const unsigned kRangeCharacterClass = 0xFFFFFFFE;
-    static const unsigned kRangeNegationFlag = 0x80000000;
-
-    struct Range {
-        unsigned start; // 
-        unsigned end;
-        SizeType next;
-    };
-
-    struct State {
-        SizeType out;     //!< Equals to kInvalid for matching state
-        SizeType out1;    //!< Equals to non-kInvalid for split
-        SizeType rangeStart;
-        unsigned codepoint;
-    };
-
-    struct Frag {
-        Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
-        SizeType start;
-        SizeType out; //!< link-list of all output states
-        SizeType minIndex;
-    };
-
-    State& GetState(SizeType index) {
-        RAPIDJSON_ASSERT(index < stateCount_);
-        return states_.template Bottom<State>()[index];
-    }
-
-    const State& GetState(SizeType index) const {
-        RAPIDJSON_ASSERT(index < stateCount_);
-        return states_.template Bottom<State>()[index];
-    }
-
-    Range& GetRange(SizeType index) {
-        RAPIDJSON_ASSERT(index < rangeCount_);
-        return ranges_.template Bottom<Range>()[index];
-    }
-
-    const Range& GetRange(SizeType index) const {
-        RAPIDJSON_ASSERT(index < rangeCount_);
-        return ranges_.template Bottom<Range>()[index];
-    }
-
-    template <typename InputStream>
-    void Parse(DecodedStream<InputStream, Encoding>& ds) {
-        Stack<Allocator> operandStack(allocator_, 256);    // Frag
-        Stack<Allocator> operatorStack(allocator_, 256);   // Operator
-        Stack<Allocator> atomCountStack(allocator_, 256);  // unsigned (Atom per parenthesis)
-
-        *atomCountStack.template Push<unsigned>() = 0;
-
-        unsigned codepoint;
-        while (ds.Peek() != 0) {
-            switch (codepoint = ds.Take()) {
-                case '^':
-                    anchorBegin_ = true;
-                    break;
-
-                case '$':
-                    anchorEnd_ = true;
-                    break;
-
-                case '|':
-                    while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
-                        if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
-                            return;
-                    *operatorStack.template Push<Operator>() = kAlternation;
-                    *atomCountStack.template Top<unsigned>() = 0;
-                    break;
-
-                case '(':
-                    *operatorStack.template Push<Operator>() = kLeftParenthesis;
-                    *atomCountStack.template Push<unsigned>() = 0;
-                    break;
-
-                case ')':
-                    while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
-                        if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
-                            return;
-                    if (operatorStack.Empty())
-                        return;
-                    operatorStack.template Pop<Operator>(1);
-                    atomCountStack.template Pop<unsigned>(1);
-                    ImplicitConcatenation(atomCountStack, operatorStack);
-                    break;
-
-                case '?':
-                    if (!Eval(operandStack, kZeroOrOne))
-                        return;
-                    break;
-
-                case '*':
-                    if (!Eval(operandStack, kZeroOrMore))
-                        return;
-                    break;
-
-                case '+':
-                    if (!Eval(operandStack, kOneOrMore))
-                        return;
-                    break;
-
-                case '{':
-                    {
-                        unsigned n, m;
-                        if (!ParseUnsigned(ds, &n))
-                            return;
-
-                        if (ds.Peek() == ',') {
-                            ds.Take();
-                            if (ds.Peek() == '}')
-                                m = kInfinityQuantifier;
-                            else if (!ParseUnsigned(ds, &m) || m < n)
-                                return;
-                        }
-                        else
-                            m = n;
-
-                        if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
-                            return;
-                        ds.Take();
-                    }
-                    break;
-
-                case '.':
-                    PushOperand(operandStack, kAnyCharacterClass);
-                    ImplicitConcatenation(atomCountStack, operatorStack);
-                    break;
-
-                case '[':
-                    {
-                        SizeType range;
-                        if (!ParseRange(ds, &range))
-                            return;
-                        SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
-                        GetState(s).rangeStart = range;
-                        *operandStack.template Push<Frag>() = Frag(s, s, s);
-                    }
-                    ImplicitConcatenation(atomCountStack, operatorStack);
-                    break;
-
-                case '\\': // Escape character
-                    if (!CharacterEscape(ds, &codepoint))
-                        return; // Unsupported escape character
-                    // fall through to default
-
-                default: // Pattern character
-                    PushOperand(operandStack, codepoint);
-                    ImplicitConcatenation(atomCountStack, operatorStack);
-            }
-        }
-
-        while (!operatorStack.Empty())
-            if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
-                return;
-
-        // Link the operand to matching state.
-        if (operandStack.GetSize() == sizeof(Frag)) {
-            Frag* e = operandStack.template Pop<Frag>(1);
-            Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
-            root_ = e->start;
-
-#if RAPIDJSON_REGEX_VERBOSE
-            printf("root: %d\n", root_);
-            for (SizeType i = 0; i < stateCount_ ; i++) {
-                State& s = GetState(i);
-                printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
-            }
-            printf("\n");
-#endif
-        }
-    }
-
-    SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
-        State* s = states_.template Push<State>();
-        s->out = out;
-        s->out1 = out1;
-        s->codepoint = codepoint;
-        s->rangeStart = kRegexInvalidRange;
-        return stateCount_++;
-    }
-
-    void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {
-        SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
-        *operandStack.template Push<Frag>() = Frag(s, s, s);
-    }
-
-    void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
-        if (*atomCountStack.template Top<unsigned>())
-            *operatorStack.template Push<Operator>() = kConcatenation;
-        (*atomCountStack.template Top<unsigned>())++;
-    }
-
-    SizeType Append(SizeType l1, SizeType l2) {
-        SizeType old = l1;
-        while (GetState(l1).out != kRegexInvalidState)
-            l1 = GetState(l1).out;
-        GetState(l1).out = l2;
-        return old;
-    }
-
-    void Patch(SizeType l, SizeType s) {
-        for (SizeType next; l != kRegexInvalidState; l = next) {
-            next = GetState(l).out;
-            GetState(l).out = s;
-        }
-    }
-
-    bool Eval(Stack<Allocator>& operandStack, Operator op) {
-        switch (op) {
-            case kConcatenation:
-                RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2);
-                {
-                    Frag e2 = *operandStack.template Pop<Frag>(1);
-                    Frag e1 = *operandStack.template Pop<Frag>(1);
-                    Patch(e1.out, e2.start);
-                    *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
-                }
-                return true;
-
-            case kAlternation:
-                if (operandStack.GetSize() >= sizeof(Frag) * 2) {
-                    Frag e2 = *operandStack.template Pop<Frag>(1);
-                    Frag e1 = *operandStack.template Pop<Frag>(1);
-                    SizeType s = NewState(e1.start, e2.start, 0);
-                    *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
-                    return true;
-                }
-                return false;
-
-            case kZeroOrOne:
-                if (operandStack.GetSize() >= sizeof(Frag)) {
-                    Frag e = *operandStack.template Pop<Frag>(1);
-                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
-                    *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
-                    return true;
-                }
-                return false;
-
-            case kZeroOrMore:
-                if (operandStack.GetSize() >= sizeof(Frag)) {
-                    Frag e = *operandStack.template Pop<Frag>(1);
-                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
-                    Patch(e.out, s);
-                    *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
-                    return true;
-                }
-                return false;
-
-            case kOneOrMore:
-                if (operandStack.GetSize() >= sizeof(Frag)) {
-                    Frag e = *operandStack.template Pop<Frag>(1);
-                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
-                    Patch(e.out, s);
-                    *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);
-                    return true;
-                }
-                return false;
-
-            default: 
-                // syntax error (e.g. unclosed kLeftParenthesis)
-                return false;
-        }
-    }
-
-    bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
-        RAPIDJSON_ASSERT(n <= m);
-        RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag));
-
-        if (n == 0) {
-            if (m == 0)                             // a{0} not support
-                return false;
-            else if (m == kInfinityQuantifier)
-                Eval(operandStack, kZeroOrMore);    // a{0,} -> a*
-            else {
-                Eval(operandStack, kZeroOrOne);         // a{0,5} -> a?
-                for (unsigned i = 0; i < m - 1; i++)
-                    CloneTopOperand(operandStack);      // a{0,5} -> a? a? a? a? a?
-                for (unsigned i = 0; i < m - 1; i++)
-                    Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
-            }
-            return true;
-        }
-
-        for (unsigned i = 0; i < n - 1; i++)        // a{3} -> a a a
-            CloneTopOperand(operandStack);
-
-        if (m == kInfinityQuantifier)
-            Eval(operandStack, kOneOrMore);         // a{3,} -> a a a+
-        else if (m > n) {
-            CloneTopOperand(operandStack);          // a{3,5} -> a a a a
-            Eval(operandStack, kZeroOrOne);         // a{3,5} -> a a a a?
-            for (unsigned i = n; i < m - 1; i++)
-                CloneTopOperand(operandStack);      // a{3,5} -> a a a a? a?
-            for (unsigned i = n; i < m; i++)
-                Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
-        }
-
-        for (unsigned i = 0; i < n - 1; i++)
-            Eval(operandStack, kConcatenation);     // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a?
-
-        return true;
-    }
-
-    static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; }
-
-    void CloneTopOperand(Stack<Allocator>& operandStack) {
-        const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation
-        SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
-        State* s = states_.template Push<State>(count);
-        memcpy(s, &GetState(src.minIndex), count * sizeof(State));
-        for (SizeType j = 0; j < count; j++) {
-            if (s[j].out != kRegexInvalidState)
-                s[j].out += count;
-            if (s[j].out1 != kRegexInvalidState)
-                s[j].out1 += count;
-        }
-        *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
-        stateCount_ += count;
-    }
-
-    template <typename InputStream>
-    bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) {
-        unsigned r = 0;
-        if (ds.Peek() < '0' || ds.Peek() > '9')
-            return false;
-        while (ds.Peek() >= '0' && ds.Peek() <= '9') {
-            if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
-                return false; // overflow
-            r = r * 10 + (ds.Take() - '0');
-        }
-        *u = r;
-        return true;
-    }
-
-    template <typename InputStream>
-    bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) {
-        bool isBegin = true;
-        bool negate = false;
-        int step = 0;
-        SizeType start = kRegexInvalidRange;
-        SizeType current = kRegexInvalidRange;
-        unsigned codepoint;
-        while ((codepoint = ds.Take()) != 0) {
-            if (isBegin) {
-                isBegin = false;
-                if (codepoint == '^') {
-                    negate = true;
-                    continue;
-                }
-            }
-
-            switch (codepoint) {
-            case ']':
-                if (start == kRegexInvalidRange)
-                    return false;   // Error: nothing inside []
-                if (step == 2) { // Add trailing '-'
-                    SizeType r = NewRange('-');
-                    RAPIDJSON_ASSERT(current != kRegexInvalidRange);
-                    GetRange(current).next = r;
-                }
-                if (negate)
-                    GetRange(start).start |= kRangeNegationFlag;
-                *range = start;
-                return true;
-
-            case '\\':
-                if (ds.Peek() == 'b') {
-                    ds.Take();
-                    codepoint = 0x0008; // Escape backspace character
-                }
-                else if (!CharacterEscape(ds, &codepoint))
-                    return false;
-                // fall through to default
-
-            default:
-                switch (step) {
-                case 1:
-                    if (codepoint == '-') {
-                        step++;
-                        break;
-                    }
-                    // fall through to step 0 for other characters
-
-                case 0:
-                    {
-                        SizeType r = NewRange(codepoint);
-                        if (current != kRegexInvalidRange)
-                            GetRange(current).next = r;
-                        if (start == kRegexInvalidRange)
-                            start = r;
-                        current = r;
-                    }
-                    step = 1;
-                    break;
-
-                default:
-                    RAPIDJSON_ASSERT(step == 2);
-                    GetRange(current).end = codepoint;
-                    step = 0;
-                }
-            }
-        }
-        return false;
-    }
-    
-    SizeType NewRange(unsigned codepoint) {
-        Range* r = ranges_.template Push<Range>();
-        r->start = r->end = codepoint;
-        r->next = kRegexInvalidRange;
-        return rangeCount_++;
-    }
-
-    template <typename InputStream>
-    bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) {
-        unsigned codepoint;
-        switch (codepoint = ds.Take()) {
-            case '^':
-            case '$':
-            case '|':
-            case '(':
-            case ')':
-            case '?':
-            case '*':
-            case '+':
-            case '.':
-            case '[':
-            case ']':
-            case '{':
-            case '}':
-            case '\\':
-                *escapedCodepoint = codepoint; return true;
-            case 'f': *escapedCodepoint = 0x000C; return true;
-            case 'n': *escapedCodepoint = 0x000A; return true;
-            case 'r': *escapedCodepoint = 0x000D; return true;
-            case 't': *escapedCodepoint = 0x0009; return true;
-            case 'v': *escapedCodepoint = 0x000B; return true;
-            default:
-                return false; // Unsupported escape character
-        }
-    }
-
-    Allocator* ownAllocator_;
-    Allocator* allocator_;
-    Stack<Allocator> states_;
-    Stack<Allocator> ranges_;
-    SizeType root_;
-    SizeType stateCount_;
-    SizeType rangeCount_;
-
-    static const unsigned kInfinityQuantifier = ~0u;
-
-    // For SearchWithAnchoring()
-    bool anchorBegin_;
-    bool anchorEnd_;
-};
-
-template <typename RegexType, typename Allocator = CrtAllocator>
-class GenericRegexSearch {
-public:
-    typedef typename RegexType::EncodingType Encoding;
-    typedef typename Encoding::Ch Ch;
-
-    GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) : 
-        regex_(regex), allocator_(allocator), ownAllocator_(0),
-        state0_(allocator, 0), state1_(allocator, 0), stateSet_()
-    {
-        RAPIDJSON_ASSERT(regex_.IsValid());
-        if (!allocator_)
-            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
-        stateSet_ = static_cast<unsigned*>(allocator_->Malloc(GetStateSetSize()));
-        state0_.template Reserve<SizeType>(regex_.stateCount_);
-        state1_.template Reserve<SizeType>(regex_.stateCount_);
-    }
-
-    ~GenericRegexSearch() {
-        Allocator::Free(stateSet_);
-        RAPIDJSON_DELETE(ownAllocator_);
-    }
-
-    template <typename InputStream>
-    bool Match(InputStream& is) {
-        return SearchWithAnchoring(is, true, true);
-    }
-
-    bool Match(const Ch* s) {
-        GenericStringStream<Encoding> is(s);
-        return Match(is);
-    }
-
-    template <typename InputStream>
-    bool Search(InputStream& is) {
-        return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_);
-    }
-
-    bool Search(const Ch* s) {
-        GenericStringStream<Encoding> is(s);
-        return Search(is);
-    }
-
-private:
-    typedef typename RegexType::State State;
-    typedef typename RegexType::Range Range;
-
-    template <typename InputStream>
-    bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) {
-        DecodedStream<InputStream, Encoding> ds(is);
-
-        state0_.Clear();
-        Stack<Allocator> *current = &state0_, *next = &state1_;
-        const size_t stateSetSize = GetStateSetSize();
-        std::memset(stateSet_, 0, stateSetSize);
-
-        bool matched = AddState(*current, regex_.root_);
-        unsigned codepoint;
-        while (!current->Empty() && (codepoint = ds.Take()) != 0) {
-            std::memset(stateSet_, 0, stateSetSize);
-            next->Clear();
-            matched = false;
-            for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
-                const State& sr = regex_.GetState(*s);
-                if (sr.codepoint == codepoint ||
-                    sr.codepoint == RegexType::kAnyCharacterClass || 
-                    (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
-                {
-                    matched = AddState(*next, sr.out) || matched;
-                    if (!anchorEnd && matched)
-                        return true;
-                }
-                if (!anchorBegin)
-                    AddState(*next, regex_.root_);
-            }
-            internal::Swap(current, next);
-        }
-
-        return matched;
-    }
-
-    size_t GetStateSetSize() const {
-        return (regex_.stateCount_ + 31) / 32 * 4;
-    }
-
-    // Return whether the added states is a match state
-    bool AddState(Stack<Allocator>& l, SizeType index) {
-        RAPIDJSON_ASSERT(index != kRegexInvalidState);
-
-        const State& s = regex_.GetState(index);
-        if (s.out1 != kRegexInvalidState) { // Split
-            bool matched = AddState(l, s.out);
-            return AddState(l, s.out1) || matched;
-        }
-        else if (!(stateSet_[index >> 5] & (1u << (index & 31)))) {
-            stateSet_[index >> 5] |= (1u << (index & 31));
-            *l.template PushUnsafe<SizeType>() = index;
-        }
-        return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation.
-    }
-
-    bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
-        bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0;
-        while (rangeIndex != kRegexInvalidRange) {
-            const Range& r = regex_.GetRange(rangeIndex);
-            if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
-                return yes;
-            rangeIndex = r.next;
-        }
-        return !yes;
-    }
-
-    const RegexType& regex_;
-    Allocator* allocator_;
-    Allocator* ownAllocator_;
-    Stack<Allocator> state0_;
-    Stack<Allocator> state1_;
-    uint32_t* stateSet_;
-};
-
-typedef GenericRegex<UTF8<> > Regex;
-typedef GenericRegexSearch<Regex> RegexSearch;
-
-} // namespace internal
-RAPIDJSON_NAMESPACE_END
-
-#ifdef __GNUC__
-RAPIDJSON_DIAG_POP
-#endif
-
-#if defined(__clang__) || defined(_MSC_VER)
-RAPIDJSON_DIAG_POP
-#endif
-
-#endif // RAPIDJSON_INTERNAL_REGEX_H_
diff --git a/src/native/external/rapidjson/internal/stack.h b/src/native/external/rapidjson/internal/stack.h
index 45dca6a8b09e..73abd706e976 100644
--- a/src/native/external/rapidjson/internal/stack.h
+++ b/src/native/external/rapidjson/internal/stack.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
diff --git a/src/native/external/rapidjson/internal/strfunc.h b/src/native/external/rapidjson/internal/strfunc.h
index 226439a76736..b698a8f43fa6 100644
--- a/src/native/external/rapidjson/internal/strfunc.h
+++ b/src/native/external/rapidjson/internal/strfunc.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
@@ -45,6 +45,20 @@ inline SizeType StrLen(const wchar_t* s) {
     return SizeType(std::wcslen(s));
 }
 
+//! Custom strcmpn() which works on different character types.
+/*! \tparam Ch Character type (e.g. char, wchar_t, short)
+    \param s1 Null-terminated input string.
+    \param s2 Null-terminated input string.
+    \return 0 if equal
+*/
+template<typename Ch>
+inline int StrCmp(const Ch* s1, const Ch* s2) {
+    RAPIDJSON_ASSERT(s1 != 0);
+    RAPIDJSON_ASSERT(s2 != 0);
+    while(*s1 && (*s1 == *s2)) { s1++; s2++; }
+    return static_cast<unsigned>(*s1) < static_cast<unsigned>(*s2) ? -1 : static_cast<unsigned>(*s1) > static_cast<unsigned>(*s2);
+}
+
 //! Returns number of code points in a encoded string.
 template<typename Encoding>
 bool CountStringCodePoint(const typename Encoding::Ch* s, SizeType length, SizeType* outCount) {
diff --git a/src/native/external/rapidjson/internal/strtod.h b/src/native/external/rapidjson/internal/strtod.h
index dfca22b65ac0..55f0e380bfaa 100644
--- a/src/native/external/rapidjson/internal/strtod.h
+++ b/src/native/external/rapidjson/internal/strtod.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
@@ -128,17 +128,18 @@ inline bool StrtodFast(double d, int p, double* result) {
 }
 
 // Compute an approximation and see if it is within 1/2 ULP
-inline bool StrtodDiyFp(const char* decimals, int dLen, int dExp, double* result) {
+template<typename Ch>
+inline bool StrtodDiyFp(const Ch* decimals, int dLen, int dExp, double* result) {
     uint64_t significand = 0;
     int i = 0;   // 2^64 - 1 = 18446744073709551615, 1844674407370955161 = 0x1999999999999999    
     for (; i < dLen; i++) {
         if (significand  >  RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) ||
-            (significand == RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) && decimals[i] > '5'))
+            (significand == RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) && decimals[i] > Ch('5')))
             break;
-        significand = significand * 10u + static_cast<unsigned>(decimals[i] - '0');
+        significand = significand * 10u + static_cast<unsigned>(decimals[i] - Ch('0'));
     }
     
-    if (i < dLen && decimals[i] >= '5') // Rounding
+    if (i < dLen && decimals[i] >= Ch('5')) // Rounding
         significand++;
 
     int remaining = dLen - i;
@@ -205,7 +206,8 @@ inline bool StrtodDiyFp(const char* decimals, int dLen, int dExp, double* result
     return halfWay - static_cast<unsigned>(error) >= precisionBits || precisionBits >= halfWay + static_cast<unsigned>(error);
 }
 
-inline double StrtodBigInteger(double approx, const char* decimals, int dLen, int dExp) {
+template<typename Ch>
+inline double StrtodBigInteger(double approx, const Ch* decimals, int dLen, int dExp) {
     RAPIDJSON_ASSERT(dLen >= 0);
     const BigInteger dInt(decimals, static_cast<unsigned>(dLen));
     Double a(approx);
@@ -223,7 +225,8 @@ inline double StrtodBigInteger(double approx, const char* decimals, int dLen, in
         return a.NextPositiveDouble();
 }
 
-inline double StrtodFullPrecision(double d, int p, const char* decimals, size_t length, size_t decimalPosition, int exp) {
+template<typename Ch>
+inline double StrtodFullPrecision(double d, int p, const Ch* decimals, size_t length, size_t decimalPosition, int exp) {
     RAPIDJSON_ASSERT(d >= 0.0);
     RAPIDJSON_ASSERT(length >= 1);
 
diff --git a/src/native/external/rapidjson/internal/swap.h b/src/native/external/rapidjson/internal/swap.h
index 666e49f97b68..2cf92f93a1d3 100644
--- a/src/native/external/rapidjson/internal/swap.h
+++ b/src/native/external/rapidjson/internal/swap.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 //
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
diff --git a/src/native/external/rapidjson/istreamwrapper.h b/src/native/external/rapidjson/istreamwrapper.h
index c4950b9dcf82..01437ec0127a 100644
--- a/src/native/external/rapidjson/istreamwrapper.h
+++ b/src/native/external/rapidjson/istreamwrapper.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
diff --git a/src/native/external/rapidjson/memorybuffer.h b/src/native/external/rapidjson/memorybuffer.h
deleted file mode 100644
index 39bee1dec1c0..000000000000
--- a/src/native/external/rapidjson/memorybuffer.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// Tencent is pleased to support the open source community by making RapidJSON available.
-// 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
-//
-// Licensed under the MIT License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// http://opensource.org/licenses/MIT
-//
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
-// specific language governing permissions and limitations under the License.
-
-#ifndef RAPIDJSON_MEMORYBUFFER_H_
-#define RAPIDJSON_MEMORYBUFFER_H_
-
-#include "stream.h"
-#include "internal/stack.h"
-
-RAPIDJSON_NAMESPACE_BEGIN
-
-//! Represents an in-memory output byte stream.
-/*!
-    This class is mainly for being wrapped by EncodedOutputStream or AutoUTFOutputStream.
-
-    It is similar to FileWriteBuffer but the destination is an in-memory buffer instead of a file.
-
-    Differences between MemoryBuffer and StringBuffer:
-    1. StringBuffer has Encoding but MemoryBuffer is only a byte buffer. 
-    2. StringBuffer::GetString() returns a null-terminated string. MemoryBuffer::GetBuffer() returns a buffer without terminator.
-
-    \tparam Allocator type for allocating memory buffer.
-    \note implements Stream concept
-*/
-template <typename Allocator = CrtAllocator>
-struct GenericMemoryBuffer {
-    typedef char Ch; // byte
-
-    GenericMemoryBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {}
-
-    void Put(Ch c) { *stack_.template Push<Ch>() = c; }
-    void Flush() {}
-
-    void Clear() { stack_.Clear(); }
-    void ShrinkToFit() { stack_.ShrinkToFit(); }
-    Ch* Push(size_t count) { return stack_.template Push<Ch>(count); }
-    void Pop(size_t count) { stack_.template Pop<Ch>(count); }
-
-    const Ch* GetBuffer() const {
-        return stack_.template Bottom<Ch>();
-    }
-
-    size_t GetSize() const { return stack_.GetSize(); }
-
-    static const size_t kDefaultCapacity = 256;
-    mutable internal::Stack<Allocator> stack_;
-};
-
-typedef GenericMemoryBuffer<> MemoryBuffer;
-
-//! Implement specialized version of PutN() with memset() for better performance.
-template<>
-inline void PutN(MemoryBuffer& memoryBuffer, char c, size_t n) {
-    std::memset(memoryBuffer.stack_.Push<char>(n), c, n * sizeof(c));
-}
-
-RAPIDJSON_NAMESPACE_END
-
-#endif // RAPIDJSON_MEMORYBUFFER_H_
diff --git a/src/native/external/rapidjson/memorystream.h b/src/native/external/rapidjson/memorystream.h
index 1d71d8a4f0e0..77af6c999e97 100644
--- a/src/native/external/rapidjson/memorystream.h
+++ b/src/native/external/rapidjson/memorystream.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
diff --git a/src/native/external/rapidjson/msinttypes/inttypes.h b/src/native/external/rapidjson/msinttypes/inttypes.h
deleted file mode 100644
index 18111286bf55..000000000000
--- a/src/native/external/rapidjson/msinttypes/inttypes.h
+++ /dev/null
@@ -1,316 +0,0 @@
-// ISO C9x  compliant inttypes.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
-// 
-//  Copyright (c) 2006-2013 Alexander Chemeris
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-// 
-//   1. Redistributions of source code must retain the above copyright notice,
-//      this list of conditions and the following disclaimer.
-// 
-//   2. Redistributions in binary form must reproduce the above copyright
-//      notice, this list of conditions and the following disclaimer in the
-//      documentation and/or other materials provided with the distribution.
-// 
-//   3. Neither the name of the product nor the names of its contributors may
-//      be used to endorse or promote products derived from this software
-//      without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
-// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// 
-///////////////////////////////////////////////////////////////////////////////
-
-// The above software in this distribution may have been modified by 
-// THL A29 Limited ("Tencent Modifications"). 
-// All Tencent Modifications are Copyright (C) 2015 THL A29 Limited.
-
-#ifndef _MSC_VER // [
-#error "Use this header only with Microsoft Visual C++ compilers!"
-#endif // _MSC_VER ]
-
-#ifndef _MSC_INTTYPES_H_ // [
-#define _MSC_INTTYPES_H_
-
-#if _MSC_VER > 1000
-#pragma once
-#endif
-
-#include "stdint.h"
-
-// miloyip: VC supports inttypes.h since VC2013
-#if _MSC_VER >= 1800
-#include <inttypes.h>
-#else
-
-// 7.8 Format conversion of integer types
-
-typedef struct {
-   intmax_t quot;
-   intmax_t rem;
-} imaxdiv_t;
-
-// 7.8.1 Macros for format specifiers
-
-#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [   See footnote 185 at page 198
-
-// The fprintf macros for signed integers are:
-#define PRId8       "d"
-#define PRIi8       "i"
-#define PRIdLEAST8  "d"
-#define PRIiLEAST8  "i"
-#define PRIdFAST8   "d"
-#define PRIiFAST8   "i"
-
-#define PRId16       "hd"
-#define PRIi16       "hi"
-#define PRIdLEAST16  "hd"
-#define PRIiLEAST16  "hi"
-#define PRIdFAST16   "hd"
-#define PRIiFAST16   "hi"
-
-#define PRId32       "I32d"
-#define PRIi32       "I32i"
-#define PRIdLEAST32  "I32d"
-#define PRIiLEAST32  "I32i"
-#define PRIdFAST32   "I32d"
-#define PRIiFAST32   "I32i"
-
-#define PRId64       "I64d"
-#define PRIi64       "I64i"
-#define PRIdLEAST64  "I64d"
-#define PRIiLEAST64  "I64i"
-#define PRIdFAST64   "I64d"
-#define PRIiFAST64   "I64i"
-
-#define PRIdMAX     "I64d"
-#define PRIiMAX     "I64i"
-
-#define PRIdPTR     "Id"
-#define PRIiPTR     "Ii"
-
-// The fprintf macros for unsigned integers are:
-#define PRIo8       "o"
-#define PRIu8       "u"
-#define PRIx8       "x"
-#define PRIX8       "X"
-#define PRIoLEAST8  "o"
-#define PRIuLEAST8  "u"
-#define PRIxLEAST8  "x"
-#define PRIXLEAST8  "X"
-#define PRIoFAST8   "o"
-#define PRIuFAST8   "u"
-#define PRIxFAST8   "x"
-#define PRIXFAST8   "X"
-
-#define PRIo16       "ho"
-#define PRIu16       "hu"
-#define PRIx16       "hx"
-#define PRIX16       "hX"
-#define PRIoLEAST16  "ho"
-#define PRIuLEAST16  "hu"
-#define PRIxLEAST16  "hx"
-#define PRIXLEAST16  "hX"
-#define PRIoFAST16   "ho"
-#define PRIuFAST16   "hu"
-#define PRIxFAST16   "hx"
-#define PRIXFAST16   "hX"
-
-#define PRIo32       "I32o"
-#define PRIu32       "I32u"
-#define PRIx32       "I32x"
-#define PRIX32       "I32X"
-#define PRIoLEAST32  "I32o"
-#define PRIuLEAST32  "I32u"
-#define PRIxLEAST32  "I32x"
-#define PRIXLEAST32  "I32X"
-#define PRIoFAST32   "I32o"
-#define PRIuFAST32   "I32u"
-#define PRIxFAST32   "I32x"
-#define PRIXFAST32   "I32X"
-
-#define PRIo64       "I64o"
-#define PRIu64       "I64u"
-#define PRIx64       "I64x"
-#define PRIX64       "I64X"
-#define PRIoLEAST64  "I64o"
-#define PRIuLEAST64  "I64u"
-#define PRIxLEAST64  "I64x"
-#define PRIXLEAST64  "I64X"
-#define PRIoFAST64   "I64o"
-#define PRIuFAST64   "I64u"
-#define PRIxFAST64   "I64x"
-#define PRIXFAST64   "I64X"
-
-#define PRIoMAX     "I64o"
-#define PRIuMAX     "I64u"
-#define PRIxMAX     "I64x"
-#define PRIXMAX     "I64X"
-
-#define PRIoPTR     "Io"
-#define PRIuPTR     "Iu"
-#define PRIxPTR     "Ix"
-#define PRIXPTR     "IX"
-
-// The fscanf macros for signed integers are:
-#define SCNd8       "d"
-#define SCNi8       "i"
-#define SCNdLEAST8  "d"
-#define SCNiLEAST8  "i"
-#define SCNdFAST8   "d"
-#define SCNiFAST8   "i"
-
-#define SCNd16       "hd"
-#define SCNi16       "hi"
-#define SCNdLEAST16  "hd"
-#define SCNiLEAST16  "hi"
-#define SCNdFAST16   "hd"
-#define SCNiFAST16   "hi"
-
-#define SCNd32       "ld"
-#define SCNi32       "li"
-#define SCNdLEAST32  "ld"
-#define SCNiLEAST32  "li"
-#define SCNdFAST32   "ld"
-#define SCNiFAST32   "li"
-
-#define SCNd64       "I64d"
-#define SCNi64       "I64i"
-#define SCNdLEAST64  "I64d"
-#define SCNiLEAST64  "I64i"
-#define SCNdFAST64   "I64d"
-#define SCNiFAST64   "I64i"
-
-#define SCNdMAX     "I64d"
-#define SCNiMAX     "I64i"
-
-#ifdef _WIN64 // [
-#  define SCNdPTR     "I64d"
-#  define SCNiPTR     "I64i"
-#else  // _WIN64 ][
-#  define SCNdPTR     "ld"
-#  define SCNiPTR     "li"
-#endif  // _WIN64 ]
-
-// The fscanf macros for unsigned integers are:
-#define SCNo8       "o"
-#define SCNu8       "u"
-#define SCNx8       "x"
-#define SCNX8       "X"
-#define SCNoLEAST8  "o"
-#define SCNuLEAST8  "u"
-#define SCNxLEAST8  "x"
-#define SCNXLEAST8  "X"
-#define SCNoFAST8   "o"
-#define SCNuFAST8   "u"
-#define SCNxFAST8   "x"
-#define SCNXFAST8   "X"
-
-#define SCNo16       "ho"
-#define SCNu16       "hu"
-#define SCNx16       "hx"
-#define SCNX16       "hX"
-#define SCNoLEAST16  "ho"
-#define SCNuLEAST16  "hu"
-#define SCNxLEAST16  "hx"
-#define SCNXLEAST16  "hX"
-#define SCNoFAST16   "ho"
-#define SCNuFAST16   "hu"
-#define SCNxFAST16   "hx"
-#define SCNXFAST16   "hX"
-
-#define SCNo32       "lo"
-#define SCNu32       "lu"
-#define SCNx32       "lx"
-#define SCNX32       "lX"
-#define SCNoLEAST32  "lo"
-#define SCNuLEAST32  "lu"
-#define SCNxLEAST32  "lx"
-#define SCNXLEAST32  "lX"
-#define SCNoFAST32   "lo"
-#define SCNuFAST32   "lu"
-#define SCNxFAST32   "lx"
-#define SCNXFAST32   "lX"
-
-#define SCNo64       "I64o"
-#define SCNu64       "I64u"
-#define SCNx64       "I64x"
-#define SCNX64       "I64X"
-#define SCNoLEAST64  "I64o"
-#define SCNuLEAST64  "I64u"
-#define SCNxLEAST64  "I64x"
-#define SCNXLEAST64  "I64X"
-#define SCNoFAST64   "I64o"
-#define SCNuFAST64   "I64u"
-#define SCNxFAST64   "I64x"
-#define SCNXFAST64   "I64X"
-
-#define SCNoMAX     "I64o"
-#define SCNuMAX     "I64u"
-#define SCNxMAX     "I64x"
-#define SCNXMAX     "I64X"
-
-#ifdef _WIN64 // [
-#  define SCNoPTR     "I64o"
-#  define SCNuPTR     "I64u"
-#  define SCNxPTR     "I64x"
-#  define SCNXPTR     "I64X"
-#else  // _WIN64 ][
-#  define SCNoPTR     "lo"
-#  define SCNuPTR     "lu"
-#  define SCNxPTR     "lx"
-#  define SCNXPTR     "lX"
-#endif  // _WIN64 ]
-
-#endif // __STDC_FORMAT_MACROS ]
-
-// 7.8.2 Functions for greatest-width integer types
-
-// 7.8.2.1 The imaxabs function
-#define imaxabs _abs64
-
-// 7.8.2.2 The imaxdiv function
-
-// This is modified version of div() function from Microsoft's div.c found
-// in %MSVC.NET%\crt\src\div.c
-#ifdef STATIC_IMAXDIV // [
-static
-#else // STATIC_IMAXDIV ][
-_inline
-#endif // STATIC_IMAXDIV ]
-imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
-{
-   imaxdiv_t result;
-
-   result.quot = numer / denom;
-   result.rem = numer % denom;
-
-   if (numer < 0 && result.rem > 0) {
-      // did division wrong; must fix up
-      ++result.quot;
-      result.rem -= denom;
-   }
-
-   return result;
-}
-
-// 7.8.2.3 The strtoimax and strtoumax functions
-#define strtoimax _strtoi64
-#define strtoumax _strtoui64
-
-// 7.8.2.4 The wcstoimax and wcstoumax functions
-#define wcstoimax _wcstoi64
-#define wcstoumax _wcstoui64
-
-#endif // _MSC_VER >= 1800
-
-#endif // _MSC_INTTYPES_H_ ]
diff --git a/src/native/external/rapidjson/msinttypes/stdint.h b/src/native/external/rapidjson/msinttypes/stdint.h
deleted file mode 100644
index 3d4477b9a024..000000000000
--- a/src/native/external/rapidjson/msinttypes/stdint.h
+++ /dev/null
@@ -1,300 +0,0 @@
-// ISO C9x  compliant stdint.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
-// 
-//  Copyright (c) 2006-2013 Alexander Chemeris
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-// 
-//   1. Redistributions of source code must retain the above copyright notice,
-//      this list of conditions and the following disclaimer.
-// 
-//   2. Redistributions in binary form must reproduce the above copyright
-//      notice, this list of conditions and the following disclaimer in the
-//      documentation and/or other materials provided with the distribution.
-// 
-//   3. Neither the name of the product nor the names of its contributors may
-//      be used to endorse or promote products derived from this software
-//      without specific prior written permission.
-// 
-// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
-// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// 
-///////////////////////////////////////////////////////////////////////////////
-
-// The above software in this distribution may have been modified by 
-// THL A29 Limited ("Tencent Modifications"). 
-// All Tencent Modifications are Copyright (C) 2015 THL A29 Limited.
-
-#ifndef _MSC_VER // [
-#error "Use this header only with Microsoft Visual C++ compilers!"
-#endif // _MSC_VER ]
-
-#ifndef _MSC_STDINT_H_ // [
-#define _MSC_STDINT_H_
-
-#if _MSC_VER > 1000
-#pragma once
-#endif
-
-// miloyip: Originally Visual Studio 2010 uses its own stdint.h. However it generates warning with INT64_C(), so change to use this file for vs2010.
-#if _MSC_VER >= 1600 // [
-#include <stdint.h>
-
-#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
-
-#undef INT8_C
-#undef INT16_C
-#undef INT32_C
-#undef INT64_C
-#undef UINT8_C
-#undef UINT16_C
-#undef UINT32_C
-#undef UINT64_C
-
-// 7.18.4.1 Macros for minimum-width integer constants
-
-#define INT8_C(val)  val##i8
-#define INT16_C(val) val##i16
-#define INT32_C(val) val##i32
-#define INT64_C(val) val##i64
-
-#define UINT8_C(val)  val##ui8
-#define UINT16_C(val) val##ui16
-#define UINT32_C(val) val##ui32
-#define UINT64_C(val) val##ui64
-
-// 7.18.4.2 Macros for greatest-width integer constants
-// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>.
-// Check out Issue 9 for the details.
-#ifndef INTMAX_C //   [
-#  define INTMAX_C   INT64_C
-#endif // INTMAX_C    ]
-#ifndef UINTMAX_C //  [
-#  define UINTMAX_C  UINT64_C
-#endif // UINTMAX_C   ]
-
-#endif // __STDC_CONSTANT_MACROS ]
-
-#else // ] _MSC_VER >= 1700 [
-
-#include <limits.h>
-
-// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
-// compiling for ARM we have to wrap <wchar.h> include with 'extern "C++" {}'
-// or compiler would give many errors like this:
-//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
-#if defined(__cplusplus) && !defined(_M_ARM)
-extern "C" {
-#endif
-#  include <wchar.h>
-#if defined(__cplusplus) && !defined(_M_ARM)
-}
-#endif
-
-// Define _W64 macros to mark types changing their size, like intptr_t.
-#ifndef _W64
-#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
-#     define _W64 __w64
-#  else
-#     define _W64
-#  endif
-#endif
-
-
-// 7.18.1 Integer types
-
-// 7.18.1.1 Exact-width integer types
-
-// Visual Studio 6 and Embedded Visual C++ 4 doesn't
-// realize that, e.g. char has the same size as __int8
-// so we give up on __intX for them.
-#if (_MSC_VER < 1300)
-   typedef signed char       int8_t;
-   typedef signed short      int16_t;
-   typedef signed int        int32_t;
-   typedef unsigned char     uint8_t;
-   typedef unsigned short    uint16_t;
-   typedef unsigned int      uint32_t;
-#else
-   typedef signed __int8     int8_t;
-   typedef signed __int16    int16_t;
-   typedef signed __int32    int32_t;
-   typedef unsigned __int8   uint8_t;
-   typedef unsigned __int16  uint16_t;
-   typedef unsigned __int32  uint32_t;
-#endif
-typedef signed __int64       int64_t;
-typedef unsigned __int64     uint64_t;
-
-
-// 7.18.1.2 Minimum-width integer types
-typedef int8_t    int_least8_t;
-typedef int16_t   int_least16_t;
-typedef int32_t   int_least32_t;
-typedef int64_t   int_least64_t;
-typedef uint8_t   uint_least8_t;
-typedef uint16_t  uint_least16_t;
-typedef uint32_t  uint_least32_t;
-typedef uint64_t  uint_least64_t;
-
-// 7.18.1.3 Fastest minimum-width integer types
-typedef int8_t    int_fast8_t;
-typedef int16_t   int_fast16_t;
-typedef int32_t   int_fast32_t;
-typedef int64_t   int_fast64_t;
-typedef uint8_t   uint_fast8_t;
-typedef uint16_t  uint_fast16_t;
-typedef uint32_t  uint_fast32_t;
-typedef uint64_t  uint_fast64_t;
-
-// 7.18.1.4 Integer types capable of holding object pointers
-#ifdef _WIN64 // [
-   typedef signed __int64    intptr_t;
-   typedef unsigned __int64  uintptr_t;
-#else // _WIN64 ][
-   typedef _W64 signed int   intptr_t;
-   typedef _W64 unsigned int uintptr_t;
-#endif // _WIN64 ]
-
-// 7.18.1.5 Greatest-width integer types
-typedef int64_t   intmax_t;
-typedef uint64_t  uintmax_t;
-
-
-// 7.18.2 Limits of specified-width integer types
-
-#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
-
-// 7.18.2.1 Limits of exact-width integer types
-#define INT8_MIN     ((int8_t)_I8_MIN)
-#define INT8_MAX     _I8_MAX
-#define INT16_MIN    ((int16_t)_I16_MIN)
-#define INT16_MAX    _I16_MAX
-#define INT32_MIN    ((int32_t)_I32_MIN)
-#define INT32_MAX    _I32_MAX
-#define INT64_MIN    ((int64_t)_I64_MIN)
-#define INT64_MAX    _I64_MAX
-#define UINT8_MAX    _UI8_MAX
-#define UINT16_MAX   _UI16_MAX
-#define UINT32_MAX   _UI32_MAX
-#define UINT64_MAX   _UI64_MAX
-
-// 7.18.2.2 Limits of minimum-width integer types
-#define INT_LEAST8_MIN    INT8_MIN
-#define INT_LEAST8_MAX    INT8_MAX
-#define INT_LEAST16_MIN   INT16_MIN
-#define INT_LEAST16_MAX   INT16_MAX
-#define INT_LEAST32_MIN   INT32_MIN
-#define INT_LEAST32_MAX   INT32_MAX
-#define INT_LEAST64_MIN   INT64_MIN
-#define INT_LEAST64_MAX   INT64_MAX
-#define UINT_LEAST8_MAX   UINT8_MAX
-#define UINT_LEAST16_MAX  UINT16_MAX
-#define UINT_LEAST32_MAX  UINT32_MAX
-#define UINT_LEAST64_MAX  UINT64_MAX
-
-// 7.18.2.3 Limits of fastest minimum-width integer types
-#define INT_FAST8_MIN    INT8_MIN
-#define INT_FAST8_MAX    INT8_MAX
-#define INT_FAST16_MIN   INT16_MIN
-#define INT_FAST16_MAX   INT16_MAX
-#define INT_FAST32_MIN   INT32_MIN
-#define INT_FAST32_MAX   INT32_MAX
-#define INT_FAST64_MIN   INT64_MIN
-#define INT_FAST64_MAX   INT64_MAX
-#define UINT_FAST8_MAX   UINT8_MAX
-#define UINT_FAST16_MAX  UINT16_MAX
-#define UINT_FAST32_MAX  UINT32_MAX
-#define UINT_FAST64_MAX  UINT64_MAX
-
-// 7.18.2.4 Limits of integer types capable of holding object pointers
-#ifdef _WIN64 // [
-#  define INTPTR_MIN   INT64_MIN
-#  define INTPTR_MAX   INT64_MAX
-#  define UINTPTR_MAX  UINT64_MAX
-#else // _WIN64 ][
-#  define INTPTR_MIN   INT32_MIN
-#  define INTPTR_MAX   INT32_MAX
-#  define UINTPTR_MAX  UINT32_MAX
-#endif // _WIN64 ]
-
-// 7.18.2.5 Limits of greatest-width integer types
-#define INTMAX_MIN   INT64_MIN
-#define INTMAX_MAX   INT64_MAX
-#define UINTMAX_MAX  UINT64_MAX
-
-// 7.18.3 Limits of other integer types
-
-#ifdef _WIN64 // [
-#  define PTRDIFF_MIN  _I64_MIN
-#  define PTRDIFF_MAX  _I64_MAX
-#else  // _WIN64 ][
-#  define PTRDIFF_MIN  _I32_MIN
-#  define PTRDIFF_MAX  _I32_MAX
-#endif  // _WIN64 ]
-
-#define SIG_ATOMIC_MIN  INT_MIN
-#define SIG_ATOMIC_MAX  INT_MAX
-
-#ifndef SIZE_MAX // [
-#  ifdef _WIN64 // [
-#     define SIZE_MAX  _UI64_MAX
-#  else // _WIN64 ][
-#     define SIZE_MAX  _UI32_MAX
-#  endif // _WIN64 ]
-#endif // SIZE_MAX ]
-
-// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
-#ifndef WCHAR_MIN // [
-#  define WCHAR_MIN  0
-#endif  // WCHAR_MIN ]
-#ifndef WCHAR_MAX // [
-#  define WCHAR_MAX  _UI16_MAX
-#endif  // WCHAR_MAX ]
-
-#define WINT_MIN  0
-#define WINT_MAX  _UI16_MAX
-
-#endif // __STDC_LIMIT_MACROS ]
-
-
-// 7.18.4 Limits of other integer types
-
-#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
-
-// 7.18.4.1 Macros for minimum-width integer constants
-
-#define INT8_C(val)  val##i8
-#define INT16_C(val) val##i16
-#define INT32_C(val) val##i32
-#define INT64_C(val) val##i64
-
-#define UINT8_C(val)  val##ui8
-#define UINT16_C(val) val##ui16
-#define UINT32_C(val) val##ui32
-#define UINT64_C(val) val##ui64
-
-// 7.18.4.2 Macros for greatest-width integer constants
-// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>.
-// Check out Issue 9 for the details.
-#ifndef INTMAX_C //   [
-#  define INTMAX_C   INT64_C
-#endif // INTMAX_C    ]
-#ifndef UINTMAX_C //  [
-#  define UINTMAX_C  UINT64_C
-#endif // UINTMAX_C   ]
-
-#endif // __STDC_CONSTANT_MACROS ]
-
-#endif // _MSC_VER >= 1600 ]
-
-#endif // _MSC_STDINT_H_ ]
diff --git a/src/native/external/rapidjson/ostreamwrapper.h b/src/native/external/rapidjson/ostreamwrapper.h
deleted file mode 100644
index 6f4667c08ad7..000000000000
--- a/src/native/external/rapidjson/ostreamwrapper.h
+++ /dev/null
@@ -1,81 +0,0 @@
-// Tencent is pleased to support the open source community by making RapidJSON available.
-// 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
-//
-// Licensed under the MIT License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// http://opensource.org/licenses/MIT
-//
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
-// specific language governing permissions and limitations under the License.
-
-#ifndef RAPIDJSON_OSTREAMWRAPPER_H_
-#define RAPIDJSON_OSTREAMWRAPPER_H_
-
-#include "stream.h"
-#include <iosfwd>
-
-#ifdef __clang__
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(padded)
-#endif
-
-RAPIDJSON_NAMESPACE_BEGIN
-
-//! Wrapper of \c std::basic_ostream into RapidJSON's Stream concept.
-/*!
-    The classes can be wrapped including but not limited to:
-
-    - \c std::ostringstream
-    - \c std::stringstream
-    - \c std::wpstringstream
-    - \c std::wstringstream
-    - \c std::ifstream
-    - \c std::fstream
-    - \c std::wofstream
-    - \c std::wfstream
-
-    \tparam StreamType Class derived from \c std::basic_ostream.
-*/
-   
-template <typename StreamType>
-class BasicOStreamWrapper {
-public:
-    typedef typename StreamType::char_type Ch;
-    BasicOStreamWrapper(StreamType& stream) : stream_(stream) {}
-
-    void Put(Ch c) {
-        stream_.put(c);
-    }
-
-    void Flush() {
-        stream_.flush();
-    }
-
-    // Not implemented
-    char Peek() const { RAPIDJSON_ASSERT(false); return 0; }
-    char Take() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
-    char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
-    size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; }
-
-private:
-    BasicOStreamWrapper(const BasicOStreamWrapper&);
-    BasicOStreamWrapper& operator=(const BasicOStreamWrapper&);
-
-    StreamType& stream_;
-};
-
-typedef BasicOStreamWrapper<std::ostream> OStreamWrapper;
-typedef BasicOStreamWrapper<std::wostream> WOStreamWrapper;
-
-#ifdef __clang__
-RAPIDJSON_DIAG_POP
-#endif
-
-RAPIDJSON_NAMESPACE_END
-
-#endif // RAPIDJSON_OSTREAMWRAPPER_H_
diff --git a/src/native/external/rapidjson/pointer.h b/src/native/external/rapidjson/pointer.h
deleted file mode 100644
index 063abab9a170..000000000000
--- a/src/native/external/rapidjson/pointer.h
+++ /dev/null
@@ -1,1414 +0,0 @@
-// Tencent is pleased to support the open source community by making RapidJSON available.
-// 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
-//
-// Licensed under the MIT License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// http://opensource.org/licenses/MIT
-//
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
-// specific language governing permissions and limitations under the License.
-
-#ifndef RAPIDJSON_POINTER_H_
-#define RAPIDJSON_POINTER_H_
-
-#include "document.h"
-#include "internal/itoa.h"
-
-#ifdef __clang__
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(switch-enum)
-#elif defined(_MSC_VER)
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
-#endif
-
-RAPIDJSON_NAMESPACE_BEGIN
-
-static const SizeType kPointerInvalidIndex = ~SizeType(0);  //!< Represents an invalid index in GenericPointer::Token
-
-//! Error code of parsing.
-/*! \ingroup RAPIDJSON_ERRORS
-    \see GenericPointer::GenericPointer, GenericPointer::GetParseErrorCode
-*/
-enum PointerParseErrorCode {
-    kPointerParseErrorNone = 0,                     //!< The parse is successful
-
-    kPointerParseErrorTokenMustBeginWithSolidus,    //!< A token must begin with a '/'
-    kPointerParseErrorInvalidEscape,                //!< Invalid escape
-    kPointerParseErrorInvalidPercentEncoding,       //!< Invalid percent encoding in URI fragment
-    kPointerParseErrorCharacterMustPercentEncode    //!< A character must percent encoded in URI fragment
-};
-
-///////////////////////////////////////////////////////////////////////////////
-// GenericPointer
-
-//! Represents a JSON Pointer. Use Pointer for UTF8 encoding and default allocator.
-/*!
-    This class implements RFC 6901 "JavaScript Object Notation (JSON) Pointer" 
-    (https://tools.ietf.org/html/rfc6901).
-
-    A JSON pointer is for identifying a specific value in a JSON document
-    (GenericDocument). It can simplify coding of DOM tree manipulation, because it
-    can access multiple-level depth of DOM tree with single API call.
-
-    After it parses a string representation (e.g. "/foo/0" or URI fragment 
-    representation (e.g. "#/foo/0") into its internal representation (tokens),
-    it can be used to resolve a specific value in multiple documents, or sub-tree 
-    of documents.
-
-    Contrary to GenericValue, Pointer can be copy constructed and copy assigned.
-    Apart from assignment, a Pointer cannot be modified after construction.
-
-    Although Pointer is very convenient, please aware that constructing Pointer
-    involves parsing and dynamic memory allocation. A special constructor with user-
-    supplied tokens eliminates these.
-
-    GenericPointer depends on GenericDocument and GenericValue.
-    
-    \tparam ValueType The value type of the DOM tree. E.g. GenericValue<UTF8<> >
-    \tparam Allocator The allocator type for allocating memory for internal representation.
-    
-    \note GenericPointer uses same encoding of ValueType.
-    However, Allocator of GenericPointer is independent of Allocator of Value.
-*/
-template <typename ValueType, typename Allocator = CrtAllocator>
-class GenericPointer {
-public:
-    typedef typename ValueType::EncodingType EncodingType;  //!< Encoding type from Value
-    typedef typename ValueType::Ch Ch;                      //!< Character type from Value
-
-    //! A token is the basic units of internal representation.
-    /*!
-        A JSON pointer string representation "/foo/123" is parsed to two tokens: 
-        "foo" and 123. 123 will be represented in both numeric form and string form.
-        They are resolved according to the actual value type (object or array).
-
-        For token that are not numbers, or the numeric value is out of bound
-        (greater than limits of SizeType), they are only treated as string form
-        (i.e. the token's index will be equal to kPointerInvalidIndex).
-
-        This struct is public so that user can create a Pointer without parsing and 
-        allocation, using a special constructor.
-    */
-    struct Token {
-        const Ch* name;             //!< Name of the token. It has null character at the end but it can contain null character.
-        SizeType length;            //!< Length of the name.
-        SizeType index;             //!< A valid array index, if it is not equal to kPointerInvalidIndex.
-    };
-
-    //!@name Constructors and destructor.
-    //@{
-
-    //! Default constructor.
-    GenericPointer(Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {}
-
-    //! Constructor that parses a string or URI fragment representation.
-    /*!
-        \param source A null-terminated, string or URI fragment representation of JSON pointer.
-        \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one.
-    */
-    explicit GenericPointer(const Ch* source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
-        Parse(source, internal::StrLen(source));
-    }
-
-#if RAPIDJSON_HAS_STDSTRING
-    //! Constructor that parses a string or URI fragment representation.
-    /*!
-        \param source A string or URI fragment representation of JSON pointer.
-        \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one.
-        \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
-    */
-    explicit GenericPointer(const std::basic_string<Ch>& source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
-        Parse(source.c_str(), source.size());
-    }
-#endif
-
-    //! Constructor that parses a string or URI fragment representation, with length of the source string.
-    /*!
-        \param source A string or URI fragment representation of JSON pointer.
-        \param length Length of source.
-        \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one.
-        \note Slightly faster than the overload without length.
-    */
-    GenericPointer(const Ch* source, size_t length, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
-        Parse(source, length);
-    }
-
-    //! Constructor with user-supplied tokens.
-    /*!
-        This constructor let user supplies const array of tokens.
-        This prevents the parsing process and eliminates allocation.
-        This is preferred for memory constrained environments.
-
-        \param tokens An constant array of tokens representing the JSON pointer.
-        \param tokenCount Number of tokens.
-
-        \b Example
-        \code
-        #define NAME(s) { s, sizeof(s) / sizeof(s[0]) - 1, kPointerInvalidIndex }
-        #define INDEX(i) { #i, sizeof(#i) - 1, i }
-
-        static const Pointer::Token kTokens[] = { NAME("foo"), INDEX(123) };
-        static const Pointer p(kTokens, sizeof(kTokens) / sizeof(kTokens[0]));
-        // Equivalent to static const Pointer p("/foo/123");
-
-        #undef NAME
-        #undef INDEX
-        \endcode
-    */
-    GenericPointer(const Token* tokens, size_t tokenCount) : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(const_cast<Token*>(tokens)), tokenCount_(tokenCount), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {}
-
-    //! Copy constructor.
-    GenericPointer(const GenericPointer& rhs) : allocator_(rhs.allocator_), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
-        *this = rhs;
-    }
-
-    //! Copy constructor.
-    GenericPointer(const GenericPointer& rhs, Allocator* allocator) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
-        *this = rhs;
-    }
-
-    //! Destructor.
-    ~GenericPointer() {
-        if (nameBuffer_)    // If user-supplied tokens constructor is used, nameBuffer_ is nullptr and tokens_ are not deallocated.
-            Allocator::Free(tokens_);
-        RAPIDJSON_DELETE(ownAllocator_);
-    }
-
-    //! Assignment operator.
-    GenericPointer& operator=(const GenericPointer& rhs) {
-        if (this != &rhs) {
-            // Do not delete ownAllcator
-            if (nameBuffer_)
-                Allocator::Free(tokens_);
-
-            tokenCount_ = rhs.tokenCount_;
-            parseErrorOffset_ = rhs.parseErrorOffset_;
-            parseErrorCode_ = rhs.parseErrorCode_;
-
-            if (rhs.nameBuffer_)
-                CopyFromRaw(rhs); // Normally parsed tokens.
-            else {
-                tokens_ = rhs.tokens_; // User supplied const tokens.
-                nameBuffer_ = 0;
-            }
-        }
-        return *this;
-    }
-
-    //! Swap the content of this pointer with an other.
-    /*!
-        \param other The pointer to swap with.
-        \note Constant complexity.
-    */
-    GenericPointer& Swap(GenericPointer& other) RAPIDJSON_NOEXCEPT {
-        internal::Swap(allocator_, other.allocator_);
-        internal::Swap(ownAllocator_, other.ownAllocator_);
-        internal::Swap(nameBuffer_, other.nameBuffer_);
-        internal::Swap(tokens_, other.tokens_);
-        internal::Swap(tokenCount_, other.tokenCount_);
-        internal::Swap(parseErrorOffset_, other.parseErrorOffset_);
-        internal::Swap(parseErrorCode_, other.parseErrorCode_);
-        return *this;
-    }
-
-    //! free-standing swap function helper
-    /*!
-        Helper function to enable support for common swap implementation pattern based on \c std::swap:
-        \code
-        void swap(MyClass& a, MyClass& b) {
-            using std::swap;
-            swap(a.pointer, b.pointer);
-            // ...
-        }
-        \endcode
-        \see Swap()
-     */
-    friend inline void swap(GenericPointer& a, GenericPointer& b) RAPIDJSON_NOEXCEPT { a.Swap(b); }
-
-    //@}
-
-    //!@name Append token
-    //@{
-
-    //! Append a token and return a new Pointer
-    /*!
-        \param token Token to be appended.
-        \param allocator Allocator for the newly return Pointer.
-        \return A new Pointer with appended token.
-    */
-    GenericPointer Append(const Token& token, Allocator* allocator = 0) const {
-        GenericPointer r;
-        r.allocator_ = allocator;
-        Ch *p = r.CopyFromRaw(*this, 1, token.length + 1);
-        std::memcpy(p, token.name, (token.length + 1) * sizeof(Ch));
-        r.tokens_[tokenCount_].name = p;
-        r.tokens_[tokenCount_].length = token.length;
-        r.tokens_[tokenCount_].index = token.index;
-        return r;
-    }
-
-    //! Append a name token with length, and return a new Pointer
-    /*!
-        \param name Name to be appended.
-        \param length Length of name.
-        \param allocator Allocator for the newly return Pointer.
-        \return A new Pointer with appended token.
-    */
-    GenericPointer Append(const Ch* name, SizeType length, Allocator* allocator = 0) const {
-        Token token = { name, length, kPointerInvalidIndex };
-        return Append(token, allocator);
-    }
-
-    //! Append a name token without length, and return a new Pointer
-    /*!
-        \param name Name (const Ch*) to be appended.
-        \param allocator Allocator for the newly return Pointer.
-        \return A new Pointer with appended token.
-    */
-    template <typename T>
-    RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >), (GenericPointer))
-    Append(T* name, Allocator* allocator = 0) const {
-        return Append(name, internal::StrLen(name), allocator);
-    }
-
-#if RAPIDJSON_HAS_STDSTRING
-    //! Append a name token, and return a new Pointer
-    /*!
-        \param name Name to be appended.
-        \param allocator Allocator for the newly return Pointer.
-        \return A new Pointer with appended token.
-    */
-    GenericPointer Append(const std::basic_string<Ch>& name, Allocator* allocator = 0) const {
-        return Append(name.c_str(), static_cast<SizeType>(name.size()), allocator);
-    }
-#endif
-
-    //! Append a index token, and return a new Pointer
-    /*!
-        \param index Index to be appended.
-        \param allocator Allocator for the newly return Pointer.
-        \return A new Pointer with appended token.
-    */
-    GenericPointer Append(SizeType index, Allocator* allocator = 0) const {
-        char buffer[21];
-        char* end = sizeof(SizeType) == 4 ? internal::u32toa(index, buffer) : internal::u64toa(index, buffer);
-        SizeType length = static_cast<SizeType>(end - buffer);
-        buffer[length] = '\0';
-
-        if (sizeof(Ch) == 1) {
-            Token token = { reinterpret_cast<Ch*>(buffer), length, index };
-            return Append(token, allocator);
-        }
-        else {
-            Ch name[21];
-            for (size_t i = 0; i <= length; i++)
-                name[i] = static_cast<Ch>(buffer[i]);
-            Token token = { name, length, index };
-            return Append(token, allocator);
-        }
-    }
-
-    //! Append a token by value, and return a new Pointer
-    /*!
-        \param token token to be appended.
-        \param allocator Allocator for the newly return Pointer.
-        \return A new Pointer with appended token.
-    */
-    GenericPointer Append(const ValueType& token, Allocator* allocator = 0) const {
-        if (token.IsString())
-            return Append(token.GetString(), token.GetStringLength(), allocator);
-        else {
-            RAPIDJSON_ASSERT(token.IsUint64());
-            RAPIDJSON_ASSERT(token.GetUint64() <= SizeType(~0));
-            return Append(static_cast<SizeType>(token.GetUint64()), allocator);
-        }
-    }
-
-    //!@name Handling Parse Error
-    //@{
-
-    //! Check whether this is a valid pointer.
-    bool IsValid() const { return parseErrorCode_ == kPointerParseErrorNone; }
-
-    //! Get the parsing error offset in code unit.
-    size_t GetParseErrorOffset() const { return parseErrorOffset_; }
-
-    //! Get the parsing error code.
-    PointerParseErrorCode GetParseErrorCode() const { return parseErrorCode_; }
-
-    //@}
-
-    //! Get the allocator of this pointer.
-    Allocator& GetAllocator() { return *allocator_; }
-
-    //!@name Tokens
-    //@{
-
-    //! Get the token array (const version only).
-    const Token* GetTokens() const { return tokens_; }
-
-    //! Get the number of tokens.
-    size_t GetTokenCount() const { return tokenCount_; }
-
-    //@}
-
-    //!@name Equality/inequality operators
-    //@{
-
-    //! Equality operator.
-    /*!
-        \note When any pointers are invalid, always returns false.
-    */
-    bool operator==(const GenericPointer& rhs) const {
-        if (!IsValid() || !rhs.IsValid() || tokenCount_ != rhs.tokenCount_)
-            return false;
-
-        for (size_t i = 0; i < tokenCount_; i++) {
-            if (tokens_[i].index != rhs.tokens_[i].index ||
-                tokens_[i].length != rhs.tokens_[i].length || 
-                (tokens_[i].length != 0 && std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch)* tokens_[i].length) != 0))
-            {
-                return false;
-            }
-        }
-
-        return true;
-    }
-
-    //! Inequality operator.
-    /*!
-        \note When any pointers are invalid, always returns true.
-    */
-    bool operator!=(const GenericPointer& rhs) const { return !(*this == rhs); }
-
-    //! Less than operator.
-    /*!
-        \note Invalid pointers are always greater than valid ones.
-    */
-    bool operator<(const GenericPointer& rhs) const {
-        if (!IsValid())
-            return false;
-        if (!rhs.IsValid())
-            return true;
-
-        if (tokenCount_ != rhs.tokenCount_)
-            return tokenCount_ < rhs.tokenCount_;
-
-        for (size_t i = 0; i < tokenCount_; i++) {
-            if (tokens_[i].index != rhs.tokens_[i].index)
-                return tokens_[i].index < rhs.tokens_[i].index;
-
-            if (tokens_[i].length != rhs.tokens_[i].length)
-                return tokens_[i].length < rhs.tokens_[i].length;
-
-            if (int cmp = std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch) * tokens_[i].length))
-                return cmp < 0;
-        }
-
-        return false;
-    }
-
-    //@}
-
-    //!@name Stringify
-    //@{
-
-    //! Stringify the pointer into string representation.
-    /*!
-        \tparam OutputStream Type of output stream.
-        \param os The output stream.
-    */
-    template<typename OutputStream>
-    bool Stringify(OutputStream& os) const {
-        return Stringify<false, OutputStream>(os);
-    }
-
-    //! Stringify the pointer into URI fragment representation.
-    /*!
-        \tparam OutputStream Type of output stream.
-        \param os The output stream.
-    */
-    template<typename OutputStream>
-    bool StringifyUriFragment(OutputStream& os) const {
-        return Stringify<true, OutputStream>(os);
-    }
-
-    //@}
-
-    //!@name Create value
-    //@{
-
-    //! Create a value in a subtree.
-    /*!
-        If the value is not exist, it creates all parent values and a JSON Null value.
-        So it always succeed and return the newly created or existing value.
-
-        Remind that it may change types of parents according to tokens, so it 
-        potentially removes previously stored values. For example, if a document 
-        was an array, and "/foo" is used to create a value, then the document 
-        will be changed to an object, and all existing array elements are lost.
-
-        \param root Root value of a DOM subtree to be resolved. It can be any value other than document root.
-        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
-        \param alreadyExist If non-null, it stores whether the resolved value is already exist.
-        \return The resolved newly created (a JSON Null value), or already exists value.
-    */
-    ValueType& Create(ValueType& root, typename ValueType::AllocatorType& allocator, bool* alreadyExist = 0) const {
-        RAPIDJSON_ASSERT(IsValid());
-        ValueType* v = &root;
-        bool exist = true;
-        for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
-            if (v->IsArray() && t->name[0] == '-' && t->length == 1) {
-                v->PushBack(ValueType().Move(), allocator);
-                v = &((*v)[v->Size() - 1]);
-                exist = false;
-            }
-            else {
-                if (t->index == kPointerInvalidIndex) { // must be object name
-                    if (!v->IsObject())
-                        v->SetObject(); // Change to Object
-                }
-                else { // object name or array index
-                    if (!v->IsArray() && !v->IsObject())
-                        v->SetArray(); // Change to Array
-                }
-
-                if (v->IsArray()) {
-                    if (t->index >= v->Size()) {
-                        v->Reserve(t->index + 1, allocator);
-                        while (t->index >= v->Size())
-                            v->PushBack(ValueType().Move(), allocator);
-                        exist = false;
-                    }
-                    v = &((*v)[t->index]);
-                }
-                else {
-                    typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length));
-                    if (m == v->MemberEnd()) {
-                        v->AddMember(ValueType(t->name, t->length, allocator).Move(), ValueType().Move(), allocator);
-                        v = &(--v->MemberEnd())->value; // Assumes AddMember() appends at the end
-                        exist = false;
-                    }
-                    else
-                        v = &m->value;
-                }
-            }
-        }
-
-        if (alreadyExist)
-            *alreadyExist = exist;
-
-        return *v;
-    }
-
-    //! Creates a value in a document.
-    /*!
-        \param document A document to be resolved.
-        \param alreadyExist If non-null, it stores whether the resolved value is already exist.
-        \return The resolved newly created, or already exists value.
-    */
-    template <typename stackAllocator>
-    ValueType& Create(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, bool* alreadyExist = 0) const {
-        return Create(document, document.GetAllocator(), alreadyExist);
-    }
-
-    //@}
-
-    //!@name Query value
-    //@{
-
-    //! Query a value in a subtree.
-    /*!
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \param unresolvedTokenIndex If the pointer cannot resolve a token in the pointer, this parameter can obtain the index of unresolved token.
-        \return Pointer to the value if it can be resolved. Otherwise null.
-
-        \note
-        There are only 3 situations when a value cannot be resolved:
-        1. A value in the path is not an array nor object.
-        2. An object value does not contain the token.
-        3. A token is out of range of an array value.
-
-        Use unresolvedTokenIndex to retrieve the token index.
-    */
-    ValueType* Get(ValueType& root, size_t* unresolvedTokenIndex = 0) const {
-        RAPIDJSON_ASSERT(IsValid());
-        ValueType* v = &root;
-        for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
-            switch (v->GetType()) {
-            case kObjectType:
-                {
-                    typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length));
-                    if (m == v->MemberEnd())
-                        break;
-                    v = &m->value;
-                }
-                continue;
-            case kArrayType:
-                if (t->index == kPointerInvalidIndex || t->index >= v->Size())
-                    break;
-                v = &((*v)[t->index]);
-                continue;
-            default:
-                break;
-            }
-
-            // Error: unresolved token
-            if (unresolvedTokenIndex)
-                *unresolvedTokenIndex = static_cast<size_t>(t - tokens_);
-            return 0;
-        }
-        return v;
-    }
-
-    //! Query a const value in a const subtree.
-    /*!
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \return Pointer to the value if it can be resolved. Otherwise null.
-    */
-    const ValueType* Get(const ValueType& root, size_t* unresolvedTokenIndex = 0) const { 
-        return Get(const_cast<ValueType&>(root), unresolvedTokenIndex);
-    }
-
-    //@}
-
-    //!@name Query a value with default
-    //@{
-
-    //! Query a value in a subtree with default value.
-    /*!
-        Similar to Get(), but if the specified value do not exists, it creates all parents and clone the default value.
-        So that this function always succeed.
-
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \param defaultValue Default value to be cloned if the value was not exists.
-        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
-        \see Create()
-    */
-    ValueType& GetWithDefault(ValueType& root, const ValueType& defaultValue, typename ValueType::AllocatorType& allocator) const {
-        bool alreadyExist;
-        ValueType& v = Create(root, allocator, &alreadyExist);
-        return alreadyExist ? v : v.CopyFrom(defaultValue, allocator);
-    }
-
-    //! Query a value in a subtree with default null-terminated string.
-    ValueType& GetWithDefault(ValueType& root, const Ch* defaultValue, typename ValueType::AllocatorType& allocator) const {
-        bool alreadyExist;
-        ValueType& v = Create(root, allocator, &alreadyExist);
-        return alreadyExist ? v : v.SetString(defaultValue, allocator);
-    }
-
-#if RAPIDJSON_HAS_STDSTRING
-    //! Query a value in a subtree with default std::basic_string.
-    ValueType& GetWithDefault(ValueType& root, const std::basic_string<Ch>& defaultValue, typename ValueType::AllocatorType& allocator) const {
-        bool alreadyExist;
-        ValueType& v = Create(root, allocator, &alreadyExist);
-        return alreadyExist ? v : v.SetString(defaultValue, allocator);
-    }
-#endif
-
-    //! Query a value in a subtree with default primitive value.
-    /*!
-        \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
-    */
-    template <typename T>
-    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
-    GetWithDefault(ValueType& root, T defaultValue, typename ValueType::AllocatorType& allocator) const {
-        return GetWithDefault(root, ValueType(defaultValue).Move(), allocator);
-    }
-
-    //! Query a value in a document with default value.
-    template <typename stackAllocator>
-    ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const ValueType& defaultValue) const {
-        return GetWithDefault(document, defaultValue, document.GetAllocator());
-    }
-
-    //! Query a value in a document with default null-terminated string.
-    template <typename stackAllocator>
-    ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const Ch* defaultValue) const {
-        return GetWithDefault(document, defaultValue, document.GetAllocator());
-    }
-    
-#if RAPIDJSON_HAS_STDSTRING
-    //! Query a value in a document with default std::basic_string.
-    template <typename stackAllocator>
-    ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const std::basic_string<Ch>& defaultValue) const {
-        return GetWithDefault(document, defaultValue, document.GetAllocator());
-    }
-#endif
-
-    //! Query a value in a document with default primitive value.
-    /*!
-        \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
-    */
-    template <typename T, typename stackAllocator>
-    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
-    GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, T defaultValue) const {
-        return GetWithDefault(document, defaultValue, document.GetAllocator());
-    }
-
-    //@}
-
-    //!@name Set a value
-    //@{
-
-    //! Set a value in a subtree, with move semantics.
-    /*!
-        It creates all parents if they are not exist or types are different to the tokens.
-        So this function always succeeds but potentially remove existing values.
-
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \param value Value to be set.
-        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
-        \see Create()
-    */
-    ValueType& Set(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const {
-        return Create(root, allocator) = value;
-    }
-
-    //! Set a value in a subtree, with copy semantics.
-    ValueType& Set(ValueType& root, const ValueType& value, typename ValueType::AllocatorType& allocator) const {
-        return Create(root, allocator).CopyFrom(value, allocator);
-    }
-
-    //! Set a null-terminated string in a subtree.
-    ValueType& Set(ValueType& root, const Ch* value, typename ValueType::AllocatorType& allocator) const {
-        return Create(root, allocator) = ValueType(value, allocator).Move();
-    }
-
-#if RAPIDJSON_HAS_STDSTRING
-    //! Set a std::basic_string in a subtree.
-    ValueType& Set(ValueType& root, const std::basic_string<Ch>& value, typename ValueType::AllocatorType& allocator) const {
-        return Create(root, allocator) = ValueType(value, allocator).Move();
-    }
-#endif
-
-    //! Set a primitive value in a subtree.
-    /*!
-        \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
-    */
-    template <typename T>
-    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
-    Set(ValueType& root, T value, typename ValueType::AllocatorType& allocator) const {
-        return Create(root, allocator) = ValueType(value).Move();
-    }
-
-    //! Set a value in a document, with move semantics.
-    template <typename stackAllocator>
-    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, ValueType& value) const {
-        return Create(document) = value;
-    }
-
-    //! Set a value in a document, with copy semantics.
-    template <typename stackAllocator>
-    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const ValueType& value) const {
-        return Create(document).CopyFrom(value, document.GetAllocator());
-    }
-
-    //! Set a null-terminated string in a document.
-    template <typename stackAllocator>
-    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const Ch* value) const {
-        return Create(document) = ValueType(value, document.GetAllocator()).Move();
-    }
-
-#if RAPIDJSON_HAS_STDSTRING
-    //! Sets a std::basic_string in a document.
-    template <typename stackAllocator>
-    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const std::basic_string<Ch>& value) const {
-        return Create(document) = ValueType(value, document.GetAllocator()).Move();
-    }
-#endif
-
-    //! Set a primitive value in a document.
-    /*!
-    \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
-    */
-    template <typename T, typename stackAllocator>
-    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
-        Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, T value) const {
-            return Create(document) = value;
-    }
-
-    //@}
-
-    //!@name Swap a value
-    //@{
-
-    //! Swap a value with a value in a subtree.
-    /*!
-        It creates all parents if they are not exist or types are different to the tokens.
-        So this function always succeeds but potentially remove existing values.
-
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \param value Value to be swapped.
-        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
-        \see Create()
-    */
-    ValueType& Swap(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const {
-        return Create(root, allocator).Swap(value);
-    }
-
-    //! Swap a value with a value in a document.
-    template <typename stackAllocator>
-    ValueType& Swap(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, ValueType& value) const {
-        return Create(document).Swap(value);
-    }
-
-    //@}
-
-    //! Erase a value in a subtree.
-    /*!
-        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
-        \return Whether the resolved value is found and erased.
-
-        \note Erasing with an empty pointer \c Pointer(""), i.e. the root, always fail and return false.
-    */
-    bool Erase(ValueType& root) const {
-        RAPIDJSON_ASSERT(IsValid());
-        if (tokenCount_ == 0) // Cannot erase the root
-            return false;
-
-        ValueType* v = &root;
-        const Token* last = tokens_ + (tokenCount_ - 1);
-        for (const Token *t = tokens_; t != last; ++t) {
-            switch (v->GetType()) {
-            case kObjectType:
-                {
-                    typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length));
-                    if (m == v->MemberEnd())
-                        return false;
-                    v = &m->value;
-                }
-                break;
-            case kArrayType:
-                if (t->index == kPointerInvalidIndex || t->index >= v->Size())
-                    return false;
-                v = &((*v)[t->index]);
-                break;
-            default:
-                return false;
-            }
-        }
-
-        switch (v->GetType()) {
-        case kObjectType:
-            return v->EraseMember(GenericStringRef<Ch>(last->name, last->length));
-        case kArrayType:
-            if (last->index == kPointerInvalidIndex || last->index >= v->Size())
-                return false;
-            v->Erase(v->Begin() + last->index);
-            return true;
-        default:
-            return false;
-        }
-    }
-
-private:
-    //! Clone the content from rhs to this.
-    /*!
-        \param rhs Source pointer.
-        \param extraToken Extra tokens to be allocated.
-        \param extraNameBufferSize Extra name buffer size (in number of Ch) to be allocated.
-        \return Start of non-occupied name buffer, for storing extra names.
-    */
-    Ch* CopyFromRaw(const GenericPointer& rhs, size_t extraToken = 0, size_t extraNameBufferSize = 0) {
-        if (!allocator_) // allocator is independently owned.
-            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
-
-        size_t nameBufferSize = rhs.tokenCount_; // null terminators for tokens
-        for (Token *t = rhs.tokens_; t != rhs.tokens_ + rhs.tokenCount_; ++t)
-            nameBufferSize += t->length;
-
-        tokenCount_ = rhs.tokenCount_ + extraToken;
-        tokens_ = static_cast<Token *>(allocator_->Malloc(tokenCount_ * sizeof(Token) + (nameBufferSize + extraNameBufferSize) * sizeof(Ch)));
-        nameBuffer_ = reinterpret_cast<Ch *>(tokens_ + tokenCount_);
-        if (rhs.tokenCount_ > 0) {
-            std::memcpy(tokens_, rhs.tokens_, rhs.tokenCount_ * sizeof(Token));
-        }
-        if (nameBufferSize > 0) {
-            std::memcpy(nameBuffer_, rhs.nameBuffer_, nameBufferSize * sizeof(Ch));
-        }
-
-        // Adjust pointers to name buffer
-        std::ptrdiff_t diff = nameBuffer_ - rhs.nameBuffer_;
-        for (Token *t = tokens_; t != tokens_ + rhs.tokenCount_; ++t)
-            t->name += diff;
-
-        return nameBuffer_ + nameBufferSize;
-    }
-
-    //! Check whether a character should be percent-encoded.
-    /*!
-        According to RFC 3986 2.3 Unreserved Characters.
-        \param c The character (code unit) to be tested.
-    */
-    bool NeedPercentEncode(Ch c) const {
-        return !((c >= '0' && c <= '9') || (c >= 'A' && c <='Z') || (c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '_' || c =='~');
-    }
-
-    //! Parse a JSON String or its URI fragment representation into tokens.
-#ifndef __clang__ // -Wdocumentation
-    /*!
-        \param source Either a JSON Pointer string, or its URI fragment representation. Not need to be null terminated.
-        \param length Length of the source string.
-        \note Source cannot be JSON String Representation of JSON Pointer, e.g. In "/\u0000", \u0000 will not be unescaped.
-    */
-#endif
-    void Parse(const Ch* source, size_t length) {
-        RAPIDJSON_ASSERT(source != NULL);
-        RAPIDJSON_ASSERT(nameBuffer_ == 0);
-        RAPIDJSON_ASSERT(tokens_ == 0);
-
-        // Create own allocator if user did not supply.
-        if (!allocator_)
-            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
-
-        // Count number of '/' as tokenCount
-        tokenCount_ = 0;
-        for (const Ch* s = source; s != source + length; s++) 
-            if (*s == '/')
-                tokenCount_++;
-
-        Token* token = tokens_ = static_cast<Token *>(allocator_->Malloc(tokenCount_ * sizeof(Token) + length * sizeof(Ch)));
-        Ch* name = nameBuffer_ = reinterpret_cast<Ch *>(tokens_ + tokenCount_);
-        size_t i = 0;
-
-        // Detect if it is a URI fragment
-        bool uriFragment = false;
-        if (source[i] == '#') {
-            uriFragment = true;
-            i++;
-        }
-
-        if (i != length && source[i] != '/') {
-            parseErrorCode_ = kPointerParseErrorTokenMustBeginWithSolidus;
-            goto error;
-        }
-
-        while (i < length) {
-            RAPIDJSON_ASSERT(source[i] == '/');
-            i++; // consumes '/'
-
-            token->name = name;
-            bool isNumber = true;
-
-            while (i < length && source[i] != '/') {
-                Ch c = source[i];
-                if (uriFragment) {
-                    // Decoding percent-encoding for URI fragment
-                    if (c == '%') {
-                        PercentDecodeStream is(&source[i], source + length);
-                        GenericInsituStringStream<EncodingType> os(name);
-                        Ch* begin = os.PutBegin();
-                        if (!Transcoder<UTF8<>, EncodingType>().Validate(is, os) || !is.IsValid()) {
-                            parseErrorCode_ = kPointerParseErrorInvalidPercentEncoding;
-                            goto error;
-                        }
-                        size_t len = os.PutEnd(begin);
-                        i += is.Tell() - 1;
-                        if (len == 1)
-                            c = *name;
-                        else {
-                            name += len;
-                            isNumber = false;
-                            i++;
-                            continue;
-                        }
-                    }
-                    else if (NeedPercentEncode(c)) {
-                        parseErrorCode_ = kPointerParseErrorCharacterMustPercentEncode;
-                        goto error;
-                    }
-                }
-
-                i++;
-                
-                // Escaping "~0" -> '~', "~1" -> '/'
-                if (c == '~') {
-                    if (i < length) {
-                        c = source[i];
-                        if (c == '0')       c = '~';
-                        else if (c == '1')  c = '/';
-                        else {
-                            parseErrorCode_ = kPointerParseErrorInvalidEscape;
-                            goto error;
-                        }
-                        i++;
-                    }
-                    else {
-                        parseErrorCode_ = kPointerParseErrorInvalidEscape;
-                        goto error;
-                    }
-                }
-
-                // First check for index: all of characters are digit
-                if (c < '0' || c > '9')
-                    isNumber = false;
-
-                *name++ = c;
-            }
-            token->length = static_cast<SizeType>(name - token->name);
-            if (token->length == 0)
-                isNumber = false;
-            *name++ = '\0'; // Null terminator
-
-            // Second check for index: more than one digit cannot have leading zero
-            if (isNumber && token->length > 1 && token->name[0] == '0')
-                isNumber = false;
-
-            // String to SizeType conversion
-            SizeType n = 0;
-            if (isNumber) {
-                for (size_t j = 0; j < token->length; j++) {
-                    SizeType m = n * 10 + static_cast<SizeType>(token->name[j] - '0');
-                    if (m < n) {   // overflow detection
-                        isNumber = false;
-                        break;
-                    }
-                    n = m;
-                }
-            }
-
-            token->index = isNumber ? n : kPointerInvalidIndex;
-            token++;
-        }
-
-        RAPIDJSON_ASSERT(name <= nameBuffer_ + length); // Should not overflow buffer
-        parseErrorCode_ = kPointerParseErrorNone;
-        return;
-
-    error:
-        Allocator::Free(tokens_);
-        nameBuffer_ = 0;
-        tokens_ = 0;
-        tokenCount_ = 0;
-        parseErrorOffset_ = i;
-        return;
-    }
-
-    //! Stringify to string or URI fragment representation.
-    /*!
-        \tparam uriFragment True for stringifying to URI fragment representation. False for string representation.
-        \tparam OutputStream type of output stream.
-        \param os The output stream.
-    */
-    template<bool uriFragment, typename OutputStream>
-    bool Stringify(OutputStream& os) const {
-        RAPIDJSON_ASSERT(IsValid());
-
-        if (uriFragment)
-            os.Put('#');
-
-        for (Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
-            os.Put('/');
-            for (size_t j = 0; j < t->length; j++) {
-                Ch c = t->name[j];
-                if (c == '~') {
-                    os.Put('~');
-                    os.Put('0');
-                }
-                else if (c == '/') {
-                    os.Put('~');
-                    os.Put('1');
-                }
-                else if (uriFragment && NeedPercentEncode(c)) { 
-                    // Transcode to UTF8 sequence
-                    GenericStringStream<typename ValueType::EncodingType> source(&t->name[j]);
-                    PercentEncodeStream<OutputStream> target(os);
-                    if (!Transcoder<EncodingType, UTF8<> >().Validate(source, target))
-                        return false;
-                    j += source.Tell() - 1;
-                }
-                else
-                    os.Put(c);
-            }
-        }
-        return true;
-    }
-
-    //! A helper stream for decoding a percent-encoded sequence into code unit.
-    /*!
-        This stream decodes %XY triplet into code unit (0-255).
-        If it encounters invalid characters, it sets output code unit as 0 and 
-        mark invalid, and to be checked by IsValid().
-    */
-    class PercentDecodeStream {
-    public:
-        typedef typename ValueType::Ch Ch;
-
-        //! Constructor
-        /*!
-            \param source Start of the stream
-            \param end Past-the-end of the stream.
-        */
-        PercentDecodeStream(const Ch* source, const Ch* end) : src_(source), head_(source), end_(end), valid_(true) {}
-
-        Ch Take() {
-            if (*src_ != '%' || src_ + 3 > end_) { // %XY triplet
-                valid_ = false;
-                return 0;
-            }
-            src_++;
-            Ch c = 0;
-            for (int j = 0; j < 2; j++) {
-                c = static_cast<Ch>(c << 4);
-                Ch h = *src_;
-                if      (h >= '0' && h <= '9') c = static_cast<Ch>(c + h - '0');
-                else if (h >= 'A' && h <= 'F') c = static_cast<Ch>(c + h - 'A' + 10);
-                else if (h >= 'a' && h <= 'f') c = static_cast<Ch>(c + h - 'a' + 10);
-                else {
-                    valid_ = false;
-                    return 0;
-                }
-                src_++;
-            }
-            return c;
-        }
-
-        size_t Tell() const { return static_cast<size_t>(src_ - head_); }
-        bool IsValid() const { return valid_; }
-
-    private:
-        const Ch* src_;     //!< Current read position.
-        const Ch* head_;    //!< Original head of the string.
-        const Ch* end_;     //!< Past-the-end position.
-        bool valid_;        //!< Whether the parsing is valid.
-    };
-
-    //! A helper stream to encode character (UTF-8 code unit) into percent-encoded sequence.
-    template <typename OutputStream>
-    class PercentEncodeStream {
-    public:
-        PercentEncodeStream(OutputStream& os) : os_(os) {}
-        void Put(char c) { // UTF-8 must be byte
-            unsigned char u = static_cast<unsigned char>(c);
-            static const char hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
-            os_.Put('%');
-            os_.Put(static_cast<typename OutputStream::Ch>(hexDigits[u >> 4]));
-            os_.Put(static_cast<typename OutputStream::Ch>(hexDigits[u & 15]));
-        }
-    private:
-        OutputStream& os_;
-    };
-
-    Allocator* allocator_;                  //!< The current allocator. It is either user-supplied or equal to ownAllocator_.
-    Allocator* ownAllocator_;               //!< Allocator owned by this Pointer.
-    Ch* nameBuffer_;                        //!< A buffer containing all names in tokens.
-    Token* tokens_;                         //!< A list of tokens.
-    size_t tokenCount_;                     //!< Number of tokens in tokens_.
-    size_t parseErrorOffset_;               //!< Offset in code unit when parsing fail.
-    PointerParseErrorCode parseErrorCode_;  //!< Parsing error code.
-};
-
-//! GenericPointer for Value (UTF-8, default allocator).
-typedef GenericPointer<Value> Pointer;
-
-//!@name Helper functions for GenericPointer
-//@{
-
-//////////////////////////////////////////////////////////////////////////////
-
-template <typename T>
-typename T::ValueType& CreateValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::AllocatorType& a) {
-    return pointer.Create(root, a);
-}
-
-template <typename T, typename CharType, size_t N>
-typename T::ValueType& CreateValueByPointer(T& root, const CharType(&source)[N], typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).Create(root, a);
-}
-
-// No allocator parameter
-
-template <typename DocumentType>
-typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer) {
-    return pointer.Create(document);
-}
-
-template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const CharType(&source)[N]) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Create(document);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-
-template <typename T>
-typename T::ValueType* GetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, size_t* unresolvedTokenIndex = 0) {
-    return pointer.Get(root, unresolvedTokenIndex);
-}
-
-template <typename T>
-const typename T::ValueType* GetValueByPointer(const T& root, const GenericPointer<typename T::ValueType>& pointer, size_t* unresolvedTokenIndex = 0) {
-    return pointer.Get(root, unresolvedTokenIndex);
-}
-
-template <typename T, typename CharType, size_t N>
-typename T::ValueType* GetValueByPointer(T& root, const CharType (&source)[N], size_t* unresolvedTokenIndex = 0) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).Get(root, unresolvedTokenIndex);
-}
-
-template <typename T, typename CharType, size_t N>
-const typename T::ValueType* GetValueByPointer(const T& root, const CharType(&source)[N], size_t* unresolvedTokenIndex = 0) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).Get(root, unresolvedTokenIndex);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-
-template <typename T>
-typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::ValueType& defaultValue, typename T::AllocatorType& a) {
-    return pointer.GetWithDefault(root, defaultValue, a);
-}
-
-template <typename T>
-typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::Ch* defaultValue, typename T::AllocatorType& a) {
-    return pointer.GetWithDefault(root, defaultValue, a);
-}
-
-#if RAPIDJSON_HAS_STDSTRING
-template <typename T>
-typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const std::basic_string<typename T::Ch>& defaultValue, typename T::AllocatorType& a) {
-    return pointer.GetWithDefault(root, defaultValue, a);
-}
-#endif
-
-template <typename T, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
-GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, T2 defaultValue, typename T::AllocatorType& a) {
-    return pointer.GetWithDefault(root, defaultValue, a);
-}
-
-template <typename T, typename CharType, size_t N>
-typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::ValueType& defaultValue, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
-}
-
-template <typename T, typename CharType, size_t N>
-typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::Ch* defaultValue, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
-}
-
-#if RAPIDJSON_HAS_STDSTRING
-template <typename T, typename CharType, size_t N>
-typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const std::basic_string<typename T::Ch>& defaultValue, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
-}
-#endif
-
-template <typename T, typename CharType, size_t N, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
-GetValueByPointerWithDefault(T& root, const CharType(&source)[N], T2 defaultValue, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
-}
-
-// No allocator parameter
-
-template <typename DocumentType>
-typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::ValueType& defaultValue) {
-    return pointer.GetWithDefault(document, defaultValue);
-}
-
-template <typename DocumentType>
-typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::Ch* defaultValue) {
-    return pointer.GetWithDefault(document, defaultValue);
-}
-
-#if RAPIDJSON_HAS_STDSTRING
-template <typename DocumentType>
-typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const std::basic_string<typename DocumentType::Ch>& defaultValue) {
-    return pointer.GetWithDefault(document, defaultValue);
-}
-#endif
-
-template <typename DocumentType, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
-GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, T2 defaultValue) {
-    return pointer.GetWithDefault(document, defaultValue);
-}
-
-template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& defaultValue) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
-}
-
-template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* defaultValue) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
-}
-
-#if RAPIDJSON_HAS_STDSTRING
-template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const std::basic_string<typename DocumentType::Ch>& defaultValue) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
-}
-#endif
-
-template <typename DocumentType, typename CharType, size_t N, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
-GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], T2 defaultValue) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-
-template <typename T>
-typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::ValueType& value, typename T::AllocatorType& a) {
-    return pointer.Set(root, value, a);
-}
-
-template <typename T>
-typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::ValueType& value, typename T::AllocatorType& a) {
-    return pointer.Set(root, value, a);
-}
-
-template <typename T>
-typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::Ch* value, typename T::AllocatorType& a) {
-    return pointer.Set(root, value, a);
-}
-
-#if RAPIDJSON_HAS_STDSTRING
-template <typename T>
-typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const std::basic_string<typename T::Ch>& value, typename T::AllocatorType& a) {
-    return pointer.Set(root, value, a);
-}
-#endif
-
-template <typename T, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
-SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, T2 value, typename T::AllocatorType& a) {
-    return pointer.Set(root, value, a);
-}
-
-template <typename T, typename CharType, size_t N>
-typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
-}
-
-template <typename T, typename CharType, size_t N>
-typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::ValueType& value, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
-}
-
-template <typename T, typename CharType, size_t N>
-typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::Ch* value, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
-}
-
-#if RAPIDJSON_HAS_STDSTRING
-template <typename T, typename CharType, size_t N>
-typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const std::basic_string<typename T::Ch>& value, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
-}
-#endif
-
-template <typename T, typename CharType, size_t N, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
-SetValueByPointer(T& root, const CharType(&source)[N], T2 value, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
-}
-
-// No allocator parameter
-
-template <typename DocumentType>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, typename DocumentType::ValueType& value) {
-    return pointer.Set(document, value);
-}
-
-template <typename DocumentType>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::ValueType& value) {
-    return pointer.Set(document, value);
-}
-
-template <typename DocumentType>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::Ch* value) {
-    return pointer.Set(document, value);
-}
-
-#if RAPIDJSON_HAS_STDSTRING
-template <typename DocumentType>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const std::basic_string<typename DocumentType::Ch>& value) {
-    return pointer.Set(document, value);
-}
-#endif
-
-template <typename DocumentType, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
-SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, T2 value) {
-    return pointer.Set(document, value);
-}
-
-template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
-}
-
-template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& value) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
-}
-
-template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* value) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
-}
-
-#if RAPIDJSON_HAS_STDSTRING
-template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const std::basic_string<typename DocumentType::Ch>& value) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
-}
-#endif
-
-template <typename DocumentType, typename CharType, size_t N, typename T2>
-RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
-SetValueByPointer(DocumentType& document, const CharType(&source)[N], T2 value) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-
-template <typename T>
-typename T::ValueType& SwapValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::ValueType& value, typename T::AllocatorType& a) {
-    return pointer.Swap(root, value, a);
-}
-
-template <typename T, typename CharType, size_t N>
-typename T::ValueType& SwapValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).Swap(root, value, a);
-}
-
-template <typename DocumentType>
-typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, typename DocumentType::ValueType& value) {
-    return pointer.Swap(document, value);
-}
-
-template <typename DocumentType, typename CharType, size_t N>
-typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) {
-    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Swap(document, value);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-
-template <typename T>
-bool EraseValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer) {
-    return pointer.Erase(root);
-}
-
-template <typename T, typename CharType, size_t N>
-bool EraseValueByPointer(T& root, const CharType(&source)[N]) {
-    return GenericPointer<typename T::ValueType>(source, N - 1).Erase(root);
-}
-
-//@}
-
-RAPIDJSON_NAMESPACE_END
-
-#if defined(__clang__) || defined(_MSC_VER)
-RAPIDJSON_DIAG_POP
-#endif
-
-#endif // RAPIDJSON_POINTER_H_
diff --git a/src/native/external/rapidjson/prettywriter.h b/src/native/external/rapidjson/prettywriter.h
deleted file mode 100644
index 45afb6949deb..000000000000
--- a/src/native/external/rapidjson/prettywriter.h
+++ /dev/null
@@ -1,277 +0,0 @@
-// Tencent is pleased to support the open source community by making RapidJSON available.
-// 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
-//
-// Licensed under the MIT License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// http://opensource.org/licenses/MIT
-//
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
-// specific language governing permissions and limitations under the License.
-
-#ifndef RAPIDJSON_PRETTYWRITER_H_
-#define RAPIDJSON_PRETTYWRITER_H_
-
-#include "writer.h"
-
-#ifdef __GNUC__
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(effc++)
-#endif
-
-#if defined(__clang__)
-RAPIDJSON_DIAG_PUSH
-RAPIDJSON_DIAG_OFF(c++98-compat)
-#endif
-
-RAPIDJSON_NAMESPACE_BEGIN
-
-//! Combination of PrettyWriter format flags.
-/*! \see PrettyWriter::SetFormatOptions
- */
-enum PrettyFormatOptions {
-    kFormatDefault = 0,         //!< Default pretty formatting.
-    kFormatSingleLineArray = 1  //!< Format arrays on a single line.
-};
-
-//! Writer with indentation and spacing.
-/*!
-    \tparam OutputStream Type of output os.
-    \tparam SourceEncoding Encoding of source string.
-    \tparam TargetEncoding Encoding of output stream.
-    \tparam StackAllocator Type of allocator for allocating memory of stack.
-*/
-template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags>
-class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator, writeFlags> {
-public:
-    typedef Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator, writeFlags> Base;
-    typedef typename Base::Ch Ch;
-
-    //! Constructor
-    /*! \param os Output stream.
-        \param allocator User supplied allocator. If it is null, it will create a private one.
-        \param levelDepth Initial capacity of stack.
-    */
-    explicit PrettyWriter(OutputStream& os, StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : 
-        Base(os, allocator, levelDepth), indentChar_(' '), indentCharCount_(4), formatOptions_(kFormatDefault) {}
-
-
-    explicit PrettyWriter(StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : 
-        Base(allocator, levelDepth), indentChar_(' '), indentCharCount_(4) {}
-
-#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    PrettyWriter(PrettyWriter&& rhs) :
-        Base(std::forward<PrettyWriter>(rhs)), indentChar_(rhs.indentChar_), indentCharCount_(rhs.indentCharCount_), formatOptions_(rhs.formatOptions_) {}
-#endif
-
-    //! Set custom indentation.
-    /*! \param indentChar       Character for indentation. Must be whitespace character (' ', '\\t', '\\n', '\\r').
-        \param indentCharCount  Number of indent characters for each indentation level.
-        \note The default indentation is 4 spaces.
-    */
-    PrettyWriter& SetIndent(Ch indentChar, unsigned indentCharCount) {
-        RAPIDJSON_ASSERT(indentChar == ' ' || indentChar == '\t' || indentChar == '\n' || indentChar == '\r');
-        indentChar_ = indentChar;
-        indentCharCount_ = indentCharCount;
-        return *this;
-    }
-
-    //! Set pretty writer formatting options.
-    /*! \param options Formatting options.
-    */
-    PrettyWriter& SetFormatOptions(PrettyFormatOptions options) {
-        formatOptions_ = options;
-        return *this;
-    }
-
-    /*! @name Implementation of Handler
-        \see Handler
-    */
-    //@{
-
-    bool Null()                 { PrettyPrefix(kNullType);   return Base::EndValue(Base::WriteNull()); }
-    bool Bool(bool b)           { PrettyPrefix(b ? kTrueType : kFalseType); return Base::EndValue(Base::WriteBool(b)); }
-    bool Int(int i)             { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteInt(i)); }
-    bool Uint(unsigned u)       { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteUint(u)); }
-    bool Int64(int64_t i64)     { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteInt64(i64)); }
-    bool Uint64(uint64_t u64)   { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteUint64(u64));  }
-    bool Double(double d)       { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteDouble(d)); }
-
-    bool RawNumber(const Ch* str, SizeType length, bool copy = false) {
-        RAPIDJSON_ASSERT(str != 0);
-        (void)copy;
-        PrettyPrefix(kNumberType);
-        return Base::EndValue(Base::WriteString(str, length));
-    }
-
-    bool String(const Ch* str, SizeType length, bool copy = false) {
-        RAPIDJSON_ASSERT(str != 0);
-        (void)copy;
-        PrettyPrefix(kStringType);
-        return Base::EndValue(Base::WriteString(str, length));
-    }
-
-#if RAPIDJSON_HAS_STDSTRING
-    bool String(const std::basic_string<Ch>& str) {
-        return String(str.data(), SizeType(str.size()));
-    }
-#endif
-
-    bool StartObject() {
-        PrettyPrefix(kObjectType);
-        new (Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(false);
-        return Base::WriteStartObject();
-    }
-
-    bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); }
-
-#if RAPIDJSON_HAS_STDSTRING
-    bool Key(const std::basic_string<Ch>& str) {
-        return Key(str.data(), SizeType(str.size()));
-    }
-#endif
-	
-    bool EndObject(SizeType memberCount = 0) {
-        (void)memberCount;
-        RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); // not inside an Object
-        RAPIDJSON_ASSERT(!Base::level_stack_.template Top<typename Base::Level>()->inArray); // currently inside an Array, not Object
-        RAPIDJSON_ASSERT(0 == Base::level_stack_.template Top<typename Base::Level>()->valueCount % 2); // Object has a Key without a Value
-       
-        bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0;
-
-        if (!empty) {
-            Base::os_->Put('\n');
-            WriteIndent();
-        }
-        bool ret = Base::EndValue(Base::WriteEndObject());
-        (void)ret;
-        RAPIDJSON_ASSERT(ret == true);
-        if (Base::level_stack_.Empty()) // end of json text
-            Base::Flush();
-        return true;
-    }
-
-    bool StartArray() {
-        PrettyPrefix(kArrayType);
-        new (Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(true);
-        return Base::WriteStartArray();
-    }
-
-    bool EndArray(SizeType memberCount = 0) {
-        (void)memberCount;
-        RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level));
-        RAPIDJSON_ASSERT(Base::level_stack_.template Top<typename Base::Level>()->inArray);
-        bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0;
-
-        if (!empty && !(formatOptions_ & kFormatSingleLineArray)) {
-            Base::os_->Put('\n');
-            WriteIndent();
-        }
-        bool ret = Base::EndValue(Base::WriteEndArray());
-        (void)ret;
-        RAPIDJSON_ASSERT(ret == true);
-        if (Base::level_stack_.Empty()) // end of json text
-            Base::Flush();
-        return true;
-    }
-
-    //@}
-
-    /*! @name Convenience extensions */
-    //@{
-
-    //! Simpler but slower overload.
-    bool String(const Ch* str) { return String(str, internal::StrLen(str)); }
-    bool Key(const Ch* str) { return Key(str, internal::StrLen(str)); }
-
-    //@}
-
-    //! Write a raw JSON value.
-    /*!
-        For user to write a stringified JSON as a value.
-
-        \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range.
-        \param length Length of the json.
-        \param type Type of the root of json.
-        \note When using PrettyWriter::RawValue(), the result json may not be indented correctly.
-    */
-    bool RawValue(const Ch* json, size_t length, Type type) {
-        RAPIDJSON_ASSERT(json != 0);
-        PrettyPrefix(type);
-        return Base::EndValue(Base::WriteRawValue(json, length));
-    }
-
-protected:
-    void PrettyPrefix(Type type) {
-        (void)type;
-        if (Base::level_stack_.GetSize() != 0) { // this value is not at root
-            typename Base::Level* level = Base::level_stack_.template Top<typename Base::Level>();
-
-            if (level->inArray) {
-                if (level->valueCount > 0) {
-                    Base::os_->Put(','); // add comma if it is not the first element in array
-                    if (formatOptions_ & kFormatSingleLineArray)
-                        Base::os_->Put(' ');
-                }
-
-                if (!(formatOptions_ & kFormatSingleLineArray)) {
-                    Base::os_->Put('\n');
-                    WriteIndent();
-                }
-            }
-            else {  // in object
-                if (level->valueCount > 0) {
-                    if (level->valueCount % 2 == 0) {
-                        Base::os_->Put(',');
-                        Base::os_->Put('\n');
-                    }
-                    else {
-                        Base::os_->Put(':');
-                        Base::os_->Put(' ');
-                    }
-                }
-                else
-                    Base::os_->Put('\n');
-
-                if (level->valueCount % 2 == 0)
-                    WriteIndent();
-            }
-            if (!level->inArray && level->valueCount % 2 == 0)
-                RAPIDJSON_ASSERT(type == kStringType);  // if it's in object, then even number should be a name
-            level->valueCount++;
-        }
-        else {
-            RAPIDJSON_ASSERT(!Base::hasRoot_);  // Should only has one and only one root.
-            Base::hasRoot_ = true;
-        }
-    }
-
-    void WriteIndent()  {
-        size_t count = (Base::level_stack_.GetSize() / sizeof(typename Base::Level)) * indentCharCount_;
-        PutN(*Base::os_, static_cast<typename OutputStream::Ch>(indentChar_), count);
-    }
-
-    Ch indentChar_;
-    unsigned indentCharCount_;
-    PrettyFormatOptions formatOptions_;
-
-private:
-    // Prohibit copy constructor & assignment operator.
-    PrettyWriter(const PrettyWriter&);
-    PrettyWriter& operator=(const PrettyWriter&);
-};
-
-RAPIDJSON_NAMESPACE_END
-
-#if defined(__clang__)
-RAPIDJSON_DIAG_POP
-#endif
-
-#ifdef __GNUC__
-RAPIDJSON_DIAG_POP
-#endif
-
-#endif // RAPIDJSON_RAPIDJSON_H_
diff --git a/src/native/external/rapidjson/rapidjson.h b/src/native/external/rapidjson/rapidjson.h
index 549936ffe06c..5ea69479501a 100644
--- a/src/native/external/rapidjson/rapidjson.h
+++ b/src/native/external/rapidjson/rapidjson.h
@@ -1,15 +1,15 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
-// 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
 //
 // http://opensource.org/licenses/MIT
 //
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
 
 #ifndef RAPIDJSON_RAPIDJSON_H_
@@ -17,7 +17,7 @@
 
 /*!\file rapidjson.h
     \brief common definitions and configuration
-    
+
     \see RAPIDJSON_CONFIG
  */
 
@@ -124,6 +124,19 @@
 #define RAPIDJSON_NAMESPACE_END }
 #endif
 
+///////////////////////////////////////////////////////////////////////////////
+// __cplusplus macro
+
+//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
+
+#if defined(_MSC_VER)
+#define RAPIDJSON_CPLUSPLUS _MSVC_LANG
+#else
+#define RAPIDJSON_CPLUSPLUS __cplusplus
+#endif
+
+//!@endcond
+
 ///////////////////////////////////////////////////////////////////////////////
 // RAPIDJSON_HAS_STDSTRING
 
@@ -149,6 +162,24 @@
 #include <string>
 #endif // RAPIDJSON_HAS_STDSTRING
 
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_USE_MEMBERSMAP
+
+/*! \def RAPIDJSON_USE_MEMBERSMAP
+    \ingroup RAPIDJSON_CONFIG
+    \brief Enable RapidJSON support for object members handling in a \c std::multimap
+
+    By defining this preprocessor symbol to \c 1, \ref rapidjson::GenericValue object
+    members are stored in a \c std::multimap for faster lookup and deletion times, a
+    trade off with a slightly slower insertion time and a small object allocat(or)ed
+    memory overhead.
+
+    \hideinitializer
+*/
+#ifndef RAPIDJSON_USE_MEMBERSMAP
+#define RAPIDJSON_USE_MEMBERSMAP 0 // not by default
+#endif
+
 ///////////////////////////////////////////////////////////////////////////////
 // RAPIDJSON_NO_INT64DEFINE
 
@@ -164,7 +195,7 @@
 */
 #ifndef RAPIDJSON_NO_INT64DEFINE
 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
-#if defined(_MSC_VER) && (_MSC_VER < 1800)	// Visual Studio 2013
+#if defined(_MSC_VER) && (_MSC_VER < 1800) // Visual Studio 2013
 #include "msinttypes/stdint.h"
 #include "msinttypes/inttypes.h"
 #else
@@ -246,7 +277,7 @@
 #  elif defined(RAPIDJSON_DOXYGEN_RUNNING)
 #    define RAPIDJSON_ENDIAN
 #  else
-#    error Unknown machine endianness detected. User needs to define RAPIDJSON_ENDIAN.   
+#    error Unknown machine endianness detected. User needs to define RAPIDJSON_ENDIAN.
 #  endif
 #endif // RAPIDJSON_ENDIAN
 
@@ -411,7 +442,7 @@ RAPIDJSON_NAMESPACE_END
 
 // Prefer C++11 static_assert, if available
 #ifndef RAPIDJSON_STATIC_ASSERT
-#if __cplusplus >= 201103L || ( defined(_MSC_VER) && _MSC_VER >= 1800 )
+#if RAPIDJSON_CPLUSPLUS >= 201103L || ( defined(_MSC_VER) && _MSC_VER >= 1800 )
 #define RAPIDJSON_STATIC_ASSERT(x) \
    static_assert(x, RAPIDJSON_STRINGIFY(x))
 #endif // C++11
@@ -482,7 +513,7 @@ RAPIDJSON_NAMESPACE_END
 
 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
 
-#define RAPIDJSON_MULTILINEMACRO_BEGIN do {  
+#define RAPIDJSON_MULTILINEMACRO_BEGIN do {
 #define RAPIDJSON_MULTILINEMACRO_END \
 } while((void)0, 0)
 
@@ -490,6 +521,12 @@ RAPIDJSON_NAMESPACE_END
 #define RAPIDJSON_VERSION_CODE(x,y,z) \
   (((x)*100000) + ((y)*100) + (z))
 
+#if defined(__has_builtin)
+#define RAPIDJSON_HAS_BUILTIN(x) __has_builtin(x)
+#else
+#define RAPIDJSON_HAS_BUILTIN(x) 0
+#endif
+
 ///////////////////////////////////////////////////////////////////////////////
 // RAPIDJSON_DIAG_PUSH/POP, RAPIDJSON_DIAG_OFF
 
@@ -535,8 +572,14 @@ RAPIDJSON_NAMESPACE_END
 ///////////////////////////////////////////////////////////////////////////////
 // C++11 features
 
+#ifndef RAPIDJSON_HAS_CXX11
+#define RAPIDJSON_HAS_CXX11 (RAPIDJSON_CPLUSPLUS >= 201103L)
+#endif
+
 #ifndef RAPIDJSON_HAS_CXX11_RVALUE_REFS
-#if defined(__clang__)
+#if RAPIDJSON_HAS_CXX11
+#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1
+#elif defined(__clang__)
 #if __has_feature(cxx_rvalue_references) && \
     (defined(_MSC_VER) || defined(_LIBCPP_VERSION) || defined(__GLIBCXX__) && __GLIBCXX__ >= 20080306)
 #define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1
@@ -553,8 +596,14 @@ RAPIDJSON_NAMESPACE_END
 #endif
 #endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
 
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+#include <utility> // std::move
+#endif
+
 #ifndef RAPIDJSON_HAS_CXX11_NOEXCEPT
-#if defined(__clang__)
+#if RAPIDJSON_HAS_CXX11
+#define RAPIDJSON_HAS_CXX11_NOEXCEPT 1
+#elif defined(__clang__)
 #define RAPIDJSON_HAS_CXX11_NOEXCEPT __has_feature(cxx_noexcept)
 #elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \
     (defined(_MSC_VER) && _MSC_VER >= 1900) || \
@@ -564,11 +613,13 @@ RAPIDJSON_NAMESPACE_END
 #define RAPIDJSON_HAS_CXX11_NOEXCEPT 0
 #endif
 #endif
+#ifndef RAPIDJSON_NOEXCEPT
 #if RAPIDJSON_HAS_CXX11_NOEXCEPT
 #define RAPIDJSON_NOEXCEPT noexcept
 #else
-#define RAPIDJSON_NOEXCEPT /* noexcept */
+#define RAPIDJSON_NOEXCEPT throw()
 #endif // RAPIDJSON_HAS_CXX11_NOEXCEPT
+#endif
 
 // no automatic detection, yet
 #ifndef RAPIDJSON_HAS_CXX11_TYPETRAITS
@@ -591,6 +642,27 @@ RAPIDJSON_NAMESPACE_END
 #endif
 #endif // RAPIDJSON_HAS_CXX11_RANGE_FOR
 
+///////////////////////////////////////////////////////////////////////////////
+// C++17 features
+
+#ifndef RAPIDJSON_HAS_CXX17
+#define RAPIDJSON_HAS_CXX17 (RAPIDJSON_CPLUSPLUS >= 201703L)
+#endif
+
+#if RAPIDJSON_HAS_CXX17
+# define RAPIDJSON_DELIBERATE_FALLTHROUGH [[fallthrough]]
+#elif defined(__has_cpp_attribute)
+# if __has_cpp_attribute(clang::fallthrough)
+#  define RAPIDJSON_DELIBERATE_FALLTHROUGH [[clang::fallthrough]]
+# elif __has_cpp_attribute(fallthrough)
+#  define RAPIDJSON_DELIBERATE_FALLTHROUGH __attribute__((fallthrough))
+# else
+#  define RAPIDJSON_DELIBERATE_FALLTHROUGH
+# endif
+#else
+# define RAPIDJSON_DELIBERATE_FALLTHROUGH
+#endif
+
 //!@endcond
 
 //! Assertion (in non-throwing contexts).
@@ -609,16 +681,29 @@ RAPIDJSON_NAMESPACE_END
 
 #ifndef RAPIDJSON_NOEXCEPT_ASSERT
 #ifdef RAPIDJSON_ASSERT_THROWS
-#if RAPIDJSON_HAS_CXX11_NOEXCEPT
-#define RAPIDJSON_NOEXCEPT_ASSERT(x)
-#else
-#define RAPIDJSON_NOEXCEPT_ASSERT(x) RAPIDJSON_ASSERT(x)
-#endif // RAPIDJSON_HAS_CXX11_NOEXCEPT
+#include <cassert>
+#define RAPIDJSON_NOEXCEPT_ASSERT(x) assert(x)
 #else
 #define RAPIDJSON_NOEXCEPT_ASSERT(x) RAPIDJSON_ASSERT(x)
 #endif // RAPIDJSON_ASSERT_THROWS
 #endif // RAPIDJSON_NOEXCEPT_ASSERT
 
+///////////////////////////////////////////////////////////////////////////////
+// malloc/realloc/free
+
+#ifndef RAPIDJSON_MALLOC
+///! customization point for global \c malloc
+#define RAPIDJSON_MALLOC(size) std::malloc(size)
+#endif
+#ifndef RAPIDJSON_REALLOC
+///! customization point for global \c realloc
+#define RAPIDJSON_REALLOC(ptr, new_size) std::realloc(ptr, new_size)
+#endif
+#ifndef RAPIDJSON_FREE
+///! customization point for global \c free
+#define RAPIDJSON_FREE(ptr) std::free(ptr)
+#endif
+
 ///////////////////////////////////////////////////////////////////////////////
 // new/delete
 
@@ -646,7 +731,7 @@ enum Type {
     kFalseType = 1,     //!< false
     kTrueType = 2,      //!< true
     kObjectType = 3,    //!< object
-    kArrayType = 4,     //!< array 
+    kArrayType = 4,     //!< array
     kStringType = 5,    //!< string
     kNumberType = 6     //!< number
 };
diff --git a/src/native/external/rapidjson/reader.h b/src/native/external/rapidjson/reader.h
index 44a6bcd30cf2..55546601e29b 100644
--- a/src/native/external/rapidjson/reader.h
+++ b/src/native/external/rapidjson/reader.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 //
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
@@ -20,6 +20,7 @@
 #include "allocators.h"
 #include "stream.h"
 #include "encodedstream.h"
+#include "internal/clzll.h"
 #include "internal/meta.h"
 #include "internal/stack.h"
 #include "internal/strtod.h"
@@ -153,6 +154,7 @@ enum ParseFlag {
     kParseNumbersAsStringsFlag = 64,    //!< Parse all numbers (ints/doubles) as strings.
     kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays.
     kParseNanAndInfFlag = 256,      //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
+    kParseEscapedApostropheFlag = 512,  //!< Allow escaped apostrophe in strings.
     kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS  //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
 };
 
@@ -443,16 +445,16 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
 
         x = vmvnq_u8(x);                       // Negate
         x = vrev64q_u8(x);                     // Rev in 64
-        uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
-        uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
+        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
 
         if (low == 0) {
             if (high != 0) {
-                int lz =__builtin_clzll(high);;
+                uint32_t lz = internal::clzll(high);
                 return p + 8 + (lz >> 3);
             }
         } else {
-            int lz = __builtin_clzll(low);;
+            uint32_t lz = internal::clzll(low);
             return p + (lz >> 3);
         }
     }
@@ -479,16 +481,16 @@ inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
 
         x = vmvnq_u8(x);                       // Negate
         x = vrev64q_u8(x);                     // Rev in 64
-        uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
-        uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
+        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
 
         if (low == 0) {
             if (high != 0) {
-                int lz = __builtin_clzll(high);
+                uint32_t lz = internal::clzll(high);
                 return p + 8 + (lz >> 3);
             }
         } else {
-            int lz = __builtin_clzll(low);
+            uint32_t lz = internal::clzll(low);
             return p + (lz >> 3);
         }
     }
@@ -990,7 +992,7 @@ class GenericReader {
 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
         static const char escape[256] = {
-            Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
+            Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '/',
             Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
             0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
             0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -1013,19 +1015,31 @@ class GenericReader {
                     is.Take();
                     os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
                 }
+                else if ((parseFlags & kParseEscapedApostropheFlag) && RAPIDJSON_LIKELY(e == '\'')) { // Allow escaped apostrophe
+                    is.Take();
+                    os.Put('\'');
+                }
                 else if (RAPIDJSON_LIKELY(e == 'u')) {    // Unicode
                     is.Take();
                     unsigned codepoint = ParseHex4(is, escapeOffset);
                     RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
-                    if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
-                        // Handle UTF-16 surrogate pair
-                        if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
-                            RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
-                        unsigned codepoint2 = ParseHex4(is, escapeOffset);
-                        RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
-                        if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
+                    if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
+                        // high surrogate, check if followed by valid low surrogate
+                        if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) {
+                            // Handle UTF-16 surrogate pair
+                            if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
+                                RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
+                            unsigned codepoint2 = ParseHex4(is, escapeOffset);
+                            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+                            if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
+                                RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
+                            codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
+                        }
+                        // single low surrogate
+                        else
+                        {
                             RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
-                        codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
+                        }
                     }
                     TEncoding::Encode(os, codepoint);
                 }
@@ -1244,19 +1258,19 @@ class GenericReader {
             x = vorrq_u8(x, vcltq_u8(s, s3));
 
             x = vrev64q_u8(x);                     // Rev in 64
-            uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
-            uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+            uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
+            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
 
             SizeType length = 0;
             bool escaped = false;
             if (low == 0) {
                 if (high != 0) {
-                    unsigned lz = (unsigned)__builtin_clzll(high);;
+                    uint32_t lz = internal::clzll(high);
                     length = 8 + (lz >> 3);
                     escaped = true;
                 }
             } else {
-                unsigned lz = (unsigned)__builtin_clzll(low);;
+                uint32_t lz = internal::clzll(low);
                 length = lz >> 3;
                 escaped = true;
             }
@@ -1314,19 +1328,19 @@ class GenericReader {
             x = vorrq_u8(x, vcltq_u8(s, s3));
 
             x = vrev64q_u8(x);                     // Rev in 64
-            uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
-            uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+            uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
+            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
 
             SizeType length = 0;
             bool escaped = false;
             if (low == 0) {
                 if (high != 0) {
-                    unsigned lz = (unsigned)__builtin_clzll(high);
+                    uint32_t lz = internal::clzll(high);
                     length = 8 + (lz >> 3);
                     escaped = true;
                 }
             } else {
-                unsigned lz = (unsigned)__builtin_clzll(low);
+                uint32_t lz = internal::clzll(low);
                 length = lz >> 3;
                 escaped = true;
             }
@@ -1370,17 +1384,17 @@ class GenericReader {
             x = vorrq_u8(x, vcltq_u8(s, s3));
 
             x = vrev64q_u8(x);                     // Rev in 64
-            uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
-            uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+            uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
+            uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
 
             if (low == 0) {
                 if (high != 0) {
-                    int lz = __builtin_clzll(high);
+                    uint32_t lz = internal::clzll(high);
                     p += 8 + (lz >> 3);
                     break;
                 }
             } else {
-                int lz = __builtin_clzll(low);
+                uint32_t lz = internal::clzll(low);
                 p += lz >> 3;
                 break;
             }
@@ -1390,11 +1404,11 @@ class GenericReader {
     }
 #endif // RAPIDJSON_NEON
 
-    template<typename InputStream, bool backup, bool pushOnTake>
+    template<typename InputStream, typename StackCharacter, bool backup, bool pushOnTake>
     class NumberStream;
 
-    template<typename InputStream>
-    class NumberStream<InputStream, false, false> {
+    template<typename InputStream, typename StackCharacter>
+    class NumberStream<InputStream, StackCharacter, false, false> {
     public:
         typedef typename InputStream::Ch Ch;
 
@@ -1403,11 +1417,11 @@ class GenericReader {
         RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
         RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
         RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
-		  RAPIDJSON_FORCEINLINE void Push(char) {}
+        RAPIDJSON_FORCEINLINE void Push(char) {}
 
         size_t Tell() { return is.Tell(); }
         size_t Length() { return 0; }
-        const char* Pop() { return 0; }
+        const StackCharacter* Pop() { return 0; }
 
     protected:
         NumberStream& operator=(const NumberStream&);
@@ -1415,45 +1429,47 @@ class GenericReader {
         InputStream& is;
     };
 
-    template<typename InputStream>
-    class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> {
-        typedef NumberStream<InputStream, false, false> Base;
+    template<typename InputStream, typename StackCharacter>
+    class NumberStream<InputStream, StackCharacter, true, false> : public NumberStream<InputStream, StackCharacter, false, false> {
+        typedef NumberStream<InputStream, StackCharacter, false, false> Base;
     public:
-        NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {}
+        NumberStream(GenericReader& reader, InputStream& s) : Base(reader, s), stackStream(reader.stack_) {}
 
         RAPIDJSON_FORCEINLINE Ch TakePush() {
-            stackStream.Put(static_cast<char>(Base::is.Peek()));
+            stackStream.Put(static_cast<StackCharacter>(Base::is.Peek()));
             return Base::is.Take();
         }
 
-        RAPIDJSON_FORCEINLINE void Push(char c) {
+        RAPIDJSON_FORCEINLINE void Push(StackCharacter c) {
             stackStream.Put(c);
         }
 
         size_t Length() { return stackStream.Length(); }
 
-        const char* Pop() {
+        const StackCharacter* Pop() {
             stackStream.Put('\0');
             return stackStream.Pop();
         }
 
     private:
-        StackStream<char> stackStream;
+        StackStream<StackCharacter> stackStream;
     };
 
-    template<typename InputStream>
-    class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> {
-        typedef NumberStream<InputStream, true, false> Base;
+    template<typename InputStream, typename StackCharacter>
+    class NumberStream<InputStream, StackCharacter, true, true> : public NumberStream<InputStream, StackCharacter, true, false> {
+        typedef NumberStream<InputStream, StackCharacter, true, false> Base;
     public:
-        NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {}
+        NumberStream(GenericReader& reader, InputStream& s) : Base(reader, s) {}
 
         RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
     };
 
     template<unsigned parseFlags, typename InputStream, typename Handler>
     void ParseNumber(InputStream& is, Handler& handler) {
+        typedef typename internal::SelectIf<internal::BoolType<(parseFlags & kParseNumbersAsStringsFlag) != 0>, typename TargetEncoding::Ch, char>::Type NumberCharacter;
+
         internal::StreamLocalCopy<InputStream> copy(is);
-        NumberStream<InputStream,
+        NumberStream<InputStream, NumberCharacter,
             ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
                 ((parseFlags & kParseInsituFlag) == 0) :
                 ((parseFlags & kParseFullPrecisionFlag) != 0),
@@ -1678,10 +1694,10 @@ class GenericReader {
             }
             else {
                 SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
-                StringStream srcStream(s.Pop());
+                GenericStringStream<UTF8<NumberCharacter> > srcStream(s.Pop());
                 StackStream<typename TargetEncoding::Ch> dstStream(stack_);
                 while (numCharsToCopy--) {
-                    Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream);
+                    Transcoder<UTF8<typename TargetEncoding::Ch>, TargetEncoding>::Transcode(srcStream, dstStream);
                 }
                 dstStream.Put('\0');
                 const typename TargetEncoding::Ch* str = dstStream.Pop();
@@ -1691,7 +1707,7 @@ class GenericReader {
         }
         else {
            size_t length = s.Length();
-           const char* decimal = s.Pop();  // Pop stack no matter if it will be used or not.
+           const NumberCharacter* decimal = s.Pop();  // Pop stack no matter if it will be used or not.
 
            if (useDouble) {
                int p = exp + expFrac;
diff --git a/src/native/external/rapidjson/schema.h b/src/native/external/rapidjson/schema.h
deleted file mode 100644
index 26ae94748063..000000000000
--- a/src/native/external/rapidjson/schema.h
+++ /dev/null
@@ -1,2497 +0,0 @@
-// Tencent is pleased to support the open source community by making RapidJSON available->
-// 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip-> All rights reserved->
-//
-// Licensed under the MIT License (the "License"); you may not use this file except
-// in compliance with the License-> You may obtain a copy of the License at
-//
-// http://opensource->org/licenses/MIT
-//
-// Unless required by applicable law or agreed to in writing, software distributed 
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
-// CONDITIONS OF ANY KIND, either express or implied-> See the License for the 
-// specific language governing permissions and limitations under the License->
-
-#ifndef RAPIDJSON_SCHEMA_H_
-#define RAPIDJSON_SCHEMA_H_
-
-#include "document.h"
-#include "pointer.h"
-#include "stringbuffer.h"
-#include <cmath> // abs, floor
-
-#if !defined(RAPIDJSON_SCHEMA_USE_INTERNALREGEX)
-#define RAPIDJSON_SCHEMA_USE_INTERNALREGEX 1
-#else
-#define RAPIDJSON_SCHEMA_USE_INTERNALREGEX 0
-#endif
-
-#if !RAPIDJSON_SCHEMA_USE_INTERNALREGEX && defined(RAPIDJSON_SCHEMA_USE_STDREGEX) && (__cplusplus >=201103L || (defined(_MSC_VER) && _MSC_VER >= 1800))
-#define RAPIDJSON_SCHEMA_USE_STDREGEX 1
-#else
-#define RAPIDJSON_SCHEMA_USE_STDREGEX 0
-#endif
-
-#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX
-#include "internal/regex.h"
-#elif RAPIDJSON_SCHEMA_USE_STDREGEX
-#include <regex>
-#endif
-
-#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX || RAPIDJSON_SCHEMA_USE_STDREGEX
-#define RAPIDJSON_SCHEMA_HAS_REGEX 1
-#else
-#define RAPIDJSON_SCHEMA_HAS_REGEX 0
-#endif
-
-#ifndef RAPIDJSON_SCHEMA_VERBOSE
-#define RAPIDJSON_SCHEMA_VERBOSE 0
-#endif
-
-#if RAPIDJSON_SCHEMA_VERBOSE
-#include "stringbuffer.h"
-#endif
-
-RAPIDJSON_DIAG_PUSH
-
-#if defined(__GNUC__)
-RAPIDJSON_DIAG_OFF(effc++)
-#endif
-
-#ifdef __clang__
-RAPIDJSON_DIAG_OFF(weak-vtables)
-RAPIDJSON_DIAG_OFF(exit-time-destructors)
-RAPIDJSON_DIAG_OFF(c++98-compat-pedantic)
-RAPIDJSON_DIAG_OFF(variadic-macros)
-#elif defined(_MSC_VER)
-RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
-#endif
-
-RAPIDJSON_NAMESPACE_BEGIN
-
-///////////////////////////////////////////////////////////////////////////////
-// Verbose Utilities
-
-#if RAPIDJSON_SCHEMA_VERBOSE
-
-namespace internal {
-
-inline void PrintInvalidKeyword(const char* keyword) {
-    printf("Fail keyword: %s\n", keyword);
-}
-
-inline void PrintInvalidKeyword(const wchar_t* keyword) {
-    wprintf(L"Fail keyword: %ls\n", keyword);
-}
-
-inline void PrintInvalidDocument(const char* document) {
-    printf("Fail document: %s\n\n", document);
-}
-
-inline void PrintInvalidDocument(const wchar_t* document) {
-    wprintf(L"Fail document: %ls\n\n", document);
-}
-
-inline void PrintValidatorPointers(unsigned depth, const char* s, const char* d) {
-    printf("S: %*s%s\nD: %*s%s\n\n", depth * 4, " ", s, depth * 4, " ", d);
-}
-
-inline void PrintValidatorPointers(unsigned depth, const wchar_t* s, const wchar_t* d) {
-    wprintf(L"S: %*ls%ls\nD: %*ls%ls\n\n", depth * 4, L" ", s, depth * 4, L" ", d);
-}
-
-} // namespace internal
-
-#endif // RAPIDJSON_SCHEMA_VERBOSE
-
-///////////////////////////////////////////////////////////////////////////////
-// RAPIDJSON_INVALID_KEYWORD_RETURN
-
-#if RAPIDJSON_SCHEMA_VERBOSE
-#define RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword) internal::PrintInvalidKeyword(keyword)
-#else
-#define RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword)
-#endif
-
-#define RAPIDJSON_INVALID_KEYWORD_RETURN(keyword)\
-RAPIDJSON_MULTILINEMACRO_BEGIN\
-    context.invalidKeyword = keyword.GetString();\
-    RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword.GetString());\
-    return false;\
-RAPIDJSON_MULTILINEMACRO_END
-
-///////////////////////////////////////////////////////////////////////////////
-// Forward declarations
-
-template <typename ValueType, typename Allocator>
-class GenericSchemaDocument;
-
-namespace internal {
-
-template <typename SchemaDocumentType>
-class Schema;
-
-///////////////////////////////////////////////////////////////////////////////
-// ISchemaValidator
-
-class ISchemaValidator {
-public:
-    virtual ~ISchemaValidator() {}
-    virtual bool IsValid() const = 0;
-};
-
-///////////////////////////////////////////////////////////////////////////////
-// ISchemaStateFactory
-
-template <typename SchemaType>
-class ISchemaStateFactory {
-public:
-    virtual ~ISchemaStateFactory() {}
-    virtual ISchemaValidator* CreateSchemaValidator(const SchemaType&) = 0;
-    virtual void DestroySchemaValidator(ISchemaValidator* validator) = 0;
-    virtual void* CreateHasher() = 0;
-    virtual uint64_t GetHashCode(void* hasher) = 0;
-    virtual void DestroryHasher(void* hasher) = 0;
-    virtual void* MallocState(size_t size) = 0;
-    virtual void FreeState(void* p) = 0;
-};
-
-///////////////////////////////////////////////////////////////////////////////
-// IValidationErrorHandler
-
-template <typename SchemaType>
-class IValidationErrorHandler {
-public:
-    typedef typename SchemaType::Ch Ch;
-    typedef typename SchemaType::SValue SValue;
-
-    virtual ~IValidationErrorHandler() {}
-
-    virtual void NotMultipleOf(int64_t actual, const SValue& expected) = 0;
-    virtual void NotMultipleOf(uint64_t actual, const SValue& expected) = 0;
-    virtual void NotMultipleOf(double actual, const SValue& expected) = 0;
-    virtual void AboveMaximum(int64_t actual, const SValue& expected, bool exclusive) = 0;
-    virtual void AboveMaximum(uint64_t actual, const SValue& expected, bool exclusive) = 0;
-    virtual void AboveMaximum(double actual, const SValue& expected, bool exclusive) = 0;
-    virtual void BelowMinimum(int64_t actual, const SValue& expected, bool exclusive) = 0;
-    virtual void BelowMinimum(uint64_t actual, const SValue& expected, bool exclusive) = 0;
-    virtual void BelowMinimum(double actual, const SValue& expected, bool exclusive) = 0;
-
-    virtual void TooLong(const Ch* str, SizeType length, SizeType expected) = 0;
-    virtual void TooShort(const Ch* str, SizeType length, SizeType expected) = 0;
-    virtual void DoesNotMatch(const Ch* str, SizeType length) = 0;
-
-    virtual void DisallowedItem(SizeType index) = 0;
-    virtual void TooFewItems(SizeType actualCount, SizeType expectedCount) = 0;
-    virtual void TooManyItems(SizeType actualCount, SizeType expectedCount) = 0;
-    virtual void DuplicateItems(SizeType index1, SizeType index2) = 0;
-
-    virtual void TooManyProperties(SizeType actualCount, SizeType expectedCount) = 0;
-    virtual void TooFewProperties(SizeType actualCount, SizeType expectedCount) = 0;
-    virtual void StartMissingProperties() = 0;
-    virtual void AddMissingProperty(const SValue& name) = 0;
-    virtual bool EndMissingProperties() = 0;
-    virtual void PropertyViolations(ISchemaValidator** subvalidators, SizeType count) = 0;
-    virtual void DisallowedProperty(const Ch* name, SizeType length) = 0;
-
-    virtual void StartDependencyErrors() = 0;
-    virtual void StartMissingDependentProperties() = 0;
-    virtual void AddMissingDependentProperty(const SValue& targetName) = 0;
-    virtual void EndMissingDependentProperties(const SValue& sourceName) = 0;
-    virtual void AddDependencySchemaError(const SValue& souceName, ISchemaValidator* subvalidator) = 0;
-    virtual bool EndDependencyErrors() = 0;
-
-    virtual void DisallowedValue() = 0;
-    virtual void StartDisallowedType() = 0;
-    virtual void AddExpectedType(const typename SchemaType::ValueType& expectedType) = 0;
-    virtual void EndDisallowedType(const typename SchemaType::ValueType& actualType) = 0;
-    virtual void NotAllOf(ISchemaValidator** subvalidators, SizeType count) = 0;
-    virtual void NoneOf(ISchemaValidator** subvalidators, SizeType count) = 0;
-    virtual void NotOneOf(ISchemaValidator** subvalidators, SizeType count) = 0;
-    virtual void Disallowed() = 0;
-};
-
-
-///////////////////////////////////////////////////////////////////////////////
-// Hasher
-
-// For comparison of compound value
-template<typename Encoding, typename Allocator>
-class Hasher {
-public:
-    typedef typename Encoding::Ch Ch;
-
-    Hasher(Allocator* allocator = 0, size_t stackCapacity = kDefaultSize) : stack_(allocator, stackCapacity) {}
-
-    bool Null() { return WriteType(kNullType); }
-    bool Bool(bool b) { return WriteType(b ? kTrueType : kFalseType); }
-    bool Int(int i) { Number n; n.u.i = i; n.d = static_cast<double>(i); return WriteNumber(n); }
-    bool Uint(unsigned u) { Number n; n.u.u = u; n.d = static_cast<double>(u); return WriteNumber(n); }
-    bool Int64(int64_t i) { Number n; n.u.i = i; n.d = static_cast<double>(i); return WriteNumber(n); }
-    bool Uint64(uint64_t u) { Number n; n.u.u = u; n.d = static_cast<double>(u); return WriteNumber(n); }
-    bool Double(double d) { 
-        Number n; 
-        if (d < 0) n.u.i = static_cast<int64_t>(d);
-        else       n.u.u = static_cast<uint64_t>(d); 
-        n.d = d;
-        return WriteNumber(n);
-    }
-
-    bool RawNumber(const Ch* str, SizeType len, bool) {
-        WriteBuffer(kNumberType, str, len * sizeof(Ch));
-        return true;
-    }
-
-    bool String(const Ch* str, SizeType len, bool) {
-        WriteBuffer(kStringType, str, len * sizeof(Ch));
-        return true;
-    }
-
-    bool StartObject() { return true; }
-    bool Key(const Ch* str, SizeType len, bool copy) { return String(str, len, copy); }
-    bool EndObject(SizeType memberCount) { 
-        uint64_t h = Hash(0, kObjectType);
-        uint64_t* kv = stack_.template Pop<uint64_t>(memberCount * 2);
-        for (SizeType i = 0; i < memberCount; i++)
-            h ^= Hash(kv[i * 2], kv[i * 2 + 1]);  // Use xor to achieve member order insensitive
-        *stack_.template Push<uint64_t>() = h;
-        return true;
-    }
-    
-    bool StartArray() { return true; }
-    bool EndArray(SizeType elementCount) { 
-        uint64_t h = Hash(0, kArrayType);
-        uint64_t* e = stack_.template Pop<uint64_t>(elementCount);
-        for (SizeType i = 0; i < elementCount; i++)
-            h = Hash(h, e[i]); // Use hash to achieve element order sensitive
-        *stack_.template Push<uint64_t>() = h;
-        return true;
-    }
-
-    bool IsValid() const { return stack_.GetSize() == sizeof(uint64_t); }
-
-    uint64_t GetHashCode() const {
-        RAPIDJSON_ASSERT(IsValid());
-        return *stack_.template Top<uint64_t>();
-    }
-
-private:
-    static const size_t kDefaultSize = 256;
-    struct Number {
-        union U {
-            uint64_t u;
-            int64_t i;
-        }u;
-        double d;
-    };
-
-    bool WriteType(Type type) { return WriteBuffer(type, 0, 0); }
-    
-    bool WriteNumber(const Number& n) { return WriteBuffer(kNumberType, &n, sizeof(n)); }
-    
-    bool WriteBuffer(Type type, const void* data, size_t len) {
-        // FNV-1a from http://isthe.com/chongo/tech/comp/fnv/
-        uint64_t h = Hash(RAPIDJSON_UINT64_C2(0x84222325, 0xcbf29ce4), type);
-        const unsigned char* d = static_cast<const unsigned char*>(data);
-        for (size_t i = 0; i < len; i++)
-            h = Hash(h, d[i]);
-        *stack_.template Push<uint64_t>() = h;
-        return true;
-    }
-
-    static uint64_t Hash(uint64_t h, uint64_t d) {
-        static const uint64_t kPrime = RAPIDJSON_UINT64_C2(0x00000100, 0x000001b3);
-        h ^= d;
-        h *= kPrime;
-        return h;
-    }
-
-    Stack<Allocator> stack_;
-};
-
-///////////////////////////////////////////////////////////////////////////////
-// SchemaValidationContext
-
-template <typename SchemaDocumentType>
-struct SchemaValidationContext {
-    typedef Schema<SchemaDocumentType> SchemaType;
-    typedef ISchemaStateFactory<SchemaType> SchemaValidatorFactoryType;
-    typedef IValidationErrorHandler<SchemaType> ErrorHandlerType;
-    typedef typename SchemaType::ValueType ValueType;
-    typedef typename ValueType::Ch Ch;
-
-    enum PatternValidatorType {
-        kPatternValidatorOnly,
-        kPatternValidatorWithProperty,
-        kPatternValidatorWithAdditionalProperty
-    };
-
-    SchemaValidationContext(SchemaValidatorFactoryType& f, ErrorHandlerType& eh, const SchemaType* s) :
-        factory(f),
-        error_handler(eh),
-        schema(s),
-        valueSchema(),
-        invalidKeyword(),
-        hasher(),
-        arrayElementHashCodes(),
-        validators(),
-        validatorCount(),
-        patternPropertiesValidators(),
-        patternPropertiesValidatorCount(),
-        patternPropertiesSchemas(),
-        patternPropertiesSchemaCount(),
-        valuePatternValidatorType(kPatternValidatorOnly),
-        propertyExist(),
-        inArray(false),
-        valueUniqueness(false),
-        arrayUniqueness(false)
-    {
-    }
-
-    ~SchemaValidationContext() {
-        if (hasher)
-            factory.DestroryHasher(hasher);
-        if (validators) {
-            for (SizeType i = 0; i < validatorCount; i++)
-                factory.DestroySchemaValidator(validators[i]);
-            factory.FreeState(validators);
-        }
-        if (patternPropertiesValidators) {
-            for (SizeType i = 0; i < patternPropertiesValidatorCount; i++)
-                factory.DestroySchemaValidator(patternPropertiesValidators[i]);
-            factory.FreeState(patternPropertiesValidators);
-        }
-        if (patternPropertiesSchemas)
-            factory.FreeState(patternPropertiesSchemas);
-        if (propertyExist)
-            factory.FreeState(propertyExist);
-    }
-
-    SchemaValidatorFactoryType& factory;
-    ErrorHandlerType& error_handler;
-    const SchemaType* schema;
-    const SchemaType* valueSchema;
-    const Ch* invalidKeyword;
-    void* hasher; // Only validator access
-    void* arrayElementHashCodes; // Only validator access this
-    ISchemaValidator** validators;
-    SizeType validatorCount;
-    ISchemaValidator** patternPropertiesValidators;
-    SizeType patternPropertiesValidatorCount;
-    const SchemaType** patternPropertiesSchemas;
-    SizeType patternPropertiesSchemaCount;
-    PatternValidatorType valuePatternValidatorType;
-    PatternValidatorType objectPatternValidatorType;
-    SizeType arrayElementIndex;
-    bool* propertyExist;
-    bool inArray;
-    bool valueUniqueness;
-    bool arrayUniqueness;
-};
-
-///////////////////////////////////////////////////////////////////////////////
-// Schema
-
-template <typename SchemaDocumentType>
-class Schema {
-public:
-    typedef typename SchemaDocumentType::ValueType ValueType;
-    typedef typename SchemaDocumentType::AllocatorType AllocatorType;
-    typedef typename SchemaDocumentType::PointerType PointerType;
-    typedef typename ValueType::EncodingType EncodingType;
-    typedef typename EncodingType::Ch Ch;
-    typedef SchemaValidationContext<SchemaDocumentType> Context;
-    typedef Schema<SchemaDocumentType> SchemaType;
-    typedef GenericValue<EncodingType, AllocatorType> SValue;
-    typedef IValidationErrorHandler<Schema> ErrorHandler;
-    friend class GenericSchemaDocument<ValueType, AllocatorType>;
-
-    Schema(SchemaDocumentType* schemaDocument, const PointerType& p, const ValueType& value, const ValueType& document, AllocatorType* allocator) :
-        allocator_(allocator),
-        uri_(schemaDocument->GetURI(), *allocator),
-        pointer_(p, allocator),
-        typeless_(schemaDocument->GetTypeless()),
-        enum_(),
-        enumCount_(),
-        not_(),
-        type_((1 << kTotalSchemaType) - 1), // typeless
-        validatorCount_(),
-        notValidatorIndex_(),
-        properties_(),
-        additionalPropertiesSchema_(),
-        patternProperties_(),
-        patternPropertyCount_(),
-        propertyCount_(),
-        minProperties_(),
-        maxProperties_(SizeType(~0)),
-        additionalProperties_(true),
-        hasDependencies_(),
-        hasRequired_(),
-        hasSchemaDependencies_(),
-        additionalItemsSchema_(),
-        itemsList_(),
-        itemsTuple_(),
-        itemsTupleCount_(),
-        minItems_(),
-        maxItems_(SizeType(~0)),
-        additionalItems_(true),
-        uniqueItems_(false),
-        pattern_(),
-        minLength_(0),
-        maxLength_(~SizeType(0)),
-        exclusiveMinimum_(false),
-        exclusiveMaximum_(false),
-        defaultValueLength_(0)
-    {
-        typedef typename SchemaDocumentType::ValueType ValueType;
-        typedef typename ValueType::ConstValueIterator ConstValueIterator;
-        typedef typename ValueType::ConstMemberIterator ConstMemberIterator;
-
-        if (!value.IsObject())
-            return;
-
-        if (const ValueType* v = GetMember(value, GetTypeString())) {
-            type_ = 0;
-            if (v->IsString())
-                AddType(*v);
-            else if (v->IsArray())
-                for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr)
-                    AddType(*itr);
-        }
-
-        if (const ValueType* v = GetMember(value, GetEnumString()))
-            if (v->IsArray() && v->Size() > 0) {
-                enum_ = static_cast<uint64_t*>(allocator_->Malloc(sizeof(uint64_t) * v->Size()));
-                for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr) {
-                    typedef Hasher<EncodingType, MemoryPoolAllocator<> > EnumHasherType;
-                    char buffer[256u + 24];
-                    MemoryPoolAllocator<> hasherAllocator(buffer, sizeof(buffer));
-                    EnumHasherType h(&hasherAllocator, 256);
-                    itr->Accept(h);
-                    enum_[enumCount_++] = h.GetHashCode();
-                }
-            }
-
-        if (schemaDocument) {
-            AssignIfExist(allOf_, *schemaDocument, p, value, GetAllOfString(), document);
-            AssignIfExist(anyOf_, *schemaDocument, p, value, GetAnyOfString(), document);
-            AssignIfExist(oneOf_, *schemaDocument, p, value, GetOneOfString(), document);
-        }
-
-        if (const ValueType* v = GetMember(value, GetNotString())) {
-            schemaDocument->CreateSchema(&not_, p.Append(GetNotString(), allocator_), *v, document);
-            notValidatorIndex_ = validatorCount_;
-            validatorCount_++;
-        }
-
-        // Object
-
-        const ValueType* properties = GetMember(value, GetPropertiesString());
-        const ValueType* required = GetMember(value, GetRequiredString());
-        const ValueType* dependencies = GetMember(value, GetDependenciesString());
-        {
-            // Gather properties from properties/required/dependencies
-            SValue allProperties(kArrayType);
-
-            if (properties && properties->IsObject())
-                for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr)
-                    AddUniqueElement(allProperties, itr->name);
-            
-            if (required && required->IsArray())
-                for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr)
-                    if (itr->IsString())
-                        AddUniqueElement(allProperties, *itr);
-
-            if (dependencies && dependencies->IsObject())
-                for (ConstMemberIterator itr = dependencies->MemberBegin(); itr != dependencies->MemberEnd(); ++itr) {
-                    AddUniqueElement(allProperties, itr->name);
-                    if (itr->value.IsArray())
-                        for (ConstValueIterator i = itr->value.Begin(); i != itr->value.End(); ++i)
-                            if (i->IsString())
-                                AddUniqueElement(allProperties, *i);
-                }
-
-            if (allProperties.Size() > 0) {
-                propertyCount_ = allProperties.Size();
-                properties_ = static_cast<Property*>(allocator_->Malloc(sizeof(Property) * propertyCount_));
-                for (SizeType i = 0; i < propertyCount_; i++) {
-                    new (&properties_[i]) Property();
-                    properties_[i].name = allProperties[i];
-                    properties_[i].schema = typeless_;
-                }
-            }
-        }
-
-        if (properties && properties->IsObject()) {
-            PointerType q = p.Append(GetPropertiesString(), allocator_);
-            for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr) {
-                SizeType index;
-                if (FindPropertyIndex(itr->name, &index))
-                    schemaDocument->CreateSchema(&properties_[index].schema, q.Append(itr->name, allocator_), itr->value, document);
-            }
-        }
-
-        if (const ValueType* v = GetMember(value, GetPatternPropertiesString())) {
-            PointerType q = p.Append(GetPatternPropertiesString(), allocator_);
-            patternProperties_ = static_cast<PatternProperty*>(allocator_->Malloc(sizeof(PatternProperty) * v->MemberCount()));
-            patternPropertyCount_ = 0;
-
-            for (ConstMemberIterator itr = v->MemberBegin(); itr != v->MemberEnd(); ++itr) {
-                new (&patternProperties_[patternPropertyCount_]) PatternProperty();
-                patternProperties_[patternPropertyCount_].pattern = CreatePattern(itr->name);
-                schemaDocument->CreateSchema(&patternProperties_[patternPropertyCount_].schema, q.Append(itr->name, allocator_), itr->value, document);
-                patternPropertyCount_++;
-            }
-        }
-
-        if (required && required->IsArray())
-            for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr)
-                if (itr->IsString()) {
-                    SizeType index;
-                    if (FindPropertyIndex(*itr, &index)) {
-                        properties_[index].required = true;
-                        hasRequired_ = true;
-                    }
-                }
-
-        if (dependencies && dependencies->IsObject()) {
-            PointerType q = p.Append(GetDependenciesString(), allocator_);
-            hasDependencies_ = true;
-            for (ConstMemberIterator itr = dependencies->MemberBegin(); itr != dependencies->MemberEnd(); ++itr) {
-                SizeType sourceIndex;
-                if (FindPropertyIndex(itr->name, &sourceIndex)) {
-                    if (itr->value.IsArray()) {
-                        properties_[sourceIndex].dependencies = static_cast<bool*>(allocator_->Malloc(sizeof(bool) * propertyCount_));
-                        std::memset(properties_[sourceIndex].dependencies, 0, sizeof(bool)* propertyCount_);
-                        for (ConstValueIterator targetItr = itr->value.Begin(); targetItr != itr->value.End(); ++targetItr) {
-                            SizeType targetIndex;
-                            if (FindPropertyIndex(*targetItr, &targetIndex))
-                                properties_[sourceIndex].dependencies[targetIndex] = true;
-                        }
-                    }
-                    else if (itr->value.IsObject()) {
-                        hasSchemaDependencies_ = true;
-                        schemaDocument->CreateSchema(&properties_[sourceIndex].dependenciesSchema, q.Append(itr->name, allocator_), itr->value, document);
-                        properties_[sourceIndex].dependenciesValidatorIndex = validatorCount_;
-                        validatorCount_++;
-                    }
-                }
-            }
-        }
-
-        if (const ValueType* v = GetMember(value, GetAdditionalPropertiesString())) {
-            if (v->IsBool())
-                additionalProperties_ = v->GetBool();
-            else if (v->IsObject())
-                schemaDocument->CreateSchema(&additionalPropertiesSchema_, p.Append(GetAdditionalPropertiesString(), allocator_), *v, document);
-        }
-
-        AssignIfExist(minProperties_, value, GetMinPropertiesString());
-        AssignIfExist(maxProperties_, value, GetMaxPropertiesString());
-
-        // Array
-        if (const ValueType* v = GetMember(value, GetItemsString())) {
-            PointerType q = p.Append(GetItemsString(), allocator_);
-            if (v->IsObject()) // List validation
-                schemaDocument->CreateSchema(&itemsList_, q, *v, document);
-            else if (v->IsArray()) { // Tuple validation
-                itemsTuple_ = static_cast<const Schema**>(allocator_->Malloc(sizeof(const Schema*) * v->Size()));
-                SizeType index = 0;
-                for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr, index++)
-                    schemaDocument->CreateSchema(&itemsTuple_[itemsTupleCount_++], q.Append(index, allocator_), *itr, document);
-            }
-        }
-
-        AssignIfExist(minItems_, value, GetMinItemsString());
-        AssignIfExist(maxItems_, value, GetMaxItemsString());
-
-        if (const ValueType* v = GetMember(value, GetAdditionalItemsString())) {
-            if (v->IsBool())
-                additionalItems_ = v->GetBool();
-            else if (v->IsObject())
-                schemaDocument->CreateSchema(&additionalItemsSchema_, p.Append(GetAdditionalItemsString(), allocator_), *v, document);
-        }
-
-        AssignIfExist(uniqueItems_, value, GetUniqueItemsString());
-
-        // String
-        AssignIfExist(minLength_, value, GetMinLengthString());
-        AssignIfExist(maxLength_, value, GetMaxLengthString());
-
-        if (const ValueType* v = GetMember(value, GetPatternString()))
-            pattern_ = CreatePattern(*v);
-
-        // Number
-        if (const ValueType* v = GetMember(value, GetMinimumString()))
-            if (v->IsNumber())
-                minimum_.CopyFrom(*v, *allocator_);
-
-        if (const ValueType* v = GetMember(value, GetMaximumString()))
-            if (v->IsNumber())
-                maximum_.CopyFrom(*v, *allocator_);
-
-        AssignIfExist(exclusiveMinimum_, value, GetExclusiveMinimumString());
-        AssignIfExist(exclusiveMaximum_, value, GetExclusiveMaximumString());
-
-        if (const ValueType* v = GetMember(value, GetMultipleOfString()))
-            if (v->IsNumber() && v->GetDouble() > 0.0)
-                multipleOf_.CopyFrom(*v, *allocator_);
-
-        // Default
-        if (const ValueType* v = GetMember(value, GetDefaultValueString()))
-            if (v->IsString())
-                defaultValueLength_ = v->GetStringLength();
-
-    }
-
-    ~Schema() {
-        AllocatorType::Free(enum_);
-        if (properties_) {
-            for (SizeType i = 0; i < propertyCount_; i++)
-                properties_[i].~Property();
-            AllocatorType::Free(properties_);
-        }
-        if (patternProperties_) {
-            for (SizeType i = 0; i < patternPropertyCount_; i++)
-                patternProperties_[i].~PatternProperty();
-            AllocatorType::Free(patternProperties_);
-        }
-        AllocatorType::Free(itemsTuple_);
-#if RAPIDJSON_SCHEMA_HAS_REGEX
-        if (pattern_) {
-            pattern_->~RegexType();
-            AllocatorType::Free(pattern_);
-        }
-#endif
-    }
-
-    const SValue& GetURI() const {
-        return uri_;
-    }
-
-    const PointerType& GetPointer() const {
-        return pointer_;
-    }
-
-    bool BeginValue(Context& context) const {
-        if (context.inArray) {
-            if (uniqueItems_)
-                context.valueUniqueness = true;
-
-            if (itemsList_)
-                context.valueSchema = itemsList_;
-            else if (itemsTuple_) {
-                if (context.arrayElementIndex < itemsTupleCount_)
-                    context.valueSchema = itemsTuple_[context.arrayElementIndex];
-                else if (additionalItemsSchema_)
-                    context.valueSchema = additionalItemsSchema_;
-                else if (additionalItems_)
-                    context.valueSchema = typeless_;
-                else {
-                    context.error_handler.DisallowedItem(context.arrayElementIndex);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetItemsString());
-                }
-            }
-            else
-                context.valueSchema = typeless_;
-
-            context.arrayElementIndex++;
-        }
-        return true;
-    }
-
-    RAPIDJSON_FORCEINLINE bool EndValue(Context& context) const {
-        if (context.patternPropertiesValidatorCount > 0) {
-            bool otherValid = false;
-            SizeType count = context.patternPropertiesValidatorCount;
-            if (context.objectPatternValidatorType != Context::kPatternValidatorOnly)
-                otherValid = context.patternPropertiesValidators[--count]->IsValid();
-
-            bool patternValid = true;
-            for (SizeType i = 0; i < count; i++)
-                if (!context.patternPropertiesValidators[i]->IsValid()) {
-                    patternValid = false;
-                    break;
-                }
-
-            if (context.objectPatternValidatorType == Context::kPatternValidatorOnly) {
-                if (!patternValid) {
-                    context.error_handler.PropertyViolations(context.patternPropertiesValidators, count);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString());
-                }
-            }
-            else if (context.objectPatternValidatorType == Context::kPatternValidatorWithProperty) {
-                if (!patternValid || !otherValid) {
-                    context.error_handler.PropertyViolations(context.patternPropertiesValidators, count + 1);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString());
-                }
-            }
-            else if (!patternValid && !otherValid) { // kPatternValidatorWithAdditionalProperty)
-                context.error_handler.PropertyViolations(context.patternPropertiesValidators, count + 1);
-                RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString());
-            }
-        }
-
-        if (enum_) {
-            const uint64_t h = context.factory.GetHashCode(context.hasher);
-            for (SizeType i = 0; i < enumCount_; i++)
-                if (enum_[i] == h)
-                    goto foundEnum;
-            context.error_handler.DisallowedValue();
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetEnumString());
-            foundEnum:;
-        }
-
-        if (allOf_.schemas)
-            for (SizeType i = allOf_.begin; i < allOf_.begin + allOf_.count; i++)
-                if (!context.validators[i]->IsValid()) {
-                    context.error_handler.NotAllOf(&context.validators[allOf_.begin], allOf_.count);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetAllOfString());
-                }
-        
-        if (anyOf_.schemas) {
-            for (SizeType i = anyOf_.begin; i < anyOf_.begin + anyOf_.count; i++)
-                if (context.validators[i]->IsValid())
-                    goto foundAny;
-            context.error_handler.NoneOf(&context.validators[anyOf_.begin], anyOf_.count);
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetAnyOfString());
-            foundAny:;
-        }
-
-        if (oneOf_.schemas) {
-            bool oneValid = false;
-            for (SizeType i = oneOf_.begin; i < oneOf_.begin + oneOf_.count; i++)
-                if (context.validators[i]->IsValid()) {
-                    if (oneValid) {
-                        context.error_handler.NotOneOf(&context.validators[oneOf_.begin], oneOf_.count);
-                        RAPIDJSON_INVALID_KEYWORD_RETURN(GetOneOfString());
-                    } else
-                        oneValid = true;
-                }
-            if (!oneValid) {
-                context.error_handler.NotOneOf(&context.validators[oneOf_.begin], oneOf_.count);
-                RAPIDJSON_INVALID_KEYWORD_RETURN(GetOneOfString());
-            }
-        }
-
-        if (not_ && context.validators[notValidatorIndex_]->IsValid()) {
-            context.error_handler.Disallowed();
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetNotString());
-        }
-
-        return true;
-    }
-
-    bool Null(Context& context) const {
-        if (!(type_ & (1 << kNullSchemaType))) {
-            DisallowedType(context, GetNullString());
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
-        }
-        return CreateParallelValidator(context);
-    }
-    
-    bool Bool(Context& context, bool) const {
-        if (!(type_ & (1 << kBooleanSchemaType))) {
-            DisallowedType(context, GetBooleanString());
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
-        }
-        return CreateParallelValidator(context);
-    }
-
-    bool Int(Context& context, int i) const {
-        if (!CheckInt(context, i))
-            return false;
-        return CreateParallelValidator(context);
-    }
-
-    bool Uint(Context& context, unsigned u) const {
-        if (!CheckUint(context, u))
-            return false;
-        return CreateParallelValidator(context);
-    }
-
-    bool Int64(Context& context, int64_t i) const {
-        if (!CheckInt(context, i))
-            return false;
-        return CreateParallelValidator(context);
-    }
-
-    bool Uint64(Context& context, uint64_t u) const {
-        if (!CheckUint(context, u))
-            return false;
-        return CreateParallelValidator(context);
-    }
-
-    bool Double(Context& context, double d) const {
-        if (!(type_ & (1 << kNumberSchemaType))) {
-            DisallowedType(context, GetNumberString());
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
-        }
-
-        if (!minimum_.IsNull() && !CheckDoubleMinimum(context, d))
-            return false;
-
-        if (!maximum_.IsNull() && !CheckDoubleMaximum(context, d))
-            return false;
-        
-        if (!multipleOf_.IsNull() && !CheckDoubleMultipleOf(context, d))
-            return false;
-        
-        return CreateParallelValidator(context);
-    }
-    
-    bool String(Context& context, const Ch* str, SizeType length, bool) const {
-        if (!(type_ & (1 << kStringSchemaType))) {
-            DisallowedType(context, GetStringString());
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
-        }
-
-        if (minLength_ != 0 || maxLength_ != SizeType(~0)) {
-            SizeType count;
-            if (internal::CountStringCodePoint<EncodingType>(str, length, &count)) {
-                if (count < minLength_) {
-                    context.error_handler.TooShort(str, length, minLength_);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinLengthString());
-                }
-                if (count > maxLength_) {
-                    context.error_handler.TooLong(str, length, maxLength_);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxLengthString());
-                }
-            }
-        }
-
-        if (pattern_ && !IsPatternMatch(pattern_, str, length)) {
-            context.error_handler.DoesNotMatch(str, length);
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternString());
-        }
-
-        return CreateParallelValidator(context);
-    }
-
-    bool StartObject(Context& context) const {
-        if (!(type_ & (1 << kObjectSchemaType))) {
-            DisallowedType(context, GetObjectString());
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
-        }
-
-        if (hasDependencies_ || hasRequired_) {
-            context.propertyExist = static_cast<bool*>(context.factory.MallocState(sizeof(bool) * propertyCount_));
-            std::memset(context.propertyExist, 0, sizeof(bool) * propertyCount_);
-        }
-
-        if (patternProperties_) { // pre-allocate schema array
-            SizeType count = patternPropertyCount_ + 1; // extra for valuePatternValidatorType
-            context.patternPropertiesSchemas = static_cast<const SchemaType**>(context.factory.MallocState(sizeof(const SchemaType*) * count));
-            context.patternPropertiesSchemaCount = 0;
-            std::memset(context.patternPropertiesSchemas, 0, sizeof(SchemaType*) * count);
-        }
-
-        return CreateParallelValidator(context);
-    }
-    
-    bool Key(Context& context, const Ch* str, SizeType len, bool) const {
-        if (patternProperties_) {
-            context.patternPropertiesSchemaCount = 0;
-            for (SizeType i = 0; i < patternPropertyCount_; i++)
-                if (patternProperties_[i].pattern && IsPatternMatch(patternProperties_[i].pattern, str, len)) {
-                    context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = patternProperties_[i].schema;
-                    context.valueSchema = typeless_;
-                }
-        }
-
-        SizeType index;
-        if (FindPropertyIndex(ValueType(str, len).Move(), &index)) {
-            if (context.patternPropertiesSchemaCount > 0) {
-                context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = properties_[index].schema;
-                context.valueSchema = typeless_;
-                context.valuePatternValidatorType = Context::kPatternValidatorWithProperty;
-            }
-            else
-                context.valueSchema = properties_[index].schema;
-
-            if (context.propertyExist)
-                context.propertyExist[index] = true;
-
-            return true;
-        }
-
-        if (additionalPropertiesSchema_) {
-            if (additionalPropertiesSchema_ && context.patternPropertiesSchemaCount > 0) {
-                context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = additionalPropertiesSchema_;
-                context.valueSchema = typeless_;
-                context.valuePatternValidatorType = Context::kPatternValidatorWithAdditionalProperty;
-            }
-            else
-                context.valueSchema = additionalPropertiesSchema_;
-            return true;
-        }
-        else if (additionalProperties_) {
-            context.valueSchema = typeless_;
-            return true;
-        }
-
-        if (context.patternPropertiesSchemaCount == 0) { // patternProperties are not additional properties
-            context.error_handler.DisallowedProperty(str, len);
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetAdditionalPropertiesString());
-        }
-
-        return true;
-    }
-
-    bool EndObject(Context& context, SizeType memberCount) const {
-        if (hasRequired_) {
-            context.error_handler.StartMissingProperties();
-            for (SizeType index = 0; index < propertyCount_; index++)
-                if (properties_[index].required && !context.propertyExist[index])
-                    if (properties_[index].schema->defaultValueLength_ == 0 )
-                        context.error_handler.AddMissingProperty(properties_[index].name);
-            if (context.error_handler.EndMissingProperties())
-                RAPIDJSON_INVALID_KEYWORD_RETURN(GetRequiredString());
-        }
-
-        if (memberCount < minProperties_) {
-            context.error_handler.TooFewProperties(memberCount, minProperties_);
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinPropertiesString());
-        }
-
-        if (memberCount > maxProperties_) {
-            context.error_handler.TooManyProperties(memberCount, maxProperties_);
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxPropertiesString());
-        }
-
-        if (hasDependencies_) {
-            context.error_handler.StartDependencyErrors();
-            for (SizeType sourceIndex = 0; sourceIndex < propertyCount_; sourceIndex++) {
-                const Property& source = properties_[sourceIndex];
-                if (context.propertyExist[sourceIndex]) {
-                    if (source.dependencies) {
-                        context.error_handler.StartMissingDependentProperties();
-                        for (SizeType targetIndex = 0; targetIndex < propertyCount_; targetIndex++)
-                            if (source.dependencies[targetIndex] && !context.propertyExist[targetIndex])
-                                context.error_handler.AddMissingDependentProperty(properties_[targetIndex].name);
-                        context.error_handler.EndMissingDependentProperties(source.name);
-                    }
-                    else if (source.dependenciesSchema) {
-                        ISchemaValidator* dependenciesValidator = context.validators[source.dependenciesValidatorIndex];
-                        if (!dependenciesValidator->IsValid())
-                            context.error_handler.AddDependencySchemaError(source.name, dependenciesValidator);
-                    }
-                }
-            }
-            if (context.error_handler.EndDependencyErrors())
-                RAPIDJSON_INVALID_KEYWORD_RETURN(GetDependenciesString());
-        }
-
-        return true;
-    }
-
-    bool StartArray(Context& context) const {
-        if (!(type_ & (1 << kArraySchemaType))) {
-            DisallowedType(context, GetArrayString());
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
-        }
-
-        context.arrayElementIndex = 0;
-        context.inArray = true;
-
-        return CreateParallelValidator(context);
-    }
-
-    bool EndArray(Context& context, SizeType elementCount) const {
-        context.inArray = false;
-        
-        if (elementCount < minItems_) {
-            context.error_handler.TooFewItems(elementCount, minItems_);
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinItemsString());
-        }
-        
-        if (elementCount > maxItems_) {
-            context.error_handler.TooManyItems(elementCount, maxItems_);
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxItemsString());
-        }
-
-        return true;
-    }
-
-    // Generate functions for string literal according to Ch
-#define RAPIDJSON_STRING_(name, ...) \
-    static const ValueType& Get##name##String() {\
-        static const Ch s[] = { __VA_ARGS__, '\0' };\
-        static const ValueType v(s, static_cast<SizeType>(sizeof(s) / sizeof(Ch) - 1));\
-        return v;\
-    }
-
-    RAPIDJSON_STRING_(Null, 'n', 'u', 'l', 'l')
-    RAPIDJSON_STRING_(Boolean, 'b', 'o', 'o', 'l', 'e', 'a', 'n')
-    RAPIDJSON_STRING_(Object, 'o', 'b', 'j', 'e', 'c', 't')
-    RAPIDJSON_STRING_(Array, 'a', 'r', 'r', 'a', 'y')
-    RAPIDJSON_STRING_(String, 's', 't', 'r', 'i', 'n', 'g')
-    RAPIDJSON_STRING_(Number, 'n', 'u', 'm', 'b', 'e', 'r')
-    RAPIDJSON_STRING_(Integer, 'i', 'n', 't', 'e', 'g', 'e', 'r')
-    RAPIDJSON_STRING_(Type, 't', 'y', 'p', 'e')
-    RAPIDJSON_STRING_(Enum, 'e', 'n', 'u', 'm')
-    RAPIDJSON_STRING_(AllOf, 'a', 'l', 'l', 'O', 'f')
-    RAPIDJSON_STRING_(AnyOf, 'a', 'n', 'y', 'O', 'f')
-    RAPIDJSON_STRING_(OneOf, 'o', 'n', 'e', 'O', 'f')
-    RAPIDJSON_STRING_(Not, 'n', 'o', 't')
-    RAPIDJSON_STRING_(Properties, 'p', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
-    RAPIDJSON_STRING_(Required, 'r', 'e', 'q', 'u', 'i', 'r', 'e', 'd')
-    RAPIDJSON_STRING_(Dependencies, 'd', 'e', 'p', 'e', 'n', 'd', 'e', 'n', 'c', 'i', 'e', 's')
-    RAPIDJSON_STRING_(PatternProperties, 'p', 'a', 't', 't', 'e', 'r', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
-    RAPIDJSON_STRING_(AdditionalProperties, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
-    RAPIDJSON_STRING_(MinProperties, 'm', 'i', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
-    RAPIDJSON_STRING_(MaxProperties, 'm', 'a', 'x', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
-    RAPIDJSON_STRING_(Items, 'i', 't', 'e', 'm', 's')
-    RAPIDJSON_STRING_(MinItems, 'm', 'i', 'n', 'I', 't', 'e', 'm', 's')
-    RAPIDJSON_STRING_(MaxItems, 'm', 'a', 'x', 'I', 't', 'e', 'm', 's')
-    RAPIDJSON_STRING_(AdditionalItems, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'I', 't', 'e', 'm', 's')
-    RAPIDJSON_STRING_(UniqueItems, 'u', 'n', 'i', 'q', 'u', 'e', 'I', 't', 'e', 'm', 's')
-    RAPIDJSON_STRING_(MinLength, 'm', 'i', 'n', 'L', 'e', 'n', 'g', 't', 'h')
-    RAPIDJSON_STRING_(MaxLength, 'm', 'a', 'x', 'L', 'e', 'n', 'g', 't', 'h')
-    RAPIDJSON_STRING_(Pattern, 'p', 'a', 't', 't', 'e', 'r', 'n')
-    RAPIDJSON_STRING_(Minimum, 'm', 'i', 'n', 'i', 'm', 'u', 'm')
-    RAPIDJSON_STRING_(Maximum, 'm', 'a', 'x', 'i', 'm', 'u', 'm')
-    RAPIDJSON_STRING_(ExclusiveMinimum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'i', 'n', 'i', 'm', 'u', 'm')
-    RAPIDJSON_STRING_(ExclusiveMaximum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'a', 'x', 'i', 'm', 'u', 'm')
-    RAPIDJSON_STRING_(MultipleOf, 'm', 'u', 'l', 't', 'i', 'p', 'l', 'e', 'O', 'f')
-    RAPIDJSON_STRING_(DefaultValue, 'd', 'e', 'f', 'a', 'u', 'l', 't')
-
-#undef RAPIDJSON_STRING_
-
-private:
-    enum SchemaValueType {
-        kNullSchemaType,
-        kBooleanSchemaType,
-        kObjectSchemaType,
-        kArraySchemaType,
-        kStringSchemaType,
-        kNumberSchemaType,
-        kIntegerSchemaType,
-        kTotalSchemaType
-    };
-
-#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX
-        typedef internal::GenericRegex<EncodingType, AllocatorType> RegexType;
-#elif RAPIDJSON_SCHEMA_USE_STDREGEX
-        typedef std::basic_regex<Ch> RegexType;
-#else
-        typedef char RegexType;
-#endif
-
-    struct SchemaArray {
-        SchemaArray() : schemas(), count() {}
-        ~SchemaArray() { AllocatorType::Free(schemas); }
-        const SchemaType** schemas;
-        SizeType begin; // begin index of context.validators
-        SizeType count;
-    };
-
-    template <typename V1, typename V2>
-    void AddUniqueElement(V1& a, const V2& v) {
-        for (typename V1::ConstValueIterator itr = a.Begin(); itr != a.End(); ++itr)
-            if (*itr == v)
-                return;
-        V1 c(v, *allocator_);
-        a.PushBack(c, *allocator_);
-    }
-
-    static const ValueType* GetMember(const ValueType& value, const ValueType& name) {
-        typename ValueType::ConstMemberIterator itr = value.FindMember(name);
-        return itr != value.MemberEnd() ? &(itr->value) : 0;
-    }
-
-    static void AssignIfExist(bool& out, const ValueType& value, const ValueType& name) {
-        if (const ValueType* v = GetMember(value, name))
-            if (v->IsBool())
-                out = v->GetBool();
-    }
-
-    static void AssignIfExist(SizeType& out, const ValueType& value, const ValueType& name) {
-        if (const ValueType* v = GetMember(value, name))
-            if (v->IsUint64() && v->GetUint64() <= SizeType(~0))
-                out = static_cast<SizeType>(v->GetUint64());
-    }
-
-    void AssignIfExist(SchemaArray& out, SchemaDocumentType& schemaDocument, const PointerType& p, const ValueType& value, const ValueType& name, const ValueType& document) {
-        if (const ValueType* v = GetMember(value, name)) {
-            if (v->IsArray() && v->Size() > 0) {
-                PointerType q = p.Append(name, allocator_);
-                out.count = v->Size();
-                out.schemas = static_cast<const Schema**>(allocator_->Malloc(out.count * sizeof(const Schema*)));
-                memset(out.schemas, 0, sizeof(Schema*)* out.count);
-                for (SizeType i = 0; i < out.count; i++)
-                    schemaDocument.CreateSchema(&out.schemas[i], q.Append(i, allocator_), (*v)[i], document);
-                out.begin = validatorCount_;
-                validatorCount_ += out.count;
-            }
-        }
-    }
-
-#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX
-    template <typename ValueType>
-    RegexType* CreatePattern(const ValueType& value) {
-        if (value.IsString()) {
-            RegexType* r = new (allocator_->Malloc(sizeof(RegexType))) RegexType(value.GetString(), allocator_);
-            if (!r->IsValid()) {
-                r->~RegexType();
-                AllocatorType::Free(r);
-                r = 0;
-            }
-            return r;
-        }
-        return 0;
-    }
-
-    static bool IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType) {
-        GenericRegexSearch<RegexType> rs(*pattern);
-        return rs.Search(str);
-    }
-#elif RAPIDJSON_SCHEMA_USE_STDREGEX
-    template <typename ValueType>
-    RegexType* CreatePattern(const ValueType& value) {
-        if (value.IsString()) {
-            RegexType *r = static_cast<RegexType*>(allocator_->Malloc(sizeof(RegexType)));
-            try {
-                return new (r) RegexType(value.GetString(), std::size_t(value.GetStringLength()), std::regex_constants::ECMAScript);
-            }
-            catch (const std::regex_error&) {
-                AllocatorType::Free(r);
-            }
-        }
-        return 0;
-    }
-
-    static bool IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType length) {
-        std::match_results<const Ch*> r;
-        return std::regex_search(str, str + length, r, *pattern);
-    }
-#else
-    template <typename ValueType>
-    RegexType* CreatePattern(const ValueType&) { return 0; }
-
-    static bool IsPatternMatch(const RegexType*, const Ch *, SizeType) { return true; }
-#endif // RAPIDJSON_SCHEMA_USE_STDREGEX
-
-    void AddType(const ValueType& type) {
-        if      (type == GetNullString()   ) type_ |= 1 << kNullSchemaType;
-        else if (type == GetBooleanString()) type_ |= 1 << kBooleanSchemaType;
-        else if (type == GetObjectString() ) type_ |= 1 << kObjectSchemaType;
-        else if (type == GetArrayString()  ) type_ |= 1 << kArraySchemaType;
-        else if (type == GetStringString() ) type_ |= 1 << kStringSchemaType;
-        else if (type == GetIntegerString()) type_ |= 1 << kIntegerSchemaType;
-        else if (type == GetNumberString() ) type_ |= (1 << kNumberSchemaType) | (1 << kIntegerSchemaType);
-    }
-
-    bool CreateParallelValidator(Context& context) const {
-        if (enum_ || context.arrayUniqueness)
-            context.hasher = context.factory.CreateHasher();
-
-        if (validatorCount_) {
-            RAPIDJSON_ASSERT(context.validators == 0);
-            context.validators = static_cast<ISchemaValidator**>(context.factory.MallocState(sizeof(ISchemaValidator*) * validatorCount_));
-            context.validatorCount = validatorCount_;
-
-            if (allOf_.schemas)
-                CreateSchemaValidators(context, allOf_);
-
-            if (anyOf_.schemas)
-                CreateSchemaValidators(context, anyOf_);
-            
-            if (oneOf_.schemas)
-                CreateSchemaValidators(context, oneOf_);
-            
-            if (not_)
-                context.validators[notValidatorIndex_] = context.factory.CreateSchemaValidator(*not_);
-            
-            if (hasSchemaDependencies_) {
-                for (SizeType i = 0; i < propertyCount_; i++)
-                    if (properties_[i].dependenciesSchema)
-                        context.validators[properties_[i].dependenciesValidatorIndex] = context.factory.CreateSchemaValidator(*properties_[i].dependenciesSchema);
-            }
-        }
-
-        return true;
-    }
-
-    void CreateSchemaValidators(Context& context, const SchemaArray& schemas) const {
-        for (SizeType i = 0; i < schemas.count; i++)
-            context.validators[schemas.begin + i] = context.factory.CreateSchemaValidator(*schemas.schemas[i]);
-    }
-
-    // O(n)
-    bool FindPropertyIndex(const ValueType& name, SizeType* outIndex) const {
-        SizeType len = name.GetStringLength();
-        const Ch* str = name.GetString();
-        for (SizeType index = 0; index < propertyCount_; index++)
-            if (properties_[index].name.GetStringLength() == len && 
-                (std::memcmp(properties_[index].name.GetString(), str, sizeof(Ch) * len) == 0))
-            {
-                *outIndex = index;
-                return true;
-            }
-        return false;
-    }
-
-    bool CheckInt(Context& context, int64_t i) const {
-        if (!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType)))) {
-            DisallowedType(context, GetIntegerString());
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
-        }
-
-        if (!minimum_.IsNull()) {
-            if (minimum_.IsInt64()) {
-                if (exclusiveMinimum_ ? i <= minimum_.GetInt64() : i < minimum_.GetInt64()) {
-                    context.error_handler.BelowMinimum(i, minimum_, exclusiveMinimum_);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString());
-                }
-            }
-            else if (minimum_.IsUint64()) {
-                context.error_handler.BelowMinimum(i, minimum_, exclusiveMinimum_);
-                RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString()); // i <= max(int64_t) < minimum.GetUint64()
-            }
-            else if (!CheckDoubleMinimum(context, static_cast<double>(i)))
-                return false;
-        }
-
-        if (!maximum_.IsNull()) {
-            if (maximum_.IsInt64()) {
-                if (exclusiveMaximum_ ? i >= maximum_.GetInt64() : i > maximum_.GetInt64()) {
-                    context.error_handler.AboveMaximum(i, maximum_, exclusiveMaximum_);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString());
-                }
-            }
-            else if (maximum_.IsUint64()) { }
-                /* do nothing */ // i <= max(int64_t) < maximum_.GetUint64()
-            else if (!CheckDoubleMaximum(context, static_cast<double>(i)))
-                return false;
-        }
-
-        if (!multipleOf_.IsNull()) {
-            if (multipleOf_.IsUint64()) {
-                if (static_cast<uint64_t>(i >= 0 ? i : -i) % multipleOf_.GetUint64() != 0) {
-                    context.error_handler.NotMultipleOf(i, multipleOf_);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString());
-                }
-            }
-            else if (!CheckDoubleMultipleOf(context, static_cast<double>(i)))
-                return false;
-        }
-
-        return true;
-    }
-
-    bool CheckUint(Context& context, uint64_t i) const {
-        if (!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType)))) {
-            DisallowedType(context, GetIntegerString());
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
-        }
-
-        if (!minimum_.IsNull()) {
-            if (minimum_.IsUint64()) {
-                if (exclusiveMinimum_ ? i <= minimum_.GetUint64() : i < minimum_.GetUint64()) {
-                    context.error_handler.BelowMinimum(i, minimum_, exclusiveMinimum_);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString());
-                }
-            }
-            else if (minimum_.IsInt64())
-                /* do nothing */; // i >= 0 > minimum.Getint64()
-            else if (!CheckDoubleMinimum(context, static_cast<double>(i)))
-                return false;
-        }
-
-        if (!maximum_.IsNull()) {
-            if (maximum_.IsUint64()) {
-                if (exclusiveMaximum_ ? i >= maximum_.GetUint64() : i > maximum_.GetUint64()) {
-                    context.error_handler.AboveMaximum(i, maximum_, exclusiveMaximum_);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString());
-                }
-            }
-            else if (maximum_.IsInt64()) {
-                context.error_handler.AboveMaximum(i, maximum_, exclusiveMaximum_);
-                RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString()); // i >= 0 > maximum_
-            }
-            else if (!CheckDoubleMaximum(context, static_cast<double>(i)))
-                return false;
-        }
-
-        if (!multipleOf_.IsNull()) {
-            if (multipleOf_.IsUint64()) {
-                if (i % multipleOf_.GetUint64() != 0) {
-                    context.error_handler.NotMultipleOf(i, multipleOf_);
-                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString());
-                }
-            }
-            else if (!CheckDoubleMultipleOf(context, static_cast<double>(i)))
-                return false;
-        }
-
-        return true;
-    }
-
-    bool CheckDoubleMinimum(Context& context, double d) const {
-        if (exclusiveMinimum_ ? d <= minimum_.GetDouble() : d < minimum_.GetDouble()) {
-            context.error_handler.BelowMinimum(d, minimum_, exclusiveMinimum_);
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString());
-        }
-        return true;
-    }
-
-    bool CheckDoubleMaximum(Context& context, double d) const {
-        if (exclusiveMaximum_ ? d >= maximum_.GetDouble() : d > maximum_.GetDouble()) {
-            context.error_handler.AboveMaximum(d, maximum_, exclusiveMaximum_);
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString());
-        }
-        return true;
-    }
-
-    bool CheckDoubleMultipleOf(Context& context, double d) const {
-        double a = std::abs(d), b = std::abs(multipleOf_.GetDouble());
-        double q = std::floor(a / b);
-        double r = a - q * b;
-        if (r > 0.0) {
-            context.error_handler.NotMultipleOf(d, multipleOf_);
-            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString());
-        }
-        return true;
-    }
-
-    void DisallowedType(Context& context, const ValueType& actualType) const {
-        ErrorHandler& eh = context.error_handler;
-        eh.StartDisallowedType();
-
-        if (type_ & (1 << kNullSchemaType)) eh.AddExpectedType(GetNullString());
-        if (type_ & (1 << kBooleanSchemaType)) eh.AddExpectedType(GetBooleanString());
-        if (type_ & (1 << kObjectSchemaType)) eh.AddExpectedType(GetObjectString());
-        if (type_ & (1 << kArraySchemaType)) eh.AddExpectedType(GetArrayString());
-        if (type_ & (1 << kStringSchemaType)) eh.AddExpectedType(GetStringString());
-
-        if (type_ & (1 << kNumberSchemaType)) eh.AddExpectedType(GetNumberString());
-        else if (type_ & (1 << kIntegerSchemaType)) eh.AddExpectedType(GetIntegerString());
-
-        eh.EndDisallowedType(actualType);
-    }
-
-    struct Property {
-        Property() : schema(), dependenciesSchema(), dependenciesValidatorIndex(), dependencies(), required(false) {}
-        ~Property() { AllocatorType::Free(dependencies); }
-        SValue name;
-        const SchemaType* schema;
-        const SchemaType* dependenciesSchema;
-        SizeType dependenciesValidatorIndex;
-        bool* dependencies;
-        bool required;
-    };
-
-    struct PatternProperty {
-        PatternProperty() : schema(), pattern() {}
-        ~PatternProperty() { 
-            if (pattern) {
-                pattern->~RegexType();
-                AllocatorType::Free(pattern);
-            }
-        }
-        const SchemaType* schema;
-        RegexType* pattern;
-    };
-
-    AllocatorType* allocator_;
-    SValue uri_;
-    PointerType pointer_;
-    const SchemaType* typeless_;
-    uint64_t* enum_;
-    SizeType enumCount_;
-    SchemaArray allOf_;
-    SchemaArray anyOf_;
-    SchemaArray oneOf_;
-    const SchemaType* not_;
-    unsigned type_; // bitmask of kSchemaType
-    SizeType validatorCount_;
-    SizeType notValidatorIndex_;
-
-    Property* properties_;
-    const SchemaType* additionalPropertiesSchema_;
-    PatternProperty* patternProperties_;
-    SizeType patternPropertyCount_;
-    SizeType propertyCount_;
-    SizeType minProperties_;
-    SizeType maxProperties_;
-    bool additionalProperties_;
-    bool hasDependencies_;
-    bool hasRequired_;
-    bool hasSchemaDependencies_;
-
-    const SchemaType* additionalItemsSchema_;
-    const SchemaType* itemsList_;
-    const SchemaType** itemsTuple_;
-    SizeType itemsTupleCount_;
-    SizeType minItems_;
-    SizeType maxItems_;
-    bool additionalItems_;
-    bool uniqueItems_;
-
-    RegexType* pattern_;
-    SizeType minLength_;
-    SizeType maxLength_;
-
-    SValue minimum_;
-    SValue maximum_;
-    SValue multipleOf_;
-    bool exclusiveMinimum_;
-    bool exclusiveMaximum_;
-    
-    SizeType defaultValueLength_;
-};
-
-template<typename Stack, typename Ch>
-struct TokenHelper {
-    RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index) {
-        *documentStack.template Push<Ch>() = '/';
-        char buffer[21];
-        size_t length = static_cast<size_t>((sizeof(SizeType) == 4 ? u32toa(index, buffer) : u64toa(index, buffer)) - buffer);
-        for (size_t i = 0; i < length; i++)
-            *documentStack.template Push<Ch>() = static_cast<Ch>(buffer[i]);
-    }
-};
-
-// Partial specialized version for char to prevent buffer copying.
-template <typename Stack>
-struct TokenHelper<Stack, char> {
-    RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index) {
-        if (sizeof(SizeType) == 4) {
-            char *buffer = documentStack.template Push<char>(1 + 10); // '/' + uint
-            *buffer++ = '/';
-            const char* end = internal::u32toa(index, buffer);
-             documentStack.template Pop<char>(static_cast<size_t>(10 - (end - buffer)));
-        }
-        else {
-            char *buffer = documentStack.template Push<char>(1 + 20); // '/' + uint64
-            *buffer++ = '/';
-            const char* end = internal::u64toa(index, buffer);
-            documentStack.template Pop<char>(static_cast<size_t>(20 - (end - buffer)));
-        }
-    }
-};
-
-} // namespace internal
-
-///////////////////////////////////////////////////////////////////////////////
-// IGenericRemoteSchemaDocumentProvider
-
-template <typename SchemaDocumentType>
-class IGenericRemoteSchemaDocumentProvider {
-public:
-    typedef typename SchemaDocumentType::Ch Ch;
-
-    virtual ~IGenericRemoteSchemaDocumentProvider() {}
-    virtual const SchemaDocumentType* GetRemoteDocument(const Ch* uri, SizeType length) = 0;
-};
-
-///////////////////////////////////////////////////////////////////////////////
-// GenericSchemaDocument
-
-//! JSON schema document.
-/*!
-    A JSON schema document is a compiled version of a JSON schema.
-    It is basically a tree of internal::Schema.
-
-    \note This is an immutable class (i.e. its instance cannot be modified after construction).
-    \tparam ValueT Type of JSON value (e.g. \c Value ), which also determine the encoding.
-    \tparam Allocator Allocator type for allocating memory of this document.
-*/
-template <typename ValueT, typename Allocator = CrtAllocator>
-class GenericSchemaDocument {
-public:
-    typedef ValueT ValueType;
-    typedef IGenericRemoteSchemaDocumentProvider<GenericSchemaDocument> IRemoteSchemaDocumentProviderType;
-    typedef Allocator AllocatorType;
-    typedef typename ValueType::EncodingType EncodingType;
-    typedef typename EncodingType::Ch Ch;
-    typedef internal::Schema<GenericSchemaDocument> SchemaType;
-    typedef GenericPointer<ValueType, Allocator> PointerType;
-    typedef GenericValue<EncodingType, Allocator> URIType;
-    friend class internal::Schema<GenericSchemaDocument>;
-    template <typename, typename, typename>
-    friend class GenericSchemaValidator;
-
-    //! Constructor.
-    /*!
-        Compile a JSON document into schema document.
-
-        \param document A JSON document as source.
-        \param uri The base URI of this schema document for purposes of violation reporting.
-        \param uriLength Length of \c name, in code points.
-        \param remoteProvider An optional remote schema document provider for resolving remote reference. Can be null.
-        \param allocator An optional allocator instance for allocating memory. Can be null.
-    */
-    explicit GenericSchemaDocument(const ValueType& document, const Ch* uri = 0, SizeType uriLength = 0,
-        IRemoteSchemaDocumentProviderType* remoteProvider = 0, Allocator* allocator = 0) :
-        remoteProvider_(remoteProvider),
-        allocator_(allocator),
-        ownAllocator_(),
-        root_(),
-        typeless_(),
-        schemaMap_(allocator, kInitialSchemaMapSize),
-        schemaRef_(allocator, kInitialSchemaRefSize)
-    {
-        if (!allocator_)
-            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
-
-        Ch noUri[1] = {0};
-        uri_.SetString(uri ? uri : noUri, uriLength, *allocator_);
-
-        typeless_ = static_cast<SchemaType*>(allocator_->Malloc(sizeof(SchemaType)));
-        new (typeless_) SchemaType(this, PointerType(), ValueType(kObjectType).Move(), ValueType(kObjectType).Move(), allocator_);
-
-        // Generate root schema, it will call CreateSchema() to create sub-schemas,
-        // And call AddRefSchema() if there are $ref.
-        CreateSchemaRecursive(&root_, PointerType(), document, document);
-
-        // Resolve $ref
-        while (!schemaRef_.Empty()) {
-            SchemaRefEntry* refEntry = schemaRef_.template Pop<SchemaRefEntry>(1);
-            if (const SchemaType* s = GetSchema(refEntry->target)) {
-                if (refEntry->schema)
-                    *refEntry->schema = s;
-
-                // Create entry in map if not exist
-                if (!GetSchema(refEntry->source)) {
-                    new (schemaMap_.template Push<SchemaEntry>()) SchemaEntry(refEntry->source, const_cast<SchemaType*>(s), false, allocator_);
-                }
-            }
-            else if (refEntry->schema)
-                *refEntry->schema = typeless_;
-
-            refEntry->~SchemaRefEntry();
-        }
-
-        RAPIDJSON_ASSERT(root_ != 0);
-
-        schemaRef_.ShrinkToFit(); // Deallocate all memory for ref
-    }
-
-#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
-    //! Move constructor in C++11
-    GenericSchemaDocument(GenericSchemaDocument&& rhs) RAPIDJSON_NOEXCEPT :
-        remoteProvider_(rhs.remoteProvider_),
-        allocator_(rhs.allocator_),
-        ownAllocator_(rhs.ownAllocator_),
-        root_(rhs.root_),
-        typeless_(rhs.typeless_),
-        schemaMap_(std::move(rhs.schemaMap_)),
-        schemaRef_(std::move(rhs.schemaRef_)),
-        uri_(std::move(rhs.uri_))
-    {
-        rhs.remoteProvider_ = 0;
-        rhs.allocator_ = 0;
-        rhs.ownAllocator_ = 0;
-        rhs.typeless_ = 0;
-    }
-#endif
-
-    //! Destructor
-    ~GenericSchemaDocument() {
-        while (!schemaMap_.Empty())
-            schemaMap_.template Pop<SchemaEntry>(1)->~SchemaEntry();
-
-        if (typeless_) {
-            typeless_->~SchemaType();
-            Allocator::Free(typeless_);
-        }
-
-        RAPIDJSON_DELETE(ownAllocator_);
-    }
-
-    const URIType& GetURI() const { return uri_; }
-
-    //! Get the root schema.
-    const SchemaType& GetRoot() const { return *root_; }
-
-private:
-    //! Prohibit copying
-    GenericSchemaDocument(const GenericSchemaDocument&);
-    //! Prohibit assignment
-    GenericSchemaDocument& operator=(const GenericSchemaDocument&);
-
-    struct SchemaRefEntry {
-        SchemaRefEntry(const PointerType& s, const PointerType& t, const SchemaType** outSchema, Allocator *allocator) : source(s, allocator), target(t, allocator), schema(outSchema) {}
-        PointerType source;
-        PointerType target;
-        const SchemaType** schema;
-    };
-
-    struct SchemaEntry {
-        SchemaEntry(const PointerType& p, SchemaType* s, bool o, Allocator* allocator) : pointer(p, allocator), schema(s), owned(o) {}
-        ~SchemaEntry() {
-            if (owned) {
-                schema->~SchemaType();
-                Allocator::Free(schema);
-            }
-        }
-        PointerType pointer;
-        SchemaType* schema;
-        bool owned;
-    };
-
-    void CreateSchemaRecursive(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) {
-        if (schema)
-            *schema = typeless_;
-
-        if (v.GetType() == kObjectType) {
-            const SchemaType* s = GetSchema(pointer);
-            if (!s)
-                CreateSchema(schema, pointer, v, document);
-
-            for (typename ValueType::ConstMemberIterator itr = v.MemberBegin(); itr != v.MemberEnd(); ++itr)
-                CreateSchemaRecursive(0, pointer.Append(itr->name, allocator_), itr->value, document);
-        }
-        else if (v.GetType() == kArrayType)
-            for (SizeType i = 0; i < v.Size(); i++)
-                CreateSchemaRecursive(0, pointer.Append(i, allocator_), v[i], document);
-    }
-
-    void CreateSchema(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) {
-        RAPIDJSON_ASSERT(pointer.IsValid());
-        if (v.IsObject()) {
-            if (!HandleRefSchema(pointer, schema, v, document)) {
-                SchemaType* s = new (allocator_->Malloc(sizeof(SchemaType))) SchemaType(this, pointer, v, document, allocator_);
-                new (schemaMap_.template Push<SchemaEntry>()) SchemaEntry(pointer, s, true, allocator_);
-                if (schema)
-                    *schema = s;
-            }
-        }
-    }
-
-    bool HandleRefSchema(const PointerType& source, const SchemaType** schema, const ValueType& v, const ValueType& document) {
-        static const Ch kRefString[] = { '$', 'r', 'e', 'f', '\0' };
-        static const ValueType kRefValue(kRefString, 4);
-
-        typename ValueType::ConstMemberIterator itr = v.FindMember(kRefValue);
-        if (itr == v.MemberEnd())
-            return false;
-
-        if (itr->value.IsString()) {
-            SizeType len = itr->value.GetStringLength();
-            if (len > 0) {
-                const Ch* s = itr->value.GetString();
-                SizeType i = 0;
-                while (i < len && s[i] != '#') // Find the first #
-                    i++;
-
-                if (i > 0) { // Remote reference, resolve immediately
-                    if (remoteProvider_) {
-                        if (const GenericSchemaDocument* remoteDocument = remoteProvider_->GetRemoteDocument(s, i)) {
-                            PointerType pointer(&s[i], len - i, allocator_);
-                            if (pointer.IsValid()) {
-                                if (const SchemaType* sc = remoteDocument->GetSchema(pointer)) {
-                                    if (schema)
-                                        *schema = sc;
-                                    new (schemaMap_.template Push<SchemaEntry>()) SchemaEntry(source, const_cast<SchemaType*>(sc), false, allocator_);
-                                    return true;
-                                }
-                            }
-                        }
-                    }
-                }
-                else if (s[i] == '#') { // Local reference, defer resolution
-                    PointerType pointer(&s[i], len - i, allocator_);
-                    if (pointer.IsValid()) {
-                        if (const ValueType* nv = pointer.Get(document))
-                            if (HandleRefSchema(source, schema, *nv, document))
-                                return true;
-
-                        new (schemaRef_.template Push<SchemaRefEntry>()) SchemaRefEntry(source, pointer, schema, allocator_);
-                        return true;
-                    }
-                }
-            }
-        }
-        return false;
-    }
-
-    const SchemaType* GetSchema(const PointerType& pointer) const {
-        for (const SchemaEntry* target = schemaMap_.template Bottom<SchemaEntry>(); target != schemaMap_.template End<SchemaEntry>(); ++target)
-            if (pointer == target->pointer)
-                return target->schema;
-        return 0;
-    }
-
-    PointerType GetPointer(const SchemaType* schema) const {
-        for (const SchemaEntry* target = schemaMap_.template Bottom<SchemaEntry>(); target != schemaMap_.template End<SchemaEntry>(); ++target)
-            if (schema == target->schema)
-                return target->pointer;
-        return PointerType();
-    }
-
-    const SchemaType* GetTypeless() const { return typeless_; }
-
-    static const size_t kInitialSchemaMapSize = 64;
-    static const size_t kInitialSchemaRefSize = 64;
-
-    IRemoteSchemaDocumentProviderType* remoteProvider_;
-    Allocator *allocator_;
-    Allocator *ownAllocator_;
-    const SchemaType* root_;                //!< Root schema.
-    SchemaType* typeless_;
-    internal::Stack<Allocator> schemaMap_;  // Stores created Pointer -> Schemas
-    internal::Stack<Allocator> schemaRef_;  // Stores Pointer from $ref and schema which holds the $ref
-    URIType uri_;
-};
-
-//! GenericSchemaDocument using Value type.
-typedef GenericSchemaDocument<Value> SchemaDocument;
-//! IGenericRemoteSchemaDocumentProvider using SchemaDocument.
-typedef IGenericRemoteSchemaDocumentProvider<SchemaDocument> IRemoteSchemaDocumentProvider;
-
-///////////////////////////////////////////////////////////////////////////////
-// GenericSchemaValidator
-
-//! JSON Schema Validator.
-/*!
-    A SAX style JSON schema validator.
-    It uses a \c GenericSchemaDocument to validate SAX events.
-    It delegates the incoming SAX events to an output handler.
-    The default output handler does nothing.
-    It can be reused multiple times by calling \c Reset().
-
-    \tparam SchemaDocumentType Type of schema document.
-    \tparam OutputHandler Type of output handler. Default handler does nothing.
-    \tparam StateAllocator Allocator for storing the internal validation states.
-*/
-template <
-    typename SchemaDocumentType,
-    typename OutputHandler = BaseReaderHandler<typename SchemaDocumentType::SchemaType::EncodingType>,
-    typename StateAllocator = CrtAllocator>
-class GenericSchemaValidator :
-    public internal::ISchemaStateFactory<typename SchemaDocumentType::SchemaType>, 
-    public internal::ISchemaValidator,
-    public internal::IValidationErrorHandler<typename SchemaDocumentType::SchemaType>
-{
-public:
-    typedef typename SchemaDocumentType::SchemaType SchemaType;
-    typedef typename SchemaDocumentType::PointerType PointerType;
-    typedef typename SchemaType::EncodingType EncodingType;
-    typedef typename SchemaType::SValue SValue;
-    typedef typename EncodingType::Ch Ch;
-    typedef GenericStringRef<Ch> StringRefType;
-    typedef GenericValue<EncodingType, StateAllocator> ValueType;
-
-    //! Constructor without output handler.
-    /*!
-        \param schemaDocument The schema document to conform to.
-        \param allocator Optional allocator for storing internal validation states.
-        \param schemaStackCapacity Optional initial capacity of schema path stack.
-        \param documentStackCapacity Optional initial capacity of document path stack.
-    */
-    GenericSchemaValidator(
-        const SchemaDocumentType& schemaDocument,
-        StateAllocator* allocator = 0, 
-        size_t schemaStackCapacity = kDefaultSchemaStackCapacity,
-        size_t documentStackCapacity = kDefaultDocumentStackCapacity)
-        :
-        schemaDocument_(&schemaDocument),
-        root_(schemaDocument.GetRoot()),
-        stateAllocator_(allocator),
-        ownStateAllocator_(0),
-        schemaStack_(allocator, schemaStackCapacity),
-        documentStack_(allocator, documentStackCapacity),
-        outputHandler_(0),
-        error_(kObjectType),
-        currentError_(),
-        missingDependents_(),
-        valid_(true)
-#if RAPIDJSON_SCHEMA_VERBOSE
-        , depth_(0)
-#endif
-    {
-    }
-
-    //! Constructor with output handler.
-    /*!
-        \param schemaDocument The schema document to conform to.
-        \param allocator Optional allocator for storing internal validation states.
-        \param schemaStackCapacity Optional initial capacity of schema path stack.
-        \param documentStackCapacity Optional initial capacity of document path stack.
-    */
-    GenericSchemaValidator(
-        const SchemaDocumentType& schemaDocument,
-        OutputHandler& outputHandler,
-        StateAllocator* allocator = 0, 
-        size_t schemaStackCapacity = kDefaultSchemaStackCapacity,
-        size_t documentStackCapacity = kDefaultDocumentStackCapacity)
-        :
-        schemaDocument_(&schemaDocument),
-        root_(schemaDocument.GetRoot()),
-        stateAllocator_(allocator),
-        ownStateAllocator_(0),
-        schemaStack_(allocator, schemaStackCapacity),
-        documentStack_(allocator, documentStackCapacity),
-        outputHandler_(&outputHandler),
-        error_(kObjectType),
-        currentError_(),
-        missingDependents_(),
-        valid_(true)
-#if RAPIDJSON_SCHEMA_VERBOSE
-        , depth_(0)
-#endif
-    {
-    }
-
-    //! Destructor.
-    ~GenericSchemaValidator() {
-        Reset();
-        RAPIDJSON_DELETE(ownStateAllocator_);
-    }
-
-    //! Reset the internal states.
-    void Reset() {
-        while (!schemaStack_.Empty())
-            PopSchema();
-        documentStack_.Clear();
-        error_.SetObject();
-        currentError_.SetNull();
-        missingDependents_.SetNull();
-        valid_ = true;
-    }
-
-    //! Checks whether the current state is valid.
-    // Implementation of ISchemaValidator
-    virtual bool IsValid() const { return valid_; }
-
-    //! Gets the error object.
-    ValueType& GetError() { return error_; }
-    const ValueType& GetError() const { return error_; }
-
-    //! Gets the JSON pointer pointed to the invalid schema.
-    PointerType GetInvalidSchemaPointer() const {
-        return schemaStack_.Empty() ? PointerType() : CurrentSchema().GetPointer();
-    }
-
-    //! Gets the keyword of invalid schema.
-    const Ch* GetInvalidSchemaKeyword() const {
-        return schemaStack_.Empty() ? 0 : CurrentContext().invalidKeyword;
-    }
-
-    //! Gets the JSON pointer pointed to the invalid value.
-    PointerType GetInvalidDocumentPointer() const {
-        if (documentStack_.Empty()) {
-            return PointerType();
-        }
-        else {
-            return PointerType(documentStack_.template Bottom<Ch>(), documentStack_.GetSize() / sizeof(Ch));
-        }
-    }
-
-    void NotMultipleOf(int64_t actual, const SValue& expected) {
-        AddNumberError(SchemaType::GetMultipleOfString(), ValueType(actual).Move(), expected);
-    }
-    void NotMultipleOf(uint64_t actual, const SValue& expected) {
-        AddNumberError(SchemaType::GetMultipleOfString(), ValueType(actual).Move(), expected);
-    }
-    void NotMultipleOf(double actual, const SValue& expected) {
-        AddNumberError(SchemaType::GetMultipleOfString(), ValueType(actual).Move(), expected);
-    }
-    void AboveMaximum(int64_t actual, const SValue& expected, bool exclusive) {
-        AddNumberError(SchemaType::GetMaximumString(), ValueType(actual).Move(), expected,
-            exclusive ? &SchemaType::GetExclusiveMaximumString : 0);
-    }
-    void AboveMaximum(uint64_t actual, const SValue& expected, bool exclusive) {
-        AddNumberError(SchemaType::GetMaximumString(), ValueType(actual).Move(), expected,
-            exclusive ? &SchemaType::GetExclusiveMaximumString : 0);
-    }
-    void AboveMaximum(double actual, const SValue& expected, bool exclusive) {
-        AddNumberError(SchemaType::GetMaximumString(), ValueType(actual).Move(), expected,
-            exclusive ? &SchemaType::GetExclusiveMaximumString : 0);
-    }
-    void BelowMinimum(int64_t actual, const SValue& expected, bool exclusive) {
-        AddNumberError(SchemaType::GetMinimumString(), ValueType(actual).Move(), expected,
-            exclusive ? &SchemaType::GetExclusiveMinimumString : 0);
-    }
-    void BelowMinimum(uint64_t actual, const SValue& expected, bool exclusive) {
-        AddNumberError(SchemaType::GetMinimumString(), ValueType(actual).Move(), expected,
-            exclusive ? &SchemaType::GetExclusiveMinimumString : 0);
-    }
-    void BelowMinimum(double actual, const SValue& expected, bool exclusive) {
-        AddNumberError(SchemaType::GetMinimumString(), ValueType(actual).Move(), expected,
-            exclusive ? &SchemaType::GetExclusiveMinimumString : 0);
-    }
-
-    void TooLong(const Ch* str, SizeType length, SizeType expected) {
-        AddNumberError(SchemaType::GetMaxLengthString(),
-            ValueType(str, length, GetStateAllocator()).Move(), SValue(expected).Move());
-    }
-    void TooShort(const Ch* str, SizeType length, SizeType expected) {
-        AddNumberError(SchemaType::GetMinLengthString(),
-            ValueType(str, length, GetStateAllocator()).Move(), SValue(expected).Move());
-    }
-    void DoesNotMatch(const Ch* str, SizeType length) {
-        currentError_.SetObject();
-        currentError_.AddMember(GetActualString(), ValueType(str, length, GetStateAllocator()).Move(), GetStateAllocator());
-        AddCurrentError(SchemaType::GetPatternString());
-    }
-
-    void DisallowedItem(SizeType index) {
-        currentError_.SetObject();
-        currentError_.AddMember(GetDisallowedString(), ValueType(index).Move(), GetStateAllocator());
-        AddCurrentError(SchemaType::GetAdditionalItemsString(), true);
-    }
-    void TooFewItems(SizeType actualCount, SizeType expectedCount) {
-        AddNumberError(SchemaType::GetMinItemsString(),
-            ValueType(actualCount).Move(), SValue(expectedCount).Move());
-    }
-    void TooManyItems(SizeType actualCount, SizeType expectedCount) {
-        AddNumberError(SchemaType::GetMaxItemsString(),
-            ValueType(actualCount).Move(), SValue(expectedCount).Move());
-    }
-    void DuplicateItems(SizeType index1, SizeType index2) {
-        ValueType duplicates(kArrayType);
-        duplicates.PushBack(index1, GetStateAllocator());
-        duplicates.PushBack(index2, GetStateAllocator());
-        currentError_.SetObject();
-        currentError_.AddMember(GetDuplicatesString(), duplicates, GetStateAllocator());
-        AddCurrentError(SchemaType::GetUniqueItemsString(), true);
-    }
-
-    void TooManyProperties(SizeType actualCount, SizeType expectedCount) {
-        AddNumberError(SchemaType::GetMaxPropertiesString(),
-            ValueType(actualCount).Move(), SValue(expectedCount).Move());
-    }
-    void TooFewProperties(SizeType actualCount, SizeType expectedCount) {
-        AddNumberError(SchemaType::GetMinPropertiesString(),
-            ValueType(actualCount).Move(), SValue(expectedCount).Move());
-    }
-    void StartMissingProperties() {
-        currentError_.SetArray();
-    }
-    void AddMissingProperty(const SValue& name) {
-        currentError_.PushBack(ValueType(name, GetStateAllocator()).Move(), GetStateAllocator());
-    }
-    bool EndMissingProperties() {
-        if (currentError_.Empty())
-            return false;
-        ValueType error(kObjectType);
-        error.AddMember(GetMissingString(), currentError_, GetStateAllocator());
-        currentError_ = error;
-        AddCurrentError(SchemaType::GetRequiredString());
-        return true;
-    }
-    void PropertyViolations(ISchemaValidator** subvalidators, SizeType count) {
-        for (SizeType i = 0; i < count; ++i)
-            MergeError(static_cast<GenericSchemaValidator*>(subvalidators[i])->GetError());
-    }
-    void DisallowedProperty(const Ch* name, SizeType length) {
-        currentError_.SetObject();
-        currentError_.AddMember(GetDisallowedString(), ValueType(name, length, GetStateAllocator()).Move(), GetStateAllocator());
-        AddCurrentError(SchemaType::GetAdditionalPropertiesString(), true);
-    }
-
-    void StartDependencyErrors() {
-        currentError_.SetObject();
-    }
-    void StartMissingDependentProperties() {
-        missingDependents_.SetArray();
-    }
-    void AddMissingDependentProperty(const SValue& targetName) {
-        missingDependents_.PushBack(ValueType(targetName, GetStateAllocator()).Move(), GetStateAllocator());
-    }
-    void EndMissingDependentProperties(const SValue& sourceName) {
-        if (!missingDependents_.Empty())
-            currentError_.AddMember(ValueType(sourceName, GetStateAllocator()).Move(),
-                missingDependents_, GetStateAllocator());
-    }
-    void AddDependencySchemaError(const SValue& sourceName, ISchemaValidator* subvalidator) {
-        currentError_.AddMember(ValueType(sourceName, GetStateAllocator()).Move(),
-            static_cast<GenericSchemaValidator*>(subvalidator)->GetError(), GetStateAllocator());
-    }
-    bool EndDependencyErrors() {
-        if (currentError_.ObjectEmpty())
-            return false;
-        ValueType error(kObjectType);
-        error.AddMember(GetErrorsString(), currentError_, GetStateAllocator());
-        currentError_ = error;
-        AddCurrentError(SchemaType::GetDependenciesString());
-        return true;
-    }
-
-    void DisallowedValue() {
-        currentError_.SetObject();
-        AddCurrentError(SchemaType::GetEnumString());
-    }
-    void StartDisallowedType() {
-        currentError_.SetArray();
-    }
-    void AddExpectedType(const typename SchemaType::ValueType& expectedType) {
-        currentError_.PushBack(ValueType(expectedType, GetStateAllocator()).Move(), GetStateAllocator());
-    }
-    void EndDisallowedType(const typename SchemaType::ValueType& actualType) {
-        ValueType error(kObjectType);
-        error.AddMember(GetExpectedString(), currentError_, GetStateAllocator());
-        error.AddMember(GetActualString(), ValueType(actualType, GetStateAllocator()).Move(), GetStateAllocator());
-        currentError_ = error;
-        AddCurrentError(SchemaType::GetTypeString());
-    }
-    void NotAllOf(ISchemaValidator** subvalidators, SizeType count) {
-        for (SizeType i = 0; i < count; ++i) {
-            MergeError(static_cast<GenericSchemaValidator*>(subvalidators[i])->GetError());
-        }
-    }
-    void NoneOf(ISchemaValidator** subvalidators, SizeType count) {
-        AddErrorArray(SchemaType::GetAnyOfString(), subvalidators, count);
-    }
-    void NotOneOf(ISchemaValidator** subvalidators, SizeType count) {
-        AddErrorArray(SchemaType::GetOneOfString(), subvalidators, count);
-    }
-    void Disallowed() {
-        currentError_.SetObject();
-        AddCurrentError(SchemaType::GetNotString());
-    }
-
-#define RAPIDJSON_STRING_(name, ...) \
-    static const StringRefType& Get##name##String() {\
-        static const Ch s[] = { __VA_ARGS__, '\0' };\
-        static const StringRefType v(s, static_cast<SizeType>(sizeof(s) / sizeof(Ch) - 1)); \
-        return v;\
-    }
-
-    RAPIDJSON_STRING_(InstanceRef, 'i', 'n', 's', 't', 'a', 'n', 'c', 'e', 'R', 'e', 'f')
-    RAPIDJSON_STRING_(SchemaRef, 's', 'c', 'h', 'e', 'm', 'a', 'R', 'e', 'f')
-    RAPIDJSON_STRING_(Expected, 'e', 'x', 'p', 'e', 'c', 't', 'e', 'd')
-    RAPIDJSON_STRING_(Actual, 'a', 'c', 't', 'u', 'a', 'l')
-    RAPIDJSON_STRING_(Disallowed, 'd', 'i', 's', 'a', 'l', 'l', 'o', 'w', 'e', 'd')
-    RAPIDJSON_STRING_(Missing, 'm', 'i', 's', 's', 'i', 'n', 'g')
-    RAPIDJSON_STRING_(Errors, 'e', 'r', 'r', 'o', 'r', 's')
-    RAPIDJSON_STRING_(Duplicates, 'd', 'u', 'p', 'l', 'i', 'c', 'a', 't', 'e', 's')
-
-#undef RAPIDJSON_STRING_
-
-#if RAPIDJSON_SCHEMA_VERBOSE
-#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_() \
-RAPIDJSON_MULTILINEMACRO_BEGIN\
-    *documentStack_.template Push<Ch>() = '\0';\
-    documentStack_.template Pop<Ch>(1);\
-    internal::PrintInvalidDocument(documentStack_.template Bottom<Ch>());\
-RAPIDJSON_MULTILINEMACRO_END
-#else
-#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_()
-#endif
-
-#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_(method, arg1)\
-    if (!valid_) return false; \
-    if (!BeginValue() || !CurrentSchema().method arg1) {\
-        RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_();\
-        return valid_ = false;\
-    }
-
-#define RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2)\
-    for (Context* context = schemaStack_.template Bottom<Context>(); context != schemaStack_.template End<Context>(); context++) {\
-        if (context->hasher)\
-            static_cast<HasherType*>(context->hasher)->method arg2;\
-        if (context->validators)\
-            for (SizeType i_ = 0; i_ < context->validatorCount; i_++)\
-                static_cast<GenericSchemaValidator*>(context->validators[i_])->method arg2;\
-        if (context->patternPropertiesValidators)\
-            for (SizeType i_ = 0; i_ < context->patternPropertiesValidatorCount; i_++)\
-                static_cast<GenericSchemaValidator*>(context->patternPropertiesValidators[i_])->method arg2;\
-    }
-
-#define RAPIDJSON_SCHEMA_HANDLE_END_(method, arg2)\
-    return valid_ = EndValue() && (!outputHandler_ || outputHandler_->method arg2)
-
-#define RAPIDJSON_SCHEMA_HANDLE_VALUE_(method, arg1, arg2) \
-    RAPIDJSON_SCHEMA_HANDLE_BEGIN_   (method, arg1);\
-    RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2);\
-    RAPIDJSON_SCHEMA_HANDLE_END_     (method, arg2)
-
-    bool Null()             { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Null,   (CurrentContext()), ( )); }
-    bool Bool(bool b)       { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Bool,   (CurrentContext(), b), (b)); }
-    bool Int(int i)         { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int,    (CurrentContext(), i), (i)); }
-    bool Uint(unsigned u)   { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint,   (CurrentContext(), u), (u)); }
-    bool Int64(int64_t i)   { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int64,  (CurrentContext(), i), (i)); }
-    bool Uint64(uint64_t u) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint64, (CurrentContext(), u), (u)); }
-    bool Double(double d)   { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Double, (CurrentContext(), d), (d)); }
-    bool RawNumber(const Ch* str, SizeType length, bool copy)
-                                    { RAPIDJSON_SCHEMA_HANDLE_VALUE_(String, (CurrentContext(), str, length, copy), (str, length, copy)); }
-    bool String(const Ch* str, SizeType length, bool copy)
-                                    { RAPIDJSON_SCHEMA_HANDLE_VALUE_(String, (CurrentContext(), str, length, copy), (str, length, copy)); }
-
-    bool StartObject() {
-        RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartObject, (CurrentContext()));
-        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartObject, ());
-        return valid_ = !outputHandler_ || outputHandler_->StartObject();
-    }
-    
-    bool Key(const Ch* str, SizeType len, bool copy) {
-        if (!valid_) return false;
-        AppendToken(str, len);
-        if (!CurrentSchema().Key(CurrentContext(), str, len, copy)) return valid_ = false;
-        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(Key, (str, len, copy));
-        return valid_ = !outputHandler_ || outputHandler_->Key(str, len, copy);
-    }
-    
-    bool EndObject(SizeType memberCount) { 
-        if (!valid_) return false;
-        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(EndObject, (memberCount));
-        if (!CurrentSchema().EndObject(CurrentContext(), memberCount)) return valid_ = false;
-        RAPIDJSON_SCHEMA_HANDLE_END_(EndObject, (memberCount));
-    }
-
-    bool StartArray() {
-        RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartArray, (CurrentContext()));
-        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartArray, ());
-        return valid_ = !outputHandler_ || outputHandler_->StartArray();
-    }
-    
-    bool EndArray(SizeType elementCount) {
-        if (!valid_) return false;
-        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(EndArray, (elementCount));
-        if (!CurrentSchema().EndArray(CurrentContext(), elementCount)) return valid_ = false;
-        RAPIDJSON_SCHEMA_HANDLE_END_(EndArray, (elementCount));
-    }
-
-#undef RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_
-#undef RAPIDJSON_SCHEMA_HANDLE_BEGIN_
-#undef RAPIDJSON_SCHEMA_HANDLE_PARALLEL_
-#undef RAPIDJSON_SCHEMA_HANDLE_VALUE_
-
-    // Implementation of ISchemaStateFactory<SchemaType>
-    virtual ISchemaValidator* CreateSchemaValidator(const SchemaType& root) {
-        return new (GetStateAllocator().Malloc(sizeof(GenericSchemaValidator))) GenericSchemaValidator(*schemaDocument_, root, documentStack_.template Bottom<char>(), documentStack_.GetSize(),
-#if RAPIDJSON_SCHEMA_VERBOSE
-        depth_ + 1,
-#endif
-        &GetStateAllocator());
-    }
-
-    virtual void DestroySchemaValidator(ISchemaValidator* validator) {
-        GenericSchemaValidator* v = static_cast<GenericSchemaValidator*>(validator);
-        v->~GenericSchemaValidator();
-        StateAllocator::Free(v);
-    }
-
-    virtual void* CreateHasher() {
-        return new (GetStateAllocator().Malloc(sizeof(HasherType))) HasherType(&GetStateAllocator());
-    }
-
-    virtual uint64_t GetHashCode(void* hasher) {
-        return static_cast<HasherType*>(hasher)->GetHashCode();
-    }
-
-    virtual void DestroryHasher(void* hasher) {
-        HasherType* h = static_cast<HasherType*>(hasher);
-        h->~HasherType();
-        StateAllocator::Free(h);
-    }
-
-    virtual void* MallocState(size_t size) {
-        return GetStateAllocator().Malloc(size);
-    }
-
-    virtual void FreeState(void* p) {
-        StateAllocator::Free(p);
-    }
-
-private:
-    typedef typename SchemaType::Context Context;
-    typedef GenericValue<UTF8<>, StateAllocator> HashCodeArray;
-    typedef internal::Hasher<EncodingType, StateAllocator> HasherType;
-
-    GenericSchemaValidator( 
-        const SchemaDocumentType& schemaDocument,
-        const SchemaType& root,
-        const char* basePath, size_t basePathSize,
-#if RAPIDJSON_SCHEMA_VERBOSE
-        unsigned depth,
-#endif
-        StateAllocator* allocator = 0,
-        size_t schemaStackCapacity = kDefaultSchemaStackCapacity,
-        size_t documentStackCapacity = kDefaultDocumentStackCapacity)
-        :
-        schemaDocument_(&schemaDocument),
-        root_(root),
-        stateAllocator_(allocator),
-        ownStateAllocator_(0),
-        schemaStack_(allocator, schemaStackCapacity),
-        documentStack_(allocator, documentStackCapacity),
-        outputHandler_(0),
-        error_(kObjectType),
-        currentError_(),
-        missingDependents_(),
-        valid_(true)
-#if RAPIDJSON_SCHEMA_VERBOSE
-        , depth_(depth)
-#endif
-    {
-        if (basePath && basePathSize)
-            memcpy(documentStack_.template Push<char>(basePathSize), basePath, basePathSize);
-    }
-
-    StateAllocator& GetStateAllocator() {
-        if (!stateAllocator_)
-            stateAllocator_ = ownStateAllocator_ = RAPIDJSON_NEW(StateAllocator)();
-        return *stateAllocator_;
-    }
-
-    bool BeginValue() {
-        if (schemaStack_.Empty())
-            PushSchema(root_);
-        else {
-            if (CurrentContext().inArray)
-                internal::TokenHelper<internal::Stack<StateAllocator>, Ch>::AppendIndexToken(documentStack_, CurrentContext().arrayElementIndex);
-
-            if (!CurrentSchema().BeginValue(CurrentContext()))
-                return false;
-
-            SizeType count = CurrentContext().patternPropertiesSchemaCount;
-            const SchemaType** sa = CurrentContext().patternPropertiesSchemas;
-            typename Context::PatternValidatorType patternValidatorType = CurrentContext().valuePatternValidatorType;
-            bool valueUniqueness = CurrentContext().valueUniqueness;
-            RAPIDJSON_ASSERT(CurrentContext().valueSchema);
-            PushSchema(*CurrentContext().valueSchema);
-
-            if (count > 0) {
-                CurrentContext().objectPatternValidatorType = patternValidatorType;
-                ISchemaValidator**& va = CurrentContext().patternPropertiesValidators;
-                SizeType& validatorCount = CurrentContext().patternPropertiesValidatorCount;
-                va = static_cast<ISchemaValidator**>(MallocState(sizeof(ISchemaValidator*) * count));
-                for (SizeType i = 0; i < count; i++)
-                    va[validatorCount++] = CreateSchemaValidator(*sa[i]);
-            }
-
-            CurrentContext().arrayUniqueness = valueUniqueness;
-        }
-        return true;
-    }
-
-    bool EndValue() {
-        if (!CurrentSchema().EndValue(CurrentContext()))
-            return false;
-
-#if RAPIDJSON_SCHEMA_VERBOSE
-        GenericStringBuffer<EncodingType> sb;
-        schemaDocument_->GetPointer(&CurrentSchema()).Stringify(sb);
-
-        *documentStack_.template Push<Ch>() = '\0';
-        documentStack_.template Pop<Ch>(1);
-        internal::PrintValidatorPointers(depth_, sb.GetString(), documentStack_.template Bottom<Ch>());
-#endif
-
-        uint64_t h = CurrentContext().arrayUniqueness ? static_cast<HasherType*>(CurrentContext().hasher)->GetHashCode() : 0;
-        
-        PopSchema();
-
-        if (!schemaStack_.Empty()) {
-            Context& context = CurrentContext();
-            if (context.valueUniqueness) {
-                HashCodeArray* a = static_cast<HashCodeArray*>(context.arrayElementHashCodes);
-                if (!a)
-                    CurrentContext().arrayElementHashCodes = a = new (GetStateAllocator().Malloc(sizeof(HashCodeArray))) HashCodeArray(kArrayType);
-                for (typename HashCodeArray::ConstValueIterator itr = a->Begin(); itr != a->End(); ++itr)
-                    if (itr->GetUint64() == h) {
-                        DuplicateItems(static_cast<SizeType>(itr - a->Begin()), a->Size());
-                        RAPIDJSON_INVALID_KEYWORD_RETURN(SchemaType::GetUniqueItemsString());
-                    }
-                a->PushBack(h, GetStateAllocator());
-            }
-        }
-
-        // Remove the last token of document pointer
-        while (!documentStack_.Empty() && *documentStack_.template Pop<Ch>(1) != '/')
-            ;
-
-        return true;
-    }
-
-    void AppendToken(const Ch* str, SizeType len) {
-        documentStack_.template Reserve<Ch>(1 + len * 2); // worst case all characters are escaped as two characters
-        *documentStack_.template PushUnsafe<Ch>() = '/';
-        for (SizeType i = 0; i < len; i++) {
-            if (str[i] == '~') {
-                *documentStack_.template PushUnsafe<Ch>() = '~';
-                *documentStack_.template PushUnsafe<Ch>() = '0';
-            }
-            else if (str[i] == '/') {
-                *documentStack_.template PushUnsafe<Ch>() = '~';
-                *documentStack_.template PushUnsafe<Ch>() = '1';
-            }
-            else
-                *documentStack_.template PushUnsafe<Ch>() = str[i];
-        }
-    }
-
-    RAPIDJSON_FORCEINLINE void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push<Context>()) Context(*this, *this, &schema); }
-    
-    RAPIDJSON_FORCEINLINE void PopSchema() {
-        Context* c = schemaStack_.template Pop<Context>(1);
-        if (HashCodeArray* a = static_cast<HashCodeArray*>(c->arrayElementHashCodes)) {
-            a->~HashCodeArray();
-            StateAllocator::Free(a);
-        }
-        c->~Context();
-    }
-
-    void AddErrorLocation(ValueType& result, bool parent) {
-        GenericStringBuffer<EncodingType> sb;
-        PointerType instancePointer = GetInvalidDocumentPointer();
-        ((parent && instancePointer.GetTokenCount() > 0)
-            ? PointerType(instancePointer.GetTokens(), instancePointer.GetTokenCount() - 1)
-            : instancePointer).StringifyUriFragment(sb);
-        ValueType instanceRef(sb.GetString(), static_cast<SizeType>(sb.GetSize() / sizeof(Ch)),
-            GetStateAllocator());
-        result.AddMember(GetInstanceRefString(), instanceRef, GetStateAllocator());
-        sb.Clear();
-        memcpy(sb.Push(CurrentSchema().GetURI().GetStringLength()),
-            CurrentSchema().GetURI().GetString(),
-            CurrentSchema().GetURI().GetStringLength() * sizeof(Ch));
-        GetInvalidSchemaPointer().StringifyUriFragment(sb);
-        ValueType schemaRef(sb.GetString(), static_cast<SizeType>(sb.GetSize() / sizeof(Ch)),
-            GetStateAllocator());
-        result.AddMember(GetSchemaRefString(), schemaRef, GetStateAllocator());
-    }
-
-    void AddError(ValueType& keyword, ValueType& error) {
-        typename ValueType::MemberIterator member = error_.FindMember(keyword);
-        if (member == error_.MemberEnd())
-            error_.AddMember(keyword, error, GetStateAllocator());
-        else {
-            if (member->value.IsObject()) {
-                ValueType errors(kArrayType);
-                errors.PushBack(member->value, GetStateAllocator());
-                member->value = errors;
-            }
-            member->value.PushBack(error, GetStateAllocator());
-        }
-    }
-
-    void AddCurrentError(const typename SchemaType::ValueType& keyword, bool parent = false) {
-        AddErrorLocation(currentError_, parent);
-        AddError(ValueType(keyword, GetStateAllocator(), false).Move(), currentError_);
-    }
-
-    void MergeError(ValueType& other) {
-        for (typename ValueType::MemberIterator it = other.MemberBegin(), end = other.MemberEnd(); it != end; ++it) {
-            AddError(it->name, it->value);
-        }
-    }
-
-    void AddNumberError(const typename SchemaType::ValueType& keyword, ValueType& actual, const SValue& expected,
-        const typename SchemaType::ValueType& (*exclusive)() = 0) {
-        currentError_.SetObject();
-        currentError_.AddMember(GetActualString(), actual, GetStateAllocator());
-        currentError_.AddMember(GetExpectedString(), ValueType(expected, GetStateAllocator()).Move(), GetStateAllocator());
-        if (exclusive)
-            currentError_.AddMember(ValueType(exclusive(), GetStateAllocator()).Move(), true, GetStateAllocator());
-        AddCurrentError(keyword);
-    }
-
-    void AddErrorArray(const typename SchemaType::ValueType& keyword,
-        ISchemaValidator** subvalidators, SizeType count) {
-        ValueType errors(kArrayType);
-        for (SizeType i = 0; i < count; ++i)
-            errors.PushBack(static_cast<GenericSchemaValidator*>(subvalidators[i])->GetError(), GetStateAllocator());
-        currentError_.SetObject();
-        currentError_.AddMember(GetErrorsString(), errors, GetStateAllocator());
-        AddCurrentError(keyword);
-    }
-
-    const SchemaType& CurrentSchema() const { return *schemaStack_.template Top<Context>()->schema; }
-    Context& CurrentContext() { return *schemaStack_.template Top<Context>(); }
-    const Context& CurrentContext() const { return *schemaStack_.template Top<Context>(); }
-
-    static const size_t kDefaultSchemaStackCapacity = 1024;
-    static const size_t kDefaultDocumentStackCapacity = 256;
-    const SchemaDocumentType* schemaDocument_;
-    const SchemaType& root_;
-    StateAllocator* stateAllocator_;
-    StateAllocator* ownStateAllocator_;
-    internal::Stack<StateAllocator> schemaStack_;    //!< stack to store the current path of schema (BaseSchemaType *)
-    internal::Stack<StateAllocator> documentStack_;  //!< stack to store the current path of validating document (Ch)
-    OutputHandler* outputHandler_;
-    ValueType error_;
-    ValueType currentError_;
-    ValueType missingDependents_;
-    bool valid_;
-#if RAPIDJSON_SCHEMA_VERBOSE
-    unsigned depth_;
-#endif
-};
-
-typedef GenericSchemaValidator<SchemaDocument> SchemaValidator;
-
-///////////////////////////////////////////////////////////////////////////////
-// SchemaValidatingReader
-
-//! A helper class for parsing with validation.
-/*!
-    This helper class is a functor, designed as a parameter of \ref GenericDocument::Populate().
-
-    \tparam parseFlags Combination of \ref ParseFlag.
-    \tparam InputStream Type of input stream, implementing Stream concept.
-    \tparam SourceEncoding Encoding of the input stream.
-    \tparam SchemaDocumentType Type of schema document.
-    \tparam StackAllocator Allocator type for stack.
-*/
-template <
-    unsigned parseFlags,
-    typename InputStream,
-    typename SourceEncoding,
-    typename SchemaDocumentType = SchemaDocument,
-    typename StackAllocator = CrtAllocator>
-class SchemaValidatingReader {
-public:
-    typedef typename SchemaDocumentType::PointerType PointerType;
-    typedef typename InputStream::Ch Ch;
-    typedef GenericValue<SourceEncoding, StackAllocator> ValueType;
-
-    //! Constructor
-    /*!
-        \param is Input stream.
-        \param sd Schema document.
-    */
-    SchemaValidatingReader(InputStream& is, const SchemaDocumentType& sd) : is_(is), sd_(sd), invalidSchemaKeyword_(), error_(kObjectType), isValid_(true) {}
-
-    template <typename Handler>
-    bool operator()(Handler& handler) {
-        GenericReader<SourceEncoding, typename SchemaDocumentType::EncodingType, StackAllocator> reader;
-        GenericSchemaValidator<SchemaDocumentType, Handler> validator(sd_, handler);
-        parseResult_ = reader.template Parse<parseFlags>(is_, validator);
-
-        isValid_ = validator.IsValid();
-        if (isValid_) {
-            invalidSchemaPointer_ = PointerType();
-            invalidSchemaKeyword_ = 0;
-            invalidDocumentPointer_ = PointerType();
-            error_.SetObject();
-        }
-        else {
-            invalidSchemaPointer_ = validator.GetInvalidSchemaPointer();
-            invalidSchemaKeyword_ = validator.GetInvalidSchemaKeyword();
-            invalidDocumentPointer_ = validator.GetInvalidDocumentPointer();
-            error_.CopyFrom(validator.GetError(), allocator_);
-        }
-
-        return parseResult_;
-    }
-
-    const ParseResult& GetParseResult() const { return parseResult_; }
-    bool IsValid() const { return isValid_; }
-    const PointerType& GetInvalidSchemaPointer() const { return invalidSchemaPointer_; }
-    const Ch* GetInvalidSchemaKeyword() const { return invalidSchemaKeyword_; }
-    const PointerType& GetInvalidDocumentPointer() const { return invalidDocumentPointer_; }
-    const ValueType& GetError() const { return error_; }
-
-private:
-    InputStream& is_;
-    const SchemaDocumentType& sd_;
-
-    ParseResult parseResult_;
-    PointerType invalidSchemaPointer_;
-    const Ch* invalidSchemaKeyword_;
-    PointerType invalidDocumentPointer_;
-    StackAllocator allocator_;
-    ValueType error_;
-    bool isValid_;
-};
-
-RAPIDJSON_NAMESPACE_END
-RAPIDJSON_DIAG_POP
-
-#endif // RAPIDJSON_SCHEMA_H_
diff --git a/src/native/external/rapidjson/stream.h b/src/native/external/rapidjson/stream.h
index 7f2643e48142..1fd70915c547 100644
--- a/src/native/external/rapidjson/stream.h
+++ b/src/native/external/rapidjson/stream.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 //
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
diff --git a/src/native/external/rapidjson/stringbuffer.h b/src/native/external/rapidjson/stringbuffer.h
index 4e38b82c3d98..82ad3ca6bbfe 100644
--- a/src/native/external/rapidjson/stringbuffer.h
+++ b/src/native/external/rapidjson/stringbuffer.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
diff --git a/src/native/external/rapidjson/writer.h b/src/native/external/rapidjson/writer.h
index 6f5b6903467a..632e02ce74a5 100644
--- a/src/native/external/rapidjson/writer.h
+++ b/src/native/external/rapidjson/writer.h
@@ -1,6 +1,6 @@
 // Tencent is pleased to support the open source community by making RapidJSON available.
 // 
-// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
 //
 // Licensed under the MIT License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
@@ -16,6 +16,7 @@
 #define RAPIDJSON_WRITER_H_
 
 #include "stream.h"
+#include "internal/clzll.h"
 #include "internal/meta.h"
 #include "internal/stack.h"
 #include "internal/strfunc.h"
@@ -66,6 +67,7 @@ enum WriteFlag {
     kWriteNoFlags = 0,              //!< No flags are set.
     kWriteValidateEncodingFlag = 1, //!< Validate encoding of JSON strings.
     kWriteNanAndInfFlag = 2,        //!< Allow writing of Infinity, -Infinity and NaN.
+    kWriteNanAndInfNullFlag = 4,    //!< Allow writing of Infinity, -Infinity and NaN as null.
     kWriteDefaultFlags = RAPIDJSON_WRITE_DEFAULT_FLAGS  //!< Default write flags. Can be customized by defining RAPIDJSON_WRITE_DEFAULT_FLAGS
 };
 
@@ -226,7 +228,7 @@ class Writer {
       return Key(str.data(), SizeType(str.size()));
     }
 #endif
-	
+
     bool EndObject(SizeType memberCount = 0) {
         (void)memberCount;
         RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); // not inside an Object
@@ -282,6 +284,8 @@ class Writer {
         os_->Flush();
     }
 
+    static const size_t kDefaultLevelDepth = 32;
+
 protected:
     //! Information for each nested level
     struct Level {
@@ -290,8 +294,6 @@ class Writer {
         bool inArray;       //!< true if in array, otherwise in object
     };
 
-    static const size_t kDefaultLevelDepth = 32;
-
     bool WriteNull()  {
         PutReserve(*os_, 4);
         PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 'l'); return true;
@@ -347,8 +349,13 @@ class Writer {
 
     bool WriteDouble(double d) {
         if (internal::Double(d).IsNanOrInf()) {
-            if (!(writeFlags & kWriteNanAndInfFlag))
+            if (!(writeFlags & kWriteNanAndInfFlag) && !(writeFlags & kWriteNanAndInfNullFlag))
                 return false;
+            if (writeFlags & kWriteNanAndInfNullFlag) {
+                PutReserve(*os_, 4);
+                PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 'l');
+                return true;
+            }
             if (internal::Double(d).IsNan()) {
                 PutReserve(*os_, 3);
                 PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N');
@@ -547,6 +554,11 @@ inline bool Writer<StringBuffer>::WriteDouble(double d) {
         // Note: This code path can only be reached if (RAPIDJSON_WRITE_DEFAULT_FLAGS & kWriteNanAndInfFlag).
         if (!(kWriteDefaultFlags & kWriteNanAndInfFlag))
             return false;
+        if (kWriteDefaultFlags & kWriteNanAndInfNullFlag) {
+            PutReserve(*os_, 4);
+            PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 'l');
+            return true;
+        }
         if (internal::Double(d).IsNan()) {
             PutReserve(*os_, 3);
             PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N');
@@ -668,19 +680,19 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
         x = vorrq_u8(x, vcltq_u8(s, s3));
 
         x = vrev64q_u8(x);                     // Rev in 64
-        uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
-        uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
+        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
 
         SizeType len = 0;
         bool escaped = false;
         if (low == 0) {
             if (high != 0) {
-                unsigned lz = (unsigned)__builtin_clzll(high);
+                uint32_t lz = internal::clzll(high);
                 len = 8 + (lz >> 3);
                 escaped = true;
             }
         } else {
-            unsigned lz = (unsigned)__builtin_clzll(low);
+            uint32_t lz = internal::clzll(low);
             len = lz >> 3;
             escaped = true;
         }
diff --git a/src/native/external/zlib-version.txt b/src/native/external/zlib-version.txt
index fcac66cc4645..5da9869df33f 100644
--- a/src/native/external/zlib-version.txt
+++ b/src/native/external/zlib-version.txt
@@ -1,17 +1,9 @@
-v1.2.13
-(04f42ceca40f73e2978b50e93806c2a18c1281fc)
+v1.3.1
+(51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf)
 
-https://github.com/madler/zlib/releases/tag/v1.2.13
+https://github.com/madler/zlib/releases/tag/v1.3.1
 
 We have removed zlib.3.pdf from our local copy, as it is a binary file which is
 not needed for our compilation.
 
-We have also cherry-picked into our local copy:
-
-- https://github.com/madler/zlib/commit/e554695638228b846d49657f31eeff0ca4680e8a
-
-  This patch only affects memLevel 9 compression. .NET doesn't currently use this
-  memLevel, but we'll take this patch out of an abundance of caution just in case
-  we enable this functionality in a future release.
-
 We have also applied the custom patches under the patches/zlib folder.
diff --git a/src/native/external/zlib/CMakeLists.txt b/src/native/external/zlib/CMakeLists.txt
index 36885aee7145..133c29b7e4fc 100644
--- a/src/native/external/zlib/CMakeLists.txt
+++ b/src/native/external/zlib/CMakeLists.txt
@@ -1,9 +1,11 @@
-cmake_minimum_required(VERSION 2.4.4)
+cmake_minimum_required(VERSION 2.4.4...3.15.0)
 set(CMAKE_ALLOW_LOOSE_LOOP_CONSTRUCTS ON)
 
 project(zlib C)
 
-set(VERSION "1.2.13")
+set(VERSION "1.3.1")
+
+option(ZLIB_BUILD_EXAMPLES "Enable Zlib Examples" ON)
 
 set(INSTALL_BIN_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables")
 set(INSTALL_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries")
@@ -148,7 +150,9 @@ if(MINGW)
 endif(MINGW)
 
 add_library(zlib SHARED ${ZLIB_SRCS} ${ZLIB_DLL_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})
+target_include_directories(zlib PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
 add_library(zlibstatic STATIC ${ZLIB_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})
+target_include_directories(zlibstatic PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
 set_target_properties(zlib PROPERTIES DEFINE_SYMBOL ZLIB_DLL)
 set_target_properties(zlib PROPERTIES SOVERSION 1)
 
@@ -166,7 +170,7 @@ endif()
 if(UNIX)
     # On unix-like platforms the library is almost always called libz
    set_target_properties(zlib zlibstatic PROPERTIES OUTPUT_NAME z)
-   if(NOT APPLE)
+   if(NOT APPLE AND NOT(CMAKE_SYSTEM_NAME STREQUAL AIX))
      set_target_properties(zlib PROPERTIES LINK_FLAGS "-Wl,--version-script,\"${CMAKE_CURRENT_SOURCE_DIR}/zlib.map\"")
    endif()
 elseif(BUILD_SHARED_LIBS AND WIN32)
@@ -193,23 +197,24 @@ endif()
 #============================================================================
 # Example binaries
 #============================================================================
-
-add_executable(example test/example.c)
-target_link_libraries(example zlib)
-add_test(example example)
-
-add_executable(minigzip test/minigzip.c)
-target_link_libraries(minigzip zlib)
-
-if(HAVE_OFF64_T)
-    add_executable(example64 test/example.c)
-    target_link_libraries(example64 zlib)
-    set_target_properties(example64 PROPERTIES COMPILE_FLAGS "-D_FILE_OFFSET_BITS=64")
-    add_test(example64 example64)
-
-    add_executable(minigzip64 test/minigzip.c)
-    target_link_libraries(minigzip64 zlib)
-    set_target_properties(minigzip64 PROPERTIES COMPILE_FLAGS "-D_FILE_OFFSET_BITS=64")
+if(ZLIB_BUILD_EXAMPLES)
+    add_executable(example test/example.c)
+    target_link_libraries(example zlib)
+    add_test(example example)
+
+    add_executable(minigzip test/minigzip.c)
+    target_link_libraries(minigzip zlib)
+
+    if(HAVE_OFF64_T)
+        add_executable(example64 test/example.c)
+        target_link_libraries(example64 zlib)
+        set_target_properties(example64 PROPERTIES COMPILE_FLAGS "-D_FILE_OFFSET_BITS=64")
+        add_test(example64 example64)
+
+        add_executable(minigzip64 test/minigzip.c)
+        target_link_libraries(minigzip64 zlib)
+        set_target_properties(minigzip64 PROPERTIES COMPILE_FLAGS "-D_FILE_OFFSET_BITS=64")
+    endif()
 endif()
 
 add_compile_options(-Wno-deprecated-non-prototype)
diff --git a/src/native/external/zlib/ChangeLog b/src/native/external/zlib/ChangeLog
index 457526bc6a51..b801a1031ec0 100644
--- a/src/native/external/zlib/ChangeLog
+++ b/src/native/external/zlib/ChangeLog
@@ -1,6 +1,34 @@
 
                 ChangeLog file for zlib
 
+Changes in 1.3.1 (22 Jan 2024)
+- Reject overflows of zip header fields in minizip
+- Fix bug in inflateSync() for data held in bit buffer
+- Add LIT_MEM define to use more memory for a small deflate speedup
+- Fix decision on the emission of Zip64 end records in minizip
+- Add bounds checking to ERR_MSG() macro, used by zError()
+- Neutralize zip file traversal attacks in miniunz
+- Fix a bug in ZLIB_DEBUG compiles in check_match()
+- Various portability and appearance improvements
+
+Changes in 1.3 (18 Aug 2023)
+- Remove K&R function definitions and zlib2ansi
+- Fix bug in deflateBound() for level 0 and memLevel 9
+- Fix bug when gzungetc() is used immediately after gzopen()
+- Fix bug when using gzflush() with a very small buffer
+- Fix crash when gzsetparams() attempted for transparent write
+- Fix test/example.c to work with FORCE_STORED
+- Rewrite of zran in examples (see zran.c version history)
+- Fix minizip to allow it to open an empty zip file
+- Fix reading disk number start on zip64 files in minizip
+- Fix logic error in minizip argument processing
+- Add minizip testing to Makefile
+- Read multiple bytes instead of byte-by-byte in minizip unzip.c
+- Add memory sanitizer to configure (--memory)
+- Various portability improvements
+- Various documentation improvements
+- Various spelling and typo corrections
+
 Changes in 1.2.13 (13 Oct 2022)
 - Fix configure issue that discarded provided CC definition
 - Correct incorrect inputs provided to the CRC functions
@@ -1445,7 +1473,7 @@ Changes in 0.99 (27 Jan 96)
 - fix typo in Make_vms.com (f$trnlnm -> f$getsyi)
 - in fcalloc, normalize pointer if size > 65520 bytes
 - don't use special fcalloc for 32 bit Borland C++
-- use STDC instead of __GO32__ to avoid redeclaring exit, calloc, etc...
+- use STDC instead of __GO32__ to avoid redeclaring exit, calloc, etc.
 - use Z_BINARY instead of BINARY
 - document that gzclose after gzdopen will close the file
 - allow "a" as mode in gzopen
diff --git a/src/native/external/zlib/FAQ b/src/native/external/zlib/FAQ
index 99b7cf92e454..92f5d3e29fab 100644
--- a/src/native/external/zlib/FAQ
+++ b/src/native/external/zlib/FAQ
@@ -4,7 +4,7 @@
 
 If your question is not there, please check the zlib home page
 http://zlib.net/ which may have more recent information.
-The lastest zlib FAQ is at http://zlib.net/zlib_faq.html
+The latest zlib FAQ is at http://zlib.net/zlib_faq.html
 
 
  1. Is zlib Y2K-compliant?
@@ -14,8 +14,7 @@ The lastest zlib FAQ is at http://zlib.net/zlib_faq.html
  2. Where can I get a Windows DLL version?
 
     The zlib sources can be compiled without change to produce a DLL.  See the
-    file win32/DLL_FAQ.txt in the zlib distribution.  Pointers to the
-    precompiled DLL are found in the zlib web site at http://zlib.net/ .
+    file win32/DLL_FAQ.txt in the zlib distribution.
 
  3. Where can I get a Visual Basic interface to zlib?
 
diff --git a/src/native/external/zlib/Makefile.in b/src/native/external/zlib/Makefile.in
index 7d2713f4c574..cb8b00a9b078 100644
--- a/src/native/external/zlib/Makefile.in
+++ b/src/native/external/zlib/Makefile.in
@@ -1,5 +1,5 @@
 # Makefile for zlib
-# Copyright (C) 1995-2017 Jean-loup Gailly, Mark Adler
+# Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler
 # For conditions of distribution and use, see copyright notice in zlib.h
 
 # To compile and test, type:
@@ -22,13 +22,13 @@ CFLAGS=-O
 
 SFLAGS=-O
 LDFLAGS=
-TEST_LDFLAGS=$(LDFLAGS) -L. libz.a
+TEST_LIBS=-L. libz.a
 LDSHARED=$(CC)
 CPP=$(CC) -E
 
 STATICLIB=libz.a
 SHAREDLIB=libz.so
-SHAREDLIBV=libz.so.1.2.13
+SHAREDLIBV=libz.so.1.3.1
 SHAREDLIBM=libz.so.1
 LIBS=$(STATICLIB) $(SHAREDLIBV)
 
@@ -282,10 +282,10 @@ placebo $(SHAREDLIBV): $(PIC_OBJS) libz.a
 	-@rmdir objs
 
 example$(EXE): example.o $(STATICLIB)
-	$(CC) $(CFLAGS) -o $@ example.o $(TEST_LDFLAGS)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ example.o $(TEST_LIBS)
 
 minigzip$(EXE): minigzip.o $(STATICLIB)
-	$(CC) $(CFLAGS) -o $@ minigzip.o $(TEST_LDFLAGS)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ minigzip.o $(TEST_LIBS)
 
 examplesh$(EXE): example.o $(SHAREDLIBV)
 	$(CC) $(CFLAGS) -o $@ example.o $(LDFLAGS) -L. $(SHAREDLIBV)
@@ -294,10 +294,10 @@ minigzipsh$(EXE): minigzip.o $(SHAREDLIBV)
 	$(CC) $(CFLAGS) -o $@ minigzip.o $(LDFLAGS) -L. $(SHAREDLIBV)
 
 example64$(EXE): example64.o $(STATICLIB)
-	$(CC) $(CFLAGS) -o $@ example64.o $(TEST_LDFLAGS)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ example64.o $(TEST_LIBS)
 
 minigzip64$(EXE): minigzip64.o $(STATICLIB)
-	$(CC) $(CFLAGS) -o $@ minigzip64.o $(TEST_LDFLAGS)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ minigzip64.o $(TEST_LIBS)
 
 install-libs: $(LIBS)
 	-@if [ ! -d $(DESTDIR)$(exec_prefix)  ]; then mkdir -p $(DESTDIR)$(exec_prefix); fi
@@ -359,8 +359,14 @@ zconf.h.cmakein: $(SRCDIR)zconf.h.in
 zconf: $(SRCDIR)zconf.h.in
 	cp -p $(SRCDIR)zconf.h.in zconf.h
 
+minizip-test: static
+	cd contrib/minizip && { CC="$(CC)" CFLAGS="$(CFLAGS)" $(MAKE) test ; cd ../.. ; }
+
+minizip-clean:
+	cd contrib/minizip && { $(MAKE) clean ; cd ../.. ; }
+
 mostlyclean: clean
-clean:
+clean: minizip-clean
 	rm -f *.o *.lo *~ \
 	   example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
 	   example64$(EXE) minigzip64$(EXE) \
diff --git a/src/native/external/zlib/README b/src/native/external/zlib/README
index ba34d1894a9b..c5f917540b6f 100644
--- a/src/native/external/zlib/README
+++ b/src/native/external/zlib/README
@@ -1,6 +1,6 @@
 ZLIB DATA COMPRESSION LIBRARY
 
-zlib 1.2.13 is a general purpose data compression library.  All the code is
+zlib 1.3.1 is a general purpose data compression library.  All the code is
 thread safe.  The data format used by the zlib library is described by RFCs
 (Request for Comments) 1950 to 1952 in the files
 http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and
@@ -29,18 +29,17 @@ PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help.
 
 Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan.  1997
 issue of Dr.  Dobb's Journal; a copy of the article is available at
-http://marknelson.us/1997/01/01/zlib-engine/ .
+https://marknelson.us/posts/1997/01/01/zlib-engine.html .
 
-The changes made in version 1.2.13 are documented in the file ChangeLog.
+The changes made in version 1.3.1 are documented in the file ChangeLog.
 
 Unsupported third party contributions are provided in directory contrib/ .
 
-zlib is available in Java using the java.util.zip package, documented at
-http://java.sun.com/developer/technicalArticles/Programming/compression/ .
+zlib is available in Java using the java.util.zip package. Follow the API
+Documentation link at: https://docs.oracle.com/search/?q=java.util.zip .
 
-A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is available
-at CPAN (Comprehensive Perl Archive Network) sites, including
-http://search.cpan.org/~pmqs/IO-Compress-Zlib/ .
+A Perl interface to zlib and bzip2 written by Paul Marquess <pmqs@cpan.org>
+can be found at https://github.com/pmqs/IO-Compress .
 
 A Python interface to zlib written by A.M. Kuchling <amk@amk.ca> is
 available in Python 1.5 and later versions, see
@@ -64,7 +63,7 @@ Notes for some targets:
 - zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1 it works
   when compiled with cc.
 
-- On Digital Unix 4.0D (formely OSF/1) on AlphaServer, the cc option -std1 is
+- On Digital Unix 4.0D (formerly OSF/1) on AlphaServer, the cc option -std1 is
   necessary to get gzprintf working correctly. This is done by configure.
 
 - zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with
@@ -84,7 +83,7 @@ Acknowledgments:
 
 Copyright notice:
 
- (C) 1995-2022 Jean-loup Gailly and Mark Adler
+ (C) 1995-2024 Jean-loup Gailly and Mark Adler
 
   This software is provided 'as-is', without any express or implied
   warranty.  In no event will the authors be held liable for any damages
diff --git a/src/native/external/zlib/adler32.c b/src/native/external/zlib/adler32.c
index d0be4380a39c..04b81d29bad1 100644
--- a/src/native/external/zlib/adler32.c
+++ b/src/native/external/zlib/adler32.c
@@ -7,8 +7,6 @@
 
 #include "zutil.h"
 
-local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
-
 #define BASE 65521U     /* largest prime smaller than 65536 */
 #define NMAX 5552
 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
@@ -60,11 +58,7 @@ local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
 #endif
 
 /* ========================================================================= */
-uLong ZEXPORT adler32_z(adler, buf, len)
-    uLong adler;
-    const Bytef *buf;
-    z_size_t len;
-{
+uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, z_size_t len) {
     unsigned long sum2;
     unsigned n;
 
@@ -131,20 +125,12 @@ uLong ZEXPORT adler32_z(adler, buf, len)
 }
 
 /* ========================================================================= */
-uLong ZEXPORT adler32(adler, buf, len)
-    uLong adler;
-    const Bytef *buf;
-    uInt len;
-{
+uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len) {
     return adler32_z(adler, buf, len);
 }
 
 /* ========================================================================= */
-local uLong adler32_combine_(adler1, adler2, len2)
-    uLong adler1;
-    uLong adler2;
-    z_off64_t len2;
-{
+local uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2) {
     unsigned long sum1;
     unsigned long sum2;
     unsigned rem;
@@ -169,18 +155,10 @@ local uLong adler32_combine_(adler1, adler2, len2)
 }
 
 /* ========================================================================= */
-uLong ZEXPORT adler32_combine(adler1, adler2, len2)
-    uLong adler1;
-    uLong adler2;
-    z_off_t len2;
-{
+uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, z_off_t len2) {
     return adler32_combine_(adler1, adler2, len2);
 }
 
-uLong ZEXPORT adler32_combine64(adler1, adler2, len2)
-    uLong adler1;
-    uLong adler2;
-    z_off64_t len2;
-{
+uLong ZEXPORT adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2) {
     return adler32_combine_(adler1, adler2, len2);
 }
diff --git a/src/native/external/zlib/compress.c b/src/native/external/zlib/compress.c
index 2ad5326c14ec..f43bacf7ab97 100644
--- a/src/native/external/zlib/compress.c
+++ b/src/native/external/zlib/compress.c
@@ -19,13 +19,8 @@
    memory, Z_BUF_ERROR if there was not enough room in the output buffer,
    Z_STREAM_ERROR if the level parameter is invalid.
 */
-int ZEXPORT compress2(dest, destLen, source, sourceLen, level)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong sourceLen;
-    int level;
-{
+int ZEXPORT compress2(Bytef *dest, uLongf *destLen, const Bytef *source,
+                      uLong sourceLen, int level) {
     z_stream stream;
     int err;
     const uInt max = (uInt)-1;
@@ -65,12 +60,8 @@ int ZEXPORT compress2(dest, destLen, source, sourceLen, level)
 
 /* ===========================================================================
  */
-int ZEXPORT compress(dest, destLen, source, sourceLen)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong sourceLen;
-{
+int ZEXPORT compress(Bytef *dest, uLongf *destLen, const Bytef *source,
+                     uLong sourceLen) {
     return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
 }
 
@@ -78,9 +69,7 @@ int ZEXPORT compress(dest, destLen, source, sourceLen)
      If the default memLevel or windowBits for deflateInit() is changed, then
    this function needs to be updated.
  */
-uLong ZEXPORT compressBound(sourceLen)
-    uLong sourceLen;
-{
+uLong ZEXPORT compressBound(uLong sourceLen) {
     return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
            (sourceLen >> 25) + 13;
 }
diff --git a/src/native/external/zlib/configure b/src/native/external/zlib/configure
index fa4d5daaba99..c55098afc4ae 100644
--- a/src/native/external/zlib/configure
+++ b/src/native/external/zlib/configure
@@ -25,7 +25,7 @@ if test $SRCDIR = "."; then
     ZINCOUT="-I."
     SRCDIR=""
 else
-    ZINC='-include zconf.h'
+    ZINC='-I. -include zconf.h'
     ZINCOUT='-I. -I$(SRCDIR)'
     SRCDIR="$SRCDIR/"
 fi
@@ -44,9 +44,8 @@ STATICLIB=libz.a
 
 # extract zlib version numbers from zlib.h
 VER=`sed -n -e '/VERSION "/s/.*"\(.*\)".*/\1/p' < ${SRCDIR}zlib.h`
-VER3=`sed -n -e '/VERSION "/s/.*"\([0-9]*\\.[0-9]*\\.[0-9]*\).*/\1/p' < ${SRCDIR}zlib.h`
-VER2=`sed -n -e '/VERSION "/s/.*"\([0-9]*\\.[0-9]*\)\\..*/\1/p' < ${SRCDIR}zlib.h`
-VER1=`sed -n -e '/VERSION "/s/.*"\([0-9]*\)\\..*/\1/p' < ${SRCDIR}zlib.h`
+VER3=`echo ${VER}|sed -n -e 's/\([0-9]\{1,\}\(\\.[0-9]\{1,\}\)\{1,2\}\).*/\1/p'`
+VER1=`echo ${VER}|sed -n -e 's/\([0-9]\{1,\}\)\\..*/\1/p'`
 
 # establish commands for library building
 if "${CROSS_PREFIX}ar" --version >/dev/null 2>/dev/null || test $? -lt 126; then
@@ -90,7 +89,8 @@ build64=0
 gcc=0
 warn=0
 debug=0
-sanitize=0
+address=0
+memory=0
 old_cc="$CC"
 old_cflags="$CFLAGS"
 OBJC='$(OBJZ) $(OBJG)'
@@ -102,7 +102,7 @@ leave()
   if test "$*" != "0"; then
     echo "** $0 aborting." | tee -a configure.log
   fi
-  rm -f $test.[co] $test $test$shared_ext $test.gcno ./--version
+  rm -rf $test.[co] $test $test$shared_ext $test.gcno $test.dSYM ./--version
   echo -------------------- >> configure.log
   echo >> configure.log
   echo >> configure.log
@@ -141,7 +141,9 @@ case "$1" in
     -c* | --const) zconst=1; shift ;;
     -w* | --warn) warn=1; shift ;;
     -d* | --debug) debug=1; shift ;;
-    --sanitize) sanitize=1; shift ;;
+    --sanitize) address=1; shift ;;
+    --address) address=1; shift ;;
+    --memory) memory=1; shift ;;
     *)
       echo "unknown option: $1" | tee -a configure.log
       echo "$0 --help for help" | tee -a configure.log
@@ -211,8 +213,11 @@ if test "$gcc" -eq 1 && ($cc -c $test.c) >> configure.log 2>&1; then
       CFLAGS="${CFLAGS} -Wall -Wextra"
     fi
   fi
-  if test $sanitize -eq 1; then
-    CFLAGS="${CFLAGS} -g -fsanitize=address"
+  if test $address -eq 1; then
+    CFLAGS="${CFLAGS} -g -fsanitize=address -fno-omit-frame-pointer"
+  fi
+  if test $memory -eq 1; then
+    CFLAGS="${CFLAGS} -g -fsanitize=memory -fno-omit-frame-pointer"
   fi
   if test $debug -eq 1; then
     CFLAGS="${CFLAGS} -DZLIB_DEBUG"
@@ -260,7 +265,9 @@ if test "$gcc" -eq 1 && ($cc -c $test.c) >> configure.log 2>&1; then
         SHAREDLIBV=libz.$VER$shared_ext
         SHAREDLIBM=libz.$VER1$shared_ext
         LDSHARED=${LDSHARED-"$cc -dynamiclib -install_name $libdir/$SHAREDLIBM -compatibility_version $VER1 -current_version $VER3"}
-        if libtool -V 2>&1 | grep Apple > /dev/null; then
+        if "${CROSS_PREFIX}libtool" -V 2>&1 | grep Apple > /dev/null; then
+            AR="${CROSS_PREFIX}libtool"
+        elif libtool -V 2>&1 | grep Apple > /dev/null; then
             AR="libtool"
         else
             AR="/usr/bin/libtool"
@@ -435,7 +442,7 @@ EOF
 if test $shared -eq 1; then
   echo Checking for shared library support... | tee -a configure.log
   # we must test in two steps (cc then ld), required at least on SunOS 4.x
-  if try $CC -w -c $SFLAGS $test.c &&
+  if try $CC -c $SFLAGS $test.c &&
      try $LDSHARED $SFLAGS -o $test$shared_ext $test.o; then
     echo Building shared library $SHAREDLIBV with $CC. | tee -a configure.log
   elif test -z "$old_cc" -a -z "$old_cflags"; then
@@ -860,7 +867,7 @@ echo prefix = $prefix >> configure.log
 echo sharedlibdir = $sharedlibdir >> configure.log
 echo uname = $uname >> configure.log
 
-# udpate Makefile with the configure results
+# update Makefile with the configure results
 sed < ${SRCDIR}Makefile.in "
 /^CC *=/s#=.*#=$CC#
 /^CFLAGS *=/s#=.*#=$CFLAGS#
diff --git a/src/native/external/zlib/crc32.c b/src/native/external/zlib/crc32.c
index f8357b083f76..6c38f5c04c6a 100644
--- a/src/native/external/zlib/crc32.c
+++ b/src/native/external/zlib/crc32.c
@@ -103,19 +103,6 @@
 #  define ARMCRC32
 #endif
 
-/* Local functions. */
-local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
-local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
-
-#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
-    local z_word_t byte_swap OF((z_word_t word));
-#endif
-
-#if defined(W) && !defined(ARMCRC32)
-    local z_crc_t crc_word OF((z_word_t data));
-    local z_word_t crc_word_big OF((z_word_t data));
-#endif
-
 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
 /*
   Swap the bytes in a z_word_t to convert between little and big endian. Any
@@ -123,9 +110,7 @@ local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
   instruction, if one is available. This assumes that word_t is either 32 bits
   or 64 bits.
  */
-local z_word_t byte_swap(word)
-    z_word_t word;
-{
+local z_word_t byte_swap(z_word_t word) {
 #  if W == 8
     return
         (word & 0xff00000000000000) >> 56 |
@@ -146,24 +131,77 @@ local z_word_t byte_swap(word)
 }
 #endif
 
+#ifdef DYNAMIC_CRC_TABLE
+/* =========================================================================
+ * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
+ * below.
+ */
+   local z_crc_t FAR x2n_table[32];
+#else
+/* =========================================================================
+ * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
+ * of x for combining CRC-32s, all made by make_crc_table().
+ */
+#  include "crc32.h"
+#endif
+
 /* CRC polynomial. */
 #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
 
-#ifdef DYNAMIC_CRC_TABLE
+/*
+  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
+  reflected. For speed, this requires that a not be zero.
+ */
+local z_crc_t multmodp(z_crc_t a, z_crc_t b) {
+    z_crc_t m, p;
+
+    m = (z_crc_t)1 << 31;
+    p = 0;
+    for (;;) {
+        if (a & m) {
+            p ^= b;
+            if ((a & (m - 1)) == 0)
+                break;
+        }
+        m >>= 1;
+        b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
+    }
+    return p;
+}
 
+/*
+  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
+  initialized.
+ */
+local z_crc_t x2nmodp(z_off64_t n, unsigned k) {
+    z_crc_t p;
+
+    p = (z_crc_t)1 << 31;           /* x^0 == 1 */
+    while (n) {
+        if (n & 1)
+            p = multmodp(x2n_table[k & 31], p);
+        n >>= 1;
+        k++;
+    }
+    return p;
+}
+
+#ifdef DYNAMIC_CRC_TABLE
+/* =========================================================================
+ * Build the tables for byte-wise and braided CRC-32 calculations, and a table
+ * of powers of x for combining CRC-32s.
+ */
 local z_crc_t FAR crc_table[256];
-local z_crc_t FAR x2n_table[32];
-local void make_crc_table OF((void));
 #ifdef W
    local z_word_t FAR crc_big_table[256];
    local z_crc_t FAR crc_braid_table[W][256];
    local z_word_t FAR crc_braid_big_table[W][256];
-   local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
+   local void braid(z_crc_t [][256], z_word_t [][256], int, int);
 #endif
 #ifdef MAKECRCH
-   local void write_table OF((FILE *, const z_crc_t FAR *, int));
-   local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
-   local void write_table64 OF((FILE *, const z_word_t FAR *, int));
+   local void write_table(FILE *, const z_crc_t FAR *, int);
+   local void write_table32hi(FILE *, const z_word_t FAR *, int);
+   local void write_table64(FILE *, const z_word_t FAR *, int);
 #endif /* MAKECRCH */
 
 /*
@@ -176,7 +214,6 @@ local void make_crc_table OF((void));
 
 /* Definition of once functionality. */
 typedef struct once_s once_t;
-local void once OF((once_t *, void (*)(void)));
 
 /* Check for the availability of atomics. */
 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
@@ -196,10 +233,7 @@ struct once_s {
   invoke once() at the same time. The state must be a once_t initialized with
   ONCE_INIT.
  */
-local void once(state, init)
-    once_t *state;
-    void (*init)(void);
-{
+local void once(once_t *state, void (*init)(void)) {
     if (!atomic_load(&state->done)) {
         if (atomic_flag_test_and_set(&state->begun))
             while (!atomic_load(&state->done))
@@ -222,10 +256,7 @@ struct once_s {
 
 /* Test and set. Alas, not atomic, but tries to minimize the period of
    vulnerability. */
-local int test_and_set OF((int volatile *));
-local int test_and_set(flag)
-    int volatile *flag;
-{
+local int test_and_set(int volatile *flag) {
     int was;
 
     was = *flag;
@@ -234,10 +265,7 @@ local int test_and_set(flag)
 }
 
 /* Run the provided init() function once. This is not thread-safe. */
-local void once(state, init)
-    once_t *state;
-    void (*init)(void);
-{
+local void once(once_t *state, void (*init)(void)) {
     if (!state->done) {
         if (test_and_set(&state->begun))
             while (!state->done)
@@ -279,8 +307,7 @@ local once_t made = ONCE_INIT;
   combinations of CRC register values and incoming bytes.
  */
 
-local void make_crc_table()
-{
+local void make_crc_table(void) {
     unsigned i, j, n;
     z_crc_t p;
 
@@ -447,11 +474,7 @@ local void make_crc_table()
    Write the 32-bit values in table[0..k-1] to out, five per line in
    hexadecimal separated by commas.
  */
-local void write_table(out, table, k)
-    FILE *out;
-    const z_crc_t FAR *table;
-    int k;
-{
+local void write_table(FILE *out, const z_crc_t FAR *table, int k) {
     int n;
 
     for (n = 0; n < k; n++)
@@ -464,11 +487,7 @@ local void write_table(out, table, k)
    Write the high 32-bits of each value in table[0..k-1] to out, five per line
    in hexadecimal separated by commas.
  */
-local void write_table32hi(out, table, k)
-FILE *out;
-const z_word_t FAR *table;
-int k;
-{
+local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {
     int n;
 
     for (n = 0; n < k; n++)
@@ -484,11 +503,7 @@ int k;
   bits. If not, then the type cast and format string can be adjusted
   accordingly.
  */
-local void write_table64(out, table, k)
-    FILE *out;
-    const z_word_t FAR *table;
-    int k;
-{
+local void write_table64(FILE *out, const z_word_t FAR *table, int k) {
     int n;
 
     for (n = 0; n < k; n++)
@@ -498,8 +513,7 @@ local void write_table64(out, table, k)
 }
 
 /* Actually do the deed. */
-int main()
-{
+int main(void) {
     make_crc_table();
     return 0;
 }
@@ -511,12 +525,7 @@ int main()
   Generate the little and big-endian braid tables for the given n and z_word_t
   size w. Each array must have room for w blocks of 256 elements.
  */
-local void braid(ltl, big, n, w)
-    z_crc_t ltl[][256];
-    z_word_t big[][256];
-    int n;
-    int w;
-{
+local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {
     int k;
     z_crc_t i, p, q;
     for (k = 0; k < w; k++) {
@@ -531,69 +540,13 @@ local void braid(ltl, big, n, w)
 }
 #endif
 
-#else /* !DYNAMIC_CRC_TABLE */
-/* ========================================================================
- * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
- * of x for combining CRC-32s, all made by make_crc_table().
- */
-#include "crc32.h"
 #endif /* DYNAMIC_CRC_TABLE */
 
-/* ========================================================================
- * Routines used for CRC calculation. Some are also required for the table
- * generation above.
- */
-
-/*
-  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
-  reflected. For speed, this requires that a not be zero.
- */
-local z_crc_t multmodp(a, b)
-    z_crc_t a;
-    z_crc_t b;
-{
-    z_crc_t m, p;
-
-    m = (z_crc_t)1 << 31;
-    p = 0;
-    for (;;) {
-        if (a & m) {
-            p ^= b;
-            if ((a & (m - 1)) == 0)
-                break;
-        }
-        m >>= 1;
-        b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
-    }
-    return p;
-}
-
-/*
-  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
-  initialized.
- */
-local z_crc_t x2nmodp(n, k)
-    z_off64_t n;
-    unsigned k;
-{
-    z_crc_t p;
-
-    p = (z_crc_t)1 << 31;           /* x^0 == 1 */
-    while (n) {
-        if (n & 1)
-            p = multmodp(x2n_table[k & 31], p);
-        n >>= 1;
-        k++;
-    }
-    return p;
-}
-
 /* =========================================================================
  * This function can be used by asm versions of crc32(), and to force the
  * generation of the CRC tables in a threaded application.
  */
-const z_crc_t FAR * ZEXPORT get_crc_table()
-{
+const z_crc_t FAR * ZEXPORT get_crc_table(void) {
 #ifdef DYNAMIC_CRC_TABLE
     once(&made, make_crc_table);
 #endif /* DYNAMIC_CRC_TABLE */
@@ -619,11 +572,8 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
 #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
 #define Z_BATCH_MIN 800             /* fewest words in a final batch */
 
-unsigned long ZEXPORT crc32_z(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    z_size_t len;
-{
+unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
+                              z_size_t len) {
     z_crc_t val;
     z_word_t crc1, crc2;
     const z_word_t *word;
@@ -723,18 +673,14 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
   least-significant byte of the word as the first byte of data, without any pre
   or post conditioning. This is used to combine the CRCs of each braid.
  */
-local z_crc_t crc_word(data)
-    z_word_t data;
-{
+local z_crc_t crc_word(z_word_t data) {
     int k;
     for (k = 0; k < W; k++)
         data = (data >> 8) ^ crc_table[data & 0xff];
     return (z_crc_t)data;
 }
 
-local z_word_t crc_word_big(data)
-    z_word_t data;
-{
+local z_word_t crc_word_big(z_word_t data) {
     int k;
     for (k = 0; k < W; k++)
         data = (data << 8) ^
@@ -745,11 +691,8 @@ local z_word_t crc_word_big(data)
 #endif
 
 /* ========================================================================= */
-unsigned long ZEXPORT crc32_z(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    z_size_t len;
-{
+unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
+                              z_size_t len) {
     /* Return initial CRC, if requested. */
     if (buf == Z_NULL) return 0;
 
@@ -781,8 +724,8 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
         words = (z_word_t const *)buf;
 
         /* Do endian check at execution time instead of compile time, since ARM
-           processors can change the endianess at execution time. If the
-           compiler knows what the endianess will be, it can optimize out the
+           processors can change the endianness at execution time. If the
+           compiler knows what the endianness will be, it can optimize out the
            check and the unused branch. */
         endian = 1;
         if (*(unsigned char *)&endian) {
@@ -1069,20 +1012,13 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
 #endif
 
 /* ========================================================================= */
-unsigned long ZEXPORT crc32(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    uInt len;
-{
+unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf,
+                            uInt len) {
     return crc32_z(crc, buf, len);
 }
 
 /* ========================================================================= */
-uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
-    uLong crc1;
-    uLong crc2;
-    z_off64_t len2;
-{
+uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {
 #ifdef DYNAMIC_CRC_TABLE
     once(&made, make_crc_table);
 #endif /* DYNAMIC_CRC_TABLE */
@@ -1090,18 +1026,12 @@ uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
 }
 
 /* ========================================================================= */
-uLong ZEXPORT crc32_combine(crc1, crc2, len2)
-    uLong crc1;
-    uLong crc2;
-    z_off_t len2;
-{
+uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {
     return crc32_combine64(crc1, crc2, (z_off64_t)len2);
 }
 
 /* ========================================================================= */
-uLong ZEXPORT crc32_combine_gen64(len2)
-    z_off64_t len2;
-{
+uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {
 #ifdef DYNAMIC_CRC_TABLE
     once(&made, make_crc_table);
 #endif /* DYNAMIC_CRC_TABLE */
@@ -1109,17 +1039,11 @@ uLong ZEXPORT crc32_combine_gen64(len2)
 }
 
 /* ========================================================================= */
-uLong ZEXPORT crc32_combine_gen(len2)
-    z_off_t len2;
-{
+uLong ZEXPORT crc32_combine_gen(z_off_t len2) {
     return crc32_combine_gen64((z_off64_t)len2);
 }
 
 /* ========================================================================= */
-uLong ZEXPORT crc32_combine_op(crc1, crc2, op)
-    uLong crc1;
-    uLong crc2;
-    uLong op;
-{
+uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {
     return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
 }
diff --git a/src/native/external/zlib/deflate.c b/src/native/external/zlib/deflate.c
index b76366397545..ca2fc59a1b54 100644
--- a/src/native/external/zlib/deflate.c
+++ b/src/native/external/zlib/deflate.c
@@ -1,5 +1,5 @@
 /* deflate.c -- compress data using the deflation algorithm
- * Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler
+ * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -52,7 +52,7 @@
 #include "deflate.h"
 
 const char deflate_copyright[] =
-   " deflate 1.2.13 Copyright 1995-2022 Jean-loup Gailly and Mark Adler ";
+   " deflate 1.3.1 Copyright 1995-2024 Jean-loup Gailly and Mark Adler ";
 /*
   If you use the zlib library in a product, an acknowledgment is welcome
   in the documentation of your product. If for some reason you cannot
@@ -60,9 +60,6 @@ const char deflate_copyright[] =
   copyright string in the executable of your product.
  */
 
-/* ===========================================================================
- *  Function prototypes.
- */
 typedef enum {
     need_more,      /* block not completed, need more input or more output */
     block_done,     /* block flush performed */
@@ -70,29 +67,16 @@ typedef enum {
     finish_done     /* finish done, accept no more input or output */
 } block_state;
 
-typedef block_state (*compress_func) OF((deflate_state *s, int flush));
+typedef block_state (*compress_func)(deflate_state *s, int flush);
 /* Compression function. Returns the block state after the call. */
 
-local int deflateStateCheck      OF((z_streamp strm));
-local void slide_hash     OF((deflate_state *s));
-local void fill_window    OF((deflate_state *s));
-local block_state deflate_stored OF((deflate_state *s, int flush));
-local block_state deflate_fast   OF((deflate_state *s, int flush));
+local block_state deflate_stored(deflate_state *s, int flush);
+local block_state deflate_fast(deflate_state *s, int flush);
 #ifndef FASTEST
-local block_state deflate_slow   OF((deflate_state *s, int flush));
-#endif
-local block_state deflate_rle    OF((deflate_state *s, int flush));
-local block_state deflate_huff   OF((deflate_state *s, int flush));
-local void lm_init        OF((deflate_state *s));
-local void putShortMSB    OF((deflate_state *s, uInt b));
-local void flush_pending  OF((z_streamp strm));
-local unsigned read_buf   OF((z_streamp strm, Bytef *buf, unsigned size));
-local uInt longest_match  OF((deflate_state *s, IPos cur_match));
-
-#ifdef ZLIB_DEBUG
-local  void check_match OF((deflate_state *s, IPos start, IPos match,
-                            int length));
+local block_state deflate_slow(deflate_state *s, int flush);
 #endif
+local block_state deflate_rle(deflate_state *s, int flush);
+local block_state deflate_huff(deflate_state *s, int flush);
 
 /* ===========================================================================
  * Local data
@@ -195,9 +179,12 @@ local const config configuration_table[10] = {
  * bit values at the expense of memory usage). We slide even when level == 0 to
  * keep the hash table consistent if we switch back to level > 0 later.
  */
-local void slide_hash(s)
-    deflate_state *s;
-{
+#if defined(__has_feature)
+#  if __has_feature(memory_sanitizer)
+     __attribute__((no_sanitize("memory")))
+#  endif
+#endif
+local void slide_hash(deflate_state *s) {
     unsigned n, m;
     Posf *p;
     uInt wsize = s->w_size;
@@ -221,30 +208,177 @@ local void slide_hash(s)
 #endif
 }
 
+/* ===========================================================================
+ * Read a new buffer from the current input stream, update the adler32
+ * and total number of bytes read.  All deflate() input goes through
+ * this function so some applications may wish to modify it to avoid
+ * allocating a large strm->next_in buffer and copying from it.
+ * (See also flush_pending()).
+ */
+local unsigned read_buf(z_streamp strm, Bytef *buf, unsigned size) {
+    unsigned len = strm->avail_in;
+
+    if (len > size) len = size;
+    if (len == 0) return 0;
+
+    strm->avail_in  -= len;
+
+    zmemcpy(buf, strm->next_in, len);
+    if (strm->state->wrap == 1) {
+        strm->adler = adler32(strm->adler, buf, len);
+    }
+#ifdef GZIP
+    else if (strm->state->wrap == 2) {
+        strm->adler = crc32(strm->adler, buf, len);
+    }
+#endif
+    strm->next_in  += len;
+    strm->total_in += len;
+
+    return len;
+}
+
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead.
+ *
+ * IN assertion: lookahead < MIN_LOOKAHEAD
+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
+ *    At least one byte has been read, or avail_in == 0; reads are
+ *    performed for at least two bytes (required for the zip translate_eol
+ *    option -- not supported here).
+ */
+local void fill_window(deflate_state *s) {
+    unsigned n;
+    unsigned more;    /* Amount of free space at the end of the window. */
+    uInt wsize = s->w_size;
+
+    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
+
+    do {
+        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
+
+        /* Deal with !@#$% 64K limit: */
+        if (sizeof(int) <= 2) {
+            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
+                more = wsize;
+
+            } else if (more == (unsigned)(-1)) {
+                /* Very unlikely, but possible on 16 bit machine if
+                 * strstart == 0 && lookahead == 1 (input done a byte at time)
+                 */
+                more--;
+            }
+        }
+
+        /* If the window is almost full and there is insufficient lookahead,
+         * move the upper half to the lower one to make room in the upper half.
+         */
+        if (s->strstart >= wsize + MAX_DIST(s)) {
+
+            zmemcpy(s->window, s->window + wsize, (unsigned)wsize - more);
+            s->match_start -= wsize;
+            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
+            s->block_start -= (long) wsize;
+            if (s->insert > s->strstart)
+                s->insert = s->strstart;
+            slide_hash(s);
+            more += wsize;
+        }
+        if (s->strm->avail_in == 0) break;
+
+        /* If there was no sliding:
+         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
+         *    more == window_size - lookahead - strstart
+         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
+         * => more >= window_size - 2*WSIZE + 2
+         * In the BIG_MEM or MMAP case (not yet supported),
+         *   window_size == input_size + MIN_LOOKAHEAD  &&
+         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
+         * Otherwise, window_size == 2*WSIZE so more >= 2.
+         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
+         */
+        Assert(more >= 2, "more < 2");
+
+        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
+        s->lookahead += n;
+
+        /* Initialize the hash value now that we have some input: */
+        if (s->lookahead + s->insert >= MIN_MATCH) {
+            uInt str = s->strstart - s->insert;
+            s->ins_h = s->window[str];
+            UPDATE_HASH(s, s->ins_h, s->window[str + 1]);
+#if MIN_MATCH != 3
+            Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+            while (s->insert) {
+                UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
+#ifndef FASTEST
+                s->prev[str & s->w_mask] = s->head[s->ins_h];
+#endif
+                s->head[s->ins_h] = (Pos)str;
+                str++;
+                s->insert--;
+                if (s->lookahead + s->insert < MIN_MATCH)
+                    break;
+            }
+        }
+        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
+         * but this is not important since only literal bytes will be emitted.
+         */
+
+    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
+
+    /* If the WIN_INIT bytes after the end of the current data have never been
+     * written, then zero those bytes in order to avoid memory check reports of
+     * the use of uninitialized (or uninitialised as Julian writes) bytes by
+     * the longest match routines.  Update the high water mark for the next
+     * time through here.  WIN_INIT is set to MAX_MATCH since the longest match
+     * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
+     */
+    if (s->high_water < s->window_size) {
+        ulg curr = s->strstart + (ulg)(s->lookahead);
+        ulg init;
+
+        if (s->high_water < curr) {
+            /* Previous high water mark below current data -- zero WIN_INIT
+             * bytes or up to end of window, whichever is less.
+             */
+            init = s->window_size - curr;
+            if (init > WIN_INIT)
+                init = WIN_INIT;
+            zmemzero(s->window + curr, (unsigned)init);
+            s->high_water = curr + init;
+        }
+        else if (s->high_water < (ulg)curr + WIN_INIT) {
+            /* High water mark at or above current data, but below current data
+             * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
+             * to end of window, whichever is less.
+             */
+            init = (ulg)curr + WIN_INIT - s->high_water;
+            if (init > s->window_size - s->high_water)
+                init = s->window_size - s->high_water;
+            zmemzero(s->window + s->high_water, (unsigned)init);
+            s->high_water += init;
+        }
+    }
+
+    Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
+           "not enough room for search");
+}
+
 /* ========================================================================= */
-int ZEXPORT deflateInit_(strm, level, version, stream_size)
-    z_streamp strm;
-    int level;
-    const char *version;
-    int stream_size;
-{
+int ZEXPORT deflateInit_(z_streamp strm, int level, const char *version,
+                         int stream_size) {
     return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
                          Z_DEFAULT_STRATEGY, version, stream_size);
     /* To do: ignore strm->next_in if we use it as window */
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
-                  version, stream_size)
-    z_streamp strm;
-    int  level;
-    int  method;
-    int  windowBits;
-    int  memLevel;
-    int  strategy;
-    const char *version;
-    int stream_size;
-{
+int ZEXPORT deflateInit2_(z_streamp strm, int level, int method,
+                          int windowBits, int memLevel, int strategy,
+                          const char *version, int stream_size) {
     deflate_state *s;
     int wrap = 1;
     static const char my_version[] = ZLIB_VERSION;
@@ -359,7 +493,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
      * symbols from which it is being constructed.
      */
 
-    s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4);
+    s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, LIT_BUFS);
     s->pending_buf_size = (ulg)s->lit_bufsize * 4;
 
     if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
@@ -369,8 +503,14 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
         deflateEnd (strm);
         return Z_MEM_ERROR;
     }
+#ifdef LIT_MEM
+    s->d_buf = (ushf *)(s->pending_buf + (s->lit_bufsize << 1));
+    s->l_buf = s->pending_buf + (s->lit_bufsize << 2);
+    s->sym_end = s->lit_bufsize - 1;
+#else
     s->sym_buf = s->pending_buf + s->lit_bufsize;
     s->sym_end = (s->lit_bufsize - 1) * 3;
+#endif
     /* We avoid equality with lit_bufsize*3 because of wraparound at 64K
      * on 16 bit machines and because stored blocks are restricted to
      * 64K-1 bytes.
@@ -386,9 +526,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
 /* =========================================================================
  * Check for a valid deflate stream state. Return 0 if ok, 1 if not.
  */
-local int deflateStateCheck(strm)
-    z_streamp strm;
-{
+local int deflateStateCheck(z_streamp strm) {
     deflate_state *s;
     if (strm == Z_NULL ||
         strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
@@ -409,11 +547,8 @@ local int deflateStateCheck(strm)
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateSetDictionary(strm, dictionary, dictLength)
-    z_streamp strm;
-    const Bytef *dictionary;
-    uInt  dictLength;
-{
+int ZEXPORT deflateSetDictionary(z_streamp strm, const Bytef *dictionary,
+                                 uInt  dictLength) {
     deflate_state *s;
     uInt str, n;
     int wrap;
@@ -478,11 +613,8 @@ int ZEXPORT deflateSetDictionary(strm, dictionary, dictLength)
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateGetDictionary(strm, dictionary, dictLength)
-    z_streamp strm;
-    Bytef *dictionary;
-    uInt  *dictLength;
-{
+int ZEXPORT deflateGetDictionary(z_streamp strm, Bytef *dictionary,
+                                 uInt *dictLength) {
     deflate_state *s;
     uInt len;
 
@@ -500,9 +632,7 @@ int ZEXPORT deflateGetDictionary(strm, dictionary, dictLength)
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateResetKeep(strm)
-    z_streamp strm;
-{
+int ZEXPORT deflateResetKeep(z_streamp strm) {
     deflate_state *s;
 
     if (deflateStateCheck(strm)) {
@@ -537,10 +667,32 @@ int ZEXPORT deflateResetKeep(strm)
     return Z_OK;
 }
 
+/* ===========================================================================
+ * Initialize the "longest match" routines for a new zlib stream
+ */
+local void lm_init(deflate_state *s) {
+    s->window_size = (ulg)2L*s->w_size;
+
+    CLEAR_HASH(s);
+
+    /* Set the default configuration parameters:
+     */
+    s->max_lazy_match   = configuration_table[s->level].max_lazy;
+    s->good_match       = configuration_table[s->level].good_length;
+    s->nice_match       = configuration_table[s->level].nice_length;
+    s->max_chain_length = configuration_table[s->level].max_chain;
+
+    s->strstart = 0;
+    s->block_start = 0L;
+    s->lookahead = 0;
+    s->insert = 0;
+    s->match_length = s->prev_length = MIN_MATCH-1;
+    s->match_available = 0;
+    s->ins_h = 0;
+}
+
 /* ========================================================================= */
-int ZEXPORT deflateReset(strm)
-    z_streamp strm;
-{
+int ZEXPORT deflateReset(z_streamp strm) {
     int ret;
 
     ret = deflateResetKeep(strm);
@@ -550,10 +702,7 @@ int ZEXPORT deflateReset(strm)
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateSetHeader(strm, head)
-    z_streamp strm;
-    gz_headerp head;
-{
+int ZEXPORT deflateSetHeader(z_streamp strm, gz_headerp head) {
     if (deflateStateCheck(strm) || strm->state->wrap != 2)
         return Z_STREAM_ERROR;
     strm->state->gzhead = head;
@@ -561,11 +710,7 @@ int ZEXPORT deflateSetHeader(strm, head)
 }
 
 /* ========================================================================= */
-int ZEXPORT deflatePending(strm, pending, bits)
-    unsigned *pending;
-    int *bits;
-    z_streamp strm;
-{
+int ZEXPORT deflatePending(z_streamp strm, unsigned *pending, int *bits) {
     if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
     if (pending != Z_NULL)
         *pending = strm->state->pending;
@@ -575,19 +720,21 @@ int ZEXPORT deflatePending(strm, pending, bits)
 }
 
 /* ========================================================================= */
-int ZEXPORT deflatePrime(strm, bits, value)
-    z_streamp strm;
-    int bits;
-    int value;
-{
+int ZEXPORT deflatePrime(z_streamp strm, int bits, int value) {
     deflate_state *s;
     int put;
 
     if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
     s = strm->state;
+#ifdef LIT_MEM
+    if (bits < 0 || bits > 16 ||
+        (uchf *)s->d_buf < s->pending_out + ((Buf_size + 7) >> 3))
+        return Z_BUF_ERROR;
+#else
     if (bits < 0 || bits > 16 ||
         s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
         return Z_BUF_ERROR;
+#endif
     do {
         put = Buf_size - s->bi_valid;
         if (put > bits)
@@ -602,11 +749,7 @@ int ZEXPORT deflatePrime(strm, bits, value)
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateParams(strm, level, strategy)
-    z_streamp strm;
-    int level;
-    int strategy;
-{
+int ZEXPORT deflateParams(z_streamp strm, int level, int strategy) {
     deflate_state *s;
     compress_func func;
 
@@ -651,13 +794,8 @@ int ZEXPORT deflateParams(strm, level, strategy)
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
-    z_streamp strm;
-    int good_length;
-    int max_lazy;
-    int nice_length;
-    int max_chain;
-{
+int ZEXPORT deflateTune(z_streamp strm, int good_length, int max_lazy,
+                        int nice_length, int max_chain) {
     deflate_state *s;
 
     if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
@@ -693,10 +831,7 @@ int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
  *
  * Shifts are used to approximate divisions, for speed.
  */
-uLong ZEXPORT deflateBound(strm, sourceLen)
-    z_streamp strm;
-    uLong sourceLen;
-{
+uLong ZEXPORT deflateBound(z_streamp strm, uLong sourceLen) {
     deflate_state *s;
     uLong fixedlen, storelen, wraplen;
 
@@ -766,10 +901,7 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
  * IN assertion: the stream state is correct and there is enough room in
  * pending_buf.
  */
-local void putShortMSB(s, b)
-    deflate_state *s;
-    uInt b;
-{
+local void putShortMSB(deflate_state *s, uInt b) {
     put_byte(s, (Byte)(b >> 8));
     put_byte(s, (Byte)(b & 0xff));
 }
@@ -780,9 +912,7 @@ local void putShortMSB(s, b)
  * applications may wish to modify it to avoid allocating a large
  * strm->next_out buffer and copying into it. (See also read_buf()).
  */
-local void flush_pending(strm)
-    z_streamp strm;
-{
+local void flush_pending(z_streamp strm) {
     unsigned len;
     deflate_state *s = strm->state;
 
@@ -813,10 +943,7 @@ local void flush_pending(strm)
     } while (0)
 
 /* ========================================================================= */
-int ZEXPORT deflate(strm, flush)
-    z_streamp strm;
-    int flush;
-{
+int ZEXPORT deflate(z_streamp strm, int flush) {
     int old_flush; /* value of flush param for previous deflate call */
     deflate_state *s;
 
@@ -1128,9 +1255,7 @@ int ZEXPORT deflate(strm, flush)
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateEnd(strm)
-    z_streamp strm;
-{
+int ZEXPORT deflateEnd(z_streamp strm) {
     int status;
 
     if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
@@ -1154,11 +1279,10 @@ int ZEXPORT deflateEnd(strm)
  * To simplify the source, this is not supported for 16-bit MSDOS (which
  * doesn't have enough memory anyway to duplicate compression states).
  */
-int ZEXPORT deflateCopy(dest, source)
-    z_streamp dest;
-    z_streamp source;
-{
+int ZEXPORT deflateCopy(z_streamp dest, z_streamp source) {
 #ifdef MAXSEG_64K
+    (void)dest;
+    (void)source;
     return Z_STREAM_ERROR;
 #else
     deflate_state *ds;
@@ -1182,7 +1306,7 @@ int ZEXPORT deflateCopy(dest, source)
     ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
     ds->prev   = (Posf *)  ZALLOC(dest, ds->w_size, sizeof(Pos));
     ds->head   = (Posf *)  ZALLOC(dest, ds->hash_size, sizeof(Pos));
-    ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4);
+    ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, LIT_BUFS);
 
     if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
         ds->pending_buf == Z_NULL) {
@@ -1193,10 +1317,15 @@ int ZEXPORT deflateCopy(dest, source)
     zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
     zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos));
     zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos));
-    zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
+    zmemcpy(ds->pending_buf, ss->pending_buf, ds->lit_bufsize * LIT_BUFS);
 
     ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
+#ifdef LIT_MEM
+    ds->d_buf = (ushf *)(ds->pending_buf + (ds->lit_bufsize << 1));
+    ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2);
+#else
     ds->sym_buf = ds->pending_buf + ds->lit_bufsize;
+#endif
 
     ds->l_desc.dyn_tree = ds->dyn_ltree;
     ds->d_desc.dyn_tree = ds->dyn_dtree;
@@ -1206,66 +1335,6 @@ int ZEXPORT deflateCopy(dest, source)
 #endif /* MAXSEG_64K */
 }
 
-/* ===========================================================================
- * Read a new buffer from the current input stream, update the adler32
- * and total number of bytes read.  All deflate() input goes through
- * this function so some applications may wish to modify it to avoid
- * allocating a large strm->next_in buffer and copying from it.
- * (See also flush_pending()).
- */
-local unsigned read_buf(strm, buf, size)
-    z_streamp strm;
-    Bytef *buf;
-    unsigned size;
-{
-    unsigned len = strm->avail_in;
-
-    if (len > size) len = size;
-    if (len == 0) return 0;
-
-    strm->avail_in  -= len;
-
-    zmemcpy(buf, strm->next_in, len);
-    if (strm->state->wrap == 1) {
-        strm->adler = adler32(strm->adler, buf, len);
-    }
-#ifdef GZIP
-    else if (strm->state->wrap == 2) {
-        strm->adler = crc32(strm->adler, buf, len);
-    }
-#endif
-    strm->next_in  += len;
-    strm->total_in += len;
-
-    return len;
-}
-
-/* ===========================================================================
- * Initialize the "longest match" routines for a new zlib stream
- */
-local void lm_init(s)
-    deflate_state *s;
-{
-    s->window_size = (ulg)2L*s->w_size;
-
-    CLEAR_HASH(s);
-
-    /* Set the default configuration parameters:
-     */
-    s->max_lazy_match   = configuration_table[s->level].max_lazy;
-    s->good_match       = configuration_table[s->level].good_length;
-    s->nice_match       = configuration_table[s->level].nice_length;
-    s->max_chain_length = configuration_table[s->level].max_chain;
-
-    s->strstart = 0;
-    s->block_start = 0L;
-    s->lookahead = 0;
-    s->insert = 0;
-    s->match_length = s->prev_length = MIN_MATCH-1;
-    s->match_available = 0;
-    s->ins_h = 0;
-}
-
 #ifndef FASTEST
 /* ===========================================================================
  * Set match_start to the longest match starting at the given string and
@@ -1276,10 +1345,7 @@ local void lm_init(s)
  *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
  * OUT assertion: the match length is not greater than s->lookahead.
  */
-local uInt longest_match(s, cur_match)
-    deflate_state *s;
-    IPos cur_match;                             /* current match */
-{
+local uInt longest_match(deflate_state *s, IPos cur_match) {
     unsigned chain_length = s->max_chain_length;/* max hash chain length */
     register Bytef *scan = s->window + s->strstart; /* current string */
     register Bytef *match;                      /* matched string */
@@ -1427,10 +1493,7 @@ local uInt longest_match(s, cur_match)
 /* ---------------------------------------------------------------------------
  * Optimized version for FASTEST only
  */
-local uInt longest_match(s, cur_match)
-    deflate_state *s;
-    IPos cur_match;                             /* current match */
-{
+local uInt longest_match(deflate_state *s, IPos cur_match) {
     register Bytef *scan = s->window + s->strstart; /* current string */
     register Bytef *match;                       /* matched string */
     register int len;                           /* length of current match */
@@ -1491,19 +1554,23 @@ local uInt longest_match(s, cur_match)
 /* ===========================================================================
  * Check that the match at match_start is indeed a match.
  */
-local void check_match(s, start, match, length)
-    deflate_state *s;
-    IPos start, match;
-    int length;
-{
+local void check_match(deflate_state *s, IPos start, IPos match, int length) {
     /* check that the match is indeed a match */
-    if (zmemcmp(s->window + match,
-                s->window + start, length) != EQUAL) {
-        fprintf(stderr, " start %u, match %u, length %d\n",
-                start, match, length);
+    Bytef *back = s->window + (int)match, *here = s->window + start;
+    IPos len = length;
+    if (match == (IPos)-1) {
+        /* match starts one byte before the current window -- just compare the
+           subsequent length-1 bytes */
+        back++;
+        here++;
+        len--;
+    }
+    if (zmemcmp(back, here, len) != EQUAL) {
+        fprintf(stderr, " start %u, match %d, length %d\n",
+                start, (int)match, length);
         do {
-            fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
-        } while (--length != 0);
+            fprintf(stderr, "(%02x %02x)", *back++, *here++);
+        } while (--len != 0);
         z_error("invalid match");
     }
     if (z_verbose > 1) {
@@ -1515,137 +1582,6 @@ local void check_match(s, start, match, length)
 #  define check_match(s, start, match, length)
 #endif /* ZLIB_DEBUG */
 
-/* ===========================================================================
- * Fill the window when the lookahead becomes insufficient.
- * Updates strstart and lookahead.
- *
- * IN assertion: lookahead < MIN_LOOKAHEAD
- * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
- *    At least one byte has been read, or avail_in == 0; reads are
- *    performed for at least two bytes (required for the zip translate_eol
- *    option -- not supported here).
- */
-local void fill_window(s)
-    deflate_state *s;
-{
-    unsigned n;
-    unsigned more;    /* Amount of free space at the end of the window. */
-    uInt wsize = s->w_size;
-
-    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
-
-    do {
-        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
-
-        /* Deal with !@#$% 64K limit: */
-        if (sizeof(int) <= 2) {
-            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
-                more = wsize;
-
-            } else if (more == (unsigned)(-1)) {
-                /* Very unlikely, but possible on 16 bit machine if
-                 * strstart == 0 && lookahead == 1 (input done a byte at time)
-                 */
-                more--;
-            }
-        }
-
-        /* If the window is almost full and there is insufficient lookahead,
-         * move the upper half to the lower one to make room in the upper half.
-         */
-        if (s->strstart >= wsize + MAX_DIST(s)) {
-
-            zmemcpy(s->window, s->window + wsize, (unsigned)wsize - more);
-            s->match_start -= wsize;
-            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
-            s->block_start -= (long) wsize;
-            if (s->insert > s->strstart)
-                s->insert = s->strstart;
-            slide_hash(s);
-            more += wsize;
-        }
-        if (s->strm->avail_in == 0) break;
-
-        /* If there was no sliding:
-         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
-         *    more == window_size - lookahead - strstart
-         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
-         * => more >= window_size - 2*WSIZE + 2
-         * In the BIG_MEM or MMAP case (not yet supported),
-         *   window_size == input_size + MIN_LOOKAHEAD  &&
-         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
-         * Otherwise, window_size == 2*WSIZE so more >= 2.
-         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
-         */
-        Assert(more >= 2, "more < 2");
-
-        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
-        s->lookahead += n;
-
-        /* Initialize the hash value now that we have some input: */
-        if (s->lookahead + s->insert >= MIN_MATCH) {
-            uInt str = s->strstart - s->insert;
-            s->ins_h = s->window[str];
-            UPDATE_HASH(s, s->ins_h, s->window[str + 1]);
-#if MIN_MATCH != 3
-            Call UPDATE_HASH() MIN_MATCH-3 more times
-#endif
-            while (s->insert) {
-                UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
-#ifndef FASTEST
-                s->prev[str & s->w_mask] = s->head[s->ins_h];
-#endif
-                s->head[s->ins_h] = (Pos)str;
-                str++;
-                s->insert--;
-                if (s->lookahead + s->insert < MIN_MATCH)
-                    break;
-            }
-        }
-        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
-         * but this is not important since only literal bytes will be emitted.
-         */
-
-    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
-
-    /* If the WIN_INIT bytes after the end of the current data have never been
-     * written, then zero those bytes in order to avoid memory check reports of
-     * the use of uninitialized (or uninitialised as Julian writes) bytes by
-     * the longest match routines.  Update the high water mark for the next
-     * time through here.  WIN_INIT is set to MAX_MATCH since the longest match
-     * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
-     */
-    if (s->high_water < s->window_size) {
-        ulg curr = s->strstart + (ulg)(s->lookahead);
-        ulg init;
-
-        if (s->high_water < curr) {
-            /* Previous high water mark below current data -- zero WIN_INIT
-             * bytes or up to end of window, whichever is less.
-             */
-            init = s->window_size - curr;
-            if (init > WIN_INIT)
-                init = WIN_INIT;
-            zmemzero(s->window + curr, (unsigned)init);
-            s->high_water = curr + init;
-        }
-        else if (s->high_water < (ulg)curr + WIN_INIT) {
-            /* High water mark at or above current data, but below current data
-             * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
-             * to end of window, whichever is less.
-             */
-            init = (ulg)curr + WIN_INIT - s->high_water;
-            if (init > s->window_size - s->high_water)
-                init = s->window_size - s->high_water;
-            zmemzero(s->window + s->high_water, (unsigned)init);
-            s->high_water += init;
-        }
-    }
-
-    Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
-           "not enough room for search");
-}
-
 /* ===========================================================================
  * Flush the current block, with given end-of-file flag.
  * IN assertion: strstart is set to the end of the current match.
@@ -1688,10 +1624,7 @@ local void fill_window(s)
  * copied. It is most efficient with large input and output buffers, which
  * maximizes the opportunities to have a single copy from next_in to next_out.
  */
-local block_state deflate_stored(s, flush)
-    deflate_state *s;
-    int flush;
-{
+local block_state deflate_stored(deflate_state *s, int flush) {
     /* Smallest worthy block size when not flushing or finishing. By default
      * this is 32K. This can be as small as 507 bytes for memLevel == 1. For
      * large input and output buffers, the stored block size will be larger.
@@ -1875,10 +1808,7 @@ local block_state deflate_stored(s, flush)
  * new strings in the dictionary only for unmatched strings or for short
  * matches. It is used only for the fast compression options.
  */
-local block_state deflate_fast(s, flush)
-    deflate_state *s;
-    int flush;
-{
+local block_state deflate_fast(deflate_state *s, int flush) {
     IPos hash_head;       /* head of the hash chain */
     int bflush;           /* set if current block must be flushed */
 
@@ -1977,10 +1907,7 @@ local block_state deflate_fast(s, flush)
  * evaluation for matches: a match is finally adopted only if there is
  * no better match at the next window position.
  */
-local block_state deflate_slow(s, flush)
-    deflate_state *s;
-    int flush;
-{
+local block_state deflate_slow(deflate_state *s, int flush) {
     IPos hash_head;          /* head of hash chain */
     int bflush;              /* set if current block must be flushed */
 
@@ -2108,10 +2035,7 @@ local block_state deflate_slow(s, flush)
  * one.  Do not maintain a hash table.  (It will be regenerated if this run of
  * deflate switches away from Z_RLE.)
  */
-local block_state deflate_rle(s, flush)
-    deflate_state *s;
-    int flush;
-{
+local block_state deflate_rle(deflate_state *s, int flush) {
     int bflush;             /* set if current block must be flushed */
     uInt prev;              /* byte at distance one to match */
     Bytef *scan, *strend;   /* scan goes up to strend for length of run */
@@ -2182,10 +2106,7 @@ local block_state deflate_rle(s, flush)
  * For Z_HUFFMAN_ONLY, do not look for matches.  Do not maintain a hash table.
  * (It will be regenerated if this run of deflate switches away from Huffman.)
  */
-local block_state deflate_huff(s, flush)
-    deflate_state *s;
-    int flush;
-{
+local block_state deflate_huff(deflate_state *s, int flush) {
     int bflush;             /* set if current block must be flushed */
 
     for (;;) {
diff --git a/src/native/external/zlib/deflate.h b/src/native/external/zlib/deflate.h
index 1a06cd5f25d1..300c6ada62b8 100644
--- a/src/native/external/zlib/deflate.h
+++ b/src/native/external/zlib/deflate.h
@@ -1,5 +1,5 @@
 /* deflate.h -- internal compression state
- * Copyright (C) 1995-2018 Jean-loup Gailly
+ * Copyright (C) 1995-2024 Jean-loup Gailly
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -23,6 +23,10 @@
 #  define GZIP
 #endif
 
+/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at
+   the cost of a larger memory footprint */
+/* #define LIT_MEM */
+
 /* ===========================================================================
  * Internal compression state.
  */
@@ -217,7 +221,14 @@ typedef struct internal_state {
     /* Depth of each subtree used as tie breaker for trees of equal frequency
      */
 
+#ifdef LIT_MEM
+#   define LIT_BUFS 5
+    ushf *d_buf;          /* buffer for distances */
+    uchf *l_buf;          /* buffer for literals/lengths */
+#else
+#   define LIT_BUFS 4
     uchf *sym_buf;        /* buffer for distances and literals/lengths */
+#endif
 
     uInt  lit_bufsize;
     /* Size of match buffer for literals/lengths.  There are 4 reasons for
@@ -239,7 +250,7 @@ typedef struct internal_state {
      *   - I can't count above 4
      */
 
-    uInt sym_next;      /* running index in sym_buf */
+    uInt sym_next;      /* running index in symbol buffer */
     uInt sym_end;       /* symbol table full when sym_next reaches this */
 
     ulg opt_len;        /* bit length of current block with optimal trees */
@@ -291,14 +302,14 @@ typedef struct internal_state {
    memory checker errors from longest match routines */
 
         /* in trees.c */
-void ZLIB_INTERNAL _tr_init OF((deflate_state *s));
-int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
-void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf,
-                        ulg stored_len, int last));
-void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s));
-void ZLIB_INTERNAL _tr_align OF((deflate_state *s));
-void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
-                        ulg stored_len, int last));
+void ZLIB_INTERNAL _tr_init(deflate_state *s);
+int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc);
+void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
+                                   ulg stored_len, int last);
+void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s);
+void ZLIB_INTERNAL _tr_align(deflate_state *s);
+void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
+                                    ulg stored_len, int last);
 
 #define d_code(dist) \
    ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
@@ -318,6 +329,25 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
   extern const uch ZLIB_INTERNAL _dist_code[];
 #endif
 
+#ifdef LIT_MEM
+# define _tr_tally_lit(s, c, flush) \
+  { uch cc = (c); \
+    s->d_buf[s->sym_next] = 0; \
+    s->l_buf[s->sym_next++] = cc; \
+    s->dyn_ltree[cc].Freq++; \
+    flush = (s->sym_next == s->sym_end); \
+   }
+# define _tr_tally_dist(s, distance, length, flush) \
+  { uch len = (uch)(length); \
+    ush dist = (ush)(distance); \
+    s->d_buf[s->sym_next] = dist; \
+    s->l_buf[s->sym_next++] = len; \
+    dist--; \
+    s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
+    s->dyn_dtree[d_code(dist)].Freq++; \
+    flush = (s->sym_next == s->sym_end); \
+  }
+#else
 # define _tr_tally_lit(s, c, flush) \
   { uch cc = (c); \
     s->sym_buf[s->sym_next++] = 0; \
@@ -337,6 +367,7 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
     s->dyn_dtree[d_code(dist)].Freq++; \
     flush = (s->sym_next == s->sym_end); \
   }
+#endif
 #else
 # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
 # define _tr_tally_dist(s, distance, length, flush) \
diff --git a/src/native/external/zlib/gzclose.c b/src/native/external/zlib/gzclose.c
index caeb99a3177f..48d6a86f04b6 100644
--- a/src/native/external/zlib/gzclose.c
+++ b/src/native/external/zlib/gzclose.c
@@ -8,9 +8,7 @@
 /* gzclose() is in a separate file so that it is linked in only if it is used.
    That way the other gzclose functions can be used instead to avoid linking in
    unneeded compression or decompression routines. */
-int ZEXPORT gzclose(file)
-    gzFile file;
-{
+int ZEXPORT gzclose(gzFile file) {
 #ifndef NO_GZCOMPRESS
     gz_statep state;
 
diff --git a/src/native/external/zlib/gzguts.h b/src/native/external/zlib/gzguts.h
index 57faf37165a3..eba72085bb75 100644
--- a/src/native/external/zlib/gzguts.h
+++ b/src/native/external/zlib/gzguts.h
@@ -1,5 +1,5 @@
 /* gzguts.h -- zlib internal header definitions for gz* operations
- * Copyright (C) 2004-2019 Mark Adler
+ * Copyright (C) 2004-2024 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -7,9 +7,8 @@
 #  ifndef _LARGEFILE_SOURCE
 #    define _LARGEFILE_SOURCE 1
 #  endif
-#  ifdef _FILE_OFFSET_BITS
-#    undef _FILE_OFFSET_BITS
-#  endif
+#  undef _FILE_OFFSET_BITS
+#  undef _TIME_BITS
 #endif
 
 #ifdef HAVE_HIDDEN
@@ -119,8 +118,8 @@
 
 /* gz* functions always use library allocation functions */
 #ifndef STDC
-  extern voidp  malloc OF((uInt size));
-  extern void   free   OF((voidpf ptr));
+  extern voidp  malloc(uInt size);
+  extern void   free(voidpf ptr);
 #endif
 
 /* get errno and strerror definition */
@@ -138,10 +137,10 @@
 
 /* provide prototypes for these when building zlib without LFS */
 #if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0
-    ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
-    ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
-    ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
-    ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
+    ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *);
+    ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int);
+    ZEXTERN z_off64_t ZEXPORT gztell64(gzFile);
+    ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile);
 #endif
 
 /* default memLevel */
@@ -203,17 +202,13 @@ typedef struct {
 typedef gz_state FAR *gz_statep;
 
 /* shared functions */
-void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *));
+void ZLIB_INTERNAL gz_error(gz_statep, int, const char *);
 #if defined UNDER_CE
-char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error));
+char ZLIB_INTERNAL *gz_strwinerror(DWORD error);
 #endif
 
 /* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t
    value -- needed when comparing unsigned to z_off64_t, which is signed
    (possible z_off64_t types off_t, off64_t, and long are all signed) */
-#ifdef INT_MAX
-#  define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX)
-#else
-unsigned ZLIB_INTERNAL gz_intmax OF((void));
-#  define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax())
-#endif
+unsigned ZLIB_INTERNAL gz_intmax(void);
+#define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax())
diff --git a/src/native/external/zlib/gzlib.c b/src/native/external/zlib/gzlib.c
index 55da46a453fd..983153cc8e49 100644
--- a/src/native/external/zlib/gzlib.c
+++ b/src/native/external/zlib/gzlib.c
@@ -1,5 +1,5 @@
 /* gzlib.c -- zlib functions common to reading and writing gzip files
- * Copyright (C) 2004-2019 Mark Adler
+ * Copyright (C) 2004-2024 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -15,10 +15,6 @@
 #endif
 #endif
 
-/* Local functions */
-local void gz_reset OF((gz_statep));
-local gzFile gz_open OF((const void *, int, const char *));
-
 #if defined UNDER_CE
 
 /* Map the Windows error number in ERROR to a locale-dependent error message
@@ -30,9 +26,7 @@ local gzFile gz_open OF((const void *, int, const char *));
 
    The gz_strwinerror function does not change the current setting of
    GetLastError. */
-char ZLIB_INTERNAL *gz_strwinerror(error)
-     DWORD error;
-{
+char ZLIB_INTERNAL *gz_strwinerror(DWORD error) {
     static char buf[1024];
 
     wchar_t *msgbuf;
@@ -72,9 +66,7 @@ char ZLIB_INTERNAL *gz_strwinerror(error)
 #endif /* UNDER_CE */
 
 /* Reset gzip file state */
-local void gz_reset(state)
-    gz_statep state;
-{
+local void gz_reset(gz_statep state) {
     state->x.have = 0;              /* no output data available */
     if (state->mode == GZ_READ) {   /* for reading ... */
         state->eof = 0;             /* not at end of file */
@@ -90,11 +82,7 @@ local void gz_reset(state)
 }
 
 /* Open a gzip file either by name or file descriptor. */
-local gzFile gz_open(path, fd, mode)
-    const void *path;
-    int fd;
-    const char *mode;
-{
+local gzFile gz_open(const void *path, int fd, const char *mode) {
     gz_statep state;
     z_size_t len;
     int oflag;
@@ -269,26 +257,17 @@ local gzFile gz_open(path, fd, mode)
 }
 
 /* -- see zlib.h -- */
-gzFile ZEXPORT gzopen(path, mode)
-    const char *path;
-    const char *mode;
-{
+gzFile ZEXPORT gzopen(const char *path, const char *mode) {
     return gz_open(path, -1, mode);
 }
 
 /* -- see zlib.h -- */
-gzFile ZEXPORT gzopen64(path, mode)
-    const char *path;
-    const char *mode;
-{
+gzFile ZEXPORT gzopen64(const char *path, const char *mode) {
     return gz_open(path, -1, mode);
 }
 
 /* -- see zlib.h -- */
-gzFile ZEXPORT gzdopen(fd, mode)
-    int fd;
-    const char *mode;
-{
+gzFile ZEXPORT gzdopen(int fd, const char *mode) {
     char *path;         /* identifier for error messages */
     gzFile gz;
 
@@ -306,19 +285,13 @@ gzFile ZEXPORT gzdopen(fd, mode)
 
 /* -- see zlib.h -- */
 #ifdef WIDECHAR
-gzFile ZEXPORT gzopen_w(path, mode)
-    const wchar_t *path;
-    const char *mode;
-{
+gzFile ZEXPORT gzopen_w(const wchar_t *path, const char *mode) {
     return gz_open(path, -2, mode);
 }
 #endif
 
 /* -- see zlib.h -- */
-int ZEXPORT gzbuffer(file, size)
-    gzFile file;
-    unsigned size;
-{
+int ZEXPORT gzbuffer(gzFile file, unsigned size) {
     gz_statep state;
 
     /* get internal structure and check integrity */
@@ -335,16 +308,14 @@ int ZEXPORT gzbuffer(file, size)
     /* check and set requested size */
     if ((size << 1) < size)
         return -1;              /* need to be able to double it */
-    if (size < 2)
-        size = 2;               /* need two bytes to check magic header */
+    if (size < 8)
+        size = 8;               /* needed to behave well with flushing */
     state->want = size;
     return 0;
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzrewind(file)
-    gzFile file;
-{
+int ZEXPORT gzrewind(gzFile file) {
     gz_statep state;
 
     /* get internal structure */
@@ -365,11 +336,7 @@ int ZEXPORT gzrewind(file)
 }
 
 /* -- see zlib.h -- */
-z_off64_t ZEXPORT gzseek64(file, offset, whence)
-    gzFile file;
-    z_off64_t offset;
-    int whence;
-{
+z_off64_t ZEXPORT gzseek64(gzFile file, z_off64_t offset, int whence) {
     unsigned n;
     z_off64_t ret;
     gz_statep state;
@@ -442,11 +409,7 @@ z_off64_t ZEXPORT gzseek64(file, offset, whence)
 }
 
 /* -- see zlib.h -- */
-z_off_t ZEXPORT gzseek(file, offset, whence)
-    gzFile file;
-    z_off_t offset;
-    int whence;
-{
+z_off_t ZEXPORT gzseek(gzFile file, z_off_t offset, int whence) {
     z_off64_t ret;
 
     ret = gzseek64(file, (z_off64_t)offset, whence);
@@ -454,9 +417,7 @@ z_off_t ZEXPORT gzseek(file, offset, whence)
 }
 
 /* -- see zlib.h -- */
-z_off64_t ZEXPORT gztell64(file)
-    gzFile file;
-{
+z_off64_t ZEXPORT gztell64(gzFile file) {
     gz_statep state;
 
     /* get internal structure and check integrity */
@@ -471,9 +432,7 @@ z_off64_t ZEXPORT gztell64(file)
 }
 
 /* -- see zlib.h -- */
-z_off_t ZEXPORT gztell(file)
-    gzFile file;
-{
+z_off_t ZEXPORT gztell(gzFile file) {
     z_off64_t ret;
 
     ret = gztell64(file);
@@ -481,9 +440,7 @@ z_off_t ZEXPORT gztell(file)
 }
 
 /* -- see zlib.h -- */
-z_off64_t ZEXPORT gzoffset64(file)
-    gzFile file;
-{
+z_off64_t ZEXPORT gzoffset64(gzFile file) {
     z_off64_t offset;
     gz_statep state;
 
@@ -504,9 +461,7 @@ z_off64_t ZEXPORT gzoffset64(file)
 }
 
 /* -- see zlib.h -- */
-z_off_t ZEXPORT gzoffset(file)
-    gzFile file;
-{
+z_off_t ZEXPORT gzoffset(gzFile file) {
     z_off64_t ret;
 
     ret = gzoffset64(file);
@@ -514,9 +469,7 @@ z_off_t ZEXPORT gzoffset(file)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzeof(file)
-    gzFile file;
-{
+int ZEXPORT gzeof(gzFile file) {
     gz_statep state;
 
     /* get internal structure and check integrity */
@@ -531,10 +484,7 @@ int ZEXPORT gzeof(file)
 }
 
 /* -- see zlib.h -- */
-const char * ZEXPORT gzerror(file, errnum)
-    gzFile file;
-    int *errnum;
-{
+const char * ZEXPORT gzerror(gzFile file, int *errnum) {
     gz_statep state;
 
     /* get internal structure and check integrity */
@@ -552,9 +502,7 @@ const char * ZEXPORT gzerror(file, errnum)
 }
 
 /* -- see zlib.h -- */
-void ZEXPORT gzclearerr(file)
-    gzFile file;
-{
+void ZEXPORT gzclearerr(gzFile file) {
     gz_statep state;
 
     /* get internal structure and check integrity */
@@ -578,11 +526,7 @@ void ZEXPORT gzclearerr(file)
    memory).  Simply save the error message as a static string.  If there is an
    allocation failure constructing the error message, then convert the error to
    out of memory. */
-void ZLIB_INTERNAL gz_error(state, err, msg)
-    gz_statep state;
-    int err;
-    const char *msg;
-{
+void ZLIB_INTERNAL gz_error(gz_statep state, int err, const char *msg) {
     /* free previously allocated message and clear */
     if (state->msg != NULL) {
         if (state->err != Z_MEM_ERROR)
@@ -619,21 +563,20 @@ void ZLIB_INTERNAL gz_error(state, err, msg)
 #endif
 }
 
-#ifndef INT_MAX
 /* portably return maximum value for an int (when limits.h presumed not
    available) -- we need to do this to cover cases where 2's complement not
    used, since C standard permits 1's complement and sign-bit representations,
    otherwise we could just use ((unsigned)-1) >> 1 */
-unsigned ZLIB_INTERNAL gz_intmax()
-{
-    unsigned p, q;
-
-    p = 1;
+unsigned ZLIB_INTERNAL gz_intmax(void) {
+#ifdef INT_MAX
+    return INT_MAX;
+#else
+    unsigned p = 1, q;
     do {
         q = p;
         p <<= 1;
         p++;
     } while (p > q);
     return q >> 1;
-}
 #endif
+}
diff --git a/src/native/external/zlib/gzread.c b/src/native/external/zlib/gzread.c
index dd77381596cb..4168cbc88752 100644
--- a/src/native/external/zlib/gzread.c
+++ b/src/native/external/zlib/gzread.c
@@ -5,25 +5,12 @@
 
 #include "gzguts.h"
 
-/* Local functions */
-local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
-local int gz_avail OF((gz_statep));
-local int gz_look OF((gz_statep));
-local int gz_decomp OF((gz_statep));
-local int gz_fetch OF((gz_statep));
-local int gz_skip OF((gz_statep, z_off64_t));
-local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
-
 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
    state->fd, and update state->eof, state->err, and state->msg as appropriate.
    This function needs to loop on read(), since read() is not guaranteed to
    read the number of bytes requested, depending on the type of descriptor. */
-local int gz_load(state, buf, len, have)
-    gz_statep state;
-    unsigned char *buf;
-    unsigned len;
-    unsigned *have;
-{
+local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
+                  unsigned *have) {
     int ret;
     unsigned get, max = ((unsigned)-1 >> 2) + 1;
 
@@ -53,9 +40,7 @@ local int gz_load(state, buf, len, have)
    If strm->avail_in != 0, then the current data is moved to the beginning of
    the input buffer, and then the remainder of the buffer is loaded with the
    available data from the input file. */
-local int gz_avail(state)
-    gz_statep state;
-{
+local int gz_avail(gz_statep state) {
     unsigned got;
     z_streamp strm = &(state->strm);
 
@@ -88,9 +73,7 @@ local int gz_avail(state)
    case, all further file reads will be directly to either the output buffer or
    a user buffer.  If decompressing, the inflate state will be initialized.
    gz_look() will return 0 on success or -1 on failure. */
-local int gz_look(state)
-    gz_statep state;
-{
+local int gz_look(gz_statep state) {
     z_streamp strm = &(state->strm);
 
     /* allocate read buffers and inflate memory */
@@ -170,9 +153,7 @@ local int gz_look(state)
    data.  If the gzip stream completes, state->how is reset to LOOK to look for
    the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
    on success, -1 on failure. */
-local int gz_decomp(state)
-    gz_statep state;
-{
+local int gz_decomp(gz_statep state) {
     int ret = Z_OK;
     unsigned had;
     z_streamp strm = &(state->strm);
@@ -224,9 +205,7 @@ local int gz_decomp(state)
    looked for to determine whether to copy or decompress.  Returns -1 on error,
    otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
    end of the input file has been reached and all data has been processed.  */
-local int gz_fetch(state)
-    gz_statep state;
-{
+local int gz_fetch(gz_statep state) {
     z_streamp strm = &(state->strm);
 
     do {
@@ -254,10 +233,7 @@ local int gz_fetch(state)
 }
 
 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
-local int gz_skip(state, len)
-    gz_statep state;
-    z_off64_t len;
-{
+local int gz_skip(gz_statep state, z_off64_t len) {
     unsigned n;
 
     /* skip over len bytes or reach end-of-file, whichever comes first */
@@ -289,11 +265,7 @@ local int gz_skip(state, len)
    input.  Return the number of bytes read.  If zero is returned, either the
    end of file was reached, or there was an error.  state->err must be
    consulted in that case to determine which. */
-local z_size_t gz_read(state, buf, len)
-    gz_statep state;
-    voidp buf;
-    z_size_t len;
-{
+local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) {
     z_size_t got;
     unsigned n;
 
@@ -370,11 +342,7 @@ local z_size_t gz_read(state, buf, len)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzread(file, buf, len)
-    gzFile file;
-    voidp buf;
-    unsigned len;
-{
+int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) {
     gz_statep state;
 
     /* get internal structure */
@@ -406,12 +374,7 @@ int ZEXPORT gzread(file, buf, len)
 }
 
 /* -- see zlib.h -- */
-z_size_t ZEXPORT gzfread(buf, size, nitems, file)
-    voidp buf;
-    z_size_t size;
-    z_size_t nitems;
-    gzFile file;
-{
+z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, gzFile file) {
     z_size_t len;
     gz_statep state;
 
@@ -442,9 +405,7 @@ z_size_t ZEXPORT gzfread(buf, size, nitems, file)
 #else
 #  undef gzgetc
 #endif
-int ZEXPORT gzgetc(file)
-    gzFile file;
-{
+int ZEXPORT gzgetc(gzFile file) {
     unsigned char buf[1];
     gz_statep state;
 
@@ -469,17 +430,12 @@ int ZEXPORT gzgetc(file)
     return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
 }
 
-int ZEXPORT gzgetc_(file)
-gzFile file;
-{
+int ZEXPORT gzgetc_(gzFile file) {
     return gzgetc(file);
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzungetc(c, file)
-    int c;
-    gzFile file;
-{
+int ZEXPORT gzungetc(int c, gzFile file) {
     gz_statep state;
 
     /* get internal structure */
@@ -487,6 +443,10 @@ int ZEXPORT gzungetc(c, file)
         return -1;
     state = (gz_statep)file;
 
+    /* in case this was just opened, set up the input buffer */
+    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
+        (void)gz_look(state);
+
     /* check that we're reading and that there's no (serious) error */
     if (state->mode != GZ_READ ||
         (state->err != Z_OK && state->err != Z_BUF_ERROR))
@@ -536,11 +496,7 @@ int ZEXPORT gzungetc(c, file)
 }
 
 /* -- see zlib.h -- */
-char * ZEXPORT gzgets(file, buf, len)
-    gzFile file;
-    char *buf;
-    int len;
-{
+char * ZEXPORT gzgets(gzFile file, char *buf, int len) {
     unsigned left, n;
     char *str;
     unsigned char *eol;
@@ -600,9 +556,7 @@ char * ZEXPORT gzgets(file, buf, len)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzdirect(file)
-    gzFile file;
-{
+int ZEXPORT gzdirect(gzFile file) {
     gz_statep state;
 
     /* get internal structure */
@@ -620,9 +574,7 @@ int ZEXPORT gzdirect(file)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzclose_r(file)
-    gzFile file;
-{
+int ZEXPORT gzclose_r(gzFile file) {
     int ret, err;
     gz_statep state;
 
diff --git a/src/native/external/zlib/gzwrite.c b/src/native/external/zlib/gzwrite.c
index eb8a0e5893ff..435b4621b534 100644
--- a/src/native/external/zlib/gzwrite.c
+++ b/src/native/external/zlib/gzwrite.c
@@ -5,18 +5,10 @@
 
 #include "gzguts.h"
 
-/* Local functions */
-local int gz_init OF((gz_statep));
-local int gz_comp OF((gz_statep, int));
-local int gz_zero OF((gz_statep, z_off64_t));
-local z_size_t gz_write OF((gz_statep, voidpc, z_size_t));
-
 /* Initialize state for writing a gzip file.  Mark initialization by setting
    state->size to non-zero.  Return -1 on a memory allocation failure, or 0 on
    success. */
-local int gz_init(state)
-    gz_statep state;
-{
+local int gz_init(gz_statep state) {
     int ret;
     z_streamp strm = &(state->strm);
 
@@ -70,10 +62,7 @@ local int gz_init(state)
    deflate() flush value.  If flush is Z_FINISH, then the deflate() state is
    reset to start a new gzip stream.  If gz->direct is true, then simply write
    to the output file without compressing, and ignore flush. */
-local int gz_comp(state, flush)
-    gz_statep state;
-    int flush;
-{
+local int gz_comp(gz_statep state, int flush) {
     int ret, writ;
     unsigned have, put, max = ((unsigned)-1 >> 2) + 1;
     z_streamp strm = &(state->strm);
@@ -151,10 +140,7 @@ local int gz_comp(state, flush)
 
 /* Compress len zeros to output.  Return -1 on a write error or memory
    allocation failure by gz_comp(), or 0 on success. */
-local int gz_zero(state, len)
-    gz_statep state;
-    z_off64_t len;
-{
+local int gz_zero(gz_statep state, z_off64_t len) {
     int first;
     unsigned n;
     z_streamp strm = &(state->strm);
@@ -184,11 +170,7 @@ local int gz_zero(state, len)
 
 /* Write len bytes from buf to file.  Return the number of bytes written.  If
    the returned value is less than len, then there was an error. */
-local z_size_t gz_write(state, buf, len)
-    gz_statep state;
-    voidpc buf;
-    z_size_t len;
-{
+local z_size_t gz_write(gz_statep state, voidpc buf, z_size_t len) {
     z_size_t put = len;
 
     /* if len is zero, avoid unnecessary operations */
@@ -252,11 +234,7 @@ local z_size_t gz_write(state, buf, len)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzwrite(file, buf, len)
-    gzFile file;
-    voidpc buf;
-    unsigned len;
-{
+int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len) {
     gz_statep state;
 
     /* get internal structure */
@@ -280,12 +258,8 @@ int ZEXPORT gzwrite(file, buf, len)
 }
 
 /* -- see zlib.h -- */
-z_size_t ZEXPORT gzfwrite(buf, size, nitems, file)
-    voidpc buf;
-    z_size_t size;
-    z_size_t nitems;
-    gzFile file;
-{
+z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size, z_size_t nitems,
+                          gzFile file) {
     z_size_t len;
     gz_statep state;
 
@@ -310,10 +284,7 @@ z_size_t ZEXPORT gzfwrite(buf, size, nitems, file)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzputc(file, c)
-    gzFile file;
-    int c;
-{
+int ZEXPORT gzputc(gzFile file, int c) {
     unsigned have;
     unsigned char buf[1];
     gz_statep state;
@@ -358,10 +329,7 @@ int ZEXPORT gzputc(file, c)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzputs(file, s)
-    gzFile file;
-    const char *s;
-{
+int ZEXPORT gzputs(gzFile file, const char *s) {
     z_size_t len, put;
     gz_statep state;
 
@@ -388,8 +356,7 @@ int ZEXPORT gzputs(file, s)
 #include <stdarg.h>
 
 /* -- see zlib.h -- */
-int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
-{
+int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) {
     int len;
     unsigned left;
     char *next;
@@ -460,8 +427,7 @@ int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
     return len;
 }
 
-int ZEXPORTVA gzprintf(gzFile file, const char *format, ...)
-{
+int ZEXPORTVA gzprintf(gzFile file, const char *format, ...) {
     va_list va;
     int ret;
 
@@ -474,13 +440,10 @@ int ZEXPORTVA gzprintf(gzFile file, const char *format, ...)
 #else /* !STDC && !Z_HAVE_STDARG_H */
 
 /* -- see zlib.h -- */
-int ZEXPORTVA gzprintf(file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
-                       a11, a12, a13, a14, a15, a16, a17, a18, a19, a20)
-    gzFile file;
-    const char *format;
-    int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
-        a11, a12, a13, a14, a15, a16, a17, a18, a19, a20;
-{
+int ZEXPORTVA gzprintf(gzFile file, const char *format, int a1, int a2, int a3,
+                       int a4, int a5, int a6, int a7, int a8, int a9, int a10,
+                       int a11, int a12, int a13, int a14, int a15, int a16,
+                       int a17, int a18, int a19, int a20) {
     unsigned len, left;
     char *next;
     gz_statep state;
@@ -562,10 +525,7 @@ int ZEXPORTVA gzprintf(file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
 #endif
 
 /* -- see zlib.h -- */
-int ZEXPORT gzflush(file, flush)
-    gzFile file;
-    int flush;
-{
+int ZEXPORT gzflush(gzFile file, int flush) {
     gz_statep state;
 
     /* get internal structure */
@@ -594,11 +554,7 @@ int ZEXPORT gzflush(file, flush)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzsetparams(file, level, strategy)
-    gzFile file;
-    int level;
-    int strategy;
-{
+int ZEXPORT gzsetparams(gzFile file, int level, int strategy) {
     gz_statep state;
     z_streamp strm;
 
@@ -609,7 +565,7 @@ int ZEXPORT gzsetparams(file, level, strategy)
     strm = &(state->strm);
 
     /* check that we're writing and that there's no error */
-    if (state->mode != GZ_WRITE || state->err != Z_OK)
+    if (state->mode != GZ_WRITE || state->err != Z_OK || state->direct)
         return Z_STREAM_ERROR;
 
     /* if no change is requested, then do nothing */
@@ -636,9 +592,7 @@ int ZEXPORT gzsetparams(file, level, strategy)
 }
 
 /* -- see zlib.h -- */
-int ZEXPORT gzclose_w(file)
-    gzFile file;
-{
+int ZEXPORT gzclose_w(gzFile file) {
     int ret = Z_OK;
     gz_statep state;
 
diff --git a/src/native/external/zlib/infback.c b/src/native/external/zlib/infback.c
index babeaf1806f9..e7b25b307a30 100644
--- a/src/native/external/zlib/infback.c
+++ b/src/native/external/zlib/infback.c
@@ -15,9 +15,6 @@
 #include "inflate.h"
 #include "inffast.h"
 
-/* function prototypes */
-local void fixedtables OF((struct inflate_state FAR *state));
-
 /*
    strm provides memory allocation functions in zalloc and zfree, or
    Z_NULL to use the library memory allocation functions.
@@ -25,13 +22,9 @@ local void fixedtables OF((struct inflate_state FAR *state));
    windowBits is in the range 8..15, and window is a user-supplied
    window and output buffer that is 2**windowBits bytes.
  */
-int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size)
-z_streamp strm;
-int windowBits;
-unsigned char FAR *window;
-const char *version;
-int stream_size;
-{
+int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits,
+                             unsigned char FAR *window, const char *version,
+                             int stream_size) {
     struct inflate_state FAR *state;
 
     if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
@@ -80,9 +73,7 @@ int stream_size;
    used for threaded applications, since the rewriting of the tables and virgin
    may not be thread-safe.
  */
-local void fixedtables(state)
-struct inflate_state FAR *state;
-{
+local void fixedtables(struct inflate_state FAR *state) {
 #ifdef BUILDFIXED
     static int virgin = 1;
     static code *lenfix, *distfix;
@@ -248,13 +239,8 @@ struct inflate_state FAR *state;
    inflateBack() can also return Z_STREAM_ERROR if the input parameters
    are not correct, i.e. strm is Z_NULL or the state was not initialized.
  */
-int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc)
-z_streamp strm;
-in_func in;
-void FAR *in_desc;
-out_func out;
-void FAR *out_desc;
-{
+int ZEXPORT inflateBack(z_streamp strm, in_func in, void FAR *in_desc,
+                        out_func out, void FAR *out_desc) {
     struct inflate_state FAR *state;
     z_const unsigned char FAR *next;    /* next input */
     unsigned char FAR *put;     /* next output */
@@ -632,9 +618,7 @@ void FAR *out_desc;
     return ret;
 }
 
-int ZEXPORT inflateBackEnd(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateBackEnd(z_streamp strm) {
     if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0)
         return Z_STREAM_ERROR;
     ZFREE(strm, strm->state);
diff --git a/src/native/external/zlib/inffast.c b/src/native/external/zlib/inffast.c
index 1fec7f363fa6..9354676e786e 100644
--- a/src/native/external/zlib/inffast.c
+++ b/src/native/external/zlib/inffast.c
@@ -47,10 +47,7 @@
       requires strm->avail_out >= 258 for each loop to avoid checking for
       output space.
  */
-void ZLIB_INTERNAL inflate_fast(strm, start)
-z_streamp strm;
-unsigned start;         /* inflate()'s starting value for strm->avail_out */
-{
+void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start) {
     struct inflate_state FAR *state;
     z_const unsigned char FAR *in;      /* local strm->next_in */
     z_const unsigned char FAR *last;    /* have enough input while in < last */
diff --git a/src/native/external/zlib/inffast.h b/src/native/external/zlib/inffast.h
index e5c1aa4ca8cd..49c6d156c5c6 100644
--- a/src/native/external/zlib/inffast.h
+++ b/src/native/external/zlib/inffast.h
@@ -8,4 +8,4 @@
    subject to change. Applications should only use zlib.h.
  */
 
-void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start));
+void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start);
diff --git a/src/native/external/zlib/inflate.c b/src/native/external/zlib/inflate.c
index 8acbef44e993..94ecff015a9b 100644
--- a/src/native/external/zlib/inflate.c
+++ b/src/native/external/zlib/inflate.c
@@ -91,20 +91,7 @@
 #  endif
 #endif
 
-/* function prototypes */
-local int inflateStateCheck OF((z_streamp strm));
-local void fixedtables OF((struct inflate_state FAR *state));
-local int updatewindow OF((z_streamp strm, const unsigned char FAR *end,
-                           unsigned copy));
-#ifdef BUILDFIXED
-   void makefixed OF((void));
-#endif
-local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf,
-                              unsigned len));
-
-local int inflateStateCheck(strm)
-z_streamp strm;
-{
+local int inflateStateCheck(z_streamp strm) {
     struct inflate_state FAR *state;
     if (strm == Z_NULL ||
         strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
@@ -116,9 +103,7 @@ z_streamp strm;
     return 0;
 }
 
-int ZEXPORT inflateResetKeep(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateResetKeep(z_streamp strm) {
     struct inflate_state FAR *state;
 
     if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
@@ -142,9 +127,7 @@ z_streamp strm;
     return Z_OK;
 }
 
-int ZEXPORT inflateReset(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateReset(z_streamp strm) {
     struct inflate_state FAR *state;
 
     if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
@@ -155,10 +138,7 @@ z_streamp strm;
     return inflateResetKeep(strm);
 }
 
-int ZEXPORT inflateReset2(strm, windowBits)
-z_streamp strm;
-int windowBits;
-{
+int ZEXPORT inflateReset2(z_streamp strm, int windowBits) {
     int wrap;
     struct inflate_state FAR *state;
 
@@ -195,12 +175,8 @@ int windowBits;
     return inflateReset(strm);
 }
 
-int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
-z_streamp strm;
-int windowBits;
-const char *version;
-int stream_size;
-{
+int ZEXPORT inflateInit2_(z_streamp strm, int windowBits,
+                          const char *version, int stream_size) {
     int ret;
     struct inflate_state FAR *state;
 
@@ -239,22 +215,17 @@ int stream_size;
     return ret;
 }
 
-int ZEXPORT inflateInit_(strm, version, stream_size)
-z_streamp strm;
-const char *version;
-int stream_size;
-{
+int ZEXPORT inflateInit_(z_streamp strm, const char *version,
+                         int stream_size) {
     return inflateInit2_(strm, DEF_WBITS, version, stream_size);
 }
 
-int ZEXPORT inflatePrime(strm, bits, value)
-z_streamp strm;
-int bits;
-int value;
-{
+int ZEXPORT inflatePrime(z_streamp strm, int bits, int value) {
     struct inflate_state FAR *state;
 
     if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    if (bits == 0)
+        return Z_OK;
     state = (struct inflate_state FAR *)strm->state;
     if (bits < 0) {
         state->hold = 0;
@@ -278,9 +249,7 @@ int value;
    used for threaded applications, since the rewriting of the tables and virgin
    may not be thread-safe.
  */
-local void fixedtables(state)
-struct inflate_state FAR *state;
-{
+local void fixedtables(struct inflate_state FAR *state) {
 #ifdef BUILDFIXED
     static int virgin = 1;
     static code *lenfix, *distfix;
@@ -342,7 +311,7 @@ struct inflate_state FAR *state;
 
     a.out > inffixed.h
  */
-void makefixed()
+void makefixed(void)
 {
     unsigned low, size;
     struct inflate_state state;
@@ -396,11 +365,7 @@ void makefixed()
    output will fall in the output data, making match copies simpler and faster.
    The advantage may be dependent on the size of the processor's data caches.
  */
-local int updatewindow(strm, end, copy)
-z_streamp strm;
-const Bytef *end;
-unsigned copy;
-{
+local int updatewindow(z_streamp strm, const Bytef *end, unsigned copy) {
     struct inflate_state FAR *state;
     unsigned dist;
 
@@ -622,10 +587,7 @@ unsigned copy;
    will return Z_BUF_ERROR if it has not reached the end of the stream.
  */
 
-int ZEXPORT inflate(strm, flush)
-z_streamp strm;
-int flush;
-{
+int ZEXPORT inflate(z_streamp strm, int flush) {
     struct inflate_state FAR *state;
     z_const unsigned char FAR *next;    /* next input */
     unsigned char FAR *put;     /* next output */
@@ -1301,9 +1263,7 @@ int flush;
     return ret;
 }
 
-int ZEXPORT inflateEnd(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateEnd(z_streamp strm) {
     struct inflate_state FAR *state;
     if (inflateStateCheck(strm))
         return Z_STREAM_ERROR;
@@ -1315,11 +1275,8 @@ z_streamp strm;
     return Z_OK;
 }
 
-int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength)
-z_streamp strm;
-Bytef *dictionary;
-uInt *dictLength;
-{
+int ZEXPORT inflateGetDictionary(z_streamp strm, Bytef *dictionary,
+                                 uInt *dictLength) {
     struct inflate_state FAR *state;
 
     /* check state */
@@ -1338,11 +1295,8 @@ uInt *dictLength;
     return Z_OK;
 }
 
-int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength)
-z_streamp strm;
-const Bytef *dictionary;
-uInt dictLength;
-{
+int ZEXPORT inflateSetDictionary(z_streamp strm, const Bytef *dictionary,
+                                 uInt dictLength) {
     struct inflate_state FAR *state;
     unsigned long dictid;
     int ret;
@@ -1373,10 +1327,7 @@ uInt dictLength;
     return Z_OK;
 }
 
-int ZEXPORT inflateGetHeader(strm, head)
-z_streamp strm;
-gz_headerp head;
-{
+int ZEXPORT inflateGetHeader(z_streamp strm, gz_headerp head) {
     struct inflate_state FAR *state;
 
     /* check state */
@@ -1401,11 +1352,8 @@ gz_headerp head;
    called again with more data and the *have state.  *have is initialized to
    zero for the first call.
  */
-local unsigned syncsearch(have, buf, len)
-unsigned FAR *have;
-const unsigned char FAR *buf;
-unsigned len;
-{
+local unsigned syncsearch(unsigned FAR *have, const unsigned char FAR *buf,
+                          unsigned len) {
     unsigned got;
     unsigned next;
 
@@ -1424,9 +1372,7 @@ unsigned len;
     return next;
 }
 
-int ZEXPORT inflateSync(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateSync(z_streamp strm) {
     unsigned len;               /* number of bytes to look at or looked at */
     int flags;                  /* temporary to save header status */
     unsigned long in, out;      /* temporary to save total_in and total_out */
@@ -1441,7 +1387,7 @@ z_streamp strm;
     /* if first time, start search in bit buffer */
     if (state->mode != SYNC) {
         state->mode = SYNC;
-        state->hold <<= state->bits & 7;
+        state->hold >>= state->bits & 7;
         state->bits -= state->bits & 7;
         len = 0;
         while (state->bits >= 8) {
@@ -1482,9 +1428,7 @@ z_streamp strm;
    block. When decompressing, PPP checks that at the end of input packet,
    inflate is waiting for these length bytes.
  */
-int ZEXPORT inflateSyncPoint(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateSyncPoint(z_streamp strm) {
     struct inflate_state FAR *state;
 
     if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
@@ -1492,10 +1436,7 @@ z_streamp strm;
     return state->mode == STORED && state->bits == 0;
 }
 
-int ZEXPORT inflateCopy(dest, source)
-z_streamp dest;
-z_streamp source;
-{
+int ZEXPORT inflateCopy(z_streamp dest, z_streamp source) {
     struct inflate_state FAR *state;
     struct inflate_state FAR *copy;
     unsigned char FAR *window;
@@ -1539,10 +1480,7 @@ z_streamp source;
     return Z_OK;
 }
 
-int ZEXPORT inflateUndermine(strm, subvert)
-z_streamp strm;
-int subvert;
-{
+int ZEXPORT inflateUndermine(z_streamp strm, int subvert) {
     struct inflate_state FAR *state;
 
     if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
@@ -1557,10 +1495,7 @@ int subvert;
 #endif
 }
 
-int ZEXPORT inflateValidate(strm, check)
-z_streamp strm;
-int check;
-{
+int ZEXPORT inflateValidate(z_streamp strm, int check) {
     struct inflate_state FAR *state;
 
     if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
@@ -1572,9 +1507,7 @@ int check;
     return Z_OK;
 }
 
-long ZEXPORT inflateMark(strm)
-z_streamp strm;
-{
+long ZEXPORT inflateMark(z_streamp strm) {
     struct inflate_state FAR *state;
 
     if (inflateStateCheck(strm))
@@ -1585,9 +1518,7 @@ z_streamp strm;
             (state->mode == MATCH ? state->was - state->length : 0));
 }
 
-unsigned long ZEXPORT inflateCodesUsed(strm)
-z_streamp strm;
-{
+unsigned long ZEXPORT inflateCodesUsed(z_streamp strm) {
     struct inflate_state FAR *state;
     if (inflateStateCheck(strm)) return (unsigned long)-1;
     state = (struct inflate_state FAR *)strm->state;
diff --git a/src/native/external/zlib/inftrees.c b/src/native/external/zlib/inftrees.c
index 57d2793bec93..98cfe164458c 100644
--- a/src/native/external/zlib/inftrees.c
+++ b/src/native/external/zlib/inftrees.c
@@ -1,5 +1,5 @@
 /* inftrees.c -- generate Huffman trees for efficient decoding
- * Copyright (C) 1995-2022 Mark Adler
+ * Copyright (C) 1995-2024 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -9,7 +9,7 @@
 #define MAXBITS 15
 
 const char inflate_copyright[] =
-   " inflate 1.2.13 Copyright 1995-2022 Mark Adler ";
+   " inflate 1.3.1 Copyright 1995-2024 Mark Adler ";
 /*
   If you use the zlib library in a product, an acknowledgment is welcome
   in the documentation of your product. If for some reason you cannot
@@ -29,14 +29,9 @@ const char inflate_copyright[] =
    table index bits.  It will differ if the request is greater than the
    longest code or if it is less than the shortest code.
  */
-int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work)
-codetype type;
-unsigned short FAR *lens;
-unsigned codes;
-code FAR * FAR *table;
-unsigned FAR *bits;
-unsigned short FAR *work;
-{
+int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
+                                unsigned codes, code FAR * FAR *table,
+                                unsigned FAR *bits, unsigned short FAR *work) {
     unsigned len;               /* a code's length in bits */
     unsigned sym;               /* index of code symbols */
     unsigned min, max;          /* minimum and maximum code lengths */
@@ -62,7 +57,7 @@ unsigned short FAR *work;
         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
         16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
-        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 194, 65};
+        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77};
     static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
diff --git a/src/native/external/zlib/inftrees.h b/src/native/external/zlib/inftrees.h
index f53665311c16..396f74b5da79 100644
--- a/src/native/external/zlib/inftrees.h
+++ b/src/native/external/zlib/inftrees.h
@@ -41,8 +41,8 @@ typedef struct {
    examples/enough.c found in the zlib distribution.  The arguments to that
    program are the number of symbols, the initial root table size, and the
    maximum bit length of a code.  "enough 286 9 15" for literal/length codes
-   returns returns 852, and "enough 30 6 15" for distance codes returns 592.
-   The initial root table size (9 or 6) is found in the fifth argument of the
+   returns 852, and "enough 30 6 15" for distance codes returns 592. The
+   initial root table size (9 or 6) is found in the fifth argument of the
    inflate_table() calls in inflate.c and infback.c.  If the root table size is
    changed, then these maximum sizes would be need to be recalculated and
    updated. */
@@ -57,6 +57,6 @@ typedef enum {
     DISTS
 } codetype;
 
-int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens,
-                             unsigned codes, code FAR * FAR *table,
-                             unsigned FAR *bits, unsigned short FAR *work));
+int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
+                                unsigned codes, code FAR * FAR *table,
+                                unsigned FAR *bits, unsigned short FAR *work);
diff --git a/src/native/external/zlib/treebuild.xml b/src/native/external/zlib/treebuild.xml
index 0017a45d3c5c..930b00be4a85 100644
--- a/src/native/external/zlib/treebuild.xml
+++ b/src/native/external/zlib/treebuild.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" ?>
-<package name="zlib" version="1.2.13">
-    <library name="zlib" dlversion="1.2.13" dlname="z">
+<package name="zlib" version="1.3.1">
+    <library name="zlib" dlversion="1.3.1" dlname="z">
 	<property name="description"> zip compression library </property>
 	<property name="include-target-dir" value="$(@PACKAGE/install-includedir)" />
 
diff --git a/src/native/external/zlib/trees.c b/src/native/external/zlib/trees.c
index 8a3eec559e55..979ae4100a02 100644
--- a/src/native/external/zlib/trees.c
+++ b/src/native/external/zlib/trees.c
@@ -1,5 +1,5 @@
 /* trees.c -- output deflated data using Huffman coding
- * Copyright (C) 1995-2021 Jean-loup Gailly
+ * Copyright (C) 1995-2024 Jean-loup Gailly
  * detect_data_type() function provided freely by Cosmin Truta, 2006
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
@@ -122,39 +122,116 @@ struct static_tree_desc_s {
     int     max_length;          /* max bit length for the codes */
 };
 
-local const static_tree_desc  static_l_desc =
+#ifdef NO_INIT_GLOBAL_POINTERS
+#  define TCONST
+#else
+#  define TCONST const
+#endif
+
+local TCONST static_tree_desc static_l_desc =
 {static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
 
-local const static_tree_desc  static_d_desc =
+local TCONST static_tree_desc static_d_desc =
 {static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS};
 
-local const static_tree_desc  static_bl_desc =
+local TCONST static_tree_desc static_bl_desc =
 {(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS};
 
 /* ===========================================================================
- * Local (static) routines in this file.
+ * Output a short LSB first on the stream.
+ * IN assertion: there is enough room in pendingBuf.
+ */
+#define put_short(s, w) { \
+    put_byte(s, (uch)((w) & 0xff)); \
+    put_byte(s, (uch)((ush)(w) >> 8)); \
+}
+
+/* ===========================================================================
+ * Reverse the first len bits of a code, using straightforward code (a faster
+ * method would use a table)
+ * IN assertion: 1 <= len <= 15
  */
+local unsigned bi_reverse(unsigned code, int len) {
+    register unsigned res = 0;
+    do {
+        res |= code & 1;
+        code >>= 1, res <<= 1;
+    } while (--len > 0);
+    return res >> 1;
+}
 
-local void tr_static_init OF((void));
-local void init_block     OF((deflate_state *s));
-local void pqdownheap     OF((deflate_state *s, ct_data *tree, int k));
-local void gen_bitlen     OF((deflate_state *s, tree_desc *desc));
-local void gen_codes      OF((ct_data *tree, int max_code, ushf *bl_count));
-local void build_tree     OF((deflate_state *s, tree_desc *desc));
-local void scan_tree      OF((deflate_state *s, ct_data *tree, int max_code));
-local void send_tree      OF((deflate_state *s, ct_data *tree, int max_code));
-local int  build_bl_tree  OF((deflate_state *s));
-local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
-                              int blcodes));
-local void compress_block OF((deflate_state *s, const ct_data *ltree,
-                              const ct_data *dtree));
-local int  detect_data_type OF((deflate_state *s));
-local unsigned bi_reverse OF((unsigned code, int len));
-local void bi_windup      OF((deflate_state *s));
-local void bi_flush       OF((deflate_state *s));
+/* ===========================================================================
+ * Flush the bit buffer, keeping at most 7 bits in it.
+ */
+local void bi_flush(deflate_state *s) {
+    if (s->bi_valid == 16) {
+        put_short(s, s->bi_buf);
+        s->bi_buf = 0;
+        s->bi_valid = 0;
+    } else if (s->bi_valid >= 8) {
+        put_byte(s, (Byte)s->bi_buf);
+        s->bi_buf >>= 8;
+        s->bi_valid -= 8;
+    }
+}
+
+/* ===========================================================================
+ * Flush the bit buffer and align the output on a byte boundary
+ */
+local void bi_windup(deflate_state *s) {
+    if (s->bi_valid > 8) {
+        put_short(s, s->bi_buf);
+    } else if (s->bi_valid > 0) {
+        put_byte(s, (Byte)s->bi_buf);
+    }
+    s->bi_buf = 0;
+    s->bi_valid = 0;
+#ifdef ZLIB_DEBUG
+    s->bits_sent = (s->bits_sent + 7) & ~7;
+#endif
+}
+
+/* ===========================================================================
+ * Generate the codes for a given tree and bit counts (which need not be
+ * optimal).
+ * IN assertion: the array bl_count contains the bit length statistics for
+ * the given tree and the field len is set for all tree elements.
+ * OUT assertion: the field code is set for all tree elements of non
+ *     zero code length.
+ */
+local void gen_codes(ct_data *tree, int max_code, ushf *bl_count) {
+    ush next_code[MAX_BITS+1]; /* next code value for each bit length */
+    unsigned code = 0;         /* running code value */
+    int bits;                  /* bit index */
+    int n;                     /* code index */
+
+    /* The distribution counts are first used to generate the code values
+     * without bit reversal.
+     */
+    for (bits = 1; bits <= MAX_BITS; bits++) {
+        code = (code + bl_count[bits - 1]) << 1;
+        next_code[bits] = (ush)code;
+    }
+    /* Check that the bit counts in bl_count are consistent. The last code
+     * must be all ones.
+     */
+    Assert (code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1,
+            "inconsistent bit counts");
+    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
+
+    for (n = 0;  n <= max_code; n++) {
+        int len = tree[n].Len;
+        if (len == 0) continue;
+        /* Now reverse the bits */
+        tree[n].Code = (ush)bi_reverse(next_code[len]++, len);
+
+        Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
+            n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len] - 1));
+    }
+}
 
 #ifdef GEN_TREES_H
-local void gen_trees_header OF((void));
+local void gen_trees_header(void);
 #endif
 
 #ifndef ZLIB_DEBUG
@@ -167,27 +244,12 @@ local void gen_trees_header OF((void));
        send_bits(s, tree[c].Code, tree[c].Len); }
 #endif
 
-/* ===========================================================================
- * Output a short LSB first on the stream.
- * IN assertion: there is enough room in pendingBuf.
- */
-#define put_short(s, w) { \
-    put_byte(s, (uch)((w) & 0xff)); \
-    put_byte(s, (uch)((ush)(w) >> 8)); \
-}
-
 /* ===========================================================================
  * Send a value on a given number of bits.
  * IN assertion: length <= 16 and value fits in length bits.
  */
 #ifdef ZLIB_DEBUG
-local void send_bits      OF((deflate_state *s, int value, int length));
-
-local void send_bits(s, value, length)
-    deflate_state *s;
-    int value;  /* value to send */
-    int length; /* number of bits */
-{
+local void send_bits(deflate_state *s, int value, int length) {
     Tracevv((stderr," l %2d v %4x ", length, value));
     Assert(length > 0 && length <= 15, "invalid length");
     s->bits_sent += (ulg)length;
@@ -229,8 +291,7 @@ local void send_bits(s, value, length)
 /* ===========================================================================
  * Initialize the various 'constant' tables.
  */
-local void tr_static_init()
-{
+local void tr_static_init(void) {
 #if defined(GEN_TREES_H) || !defined(STDC)
     static int static_init_done = 0;
     int n;        /* iterates over tree elements */
@@ -323,8 +384,7 @@ local void tr_static_init()
       ((i) == (last)? "\n};\n\n" :    \
        ((i) % (width) == (width) - 1 ? ",\n" : ", "))
 
-void gen_trees_header()
-{
+void gen_trees_header(void) {
     FILE *header = fopen("trees.h", "w");
     int i;
 
@@ -373,12 +433,26 @@ void gen_trees_header()
 }
 #endif /* GEN_TREES_H */
 
+/* ===========================================================================
+ * Initialize a new block.
+ */
+local void init_block(deflate_state *s) {
+    int n; /* iterates over tree elements */
+
+    /* Initialize the trees. */
+    for (n = 0; n < L_CODES;  n++) s->dyn_ltree[n].Freq = 0;
+    for (n = 0; n < D_CODES;  n++) s->dyn_dtree[n].Freq = 0;
+    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
+
+    s->dyn_ltree[END_BLOCK].Freq = 1;
+    s->opt_len = s->static_len = 0L;
+    s->sym_next = s->matches = 0;
+}
+
 /* ===========================================================================
  * Initialize the tree data structures for a new zlib stream.
  */
-void ZLIB_INTERNAL _tr_init(s)
-    deflate_state *s;
-{
+void ZLIB_INTERNAL _tr_init(deflate_state *s) {
     tr_static_init();
 
     s->l_desc.dyn_tree = s->dyn_ltree;
@@ -401,24 +475,6 @@ void ZLIB_INTERNAL _tr_init(s)
     init_block(s);
 }
 
-/* ===========================================================================
- * Initialize a new block.
- */
-local void init_block(s)
-    deflate_state *s;
-{
-    int n; /* iterates over tree elements */
-
-    /* Initialize the trees. */
-    for (n = 0; n < L_CODES;  n++) s->dyn_ltree[n].Freq = 0;
-    for (n = 0; n < D_CODES;  n++) s->dyn_dtree[n].Freq = 0;
-    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
-
-    s->dyn_ltree[END_BLOCK].Freq = 1;
-    s->opt_len = s->static_len = 0L;
-    s->sym_next = s->matches = 0;
-}
-
 #define SMALLEST 1
 /* Index within the heap array of least frequent node in the Huffman tree */
 
@@ -448,11 +504,7 @@ local void init_block(s)
  * when the heap property is re-established (each father smaller than its
  * two sons).
  */
-local void pqdownheap(s, tree, k)
-    deflate_state *s;
-    ct_data *tree;  /* the tree to restore */
-    int k;               /* node to move down */
-{
+local void pqdownheap(deflate_state *s, ct_data *tree, int k) {
     int v = s->heap[k];
     int j = k << 1;  /* left son of k */
     while (j <= s->heap_len) {
@@ -483,10 +535,7 @@ local void pqdownheap(s, tree, k)
  *     The length opt_len is updated; static_len is also updated if stree is
  *     not null.
  */
-local void gen_bitlen(s, desc)
-    deflate_state *s;
-    tree_desc *desc;    /* the tree descriptor */
-{
+local void gen_bitlen(deflate_state *s, tree_desc *desc) {
     ct_data *tree        = desc->dyn_tree;
     int max_code         = desc->max_code;
     const ct_data *stree = desc->stat_desc->static_tree;
@@ -561,48 +610,9 @@ local void gen_bitlen(s, desc)
     }
 }
 
-/* ===========================================================================
- * Generate the codes for a given tree and bit counts (which need not be
- * optimal).
- * IN assertion: the array bl_count contains the bit length statistics for
- * the given tree and the field len is set for all tree elements.
- * OUT assertion: the field code is set for all tree elements of non
- *     zero code length.
- */
-local void gen_codes(tree, max_code, bl_count)
-    ct_data *tree;             /* the tree to decorate */
-    int max_code;              /* largest code with non zero frequency */
-    ushf *bl_count;            /* number of codes at each bit length */
-{
-    ush next_code[MAX_BITS+1]; /* next code value for each bit length */
-    unsigned code = 0;         /* running code value */
-    int bits;                  /* bit index */
-    int n;                     /* code index */
-
-    /* The distribution counts are first used to generate the code values
-     * without bit reversal.
-     */
-    for (bits = 1; bits <= MAX_BITS; bits++) {
-        code = (code + bl_count[bits - 1]) << 1;
-        next_code[bits] = (ush)code;
-    }
-    /* Check that the bit counts in bl_count are consistent. The last code
-     * must be all ones.
-     */
-    Assert (code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1,
-            "inconsistent bit counts");
-    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
-
-    for (n = 0;  n <= max_code; n++) {
-        int len = tree[n].Len;
-        if (len == 0) continue;
-        /* Now reverse the bits */
-        tree[n].Code = (ush)bi_reverse(next_code[len]++, len);
-
-        Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
-            n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len] - 1));
-    }
-}
+#ifdef DUMP_BL_TREE
+#  include <stdio.h>
+#endif
 
 /* ===========================================================================
  * Construct one Huffman tree and assigns the code bit strings and lengths.
@@ -612,10 +622,7 @@ local void gen_codes(tree, max_code, bl_count)
  *     and corresponding code. The length opt_len is updated; static_len is
  *     also updated if stree is not null. The field max_code is set.
  */
-local void build_tree(s, desc)
-    deflate_state *s;
-    tree_desc *desc; /* the tree descriptor */
-{
+local void build_tree(deflate_state *s, tree_desc *desc) {
     ct_data *tree         = desc->dyn_tree;
     const ct_data *stree  = desc->stat_desc->static_tree;
     int elems             = desc->stat_desc->elems;
@@ -700,11 +707,7 @@ local void build_tree(s, desc)
  * Scan a literal or distance tree to determine the frequencies of the codes
  * in the bit length tree.
  */
-local void scan_tree(s, tree, max_code)
-    deflate_state *s;
-    ct_data *tree;   /* the tree to be scanned */
-    int max_code;    /* and its largest code of non zero frequency */
-{
+local void scan_tree(deflate_state *s, ct_data *tree, int max_code) {
     int n;                     /* iterates over all tree elements */
     int prevlen = -1;          /* last emitted length */
     int curlen;                /* length of current code */
@@ -745,11 +748,7 @@ local void scan_tree(s, tree, max_code)
  * Send a literal or distance tree in compressed form, using the codes in
  * bl_tree.
  */
-local void send_tree(s, tree, max_code)
-    deflate_state *s;
-    ct_data *tree; /* the tree to be scanned */
-    int max_code;       /* and its largest code of non zero frequency */
-{
+local void send_tree(deflate_state *s, ct_data *tree, int max_code) {
     int n;                     /* iterates over all tree elements */
     int prevlen = -1;          /* last emitted length */
     int curlen;                /* length of current code */
@@ -796,9 +795,7 @@ local void send_tree(s, tree, max_code)
  * Construct the Huffman tree for the bit lengths and return the index in
  * bl_order of the last bit length code to send.
  */
-local int build_bl_tree(s)
-    deflate_state *s;
-{
+local int build_bl_tree(deflate_state *s) {
     int max_blindex;  /* index of last bit length code of non zero freq */
 
     /* Determine the bit length frequencies for literal and distance trees */
@@ -831,10 +828,8 @@ local int build_bl_tree(s)
  * lengths of the bit length codes, the literal tree and the distance tree.
  * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
  */
-local void send_all_trees(s, lcodes, dcodes, blcodes)
-    deflate_state *s;
-    int lcodes, dcodes, blcodes; /* number of codes for each tree */
-{
+local void send_all_trees(deflate_state *s, int lcodes, int dcodes,
+                          int blcodes) {
     int rank;                    /* index in bl_order */
 
     Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
@@ -860,12 +855,8 @@ local void send_all_trees(s, lcodes, dcodes, blcodes)
 /* ===========================================================================
  * Send a stored block
  */
-void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last)
-    deflate_state *s;
-    charf *buf;       /* input block */
-    ulg stored_len;   /* length of input block */
-    int last;         /* one if this is the last block for a file */
-{
+void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
+                                    ulg stored_len, int last) {
     send_bits(s, (STORED_BLOCK<<1) + last, 3);  /* send block type */
     bi_windup(s);        /* align on byte boundary */
     put_short(s, (ush)stored_len);
@@ -884,9 +875,7 @@ void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last)
 /* ===========================================================================
  * Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
  */
-void ZLIB_INTERNAL _tr_flush_bits(s)
-    deflate_state *s;
-{
+void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s) {
     bi_flush(s);
 }
 
@@ -894,9 +883,7 @@ void ZLIB_INTERNAL _tr_flush_bits(s)
  * Send one empty static block to give enough lookahead for inflate.
  * This takes 10 bits, of which 7 may remain in the bit buffer.
  */
-void ZLIB_INTERNAL _tr_align(s)
-    deflate_state *s;
-{
+void ZLIB_INTERNAL _tr_align(deflate_state *s) {
     send_bits(s, STATIC_TREES<<1, 3);
     send_code(s, END_BLOCK, static_ltree);
 #ifdef ZLIB_DEBUG
@@ -905,16 +892,108 @@ void ZLIB_INTERNAL _tr_align(s)
     bi_flush(s);
 }
 
+/* ===========================================================================
+ * Send the block data compressed using the given Huffman trees
+ */
+local void compress_block(deflate_state *s, const ct_data *ltree,
+                          const ct_data *dtree) {
+    unsigned dist;      /* distance of matched string */
+    int lc;             /* match length or unmatched char (if dist == 0) */
+    unsigned sx = 0;    /* running index in symbol buffers */
+    unsigned code;      /* the code to send */
+    int extra;          /* number of extra bits to send */
+
+    if (s->sym_next != 0) do {
+#ifdef LIT_MEM
+        dist = s->d_buf[sx];
+        lc = s->l_buf[sx++];
+#else
+        dist = s->sym_buf[sx++] & 0xff;
+        dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
+        lc = s->sym_buf[sx++];
+#endif
+        if (dist == 0) {
+            send_code(s, lc, ltree); /* send a literal byte */
+            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
+        } else {
+            /* Here, lc is the match length - MIN_MATCH */
+            code = _length_code[lc];
+            send_code(s, code + LITERALS + 1, ltree);   /* send length code */
+            extra = extra_lbits[code];
+            if (extra != 0) {
+                lc -= base_length[code];
+                send_bits(s, lc, extra);       /* send the extra length bits */
+            }
+            dist--; /* dist is now the match distance - 1 */
+            code = d_code(dist);
+            Assert (code < D_CODES, "bad d_code");
+
+            send_code(s, code, dtree);       /* send the distance code */
+            extra = extra_dbits[code];
+            if (extra != 0) {
+                dist -= (unsigned)base_dist[code];
+                send_bits(s, dist, extra);   /* send the extra distance bits */
+            }
+        } /* literal or match pair ? */
+
+        /* Check for no overlay of pending_buf on needed symbols */
+#ifdef LIT_MEM
+        Assert(s->pending < 2 * (s->lit_bufsize + sx), "pendingBuf overflow");
+#else
+        Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
+#endif
+
+    } while (sx < s->sym_next);
+
+    send_code(s, END_BLOCK, ltree);
+}
+
+/* ===========================================================================
+ * Check if the data type is TEXT or BINARY, using the following algorithm:
+ * - TEXT if the two conditions below are satisfied:
+ *    a) There are no non-portable control characters belonging to the
+ *       "block list" (0..6, 14..25, 28..31).
+ *    b) There is at least one printable character belonging to the
+ *       "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
+ * - BINARY otherwise.
+ * - The following partially-portable control characters form a
+ *   "gray list" that is ignored in this detection algorithm:
+ *   (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
+ * IN assertion: the fields Freq of dyn_ltree are set.
+ */
+local int detect_data_type(deflate_state *s) {
+    /* block_mask is the bit mask of block-listed bytes
+     * set bits 0..6, 14..25, and 28..31
+     * 0xf3ffc07f = binary 11110011111111111100000001111111
+     */
+    unsigned long block_mask = 0xf3ffc07fUL;
+    int n;
+
+    /* Check for non-textual ("block-listed") bytes. */
+    for (n = 0; n <= 31; n++, block_mask >>= 1)
+        if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0))
+            return Z_BINARY;
+
+    /* Check for textual ("allow-listed") bytes. */
+    if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
+            || s->dyn_ltree[13].Freq != 0)
+        return Z_TEXT;
+    for (n = 32; n < LITERALS; n++)
+        if (s->dyn_ltree[n].Freq != 0)
+            return Z_TEXT;
+
+    /* There are no "block-listed" or "allow-listed" bytes:
+     * this stream either is empty or has tolerated ("gray-listed") bytes only.
+     */
+    return Z_BINARY;
+}
+
 /* ===========================================================================
  * Determine the best encoding for the current block: dynamic trees, static
  * trees or store, and write out the encoded block.
  */
-void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
-    deflate_state *s;
-    charf *buf;       /* input block, or NULL if too old */
-    ulg stored_len;   /* length of input block */
-    int last;         /* one if this is the last block for a file */
-{
+void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
+                                   ulg stored_len, int last) {
     ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
     int max_blindex = 0;  /* index of last bit length code of non zero freq */
 
@@ -1011,14 +1090,15 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
  * Save the match info and tally the frequency counts. Return true if
  * the current block must be flushed.
  */
-int ZLIB_INTERNAL _tr_tally(s, dist, lc)
-    deflate_state *s;
-    unsigned dist;  /* distance of matched string */
-    unsigned lc;    /* match length - MIN_MATCH or unmatched char (dist==0) */
-{
+int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) {
+#ifdef LIT_MEM
+    s->d_buf[s->sym_next] = (ush)dist;
+    s->l_buf[s->sym_next++] = (uch)lc;
+#else
     s->sym_buf[s->sym_next++] = (uch)dist;
     s->sym_buf[s->sym_next++] = (uch)(dist >> 8);
     s->sym_buf[s->sym_next++] = (uch)lc;
+#endif
     if (dist == 0) {
         /* lc is the unmatched char */
         s->dyn_ltree[lc].Freq++;
@@ -1035,147 +1115,3 @@ int ZLIB_INTERNAL _tr_tally(s, dist, lc)
     }
     return (s->sym_next == s->sym_end);
 }
-
-/* ===========================================================================
- * Send the block data compressed using the given Huffman trees
- */
-local void compress_block(s, ltree, dtree)
-    deflate_state *s;
-    const ct_data *ltree; /* literal tree */
-    const ct_data *dtree; /* distance tree */
-{
-    unsigned dist;      /* distance of matched string */
-    int lc;             /* match length or unmatched char (if dist == 0) */
-    unsigned sx = 0;    /* running index in sym_buf */
-    unsigned code;      /* the code to send */
-    int extra;          /* number of extra bits to send */
-
-    if (s->sym_next != 0) do {
-        dist = s->sym_buf[sx++] & 0xff;
-        dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
-        lc = s->sym_buf[sx++];
-        if (dist == 0) {
-            send_code(s, lc, ltree); /* send a literal byte */
-            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
-        } else {
-            /* Here, lc is the match length - MIN_MATCH */
-            code = _length_code[lc];
-            send_code(s, code + LITERALS + 1, ltree);   /* send length code */
-            extra = extra_lbits[code];
-            if (extra != 0) {
-                lc -= base_length[code];
-                send_bits(s, lc, extra);       /* send the extra length bits */
-            }
-            dist--; /* dist is now the match distance - 1 */
-            code = d_code(dist);
-            Assert (code < D_CODES, "bad d_code");
-
-            send_code(s, code, dtree);       /* send the distance code */
-            extra = extra_dbits[code];
-            if (extra != 0) {
-                dist -= (unsigned)base_dist[code];
-                send_bits(s, dist, extra);   /* send the extra distance bits */
-            }
-        } /* literal or match pair ? */
-
-        /* Check that the overlay between pending_buf and sym_buf is ok: */
-        Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
-
-    } while (sx < s->sym_next);
-
-    send_code(s, END_BLOCK, ltree);
-}
-
-/* ===========================================================================
- * Check if the data type is TEXT or BINARY, using the following algorithm:
- * - TEXT if the two conditions below are satisfied:
- *    a) There are no non-portable control characters belonging to the
- *       "block list" (0..6, 14..25, 28..31).
- *    b) There is at least one printable character belonging to the
- *       "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
- * - BINARY otherwise.
- * - The following partially-portable control characters form a
- *   "gray list" that is ignored in this detection algorithm:
- *   (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
- * IN assertion: the fields Freq of dyn_ltree are set.
- */
-local int detect_data_type(s)
-    deflate_state *s;
-{
-    /* block_mask is the bit mask of block-listed bytes
-     * set bits 0..6, 14..25, and 28..31
-     * 0xf3ffc07f = binary 11110011111111111100000001111111
-     */
-    unsigned long block_mask = 0xf3ffc07fUL;
-    int n;
-
-    /* Check for non-textual ("block-listed") bytes. */
-    for (n = 0; n <= 31; n++, block_mask >>= 1)
-        if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0))
-            return Z_BINARY;
-
-    /* Check for textual ("allow-listed") bytes. */
-    if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
-            || s->dyn_ltree[13].Freq != 0)
-        return Z_TEXT;
-    for (n = 32; n < LITERALS; n++)
-        if (s->dyn_ltree[n].Freq != 0)
-            return Z_TEXT;
-
-    /* There are no "block-listed" or "allow-listed" bytes:
-     * this stream either is empty or has tolerated ("gray-listed") bytes only.
-     */
-    return Z_BINARY;
-}
-
-/* ===========================================================================
- * Reverse the first len bits of a code, using straightforward code (a faster
- * method would use a table)
- * IN assertion: 1 <= len <= 15
- */
-local unsigned bi_reverse(code, len)
-    unsigned code; /* the value to invert */
-    int len;       /* its bit length */
-{
-    register unsigned res = 0;
-    do {
-        res |= code & 1;
-        code >>= 1, res <<= 1;
-    } while (--len > 0);
-    return res >> 1;
-}
-
-/* ===========================================================================
- * Flush the bit buffer, keeping at most 7 bits in it.
- */
-local void bi_flush(s)
-    deflate_state *s;
-{
-    if (s->bi_valid == 16) {
-        put_short(s, s->bi_buf);
-        s->bi_buf = 0;
-        s->bi_valid = 0;
-    } else if (s->bi_valid >= 8) {
-        put_byte(s, (Byte)s->bi_buf);
-        s->bi_buf >>= 8;
-        s->bi_valid -= 8;
-    }
-}
-
-/* ===========================================================================
- * Flush the bit buffer and align the output on a byte boundary
- */
-local void bi_windup(s)
-    deflate_state *s;
-{
-    if (s->bi_valid > 8) {
-        put_short(s, s->bi_buf);
-    } else if (s->bi_valid > 0) {
-        put_byte(s, (Byte)s->bi_buf);
-    }
-    s->bi_buf = 0;
-    s->bi_valid = 0;
-#ifdef ZLIB_DEBUG
-    s->bits_sent = (s->bits_sent + 7) & ~7;
-#endif
-}
diff --git a/src/native/external/zlib/uncompr.c b/src/native/external/zlib/uncompr.c
index f9532f46c1a6..5e256663b451 100644
--- a/src/native/external/zlib/uncompr.c
+++ b/src/native/external/zlib/uncompr.c
@@ -24,12 +24,8 @@
    Z_DATA_ERROR if the input data was corrupted, including if the input data is
    an incomplete zlib stream.
 */
-int ZEXPORT uncompress2(dest, destLen, source, sourceLen)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong *sourceLen;
-{
+int ZEXPORT uncompress2(Bytef *dest, uLongf *destLen, const Bytef *source,
+                        uLong *sourceLen) {
     z_stream stream;
     int err;
     const uInt max = (uInt)-1;
@@ -83,11 +79,7 @@ int ZEXPORT uncompress2(dest, destLen, source, sourceLen)
            err;
 }
 
-int ZEXPORT uncompress(dest, destLen, source, sourceLen)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong sourceLen;
-{
+int ZEXPORT uncompress(Bytef *dest, uLongf *destLen, const Bytef *source,
+                       uLong sourceLen) {
     return uncompress2(dest, destLen, source, &sourceLen);
 }
diff --git a/src/native/external/zlib/zconf.h b/src/native/external/zlib/zconf.h
index bf977d3e70ad..62adc8d8431f 100644
--- a/src/native/external/zlib/zconf.h
+++ b/src/native/external/zlib/zconf.h
@@ -1,5 +1,5 @@
 /* zconf.h -- configuration of the zlib compression library
- * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
+ * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -241,7 +241,11 @@
 #endif
 
 #ifdef Z_SOLO
-   typedef unsigned long z_size_t;
+#  ifdef _WIN64
+     typedef unsigned long long z_size_t;
+#  else
+     typedef unsigned long z_size_t;
+#  endif
 #else
 #  define z_longlong long long
 #  if defined(NO_SIZE_T)
@@ -296,14 +300,6 @@
 #  endif
 #endif
 
-#ifndef Z_ARG /* function prototypes for stdarg */
-#  if defined(STDC) || defined(Z_HAVE_STDARG_H)
-#    define Z_ARG(args)  args
-#  else
-#    define Z_ARG(args)  ()
-#  endif
-#endif
-
 /* The following definitions for FAR are needed only for MSDOS mixed
  * model programming (small or medium model with some far allocations).
  * This was tested only with MSC; for other MSDOS compilers you may have
@@ -520,7 +516,7 @@ typedef uLong FAR uLongf;
 #if !defined(_WIN32) && defined(Z_LARGE64)
 #  define z_off64_t off64_t
 #else
-#  if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO)
+#  if defined(_WIN32) && !defined(__GNUC__)
 #    define z_off64_t __int64
 #  else
 #    define z_off64_t z_off_t
diff --git a/src/native/external/zlib/zconf.h.cmakein b/src/native/external/zlib/zconf.h.cmakein
index 247ba2461dd0..0abe3bc9d8fa 100644
--- a/src/native/external/zlib/zconf.h.cmakein
+++ b/src/native/external/zlib/zconf.h.cmakein
@@ -1,5 +1,5 @@
 /* zconf.h -- configuration of the zlib compression library
- * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
+ * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -243,7 +243,11 @@
 #endif
 
 #ifdef Z_SOLO
-   typedef unsigned long z_size_t;
+#  ifdef _WIN64
+     typedef unsigned long long z_size_t;
+#  else
+     typedef unsigned long z_size_t;
+#  endif
 #else
 #  define z_longlong long long
 #  if defined(NO_SIZE_T)
@@ -298,14 +302,6 @@
 #  endif
 #endif
 
-#ifndef Z_ARG /* function prototypes for stdarg */
-#  if defined(STDC) || defined(Z_HAVE_STDARG_H)
-#    define Z_ARG(args)  args
-#  else
-#    define Z_ARG(args)  ()
-#  endif
-#endif
-
 /* The following definitions for FAR are needed only for MSDOS mixed
  * model programming (small or medium model with some far allocations).
  * This was tested only with MSC; for other MSDOS compilers you may have
@@ -522,7 +518,7 @@ typedef uLong FAR uLongf;
 #if !defined(_WIN32) && defined(Z_LARGE64)
 #  define z_off64_t off64_t
 #else
-#  if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO)
+#  if defined(_WIN32) && !defined(__GNUC__)
 #    define z_off64_t __int64
 #  else
 #    define z_off64_t z_off_t
diff --git a/src/native/external/zlib/zconf.h.in b/src/native/external/zlib/zconf.h.in
index bf977d3e70ad..62adc8d8431f 100644
--- a/src/native/external/zlib/zconf.h.in
+++ b/src/native/external/zlib/zconf.h.in
@@ -1,5 +1,5 @@
 /* zconf.h -- configuration of the zlib compression library
- * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
+ * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -241,7 +241,11 @@
 #endif
 
 #ifdef Z_SOLO
-   typedef unsigned long z_size_t;
+#  ifdef _WIN64
+     typedef unsigned long long z_size_t;
+#  else
+     typedef unsigned long z_size_t;
+#  endif
 #else
 #  define z_longlong long long
 #  if defined(NO_SIZE_T)
@@ -296,14 +300,6 @@
 #  endif
 #endif
 
-#ifndef Z_ARG /* function prototypes for stdarg */
-#  if defined(STDC) || defined(Z_HAVE_STDARG_H)
-#    define Z_ARG(args)  args
-#  else
-#    define Z_ARG(args)  ()
-#  endif
-#endif
-
 /* The following definitions for FAR are needed only for MSDOS mixed
  * model programming (small or medium model with some far allocations).
  * This was tested only with MSC; for other MSDOS compilers you may have
@@ -520,7 +516,7 @@ typedef uLong FAR uLongf;
 #if !defined(_WIN32) && defined(Z_LARGE64)
 #  define z_off64_t off64_t
 #else
-#  if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO)
+#  if defined(_WIN32) && !defined(__GNUC__)
 #    define z_off64_t __int64
 #  else
 #    define z_off64_t z_off_t
diff --git a/src/native/external/zlib/zlib.3 b/src/native/external/zlib/zlib.3
index 6f6e91404dff..c716020ea9c4 100644
--- a/src/native/external/zlib/zlib.3
+++ b/src/native/external/zlib/zlib.3
@@ -1,4 +1,4 @@
-.TH ZLIB 3 "13 Oct 2022"
+.TH ZLIB 3 "22 Jan 2024"
 .SH NAME
 zlib \- compression/decompression library
 .SH SYNOPSIS
@@ -105,9 +105,9 @@ before asking for help.
 Send questions and/or comments to zlib@gzip.org,
 or (for the Windows DLL version) to Gilles Vollant (info@winimage.com).
 .SH AUTHORS AND LICENSE
-Version 1.2.13
+Version 1.3.1
 .LP
-Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler
+Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
 .LP
 This software is provided 'as-is', without any express or implied
 warranty.  In no event will the authors be held liable for any damages
diff --git a/src/native/external/zlib/zlib.h b/src/native/external/zlib/zlib.h
index 953cb5012dc2..8d4b932eaf6a 100644
--- a/src/native/external/zlib/zlib.h
+++ b/src/native/external/zlib/zlib.h
@@ -1,7 +1,7 @@
 /* zlib.h -- interface of the 'zlib' general purpose compression library
-  version 1.2.13, October 13th, 2022
+  version 1.3.1, January 22nd, 2024
 
-  Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler
+  Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
 
   This software is provided 'as-is', without any express or implied
   warranty.  In no event will the authors be held liable for any damages
@@ -37,11 +37,11 @@
 extern "C" {
 #endif
 
-#define ZLIB_VERSION "1.2.13"
-#define ZLIB_VERNUM 0x12d0
+#define ZLIB_VERSION "1.3.1"
+#define ZLIB_VERNUM 0x1310
 #define ZLIB_VER_MAJOR 1
-#define ZLIB_VER_MINOR 2
-#define ZLIB_VER_REVISION 13
+#define ZLIB_VER_MINOR 3
+#define ZLIB_VER_REVISION 1
 #define ZLIB_VER_SUBREVISION 0
 
 /*
@@ -78,8 +78,8 @@ extern "C" {
   even in the case of corrupted input.
 */
 
-typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
-typedef void   (*free_func)  OF((voidpf opaque, voidpf address));
+typedef voidpf (*alloc_func)(voidpf opaque, uInt items, uInt size);
+typedef void   (*free_func)(voidpf opaque, voidpf address);
 
 struct internal_state;
 
@@ -217,7 +217,7 @@ typedef gz_header FAR *gz_headerp;
 
                         /* basic functions */
 
-ZEXTERN const char * ZEXPORT zlibVersion OF((void));
+ZEXTERN const char * ZEXPORT zlibVersion(void);
 /* The application can compare zlibVersion and ZLIB_VERSION for consistency.
    If the first character differs, the library code actually used is not
    compatible with the zlib.h header file used by the application.  This check
@@ -225,12 +225,12 @@ ZEXTERN const char * ZEXPORT zlibVersion OF((void));
  */
 
 /*
-ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
+ZEXTERN int ZEXPORT deflateInit(z_streamp strm, int level);
 
      Initializes the internal stream state for compression.  The fields
    zalloc, zfree and opaque must be initialized before by the caller.  If
    zalloc and zfree are set to Z_NULL, deflateInit updates them to use default
-   allocation functions.
+   allocation functions.  total_in, total_out, adler, and msg are initialized.
 
      The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
    1 gives best speed, 9 gives best compression, 0 gives no compression at all
@@ -247,7 +247,7 @@ ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
 */
 
 
-ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
+ZEXTERN int ZEXPORT deflate(z_streamp strm, int flush);
 /*
     deflate compresses as much data as possible, and stops when the input
   buffer becomes empty or the output buffer becomes full.  It may introduce
@@ -320,8 +320,8 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
   with the same value of the flush parameter and more output space (updated
   avail_out), until the flush is complete (deflate returns with non-zero
   avail_out).  In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
-  avail_out is greater than six to avoid repeated flush markers due to
-  avail_out == 0 on return.
+  avail_out is greater than six when the flush marker begins, in order to avoid
+  repeated flush markers upon calling deflate() again when avail_out == 0.
 
     If the parameter flush is set to Z_FINISH, pending input is processed,
   pending output is flushed and deflate returns with Z_STREAM_END if there was
@@ -360,7 +360,7 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
 */
 
 
-ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
+ZEXTERN int ZEXPORT deflateEnd(z_streamp strm);
 /*
      All dynamically allocated data structures for this stream are freed.
    This function discards any unprocessed input and does not flush any pending
@@ -375,7 +375,7 @@ ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
 
 
 /*
-ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
+ZEXTERN int ZEXPORT inflateInit(z_streamp strm);
 
      Initializes the internal stream state for decompression.  The fields
    next_in, avail_in, zalloc, zfree and opaque must be initialized before by
@@ -383,7 +383,8 @@ ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
    read or consumed.  The allocation of a sliding window will be deferred to
    the first call of inflate (if the decompression does not complete on the
    first call).  If zalloc and zfree are set to Z_NULL, inflateInit updates
-   them to use default allocation functions.
+   them to use default allocation functions.  total_in, total_out, adler, and
+   msg are initialized.
 
      inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
    memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
@@ -397,7 +398,7 @@ ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
 */
 
 
-ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
+ZEXTERN int ZEXPORT inflate(z_streamp strm, int flush);
 /*
     inflate decompresses as much data as possible, and stops when the input
   buffer becomes empty or the output buffer becomes full.  It may introduce
@@ -517,7 +518,7 @@ ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
 */
 
 
-ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
+ZEXTERN int ZEXPORT inflateEnd(z_streamp strm);
 /*
      All dynamically allocated data structures for this stream are freed.
    This function discards any unprocessed input and does not flush any pending
@@ -535,12 +536,12 @@ ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
 */
 
 /*
-ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
-                                     int  level,
-                                     int  method,
-                                     int  windowBits,
-                                     int  memLevel,
-                                     int  strategy));
+ZEXTERN int ZEXPORT deflateInit2(z_streamp strm,
+                                 int level,
+                                 int method,
+                                 int windowBits,
+                                 int memLevel,
+                                 int strategy);
 
      This is another version of deflateInit with more compression options.  The
    fields zalloc, zfree and opaque must be initialized before by the caller.
@@ -607,9 +608,9 @@ ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
    compression: this will be done by deflate().
 */
 
-ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
-                                             const Bytef *dictionary,
-                                             uInt  dictLength));
+ZEXTERN int ZEXPORT deflateSetDictionary(z_streamp strm,
+                                         const Bytef *dictionary,
+                                         uInt  dictLength);
 /*
      Initializes the compression dictionary from the given byte sequence
    without producing any compressed output.  When using the zlib format, this
@@ -651,9 +652,9 @@ ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
    not perform any compression: this will be done by deflate().
 */
 
-ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm,
-                                             Bytef *dictionary,
-                                             uInt  *dictLength));
+ZEXTERN int ZEXPORT deflateGetDictionary(z_streamp strm,
+                                         Bytef *dictionary,
+                                         uInt  *dictLength);
 /*
      Returns the sliding dictionary being maintained by deflate.  dictLength is
    set to the number of bytes in the dictionary, and that many bytes are copied
@@ -673,8 +674,8 @@ ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm,
    stream state is inconsistent.
 */
 
-ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
-                                    z_streamp source));
+ZEXTERN int ZEXPORT deflateCopy(z_streamp dest,
+                                z_streamp source);
 /*
      Sets the destination stream as a complete copy of the source stream.
 
@@ -691,20 +692,20 @@ ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
    destination.
 */
 
-ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
+ZEXTERN int ZEXPORT deflateReset(z_streamp strm);
 /*
      This function is equivalent to deflateEnd followed by deflateInit, but
    does not free and reallocate the internal compression state.  The stream
    will leave the compression level and any other attributes that may have been
-   set unchanged.
+   set unchanged.  total_in, total_out, adler, and msg are initialized.
 
      deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
    stream state was inconsistent (such as zalloc or state being Z_NULL).
 */
 
-ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
-                                      int level,
-                                      int strategy));
+ZEXTERN int ZEXPORT deflateParams(z_streamp strm,
+                                  int level,
+                                  int strategy);
 /*
      Dynamically update the compression level and compression strategy.  The
    interpretation of level and strategy is as in deflateInit2().  This can be
@@ -729,7 +730,7 @@ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
    Then no more input data should be provided before the deflateParams() call.
    If this is done, the old level and strategy will be applied to the data
    compressed before deflateParams(), and the new level and strategy will be
-   applied to the the data compressed after deflateParams().
+   applied to the data compressed after deflateParams().
 
      deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream
    state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if
@@ -740,11 +741,11 @@ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
    retried with more output space.
 */
 
-ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
-                                    int good_length,
-                                    int max_lazy,
-                                    int nice_length,
-                                    int max_chain));
+ZEXTERN int ZEXPORT deflateTune(z_streamp strm,
+                                int good_length,
+                                int max_lazy,
+                                int nice_length,
+                                int max_chain);
 /*
      Fine tune deflate's internal compression parameters.  This should only be
    used by someone who understands the algorithm used by zlib's deflate for
@@ -757,8 +758,8 @@ ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
    returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.
  */
 
-ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
-                                       uLong sourceLen));
+ZEXTERN uLong ZEXPORT deflateBound(z_streamp strm,
+                                   uLong sourceLen);
 /*
      deflateBound() returns an upper bound on the compressed size after
    deflation of sourceLen bytes.  It must be called after deflateInit() or
@@ -772,9 +773,9 @@ ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
    than Z_FINISH or Z_NO_FLUSH are used.
 */
 
-ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm,
-                                       unsigned *pending,
-                                       int *bits));
+ZEXTERN int ZEXPORT deflatePending(z_streamp strm,
+                                   unsigned *pending,
+                                   int *bits);
 /*
      deflatePending() returns the number of bytes and bits of output that have
    been generated, but not yet provided in the available output.  The bytes not
@@ -787,9 +788,9 @@ ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm,
    stream state was inconsistent.
  */
 
-ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
-                                     int bits,
-                                     int value));
+ZEXTERN int ZEXPORT deflatePrime(z_streamp strm,
+                                 int bits,
+                                 int value);
 /*
      deflatePrime() inserts bits in the deflate output stream.  The intent
    is that this function is used to start off the deflate output with the bits
@@ -804,8 +805,8 @@ ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
    source stream state was inconsistent.
 */
 
-ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
-                                         gz_headerp head));
+ZEXTERN int ZEXPORT deflateSetHeader(z_streamp strm,
+                                     gz_headerp head);
 /*
      deflateSetHeader() provides gzip header information for when a gzip
    stream is requested by deflateInit2().  deflateSetHeader() may be called
@@ -821,16 +822,17 @@ ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
    gzip file" and give up.
 
      If deflateSetHeader is not used, the default gzip header has text false,
-   the time set to zero, and os set to 255, with no extra, name, or comment
-   fields.  The gzip header is returned to the default state by deflateReset().
+   the time set to zero, and os set to the current operating system, with no
+   extra, name, or comment fields.  The gzip header is returned to the default
+   state by deflateReset().
 
      deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
    stream state was inconsistent.
 */
 
 /*
-ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
-                                     int  windowBits));
+ZEXTERN int ZEXPORT inflateInit2(z_streamp strm,
+                                 int windowBits);
 
      This is another version of inflateInit with an extra parameter.  The
    fields next_in, avail_in, zalloc, zfree and opaque must be initialized
@@ -883,9 +885,9 @@ ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
    deferred until inflate() is called.
 */
 
-ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
-                                             const Bytef *dictionary,
-                                             uInt  dictLength));
+ZEXTERN int ZEXPORT inflateSetDictionary(z_streamp strm,
+                                         const Bytef *dictionary,
+                                         uInt  dictLength);
 /*
      Initializes the decompression dictionary from the given uncompressed byte
    sequence.  This function must be called immediately after a call of inflate,
@@ -906,9 +908,9 @@ ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
    inflate().
 */
 
-ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm,
-                                             Bytef *dictionary,
-                                             uInt  *dictLength));
+ZEXTERN int ZEXPORT inflateGetDictionary(z_streamp strm,
+                                         Bytef *dictionary,
+                                         uInt  *dictLength);
 /*
      Returns the sliding dictionary being maintained by inflate.  dictLength is
    set to the number of bytes in the dictionary, and that many bytes are copied
@@ -921,7 +923,7 @@ ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm,
    stream state is inconsistent.
 */
 
-ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
+ZEXTERN int ZEXPORT inflateSync(z_streamp strm);
 /*
      Skips invalid compressed data until a possible full flush point (see above
    for the description of deflate with Z_FULL_FLUSH) can be found, or until all
@@ -934,14 +936,14 @@ ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
      inflateSync returns Z_OK if a possible full flush point has been found,
    Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point
    has been found, or Z_STREAM_ERROR if the stream structure was inconsistent.
-   In the success case, the application may save the current current value of
-   total_in which indicates where valid compressed data was found.  In the
-   error case, the application may repeatedly call inflateSync, providing more
-   input each time, until success or end of the input data.
+   In the success case, the application may save the current value of total_in
+   which indicates where valid compressed data was found.  In the error case,
+   the application may repeatedly call inflateSync, providing more input each
+   time, until success or end of the input data.
 */
 
-ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
-                                    z_streamp source));
+ZEXTERN int ZEXPORT inflateCopy(z_streamp dest,
+                                z_streamp source);
 /*
      Sets the destination stream as a complete copy of the source stream.
 
@@ -956,18 +958,19 @@ ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
    destination.
 */
 
-ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
+ZEXTERN int ZEXPORT inflateReset(z_streamp strm);
 /*
      This function is equivalent to inflateEnd followed by inflateInit,
    but does not free and reallocate the internal decompression state.  The
    stream will keep attributes that may have been set by inflateInit2.
+   total_in, total_out, adler, and msg are initialized.
 
      inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
    stream state was inconsistent (such as zalloc or state being Z_NULL).
 */
 
-ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm,
-                                      int windowBits));
+ZEXTERN int ZEXPORT inflateReset2(z_streamp strm,
+                                  int windowBits);
 /*
      This function is the same as inflateReset, but it also permits changing
    the wrap and window size requests.  The windowBits parameter is interpreted
@@ -980,9 +983,9 @@ ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm,
    the windowBits parameter is invalid.
 */
 
-ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
-                                     int bits,
-                                     int value));
+ZEXTERN int ZEXPORT inflatePrime(z_streamp strm,
+                                 int bits,
+                                 int value);
 /*
      This function inserts bits in the inflate input stream.  The intent is
    that this function is used to start inflating at a bit position in the
@@ -1001,7 +1004,7 @@ ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
    stream state was inconsistent.
 */
 
-ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm));
+ZEXTERN long ZEXPORT inflateMark(z_streamp strm);
 /*
      This function returns two values, one in the lower 16 bits of the return
    value, and the other in the remaining upper bits, obtained by shifting the
@@ -1029,8 +1032,8 @@ ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm));
    source stream state was inconsistent.
 */
 
-ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
-                                         gz_headerp head));
+ZEXTERN int ZEXPORT inflateGetHeader(z_streamp strm,
+                                     gz_headerp head);
 /*
      inflateGetHeader() requests that gzip header information be stored in the
    provided gz_header structure.  inflateGetHeader() may be called after
@@ -1070,8 +1073,8 @@ ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
 */
 
 /*
-ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
-                                        unsigned char FAR *window));
+ZEXTERN int ZEXPORT inflateBackInit(z_streamp strm, int windowBits,
+                                    unsigned char FAR *window);
 
      Initialize the internal stream state for decompression using inflateBack()
    calls.  The fields zalloc, zfree and opaque in strm must be initialized
@@ -1091,13 +1094,13 @@ ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
    the version of the header file.
 */
 
-typedef unsigned (*in_func) OF((void FAR *,
-                                z_const unsigned char FAR * FAR *));
-typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned));
+typedef unsigned (*in_func)(void FAR *,
+                            z_const unsigned char FAR * FAR *);
+typedef int (*out_func)(void FAR *, unsigned char FAR *, unsigned);
 
-ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
-                                    in_func in, void FAR *in_desc,
-                                    out_func out, void FAR *out_desc));
+ZEXTERN int ZEXPORT inflateBack(z_streamp strm,
+                                in_func in, void FAR *in_desc,
+                                out_func out, void FAR *out_desc);
 /*
      inflateBack() does a raw inflate with a single call using a call-back
    interface for input and output.  This is potentially more efficient than
@@ -1165,7 +1168,7 @@ ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
    cannot return Z_OK.
 */
 
-ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
+ZEXTERN int ZEXPORT inflateBackEnd(z_streamp strm);
 /*
      All memory allocated by inflateBackInit() is freed.
 
@@ -1173,7 +1176,7 @@ ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
    state was inconsistent.
 */
 
-ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
+ZEXTERN uLong ZEXPORT zlibCompileFlags(void);
 /* Return flags indicating compile-time options.
 
     Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other:
@@ -1226,8 +1229,8 @@ ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
    you need special options.
 */
 
-ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
-                                 const Bytef *source, uLong sourceLen));
+ZEXTERN int ZEXPORT compress(Bytef *dest,   uLongf *destLen,
+                             const Bytef *source, uLong sourceLen);
 /*
      Compresses the source buffer into the destination buffer.  sourceLen is
    the byte length of the source buffer.  Upon entry, destLen is the total size
@@ -1241,9 +1244,9 @@ ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
    buffer.
 */
 
-ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
-                                  const Bytef *source, uLong sourceLen,
-                                  int level));
+ZEXTERN int ZEXPORT compress2(Bytef *dest,   uLongf *destLen,
+                              const Bytef *source, uLong sourceLen,
+                              int level);
 /*
      Compresses the source buffer into the destination buffer.  The level
    parameter has the same meaning as in deflateInit.  sourceLen is the byte
@@ -1257,15 +1260,15 @@ ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
    Z_STREAM_ERROR if the level parameter is invalid.
 */
 
-ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen));
+ZEXTERN uLong ZEXPORT compressBound(uLong sourceLen);
 /*
      compressBound() returns an upper bound on the compressed size after
    compress() or compress2() on sourceLen bytes.  It would be used before a
    compress() or compress2() call to allocate the destination buffer.
 */
 
-ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
-                                   const Bytef *source, uLong sourceLen));
+ZEXTERN int ZEXPORT uncompress(Bytef *dest,   uLongf *destLen,
+                               const Bytef *source, uLong sourceLen);
 /*
      Decompresses the source buffer into the destination buffer.  sourceLen is
    the byte length of the source buffer.  Upon entry, destLen is the total size
@@ -1282,8 +1285,8 @@ ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
    buffer with the uncompressed data up to that point.
 */
 
-ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest,   uLongf *destLen,
-                                    const Bytef *source, uLong *sourceLen));
+ZEXTERN int ZEXPORT uncompress2(Bytef *dest,   uLongf *destLen,
+                                const Bytef *source, uLong *sourceLen);
 /*
      Same as uncompress, except that sourceLen is a pointer, where the
    length of the source is *sourceLen.  On return, *sourceLen is the number of
@@ -1302,7 +1305,7 @@ ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest,   uLongf *destLen,
 typedef struct gzFile_s *gzFile;    /* semi-opaque gzip file descriptor */
 
 /*
-ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
+ZEXTERN gzFile ZEXPORT gzopen(const char *path, const char *mode);
 
      Open the gzip (.gz) file at path for reading and decompressing, or
    compressing and writing.  The mode parameter is as in fopen ("rb" or "wb")
@@ -1339,7 +1342,7 @@ ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
    file could not be opened.
 */
 
-ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
+ZEXTERN gzFile ZEXPORT gzdopen(int fd, const char *mode);
 /*
      Associate a gzFile with the file descriptor fd.  File descriptors are
    obtained from calls like open, dup, creat, pipe or fileno (if the file has
@@ -1362,7 +1365,7 @@ ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
    will not detect if fd is invalid (unless fd is -1).
 */
 
-ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
+ZEXTERN int ZEXPORT gzbuffer(gzFile file, unsigned size);
 /*
      Set the internal buffer size used by this library's functions for file to
    size.  The default buffer size is 8192 bytes.  This function must be called
@@ -1378,7 +1381,7 @@ ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
    too late.
 */
 
-ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
+ZEXTERN int ZEXPORT gzsetparams(gzFile file, int level, int strategy);
 /*
      Dynamically update the compression level and strategy for file.  See the
    description of deflateInit2 for the meaning of these parameters. Previously
@@ -1389,7 +1392,7 @@ ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
    or Z_MEM_ERROR if there is a memory allocation error.
 */
 
-ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
+ZEXTERN int ZEXPORT gzread(gzFile file, voidp buf, unsigned len);
 /*
      Read and decompress up to len uncompressed bytes from file into buf.  If
    the input file is not in gzip format, gzread copies the given number of
@@ -1419,8 +1422,8 @@ ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
    Z_STREAM_ERROR.
 */
 
-ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems,
-                                     gzFile file));
+ZEXTERN z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems,
+                                 gzFile file);
 /*
      Read and decompress up to nitems items of size size from file into buf,
    otherwise operating as gzread() does.  This duplicates the interface of
@@ -1445,14 +1448,14 @@ ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems,
    file, resetting and retrying on end-of-file, when size is not 1.
 */
 
-ZEXTERN int ZEXPORT gzwrite OF((gzFile file, voidpc buf, unsigned len));
+ZEXTERN int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len);
 /*
      Compress and write the len uncompressed bytes at buf to file. gzwrite
    returns the number of uncompressed bytes written or 0 in case of error.
 */
 
-ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size,
-                                      z_size_t nitems, gzFile file));
+ZEXTERN z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size,
+                                  z_size_t nitems, gzFile file);
 /*
      Compress and write nitems items of size size from buf to file, duplicating
    the interface of stdio's fwrite(), with size_t request and return types.  If
@@ -1465,7 +1468,7 @@ ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size,
    is returned, and the error state is set to Z_STREAM_ERROR.
 */
 
-ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...));
+ZEXTERN int ZEXPORTVA gzprintf(gzFile file, const char *format, ...);
 /*
      Convert, format, compress, and write the arguments (...) to file under
    control of the string format, as in fprintf.  gzprintf returns the number of
@@ -1480,7 +1483,7 @@ ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...));
    This can be determined using zlibCompileFlags().
 */
 
-ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
+ZEXTERN int ZEXPORT gzputs(gzFile file, const char *s);
 /*
      Compress and write the given null-terminated string s to file, excluding
    the terminating null character.
@@ -1488,7 +1491,7 @@ ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
      gzputs returns the number of characters written, or -1 in case of error.
 */
 
-ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
+ZEXTERN char * ZEXPORT gzgets(gzFile file, char *buf, int len);
 /*
      Read and decompress bytes from file into buf, until len-1 characters are
    read, or until a newline character is read and transferred to buf, or an
@@ -1502,13 +1505,13 @@ ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
    buf are indeterminate.
 */
 
-ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c));
+ZEXTERN int ZEXPORT gzputc(gzFile file, int c);
 /*
      Compress and write c, converted to an unsigned char, into file.  gzputc
    returns the value that was written, or -1 in case of error.
 */
 
-ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
+ZEXTERN int ZEXPORT gzgetc(gzFile file);
 /*
      Read and decompress one byte from file.  gzgetc returns this byte or -1
    in case of end of file or error.  This is implemented as a macro for speed.
@@ -1517,7 +1520,7 @@ ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
    points to has been clobbered or not.
 */
 
-ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
+ZEXTERN int ZEXPORT gzungetc(int c, gzFile file);
 /*
      Push c back onto the stream for file to be read as the first character on
    the next read.  At least one character of push-back is always allowed.
@@ -1529,7 +1532,7 @@ ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
    gzseek() or gzrewind().
 */
 
-ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
+ZEXTERN int ZEXPORT gzflush(gzFile file, int flush);
 /*
      Flush all pending output to file.  The parameter flush is as in the
    deflate() function.  The return value is the zlib error number (see function
@@ -1545,8 +1548,8 @@ ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
 */
 
 /*
-ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
-                                   z_off_t offset, int whence));
+ZEXTERN z_off_t ZEXPORT gzseek(gzFile file,
+                               z_off_t offset, int whence);
 
      Set the starting position to offset relative to whence for the next gzread
    or gzwrite on file.  The offset represents a number of bytes in the
@@ -1564,7 +1567,7 @@ ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
    would be before the current position.
 */
 
-ZEXTERN int ZEXPORT    gzrewind OF((gzFile file));
+ZEXTERN int ZEXPORT    gzrewind(gzFile file);
 /*
      Rewind file. This function is supported only for reading.
 
@@ -1572,7 +1575,7 @@ ZEXTERN int ZEXPORT    gzrewind OF((gzFile file));
 */
 
 /*
-ZEXTERN z_off_t ZEXPORT    gztell OF((gzFile file));
+ZEXTERN z_off_t ZEXPORT    gztell(gzFile file);
 
      Return the starting position for the next gzread or gzwrite on file.
    This position represents a number of bytes in the uncompressed data stream,
@@ -1583,7 +1586,7 @@ ZEXTERN z_off_t ZEXPORT    gztell OF((gzFile file));
 */
 
 /*
-ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file));
+ZEXTERN z_off_t ZEXPORT gzoffset(gzFile file);
 
      Return the current compressed (actual) read or write offset of file.  This
    offset includes the count of bytes that precede the gzip stream, for example
@@ -1592,7 +1595,7 @@ ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file));
    be used for a progress indicator.  On error, gzoffset() returns -1.
 */
 
-ZEXTERN int ZEXPORT gzeof OF((gzFile file));
+ZEXTERN int ZEXPORT gzeof(gzFile file);
 /*
      Return true (1) if the end-of-file indicator for file has been set while
    reading, false (0) otherwise.  Note that the end-of-file indicator is set
@@ -1607,7 +1610,7 @@ ZEXTERN int ZEXPORT gzeof OF((gzFile file));
    has grown since the previous end of file was detected.
 */
 
-ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
+ZEXTERN int ZEXPORT gzdirect(gzFile file);
 /*
      Return true (1) if file is being copied directly while reading, or false
    (0) if file is a gzip stream being decompressed.
@@ -1628,7 +1631,7 @@ ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
    gzip file reading and decompression, which may not be desired.)
 */
 
-ZEXTERN int ZEXPORT    gzclose OF((gzFile file));
+ZEXTERN int ZEXPORT    gzclose(gzFile file);
 /*
      Flush all pending output for file, if necessary, close file and
    deallocate the (de)compression state.  Note that once file is closed, you
@@ -1641,8 +1644,8 @@ ZEXTERN int ZEXPORT    gzclose OF((gzFile file));
    last read ended in the middle of a gzip stream, or Z_OK on success.
 */
 
-ZEXTERN int ZEXPORT gzclose_r OF((gzFile file));
-ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
+ZEXTERN int ZEXPORT gzclose_r(gzFile file);
+ZEXTERN int ZEXPORT gzclose_w(gzFile file);
 /*
      Same as gzclose(), but gzclose_r() is only for use when reading, and
    gzclose_w() is only for use when writing or appending.  The advantage to
@@ -1653,7 +1656,7 @@ ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
    zlib library.
 */
 
-ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
+ZEXTERN const char * ZEXPORT gzerror(gzFile file, int *errnum);
 /*
      Return the error message for the last error which occurred on file.
    errnum is set to zlib error number.  If an error occurred in the file system
@@ -1669,7 +1672,7 @@ ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
    functions above that do not distinguish those cases in their return values.
 */
 
-ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
+ZEXTERN void ZEXPORT gzclearerr(gzFile file);
 /*
      Clear the error and end-of-file flags for file.  This is analogous to the
    clearerr() function in stdio.  This is useful for continuing to read a gzip
@@ -1686,7 +1689,7 @@ ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
    library.
 */
 
-ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
+ZEXTERN uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len);
 /*
      Update a running Adler-32 checksum with the bytes buf[0..len-1] and
    return the updated checksum. An Adler-32 value is in the range of a 32-bit
@@ -1706,15 +1709,15 @@ ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
      if (adler != original_adler) error();
 */
 
-ZEXTERN uLong ZEXPORT adler32_z OF((uLong adler, const Bytef *buf,
-                                    z_size_t len));
+ZEXTERN uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf,
+                                z_size_t len);
 /*
      Same as adler32(), but with a size_t length.
 */
 
 /*
-ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
-                                          z_off_t len2));
+ZEXTERN uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2,
+                                      z_off_t len2);
 
      Combine two Adler-32 checksums into one.  For two sequences of bytes, seq1
    and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
@@ -1724,7 +1727,7 @@ ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
    negative, the result has no meaning or utility.
 */
 
-ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
+ZEXTERN uLong ZEXPORT crc32(uLong crc, const Bytef *buf, uInt len);
 /*
      Update a running CRC-32 with the bytes buf[0..len-1] and return the
    updated CRC-32. A CRC-32 value is in the range of a 32-bit unsigned integer.
@@ -1742,30 +1745,30 @@ ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
      if (crc != original_crc) error();
 */
 
-ZEXTERN uLong ZEXPORT crc32_z OF((uLong crc, const Bytef *buf,
-                                  z_size_t len));
+ZEXTERN uLong ZEXPORT crc32_z(uLong crc, const Bytef *buf,
+                              z_size_t len);
 /*
      Same as crc32(), but with a size_t length.
 */
 
 /*
-ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
+ZEXTERN uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2);
 
      Combine two CRC-32 check values into one.  For two sequences of bytes,
    seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
    calculated for each, crc1 and crc2.  crc32_combine() returns the CRC-32
    check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and
-   len2.
+   len2. len2 must be non-negative.
 */
 
 /*
-ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t len2));
+ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t len2);
 
      Return the operator corresponding to length len2, to be used with
-   crc32_combine_op().
+   crc32_combine_op(). len2 must be non-negative.
 */
 
-ZEXTERN uLong ZEXPORT crc32_combine_op OF((uLong crc1, uLong crc2, uLong op));
+ZEXTERN uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op);
 /*
      Give the same result as crc32_combine(), using op in place of len2. op is
    is generated from len2 by crc32_combine_gen(). This will be faster than
@@ -1778,20 +1781,20 @@ ZEXTERN uLong ZEXPORT crc32_combine_op OF((uLong crc1, uLong crc2, uLong op));
 /* deflateInit and inflateInit are macros to allow checking the zlib version
  * and the compiler's view of z_stream:
  */
-ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
-                                     const char *version, int stream_size));
-ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
-                                     const char *version, int stream_size));
-ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int  level, int  method,
-                                      int windowBits, int memLevel,
-                                      int strategy, const char *version,
-                                      int stream_size));
-ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int  windowBits,
-                                      const char *version, int stream_size));
-ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
-                                         unsigned char FAR *window,
-                                         const char *version,
-                                         int stream_size));
+ZEXTERN int ZEXPORT deflateInit_(z_streamp strm, int level,
+                                 const char *version, int stream_size);
+ZEXTERN int ZEXPORT inflateInit_(z_streamp strm,
+                                 const char *version, int stream_size);
+ZEXTERN int ZEXPORT deflateInit2_(z_streamp strm, int  level, int  method,
+                                  int windowBits, int memLevel,
+                                  int strategy, const char *version,
+                                  int stream_size);
+ZEXTERN int ZEXPORT inflateInit2_(z_streamp strm, int  windowBits,
+                                  const char *version, int stream_size);
+ZEXTERN int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits,
+                                     unsigned char FAR *window,
+                                     const char *version,
+                                     int stream_size);
 #ifdef Z_PREFIX_SET
 #  define z_deflateInit(strm, level) \
           deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
@@ -1836,7 +1839,7 @@ struct gzFile_s {
     unsigned char *next;
     z_off64_t pos;
 };
-ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));  /* backward compatibility */
+ZEXTERN int ZEXPORT gzgetc_(gzFile file);       /* backward compatibility */
 #ifdef Z_PREFIX_SET
 #  undef z_gzgetc
 #  define z_gzgetc(g) \
@@ -1853,13 +1856,13 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));  /* backward compatibility */
  * without large file support, _LFS64_LARGEFILE must also be true
  */
 #ifdef Z_LARGE64
-   ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
-   ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
-   ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
-   ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
-   ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t));
-   ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t));
-   ZEXTERN uLong ZEXPORT crc32_combine_gen64 OF((z_off64_t));
+   ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *);
+   ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int);
+   ZEXTERN z_off64_t ZEXPORT gztell64(gzFile);
+   ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile);
+   ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off64_t);
+   ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off64_t);
+   ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off64_t);
 #endif
 
 #if !defined(ZLIB_INTERNAL) && defined(Z_WANT64)
@@ -1881,50 +1884,50 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));  /* backward compatibility */
 #    define crc32_combine_gen crc32_combine_gen64
 #  endif
 #  ifndef Z_LARGE64
-     ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
-     ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int));
-     ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile));
-     ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile));
-     ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
-     ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
-     ZEXTERN uLong ZEXPORT crc32_combine_gen64 OF((z_off_t));
+     ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *);
+     ZEXTERN z_off_t ZEXPORT gzseek64(gzFile, z_off_t, int);
+     ZEXTERN z_off_t ZEXPORT gztell64(gzFile);
+     ZEXTERN z_off_t ZEXPORT gzoffset64(gzFile);
+     ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t);
+     ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t);
+     ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t);
 #  endif
 #else
-   ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *));
-   ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int));
-   ZEXTERN z_off_t ZEXPORT gztell OF((gzFile));
-   ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile));
-   ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
-   ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
-   ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t));
+   ZEXTERN gzFile ZEXPORT gzopen(const char *, const char *);
+   ZEXTERN z_off_t ZEXPORT gzseek(gzFile, z_off_t, int);
+   ZEXTERN z_off_t ZEXPORT gztell(gzFile);
+   ZEXTERN z_off_t ZEXPORT gzoffset(gzFile);
+   ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t);
+   ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t);
+   ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t);
 #endif
 
 #else /* Z_SOLO */
 
-   ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
-   ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
-   ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t));
+   ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t);
+   ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t);
+   ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t);
 
 #endif /* !Z_SOLO */
 
 /* undocumented functions */
-ZEXTERN const char   * ZEXPORT zError           OF((int));
-ZEXTERN int            ZEXPORT inflateSyncPoint OF((z_streamp));
-ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table    OF((void));
-ZEXTERN int            ZEXPORT inflateUndermine OF((z_streamp, int));
-ZEXTERN int            ZEXPORT inflateValidate OF((z_streamp, int));
-ZEXTERN unsigned long  ZEXPORT inflateCodesUsed OF((z_streamp));
-ZEXTERN int            ZEXPORT inflateResetKeep OF((z_streamp));
-ZEXTERN int            ZEXPORT deflateResetKeep OF((z_streamp));
+ZEXTERN const char   * ZEXPORT zError(int);
+ZEXTERN int            ZEXPORT inflateSyncPoint(z_streamp);
+ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table(void);
+ZEXTERN int            ZEXPORT inflateUndermine(z_streamp, int);
+ZEXTERN int            ZEXPORT inflateValidate(z_streamp, int);
+ZEXTERN unsigned long  ZEXPORT inflateCodesUsed(z_streamp);
+ZEXTERN int            ZEXPORT inflateResetKeep(z_streamp);
+ZEXTERN int            ZEXPORT deflateResetKeep(z_streamp);
 #if defined(_WIN32) && !defined(Z_SOLO)
-ZEXTERN gzFile         ZEXPORT gzopen_w OF((const wchar_t *path,
-                                            const char *mode));
+ZEXTERN gzFile         ZEXPORT gzopen_w(const wchar_t *path,
+                                        const char *mode);
 #endif
 #if defined(STDC) || defined(Z_HAVE_STDARG_H)
 #  ifndef Z_SOLO
-ZEXTERN int            ZEXPORTVA gzvprintf Z_ARG((gzFile file,
-                                                  const char *format,
-                                                  va_list va));
+ZEXTERN int            ZEXPORTVA gzvprintf(gzFile file,
+                                           const char *format,
+                                           va_list va);
 #  endif
 #endif
 
diff --git a/src/native/external/zlib/zlib2ansi b/src/native/external/zlib/zlib2ansi
deleted file mode 100644
index 23b2a1d5a3ec..000000000000
--- a/src/native/external/zlib/zlib2ansi
+++ /dev/null
@@ -1,152 +0,0 @@
-#!/usr/bin/perl
-
-# Transform K&R C function definitions into ANSI equivalent.
-#
-# Author: Paul Marquess
-# Version: 1.0
-# Date: 3 October 2006
-
-# TODO
-#
-# Assumes no function pointer parameters. unless they are typedefed.
-# Assumes no literal strings that look like function definitions
-# Assumes functions start at the beginning of a line
-
-use strict;
-use warnings;
-
-local $/;
-$_ = <>;
-
-my $sp = qr{ \s* (?: /\* .*? \*/ )? \s* }x; # assume no nested comments
-
-my $d1    = qr{ $sp (?: [\w\*\s]+ $sp)* $sp \w+ $sp [\[\]\s]* $sp }x ;
-my $decl  = qr{ $sp (?: \w+ $sp )+ $d1 }xo ;
-my $dList = qr{ $sp $decl (?: $sp , $d1 )* $sp ; $sp }xo ;
-
-
-while (s/^
-            (                  # Start $1
-                (              #   Start $2
-                    .*?        #     Minimal eat content
-                    ( ^ \w [\w\s\*]+ )    #     $3 -- function name
-                    \s*        #     optional whitespace
-                )              # $2 - Matched up to before parameter list
-
-                \( \s*         # Literal "(" + optional whitespace
-                ( [^\)]+ )     # $4 - one or more anythings except ")"
-                \s* \)         # optional whitespace surrounding a Literal ")"
-
-                ( (?: $dList )+ ) # $5
-
-                $sp ^ {        # literal "{" at start of line
-            )                  # Remember to $1
-        //xsom
-      )
-{
-    my $all = $1 ;
-    my $prefix = $2;
-    my $param_list = $4 ;
-    my $params = $5;
-
-    StripComments($params);
-    StripComments($param_list);
-    $param_list =~ s/^\s+//;
-    $param_list =~ s/\s+$//;
-
-    my $i = 0 ;
-    my %pList = map { $_ => $i++ }
-                split /\s*,\s*/, $param_list;
-    my $pMatch = '(\b' . join('|', keys %pList) . '\b)\W*$' ;
-
-    my @params = split /\s*;\s*/, $params;
-    my @outParams = ();
-    foreach my $p (@params)
-    {
-        if ($p =~ /,/)
-        {
-            my @bits = split /\s*,\s*/, $p;
-            my $first = shift @bits;
-            $first =~ s/^\s*//;
-            push @outParams, $first;
-            $first =~ /^(\w+\s*)/;
-            my $type = $1 ;
-            push @outParams, map { $type . $_ } @bits;
-        }
-        else
-        {
-            $p =~ s/^\s+//;
-            push @outParams, $p;
-        }
-    }
-
-
-    my %tmp = map { /$pMatch/;  $_ => $pList{$1}  }
-              @outParams ;
-
-    @outParams = map  { "    $_" }
-                 sort { $tmp{$a} <=> $tmp{$b} }
-                 @outParams ;
-
-    print $prefix ;
-    print "(\n" . join(",\n", @outParams) . ")\n";
-    print "{" ;
-
-}
-
-# Output any trailing code.
-print ;
-exit 0;
-
-
-sub StripComments
-{
-
-  no warnings;
-
-  # Strip C & C++ comments
-  # From the perlfaq
-  $_[0] =~
-
-    s{
-       /\*         ##  Start of /* ... */ comment
-       [^*]*\*+    ##  Non-* followed by 1-or-more *'s
-       (
-         [^/*][^*]*\*+
-       )*          ##  0-or-more things which don't start with /
-                   ##    but do end with '*'
-       /           ##  End of /* ... */ comment
-
-     |         ##     OR  C++ Comment
-       //          ## Start of C++ comment //
-       [^\n]*      ## followed by 0-or-more non end of line characters
-
-     |         ##     OR  various things which aren't comments:
-
-       (
-         "           ##  Start of " ... " string
-         (
-           \\.           ##  Escaped char
-         |               ##    OR
-           [^"\\]        ##  Non "\
-         )*
-         "           ##  End of " ... " string
-
-       |         ##     OR
-
-         '           ##  Start of ' ... ' string
-         (
-           \\.           ##  Escaped char
-         |               ##    OR
-           [^'\\]        ##  Non '\
-         )*
-         '           ##  End of ' ... ' string
-
-       |         ##     OR
-
-         .           ##  Anything other char
-         [^/"'\\]*   ##  Chars which doesn't start a comment, string or escape
-       )
-     }{$2}gxs;
-
-}
diff --git a/src/native/external/zlib/zutil.c b/src/native/external/zlib/zutil.c
index 9543ae825e32..b1c5d2d3c6da 100644
--- a/src/native/external/zlib/zutil.c
+++ b/src/native/external/zlib/zutil.c
@@ -24,13 +24,11 @@ z_const char * const z_errmsg[10] = {
 };
 
 
-const char * ZEXPORT zlibVersion()
-{
+const char * ZEXPORT zlibVersion(void) {
     return ZLIB_VERSION;
 }
 
-uLong ZEXPORT zlibCompileFlags()
-{
+uLong ZEXPORT zlibCompileFlags(void) {
     uLong flags;
 
     flags = 0;
@@ -121,9 +119,7 @@ uLong ZEXPORT zlibCompileFlags()
 #  endif
 int ZLIB_INTERNAL z_verbose = verbose;
 
-void ZLIB_INTERNAL z_error(m)
-    char *m;
-{
+void ZLIB_INTERNAL z_error(char *m) {
     fprintf(stderr, "%s\n", m);
     exit(1);
 }
@@ -132,9 +128,7 @@ void ZLIB_INTERNAL z_error(m)
 /* exported to allow conversion of error code to string for compress() and
  * uncompress()
  */
-const char * ZEXPORT zError(err)
-    int err;
-{
+const char * ZEXPORT zError(int err) {
     return ERR_MSG(err);
 }
 
@@ -148,22 +142,14 @@ const char * ZEXPORT zError(err)
 
 #ifndef HAVE_MEMCPY
 
-void ZLIB_INTERNAL zmemcpy(dest, source, len)
-    Bytef* dest;
-    const Bytef* source;
-    uInt  len;
-{
+void ZLIB_INTERNAL zmemcpy(Bytef* dest, const Bytef* source, uInt len) {
     if (len == 0) return;
     do {
         *dest++ = *source++; /* ??? to be unrolled */
     } while (--len != 0);
 }
 
-int ZLIB_INTERNAL zmemcmp(s1, s2, len)
-    const Bytef* s1;
-    const Bytef* s2;
-    uInt  len;
-{
+int ZLIB_INTERNAL zmemcmp(const Bytef* s1, const Bytef* s2, uInt len) {
     uInt j;
 
     for (j = 0; j < len; j++) {
@@ -172,10 +158,7 @@ int ZLIB_INTERNAL zmemcmp(s1, s2, len)
     return 0;
 }
 
-void ZLIB_INTERNAL zmemzero(dest, len)
-    Bytef* dest;
-    uInt  len;
-{
+void ZLIB_INTERNAL zmemzero(Bytef* dest, uInt len) {
     if (len == 0) return;
     do {
         *dest++ = 0;  /* ??? to be unrolled */
@@ -216,8 +199,7 @@ local ptr_table table[MAX_PTR];
  * a protected system like OS/2. Use Microsoft C instead.
  */
 
-voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size)
-{
+voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size) {
     voidpf buf;
     ulg bsize = (ulg)items*size;
 
@@ -242,8 +224,7 @@ voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size)
     return buf;
 }
 
-void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr)
-{
+void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) {
     int n;
 
     (void)opaque;
@@ -279,14 +260,12 @@ void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr)
 #  define _hfree   hfree
 #endif
 
-voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, uInt items, uInt size)
-{
+voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, uInt items, uInt size) {
     (void)opaque;
     return _halloc((long)items, size);
 }
 
-void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr)
-{
+void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) {
     (void)opaque;
     _hfree(ptr);
 }
@@ -299,25 +278,18 @@ void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr)
 #ifndef MY_ZCALLOC /* Any system without a special alloc function */
 
 #ifndef STDC
-extern voidp  malloc OF((uInt size));
-extern voidp  calloc OF((uInt items, uInt size));
-extern void   free   OF((voidpf ptr));
+extern voidp malloc(uInt size);
+extern voidp calloc(uInt items, uInt size);
+extern void free(voidpf ptr);
 #endif
 
-voidpf ZLIB_INTERNAL zcalloc(opaque, items, size)
-    voidpf opaque;
-    unsigned items;
-    unsigned size;
-{
+voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size) {
     (void)opaque;
     return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) :
                               (voidpf)calloc(items, size);
 }
 
-void ZLIB_INTERNAL zcfree(opaque, ptr)
-    voidpf opaque;
-    voidpf ptr;
-{
+void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) {
     (void)opaque;
     free(ptr);
 }
diff --git a/src/native/external/zlib/zutil.h b/src/native/external/zlib/zutil.h
index 0bc7f4ecd1c0..48dd7febae65 100644
--- a/src/native/external/zlib/zutil.h
+++ b/src/native/external/zlib/zutil.h
@@ -1,5 +1,5 @@
 /* zutil.h -- internal interface and configuration of the compression library
- * Copyright (C) 1995-2022 Jean-loup Gailly, Mark Adler
+ * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -56,7 +56,7 @@ typedef unsigned long  ulg;
 extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 /* (size given to avoid silly warnings with Visual C++) */
 
-#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
+#define ERR_MSG(err) z_errmsg[(err) < -6 || (err) > 2 ? 9 : 2 - (err)]
 
 #define ERR_RETURN(strm,err) \
   return (strm->msg = ERR_MSG(err), (err))
@@ -137,17 +137,8 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 #  endif
 #endif
 
-#if defined(MACOS) || defined(TARGET_OS_MAC)
+#if defined(MACOS)
 #  define OS_CODE  7
-#  ifndef Z_SOLO
-#    if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
-#      include <unix.h> /* for fdopen */
-#    else
-#      ifndef fdopen
-#        define fdopen(fd,mode) NULL /* No fdopen() */
-#      endif
-#    endif
-#  endif
 #endif
 
 #ifdef __acorn
@@ -170,18 +161,6 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 #  define OS_CODE 19
 #endif
 
-#if defined(_BEOS_) || defined(RISCOS)
-#  define fdopen(fd,mode) NULL /* No fdopen() */
-#endif
-
-#if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX
-#  if defined(_WIN32_WCE)
-#    define fdopen(fd,mode) NULL /* No fdopen() */
-#  else
-#    define fdopen(fd,type)  _fdopen(fd,type)
-#  endif
-#endif
-
 #if defined(__BORLANDC__) && !defined(MSDOS)
   #pragma warn -8004
   #pragma warn -8008
@@ -191,9 +170,9 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 /* provide prototypes for these when building zlib without LFS */
 #if !defined(_WIN32) && \
     (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0)
-    ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
-    ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
-    ZEXTERN uLong ZEXPORT crc32_combine_gen64 OF((z_off_t));
+    ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t);
+    ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t);
+    ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t);
 #endif
 
         /* common defaults */
@@ -232,16 +211,16 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 #    define zmemzero(dest, len) memset(dest, 0, len)
 #  endif
 #else
-   void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len));
-   int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len));
-   void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len));
+   void ZLIB_INTERNAL zmemcpy(Bytef* dest, const Bytef* source, uInt len);
+   int ZLIB_INTERNAL zmemcmp(const Bytef* s1, const Bytef* s2, uInt len);
+   void ZLIB_INTERNAL zmemzero(Bytef* dest, uInt len);
 #endif
 
 /* Diagnostic functions */
 #ifdef ZLIB_DEBUG
 #  include <stdio.h>
    extern int ZLIB_INTERNAL z_verbose;
-   extern void ZLIB_INTERNAL z_error OF((char *m));
+   extern void ZLIB_INTERNAL z_error(char *m);
 #  define Assert(cond,msg) {if(!(cond)) z_error(msg);}
 #  define Trace(x) {if (z_verbose>=0) fprintf x ;}
 #  define Tracev(x) {if (z_verbose>0) fprintf x ;}
@@ -258,9 +237,9 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 #endif
 
 #ifndef Z_SOLO
-   voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items,
-                                    unsigned size));
-   void ZLIB_INTERNAL zcfree  OF((voidpf opaque, voidpf ptr));
+   voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items,
+                                unsigned size);
+   void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr);
 #endif
 
 #define ZALLOC(strm, items, size) \
diff --git a/src/native/libs/CMakeLists.txt b/src/native/libs/CMakeLists.txt
index ca07ee3918ae..ded638d85418 100644
--- a/src/native/libs/CMakeLists.txt
+++ b/src/native/libs/CMakeLists.txt
@@ -99,11 +99,7 @@ if (CLR_CMAKE_TARGET_UNIX OR CLR_CMAKE_TARGET_BROWSER OR CLR_CMAKE_TARGET_WASI)
     endif ()
 else ()
     set(CMAKE_SHARED_LIBRARY_PREFIX "")
-
-    # we only need to build System.Globalization.Native when building static libs.
-    if (STATIC_LIBS_ONLY)
-        add_subdirectory(System.Globalization.Native)
-    endif ()
+    add_subdirectory(System.Globalization.Native)
 endif ()
 
 add_subdirectory(System.IO.Compression.Native)
diff --git a/src/native/libs/Common/pal_io_common.h b/src/native/libs/Common/pal_io_common.h
index 328b33f43022..27022e5c8fe6 100644
--- a/src/native/libs/Common/pal_io_common.h
+++ b/src/native/libs/Common/pal_io_common.h
@@ -8,7 +8,6 @@
 #include <poll.h>
 #include <pal_error_common.h>
 #include <pal_utilities.h>
-#include <minipal/utils.h>
 
 /**
  * Our intermediate pollfd struct to normalize the data types
diff --git a/src/native/libs/Common/pal_utilities.h b/src/native/libs/Common/pal_utilities.h
index 3fece3a08aa3..7b5fa63b6cac 100644
--- a/src/native/libs/Common/pal_utilities.h
+++ b/src/native/libs/Common/pal_utilities.h
@@ -15,6 +15,8 @@
 #include <unistd.h>
 #include <limits.h>
 
+#include <minipal/utils.h>
+
 #ifdef DEBUG
 #define assert_err(cond, msg, err) do \
 { \
@@ -43,16 +45,6 @@
 #define CONST_CAST2(TOTYPE, FROMTYPE, X) ((union { FROMTYPE _q; TOTYPE _nq; }){ ._q = (X) }._nq)
 #define CONST_CAST(TYPE, X) CONST_CAST2(TYPE, const TYPE, (X))
 
-#ifndef __has_attribute
-#define __has_attribute(x) (0)
-#endif
-
-#if __has_attribute(fallthrough)
-#define FALLTHROUGH __attribute__((fallthrough))
-#else
-#define FALLTHROUGH
-#endif
-
 /**
  * Abstraction helper method to safely copy strings using strlcpy or strcpy_s
  * or a different safe copy method, depending on the current platform.
diff --git a/src/native/libs/System.Globalization.Native/CMakeLists.txt b/src/native/libs/System.Globalization.Native/CMakeLists.txt
index 5230593aea28..16de5f623ff5 100644
--- a/src/native/libs/System.Globalization.Native/CMakeLists.txt
+++ b/src/native/libs/System.Globalization.Native/CMakeLists.txt
@@ -132,7 +132,8 @@ if (NOT CLR_CMAKE_TARGET_BROWSER AND NOT CLR_CMAKE_TARGET_WASI AND NOT CLR_CMAKE
 endif()
 
 if (MSVC)
-    set_source_files_properties(${NATIVEGLOBALIZATION_SOURCES} PROPERTIES LANGUAGE CXX)
+    # on VS < 17.9 we need to force compile sources as C++ since msvc doesn't support __typeof__ there
+    set_source_files_properties(${NATIVEGLOBALIZATION_SOURCES} PROPERTIES COMPILE_FLAGS "/TP")
 endif()
 
 include_directories("../Common")
@@ -143,11 +144,21 @@ if (GEN_SHARED_LIB)
         find_library(FOUNDATION Foundation REQUIRED)
     endif()
 
-    add_library(System.Globalization.Native
-        SHARED
-        ${NATIVEGLOBALIZATION_SOURCES}
-        ${VERSION_FILE_PATH}
-    )
+    if (CLR_CMAKE_TARGET_WIN32)
+        add_definitions(-DVER_FILEDESCRIPTION_STR="System.Globalization.Native")
+        add_library(System.Globalization.Native
+            SHARED
+            ${NATIVEGLOBALIZATION_SOURCES}
+            System.Globalization.Native.def
+            ${VERSION_FILE_RC_PATH}
+        )
+    else()
+        add_library(System.Globalization.Native
+            SHARED
+            ${NATIVEGLOBALIZATION_SOURCES}
+            ${VERSION_FILE_PATH}
+        )
+    endif()
 
     target_link_libraries(System.Globalization.Native
         PRIVATE
@@ -155,7 +166,20 @@ if (GEN_SHARED_LIB)
         ${FOUNDATION}
     )
 
-    install_with_stripped_symbols (System.Globalization.Native PROGRAMS .)
+    if(CLR_CMAKE_TARGET_WIN32)
+        include(GenerateExportHeader)
+        GENERATE_EXPORT_HEADER( System.Globalization.Native
+         BASE_NAME System.Globalization.Native
+         EXPORT_MACRO_NAME System.Globalization.Native_EXPORT
+         EXPORT_FILE_NAME System.Globalization.Native_Export.h
+         STATIC_DEFINE System.Globalization.Native_BUILT_AS_STATIC
+        )
+
+        install (TARGETS System.Globalization.Native DESTINATION .)
+        install (FILES $<TARGET_PDB_FILE:System.Globalization.Native> DESTINATION .)
+    else()
+        install_with_stripped_symbols (System.Globalization.Native PROGRAMS .)
+    endif()
 endif()
 
 add_library(System.Globalization.Native-Static
@@ -170,7 +194,7 @@ endif()
 
 install (TARGETS System.Globalization.Native-Static DESTINATION ${STATIC_LIB_DESTINATION} COMPONENT libs)
 
-if(NOT CLR_CMAKE_TARGET_APPLE AND NOT CLR_CMAKE_TARGET_ANDROID AND NOT CLR_CMAKE_TARGET_LINUX_MUSL AND NOT CLR_CMAKE_TARGET_HAIKU)
+if(NOT CLR_CMAKE_TARGET_WIN32 AND NOT CLR_CMAKE_TARGET_APPLE AND NOT CLR_CMAKE_TARGET_ANDROID AND NOT CLR_CMAKE_TARGET_LINUX_MUSL AND NOT CLR_CMAKE_TARGET_HAIKU)
     if (GEN_SHARED_LIB)
         add_custom_command(TARGET System.Globalization.Native POST_BUILD
             COMMENT "Verifying System.Globalization.Native.so dependencies"
diff --git a/src/native/libs/System.Globalization.Native/System.Globalization.Native.def b/src/native/libs/System.Globalization.Native/System.Globalization.Native.def
new file mode 100644
index 000000000000..3338d4c36994
--- /dev/null
+++ b/src/native/libs/System.Globalization.Native/System.Globalization.Native.def
@@ -0,0 +1,39 @@
+LIBRARY System.Globalization.Native.dll
+
+EXPORTS
+    GlobalizationNative_ChangeCase
+    GlobalizationNative_ChangeCaseInvariant
+    GlobalizationNative_ChangeCaseTurkish
+    GlobalizationNative_CloseSortHandle
+    GlobalizationNative_CompareString
+    GlobalizationNative_EndsWith
+    GlobalizationNative_EnumCalendarInfo
+    GlobalizationNative_GetCalendarInfo
+    GlobalizationNative_GetCalendars
+    GlobalizationNative_GetDefaultLocaleName
+    GlobalizationNative_GetICUVersion
+    GlobalizationNative_GetJapaneseEraStartDate
+    GlobalizationNative_GetLatestJapaneseEra
+    GlobalizationNative_GetLocaleInfoGroupingSizes
+    GlobalizationNative_GetLocaleInfoInt
+    GlobalizationNative_GetLocaleInfoString
+    GlobalizationNative_GetLocaleName
+    GlobalizationNative_GetLocales
+    GlobalizationNative_GetLocaleTimeFormat
+    GlobalizationNative_GetSortHandle
+    GlobalizationNative_GetSortKey
+    GlobalizationNative_GetSortVersion
+    GlobalizationNative_GetTimeZoneDisplayName
+    GlobalizationNative_IanaIdToWindowsId
+    GlobalizationNative_IndexOf
+    GlobalizationNative_InitICUFunctions
+    GlobalizationNative_IsNormalized
+    GlobalizationNative_IsPredefinedLocale
+    GlobalizationNative_LastIndexOf
+    GlobalizationNative_LoadICU
+    GlobalizationNative_NormalizeString
+    GlobalizationNative_StartsWith
+    GlobalizationNative_WindowsIdToIanaId
+    GlobalizationNative_ToAscii
+    GlobalizationNative_ToUnicode
+    GlobalizationNative_InitOrdinalCasingPage
\ No newline at end of file
diff --git a/src/native/libs/System.Globalization.Native/pal_collation.c b/src/native/libs/System.Globalization.Native/pal_collation.c
index 44de265b4a19..7f0c5e01f9c9 100644
--- a/src/native/libs/System.Globalization.Native/pal_collation.c
+++ b/src/native/libs/System.Globalization.Native/pal_collation.c
@@ -257,7 +257,7 @@ static void FillIgnoreWidthRules(UChar* completeRules, int32_t* fillIndex, int32
         for (UChar ch = 0xFF21; ch <= 0xFF3A; ch++)
         {
             completeRules[*fillIndex] = '&';
-            completeRules[(*fillIndex) + 1] = ch + UpperCaseToLowerCaseOffset;
+            completeRules[(*fillIndex) + 1] = (UChar)(ch + UpperCaseToLowerCaseOffset);
             completeRules[(*fillIndex) + 2] = '=';
             completeRules[(*fillIndex) + 3] = ch;
             (*fillIndex) += 4;
diff --git a/src/native/libs/System.Globalization.Native/pal_localeNumberData.c b/src/native/libs/System.Globalization.Native/pal_localeNumberData.c
index c0996e373317..1cc0e55d188b 100644
--- a/src/native/libs/System.Globalization.Native/pal_localeNumberData.c
+++ b/src/native/libs/System.Globalization.Native/pal_localeNumberData.c
@@ -165,7 +165,7 @@ static int GetNumericPattern(const UNumberFormat* pNumberFormat,
                              int isNegative)
 {
     const int INVALID_FORMAT = -1;
-    const int MAX_DOTNET_NUMERIC_PATTERN_LENGTH = 6; // example: "(C n)" plus terminator
+    const size_t MAX_DOTNET_NUMERIC_PATTERN_LENGTH = 6; // example: "(C n)" plus terminator
 
     UErrorCode ignore = U_ZERO_ERROR;
     int32_t icuPatternLength = unum_toPattern(pNumberFormat, false, NULL, 0, &ignore) + 1;
diff --git a/src/native/libs/System.Native/pal_interfaceaddresses.c b/src/native/libs/System.Native/pal_interfaceaddresses.c
index 8fee3e0e3e96..fe42cd91dac6 100644
--- a/src/native/libs/System.Native/pal_interfaceaddresses.c
+++ b/src/native/libs/System.Native/pal_interfaceaddresses.c
@@ -117,7 +117,7 @@ static inline uint8_t mask2prefix(uint8_t* mask, int length)
 static int (*getifaddrs)(struct ifaddrs**) = NULL;
 static void (*freeifaddrs)(struct ifaddrs*) = NULL;
 
-static void try_loading_getifaddrs()
+static void try_loading_getifaddrs(void)
 {
     if (android_get_device_api_level() >= 24)
     {
@@ -139,7 +139,7 @@ static void try_loading_getifaddrs()
     }
 }
 
-static bool ensure_getifaddrs_is_loaded()
+static bool ensure_getifaddrs_is_loaded(void)
 {
     static pthread_once_t getifaddrs_is_loaded = PTHREAD_ONCE_INIT;
     pthread_once(&getifaddrs_is_loaded, try_loading_getifaddrs);
@@ -169,11 +169,12 @@ int32_t SystemNative_EnumerateInterfaceAddresses(void* context,
 
     for (struct ifaddrs* current = headAddr; current != NULL; current = current->ifa_next)
     {
-        if (current->ifa_addr == NULL)
+        char *ifa_name = current->ifa_name;
+        if (current->ifa_addr == NULL || ifa_name == NULL)
         {
             continue;
         }
-        uint32_t interfaceIndex = if_nametoindex(current->ifa_name);
+        uint32_t interfaceIndex = if_nametoindex(ifa_name);
         // ifa_name may be an aliased interface name.
         // Use if_indextoname to map back to the true device name.
         char actualName[IF_NAMESIZE];
@@ -376,9 +377,17 @@ int32_t SystemNative_GetNetworkInterfaces(int32_t * interfaceCount, NetworkInter
 
     while (ifaddrsEntry != NULL)
     {
+        char *ifa_name = ifaddrsEntry->ifa_name;
+
+        if (ifa_name == NULL)
+        {
+            ifaddrsEntry = ifaddrsEntry->ifa_next;
+            continue;
+        }
+
         //current = NULL;
         nii = NULL;
-        uint ifindex = if_nametoindex(ifaddrsEntry->ifa_name);
+        uint ifindex = if_nametoindex(ifa_name);
         for (index = 0; index < (int)ifcount; index ++)
         {
             if (((NetworkInterfaceInfo*)memoryBlock)[index].InterfaceIndex == ifindex)
@@ -393,8 +402,8 @@ int32_t SystemNative_GetNetworkInterfaces(int32_t * interfaceCount, NetworkInter
             // We git new interface.
             nii = &((NetworkInterfaceInfo*)memoryBlock)[ifcount++];
 
-            memcpy(nii->Name, ifaddrsEntry->ifa_name, sizeof(nii->Name));
-            nii->InterfaceIndex = if_nametoindex(ifaddrsEntry->ifa_name);
+            memcpy(nii->Name, ifa_name, sizeof(nii->Name));
+            nii->InterfaceIndex = ifindex;
             nii->Speed = -1;
             nii->HardwareType = ((ifaddrsEntry->ifa_flags & IFF_LOOPBACK) == IFF_LOOPBACK) ? NetworkInterfaceType_Loopback : NetworkInterfaceType_Unknown;
 
diff --git a/src/native/libs/System.Native/pal_threading.c b/src/native/libs/System.Native/pal_threading.c
index c96c7e8abd55..975e94acc476 100644
--- a/src/native/libs/System.Native/pal_threading.c
+++ b/src/native/libs/System.Native/pal_threading.c
@@ -233,6 +233,14 @@ int32_t SystemNative_CreateThread(uintptr_t stackSize, void *(*startAddress)(voi
     error = pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
     assert(error == 0);
 
+#ifdef HOST_APPLE
+    // Match Windows stack size
+    if (stackSize == 0)
+    {
+        stackSize = 1536 * 1024;
+    }
+#endif
+
     if (stackSize > 0)
     {
         if (stackSize < (uintptr_t)PTHREAD_STACK_MIN)
diff --git a/src/native/libs/System.Security.Cryptography.Native.Android/pal_cipher.c b/src/native/libs/System.Security.Cryptography.Native.Android/pal_cipher.c
index b09932392eea..d60dbdd02df0 100644
--- a/src/native/libs/System.Security.Cryptography.Native.Android/pal_cipher.c
+++ b/src/native/libs/System.Security.Cryptography.Native.Android/pal_cipher.c
@@ -20,7 +20,7 @@ typedef struct CipherInfo
 } CipherInfo;
 
 #define DEFINE_CIPHER(cipherId, width, javaName, flags) \
-CipherInfo* AndroidCryptoNative_ ## cipherId() \
+CipherInfo* AndroidCryptoNative_ ## cipherId(void) \
 { \
     static CipherInfo info = { flags, width, javaName }; \
     return &info; \
diff --git a/src/native/libs/System.Security.Cryptography.Native.Apple/entrypoints.c b/src/native/libs/System.Security.Cryptography.Native.Apple/entrypoints.c
index 9f91b6d2488f..099fb3439471 100644
--- a/src/native/libs/System.Security.Cryptography.Native.Apple/entrypoints.c
+++ b/src/native/libs/System.Security.Cryptography.Native.Apple/entrypoints.c
@@ -70,6 +70,7 @@ static const Entry s_cryptoAppleNative[] =
     DllImportEntry(AppleCryptoNative_RsaGenerateKey)
     DllImportEntry(AppleCryptoNative_RsaDecryptOaep)
     DllImportEntry(AppleCryptoNative_RsaDecryptPkcs)
+    DllImportEntry(AppleCryptoNative_RsaDecryptRaw)
     DllImportEntry(AppleCryptoNative_RsaEncryptOaep)
     DllImportEntry(AppleCryptoNative_RsaEncryptPkcs)
     DllImportEntry(AppleCryptoNative_RsaSignaturePrimitive)
diff --git a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_rsa.c b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_rsa.c
index a9aece35fb0f..1746828d5b0d 100644
--- a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_rsa.c
+++ b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_rsa.c
@@ -134,6 +134,13 @@ int32_t AppleCryptoNative_RsaDecryptOaep(SecKeyRef privateKey,
         privateKey, pbData, cbData, pDecryptedOut, pErrorOut, mgfAlgorithm, SecKeyCreateDecryptedData);
 }
 
+int32_t AppleCryptoNative_RsaDecryptRaw(
+    SecKeyRef privateKey, uint8_t* pbData, int32_t cbData, CFDataRef* pDecryptedOut, CFErrorRef* pErrorOut)
+{
+    return RsaPrimitive(
+        privateKey, pbData, cbData, pDecryptedOut, pErrorOut, kSecKeyAlgorithmRSAEncryptionRaw, SecKeyCreateDecryptedData);
+}
+
 int32_t AppleCryptoNative_RsaDecryptPkcs(
     SecKeyRef privateKey, uint8_t* pbData, int32_t cbData, CFDataRef* pDecryptedOut, CFErrorRef* pErrorOut)
 {
diff --git a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_rsa.h b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_rsa.h
index 253fdae78e4b..34a350f80f9b 100644
--- a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_rsa.h
+++ b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_rsa.h
@@ -31,6 +31,14 @@ PALEXPORT int32_t AppleCryptoNative_RsaDecryptOaep(SecKeyRef privateKey,
                                                    CFDataRef* pDecryptedOut,
                                                    CFErrorRef* pErrorOut);
 
+/*
+Decrypt the contents of pbData using the provided privateKey without validating or removing padding.
+
+Follows pal_seckey return conventions.
+*/
+PALEXPORT int32_t AppleCryptoNative_RsaDecryptRaw(
+    SecKeyRef privateKey, uint8_t* pbData, int32_t cbData, CFDataRef* pDecryptedOut, CFErrorRef* pErrorOut);
+
 /*
 Decrypt the contents of pbData using the provided privateKey under PKCS#1 padding.
 
diff --git a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_seckey.c b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_seckey.c
index 05147521f189..e717eeb92a59 100644
--- a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_seckey.c
+++ b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_seckey.c
@@ -72,9 +72,31 @@ int32_t AppleCryptoNative_SecKeyCopyExternalRepresentation(SecKeyRef pKey,
     assert(pErrorOut != NULL);
 
     *pErrorOut = NULL;
+    *ppDataOut = NULL;
+    int32_t ret = 0;
+    CFDictionaryRef attributes = SecKeyCopyAttributes(pKey);
 
-    *ppDataOut = SecKeyCopyExternalRepresentation(pKey, pErrorOut);
-    return *ppDataOut == NULL ? kErrorSeeError : 1;
+    if (attributes != NULL)
+    {
+        if (CFDictionaryGetValue(attributes, kSecAttrIsExtractable) == kCFBooleanFalse)
+        {
+            ret = kKeyIsNotExtractable;
+        }
+        else if (CFDictionaryGetValue(attributes, kSecAttrIsSensitive) == kCFBooleanTrue)
+        {
+            ret = kKeyIsSensitive;
+        }
+
+        CFRelease(attributes);
+    }
+
+    if (ret == 0)
+    {
+        *ppDataOut = SecKeyCopyExternalRepresentation(pKey, pErrorOut);
+        ret = *ppDataOut == NULL ? kErrorSeeError : 1;
+    }
+
+    return ret;
 }
 
 SecKeyRef AppleCryptoNative_SecKeyCopyPublicKey(SecKeyRef privateKey)
diff --git a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_seckey.h b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_seckey.h
index 083a9f9af5b9..97543db915c5 100644
--- a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_seckey.h
+++ b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_seckey.h
@@ -16,6 +16,8 @@ static const int32_t kErrorSeeError = -2;
 static const int32_t kErrorUnknownAlgorithm = -3;
 static const int32_t kErrorUnknownState = -4;
 static const int32_t kPlatformNotSupported = -5;
+static const int32_t kKeyIsSensitive = -6;
+static const int32_t kKeyIsNotExtractable = -7;
 
 enum
 {
diff --git a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_swiftbindings.h b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_swiftbindings.h
index 9fd0f1ea0ab3..fd90fd0ad821 100644
--- a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_swiftbindings.h
+++ b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_swiftbindings.h
@@ -6,58 +6,7 @@
 #include "pal_types.h"
 #include "pal_compiler.h"
 
-PALEXPORT int32_t AppleCryptoNative_ChaCha20Poly1305Encrypt(
-    uint8_t* keyPtr,
-    int32_t keyLength,
-    uint8_t* noncePtr,
-    int32_t nonceLength,
-    uint8_t* plaintextPtr,
-    int32_t plaintextLength,
-    uint8_t* ciphertextBuffer,
-    int32_t ciphertextBufferLength,
-    uint8_t* tagBuffer,
-    int32_t tagBufferLength,
-    uint8_t* aadPtr,
-    int32_t aadLength);
-
-PALEXPORT int32_t AppleCryptoNative_ChaCha20Poly1305Decrypt(
-    uint8_t* keyPtr,
-    int32_t keyLength,
-    uint8_t* noncePtr,
-    int32_t nonceLength,
-    uint8_t* ciphertextPtr,
-    int32_t ciphertextLength,
-    uint8_t* tagPtr,
-    int32_t tagLength,
-    uint8_t* plaintextBuffer,
-    int32_t plaintextBufferLength,
-    uint8_t* aadPtr,
-    int32_t aadLength);
-
-PALEXPORT int32_t AppleCryptoNative_AesGcmEncrypt(
-    uint8_t* keyPtr,
-    int32_t keyLength,
-    uint8_t* noncePtr,
-    int32_t nonceLength,
-    uint8_t* plaintextPtr,
-    int32_t plaintextLength,
-    uint8_t* ciphertextBuffer,
-    int32_t ciphertextBufferLength,
-    uint8_t* tagBuffer,
-    int32_t tagBufferLength,
-    uint8_t* aadPtr,
-    int32_t aadLength);
-
-PALEXPORT int32_t AppleCryptoNative_AesGcmDecrypt(
-    uint8_t* keyPtr,
-    int32_t keyLength,
-    uint8_t* noncePtr,
-    int32_t nonceLength,
-    uint8_t* ciphertextPtr,
-    int32_t ciphertextLength,
-    uint8_t* tagPtr,
-    int32_t tagLength,
-    uint8_t* plaintextBuffer,
-    int32_t plaintextBufferLength,
-    uint8_t* aadPtr,
-    int32_t aadLength);
+EXTERN_C void* AppleCryptoNative_ChaCha20Poly1305Encrypt;
+EXTERN_C void* AppleCryptoNative_ChaCha20Poly1305Decrypt;
+EXTERN_C void* AppleCryptoNative_AesGcmEncrypt;
+EXTERN_C void* AppleCryptoNative_AesGcmDecrypt;
diff --git a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_swiftbindings.swift b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_swiftbindings.swift
index 7b04d52504fa..2a50deefb52b 100644
--- a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_swiftbindings.swift
+++ b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_swiftbindings.swift
@@ -4,7 +4,7 @@
 import CryptoKit
 import Foundation
 
-@_cdecl("AppleCryptoNative_ChaCha20Poly1305Encrypt")
+@_silgen_name("AppleCryptoNative_ChaCha20Poly1305Encrypt")
 public func AppleCryptoNative_ChaCha20Poly1305Encrypt(
     keyPtr: UnsafeMutableRawPointer,
     keyLength: Int32,
@@ -41,7 +41,7 @@ public func AppleCryptoNative_ChaCha20Poly1305Encrypt(
     return 1
  }
 
-@_cdecl("AppleCryptoNative_ChaCha20Poly1305Decrypt")
+@_silgen_name("AppleCryptoNative_ChaCha20Poly1305Decrypt")
 public func AppleCryptoNative_ChaCha20Poly1305Decrypt(
     keyPtr: UnsafeMutableRawPointer,
     keyLength: Int32,
@@ -86,7 +86,7 @@ public func AppleCryptoNative_ChaCha20Poly1305Decrypt(
     }
 }
 
-@_cdecl("AppleCryptoNative_AesGcmEncrypt")
+@_silgen_name("AppleCryptoNative_AesGcmEncrypt")
 public func AppleCryptoNative_AesGcmEncrypt(
     keyPtr: UnsafeMutableRawPointer,
     keyLength: Int32,
@@ -123,7 +123,7 @@ public func AppleCryptoNative_AesGcmEncrypt(
     return 1
  }
 
-@_cdecl("AppleCryptoNative_AesGcmDecrypt")
+@_silgen_name("AppleCryptoNative_AesGcmDecrypt")
 public func AppleCryptoNative_AesGcmDecrypt(
     keyPtr: UnsafeMutableRawPointer,
     keyLength: Int32,
diff --git a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_x509.c b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_x509.c
index 8335c1c362c0..94e22f28c879 100644
--- a/src/native/libs/System.Security.Cryptography.Native.Apple/pal_x509.c
+++ b/src/native/libs/System.Security.Cryptography.Native.Apple/pal_x509.c
@@ -70,7 +70,6 @@ PAL_X509ContentType AppleCryptoNative_X509GetContentType(uint8_t* pbData, int32_
     // The sniffing order is:
     // * X509 DER
     // * PKCS7 PEM/DER
-    // * PKCS12 DER (or PEM if Apple has non-standard support for that)
     // * X509 PEM or PEM aggregate (or DER, but that already matched)
     //
     // If the X509 PEM check is done first SecItemImport will erroneously match
@@ -78,6 +77,11 @@ PAL_X509ContentType AppleCryptoNative_X509GetContentType(uint8_t* pbData, int32_
     //
     // Likewise, if the X509 DER check isn't done first, Apple will report it as
     // being a PKCS#7.
+    //
+    // This does not attempt to open a PFX / PKCS12 as Apple does not provide
+    // a suitable API to determine if it is PKCS12 without doing potentially
+    // unbound MAC / KDF work. Instead, let that return Unknown and let the managed
+    // decoding do the check.
     SecCertificateRef certref = SecCertificateCreateWithData(NULL, cfData);
 
     if (certref != NULL)
@@ -104,41 +108,6 @@ PAL_X509ContentType AppleCryptoNative_X509GetContentType(uint8_t* pbData, int32_
         }
     }
 
-    dataFormat = kSecFormatPKCS12;
-    actualFormat = dataFormat;
-    itemType = kSecItemTypeAggregate;
-    actualType = itemType;
-
-    osStatus = SecItemImport(cfData, NULL, &actualFormat, &actualType, 0, NULL, NULL, NULL);
-
-    if (osStatus == errSecPassphraseRequired)
-    {
-        dataFormat = kSecFormatPKCS12;
-        actualFormat = dataFormat;
-        itemType = kSecItemTypeAggregate;
-        actualType = itemType;
-
-        SecItemImportExportKeyParameters importParams;
-        memset(&importParams, 0, sizeof(SecItemImportExportKeyParameters));
-
-        importParams.version = SEC_KEY_IMPORT_EXPORT_PARAMS_VERSION;
-        importParams.passphrase = CFSTR("");
-
-        osStatus = SecItemImport(cfData, NULL, &actualFormat, &actualType, 0, &importParams, NULL, NULL);
-
-        CFRelease(importParams.passphrase);
-        importParams.passphrase = NULL;
-    }
-
-    if (osStatus == noErr || osStatus == errSecPkcs12VerifyFailure)
-    {
-        if (actualType == itemType && actualFormat == dataFormat)
-        {
-            CFRelease(cfData);
-            return PAL_Pkcs12;
-        }
-    }
-
     dataFormat = kSecFormatX509Cert;
     actualFormat = dataFormat;
     itemType = kSecItemTypeCertificate;
diff --git a/src/native/libs/System.Security.Cryptography.Native/apibridge.c b/src/native/libs/System.Security.Cryptography.Native/apibridge.c
index 5c8d05d17c74..bf1eb9d9ecb9 100644
--- a/src/native/libs/System.Security.Cryptography.Native/apibridge.c
+++ b/src/native/libs/System.Security.Cryptography.Native/apibridge.c
@@ -112,7 +112,7 @@ int32_t local_X509_get_version(const X509* x509)
 
 X509_PUBKEY* local_X509_get_X509_PUBKEY(const X509* x509)
 {
-    if (x509)
+    if (x509 && x509->cert_info)
     {
         return x509->cert_info->key;
     }
@@ -123,13 +123,28 @@ X509_PUBKEY* local_X509_get_X509_PUBKEY(const X509* x509)
 int32_t local_X509_PUBKEY_get0_param(
     ASN1_OBJECT** palgOid, const uint8_t** pkeyBytes, int* pkeyBytesLen, X509_ALGOR** palg, X509_PUBKEY* pubkey)
 {
+    if (!pubkey)
+    {
+        return 0;
+    }
+
     if (palgOid)
     {
+        if (!pubkey->algor)
+        {
+            return 0;
+        }
+
         *palgOid = pubkey->algor->algorithm;
     }
 
     if (pkeyBytes)
     {
+        if (!pubkey->public_key)
+        {
+            return 0;
+        }
+
         *pkeyBytes = pubkey->public_key->data;
         *pkeyBytesLen = pubkey->public_key->length;
     }
diff --git a/src/native/libs/System.Security.Cryptography.Native/openssl.c b/src/native/libs/System.Security.Cryptography.Native/openssl.c
index ba713b6fdcc2..9e57a8413ca3 100644
--- a/src/native/libs/System.Security.Cryptography.Native/openssl.c
+++ b/src/native/libs/System.Security.Cryptography.Native/openssl.c
@@ -668,8 +668,11 @@ BIO* CryptoNative_GetX509NameInfo(X509* x509, int32_t nameType, int32_t forIssue
                                 if (sizeof(szOidUpn) == cchLocalOid &&
                                     0 == strncmp(localOid, szOidUpn, sizeof(szOidUpn)))
                                 {
-                                    // OTHERNAME->ASN1_TYPE->union.field
-                                    str = value->value->value.asn1_string;
+                                    if (value->value)
+                                    {
+                                        // OTHERNAME->ASN1_TYPE->union.field
+                                        str = value->value->value.asn1_string;
+                                    }
                                 }
                             }
 
diff --git a/src/native/libs/System.Security.Cryptography.Native/opensslshim.c b/src/native/libs/System.Security.Cryptography.Native/opensslshim.c
index d21d62734101..cd3e5f46b87d 100644
--- a/src/native/libs/System.Security.Cryptography.Native/opensslshim.c
+++ b/src/native/libs/System.Security.Cryptography.Native/opensslshim.c
@@ -116,8 +116,21 @@ static void OpenLibraryOnce(void)
         DlOpen(MAKELIB("10"));
     }
 
-    // FreeBSD uses a different suffix numbering convention.
-    // Current supported FreeBSD releases should use the order .11 -> .111
+#ifdef __FreeBSD__
+    // The ports version of OpenSSL is used over base where possible
+    if (libssl == NULL)
+    {
+        // OpenSSL 3.0 from ports
+        DlOpen(MAKELIB("12"));
+    }
+
+    if (libssl == NULL)
+    {
+        // OpenSSL 3.0 from base as found in FreeBSD 14.0
+        DlOpen(MAKELIB("30"));
+    }
+
+    // Fallbacks for OpenSSL 1.1.x
     if (libssl == NULL)
     {
         DlOpen(MAKELIB("11"));
@@ -127,6 +140,8 @@ static void OpenLibraryOnce(void)
     {
         DlOpen(MAKELIB("111"));
     }
+#endif
+
 }
 
 static pthread_once_t g_openLibrary = PTHREAD_ONCE_INIT;
diff --git a/src/native/libs/System.Security.Cryptography.Native/pal_evp_mac.c b/src/native/libs/System.Security.Cryptography.Native/pal_evp_mac.c
index f3bce0a78909..38bac3258913 100644
--- a/src/native/libs/System.Security.Cryptography.Native/pal_evp_mac.c
+++ b/src/native/libs/System.Security.Cryptography.Native/pal_evp_mac.c
@@ -121,7 +121,7 @@ int32_t CryptoNative_EvpMacInit(EVP_MAC_CTX* ctx,
 
         size_t keyLengthT = Int32ToSizeT(keyLength);
 
-        OSSL_PARAM params[4] = { 0 };
+        OSSL_PARAM params[4] = {{0}};
         int i = 0;
         params[i++] = OSSL_PARAM_construct_octet_string(OSSL_MAC_PARAM_KEY, (void*) key, keyLengthT);
         params[i++] = OSSL_PARAM_construct_int32(OSSL_MAC_PARAM_XOF, &xof);
@@ -340,7 +340,7 @@ int32_t CryptoNative_EvpMacOneShot(EVP_MAC* mac,
         size_t dataLengthT = Int32ToSizeT(dataLength);
         size_t macLengthT = Int32ToSizeT(destinationLength);
 
-        OSSL_PARAM params[5] = { 0 };
+        OSSL_PARAM params[5] = {{0}};
         int i = 0;
 
         params[i++] = OSSL_PARAM_construct_octet_string(OSSL_MAC_PARAM_KEY, (void*)key, keyLengthT);
diff --git a/src/native/libs/System.Security.Cryptography.Native/pal_pkcs7.c b/src/native/libs/System.Security.Cryptography.Native/pal_pkcs7.c
index efb0a738966f..bc6c1215d632 100644
--- a/src/native/libs/System.Security.Cryptography.Native/pal_pkcs7.c
+++ b/src/native/libs/System.Security.Cryptography.Native/pal_pkcs7.c
@@ -53,9 +53,19 @@ int32_t CryptoNative_GetPkcs7Certificates(PKCS7* p7, X509Stack** certs)
     switch (OBJ_obj2nid(p7->type))
     {
         case NID_pkcs7_signed:
+            if (!p7->d.sign)
+            {
+                return 0;
+            }
+
             *certs = p7->d.sign->cert;
             return 1;
         case NID_pkcs7_signedAndEnveloped:
+            if (!p7->d.signed_and_enveloped)
+            {
+                return 0;
+            }
+
             *certs = p7->d.signed_and_enveloped->cert;
             return 1;
     }
diff --git a/src/native/managed/Directory.Build.props b/src/native/managed/Directory.Build.props
new file mode 100644
index 000000000000..a431ff1b38fd
--- /dev/null
+++ b/src/native/managed/Directory.Build.props
@@ -0,0 +1,4 @@
+<Project>
+    <Import Project="..\..\..\Directory.Build.props" />
+    <Import Project=".\native-library.props" Condition="'$(IsSourceProject)' == 'true' and '$(Language)' == 'C#'" />
+</Project>
diff --git a/src/native/managed/Directory.Build.targets b/src/native/managed/Directory.Build.targets
new file mode 100644
index 000000000000..40dd9aec1ccb
--- /dev/null
+++ b/src/native/managed/Directory.Build.targets
@@ -0,0 +1,4 @@
+<Project>
+    <Import Project=".\native-library.targets" Condition="'$(IsSourceProject)' == 'true' and '$(Language)' == 'C#'" />
+    <Import Project="..\..\..\Directory.Build.targets" />
+</Project>
diff --git a/src/native/managed/README.md b/src/native/managed/README.md
new file mode 100644
index 000000000000..047d9fcc327d
--- /dev/null
+++ b/src/native/managed/README.md
@@ -0,0 +1,33 @@
+# Native runtime component libraries using NativeAOT
+
+This directory contains managed libraries that will be compiled using NativeAOT and can be used in runtime components.
+
+## Adding a new managed library
+
+Add a new subdirectory to `src/native/managed` for your library with a `src`, `inc` and `test` subdirectories:
+
+``` console
+$ mkdir -p libMyNewLibrary/src libMyNewLibrary/inc libMyNewLibrary/test
+$ dotnet new classlib -n libMyNewLibrary -o libMyNewLibrary/src
+```
+
+In `src/native/managed/compile-native.proj`, add
+`src/native/managed/libMyNewLibrary/src/libMyNewLibrary.csproj` to the `NativeLibsProjectsToBuild`
+item group.
+
+In `src/native/managed/libMyNewLibrary/src/libMyNewLibrary.csproj`:
+1. Define an item `@(InstallRuntimeComponentDestination)` that has directory names relative to `artifacts/bin/<runtimeFlavor>/<os.arch.config>/` where the shared library should be installed.  It's a good idea to have at least `.`:
+    ```xml
+      <ItemGroup>
+          <InstallRuntimeComponentDestination Include="." />
+          <InstallRuntimeComponentDestination Include="sharedFramework" Condition="'$(RuntimeFlavor)' == 'coreclr'"/>
+      </ItemGroup>
+    ```
+
+Limitations:
+
+* The project should be called `libXXXX` - currently the infrastructure expects a `lib` prefix on all platforms.
+
+* Currently only shared library output is supported.  In principle static linking is possible, but the
+infrastructure is not finished yet.  Additionally, mixing Debug/Release configurations with static
+linking will not be supported on Windows.
diff --git a/src/native/managed/cdacreader/cmake/CMakeLists.txt b/src/native/managed/cdacreader/cmake/CMakeLists.txt
new file mode 100644
index 000000000000..2a7459c37b85
--- /dev/null
+++ b/src/native/managed/cdacreader/cmake/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_library(cdacreader_api INTERFACE)
+target_include_directories(cdacreader_api INTERFACE ${CLR_SRC_NATIVE_DIR}/managed/cdacreader/inc)
diff --git a/src/native/managed/cdacreader/inc/cdac_reader.h b/src/native/managed/cdacreader/inc/cdac_reader.h
new file mode 100644
index 000000000000..b6c71b671a6e
--- /dev/null
+++ b/src/native/managed/cdacreader/inc/cdac_reader.h
@@ -0,0 +1,20 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#ifndef CDAC_READER_H
+#define CDAC_READER_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+int cdac_reader_init(intptr_t descriptor, intptr_t* handle);
+int cdac_reader_free(intptr_t handle);
+int cdac_reader_get_sos_interface(intptr_t handle, IUnknown** obj);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // CDAC_READER_H
diff --git a/src/native/managed/cdacreader/src/Entrypoints.cs b/src/native/managed/cdacreader/src/Entrypoints.cs
new file mode 100644
index 000000000000..a65ba9c5fa5e
--- /dev/null
+++ b/src/native/managed/cdacreader/src/Entrypoints.cs
@@ -0,0 +1,50 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.InteropServices;
+using System.Runtime.InteropServices.Marshalling;
+
+namespace Microsoft.Diagnostics.DataContractReader;
+
+internal static class Entrypoints
+{
+    private const string CDAC = "cdac_reader_";
+
+    [UnmanagedCallersOnly(EntryPoint = $"{CDAC}init")]
+    private static unsafe int Init(nint descriptor, IntPtr* handle)
+    {
+        Target target = new(descriptor);
+        GCHandle gcHandle = GCHandle.Alloc(target);
+        *handle = GCHandle.ToIntPtr(gcHandle);
+        return 0;
+    }
+
+    [UnmanagedCallersOnly(EntryPoint = $"{CDAC}free")]
+    private static unsafe int Free(IntPtr handle)
+    {
+        GCHandle h = GCHandle.FromIntPtr(handle);
+        h.Free();
+        return 0;
+    }
+
+    /// <summary>
+    /// Get the SOS-DAC interface implementation.
+    /// </summary>
+    /// <param name="handle">Handle crated via cdac initialization</param>
+    /// <param name="obj"><c>IUnknown</c> pointer that can be queried for ISOSDacInterface*</param>
+    /// <returns></returns>
+    [UnmanagedCallersOnly(EntryPoint = $"{CDAC}get_sos_interface")]
+    private static unsafe int GetSOSInterface(IntPtr handle, nint* obj)
+    {
+        ComWrappers cw = new StrategyBasedComWrappers();
+        Target? target = GCHandle.FromIntPtr(handle).Target as Target;
+        if (target == null)
+            return -1;
+
+        SOSDacImpl impl = new(target);
+        nint ptr = cw.GetOrCreateComInterfaceForObject(impl, CreateComInterfaceFlags.None);
+        *obj = ptr;
+        return 0;
+    }
+}
diff --git a/src/native/managed/cdacreader/src/SOSDacImpl.cs b/src/native/managed/cdacreader/src/SOSDacImpl.cs
new file mode 100644
index 000000000000..893c39bff883
--- /dev/null
+++ b/src/native/managed/cdacreader/src/SOSDacImpl.cs
@@ -0,0 +1,36 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.InteropServices;
+using System.Runtime.InteropServices.Marshalling;
+
+namespace Microsoft.Diagnostics.DataContractReader;
+
+[GeneratedComInterface]
+[Guid("4eca42d8-7e7b-4c8a-a116-7bfbf6929267")]
+internal partial interface ISOSDacInterface9
+{
+    int GetBreakingChangeVersion();
+}
+
+/// <summary>
+/// Implementation of ISOSDacInterface* interfaces intended to be passed out to consumers
+/// interacting with the DAC via those COM interfaces.
+/// </summary>
+[GeneratedComClass]
+internal sealed partial class SOSDacImpl : ISOSDacInterface9
+{
+    private readonly Target _target;
+
+    public SOSDacImpl(Target target)
+    {
+        _target = target;
+    }
+
+    public int GetBreakingChangeVersion()
+    {
+        // TODO: Return non-hard-coded version
+        return 4;
+    }
+}
diff --git a/src/tests/Interop/PInvoke/Primitives/Pointer/NonBlittablePointerNative.cpp b/src/native/managed/cdacreader/src/Target.cs
similarity index 54%
rename from src/tests/Interop/PInvoke/Primitives/Pointer/NonBlittablePointerNative.cpp
rename to src/native/managed/cdacreader/src/Target.cs
index a2b90f5bfcf9..1590984f017c 100644
--- a/src/tests/Interop/PInvoke/Primitives/Pointer/NonBlittablePointerNative.cpp
+++ b/src/native/managed/cdacreader/src/Target.cs
@@ -1,9 +1,11 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-#include <xplatform.h>
+namespace Microsoft.Diagnostics.DataContractReader;
 
-extern "C" DLL_EXPORT void STDMETHODCALLTYPE Negate(bool* ptr)
+internal sealed class Target
 {
-    *ptr = !*ptr;
+    public Target(nint _)
+    {
+    }
 }
diff --git a/src/native/managed/cdacreader/src/cdacreader.csproj b/src/native/managed/cdacreader/src/cdacreader.csproj
new file mode 100644
index 000000000000..51f87fa8908d
--- /dev/null
+++ b/src/native/managed/cdacreader/src/cdacreader.csproj
@@ -0,0 +1,17 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>$(NetCoreAppToolCurrent)</TargetFramework>
+    <Nullable>enable</Nullable>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <!-- Do not produce a public package. This ships as part of the runtime -->
+    <IsShippingPackage>false</IsShippingPackage>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <InstallRuntimeComponentDestination Include="." />
+    <!-- TODO: [cdac] Output to sharedFramework and add PlatformManifestFileEntry for Microsoft.NETCore.App once ready to include in shipping package -->
+    <!-- <InstallRuntimeComponentDestination Include="sharedFramework" Condition="'$(RuntimeFlavor)' == 'coreclr'"/> -->
+  </ItemGroup>
+
+</Project>
diff --git a/src/native/managed/compile-native.proj b/src/native/managed/compile-native.proj
new file mode 100644
index 000000000000..b9815ae30e48
--- /dev/null
+++ b/src/native/managed/compile-native.proj
@@ -0,0 +1,63 @@
+<Project Sdk="Microsoft.Build.Traversal" DefaultTargets="Publish">
+    <PropertyGroup>
+        <!-- We always want to use release for publishing using NativeAOT -->
+        <NativeLibsPublishConfiguration>Release</NativeLibsPublishConfiguration>
+	<!-- we always want to make shared libs -->
+        <NativeLibKind Condition="'$(NativeLibKind)' == ''">shared</NativeLibKind>
+
+	<!-- When we publish, we want to ensure the SDK does the same thing as though we ran 'dotnet publish' -->
+	<TraversalPublishGlobalProperties>$(TraversalPublishGlobalProperties);_IsPublishing=true</TraversalPublishGlobalProperties>
+    </PropertyGroup>
+
+    <ItemGroup>
+        <!-- add new projects here -->
+        <!-- NativeLibsProjectsToBuild Include="$(MSBuildThisFileDirectory)libhellomanaged/src/libhellomanaged.csproj" -->
+        <NativeLibsProjectsToBuild Include="$(MSBuildThisFileDirectory)cdacreader/src/cdacreader.csproj" />
+    </ItemGroup>
+
+    <!-- Decide if we're going to do the NativeAOT builds -->
+    <PropertyGroup>
+        <!-- disable on Mono, for now -->
+        <SupportsNativeAotComponents Condition="'$(SupportsNativeAotComponents)' == '' and '$(RuntimeFlavor)' == 'Mono'">false</SupportsNativeAotComponents>
+        <!-- disable on linux-bionic, for now -->
+        <SupportsNativeAotComponents Condition="'$(SupportsNativeAotComponents)' == '' and '$(TargetsLinuxBionic)' == 'true'">false</SupportsNativeAotComponents>
+        <!-- NativeAOT doesn't support cross-OS compilation. disable for crossdac-->
+        <SupportsNativeAotComponents Condition="'$(SupportsNativeAotComponents)' == '' and '$(HostOS)' != '$(TargetOS)'">false</SupportsNativeAotComponents>
+        <!-- unsupported targets -->
+        <SupportsNativeAotComponents Condition="'$(SupportsNativeAotComponents)' == '' and ('$(TargetArchitecture)' == 'arm' or '$(TargetArchitecture)' == 'armel' or '$(TargetArchitecture)' == 'x86' or '$(TargetArchitecture)' == 'riscv64')">false</SupportsNativeAotComponents>
+        <SupportsNativeAotComponents Condition="'$(SupportsNativeAotComponents)' == '' and ('$(TargetsWindows)' == 'true' or '$(TargetsOSX)' == 'true' or ('$(TargetsLinux)' == 'true' and '$(TargetsAndroid)' != 'true' and '$(TargetsLinuxMusl)' != 'true'))">true</SupportsNativeAotComponents>
+        <SupportsNativeAotComponents Condition="'$(SupportsNativeAotComponents)' == ''">false</SupportsNativeAotComponents>
+    </PropertyGroup>
+
+    <!-- some special kinds of runtime builds need extra NativeAOT flags -->
+    <PropertyGroup>
+        <SysRoot Condition="'$(CrossBuild)' == 'true' and '$(HostOS)' != 'windows'">$(ROOTFS_DIR)</SysRoot>
+        <LinkerFlavor Condition="'$(CrossBuild)' == 'true' and '$(TargetsLinux)' == 'true'">lld</LinkerFlavor>
+        <CustomLinkerArgToolchainArg Condition="'$(CrossBuild)' == 'true' and '$(_hostArchitecture)' == '$(_targetArchitecture)' and '$(_hostOS)' != 'windows'">--gcc-toolchain=$(ROOTFS_DIR)/usr</CustomLinkerArgToolchainArg>
+    </PropertyGroup>
+
+    <!-- properties to pass down to the subproject builds -->
+    <ItemGroup>
+        <SubprojectProps Include="Configuration" Value="$(NativeLibsPublishConfiguration)" />
+        <SubprojectProps Include="RuntimeConfiguration" Value="$(RuntimeConfiguration)" />
+        <SubprojectProps Include="LibrariesConfiguration" Value="$(LibrariesConfiguration)" />
+        <SubprojectProps Include="RuntimeIdentifier" Value="$(OutputRID)" />
+
+        <SubprojectProps Include="NativeLib" Value="$(NativeLibKind)" />
+
+        <SubprojectProps Condition="'$(SysRoot)' != ''" Include="SysRoot" Value="$(SysRoot)" />
+        <SubprojectProps Condition="'$(LinkerFlavor)' != ''" Include="LinkerFlavor" Value="$(LinkerFlavor)" />
+        <SubprojectProps Condition="'$(CustomLinkerArgToolchainArg)' != ''" Include="CustomLinkerArgToolchainArg" Value="$(CustomLinkerArgToolchainArg)" />
+    </ItemGroup>
+
+    <PropertyGroup>
+        <SplitSubprojectProps>@(SubprojectProps->'%(Identity)=%(Value)', ';')</SplitSubprojectProps>
+    </PropertyGroup>
+
+    <ItemGroup>
+        <ProjectReference Include="@(NativeLibsProjectsToBuild)"
+                          ReferenceOutputAssembly="false"
+                          AdditionalProperties="%(AdditionalProperties);$(SplitSubprojectProps)"
+			  Condition="$(SupportsNativeAotComponents)"/>
+    </ItemGroup>
+</Project>
diff --git a/src/native/managed/native-library.props b/src/native/managed/native-library.props
new file mode 100644
index 000000000000..89a80a8005a0
--- /dev/null
+++ b/src/native/managed/native-library.props
@@ -0,0 +1,35 @@
+<Project>
+  <PropertyGroup>
+    <PublishAot>true</PublishAot>
+    <SelfContained>true</SelfContained>
+    <!-- Don't strip symbols NativeAOT's default way. We will strip and save the symbols ourselves,
+         the same way as eng/native/functions.cmake strip_symbols
+    -->
+    <StripSymbols>false</StripSymbols>
+  </PropertyGroup>
+
+  <!-- set the shared library name.  this helps the native linker correctly reference this shared
+       library in dependents -->
+  <PropertyGroup>
+    <!-- in net9.0 we can do this, but only on mobile apple platforms, not OSX -->
+    <SharedLibraryInstallName>@rpath/$(MSBuildProjectName).dylib</SharedLibraryInstallName>
+  </PropertyGroup>
+  <ItemGroup Condition="'$(TargetsOSX)' == 'true'">
+      <LinkerArg Include="-Wl,-install_name,@rpath/$(MSBuildProjectName).dylib" />
+  </ItemGroup>
+  <ItemGroup Condition="'$(TargetsUnix)' == 'true' and ! ('$(TargetsAppleMobile)' == 'true' or '$(TargetsOSX)' == 'true')">
+      <!-- If there is no soname, ld on Linux and some other Unixes will embed the full (build-time!)
+           path to this shared library into any binary that links against it. We don't want that. -->
+      <LinkerArg Include="-Wl,-soname=$(MSBuildProjectName).so" />
+  </ItemGroup>
+
+  <PropertyGroup>
+      <!-- if IsRuntimeComponent is true, we will put the native library into the specified locations under `artifacts/bin/$(RuntimeFlavor)/os.arch.config/` -->
+      <IsRuntimeComponent Condition="'$(IsRuntimeComponent)' == ''">true</IsRuntimeComponent>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <!-- passed by compile-native.proj to set - -gcc-toolchain=$(ROOTFS_DIR)/usr -->
+    <CustomLinkerArg Condition="'$(CustomLinkerArgToolchainArg)' != ''" Include="$(CustomLinkerArgToolchainArg)" />
+  </ItemGroup>
+</Project>
diff --git a/src/native/managed/native-library.targets b/src/native/managed/native-library.targets
new file mode 100644
index 000000000000..e0f23322bba4
--- /dev/null
+++ b/src/native/managed/native-library.targets
@@ -0,0 +1,107 @@
+<Project>
+
+    <!-- strip the library the same way as the cmake build for coreclr does it:
+         - on mac, leave a .dylib.dwarf file next to the library.
+         - on linux leave a .so.dbg file next to to the library
+    -->
+    <Target Name="StripLibraryLikeCoreCLRSetupPaths"
+            DependsOnTargets="CopyNativeBinary"
+            Condition="'$(IsRuntimeComponent)' == 'true' and '$(TargetsWindows)' != 'true'">
+        <PropertyGroup>
+            <StrippedOutputPath>$(OutputPath)stripped\</StrippedOutputPath>
+            <StripSourceFile>$(StrippedOutputPath)$(TargetName)$(NativeBinaryExt)</StripSourceFile>
+            <StrippedExt Condition="'$(StrippedExt)' == '' and ('$(TargetsOSX)' == 'true' or '$(TargetsAppleMobile)' == 'true')">.dylib.dwarf</StrippedExt>
+            <StrippedExt Condition="'$(TargetsUnix)' == 'true' and '$(StrippedExt)' == ''">.so.dbg</StrippedExt>
+            <StripDestinationFile>$(StrippedOutputPath)$(TargetName)$(StrippedExt)</StripDestinationFile>
+        </PropertyGroup>
+    </Target>
+
+    <!--
+	Hack: temporarily turn on StripSymbols whlie SetupOSSpecificProps runs, then turn it off
+	again before LinkNative runs.  The problem is that SetupOSSpecificProps only probes for
+	$(ObjCopyName) and $(ObjCopyNameAlternative) if symbol stripping is turned on.  But we don't
+	want LinkNative to actually do any stripping since we have our own way that we'd like it to
+	work.
+    -->
+    <Target Name="TempStripSymbolsOn"
+	    BeforeTargets="SetupOSSpecificProps">
+	<PropertyGroup>
+	    <StripSymbols>true</StripSymbols>
+	</PropertyGroup>
+    </Target>
+
+    <Target Name="TempStripSymbolsOff"
+	    AfterTargets="SetupOSSpecificProps"
+	    BeforeTargets="LinkNative">
+	<PropertyGroup>
+	    <StripSymbols>false</StripSymbols>
+	</PropertyGroup>
+    </Target>
+
+    <Target Name="StripLibraryLikeCoreCLRBuild"
+            DependsOnTargets="SetupOSSpecificProps;CopyNativeBinary;StripLibraryLikeCoreCLRSetupPaths"
+            Condition="'$(IsRuntimeComponent)' == 'true' and '$(TargetsWindows)' != 'true'"
+            Inputs="$(PublishDir)$(TargetName)$(NativeBinaryExt)"
+            Outputs="$(StripSourceFile);$(StripDestinationFile)">
+        <Error Text="Do not set StripSymbols to true - runtime components stripping is controlled by native-library.targets" Condition="'$(StripSymbols)' == 'true'" />
+
+        <Message Importance="Normal" Text="Stripping $(PublishDir)$(TargetName)$(NativeBinaryExt) into $(StripSourceFile) and $(StripDestinationFile)" />
+
+        <!-- copy from the published/ subfolder to the stripped/ subfolder -->
+        <Copy SourceFiles="$(PublishDir)$(TargetName)$(NativeBinaryExt)"
+              DestinationFolder="$(StrippedOutputPath)"
+              SkipUnchangedFiles="true" />
+
+        <PropertyGroup>
+            <_StripLike Condition="'$(TargetsOSX)' == 'true' or '$(TargetsAppleMobile)' == 'true'">apple</_StripLike>
+            <_StripLike Condition="'$(_StripLike)' == ''">gnu</_StripLike>
+        </PropertyGroup>
+
+        <Exec Command="dsymutil --flat $(LikeCoreCLRDSymUtilMinimizeOpt) $(StripSourceFile)" Condition="'$(_StripLike)' == 'apple'"/> <!-- produces the .dylib.dwarf file -->
+        <Exec Command="strip -no_code_signature_warning -S $(StripSourceFile)" Condition="'$(_StripLike)' == 'apple'"/>
+        <!-- runtime build runs "codesign -f -s - libWhatever.dylib" in release configurations -->
+        <Exec Command="codesign -f -s - $(StripSourceFile)" Condition="'$(RuntimeConfiguration)' == 'Release' and '$(_StripLike)' == 'apple'" />
+
+        <Exec Command="$(ObjCopyName) --only-keep-debug $(StripSourceFile) $(StripDestinationFile)" Condition="'$(_StripLike)' == 'gnu'"/>
+        <Exec Command="$(ObjCopyName) --strip-debug --strip-unneeded $(StripSourceFile)" Condition="'$(_StripLike)' == 'gnu'"/>
+        <Exec Command="$(ObjCopyName) --add-gnu-debuglink=$(StripDestinationFile) $(StripSourceFile)" Condition="'$(_StripLike)' == 'gnu'"/>
+    </Target>
+
+    <Target Name="InstallRuntimeComponentToFinalDestination"
+            AfterTargets="CopyNativeBinary"
+            DependsOnTargets="CopyNativeBinary;StripLibraryLikeCoreCLRBuild" Condition="'$(IsRuntimeComponent)' == 'true'">
+        <Error Text="Set at least one @InstallRuntimeComponentDestination item" Condition="@(InstallRuntimeComponentDestination->Count()) == 0" />
+
+        <PropertyGroup>
+            <!-- FIXME: this is the same as CoreCLRToolPath - but that doesn't seem like a good name -->
+            <FinalRuntimeComponentDestinationBase>$([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', '$(RuntimeFlavor.ToLower())', '$(TargetOS).$(TargetArchitecture).$(RuntimeConfiguration)'))</FinalRuntimeComponentDestinationBase>
+        </PropertyGroup>
+
+        <ItemGroup>
+            <_NormalizedInstallRuntimeComponentDest Include="$([MSBuild]::NormalizeDirectory('$(FinalRuntimeComponentDestinationBase)', '%(InstallRuntimeComponentDestination.Identity)'))" />
+        </ItemGroup>
+
+        <ItemGroup Condition="'$(TargetsWindows)' != 'true'">
+            <CopyFinalFiles Include="$(StripSourceFile)" />
+            <CopyFinalFiles Include="$(StripDestinationFile)" />
+        </ItemGroup>
+
+        <ItemGroup Condition="'$(TargetsWindows)' == 'true'">
+            <CopyFinalFiles Include="$(PublishDir)$(TargetName)$(NativeBinaryExt)" />
+            <CopyFinalFilesPDB Include="$(PublishDir)$(TargetName).pdb" />
+        </ItemGroup>
+
+        <Message Importance="Normal" Text="Installing @(CopyFinalFiles) into %(_NormalizedInstallRuntimeComponentDest.Identity)"/>
+        <Message Importance="Normal" Text="Installing @(CopyFinalFilesPDB) into %(_NormalizedInstallRuntimeComponentDest.Identity)PDB\" Condition="'$(TargetsWindows)' == 'true'"/>
+
+        <Copy SourceFiles="@(CopyFinalFiles)"
+              DestinationFolder="%(_NormalizedInstallRuntimeComponentDest.Identity)"
+              SkipUnchangedFiles="true"/>
+        <Copy SourceFiles="@(CopyFinalFilesPDB)"
+              DestinationFolder="%(_NormalizedInstallRuntimeComponentDest.Identity)PDB\"
+              SkipUnchangedFiles="true"
+              Condition="'$(TargetsWindows)' == 'true'"/>
+    </Target>
+
+
+</Project>
diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c
index aeb7686dbec7..89b30724aafd 100644
--- a/src/native/minipal/cpufeatures.c
+++ b/src/native/minipal/cpufeatures.c
@@ -277,6 +277,28 @@ int minipal_getcpufeatures(void)
                                             {
                                                 result |= XArchIntrinsicConstants_AvxVnni;
                                             }
+
+                                            if ((cpuidInfo[CPUID_EDX] & (1 << 19)) != 0)                                // Avx10
+                                            {
+                                                __cpuidex(cpuidInfo, 0x00000024, 0x00000000);
+                                                if((cpuidInfo[CPUID_EBX] & 0xFF) >= 1)                                  // Avx10v1 - CPUID.(EAX=24H, ECX=00H):EBX[7:0] >= 1
+                                                {
+                                                    if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0)
+                                                    {
+                                                        result |= XArchIntrinsicConstants_Avx10v1;
+                                                    }
+
+                                                    if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0)
+                                                    {
+                                                        result |= XArchIntrinsicConstants_Avx10v1_V256;
+                                                    }
+
+                                                    if ((cpuidInfo[CPUID_EBX] & (1 << 18)) != 0)
+                                                    {
+                                                        result |= XArchIntrinsicConstants_Avx10v1_V512;
+                                                    }
+                                                }
+                                            }
                                         }
                                     }
                                 }
diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h
index 3b8d0de8aa86..a5a803e5d288 100644
--- a/src/native/minipal/cpufeatures.h
+++ b/src/native/minipal/cpufeatures.h
@@ -40,6 +40,9 @@ enum XArchIntrinsicConstants
     XArchIntrinsicConstants_VectorT128 = 0x4000000,
     XArchIntrinsicConstants_VectorT256 = 0x8000000,
     XArchIntrinsicConstants_VectorT512 = 0x10000000,
+    XArchIntrinsicConstants_Avx10v1 = 0x20000000,
+    XArchIntrinsicConstants_Avx10v1_V256 = 0x40000000,
+    XArchIntrinsicConstants_Avx10v1_V512 = 0x80000000,
 };
 #endif // HOST_X86 || HOST_AMD64
 
diff --git a/src/native/minipal/entrypoints.h b/src/native/minipal/entrypoints.h
index d7908764e05e..5ef45699cacd 100644
--- a/src/native/minipal/entrypoints.h
+++ b/src/native/minipal/entrypoints.h
@@ -14,9 +14,9 @@ typedef struct
     const void* method;
 } Entry;
 
-// expands to:      {"impl", (void*)impl},
+// expands to:      {"impl", (void*)&impl},
 #define DllImportEntry(impl) \
-    {#impl, (void*)impl},
+    {#impl, (void*)&impl},
 
 static const void* minipal_resolve_dllimport(const Entry* resolutionTable, size_t tableLength, const char* name)
 {
diff --git a/src/native/minipal/utils.h b/src/native/minipal/utils.h
index 644ed21f2714..ef840a529f48 100644
--- a/src/native/minipal/utils.h
+++ b/src/native/minipal/utils.h
@@ -13,6 +13,25 @@
 #define __has_builtin(x) 0
 #endif
 
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+#ifdef __cplusplus
+#  ifndef __has_cpp_attribute
+#    define __has_cpp_attribute(x) 0
+#  endif
+#  if __has_cpp_attribute(fallthrough)
+#    define FALLTHROUGH [[fallthrough]]
+#  else
+#    define FALLTHROUGH
+#  endif
+#elif __has_attribute(fallthrough)
+#  define FALLTHROUGH __attribute__((fallthrough))
+#else
+#  define FALLTHROUGH
+#endif
+
 #if defined(_MSC_VER)
 #  if defined(__SANITIZE_ADDRESS__)
 #    define HAS_ADDRESS_SANITIZER
diff --git a/src/tasks/AndroidAppBuilder/ApkBuilder.cs b/src/tasks/AndroidAppBuilder/ApkBuilder.cs
index 9b2a4e364566..d947c2f88a29 100644
--- a/src/tasks/AndroidAppBuilder/ApkBuilder.cs
+++ b/src/tasks/AndroidAppBuilder/ApkBuilder.cs
@@ -206,7 +206,7 @@ public ApkBuilder(TaskLoggingHelper logger)
                 // also, aapt is not happy about zip files
                 return false;
             }
-            if (fileName.StartsWith("."))
+            if (fileName.StartsWith('.'))
             {
                 // aapt complains on such files
                 return false;
diff --git a/src/tasks/Common/FileCache.cs b/src/tasks/Common/FileCache.cs
index 39ecd5e70ab2..05d92bdbc2c8 100644
--- a/src/tasks/Common/FileCache.cs
+++ b/src/tasks/Common/FileCache.cs
@@ -33,9 +33,7 @@ public FileCache(string? cacheFilePath, TaskLoggingHelper log)
         Enabled = true;
         if (File.Exists(cacheFilePath))
         {
-            _oldCache = (CompilerCache?)JsonSerializer.Deserialize(File.ReadAllText(cacheFilePath),
-                                                                    typeof(CompilerCache),
-                                                                    s_jsonOptions);
+            _oldCache = JsonSerializer.Deserialize<CompilerCache>(File.ReadAllText(cacheFilePath), s_jsonOptions);
         }
 
         _oldCache ??= new();
diff --git a/src/tasks/Crossgen2Tasks/Crossgen2Tasks.csproj b/src/tasks/Crossgen2Tasks/Crossgen2Tasks.csproj
index 8554022304cd..0def5b7ce873 100644
--- a/src/tasks/Crossgen2Tasks/Crossgen2Tasks.csproj
+++ b/src/tasks/Crossgen2Tasks/Crossgen2Tasks.csproj
@@ -4,9 +4,6 @@
     <OutputType>Library</OutputType>
     <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
     <NoWarn>$(NoWarn),CA1050</NoWarn>
-
-    <!-- Ignore nullable warnings on net4* -->
-    <NoWarn Condition="$(TargetFramework.StartsWith('net4'))">$(NoWarn),CS8604,CS8602</NoWarn>
   </PropertyGroup>
   <ItemGroup>
     <PackageReference Include="Microsoft.Build.Tasks.Core" Version="$(MicrosoftBuildTasksCoreVersion)" />
diff --git a/src/tasks/Crossgen2Tasks/ResolveReadyToRunCompilers.cs b/src/tasks/Crossgen2Tasks/ResolveReadyToRunCompilers.cs
index ced9becf153a..843333ff3fcb 100644
--- a/src/tasks/Crossgen2Tasks/ResolveReadyToRunCompilers.cs
+++ b/src/tasks/Crossgen2Tasks/ResolveReadyToRunCompilers.cs
@@ -230,6 +230,9 @@ private static bool ExtractTargetPlatformAndArchitecture(string runtimeIdentifie
                 case "x86":
                     architecture = Architecture.X86;
                     break;
+                case "riscv64":
+                    architecture = Architecture.RiscV64;
+                    break;
                 default:
                     return false;
             }
@@ -387,6 +390,7 @@ private static string ArchitectureToString(Architecture architecture)
                 Architecture.X64 => "x64",
                 Architecture.Arm => "arm",
                 Architecture.Arm64 => "arm64",
+                Architecture.RiscV64 => "riscv64",
                 _ => null
             };
         }
diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonData.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonData.cs
index 97bd05110bfb..22edc2c68fbe 100644
--- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonData.cs
+++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/BootJsonData.cs
@@ -108,9 +108,14 @@ public class BootJsonData
     public object diagnosticTracing { get; set; }
 
     /// <summary>
-    /// Gets or sets pthread pool size.
+    /// Gets or sets pthread pool initial size.
     /// </summary>
-    public int? pthreadPoolSize { get; set; }
+    public int? pthreadPoolInitialSize { get; set; }
+
+    /// <summary>
+    /// Gets or sets pthread pool unused size.
+    /// </summary>
+    public int? pthreadPoolUnusedSize { get; set; }
 }
 
 public class ResourcesData
diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ComputeWasmPublishAssets.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ComputeWasmPublishAssets.cs
index 429addedfd55..64b7b99c930a 100644
--- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ComputeWasmPublishAssets.cs
+++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ComputeWasmPublishAssets.cs
@@ -313,6 +313,7 @@ private List<ITaskItem> ProcessSymbolAssets(
     {
         var symbolStaticWebAssets = new List<ITaskItem>();
         var updateMap = new Dictionary<string, ITaskItem>();
+        var existingToRemove = new Dictionary<string, ITaskItem>();
 
         foreach (var kvp in symbolAssets)
         {
@@ -339,9 +340,14 @@ private List<ITaskItem> ProcessSymbolAssets(
                     resolvedPublishFilesToRemove.Remove(existing.ItemSpec);
                 }
             }
+            else
+            {
+                Log.LogMessage(MessageImportance.Low, "Marking '{0}' as removed for filtering compressed assets.", kvp.Key);
+                existingToRemove.Add(kvp.Key, kvp.Value);
+            }
         }
 
-        var compressedFiles = ProcessCompressedAssets(compressedRepresentations, symbolAssets, updateMap);
+        var compressedFiles = ProcessCompressedAssets(compressedRepresentations, symbolAssets, updateMap, existingToRemove);
 
         foreach (var file in compressedFiles)
         {
@@ -460,7 +466,8 @@ private List<ITaskItem> ComputeUpdatedAssemblies(
     private List<ITaskItem> ProcessCompressedAssets(
         Dictionary<string, ITaskItem> compressedRepresentations,
         Dictionary<string, ITaskItem> assetsToUpdate,
-        Dictionary<string, ITaskItem> updatedAssets)
+        Dictionary<string, ITaskItem> updatedAssets,
+        Dictionary<string, ITaskItem> existingToRemove = null)
     {
         var processed = new List<string>();
         var runtimeAssetsToUpdate = new List<ITaskItem>();
@@ -470,7 +477,11 @@ private List<ITaskItem> ProcessCompressedAssets(
             var relatedAsset = compressedAsset.GetMetadata("RelatedAsset");
             if (assetsToUpdate.ContainsKey(relatedAsset))
             {
-                if (!updatedAssets.ContainsKey(relatedAsset))
+                if (existingToRemove?.ContainsKey(relatedAsset) == true)
+                {
+                    Log.LogMessage(MessageImportance.Low, "Removing compressed '{0}' because related '{1}' is not published.", compressedAsset.ItemSpec, relatedAsset);
+                }
+                else if (!updatedAssets.ContainsKey(relatedAsset))
                 {
                     Log.LogMessage(MessageImportance.Low, "Related assembly for '{0}' was not updated and the compressed asset can be reused.", relatedAsset);
                     var newCompressedAsset = new TaskItem(compressedAsset);
diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/GenerateWasmBootJson.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/GenerateWasmBootJson.cs
index ef42b6fa952f..d5dc83e4252d 100644
--- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/GenerateWasmBootJson.cs
+++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/GenerateWasmBootJson.cs
@@ -71,6 +71,8 @@ public class GenerateWasmBootJson : Task
 
     public ITaskItem[] LazyLoadedAssemblies { get; set; }
 
+    public bool IsPublish { get; set; }
+
     public override bool Execute()
     {
         using var fileStream = File.Create(OutputPath);
@@ -101,7 +103,6 @@ public void WriteBootJson(Stream output, string entryAssemblyName)
 
         if (IsTargeting80OrLater())
         {
-            result.debugLevel = ParseOptionalInt(DebugLevel) ?? (DebugBuild ? 1 : 0);
             result.mainAssemblyName = entryAssemblyName;
             result.globalizationMode = GetGlobalizationMode().ToString().ToLowerInvariant();
 
@@ -329,6 +330,20 @@ public void WriteBootJson(Stream output, string entryAssemblyName)
             }
         }
 
+        if (IsTargeting80OrLater())
+        {
+            int? debugLevel = ParseOptionalInt(DebugLevel);
+
+            // If user didn't give us a value, check if we have any PDB.
+            if (debugLevel == null && result.resources?.pdb?.Count > 0)
+                debugLevel = -1;
+
+            // Fallback to -1 for build, or 0 for publish
+            debugLevel ??= IsPublish ? 0 : -1;
+
+            result.debugLevel = debugLevel.Value;
+        }
+
         if (ConfigurationFiles != null)
         {
             foreach (var configFile in ConfigurationFiles)
diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks.csproj b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks.csproj
index a41e88575de7..3e1e01fb6b8f 100644
--- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks.csproj
+++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFrameworks>$(NetCoreAppToolCurrent);$(NetFrameworkToolCurrent)</TargetFrameworks>
-    <NoWarn>$(NoWarn),CA1050,CA1850,CA1845,CA1859,NU5128</NoWarn>
+    <NoWarn>$(NoWarn),CA1050,CA1850,CA1845,CA1859;CA1866,NU5128</NoWarn>
     <RootNamespace>Microsoft.NET.Sdk.WebAssembly</RootNamespace>
     <CopyLocalLockFileAssemblies>true</CopyLocalLockFileAssemblies>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
diff --git a/src/tasks/Microsoft.NET.WebAssembly.Webcil/Microsoft.NET.WebAssembly.Webcil.csproj b/src/tasks/Microsoft.NET.WebAssembly.Webcil/Microsoft.NET.WebAssembly.Webcil.csproj
index b5131f4d47b8..14ecec43bf12 100644
--- a/src/tasks/Microsoft.NET.WebAssembly.Webcil/Microsoft.NET.WebAssembly.Webcil.csproj
+++ b/src/tasks/Microsoft.NET.WebAssembly.Webcil/Microsoft.NET.WebAssembly.Webcil.csproj
@@ -8,16 +8,14 @@
     <IncludeSymbols>true</IncludeSymbols>
     <Serviceable>true</Serviceable>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-    <IsShipping>true</IsShipping>
-    <!-- this assembly should not produce a public package, rather it's meant to be shipped by the
-          WasmAppBuilder task and the BrowserDebugProxy -->
-    <IsShippingPackage>false</IsShippingPackage>
+    <IsShipping>false</IsShipping>
+    <IsPackable>true</IsPackable>
   </PropertyGroup>
 
-  <ItemGroup>
+  <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETFramework'">
     <PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" /> 
     <PackageReference Include="System.Reflection.Metadata" Version="$(SystemReflectionMetadataToolsetVersion)" />
-    <PackageReference Include="System.Collections.Immutable" Version="$(SystemCollectionsImmutableVersion)" />
+    <PackageReference Include="System.Collections.Immutable" Version="$(SystemCollectionsImmutableToolsetVersion)" />
   </ItemGroup>
 
   <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETFramework'">
diff --git a/src/tasks/MonoTargetsTasks/MonoTargetsTasks.csproj b/src/tasks/MonoTargetsTasks/MonoTargetsTasks.csproj
index 28c68c0150d7..3c652477cad9 100644
--- a/src/tasks/MonoTargetsTasks/MonoTargetsTasks.csproj
+++ b/src/tasks/MonoTargetsTasks/MonoTargetsTasks.csproj
@@ -5,19 +5,18 @@
     <Nullable>enable</Nullable>
     <NoWarn>$(NoWarn),CA1050,CA1850</NoWarn>
   </PropertyGroup>
-  <ItemGroup Condition="'$(TargetFramework)' == '$(NetCoreAppToolCurrent)'">
+  <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETCoreApp'">
     <PackageReference Include="Microsoft.Build" Version="$(MicrosoftBuildVersion)" />
     <PackageReference Include="Microsoft.Build.Tasks.Core" Version="$(MicrosoftBuildTasksCoreVersion)" />
-    <PackageReference Include="System.Reflection.Metadata" Version="$(SystemReflectionMetadataToolsetVersion)" />
   </ItemGroup>
-  <ItemGroup Condition="'$(TargetFramework)' == '$(NetFrameworkToolCurrent)'">
+  <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETFramework'">
     <!-- On .NET Framework, make sure we don't include a copy of the MSBuild assemblies with the task. The NETCore version doesn't do it already. -->
     <PackageReference Include="Microsoft.Build" Version="$(MicrosoftBuildVersion)" IncludeAssets="compile" />
     <PackageReference Include="Microsoft.Build.Tasks.Core" Version="$(MicrosoftBuildTasksCoreVersion)" IncludeAssets="compile" />
-    <PackageReference Include="System.Reflection.Metadata" Version="$(SystemReflectionMetadataToolsetVersion)" PrivateAssets="All" />
     <!-- These versions should not be newer than what Visual Studio MSBuild uses -->
+    <PackageReference Include="System.Reflection.Metadata" Version="$(SystemReflectionMetadataToolsetVersion)" PrivateAssets="All" />
     <PackageReference Include="System.Threading.Tasks.Extensions" Version="$(SystemThreadingTasksExtensionsVersion)" PrivateAssets="all" />
-    <PackageReference Include="System.Collections.Immutable" Version="$(SystemCollectionsImmutableVersion)" />
+    <PackageReference Include="System.Collections.Immutable" Version="$(SystemCollectionsImmutableToolsetVersion)" />
   </ItemGroup>
 
   <ItemGroup>
diff --git a/src/tasks/WasmAppBuilder/PInvokeTableGenerator.cs b/src/tasks/WasmAppBuilder/PInvokeTableGenerator.cs
index cd8535463bc3..4a29b47666e9 100644
--- a/src/tasks/WasmAppBuilder/PInvokeTableGenerator.cs
+++ b/src/tasks/WasmAppBuilder/PInvokeTableGenerator.cs
@@ -179,7 +179,7 @@ private string CEntryPoint(PInvoke pinvoke)
         if (pinvoke.WasmLinkage)
         {
             // We mangle the name to avoid collisions with symbols in other modules
-            return _fixupSymbolName($"{pinvoke.Module}_{pinvoke.EntryPoint}");
+            return _fixupSymbolName($"{pinvoke.Module}#{pinvoke.EntryPoint}");
         }
         return _fixupSymbolName(pinvoke.EntryPoint);
     }
@@ -269,10 +269,19 @@ private static bool TryIsMethodGetParametersUnsupported(MethodInfo method, [NotN
             return null;
         }
 
+        var realReturnType = method.ReturnType;
+        var realParameterTypes = method.GetParameters().Select(p => MapType(p.ParameterType)).ToList();
+
+        SignatureMapper.TypeToChar(realReturnType, Log, out bool resultIsByRef);
+        if (resultIsByRef) {
+            realReturnType = typeof(void);
+            realParameterTypes.Insert(0, "void *");
+        }
+
         return
             $$"""
             {{(pinvoke.WasmLinkage ? $"__attribute__((import_module(\"{EscapeLiteral(pinvoke.Module)}\"),import_name(\"{EscapeLiteral(pinvoke.EntryPoint)}\")))" : "")}}
-            {{(pinvoke.WasmLinkage ? "extern " : "")}}{{MapType(method.ReturnType)}} {{CEntryPoint(pinvoke)}} ({{string.Join(", ", method.GetParameters().Select(p => MapType(p.ParameterType)))}});
+            {{(pinvoke.WasmLinkage ? "extern " : "")}}{{MapType(realReturnType)}} {{CEntryPoint(pinvoke)}} ({{string.Join(", ", realParameterTypes)}});
             """;
     }
 
@@ -314,6 +323,14 @@ private void EmitNativeToInterp(StreamWriter w, List<PInvokeCallback> callbacks)
         // Only blittable parameter/return types are supposed.
         int cb_index = 0;
 
+        w.Write(@"#include <mono/utils/details/mono-error-types.h>
+                #include <mono/metadata/assembly.h>
+                #include <mono/utils/mono-error.h>
+                #include <mono/metadata/object.h>
+                #include <mono/utils/details/mono-logger-types.h>
+                #include ""runtime.h""
+                ");
+
         // Arguments to interp entry functions in the runtime
         w.WriteLine($"InterpFtnDesc wasm_native_to_interp_ftndescs[{callbacks.Count}] = {{}};");
 
@@ -362,7 +379,16 @@ private void EmitNativeToInterp(StreamWriter w, List<PInvokeCallback> callbacks)
             if (!is_void)
                 sb.Append($"  {MapType(method.ReturnType)} res;\n");
 
-            //sb.Append($"  printf(\"{entry_name} called\\n\");\n");
+            // In case when null force interpreter to initialize the pointers
+            sb.Append($"  if (!(WasmInterpEntrySig_{cb_index})wasm_native_to_interp_ftndescs [{cb_index}].func) {{\n");
+            var assemblyFullName = cb.Method.DeclaringType == null ? "" : cb.Method.DeclaringType.Assembly.FullName;
+            var assemblyName = assemblyFullName != null && assemblyFullName.Split(',').Length > 0 ? assemblyFullName.Split(',')[0].Trim() : "";
+            var typeName = cb.Method.DeclaringType == null  || cb.Method.DeclaringType.FullName == null ? "" : cb.Method.DeclaringType.FullName;
+            var methodName = cb.Method.Name;
+            int numParams = method.GetParameters().Length;
+            sb.Append($"   mono_wasm_marshal_get_managed_wrapper (\"{assemblyName}\", \"{typeName}\", \"{methodName}\", {numParams});\n");
+            sb.Append($"  }}\n");
+
             sb.Append($"  ((WasmInterpEntrySig_{cb_index})wasm_native_to_interp_ftndescs [{cb_index}].func) (");
             if (!is_void)
             {
diff --git a/src/tasks/WasmAppBuilder/SignatureMapper.cs b/src/tasks/WasmAppBuilder/SignatureMapper.cs
index f3b7f17ad017..3638e432f0ce 100644
--- a/src/tasks/WasmAppBuilder/SignatureMapper.cs
+++ b/src/tasks/WasmAppBuilder/SignatureMapper.cs
@@ -11,8 +11,15 @@
 
 internal static class SignatureMapper
 {
-    private static char? TypeToChar(Type t, LogAdapter log)
+    internal static char? TypeToChar(Type t, LogAdapter log, out bool isByRefStruct, int depth = 0)
     {
+        isByRefStruct = false;
+
+        if (depth > 5) {
+            log.Warning("WASM0064", $"Unbounded recursion detected through parameter type '{t.Name}'");
+            return null;
+        }
+
         char? c = null;
         if (t.Namespace == "System") {
             c = t.Name switch
@@ -20,6 +27,7 @@ internal static class SignatureMapper
                 nameof(String) => 'I',
                 nameof(Boolean) => 'I',
                 nameof(Char) => 'I',
+                nameof(SByte) => 'I',
                 nameof(Byte) => 'I',
                 nameof(Int16) => 'I',
                 nameof(UInt16) => 'I',
@@ -51,19 +59,23 @@ internal static class SignatureMapper
                 c = 'I';
             else if (t.IsInterface)
                 c = 'I';
-            else if (t.IsEnum)
-                c = TypeToChar(t.GetEnumUnderlyingType(), log);
-            else if (t.IsPointer)
+            else if (t.IsEnum) {
+                Type underlyingType = t.GetEnumUnderlyingType();
+                c = TypeToChar(underlyingType, log, out _, ++depth);
+            } else if (t.IsPointer)
                 c = 'I';
             else if (PInvokeTableGenerator.IsFunctionPointer(t))
                 c = 'I';
             else if (t.IsValueType)
             {
                 var fields = t.GetFields(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);
-                if (fields.Length == 1)
-                    return TypeToChar(fields[0].FieldType, log);
-                else if (PInvokeTableGenerator.IsBlittable(t, log))
+                if (fields.Length == 1) {
+                    Type fieldType = fields[0].FieldType;
+                    return TypeToChar(fieldType, log, out isByRefStruct, ++depth);
+                } else if (PInvokeTableGenerator.IsBlittable(t, log))
                     c = 'I';
+
+                isByRefStruct = true;
             }
             else
                 log.Warning("WASM0064", $"Unsupported parameter type '{t.Name}'");
@@ -74,15 +86,20 @@ internal static class SignatureMapper
 
     public static string? MethodToSignature(MethodInfo method, LogAdapter log)
     {
-        string? result = TypeToChar(method.ReturnType, log)?.ToString();
+        string? result = TypeToChar(method.ReturnType, log, out bool resultIsByRef)?.ToString();
         if (result == null)
         {
             return null;
         }
 
+        if (resultIsByRef) {
+            // WASM abi passes a result-pointer in slot 0 instead of returning struct results
+            result = "VI";
+        }
+
         foreach (var parameter in method.GetParameters())
         {
-            char? parameterChar = TypeToChar(parameter.ParameterType, log);
+            char? parameterChar = TypeToChar(parameter.ParameterType, log, out _);
             if (parameterChar == null)
             {
                 return null;
diff --git a/src/tasks/WasmAppBuilder/WasmAppBuilder.cs b/src/tasks/WasmAppBuilder/WasmAppBuilder.cs
index 243a7aed31a5..7847039163b1 100644
--- a/src/tasks/WasmAppBuilder/WasmAppBuilder.cs
+++ b/src/tasks/WasmAppBuilder/WasmAppBuilder.cs
@@ -23,12 +23,14 @@ public class WasmAppBuilder : WasmAppBuilderBaseTask
 {
     public ITaskItem[]? RemoteSources { get; set; }
     public bool IncludeThreadsWorker { get; set; }
-    public int PThreadPoolSize { get; set; }
+    public int PThreadPoolInitialSize { get; set; }
+    public int PThreadPoolUnusedSize { get; set; }
     public bool UseWebcil { get; set; }
     public bool WasmIncludeFullIcuData { get; set; }
     public string? WasmIcuDataFileName { get; set; }
     public string? RuntimeAssetsLocation { get; set; }
     public bool CacheBootResources { get; set; }
+    public int DebugLevel { get; set; }
 
     private static readonly JsonSerializerOptions s_jsonOptions = new JsonSerializerOptions
     {
@@ -333,13 +335,22 @@ protected override bool ExecuteInternal()
 
         var extraConfiguration = new Dictionary<string, object?>();
 
-        if (PThreadPoolSize < -1)
+        if (PThreadPoolInitialSize < -1)
         {
-            throw new LogAsErrorException($"PThreadPoolSize must be -1, 0 or positive, but got {PThreadPoolSize}");
+            throw new LogAsErrorException($"PThreadPoolInitialSize must be -1, 0 or positive, but got {PThreadPoolInitialSize}");
         }
-        else if (PThreadPoolSize > -1)
+        else if (PThreadPoolInitialSize > -1)
         {
-            bootConfig.pthreadPoolSize = PThreadPoolSize;
+            bootConfig.pthreadPoolInitialSize = PThreadPoolInitialSize;
+        }
+
+        if (PThreadPoolUnusedSize < -1)
+        {
+            throw new LogAsErrorException($"PThreadPoolUnusedSize must be -1, 0 or positive, but got {PThreadPoolUnusedSize}");
+        }
+        else if (PThreadPoolUnusedSize > -1)
+        {
+            bootConfig.pthreadPoolUnusedSize = PThreadPoolUnusedSize;
         }
 
         foreach (ITaskItem extra in ExtraConfig ?? Enumerable.Empty<ITaskItem>())
diff --git a/src/tasks/WasmAppBuilder/WasmAppBuilder.csproj b/src/tasks/WasmAppBuilder/WasmAppBuilder.csproj
index 32fad42f32b9..34a689f680da 100644
--- a/src/tasks/WasmAppBuilder/WasmAppBuilder.csproj
+++ b/src/tasks/WasmAppBuilder/WasmAppBuilder.csproj
@@ -22,15 +22,27 @@
     <Compile Include="..\Common\TempFileName.cs" />
     <Compile Include="..\Microsoft.NET.Sdk.WebAssembly.Pack.Tasks\BootJsonData.cs" />
     <Compile Include="..\Microsoft.NET.Sdk.WebAssembly.Pack.Tasks\BootJsonBuilderHelper.cs" />
+  </ItemGroup>
+
+  <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETCoreApp'">
+    <PackageReference Include="Microsoft.Build" Version="$(MicrosoftBuildVersion)" />
+    <PackageReference Include="Microsoft.Build.Tasks.Core" Version="$(MicrosoftBuildTasksCoreVersion)" />
+    <!-- we need to explicitly reference System.Reflection.MetadataLoadContext 8.0 or newer for function pointer support,
+         because the default version pulled in from MSBuild is lower. -->
+    <!-- TODO: this can be removed once MSBuild brings in a new enough version -->
+    <PackageReference Include="System.Reflection.MetadataLoadContext" Version="$(SystemReflectionMetadataLoadContextToolsetVersion)" />
+  </ItemGroup>
 
-    <!-- excludeassets prevents the older version of MLC from MSBuild from trampling the newer version we need -->
+  <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETFramework'">
+    <!-- excludeassets prevents the older version of System.Reflection.MetadataLoadContext from MSBuild from trampling the newer version we need -->
     <PackageReference Include="Microsoft.Build" Version="$(MicrosoftBuildVersion)" ExcludeAssets="runtime" />
     <PackageReference Include="Microsoft.Build.Tasks.Core" Version="$(MicrosoftBuildTasksCoreVersion)" ExcludeAssets="runtime" />
-    <!-- we need to explicitly reference MLC 8.0 or newer for function pointer support,
-         because the default version pulled in from MSBuild is 6.0. -->
+    <!-- we need to explicitly reference System.Reflection.MetadataLoadContext 8.0 or newer for function pointer support,
+         because the default version pulled in from MSBuild is lower. -->
+    <!-- TODO: this can be removed once MSBuild brings in a new enough version -->
+    <PackageReference Include="System.Reflection.MetadataLoadContext" Version="$(SystemReflectionMetadataLoadContextToolsetVersion)" />
     <PackageReference Include="System.Reflection.Metadata" Version="$(SystemReflectionMetadataToolsetVersion)" />
-    <PackageReference Include="System.Reflection.MetadataLoadContext" Version="$(SystemReflectionMetadataLoadContextVersion)" />
-    <PackageReference Include="System.Collections.Immutable" Version="$(SystemCollectionsImmutableVersion)" />
+    <PackageReference Include="System.Collections.Immutable" Version="$(SystemCollectionsImmutableToolsetVersion)" />
   </ItemGroup>
 
   <ItemGroup>
diff --git a/src/tasks/WasmAppBuilder/WasmAppBuilderBaseTask.cs b/src/tasks/WasmAppBuilder/WasmAppBuilderBaseTask.cs
index c580fcd80eff..97bae1eaf373 100644
--- a/src/tasks/WasmAppBuilder/WasmAppBuilderBaseTask.cs
+++ b/src/tasks/WasmAppBuilder/WasmAppBuilderBaseTask.cs
@@ -38,7 +38,6 @@ public abstract class WasmAppBuilderBaseTask : Task
     // https://github.com/dotnet/icu/tree/maint/maint-67/icu-filters
     public string[] IcuDataFileNames { get; set; } = Array.Empty<string>();
 
-    public int DebugLevel { get; set; }
     public ITaskItem[] SatelliteAssemblies { get; set; } = Array.Empty<ITaskItem>();
     public bool HybridGlobalization { get; set; }
     public bool InvariantGlobalization { get; set; }
diff --git a/src/tasks/WasmAppBuilder/wasi/WasiAppBuilder.cs b/src/tasks/WasmAppBuilder/wasi/WasiAppBuilder.cs
index 59c737db14e2..8575ea6bf8ad 100644
--- a/src/tasks/WasmAppBuilder/wasi/WasiAppBuilder.cs
+++ b/src/tasks/WasmAppBuilder/wasi/WasiAppBuilder.cs
@@ -12,6 +12,7 @@ namespace Microsoft.WebAssembly.Build.Tasks;
 public class WasiAppBuilder : WasmAppBuilderBaseTask
 {
     public bool IsSingleFileBundle { get; set; }
+    public bool OutputSymbolsToAppBundle { get; set; }
 
     protected override bool ValidateArguments()
     {
@@ -66,7 +67,7 @@ protected override bool ExecuteInternal()
             {
                 FileCopyChecked(assembly, Path.Combine(asmRootPath, Path.GetFileName(assembly)), "Assemblies");
 
-                if (DebugLevel != 0)
+                if (OutputSymbolsToAppBundle)
                 {
                     string pdb = Path.ChangeExtension(assembly, ".pdb");
                     if (File.Exists(pdb))
diff --git a/src/tasks/WasmBuildTasks/UpdateChromeVersions.cs b/src/tasks/WasmBuildTasks/UpdateChromeVersions.cs
index 998a888c3844..71d359167fa1 100644
--- a/src/tasks/WasmBuildTasks/UpdateChromeVersions.cs
+++ b/src/tasks/WasmBuildTasks/UpdateChromeVersions.cs
@@ -44,6 +44,9 @@ public partial class UpdateChromeVersions : MBU.Task
     [Required, NotNull]
     public string ChromeVersionsPath { get; set; } = string.Empty;
 
+    [Required, NotNull]
+    public string EnvVarsForPRPath { get; set; } = string.Empty;
+
     public int MaxMajorVersionsToCheck { get; set; } = 2;
 
     // start at the branch position found in all.json, and try to
@@ -70,15 +73,21 @@ private async Task<bool> ExecuteInternalAsync()
             XmlDocument chromeVersionsXmlDoc = new XmlDocument();
             chromeVersionsXmlDoc.Load(ChromeVersionsPath);
             var osInfo = OSIdentifiers.Zip(OSPrefixes, (num, str) => new { Identifier = num, Prefix = str });
+            List<ChromeVersionSpec> versions = new();
             foreach (var info in osInfo)
             {
                 (ChromeVersionSpec version, string baseUrl) = await FindVersionFromChromiumDash(info.Prefix, info.Identifier).ConfigureAwait(false);
+                versions.Add(version);
                 bool hasMajorChanges = AreVersionsChanged(chromeVersionsXmlDoc, version, baseUrl);
                 if (hasMajorChanges)
                 {
                     VersionsChanged = UpdateChromeVersionsFile(chromeVersionsXmlDoc, version, baseUrl);
                 }
             }
+            if (VersionsChanged)
+            {
+                UpdateEnvVarsForPRFile(versions);
+            }
             return !Log.HasLoggedErrors;
         }
         catch (LogAsErrorException laee)
@@ -137,6 +146,26 @@ private bool UpdateChromeVersionsFile(XmlDocument xmlDoc, ChromeVersionSpec vers
         return true;
     }
 
+    private void UpdateEnvVarsForPRFile(List<ChromeVersionSpec> versions)
+    {
+        using StreamWriter writer = new StreamWriter(EnvVarsForPRPath);
+        foreach (var version in versions)
+        {
+            if (string.Equals(version.os, "Linux", StringComparison.OrdinalIgnoreCase))
+            {
+                writer.WriteLine($"CHROME_LINUX_VER={version.version}");
+            }
+            else if (string.Equals(version.os, "Windows", StringComparison.OrdinalIgnoreCase))
+            {
+                writer.WriteLine($"CHROME_WIN_VER={version.version}");
+            }
+            else
+            {
+                throw new Exception($"UpdateEnvVarsForPRFile task was used with unknown OS: {version.os}");
+            }
+        }
+    }
+
     private static void UpdateNodeValue(XmlDocument xmlDoc, string nodeName, string newValue)
     {
         XmlNode? node = xmlDoc.SelectSingleNode($"/Project/PropertyGroup/{nodeName}");
@@ -256,7 +285,7 @@ private async Task<Stream> GetDownloadFileStreamAsync(string filename, string ur
                                                     .ConfigureAwait(false);
             if (response.StatusCode == HttpStatusCode.OK)
             {
-                Log.LogMessage(MessageImportance.Low, $"Found {url}");
+                Log.LogMessage(MessageImportance.Low, $"Found url = {url} with branchUrl = ${branchUrl}");
                 return branchUrl;
             }
 
diff --git a/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs b/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs
index b775ad7cd19e..92d90e032788 100644
--- a/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs
+++ b/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs
@@ -8,10 +8,13 @@
 using System.IO;
 using System.IO.Compression;
 using System.Linq;
+using System.Text;
 using System.Text.Json;
 using System.Text.Json.Serialization;
 using System.Text.RegularExpressions;
+using System.Xml;
 using System.Xml.Linq;
+using System.Xml.XPath;
 using Microsoft.Build.Framework;
 using Microsoft.Build.Utilities;
 
@@ -19,7 +22,7 @@
 
 namespace Microsoft.Workload.Build.Tasks
 {
-    public partial class InstallWorkloadFromArtifacts : Task
+    public partial class InstallWorkloadFromArtifacts : PatchNuGetConfig
     {
         [Required, NotNull]
         public ITaskItem[]    WorkloadIds        { get; set; } = Array.Empty<ITaskItem>();
@@ -33,12 +36,6 @@ public partial class InstallWorkloadFromArtifacts : Task
         [Required, NotNull]
         public string?        VersionBandForManifestPackages       { get; set; }
 
-        [Required, NotNull]
-        public string?        LocalNuGetsPath    { get; set; }
-
-        [Required, NotNull]
-        public string?        TemplateNuGetConfigPath { get; set; }
-
         [Required, NotNull]
         public string         SdkWithNoWorkloadInstalledPath { get; set; } = string.Empty;
 
@@ -47,7 +44,9 @@ public partial class InstallWorkloadFromArtifacts : Task
         public bool           OnlyUpdateManifests { get; set; }
         public bool           SkipTempDirectoryCleanup { get; set; }
 
-        private const string s_nugetInsertionTag = "<!-- TEST_RESTORE_SOURCES_INSERTION_LINE -->";
+        // Should match enum values for MessageImportance - Low, Normal (default), High
+        public string?        WorkloadInstallCommandOutputImportance { get; set; }
+
         private string AllManifestsStampPath => Path.Combine(SdkWithNoWorkloadInstalledPath, ".all-manifests.stamp");
         private string _tempDir = string.Empty;
         private string _nugetCachePath = string.Empty;
@@ -67,6 +66,10 @@ public override bool Execute()
                 Directory.Delete(_tempDir, recursive: true);
             Directory.CreateDirectory(_tempDir);
             _nugetCachePath = Path.Combine(_tempDir, "nuget-cache");
+            if (SkipTempDirectoryCleanup)
+            {
+                Log.LogMessage(MessageImportance.High, $"Using temporary directory {_tempDir} for installing workloads from artifacts.");
+            }
 
             try
             {
@@ -217,6 +220,12 @@ private bool InstallPacks(InstallWorkloadRequest req, string nugetConfigContents
             string nugetConfigPath = Path.Combine(_tempDir, $"NuGet.{Path.GetRandomFileName()}.config");
             File.WriteAllText(nugetConfigPath, nugetConfigContents);
 
+            if (string.IsNullOrEmpty(WorkloadInstallCommandOutputImportance) ||
+                !Enum.TryParse<MessageImportance>(WorkloadInstallCommandOutputImportance, out var outputImportance))
+            {
+                outputImportance = MessageImportance.Normal;
+            }
+
             // Log.LogMessage(MessageImportance.High, $"{Environment.NewLine}** dotnet workload install {req.WorkloadId} **{Environment.NewLine}");
             (int exitCode, string output) = Utils.TryRunProcess(
                                                     Log,
@@ -228,7 +237,7 @@ private bool InstallPacks(InstallWorkloadRequest req, string nugetConfigContents
                                                     },
                                                     logStdErrAsMessage: req.IgnoreErrors,
                                                     silent: false,
-                                                    debugMessageImportance: MessageImportance.Normal);
+                                                    debugMessageImportance: outputImportance);
             if (exitCode != 0)
             {
                 if (req.IgnoreErrors)
@@ -255,11 +264,11 @@ private bool InstallPacks(InstallWorkloadRequest req, string nugetConfigContents
 
         private string GetNuGetConfig()
         {
-            string contents = File.ReadAllText(TemplateNuGetConfigPath);
-            if (!contents.Contains(s_nugetInsertionTag, StringComparison.InvariantCultureIgnoreCase))
-                throw new LogAsErrorException($"Could not find {s_nugetInsertionTag} in {TemplateNuGetConfigPath}");
-
-            return contents.Replace(s_nugetInsertionTag, $@"<add key=""nuget-local"" value=""file://{LocalNuGetsPath}"" />");
+            var nugetConfigPath = Path.GetTempFileName();
+            PatchNuGetConfig.GetNuGetConfig(TemplateNuGetConfigPath, LocalNuGetsPath, PackageSourceNameForBuiltPackages, NuGetConfigPackageSourceMappings, nugetConfigPath);
+            string contents = File.ReadAllText(nugetConfigPath);
+            File.Delete(nugetConfigPath);
+            return contents;
         }
 
         private bool InstallWorkloadManifest(ITaskItem workloadId, string name, string version, string sdkDir, string nugetConfigContents, bool stopOnMissing)
diff --git a/src/tasks/WorkloadBuildTasks/PackageInstaller.cs b/src/tasks/WorkloadBuildTasks/PackageInstaller.cs
index 0d2f5db32906..b7a188afb158 100644
--- a/src/tasks/WorkloadBuildTasks/PackageInstaller.cs
+++ b/src/tasks/WorkloadBuildTasks/PackageInstaller.cs
@@ -54,9 +54,18 @@ private bool InstallActual(PackageReference[] references, bool stopOnMissing)
 
             Directory.CreateDirectory(projecDir);
 
-            File.WriteAllText(Path.Combine(projecDir, "Directory.Build.props"), "<Project />");
-            File.WriteAllText(Path.Combine(projecDir, "Directory.Packages.props"), "<Project />");
-            File.WriteAllText(Path.Combine(projecDir, "Directory.Build.targets"), "<Project />");
+            File.WriteAllText(Path.Combine(projecDir, "Directory.Build.props"), """
+<Project>
+
+  <!-- This is an empty Directory.Build.props file to prevent projects which reside
+       under this directory to use any of the repository local settings. -->
+  <PropertyGroup>
+    <ImportDirectoryPackagesProps>false</ImportDirectoryPackagesProps>
+    <ImportDirectoryBuildTargets>false</ImportDirectoryBuildTargets>
+  </PropertyGroup>
+
+</Project>
+""");
             File.WriteAllText(projectPath, GenerateProject(references));
             File.WriteAllText(Path.Combine(projecDir, "nuget.config"), _nugetConfigContents);
 
@@ -119,7 +128,7 @@ private static string GenerateProject(IEnumerable<PackageReference> references)
     <ItemGroup>");
 
             foreach (var reference in references)
-                projectFileBuilder.AppendLine($"<PackageReference Include=\"{reference.Name}\" Version=\"{reference.Version}\" />");
+                projectFileBuilder.AppendLine($"<PackageDownload Include=\"{reference.Name}\" Version=\"[{reference.Version}]\" />");
 
             projectFileBuilder.Append(@"
     </ItemGroup>
diff --git a/src/tasks/WorkloadBuildTasks/PatchNuGetConfig.cs b/src/tasks/WorkloadBuildTasks/PatchNuGetConfig.cs
new file mode 100644
index 000000000000..34dfe2c1706e
--- /dev/null
+++ b/src/tasks/WorkloadBuildTasks/PatchNuGetConfig.cs
@@ -0,0 +1,140 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Xml.Linq;
+using System.Xml.XPath;
+using System.Xml;
+using Microsoft.Build.Framework;
+using Microsoft.Build.Utilities;
+
+#nullable enable
+
+namespace Microsoft.Workload.Build.Tasks;
+
+/*
+ * Used for patching a nuget.config to:
+ *
+ * 1. Add a new package source to the nuget.config
+ * 2. Add a new package source mapping to the nuget.config
+ *
+ * This is useful specifically the case of workload testing
+ */
+public class PatchNuGetConfig : Task
+{
+    [Required, NotNull]
+    public string? TemplateNuGetConfigPath { get; set; }
+
+    [Required, NotNull]
+    public string?        LocalNuGetsPath    { get; set; }
+
+    public string? OutputPath { get; set; }
+
+    /*
+     * Value: ["*Aspire*", "Foo*"]
+     * This will be translated to:
+     * <packageSourceMapping>
+     *  <packageSource key="nuget-local">
+     *    <package pattern="*Aspire*" />
+     *    <package pattern="Foo*" />
+     *  </packageSource>
+     *
+     * This is useful when using Central Package Management (https://learn.microsoft.com/en-us/nuget/consume-packages/central-package-management)
+    */
+    public string[] NuGetConfigPackageSourceMappings { get; set; } = Array.Empty<string>();
+
+    public string   PackageSourceNameForBuiltPackages { get; set; } = "nuget-local";
+
+    public override bool Execute()
+    {
+        try
+        {
+            Validate(TemplateNuGetConfigPath, PackageSourceNameForBuiltPackages, OutputPath);
+            GetNuGetConfig(TemplateNuGetConfigPath, LocalNuGetsPath, PackageSourceNameForBuiltPackages, NuGetConfigPackageSourceMappings, OutputPath!);
+            Log.LogMessage(MessageImportance.Low, $"Generated patched nuget.config at {OutputPath}");
+            return true;
+        }
+        catch (LogAsErrorException laee)
+        {
+            Log.LogError(laee.Message);
+            return false;
+        }
+    }
+
+    private static void Validate(string? templateNuGetConfigPath, string? packageSourceNameForBuiltPackages, string? outputPath)
+    {
+        if (string.IsNullOrEmpty(templateNuGetConfigPath))
+            throw new LogAsErrorException($"{nameof(templateNuGetConfigPath)} is required");
+
+        if (!File.Exists(templateNuGetConfigPath))
+            throw new LogAsErrorException($"Cannot find {nameof(templateNuGetConfigPath)}={templateNuGetConfigPath}");
+
+        if (string.IsNullOrEmpty(packageSourceNameForBuiltPackages))
+            throw new LogAsErrorException($"{nameof(packageSourceNameForBuiltPackages)} is required");
+
+        if (string.IsNullOrEmpty(outputPath))
+            throw new LogAsErrorException($"{nameof(outputPath)} is required");
+
+        if (Directory.Exists(outputPath))
+            throw new LogAsErrorException($"{nameof(outputPath)}={outputPath} is a directory, it should be a file");
+    }
+
+    public static void GetNuGetConfig(string templateNuGetConfigPath, string localNuGetsPath, string packageSourceNameForBuiltPackages, string[] nuGetConfigPackageSourceMappings, string outputPath)
+    {
+        Validate(templateNuGetConfigPath, packageSourceNameForBuiltPackages, outputPath);
+
+        XDocument doc = XDocument.Load(templateNuGetConfigPath);
+        string xpath = "/configuration/packageSources";
+        XElement? packageSources = doc.XPathSelectElement(xpath);
+        if (packageSources is null)
+            throw new LogAsErrorException($"Could not find {xpath} in {templateNuGetConfigPath}");
+
+        var newPackageSourceElement = new XElement("add",
+                                        new XAttribute("key", packageSourceNameForBuiltPackages),
+                                        new XAttribute("value", $"file://{localNuGetsPath}"));
+        if (packageSources.LastNode is not null)
+        {
+            packageSources.LastNode.AddAfterSelf(newPackageSourceElement);
+        }
+        else
+        {
+            packageSources.Add(newPackageSourceElement);
+        }
+
+        if (nuGetConfigPackageSourceMappings.Length > 0)
+        {
+            string mappingXpath = "/configuration/packageSourceMapping";
+            XElement? packageSourceMapping = doc.XPathSelectElement(mappingXpath);
+            if (packageSourceMapping is null)
+            {
+                if (doc.Root is null)
+                    throw new LogAsErrorException($"Could not find root element in {templateNuGetConfigPath}");
+
+                packageSourceMapping = new XElement("packageSourceMapping");
+                doc.Root.Add(packageSourceMapping);
+            }
+
+            var newPackageSourceMappingElement = new XElement("packageSource",
+                                                    new XAttribute("key", packageSourceNameForBuiltPackages),
+                                                    nuGetConfigPackageSourceMappings.Select
+                                                        (pattern => new XElement("package", new XAttribute("pattern", pattern))));
+            if (packageSourceMapping.FirstNode is not null)
+            {
+                packageSourceMapping.FirstNode?.AddBeforeSelf(newPackageSourceMappingElement);
+            }
+            else
+            {
+                packageSourceMapping.Add(newPackageSourceMappingElement);
+            }
+        }
+
+        using var xw = XmlWriter.Create(outputPath, new XmlWriterSettings { Indent = true, NewLineHandling = NewLineHandling.None, Encoding = Encoding.UTF8 });
+        doc.WriteTo(xw);
+        xw.Close();
+    }
+}
diff --git a/src/tasks/WorkloadBuildTasks/WorkloadBuildTasks.csproj b/src/tasks/WorkloadBuildTasks/WorkloadBuildTasks.csproj
index 4e91a40280ca..50249f07c44e 100644
--- a/src/tasks/WorkloadBuildTasks/WorkloadBuildTasks.csproj
+++ b/src/tasks/WorkloadBuildTasks/WorkloadBuildTasks.csproj
@@ -2,7 +2,7 @@
   <PropertyGroup>
     <TargetFrameworks>$(NetCoreAppToolCurrent)</TargetFrameworks>
     <!-- net8.0 is only need for the internal nuget produced which isn't required for source builds -->
-    <TargetFrameworks Condition="'$(DotNetBuildFromSource)' != 'true'">$(TargetFrameworks);net8.0</TargetFrameworks>
+    <TargetFrameworks Condition="'$(DotNetBuildSourceOnly)' != 'true'">$(TargetFrameworks);net8.0</TargetFrameworks>
     <Nullable>enable</Nullable>
     <NoWarn>$(NoWarn),CA1050,CA1850</NoWarn>
   </PropertyGroup>
diff --git a/src/tasks/tasks.proj b/src/tasks/tasks.proj
index ab54991791ba..504f3caad094 100644
--- a/src/tasks/tasks.proj
+++ b/src/tasks/tasks.proj
@@ -6,7 +6,7 @@
     <!-- For example, they may take dependencies on pre-built packages that aren't build-from-source. e.g. 'Microsoft.DotNet.CilStrip.Sources' -->
     <ProjectReference Remove="MonoTargetsTasks\MonoTargetsTasks.csproj;
                               MonoTargetsTasks\ILStrip\AssemblyStripper\AssemblyStripper.csproj"
-                      Condition="'$(DotNetBuildFromSource)' == 'true'" />
+                      Condition="'$(DotNetBuildSourceOnly)' == 'true'" />
   </ItemGroup>
 
   <!--
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index bb57a2dc781a..518f832c9f5b 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -19,6 +19,7 @@ endif()
 
 if (CLR_CMAKE_HOST_WIN32)
     # 4100 - unreferenced formal parameter
+    # 4242 - conversion from 'type1' to 'type2', possible loss of data
     # 4244 - conversion from 'type1' to 'type2', possible loss of data
     # 4514 - unreferenced inline function has been removed
     # 4625 - copy constructor was implicitly defined as deleted because a base class copy constructor is inaccessible or deleted
@@ -32,7 +33,7 @@ if (CLR_CMAKE_HOST_WIN32)
     # 5026 - move constructor was implicitly defined as deleted
     # 5027 - move assignment operator was implicitly defined as deleted
     # 5039 - pointer or reference to potentially throwing function passed to extern C function under -EHc. Undefined behavior may occur if this function throws an exception.
-    add_compile_options(-wd4100 -wd4244 -wd4514 -wd4625 -wd4626 -wd4668 -wd4710 -wd4711 -wd4774 -wd4820 -wd5025 -wd5026 -wd5027 -wd5039)
+    add_compile_options(-wd4100 -wd4242 -wd4244 -wd4514 -wd4625 -wd4626 -wd4668 -wd4710 -wd4711 -wd4774 -wd4820 -wd5025 -wd5026 -wd5027 -wd5039)
 
     set_property(DIRECTORY PROPERTY CLR_EH_OPTION /EHa) # enable C++ EH (w/ SEH exceptions)
 endif()
diff --git a/src/tests/Common/CLRTest.CrossGen.targets b/src/tests/Common/CLRTest.CrossGen.targets
index ff8f875df744..cacc3980c1a1 100644
--- a/src/tests/Common/CLRTest.CrossGen.targets
+++ b/src/tests/Common/CLRTest.CrossGen.targets
@@ -165,9 +165,6 @@ if [ ! -z ${RunCrossGen2+x} ]%3B then
     fi
     ReleaseLock
   fi
-
-  export DOTNET_ZapRequire=$(ZapRequire)
-  export DOTNET_ZapRequireList=$(AssemblyName)
 fi
         ]]>
       </CrossgenBashScript>
@@ -339,9 +336,6 @@ if defined RunCrossGen2 (
         ECHO R2RDump failed with exitcode - !R2RDumpStatus!
         Exit /b 1
     )
-
-  set DOTNET_ZapRequire=$(ZapRequire)
-  set DOTNET_ZapRequireList=$(AssemblyName)
 )
         ]]>
       </CrossgenBatchScript>
diff --git a/src/tests/Common/CoreCLRTestLibrary/CoreClrConfigurationDetection.cs b/src/tests/Common/CoreCLRTestLibrary/CoreClrConfigurationDetection.cs
index 75014ad32484..8b6d893f8d97 100644
--- a/src/tests/Common/CoreCLRTestLibrary/CoreClrConfigurationDetection.cs
+++ b/src/tests/Common/CoreCLRTestLibrary/CoreClrConfigurationDetection.cs
@@ -17,11 +17,17 @@ public static class CoreClrConfigurationDetection
     public static bool IsJitStressRegs => !string.Equals(GetEnvironmentVariableValue("JitStressRegs"), "0", StringComparison.InvariantCulture);
     public static bool IsJitMinOpts => string.Equals(GetEnvironmentVariableValue("JITMinOpts"), "1", StringComparison.InvariantCulture);
     public static bool IsTailCallStress => string.Equals(GetEnvironmentVariableValue("TailcallStress"), "1", StringComparison.InvariantCulture);
-    public static bool IsZapDisable => string.Equals(GetEnvironmentVariableValue("ZapDisable"), "1", StringComparison.InvariantCulture);
+    public static bool IsDisableR2R => string.Equals(GetEnvironmentVariableValue("ReadyToRun"), "0", StringComparison.InvariantCulture);
     public static bool IsGCStress3 => CompareGCStressModeAsLower(GetEnvironmentVariableValue("GCStress"), "0x3", "3");
     public static bool IsGCStressC => CompareGCStressModeAsLower(GetEnvironmentVariableValue("GCStress"), "0xC", "C");
+    public static bool IsTieredCompilation => string.Equals(GetEnvironmentVariableValue("TieredCompilation", "1"), "1", StringComparison.InvariantCulture);
+    public static bool IsHeapVerify => string.Equals(GetEnvironmentVariableValue("HeapVerify"), "1", StringComparison.InvariantCulture);
 
     public static bool IsGCStress => !string.Equals(GetEnvironmentVariableValue("GCStress"), "0", StringComparison.InvariantCulture);
+    
+    public static bool IsAnyJitStress => IsJitStress || IsJitStressRegs || IsJitMinOpts || IsTailCallStress;
+
+    public static bool IsAnyJitOptimizationStress => IsAnyJitStress || IsTieredCompilation;
 
     public static bool IsCheckedRuntime => AssemblyConfigurationEquals("Checked");
     public static bool IsReleaseRuntime => AssemblyConfigurationEquals("Release");
@@ -29,14 +35,12 @@ public static class CoreClrConfigurationDetection
 
     public static bool IsStressTest =>
         IsGCStress ||
-        IsZapDisable ||
-        IsTailCallStress ||
-        IsJitStressRegs ||
-        IsJitStress ||
-        IsJitMinOpts;
+        IsDisableR2R ||
+        IsAnyJitStress ||
+        IsHeapVerify;
 
-    private static string GetEnvironmentVariableValue(string name) =>
-        Environment.GetEnvironmentVariable("DOTNET_" + name) ?? Environment.GetEnvironmentVariable("COMPlus_" + name) ?? "0";
+    private static string GetEnvironmentVariableValue(string name, string defaultValue = "0") =>
+        Environment.GetEnvironmentVariable("DOTNET_" + name) ?? Environment.GetEnvironmentVariable("COMPlus_" + name) ?? defaultValue;
 
     private static bool AssemblyConfigurationEquals(string configuration)
     {
@@ -54,4 +58,4 @@ private static bool CompareGCStressModeAsLower(string value, string first, strin
             string.Equals(value, "0xf", StringComparison.InvariantCulture) ||
             string.Equals(value, "f", StringComparison.InvariantCulture);
     }
-}
+}
\ No newline at end of file
diff --git a/src/tests/Common/CoreCLRTestLibrary/PlatformDetection.cs b/src/tests/Common/CoreCLRTestLibrary/PlatformDetection.cs
index 56581dc9ea77..d03327e5fdde 100644
--- a/src/tests/Common/CoreCLRTestLibrary/PlatformDetection.cs
+++ b/src/tests/Common/CoreCLRTestLibrary/PlatformDetection.cs
@@ -50,6 +50,8 @@ public static bool IsNonZeroLowerBoundArraySupported
 
         public static bool IsMonoLLVMAOT => _variant == "llvmaot";
         public static bool IsMonoLLVMFULLAOT => _variant == "llvmfullaot";
+        public static bool IsMonoMINIFULLAOT => _variant == "minifullaot";
+        public static bool IsMonoFULLAOT => IsMonoLLVMFULLAOT || IsMonoMINIFULLAOT;
         public static bool IsMonoInterpreter => _variant == "monointerpreter";
 
         // These platforms have not had their infrastructure updated to support native test assets.
diff --git a/src/tests/Common/Directory.Build.targets b/src/tests/Common/Directory.Build.targets
index 632989cf73f7..cddecf23429a 100644
--- a/src/tests/Common/Directory.Build.targets
+++ b/src/tests/Common/Directory.Build.targets
@@ -15,7 +15,7 @@
     <GCStressDependsOnCoreDisTools Condition="'$(TargetOS)' == 'windows' And ('$(TargetArchitecture)' == 'x64' Or '$(TargetArchitecture)' == 'x86')">true</GCStressDependsOnCoreDisTools>
     <GCStressDependsOnCoreDisTools Condition="'$(TargetOS)' == 'linux' And '$(TargetArchitecture)' == 'x64'">true</GCStressDependsOnCoreDisTools>
     <CopyCoreDisToolsToCoreRoot>false</CopyCoreDisToolsToCoreRoot>
-    <CopyCoreDisToolsToCoreRoot Condition="$(GCStressDependsOnCoreDisTools) And '$(DotNetBuildFromSource)' != 'true'">true</CopyCoreDisToolsToCoreRoot>
+    <CopyCoreDisToolsToCoreRoot Condition="$(GCStressDependsOnCoreDisTools) And '$(DotNetBuildSourceOnly)' != 'true'">true</CopyCoreDisToolsToCoreRoot>
     <!-- Non-desktop OS's use a custom dotnet host, instead of corerun -->
     <IsDesktopOS Condition="'$(TargetsWasi)' != 'true' and '$(TargetsBrowser)' != 'true' and '$(TargetsAndroid)' != 'true' and '$(TargetstvOS)' != 'true' and '$(TargetsiOS)' != 'true' and '$(TargetsMacCatalyst)' != 'true'">true</IsDesktopOS>
   </PropertyGroup>
diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs
index 48761b8619c3..5ee032e2842d 100644
--- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs
+++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs
@@ -714,28 +714,27 @@
     ("LoadAndInsertScalarTest.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128_UInt16_7",                                                                ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "Helpers.Insert(firstOp, ElementIndex, thirdOp, i) != result[i]"}),
     ("LoadAndInsertScalarTest.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128_UInt32_3",                                                                ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "Helpers.Insert(firstOp, ElementIndex, thirdOp, i) != result[i]"}),
     ("LoadAndInsertScalarTest.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128_UInt64_1",                                                                ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()",                        ["ValidateIterResult"] = "Helpers.Insert(firstOp, ElementIndex, thirdOp, i) != result[i]"}),
-    // Tests disabled until mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_Byte_7",                                                                   ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetByte()",                          ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_SByte_7",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_Int16_3",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_UInt16_3",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_Int32_1",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_UInt32_1",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_Single_1",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()",                        ["ValidateIterResult"] = "(BitConverter.SingleToInt32Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.SingleToInt32Bits(result1[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.SingleToInt32Bits(result2[i]))"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_Byte_7",                                                                   ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetByte()",                          ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_SByte_7",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_Int16_3",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_UInt16_3",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_Int32_1",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_UInt32_1",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_Single_1",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()",                        ["ValidateIterResult"] = "(BitConverter.SingleToInt32Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.SingleToInt32Bits(result1[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.SingleToInt32Bits(result2[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input3, ElementIndex, newData[2], i)) != BitConverter.SingleToInt32Bits(result3[i]))"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_Byte_7",                                                                   ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetByte()",                          ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_SByte_7",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_Int16_3",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_UInt16_3",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_Int32_1",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_UInt32_1",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_Single_1",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()",                        ["ValidateIterResult"] = "(BitConverter.SingleToInt32Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.SingleToInt32Bits(result1[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.SingleToInt32Bits(result2[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input3, ElementIndex, newData[2], i)) != BitConverter.SingleToInt32Bits(result3[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input4, ElementIndex, newData[3], i)) != BitConverter.SingleToInt32Bits(result4[i]))"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_Byte_7",                                                                   ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetByte()",                          ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_SByte_7",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_Int16_3",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_UInt16_3",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_Int32_1",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_UInt32_1",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x2_Single_1",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()",                        ["ValidateIterResult"] = "(BitConverter.SingleToInt32Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.SingleToInt32Bits(result1[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.SingleToInt32Bits(result2[i]))"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_Byte_7",                                                                   ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetByte()",                          ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_SByte_7",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_Int16_3",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_UInt16_3",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_Int32_1",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_UInt32_1",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x3_Single_1",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()",                        ["ValidateIterResult"] = "(BitConverter.SingleToInt32Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.SingleToInt32Bits(result1[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.SingleToInt32Bits(result2[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input3, ElementIndex, newData[2], i)) != BitConverter.SingleToInt32Bits(result3[i]))"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_Byte_7",                                                                   ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetByte()",                          ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_SByte_7",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_Int16_3",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_UInt16_3",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_Int32_1",                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_UInt32_1",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector64x4_Single_1",                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()",                        ["ValidateIterResult"] = "(BitConverter.SingleToInt32Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.SingleToInt32Bits(result1[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.SingleToInt32Bits(result2[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input3, ElementIndex, newData[2], i)) != BitConverter.SingleToInt32Bits(result3[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input4, ElementIndex, newData[3], i)) != BitConverter.SingleToInt32Bits(result4[i]))"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64_Byte",                                                                       ["Isa"] = "AdvSimd",                                ["Method"] = "LoadAndReplicateToVector64",                                           ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",                                                                                                                                                        ["ValidateIterResult"] = "firstOp[0] != result[i]"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64_Int16",                                                                      ["Isa"] = "AdvSimd",                                ["Method"] = "LoadAndReplicateToVector64",                                           ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",                                                                                                                                                       ["ValidateIterResult"] = "firstOp[0] != result[i]"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64_Int32",                                                                      ["Isa"] = "AdvSimd",                                ["Method"] = "LoadAndReplicateToVector64",                                           ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",                                                                                                                                                       ["ValidateIterResult"] = "firstOp[0] != result[i]"}),
@@ -750,28 +749,27 @@
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128_Single",                                                                    ["Isa"] = "AdvSimd",                                ["Method"] = "LoadAndReplicateToVector128",                                          ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",                                                                                                                                                      ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[i])"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128_UInt16",                                                                    ["Isa"] = "AdvSimd",                                ["Method"] = "LoadAndReplicateToVector128",                                          ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",                                                                                                                                                      ["ValidateIterResult"] = "firstOp[0] != result[i]"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128_UInt32",                                                                    ["Isa"] = "AdvSimd",                                ["Method"] = "LoadAndReplicateToVector128",                                          ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",                                                                                                                                                      ["ValidateIterResult"] = "firstOp[0] != result[i]"}),
-    // Tests disabled until mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2SByte",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2Byte",                                                                      ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2UShort",                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2Short",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2UInt32",                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2Int32",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2Float",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3SByte",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3Byte",                                                                      ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3UShort",                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3Short",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3UInt32",                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3Int32",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3Float",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4SByte",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4Byte",                                                                      ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4UShort",                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4Short",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4UInt32",                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4Int32",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4Float",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2SByte",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2Byte",                                                                      ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2UShort",                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2Short",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2UInt32",                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2Int32",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x2Float",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3SByte",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3Byte",                                                                      ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3UShort",                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3Short",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3UInt32",                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3Int32",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x3Float",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4SByte",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4Byte",                                                                      ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4UShort",                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4Short",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4UInt32",                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4Int32",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector64x4Float",                                                                     ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadVector64_Byte",                                                                                     ["Isa"] = "AdvSimd",                                ["Method"] = "LoadVector64",                                                         ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",                                                                                                                                                        ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadVector64_Double",                                                                                   ["Isa"] = "AdvSimd",                                ["Method"] = "LoadVector64",                                                         ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Double",                                                                                                                           ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",                                                                                                                                                      ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(firstOp[i]) != BitConverter.DoubleToInt64Bits(result[i])"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadVector64_Int16",                                                                                    ["Isa"] = "AdvSimd",                                ["Method"] = "LoadVector64",                                                         ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",                                                                                                                                                       ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
@@ -792,49 +790,48 @@
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadVector128_UInt16",                                                                                  ["Isa"] = "AdvSimd",                                ["Method"] = "LoadVector128",                                                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",                                                                                                                                                      ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadVector128_UInt32",                                                                                  ["Isa"] = "AdvSimd",                                ["Method"] = "LoadVector128",                                                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",                                                                                                                                                      ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadVector128_UInt64",                                                                                  ["Isa"] = "AdvSimd",                                ["Method"] = "LoadVector128",                                                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",                                                                                                                                                      ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
-    // Tests disabled until mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2SByte",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2Byte",                                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2UShort",                                                                                  ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2Short",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2UInt32",                                                                                  ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2Int32",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2Float",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3SByte",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8] || result3[i] != input[i + 8 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3Byte",                                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8] || result3[i] != input[i + 8 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3UShort",                                                                                  ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4] || result3[i] != input[i + 4 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3Short",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4] || result3[i] != input[i + 4 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3UInt32",                                                                                  ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2] || result3[i] != input[i + 2 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3Int32",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2] || result3[i] != input[i + 2 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3Float",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2] || result3[i] != input[i + 2 * 2]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4SByte",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8] || result3[i] != input[i + 8 * 2] || result4[i] != input[i + 8 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4Byte",                                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8] || result3[i] != input[i + 8 * 2] || result4[i] != input[i + 8 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4UShort",                                                                                  ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4] || result3[i] != input[i + 4 * 2] || result4[i] != input[i + 4 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4Short",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4] || result3[i] != input[i + 4 * 2] || result4[i] != input[i + 4 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4UInt32",                                                                                  ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2] || result3[i] != input[i + 2 * 2] || result4[i] != input[i + 2 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4Int32",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2] || result3[i] != input[i + 2 * 2] || result4[i] != input[i + 2 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4Float",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2] || result3[i] != input[i + 2 * 2] || result4[i] != input[i + 2 * 3]"}), 
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipSByte",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipByte",                                                                            ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipUShort",                                                                          ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipShort",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipUInt32",                                                                          ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipInt32",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipFloat",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipSByte",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipByte",                                                                            ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipUShort",                                                                          ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipShort",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipUInt32",                                                                          ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipInt32",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipFloat",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipSByte",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipByte",                                                                            ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipUShort",                                                                          ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipShort",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipUInt32",                                                                          ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipInt32",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipFloat",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),    
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2SByte",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2Byte",                                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2UShort",                                                                                  ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2Short",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2UInt32",                                                                                  ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2Int32",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2Float",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3SByte",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8] || result3[i] != input[i + 8 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3Byte",                                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8] || result3[i] != input[i + 8 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3UShort",                                                                                  ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4] || result3[i] != input[i + 4 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3Short",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4] || result3[i] != input[i + 4 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3UInt32",                                                                                  ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2] || result3[i] != input[i + 2 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3Int32",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2] || result3[i] != input[i + 2 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3Float",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2] || result3[i] != input[i + 2 * 2]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4SByte",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8] || result3[i] != input[i + 8 * 2] || result4[i] != input[i + 8 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4Byte",                                                                                    ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8] || result3[i] != input[i + 8 * 2] || result4[i] != input[i + 8 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4UShort",                                                                                  ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4] || result3[i] != input[i + 4 * 2] || result4[i] != input[i + 4 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4Short",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4] || result3[i] != input[i + 4 * 2] || result4[i] != input[i + 4 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4UInt32",                                                                                  ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2] || result3[i] != input[i + 2 * 2] || result4[i] != input[i + 2 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4Int32",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2] || result3[i] != input[i + 2 * 2] || result4[i] != input[i + 2 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4Float",                                                                                   ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4",                                                                                                             ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2] || result3[i] != input[i + 2 * 2] || result4[i] != input[i + 2 * 3]"}), 
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipSByte",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipByte",                                                                            ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipUShort",                                                                          ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipShort",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipUInt32",                                                                          ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipInt32",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x2AndUnzipFloat",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipSByte",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipByte",                                                                            ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipUShort",                                                                          ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipShort",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipUInt32",                                                                          ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipInt32",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x3AndUnzipFloat",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipSByte",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipByte",                                                                            ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipUShort",                                                                          ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipShort",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipUInt32",                                                                          ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipInt32",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector64x4AndUnzipFloat",                                                                           ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd",       ["Method"] = "LoadVector64x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),    
     ("VecBinOpTest.template",             new Dictionary<string, string> { ["TestName"] = "Max_Vector64_Byte",                                                                                     ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "Max",                                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "Helpers.Max(left[i], right[i]) != result[i]"}),
     ("VecBinOpTest.template",             new Dictionary<string, string> { ["TestName"] = "Max_Vector64_Int16",                                                                                    ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "Max",                                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "Helpers.Max(left[i], right[i]) != result[i]"}),
     ("VecBinOpTest.template",             new Dictionary<string, string> { ["TestName"] = "Max_Vector64_Int32",                                                                                    ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "Max",                                                                  ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "Helpers.Max(left[i], right[i]) != result[i]"}),
@@ -1705,70 +1702,69 @@
     ("StoreSelectedScalarTest.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128_UInt16_7",                                                             ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "7",                                                                                                                       ["ValidateResult"] = "firstOp[ElementIndex] != result"}),
     ("StoreSelectedScalarTest.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128_UInt32_3",                                                             ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "3",                                                                                                                       ["ValidateResult"] = "firstOp[ElementIndex] != result"}),
     ("StoreSelectedScalarTest.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128_UInt64_1",                                                             ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                              ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",        ["ElementIndex"] = "1",                                                                                                                       ["ValidateResult"] = "firstOp[ElementIndex] != result"}),
-    // Tests disabled until mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-    // ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_SByte",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_Byte",                                                                ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_UShort",                                                              ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_Short",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_UInt32",                                                              ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_Int32",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_Float",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_SByte",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_Byte",                                                                ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_UShort",                                                              ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_Short",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_UInt32",                                                              ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_Int32",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_Float",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_SByte",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_Byte",                                                                ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_UShort",                                                              ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_Short",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_UInt32",                                                              ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_Int32",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_Float",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2SByte",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2Byte",                                                                                   ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2UShort",                                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2Short",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2UInt32",                                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2Int32",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2Float",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3SByte",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3Byte",                                                                                   ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3UShort",                                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3Short",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3UInt32",                                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3Int32",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3Float",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4SByte",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4Byte",                                                                                   ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4UShort",                                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4Short",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4UInt32",                                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4Int32",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4Float",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2SByteAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2ByteAndZip",                                                                             ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2UShortAndZip",                                                                           ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2ShortAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2UInt32AndZip",                                                                           ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2Int32AndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2FloatAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3SByteAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3ByteAndZip",                                                                             ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3UShortAndZip",                                                                           ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3ShortAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3UInt32AndZip",                                                                           ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3Int32AndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3FloatAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4SByteAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4ByteAndZip",                                                                             ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4UShortAndZip",                                                                           ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4ShortAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4UInt32AndZip",                                                                           ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4Int32AndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4FloatAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_SByte",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_Byte",                                                                ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_UShort",                                                              ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_Short",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_UInt32",                                                              ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_Int32",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x2_Float",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_SByte",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_Byte",                                                                ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_UShort",                                                              ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_Short",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_UInt32",                                                              ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_Int32",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x3_Float",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_SByte",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_Byte",                                                                ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_UShort",                                                              ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_Short",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_UInt32",                                                              ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_Int32",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",   new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector64x4_Float",                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                 ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2SByte",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2Byte",                                                                                   ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2UShort",                                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2Short",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2UInt32",                                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2Int32",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2Float",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2",                                                                                                                                                     ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3SByte",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3Byte",                                                                                   ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3UShort",                                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3Short",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3UInt32",                                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3Int32",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3Float",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3",                                                                                                                                                     ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4SByte",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4Byte",                                                                                   ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4UShort",                                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4Short",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4UInt32",                                                                                 ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4Int32",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4Float",                                                                                  ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4",                                                                                                                                                     ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2SByteAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2ByteAndZip",                                                                             ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2UShortAndZip",                                                                           ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2ShortAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2UInt32AndZip",                                                                           ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2Int32AndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x2FloatAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x2AndZip",                                                                                                                                               ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3SByteAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3ByteAndZip",                                                                             ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3UShortAndZip",                                                                           ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3ShortAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3UInt32AndZip",                                                                           ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3Int32AndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x3FloatAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x3AndZip",                                                                                                                                               ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4SByteAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4ByteAndZip",                                                                             ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4UShortAndZip",                                                                           ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4ShortAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4UInt32AndZip",                                                                           ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4Int32AndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector64x4FloatAndZip",                                                                            ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector64x4AndZip",                                                                                                                                               ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "float",                                                               ["LargestVectorSize"] = "8",                                                                 ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
     ("VecBinOpTest.template",             new Dictionary<string, string> { ["TestName"] = "Subtract_Vector64_Byte",                                                                                ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "Subtract",                                                             ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "Helpers.Subtract(left[i], right[i]) != result[i]"}),
     ("VecBinOpTest.template",             new Dictionary<string, string> { ["TestName"] = "Subtract_Vector64_Int16",                                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "Subtract",                                                             ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "Helpers.Subtract(left[i], right[i]) != result[i]"}),
     ("VecBinOpTest.template",             new Dictionary<string, string> { ["TestName"] = "Subtract_Vector64_Int32",                                                                               ["Isa"] = "AdvSimd",       ["LoadIsa"] = "AdvSimd", ["Method"] = "Subtract",                                                             ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "Helpers.Subtract(left[i], right[i]) != result[i]"}),
@@ -2141,70 +2137,68 @@
     ("InsertSelectedScalarTest.template", new Dictionary<string, string> { ["TestName"] = "InsertSelectedScalar_Vector128_UInt32_3_Vector128_UInt32_3",                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "InsertSelectedScalar",                                                 ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                              ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex1"] = "3",       ["ElementIndex2"] = "3",        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "Helpers.Insert(firstOp, ElementIndex1, thirdOp[ElementIndex2], i) != result[i]"}),
     ("InsertSelectedScalarTest.template", new Dictionary<string, string> { ["TestName"] = "InsertSelectedScalar_Vector128_UInt64_1_Vector128_UInt64_1",                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "InsertSelectedScalar",                                                 ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                              ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",        ["ElementIndex1"] = "1",       ["ElementIndex2"] = "1",        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()",                        ["ValidateIterResult"] = "Helpers.Insert(firstOp, ElementIndex1, thirdOp[ElementIndex2], i) != result[i]"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128_Double",                                                                    ["Isa"] = "AdvSimd.Arm64",                          ["Method"] = "LoadAndReplicateToVector128",                                          ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",                                                                                                                                                      ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[i])"}),
-    // Tests disabled until mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_Byte_7",                                                              ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetByte()",                          ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_SByte_7",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_Int16_3",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_UInt16_3",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_Int32_1",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_UInt32_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_Int64_0",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int64",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()",         ["ElementIndex"] = "0",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_UInt64_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_Single_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()",                        ["ValidateIterResult"] = "(BitConverter.SingleToInt32Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.SingleToInt32Bits(result1[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.SingleToInt32Bits(result2[i]))"}),
-    // ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_Double_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Double",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()",                        ["ValidateIterResult"] = "(BitConverter.DoubleToInt64Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.DoubleToInt64Bits(result1[i])) || (BitConverter.DoubleToInt64Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.DoubleToInt64Bits(result2[i]))"}),    
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_Byte_7",                                                              ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetByte()",                          ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_SByte_7",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_Int16_3",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_UInt16_3",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_Int32_1",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_UInt32_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_Int64_0",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int64",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()",         ["ElementIndex"] = "0",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_UInt64_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_Single_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()",                        ["ValidateIterResult"] = "(BitConverter.SingleToInt32Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.SingleToInt32Bits(result1[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.SingleToInt32Bits(result2[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input3, ElementIndex, newData[2], i)) != BitConverter.SingleToInt32Bits(result3[i]))"}),
-    // ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_Double_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Double",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()",                        ["ValidateIterResult"] = "(BitConverter.DoubleToInt64Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.DoubleToInt64Bits(result1[i])) || (BitConverter.DoubleToInt64Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.DoubleToInt64Bits(result2[i])) || (BitConverter.DoubleToInt64Bits(Helpers.Insert(input3, ElementIndex, newData[2], i)) != BitConverter.DoubleToInt64Bits(result3[i]))"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_Byte_7",                                                              ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetByte()",                          ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_SByte_7",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_Int16_3",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_UInt16_3",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_Int32_1",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_UInt32_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_Int64_0",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int64",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()",         ["ElementIndex"] = "0",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_UInt64_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_Single_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()",                        ["ValidateIterResult"] = "(BitConverter.SingleToInt32Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.SingleToInt32Bits(result1[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.SingleToInt32Bits(result2[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input3, ElementIndex, newData[2], i)) != BitConverter.SingleToInt32Bits(result3[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input4, ElementIndex, newData[3], i)) != BitConverter.SingleToInt32Bits(result4[i]))"}),
-    // ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_Double_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Double",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()",                        ["ValidateIterResult"] = "(BitConverter.DoubleToInt64Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.DoubleToInt64Bits(result1[i])) || (BitConverter.DoubleToInt64Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.DoubleToInt64Bits(result2[i])) || (BitConverter.DoubleToInt64Bits(Helpers.Insert(input3, ElementIndex, newData[2], i)) != BitConverter.DoubleToInt64Bits(result3[i])) || (BitConverter.DoubleToInt64Bits(Helpers.Insert(input4, ElementIndex, newData[3], i)) != BitConverter.DoubleToInt64Bits(result4[i]))"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_Byte_7",                                                              ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetByte()",                          ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_SByte_7",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_Int16_3",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_UInt16_3",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_Int32_1",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_UInt32_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_Int64_0",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int64",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()",         ["ElementIndex"] = "0",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_UInt64_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_Single_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()",                        ["ValidateIterResult"] = "(BitConverter.SingleToInt32Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.SingleToInt32Bits(result1[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.SingleToInt32Bits(result2[i]))"}),
+    ("LoadAndInsertScalarx2Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x2_Double_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Double",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()",                        ["ValidateIterResult"] = "(BitConverter.DoubleToInt64Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.DoubleToInt64Bits(result1[i])) || (BitConverter.DoubleToInt64Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.DoubleToInt64Bits(result2[i]))"}),    
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_Byte_7",                                                              ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetByte()",                          ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_SByte_7",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_Int16_3",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_UInt16_3",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_Int32_1",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_UInt32_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_Int64_0",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int64",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()",         ["ElementIndex"] = "0",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_UInt64_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i])  || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_Single_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()",                        ["ValidateIterResult"] = "(BitConverter.SingleToInt32Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.SingleToInt32Bits(result1[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.SingleToInt32Bits(result2[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input3, ElementIndex, newData[2], i)) != BitConverter.SingleToInt32Bits(result3[i]))"}),
+    ("LoadAndInsertScalarx3Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x3_Double_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Double",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()",                        ["ValidateIterResult"] = "(BitConverter.DoubleToInt64Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.DoubleToInt64Bits(result1[i])) || (BitConverter.DoubleToInt64Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.DoubleToInt64Bits(result2[i])) || (BitConverter.DoubleToInt64Bits(Helpers.Insert(input3, ElementIndex, newData[2], i)) != BitConverter.DoubleToInt64Bits(result3[i]))"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_Byte_7",                                                              ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetByte()",                          ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_SByte_7",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["ElementIndex"] = "7",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_Int16_3",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_UInt16_3",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["ElementIndex"] = "3",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_Int32_1",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",         ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt32()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_UInt32_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_Int64_0",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Int64",   ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()",         ["ElementIndex"] = "0",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetInt64()",                         ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_UInt64_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()",                        ["ValidateIterResult"] = "(Helpers.Insert(input1, ElementIndex, newData[0], i) != result1[i]) || (Helpers.Insert(input2, ElementIndex, newData[1], i) != result2[i]) || (Helpers.Insert(input3, ElementIndex, newData[2], i) != result3[i])  || (Helpers.Insert(input4, ElementIndex, newData[3], i) != result4[i])"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_Single_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()",                        ["ValidateIterResult"] = "(BitConverter.SingleToInt32Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.SingleToInt32Bits(result1[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.SingleToInt32Bits(result2[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input3, ElementIndex, newData[2], i)) != BitConverter.SingleToInt32Bits(result3[i])) || (BitConverter.SingleToInt32Bits(Helpers.Insert(input4, ElementIndex, newData[3], i)) != BitConverter.SingleToInt32Bits(result4[i]))"}),
+    ("LoadAndInsertScalarx4Test.template",  new Dictionary<string, string> { ["TestName"] = "LoadAndInsertScalar_Vector128x4_Double_1",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndInsertScalar",                                                  ["RetVectorType"] = "Vector128",  ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector128",  ["Op1BaseType"] = "Double",                                                                                                                           ["LargestVectorSize"] = "16",  ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",        ["ElementIndex"] = "1",                                        ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()",                        ["ValidateIterResult"] = "(BitConverter.DoubleToInt64Bits(Helpers.Insert(input1, ElementIndex, newData[0], i)) != BitConverter.DoubleToInt64Bits(result1[i])) || (BitConverter.DoubleToInt64Bits(Helpers.Insert(input2, ElementIndex, newData[1], i)) != BitConverter.DoubleToInt64Bits(result2[i])) || (BitConverter.DoubleToInt64Bits(Helpers.Insert(input3, ElementIndex, newData[2], i)) != BitConverter.DoubleToInt64Bits(result3[i])) || (BitConverter.DoubleToInt64Bits(Helpers.Insert(input4, ElementIndex, newData[3], i)) != BitConverter.DoubleToInt64Bits(result4[i]))"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128_Int64",                                                                     ["Isa"] = "AdvSimd.Arm64",                          ["Method"] = "LoadAndReplicateToVector128",                                          ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64",   ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()",                                                                                                                                                       ["ValidateIterResult"] = "firstOp[0] != result[i]"}),
     ("LoadUnOpTest.template",             new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128_UInt64",                                                                    ["Isa"] = "AdvSimd.Arm64",                          ["Method"] = "LoadAndReplicateToVector128",                                          ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",                                                                                                                                                      ["ValidateIterResult"] = "firstOp[0] != result[i]"}),
-    // Tests disabled until mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2SByte",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2Byte",                                                                     ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2UShort",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2Short",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2UInt32",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2Int32",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2Int64",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2UInt64",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),    
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2Float",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2Double",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),    
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3SByte",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3Byte",                                                                     ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3UShort",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3Short",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3UInt32",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3Int32",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3Int64",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3UInt64",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),        
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3Float",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3Double",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),        
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4SByte",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4Byte",                                                                     ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4UShort",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4Short",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4UInt32",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4Int32",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4Int64",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4UInt64",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4Float",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),    
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4Double",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),            
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2SByte",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2Byte",                                                                     ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2UShort",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2Short",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2UInt32",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2Int32",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2Int64",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2UInt64",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),    
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2Float",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x2Double",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x2",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1]"}),    
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3SByte",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3Byte",                                                                     ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3UShort",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3Short",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3UInt32",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3Int32",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3Int64",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3UInt64",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),        
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3Float",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x3Double",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x3",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2]"}),        
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4SByte",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4Byte",                                                                     ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4UShort",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4Short",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4UInt32",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4Int32",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4Int64",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4UInt64",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4Float",                                                                    ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),    
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadAndReplicateToVector128x4Double",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadAndReplicateToVector128x4",                                                                                                      ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[0] || result2[i] != input[1] || result3[i] != input[2] || result4[i] != input[3]"}),            
     ("LoadPairVectorTest.template",       new Dictionary<string, string> { ["TestName"] = "LoadPairScalarVector64_Int32",                                                                          ["Isa"] = "AdvSimd.Arm64",                          ["Method"] = "LoadPairScalarVector64",                                               ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",                                                                                                                                                       ["ValidateIterResult"] = "Helpers.LoadPairScalar(firstOp, i) != result[i]"}),
     ("LoadPairVectorTest.template",       new Dictionary<string, string> { ["TestName"] = "LoadPairScalarVector64_Single",                                                                         ["Isa"] = "AdvSimd.Arm64",                          ["Method"] = "LoadPairScalarVector64",                                               ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Single",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",                                                                                                                                                      ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.LoadPairScalar(firstOp, i)) != BitConverter.SingleToInt32Bits(result[i])"}),
     ("LoadPairVectorTest.template",       new Dictionary<string, string> { ["TestName"] = "LoadPairScalarVector64_UInt32",                                                                         ["Isa"] = "AdvSimd.Arm64",                          ["Method"] = "LoadPairScalarVector64",                                               ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",                                                                                                                                                      ["ValidateIterResult"] = "Helpers.LoadPairScalar(firstOp, i) != result[i]"}),
@@ -2251,67 +2245,66 @@
     ("LoadPairVectorTest.template",       new Dictionary<string, string> { ["TestName"] = "LoadPairVector128NonTemporal_UInt16",                                                                   ["Isa"] = "AdvSimd.Arm64",                          ["Method"] = "LoadPairVector128NonTemporal",                                         ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",                                                                                                                                                      ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
     ("LoadPairVectorTest.template",       new Dictionary<string, string> { ["TestName"] = "LoadPairVector128NonTemporal_UInt32",                                                                   ["Isa"] = "AdvSimd.Arm64",                          ["Method"] = "LoadPairVector128NonTemporal",                                         ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",                                                                                                                                                      ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
     ("LoadPairVectorTest.template",       new Dictionary<string, string> { ["TestName"] = "LoadPairVector128NonTemporal_UInt64",                                                                   ["Isa"] = "AdvSimd.Arm64",                          ["Method"] = "LoadPairVector128NonTemporal",                                         ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",                                                                                                                                                      ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
-    // Tests disabled until mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2SByte",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 16]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2Byte",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 16]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2UShort",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2Short",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2UInt32",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2Int32",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2UInt64",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2Int64",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2Float",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2Double",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3SByte",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 16] || result3[i] != input[i + 16 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3Byte",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 16] || result3[i] != input[i + 16 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3UShort",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]  || result3[i] != input[i + 8 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3Short",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]  || result3[i] != input[i + 8 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3UInt32",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]  || result3[i] != input[i + 4 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3Int32",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]  || result3[i] != input[i + 4 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3UInt64",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]  || result3[i] != input[i + 2 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3Int64",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]  || result3[i] != input[i + 2 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3Float",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]  || result3[i] != input[i + 4 * 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3Double",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]  || result3[i] != input[i + 2 * 2]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4SByte",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 16] || result3[i] != input[i + 16 * 2] || result4[i] != input[i + 16 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4Byte",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 16] || result3[i] != input[i + 16 * 2] || result4[i] != input[i + 16 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4UShort",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]  || result3[i] != input[i + 8 * 2]  || result4[i] != input[i + 8 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4Short",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]  || result3[i] != input[i + 8 * 2]  || result4[i] != input[i + 8 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4UInt32",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]  || result3[i] != input[i + 4 * 2]  || result4[i] != input[i + 4 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4Int32",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]  || result3[i] != input[i + 4 * 2]  || result4[i] != input[i + 4 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4UInt64",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]  || result3[i] != input[i + 2 * 2]  || result4[i] != input[i + 2 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4Int64",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]  || result3[i] != input[i + 2 * 2]  || result4[i] != input[i + 2 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4Float",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]  || result3[i] != input[i + 4 * 2]  || result4[i] != input[i + 4 * 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4Double",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]  || result3[i] != input[i + 2 * 2]  || result4[i] != input[i + 2 * 3]"}),    
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipSByte",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipByte",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipUShort",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipShort",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipUInt32",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipInt32",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipUInt64",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipInt64",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipFloat",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipDouble",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipSByte",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipByte",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipUShort",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipShort",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipUInt32",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipInt32",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipUInt64",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipInt64",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipFloat",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipDouble",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipSByte",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipByte",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipUShort",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipShort",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipUInt32",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipInt32",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipUInt64",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipInt64",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipFloat",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
-    // ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipDouble",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),    
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2SByte",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 16]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2Byte",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 16]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2UShort",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2Short",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2UInt32",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2Int32",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2UInt64",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2Int64",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2Float",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2Double",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3SByte",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 16] || result3[i] != input[i + 16 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3Byte",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 16] || result3[i] != input[i + 16 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3UShort",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]  || result3[i] != input[i + 8 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3Short",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]  || result3[i] != input[i + 8 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3UInt32",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]  || result3[i] != input[i + 4 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3Int32",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]  || result3[i] != input[i + 4 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3UInt64",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]  || result3[i] != input[i + 2 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3Int64",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]  || result3[i] != input[i + 2 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3Float",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]  || result3[i] != input[i + 4 * 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3Double",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]  || result3[i] != input[i + 2 * 2]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4SByte",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 16] || result3[i] != input[i + 16 * 2] || result4[i] != input[i + 16 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4Byte",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 16] || result3[i] != input[i + 16 * 2] || result4[i] != input[i + 16 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4UShort",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]  || result3[i] != input[i + 8 * 2]  || result4[i] != input[i + 8 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4Short",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 8]  || result3[i] != input[i + 8 * 2]  || result4[i] != input[i + 8 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4UInt32",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]  || result3[i] != input[i + 4 * 2]  || result4[i] != input[i + 4 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4Int32",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]  || result3[i] != input[i + 4 * 2]  || result4[i] != input[i + 4 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4UInt64",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]  || result3[i] != input[i + 2 * 2]  || result4[i] != input[i + 2 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4Int64",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]  || result3[i] != input[i + 2 * 2]  || result4[i] != input[i + 2 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4Float",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 4]  || result3[i] != input[i + 4 * 2]  || result4[i] != input[i + 4 * 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4Double",                                                                         ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4",                                                                                                            ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i] || result2[i] != input[i + 2]  || result3[i] != input[i + 2 * 2]  || result4[i] != input[i + 2 * 3]"}),    
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipSByte",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipByte",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipUShort",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipShort",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipUInt32",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipInt32",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipUInt64",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipInt64",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipFloat",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx2Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x2AndUnzipDouble",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x2AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 2] || result2[i] != input[(i * 2) + 1]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipSByte",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipByte",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipUShort",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipShort",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipUInt32",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipInt32",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipUInt64",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipInt64",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipFloat",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx3Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x3AndUnzipDouble",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x3AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 3] || result2[i] != input[(i * 3) + 1] || result3[i] != input[(i * 3) + 2]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipSByte",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipByte",                                                                   ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipUShort",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipShort",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipUInt32",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipInt32",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipUInt64",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipInt64",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipFloat",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "float",                                                                                                                            ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),
+    ("LoadVectorx4Test.template",         new Dictionary<string, string> { ["TestName"] = "LoadVector128x4AndUnzipDouble",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "LoadVector128x4AndUnzip",                                                                                                                    ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "double",                                                                                                                           ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "result1[i] != input[i * 4] || result2[i] != input[(i * 4) + 1] || result3[i] != input[(i * 4) + 2] || result4[i] != input[(i * 4) + 3]"}),    
     ("VecBinOpTest.template",             new Dictionary<string, string> { ["TestName"] = "Max_Vector128_Double",                                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "Max",                                                                  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double",                                                             ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",        ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.Max(left[i], right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}),
     ("VecReduceUnOpTest.template",        new Dictionary<string, string> { ["TestName"] = "MaxAcross_Vector64_Byte",                                                                               ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "MaxAcross",                                                            ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Byte",                                                                                                                             ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",                                                                                                                                                        ["ValidateReduceOpResult"] = "Helpers.MaxAcross(firstOp) != result[0]",                                                                                                                           ["ValidateRemainingResults"] = "result[i] != 0"}),
     ("VecReduceUnOpTest.template",        new Dictionary<string, string> { ["TestName"] = "MaxAcross_Vector64_Int16",                                                                              ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "MaxAcross",                                                            ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int16",                                                                                                                            ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",                                                                                                                                                       ["ValidateReduceOpResult"] = "Helpers.MaxAcross(firstOp) != result[0]",                                                                                                                           ["ValidateRemainingResults"] = "result[i] != 0"}),
@@ -2549,97 +2542,96 @@
     ("StoreBinOpTest.template",           new Dictionary<string, string> { ["TestName"] = "StorePairNonTemporal_Vector128_UInt16",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StorePairNonTemporal",                                                                                             ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",        ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "Helpers.Concat(firstOp, secondOp, i) != result[i]"}),
     ("StoreBinOpTest.template",           new Dictionary<string, string> { ["TestName"] = "StorePairNonTemporal_Vector128_UInt32",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StorePairNonTemporal",                                                                                             ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",        ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "Helpers.Concat(firstOp, secondOp, i) != result[i]"}),
     ("StoreBinOpTest.template",           new Dictionary<string, string> { ["TestName"] = "StorePairNonTemporal_Vector128_UInt64",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StorePairNonTemporal",                                                                                             ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",        ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "Helpers.Concat(firstOp, secondOp, i) != result[i]"}),
-    // Tests disabled until mono implements these APIs. See https://github.com/dotnet/runtime/issues/93081    
-    // ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_SByte",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_Byte",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_UShort",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_Short",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_UInt32",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_Int32",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_UInt64",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_Int64",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_Float",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_Double",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
-    // ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_SByte",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_Byte",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_UShort",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_Short",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_UInt32",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_Int32",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_UInt64",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_Int64",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_Float",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_Double",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
-    // ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_SByte",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_Byte",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_UShort",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_Short",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_UInt32",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_Int32",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_UInt64",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_Int64",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_Float",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_Double",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2SByte",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Byte",                                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2UShort",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Short",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2UInt32",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Int32",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2UInt64",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Int64",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Float",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Double",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3SByte",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Byte",                                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3UShort",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Short",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3UInt32",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Int32",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3UInt64",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Int64",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Float",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Double",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4SByte",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Byte",                                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4UShort",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Short",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4UInt32",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Int32",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4UInt64",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Int64",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Float",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Double",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2SByteAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2ByteAndZip",                                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2UShortAndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2ShortAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2UInt32AndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Int32AndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2UInt64AndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Int64AndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2FloatAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2DoubleAndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3SByteAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3ByteAndZip",                                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3UShortAndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3ShortAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3UInt32AndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Int32AndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3UInt64AndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Int64AndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3FloatAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3DoubleAndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4SByteAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4ByteAndZip",                                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4UShortAndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4ShortAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4UInt32AndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Int32AndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4UInt64AndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Int64AndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4FloatAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
-    // ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4DoubleAndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_SByte",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_Byte",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_UShort",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_Short",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_UInt32",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_Int32",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_UInt64",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_Int64",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_Float",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx2Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x2_Double",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1]"}),
+    ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_SByte",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_Byte",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_UShort",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_Short",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_UInt32",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_Int32",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_UInt64",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_Int64",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_Float",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx3Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x3_Double",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2]"}),
+    ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_SByte",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_Byte",                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_UShort",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_Short",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_UInt32",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_Int32",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_UInt64",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_Int64",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_Float",                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreSelectedScalarx4Test.template",     new Dictionary<string, string> { ["TestName"] = "StoreSelectedScalar_Vector128x4_Double",                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreSelectedScalar",                                                                                                                                                            ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateResult"] = "input1[index] != result[0] || input2[index] != result[1] || input3[index] != result[2] || input4[index] != result[3]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2SByte",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Byte",                                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2UShort",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Short",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2UInt32",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Int32",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2UInt64",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Int64",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Float",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Double",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2",                                                                                                                                                               ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3SByte",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Byte",                                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3UShort",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Short",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3UInt32",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Int32",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3UInt64",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Int64",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Float",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Double",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3",                                                                                                                                                               ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4SByte",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Byte",                                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4UShort",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Short",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4UInt32",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Int32",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4UInt64",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Int64",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Float",                                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Double",                                                                                ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4",                                                                                                                                                               ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i] || input2[i] != result[OpElementCount + i] || input3[i] != result[(OpElementCount * 2) + i] || input4[i] != result[(OpElementCount * 3) + i]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2SByteAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2ByteAndZip",                                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2UShortAndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2ShortAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2UInt32AndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Int32AndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2UInt64AndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2Int64AndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2FloatAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx2Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x2DoubleAndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x2AndZip",                                                                                                                                                         ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 2] || input2[i] != result[(i * 2) + 1]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3SByteAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3ByteAndZip",                                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3UShortAndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3ShortAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3UInt32AndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Int32AndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3UInt64AndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3Int64AndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3FloatAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx3Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x3DoubleAndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x3AndZip",                                                                                                                                                         ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 3] || input2[i] != result[(i * 3) + 1] || input3[i] != result[(i * 3) + 2]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4SByteAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4ByteAndZip",                                                                            ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte",                                                     ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4UShortAndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4ShortAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4UInt32AndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Int32AndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4UInt64AndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4Int64AndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                        ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4FloatAndZip",                                                                           ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "float",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "float",                                                    ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
+    ("StoreVectorx4Test.template",        new Dictionary<string, string> { ["TestName"] = "StoreVector128x4DoubleAndZip",                                                                          ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "StoreVector128x4AndZip",                                                                                                                                                         ["Op1BaseType"] = "double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "double",                                                   ["LargestVectorSize"] = "16",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "input1[i] != result[i * 4] || input2[i] != result[(i * 4) + 1] || input3[i] != result[(i * 4) + 2] || input4[i] != result[(i * 4) + 3]"}),
     ("VecBinOpTest.template",             new Dictionary<string, string> { ["TestName"] = "Subtract_Vector128_Double",                                                                             ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "Subtract",                                                             ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double",                                                              ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",        ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                       ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.Subtract(left[i], right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}),
     ("SimpleBinOpTest.template",          new Dictionary<string, string> { ["TestName"] = "SubtractSaturateScalar_Vector64_Byte",                                                                  ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "SubtractSaturateScalar",                                               ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",          ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                         ["ValidateFirstResult"] = "Helpers.SubtractSaturate(left[0], right[0]) != result[0]",                                                                                                             ["ValidateRemainingResults"] = "result[i] != 0"}),
     ("SimpleBinOpTest.template",          new Dictionary<string, string> { ["TestName"] = "SubtractSaturateScalar_Vector64_Int16",                                                                 ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "SubtractSaturateScalar",                                               ["RetVectorType"] = "Vector64",  ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector64",  ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector64",  ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",  ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",         ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                        ["ValidateFirstResult"] = "Helpers.SubtractSaturate(left[0], right[0]) != result[0]",                                                                                                             ["ValidateRemainingResults"] = "result[i] != 0"}),
@@ -2893,9 +2885,18 @@
     ("SecureHashTernOpTest.template",     new Dictionary<string, string> { ["TestName"] = "ScheduleUpdate1_Vector128_UInt32",                                                                      ["Isa"] = "Sha256",        ["LoadIsa"] = "AdvSimd", ["Method"] = "ScheduleUpdate1",                                                      ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "0x00112233",                               ["NextValueOp2"] = "0x44556677",                               ["NextValueOp3"] = "0x8899AABB",                               ["ExpectedResult"] = "{0x248F1BDF, 0x248F1BDF, 0xB303DDBA, 0xF74821FE}"}),
 };
 
-(string templateFileName, Dictionary<string, string> templateData)[] SveInputs = Array.Empty<(string templateFileName, Dictionary<string, string> templateData)>();
+(string templateFileName, Dictionary<string, string> templateData)[] SveInputs = new []
 {
-    //TODO-SVE: Add SVE tests
+    ("SveLoadMaskedUnOpTest.template",    new Dictionary<string, string> { ["TestName"] = "SveLoadVector_float",                                                                                   ["Isa"] = "Sve",                                    ["Method"] = "LoadVector",                                                           ["RetVectorType"] = "Vector",    ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "Single",                                  ["Op2BaseType"] = "Single",                                                              ["LargestVectorSize"] = "8",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                        ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
+    ("SveLoadMaskedUnOpTest.template",    new Dictionary<string, string> { ["TestName"] = "SveLoadVector_double",                                                                                  ["Isa"] = "Sve",                                    ["Method"] = "LoadVector",                                                           ["RetVectorType"] = "Vector",    ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "Double",                                  ["Op2BaseType"] = "Double",                                                              ["LargestVectorSize"] = "8",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                        ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
+    ("SveLoadMaskedUnOpTest.template",    new Dictionary<string, string> { ["TestName"] = "SveLoadVector_sbyte",                                                                                   ["Isa"] = "Sve",                                    ["Method"] = "LoadVector",                                                           ["RetVectorType"] = "Vector",    ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "SByte",                                   ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "8",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                         ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
+    ("SveLoadMaskedUnOpTest.template",    new Dictionary<string, string> { ["TestName"] = "SveLoadVector_short",                                                                                   ["Isa"] = "Sve",                                    ["Method"] = "LoadVector",                                                           ["RetVectorType"] = "Vector",    ["RetBaseType"] = "Int16",   ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "Int16",                                   ["Op2BaseType"] = "Int16",                                                               ["LargestVectorSize"] = "8",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                                                         ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
+    ("SveLoadMaskedUnOpTest.template",    new Dictionary<string, string> { ["TestName"] = "SveLoadVector_int",                                                                                     ["Isa"] = "Sve",                                    ["Method"] = "LoadVector",                                                           ["RetVectorType"] = "Vector",    ["RetBaseType"] = "Int32",   ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "Int32",                                   ["Op2BaseType"] = "Int32",                                                               ["LargestVectorSize"] = "8",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                                                         ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
+    ("SveLoadMaskedUnOpTest.template",    new Dictionary<string, string> { ["TestName"] = "SveLoadVector_long",                                                                                    ["Isa"] = "Sve",                                    ["Method"] = "LoadVector",                                                           ["RetVectorType"] = "Vector",    ["RetBaseType"] = "Int64",   ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "Int64",                                   ["Op2BaseType"] = "Int64",                                                               ["LargestVectorSize"] = "8",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                                                         ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
+    ("SveLoadMaskedUnOpTest.template",    new Dictionary<string, string> { ["TestName"] = "SveLoadVector_byte",                                                                                    ["Isa"] = "Sve",                                    ["Method"] = "LoadVector",                                                           ["RetVectorType"] = "Vector",    ["RetBaseType"] = "Byte",    ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "Byte",                                    ["Op2BaseType"] = "Byte",                                                                ["LargestVectorSize"] = "8",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                                                          ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
+    ("SveLoadMaskedUnOpTest.template",    new Dictionary<string, string> { ["TestName"] = "SveLoadVector_ushort",                                                                                  ["Isa"] = "Sve",                                    ["Method"] = "LoadVector",                                                           ["RetVectorType"] = "Vector",    ["RetBaseType"] = "UInt16",  ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "UInt16",                                  ["Op2BaseType"] = "UInt16",                                                              ["LargestVectorSize"] = "8",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",                                                                                        ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
+    ("SveLoadMaskedUnOpTest.template",    new Dictionary<string, string> { ["TestName"] = "SveLoadVector_uint",                                                                                    ["Isa"] = "Sve",                                    ["Method"] = "LoadVector",                                                           ["RetVectorType"] = "Vector",    ["RetBaseType"] = "UInt32",  ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "UInt32",                                  ["Op2BaseType"] = "UInt32",                                                              ["LargestVectorSize"] = "8",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                                                        ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
+    ("SveLoadMaskedUnOpTest.template",    new Dictionary<string, string> { ["TestName"] = "SveLoadVector_ulong",                                                                                   ["Isa"] = "Sve",                                    ["Method"] = "LoadVector",                                                           ["RetVectorType"] = "Vector",    ["RetBaseType"] = "UInt64",  ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "UInt64",                                  ["Op2BaseType"] = "UInt64",                                                              ["LargestVectorSize"] = "8",                                                                ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                                                        ["ValidateIterResult"] = "firstOp[i] != result[i]"}),
 };
 
 
diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs
index 6e2840329342..20b8942132da 100644
--- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs
+++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs
@@ -1522,6 +1522,93 @@
     ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Add", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
     ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Add", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
     ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Add", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Add", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Add", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Add", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Subtract", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Subtract", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Subtract", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Subtract", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Subtract", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Subtract", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Multiply", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Multiply", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Multiply", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Multiply", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Multiply", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Multiply", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Divide", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Divide", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Divide", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Divide", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Divide", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Divide", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Scale", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Scale", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Scale", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Scale", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Scale", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Scale", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Sqrt", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.DoubleToUInt64Bits", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Sqrt", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.DoubleToUInt64Bits", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Sqrt", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.DoubleToUInt64Bits", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Sqrt", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Sqrt", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "Sqrt", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256Int32", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256Int32", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256Int32", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Single", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Single", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Single", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256Single", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256Single", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256Single", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Int32", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Int32", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Int32", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512UInt32", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512UInt32", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512UInt32", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256UInt32", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256UInt32", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256UInt32", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAdd", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAdd", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAdd", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAdd", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAdd", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAdd", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddNegated", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddNegated", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddNegated", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddNegated", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddNegated", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddNegated", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddSubtract", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddSubtract", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddSubtract", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddSubtract", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddSubtract", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddSubtract", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtract", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtract", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtract", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtract", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtract", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtract", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractAdd", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractAdd", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractAdd", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractAdd", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractAdd", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractAdd", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractNegated", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractNegated", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractNegated", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractNegated", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractNegated", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractNegated", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
 };
 
 (string templateFileName, Dictionary<string, string> templateData)[] Avx512F_ScalarUpperInputs = new []
@@ -1544,6 +1631,69 @@
     ("ImmBinOpTest.template",          new Dictionary<string, string> { ["Isa"] = "Avx512F",            ["LoadIsa"] = "Avx512F", ["Method"] = "RoundScaleScalar",                         ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                      ["Imm"] = "2",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                             ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(MathF.Ceiling(right[0]))",                                                                                                                                               ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
     ("ImmBinOpTest.template",          new Dictionary<string, string> { ["Isa"] = "Avx512F",            ["LoadIsa"] = "Avx512F", ["Method"] = "RoundScaleScalar",                         ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double",                                                                                      ["Imm"] = "3",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                             ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((right[0] > 0) ? Math.Floor(right[0]) : Math.Ceiling(right[0]))",                                                                                                        ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(left[i])"}),
     ("ImmBinOpTest.template",          new Dictionary<string, string> { ["Isa"] = "Avx512F",            ["LoadIsa"] = "Avx512F", ["Method"] = "RoundScaleScalar",                         ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                      ["Imm"] = "3",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                             ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((right[0] > 0) ? MathF.Floor(right[0]) : MathF.Ceiling(right[0]))",                                                                                                      ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "AddScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "AddScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "AddScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "AddScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "AddScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "AddScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "DivideScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "DivideScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "DivideScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "DivideScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "DivideScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "DivideScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "SubtractScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "SubtractScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "SubtractScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "SubtractScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "SubtractScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "SubtractScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "SqrtScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "SqrtScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "SqrtScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "SqrtScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "SqrtScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "SqrtScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ScaleScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ScaleScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ScaleScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ScaleScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ScaleScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ScaleScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertScalarToVector128Single", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "1.0", ["FixedInput2"] = "15.0"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertScalarToVector128Single", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "1.0", ["FixedInput2"] = "15.0"}),
+    ("SimpleBinOpEmbRounding.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertScalarToVector128Single", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "1.0", ["FixedInput2"] = "15.0"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddNegatedScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddNegatedScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddNegatedScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddNegatedScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddNegatedScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplyAddNegatedScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractNegatedScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractNegatedScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractNegatedScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["CastingMethod"] = "DoubleToUInt64Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractNegatedScalar", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractNegatedScalar", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
+    ("SimpleTernOpEmbRounding.template",      new Dictionary<string, string> { ["Isa"] = "Avx512F",     ["LoadIsa"] = "Avx512F", ["Method"] = "FusedMultiplySubtractNegatedScalar", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["CastingMethod"] = "SingleToUInt32Bits", ["FixedInput1"] = "0.05", ["FixedInput2"] = "0.45", ["FixedInput3"] = "0.75"}),
 };
 
 (string templateFileName, Dictionary<string, string> templateData)[] Avx512F_VL_Vector128Inputs = new []
@@ -2210,6 +2360,30 @@
     ("ImmUnOpTest.template",           new Dictionary<string, string> { ["Isa"] = "Avx512DQ",           ["LoadIsa"] = "Avx512F", ["Method"] = "ReduceScalar",                             ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single",                                                                                                                                                   ["Imm"] = "16",  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",                                                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(Avx512Verify.Reduce(firstOp[0], 1)) != BitConverter.SingleToInt32Bits(result[0])",                                                                                                                                    ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i])"}),
     ("SimpleBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx512DQ",           ["LoadIsa"] = "Avx512F", ["Method"] = "Xor",                                      ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double",                                                                                                       ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                             ["ValidateFirstResult"] = "(BitConverter.DoubleToInt64Bits(left[0]) ^ BitConverter.DoubleToInt64Bits(right[0])) != BitConverter.DoubleToInt64Bits(result[0])",                                                                                                                  ["ValidateRemainingResults"] = "(BitConverter.DoubleToInt64Bits(left[i]) ^ BitConverter.DoubleToInt64Bits(right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}),
     ("SimpleBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx512DQ",           ["LoadIsa"] = "Avx512F", ["Method"] = "Xor",                                      ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single",                                                                                                       ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                             ["ValidateFirstResult"] = "(BitConverter.SingleToInt32Bits(left[0]) ^ BitConverter.SingleToInt32Bits(right[0])) != BitConverter.SingleToInt32Bits(result[0])",                                                                                                                  ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(left[i]) ^ BitConverter.SingleToInt32Bits(right[i])) != BitConverter.SingleToInt32Bits(result[i])"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256Single", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "10"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256Single", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "10"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256Single", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "10"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256Single", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "10"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256Single", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "10"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector256Single", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.SingleToUInt32Bits", ["FixedInput"] = "10"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Double", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.DoubleToUInt64Bits", ["FixedInput"] = "10"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Double", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.DoubleToUInt64Bits", ["FixedInput"] = "10"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Double", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.DoubleToUInt64Bits", ["FixedInput"] = "10"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Double", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.DoubleToUInt64Bits", ["FixedInput"] = "10"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Double", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.DoubleToUInt64Bits", ["FixedInput"] = "10"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Double", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["CastingMethod"] = "BitConverter.DoubleToUInt64Bits", ["FixedInput"] = "10"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Int64", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Int64", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Int64", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Int64", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Int64", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512Int64", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512UInt64", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512UInt64", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512UInt64", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512UInt64", ["RoundingMode"] = "ToNegativeInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512UInt64", ["RoundingMode"] = "ToPositiveInfinity",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
+    ("SimpleUnaryOpEmbRounding.template",     new Dictionary<string, string> { ["Isa"] = "Avx512DQ",     ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512UInt64", ["RoundingMode"] = "ToZero",  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["CastingMethod"] = "(ulong)", ["FixedInput"] = "29.37"}),
 };
 
 (string templateFileName, Dictionary<string, string> templateData)[] Avx512DQ_ScalarUpperInputs = new []
@@ -2634,12 +2808,25 @@ void ProcessInput(StreamWriter testListFile, string groupName, (string templateF
         testName += ".Tuple3Op";
         suffix += "Tuple3Op";
     }
+    else if (input.templateFileName == "SimpleTernOpEmbRounding.template")
+    {
+        testName += ".EmbeddedRounding";
+        testName += $".{input.templateData["RoundingMode"]}";
+        suffix += "EmbeddedRounding";
+    }
     else if (input.templateFileName == "SimpleBinOpEmbRounding.template")
     {
         testName += ".EmbeddedRounding";
         testName += $".{input.templateData["RoundingMode"]}";
         suffix += "EmbeddedRounding";
     }
+    else if (input.templateFileName == "SimpleUnaryOpEmbRounding.template")
+    {
+        testName +=$".{input.templateData["Op1BaseType"]}";
+        testName += ".EmbeddedRounding";
+        testName += $".{input.templateData["RoundingMode"]}";
+        suffix += "EmbeddedRounding";
+    }
 
     var fileName = Path.Combine(outputDirectory, $"{testName}.cs");
 
diff --git a/src/tests/Common/XUnitWrapperGenerator/RuntimeTestModes.cs b/src/tests/Common/XUnitWrapperGenerator/RuntimeTestModes.cs
index e5a48a348d15..f4d60288ca6e 100644
--- a/src/tests/Common/XUnitWrapperGenerator/RuntimeTestModes.cs
+++ b/src/tests/Common/XUnitWrapperGenerator/RuntimeTestModes.cs
@@ -22,9 +22,9 @@ public enum RuntimeTestModes
         JitMinOpts = 1 << 3, // DOTNET_JITMinOpts is set.
         TailcallStress = 1 << 4, // DOTNET_TailcallStress is set.
 
-        // ZapDisable says to not use NGEN or ReadyToRun images.
+        // DisableR2R says to not use ReadyToRun images.
         // This means we JIT everything.
-        ZapDisable = 1 << 5, // DOTNET_ZapDisable is set.
+        DisableR2R = 1 << 5, // DOTNET_ReadyToRun=0
 
         // GCStress3 forces a GC at various locations, typically transitions
         // to/from the VM from managed code.
@@ -33,6 +33,15 @@ public enum RuntimeTestModes
         // GCStressC forces a GC at every JIT-generated code instruction,
         // including in NGEN/ReadyToRun code.
         GCStressC = 1 << 7, // DOTNET_GCStress includes mode 0xC.
-        AnyGCStress = GCStress3 | GCStressC // Disable when any GCStress is exercised.
+        AnyGCStress = GCStress3 | GCStressC, // Disable when any GCStress is exercised.
+        // TieredCompilation is on by default, but can cause some tests to fail
+        // As TieredCompilation is on by default, it does not count as a stress mode for RegularRun.
+        TieredCompilation = 1 << 8, // DOTNET_TieredCompilation (or COMPlus_TieredCompilation) is not set to 0.
+
+        AnyJitStress = JitStress | JitStressRegs | JitMinOpts | TailcallStress, // Disable when any JIT stress mode is exercised.
+
+        AnyJitOptimizationStress = AnyJitStress | TieredCompilation, // Disable when any JIT non-full optimization stress mode is exercised.
+
+        HeapVerify = 1 << 9, // DOTNET_HeapVerify (or COMPlus_HeapVerify) is set.
     }
 }
diff --git a/src/tests/Common/XUnitWrapperGenerator/XUnitWrapperGenerator.cs b/src/tests/Common/XUnitWrapperGenerator/XUnitWrapperGenerator.cs
index c5a94972b709..06cfdf3022c1 100644
--- a/src/tests/Common/XUnitWrapperGenerator/XUnitWrapperGenerator.cs
+++ b/src/tests/Common/XUnitWrapperGenerator/XUnitWrapperGenerator.cs
@@ -806,6 +806,10 @@ private static IEnumerable<ITestInfo> GetTestMethodInfosForMethod(IMethodSymbol
                         // If we're building tests not for Mono, we can skip handling the specifics of the SkipOnMonoAttribute.
                         continue;
                     }
+                    if (filterAttribute.ConstructorArguments.Length <= 1)
+                    {
+                        return ImmutableArray<ITestInfo>.Empty;
+                    }
                     testInfos = DecorateWithSkipOnPlatform(testInfos, (int)filterAttribute.ConstructorArguments[1].Value!, options);
                     break;
                 case "Xunit.SkipOnPlatformAttribute":
@@ -892,6 +896,10 @@ private static ImmutableArray<ITestInfo> DecorateWithSkipOnCoreClrConfiguration(
         {
             conditions.Add($"{ConditionClass}.IsStressTest");
         }
+        if (skippedTestModes.HasFlag(Xunit.RuntimeTestModes.DisableR2R))
+        {
+            conditions.Add($"!{ConditionClass}.IsDisableR2R");
+        }
         if (skippedTestModes.HasFlag(Xunit.RuntimeTestModes.JitStress))
         {
             conditions.Add($"!{ConditionClass}.IsJitStress");
@@ -908,9 +916,13 @@ private static ImmutableArray<ITestInfo> DecorateWithSkipOnCoreClrConfiguration(
         {
             conditions.Add($"!{ConditionClass}.IsTailcallStress");
         }
-        if (skippedTestModes.HasFlag(Xunit.RuntimeTestModes.ZapDisable))
+        if (skippedTestModes.HasFlag(Xunit.RuntimeTestModes.TieredCompilation))
+        {
+            conditions.Add($"!{ConditionClass}.IsTieredCompilation");
+        }
+        if (skippedTestModes.HasFlag(Xunit.RuntimeTestModes.HeapVerify))
         {
-            conditions.Add($"!{ConditionClass}.IsZapDisable");
+            conditions.Add($"!{ConditionClass}.IsHeapVerify");
         }
 
         if (skippedTestModes.HasFlag(Xunit.RuntimeTestModes.AnyGCStress))
diff --git a/src/tests/Common/helixpublishwitharcade.proj b/src/tests/Common/helixpublishwitharcade.proj
index 557e5e55beea..b5d515334389 100644
--- a/src/tests/Common/helixpublishwitharcade.proj
+++ b/src/tests/Common/helixpublishwitharcade.proj
@@ -413,8 +413,8 @@
       <HelixCommandLines Include="$(_WorkaroundForNuGetMigrations)" />
 
       <!-- Force assemblies to lazy-load for LLVM AOT test runs to enable using tests that fail at AOT time (and as a result can't be AOTd) -->
-      <HelixCommandLines Condition="'$(RuntimeVariant)' == 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) -usewatcher --aot-lazy-assembly-load" />
-      <HelixCommandLines Condition="'$(RuntimeVariant)' != 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) -usewatcher" />
+      <HelixCommandLines Condition="'$(RuntimeVariant)' == 'llvmfullaot' or '$(RuntimeVariant)' == 'minifullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) -usewatcher --aot-lazy-assembly-load" />
+      <HelixCommandLines Condition="'$(RuntimeVariant)' != 'llvmfullaot' and '$(RuntimeVariant)' != 'minifullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) -usewatcher" />
 
       <!--
         Bug Fix: GH Issue #85056 - Helix takes the exit code of the last ran executable. Since that spot has now been
diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj
index d1bc1d1a94c4..73c5bf8a151c 100644
--- a/src/tests/Common/testenvironment.proj
+++ b/src/tests/Common/testenvironment.proj
@@ -53,7 +53,6 @@
       DOTNET_JitStressRegs;
       DOTNET_TailcallStress;
       DOTNET_ReadyToRun;
-      DOTNET_ZapDisable;
       DOTNET_TC_OnStackReplacement;
       DOTNET_TC_QuickJitForLoops;
       DOTNET_TC_OnStackReplacement_InitialCounter;
@@ -81,7 +80,10 @@
       RunningIlasmRoundTrip;
       DOTNET_JitSynthesizeCounts;
       DOTNET_JitCheckSynthesizedCounts;
-      DOTNET_JitEnableCrossBlockLocalAssertionProp
+      DOTNET_JitRLCSEGreedy;
+      DOTNET_JitEnableOptRepeat;
+      DOTNET_JitOptRepeat;
+      DOTNET_JitOptRepeatCount;
     </DOTNETVariables>
   </PropertyGroup>
   <ItemGroup>
@@ -207,11 +209,11 @@
     <TestEnvironment Include="gcstress0x3" GCStress="0x3" />
     <TestEnvironment Include="gcstress0xc" GCStress="0xC" />
     <TestEnvironment Include="gcstress0xf" GCStress="0xF" />
-    <TestEnvironment Include="zapdisable" ZapDisable="1" ReadyToRun="0" />
+    <TestEnvironment Include="disabler2r" ReadyToRun="0" />
     <TestEnvironment Include="heapverify1" HeapVerify="1" />
-    <TestEnvironment Include="gcstress0xc_zapdisable" GCStress="0xC" ZapDisable="1" ReadyToRun="0" />
-    <TestEnvironment Include="gcstress0xc_zapdisable_jitstress2" GCStress="0xC" ZapDisable="1" ReadyToRun="0" JitStress="2" />
-    <TestEnvironment Include="gcstress0xc_zapdisable_heapverify1" GCStress="0xC" ZapDisable="1" ReadyToRun="0" HeapVerify="1" />
+    <TestEnvironment Include="gcstress0xc_disabler2r" GCStress="0xC" ReadyToRun="0" />
+    <TestEnvironment Include="gcstress0xc_disabler2r_jitstress2" GCStress="0xC" ReadyToRun="0" JitStress="2" />
+    <TestEnvironment Include="gcstress0xc_disabler2r_heapverify1" GCStress="0xC" ReadyToRun="0" HeapVerify="1" />
     <TestEnvironment Include="gcstress0xc_jitstress1" GCStress="0xC" JitStress="1" />
     <TestEnvironment Include="gcstress0xc_jitstress2" GCStress="0xC" JitStress="2" />
     <TestEnvironment Include="gcstress0xc_tailcallstress" GCStress="0xC" TailcallStress="1" />
@@ -224,7 +226,6 @@
     <TestEnvironment Include="jitobjectstackallocation" JitObjectStackAllocation="1" TieredCompilation="0" />
     <TestEnvironment Include="jitphysicalpromotion_only" JitStressModeNames="STRESS_NO_OLD_PROMOTION" TieredCompilation="0" />
     <TestEnvironment Include="jitphysicalpromotion_full" JitStressModeNames="STRESS_PHYSICAL_PROMOTION_COST STRESS_NO_OLD_PROMOTION" TieredCompilation="0" />
-    <TestEnvironment Include="jitcrossblocklocalassertionprop" JitEnableCrossBlockLocalAssertionProp="1" TieredCompilation="0" />
     <TestEnvironment Include="jitcfg" JitForceControlFlowGuard="1" />
     <TestEnvironment Include="jitcfg_dispatcher_always" JitForceControlFlowGuard="1" JitCFGUseDispatcher="1" />
     <TestEnvironment Include="jitcfg_dispatcher_never" JitForceControlFlowGuard="1" JitCFGUseDispatcher="0" />
@@ -238,8 +239,10 @@
     <TestEnvironment Include="fullpgo_random_gdv" TieredPGO="1" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0" JitRandomGuardedDevirtualization="1" JitRandomlyCollect64BitCounts="1" />
     <TestEnvironment Include="fullpgo_random_gdv_methodprofiling_only" TieredPGO="1" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0" JitRandomGuardedDevirtualization="1" JitClassProfiling="0" JitDelegateProfiling="1" JitVTableProfiling="1" JitRandomlyCollect64BitCounts="1" />
     <TestEnvironment Include="fullpgo_random_gdv_edge" TieredPGO="1" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0" JitRandomGuardedDevirtualization="1" JitRandomEdgeCounts="1" JitRandomlyCollect64BitCounts="1" />
-    <TestEnvironment Include="syntheticpgo" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0" JitSynthesizeCounts="1" JitCheckSynthesizedProfile="1" />
-    <TestEnvironment Include="syntheticpgo_blend" TieredPGO="1" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0" JitSynthesizeCounts="3" JitCheckSynthesizedProfile="1" />
+    <TestEnvironment Include="syntheticpgo" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0" JitSynthesizeCounts="1" JitCheckSynthesizedCounts="1" />
+    <TestEnvironment Include="syntheticpgo_blend" TieredPGO="1" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0" JitSynthesizeCounts="3" JitCheckSynthesizedCounts="1" />
+    <TestEnvironment Include="jitrlcse" JitRLCSEGreedy="1" />
+    <TestEnvironment Include="jitoptrepeat" JitEnableOptRepeat="1" JitOptRepeat="*" JitOptRepeatCount="2"/>
     <TestEnvironment Include="gcstandalone" Condition="'$(TargetsWindows)' == 'true'" GCName="clrgc.dll"/>
     <TestEnvironment Include="gcstandalone" Condition="'$(TargetsWindows)' != 'true'" GCName="libclrgc.so"/>
     <TestEnvironment Include="gcstandaloneserver" Condition="'$(TargetsWindows)' == 'true'" gcServer="1" GCName="clrgc.dll"/>
@@ -323,7 +326,7 @@
       <_TestEnvFileLine Condition="'false' == 'true' and '$(RuntimeVariant)' == 'llvmaot'" Include="export MONO_ENV_OPTIONS=--llvm" />
 
       <!-- Use Mono in Full AOT mode when running the full-AOT-compiled runtime tests -->
-      <_TestEnvFileLine Condition="'$(RuntimeVariant)' == 'llvmfullaot'" Include="export MONO_ENV_OPTIONS=--full-aot" />
+      <_TestEnvFileLine Condition="'$(RuntimeVariant)' == 'llvmfullaot' or '$(RuntimeVariant)' == 'minifullaot'" Include="export MONO_ENV_OPTIONS=--full-aot" />
 
       <_TestEnvFileLine Condition="'$(RuntimeVariant)' != ''" Include="export DOTNET_RUNTIME_VARIANT=$(RuntimeVariant)" />
 
diff --git a/src/tests/CoreMangLib/system/delegate/delegate/DelegateCombine1.csproj b/src/tests/CoreMangLib/system/delegate/delegate/DelegateCombine1.csproj
index 62c497a09922..9ba31e811009 100644
--- a/src/tests/CoreMangLib/system/delegate/delegate/DelegateCombine1.csproj
+++ b/src/tests/CoreMangLib/system/delegate/delegate/DelegateCombine1.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/delegate/delegate/DelegateCombineImpl.csproj b/src/tests/CoreMangLib/system/delegate/delegate/DelegateCombineImpl.csproj
index df8e80338fad..bcdc37ff1fd6 100644
--- a/src/tests/CoreMangLib/system/delegate/delegate/DelegateCombineImpl.csproj
+++ b/src/tests/CoreMangLib/system/delegate/delegate/DelegateCombineImpl.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/delegate/delegate/DelegateEquals1.csproj b/src/tests/CoreMangLib/system/delegate/delegate/DelegateEquals1.csproj
index 2d8fbf191cbb..675f0977c04b 100644
--- a/src/tests/CoreMangLib/system/delegate/delegate/DelegateEquals1.csproj
+++ b/src/tests/CoreMangLib/system/delegate/delegate/DelegateEquals1.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/delegate/delegate/DelegateGetHashCode1.csproj b/src/tests/CoreMangLib/system/delegate/delegate/DelegateGetHashCode1.csproj
index a597f02daeda..93d280cc3467 100644
--- a/src/tests/CoreMangLib/system/delegate/delegate/DelegateGetHashCode1.csproj
+++ b/src/tests/CoreMangLib/system/delegate/delegate/DelegateGetHashCode1.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/delegate/delegate/DelegateGetInvocationList1.csproj b/src/tests/CoreMangLib/system/delegate/delegate/DelegateGetInvocationList1.csproj
deleted file mode 100644
index e82724aaace7..000000000000
--- a/src/tests/CoreMangLib/system/delegate/delegate/DelegateGetInvocationList1.csproj
+++ /dev/null
@@ -1,14 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-  <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
-    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-    <CLRTestPriority>1</CLRTestPriority>
-  </PropertyGroup>
-  <ItemGroup>
-    <Compile Include="delegategetinvocationlist1.cs" />
-  </ItemGroup>
-  <ItemGroup>
-    <ProjectReference Include="$(TestSourceDir)Common/CoreCLRTestLibrary/CoreCLRTestLibrary.csproj" />
-  </ItemGroup>
-</Project>
diff --git a/src/tests/CoreMangLib/system/delegate/delegate/DelegateRemove.csproj b/src/tests/CoreMangLib/system/delegate/delegate/DelegateRemove.csproj
index 0217c4ad8d87..fccdb6634bb1 100644
--- a/src/tests/CoreMangLib/system/delegate/delegate/DelegateRemove.csproj
+++ b/src/tests/CoreMangLib/system/delegate/delegate/DelegateRemove.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/delegate/delegate/delegateRemoveImpl.csproj b/src/tests/CoreMangLib/system/delegate/delegate/delegateRemoveImpl.csproj
index ab802b989cda..8ef2b3c7819d 100644
--- a/src/tests/CoreMangLib/system/delegate/delegate/delegateRemoveImpl.csproj
+++ b/src/tests/CoreMangLib/system/delegate/delegate/delegateRemoveImpl.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/delegate/delegate/delegategetinvocationlist1.cs b/src/tests/CoreMangLib/system/delegate/delegate/delegategetinvocationlist1.cs
deleted file mode 100644
index d0c8f3432736..000000000000
--- a/src/tests/CoreMangLib/system/delegate/delegate/delegategetinvocationlist1.cs
+++ /dev/null
@@ -1,230 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System;
-using System.Globalization;
-using Xunit;
-//test case for delegate GetInvocationList method.
-namespace DelegateTest
-{
-    delegate bool booldelegate();
-    public class DelegateGetInvocationList
-    {
-
-        booldelegate starkWork;
-
-        [Fact]
-        public static int TestEntryPoint()
-        {
-            DelegateGetInvocationList delegateGetInvocationList = new DelegateGetInvocationList();
-
-            TestLibrary.TestFramework.BeginTestCase("DelegateGetInvocationList");
-
-            if (delegateGetInvocationList.RunTests())
-            {
-                TestLibrary.TestFramework.EndTestCase();
-                TestLibrary.TestFramework.LogInformation("PASS");
-                return 100;
-
-            }
-            else
-            {
-                TestLibrary.TestFramework.EndTestCase();
-                TestLibrary.TestFramework.LogInformation("FAIL");
-                return 0;
-            }
-        }
-
-        public bool RunTests()
-        {
-            bool retVal = true;
-
-            TestLibrary.TestFramework.LogInformation("[Positive]");
-            retVal = PosTest1() && retVal;
-            retVal = PosTest2() && retVal;
-            retVal = PosTest3() && retVal;
-            retVal = PosTest4() && retVal;
-            return retVal;
-        }
-
-        // Returns true if the expected result is right
-        // Returns false if the expected result is wrong
-        public bool PosTest1()
-        {
-            bool retVal = true;
-
-            TestLibrary.TestFramework.BeginScenario("PosTest1: Call GetInvocationList against a delegate with one function");
-            try
-            {
-                DelegateGetInvocationList delctor = new DelegateGetInvocationList();
-                booldelegate dStartWork_Bool = new booldelegate(new TestClass().StartWork_Bool);
-                delctor.starkWork = dStartWork_Bool;
-                Delegate[] invocationList = delctor.starkWork.GetInvocationList();
-                if (invocationList.Length != 1)
-                {
-                    TestLibrary.TestFramework.LogError("001", "Call GetInvocationList against a delegate with one function returns wrong result: " + invocationList.Length);
-                    retVal = false;
-                }
-                if (!delctor.starkWork.GetInvocationList()[0].Equals(dStartWork_Bool))
-                {
-                    TestLibrary.TestFramework.LogError("002", " GetInvocationList return error method  ");
-                    retVal = false;
-                }
-                delctor.starkWork();
-            }
-            catch (Exception e)
-            {
-                TestLibrary.TestFramework.LogError("003", "Unexpected exception: " + e);
-                retVal = false;
-            }
-
-            return retVal;
-        }
-        // Returns true if the expected result is right
-        // Returns false if the expected result is wrong
-        public bool PosTest2()
-        {
-            bool retVal = true;
-
-            TestLibrary.TestFramework.BeginScenario("PosTest2: Call GetInvocationList against a delegate with muti different functions ");
-            try
-            {
-                DelegateGetInvocationList delctor = new DelegateGetInvocationList();
-		booldelegate bStartWork_Bool = new booldelegate(new TestClass().StartWork_Bool);
-		booldelegate bWorking_Bool   = new booldelegate(new TestClass().Working_Bool);
-		booldelegate bCompleted_Bool = new booldelegate(new TestClass().Completed_Bool);
-
-                delctor.starkWork += bStartWork_Bool;
-                delctor.starkWork += bWorking_Bool;
-                delctor.starkWork += bCompleted_Bool;
-                Delegate[] invocationList = delctor.starkWork.GetInvocationList();
-                if (invocationList.Length != 3)
-                {
-                    TestLibrary.TestFramework.LogError("004", "Call GetInvocationList against a delegate with one function returns wrong result: " + invocationList.Length);
-                    retVal = false;
-                }
-                if (!delctor.starkWork.GetInvocationList()[0].Equals(bStartWork_Bool)
-                    || !delctor.starkWork.GetInvocationList()[1].Equals(bWorking_Bool)
-                    || !delctor.starkWork.GetInvocationList()[2].Equals(bCompleted_Bool))
-                {
-                    TestLibrary.TestFramework.LogError("005", " GetInvocationList return error method  ");
-                    retVal = false;
-                }
-                delctor.starkWork();
-            }
-            catch (Exception e)
-            {
-                TestLibrary.TestFramework.LogError("006", "Unexpected exception: " + e);
-                retVal = false;
-            }
-
-            return retVal;
-        }
-        // Returns true if the expected result is right
-        // Returns false if the expected result is wrong
-        public bool PosTest3()
-        {
-            bool retVal = true;
-
-            TestLibrary.TestFramework.BeginScenario("PosTest3: Call GetInvocationList against a delegate with muti functions ,some is null");
-            try
-            {
-                DelegateGetInvocationList delctor = new DelegateGetInvocationList();
-		booldelegate bStartWork_Bool = new booldelegate(new TestClass().StartWork_Bool);
-		booldelegate bWorking_Bool   = new booldelegate(new TestClass().Working_Bool);
-		booldelegate bCompleted_Bool = new booldelegate(new TestClass().Completed_Bool);
-
-                delctor.starkWork += bStartWork_Bool;
-                delctor.starkWork += null;
-                delctor.starkWork += bWorking_Bool;
-                delctor.starkWork += bCompleted_Bool;
-                Delegate[] invocationList = delctor.starkWork.GetInvocationList();
-                if (invocationList.Length != 3)
-                {
-                    TestLibrary.TestFramework.LogError("007", "Call GetInvocationList against a delegate with one function returns wrong result: " + invocationList.Length);
-                    retVal = false;
-                }
-                if (!delctor.starkWork.GetInvocationList()[0].Equals(bStartWork_Bool)
-                    || !delctor.starkWork.GetInvocationList()[1].Equals(bWorking_Bool)
-                    || !delctor.starkWork.GetInvocationList()[2].Equals(bCompleted_Bool))
-                {
-                    TestLibrary.TestFramework.LogError("008", " GetInvocationList return error method  ");
-                    retVal = false;
-                }
-                delctor.starkWork();
-            }
-            catch (Exception e)
-            {
-                TestLibrary.TestFramework.LogError("009", "Unexpected exception: " + e);
-                retVal = false;
-            }
-
-            return retVal;
-        }
-
-        // Returns true if the expected result is right
-        // Returns false if the expected result is wrong
-        public bool PosTest4()
-        {
-            bool retVal = true;
-
-            TestLibrary.TestFramework.BeginScenario("PosTest4: Call GetInvocationList against a delegate with muti functions ,some of these are the same");
-            try
-            {
-                DelegateGetInvocationList delctor = new DelegateGetInvocationList();
-		booldelegate bStartWork_Bool = new booldelegate(new TestClass().StartWork_Bool);
-		booldelegate bWorking_Bool   = new booldelegate(new TestClass().Working_Bool);
-		booldelegate bCompleted_Bool = new booldelegate(new TestClass().Completed_Bool);
-
-                delctor.starkWork += bStartWork_Bool;
-                delctor.starkWork += bStartWork_Bool;
-                delctor.starkWork += bWorking_Bool;
-                delctor.starkWork += bCompleted_Bool;
-                Delegate[] invocationList = delctor.starkWork.GetInvocationList();
-                if (invocationList.Length != 4)
-                {
-                    TestLibrary.TestFramework.LogError("010", "Call GetInvocationList against a delegate with one function returns wrong result: " + invocationList.Length);
-                    retVal = false;
-                }
-                if (!delctor.starkWork.GetInvocationList()[0].Equals(bStartWork_Bool)
-                    || !delctor.starkWork.GetInvocationList()[1].Equals(bStartWork_Bool)
-                    || !delctor.starkWork.GetInvocationList()[2].Equals(bWorking_Bool)
-                    || !delctor.starkWork.GetInvocationList()[3].Equals(bCompleted_Bool))
-                {
-                    TestLibrary.TestFramework.LogError("011", " GetInvocationList return error method  ");
-                    retVal = false;
-                }
-                delctor.starkWork();
-            }
-            catch (Exception e)
-            {
-                TestLibrary.TestFramework.LogError("012", "Unexpected exception: " + e);
-                retVal = false;
-            }
-
-            return retVal;
-        }
-
-    }
-    //create testclass for providing test method and test target.
-    class TestClass
-    {
-        public bool StartWork_Bool()
-        {
-            TestLibrary.TestFramework.LogInformation("StartWork_Bool method  is running .");
-            return true;
-        }
-        public bool Working_Bool()
-        {
-            TestLibrary.TestFramework.LogInformation("Working_Bool method  is running .");
-            return true;
-        }
-        public bool Completed_Bool()
-        {
-            TestLibrary.TestFramework.LogInformation("Completed_Bool method  is running .");
-            return true;
-        }
-    }
-
-
-}
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedAdd1.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedAdd1.csproj
index 5fd51c98e988..240a808b81d7 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedAdd1.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedAdd1.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedAdd2.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedAdd2.csproj
index 7bffaebae9b7..eca7ac911ddf 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedAdd2.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedAdd2.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange1.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange1.csproj
index f3c3fda0df9b..8e306f01fb93 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange1.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange1.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange5.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange5.csproj
index 35a762a20f66..a47f201ab35a 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange5.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange5.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange6.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange6.csproj
index cd78a0ff8c91..41d9e2f720ab 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange6.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange6.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange7.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange7.csproj
index df7acc294a9d..9b87dfe6d5ce 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange7.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedCompareExchange7.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedDecrement1.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedDecrement1.csproj
index 8b0fc3f76999..b5f41c9a489d 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedDecrement1.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedDecrement1.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedDecrement2.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedDecrement2.csproj
index 3b3bf9c3630e..b3109aeb5c62 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedDecrement2.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedDecrement2.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange1.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange1.csproj
index cd22e9f75374..759aad8cbdda 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange1.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange1.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange5.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange5.csproj
index 5415ff38ce41..069e3826b9b0 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange5.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange5.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange6.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange6.csproj
index d5d726e7f95b..f385c60cc13b 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange6.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange6.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange7.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange7.csproj
index 67c85a975625..40ca16222e8e 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange7.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedExchange7.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedIncrement1.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedIncrement1.csproj
index 5ff7ad402962..4ac51963437d 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedIncrement1.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedIncrement1.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedIncrement2.csproj b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedIncrement2.csproj
index cee61c6f548e..350cd5772f94 100644
--- a/src/tests/CoreMangLib/system/threading/interlocked/InterlockedIncrement2.csproj
+++ b/src/tests/CoreMangLib/system/threading/interlocked/InterlockedIncrement2.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Directory.Build.props b/src/tests/Directory.Build.props
index 1a08cc2e27a9..28dfc3b471f4 100644
--- a/src/tests/Directory.Build.props
+++ b/src/tests/Directory.Build.props
@@ -163,11 +163,6 @@
     <ExcludeMscorlibFacade>true</ExcludeMscorlibFacade>
   </PropertyGroup>
 
-  <!-- Set default ZapRequire level (used only when CrossGen is enabled) -->
-  <PropertyGroup>
-    <ZapRequire Condition="'$(ZapRequire)' == ''">2</ZapRequire>
-  </PropertyGroup>
-
   <!-- Don't append the RID to the output path for our test tree. Our test builds are already separated by RID
        and including the RID breaks some of our glob expressions in issues.targets -->
   <PropertyGroup>
diff --git a/src/tests/Interop/CMakeLists.txt b/src/tests/Interop/CMakeLists.txt
index 80958de80bea..fa3217993a8a 100644
--- a/src/tests/Interop/CMakeLists.txt
+++ b/src/tests/Interop/CMakeLists.txt
@@ -81,6 +81,7 @@ if(CLR_CMAKE_TARGET_WIN32)
     add_subdirectory(COM/NativeClients/DefaultInterfaces)
     add_subdirectory(COM/NativeClients/Dispatch)
     add_subdirectory(COM/NativeClients/Events)
+    add_subdirectory(COM/NativeClients/MiscTypes)
     add_subdirectory(COM/ComWrappers/MockReferenceTrackerRuntime)
     add_subdirectory(COM/ComWrappers/WeakReference)
 
@@ -105,4 +106,7 @@ if(CLR_CMAKE_TARGET_APPLE)
     add_subdirectory(Swift/SwiftErrorHandling)
     add_subdirectory(Swift/SwiftSelfContext)
     add_subdirectory(Swift/SwiftInvalidCallConv)
+    add_subdirectory(Swift/SwiftAbiStress)
+    add_subdirectory(Swift/SwiftRetAbiStress)
+    add_subdirectory(Swift/SwiftCallbackAbiStress)
 endif()
diff --git a/src/tests/Interop/COM/Dynamic/BasicTest.cs b/src/tests/Interop/COM/Dynamic/BasicTest.cs
index 1e6fa7f5604e..0d1125bdfc12 100644
--- a/src/tests/Interop/COM/Dynamic/BasicTest.cs
+++ b/src/tests/Interop/COM/Dynamic/BasicTest.cs
@@ -43,6 +43,7 @@ public void Run()
 
             String();
             Date();
+            SpecialCasedValueTypes();
             ComObject();
             Null();
 
@@ -385,6 +386,16 @@ private void Date()
             Variant<DateTime>(val, expected);
         }
 
+        private void SpecialCasedValueTypes()
+        {
+            {
+                var val = Guid.NewGuid();
+                var expected = val;
+                // Pass as variant
+                Variant<Guid>(val, expected);
+            }
+        }
+
         private void ComObject()
         {
             Type t = Type.GetTypeFromCLSID(Guid.Parse(ServerGuids.BasicTest));
@@ -423,6 +434,9 @@ private void Null()
 
             obj.String_Property = null;
             Assert.Equal(string.Empty, obj.String_Property);
+
+            obj.Dispatch_Property = new DispatchWrapper(null);
+            Assert.Null(obj.Dispatch_Property);
         }
 
         private void StringWrapper(string toWrap, string expected)
diff --git a/src/tests/Interop/COM/NETClients/MiscTypes/App.manifest b/src/tests/Interop/COM/NETClients/MiscTypes/App.manifest
new file mode 100644
index 000000000000..93dcb090e865
--- /dev/null
+++ b/src/tests/Interop/COM/NETClients/MiscTypes/App.manifest
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="utf-8"?>
+<assembly manifestVersion="1.0" xmlns="urn:schemas-microsoft-com:asm.v1">
+  <assemblyIdentity
+    type="win32"
+    name="NetClientMiscTypes"
+    version="1.0.0.0" />
+
+  <dependency>
+    <dependentAssembly>
+      <!-- RegFree COM -->
+      <assemblyIdentity
+          type="win32"
+          name="COMNativeServer.X"
+          version="1.0.0.0"/>
+    </dependentAssembly>
+  </dependency>
+
+</assembly>
diff --git a/src/tests/Interop/COM/NETClients/MiscTypes/NetClientMiscTypes.csproj b/src/tests/Interop/COM/NETClients/MiscTypes/NetClientMiscTypes.csproj
new file mode 100644
index 000000000000..bd343f7dc8f9
--- /dev/null
+++ b/src/tests/Interop/COM/NETClients/MiscTypes/NetClientMiscTypes.csproj
@@ -0,0 +1,18 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <!-- Needed for CMakeProjectReference, GC.WaitForPendingFinalizers -->
+    <RequiresProcessIsolation>true</RequiresProcessIsolation>
+    <ApplicationManifest>App.manifest</ApplicationManifest>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="Program.cs" />
+    <Compile Include="../../ServerContracts/Server.CoClasses.cs" />
+    <Compile Include="../../ServerContracts/Server.Contracts.cs" />
+    <Compile Include="../../ServerContracts/ServerGuids.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <CMakeProjectReference Include="../../NativeServer/CMakeLists.txt" />
+    <ProjectReference Include="$(TestLibraryProjectPath)" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/Interop/COM/NETClients/MiscTypes/Program.cs b/src/tests/Interop/COM/NETClients/MiscTypes/Program.cs
new file mode 100644
index 000000000000..de4945b5af13
--- /dev/null
+++ b/src/tests/Interop/COM/NETClients/MiscTypes/Program.cs
@@ -0,0 +1,107 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Xunit;
+namespace NetClient
+{
+    using System;
+    using System.Runtime.InteropServices;
+
+    using TestLibrary;
+    using Xunit;
+    using Server.Contract;
+    using Server.Contract.Servers;
+
+    struct Struct {}
+
+    public unsafe class Program
+    {
+        [Fact]
+        public static int TestEntryPoint()
+        {
+            // RegFree COM is not supported on Windows Nano
+            if (TestLibrary.Utilities.IsWindowsNanoServer)
+            {
+                return 100;
+            }
+
+            try
+            {
+                ValidationTests();
+                ValidateNegativeTests();
+            }
+            catch (Exception e)
+            {
+                Console.WriteLine($"Test object interop failure: {e}");
+                return 101;
+            }
+
+            return 100;
+        }
+
+        private static void ValidationTests()
+        {
+            Console.WriteLine($"Running {nameof(ValidationTests)} ...");
+
+            var miscTypeTesting = (Server.Contract.Servers.MiscTypesTesting)new Server.Contract.Servers.MiscTypesTestingClass();
+
+            Console.WriteLine("-- Primitives <=> VARIANT...");
+            {
+                object expected = null;
+                Assert.Equal(expected, miscTypeTesting.Marshal_Variant(expected));
+            }
+            {
+                var expected = DBNull.Value;
+                Assert.Equal(expected, miscTypeTesting.Marshal_Variant(expected));
+            }
+            {
+                var expected = (sbyte)0x0f;
+                Assert.Equal(expected, miscTypeTesting.Marshal_Variant(expected));
+            }
+            {
+                var expected = (short)0x07ff;
+                Assert.Equal(expected, miscTypeTesting.Marshal_Variant(expected));
+            }
+            {
+                var expected = (int)0x07ffffff;
+                Assert.Equal(expected, miscTypeTesting.Marshal_Variant(expected));
+            }
+            {
+                var expected = (long)0x07ffffffffffffff;
+                Assert.Equal(expected, miscTypeTesting.Marshal_Variant(expected));
+            }
+            {
+                var expected = true;
+                Assert.Equal(expected, miscTypeTesting.Marshal_Variant(expected));
+            }
+            {
+                var expected = false;
+                Assert.Equal(expected, miscTypeTesting.Marshal_Variant(expected));
+            }
+
+            Console.WriteLine("-- BSTR <=> VARIANT...");
+            {
+                var expected = "The quick Fox jumped over the lazy Dog.";
+                Assert.Equal(expected, miscTypeTesting.Marshal_Variant(expected));
+            }
+
+            Console.WriteLine("-- System.Guid <=> VARIANT...");
+            {
+                var expected = new Guid("{8EFAD956-B33D-46CB-90F4-45F55BA68A96}");
+                Assert.Equal(expected, miscTypeTesting.Marshal_Variant(expected));
+            }
+        }
+
+        private static void ValidateNegativeTests()
+        {
+            Console.WriteLine($"Running {nameof(ValidateNegativeTests)} ...");
+
+            var miscTypeTesting = (Server.Contract.Servers.MiscTypesTesting)new Server.Contract.Servers.MiscTypesTestingClass();
+
+            Console.WriteLine("-- User defined ValueType <=> VARIANT...");
+            {
+                Assert.Throws<NotSupportedException>(() => miscTypeTesting.Marshal_Variant(new Struct()));
+            }
+        }
+    }
+}
diff --git a/src/tests/Interop/COM/NETServer/MiscTypesTesting.cs b/src/tests/Interop/COM/NETServer/MiscTypesTesting.cs
new file mode 100644
index 000000000000..2a31507d29ab
--- /dev/null
+++ b/src/tests/Interop/COM/NETServer/MiscTypesTesting.cs
@@ -0,0 +1,54 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+[ComVisible(true)]
+[Guid(Server.Contract.Guids.MiscTypesTesting)]
+public class MiscTypesTesting : Server.Contract.IMiscTypesTesting
+{
+    object Server.Contract.IMiscTypesTesting.Marshal_Variant(object obj)
+    {
+        if (obj is null)
+        {
+            return null;
+        }
+
+        if (obj is DBNull)
+        {
+            return DBNull.Value;
+        }
+
+        if (obj.GetType().IsValueType)
+        {
+            return CallMemberwiseClone(obj);
+        }
+
+        if (obj is string)
+        {
+            return obj;
+        }
+
+        Environment.FailFast($"Arguments must be ValueTypes or strings: {obj.GetType()}");
+        return null;
+
+        // object.MemberwiseClone() will bitwise copy for ValueTypes.
+        // This is sufficient for the VARIANT marshalling scenario being
+        // tested here.
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "MemberwiseClone")]
+        static extern object CallMemberwiseClone(object obj);
+    }
+
+    object Server.Contract.IMiscTypesTesting.Marshal_Instance_Variant(string init)
+    {
+        if (Guid.TryParse(init, out Guid result))
+        {
+            return result;
+        }
+
+        Environment.FailFast($"Unknown init value: {init}");
+        return null;
+    }
+}
\ No newline at end of file
diff --git a/src/tests/Interop/COM/NativeClients/MiscTypes.csproj b/src/tests/Interop/COM/NativeClients/MiscTypes.csproj
new file mode 100644
index 000000000000..83409dcfceb2
--- /dev/null
+++ b/src/tests/Interop/COM/NativeClients/MiscTypes.csproj
@@ -0,0 +1,6 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <ItemGroup>
+    <CMakeProjectReference Include="MiscTypes/CMakeLists.txt" />
+    <ProjectReference Include="../NETServer/NETServer.csproj" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/Interop/COM/NativeClients/MiscTypes/App.manifest b/src/tests/Interop/COM/NativeClients/MiscTypes/App.manifest
new file mode 100644
index 000000000000..20ffce48d342
--- /dev/null
+++ b/src/tests/Interop/COM/NativeClients/MiscTypes/App.manifest
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="utf-8" standalone="yes" ?>
+<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+  <assemblyIdentity
+    type="win32"
+    name="COMClientMiscTypes"
+    version="1.0.0.0"/>
+
+  <dependency>
+    <dependentAssembly>
+      <!-- RegFree COM - CoreCLR Shim -->
+      <assemblyIdentity
+        type="win32"
+        name="CoreShim.X"
+        version="1.0.0.0"/>
+    </dependentAssembly>
+  </dependency>
+</assembly>
\ No newline at end of file
diff --git a/src/tests/Interop/COM/NativeClients/MiscTypes/CMakeLists.txt b/src/tests/Interop/COM/NativeClients/MiscTypes/CMakeLists.txt
new file mode 100644
index 000000000000..3dcba4671143
--- /dev/null
+++ b/src/tests/Interop/COM/NativeClients/MiscTypes/CMakeLists.txt
@@ -0,0 +1,22 @@
+project (COMClientMiscTypes)
+include_directories( ${INC_PLATFORM_DIR} )
+include_directories( "../../ServerContracts" )
+include_directories( "../../NativeServer" )
+include_directories("../")
+set(SOURCES
+    MiscTypes.cpp
+    App.manifest)
+
+# add the executable
+add_executable (COMClientMiscTypes ${SOURCES})
+target_link_libraries(COMClientMiscTypes PRIVATE ${LINK_LIBRARIES_ADDITIONAL})
+
+# Copy CoreShim manifest to project output
+file(GENERATE OUTPUT $<TARGET_FILE_DIR:${PROJECT_NAME}>/CoreShim.X.manifest INPUT ${CMAKE_CURRENT_SOURCE_DIR}/CoreShim.X.manifest)
+
+# add the install targets
+install (TARGETS COMClientMiscTypes DESTINATION bin)
+# If there's a dynamic ASAN runtime, then copy it to project output.
+if (NOT "${ASAN_RUNTIME}" STREQUAL "")
+    file(COPY "${ASAN_RUNTIME}" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")
+endif()
diff --git a/src/tests/Interop/COM/NativeClients/MiscTypes/CoreShim.X.manifest b/src/tests/Interop/COM/NativeClients/MiscTypes/CoreShim.X.manifest
new file mode 100644
index 000000000000..a3c8593ee067
--- /dev/null
+++ b/src/tests/Interop/COM/NativeClients/MiscTypes/CoreShim.X.manifest
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+
+<assemblyIdentity
+  type="win32"
+  name="CoreShim.X"
+  version="1.0.0.0" />
+
+<file name="CoreShim.dll">
+  <!-- MiscTypesTesting -->
+  <comClass
+    clsid="{CCFF894B-A27C-45E0-9B30-6C88D722E843}"
+    threadingModel="Both" />
+</file>
+
+</assembly>
diff --git a/src/tests/Interop/COM/NativeClients/MiscTypes/MiscTypes.cpp b/src/tests/Interop/COM/NativeClients/MiscTypes/MiscTypes.cpp
new file mode 100644
index 000000000000..6fb435be6513
--- /dev/null
+++ b/src/tests/Interop/COM/NativeClients/MiscTypes/MiscTypes.cpp
@@ -0,0 +1,171 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <xplatform.h>
+#include <cassert>
+#include <Server.Contracts.h>
+#include <windows_version_helpers.h>
+
+// COM headers
+#include <objbase.h>
+#include <combaseapi.h>
+
+#define COM_CLIENT
+#include <Servers.h>
+
+#define THROW_IF_FAILED(exp) { hr = exp; if (FAILED(hr)) { ::printf("FAILURE: 0x%08x = %s\n", hr, #exp); throw hr; } }
+#define THROW_FAIL_IF_FALSE(exp) { if (!(exp)) { ::printf("FALSE: %s\n", #exp); throw E_FAIL; } }
+
+template<COINIT TM>
+struct ComInit
+{
+    const HRESULT Result;
+
+    ComInit()
+        : Result{ ::CoInitializeEx(nullptr, TM) }
+    { }
+
+    ~ComInit()
+    {
+        if (SUCCEEDED(Result))
+            ::CoUninitialize();
+    }
+};
+
+using ComMTA = ComInit<COINIT_MULTITHREADED>;
+void ValidationTests();
+
+int __cdecl main()
+{
+    if (is_windows_nano() == S_OK)
+    {
+        ::puts("RegFree COM is not supported on Windows Nano. Auto-passing this test.\n");
+        return 100;
+    }
+    ComMTA init;
+    if (FAILED(init.Result))
+        return -1;
+
+    try
+    {
+        CoreShimComActivation csact{ W("NETServer"), W("MiscTypesTesting") };
+        ValidationTests();
+    }
+    catch (HRESULT hr)
+    {
+        ::printf("Test Failure: 0x%08x\n", hr);
+        return 101;
+    }
+
+    return 100;
+}
+
+struct VariantMarshalTest
+{
+    VARIANT Input;
+    VARIANT Result;
+    VariantMarshalTest()
+    {
+        ::VariantInit(&Input);
+        ::VariantInit(&Result);
+    }
+    ~VariantMarshalTest()
+    {
+        ::VariantClear(&Input);
+        ::VariantClear(&Result);
+    }
+};
+
+void ValidationTests()
+{
+    ::printf(__FUNCTION__ "() through CoCreateInstance...\n");
+
+    HRESULT hr;
+
+    IMiscTypesTesting *miscTypesTesting;
+    THROW_IF_FAILED(::CoCreateInstance(CLSID_MiscTypesTesting, nullptr, CLSCTX_INPROC, IID_IMiscTypesTesting, (void**)&miscTypesTesting));
+
+    ::printf("-- Primitives <=> VARIANT...\n");
+    {
+        VariantMarshalTest args{};
+        V_VT(&args.Input) = VT_EMPTY;
+        THROW_IF_FAILED(miscTypesTesting->Marshal_Variant(args.Input, &args.Result));
+        THROW_FAIL_IF_FALSE(V_VT(&args.Input) == V_VT(&args.Result));
+    }
+    {
+        VariantMarshalTest args{};
+        V_VT(&args.Input) = VT_NULL;
+        THROW_IF_FAILED(miscTypesTesting->Marshal_Variant(args.Input, &args.Result));
+        THROW_FAIL_IF_FALSE(V_VT(&args.Input) == V_VT(&args.Result));
+    }
+    {
+        VariantMarshalTest args{};
+        V_VT(&args.Input) = VT_I1;
+        V_I1(&args.Input) = 0x0f;
+        THROW_IF_FAILED(miscTypesTesting->Marshal_Variant(args.Input, &args.Result));
+        THROW_FAIL_IF_FALSE(V_I1(&args.Input) == V_I1(&args.Result));
+    }
+    {
+        VariantMarshalTest args{};
+        V_VT(&args.Input) = VT_I2;
+        V_I2(&args.Input) = 0x07ff;
+        THROW_IF_FAILED(miscTypesTesting->Marshal_Variant(args.Input, &args.Result));
+        THROW_FAIL_IF_FALSE(V_I2(&args.Input) == V_I2(&args.Result));
+    }
+    {
+        VariantMarshalTest args{};
+        V_VT(&args.Input) = VT_I4;
+        V_I4(&args.Input) = 0x07ffffff;
+        THROW_IF_FAILED(miscTypesTesting->Marshal_Variant(args.Input, &args.Result));
+        THROW_FAIL_IF_FALSE(V_I4(&args.Input) == V_I4(&args.Result));
+    }
+    {
+        VariantMarshalTest args{};
+        V_VT(&args.Input) = VT_I8;
+        V_I8(&args.Input) = 0x07ffffffffffffff;
+        THROW_IF_FAILED(miscTypesTesting->Marshal_Variant(args.Input, &args.Result));
+        THROW_FAIL_IF_FALSE(V_I8(&args.Input) == V_I8(&args.Result));
+    }
+    {
+        VariantMarshalTest args{};
+        V_VT(&args.Input) = VT_BOOL;
+        V_BOOL(&args.Input) = VARIANT_TRUE;
+        THROW_IF_FAILED(miscTypesTesting->Marshal_Variant(args.Input, &args.Result));
+        THROW_FAIL_IF_FALSE(V_BOOL(&args.Input) == V_BOOL(&args.Result));
+    }
+    {
+        VariantMarshalTest args{};
+        V_VT(&args.Input) = VT_BOOL;
+        V_BOOL(&args.Input) = VARIANT_FALSE;
+        THROW_IF_FAILED(miscTypesTesting->Marshal_Variant(args.Input, &args.Result));
+        THROW_FAIL_IF_FALSE(V_BOOL(&args.Input) == V_BOOL(&args.Result));
+    }
+
+    ::printf("-- BSTR <=> VARIANT...\n");
+    {
+        VariantMarshalTest args{};
+        V_VT(&args.Input) = VT_BSTR;
+        V_BSTR(&args.Input) = ::SysAllocString(W("The quick Fox jumped over the lazy Dog."));
+        THROW_IF_FAILED(miscTypesTesting->Marshal_Variant(args.Input, &args.Result));
+        THROW_FAIL_IF_FALSE(CompareStringOrdinal(V_BSTR(&args.Input), -1, V_BSTR(&args.Result), -1, FALSE) == CSTR_EQUAL);
+    }
+
+    ::printf("-- System.Guid <=> VARIANT...\n");
+    {
+        /* 8EFAD956-B33D-46CB-90F4-45F55BA68A96 */
+        const GUID expected = { 0x8EFAD956, 0xB33D, 0x46CB, { 0x90, 0xF4, 0x45, 0xF5, 0x5B, 0xA6, 0x8A, 0x96} };
+
+        // Get a System.Guid into native
+        VariantMarshalTest guidVar;
+        THROW_IF_FAILED(miscTypesTesting->Marshal_Instance_Variant(W("{8EFAD956-B33D-46CB-90F4-45F55BA68A96}"), &guidVar.Result));
+        THROW_FAIL_IF_FALSE(V_VT(&guidVar.Result) == VT_RECORD);
+        THROW_FAIL_IF_FALSE(memcmp(V_RECORD(&guidVar.Result), &expected, sizeof(expected)) == 0);
+
+        // Use the Guid as input.
+        VariantMarshalTest args{};
+        THROW_IF_FAILED(::VariantCopy(&args.Input, &guidVar.Result));
+        THROW_IF_FAILED(miscTypesTesting->Marshal_Variant(args.Input, &args.Result));
+        THROW_FAIL_IF_FALSE(V_VT(&args.Input) == V_VT(&args.Result));
+        THROW_FAIL_IF_FALSE(memcmp(V_RECORD(&args.Input), V_RECORD(&args.Result), sizeof(expected)) == 0);
+    }
+}
diff --git a/src/tests/Interop/COM/NativeClients/Primitives/CoreShim.X.manifest b/src/tests/Interop/COM/NativeClients/Primitives/CoreShim.X.manifest
index 099f3a36e169..8b8e6ad135a2 100644
--- a/src/tests/Interop/COM/NativeClients/Primitives/CoreShim.X.manifest
+++ b/src/tests/Interop/COM/NativeClients/Primitives/CoreShim.X.manifest
@@ -19,6 +19,10 @@
   <comClass
     clsid="{C73C83E8-51A2-47F8-9B5C-4284458E47A6}"
     threadingModel="Both" />
+  <!-- MiscTypesTesting -->
+  <comClass
+    clsid="{CCFF894B-A27C-45E0-9B30-6C88D722E843}"
+    threadingModel="Both" />
   <!-- ErrorMarshalTesting -->
   <comClass
     clsid="{71CF5C45-106C-4B32-B418-43A463C6041F}"
diff --git a/src/tests/Interop/COM/NativeServer/COMNativeServer.X.manifest b/src/tests/Interop/COM/NativeServer/COMNativeServer.X.manifest
index 8031294fc56f..9f688bc8e22b 100644
--- a/src/tests/Interop/COM/NativeServer/COMNativeServer.X.manifest
+++ b/src/tests/Interop/COM/NativeServer/COMNativeServer.X.manifest
@@ -22,6 +22,11 @@
     clsid="{C73C83E8-51A2-47F8-9B5C-4284458E47A6}"
     threadingModel="Both" />
 
+  <!-- MiscTypesTesting -->
+  <comClass
+    clsid="{CCFF894B-A27C-45E0-9B30-6C88D722E843}"
+    threadingModel="Both" />
+
   <!-- ErrorMarshalTesting -->
   <comClass
     clsid="{71CF5C45-106C-4B32-B418-43A463C6041F}"
diff --git a/src/tests/Interop/COM/NativeServer/MiscTypesTesting.h b/src/tests/Interop/COM/NativeServer/MiscTypesTesting.h
new file mode 100644
index 000000000000..a3e89902f0b5
--- /dev/null
+++ b/src/tests/Interop/COM/NativeServer/MiscTypesTesting.h
@@ -0,0 +1,31 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#pragma once
+
+#include <xplatform.h>
+#include "Servers.h"
+
+class MiscTypesTesting : public UnknownImpl, public IMiscTypesTesting
+{
+public: // IMiscTypesTesting
+    DEF_FUNC(Marshal_Variant)(_In_ VARIANT obj, _Out_ VARIANT* result)
+    {
+        return ::VariantCopy(result, &obj);
+    }
+
+    DEF_FUNC(Marshal_Instance_Variant)(_In_ LPCWSTR init, _Out_ VARIANT* result)
+    {
+        return E_NOTIMPL;
+    }
+
+public: // IUnknown
+    STDMETHOD(QueryInterface)(
+        /* [in] */ REFIID riid,
+        /* [iid_is][out] */ _COM_Outptr_ void __RPC_FAR *__RPC_FAR *ppvObject)
+    {
+        return DoQueryInterface(riid, ppvObject, static_cast<IMiscTypesTesting *>(this));
+    }
+
+    DEFINE_REF_COUNTING();
+};
\ No newline at end of file
diff --git a/src/tests/Interop/COM/NativeServer/Servers.cpp b/src/tests/Interop/COM/NativeServer/Servers.cpp
index f2becfe4d094..05f26be8d474 100644
--- a/src/tests/Interop/COM/NativeServer/Servers.cpp
+++ b/src/tests/Interop/COM/NativeServer/Servers.cpp
@@ -162,6 +162,7 @@ STDAPI DllRegisterServer(void)
     RETURN_IF_FAILED(RegisterClsid(__uuidof(NumericTesting), L"Both"));
     RETURN_IF_FAILED(RegisterClsid(__uuidof(ArrayTesting), L"Both"));
     RETURN_IF_FAILED(RegisterClsid(__uuidof(StringTesting), L"Both"));
+    RETURN_IF_FAILED(RegisterClsid(__uuidof(MiscTypesTesting), L"Both"));
     RETURN_IF_FAILED(RegisterClsid(__uuidof(ErrorMarshalTesting), L"Both"));
     RETURN_IF_FAILED(RegisterClsid(__uuidof(DispatchTesting), L"Both"));
     RETURN_IF_FAILED(RegisterClsid(__uuidof(EventTesting), L"Both"));
@@ -180,6 +181,7 @@ STDAPI DllUnregisterServer(void)
     RETURN_IF_FAILED(RemoveClsid(__uuidof(NumericTesting)));
     RETURN_IF_FAILED(RemoveClsid(__uuidof(ArrayTesting)));
     RETURN_IF_FAILED(RemoveClsid(__uuidof(StringTesting)));
+    RETURN_IF_FAILED(RemoveClsid(__uuidof(MiscTypesTesting)));
     RETURN_IF_FAILED(RemoveClsid(__uuidof(ErrorMarshalTesting)));
     RETURN_IF_FAILED(RemoveClsid(__uuidof(DispatchTesting)));
     RETURN_IF_FAILED(RemoveClsid(__uuidof(EventTesting)));
@@ -202,6 +204,9 @@ STDAPI DllGetClassObject(_In_ REFCLSID rclsid, _In_ REFIID riid, _Out_ LPVOID FA
     if (rclsid == __uuidof(StringTesting))
         return ClassFactoryBasic<StringTesting>::Create(riid, ppv);
 
+    if (rclsid == __uuidof(MiscTypesTesting))
+        return ClassFactoryBasic<MiscTypesTesting>::Create(riid, ppv);
+
     if (rclsid == __uuidof(ErrorMarshalTesting))
         return ClassFactoryBasic<ErrorMarshalTesting>::Create(riid, ppv);
 
diff --git a/src/tests/Interop/COM/NativeServer/Servers.h b/src/tests/Interop/COM/NativeServer/Servers.h
index 7c9ec0300bc6..c87288d2535b 100644
--- a/src/tests/Interop/COM/NativeServer/Servers.h
+++ b/src/tests/Interop/COM/NativeServer/Servers.h
@@ -12,6 +12,7 @@
 class DECLSPEC_UUID("53169A33-E85D-4E3C-B668-24E438D0929B") NumericTesting;
 class DECLSPEC_UUID("B99ABE6A-DFF6-440F-BFB6-55179B8FE18E") ArrayTesting;
 class DECLSPEC_UUID("C73C83E8-51A2-47F8-9B5C-4284458E47A6") StringTesting;
+class DECLSPEC_UUID("CCFF894B-A27C-45E0-9B30-6C88D722E843") MiscTypesTesting;
 class DECLSPEC_UUID("71CF5C45-106C-4B32-B418-43A463C6041F") ErrorMarshalTesting;
 class DECLSPEC_UUID("0F8ACD0C-ECE0-4F2A-BD1B-6BFCA93A0726") DispatchTesting;
 class DECLSPEC_UUID("4DBD9B61-E372-499F-84DE-EFC70AA8A009") EventTesting;
@@ -25,6 +26,7 @@ class DECLSPEC_UUID("4F54231D-9E11-4C0B-8E0B-2EBD8B0E5811") TrackMyLifetimeTesti
 #define CLSID_NumericTesting __uuidof(NumericTesting)
 #define CLSID_ArrayTesting __uuidof(ArrayTesting)
 #define CLSID_StringTesting __uuidof(StringTesting)
+#define CLSID_MiscTypesTesting __uuidof(MiscTypesTesting)
 #define CLSID_ErrorMarshalTesting __uuidof(ErrorMarshalTesting)
 #define CLSID_DispatchTesting __uuidof(DispatchTesting)
 #define CLSID_EventTesting __uuidof(EventTesting)
@@ -38,6 +40,7 @@ class DECLSPEC_UUID("4F54231D-9E11-4C0B-8E0B-2EBD8B0E5811") TrackMyLifetimeTesti
 #define IID_INumericTesting __uuidof(INumericTesting)
 #define IID_IArrayTesting __uuidof(IArrayTesting)
 #define IID_IStringTesting __uuidof(IStringTesting)
+#define IID_IMiscTypesTesting __uuidof(IMiscTypesTesting)
 #define IID_IErrorMarshalTesting __uuidof(IErrorMarshalTesting)
 #define IID_IDispatchTesting __uuidof(IDispatchTesting)
 #define IID_TestingEvents __uuidof(TestingEvents)
@@ -82,6 +85,7 @@ struct CoreShimComActivation
     #include "NumericTesting.h"
     #include "ArrayTesting.h"
     #include "StringTesting.h"
+    #include "MiscTypesTesting.h"
     #include "ErrorMarshalTesting.h"
     #include "DispatchTesting.h"
     #include "EventTesting.h"
diff --git a/src/tests/Interop/COM/ServerContracts/Server.CoClasses.cs b/src/tests/Interop/COM/ServerContracts/Server.CoClasses.cs
index 0b6f988f1a7a..2479e6cd6f08 100644
--- a/src/tests/Interop/COM/ServerContracts/Server.CoClasses.cs
+++ b/src/tests/Interop/COM/ServerContracts/Server.CoClasses.cs
@@ -10,7 +10,7 @@ namespace Server.Contract.Servers
     using System.Runtime.InteropServices;
 
     /// <summary>
-    /// Managed definition of CoClass 
+    /// Managed definition of CoClass
     /// </summary>
     [ComImport]
     [CoClass(typeof(NumericTestingClass))]
@@ -29,7 +29,7 @@ internal class NumericTestingClass
     }
 
     /// <summary>
-    /// Managed definition of CoClass 
+    /// Managed definition of CoClass
     /// </summary>
     [ComImport]
     [CoClass(typeof(ArrayTestingClass))]
@@ -48,7 +48,7 @@ internal class ArrayTestingClass
     }
 
     /// <summary>
-    /// Managed definition of CoClass 
+    /// Managed definition of CoClass
     /// </summary>
     [ComImport]
     [CoClass(typeof(StringTestingClass))]
@@ -67,7 +67,26 @@ internal class StringTestingClass
     }
 
     /// <summary>
-    /// Managed definition of CoClass 
+    /// Managed definition of CoClass
+    /// </summary>
+    [ComImport]
+    [CoClass(typeof(MiscTypesTestingClass))]
+    [Guid("7FBB8677-BDD0-4E5A-B38B-CA92A4555466")]
+    internal interface MiscTypesTesting : Server.Contract.IMiscTypesTesting
+    {
+    }
+
+    /// <summary>
+    /// Managed activation for CoClass
+    /// </summary>
+    [ComImport]
+    [Guid(Server.Contract.Guids.MiscTypesTesting)]
+    internal class MiscTypesTestingClass
+    {
+    }
+
+    /// <summary>
+    /// Managed definition of CoClass
     /// </summary>
     [ComImport]
     [CoClass(typeof(ErrorMarshalTestingClass))]
@@ -86,7 +105,7 @@ internal class ErrorMarshalTestingClass
     }
 
     /// <summary>
-    /// Managed definition of CoClass 
+    /// Managed definition of CoClass
     /// </summary>
     [ComImport]
     [CoClass(typeof(DispatchTestingClass))]
@@ -105,7 +124,7 @@ internal class DispatchTestingClass
     }
 
     /// <summary>
-    /// Managed definition of CoClass 
+    /// Managed definition of CoClass
     /// </summary>
     [ComImport]
     [CoClass(typeof(AggregationTestingClass))]
@@ -124,7 +143,7 @@ internal class AggregationTestingClass
     }
 
     /// <summary>
-    /// Managed definition of CoClass 
+    /// Managed definition of CoClass
     /// </summary>
     [ComImport]
     [CoClass(typeof(ColorTestingClass))]
diff --git a/src/tests/Interop/COM/ServerContracts/Server.Contracts.cs b/src/tests/Interop/COM/ServerContracts/Server.Contracts.cs
index 0bac21e66ee1..dd0f71634e2b 100644
--- a/src/tests/Interop/COM/ServerContracts/Server.Contracts.cs
+++ b/src/tests/Interop/COM/ServerContracts/Server.Contracts.cs
@@ -184,6 +184,17 @@ string Add_BStr(
         void Pass_Through_LCID(out int lcid);
     }
 
+    [ComVisible(true)]
+    [Guid("7FBB8677-BDD0-4E5A-B38B-CA92A4555466")]
+    [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
+    public interface IMiscTypesTesting
+    {
+        object Marshal_Variant(object obj);
+
+        // Test API for marshalling an arbitrary type via VARIANT
+        object Marshal_Instance_Variant([MarshalAs(UnmanagedType.LPWStr)] string init);
+    }
+
     public struct HResult
     {
         public int hr;
diff --git a/src/tests/Interop/COM/ServerContracts/Server.Contracts.h b/src/tests/Interop/COM/ServerContracts/Server.Contracts.h
index 1eb0528aae4b..d2c26884589e 100644
--- a/src/tests/Interop/COM/ServerContracts/Server.Contracts.h
+++ b/src/tests/Interop/COM/ServerContracts/Server.Contracts.h
@@ -366,6 +366,18 @@ IStringTesting : IUnknown
         /*[out]*/ LCID* outLcid) = 0;
 };
 
+struct __declspec(uuid("7FBB8677-BDD0-4E5A-B38B-CA92A4555466"))
+IMiscTypesTesting : IUnknown
+{
+      virtual HRESULT STDMETHODCALLTYPE Marshal_Variant (
+        /*[in]*/ VARIANT obj,
+        /*[out,retval]*/ VARIANT* result) = 0;
+
+      virtual HRESULT STDMETHODCALLTYPE Marshal_Instance_Variant (
+        /*[in]*/ LPCWSTR init,
+        /*[out,retval]*/ VARIANT* result) = 0;
+};
+
 struct __declspec(uuid("592386a5-6837-444d-9de3-250815d18556"))
 IErrorMarshalTesting : IUnknown
 {
diff --git a/src/tests/Interop/COM/ServerContracts/ServerGuids.cs b/src/tests/Interop/COM/ServerContracts/ServerGuids.cs
index 5336cde54106..8b0c65a3ce15 100644
--- a/src/tests/Interop/COM/ServerContracts/ServerGuids.cs
+++ b/src/tests/Interop/COM/ServerContracts/ServerGuids.cs
@@ -11,6 +11,7 @@ internal sealed class Guids
         public const string NumericTesting = "53169A33-E85D-4E3C-B668-24E438D0929B";
         public const string ArrayTesting = "B99ABE6A-DFF6-440F-BFB6-55179B8FE18E";
         public const string StringTesting = "C73C83E8-51A2-47F8-9B5C-4284458E47A6";
+        public const string MiscTypesTesting = "CCFF894B-A27C-45E0-9B30-6C88D722E843";
         public const string ErrorMarshalTesting = "71CF5C45-106C-4B32-B418-43A463C6041F";
         public const string DispatchTesting = "0F8ACD0C-ECE0-4F2A-BD1B-6BFCA93A0726";
         public const string EventTesting = "4DBD9B61-E372-499F-84DE-EFC70AA8A009";
diff --git a/src/tests/Interop/DllImportSearchPaths/DllImportSearchPathsTest.cs b/src/tests/Interop/DllImportSearchPaths/DllImportSearchPathsTest.cs
index b86a77cbc521..00c7b375a7ed 100644
--- a/src/tests/Interop/DllImportSearchPaths/DllImportSearchPathsTest.cs
+++ b/src/tests/Interop/DllImportSearchPaths/DllImportSearchPathsTest.cs
@@ -21,7 +21,7 @@ public static void AssemblyDirectory_NotFound()
 
     public static bool CanLoadAssemblyInSubdirectory =>
         !TestLibrary.Utilities.IsNativeAot &&
-        !TestLibrary.PlatformDetection.IsMonoLLVMFULLAOT &&
+        !TestLibrary.PlatformDetection.IsMonoFULLAOT &&
         !OperatingSystem.IsAndroid() &&
         !OperatingSystem.IsIOS() &&
         !OperatingSystem.IsTvOS() &&
diff --git a/src/tests/Interop/IJW/CopyConstructorMarshaler/CopyConstructorMarshaler.cs b/src/tests/Interop/IJW/CopyConstructorMarshaler/CopyConstructorMarshaler.cs
index 376b64e623c8..5e60d0712d9c 100644
--- a/src/tests/Interop/IJW/CopyConstructorMarshaler/CopyConstructorMarshaler.cs
+++ b/src/tests/Interop/IJW/CopyConstructorMarshaler/CopyConstructorMarshaler.cs
@@ -26,25 +26,32 @@ public static int TestEntryPoint()
                 object testInstance = Activator.CreateInstance(testType);
                 MethodInfo testMethod = testType.GetMethod("PInvokeNumCopies");
 
+                // On x86, we have an additional copy on every P/Invoke from the "native" parameter to the actual location on the stack.
+                int platformExtra = 0;
+                if (RuntimeInformation.ProcessArchitecture == Architecture.X86)
+                {
+                    platformExtra = 1;
+                }
+    
                 // PInvoke will copy twice. Once from argument to parameter, and once from the managed to native parameter.
-                Assert.Equal(2, (int)testMethod.Invoke(testInstance, null));
+                Assert.Equal(2 + platformExtra, (int)testMethod.Invoke(testInstance, null));
 
                 testMethod = testType.GetMethod("ReversePInvokeNumCopies");
 
                 // Reverse PInvoke will copy 3 times. Two are from the same paths as the PInvoke,
                 // and the third is from the reverse P/Invoke call.
-                Assert.Equal(3, (int)testMethod.Invoke(testInstance, null));
+                Assert.Equal(3 + platformExtra, (int)testMethod.Invoke(testInstance, null));
 
                 testMethod = testType.GetMethod("PInvokeNumCopiesDerivedType");
 
                 // PInvoke will copy twice. Once from argument to parameter, and once from the managed to native parameter.
-                Assert.Equal(2, (int)testMethod.Invoke(testInstance, null));
+                Assert.Equal(2 + platformExtra, (int)testMethod.Invoke(testInstance, null));
 
                 testMethod = testType.GetMethod("ReversePInvokeNumCopiesDerivedType");
 
                 // Reverse PInvoke will copy 3 times. Two are from the same paths as the PInvoke,
                 // and the third is from the reverse P/Invoke call.
-                Assert.Equal(3, (int)testMethod.Invoke(testInstance, null));
+                Assert.Equal(3 + platformExtra, (int)testMethod.Invoke(testInstance, null));
             }
             catch (Exception ex)
             {
@@ -54,6 +61,17 @@ public static int TestEntryPoint()
             return 100;
         }
 
+        [Fact]
+        public static void CopyConstructorsInArgumentStackSlots()
+        {
+            Assembly ijwNativeDll = Assembly.Load("IjwCopyConstructorMarshaler");
+            Type testType = ijwNativeDll.GetType("TestClass");
+            object testInstance = Activator.CreateInstance(testType);
+            MethodInfo testMethod = testType.GetMethod("ExposedThisCopyConstructorScenario");
+
+            Assert.Equal(0, (int)testMethod.Invoke(testInstance, null));
+        }
+
         [DllImport("kernel32.dll")]
         static extern IntPtr LoadLibraryEx(string lpFileName, IntPtr hReservedNull, int dwFlags);
 
diff --git a/src/tests/Interop/IJW/CopyConstructorMarshaler/IjwCopyConstructorMarshaler.cpp b/src/tests/Interop/IJW/CopyConstructorMarshaler/IjwCopyConstructorMarshaler.cpp
index bd1d1b80829d..c3d50cf77836 100644
--- a/src/tests/Interop/IJW/CopyConstructorMarshaler/IjwCopyConstructorMarshaler.cpp
+++ b/src/tests/Interop/IJW/CopyConstructorMarshaler/IjwCopyConstructorMarshaler.cpp
@@ -1,5 +1,90 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
+#pragma unmanaged
+#include <vector>
+#include <iostream>
+
+namespace ExposedThis
+{
+    struct Relative;
+
+    std::vector<Relative*> relatives;
+
+    int numMissedCopies = 0;
+
+    struct Relative
+    {
+        void* relative;
+        Relative()
+        {
+            std::cout << "Registering " << std::hex << this << "\n";
+            relatives.push_back(this);
+            relative = this - 1;
+        }
+
+        Relative(const Relative& other)
+        {
+            std::cout << "Registering copy of " << std::hex << &other << " at " << this << "\n";
+            relatives.push_back(this);
+            relative = this - 1;
+        }
+
+        ~Relative()
+        {
+            auto location = std::find(relatives.begin(), relatives.end(), this);
+            if (location != relatives.end())
+            {
+                std::cout << "Unregistering " << std::hex << this << "\n";
+                relatives.erase(location);
+            }
+            else
+            {
+                std::cout << "Error: Relative object " << std::hex << this << " not registered\n";
+                numMissedCopies++;
+            }
+
+            if (relative != this - 1)
+            {
+                std::cout << " Error: Relative object " << std::hex << this << " has invalid relative pointer " << std::hex << relative << "\n";
+                numMissedCopies++;
+            }
+        }
+    };
+
+    void UseRelative(Relative rel)
+    {
+        std::cout << "Unmanaged: Using relative at address " << std::hex << &rel << "\n";
+    }
+
+    void UseRelativeManaged(Relative rel);
+
+    void CallRelative()
+    {
+        Relative rel;
+        UseRelativeManaged(rel);
+    }
+
+#pragma managed
+
+    int RunScenario()
+    {
+        // Managed to unmanaged
+        {
+            Relative rel;
+            UseRelative(rel);
+        }
+
+        // Unmanaged to managed
+        CallRelative();
+
+        return numMissedCopies;
+    }
+
+    void UseRelativeManaged(Relative rel)
+    {
+        std::cout << "Managed: Using relative at address " << std::hex << &rel << "\n";
+    }
+}
 
 #pragma managed
 class A
@@ -102,4 +187,9 @@ public ref class TestClass
         B b;
         return GetCopyCount_ViaManaged(b);
     }
+
+    int ExposedThisCopyConstructorScenario()
+    {
+        return ExposedThis::RunScenario();
+    }
 };
diff --git a/src/tests/Interop/Interop.csproj b/src/tests/Interop/Interop.csproj
index 6fc22f7bf4f2..2a02c40a4fd5 100644
--- a/src/tests/Interop/Interop.csproj
+++ b/src/tests/Interop/Interop.csproj
@@ -1,7 +1,11 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' == 'mono' and '$(RuntimeVariant)' == 'llvmfullaot'">true</CLRTestTargetUnsupported>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <Configurations>Debug;Release;Checked</Configurations>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' == 'mono' and '$(RuntimeVariant)' == 'minifullaot'">true</CLRTestTargetUnsupported>
   </PropertyGroup>
   <ItemGroup>
     <SupportProject Include="$(TestLibraryProjectPath)" />
diff --git a/src/tests/Interop/MarshalAPI/FunctionPointer/FunctionPointer.cs b/src/tests/Interop/MarshalAPI/FunctionPointer/FunctionPointer.cs
index 766a37efd00c..7b0fd902a7bd 100644
--- a/src/tests/Interop/MarshalAPI/FunctionPointer/FunctionPointer.cs
+++ b/src/tests/Interop/MarshalAPI/FunctionPointer/FunctionPointer.cs
@@ -1,6 +1,8 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 using System;
+using System.IO;
+using System.Reflection;
 using System.Runtime.InteropServices;
 using Xunit;
 
@@ -19,15 +21,13 @@ static class FunctionPointerNative
         [DllImport(nameof(FunctionPointerNative))]
         static unsafe extern void FillOutPtr(IntPtr* p);
 
-	[DllImport(nameof(FunctionPointerNative))]
-	static unsafe extern void FillOutIntParameter(out IntPtr p);
+        [DllImport(nameof(FunctionPointerNative))]
+        static unsafe extern void FillOutIntParameter(out IntPtr p);
     }
 
     delegate void VoidDelegate();
 
     [Fact]
-
-    [ActiveIssue("https://github.com/dotnet/runtimelab/issues/164", typeof(TestLibrary.Utilities), nameof(TestLibrary.Utilities.IsNativeAot))]
     public static void RunGetDelForFcnPtrTest()
     {
         Console.WriteLine($"Running {nameof(RunGetDelForFcnPtrTest)}...");
@@ -40,6 +40,10 @@ public static void RunGetDelForFcnPtrTest()
             VoidDelegate del = (VoidDelegate)Marshal.GetDelegateForFunctionPointer(fcnptr, typeof(VoidDelegate));
             Assert.Equal(md.Target, del.Target);
             Assert.Equal(md.Method, del.Method);
+
+            VoidDelegate del2 = (VoidDelegate)Marshal.GetDelegateForFunctionPointer(fcnptr, typeof(VoidDelegate));
+            Assert.Equal(del, del2);
+            Assert.Equal(del.GetHashCode(), del2.GetHashCode());
         }
 
         // Native FcnPtr -> Delegate
@@ -49,6 +53,10 @@ public static void RunGetDelForFcnPtrTest()
             Assert.Null(del.Target);
             Assert.Equal("Invoke", del.Method.Name);
 
+            VoidDelegate del2 = (VoidDelegate)Marshal.GetDelegateForFunctionPointer(fcnptr, typeof(VoidDelegate));;
+            Assert.Equal(del, del2);
+            Assert.Equal(del.GetHashCode(), del2.GetHashCode());
+
             // Round trip of a native function pointer is never legal for a non-concrete Delegate type
             Assert.Throws<ArgumentException>(() =>
             {
diff --git a/src/tests/Interop/MarshalAPI/FunctionPointer/GenericFunctionPointer.cs b/src/tests/Interop/MarshalAPI/FunctionPointer/GenericFunctionPointer.cs
new file mode 100644
index 000000000000..da2fc75d9138
--- /dev/null
+++ b/src/tests/Interop/MarshalAPI/FunctionPointer/GenericFunctionPointer.cs
@@ -0,0 +1,126 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+using System;
+using System.IO;
+using System.Reflection;
+using System.Runtime.InteropServices;
+using Xunit;
+
+public partial class FunctionPtr
+{
+    public static bool CanRunGenericFunctionPointerTest => !TestLibrary.Utilities.IsMonoRuntime;
+    public static bool CanRunInvalidGenericFunctionPointerTest => !TestLibrary.Utilities.IsNativeAot && !TestLibrary.Utilities.IsMonoRuntime;
+
+    [UnmanagedCallersOnly]
+    static int UnmanagedExportedFunction(float arg)
+    {
+        return Convert.ToInt32(arg);
+    }
+    
+    [UnmanagedCallersOnly]
+    static BlittableGeneric<int> UnmanagedExportedFunctionBlittableGenericInt(float arg)
+    {
+        return new() { X = Convert.ToInt32(arg) };
+    }
+
+    [UnmanagedCallersOnly]
+    static BlittableGeneric<string> UnmanagedExportedFunctionBlittableGenericString(float arg)
+    {
+        return new() { X = Convert.ToInt32(arg) };
+    }
+
+    [UnmanagedCallersOnly]
+    static unsafe void UnmanagedExportedFunctionRefInt(int* pval, float arg)
+    {
+        *pval = Convert.ToInt32(arg);
+    }
+
+    class GenericCaller<T>
+    {
+        internal static unsafe T GenericCalli<U>(void* fnptr, U arg)
+        {
+            return ((delegate* unmanaged<U, T>)fnptr)(arg);
+        }
+
+        internal static unsafe BlittableGeneric<T> WrappedGenericCalli<U>(void* fnptr, U arg)
+        {
+            return ((delegate* unmanaged<U, BlittableGeneric<T>>)fnptr)(arg);
+        }
+
+        internal static unsafe void NonGenericCalli<U>(void* fnptr, ref int val, float arg)
+        {
+            ((delegate* unmanaged<ref int, float, void>)fnptr)(ref val, arg);
+        }
+    }
+
+    struct BlittableGeneric<T>
+    {
+        public int X;
+    }
+
+    [ConditionalTheory(nameof(CanRunGenericFunctionPointerTest))]
+    [InlineData(0f)]
+    [InlineData(1f)]
+    [InlineData(-1f)]
+    [InlineData(42f)]
+    [InlineData(60f)]
+    public static void RunGenericFunctionPointerTest(float inVal)
+    {
+        Console.WriteLine($"Running {nameof(RunGenericFunctionPointerTest)}...");
+        int outVar = 0;
+        int expectedValue = Convert.ToInt32(inVal);
+
+        Console.WriteLine("Testing GenericCalli with int as the return type");
+        unsafe
+        {
+            outVar = GenericCaller<int>.GenericCalli((delegate* unmanaged<float, int>)&UnmanagedExportedFunction, inVal);
+        }
+        Assert.Equal(expectedValue, outVar);
+        
+        outVar = 0;
+        Console.WriteLine("Testing GenericCalli with BlittableGeneric<int> as the return type");
+        unsafe
+        {
+            outVar = GenericCaller<int>.WrappedGenericCalli((delegate* unmanaged<float, BlittableGeneric<int>>)&UnmanagedExportedFunctionBlittableGenericInt, inVal).X;
+        }
+        Assert.Equal(expectedValue, outVar);
+
+        outVar = 0;
+        Console.WriteLine("Testing GenericCalli with BlittableGeneric<string> as the return type");
+        unsafe
+        {
+            outVar = GenericCaller<string>.WrappedGenericCalli((delegate* unmanaged<float, BlittableGeneric<string>>)&UnmanagedExportedFunctionBlittableGenericString, inVal).X;
+        }
+        Assert.Equal(expectedValue, outVar);
+
+        outVar = 0;
+        Console.WriteLine("Testing non-GenericCalli with non-blittable argument in a generic caller");
+        unsafe
+        {
+            GenericCaller<string>.NonGenericCalli<string>((delegate* unmanaged<int*, float, void>)&UnmanagedExportedFunctionRefInt, ref outVar, inVal);
+        }
+        Assert.Equal(expectedValue, outVar);
+    }
+
+    [ConditionalFact(nameof(CanRunInvalidGenericFunctionPointerTest))]
+    public static void RunInvalidGenericFunctionPointerTest()
+    {
+        Console.WriteLine($"Running {nameof(RunInvalidGenericFunctionPointerTest)}...");
+        unsafe
+        {
+            nint fnptr = (nint)(delegate* unmanaged<nint*, nint*>)&ReturnParameter;
+            Console.WriteLine("Testing GenericCalli with string as the parameter type");
+            Assert.Throws<MarshalDirectiveException>(() => GenericCaller<int>.GenericCalli((delegate* unmanaged<string, int>)fnptr, "test"));
+            Console.WriteLine("Testing GenericCalli with string as the return type");
+            Assert.Throws<MarshalDirectiveException>(() => GenericCaller<string>.GenericCalli((delegate* unmanaged<int, string>)fnptr, "test"));
+            Console.WriteLine("Testing GenericCalli with string as both the parameter and return type");
+            Assert.Throws<MarshalDirectiveException>(() => GenericCaller<string>.GenericCalli((delegate* unmanaged<string, string>)fnptr, "test"));
+        }
+    }
+
+    [UnmanagedCallersOnly]
+    static unsafe nint* ReturnParameter(nint* p)
+    {
+        return p;
+    }
+}
diff --git a/src/tests/Interop/NativeLibrary/API/NativeLibraryTests.cs b/src/tests/Interop/NativeLibrary/API/NativeLibraryTests.cs
index 2c43feaa3be9..09a3d6c41b90 100644
--- a/src/tests/Interop/NativeLibrary/API/NativeLibraryTests.cs
+++ b/src/tests/Interop/NativeLibrary/API/NativeLibraryTests.cs
@@ -182,7 +182,7 @@ public void LoadLibrary_AssemblyDirectory()
 
         string subdirectory = Path.Combine(testBinDir, "subdirectory");
 
-        if (!TestLibrary.Utilities.IsNativeAot && !TestLibrary.PlatformDetection.IsMonoLLVMFULLAOT)
+        if (!TestLibrary.Utilities.IsNativeAot && !TestLibrary.PlatformDetection.IsMonoFULLAOT)
         {
             // Library should be found in the assembly directory
             Assembly assemblyInSubdirectory = Assembly.LoadFile(Path.Combine(subdirectory, $"{assembly.GetName().Name}{suffix}.dll"));
diff --git a/src/tests/Interop/PInvoke/Miscellaneous/CopyCtor/CopyCtorTest.cs b/src/tests/Interop/PInvoke/Miscellaneous/CopyCtor/CopyCtorTest.cs
index 8a3c0b7ba0f9..57ec4d963242 100644
--- a/src/tests/Interop/PInvoke/Miscellaneous/CopyCtor/CopyCtorTest.cs
+++ b/src/tests/Interop/PInvoke/Miscellaneous/CopyCtor/CopyCtorTest.cs
@@ -15,20 +15,37 @@ public static unsafe class CopyCtor
     public static unsafe int StructWithCtorTest(StructWithCtor* ptrStruct, ref StructWithCtor refStruct)
     {
         if (ptrStruct->_instanceField != 1)
+        {
+            Console.WriteLine($"Fail: {ptrStruct->_instanceField} != {1}");
             return 1;
+        }
         if (refStruct._instanceField != 2)
+        {
+            Console.WriteLine($"Fail: {refStruct._instanceField} != {2}");
             return 2;
+        }
 
-        if (StructWithCtor.CopyCtorCallCount != 2)
+        int expectedCallCount = 2;
+        if (RuntimeInformation.ProcessArchitecture == Architecture.X86)
+        {
+            expectedCallCount = 4;
+        }
+
+        if (StructWithCtor.CopyCtorCallCount != expectedCallCount)
+        {
+            Console.WriteLine($"Fail: {StructWithCtor.CopyCtorCallCount} != {expectedCallCount}");
             return 3;
-        if (StructWithCtor.DtorCallCount != 2)
+        }
+        if (StructWithCtor.DtorCallCount != expectedCallCount)
+        {
+            Console.WriteLine($"Fail: {StructWithCtor.DtorCallCount} != {expectedCallCount}");
             return 4;
-
+        }
 
         return 100;
     }
 
-    [Fact]
+    [ConditionalFact(typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsWindows))]
     [SkipOnMono("Not supported on Mono")]
     [ActiveIssue("https://github.com/dotnet/runtimelab/issues/155", typeof(TestLibrary.Utilities), nameof(TestLibrary.Utilities.IsNativeAot))]
     public static unsafe void ValidateCopyConstructorAndDestructorCalled()
diff --git a/src/tests/Interop/PInvoke/Primitives/Pointer/CMakeLists.txt b/src/tests/Interop/PInvoke/Primitives/Pointer/CMakeLists.txt
index ca190ec6735d..0d8bf6e334e2 100644
--- a/src/tests/Interop/PInvoke/Primitives/Pointer/CMakeLists.txt
+++ b/src/tests/Interop/PInvoke/Primitives/Pointer/CMakeLists.txt
@@ -1,6 +1,6 @@
 include ("${CLR_INTEROP_TEST_ROOT}/Interop.cmake")
 set(SOURCES
-    NonBlittablePointerNative.cpp
+    PointerNative.cpp
 )
-add_library (NonBlittablePointerNative SHARED ${SOURCES})
-install (TARGETS NonBlittablePointerNative DESTINATION bin)
+add_library (PointerNative SHARED ${SOURCES})
+install (TARGETS PointerNative DESTINATION bin)
diff --git a/src/tests/Interop/PInvoke/Primitives/Pointer/PointerNative.cpp b/src/tests/Interop/PInvoke/Primitives/Pointer/PointerNative.cpp
new file mode 100644
index 000000000000..801718310697
--- /dev/null
+++ b/src/tests/Interop/PInvoke/Primitives/Pointer/PointerNative.cpp
@@ -0,0 +1,20 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <xplatform.h>
+#include <limits>
+
+extern "C" DLL_EXPORT void STDMETHODCALLTYPE Negate(bool* ptr)
+{
+    *ptr = !*ptr;
+}
+
+extern "C" DLL_EXPORT void STDMETHODCALLTYPE GetNaN(float* ptr)
+{
+    *ptr = std::numeric_limits<float>::quiet_NaN();
+}
+
+extern "C" DLL_EXPORT void STDMETHODCALLTYPE NegateDecimal(DECIMAL* ptr)
+{
+    ptr->sign = ptr->sign == 0 ? 0x80 : 0;
+}
diff --git a/src/tests/Interop/PInvoke/Primitives/Pointer/Program.cs b/src/tests/Interop/PInvoke/Primitives/Pointer/Program.cs
index 57663d03a8c0..623494315728 100644
--- a/src/tests/Interop/PInvoke/Primitives/Pointer/Program.cs
+++ b/src/tests/Interop/PInvoke/Primitives/Pointer/Program.cs
@@ -5,23 +5,61 @@
 using System.Runtime.InteropServices;
 using Xunit;
 
-namespace NonBlittablePointer
+namespace Pointer
 {
-    static class NonBlittablePointerNative
+    static class PointerNative
     {
-        [DllImport(nameof(NonBlittablePointerNative))]
+        [DllImport(nameof(PointerNative))]
         public static unsafe extern void Negate(bool* ptr);
+
+        [DllImport(nameof(PointerNative))]
+        public static unsafe extern void GetNaN(float* ptr);
+
+        [DllImport(nameof(PointerNative))]
+        public static unsafe extern void NegateDecimal(decimal* ptr);
+
+        [DllImport(nameof(PointerNative))]
+        public static unsafe extern void GetNaN(BlittableWrapper<float>* ptr);
+
+        public struct BlittableWrapper<T>
+        {
+            public T Value;
+        }
     }
 
     [ActiveIssue("https://github.com/dotnet/runtime/issues/91388", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.PlatformDoesNotSupportNativeTestAssets))]
     public class Program
     {
         [Fact]
-        public static unsafe int TestEntryPoint()
+        public static unsafe void PointerToBool()
         {
             bool value = true;
-            NonBlittablePointerNative.Negate(&value);
-            return value == false ? 100 : 101;
+            PointerNative.Negate(&value);
+            Assert.False(value);
+        }
+
+        [Fact]
+        public static unsafe void PointerToFloat()
+        {
+            float value = 1.0f;
+            PointerNative.GetNaN(&value);
+            Assert.True(float.IsNaN(value));
+        }
+
+        [Fact]
+        public static unsafe void PointerToDecimal()
+        {
+            decimal value = 1.0m;
+            PointerNative.NegateDecimal(&value);
+            Assert.Equal(-1.0m, value);
+        }
+
+        [Fact]
+        public static unsafe void PointerToStructOfGeneric()
+        {
+            PointerNative.BlittableWrapper<float> wrapper = new(){ Value = 1.0f };
+            PointerNative.GetNaN(&wrapper);
+            Assert.True(float.IsNaN(wrapper.Value));
         }
     }
 }
diff --git a/src/tests/Interop/PInvoke/SizeParamIndex/ReversePInvoke/PassingByOut/PassingByOutTest.cs b/src/tests/Interop/PInvoke/SizeParamIndex/ReversePInvoke/PassingByOut/PassingByOutTest.cs
index 5177ad15d5dc..72cad72fc4fe 100644
--- a/src/tests/Interop/PInvoke/SizeParamIndex/ReversePInvoke/PassingByOut/PassingByOutTest.cs
+++ b/src/tests/Interop/PInvoke/SizeParamIndex/ReversePInvoke/PassingByOut/PassingByOutTest.cs
@@ -159,7 +159,7 @@ public static void RunTestByOut()
 
         Console.WriteLine("\tScenario 3 : short ==> int16_t, Array_Size = -1, Return_Array_Size = 20");
         Assert.True(DoCallBack_MarshalShortArray_AsParam_AsByOut(new DelShortArrByOutAsCdeclCaller(TestMethodForShortArray_AsReversePInvokeByOut_AsCdecl)));
-        Console.WriteLine("\t\tMarshalShortArray_AsReversePInvokeByOut_AsCdecl Failed!");
+        Console.WriteLine("\t\tMarshalShortArray_AsReversePInvokeByOut_AsCdecl Passed!");
 
         Console.WriteLine("\tScenario 4 : short ==> int16_t, Array_Size = 10, Return_Array_Size = -1");
         Assert.True(DoCallBack_MarshalShortArrayReturnNegativeSize_AsParam_AsByOut(new DelShortArrByOutAsCdeclCaller(TestMethodForShortArrayReturnNegativeSize_AsReversePInvokeByOut_AsCdecl)));
diff --git a/src/tests/Interop/StringMarshalling/AnsiBSTR/AnsiBStrTest.csproj b/src/tests/Interop/StringMarshalling/AnsiBSTR/AnsiBStrTest.csproj
index f476d5f41f21..cbf26e129303 100644
--- a/src/tests/Interop/StringMarshalling/AnsiBSTR/AnsiBStrTest.csproj
+++ b/src/tests/Interop/StringMarshalling/AnsiBSTR/AnsiBStrTest.csproj
@@ -2,6 +2,8 @@
   <PropertyGroup>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <DefineConstants>$(DefineConstants);ANSIBSTR</DefineConstants>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'minifullaot' or '$(RuntimeVariant)' == 'llvmfullaot')">true</CLRTestTargetUnsupported>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="*.cs" />
diff --git a/src/tests/Interop/StringMarshalling/BSTR/BSTRTest.csproj b/src/tests/Interop/StringMarshalling/BSTR/BSTRTest.csproj
index 08df9c12dfe4..c3b9f50d217f 100644
--- a/src/tests/Interop/StringMarshalling/BSTR/BSTRTest.csproj
+++ b/src/tests/Interop/StringMarshalling/BSTR/BSTRTest.csproj
@@ -2,6 +2,8 @@
   <PropertyGroup>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <DefineConstants>$(DefineConstants);BSTR</DefineConstants>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'minifullaot' or '$(RuntimeVariant)' == 'llvmfullaot')">true</CLRTestTargetUnsupported>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="*.cs" />
@@ -12,5 +14,6 @@
   </ItemGroup>
   <ItemGroup>
     <ProjectReference Include="$(TestLibraryProjectPath)" />
+    <CMakeProjectReference Include="./CMakeLists.txt" />
   </ItemGroup>
 </Project>
diff --git a/src/tests/Interop/StringMarshalling/LPSTR/LPSTRTest.csproj b/src/tests/Interop/StringMarshalling/LPSTR/LPSTRTest.csproj
index 27998767cb04..f4aa61dc24e8 100644
--- a/src/tests/Interop/StringMarshalling/LPSTR/LPSTRTest.csproj
+++ b/src/tests/Interop/StringMarshalling/LPSTR/LPSTRTest.csproj
@@ -10,5 +10,6 @@
   </ItemGroup>
   <ItemGroup>
     <ProjectReference Include="$(TestLibraryProjectPath)" />
+    <CMakeProjectReference Include="./CMakeLists.txt" />
   </ItemGroup>
 </Project>
diff --git a/src/tests/Interop/StringMarshalling/LPTSTR/LPTSTRTest.csproj b/src/tests/Interop/StringMarshalling/LPTSTR/LPTSTRTest.csproj
index 52006bf63c1d..af1674c30184 100644
--- a/src/tests/Interop/StringMarshalling/LPTSTR/LPTSTRTest.csproj
+++ b/src/tests/Interop/StringMarshalling/LPTSTR/LPTSTRTest.csproj
@@ -2,6 +2,8 @@
   <PropertyGroup>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <DefineConstants>$(DefineConstants);LPTSTR</DefineConstants>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'minifullaot' or '$(RuntimeVariant)' == 'llvmfullaot')">true</CLRTestTargetUnsupported>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="*.cs" />
diff --git a/src/tests/Interop/StringMarshalling/VBByRefStr/VBByRefStrTest.csproj b/src/tests/Interop/StringMarshalling/VBByRefStr/VBByRefStrTest.csproj
index 621a9f162f3b..a3017d0e8145 100644
--- a/src/tests/Interop/StringMarshalling/VBByRefStr/VBByRefStrTest.csproj
+++ b/src/tests/Interop/StringMarshalling/VBByRefStr/VBByRefStrTest.csproj
@@ -1,6 +1,8 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'minifullaot' or '$(RuntimeVariant)' == 'llvmfullaot')">true</CLRTestTargetUnsupported>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="*.cs" />
diff --git a/src/tests/Interop/Swift/SwiftAbiStress/CMakeLists.txt b/src/tests/Interop/Swift/SwiftAbiStress/CMakeLists.txt
new file mode 100644
index 000000000000..6170fe836784
--- /dev/null
+++ b/src/tests/Interop/Swift/SwiftAbiStress/CMakeLists.txt
@@ -0,0 +1,21 @@
+project(SwiftAbiStress)
+include ("${CLR_INTEROP_TEST_ROOT}/Interop.cmake")
+
+set(SOURCE SwiftAbiStress)
+
+if (NOT SWIFT_COMPILER_TARGET AND CLR_CMAKE_TARGET_OSX)
+    set(SWIFT_PLATFORM "macosx")
+    set(SWIFT_PLATFORM_SUFFIX "")
+    set(SWIFT_DEPLOYMENT_TARGET ${CMAKE_OSX_DEPLOYMENT_TARGET})
+    set(SWIFT_COMPILER_TARGET "${CMAKE_OSX_ARCHITECTURES}-apple-${SWIFT_PLATFORM}${SWIFT_DEPLOYMENT_TARGET}${SWIFT_PLATFORM_SUFFIX}")
+endif()
+
+add_custom_target(${SOURCE} ALL
+    COMMAND xcrun swiftc -target ${SWIFT_COMPILER_TARGET} -emit-library ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE}.swift -o ${CMAKE_CURRENT_BINARY_DIR}/lib${SOURCE}.dylib
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE}.swift
+    COMMENT "Generating ${SOURCE} library"
+)
+
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/lib${SOURCE}.dylib
+    DESTINATION bin
+)
diff --git a/src/tests/Interop/Swift/SwiftAbiStress/SwiftAbiStress.cs b/src/tests/Interop/Swift/SwiftAbiStress/SwiftAbiStress.cs
new file mode 100644
index 000000000000..9fab793b6e2c
--- /dev/null
+++ b/src/tests/Interop/Swift/SwiftAbiStress/SwiftAbiStress.cs
@@ -0,0 +1,6686 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.InteropServices.Swift;
+using Xunit;
+
+public class SwiftAbiStress
+{
+    private const string SwiftLib = "libSwiftAbiStress.dylib";
+
+    [StructLayout(LayoutKind.Sequential, Size = 14)]
+    struct F0_S0
+    {
+        public double F0;
+        public uint F1;
+        public ushort F2;
+
+        public F0_S0(double f0, uint f1, ushort f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F0_S1
+    {
+        public ulong F0;
+
+        public F0_S1(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F0_S2
+    {
+        public float F0;
+
+        public F0_S2(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress10swiftFunc02a02a12a22a32a42a52a62a7Sis5Int16V_s5Int32Vs6UInt64Vs6UInt16VAA5F0_S0VAA0R3_S1Vs5UInt8VAA0R3_S2VtF")]
+    private static extern nint SwiftFunc0(short a0, int a1, ulong a2, ushort a3, F0_S0 a4, F0_S1 a5, byte a6, F0_S2 a7);
+
+    [Fact]
+    public static void TestSwiftFunc0()
+    {
+        Console.Write("Running SwiftFunc0: ");
+        long result = SwiftFunc0(-23758, 148652722, 3833542748216839160, 21987, new F0_S0(3425626963407448, 989224444, 55562), new F0_S1(1751696348434043356), 14, new F0_S2(1047842));
+        Assert.Equal(-5199645484972017144, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 26)]
+    struct F1_S0
+    {
+        public long F0;
+        public double F1;
+        public sbyte F2;
+        public int F3;
+        public ushort F4;
+
+        public F1_S0(long f0, double f1, sbyte f2, int f3, ushort f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F1_S1
+    {
+        public byte F0;
+
+        public F1_S1(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F1_S2
+    {
+        public short F0;
+
+        public F1_S2(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress10swiftFunc12a02a12a22a3SiAA5F1_S0V_s5UInt8VAA0J3_S1VAA0J3_S2VtF")]
+    private static extern nint SwiftFunc1(F1_S0 a0, byte a1, F1_S1 a2, F1_S2 a3);
+
+    [Fact]
+    public static void TestSwiftFunc1()
+    {
+        Console.Write("Running SwiftFunc1: ");
+        long result = SwiftFunc1(new F1_S0(6106136698885217102, 6195715435808, 121, 676336729, 51621), 121, new F1_S1(101), new F1_S2(-11974));
+        Assert.Equal(-5789188411070459345, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F2_S0
+    {
+        public nint F0;
+        public nuint F1;
+
+        public F2_S0(nint f0, nuint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 26)]
+    struct F2_S1
+    {
+        public long F0;
+        public int F1;
+        public short F2;
+        public long F3;
+        public ushort F4;
+
+        public F2_S1(long f0, int f1, short f2, long f3, ushort f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F2_S2_S0_S0
+    {
+        public nint F0;
+
+        public F2_S2_S0_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F2_S2_S0
+    {
+        public F2_S2_S0_S0 F0;
+
+        public F2_S2_S0(F2_S2_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F2_S2
+    {
+        public F2_S2_S0 F0;
+
+        public F2_S2(F2_S2_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F2_S3
+    {
+        public byte F0;
+
+        public F2_S3(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F2_S4
+    {
+        public int F0;
+        public nuint F1;
+
+        public F2_S4(int f0, nuint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F2_S5
+    {
+        public float F0;
+
+        public F2_S5(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress10swiftFunc22a02a12a22a32a42a52a62a72a82a93a103a113a123a13Sis5Int64V_s5Int16Vs5Int32VAA5F2_S0Vs5UInt8VAvA0W3_S1VAA0W3_S2Vs6UInt16VSfAA0W3_S3VAA0W3_S4VAA0W3_S5VARtF")]
+    private static extern nint SwiftFunc2(long a0, short a1, int a2, F2_S0 a3, byte a4, int a5, F2_S1 a6, F2_S2 a7, ushort a8, float a9, F2_S3 a10, F2_S4 a11, F2_S5 a12, long a13);
+
+    [Fact]
+    public static void TestSwiftFunc2()
+    {
+        Console.Write("Running SwiftFunc2: ");
+        long result = SwiftFunc2(1467471118999515177, -1109, 1443466834, new F2_S0(unchecked((nint)8641951469425609828), unchecked((nuint)3263825339460718643)), 6, 42857709, new F2_S1(6855376760105631967, 2087467091, 25810, 2495195821026007124, 62146), new F2_S2(new F2_S2_S0(new F2_S2_S0_S0(unchecked((nint)561009218247569242)))), 46110, 7547287, new F2_S3(34), new F2_S4(203178131, unchecked((nuint)8676866947888134131)), new F2_S5(7890213), 5623254678629817168);
+        Assert.Equal(-1831688667491861211, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F3_S0_S0
+    {
+        public nint F0;
+        public uint F1;
+
+        public F3_S0_S0(nint f0, uint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F3_S0
+    {
+        public sbyte F0;
+        public F3_S0_S0 F1;
+        public uint F2;
+
+        public F3_S0(sbyte f0, F3_S0_S0 f1, uint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F3_S1
+    {
+        public long F0;
+        public float F1;
+
+        public F3_S1(long f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F3_S2
+    {
+        public float F0;
+
+        public F3_S2(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F3_S3
+    {
+        public byte F0;
+        public nint F1;
+
+        public F3_S3(byte f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 14)]
+    struct F3_S4
+    {
+        public nuint F0;
+        public float F1;
+        public ushort F2;
+
+        public F3_S4(nuint f0, float f1, ushort f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F3_S5
+    {
+        public uint F0;
+        public long F1;
+
+        public F3_S5(uint f0, long f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 3)]
+    struct F3_S6_S0
+    {
+        public short F0;
+        public byte F1;
+
+        public F3_S6_S0(short f0, byte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 5)]
+    struct F3_S6
+    {
+        public F3_S6_S0 F0;
+        public sbyte F1;
+        public byte F2;
+
+        public F3_S6(F3_S6_S0 f0, sbyte f1, byte f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F3_S7
+    {
+        public ulong F0;
+
+        public F3_S7(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress10swiftFunc32a02a12a22a32a42a52a62a72a82a93a103a113a123a13S2i_AA5F3_S0VAA0T3_S1VSdSiAA0T3_S2VAA0T3_S3VAA0T3_S4VAA0T3_S5Vs6UInt16Vs5Int32VAA0T3_S6VSiAA0T3_S7VtF")]
+    private static extern nint SwiftFunc3(nint a0, F3_S0 a1, F3_S1 a2, double a3, nint a4, F3_S2 a5, F3_S3 a6, F3_S4 a7, F3_S5 a8, ushort a9, int a10, F3_S6 a11, nint a12, F3_S7 a13);
+
+    [Fact]
+    public static void TestSwiftFunc3()
+    {
+        Console.Write("Running SwiftFunc3: ");
+        long result = SwiftFunc3(unchecked((nint)3764414362291906102), new F3_S0(23, new F3_S0_S0(unchecked((nint)3007367655161186204), 549733154), 38928730), new F3_S1(338326426991485790, 7517271), 4025506815523052, unchecked((nint)431338169919855088), new F3_S2(7888763), new F3_S3(57, unchecked((nint)8933588466514096604)), new F3_S4(unchecked((nuint)7769316271655125502), 1663231, 27333), new F3_S5(887161443, 4368322322535461551), 32477, 948591564, new F3_S6(new F3_S6_S0(7033, 124), 67, 221), unchecked((nint)6195032215974632640), new F3_S7(4076570630190469380));
+        Assert.Equal(-8840537967093155898, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 6)]
+    struct F4_S0
+    {
+        public ushort F0;
+        public short F1;
+        public short F2;
+
+        public F4_S0(ushort f0, short f1, short f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F4_S1_S0
+    {
+        public uint F0;
+
+        public F4_S1_S0(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F4_S1
+    {
+        public F4_S1_S0 F0;
+        public float F1;
+
+        public F4_S1(F4_S1_S0 f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F4_S2_S0
+    {
+        public nint F0;
+
+        public F4_S2_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F4_S2
+    {
+        public F4_S2_S0 F0;
+        public nint F1;
+
+        public F4_S2(F4_S2_S0 f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F4_S3
+    {
+        public ulong F0;
+        public ulong F1;
+        public long F2;
+
+        public F4_S3(ulong f0, ulong f1, long f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress10swiftFunc42a02a12a22a32a42a52a62a72a82a93a103a113a123a133a143a15S2i_AA5F4_S0VSus6UInt64Vs4Int8VSdAA0V3_S1Vs5UInt8Vs5Int32Vs6UInt32VAvA0V3_S2Vs5Int16VSiAA0V3_S3VA4_tF")]
+    private static extern nint SwiftFunc4(nint a0, F4_S0 a1, nuint a2, ulong a3, sbyte a4, double a5, F4_S1 a6, byte a7, int a8, uint a9, ulong a10, F4_S2 a11, short a12, nint a13, F4_S3 a14, uint a15);
+
+    [Fact]
+    public static void TestSwiftFunc4()
+    {
+        Console.Write("Running SwiftFunc4: ");
+        long result = SwiftFunc4(unchecked((nint)7962207922494873063), new F4_S0(16887, 11193, 20997), unchecked((nuint)938043702598629976), 8692646626431098135, -16, 1244033228990732, new F4_S1(new F4_S1_S0(274421021), 7037264), 154, 1187166500, 1096514224, 7283010216047805604, new F4_S2(new F4_S2_S0(unchecked((nint)3285810526807361976)), unchecked((nint)2934841899954168407)), 3384, unchecked((nint)4857017836321530071), new F4_S3(9030480386017125399, 5466901523025762626, 3430278619936831574), 234522698);
+        Assert.Equal(5366279618472372586, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F5_S0
+    {
+        public nuint F0;
+
+        public F5_S0(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress10swiftFunc52a02a12a22a3SiSu_s6UInt64Vs5UInt8VAA5F5_S0VtF")]
+    private static extern nint SwiftFunc5(nuint a0, ulong a1, byte a2, F5_S0 a3);
+
+    [Fact]
+    public static void TestSwiftFunc5()
+    {
+        Console.Write("Running SwiftFunc5: ");
+        long result = SwiftFunc5(unchecked((nuint)425569624776371773), 8077063517132296390, 126, new F5_S0(unchecked((nuint)8032431538406335990)));
+        Assert.Equal(5832440388901373477, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 17)]
+    struct F6_S0
+    {
+        public int F0;
+        public nint F1;
+        public byte F2;
+
+        public F6_S0(int f0, nint f1, byte f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F6_S1
+    {
+        public nint F0;
+        public float F1;
+
+        public F6_S1(nint f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F6_S2_S0
+    {
+        public double F0;
+
+        public F6_S2_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F6_S2
+    {
+        public F6_S2_S0 F0;
+        public ushort F1;
+
+        public F6_S2(F6_S2_S0 f0, ushort f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F6_S3
+    {
+        public double F0;
+        public double F1;
+        public ulong F2;
+
+        public F6_S3(double f0, double f1, ulong f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F6_S4
+    {
+        public sbyte F0;
+
+        public F6_S4(sbyte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F6_S5
+    {
+        public short F0;
+
+        public F6_S5(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress10swiftFunc62a02a12a22a32a42a52a62a72a82a93a103a113a123a133a14Sis5Int64V_AA5F6_S0VAA0V3_S1VSus5UInt8Vs5Int32VAA0V3_S2VSfs5Int16VAA0V3_S3Vs6UInt16VSds6UInt32VAA0V3_S4VAA0V3_S5VtF")]
+    private static extern nint SwiftFunc6(long a0, F6_S0 a1, F6_S1 a2, nuint a3, byte a4, int a5, F6_S2 a6, float a7, short a8, F6_S3 a9, ushort a10, double a11, uint a12, F6_S4 a13, F6_S5 a14);
+
+    [Fact]
+    public static void TestSwiftFunc6()
+    {
+        Console.Write("Running SwiftFunc6: ");
+        long result = SwiftFunc6(7742402881449217499, new F6_S0(158138445, unchecked((nint)4280990415451108676), 220), new F6_S1(unchecked((nint)7698928046973811162), 478730), unchecked((nuint)7348396082620937303), 76, 638113630, new F6_S2(new F6_S2_S0(55341051405503), 61378), 8235930, -20241, new F6_S3(318363825012010, 3586735152618866, 6630554942616673404), 46432, 744827194985602, 1973021571, new F6_S4(103), new F6_S5(-5345));
+        Assert.Equal(-8871753131984133391, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F7_S0
+    {
+        public short F0;
+        public nint F1;
+
+        public F7_S0(short f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F7_S1
+    {
+        public byte F0;
+
+        public F7_S1(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress10swiftFunc72a02a12a22a32a42a5Sis5Int64V_Sis5UInt8VAA5F7_S0VAA0N3_S1Vs6UInt32VtF")]
+    private static extern nint SwiftFunc7(long a0, nint a1, byte a2, F7_S0 a3, F7_S1 a4, uint a5);
+
+    [Fact]
+    public static void TestSwiftFunc7()
+    {
+        Console.Write("Running SwiftFunc7: ");
+        long result = SwiftFunc7(6953928391541094904, unchecked((nint)2531714261502554653), 224, new F7_S0(14482, unchecked((nint)4704842847707480837)), new F7_S1(148), 659764805);
+        Assert.Equal(5963731324167739917, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F8_S0
+    {
+        public int F0;
+
+        public F8_S0(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress10swiftFunc82a02a12a22a32a42a5Sis6UInt16V_SuAJs6UInt64VAA5F8_S0VALtF")]
+    private static extern nint SwiftFunc8(ushort a0, nuint a1, ushort a2, ulong a3, F8_S0 a4, ulong a5);
+
+    [Fact]
+    public static void TestSwiftFunc8()
+    {
+        Console.Write("Running SwiftFunc8: ");
+        long result = SwiftFunc8(48505, unchecked((nuint)8758330817072549915), 7130, 4163773298933598697, new F8_S0(1934119180), 2843311260726166700);
+        Assert.Equal(1919194302322813426, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F9_S0
+    {
+        public double F0;
+
+        public F9_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F9_S1
+    {
+        public int F0;
+
+        public F9_S1(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress10swiftFunc92a02a12a22a32a42a5Sis5Int64V_SfAA5F9_S0Vs6UInt16VAA0M3_S1VANtF")]
+    private static extern nint SwiftFunc9(long a0, float a1, F9_S0 a2, ushort a3, F9_S1 a4, ushort a5);
+
+    [Fact]
+    public static void TestSwiftFunc9()
+    {
+        Console.Write("Running SwiftFunc9: ");
+        long result = SwiftFunc9(3214937834123081267, 6846768, new F9_S0(1713527158921541), 25670, new F9_S1(1650872599), 39910);
+        Assert.Equal(-5878079645235476214, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F10_S0
+    {
+        public long F0;
+        public uint F1;
+
+        public F10_S0(long f0, uint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F10_S1
+    {
+        public float F0;
+        public byte F1;
+        public nuint F2;
+
+        public F10_S1(float f0, byte f1, nuint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F10_S2
+    {
+        public nuint F0;
+        public ulong F1;
+
+        public F10_S2(nuint f0, ulong f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F10_S3
+    {
+        public float F0;
+
+        public F10_S3(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F10_S4
+    {
+        public long F0;
+
+        public F10_S4(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc102a02a12a22a32a42a52a62a72a82a93a103a113a123a133a143a153a163a173a18Sis6UInt16V_AwA6F10_S0Vs6UInt64VSfs4Int8Vs5Int64VA_A3_Sfs5Int32VA5_A3_A_AA0Z3_S1VA3_AA0Z3_S2VAA0Z3_S3VAA0Z3_S4VtF")]
+    private static extern nint SwiftFunc10(ushort a0, ushort a1, F10_S0 a2, ulong a3, float a4, sbyte a5, long a6, ulong a7, long a8, float a9, int a10, int a11, long a12, ulong a13, F10_S1 a14, long a15, F10_S2 a16, F10_S3 a17, F10_S4 a18);
+
+    [Fact]
+    public static void TestSwiftFunc10()
+    {
+        Console.Write("Running SwiftFunc10: ");
+        long result = SwiftFunc10(57914, 11968, new F10_S0(155502634291755209, 2096010440), 1373054541331378384, 2401784, -16, 9038689080810964859, 521869082023571496, 8919173990791765137, 4890513, 1113752036, 1477591037, 1463349953238439103, 7521124889381630793, new F10_S1(620783, 33, unchecked((nuint)1209731409858919135)), 1560688600815438014, new F10_S2(unchecked((nuint)2244178273746563479), 4252696983313269084), new F10_S3(6539550), new F10_S4(1264398289929487498));
+        Assert.Equal(-5714135075575530569, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F11_S0
+    {
+        public short F0;
+        public sbyte F1;
+        public ulong F2;
+        public short F3;
+
+        public F11_S0(short f0, sbyte f1, ulong f2, short f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F11_S1
+    {
+        public nuint F0;
+
+        public F11_S1(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F11_S2
+    {
+        public short F0;
+
+        public F11_S2(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F11_S3_S0
+    {
+        public float F0;
+
+        public F11_S3_S0(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F11_S3
+    {
+        public F11_S3_S0 F0;
+
+        public F11_S3(F11_S3_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc112a02a12a22a32a42a52a62a72a82a93a103a113a12S2i_s6UInt64Vs5UInt8Vs5Int16VAA6F11_S0VAA0V3_S1Vs6UInt16VSdSis6UInt32VAA0V3_S2VAA0V3_S3Vs4Int8VtF")]
+    private static extern nint SwiftFunc11(nint a0, ulong a1, byte a2, short a3, F11_S0 a4, F11_S1 a5, ushort a6, double a7, nint a8, uint a9, F11_S2 a10, F11_S3 a11, sbyte a12);
+
+    [Fact]
+    public static void TestSwiftFunc11()
+    {
+        Console.Write("Running SwiftFunc11: ");
+        long result = SwiftFunc11(unchecked((nint)6199025647502478201), 6507965430585517144, 205, -31066, new F11_S0(-8843, -2, 7915533514001114122, -3518), new F11_S1(unchecked((nuint)690496938384964820)), 10269, 3817195039757571, unchecked((nint)4394294464475321144), 1182247681, new F11_S2(22246), new F11_S3(new F11_S3_S0(3714370)), 93);
+        Assert.Equal(946399036611801834, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F12_S0
+    {
+        public uint F0;
+
+        public F12_S0(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F12_S1
+    {
+        public byte F0;
+
+        public F12_S1(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F12_S2
+    {
+        public nuint F0;
+
+        public F12_S2(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc122a02a12a22a32a42a52a62a72a82a93a103a11Sis5UInt8V_s5Int32VAA6F12_S0Vs4Int8VAA0T3_S1VAA0T3_S2Vs6UInt32Vs5Int16VA2VA0_APtF")]
+    private static extern nint SwiftFunc12(byte a0, int a1, F12_S0 a2, sbyte a3, F12_S1 a4, F12_S2 a5, uint a6, short a7, sbyte a8, sbyte a9, uint a10, byte a11);
+
+    [Fact]
+    public static void TestSwiftFunc12()
+    {
+        Console.Write("Running SwiftFunc12: ");
+        long result = SwiftFunc12(233, 123593469, new F12_S0(1950949830), -122, new F12_S1(47), new F12_S2(unchecked((nuint)2600645483988824242)), 307825058, -49, -98, -5, 1582160629, 26);
+        Assert.Equal(102839812138332997, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F13_S0_S0_S0
+    {
+        public ulong F0;
+
+        public F13_S0_S0_S0(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F13_S0_S0
+    {
+        public F13_S0_S0_S0 F0;
+
+        public F13_S0_S0(F13_S0_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F13_S0
+    {
+        public sbyte F0;
+        public F13_S0_S0 F1;
+
+        public F13_S0(sbyte f0, F13_S0_S0 f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F13_S1_S0
+    {
+        public ulong F0;
+
+        public F13_S1_S0(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F13_S1
+    {
+        public F13_S1_S0 F0;
+
+        public F13_S1(F13_S1_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc132a02a12a22a32a42a5Sis4Int8V_SdAA6F13_S0VAA0M3_S1VAJSdtF")]
+    private static extern nint SwiftFunc13(sbyte a0, double a1, F13_S0 a2, F13_S1 a3, sbyte a4, double a5);
+
+    [Fact]
+    public static void TestSwiftFunc13()
+    {
+        Console.Write("Running SwiftFunc13: ");
+        long result = SwiftFunc13(-6, 2395768328620295, new F13_S0(44, new F13_S0_S0(new F13_S0_S0_S0(2383685413668225247))), new F13_S1(new F13_S1_S0(5663941717310331870)), -9, 815761320969512);
+        Assert.Equal(-6209025030118540066, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F14_S0
+    {
+        public nint F0;
+
+        public F14_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc142a02a12a22a32a4Sis4Int8V_SiAA6F14_S0VSfSutF")]
+    private static extern nint SwiftFunc14(sbyte a0, nint a1, F14_S0 a2, float a3, nuint a4);
+
+    [Fact]
+    public static void TestSwiftFunc14()
+    {
+        Console.Write("Running SwiftFunc14: ");
+        long result = SwiftFunc14(-78, unchecked((nint)2423976036967433837), new F14_S0(unchecked((nint)2836433146306492236)), 4916388, unchecked((nuint)7716581850692162517));
+        Assert.Equal(1206847964913124869, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F15_S0
+    {
+        public float F0;
+        public short F1;
+        public byte F2;
+        public long F3;
+        public double F4;
+
+        public F15_S0(float f0, short f1, byte f2, long f3, double f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F15_S1_S0
+    {
+        public sbyte F0;
+
+        public F15_S1_S0(sbyte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F15_S1
+    {
+        public uint F0;
+        public F15_S1_S0 F1;
+        public nuint F2;
+        public int F3;
+
+        public F15_S1(uint f0, F15_S1_S0 f1, nuint f2, int f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc152a02a12a22a32a42a52a62a7SiAA6F15_S0V_s6UInt64Vs6UInt32VSuANs5Int16VAA0N3_S1Vs5Int64VtF")]
+    private static extern nint SwiftFunc15(F15_S0 a0, ulong a1, uint a2, nuint a3, ulong a4, short a5, F15_S1 a6, long a7);
+
+    [Fact]
+    public static void TestSwiftFunc15()
+    {
+        Console.Write("Running SwiftFunc15: ");
+        long result = SwiftFunc15(new F15_S0(2392622, -22089, 69, 7123929674797968229, 2951758117520631), 171173680452593621, 357397954, unchecked((nuint)6020399741996935792), 3793854189677149082, 14438, new F15_S1(1572107355, new F15_S1_S0(109), unchecked((nuint)4381395046734445050), 2038949453), 9134476964305239477);
+        Assert.Equal(8801999574220262235, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F16_S0_S0
+    {
+        public double F0;
+
+        public F16_S0_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F16_S0
+    {
+        public nint F0;
+        public nint F1;
+        public F16_S0_S0 F2;
+
+        public F16_S0(nint f0, nint f1, F16_S0_S0 f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F16_S1
+    {
+        public short F0;
+        public ulong F1;
+        public uint F2;
+
+        public F16_S1(short f0, ulong f1, uint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F16_S2
+    {
+        public byte F0;
+        public ulong F1;
+        public float F2;
+
+        public F16_S2(byte f0, ulong f1, float f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F16_S3
+    {
+        public int F0;
+
+        public F16_S3(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc162a02a12a22a32a42a52a6Sis6UInt64V_AA6F16_S0VAA0N3_S1Vs6UInt16Vs5Int16VAA0N3_S2VAA0N3_S3VtF")]
+    private static extern nint SwiftFunc16(ulong a0, F16_S0 a1, F16_S1 a2, ushort a3, short a4, F16_S2 a5, F16_S3 a6);
+
+    [Fact]
+    public static void TestSwiftFunc16()
+    {
+        Console.Write("Running SwiftFunc16: ");
+        long result = SwiftFunc16(3875678837451096765, new F16_S0(unchecked((nint)4720149202348788086), unchecked((nint)7476511841079774603), new F16_S0_S0(1008066799213144)), new F16_S1(3085, 11417298712821513, 12161200), 257, 7667, new F16_S2(186, 2771425808859711833, 3778779), new F16_S3(146689072));
+        Assert.Equal(2726423189537230293, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F17_S0
+    {
+        public short F0;
+
+        public F17_S0(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F17_S1
+    {
+        public long F0;
+        public nuint F1;
+        public ulong F2;
+
+        public F17_S1(long f0, nuint f1, ulong f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F17_S2
+    {
+        public sbyte F0;
+
+        public F17_S2(sbyte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F17_S3
+    {
+        public sbyte F0;
+        public uint F1;
+
+        public F17_S3(sbyte f0, uint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F17_S4
+    {
+        public ulong F0;
+
+        public F17_S4(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F17_S5
+    {
+        public long F0;
+
+        public F17_S5(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc172a02a12a22a32a42a52a62a72a82a9SiAA6F17_S0V_s4Int8VAA0P3_S1VAPSuAA0P3_S2Vs5Int64VAA0P3_S3VAA0P3_S4VAA0P3_S5VtF")]
+    private static extern nint SwiftFunc17(F17_S0 a0, sbyte a1, F17_S1 a2, sbyte a3, nuint a4, F17_S2 a5, long a6, F17_S3 a7, F17_S4 a8, F17_S5 a9);
+
+    [Fact]
+    public static void TestSwiftFunc17()
+    {
+        Console.Write("Running SwiftFunc17: ");
+        long result = SwiftFunc17(new F17_S0(-25916), -37, new F17_S1(927673990059785474, unchecked((nuint)4067467819275701282), 4736163781163880654), 70, unchecked((nuint)1236364146053271187), new F17_S2(54), 6452671878605914679, new F17_S3(17, 1066187627), new F17_S4(961451227454237536), new F17_S5(8720978516408944945));
+        Assert.Equal(6084200789584610530, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F18_S0_S0
+    {
+        public ushort F0;
+        public short F1;
+
+        public F18_S0_S0(ushort f0, short f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F18_S0
+    {
+        public uint F0;
+        public F18_S0_S0 F1;
+        public ushort F2;
+
+        public F18_S0(uint f0, F18_S0_S0 f1, ushort f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F18_S1
+    {
+        public nint F0;
+        public nint F1;
+
+        public F18_S1(nint f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F18_S2_S0
+    {
+        public ulong F0;
+
+        public F18_S2_S0(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F18_S2
+    {
+        public ulong F0;
+        public long F1;
+        public byte F2;
+        public F18_S2_S0 F3;
+
+        public F18_S2(ulong f0, long f1, byte f2, F18_S2_S0 f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc182a02a12a22a32a42a52a62a72a8Sis5UInt8V_SdAA6F18_S0VAA0P3_S1Vs6UInt16Vs5Int64Vs6UInt64VAA0P3_S2VAWtF")]
+    private static extern nint SwiftFunc18(byte a0, double a1, F18_S0 a2, F18_S1 a3, ushort a4, long a5, ulong a6, F18_S2 a7, ulong a8);
+
+    [Fact]
+    public static void TestSwiftFunc18()
+    {
+        Console.Write("Running SwiftFunc18: ");
+        long result = SwiftFunc18(153, 2414022997411914, new F18_S0(795806912, new F18_S0_S0(63552, 11471), 47960), new F18_S1(unchecked((nint)6143080814824714071), unchecked((nint)2654471745636317319)), 51304, 4455723326879920366, 6215563249078191014, new F18_S2(7357905541817922655, 8124331887393558663, 146, new F18_S2_S0(8835007006958775606)), 1308697068118476706);
+        Assert.Equal(-1238401591549550590, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F19_S0
+    {
+        public nint F0;
+        public double F1;
+        public ushort F2;
+
+        public F19_S0(nint f0, double f1, ushort f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc192a02a12a2SiSu_AA6F19_S0Vs5Int16VtF")]
+    private static extern nint SwiftFunc19(nuint a0, F19_S0 a1, short a2);
+
+    [Fact]
+    public static void TestSwiftFunc19()
+    {
+        Console.Write("Running SwiftFunc19: ");
+        long result = SwiftFunc19(unchecked((nuint)2063900917075180131), new F19_S0(unchecked((nint)7420139040061411172), 4412763638361702, 18542), 32656);
+        Assert.Equal(-3737785273912016840, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F20_S0
+    {
+        public ushort F0;
+        public sbyte F1;
+        public ulong F2;
+        public uint F3;
+        public ulong F4;
+
+        public F20_S0(ushort f0, sbyte f1, ulong f2, uint f3, ulong f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F20_S1
+    {
+        public long F0;
+
+        public F20_S1(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc202a02a12a22a32a42a52a6Sis4Int8V_AA6F20_S0Vs6UInt64VSiAA0N3_S1Vs5UInt8Vs5Int64VtF")]
+    private static extern nint SwiftFunc20(sbyte a0, F20_S0 a1, ulong a2, nint a3, F20_S1 a4, byte a5, long a6);
+
+    [Fact]
+    public static void TestSwiftFunc20()
+    {
+        Console.Write("Running SwiftFunc20: ");
+        long result = SwiftFunc20(-90, new F20_S0(13173, -56, 2350829658938201640, 1333911330, 2505424063423776138), 6738010084636609242, unchecked((nint)819908193119917708), new F20_S1(1349820395385212287), 121, 3289915405437061252);
+        Assert.Equal(550863197950258558, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F21_S0
+    {
+        public uint F0;
+
+        public F21_S0(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F21_S1
+    {
+        public nint F0;
+        public uint F1;
+        public byte F2;
+        public short F3;
+
+        public F21_S1(nint f0, uint f1, byte f2, short f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 25)]
+    struct F21_S2
+    {
+        public sbyte F0;
+        public ulong F1;
+        public long F2;
+        public byte F3;
+
+        public F21_S2(sbyte f0, ulong f1, long f2, byte f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F21_S3
+    {
+        public double F0;
+        public nint F1;
+
+        public F21_S3(double f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc212a02a12a22a32a42a52a62a72a82a93a103a113a12Sis6UInt64V_s4Int8VSuSdSfSiAA6F21_S0VAA0U3_S1Vs6UInt16VAA0U3_S2Vs5UInt8VAA0U3_S3Vs5Int16VtF")]
+    private static extern nint SwiftFunc21(ulong a0, sbyte a1, nuint a2, double a3, float a4, nint a5, F21_S0 a6, F21_S1 a7, ushort a8, F21_S2 a9, byte a10, F21_S3 a11, short a12);
+
+    [Fact]
+    public static void TestSwiftFunc21()
+    {
+        Console.Write("Running SwiftFunc21: ");
+        long result = SwiftFunc21(5269012897287813953, -91, unchecked((nuint)1201479654570648238), 3289259914874957, 6706247, unchecked((nint)5524961485867187694), new F21_S0(1842933651), new F21_S1(unchecked((nint)3105907069529682628), 1409834375, 228, 24264), 54652, new F21_S2(-49, 3442352645827709069, 7249278047379449391, 213), 207, new F21_S3(3802489474747093, unchecked((nint)7550982300494612851)), -25738);
+        Assert.Equal(1242333410237260188, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F22_S0
+    {
+        public ushort F0;
+        public uint F1;
+        public short F2;
+        public float F3;
+
+        public F22_S0(ushort f0, uint f1, short f2, float f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F22_S1
+    {
+        public ushort F0;
+        public sbyte F1;
+        public byte F2;
+        public nint F3;
+        public nint F4;
+
+        public F22_S1(ushort f0, sbyte f1, byte f2, nint f3, nint f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F22_S2_S0
+    {
+        public sbyte F0;
+
+        public F22_S2_S0(sbyte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 14)]
+    struct F22_S2
+    {
+        public int F0;
+        public int F1;
+        public uint F2;
+        public byte F3;
+        public F22_S2_S0 F4;
+
+        public F22_S2(int f0, int f1, uint f2, byte f3, F22_S2_S0 f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F22_S3
+    {
+        public short F0;
+        public double F1;
+        public double F2;
+        public int F3;
+
+        public F22_S3(short f0, double f1, double f2, int f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc222a02a12a22a32a42a52a62a7Sis4Int8V_s5Int32VAA6F22_S0VAA0P3_S1VAA0P3_S2Vs6UInt64VAA0P3_S3VSutF")]
+    private static extern nint SwiftFunc22(sbyte a0, int a1, F22_S0 a2, F22_S1 a3, F22_S2 a4, ulong a5, F22_S3 a6, nuint a7);
+
+    [Fact]
+    public static void TestSwiftFunc22()
+    {
+        Console.Write("Running SwiftFunc22: ");
+        long result = SwiftFunc22(-57, 637612850, new F22_S0(39888, 420817324, 7562, 2757302), new F22_S1(61019, -94, 94, unchecked((nint)2606601177110916370), unchecked((nint)5843896711210899037)), new F22_S2(400565495, 1044629988, 1076814110, 26, new F22_S2_S0(-109)), 6520156438560424018, new F22_S3(8735, 4148868269582632, 2501928198596701, 1401343024), unchecked((nuint)5955700101477425475));
+        Assert.Equal(-6205677027164766590, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 6)]
+    struct F23_S0
+    {
+        public uint F0;
+        public short F1;
+
+        public F23_S0(uint f0, short f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F23_S1
+    {
+        public nuint F0;
+        public uint F1;
+
+        public F23_S1(nuint f0, uint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 17)]
+    struct F23_S2
+    {
+        public double F0;
+        public uint F1;
+        public int F2;
+        public byte F3;
+
+        public F23_S2(double f0, uint f1, int f2, byte f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc232a02a12a22a32a4SiAA6F23_S0V_AA0K3_S1VAA0K3_S2VSds6UInt64VtF")]
+    private static extern nint SwiftFunc23(F23_S0 a0, F23_S1 a1, F23_S2 a2, double a3, ulong a4);
+
+    [Fact]
+    public static void TestSwiftFunc23()
+    {
+        Console.Write("Running SwiftFunc23: ");
+        long result = SwiftFunc23(new F23_S0(119750622, -9202), new F23_S1(unchecked((nuint)2015683423731520384), 2106419422), new F23_S2(15243057156671, 484733224, 541045687, 128), 335968113268162, 4104726345028490471);
+        Assert.Equal(-4893219516767457464, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F24_S0
+    {
+        public sbyte F0;
+        public int F1;
+
+        public F24_S0(sbyte f0, int f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F24_S1
+    {
+        public sbyte F0;
+
+        public F24_S1(sbyte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F24_S2
+    {
+        public ushort F0;
+        public short F1;
+        public double F2;
+        public nuint F3;
+
+        public F24_S2(ushort f0, short f1, double f2, nuint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F24_S3
+    {
+        public nint F0;
+
+        public F24_S3(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc242a02a12a22a32a42a5SiAA6F24_S0V_AA0L3_S1VAA0L3_S2VAA0L3_S3VSus6UInt32VtF")]
+    private static extern nint SwiftFunc24(F24_S0 a0, F24_S1 a1, F24_S2 a2, F24_S3 a3, nuint a4, uint a5);
+
+    [Fact]
+    public static void TestSwiftFunc24()
+    {
+        Console.Write("Running SwiftFunc24: ");
+        long result = SwiftFunc24(new F24_S0(-79, 1590520731), new F24_S1(-91), new F24_S2(20580, 5897, 4259258535235558, unchecked((nuint)5376883129922161134)), new F24_S3(unchecked((nint)6329816641466666679)), unchecked((nuint)749917486894435068), 588417470);
+        Assert.Equal(2355459289566446436, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F25_S0_S0
+    {
+        public sbyte F0;
+
+        public F25_S0_S0(sbyte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F25_S0
+    {
+        public float F0;
+        public F25_S0_S0 F1;
+        public uint F2;
+
+        public F25_S0(float f0, F25_S0_S0 f1, uint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F25_S1
+    {
+        public short F0;
+        public sbyte F1;
+        public float F2;
+
+        public F25_S1(short f0, sbyte f1, float f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F25_S2
+    {
+        public long F0;
+        public ushort F1;
+
+        public F25_S2(long f0, ushort f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F25_S3
+    {
+        public ulong F0;
+
+        public F25_S3(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F25_S4
+    {
+        public ushort F0;
+
+        public F25_S4(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc252a02a12a22a32a42a52a62a72a82a93a103a113a12SiSf_AA6F25_S0Vs5Int64Vs5UInt8VAA0S3_S1VSiAA0S3_S2Vs5Int32VA_Sus6UInt64VAA0S3_S3VAA0S3_S4VtF")]
+    private static extern nint SwiftFunc25(float a0, F25_S0 a1, long a2, byte a3, F25_S1 a4, nint a5, F25_S2 a6, int a7, int a8, nuint a9, ulong a10, F25_S3 a11, F25_S4 a12);
+
+    [Fact]
+    public static void TestSwiftFunc25()
+    {
+        Console.Write("Running SwiftFunc25: ");
+        long result = SwiftFunc25(7574050, new F25_S0(6812822, new F25_S0_S0(-56), 265762114), 8887316512771179060, 123, new F25_S1(-7776, 73, 1925304), unchecked((nint)6156508798007114044), new F25_S2(3356802028835066684, 63590), 1072499355, 1592861041, unchecked((nuint)7083962615260029068), 6662060345720879806, new F25_S3(3582316099656415385), new F25_S4(37071));
+        Assert.Equal(3486557296564493762, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F26_S0
+    {
+        public double F0;
+
+        public F26_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc262a02a12a22a32a4Sis6UInt16V_Sds5Int64VAA6F26_S0Vs5UInt8VtF")]
+    private static extern nint SwiftFunc26(ushort a0, double a1, long a2, F26_S0 a3, byte a4);
+
+    [Fact]
+    public static void TestSwiftFunc26()
+    {
+        Console.Write("Running SwiftFunc26: ");
+        long result = SwiftFunc26(61060, 3605567452716741, 1495534128089493599, new F26_S0(1063426277848136), 89);
+        Assert.Equal(5445852553218786939, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F27_S0_S0
+    {
+        public long F0;
+
+        public F27_S0_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F27_S0
+    {
+        public ushort F0;
+        public F27_S0_S0 F1;
+        public double F2;
+
+        public F27_S0(ushort f0, F27_S0_S0 f1, double f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 13)]
+    struct F27_S1
+    {
+        public nint F0;
+        public sbyte F1;
+        public short F2;
+        public byte F3;
+
+        public F27_S1(nint f0, sbyte f1, short f2, byte f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F27_S2
+    {
+        public ushort F0;
+
+        public F27_S2(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F27_S3
+    {
+        public ulong F0;
+        public uint F1;
+
+        public F27_S3(ulong f0, uint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F27_S4
+    {
+        public byte F0;
+
+        public F27_S4(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc272a02a12a22a32a42a52a62a72a82a93a103a113a12SiAA6F27_S0V_S2ds4Int8VAsA0S3_S1Vs5Int16VAA0S3_S2VASs6UInt16VAA0S3_S3VAA0S3_S4Vs6UInt32VtF")]
+    private static extern nint SwiftFunc27(F27_S0 a0, double a1, double a2, sbyte a3, sbyte a4, F27_S1 a5, short a6, F27_S2 a7, sbyte a8, ushort a9, F27_S3 a10, F27_S4 a11, uint a12);
+
+    [Fact]
+    public static void TestSwiftFunc27()
+    {
+        Console.Write("Running SwiftFunc27: ");
+        long result = SwiftFunc27(new F27_S0(7130, new F27_S0_S0(6606060428339642921), 4122923031624866), 1451662996356727, 1529297186262631, 1, 24, new F27_S1(unchecked((nint)5075979081296734546), 75, -3781, 198), -26687, new F27_S2(53456), 90, 35194, new F27_S3(6318217926100193736, 1400016900), new F27_S4(11), 628995828);
+        Assert.Equal(-5428774405932003643, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F28_S0
+    {
+        public double F0;
+        public short F1;
+        public double F2;
+        public ulong F3;
+
+        public F28_S0(double f0, short f1, double f2, ulong f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F28_S1
+    {
+        public nint F0;
+        public uint F1;
+        public ulong F2;
+        public float F3;
+
+        public F28_S1(nint f0, uint f1, ulong f2, float f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F28_S2
+    {
+        public double F0;
+        public ulong F1;
+
+        public F28_S2(double f0, ulong f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F28_S3
+    {
+        public short F0;
+        public ulong F1;
+        public double F2;
+        public int F3;
+
+        public F28_S3(short f0, ulong f1, double f2, int f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F28_S4
+    {
+        public nint F0;
+
+        public F28_S4(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc282a02a12a22a32a42a52a62a72a82a93a103a113a12Sis5UInt8V_s6UInt16VAA6F28_S0VAA0U3_S1VAA0U3_S2Vs6UInt64Vs5Int32Vs5Int64VSdAsA0U3_S3VAA0U3_S4VSftF")]
+    private static extern nint SwiftFunc28(byte a0, ushort a1, F28_S0 a2, F28_S1 a3, F28_S2 a4, ulong a5, int a6, long a7, double a8, ushort a9, F28_S3 a10, F28_S4 a11, float a12);
+
+    [Fact]
+    public static void TestSwiftFunc28()
+    {
+        Console.Write("Running SwiftFunc28: ");
+        long result = SwiftFunc28(190, 17255, new F28_S0(3216710004509072, 9709, 4049245410019897, 6996716492380286220), new F28_S1(unchecked((nint)4097715616866617693), 539407084, 4626633991924578918, 1275504), new F28_S2(3574990895078933, 7178808315522215553), 4610456141729135855, 1303811396, 5390518172407783382, 4435699869971486, 62148, new F28_S3(22518, 4183064684428798988, 4007968538134666, 433839184), new F28_S4(unchecked((nint)4835639581253218785)), 778028);
+        Assert.Equal(-2948821353897526623, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F29_S0
+    {
+        public int F0;
+        public float F1;
+        public short F2;
+
+        public F29_S0(int f0, float f1, short f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F29_S1
+    {
+        public short F0;
+        public sbyte F1;
+        public nuint F2;
+
+        public F29_S1(short f0, sbyte f1, nuint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F29_S2
+    {
+        public ushort F0;
+
+        public F29_S2(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F29_S3
+    {
+        public long F0;
+        public long F1;
+
+        public F29_S3(long f0, long f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc292a02a12a22a32a42a52a62a72a82a93a103a113a123a133a143a15Sis4Int8V_AA6F29_S0Vs5Int32VSuAA0W3_S1Vs6UInt64VAA0W3_S2Vs5Int16Vs5Int64Vs6UInt32VA0_SiAA0W3_S3Vs5UInt8VATSdtF")]
+    private static extern nint SwiftFunc29(sbyte a0, F29_S0 a1, int a2, nuint a3, F29_S1 a4, ulong a5, F29_S2 a6, short a7, long a8, uint a9, ulong a10, nint a11, F29_S3 a12, byte a13, sbyte a14, double a15);
+
+    [Fact]
+    public static void TestSwiftFunc29()
+    {
+        Console.Write("Running SwiftFunc29: ");
+        long result = SwiftFunc29(-24, new F29_S0(1975390147, 2492976, -22918), 1918385726, unchecked((nuint)4330240195518051787), new F29_S1(20662, 37, unchecked((nuint)3480511823780639511)), 2969238117130521039, new F29_S2(39829), -21356, 4236774320019789885, 650424352, 974567590062881682, unchecked((nint)4949995943007509070), new F29_S3(6288374171493526635, 797442718847899480), 23, 47, 3112540527380411);
+        Assert.Equal(-219723436366645712, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F30_S0
+    {
+        public nuint F0;
+        public float F1;
+
+        public F30_S0(nuint f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F30_S1
+    {
+        public ulong F0;
+        public byte F1;
+        public double F2;
+        public nint F3;
+
+        public F30_S1(ulong f0, byte f1, double f2, nint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F30_S2_S0
+    {
+        public short F0;
+        public short F1;
+
+        public F30_S2_S0(short f0, short f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F30_S2_S1
+    {
+        public long F0;
+
+        public F30_S2_S1(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F30_S2
+    {
+        public F30_S2_S0 F0;
+        public F30_S2_S1 F1;
+
+        public F30_S2(F30_S2_S0 f0, F30_S2_S1 f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F30_S3
+    {
+        public sbyte F0;
+        public byte F1;
+        public ulong F2;
+        public uint F3;
+
+        public F30_S3(sbyte f0, byte f1, ulong f2, uint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F30_S4
+    {
+        public ushort F0;
+
+        public F30_S4(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc302a02a12a22a32a42a52a62a72a82a93a103a113a12Sis6UInt16V_s5Int16VAqA6F30_S0VAA0U3_S1VAA0U3_S2Vs6UInt64Vs5Int32VSuAA0U3_S3VAqA0U3_S4Vs4Int8VtF")]
+    private static extern nint SwiftFunc30(ushort a0, short a1, ushort a2, F30_S0 a3, F30_S1 a4, F30_S2 a5, ulong a6, int a7, nuint a8, F30_S3 a9, ushort a10, F30_S4 a11, sbyte a12);
+
+    [Fact]
+    public static void TestSwiftFunc30()
+    {
+        Console.Write("Running SwiftFunc30: ");
+        long result = SwiftFunc30(16858, 2711, 33779, new F30_S0(unchecked((nuint)8711036551441957307), 109551), new F30_S1(5557074438983413757, 145, 1614350045039200, unchecked((nint)962570826922694431)), new F30_S2(new F30_S2_S0(-2145, 18987), new F30_S2_S1(3566641512072703431)), 4070388225227154205, 2068046267, unchecked((nuint)2683069104930642879), new F30_S3(82, 154, 4455096152847314924, 2054397471), 61158, new F30_S4(61860), -85);
+        Assert.Equal(-6493337704322390178, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F31_S0
+    {
+        public nint F0;
+        public float F1;
+        public uint F2;
+        public nint F3;
+
+        public F31_S0(nint f0, float f1, uint f2, nint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc312a02a12a22a3Sis5Int64V_AA6F31_S0Vs6UInt32Vs6UInt64VtF")]
+    private static extern nint SwiftFunc31(long a0, F31_S0 a1, uint a2, ulong a3);
+
+    [Fact]
+    public static void TestSwiftFunc31()
+    {
+        Console.Write("Running SwiftFunc31: ");
+        long result = SwiftFunc31(854114380819209961, new F31_S0(unchecked((nint)8616284744785848913), 2817216, 1674385679, unchecked((nint)6375864278077977066)), 972945684, 1323893099763572702);
+        Assert.Equal(5251289581384890505, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F32_S0
+    {
+        public short F0;
+        public float F1;
+        public long F2;
+
+        public F32_S0(short f0, float f1, long f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F32_S1_S0
+    {
+        public nuint F0;
+
+        public F32_S1_S0(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F32_S1
+    {
+        public byte F0;
+        public F32_S1_S0 F1;
+
+        public F32_S1(byte f0, F32_S1_S0 f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F32_S2
+    {
+        public uint F0;
+        public byte F1;
+        public nuint F2;
+
+        public F32_S2(uint f0, byte f1, nuint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F32_S3_S0
+    {
+        public nuint F0;
+
+        public F32_S3_S0(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F32_S3
+    {
+        public ulong F0;
+        public F32_S3_S0 F1;
+        public ulong F2;
+
+        public F32_S3(ulong f0, F32_S3_S0 f1, ulong f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F32_S4
+    {
+        public double F0;
+        public long F1;
+        public long F2;
+        public float F3;
+
+        public F32_S4(double f0, long f1, long f2, float f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc322a02a12a22a32a42a52a62a72a82a93a10Sis6UInt64V_AA6F32_S0VSdAA0R3_S1VAA0R3_S2VAOSfAA0R3_S3VAA0R3_S4Vs6UInt32Vs5Int16VtF")]
+    private static extern nint SwiftFunc32(ulong a0, F32_S0 a1, double a2, F32_S1 a3, F32_S2 a4, ulong a5, float a6, F32_S3 a7, F32_S4 a8, uint a9, short a10);
+
+    [Fact]
+    public static void TestSwiftFunc32()
+    {
+        Console.Write("Running SwiftFunc32: ");
+        long result = SwiftFunc32(8029377143582007729, new F32_S0(17278, 7967601, 1978436908876178048), 1789368352608636, new F32_S1(255, new F32_S1_S0(unchecked((nuint)6244652548486446415))), new F32_S2(862868498, 29, unchecked((nuint)1969242341467623483)), 5279845618693914949, 1855163, new F32_S3(6102326739757366863, new F32_S3_S0(unchecked((nuint)8768252353660722957)), 3548360060427751308), new F32_S4(4443676345125115, 9168978488997364066, 3214391615557684463, 6052142), 1797618755, 17578);
+        Assert.Equal(-6196943681215505326, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F33_S0
+    {
+        public sbyte F0;
+        public byte F1;
+
+        public F33_S0(sbyte f0, byte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F33_S1
+    {
+        public ushort F0;
+        public byte F1;
+        public long F2;
+
+        public F33_S1(ushort f0, byte f1, long f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F33_S2_S0
+    {
+        public uint F0;
+
+        public F33_S2_S0(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 34)]
+    struct F33_S2
+    {
+        public F33_S2_S0 F0;
+        public nuint F1;
+        public float F2;
+        public double F3;
+        public ushort F4;
+
+        public F33_S2(F33_S2_S0 f0, nuint f1, float f2, double f3, ushort f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F33_S3
+    {
+        public nuint F0;
+
+        public F33_S3(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc332a02a12a22a32a42a52a62a72a82a93a10SiSf_AA6F33_S0Vs6UInt64Vs5Int64VAA0Q3_S1Vs6UInt16VSuAwA0Q3_S2VAA0Q3_S3VSitF")]
+    private static extern nint SwiftFunc33(float a0, F33_S0 a1, ulong a2, long a3, F33_S1 a4, ushort a5, nuint a6, ushort a7, F33_S2 a8, F33_S3 a9, nint a10);
+
+    [Fact]
+    public static void TestSwiftFunc33()
+    {
+        Console.Write("Running SwiftFunc33: ");
+        long result = SwiftFunc33(7854986, new F33_S0(-88, 250), 5301409185013630861, 59840293674446659, new F33_S1(60084, 209, 8486520240421572730), 47187, unchecked((nuint)3062806578924156555), 27556, new F33_S2(new F33_S2_S0(2034603306), unchecked((nuint)8616790058647815090), 6520318, 4264637592867522, 45572), new F33_S3(unchecked((nuint)8100077493474466447)), unchecked((nint)4177526131236757728));
+        Assert.Equal(7131040958707940402, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F34_S0
+    {
+        public byte F0;
+
+        public F34_S0(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc342a02a12a22a32a42a5Sis5Int64V_AA6F34_S0VS2us5UInt8VSdtF")]
+    private static extern nint SwiftFunc34(long a0, F34_S0 a1, nuint a2, nuint a3, byte a4, double a5);
+
+    [Fact]
+    public static void TestSwiftFunc34()
+    {
+        Console.Write("Running SwiftFunc34: ");
+        long result = SwiftFunc34(6297959268257433453, new F34_S0(152), unchecked((nuint)684867108943559069), unchecked((nuint)3028084738078866117), 52, 1123384931674176);
+        Assert.Equal(-7354337608853973520, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F35_S0
+    {
+        public short F0;
+
+        public F35_S0(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 3)]
+    struct F35_S1_S0
+    {
+        public ushort F0;
+        public sbyte F1;
+
+        public F35_S1_S0(ushort f0, sbyte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F35_S1
+    {
+        public long F0;
+        public F35_S1_S0 F1;
+        public float F2;
+
+        public F35_S1(long f0, F35_S1_S0 f1, float f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F35_S2
+    {
+        public ulong F0;
+        public sbyte F1;
+        public uint F2;
+        public long F3;
+
+        public F35_S2(ulong f0, sbyte f1, uint f2, long f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 5)]
+    struct F35_S3_S0_S0
+    {
+        public uint F0;
+        public byte F1;
+
+        public F35_S3_S0_S0(uint f0, byte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F35_S3_S0
+    {
+        public ushort F0;
+        public F35_S3_S0_S0 F1;
+        public double F2;
+
+        public F35_S3_S0(ushort f0, F35_S3_S0_S0 f1, double f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F35_S3
+    {
+        public F35_S3_S0 F0;
+        public uint F1;
+
+        public F35_S3(F35_S3_S0 f0, uint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F35_S4
+    {
+        public float F0;
+
+        public F35_S4(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc352a02a12a22a32a42a52a62a72a82a93a10Sis5UInt8V_AA6F35_S0VA2oA0R3_S1Vs5Int32VAA0R3_S2VSis6UInt32VAA0R3_S3VAA0R3_S4VtF")]
+    private static extern nint SwiftFunc35(byte a0, F35_S0 a1, byte a2, byte a3, F35_S1 a4, int a5, F35_S2 a6, nint a7, uint a8, F35_S3 a9, F35_S4 a10);
+
+    [Fact]
+    public static void TestSwiftFunc35()
+    {
+        Console.Write("Running SwiftFunc35: ");
+        long result = SwiftFunc35(70, new F35_S0(-3405), 57, 4, new F35_S1(1893314071875920321, new F35_S1_S0(21188, -72), 1690358), 331400152, new F35_S2(629066911115913492, 24, 1741513272, 1738852017312447556), unchecked((nint)5964912267274635634), 745754721, new F35_S3(new F35_S3_S0(12969, new F35_S3_S0_S0(1922748035, 11), 1057686301404030), 1301219882), new F35_S4(4792810));
+        Assert.Equal(8413899507614185381, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct F36_S0
+    {
+        public ulong F0;
+        public sbyte F1;
+
+        public F36_S0(ulong f0, sbyte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F36_S1
+    {
+        public long F0;
+        public nuint F1;
+        public nint F2;
+        public int F3;
+
+        public F36_S1(long f0, nuint f1, nint f2, int f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F36_S2
+    {
+        public nint F0;
+
+        public F36_S2(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F36_S3_S0
+    {
+        public float F0;
+
+        public F36_S3_S0(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F36_S3
+    {
+        public long F0;
+        public sbyte F1;
+        public F36_S3_S0 F2;
+
+        public F36_S3(long f0, sbyte f1, F36_S3_S0 f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F36_S4
+    {
+        public nuint F0;
+        public long F1;
+        public double F2;
+        public double F3;
+
+        public F36_S4(nuint f0, long f1, double f2, double f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F36_S5
+    {
+        public byte F0;
+        public byte F1;
+
+        public F36_S5(byte f0, byte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F36_S6
+    {
+        public ushort F0;
+
+        public F36_S6(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc362a02a12a22a32a42a52a62a72a82a93a103a11SiAA6F36_S0V_Sds6UInt64VAA0R3_S1VAA0R3_S2VAA0R3_S3VAA0R3_S4VSfAA0R3_S5Vs5UInt8VSdAA0R3_S6VtF")]
+    private static extern nint SwiftFunc36(F36_S0 a0, double a1, ulong a2, F36_S1 a3, F36_S2 a4, F36_S3 a5, F36_S4 a6, float a7, F36_S5 a8, byte a9, double a10, F36_S6 a11);
+
+    [Fact]
+    public static void TestSwiftFunc36()
+    {
+        Console.Write("Running SwiftFunc36: ");
+        long result = SwiftFunc36(new F36_S0(6433294246214898902, -21), 3881104127408136, 2284220855453859614, new F36_S1(4439404430423666401, unchecked((nuint)6899402977735223119), unchecked((nint)5232137643577323921), 622124401), new F36_S2(unchecked((nint)2215893056133254497)), new F36_S3(929506260159009104, -122, new F36_S3_S0(1015742)), new F36_S4(unchecked((nuint)3900865090022814819), 5812191011379795103, 4189883409333787, 3777993202541206), 1483351, new F36_S5(168, 87), 242, 3899885261689271, new F36_S6(49518));
+        Assert.Equal(624934575149916284, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F37_S0
+    {
+        public int F0;
+
+        public F37_S0(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F37_S1
+    {
+        public uint F0;
+        public uint F1;
+        public float F2;
+
+        public F37_S1(uint f0, uint f1, float f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F37_S2
+    {
+        public int F0;
+        public uint F1;
+        public double F2;
+        public nuint F3;
+
+        public F37_S2(int f0, uint f1, double f2, nuint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F37_S3_S0
+    {
+        public nint F0;
+
+        public F37_S3_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F37_S3
+    {
+        public F37_S3_S0 F0;
+
+        public F37_S3(F37_S3_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc372a02a12a22a32a42a52a62a72a82a93a103a113a123a13S2i_s6UInt64Vs6UInt32Vs5Int32Vs4Int8Vs5UInt8VArA6F37_S0VAA0Y3_S1Vs5Int16VAA0Y3_S2VSuAA0Y3_S3VARtF")]
+    private static extern nint SwiftFunc37(nint a0, ulong a1, uint a2, int a3, sbyte a4, byte a5, ulong a6, F37_S0 a7, F37_S1 a8, short a9, F37_S2 a10, nuint a11, F37_S3 a12, ulong a13);
+
+    [Fact]
+    public static void TestSwiftFunc37()
+    {
+        Console.Write("Running SwiftFunc37: ");
+        long result = SwiftFunc37(unchecked((nint)7997876577338840618), 2916693561268448247, 2045535781, 1617618895, 35, 118, 8729954385529497591, new F37_S0(1590622742), new F37_S1(1445653735, 1780802910, 6918266), -302, new F37_S2(504109544, 1827855745, 3682561033291689, unchecked((nuint)6718188397722828326)), unchecked((nuint)4901939155447291041), new F37_S3(new F37_S3_S0(unchecked((nint)7671123806949823347))), 4910913885588390838);
+        Assert.Equal(-3950862618349704578, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 6)]
+    struct F38_S0
+    {
+        public ushort F0;
+        public short F1;
+        public short F2;
+
+        public F38_S0(ushort f0, short f1, short f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F38_S1
+    {
+        public int F0;
+
+        public F38_S1(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F38_S2
+    {
+        public nuint F0;
+
+        public F38_S2(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc382a02a12a22a32a4Sis6UInt32V_s5Int32VAA6F38_S0VAA0M3_S1VAA0M3_S2VtF")]
+    private static extern nint SwiftFunc38(uint a0, int a1, F38_S0 a2, F38_S1 a3, F38_S2 a4);
+
+    [Fact]
+    public static void TestSwiftFunc38()
+    {
+        Console.Write("Running SwiftFunc38: ");
+        long result = SwiftFunc38(2061218718, 320687949, new F38_S0(53989, -5186, -13102), new F38_S1(1455203558), new F38_S2(unchecked((nuint)4328826644800782496)));
+        Assert.Equal(1423775906233216436, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F39_S0_S0
+    {
+        public nuint F0;
+
+        public F39_S0_S0(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F39_S0_S1
+    {
+        public int F0;
+
+        public F39_S0_S1(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 36)]
+    struct F39_S0
+    {
+        public nint F0;
+        public long F1;
+        public uint F2;
+        public F39_S0_S0 F3;
+        public F39_S0_S1 F4;
+
+        public F39_S0(nint f0, long f1, uint f2, F39_S0_S0 f3, F39_S0_S1 f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F39_S1
+    {
+        public nuint F0;
+        public double F1;
+
+        public F39_S1(nuint f0, double f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc392a02a12a22a32a4SiSu_SuAA6F39_S0VAA0K3_S1VSftF")]
+    private static extern nint SwiftFunc39(nuint a0, nuint a1, F39_S0 a2, F39_S1 a3, float a4);
+
+    [Fact]
+    public static void TestSwiftFunc39()
+    {
+        Console.Write("Running SwiftFunc39: ");
+        long result = SwiftFunc39(unchecked((nuint)8230747730129668979), unchecked((nuint)4736775119629579479), new F39_S0(unchecked((nint)5173491896684902537), 4915765547454462242, 1028369724, new F39_S0_S0(unchecked((nuint)8662559577682755939)), new F39_S0_S1(436709185)), new F39_S1(unchecked((nuint)3203283942912276541), 3029648293570205), 5675124);
+        Assert.Equal(-1722913155676633924, result);
+        Console.WriteLine("OK");
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc402a0Sis5Int32V_tF")]
+    private static extern nint SwiftFunc40(int a0);
+
+    [Fact]
+    public static void TestSwiftFunc40()
+    {
+        Console.Write("Running SwiftFunc40: ");
+        long result = SwiftFunc40(447211275);
+        Assert.Equal(8279520253543879998, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F41_S0
+    {
+        public short F0;
+        public float F1;
+        public ushort F2;
+
+        public F41_S0(short f0, float f1, ushort f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F41_S1
+    {
+        public ushort F0;
+        public ulong F1;
+        public sbyte F2;
+        public float F3;
+        public ulong F4;
+
+        public F41_S1(ushort f0, ulong f1, sbyte f2, float f3, ulong f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F41_S2_S0_S0
+    {
+        public short F0;
+
+        public F41_S2_S0_S0(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F41_S2_S0
+    {
+        public F41_S2_S0_S0 F0;
+
+        public F41_S2_S0(F41_S2_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 22)]
+    struct F41_S2
+    {
+        public int F0;
+        public short F1;
+        public ulong F2;
+        public float F3;
+        public F41_S2_S0 F4;
+
+        public F41_S2(int f0, short f1, ulong f2, float f3, F41_S2_S0 f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc412a02a12a22a32a42a52a62a72a8SiSf_AA6F41_S0VAA0O3_S1VAA0O3_S2Vs6UInt32VSuASSis4Int8VtF")]
+    private static extern nint SwiftFunc41(float a0, F41_S0 a1, F41_S1 a2, F41_S2 a3, uint a4, nuint a5, uint a6, nint a7, sbyte a8);
+
+    [Fact]
+    public static void TestSwiftFunc41()
+    {
+        Console.Write("Running SwiftFunc41: ");
+        long result = SwiftFunc41(5984057, new F41_S0(11791, 7594, 4883), new F41_S1(61253, 4089489613092392334, -39, 4246219, 6241750146529178696), new F41_S2(2097957786, -31595, 2497631910262823657, 1845838, new F41_S2_S0(new F41_S2_S0_S0(-4594))), 2146355885, unchecked((nuint)7552603789122823169), 1034389054, unchecked((nint)5088721772774365291), -61);
+        Assert.Equal(-8371592578322439321, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F42_S0
+    {
+        public uint F0;
+        public ulong F1;
+        public ulong F2;
+
+        public F42_S0(uint f0, ulong f1, ulong f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F42_S1
+    {
+        public double F0;
+        public double F1;
+
+        public F42_S1(double f0, double f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F42_S2_S0
+    {
+        public nint F0;
+
+        public F42_S2_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F42_S2
+    {
+        public byte F0;
+        public long F1;
+        public F42_S2_S0 F2;
+        public nint F3;
+
+        public F42_S2(byte f0, long f1, F42_S2_S0 f2, nint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F42_S3_S0
+    {
+        public short F0;
+
+        public F42_S3_S0(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 6)]
+    struct F42_S3
+    {
+        public float F0;
+        public F42_S3_S0 F1;
+
+        public F42_S3(float f0, F42_S3_S0 f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F42_S4
+    {
+        public uint F0;
+
+        public F42_S4(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F42_S5_S0
+    {
+        public uint F0;
+
+        public F42_S5_S0(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F42_S5
+    {
+        public F42_S5_S0 F0;
+
+        public F42_S5(F42_S5_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F42_S6
+    {
+        public nuint F0;
+
+        public F42_S6(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc422a02a12a22a32a42a52a62a72a8SiAA6F42_S0V_AA0O3_S1Vs6UInt16VAA0O3_S2VAA0O3_S3VAA0O3_S4VAA0O3_S5VAA0O3_S6Vs5Int16VtF")]
+    private static extern nint SwiftFunc42(F42_S0 a0, F42_S1 a1, ushort a2, F42_S2 a3, F42_S3 a4, F42_S4 a5, F42_S5 a6, F42_S6 a7, short a8);
+
+    [Fact]
+    public static void TestSwiftFunc42()
+    {
+        Console.Write("Running SwiftFunc42: ");
+        long result = SwiftFunc42(new F42_S0(1751713754, 1990881383827669198, 7688992749840190173), new F42_S1(2820409929234558, 403450751107933), 8553, new F42_S2(0, 4857265047176672349, new F42_S2_S0(unchecked((nint)1659771770143536426)), unchecked((nint)4175194780289529190)), new F42_S3(2068820, new F42_S3_S0(-19086)), new F42_S4(499069670), new F42_S5(new F42_S5_S0(82826892)), new F42_S6(unchecked((nuint)7728490038553858908)), -843);
+        Assert.Equal(-5733927999088121133, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F43_S0_S0
+    {
+        public long F0;
+
+        public F43_S0_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F43_S0
+    {
+        public F43_S0_S0 F0;
+
+        public F43_S0(F43_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc432a02a12a22a32a42a52a6Sis5Int64V_s5UInt8Vs4Int8VSfAKSiAA6F43_S0VtF")]
+    private static extern nint SwiftFunc43(long a0, byte a1, sbyte a2, float a3, long a4, nint a5, F43_S0 a6);
+
+    [Fact]
+    public static void TestSwiftFunc43()
+    {
+        Console.Write("Running SwiftFunc43: ");
+        long result = SwiftFunc43(4912883404842918819, 157, 103, 5202238, 1699534526741372140, unchecked((nint)5944804412045224395), new F43_S0(new F43_S0_S0(8392262032814776063)));
+        Assert.Equal(7967353118822572137, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F44_S0
+    {
+        public ulong F0;
+
+        public F44_S0(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc442a0SiAA6F44_S0V_tF")]
+    private static extern nint SwiftFunc44(F44_S0 a0);
+
+    [Fact]
+    public static void TestSwiftFunc44()
+    {
+        Console.Write("Running SwiftFunc44: ");
+        long result = SwiftFunc44(new F44_S0(6701010027402704605));
+        Assert.Equal(-2463268961390375024, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F45_S0
+    {
+        public double F0;
+        public nint F1;
+
+        public F45_S0(double f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F45_S1_S0
+    {
+        public double F0;
+
+        public F45_S1_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F45_S1_S1
+    {
+        public float F0;
+
+        public F45_S1_S1(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F45_S1
+    {
+        public ushort F0;
+        public sbyte F1;
+        public F45_S1_S0 F2;
+        public F45_S1_S1 F3;
+
+        public F45_S1(ushort f0, sbyte f1, F45_S1_S0 f2, F45_S1_S1 f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 14)]
+    struct F45_S2
+    {
+        public ulong F0;
+        public float F1;
+        public ushort F2;
+
+        public F45_S2(ulong f0, float f1, ushort f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc452a02a12a22a3SiAA6F45_S0V_AA0J3_S1VAA0J3_S2VSitF")]
+    private static extern nint SwiftFunc45(F45_S0 a0, F45_S1 a1, F45_S2 a2, nint a3);
+
+    [Fact]
+    public static void TestSwiftFunc45()
+    {
+        Console.Write("Running SwiftFunc45: ");
+        long result = SwiftFunc45(new F45_S0(3026820520892803, unchecked((nint)329722294948274546)), new F45_S1(13060, 14, new F45_S1_S0(173821703534560), new F45_S1_S1(6669558)), new F45_S2(7271072737280269762, 2970569, 7063), unchecked((nint)3563249765520844925));
+        Assert.Equal(6216079413995056174, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 25)]
+    struct F46_S0
+    {
+        public long F0;
+        public byte F1;
+        public nuint F2;
+        public sbyte F3;
+
+        public F46_S0(long f0, byte f1, nuint f2, sbyte f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F46_S1
+    {
+        public byte F0;
+
+        public F46_S1(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F46_S2
+    {
+        public nint F0;
+
+        public F46_S2(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F46_S3
+    {
+        public ulong F0;
+        public long F1;
+
+        public F46_S3(ulong f0, long f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F46_S4
+    {
+        public short F0;
+        public int F1;
+        public uint F2;
+
+        public F46_S4(short f0, int f1, uint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F46_S5
+    {
+        public ulong F0;
+
+        public F46_S5(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc462a02a12a22a32a42a52a62a72a82a93a103a113a123a13SiAA6F46_S0V_AA0T3_S1Vs4Int8VSfAA0T3_S2Vs5Int16VAA0T3_S3VAZSfAA0T3_S4Vs6UInt16VSfAvA0T3_S5VtF")]
+    private static extern nint SwiftFunc46(F46_S0 a0, F46_S1 a1, sbyte a2, float a3, F46_S2 a4, short a5, F46_S3 a6, short a7, float a8, F46_S4 a9, ushort a10, float a11, sbyte a12, F46_S5 a13);
+
+    [Fact]
+    public static void TestSwiftFunc46()
+    {
+        Console.Write("Running SwiftFunc46: ");
+        long result = SwiftFunc46(new F46_S0(717422391795779639, 78, unchecked((nuint)7060282015706292416), -116), new F46_S1(18), 3, 2507216, new F46_S2(unchecked((nint)4201483730092308719)), -18720, new F46_S3(2236255490462487034, 3838628161824947390), -9982, 5460360, new F46_S4(-4606, 1433117890, 835780718), 6752, 6275800, 91, new F46_S5(9211362063136377356));
+        Assert.Equal(343358650074914091, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 3)]
+    struct F47_S0_S0
+    {
+        public ushort F0;
+        public sbyte F1;
+
+        public F47_S0_S0(ushort f0, sbyte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F47_S0
+    {
+        public F47_S0_S0 F0;
+        public ushort F1;
+        public nuint F2;
+        public long F3;
+
+        public F47_S0(F47_S0_S0 f0, ushort f1, nuint f2, long f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct F47_S1
+    {
+        public long F0;
+        public byte F1;
+
+        public F47_S1(long f0, byte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc472a02a12a22a3S2i_AA6F47_S0VAA0J3_S1Vs5Int64VtF")]
+    private static extern nint SwiftFunc47(nint a0, F47_S0 a1, F47_S1 a2, long a3);
+
+    [Fact]
+    public static void TestSwiftFunc47()
+    {
+        Console.Write("Running SwiftFunc47: ");
+        long result = SwiftFunc47(unchecked((nint)4962370882457048382), new F47_S0(new F47_S0_S0(58684, -2), 23837, unchecked((nuint)2492821112189780145), 4191553673129943106), new F47_S1(3653010013906471970, 124), 4972057731925125595);
+        Assert.Equal(-2787387042865302571, result);
+        Console.WriteLine("OK");
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc482a02a12a22a32a42a52a6Sis4Int8V_s6UInt32Vs5Int16VSfSiSfAMtF")]
+    private static extern nint SwiftFunc48(sbyte a0, uint a1, short a2, float a3, nint a4, float a5, uint a6);
+
+    [Fact]
+    public static void TestSwiftFunc48()
+    {
+        Console.Write("Running SwiftFunc48: ");
+        long result = SwiftFunc48(93, 1756298153, -26153, 8138154, unchecked((nint)5977260391149529061), 5377189, 1353843369);
+        Assert.Equal(-1595422391414550142, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F49_S0
+    {
+        public ulong F0;
+
+        public F49_S0(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F49_S1_S0
+    {
+        public short F0;
+
+        public F49_S1_S0(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F49_S1_S1
+    {
+        public ushort F0;
+
+        public F49_S1_S1(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F49_S1
+    {
+        public F49_S1_S0 F0;
+        public int F1;
+        public F49_S1_S1 F2;
+        public nuint F3;
+
+        public F49_S1(F49_S1_S0 f0, int f1, F49_S1_S1 f2, nuint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F49_S2
+    {
+        public ushort F0;
+        public byte F1;
+        public float F2;
+        public long F3;
+
+        public F49_S2(ushort f0, byte f1, float f2, long f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F49_S3
+    {
+        public int F0;
+        public float F1;
+
+        public F49_S3(int f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F49_S4
+    {
+        public uint F0;
+        public nint F1;
+        public nint F2;
+
+        public F49_S4(uint f0, nint f1, nint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc492a02a12a22a32a42a52a62a72a82a93a103a113a123a13Sis6UInt64V_s5UInt8VAA6F49_S0VAA0V3_S1VSus6UInt32VSdAA0V3_S2VAA0V3_S3Vs4Int8VAA0V3_S4Vs5Int32VArTtF")]
+    private static extern nint SwiftFunc49(ulong a0, byte a1, F49_S0 a2, F49_S1 a3, nuint a4, uint a5, double a6, F49_S2 a7, F49_S3 a8, sbyte a9, F49_S4 a10, int a11, ulong a12, byte a13);
+
+    [Fact]
+    public static void TestSwiftFunc49()
+    {
+        Console.Write("Running SwiftFunc49: ");
+        long result = SwiftFunc49(1758884505462049879, 12, new F49_S0(1193104697993232570), new F49_S1(new F49_S1_S0(-23214), 1970325915, new F49_S1_S1(20900), unchecked((nuint)8432422526033383651)), unchecked((nuint)2433203633589099643), 1858554667, 2299996688980169, new F49_S2(65085, 158, 5839721, 6998202268068265472), new F49_S3(388389487, 5466404), -56, new F49_S4(1497255814, unchecked((nint)6665924212978484968), unchecked((nint)2332855076356772912)), 2065183786, 3874235334202874682, 6);
+        Assert.Equal(-6839703945099631142, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F50_S0
+    {
+        public sbyte F0;
+        public short F1;
+        public int F2;
+        public uint F3;
+
+        public F50_S0(sbyte f0, short f1, int f2, uint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F50_S1
+    {
+        public int F0;
+
+        public F50_S1(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc502a02a12a2SiAA6F50_S0V_s5UInt8VAA0I3_S1VtF")]
+    private static extern nint SwiftFunc50(F50_S0 a0, byte a1, F50_S1 a2);
+
+    [Fact]
+    public static void TestSwiftFunc50()
+    {
+        Console.Write("Running SwiftFunc50: ");
+        long result = SwiftFunc50(new F50_S0(-64, 4463, 1574267626, 1599903339), 22, new F50_S1(2042416614));
+        Assert.Equal(6447602248618864959, result);
+        Console.WriteLine("OK");
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc512a02a12a2Sis6UInt16V_s4Int8Vs5Int16VtF")]
+    private static extern nint SwiftFunc51(ushort a0, sbyte a1, short a2);
+
+    [Fact]
+    public static void TestSwiftFunc51()
+    {
+        Console.Write("Running SwiftFunc51: ");
+        long result = SwiftFunc51(44154, 95, 13522);
+        Assert.Equal(-2544044281448828766, result);
+        Console.WriteLine("OK");
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc522a02a1Sis5UInt8V_s6UInt64VtF")]
+    private static extern nint SwiftFunc52(byte a0, ulong a1);
+
+    [Fact]
+    public static void TestSwiftFunc52()
+    {
+        Console.Write("Running SwiftFunc52: ");
+        long result = SwiftFunc52(249, 1201897610107180823);
+        Assert.Equal(6106660152306827238, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F53_S0_S0
+    {
+        public long F0;
+        public nuint F1;
+
+        public F53_S0_S0(long f0, nuint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 27)]
+    struct F53_S0
+    {
+        public ulong F0;
+        public F53_S0_S0 F1;
+        public short F2;
+        public byte F3;
+
+        public F53_S0(ulong f0, F53_S0_S0 f1, short f2, byte f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F53_S1_S0
+    {
+        public long F0;
+
+        public F53_S1_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F53_S1
+    {
+        public F53_S1_S0 F0;
+
+        public F53_S1(F53_S1_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F53_S2
+    {
+        public byte F0;
+        public ulong F1;
+        public double F2;
+
+        public F53_S2(byte f0, ulong f1, double f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc532a02a12a22a32a42a52a62a7SiAA6F53_S0V_Sus6UInt64VSfs6UInt32VAA0N3_S1VAA0N3_S2VAPtF")]
+    private static extern nint SwiftFunc53(F53_S0 a0, nuint a1, ulong a2, float a3, uint a4, F53_S1 a5, F53_S2 a6, uint a7);
+
+    [Fact]
+    public static void TestSwiftFunc53()
+    {
+        Console.Write("Running SwiftFunc53: ");
+        long result = SwiftFunc53(new F53_S0(2962492598802212039, new F53_S0_S0(1217181921916443700, unchecked((nuint)7957002726435705223)), -18332, 65), unchecked((nuint)1996569991268125865), 2786689999092271249, 3627618, 1358803132, new F53_S1(new F53_S1_S0(6851624154761347887)), new F53_S2(12, 3669418545199894911, 3500804251230011), 1238561537);
+        Assert.Equal(609186359525793369, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F54_S0_S0
+    {
+        public nint F0;
+
+        public F54_S0_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F54_S0
+    {
+        public F54_S0_S0 F0;
+
+        public F54_S0(F54_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F54_S1
+    {
+        public uint F0;
+
+        public F54_S1(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc542a02a12a22a32a42a52a6Sis4Int8V_s5Int32Vs6UInt32VAA6F54_S0VSfs5UInt8VAA0P3_S1VtF")]
+    private static extern nint SwiftFunc54(sbyte a0, int a1, uint a2, F54_S0 a3, float a4, byte a5, F54_S1 a6);
+
+    [Fact]
+    public static void TestSwiftFunc54()
+    {
+        Console.Write("Running SwiftFunc54: ");
+        long result = SwiftFunc54(56, 918504001, 1944992063, new F54_S0(new F54_S0_S0(unchecked((nint)4622400191672284422))), 7815948, 27, new F54_S1(1866972157));
+        Assert.Equal(604312640974773799, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F55_S0
+    {
+        public double F0;
+
+        public F55_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc552a0SiAA6F55_S0V_tF")]
+    private static extern nint SwiftFunc55(F55_S0 a0);
+
+    [Fact]
+    public static void TestSwiftFunc55()
+    {
+        Console.Write("Running SwiftFunc55: ");
+        long result = SwiftFunc55(new F55_S0(2475083570077114));
+        Assert.Equal(4468870103647778776, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F56_S0_S0
+    {
+        public byte F0;
+
+        public F56_S0_S0(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 5)]
+    struct F56_S0
+    {
+        public float F0;
+        public F56_S0_S0 F1;
+
+        public F56_S0(float f0, F56_S0_S0 f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F56_S1_S0
+    {
+        public short F0;
+
+        public F56_S1_S0(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F56_S1
+    {
+        public F56_S1_S0 F0;
+        public double F1;
+        public nuint F2;
+        public uint F3;
+
+        public F56_S1(F56_S1_S0 f0, double f1, nuint f2, uint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F56_S2
+    {
+        public short F0;
+        public short F1;
+
+        public F56_S2(short f0, short f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F56_S3
+    {
+        public ushort F0;
+
+        public F56_S3(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F56_S4
+    {
+        public nuint F0;
+
+        public F56_S4(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc562a02a12a22a32a4SiAA6F56_S0V_AA0K3_S1VAA0K3_S2VAA0K3_S3VAA0K3_S4VtF")]
+    private static extern nint SwiftFunc56(F56_S0 a0, F56_S1 a1, F56_S2 a2, F56_S3 a3, F56_S4 a4);
+
+    [Fact]
+    public static void TestSwiftFunc56()
+    {
+        Console.Write("Running SwiftFunc56: ");
+        long result = SwiftFunc56(new F56_S0(3251221, new F56_S0_S0(89)), new F56_S1(new F56_S1_S0(-1474), 3308371901004609, unchecked((nuint)3728108803958130353), 1165879205), new F56_S2(-32579, 9771), new F56_S3(42395), new F56_S4(unchecked((nuint)3303076886770130768)));
+        Assert.Equal(7176775198947599357, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F57_S0
+    {
+        public sbyte F0;
+        public uint F1;
+
+        public F57_S0(sbyte f0, uint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F57_S1_S0
+    {
+        public uint F0;
+
+        public F57_S1_S0(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F57_S1_S1
+    {
+        public nuint F0;
+
+        public F57_S1_S1(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F57_S1
+    {
+        public F57_S1_S0 F0;
+        public F57_S1_S1 F1;
+        public short F2;
+
+        public F57_S1(F57_S1_S0 f0, F57_S1_S1 f1, short f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F57_S2
+    {
+        public nuint F0;
+
+        public F57_S2(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc572a02a12a22a32a42a5Sis6UInt32V_AA6F57_S0VAA0M3_S1VSuAA0M3_S2Vs5Int16VtF")]
+    private static extern nint SwiftFunc57(uint a0, F57_S0 a1, F57_S1 a2, nuint a3, F57_S2 a4, short a5);
+
+    [Fact]
+    public static void TestSwiftFunc57()
+    {
+        Console.Write("Running SwiftFunc57: ");
+        long result = SwiftFunc57(567633593, new F57_S0(-86, 696416112), new F57_S1(new F57_S1_S0(1314705768), new F57_S1_S1(unchecked((nuint)4597174980182436219)), 21486), unchecked((nuint)1438778133550518555), new F57_S2(unchecked((nuint)1802821206757818124)), 4133);
+        Assert.Equal(-4086487603375673584, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F58_S0
+    {
+        public long F0;
+
+        public F58_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 26)]
+    struct F58_S1
+    {
+        public nuint F0;
+        public nint F1;
+        public nuint F2;
+        public ushort F3;
+
+        public F58_S1(nuint f0, nint f1, nuint f2, ushort f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc582a02a12a22a32a42a52a62a72a82a93a10Sis5UInt8V_AOSiAA6F58_S0VSfs6UInt64Vs4Int8VAA0R3_S1Vs6UInt16Vs5Int64VA_tF")]
+    private static extern nint SwiftFunc58(byte a0, byte a1, nint a2, F58_S0 a3, float a4, ulong a5, sbyte a6, F58_S1 a7, ushort a8, long a9, long a10);
+
+    [Fact]
+    public static void TestSwiftFunc58()
+    {
+        Console.Write("Running SwiftFunc58: ");
+        long result = SwiftFunc58(51, 253, unchecked((nint)6470303599084560885), new F58_S0(356776366673201597), 612927, 1591484822310744993, -83, new F58_S1(unchecked((nuint)8720809519112624165), unchecked((nint)5290640035451064344), unchecked((nuint)991273095809135742), 45122), 55653, 5992020387203072133, 5336758723611801952);
+        Assert.Equal(-9219400197360619686, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F59_S0
+    {
+        public nuint F0;
+        public byte F1;
+        public float F2;
+        public nint F3;
+
+        public F59_S0(nuint f0, byte f1, float f2, nint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F59_S1
+    {
+        public byte F0;
+        public int F1;
+
+        public F59_S1(byte f0, int f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 13)]
+    struct F59_S2
+    {
+        public nint F0;
+        public uint F1;
+        public sbyte F2;
+
+        public F59_S2(nint f0, uint f1, sbyte f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F59_S3
+    {
+        public sbyte F0;
+        public float F1;
+        public int F2;
+
+        public F59_S3(sbyte f0, float f1, int f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F59_S4_S0
+    {
+        public byte F0;
+
+        public F59_S4_S0(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F59_S4
+    {
+        public F59_S4_S0 F0;
+
+        public F59_S4(F59_S4_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc592a02a12a22a32a42a52a62a72a82a93a103a113a123a13SiAA6F59_S0V_Sfs6UInt32VAA0T3_S1VAA0T3_S2Vs6UInt16VSfS2iS2us5Int16VAA0T3_S3VAA0T3_S4VtF")]
+    private static extern nint SwiftFunc59(F59_S0 a0, float a1, uint a2, F59_S1 a3, F59_S2 a4, ushort a5, float a6, nint a7, nint a8, nuint a9, nuint a10, short a11, F59_S3 a12, F59_S4 a13);
+
+    [Fact]
+    public static void TestSwiftFunc59()
+    {
+        Console.Write("Running SwiftFunc59: ");
+        long result = SwiftFunc59(new F59_S0(unchecked((nuint)1925278801109387173), 250, 6726955, unchecked((nint)4972956627127050696)), 5574199, 1873801510, new F59_S1(124, 272974688), new F59_S2(unchecked((nint)7596794567652280845), 243527419, -47), 26413, 6450212, unchecked((nint)5453709526903953920), unchecked((nint)7927376389197462736), unchecked((nuint)780576731665989106), unchecked((nuint)7709897378564152812), 32023, new F59_S3(80, 4147780, 732950914), new F59_S4(new F59_S4_S0(4)));
+        Assert.Equal(6864551615695935641, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F60_S0
+    {
+        public long F0;
+
+        public F60_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F60_S1
+    {
+        public uint F0;
+
+        public F60_S1(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc602a02a12a22a32a42a52a6Sis5Int32V_s4Int8VAKs6UInt16VSfAA6F60_S0VAA0P3_S1VtF")]
+    private static extern nint SwiftFunc60(int a0, sbyte a1, int a2, ushort a3, float a4, F60_S0 a5, F60_S1 a6);
+
+    [Fact]
+    public static void TestSwiftFunc60()
+    {
+        Console.Write("Running SwiftFunc60: ");
+        long result = SwiftFunc60(2069764774, -78, 1337682119, 39074, 1949913, new F60_S0(6466100081502457656), new F60_S1(762188122));
+        Assert.Equal(-4208534265899748964, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct F61_S0
+    {
+        public ushort F0;
+        public int F1;
+        public sbyte F2;
+
+        public F61_S0(ushort f0, int f1, sbyte f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F61_S1
+    {
+        public double F0;
+        public nint F1;
+
+        public F61_S1(double f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F61_S2
+    {
+        public nint F0;
+        public sbyte F1;
+        public float F2;
+        public ushort F3;
+        public float F4;
+
+        public F61_S2(nint f0, sbyte f1, float f2, ushort f3, float f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F61_S3
+    {
+        public uint F0;
+        public ulong F1;
+        public nuint F2;
+        public nuint F3;
+
+        public F61_S3(uint f0, ulong f1, nuint f2, nuint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F61_S4_S0
+    {
+        public byte F0;
+        public ulong F1;
+
+        public F61_S4_S0(byte f0, ulong f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F61_S4
+    {
+        public F61_S4_S0 F0;
+        public long F1;
+
+        public F61_S4(F61_S4_S0 f0, long f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc612a02a12a22a32a42a52a62a72a82a9SiAA6F61_S0V_s5UInt8VSfAA0P3_S1Vs4Int8Vs5Int64VAA0P3_S2VAA0P3_S3VAA0P3_S4Vs6UInt32VtF")]
+    private static extern nint SwiftFunc61(F61_S0 a0, byte a1, float a2, F61_S1 a3, sbyte a4, long a5, F61_S2 a6, F61_S3 a7, F61_S4 a8, uint a9);
+
+    [Fact]
+    public static void TestSwiftFunc61()
+    {
+        Console.Write("Running SwiftFunc61: ");
+        long result = SwiftFunc61(new F61_S0(37779, 1838776162, -93), 6, 8289829, new F61_S1(87047161428510, unchecked((nint)1184205589182482579)), -29, 6533985246090322241, new F61_S2(unchecked((nint)2633423837220013660), 79, 307426, 32687, 2612234), new F61_S3(1625158302, 1379744644931696533, unchecked((nuint)1592864959164045790), unchecked((nuint)1112656184684227017)), new F61_S4(new F61_S4_S0(196, 2188268123262546231), 2448137925649839798), 691942709);
+        Assert.Equal(-2463957420588616123, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F62_S0
+    {
+        public long F0;
+
+        public F62_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F62_S1
+    {
+        public float F0;
+
+        public F62_S1(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc622a02a12a22a3SiAA6F62_S0V_s5Int16Vs5Int32VAA0J3_S1VtF")]
+    private static extern nint SwiftFunc62(F62_S0 a0, short a1, int a2, F62_S1 a3);
+
+    [Fact]
+    public static void TestSwiftFunc62()
+    {
+        Console.Write("Running SwiftFunc62: ");
+        long result = SwiftFunc62(new F62_S0(7225726265078242156), 26594, 457232718, new F62_S1(5266624));
+        Assert.Equal(1111474357603006336, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F63_S0
+    {
+        public nint F0;
+
+        public F63_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc632a0SiAA6F63_S0V_tF")]
+    private static extern nint SwiftFunc63(F63_S0 a0);
+
+    [Fact]
+    public static void TestSwiftFunc63()
+    {
+        Console.Write("Running SwiftFunc63: ");
+        long result = SwiftFunc63(new F63_S0(unchecked((nint)8434688641118467652)));
+        Assert.Equal(6012989597022805528, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F64_S0
+    {
+        public double F0;
+        public ushort F1;
+        public int F2;
+        public nint F3;
+        public double F4;
+
+        public F64_S0(double f0, ushort f1, int f2, nint f3, double f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F64_S1
+    {
+        public int F0;
+        public float F1;
+        public uint F2;
+
+        public F64_S1(int f0, float f1, uint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc642a02a12a22a32a42a52a62a72a8SiSd_AA6F64_S0Vs5UInt8VAA0O3_S1Vs5Int32Vs6UInt64Vs4Int8VAWSftF")]
+    private static extern nint SwiftFunc64(double a0, F64_S0 a1, byte a2, F64_S1 a3, int a4, ulong a5, sbyte a6, sbyte a7, float a8);
+
+    [Fact]
+    public static void TestSwiftFunc64()
+    {
+        Console.Write("Running SwiftFunc64: ");
+        long result = SwiftFunc64(1537265878737137, new F64_S0(3855732434182818, 17371, 213617860, unchecked((nint)7735022256180276511), 3812880695456163), 18, new F64_S1(484340550, 65067, 1337805733), 1841310158, 1819062569669413729, 17, -123, 4111745);
+        Assert.Equal(2528424114157798731, result);
+        Console.WriteLine("OK");
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc652a02a12a22a3SiSf_SfSuSftF")]
+    private static extern nint SwiftFunc65(float a0, float a1, nuint a2, float a3);
+
+    [Fact]
+    public static void TestSwiftFunc65()
+    {
+        Console.Write("Running SwiftFunc65: ");
+        long result = SwiftFunc65(3752751, 4441416, unchecked((nuint)9195654236823676231), 1490781);
+        Assert.Equal(1666102926850087608, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F66_S0
+    {
+        public long F0;
+
+        public F66_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F66_S1_S0
+    {
+        public ushort F0;
+
+        public F66_S1_S0(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F66_S1
+    {
+        public F66_S1_S0 F0;
+        public float F1;
+
+        public F66_S1(F66_S1_S0 f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct F66_S2
+    {
+        public double F0;
+        public byte F1;
+
+        public F66_S2(double f0, byte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F66_S3
+    {
+        public nuint F0;
+
+        public F66_S3(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc662a02a12a22a3SiAA6F66_S0V_AA0J3_S1VAA0J3_S2VAA0J3_S3VtF")]
+    private static extern nint SwiftFunc66(F66_S0 a0, F66_S1 a1, F66_S2 a2, F66_S3 a3);
+
+    [Fact]
+    public static void TestSwiftFunc66()
+    {
+        Console.Write("Running SwiftFunc66: ");
+        long result = SwiftFunc66(new F66_S0(7984064468330042160), new F66_S1(new F66_S1_S0(61382), 2971351), new F66_S2(463407482163222, 36), new F66_S3(unchecked((nuint)2172521839193002776)));
+        Assert.Equal(4347440879386243204, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F67_S0
+    {
+        public ushort F0;
+
+        public F67_S0(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F67_S1_S0_S0
+    {
+        public long F0;
+
+        public F67_S1_S0_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F67_S1_S0
+    {
+        public F67_S1_S0_S0 F0;
+
+        public F67_S1_S0(F67_S1_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 14)]
+    struct F67_S1
+    {
+        public F67_S1_S0 F0;
+        public uint F1;
+        public short F2;
+
+        public F67_S1(F67_S1_S0 f0, uint f1, short f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc672a02a12a22a32a42a52a62a72a82a9Sis6UInt64V_s6UInt32Vs6UInt16Vs4Int8VAA6F67_S0VAnA0T3_S1VSuANs5Int64VtF")]
+    private static extern nint SwiftFunc67(ulong a0, uint a1, ushort a2, sbyte a3, F67_S0 a4, ulong a5, F67_S1 a6, nuint a7, ulong a8, long a9);
+
+    [Fact]
+    public static void TestSwiftFunc67()
+    {
+        Console.Write("Running SwiftFunc67: ");
+        long result = SwiftFunc67(8417618485778766232, 263682468, 8040, 53, new F67_S0(44582), 2312853538155696297, new F67_S1(new F67_S1_S0(new F67_S1_S0_S0(358347933181524465)), 74416027, -11715), unchecked((nuint)3013147554369331538), 8581312208688354849, 3394216999618959997);
+        Assert.Equal(-6725369964492065998, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F68_S0_S0_S0
+    {
+        public ushort F0;
+
+        public F68_S0_S0_S0(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F68_S0_S0
+    {
+        public F68_S0_S0_S0 F0;
+
+        public F68_S0_S0(F68_S0_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F68_S0
+    {
+        public F68_S0_S0 F0;
+
+        public F68_S0(F68_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F68_S1
+    {
+        public ulong F0;
+        public ushort F1;
+
+        public F68_S1(ulong f0, ushort f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F68_S2
+    {
+        public nuint F0;
+        public nint F1;
+        public ulong F2;
+        public double F3;
+
+        public F68_S2(nuint f0, nint f1, ulong f2, double f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F68_S3
+    {
+        public nint F0;
+        public uint F1;
+        public uint F2;
+        public nuint F3;
+
+        public F68_S3(nint f0, uint f1, uint f2, nuint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F68_S4
+    {
+        public int F0;
+
+        public F68_S4(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc682a02a12a22a32a42a52a62a72a82a93a103a113a123a133a143a15Sis6UInt16V_s5Int64Vs5Int16Vs6UInt64Vs4Int8Vs5Int32Vs5UInt8VAA6F68_S0VA4_AA6F68_S1VAxA6F68_S2VA2xA6F68_S3VAA6F68_S4VtF")]
+    private static extern nint SwiftFunc68(ushort a0, long a1, short a2, ulong a3, sbyte a4, int a5, byte a6, F68_S0 a7, byte a8, F68_S1 a9, short a10, F68_S2 a11, short a12, short a13, F68_S3 a14, F68_S4 a15);
+
+    [Fact]
+    public static void TestSwiftFunc68()
+    {
+        Console.Write("Running SwiftFunc68: ");
+        long result = SwiftFunc68(39378, 1879467527992319684, 2976, 7557363126592644195, -43, 2065185911, 186, new F68_S0(new F68_S0_S0(new F68_S0_S0_S0(38882))), 147, new F68_S1(7550657789172540141, 11186), 19125, new F68_S2(unchecked((nuint)7379823447100459002), unchecked((nint)2947420338952962953), 8170543862699682458, 4004920770933570), -12770, 19448, new F68_S3(unchecked((nint)4813886599424386410), 456733470, 2124904937, unchecked((nuint)4471482098861948789)), new F68_S4(1149728467));
+        Assert.Equal(7624816402828697114, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F69_S0
+    {
+        public uint F0;
+        public nuint F1;
+
+        public F69_S0(uint f0, nuint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F69_S1_S0_S0
+    {
+        public byte F0;
+
+        public F69_S1_S0_S0(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F69_S1_S0
+    {
+        public F69_S1_S0_S0 F0;
+        public sbyte F1;
+
+        public F69_S1_S0(F69_S1_S0_S0 f0, sbyte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F69_S1
+    {
+        public F69_S1_S0 F0;
+        public nuint F1;
+        public nint F2;
+
+        public F69_S1(F69_S1_S0 f0, nuint f1, nint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 11)]
+    struct F69_S2
+    {
+        public float F0;
+        public uint F1;
+        public ushort F2;
+        public sbyte F3;
+
+        public F69_S2(float f0, uint f1, ushort f2, sbyte f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F69_S3
+    {
+        public byte F0;
+        public double F1;
+
+        public F69_S3(byte f0, double f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F69_S4
+    {
+        public double F0;
+
+        public F69_S4(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F69_S5
+    {
+        public ulong F0;
+
+        public F69_S5(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc692a02a12a22a32a42a52a62a72a82a93a103a113a123a13SiAA6F69_S0V_AA0T3_S1VS2is6UInt16Vs5Int16VSdAA0T3_S2VAA0T3_S3VAA0T3_S4VSis5Int32VAA0T3_S5VSftF")]
+    private static extern nint SwiftFunc69(F69_S0 a0, F69_S1 a1, nint a2, nint a3, ushort a4, short a5, double a6, F69_S2 a7, F69_S3 a8, F69_S4 a9, nint a10, int a11, F69_S5 a12, float a13);
+
+    [Fact]
+    public static void TestSwiftFunc69()
+    {
+        Console.Write("Running SwiftFunc69: ");
+        long result = SwiftFunc69(new F69_S0(906404083, unchecked((nuint)2807168213757166759)), new F69_S1(new F69_S1_S0(new F69_S1_S0_S0(186), 23), unchecked((nuint)8471050292345736986), unchecked((nint)8019232101297716588)), unchecked((nint)1646897491666286061), unchecked((nint)4641745789339591736), 16462, 8795, 2000104158043033, new F69_S2(5507285, 2004746552, 63158, -120), new F69_S3(205, 3126404745245894), new F69_S4(1149593901597831), unchecked((nint)7568671357281245424), 32654713, new F69_S5(9162350932434820903), 7511550);
+        Assert.Equal(-6877731561846031803, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F70_S0
+    {
+        public float F0;
+        public long F1;
+
+        public F70_S0(float f0, long f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 6)]
+    struct F70_S1
+    {
+        public ushort F0;
+        public sbyte F1;
+        public short F2;
+
+        public F70_S1(ushort f0, sbyte f1, short f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F70_S2
+    {
+        public ushort F0;
+
+        public F70_S2(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F70_S3
+    {
+        public ushort F0;
+
+        public F70_S3(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc702a02a12a22a32a42a52a62a72a82a9Sis6UInt64V_AA6F70_S0Vs6UInt16Vs4Int8VSfAA0Q3_S1VSiAA0Q3_S2VAA0Q3_S3Vs6UInt32VtF")]
+    private static extern nint SwiftFunc70(ulong a0, F70_S0 a1, ushort a2, sbyte a3, float a4, F70_S1 a5, nint a6, F70_S2 a7, F70_S3 a8, uint a9);
+
+    [Fact]
+    public static void TestSwiftFunc70()
+    {
+        Console.Write("Running SwiftFunc70: ");
+        long result = SwiftFunc70(1536666996478548266, new F70_S0(7778910, 3166989107756003196), 13136, 22, 8164102, new F70_S1(26774, 89, 8871), unchecked((nint)3879856935687439957), new F70_S2(24302), new F70_S3(50084), 1197721391);
+        Assert.Equal(-4661551892929812411, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F71_S0
+    {
+        public nint F0;
+
+        public F71_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F71_S1
+    {
+        public ulong F0;
+
+        public F71_S1(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc712a02a12a22a32a42a5Sis5Int64V_AA6F71_S0Vs4Int8VAA0M3_S1VSfs6UInt32VtF")]
+    private static extern nint SwiftFunc71(long a0, F71_S0 a1, sbyte a2, F71_S1 a3, float a4, uint a5);
+
+    [Fact]
+    public static void TestSwiftFunc71()
+    {
+        Console.Write("Running SwiftFunc71: ");
+        long result = SwiftFunc71(823408652288450499, new F71_S0(unchecked((nint)1673096114526242440)), 64, new F71_S1(1767538531468972832), 3230384, 1139683594);
+        Assert.Equal(1763261422424450798, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F72_S0_S0
+    {
+        public nint F0;
+        public double F1;
+
+        public F72_S0_S0(nint f0, double f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F72_S0
+    {
+        public F72_S0_S0 F0;
+        public uint F1;
+
+        public F72_S0(F72_S0_S0 f0, uint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F72_S1
+    {
+        public nint F0;
+
+        public F72_S1(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F72_S2
+    {
+        public double F0;
+
+        public F72_S2(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc722a02a12a2SiAA6F72_S0V_AA0I3_S1VAA0I3_S2VtF")]
+    private static extern nint SwiftFunc72(F72_S0 a0, F72_S1 a1, F72_S2 a2);
+
+    [Fact]
+    public static void TestSwiftFunc72()
+    {
+        Console.Write("Running SwiftFunc72: ");
+        long result = SwiftFunc72(new F72_S0(new F72_S0_S0(unchecked((nint)42112534105392604), 2206378956781748), 13345585), new F72_S1(unchecked((nint)4236181300943972186)), new F72_S2(3246931881930745));
+        Assert.Equal(5209731649169576491, result);
+        Console.WriteLine("OK");
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc732a0Sis5Int64V_tF")]
+    private static extern nint SwiftFunc73(long a0);
+
+    [Fact]
+    public static void TestSwiftFunc73()
+    {
+        Console.Write("Running SwiftFunc73: ");
+        long result = SwiftFunc73(5717467830857180976);
+        Assert.Equal(4464612974464506231, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 17)]
+    struct F74_S0
+    {
+        public byte F0;
+        public byte F1;
+        public double F2;
+        public byte F3;
+
+        public F74_S0(byte f0, byte f1, double f2, byte f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F74_S1
+    {
+        public short F0;
+        public ushort F1;
+        public long F2;
+        public nuint F3;
+
+        public F74_S1(short f0, ushort f1, long f2, nuint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F74_S2
+    {
+        public short F0;
+        public double F1;
+        public float F2;
+
+        public F74_S2(short f0, double f1, float f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F74_S3
+    {
+        public short F0;
+
+        public F74_S3(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc742a02a12a22a32a42a52a62a72a82a9SiAA6F74_S0V_AA0P3_S1Vs5Int32VAA0P3_S2VSis5Int64Vs5Int16VArA0P3_S3Vs6UInt64VtF")]
+    private static extern nint SwiftFunc74(F74_S0 a0, F74_S1 a1, int a2, F74_S2 a3, nint a4, long a5, short a6, int a7, F74_S3 a8, ulong a9);
+
+    [Fact]
+    public static void TestSwiftFunc74()
+    {
+        Console.Write("Running SwiftFunc74: ");
+        long result = SwiftFunc74(new F74_S0(126, 165, 938186833815961, 37), new F74_S1(26448, 11115, 1477034907611479508, unchecked((nuint)7258103824495664788)), 1024717487, new F74_S2(-32191, 3877433950972112, 1759541), unchecked((nint)306022299836100497), 3906031458927364257, 105, 1354045377, new F74_S3(15217), 2609577929968659839);
+        Assert.Equal(4852068750102322513, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F75_S0_S0_S0
+    {
+        public short F0;
+
+        public F75_S0_S0_S0(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F75_S0_S0
+    {
+        public F75_S0_S0_S0 F0;
+
+        public F75_S0_S0(F75_S0_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F75_S0
+    {
+        public F75_S0_S0 F0;
+        public double F1;
+        public int F2;
+
+        public F75_S0(F75_S0_S0 f0, double f1, int f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F75_S1_S0_S0
+    {
+        public ushort F0;
+
+        public F75_S1_S0_S0(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F75_S1_S0
+    {
+        public nuint F0;
+        public F75_S1_S0_S0 F1;
+        public long F2;
+
+        public F75_S1_S0(nuint f0, F75_S1_S0_S0 f1, long f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F75_S1
+    {
+        public F75_S1_S0 F0;
+        public nint F1;
+
+        public F75_S1(F75_S1_S0 f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F75_S2
+    {
+        public ulong F0;
+
+        public F75_S2(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc752a02a12a22a32a42a52a6SiAA6F75_S0V_SdSiSuSiAA0M3_S1VAA0M3_S2VtF")]
+    private static extern nint SwiftFunc75(F75_S0 a0, double a1, nint a2, nuint a3, nint a4, F75_S1 a5, F75_S2 a6);
+
+    [Fact]
+    public static void TestSwiftFunc75()
+    {
+        Console.Write("Running SwiftFunc75: ");
+        long result = SwiftFunc75(new F75_S0(new F75_S0_S0(new F75_S0_S0_S0(-10229)), 989267098871942, 1700151366), 1809179048674038, unchecked((nint)8327532491216230311), unchecked((nuint)2400790938015665595), unchecked((nint)9058430068368278195), new F75_S1(new F75_S1_S0(unchecked((nuint)2568090042127844270), new F75_S1_S0_S0(56529), 7258043284683232822), unchecked((nint)2580496344876818585)), new F75_S2(2518371079686790475));
+        Assert.Equal(-3602049946494757864, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F76_S0
+    {
+        public nint F0;
+
+        public F76_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 14)]
+    struct F76_S1
+    {
+        public ulong F0;
+        public int F1;
+        public short F2;
+
+        public F76_S1(ulong f0, int f1, short f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F76_S2
+    {
+        public uint F0;
+
+        public F76_S2(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F76_S3
+    {
+        public nint F0;
+
+        public F76_S3(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc762a02a12a22a32a42a52a62a72a82a93a103a113a123a133a143a153a163a173a183a193a20SiSd_s5Int64Vs6UInt16VS2fAA6F76_S0Vs5Int16VAA6F76_S1VAYs6UInt64VA_s5UInt8Vs4Int8VSiAYA11_A11_A3_A_AA6F76_S2VAA6F76_S3VtF")]
+    private static extern nint SwiftFunc76(double a0, long a1, ushort a2, float a3, float a4, F76_S0 a5, short a6, F76_S1 a7, long a8, ulong a9, ushort a10, byte a11, sbyte a12, nint a13, long a14, sbyte a15, sbyte a16, short a17, ushort a18, F76_S2 a19, F76_S3 a20);
+
+    [Fact]
+    public static void TestSwiftFunc76()
+    {
+        Console.Write("Running SwiftFunc76: ");
+        long result = SwiftFunc76(3446176204630463, 6827398998366360089, 5999, 2160153, 1821316, new F76_S0(unchecked((nint)4235786039908553749)), -1803, new F76_S1(7640434214516127655, 1290566778, -25932), 5980518466723941005, 3543741927421110901, 27548, 183, -92, unchecked((nint)2974474557334557206), 6986327999611060205, -10, -27, -1377, 28809, new F76_S2(971874601), new F76_S3(unchecked((nint)1638507434850613054)));
+        Assert.Equal(1945785605876240600, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F77_S0_S0
+    {
+        public sbyte F0;
+
+        public F77_S0_S0(sbyte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F77_S0
+    {
+        public ulong F0;
+        public F77_S0_S0 F1;
+        public sbyte F2;
+
+        public F77_S0(ulong f0, F77_S0_S0 f1, sbyte f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F77_S1
+    {
+        public ulong F0;
+        public nint F1;
+        public int F2;
+
+        public F77_S1(ulong f0, nint f1, int f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F77_S2_S0_S0
+    {
+        public ushort F0;
+
+        public F77_S2_S0_S0(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F77_S2_S0
+    {
+        public F77_S2_S0_S0 F0;
+
+        public F77_S2_S0(F77_S2_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 6)]
+    struct F77_S2
+    {
+        public F77_S2_S0 F0;
+        public short F1;
+        public sbyte F2;
+        public byte F3;
+
+        public F77_S2(F77_S2_S0 f0, short f1, sbyte f2, byte f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 26)]
+    struct F77_S3
+    {
+        public nint F0;
+        public nint F1;
+        public nint F2;
+        public short F3;
+
+        public F77_S3(nint f0, nint f1, nint f2, short f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F77_S4
+    {
+        public double F0;
+        public sbyte F1;
+        public uint F2;
+        public short F3;
+        public uint F4;
+
+        public F77_S4(double f0, sbyte f1, uint f2, short f3, uint f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F77_S5
+    {
+        public nuint F0;
+
+        public F77_S5(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc772a02a12a22a32a42a52a62a72a82a93a10SiAA6F77_S0V_s5Int16VAA0Q3_S1Vs6UInt32VAA0Q3_S2VAA0Q3_S3VAA0Q3_S4Vs6UInt64VAA0Q3_S5Vs6UInt16VSftF")]
+    private static extern nint SwiftFunc77(F77_S0 a0, short a1, F77_S1 a2, uint a3, F77_S2 a4, F77_S3 a5, F77_S4 a6, ulong a7, F77_S5 a8, ushort a9, float a10);
+
+    [Fact]
+    public static void TestSwiftFunc77()
+    {
+        Console.Write("Running SwiftFunc77: ");
+        long result = SwiftFunc77(new F77_S0(5280239821396586490, new F77_S0_S0(-88), -25), -22596, new F77_S1(7240134379191021288, unchecked((nint)7659208338594056339), 884422905), 1341388922, new F77_S2(new F77_S2_S0(new F77_S2_S0_S0(45223)), 7237, -31, 116), new F77_S3(unchecked((nint)1688714381756854732), unchecked((nint)22701789196637865), unchecked((nint)76294687751840896), -6664), new F77_S4(668345825700173, -66, 484390251, -29179, 1983850392), 2083761371968657768, new F77_S5(unchecked((nuint)8754131797018708878)), 60699, 6889813);
+        Assert.Equal(6252428118328671717, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F78_S0
+    {
+        public ushort F0;
+        public nuint F1;
+
+        public F78_S0(ushort f0, nuint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc782a02a1SiAA6F78_S0V_s6UInt64VtF")]
+    private static extern nint SwiftFunc78(F78_S0 a0, ulong a1);
+
+    [Fact]
+    public static void TestSwiftFunc78()
+    {
+        Console.Write("Running SwiftFunc78: ");
+        long result = SwiftFunc78(new F78_S0(29770, unchecked((nuint)3187763107953451651)), 8011100719593217510);
+        Assert.Equal(-3469054734849002121, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F79_S0
+    {
+        public double F0;
+
+        public F79_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc792a02a12a22a3Sis6UInt32V_AA6F79_S0Vs5Int16VSdtF")]
+    private static extern nint SwiftFunc79(uint a0, F79_S0 a1, short a2, double a3);
+
+    [Fact]
+    public static void TestSwiftFunc79()
+    {
+        Console.Write("Running SwiftFunc79: ");
+        long result = SwiftFunc79(125852033, new F79_S0(589854369615867), 32411, 2567161537252427);
+        Assert.Equal(6919439799927692524, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F80_S0
+    {
+        public ulong F0;
+        public double F1;
+
+        public F80_S0(ulong f0, double f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F80_S1_S0
+    {
+        public byte F0;
+
+        public F80_S1_S0(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 13)]
+    struct F80_S1
+    {
+        public int F0;
+        public ushort F1;
+        public uint F2;
+        public F80_S1_S0 F3;
+
+        public F80_S1(int f0, ushort f1, uint f2, F80_S1_S0 f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 22)]
+    struct F80_S2
+    {
+        public ulong F0;
+        public long F1;
+        public uint F2;
+        public ushort F3;
+
+        public F80_S2(ulong f0, long f1, uint f2, ushort f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F80_S3_S0_S0
+    {
+        public nint F0;
+        public long F1;
+        public ulong F2;
+
+        public F80_S3_S0_S0(nint f0, long f1, ulong f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F80_S3_S0
+    {
+        public F80_S3_S0_S0 F0;
+        public uint F1;
+
+        public F80_S3_S0(F80_S3_S0_S0 f0, uint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F80_S3
+    {
+        public F80_S3_S0 F0;
+        public int F1;
+
+        public F80_S3(F80_S3_S0 f0, int f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F80_S4_S0
+    {
+        public float F0;
+
+        public F80_S4_S0(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F80_S4
+    {
+        public F80_S4_S0 F0;
+
+        public F80_S4(F80_S4_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc802a02a12a22a32a42a52a62a72a82a93a10SiAA6F80_S0V_AA0Q3_S1Vs6UInt16Vs5Int64VAA0Q3_S2VSds6UInt64Vs5Int32VAA0Q3_S3VAA0Q3_S4Vs5UInt8VtF")]
+    private static extern nint SwiftFunc80(F80_S0 a0, F80_S1 a1, ushort a2, long a3, F80_S2 a4, double a5, ulong a6, int a7, F80_S3 a8, F80_S4 a9, byte a10);
+
+    [Fact]
+    public static void TestSwiftFunc80()
+    {
+        Console.Write("Running SwiftFunc80: ");
+        long result = SwiftFunc80(new F80_S0(1355360960230091831, 1784308328429357), new F80_S1(1545826500, 60913, 1298907936, new F80_S1_S0(91)), 45929, 1430265567693421435, new F80_S2(5983675317199180530, 4061656029212457057, 1539740932, 57372), 3111292213584236, 1408283785399541904, 157768849, new F80_S3(new F80_S3_S0(new F80_S3_S0_S0(unchecked((nint)7843547046297667291), 5997146939658037534, 1422472621224237194), 579010799), 912968372), new F80_S4(new F80_S4_S0(6160826)), 91);
+        Assert.Equal(-8787757710984015171, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 22)]
+    struct F81_S0
+    {
+        public double F0;
+        public ulong F1;
+        public uint F2;
+        public byte F3;
+        public byte F4;
+
+        public F81_S0(double f0, ulong f1, uint f2, byte f3, byte f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F81_S1
+    {
+        public uint F0;
+
+        public F81_S1(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc812a02a12a22a3SiAA6F81_S0V_s5Int32VSfAA0J3_S1VtF")]
+    private static extern nint SwiftFunc81(F81_S0 a0, int a1, float a2, F81_S1 a3);
+
+    [Fact]
+    public static void TestSwiftFunc81()
+    {
+        Console.Write("Running SwiftFunc81: ");
+        long result = SwiftFunc81(new F81_S0(624904807476328, 8333634025352587313, 1193792370, 12, 123), 1584141967, 2042869, new F81_S1(929252664));
+        Assert.Equal(-2553305027552835633, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 17)]
+    struct F82_S0
+    {
+        public int F0;
+        public short F1;
+        public ulong F2;
+        public sbyte F3;
+
+        public F82_S0(int f0, short f1, ulong f2, sbyte f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F82_S1_S0
+    {
+        public long F0;
+
+        public F82_S1_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F82_S1
+    {
+        public nint F0;
+        public int F1;
+        public F82_S1_S0 F2;
+
+        public F82_S1(nint f0, int f1, F82_S1_S0 f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F82_S2
+    {
+        public nint F0;
+        public long F1;
+        public uint F2;
+        public ushort F3;
+        public long F4;
+
+        public F82_S2(nint f0, long f1, uint f2, ushort f3, long f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F82_S3
+    {
+        public byte F0;
+
+        public F82_S3(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc822a02a12a22a32a42a5SiAA6F82_S0V_AA0L3_S1VAA0L3_S2Vs6UInt32VSiAA0L3_S3VtF")]
+    private static extern nint SwiftFunc82(F82_S0 a0, F82_S1 a1, F82_S2 a2, uint a3, nint a4, F82_S3 a5);
+
+    [Fact]
+    public static void TestSwiftFunc82()
+    {
+        Console.Write("Running SwiftFunc82: ");
+        long result = SwiftFunc82(new F82_S0(1831859482, 13125, 959732722373954890, -77), new F82_S1(unchecked((nint)7895140590879382739), 1095783280, new F82_S1_S0(5569113039995240408)), new F82_S2(unchecked((nint)1146619146691566258), 9105860583981760040, 869172650, 46264, 3390698350483049795), 64268535, unchecked((nint)3935081377884943159), new F82_S3(152));
+        Assert.Equal(545035333243758818, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F83_S0_S0
+    {
+        public byte F0;
+
+        public F83_S0_S0(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F83_S0
+    {
+        public F83_S0_S0 F0;
+        public nint F1;
+        public float F2;
+
+        public F83_S0(F83_S0_S0 f0, nint f1, float f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F83_S1_S0
+    {
+        public double F0;
+
+        public F83_S1_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F83_S1_S1_S0
+    {
+        public ushort F0;
+
+        public F83_S1_S1_S0(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F83_S1_S1
+    {
+        public F83_S1_S1_S0 F0;
+
+        public F83_S1_S1(F83_S1_S1_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F83_S1
+    {
+        public uint F0;
+        public F83_S1_S0 F1;
+        public F83_S1_S1 F2;
+
+        public F83_S1(uint f0, F83_S1_S0 f1, F83_S1_S1 f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F83_S2
+    {
+        public nint F0;
+
+        public F83_S2(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc832a02a12a22a32a42a52a62a7SiSf_AA6F83_S0VAA0N3_S1Vs5Int16VSiSfAA0N3_S2Vs6UInt16VtF")]
+    private static extern nint SwiftFunc83(float a0, F83_S0 a1, F83_S1 a2, short a3, nint a4, float a5, F83_S2 a6, ushort a7);
+
+    [Fact]
+    public static void TestSwiftFunc83()
+    {
+        Console.Write("Running SwiftFunc83: ");
+        long result = SwiftFunc83(215523, new F83_S0(new F83_S0_S0(156), unchecked((nint)6215307075393311297), 6861006), new F83_S1(2039967569, new F83_S1_S0(225951511203809), new F83_S1_S1(new F83_S1_S1_S0(4596))), -9234, unchecked((nint)5460548577590073953), 5802323, new F83_S2(unchecked((nint)7383303204767349238)), 26127);
+        Assert.Equal(-2186229543452098356, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F84_S0
+    {
+        public short F0;
+        public sbyte F1;
+        public ushort F2;
+        public long F3;
+        public short F4;
+
+        public F84_S0(short f0, sbyte f1, ushort f2, long f3, short f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F84_S1
+    {
+        public int F0;
+
+        public F84_S1(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F84_S2_S0
+    {
+        public byte F0;
+        public ulong F1;
+
+        public F84_S2_S0(byte f0, ulong f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct F84_S2
+    {
+        public nuint F0;
+        public F84_S2_S0 F1;
+        public sbyte F2;
+        public double F3;
+
+        public F84_S2(nuint f0, F84_S2_S0 f1, sbyte f2, double f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F84_S3
+    {
+        public uint F0;
+
+        public F84_S3(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F84_S4
+    {
+        public float F0;
+
+        public F84_S4(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc842a02a12a22a32a42a52a62a72a82a93a103a113a12SiAA6F84_S0V_AA0S3_S1Vs6UInt64VAA0S3_S2Vs6UInt32VAA0S3_S3VSuAA0S3_S4VA2Us6UInt16Vs5Int16VSftF")]
+    private static extern nint SwiftFunc84(F84_S0 a0, F84_S1 a1, ulong a2, F84_S2 a3, uint a4, F84_S3 a5, nuint a6, F84_S4 a7, ulong a8, ulong a9, ushort a10, short a11, float a12);
+
+    [Fact]
+    public static void TestSwiftFunc84()
+    {
+        Console.Write("Running SwiftFunc84: ");
+        long result = SwiftFunc84(new F84_S0(-4484, -42, 64729, 6703360336708764515, -523), new F84_S1(1991025572), 3784369034793798079, new F84_S2(unchecked((nuint)8950003885832387073), new F84_S2_S0(212, 2246460359298562967), 110, 694425580701573), 590396201, new F84_S3(954246473), unchecked((nuint)4968200866033916175), new F84_S4(7222444), 6840076578020772755, 257938017424612706, 10826, 12362, 5240097);
+        Assert.Equal(6470148389371753355, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F85_S0_S0_S0
+    {
+        public float F0;
+
+        public F85_S0_S0_S0(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F85_S0_S0
+    {
+        public int F0;
+        public F85_S0_S0_S0 F1;
+
+        public F85_S0_S0(int f0, F85_S0_S0_S0 f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F85_S0
+    {
+        public float F0;
+        public F85_S0_S0 F1;
+        public nint F2;
+        public long F3;
+
+        public F85_S0(float f0, F85_S0_S0 f1, nint f2, long f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F85_S1
+    {
+        public uint F0;
+        public int F1;
+
+        public F85_S1(uint f0, int f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F85_S2
+    {
+        public nuint F0;
+
+        public F85_S2(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc852a02a12a22a32a42a5SiAA6F85_S0V_AA0L3_S1VAA0L3_S2Vs4Int8Vs6UInt32Vs5Int16VtF")]
+    private static extern nint SwiftFunc85(F85_S0 a0, F85_S1 a1, F85_S2 a2, sbyte a3, uint a4, short a5);
+
+    [Fact]
+    public static void TestSwiftFunc85()
+    {
+        Console.Write("Running SwiftFunc85: ");
+        long result = SwiftFunc85(new F85_S0(4799349, new F85_S0_S0(1649441954, new F85_S0_S0_S0(7944727)), unchecked((nint)9152994697049435513), 7643247514693376306), new F85_S1(1545626492, 422887320), new F85_S2(unchecked((nuint)6616620791022054982)), -117, 995038971, 27513);
+        Assert.Equal(-8992223142373774956, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 22)]
+    struct F86_S0
+    {
+        public int F0;
+        public long F1;
+        public int F2;
+        public ushort F3;
+
+        public F86_S0(int f0, long f1, int f2, ushort f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F86_S1_S0
+    {
+        public nuint F0;
+
+        public F86_S1_S0(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F86_S1
+    {
+        public F86_S1_S0 F0;
+        public ushort F1;
+
+        public F86_S1(F86_S1_S0 f0, ushort f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F86_S2
+    {
+        public uint F0;
+
+        public F86_S2(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F86_S3
+    {
+        public short F0;
+
+        public F86_S3(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F86_S4
+    {
+        public nint F0;
+
+        public F86_S4(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F86_S5
+    {
+        public short F0;
+
+        public F86_S5(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc862a02a12a22a32a42a52a62a72a82a9SiAA6F86_S0V_S2iSuAA0P3_S1VAA0P3_S2Vs6UInt64VAA0P3_S3VAA0P3_S4VAA0P3_S5VtF")]
+    private static extern nint SwiftFunc86(F86_S0 a0, nint a1, nint a2, nuint a3, F86_S1 a4, F86_S2 a5, ulong a6, F86_S3 a7, F86_S4 a8, F86_S5 a9);
+
+    [Fact]
+    public static void TestSwiftFunc86()
+    {
+        Console.Write("Running SwiftFunc86: ");
+        long result = SwiftFunc86(new F86_S0(1811942942, 5011425012386160741, 1789481754, 51980), unchecked((nint)6881030792370586912), unchecked((nint)1013091832294910089), unchecked((nuint)7426318018252287878), new F86_S1(new F86_S1_S0(unchecked((nuint)3709534733156518030)), 31161), new F86_S2(2110662074), 1492552132987044101, new F86_S3(18839), new F86_S4(unchecked((nint)3005766501093981786)), new F86_S5(-10373));
+        Assert.Equal(4527117515781509085, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F87_S0_S0
+    {
+        public long F0;
+
+        public F87_S0_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F87_S0
+    {
+        public F87_S0_S0 F0;
+        public float F1;
+        public long F2;
+        public double F3;
+
+        public F87_S0(F87_S0_S0 f0, float f1, long f2, double f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F87_S1
+    {
+        public int F0;
+
+        public F87_S1(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F87_S2_S0
+    {
+        public ushort F0;
+
+        public F87_S2_S0(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F87_S2
+    {
+        public F87_S2_S0 F0;
+
+        public F87_S2(F87_S2_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F87_S3
+    {
+        public int F0;
+
+        public F87_S3(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc872a02a12a22a32a42a52a62a72a82a93a103a113a123a13Sis5Int64V_AA6F87_S0VSus5UInt8VSds5Int16Vs6UInt64VSdSfAA0U3_S1VArA0U3_S2VAA0U3_S3VSftF")]
+    private static extern nint SwiftFunc87(long a0, F87_S0 a1, nuint a2, byte a3, double a4, short a5, ulong a6, double a7, float a8, F87_S1 a9, long a10, F87_S2 a11, F87_S3 a12, float a13);
+
+    [Fact]
+    public static void TestSwiftFunc87()
+    {
+        Console.Write("Running SwiftFunc87: ");
+        long result = SwiftFunc87(8841098117509422820, new F87_S0(new F87_S0_S0(2192442345186020478), 1545304, 750118731442317544, 3418050830544628), unchecked((nuint)6369165430746397674), 71, 487868533855774, -7094, 2907086057865536952, 1643866436526662, 2614039, new F87_S1(248182038), 6870063012628711946, new F87_S2(new F87_S2_S0(30623)), new F87_S3(1817616635), 3689131);
+        Assert.Equal(359195416647062356, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F88_S0
+    {
+        public byte F0;
+        public long F1;
+        public ulong F2;
+        public nint F3;
+
+        public F88_S0(byte f0, long f1, ulong f2, nint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F88_S1
+    {
+        public long F0;
+        public byte F1;
+        public ushort F2;
+
+        public F88_S1(long f0, byte f1, ushort f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F88_S2
+    {
+        public uint F0;
+
+        public F88_S2(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F88_S3_S0
+    {
+        public nint F0;
+
+        public F88_S3_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F88_S3
+    {
+        public int F0;
+        public F88_S3_S0 F1;
+        public sbyte F2;
+        public ushort F3;
+
+        public F88_S3(int f0, F88_S3_S0 f1, sbyte f2, ushort f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F88_S4_S0
+    {
+        public float F0;
+
+        public F88_S4_S0(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 36)]
+    struct F88_S4
+    {
+        public ushort F0;
+        public nuint F1;
+        public sbyte F2;
+        public nint F3;
+        public F88_S4_S0 F4;
+
+        public F88_S4(ushort f0, nuint f1, sbyte f2, nint f3, F88_S4_S0 f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F88_S5
+    {
+        public float F0;
+
+        public F88_S5(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F88_S6
+    {
+        public uint F0;
+
+        public F88_S6(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F88_S7_S0
+    {
+        public nint F0;
+
+        public F88_S7_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F88_S7
+    {
+        public F88_S7_S0 F0;
+
+        public F88_S7(F88_S7_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc882a02a12a22a32a42a52a62a72a82a93a10SiAA6F88_S0V_s4Int8VAA0Q3_S1Vs6UInt64VAA0Q3_S2VAA0Q3_S3VAA0Q3_S4Vs5Int16VAA0Q3_S5VAA0Q3_S6VAA0Q3_S7VtF")]
+    private static extern nint SwiftFunc88(F88_S0 a0, sbyte a1, F88_S1 a2, ulong a3, F88_S2 a4, F88_S3 a5, F88_S4 a6, short a7, F88_S5 a8, F88_S6 a9, F88_S7 a10);
+
+    [Fact]
+    public static void TestSwiftFunc88()
+    {
+        Console.Write("Running SwiftFunc88: ");
+        long result = SwiftFunc88(new F88_S0(66, 2515475983225256977, 8461123965387740223, unchecked((nint)6118352888016174162)), 0, new F88_S1(2355530907227990563, 120, 33210), 2006620539850377306, new F88_S2(2040050135), new F88_S3(1424272615, new F88_S3_S0(unchecked((nint)1176474304741776688)), -37, 57192), new F88_S4(57186, unchecked((nuint)3158759263845266986), 126, unchecked((nint)2352285611293949590), new F88_S4_S0(148232)), -10009, new F88_S5(6466089), new F88_S6(552549040), new F88_S7(new F88_S7_S0(unchecked((nint)4375596076925501643))));
+        Assert.Equal(-6799924240836522873, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F89_S0
+    {
+        public byte F0;
+        public sbyte F1;
+
+        public F89_S0(byte f0, sbyte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F89_S1
+    {
+        public int F0;
+
+        public F89_S1(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F89_S2
+    {
+        public ushort F0;
+
+        public F89_S2(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F89_S3
+    {
+        public double F0;
+        public double F1;
+
+        public F89_S3(double f0, double f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F89_S4
+    {
+        public uint F0;
+
+        public F89_S4(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc892a02a12a22a32a42a52a6SiAA6F89_S0V_AA0M3_S1VAA0M3_S2Vs5UInt8VAA0M3_S3VAA0M3_S4Vs5Int32VtF")]
+    private static extern nint SwiftFunc89(F89_S0 a0, F89_S1 a1, F89_S2 a2, byte a3, F89_S3 a4, F89_S4 a5, int a6);
+
+    [Fact]
+    public static void TestSwiftFunc89()
+    {
+        Console.Write("Running SwiftFunc89: ");
+        long result = SwiftFunc89(new F89_S0(3, -70), new F89_S1(1399800474), new F89_S2(4503), 65, new F89_S3(2901632902048261, 1806714347370258), new F89_S4(536267264), 1925050147);
+        Assert.Equal(-127506756024963910, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F90_S0
+    {
+        public ushort F0;
+        public nint F1;
+
+        public F90_S0(ushort f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F90_S1_S0
+    {
+        public nint F0;
+
+        public F90_S1_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F90_S1
+    {
+        public F90_S1_S0 F0;
+        public nuint F1;
+        public double F2;
+
+        public F90_S1(F90_S1_S0 f0, nuint f1, double f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F90_S2
+    {
+        public ulong F0;
+        public nint F1;
+        public ushort F2;
+
+        public F90_S2(ulong f0, nint f1, ushort f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F90_S3_S0
+    {
+        public long F0;
+
+        public F90_S3_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F90_S3
+    {
+        public F90_S3_S0 F0;
+
+        public F90_S3(F90_S3_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F90_S4
+    {
+        public long F0;
+
+        public F90_S4(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc902a02a12a22a32a42a52a62a7SiAA6F90_S0V_s4Int8VAA0N3_S1VAA0N3_S2VAA0N3_S3Vs6UInt32VAA0N3_S4Vs5UInt8VtF")]
+    private static extern nint SwiftFunc90(F90_S0 a0, sbyte a1, F90_S1 a2, F90_S2 a3, F90_S3 a4, uint a5, F90_S4 a6, byte a7);
+
+    [Fact]
+    public static void TestSwiftFunc90()
+    {
+        Console.Write("Running SwiftFunc90: ");
+        long result = SwiftFunc90(new F90_S0(50891, unchecked((nint)3526500586501844267)), 106, new F90_S1(new F90_S1_S0(unchecked((nint)1338488761303901988)), unchecked((nuint)6173879610835810848), 2724509546394616), new F90_S2(6787849318922951518, unchecked((nint)4947656706973797515), 31166), new F90_S3(new F90_S3_S0(9145287685889642436)), 126339746, new F90_S4(7529643579107652424), 32);
+        Assert.Equal(3094701713551479277, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F91_S0_S0
+    {
+        public int F0;
+
+        public F91_S0_S0(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F91_S0
+    {
+        public F91_S0_S0 F0;
+        public uint F1;
+        public nint F2;
+
+        public F91_S0(F91_S0_S0 f0, uint f1, nint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc912a02a1SiAA6F91_S0V_s5UInt8VtF")]
+    private static extern nint SwiftFunc91(F91_S0 a0, byte a1);
+
+    [Fact]
+    public static void TestSwiftFunc91()
+    {
+        Console.Write("Running SwiftFunc91: ");
+        long result = SwiftFunc91(new F91_S0(new F91_S0_S0(1253970930), 1885655301, unchecked((nint)148902531378116685)), 122);
+        Assert.Equal(887289976736078648, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F92_S0
+    {
+        public ushort F0;
+        public ushort F1;
+
+        public F92_S0(ushort f0, ushort f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F92_S1
+    {
+        public ulong F0;
+
+        public F92_S1(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F92_S2
+    {
+        public ulong F0;
+        public ulong F1;
+
+        public F92_S2(ulong f0, ulong f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F92_S3
+    {
+        public nuint F0;
+
+        public F92_S3(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc922a02a12a22a32a42a52a62a72a82a93a103a113a123a133a143a153a163a173a183a19Sis5Int16V_s6UInt64VSus5Int64VAA6F92_S0VA0_Sds5UInt8Vs4Int8Vs6UInt32VA6_AA6F92_S1VA8_SfAZA4_s5Int32VA8_AA6F92_S2VAA6F92_S3VtF")]
+    private static extern nint SwiftFunc92(short a0, ulong a1, nuint a2, long a3, F92_S0 a4, long a5, double a6, byte a7, sbyte a8, uint a9, sbyte a10, F92_S1 a11, uint a12, float a13, ulong a14, byte a15, int a16, uint a17, F92_S2 a18, F92_S3 a19);
+
+    [Fact]
+    public static void TestSwiftFunc92()
+    {
+        Console.Write("Running SwiftFunc92: ");
+        long result = SwiftFunc92(21276, 3146876064491681609, unchecked((nuint)3037098519528577447), 9061597632723103558, new F92_S0(4967, 61949), 4798856485492542774, 4305543426365472, 182, -21, 270986478, -37, new F92_S1(7527241857214360309), 1301049439, 6192745, 8959151295191616689, 19, 1578403390, 633901437, new F92_S2(4396088615663569948, 4797465448959123058), new F92_S3(unchecked((nuint)7386458829492133332)));
+        Assert.Equal(-7871787038267731510, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F93_S0
+    {
+        public int F0;
+        public nuint F1;
+        public double F2;
+
+        public F93_S0(int f0, nuint f1, double f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F93_S1
+    {
+        public uint F0;
+
+        public F93_S1(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F93_S2
+    {
+        public double F0;
+
+        public F93_S2(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc932a02a12a22a32a4SiAA6F93_S0V_SiAA0K3_S1VSdAA0K3_S2VtF")]
+    private static extern nint SwiftFunc93(F93_S0 a0, nint a1, F93_S1 a2, double a3, F93_S2 a4);
+
+    [Fact]
+    public static void TestSwiftFunc93()
+    {
+        Console.Write("Running SwiftFunc93: ");
+        long result = SwiftFunc93(new F93_S0(982459422, unchecked((nuint)1427174739694078549), 2736620007792094), unchecked((nint)5873331022463084971), new F93_S1(1169579606), 2110866269939297, new F93_S2(2364749142642625));
+        Assert.Equal(432632740260631481, result);
+        Console.WriteLine("OK");
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc942a02a12a22a3Sis6UInt64V_s5Int32VAHs5Int64VtF")]
+    private static extern nint SwiftFunc94(ulong a0, int a1, ulong a2, long a3);
+
+    [Fact]
+    public static void TestSwiftFunc94()
+    {
+        Console.Write("Running SwiftFunc94: ");
+        long result = SwiftFunc94(2878691982818555531, 580037131, 3143309402030542876, 3739683344990129550);
+        Assert.Equal(-330124951832302022, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F95_S0
+    {
+        public long F0;
+
+        public F95_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc952a02a1SiAA6F95_S0V_s5Int64VtF")]
+    private static extern nint SwiftFunc95(F95_S0 a0, long a1);
+
+    [Fact]
+    public static void TestSwiftFunc95()
+    {
+        Console.Write("Running SwiftFunc95: ");
+        long result = SwiftFunc95(new F95_S0(7113705515120682426), 2532424238121218748);
+        Assert.Equal(-5365348133343237200, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F96_S0_S0
+    {
+        public nint F0;
+
+        public F96_S0_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F96_S0
+    {
+        public ulong F0;
+        public double F1;
+        public double F2;
+        public F96_S0_S0 F3;
+
+        public F96_S0(ulong f0, double f1, double f2, F96_S0_S0 f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F96_S1_S0_S0
+    {
+        public double F0;
+
+        public F96_S1_S0_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F96_S1_S0
+    {
+        public F96_S1_S0_S0 F0;
+
+        public F96_S1_S0(F96_S1_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F96_S1
+    {
+        public F96_S1_S0 F0;
+
+        public F96_S1(F96_S1_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F96_S2
+    {
+        public byte F0;
+        public float F1;
+
+        public F96_S2(byte f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F96_S3
+    {
+        public ushort F0;
+
+        public F96_S3(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F96_S4
+    {
+        public nint F0;
+
+        public F96_S4(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F96_S5_S0
+    {
+        public byte F0;
+
+        public F96_S5_S0(byte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F96_S5
+    {
+        public F96_S5_S0 F0;
+
+        public F96_S5(F96_S5_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F96_S6
+    {
+        public ulong F0;
+
+        public F96_S6(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc962a02a12a22a32a42a52a62a72a82a93a103a113a123a133a143a153a163a173a18Sis6UInt16V_AA6F96_S0VAA0Z3_S1VAA0Z3_S2VAWs6UInt64VSis5Int32Vs5Int16VSuAA0Z3_S3VA7_Sis4Int8VA5_s6UInt32VAA0Z3_S4VAA0Z3_S5VAA0Z3_S6VtF")]
+    private static extern nint SwiftFunc96(ushort a0, F96_S0 a1, F96_S1 a2, F96_S2 a3, ushort a4, ulong a5, nint a6, int a7, short a8, nuint a9, F96_S3 a10, short a11, nint a12, sbyte a13, int a14, uint a15, F96_S4 a16, F96_S5 a17, F96_S6 a18);
+
+    [Fact]
+    public static void TestSwiftFunc96()
+    {
+        Console.Write("Running SwiftFunc96: ");
+        long result = SwiftFunc96(21321, new F96_S0(3140378485759721513, 3334385568992933, 2434271617187235, new F96_S0_S0(unchecked((nint)6455348790423327394))), new F96_S1(new F96_S1_S0(new F96_S1_S0_S0(2421227444572952))), new F96_S2(72, 1265762), 13171, 4895217822310904030, unchecked((nint)5923562627585381292), 1083710828, 12717, unchecked((nuint)8000948766038488291), new F96_S3(43225), -19602, unchecked((nint)248571613858478112), 17, 514773482, 1555810858, new F96_S4(unchecked((nint)5975988026010739585)), new F96_S5(new F96_S5_S0(231)), new F96_S6(4299230038366602170));
+        Assert.Equal(-9154394486464436217, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F97_S0
+    {
+        public float F0;
+        public float F1;
+        public nint F2;
+        public nint F3;
+        public nint F4;
+
+        public F97_S0(float f0, float f1, nint f2, nint f3, nint f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc972a02a12a22a32a42a52a62a7Sis4Int8V_s5Int32Vs5UInt8Vs6UInt32VApA6F97_S0VALSitF")]
+    private static extern nint SwiftFunc97(sbyte a0, int a1, byte a2, uint a3, byte a4, F97_S0 a5, sbyte a6, nint a7);
+
+    [Fact]
+    public static void TestSwiftFunc97()
+    {
+        Console.Write("Running SwiftFunc97: ");
+        long result = SwiftFunc97(-90, 2040542494, 255, 990214241, 129, new F97_S0(3372147, 5204115, unchecked((nint)4061871110726583367), unchecked((nint)5498225315328650601), unchecked((nint)4096658558391048200)), -91, unchecked((nint)8125330763927981736));
+        Assert.Equal(-4028368897548286667, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F98_S0_S0_S0
+    {
+        public float F0;
+
+        public F98_S0_S0_S0(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F98_S0_S0
+    {
+        public nuint F0;
+        public F98_S0_S0_S0 F1;
+
+        public F98_S0_S0(nuint f0, F98_S0_S0_S0 f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F98_S0
+    {
+        public long F0;
+        public F98_S0_S0 F1;
+        public nuint F2;
+
+        public F98_S0(long f0, F98_S0_S0 f1, nuint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc982a02a12a22a32a42a5SiAA6F98_S0V_s6UInt16VALs5Int16Vs4Int8Vs6UInt32VtF")]
+    private static extern nint SwiftFunc98(F98_S0 a0, ushort a1, ushort a2, short a3, sbyte a4, uint a5);
+
+    [Fact]
+    public static void TestSwiftFunc98()
+    {
+        Console.Write("Running SwiftFunc98: ");
+        long result = SwiftFunc98(new F98_S0(3497167808648160462, new F98_S0_S0(unchecked((nuint)2747735625017321807), new F98_S0_S0_S0(4681050)), unchecked((nuint)3446511732552970390)), 61052, 18880, -20869, 35, 1056152744);
+        Assert.Equal(7350111494379160095, result);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F99_S0
+    {
+        public ulong F0;
+        public ushort F1;
+        public float F2;
+        public ulong F3;
+
+        public F99_S0(ulong f0, ushort f1, float f2, ulong f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F99_S1_S0
+    {
+        public uint F0;
+
+        public F99_S1_S0(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F99_S1
+    {
+        public F99_S1_S0 F0;
+
+        public F99_S1(F99_S1_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s14SwiftAbiStress11swiftFunc992a02a12a22a3SiAA6F99_S0V_s4Int8VAA0J3_S1Vs5Int64VtF")]
+    private static extern nint SwiftFunc99(F99_S0 a0, sbyte a1, F99_S1 a2, long a3);
+
+    [Fact]
+    public static void TestSwiftFunc99()
+    {
+        Console.Write("Running SwiftFunc99: ");
+        long result = SwiftFunc99(new F99_S0(1210929052346596858, 3796, 3904675, 8849045203219202310), 97, new F99_S1(new F99_S1_S0(498956895)), 241968587946267390);
+        Assert.Equal(7941122870613797512, result);
+        Console.WriteLine("OK");
+    }
+
+}
diff --git a/src/tests/Interop/Swift/SwiftAbiStress/SwiftAbiStress.csproj b/src/tests/Interop/Swift/SwiftAbiStress/SwiftAbiStress.csproj
new file mode 100644
index 000000000000..a57cd84cf884
--- /dev/null
+++ b/src/tests/Interop/Swift/SwiftAbiStress/SwiftAbiStress.csproj
@@ -0,0 +1,16 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <!-- Needed for CLRTestTargetUnsupported, CMakeProjectReference -->
+    <RequiresProcessIsolation>true</RequiresProcessIsolation>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/93631 -->
+    <CLRTestTargetUnsupported Condition="'$(TargetsOSX)' != 'true' and '$(TargetsAppleMobile)' != 'true'">true</CLRTestTargetUnsupported>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="$(TestLibraryProjectPath)" />
+    <CMakeProjectReference Include="CMakeLists.txt" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/Interop/Swift/SwiftAbiStress/SwiftAbiStress.swift b/src/tests/Interop/Swift/SwiftAbiStress/SwiftAbiStress.swift
new file mode 100644
index 000000000000..081cee599370
--- /dev/null
+++ b/src/tests/Interop/Swift/SwiftAbiStress/SwiftAbiStress.swift
@@ -0,0 +1,4662 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+import Foundation
+
+struct HasherFNV1a {
+
+    private var hash: UInt = 14_695_981_039_346_656_037
+    private let prime: UInt = 1_099_511_628_211
+
+    mutating func combine<T>(_ val: T) {
+        for byte in withUnsafeBytes(of: val, Array.init) {
+            hash ^= UInt(byte)
+            hash = hash &* prime
+        }
+    }
+
+    func finalize() -> Int {
+        Int(truncatingIfNeeded: hash)
+    }
+}
+
+@frozen
+public struct F0_S0
+{
+    public let f0 : Double;
+    public let f1 : UInt32;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F0_S1
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F0_S2
+{
+    public let f0 : Float;
+}
+
+public func swiftFunc0(a0: Int16, a1: Int32, a2: UInt64, a3: UInt16, a4: F0_S0, a5: F0_S1, a6: UInt8, a7: F0_S2) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a4.f2);
+    hasher.combine(a5.f0);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F1_S0
+{
+    public let f0 : Int64;
+    public let f1 : Double;
+    public let f2 : Int8;
+    public let f3 : Int32;
+    public let f4 : UInt16;
+}
+
+@frozen
+public struct F1_S1
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F1_S2
+{
+    public let f0 : Int16;
+}
+
+public func swiftFunc1(a0: F1_S0, a1: UInt8, a2: F1_S1, a3: F1_S2) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a0.f4);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a3.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F2_S0
+{
+    public let f0 : Int;
+    public let f1 : UInt;
+}
+
+@frozen
+public struct F2_S1
+{
+    public let f0 : Int64;
+    public let f1 : Int32;
+    public let f2 : Int16;
+    public let f3 : Int64;
+    public let f4 : UInt16;
+}
+
+@frozen
+public struct F2_S2_S0_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F2_S2_S0
+{
+    public let f0 : F2_S2_S0_S0;
+}
+
+@frozen
+public struct F2_S2
+{
+    public let f0 : F2_S2_S0;
+}
+
+@frozen
+public struct F2_S3
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F2_S4
+{
+    public let f0 : Int32;
+    public let f1 : UInt;
+}
+
+@frozen
+public struct F2_S5
+{
+    public let f0 : Float;
+}
+
+public func swiftFunc2(a0: Int64, a1: Int16, a2: Int32, a3: F2_S0, a4: UInt8, a5: Int32, a6: F2_S1, a7: F2_S2, a8: UInt16, a9: Float, a10: F2_S3, a11: F2_S4, a12: F2_S5, a13: Int64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a6.f2);
+    hasher.combine(a6.f3);
+    hasher.combine(a6.f4);
+    hasher.combine(a7.f0.f0.f0);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10.f0);
+    hasher.combine(a11.f0);
+    hasher.combine(a11.f1);
+    hasher.combine(a12.f0);
+    hasher.combine(a13);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F3_S0_S0
+{
+    public let f0 : Int;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F3_S0
+{
+    public let f0 : Int8;
+    public let f1 : F3_S0_S0;
+    public let f2 : UInt32;
+}
+
+@frozen
+public struct F3_S1
+{
+    public let f0 : Int64;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F3_S2
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F3_S3
+{
+    public let f0 : UInt8;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F3_S4
+{
+    public let f0 : UInt;
+    public let f1 : Float;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F3_S5
+{
+    public let f0 : UInt32;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct F3_S6_S0
+{
+    public let f0 : Int16;
+    public let f1 : UInt8;
+}
+
+@frozen
+public struct F3_S6
+{
+    public let f0 : F3_S6_S0;
+    public let f1 : Int8;
+    public let f2 : UInt8;
+}
+
+@frozen
+public struct F3_S7
+{
+    public let f0 : UInt64;
+}
+
+public func swiftFunc3(a0: Int, a1: F3_S0, a2: F3_S1, a3: Double, a4: Int, a5: F3_S2, a6: F3_S3, a7: F3_S4, a8: F3_S5, a9: UInt16, a10: Int32, a11: F3_S6, a12: Int, a13: F3_S7) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1.f0);
+    hasher.combine(a1.f1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5.f0);
+    hasher.combine(a6.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a7.f0);
+    hasher.combine(a7.f1);
+    hasher.combine(a7.f2);
+    hasher.combine(a8.f0);
+    hasher.combine(a8.f1);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    hasher.combine(a11.f0.f0);
+    hasher.combine(a11.f0.f1);
+    hasher.combine(a11.f1);
+    hasher.combine(a11.f2);
+    hasher.combine(a12);
+    hasher.combine(a13.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F4_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int16;
+    public let f2 : Int16;
+}
+
+@frozen
+public struct F4_S1_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F4_S1
+{
+    public let f0 : F4_S1_S0;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F4_S2_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F4_S2
+{
+    public let f0 : F4_S2_S0;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F4_S3
+{
+    public let f0 : UInt64;
+    public let f1 : UInt64;
+    public let f2 : Int64;
+}
+
+public func swiftFunc4(a0: Int, a1: F4_S0, a2: UInt, a3: UInt64, a4: Int8, a5: Double, a6: F4_S1, a7: UInt8, a8: Int32, a9: UInt32, a10: UInt64, a11: F4_S2, a12: Int16, a13: Int, a14: F4_S3, a15: UInt32) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6.f0.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    hasher.combine(a11.f0.f0);
+    hasher.combine(a11.f1);
+    hasher.combine(a12);
+    hasher.combine(a13);
+    hasher.combine(a14.f0);
+    hasher.combine(a14.f1);
+    hasher.combine(a14.f2);
+    hasher.combine(a15);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F5_S0
+{
+    public let f0 : UInt;
+}
+
+public func swiftFunc5(a0: UInt, a1: UInt64, a2: UInt8, a3: F5_S0) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F6_S0
+{
+    public let f0 : Int32;
+    public let f1 : Int;
+    public let f2 : UInt8;
+}
+
+@frozen
+public struct F6_S1
+{
+    public let f0 : Int;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F6_S2_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F6_S2
+{
+    public let f0 : F6_S2_S0;
+    public let f1 : UInt16;
+}
+
+@frozen
+public struct F6_S3
+{
+    public let f0 : Double;
+    public let f1 : Double;
+    public let f2 : UInt64;
+}
+
+@frozen
+public struct F6_S4
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F6_S5
+{
+    public let f0 : Int16;
+}
+
+public func swiftFunc6(a0: Int64, a1: F6_S0, a2: F6_S1, a3: UInt, a4: UInt8, a5: Int32, a6: F6_S2, a7: Float, a8: Int16, a9: F6_S3, a10: UInt16, a11: Double, a12: UInt32, a13: F6_S4, a14: F6_S5) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6.f0.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9.f0);
+    hasher.combine(a9.f1);
+    hasher.combine(a9.f2);
+    hasher.combine(a10);
+    hasher.combine(a11);
+    hasher.combine(a12);
+    hasher.combine(a13.f0);
+    hasher.combine(a14.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F7_S0
+{
+    public let f0 : Int16;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F7_S1
+{
+    public let f0 : UInt8;
+}
+
+public func swiftFunc7(a0: Int64, a1: Int, a2: UInt8, a3: F7_S0, a4: F7_S1, a5: UInt32) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a4.f0);
+    hasher.combine(a5);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F8_S0
+{
+    public let f0 : Int32;
+}
+
+public func swiftFunc8(a0: UInt16, a1: UInt, a2: UInt16, a3: UInt64, a4: F8_S0, a5: UInt64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a5);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F9_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F9_S1
+{
+    public let f0 : Int32;
+}
+
+public func swiftFunc9(a0: Int64, a1: Float, a2: F9_S0, a3: UInt16, a4: F9_S1, a5: UInt16) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a5);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F10_S0
+{
+    public let f0 : Int64;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F10_S1
+{
+    public let f0 : Float;
+    public let f1 : UInt8;
+    public let f2 : UInt;
+}
+
+@frozen
+public struct F10_S2
+{
+    public let f0 : UInt;
+    public let f1 : UInt64;
+}
+
+@frozen
+public struct F10_S3
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F10_S4
+{
+    public let f0 : Int64;
+}
+
+public func swiftFunc10(a0: UInt16, a1: UInt16, a2: F10_S0, a3: UInt64, a4: Float, a5: Int8, a6: Int64, a7: UInt64, a8: Int64, a9: Float, a10: Int32, a11: Int32, a12: Int64, a13: UInt64, a14: F10_S1, a15: Int64, a16: F10_S2, a17: F10_S3, a18: F10_S4) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    hasher.combine(a11);
+    hasher.combine(a12);
+    hasher.combine(a13);
+    hasher.combine(a14.f0);
+    hasher.combine(a14.f1);
+    hasher.combine(a14.f2);
+    hasher.combine(a15);
+    hasher.combine(a16.f0);
+    hasher.combine(a16.f1);
+    hasher.combine(a17.f0);
+    hasher.combine(a18.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F11_S0
+{
+    public let f0 : Int16;
+    public let f1 : Int8;
+    public let f2 : UInt64;
+    public let f3 : Int16;
+}
+
+@frozen
+public struct F11_S1
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F11_S2
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F11_S3_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F11_S3
+{
+    public let f0 : F11_S3_S0;
+}
+
+public func swiftFunc11(a0: Int, a1: UInt64, a2: UInt8, a3: Int16, a4: F11_S0, a5: F11_S1, a6: UInt16, a7: Double, a8: Int, a9: UInt32, a10: F11_S2, a11: F11_S3, a12: Int8) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a4.f2);
+    hasher.combine(a4.f3);
+    hasher.combine(a5.f0);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10.f0);
+    hasher.combine(a11.f0.f0);
+    hasher.combine(a12);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F12_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F12_S1
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F12_S2
+{
+    public let f0 : UInt;
+}
+
+public func swiftFunc12(a0: UInt8, a1: Int32, a2: F12_S0, a3: Int8, a4: F12_S1, a5: F12_S2, a6: UInt32, a7: Int16, a8: Int8, a9: Int8, a10: UInt32, a11: UInt8) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a5.f0);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    hasher.combine(a11);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F13_S0_S0_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F13_S0_S0
+{
+    public let f0 : F13_S0_S0_S0;
+}
+
+@frozen
+public struct F13_S0
+{
+    public let f0 : Int8;
+    public let f1 : F13_S0_S0;
+}
+
+@frozen
+public struct F13_S1_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F13_S1
+{
+    public let f0 : F13_S1_S0;
+}
+
+public func swiftFunc13(a0: Int8, a1: Double, a2: F13_S0, a3: F13_S1, a4: Int8, a5: Double) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1.f0.f0);
+    hasher.combine(a3.f0.f0);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F14_S0
+{
+    public let f0 : Int;
+}
+
+public func swiftFunc14(a0: Int8, a1: Int, a2: F14_S0, a3: Float, a4: UInt) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F15_S0
+{
+    public let f0 : Float;
+    public let f1 : Int16;
+    public let f2 : UInt8;
+    public let f3 : Int64;
+    public let f4 : Double;
+}
+
+@frozen
+public struct F15_S1_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F15_S1
+{
+    public let f0 : UInt32;
+    public let f1 : F15_S1_S0;
+    public let f2 : UInt;
+    public let f3 : Int32;
+}
+
+public func swiftFunc15(a0: F15_S0, a1: UInt64, a2: UInt32, a3: UInt, a4: UInt64, a5: Int16, a6: F15_S1, a7: Int64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a0.f4);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6.f0);
+    hasher.combine(a6.f1.f0);
+    hasher.combine(a6.f2);
+    hasher.combine(a6.f3);
+    hasher.combine(a7);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F16_S0_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F16_S0
+{
+    public let f0 : Int;
+    public let f1 : Int;
+    public let f2 : F16_S0_S0;
+}
+
+@frozen
+public struct F16_S1
+{
+    public let f0 : Int16;
+    public let f1 : UInt64;
+    public let f2 : UInt32;
+}
+
+@frozen
+public struct F16_S2
+{
+    public let f0 : UInt8;
+    public let f1 : UInt64;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F16_S3
+{
+    public let f0 : Int32;
+}
+
+public func swiftFunc16(a0: UInt64, a1: F16_S0, a2: F16_S1, a3: UInt16, a4: Int16, a5: F16_S2, a6: F16_S3) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2.f0);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5.f0);
+    hasher.combine(a5.f1);
+    hasher.combine(a5.f2);
+    hasher.combine(a6.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F17_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F17_S1
+{
+    public let f0 : Int64;
+    public let f1 : UInt;
+    public let f2 : UInt64;
+}
+
+@frozen
+public struct F17_S2
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F17_S3
+{
+    public let f0 : Int8;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F17_S4
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F17_S5
+{
+    public let f0 : Int64;
+}
+
+public func swiftFunc17(a0: F17_S0, a1: Int8, a2: F17_S1, a3: Int8, a4: UInt, a5: F17_S2, a6: Int64, a7: F17_S3, a8: F17_S4, a9: F17_S5) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5.f0);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    hasher.combine(a7.f1);
+    hasher.combine(a8.f0);
+    hasher.combine(a9.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F18_S0_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int16;
+}
+
+@frozen
+public struct F18_S0
+{
+    public let f0 : UInt32;
+    public let f1 : F18_S0_S0;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F18_S1
+{
+    public let f0 : Int;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F18_S2_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F18_S2
+{
+    public let f0 : UInt64;
+    public let f1 : Int64;
+    public let f2 : UInt8;
+    public let f3 : F18_S2_S0;
+}
+
+public func swiftFunc18(a0: UInt8, a1: Double, a2: F18_S0, a3: F18_S1, a4: UInt16, a5: Int64, a6: UInt64, a7: F18_S2, a8: UInt64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1.f0);
+    hasher.combine(a2.f1.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    hasher.combine(a7.f1);
+    hasher.combine(a7.f2);
+    hasher.combine(a7.f3.f0);
+    hasher.combine(a8);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F19_S0
+{
+    public let f0 : Int;
+    public let f1 : Double;
+    public let f2 : UInt16;
+}
+
+public func swiftFunc19(a0: UInt, a1: F19_S0, a2: Int16) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a2);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F20_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int8;
+    public let f2 : UInt64;
+    public let f3 : UInt32;
+    public let f4 : UInt64;
+}
+
+@frozen
+public struct F20_S1
+{
+    public let f0 : Int64;
+}
+
+public func swiftFunc20(a0: Int8, a1: F20_S0, a2: UInt64, a3: Int, a4: F20_S1, a5: UInt8, a6: Int64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a1.f3);
+    hasher.combine(a1.f4);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F21_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F21_S1
+{
+    public let f0 : Int;
+    public let f1 : UInt32;
+    public let f2 : UInt8;
+    public let f3 : Int16;
+}
+
+@frozen
+public struct F21_S2
+{
+    public let f0 : Int8;
+    public let f1 : UInt64;
+    public let f2 : Int64;
+    public let f3 : UInt8;
+}
+
+@frozen
+public struct F21_S3
+{
+    public let f0 : Double;
+    public let f1 : Int;
+}
+
+public func swiftFunc21(a0: UInt64, a1: Int8, a2: UInt, a3: Double, a4: Float, a5: Int, a6: F21_S0, a7: F21_S1, a8: UInt16, a9: F21_S2, a10: UInt8, a11: F21_S3, a12: Int16) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6.f0);
+    hasher.combine(a7.f0);
+    hasher.combine(a7.f1);
+    hasher.combine(a7.f2);
+    hasher.combine(a7.f3);
+    hasher.combine(a8);
+    hasher.combine(a9.f0);
+    hasher.combine(a9.f1);
+    hasher.combine(a9.f2);
+    hasher.combine(a9.f3);
+    hasher.combine(a10);
+    hasher.combine(a11.f0);
+    hasher.combine(a11.f1);
+    hasher.combine(a12);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F22_S0
+{
+    public let f0 : UInt16;
+    public let f1 : UInt32;
+    public let f2 : Int16;
+    public let f3 : Float;
+}
+
+@frozen
+public struct F22_S1
+{
+    public let f0 : UInt16;
+    public let f1 : Int8;
+    public let f2 : UInt8;
+    public let f3 : Int;
+    public let f4 : Int;
+}
+
+@frozen
+public struct F22_S2_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F22_S2
+{
+    public let f0 : Int32;
+    public let f1 : Int32;
+    public let f2 : UInt32;
+    public let f3 : UInt8;
+    public let f4 : F22_S2_S0;
+}
+
+@frozen
+public struct F22_S3
+{
+    public let f0 : Int16;
+    public let f1 : Double;
+    public let f2 : Double;
+    public let f3 : Int32;
+}
+
+public func swiftFunc22(a0: Int8, a1: Int32, a2: F22_S0, a3: F22_S1, a4: F22_S2, a5: UInt64, a6: F22_S3, a7: UInt) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a2.f3);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a3.f2);
+    hasher.combine(a3.f3);
+    hasher.combine(a3.f4);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a4.f2);
+    hasher.combine(a4.f3);
+    hasher.combine(a4.f4.f0);
+    hasher.combine(a5);
+    hasher.combine(a6.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a6.f2);
+    hasher.combine(a6.f3);
+    hasher.combine(a7);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F23_S0
+{
+    public let f0 : UInt32;
+    public let f1 : Int16;
+}
+
+@frozen
+public struct F23_S1
+{
+    public let f0 : UInt;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F23_S2
+{
+    public let f0 : Double;
+    public let f1 : UInt32;
+    public let f2 : Int32;
+    public let f3 : UInt8;
+}
+
+public func swiftFunc23(a0: F23_S0, a1: F23_S1, a2: F23_S2, a3: Double, a4: UInt64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a2.f3);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F24_S0
+{
+    public let f0 : Int8;
+    public let f1 : Int32;
+}
+
+@frozen
+public struct F24_S1
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F24_S2
+{
+    public let f0 : UInt16;
+    public let f1 : Int16;
+    public let f2 : Double;
+    public let f3 : UInt;
+}
+
+@frozen
+public struct F24_S3
+{
+    public let f0 : Int;
+}
+
+public func swiftFunc24(a0: F24_S0, a1: F24_S1, a2: F24_S2, a3: F24_S3, a4: UInt, a5: UInt32) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a1.f0);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a2.f3);
+    hasher.combine(a3.f0);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F25_S0_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F25_S0
+{
+    public let f0 : Float;
+    public let f1 : F25_S0_S0;
+    public let f2 : UInt32;
+}
+
+@frozen
+public struct F25_S1
+{
+    public let f0 : Int16;
+    public let f1 : Int8;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F25_S2
+{
+    public let f0 : Int64;
+    public let f1 : UInt16;
+}
+
+@frozen
+public struct F25_S3
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F25_S4
+{
+    public let f0 : UInt16;
+}
+
+public func swiftFunc25(a0: Float, a1: F25_S0, a2: Int64, a3: UInt8, a4: F25_S1, a5: Int, a6: F25_S2, a7: Int32, a8: Int32, a9: UInt, a10: UInt64, a11: F25_S3, a12: F25_S4) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1.f0);
+    hasher.combine(a1.f2);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a4.f2);
+    hasher.combine(a5);
+    hasher.combine(a6.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    hasher.combine(a11.f0);
+    hasher.combine(a12.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F26_S0
+{
+    public let f0 : Double;
+}
+
+public func swiftFunc26(a0: UInt16, a1: Double, a2: Int64, a3: F26_S0, a4: UInt8) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a4);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F27_S0_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F27_S0
+{
+    public let f0 : UInt16;
+    public let f1 : F27_S0_S0;
+    public let f2 : Double;
+}
+
+@frozen
+public struct F27_S1
+{
+    public let f0 : Int;
+    public let f1 : Int8;
+    public let f2 : Int16;
+    public let f3 : UInt8;
+}
+
+@frozen
+public struct F27_S2
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F27_S3
+{
+    public let f0 : UInt64;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F27_S4
+{
+    public let f0 : UInt8;
+}
+
+public func swiftFunc27(a0: F27_S0, a1: Double, a2: Double, a3: Int8, a4: Int8, a5: F27_S1, a6: Int16, a7: F27_S2, a8: Int8, a9: UInt16, a10: F27_S3, a11: F27_S4, a12: UInt32) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1.f0);
+    hasher.combine(a0.f2);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5.f0);
+    hasher.combine(a5.f1);
+    hasher.combine(a5.f2);
+    hasher.combine(a5.f3);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10.f0);
+    hasher.combine(a10.f1);
+    hasher.combine(a11.f0);
+    hasher.combine(a12);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F28_S0
+{
+    public let f0 : Double;
+    public let f1 : Int16;
+    public let f2 : Double;
+    public let f3 : UInt64;
+}
+
+@frozen
+public struct F28_S1
+{
+    public let f0 : Int;
+    public let f1 : UInt32;
+    public let f2 : UInt64;
+    public let f3 : Float;
+}
+
+@frozen
+public struct F28_S2
+{
+    public let f0 : Double;
+    public let f1 : UInt64;
+}
+
+@frozen
+public struct F28_S3
+{
+    public let f0 : Int16;
+    public let f1 : UInt64;
+    public let f2 : Double;
+    public let f3 : Int32;
+}
+
+@frozen
+public struct F28_S4
+{
+    public let f0 : Int;
+}
+
+public func swiftFunc28(a0: UInt8, a1: UInt16, a2: F28_S0, a3: F28_S1, a4: F28_S2, a5: UInt64, a6: Int32, a7: Int64, a8: Double, a9: UInt16, a10: F28_S3, a11: F28_S4, a12: Float) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a2.f3);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a3.f2);
+    hasher.combine(a3.f3);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10.f0);
+    hasher.combine(a10.f1);
+    hasher.combine(a10.f2);
+    hasher.combine(a10.f3);
+    hasher.combine(a11.f0);
+    hasher.combine(a12);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F29_S0
+{
+    public let f0 : Int32;
+    public let f1 : Float;
+    public let f2 : Int16;
+}
+
+@frozen
+public struct F29_S1
+{
+    public let f0 : Int16;
+    public let f1 : Int8;
+    public let f2 : UInt;
+}
+
+@frozen
+public struct F29_S2
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F29_S3
+{
+    public let f0 : Int64;
+    public let f1 : Int64;
+}
+
+public func swiftFunc29(a0: Int8, a1: F29_S0, a2: Int32, a3: UInt, a4: F29_S1, a5: UInt64, a6: F29_S2, a7: Int16, a8: Int64, a9: UInt32, a10: UInt64, a11: Int, a12: F29_S3, a13: UInt8, a14: Int8, a15: Double) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a4.f2);
+    hasher.combine(a5);
+    hasher.combine(a6.f0);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    hasher.combine(a11);
+    hasher.combine(a12.f0);
+    hasher.combine(a12.f1);
+    hasher.combine(a13);
+    hasher.combine(a14);
+    hasher.combine(a15);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F30_S0
+{
+    public let f0 : UInt;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F30_S1
+{
+    public let f0 : UInt64;
+    public let f1 : UInt8;
+    public let f2 : Double;
+    public let f3 : Int;
+}
+
+@frozen
+public struct F30_S2_S0
+{
+    public let f0 : Int16;
+    public let f1 : Int16;
+}
+
+@frozen
+public struct F30_S2_S1
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F30_S2
+{
+    public let f0 : F30_S2_S0;
+    public let f1 : F30_S2_S1;
+}
+
+@frozen
+public struct F30_S3
+{
+    public let f0 : Int8;
+    public let f1 : UInt8;
+    public let f2 : UInt64;
+    public let f3 : UInt32;
+}
+
+@frozen
+public struct F30_S4
+{
+    public let f0 : UInt16;
+}
+
+public func swiftFunc30(a0: UInt16, a1: Int16, a2: UInt16, a3: F30_S0, a4: F30_S1, a5: F30_S2, a6: UInt64, a7: Int32, a8: UInt, a9: F30_S3, a10: UInt16, a11: F30_S4, a12: Int8) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a4.f2);
+    hasher.combine(a4.f3);
+    hasher.combine(a5.f0.f0);
+    hasher.combine(a5.f0.f1);
+    hasher.combine(a5.f1.f0);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9.f0);
+    hasher.combine(a9.f1);
+    hasher.combine(a9.f2);
+    hasher.combine(a9.f3);
+    hasher.combine(a10);
+    hasher.combine(a11.f0);
+    hasher.combine(a12);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F31_S0
+{
+    public let f0 : Int;
+    public let f1 : Float;
+    public let f2 : UInt32;
+    public let f3 : Int;
+}
+
+public func swiftFunc31(a0: Int64, a1: F31_S0, a2: UInt32, a3: UInt64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a1.f3);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F32_S0
+{
+    public let f0 : Int16;
+    public let f1 : Float;
+    public let f2 : Int64;
+}
+
+@frozen
+public struct F32_S1_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F32_S1
+{
+    public let f0 : UInt8;
+    public let f1 : F32_S1_S0;
+}
+
+@frozen
+public struct F32_S2
+{
+    public let f0 : UInt32;
+    public let f1 : UInt8;
+    public let f2 : UInt;
+}
+
+@frozen
+public struct F32_S3_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F32_S3
+{
+    public let f0 : UInt64;
+    public let f1 : F32_S3_S0;
+    public let f2 : UInt64;
+}
+
+@frozen
+public struct F32_S4
+{
+    public let f0 : Double;
+    public let f1 : Int64;
+    public let f2 : Int64;
+    public let f3 : Float;
+}
+
+public func swiftFunc32(a0: UInt64, a1: F32_S0, a2: Double, a3: F32_S1, a4: F32_S2, a5: UInt64, a6: Float, a7: F32_S3, a8: F32_S4, a9: UInt32, a10: Int16) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1.f0);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a4.f2);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    hasher.combine(a7.f1.f0);
+    hasher.combine(a7.f2);
+    hasher.combine(a8.f0);
+    hasher.combine(a8.f1);
+    hasher.combine(a8.f2);
+    hasher.combine(a8.f3);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F33_S0
+{
+    public let f0 : Int8;
+    public let f1 : UInt8;
+}
+
+@frozen
+public struct F33_S1
+{
+    public let f0 : UInt16;
+    public let f1 : UInt8;
+    public let f2 : Int64;
+}
+
+@frozen
+public struct F33_S2_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F33_S2
+{
+    public let f0 : F33_S2_S0;
+    public let f1 : UInt;
+    public let f2 : Float;
+    public let f3 : Double;
+    public let f4 : UInt16;
+}
+
+@frozen
+public struct F33_S3
+{
+    public let f0 : UInt;
+}
+
+public func swiftFunc33(a0: Float, a1: F33_S0, a2: UInt64, a3: Int64, a4: F33_S1, a5: UInt16, a6: UInt, a7: UInt16, a8: F33_S2, a9: F33_S3, a10: Int) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a4.f2);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8.f0.f0);
+    hasher.combine(a8.f1);
+    hasher.combine(a8.f2);
+    hasher.combine(a8.f3);
+    hasher.combine(a8.f4);
+    hasher.combine(a9.f0);
+    hasher.combine(a10);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F34_S0
+{
+    public let f0 : UInt8;
+}
+
+public func swiftFunc34(a0: Int64, a1: F34_S0, a2: UInt, a3: UInt, a4: UInt8, a5: Double) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F35_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F35_S1_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int8;
+}
+
+@frozen
+public struct F35_S1
+{
+    public let f0 : Int64;
+    public let f1 : F35_S1_S0;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F35_S2
+{
+    public let f0 : UInt64;
+    public let f1 : Int8;
+    public let f2 : UInt32;
+    public let f3 : Int64;
+}
+
+@frozen
+public struct F35_S3_S0_S0
+{
+    public let f0 : UInt32;
+    public let f1 : UInt8;
+}
+
+@frozen
+public struct F35_S3_S0
+{
+    public let f0 : UInt16;
+    public let f1 : F35_S3_S0_S0;
+    public let f2 : Double;
+}
+
+@frozen
+public struct F35_S3
+{
+    public let f0 : F35_S3_S0;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F35_S4
+{
+    public let f0 : Float;
+}
+
+public func swiftFunc35(a0: UInt8, a1: F35_S0, a2: UInt8, a3: UInt8, a4: F35_S1, a5: Int32, a6: F35_S2, a7: Int, a8: UInt32, a9: F35_S3, a10: F35_S4) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1.f0);
+    hasher.combine(a4.f1.f1);
+    hasher.combine(a4.f2);
+    hasher.combine(a5);
+    hasher.combine(a6.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a6.f2);
+    hasher.combine(a6.f3);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9.f0.f0);
+    hasher.combine(a9.f0.f1.f0);
+    hasher.combine(a9.f0.f1.f1);
+    hasher.combine(a9.f0.f2);
+    hasher.combine(a9.f1);
+    hasher.combine(a10.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F36_S0
+{
+    public let f0 : UInt64;
+    public let f1 : Int8;
+}
+
+@frozen
+public struct F36_S1
+{
+    public let f0 : Int64;
+    public let f1 : UInt;
+    public let f2 : Int;
+    public let f3 : Int32;
+}
+
+@frozen
+public struct F36_S2
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F36_S3_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F36_S3
+{
+    public let f0 : Int64;
+    public let f1 : Int8;
+    public let f2 : F36_S3_S0;
+}
+
+@frozen
+public struct F36_S4
+{
+    public let f0 : UInt;
+    public let f1 : Int64;
+    public let f2 : Double;
+    public let f3 : Double;
+}
+
+@frozen
+public struct F36_S5
+{
+    public let f0 : UInt8;
+    public let f1 : UInt8;
+}
+
+@frozen
+public struct F36_S6
+{
+    public let f0 : UInt16;
+}
+
+public func swiftFunc36(a0: F36_S0, a1: Double, a2: UInt64, a3: F36_S1, a4: F36_S2, a5: F36_S3, a6: F36_S4, a7: Float, a8: F36_S5, a9: UInt8, a10: Double, a11: F36_S6) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a3.f2);
+    hasher.combine(a3.f3);
+    hasher.combine(a4.f0);
+    hasher.combine(a5.f0);
+    hasher.combine(a5.f1);
+    hasher.combine(a5.f2.f0);
+    hasher.combine(a6.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a6.f2);
+    hasher.combine(a6.f3);
+    hasher.combine(a7);
+    hasher.combine(a8.f0);
+    hasher.combine(a8.f1);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    hasher.combine(a11.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F37_S0
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F37_S1
+{
+    public let f0 : UInt32;
+    public let f1 : UInt32;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F37_S2
+{
+    public let f0 : Int32;
+    public let f1 : UInt32;
+    public let f2 : Double;
+    public let f3 : UInt;
+}
+
+@frozen
+public struct F37_S3_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F37_S3
+{
+    public let f0 : F37_S3_S0;
+}
+
+public func swiftFunc37(a0: Int, a1: UInt64, a2: UInt32, a3: Int32, a4: Int8, a5: UInt8, a6: UInt64, a7: F37_S0, a8: F37_S1, a9: Int16, a10: F37_S2, a11: UInt, a12: F37_S3, a13: UInt64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    hasher.combine(a8.f0);
+    hasher.combine(a8.f1);
+    hasher.combine(a8.f2);
+    hasher.combine(a9);
+    hasher.combine(a10.f0);
+    hasher.combine(a10.f1);
+    hasher.combine(a10.f2);
+    hasher.combine(a10.f3);
+    hasher.combine(a11);
+    hasher.combine(a12.f0.f0);
+    hasher.combine(a13);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F38_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int16;
+    public let f2 : Int16;
+}
+
+@frozen
+public struct F38_S1
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F38_S2
+{
+    public let f0 : UInt;
+}
+
+public func swiftFunc38(a0: UInt32, a1: Int32, a2: F38_S0, a3: F38_S1, a4: F38_S2) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a3.f0);
+    hasher.combine(a4.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F39_S0_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F39_S0_S1
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F39_S0
+{
+    public let f0 : Int;
+    public let f1 : Int64;
+    public let f2 : UInt32;
+    public let f3 : F39_S0_S0;
+    public let f4 : F39_S0_S1;
+}
+
+@frozen
+public struct F39_S1
+{
+    public let f0 : UInt;
+    public let f1 : Double;
+}
+
+public func swiftFunc39(a0: UInt, a1: UInt, a2: F39_S0, a3: F39_S1, a4: Float) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a2.f3.f0);
+    hasher.combine(a2.f4.f0);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a4);
+    return hasher.finalize()
+}
+
+public func swiftFunc40(a0: Int32) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F41_S0
+{
+    public let f0 : Int16;
+    public let f1 : Float;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F41_S1
+{
+    public let f0 : UInt16;
+    public let f1 : UInt64;
+    public let f2 : Int8;
+    public let f3 : Float;
+    public let f4 : UInt64;
+}
+
+@frozen
+public struct F41_S2_S0_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F41_S2_S0
+{
+    public let f0 : F41_S2_S0_S0;
+}
+
+@frozen
+public struct F41_S2
+{
+    public let f0 : Int32;
+    public let f1 : Int16;
+    public let f2 : UInt64;
+    public let f3 : Float;
+    public let f4 : F41_S2_S0;
+}
+
+public func swiftFunc41(a0: Float, a1: F41_S0, a2: F41_S1, a3: F41_S2, a4: UInt32, a5: UInt, a6: UInt32, a7: Int, a8: Int8) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a2.f3);
+    hasher.combine(a2.f4);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a3.f2);
+    hasher.combine(a3.f3);
+    hasher.combine(a3.f4.f0.f0);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F42_S0
+{
+    public let f0 : UInt32;
+    public let f1 : UInt64;
+    public let f2 : UInt64;
+}
+
+@frozen
+public struct F42_S1
+{
+    public let f0 : Double;
+    public let f1 : Double;
+}
+
+@frozen
+public struct F42_S2_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F42_S2
+{
+    public let f0 : UInt8;
+    public let f1 : Int64;
+    public let f2 : F42_S2_S0;
+    public let f3 : Int;
+}
+
+@frozen
+public struct F42_S3_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F42_S3
+{
+    public let f0 : Float;
+    public let f1 : F42_S3_S0;
+}
+
+@frozen
+public struct F42_S4
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F42_S5_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F42_S5
+{
+    public let f0 : F42_S5_S0;
+}
+
+@frozen
+public struct F42_S6
+{
+    public let f0 : UInt;
+}
+
+public func swiftFunc42(a0: F42_S0, a1: F42_S1, a2: UInt16, a3: F42_S2, a4: F42_S3, a5: F42_S4, a6: F42_S5, a7: F42_S6, a8: Int16) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a3.f2.f0);
+    hasher.combine(a3.f3);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1.f0);
+    hasher.combine(a5.f0);
+    hasher.combine(a6.f0.f0);
+    hasher.combine(a7.f0);
+    hasher.combine(a8);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F43_S0_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F43_S0
+{
+    public let f0 : F43_S0_S0;
+}
+
+public func swiftFunc43(a0: Int64, a1: UInt8, a2: Int8, a3: Float, a4: Int64, a5: Int, a6: F43_S0) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6.f0.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F44_S0
+{
+    public let f0 : UInt64;
+}
+
+public func swiftFunc44(a0: F44_S0) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F45_S0
+{
+    public let f0 : Double;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F45_S1_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F45_S1_S1
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F45_S1
+{
+    public let f0 : UInt16;
+    public let f1 : Int8;
+    public let f2 : F45_S1_S0;
+    public let f3 : F45_S1_S1;
+}
+
+@frozen
+public struct F45_S2
+{
+    public let f0 : UInt64;
+    public let f1 : Float;
+    public let f2 : UInt16;
+}
+
+public func swiftFunc45(a0: F45_S0, a1: F45_S1, a2: F45_S2, a3: Int) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2.f0);
+    hasher.combine(a1.f3.f0);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a3);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F46_S0
+{
+    public let f0 : Int64;
+    public let f1 : UInt8;
+    public let f2 : UInt;
+    public let f3 : Int8;
+}
+
+@frozen
+public struct F46_S1
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F46_S2
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F46_S3
+{
+    public let f0 : UInt64;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct F46_S4
+{
+    public let f0 : Int16;
+    public let f1 : Int32;
+    public let f2 : UInt32;
+}
+
+@frozen
+public struct F46_S5
+{
+    public let f0 : UInt64;
+}
+
+public func swiftFunc46(a0: F46_S0, a1: F46_S1, a2: Int8, a3: Float, a4: F46_S2, a5: Int16, a6: F46_S3, a7: Int16, a8: Float, a9: F46_S4, a10: UInt16, a11: Float, a12: Int8, a13: F46_S5) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a1.f0);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a5);
+    hasher.combine(a6.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9.f0);
+    hasher.combine(a9.f1);
+    hasher.combine(a9.f2);
+    hasher.combine(a10);
+    hasher.combine(a11);
+    hasher.combine(a12);
+    hasher.combine(a13.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F47_S0_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int8;
+}
+
+@frozen
+public struct F47_S0
+{
+    public let f0 : F47_S0_S0;
+    public let f1 : UInt16;
+    public let f2 : UInt;
+    public let f3 : Int64;
+}
+
+@frozen
+public struct F47_S1
+{
+    public let f0 : Int64;
+    public let f1 : UInt8;
+}
+
+public func swiftFunc47(a0: Int, a1: F47_S0, a2: F47_S1, a3: Int64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0.f0);
+    hasher.combine(a1.f0.f1);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a1.f3);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a3);
+    return hasher.finalize()
+}
+
+public func swiftFunc48(a0: Int8, a1: UInt32, a2: Int16, a3: Float, a4: Int, a5: Float, a6: UInt32) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F49_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F49_S1_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F49_S1_S1
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F49_S1
+{
+    public let f0 : F49_S1_S0;
+    public let f1 : Int32;
+    public let f2 : F49_S1_S1;
+    public let f3 : UInt;
+}
+
+@frozen
+public struct F49_S2
+{
+    public let f0 : UInt16;
+    public let f1 : UInt8;
+    public let f2 : Float;
+    public let f3 : Int64;
+}
+
+@frozen
+public struct F49_S3
+{
+    public let f0 : Int32;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F49_S4
+{
+    public let f0 : UInt32;
+    public let f1 : Int;
+    public let f2 : Int;
+}
+
+public func swiftFunc49(a0: UInt64, a1: UInt8, a2: F49_S0, a3: F49_S1, a4: UInt, a5: UInt32, a6: Double, a7: F49_S2, a8: F49_S3, a9: Int8, a10: F49_S4, a11: Int32, a12: UInt64, a13: UInt8) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a3.f0.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a3.f2.f0);
+    hasher.combine(a3.f3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    hasher.combine(a7.f1);
+    hasher.combine(a7.f2);
+    hasher.combine(a7.f3);
+    hasher.combine(a8.f0);
+    hasher.combine(a8.f1);
+    hasher.combine(a9);
+    hasher.combine(a10.f0);
+    hasher.combine(a10.f1);
+    hasher.combine(a10.f2);
+    hasher.combine(a11);
+    hasher.combine(a12);
+    hasher.combine(a13);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F50_S0
+{
+    public let f0 : Int8;
+    public let f1 : Int16;
+    public let f2 : Int32;
+    public let f3 : UInt32;
+}
+
+@frozen
+public struct F50_S1
+{
+    public let f0 : Int32;
+}
+
+public func swiftFunc50(a0: F50_S0, a1: UInt8, a2: F50_S1) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    return hasher.finalize()
+}
+
+public func swiftFunc51(a0: UInt16, a1: Int8, a2: Int16) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    return hasher.finalize()
+}
+
+public func swiftFunc52(a0: UInt8, a1: UInt64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F53_S0_S0
+{
+    public let f0 : Int64;
+    public let f1 : UInt;
+}
+
+@frozen
+public struct F53_S0
+{
+    public let f0 : UInt64;
+    public let f1 : F53_S0_S0;
+    public let f2 : Int16;
+    public let f3 : UInt8;
+}
+
+@frozen
+public struct F53_S1_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F53_S1
+{
+    public let f0 : F53_S1_S0;
+}
+
+@frozen
+public struct F53_S2
+{
+    public let f0 : UInt8;
+    public let f1 : UInt64;
+    public let f2 : Double;
+}
+
+public func swiftFunc53(a0: F53_S0, a1: UInt, a2: UInt64, a3: Float, a4: UInt32, a5: F53_S1, a6: F53_S2, a7: UInt32) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1.f0);
+    hasher.combine(a0.f1.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5.f0.f0);
+    hasher.combine(a6.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a6.f2);
+    hasher.combine(a7);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F54_S0_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F54_S0
+{
+    public let f0 : F54_S0_S0;
+}
+
+@frozen
+public struct F54_S1
+{
+    public let f0 : UInt32;
+}
+
+public func swiftFunc54(a0: Int8, a1: Int32, a2: UInt32, a3: F54_S0, a4: Float, a5: UInt8, a6: F54_S1) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0.f0);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F55_S0
+{
+    public let f0 : Double;
+}
+
+public func swiftFunc55(a0: F55_S0) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F56_S0_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F56_S0
+{
+    public let f0 : Float;
+    public let f1 : F56_S0_S0;
+}
+
+@frozen
+public struct F56_S1_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F56_S1
+{
+    public let f0 : F56_S1_S0;
+    public let f1 : Double;
+    public let f2 : UInt;
+    public let f3 : UInt32;
+}
+
+@frozen
+public struct F56_S2
+{
+    public let f0 : Int16;
+    public let f1 : Int16;
+}
+
+@frozen
+public struct F56_S3
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F56_S4
+{
+    public let f0 : UInt;
+}
+
+public func swiftFunc56(a0: F56_S0, a1: F56_S1, a2: F56_S2, a3: F56_S3, a4: F56_S4) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1.f0);
+    hasher.combine(a1.f0.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a1.f3);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a3.f0);
+    hasher.combine(a4.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F57_S0
+{
+    public let f0 : Int8;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F57_S1_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F57_S1_S1
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F57_S1
+{
+    public let f0 : F57_S1_S0;
+    public let f1 : F57_S1_S1;
+    public let f2 : Int16;
+}
+
+@frozen
+public struct F57_S2
+{
+    public let f0 : UInt;
+}
+
+public func swiftFunc57(a0: UInt32, a1: F57_S0, a2: F57_S1, a3: UInt, a4: F57_S2, a5: Int16) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a2.f0.f0);
+    hasher.combine(a2.f1.f0);
+    hasher.combine(a2.f2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a5);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F58_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F58_S1
+{
+    public let f0 : UInt;
+    public let f1 : Int;
+    public let f2 : UInt;
+    public let f3 : UInt16;
+}
+
+public func swiftFunc58(a0: UInt8, a1: UInt8, a2: Int, a3: F58_S0, a4: Float, a5: UInt64, a6: Int8, a7: F58_S1, a8: UInt16, a9: Int64, a10: Int64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    hasher.combine(a7.f1);
+    hasher.combine(a7.f2);
+    hasher.combine(a7.f3);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F59_S0
+{
+    public let f0 : UInt;
+    public let f1 : UInt8;
+    public let f2 : Float;
+    public let f3 : Int;
+}
+
+@frozen
+public struct F59_S1
+{
+    public let f0 : UInt8;
+    public let f1 : Int32;
+}
+
+@frozen
+public struct F59_S2
+{
+    public let f0 : Int;
+    public let f1 : UInt32;
+    public let f2 : Int8;
+}
+
+@frozen
+public struct F59_S3
+{
+    public let f0 : Int8;
+    public let f1 : Float;
+    public let f2 : Int32;
+}
+
+@frozen
+public struct F59_S4_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F59_S4
+{
+    public let f0 : F59_S4_S0;
+}
+
+public func swiftFunc59(a0: F59_S0, a1: Float, a2: UInt32, a3: F59_S1, a4: F59_S2, a5: UInt16, a6: Float, a7: Int, a8: Int, a9: UInt, a10: UInt, a11: Int16, a12: F59_S3, a13: F59_S4) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a4.f2);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    hasher.combine(a11);
+    hasher.combine(a12.f0);
+    hasher.combine(a12.f1);
+    hasher.combine(a12.f2);
+    hasher.combine(a13.f0.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F60_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F60_S1
+{
+    public let f0 : UInt32;
+}
+
+public func swiftFunc60(a0: Int32, a1: Int8, a2: Int32, a3: UInt16, a4: Float, a5: F60_S0, a6: F60_S1) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5.f0);
+    hasher.combine(a6.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F61_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int32;
+    public let f2 : Int8;
+}
+
+@frozen
+public struct F61_S1
+{
+    public let f0 : Double;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F61_S2
+{
+    public let f0 : Int;
+    public let f1 : Int8;
+    public let f2 : Float;
+    public let f3 : UInt16;
+    public let f4 : Float;
+}
+
+@frozen
+public struct F61_S3
+{
+    public let f0 : UInt32;
+    public let f1 : UInt64;
+    public let f2 : UInt;
+    public let f3 : UInt;
+}
+
+@frozen
+public struct F61_S4_S0
+{
+    public let f0 : UInt8;
+    public let f1 : UInt64;
+}
+
+@frozen
+public struct F61_S4
+{
+    public let f0 : F61_S4_S0;
+    public let f1 : Int64;
+}
+
+public func swiftFunc61(a0: F61_S0, a1: UInt8, a2: Float, a3: F61_S1, a4: Int8, a5: Int64, a6: F61_S2, a7: F61_S3, a8: F61_S4, a9: UInt32) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a6.f2);
+    hasher.combine(a6.f3);
+    hasher.combine(a6.f4);
+    hasher.combine(a7.f0);
+    hasher.combine(a7.f1);
+    hasher.combine(a7.f2);
+    hasher.combine(a7.f3);
+    hasher.combine(a8.f0.f0);
+    hasher.combine(a8.f0.f1);
+    hasher.combine(a8.f1);
+    hasher.combine(a9);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F62_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F62_S1
+{
+    public let f0 : Float;
+}
+
+public func swiftFunc62(a0: F62_S0, a1: Int16, a2: Int32, a3: F62_S1) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F63_S0
+{
+    public let f0 : Int;
+}
+
+public func swiftFunc63(a0: F63_S0) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F64_S0
+{
+    public let f0 : Double;
+    public let f1 : UInt16;
+    public let f2 : Int32;
+    public let f3 : Int;
+    public let f4 : Double;
+}
+
+@frozen
+public struct F64_S1
+{
+    public let f0 : Int32;
+    public let f1 : Float;
+    public let f2 : UInt32;
+}
+
+public func swiftFunc64(a0: Double, a1: F64_S0, a2: UInt8, a3: F64_S1, a4: Int32, a5: UInt64, a6: Int8, a7: Int8, a8: Float) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a1.f3);
+    hasher.combine(a1.f4);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a3.f2);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    return hasher.finalize()
+}
+
+public func swiftFunc65(a0: Float, a1: Float, a2: UInt, a3: Float) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F66_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F66_S1_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F66_S1
+{
+    public let f0 : F66_S1_S0;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F66_S2
+{
+    public let f0 : Double;
+    public let f1 : UInt8;
+}
+
+@frozen
+public struct F66_S3
+{
+    public let f0 : UInt;
+}
+
+public func swiftFunc66(a0: F66_S0, a1: F66_S1, a2: F66_S2, a3: F66_S3) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a1.f0.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a3.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F67_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F67_S1_S0_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F67_S1_S0
+{
+    public let f0 : F67_S1_S0_S0;
+}
+
+@frozen
+public struct F67_S1
+{
+    public let f0 : F67_S1_S0;
+    public let f1 : UInt32;
+    public let f2 : Int16;
+}
+
+public func swiftFunc67(a0: UInt64, a1: UInt32, a2: UInt16, a3: Int8, a4: F67_S0, a5: UInt64, a6: F67_S1, a7: UInt, a8: UInt64, a9: Int64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a5);
+    hasher.combine(a6.f0.f0.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a6.f2);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F68_S0_S0_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F68_S0_S0
+{
+    public let f0 : F68_S0_S0_S0;
+}
+
+@frozen
+public struct F68_S0
+{
+    public let f0 : F68_S0_S0;
+}
+
+@frozen
+public struct F68_S1
+{
+    public let f0 : UInt64;
+    public let f1 : UInt16;
+}
+
+@frozen
+public struct F68_S2
+{
+    public let f0 : UInt;
+    public let f1 : Int;
+    public let f2 : UInt64;
+    public let f3 : Double;
+}
+
+@frozen
+public struct F68_S3
+{
+    public let f0 : Int;
+    public let f1 : UInt32;
+    public let f2 : UInt32;
+    public let f3 : UInt;
+}
+
+@frozen
+public struct F68_S4
+{
+    public let f0 : Int32;
+}
+
+public func swiftFunc68(a0: UInt16, a1: Int64, a2: Int16, a3: UInt64, a4: Int8, a5: Int32, a6: UInt8, a7: F68_S0, a8: UInt8, a9: F68_S1, a10: Int16, a11: F68_S2, a12: Int16, a13: Int16, a14: F68_S3, a15: F68_S4) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7.f0.f0.f0);
+    hasher.combine(a8);
+    hasher.combine(a9.f0);
+    hasher.combine(a9.f1);
+    hasher.combine(a10);
+    hasher.combine(a11.f0);
+    hasher.combine(a11.f1);
+    hasher.combine(a11.f2);
+    hasher.combine(a11.f3);
+    hasher.combine(a12);
+    hasher.combine(a13);
+    hasher.combine(a14.f0);
+    hasher.combine(a14.f1);
+    hasher.combine(a14.f2);
+    hasher.combine(a14.f3);
+    hasher.combine(a15.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F69_S0
+{
+    public let f0 : UInt32;
+    public let f1 : UInt;
+}
+
+@frozen
+public struct F69_S1_S0_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F69_S1_S0
+{
+    public let f0 : F69_S1_S0_S0;
+    public let f1 : Int8;
+}
+
+@frozen
+public struct F69_S1
+{
+    public let f0 : F69_S1_S0;
+    public let f1 : UInt;
+    public let f2 : Int;
+}
+
+@frozen
+public struct F69_S2
+{
+    public let f0 : Float;
+    public let f1 : UInt32;
+    public let f2 : UInt16;
+    public let f3 : Int8;
+}
+
+@frozen
+public struct F69_S3
+{
+    public let f0 : UInt8;
+    public let f1 : Double;
+}
+
+@frozen
+public struct F69_S4
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F69_S5
+{
+    public let f0 : UInt64;
+}
+
+public func swiftFunc69(a0: F69_S0, a1: F69_S1, a2: Int, a3: Int, a4: UInt16, a5: Int16, a6: Double, a7: F69_S2, a8: F69_S3, a9: F69_S4, a10: Int, a11: Int32, a12: F69_S5, a13: Float) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a1.f0.f0.f0);
+    hasher.combine(a1.f0.f1);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    hasher.combine(a7.f1);
+    hasher.combine(a7.f2);
+    hasher.combine(a7.f3);
+    hasher.combine(a8.f0);
+    hasher.combine(a8.f1);
+    hasher.combine(a9.f0);
+    hasher.combine(a10);
+    hasher.combine(a11);
+    hasher.combine(a12.f0);
+    hasher.combine(a13);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F70_S0
+{
+    public let f0 : Float;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct F70_S1
+{
+    public let f0 : UInt16;
+    public let f1 : Int8;
+    public let f2 : Int16;
+}
+
+@frozen
+public struct F70_S2
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F70_S3
+{
+    public let f0 : UInt16;
+}
+
+public func swiftFunc70(a0: UInt64, a1: F70_S0, a2: UInt16, a3: Int8, a4: Float, a5: F70_S1, a6: Int, a7: F70_S2, a8: F70_S3, a9: UInt32) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5.f0);
+    hasher.combine(a5.f1);
+    hasher.combine(a5.f2);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    hasher.combine(a8.f0);
+    hasher.combine(a9);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F71_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F71_S1
+{
+    public let f0 : UInt64;
+}
+
+public func swiftFunc71(a0: Int64, a1: F71_S0, a2: Int8, a3: F71_S1, a4: Float, a5: UInt32) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F72_S0_S0
+{
+    public let f0 : Int;
+    public let f1 : Double;
+}
+
+@frozen
+public struct F72_S0
+{
+    public let f0 : F72_S0_S0;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F72_S1
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F72_S2
+{
+    public let f0 : Double;
+}
+
+public func swiftFunc72(a0: F72_S0, a1: F72_S1, a2: F72_S2) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0.f0);
+    hasher.combine(a0.f0.f1);
+    hasher.combine(a0.f1);
+    hasher.combine(a1.f0);
+    hasher.combine(a2.f0);
+    return hasher.finalize()
+}
+
+public func swiftFunc73(a0: Int64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F74_S0
+{
+    public let f0 : UInt8;
+    public let f1 : UInt8;
+    public let f2 : Double;
+    public let f3 : UInt8;
+}
+
+@frozen
+public struct F74_S1
+{
+    public let f0 : Int16;
+    public let f1 : UInt16;
+    public let f2 : Int64;
+    public let f3 : UInt;
+}
+
+@frozen
+public struct F74_S2
+{
+    public let f0 : Int16;
+    public let f1 : Double;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F74_S3
+{
+    public let f0 : Int16;
+}
+
+public func swiftFunc74(a0: F74_S0, a1: F74_S1, a2: Int32, a3: F74_S2, a4: Int, a5: Int64, a6: Int16, a7: Int32, a8: F74_S3, a9: UInt64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a1.f3);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a3.f2);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8.f0);
+    hasher.combine(a9);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F75_S0_S0_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F75_S0_S0
+{
+    public let f0 : F75_S0_S0_S0;
+}
+
+@frozen
+public struct F75_S0
+{
+    public let f0 : F75_S0_S0;
+    public let f1 : Double;
+    public let f2 : Int32;
+}
+
+@frozen
+public struct F75_S1_S0_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F75_S1_S0
+{
+    public let f0 : UInt;
+    public let f1 : F75_S1_S0_S0;
+    public let f2 : Int64;
+}
+
+@frozen
+public struct F75_S1
+{
+    public let f0 : F75_S1_S0;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F75_S2
+{
+    public let f0 : UInt64;
+}
+
+public func swiftFunc75(a0: F75_S0, a1: Double, a2: Int, a3: UInt, a4: Int, a5: F75_S1, a6: F75_S2) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0.f0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5.f0.f0);
+    hasher.combine(a5.f0.f1.f0);
+    hasher.combine(a5.f0.f2);
+    hasher.combine(a5.f1);
+    hasher.combine(a6.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F76_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F76_S1
+{
+    public let f0 : UInt64;
+    public let f1 : Int32;
+    public let f2 : Int16;
+}
+
+@frozen
+public struct F76_S2
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F76_S3
+{
+    public let f0 : Int;
+}
+
+public func swiftFunc76(a0: Double, a1: Int64, a2: UInt16, a3: Float, a4: Float, a5: F76_S0, a6: Int16, a7: F76_S1, a8: Int64, a9: UInt64, a10: UInt16, a11: UInt8, a12: Int8, a13: Int, a14: Int64, a15: Int8, a16: Int8, a17: Int16, a18: UInt16, a19: F76_S2, a20: F76_S3) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5.f0);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    hasher.combine(a7.f1);
+    hasher.combine(a7.f2);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    hasher.combine(a11);
+    hasher.combine(a12);
+    hasher.combine(a13);
+    hasher.combine(a14);
+    hasher.combine(a15);
+    hasher.combine(a16);
+    hasher.combine(a17);
+    hasher.combine(a18);
+    hasher.combine(a19.f0);
+    hasher.combine(a20.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F77_S0_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F77_S0
+{
+    public let f0 : UInt64;
+    public let f1 : F77_S0_S0;
+    public let f2 : Int8;
+}
+
+@frozen
+public struct F77_S1
+{
+    public let f0 : UInt64;
+    public let f1 : Int;
+    public let f2 : Int32;
+}
+
+@frozen
+public struct F77_S2_S0_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F77_S2_S0
+{
+    public let f0 : F77_S2_S0_S0;
+}
+
+@frozen
+public struct F77_S2
+{
+    public let f0 : F77_S2_S0;
+    public let f1 : Int16;
+    public let f2 : Int8;
+    public let f3 : UInt8;
+}
+
+@frozen
+public struct F77_S3
+{
+    public let f0 : Int;
+    public let f1 : Int;
+    public let f2 : Int;
+    public let f3 : Int16;
+}
+
+@frozen
+public struct F77_S4
+{
+    public let f0 : Double;
+    public let f1 : Int8;
+    public let f2 : UInt32;
+    public let f3 : Int16;
+    public let f4 : UInt32;
+}
+
+@frozen
+public struct F77_S5
+{
+    public let f0 : UInt;
+}
+
+public func swiftFunc77(a0: F77_S0, a1: Int16, a2: F77_S1, a3: UInt32, a4: F77_S2, a5: F77_S3, a6: F77_S4, a7: UInt64, a8: F77_S5, a9: UInt16, a10: Float) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1.f0);
+    hasher.combine(a0.f2);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0.f0.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a4.f2);
+    hasher.combine(a4.f3);
+    hasher.combine(a5.f0);
+    hasher.combine(a5.f1);
+    hasher.combine(a5.f2);
+    hasher.combine(a5.f3);
+    hasher.combine(a6.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a6.f2);
+    hasher.combine(a6.f3);
+    hasher.combine(a6.f4);
+    hasher.combine(a7);
+    hasher.combine(a8.f0);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F78_S0
+{
+    public let f0 : UInt16;
+    public let f1 : UInt;
+}
+
+public func swiftFunc78(a0: F78_S0, a1: UInt64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a1);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F79_S0
+{
+    public let f0 : Double;
+}
+
+public func swiftFunc79(a0: UInt32, a1: F79_S0, a2: Int16, a3: Double) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F80_S0
+{
+    public let f0 : UInt64;
+    public let f1 : Double;
+}
+
+@frozen
+public struct F80_S1_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F80_S1
+{
+    public let f0 : Int32;
+    public let f1 : UInt16;
+    public let f2 : UInt32;
+    public let f3 : F80_S1_S0;
+}
+
+@frozen
+public struct F80_S2
+{
+    public let f0 : UInt64;
+    public let f1 : Int64;
+    public let f2 : UInt32;
+    public let f3 : UInt16;
+}
+
+@frozen
+public struct F80_S3_S0_S0
+{
+    public let f0 : Int;
+    public let f1 : Int64;
+    public let f2 : UInt64;
+}
+
+@frozen
+public struct F80_S3_S0
+{
+    public let f0 : F80_S3_S0_S0;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F80_S3
+{
+    public let f0 : F80_S3_S0;
+    public let f1 : Int32;
+}
+
+@frozen
+public struct F80_S4_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F80_S4
+{
+    public let f0 : F80_S4_S0;
+}
+
+public func swiftFunc80(a0: F80_S0, a1: F80_S1, a2: UInt16, a3: Int64, a4: F80_S2, a5: Double, a6: UInt64, a7: Int32, a8: F80_S3, a9: F80_S4, a10: UInt8) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a1.f3.f0);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a4.f2);
+    hasher.combine(a4.f3);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8.f0.f0.f0);
+    hasher.combine(a8.f0.f0.f1);
+    hasher.combine(a8.f0.f0.f2);
+    hasher.combine(a8.f0.f1);
+    hasher.combine(a8.f1);
+    hasher.combine(a9.f0.f0);
+    hasher.combine(a10);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F81_S0
+{
+    public let f0 : Double;
+    public let f1 : UInt64;
+    public let f2 : UInt32;
+    public let f3 : UInt8;
+    public let f4 : UInt8;
+}
+
+@frozen
+public struct F81_S1
+{
+    public let f0 : UInt32;
+}
+
+public func swiftFunc81(a0: F81_S0, a1: Int32, a2: Float, a3: F81_S1) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a0.f4);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F82_S0
+{
+    public let f0 : Int32;
+    public let f1 : Int16;
+    public let f2 : UInt64;
+    public let f3 : Int8;
+}
+
+@frozen
+public struct F82_S1_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F82_S1
+{
+    public let f0 : Int;
+    public let f1 : Int32;
+    public let f2 : F82_S1_S0;
+}
+
+@frozen
+public struct F82_S2
+{
+    public let f0 : Int;
+    public let f1 : Int64;
+    public let f2 : UInt32;
+    public let f3 : UInt16;
+    public let f4 : Int64;
+}
+
+@frozen
+public struct F82_S3
+{
+    public let f0 : UInt8;
+}
+
+public func swiftFunc82(a0: F82_S0, a1: F82_S1, a2: F82_S2, a3: UInt32, a4: Int, a5: F82_S3) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2.f0);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a2.f3);
+    hasher.combine(a2.f4);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F83_S0_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F83_S0
+{
+    public let f0 : F83_S0_S0;
+    public let f1 : Int;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F83_S1_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F83_S1_S1_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F83_S1_S1
+{
+    public let f0 : F83_S1_S1_S0;
+}
+
+@frozen
+public struct F83_S1
+{
+    public let f0 : UInt32;
+    public let f1 : F83_S1_S0;
+    public let f2 : F83_S1_S1;
+}
+
+@frozen
+public struct F83_S2
+{
+    public let f0 : Int;
+}
+
+public func swiftFunc83(a0: Float, a1: F83_S0, a2: F83_S1, a3: Int16, a4: Int, a5: Float, a6: F83_S2, a7: UInt16) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1.f0);
+    hasher.combine(a2.f2.f0.f0);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6.f0);
+    hasher.combine(a7);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F84_S0
+{
+    public let f0 : Int16;
+    public let f1 : Int8;
+    public let f2 : UInt16;
+    public let f3 : Int64;
+    public let f4 : Int16;
+}
+
+@frozen
+public struct F84_S1
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F84_S2_S0
+{
+    public let f0 : UInt8;
+    public let f1 : UInt64;
+}
+
+@frozen
+public struct F84_S2
+{
+    public let f0 : UInt;
+    public let f1 : F84_S2_S0;
+    public let f2 : Int8;
+    public let f3 : Double;
+}
+
+@frozen
+public struct F84_S3
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F84_S4
+{
+    public let f0 : Float;
+}
+
+public func swiftFunc84(a0: F84_S0, a1: F84_S1, a2: UInt64, a3: F84_S2, a4: UInt32, a5: F84_S3, a6: UInt, a7: F84_S4, a8: UInt64, a9: UInt64, a10: UInt16, a11: Int16, a12: Float) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a0.f4);
+    hasher.combine(a1.f0);
+    hasher.combine(a2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1.f0);
+    hasher.combine(a3.f1.f1);
+    hasher.combine(a3.f2);
+    hasher.combine(a3.f3);
+    hasher.combine(a4);
+    hasher.combine(a5.f0);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    hasher.combine(a11);
+    hasher.combine(a12);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F85_S0_S0_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F85_S0_S0
+{
+    public let f0 : Int32;
+    public let f1 : F85_S0_S0_S0;
+}
+
+@frozen
+public struct F85_S0
+{
+    public let f0 : Float;
+    public let f1 : F85_S0_S0;
+    public let f2 : Int;
+    public let f3 : Int64;
+}
+
+@frozen
+public struct F85_S1
+{
+    public let f0 : UInt32;
+    public let f1 : Int32;
+}
+
+@frozen
+public struct F85_S2
+{
+    public let f0 : UInt;
+}
+
+public func swiftFunc85(a0: F85_S0, a1: F85_S1, a2: F85_S2, a3: Int8, a4: UInt32, a5: Int16) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1.f0);
+    hasher.combine(a0.f1.f1.f0);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a2.f0);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F86_S0
+{
+    public let f0 : Int32;
+    public let f1 : Int64;
+    public let f2 : Int32;
+    public let f3 : UInt16;
+}
+
+@frozen
+public struct F86_S1_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F86_S1
+{
+    public let f0 : F86_S1_S0;
+    public let f1 : UInt16;
+}
+
+@frozen
+public struct F86_S2
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F86_S3
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F86_S4
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F86_S5
+{
+    public let f0 : Int16;
+}
+
+public func swiftFunc86(a0: F86_S0, a1: Int, a2: Int, a3: UInt, a4: F86_S1, a5: F86_S2, a6: UInt64, a7: F86_S3, a8: F86_S4, a9: F86_S5) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a5.f0);
+    hasher.combine(a6);
+    hasher.combine(a7.f0);
+    hasher.combine(a8.f0);
+    hasher.combine(a9.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F87_S0_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F87_S0
+{
+    public let f0 : F87_S0_S0;
+    public let f1 : Float;
+    public let f2 : Int64;
+    public let f3 : Double;
+}
+
+@frozen
+public struct F87_S1
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F87_S2_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F87_S2
+{
+    public let f0 : F87_S2_S0;
+}
+
+@frozen
+public struct F87_S3
+{
+    public let f0 : Int32;
+}
+
+public func swiftFunc87(a0: Int64, a1: F87_S0, a2: UInt, a3: UInt8, a4: Double, a5: Int16, a6: UInt64, a7: Double, a8: Float, a9: F87_S1, a10: Int64, a11: F87_S2, a12: F87_S3, a13: Float) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a1.f3);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9.f0);
+    hasher.combine(a10);
+    hasher.combine(a11.f0.f0);
+    hasher.combine(a12.f0);
+    hasher.combine(a13);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F88_S0
+{
+    public let f0 : UInt8;
+    public let f1 : Int64;
+    public let f2 : UInt64;
+    public let f3 : Int;
+}
+
+@frozen
+public struct F88_S1
+{
+    public let f0 : Int64;
+    public let f1 : UInt8;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F88_S2
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F88_S3_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F88_S3
+{
+    public let f0 : Int32;
+    public let f1 : F88_S3_S0;
+    public let f2 : Int8;
+    public let f3 : UInt16;
+}
+
+@frozen
+public struct F88_S4_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F88_S4
+{
+    public let f0 : UInt16;
+    public let f1 : UInt;
+    public let f2 : Int8;
+    public let f3 : Int;
+    public let f4 : F88_S4_S0;
+}
+
+@frozen
+public struct F88_S5
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F88_S6
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F88_S7_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F88_S7
+{
+    public let f0 : F88_S7_S0;
+}
+
+public func swiftFunc88(a0: F88_S0, a1: Int8, a2: F88_S1, a3: UInt64, a4: F88_S2, a5: F88_S3, a6: F88_S4, a7: Int16, a8: F88_S5, a9: F88_S6, a10: F88_S7) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a5.f0);
+    hasher.combine(a5.f1.f0);
+    hasher.combine(a5.f2);
+    hasher.combine(a5.f3);
+    hasher.combine(a6.f0);
+    hasher.combine(a6.f1);
+    hasher.combine(a6.f2);
+    hasher.combine(a6.f3);
+    hasher.combine(a6.f4.f0);
+    hasher.combine(a7);
+    hasher.combine(a8.f0);
+    hasher.combine(a9.f0);
+    hasher.combine(a10.f0.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F89_S0
+{
+    public let f0 : UInt8;
+    public let f1 : Int8;
+}
+
+@frozen
+public struct F89_S1
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F89_S2
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F89_S3
+{
+    public let f0 : Double;
+    public let f1 : Double;
+}
+
+@frozen
+public struct F89_S4
+{
+    public let f0 : UInt32;
+}
+
+public func swiftFunc89(a0: F89_S0, a1: F89_S1, a2: F89_S2, a3: UInt8, a4: F89_S3, a5: F89_S4, a6: Int32) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a1.f0);
+    hasher.combine(a2.f0);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a5.f0);
+    hasher.combine(a6);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F90_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F90_S1_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F90_S1
+{
+    public let f0 : F90_S1_S0;
+    public let f1 : UInt;
+    public let f2 : Double;
+}
+
+@frozen
+public struct F90_S2
+{
+    public let f0 : UInt64;
+    public let f1 : Int;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F90_S3_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F90_S3
+{
+    public let f0 : F90_S3_S0;
+}
+
+@frozen
+public struct F90_S4
+{
+    public let f0 : Int64;
+}
+
+public func swiftFunc90(a0: F90_S0, a1: Int8, a2: F90_S1, a3: F90_S2, a4: F90_S3, a5: UInt32, a6: F90_S4, a7: UInt8) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a1);
+    hasher.combine(a2.f0.f0);
+    hasher.combine(a2.f1);
+    hasher.combine(a2.f2);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a3.f2);
+    hasher.combine(a4.f0.f0);
+    hasher.combine(a5);
+    hasher.combine(a6.f0);
+    hasher.combine(a7);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F91_S0_S0
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F91_S0
+{
+    public let f0 : F91_S0_S0;
+    public let f1 : UInt32;
+    public let f2 : Int;
+}
+
+public func swiftFunc91(a0: F91_S0, a1: UInt8) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a1);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F92_S0
+{
+    public let f0 : UInt16;
+    public let f1 : UInt16;
+}
+
+@frozen
+public struct F92_S1
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F92_S2
+{
+    public let f0 : UInt64;
+    public let f1 : UInt64;
+}
+
+@frozen
+public struct F92_S3
+{
+    public let f0 : UInt;
+}
+
+public func swiftFunc92(a0: Int16, a1: UInt64, a2: UInt, a3: Int64, a4: F92_S0, a5: Int64, a6: Double, a7: UInt8, a8: Int8, a9: UInt32, a10: Int8, a11: F92_S1, a12: UInt32, a13: Float, a14: UInt64, a15: UInt8, a16: Int32, a17: UInt32, a18: F92_S2, a19: F92_S3) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    hasher.combine(a4.f1);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10);
+    hasher.combine(a11.f0);
+    hasher.combine(a12);
+    hasher.combine(a13);
+    hasher.combine(a14);
+    hasher.combine(a15);
+    hasher.combine(a16);
+    hasher.combine(a17);
+    hasher.combine(a18.f0);
+    hasher.combine(a18.f1);
+    hasher.combine(a19.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F93_S0
+{
+    public let f0 : Int32;
+    public let f1 : UInt;
+    public let f2 : Double;
+}
+
+@frozen
+public struct F93_S1
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F93_S2
+{
+    public let f0 : Double;
+}
+
+public func swiftFunc93(a0: F93_S0, a1: Int, a2: F93_S1, a3: Double, a4: F93_S2) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a1);
+    hasher.combine(a2.f0);
+    hasher.combine(a3);
+    hasher.combine(a4.f0);
+    return hasher.finalize()
+}
+
+public func swiftFunc94(a0: UInt64, a1: Int32, a2: UInt64, a3: Int64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F95_S0
+{
+    public let f0 : Int64;
+}
+
+public func swiftFunc95(a0: F95_S0, a1: Int64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a1);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F96_S0_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F96_S0
+{
+    public let f0 : UInt64;
+    public let f1 : Double;
+    public let f2 : Double;
+    public let f3 : F96_S0_S0;
+}
+
+@frozen
+public struct F96_S1_S0_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F96_S1_S0
+{
+    public let f0 : F96_S1_S0_S0;
+}
+
+@frozen
+public struct F96_S1
+{
+    public let f0 : F96_S1_S0;
+}
+
+@frozen
+public struct F96_S2
+{
+    public let f0 : UInt8;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F96_S3
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F96_S4
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F96_S5_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F96_S5
+{
+    public let f0 : F96_S5_S0;
+}
+
+@frozen
+public struct F96_S6
+{
+    public let f0 : UInt64;
+}
+
+public func swiftFunc96(a0: UInt16, a1: F96_S0, a2: F96_S1, a3: F96_S2, a4: UInt16, a5: UInt64, a6: Int, a7: Int32, a8: Int16, a9: UInt, a10: F96_S3, a11: Int16, a12: Int, a13: Int8, a14: Int32, a15: UInt32, a16: F96_S4, a17: F96_S5, a18: F96_S6) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1.f0);
+    hasher.combine(a1.f1);
+    hasher.combine(a1.f2);
+    hasher.combine(a1.f3.f0);
+    hasher.combine(a2.f0.f0.f0);
+    hasher.combine(a3.f0);
+    hasher.combine(a3.f1);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    hasher.combine(a8);
+    hasher.combine(a9);
+    hasher.combine(a10.f0);
+    hasher.combine(a11);
+    hasher.combine(a12);
+    hasher.combine(a13);
+    hasher.combine(a14);
+    hasher.combine(a15);
+    hasher.combine(a16.f0);
+    hasher.combine(a17.f0.f0);
+    hasher.combine(a18.f0);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F97_S0
+{
+    public let f0 : Float;
+    public let f1 : Float;
+    public let f2 : Int;
+    public let f3 : Int;
+    public let f4 : Int;
+}
+
+public func swiftFunc97(a0: Int8, a1: Int32, a2: UInt8, a3: UInt32, a4: UInt8, a5: F97_S0, a6: Int8, a7: Int) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5.f0);
+    hasher.combine(a5.f1);
+    hasher.combine(a5.f2);
+    hasher.combine(a5.f3);
+    hasher.combine(a5.f4);
+    hasher.combine(a6);
+    hasher.combine(a7);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F98_S0_S0_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F98_S0_S0
+{
+    public let f0 : UInt;
+    public let f1 : F98_S0_S0_S0;
+}
+
+@frozen
+public struct F98_S0
+{
+    public let f0 : Int64;
+    public let f1 : F98_S0_S0;
+    public let f2 : UInt;
+}
+
+public func swiftFunc98(a0: F98_S0, a1: UInt16, a2: UInt16, a3: Int16, a4: Int8, a5: UInt32) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1.f0);
+    hasher.combine(a0.f1.f1.f0);
+    hasher.combine(a0.f2);
+    hasher.combine(a1);
+    hasher.combine(a2);
+    hasher.combine(a3);
+    hasher.combine(a4);
+    hasher.combine(a5);
+    return hasher.finalize()
+}
+
+@frozen
+public struct F99_S0
+{
+    public let f0 : UInt64;
+    public let f1 : UInt16;
+    public let f2 : Float;
+    public let f3 : UInt64;
+}
+
+@frozen
+public struct F99_S1_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F99_S1
+{
+    public let f0 : F99_S1_S0;
+}
+
+public func swiftFunc99(a0: F99_S0, a1: Int8, a2: F99_S1, a3: Int64) -> Int {
+    var hasher = HasherFNV1a()
+    hasher.combine(a0.f0);
+    hasher.combine(a0.f1);
+    hasher.combine(a0.f2);
+    hasher.combine(a0.f3);
+    hasher.combine(a1);
+    hasher.combine(a2.f0.f0);
+    hasher.combine(a3);
+    return hasher.finalize()
+}
+
diff --git a/src/tests/Interop/Swift/SwiftCallbackAbiStress/CMakeLists.txt b/src/tests/Interop/Swift/SwiftCallbackAbiStress/CMakeLists.txt
new file mode 100644
index 000000000000..283ea0eb6543
--- /dev/null
+++ b/src/tests/Interop/Swift/SwiftCallbackAbiStress/CMakeLists.txt
@@ -0,0 +1,21 @@
+project(SwiftCallbackAbiStress)
+include ("${CLR_INTEROP_TEST_ROOT}/Interop.cmake")
+
+set(SOURCE SwiftCallbackAbiStress)
+
+if (NOT SWIFT_COMPILER_TARGET AND CLR_CMAKE_TARGET_OSX)
+    set(SWIFT_PLATFORM "macosx")
+    set(SWIFT_PLATFORM_SUFFIX "")
+    set(SWIFT_DEPLOYMENT_TARGET ${CMAKE_OSX_DEPLOYMENT_TARGET})
+    set(SWIFT_COMPILER_TARGET "${CMAKE_OSX_ARCHITECTURES}-apple-${SWIFT_PLATFORM}${SWIFT_DEPLOYMENT_TARGET}${SWIFT_PLATFORM_SUFFIX}")
+endif()
+
+add_custom_target(${SOURCE} ALL
+    COMMAND xcrun swiftc -target ${SWIFT_COMPILER_TARGET} -emit-library ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE}.swift -o ${CMAKE_CURRENT_BINARY_DIR}/lib${SOURCE}.dylib
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE}.swift
+    COMMENT "Generating ${SOURCE} library"
+)
+
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/lib${SOURCE}.dylib
+    DESTINATION bin
+)
diff --git a/src/tests/Interop/Swift/SwiftCallbackAbiStress/SwiftCallbackAbiStress.cs b/src/tests/Interop/Swift/SwiftCallbackAbiStress/SwiftCallbackAbiStress.cs
new file mode 100644
index 000000000000..cd00caec4667
--- /dev/null
+++ b/src/tests/Interop/Swift/SwiftCallbackAbiStress/SwiftCallbackAbiStress.cs
@@ -0,0 +1,8574 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#pragma warning disable CS8500
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.ExceptionServices;
+using System.Runtime.InteropServices;
+using System.Runtime.InteropServices.Swift;
+using Xunit;
+
+public unsafe class SwiftCallbackAbiStress
+{
+    private const string SwiftLib = "libSwiftCallbackAbiStress.dylib";
+
+    [StructLayout(LayoutKind.Sequential, Size = 14)]
+    struct F0_S0
+    {
+        public double F0;
+        public uint F1;
+        public ushort F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F0_S1
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F0_S2
+    {
+        public float F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB5Func01fs5Int32VAEs5Int16V_AEs6UInt64Vs6UInt16VAA5F0_S0VAA0K3_S1Vs5UInt8VAA0K3_S2VtXE_tF")]
+    private static extern int SwiftCallbackFunc0(delegate* unmanaged[Swift]<short, int, ulong, ushort, F0_S0, F0_S1, byte, F0_S2, SwiftSelf, int> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static int SwiftCallbackFunc0Callback(short a0, int a1, ulong a2, ushort a3, F0_S0 a4, F0_S1 a5, byte a6, F0_S2 a7, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((short)-17813, a0);
+            Assert.Equal((int)318006528, a1);
+            Assert.Equal((ulong)1195162122024233590, a2);
+            Assert.Equal((ushort)60467, a3);
+            Assert.Equal((double)2239972725713766, a4.F0);
+            Assert.Equal((uint)1404066621, a4.F1);
+            Assert.Equal((ushort)29895, a4.F2);
+            Assert.Equal((ulong)7923486769850554262, a5.F0);
+            Assert.Equal((byte)217, a6);
+            Assert.Equal((float)2497655, a7.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 1579768470;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc0()
+    {
+        Console.Write("Running SwiftCallbackFunc0: ");
+        ExceptionDispatchInfo ex = null;
+        int val = SwiftCallbackFunc0(&SwiftCallbackFunc0Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((int)1579768470, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 3)]
+    struct F1_S0
+    {
+        public ushort F0;
+        public byte F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F1_S1
+    {
+        public byte F0;
+        public ulong F1;
+        public short F2;
+        public float F3;
+        public float F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F1_S2_S0
+    {
+        public uint F0;
+        public double F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct F1_S2
+    {
+        public sbyte F0;
+        public nuint F1;
+        public F1_S2_S0 F2;
+        public nint F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F1_S3
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F1_S4
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F1_S5_S0
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F1_S5
+    {
+        public F1_S5_S0 F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB5Func11fs5UInt8VAEs5Int64V_Sds4Int8VAA5F1_S0VAA0J3_S1VAA0J3_S2VAeigA0J3_S3VSuAA0J3_S4VAA0J3_S5VSitXE_tF")]
+    private static extern byte SwiftCallbackFunc1(delegate* unmanaged[Swift]<long, double, sbyte, F1_S0, F1_S1, F1_S2, byte, sbyte, long, F1_S3, nuint, F1_S4, F1_S5, nint, SwiftSelf, byte> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static byte SwiftCallbackFunc1Callback(long a0, double a1, sbyte a2, F1_S0 a3, F1_S1 a4, F1_S2 a5, byte a6, sbyte a7, long a8, F1_S3 a9, nuint a10, F1_S4 a11, F1_S5 a12, nint a13, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((long)7920511243396412395, a0);
+            Assert.Equal((double)1396130721334528, a1);
+            Assert.Equal((sbyte)-55, a2);
+            Assert.Equal((ushort)33758, a3.F0);
+            Assert.Equal((byte)103, a3.F1);
+            Assert.Equal((byte)201, a4.F0);
+            Assert.Equal((ulong)7390774039746135757, a4.F1);
+            Assert.Equal((short)14699, a4.F2);
+            Assert.Equal((float)7235330, a4.F3);
+            Assert.Equal((float)7189013, a4.F4);
+            Assert.Equal((sbyte)37, a5.F0);
+            Assert.Equal((nuint)unchecked((nuint)3310322731568932038), a5.F1);
+            Assert.Equal((uint)1100328218, a5.F2.F0);
+            Assert.Equal((double)1060779460203640, a5.F2.F1);
+            Assert.Equal((nint)unchecked((nint)8325292022909418877), a5.F3);
+            Assert.Equal((byte)137, a6);
+            Assert.Equal((sbyte)82, a7);
+            Assert.Equal((long)1197537325837505041, a8);
+            Assert.Equal((ushort)46950, a9.F0);
+            Assert.Equal((nuint)unchecked((nuint)8181828233622947597), a10);
+            Assert.Equal((nint)unchecked((nint)1851182205030289056), a11.F0);
+            Assert.Equal((uint)1971014225, a12.F0.F0);
+            Assert.Equal((nint)unchecked((nint)6437995407675718392), a13);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 248;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc1()
+    {
+        Console.Write("Running SwiftCallbackFunc1: ");
+        ExceptionDispatchInfo ex = null;
+        byte val = SwiftCallbackFunc1(&SwiftCallbackFunc1Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((byte)248, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F2_S0
+    {
+        public int F0;
+        public nuint F1;
+        public float F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F2_S1_S0
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F2_S1
+    {
+        public long F0;
+        public ushort F1;
+        public F2_S1_S0 F2;
+        public nint F3;
+        public double F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 11)]
+    struct F2_S2
+    {
+        public float F0;
+        public int F1;
+        public ushort F2;
+        public sbyte F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F2_S3_S0
+    {
+        public sbyte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F2_S3
+    {
+        public F2_S3_S0 F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB5Func21fs4Int8VAeA5F2_S0V_AA0H3_S1VAA0H3_S2VSfs6UInt64VAA0H3_S3VtXE_tF")]
+    private static extern sbyte SwiftCallbackFunc2(delegate* unmanaged[Swift]<F2_S0, F2_S1, F2_S2, float, ulong, F2_S3, SwiftSelf, sbyte> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static sbyte SwiftCallbackFunc2Callback(F2_S0 a0, F2_S1 a1, F2_S2 a2, float a3, ulong a4, F2_S3 a5, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((int)1860840185, a0.F0);
+            Assert.Equal((nuint)unchecked((nuint)5407074783834178811), a0.F1);
+            Assert.Equal((float)6261766, a0.F2);
+            Assert.Equal((long)4033972792915237065, a1.F0);
+            Assert.Equal((ushort)22825, a1.F1);
+            Assert.Equal((ushort)44574, a1.F2.F0);
+            Assert.Equal((nint)unchecked((nint)4536911485304731630), a1.F3);
+            Assert.Equal((double)4282944015147385, a1.F4);
+            Assert.Equal((float)2579193, a2.F0);
+            Assert.Equal((int)586252933, a2.F1);
+            Assert.Equal((ushort)47002, a2.F2);
+            Assert.Equal((sbyte)71, a2.F3);
+            Assert.Equal((float)3225929, a3);
+            Assert.Equal((ulong)3599444831393612282, a4);
+            Assert.Equal((sbyte)13, a5.F0.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 115;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc2()
+    {
+        Console.Write("Running SwiftCallbackFunc2: ");
+        ExceptionDispatchInfo ex = null;
+        sbyte val = SwiftCallbackFunc2(&SwiftCallbackFunc2Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((sbyte)115, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F3_S0_S0
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F3_S0
+    {
+        public F3_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F3_S1
+    {
+        public uint F0;
+        public long F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 3)]
+    struct F3_S2_S0
+    {
+        public short F0;
+        public byte F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 5)]
+    struct F3_S2
+    {
+        public F3_S2_S0 F0;
+        public sbyte F1;
+        public byte F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F3_S3
+    {
+        public ulong F0;
+        public long F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F3_S4
+    {
+        public short F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F3_Ret
+    {
+        public ushort F0;
+        public byte F1;
+        public ushort F2;
+        public float F3;
+
+        public F3_Ret(ushort f0, byte f1, ushort f2, float f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB5Func31fAA6F3_RetVAeA0G3_S0V_Sfs6UInt16VAA0G3_S1VAIs5Int32VAA0G3_S2VSiAA0G3_S3VAA0G3_S4VtXE_tF")]
+    private static extern F3_Ret SwiftCallbackFunc3(delegate* unmanaged[Swift]<F3_S0, float, ushort, F3_S1, ushort, int, F3_S2, nint, F3_S3, F3_S4, SwiftSelf, F3_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F3_Ret SwiftCallbackFunc3Callback(F3_S0 a0, float a1, ushort a2, F3_S1 a3, ushort a4, int a5, F3_S2 a6, nint a7, F3_S3 a8, F3_S4 a9, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nuint)unchecked((nuint)5610153900386943274), a0.F0.F0);
+            Assert.Equal((float)7736836, a1);
+            Assert.Equal((ushort)31355, a2);
+            Assert.Equal((uint)1159208572, a3.F0);
+            Assert.Equal((long)2707818827451590538, a3.F1);
+            Assert.Equal((ushort)37580, a4);
+            Assert.Equal((int)1453603418, a5);
+            Assert.Equal((short)699, a6.F0.F0);
+            Assert.Equal((byte)46, a6.F0.F1);
+            Assert.Equal((sbyte)-125, a6.F1);
+            Assert.Equal((byte)92, a6.F2);
+            Assert.Equal((nint)unchecked((nint)94557706586779834), a7);
+            Assert.Equal((ulong)2368015527878194540, a8.F0);
+            Assert.Equal((long)5026404532195049271, a8.F1);
+            Assert.Equal((short)21807, a9.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F3_Ret(51293, 217, 64666, 5667425);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc3()
+    {
+        Console.Write("Running SwiftCallbackFunc3: ");
+        ExceptionDispatchInfo ex = null;
+        F3_Ret val = SwiftCallbackFunc3(&SwiftCallbackFunc3Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ushort)51293, val.F0);
+        Assert.Equal((byte)217, val.F1);
+        Assert.Equal((ushort)64666, val.F2);
+        Assert.Equal((float)5667425, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F4_S0_S0
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F4_S0
+    {
+        public F4_S0_S0 F0;
+        public float F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F4_Ret_S0
+    {
+        public nint F0;
+
+        public F4_Ret_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 44)]
+    struct F4_Ret
+    {
+        public int F0;
+        public F4_Ret_S0 F1;
+        public nint F2;
+        public short F3;
+        public nint F4;
+        public uint F5;
+
+        public F4_Ret(int f0, F4_Ret_S0 f1, nint f2, short f3, nint f4, uint f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB5Func41fAA6F4_RetVAESd_AA0G3_S0Vs5UInt8Vs5Int32Vs6UInt32VtXE_tF")]
+    private static extern F4_Ret SwiftCallbackFunc4(delegate* unmanaged[Swift]<double, F4_S0, byte, int, uint, SwiftSelf, F4_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F4_Ret SwiftCallbackFunc4Callback(double a0, F4_S0 a1, byte a2, int a3, uint a4, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((double)4282972206489588, a0);
+            Assert.Equal((uint)611688063, a1.F0.F0);
+            Assert.Equal((float)877466, a1.F1);
+            Assert.Equal((byte)53, a2);
+            Assert.Equal((int)965123506, a3);
+            Assert.Equal((uint)1301067653, a4);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F4_Ret(2069454428, new F4_Ret_S0(unchecked((nint)5483154806067048127)), unchecked((nint)2342208892279753870), -21578, unchecked((nint)4641984012938514811), 1691113876);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc4()
+    {
+        Console.Write("Running SwiftCallbackFunc4: ");
+        ExceptionDispatchInfo ex = null;
+        F4_Ret val = SwiftCallbackFunc4(&SwiftCallbackFunc4Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((int)2069454428, val.F0);
+        Assert.Equal((nint)unchecked((nint)5483154806067048127), val.F1.F0);
+        Assert.Equal((nint)unchecked((nint)2342208892279753870), val.F2);
+        Assert.Equal((short)-21578, val.F3);
+        Assert.Equal((nint)unchecked((nint)4641984012938514811), val.F4);
+        Assert.Equal((uint)1691113876, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F5_S0
+    {
+        public nuint F0;
+        public uint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F5_S1_S0
+    {
+        public nint F0;
+        public uint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F5_S1_S1
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F5_S1
+    {
+        public F5_S1_S0 F0;
+        public F5_S1_S1 F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F5_S2
+    {
+        public double F0;
+        public sbyte F1;
+        public nint F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F5_S3
+    {
+        public long F0;
+        public double F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F5_S4
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 26)]
+    struct F5_Ret
+    {
+        public short F0;
+        public int F1;
+        public int F2;
+        public ulong F3;
+        public short F4;
+
+        public F5_Ret(short f0, int f1, int f2, ulong f3, short f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB5Func51fAA6F5_RetVAEs5UInt8V_s5Int16Vs6UInt64VS2uAkgA0G3_S0Vs4Int8VAoA0G3_S1VAA0G3_S2VAA0G3_S3VSdAA0G3_S4Vs6UInt16VS2fAYtXE_tF")]
+    private static extern F5_Ret SwiftCallbackFunc5(delegate* unmanaged[Swift]<byte, short, ulong, nuint, nuint, ulong, byte, F5_S0, sbyte, sbyte, F5_S1, F5_S2, F5_S3, double, F5_S4, ushort, float, float, ushort, SwiftSelf, F5_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F5_Ret SwiftCallbackFunc5Callback(byte a0, short a1, ulong a2, nuint a3, nuint a4, ulong a5, byte a6, F5_S0 a7, sbyte a8, sbyte a9, F5_S1 a10, F5_S2 a11, F5_S3 a12, double a13, F5_S4 a14, ushort a15, float a16, float a17, ushort a18, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((byte)42, a0);
+            Assert.Equal((short)18727, a1);
+            Assert.Equal((ulong)3436765034579128495, a2);
+            Assert.Equal((nuint)unchecked((nuint)6305137336506323506), a3);
+            Assert.Equal((nuint)unchecked((nuint)6280137078630028944), a4);
+            Assert.Equal((ulong)6252650621827449809, a5);
+            Assert.Equal((byte)129, a6);
+            Assert.Equal((nuint)unchecked((nuint)6879980973426111678), a7.F0);
+            Assert.Equal((uint)1952654577, a7.F1);
+            Assert.Equal((sbyte)-34, a8);
+            Assert.Equal((sbyte)102, a9);
+            Assert.Equal((nint)unchecked((nint)8389143657021522019), a10.F0.F0);
+            Assert.Equal((uint)437030241, a10.F0.F1);
+            Assert.Equal((float)7522798, a10.F1.F0);
+            Assert.Equal((double)523364011167530, a11.F0);
+            Assert.Equal((sbyte)16, a11.F1);
+            Assert.Equal((nint)unchecked((nint)3823439046574037759), a11.F2);
+            Assert.Equal((long)3767260839267771462, a12.F0);
+            Assert.Equal((double)1181031208183008, a12.F1);
+            Assert.Equal((double)2338830539621828, a13);
+            Assert.Equal((ushort)36276, a14.F0);
+            Assert.Equal((ushort)41286, a15);
+            Assert.Equal((float)6683955, a16);
+            Assert.Equal((float)6399917, a17);
+            Assert.Equal((ushort)767, a18);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F5_Ret(-23277, 1015782032, 83490460, 2747931081050267058, -10369);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc5()
+    {
+        Console.Write("Running SwiftCallbackFunc5: ");
+        ExceptionDispatchInfo ex = null;
+        F5_Ret val = SwiftCallbackFunc5(&SwiftCallbackFunc5Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((short)-23277, val.F0);
+        Assert.Equal((int)1015782032, val.F1);
+        Assert.Equal((int)83490460, val.F2);
+        Assert.Equal((ulong)2747931081050267058, val.F3);
+        Assert.Equal((short)-10369, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F6_S0_S0
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F6_S0
+    {
+        public sbyte F0;
+        public sbyte F1;
+        public int F2;
+        public F6_S0_S0 F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F6_S1
+    {
+        public int F0;
+        public ulong F1;
+        public ulong F2;
+        public uint F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 11)]
+    struct F6_S2
+    {
+        public long F0;
+        public short F1;
+        public sbyte F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F6_S3
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F6_Ret_S0
+    {
+        public long F0;
+        public uint F1;
+
+        public F6_Ret_S0(long f0, uint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 29)]
+    struct F6_Ret
+    {
+        public F6_Ret_S0 F0;
+        public ulong F1;
+        public float F2;
+        public sbyte F3;
+
+        public F6_Ret(F6_Ret_S0 f0, ulong f1, float f2, sbyte f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB5Func61fAA6F6_RetVAESf_AA0G3_S0Vs5Int64Vs4Int8Vs6UInt16VSuAMs6UInt64VAA0G3_S1Vs5Int16VAA0G3_S2VAA0G3_S3VAMtXE_tF")]
+    private static extern F6_Ret SwiftCallbackFunc6(delegate* unmanaged[Swift]<float, F6_S0, long, sbyte, ushort, nuint, ushort, ulong, F6_S1, short, F6_S2, F6_S3, ushort, SwiftSelf, F6_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F6_Ret SwiftCallbackFunc6Callback(float a0, F6_S0 a1, long a2, sbyte a3, ushort a4, nuint a5, ushort a6, ulong a7, F6_S1 a8, short a9, F6_S2 a10, F6_S3 a11, ushort a12, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((float)2905241, a0);
+            Assert.Equal((sbyte)-27, a1.F0);
+            Assert.Equal((sbyte)-77, a1.F1);
+            Assert.Equal((int)1315779092, a1.F2);
+            Assert.Equal((float)5373970, a1.F3.F0);
+            Assert.Equal((long)7022244764256789748, a2);
+            Assert.Equal((sbyte)-110, a3);
+            Assert.Equal((ushort)2074, a4);
+            Assert.Equal((nuint)unchecked((nuint)3560129042279209151), a5);
+            Assert.Equal((ushort)2200, a6);
+            Assert.Equal((ulong)5730241035812482149, a7);
+            Assert.Equal((int)18625011, a8.F0);
+            Assert.Equal((ulong)242340713355417257, a8.F1);
+            Assert.Equal((ulong)6962175160124965670, a8.F2);
+            Assert.Equal((uint)1983617839, a8.F3);
+            Assert.Equal((short)-28374, a9);
+            Assert.Equal((long)6355748563312062178, a10.F0);
+            Assert.Equal((short)-23189, a10.F1);
+            Assert.Equal((sbyte)81, a10.F2);
+            Assert.Equal((float)4547677, a11.F0);
+            Assert.Equal((ushort)6397, a12);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F6_Ret(new F6_Ret_S0(3036123356548380503, 653452587), 4787954187933165977, 5060002, -68);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc6()
+    {
+        Console.Write("Running SwiftCallbackFunc6: ");
+        ExceptionDispatchInfo ex = null;
+        F6_Ret val = SwiftCallbackFunc6(&SwiftCallbackFunc6Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((long)3036123356548380503, val.F0.F0);
+        Assert.Equal((uint)653452587, val.F0.F1);
+        Assert.Equal((ulong)4787954187933165977, val.F1);
+        Assert.Equal((float)5060002, val.F2);
+        Assert.Equal((sbyte)-68, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F7_S0
+    {
+        public float F0;
+        public long F1;
+        public nuint F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F7_S1
+    {
+        public short F0;
+        public uint F1;
+        public uint F2;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB5Func71fs6UInt16VAEs5Int64V_s5UInt8VSdAeA5F7_S0VAISds6UInt32VAA0J3_S1Vs5Int32VAQSis5Int16VAESis6UInt64VAiStXE_tF")]
+    private static extern ushort SwiftCallbackFunc7(delegate* unmanaged[Swift]<long, byte, double, ushort, F7_S0, byte, double, uint, F7_S1, int, int, nint, short, ushort, nint, ulong, byte, short, SwiftSelf, ushort> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static ushort SwiftCallbackFunc7Callback(long a0, byte a1, double a2, ushort a3, F7_S0 a4, byte a5, double a6, uint a7, F7_S1 a8, int a9, int a10, nint a11, short a12, ushort a13, nint a14, ulong a15, byte a16, short a17, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((long)7625368278886567558, a0);
+            Assert.Equal((byte)70, a1);
+            Assert.Equal((double)2146971972122530, a2);
+            Assert.Equal((ushort)54991, a3);
+            Assert.Equal((float)1072132, a4.F0);
+            Assert.Equal((long)3890459003549150599, a4.F1);
+            Assert.Equal((nuint)unchecked((nuint)56791000421908673), a4.F2);
+            Assert.Equal((byte)227, a5);
+            Assert.Equal((double)3248250571953113, a6);
+            Assert.Equal((uint)1138780108, a7);
+            Assert.Equal((short)-22670, a8.F0);
+            Assert.Equal((uint)1796712687, a8.F1);
+            Assert.Equal((uint)304251857, a8.F2);
+            Assert.Equal((int)1288765591, a9);
+            Assert.Equal((int)1382721790, a10);
+            Assert.Equal((nint)unchecked((nint)6746417265635727373), a11);
+            Assert.Equal((short)-15600, a12);
+            Assert.Equal((ushort)47575, a13);
+            Assert.Equal((nint)unchecked((nint)7200793040165597188), a14);
+            Assert.Equal((ulong)2304985873826892392, a15);
+            Assert.Equal((byte)99, a16);
+            Assert.Equal((short)-9993, a17);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 31412;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc7()
+    {
+        Console.Write("Running SwiftCallbackFunc7: ");
+        ExceptionDispatchInfo ex = null;
+        ushort val = SwiftCallbackFunc7(&SwiftCallbackFunc7Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ushort)31412, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F8_S0
+    {
+        public short F0;
+        public short F1;
+        public nuint F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F8_S1
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F8_Ret_S0
+    {
+        public int F0;
+        public nuint F1;
+        public nint F2;
+
+        public F8_Ret_S0(int f0, nuint f1, nint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 44)]
+    struct F8_Ret
+    {
+        public long F0;
+        public F8_Ret_S0 F1;
+        public nint F2;
+        public uint F3;
+
+        public F8_Ret(long f0, F8_Ret_S0 f1, nint f2, uint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB5Func81fAA6F8_RetVAeA0G3_S0V_AA0G3_S1VtXE_tF")]
+    private static extern F8_Ret SwiftCallbackFunc8(delegate* unmanaged[Swift]<F8_S0, F8_S1, SwiftSelf, F8_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F8_Ret SwiftCallbackFunc8Callback(F8_S0 a0, F8_S1 a1, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((short)16278, a0.F0);
+            Assert.Equal((short)-31563, a0.F1);
+            Assert.Equal((nuint)unchecked((nuint)2171308312325435543), a0.F2);
+            Assert.Equal((long)8923668560896309835, a1.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F8_Ret(4170441467272673523, new F8_Ret_S0(1940721160, unchecked((nuint)6524670832376567295), unchecked((nint)4210781401091965722)), unchecked((nint)3245727696885859461), 855061841);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc8()
+    {
+        Console.Write("Running SwiftCallbackFunc8: ");
+        ExceptionDispatchInfo ex = null;
+        F8_Ret val = SwiftCallbackFunc8(&SwiftCallbackFunc8Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((long)4170441467272673523, val.F0);
+        Assert.Equal((int)1940721160, val.F1.F0);
+        Assert.Equal((nuint)unchecked((nuint)6524670832376567295), val.F1.F1);
+        Assert.Equal((nint)unchecked((nint)4210781401091965722), val.F1.F2);
+        Assert.Equal((nint)unchecked((nint)3245727696885859461), val.F2);
+        Assert.Equal((uint)855061841, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F9_S0_S0
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F9_S0
+    {
+        public F9_S0_S0 F0;
+        public short F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F9_S1_S0
+    {
+        public long F0;
+        public long F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F9_S1
+    {
+        public nint F0;
+        public F9_S1_S0 F1;
+        public float F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 19)]
+    struct F9_S2
+    {
+        public ulong F0;
+        public double F1;
+        public short F2;
+        public sbyte F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F9_S3_S0_S0
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F9_S3_S0
+    {
+        public F9_S3_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F9_S3
+    {
+        public sbyte F0;
+        public F9_S3_S0 F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F9_S4_S0
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct F9_S4
+    {
+        public F9_S4_S0 F0;
+        public sbyte F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F9_S5_S0
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F9_S5
+    {
+        public uint F0;
+        public F9_S5_S0 F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F9_S6
+    {
+        public double F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB5Func91fs6UInt16VAEs4Int8V_s5UInt8Vs5Int64VAA5F9_S0VAA0K3_S1VAA0K3_S2VSdAA0K3_S3VAA0K3_S4VSdAA0K3_S5VAA0K3_S6VtXE_tF")]
+    private static extern ushort SwiftCallbackFunc9(delegate* unmanaged[Swift]<sbyte, byte, long, F9_S0, F9_S1, F9_S2, double, F9_S3, F9_S4, double, F9_S5, F9_S6, SwiftSelf, ushort> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static ushort SwiftCallbackFunc9Callback(sbyte a0, byte a1, long a2, F9_S0 a3, F9_S1 a4, F9_S2 a5, double a6, F9_S3 a7, F9_S4 a8, double a9, F9_S5 a10, F9_S6 a11, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)17, a0);
+            Assert.Equal((byte)104, a1);
+            Assert.Equal((long)8922699691031703191, a2);
+            Assert.Equal((byte)123, a3.F0.F0);
+            Assert.Equal((short)31706, a3.F1);
+            Assert.Equal((nint)unchecked((nint)1804058604961822948), a4.F0);
+            Assert.Equal((long)8772179036715198777, a4.F1.F0);
+            Assert.Equal((long)3320511540592563328, a4.F1.F1);
+            Assert.Equal((float)679540, a4.F2);
+            Assert.Equal((ulong)8642590829466497926, a5.F0);
+            Assert.Equal((double)4116322155252965, a5.F1);
+            Assert.Equal((short)17992, a5.F2);
+            Assert.Equal((sbyte)-48, a5.F3);
+            Assert.Equal((double)414017537937894, a6);
+            Assert.Equal((sbyte)47, a7.F0);
+            Assert.Equal((ulong)7576380984563129085, a7.F1.F0.F0);
+            Assert.Equal((ulong)1356827400304742803, a8.F0.F0);
+            Assert.Equal((sbyte)-17, a8.F1);
+            Assert.Equal((double)4458031413035521, a9);
+            Assert.Equal((uint)352075098, a10.F0);
+            Assert.Equal((uint)1840980094, a10.F1.F0);
+            Assert.Equal((double)396957263013930, a11.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 5567;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc9()
+    {
+        Console.Write("Running SwiftCallbackFunc9: ");
+        ExceptionDispatchInfo ex = null;
+        ushort val = SwiftCallbackFunc9(&SwiftCallbackFunc9Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ushort)5567, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F10_Ret
+    {
+        public long F0;
+        public uint F1;
+        public ushort F2;
+        public uint F3;
+
+        public F10_Ret(long f0, uint f1, ushort f2, uint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func101fAA7F10_RetVAEs5Int16VXE_tF")]
+    private static extern F10_Ret SwiftCallbackFunc10(delegate* unmanaged[Swift]<short, SwiftSelf, F10_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F10_Ret SwiftCallbackFunc10Callback(short a0, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((short)-7168, a0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F10_Ret(7820305774933543349, 1501926289, 39078, 661487951);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc10()
+    {
+        Console.Write("Running SwiftCallbackFunc10: ");
+        ExceptionDispatchInfo ex = null;
+        F10_Ret val = SwiftCallbackFunc10(&SwiftCallbackFunc10Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((long)7820305774933543349, val.F0);
+        Assert.Equal((uint)1501926289, val.F1);
+        Assert.Equal((ushort)39078, val.F2);
+        Assert.Equal((uint)661487951, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F11_S0_S0
+    {
+        public sbyte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F11_S0
+    {
+        public uint F0;
+        public F11_S0_S0 F1;
+        public nuint F2;
+        public int F3;
+        public long F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F11_S1_S0
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F11_S1
+    {
+        public F11_S1_S0 F0;
+        public short F1;
+        public uint F2;
+        public short F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F11_S2
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F11_Ret
+    {
+        public short F0;
+        public short F1;
+        public byte F2;
+        public long F3;
+
+        public F11_Ret(short f0, short f1, byte f2, long f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func111fAA7F11_RetVAEs6UInt32V_Sus6UInt64Vs5Int16VAA0G3_S0VSfs4Int8Vs6UInt16VAA0G3_S1VAGs5Int64VAgA0G3_S2VtXE_tF")]
+    private static extern F11_Ret SwiftCallbackFunc11(delegate* unmanaged[Swift]<uint, nuint, ulong, short, F11_S0, float, sbyte, ushort, F11_S1, uint, long, uint, F11_S2, SwiftSelf, F11_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F11_Ret SwiftCallbackFunc11Callback(uint a0, nuint a1, ulong a2, short a3, F11_S0 a4, float a5, sbyte a6, ushort a7, F11_S1 a8, uint a9, long a10, uint a11, F11_S2 a12, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((uint)454751144, a0);
+            Assert.Equal((nuint)unchecked((nuint)1696592254558667577), a1);
+            Assert.Equal((ulong)5831587230944972245, a2);
+            Assert.Equal((short)15352, a3);
+            Assert.Equal((uint)1306601347, a4.F0);
+            Assert.Equal((sbyte)123, a4.F1.F0);
+            Assert.Equal((nuint)unchecked((nuint)3064471520018434938), a4.F2);
+            Assert.Equal((int)272956246, a4.F3);
+            Assert.Equal((long)3683518307106722029, a4.F4);
+            Assert.Equal((float)5606122, a5);
+            Assert.Equal((sbyte)-126, a6);
+            Assert.Equal((ushort)50801, a7);
+            Assert.Equal((ushort)63467, a8.F0.F0);
+            Assert.Equal((short)-31828, a8.F1);
+            Assert.Equal((uint)2117176776, a8.F2);
+            Assert.Equal((short)-27265, a8.F3);
+            Assert.Equal((uint)1879606687, a9);
+            Assert.Equal((long)4981244336430926707, a10);
+            Assert.Equal((uint)1159924856, a11);
+            Assert.Equal((byte)29, a12.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F11_Ret(7934, -24509, 20, 5470383170748296608);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc11()
+    {
+        Console.Write("Running SwiftCallbackFunc11: ");
+        ExceptionDispatchInfo ex = null;
+        F11_Ret val = SwiftCallbackFunc11(&SwiftCallbackFunc11Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((short)7934, val.F0);
+        Assert.Equal((short)-24509, val.F1);
+        Assert.Equal((byte)20, val.F2);
+        Assert.Equal((long)5470383170748296608, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct F12_S0
+    {
+        public ulong F0;
+        public sbyte F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F12_S1_S0_S0
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F12_S1_S0
+    {
+        public F12_S1_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F12_S1
+    {
+        public ushort F0;
+        public uint F1;
+        public F12_S1_S0 F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F12_Ret
+    {
+        public ulong F0;
+        public nint F1;
+
+        public F12_Ret(ulong f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func121fAA7F12_RetVAeA0G3_S0V_s5Int16Vs6UInt64VAA0G3_S1Vs4Int8VtXE_tF")]
+    private static extern F12_Ret SwiftCallbackFunc12(delegate* unmanaged[Swift]<F12_S0, short, ulong, F12_S1, sbyte, SwiftSelf, F12_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F12_Ret SwiftCallbackFunc12Callback(F12_S0 a0, short a1, ulong a2, F12_S1 a3, sbyte a4, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ulong)3236871137735400659, a0.F0);
+            Assert.Equal((sbyte)-123, a0.F1);
+            Assert.Equal((short)-22828, a1);
+            Assert.Equal((ulong)2132557792366642035, a2);
+            Assert.Equal((ushort)42520, a3.F0);
+            Assert.Equal((uint)879349060, a3.F1);
+            Assert.Equal((ulong)5694370973277919380, a3.F2.F0.F0);
+            Assert.Equal((sbyte)-75, a4);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F12_Ret(4675419585914412295, unchecked((nint)1931022181202552704));
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc12()
+    {
+        Console.Write("Running SwiftCallbackFunc12: ");
+        ExceptionDispatchInfo ex = null;
+        F12_Ret val = SwiftCallbackFunc12(&SwiftCallbackFunc12Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ulong)4675419585914412295, val.F0);
+        Assert.Equal((nint)unchecked((nint)1931022181202552704), val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F13_S0_S0
+    {
+        public long F0;
+        public long F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 22)]
+    struct F13_S0
+    {
+        public F13_S0_S0 F0;
+        public float F1;
+        public short F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F13_S1
+    {
+        public nint F0;
+        public ulong F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F13_S2_S0
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F13_S2
+    {
+        public F13_S2_S0 F0;
+        public double F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 5)]
+    struct F13_S3
+    {
+        public float F0;
+        public sbyte F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F13_S4
+    {
+        public nint F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func131fS2dAA6F13_S0V_s5Int32VSis6UInt16VSuAA0G3_S1VAA0G3_S2VSiSds4Int8VSfSiAA0G3_S3VSuAA0G3_S4VtXE_tF")]
+    private static extern double SwiftCallbackFunc13(delegate* unmanaged[Swift]<F13_S0, int, nint, ushort, nuint, F13_S1, F13_S2, nint, double, sbyte, float, nint, F13_S3, nuint, F13_S4, SwiftSelf, double> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static double SwiftCallbackFunc13Callback(F13_S0 a0, int a1, nint a2, ushort a3, nuint a4, F13_S1 a5, F13_S2 a6, nint a7, double a8, sbyte a9, float a10, nint a11, F13_S3 a12, nuint a13, F13_S4 a14, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((long)9003727031576598067, a0.F0.F0);
+            Assert.Equal((long)8527798284445940986, a0.F0.F1);
+            Assert.Equal((float)3585628, a0.F1);
+            Assert.Equal((short)-12520, a0.F2);
+            Assert.Equal((int)1510815104, a1);
+            Assert.Equal((nint)unchecked((nint)5883331525294982326), a2);
+            Assert.Equal((ushort)60738, a3);
+            Assert.Equal((nuint)unchecked((nuint)5291799143932627546), a4);
+            Assert.Equal((nint)unchecked((nint)1949276559361384602), a5.F0);
+            Assert.Equal((ulong)876048527237138968, a5.F1);
+            Assert.Equal((byte)67, a6.F0.F0);
+            Assert.Equal((double)2455575228564859, a6.F1);
+            Assert.Equal((nint)unchecked((nint)2321408806345977320), a7);
+            Assert.Equal((double)12750323283778, a8);
+            Assert.Equal((sbyte)46, a9);
+            Assert.Equal((float)6774339, a10);
+            Assert.Equal((nint)unchecked((nint)5121910967292140178), a11);
+            Assert.Equal((float)8254279, a12.F0);
+            Assert.Equal((sbyte)-7, a12.F1);
+            Assert.Equal((nuint)unchecked((nuint)7533347207018595125), a13);
+            Assert.Equal((nint)unchecked((nint)6605448167191082938), a14.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 2798050901932855;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc13()
+    {
+        Console.Write("Running SwiftCallbackFunc13: ");
+        ExceptionDispatchInfo ex = null;
+        double val = SwiftCallbackFunc13(&SwiftCallbackFunc13Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((double)2798050901932855, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F14_S0
+    {
+        public sbyte F0;
+        public float F1;
+        public ushort F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F14_S1
+    {
+        public ulong F0;
+        public ulong F1;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func141fs5Int64VA2E_AA6F14_S0Vs4Int8Vs6UInt64VAA0H3_S1VSitXE_tF")]
+    private static extern long SwiftCallbackFunc14(delegate* unmanaged[Swift]<long, F14_S0, sbyte, ulong, F14_S1, nint, SwiftSelf, long> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static long SwiftCallbackFunc14Callback(long a0, F14_S0 a1, sbyte a2, ulong a3, F14_S1 a4, nint a5, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((long)5547219684656041875, a0);
+            Assert.Equal((sbyte)-39, a1.F0);
+            Assert.Equal((float)5768837, a1.F1);
+            Assert.Equal((ushort)53063, a1.F2);
+            Assert.Equal((sbyte)-102, a2);
+            Assert.Equal((ulong)5745438709817040873, a3);
+            Assert.Equal((ulong)2178706453119907411, a4.F0);
+            Assert.Equal((ulong)4424726479787355131, a4.F1);
+            Assert.Equal((nint)unchecked((nint)5693881223150438553), a5);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 5130561516716417305;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc14()
+    {
+        Console.Write("Running SwiftCallbackFunc14: ");
+        ExceptionDispatchInfo ex = null;
+        long val = SwiftCallbackFunc14(&SwiftCallbackFunc14Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((long)5130561516716417305, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F15_S0
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F15_S1
+    {
+        public nint F0;
+        public uint F1;
+        public byte F2;
+        public short F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 25)]
+    struct F15_S2
+    {
+        public sbyte F0;
+        public ulong F1;
+        public long F2;
+        public byte F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F15_S3
+    {
+        public double F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func151fS2is5UInt8V_s6UInt16Vs6UInt64VAIs4Int8VSuSdSfSiAA6F15_S0VAA0K3_S1VAgA0K3_S2VAeA0K3_S3VtXE_tF")]
+    private static extern nint SwiftCallbackFunc15(delegate* unmanaged[Swift]<byte, ushort, ulong, ulong, sbyte, nuint, double, float, nint, F15_S0, F15_S1, ushort, F15_S2, byte, F15_S3, SwiftSelf, nint> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static nint SwiftCallbackFunc15Callback(byte a0, ushort a1, ulong a2, ulong a3, sbyte a4, nuint a5, double a6, float a7, nint a8, F15_S0 a9, F15_S1 a10, ushort a11, F15_S2 a12, byte a13, F15_S3 a14, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((byte)0, a0);
+            Assert.Equal((ushort)31081, a1);
+            Assert.Equal((ulong)8814881608835743979, a2);
+            Assert.Equal((ulong)4283853687332682681, a3);
+            Assert.Equal((sbyte)80, a4);
+            Assert.Equal((nuint)unchecked((nuint)7895994601265649979), a5);
+            Assert.Equal((double)1855521542692398, a6);
+            Assert.Equal((float)3235683, a7);
+            Assert.Equal((nint)unchecked((nint)215122646177738904), a8);
+            Assert.Equal((uint)2044750195, a9.F0);
+            Assert.Equal((nint)unchecked((nint)1772412898183620625), a10.F0);
+            Assert.Equal((uint)131256973, a10.F1);
+            Assert.Equal((byte)153, a10.F2);
+            Assert.Equal((short)25281, a10.F3);
+            Assert.Equal((ushort)50965, a11);
+            Assert.Equal((sbyte)-83, a12.F0);
+            Assert.Equal((ulong)7751486385861474282, a12.F1);
+            Assert.Equal((long)3744400479301818340, a12.F2);
+            Assert.Equal((byte)150, a12.F3);
+            Assert.Equal((byte)179, a13);
+            Assert.Equal((double)3108143600787174, a14.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return unchecked((nint)2326283264176371053);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc15()
+    {
+        Console.Write("Running SwiftCallbackFunc15: ");
+        ExceptionDispatchInfo ex = null;
+        nint val = SwiftCallbackFunc15(&SwiftCallbackFunc15Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)2326283264176371053), val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F16_S0
+    {
+        public sbyte F0;
+        public int F1;
+        public ushort F2;
+        public ushort F3;
+        public uint F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F16_S1
+    {
+        public ushort F0;
+        public sbyte F1;
+        public byte F2;
+        public nint F3;
+        public nint F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F16_S2_S0
+    {
+        public sbyte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 14)]
+    struct F16_S2
+    {
+        public int F0;
+        public int F1;
+        public uint F2;
+        public byte F3;
+        public F16_S2_S0 F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F16_S3
+    {
+        public short F0;
+        public double F1;
+        public double F2;
+        public int F3;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func161fs4Int8VAeA6F16_S0V_s5Int16VSfAA0H3_S1VAA0H3_S2Vs6UInt64VAA0H3_S3VSutXE_tF")]
+    private static extern sbyte SwiftCallbackFunc16(delegate* unmanaged[Swift]<F16_S0, short, float, F16_S1, F16_S2, ulong, F16_S3, nuint, SwiftSelf, sbyte> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static sbyte SwiftCallbackFunc16Callback(F16_S0 a0, short a1, float a2, F16_S1 a3, F16_S2 a4, ulong a5, F16_S3 a6, nuint a7, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)-59, a0.F0);
+            Assert.Equal((int)1181591186, a0.F1);
+            Assert.Equal((ushort)44834, a0.F2);
+            Assert.Equal((ushort)28664, a0.F3);
+            Assert.Equal((uint)404461767, a0.F4);
+            Assert.Equal((short)2482, a1);
+            Assert.Equal((float)2997348, a2);
+            Assert.Equal((ushort)22423, a3.F0);
+            Assert.Equal((sbyte)-106, a3.F1);
+            Assert.Equal((byte)182, a3.F2);
+            Assert.Equal((nint)unchecked((nint)3784074551275084420), a3.F3);
+            Assert.Equal((nint)unchecked((nint)7092934571108982079), a3.F4);
+            Assert.Equal((int)1835134709, a4.F0);
+            Assert.Equal((int)246067261, a4.F1);
+            Assert.Equal((uint)1986526591, a4.F2);
+            Assert.Equal((byte)24, a4.F3);
+            Assert.Equal((sbyte)-112, a4.F4.F0);
+            Assert.Equal((ulong)1465053746911704089, a5);
+            Assert.Equal((short)-27636, a6.F0);
+            Assert.Equal((double)1896887612303356, a6.F1);
+            Assert.Equal((double)4263157082840190, a6.F2);
+            Assert.Equal((int)774653659, a6.F3);
+            Assert.Equal((nuint)unchecked((nuint)3755775782607884861), a7);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 103;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc16()
+    {
+        Console.Write("Running SwiftCallbackFunc16: ");
+        ExceptionDispatchInfo ex = null;
+        sbyte val = SwiftCallbackFunc16(&SwiftCallbackFunc16Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((sbyte)103, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F17_S0
+    {
+        public int F0;
+        public nuint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F17_S1_S0
+    {
+        public double F0;
+        public uint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 17)]
+    struct F17_S1
+    {
+        public F17_S1_S0 F0;
+        public int F1;
+        public byte F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F17_S2
+    {
+        public uint F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func171fS2ds6UInt32V_AA6F17_S0VAA0H3_S1VSds6UInt64VAA0H3_S2VtXE_tF")]
+    private static extern double SwiftCallbackFunc17(delegate* unmanaged[Swift]<uint, F17_S0, F17_S1, double, ulong, F17_S2, SwiftSelf, double> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static double SwiftCallbackFunc17Callback(uint a0, F17_S0 a1, F17_S1 a2, double a3, ulong a4, F17_S2 a5, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((uint)201081002, a0);
+            Assert.Equal((int)2018751226, a1.F0);
+            Assert.Equal((nuint)unchecked((nuint)8488544433072104028), a1.F1);
+            Assert.Equal((double)1190765430157980, a2.F0.F0);
+            Assert.Equal((uint)70252071, a2.F0.F1);
+            Assert.Equal((int)1297775609, a2.F1);
+            Assert.Equal((byte)160, a2.F2);
+            Assert.Equal((double)4290084351352688, a3);
+            Assert.Equal((ulong)4738339757002694731, a4);
+            Assert.Equal((uint)1829312773, a5.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 4214404512040467;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc17()
+    {
+        Console.Write("Running SwiftCallbackFunc17: ");
+        ExceptionDispatchInfo ex = null;
+        double val = SwiftCallbackFunc17(&SwiftCallbackFunc17Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((double)4214404512040467, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F18_S0
+    {
+        public sbyte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F18_S1
+    {
+        public ushort F0;
+        public short F1;
+        public double F2;
+        public nuint F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F18_S2
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F18_Ret_S0
+    {
+        public short F0;
+
+        public F18_Ret_S0(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F18_Ret
+    {
+        public F18_Ret_S0 F0;
+
+        public F18_Ret(F18_Ret_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func181fAA7F18_RetVAeA0G3_S0V_AA0G3_S1VAA0G3_S2VSus6UInt32Vs5Int64Vs5Int16VSdtXE_tF")]
+    private static extern F18_Ret SwiftCallbackFunc18(delegate* unmanaged[Swift]<F18_S0, F18_S1, F18_S2, nuint, uint, long, short, double, SwiftSelf, F18_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F18_Ret SwiftCallbackFunc18Callback(F18_S0 a0, F18_S1 a1, F18_S2 a2, nuint a3, uint a4, long a5, short a6, double a7, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)106, a0.F0);
+            Assert.Equal((ushort)21619, a1.F0);
+            Assert.Equal((short)-4350, a1.F1);
+            Assert.Equal((double)3457288266203248, a1.F2);
+            Assert.Equal((nuint)unchecked((nuint)9020447812661292883), a1.F3);
+            Assert.Equal((nint)unchecked((nint)2317132584983719004), a2.F0);
+            Assert.Equal((nuint)unchecked((nuint)7379425918918939512), a3);
+            Assert.Equal((uint)2055208746, a4);
+            Assert.Equal((long)1042861174364145790, a5);
+            Assert.Equal((short)28457, a6);
+            Assert.Equal((double)1799004152435515, a7);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F18_Ret(new F18_Ret_S0(-2080));
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc18()
+    {
+        Console.Write("Running SwiftCallbackFunc18: ");
+        ExceptionDispatchInfo ex = null;
+        F18_Ret val = SwiftCallbackFunc18(&SwiftCallbackFunc18Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((short)-2080, val.F0.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F19_S0
+    {
+        public short F0;
+        public sbyte F1;
+        public float F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F19_S1
+    {
+        public long F0;
+        public ushort F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F19_S2
+    {
+        public ulong F0;
+        public long F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F19_S3
+    {
+        public uint F0;
+        public int F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F19_Ret_S0
+    {
+        public long F0;
+
+        public F19_Ret_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 56)]
+    struct F19_Ret
+    {
+        public uint F0;
+        public long F1;
+        public ushort F2;
+        public F19_Ret_S0 F3;
+        public double F4;
+        public double F5;
+        public double F6;
+
+        public F19_Ret(uint f0, long f1, ushort f2, F19_Ret_S0 f3, double f4, double f5, double f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func191fAA7F19_RetVAEs5Int64V_s5UInt8VAA0G3_S0VSiAA0G3_S1Vs5Int32VAOSus6UInt64VAA0G3_S2Vs6UInt16VAA0G3_S3Vs4Int8VAGtXE_tF")]
+    private static extern F19_Ret SwiftCallbackFunc19(delegate* unmanaged[Swift]<long, byte, F19_S0, nint, F19_S1, int, int, nuint, ulong, F19_S2, ushort, F19_S3, sbyte, long, SwiftSelf, F19_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F19_Ret SwiftCallbackFunc19Callback(long a0, byte a1, F19_S0 a2, nint a3, F19_S1 a4, int a5, int a6, nuint a7, ulong a8, F19_S2 a9, ushort a10, F19_S3 a11, sbyte a12, long a13, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((long)7456120134117592143, a0);
+            Assert.Equal((byte)114, a1);
+            Assert.Equal((short)-7583, a2.F0);
+            Assert.Equal((sbyte)97, a2.F1);
+            Assert.Equal((float)2768322, a2.F2);
+            Assert.Equal((nint)unchecked((nint)3605245176125291560), a3);
+            Assert.Equal((long)4445885313084714470, a4.F0);
+            Assert.Equal((ushort)15810, a4.F1);
+            Assert.Equal((int)1179699879, a5);
+            Assert.Equal((int)109603412, a6);
+            Assert.Equal((nuint)unchecked((nuint)6521628547431964799), a7);
+            Assert.Equal((ulong)7687430644226018854, a8);
+            Assert.Equal((ulong)8464855230956039883, a9.F0);
+            Assert.Equal((long)861462819289140037, a9.F1);
+            Assert.Equal((ushort)26519, a10);
+            Assert.Equal((uint)1864602741, a11.F0);
+            Assert.Equal((int)397176384, a11.F1);
+            Assert.Equal((sbyte)81, a12);
+            Assert.Equal((long)4909173176891211442, a13);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F19_Ret(301901837, 5183322153843416979, 16744, new F19_Ret_S0(4587948079871666183), 341974742264104, 750011710367955, 681779256292286);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc19()
+    {
+        Console.Write("Running SwiftCallbackFunc19: ");
+        ExceptionDispatchInfo ex = null;
+        F19_Ret val = SwiftCallbackFunc19(&SwiftCallbackFunc19Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((uint)301901837, val.F0);
+        Assert.Equal((long)5183322153843416979, val.F1);
+        Assert.Equal((ushort)16744, val.F2);
+        Assert.Equal((long)4587948079871666183, val.F3.F0);
+        Assert.Equal((double)341974742264104, val.F4);
+        Assert.Equal((double)750011710367955, val.F5);
+        Assert.Equal((double)681779256292286, val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F20_S0_S0
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F20_S0
+    {
+        public short F0;
+        public nuint F1;
+        public F20_S0_S0 F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F20_S1_S0
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 36)]
+    struct F20_S1
+    {
+        public long F0;
+        public nuint F1;
+        public F20_S1_S0 F2;
+        public long F3;
+        public int F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F20_S2
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F20_Ret
+    {
+        public ushort F0;
+        public ushort F1;
+        public double F2;
+        public short F3;
+        public double F4;
+
+        public F20_Ret(ushort f0, ushort f1, double f2, short f3, double f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func201fAA7F20_RetVAeA0G3_S0V_AA0G3_S1VS2fs4Int8VAA0G3_S2VSftXE_tF")]
+    private static extern F20_Ret SwiftCallbackFunc20(delegate* unmanaged[Swift]<F20_S0, F20_S1, float, float, sbyte, F20_S2, float, SwiftSelf, F20_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F20_Ret SwiftCallbackFunc20Callback(F20_S0 a0, F20_S1 a1, float a2, float a3, sbyte a4, F20_S2 a5, float a6, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((short)28858, a0.F0);
+            Assert.Equal((nuint)unchecked((nuint)7024100299344418039), a0.F1);
+            Assert.Equal((ushort)13025, a0.F2.F0);
+            Assert.Equal((long)7900431324553135989, a1.F0);
+            Assert.Equal((nuint)unchecked((nuint)8131425055682506706), a1.F1);
+            Assert.Equal((float)3884322, a1.F2.F0);
+            Assert.Equal((long)605453501265278638, a1.F3);
+            Assert.Equal((int)353756684, a1.F4);
+            Assert.Equal((float)622319, a2);
+            Assert.Equal((float)1401604, a3);
+            Assert.Equal((sbyte)-101, a4);
+            Assert.Equal((uint)1355570413, a5.F0);
+            Assert.Equal((float)2912776, a6);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F20_Ret(53384, 55736, 105589186779121, -24217, 2181722329638192);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc20()
+    {
+        Console.Write("Running SwiftCallbackFunc20: ");
+        ExceptionDispatchInfo ex = null;
+        F20_Ret val = SwiftCallbackFunc20(&SwiftCallbackFunc20Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ushort)53384, val.F0);
+        Assert.Equal((ushort)55736, val.F1);
+        Assert.Equal((double)105589186779121, val.F2);
+        Assert.Equal((short)-24217, val.F3);
+        Assert.Equal((double)2181722329638192, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F21_S0
+    {
+        public double F0;
+        public ulong F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F21_S1
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F21_Ret
+    {
+        public ushort F0;
+        public uint F1;
+        public long F2;
+
+        public F21_Ret(ushort f0, uint f1, long f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func211fAA7F21_RetVAEs5Int32V_s5Int16VAA0G3_S0VAgA0G3_S1Vs5Int64Vs6UInt32VAOs5UInt8Vs6UInt16VtXE_tF")]
+    private static extern F21_Ret SwiftCallbackFunc21(delegate* unmanaged[Swift]<int, short, F21_S0, int, F21_S1, long, uint, long, byte, ushort, SwiftSelf, F21_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F21_Ret SwiftCallbackFunc21Callback(int a0, short a1, F21_S0 a2, int a3, F21_S1 a4, long a5, uint a6, long a7, byte a8, ushort a9, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((int)256017319, a0);
+            Assert.Equal((short)14555, a1);
+            Assert.Equal((double)2102091966108033, a2.F0);
+            Assert.Equal((ulong)8617538752301505079, a2.F1);
+            Assert.Equal((int)834677431, a3);
+            Assert.Equal((ushort)7043, a4.F0);
+            Assert.Equal((long)7166819734655141128, a5);
+            Assert.Equal((uint)965538086, a6);
+            Assert.Equal((long)3827752442102685645, a7);
+            Assert.Equal((byte)110, a8);
+            Assert.Equal((ushort)33646, a9);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F21_Ret(13904, 1020161192, 7669588951617295307);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc21()
+    {
+        Console.Write("Running SwiftCallbackFunc21: ");
+        ExceptionDispatchInfo ex = null;
+        F21_Ret val = SwiftCallbackFunc21(&SwiftCallbackFunc21Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ushort)13904, val.F0);
+        Assert.Equal((uint)1020161192, val.F1);
+        Assert.Equal((long)7669588951617295307, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F22_S0
+    {
+        public nint F0;
+        public float F1;
+        public double F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F22_S1
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F22_S2
+    {
+        public int F0;
+        public double F1;
+        public float F2;
+        public short F3;
+        public ushort F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F22_S3
+    {
+        public long F0;
+        public ushort F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F22_S4
+    {
+        public double F0;
+        public ushort F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 6)]
+    struct F22_S5
+    {
+        public uint F0;
+        public short F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F22_S6
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F22_Ret
+    {
+        public ushort F0;
+        public short F1;
+        public nuint F2;
+
+        public F22_Ret(ushort f0, short f1, nuint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func221fAA7F22_RetVAEs5Int32V_AA0G3_S0VAA0G3_S1VAA0G3_S2VAA0G3_S3Vs4Int8VAA0G3_S4Vs5UInt8Vs6UInt16Vs5Int64VAA0G3_S5VAYSfAA0G3_S6VAWtXE_tF")]
+    private static extern F22_Ret SwiftCallbackFunc22(delegate* unmanaged[Swift]<int, F22_S0, F22_S1, F22_S2, F22_S3, sbyte, F22_S4, byte, ushort, long, F22_S5, long, float, F22_S6, ushort, SwiftSelf, F22_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F22_Ret SwiftCallbackFunc22Callback(int a0, F22_S0 a1, F22_S1 a2, F22_S2 a3, F22_S3 a4, sbyte a5, F22_S4 a6, byte a7, ushort a8, long a9, F22_S5 a10, long a11, float a12, F22_S6 a13, ushort a14, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((int)640156952, a0);
+            Assert.Equal((nint)unchecked((nint)824774470287401457), a1.F0);
+            Assert.Equal((float)6163704, a1.F1);
+            Assert.Equal((double)54328782764685, a1.F2);
+            Assert.Equal((nuint)unchecked((nuint)1679730195865415747), a2.F0);
+            Assert.Equal((int)1462995665, a3.F0);
+            Assert.Equal((double)2554087365600344, a3.F1);
+            Assert.Equal((float)8193295, a3.F2);
+            Assert.Equal((short)16765, a3.F3);
+            Assert.Equal((ushort)45388, a3.F4);
+            Assert.Equal((long)5560492364570389430, a4.F0);
+            Assert.Equal((ushort)48308, a4.F1);
+            Assert.Equal((sbyte)71, a5);
+            Assert.Equal((double)1639169280741045, a6.F0);
+            Assert.Equal((ushort)12045, a6.F1);
+            Assert.Equal((byte)217, a7);
+            Assert.Equal((ushort)62917, a8);
+            Assert.Equal((long)1465918945905384332, a9);
+            Assert.Equal((uint)1364750179, a10.F0);
+            Assert.Equal((short)3311, a10.F1);
+            Assert.Equal((long)9003480567517966914, a11);
+            Assert.Equal((float)2157327, a12);
+            Assert.Equal((float)6647392, a13.F0);
+            Assert.Equal((ushort)1760, a14);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F22_Ret(39726, 21753, unchecked((nuint)5706055053768469840));
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc22()
+    {
+        Console.Write("Running SwiftCallbackFunc22: ");
+        ExceptionDispatchInfo ex = null;
+        F22_Ret val = SwiftCallbackFunc22(&SwiftCallbackFunc22Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ushort)39726, val.F0);
+        Assert.Equal((short)21753, val.F1);
+        Assert.Equal((nuint)unchecked((nuint)5706055053768469840), val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F23_S0
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F23_S1
+    {
+        public nint F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func231fS2dSu_s5UInt8Vs4Int8VA2eA6F23_S0VSuAA0I3_S1VSdtXE_tF")]
+    private static extern double SwiftCallbackFunc23(delegate* unmanaged[Swift]<nuint, byte, sbyte, byte, byte, F23_S0, nuint, F23_S1, double, SwiftSelf, double> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static double SwiftCallbackFunc23Callback(nuint a0, byte a1, sbyte a2, byte a3, byte a4, F23_S0 a5, nuint a6, F23_S1 a7, double a8, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nuint)unchecked((nuint)5779410841248940897), a0);
+            Assert.Equal((byte)192, a1);
+            Assert.Equal((sbyte)-128, a2);
+            Assert.Equal((byte)133, a3);
+            Assert.Equal((byte)20, a4);
+            Assert.Equal((nint)unchecked((nint)2959916071636885436), a5.F0);
+            Assert.Equal((nuint)unchecked((nuint)3651155214497129159), a6);
+            Assert.Equal((nint)unchecked((nint)8141565342203061885), a7.F0);
+            Assert.Equal((double)1465425469608034, a8);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 893532429511039;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc23()
+    {
+        Console.Write("Running SwiftCallbackFunc23: ");
+        ExceptionDispatchInfo ex = null;
+        double val = SwiftCallbackFunc23(&SwiftCallbackFunc23Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((double)893532429511039, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F24_S0
+    {
+        public sbyte F0;
+        public byte F1;
+        public ulong F2;
+        public uint F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F24_S1
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F24_S2_S0
+    {
+        public ushort F0;
+        public uint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F24_S2_S1
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F24_S2
+    {
+        public nint F0;
+        public uint F1;
+        public F24_S2_S0 F2;
+        public F24_S2_S1 F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F24_S3
+    {
+        public short F0;
+        public float F1;
+        public long F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F24_S4
+    {
+        public byte F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func241fS2fs5Int32V_SuAA6F24_S0Vs6UInt16VAA0H3_S1Vs4Int8VAA0H3_S2Vs6UInt64VAqA0H3_S3VSdAA0H3_S4VtXE_tF")]
+    private static extern float SwiftCallbackFunc24(delegate* unmanaged[Swift]<int, nuint, F24_S0, ushort, F24_S1, sbyte, F24_S2, ulong, ulong, F24_S3, double, F24_S4, SwiftSelf, float> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static float SwiftCallbackFunc24Callback(int a0, nuint a1, F24_S0 a2, ushort a3, F24_S1 a4, sbyte a5, F24_S2 a6, ulong a7, ulong a8, F24_S3 a9, double a10, F24_S4 a11, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((int)1710754874, a0);
+            Assert.Equal((nuint)unchecked((nuint)6447433131978039331), a1);
+            Assert.Equal((sbyte)-92, a2.F0);
+            Assert.Equal((byte)181, a2.F1);
+            Assert.Equal((ulong)3710374263631495948, a2.F2);
+            Assert.Equal((uint)257210428, a2.F3);
+            Assert.Equal((ushort)6631, a3);
+            Assert.Equal((ushort)2303, a4.F0);
+            Assert.Equal((sbyte)15, a5);
+            Assert.Equal((nint)unchecked((nint)2509049432824972381), a6.F0);
+            Assert.Equal((uint)616918672, a6.F1);
+            Assert.Equal((ushort)50635, a6.F2.F0);
+            Assert.Equal((uint)1337844540, a6.F2.F1);
+            Assert.Equal((long)335964796567786281, a6.F3.F0);
+            Assert.Equal((ulong)1114365571136806382, a7);
+            Assert.Equal((ulong)8988425145801188208, a8);
+            Assert.Equal((short)31969, a9.F0);
+            Assert.Equal((float)3008861, a9.F1);
+            Assert.Equal((long)5466306080595269107, a9.F2);
+            Assert.Equal((double)2027780227887952, a10);
+            Assert.Equal((byte)234, a11.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 3470219;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc24()
+    {
+        Console.Write("Running SwiftCallbackFunc24: ");
+        ExceptionDispatchInfo ex = null;
+        float val = SwiftCallbackFunc24(&SwiftCallbackFunc24Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((float)3470219, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F25_S0
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F25_S1
+    {
+        public float F0;
+        public sbyte F1;
+        public float F2;
+        public nint F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 25)]
+    struct F25_S2
+    {
+        public nuint F0;
+        public nuint F1;
+        public long F2;
+        public byte F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F25_S3
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F25_S4
+    {
+        public sbyte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F25_Ret
+    {
+        public ulong F0;
+        public long F1;
+        public byte F2;
+        public ushort F3;
+
+        public F25_Ret(ulong f0, long f1, byte f2, ushort f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func251fAA7F25_RetVAeA0G3_S0V_s6UInt16VSuAA0G3_S1Vs5Int16VAA0G3_S2Vs6UInt64VA2qA0G3_S3VAA0G3_S4VtXE_tF")]
+    private static extern F25_Ret SwiftCallbackFunc25(delegate* unmanaged[Swift]<F25_S0, ushort, nuint, F25_S1, short, F25_S2, ulong, ulong, ulong, F25_S3, F25_S4, SwiftSelf, F25_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F25_Ret SwiftCallbackFunc25Callback(F25_S0 a0, ushort a1, nuint a2, F25_S1 a3, short a4, F25_S2 a5, ulong a6, ulong a7, ulong a8, F25_S3 a9, F25_S4 a10, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nuint)unchecked((nuint)6077761381429658786), a0.F0);
+            Assert.Equal((ushort)2300, a1);
+            Assert.Equal((nuint)unchecked((nuint)3498354181807010234), a2);
+            Assert.Equal((float)5360721, a3.F0);
+            Assert.Equal((sbyte)-40, a3.F1);
+            Assert.Equal((float)109485, a3.F2);
+            Assert.Equal((nint)unchecked((nint)2311625789899959825), a3.F3);
+            Assert.Equal((short)-28395, a4);
+            Assert.Equal((nuint)unchecked((nuint)8729509817732080529), a5.F0);
+            Assert.Equal((nuint)unchecked((nuint)860365359368130822), a5.F1);
+            Assert.Equal((long)7498894262834346040, a5.F2);
+            Assert.Equal((byte)218, a5.F3);
+            Assert.Equal((ulong)961687210282504701, a6);
+            Assert.Equal((ulong)7184177441364400868, a7);
+            Assert.Equal((ulong)8389319500274436977, a8);
+            Assert.Equal((float)4437173, a9.F0);
+            Assert.Equal((sbyte)-107, a10.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F25_Ret(8006862079710523876, 7879510716857855733, 114, 3220);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc25()
+    {
+        Console.Write("Running SwiftCallbackFunc25: ");
+        ExceptionDispatchInfo ex = null;
+        F25_Ret val = SwiftCallbackFunc25(&SwiftCallbackFunc25Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ulong)8006862079710523876, val.F0);
+        Assert.Equal((long)7879510716857855733, val.F1);
+        Assert.Equal((byte)114, val.F2);
+        Assert.Equal((ushort)3220, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F26_S0
+    {
+        public sbyte F0;
+        public nint F1;
+        public byte F2;
+        public byte F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F26_S1_S0
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F26_S1
+    {
+        public sbyte F0;
+        public int F1;
+        public short F2;
+        public F26_S1_S0 F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F26_S2
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F26_S3
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct F26_Ret
+    {
+        public nuint F0;
+        public byte F1;
+
+        public F26_Ret(nuint f0, byte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func261fAA7F26_RetVAEs4Int8V_s5UInt8Vs6UInt32VAA0G3_S0VAA0G3_S1VAA0G3_S2VAA0G3_S3VtXE_tF")]
+    private static extern F26_Ret SwiftCallbackFunc26(delegate* unmanaged[Swift]<sbyte, byte, uint, F26_S0, F26_S1, F26_S2, F26_S3, SwiftSelf, F26_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F26_Ret SwiftCallbackFunc26Callback(sbyte a0, byte a1, uint a2, F26_S0 a3, F26_S1 a4, F26_S2 a5, F26_S3 a6, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)-16, a0);
+            Assert.Equal((byte)220, a1);
+            Assert.Equal((uint)72386567, a2);
+            Assert.Equal((sbyte)-33, a3.F0);
+            Assert.Equal((nint)unchecked((nint)6488877286424796715), a3.F1);
+            Assert.Equal((byte)143, a3.F2);
+            Assert.Equal((byte)74, a3.F3);
+            Assert.Equal((sbyte)104, a4.F0);
+            Assert.Equal((int)1719453315, a4.F1);
+            Assert.Equal((short)20771, a4.F2);
+            Assert.Equal((ulong)3636117595999837800, a4.F3.F0);
+            Assert.Equal((long)2279530426119665839, a5.F0);
+            Assert.Equal((byte)207, a6.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F26_Ret(unchecked((nuint)1050319650554930471), 89);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc26()
+    {
+        Console.Write("Running SwiftCallbackFunc26: ");
+        ExceptionDispatchInfo ex = null;
+        F26_Ret val = SwiftCallbackFunc26(&SwiftCallbackFunc26Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nuint)unchecked((nuint)1050319650554930471), val.F0);
+        Assert.Equal((byte)89, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F27_S0
+    {
+        public short F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 3)]
+    struct F27_S1_S0
+    {
+        public ushort F0;
+        public sbyte F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F27_S1
+    {
+        public long F0;
+        public F27_S1_S0 F1;
+        public float F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F27_S2
+    {
+        public ulong F0;
+        public sbyte F1;
+        public uint F2;
+        public long F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F27_S3_S0
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F27_S3
+    {
+        public F27_S3_S0 F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func271fS2fs6UInt64V_s5UInt8VAA6F27_S0VA2gA0I3_S1Vs5Int32VAA0I3_S2VSis6UInt32VAA0I3_S3VtXE_tF")]
+    private static extern float SwiftCallbackFunc27(delegate* unmanaged[Swift]<ulong, byte, F27_S0, byte, byte, F27_S1, int, F27_S2, nint, uint, F27_S3, SwiftSelf, float> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static float SwiftCallbackFunc27Callback(ulong a0, byte a1, F27_S0 a2, byte a3, byte a4, F27_S1 a5, int a6, F27_S2 a7, nint a8, uint a9, F27_S3 a10, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ulong)4847421047018330189, a0);
+            Assert.Equal((byte)214, a1);
+            Assert.Equal((short)31313, a2.F0);
+            Assert.Equal((byte)207, a3);
+            Assert.Equal((byte)174, a4);
+            Assert.Equal((long)4476120319602257660, a5.F0);
+            Assert.Equal((ushort)26662, a5.F1.F0);
+            Assert.Equal((sbyte)-55, a5.F1.F1);
+            Assert.Equal((float)70666, a5.F2);
+            Assert.Equal((int)1340306103, a6);
+            Assert.Equal((ulong)2772939788297637999, a7.F0);
+            Assert.Equal((sbyte)-65, a7.F1);
+            Assert.Equal((uint)7500441, a7.F2);
+            Assert.Equal((long)4926907273817562134, a7.F3);
+            Assert.Equal((nint)unchecked((nint)5862689255099071258), a8);
+            Assert.Equal((uint)1077270996, a9);
+            Assert.Equal((ushort)35167, a10.F0.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 8117856;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc27()
+    {
+        Console.Write("Running SwiftCallbackFunc27: ");
+        ExceptionDispatchInfo ex = null;
+        float val = SwiftCallbackFunc27(&SwiftCallbackFunc27Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((float)8117856, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct F28_S0
+    {
+        public ulong F0;
+        public sbyte F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F28_S1
+    {
+        public long F0;
+        public nuint F1;
+        public nint F2;
+        public int F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F28_S2
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F28_S3
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F28_Ret_S0
+    {
+        public float F0;
+
+        public F28_Ret_S0(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 6)]
+    struct F28_Ret
+    {
+        public F28_Ret_S0 F0;
+        public ushort F1;
+
+        public F28_Ret(F28_Ret_S0 f0, ushort f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func281fAA7F28_RetVAEs6UInt32V_s6UInt16Vs4Int8VAkISfAA0G3_S0VSds6UInt64VAA0G3_S1VAA0G3_S2VAA0G3_S3VtXE_tF")]
+    private static extern F28_Ret SwiftCallbackFunc28(delegate* unmanaged[Swift]<uint, ushort, sbyte, sbyte, ushort, float, F28_S0, double, ulong, F28_S1, F28_S2, F28_S3, SwiftSelf, F28_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F28_Ret SwiftCallbackFunc28Callback(uint a0, ushort a1, sbyte a2, sbyte a3, ushort a4, float a5, F28_S0 a6, double a7, ulong a8, F28_S1 a9, F28_S2 a10, F28_S3 a11, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((uint)893827094, a0);
+            Assert.Equal((ushort)38017, a1);
+            Assert.Equal((sbyte)-90, a2);
+            Assert.Equal((sbyte)-1, a3);
+            Assert.Equal((ushort)16109, a4);
+            Assert.Equal((float)5844449, a5);
+            Assert.Equal((ulong)176269147098539470, a6.F0);
+            Assert.Equal((sbyte)23, a6.F1);
+            Assert.Equal((double)1431426259441210, a7);
+            Assert.Equal((ulong)6103261251702315645, a8);
+            Assert.Equal((long)3776818122826483419, a9.F0);
+            Assert.Equal((nuint)unchecked((nuint)9181420263296840471), a9.F1);
+            Assert.Equal((nint)unchecked((nint)3281861424961082542), a9.F2);
+            Assert.Equal((int)1442905253, a9.F3);
+            Assert.Equal((nint)unchecked((nint)8760009193798370900), a10.F0);
+            Assert.Equal((long)7119917900929398683, a11.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F28_Ret(new F28_Ret_S0(4515425), 25944);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc28()
+    {
+        Console.Write("Running SwiftCallbackFunc28: ");
+        ExceptionDispatchInfo ex = null;
+        F28_Ret val = SwiftCallbackFunc28(&SwiftCallbackFunc28Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((float)4515425, val.F0.F0);
+        Assert.Equal((ushort)25944, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F29_S0
+    {
+        public byte F0;
+        public double F1;
+        public ushort F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F29_S1
+    {
+        public uint F0;
+        public nint F1;
+        public ulong F2;
+        public uint F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F29_S2
+    {
+        public int F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F29_S3
+    {
+        public uint F0;
+        public uint F1;
+        public float F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F29_S4
+    {
+        public int F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F29_Ret_S0
+    {
+        public nint F0;
+        public ulong F1;
+
+        public F29_Ret_S0(nint f0, ulong f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 52)]
+    struct F29_Ret
+    {
+        public nuint F0;
+        public nuint F1;
+        public nuint F2;
+        public F29_Ret_S0 F3;
+        public ulong F4;
+        public uint F5;
+
+        public F29_Ret(nuint f0, nuint f1, nuint f2, F29_Ret_S0 f3, ulong f4, uint f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func291fAA7F29_RetVAeA0G3_S0V_Sis6UInt64Vs5UInt8Vs5Int64VAKSiAA0G3_S1Vs5Int32Vs4Int8VAkiA0G3_S2VAA0G3_S3Vs5Int16VAA0G3_S4Vs6UInt32VtXE_tF")]
+    private static extern F29_Ret SwiftCallbackFunc29(delegate* unmanaged[Swift]<F29_S0, nint, ulong, byte, long, byte, nint, F29_S1, int, sbyte, byte, ulong, F29_S2, F29_S3, short, F29_S4, uint, SwiftSelf, F29_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F29_Ret SwiftCallbackFunc29Callback(F29_S0 a0, nint a1, ulong a2, byte a3, long a4, byte a5, nint a6, F29_S1 a7, int a8, sbyte a9, byte a10, ulong a11, F29_S2 a12, F29_S3 a13, short a14, F29_S4 a15, uint a16, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((byte)152, a0.F0);
+            Assert.Equal((double)737900189383874, a0.F1);
+            Assert.Equal((ushort)33674, a0.F2);
+            Assert.Equal((nint)unchecked((nint)5162040247631126074), a1);
+            Assert.Equal((ulong)6524156301721885895, a2);
+            Assert.Equal((byte)129, a3);
+            Assert.Equal((long)6661424933974053497, a4);
+            Assert.Equal((byte)145, a5);
+            Assert.Equal((nint)unchecked((nint)7521422786615537370), a6);
+            Assert.Equal((uint)1361601345, a7.F0);
+            Assert.Equal((nint)unchecked((nint)3366726213840694614), a7.F1);
+            Assert.Equal((ulong)7767610514138029164, a7.F2);
+            Assert.Equal((uint)1266864987, a7.F3);
+            Assert.Equal((int)1115803878, a8);
+            Assert.Equal((sbyte)5, a9);
+            Assert.Equal((byte)80, a10);
+            Assert.Equal((ulong)2041754562738600205, a11);
+            Assert.Equal((int)1492686870, a12.F0);
+            Assert.Equal((uint)142491811, a13.F0);
+            Assert.Equal((uint)1644962309, a13.F1);
+            Assert.Equal((float)1905811, a13.F2);
+            Assert.Equal((short)-3985, a14);
+            Assert.Equal((int)1921386549, a15.F0);
+            Assert.Equal((uint)1510666400, a16);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F29_Ret(unchecked((nuint)1866868811776234672), unchecked((nuint)8169323498884891375), unchecked((nuint)2528257272266524428), new F29_Ret_S0(unchecked((nint)4705260670026405131), 8299241689326234556), 4459635217352912270, 188636136);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc29()
+    {
+        Console.Write("Running SwiftCallbackFunc29: ");
+        ExceptionDispatchInfo ex = null;
+        F29_Ret val = SwiftCallbackFunc29(&SwiftCallbackFunc29Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nuint)unchecked((nuint)1866868811776234672), val.F0);
+        Assert.Equal((nuint)unchecked((nuint)8169323498884891375), val.F1);
+        Assert.Equal((nuint)unchecked((nuint)2528257272266524428), val.F2);
+        Assert.Equal((nint)unchecked((nint)4705260670026405131), val.F3.F0);
+        Assert.Equal((ulong)8299241689326234556, val.F3.F1);
+        Assert.Equal((ulong)4459635217352912270, val.F4);
+        Assert.Equal((uint)188636136, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 7)]
+    struct F30_S0
+    {
+        public ushort F0;
+        public short F1;
+        public short F2;
+        public sbyte F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F30_S1
+    {
+        public ushort F0;
+        public nuint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F30_S2
+    {
+        public long F0;
+        public sbyte F1;
+        public ushort F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F30_S3
+    {
+        public sbyte F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func301fS2fAA6F30_S0V_AA0G3_S1VAA0G3_S2VAA0G3_S3VSitXE_tF")]
+    private static extern float SwiftCallbackFunc30(delegate* unmanaged[Swift]<F30_S0, F30_S1, F30_S2, F30_S3, nint, SwiftSelf, float> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static float SwiftCallbackFunc30Callback(F30_S0 a0, F30_S1 a1, F30_S2 a2, F30_S3 a3, nint a4, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ushort)50723, a0.F0);
+            Assert.Equal((short)19689, a0.F1);
+            Assert.Equal((short)-6469, a0.F2);
+            Assert.Equal((sbyte)83, a0.F3);
+            Assert.Equal((ushort)51238, a1.F0);
+            Assert.Equal((nuint)unchecked((nuint)5879147675377398012), a1.F1);
+            Assert.Equal((long)7909999288286190848, a2.F0);
+            Assert.Equal((sbyte)-99, a2.F1);
+            Assert.Equal((ushort)61385, a2.F2);
+            Assert.Equal((sbyte)48, a3.F0);
+            Assert.Equal((nint)unchecked((nint)2980085298293056148), a4);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 289587;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc30()
+    {
+        Console.Write("Running SwiftCallbackFunc30: ");
+        ExceptionDispatchInfo ex = null;
+        float val = SwiftCallbackFunc30(&SwiftCallbackFunc30Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((float)289587, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F31_S0
+    {
+        public int F0;
+        public ulong F1;
+        public nuint F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F31_Ret_S0
+    {
+        public uint F0;
+        public float F1;
+        public ushort F2;
+        public short F3;
+        public float F4;
+
+        public F31_Ret_S0(uint f0, float f1, ushort f2, short f3, float f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F31_Ret
+    {
+        public F31_Ret_S0 F0;
+        public ushort F1;
+
+        public F31_Ret(F31_Ret_S0 f0, ushort f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func311fAA7F31_RetVAeA0G3_S0V_SdtXE_tF")]
+    private static extern F31_Ret SwiftCallbackFunc31(delegate* unmanaged[Swift]<F31_S0, double, SwiftSelf, F31_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F31_Ret SwiftCallbackFunc31Callback(F31_S0 a0, double a1, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((int)1072945099, a0.F0);
+            Assert.Equal((ulong)5760996810500287322, a0.F1);
+            Assert.Equal((nuint)unchecked((nuint)3952909367135409979), a0.F2);
+            Assert.Equal((double)2860786541632685, a1);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F31_Ret(new F31_Ret_S0(1236856932, 1761447, 1260, 25704, 6212541), 44632);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc31()
+    {
+        Console.Write("Running SwiftCallbackFunc31: ");
+        ExceptionDispatchInfo ex = null;
+        F31_Ret val = SwiftCallbackFunc31(&SwiftCallbackFunc31Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((uint)1236856932, val.F0.F0);
+        Assert.Equal((float)1761447, val.F0.F1);
+        Assert.Equal((ushort)1260, val.F0.F2);
+        Assert.Equal((short)25704, val.F0.F3);
+        Assert.Equal((float)6212541, val.F0.F4);
+        Assert.Equal((ushort)44632, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F32_Ret
+    {
+        public nuint F0;
+        public double F1;
+        public nint F2;
+
+        public F32_Ret(nuint f0, double f1, nint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func321fAA7F32_RetVAEs6UInt16V_s5Int16VtXE_tF")]
+    private static extern F32_Ret SwiftCallbackFunc32(delegate* unmanaged[Swift]<ushort, short, SwiftSelf, F32_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F32_Ret SwiftCallbackFunc32Callback(ushort a0, short a1, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ushort)21020, a0);
+            Assert.Equal((short)7462, a1);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F32_Ret(unchecked((nuint)868833742355713000), 411817582525317, unchecked((nint)3926422244180816571));
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc32()
+    {
+        Console.Write("Running SwiftCallbackFunc32: ");
+        ExceptionDispatchInfo ex = null;
+        F32_Ret val = SwiftCallbackFunc32(&SwiftCallbackFunc32Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nuint)unchecked((nuint)868833742355713000), val.F0);
+        Assert.Equal((double)411817582525317, val.F1);
+        Assert.Equal((nint)unchecked((nint)3926422244180816571), val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F33_S0
+    {
+        public short F0;
+        public ulong F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F33_S1_S0
+    {
+        public short F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F33_S1
+    {
+        public F33_S1_S0 F0;
+        public uint F1;
+        public nuint F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F33_S2
+    {
+        public uint F0;
+        public ulong F1;
+        public sbyte F2;
+        public sbyte F3;
+        public nuint F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F33_S3_S0_S0
+    {
+        public short F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F33_S3_S0
+    {
+        public F33_S3_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F33_S3
+    {
+        public F33_S3_S0 F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func331fS2uAA6F33_S0V_SfAA0G3_S1Vs6UInt32VSis4Int8VAKSfs5UInt8VSfAkA0G3_S2VSiAA0G3_S3VSiAItXE_tF")]
+    private static extern nuint SwiftCallbackFunc33(delegate* unmanaged[Swift]<F33_S0, float, F33_S1, uint, nint, sbyte, sbyte, float, byte, float, sbyte, F33_S2, nint, F33_S3, nint, uint, SwiftSelf, nuint> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static nuint SwiftCallbackFunc33Callback(F33_S0 a0, float a1, F33_S1 a2, uint a3, nint a4, sbyte a5, sbyte a6, float a7, byte a8, float a9, sbyte a10, F33_S2 a11, nint a12, F33_S3 a13, nint a14, uint a15, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((short)-23471, a0.F0);
+            Assert.Equal((ulong)2736941806609505888, a0.F1);
+            Assert.Equal((float)6930550, a1);
+            Assert.Equal((short)32476, a2.F0.F0);
+            Assert.Equal((uint)165441961, a2.F1);
+            Assert.Equal((nuint)unchecked((nuint)3890227499323387948), a2.F2);
+            Assert.Equal((uint)591524870, a3);
+            Assert.Equal((nint)unchecked((nint)1668420058132495503), a4);
+            Assert.Equal((sbyte)-67, a5);
+            Assert.Equal((sbyte)94, a6);
+            Assert.Equal((float)3180786, a7);
+            Assert.Equal((byte)42, a8);
+            Assert.Equal((float)7674952, a9);
+            Assert.Equal((sbyte)43, a10);
+            Assert.Equal((uint)771356149, a11.F0);
+            Assert.Equal((ulong)3611576949210389997, a11.F1);
+            Assert.Equal((sbyte)-15, a11.F2);
+            Assert.Equal((sbyte)7, a11.F3);
+            Assert.Equal((nuint)unchecked((nuint)2577587324978560192), a11.F4);
+            Assert.Equal((nint)unchecked((nint)8266150294848599489), a12);
+            Assert.Equal((short)9216, a13.F0.F0.F0);
+            Assert.Equal((nint)unchecked((nint)710302565025364450), a14);
+            Assert.Equal((uint)1060812904, a15);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return unchecked((nuint)8322391372382633712);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc33()
+    {
+        Console.Write("Running SwiftCallbackFunc33: ");
+        ExceptionDispatchInfo ex = null;
+        nuint val = SwiftCallbackFunc33(&SwiftCallbackFunc33Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nuint)unchecked((nuint)8322391372382633712), val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F34_S0_S0
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F34_S0
+    {
+        public F34_S0_S0 F0;
+        public nuint F1;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func341fs6UInt16VAEs6UInt32V_AA6F34_S0VSus5Int16VtXE_tF")]
+    private static extern ushort SwiftCallbackFunc34(delegate* unmanaged[Swift]<uint, F34_S0, nuint, short, SwiftSelf, ushort> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static ushort SwiftCallbackFunc34Callback(uint a0, F34_S0 a1, nuint a2, short a3, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((uint)2068009847, a0);
+            Assert.Equal((uint)845123292, a1.F0.F0);
+            Assert.Equal((nuint)unchecked((nuint)5148244462913472487), a1.F1);
+            Assert.Equal((nuint)unchecked((nuint)8632568386462910655), a2);
+            Assert.Equal((short)7058, a3);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 20647;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc34()
+    {
+        Console.Write("Running SwiftCallbackFunc34: ");
+        ExceptionDispatchInfo ex = null;
+        ushort val = SwiftCallbackFunc34(&SwiftCallbackFunc34Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ushort)20647, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F35_S0_S0_S0
+    {
+        public int F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F35_S0_S0
+    {
+        public long F0;
+        public F35_S0_S0_S0 F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F35_S0_S1
+    {
+        public double F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F35_S0
+    {
+        public F35_S0_S0 F0;
+        public int F1;
+        public F35_S0_S1 F2;
+        public nint F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F35_S1
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F35_S2_S0
+    {
+        public double F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F35_S2
+    {
+        public F35_S2_S0 F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func351fs6UInt64VAEs5UInt8V_s4Int8VSfs5Int64VSiAA6F35_S0VAA0K3_S1VAA0K3_S2VtXE_tF")]
+    private static extern ulong SwiftCallbackFunc35(delegate* unmanaged[Swift]<byte, sbyte, float, long, nint, F35_S0, F35_S1, F35_S2, SwiftSelf, ulong> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static ulong SwiftCallbackFunc35Callback(byte a0, sbyte a1, float a2, long a3, nint a4, F35_S0 a5, F35_S1 a6, F35_S2 a7, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((byte)182, a0);
+            Assert.Equal((sbyte)-16, a1);
+            Assert.Equal((float)7763558, a2);
+            Assert.Equal((long)5905028570860904693, a3);
+            Assert.Equal((nint)unchecked((nint)5991001624972063224), a4);
+            Assert.Equal((long)6663912001709962059, a5.F0.F0);
+            Assert.Equal((int)1843939591, a5.F0.F1.F0);
+            Assert.Equal((int)1095170337, a5.F1);
+            Assert.Equal((double)3908756332193409, a5.F2.F0);
+            Assert.Equal((nint)unchecked((nint)8246190362462442203), a5.F3);
+            Assert.Equal((ushort)52167, a6.F0);
+            Assert.Equal((double)283499999631068, a7.F0.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 4329482286317894385;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc35()
+    {
+        Console.Write("Running SwiftCallbackFunc35: ");
+        ExceptionDispatchInfo ex = null;
+        ulong val = SwiftCallbackFunc35(&SwiftCallbackFunc35Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ulong)4329482286317894385, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F36_S0
+    {
+        public uint F0;
+        public long F1;
+        public byte F2;
+        public nuint F3;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func361fS2iSu_SdSus5UInt8Vs5Int64VAA6F36_S0Vs4Int8VtXE_tF")]
+    private static extern nint SwiftCallbackFunc36(delegate* unmanaged[Swift]<nuint, double, nuint, byte, long, F36_S0, sbyte, SwiftSelf, nint> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static nint SwiftCallbackFunc36Callback(nuint a0, double a1, nuint a2, byte a3, long a4, F36_S0 a5, sbyte a6, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nuint)unchecked((nuint)5079603407518207003), a0);
+            Assert.Equal((double)2365862518115571, a1);
+            Assert.Equal((nuint)unchecked((nuint)6495651757722767835), a2);
+            Assert.Equal((byte)46, a3);
+            Assert.Equal((long)1550138390178394449, a4);
+            Assert.Equal((uint)1858960269, a5.F0);
+            Assert.Equal((long)1925263848394986294, a5.F1);
+            Assert.Equal((byte)217, a5.F2);
+            Assert.Equal((nuint)unchecked((nuint)8520779488644482307), a5.F3);
+            Assert.Equal((sbyte)-83, a6);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return unchecked((nint)2889858798271230534);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc36()
+    {
+        Console.Write("Running SwiftCallbackFunc36: ");
+        ExceptionDispatchInfo ex = null;
+        nint val = SwiftCallbackFunc36(&SwiftCallbackFunc36Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)2889858798271230534), val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F37_S0_S0
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F37_S0
+    {
+        public nuint F0;
+        public uint F1;
+        public F37_S0_S0 F2;
+        public float F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F37_S1
+    {
+        public nuint F0;
+        public uint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F37_S2
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F37_Ret
+    {
+        public float F0;
+        public byte F1;
+        public short F2;
+        public ulong F3;
+
+        public F37_Ret(float f0, byte f1, short f2, ulong f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func371fAA7F37_RetVAEs6UInt64V_AA0G3_S0VSds6UInt16VAA0G3_S1VAA0G3_S2VtXE_tF")]
+    private static extern F37_Ret SwiftCallbackFunc37(delegate* unmanaged[Swift]<ulong, F37_S0, double, ushort, F37_S1, F37_S2, SwiftSelf, F37_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F37_Ret SwiftCallbackFunc37Callback(ulong a0, F37_S0 a1, double a2, ushort a3, F37_S1 a4, F37_S2 a5, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ulong)1623104856688575867, a0);
+            Assert.Equal((nuint)unchecked((nuint)3785544303342575322), a1.F0);
+            Assert.Equal((uint)717682682, a1.F1);
+            Assert.Equal((nint)unchecked((nint)2674933748436691896), a1.F2.F0);
+            Assert.Equal((float)3211458, a1.F3);
+            Assert.Equal((double)996705046384579, a2);
+            Assert.Equal((ushort)8394, a3);
+            Assert.Equal((nuint)unchecked((nuint)1048947722954084863), a4.F0);
+            Assert.Equal((uint)252415487, a4.F1);
+            Assert.Equal((ushort)3664, a5.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F37_Ret(433224, 163, -5538, 4525229514824359136);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc37()
+    {
+        Console.Write("Running SwiftCallbackFunc37: ");
+        ExceptionDispatchInfo ex = null;
+        F37_Ret val = SwiftCallbackFunc37(&SwiftCallbackFunc37Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((float)433224, val.F0);
+        Assert.Equal((byte)163, val.F1);
+        Assert.Equal((short)-5538, val.F2);
+        Assert.Equal((ulong)4525229514824359136, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F38_S0_S0
+    {
+        public nint F0;
+        public float F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F38_S0
+    {
+        public F38_S0_S0 F0;
+        public ushort F1;
+        public int F2;
+        public float F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F38_S1
+    {
+        public short F0;
+        public int F1;
+        public uint F2;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func381fS2dAA6F38_S0V_AA0G3_S1VSds5Int16Vs4Int8Vs6UInt32VAISfSiSfAMs5UInt8VSdAKtXE_tF")]
+    private static extern double SwiftCallbackFunc38(delegate* unmanaged[Swift]<F38_S0, F38_S1, double, short, sbyte, uint, short, float, nint, float, uint, byte, double, sbyte, SwiftSelf, double> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static double SwiftCallbackFunc38Callback(F38_S0 a0, F38_S1 a1, double a2, short a3, sbyte a4, uint a5, short a6, float a7, nint a8, float a9, uint a10, byte a11, double a12, sbyte a13, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nint)unchecked((nint)7389960750529773276), a0.F0.F0);
+            Assert.Equal((float)4749108, a0.F0.F1);
+            Assert.Equal((ushort)54323, a0.F1);
+            Assert.Equal((int)634649910, a0.F2);
+            Assert.Equal((float)83587, a0.F3);
+            Assert.Equal((short)-15547, a1.F0);
+            Assert.Equal((int)1747384081, a1.F1);
+            Assert.Equal((uint)851987981, a1.F2);
+            Assert.Equal((double)3543874366683681, a2);
+            Assert.Equal((short)5045, a3);
+            Assert.Equal((sbyte)-32, a4);
+            Assert.Equal((uint)2084540698, a5);
+            Assert.Equal((short)25583, a6);
+            Assert.Equal((float)3158067, a7);
+            Assert.Equal((nint)unchecked((nint)1655263182833369283), a8);
+            Assert.Equal((float)829404, a9);
+            Assert.Equal((uint)1888859844, a10);
+            Assert.Equal((byte)153, a11);
+            Assert.Equal((double)222366180309763, a12);
+            Assert.Equal((sbyte)61, a13);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 2529010496939244;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc38()
+    {
+        Console.Write("Running SwiftCallbackFunc38: ");
+        ExceptionDispatchInfo ex = null;
+        double val = SwiftCallbackFunc38(&SwiftCallbackFunc38Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((double)2529010496939244, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F39_S0_S0
+    {
+        public short F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F39_S0_S1
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F39_S0
+    {
+        public F39_S0_S0 F0;
+        public int F1;
+        public F39_S0_S1 F2;
+        public nuint F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F39_S1
+    {
+        public ushort F0;
+        public byte F1;
+        public float F2;
+        public long F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F39_S2
+    {
+        public int F0;
+        public float F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F39_S3
+    {
+        public uint F0;
+        public nint F1;
+        public nint F2;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func391fS2iAA6F39_S0V_Sus6UInt32VSdAA0G3_S1VAA0G3_S2Vs4Int8VAA0G3_S3Vs5Int32Vs6UInt64Vs5UInt8VtXE_tF")]
+    private static extern nint SwiftCallbackFunc39(delegate* unmanaged[Swift]<F39_S0, nuint, uint, double, F39_S1, F39_S2, sbyte, F39_S3, int, ulong, byte, SwiftSelf, nint> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static nint SwiftCallbackFunc39Callback(F39_S0 a0, nuint a1, uint a2, double a3, F39_S1 a4, F39_S2 a5, sbyte a6, F39_S3 a7, int a8, ulong a9, byte a10, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((short)-31212, a0.F0.F0);
+            Assert.Equal((int)1623216479, a0.F1);
+            Assert.Equal((ushort)7181, a0.F2.F0);
+            Assert.Equal((nuint)unchecked((nuint)8643545152918150186), a0.F3);
+            Assert.Equal((nuint)unchecked((nuint)799631211988519637), a1);
+            Assert.Equal((uint)94381581, a2);
+            Assert.Equal((double)761127371030426, a3);
+            Assert.Equal((ushort)417, a4.F0);
+            Assert.Equal((byte)85, a4.F1);
+            Assert.Equal((float)1543931, a4.F2);
+            Assert.Equal((long)3918460222899735322, a4.F3);
+            Assert.Equal((int)883468300, a5.F0);
+            Assert.Equal((float)2739152, a5.F1);
+            Assert.Equal((sbyte)-94, a6);
+            Assert.Equal((uint)1374766954, a7.F0);
+            Assert.Equal((nint)unchecked((nint)2042223450490396789), a7.F1);
+            Assert.Equal((nint)unchecked((nint)2672454113535023130), a7.F2);
+            Assert.Equal((int)946259065, a8);
+            Assert.Equal((ulong)6805548458517673751, a9);
+            Assert.Equal((byte)61, a10);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return unchecked((nint)3023907365579871618);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc39()
+    {
+        Console.Write("Running SwiftCallbackFunc39: ");
+        ExceptionDispatchInfo ex = null;
+        nint val = SwiftCallbackFunc39(&SwiftCallbackFunc39Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)3023907365579871618), val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F40_S0
+    {
+        public short F0;
+        public int F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F40_S1
+    {
+        public int F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 25)]
+    struct F40_S2
+    {
+        public long F0;
+        public ushort F1;
+        public nint F2;
+        public byte F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F40_S3_S0
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F40_S3
+    {
+        public nuint F0;
+        public double F1;
+        public F40_S3_S0 F2;
+        public double F3;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func401fS2uAA6F40_S0V_s6UInt32Vs5UInt8VAA0G3_S1VAA0G3_S2Vs6UInt64VSuAOSis6UInt16VAgA0G3_S3VSutXE_tF")]
+    private static extern nuint SwiftCallbackFunc40(delegate* unmanaged[Swift]<F40_S0, uint, byte, F40_S1, F40_S2, ulong, nuint, ulong, nint, ushort, uint, F40_S3, nuint, SwiftSelf, nuint> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static nuint SwiftCallbackFunc40Callback(F40_S0 a0, uint a1, byte a2, F40_S1 a3, F40_S2 a4, ulong a5, nuint a6, ulong a7, nint a8, ushort a9, uint a10, F40_S3 a11, nuint a12, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((short)22601, a0.F0);
+            Assert.Equal((int)312892872, a0.F1);
+            Assert.Equal((uint)1040102825, a1);
+            Assert.Equal((byte)56, a2);
+            Assert.Equal((int)101203812, a3.F0);
+            Assert.Equal((long)4298883321494088257, a4.F0);
+            Assert.Equal((ushort)2095, a4.F1);
+            Assert.Equal((nint)unchecked((nint)1536552108568739270), a4.F2);
+            Assert.Equal((byte)220, a4.F3);
+            Assert.Equal((ulong)2564624804830565018, a5);
+            Assert.Equal((nuint)unchecked((nuint)173855559108584219), a6);
+            Assert.Equal((ulong)6222832940831380264, a7);
+            Assert.Equal((nint)unchecked((nint)1898370824516510398), a8);
+            Assert.Equal((ushort)3352, a9);
+            Assert.Equal((uint)1643571476, a10);
+            Assert.Equal((nuint)unchecked((nuint)7940054758811932961), a11.F0);
+            Assert.Equal((double)246670432251533, a11.F1);
+            Assert.Equal((float)7890596, a11.F2.F0);
+            Assert.Equal((double)1094140965415232, a11.F3);
+            Assert.Equal((nuint)unchecked((nuint)2081923113238309816), a12);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return unchecked((nuint)4616766375038360400);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc40()
+    {
+        Console.Write("Running SwiftCallbackFunc40: ");
+        ExceptionDispatchInfo ex = null;
+        nuint val = SwiftCallbackFunc40(&SwiftCallbackFunc40Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nuint)unchecked((nuint)4616766375038360400), val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F41_S0
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F41_Ret
+    {
+        public ulong F0;
+        public double F1;
+        public uint F2;
+        public uint F3;
+
+        public F41_Ret(ulong f0, double f1, uint f2, uint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func411fAA7F41_RetVAeA0G3_S0VXE_tF")]
+    private static extern F41_Ret SwiftCallbackFunc41(delegate* unmanaged[Swift]<F41_S0, SwiftSelf, F41_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F41_Ret SwiftCallbackFunc41Callback(F41_S0 a0, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((uint)1430200072, a0.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F41_Ret(5150172797708870426, 3489330932479773, 833949606, 2098665090);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc41()
+    {
+        Console.Write("Running SwiftCallbackFunc41: ");
+        ExceptionDispatchInfo ex = null;
+        F41_Ret val = SwiftCallbackFunc41(&SwiftCallbackFunc41Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ulong)5150172797708870426, val.F0);
+        Assert.Equal((double)3489330932479773, val.F1);
+        Assert.Equal((uint)833949606, val.F2);
+        Assert.Equal((uint)2098665090, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F42_S0_S0
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F42_S0
+    {
+        public F42_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F42_S1
+    {
+        public uint F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func421fS2is5Int32V_s6UInt32VAA6F42_S0VSfs5UInt8VAA0I3_S1VtXE_tF")]
+    private static extern nint SwiftCallbackFunc42(delegate* unmanaged[Swift]<int, uint, F42_S0, float, byte, F42_S1, SwiftSelf, nint> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static nint SwiftCallbackFunc42Callback(int a0, uint a1, F42_S0 a2, float a3, byte a4, F42_S1 a5, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((int)1046060439, a0);
+            Assert.Equal((uint)1987212952, a1);
+            Assert.Equal((nint)unchecked((nint)4714080408858753964), a2.F0.F0);
+            Assert.Equal((float)2364146, a3);
+            Assert.Equal((byte)25, a4);
+            Assert.Equal((uint)666986488, a5.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return unchecked((nint)4147856807670154637);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc42()
+    {
+        Console.Write("Running SwiftCallbackFunc42: ");
+        ExceptionDispatchInfo ex = null;
+        nint val = SwiftCallbackFunc42(&SwiftCallbackFunc42Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)4147856807670154637), val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F43_S0
+    {
+        public int F0;
+        public int F1;
+        public nint F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F43_S1
+    {
+        public sbyte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F43_Ret
+    {
+        public ushort F0;
+
+        public F43_Ret(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func431fAA7F43_RetVAeA0G3_S0V_AA0G3_S1VtXE_tF")]
+    private static extern F43_Ret SwiftCallbackFunc43(delegate* unmanaged[Swift]<F43_S0, F43_S1, SwiftSelf, F43_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F43_Ret SwiftCallbackFunc43Callback(F43_S0 a0, F43_S1 a1, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((int)406102630, a0.F0);
+            Assert.Equal((int)1946236062, a0.F1);
+            Assert.Equal((nint)unchecked((nint)663606396354980308), a0.F2);
+            Assert.Equal((sbyte)-8, a1.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F43_Ret(18672);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc43()
+    {
+        Console.Write("Running SwiftCallbackFunc43: ");
+        ExceptionDispatchInfo ex = null;
+        F43_Ret val = SwiftCallbackFunc43(&SwiftCallbackFunc43Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ushort)18672, val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F44_S0
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F44_S1_S0
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F44_S1_S1
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F44_S1
+    {
+        public short F0;
+        public short F1;
+        public F44_S1_S0 F2;
+        public F44_S1_S1 F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F44_S2
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F44_S3
+    {
+        public sbyte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F44_Ret_S0
+    {
+        public nuint F0;
+
+        public F44_Ret_S0(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F44_Ret
+    {
+        public nint F0;
+        public F44_Ret_S0 F1;
+        public double F2;
+
+        public F44_Ret(nint f0, F44_Ret_S0 f1, double f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func441fAA7F44_RetVAESd_AA0G3_S0VAA0G3_S1VAA0G3_S2VAA0G3_S3VtXE_tF")]
+    private static extern F44_Ret SwiftCallbackFunc44(delegate* unmanaged[Swift]<double, F44_S0, F44_S1, F44_S2, F44_S3, SwiftSelf, F44_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F44_Ret SwiftCallbackFunc44Callback(double a0, F44_S0 a1, F44_S1 a2, F44_S2 a3, F44_S3 a4, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((double)4281406007431544, a0);
+            Assert.Equal((uint)2097291497, a1.F0);
+            Assert.Equal((short)-10489, a2.F0);
+            Assert.Equal((short)-9573, a2.F1);
+            Assert.Equal((ushort)62959, a2.F2.F0);
+            Assert.Equal((nuint)unchecked((nuint)7144119809173057975), a2.F3.F0);
+            Assert.Equal((nuint)unchecked((nuint)168733393207234277), a3.F0);
+            Assert.Equal((sbyte)64, a4.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F44_Ret(unchecked((nint)7157474620613398513), new F44_Ret_S0(unchecked((nuint)8272092288451488897)), 8724612718809);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc44()
+    {
+        Console.Write("Running SwiftCallbackFunc44: ");
+        ExceptionDispatchInfo ex = null;
+        F44_Ret val = SwiftCallbackFunc44(&SwiftCallbackFunc44Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)7157474620613398513), val.F0);
+        Assert.Equal((nuint)unchecked((nuint)8272092288451488897), val.F1.F0);
+        Assert.Equal((double)8724612718809, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F45_S0
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F45_S1
+    {
+        public nuint F0;
+        public short F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F45_Ret_S0
+    {
+        public float F0;
+
+        public F45_Ret_S0(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 48)]
+    struct F45_Ret
+    {
+        public double F0;
+        public F45_Ret_S0 F1;
+        public long F2;
+        public double F3;
+        public ulong F4;
+        public sbyte F5;
+        public int F6;
+
+        public F45_Ret(double f0, F45_Ret_S0 f1, long f2, double f3, ulong f4, sbyte f5, int f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func451fAA7F45_RetVAeA0G3_S0V_AA0G3_S1Vs5UInt8VtXE_tF")]
+    private static extern F45_Ret SwiftCallbackFunc45(delegate* unmanaged[Swift]<F45_S0, F45_S1, byte, SwiftSelf, F45_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F45_Ret SwiftCallbackFunc45Callback(F45_S0 a0, F45_S1 a1, byte a2, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nuint)unchecked((nuint)5311803360204128233), a0.F0);
+            Assert.Equal((nuint)unchecked((nuint)2204790044275015546), a1.F0);
+            Assert.Equal((short)8942, a1.F1);
+            Assert.Equal((byte)207, a2);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F45_Ret(262658215125446, new F45_Ret_S0(3145713), 4924669542959578265, 2052183120467519, 3135406744871464298, 81, 1000720476);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc45()
+    {
+        Console.Write("Running SwiftCallbackFunc45: ");
+        ExceptionDispatchInfo ex = null;
+        F45_Ret val = SwiftCallbackFunc45(&SwiftCallbackFunc45Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((double)262658215125446, val.F0);
+        Assert.Equal((float)3145713, val.F1.F0);
+        Assert.Equal((long)4924669542959578265, val.F2);
+        Assert.Equal((double)2052183120467519, val.F3);
+        Assert.Equal((ulong)3135406744871464298, val.F4);
+        Assert.Equal((sbyte)81, val.F5);
+        Assert.Equal((int)1000720476, val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 26)]
+    struct F46_Ret
+    {
+        public nuint F0;
+        public double F1;
+        public long F2;
+        public ushort F3;
+
+        public F46_Ret(nuint f0, double f1, long f2, ushort f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func461fAA7F46_RetVAESi_Sus6UInt16VAGs5Int64VtXE_tF")]
+    private static extern F46_Ret SwiftCallbackFunc46(delegate* unmanaged[Swift]<nint, nuint, ushort, ushort, long, SwiftSelf, F46_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F46_Ret SwiftCallbackFunc46Callback(nint a0, nuint a1, ushort a2, ushort a3, long a4, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nint)unchecked((nint)1855296013283572041), a0);
+            Assert.Equal((nuint)unchecked((nuint)1145047910516899437), a1);
+            Assert.Equal((ushort)20461, a2);
+            Assert.Equal((ushort)58204, a3);
+            Assert.Equal((long)1923767011143317115, a4);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F46_Ret(unchecked((nuint)4268855101008870857), 2061088094528291, 541679466428431692, 30655);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc46()
+    {
+        Console.Write("Running SwiftCallbackFunc46: ");
+        ExceptionDispatchInfo ex = null;
+        F46_Ret val = SwiftCallbackFunc46(&SwiftCallbackFunc46Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nuint)unchecked((nuint)4268855101008870857), val.F0);
+        Assert.Equal((double)2061088094528291, val.F1);
+        Assert.Equal((long)541679466428431692, val.F2);
+        Assert.Equal((ushort)30655, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F47_S0
+    {
+        public byte F0;
+        public int F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 13)]
+    struct F47_S1
+    {
+        public nint F0;
+        public uint F1;
+        public sbyte F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F47_S2_S0
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 17)]
+    struct F47_S2
+    {
+        public sbyte F0;
+        public float F1;
+        public int F2;
+        public float F3;
+        public F47_S2_S0 F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F47_S3
+    {
+        public ulong F0;
+        public long F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F47_S4
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F47_Ret
+    {
+        public short F0;
+        public short F1;
+        public long F2;
+
+        public F47_Ret(short f0, short f1, long f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func471fAA7F47_RetVAESi_Sfs6UInt32VAA0G3_S0VAA0G3_S1Vs6UInt16VSfS2iS2us5Int16VAA0G3_S2VAA0G3_S3VAA0G3_S4VtXE_tF")]
+    private static extern F47_Ret SwiftCallbackFunc47(delegate* unmanaged[Swift]<nint, float, uint, F47_S0, F47_S1, ushort, float, nint, nint, nuint, nuint, short, F47_S2, F47_S3, F47_S4, SwiftSelf, F47_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F47_Ret SwiftCallbackFunc47Callback(nint a0, float a1, uint a2, F47_S0 a3, F47_S1 a4, ushort a5, float a6, nint a7, nint a8, nuint a9, nuint a10, short a11, F47_S2 a12, F47_S3 a13, F47_S4 a14, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nint)unchecked((nint)6545360066379352091), a0);
+            Assert.Equal((float)1240616, a1);
+            Assert.Equal((uint)575670382, a2);
+            Assert.Equal((byte)27, a3.F0);
+            Assert.Equal((int)1769677101, a3.F1);
+            Assert.Equal((nint)unchecked((nint)4175209822525678639), a4.F0);
+            Assert.Equal((uint)483151627, a4.F1);
+            Assert.Equal((sbyte)-41, a4.F2);
+            Assert.Equal((ushort)20891, a5);
+            Assert.Equal((float)1011044, a6);
+            Assert.Equal((nint)unchecked((nint)8543308148327168378), a7);
+            Assert.Equal((nint)unchecked((nint)9126721646663585297), a8);
+            Assert.Equal((nuint)unchecked((nuint)5438914191614359864), a9);
+            Assert.Equal((nuint)unchecked((nuint)5284613245897089025), a10);
+            Assert.Equal((short)-9227, a11);
+            Assert.Equal((sbyte)-23, a12.F0);
+            Assert.Equal((float)1294109, a12.F1);
+            Assert.Equal((int)411726757, a12.F2);
+            Assert.Equal((float)6621598, a12.F3);
+            Assert.Equal((byte)249, a12.F4.F0);
+            Assert.Equal((ulong)5281612261430853979, a13.F0);
+            Assert.Equal((long)7161295082465816089, a13.F1);
+            Assert.Equal((ulong)1995556861952451598, a14.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F47_Ret(32110, 21949, 479980404077668674);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc47()
+    {
+        Console.Write("Running SwiftCallbackFunc47: ");
+        ExceptionDispatchInfo ex = null;
+        F47_Ret val = SwiftCallbackFunc47(&SwiftCallbackFunc47Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((short)32110, val.F0);
+        Assert.Equal((short)21949, val.F1);
+        Assert.Equal((long)479980404077668674, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F48_S0
+    {
+        public ulong F0;
+        public short F1;
+        public ulong F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F48_S1_S0
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F48_S1
+    {
+        public double F0;
+        public int F1;
+        public int F2;
+        public F48_S1_S0 F3;
+        public nuint F4;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func481fs5Int64VAEs4Int8V_s5Int16VAIs6UInt32VAA6F48_S0VAkA0K3_S1Vs5Int32VAQs6UInt16VAeKtXE_tF")]
+    private static extern long SwiftCallbackFunc48(delegate* unmanaged[Swift]<sbyte, short, short, uint, F48_S0, uint, F48_S1, int, int, ushort, long, uint, SwiftSelf, long> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static long SwiftCallbackFunc48Callback(sbyte a0, short a1, short a2, uint a3, F48_S0 a4, uint a5, F48_S1 a6, int a7, int a8, ushort a9, long a10, uint a11, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)-34, a0);
+            Assert.Equal((short)11634, a1);
+            Assert.Equal((short)-27237, a2);
+            Assert.Equal((uint)1039294154, a3);
+            Assert.Equal((ulong)1367847206719062131, a4.F0);
+            Assert.Equal((short)22330, a4.F1);
+            Assert.Equal((ulong)689282484471011648, a4.F2);
+            Assert.Equal((uint)1572626904, a5);
+            Assert.Equal((double)3054128759424009, a6.F0);
+            Assert.Equal((int)1677338134, a6.F1);
+            Assert.Equal((int)1257237843, a6.F2);
+            Assert.Equal((float)6264494, a6.F3.F0);
+            Assert.Equal((nuint)unchecked((nuint)8397097040610783205), a6.F4);
+            Assert.Equal((int)1060447208, a7);
+            Assert.Equal((int)269785114, a8);
+            Assert.Equal((ushort)20635, a9);
+            Assert.Equal((long)7679010342730986048, a10);
+            Assert.Equal((uint)1362633148, a11);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 1864372483209206459;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc48()
+    {
+        Console.Write("Running SwiftCallbackFunc48: ");
+        ExceptionDispatchInfo ex = null;
+        long val = SwiftCallbackFunc48(&SwiftCallbackFunc48Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((long)1864372483209206459, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F49_S0_S0
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F49_S0
+    {
+        public F49_S0_S0 F0;
+        public ulong F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F49_Ret
+    {
+        public int F0;
+        public short F1;
+        public byte F2;
+        public byte F3;
+        public sbyte F4;
+        public long F5;
+
+        public F49_Ret(int f0, short f1, byte f2, byte f3, sbyte f4, long f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func491fAA7F49_RetVAeA0G3_S0V_s5Int64VtXE_tF")]
+    private static extern F49_Ret SwiftCallbackFunc49(delegate* unmanaged[Swift]<F49_S0, long, SwiftSelf, F49_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F49_Ret SwiftCallbackFunc49Callback(F49_S0 a0, long a1, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((byte)48, a0.F0.F0);
+            Assert.Equal((ulong)7563394992711018452, a0.F1);
+            Assert.Equal((long)4358370311341042916, a1);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F49_Ret(1638493854, -13624, 61, 236, -97, 3942201385605817844);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc49()
+    {
+        Console.Write("Running SwiftCallbackFunc49: ");
+        ExceptionDispatchInfo ex = null;
+        F49_Ret val = SwiftCallbackFunc49(&SwiftCallbackFunc49Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((int)1638493854, val.F0);
+        Assert.Equal((short)-13624, val.F1);
+        Assert.Equal((byte)61, val.F2);
+        Assert.Equal((byte)236, val.F3);
+        Assert.Equal((sbyte)-97, val.F4);
+        Assert.Equal((long)3942201385605817844, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F50_S0_S0
+    {
+        public double F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F50_S0
+    {
+        public ushort F0;
+        public F50_S0_S0 F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F50_S1
+    {
+        public double F0;
+        public ushort F1;
+        public int F2;
+        public nint F3;
+        public double F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F50_S2
+    {
+        public int F0;
+        public float F1;
+        public uint F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 17)]
+    struct F50_S3
+    {
+        public long F0;
+        public int F1;
+        public float F2;
+        public sbyte F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F50_S4
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F50_S5_S0
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F50_S5
+    {
+        public F50_S5_S0 F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func501fs5UInt8VAeA6F50_S0V_AA0H3_S1VAeA0H3_S2Vs5Int32Vs6UInt64Vs4Int8VAQSfAA0H3_S3VAA0H3_S4VAA0H3_S5VSftXE_tF")]
+    private static extern byte SwiftCallbackFunc50(delegate* unmanaged[Swift]<F50_S0, F50_S1, byte, F50_S2, int, ulong, sbyte, sbyte, float, F50_S3, F50_S4, F50_S5, float, SwiftSelf, byte> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static byte SwiftCallbackFunc50Callback(F50_S0 a0, F50_S1 a1, byte a2, F50_S2 a3, int a4, ulong a5, sbyte a6, sbyte a7, float a8, F50_S3 a9, F50_S4 a10, F50_S5 a11, float a12, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ushort)31857, a0.F0);
+            Assert.Equal((double)1743417849706254, a0.F1.F0);
+            Assert.Equal((double)4104577461772135, a1.F0);
+            Assert.Equal((ushort)13270, a1.F1);
+            Assert.Equal((int)2072598986, a1.F2);
+            Assert.Equal((nint)unchecked((nint)9056978834867675248), a1.F3);
+            Assert.Equal((double)844742439929087, a1.F4);
+            Assert.Equal((byte)87, a2);
+            Assert.Equal((int)1420884537, a3.F0);
+            Assert.Equal((float)78807, a3.F1);
+            Assert.Equal((uint)1081688273, a3.F2);
+            Assert.Equal((int)336878110, a4);
+            Assert.Equal((ulong)1146514566942283069, a5);
+            Assert.Equal((sbyte)-93, a6);
+            Assert.Equal((sbyte)73, a7);
+            Assert.Equal((float)2321639, a8);
+            Assert.Equal((long)1940888991336881606, a9.F0);
+            Assert.Equal((int)688345394, a9.F1);
+            Assert.Equal((float)712275, a9.F2);
+            Assert.Equal((sbyte)-128, a9.F3);
+            Assert.Equal((long)2638503583829414770, a10.F0);
+            Assert.Equal((ushort)23681, a11.F0.F0);
+            Assert.Equal((float)8223218, a12);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 252;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc50()
+    {
+        Console.Write("Running SwiftCallbackFunc50: ");
+        ExceptionDispatchInfo ex = null;
+        byte val = SwiftCallbackFunc50(&SwiftCallbackFunc50Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((byte)252, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F51_S0
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F51_Ret
+    {
+        public ushort F0;
+        public sbyte F1;
+        public nint F2;
+        public ushort F3;
+        public ulong F4;
+
+        public F51_Ret(ushort f0, sbyte f1, nint f2, ushort f3, ulong f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func511fAA7F51_RetVAEs5Int16V_SuAA0G3_S0Vs6UInt64VtXE_tF")]
+    private static extern F51_Ret SwiftCallbackFunc51(delegate* unmanaged[Swift]<short, nuint, F51_S0, ulong, SwiftSelf, F51_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F51_Ret SwiftCallbackFunc51Callback(short a0, nuint a1, F51_S0 a2, ulong a3, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((short)10812, a0);
+            Assert.Equal((nuint)unchecked((nuint)470861239714315155), a1);
+            Assert.Equal((long)5415660333180374788, a2.F0);
+            Assert.Equal((ulong)2389942629143476149, a3);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F51_Ret(28396, 23, unchecked((nint)4042678034578400305), 16166, 8390419605778076733);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc51()
+    {
+        Console.Write("Running SwiftCallbackFunc51: ");
+        ExceptionDispatchInfo ex = null;
+        F51_Ret val = SwiftCallbackFunc51(&SwiftCallbackFunc51Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ushort)28396, val.F0);
+        Assert.Equal((sbyte)23, val.F1);
+        Assert.Equal((nint)unchecked((nint)4042678034578400305), val.F2);
+        Assert.Equal((ushort)16166, val.F3);
+        Assert.Equal((ulong)8390419605778076733, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F52_S0
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F52_S1
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 33)]
+    struct F52_Ret
+    {
+        public float F0;
+        public ushort F1;
+        public long F2;
+        public short F3;
+        public ulong F4;
+        public sbyte F5;
+
+        public F52_Ret(float f0, ushort f1, long f2, short f3, ulong f4, sbyte f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func521fAA7F52_RetVAESi_AA0G3_S0Vs5Int16VAiA0G3_S1VtXE_tF")]
+    private static extern F52_Ret SwiftCallbackFunc52(delegate* unmanaged[Swift]<nint, F52_S0, short, short, F52_S1, SwiftSelf, F52_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F52_Ret SwiftCallbackFunc52Callback(nint a0, F52_S0 a1, short a2, short a3, F52_S1 a4, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nint)unchecked((nint)3233654765973602550), a0);
+            Assert.Equal((float)5997729, a1.F0);
+            Assert.Equal((short)-7404, a2);
+            Assert.Equal((short)-20804, a3);
+            Assert.Equal((ushort)17231, a4.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F52_Ret(3003005, 4886, 1846269873983567093, 24151, 1408198981123859746, -41);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc52()
+    {
+        Console.Write("Running SwiftCallbackFunc52: ");
+        ExceptionDispatchInfo ex = null;
+        F52_Ret val = SwiftCallbackFunc52(&SwiftCallbackFunc52Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((float)3003005, val.F0);
+        Assert.Equal((ushort)4886, val.F1);
+        Assert.Equal((long)1846269873983567093, val.F2);
+        Assert.Equal((short)24151, val.F3);
+        Assert.Equal((ulong)1408198981123859746, val.F4);
+        Assert.Equal((sbyte)-41, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F53_S0_S0_S0
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F53_S0_S0
+    {
+        public F53_S0_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct F53_S0
+    {
+        public sbyte F0;
+        public F53_S0_S0 F1;
+        public byte F2;
+        public nuint F3;
+        public long F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 5)]
+    struct F53_S1
+    {
+        public float F0;
+        public byte F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F53_S2
+    {
+        public sbyte F0;
+        public long F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F53_S3_S0
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F53_S3
+    {
+        public int F0;
+        public uint F1;
+        public F53_S3_S0 F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F53_S4
+    {
+        public short F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F53_S5_S0
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F53_S5_S1_S0
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F53_S5_S1
+    {
+        public F53_S5_S1_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F53_S5
+    {
+        public F53_S5_S0 F0;
+        public nuint F1;
+        public ushort F2;
+        public F53_S5_S1 F3;
+        public sbyte F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F53_S6
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F53_Ret
+    {
+        public nint F0;
+
+        public F53_Ret(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func531fAA7F53_RetVAeA0G3_S0V_s5UInt8Vs5Int64VAA0G3_S1VAA0G3_S2VAA0G3_S3VAkA0G3_S4VAA0G3_S5VAA0G3_S6VtXE_tF")]
+    private static extern F53_Ret SwiftCallbackFunc53(delegate* unmanaged[Swift]<F53_S0, byte, long, F53_S1, F53_S2, F53_S3, long, F53_S4, F53_S5, F53_S6, SwiftSelf, F53_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F53_Ret SwiftCallbackFunc53Callback(F53_S0 a0, byte a1, long a2, F53_S1 a3, F53_S2 a4, F53_S3 a5, long a6, F53_S4 a7, F53_S5 a8, F53_S6 a9, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)-123, a0.F0);
+            Assert.Equal((long)3494916243607193741, a0.F1.F0.F0);
+            Assert.Equal((byte)167, a0.F2);
+            Assert.Equal((nuint)unchecked((nuint)4018943158751734338), a0.F3);
+            Assert.Equal((long)6768175524813742847, a0.F4);
+            Assert.Equal((byte)207, a1);
+            Assert.Equal((long)8667995458064724392, a2);
+            Assert.Equal((float)492157, a3.F0);
+            Assert.Equal((byte)175, a3.F1);
+            Assert.Equal((sbyte)76, a4.F0);
+            Assert.Equal((long)5794486968525461488, a4.F1);
+            Assert.Equal((int)2146070335, a5.F0);
+            Assert.Equal((uint)1109141712, a5.F1);
+            Assert.Equal((ushort)44270, a5.F2.F0);
+            Assert.Equal((long)3581380181786253859, a6);
+            Assert.Equal((short)23565, a7.F0);
+            Assert.Equal((uint)1995174927, a8.F0.F0);
+            Assert.Equal((nuint)unchecked((nuint)5025417700244056666), a8.F1);
+            Assert.Equal((ushort)1847, a8.F2);
+            Assert.Equal((byte)6, a8.F3.F0.F0);
+            Assert.Equal((sbyte)-87, a8.F4);
+            Assert.Equal((nint)unchecked((nint)5737280129078653969), a9.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F53_Ret(unchecked((nint)3955567540648861371));
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc53()
+    {
+        Console.Write("Running SwiftCallbackFunc53: ");
+        ExceptionDispatchInfo ex = null;
+        F53_Ret val = SwiftCallbackFunc53(&SwiftCallbackFunc53Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)3955567540648861371), val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 17)]
+    struct F54_S0
+    {
+        public int F0;
+        public float F1;
+        public nuint F2;
+        public byte F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F54_S1
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F54_S2_S0_S0
+    {
+        public double F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F54_S2_S0
+    {
+        public short F0;
+        public F54_S2_S0_S0 F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct F54_S2
+    {
+        public double F0;
+        public F54_S2_S0 F1;
+        public long F2;
+        public ulong F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F54_S3
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 3)]
+    struct F54_S4
+    {
+        public ushort F0;
+        public sbyte F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F54_S5
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F54_Ret
+    {
+        public short F0;
+        public nint F1;
+
+        public F54_Ret(short f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func541fAA7F54_RetVAEs6UInt16V_AA0G3_S0VSfAA0G3_S1Vs5Int64Vs5Int32VAA0G3_S2VAA0G3_S3VAA0G3_S4VSfAA0G3_S5VtXE_tF")]
+    private static extern F54_Ret SwiftCallbackFunc54(delegate* unmanaged[Swift]<ushort, F54_S0, float, F54_S1, long, int, F54_S2, F54_S3, F54_S4, float, F54_S5, SwiftSelf, F54_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F54_Ret SwiftCallbackFunc54Callback(ushort a0, F54_S0 a1, float a2, F54_S1 a3, long a4, int a5, F54_S2 a6, F54_S3 a7, F54_S4 a8, float a9, F54_S5 a10, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ushort)16440, a0);
+            Assert.Equal((int)922752112, a1.F0);
+            Assert.Equal((float)7843043, a1.F1);
+            Assert.Equal((nuint)unchecked((nuint)1521939500434086364), a1.F2);
+            Assert.Equal((byte)50, a1.F3);
+            Assert.Equal((float)3111108, a2);
+            Assert.Equal((ushort)50535, a3.F0);
+            Assert.Equal((long)4761507229870258916, a4);
+            Assert.Equal((int)1670668155, a5);
+            Assert.Equal((double)432665443852892, a6.F0);
+            Assert.Equal((short)13094, a6.F1.F0);
+            Assert.Equal((double)669143993481144, a6.F1.F1.F0);
+            Assert.Equal((long)30067117315069590, a6.F2);
+            Assert.Equal((ulong)874012622621600805, a6.F3);
+            Assert.Equal((float)7995066, a7.F0);
+            Assert.Equal((ushort)48478, a8.F0);
+            Assert.Equal((sbyte)23, a8.F1);
+            Assert.Equal((float)4383787, a9);
+            Assert.Equal((ushort)61633, a10.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F54_Ret(924, unchecked((nint)7680560643733996038));
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc54()
+    {
+        Console.Write("Running SwiftCallbackFunc54: ");
+        ExceptionDispatchInfo ex = null;
+        F54_Ret val = SwiftCallbackFunc54(&SwiftCallbackFunc54Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((short)924, val.F0);
+        Assert.Equal((nint)unchecked((nint)7680560643733996038), val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F55_S0_S0
+    {
+        public double F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 17)]
+    struct F55_S0
+    {
+        public nuint F0;
+        public F55_S0_S0 F1;
+        public sbyte F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F55_S1
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F55_S2
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F55_Ret_S0
+    {
+        public short F0;
+        public int F1;
+
+        public F55_Ret_S0(short f0, int f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct F55_Ret
+    {
+        public nuint F0;
+        public nint F1;
+        public double F2;
+        public F55_Ret_S0 F3;
+        public ulong F4;
+
+        public F55_Ret(nuint f0, nint f1, double f2, F55_Ret_S0 f3, ulong f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func551fAA7F55_RetVAeA0G3_S0V_s5Int64VAA0G3_S1Vs4Int8VAA0G3_S2VSftXE_tF")]
+    private static extern F55_Ret SwiftCallbackFunc55(delegate* unmanaged[Swift]<F55_S0, long, F55_S1, sbyte, F55_S2, float, SwiftSelf, F55_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F55_Ret SwiftCallbackFunc55Callback(F55_S0 a0, long a1, F55_S1 a2, sbyte a3, F55_S2 a4, float a5, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nuint)unchecked((nuint)2856661562863799725), a0.F0);
+            Assert.Equal((double)1260582440479139, a0.F1.F0);
+            Assert.Equal((sbyte)5, a0.F2);
+            Assert.Equal((long)7945068527720423751, a1);
+            Assert.Equal((nint)unchecked((nint)4321616441998677375), a2.F0);
+            Assert.Equal((sbyte)-68, a3);
+            Assert.Equal((ulong)3311106172201778367, a4.F0);
+            Assert.Equal((float)5600069, a5);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F55_Ret(unchecked((nuint)6916953478574785342), unchecked((nint)6448649235859031640), 1920468532326411, new F55_Ret_S0(30394, 40356024), 6146457824330132360);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc55()
+    {
+        Console.Write("Running SwiftCallbackFunc55: ");
+        ExceptionDispatchInfo ex = null;
+        F55_Ret val = SwiftCallbackFunc55(&SwiftCallbackFunc55Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nuint)unchecked((nuint)6916953478574785342), val.F0);
+        Assert.Equal((nint)unchecked((nint)6448649235859031640), val.F1);
+        Assert.Equal((double)1920468532326411, val.F2);
+        Assert.Equal((short)30394, val.F3.F0);
+        Assert.Equal((int)40356024, val.F3.F1);
+        Assert.Equal((ulong)6146457824330132360, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F56_S0
+    {
+        public double F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func561fs6UInt32VAeA6F56_S0VXE_tF")]
+    private static extern uint SwiftCallbackFunc56(delegate* unmanaged[Swift]<F56_S0, SwiftSelf, uint> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static uint SwiftCallbackFunc56Callback(F56_S0 a0, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((double)3082602006731666, a0.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 1601166926;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc56()
+    {
+        Console.Write("Running SwiftCallbackFunc56: ");
+        ExceptionDispatchInfo ex = null;
+        uint val = SwiftCallbackFunc56(&SwiftCallbackFunc56Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((uint)1601166926, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F57_S0
+    {
+        public long F0;
+        public int F1;
+        public ulong F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F57_S1
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F57_S2
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F57_Ret_S0
+    {
+        public long F0;
+        public byte F1;
+        public short F2;
+
+        public F57_Ret_S0(long f0, byte f1, short f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 13)]
+    struct F57_Ret
+    {
+        public F57_Ret_S0 F0;
+        public byte F1;
+
+        public F57_Ret(F57_Ret_S0 f0, byte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func571fAA7F57_RetVAEs4Int8V_Sus6UInt32Vs5Int64Vs6UInt64Vs5Int16VAkA0G3_S0VAA0G3_S1VAA0G3_S2VtXE_tF")]
+    private static extern F57_Ret SwiftCallbackFunc57(delegate* unmanaged[Swift]<sbyte, nuint, uint, long, ulong, short, long, F57_S0, F57_S1, F57_S2, SwiftSelf, F57_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F57_Ret SwiftCallbackFunc57Callback(sbyte a0, nuint a1, uint a2, long a3, ulong a4, short a5, long a6, F57_S0 a7, F57_S1 a8, F57_S2 a9, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)54, a0);
+            Assert.Equal((nuint)unchecked((nuint)753245150862584974), a1);
+            Assert.Equal((uint)1470962934, a2);
+            Assert.Equal((long)1269392070140776313, a3);
+            Assert.Equal((ulong)2296560034524654667, a4);
+            Assert.Equal((short)12381, a5);
+            Assert.Equal((long)198893062684618980, a6);
+            Assert.Equal((long)1310571041794038100, a7.F0);
+            Assert.Equal((int)18741662, a7.F1);
+            Assert.Equal((ulong)7855196891704523814, a7.F2);
+            Assert.Equal((byte)156, a8.F0);
+            Assert.Equal((float)72045, a9.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F57_Ret(new F57_Ret_S0(3441370978522907304, 105, 24446), 200);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc57()
+    {
+        Console.Write("Running SwiftCallbackFunc57: ");
+        ExceptionDispatchInfo ex = null;
+        F57_Ret val = SwiftCallbackFunc57(&SwiftCallbackFunc57Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((long)3441370978522907304, val.F0.F0);
+        Assert.Equal((byte)105, val.F0.F1);
+        Assert.Equal((short)24446, val.F0.F2);
+        Assert.Equal((byte)200, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F58_S0
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 6)]
+    struct F58_S1
+    {
+        public float F0;
+        public ushort F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F58_S2_S0_S0
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F58_S2_S0
+    {
+        public F58_S2_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F58_S2
+    {
+        public F58_S2_S0 F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func581fS2is6UInt64V_s4Int8VSiAA6F58_S0VAA0I3_S1Vs5Int64VAA0I3_S2Vs5Int32VtXE_tF")]
+    private static extern nint SwiftCallbackFunc58(delegate* unmanaged[Swift]<ulong, sbyte, nint, F58_S0, F58_S1, long, F58_S2, int, SwiftSelf, nint> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static nint SwiftCallbackFunc58Callback(ulong a0, sbyte a1, nint a2, F58_S0 a3, F58_S1 a4, long a5, F58_S2 a6, int a7, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ulong)4612004722568513699, a0);
+            Assert.Equal((sbyte)-96, a1);
+            Assert.Equal((nint)unchecked((nint)1970590839325113617), a2);
+            Assert.Equal((byte)211, a3.F0);
+            Assert.Equal((float)5454927, a4.F0);
+            Assert.Equal((ushort)48737, a4.F1);
+            Assert.Equal((long)921570327236881486, a5);
+            Assert.Equal((nint)unchecked((nint)7726203059421444802), a6.F0.F0.F0);
+            Assert.Equal((int)491616915, a7);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return unchecked((nint)5337995302960578101);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc58()
+    {
+        Console.Write("Running SwiftCallbackFunc58: ");
+        ExceptionDispatchInfo ex = null;
+        nint val = SwiftCallbackFunc58(&SwiftCallbackFunc58Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)5337995302960578101), val);
+        Console.WriteLine("OK");
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func591fs6UInt64VAEs6UInt16V_s5Int64VSitXE_tF")]
+    private static extern ulong SwiftCallbackFunc59(delegate* unmanaged[Swift]<ushort, long, nint, SwiftSelf, ulong> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static ulong SwiftCallbackFunc59Callback(ushort a0, long a1, nint a2, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ushort)9232, a0);
+            Assert.Equal((long)7281011081566942937, a1);
+            Assert.Equal((nint)unchecked((nint)8203439771560005792), a2);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 7843473552989551261;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc59()
+    {
+        Console.Write("Running SwiftCallbackFunc59: ");
+        ExceptionDispatchInfo ex = null;
+        ulong val = SwiftCallbackFunc59(&SwiftCallbackFunc59Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ulong)7843473552989551261, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F60_S0
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F60_S1
+    {
+        public ulong F0;
+        public int F1;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func601fs6UInt64VAESf_Sds5Int64Vs6UInt16VS2fAA6F60_S0Vs5Int16VAA0J3_S1VAmGtXE_tF")]
+    private static extern ulong SwiftCallbackFunc60(delegate* unmanaged[Swift]<float, double, long, ushort, float, float, F60_S0, short, F60_S1, short, long, SwiftSelf, ulong> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static ulong SwiftCallbackFunc60Callback(float a0, double a1, long a2, ushort a3, float a4, float a5, F60_S0 a6, short a7, F60_S1 a8, short a9, long a10, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((float)2682255, a0);
+            Assert.Equal((double)2041676057169359, a1);
+            Assert.Equal((long)5212916666940122160, a2);
+            Assert.Equal((ushort)64444, a3);
+            Assert.Equal((float)6372882, a4);
+            Assert.Equal((float)8028835, a5);
+            Assert.Equal((nint)unchecked((nint)6629286640024570381), a6.F0);
+            Assert.Equal((short)1520, a7);
+            Assert.Equal((ulong)8398497739914283366, a8.F0);
+            Assert.Equal((int)1882981891, a8.F1);
+            Assert.Equal((short)7716, a9);
+            Assert.Equal((long)6631047215535600409, a10);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 1713850918199577358;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc60()
+    {
+        Console.Write("Running SwiftCallbackFunc60: ");
+        ExceptionDispatchInfo ex = null;
+        ulong val = SwiftCallbackFunc60(&SwiftCallbackFunc60Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ulong)1713850918199577358, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F61_S0_S0
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F61_S0
+    {
+        public F61_S0_S0 F0;
+        public long F1;
+        public uint F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F61_S1
+    {
+        public sbyte F0;
+        public float F1;
+        public nint F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F61_S2_S0_S0
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F61_S2_S0
+    {
+        public F61_S2_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F61_S2_S1
+    {
+        public sbyte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct F61_S2
+    {
+        public F61_S2_S0 F0;
+        public F61_S2_S1 F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F61_S3
+    {
+        public ulong F0;
+        public nint F1;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func611fs6UInt32VA2E_AeA6F61_S0VAA0H3_S1VAA0H3_S2Vs4Int8Vs5Int16VAA0H3_S3Vs5Int32VAEtXE_tF")]
+    private static extern uint SwiftCallbackFunc61(delegate* unmanaged[Swift]<uint, uint, F61_S0, F61_S1, F61_S2, sbyte, short, F61_S3, int, uint, SwiftSelf, uint> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static uint SwiftCallbackFunc61Callback(uint a0, uint a1, F61_S0 a2, F61_S1 a3, F61_S2 a4, sbyte a5, short a6, F61_S3 a7, int a8, uint a9, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((uint)1070797065, a0);
+            Assert.Equal((uint)135220309, a1);
+            Assert.Equal((long)6475887024664217162, a2.F0.F0);
+            Assert.Equal((long)563444654083452485, a2.F1);
+            Assert.Equal((uint)1748956360, a2.F2);
+            Assert.Equal((sbyte)-112, a3.F0);
+            Assert.Equal((float)3433396, a3.F1);
+            Assert.Equal((nint)unchecked((nint)8106074956722850624), a3.F2);
+            Assert.Equal((ulong)2318628619979263858, a4.F0.F0.F0);
+            Assert.Equal((sbyte)-93, a4.F1.F0);
+            Assert.Equal((sbyte)-122, a5);
+            Assert.Equal((short)-11696, a6);
+            Assert.Equal((ulong)5229393236090246212, a7.F0);
+            Assert.Equal((nint)unchecked((nint)4021449757638811198), a7.F1);
+            Assert.Equal((int)689517945, a8);
+            Assert.Equal((uint)657677740, a9);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 138627237;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc61()
+    {
+        Console.Write("Running SwiftCallbackFunc61: ");
+        ExceptionDispatchInfo ex = null;
+        uint val = SwiftCallbackFunc61(&SwiftCallbackFunc61Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((uint)138627237, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F62_S0
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F62_Ret
+    {
+        public ushort F0;
+        public long F1;
+        public nint F2;
+        public long F3;
+
+        public F62_Ret(ushort f0, long f1, nint f2, long f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func621fAA7F62_RetVAeA0G3_S0VXE_tF")]
+    private static extern F62_Ret SwiftCallbackFunc62(delegate* unmanaged[Swift]<F62_S0, SwiftSelf, F62_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F62_Ret SwiftCallbackFunc62Callback(F62_S0 a0, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((float)6500993, a0.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F62_Ret(63013, 4076138842444340990, unchecked((nint)6876195265868121021), 223819901796794423);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc62()
+    {
+        Console.Write("Running SwiftCallbackFunc62: ");
+        ExceptionDispatchInfo ex = null;
+        F62_Ret val = SwiftCallbackFunc62(&SwiftCallbackFunc62Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ushort)63013, val.F0);
+        Assert.Equal((long)4076138842444340990, val.F1);
+        Assert.Equal((nint)unchecked((nint)6876195265868121021), val.F2);
+        Assert.Equal((long)223819901796794423, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F63_S0
+    {
+        public nint F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func631fS2fAA6F63_S0V_s5Int16VtXE_tF")]
+    private static extern float SwiftCallbackFunc63(delegate* unmanaged[Swift]<F63_S0, short, SwiftSelf, float> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static float SwiftCallbackFunc63Callback(F63_S0 a0, short a1, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nint)unchecked((nint)8391317504019075904), a0.F0);
+            Assert.Equal((short)11218, a1);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 1458978;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc63()
+    {
+        Console.Write("Running SwiftCallbackFunc63: ");
+        ExceptionDispatchInfo ex = null;
+        float val = SwiftCallbackFunc63(&SwiftCallbackFunc63Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((float)1458978, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F64_S0
+    {
+        public int F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F64_S1
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F64_S2
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F64_Ret_S0
+    {
+        public ushort F0;
+        public nuint F1;
+        public ulong F2;
+
+        public F64_Ret_S0(ushort f0, nuint f1, ulong f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct F64_Ret
+    {
+        public nuint F0;
+        public F64_Ret_S0 F1;
+        public double F2;
+
+        public F64_Ret(nuint f0, F64_Ret_S0 f1, double f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func641fAA7F64_RetVAEs4Int8V_AA0G3_S0VAA0G3_S1VSuAA0G3_S2VtXE_tF")]
+    private static extern F64_Ret SwiftCallbackFunc64(delegate* unmanaged[Swift]<sbyte, F64_S0, F64_S1, nuint, F64_S2, SwiftSelf, F64_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F64_Ret SwiftCallbackFunc64Callback(sbyte a0, F64_S0 a1, F64_S1 a2, nuint a3, F64_S2 a4, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)-22, a0);
+            Assert.Equal((int)1591678205, a1.F0);
+            Assert.Equal((ulong)8355549563000003325, a2.F0);
+            Assert.Equal((nuint)unchecked((nuint)5441989206466502201), a3);
+            Assert.Equal((uint)2097092811, a4.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F64_Ret(unchecked((nuint)7966680593035770540), new F64_Ret_S0(20244, unchecked((nuint)7259704667595065333), 1039021449222712763), 594768504899138);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc64()
+    {
+        Console.Write("Running SwiftCallbackFunc64: ");
+        ExceptionDispatchInfo ex = null;
+        F64_Ret val = SwiftCallbackFunc64(&SwiftCallbackFunc64Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nuint)unchecked((nuint)7966680593035770540), val.F0);
+        Assert.Equal((ushort)20244, val.F1.F0);
+        Assert.Equal((nuint)unchecked((nuint)7259704667595065333), val.F1.F1);
+        Assert.Equal((ulong)1039021449222712763, val.F1.F2);
+        Assert.Equal((double)594768504899138, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F65_S0
+    {
+        public double F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F65_S1
+    {
+        public ushort F0;
+        public nint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F65_S2
+    {
+        public short F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F65_S3
+    {
+        public int F0;
+        public uint F1;
+        public sbyte F2;
+        public nuint F3;
+        public double F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F65_Ret
+    {
+        public nint F0;
+        public nint F1;
+        public nint F2;
+        public float F3;
+
+        public F65_Ret(nint f0, nint f1, nint f2, float f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func651fAA7F65_RetVAeA0G3_S0V_s5Int16VSdSuAA0G3_S1Vs6UInt64VAA0G3_S2VSiAA0G3_S3Vs5Int32Vs5Int64Vs6UInt32VSdtXE_tF")]
+    private static extern F65_Ret SwiftCallbackFunc65(delegate* unmanaged[Swift]<F65_S0, short, double, nuint, F65_S1, ulong, F65_S2, nint, F65_S3, int, long, uint, double, SwiftSelf, F65_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F65_Ret SwiftCallbackFunc65Callback(F65_S0 a0, short a1, double a2, nuint a3, F65_S1 a4, ulong a5, F65_S2 a6, nint a7, F65_S3 a8, int a9, long a10, uint a11, double a12, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((double)2969223123583220, a0.F0);
+            Assert.Equal((short)-10269, a1);
+            Assert.Equal((double)3909264978196109, a2);
+            Assert.Equal((nuint)unchecked((nuint)522883062031213707), a3);
+            Assert.Equal((ushort)37585, a4.F0);
+            Assert.Equal((nint)unchecked((nint)5879827541057349126), a4.F1);
+            Assert.Equal((ulong)1015270399093748716, a5);
+            Assert.Equal((short)19670, a6.F0);
+            Assert.Equal((nint)unchecked((nint)1900026319968050423), a7);
+            Assert.Equal((int)1440511399, a8.F0);
+            Assert.Equal((uint)1203865685, a8.F1);
+            Assert.Equal((sbyte)12, a8.F2);
+            Assert.Equal((nuint)unchecked((nuint)4061296318630567634), a8.F3);
+            Assert.Equal((double)2406524883317724, a8.F4);
+            Assert.Equal((int)1594888000, a9);
+            Assert.Equal((long)2860599972459787263, a10);
+            Assert.Equal((uint)1989052358, a11);
+            Assert.Equal((double)1036075606072593, a12);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F65_Ret(unchecked((nint)7810903219784151958), unchecked((nint)8310527878848492866), unchecked((nint)1357258266300958550), 5970506);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc65()
+    {
+        Console.Write("Running SwiftCallbackFunc65: ");
+        ExceptionDispatchInfo ex = null;
+        F65_Ret val = SwiftCallbackFunc65(&SwiftCallbackFunc65Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)7810903219784151958), val.F0);
+        Assert.Equal((nint)unchecked((nint)8310527878848492866), val.F1);
+        Assert.Equal((nint)unchecked((nint)1357258266300958550), val.F2);
+        Assert.Equal((float)5970506, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 5)]
+    struct F66_Ret_S0
+    {
+        public float F0;
+        public byte F1;
+
+        public F66_Ret_S0(float f0, byte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F66_Ret
+    {
+        public uint F0;
+        public int F1;
+        public uint F2;
+        public F66_Ret_S0 F3;
+        public nint F4;
+
+        public F66_Ret(uint f0, int f1, uint f2, F66_Ret_S0 f3, nint f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func661fAA7F66_RetVAEs5Int64VXE_tF")]
+    private static extern F66_Ret SwiftCallbackFunc66(delegate* unmanaged[Swift]<long, SwiftSelf, F66_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F66_Ret SwiftCallbackFunc66Callback(long a0, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((long)8300712022174991120, a0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F66_Ret(1855065799, 2029697750, 149423164, new F66_Ret_S0(4327716, 116), unchecked((nint)5847795120921557969));
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc66()
+    {
+        Console.Write("Running SwiftCallbackFunc66: ");
+        ExceptionDispatchInfo ex = null;
+        F66_Ret val = SwiftCallbackFunc66(&SwiftCallbackFunc66Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((uint)1855065799, val.F0);
+        Assert.Equal((int)2029697750, val.F1);
+        Assert.Equal((uint)149423164, val.F2);
+        Assert.Equal((float)4327716, val.F3.F0);
+        Assert.Equal((byte)116, val.F3.F1);
+        Assert.Equal((nint)unchecked((nint)5847795120921557969), val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F67_S0
+    {
+        public uint F0;
+        public byte F1;
+        public byte F2;
+        public int F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F67_S1
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F67_S2_S0
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct F67_S2
+    {
+        public ulong F0;
+        public uint F1;
+        public nint F2;
+        public uint F3;
+        public F67_S2_S0 F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F67_S3
+    {
+        public short F0;
+        public ulong F1;
+        public ulong F2;
+        public float F3;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func671fs5Int32VAESd_AA6F67_S0VSfAA0H3_S1Vs5Int16VSuAA0H3_S2Vs6UInt16VS2uAA0H3_S3Vs6UInt64VtXE_tF")]
+    private static extern int SwiftCallbackFunc67(delegate* unmanaged[Swift]<double, F67_S0, float, F67_S1, short, nuint, F67_S2, ushort, nuint, nuint, F67_S3, ulong, SwiftSelf, int> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static int SwiftCallbackFunc67Callback(double a0, F67_S0 a1, float a2, F67_S1 a3, short a4, nuint a5, F67_S2 a6, ushort a7, nuint a8, nuint a9, F67_S3 a10, ulong a11, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((double)2365334314089079, a0);
+            Assert.Equal((uint)1133369490, a1.F0);
+            Assert.Equal((byte)54, a1.F1);
+            Assert.Equal((byte)244, a1.F2);
+            Assert.Equal((int)411611102, a1.F3);
+            Assert.Equal((float)4453912, a2);
+            Assert.Equal((uint)837821989, a3.F0);
+            Assert.Equal((short)-3824, a4);
+            Assert.Equal((nuint)unchecked((nuint)2394019088612006082), a5);
+            Assert.Equal((ulong)2219661088889353540, a6.F0);
+            Assert.Equal((uint)294254132, a6.F1);
+            Assert.Equal((nint)unchecked((nint)5363897228951721947), a6.F2);
+            Assert.Equal((uint)2038380379, a6.F3);
+            Assert.Equal((nint)unchecked((nint)8364879421385869437), a6.F4.F0);
+            Assert.Equal((ushort)27730, a7);
+            Assert.Equal((nuint)unchecked((nuint)1854446871602777695), a8);
+            Assert.Equal((nuint)unchecked((nuint)5020910156102352016), a9);
+            Assert.Equal((short)-2211, a10.F0);
+            Assert.Equal((ulong)5910581461792482729, a10.F1);
+            Assert.Equal((ulong)9095210648679611609, a10.F2);
+            Assert.Equal((float)6138428, a10.F3);
+            Assert.Equal((ulong)4274242076331880276, a11);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 391983354;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc67()
+    {
+        Console.Write("Running SwiftCallbackFunc67: ");
+        ExceptionDispatchInfo ex = null;
+        int val = SwiftCallbackFunc67(&SwiftCallbackFunc67Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((int)391983354, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F68_S0_S0
+    {
+        public sbyte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct F68_S0
+    {
+        public long F0;
+        public F68_S0_S0 F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F68_S1
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F68_S2_S0
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F68_S2_S1_S0
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F68_S2_S1
+    {
+        public F68_S2_S1_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F68_S2
+    {
+        public F68_S2_S0 F0;
+        public F68_S2_S1 F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F68_S3
+    {
+        public short F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F68_Ret
+    {
+        public ushort F0;
+        public long F1;
+
+        public F68_Ret(ushort f0, long f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func681fAA7F68_RetVAEs5UInt8V_Sfs5Int32VSiAA0G3_S0Vs5Int16VSiAISiAA0G3_S1VSdAA0G3_S2VAA0G3_S3VtXE_tF")]
+    private static extern F68_Ret SwiftCallbackFunc68(delegate* unmanaged[Swift]<byte, float, int, nint, F68_S0, short, nint, int, nint, F68_S1, double, F68_S2, F68_S3, SwiftSelf, F68_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F68_Ret SwiftCallbackFunc68Callback(byte a0, float a1, int a2, nint a3, F68_S0 a4, short a5, nint a6, int a7, nint a8, F68_S1 a9, double a10, F68_S2 a11, F68_S3 a12, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((byte)203, a0);
+            Assert.Equal((float)7725681, a1);
+            Assert.Equal((int)323096997, a2);
+            Assert.Equal((nint)unchecked((nint)7745650233784541800), a3);
+            Assert.Equal((long)4103074885750473230, a4.F0);
+            Assert.Equal((sbyte)12, a4.F1.F0);
+            Assert.Equal((short)28477, a5);
+            Assert.Equal((nint)unchecked((nint)3772772447290536725), a6);
+            Assert.Equal((int)1075348149, a7);
+            Assert.Equal((nint)unchecked((nint)2017898311184593242), a8);
+            Assert.Equal((ushort)60280, a9.F0);
+            Assert.Equal((double)4052387873895590, a10);
+            Assert.Equal((nuint)unchecked((nuint)1321857087602747558), a11.F0.F0);
+            Assert.Equal((ulong)9011155097138053416, a11.F1.F0.F0);
+            Assert.Equal((short)8332, a12.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F68_Ret(64088, 8144208533922264568);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc68()
+    {
+        Console.Write("Running SwiftCallbackFunc68: ");
+        ExceptionDispatchInfo ex = null;
+        F68_Ret val = SwiftCallbackFunc68(&SwiftCallbackFunc68Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ushort)64088, val.F0);
+        Assert.Equal((long)8144208533922264568, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F69_S0_S0
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F69_S0
+    {
+        public F69_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F69_S1
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F69_S2
+    {
+        public int F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F69_S3
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F69_S4_S0
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F69_S4
+    {
+        public F69_S4_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F69_Ret
+    {
+        public byte F0;
+        public long F1;
+        public uint F2;
+
+        public F69_Ret(byte f0, long f1, uint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func691fAA7F69_RetVAeA0G3_S0V_Sis5Int32VAA0G3_S1Vs6UInt32Vs4Int8VAA0G3_S2VSiAA0G3_S3VAA0G3_S4VtXE_tF")]
+    private static extern F69_Ret SwiftCallbackFunc69(delegate* unmanaged[Swift]<F69_S0, nint, int, F69_S1, uint, sbyte, F69_S2, nint, F69_S3, F69_S4, SwiftSelf, F69_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F69_Ret SwiftCallbackFunc69Callback(F69_S0 a0, nint a1, int a2, F69_S1 a3, uint a4, sbyte a5, F69_S2 a6, nint a7, F69_S3 a8, F69_S4 a9, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ulong)7154553222175076145, a0.F0.F0);
+            Assert.Equal((nint)unchecked((nint)6685908100026425691), a1);
+            Assert.Equal((int)1166526155, a2);
+            Assert.Equal((long)6042278185730963289, a3.F0);
+            Assert.Equal((uint)182060391, a4);
+            Assert.Equal((sbyte)45, a5);
+            Assert.Equal((int)1886331345, a6.F0);
+            Assert.Equal((nint)unchecked((nint)485542148877875333), a7);
+            Assert.Equal((byte)209, a8.F0);
+            Assert.Equal((long)6856847647688321191, a9.F0.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F69_Ret(52, 5510942427596951043, 1854355776);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc69()
+    {
+        Console.Write("Running SwiftCallbackFunc69: ");
+        ExceptionDispatchInfo ex = null;
+        F69_Ret val = SwiftCallbackFunc69(&SwiftCallbackFunc69Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((byte)52, val.F0);
+        Assert.Equal((long)5510942427596951043, val.F1);
+        Assert.Equal((uint)1854355776, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F70_S0
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F70_S1
+    {
+        public nint F0;
+        public double F1;
+        public short F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F70_S2
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 36)]
+    struct F70_S3
+    {
+        public ushort F0;
+        public double F1;
+        public byte F2;
+        public ulong F3;
+        public int F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F70_S4_S0
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F70_S4
+    {
+        public F70_S4_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F70_Ret
+    {
+        public sbyte F0;
+        public uint F1;
+        public ulong F2;
+        public short F3;
+        public short F4;
+
+        public F70_Ret(sbyte f0, uint f1, ulong f2, short f3, short f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func701fAA7F70_RetVAEs5Int16V_s5UInt8VSis6UInt32VAA0G3_S0Vs5Int32VAA0G3_S1VAA0G3_S2VAA0G3_S3Vs5Int64VAOs6UInt16VS2iSuAA0G3_S4VtXE_tF")]
+    private static extern F70_Ret SwiftCallbackFunc70(delegate* unmanaged[Swift]<short, byte, nint, uint, F70_S0, int, F70_S1, F70_S2, F70_S3, long, int, ushort, nint, nint, nuint, F70_S4, SwiftSelf, F70_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F70_Ret SwiftCallbackFunc70Callback(short a0, byte a1, nint a2, uint a3, F70_S0 a4, int a5, F70_S1 a6, F70_S2 a7, F70_S3 a8, long a9, int a10, ushort a11, nint a12, nint a13, nuint a14, F70_S4 a15, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((short)-13167, a0);
+            Assert.Equal((byte)126, a1);
+            Assert.Equal((nint)unchecked((nint)3641983584484741827), a2);
+            Assert.Equal((uint)1090448265, a3);
+            Assert.Equal((long)3696858216713616004, a4.F0);
+            Assert.Equal((int)1687025402, a5);
+            Assert.Equal((nint)unchecked((nint)714916953527626038), a6.F0);
+            Assert.Equal((double)459810445900614, a6.F1);
+            Assert.Equal((short)4276, a6.F2);
+            Assert.Equal((uint)529194028, a7.F0);
+            Assert.Equal((ushort)40800, a8.F0);
+            Assert.Equal((double)3934985905568056, a8.F1);
+            Assert.Equal((byte)230, a8.F2);
+            Assert.Equal((ulong)7358783417346157372, a8.F3);
+            Assert.Equal((int)187926922, a8.F4);
+            Assert.Equal((long)228428560763393434, a9);
+            Assert.Equal((int)146501405, a10);
+            Assert.Equal((ushort)58804, a11);
+            Assert.Equal((nint)unchecked((nint)7098488973446286248), a12);
+            Assert.Equal((nint)unchecked((nint)1283658442251334575), a13);
+            Assert.Equal((nuint)unchecked((nuint)3644681944588099582), a14);
+            Assert.Equal((nuint)unchecked((nuint)8197135412164695911), a15.F0.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F70_Ret(45, 460004173, 7766748067698372018, 27369, 16509);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc70()
+    {
+        Console.Write("Running SwiftCallbackFunc70: ");
+        ExceptionDispatchInfo ex = null;
+        F70_Ret val = SwiftCallbackFunc70(&SwiftCallbackFunc70Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((sbyte)45, val.F0);
+        Assert.Equal((uint)460004173, val.F1);
+        Assert.Equal((ulong)7766748067698372018, val.F2);
+        Assert.Equal((short)27369, val.F3);
+        Assert.Equal((short)16509, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F71_S0_S0
+    {
+        public int F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F71_S0
+    {
+        public F71_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F71_S1
+    {
+        public long F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func711fs6UInt64VAeA6F71_S0V_AA0H3_S1VtXE_tF")]
+    private static extern ulong SwiftCallbackFunc71(delegate* unmanaged[Swift]<F71_S0, F71_S1, SwiftSelf, ulong> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static ulong SwiftCallbackFunc71Callback(F71_S0 a0, F71_S1 a1, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((int)258165353, a0.F0.F0);
+            Assert.Equal((long)8603744544763953916, a1.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 8460721064583106347;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc71()
+    {
+        Console.Write("Running SwiftCallbackFunc71: ");
+        ExceptionDispatchInfo ex = null;
+        ulong val = SwiftCallbackFunc71(&SwiftCallbackFunc71Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ulong)8460721064583106347, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F72_S0
+    {
+        public int F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F72_Ret
+    {
+        public uint F0;
+        public float F1;
+        public float F2;
+        public long F3;
+
+        public F72_Ret(uint f0, float f1, float f2, long f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func721fAA7F72_RetVAeA0G3_S0V_s5Int64Vs4Int8VtXE_tF")]
+    private static extern F72_Ret SwiftCallbackFunc72(delegate* unmanaged[Swift]<F72_S0, long, sbyte, SwiftSelf, F72_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F72_Ret SwiftCallbackFunc72Callback(F72_S0 a0, long a1, sbyte a2, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((int)2021509367, a0.F0);
+            Assert.Equal((long)2480039820482100351, a1);
+            Assert.Equal((sbyte)91, a2);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F72_Ret(1583929847, 2026234, 8092211, 445254465286132488);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc72()
+    {
+        Console.Write("Running SwiftCallbackFunc72: ");
+        ExceptionDispatchInfo ex = null;
+        F72_Ret val = SwiftCallbackFunc72(&SwiftCallbackFunc72Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((uint)1583929847, val.F0);
+        Assert.Equal((float)2026234, val.F1);
+        Assert.Equal((float)8092211, val.F2);
+        Assert.Equal((long)445254465286132488, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F73_S0
+    {
+        public int F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F73_S1_S0
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F73_S1
+    {
+        public F73_S1_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F73_S2
+    {
+        public int F0;
+        public float F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 11)]
+    struct F73_S3
+    {
+        public nuint F0;
+        public short F1;
+        public sbyte F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F73_S4
+    {
+        public short F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F73_S5
+    {
+        public uint F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func731fs4Int8VAESd_SfAA6F73_S0Vs5Int64VAA0H3_S1VAA0H3_S2Vs5Int16VSdAEs5Int32VAiA0H3_S3VSus6UInt64VAqA0H3_S4Vs5UInt8VAA0H3_S5VtXE_tF")]
+    private static extern sbyte SwiftCallbackFunc73(delegate* unmanaged[Swift]<double, float, F73_S0, long, F73_S1, F73_S2, short, double, sbyte, int, long, F73_S3, nuint, ulong, int, F73_S4, byte, F73_S5, SwiftSelf, sbyte> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static sbyte SwiftCallbackFunc73Callback(double a0, float a1, F73_S0 a2, long a3, F73_S1 a4, F73_S2 a5, short a6, double a7, sbyte a8, int a9, long a10, F73_S3 a11, nuint a12, ulong a13, int a14, F73_S4 a15, byte a16, F73_S5 a17, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((double)3038361048801008, a0);
+            Assert.Equal((float)7870661, a1);
+            Assert.Equal((int)1555231180, a2.F0);
+            Assert.Equal((long)7433951069104961, a3);
+            Assert.Equal((ushort)63298, a4.F0.F0);
+            Assert.Equal((int)1759846580, a5.F0);
+            Assert.Equal((float)1335901, a5.F1);
+            Assert.Equal((short)11514, a6);
+            Assert.Equal((double)695278874601974, a7);
+            Assert.Equal((sbyte)108, a8);
+            Assert.Equal((int)48660527, a9);
+            Assert.Equal((long)7762050749172332624, a10);
+            Assert.Equal((nuint)unchecked((nuint)7486686356276472663), a11.F0);
+            Assert.Equal((short)11622, a11.F1);
+            Assert.Equal((sbyte)112, a11.F2);
+            Assert.Equal((nuint)unchecked((nuint)884183974530885885), a12);
+            Assert.Equal((ulong)7434462110419085390, a13);
+            Assert.Equal((int)170242607, a14);
+            Assert.Equal((short)-26039, a15.F0);
+            Assert.Equal((byte)41, a16);
+            Assert.Equal((uint)191302504, a17.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 76;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc73()
+    {
+        Console.Write("Running SwiftCallbackFunc73: ");
+        ExceptionDispatchInfo ex = null;
+        sbyte val = SwiftCallbackFunc73(&SwiftCallbackFunc73Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((sbyte)76, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 17)]
+    struct F74_S0_S0
+    {
+        public ushort F0;
+        public nuint F1;
+        public sbyte F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F74_S0
+    {
+        public F74_S0_S0 F0;
+        public nint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F74_S1
+    {
+        public float F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func741fs5Int64VAeA6F74_S0V_AA0H3_S1Vs5Int16VtXE_tF")]
+    private static extern long SwiftCallbackFunc74(delegate* unmanaged[Swift]<F74_S0, F74_S1, short, SwiftSelf, long> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static long SwiftCallbackFunc74Callback(F74_S0 a0, F74_S1 a1, short a2, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ushort)59883, a0.F0.F0);
+            Assert.Equal((nuint)unchecked((nuint)5554216411943233256), a0.F0.F1);
+            Assert.Equal((sbyte)126, a0.F0.F2);
+            Assert.Equal((nint)unchecked((nint)724541378819571203), a0.F1);
+            Assert.Equal((float)172601, a1.F0);
+            Assert.Equal((short)27932, a2);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 7382123574052120438;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc74()
+    {
+        Console.Write("Running SwiftCallbackFunc74: ");
+        ExceptionDispatchInfo ex = null;
+        long val = SwiftCallbackFunc74(&SwiftCallbackFunc74Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((long)7382123574052120438, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F75_S0
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F75_S1_S0
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F75_S1
+    {
+        public F75_S1_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F75_S2
+    {
+        public sbyte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F75_S3_S0
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F75_S3
+    {
+        public F75_S3_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 36)]
+    struct F75_Ret
+    {
+        public byte F0;
+        public double F1;
+        public double F2;
+        public long F3;
+        public uint F4;
+
+        public F75_Ret(byte f0, double f1, double f2, long f3, uint f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func751fAA7F75_RetVAEs4Int8V_A2gA0G3_S0VAA0G3_S1VAA0G3_S2VAA0G3_S3VtXE_tF")]
+    private static extern F75_Ret SwiftCallbackFunc75(delegate* unmanaged[Swift]<sbyte, sbyte, sbyte, F75_S0, F75_S1, F75_S2, F75_S3, SwiftSelf, F75_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F75_Ret SwiftCallbackFunc75Callback(sbyte a0, sbyte a1, sbyte a2, F75_S0 a3, F75_S1 a4, F75_S2 a5, F75_S3 a6, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)-105, a0);
+            Assert.Equal((sbyte)71, a1);
+            Assert.Equal((sbyte)108, a2);
+            Assert.Equal((long)7224638108479292438, a3.F0);
+            Assert.Equal((byte)126, a4.F0.F0);
+            Assert.Equal((sbyte)-88, a5.F0);
+            Assert.Equal((ushort)4934, a6.F0.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F75_Ret(8, 494440474432982, 3322048351205313, 7525253715666045341, 7365589);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc75()
+    {
+        Console.Write("Running SwiftCallbackFunc75: ");
+        ExceptionDispatchInfo ex = null;
+        F75_Ret val = SwiftCallbackFunc75(&SwiftCallbackFunc75Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((byte)8, val.F0);
+        Assert.Equal((double)494440474432982, val.F1);
+        Assert.Equal((double)3322048351205313, val.F2);
+        Assert.Equal((long)7525253715666045341, val.F3);
+        Assert.Equal((uint)7365589, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F76_S0
+    {
+        public ushort F0;
+        public nint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F76_S1_S0
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F76_S1
+    {
+        public F76_S1_S0 F0;
+        public nuint F1;
+        public double F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F76_S2
+    {
+        public ulong F0;
+        public nint F1;
+        public ushort F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F76_S3_S0
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F76_S3
+    {
+        public F76_S3_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F76_S4
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F76_S5
+    {
+        public nuint F0;
+        public double F1;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func761fs6UInt64VAEs5UInt8V_AA6F76_S0Vs4Int8VAA0I3_S1VAA0I3_S2VAA0I3_S3Vs6UInt32VAA0I3_S4VAgA0I3_S5VSds5Int16VtXE_tF")]
+    private static extern ulong SwiftCallbackFunc76(delegate* unmanaged[Swift]<byte, F76_S0, sbyte, F76_S1, F76_S2, F76_S3, uint, F76_S4, byte, F76_S5, double, short, SwiftSelf, ulong> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static ulong SwiftCallbackFunc76Callback(byte a0, F76_S0 a1, sbyte a2, F76_S1 a3, F76_S2 a4, F76_S3 a5, uint a6, F76_S4 a7, byte a8, F76_S5 a9, double a10, short a11, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((byte)69, a0);
+            Assert.Equal((ushort)25503, a1.F0);
+            Assert.Equal((nint)unchecked((nint)4872234474620951743), a1.F1);
+            Assert.Equal((sbyte)43, a2);
+            Assert.Equal((nint)unchecked((nint)1199076663426903579), a3.F0.F0);
+            Assert.Equal((nuint)unchecked((nuint)4639522222462236688), a3.F1);
+            Assert.Equal((double)4082956091930029, a3.F2);
+            Assert.Equal((ulong)5171821618947987626, a4.F0);
+            Assert.Equal((nint)unchecked((nint)3369410144919558564), a4.F1);
+            Assert.Equal((ushort)5287, a4.F2);
+            Assert.Equal((long)929854460912895550, a5.F0.F0);
+            Assert.Equal((uint)1208311201, a6);
+            Assert.Equal((long)7033993025788649145, a7.F0);
+            Assert.Equal((byte)58, a8);
+            Assert.Equal((nuint)unchecked((nuint)1401399014740601512), a9.F0);
+            Assert.Equal((double)2523645319232571, a9.F1);
+            Assert.Equal((double)230232835550369, a10);
+            Assert.Equal((short)-22975, a11);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 2608582352406315143;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc76()
+    {
+        Console.Write("Running SwiftCallbackFunc76: ");
+        ExceptionDispatchInfo ex = null;
+        ulong val = SwiftCallbackFunc76(&SwiftCallbackFunc76Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ulong)2608582352406315143, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F77_S0
+    {
+        public long F0;
+        public double F1;
+        public nuint F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F77_S1
+    {
+        public short F0;
+        public float F1;
+        public float F2;
+        public long F3;
+        public long F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F77_S2
+    {
+        public ushort F0;
+        public sbyte F1;
+        public int F2;
+        public float F3;
+        public float F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F77_Ret
+    {
+        public double F0;
+        public ushort F1;
+        public sbyte F2;
+        public nuint F3;
+
+        public F77_Ret(double f0, ushort f1, sbyte f2, nuint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func771fAA7F77_RetVAESd_AA0G3_S0VAA0G3_S1VAA0G3_S2Vs6UInt32VtXE_tF")]
+    private static extern F77_Ret SwiftCallbackFunc77(delegate* unmanaged[Swift]<double, F77_S0, F77_S1, F77_S2, uint, SwiftSelf, F77_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F77_Ret SwiftCallbackFunc77Callback(double a0, F77_S0 a1, F77_S1 a2, F77_S2 a3, uint a4, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((double)1623173949127682, a0);
+            Assert.Equal((long)5204451347781433070, a1.F0);
+            Assert.Equal((double)3469485630755805, a1.F1);
+            Assert.Equal((nuint)unchecked((nuint)7586276835848725004), a1.F2);
+            Assert.Equal((short)2405, a2.F0);
+            Assert.Equal((float)2419792, a2.F1);
+            Assert.Equal((float)6769317, a2.F2);
+            Assert.Equal((long)1542327522833750776, a2.F3);
+            Assert.Equal((long)1297586130846695275, a2.F4);
+            Assert.Equal((ushort)10102, a3.F0);
+            Assert.Equal((sbyte)-48, a3.F1);
+            Assert.Equal((int)14517107, a3.F2);
+            Assert.Equal((float)4856023, a3.F3);
+            Assert.Equal((float)2681358, a3.F4);
+            Assert.Equal((uint)1463251524, a4);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F77_Ret(1601613740657843, 14373, -17, unchecked((nuint)274065318894652498));
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc77()
+    {
+        Console.Write("Running SwiftCallbackFunc77: ");
+        ExceptionDispatchInfo ex = null;
+        F77_Ret val = SwiftCallbackFunc77(&SwiftCallbackFunc77Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((double)1601613740657843, val.F0);
+        Assert.Equal((ushort)14373, val.F1);
+        Assert.Equal((sbyte)-17, val.F2);
+        Assert.Equal((nuint)unchecked((nuint)274065318894652498), val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F78_S0
+    {
+        public nuint F0;
+        public nint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F78_S1_S0
+    {
+        public sbyte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F78_S1
+    {
+        public short F0;
+        public ulong F1;
+        public F78_S1_S0 F2;
+        public int F3;
+        public nint F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F78_S2
+    {
+        public nuint F0;
+        public ulong F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F78_S3
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F78_S4
+    {
+        public ulong F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func781fS2ds6UInt64V_AA6F78_S0VAeA0H3_S1VAA0H3_S2Vs5Int32VAEs5Int64VAA0H3_S3VS2fs6UInt16VAA0H3_S4VSdtXE_tF")]
+    private static extern double SwiftCallbackFunc78(delegate* unmanaged[Swift]<ulong, F78_S0, ulong, F78_S1, F78_S2, int, ulong, long, F78_S3, float, float, ushort, F78_S4, double, SwiftSelf, double> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static double SwiftCallbackFunc78Callback(ulong a0, F78_S0 a1, ulong a2, F78_S1 a3, F78_S2 a4, int a5, ulong a6, long a7, F78_S3 a8, float a9, float a10, ushort a11, F78_S4 a12, double a13, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ulong)6780767594736146373, a0);
+            Assert.Equal((nuint)unchecked((nuint)6264193481541646332), a1.F0);
+            Assert.Equal((nint)unchecked((nint)6600856439035088503), a1.F1);
+            Assert.Equal((ulong)1968254881389492170, a2);
+            Assert.Equal((short)-17873, a3.F0);
+            Assert.Equal((ulong)5581169895682201971, a3.F1);
+            Assert.Equal((sbyte)127, a3.F2.F0);
+            Assert.Equal((int)1942346704, a3.F3);
+            Assert.Equal((nint)unchecked((nint)118658265323815307), a3.F4);
+            Assert.Equal((nuint)unchecked((nuint)1489326778640378879), a4.F0);
+            Assert.Equal((ulong)1427061853707270770, a4.F1);
+            Assert.Equal((int)858391966, a5);
+            Assert.Equal((ulong)5830110056171302270, a6);
+            Assert.Equal((long)2953614358173898788, a7);
+            Assert.Equal((ulong)6761452244699684409, a8.F0);
+            Assert.Equal((float)3452451, a9);
+            Assert.Equal((float)3507119, a10);
+            Assert.Equal((ushort)40036, a11);
+            Assert.Equal((ulong)4800085294404376817, a12.F0);
+            Assert.Equal((double)780368756754436, a13);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 1088544646657969;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc78()
+    {
+        Console.Write("Running SwiftCallbackFunc78: ");
+        ExceptionDispatchInfo ex = null;
+        double val = SwiftCallbackFunc78(&SwiftCallbackFunc78Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((double)1088544646657969, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F79_S0_S0
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F79_S0
+    {
+        public F79_S0_S0 F0;
+        public nint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F79_Ret
+    {
+        public uint F0;
+        public ulong F1;
+        public double F2;
+
+        public F79_Ret(uint f0, ulong f1, double f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func791fAA7F79_RetVAeA0G3_S0V_SftXE_tF")]
+    private static extern F79_Ret SwiftCallbackFunc79(delegate* unmanaged[Swift]<F79_S0, float, SwiftSelf, F79_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F79_Ret SwiftCallbackFunc79Callback(F79_S0 a0, float a1, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nuint)unchecked((nuint)1013911700897046117), a0.F0.F0);
+            Assert.Equal((nint)unchecked((nint)7323935615297665289), a0.F1);
+            Assert.Equal((float)5159506, a1);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F79_Ret(895629788, 4824209192377460356, 2599150646028906);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc79()
+    {
+        Console.Write("Running SwiftCallbackFunc79: ");
+        ExceptionDispatchInfo ex = null;
+        F79_Ret val = SwiftCallbackFunc79(&SwiftCallbackFunc79Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((uint)895629788, val.F0);
+        Assert.Equal((ulong)4824209192377460356, val.F1);
+        Assert.Equal((double)2599150646028906, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F80_S0
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F80_S1_S0_S0
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F80_S1_S0
+    {
+        public F80_S1_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct F80_S1
+    {
+        public nint F0;
+        public F80_S1_S0 F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F80_S2
+    {
+        public ulong F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func801fS2fs6UInt64V_Sis5Int32Vs5Int16VSuAA6F80_S0VAISis4Int8VAGs6UInt32VAA0J3_S1VAA0J3_S2VAEtXE_tF")]
+    private static extern float SwiftCallbackFunc80(delegate* unmanaged[Swift]<ulong, nint, int, short, nuint, F80_S0, short, nint, sbyte, int, uint, F80_S1, F80_S2, ulong, SwiftSelf, float> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static float SwiftCallbackFunc80Callback(ulong a0, nint a1, int a2, short a3, nuint a4, F80_S0 a5, short a6, nint a7, sbyte a8, int a9, uint a10, F80_S1 a11, F80_S2 a12, ulong a13, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ulong)4470427843910624516, a0);
+            Assert.Equal((nint)unchecked((nint)8383677749057878551), a1);
+            Assert.Equal((int)2017117925, a2);
+            Assert.Equal((short)-10531, a3);
+            Assert.Equal((nuint)unchecked((nuint)3438375001906177611), a4);
+            Assert.Equal((ushort)65220, a5.F0);
+            Assert.Equal((short)7107, a6);
+            Assert.Equal((nint)unchecked((nint)7315288835693680178), a7);
+            Assert.Equal((sbyte)-48, a8);
+            Assert.Equal((int)813870434, a9);
+            Assert.Equal((uint)1092037477, a10);
+            Assert.Equal((nint)unchecked((nint)7104962838387954470), a11.F0);
+            Assert.Equal((byte)236, a11.F1.F0.F0);
+            Assert.Equal((ulong)7460392384225808790, a12.F0);
+            Assert.Equal((ulong)364121728483540667, a13);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 5169959;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc80()
+    {
+        Console.Write("Running SwiftCallbackFunc80: ");
+        ExceptionDispatchInfo ex = null;
+        float val = SwiftCallbackFunc80(&SwiftCallbackFunc80Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((float)5169959, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F81_S0
+    {
+        public float F0;
+        public float F1;
+        public nint F2;
+        public nint F3;
+        public nint F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F81_Ret
+    {
+        public nint F0;
+
+        public F81_Ret(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func811fAA7F81_RetVAEs5UInt8V_s6UInt32VAgA0G3_S0Vs4Int8VtXE_tF")]
+    private static extern F81_Ret SwiftCallbackFunc81(delegate* unmanaged[Swift]<byte, uint, byte, F81_S0, sbyte, SwiftSelf, F81_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F81_Ret SwiftCallbackFunc81Callback(byte a0, uint a1, byte a2, F81_S0 a3, sbyte a4, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((byte)53, a0);
+            Assert.Equal((uint)57591489, a1);
+            Assert.Equal((byte)19, a2);
+            Assert.Equal((float)5675845, a3.F0);
+            Assert.Equal((float)6469988, a3.F1);
+            Assert.Equal((nint)unchecked((nint)5775316279348621124), a3.F2);
+            Assert.Equal((nint)unchecked((nint)7699091894067057939), a3.F3);
+            Assert.Equal((nint)unchecked((nint)1049086627558950131), a3.F4);
+            Assert.Equal((sbyte)15, a4);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F81_Ret(unchecked((nint)1055606720535823947));
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc81()
+    {
+        Console.Write("Running SwiftCallbackFunc81: ");
+        ExceptionDispatchInfo ex = null;
+        F81_Ret val = SwiftCallbackFunc81(&SwiftCallbackFunc81Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)1055606720535823947), val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F82_S0_S0
+    {
+        public float F0;
+        public nuint F1;
+        public ushort F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F82_S0
+    {
+        public nuint F0;
+        public F82_S0_S0 F1;
+        public ushort F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F82_S1
+    {
+        public int F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F82_S2
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F82_S3_S0
+    {
+        public int F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F82_S3
+    {
+        public double F0;
+        public nuint F1;
+        public F82_S3_S0 F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F82_S4
+    {
+        public ulong F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func821fS2fs5Int64V_AA6F82_S0Vs5Int16Vs4Int8Vs6UInt32VAA0H3_S1Vs5Int32VAeKSdAA0H3_S2VAA0H3_S3VAA0H3_S4VtXE_tF")]
+    private static extern float SwiftCallbackFunc82(delegate* unmanaged[Swift]<long, F82_S0, short, sbyte, uint, F82_S1, int, long, sbyte, double, F82_S2, F82_S3, F82_S4, SwiftSelf, float> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static float SwiftCallbackFunc82Callback(long a0, F82_S0 a1, short a2, sbyte a3, uint a4, F82_S1 a5, int a6, long a7, sbyte a8, double a9, F82_S2 a10, F82_S3 a11, F82_S4 a12, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((long)6454754584537364459, a0);
+            Assert.Equal((nuint)unchecked((nuint)6703634779264968131), a1.F0);
+            Assert.Equal((float)1010059, a1.F1.F0);
+            Assert.Equal((nuint)unchecked((nuint)4772968591609202284), a1.F1.F1);
+            Assert.Equal((ushort)64552, a1.F1.F2);
+            Assert.Equal((ushort)47126, a1.F2);
+            Assert.Equal((short)9869, a2);
+            Assert.Equal((sbyte)-8, a3);
+            Assert.Equal((uint)1741550381, a4);
+            Assert.Equal((int)705741282, a5.F0);
+            Assert.Equal((int)1998781399, a6);
+            Assert.Equal((long)7787961471254401526, a7);
+            Assert.Equal((sbyte)-27, a8);
+            Assert.Equal((double)4429830670351707, a9);
+            Assert.Equal((nint)unchecked((nint)4975772762589349422), a10.F0);
+            Assert.Equal((double)1423948098664774, a11.F0);
+            Assert.Equal((nuint)unchecked((nuint)504607538824251986), a11.F1);
+            Assert.Equal((int)1940911018, a11.F2.F0);
+            Assert.Equal((ulong)2988623645681463667, a12.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 7514083;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc82()
+    {
+        Console.Write("Running SwiftCallbackFunc82: ");
+        ExceptionDispatchInfo ex = null;
+        float val = SwiftCallbackFunc82(&SwiftCallbackFunc82Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((float)7514083, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F83_S0
+    {
+        public int F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F83_Ret
+    {
+        public short F0;
+
+        public F83_Ret(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func831fAA7F83_RetVAEs4Int8V_AA0G3_S0Vs5Int16VtXE_tF")]
+    private static extern F83_Ret SwiftCallbackFunc83(delegate* unmanaged[Swift]<sbyte, F83_S0, short, SwiftSelf, F83_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F83_Ret SwiftCallbackFunc83Callback(sbyte a0, F83_S0 a1, short a2, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)17, a0);
+            Assert.Equal((int)530755056, a1.F0);
+            Assert.Equal((short)-11465, a2);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F83_Ret(-32475);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc83()
+    {
+        Console.Write("Running SwiftCallbackFunc83: ");
+        ExceptionDispatchInfo ex = null;
+        F83_Ret val = SwiftCallbackFunc83(&SwiftCallbackFunc83Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((short)-32475, val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 36)]
+    struct F84_S0
+    {
+        public nuint F0;
+        public uint F1;
+        public nuint F2;
+        public ulong F3;
+        public int F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F84_S1
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F84_S2
+    {
+        public float F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F84_S3
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F84_S4
+    {
+        public short F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 10)]
+    struct F84_S5
+    {
+        public nint F0;
+        public short F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F84_S6
+    {
+        public short F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F84_S7
+    {
+        public int F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func841fS2is5Int32V_AA6F84_S0VAA0H3_S1VSdAEs5Int16VSdAA0H3_S2VAA0H3_S3VSdAA0H3_S4VAA0H3_S5VAA0H3_S6VAA0H3_S7VSutXE_tF")]
+    private static extern nint SwiftCallbackFunc84(delegate* unmanaged[Swift]<int, F84_S0, F84_S1, double, int, short, double, F84_S2, F84_S3, double, F84_S4, F84_S5, F84_S6, F84_S7, nuint, SwiftSelf, nint> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static nint SwiftCallbackFunc84Callback(int a0, F84_S0 a1, F84_S1 a2, double a3, int a4, short a5, double a6, F84_S2 a7, F84_S3 a8, double a9, F84_S4 a10, F84_S5 a11, F84_S6 a12, F84_S7 a13, nuint a14, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((int)1605022009, a0);
+            Assert.Equal((nuint)unchecked((nuint)6165049220831866664), a1.F0);
+            Assert.Equal((uint)1235491183, a1.F1);
+            Assert.Equal((nuint)unchecked((nuint)7926620970405586826), a1.F2);
+            Assert.Equal((ulong)2633248816907294140, a1.F3);
+            Assert.Equal((int)2012834055, a1.F4);
+            Assert.Equal((nuint)unchecked((nuint)2881830362339122988), a2.F0);
+            Assert.Equal((double)4065309434963087, a3);
+            Assert.Equal((int)1125165825, a4);
+            Assert.Equal((short)-32360, a5);
+            Assert.Equal((double)1145602045200029, a6);
+            Assert.Equal((float)5655563, a7.F0);
+            Assert.Equal((byte)14, a8.F0);
+            Assert.Equal((double)3919593995303128, a9);
+            Assert.Equal((short)26090, a10.F0);
+            Assert.Equal((nint)unchecked((nint)8584898862398781737), a11.F0);
+            Assert.Equal((short)-5185, a11.F1);
+            Assert.Equal((short)144, a12.F0);
+            Assert.Equal((int)2138004352, a13.F0);
+            Assert.Equal((nuint)unchecked((nuint)9102562043027810686), a14);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return unchecked((nint)2320162198211027422);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc84()
+    {
+        Console.Write("Running SwiftCallbackFunc84: ");
+        ExceptionDispatchInfo ex = null;
+        nint val = SwiftCallbackFunc84(&SwiftCallbackFunc84Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)2320162198211027422), val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F85_S0
+    {
+        public double F0;
+        public double F1;
+        public sbyte F2;
+        public int F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F85_S1
+    {
+        public long F0;
+        public ushort F1;
+        public ulong F2;
+        public nuint F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F85_S2
+    {
+        public float F0;
+        public float F1;
+        public uint F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F85_S3
+    {
+        public byte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F85_S4
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F85_S5
+    {
+        public double F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 48)]
+    struct F85_Ret
+    {
+        public uint F0;
+        public ushort F1;
+        public int F2;
+        public double F3;
+        public nint F4;
+        public ulong F5;
+        public long F6;
+
+        public F85_Ret(uint f0, ushort f1, int f2, double f3, nint f4, ulong f5, long f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func851fAA7F85_RetVAeA0G3_S0V_AA0G3_S1Vs6UInt32VAA0G3_S2Vs5Int64VAA0G3_S3VAoA0G3_S4Vs6UInt16Vs5UInt8Vs5Int32VAkYSfAA0G3_S5VAOtXE_tF")]
+    private static extern F85_Ret SwiftCallbackFunc85(delegate* unmanaged[Swift]<F85_S0, F85_S1, uint, F85_S2, long, F85_S3, long, F85_S4, ushort, byte, int, uint, int, float, F85_S5, long, SwiftSelf, F85_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F85_Ret SwiftCallbackFunc85Callback(F85_S0 a0, F85_S1 a1, uint a2, F85_S2 a3, long a4, F85_S3 a5, long a6, F85_S4 a7, ushort a8, byte a9, int a10, uint a11, int a12, float a13, F85_S5 a14, long a15, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((double)4325646965362202, a0.F0);
+            Assert.Equal((double)3313084380250914, a0.F1);
+            Assert.Equal((sbyte)42, a0.F2);
+            Assert.Equal((int)2034100272, a0.F3);
+            Assert.Equal((long)1365643665271339575, a1.F0);
+            Assert.Equal((ushort)25442, a1.F1);
+            Assert.Equal((ulong)3699631470459352980, a1.F2);
+            Assert.Equal((nuint)unchecked((nuint)7611776251925132200), a1.F3);
+            Assert.Equal((uint)911446742, a2);
+            Assert.Equal((float)352423, a3.F0);
+            Assert.Equal((float)7150341, a3.F1);
+            Assert.Equal((uint)2090089360, a3.F2);
+            Assert.Equal((long)5731257538910387688, a4);
+            Assert.Equal((byte)171, a5.F0);
+            Assert.Equal((long)5742887585483060342, a6);
+            Assert.Equal((nuint)unchecked((nuint)1182236975680416316), a7.F0);
+            Assert.Equal((ushort)32137, a8);
+            Assert.Equal((byte)44, a9);
+            Assert.Equal((int)2143531010, a10);
+            Assert.Equal((uint)1271996557, a11);
+            Assert.Equal((int)1035188446, a12);
+            Assert.Equal((float)1925443, a13);
+            Assert.Equal((double)2591574394337603, a14.F0);
+            Assert.Equal((long)721102428782331317, a15);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F85_Ret(1768798158, 27348, 1836190158, 2058478254572549, unchecked((nint)7881716796049851507), 5099946246805224241, 1499623158991084417);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc85()
+    {
+        Console.Write("Running SwiftCallbackFunc85: ");
+        ExceptionDispatchInfo ex = null;
+        F85_Ret val = SwiftCallbackFunc85(&SwiftCallbackFunc85Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((uint)1768798158, val.F0);
+        Assert.Equal((ushort)27348, val.F1);
+        Assert.Equal((int)1836190158, val.F2);
+        Assert.Equal((double)2058478254572549, val.F3);
+        Assert.Equal((nint)unchecked((nint)7881716796049851507), val.F4);
+        Assert.Equal((ulong)5099946246805224241, val.F5);
+        Assert.Equal((long)1499623158991084417, val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 15)]
+    struct F86_S0
+    {
+        public nint F0;
+        public float F1;
+        public short F2;
+        public sbyte F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F86_S1
+    {
+        public double F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F86_S2
+    {
+        public nint F0;
+        public float F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F86_S3
+    {
+        public ushort F0;
+        public float F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 17)]
+    struct F86_Ret
+    {
+        public short F0;
+        public uint F1;
+        public double F2;
+        public byte F3;
+
+        public F86_Ret(short f0, uint f1, double f2, byte f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func861fAA7F86_RetVAESf_s5Int16VSiAGSfAA0G3_S0VAA0G3_S1VAA0G3_S2VSis6UInt32VS2uSfs5Int64VAA0G3_S3VSutXE_tF")]
+    private static extern F86_Ret SwiftCallbackFunc86(delegate* unmanaged[Swift]<float, short, nint, short, float, F86_S0, F86_S1, F86_S2, nint, uint, nuint, nuint, float, long, F86_S3, nuint, SwiftSelf, F86_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F86_Ret SwiftCallbackFunc86Callback(float a0, short a1, nint a2, short a3, float a4, F86_S0 a5, F86_S1 a6, F86_S2 a7, nint a8, uint a9, nuint a10, nuint a11, float a12, long a13, F86_S3 a14, nuint a15, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((float)2913632, a0);
+            Assert.Equal((short)3735, a1);
+            Assert.Equal((nint)unchecked((nint)2773655476379499086), a2);
+            Assert.Equal((short)22973, a3);
+            Assert.Equal((float)8292778, a4);
+            Assert.Equal((nint)unchecked((nint)5562042565258891920), a5.F0);
+            Assert.Equal((float)8370233, a5.F1);
+            Assert.Equal((short)18292, a5.F2);
+            Assert.Equal((sbyte)-32, a5.F3);
+            Assert.Equal((double)486951152980016, a6.F0);
+            Assert.Equal((nint)unchecked((nint)170033426151098456), a7.F0);
+            Assert.Equal((float)3867810, a7.F1);
+            Assert.Equal((nint)unchecked((nint)7390780928011218856), a8);
+            Assert.Equal((uint)1504267943, a9);
+            Assert.Equal((nuint)unchecked((nuint)2046987193814931100), a10);
+            Assert.Equal((nuint)unchecked((nuint)4860202472307588968), a11);
+            Assert.Equal((float)1644019, a12);
+            Assert.Equal((long)8084012412562897328, a13);
+            Assert.Equal((ushort)46301, a14.F0);
+            Assert.Equal((float)5633701, a14.F1);
+            Assert.Equal((nuint)unchecked((nuint)1911608136082175332), a15);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F86_Ret(23398, 842205070, 544883763911905, 215);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc86()
+    {
+        Console.Write("Running SwiftCallbackFunc86: ");
+        ExceptionDispatchInfo ex = null;
+        F86_Ret val = SwiftCallbackFunc86(&SwiftCallbackFunc86Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((short)23398, val.F0);
+        Assert.Equal((uint)842205070, val.F1);
+        Assert.Equal((double)544883763911905, val.F2);
+        Assert.Equal((byte)215, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F87_S0
+    {
+        public int F0;
+        public short F1;
+        public int F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F87_S1
+    {
+        public float F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func871fs6UInt64VAESf_SiAA6F87_S0VAA0H3_S1VtXE_tF")]
+    private static extern ulong SwiftCallbackFunc87(delegate* unmanaged[Swift]<float, nint, F87_S0, F87_S1, SwiftSelf, ulong> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static ulong SwiftCallbackFunc87Callback(float a0, nint a1, F87_S0 a2, F87_S1 a3, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((float)1413086, a0);
+            Assert.Equal((nint)unchecked((nint)4206825694012787823), a1);
+            Assert.Equal((int)70240457, a2.F0);
+            Assert.Equal((short)30503, a2.F1);
+            Assert.Equal((int)671751848, a2.F2);
+            Assert.Equal((float)6641304, a3.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 7817329728997505478;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc87()
+    {
+        Console.Write("Running SwiftCallbackFunc87: ");
+        ExceptionDispatchInfo ex = null;
+        ulong val = SwiftCallbackFunc87(&SwiftCallbackFunc87Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ulong)7817329728997505478, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F88_S0
+    {
+        public sbyte F0;
+        public short F1;
+        public byte F2;
+        public double F3;
+        public ushort F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct F88_S1
+    {
+        public double F0;
+        public byte F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F88_S2
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F88_S3
+    {
+        public sbyte F0;
+        public uint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F88_Ret
+    {
+        public int F0;
+        public uint F1;
+        public nint F2;
+        public ulong F3;
+
+        public F88_Ret(int f0, uint f1, nint f2, ulong f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func881fAA7F88_RetVAeA0G3_S0V_AA0G3_S1VSfSuSfSiAA0G3_S2Vs6UInt64VAA0G3_S3VAMtXE_tF")]
+    private static extern F88_Ret SwiftCallbackFunc88(delegate* unmanaged[Swift]<F88_S0, F88_S1, float, nuint, float, nint, F88_S2, ulong, F88_S3, ulong, SwiftSelf, F88_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F88_Ret SwiftCallbackFunc88Callback(F88_S0 a0, F88_S1 a1, float a2, nuint a3, float a4, nint a5, F88_S2 a6, ulong a7, F88_S3 a8, ulong a9, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)125, a0.F0);
+            Assert.Equal((short)-10705, a0.F1);
+            Assert.Equal((byte)21, a0.F2);
+            Assert.Equal((double)361845689097003, a0.F3);
+            Assert.Equal((ushort)41749, a0.F4);
+            Assert.Equal((double)1754583995806427, a1.F0);
+            Assert.Equal((byte)178, a1.F1);
+            Assert.Equal((float)4705205, a2);
+            Assert.Equal((nuint)unchecked((nuint)5985040566226273121), a3);
+            Assert.Equal((float)2484194, a4);
+            Assert.Equal((nint)unchecked((nint)1904196135427766362), a5);
+            Assert.Equal((nuint)unchecked((nuint)5436710892090266406), a6.F0);
+            Assert.Equal((ulong)4250368992471675181, a7);
+            Assert.Equal((sbyte)-87, a8.F0);
+            Assert.Equal((uint)362108395, a8.F1);
+            Assert.Equal((ulong)3388632419732870796, a9);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F88_Ret(46260161, 1256453227, unchecked((nint)1136413683894590872), 5467618237876965483);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc88()
+    {
+        Console.Write("Running SwiftCallbackFunc88: ");
+        ExceptionDispatchInfo ex = null;
+        F88_Ret val = SwiftCallbackFunc88(&SwiftCallbackFunc88Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((int)46260161, val.F0);
+        Assert.Equal((uint)1256453227, val.F1);
+        Assert.Equal((nint)unchecked((nint)1136413683894590872), val.F2);
+        Assert.Equal((ulong)5467618237876965483, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F89_S0
+    {
+        public double F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F89_Ret_S0
+    {
+        public double F0;
+
+        public F89_Ret_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F89_Ret
+    {
+        public int F0;
+        public F89_Ret_S0 F1;
+        public nuint F2;
+        public long F3;
+
+        public F89_Ret(int f0, F89_Ret_S0 f1, nuint f2, long f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func891fAA7F89_RetVAeA0G3_S0VXE_tF")]
+    private static extern F89_Ret SwiftCallbackFunc89(delegate* unmanaged[Swift]<F89_S0, SwiftSelf, F89_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F89_Ret SwiftCallbackFunc89Callback(F89_S0 a0, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((double)2137010348736191, a0.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F89_Ret(891143792, new F89_Ret_S0(3363709596088133), unchecked((nuint)18782615486598250), 1765451025668395967);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc89()
+    {
+        Console.Write("Running SwiftCallbackFunc89: ");
+        ExceptionDispatchInfo ex = null;
+        F89_Ret val = SwiftCallbackFunc89(&SwiftCallbackFunc89Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((int)891143792, val.F0);
+        Assert.Equal((double)3363709596088133, val.F1.F0);
+        Assert.Equal((nuint)unchecked((nuint)18782615486598250), val.F2);
+        Assert.Equal((long)1765451025668395967, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F90_S0_S0_S0
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F90_S0_S0
+    {
+        public F90_S0_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 34)]
+    struct F90_S0
+    {
+        public F90_S0_S0 F0;
+        public nuint F1;
+        public uint F2;
+        public long F3;
+        public short F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F90_S1
+    {
+        public ushort F0;
+        public short F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F90_S2
+    {
+        public nint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F90_S3
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F90_S4
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F90_Ret
+    {
+        public short F0;
+        public nint F1;
+
+        public F90_Ret(short f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func901fAA7F90_RetVAEs5Int64V_SfAA0G3_S0Vs6UInt32Vs6UInt16VAA0G3_S1VAA0G3_S2VAA0G3_S3VAA0G3_S4VtXE_tF")]
+    private static extern F90_Ret SwiftCallbackFunc90(delegate* unmanaged[Swift]<long, float, F90_S0, uint, ushort, F90_S1, F90_S2, F90_S3, F90_S4, SwiftSelf, F90_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F90_Ret SwiftCallbackFunc90Callback(long a0, float a1, F90_S0 a2, uint a3, ushort a4, F90_S1 a5, F90_S2 a6, F90_S3 a7, F90_S4 a8, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((long)920081051198141017, a0);
+            Assert.Equal((float)661904, a1);
+            Assert.Equal((nuint)unchecked((nuint)3898354148166517637), a2.F0.F0.F0);
+            Assert.Equal((nuint)unchecked((nuint)1003118682503285076), a2.F1);
+            Assert.Equal((uint)1418362079, a2.F2);
+            Assert.Equal((long)3276689793574299746, a2.F3);
+            Assert.Equal((short)-18559, a2.F4);
+            Assert.Equal((uint)1773011602, a3);
+            Assert.Equal((ushort)32638, a4);
+            Assert.Equal((ushort)47129, a5.F0);
+            Assert.Equal((short)-31849, a5.F1);
+            Assert.Equal((nint)unchecked((nint)4795020225668482328), a6.F0);
+            Assert.Equal((nuint)unchecked((nuint)5307513663902191175), a7.F0);
+            Assert.Equal((ulong)7057074401404034083, a8.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F90_Ret(25416, unchecked((nint)5015525780568020281));
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc90()
+    {
+        Console.Write("Running SwiftCallbackFunc90: ");
+        ExceptionDispatchInfo ex = null;
+        F90_Ret val = SwiftCallbackFunc90(&SwiftCallbackFunc90Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((short)25416, val.F0);
+        Assert.Equal((nint)unchecked((nint)5015525780568020281), val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F91_S0
+    {
+        public sbyte F0;
+        public nint F1;
+        public ushort F2;
+        public ushort F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 36)]
+    struct F91_S1
+    {
+        public double F0;
+        public ulong F1;
+        public sbyte F2;
+        public long F3;
+        public float F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F91_S2_S0_S0
+    {
+        public long F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F91_S2_S0
+    {
+        public F91_S2_S0_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F91_S2
+    {
+        public double F0;
+        public F91_S2_S0 F1;
+        public short F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F91_S3_S0
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F91_S3
+    {
+        public F91_S3_S0 F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct F91_Ret
+    {
+        public long F0;
+        public ulong F1;
+        public short F2;
+        public uint F3;
+
+        public F91_Ret(long f0, ulong f1, short f2, uint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func911fAA7F91_RetVAeA0G3_S0V_s5Int16Vs6UInt32VSdAA0G3_S1Vs5Int64Vs6UInt64VSfAA0G3_S2VSiAA0G3_S3VtXE_tF")]
+    private static extern F91_Ret SwiftCallbackFunc91(delegate* unmanaged[Swift]<F91_S0, short, uint, double, F91_S1, long, ulong, float, F91_S2, nint, F91_S3, SwiftSelf, F91_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F91_Ret SwiftCallbackFunc91Callback(F91_S0 a0, short a1, uint a2, double a3, F91_S1 a4, long a5, ulong a6, float a7, F91_S2 a8, nint a9, F91_S3 a10, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)-117, a0.F0);
+            Assert.Equal((nint)unchecked((nint)6851485542307521521), a0.F1);
+            Assert.Equal((ushort)23224, a0.F2);
+            Assert.Equal((ushort)28870, a0.F3);
+            Assert.Equal((short)-26318, a1);
+            Assert.Equal((uint)874052395, a2);
+            Assert.Equal((double)3651199868446152, a3);
+            Assert.Equal((double)3201729800438540, a4.F0);
+            Assert.Equal((ulong)7737032265509566019, a4.F1);
+            Assert.Equal((sbyte)123, a4.F2);
+            Assert.Equal((long)7508633930609553617, a4.F3);
+            Assert.Equal((float)8230501, a4.F4);
+            Assert.Equal((long)2726677037673277403, a5);
+            Assert.Equal((ulong)4990410590084533996, a6);
+            Assert.Equal((float)3864639, a7);
+            Assert.Equal((double)1763083442463892, a8.F0);
+            Assert.Equal((long)6783710957456602933, a8.F1.F0.F0);
+            Assert.Equal((short)2927, a8.F2);
+            Assert.Equal((nint)unchecked((nint)3359440517385934325), a9);
+            Assert.Equal((nuint)unchecked((nuint)3281136825102667421), a10.F0.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F91_Ret(8703949006228331232, 4839530995689756024, 14798, 1337111683);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc91()
+    {
+        Console.Write("Running SwiftCallbackFunc91: ");
+        ExceptionDispatchInfo ex = null;
+        F91_Ret val = SwiftCallbackFunc91(&SwiftCallbackFunc91Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((long)8703949006228331232, val.F0);
+        Assert.Equal((ulong)4839530995689756024, val.F1);
+        Assert.Equal((short)14798, val.F2);
+        Assert.Equal((uint)1337111683, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F92_S0
+    {
+        public double F0;
+        public double F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F92_S1
+    {
+        public uint F0;
+        public long F1;
+        public uint F2;
+        public short F3;
+        public ulong F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F92_S2_S0
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct F92_S2
+    {
+        public uint F0;
+        public long F1;
+        public F92_S2_S0 F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F92_Ret
+    {
+        public int F0;
+
+        public F92_Ret(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func921fAA7F92_RetVAEs6UInt32V_s5Int64VAA0G3_S0VSis5UInt8VAA0G3_S1VAA0G3_S2VAMSis5Int32VtXE_tF")]
+    private static extern F92_Ret SwiftCallbackFunc92(delegate* unmanaged[Swift]<uint, long, F92_S0, nint, byte, F92_S1, F92_S2, byte, nint, int, SwiftSelf, F92_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F92_Ret SwiftCallbackFunc92Callback(uint a0, long a1, F92_S0 a2, nint a3, byte a4, F92_S1 a5, F92_S2 a6, byte a7, nint a8, int a9, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((uint)479487770, a0);
+            Assert.Equal((long)3751818229732502126, a1);
+            Assert.Equal((double)3486664439392893, a2.F0);
+            Assert.Equal((double)1451061144702448, a2.F1);
+            Assert.Equal((nint)unchecked((nint)1103649059951788126), a3);
+            Assert.Equal((byte)17, a4);
+            Assert.Equal((uint)1542537473, a5.F0);
+            Assert.Equal((long)2256304993713022795, a5.F1);
+            Assert.Equal((uint)1773847876, a5.F2);
+            Assert.Equal((short)-4712, a5.F3);
+            Assert.Equal((ulong)2811859744132572185, a5.F4);
+            Assert.Equal((uint)290315682, a6.F0);
+            Assert.Equal((long)4847587202070249866, a6.F1);
+            Assert.Equal((ushort)20774, a6.F2.F0);
+            Assert.Equal((byte)8, a7);
+            Assert.Equal((nint)unchecked((nint)2206063999764082749), a8);
+            Assert.Equal((int)1481391120, a9);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F92_Ret(2031462105);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc92()
+    {
+        Console.Write("Running SwiftCallbackFunc92: ");
+        ExceptionDispatchInfo ex = null;
+        F92_Ret val = SwiftCallbackFunc92(&SwiftCallbackFunc92Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((int)2031462105, val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F93_S0
+    {
+        public sbyte F0;
+        public uint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F93_S1
+    {
+        public uint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F93_Ret
+    {
+        public nint F0;
+        public ulong F1;
+
+        public F93_Ret(nint f0, ulong f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func931fAA7F93_RetVAESu_s6UInt16VSdAA0G3_S0VAA0G3_S1VtXE_tF")]
+    private static extern F93_Ret SwiftCallbackFunc93(delegate* unmanaged[Swift]<nuint, ushort, double, F93_S0, F93_S1, SwiftSelf, F93_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F93_Ret SwiftCallbackFunc93Callback(nuint a0, ushort a1, double a2, F93_S0 a3, F93_S1 a4, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nuint)unchecked((nuint)5170226481546239050), a0);
+            Assert.Equal((ushort)2989, a1);
+            Assert.Equal((double)1630717078645270, a2);
+            Assert.Equal((sbyte)-46, a3.F0);
+            Assert.Equal((uint)859171256, a3.F1);
+            Assert.Equal((uint)254449240, a4.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F93_Ret(unchecked((nint)7713003294977630041), 4769707787914611024);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc93()
+    {
+        Console.Write("Running SwiftCallbackFunc93: ");
+        ExceptionDispatchInfo ex = null;
+        F93_Ret val = SwiftCallbackFunc93(&SwiftCallbackFunc93Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)7713003294977630041), val.F0);
+        Assert.Equal((ulong)4769707787914611024, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F94_S0
+    {
+        public nuint F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F94_S1
+    {
+        public int F0;
+        public nuint F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 14)]
+    struct F94_S2
+    {
+        public nint F0;
+        public uint F1;
+        public ushort F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct F94_S3
+    {
+        public byte F0;
+        public int F1;
+        public float F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F94_S4
+    {
+        public int F0;
+        public long F1;
+        public float F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 19)]
+    struct F94_S5
+    {
+        public short F0;
+        public nuint F1;
+        public short F2;
+        public sbyte F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F94_Ret
+    {
+        public long F0;
+
+        public F94_Ret(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func941fAA7F94_RetVAeA0G3_S0V_s5Int16VAA0G3_S1VAA0G3_S2VAA0G3_S3VSfAA0G3_S4Vs6UInt32VAA0G3_S5VAItXE_tF")]
+    private static extern F94_Ret SwiftCallbackFunc94(delegate* unmanaged[Swift]<F94_S0, short, F94_S1, F94_S2, F94_S3, float, F94_S4, uint, F94_S5, short, SwiftSelf, F94_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F94_Ret SwiftCallbackFunc94Callback(F94_S0 a0, short a1, F94_S1 a2, F94_S2 a3, F94_S3 a4, float a5, F94_S4 a6, uint a7, F94_S5 a8, short a9, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((nuint)unchecked((nuint)8626725032375870186), a0.F0);
+            Assert.Equal((short)-7755, a1);
+            Assert.Equal((int)544707027, a2.F0);
+            Assert.Equal((nuint)unchecked((nuint)2251410026467996594), a2.F1);
+            Assert.Equal((nint)unchecked((nint)2972912419231960385), a3.F0);
+            Assert.Equal((uint)740529487, a3.F1);
+            Assert.Equal((ushort)34526, a3.F2);
+            Assert.Equal((byte)41, a4.F0);
+            Assert.Equal((int)1598856955, a4.F1);
+            Assert.Equal((float)5126603, a4.F2);
+            Assert.Equal((float)7242977, a5);
+            Assert.Equal((int)473684762, a6.F0);
+            Assert.Equal((long)4023878650965716094, a6.F1);
+            Assert.Equal((float)2777693, a6.F2);
+            Assert.Equal((uint)1612378906, a7);
+            Assert.Equal((short)-17074, a8.F0);
+            Assert.Equal((nuint)unchecked((nuint)2666903737827472071), a8.F1);
+            Assert.Equal((short)418, a8.F2);
+            Assert.Equal((sbyte)106, a8.F3);
+            Assert.Equal((short)-14547, a9);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F94_Ret(4965341488842559693);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc94()
+    {
+        Console.Write("Running SwiftCallbackFunc94: ");
+        ExceptionDispatchInfo ex = null;
+        F94_Ret val = SwiftCallbackFunc94(&SwiftCallbackFunc94Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((long)4965341488842559693, val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F95_S0
+    {
+        public ushort F0;
+        public long F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F95_S1
+    {
+        public uint F0;
+        public short F1;
+        public double F2;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F95_S2
+    {
+        public ushort F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F95_Ret_S0
+    {
+        public short F0;
+
+        public F95_Ret_S0(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 14)]
+    struct F95_Ret
+    {
+        public nint F0;
+        public short F1;
+        public sbyte F2;
+        public byte F3;
+        public F95_Ret_S0 F4;
+
+        public F95_Ret(nint f0, short f1, sbyte f2, byte f3, F95_Ret_S0 f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func951fAA7F95_RetVAeA0G3_S0V_SuAA0G3_S1VAA0G3_S2VtXE_tF")]
+    private static extern F95_Ret SwiftCallbackFunc95(delegate* unmanaged[Swift]<F95_S0, nuint, F95_S1, F95_S2, SwiftSelf, F95_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F95_Ret SwiftCallbackFunc95Callback(F95_S0 a0, nuint a1, F95_S1 a2, F95_S2 a3, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((ushort)45388, a0.F0);
+            Assert.Equal((long)6620047889014935849, a0.F1);
+            Assert.Equal((nuint)unchecked((nuint)97365157264460373), a1);
+            Assert.Equal((uint)357234637, a2.F0);
+            Assert.Equal((short)-13720, a2.F1);
+            Assert.Equal((double)3313430568949662, a2.F2);
+            Assert.Equal((ushort)14248, a3.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F95_Ret(unchecked((nint)6503817931835164175), 1481, 117, 79, new F95_Ret_S0(-2735));
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc95()
+    {
+        Console.Write("Running SwiftCallbackFunc95: ");
+        ExceptionDispatchInfo ex = null;
+        F95_Ret val = SwiftCallbackFunc95(&SwiftCallbackFunc95Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)6503817931835164175), val.F0);
+        Assert.Equal((short)1481, val.F1);
+        Assert.Equal((sbyte)117, val.F2);
+        Assert.Equal((byte)79, val.F3);
+        Assert.Equal((short)-2735, val.F4.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct F96_S0
+    {
+        public long F0;
+        public uint F1;
+        public short F2;
+        public double F3;
+        public double F4;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F96_S1
+    {
+        public ulong F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F96_S2
+    {
+        public float F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func961fs6UInt64VAEs6UInt32V_AA6F96_S0VSfAe2gA0I3_S1VAA0I3_S2Vs5Int64VtXE_tF")]
+    private static extern ulong SwiftCallbackFunc96(delegate* unmanaged[Swift]<uint, F96_S0, float, ulong, uint, uint, F96_S1, F96_S2, long, SwiftSelf, ulong> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static ulong SwiftCallbackFunc96Callback(uint a0, F96_S0 a1, float a2, ulong a3, uint a4, uint a5, F96_S1 a6, F96_S2 a7, long a8, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((uint)1103144790, a0);
+            Assert.Equal((long)496343164737276588, a1.F0);
+            Assert.Equal((uint)1541085564, a1.F1);
+            Assert.Equal((short)-16271, a1.F2);
+            Assert.Equal((double)1062575289573718, a1.F3);
+            Assert.Equal((double)570255786498865, a1.F4);
+            Assert.Equal((float)7616839, a2);
+            Assert.Equal((ulong)7370881799887414383, a3);
+            Assert.Equal((uint)390392554, a4);
+            Assert.Equal((uint)1492692139, a5);
+            Assert.Equal((ulong)1666031716012978365, a6.F0);
+            Assert.Equal((float)3427394, a7.F0);
+            Assert.Equal((long)4642371619161527189, a8);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 8803914823303717324;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc96()
+    {
+        Console.Write("Running SwiftCallbackFunc96: ");
+        ExceptionDispatchInfo ex = null;
+        ulong val = SwiftCallbackFunc96(&SwiftCallbackFunc96Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ulong)8803914823303717324, val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F97_S0
+    {
+        public sbyte F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F97_S1
+    {
+        public long F0;
+        public ulong F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct F97_S2
+    {
+        public byte F0;
+        public long F1;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct F97_S3
+    {
+        public double F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F97_Ret_S0
+    {
+        public int F0;
+
+        public F97_Ret_S0(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct F97_Ret
+    {
+        public double F0;
+        public nuint F1;
+        public F97_Ret_S0 F2;
+        public ushort F3;
+        public uint F4;
+
+        public F97_Ret(double f0, nuint f1, F97_Ret_S0 f2, ushort f3, uint f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func971fAA7F97_RetVAeA0G3_S0V_AA0G3_S1VAA0G3_S2VAA0G3_S3VtXE_tF")]
+    private static extern F97_Ret SwiftCallbackFunc97(delegate* unmanaged[Swift]<F97_S0, F97_S1, F97_S2, F97_S3, SwiftSelf, F97_Ret> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static F97_Ret SwiftCallbackFunc97Callback(F97_S0 a0, F97_S1 a1, F97_S2 a2, F97_S3 a3, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((sbyte)-87, a0.F0);
+            Assert.Equal((long)1414208343412494909, a1.F0);
+            Assert.Equal((ulong)453284654311256466, a1.F1);
+            Assert.Equal((byte)224, a2.F0);
+            Assert.Equal((long)1712859616922087053, a2.F1);
+            Assert.Equal((double)3987671154739178, a3.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return new F97_Ret(3262802544778620, unchecked((nuint)988644880611380240), new F97_Ret_S0(1818371708), 15694, 2068394006);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc97()
+    {
+        Console.Write("Running SwiftCallbackFunc97: ");
+        ExceptionDispatchInfo ex = null;
+        F97_Ret val = SwiftCallbackFunc97(&SwiftCallbackFunc97Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((double)3262802544778620, val.F0);
+        Assert.Equal((nuint)unchecked((nuint)988644880611380240), val.F1);
+        Assert.Equal((int)1818371708, val.F2.F0);
+        Assert.Equal((ushort)15694, val.F3);
+        Assert.Equal((uint)2068394006, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct F98_S0
+    {
+        public int F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func981fS2iSf_s6UInt16VAA6F98_S0VAEtXE_tF")]
+    private static extern nint SwiftCallbackFunc98(delegate* unmanaged[Swift]<float, ushort, F98_S0, ushort, SwiftSelf, nint> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static nint SwiftCallbackFunc98Callback(float a0, ushort a1, F98_S0 a2, ushort a3, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((float)2863898, a0);
+            Assert.Equal((ushort)37573, a1);
+            Assert.Equal((int)1073068257, a2.F0);
+            Assert.Equal((ushort)53560, a3);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return unchecked((nint)6686142382639170849);
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc98()
+    {
+        Console.Write("Running SwiftCallbackFunc98: ");
+        ExceptionDispatchInfo ex = null;
+        nint val = SwiftCallbackFunc98(&SwiftCallbackFunc98Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((nint)unchecked((nint)6686142382639170849), val);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct F99_S0
+    {
+        public nint F0;
+        public uint F1;
+        public int F2;
+        public uint F3;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct F99_S1
+    {
+        public short F0;
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct F99_S2
+    {
+        public byte F0;
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s22SwiftCallbackAbiStress05swiftB6Func991fs6UInt64VAEs5Int64V_SuSfs6UInt16VAA6F99_S0Vs5UInt8VSfAMs4Int8VAA0J3_S1VAA0J3_S2VtXE_tF")]
+    private static extern ulong SwiftCallbackFunc99(delegate* unmanaged[Swift]<long, nuint, float, ushort, F99_S0, byte, float, byte, sbyte, F99_S1, F99_S2, SwiftSelf, ulong> func, void* funcContext);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static ulong SwiftCallbackFunc99Callback(long a0, nuint a1, float a2, ushort a3, F99_S0 a4, byte a5, float a6, byte a7, sbyte a8, F99_S1 a9, F99_S2 a10, SwiftSelf self)
+    {
+        try
+        {
+            Assert.Equal((long)1152281003884062246, a0);
+            Assert.Equal((nuint)unchecked((nuint)2482384127373829622), a1);
+            Assert.Equal((float)3361150, a2);
+            Assert.Equal((ushort)2121, a3);
+            Assert.Equal((nint)unchecked((nint)4484545590050696958), a4.F0);
+            Assert.Equal((uint)422528630, a4.F1);
+            Assert.Equal((int)1418346646, a4.F2);
+            Assert.Equal((uint)1281567856, a4.F3);
+            Assert.Equal((byte)223, a5);
+            Assert.Equal((float)1917656, a6);
+            Assert.Equal((byte)103, a7);
+            Assert.Equal((sbyte)-46, a8);
+            Assert.Equal((short)14554, a9.F0);
+            Assert.Equal((byte)68, a10.F0);
+        }
+        catch (Exception ex)
+        {
+            *(ExceptionDispatchInfo*)self.Value = ExceptionDispatchInfo.Capture(ex);
+        }
+
+        return 8220698022338840251;
+    }
+
+    [Fact]
+    public static void TestSwiftCallbackFunc99()
+    {
+        Console.Write("Running SwiftCallbackFunc99: ");
+        ExceptionDispatchInfo ex = null;
+        ulong val = SwiftCallbackFunc99(&SwiftCallbackFunc99Callback, &ex);
+        if (ex != null)
+            ex.Throw();
+
+        Assert.Equal((ulong)8220698022338840251, val);
+        Console.WriteLine("OK");
+    }
+
+}
diff --git a/src/tests/Interop/Swift/SwiftCallbackAbiStress/SwiftCallbackAbiStress.csproj b/src/tests/Interop/Swift/SwiftCallbackAbiStress/SwiftCallbackAbiStress.csproj
new file mode 100644
index 000000000000..a57cd84cf884
--- /dev/null
+++ b/src/tests/Interop/Swift/SwiftCallbackAbiStress/SwiftCallbackAbiStress.csproj
@@ -0,0 +1,16 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <!-- Needed for CLRTestTargetUnsupported, CMakeProjectReference -->
+    <RequiresProcessIsolation>true</RequiresProcessIsolation>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/93631 -->
+    <CLRTestTargetUnsupported Condition="'$(TargetsOSX)' != 'true' and '$(TargetsAppleMobile)' != 'true'">true</CLRTestTargetUnsupported>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="$(TestLibraryProjectPath)" />
+    <CMakeProjectReference Include="CMakeLists.txt" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/Interop/Swift/SwiftCallbackAbiStress/SwiftCallbackAbiStress.swift b/src/tests/Interop/Swift/SwiftCallbackAbiStress/SwiftCallbackAbiStress.swift
new file mode 100644
index 000000000000..3089ae2e5e09
--- /dev/null
+++ b/src/tests/Interop/Swift/SwiftCallbackAbiStress/SwiftCallbackAbiStress.swift
@@ -0,0 +1,3904 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+import Foundation
+
+@frozen
+public struct F0_S0
+{
+    public let f0 : Double;
+    public let f1 : UInt32;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F0_S1
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F0_S2
+{
+    public let f0 : Float;
+}
+
+public func swiftCallbackFunc0(f: (Int16, Int32, UInt64, UInt16, F0_S0, F0_S1, UInt8, F0_S2) -> Int32) -> Int32 {
+    return f(-17813, 318006528, 1195162122024233590, 60467, F0_S0(f0: 2239972725713766, f1: 1404066621, f2: 29895), F0_S1(f0: 7923486769850554262), 217, F0_S2(f0: 2497655))
+}
+
+@frozen
+public struct F1_S0
+{
+    public let f0 : UInt16;
+    public let f1 : UInt8;
+}
+
+@frozen
+public struct F1_S1
+{
+    public let f0 : UInt8;
+    public let f1 : UInt64;
+    public let f2 : Int16;
+    public let f3 : Float;
+    public let f4 : Float;
+}
+
+@frozen
+public struct F1_S2_S0
+{
+    public let f0 : UInt32;
+    public let f1 : Double;
+}
+
+@frozen
+public struct F1_S2
+{
+    public let f0 : Int8;
+    public let f1 : UInt;
+    public let f2 : F1_S2_S0;
+    public let f3 : Int;
+}
+
+@frozen
+public struct F1_S3
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F1_S4
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F1_S5_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F1_S5
+{
+    public let f0 : F1_S5_S0;
+}
+
+public func swiftCallbackFunc1(f: (Int64, Double, Int8, F1_S0, F1_S1, F1_S2, UInt8, Int8, Int64, F1_S3, UInt, F1_S4, F1_S5, Int) -> UInt8) -> UInt8 {
+    return f(7920511243396412395, 1396130721334528, -55, F1_S0(f0: 33758, f1: 103), F1_S1(f0: 201, f1: 7390774039746135757, f2: 14699, f3: 7235330, f4: 7189013), F1_S2(f0: 37, f1: 3310322731568932038, f2: F1_S2_S0(f0: 1100328218, f1: 1060779460203640), f3: 8325292022909418877), 137, 82, 1197537325837505041, F1_S3(f0: 46950), 8181828233622947597, F1_S4(f0: 1851182205030289056), F1_S5(f0: F1_S5_S0(f0: 1971014225)), 6437995407675718392)
+}
+
+@frozen
+public struct F2_S0
+{
+    public let f0 : Int32;
+    public let f1 : UInt;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F2_S1_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F2_S1
+{
+    public let f0 : Int64;
+    public let f1 : UInt16;
+    public let f2 : F2_S1_S0;
+    public let f3 : Int;
+    public let f4 : Double;
+}
+
+@frozen
+public struct F2_S2
+{
+    public let f0 : Float;
+    public let f1 : Int32;
+    public let f2 : UInt16;
+    public let f3 : Int8;
+}
+
+@frozen
+public struct F2_S3_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F2_S3
+{
+    public let f0 : F2_S3_S0;
+}
+
+public func swiftCallbackFunc2(f: (F2_S0, F2_S1, F2_S2, Float, UInt64, F2_S3) -> Int8) -> Int8 {
+    return f(F2_S0(f0: 1860840185, f1: 5407074783834178811, f2: 6261766), F2_S1(f0: 4033972792915237065, f1: 22825, f2: F2_S1_S0(f0: 44574), f3: 4536911485304731630, f4: 4282944015147385), F2_S2(f0: 2579193, f1: 586252933, f2: 47002, f3: 71), 3225929, 3599444831393612282, F2_S3(f0: F2_S3_S0(f0: 13)))
+}
+
+@frozen
+public struct F3_S0_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F3_S0
+{
+    public let f0 : F3_S0_S0;
+}
+
+@frozen
+public struct F3_S1
+{
+    public let f0 : UInt32;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct F3_S2_S0
+{
+    public let f0 : Int16;
+    public let f1 : UInt8;
+}
+
+@frozen
+public struct F3_S2
+{
+    public let f0 : F3_S2_S0;
+    public let f1 : Int8;
+    public let f2 : UInt8;
+}
+
+@frozen
+public struct F3_S3
+{
+    public let f0 : UInt64;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct F3_S4
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F3_Ret
+{
+    public let f0 : UInt16;
+    public let f1 : UInt8;
+    public let f2 : UInt16;
+    public let f3 : Float;
+}
+
+public func swiftCallbackFunc3(f: (F3_S0, Float, UInt16, F3_S1, UInt16, Int32, F3_S2, Int, F3_S3, F3_S4) -> F3_Ret) -> F3_Ret {
+    return f(F3_S0(f0: F3_S0_S0(f0: 5610153900386943274)), 7736836, 31355, F3_S1(f0: 1159208572, f1: 2707818827451590538), 37580, 1453603418, F3_S2(f0: F3_S2_S0(f0: 699, f1: 46), f1: -125, f2: 92), 94557706586779834, F3_S3(f0: 2368015527878194540, f1: 5026404532195049271), F3_S4(f0: 21807))
+}
+
+@frozen
+public struct F4_S0_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F4_S0
+{
+    public let f0 : F4_S0_S0;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F4_Ret_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F4_Ret
+{
+    public let f0 : Int32;
+    public let f1 : F4_Ret_S0;
+    public let f2 : Int;
+    public let f3 : Int16;
+    public let f4 : Int;
+    public let f5 : UInt32;
+}
+
+public func swiftCallbackFunc4(f: (Double, F4_S0, UInt8, Int32, UInt32) -> F4_Ret) -> F4_Ret {
+    return f(4282972206489588, F4_S0(f0: F4_S0_S0(f0: 611688063), f1: 877466), 53, 965123506, 1301067653)
+}
+
+@frozen
+public struct F5_S0
+{
+    public let f0 : UInt;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F5_S1_S0
+{
+    public let f0 : Int;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F5_S1_S1
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F5_S1
+{
+    public let f0 : F5_S1_S0;
+    public let f1 : F5_S1_S1;
+}
+
+@frozen
+public struct F5_S2
+{
+    public let f0 : Double;
+    public let f1 : Int8;
+    public let f2 : Int;
+}
+
+@frozen
+public struct F5_S3
+{
+    public let f0 : Int64;
+    public let f1 : Double;
+}
+
+@frozen
+public struct F5_S4
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F5_Ret
+{
+    public let f0 : Int16;
+    public let f1 : Int32;
+    public let f2 : Int32;
+    public let f3 : UInt64;
+    public let f4 : Int16;
+}
+
+public func swiftCallbackFunc5(f: (UInt8, Int16, UInt64, UInt, UInt, UInt64, UInt8, F5_S0, Int8, Int8, F5_S1, F5_S2, F5_S3, Double, F5_S4, UInt16, Float, Float, UInt16) -> F5_Ret) -> F5_Ret {
+    return f(42, 18727, 3436765034579128495, 6305137336506323506, 6280137078630028944, 6252650621827449809, 129, F5_S0(f0: 6879980973426111678, f1: 1952654577), -34, 102, F5_S1(f0: F5_S1_S0(f0: 8389143657021522019, f1: 437030241), f1: F5_S1_S1(f0: 7522798)), F5_S2(f0: 523364011167530, f1: 16, f2: 3823439046574037759), F5_S3(f0: 3767260839267771462, f1: 1181031208183008), 2338830539621828, F5_S4(f0: 36276), 41286, 6683955, 6399917, 767)
+}
+
+@frozen
+public struct F6_S0_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F6_S0
+{
+    public let f0 : Int8;
+    public let f1 : Int8;
+    public let f2 : Int32;
+    public let f3 : F6_S0_S0;
+}
+
+@frozen
+public struct F6_S1
+{
+    public let f0 : Int32;
+    public let f1 : UInt64;
+    public let f2 : UInt64;
+    public let f3 : UInt32;
+}
+
+@frozen
+public struct F6_S2
+{
+    public let f0 : Int64;
+    public let f1 : Int16;
+    public let f2 : Int8;
+}
+
+@frozen
+public struct F6_S3
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F6_Ret_S0
+{
+    public let f0 : Int64;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F6_Ret
+{
+    public let f0 : F6_Ret_S0;
+    public let f1 : UInt64;
+    public let f2 : Float;
+    public let f3 : Int8;
+}
+
+public func swiftCallbackFunc6(f: (Float, F6_S0, Int64, Int8, UInt16, UInt, UInt16, UInt64, F6_S1, Int16, F6_S2, F6_S3, UInt16) -> F6_Ret) -> F6_Ret {
+    return f(2905241, F6_S0(f0: -27, f1: -77, f2: 1315779092, f3: F6_S0_S0(f0: 5373970)), 7022244764256789748, -110, 2074, 3560129042279209151, 2200, 5730241035812482149, F6_S1(f0: 18625011, f1: 242340713355417257, f2: 6962175160124965670, f3: 1983617839), -28374, F6_S2(f0: 6355748563312062178, f1: -23189, f2: 81), F6_S3(f0: 4547677), 6397)
+}
+
+@frozen
+public struct F7_S0
+{
+    public let f0 : Float;
+    public let f1 : Int64;
+    public let f2 : UInt;
+}
+
+@frozen
+public struct F7_S1
+{
+    public let f0 : Int16;
+    public let f1 : UInt32;
+    public let f2 : UInt32;
+}
+
+public func swiftCallbackFunc7(f: (Int64, UInt8, Double, UInt16, F7_S0, UInt8, Double, UInt32, F7_S1, Int32, Int32, Int, Int16, UInt16, Int, UInt64, UInt8, Int16) -> UInt16) -> UInt16 {
+    return f(7625368278886567558, 70, 2146971972122530, 54991, F7_S0(f0: 1072132, f1: 3890459003549150599, f2: 56791000421908673), 227, 3248250571953113, 1138780108, F7_S1(f0: -22670, f1: 1796712687, f2: 304251857), 1288765591, 1382721790, 6746417265635727373, -15600, 47575, 7200793040165597188, 2304985873826892392, 99, -9993)
+}
+
+@frozen
+public struct F8_S0
+{
+    public let f0 : Int16;
+    public let f1 : Int16;
+    public let f2 : UInt;
+}
+
+@frozen
+public struct F8_S1
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F8_Ret_S0
+{
+    public let f0 : Int32;
+    public let f1 : UInt;
+    public let f2 : Int;
+}
+
+@frozen
+public struct F8_Ret
+{
+    public let f0 : Int64;
+    public let f1 : F8_Ret_S0;
+    public let f2 : Int;
+    public let f3 : UInt32;
+}
+
+public func swiftCallbackFunc8(f: (F8_S0, F8_S1) -> F8_Ret) -> F8_Ret {
+    return f(F8_S0(f0: 16278, f1: -31563, f2: 2171308312325435543), F8_S1(f0: 8923668560896309835))
+}
+
+@frozen
+public struct F9_S0_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F9_S0
+{
+    public let f0 : F9_S0_S0;
+    public let f1 : Int16;
+}
+
+@frozen
+public struct F9_S1_S0
+{
+    public let f0 : Int64;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct F9_S1
+{
+    public let f0 : Int;
+    public let f1 : F9_S1_S0;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F9_S2
+{
+    public let f0 : UInt64;
+    public let f1 : Double;
+    public let f2 : Int16;
+    public let f3 : Int8;
+}
+
+@frozen
+public struct F9_S3_S0_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F9_S3_S0
+{
+    public let f0 : F9_S3_S0_S0;
+}
+
+@frozen
+public struct F9_S3
+{
+    public let f0 : Int8;
+    public let f1 : F9_S3_S0;
+}
+
+@frozen
+public struct F9_S4_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F9_S4
+{
+    public let f0 : F9_S4_S0;
+    public let f1 : Int8;
+}
+
+@frozen
+public struct F9_S5_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F9_S5
+{
+    public let f0 : UInt32;
+    public let f1 : F9_S5_S0;
+}
+
+@frozen
+public struct F9_S6
+{
+    public let f0 : Double;
+}
+
+public func swiftCallbackFunc9(f: (Int8, UInt8, Int64, F9_S0, F9_S1, F9_S2, Double, F9_S3, F9_S4, Double, F9_S5, F9_S6) -> UInt16) -> UInt16 {
+    return f(17, 104, 8922699691031703191, F9_S0(f0: F9_S0_S0(f0: 123), f1: 31706), F9_S1(f0: 1804058604961822948, f1: F9_S1_S0(f0: 8772179036715198777, f1: 3320511540592563328), f2: 679540), F9_S2(f0: 8642590829466497926, f1: 4116322155252965, f2: 17992, f3: -48), 414017537937894, F9_S3(f0: 47, f1: F9_S3_S0(f0: F9_S3_S0_S0(f0: 7576380984563129085))), F9_S4(f0: F9_S4_S0(f0: 1356827400304742803), f1: -17), 4458031413035521, F9_S5(f0: 352075098, f1: F9_S5_S0(f0: 1840980094)), F9_S6(f0: 396957263013930))
+}
+
+@frozen
+public struct F10_Ret
+{
+    public let f0 : Int64;
+    public let f1 : UInt32;
+    public let f2 : UInt16;
+    public let f3 : UInt32;
+}
+
+public func swiftCallbackFunc10(f: (Int16) -> F10_Ret) -> F10_Ret {
+    return f(-7168)
+}
+
+@frozen
+public struct F11_S0_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F11_S0
+{
+    public let f0 : UInt32;
+    public let f1 : F11_S0_S0;
+    public let f2 : UInt;
+    public let f3 : Int32;
+    public let f4 : Int64;
+}
+
+@frozen
+public struct F11_S1_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F11_S1
+{
+    public let f0 : F11_S1_S0;
+    public let f1 : Int16;
+    public let f2 : UInt32;
+    public let f3 : Int16;
+}
+
+@frozen
+public struct F11_S2
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F11_Ret
+{
+    public let f0 : Int16;
+    public let f1 : Int16;
+    public let f2 : UInt8;
+    public let f3 : Int64;
+}
+
+public func swiftCallbackFunc11(f: (UInt32, UInt, UInt64, Int16, F11_S0, Float, Int8, UInt16, F11_S1, UInt32, Int64, UInt32, F11_S2) -> F11_Ret) -> F11_Ret {
+    return f(454751144, 1696592254558667577, 5831587230944972245, 15352, F11_S0(f0: 1306601347, f1: F11_S0_S0(f0: 123), f2: 3064471520018434938, f3: 272956246, f4: 3683518307106722029), 5606122, -126, 50801, F11_S1(f0: F11_S1_S0(f0: 63467), f1: -31828, f2: 2117176776, f3: -27265), 1879606687, 4981244336430926707, 1159924856, F11_S2(f0: 29))
+}
+
+@frozen
+public struct F12_S0
+{
+    public let f0 : UInt64;
+    public let f1 : Int8;
+}
+
+@frozen
+public struct F12_S1_S0_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F12_S1_S0
+{
+    public let f0 : F12_S1_S0_S0;
+}
+
+@frozen
+public struct F12_S1
+{
+    public let f0 : UInt16;
+    public let f1 : UInt32;
+    public let f2 : F12_S1_S0;
+}
+
+@frozen
+public struct F12_Ret
+{
+    public let f0 : UInt64;
+    public let f1 : Int;
+}
+
+public func swiftCallbackFunc12(f: (F12_S0, Int16, UInt64, F12_S1, Int8) -> F12_Ret) -> F12_Ret {
+    return f(F12_S0(f0: 3236871137735400659, f1: -123), -22828, 2132557792366642035, F12_S1(f0: 42520, f1: 879349060, f2: F12_S1_S0(f0: F12_S1_S0_S0(f0: 5694370973277919380))), -75)
+}
+
+@frozen
+public struct F13_S0_S0
+{
+    public let f0 : Int64;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct F13_S0
+{
+    public let f0 : F13_S0_S0;
+    public let f1 : Float;
+    public let f2 : Int16;
+}
+
+@frozen
+public struct F13_S1
+{
+    public let f0 : Int;
+    public let f1 : UInt64;
+}
+
+@frozen
+public struct F13_S2_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F13_S2
+{
+    public let f0 : F13_S2_S0;
+    public let f1 : Double;
+}
+
+@frozen
+public struct F13_S3
+{
+    public let f0 : Float;
+    public let f1 : Int8;
+}
+
+@frozen
+public struct F13_S4
+{
+    public let f0 : Int;
+}
+
+public func swiftCallbackFunc13(f: (F13_S0, Int32, Int, UInt16, UInt, F13_S1, F13_S2, Int, Double, Int8, Float, Int, F13_S3, UInt, F13_S4) -> Double) -> Double {
+    return f(F13_S0(f0: F13_S0_S0(f0: 9003727031576598067, f1: 8527798284445940986), f1: 3585628, f2: -12520), 1510815104, 5883331525294982326, 60738, 5291799143932627546, F13_S1(f0: 1949276559361384602, f1: 876048527237138968), F13_S2(f0: F13_S2_S0(f0: 67), f1: 2455575228564859), 2321408806345977320, 12750323283778, 46, 6774339, 5121910967292140178, F13_S3(f0: 8254279, f1: -7), 7533347207018595125, F13_S4(f0: 6605448167191082938))
+}
+
+@frozen
+public struct F14_S0
+{
+    public let f0 : Int8;
+    public let f1 : Float;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F14_S1
+{
+    public let f0 : UInt64;
+    public let f1 : UInt64;
+}
+
+public func swiftCallbackFunc14(f: (Int64, F14_S0, Int8, UInt64, F14_S1, Int) -> Int64) -> Int64 {
+    return f(5547219684656041875, F14_S0(f0: -39, f1: 5768837, f2: 53063), -102, 5745438709817040873, F14_S1(f0: 2178706453119907411, f1: 4424726479787355131), 5693881223150438553)
+}
+
+@frozen
+public struct F15_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F15_S1
+{
+    public let f0 : Int;
+    public let f1 : UInt32;
+    public let f2 : UInt8;
+    public let f3 : Int16;
+}
+
+@frozen
+public struct F15_S2
+{
+    public let f0 : Int8;
+    public let f1 : UInt64;
+    public let f2 : Int64;
+    public let f3 : UInt8;
+}
+
+@frozen
+public struct F15_S3
+{
+    public let f0 : Double;
+}
+
+public func swiftCallbackFunc15(f: (UInt8, UInt16, UInt64, UInt64, Int8, UInt, Double, Float, Int, F15_S0, F15_S1, UInt16, F15_S2, UInt8, F15_S3) -> Int) -> Int {
+    return f(0, 31081, 8814881608835743979, 4283853687332682681, 80, 7895994601265649979, 1855521542692398, 3235683, 215122646177738904, F15_S0(f0: 2044750195), F15_S1(f0: 1772412898183620625, f1: 131256973, f2: 153, f3: 25281), 50965, F15_S2(f0: -83, f1: 7751486385861474282, f2: 3744400479301818340, f3: 150), 179, F15_S3(f0: 3108143600787174))
+}
+
+@frozen
+public struct F16_S0
+{
+    public let f0 : Int8;
+    public let f1 : Int32;
+    public let f2 : UInt16;
+    public let f3 : UInt16;
+    public let f4 : UInt32;
+}
+
+@frozen
+public struct F16_S1
+{
+    public let f0 : UInt16;
+    public let f1 : Int8;
+    public let f2 : UInt8;
+    public let f3 : Int;
+    public let f4 : Int;
+}
+
+@frozen
+public struct F16_S2_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F16_S2
+{
+    public let f0 : Int32;
+    public let f1 : Int32;
+    public let f2 : UInt32;
+    public let f3 : UInt8;
+    public let f4 : F16_S2_S0;
+}
+
+@frozen
+public struct F16_S3
+{
+    public let f0 : Int16;
+    public let f1 : Double;
+    public let f2 : Double;
+    public let f3 : Int32;
+}
+
+public func swiftCallbackFunc16(f: (F16_S0, Int16, Float, F16_S1, F16_S2, UInt64, F16_S3, UInt) -> Int8) -> Int8 {
+    return f(F16_S0(f0: -59, f1: 1181591186, f2: 44834, f3: 28664, f4: 404461767), 2482, 2997348, F16_S1(f0: 22423, f1: -106, f2: 182, f3: 3784074551275084420, f4: 7092934571108982079), F16_S2(f0: 1835134709, f1: 246067261, f2: 1986526591, f3: 24, f4: F16_S2_S0(f0: -112)), 1465053746911704089, F16_S3(f0: -27636, f1: 1896887612303356, f2: 4263157082840190, f3: 774653659), 3755775782607884861)
+}
+
+@frozen
+public struct F17_S0
+{
+    public let f0 : Int32;
+    public let f1 : UInt;
+}
+
+@frozen
+public struct F17_S1_S0
+{
+    public let f0 : Double;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F17_S1
+{
+    public let f0 : F17_S1_S0;
+    public let f1 : Int32;
+    public let f2 : UInt8;
+}
+
+@frozen
+public struct F17_S2
+{
+    public let f0 : UInt32;
+}
+
+public func swiftCallbackFunc17(f: (UInt32, F17_S0, F17_S1, Double, UInt64, F17_S2) -> Double) -> Double {
+    return f(201081002, F17_S0(f0: 2018751226, f1: 8488544433072104028), F17_S1(f0: F17_S1_S0(f0: 1190765430157980, f1: 70252071), f1: 1297775609, f2: 160), 4290084351352688, 4738339757002694731, F17_S2(f0: 1829312773))
+}
+
+@frozen
+public struct F18_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F18_S1
+{
+    public let f0 : UInt16;
+    public let f1 : Int16;
+    public let f2 : Double;
+    public let f3 : UInt;
+}
+
+@frozen
+public struct F18_S2
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F18_Ret_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F18_Ret
+{
+    public let f0 : F18_Ret_S0;
+}
+
+public func swiftCallbackFunc18(f: (F18_S0, F18_S1, F18_S2, UInt, UInt32, Int64, Int16, Double) -> F18_Ret) -> F18_Ret {
+    return f(F18_S0(f0: 106), F18_S1(f0: 21619, f1: -4350, f2: 3457288266203248, f3: 9020447812661292883), F18_S2(f0: 2317132584983719004), 7379425918918939512, 2055208746, 1042861174364145790, 28457, 1799004152435515)
+}
+
+@frozen
+public struct F19_S0
+{
+    public let f0 : Int16;
+    public let f1 : Int8;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F19_S1
+{
+    public let f0 : Int64;
+    public let f1 : UInt16;
+}
+
+@frozen
+public struct F19_S2
+{
+    public let f0 : UInt64;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct F19_S3
+{
+    public let f0 : UInt32;
+    public let f1 : Int32;
+}
+
+@frozen
+public struct F19_Ret_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F19_Ret
+{
+    public let f0 : UInt32;
+    public let f1 : Int64;
+    public let f2 : UInt16;
+    public let f3 : F19_Ret_S0;
+    public let f4 : Double;
+    public let f5 : Double;
+    public let f6 : Double;
+}
+
+public func swiftCallbackFunc19(f: (Int64, UInt8, F19_S0, Int, F19_S1, Int32, Int32, UInt, UInt64, F19_S2, UInt16, F19_S3, Int8, Int64) -> F19_Ret) -> F19_Ret {
+    return f(7456120134117592143, 114, F19_S0(f0: -7583, f1: 97, f2: 2768322), 3605245176125291560, F19_S1(f0: 4445885313084714470, f1: 15810), 1179699879, 109603412, 6521628547431964799, 7687430644226018854, F19_S2(f0: 8464855230956039883, f1: 861462819289140037), 26519, F19_S3(f0: 1864602741, f1: 397176384), 81, 4909173176891211442)
+}
+
+@frozen
+public struct F20_S0_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F20_S0
+{
+    public let f0 : Int16;
+    public let f1 : UInt;
+    public let f2 : F20_S0_S0;
+}
+
+@frozen
+public struct F20_S1_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F20_S1
+{
+    public let f0 : Int64;
+    public let f1 : UInt;
+    public let f2 : F20_S1_S0;
+    public let f3 : Int64;
+    public let f4 : Int32;
+}
+
+@frozen
+public struct F20_S2
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F20_Ret
+{
+    public let f0 : UInt16;
+    public let f1 : UInt16;
+    public let f2 : Double;
+    public let f3 : Int16;
+    public let f4 : Double;
+}
+
+public func swiftCallbackFunc20(f: (F20_S0, F20_S1, Float, Float, Int8, F20_S2, Float) -> F20_Ret) -> F20_Ret {
+    return f(F20_S0(f0: 28858, f1: 7024100299344418039, f2: F20_S0_S0(f0: 13025)), F20_S1(f0: 7900431324553135989, f1: 8131425055682506706, f2: F20_S1_S0(f0: 3884322), f3: 605453501265278638, f4: 353756684), 622319, 1401604, -101, F20_S2(f0: 1355570413), 2912776)
+}
+
+@frozen
+public struct F21_S0
+{
+    public let f0 : Double;
+    public let f1 : UInt64;
+}
+
+@frozen
+public struct F21_S1
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F21_Ret
+{
+    public let f0 : UInt16;
+    public let f1 : UInt32;
+    public let f2 : Int64;
+}
+
+public func swiftCallbackFunc21(f: (Int32, Int16, F21_S0, Int32, F21_S1, Int64, UInt32, Int64, UInt8, UInt16) -> F21_Ret) -> F21_Ret {
+    return f(256017319, 14555, F21_S0(f0: 2102091966108033, f1: 8617538752301505079), 834677431, F21_S1(f0: 7043), 7166819734655141128, 965538086, 3827752442102685645, 110, 33646)
+}
+
+@frozen
+public struct F22_S0
+{
+    public let f0 : Int;
+    public let f1 : Float;
+    public let f2 : Double;
+}
+
+@frozen
+public struct F22_S1
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F22_S2
+{
+    public let f0 : Int32;
+    public let f1 : Double;
+    public let f2 : Float;
+    public let f3 : Int16;
+    public let f4 : UInt16;
+}
+
+@frozen
+public struct F22_S3
+{
+    public let f0 : Int64;
+    public let f1 : UInt16;
+}
+
+@frozen
+public struct F22_S4
+{
+    public let f0 : Double;
+    public let f1 : UInt16;
+}
+
+@frozen
+public struct F22_S5
+{
+    public let f0 : UInt32;
+    public let f1 : Int16;
+}
+
+@frozen
+public struct F22_S6
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F22_Ret
+{
+    public let f0 : UInt16;
+    public let f1 : Int16;
+    public let f2 : UInt;
+}
+
+public func swiftCallbackFunc22(f: (Int32, F22_S0, F22_S1, F22_S2, F22_S3, Int8, F22_S4, UInt8, UInt16, Int64, F22_S5, Int64, Float, F22_S6, UInt16) -> F22_Ret) -> F22_Ret {
+    return f(640156952, F22_S0(f0: 824774470287401457, f1: 6163704, f2: 54328782764685), F22_S1(f0: 1679730195865415747), F22_S2(f0: 1462995665, f1: 2554087365600344, f2: 8193295, f3: 16765, f4: 45388), F22_S3(f0: 5560492364570389430, f1: 48308), 71, F22_S4(f0: 1639169280741045, f1: 12045), 217, 62917, 1465918945905384332, F22_S5(f0: 1364750179, f1: 3311), 9003480567517966914, 2157327, F22_S6(f0: 6647392), 1760)
+}
+
+@frozen
+public struct F23_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F23_S1
+{
+    public let f0 : Int;
+}
+
+public func swiftCallbackFunc23(f: (UInt, UInt8, Int8, UInt8, UInt8, F23_S0, UInt, F23_S1, Double) -> Double) -> Double {
+    return f(5779410841248940897, 192, -128, 133, 20, F23_S0(f0: 2959916071636885436), 3651155214497129159, F23_S1(f0: 8141565342203061885), 1465425469608034)
+}
+
+@frozen
+public struct F24_S0
+{
+    public let f0 : Int8;
+    public let f1 : UInt8;
+    public let f2 : UInt64;
+    public let f3 : UInt32;
+}
+
+@frozen
+public struct F24_S1
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F24_S2_S0
+{
+    public let f0 : UInt16;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F24_S2_S1
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F24_S2
+{
+    public let f0 : Int;
+    public let f1 : UInt32;
+    public let f2 : F24_S2_S0;
+    public let f3 : F24_S2_S1;
+}
+
+@frozen
+public struct F24_S3
+{
+    public let f0 : Int16;
+    public let f1 : Float;
+    public let f2 : Int64;
+}
+
+@frozen
+public struct F24_S4
+{
+    public let f0 : UInt8;
+}
+
+public func swiftCallbackFunc24(f: (Int32, UInt, F24_S0, UInt16, F24_S1, Int8, F24_S2, UInt64, UInt64, F24_S3, Double, F24_S4) -> Float) -> Float {
+    return f(1710754874, 6447433131978039331, F24_S0(f0: -92, f1: 181, f2: 3710374263631495948, f3: 257210428), 6631, F24_S1(f0: 2303), 15, F24_S2(f0: 2509049432824972381, f1: 616918672, f2: F24_S2_S0(f0: 50635, f1: 1337844540), f3: F24_S2_S1(f0: 335964796567786281)), 1114365571136806382, 8988425145801188208, F24_S3(f0: 31969, f1: 3008861, f2: 5466306080595269107), 2027780227887952, F24_S4(f0: 234))
+}
+
+@frozen
+public struct F25_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F25_S1
+{
+    public let f0 : Float;
+    public let f1 : Int8;
+    public let f2 : Float;
+    public let f3 : Int;
+}
+
+@frozen
+public struct F25_S2
+{
+    public let f0 : UInt;
+    public let f1 : UInt;
+    public let f2 : Int64;
+    public let f3 : UInt8;
+}
+
+@frozen
+public struct F25_S3
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F25_S4
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F25_Ret
+{
+    public let f0 : UInt64;
+    public let f1 : Int64;
+    public let f2 : UInt8;
+    public let f3 : UInt16;
+}
+
+public func swiftCallbackFunc25(f: (F25_S0, UInt16, UInt, F25_S1, Int16, F25_S2, UInt64, UInt64, UInt64, F25_S3, F25_S4) -> F25_Ret) -> F25_Ret {
+    return f(F25_S0(f0: 6077761381429658786), 2300, 3498354181807010234, F25_S1(f0: 5360721, f1: -40, f2: 109485, f3: 2311625789899959825), -28395, F25_S2(f0: 8729509817732080529, f1: 860365359368130822, f2: 7498894262834346040, f3: 218), 961687210282504701, 7184177441364400868, 8389319500274436977, F25_S3(f0: 4437173), F25_S4(f0: -107))
+}
+
+@frozen
+public struct F26_S0
+{
+    public let f0 : Int8;
+    public let f1 : Int;
+    public let f2 : UInt8;
+    public let f3 : UInt8;
+}
+
+@frozen
+public struct F26_S1_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F26_S1
+{
+    public let f0 : Int8;
+    public let f1 : Int32;
+    public let f2 : Int16;
+    public let f3 : F26_S1_S0;
+}
+
+@frozen
+public struct F26_S2
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F26_S3
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F26_Ret
+{
+    public let f0 : UInt;
+    public let f1 : UInt8;
+}
+
+public func swiftCallbackFunc26(f: (Int8, UInt8, UInt32, F26_S0, F26_S1, F26_S2, F26_S3) -> F26_Ret) -> F26_Ret {
+    return f(-16, 220, 72386567, F26_S0(f0: -33, f1: 6488877286424796715, f2: 143, f3: 74), F26_S1(f0: 104, f1: 1719453315, f2: 20771, f3: F26_S1_S0(f0: 3636117595999837800)), F26_S2(f0: 2279530426119665839), F26_S3(f0: 207))
+}
+
+@frozen
+public struct F27_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F27_S1_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int8;
+}
+
+@frozen
+public struct F27_S1
+{
+    public let f0 : Int64;
+    public let f1 : F27_S1_S0;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F27_S2
+{
+    public let f0 : UInt64;
+    public let f1 : Int8;
+    public let f2 : UInt32;
+    public let f3 : Int64;
+}
+
+@frozen
+public struct F27_S3_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F27_S3
+{
+    public let f0 : F27_S3_S0;
+}
+
+public func swiftCallbackFunc27(f: (UInt64, UInt8, F27_S0, UInt8, UInt8, F27_S1, Int32, F27_S2, Int, UInt32, F27_S3) -> Float) -> Float {
+    return f(4847421047018330189, 214, F27_S0(f0: 31313), 207, 174, F27_S1(f0: 4476120319602257660, f1: F27_S1_S0(f0: 26662, f1: -55), f2: 70666), 1340306103, F27_S2(f0: 2772939788297637999, f1: -65, f2: 7500441, f3: 4926907273817562134), 5862689255099071258, 1077270996, F27_S3(f0: F27_S3_S0(f0: 35167)))
+}
+
+@frozen
+public struct F28_S0
+{
+    public let f0 : UInt64;
+    public let f1 : Int8;
+}
+
+@frozen
+public struct F28_S1
+{
+    public let f0 : Int64;
+    public let f1 : UInt;
+    public let f2 : Int;
+    public let f3 : Int32;
+}
+
+@frozen
+public struct F28_S2
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F28_S3
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F28_Ret_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F28_Ret
+{
+    public let f0 : F28_Ret_S0;
+    public let f1 : UInt16;
+}
+
+public func swiftCallbackFunc28(f: (UInt32, UInt16, Int8, Int8, UInt16, Float, F28_S0, Double, UInt64, F28_S1, F28_S2, F28_S3) -> F28_Ret) -> F28_Ret {
+    return f(893827094, 38017, -90, -1, 16109, 5844449, F28_S0(f0: 176269147098539470, f1: 23), 1431426259441210, 6103261251702315645, F28_S1(f0: 3776818122826483419, f1: 9181420263296840471, f2: 3281861424961082542, f3: 1442905253), F28_S2(f0: 8760009193798370900), F28_S3(f0: 7119917900929398683))
+}
+
+@frozen
+public struct F29_S0
+{
+    public let f0 : UInt8;
+    public let f1 : Double;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F29_S1
+{
+    public let f0 : UInt32;
+    public let f1 : Int;
+    public let f2 : UInt64;
+    public let f3 : UInt32;
+}
+
+@frozen
+public struct F29_S2
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F29_S3
+{
+    public let f0 : UInt32;
+    public let f1 : UInt32;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F29_S4
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F29_Ret_S0
+{
+    public let f0 : Int;
+    public let f1 : UInt64;
+}
+
+@frozen
+public struct F29_Ret
+{
+    public let f0 : UInt;
+    public let f1 : UInt;
+    public let f2 : UInt;
+    public let f3 : F29_Ret_S0;
+    public let f4 : UInt64;
+    public let f5 : UInt32;
+}
+
+public func swiftCallbackFunc29(f: (F29_S0, Int, UInt64, UInt8, Int64, UInt8, Int, F29_S1, Int32, Int8, UInt8, UInt64, F29_S2, F29_S3, Int16, F29_S4, UInt32) -> F29_Ret) -> F29_Ret {
+    return f(F29_S0(f0: 152, f1: 737900189383874, f2: 33674), 5162040247631126074, 6524156301721885895, 129, 6661424933974053497, 145, 7521422786615537370, F29_S1(f0: 1361601345, f1: 3366726213840694614, f2: 7767610514138029164, f3: 1266864987), 1115803878, 5, 80, 2041754562738600205, F29_S2(f0: 1492686870), F29_S3(f0: 142491811, f1: 1644962309, f2: 1905811), -3985, F29_S4(f0: 1921386549), 1510666400)
+}
+
+@frozen
+public struct F30_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int16;
+    public let f2 : Int16;
+    public let f3 : Int8;
+}
+
+@frozen
+public struct F30_S1
+{
+    public let f0 : UInt16;
+    public let f1 : UInt;
+}
+
+@frozen
+public struct F30_S2
+{
+    public let f0 : Int64;
+    public let f1 : Int8;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F30_S3
+{
+    public let f0 : Int8;
+}
+
+public func swiftCallbackFunc30(f: (F30_S0, F30_S1, F30_S2, F30_S3, Int) -> Float) -> Float {
+    return f(F30_S0(f0: 50723, f1: 19689, f2: -6469, f3: 83), F30_S1(f0: 51238, f1: 5879147675377398012), F30_S2(f0: 7909999288286190848, f1: -99, f2: 61385), F30_S3(f0: 48), 2980085298293056148)
+}
+
+@frozen
+public struct F31_S0
+{
+    public let f0 : Int32;
+    public let f1 : UInt64;
+    public let f2 : UInt;
+}
+
+@frozen
+public struct F31_Ret_S0
+{
+    public let f0 : UInt32;
+    public let f1 : Float;
+    public let f2 : UInt16;
+    public let f3 : Int16;
+    public let f4 : Float;
+}
+
+@frozen
+public struct F31_Ret
+{
+    public let f0 : F31_Ret_S0;
+    public let f1 : UInt16;
+}
+
+public func swiftCallbackFunc31(f: (F31_S0, Double) -> F31_Ret) -> F31_Ret {
+    return f(F31_S0(f0: 1072945099, f1: 5760996810500287322, f2: 3952909367135409979), 2860786541632685)
+}
+
+@frozen
+public struct F32_Ret
+{
+    public let f0 : UInt;
+    public let f1 : Double;
+    public let f2 : Int;
+}
+
+public func swiftCallbackFunc32(f: (UInt16, Int16) -> F32_Ret) -> F32_Ret {
+    return f(21020, 7462)
+}
+
+@frozen
+public struct F33_S0
+{
+    public let f0 : Int16;
+    public let f1 : UInt64;
+}
+
+@frozen
+public struct F33_S1_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F33_S1
+{
+    public let f0 : F33_S1_S0;
+    public let f1 : UInt32;
+    public let f2 : UInt;
+}
+
+@frozen
+public struct F33_S2
+{
+    public let f0 : UInt32;
+    public let f1 : UInt64;
+    public let f2 : Int8;
+    public let f3 : Int8;
+    public let f4 : UInt;
+}
+
+@frozen
+public struct F33_S3_S0_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F33_S3_S0
+{
+    public let f0 : F33_S3_S0_S0;
+}
+
+@frozen
+public struct F33_S3
+{
+    public let f0 : F33_S3_S0;
+}
+
+public func swiftCallbackFunc33(f: (F33_S0, Float, F33_S1, UInt32, Int, Int8, Int8, Float, UInt8, Float, Int8, F33_S2, Int, F33_S3, Int, UInt32) -> UInt) -> UInt {
+    return f(F33_S0(f0: -23471, f1: 2736941806609505888), 6930550, F33_S1(f0: F33_S1_S0(f0: 32476), f1: 165441961, f2: 3890227499323387948), 591524870, 1668420058132495503, -67, 94, 3180786, 42, 7674952, 43, F33_S2(f0: 771356149, f1: 3611576949210389997, f2: -15, f3: 7, f4: 2577587324978560192), 8266150294848599489, F33_S3(f0: F33_S3_S0(f0: F33_S3_S0_S0(f0: 9216))), 710302565025364450, 1060812904)
+}
+
+@frozen
+public struct F34_S0_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F34_S0
+{
+    public let f0 : F34_S0_S0;
+    public let f1 : UInt;
+}
+
+public func swiftCallbackFunc34(f: (UInt32, F34_S0, UInt, Int16) -> UInt16) -> UInt16 {
+    return f(2068009847, F34_S0(f0: F34_S0_S0(f0: 845123292), f1: 5148244462913472487), 8632568386462910655, 7058)
+}
+
+@frozen
+public struct F35_S0_S0_S0
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F35_S0_S0
+{
+    public let f0 : Int64;
+    public let f1 : F35_S0_S0_S0;
+}
+
+@frozen
+public struct F35_S0_S1
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F35_S0
+{
+    public let f0 : F35_S0_S0;
+    public let f1 : Int32;
+    public let f2 : F35_S0_S1;
+    public let f3 : Int;
+}
+
+@frozen
+public struct F35_S1
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F35_S2_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F35_S2
+{
+    public let f0 : F35_S2_S0;
+}
+
+public func swiftCallbackFunc35(f: (UInt8, Int8, Float, Int64, Int, F35_S0, F35_S1, F35_S2) -> UInt64) -> UInt64 {
+    return f(182, -16, 7763558, 5905028570860904693, 5991001624972063224, F35_S0(f0: F35_S0_S0(f0: 6663912001709962059, f1: F35_S0_S0_S0(f0: 1843939591)), f1: 1095170337, f2: F35_S0_S1(f0: 3908756332193409), f3: 8246190362462442203), F35_S1(f0: 52167), F35_S2(f0: F35_S2_S0(f0: 283499999631068)))
+}
+
+@frozen
+public struct F36_S0
+{
+    public let f0 : UInt32;
+    public let f1 : Int64;
+    public let f2 : UInt8;
+    public let f3 : UInt;
+}
+
+public func swiftCallbackFunc36(f: (UInt, Double, UInt, UInt8, Int64, F36_S0, Int8) -> Int) -> Int {
+    return f(5079603407518207003, 2365862518115571, 6495651757722767835, 46, 1550138390178394449, F36_S0(f0: 1858960269, f1: 1925263848394986294, f2: 217, f3: 8520779488644482307), -83)
+}
+
+@frozen
+public struct F37_S0_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F37_S0
+{
+    public let f0 : UInt;
+    public let f1 : UInt32;
+    public let f2 : F37_S0_S0;
+    public let f3 : Float;
+}
+
+@frozen
+public struct F37_S1
+{
+    public let f0 : UInt;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F37_S2
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F37_Ret
+{
+    public let f0 : Float;
+    public let f1 : UInt8;
+    public let f2 : Int16;
+    public let f3 : UInt64;
+}
+
+public func swiftCallbackFunc37(f: (UInt64, F37_S0, Double, UInt16, F37_S1, F37_S2) -> F37_Ret) -> F37_Ret {
+    return f(1623104856688575867, F37_S0(f0: 3785544303342575322, f1: 717682682, f2: F37_S0_S0(f0: 2674933748436691896), f3: 3211458), 996705046384579, 8394, F37_S1(f0: 1048947722954084863, f1: 252415487), F37_S2(f0: 3664))
+}
+
+@frozen
+public struct F38_S0_S0
+{
+    public let f0 : Int;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F38_S0
+{
+    public let f0 : F38_S0_S0;
+    public let f1 : UInt16;
+    public let f2 : Int32;
+    public let f3 : Float;
+}
+
+@frozen
+public struct F38_S1
+{
+    public let f0 : Int16;
+    public let f1 : Int32;
+    public let f2 : UInt32;
+}
+
+public func swiftCallbackFunc38(f: (F38_S0, F38_S1, Double, Int16, Int8, UInt32, Int16, Float, Int, Float, UInt32, UInt8, Double, Int8) -> Double) -> Double {
+    return f(F38_S0(f0: F38_S0_S0(f0: 7389960750529773276, f1: 4749108), f1: 54323, f2: 634649910, f3: 83587), F38_S1(f0: -15547, f1: 1747384081, f2: 851987981), 3543874366683681, 5045, -32, 2084540698, 25583, 3158067, 1655263182833369283, 829404, 1888859844, 153, 222366180309763, 61)
+}
+
+@frozen
+public struct F39_S0_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F39_S0_S1
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F39_S0
+{
+    public let f0 : F39_S0_S0;
+    public let f1 : Int32;
+    public let f2 : F39_S0_S1;
+    public let f3 : UInt;
+}
+
+@frozen
+public struct F39_S1
+{
+    public let f0 : UInt16;
+    public let f1 : UInt8;
+    public let f2 : Float;
+    public let f3 : Int64;
+}
+
+@frozen
+public struct F39_S2
+{
+    public let f0 : Int32;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F39_S3
+{
+    public let f0 : UInt32;
+    public let f1 : Int;
+    public let f2 : Int;
+}
+
+public func swiftCallbackFunc39(f: (F39_S0, UInt, UInt32, Double, F39_S1, F39_S2, Int8, F39_S3, Int32, UInt64, UInt8) -> Int) -> Int {
+    return f(F39_S0(f0: F39_S0_S0(f0: -31212), f1: 1623216479, f2: F39_S0_S1(f0: 7181), f3: 8643545152918150186), 799631211988519637, 94381581, 761127371030426, F39_S1(f0: 417, f1: 85, f2: 1543931, f3: 3918460222899735322), F39_S2(f0: 883468300, f1: 2739152), -94, F39_S3(f0: 1374766954, f1: 2042223450490396789, f2: 2672454113535023130), 946259065, 6805548458517673751, 61)
+}
+
+@frozen
+public struct F40_S0
+{
+    public let f0 : Int16;
+    public let f1 : Int32;
+}
+
+@frozen
+public struct F40_S1
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F40_S2
+{
+    public let f0 : Int64;
+    public let f1 : UInt16;
+    public let f2 : Int;
+    public let f3 : UInt8;
+}
+
+@frozen
+public struct F40_S3_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F40_S3
+{
+    public let f0 : UInt;
+    public let f1 : Double;
+    public let f2 : F40_S3_S0;
+    public let f3 : Double;
+}
+
+public func swiftCallbackFunc40(f: (F40_S0, UInt32, UInt8, F40_S1, F40_S2, UInt64, UInt, UInt64, Int, UInt16, UInt32, F40_S3, UInt) -> UInt) -> UInt {
+    return f(F40_S0(f0: 22601, f1: 312892872), 1040102825, 56, F40_S1(f0: 101203812), F40_S2(f0: 4298883321494088257, f1: 2095, f2: 1536552108568739270, f3: 220), 2564624804830565018, 173855559108584219, 6222832940831380264, 1898370824516510398, 3352, 1643571476, F40_S3(f0: 7940054758811932961, f1: 246670432251533, f2: F40_S3_S0(f0: 7890596), f3: 1094140965415232), 2081923113238309816)
+}
+
+@frozen
+public struct F41_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F41_Ret
+{
+    public let f0 : UInt64;
+    public let f1 : Double;
+    public let f2 : UInt32;
+    public let f3 : UInt32;
+}
+
+public func swiftCallbackFunc41(f: (F41_S0) -> F41_Ret) -> F41_Ret {
+    return f(F41_S0(f0: 1430200072))
+}
+
+@frozen
+public struct F42_S0_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F42_S0
+{
+    public let f0 : F42_S0_S0;
+}
+
+@frozen
+public struct F42_S1
+{
+    public let f0 : UInt32;
+}
+
+public func swiftCallbackFunc42(f: (Int32, UInt32, F42_S0, Float, UInt8, F42_S1) -> Int) -> Int {
+    return f(1046060439, 1987212952, F42_S0(f0: F42_S0_S0(f0: 4714080408858753964)), 2364146, 25, F42_S1(f0: 666986488))
+}
+
+@frozen
+public struct F43_S0
+{
+    public let f0 : Int32;
+    public let f1 : Int32;
+    public let f2 : Int;
+}
+
+@frozen
+public struct F43_S1
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F43_Ret
+{
+    public let f0 : UInt16;
+}
+
+public func swiftCallbackFunc43(f: (F43_S0, F43_S1) -> F43_Ret) -> F43_Ret {
+    return f(F43_S0(f0: 406102630, f1: 1946236062, f2: 663606396354980308), F43_S1(f0: -8))
+}
+
+@frozen
+public struct F44_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F44_S1_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F44_S1_S1
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F44_S1
+{
+    public let f0 : Int16;
+    public let f1 : Int16;
+    public let f2 : F44_S1_S0;
+    public let f3 : F44_S1_S1;
+}
+
+@frozen
+public struct F44_S2
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F44_S3
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F44_Ret_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F44_Ret
+{
+    public let f0 : Int;
+    public let f1 : F44_Ret_S0;
+    public let f2 : Double;
+}
+
+public func swiftCallbackFunc44(f: (Double, F44_S0, F44_S1, F44_S2, F44_S3) -> F44_Ret) -> F44_Ret {
+    return f(4281406007431544, F44_S0(f0: 2097291497), F44_S1(f0: -10489, f1: -9573, f2: F44_S1_S0(f0: 62959), f3: F44_S1_S1(f0: 7144119809173057975)), F44_S2(f0: 168733393207234277), F44_S3(f0: 64))
+}
+
+@frozen
+public struct F45_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F45_S1
+{
+    public let f0 : UInt;
+    public let f1 : Int16;
+}
+
+@frozen
+public struct F45_Ret_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F45_Ret
+{
+    public let f0 : Double;
+    public let f1 : F45_Ret_S0;
+    public let f2 : Int64;
+    public let f3 : Double;
+    public let f4 : UInt64;
+    public let f5 : Int8;
+    public let f6 : Int32;
+}
+
+public func swiftCallbackFunc45(f: (F45_S0, F45_S1, UInt8) -> F45_Ret) -> F45_Ret {
+    return f(F45_S0(f0: 5311803360204128233), F45_S1(f0: 2204790044275015546, f1: 8942), 207)
+}
+
+@frozen
+public struct F46_Ret
+{
+    public let f0 : UInt;
+    public let f1 : Double;
+    public let f2 : Int64;
+    public let f3 : UInt16;
+}
+
+public func swiftCallbackFunc46(f: (Int, UInt, UInt16, UInt16, Int64) -> F46_Ret) -> F46_Ret {
+    return f(1855296013283572041, 1145047910516899437, 20461, 58204, 1923767011143317115)
+}
+
+@frozen
+public struct F47_S0
+{
+    public let f0 : UInt8;
+    public let f1 : Int32;
+}
+
+@frozen
+public struct F47_S1
+{
+    public let f0 : Int;
+    public let f1 : UInt32;
+    public let f2 : Int8;
+}
+
+@frozen
+public struct F47_S2_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F47_S2
+{
+    public let f0 : Int8;
+    public let f1 : Float;
+    public let f2 : Int32;
+    public let f3 : Float;
+    public let f4 : F47_S2_S0;
+}
+
+@frozen
+public struct F47_S3
+{
+    public let f0 : UInt64;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct F47_S4
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F47_Ret
+{
+    public let f0 : Int16;
+    public let f1 : Int16;
+    public let f2 : Int64;
+}
+
+public func swiftCallbackFunc47(f: (Int, Float, UInt32, F47_S0, F47_S1, UInt16, Float, Int, Int, UInt, UInt, Int16, F47_S2, F47_S3, F47_S4) -> F47_Ret) -> F47_Ret {
+    return f(6545360066379352091, 1240616, 575670382, F47_S0(f0: 27, f1: 1769677101), F47_S1(f0: 4175209822525678639, f1: 483151627, f2: -41), 20891, 1011044, 8543308148327168378, 9126721646663585297, 5438914191614359864, 5284613245897089025, -9227, F47_S2(f0: -23, f1: 1294109, f2: 411726757, f3: 6621598, f4: F47_S2_S0(f0: 249)), F47_S3(f0: 5281612261430853979, f1: 7161295082465816089), F47_S4(f0: 1995556861952451598))
+}
+
+@frozen
+public struct F48_S0
+{
+    public let f0 : UInt64;
+    public let f1 : Int16;
+    public let f2 : UInt64;
+}
+
+@frozen
+public struct F48_S1_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F48_S1
+{
+    public let f0 : Double;
+    public let f1 : Int32;
+    public let f2 : Int32;
+    public let f3 : F48_S1_S0;
+    public let f4 : UInt;
+}
+
+public func swiftCallbackFunc48(f: (Int8, Int16, Int16, UInt32, F48_S0, UInt32, F48_S1, Int32, Int32, UInt16, Int64, UInt32) -> Int64) -> Int64 {
+    return f(-34, 11634, -27237, 1039294154, F48_S0(f0: 1367847206719062131, f1: 22330, f2: 689282484471011648), 1572626904, F48_S1(f0: 3054128759424009, f1: 1677338134, f2: 1257237843, f3: F48_S1_S0(f0: 6264494), f4: 8397097040610783205), 1060447208, 269785114, 20635, 7679010342730986048, 1362633148)
+}
+
+@frozen
+public struct F49_S0_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F49_S0
+{
+    public let f0 : F49_S0_S0;
+    public let f1 : UInt64;
+}
+
+@frozen
+public struct F49_Ret
+{
+    public let f0 : Int32;
+    public let f1 : Int16;
+    public let f2 : UInt8;
+    public let f3 : UInt8;
+    public let f4 : Int8;
+    public let f5 : Int64;
+}
+
+public func swiftCallbackFunc49(f: (F49_S0, Int64) -> F49_Ret) -> F49_Ret {
+    return f(F49_S0(f0: F49_S0_S0(f0: 48), f1: 7563394992711018452), 4358370311341042916)
+}
+
+@frozen
+public struct F50_S0_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F50_S0
+{
+    public let f0 : UInt16;
+    public let f1 : F50_S0_S0;
+}
+
+@frozen
+public struct F50_S1
+{
+    public let f0 : Double;
+    public let f1 : UInt16;
+    public let f2 : Int32;
+    public let f3 : Int;
+    public let f4 : Double;
+}
+
+@frozen
+public struct F50_S2
+{
+    public let f0 : Int32;
+    public let f1 : Float;
+    public let f2 : UInt32;
+}
+
+@frozen
+public struct F50_S3
+{
+    public let f0 : Int64;
+    public let f1 : Int32;
+    public let f2 : Float;
+    public let f3 : Int8;
+}
+
+@frozen
+public struct F50_S4
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F50_S5_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F50_S5
+{
+    public let f0 : F50_S5_S0;
+}
+
+public func swiftCallbackFunc50(f: (F50_S0, F50_S1, UInt8, F50_S2, Int32, UInt64, Int8, Int8, Float, F50_S3, F50_S4, F50_S5, Float) -> UInt8) -> UInt8 {
+    return f(F50_S0(f0: 31857, f1: F50_S0_S0(f0: 1743417849706254)), F50_S1(f0: 4104577461772135, f1: 13270, f2: 2072598986, f3: 9056978834867675248, f4: 844742439929087), 87, F50_S2(f0: 1420884537, f1: 78807, f2: 1081688273), 336878110, 1146514566942283069, -93, 73, 2321639, F50_S3(f0: 1940888991336881606, f1: 688345394, f2: 712275, f3: -128), F50_S4(f0: 2638503583829414770), F50_S5(f0: F50_S5_S0(f0: 23681)), 8223218)
+}
+
+@frozen
+public struct F51_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F51_Ret
+{
+    public let f0 : UInt16;
+    public let f1 : Int8;
+    public let f2 : Int;
+    public let f3 : UInt16;
+    public let f4 : UInt64;
+}
+
+public func swiftCallbackFunc51(f: (Int16, UInt, F51_S0, UInt64) -> F51_Ret) -> F51_Ret {
+    return f(10812, 470861239714315155, F51_S0(f0: 5415660333180374788), 2389942629143476149)
+}
+
+@frozen
+public struct F52_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F52_S1
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F52_Ret
+{
+    public let f0 : Float;
+    public let f1 : UInt16;
+    public let f2 : Int64;
+    public let f3 : Int16;
+    public let f4 : UInt64;
+    public let f5 : Int8;
+}
+
+public func swiftCallbackFunc52(f: (Int, F52_S0, Int16, Int16, F52_S1) -> F52_Ret) -> F52_Ret {
+    return f(3233654765973602550, F52_S0(f0: 5997729), -7404, -20804, F52_S1(f0: 17231))
+}
+
+@frozen
+public struct F53_S0_S0_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F53_S0_S0
+{
+    public let f0 : F53_S0_S0_S0;
+}
+
+@frozen
+public struct F53_S0
+{
+    public let f0 : Int8;
+    public let f1 : F53_S0_S0;
+    public let f2 : UInt8;
+    public let f3 : UInt;
+    public let f4 : Int64;
+}
+
+@frozen
+public struct F53_S1
+{
+    public let f0 : Float;
+    public let f1 : UInt8;
+}
+
+@frozen
+public struct F53_S2
+{
+    public let f0 : Int8;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct F53_S3_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F53_S3
+{
+    public let f0 : Int32;
+    public let f1 : UInt32;
+    public let f2 : F53_S3_S0;
+}
+
+@frozen
+public struct F53_S4
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F53_S5_S0
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F53_S5_S1_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F53_S5_S1
+{
+    public let f0 : F53_S5_S1_S0;
+}
+
+@frozen
+public struct F53_S5
+{
+    public let f0 : F53_S5_S0;
+    public let f1 : UInt;
+    public let f2 : UInt16;
+    public let f3 : F53_S5_S1;
+    public let f4 : Int8;
+}
+
+@frozen
+public struct F53_S6
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F53_Ret
+{
+    public let f0 : Int;
+}
+
+public func swiftCallbackFunc53(f: (F53_S0, UInt8, Int64, F53_S1, F53_S2, F53_S3, Int64, F53_S4, F53_S5, F53_S6) -> F53_Ret) -> F53_Ret {
+    return f(F53_S0(f0: -123, f1: F53_S0_S0(f0: F53_S0_S0_S0(f0: 3494916243607193741)), f2: 167, f3: 4018943158751734338, f4: 6768175524813742847), 207, 8667995458064724392, F53_S1(f0: 492157, f1: 175), F53_S2(f0: 76, f1: 5794486968525461488), F53_S3(f0: 2146070335, f1: 1109141712, f2: F53_S3_S0(f0: 44270)), 3581380181786253859, F53_S4(f0: 23565), F53_S5(f0: F53_S5_S0(f0: 1995174927), f1: 5025417700244056666, f2: 1847, f3: F53_S5_S1(f0: F53_S5_S1_S0(f0: 6)), f4: -87), F53_S6(f0: 5737280129078653969))
+}
+
+@frozen
+public struct F54_S0
+{
+    public let f0 : Int32;
+    public let f1 : Float;
+    public let f2 : UInt;
+    public let f3 : UInt8;
+}
+
+@frozen
+public struct F54_S1
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F54_S2_S0_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F54_S2_S0
+{
+    public let f0 : Int16;
+    public let f1 : F54_S2_S0_S0;
+}
+
+@frozen
+public struct F54_S2
+{
+    public let f0 : Double;
+    public let f1 : F54_S2_S0;
+    public let f2 : Int64;
+    public let f3 : UInt64;
+}
+
+@frozen
+public struct F54_S3
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F54_S4
+{
+    public let f0 : UInt16;
+    public let f1 : Int8;
+}
+
+@frozen
+public struct F54_S5
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F54_Ret
+{
+    public let f0 : Int16;
+    public let f1 : Int;
+}
+
+public func swiftCallbackFunc54(f: (UInt16, F54_S0, Float, F54_S1, Int64, Int32, F54_S2, F54_S3, F54_S4, Float, F54_S5) -> F54_Ret) -> F54_Ret {
+    return f(16440, F54_S0(f0: 922752112, f1: 7843043, f2: 1521939500434086364, f3: 50), 3111108, F54_S1(f0: 50535), 4761507229870258916, 1670668155, F54_S2(f0: 432665443852892, f1: F54_S2_S0(f0: 13094, f1: F54_S2_S0_S0(f0: 669143993481144)), f2: 30067117315069590, f3: 874012622621600805), F54_S3(f0: 7995066), F54_S4(f0: 48478, f1: 23), 4383787, F54_S5(f0: 61633))
+}
+
+@frozen
+public struct F55_S0_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F55_S0
+{
+    public let f0 : UInt;
+    public let f1 : F55_S0_S0;
+    public let f2 : Int8;
+}
+
+@frozen
+public struct F55_S1
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F55_S2
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F55_Ret_S0
+{
+    public let f0 : Int16;
+    public let f1 : Int32;
+}
+
+@frozen
+public struct F55_Ret
+{
+    public let f0 : UInt;
+    public let f1 : Int;
+    public let f2 : Double;
+    public let f3 : F55_Ret_S0;
+    public let f4 : UInt64;
+}
+
+public func swiftCallbackFunc55(f: (F55_S0, Int64, F55_S1, Int8, F55_S2, Float) -> F55_Ret) -> F55_Ret {
+    return f(F55_S0(f0: 2856661562863799725, f1: F55_S0_S0(f0: 1260582440479139), f2: 5), 7945068527720423751, F55_S1(f0: 4321616441998677375), -68, F55_S2(f0: 3311106172201778367), 5600069)
+}
+
+@frozen
+public struct F56_S0
+{
+    public let f0 : Double;
+}
+
+public func swiftCallbackFunc56(f: (F56_S0) -> UInt32) -> UInt32 {
+    return f(F56_S0(f0: 3082602006731666))
+}
+
+@frozen
+public struct F57_S0
+{
+    public let f0 : Int64;
+    public let f1 : Int32;
+    public let f2 : UInt64;
+}
+
+@frozen
+public struct F57_S1
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F57_S2
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F57_Ret_S0
+{
+    public let f0 : Int64;
+    public let f1 : UInt8;
+    public let f2 : Int16;
+}
+
+@frozen
+public struct F57_Ret
+{
+    public let f0 : F57_Ret_S0;
+    public let f1 : UInt8;
+}
+
+public func swiftCallbackFunc57(f: (Int8, UInt, UInt32, Int64, UInt64, Int16, Int64, F57_S0, F57_S1, F57_S2) -> F57_Ret) -> F57_Ret {
+    return f(54, 753245150862584974, 1470962934, 1269392070140776313, 2296560034524654667, 12381, 198893062684618980, F57_S0(f0: 1310571041794038100, f1: 18741662, f2: 7855196891704523814), F57_S1(f0: 156), F57_S2(f0: 72045))
+}
+
+@frozen
+public struct F58_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F58_S1
+{
+    public let f0 : Float;
+    public let f1 : UInt16;
+}
+
+@frozen
+public struct F58_S2_S0_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F58_S2_S0
+{
+    public let f0 : F58_S2_S0_S0;
+}
+
+@frozen
+public struct F58_S2
+{
+    public let f0 : F58_S2_S0;
+}
+
+public func swiftCallbackFunc58(f: (UInt64, Int8, Int, F58_S0, F58_S1, Int64, F58_S2, Int32) -> Int) -> Int {
+    return f(4612004722568513699, -96, 1970590839325113617, F58_S0(f0: 211), F58_S1(f0: 5454927, f1: 48737), 921570327236881486, F58_S2(f0: F58_S2_S0(f0: F58_S2_S0_S0(f0: 7726203059421444802))), 491616915)
+}
+
+public func swiftCallbackFunc59(f: (UInt16, Int64, Int) -> UInt64) -> UInt64 {
+    return f(9232, 7281011081566942937, 8203439771560005792)
+}
+
+@frozen
+public struct F60_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F60_S1
+{
+    public let f0 : UInt64;
+    public let f1 : Int32;
+}
+
+public func swiftCallbackFunc60(f: (Float, Double, Int64, UInt16, Float, Float, F60_S0, Int16, F60_S1, Int16, Int64) -> UInt64) -> UInt64 {
+    return f(2682255, 2041676057169359, 5212916666940122160, 64444, 6372882, 8028835, F60_S0(f0: 6629286640024570381), 1520, F60_S1(f0: 8398497739914283366, f1: 1882981891), 7716, 6631047215535600409)
+}
+
+@frozen
+public struct F61_S0_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F61_S0
+{
+    public let f0 : F61_S0_S0;
+    public let f1 : Int64;
+    public let f2 : UInt32;
+}
+
+@frozen
+public struct F61_S1
+{
+    public let f0 : Int8;
+    public let f1 : Float;
+    public let f2 : Int;
+}
+
+@frozen
+public struct F61_S2_S0_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F61_S2_S0
+{
+    public let f0 : F61_S2_S0_S0;
+}
+
+@frozen
+public struct F61_S2_S1
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F61_S2
+{
+    public let f0 : F61_S2_S0;
+    public let f1 : F61_S2_S1;
+}
+
+@frozen
+public struct F61_S3
+{
+    public let f0 : UInt64;
+    public let f1 : Int;
+}
+
+public func swiftCallbackFunc61(f: (UInt32, UInt32, F61_S0, F61_S1, F61_S2, Int8, Int16, F61_S3, Int32, UInt32) -> UInt32) -> UInt32 {
+    return f(1070797065, 135220309, F61_S0(f0: F61_S0_S0(f0: 6475887024664217162), f1: 563444654083452485, f2: 1748956360), F61_S1(f0: -112, f1: 3433396, f2: 8106074956722850624), F61_S2(f0: F61_S2_S0(f0: F61_S2_S0_S0(f0: 2318628619979263858)), f1: F61_S2_S1(f0: -93)), -122, -11696, F61_S3(f0: 5229393236090246212, f1: 4021449757638811198), 689517945, 657677740)
+}
+
+@frozen
+public struct F62_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F62_Ret
+{
+    public let f0 : UInt16;
+    public let f1 : Int64;
+    public let f2 : Int;
+    public let f3 : Int64;
+}
+
+public func swiftCallbackFunc62(f: (F62_S0) -> F62_Ret) -> F62_Ret {
+    return f(F62_S0(f0: 6500993))
+}
+
+@frozen
+public struct F63_S0
+{
+    public let f0 : Int;
+}
+
+public func swiftCallbackFunc63(f: (F63_S0, Int16) -> Float) -> Float {
+    return f(F63_S0(f0: 8391317504019075904), 11218)
+}
+
+@frozen
+public struct F64_S0
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F64_S1
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F64_S2
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F64_Ret_S0
+{
+    public let f0 : UInt16;
+    public let f1 : UInt;
+    public let f2 : UInt64;
+}
+
+@frozen
+public struct F64_Ret
+{
+    public let f0 : UInt;
+    public let f1 : F64_Ret_S0;
+    public let f2 : Double;
+}
+
+public func swiftCallbackFunc64(f: (Int8, F64_S0, F64_S1, UInt, F64_S2) -> F64_Ret) -> F64_Ret {
+    return f(-22, F64_S0(f0: 1591678205), F64_S1(f0: 8355549563000003325), 5441989206466502201, F64_S2(f0: 2097092811))
+}
+
+@frozen
+public struct F65_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F65_S1
+{
+    public let f0 : UInt16;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F65_S2
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F65_S3
+{
+    public let f0 : Int32;
+    public let f1 : UInt32;
+    public let f2 : Int8;
+    public let f3 : UInt;
+    public let f4 : Double;
+}
+
+@frozen
+public struct F65_Ret
+{
+    public let f0 : Int;
+    public let f1 : Int;
+    public let f2 : Int;
+    public let f3 : Float;
+}
+
+public func swiftCallbackFunc65(f: (F65_S0, Int16, Double, UInt, F65_S1, UInt64, F65_S2, Int, F65_S3, Int32, Int64, UInt32, Double) -> F65_Ret) -> F65_Ret {
+    return f(F65_S0(f0: 2969223123583220), -10269, 3909264978196109, 522883062031213707, F65_S1(f0: 37585, f1: 5879827541057349126), 1015270399093748716, F65_S2(f0: 19670), 1900026319968050423, F65_S3(f0: 1440511399, f1: 1203865685, f2: 12, f3: 4061296318630567634, f4: 2406524883317724), 1594888000, 2860599972459787263, 1989052358, 1036075606072593)
+}
+
+@frozen
+public struct F66_Ret_S0
+{
+    public let f0 : Float;
+    public let f1 : UInt8;
+}
+
+@frozen
+public struct F66_Ret
+{
+    public let f0 : UInt32;
+    public let f1 : Int32;
+    public let f2 : UInt32;
+    public let f3 : F66_Ret_S0;
+    public let f4 : Int;
+}
+
+public func swiftCallbackFunc66(f: (Int64) -> F66_Ret) -> F66_Ret {
+    return f(8300712022174991120)
+}
+
+@frozen
+public struct F67_S0
+{
+    public let f0 : UInt32;
+    public let f1 : UInt8;
+    public let f2 : UInt8;
+    public let f3 : Int32;
+}
+
+@frozen
+public struct F67_S1
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F67_S2_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F67_S2
+{
+    public let f0 : UInt64;
+    public let f1 : UInt32;
+    public let f2 : Int;
+    public let f3 : UInt32;
+    public let f4 : F67_S2_S0;
+}
+
+@frozen
+public struct F67_S3
+{
+    public let f0 : Int16;
+    public let f1 : UInt64;
+    public let f2 : UInt64;
+    public let f3 : Float;
+}
+
+public func swiftCallbackFunc67(f: (Double, F67_S0, Float, F67_S1, Int16, UInt, F67_S2, UInt16, UInt, UInt, F67_S3, UInt64) -> Int32) -> Int32 {
+    return f(2365334314089079, F67_S0(f0: 1133369490, f1: 54, f2: 244, f3: 411611102), 4453912, F67_S1(f0: 837821989), -3824, 2394019088612006082, F67_S2(f0: 2219661088889353540, f1: 294254132, f2: 5363897228951721947, f3: 2038380379, f4: F67_S2_S0(f0: 8364879421385869437)), 27730, 1854446871602777695, 5020910156102352016, F67_S3(f0: -2211, f1: 5910581461792482729, f2: 9095210648679611609, f3: 6138428), 4274242076331880276)
+}
+
+@frozen
+public struct F68_S0_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F68_S0
+{
+    public let f0 : Int64;
+    public let f1 : F68_S0_S0;
+}
+
+@frozen
+public struct F68_S1
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F68_S2_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F68_S2_S1_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F68_S2_S1
+{
+    public let f0 : F68_S2_S1_S0;
+}
+
+@frozen
+public struct F68_S2
+{
+    public let f0 : F68_S2_S0;
+    public let f1 : F68_S2_S1;
+}
+
+@frozen
+public struct F68_S3
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F68_Ret
+{
+    public let f0 : UInt16;
+    public let f1 : Int64;
+}
+
+public func swiftCallbackFunc68(f: (UInt8, Float, Int32, Int, F68_S0, Int16, Int, Int32, Int, F68_S1, Double, F68_S2, F68_S3) -> F68_Ret) -> F68_Ret {
+    return f(203, 7725681, 323096997, 7745650233784541800, F68_S0(f0: 4103074885750473230, f1: F68_S0_S0(f0: 12)), 28477, 3772772447290536725, 1075348149, 2017898311184593242, F68_S1(f0: 60280), 4052387873895590, F68_S2(f0: F68_S2_S0(f0: 1321857087602747558), f1: F68_S2_S1(f0: F68_S2_S1_S0(f0: 9011155097138053416))), F68_S3(f0: 8332))
+}
+
+@frozen
+public struct F69_S0_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F69_S0
+{
+    public let f0 : F69_S0_S0;
+}
+
+@frozen
+public struct F69_S1
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F69_S2
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F69_S3
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F69_S4_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F69_S4
+{
+    public let f0 : F69_S4_S0;
+}
+
+@frozen
+public struct F69_Ret
+{
+    public let f0 : UInt8;
+    public let f1 : Int64;
+    public let f2 : UInt32;
+}
+
+public func swiftCallbackFunc69(f: (F69_S0, Int, Int32, F69_S1, UInt32, Int8, F69_S2, Int, F69_S3, F69_S4) -> F69_Ret) -> F69_Ret {
+    return f(F69_S0(f0: F69_S0_S0(f0: 7154553222175076145)), 6685908100026425691, 1166526155, F69_S1(f0: 6042278185730963289), 182060391, 45, F69_S2(f0: 1886331345), 485542148877875333, F69_S3(f0: 209), F69_S4(f0: F69_S4_S0(f0: 6856847647688321191)))
+}
+
+@frozen
+public struct F70_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F70_S1
+{
+    public let f0 : Int;
+    public let f1 : Double;
+    public let f2 : Int16;
+}
+
+@frozen
+public struct F70_S2
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F70_S3
+{
+    public let f0 : UInt16;
+    public let f1 : Double;
+    public let f2 : UInt8;
+    public let f3 : UInt64;
+    public let f4 : Int32;
+}
+
+@frozen
+public struct F70_S4_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F70_S4
+{
+    public let f0 : F70_S4_S0;
+}
+
+@frozen
+public struct F70_Ret
+{
+    public let f0 : Int8;
+    public let f1 : UInt32;
+    public let f2 : UInt64;
+    public let f3 : Int16;
+    public let f4 : Int16;
+}
+
+public func swiftCallbackFunc70(f: (Int16, UInt8, Int, UInt32, F70_S0, Int32, F70_S1, F70_S2, F70_S3, Int64, Int32, UInt16, Int, Int, UInt, F70_S4) -> F70_Ret) -> F70_Ret {
+    return f(-13167, 126, 3641983584484741827, 1090448265, F70_S0(f0: 3696858216713616004), 1687025402, F70_S1(f0: 714916953527626038, f1: 459810445900614, f2: 4276), F70_S2(f0: 529194028), F70_S3(f0: 40800, f1: 3934985905568056, f2: 230, f3: 7358783417346157372, f4: 187926922), 228428560763393434, 146501405, 58804, 7098488973446286248, 1283658442251334575, 3644681944588099582, F70_S4(f0: F70_S4_S0(f0: 8197135412164695911)))
+}
+
+@frozen
+public struct F71_S0_S0
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F71_S0
+{
+    public let f0 : F71_S0_S0;
+}
+
+@frozen
+public struct F71_S1
+{
+    public let f0 : Int64;
+}
+
+public func swiftCallbackFunc71(f: (F71_S0, F71_S1) -> UInt64) -> UInt64 {
+    return f(F71_S0(f0: F71_S0_S0(f0: 258165353)), F71_S1(f0: 8603744544763953916))
+}
+
+@frozen
+public struct F72_S0
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F72_Ret
+{
+    public let f0 : UInt32;
+    public let f1 : Float;
+    public let f2 : Float;
+    public let f3 : Int64;
+}
+
+public func swiftCallbackFunc72(f: (F72_S0, Int64, Int8) -> F72_Ret) -> F72_Ret {
+    return f(F72_S0(f0: 2021509367), 2480039820482100351, 91)
+}
+
+@frozen
+public struct F73_S0
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F73_S1_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F73_S1
+{
+    public let f0 : F73_S1_S0;
+}
+
+@frozen
+public struct F73_S2
+{
+    public let f0 : Int32;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F73_S3
+{
+    public let f0 : UInt;
+    public let f1 : Int16;
+    public let f2 : Int8;
+}
+
+@frozen
+public struct F73_S4
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F73_S5
+{
+    public let f0 : UInt32;
+}
+
+public func swiftCallbackFunc73(f: (Double, Float, F73_S0, Int64, F73_S1, F73_S2, Int16, Double, Int8, Int32, Int64, F73_S3, UInt, UInt64, Int32, F73_S4, UInt8, F73_S5) -> Int8) -> Int8 {
+    return f(3038361048801008, 7870661, F73_S0(f0: 1555231180), 7433951069104961, F73_S1(f0: F73_S1_S0(f0: 63298)), F73_S2(f0: 1759846580, f1: 1335901), 11514, 695278874601974, 108, 48660527, 7762050749172332624, F73_S3(f0: 7486686356276472663, f1: 11622, f2: 112), 884183974530885885, 7434462110419085390, 170242607, F73_S4(f0: -26039), 41, F73_S5(f0: 191302504))
+}
+
+@frozen
+public struct F74_S0_S0
+{
+    public let f0 : UInt16;
+    public let f1 : UInt;
+    public let f2 : Int8;
+}
+
+@frozen
+public struct F74_S0
+{
+    public let f0 : F74_S0_S0;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F74_S1
+{
+    public let f0 : Float;
+}
+
+public func swiftCallbackFunc74(f: (F74_S0, F74_S1, Int16) -> Int64) -> Int64 {
+    return f(F74_S0(f0: F74_S0_S0(f0: 59883, f1: 5554216411943233256, f2: 126), f1: 724541378819571203), F74_S1(f0: 172601), 27932)
+}
+
+@frozen
+public struct F75_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F75_S1_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F75_S1
+{
+    public let f0 : F75_S1_S0;
+}
+
+@frozen
+public struct F75_S2
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F75_S3_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F75_S3
+{
+    public let f0 : F75_S3_S0;
+}
+
+@frozen
+public struct F75_Ret
+{
+    public let f0 : UInt8;
+    public let f1 : Double;
+    public let f2 : Double;
+    public let f3 : Int64;
+    public let f4 : UInt32;
+}
+
+public func swiftCallbackFunc75(f: (Int8, Int8, Int8, F75_S0, F75_S1, F75_S2, F75_S3) -> F75_Ret) -> F75_Ret {
+    return f(-105, 71, 108, F75_S0(f0: 7224638108479292438), F75_S1(f0: F75_S1_S0(f0: 126)), F75_S2(f0: -88), F75_S3(f0: F75_S3_S0(f0: 4934)))
+}
+
+@frozen
+public struct F76_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F76_S1_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F76_S1
+{
+    public let f0 : F76_S1_S0;
+    public let f1 : UInt;
+    public let f2 : Double;
+}
+
+@frozen
+public struct F76_S2
+{
+    public let f0 : UInt64;
+    public let f1 : Int;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F76_S3_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F76_S3
+{
+    public let f0 : F76_S3_S0;
+}
+
+@frozen
+public struct F76_S4
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F76_S5
+{
+    public let f0 : UInt;
+    public let f1 : Double;
+}
+
+public func swiftCallbackFunc76(f: (UInt8, F76_S0, Int8, F76_S1, F76_S2, F76_S3, UInt32, F76_S4, UInt8, F76_S5, Double, Int16) -> UInt64) -> UInt64 {
+    return f(69, F76_S0(f0: 25503, f1: 4872234474620951743), 43, F76_S1(f0: F76_S1_S0(f0: 1199076663426903579), f1: 4639522222462236688, f2: 4082956091930029), F76_S2(f0: 5171821618947987626, f1: 3369410144919558564, f2: 5287), F76_S3(f0: F76_S3_S0(f0: 929854460912895550)), 1208311201, F76_S4(f0: 7033993025788649145), 58, F76_S5(f0: 1401399014740601512, f1: 2523645319232571), 230232835550369, -22975)
+}
+
+@frozen
+public struct F77_S0
+{
+    public let f0 : Int64;
+    public let f1 : Double;
+    public let f2 : UInt;
+}
+
+@frozen
+public struct F77_S1
+{
+    public let f0 : Int16;
+    public let f1 : Float;
+    public let f2 : Float;
+    public let f3 : Int64;
+    public let f4 : Int64;
+}
+
+@frozen
+public struct F77_S2
+{
+    public let f0 : UInt16;
+    public let f1 : Int8;
+    public let f2 : Int32;
+    public let f3 : Float;
+    public let f4 : Float;
+}
+
+@frozen
+public struct F77_Ret
+{
+    public let f0 : Double;
+    public let f1 : UInt16;
+    public let f2 : Int8;
+    public let f3 : UInt;
+}
+
+public func swiftCallbackFunc77(f: (Double, F77_S0, F77_S1, F77_S2, UInt32) -> F77_Ret) -> F77_Ret {
+    return f(1623173949127682, F77_S0(f0: 5204451347781433070, f1: 3469485630755805, f2: 7586276835848725004), F77_S1(f0: 2405, f1: 2419792, f2: 6769317, f3: 1542327522833750776, f4: 1297586130846695275), F77_S2(f0: 10102, f1: -48, f2: 14517107, f3: 4856023, f4: 2681358), 1463251524)
+}
+
+@frozen
+public struct F78_S0
+{
+    public let f0 : UInt;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F78_S1_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F78_S1
+{
+    public let f0 : Int16;
+    public let f1 : UInt64;
+    public let f2 : F78_S1_S0;
+    public let f3 : Int32;
+    public let f4 : Int;
+}
+
+@frozen
+public struct F78_S2
+{
+    public let f0 : UInt;
+    public let f1 : UInt64;
+}
+
+@frozen
+public struct F78_S3
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F78_S4
+{
+    public let f0 : UInt64;
+}
+
+public func swiftCallbackFunc78(f: (UInt64, F78_S0, UInt64, F78_S1, F78_S2, Int32, UInt64, Int64, F78_S3, Float, Float, UInt16, F78_S4, Double) -> Double) -> Double {
+    return f(6780767594736146373, F78_S0(f0: 6264193481541646332, f1: 6600856439035088503), 1968254881389492170, F78_S1(f0: -17873, f1: 5581169895682201971, f2: F78_S1_S0(f0: 127), f3: 1942346704, f4: 118658265323815307), F78_S2(f0: 1489326778640378879, f1: 1427061853707270770), 858391966, 5830110056171302270, 2953614358173898788, F78_S3(f0: 6761452244699684409), 3452451, 3507119, 40036, F78_S4(f0: 4800085294404376817), 780368756754436)
+}
+
+@frozen
+public struct F79_S0_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F79_S0
+{
+    public let f0 : F79_S0_S0;
+    public let f1 : Int;
+}
+
+@frozen
+public struct F79_Ret
+{
+    public let f0 : UInt32;
+    public let f1 : UInt64;
+    public let f2 : Double;
+}
+
+public func swiftCallbackFunc79(f: (F79_S0, Float) -> F79_Ret) -> F79_Ret {
+    return f(F79_S0(f0: F79_S0_S0(f0: 1013911700897046117), f1: 7323935615297665289), 5159506)
+}
+
+@frozen
+public struct F80_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F80_S1_S0_S0
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F80_S1_S0
+{
+    public let f0 : F80_S1_S0_S0;
+}
+
+@frozen
+public struct F80_S1
+{
+    public let f0 : Int;
+    public let f1 : F80_S1_S0;
+}
+
+@frozen
+public struct F80_S2
+{
+    public let f0 : UInt64;
+}
+
+public func swiftCallbackFunc80(f: (UInt64, Int, Int32, Int16, UInt, F80_S0, Int16, Int, Int8, Int32, UInt32, F80_S1, F80_S2, UInt64) -> Float) -> Float {
+    return f(4470427843910624516, 8383677749057878551, 2017117925, -10531, 3438375001906177611, F80_S0(f0: 65220), 7107, 7315288835693680178, -48, 813870434, 1092037477, F80_S1(f0: 7104962838387954470, f1: F80_S1_S0(f0: F80_S1_S0_S0(f0: 236))), F80_S2(f0: 7460392384225808790), 364121728483540667)
+}
+
+@frozen
+public struct F81_S0
+{
+    public let f0 : Float;
+    public let f1 : Float;
+    public let f2 : Int;
+    public let f3 : Int;
+    public let f4 : Int;
+}
+
+@frozen
+public struct F81_Ret
+{
+    public let f0 : Int;
+}
+
+public func swiftCallbackFunc81(f: (UInt8, UInt32, UInt8, F81_S0, Int8) -> F81_Ret) -> F81_Ret {
+    return f(53, 57591489, 19, F81_S0(f0: 5675845, f1: 6469988, f2: 5775316279348621124, f3: 7699091894067057939, f4: 1049086627558950131), 15)
+}
+
+@frozen
+public struct F82_S0_S0
+{
+    public let f0 : Float;
+    public let f1 : UInt;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F82_S0
+{
+    public let f0 : UInt;
+    public let f1 : F82_S0_S0;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F82_S1
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F82_S2
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F82_S3_S0
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F82_S3
+{
+    public let f0 : Double;
+    public let f1 : UInt;
+    public let f2 : F82_S3_S0;
+}
+
+@frozen
+public struct F82_S4
+{
+    public let f0 : UInt64;
+}
+
+public func swiftCallbackFunc82(f: (Int64, F82_S0, Int16, Int8, UInt32, F82_S1, Int32, Int64, Int8, Double, F82_S2, F82_S3, F82_S4) -> Float) -> Float {
+    return f(6454754584537364459, F82_S0(f0: 6703634779264968131, f1: F82_S0_S0(f0: 1010059, f1: 4772968591609202284, f2: 64552), f2: 47126), 9869, -8, 1741550381, F82_S1(f0: 705741282), 1998781399, 7787961471254401526, -27, 4429830670351707, F82_S2(f0: 4975772762589349422), F82_S3(f0: 1423948098664774, f1: 504607538824251986, f2: F82_S3_S0(f0: 1940911018)), F82_S4(f0: 2988623645681463667))
+}
+
+@frozen
+public struct F83_S0
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F83_Ret
+{
+    public let f0 : Int16;
+}
+
+public func swiftCallbackFunc83(f: (Int8, F83_S0, Int16) -> F83_Ret) -> F83_Ret {
+    return f(17, F83_S0(f0: 530755056), -11465)
+}
+
+@frozen
+public struct F84_S0
+{
+    public let f0 : UInt;
+    public let f1 : UInt32;
+    public let f2 : UInt;
+    public let f3 : UInt64;
+    public let f4 : Int32;
+}
+
+@frozen
+public struct F84_S1
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F84_S2
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct F84_S3
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F84_S4
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F84_S5
+{
+    public let f0 : Int;
+    public let f1 : Int16;
+}
+
+@frozen
+public struct F84_S6
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F84_S7
+{
+    public let f0 : Int32;
+}
+
+public func swiftCallbackFunc84(f: (Int32, F84_S0, F84_S1, Double, Int32, Int16, Double, F84_S2, F84_S3, Double, F84_S4, F84_S5, F84_S6, F84_S7, UInt) -> Int) -> Int {
+    return f(1605022009, F84_S0(f0: 6165049220831866664, f1: 1235491183, f2: 7926620970405586826, f3: 2633248816907294140, f4: 2012834055), F84_S1(f0: 2881830362339122988), 4065309434963087, 1125165825, -32360, 1145602045200029, F84_S2(f0: 5655563), F84_S3(f0: 14), 3919593995303128, F84_S4(f0: 26090), F84_S5(f0: 8584898862398781737, f1: -5185), F84_S6(f0: 144), F84_S7(f0: 2138004352), 9102562043027810686)
+}
+
+@frozen
+public struct F85_S0
+{
+    public let f0 : Double;
+    public let f1 : Double;
+    public let f2 : Int8;
+    public let f3 : Int32;
+}
+
+@frozen
+public struct F85_S1
+{
+    public let f0 : Int64;
+    public let f1 : UInt16;
+    public let f2 : UInt64;
+    public let f3 : UInt;
+}
+
+@frozen
+public struct F85_S2
+{
+    public let f0 : Float;
+    public let f1 : Float;
+    public let f2 : UInt32;
+}
+
+@frozen
+public struct F85_S3
+{
+    public let f0 : UInt8;
+}
+
+@frozen
+public struct F85_S4
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F85_S5
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F85_Ret
+{
+    public let f0 : UInt32;
+    public let f1 : UInt16;
+    public let f2 : Int32;
+    public let f3 : Double;
+    public let f4 : Int;
+    public let f5 : UInt64;
+    public let f6 : Int64;
+}
+
+public func swiftCallbackFunc85(f: (F85_S0, F85_S1, UInt32, F85_S2, Int64, F85_S3, Int64, F85_S4, UInt16, UInt8, Int32, UInt32, Int32, Float, F85_S5, Int64) -> F85_Ret) -> F85_Ret {
+    return f(F85_S0(f0: 4325646965362202, f1: 3313084380250914, f2: 42, f3: 2034100272), F85_S1(f0: 1365643665271339575, f1: 25442, f2: 3699631470459352980, f3: 7611776251925132200), 911446742, F85_S2(f0: 352423, f1: 7150341, f2: 2090089360), 5731257538910387688, F85_S3(f0: 171), 5742887585483060342, F85_S4(f0: 1182236975680416316), 32137, 44, 2143531010, 1271996557, 1035188446, 1925443, F85_S5(f0: 2591574394337603), 721102428782331317)
+}
+
+@frozen
+public struct F86_S0
+{
+    public let f0 : Int;
+    public let f1 : Float;
+    public let f2 : Int16;
+    public let f3 : Int8;
+}
+
+@frozen
+public struct F86_S1
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F86_S2
+{
+    public let f0 : Int;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F86_S3
+{
+    public let f0 : UInt16;
+    public let f1 : Float;
+}
+
+@frozen
+public struct F86_Ret
+{
+    public let f0 : Int16;
+    public let f1 : UInt32;
+    public let f2 : Double;
+    public let f3 : UInt8;
+}
+
+public func swiftCallbackFunc86(f: (Float, Int16, Int, Int16, Float, F86_S0, F86_S1, F86_S2, Int, UInt32, UInt, UInt, Float, Int64, F86_S3, UInt) -> F86_Ret) -> F86_Ret {
+    return f(2913632, 3735, 2773655476379499086, 22973, 8292778, F86_S0(f0: 5562042565258891920, f1: 8370233, f2: 18292, f3: -32), F86_S1(f0: 486951152980016), F86_S2(f0: 170033426151098456, f1: 3867810), 7390780928011218856, 1504267943, 2046987193814931100, 4860202472307588968, 1644019, 8084012412562897328, F86_S3(f0: 46301, f1: 5633701), 1911608136082175332)
+}
+
+@frozen
+public struct F87_S0
+{
+    public let f0 : Int32;
+    public let f1 : Int16;
+    public let f2 : Int32;
+}
+
+@frozen
+public struct F87_S1
+{
+    public let f0 : Float;
+}
+
+public func swiftCallbackFunc87(f: (Float, Int, F87_S0, F87_S1) -> UInt64) -> UInt64 {
+    return f(1413086, 4206825694012787823, F87_S0(f0: 70240457, f1: 30503, f2: 671751848), F87_S1(f0: 6641304))
+}
+
+@frozen
+public struct F88_S0
+{
+    public let f0 : Int8;
+    public let f1 : Int16;
+    public let f2 : UInt8;
+    public let f3 : Double;
+    public let f4 : UInt16;
+}
+
+@frozen
+public struct F88_S1
+{
+    public let f0 : Double;
+    public let f1 : UInt8;
+}
+
+@frozen
+public struct F88_S2
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F88_S3
+{
+    public let f0 : Int8;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F88_Ret
+{
+    public let f0 : Int32;
+    public let f1 : UInt32;
+    public let f2 : Int;
+    public let f3 : UInt64;
+}
+
+public func swiftCallbackFunc88(f: (F88_S0, F88_S1, Float, UInt, Float, Int, F88_S2, UInt64, F88_S3, UInt64) -> F88_Ret) -> F88_Ret {
+    return f(F88_S0(f0: 125, f1: -10705, f2: 21, f3: 361845689097003, f4: 41749), F88_S1(f0: 1754583995806427, f1: 178), 4705205, 5985040566226273121, 2484194, 1904196135427766362, F88_S2(f0: 5436710892090266406), 4250368992471675181, F88_S3(f0: -87, f1: 362108395), 3388632419732870796)
+}
+
+@frozen
+public struct F89_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F89_Ret_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F89_Ret
+{
+    public let f0 : Int32;
+    public let f1 : F89_Ret_S0;
+    public let f2 : UInt;
+    public let f3 : Int64;
+}
+
+public func swiftCallbackFunc89(f: (F89_S0) -> F89_Ret) -> F89_Ret {
+    return f(F89_S0(f0: 2137010348736191))
+}
+
+@frozen
+public struct F90_S0_S0_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F90_S0_S0
+{
+    public let f0 : F90_S0_S0_S0;
+}
+
+@frozen
+public struct F90_S0
+{
+    public let f0 : F90_S0_S0;
+    public let f1 : UInt;
+    public let f2 : UInt32;
+    public let f3 : Int64;
+    public let f4 : Int16;
+}
+
+@frozen
+public struct F90_S1
+{
+    public let f0 : UInt16;
+    public let f1 : Int16;
+}
+
+@frozen
+public struct F90_S2
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct F90_S3
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F90_S4
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F90_Ret
+{
+    public let f0 : Int16;
+    public let f1 : Int;
+}
+
+public func swiftCallbackFunc90(f: (Int64, Float, F90_S0, UInt32, UInt16, F90_S1, F90_S2, F90_S3, F90_S4) -> F90_Ret) -> F90_Ret {
+    return f(920081051198141017, 661904, F90_S0(f0: F90_S0_S0(f0: F90_S0_S0_S0(f0: 3898354148166517637)), f1: 1003118682503285076, f2: 1418362079, f3: 3276689793574299746, f4: -18559), 1773011602, 32638, F90_S1(f0: 47129, f1: -31849), F90_S2(f0: 4795020225668482328), F90_S3(f0: 5307513663902191175), F90_S4(f0: 7057074401404034083))
+}
+
+@frozen
+public struct F91_S0
+{
+    public let f0 : Int8;
+    public let f1 : Int;
+    public let f2 : UInt16;
+    public let f3 : UInt16;
+}
+
+@frozen
+public struct F91_S1
+{
+    public let f0 : Double;
+    public let f1 : UInt64;
+    public let f2 : Int8;
+    public let f3 : Int64;
+    public let f4 : Float;
+}
+
+@frozen
+public struct F91_S2_S0_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct F91_S2_S0
+{
+    public let f0 : F91_S2_S0_S0;
+}
+
+@frozen
+public struct F91_S2
+{
+    public let f0 : Double;
+    public let f1 : F91_S2_S0;
+    public let f2 : Int16;
+}
+
+@frozen
+public struct F91_S3_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F91_S3
+{
+    public let f0 : F91_S3_S0;
+}
+
+@frozen
+public struct F91_Ret
+{
+    public let f0 : Int64;
+    public let f1 : UInt64;
+    public let f2 : Int16;
+    public let f3 : UInt32;
+}
+
+public func swiftCallbackFunc91(f: (F91_S0, Int16, UInt32, Double, F91_S1, Int64, UInt64, Float, F91_S2, Int, F91_S3) -> F91_Ret) -> F91_Ret {
+    return f(F91_S0(f0: -117, f1: 6851485542307521521, f2: 23224, f3: 28870), -26318, 874052395, 3651199868446152, F91_S1(f0: 3201729800438540, f1: 7737032265509566019, f2: 123, f3: 7508633930609553617, f4: 8230501), 2726677037673277403, 4990410590084533996, 3864639, F91_S2(f0: 1763083442463892, f1: F91_S2_S0(f0: F91_S2_S0_S0(f0: 6783710957456602933)), f2: 2927), 3359440517385934325, F91_S3(f0: F91_S3_S0(f0: 3281136825102667421)))
+}
+
+@frozen
+public struct F92_S0
+{
+    public let f0 : Double;
+    public let f1 : Double;
+}
+
+@frozen
+public struct F92_S1
+{
+    public let f0 : UInt32;
+    public let f1 : Int64;
+    public let f2 : UInt32;
+    public let f3 : Int16;
+    public let f4 : UInt64;
+}
+
+@frozen
+public struct F92_S2_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F92_S2
+{
+    public let f0 : UInt32;
+    public let f1 : Int64;
+    public let f2 : F92_S2_S0;
+}
+
+@frozen
+public struct F92_Ret
+{
+    public let f0 : Int32;
+}
+
+public func swiftCallbackFunc92(f: (UInt32, Int64, F92_S0, Int, UInt8, F92_S1, F92_S2, UInt8, Int, Int32) -> F92_Ret) -> F92_Ret {
+    return f(479487770, 3751818229732502126, F92_S0(f0: 3486664439392893, f1: 1451061144702448), 1103649059951788126, 17, F92_S1(f0: 1542537473, f1: 2256304993713022795, f2: 1773847876, f3: -4712, f4: 2811859744132572185), F92_S2(f0: 290315682, f1: 4847587202070249866, f2: F92_S2_S0(f0: 20774)), 8, 2206063999764082749, 1481391120)
+}
+
+@frozen
+public struct F93_S0
+{
+    public let f0 : Int8;
+    public let f1 : UInt32;
+}
+
+@frozen
+public struct F93_S1
+{
+    public let f0 : UInt32;
+}
+
+@frozen
+public struct F93_Ret
+{
+    public let f0 : Int;
+    public let f1 : UInt64;
+}
+
+public func swiftCallbackFunc93(f: (UInt, UInt16, Double, F93_S0, F93_S1) -> F93_Ret) -> F93_Ret {
+    return f(5170226481546239050, 2989, 1630717078645270, F93_S0(f0: -46, f1: 859171256), F93_S1(f0: 254449240))
+}
+
+@frozen
+public struct F94_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct F94_S1
+{
+    public let f0 : Int32;
+    public let f1 : UInt;
+}
+
+@frozen
+public struct F94_S2
+{
+    public let f0 : Int;
+    public let f1 : UInt32;
+    public let f2 : UInt16;
+}
+
+@frozen
+public struct F94_S3
+{
+    public let f0 : UInt8;
+    public let f1 : Int32;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F94_S4
+{
+    public let f0 : Int32;
+    public let f1 : Int64;
+    public let f2 : Float;
+}
+
+@frozen
+public struct F94_S5
+{
+    public let f0 : Int16;
+    public let f1 : UInt;
+    public let f2 : Int16;
+    public let f3 : Int8;
+}
+
+@frozen
+public struct F94_Ret
+{
+    public let f0 : Int64;
+}
+
+public func swiftCallbackFunc94(f: (F94_S0, Int16, F94_S1, F94_S2, F94_S3, Float, F94_S4, UInt32, F94_S5, Int16) -> F94_Ret) -> F94_Ret {
+    return f(F94_S0(f0: 8626725032375870186), -7755, F94_S1(f0: 544707027, f1: 2251410026467996594), F94_S2(f0: 2972912419231960385, f1: 740529487, f2: 34526), F94_S3(f0: 41, f1: 1598856955, f2: 5126603), 7242977, F94_S4(f0: 473684762, f1: 4023878650965716094, f2: 2777693), 1612378906, F94_S5(f0: -17074, f1: 2666903737827472071, f2: 418, f3: 106), -14547)
+}
+
+@frozen
+public struct F95_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct F95_S1
+{
+    public let f0 : UInt32;
+    public let f1 : Int16;
+    public let f2 : Double;
+}
+
+@frozen
+public struct F95_S2
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct F95_Ret_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F95_Ret
+{
+    public let f0 : Int;
+    public let f1 : Int16;
+    public let f2 : Int8;
+    public let f3 : UInt8;
+    public let f4 : F95_Ret_S0;
+}
+
+public func swiftCallbackFunc95(f: (F95_S0, UInt, F95_S1, F95_S2) -> F95_Ret) -> F95_Ret {
+    return f(F95_S0(f0: 45388, f1: 6620047889014935849), 97365157264460373, F95_S1(f0: 357234637, f1: -13720, f2: 3313430568949662), F95_S2(f0: 14248))
+}
+
+@frozen
+public struct F96_S0
+{
+    public let f0 : Int64;
+    public let f1 : UInt32;
+    public let f2 : Int16;
+    public let f3 : Double;
+    public let f4 : Double;
+}
+
+@frozen
+public struct F96_S1
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct F96_S2
+{
+    public let f0 : Float;
+}
+
+public func swiftCallbackFunc96(f: (UInt32, F96_S0, Float, UInt64, UInt32, UInt32, F96_S1, F96_S2, Int64) -> UInt64) -> UInt64 {
+    return f(1103144790, F96_S0(f0: 496343164737276588, f1: 1541085564, f2: -16271, f3: 1062575289573718, f4: 570255786498865), 7616839, 7370881799887414383, 390392554, 1492692139, F96_S1(f0: 1666031716012978365), F96_S2(f0: 3427394), 4642371619161527189)
+}
+
+@frozen
+public struct F97_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct F97_S1
+{
+    public let f0 : Int64;
+    public let f1 : UInt64;
+}
+
+@frozen
+public struct F97_S2
+{
+    public let f0 : UInt8;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct F97_S3
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct F97_Ret_S0
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct F97_Ret
+{
+    public let f0 : Double;
+    public let f1 : UInt;
+    public let f2 : F97_Ret_S0;
+    public let f3 : UInt16;
+    public let f4 : UInt32;
+}
+
+public func swiftCallbackFunc97(f: (F97_S0, F97_S1, F97_S2, F97_S3) -> F97_Ret) -> F97_Ret {
+    return f(F97_S0(f0: -87), F97_S1(f0: 1414208343412494909, f1: 453284654311256466), F97_S2(f0: 224, f1: 1712859616922087053), F97_S3(f0: 3987671154739178))
+}
+
+@frozen
+public struct F98_S0
+{
+    public let f0 : Int32;
+}
+
+public func swiftCallbackFunc98(f: (Float, UInt16, F98_S0, UInt16) -> Int) -> Int {
+    return f(2863898, 37573, F98_S0(f0: 1073068257), 53560)
+}
+
+@frozen
+public struct F99_S0
+{
+    public let f0 : Int;
+    public let f1 : UInt32;
+    public let f2 : Int32;
+    public let f3 : UInt32;
+}
+
+@frozen
+public struct F99_S1
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct F99_S2
+{
+    public let f0 : UInt8;
+}
+
+public func swiftCallbackFunc99(f: (Int64, UInt, Float, UInt16, F99_S0, UInt8, Float, UInt8, Int8, F99_S1, F99_S2) -> UInt64) -> UInt64 {
+    return f(1152281003884062246, 2482384127373829622, 3361150, 2121, F99_S0(f0: 4484545590050696958, f1: 422528630, f2: 1418346646, f3: 1281567856), 223, 1917656, 103, -46, F99_S1(f0: 14554), F99_S2(f0: 68))
+}
+
diff --git a/src/tests/Interop/Swift/SwiftErrorHandling/SwiftErrorHandling.cs b/src/tests/Interop/Swift/SwiftErrorHandling/SwiftErrorHandling.cs
index 67a398d357e1..b1575e04deab 100644
--- a/src/tests/Interop/Swift/SwiftErrorHandling/SwiftErrorHandling.cs
+++ b/src/tests/Interop/Swift/SwiftErrorHandling/SwiftErrorHandling.cs
@@ -23,9 +23,46 @@ public class ErrorHandlingTests
     [DllImport(SwiftLib, EntryPoint = "$s18SwiftErrorHandling018conditionallyThrowB004willE0s5Int32VAE_tKF")]
     public static extern nint conditionallyThrowErrorOnStack(int willThrow, int dummy1, int dummy2, int dummy3, int dummy4, int dummy5, int dummy6, int dummy7, int dummy8, int dummy9, ref SwiftError error);
 
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s18SwiftErrorHandling26nativeFunctionWithCallback03setB0_ys5Int32V_yAEXEtF")]
+    public static extern unsafe void NativeFunctionWithCallback(int setError, delegate* unmanaged[Swift]<SwiftError*, int, void> callback, SwiftError* error);
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s18SwiftErrorHandling26nativeFunctionWithCallback5value03setB0_s5Int32VAF_A3F_AFtXEtF")]
+    public static extern unsafe int NativeFunctionWithCallback(int value, int setError, delegate* unmanaged[Swift]<SwiftError*, int, int, int> callback, SwiftError* error);
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static unsafe void ConditionallySetErrorTo21(SwiftError* error, int setError) {
+        if (setError != 0)
+        {
+            *error = new SwiftError((void*)21);
+        }
+        else
+        {
+            *error = new SwiftError(null);
+        }
+    }
+
+    [UnmanagedCallersOnly(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    private static unsafe int ConditionallySetErrorAndReturn(SwiftError* error, int value, int setError) {
+        if (setError != 0)
+        {
+            *error = new SwiftError((void*)value);
+        }
+        else
+        {
+            *error = new SwiftError(null);
+        }
+
+        return (value * 2);
+    }
+
     [DllImport(SwiftLib, EntryPoint = "$s18SwiftErrorHandling05getMyB7Message4from13messageLengthSPys6UInt16VGSgs0B0_p_s5Int32VztF")]
     public unsafe static extern void* GetErrorMessage(void* handle, out int length);
 
+    [DllImport(SwiftLib, EntryPoint = "$s18SwiftErrorHandling16freeStringBuffer6bufferySpys6UInt16VG_tF")]
+    public unsafe static extern void FreeErrorMessageBuffer(void* stringPtr);
+
     [Fact]
     public unsafe static void TestSwiftErrorThrown()
     {
@@ -89,6 +126,40 @@ public unsafe static void TestSwiftErrorOnStackNotThrown()
         Assert.True(error.Value == null, "No Swift error was expected to be thrown.");
         Assert.True(result == 42, "The result from Swift does not match the expected value.");
     }
+
+    [Fact]
+    [SkipOnMono("needs reverse P/Invoke support")]
+    public static unsafe void TestUnmanagedCallersOnly()
+    {
+        SwiftError error;
+        int expectedValue = 21;
+        NativeFunctionWithCallback(1, &ConditionallySetErrorTo21, &error);
+
+        int value = (int)error.Value;
+        Assert.True(value == expectedValue, string.Format("The value retrieved does not match the expected value. Expected: {0}, Actual: {1}", expectedValue, value));
+
+        NativeFunctionWithCallback(0, &ConditionallySetErrorTo21, &error);
+
+        Assert.True(error.Value == null, "Expected SwiftError value to be null.");
+    }
+
+    [Fact]
+    [SkipOnMono("needs reverse P/Invoke support")]
+    public static unsafe void TestUnmanagedCallersOnlyWithReturn()
+    {
+        SwiftError error;
+        int expectedValue = 42;
+        int retValue = NativeFunctionWithCallback(expectedValue, 1, &ConditionallySetErrorAndReturn, &error);
+
+        int value = (int)error.Value;
+        Assert.True(value == expectedValue, string.Format("The value retrieved does not match the expected value. Expected: {0}, Actual: {1}", expectedValue, value));
+        Assert.True(retValue == (expectedValue * 2), string.Format("Return value does not match expected value. Expected: {0}, Actual: {1}", (expectedValue * 2), retValue));
+
+        retValue = NativeFunctionWithCallback(expectedValue, 0, &ConditionallySetErrorAndReturn, &error);
+
+        Assert.True(error.Value == null, "Expected SwiftError value to be null.");
+        Assert.True(retValue == (expectedValue * 2), string.Format("Return value does not match expected value. Expected: {0}, Actual: {1}", (expectedValue * 2), retValue));
+    }
     
     private static void SetErrorMessageForSwift(string message)
     {
@@ -99,7 +170,7 @@ private unsafe static string GetErrorMessageFromSwift(SwiftError error)
     {
         void* pointer = GetErrorMessage(error.Value, out int messageLength);
         string errorMessage = Marshal.PtrToStringUni((IntPtr)pointer, messageLength);
-        NativeMemory.Free((void*)pointer);
+        FreeErrorMessageBuffer(pointer);
         return errorMessage;
     }
 }
diff --git a/src/tests/Interop/Swift/SwiftErrorHandling/SwiftErrorHandling.csproj b/src/tests/Interop/Swift/SwiftErrorHandling/SwiftErrorHandling.csproj
index a57cd84cf884..89eda99352fd 100644
--- a/src/tests/Interop/Swift/SwiftErrorHandling/SwiftErrorHandling.csproj
+++ b/src/tests/Interop/Swift/SwiftErrorHandling/SwiftErrorHandling.csproj
@@ -3,7 +3,7 @@
     <!-- Needed for CLRTestTargetUnsupported, CMakeProjectReference -->
     <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/93631 -->
+    <!-- Swift interop is supported on Apple platforms only -->
     <CLRTestTargetUnsupported Condition="'$(TargetsOSX)' != 'true' and '$(TargetsAppleMobile)' != 'true'">true</CLRTestTargetUnsupported>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Interop/Swift/SwiftErrorHandling/SwiftErrorHandling.swift b/src/tests/Interop/Swift/SwiftErrorHandling/SwiftErrorHandling.swift
index 20022c0dba3e..9067ea2372db 100644
--- a/src/tests/Interop/Swift/SwiftErrorHandling/SwiftErrorHandling.swift
+++ b/src/tests/Interop/Swift/SwiftErrorHandling/SwiftErrorHandling.swift
@@ -33,3 +33,15 @@ public func getMyErrorMessage(from error: Error, messageLength: inout Int32) ->
     }
     return nil
 }
+
+public func freeStringBuffer(buffer: UnsafeMutablePointer<unichar>) {
+    buffer.deallocate()
+}
+
+public func nativeFunctionWithCallback(setError: Int32, _ callback: (Int32) -> Void) {
+    callback(setError)
+}
+
+public func nativeFunctionWithCallback(value: Int32, setError: Int32, _ callback: (Int32, Int32) -> Int32) -> Int32 {
+    return callback(value, setError)
+}
diff --git a/src/tests/Interop/Swift/SwiftInvalidCallConv/SwiftInvalidCallConv.cs b/src/tests/Interop/Swift/SwiftInvalidCallConv/SwiftInvalidCallConv.cs
index 41c98e3791f9..9e4bc140829f 100644
--- a/src/tests/Interop/Swift/SwiftInvalidCallConv/SwiftInvalidCallConv.cs
+++ b/src/tests/Interop/Swift/SwiftInvalidCallConv/SwiftInvalidCallConv.cs
@@ -5,6 +5,7 @@
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.InteropServices.Swift;
+using System.Numerics;
 using Xunit;
 
 public class InvalidCallingConvTests
@@ -36,6 +37,10 @@ public class StringClass
     [DllImport(SwiftLib, EntryPoint = "$s20SwiftInvalidCallConv10simpleFuncyyF")]
     public static extern void FuncWithNonPrimitiveArg(StringClass arg1);
 
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s20SwiftInvalidCallConv10simpleFuncyyF")]
+    public static extern void FuncWithSIMDArg(Vector4 vec);
+
     [Fact]
     public static void TestFuncWithTwoSelfParameters()
     {
@@ -77,4 +82,12 @@ public static void TestFuncWithNonPrimitiveArg()
         arg1.value = "fail";
         Assert.Throws<InvalidProgramException>(() => FuncWithNonPrimitiveArg(arg1));
     }
+
+    [Fact]
+    public static void TestFuncWithSIMDArg()
+    {
+        // Invalid due to a SIMD argument.
+        Vector4 vec = new Vector4(); // Using Vector4 as it is a SIMD type across all architectures for Mono
+        Assert.Throws<InvalidProgramException>(() => FuncWithSIMDArg(vec));
+    }
 }
diff --git a/src/tests/Interop/Swift/SwiftInvalidCallConv/SwiftInvalidCallConv.csproj b/src/tests/Interop/Swift/SwiftInvalidCallConv/SwiftInvalidCallConv.csproj
index a57cd84cf884..49be10b93939 100644
--- a/src/tests/Interop/Swift/SwiftInvalidCallConv/SwiftInvalidCallConv.csproj
+++ b/src/tests/Interop/Swift/SwiftInvalidCallConv/SwiftInvalidCallConv.csproj
@@ -3,8 +3,10 @@
     <!-- Needed for CLRTestTargetUnsupported, CMakeProjectReference -->
     <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/93631 -->
+    <!-- Swift interop is supported on Apple platforms only -->
     <CLRTestTargetUnsupported Condition="'$(TargetsOSX)' != 'true' and '$(TargetsAppleMobile)' != 'true'">true</CLRTestTargetUnsupported>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/93631 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' != 'mono'">true</CLRTestTargetUnsupported>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="$(MSBuildProjectName).cs" />
diff --git a/src/tests/Interop/Swift/SwiftRetAbiStress/CMakeLists.txt b/src/tests/Interop/Swift/SwiftRetAbiStress/CMakeLists.txt
new file mode 100644
index 000000000000..9f0e2a3423b2
--- /dev/null
+++ b/src/tests/Interop/Swift/SwiftRetAbiStress/CMakeLists.txt
@@ -0,0 +1,21 @@
+project(SwiftRetAbiStress)
+include ("${CLR_INTEROP_TEST_ROOT}/Interop.cmake")
+
+set(SOURCE SwiftRetAbiStress)
+
+if (NOT SWIFT_COMPILER_TARGET AND CLR_CMAKE_TARGET_OSX)
+    set(SWIFT_PLATFORM "macosx")
+    set(SWIFT_PLATFORM_SUFFIX "")
+    set(SWIFT_DEPLOYMENT_TARGET ${CMAKE_OSX_DEPLOYMENT_TARGET})
+    set(SWIFT_COMPILER_TARGET "${CMAKE_OSX_ARCHITECTURES}-apple-${SWIFT_PLATFORM}${SWIFT_DEPLOYMENT_TARGET}${SWIFT_PLATFORM_SUFFIX}")
+endif()
+
+add_custom_target(${SOURCE} ALL
+    COMMAND xcrun swiftc -target ${SWIFT_COMPILER_TARGET} -emit-library ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE}.swift -o ${CMAKE_CURRENT_BINARY_DIR}/lib${SOURCE}.dylib
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE}.swift
+    COMMENT "Generating ${SOURCE} library"
+)
+
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/lib${SOURCE}.dylib
+    DESTINATION bin
+)
diff --git a/src/tests/Interop/Swift/SwiftRetAbiStress/SwiftRetAbiStress.cs b/src/tests/Interop/Swift/SwiftRetAbiStress/SwiftRetAbiStress.cs
new file mode 100644
index 000000000000..f3bdd60660b7
--- /dev/null
+++ b/src/tests/Interop/Swift/SwiftRetAbiStress/SwiftRetAbiStress.cs
@@ -0,0 +1,3919 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.InteropServices.Swift;
+using Xunit;
+
+public class SwiftRetAbiStress
+{
+    private const string SwiftLib = "libSwiftRetAbiStress.dylib";
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S0
+    {
+        public short F0;
+        public int F1;
+        public ulong F2;
+
+        public S0(short f0, int f1, ulong f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB5Func0AA2S0VyF")]
+    private static extern S0 SwiftRetFunc0();
+
+    [Fact]
+    public static void TestSwiftRetFunc0()
+    {
+        Console.Write("Running SwiftRetFunc0: ");
+        S0 val = SwiftRetFunc0();
+        Assert.Equal((short)-17813, val.F0);
+        Assert.Equal((int)318006528, val.F1);
+        Assert.Equal((ulong)1195162122024233590, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct S1
+    {
+        public short F0;
+        public float F1;
+        public long F2;
+        public uint F3;
+
+        public S1(short f0, float f1, long f2, uint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB5Func1AA2S1VyF")]
+    private static extern S1 SwiftRetFunc1();
+
+    [Fact]
+    public static void TestSwiftRetFunc1()
+    {
+        Console.Write("Running SwiftRetFunc1: ");
+        S1 val = SwiftRetFunc1();
+        Assert.Equal((short)-29793, val.F0);
+        Assert.Equal((float)7351779, val.F1);
+        Assert.Equal((long)133491708229548754, val.F2);
+        Assert.Equal((uint)665726990, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S2_S0
+    {
+        public ulong F0;
+
+        public S2_S0(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct S2
+    {
+        public S2_S0 F0;
+        public byte F1;
+        public ushort F2;
+        public float F3;
+        public int F4;
+
+        public S2(S2_S0 f0, byte f1, ushort f2, float f3, int f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB5Func2AA2S2VyF")]
+    private static extern S2 SwiftRetFunc2();
+
+    [Fact]
+    public static void TestSwiftRetFunc2()
+    {
+        Console.Write("Running SwiftRetFunc2: ");
+        S2 val = SwiftRetFunc2();
+        Assert.Equal((ulong)2153637757371267722, val.F0.F0);
+        Assert.Equal((byte)150, val.F1);
+        Assert.Equal((ushort)48920, val.F2);
+        Assert.Equal((float)3564327, val.F3);
+        Assert.Equal((int)1310569731, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct S3
+    {
+        public long F0;
+        public double F1;
+        public sbyte F2;
+        public int F3;
+        public ushort F4;
+        public byte F5;
+        public double F6;
+
+        public S3(long f0, double f1, sbyte f2, int f3, ushort f4, byte f5, double f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB5Func3AA2S3VyF")]
+    private static extern S3 SwiftRetFunc3();
+
+    [Fact]
+    public static void TestSwiftRetFunc3()
+    {
+        Console.Write("Running SwiftRetFunc3: ");
+        S3 val = SwiftRetFunc3();
+        Assert.Equal((long)5610153900386943274, val.F0);
+        Assert.Equal((double)2431035148834736, val.F1);
+        Assert.Equal((sbyte)111, val.F2);
+        Assert.Equal((int)772269424, val.F3);
+        Assert.Equal((ushort)19240, val.F4);
+        Assert.Equal((byte)146, val.F5);
+        Assert.Equal((double)821805530740405, val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct S4
+    {
+        public sbyte F0;
+        public uint F1;
+        public ulong F2;
+        public long F3;
+
+        public S4(sbyte f0, uint f1, ulong f2, long f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB5Func4AA2S4VyF")]
+    private static extern S4 SwiftRetFunc4();
+
+    [Fact]
+    public static void TestSwiftRetFunc4()
+    {
+        Console.Write("Running SwiftRetFunc4: ");
+        S4 val = SwiftRetFunc4();
+        Assert.Equal((sbyte)125, val.F0);
+        Assert.Equal((uint)377073381, val.F1);
+        Assert.Equal((ulong)964784376430620335, val.F2);
+        Assert.Equal((long)5588038704850976624, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S5_S0
+    {
+        public uint F0;
+        public double F1;
+
+        public S5_S0(uint f0, double f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 49)]
+    struct S5
+    {
+        public ulong F0;
+        public sbyte F1;
+        public nuint F2;
+        public S5_S0 F3;
+        public nint F4;
+        public byte F5;
+
+        public S5(ulong f0, sbyte f1, nuint f2, S5_S0 f3, nint f4, byte f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB5Func5AA2S5VyF")]
+    private static extern S5 SwiftRetFunc5();
+
+    [Fact]
+    public static void TestSwiftRetFunc5()
+    {
+        Console.Write("Running SwiftRetFunc5: ");
+        S5 val = SwiftRetFunc5();
+        Assert.Equal((ulong)5315019731968023493, val.F0);
+        Assert.Equal((sbyte)114, val.F1);
+        Assert.Equal((nuint)unchecked((nuint)1154655179105889397), val.F2);
+        Assert.Equal((uint)1468030771, val.F3.F0);
+        Assert.Equal((double)3066473182924818, val.F3.F1);
+        Assert.Equal((nint)unchecked((nint)6252650621827449809), val.F4);
+        Assert.Equal((byte)129, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct S6
+    {
+        public int F0;
+        public short F1;
+        public long F2;
+        public ushort F3;
+
+        public S6(int f0, short f1, long f2, ushort f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB5Func6AA2S6VyF")]
+    private static extern S6 SwiftRetFunc6();
+
+    [Fact]
+    public static void TestSwiftRetFunc6()
+    {
+        Console.Write("Running SwiftRetFunc6: ");
+        S6 val = SwiftRetFunc6();
+        Assert.Equal((int)743741783, val.F0);
+        Assert.Equal((short)-6821, val.F1);
+        Assert.Equal((long)5908745692727636656, val.F2);
+        Assert.Equal((ushort)64295, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S7_S0
+    {
+        public nint F0;
+
+        public S7_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S7
+    {
+        public S7_S0 F0;
+
+        public S7(S7_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB5Func7AA2S7VyF")]
+    private static extern S7 SwiftRetFunc7();
+
+    [Fact]
+    public static void TestSwiftRetFunc7()
+    {
+        Console.Write("Running SwiftRetFunc7: ");
+        S7 val = SwiftRetFunc7();
+        Assert.Equal((nint)unchecked((nint)7625368278886567558), val.F0.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S8
+    {
+        public nint F0;
+
+        public S8(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB5Func8AA2S8VyF")]
+    private static extern S8 SwiftRetFunc8();
+
+    [Fact]
+    public static void TestSwiftRetFunc8()
+    {
+        Console.Write("Running SwiftRetFunc8: ");
+        S8 val = SwiftRetFunc8();
+        Assert.Equal((nint)unchecked((nint)775279004683334365), val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S9_S0
+    {
+        public short F0;
+        public int F1;
+
+        public S9_S0(short f0, int f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 26)]
+    struct S9
+    {
+        public uint F0;
+        public nint F1;
+        public S9_S0 F2;
+        public ushort F3;
+
+        public S9(uint f0, nint f1, S9_S0 f2, ushort f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB5Func9AA2S9VyF")]
+    private static extern S9 SwiftRetFunc9();
+
+    [Fact]
+    public static void TestSwiftRetFunc9()
+    {
+        Console.Write("Running SwiftRetFunc9: ");
+        S9 val = SwiftRetFunc9();
+        Assert.Equal((uint)1223030410, val.F0);
+        Assert.Equal((nint)unchecked((nint)4720638462358523954), val.F1);
+        Assert.Equal((short)30631, val.F2.F0);
+        Assert.Equal((int)1033774469, val.F2.F1);
+        Assert.Equal((ushort)64474, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S10
+    {
+        public float F0;
+        public float F1;
+
+        public S10(float f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func10AA3S10VyF")]
+    private static extern S10 SwiftRetFunc10();
+
+    [Fact]
+    public static void TestSwiftRetFunc10()
+    {
+        Console.Write("Running SwiftRetFunc10: ");
+        S10 val = SwiftRetFunc10();
+        Assert.Equal((float)3276917, val.F0);
+        Assert.Equal((float)6694615, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 21)]
+    struct S11
+    {
+        public double F0;
+        public nint F1;
+        public uint F2;
+        public sbyte F3;
+
+        public S11(double f0, nint f1, uint f2, sbyte f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func11AA3S11VyF")]
+    private static extern S11 SwiftRetFunc11();
+
+    [Fact]
+    public static void TestSwiftRetFunc11()
+    {
+        Console.Write("Running SwiftRetFunc11: ");
+        S11 val = SwiftRetFunc11();
+        Assert.Equal((double)938206348036312, val.F0);
+        Assert.Equal((nint)unchecked((nint)6559514243876905696), val.F1);
+        Assert.Equal((uint)1357772248, val.F2);
+        Assert.Equal((sbyte)59, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S12
+    {
+        public double F0;
+
+        public S12(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func12AA3S12VyF")]
+    private static extern S12 SwiftRetFunc12();
+
+    [Fact]
+    public static void TestSwiftRetFunc12()
+    {
+        Console.Write("Running SwiftRetFunc12: ");
+        S12 val = SwiftRetFunc12();
+        Assert.Equal((double)1580503485222363, val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct S13
+    {
+        public uint F0;
+
+        public S13(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func13AA3S13VyF")]
+    private static extern S13 SwiftRetFunc13();
+
+    [Fact]
+    public static void TestSwiftRetFunc13()
+    {
+        Console.Write("Running SwiftRetFunc13: ");
+        S13 val = SwiftRetFunc13();
+        Assert.Equal((uint)1381551558, val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct S14_S0_S0
+    {
+        public sbyte F0;
+
+        public S14_S0_S0(sbyte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct S14_S0
+    {
+        public S14_S0_S0 F0;
+
+        public S14_S0(S14_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 26)]
+    struct S14
+    {
+        public int F0;
+        public ushort F1;
+        public sbyte F2;
+        public float F3;
+        public ulong F4;
+        public S14_S0 F5;
+        public sbyte F6;
+
+        public S14(int f0, ushort f1, sbyte f2, float f3, ulong f4, S14_S0 f5, sbyte f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func14AA3S14VyF")]
+    private static extern S14 SwiftRetFunc14();
+
+    [Fact]
+    public static void TestSwiftRetFunc14()
+    {
+        Console.Write("Running SwiftRetFunc14: ");
+        S14 val = SwiftRetFunc14();
+        Assert.Equal((int)1765691191, val.F0);
+        Assert.Equal((ushort)56629, val.F1);
+        Assert.Equal((sbyte)25, val.F2);
+        Assert.Equal((float)2944946, val.F3);
+        Assert.Equal((ulong)951929105049584033, val.F4);
+        Assert.Equal((sbyte)-30, val.F5.F0.F0);
+        Assert.Equal((sbyte)66, val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct S15_S0
+    {
+        public nuint F0;
+        public float F1;
+
+        public S15_S0(nuint f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct S15
+    {
+        public nint F0;
+        public S15_S0 F1;
+        public ushort F2;
+        public int F3;
+
+        public S15(nint f0, S15_S0 f1, ushort f2, int f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func15AA3S15VyF")]
+    private static extern S15 SwiftRetFunc15();
+
+    [Fact]
+    public static void TestSwiftRetFunc15()
+    {
+        Console.Write("Running SwiftRetFunc15: ");
+        S15 val = SwiftRetFunc15();
+        Assert.Equal((nint)unchecked((nint)2090703541638269172), val.F0);
+        Assert.Equal((nuint)unchecked((nuint)6408314016925514463), val.F1.F0);
+        Assert.Equal((float)6534515, val.F1.F1);
+        Assert.Equal((ushort)30438, val.F2);
+        Assert.Equal((int)1745811802, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 33)]
+    struct S16
+    {
+        public uint F0;
+        public ulong F1;
+        public byte F2;
+        public int F3;
+        public nuint F4;
+        public sbyte F5;
+
+        public S16(uint f0, ulong f1, byte f2, int f3, nuint f4, sbyte f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func16AA3S16VyF")]
+    private static extern S16 SwiftRetFunc16();
+
+    [Fact]
+    public static void TestSwiftRetFunc16()
+    {
+        Console.Write("Running SwiftRetFunc16: ");
+        S16 val = SwiftRetFunc16();
+        Assert.Equal((uint)585220635, val.F0);
+        Assert.Equal((ulong)4034210936973794153, val.F1);
+        Assert.Equal((byte)48, val.F2);
+        Assert.Equal((int)1155081155, val.F3);
+        Assert.Equal((nuint)unchecked((nuint)806384837403045657), val.F4);
+        Assert.Equal((sbyte)54, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 3)]
+    struct S17
+    {
+        public byte F0;
+        public sbyte F1;
+        public byte F2;
+
+        public S17(byte f0, sbyte f1, byte f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func17AA3S17VyF")]
+    private static extern S17 SwiftRetFunc17();
+
+    [Fact]
+    public static void TestSwiftRetFunc17()
+    {
+        Console.Write("Running SwiftRetFunc17: ");
+        S17 val = SwiftRetFunc17();
+        Assert.Equal((byte)23, val.F0);
+        Assert.Equal((sbyte)112, val.F1);
+        Assert.Equal((byte)15, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S18_S0
+    {
+        public uint F0;
+        public float F1;
+
+        public S18_S0(uint f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct S18
+    {
+        public S18_S0 F0;
+        public nint F1;
+        public int F2;
+        public ushort F3;
+        public short F4;
+
+        public S18(S18_S0 f0, nint f1, int f2, ushort f3, short f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func18AA3S18VyF")]
+    private static extern S18 SwiftRetFunc18();
+
+    [Fact]
+    public static void TestSwiftRetFunc18()
+    {
+        Console.Write("Running SwiftRetFunc18: ");
+        S18 val = SwiftRetFunc18();
+        Assert.Equal((uint)1964425016, val.F0.F0);
+        Assert.Equal((float)2767295, val.F0.F1);
+        Assert.Equal((nint)unchecked((nint)6016563774923595868), val.F1);
+        Assert.Equal((int)1648562735, val.F2);
+        Assert.Equal((ushort)378, val.F3);
+        Assert.Equal((short)-20536, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct S19
+    {
+        public byte F0;
+        public ushort F1;
+        public float F2;
+        public ulong F3;
+        public int F4;
+
+        public S19(byte f0, ushort f1, float f2, ulong f3, int f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func19AA3S19VyF")]
+    private static extern S19 SwiftRetFunc19();
+
+    [Fact]
+    public static void TestSwiftRetFunc19()
+    {
+        Console.Write("Running SwiftRetFunc19: ");
+        S19 val = SwiftRetFunc19();
+        Assert.Equal((byte)188, val.F0);
+        Assert.Equal((ushort)47167, val.F1);
+        Assert.Equal((float)6781297, val.F2);
+        Assert.Equal((ulong)8140268502944465472, val.F3);
+        Assert.Equal((int)708690468, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S20_S0
+    {
+        public uint F0;
+        public float F1;
+
+        public S20_S0(uint f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct S20
+    {
+        public S20_S0 F0;
+        public byte F1;
+
+        public S20(S20_S0 f0, byte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func20AA3S20VyF")]
+    private static extern S20 SwiftRetFunc20();
+
+    [Fact]
+    public static void TestSwiftRetFunc20()
+    {
+        Console.Write("Running SwiftRetFunc20: ");
+        S20 val = SwiftRetFunc20();
+        Assert.Equal((uint)2019361333, val.F0.F0);
+        Assert.Equal((float)938975, val.F0.F1);
+        Assert.Equal((byte)192, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct S21_S0_S0
+    {
+        public ushort F0;
+
+        public S21_S0_S0(ushort f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct S21_S0
+    {
+        public S21_S0_S0 F0;
+
+        public S21_S0(S21_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 42)]
+    struct S21
+    {
+        public double F0;
+        public double F1;
+        public nuint F2;
+        public nint F3;
+        public ulong F4;
+        public S21_S0 F5;
+
+        public S21(double f0, double f1, nuint f2, nint f3, ulong f4, S21_S0 f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func21AA3S21VyF")]
+    private static extern S21 SwiftRetFunc21();
+
+    [Fact]
+    public static void TestSwiftRetFunc21()
+    {
+        Console.Write("Running SwiftRetFunc21: ");
+        S21 val = SwiftRetFunc21();
+        Assert.Equal((double)1693878073402490, val.F0);
+        Assert.Equal((double)3392111340517811, val.F1);
+        Assert.Equal((nuint)unchecked((nuint)3584917502172813732), val.F2);
+        Assert.Equal((nint)unchecked((nint)665495086154608745), val.F3);
+        Assert.Equal((ulong)2918107814961929578, val.F4);
+        Assert.Equal((ushort)4634, val.F5.F0.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct S22
+    {
+        public uint F0;
+
+        public S22(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func22AA3S22VyF")]
+    private static extern S22 SwiftRetFunc22();
+
+    [Fact]
+    public static void TestSwiftRetFunc22()
+    {
+        Console.Write("Running SwiftRetFunc22: ");
+        S22 val = SwiftRetFunc22();
+        Assert.Equal((uint)640156952, val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 41)]
+    struct S23
+    {
+        public byte F0;
+        public short F1;
+        public ulong F2;
+        public nuint F3;
+        public nuint F4;
+        public ulong F5;
+        public byte F6;
+
+        public S23(byte f0, short f1, ulong f2, nuint f3, nuint f4, ulong f5, byte f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func23AA3S23VyF")]
+    private static extern S23 SwiftRetFunc23();
+
+    [Fact]
+    public static void TestSwiftRetFunc23()
+    {
+        Console.Write("Running SwiftRetFunc23: ");
+        S23 val = SwiftRetFunc23();
+        Assert.Equal((byte)122, val.F0);
+        Assert.Equal((short)28995, val.F1);
+        Assert.Equal((ulong)25673626033589541, val.F2);
+        Assert.Equal((nuint)unchecked((nuint)828363978755325884), val.F3);
+        Assert.Equal((nuint)unchecked((nuint)3065573182429720699), val.F4);
+        Assert.Equal((ulong)1484484917001276079, val.F5);
+        Assert.Equal((byte)209, val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S24
+    {
+        public ulong F0;
+        public ulong F1;
+
+        public S24(ulong f0, ulong f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func24AA3S24VyF")]
+    private static extern S24 SwiftRetFunc24();
+
+    [Fact]
+    public static void TestSwiftRetFunc24()
+    {
+        Console.Write("Running SwiftRetFunc24: ");
+        S24 val = SwiftRetFunc24();
+        Assert.Equal((ulong)2621245238416080387, val.F0);
+        Assert.Equal((ulong)6541787564638363256, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S25_S0
+    {
+        public nint F0;
+
+        public S25_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct S25
+    {
+        public sbyte F0;
+        public sbyte F1;
+        public byte F2;
+        public S25_S0 F3;
+        public uint F4;
+
+        public S25(sbyte f0, sbyte f1, byte f2, S25_S0 f3, uint f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func25AA3S25VyF")]
+    private static extern S25 SwiftRetFunc25();
+
+    [Fact]
+    public static void TestSwiftRetFunc25()
+    {
+        Console.Write("Running SwiftRetFunc25: ");
+        S25 val = SwiftRetFunc25();
+        Assert.Equal((sbyte)30, val.F0);
+        Assert.Equal((sbyte)-8, val.F1);
+        Assert.Equal((byte)168, val.F2);
+        Assert.Equal((nint)unchecked((nint)7601538494489501573), val.F3.F0);
+        Assert.Equal((uint)814523741, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct S26
+    {
+        public float F0;
+
+        public S26(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func26AA3S26VyF")]
+    private static extern S26 SwiftRetFunc26();
+
+    [Fact]
+    public static void TestSwiftRetFunc26()
+    {
+        Console.Write("Running SwiftRetFunc26: ");
+        S26 val = SwiftRetFunc26();
+        Assert.Equal((float)3681545, val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 48)]
+    struct S27
+    {
+        public long F0;
+        public double F1;
+        public sbyte F2;
+        public nint F3;
+        public short F4;
+        public long F5;
+
+        public S27(long f0, double f1, sbyte f2, nint f3, short f4, long f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func27AA3S27VyF")]
+    private static extern S27 SwiftRetFunc27();
+
+    [Fact]
+    public static void TestSwiftRetFunc27()
+    {
+        Console.Write("Running SwiftRetFunc27: ");
+        S27 val = SwiftRetFunc27();
+        Assert.Equal((long)4847421047018330189, val.F0);
+        Assert.Equal((double)3655171692392280, val.F1);
+        Assert.Equal((sbyte)46, val.F2);
+        Assert.Equal((nint)unchecked((nint)4476120319602257660), val.F3);
+        Assert.Equal((short)-6106, val.F4);
+        Assert.Equal((long)5756567968111212829, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S28_S0
+    {
+        public double F0;
+
+        public S28_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S28
+    {
+        public float F0;
+        public short F1;
+        public S28_S0 F2;
+        public double F3;
+        public ulong F4;
+
+        public S28(float f0, short f1, S28_S0 f2, double f3, ulong f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func28AA3S28VyF")]
+    private static extern S28 SwiftRetFunc28();
+
+    [Fact]
+    public static void TestSwiftRetFunc28()
+    {
+        Console.Write("Running SwiftRetFunc28: ");
+        S28 val = SwiftRetFunc28();
+        Assert.Equal((float)3491512, val.F0);
+        Assert.Equal((short)5249, val.F1);
+        Assert.Equal((double)1107064327388314, val.F2.F0);
+        Assert.Equal((double)2170381648425673, val.F3);
+        Assert.Equal((ulong)5138313315157580943, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 34)]
+    struct S29
+    {
+        public ushort F0;
+        public uint F1;
+        public short F2;
+        public int F3;
+        public int F4;
+        public ulong F5;
+        public short F6;
+
+        public S29(ushort f0, uint f1, short f2, int f3, int f4, ulong f5, short f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func29AA3S29VyF")]
+    private static extern S29 SwiftRetFunc29();
+
+    [Fact]
+    public static void TestSwiftRetFunc29()
+    {
+        Console.Write("Running SwiftRetFunc29: ");
+        S29 val = SwiftRetFunc29();
+        Assert.Equal((ushort)39000, val.F0);
+        Assert.Equal((uint)408611655, val.F1);
+        Assert.Equal((short)18090, val.F2);
+        Assert.Equal((int)351857085, val.F3);
+        Assert.Equal((int)1103441843, val.F4);
+        Assert.Equal((ulong)5162040247631126074, val.F5);
+        Assert.Equal((short)-27930, val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S30_S0
+    {
+        public sbyte F0;
+        public sbyte F1;
+        public int F2;
+
+        public S30_S0(sbyte f0, sbyte f1, int f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct S30_S1
+    {
+        public float F0;
+
+        public S30_S1(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct S30
+    {
+        public float F0;
+        public S30_S0 F1;
+        public S30_S1 F2;
+        public long F3;
+
+        public S30(float f0, S30_S0 f1, S30_S1 f2, long f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func30AA3S30VyF")]
+    private static extern S30 SwiftRetFunc30();
+
+    [Fact]
+    public static void TestSwiftRetFunc30()
+    {
+        Console.Write("Running SwiftRetFunc30: ");
+        S30 val = SwiftRetFunc30();
+        Assert.Equal((float)6492602, val.F0);
+        Assert.Equal((sbyte)76, val.F1.F0);
+        Assert.Equal((sbyte)-26, val.F1.F1);
+        Assert.Equal((int)1777644423, val.F1.F2);
+        Assert.Equal((float)6558571, val.F2.F0);
+        Assert.Equal((long)5879147675377398012, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 21)]
+    struct S31
+    {
+        public long F0;
+        public ulong F1;
+        public ushort F2;
+        public ushort F3;
+        public sbyte F4;
+
+        public S31(long f0, ulong f1, ushort f2, ushort f3, sbyte f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func31AA3S31VyF")]
+    private static extern S31 SwiftRetFunc31();
+
+    [Fact]
+    public static void TestSwiftRetFunc31()
+    {
+        Console.Write("Running SwiftRetFunc31: ");
+        S31 val = SwiftRetFunc31();
+        Assert.Equal((long)4699402628739628277, val.F0);
+        Assert.Equal((ulong)7062790893852687562, val.F1);
+        Assert.Equal((ushort)28087, val.F2);
+        Assert.Equal((ushort)11088, val.F3);
+        Assert.Equal((sbyte)69, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S32
+    {
+        public int F0;
+        public ulong F1;
+        public ulong F2;
+        public uint F3;
+        public short F4;
+        public ushort F5;
+
+        public S32(int f0, ulong f1, ulong f2, uint f3, short f4, ushort f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func32AA3S32VyF")]
+    private static extern S32 SwiftRetFunc32();
+
+    [Fact]
+    public static void TestSwiftRetFunc32()
+    {
+        Console.Write("Running SwiftRetFunc32: ");
+        S32 val = SwiftRetFunc32();
+        Assert.Equal((int)688805466, val.F0);
+        Assert.Equal((ulong)8860655326984381661, val.F1);
+        Assert.Equal((ulong)6943423675662271404, val.F2);
+        Assert.Equal((uint)196368476, val.F3);
+        Assert.Equal((short)14229, val.F4);
+        Assert.Equal((ushort)34635, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct S33
+    {
+        public ushort F0;
+        public uint F1;
+        public int F2;
+        public ushort F3;
+        public float F4;
+        public ulong F5;
+        public nint F6;
+
+        public S33(ushort f0, uint f1, int f2, ushort f3, float f4, ulong f5, nint f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func33AA3S33VyF")]
+    private static extern S33 SwiftRetFunc33();
+
+    [Fact]
+    public static void TestSwiftRetFunc33()
+    {
+        Console.Write("Running SwiftRetFunc33: ");
+        S33 val = SwiftRetFunc33();
+        Assert.Equal((ushort)9297, val.F0);
+        Assert.Equal((uint)7963252, val.F1);
+        Assert.Equal((int)556244690, val.F2);
+        Assert.Equal((ushort)19447, val.F3);
+        Assert.Equal((float)6930550, val.F4);
+        Assert.Equal((ulong)126294981263481729, val.F5);
+        Assert.Equal((nint)unchecked((nint)2540579257616511618), val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct S34
+    {
+        public long F0;
+        public uint F1;
+        public ulong F2;
+
+        public S34(long f0, uint f1, ulong f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func34AA3S34VyF")]
+    private static extern S34 SwiftRetFunc34();
+
+    [Fact]
+    public static void TestSwiftRetFunc34()
+    {
+        Console.Write("Running SwiftRetFunc34: ");
+        S34 val = SwiftRetFunc34();
+        Assert.Equal((long)5845561428743737556, val.F0);
+        Assert.Equal((uint)1358941228, val.F1);
+        Assert.Equal((ulong)3701080255861218446, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 34)]
+    struct S35
+    {
+        public float F0;
+        public float F1;
+        public long F2;
+        public byte F3;
+        public double F4;
+        public ushort F5;
+
+        public S35(float f0, float f1, long f2, byte f3, double f4, ushort f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func35AA3S35VyF")]
+    private static extern S35 SwiftRetFunc35();
+
+    [Fact]
+    public static void TestSwiftRetFunc35()
+    {
+        Console.Write("Running SwiftRetFunc35: ");
+        S35 val = SwiftRetFunc35();
+        Assert.Equal((float)5982956, val.F0);
+        Assert.Equal((float)3675164, val.F1);
+        Assert.Equal((long)229451138397478297, val.F2);
+        Assert.Equal((byte)163, val.F3);
+        Assert.Equal((double)2925293762193390, val.F4);
+        Assert.Equal((ushort)5018, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct S36
+    {
+        public int F0;
+        public long F1;
+        public ulong F2;
+
+        public S36(int f0, long f1, ulong f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func36AA3S36VyF")]
+    private static extern S36 SwiftRetFunc36();
+
+    [Fact]
+    public static void TestSwiftRetFunc36()
+    {
+        Console.Write("Running SwiftRetFunc36: ");
+        S36 val = SwiftRetFunc36();
+        Assert.Equal((int)1915776502, val.F0);
+        Assert.Equal((long)2197655909333830531, val.F1);
+        Assert.Equal((ulong)6072941592567177049, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S37
+    {
+        public byte F0;
+        public double F1;
+
+        public S37(byte f0, double f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func37AA3S37VyF")]
+    private static extern S37 SwiftRetFunc37();
+
+    [Fact]
+    public static void TestSwiftRetFunc37()
+    {
+        Console.Write("Running SwiftRetFunc37: ");
+        S37 val = SwiftRetFunc37();
+        Assert.Equal((byte)18, val.F0);
+        Assert.Equal((double)4063164371882658, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S38
+    {
+        public nuint F0;
+        public long F1;
+        public byte F2;
+        public nuint F3;
+
+        public S38(nuint f0, long f1, byte f2, nuint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func38AA3S38VyF")]
+    private static extern S38 SwiftRetFunc38();
+
+    [Fact]
+    public static void TestSwiftRetFunc38()
+    {
+        Console.Write("Running SwiftRetFunc38: ");
+        S38 val = SwiftRetFunc38();
+        Assert.Equal((nuint)unchecked((nuint)7389960750529773276), val.F0);
+        Assert.Equal((long)2725802169582362061, val.F1);
+        Assert.Equal((byte)2, val.F2);
+        Assert.Equal((nuint)unchecked((nuint)3659261019360356514), val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct S39
+    {
+        public int F0;
+        public int F1;
+        public nint F2;
+        public short F3;
+        public ushort F4;
+
+        public S39(int f0, int f1, nint f2, short f3, ushort f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func39AA3S39VyF")]
+    private static extern S39 SwiftRetFunc39();
+
+    [Fact]
+    public static void TestSwiftRetFunc39()
+    {
+        Console.Write("Running SwiftRetFunc39: ");
+        S39 val = SwiftRetFunc39();
+        Assert.Equal((int)50995691, val.F0);
+        Assert.Equal((int)1623216479, val.F1);
+        Assert.Equal((nint)unchecked((nint)2906650346451599789), val.F2);
+        Assert.Equal((short)28648, val.F3);
+        Assert.Equal((ushort)8278, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct S40_S0
+    {
+        public float F0;
+        public byte F1;
+        public sbyte F2;
+        public nuint F3;
+        public double F4;
+
+        public S40_S0(float f0, byte f1, sbyte f2, nuint f3, double f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct S40
+    {
+        public S40_S0 F0;
+        public short F1;
+        public short F2;
+
+        public S40(S40_S0 f0, short f1, short f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func40AA3S40VyF")]
+    private static extern S40 SwiftRetFunc40();
+
+    [Fact]
+    public static void TestSwiftRetFunc40()
+    {
+        Console.Write("Running SwiftRetFunc40: ");
+        S40 val = SwiftRetFunc40();
+        Assert.Equal((float)7087264, val.F0.F0);
+        Assert.Equal((byte)37, val.F0.F1);
+        Assert.Equal((sbyte)-5, val.F0.F2);
+        Assert.Equal((nuint)unchecked((nuint)479915249821490487), val.F0.F3);
+        Assert.Equal((double)144033730096589, val.F0.F4);
+        Assert.Equal((short)28654, val.F1);
+        Assert.Equal((short)16398, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S41
+    {
+        public nuint F0;
+        public nuint F1;
+
+        public S41(nuint f0, nuint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func41AA3S41VyF")]
+    private static extern S41 SwiftRetFunc41();
+
+    [Fact]
+    public static void TestSwiftRetFunc41()
+    {
+        Console.Write("Running SwiftRetFunc41: ");
+        S41 val = SwiftRetFunc41();
+        Assert.Equal((nuint)unchecked((nuint)7923718819069382599), val.F0);
+        Assert.Equal((nuint)unchecked((nuint)1539666179674725957), val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct S42_S0
+    {
+        public int F0;
+
+        public S42_S0(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S42
+    {
+        public uint F0;
+        public long F1;
+        public S42_S0 F2;
+        public nuint F3;
+
+        public S42(uint f0, long f1, S42_S0 f2, nuint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func42AA3S42VyF")]
+    private static extern S42 SwiftRetFunc42();
+
+    [Fact]
+    public static void TestSwiftRetFunc42()
+    {
+        Console.Write("Running SwiftRetFunc42: ");
+        S42 val = SwiftRetFunc42();
+        Assert.Equal((uint)1046060439, val.F0);
+        Assert.Equal((long)8249831314190867613, val.F1);
+        Assert.Equal((int)1097582349, val.F2.F0);
+        Assert.Equal((nuint)unchecked((nuint)2864677262092469436), val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct S43_S0_S0
+    {
+        public float F0;
+
+        public S43_S0_S0(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct S43_S0
+    {
+        public S43_S0_S0 F0;
+
+        public S43_S0(S43_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 5)]
+    struct S43
+    {
+        public S43_S0 F0;
+        public sbyte F1;
+
+        public S43(S43_S0 f0, sbyte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func43AA3S43VyF")]
+    private static extern S43 SwiftRetFunc43();
+
+    [Fact]
+    public static void TestSwiftRetFunc43()
+    {
+        Console.Write("Running SwiftRetFunc43: ");
+        S43 val = SwiftRetFunc43();
+        Assert.Equal((float)1586338, val.F0.F0.F0);
+        Assert.Equal((sbyte)104, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct S44
+    {
+        public byte F0;
+        public int F1;
+        public nint F2;
+        public uint F3;
+
+        public S44(byte f0, int f1, nint f2, uint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func44AA3S44VyF")]
+    private static extern S44 SwiftRetFunc44();
+
+    [Fact]
+    public static void TestSwiftRetFunc44()
+    {
+        Console.Write("Running SwiftRetFunc44: ");
+        S44 val = SwiftRetFunc44();
+        Assert.Equal((byte)94, val.F0);
+        Assert.Equal((int)1109076022, val.F1);
+        Assert.Equal((nint)unchecked((nint)3135595850598607828), val.F2);
+        Assert.Equal((uint)760084013, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S45_S0
+    {
+        public long F0;
+
+        public S45_S0(long f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S45
+    {
+        public short F0;
+        public ulong F1;
+        public nint F2;
+        public S45_S0 F3;
+
+        public S45(short f0, ulong f1, nint f2, S45_S0 f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func45AA3S45VyF")]
+    private static extern S45 SwiftRetFunc45();
+
+    [Fact]
+    public static void TestSwiftRetFunc45()
+    {
+        Console.Write("Running SwiftRetFunc45: ");
+        S45 val = SwiftRetFunc45();
+        Assert.Equal((short)3071, val.F0);
+        Assert.Equal((ulong)5908138438609341766, val.F1);
+        Assert.Equal((nint)unchecked((nint)5870206722419946629), val.F2);
+        Assert.Equal((long)8128455876189744801, val.F3.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S46
+    {
+        public short F0;
+        public sbyte F1;
+        public sbyte F2;
+        public uint F3;
+        public byte F4;
+        public int F5;
+
+        public S46(short f0, sbyte f1, sbyte f2, uint f3, byte f4, int f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func46AA3S46VyF")]
+    private static extern S46 SwiftRetFunc46();
+
+    [Fact]
+    public static void TestSwiftRetFunc46()
+    {
+        Console.Write("Running SwiftRetFunc46: ");
+        S46 val = SwiftRetFunc46();
+        Assert.Equal((short)14794, val.F0);
+        Assert.Equal((sbyte)60, val.F1);
+        Assert.Equal((sbyte)-77, val.F2);
+        Assert.Equal((uint)653898879, val.F3);
+        Assert.Equal((byte)224, val.F4);
+        Assert.Equal((int)266602433, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct S47_S0
+    {
+        public sbyte F0;
+
+        public S47_S0(sbyte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 9)]
+    struct S47
+    {
+        public double F0;
+        public S47_S0 F1;
+
+        public S47(double f0, S47_S0 f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func47AA3S47VyF")]
+    private static extern S47 SwiftRetFunc47();
+
+    [Fact]
+    public static void TestSwiftRetFunc47()
+    {
+        Console.Write("Running SwiftRetFunc47: ");
+        S47 val = SwiftRetFunc47();
+        Assert.Equal((double)3195976594911793, val.F0);
+        Assert.Equal((sbyte)-91, val.F1.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S48
+    {
+        public nint F0;
+
+        public S48(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func48AA3S48VyF")]
+    private static extern S48 SwiftRetFunc48();
+
+    [Fact]
+    public static void TestSwiftRetFunc48()
+    {
+        Console.Write("Running SwiftRetFunc48: ");
+        S48 val = SwiftRetFunc48();
+        Assert.Equal((nint)unchecked((nint)778504172538154682), val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S49_S0_S0
+    {
+        public ulong F0;
+
+        public S49_S0_S0(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S49_S0
+    {
+        public S49_S0_S0 F0;
+
+        public S49_S0(S49_S0_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct S49
+    {
+        public ulong F0;
+        public S49_S0 F1;
+        public sbyte F2;
+        public double F3;
+        public uint F4;
+        public uint F5;
+
+        public S49(ulong f0, S49_S0 f1, sbyte f2, double f3, uint f4, uint f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func49AA3S49VyF")]
+    private static extern S49 SwiftRetFunc49();
+
+    [Fact]
+    public static void TestSwiftRetFunc49()
+    {
+        Console.Write("Running SwiftRetFunc49: ");
+        S49 val = SwiftRetFunc49();
+        Assert.Equal((ulong)4235011519458710874, val.F0);
+        Assert.Equal((ulong)3120420438742285733, val.F1.F0.F0);
+        Assert.Equal((sbyte)-8, val.F2);
+        Assert.Equal((double)1077419570643725, val.F3);
+        Assert.Equal((uint)1985303212, val.F4);
+        Assert.Equal((uint)264580506, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct S50
+    {
+        public int F0;
+
+        public S50(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func50AA3S50VyF")]
+    private static extern S50 SwiftRetFunc50();
+
+    [Fact]
+    public static void TestSwiftRetFunc50()
+    {
+        Console.Write("Running SwiftRetFunc50: ");
+        S50 val = SwiftRetFunc50();
+        Assert.Equal((int)1043912405, val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct S51_S0_S0_S0
+    {
+        public float F0;
+
+        public S51_S0_S0_S0(float f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 6)]
+    struct S51_S0_S0
+    {
+        public S51_S0_S0_S0 F0;
+        public short F1;
+
+        public S51_S0_S0(S51_S0_S0_S0 f0, short f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct S51_S0
+    {
+        public double F0;
+        public S51_S0_S0 F1;
+        public byte F2;
+        public long F3;
+
+        public S51_S0(double f0, S51_S0_S0 f1, byte f2, long f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S51
+    {
+        public S51_S0 F0;
+        public double F1;
+
+        public S51(S51_S0 f0, double f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func51AA3S51VyF")]
+    private static extern S51 SwiftRetFunc51();
+
+    [Fact]
+    public static void TestSwiftRetFunc51()
+    {
+        Console.Write("Running SwiftRetFunc51: ");
+        S51 val = SwiftRetFunc51();
+        Assert.Equal((double)3266680719186600, val.F0.F0);
+        Assert.Equal((float)428247, val.F0.F1.F0.F0);
+        Assert.Equal((short)-24968, val.F0.F1.F1);
+        Assert.Equal((byte)76, val.F0.F2);
+        Assert.Equal((long)183022772513065490, val.F0.F3);
+        Assert.Equal((double)2661928101793033, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 41)]
+    struct S52
+    {
+        public uint F0;
+        public long F1;
+        public uint F2;
+        public ulong F3;
+        public nint F4;
+        public sbyte F5;
+
+        public S52(uint f0, long f1, uint f2, ulong f3, nint f4, sbyte f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func52AA3S52VyF")]
+    private static extern S52 SwiftRetFunc52();
+
+    [Fact]
+    public static void TestSwiftRetFunc52()
+    {
+        Console.Write("Running SwiftRetFunc52: ");
+        S52 val = SwiftRetFunc52();
+        Assert.Equal((uint)1812191671, val.F0);
+        Assert.Equal((long)6594574760089190928, val.F1);
+        Assert.Equal((uint)831147243, val.F2);
+        Assert.Equal((ulong)3301835731003365248, val.F3);
+        Assert.Equal((nint)unchecked((nint)5382332538247340743), val.F4);
+        Assert.Equal((sbyte)-77, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S53_S0
+    {
+        public sbyte F0;
+        public nuint F1;
+
+        public S53_S0(sbyte f0, nuint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 37)]
+    struct S53
+    {
+        public S53_S0 F0;
+        public int F1;
+        public long F2;
+        public float F3;
+        public sbyte F4;
+
+        public S53(S53_S0 f0, int f1, long f2, float f3, sbyte f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func53AA3S53VyF")]
+    private static extern S53 SwiftRetFunc53();
+
+    [Fact]
+    public static void TestSwiftRetFunc53()
+    {
+        Console.Write("Running SwiftRetFunc53: ");
+        S53 val = SwiftRetFunc53();
+        Assert.Equal((sbyte)-123, val.F0.F0);
+        Assert.Equal((nuint)unchecked((nuint)3494916243607193741), val.F0.F1);
+        Assert.Equal((int)1406699798, val.F1);
+        Assert.Equal((long)4018943158751734338, val.F2);
+        Assert.Equal((float)1084415, val.F3);
+        Assert.Equal((sbyte)-8, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S54_S0
+    {
+        public double F0;
+
+        public S54_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S54
+    {
+        public nint F0;
+        public nint F1;
+        public S54_S0 F2;
+        public long F3;
+
+        public S54(nint f0, nint f1, S54_S0 f2, long f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func54AA3S54VyF")]
+    private static extern S54 SwiftRetFunc54();
+
+    [Fact]
+    public static void TestSwiftRetFunc54()
+    {
+        Console.Write("Running SwiftRetFunc54: ");
+        S54 val = SwiftRetFunc54();
+        Assert.Equal((nint)unchecked((nint)8623517456704997133), val.F0);
+        Assert.Equal((nint)unchecked((nint)1521939500434086364), val.F1);
+        Assert.Equal((double)3472783299414218, val.F2.F0);
+        Assert.Equal((long)4761507229870258916, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 22)]
+    struct S55
+    {
+        public short F0;
+        public uint F1;
+        public long F2;
+        public uint F3;
+        public sbyte F4;
+        public byte F5;
+
+        public S55(short f0, uint f1, long f2, uint f3, sbyte f4, byte f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func55AA3S55VyF")]
+    private static extern S55 SwiftRetFunc55();
+
+    [Fact]
+    public static void TestSwiftRetFunc55()
+    {
+        Console.Write("Running SwiftRetFunc55: ");
+        S55 val = SwiftRetFunc55();
+        Assert.Equal((short)-28051, val.F0);
+        Assert.Equal((uint)1759912152, val.F1);
+        Assert.Equal((long)2038322238348454200, val.F2);
+        Assert.Equal((uint)601094102, val.F3);
+        Assert.Equal((sbyte)5, val.F4);
+        Assert.Equal((byte)75, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct S56
+    {
+        public ulong F0;
+        public float F1;
+        public sbyte F2;
+        public int F3;
+
+        public S56(ulong f0, float f1, sbyte f2, int f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func56AA3S56VyF")]
+    private static extern S56 SwiftRetFunc56();
+
+    [Fact]
+    public static void TestSwiftRetFunc56()
+    {
+        Console.Write("Running SwiftRetFunc56: ");
+        S56 val = SwiftRetFunc56();
+        Assert.Equal((ulong)6313168909786453069, val.F0);
+        Assert.Equal((float)6254558, val.F1);
+        Assert.Equal((sbyte)115, val.F2);
+        Assert.Equal((int)847834891, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S57
+    {
+        public nuint F0;
+        public short F1;
+        public sbyte F2;
+        public int F3;
+
+        public S57(nuint f0, short f1, sbyte f2, int f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func57AA3S57VyF")]
+    private static extern S57 SwiftRetFunc57();
+
+    [Fact]
+    public static void TestSwiftRetFunc57()
+    {
+        Console.Write("Running SwiftRetFunc57: ");
+        S57 val = SwiftRetFunc57();
+        Assert.Equal((nuint)unchecked((nuint)546304219852233452), val.F0);
+        Assert.Equal((short)-27416, val.F1);
+        Assert.Equal((sbyte)47, val.F2);
+        Assert.Equal((int)1094575684, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S58
+    {
+        public ulong F0;
+        public ulong F1;
+
+        public S58(ulong f0, ulong f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func58AA3S58VyF")]
+    private static extern S58 SwiftRetFunc58();
+
+    [Fact]
+    public static void TestSwiftRetFunc58()
+    {
+        Console.Write("Running SwiftRetFunc58: ");
+        S58 val = SwiftRetFunc58();
+        Assert.Equal((ulong)4612004722568513699, val.F0);
+        Assert.Equal((ulong)2222525519606580195, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 41)]
+    struct S59
+    {
+        public sbyte F0;
+        public nuint F1;
+        public nint F2;
+        public sbyte F3;
+        public long F4;
+        public byte F5;
+
+        public S59(sbyte f0, nuint f1, nint f2, sbyte f3, long f4, byte f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func59AA3S59VyF")]
+    private static extern S59 SwiftRetFunc59();
+
+    [Fact]
+    public static void TestSwiftRetFunc59()
+    {
+        Console.Write("Running SwiftRetFunc59: ");
+        S59 val = SwiftRetFunc59();
+        Assert.Equal((sbyte)-92, val.F0);
+        Assert.Equal((nuint)unchecked((nuint)7281011081566942937), val.F1);
+        Assert.Equal((nint)unchecked((nint)8203439771560005792), val.F2);
+        Assert.Equal((sbyte)103, val.F3);
+        Assert.Equal((long)1003386607251132236, val.F4);
+        Assert.Equal((byte)6, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S60
+    {
+        public ulong F0;
+        public nint F1;
+
+        public S60(ulong f0, nint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func60AA3S60VyF")]
+    private static extern S60 SwiftRetFunc60();
+
+    [Fact]
+    public static void TestSwiftRetFunc60()
+    {
+        Console.Write("Running SwiftRetFunc60: ");
+        S60 val = SwiftRetFunc60();
+        Assert.Equal((ulong)6922353269487057763, val.F0);
+        Assert.Equal((nint)unchecked((nint)103032455997325768), val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 20)]
+    struct S61_S0
+    {
+        public long F0;
+        public long F1;
+        public float F2;
+
+        public S61_S0(long f0, long f1, float f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 36)]
+    struct S61
+    {
+        public ulong F0;
+        public S61_S0 F1;
+        public short F2;
+        public int F3;
+
+        public S61(ulong f0, S61_S0 f1, short f2, int f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func61AA3S61VyF")]
+    private static extern S61 SwiftRetFunc61();
+
+    [Fact]
+    public static void TestSwiftRetFunc61()
+    {
+        Console.Write("Running SwiftRetFunc61: ");
+        S61 val = SwiftRetFunc61();
+        Assert.Equal((ulong)3465845922566501572, val.F0);
+        Assert.Equal((long)8266662359091888314, val.F1.F0);
+        Assert.Equal((long)7511705648638703076, val.F1.F1);
+        Assert.Equal((float)535470, val.F1.F2);
+        Assert.Equal((short)-5945, val.F2);
+        Assert.Equal((int)523043523, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S62_S0_S0
+    {
+        public nint F0;
+
+        public S62_S0_S0(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S62_S0
+    {
+        public ushort F0;
+        public short F1;
+        public ushort F2;
+        public S62_S0_S0 F3;
+
+        public S62_S0(ushort f0, short f1, ushort f2, S62_S0_S0 f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 26)]
+    struct S62
+    {
+        public S62_S0 F0;
+        public nint F1;
+        public ushort F2;
+
+        public S62(S62_S0 f0, nint f1, ushort f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func62AA3S62VyF")]
+    private static extern S62 SwiftRetFunc62();
+
+    [Fact]
+    public static void TestSwiftRetFunc62()
+    {
+        Console.Write("Running SwiftRetFunc62: ");
+        S62 val = SwiftRetFunc62();
+        Assert.Equal((ushort)50789, val.F0.F0);
+        Assert.Equal((short)30245, val.F0.F1);
+        Assert.Equal((ushort)35063, val.F0.F2);
+        Assert.Equal((nint)unchecked((nint)3102684963408623932), val.F0.F3.F0);
+        Assert.Equal((nint)unchecked((nint)792877586576090769), val.F1);
+        Assert.Equal((ushort)24697, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S63
+    {
+        public double F0;
+        public nint F1;
+        public double F2;
+        public sbyte F3;
+        public float F4;
+
+        public S63(double f0, nint f1, double f2, sbyte f3, float f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func63AA3S63VyF")]
+    private static extern S63 SwiftRetFunc63();
+
+    [Fact]
+    public static void TestSwiftRetFunc63()
+    {
+        Console.Write("Running SwiftRetFunc63: ");
+        S63 val = SwiftRetFunc63();
+        Assert.Equal((double)4097323000009314, val.F0);
+        Assert.Equal((nint)unchecked((nint)4162427097168837193), val.F1);
+        Assert.Equal((double)140736061437152, val.F2);
+        Assert.Equal((sbyte)-59, val.F3);
+        Assert.Equal((float)7331757, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S64_S0
+    {
+        public ulong F0;
+
+        public S64_S0(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S64
+    {
+        public S64_S0 F0;
+        public ulong F1;
+        public long F2;
+        public nint F3;
+
+        public S64(S64_S0 f0, ulong f1, long f2, nint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func64AA3S64VyF")]
+    private static extern S64 SwiftRetFunc64();
+
+    [Fact]
+    public static void TestSwiftRetFunc64()
+    {
+        Console.Write("Running SwiftRetFunc64: ");
+        S64 val = SwiftRetFunc64();
+        Assert.Equal((ulong)2624461610177878495, val.F0.F0);
+        Assert.Equal((ulong)5222178027019975511, val.F1);
+        Assert.Equal((long)9006949357929457355, val.F2);
+        Assert.Equal((nint)unchecked((nint)7966680593035770540), val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct S65
+    {
+        public nint F0;
+        public double F1;
+        public ushort F2;
+        public short F3;
+        public byte F4;
+        public int F5;
+        public ulong F6;
+
+        public S65(nint f0, double f1, ushort f2, short f3, byte f4, int f5, ulong f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func65AA3S65VyF")]
+    private static extern S65 SwiftRetFunc65();
+
+    [Fact]
+    public static void TestSwiftRetFunc65()
+    {
+        Console.Write("Running SwiftRetFunc65: ");
+        S65 val = SwiftRetFunc65();
+        Assert.Equal((nint)unchecked((nint)6080968957098434687), val.F0);
+        Assert.Equal((double)3067343828504927, val.F1);
+        Assert.Equal((ushort)56887, val.F2);
+        Assert.Equal((short)804, val.F3);
+        Assert.Equal((byte)235, val.F4);
+        Assert.Equal((int)121742660, val.F5);
+        Assert.Equal((ulong)9218677163034827308, val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct S66
+    {
+        public sbyte F0;
+        public ulong F1;
+        public uint F2;
+        public ulong F3;
+        public ulong F4;
+
+        public S66(sbyte f0, ulong f1, uint f2, ulong f3, ulong f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func66AA3S66VyF")]
+    private static extern S66 SwiftRetFunc66();
+
+    [Fact]
+    public static void TestSwiftRetFunc66()
+    {
+        Console.Write("Running SwiftRetFunc66: ");
+        S66 val = SwiftRetFunc66();
+        Assert.Equal((sbyte)-16, val.F0);
+        Assert.Equal((ulong)7967447403042597794, val.F1);
+        Assert.Equal((uint)2029697750, val.F2);
+        Assert.Equal((ulong)4180031087394830849, val.F3);
+        Assert.Equal((ulong)5847795120921557969, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S67_S0
+    {
+        public ulong F0;
+
+        public S67_S0(ulong f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 33)]
+    struct S67
+    {
+        public S67_S0 F0;
+        public byte F1;
+        public ushort F2;
+        public ulong F3;
+        public ulong F4;
+        public sbyte F5;
+
+        public S67(S67_S0 f0, byte f1, ushort f2, ulong f3, ulong f4, sbyte f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func67AA3S67VyF")]
+    private static extern S67 SwiftRetFunc67();
+
+    [Fact]
+    public static void TestSwiftRetFunc67()
+    {
+        Console.Write("Running SwiftRetFunc67: ");
+        S67 val = SwiftRetFunc67();
+        Assert.Equal((ulong)4844204675254434929, val.F0.F0);
+        Assert.Equal((byte)135, val.F1);
+        Assert.Equal((ushort)13969, val.F2);
+        Assert.Equal((ulong)4897129719050177731, val.F3);
+        Assert.Equal((ulong)7233638107485862921, val.F4);
+        Assert.Equal((sbyte)-11, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S68_S0
+    {
+        public double F0;
+
+        public S68_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 37)]
+    struct S68
+    {
+        public int F0;
+        public ulong F1;
+        public uint F2;
+        public S68_S0 F3;
+        public int F4;
+        public sbyte F5;
+
+        public S68(int f0, ulong f1, uint f2, S68_S0 f3, int f4, sbyte f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func68AA3S68VyF")]
+    private static extern S68 SwiftRetFunc68();
+
+    [Fact]
+    public static void TestSwiftRetFunc68()
+    {
+        Console.Write("Running SwiftRetFunc68: ");
+        S68 val = SwiftRetFunc68();
+        Assert.Equal((int)1708606840, val.F0);
+        Assert.Equal((ulong)1768121573985581212, val.F1);
+        Assert.Equal((uint)1033319213, val.F2);
+        Assert.Equal((double)2741322436867931, val.F3.F0);
+        Assert.Equal((int)955320338, val.F4);
+        Assert.Equal((sbyte)12, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct S69
+    {
+        public uint F0;
+
+        public S69(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func69AA3S69VyF")]
+    private static extern S69 SwiftRetFunc69();
+
+    [Fact]
+    public static void TestSwiftRetFunc69()
+    {
+        Console.Write("Running SwiftRetFunc69: ");
+        S69 val = SwiftRetFunc69();
+        Assert.Equal((uint)2092746473, val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S70
+    {
+        public byte F0;
+        public float F1;
+
+        public S70(byte f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func70AA3S70VyF")]
+    private static extern S70 SwiftRetFunc70();
+
+    [Fact]
+    public static void TestSwiftRetFunc70()
+    {
+        Console.Write("Running SwiftRetFunc70: ");
+        S70 val = SwiftRetFunc70();
+        Assert.Equal((byte)76, val.F0);
+        Assert.Equal((float)4138467, val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct S71_S0
+    {
+        public sbyte F0;
+        public ulong F1;
+        public long F2;
+
+        public S71_S0(sbyte f0, ulong f1, long f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 26)]
+    struct S71
+    {
+        public S71_S0 F0;
+        public byte F1;
+        public byte F2;
+
+        public S71(S71_S0 f0, byte f1, byte f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func71AA3S71VyF")]
+    private static extern S71 SwiftRetFunc71();
+
+    [Fact]
+    public static void TestSwiftRetFunc71()
+    {
+        Console.Write("Running SwiftRetFunc71: ");
+        S71 val = SwiftRetFunc71();
+        Assert.Equal((sbyte)-98, val.F0.F0);
+        Assert.Equal((ulong)8603744544763953916, val.F0.F1);
+        Assert.Equal((long)8460721064583106347, val.F0.F2);
+        Assert.Equal((byte)10, val.F1);
+        Assert.Equal((byte)88, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct S72
+    {
+        public uint F0;
+
+        public S72(uint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func72AA3S72VyF")]
+    private static extern S72 SwiftRetFunc72();
+
+    [Fact]
+    public static void TestSwiftRetFunc72()
+    {
+        Console.Write("Running SwiftRetFunc72: ");
+        S72 val = SwiftRetFunc72();
+        Assert.Equal((uint)2021509367, val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 48)]
+    struct S73
+    {
+        public nint F0;
+        public short F1;
+        public ulong F2;
+        public float F3;
+        public int F4;
+        public nuint F5;
+        public nuint F6;
+
+        public S73(nint f0, short f1, ulong f2, float f3, int f4, nuint f5, nuint f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func73AA3S73VyF")]
+    private static extern S73 SwiftRetFunc73();
+
+    [Fact]
+    public static void TestSwiftRetFunc73()
+    {
+        Console.Write("Running SwiftRetFunc73: ");
+        S73 val = SwiftRetFunc73();
+        Assert.Equal((nint)unchecked((nint)6222563427944465437), val.F0);
+        Assert.Equal((short)28721, val.F1);
+        Assert.Equal((ulong)1313300783845289148, val.F2);
+        Assert.Equal((float)6761, val.F3);
+        Assert.Equal((int)2074171265, val.F4);
+        Assert.Equal((nuint)unchecked((nuint)6232209228889209160), val.F5);
+        Assert.Equal((nuint)unchecked((nuint)1423931135184844265), val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 19)]
+    struct S74
+    {
+        public short F0;
+        public float F1;
+        public double F2;
+        public ushort F3;
+        public sbyte F4;
+
+        public S74(short f0, float f1, double f2, ushort f3, sbyte f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func74AA3S74VyF")]
+    private static extern S74 SwiftRetFunc74();
+
+    [Fact]
+    public static void TestSwiftRetFunc74()
+    {
+        Console.Write("Running SwiftRetFunc74: ");
+        S74 val = SwiftRetFunc74();
+        Assert.Equal((short)27115, val.F0);
+        Assert.Equal((float)1416098, val.F1);
+        Assert.Equal((double)4468576755457331, val.F2);
+        Assert.Equal((ushort)58864, val.F3);
+        Assert.Equal((sbyte)81, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 1)]
+    struct S75_S0_S0
+    {
+        public sbyte F0;
+
+        public S75_S0_S0(sbyte f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct S75_S0
+    {
+        public S75_S0_S0 F0;
+        public byte F1;
+
+        public S75_S0(S75_S0_S0 f0, byte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 11)]
+    struct S75
+    {
+        public ulong F0;
+        public S75_S0 F1;
+        public byte F2;
+
+        public S75(ulong f0, S75_S0 f1, byte f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func75AA3S75VyF")]
+    private static extern S75 SwiftRetFunc75();
+
+    [Fact]
+    public static void TestSwiftRetFunc75()
+    {
+        Console.Write("Running SwiftRetFunc75: ");
+        S75 val = SwiftRetFunc75();
+        Assert.Equal((ulong)8532911974860912350, val.F0);
+        Assert.Equal((sbyte)-60, val.F1.F0.F0);
+        Assert.Equal((byte)66, val.F1.F1);
+        Assert.Equal((byte)200, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct S76_S0_S0
+    {
+        public short F0;
+
+        public S76_S0_S0(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S76_S0
+    {
+        public sbyte F0;
+        public ulong F1;
+        public S76_S0_S0 F2;
+        public double F3;
+
+        public S76_S0(sbyte f0, ulong f1, S76_S0_S0 f2, double f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 48)]
+    struct S76
+    {
+        public byte F0;
+        public S76_S0 F1;
+        public double F2;
+
+        public S76(byte f0, S76_S0 f1, double f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func76AA3S76VyF")]
+    private static extern S76 SwiftRetFunc76();
+
+    [Fact]
+    public static void TestSwiftRetFunc76()
+    {
+        Console.Write("Running SwiftRetFunc76: ");
+        S76 val = SwiftRetFunc76();
+        Assert.Equal((byte)69, val.F0);
+        Assert.Equal((sbyte)-29, val.F1.F0);
+        Assert.Equal((ulong)4872234474620951743, val.F1.F1);
+        Assert.Equal((short)11036, val.F1.F2.F0);
+        Assert.Equal((double)585486652063917, val.F1.F3);
+        Assert.Equal((double)2265391710186639, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 18)]
+    struct S77
+    {
+        public int F0;
+        public int F1;
+        public int F2;
+        public uint F3;
+        public short F4;
+
+        public S77(int f0, int f1, int f2, uint f3, short f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func77AA3S77VyF")]
+    private static extern S77 SwiftRetFunc77();
+
+    [Fact]
+    public static void TestSwiftRetFunc77()
+    {
+        Console.Write("Running SwiftRetFunc77: ");
+        S77 val = SwiftRetFunc77();
+        Assert.Equal((int)4495211, val.F0);
+        Assert.Equal((int)1364377405, val.F1);
+        Assert.Equal((int)773989694, val.F2);
+        Assert.Equal((uint)1121696315, val.F3);
+        Assert.Equal((short)7589, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S78
+    {
+        public uint F0;
+        public nuint F1;
+
+        public S78(uint f0, nuint f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func78AA3S78VyF")]
+    private static extern S78 SwiftRetFunc78();
+
+    [Fact]
+    public static void TestSwiftRetFunc78()
+    {
+        Console.Write("Running SwiftRetFunc78: ");
+        S78 val = SwiftRetFunc78();
+        Assert.Equal((uint)1767839225, val.F0);
+        Assert.Equal((nuint)unchecked((nuint)7917317019379224114), val.F1);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S79_S0
+    {
+        public double F0;
+        public uint F1;
+        public int F2;
+
+        public S79_S0(double f0, uint f1, int f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S79
+    {
+        public S79_S0 F0;
+        public byte F1;
+        public double F2;
+
+        public S79(S79_S0 f0, byte f1, double f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func79AA3S79VyF")]
+    private static extern S79 SwiftRetFunc79();
+
+    [Fact]
+    public static void TestSwiftRetFunc79()
+    {
+        Console.Write("Running SwiftRetFunc79: ");
+        S79 val = SwiftRetFunc79();
+        Assert.Equal((double)495074072703635, val.F0.F0);
+        Assert.Equal((uint)417605286, val.F0.F1);
+        Assert.Equal((int)171326442, val.F0.F2);
+        Assert.Equal((byte)203, val.F1);
+        Assert.Equal((double)2976663235490421, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 7)]
+    struct S80
+    {
+        public int F0;
+        public short F1;
+        public sbyte F2;
+
+        public S80(int f0, short f1, sbyte f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func80AA3S80VyF")]
+    private static extern S80 SwiftRetFunc80();
+
+    [Fact]
+    public static void TestSwiftRetFunc80()
+    {
+        Console.Write("Running SwiftRetFunc80: ");
+        S80 val = SwiftRetFunc80();
+        Assert.Equal((int)999559959, val.F0);
+        Assert.Equal((short)19977, val.F1);
+        Assert.Equal((sbyte)-4, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S81_S0
+    {
+        public nuint F0;
+
+        public S81_S0(nuint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct S81
+    {
+        public int F0;
+        public S81_S0 F1;
+        public float F2;
+        public long F3;
+        public uint F4;
+        public byte F5;
+        public short F6;
+
+        public S81(int f0, S81_S0 f1, float f2, long f3, uint f4, byte f5, short f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func81AA3S81VyF")]
+    private static extern S81 SwiftRetFunc81();
+
+    [Fact]
+    public static void TestSwiftRetFunc81()
+    {
+        Console.Write("Running SwiftRetFunc81: ");
+        S81 val = SwiftRetFunc81();
+        Assert.Equal((int)452603110, val.F0);
+        Assert.Equal((nuint)unchecked((nuint)6240652733420985265), val.F1.F0);
+        Assert.Equal((float)6469988, val.F2);
+        Assert.Equal((long)5775316279348621124, val.F3);
+        Assert.Equal((uint)1398033592, val.F4);
+        Assert.Equal((byte)105, val.F5);
+        Assert.Equal((short)21937, val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S82
+    {
+        public nint F0;
+
+        public S82(nint f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func82AA3S82VyF")]
+    private static extern S82 SwiftRetFunc82();
+
+    [Fact]
+    public static void TestSwiftRetFunc82()
+    {
+        Console.Write("Running SwiftRetFunc82: ");
+        S82 val = SwiftRetFunc82();
+        Assert.Equal((nint)unchecked((nint)6454754584537364459), val.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct S83
+    {
+        public ulong F0;
+        public uint F1;
+        public float F2;
+        public byte F3;
+        public float F4;
+
+        public S83(ulong f0, uint f1, float f2, byte f3, float f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func83AA3S83VyF")]
+    private static extern S83 SwiftRetFunc83();
+
+    [Fact]
+    public static void TestSwiftRetFunc83()
+    {
+        Console.Write("Running SwiftRetFunc83: ");
+        S83 val = SwiftRetFunc83();
+        Assert.Equal((ulong)2998238441521688907, val.F0);
+        Assert.Equal((uint)9623946, val.F1);
+        Assert.Equal((float)2577885, val.F2);
+        Assert.Equal((byte)156, val.F3);
+        Assert.Equal((float)6678807, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct S84_S0
+    {
+        public short F0;
+
+        public S84_S0(short f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 2)]
+    struct S84
+    {
+        public S84_S0 F0;
+
+        public S84(S84_S0 f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func84AA3S84VyF")]
+    private static extern S84 SwiftRetFunc84();
+
+    [Fact]
+    public static void TestSwiftRetFunc84()
+    {
+        Console.Write("Running SwiftRetFunc84: ");
+        S84 val = SwiftRetFunc84();
+        Assert.Equal((short)16213, val.F0.F0);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 3)]
+    struct S85_S0
+    {
+        public short F0;
+        public sbyte F1;
+
+        public S85_S0(short f0, sbyte f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S85
+    {
+        public long F0;
+        public byte F1;
+        public S85_S0 F2;
+        public float F3;
+        public nint F4;
+
+        public S85(long f0, byte f1, S85_S0 f2, float f3, nint f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func85AA3S85VyF")]
+    private static extern S85 SwiftRetFunc85();
+
+    [Fact]
+    public static void TestSwiftRetFunc85()
+    {
+        Console.Write("Running SwiftRetFunc85: ");
+        S85 val = SwiftRetFunc85();
+        Assert.Equal((long)8858924985061791416, val.F0);
+        Assert.Equal((byte)200, val.F1);
+        Assert.Equal((short)4504, val.F2.F0);
+        Assert.Equal((sbyte)60, val.F2.F1);
+        Assert.Equal((float)5572917, val.F3);
+        Assert.Equal((nint)unchecked((nint)6546369836182556538), val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct S86
+    {
+        public ushort F0;
+        public float F1;
+        public uint F2;
+
+        public S86(ushort f0, float f1, uint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func86AA3S86VyF")]
+    private static extern S86 SwiftRetFunc86();
+
+    [Fact]
+    public static void TestSwiftRetFunc86()
+    {
+        Console.Write("Running SwiftRetFunc86: ");
+        S86 val = SwiftRetFunc86();
+        Assert.Equal((ushort)22762, val.F0);
+        Assert.Equal((float)4672435, val.F1);
+        Assert.Equal((uint)719927700, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct S87
+    {
+        public int F0;
+        public nuint F1;
+        public ulong F2;
+
+        public S87(int f0, nuint f1, ulong f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func87AA3S87VyF")]
+    private static extern S87 SwiftRetFunc87();
+
+    [Fact]
+    public static void TestSwiftRetFunc87()
+    {
+        Console.Write("Running SwiftRetFunc87: ");
+        S87 val = SwiftRetFunc87();
+        Assert.Equal((int)361750184, val.F0);
+        Assert.Equal((nuint)unchecked((nuint)4206825694012787823), val.F1);
+        Assert.Equal((ulong)2885153391732919282, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 12)]
+    struct S88
+    {
+        public uint F0;
+        public short F1;
+        public uint F2;
+
+        public S88(uint f0, short f1, uint f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func88AA3S88VyF")]
+    private static extern S88 SwiftRetFunc88();
+
+    [Fact]
+    public static void TestSwiftRetFunc88()
+    {
+        Console.Write("Running SwiftRetFunc88: ");
+        S88 val = SwiftRetFunc88();
+        Assert.Equal((uint)2125094198, val.F0);
+        Assert.Equal((short)-10705, val.F1);
+        Assert.Equal((uint)182007583, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct S89
+    {
+        public byte F0;
+        public uint F1;
+        public int F2;
+        public sbyte F3;
+        public long F4;
+
+        public S89(byte f0, uint f1, int f2, sbyte f3, long f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func89AA3S89VyF")]
+    private static extern S89 SwiftRetFunc89();
+
+    [Fact]
+    public static void TestSwiftRetFunc89()
+    {
+        Console.Write("Running SwiftRetFunc89: ");
+        S89 val = SwiftRetFunc89();
+        Assert.Equal((byte)175, val.F0);
+        Assert.Equal((uint)1062985476, val.F1);
+        Assert.Equal((int)1019006263, val.F2);
+        Assert.Equal((sbyte)-22, val.F3);
+        Assert.Equal((long)6888877252788498422, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct S90
+    {
+        public byte F0;
+        public int F1;
+        public short F2;
+        public nint F3;
+        public uint F4;
+        public uint F5;
+        public long F6;
+
+        public S90(byte f0, int f1, short f2, nint f3, uint f4, uint f5, long f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func90AA3S90VyF")]
+    private static extern S90 SwiftRetFunc90();
+
+    [Fact]
+    public static void TestSwiftRetFunc90()
+    {
+        Console.Write("Running SwiftRetFunc90: ");
+        S90 val = SwiftRetFunc90();
+        Assert.Equal((byte)221, val.F0);
+        Assert.Equal((int)225825436, val.F1);
+        Assert.Equal((short)-26231, val.F2);
+        Assert.Equal((nint)unchecked((nint)5122880520199505508), val.F3);
+        Assert.Equal((uint)907657092, val.F4);
+        Assert.Equal((uint)707089277, val.F5);
+        Assert.Equal((long)6091814344013414920, val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 28)]
+    struct S91
+    {
+        public double F0;
+        public sbyte F1;
+        public sbyte F2;
+        public uint F3;
+        public nint F4;
+        public sbyte F5;
+        public short F6;
+
+        public S91(double f0, sbyte f1, sbyte f2, uint f3, nint f4, sbyte f5, short f6)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+            F6 = f6;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func91AA3S91VyF")]
+    private static extern S91 SwiftRetFunc91();
+
+    [Fact]
+    public static void TestSwiftRetFunc91()
+    {
+        Console.Write("Running SwiftRetFunc91: ");
+        S91 val = SwiftRetFunc91();
+        Assert.Equal((double)3265110225161261, val.F0);
+        Assert.Equal((sbyte)62, val.F1);
+        Assert.Equal((sbyte)-38, val.F2);
+        Assert.Equal((uint)946023589, val.F3);
+        Assert.Equal((nint)unchecked((nint)4109819715069879890), val.F4);
+        Assert.Equal((sbyte)-73, val.F5);
+        Assert.Equal((short)20363, val.F6);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S92_S0
+    {
+        public float F0;
+        public long F1;
+
+        public S92_S0(float f0, long f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 44)]
+    struct S92
+    {
+        public long F0;
+        public nuint F1;
+        public S92_S0 F2;
+        public int F3;
+        public float F4;
+        public float F5;
+
+        public S92(long f0, nuint f1, S92_S0 f2, int f3, float f4, float f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func92AA3S92VyF")]
+    private static extern S92 SwiftRetFunc92();
+
+    [Fact]
+    public static void TestSwiftRetFunc92()
+    {
+        Console.Write("Running SwiftRetFunc92: ");
+        S92 val = SwiftRetFunc92();
+        Assert.Equal((long)3230438394207610137, val.F0);
+        Assert.Equal((nuint)unchecked((nuint)3003396252681176136), val.F1);
+        Assert.Equal((float)6494422, val.F2.F0);
+        Assert.Equal((long)2971773224350614312, val.F2.F1);
+        Assert.Equal((int)2063694141, val.F3);
+        Assert.Equal((float)3117041, val.F4);
+        Assert.Equal((float)1003760, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 32)]
+    struct S93
+    {
+        public nint F0;
+        public byte F1;
+        public uint F2;
+        public uint F3;
+        public ulong F4;
+
+        public S93(nint f0, byte f1, uint f2, uint f3, ulong f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func93AA3S93VyF")]
+    private static extern S93 SwiftRetFunc93();
+
+    [Fact]
+    public static void TestSwiftRetFunc93()
+    {
+        Console.Write("Running SwiftRetFunc93: ");
+        S93 val = SwiftRetFunc93();
+        Assert.Equal((nint)unchecked((nint)5170226481546239050), val.F0);
+        Assert.Equal((byte)11, val.F1);
+        Assert.Equal((uint)1120259582, val.F2);
+        Assert.Equal((uint)1947849905, val.F3);
+        Assert.Equal((ulong)3690113387392112192, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 40)]
+    struct S94
+    {
+        public ushort F0;
+        public double F1;
+        public short F2;
+        public double F3;
+        public ulong F4;
+
+        public S94(ushort f0, double f1, short f2, double f3, ulong f4)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func94AA3S94VyF")]
+    private static extern S94 SwiftRetFunc94();
+
+    [Fact]
+    public static void TestSwiftRetFunc94()
+    {
+        Console.Write("Running SwiftRetFunc94: ");
+        S94 val = SwiftRetFunc94();
+        Assert.Equal((ushort)57111, val.F0);
+        Assert.Equal((double)1718940123307098, val.F1);
+        Assert.Equal((short)-16145, val.F2);
+        Assert.Equal((double)1099321301986326, val.F3);
+        Assert.Equal((ulong)2972912419231960385, val.F4);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S95_S0
+    {
+        public double F0;
+
+        public S95_S0(double f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 24)]
+    struct S95
+    {
+        public short F0;
+        public S95_S0 F1;
+        public ulong F2;
+
+        public S95(short f0, S95_S0 f1, ulong f2)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func95AA3S95VyF")]
+    private static extern S95 SwiftRetFunc95();
+
+    [Fact]
+    public static void TestSwiftRetFunc95()
+    {
+        Console.Write("Running SwiftRetFunc95: ");
+        S95 val = SwiftRetFunc95();
+        Assert.Equal((short)12620, val.F0);
+        Assert.Equal((double)3232445258308074, val.F1.F0);
+        Assert.Equal((ulong)97365157264460373, val.F2);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 48)]
+    struct S96
+    {
+        public sbyte F0;
+        public double F1;
+        public ulong F2;
+        public ulong F3;
+        public int F4;
+        public long F5;
+
+        public S96(sbyte f0, double f1, ulong f2, ulong f3, int f4, long f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func96AA3S96VyF")]
+    private static extern S96 SwiftRetFunc96();
+
+    [Fact]
+    public static void TestSwiftRetFunc96()
+    {
+        Console.Write("Running SwiftRetFunc96: ");
+        S96 val = SwiftRetFunc96();
+        Assert.Equal((sbyte)3, val.F0);
+        Assert.Equal((double)242355060906873, val.F1);
+        Assert.Equal((ulong)3087879465791321798, val.F2);
+        Assert.Equal((ulong)7363229136420263380, val.F3);
+        Assert.Equal((int)46853328, val.F4);
+        Assert.Equal((long)4148307028758236491, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 16)]
+    struct S97
+    {
+        public ushort F0;
+        public int F1;
+        public ushort F2;
+        public uint F3;
+
+        public S97(ushort f0, int f1, ushort f2, uint f3)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func97AA3S97VyF")]
+    private static extern S97 SwiftRetFunc97();
+
+    [Fact]
+    public static void TestSwiftRetFunc97()
+    {
+        Console.Write("Running SwiftRetFunc97: ");
+        S97 val = SwiftRetFunc97();
+        Assert.Equal((ushort)10651, val.F0);
+        Assert.Equal((int)2068379463, val.F1);
+        Assert.Equal((ushort)57307, val.F2);
+        Assert.Equal((uint)329271020, val.F3);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 48)]
+    struct S98
+    {
+        public double F0;
+        public int F1;
+        public long F2;
+        public nint F3;
+        public float F4;
+        public double F5;
+
+        public S98(double f0, int f1, long f2, nint f3, float f4, double f5)
+        {
+            F0 = f0;
+            F1 = f1;
+            F2 = f2;
+            F3 = f3;
+            F4 = f4;
+            F5 = f5;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func98AA3S98VyF")]
+    private static extern S98 SwiftRetFunc98();
+
+    [Fact]
+    public static void TestSwiftRetFunc98()
+    {
+        Console.Write("Running SwiftRetFunc98: ");
+        S98 val = SwiftRetFunc98();
+        Assert.Equal((double)2250389231883613, val.F0);
+        Assert.Equal((int)1755058358, val.F1);
+        Assert.Equal((long)6686142382639170849, val.F2);
+        Assert.Equal((nint)unchecked((nint)6456632014163315773), val.F3);
+        Assert.Equal((float)2818253, val.F4);
+        Assert.Equal((double)1085859434505817, val.F5);
+        Console.WriteLine("OK");
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 4)]
+    struct S99_S0
+    {
+        public int F0;
+
+        public S99_S0(int f0)
+        {
+            F0 = f0;
+        }
+    }
+
+    [StructLayout(LayoutKind.Sequential, Size = 8)]
+    struct S99
+    {
+        public S99_S0 F0;
+        public float F1;
+
+        public S99(S99_S0 f0, float f1)
+        {
+            F0 = f0;
+            F1 = f1;
+        }
+    }
+
+    [UnmanagedCallConv(CallConvs = new Type[] { typeof(CallConvSwift) })]
+    [DllImport(SwiftLib, EntryPoint = "$s17SwiftRetAbiStress05swiftB6Func99AA3S99VyF")]
+    private static extern S99 SwiftRetFunc99();
+
+    [Fact]
+    public static void TestSwiftRetFunc99()
+    {
+        Console.Write("Running SwiftRetFunc99: ");
+        S99 val = SwiftRetFunc99();
+        Assert.Equal((int)1117297545, val.F0.F0);
+        Assert.Equal((float)1539294, val.F1);
+        Console.WriteLine("OK");
+    }
+
+}
diff --git a/src/tests/Interop/Swift/SwiftRetAbiStress/SwiftRetAbiStress.csproj b/src/tests/Interop/Swift/SwiftRetAbiStress/SwiftRetAbiStress.csproj
new file mode 100644
index 000000000000..a57cd84cf884
--- /dev/null
+++ b/src/tests/Interop/Swift/SwiftRetAbiStress/SwiftRetAbiStress.csproj
@@ -0,0 +1,16 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <!-- Needed for CLRTestTargetUnsupported, CMakeProjectReference -->
+    <RequiresProcessIsolation>true</RequiresProcessIsolation>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/93631 -->
+    <CLRTestTargetUnsupported Condition="'$(TargetsOSX)' != 'true' and '$(TargetsAppleMobile)' != 'true'">true</CLRTestTargetUnsupported>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="$(TestLibraryProjectPath)" />
+    <CMakeProjectReference Include="CMakeLists.txt" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/Interop/Swift/SwiftRetAbiStress/SwiftRetAbiStress.swift b/src/tests/Interop/Swift/SwiftRetAbiStress/SwiftRetAbiStress.swift
new file mode 100644
index 000000000000..7667f605bbf6
--- /dev/null
+++ b/src/tests/Interop/Swift/SwiftRetAbiStress/SwiftRetAbiStress.swift
@@ -0,0 +1,1614 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+import Foundation
+
+@frozen
+public struct S0
+{
+    public let f0 : Int16;
+    public let f1 : Int32;
+    public let f2 : UInt64;
+}
+
+public func swiftRetFunc0() -> S0 {
+    return S0(f0: -17813, f1: 318006528, f2: 1195162122024233590)
+}
+
+@frozen
+public struct S1
+{
+    public let f0 : Int16;
+    public let f1 : Float;
+    public let f2 : Int64;
+    public let f3 : UInt32;
+}
+
+public func swiftRetFunc1() -> S1 {
+    return S1(f0: -29793, f1: 7351779, f2: 133491708229548754, f3: 665726990)
+}
+
+@frozen
+public struct S2_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct S2
+{
+    public let f0 : S2_S0;
+    public let f1 : UInt8;
+    public let f2 : UInt16;
+    public let f3 : Float;
+    public let f4 : Int32;
+}
+
+public func swiftRetFunc2() -> S2 {
+    return S2(f0: S2_S0(f0: 2153637757371267722), f1: 150, f2: 48920, f3: 3564327, f4: 1310569731)
+}
+
+@frozen
+public struct S3
+{
+    public let f0 : Int64;
+    public let f1 : Double;
+    public let f2 : Int8;
+    public let f3 : Int32;
+    public let f4 : UInt16;
+    public let f5 : UInt8;
+    public let f6 : Double;
+}
+
+public func swiftRetFunc3() -> S3 {
+    return S3(f0: 5610153900386943274, f1: 2431035148834736, f2: 111, f3: 772269424, f4: 19240, f5: 146, f6: 821805530740405)
+}
+
+@frozen
+public struct S4
+{
+    public let f0 : Int8;
+    public let f1 : UInt32;
+    public let f2 : UInt64;
+    public let f3 : Int64;
+}
+
+public func swiftRetFunc4() -> S4 {
+    return S4(f0: 125, f1: 377073381, f2: 964784376430620335, f3: 5588038704850976624)
+}
+
+@frozen
+public struct S5_S0
+{
+    public let f0 : UInt32;
+    public let f1 : Double;
+}
+
+@frozen
+public struct S5
+{
+    public let f0 : UInt64;
+    public let f1 : Int8;
+    public let f2 : UInt;
+    public let f3 : S5_S0;
+    public let f4 : Int;
+    public let f5 : UInt8;
+}
+
+public func swiftRetFunc5() -> S5 {
+    return S5(f0: 5315019731968023493, f1: 114, f2: 1154655179105889397, f3: S5_S0(f0: 1468030771, f1: 3066473182924818), f4: 6252650621827449809, f5: 129)
+}
+
+@frozen
+public struct S6
+{
+    public let f0 : Int32;
+    public let f1 : Int16;
+    public let f2 : Int64;
+    public let f3 : UInt16;
+}
+
+public func swiftRetFunc6() -> S6 {
+    return S6(f0: 743741783, f1: -6821, f2: 5908745692727636656, f3: 64295)
+}
+
+@frozen
+public struct S7_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct S7
+{
+    public let f0 : S7_S0;
+}
+
+public func swiftRetFunc7() -> S7 {
+    return S7(f0: S7_S0(f0: 7625368278886567558))
+}
+
+@frozen
+public struct S8
+{
+    public let f0 : Int;
+}
+
+public func swiftRetFunc8() -> S8 {
+    return S8(f0: 775279004683334365)
+}
+
+@frozen
+public struct S9_S0
+{
+    public let f0 : Int16;
+    public let f1 : Int32;
+}
+
+@frozen
+public struct S9
+{
+    public let f0 : UInt32;
+    public let f1 : Int;
+    public let f2 : S9_S0;
+    public let f3 : UInt16;
+}
+
+public func swiftRetFunc9() -> S9 {
+    return S9(f0: 1223030410, f1: 4720638462358523954, f2: S9_S0(f0: 30631, f1: 1033774469), f3: 64474)
+}
+
+@frozen
+public struct S10
+{
+    public let f0 : Float;
+    public let f1 : Float;
+}
+
+public func swiftRetFunc10() -> S10 {
+    return S10(f0: 3276917, f1: 6694615)
+}
+
+@frozen
+public struct S11
+{
+    public let f0 : Double;
+    public let f1 : Int;
+    public let f2 : UInt32;
+    public let f3 : Int8;
+}
+
+public func swiftRetFunc11() -> S11 {
+    return S11(f0: 938206348036312, f1: 6559514243876905696, f2: 1357772248, f3: 59)
+}
+
+@frozen
+public struct S12
+{
+    public let f0 : Double;
+}
+
+public func swiftRetFunc12() -> S12 {
+    return S12(f0: 1580503485222363)
+}
+
+@frozen
+public struct S13
+{
+    public let f0 : UInt32;
+}
+
+public func swiftRetFunc13() -> S13 {
+    return S13(f0: 1381551558)
+}
+
+@frozen
+public struct S14_S0_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct S14_S0
+{
+    public let f0 : S14_S0_S0;
+}
+
+@frozen
+public struct S14
+{
+    public let f0 : Int32;
+    public let f1 : UInt16;
+    public let f2 : Int8;
+    public let f3 : Float;
+    public let f4 : UInt64;
+    public let f5 : S14_S0;
+    public let f6 : Int8;
+}
+
+public func swiftRetFunc14() -> S14 {
+    return S14(f0: 1765691191, f1: 56629, f2: 25, f3: 2944946, f4: 951929105049584033, f5: S14_S0(f0: S14_S0_S0(f0: -30)), f6: 66)
+}
+
+@frozen
+public struct S15_S0
+{
+    public let f0 : UInt;
+    public let f1 : Float;
+}
+
+@frozen
+public struct S15
+{
+    public let f0 : Int;
+    public let f1 : S15_S0;
+    public let f2 : UInt16;
+    public let f3 : Int32;
+}
+
+public func swiftRetFunc15() -> S15 {
+    return S15(f0: 2090703541638269172, f1: S15_S0(f0: 6408314016925514463, f1: 6534515), f2: 30438, f3: 1745811802)
+}
+
+@frozen
+public struct S16
+{
+    public let f0 : UInt32;
+    public let f1 : UInt64;
+    public let f2 : UInt8;
+    public let f3 : Int32;
+    public let f4 : UInt;
+    public let f5 : Int8;
+}
+
+public func swiftRetFunc16() -> S16 {
+    return S16(f0: 585220635, f1: 4034210936973794153, f2: 48, f3: 1155081155, f4: 806384837403045657, f5: 54)
+}
+
+@frozen
+public struct S17
+{
+    public let f0 : UInt8;
+    public let f1 : Int8;
+    public let f2 : UInt8;
+}
+
+public func swiftRetFunc17() -> S17 {
+    return S17(f0: 23, f1: 112, f2: 15)
+}
+
+@frozen
+public struct S18_S0
+{
+    public let f0 : UInt32;
+    public let f1 : Float;
+}
+
+@frozen
+public struct S18
+{
+    public let f0 : S18_S0;
+    public let f1 : Int;
+    public let f2 : Int32;
+    public let f3 : UInt16;
+    public let f4 : Int16;
+}
+
+public func swiftRetFunc18() -> S18 {
+    return S18(f0: S18_S0(f0: 1964425016, f1: 2767295), f1: 6016563774923595868, f2: 1648562735, f3: 378, f4: -20536)
+}
+
+@frozen
+public struct S19
+{
+    public let f0 : UInt8;
+    public let f1 : UInt16;
+    public let f2 : Float;
+    public let f3 : UInt64;
+    public let f4 : Int32;
+}
+
+public func swiftRetFunc19() -> S19 {
+    return S19(f0: 188, f1: 47167, f2: 6781297, f3: 8140268502944465472, f4: 708690468)
+}
+
+@frozen
+public struct S20_S0
+{
+    public let f0 : UInt32;
+    public let f1 : Float;
+}
+
+@frozen
+public struct S20
+{
+    public let f0 : S20_S0;
+    public let f1 : UInt8;
+}
+
+public func swiftRetFunc20() -> S20 {
+    return S20(f0: S20_S0(f0: 2019361333, f1: 938975), f1: 192)
+}
+
+@frozen
+public struct S21_S0_S0
+{
+    public let f0 : UInt16;
+}
+
+@frozen
+public struct S21_S0
+{
+    public let f0 : S21_S0_S0;
+}
+
+@frozen
+public struct S21
+{
+    public let f0 : Double;
+    public let f1 : Double;
+    public let f2 : UInt;
+    public let f3 : Int;
+    public let f4 : UInt64;
+    public let f5 : S21_S0;
+}
+
+public func swiftRetFunc21() -> S21 {
+    return S21(f0: 1693878073402490, f1: 3392111340517811, f2: 3584917502172813732, f3: 665495086154608745, f4: 2918107814961929578, f5: S21_S0(f0: S21_S0_S0(f0: 4634)))
+}
+
+@frozen
+public struct S22
+{
+    public let f0 : UInt32;
+}
+
+public func swiftRetFunc22() -> S22 {
+    return S22(f0: 640156952)
+}
+
+@frozen
+public struct S23
+{
+    public let f0 : UInt8;
+    public let f1 : Int16;
+    public let f2 : UInt64;
+    public let f3 : UInt;
+    public let f4 : UInt;
+    public let f5 : UInt64;
+    public let f6 : UInt8;
+}
+
+public func swiftRetFunc23() -> S23 {
+    return S23(f0: 122, f1: 28995, f2: 25673626033589541, f3: 828363978755325884, f4: 3065573182429720699, f5: 1484484917001276079, f6: 209)
+}
+
+@frozen
+public struct S24
+{
+    public let f0 : UInt64;
+    public let f1 : UInt64;
+}
+
+public func swiftRetFunc24() -> S24 {
+    return S24(f0: 2621245238416080387, f1: 6541787564638363256)
+}
+
+@frozen
+public struct S25_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct S25
+{
+    public let f0 : Int8;
+    public let f1 : Int8;
+    public let f2 : UInt8;
+    public let f3 : S25_S0;
+    public let f4 : UInt32;
+}
+
+public func swiftRetFunc25() -> S25 {
+    return S25(f0: 30, f1: -8, f2: 168, f3: S25_S0(f0: 7601538494489501573), f4: 814523741)
+}
+
+@frozen
+public struct S26
+{
+    public let f0 : Float;
+}
+
+public func swiftRetFunc26() -> S26 {
+    return S26(f0: 3681545)
+}
+
+@frozen
+public struct S27
+{
+    public let f0 : Int64;
+    public let f1 : Double;
+    public let f2 : Int8;
+    public let f3 : Int;
+    public let f4 : Int16;
+    public let f5 : Int64;
+}
+
+public func swiftRetFunc27() -> S27 {
+    return S27(f0: 4847421047018330189, f1: 3655171692392280, f2: 46, f3: 4476120319602257660, f4: -6106, f5: 5756567968111212829)
+}
+
+@frozen
+public struct S28_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct S28
+{
+    public let f0 : Float;
+    public let f1 : Int16;
+    public let f2 : S28_S0;
+    public let f3 : Double;
+    public let f4 : UInt64;
+}
+
+public func swiftRetFunc28() -> S28 {
+    return S28(f0: 3491512, f1: 5249, f2: S28_S0(f0: 1107064327388314), f3: 2170381648425673, f4: 5138313315157580943)
+}
+
+@frozen
+public struct S29
+{
+    public let f0 : UInt16;
+    public let f1 : UInt32;
+    public let f2 : Int16;
+    public let f3 : Int32;
+    public let f4 : Int32;
+    public let f5 : UInt64;
+    public let f6 : Int16;
+}
+
+public func swiftRetFunc29() -> S29 {
+    return S29(f0: 39000, f1: 408611655, f2: 18090, f3: 351857085, f4: 1103441843, f5: 5162040247631126074, f6: -27930)
+}
+
+@frozen
+public struct S30_S0
+{
+    public let f0 : Int8;
+    public let f1 : Int8;
+    public let f2 : Int32;
+}
+
+@frozen
+public struct S30_S1
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct S30
+{
+    public let f0 : Float;
+    public let f1 : S30_S0;
+    public let f2 : S30_S1;
+    public let f3 : Int64;
+}
+
+public func swiftRetFunc30() -> S30 {
+    return S30(f0: 6492602, f1: S30_S0(f0: 76, f1: -26, f2: 1777644423), f2: S30_S1(f0: 6558571), f3: 5879147675377398012)
+}
+
+@frozen
+public struct S31
+{
+    public let f0 : Int64;
+    public let f1 : UInt64;
+    public let f2 : UInt16;
+    public let f3 : UInt16;
+    public let f4 : Int8;
+}
+
+public func swiftRetFunc31() -> S31 {
+    return S31(f0: 4699402628739628277, f1: 7062790893852687562, f2: 28087, f3: 11088, f4: 69)
+}
+
+@frozen
+public struct S32
+{
+    public let f0 : Int32;
+    public let f1 : UInt64;
+    public let f2 : UInt64;
+    public let f3 : UInt32;
+    public let f4 : Int16;
+    public let f5 : UInt16;
+}
+
+public func swiftRetFunc32() -> S32 {
+    return S32(f0: 688805466, f1: 8860655326984381661, f2: 6943423675662271404, f3: 196368476, f4: 14229, f5: 34635)
+}
+
+@frozen
+public struct S33
+{
+    public let f0 : UInt16;
+    public let f1 : UInt32;
+    public let f2 : Int32;
+    public let f3 : UInt16;
+    public let f4 : Float;
+    public let f5 : UInt64;
+    public let f6 : Int;
+}
+
+public func swiftRetFunc33() -> S33 {
+    return S33(f0: 9297, f1: 7963252, f2: 556244690, f3: 19447, f4: 6930550, f5: 126294981263481729, f6: 2540579257616511618)
+}
+
+@frozen
+public struct S34
+{
+    public let f0 : Int64;
+    public let f1 : UInt32;
+    public let f2 : UInt64;
+}
+
+public func swiftRetFunc34() -> S34 {
+    return S34(f0: 5845561428743737556, f1: 1358941228, f2: 3701080255861218446)
+}
+
+@frozen
+public struct S35
+{
+    public let f0 : Float;
+    public let f1 : Float;
+    public let f2 : Int64;
+    public let f3 : UInt8;
+    public let f4 : Double;
+    public let f5 : UInt16;
+}
+
+public func swiftRetFunc35() -> S35 {
+    return S35(f0: 5982956, f1: 3675164, f2: 229451138397478297, f3: 163, f4: 2925293762193390, f5: 5018)
+}
+
+@frozen
+public struct S36
+{
+    public let f0 : Int32;
+    public let f1 : Int64;
+    public let f2 : UInt64;
+}
+
+public func swiftRetFunc36() -> S36 {
+    return S36(f0: 1915776502, f1: 2197655909333830531, f2: 6072941592567177049)
+}
+
+@frozen
+public struct S37
+{
+    public let f0 : UInt8;
+    public let f1 : Double;
+}
+
+public func swiftRetFunc37() -> S37 {
+    return S37(f0: 18, f1: 4063164371882658)
+}
+
+@frozen
+public struct S38
+{
+    public let f0 : UInt;
+    public let f1 : Int64;
+    public let f2 : UInt8;
+    public let f3 : UInt;
+}
+
+public func swiftRetFunc38() -> S38 {
+    return S38(f0: 7389960750529773276, f1: 2725802169582362061, f2: 2, f3: 3659261019360356514)
+}
+
+@frozen
+public struct S39
+{
+    public let f0 : Int32;
+    public let f1 : Int32;
+    public let f2 : Int;
+    public let f3 : Int16;
+    public let f4 : UInt16;
+}
+
+public func swiftRetFunc39() -> S39 {
+    return S39(f0: 50995691, f1: 1623216479, f2: 2906650346451599789, f3: 28648, f4: 8278)
+}
+
+@frozen
+public struct S40_S0
+{
+    public let f0 : Float;
+    public let f1 : UInt8;
+    public let f2 : Int8;
+    public let f3 : UInt;
+    public let f4 : Double;
+}
+
+@frozen
+public struct S40
+{
+    public let f0 : S40_S0;
+    public let f1 : Int16;
+    public let f2 : Int16;
+}
+
+public func swiftRetFunc40() -> S40 {
+    return S40(f0: S40_S0(f0: 7087264, f1: 37, f2: -5, f3: 479915249821490487, f4: 144033730096589), f1: 28654, f2: 16398)
+}
+
+@frozen
+public struct S41
+{
+    public let f0 : UInt;
+    public let f1 : UInt;
+}
+
+public func swiftRetFunc41() -> S41 {
+    return S41(f0: 7923718819069382599, f1: 1539666179674725957)
+}
+
+@frozen
+public struct S42_S0
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct S42
+{
+    public let f0 : UInt32;
+    public let f1 : Int64;
+    public let f2 : S42_S0;
+    public let f3 : UInt;
+}
+
+public func swiftRetFunc42() -> S42 {
+    return S42(f0: 1046060439, f1: 8249831314190867613, f2: S42_S0(f0: 1097582349), f3: 2864677262092469436)
+}
+
+@frozen
+public struct S43_S0_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct S43_S0
+{
+    public let f0 : S43_S0_S0;
+}
+
+@frozen
+public struct S43
+{
+    public let f0 : S43_S0;
+    public let f1 : Int8;
+}
+
+public func swiftRetFunc43() -> S43 {
+    return S43(f0: S43_S0(f0: S43_S0_S0(f0: 1586338)), f1: 104)
+}
+
+@frozen
+public struct S44
+{
+    public let f0 : UInt8;
+    public let f1 : Int32;
+    public let f2 : Int;
+    public let f3 : UInt32;
+}
+
+public func swiftRetFunc44() -> S44 {
+    return S44(f0: 94, f1: 1109076022, f2: 3135595850598607828, f3: 760084013)
+}
+
+@frozen
+public struct S45_S0
+{
+    public let f0 : Int64;
+}
+
+@frozen
+public struct S45
+{
+    public let f0 : Int16;
+    public let f1 : UInt64;
+    public let f2 : Int;
+    public let f3 : S45_S0;
+}
+
+public func swiftRetFunc45() -> S45 {
+    return S45(f0: 3071, f1: 5908138438609341766, f2: 5870206722419946629, f3: S45_S0(f0: 8128455876189744801))
+}
+
+@frozen
+public struct S46
+{
+    public let f0 : Int16;
+    public let f1 : Int8;
+    public let f2 : Int8;
+    public let f3 : UInt32;
+    public let f4 : UInt8;
+    public let f5 : Int32;
+}
+
+public func swiftRetFunc46() -> S46 {
+    return S46(f0: 14794, f1: 60, f2: -77, f3: 653898879, f4: 224, f5: 266602433)
+}
+
+@frozen
+public struct S47_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct S47
+{
+    public let f0 : Double;
+    public let f1 : S47_S0;
+}
+
+public func swiftRetFunc47() -> S47 {
+    return S47(f0: 3195976594911793, f1: S47_S0(f0: -91))
+}
+
+@frozen
+public struct S48
+{
+    public let f0 : Int;
+}
+
+public func swiftRetFunc48() -> S48 {
+    return S48(f0: 778504172538154682)
+}
+
+@frozen
+public struct S49_S0_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct S49_S0
+{
+    public let f0 : S49_S0_S0;
+}
+
+@frozen
+public struct S49
+{
+    public let f0 : UInt64;
+    public let f1 : S49_S0;
+    public let f2 : Int8;
+    public let f3 : Double;
+    public let f4 : UInt32;
+    public let f5 : UInt32;
+}
+
+public func swiftRetFunc49() -> S49 {
+    return S49(f0: 4235011519458710874, f1: S49_S0(f0: S49_S0_S0(f0: 3120420438742285733)), f2: -8, f3: 1077419570643725, f4: 1985303212, f5: 264580506)
+}
+
+@frozen
+public struct S50
+{
+    public let f0 : Int32;
+}
+
+public func swiftRetFunc50() -> S50 {
+    return S50(f0: 1043912405)
+}
+
+@frozen
+public struct S51_S0_S0_S0
+{
+    public let f0 : Float;
+}
+
+@frozen
+public struct S51_S0_S0
+{
+    public let f0 : S51_S0_S0_S0;
+    public let f1 : Int16;
+}
+
+@frozen
+public struct S51_S0
+{
+    public let f0 : Double;
+    public let f1 : S51_S0_S0;
+    public let f2 : UInt8;
+    public let f3 : Int64;
+}
+
+@frozen
+public struct S51
+{
+    public let f0 : S51_S0;
+    public let f1 : Double;
+}
+
+public func swiftRetFunc51() -> S51 {
+    return S51(f0: S51_S0(f0: 3266680719186600, f1: S51_S0_S0(f0: S51_S0_S0_S0(f0: 428247), f1: -24968), f2: 76, f3: 183022772513065490), f1: 2661928101793033)
+}
+
+@frozen
+public struct S52
+{
+    public let f0 : UInt32;
+    public let f1 : Int64;
+    public let f2 : UInt32;
+    public let f3 : UInt64;
+    public let f4 : Int;
+    public let f5 : Int8;
+}
+
+public func swiftRetFunc52() -> S52 {
+    return S52(f0: 1812191671, f1: 6594574760089190928, f2: 831147243, f3: 3301835731003365248, f4: 5382332538247340743, f5: -77)
+}
+
+@frozen
+public struct S53_S0
+{
+    public let f0 : Int8;
+    public let f1 : UInt;
+}
+
+@frozen
+public struct S53
+{
+    public let f0 : S53_S0;
+    public let f1 : Int32;
+    public let f2 : Int64;
+    public let f3 : Float;
+    public let f4 : Int8;
+}
+
+public func swiftRetFunc53() -> S53 {
+    return S53(f0: S53_S0(f0: -123, f1: 3494916243607193741), f1: 1406699798, f2: 4018943158751734338, f3: 1084415, f4: -8)
+}
+
+@frozen
+public struct S54_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct S54
+{
+    public let f0 : Int;
+    public let f1 : Int;
+    public let f2 : S54_S0;
+    public let f3 : Int64;
+}
+
+public func swiftRetFunc54() -> S54 {
+    return S54(f0: 8623517456704997133, f1: 1521939500434086364, f2: S54_S0(f0: 3472783299414218), f3: 4761507229870258916)
+}
+
+@frozen
+public struct S55
+{
+    public let f0 : Int16;
+    public let f1 : UInt32;
+    public let f2 : Int64;
+    public let f3 : UInt32;
+    public let f4 : Int8;
+    public let f5 : UInt8;
+}
+
+public func swiftRetFunc55() -> S55 {
+    return S55(f0: -28051, f1: 1759912152, f2: 2038322238348454200, f3: 601094102, f4: 5, f5: 75)
+}
+
+@frozen
+public struct S56
+{
+    public let f0 : UInt64;
+    public let f1 : Float;
+    public let f2 : Int8;
+    public let f3 : Int32;
+}
+
+public func swiftRetFunc56() -> S56 {
+    return S56(f0: 6313168909786453069, f1: 6254558, f2: 115, f3: 847834891)
+}
+
+@frozen
+public struct S57
+{
+    public let f0 : UInt;
+    public let f1 : Int16;
+    public let f2 : Int8;
+    public let f3 : Int32;
+}
+
+public func swiftRetFunc57() -> S57 {
+    return S57(f0: 546304219852233452, f1: -27416, f2: 47, f3: 1094575684)
+}
+
+@frozen
+public struct S58
+{
+    public let f0 : UInt64;
+    public let f1 : UInt64;
+}
+
+public func swiftRetFunc58() -> S58 {
+    return S58(f0: 4612004722568513699, f1: 2222525519606580195)
+}
+
+@frozen
+public struct S59
+{
+    public let f0 : Int8;
+    public let f1 : UInt;
+    public let f2 : Int;
+    public let f3 : Int8;
+    public let f4 : Int64;
+    public let f5 : UInt8;
+}
+
+public func swiftRetFunc59() -> S59 {
+    return S59(f0: -92, f1: 7281011081566942937, f2: 8203439771560005792, f3: 103, f4: 1003386607251132236, f5: 6)
+}
+
+@frozen
+public struct S60
+{
+    public let f0 : UInt64;
+    public let f1 : Int;
+}
+
+public func swiftRetFunc60() -> S60 {
+    return S60(f0: 6922353269487057763, f1: 103032455997325768)
+}
+
+@frozen
+public struct S61_S0
+{
+    public let f0 : Int64;
+    public let f1 : Int64;
+    public let f2 : Float;
+}
+
+@frozen
+public struct S61
+{
+    public let f0 : UInt64;
+    public let f1 : S61_S0;
+    public let f2 : Int16;
+    public let f3 : Int32;
+}
+
+public func swiftRetFunc61() -> S61 {
+    return S61(f0: 3465845922566501572, f1: S61_S0(f0: 8266662359091888314, f1: 7511705648638703076, f2: 535470), f2: -5945, f3: 523043523)
+}
+
+@frozen
+public struct S62_S0_S0
+{
+    public let f0 : Int;
+}
+
+@frozen
+public struct S62_S0
+{
+    public let f0 : UInt16;
+    public let f1 : Int16;
+    public let f2 : UInt16;
+    public let f3 : S62_S0_S0;
+}
+
+@frozen
+public struct S62
+{
+    public let f0 : S62_S0;
+    public let f1 : Int;
+    public let f2 : UInt16;
+}
+
+public func swiftRetFunc62() -> S62 {
+    return S62(f0: S62_S0(f0: 50789, f1: 30245, f2: 35063, f3: S62_S0_S0(f0: 3102684963408623932)), f1: 792877586576090769, f2: 24697)
+}
+
+@frozen
+public struct S63
+{
+    public let f0 : Double;
+    public let f1 : Int;
+    public let f2 : Double;
+    public let f3 : Int8;
+    public let f4 : Float;
+}
+
+public func swiftRetFunc63() -> S63 {
+    return S63(f0: 4097323000009314, f1: 4162427097168837193, f2: 140736061437152, f3: -59, f4: 7331757)
+}
+
+@frozen
+public struct S64_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct S64
+{
+    public let f0 : S64_S0;
+    public let f1 : UInt64;
+    public let f2 : Int64;
+    public let f3 : Int;
+}
+
+public func swiftRetFunc64() -> S64 {
+    return S64(f0: S64_S0(f0: 2624461610177878495), f1: 5222178027019975511, f2: 9006949357929457355, f3: 7966680593035770540)
+}
+
+@frozen
+public struct S65
+{
+    public let f0 : Int;
+    public let f1 : Double;
+    public let f2 : UInt16;
+    public let f3 : Int16;
+    public let f4 : UInt8;
+    public let f5 : Int32;
+    public let f6 : UInt64;
+}
+
+public func swiftRetFunc65() -> S65 {
+    return S65(f0: 6080968957098434687, f1: 3067343828504927, f2: 56887, f3: 804, f4: 235, f5: 121742660, f6: 9218677163034827308)
+}
+
+@frozen
+public struct S66
+{
+    public let f0 : Int8;
+    public let f1 : UInt64;
+    public let f2 : UInt32;
+    public let f3 : UInt64;
+    public let f4 : UInt64;
+}
+
+public func swiftRetFunc66() -> S66 {
+    return S66(f0: -16, f1: 7967447403042597794, f2: 2029697750, f3: 4180031087394830849, f4: 5847795120921557969)
+}
+
+@frozen
+public struct S67_S0
+{
+    public let f0 : UInt64;
+}
+
+@frozen
+public struct S67
+{
+    public let f0 : S67_S0;
+    public let f1 : UInt8;
+    public let f2 : UInt16;
+    public let f3 : UInt64;
+    public let f4 : UInt64;
+    public let f5 : Int8;
+}
+
+public func swiftRetFunc67() -> S67 {
+    return S67(f0: S67_S0(f0: 4844204675254434929), f1: 135, f2: 13969, f3: 4897129719050177731, f4: 7233638107485862921, f5: -11)
+}
+
+@frozen
+public struct S68_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct S68
+{
+    public let f0 : Int32;
+    public let f1 : UInt64;
+    public let f2 : UInt32;
+    public let f3 : S68_S0;
+    public let f4 : Int32;
+    public let f5 : Int8;
+}
+
+public func swiftRetFunc68() -> S68 {
+    return S68(f0: 1708606840, f1: 1768121573985581212, f2: 1033319213, f3: S68_S0(f0: 2741322436867931), f4: 955320338, f5: 12)
+}
+
+@frozen
+public struct S69
+{
+    public let f0 : UInt32;
+}
+
+public func swiftRetFunc69() -> S69 {
+    return S69(f0: 2092746473)
+}
+
+@frozen
+public struct S70
+{
+    public let f0 : UInt8;
+    public let f1 : Float;
+}
+
+public func swiftRetFunc70() -> S70 {
+    return S70(f0: 76, f1: 4138467)
+}
+
+@frozen
+public struct S71_S0
+{
+    public let f0 : Int8;
+    public let f1 : UInt64;
+    public let f2 : Int64;
+}
+
+@frozen
+public struct S71
+{
+    public let f0 : S71_S0;
+    public let f1 : UInt8;
+    public let f2 : UInt8;
+}
+
+public func swiftRetFunc71() -> S71 {
+    return S71(f0: S71_S0(f0: -98, f1: 8603744544763953916, f2: 8460721064583106347), f1: 10, f2: 88)
+}
+
+@frozen
+public struct S72
+{
+    public let f0 : UInt32;
+}
+
+public func swiftRetFunc72() -> S72 {
+    return S72(f0: 2021509367)
+}
+
+@frozen
+public struct S73
+{
+    public let f0 : Int;
+    public let f1 : Int16;
+    public let f2 : UInt64;
+    public let f3 : Float;
+    public let f4 : Int32;
+    public let f5 : UInt;
+    public let f6 : UInt;
+}
+
+public func swiftRetFunc73() -> S73 {
+    return S73(f0: 6222563427944465437, f1: 28721, f2: 1313300783845289148, f3: 6761, f4: 2074171265, f5: 6232209228889209160, f6: 1423931135184844265)
+}
+
+@frozen
+public struct S74
+{
+    public let f0 : Int16;
+    public let f1 : Float;
+    public let f2 : Double;
+    public let f3 : UInt16;
+    public let f4 : Int8;
+}
+
+public func swiftRetFunc74() -> S74 {
+    return S74(f0: 27115, f1: 1416098, f2: 4468576755457331, f3: 58864, f4: 81)
+}
+
+@frozen
+public struct S75_S0_S0
+{
+    public let f0 : Int8;
+}
+
+@frozen
+public struct S75_S0
+{
+    public let f0 : S75_S0_S0;
+    public let f1 : UInt8;
+}
+
+@frozen
+public struct S75
+{
+    public let f0 : UInt64;
+    public let f1 : S75_S0;
+    public let f2 : UInt8;
+}
+
+public func swiftRetFunc75() -> S75 {
+    return S75(f0: 8532911974860912350, f1: S75_S0(f0: S75_S0_S0(f0: -60), f1: 66), f2: 200)
+}
+
+@frozen
+public struct S76_S0_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct S76_S0
+{
+    public let f0 : Int8;
+    public let f1 : UInt64;
+    public let f2 : S76_S0_S0;
+    public let f3 : Double;
+}
+
+@frozen
+public struct S76
+{
+    public let f0 : UInt8;
+    public let f1 : S76_S0;
+    public let f2 : Double;
+}
+
+public func swiftRetFunc76() -> S76 {
+    return S76(f0: 69, f1: S76_S0(f0: -29, f1: 4872234474620951743, f2: S76_S0_S0(f0: 11036), f3: 585486652063917), f2: 2265391710186639)
+}
+
+@frozen
+public struct S77
+{
+    public let f0 : Int32;
+    public let f1 : Int32;
+    public let f2 : Int32;
+    public let f3 : UInt32;
+    public let f4 : Int16;
+}
+
+public func swiftRetFunc77() -> S77 {
+    return S77(f0: 4495211, f1: 1364377405, f2: 773989694, f3: 1121696315, f4: 7589)
+}
+
+@frozen
+public struct S78
+{
+    public let f0 : UInt32;
+    public let f1 : UInt;
+}
+
+public func swiftRetFunc78() -> S78 {
+    return S78(f0: 1767839225, f1: 7917317019379224114)
+}
+
+@frozen
+public struct S79_S0
+{
+    public let f0 : Double;
+    public let f1 : UInt32;
+    public let f2 : Int32;
+}
+
+@frozen
+public struct S79
+{
+    public let f0 : S79_S0;
+    public let f1 : UInt8;
+    public let f2 : Double;
+}
+
+public func swiftRetFunc79() -> S79 {
+    return S79(f0: S79_S0(f0: 495074072703635, f1: 417605286, f2: 171326442), f1: 203, f2: 2976663235490421)
+}
+
+@frozen
+public struct S80
+{
+    public let f0 : Int32;
+    public let f1 : Int16;
+    public let f2 : Int8;
+}
+
+public func swiftRetFunc80() -> S80 {
+    return S80(f0: 999559959, f1: 19977, f2: -4)
+}
+
+@frozen
+public struct S81_S0
+{
+    public let f0 : UInt;
+}
+
+@frozen
+public struct S81
+{
+    public let f0 : Int32;
+    public let f1 : S81_S0;
+    public let f2 : Float;
+    public let f3 : Int64;
+    public let f4 : UInt32;
+    public let f5 : UInt8;
+    public let f6 : Int16;
+}
+
+public func swiftRetFunc81() -> S81 {
+    return S81(f0: 452603110, f1: S81_S0(f0: 6240652733420985265), f2: 6469988, f3: 5775316279348621124, f4: 1398033592, f5: 105, f6: 21937)
+}
+
+@frozen
+public struct S82
+{
+    public let f0 : Int;
+}
+
+public func swiftRetFunc82() -> S82 {
+    return S82(f0: 6454754584537364459)
+}
+
+@frozen
+public struct S83
+{
+    public let f0 : UInt64;
+    public let f1 : UInt32;
+    public let f2 : Float;
+    public let f3 : UInt8;
+    public let f4 : Float;
+}
+
+public func swiftRetFunc83() -> S83 {
+    return S83(f0: 2998238441521688907, f1: 9623946, f2: 2577885, f3: 156, f4: 6678807)
+}
+
+@frozen
+public struct S84_S0
+{
+    public let f0 : Int16;
+}
+
+@frozen
+public struct S84
+{
+    public let f0 : S84_S0;
+}
+
+public func swiftRetFunc84() -> S84 {
+    return S84(f0: S84_S0(f0: 16213))
+}
+
+@frozen
+public struct S85_S0
+{
+    public let f0 : Int16;
+    public let f1 : Int8;
+}
+
+@frozen
+public struct S85
+{
+    public let f0 : Int64;
+    public let f1 : UInt8;
+    public let f2 : S85_S0;
+    public let f3 : Float;
+    public let f4 : Int;
+}
+
+public func swiftRetFunc85() -> S85 {
+    return S85(f0: 8858924985061791416, f1: 200, f2: S85_S0(f0: 4504, f1: 60), f3: 5572917, f4: 6546369836182556538)
+}
+
+@frozen
+public struct S86
+{
+    public let f0 : UInt16;
+    public let f1 : Float;
+    public let f2 : UInt32;
+}
+
+public func swiftRetFunc86() -> S86 {
+    return S86(f0: 22762, f1: 4672435, f2: 719927700)
+}
+
+@frozen
+public struct S87
+{
+    public let f0 : Int32;
+    public let f1 : UInt;
+    public let f2 : UInt64;
+}
+
+public func swiftRetFunc87() -> S87 {
+    return S87(f0: 361750184, f1: 4206825694012787823, f2: 2885153391732919282)
+}
+
+@frozen
+public struct S88
+{
+    public let f0 : UInt32;
+    public let f1 : Int16;
+    public let f2 : UInt32;
+}
+
+public func swiftRetFunc88() -> S88 {
+    return S88(f0: 2125094198, f1: -10705, f2: 182007583)
+}
+
+@frozen
+public struct S89
+{
+    public let f0 : UInt8;
+    public let f1 : UInt32;
+    public let f2 : Int32;
+    public let f3 : Int8;
+    public let f4 : Int64;
+}
+
+public func swiftRetFunc89() -> S89 {
+    return S89(f0: 175, f1: 1062985476, f2: 1019006263, f3: -22, f4: 6888877252788498422)
+}
+
+@frozen
+public struct S90
+{
+    public let f0 : UInt8;
+    public let f1 : Int32;
+    public let f2 : Int16;
+    public let f3 : Int;
+    public let f4 : UInt32;
+    public let f5 : UInt32;
+    public let f6 : Int64;
+}
+
+public func swiftRetFunc90() -> S90 {
+    return S90(f0: 221, f1: 225825436, f2: -26231, f3: 5122880520199505508, f4: 907657092, f5: 707089277, f6: 6091814344013414920)
+}
+
+@frozen
+public struct S91
+{
+    public let f0 : Double;
+    public let f1 : Int8;
+    public let f2 : Int8;
+    public let f3 : UInt32;
+    public let f4 : Int;
+    public let f5 : Int8;
+    public let f6 : Int16;
+}
+
+public func swiftRetFunc91() -> S91 {
+    return S91(f0: 3265110225161261, f1: 62, f2: -38, f3: 946023589, f4: 4109819715069879890, f5: -73, f6: 20363)
+}
+
+@frozen
+public struct S92_S0
+{
+    public let f0 : Float;
+    public let f1 : Int64;
+}
+
+@frozen
+public struct S92
+{
+    public let f0 : Int64;
+    public let f1 : UInt;
+    public let f2 : S92_S0;
+    public let f3 : Int32;
+    public let f4 : Float;
+    public let f5 : Float;
+}
+
+public func swiftRetFunc92() -> S92 {
+    return S92(f0: 3230438394207610137, f1: 3003396252681176136, f2: S92_S0(f0: 6494422, f1: 2971773224350614312), f3: 2063694141, f4: 3117041, f5: 1003760)
+}
+
+@frozen
+public struct S93
+{
+    public let f0 : Int;
+    public let f1 : UInt8;
+    public let f2 : UInt32;
+    public let f3 : UInt32;
+    public let f4 : UInt64;
+}
+
+public func swiftRetFunc93() -> S93 {
+    return S93(f0: 5170226481546239050, f1: 11, f2: 1120259582, f3: 1947849905, f4: 3690113387392112192)
+}
+
+@frozen
+public struct S94
+{
+    public let f0 : UInt16;
+    public let f1 : Double;
+    public let f2 : Int16;
+    public let f3 : Double;
+    public let f4 : UInt64;
+}
+
+public func swiftRetFunc94() -> S94 {
+    return S94(f0: 57111, f1: 1718940123307098, f2: -16145, f3: 1099321301986326, f4: 2972912419231960385)
+}
+
+@frozen
+public struct S95_S0
+{
+    public let f0 : Double;
+}
+
+@frozen
+public struct S95
+{
+    public let f0 : Int16;
+    public let f1 : S95_S0;
+    public let f2 : UInt64;
+}
+
+public func swiftRetFunc95() -> S95 {
+    return S95(f0: 12620, f1: S95_S0(f0: 3232445258308074), f2: 97365157264460373)
+}
+
+@frozen
+public struct S96
+{
+    public let f0 : Int8;
+    public let f1 : Double;
+    public let f2 : UInt64;
+    public let f3 : UInt64;
+    public let f4 : Int32;
+    public let f5 : Int64;
+}
+
+public func swiftRetFunc96() -> S96 {
+    return S96(f0: 3, f1: 242355060906873, f2: 3087879465791321798, f3: 7363229136420263380, f4: 46853328, f5: 4148307028758236491)
+}
+
+@frozen
+public struct S97
+{
+    public let f0 : UInt16;
+    public let f1 : Int32;
+    public let f2 : UInt16;
+    public let f3 : UInt32;
+}
+
+public func swiftRetFunc97() -> S97 {
+    return S97(f0: 10651, f1: 2068379463, f2: 57307, f3: 329271020)
+}
+
+@frozen
+public struct S98
+{
+    public let f0 : Double;
+    public let f1 : Int32;
+    public let f2 : Int64;
+    public let f3 : Int;
+    public let f4 : Float;
+    public let f5 : Double;
+}
+
+public func swiftRetFunc98() -> S98 {
+    return S98(f0: 2250389231883613, f1: 1755058358, f2: 6686142382639170849, f3: 6456632014163315773, f4: 2818253, f5: 1085859434505817)
+}
+
+@frozen
+public struct S99_S0
+{
+    public let f0 : Int32;
+}
+
+@frozen
+public struct S99
+{
+    public let f0 : S99_S0;
+    public let f1 : Float;
+}
+
+public func swiftRetFunc99() -> S99 {
+    return S99(f0: S99_S0(f0: 1117297545), f1: 1539294)
+}
+
diff --git a/src/tests/Interop/Swift/SwiftSelfContext/SwiftSelfContext.csproj b/src/tests/Interop/Swift/SwiftSelfContext/SwiftSelfContext.csproj
index a57cd84cf884..89eda99352fd 100644
--- a/src/tests/Interop/Swift/SwiftSelfContext/SwiftSelfContext.csproj
+++ b/src/tests/Interop/Swift/SwiftSelfContext/SwiftSelfContext.csproj
@@ -3,7 +3,7 @@
     <!-- Needed for CLRTestTargetUnsupported, CMakeProjectReference -->
     <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/93631 -->
+    <!-- Swift interop is supported on Apple platforms only -->
     <CLRTestTargetUnsupported Condition="'$(TargetsOSX)' != 'true' and '$(TargetsAppleMobile)' != 'true'">true</CLRTestTargetUnsupported>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
index eaf7f2fa1a9d..65c5118060f3 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp
@@ -17,8 +17,6 @@ typedef enum {
     CONVERT_SENTINEL,
     CONVERT_SATURATING,
     CONVERT_NATIVECOMPILERBEHAVIOR,
-    CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64,
-    CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32,
 } FPtoIntegerConversionType;
 
 extern "C" DLLEXPORT int32_t ConvertDoubleToInt32(double x, FPtoIntegerConversionType t)
@@ -30,11 +28,9 @@ extern "C" DLLEXPORT int32_t ConvertDoubleToInt32(double x, FPtoIntegerConversio
 
     switch (t) {
     case CONVERT_BACKWARD_COMPATIBLE:
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
     case CONVERT_SENTINEL:
         return ((x != x) || (x < INT32_MIN) || (x > INT32_MAX)) ? INT32_MIN : (int32_t)x;
 
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
     case CONVERT_SATURATING:
         return (x != x) ? 0 : (x < INT32_MIN) ? INT32_MIN : (x > INT32_MAX) ? INT32_MAX : (int32_t)x;
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
@@ -53,14 +49,12 @@ extern "C" DLLEXPORT uint32_t ConvertDoubleToUInt32(double x, FPtoIntegerConvers
     const double int64_max_plus_1 = 0x1.p63; // 0x43e0000000000000 // (uint64_t)INT64_MAX + 1;
 
     switch (t) {
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
     case CONVERT_BACKWARD_COMPATIBLE:
         return ((x != x) || (x < INT64_MIN) || (x >= int64_max_plus_1)) ? 0 : (uint32_t)(int64_t)x;
 
     case CONVERT_SENTINEL:
         return ((x != x) || (x < 0) || (x > UINT32_MAX)) ? UINT32_MAX  : (uint32_t)x;
 
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
     case CONVERT_SATURATING:
         return ((x != x) || (x < 0)) ? 0 : (x > UINT32_MAX) ? UINT32_MAX : (uint32_t)x;
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
@@ -70,14 +64,6 @@ extern "C" DLLEXPORT uint32_t ConvertDoubleToUInt32(double x, FPtoIntegerConvers
     return 0;
 }
 
-static uint64_t CppNativeArm32ConvertDoubleToUInt64(double y)
-{
-    const double uintmax_plus_1 = -2.0 * (double)INT32_MIN;
-    uint32_t hi32Bits = ConvertDoubleToUInt32(y / uintmax_plus_1, CONVERT_SATURATING);
-    uint32_t lo32Bits = ConvertDoubleToUInt32(y - (((double)hi32Bits) * uintmax_plus_1), CONVERT_SATURATING);
-    return (((uint64_t)hi32Bits) << 32) + lo32Bits;
-}
-
 extern "C" DLLEXPORT int64_t ConvertDoubleToInt64(double x, FPtoIntegerConversionType t)
 {
     if (t == CONVERT_NATIVECOMPILERBEHAVIOR)
@@ -95,21 +81,10 @@ extern "C" DLLEXPORT int64_t ConvertDoubleToInt64(double x, FPtoIntegerConversio
     const double int32_max_plus1 = ((double)INT32_MAX) + 1;
 
     switch (t) {
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
     case CONVERT_BACKWARD_COMPATIBLE:
     case CONVERT_SENTINEL:
         return ((x != x) || (x < INT64_MIN) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x;
 
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
-        if (x > 0)
-        {
-            return (int64_t)CppNativeArm32ConvertDoubleToUInt64(x);
-        }
-        else
-        {
-            return -(int64_t)CppNativeArm32ConvertDoubleToUInt64(-x);
-        }
-
     case CONVERT_SATURATING:
         return (x != x) ? 0 : (x < INT64_MIN) ? INT64_MIN : (x >= int64_max_plus_1) ? INT64_MAX : (int64_t)x;
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
@@ -142,29 +117,6 @@ extern "C" DLLEXPORT  uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver
     case CONVERT_SATURATING:
         return ((x != x) || (x < 0)) ? 0 : (x >= uint64_max_plus_1) ? UINT64_MAX : (uint64_t)x;
 
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
-        {
-            if (x < int64_max_plus_1)
-            {
-                return (uint64_t)ConvertDoubleToInt64(x, CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32);
-            }
-            else
-            {
-                return (uint64_t)ConvertDoubleToInt64(x - int64_max_plus_1, CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000);
-            }
-        }
-
-    case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
-        if (x < int64_max_plus_1)
-        {
-            return (x < INT64_MIN) ? (uint64_t)INT64_MIN : (uint64_t)(int64_t)x;
-        }
-        else
-        {
-            x -= int64_max_plus_1;
-            x = trunc(x);
-            return (uint64_t)(((x != x) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x) + (0x8000000000000000);
-        }
     case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning
         return 0;
     }
diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
index 5b78783c09e4..d78daddcd838 100644
--- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
+++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs
@@ -19,8 +19,6 @@ public enum FPtoIntegerConversionType
         CONVERT_SENTINEL,
         CONVERT_SATURATING,
         CONVERT_NATIVECOMPILERBEHAVIOR,
-        CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64,
-        CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32,
     }
 
     public enum ConversionType
@@ -87,13 +85,11 @@ public static int ConvertDoubleToInt32(double x, FPtoIntegerConversionType t)
 
             switch (t)
             {
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
                 case FPtoIntegerConversionType.CONVERT_BACKWARD_COMPATIBLE:
                 case FPtoIntegerConversionType.CONVERT_SENTINEL:
                     return (Double.IsNaN(x) || (x<int.MinValue) || (x > int.MaxValue)) ? int.MinValue: (int) x;
 
                 case FPtoIntegerConversionType.CONVERT_SATURATING:
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
                     return Double.IsNaN(x) ? 0 : (x< int.MinValue) ? int.MinValue : (x > int.MaxValue) ? int.MaxValue : (int) x;
             }
             return 0;
@@ -109,7 +105,6 @@ public static uint ConvertDoubleToUInt32(double x, FPtoIntegerConversionType t)
 
             switch (t)
             {
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
                 case FPtoIntegerConversionType.CONVERT_BACKWARD_COMPATIBLE:
                     return (Double.IsNaN(x) || (x < long.MinValue) || (x >= llong_max_plus_1)) ? 0 : (uint)(long)x;
 
@@ -117,7 +112,6 @@ public static uint ConvertDoubleToUInt32(double x, FPtoIntegerConversionType t)
                     return (Double.IsNaN(x) || (x < 0) || (x > uint.MaxValue)) ? uint.MaxValue : (uint)x;
 
                 case FPtoIntegerConversionType.CONVERT_SATURATING:
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
                     return (Double.IsNaN(x) || (x < 0)) ? 0 : (x > uint.MaxValue) ? uint.MaxValue : (uint)x;
             }
 
@@ -136,34 +130,15 @@ public static long ConvertDoubleToInt64(double x, FPtoIntegerConversionType t)
 
             switch (t)
             {
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
                 case FPtoIntegerConversionType.CONVERT_BACKWARD_COMPATIBLE:
                 case FPtoIntegerConversionType.CONVERT_SENTINEL:
                     return (Double.IsNaN(x) || (x < long.MinValue) || (x >= llong_max_plus_1)) ? long.MinValue : (long)x;
 
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
-                    if (x > 0)
-                    {
-                        return (long)CppNativeArm32ConvertDoubleToUInt64(x);
-                    }
-                    else
-                    {
-                        return -(long)CppNativeArm32ConvertDoubleToUInt64(-x);
-                    }
-
                 case FPtoIntegerConversionType.CONVERT_SATURATING:
                     return Double.IsNaN(x) ? 0 : (x < long.MinValue) ? long.MinValue : (x >= llong_max_plus_1) ? long.MaxValue : (long)x;
             }
 
             return 0;
-
-            static ulong CppNativeArm32ConvertDoubleToUInt64(double y)
-            {
-                const double uintmax_plus_1 = -2.0 * (double)int.MinValue;
-                uint hi32Bits = ConvertDoubleToUInt32(y / uintmax_plus_1, FPtoIntegerConversionType.CONVERT_SATURATING);
-                uint lo32Bits = ConvertDoubleToUInt32(y - (((double)hi32Bits) * uintmax_plus_1), FPtoIntegerConversionType.CONVERT_SATURATING);
-                return (((ulong)hi32Bits) << (int)32) + lo32Bits;
-            }
         }
 
         public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
@@ -187,33 +162,6 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t)
 
                 case FPtoIntegerConversionType.CONVERT_SATURATING:
                     return (Double.IsNaN(x) || (x < 0)) ? 0 : (x >= ullong_max_plus_1) ? ulong.MaxValue : (ulong)x;
-
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32:
-                    {
-                        if (x < two63)
-                        {
-                            return (ulong)ConvertDoubleToInt64(x, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32);
-                        }
-                        else
-                        {
-                            return (ulong)ConvertDoubleToInt64(x - two63, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000);
-                        }
-                    }
-
-                case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64:
-
-                    if (x < two63)
-                    {
-                        return (x < long.MinValue) ? unchecked((ulong)long.MinValue) : (ulong)(long)x;
-                    }
-                    else
-                    {
-                        // (double)LLONG_MAX cannot be represented exactly as double
-                        const double llong_max_plus_1 = (double)((ulong)long.MaxValue + 1);
-                        x -= two63;
-                        x = Math.Truncate(x);
-                        return (ulong)((Double.IsNaN(x) || (x >= llong_max_plus_1)) ? long.MinValue : (long)x) + (0x8000000000000000);
-                    }
             }
 
             return 0;
@@ -263,7 +211,7 @@ public static Vector<ulong> ConvertToVectorUInt64(Vector<double> vFloat, FPtoInt
     public class Program
     {
         static int failures = 0;
-        static FPtoIntegerConversionType ManagedConversionRule = FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64;
+        static FPtoIntegerConversionType ManagedConversionRule = FPtoIntegerConversionType.CONVERT_SATURATING;
 
         static void TestBitValue(uint value, double? dblValNullable = null, FPtoIntegerConversionType? tValue = null)
         {
@@ -280,8 +228,6 @@ static void TestBitValue(uint value, double? dblValNullable = null, FPtoIntegerC
 
             if (!tValue.HasValue)
             {
-                TestBitValue(value, dblVal, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64);
-                TestBitValue(value, dblVal, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32);
                 TestBitValue(value, dblVal, FPtoIntegerConversionType.CONVERT_BACKWARD_COMPATIBLE);
                 TestBitValue(value, dblVal, FPtoIntegerConversionType.CONVERT_SATURATING);
                 TestBitValue(value, dblVal, FPtoIntegerConversionType.CONVERT_SENTINEL);
@@ -375,21 +321,8 @@ static void TestBitValue(uint value, double? dblValNullable = null, FPtoIntegerC
         [Fact]
         public static int TestEntryPoint()
         {
-            switch (RuntimeInformation.ProcessArchitecture)
-            {
-                case Architecture.X86:
-                case Architecture.X64:
-                    Program.ManagedConversionRule = FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64;
-                    break;
-
-                case Architecture.Arm:
-                    Program.ManagedConversionRule = FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32;
-                    break;
-
-                case Architecture.Arm64:
-                    Program.ManagedConversionRule = FPtoIntegerConversionType.CONVERT_SATURATING;
-                    break;
-            }
+            Program.ManagedConversionRule = FPtoIntegerConversionType.CONVERT_SATURATING;
+
             Console.WriteLine($"Expected managed float behavior is {Program.ManagedConversionRule} Execute with parameter to adjust");
             Console.WriteLine("Specific test cases");
 
diff --git a/src/tests/JIT/Directed/debugging/debuginfo/tester.csproj b/src/tests/JIT/Directed/debugging/debuginfo/tester.csproj
index b343f6fffb59..902b2546d982 100644
--- a/src/tests/JIT/Directed/debugging/debuginfo/tester.csproj
+++ b/src/tests/JIT/Directed/debugging/debuginfo/tester.csproj
@@ -14,6 +14,7 @@
     <CLRTestEnvironmentVariable Include="DOTNET_JitNoForwardSub" Value="1" />
     <CLRTestEnvironmentVariable Include="DOTNET_JitEnableHeadTailMerge" Value="0" />
     <CLRTestEnvironmentVariable Include="DOTNET_JitEnableCrossBlockLocalAssertionProp" Value="0" />
+    <CLRTestEnvironmentVariable Include="DOTNET_JitEnableOptRepeat" Value="0" />
 
     <ProjectReference Include="tests_d.ilproj" Aliases="tests_d" />
     <ProjectReference Include="tests_r.ilproj" Aliases="tests_r" />
diff --git a/src/tests/JIT/Directed/debugging/poisoning/poison.cs b/src/tests/JIT/Directed/debugging/poisoning/poison.cs
index c5bcccded927..07543fcc5344 100644
--- a/src/tests/JIT/Directed/debugging/poisoning/poison.cs
+++ b/src/tests/JIT/Directed/debugging/poisoning/poison.cs
@@ -8,6 +8,7 @@ public class Program
     [Fact]
     public static unsafe int TestEntryPoint()
     {
+#pragma warning disable CS8500 // takes address of managed type
         bool result = true;
 
         int poisoned;
@@ -16,7 +17,7 @@ public static unsafe int TestEntryPoint()
 
         GCRef zeroed;
         Unsafe.SkipInit(out zeroed);
-        result &= VerifyZero(Unsafe.AsPointer(ref zeroed), Unsafe.SizeOf<GCRef>());
+        result &= VerifyZero(&zeroed, sizeof(GCRef));
 
         WithoutGCRef poisoned2;
         Unsafe.SkipInit(out poisoned2);
@@ -36,9 +37,10 @@ public static unsafe int TestEntryPoint()
 
         GCRef zeroed2;
         Unsafe.SkipInit(out zeroed2);
-        result &= VerifyZero(Unsafe.AsPointer(ref zeroed2), Unsafe.SizeOf<GCRef>());
+        result &= VerifyZero(&zeroed2, sizeof(GCRef));
 
         return result ? 100 : 101;
+#pragma warning restore CS8500
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
diff --git a/src/tests/JIT/Directed/tailcall/more_tailcalls.ilproj b/src/tests/JIT/Directed/tailcall/more_tailcalls.ilproj
index 3f6be099a84e..17f0cf113434 100644
--- a/src/tests/JIT/Directed/tailcall/more_tailcalls.ilproj
+++ b/src/tests/JIT/Directed/tailcall/more_tailcalls.ilproj
@@ -1,7 +1,8 @@
 <Project Sdk="Microsoft.NET.Sdk.IL">
   <PropertyGroup>
-    <!-- Needed for CLRTestEnvironmentVariable -->
+    <!-- Needed for CLRTestEnvironmentVariable and GCStressIncompatible -->
     <RequiresProcessIsolation>true</RequiresProcessIsolation>
+    <GCStressIncompatible>true</GCStressIncompatible>
   </PropertyGroup>
   <PropertyGroup>
     <DebugType>PdbOnly</DebugType>
diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/LoadVectorx2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/LoadVectorx2Test.template
index 5a1be5da8225..5e3f2d188aee 100644
--- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/LoadVectorx2Test.template
+++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/LoadVectorx2Test.template
@@ -20,7 +20,6 @@ namespace JIT.HardwareIntrinsics.Arm
     public static partial class Program
     {
         [Fact]
-        [ActiveIssue("https://github.com/dotnet/runtime/pull/92855#issuecomment-1746078670", TestRuntimes.Mono)]
         public static void {TestName}()
         {
             var test = new {TestName}Test();
diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/LoadVectorx3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/LoadVectorx3Test.template
index 166fa4204d98..99c442a16d1d 100644
--- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/LoadVectorx3Test.template
+++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/LoadVectorx3Test.template
@@ -20,7 +20,6 @@ namespace JIT.HardwareIntrinsics.Arm
     public static partial class Program
     {
         [Fact]
-        [ActiveIssue("https://github.com/dotnet/runtime/pull/92855#issuecomment-1746078670", TestRuntimes.Mono)]
         public static void {TestName}()
         {
             var test = new {TestName}Test();
diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/LoadVectorx4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/LoadVectorx4Test.template
index b89a8a051c8a..d747920d7a24 100644
--- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/LoadVectorx4Test.template
+++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/LoadVectorx4Test.template
@@ -20,7 +20,6 @@ namespace JIT.HardwareIntrinsics.Arm
     public static partial class Program
     {
         [Fact]
-        [ActiveIssue("https://github.com/dotnet/runtime/pull/92855#issuecomment-1746078670", TestRuntimes.Mono)]
         public static void {TestName}()
         {
             var test = new {TestName}Test();
diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template
new file mode 100644
index 000000000000..09aaf2f442e1
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadMaskedUnOpTest.template
@@ -0,0 +1,203 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\Arm\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Numerics;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using Xunit;
+
+namespace JIT.HardwareIntrinsics.Arm
+{
+    public static partial class Program
+    {
+        [Fact]
+        public static void {TestName}()
+        {
+            var test = new LoadUnaryOpTest__{TestName}();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works
+                test.RunBasicScenario_Load();
+
+                // Validates calling via reflection works
+                // TODO-SVE: Enable once register allocation exists for predicates.
+                // test.RunReflectionScenario_Load();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class LoadUnaryOpTest__{TestName}
+    {
+        private struct DataTable
+        {
+            private byte[] inArray1;
+            private byte[] outArray;
+
+            private GCHandle inHandle1;
+            private GCHandle outHandle;
+
+            private ulong alignment;
+
+            public DataTable({Op2BaseType}[] inArray1, {RetBaseType}[] outArray, int alignment)
+            {
+                int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op2BaseType}>();
+                int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>();
+                if ((alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfoutArray)
+                {
+                    throw new ArgumentException("Invalid value of alignment");
+                }
+
+                this.inArray1 = new byte[alignment * 2];
+                this.outArray = new byte[alignment * 2];
+
+                this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+                this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+                this.alignment = (ulong)alignment;
+
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+
+            }
+
+            public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+            public void Dispose()
+            {
+                inHandle1.Free();
+                outHandle.Free();
+            }
+
+            private static unsafe void* Align(byte* buffer, ulong expectedAlignment)
+            {
+                return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1));
+            }
+        }
+
+        private static readonly int LargestVectorSize = {LargestVectorSize};
+
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<{RetVectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType});
+        private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType});
+
+        private static {Op2BaseType}[] _data = new {Op2BaseType}[Op2ElementCount];
+
+        private DataTable _dataTable;
+
+        public LoadUnaryOpTest__{TestName}()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data[i] = {NextValueOp2}; }
+            _dataTable = new DataTable(_data, new {RetBaseType}[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => {Isa}.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            //TODO-SVE: Once register allocation exists for predicates, move loadMask into DataTable
+            {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{RetBaseType}(SveMaskPattern.All);
+
+            var result = {Isa}.{Method}(
+                loadMask,
+                ({Op2BaseType}*)(_dataTable.inArray1Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{RetBaseType}(SveMaskPattern.All);
+
+            var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof(Vector<{Op2BaseType}>), typeof({Op2BaseType}*) })
+                                     .Invoke(null, new object[] {
+                                        loadMask,
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof({Op2BaseType}*))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_Load();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            {Op2BaseType}[] inArray = new {Op2BaseType}[Op2ElementCount];
+            {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<{RetVectorType}<{Op2BaseType}>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult({Op2BaseType}[] firstOp, {RetBaseType}[] result, [CallerMemberName] string method = "")
+        {
+            bool succeeded = true;
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if ({ValidateIterResult})
+                {
+                    succeeded = false;
+                    break;
+                }
+            }
+
+            if (!succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op2BaseType}): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"  firstOp: ({string.Join(", ", firstOp)})");
+                TestLibrary.TestFramework.LogInformation($"   result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+
+                Succeeded = false;
+            }
+        }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleBinOpEmbRounding.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleBinOpEmbRounding.template
index da78721c596d..3b6ab6a4de20 100644
--- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleBinOpEmbRounding.template
+++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleBinOpEmbRounding.template
@@ -173,7 +173,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
 
-            var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op2VectorType}<{Op2BaseType}>) , typeof(FloatRoundingMode)})
+            var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op2VectorType}<{Op2BaseType}>), typeof(FloatRoundingMode)})
                                      .Invoke(null, new object[] {
                                         Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr),
                                         Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr),
@@ -307,9 +307,78 @@ namespace JIT.HardwareIntrinsics.X86
         
         private static Dictionary<(string, string, string), ulong[]> binaryEmbRoundingAnswerTable = new Dictionary<(string, string, string), ulong[]>
         {
+            {("Double", "ConvertScalarToVector128Double", "ToNegativeInfinity"), new ulong[] {0x402e000000000000, 0x3ff0000000000000}},
+            {("Single", "ConvertScalarToVector128Single", "ToNegativeInfinity"), new ulong[] {0x41700000, 0x3f800000, 0x3f800000, 0x3f800000}},
+            {("Double", "ConvertScalarToVector128Double", "ToPositiveInfinity"), new ulong[] {0x402e000000000000, 0x3ff0000000000000}},
+            {("Single", "ConvertScalarToVector128Single", "ToPositiveInfinity"), new ulong[] {0x41700000, 0x3f800000, 0x3f800000, 0x3f800000}},
+            {("Double", "ConvertScalarToVector128Double", "ToZero"), new ulong[] {0x402e000000000000, 0x3ff0000000000000}},
+            {("Single", "ConvertScalarToVector128Single", "ToZero"), new ulong[] {0x41700000, 0x3f800000, 0x3f800000, 0x3f800000}},
+            {("Double", "AddScalar", "ToNegativeInfinity"), new ulong[] {0x3fe0000000000000, 0x3fa999999999999a}},
+            {("Single", "AddScalar", "ToNegativeInfinity"), new ulong[] {0x3effffff, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "AddScalar", "ToPositiveInfinity"), new ulong[] {0x3fe0000000000001, 0x3fa999999999999a}},
+            {("Single", "AddScalar", "ToPositiveInfinity"), new ulong[] {0x3f000000, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "AddScalar", "ToZero"), new ulong[] {0x3fe0000000000000, 0x3fa999999999999a}},
+            {("Single", "AddScalar", "ToZero"), new ulong[] {0x3effffff, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "DivideScalar", "ToNegativeInfinity"), new ulong[] {0x3fbc71c71c71c71c, 0x3fa999999999999a}},
+            {("Single", "DivideScalar", "ToNegativeInfinity"), new ulong[] {0x3de38e39, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "DivideScalar", "ToPositiveInfinity"), new ulong[] {0x3fbc71c71c71c71d, 0x3fa999999999999a}},
+            {("Single", "DivideScalar", "ToPositiveInfinity"), new ulong[] {0x3de38e3a, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "DivideScalar", "ToZero"), new ulong[] {0x3fbc71c71c71c71c, 0x3fa999999999999a}},
+            {("Single", "DivideScalar", "ToZero"), new ulong[] {0x3de38e39, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "MultiplyScalar", "ToNegativeInfinity"), new ulong[] {0x3f970a3d70a3d70a, 0x3fa999999999999a}},
+            {("Single", "MultiplyScalar", "ToNegativeInfinity"), new ulong[] {0x3cb851eb, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "MultiplyScalar", "ToPositiveInfinity"), new ulong[] {0x3f970a3d70a3d70b, 0x3fa999999999999a}},
+            {("Single", "MultiplyScalar", "ToPositiveInfinity"), new ulong[] {0x3cb851ec, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "MultiplyScalar", "ToZero"), new ulong[] {0x3f970a3d70a3d70a, 0x3fa999999999999a}},
+            {("Single", "MultiplyScalar", "ToZero"), new ulong[] {0x3cb851eb, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "SubtractScalar", "ToNegativeInfinity"), new ulong[] {0xbfd999999999999a, 0x3fa999999999999a}},
+            {("Single", "SubtractScalar", "ToNegativeInfinity"), new ulong[] {0xbecccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "SubtractScalar", "ToPositiveInfinity"), new ulong[] {0xbfd9999999999999, 0x3fa999999999999a}},
+            {("Single", "SubtractScalar", "ToPositiveInfinity"), new ulong[] {0xbecccccc, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "SubtractScalar", "ToZero"), new ulong[] {0xbfd9999999999999, 0x3fa999999999999a}},
+            {("Single", "SubtractScalar", "ToZero"), new ulong[] {0xbecccccc, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "SqrtScalar", "ToNegativeInfinity"), new ulong[] {0x3fe5775c544ff262, 0x3fa999999999999a}},
+            {("Single", "SqrtScalar", "ToNegativeInfinity"), new ulong[] {0x3f2bbae2, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "SqrtScalar", "ToPositiveInfinity"), new ulong[] {0x3fe5775c544ff263, 0x3fa999999999999a}},
+            {("Single", "SqrtScalar", "ToPositiveInfinity"), new ulong[] {0x3f2bbae3, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "SqrtScalar", "ToZero"), new ulong[] {0x3fe5775c544ff262, 0x3fa999999999999a}},
+            {("Single", "SqrtScalar", "ToZero"), new ulong[] {0x3f2bbae2, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
             {("Double", "Add", "ToNegativeInfinity"), new ulong[] {0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000}},
+            {("Single", "Add", "ToNegativeInfinity"), new ulong[] {0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff}},
             {("Double", "Add", "ToPositiveInfinity"), new ulong[] {0x3fe0000000000001, 0x3fe0000000000001, 0x3fe0000000000001, 0x3fe0000000000001, 0x3fe0000000000001, 0x3fe0000000000001, 0x3fe0000000000001, 0x3fe0000000000001}},
+            {("Single", "Add", "ToPositiveInfinity"), new ulong[] {0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000}},
             {("Double", "Add", "ToZero"), new ulong[] {0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000}},
+            {("Single", "Add", "ToZero"), new ulong[] {0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff, 0x3effffff}},
+            {("Double", "Divide", "ToNegativeInfinity"), new ulong[] {0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c}},
+            {("Single", "Divide", "ToNegativeInfinity"), new ulong[] {0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39}},
+            {("Double", "Divide", "ToPositiveInfinity"), new ulong[] {0x3fbc71c71c71c71d, 0x3fbc71c71c71c71d, 0x3fbc71c71c71c71d, 0x3fbc71c71c71c71d, 0x3fbc71c71c71c71d, 0x3fbc71c71c71c71d, 0x3fbc71c71c71c71d, 0x3fbc71c71c71c71d}},
+            {("Single", "Divide", "ToPositiveInfinity"), new ulong[] {0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a, 0x3de38e3a}},
+            {("Double", "Divide", "ToZero"), new ulong[] {0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c, 0x3fbc71c71c71c71c}},
+            {("Single", "Divide", "ToZero"), new ulong[] {0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39, 0x3de38e39}},
+            {("Double", "Multiply", "ToNegativeInfinity"), new ulong[] {0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a}},
+            {("Single", "Multiply", "ToNegativeInfinity"), new ulong[] {0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb}},
+            {("Double", "Multiply", "ToPositiveInfinity"), new ulong[] {0x3f970a3d70a3d70b, 0x3f970a3d70a3d70b, 0x3f970a3d70a3d70b, 0x3f970a3d70a3d70b, 0x3f970a3d70a3d70b, 0x3f970a3d70a3d70b, 0x3f970a3d70a3d70b, 0x3f970a3d70a3d70b}},
+            {("Single", "Multiply", "ToPositiveInfinity"), new ulong[] {0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec, 0x3cb851ec}},
+            {("Double", "Multiply", "ToZero"), new ulong[] {0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a, 0x3f970a3d70a3d70a}},
+            {("Single", "Multiply", "ToZero"), new ulong[] {0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb, 0x3cb851eb}},
+            {("Double", "Subtract", "ToNegativeInfinity"), new ulong[] {0xbfd999999999999a, 0xbfd999999999999a, 0xbfd999999999999a, 0xbfd999999999999a, 0xbfd999999999999a, 0xbfd999999999999a, 0xbfd999999999999a, 0xbfd999999999999a}},
+            {("Single", "Subtract", "ToNegativeInfinity"), new ulong[] {0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd, 0xbecccccd}},
+            {("Double", "Subtract", "ToPositiveInfinity"), new ulong[] {0xbfd9999999999999, 0xbfd9999999999999, 0xbfd9999999999999, 0xbfd9999999999999, 0xbfd9999999999999, 0xbfd9999999999999, 0xbfd9999999999999, 0xbfd9999999999999}},
+            {("Single", "Subtract", "ToPositiveInfinity"), new ulong[] {0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc}},
+            {("Double", "Subtract", "ToZero"), new ulong[] {0xbfd9999999999999, 0xbfd9999999999999, 0xbfd9999999999999, 0xbfd9999999999999, 0xbfd9999999999999, 0xbfd9999999999999, 0xbfd9999999999999, 0xbfd9999999999999}},
+            {("Single", "Subtract", "ToZero"), new ulong[] {0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc, 0xbecccccc}},
+            {("Double", "Scale", "ToNegativeInfinity"), new ulong[] {0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a}},
+            {("Single", "Scale", "ToNegativeInfinity"), new ulong[] {0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "Scale", "ToPositiveInfinity"), new ulong[] {0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a}},
+            {("Single", "Scale", "ToPositiveInfinity"), new ulong[] {0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "Scale", "ToZero"), new ulong[] {0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a, 0x3fa999999999999a}},
+            {("Single", "Scale", "ToZero"), new ulong[] {0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "ScaleScalar", "ToNegativeInfinity"), new ulong[] {0x3fa999999999999a, 0x3fa999999999999a}},
+            {("Single", "ScaleScalar", "ToNegativeInfinity"), new ulong[] {0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "ScaleScalar", "ToPositiveInfinity"), new ulong[] {0x3fa999999999999a, 0x3fa999999999999a}},
+            {("Single", "ScaleScalar", "ToPositiveInfinity"), new ulong[] {0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "ScaleScalar", "ToZero"), new ulong[] {0x3fa999999999999a, 0x3fa999999999999a}},
+            {("Single", "ScaleScalar", "ToZero"), new ulong[] {0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
         };
     }
 }
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleTernOpEmbRounding.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleTernOpEmbRounding.template
new file mode 100644
index 000000000000..b302aa267a82
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleTernOpEmbRounding.template
@@ -0,0 +1,392 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Collections.Generic;
+using System.Runtime.Intrinsics.X86;
+using Xunit;
+
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        [Fact]
+        public static void {Method}{RetBaseType}{RoundingMode}()
+        {
+            var test = new TernaryOpTest__{Method}{RetBaseType}{RoundingMode}();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if ({LoadIsa}.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class TernaryOpTest__{Method}{RetBaseType}{RoundingMode}
+    {
+        private struct TestStruct
+        {
+            public {Op1VectorType}<{Op1BaseType}> _fld1;
+            public {Op2VectorType}<{Op2BaseType}> _fld2;
+            public {Op2VectorType}<{Op3BaseType}> _fld3;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = ({Op1BaseType}){FixedInput1}; }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = ({Op1BaseType}){FixedInput2}; }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>());
+                for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = ({Op1BaseType}){FixedInput3}; }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3VectorType}<{Op3BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(TernaryOpTest__{Method}{RetBaseType}{RoundingMode} testClass)
+            {
+                var result = {Isa}.{Method}(_fld1, _fld2, _fld3, FloatRoundingMode.{RoundingMode});
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = {LargestVectorSize};
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType});
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType});
+        private static readonly int Op3ElementCount = Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>() / sizeof({Op3BaseType});
+        private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType});
+
+        private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount];
+        private static {Op2BaseType}[] _data2 = new {Op2BaseType}[Op2ElementCount];
+        private static {Op3BaseType}[] _data3 = new {Op3BaseType}[Op3ElementCount];
+
+        private {Op1VectorType}<{Op1BaseType}> _fld1;
+        private {Op2VectorType}<{Op2BaseType}> _fld2;
+        private {Op3VectorType}<{Op3BaseType}> _fld3;
+
+        private SimpleTernaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}, {Op3BaseType}> _dataTable;
+
+        public TernaryOpTest__{Method}{RetBaseType}{RoundingMode}()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = ({Op1BaseType}){FixedInput1}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = ({Op1BaseType}){FixedInput2}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>());
+            for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = ({Op1BaseType}){FixedInput3}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3VectorType}<{Op3BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = ({Op1BaseType}){FixedInput1}; }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = ({Op1BaseType}){FixedInput2}; }
+            for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = ({Op1BaseType}){FixedInput3}; }
+            _dataTable = new SimpleTernaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}, {Op3BaseType}>(_data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => {Isa}.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = {Isa}.{Method}(
+                Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr),
+                Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr),
+                FloatRoundingMode.{RoundingMode}
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = {Isa}.{Method}(
+                {LoadIsa}.Load{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArray1Ptr)),
+                {LoadIsa}.Load{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr)),
+                {LoadIsa}.Load{Op3VectorType}(({Op3BaseType}*)(_dataTable.inArray3Ptr)),
+                FloatRoundingMode.{RoundingMode}
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = {Isa}.{Method}(
+                {LoadIsa}.LoadAligned{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArray1Ptr)),
+                {LoadIsa}.LoadAligned{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr)),
+                {LoadIsa}.LoadAligned{Op3VectorType}(({Op3BaseType}*)(_dataTable.inArray3Ptr)),
+                FloatRoundingMode.{RoundingMode}
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op2VectorType}<{Op2BaseType}>), typeof({Op3VectorType}<{Op3BaseType}>), typeof(FloatRoundingMode) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr),
+                                        Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr),
+                                        FloatRoundingMode.{RoundingMode}
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr);
+            var op2 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr);
+            var op3 = Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr);
+            var result = {Isa}.{Method}(op1, op2, op3, FloatRoundingMode.{RoundingMode});
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(op1, op2, op3, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = {Isa}.{Method}(_fld1, _fld2, _fld3, FloatRoundingMode.{RoundingMode});
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = {Isa}.{Method}(test._fld1, test._fld2, test._fld3, FloatRoundingMode.{RoundingMode});
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            bool succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                succeeded = true;
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+            }
+        }
+
+        private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, {Op3VectorType}<{Op3BaseType}> op3, void* result, [CallerMemberName] string method = "")
+        {
+            {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount];
+            {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount];
+            {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op3ElementCount];
+            {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1);
+            Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), op2);
+            Unsafe.WriteUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), op3);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>());
+
+            ValidateResult(inArray1, inArray2, inArray3, outArray, method);
+        }
+
+        private void ValidateResult(void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "")
+        {
+            {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount];
+            {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount];
+            {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op3ElementCount];
+            {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(op2), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef<byte>(op3), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>());
+
+            ValidateResult(inArray1, inArray2, inArray3, outArray, method);
+        }
+
+        private void ValidateResult({Op1BaseType}[] firstOp, {Op2BaseType}[] secondOp, {Op3BaseType}[] thirdOp, {RetBaseType}[] result, [CallerMemberName] string method = "")
+        {
+            bool succeeded = true;
+
+            for (int i = 0; i < result.Length; i++)
+            {
+                ulong[] answerTable = TernaryEmbRoundingAnswerTable[("{RetBaseType}", "{Method}", "{RoundingMode}")];
+
+                if (BitConverter.{CastingMethod}(result[i]) != answerTable[i])
+                {
+                    succeeded = false;
+                    Console.WriteLine("Avx512 {Method} Embedded rounding failed on {RetBaseType} with {RoundingMode}:");
+                    foreach (var item in result)
+                    {
+                        Console.Write(item + ", ");
+                    }
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+
+            if (!succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op2VectorType}<{Op2BaseType}>, {Op3VectorType}<{Op3BaseType}>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})");
+                TestLibrary.TestFramework.LogInformation($"secondOp: ({string.Join(", ", secondOp)})");
+                TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+
+                Succeeded = false;
+            }
+        }
+
+        private static Dictionary<(string, string, string), ulong[]> TernaryEmbRoundingAnswerTable = new Dictionary<(string, string, string), ulong[]>
+        {
+            {("Double", "FusedMultiplyAdd", "ToNegativeInfinity"), new ulong[] {0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8}},
+            {("Single", "FusedMultiplyAdd", "ToNegativeInfinity"), new ulong[] {0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f}},
+            {("Double", "FusedMultiplyAdd", "ToPositiveInfinity"), new ulong[] {0x3fe8b851eb851eb9, 0x3fe8b851eb851eb9, 0x3fe8b851eb851eb9, 0x3fe8b851eb851eb9, 0x3fe8b851eb851eb9, 0x3fe8b851eb851eb9, 0x3fe8b851eb851eb9, 0x3fe8b851eb851eb9}},
+            {("Single", "FusedMultiplyAdd", "ToPositiveInfinity"), new ulong[] {0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290, 0x3f45c290}},
+            {("Double", "FusedMultiplyAdd", "ToZero"), new ulong[] {0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8, 0x3fe8b851eb851eb8}},
+            {("Single", "FusedMultiplyAdd", "ToZero"), new ulong[] {0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f, 0x3f45c28f}},
+            {("Double", "FusedMultiplyAddNegated", "ToNegativeInfinity"), new ulong[] {0x3fe747ae147ae147, 0x3fe747ae147ae147, 0x3fe747ae147ae147, 0x3fe747ae147ae147, 0x3fe747ae147ae147, 0x3fe747ae147ae147, 0x3fe747ae147ae147, 0x3fe747ae147ae147}},
+            {("Single", "FusedMultiplyAddNegated", "ToNegativeInfinity"), new ulong[] {0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70}},
+            {("Double", "FusedMultiplyAddNegated", "ToPositiveInfinity"), new ulong[] {0x3fe747ae147ae148, 0x3fe747ae147ae148, 0x3fe747ae147ae148, 0x3fe747ae147ae148, 0x3fe747ae147ae148, 0x3fe747ae147ae148, 0x3fe747ae147ae148, 0x3fe747ae147ae148}},
+            {("Single", "FusedMultiplyAddNegated", "ToPositiveInfinity"), new ulong[] {0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71, 0x3f3a3d71}},
+            {("Double", "FusedMultiplyAddNegated", "ToZero"), new ulong[] {0x3fe747ae147ae147, 0x3fe747ae147ae147, 0x3fe747ae147ae147, 0x3fe747ae147ae147, 0x3fe747ae147ae147, 0x3fe747ae147ae147, 0x3fe747ae147ae147, 0x3fe747ae147ae147}},
+            {("Single", "FusedMultiplyAddNegated", "ToZero"), new ulong[] {0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70, 0x3f3a3d70}},
+            {("Double", "FusedMultiplyAddSubtract", "ToNegativeInfinity"), new ulong[] {0xbfe747ae147ae148, 0x3fe8b851eb851eb8, 0xbfe747ae147ae148, 0x3fe8b851eb851eb8, 0xbfe747ae147ae148, 0x3fe8b851eb851eb8, 0xbfe747ae147ae148, 0x3fe8b851eb851eb8}},
+            {("Single", "FusedMultiplyAddSubtract", "ToNegativeInfinity"), new ulong[] {0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f}},
+            {("Double", "FusedMultiplyAddSubtract", "ToPositiveInfinity"), new ulong[] {0xbfe747ae147ae147, 0x3fe8b851eb851eb9, 0xbfe747ae147ae147, 0x3fe8b851eb851eb9, 0xbfe747ae147ae147, 0x3fe8b851eb851eb9, 0xbfe747ae147ae147, 0x3fe8b851eb851eb9}},
+            {("Single", "FusedMultiplyAddSubtract", "ToPositiveInfinity"), new ulong[] {0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290}},
+            {("Double", "FusedMultiplyAddSubtract", "ToZero"), new ulong[] {0xbfe747ae147ae147, 0x3fe8b851eb851eb8, 0xbfe747ae147ae147, 0x3fe8b851eb851eb8, 0xbfe747ae147ae147, 0x3fe8b851eb851eb8, 0xbfe747ae147ae147, 0x3fe8b851eb851eb8}},
+            {("Single", "FusedMultiplyAddSubtract", "ToZero"), new ulong[] {0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f}},
+            {("Double", "FusedMultiplySubtract", "ToNegativeInfinity"), new ulong[] {0xbfe747ae147ae148, 0xbfe747ae147ae148, 0xbfe747ae147ae148, 0xbfe747ae147ae148, 0xbfe747ae147ae148, 0xbfe747ae147ae148, 0xbfe747ae147ae148, 0xbfe747ae147ae148}},
+            {("Single", "FusedMultiplySubtract", "ToNegativeInfinity"), new ulong[] {0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71, 0xbf3a3d71}},
+            {("Double", "FusedMultiplySubtract", "ToPositiveInfinity"), new ulong[] {0xbfe747ae147ae147, 0xbfe747ae147ae147, 0xbfe747ae147ae147, 0xbfe747ae147ae147, 0xbfe747ae147ae147, 0xbfe747ae147ae147, 0xbfe747ae147ae147, 0xbfe747ae147ae147}},
+            {("Single", "FusedMultiplySubtract", "ToPositiveInfinity"), new ulong[] {0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70}},
+            {("Double", "FusedMultiplySubtract", "ToZero"), new ulong[] {0xbfe747ae147ae147, 0xbfe747ae147ae147, 0xbfe747ae147ae147, 0xbfe747ae147ae147, 0xbfe747ae147ae147, 0xbfe747ae147ae147, 0xbfe747ae147ae147, 0xbfe747ae147ae147}},
+            {("Single", "FusedMultiplySubtract", "ToZero"), new ulong[] {0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70, 0xbf3a3d70}},
+            {("Double", "FusedMultiplySubtractAdd", "ToNegativeInfinity"), new ulong[] {0x3fe8b851eb851eb8, 0xbfe747ae147ae148, 0x3fe8b851eb851eb8, 0xbfe747ae147ae148, 0x3fe8b851eb851eb8, 0xbfe747ae147ae148, 0x3fe8b851eb851eb8, 0xbfe747ae147ae148}},
+            {("Single", "FusedMultiplySubtractAdd", "ToNegativeInfinity"), new ulong[] {0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71, 0x3f45c28f, 0xbf3a3d71}},
+            {("Double", "FusedMultiplySubtractAdd", "ToPositiveInfinity"), new ulong[] {0x3fe8b851eb851eb9, 0xbfe747ae147ae147, 0x3fe8b851eb851eb9, 0xbfe747ae147ae147, 0x3fe8b851eb851eb9, 0xbfe747ae147ae147, 0x3fe8b851eb851eb9, 0xbfe747ae147ae147}},
+            {("Single", "FusedMultiplySubtractAdd", "ToPositiveInfinity"), new ulong[] {0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70, 0x3f45c290, 0xbf3a3d70}},
+            {("Double", "FusedMultiplySubtractAdd", "ToZero"), new ulong[] {0x3fe8b851eb851eb8, 0xbfe747ae147ae147, 0x3fe8b851eb851eb8, 0xbfe747ae147ae147, 0x3fe8b851eb851eb8, 0xbfe747ae147ae147, 0x3fe8b851eb851eb8, 0xbfe747ae147ae147}},
+            {("Single", "FusedMultiplySubtractAdd", "ToZero"), new ulong[] {0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70, 0x3f45c28f, 0xbf3a3d70}},
+            {("Double", "FusedMultiplySubtractNegated", "ToNegativeInfinity"), new ulong[] {0xbfe8b851eb851eb9, 0xbfe8b851eb851eb9, 0xbfe8b851eb851eb9, 0xbfe8b851eb851eb9, 0xbfe8b851eb851eb9, 0xbfe8b851eb851eb9, 0xbfe8b851eb851eb9, 0xbfe8b851eb851eb9}},
+            {("Single", "FusedMultiplySubtractNegated", "ToNegativeInfinity"), new ulong[] {0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290, 0xbf45c290}},
+            {("Double", "FusedMultiplySubtractNegated", "ToPositiveInfinity"), new ulong[] {0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8}},
+            {("Single", "FusedMultiplySubtractNegated", "ToPositiveInfinity"), new ulong[] {0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f}},
+            {("Double", "FusedMultiplySubtractNegated", "ToZero"), new ulong[] {0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8, 0xbfe8b851eb851eb8}},
+            {("Single", "FusedMultiplySubtractNegated", "ToZero"), new ulong[] {0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f, 0xbf45c28f}},
+            {("Double", "FusedMultiplyAddScalar", "ToNegativeInfinity"), new ulong[] {0x3fe8b851eb851eb8, 0x3fa999999999999a}},
+            {("Single", "FusedMultiplyAddScalar", "ToNegativeInfinity"), new ulong[] {0x3f45c28f, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "FusedMultiplyAddScalar", "ToPositiveInfinity"), new ulong[] {0x3fe8b851eb851eb9, 0x3fa999999999999a}},
+            {("Single", "FusedMultiplyAddScalar", "ToPositiveInfinity"), new ulong[] {0x3f45c290, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "FusedMultiplyAddScalar", "ToZero"), new ulong[] {0x3fe8b851eb851eb8, 0x3fa999999999999a}},
+            {("Single", "FusedMultiplyAddScalar", "ToZero"), new ulong[] {0x3f45c28f, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "FusedMultiplyAddNegatedScalar", "ToNegativeInfinity"), new ulong[] {0x3fe747ae147ae147, 0x3fa999999999999a}},
+            {("Single", "FusedMultiplyAddNegatedScalar", "ToNegativeInfinity"), new ulong[] {0x3f3a3d70, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "FusedMultiplyAddNegatedScalar", "ToPositiveInfinity"), new ulong[] {0x3fe747ae147ae148, 0x3fa999999999999a}},
+            {("Single", "FusedMultiplyAddNegatedScalar", "ToPositiveInfinity"), new ulong[] {0x3f3a3d71, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "FusedMultiplyAddNegatedScalar", "ToZero"), new ulong[] {0x3fe747ae147ae147, 0x3fa999999999999a}},
+            {("Single", "FusedMultiplyAddNegatedScalar", "ToZero"), new ulong[] {0x3f3a3d70, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "FusedMultiplySubtractNegatedScalar", "ToNegativeInfinity"), new ulong[] {0xbfe8b851eb851eb9, 0x3fa999999999999a}},
+            {("Single", "FusedMultiplySubtractNegatedScalar", "ToNegativeInfinity"), new ulong[] {0xbf45c290, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "FusedMultiplySubtractNegatedScalar", "ToPositiveInfinity"), new ulong[] {0xbfe8b851eb851eb8, 0x3fa999999999999a}},
+            {("Single", "FusedMultiplySubtractNegatedScalar", "ToPositiveInfinity"), new ulong[] {0xbf45c28f, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "FusedMultiplySubtractNegatedScalar", "ToZero"), new ulong[] {0xbfe8b851eb851eb8, 0x3fa999999999999a}},
+            {("Single", "FusedMultiplySubtractNegatedScalar", "ToZero"), new ulong[] {0xbf45c28f, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "FusedMultiplySubtractScalar", "ToNegativeInfinity"), new ulong[] {0xbfe747ae147ae148, 0x3fa999999999999a}},
+            {("Single", "FusedMultiplySubtractScalar", "ToNegativeInfinity"), new ulong[] {0xbf3a3d71, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "FusedMultiplySubtractScalar", "ToPositiveInfinity"), new ulong[] {0xbfe747ae147ae147, 0x3fa999999999999a}},
+            {("Single", "FusedMultiplySubtractScalar", "ToPositiveInfinity"), new ulong[] {0xbf3a3d70, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+            {("Double", "FusedMultiplySubtractScalar", "ToZero"), new ulong[] {0xbfe747ae147ae147, 0x3fa999999999999a}},
+            {("Single", "FusedMultiplySubtractScalar", "ToZero"), new ulong[] {0xbf3a3d70, 0x3d4ccccd, 0x3d4ccccd, 0x3d4ccccd}},
+        };
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleUnaryOpEmbRounding.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleUnaryOpEmbRounding.template
new file mode 100644
index 000000000000..3464db01af8d
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/SimpleUnaryOpEmbRounding.template
@@ -0,0 +1,341 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Collections.Generic;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using Xunit;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        [Fact]
+        public static void {Method}{Op1BaseType}to{RetBaseType}{RoundingMode}()
+        {
+            var test = new UnaryOpTest__{Method}{Op1BaseType}to{RetBaseType}{RoundingMode}();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if ({LoadIsa}.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class UnaryOpTest__{Method}{Op1BaseType}to{RetBaseType}{RoundingMode}
+    {
+        private struct TestStruct
+        {
+            public {Op1VectorType}<{Op1BaseType}> _fld1;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = ({Op1BaseType}){FixedInput}; }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(UnaryOpTest__{Method}{Op1BaseType}to{RetBaseType}{RoundingMode} testClass)
+            {
+                var result = {Isa}.{Method}(_fld1, FloatRoundingMode.{RoundingMode});
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(_fld1, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = {LargestVectorSize};
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType});
+        private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType});
+
+        private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount];
+
+        private {Op1VectorType}<{Op1BaseType}> _fld1;
+
+        private SimpleUnaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}> _dataTable;
+
+        public UnaryOpTest__{Method}{Op1BaseType}to{RetBaseType}{RoundingMode}()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = ({Op1BaseType}){FixedInput}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = ({Op1BaseType}){FixedInput}; }
+            _dataTable = new SimpleUnaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}>(_data1, new {RetBaseType}[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => {Isa}.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = {Isa}.{Method}(
+                Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr),
+                FloatRoundingMode.{RoundingMode}
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = {Isa}.{Method}(
+                {LoadIsa}.Load{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArrayPtr)),
+                FloatRoundingMode.{RoundingMode}
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = {Isa}.{Method}(
+                {LoadIsa}.LoadAligned{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArrayPtr)),
+                FloatRoundingMode.{RoundingMode}
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof(FloatRoundingMode) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr),
+                                        FloatRoundingMode.{RoundingMode}
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr);
+            var result = {Isa}.{Method}(op1, FloatRoundingMode.{RoundingMode});
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(op1, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = {Isa}.{Method}(_fld1, FloatRoundingMode.{RoundingMode});
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld1, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = {Isa}.{Method}(test._fld1, FloatRoundingMode.{RoundingMode});
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld1, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            bool succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                succeeded = true;
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+            }
+        }
+
+        private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, void* result, [CallerMemberName] string method = "")
+        {
+            {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount];
+            {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>());
+
+            ValidateResult(inArray1, outArray, method);
+        }
+
+        private void ValidateResult(void* op1, void* result, [CallerMemberName] string method = "")
+        {
+            {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount];
+            {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>());
+
+            ValidateResult(inArray1, outArray, method);
+        }
+
+        private void ValidateResult({Op1BaseType}[] firstOp, {RetBaseType}[] result, [CallerMemberName] string method = "")
+        {
+            bool succeeded = true;
+
+            for (int i = 0; i < result.Length; i++)
+            {
+                ulong[] answerTable = unaryEmbRoundingAnswerTable[("{Isa}", "{Op1BaseType}", "{RetBaseType}", "{Method}", "{RoundingMode}")];
+
+                if ({CastingMethod}(result[i]) != answerTable[i])
+                {
+                    succeeded = false;
+                    Console.WriteLine("Avx512 {Method} Embedded rounding failed on {RetBaseType} with {RoundingMode}:");
+                    foreach (var item in result)
+                    {
+                        Console.Write(item + ", ");
+                    }
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+
+            if (!succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+
+                Succeeded = false;
+            }
+        }
+
+        private static Dictionary<(string, string, string, string, string), ulong[]> unaryEmbRoundingAnswerTable = new Dictionary<(string, string, string, string, string), ulong[]>
+        {
+            {("Avx512F", "Double", "UInt32", "ConvertToVector256UInt32", "ToNegativeInfinity"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512F", "Double", "UInt32", "ConvertToVector256UInt32", "ToPositiveInfinity"), new ulong[] {0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e}},
+            {("Avx512F", "Double", "UInt32", "ConvertToVector256UInt32", "ToZero"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512F", "Single", "UInt32", "ConvertToVector512UInt32", "ToNegativeInfinity"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512F", "Single", "UInt32", "ConvertToVector512UInt32", "ToPositiveInfinity"), new ulong[] {0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e}},
+            {("Avx512F", "Single", "UInt32", "ConvertToVector512UInt32", "ToZero"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512F", "Single", "Int32", "ConvertToVector512Int32", "ToNegativeInfinity"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512F", "Single", "Int32", "ConvertToVector512Int32", "ToPositiveInfinity"), new ulong[] {0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e}},
+            {("Avx512F", "Single", "Int32", "ConvertToVector512Int32", "ToZero"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512F", "Double", "Int32", "ConvertToVector256Int32", "ToNegativeInfinity"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512F", "Double", "Int32", "ConvertToVector256Int32", "ToPositiveInfinity"), new ulong[] {0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e}},
+            {("Avx512F", "Double", "Int32", "ConvertToVector256Int32", "ToZero"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512F", "Int32",  "Single", "ConvertToVector512Single", "ToNegativeInfinity"), new ulong[] {0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000}},
+            {("Avx512F", "Int32",  "Single", "ConvertToVector512Single", "ToPositiveInfinity"), new ulong[] {0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000}},
+            {("Avx512F", "Int32",  "Single", "ConvertToVector512Single", "ToZero"), new ulong[] {0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000, 0x41e80000}},
+            {("Avx512F", "Double", "Single", "ConvertToVector256Single", "ToNegativeInfinity"), new ulong[] {0x41eaf5c2, 0x41eaf5c2, 0x41eaf5c2, 0x41eaf5c2, 0x41eaf5c2, 0x41eaf5c2, 0x41eaf5c2, 0x41eaf5c2}},
+            {("Avx512F", "Double", "Single", "ConvertToVector256Single", "ToPositiveInfinity"), new ulong[] {0x41eaf5c3, 0x41eaf5c3, 0x41eaf5c3, 0x41eaf5c3, 0x41eaf5c3, 0x41eaf5c3, 0x41eaf5c3, 0x41eaf5c3}},
+            {("Avx512F", "Double", "Single", "ConvertToVector256Single", "ToZero"), new ulong[] {0x41eaf5c2, 0x41eaf5c2, 0x41eaf5c2, 0x41eaf5c2, 0x41eaf5c2, 0x41eaf5c2, 0x41eaf5c2, 0x41eaf5c2}},
+            {("Avx512F", "Double", "Double", "Sqrt", "ToNegativeInfinity"), new ulong[] {0x4015ad79b34092ec, 0x4015ad79b34092ec, 0x4015ad79b34092ec, 0x4015ad79b34092ec, 0x4015ad79b34092ec, 0x4015ad79b34092ec, 0x4015ad79b34092ec, 0x4015ad79b34092ec}},
+            {("Avx512F", "Single", "Single", "Sqrt", "ToNegativeInfinity"), new ulong[] {0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd}},
+            {("Avx512F", "Double", "Double", "Sqrt", "ToPositiveInfinity"), new ulong[] {0x4015ad79b34092ed, 0x4015ad79b34092ed, 0x4015ad79b34092ed, 0x4015ad79b34092ed, 0x4015ad79b34092ed, 0x4015ad79b34092ed, 0x4015ad79b34092ed, 0x4015ad79b34092ed}},
+            {("Avx512F", "Single", "Single", "Sqrt", "ToPositiveInfinity"), new ulong[] {0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce, 0x40ad6bce}},
+            {("Avx512F", "Double", "Double", "Sqrt", "ToZero"), new ulong[] {0x4015ad79b34092ec, 0x4015ad79b34092ec, 0x4015ad79b34092ec, 0x4015ad79b34092ec, 0x4015ad79b34092ec, 0x4015ad79b34092ec, 0x4015ad79b34092ec, 0x4015ad79b34092ec}},
+            {("Avx512F", "Single", "Single", "Sqrt", "ToZero"), new ulong[] {0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd, 0x40ad6bcd}},
+            {("Avx512DQ", "Int64", "Double", "ConvertToVector512Double", "ToNegativeInfinity"), new ulong[] {0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000}},
+            {("Avx512DQ", "Int64", "Double", "ConvertToVector512Double", "ToPositiveInfinity"), new ulong[] {0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000}},
+            {("Avx512DQ", "Int64", "Double", "ConvertToVector512Double", "ToZero"), new ulong[] {0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000}},
+            {("Avx512DQ", "Int64", "Single", "ConvertToVector256Single", "ToNegativeInfinity"), new ulong[] {0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000}},
+            {("Avx512DQ", "Int64", "Single", "ConvertToVector256Single", "ToPositiveInfinity"), new ulong[] {0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000}},
+            {("Avx512DQ", "Int64", "Single", "ConvertToVector256Single", "ToZero"), new ulong[] {0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000}},
+            {("Avx512DQ", "UInt64", "Double", "ConvertToVector512Double", "ToNegativeInfinity"), new ulong[] {0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000}},
+            {("Avx512DQ", "UInt64", "Double", "ConvertToVector512Double", "ToPositiveInfinity"), new ulong[] {0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000}},
+            {("Avx512DQ", "UInt64", "Double", "ConvertToVector512Double", "ToZero"), new ulong[] {0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000, 0x4024000000000000}},
+            {("Avx512DQ", "UInt64", "Single", "ConvertToVector256Single", "ToNegativeInfinity"), new ulong[] {0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000}},
+            {("Avx512DQ", "UInt64", "Single", "ConvertToVector256Single", "ToPositiveInfinity"), new ulong[] {0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000}},
+            {("Avx512DQ", "UInt64", "Single", "ConvertToVector256Single", "ToZero"), new ulong[] {0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000, 0x41200000}},
+            {("Avx512DQ", "Double", "Int64",  "ConvertToVector512Int64", "ToNegativeInfinity"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512DQ", "Double", "Int64",  "ConvertToVector512Int64", "ToPositiveInfinity"), new ulong[] {0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e}},
+            {("Avx512DQ", "Double", "Int64",  "ConvertToVector512Int64", "ToZero"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512DQ", "Single", "Int64",  "ConvertToVector512Int64", "ToNegativeInfinity"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512DQ", "Single", "Int64",  "ConvertToVector512Int64", "ToPositiveInfinity"), new ulong[] {0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e}},
+            {("Avx512DQ", "Single", "Int64",  "ConvertToVector512Int64", "ToZero"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512DQ", "Double", "UInt64",  "ConvertToVector512UInt64", "ToNegativeInfinity"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512DQ", "Double", "UInt64",  "ConvertToVector512UInt64", "ToPositiveInfinity"), new ulong[] {0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e}},
+            {("Avx512DQ", "Double", "UInt64",  "ConvertToVector512UInt64", "ToZero"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512DQ", "Single", "UInt64",  "ConvertToVector512UInt64", "ToNegativeInfinity"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+            {("Avx512DQ", "Single", "UInt64",  "ConvertToVector512UInt64", "ToPositiveInfinity"), new ulong[] {0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e}},
+            {("Avx512DQ", "Single", "UInt64",  "ConvertToVector512UInt64", "ToZero"), new ulong[] {0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d}},
+        };
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/X86Base.X64/Program.X86Base.X64.cs b/src/tests/JIT/HardwareIntrinsics/X86/X86Base.X64/Program.X86Base.X64.cs
index 40dc13b975f4..98901cf3dfe8 100644
--- a/src/tests/JIT/HardwareIntrinsics/X86/X86Base.X64/Program.X86Base.X64.cs
+++ b/src/tests/JIT/HardwareIntrinsics/X86/X86Base.X64/Program.X86Base.X64.cs
@@ -4,7 +4,7 @@
 using System.Collections.Generic;
 
 [assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/75767", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMAOT))]
-[assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/75767", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMFULLAOT))]
+[assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/75767", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoFULLAOT))]
 namespace JIT.HardwareIntrinsics.X86._X86Base.X64
 {
     public static partial class Program
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/Program.X86Base.cs b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/Program.X86Base.cs
index 364a8aee4b04..b0001aeeb3e9 100644
--- a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/Program.X86Base.cs
+++ b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/Program.X86Base.cs
@@ -4,7 +4,7 @@
 using System.Collections.Generic;
 
 [assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/75767", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMAOT))]
-[assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/75767", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMFULLAOT))]
+[assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/75767", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoFULLAOT))]
 namespace JIT.HardwareIntrinsics.X86._X86Base
 {
     public static partial class Program
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/Avx512F_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/Avx512F_handwritten_r.csproj
index fd0d21f01bc5..89f2c83fc5a0 100644
--- a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/Avx512F_handwritten_r.csproj
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/Avx512F_handwritten_r.csproj
@@ -44,5 +44,7 @@
     <Compile Include="StoreAlignedNonTemporal.cs" />
     <Compile Include="UnpackHigh.cs" />
     <Compile Include="UnpackLow.cs" />
+    <Compile Include="EmbeddedRounding.Double.cs" />
+    <Compile Include="EmbeddedRounding.Single.cs" />
   </ItemGroup>
 </Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/Avx512F_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/Avx512F_handwritten_ro.csproj
index 082d6f4ee197..03ffa05da0d3 100644
--- a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/Avx512F_handwritten_ro.csproj
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/Avx512F_handwritten_ro.csproj
@@ -44,5 +44,7 @@
     <Compile Include="StoreAlignedNonTemporal.cs" />
     <Compile Include="UnpackHigh.cs" />
     <Compile Include="UnpackLow.cs" />
+    <Compile Include="EmbeddedRounding.Double.cs" />
+    <Compile Include="EmbeddedRounding.Single.cs" />
   </ItemGroup>
 </Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/EmbeddedRounding.Double.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/EmbeddedRounding.Double.cs
new file mode 100644
index 000000000000..1c01761fe92e
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/EmbeddedRounding.Double.cs
@@ -0,0 +1,716 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+using Xunit;
+
+namespace IntelHardwareIntrinsicTest._Avx512F
+{
+    public partial class Program
+    {
+        [Fact]
+        public static unsafe void ConvertToInt32EmbeddedRounding_Double()
+        {
+            int testResult = 1;
+            int answerTable_ToNegativeInfinity = -1;
+            int answerTable_ToPositiveInfinity  = 0;
+            int answerTable_ToZero = 0;
+            if (Avx512F.IsSupported)
+            {
+                Vector128<double> inputVec = Vector128.Create(-0.45, -0.45);
+                int res = Avx512F.ConvertToInt32(inputVec, FloatRoundingMode.ToNegativeInfinity);
+
+                if (res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt32 Embedded rounding failed on double with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.ConvertToInt32(inputVec, FloatRoundingMode.ToPositiveInfinity);
+
+                if (res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt32 Embedded rounding failed on double with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.ConvertToInt32(inputVec, FloatRoundingMode.ToZero);
+
+                if (res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt32 Embedded rounding failed on double with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToUInt32EmbeddedRounding_Double()
+        {
+            int testResult = 1;
+            uint answerTable_ToNegativeInfinity = 4294967295;
+            uint answerTable_ToPositiveInfinity  = 0;
+            uint answerTable_ToZero = 0;
+            if (Avx512F.IsSupported)
+            {
+                Vector128<double> inputVec = Vector128.Create(-0.45, -0.45);
+                uint res = Avx512F.ConvertToUInt32(inputVec, FloatRoundingMode.ToNegativeInfinity);
+
+                if (res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt32 Embedded rounding failed on double with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.ConvertToUInt32(inputVec, FloatRoundingMode.ToPositiveInfinity);
+
+                if (res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt32 Embedded rounding failed on double with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.ConvertToUInt32(inputVec, FloatRoundingMode.ToZero);
+
+                if (res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt32 Embedded rounding failed on double with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToInt64EmbeddedRounding_Double()
+        {
+            int testResult = 1;
+            long answerTable_ToNegativeInfinity = -1;
+            long answerTable_ToPositiveInfinity  = 0;
+            long answerTable_ToZero = 0;
+            if (Avx512F.X64.IsSupported)
+            {
+                Vector128<double> inputVec = Vector128.Create(-0.45, -0.45);
+                long res = Avx512F.X64.ConvertToInt64(inputVec, FloatRoundingMode.ToNegativeInfinity);
+
+                if (res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt64 Embedded rounding failed on double with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.X64.ConvertToInt64(inputVec, FloatRoundingMode.ToPositiveInfinity);
+
+                if (res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt64 Embedded rounding failed on double with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.X64.ConvertToInt64(inputVec, FloatRoundingMode.ToZero);
+
+                if (res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt64 Embedded rounding failed on double with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToUInt64EmbeddedRounding_Double()
+        {
+            int testResult = 1;
+            ulong answerTable_ToNegativeInfinity = 18446744073709551615;
+            ulong answerTable_ToPositiveInfinity  = 0;
+            ulong answerTable_ToZero = 0;
+            if (Avx512F.X64.IsSupported)
+            {
+                Vector128<double> inputVec = Vector128.Create(-0.45, -0.45);
+                ulong res = Avx512F.X64.ConvertToUInt64(inputVec, FloatRoundingMode.ToNegativeInfinity);
+
+                if (res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt64 Embedded rounding failed on double with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.X64.ConvertToUInt64(inputVec, FloatRoundingMode.ToPositiveInfinity);
+
+                if (res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt64 Embedded rounding failed on double with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.X64.ConvertToUInt64(inputVec, FloatRoundingMode.ToZero);
+
+                if (res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt64 Embedded rounding failed on double with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertScalarToVector128DoubleInt64EmbeddedRounding_Double()
+        {
+            int testResult = 1;
+            ulong[] answerTable_ToNegativeInfinity = new ulong[2] {0x402e000000000000, 0xbff0000000000000};
+            ulong[] answerTable_ToPositiveInfinity  = new ulong[2] {0x402e000000000000, 0xbff0000000000000};
+            ulong[] answerTable_ToZero = new ulong[2] {0x402e000000000000, 0xbff0000000000000};     
+            if (Avx512F.X64.IsSupported)
+            {
+                using (TestTable<double> doubleTable = new TestTable<double>(new double[2] { -1.0f, -1.0f}, new double[2]))
+                {
+                    var upper = Unsafe.Read<Vector128<double>>(doubleTable.inArrayPtr);
+                    long value = 15;
+                    var vd3 = Avx512F.X64.ConvertScalarToVector128Double(upper, value, FloatRoundingMode.ToNegativeInfinity);
+                    Unsafe.Write(doubleTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < doubleTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.DoubleToUInt64Bits(doubleTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Double Embedded rounding failed on Int64 input with ToNegativeInfinity:");
+                            foreach (var item in doubleTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = Avx512F.X64.ConvertScalarToVector128Double(upper, value, FloatRoundingMode.ToPositiveInfinity);
+                    Unsafe.Write(doubleTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < doubleTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.DoubleToUInt64Bits(doubleTable.outArray[i]) != answerTable_ToPositiveInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Double Embedded rounding failed on Int64 input with ToPositiveInfinity:");
+                            foreach (var item in doubleTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = Avx512F.X64.ConvertScalarToVector128Double(upper, value, FloatRoundingMode.ToZero);
+                    Unsafe.Write(doubleTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < doubleTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.DoubleToUInt64Bits(doubleTable.outArray[i]) != answerTable_ToZero[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Double Embedded rounding failed on Int64 input with ToZero:");
+                            foreach (var item in doubleTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertScalarToVector128DoubleUInt64EmbeddedRounding_Double()
+        {
+            int testResult = 1;
+            ulong[] answerTable_ToNegativeInfinity = new ulong[2] {0x402e000000000000, 0xbff0000000000000};
+            ulong[] answerTable_ToPositiveInfinity  = new ulong[2] {0x402e000000000000, 0xbff0000000000000};
+            ulong[] answerTable_ToZero = new ulong[2] {0x402e000000000000, 0xbff0000000000000};    
+            if (Avx512F.X64.IsSupported)
+            {
+                using (TestTable<double> doubleTable = new TestTable<double>(new double[2] { -1.0f, -1.0f}, new double[2]))
+                {
+                    var upper = Unsafe.Read<Vector128<double>>(doubleTable.inArrayPtr);
+                    ulong value = 15;
+                    var vd3 = Avx512F.X64.ConvertScalarToVector128Double(upper, value, FloatRoundingMode.ToNegativeInfinity);
+                    Unsafe.Write(doubleTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < doubleTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.DoubleToUInt64Bits(doubleTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Double Embedded rounding failed on UInt64 input with ToNegativeInfinity:");
+                            foreach (var item in doubleTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = Avx512F.X64.ConvertScalarToVector128Double(upper, value, FloatRoundingMode.ToPositiveInfinity);
+                    Unsafe.Write(doubleTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < doubleTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.DoubleToUInt64Bits(doubleTable.outArray[i]) != answerTable_ToPositiveInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Double Embedded rounding failed on UInt64 input with ToPositiveInfinity:");
+                            foreach (var item in doubleTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = Avx512F.X64.ConvertScalarToVector128Double(upper, value, FloatRoundingMode.ToZero);
+                    Unsafe.Write(doubleTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < doubleTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.DoubleToUInt64Bits(doubleTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Double Embedded rounding failed on UInt64 input with ToZero:");
+                            foreach (var item in doubleTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToInt32EmbeddedRoundingReflection_Double()
+        {
+            int testResult = 1;
+            int answerTable_ToNegativeInfinity = -1;
+            int answerTable_ToPositiveInfinity  = 0;
+            int answerTable_ToZero = 0;
+            if (Avx512F.IsSupported)
+            {
+                Vector128<double> inputVec = Vector128.Create(-0.45, -0.45);
+                var res = typeof(Avx512F).GetMethod(nameof(Avx512F.ConvertToInt32), new Type[] { typeof(Vector128<Double>) , typeof(FloatRoundingMode)})
+                        .Invoke(null, new object[] {
+                        inputVec,
+                        FloatRoundingMode.ToNegativeInfinity
+                        });
+
+                if ((int)res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt32 Embedded rounding failed on double with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F).GetMethod(nameof(Avx512F.ConvertToInt32), new Type[] { typeof(Vector128<Double>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToPositiveInfinity
+                    });
+
+                if ((int)res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt32 Embedded rounding failed on double with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F).GetMethod(nameof(Avx512F.ConvertToInt32), new Type[] { typeof(Vector128<Double>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToZero
+                    });
+
+                if ((int)res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt32 Embedded rounding failed on double with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToUInt32EmbeddedRoundingReflection_Double()
+        {
+            int testResult = 1;
+            uint answerTable_ToNegativeInfinity = 4294967295;
+            uint answerTable_ToPositiveInfinity  = 0;
+            uint answerTable_ToZero = 0;
+            if (Avx512F.IsSupported)
+            {
+                Vector128<double> inputVec = Vector128.Create(-0.45, -0.45);
+                var res = typeof(Avx512F).GetMethod(nameof(Avx512F.ConvertToUInt32), new Type[] { typeof(Vector128<Double>) , typeof(FloatRoundingMode)})
+                        .Invoke(null, new object[] {
+                        inputVec,
+                        FloatRoundingMode.ToNegativeInfinity
+                        });
+
+                if ((uint)res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt32 Embedded rounding failed on double with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F).GetMethod(nameof(Avx512F.ConvertToUInt32), new Type[] { typeof(Vector128<Double>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToPositiveInfinity
+                    });
+
+                if ((uint)res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt32 Embedded rounding failed on double with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F).GetMethod(nameof(Avx512F.ConvertToUInt32), new Type[] { typeof(Vector128<Double>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToZero
+                    });
+
+                if ((uint)res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt32 Embedded rounding failed on double with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToInt64EmbeddedRoundingReflection_Double()
+        {
+            int testResult = 1;
+            long answerTable_ToNegativeInfinity = -1;
+            long answerTable_ToPositiveInfinity  = 0;
+            long answerTable_ToZero = 0;
+            if (Avx512F.X64.IsSupported)
+            {
+                Vector128<double> inputVec = Vector128.Create(-0.45, -0.45);
+                
+                var res = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertToInt64), new Type[] { typeof(Vector128<Double>) , typeof(FloatRoundingMode)})
+                                     .Invoke(null, new object[] {
+                                        inputVec,
+                                        FloatRoundingMode.ToNegativeInfinity
+                                     });
+
+                if ((long)res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt64 Embedded rounding failed on double with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertToInt64), new Type[] { typeof(Vector128<Double>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToPositiveInfinity
+                    });
+
+                if ((long)res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt64 Embedded rounding failed on double with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertToInt64), new Type[] { typeof(Vector128<Double>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToZero
+                    });
+
+                if ((long)res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt64 Embedded rounding failed on double with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToUInt64EmbeddedRoundingReflection_Double()
+        {
+            int testResult = 1;
+            ulong answerTable_ToNegativeInfinity = 18446744073709551615;
+            ulong answerTable_ToPositiveInfinity  = 0;
+            ulong answerTable_ToZero = 0;
+            if (Avx512F.X64.IsSupported)
+            {
+                Vector128<double> inputVec = Vector128.Create(-0.45, -0.45);
+                
+                var res = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertToUInt64), new Type[] { typeof(Vector128<Double>) , typeof(FloatRoundingMode)})
+                                     .Invoke(null, new object[] {
+                                        inputVec,
+                                        FloatRoundingMode.ToNegativeInfinity
+                                     });
+
+                if ((ulong)res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt64 Embedded rounding failed on double with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertToUInt64), new Type[] { typeof(Vector128<Double>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToPositiveInfinity
+                    });
+
+                if ((ulong)res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt64 Embedded rounding failed on double with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertToUInt64), new Type[] { typeof(Vector128<Double>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToZero
+                    });
+
+                if ((ulong)res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt64 Embedded rounding failed on double with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertScalarToVector128DoubleInt64EmbeddedRoundingReflection_Double()
+        {
+            int testResult = 1;
+            ulong[] answerTable_ToNegativeInfinity = new ulong[2] {0x402e000000000000, 0xbff0000000000000};
+            ulong[] answerTable_ToPositiveInfinity  = new ulong[2] {0x402e000000000000, 0xbff0000000000000};
+            ulong[] answerTable_ToZero = new ulong[2] {0x402e000000000000, 0xbff0000000000000};     
+            if (Avx512F.X64.IsSupported)
+            {
+                using (TestTable<double> doubleTable = new TestTable<double>(new double[2] { -1.0f, -1.0f}, new double[2]))
+                {
+                    long value = 15;
+                    var vd3 = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertScalarToVector128Double), new Type[] { typeof(Vector128<Double>), typeof(long), typeof(FloatRoundingMode)})
+                            .Invoke(null, new object[] {
+                            Unsafe.Read<Vector128<Double>>(doubleTable.inArrayPtr),
+                            value,
+                            FloatRoundingMode.ToNegativeInfinity
+                            });
+
+                    Unsafe.Write(doubleTable.outArrayPtr, (Vector128<double>)(vd3));
+
+                    for (int i = 0; i < doubleTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.DoubleToUInt64Bits(doubleTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Double Embedded rounding failed on Int64 input with ToNegativeInfinity:");
+                            foreach (var item in doubleTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertScalarToVector128Double), new Type[] { typeof(Vector128<Double>), typeof(long), typeof(FloatRoundingMode)})
+                        .Invoke(null, new object[] {
+                        Unsafe.Read<Vector128<Double>>(doubleTable.inArrayPtr),
+                        value,
+                        FloatRoundingMode.ToPositiveInfinity
+                        });
+
+                    Unsafe.Write(doubleTable.outArrayPtr, (Vector128<double>)(vd3));
+
+                    for (int i = 0; i < doubleTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.DoubleToUInt64Bits(doubleTable.outArray[i]) != answerTable_ToPositiveInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Double Embedded rounding failed on Int64 input with ToPositiveInfinity:");
+                            foreach (var item in doubleTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertScalarToVector128Double), new Type[] { typeof(Vector128<Double>), typeof(long), typeof(FloatRoundingMode)})
+                        .Invoke(null, new object[] {
+                        Unsafe.Read<Vector128<Double>>(doubleTable.inArrayPtr),
+                        value,
+                        FloatRoundingMode.ToZero
+                        });
+
+                    Unsafe.Write(doubleTable.outArrayPtr, (Vector128<double>)(vd3));
+
+                    for (int i = 0; i < doubleTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.DoubleToUInt64Bits(doubleTable.outArray[i]) != answerTable_ToZero[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Double Embedded rounding failed on Int64 input with ToZero:");
+                            foreach (var item in doubleTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertScalarToVector128DoubleUInt64EmbeddedRoundingReflection_Double()
+        {
+            int testResult = 1;
+            ulong[] answerTable_ToNegativeInfinity = new ulong[2] {0x402e000000000000, 0xbff0000000000000};
+            ulong[] answerTable_ToPositiveInfinity  = new ulong[2] {0x402e000000000000, 0xbff0000000000000};
+            ulong[] answerTable_ToZero = new ulong[2] {0x402e000000000000, 0xbff0000000000000};    
+            if (Avx512F.X64.IsSupported)
+            {
+                using (TestTable<double> doubleTable = new TestTable<double>(new double[2] { -1.0f, -1.0f}, new double[2]))
+                {
+                    ulong value = 15;
+                    var vd3 = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertScalarToVector128Double), new Type[] { typeof(Vector128<Double>), typeof(ulong), typeof(FloatRoundingMode)})
+                            .Invoke(null, new object[] {
+                            Unsafe.Read<Vector128<Double>>(doubleTable.inArrayPtr),
+                            value,
+                            FloatRoundingMode.ToNegativeInfinity
+                            });
+                    
+                    Unsafe.Write(doubleTable.outArrayPtr, (Vector128<double>)(vd3));
+
+                    for (int i = 0; i < doubleTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.DoubleToUInt64Bits(doubleTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Double Embedded rounding failed on UInt64 input with ToNegativeInfinity:");
+                            foreach (var item in doubleTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertScalarToVector128Double), new Type[] { typeof(Vector128<Double>), typeof(ulong), typeof(FloatRoundingMode)})
+                        .Invoke(null, new object[] {
+                        Unsafe.Read<Vector128<Double>>(doubleTable.inArrayPtr),
+                        value,
+                        FloatRoundingMode.ToPositiveInfinity
+                        });
+
+                    Unsafe.Write(doubleTable.outArrayPtr, (Vector128<double>)(vd3));
+
+                    for (int i = 0; i < doubleTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.DoubleToUInt64Bits(doubleTable.outArray[i]) != answerTable_ToPositiveInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Double Embedded rounding failed on UInt64 input with ToPositiveInfinity:");
+                            foreach (var item in doubleTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertScalarToVector128Double), new Type[] { typeof(Vector128<Double>), typeof(ulong), typeof(FloatRoundingMode)})
+                        .Invoke(null, new object[] {
+                        Unsafe.Read<Vector128<Double>>(doubleTable.inArrayPtr),
+                        value,
+                        FloatRoundingMode.ToZero
+                        });
+
+                    Unsafe.Write(doubleTable.outArrayPtr, (Vector128<double>)(vd3));
+
+                    for (int i = 0; i < doubleTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.DoubleToUInt64Bits(doubleTable.outArray[i]) != answerTable_ToZero[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Double Embedded rounding failed on UInt64 input with ToZero:");
+                            foreach (var item in doubleTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/EmbeddedRounding.Single.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/EmbeddedRounding.Single.cs
new file mode 100644
index 000000000000..945ebbfe62ea
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512F/EmbeddedRounding.Single.cs
@@ -0,0 +1,784 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+using Xunit;
+
+namespace IntelHardwareIntrinsicTest._Avx512F
+{
+    public partial class Program
+    {
+        [Fact]
+        public static unsafe void ConvertToInt32EmbeddedRounding_Single()
+        {
+            int testResult = 1;
+            int answerTable_ToNegativeInfinity = -1;
+            int answerTable_ToPositiveInfinity  = 0;
+            int answerTable_ToZero = 0;
+            if (Avx512F.IsSupported)
+            {
+                Vector128<float> inputVec = Vector128.Create(-0.45f, -0.45f, -0.45f, -0.45f);
+                int res = Avx512F.ConvertToInt32(inputVec, FloatRoundingMode.ToNegativeInfinity);
+
+                if (res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt32 Embedded rounding failed on float with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.ConvertToInt32(inputVec, FloatRoundingMode.ToPositiveInfinity);
+
+                if (res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt32 Embedded rounding failed on float with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.ConvertToInt32(inputVec, FloatRoundingMode.ToZero);
+
+                if (res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt32 Embedded rounding failed on float with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToUInt32EmbeddedRounding_Single()
+        {
+            int testResult = 1;
+            uint answerTable_ToNegativeInfinity = 4294967295;
+            uint answerTable_ToPositiveInfinity  = 0;
+            uint answerTable_ToZero = 0;
+            if (Avx512F.IsSupported)
+            {
+                Vector128<float> inputVec = Vector128.Create(-0.45f, -0.45f, -0.45f, -0.45f);
+                uint res = Avx512F.ConvertToUInt32(inputVec, FloatRoundingMode.ToNegativeInfinity);
+
+                if (res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt32 Embedded rounding failed on float with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.ConvertToUInt32(inputVec, FloatRoundingMode.ToPositiveInfinity);
+
+                if (res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt32 Embedded rounding failed on float with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.ConvertToUInt32(inputVec, FloatRoundingMode.ToZero);
+
+                if (res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt32 Embedded rounding failed on float with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertScalarToVector128SingleInt32EmbeddedRounding_Single()
+        {
+            int testResult = 1;
+            uint[] answerTable_ToNegativeInfinity = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000};
+            uint[] answerTable_ToPositiveInfinity  = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000};
+            uint[] answerTable_ToZero = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000};     
+            if (Avx512F.IsSupported)
+            {
+                using (TestTable<float> floatTable = new TestTable<float>(new float[4] { -1.0f, -1.0f, -1.0f, -1.0f }, new float[4]))
+                {
+                    var upper = Unsafe.Read<Vector128<float>>(floatTable.inArrayPtr);
+                    int value = 15;
+                    var vd3 = Avx512F.ConvertScalarToVector128Single(upper, value, FloatRoundingMode.ToNegativeInfinity);
+                    Unsafe.Write(floatTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on Int32 input with ToNegativeInfinity:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = Avx512F.ConvertScalarToVector128Single(upper, value, FloatRoundingMode.ToPositiveInfinity);
+                    Unsafe.Write(floatTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToPositiveInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on Int32 input with ToPositiveInfinity:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = Avx512F.ConvertScalarToVector128Single(upper, value, FloatRoundingMode.ToZero);
+                    Unsafe.Write(floatTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on Int32 input with ToZero:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToInt64EmbeddedRounding_Single()
+        {
+            int testResult = 1;
+            long answerTable_ToNegativeInfinity = -1;
+            long answerTable_ToPositiveInfinity  = 0;
+            long answerTable_ToZero = 0;
+            if (Avx512F.X64.IsSupported)
+            {
+                Vector128<float> inputVec = Vector128.Create(-0.45f, -0.45f, -0.45f, -0.45f);
+                long res = Avx512F.X64.ConvertToInt64(inputVec, FloatRoundingMode.ToNegativeInfinity);
+
+                if (res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt64 Embedded rounding failed on float with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.X64.ConvertToInt64(inputVec, FloatRoundingMode.ToPositiveInfinity);
+
+                if (res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt64 Embedded rounding failed on float with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.X64.ConvertToInt64(inputVec, FloatRoundingMode.ToZero);
+
+                if (res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt64 Embedded rounding failed on float with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToUInt64EmbeddedRounding_Single()
+        {
+            int testResult = 1;
+            ulong answerTable_ToNegativeInfinity = 18446744073709551615;
+            ulong answerTable_ToPositiveInfinity  = 0;
+            ulong answerTable_ToZero = 0;
+            if (Avx512F.X64.IsSupported)
+            {
+                Vector128<float> inputVec = Vector128.Create(-0.45f, -0.45f, -0.45f, -0.45f);
+                ulong res = Avx512F.X64.ConvertToUInt64(inputVec, FloatRoundingMode.ToNegativeInfinity);
+
+                if (res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt64 Embedded rounding failed on float with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.X64.ConvertToUInt64(inputVec, FloatRoundingMode.ToPositiveInfinity);
+
+                if (res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt64 Embedded rounding failed on float with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = Avx512F.X64.ConvertToUInt64(inputVec, FloatRoundingMode.ToZero);
+
+                if (res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt64 Embedded rounding failed on float with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertScalarToVector128SingleInt64EmbeddedRounding_Single()
+        {
+            int testResult = 1;
+            uint[] answerTable_ToNegativeInfinity = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000,};
+            uint[] answerTable_ToPositiveInfinity  = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000};
+            uint[] answerTable_ToZero = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000};
+            if (Avx512F.X64.IsSupported)
+            {
+                using (TestTable<float> floatTable = new TestTable<float>(new float[4] { -1.0f, -1.0f, -1.0f, -1.0f }, new float[4]))
+                {
+                    var upper = Unsafe.Read<Vector128<float>>(floatTable.inArrayPtr);
+                    long value = 15;
+                    var vd3 = Avx512F.X64.ConvertScalarToVector128Single(upper, value, FloatRoundingMode.ToNegativeInfinity);
+                    Unsafe.Write(floatTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on Int64 input with ToNegativeInfinity:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = Avx512F.X64.ConvertScalarToVector128Single(upper, value, FloatRoundingMode.ToPositiveInfinity);
+                    Unsafe.Write(floatTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToPositiveInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on Int64 input with ToPositiveInfinity:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = Avx512F.X64.ConvertScalarToVector128Single(upper, value, FloatRoundingMode.ToZero);
+                    Unsafe.Write(floatTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on Int64 input with ToZero:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertScalarToVector128SingleUInt64EmbeddedRounding_Single()
+        {
+            int testResult = 1;
+            uint[] answerTable_ToNegativeInfinity = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000,};
+            uint[] answerTable_ToPositiveInfinity  = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000};
+            uint[] answerTable_ToZero = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000};     
+            if (Avx512F.X64.IsSupported)
+            {
+                using (TestTable<float> floatTable = new TestTable<float>(new float[4] { -1.0f, -1.0f, -1.0f, -1.0f }, new float[4]))
+                {
+                    var upper = Unsafe.Read<Vector128<float>>(floatTable.inArrayPtr);
+                    ulong value = 15;
+                    var vd3 = Avx512F.X64.ConvertScalarToVector128Single(upper, value, FloatRoundingMode.ToNegativeInfinity);
+                    Unsafe.Write(floatTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on UInt64 input with ToNegativeInfinity:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = Avx512F.X64.ConvertScalarToVector128Single(upper, value, FloatRoundingMode.ToPositiveInfinity);
+                    Unsafe.Write(floatTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToPositiveInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on UInt64 input with ToPositiveInfinity:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = Avx512F.X64.ConvertScalarToVector128Single(upper, value, FloatRoundingMode.ToZero);
+                    Unsafe.Write(floatTable.outArrayPtr, vd3);
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on UInt64 input with ToZero:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+        
+        [Fact]
+        public static unsafe void ConvertToInt32EmbeddedRoundingReflection_Single()
+        {
+            int testResult = 1;
+            int answerTable_ToNegativeInfinity = -1;
+            int answerTable_ToPositiveInfinity  = 0;
+            int answerTable_ToZero = 0;
+            if (Avx512F.IsSupported)
+            {
+                Vector128<float> inputVec = Vector128.Create(-0.45f, -0.45f, -0.45f, -0.45f);
+                var res = typeof(Avx512F).GetMethod(nameof(Avx512F.ConvertToInt32), new Type[] { typeof(Vector128<Single>) , typeof(FloatRoundingMode)})
+                        .Invoke(null, new object[] {
+                        inputVec,
+                        FloatRoundingMode.ToNegativeInfinity
+                        });
+
+                if ((int)res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt32 Embedded rounding failed on float with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F).GetMethod(nameof(Avx512F.ConvertToInt32), new Type[] { typeof(Vector128<Single>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToPositiveInfinity
+                    });
+
+                if ((int)res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt32 Embedded rounding failed on float with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F).GetMethod(nameof(Avx512F.ConvertToInt32), new Type[] { typeof(Vector128<Single>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToZero
+                    });
+
+                if ((int)res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt32 Embedded rounding failed on float with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToUInt32EmbeddedRoundingReflection_Single()
+        {
+            int testResult = 1;
+            uint answerTable_ToNegativeInfinity = 4294967295;
+            uint answerTable_ToPositiveInfinity  = 0;
+            uint answerTable_ToZero = 0;
+            if (Avx512F.IsSupported)
+            {
+                Vector128<float> inputVec = Vector128.Create(-0.45f, -0.45f, -0.45f, -0.45f);
+                var res = typeof(Avx512F).GetMethod(nameof(Avx512F.ConvertToUInt32), new Type[] { typeof(Vector128<Single>) , typeof(FloatRoundingMode)})
+                        .Invoke(null, new object[] {
+                        inputVec,
+                        FloatRoundingMode.ToNegativeInfinity
+                        });
+
+                if ((uint)res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt32 Embedded rounding failed on float with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F).GetMethod(nameof(Avx512F.ConvertToUInt32), new Type[] { typeof(Vector128<Single>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToPositiveInfinity
+                    });
+
+                if ((uint)res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt32 Embedded rounding failed on float with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F).GetMethod(nameof(Avx512F.ConvertToUInt32), new Type[] { typeof(Vector128<Single>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToZero
+                    });
+
+                if ((uint)res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt32 Embedded rounding failed on float with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToInt64EmbeddedRoundingReflection_Single()
+        {
+            int testResult = 1;
+            long answerTable_ToNegativeInfinity = -1;
+            long answerTable_ToPositiveInfinity  = 0;
+            long answerTable_ToZero = 0;
+            if (Avx512F.X64.IsSupported)
+            {
+                Vector128<float> inputVec = Vector128.Create(-0.45f, -0.45f, -0.45f, -0.45f);
+                
+                var res = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertToInt64), new Type[] { typeof(Vector128<Single>) , typeof(FloatRoundingMode)})
+                                     .Invoke(null, new object[] {
+                                        inputVec,
+                                        FloatRoundingMode.ToNegativeInfinity
+                                     });
+
+                if ((long)res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt64 Embedded rounding failed on float with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertToInt64), new Type[] { typeof(Vector128<Single>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToPositiveInfinity
+                    });
+
+                if ((long)res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt64 Embedded rounding failed on float with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertToInt64), new Type[] { typeof(Vector128<Single>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToZero
+                    });
+
+                if ((long)res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToInt64 Embedded rounding failed on float with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertToUInt64EmbeddedRoundingReflection_Single()
+        {
+            int testResult = 1;
+            ulong answerTable_ToNegativeInfinity = 18446744073709551615;
+            ulong answerTable_ToPositiveInfinity  = 0;
+            ulong answerTable_ToZero = 0;
+            if (Avx512F.X64.IsSupported)
+            {
+                Vector128<float> inputVec = Vector128.Create(-0.45f, -0.45f, -0.45f, -0.45f);
+                
+                var res = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertToUInt64), new Type[] { typeof(Vector128<Single>) , typeof(FloatRoundingMode)})
+                                     .Invoke(null, new object[] {
+                                        inputVec,
+                                        FloatRoundingMode.ToNegativeInfinity
+                                     });
+
+                if ((ulong)res != answerTable_ToNegativeInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt64 Embedded rounding failed on float with ToNegativeInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertToUInt64), new Type[] { typeof(Vector128<Single>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToPositiveInfinity
+                    });
+
+                if ((ulong)res != answerTable_ToPositiveInfinity)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt64 Embedded rounding failed on float with ToPositiveInfinity:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+
+                res = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertToUInt64), new Type[] { typeof(Vector128<Single>) , typeof(FloatRoundingMode)})
+                    .Invoke(null, new object[] {
+                    inputVec,
+                    FloatRoundingMode.ToZero
+                    });
+
+                if ((ulong)res != answerTable_ToZero)
+                {
+                    Console.WriteLine("Avx512 ConvertToUInt64 Embedded rounding failed on float with ToZero:");
+                    Console.Write(res);
+                    Console.WriteLine();
+                    Assert.Fail("");
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertScalarToVector128SingleInt64EmbeddedRoundingReflection_Single()
+        {
+            int testResult = 1;
+            uint[] answerTable_ToNegativeInfinity = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000,};
+            uint[] answerTable_ToPositiveInfinity  = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000};
+            uint[] answerTable_ToZero = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000};    
+            if (Avx512F.X64.IsSupported)
+            {
+                using (TestTable<float> floatTable = new TestTable<float>(new float[4] { -1.0f, -1.0f, -1.0f, -1.0f}, new float[4]))
+                {
+                    long value = 15;
+                    var vd3 = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertScalarToVector128Single), new Type[] { typeof(Vector128<Single>), typeof(long), typeof(FloatRoundingMode)})
+                            .Invoke(null, new object[] {
+                            Unsafe.Read<Vector128<Single>>(floatTable.inArrayPtr),
+                            value,
+                            FloatRoundingMode.ToNegativeInfinity
+                            });
+
+                    Unsafe.Write(floatTable.outArrayPtr, (Vector128<float>)(vd3));
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on Int64 input with ToNegativeInfinity:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertScalarToVector128Single), new Type[] { typeof(Vector128<Single>), typeof(long), typeof(FloatRoundingMode)})
+                        .Invoke(null, new object[] {
+                        Unsafe.Read<Vector128<Single>>(floatTable.inArrayPtr),
+                        value,
+                        FloatRoundingMode.ToPositiveInfinity
+                        });
+
+                    Unsafe.Write(floatTable.outArrayPtr, (Vector128<float>)(vd3));
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToPositiveInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on Int64 input with ToPositiveInfinity:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertScalarToVector128Single), new Type[] { typeof(Vector128<Single>), typeof(long), typeof(FloatRoundingMode)})
+                        .Invoke(null, new object[] {
+                        Unsafe.Read<Vector128<Single>>(floatTable.inArrayPtr),
+                        value,
+                        FloatRoundingMode.ToZero
+                        });
+
+                    Unsafe.Write(floatTable.outArrayPtr, (Vector128<float>)(vd3));
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToZero[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on Int64 input with ToZero:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+
+        [Fact]
+        public static unsafe void ConvertScalarToVector128SingleUInt64EmbeddedRoundingReflection_Single()
+        {
+            int testResult = 1;
+            uint[] answerTable_ToNegativeInfinity = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000,};
+            uint[] answerTable_ToPositiveInfinity  = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000};
+            uint[] answerTable_ToZero = new uint[4] {0x41700000, 0xbf800000, 0xbf800000, 0xbf800000};   
+            if (Avx512F.X64.IsSupported)
+            {
+                using (TestTable<float> floatTable = new TestTable<float>(new float[4] { -1.0f, -1.0f, -1.0f, -1.0f}, new float[4]))
+                {
+                    ulong value = 15;
+                    var vd3 = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertScalarToVector128Single), new Type[] { typeof(Vector128<Single>), typeof(ulong), typeof(FloatRoundingMode)})
+                            .Invoke(null, new object[] {
+                            Unsafe.Read<Vector128<Single>>(floatTable.inArrayPtr),
+                            value,
+                            FloatRoundingMode.ToNegativeInfinity
+                            });
+                    
+                    Unsafe.Write(floatTable.outArrayPtr, (Vector128<float>)(vd3));
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToNegativeInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on UInt64 input with ToNegativeInfinity:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertScalarToVector128Single), new Type[] { typeof(Vector128<Single>), typeof(ulong), typeof(FloatRoundingMode)})
+                        .Invoke(null, new object[] {
+                        Unsafe.Read<Vector128<Single>>(floatTable.inArrayPtr),
+                        value,
+                        FloatRoundingMode.ToPositiveInfinity
+                        });
+
+                    Unsafe.Write(floatTable.outArrayPtr, (Vector128<float>)(vd3));
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToPositiveInfinity[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on UInt64 input with ToPositiveInfinity:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+
+                    vd3 = typeof(Avx512F.X64).GetMethod(nameof(Avx512F.X64.ConvertScalarToVector128Single), new Type[] { typeof(Vector128<Single>), typeof(ulong), typeof(FloatRoundingMode)})
+                        .Invoke(null, new object[] {
+                        Unsafe.Read<Vector128<Single>>(floatTable.inArrayPtr),
+                        value,
+                        FloatRoundingMode.ToZero
+                        });
+
+                    Unsafe.Write(floatTable.outArrayPtr, (Vector128<float>)(vd3));
+
+                    for (int i = 0; i < floatTable.outArray.Length; i++)
+                    {
+                        if (BitConverter.SingleToUInt32Bits(floatTable.outArray[i]) != answerTable_ToZero[i])
+                        {
+                            Console.WriteLine("Avx512 ConvertScalarToVector128Single Embedded rounding failed on UInt64 input with ToZero:");
+                            foreach (var item in floatTable.outArray)
+                            {
+                                Console.Write(item + ", ");
+                            }
+                            Console.WriteLine();
+                            Assert.Fail("");
+                        }
+                    }
+                }
+            }
+            Assert.Equal(1, testResult);
+        }
+    }
+}
diff --git a/src/tests/JIT/Intrinsics/Interlocked.cs b/src/tests/JIT/Intrinsics/Interlocked.cs
index 5ac15785448e..09e37223dd96 100644
--- a/src/tests/JIT/Intrinsics/Interlocked.cs
+++ b/src/tests/JIT/Intrinsics/Interlocked.cs
@@ -3,16 +3,61 @@
 //
 
 using System;
-using System.Threading;
+using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+using System.Threading;
 using Xunit;
 
 namespace InterlockedTest
 {
     public unsafe class Program
     {
-        private static int _errors = 0;
+        [StructLayout(LayoutKind.Explicit)]
+        private sealed class Box
+        {
+            [FieldOffset(0)]
+            private long memory;
+            [FieldOffset(8)]
+            private long val;
+            [FieldOffset(16)]
+            public nuint offset;
+
+            public long Memory => memory;
+
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            public ref T GetRef<T>() where T : unmanaged
+            {
+                return ref Unsafe.Add(ref Unsafe.As<long, T>(ref memory), offset);
+            }
+
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            public long GetValue<T>(T value, [CallerLineNumber] int line = 0) where T : unmanaged
+            {
+                long l = val;
+                if (l is not (0L or -1L))
+                {
+                    Console.WriteLine($"Line {line}: found write out of bounds at offset {offset}");
+                    _errors++;
+                }
+                Unsafe.Add(ref Unsafe.As<long, T>(ref l), offset) = value;
+                return l;
+            }
+
+            public void Set(long value, [CallerLineNumber] int line = 0)
+            {
+                if (value != ~val)
+                {
+                    Console.WriteLine($"Line {line}: found corrupt check value at offset {offset}");
+                    _errors++;
+                }
+                memory = val = value;
+            }
+        }
+
+        private static int _errors;
+        private static Box _box;
+        private static uint _staticMemory;
 
         [Fact]
         public static int TestEntryPoint()
@@ -35,144 +80,135 @@ public static int TestEntryPoint()
             [MethodImpl(MethodImplOptions.NoInlining)]
             static delegate*<ref ushort, ushort, ushort, ushort> CompareExchangeUShort() => &Interlocked.CompareExchange;
 
-            long mem = -1;
-            [MethodImpl(MethodImplOptions.NoInlining)]
-            long GetValue<T>(T val) where T : unmanaged
+            _box = new();
+            for (; _box.offset < sizeof(long) / sizeof(ushort); _box.offset++)
             {
-                Unsafe.As<long, T>(ref mem) = val;
-                return mem;
-            }
+                _box.Set(-1);
+                Equals(255, Interlocked.Exchange(ref _box.GetRef<byte>(), 254));
+                Equals(_box.GetValue<byte>(254), _box.Memory);
+                Equals(254, ExchangeByte()(ref _box.GetRef<byte>(), 253));
+                Equals(_box.GetValue<byte>(253), _box.Memory);
+
+                _box.Set(0);
+                Equals(0, Interlocked.Exchange(ref _box.GetRef<sbyte>(), -4));
+                Equals(_box.GetValue<sbyte>(-4), _box.Memory);
+                Equals(-4, ExchangeSByte()(ref _box.GetRef<sbyte>(), -5));
+                Equals(_box.GetValue<sbyte>(-5), _box.Memory);
+
+                _box.Set(-1);
+                Equals(255, Interlocked.CompareExchange(ref _box.GetRef<byte>(), 254, 255));
+                Equals(_box.GetValue<byte>(254), _box.Memory);
+                Equals(254, CompareExchangeByte()(ref _box.GetRef<byte>(), 253, 254));
+                Equals(_box.GetValue<byte>(253), _box.Memory);
+
+                _box.Set(0);
+                Equals(0, Interlocked.CompareExchange(ref _box.GetRef<sbyte>(), -4, 0));
+                Equals(_box.GetValue<sbyte>(-4), _box.Memory);
+                Equals(-4, CompareExchangeSByte()(ref _box.GetRef<sbyte>(), -5, -4));
+                Equals(_box.GetValue<sbyte>(-5), _box.Memory);
 
-            long l = -1;
-            Equals(255, Interlocked.Exchange(ref Unsafe.As<long, byte>(ref l), 254));
-            Equals(GetValue<byte>(254), l);
-            Equals(254, ExchangeByte()(ref Unsafe.As<long, byte>(ref l), 253));
-            Equals(GetValue<byte>(253), l);
-
-            mem = 0;
-            l = 0;
-            Equals(0, Interlocked.Exchange(ref Unsafe.As<long, sbyte>(ref l), -4));
-            Equals(GetValue<sbyte>(-4), l);
-            Equals(-4, ExchangeSByte()(ref Unsafe.As<long, sbyte>(ref l), -5));
-            Equals(GetValue<sbyte>(-5), l);
-
-            mem = -1;
-            l = -1;
-            Equals(255, Interlocked.CompareExchange(ref Unsafe.As<long, byte>(ref l), 254, 255));
-            Equals(GetValue<byte>(254), l);
-            Equals(254, CompareExchangeByte()(ref Unsafe.As<long, byte>(ref l), 253, 254));
-            Equals(GetValue<byte>(253), l);
-
-            mem = 0;
-            l = 0;
-            Equals(0, Interlocked.CompareExchange(ref Unsafe.As<long, sbyte>(ref l), -4, 0));
-            Equals(GetValue<sbyte>(-4), l);
-            Equals(-4, CompareExchangeSByte()(ref Unsafe.As<long, sbyte>(ref l), -5, -4));
-            Equals(GetValue<sbyte>(-5), l);
-
-            Equals(251, Interlocked.CompareExchange(ref Unsafe.As<long, byte>(ref l), 2, 10));
-            Equals(GetValue<byte>(251), l);
-            Equals(251, CompareExchangeByte()(ref Unsafe.As<long, byte>(ref l), 2, 10));
-            Equals(GetValue<byte>(251), l);
-            Equals(-5, Interlocked.CompareExchange(ref Unsafe.As<long, sbyte>(ref l), 2, 10));
-            Equals(GetValue<sbyte>(-5), l);
-            Equals(-5, CompareExchangeSByte()(ref Unsafe.As<long, sbyte>(ref l), 2, 10));
-            Equals(GetValue<sbyte>(-5), l);
-
-            mem = 0;
-            l = 0;
-            Equals(0, Interlocked.Exchange(ref Unsafe.As<long, short>(ref l), -2));
-            Equals(GetValue<short>(-2), l);
-            Equals(-2, ExchangeShort()(ref Unsafe.As<long, short>(ref l), -3));
-            Equals(GetValue<short>(-3), l);
-
-            mem = -1;
-            l = -1;
-            Equals(65535, Interlocked.Exchange(ref Unsafe.As<long, ushort>(ref l), 65532));
-            Equals(GetValue<ushort>(65532), l);
-            Equals(65532, ExchangeUShort()(ref Unsafe.As<long, ushort>(ref l), 65531));
-            Equals(GetValue<ushort>(65531), l);
-
-            mem = 0;
-            l = 0;
-            Equals(0, Interlocked.CompareExchange(ref Unsafe.As<long, short>(ref l), -2, 0));
-            Equals(GetValue<short>(-2), l);
-            Equals(-2, CompareExchangeShort()(ref Unsafe.As<long, short>(ref l), -3, -2));
-            Equals(GetValue<short>(-3), l);
-
-            mem = -1;
-            l = -1;
-            Equals(65535, Interlocked.CompareExchange(ref Unsafe.As<long, ushort>(ref l), 65532, 65535));
-            Equals(GetValue<ushort>(65532), l);
-            Equals(65532, CompareExchangeUShort()(ref Unsafe.As<long, ushort>(ref l), 65531, 65532));
-            Equals(GetValue<ushort>(65531), l);
-
-            Equals(-5, Interlocked.CompareExchange(ref Unsafe.As<long, short>(ref l), 1444, 1555));
-            Equals(GetValue<short>(-5), l);
-            Equals(-5, CompareExchangeShort()(ref Unsafe.As<long, short>(ref l), 1444, 1555));
-            Equals(GetValue<short>(-5), l);
-            Equals(65531, Interlocked.CompareExchange(ref Unsafe.As<long, ushort>(ref l), 1444, 1555));
-            Equals(GetValue<ushort>(65531), l);
-            Equals(65531, CompareExchangeUShort()(ref Unsafe.As<long, ushort>(ref l), 1444, 1555));
-            Equals(GetValue<ushort>(65531), l);
-
-            mem = -1;
-            l = -1;
-            Interlocked.Exchange(ref Unsafe.As<long, byte>(ref l), 123);
-            Equals(GetValue<byte>(123), l);
-            ExchangeByte()(ref Unsafe.As<long, byte>(ref l), 124);
-            Equals(GetValue<byte>(124), l);
-            Interlocked.Exchange(ref Unsafe.As<long, sbyte>(ref l), 125);
-            Equals(GetValue<sbyte>(125), l);
-            ExchangeSByte()(ref Unsafe.As<long, sbyte>(ref l), 126);
-            Equals(GetValue<sbyte>(126), l);
-
-            Interlocked.CompareExchange(ref Unsafe.As<long, byte>(ref l), 55, 126);
-            Equals(GetValue<byte>(55), l);
-            CompareExchangeByte()(ref Unsafe.As<long, byte>(ref l), 56, 55);
-            Equals(GetValue<byte>(56), l);
-            Interlocked.CompareExchange(ref Unsafe.As<long, sbyte>(ref l), 57, 56);
-            Equals(GetValue<sbyte>(57), l);
-            CompareExchangeSByte()(ref Unsafe.As<long, sbyte>(ref l), 58, 57);
-            Equals(GetValue<sbyte>(58), l);
-
-            Interlocked.CompareExchange(ref Unsafe.As<long, byte>(ref l), 10, 2);
-            Equals(GetValue<byte>(58), l);
-            CompareExchangeByte()(ref Unsafe.As<long, byte>(ref l), 10, 2);
-            Equals(GetValue<byte>(58), l);
-            Interlocked.CompareExchange(ref Unsafe.As<long, sbyte>(ref l), 10, 2);
-            Equals(GetValue<sbyte>(58), l);
-            CompareExchangeSByte()(ref Unsafe.As<long, sbyte>(ref l), 10, 2);
-            Equals(GetValue<sbyte>(58), l);
-
-            mem = -1;
-            l = -1;
-            Interlocked.Exchange(ref Unsafe.As<long, short>(ref l), 12345);
-            Equals(GetValue<short>(12345), l);
-            ExchangeShort()(ref Unsafe.As<long, short>(ref l), 12346);
-            Equals(GetValue<short>(12346), l);
-            Interlocked.Exchange(ref Unsafe.As<long, ushort>(ref l), 12347);
-            Equals(GetValue<ushort>(12347), l);
-            ExchangeUShort()(ref Unsafe.As<long, ushort>(ref l), 12348);
-            Equals(GetValue<ushort>(12348), l);
-
-            Interlocked.CompareExchange(ref Unsafe.As<long, short>(ref l), 1234, 12348);
-            Equals(GetValue<short>(1234), l);
-            CompareExchangeShort()(ref Unsafe.As<long, short>(ref l), 1235, 1234);
-            Equals(GetValue<short>(1235), l);
-            Interlocked.CompareExchange(ref Unsafe.As<long, ushort>(ref l), 1236, 1235);
-            Equals(GetValue<ushort>(1236), l);
-            CompareExchangeUShort()(ref Unsafe.As<long, ushort>(ref l), 1237, 1236);
-            Equals(GetValue<ushort>(1237), l);
-
-            Interlocked.CompareExchange(ref Unsafe.As<long, short>(ref l), 1555, 1444);
-            Equals(GetValue<short>(1237), l);
-            CompareExchangeShort()(ref Unsafe.As<long, short>(ref l), 1555, 1444);
-            Equals(GetValue<short>(1237), l);
-            Interlocked.CompareExchange(ref Unsafe.As<long, ushort>(ref l), 1555, 1444);
-            Equals(GetValue<ushort>(1237), l);
-            CompareExchangeUShort()(ref Unsafe.As<long, ushort>(ref l), 1555, 1444);
-            Equals(GetValue<ushort>(1237), l);
+                Equals(251, Interlocked.CompareExchange(ref _box.GetRef<byte>(), 2, 10));
+                Equals(_box.GetValue<byte>(251), _box.Memory);
+                Equals(251, CompareExchangeByte()(ref _box.GetRef<byte>(), 2, 10));
+                Equals(_box.GetValue<byte>(251), _box.Memory);
+                Equals(-5, Interlocked.CompareExchange(ref _box.GetRef<sbyte>(), 2, 10));
+                Equals(_box.GetValue<sbyte>(-5), _box.Memory);
+                Equals(-5, CompareExchangeSByte()(ref _box.GetRef<sbyte>(), 2, 10));
+                Equals(_box.GetValue<sbyte>(-5), _box.Memory);
+
+                _box.Set(-1);
+                _box.Set(0);
+                Equals(0, Interlocked.Exchange(ref _box.GetRef<short>(), -2));
+                Equals(_box.GetValue<short>(-2), _box.Memory);
+                Equals(-2, ExchangeShort()(ref _box.GetRef<short>(), -3));
+                Equals(_box.GetValue<short>(-3), _box.Memory);
+
+                _box.Set(-1);
+                Equals(65535, Interlocked.Exchange(ref _box.GetRef<ushort>(), 65532));
+                Equals(_box.GetValue<ushort>(65532), _box.Memory);
+                Equals(65532, ExchangeUShort()(ref _box.GetRef<ushort>(), 65531));
+                Equals(_box.GetValue<ushort>(65531), _box.Memory);
+
+                _box.Set(0);
+                Equals(0, Interlocked.CompareExchange(ref _box.GetRef<short>(), -2, 0));
+                Equals(_box.GetValue<short>(-2), _box.Memory);
+                Equals(-2, CompareExchangeShort()(ref _box.GetRef<short>(), -3, -2));
+                Equals(_box.GetValue<short>(-3), _box.Memory);
+
+                _box.Set(-1);
+                Equals(65535, Interlocked.CompareExchange(ref _box.GetRef<ushort>(), 65532, 65535));
+                Equals(_box.GetValue<ushort>(65532), _box.Memory);
+                Equals(65532, CompareExchangeUShort()(ref _box.GetRef<ushort>(), 65531, 65532));
+                Equals(_box.GetValue<ushort>(65531), _box.Memory);
+
+                Equals(-5, Interlocked.CompareExchange(ref _box.GetRef<short>(), 1444, 1555));
+                Equals(_box.GetValue<short>(-5), _box.Memory);
+                Equals(-5, CompareExchangeShort()(ref _box.GetRef<short>(), 1444, 1555));
+                Equals(_box.GetValue<short>(-5), _box.Memory);
+                Equals(65531, Interlocked.CompareExchange(ref _box.GetRef<ushort>(), 1444, 1555));
+                Equals(_box.GetValue<ushort>(65531), _box.Memory);
+                Equals(65531, CompareExchangeUShort()(ref _box.GetRef<ushort>(), 1444, 1555));
+                Equals(_box.GetValue<ushort>(65531), _box.Memory);
+
+                _box.Set(0);
+                _box.Set(-1);
+                Interlocked.Exchange(ref _box.GetRef<byte>(), 123);
+                Equals(_box.GetValue<byte>(123), _box.Memory);
+                ExchangeByte()(ref _box.GetRef<byte>(), 124);
+                Equals(_box.GetValue<byte>(124), _box.Memory);
+                Interlocked.Exchange(ref _box.GetRef<sbyte>(), 125);
+                Equals(_box.GetValue<sbyte>(125), _box.Memory);
+                ExchangeSByte()(ref _box.GetRef<sbyte>(), 126);
+                Equals(_box.GetValue<sbyte>(126), _box.Memory);
+
+                Interlocked.CompareExchange(ref _box.GetRef<byte>(), 55, 126);
+                Equals(_box.GetValue<byte>(55), _box.Memory);
+                CompareExchangeByte()(ref _box.GetRef<byte>(), 56, 55);
+                Equals(_box.GetValue<byte>(56), _box.Memory);
+                Interlocked.CompareExchange(ref _box.GetRef<sbyte>(), 57, 56);
+                Equals(_box.GetValue<sbyte>(57), _box.Memory);
+                CompareExchangeSByte()(ref _box.GetRef<sbyte>(), 58, 57);
+                Equals(_box.GetValue<sbyte>(58), _box.Memory);
+
+                Interlocked.CompareExchange(ref _box.GetRef<byte>(), 10, 2);
+                Equals(_box.GetValue<byte>(58), _box.Memory);
+                CompareExchangeByte()(ref _box.GetRef<byte>(), 10, 2);
+                Equals(_box.GetValue<byte>(58), _box.Memory);
+                Interlocked.CompareExchange(ref _box.GetRef<sbyte>(), 10, 2);
+                Equals(_box.GetValue<sbyte>(58), _box.Memory);
+                CompareExchangeSByte()(ref _box.GetRef<sbyte>(), 10, 2);
+                Equals(_box.GetValue<sbyte>(58), _box.Memory);
+
+                _box.Set(0);
+                _box.Set(-1);
+                Interlocked.Exchange(ref _box.GetRef<short>(), 12345);
+                Equals(_box.GetValue<short>(12345), _box.Memory);
+                ExchangeShort()(ref _box.GetRef<short>(), 12346);
+                Equals(_box.GetValue<short>(12346), _box.Memory);
+                Interlocked.Exchange(ref _box.GetRef<ushort>(), 12347);
+                Equals(_box.GetValue<ushort>(12347), _box.Memory);
+                ExchangeUShort()(ref _box.GetRef<ushort>(), 12348);
+                Equals(_box.GetValue<ushort>(12348), _box.Memory);
+
+                Interlocked.CompareExchange(ref _box.GetRef<short>(), 1234, 12348);
+                Equals(_box.GetValue<short>(1234), _box.Memory);
+                CompareExchangeShort()(ref _box.GetRef<short>(), 1235, 1234);
+                Equals(_box.GetValue<short>(1235), _box.Memory);
+                Interlocked.CompareExchange(ref _box.GetRef<ushort>(), 1236, 1235);
+                Equals(_box.GetValue<ushort>(1236), _box.Memory);
+                CompareExchangeUShort()(ref _box.GetRef<ushort>(), 1237, 1236);
+                Equals(_box.GetValue<ushort>(1237), _box.Memory);
+
+                Interlocked.CompareExchange(ref _box.GetRef<short>(), 1555, 1444);
+                Equals(_box.GetValue<short>(1237), _box.Memory);
+                CompareExchangeShort()(ref _box.GetRef<short>(), 1555, 1444);
+                Equals(_box.GetValue<short>(1237), _box.Memory);
+                Interlocked.CompareExchange(ref _box.GetRef<ushort>(), 1555, 1444);
+                Equals(_box.GetValue<ushort>(1237), _box.Memory);
+                CompareExchangeUShort()(ref _box.GetRef<ushort>(), 1555, 1444);
+                Equals(_box.GetValue<ushort>(1237), _box.Memory);
+                _box.Set(0);
+            }
 
             ThrowsNRE(() => { Interlocked.Exchange(ref Unsafe.NullRef<byte>(), 0); });
             ThrowsNRE(() => { Interlocked.Exchange(ref Unsafe.NullRef<sbyte>(), 0); });
@@ -191,21 +227,63 @@ long GetValue<T>(T val) where T : unmanaged
             ThrowsNRE(() => { CompareExchangeShort()(ref Unsafe.NullRef<short>(), 0, 0); });
             ThrowsNRE(() => { CompareExchangeUShort()(ref Unsafe.NullRef<ushort>(), 0, 0); });
 
+            // test for asserts with statics since their addresses are constant which caused issues earlier
+            // test with 4B alignment provided by the uint field
+            _staticMemory = 0;
+            Equals(0, Interlocked.Exchange(ref Unsafe.As<uint, byte>(ref _staticMemory), 255));
+            Equals(255, Unsafe.As<uint, byte>(ref _staticMemory));
+
+            _staticMemory = 0;
+            Equals(0, Interlocked.Exchange(ref Unsafe.As<uint, ushort>(ref _staticMemory), 65535));
+            Equals(65535, Unsafe.As<uint, ushort>(ref _staticMemory));
+
+            _staticMemory = 0;
+            Equals(0, Interlocked.CompareExchange(ref Unsafe.As<uint, byte>(ref _staticMemory), 255, 0));
+            Equals(255, Unsafe.As<uint, byte>(ref _staticMemory));
+            Equals(255, Interlocked.CompareExchange(ref Unsafe.As<uint, byte>(ref _staticMemory), 1, 0));
+            Equals(255, Unsafe.As<uint, byte>(ref _staticMemory));
+
+            _staticMemory = 0;
+            Equals(0, Interlocked.CompareExchange(ref Unsafe.As<uint, ushort>(ref _staticMemory), 65535, 0));
+            Equals(65535, Unsafe.As<uint, ushort>(ref _staticMemory));
+            Equals(65535, Interlocked.CompareExchange(ref Unsafe.As<uint, ushort>(ref _staticMemory), 1, 0));
+            Equals(65535, Unsafe.As<uint, ushort>(ref _staticMemory));
+            
+            // offset the address by 1 to avoid 4B alignment
+            _staticMemory = 0;
+            Equals(0, Interlocked.Exchange(ref Unsafe.Add(ref Unsafe.As<uint, byte>(ref _staticMemory), 1), 255));
+            Equals(255, Unsafe.Add(ref Unsafe.As<uint, byte>(ref _staticMemory), 1));
+
+            _staticMemory = 0;
+            Equals(0, Interlocked.Exchange(ref Unsafe.Add(ref Unsafe.As<uint, ushort>(ref _staticMemory), 1), 65535));
+            Equals(65535, Unsafe.Add(ref Unsafe.As<uint, ushort>(ref _staticMemory), 1));
+
+            _staticMemory = 0;
+            Equals(0, Interlocked.CompareExchange(ref Unsafe.Add(ref Unsafe.As<uint, byte>(ref _staticMemory), 1), 255, 0));
+            Equals(255, Unsafe.Add(ref Unsafe.As<uint, byte>(ref _staticMemory), 1));
+            Equals(255, Interlocked.CompareExchange(ref Unsafe.Add(ref Unsafe.As<uint, byte>(ref _staticMemory), 1), 1, 0));
+            Equals(255, Unsafe.Add(ref Unsafe.As<uint, byte>(ref _staticMemory), 1));
+
+            _staticMemory = 0;
+            Equals(0, Interlocked.CompareExchange(ref Unsafe.Add(ref Unsafe.As<uint, ushort>(ref _staticMemory), 1), 65535, 0));
+            Equals(65535, Unsafe.Add(ref Unsafe.As<uint, ushort>(ref _staticMemory), 1));
+            Equals(65535, Interlocked.CompareExchange(ref Unsafe.Add(ref Unsafe.As<uint, ushort>(ref _staticMemory), 1), 1, 0));
+            Equals(65535, Unsafe.Add(ref Unsafe.As<uint, ushort>(ref _staticMemory), 1));
+            
             return 100 + _errors;
         }
 
         [MethodImpl(MethodImplOptions.NoInlining)]
-        static void Equals(long left, long right, [CallerLineNumber] int line = 0, [CallerFilePath] string file = "")
+        private static void Equals(long left, long right, [CallerLineNumber] int line = 0, [CallerFilePath] string file = "")
         {
-            if (left != right)
-            {
-                Console.WriteLine($"{file}:L{line} test failed (expected: equal, actual: {left}-{right}).");
-                _errors++;
-            }
+            if (left == right)
+                return;
+            Console.WriteLine($"{file}:L{line} test failed (not equal, expected: {left}, actual: {right}) at offset {_box.offset}.");
+            _errors++;
         }
 
         [MethodImpl(MethodImplOptions.NoInlining)]
-        static void ThrowsNRE(Action action, [CallerLineNumber] int line = 0, [CallerFilePath] string file = "")
+        private static void ThrowsNRE(Action action, [CallerLineNumber] int line = 0, [CallerFilePath] string file = "")
         {
             try
             {
diff --git a/src/tests/JIT/Methodical/Boxing/boxunbox/BoxPatternMatchAndSideEffects.csproj b/src/tests/JIT/Methodical/Boxing/boxunbox/BoxPatternMatchAndSideEffects.csproj
index 17ce8036e6a8..7df006c9f1a4 100644
--- a/src/tests/JIT/Methodical/Boxing/boxunbox/BoxPatternMatchAndSideEffects.csproj
+++ b/src/tests/JIT/Methodical/Boxing/boxunbox/BoxPatternMatchAndSideEffects.csproj
@@ -1,6 +1,8 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
     <DebugType>PdbOnly</DebugType>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'minifullaot' or '$(RuntimeVariant)' == 'llvmfullaot')">true</CLRTestTargetUnsupported>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="$(MSBuildProjectName).cs" />
diff --git a/src/tests/JIT/Methodical/Methodical_others.csproj b/src/tests/JIT/Methodical/Methodical_others.csproj
index 4021ca899228..2fd089bdb777 100644
--- a/src/tests/JIT/Methodical/Methodical_others.csproj
+++ b/src/tests/JIT/Methodical/Methodical_others.csproj
@@ -1,4 +1,8 @@
 <Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'minifullaot' or '$(RuntimeVariant)' == 'llvmfullaot')">true</CLRTestTargetUnsupported>
+  </PropertyGroup>
   <ItemGroup>
     <MergedWrapperProjectReference Include="*/**/*.??proj" />
     <MergedWrapperProjectReference Remove="*/**/*_d.??proj" />
diff --git a/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il b/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il
index b8ccece0a1d6..ff132dd86859 100644
--- a/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il
+++ b/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b28598/b28598.il
@@ -48,6 +48,9 @@ End_Orphan_3:
 } catch [mscorlib]System.OverflowException {
   pop
   leave the_end
+} catch [mscorlib]System.DivideByZeroException {
+  pop
+  leave the_end
 }
 the_end:
 ldc.i4 100
diff --git a/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il b/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il
index 65f3bc2af34f..0422a59b0205 100644
--- a/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il
+++ b/src/tests/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b50027/b50027.il
@@ -684,6 +684,9 @@ leave END
 } catch [mscorlib]System.OverflowException {
 pop
 leave END
+} catch [mscorlib]System.DivideByZeroException {
+  pop
+  leave END
 }
 END:
 ldc.i4 100
diff --git a/src/tests/JIT/Regression/Dev11/External/Dev11_243742/app.cs b/src/tests/JIT/Regression/Dev11/External/Dev11_243742/app.cs
index 393691bd4336..1842f13cc9e0 100644
--- a/src/tests/JIT/Regression/Dev11/External/Dev11_243742/app.cs
+++ b/src/tests/JIT/Regression/Dev11/External/Dev11_243742/app.cs
@@ -3,10 +3,6 @@
 
 /*
 * Regression test for Dev11 243742 [Triton]
-* precommands:
-* set DOTNET_ZAPREQUIRE=2
-* set CORECLR_PREJITType=MDIL
-* del /q nitype.signal
 *
 * Execute:
 * %CORE_ROOT%\fxprun.exe App.exe
diff --git a/src/tests/JIT/Regression/JitBlue/GitHub_26491/GitHub_26491.ilproj b/src/tests/JIT/Regression/JitBlue/GitHub_26491/GitHub_26491.ilproj
index 5ddfd280bde6..623b7601a72c 100644
--- a/src/tests/JIT/Regression/JitBlue/GitHub_26491/GitHub_26491.ilproj
+++ b/src/tests/JIT/Regression/JitBlue/GitHub_26491/GitHub_26491.ilproj
@@ -1,4 +1,8 @@
 <Project Sdk="Microsoft.NET.Sdk.IL">
+  <PropertyGroup>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'minifullaot' or '$(RuntimeVariant)' == 'llvmfullaot')">true</CLRTestTargetUnsupported>
+  </PropertyGroup>
   <ItemGroup>
     <Compile Include="GitHub_26491.il" />
   </ItemGroup>
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_100437/Runtime_100437.cs b/src/tests/JIT/Regression/JitBlue/Runtime_100437/Runtime_100437.cs
new file mode 100644
index 000000000000..810b99acab30
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_100437/Runtime_100437.cs
@@ -0,0 +1,112 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.Loader;
+using Xunit;
+
+public class Runtime_100437
+{
+    [Fact]
+    [SkipOnMono("PlatformDetection.IsPreciseGcSupported false on mono", TestPlatforms.Any)]
+    public static void TestNonCollectibleType() => TestCollectibleReadOnlyStatics(nameof(NonCollectibleType));
+
+    [Fact]
+    [SkipOnMono("PlatformDetection.IsPreciseGcSupported false on mono", TestPlatforms.Any)]
+    public static void TestNonCollectibleTypeInSharedGenericCode() => TestCollectibleReadOnlyStatics(nameof(NonCollectibleTypeInSharedGenericCode));
+
+    [Fact]
+    [SkipOnMono("PlatformDetection.IsPreciseGcSupported false on mono", TestPlatforms.Any)]
+    public static void TestNonCollectibleArrayTypeInSharedGenericCode() => TestCollectibleReadOnlyStatics(nameof(NonCollectibleArrayTypeInSharedGenericCode));
+
+    [Fact]
+    [SkipOnMono("PlatformDetection.IsPreciseGcSupported false on mono", TestPlatforms.Any)]
+    public static void TestCollectibleEmptyArray() => TestCollectibleReadOnlyStatics(nameof(CollectibleEmptyArray));
+
+    private static void TestCollectibleReadOnlyStatics(string methodName)
+    {
+        string assemblyPath = typeof(Runtime_100437).Assembly.Location;
+
+        // Skip this test for single file
+        if (string.IsNullOrEmpty(assemblyPath))
+            return;
+
+        WeakReference wr = CreateReadOnlyStaticWeakReference();
+
+        for (int i = 0; i < 10; i++)
+        {
+            GC.Collect();
+            GC.WaitForPendingFinalizers();
+
+            if (!IsTargetAlive(wr))
+                return;
+        }
+
+        throw new Exception("Test failed - readonly static has not been collected.");
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        WeakReference CreateReadOnlyStaticWeakReference()
+        {
+            AssemblyLoadContext alc = new CollectibleAssemblyLoadContext();
+            Assembly a = alc.LoadFromAssemblyPath(assemblyPath);
+            return (WeakReference)a.GetType(nameof(Runtime_100437)).GetMethod(methodName).Invoke(null, new object[] { typeof(Runtime_100437).Assembly });
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        bool IsTargetAlive(WeakReference wr)
+        {
+            return wr.Target != null;
+        }
+    }
+
+    public static WeakReference NonCollectibleType(Assembly assemblyInDefaultContext)
+    {
+        return new WeakReference(Holder.Singleton, trackResurrection: true);
+    }
+
+    public static WeakReference NonCollectibleTypeInSharedGenericCode(Assembly assemblyInDefaultContext)
+    {
+        // Create instance of a non-collectible generic type definition over a collectible type
+        var type = assemblyInDefaultContext.GetType("Runtime_100437+GenericHolder`1", throwOnError: true).MakeGenericType(typeof(Runtime_100437));
+        var field = type.GetField("Singleton", BindingFlags.Static | BindingFlags.Public);
+        return new WeakReference(field.GetValue(null), trackResurrection: true);
+    }
+
+    public static WeakReference NonCollectibleArrayTypeInSharedGenericCode(Assembly assemblyInDefaultContext)
+    {
+        // Create instance of a non-collectible generic type definition over a collectible type
+        var type = assemblyInDefaultContext.GetType("Runtime_100437+GenericArrayHolder`1", throwOnError: true).MakeGenericType(typeof(Runtime_100437));
+        var field = type.GetField("Singleton", BindingFlags.Static | BindingFlags.Public);
+        return new WeakReference(field.GetValue(null), trackResurrection: true);
+    }
+
+    public static WeakReference CollectibleEmptyArray(Assembly assemblyInDefaultContext)
+    {
+        return new WeakReference(Array.Empty<Runtime_100437>(), trackResurrection: true);
+    }
+
+    private class CollectibleAssemblyLoadContext : AssemblyLoadContext
+    {
+        public CollectibleAssemblyLoadContext()
+            : base(isCollectible: true)
+        {
+        }
+    }
+
+    private class Holder
+    {
+        public static readonly object Singleton = new object();
+    }
+
+    private class GenericHolder<T>
+    {
+        public static readonly object Singleton = new object();
+    }
+
+    private class GenericArrayHolder<T>
+    {
+        public static readonly int[] Singleton = new int[0];
+    }
+}
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_100437/Runtime_100437.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_100437/Runtime_100437.csproj
new file mode 100644
index 000000000000..0f460bbb16f7
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_100437/Runtime_100437.csproj
@@ -0,0 +1,9 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <!-- uses WaitForPendingFinalizers -->
+    <RequiresProcessIsolation>true</RequiresProcessIsolation>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_100466/Runtime_100466.cs b/src/tests/JIT/Regression/JitBlue/Runtime_100466/Runtime_100466.cs
new file mode 100644
index 000000000000..e7d9883c47bd
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_100466/Runtime_100466.cs
@@ -0,0 +1,37 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using Xunit;
+
+public static class Runtime_100466
+{
+    [Fact]
+    public static int TestBoxingDoesNotTriggerStaticTypeInitializers()
+    {
+        Foo foo = new Foo();
+        ((object)foo).ToString();
+        return s_cctorTriggered ? -1 : 100;
+    }
+
+    [Fact]
+    public static int TestNullableBoxingDoesNotTriggerStaticTypeInitializers()
+    {
+        FooNullable? nullable = new FooNullable();
+        ((object)nullable).ToString();
+        return s_cctorTriggeredNullable ? -1 : 100;
+    }
+
+    private static bool s_cctorTriggered;
+    private static bool s_cctorTriggeredNullable;
+
+    private struct Foo
+    {
+        static Foo() => s_cctorTriggered = true;
+    }
+
+    private struct FooNullable
+    {
+        static FooNullable() => s_cctorTriggeredNullable = true;
+    }
+}
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_100466/Runtime_100466.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_100466/Runtime_100466.csproj
new file mode 100644
index 000000000000..6c8c63b83414
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_100466/Runtime_100466.csproj
@@ -0,0 +1,5 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_56953/Runtime_56953.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_56953/Runtime_56953.csproj
index 2505cf404f01..5e5fa103057d 100644
--- a/src/tests/JIT/Regression/JitBlue/Runtime_56953/Runtime_56953.csproj
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_56953/Runtime_56953.csproj
@@ -10,6 +10,7 @@
   <ItemGroup>
     <Compile Include="$(MSBuildProjectName).cs" />
 
+    <CLRTestEnvironmentVariable Include="DOTNET_JitEnableOptRepeat" Value="1" />
     <CLRTestEnvironmentVariable Include="DOTNET_JitOptRepeat" Value="*" />
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_61510/Runtime_61510.cs b/src/tests/JIT/Regression/JitBlue/Runtime_61510/Runtime_61510.cs
index fe76c604ef1b..122e8d0c604f 100644
--- a/src/tests/JIT/Regression/JitBlue/Runtime_61510/Runtime_61510.cs
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_61510/Runtime_61510.cs
@@ -12,6 +12,7 @@ public unsafe class Runtime_61510
     [Fact]
     public static int TestEntryPoint()
     {
+        // Unsafe.AsPointer is safe since static field is marked with [FixedAddressValueType]
         ref byte result = ref AddZeroByrefToNativeInt((nint)Unsafe.AsPointer(ref s_field));
 
         return Unsafe.AreSame(ref s_field, ref result) ? 100 : 101;
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_62108/Runtime_62108.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_62108/Runtime_62108.csproj
index 6974eec98bd1..060feaa3d7d0 100644
--- a/src/tests/JIT/Regression/JitBlue/Runtime_62108/Runtime_62108.csproj
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_62108/Runtime_62108.csproj
@@ -7,6 +7,7 @@
   <ItemGroup>
     <Compile Include="$(MSBuildProjectName).cs" />
 
+    <CLRTestEnvironmentVariable Include="DOTNET_JitEnableOptRepeat" Value="1" />
     <CLRTestEnvironmentVariable Include="DOTNET_JitOptRepeat" Value="LeafMethod6" />
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692.cs b/src/tests/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692.cs
index 5b85cbb0115a..fe5105d7a91f 100644
--- a/src/tests/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692.cs
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692.cs
@@ -5,6 +5,7 @@
 using System;
 using System.Runtime.Intrinsics.X86;
 using Xunit;
+using System.Runtime.InteropServices;
 
 public unsafe class Runtime_62692
 {
@@ -39,8 +40,8 @@ public static int TestEntryPoint()
             AssertEqual(Problem2(1111, 0xFFFF_FFFF_0000_0001), 3414328792);
             AssertEqual(Problem3(1, 0xFFFF_0001), 0);
             AssertEqual(Problem4(1111, 0xFFFF_FFFF_0000_0001), 3414328792);
-            AssertEqual(Problem5(1111, double.MaxValue), 3307008522);
-            AssertEqual(Problem6(1111, float.MaxValue), 3307008522);
+            AssertEqual(Problem5(1111, double.MaxValue), 1921271346);
+            AssertEqual(Problem6(1111, float.MaxValue), 1921271346);
             AssertEqual(Problem5(1111, double.MinValue), 3307008522);
             AssertEqual(Problem6(1111, float.MinValue), 3307008522);
             AssertEqual(Problem5(1111, -0.0), 3307008522);
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_64700/Runtime_64700.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_64700/Runtime_64700.csproj
index 3738ed466c9d..0243f166700b 100644
--- a/src/tests/JIT/Regression/JitBlue/Runtime_64700/Runtime_64700.csproj
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_64700/Runtime_64700.csproj
@@ -7,6 +7,7 @@
   <ItemGroup>
     <Compile Include="$(MSBuildProjectName).cs" />
 
+    <CLRTestEnvironmentVariable Include="DOTNET_JitEnableOptRepeat" Value="1" />
     <CLRTestEnvironmentVariable Include="DOTNET_JitOptRepeat" Value="ProblemWithCopyProp" />
   </ItemGroup>
 </Project>
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_87393/Runtime_87393.fsproj b/src/tests/JIT/Regression/JitBlue/Runtime_87393/Runtime_87393.fsproj
index 5da8e9a9edeb..01cde35672f6 100644
--- a/src/tests/JIT/Regression/JitBlue/Runtime_87393/Runtime_87393.fsproj
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_87393/Runtime_87393.fsproj
@@ -7,6 +7,7 @@
     <Optimize>True</Optimize>
     <TargetFramework>$(NetCoreAppToolCurrent)</TargetFramework>
     <GCStressIncompatible>True</GCStressIncompatible>
+    <OtherFlags>--tailcalls+</OtherFlags>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="$(MSBuildProjectName).fs" />
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs
index 5de0a28895b2..5ddf45879310 100644
--- a/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs
@@ -22,7 +22,7 @@ public unsafe static void EntryPoint()
         if (Sse2.IsSupported)
         {
             ulong value = 0;
-            Test((byte*)Unsafe.AsPointer(ref value));
+            Test((byte*)&value);
             Assert.True(value == 246);
         }
     }
diff --git a/src/tests/JIT/Regression/Regression_3.csproj b/src/tests/JIT/Regression/Regression_3.csproj
index 4eafe2b2450f..9eeb2fe05007 100644
--- a/src/tests/JIT/Regression/Regression_3.csproj
+++ b/src/tests/JIT/Regression/Regression_3.csproj
@@ -1,4 +1,8 @@
 <Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'minifullaot' or '$(RuntimeVariant)' == 'llvmfullaot')">true</CLRTestTargetUnsupported>
+  </PropertyGroup>
   <ItemGroup>
     <MergedWrapperProjectReference Include="JitBlue/**/*.??proj" />
     <MergedWrapperProjectReference Remove="JitBlue/Runtime_*/**/*.??proj" />
diff --git a/src/tests/JIT/jit64/mcc/common/common.il b/src/tests/JIT/jit64/mcc/common/common.il
index 3b45f9c46e48..83eac3d270fc 100644
--- a/src/tests/JIT/jit64/mcc/common/common.il
+++ b/src/tests/JIT/jit64/mcc/common/common.il
@@ -16207,6919 +16207,6 @@
 
 } // end of class MCCTest.VType8
 
-.class public sequential ansi sealed beforefieldinit MCCTest.VType9
-       extends [mscorlib]System.ValueType
-       implements class MCCTest.CType`1<valuetype MCCTest.VType9>
-{
-  .field public float32 f1
-  .method public hidebysig newslot virtual final
-          instance void  Init(int32 count) cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldarg.1
-    IL_0003:  conv.r4
-    IL_0004:  stfld      float32 MCCTest.VType9::f1
-    IL_0009:  ret
-  } // end of method VType9::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Init() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.1
-    IL_0003:  call       instance void MCCTest.VType9::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VType9::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Zero() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance void MCCTest.VType9::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VType9::Zero
-
-  .method public hidebysig instance void
-          Add(valuetype MCCTest.VType9 val) cil managed
-  {
-    // Code size       22 (0x16)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  dup
-    IL_0003:  ldfld      float32 MCCTest.VType9::f1
-    IL_0008:  ldarga.s   val
-    IL_000a:  ldfld      float32 MCCTest.VType9::f1
-    IL_000f:  add
-    IL_0010:  stfld      float32 MCCTest.VType9::f1
-    IL_0015:  ret
-  } // end of method VType9::Add
-
-  .method public hidebysig newslot virtual final
-          instance void  Check(valuetype MCCTest.VType9 expected) cil managed
-  {
-    // Code size       68 (0x44)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.Type V_1,
-             class MCCTest.ResultVerificationException V_2,
-             bool V_3)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldfld      float32 MCCTest.VType9::f1
-    IL_0007:  ldarga.s   expected
-    IL_0009:  ldfld      float32 MCCTest.VType9::f1
-    IL_000e:  ceq
-    IL_0010:  stloc.3
-    IL_0011:  ldloc.3
-    IL_0012:  brtrue.s   IL_002f
-
-    IL_0014:  nop
-    IL_0015:  ldstr      "f1"
-    IL_001a:  ldarg.0
-    IL_001b:  ldfld      float32 MCCTest.VType9::f1
-    IL_0020:  conv.r8
-    IL_0021:  ldarga.s   expected
-    IL_0023:  ldfld      float32 MCCTest.VType9::f1
-    IL_0028:  conv.r8
-    IL_0029:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_002e:  throw
-
-    IL_002f:  ldnull
-    IL_0030:  stloc.0
-    IL_0031:  ldnull
-    IL_0032:  stloc.1
-    .try
-    {
-      IL_0033:  nop
-      IL_0034:  nop
-      IL_0035:  leave.s    IL_0042
-
-    }  // end .try
-    catch MCCTest.ResultVerificationException
-    {
-      IL_0037:  stloc.2
-      IL_0038:  nop
-      IL_0039:  ldloc.0
-      IL_003a:  ldloc.1
-      IL_003b:  ldloc.2
-      IL_003c:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                    class [mscorlib]System.Type,
-                                                                                    class MCCTest.ResultVerificationException)
-      IL_0041:  throw
-
-    }  // end handler
-    IL_0042:  nop
-    IL_0043:  ret
-  } // end of method VType9::Check
-
-  .method public hidebysig instance string
-          Dump(int32 level) cil managed
-  {
-    // Code size       54 (0x36)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.IO.StringWriter V_1,
-             string V_2)
-    IL_0000:  nop
-    IL_0001:  ldarg.1
-    IL_0002:  call       string MCCTest.FormatUtils::GetPadding(int32)
-    IL_0007:  stloc.0
-    IL_0008:  newobj     instance void [mscorlib]System.IO.StringWriter::.ctor()
-    IL_000d:  stloc.1
-    IL_000e:  ldloc.1
-    IL_000f:  ldloc.0
-    IL_0010:  ldstr      "f1    = "
-    IL_0015:  ldarg.0
-    IL_0016:  ldfld      float32 MCCTest.VType9::f1
-    IL_001b:  box        [mscorlib]System.Single
-    IL_0020:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_0025:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_002a:  nop
-    IL_002b:  ldloc.1
-    IL_002c:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0031:  stloc.2
-    IL_0032:  br.s       IL_0034
-
-    IL_0034:  ldloc.2
-    IL_0035:  ret
-  } // end of method VType9::Dump
-
-  .method public hidebysig instance string
-          Dump() cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (string V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance string MCCTest.VType9::Dump(int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method VType9::Dump
-
-} // end of class MCCTest.VType9
-
-.class public sequential ansi sealed beforefieldinit MCCTest.VTypeA
-       extends [mscorlib]System.ValueType
-       implements class MCCTest.CType`1<valuetype MCCTest.VTypeA>
-{
-  .field public class MCCTest.RType4 f1
-  .field public class MCCTest.RType4 f2
-  .field public class MCCTest.RType4 f3
-  .field public class MCCTest.RType4 f4
-  .field public class MCCTest.RType4 f5
-  .field public class MCCTest.RType4 f6
-  .field public class MCCTest.RType4 f7
-  .field public class MCCTest.RType4 f8
-  .field public class MCCTest.RType4 f9
-  .field public class MCCTest.RType4 f10
-  .field public class MCCTest.RType4 f11
-  .field public class MCCTest.RType4 f12
-  .field public class MCCTest.RType4 f13
-  .method public hidebysig newslot virtual final
-          instance void  Init(int32 count) cil managed
-  {
-    // Code size       314 (0x13a)
-    .maxstack  2
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0007:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f1
-    IL_000c:  ldarg.0
-    IL_000d:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0012:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f2
-    IL_0017:  ldarg.0
-    IL_0018:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_001d:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f3
-    IL_0022:  ldarg.0
-    IL_0023:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0028:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f4
-    IL_002d:  ldarg.0
-    IL_002e:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0033:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f5
-    IL_0038:  ldarg.0
-    IL_0039:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_003e:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f6
-    IL_0043:  ldarg.0
-    IL_0044:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0049:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f7
-    IL_004e:  ldarg.0
-    IL_004f:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0054:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f8
-    IL_0059:  ldarg.0
-    IL_005a:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_005f:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f9
-    IL_0064:  ldarg.0
-    IL_0065:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_006a:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f10
-    IL_006f:  ldarg.0
-    IL_0070:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0075:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f11
-    IL_007a:  ldarg.0
-    IL_007b:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0080:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f12
-    IL_0085:  ldarg.0
-    IL_0086:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_008b:  stfld      class MCCTest.RType4 MCCTest.VTypeA::f13
-    IL_0090:  ldarg.0
-    IL_0091:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f1
-    IL_0096:  ldarg.1
-    IL_0097:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_009c:  nop
-    IL_009d:  ldarg.0
-    IL_009e:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f2
-    IL_00a3:  ldarg.1
-    IL_00a4:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_00a9:  nop
-    IL_00aa:  ldarg.0
-    IL_00ab:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f3
-    IL_00b0:  ldarg.1
-    IL_00b1:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_00b6:  nop
-    IL_00b7:  ldarg.0
-    IL_00b8:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f4
-    IL_00bd:  ldarg.1
-    IL_00be:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_00c3:  nop
-    IL_00c4:  ldarg.0
-    IL_00c5:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f5
-    IL_00ca:  ldarg.1
-    IL_00cb:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_00d0:  nop
-    IL_00d1:  ldarg.0
-    IL_00d2:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f6
-    IL_00d7:  ldarg.1
-    IL_00d8:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_00dd:  nop
-    IL_00de:  ldarg.0
-    IL_00df:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f7
-    IL_00e4:  ldarg.1
-    IL_00e5:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_00ea:  nop
-    IL_00eb:  ldarg.0
-    IL_00ec:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f8
-    IL_00f1:  ldarg.1
-    IL_00f2:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_00f7:  nop
-    IL_00f8:  ldarg.0
-    IL_00f9:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f9
-    IL_00fe:  ldarg.1
-    IL_00ff:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_0104:  nop
-    IL_0105:  ldarg.0
-    IL_0106:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f10
-    IL_010b:  ldarg.1
-    IL_010c:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_0111:  nop
-    IL_0112:  ldarg.0
-    IL_0113:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f11
-    IL_0118:  ldarg.1
-    IL_0119:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_011e:  nop
-    IL_011f:  ldarg.0
-    IL_0120:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f12
-    IL_0125:  ldarg.1
-    IL_0126:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_012b:  nop
-    IL_012c:  ldarg.0
-    IL_012d:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f13
-    IL_0132:  ldarg.1
-    IL_0133:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_0138:  nop
-    IL_0139:  ret
-  } // end of method VTypeA::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Init() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.1
-    IL_0003:  call       instance void MCCTest.VTypeA::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VTypeA::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Zero() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance void MCCTest.VTypeA::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VTypeA::Zero
-
-  .method public hidebysig instance void
-          Add(valuetype MCCTest.VTypeA val) cil managed
-  {
-    // Code size       249 (0xf9)
-    .maxstack  2
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f1
-    IL_0007:  ldarga.s   val
-    IL_0009:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f1
-    IL_000e:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_0013:  nop
-    IL_0014:  ldarg.0
-    IL_0015:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f2
-    IL_001a:  ldarga.s   val
-    IL_001c:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f2
-    IL_0021:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_0026:  nop
-    IL_0027:  ldarg.0
-    IL_0028:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f3
-    IL_002d:  ldarga.s   val
-    IL_002f:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f3
-    IL_0034:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_0039:  nop
-    IL_003a:  ldarg.0
-    IL_003b:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f4
-    IL_0040:  ldarga.s   val
-    IL_0042:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f4
-    IL_0047:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_004c:  nop
-    IL_004d:  ldarg.0
-    IL_004e:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f5
-    IL_0053:  ldarga.s   val
-    IL_0055:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f5
-    IL_005a:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_005f:  nop
-    IL_0060:  ldarg.0
-    IL_0061:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f6
-    IL_0066:  ldarga.s   val
-    IL_0068:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f6
-    IL_006d:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_0072:  nop
-    IL_0073:  ldarg.0
-    IL_0074:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f7
-    IL_0079:  ldarga.s   val
-    IL_007b:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f7
-    IL_0080:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_0085:  nop
-    IL_0086:  ldarg.0
-    IL_0087:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f8
-    IL_008c:  ldarga.s   val
-    IL_008e:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f8
-    IL_0093:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_0098:  nop
-    IL_0099:  ldarg.0
-    IL_009a:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f9
-    IL_009f:  ldarga.s   val
-    IL_00a1:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f9
-    IL_00a6:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_00ab:  nop
-    IL_00ac:  ldarg.0
-    IL_00ad:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f10
-    IL_00b2:  ldarga.s   val
-    IL_00b4:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f10
-    IL_00b9:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_00be:  nop
-    IL_00bf:  ldarg.0
-    IL_00c0:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f11
-    IL_00c5:  ldarga.s   val
-    IL_00c7:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f11
-    IL_00cc:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_00d1:  nop
-    IL_00d2:  ldarg.0
-    IL_00d3:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f12
-    IL_00d8:  ldarga.s   val
-    IL_00da:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f12
-    IL_00df:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_00e4:  nop
-    IL_00e5:  ldarg.0
-    IL_00e6:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f13
-    IL_00eb:  ldarga.s   val
-    IL_00ed:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f13
-    IL_00f2:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_00f7:  nop
-    IL_00f8:  ret
-  } // end of method VTypeA::Add
-
-  .method public hidebysig newslot virtual final
-          instance void  Check(valuetype MCCTest.VTypeA expected) cil managed
-  {
-    // Code size       503 (0x1f7)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.Type V_1,
-             class MCCTest.ResultVerificationException V_2)
-    IL_0000:  nop
-    IL_0001:  ldnull
-    IL_0002:  stloc.0
-    IL_0003:  ldnull
-    IL_0004:  stloc.1
-    .try
-    {
-      IL_0005:  nop
-      IL_0006:  ldstr      "f1"
-      IL_000b:  stloc.0
-      IL_000c:  ldarg.0
-      IL_000d:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f1
-      IL_0012:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0017:  stloc.1
-      IL_0018:  ldarg.0
-      IL_0019:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f1
-      IL_001e:  ldarga.s   expected
-      IL_0020:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f1
-      IL_0025:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_002a:  nop
-      IL_002b:  ldstr      "f2"
-      IL_0030:  stloc.0
-      IL_0031:  ldarg.0
-      IL_0032:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f2
-      IL_0037:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_003c:  stloc.1
-      IL_003d:  ldarg.0
-      IL_003e:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f2
-      IL_0043:  ldarga.s   expected
-      IL_0045:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f2
-      IL_004a:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_004f:  nop
-      IL_0050:  ldstr      "f3"
-      IL_0055:  stloc.0
-      IL_0056:  ldarg.0
-      IL_0057:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f3
-      IL_005c:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0061:  stloc.1
-      IL_0062:  ldarg.0
-      IL_0063:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f3
-      IL_0068:  ldarga.s   expected
-      IL_006a:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f3
-      IL_006f:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_0074:  nop
-      IL_0075:  ldstr      "f4"
-      IL_007a:  stloc.0
-      IL_007b:  ldarg.0
-      IL_007c:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f4
-      IL_0081:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0086:  stloc.1
-      IL_0087:  ldarg.0
-      IL_0088:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f4
-      IL_008d:  ldarga.s   expected
-      IL_008f:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f4
-      IL_0094:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_0099:  nop
-      IL_009a:  ldstr      "f5"
-      IL_009f:  stloc.0
-      IL_00a0:  ldarg.0
-      IL_00a1:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f5
-      IL_00a6:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_00ab:  stloc.1
-      IL_00ac:  ldarg.0
-      IL_00ad:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f5
-      IL_00b2:  ldarga.s   expected
-      IL_00b4:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f5
-      IL_00b9:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_00be:  nop
-      IL_00bf:  ldstr      "f6"
-      IL_00c4:  stloc.0
-      IL_00c5:  ldarg.0
-      IL_00c6:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f6
-      IL_00cb:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_00d0:  stloc.1
-      IL_00d1:  ldarg.0
-      IL_00d2:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f6
-      IL_00d7:  ldarga.s   expected
-      IL_00d9:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f6
-      IL_00de:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_00e3:  nop
-      IL_00e4:  ldstr      "f7"
-      IL_00e9:  stloc.0
-      IL_00ea:  ldarg.0
-      IL_00eb:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f7
-      IL_00f0:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_00f5:  stloc.1
-      IL_00f6:  ldarg.0
-      IL_00f7:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f7
-      IL_00fc:  ldarga.s   expected
-      IL_00fe:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f7
-      IL_0103:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_0108:  nop
-      IL_0109:  ldstr      "f8"
-      IL_010e:  stloc.0
-      IL_010f:  ldarg.0
-      IL_0110:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f8
-      IL_0115:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_011a:  stloc.1
-      IL_011b:  ldarg.0
-      IL_011c:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f8
-      IL_0121:  ldarga.s   expected
-      IL_0123:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f8
-      IL_0128:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_012d:  nop
-      IL_012e:  ldstr      "f9"
-      IL_0133:  stloc.0
-      IL_0134:  ldarg.0
-      IL_0135:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f9
-      IL_013a:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_013f:  stloc.1
-      IL_0140:  ldarg.0
-      IL_0141:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f9
-      IL_0146:  ldarga.s   expected
-      IL_0148:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f9
-      IL_014d:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_0152:  nop
-      IL_0153:  ldstr      "f10"
-      IL_0158:  stloc.0
-      IL_0159:  ldarg.0
-      IL_015a:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f10
-      IL_015f:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0164:  stloc.1
-      IL_0165:  ldarg.0
-      IL_0166:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f10
-      IL_016b:  ldarga.s   expected
-      IL_016d:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f10
-      IL_0172:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_0177:  nop
-      IL_0178:  ldstr      "f11"
-      IL_017d:  stloc.0
-      IL_017e:  ldarg.0
-      IL_017f:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f11
-      IL_0184:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0189:  stloc.1
-      IL_018a:  ldarg.0
-      IL_018b:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f11
-      IL_0190:  ldarga.s   expected
-      IL_0192:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f11
-      IL_0197:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_019c:  nop
-      IL_019d:  ldstr      "f12"
-      IL_01a2:  stloc.0
-      IL_01a3:  ldarg.0
-      IL_01a4:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f12
-      IL_01a9:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_01ae:  stloc.1
-      IL_01af:  ldarg.0
-      IL_01b0:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f12
-      IL_01b5:  ldarga.s   expected
-      IL_01b7:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f12
-      IL_01bc:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_01c1:  nop
-      IL_01c2:  ldstr      "f13"
-      IL_01c7:  stloc.0
-      IL_01c8:  ldarg.0
-      IL_01c9:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f13
-      IL_01ce:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_01d3:  stloc.1
-      IL_01d4:  ldarg.0
-      IL_01d5:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f13
-      IL_01da:  ldarga.s   expected
-      IL_01dc:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f13
-      IL_01e1:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_01e6:  nop
-      IL_01e7:  nop
-      IL_01e8:  leave.s    IL_01f5
-
-    }  // end .try
-    catch MCCTest.ResultVerificationException
-    {
-      IL_01ea:  stloc.2
-      IL_01eb:  nop
-      IL_01ec:  ldloc.0
-      IL_01ed:  ldloc.1
-      IL_01ee:  ldloc.2
-      IL_01ef:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                    class [mscorlib]System.Type,
-                                                                                    class MCCTest.ResultVerificationException)
-      IL_01f4:  throw
-
-    }  // end handler
-    IL_01f5:  nop
-    IL_01f6:  ret
-  } // end of method VTypeA::Check
-
-  .method public hidebysig instance string
-          Dump(int32 level) cil managed
-  {
-    // Code size       740 (0x2e4)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.IO.StringWriter V_1,
-             string V_2)
-    IL_0000:  nop
-    IL_0001:  ldarg.1
-    IL_0002:  call       string MCCTest.FormatUtils::GetPadding(int32)
-    IL_0007:  stloc.0
-    IL_0008:  newobj     instance void [mscorlib]System.IO.StringWriter::.ctor()
-    IL_000d:  stloc.1
-    IL_000e:  ldloc.1
-    IL_000f:  ldloc.0
-    IL_0010:  ldstr      "[Field f1] [Type '{0}']"
-    IL_0015:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_001a:  ldarg.0
-    IL_001b:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f1
-    IL_0020:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0025:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_002a:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_002f:  nop
-    IL_0030:  ldloc.1
-    IL_0031:  ldarg.0
-    IL_0032:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f1
-    IL_0037:  ldarg.1
-    IL_0038:  ldc.i4.1
-    IL_0039:  add
-    IL_003a:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_003f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0044:  nop
-    IL_0045:  ldloc.1
-    IL_0046:  ldloc.0
-    IL_0047:  ldstr      "[Field f2] [Type '{0}']"
-    IL_004c:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0051:  ldarg.0
-    IL_0052:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f2
-    IL_0057:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_005c:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0061:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0066:  nop
-    IL_0067:  ldloc.1
-    IL_0068:  ldarg.0
-    IL_0069:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f2
-    IL_006e:  ldarg.1
-    IL_006f:  ldc.i4.1
-    IL_0070:  add
-    IL_0071:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_0076:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_007b:  nop
-    IL_007c:  ldloc.1
-    IL_007d:  ldloc.0
-    IL_007e:  ldstr      "[Field f3] [Type '{0}']"
-    IL_0083:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0088:  ldarg.0
-    IL_0089:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f3
-    IL_008e:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0093:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0098:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_009d:  nop
-    IL_009e:  ldloc.1
-    IL_009f:  ldarg.0
-    IL_00a0:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f3
-    IL_00a5:  ldarg.1
-    IL_00a6:  ldc.i4.1
-    IL_00a7:  add
-    IL_00a8:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_00ad:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00b2:  nop
-    IL_00b3:  ldloc.1
-    IL_00b4:  ldloc.0
-    IL_00b5:  ldstr      "[Field f4] [Type '{0}']"
-    IL_00ba:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_00bf:  ldarg.0
-    IL_00c0:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f4
-    IL_00c5:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_00ca:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_00cf:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_00d4:  nop
-    IL_00d5:  ldloc.1
-    IL_00d6:  ldarg.0
-    IL_00d7:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f4
-    IL_00dc:  ldarg.1
-    IL_00dd:  ldc.i4.1
-    IL_00de:  add
-    IL_00df:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_00e4:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00e9:  nop
-    IL_00ea:  ldloc.1
-    IL_00eb:  ldloc.0
-    IL_00ec:  ldstr      "[Field f5] [Type '{0}']"
-    IL_00f1:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_00f6:  ldarg.0
-    IL_00f7:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f5
-    IL_00fc:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0101:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0106:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_010b:  nop
-    IL_010c:  ldloc.1
-    IL_010d:  ldarg.0
-    IL_010e:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f5
-    IL_0113:  ldarg.1
-    IL_0114:  ldc.i4.1
-    IL_0115:  add
-    IL_0116:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_011b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0120:  nop
-    IL_0121:  ldloc.1
-    IL_0122:  ldloc.0
-    IL_0123:  ldstr      "[Field f6] [Type '{0}']"
-    IL_0128:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_012d:  ldarg.0
-    IL_012e:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f6
-    IL_0133:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0138:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_013d:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0142:  nop
-    IL_0143:  ldloc.1
-    IL_0144:  ldarg.0
-    IL_0145:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f6
-    IL_014a:  ldarg.1
-    IL_014b:  ldc.i4.1
-    IL_014c:  add
-    IL_014d:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_0152:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0157:  nop
-    IL_0158:  ldloc.1
-    IL_0159:  ldloc.0
-    IL_015a:  ldstr      "[Field f7] [Type '{0}']"
-    IL_015f:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0164:  ldarg.0
-    IL_0165:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f7
-    IL_016a:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_016f:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0174:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0179:  nop
-    IL_017a:  ldloc.1
-    IL_017b:  ldarg.0
-    IL_017c:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f7
-    IL_0181:  ldarg.1
-    IL_0182:  ldc.i4.1
-    IL_0183:  add
-    IL_0184:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_0189:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_018e:  nop
-    IL_018f:  ldloc.1
-    IL_0190:  ldloc.0
-    IL_0191:  ldstr      "[Field f8] [Type '{0}']"
-    IL_0196:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_019b:  ldarg.0
-    IL_019c:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f8
-    IL_01a1:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_01a6:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_01ab:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_01b0:  nop
-    IL_01b1:  ldloc.1
-    IL_01b2:  ldarg.0
-    IL_01b3:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f8
-    IL_01b8:  ldarg.1
-    IL_01b9:  ldc.i4.1
-    IL_01ba:  add
-    IL_01bb:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_01c0:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_01c5:  nop
-    IL_01c6:  ldloc.1
-    IL_01c7:  ldloc.0
-    IL_01c8:  ldstr      "[Field f9] [Type '{0}']"
-    IL_01cd:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_01d2:  ldarg.0
-    IL_01d3:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f9
-    IL_01d8:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_01dd:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_01e2:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_01e7:  nop
-    IL_01e8:  ldloc.1
-    IL_01e9:  ldarg.0
-    IL_01ea:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f9
-    IL_01ef:  ldarg.1
-    IL_01f0:  ldc.i4.1
-    IL_01f1:  add
-    IL_01f2:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_01f7:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_01fc:  nop
-    IL_01fd:  ldloc.1
-    IL_01fe:  ldloc.0
-    IL_01ff:  ldstr      "[Field f10] [Type '{0}']"
-    IL_0204:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0209:  ldarg.0
-    IL_020a:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f10
-    IL_020f:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0214:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0219:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_021e:  nop
-    IL_021f:  ldloc.1
-    IL_0220:  ldarg.0
-    IL_0221:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f10
-    IL_0226:  ldarg.1
-    IL_0227:  ldc.i4.1
-    IL_0228:  add
-    IL_0229:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_022e:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0233:  nop
-    IL_0234:  ldloc.1
-    IL_0235:  ldloc.0
-    IL_0236:  ldstr      "[Field f11] [Type '{0}']"
-    IL_023b:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0240:  ldarg.0
-    IL_0241:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f11
-    IL_0246:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_024b:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0250:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0255:  nop
-    IL_0256:  ldloc.1
-    IL_0257:  ldarg.0
-    IL_0258:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f11
-    IL_025d:  ldarg.1
-    IL_025e:  ldc.i4.1
-    IL_025f:  add
-    IL_0260:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_0265:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_026a:  nop
-    IL_026b:  ldloc.1
-    IL_026c:  ldloc.0
-    IL_026d:  ldstr      "[Field f12] [Type '{0}']"
-    IL_0272:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0277:  ldarg.0
-    IL_0278:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f12
-    IL_027d:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0282:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0287:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_028c:  nop
-    IL_028d:  ldloc.1
-    IL_028e:  ldarg.0
-    IL_028f:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f12
-    IL_0294:  ldarg.1
-    IL_0295:  ldc.i4.1
-    IL_0296:  add
-    IL_0297:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_029c:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_02a1:  nop
-    IL_02a2:  ldloc.1
-    IL_02a3:  ldloc.0
-    IL_02a4:  ldstr      "[Field f13] [Type '{0}']"
-    IL_02a9:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_02ae:  ldarg.0
-    IL_02af:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f13
-    IL_02b4:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_02b9:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_02be:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_02c3:  nop
-    IL_02c4:  ldloc.1
-    IL_02c5:  ldarg.0
-    IL_02c6:  ldfld      class MCCTest.RType4 MCCTest.VTypeA::f13
-    IL_02cb:  ldarg.1
-    IL_02cc:  ldc.i4.1
-    IL_02cd:  add
-    IL_02ce:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_02d3:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_02d8:  nop
-    IL_02d9:  ldloc.1
-    IL_02da:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_02df:  stloc.2
-    IL_02e0:  br.s       IL_02e2
-
-    IL_02e2:  ldloc.2
-    IL_02e3:  ret
-  } // end of method VTypeA::Dump
-
-  .method public hidebysig instance string
-          Dump() cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (string V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance string MCCTest.VTypeA::Dump(int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method VTypeA::Dump
-
-} // end of class MCCTest.VTypeA
-
-.class public sequential ansi sealed beforefieldinit MCCTest.VTypeB
-       extends [mscorlib]System.ValueType
-       implements class MCCTest.CType`1<valuetype MCCTest.VTypeB>
-{
-  .field public valuetype MCCTest.VType5 f1
-  .field public valuetype MCCTest.VType5 f2
-  .field public valuetype MCCTest.VType5 f3
-  .field public valuetype MCCTest.VType5 f4
-  .field public valuetype MCCTest.VType5 f5
-  .field public valuetype MCCTest.VType5 f6
-  .field public valuetype MCCTest.VType5 f7
-  .field public valuetype MCCTest.VType5 f8
-  .field public valuetype MCCTest.VType5 f9
-  .field public valuetype MCCTest.VType5 f10
-  .field public valuetype MCCTest.VType5 f11
-  .field public valuetype MCCTest.VType5 f12
-  .field public valuetype MCCTest.VType5 f13
-  .field public valuetype MCCTest.VType5 f14
-  .field public valuetype MCCTest.VType5 f15
-  .field public valuetype MCCTest.VType5 f16
-  .field public valuetype MCCTest.VType5 f17
-  .field public valuetype MCCTest.VType5 f18
-  .field public valuetype MCCTest.VType5 f19
-  .method public hidebysig newslot virtual final
-          instance void  Init(int32 count) cil managed
-  {
-    // Code size       249 (0xf9)
-    .maxstack  2
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f1
-    IL_0007:  ldarg.1
-    IL_0008:  call       instance void MCCTest.VType5::Init(int32)
-    IL_000d:  nop
-    IL_000e:  ldarg.0
-    IL_000f:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f2
-    IL_0014:  ldarg.1
-    IL_0015:  call       instance void MCCTest.VType5::Init(int32)
-    IL_001a:  nop
-    IL_001b:  ldarg.0
-    IL_001c:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f3
-    IL_0021:  ldarg.1
-    IL_0022:  call       instance void MCCTest.VType5::Init(int32)
-    IL_0027:  nop
-    IL_0028:  ldarg.0
-    IL_0029:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f4
-    IL_002e:  ldarg.1
-    IL_002f:  call       instance void MCCTest.VType5::Init(int32)
-    IL_0034:  nop
-    IL_0035:  ldarg.0
-    IL_0036:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f5
-    IL_003b:  ldarg.1
-    IL_003c:  call       instance void MCCTest.VType5::Init(int32)
-    IL_0041:  nop
-    IL_0042:  ldarg.0
-    IL_0043:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f6
-    IL_0048:  ldarg.1
-    IL_0049:  call       instance void MCCTest.VType5::Init(int32)
-    IL_004e:  nop
-    IL_004f:  ldarg.0
-    IL_0050:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f7
-    IL_0055:  ldarg.1
-    IL_0056:  call       instance void MCCTest.VType5::Init(int32)
-    IL_005b:  nop
-    IL_005c:  ldarg.0
-    IL_005d:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f8
-    IL_0062:  ldarg.1
-    IL_0063:  call       instance void MCCTest.VType5::Init(int32)
-    IL_0068:  nop
-    IL_0069:  ldarg.0
-    IL_006a:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f9
-    IL_006f:  ldarg.1
-    IL_0070:  call       instance void MCCTest.VType5::Init(int32)
-    IL_0075:  nop
-    IL_0076:  ldarg.0
-    IL_0077:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f10
-    IL_007c:  ldarg.1
-    IL_007d:  call       instance void MCCTest.VType5::Init(int32)
-    IL_0082:  nop
-    IL_0083:  ldarg.0
-    IL_0084:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f11
-    IL_0089:  ldarg.1
-    IL_008a:  call       instance void MCCTest.VType5::Init(int32)
-    IL_008f:  nop
-    IL_0090:  ldarg.0
-    IL_0091:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f12
-    IL_0096:  ldarg.1
-    IL_0097:  call       instance void MCCTest.VType5::Init(int32)
-    IL_009c:  nop
-    IL_009d:  ldarg.0
-    IL_009e:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f13
-    IL_00a3:  ldarg.1
-    IL_00a4:  call       instance void MCCTest.VType5::Init(int32)
-    IL_00a9:  nop
-    IL_00aa:  ldarg.0
-    IL_00ab:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f14
-    IL_00b0:  ldarg.1
-    IL_00b1:  call       instance void MCCTest.VType5::Init(int32)
-    IL_00b6:  nop
-    IL_00b7:  ldarg.0
-    IL_00b8:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f15
-    IL_00bd:  ldarg.1
-    IL_00be:  call       instance void MCCTest.VType5::Init(int32)
-    IL_00c3:  nop
-    IL_00c4:  ldarg.0
-    IL_00c5:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f16
-    IL_00ca:  ldarg.1
-    IL_00cb:  call       instance void MCCTest.VType5::Init(int32)
-    IL_00d0:  nop
-    IL_00d1:  ldarg.0
-    IL_00d2:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f17
-    IL_00d7:  ldarg.1
-    IL_00d8:  call       instance void MCCTest.VType5::Init(int32)
-    IL_00dd:  nop
-    IL_00de:  ldarg.0
-    IL_00df:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f18
-    IL_00e4:  ldarg.1
-    IL_00e5:  call       instance void MCCTest.VType5::Init(int32)
-    IL_00ea:  nop
-    IL_00eb:  ldarg.0
-    IL_00ec:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f19
-    IL_00f1:  ldarg.1
-    IL_00f2:  call       instance void MCCTest.VType5::Init(int32)
-    IL_00f7:  nop
-    IL_00f8:  ret
-  } // end of method VTypeB::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Init() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.1
-    IL_0003:  call       instance void MCCTest.VTypeB::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VTypeB::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Zero() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance void MCCTest.VTypeB::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VTypeB::Zero
-
-  .method public hidebysig instance void
-          Add(valuetype MCCTest.VTypeB val) cil managed
-  {
-    // Code size       363 (0x16b)
-    .maxstack  2
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f1
-    IL_0007:  ldarga.s   val
-    IL_0009:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f1
-    IL_000e:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0013:  nop
-    IL_0014:  ldarg.0
-    IL_0015:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f2
-    IL_001a:  ldarga.s   val
-    IL_001c:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f2
-    IL_0021:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0026:  nop
-    IL_0027:  ldarg.0
-    IL_0028:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f3
-    IL_002d:  ldarga.s   val
-    IL_002f:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f3
-    IL_0034:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0039:  nop
-    IL_003a:  ldarg.0
-    IL_003b:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f4
-    IL_0040:  ldarga.s   val
-    IL_0042:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f4
-    IL_0047:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_004c:  nop
-    IL_004d:  ldarg.0
-    IL_004e:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f5
-    IL_0053:  ldarga.s   val
-    IL_0055:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f5
-    IL_005a:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_005f:  nop
-    IL_0060:  ldarg.0
-    IL_0061:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f6
-    IL_0066:  ldarga.s   val
-    IL_0068:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f6
-    IL_006d:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0072:  nop
-    IL_0073:  ldarg.0
-    IL_0074:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f7
-    IL_0079:  ldarga.s   val
-    IL_007b:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f7
-    IL_0080:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0085:  nop
-    IL_0086:  ldarg.0
-    IL_0087:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f8
-    IL_008c:  ldarga.s   val
-    IL_008e:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f8
-    IL_0093:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0098:  nop
-    IL_0099:  ldarg.0
-    IL_009a:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f9
-    IL_009f:  ldarga.s   val
-    IL_00a1:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f9
-    IL_00a6:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_00ab:  nop
-    IL_00ac:  ldarg.0
-    IL_00ad:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f10
-    IL_00b2:  ldarga.s   val
-    IL_00b4:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f10
-    IL_00b9:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_00be:  nop
-    IL_00bf:  ldarg.0
-    IL_00c0:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f11
-    IL_00c5:  ldarga.s   val
-    IL_00c7:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f11
-    IL_00cc:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_00d1:  nop
-    IL_00d2:  ldarg.0
-    IL_00d3:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f12
-    IL_00d8:  ldarga.s   val
-    IL_00da:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f12
-    IL_00df:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_00e4:  nop
-    IL_00e5:  ldarg.0
-    IL_00e6:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f13
-    IL_00eb:  ldarga.s   val
-    IL_00ed:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f13
-    IL_00f2:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_00f7:  nop
-    IL_00f8:  ldarg.0
-    IL_00f9:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f14
-    IL_00fe:  ldarga.s   val
-    IL_0100:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f14
-    IL_0105:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_010a:  nop
-    IL_010b:  ldarg.0
-    IL_010c:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f15
-    IL_0111:  ldarga.s   val
-    IL_0113:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f15
-    IL_0118:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_011d:  nop
-    IL_011e:  ldarg.0
-    IL_011f:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f16
-    IL_0124:  ldarga.s   val
-    IL_0126:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f16
-    IL_012b:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0130:  nop
-    IL_0131:  ldarg.0
-    IL_0132:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f17
-    IL_0137:  ldarga.s   val
-    IL_0139:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f17
-    IL_013e:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0143:  nop
-    IL_0144:  ldarg.0
-    IL_0145:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f18
-    IL_014a:  ldarga.s   val
-    IL_014c:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f18
-    IL_0151:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0156:  nop
-    IL_0157:  ldarg.0
-    IL_0158:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f19
-    IL_015d:  ldarga.s   val
-    IL_015f:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f19
-    IL_0164:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0169:  nop
-    IL_016a:  ret
-  } // end of method VTypeB::Add
-
-  .method public hidebysig newslot virtual final
-          instance void  Check(valuetype MCCTest.VTypeB expected) cil managed
-  {
-    // Code size       820 (0x334)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.Type V_1,
-             class MCCTest.ResultVerificationException V_2)
-    IL_0000:  nop
-    IL_0001:  ldnull
-    IL_0002:  stloc.0
-    IL_0003:  ldnull
-    IL_0004:  stloc.1
-    .try
-    {
-      IL_0005:  nop
-      IL_0006:  ldstr      "f1"
-      IL_000b:  stloc.0
-      IL_000c:  ldarg.0
-      IL_000d:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f1
-      IL_0012:  box        MCCTest.VType5
-      IL_0017:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_001c:  stloc.1
-      IL_001d:  ldarg.0
-      IL_001e:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f1
-      IL_0023:  ldarga.s   expected
-      IL_0025:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f1
-      IL_002a:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_002f:  nop
-      IL_0030:  ldstr      "f2"
-      IL_0035:  stloc.0
-      IL_0036:  ldarg.0
-      IL_0037:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f2
-      IL_003c:  box        MCCTest.VType5
-      IL_0041:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0046:  stloc.1
-      IL_0047:  ldarg.0
-      IL_0048:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f2
-      IL_004d:  ldarga.s   expected
-      IL_004f:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f2
-      IL_0054:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_0059:  nop
-      IL_005a:  ldstr      "f3"
-      IL_005f:  stloc.0
-      IL_0060:  ldarg.0
-      IL_0061:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f3
-      IL_0066:  box        MCCTest.VType5
-      IL_006b:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0070:  stloc.1
-      IL_0071:  ldarg.0
-      IL_0072:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f3
-      IL_0077:  ldarga.s   expected
-      IL_0079:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f3
-      IL_007e:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_0083:  nop
-      IL_0084:  ldstr      "f4"
-      IL_0089:  stloc.0
-      IL_008a:  ldarg.0
-      IL_008b:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f4
-      IL_0090:  box        MCCTest.VType5
-      IL_0095:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_009a:  stloc.1
-      IL_009b:  ldarg.0
-      IL_009c:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f4
-      IL_00a1:  ldarga.s   expected
-      IL_00a3:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f4
-      IL_00a8:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_00ad:  nop
-      IL_00ae:  ldstr      "f5"
-      IL_00b3:  stloc.0
-      IL_00b4:  ldarg.0
-      IL_00b5:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f5
-      IL_00ba:  box        MCCTest.VType5
-      IL_00bf:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_00c4:  stloc.1
-      IL_00c5:  ldarg.0
-      IL_00c6:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f5
-      IL_00cb:  ldarga.s   expected
-      IL_00cd:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f5
-      IL_00d2:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_00d7:  nop
-      IL_00d8:  ldstr      "f6"
-      IL_00dd:  stloc.0
-      IL_00de:  ldarg.0
-      IL_00df:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f6
-      IL_00e4:  box        MCCTest.VType5
-      IL_00e9:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_00ee:  stloc.1
-      IL_00ef:  ldarg.0
-      IL_00f0:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f6
-      IL_00f5:  ldarga.s   expected
-      IL_00f7:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f6
-      IL_00fc:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_0101:  nop
-      IL_0102:  ldstr      "f7"
-      IL_0107:  stloc.0
-      IL_0108:  ldarg.0
-      IL_0109:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f7
-      IL_010e:  box        MCCTest.VType5
-      IL_0113:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0118:  stloc.1
-      IL_0119:  ldarg.0
-      IL_011a:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f7
-      IL_011f:  ldarga.s   expected
-      IL_0121:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f7
-      IL_0126:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_012b:  nop
-      IL_012c:  ldstr      "f8"
-      IL_0131:  stloc.0
-      IL_0132:  ldarg.0
-      IL_0133:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f8
-      IL_0138:  box        MCCTest.VType5
-      IL_013d:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0142:  stloc.1
-      IL_0143:  ldarg.0
-      IL_0144:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f8
-      IL_0149:  ldarga.s   expected
-      IL_014b:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f8
-      IL_0150:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_0155:  nop
-      IL_0156:  ldstr      "f9"
-      IL_015b:  stloc.0
-      IL_015c:  ldarg.0
-      IL_015d:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f9
-      IL_0162:  box        MCCTest.VType5
-      IL_0167:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_016c:  stloc.1
-      IL_016d:  ldarg.0
-      IL_016e:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f9
-      IL_0173:  ldarga.s   expected
-      IL_0175:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f9
-      IL_017a:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_017f:  nop
-      IL_0180:  ldstr      "f10"
-      IL_0185:  stloc.0
-      IL_0186:  ldarg.0
-      IL_0187:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f10
-      IL_018c:  box        MCCTest.VType5
-      IL_0191:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0196:  stloc.1
-      IL_0197:  ldarg.0
-      IL_0198:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f10
-      IL_019d:  ldarga.s   expected
-      IL_019f:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f10
-      IL_01a4:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_01a9:  nop
-      IL_01aa:  ldstr      "f11"
-      IL_01af:  stloc.0
-      IL_01b0:  ldarg.0
-      IL_01b1:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f11
-      IL_01b6:  box        MCCTest.VType5
-      IL_01bb:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_01c0:  stloc.1
-      IL_01c1:  ldarg.0
-      IL_01c2:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f11
-      IL_01c7:  ldarga.s   expected
-      IL_01c9:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f11
-      IL_01ce:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_01d3:  nop
-      IL_01d4:  ldstr      "f12"
-      IL_01d9:  stloc.0
-      IL_01da:  ldarg.0
-      IL_01db:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f12
-      IL_01e0:  box        MCCTest.VType5
-      IL_01e5:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_01ea:  stloc.1
-      IL_01eb:  ldarg.0
-      IL_01ec:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f12
-      IL_01f1:  ldarga.s   expected
-      IL_01f3:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f12
-      IL_01f8:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_01fd:  nop
-      IL_01fe:  ldstr      "f13"
-      IL_0203:  stloc.0
-      IL_0204:  ldarg.0
-      IL_0205:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f13
-      IL_020a:  box        MCCTest.VType5
-      IL_020f:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0214:  stloc.1
-      IL_0215:  ldarg.0
-      IL_0216:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f13
-      IL_021b:  ldarga.s   expected
-      IL_021d:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f13
-      IL_0222:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_0227:  nop
-      IL_0228:  ldstr      "f14"
-      IL_022d:  stloc.0
-      IL_022e:  ldarg.0
-      IL_022f:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f14
-      IL_0234:  box        MCCTest.VType5
-      IL_0239:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_023e:  stloc.1
-      IL_023f:  ldarg.0
-      IL_0240:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f14
-      IL_0245:  ldarga.s   expected
-      IL_0247:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f14
-      IL_024c:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_0251:  nop
-      IL_0252:  ldstr      "f15"
-      IL_0257:  stloc.0
-      IL_0258:  ldarg.0
-      IL_0259:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f15
-      IL_025e:  box        MCCTest.VType5
-      IL_0263:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0268:  stloc.1
-      IL_0269:  ldarg.0
-      IL_026a:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f15
-      IL_026f:  ldarga.s   expected
-      IL_0271:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f15
-      IL_0276:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_027b:  nop
-      IL_027c:  ldstr      "f16"
-      IL_0281:  stloc.0
-      IL_0282:  ldarg.0
-      IL_0283:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f16
-      IL_0288:  box        MCCTest.VType5
-      IL_028d:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0292:  stloc.1
-      IL_0293:  ldarg.0
-      IL_0294:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f16
-      IL_0299:  ldarga.s   expected
-      IL_029b:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f16
-      IL_02a0:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_02a5:  nop
-      IL_02a6:  ldstr      "f17"
-      IL_02ab:  stloc.0
-      IL_02ac:  ldarg.0
-      IL_02ad:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f17
-      IL_02b2:  box        MCCTest.VType5
-      IL_02b7:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_02bc:  stloc.1
-      IL_02bd:  ldarg.0
-      IL_02be:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f17
-      IL_02c3:  ldarga.s   expected
-      IL_02c5:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f17
-      IL_02ca:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_02cf:  nop
-      IL_02d0:  ldstr      "f18"
-      IL_02d5:  stloc.0
-      IL_02d6:  ldarg.0
-      IL_02d7:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f18
-      IL_02dc:  box        MCCTest.VType5
-      IL_02e1:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_02e6:  stloc.1
-      IL_02e7:  ldarg.0
-      IL_02e8:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f18
-      IL_02ed:  ldarga.s   expected
-      IL_02ef:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f18
-      IL_02f4:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_02f9:  nop
-      IL_02fa:  ldstr      "f19"
-      IL_02ff:  stloc.0
-      IL_0300:  ldarg.0
-      IL_0301:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f19
-      IL_0306:  box        MCCTest.VType5
-      IL_030b:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0310:  stloc.1
-      IL_0311:  ldarg.0
-      IL_0312:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f19
-      IL_0317:  ldarga.s   expected
-      IL_0319:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f19
-      IL_031e:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_0323:  nop
-      IL_0324:  nop
-      IL_0325:  leave.s    IL_0332
-
-    }  // end .try
-    catch MCCTest.ResultVerificationException
-    {
-      IL_0327:  stloc.2
-      IL_0328:  nop
-      IL_0329:  ldloc.0
-      IL_032a:  ldloc.1
-      IL_032b:  ldloc.2
-      IL_032c:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                    class [mscorlib]System.Type,
-                                                                                    class MCCTest.ResultVerificationException)
-      IL_0331:  throw
-
-    }  // end handler
-    IL_0332:  nop
-    IL_0333:  ret
-  } // end of method VTypeB::Check
-
-  .method public hidebysig instance string
-          Dump(int32 level) cil managed
-  {
-    // Code size       1165 (0x48d)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.IO.StringWriter V_1,
-             string V_2)
-    IL_0000:  nop
-    IL_0001:  ldarg.1
-    IL_0002:  call       string MCCTest.FormatUtils::GetPadding(int32)
-    IL_0007:  stloc.0
-    IL_0008:  newobj     instance void [mscorlib]System.IO.StringWriter::.ctor()
-    IL_000d:  stloc.1
-    IL_000e:  ldloc.1
-    IL_000f:  ldloc.0
-    IL_0010:  ldstr      "[Field f1] [Type '{0}']"
-    IL_0015:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_001a:  ldarg.0
-    IL_001b:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f1
-    IL_0020:  box        MCCTest.VType5
-    IL_0025:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_002a:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_002f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0034:  nop
-    IL_0035:  ldloc.1
-    IL_0036:  ldarg.0
-    IL_0037:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f1
-    IL_003c:  ldarg.1
-    IL_003d:  ldc.i4.1
-    IL_003e:  add
-    IL_003f:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_0044:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0049:  nop
-    IL_004a:  ldloc.1
-    IL_004b:  ldloc.0
-    IL_004c:  ldstr      "[Field f2] [Type '{0}']"
-    IL_0051:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0056:  ldarg.0
-    IL_0057:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f2
-    IL_005c:  box        MCCTest.VType5
-    IL_0061:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0066:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_006b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0070:  nop
-    IL_0071:  ldloc.1
-    IL_0072:  ldarg.0
-    IL_0073:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f2
-    IL_0078:  ldarg.1
-    IL_0079:  ldc.i4.1
-    IL_007a:  add
-    IL_007b:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_0080:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0085:  nop
-    IL_0086:  ldloc.1
-    IL_0087:  ldloc.0
-    IL_0088:  ldstr      "[Field f3] [Type '{0}']"
-    IL_008d:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0092:  ldarg.0
-    IL_0093:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f3
-    IL_0098:  box        MCCTest.VType5
-    IL_009d:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_00a2:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_00a7:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_00ac:  nop
-    IL_00ad:  ldloc.1
-    IL_00ae:  ldarg.0
-    IL_00af:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f3
-    IL_00b4:  ldarg.1
-    IL_00b5:  ldc.i4.1
-    IL_00b6:  add
-    IL_00b7:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_00bc:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00c1:  nop
-    IL_00c2:  ldloc.1
-    IL_00c3:  ldloc.0
-    IL_00c4:  ldstr      "[Field f4] [Type '{0}']"
-    IL_00c9:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_00ce:  ldarg.0
-    IL_00cf:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f4
-    IL_00d4:  box        MCCTest.VType5
-    IL_00d9:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_00de:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_00e3:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_00e8:  nop
-    IL_00e9:  ldloc.1
-    IL_00ea:  ldarg.0
-    IL_00eb:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f4
-    IL_00f0:  ldarg.1
-    IL_00f1:  ldc.i4.1
-    IL_00f2:  add
-    IL_00f3:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_00f8:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00fd:  nop
-    IL_00fe:  ldloc.1
-    IL_00ff:  ldloc.0
-    IL_0100:  ldstr      "[Field f5] [Type '{0}']"
-    IL_0105:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_010a:  ldarg.0
-    IL_010b:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f5
-    IL_0110:  box        MCCTest.VType5
-    IL_0115:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_011a:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_011f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0124:  nop
-    IL_0125:  ldloc.1
-    IL_0126:  ldarg.0
-    IL_0127:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f5
-    IL_012c:  ldarg.1
-    IL_012d:  ldc.i4.1
-    IL_012e:  add
-    IL_012f:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_0134:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0139:  nop
-    IL_013a:  ldloc.1
-    IL_013b:  ldloc.0
-    IL_013c:  ldstr      "[Field f6] [Type '{0}']"
-    IL_0141:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0146:  ldarg.0
-    IL_0147:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f6
-    IL_014c:  box        MCCTest.VType5
-    IL_0151:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0156:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_015b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0160:  nop
-    IL_0161:  ldloc.1
-    IL_0162:  ldarg.0
-    IL_0163:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f6
-    IL_0168:  ldarg.1
-    IL_0169:  ldc.i4.1
-    IL_016a:  add
-    IL_016b:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_0170:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0175:  nop
-    IL_0176:  ldloc.1
-    IL_0177:  ldloc.0
-    IL_0178:  ldstr      "[Field f7] [Type '{0}']"
-    IL_017d:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0182:  ldarg.0
-    IL_0183:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f7
-    IL_0188:  box        MCCTest.VType5
-    IL_018d:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0192:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0197:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_019c:  nop
-    IL_019d:  ldloc.1
-    IL_019e:  ldarg.0
-    IL_019f:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f7
-    IL_01a4:  ldarg.1
-    IL_01a5:  ldc.i4.1
-    IL_01a6:  add
-    IL_01a7:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_01ac:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_01b1:  nop
-    IL_01b2:  ldloc.1
-    IL_01b3:  ldloc.0
-    IL_01b4:  ldstr      "[Field f8] [Type '{0}']"
-    IL_01b9:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_01be:  ldarg.0
-    IL_01bf:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f8
-    IL_01c4:  box        MCCTest.VType5
-    IL_01c9:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_01ce:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_01d3:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_01d8:  nop
-    IL_01d9:  ldloc.1
-    IL_01da:  ldarg.0
-    IL_01db:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f8
-    IL_01e0:  ldarg.1
-    IL_01e1:  ldc.i4.1
-    IL_01e2:  add
-    IL_01e3:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_01e8:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_01ed:  nop
-    IL_01ee:  ldloc.1
-    IL_01ef:  ldloc.0
-    IL_01f0:  ldstr      "[Field f9] [Type '{0}']"
-    IL_01f5:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_01fa:  ldarg.0
-    IL_01fb:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f9
-    IL_0200:  box        MCCTest.VType5
-    IL_0205:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_020a:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_020f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0214:  nop
-    IL_0215:  ldloc.1
-    IL_0216:  ldarg.0
-    IL_0217:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f9
-    IL_021c:  ldarg.1
-    IL_021d:  ldc.i4.1
-    IL_021e:  add
-    IL_021f:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_0224:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0229:  nop
-    IL_022a:  ldloc.1
-    IL_022b:  ldloc.0
-    IL_022c:  ldstr      "[Field f10] [Type '{0}']"
-    IL_0231:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0236:  ldarg.0
-    IL_0237:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f10
-    IL_023c:  box        MCCTest.VType5
-    IL_0241:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0246:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_024b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0250:  nop
-    IL_0251:  ldloc.1
-    IL_0252:  ldarg.0
-    IL_0253:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f10
-    IL_0258:  ldarg.1
-    IL_0259:  ldc.i4.1
-    IL_025a:  add
-    IL_025b:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_0260:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0265:  nop
-    IL_0266:  ldloc.1
-    IL_0267:  ldloc.0
-    IL_0268:  ldstr      "[Field f11] [Type '{0}']"
-    IL_026d:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0272:  ldarg.0
-    IL_0273:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f11
-    IL_0278:  box        MCCTest.VType5
-    IL_027d:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0282:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0287:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_028c:  nop
-    IL_028d:  ldloc.1
-    IL_028e:  ldarg.0
-    IL_028f:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f11
-    IL_0294:  ldarg.1
-    IL_0295:  ldc.i4.1
-    IL_0296:  add
-    IL_0297:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_029c:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_02a1:  nop
-    IL_02a2:  ldloc.1
-    IL_02a3:  ldloc.0
-    IL_02a4:  ldstr      "[Field f12] [Type '{0}']"
-    IL_02a9:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_02ae:  ldarg.0
-    IL_02af:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f12
-    IL_02b4:  box        MCCTest.VType5
-    IL_02b9:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_02be:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_02c3:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_02c8:  nop
-    IL_02c9:  ldloc.1
-    IL_02ca:  ldarg.0
-    IL_02cb:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f12
-    IL_02d0:  ldarg.1
-    IL_02d1:  ldc.i4.1
-    IL_02d2:  add
-    IL_02d3:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_02d8:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_02dd:  nop
-    IL_02de:  ldloc.1
-    IL_02df:  ldloc.0
-    IL_02e0:  ldstr      "[Field f13] [Type '{0}']"
-    IL_02e5:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_02ea:  ldarg.0
-    IL_02eb:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f13
-    IL_02f0:  box        MCCTest.VType5
-    IL_02f5:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_02fa:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_02ff:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0304:  nop
-    IL_0305:  ldloc.1
-    IL_0306:  ldarg.0
-    IL_0307:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f13
-    IL_030c:  ldarg.1
-    IL_030d:  ldc.i4.1
-    IL_030e:  add
-    IL_030f:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_0314:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0319:  nop
-    IL_031a:  ldloc.1
-    IL_031b:  ldloc.0
-    IL_031c:  ldstr      "[Field f14] [Type '{0}']"
-    IL_0321:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0326:  ldarg.0
-    IL_0327:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f14
-    IL_032c:  box        MCCTest.VType5
-    IL_0331:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0336:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_033b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0340:  nop
-    IL_0341:  ldloc.1
-    IL_0342:  ldarg.0
-    IL_0343:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f14
-    IL_0348:  ldarg.1
-    IL_0349:  ldc.i4.1
-    IL_034a:  add
-    IL_034b:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_0350:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0355:  nop
-    IL_0356:  ldloc.1
-    IL_0357:  ldloc.0
-    IL_0358:  ldstr      "[Field f15] [Type '{0}']"
-    IL_035d:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0362:  ldarg.0
-    IL_0363:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f15
-    IL_0368:  box        MCCTest.VType5
-    IL_036d:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0372:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0377:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_037c:  nop
-    IL_037d:  ldloc.1
-    IL_037e:  ldarg.0
-    IL_037f:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f15
-    IL_0384:  ldarg.1
-    IL_0385:  ldc.i4.1
-    IL_0386:  add
-    IL_0387:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_038c:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0391:  nop
-    IL_0392:  ldloc.1
-    IL_0393:  ldloc.0
-    IL_0394:  ldstr      "[Field f16] [Type '{0}']"
-    IL_0399:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_039e:  ldarg.0
-    IL_039f:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f16
-    IL_03a4:  box        MCCTest.VType5
-    IL_03a9:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_03ae:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_03b3:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_03b8:  nop
-    IL_03b9:  ldloc.1
-    IL_03ba:  ldarg.0
-    IL_03bb:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f16
-    IL_03c0:  ldarg.1
-    IL_03c1:  ldc.i4.1
-    IL_03c2:  add
-    IL_03c3:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_03c8:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_03cd:  nop
-    IL_03ce:  ldloc.1
-    IL_03cf:  ldloc.0
-    IL_03d0:  ldstr      "[Field f17] [Type '{0}']"
-    IL_03d5:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_03da:  ldarg.0
-    IL_03db:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f17
-    IL_03e0:  box        MCCTest.VType5
-    IL_03e5:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_03ea:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_03ef:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_03f4:  nop
-    IL_03f5:  ldloc.1
-    IL_03f6:  ldarg.0
-    IL_03f7:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f17
-    IL_03fc:  ldarg.1
-    IL_03fd:  ldc.i4.1
-    IL_03fe:  add
-    IL_03ff:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_0404:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0409:  nop
-    IL_040a:  ldloc.1
-    IL_040b:  ldloc.0
-    IL_040c:  ldstr      "[Field f18] [Type '{0}']"
-    IL_0411:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0416:  ldarg.0
-    IL_0417:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f18
-    IL_041c:  box        MCCTest.VType5
-    IL_0421:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0426:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_042b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0430:  nop
-    IL_0431:  ldloc.1
-    IL_0432:  ldarg.0
-    IL_0433:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f18
-    IL_0438:  ldarg.1
-    IL_0439:  ldc.i4.1
-    IL_043a:  add
-    IL_043b:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_0440:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0445:  nop
-    IL_0446:  ldloc.1
-    IL_0447:  ldloc.0
-    IL_0448:  ldstr      "[Field f19] [Type '{0}']"
-    IL_044d:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0452:  ldarg.0
-    IL_0453:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeB::f19
-    IL_0458:  box        MCCTest.VType5
-    IL_045d:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0462:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0467:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_046c:  nop
-    IL_046d:  ldloc.1
-    IL_046e:  ldarg.0
-    IL_046f:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeB::f19
-    IL_0474:  ldarg.1
-    IL_0475:  ldc.i4.1
-    IL_0476:  add
-    IL_0477:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_047c:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0481:  nop
-    IL_0482:  ldloc.1
-    IL_0483:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0488:  stloc.2
-    IL_0489:  br.s       IL_048b
-
-    IL_048b:  ldloc.2
-    IL_048c:  ret
-  } // end of method VTypeB::Dump
-
-  .method public hidebysig instance string
-          Dump() cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (string V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance string MCCTest.VTypeB::Dump(int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method VTypeB::Dump
-
-} // end of class MCCTest.VTypeB
-
-.class public sequential ansi sealed beforefieldinit MCCTest.VTypeC
-       extends [mscorlib]System.ValueType
-       implements class MCCTest.CType`1<valuetype MCCTest.VTypeC>
-{
-  .field public class MCCTest.RType4 f1
-  .field public valuetype MCCTest.VType5 f2
-  .field public valuetype MCCTest.VType6 f3
-  .field public class MCCTest.RType4 f4
-  .field public valuetype MCCTest.VType5 f5
-  .field public valuetype MCCTest.VType6 f6
-  .field public class MCCTest.RType4 f7
-  .field public valuetype MCCTest.VType5 f8
-  .field public valuetype MCCTest.VType6 f9
-  .field public class MCCTest.RType4 f10
-  .field public valuetype MCCTest.VType5 f11
-  .field public valuetype MCCTest.VType6 f12
-  .method public hidebysig newslot virtual final
-          instance void  Init(int32 count) cil managed
-  {
-    // Code size       202 (0xca)
-    .maxstack  2
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0007:  stfld      class MCCTest.RType4 MCCTest.VTypeC::f1
-    IL_000c:  ldarg.0
-    IL_000d:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0012:  stfld      class MCCTest.RType4 MCCTest.VTypeC::f4
-    IL_0017:  ldarg.0
-    IL_0018:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_001d:  stfld      class MCCTest.RType4 MCCTest.VTypeC::f7
-    IL_0022:  ldarg.0
-    IL_0023:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0028:  stfld      class MCCTest.RType4 MCCTest.VTypeC::f10
-    IL_002d:  ldarg.0
-    IL_002e:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f1
-    IL_0033:  ldarg.1
-    IL_0034:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_0039:  nop
-    IL_003a:  ldarg.0
-    IL_003b:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f2
-    IL_0040:  ldarg.1
-    IL_0041:  call       instance void MCCTest.VType5::Init(int32)
-    IL_0046:  nop
-    IL_0047:  ldarg.0
-    IL_0048:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f3
-    IL_004d:  ldarg.1
-    IL_004e:  call       instance void MCCTest.VType6::Init(int32)
-    IL_0053:  nop
-    IL_0054:  ldarg.0
-    IL_0055:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f4
-    IL_005a:  ldarg.1
-    IL_005b:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_0060:  nop
-    IL_0061:  ldarg.0
-    IL_0062:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f5
-    IL_0067:  ldarg.1
-    IL_0068:  call       instance void MCCTest.VType5::Init(int32)
-    IL_006d:  nop
-    IL_006e:  ldarg.0
-    IL_006f:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f6
-    IL_0074:  ldarg.1
-    IL_0075:  call       instance void MCCTest.VType6::Init(int32)
-    IL_007a:  nop
-    IL_007b:  ldarg.0
-    IL_007c:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f7
-    IL_0081:  ldarg.1
-    IL_0082:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_0087:  nop
-    IL_0088:  ldarg.0
-    IL_0089:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f8
-    IL_008e:  ldarg.1
-    IL_008f:  call       instance void MCCTest.VType5::Init(int32)
-    IL_0094:  nop
-    IL_0095:  ldarg.0
-    IL_0096:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f9
-    IL_009b:  ldarg.1
-    IL_009c:  call       instance void MCCTest.VType6::Init(int32)
-    IL_00a1:  nop
-    IL_00a2:  ldarg.0
-    IL_00a3:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f10
-    IL_00a8:  ldarg.1
-    IL_00a9:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_00ae:  nop
-    IL_00af:  ldarg.0
-    IL_00b0:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f11
-    IL_00b5:  ldarg.1
-    IL_00b6:  call       instance void MCCTest.VType5::Init(int32)
-    IL_00bb:  nop
-    IL_00bc:  ldarg.0
-    IL_00bd:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f12
-    IL_00c2:  ldarg.1
-    IL_00c3:  call       instance void MCCTest.VType6::Init(int32)
-    IL_00c8:  nop
-    IL_00c9:  ret
-  } // end of method VTypeC::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Init() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.1
-    IL_0003:  call       instance void MCCTest.VTypeC::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VTypeC::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Zero() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance void MCCTest.VTypeC::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VTypeC::Zero
-
-  .method public hidebysig instance void
-          Add(valuetype MCCTest.VTypeC val) cil managed
-  {
-    // Code size       230 (0xe6)
-    .maxstack  2
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f1
-    IL_0007:  ldarga.s   val
-    IL_0009:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f1
-    IL_000e:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_0013:  nop
-    IL_0014:  ldarg.0
-    IL_0015:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f2
-    IL_001a:  ldarga.s   val
-    IL_001c:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f2
-    IL_0021:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0026:  nop
-    IL_0027:  ldarg.0
-    IL_0028:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f3
-    IL_002d:  ldarga.s   val
-    IL_002f:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f3
-    IL_0034:  call       instance void MCCTest.VType6::Add(valuetype MCCTest.VType6)
-    IL_0039:  nop
-    IL_003a:  ldarg.0
-    IL_003b:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f4
-    IL_0040:  ldarga.s   val
-    IL_0042:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f4
-    IL_0047:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_004c:  nop
-    IL_004d:  ldarg.0
-    IL_004e:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f5
-    IL_0053:  ldarga.s   val
-    IL_0055:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f5
-    IL_005a:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_005f:  nop
-    IL_0060:  ldarg.0
-    IL_0061:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f6
-    IL_0066:  ldarga.s   val
-    IL_0068:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f6
-    IL_006d:  call       instance void MCCTest.VType6::Add(valuetype MCCTest.VType6)
-    IL_0072:  nop
-    IL_0073:  ldarg.0
-    IL_0074:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f7
-    IL_0079:  ldarga.s   val
-    IL_007b:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f7
-    IL_0080:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_0085:  nop
-    IL_0086:  ldarg.0
-    IL_0087:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f8
-    IL_008c:  ldarga.s   val
-    IL_008e:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f8
-    IL_0093:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0098:  nop
-    IL_0099:  ldarg.0
-    IL_009a:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f9
-    IL_009f:  ldarga.s   val
-    IL_00a1:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f9
-    IL_00a6:  call       instance void MCCTest.VType6::Add(valuetype MCCTest.VType6)
-    IL_00ab:  nop
-    IL_00ac:  ldarg.0
-    IL_00ad:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f10
-    IL_00b2:  ldarga.s   val
-    IL_00b4:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f10
-    IL_00b9:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_00be:  nop
-    IL_00bf:  ldarg.0
-    IL_00c0:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f11
-    IL_00c5:  ldarga.s   val
-    IL_00c7:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f11
-    IL_00cc:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_00d1:  nop
-    IL_00d2:  ldarg.0
-    IL_00d3:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f12
-    IL_00d8:  ldarga.s   val
-    IL_00da:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f12
-    IL_00df:  call       instance void MCCTest.VType6::Add(valuetype MCCTest.VType6)
-    IL_00e4:  nop
-    IL_00e5:  ret
-  } // end of method VTypeC::Add
-
-  .method public hidebysig newslot virtual final
-          instance void  Check(valuetype MCCTest.VTypeC expected) cil managed
-  {
-    // Code size       506 (0x1fa)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.Type V_1,
-             class MCCTest.ResultVerificationException V_2)
-    IL_0000:  nop
-    IL_0001:  ldnull
-    IL_0002:  stloc.0
-    IL_0003:  ldnull
-    IL_0004:  stloc.1
-    .try
-    {
-      IL_0005:  nop
-      IL_0006:  ldstr      "f1"
-      IL_000b:  stloc.0
-      IL_000c:  ldarg.0
-      IL_000d:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f1
-      IL_0012:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0017:  stloc.1
-      IL_0018:  ldarg.0
-      IL_0019:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f1
-      IL_001e:  ldarga.s   expected
-      IL_0020:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f1
-      IL_0025:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_002a:  nop
-      IL_002b:  ldstr      "f2"
-      IL_0030:  stloc.0
-      IL_0031:  ldarg.0
-      IL_0032:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f2
-      IL_0037:  box        MCCTest.VType5
-      IL_003c:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0041:  stloc.1
-      IL_0042:  ldarg.0
-      IL_0043:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f2
-      IL_0048:  ldarga.s   expected
-      IL_004a:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f2
-      IL_004f:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_0054:  nop
-      IL_0055:  ldstr      "f3"
-      IL_005a:  stloc.0
-      IL_005b:  ldarg.0
-      IL_005c:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f3
-      IL_0061:  box        MCCTest.VType6
-      IL_0066:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_006b:  stloc.1
-      IL_006c:  ldarg.0
-      IL_006d:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f3
-      IL_0072:  ldarga.s   expected
-      IL_0074:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f3
-      IL_0079:  call       instance void MCCTest.VType6::Check(valuetype MCCTest.VType6)
-      IL_007e:  nop
-      IL_007f:  ldstr      "f4"
-      IL_0084:  stloc.0
-      IL_0085:  ldarg.0
-      IL_0086:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f4
-      IL_008b:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0090:  stloc.1
-      IL_0091:  ldarg.0
-      IL_0092:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f4
-      IL_0097:  ldarga.s   expected
-      IL_0099:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f4
-      IL_009e:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_00a3:  nop
-      IL_00a4:  ldstr      "f5"
-      IL_00a9:  stloc.0
-      IL_00aa:  ldarg.0
-      IL_00ab:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f5
-      IL_00b0:  box        MCCTest.VType5
-      IL_00b5:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_00ba:  stloc.1
-      IL_00bb:  ldarg.0
-      IL_00bc:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f5
-      IL_00c1:  ldarga.s   expected
-      IL_00c3:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f5
-      IL_00c8:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_00cd:  nop
-      IL_00ce:  ldstr      "f6"
-      IL_00d3:  stloc.0
-      IL_00d4:  ldarg.0
-      IL_00d5:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f6
-      IL_00da:  box        MCCTest.VType6
-      IL_00df:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_00e4:  stloc.1
-      IL_00e5:  ldarg.0
-      IL_00e6:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f6
-      IL_00eb:  ldarga.s   expected
-      IL_00ed:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f6
-      IL_00f2:  call       instance void MCCTest.VType6::Check(valuetype MCCTest.VType6)
-      IL_00f7:  nop
-      IL_00f8:  ldstr      "f7"
-      IL_00fd:  stloc.0
-      IL_00fe:  ldarg.0
-      IL_00ff:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f7
-      IL_0104:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0109:  stloc.1
-      IL_010a:  ldarg.0
-      IL_010b:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f7
-      IL_0110:  ldarga.s   expected
-      IL_0112:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f7
-      IL_0117:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_011c:  nop
-      IL_011d:  ldstr      "f8"
-      IL_0122:  stloc.0
-      IL_0123:  ldarg.0
-      IL_0124:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f8
-      IL_0129:  box        MCCTest.VType5
-      IL_012e:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0133:  stloc.1
-      IL_0134:  ldarg.0
-      IL_0135:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f8
-      IL_013a:  ldarga.s   expected
-      IL_013c:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f8
-      IL_0141:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_0146:  nop
-      IL_0147:  ldstr      "f9"
-      IL_014c:  stloc.0
-      IL_014d:  ldarg.0
-      IL_014e:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f9
-      IL_0153:  box        MCCTest.VType6
-      IL_0158:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_015d:  stloc.1
-      IL_015e:  ldarg.0
-      IL_015f:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f9
-      IL_0164:  ldarga.s   expected
-      IL_0166:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f9
-      IL_016b:  call       instance void MCCTest.VType6::Check(valuetype MCCTest.VType6)
-      IL_0170:  nop
-      IL_0171:  ldstr      "f10"
-      IL_0176:  stloc.0
-      IL_0177:  ldarg.0
-      IL_0178:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f10
-      IL_017d:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0182:  stloc.1
-      IL_0183:  ldarg.0
-      IL_0184:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f10
-      IL_0189:  ldarga.s   expected
-      IL_018b:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f10
-      IL_0190:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_0195:  nop
-      IL_0196:  ldstr      "f11"
-      IL_019b:  stloc.0
-      IL_019c:  ldarg.0
-      IL_019d:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f11
-      IL_01a2:  box        MCCTest.VType5
-      IL_01a7:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_01ac:  stloc.1
-      IL_01ad:  ldarg.0
-      IL_01ae:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f11
-      IL_01b3:  ldarga.s   expected
-      IL_01b5:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f11
-      IL_01ba:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_01bf:  nop
-      IL_01c0:  ldstr      "f12"
-      IL_01c5:  stloc.0
-      IL_01c6:  ldarg.0
-      IL_01c7:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f12
-      IL_01cc:  box        MCCTest.VType6
-      IL_01d1:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_01d6:  stloc.1
-      IL_01d7:  ldarg.0
-      IL_01d8:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f12
-      IL_01dd:  ldarga.s   expected
-      IL_01df:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f12
-      IL_01e4:  call       instance void MCCTest.VType6::Check(valuetype MCCTest.VType6)
-      IL_01e9:  nop
-      IL_01ea:  nop
-      IL_01eb:  leave.s    IL_01f8
-
-    }  // end .try
-    catch MCCTest.ResultVerificationException
-    {
-      IL_01ed:  stloc.2
-      IL_01ee:  nop
-      IL_01ef:  ldloc.0
-      IL_01f0:  ldloc.1
-      IL_01f1:  ldloc.2
-      IL_01f2:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                    class [mscorlib]System.Type,
-                                                                                    class MCCTest.ResultVerificationException)
-      IL_01f7:  throw
-
-    }  // end handler
-    IL_01f8:  nop
-    IL_01f9:  ret
-  } // end of method VTypeC::Check
-
-  .method public hidebysig instance string
-          Dump(int32 level) cil managed
-  {
-    // Code size       725 (0x2d5)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.IO.StringWriter V_1,
-             string V_2)
-    IL_0000:  nop
-    IL_0001:  ldarg.1
-    IL_0002:  call       string MCCTest.FormatUtils::GetPadding(int32)
-    IL_0007:  stloc.0
-    IL_0008:  newobj     instance void [mscorlib]System.IO.StringWriter::.ctor()
-    IL_000d:  stloc.1
-    IL_000e:  ldloc.1
-    IL_000f:  ldloc.0
-    IL_0010:  ldstr      "[Field f1] [Type '{0}']"
-    IL_0015:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_001a:  ldarg.0
-    IL_001b:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f1
-    IL_0020:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0025:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_002a:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_002f:  nop
-    IL_0030:  ldloc.1
-    IL_0031:  ldarg.0
-    IL_0032:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f1
-    IL_0037:  ldarg.1
-    IL_0038:  ldc.i4.1
-    IL_0039:  add
-    IL_003a:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_003f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0044:  nop
-    IL_0045:  ldloc.1
-    IL_0046:  ldloc.0
-    IL_0047:  ldstr      "[Field f2] [Type '{0}']"
-    IL_004c:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0051:  ldarg.0
-    IL_0052:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f2
-    IL_0057:  box        MCCTest.VType5
-    IL_005c:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0061:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0066:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_006b:  nop
-    IL_006c:  ldloc.1
-    IL_006d:  ldarg.0
-    IL_006e:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f2
-    IL_0073:  ldarg.1
-    IL_0074:  ldc.i4.1
-    IL_0075:  add
-    IL_0076:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_007b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0080:  nop
-    IL_0081:  ldloc.1
-    IL_0082:  ldloc.0
-    IL_0083:  ldstr      "[Field f3] [Type '{0}']"
-    IL_0088:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_008d:  ldarg.0
-    IL_008e:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f3
-    IL_0093:  box        MCCTest.VType6
-    IL_0098:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_009d:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_00a2:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_00a7:  nop
-    IL_00a8:  ldloc.1
-    IL_00a9:  ldarg.0
-    IL_00aa:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f3
-    IL_00af:  ldarg.1
-    IL_00b0:  ldc.i4.1
-    IL_00b1:  add
-    IL_00b2:  call       instance string MCCTest.VType6::Dump(int32)
-    IL_00b7:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00bc:  nop
-    IL_00bd:  ldloc.1
-    IL_00be:  ldloc.0
-    IL_00bf:  ldstr      "[Field f4] [Type '{0}']"
-    IL_00c4:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_00c9:  ldarg.0
-    IL_00ca:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f4
-    IL_00cf:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_00d4:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_00d9:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_00de:  nop
-    IL_00df:  ldloc.1
-    IL_00e0:  ldarg.0
-    IL_00e1:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f4
-    IL_00e6:  ldarg.1
-    IL_00e7:  ldc.i4.1
-    IL_00e8:  add
-    IL_00e9:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_00ee:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00f3:  nop
-    IL_00f4:  ldloc.1
-    IL_00f5:  ldloc.0
-    IL_00f6:  ldstr      "[Field f5] [Type '{0}']"
-    IL_00fb:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0100:  ldarg.0
-    IL_0101:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f5
-    IL_0106:  box        MCCTest.VType5
-    IL_010b:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0110:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0115:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_011a:  nop
-    IL_011b:  ldloc.1
-    IL_011c:  ldarg.0
-    IL_011d:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f5
-    IL_0122:  ldarg.1
-    IL_0123:  ldc.i4.1
-    IL_0124:  add
-    IL_0125:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_012a:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_012f:  nop
-    IL_0130:  ldloc.1
-    IL_0131:  ldloc.0
-    IL_0132:  ldstr      "[Field f6] [Type '{0}']"
-    IL_0137:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_013c:  ldarg.0
-    IL_013d:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f6
-    IL_0142:  box        MCCTest.VType6
-    IL_0147:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_014c:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0151:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0156:  nop
-    IL_0157:  ldloc.1
-    IL_0158:  ldarg.0
-    IL_0159:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f6
-    IL_015e:  ldarg.1
-    IL_015f:  ldc.i4.1
-    IL_0160:  add
-    IL_0161:  call       instance string MCCTest.VType6::Dump(int32)
-    IL_0166:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_016b:  nop
-    IL_016c:  ldloc.1
-    IL_016d:  ldloc.0
-    IL_016e:  ldstr      "[Field f7] [Type '{0}']"
-    IL_0173:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0178:  ldarg.0
-    IL_0179:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f7
-    IL_017e:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0183:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0188:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_018d:  nop
-    IL_018e:  ldloc.1
-    IL_018f:  ldarg.0
-    IL_0190:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f7
-    IL_0195:  ldarg.1
-    IL_0196:  ldc.i4.1
-    IL_0197:  add
-    IL_0198:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_019d:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_01a2:  nop
-    IL_01a3:  ldloc.1
-    IL_01a4:  ldloc.0
-    IL_01a5:  ldstr      "[Field f8] [Type '{0}']"
-    IL_01aa:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_01af:  ldarg.0
-    IL_01b0:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f8
-    IL_01b5:  box        MCCTest.VType5
-    IL_01ba:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_01bf:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_01c4:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_01c9:  nop
-    IL_01ca:  ldloc.1
-    IL_01cb:  ldarg.0
-    IL_01cc:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f8
-    IL_01d1:  ldarg.1
-    IL_01d2:  ldc.i4.1
-    IL_01d3:  add
-    IL_01d4:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_01d9:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_01de:  nop
-    IL_01df:  ldloc.1
-    IL_01e0:  ldloc.0
-    IL_01e1:  ldstr      "[Field f9] [Type '{0}']"
-    IL_01e6:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_01eb:  ldarg.0
-    IL_01ec:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f9
-    IL_01f1:  box        MCCTest.VType6
-    IL_01f6:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_01fb:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0200:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0205:  nop
-    IL_0206:  ldloc.1
-    IL_0207:  ldarg.0
-    IL_0208:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f9
-    IL_020d:  ldarg.1
-    IL_020e:  ldc.i4.1
-    IL_020f:  add
-    IL_0210:  call       instance string MCCTest.VType6::Dump(int32)
-    IL_0215:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_021a:  nop
-    IL_021b:  ldloc.1
-    IL_021c:  ldloc.0
-    IL_021d:  ldstr      "[Field f10] [Type '{0}']"
-    IL_0222:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0227:  ldarg.0
-    IL_0228:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f10
-    IL_022d:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0232:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0237:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_023c:  nop
-    IL_023d:  ldloc.1
-    IL_023e:  ldarg.0
-    IL_023f:  ldfld      class MCCTest.RType4 MCCTest.VTypeC::f10
-    IL_0244:  ldarg.1
-    IL_0245:  ldc.i4.1
-    IL_0246:  add
-    IL_0247:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_024c:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0251:  nop
-    IL_0252:  ldloc.1
-    IL_0253:  ldloc.0
-    IL_0254:  ldstr      "[Field f11] [Type '{0}']"
-    IL_0259:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_025e:  ldarg.0
-    IL_025f:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeC::f11
-    IL_0264:  box        MCCTest.VType5
-    IL_0269:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_026e:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0273:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0278:  nop
-    IL_0279:  ldloc.1
-    IL_027a:  ldarg.0
-    IL_027b:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeC::f11
-    IL_0280:  ldarg.1
-    IL_0281:  ldc.i4.1
-    IL_0282:  add
-    IL_0283:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_0288:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_028d:  nop
-    IL_028e:  ldloc.1
-    IL_028f:  ldloc.0
-    IL_0290:  ldstr      "[Field f12] [Type '{0}']"
-    IL_0295:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_029a:  ldarg.0
-    IL_029b:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeC::f12
-    IL_02a0:  box        MCCTest.VType6
-    IL_02a5:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_02aa:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_02af:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_02b4:  nop
-    IL_02b5:  ldloc.1
-    IL_02b6:  ldarg.0
-    IL_02b7:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeC::f12
-    IL_02bc:  ldarg.1
-    IL_02bd:  ldc.i4.1
-    IL_02be:  add
-    IL_02bf:  call       instance string MCCTest.VType6::Dump(int32)
-    IL_02c4:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_02c9:  nop
-    IL_02ca:  ldloc.1
-    IL_02cb:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_02d0:  stloc.2
-    IL_02d1:  br.s       IL_02d3
-
-    IL_02d3:  ldloc.2
-    IL_02d4:  ret
-  } // end of method VTypeC::Dump
-
-  .method public hidebysig instance string
-          Dump() cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (string V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance string MCCTest.VTypeC::Dump(int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method VTypeC::Dump
-
-} // end of class MCCTest.VTypeC
-
-.class public sequential ansi sealed beforefieldinit MCCTest.VTypeD
-       extends [mscorlib]System.ValueType
-       implements class MCCTest.CType`1<valuetype MCCTest.VTypeD>
-{
-  .field public int32 f1
-  .field public valuetype MCCTest.VType3 f2
-  .field public float64 f3
-  .field public class MCCTest.RType4 f4
-  .field public valuetype MCCTest.VType7 f5
-  .field public uint64 f6
-  .field public float32 f7
-  .field public class MCCTest.RType4 f8
-  .field public valuetype MCCTest.VType6 f9
-  .field public float64 f10
-  .field public int16 f11
-  .field public class MCCTest.RType4 f12
-  .field public valuetype MCCTest.VType5 f13
-  .field public valuetype MCCTest.VType3 f14
-  .field public class MCCTest.RType4 f15
-  .field public valuetype MCCTest.VType7 f16
-  .field public uint32 f17
-  .method public hidebysig newslot virtual final
-          instance void  Init(int32 count) cil managed
-  {
-    // Code size       230 (0xe6)
-    .maxstack  2
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0007:  stfld      class MCCTest.RType4 MCCTest.VTypeD::f4
-    IL_000c:  ldarg.0
-    IL_000d:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0012:  stfld      class MCCTest.RType4 MCCTest.VTypeD::f8
-    IL_0017:  ldarg.0
-    IL_0018:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_001d:  stfld      class MCCTest.RType4 MCCTest.VTypeD::f12
-    IL_0022:  ldarg.0
-    IL_0023:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0028:  stfld      class MCCTest.RType4 MCCTest.VTypeD::f15
-    IL_002d:  ldarg.0
-    IL_002e:  ldarg.1
-    IL_002f:  stfld      int32 MCCTest.VTypeD::f1
-    IL_0034:  ldarg.0
-    IL_0035:  ldflda     valuetype MCCTest.VType3 MCCTest.VTypeD::f2
-    IL_003a:  ldarg.1
-    IL_003b:  call       instance void MCCTest.VType3::Init(int32)
-    IL_0040:  nop
-    IL_0041:  ldarg.0
-    IL_0042:  ldarg.1
-    IL_0043:  conv.r8
-    IL_0044:  stfld      float64 MCCTest.VTypeD::f3
-    IL_0049:  ldarg.0
-    IL_004a:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f4
-    IL_004f:  ldarg.1
-    IL_0050:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_0055:  nop
-    IL_0056:  ldarg.0
-    IL_0057:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeD::f5
-    IL_005c:  ldarg.1
-    IL_005d:  call       instance void MCCTest.VType7::Init(int32)
-    IL_0062:  nop
-    IL_0063:  ldarg.0
-    IL_0064:  ldarg.1
-    IL_0065:  conv.i8
-    IL_0066:  stfld      uint64 MCCTest.VTypeD::f6
-    IL_006b:  ldarg.0
-    IL_006c:  ldarg.1
-    IL_006d:  conv.r4
-    IL_006e:  stfld      float32 MCCTest.VTypeD::f7
-    IL_0073:  ldarg.0
-    IL_0074:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f8
-    IL_0079:  ldarg.1
-    IL_007a:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_007f:  nop
-    IL_0080:  ldarg.0
-    IL_0081:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeD::f9
-    IL_0086:  ldarg.1
-    IL_0087:  call       instance void MCCTest.VType6::Init(int32)
-    IL_008c:  nop
-    IL_008d:  ldarg.0
-    IL_008e:  ldarg.1
-    IL_008f:  conv.r8
-    IL_0090:  stfld      float64 MCCTest.VTypeD::f10
-    IL_0095:  ldarg.0
-    IL_0096:  ldarg.1
-    IL_0097:  conv.i2
-    IL_0098:  stfld      int16 MCCTest.VTypeD::f11
-    IL_009d:  ldarg.0
-    IL_009e:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f12
-    IL_00a3:  ldarg.1
-    IL_00a4:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_00a9:  nop
-    IL_00aa:  ldarg.0
-    IL_00ab:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeD::f13
-    IL_00b0:  ldarg.1
-    IL_00b1:  call       instance void MCCTest.VType5::Init(int32)
-    IL_00b6:  nop
-    IL_00b7:  ldarg.0
-    IL_00b8:  ldflda     valuetype MCCTest.VType3 MCCTest.VTypeD::f14
-    IL_00bd:  ldarg.1
-    IL_00be:  call       instance void MCCTest.VType3::Init(int32)
-    IL_00c3:  nop
-    IL_00c4:  ldarg.0
-    IL_00c5:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f15
-    IL_00ca:  ldarg.1
-    IL_00cb:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_00d0:  nop
-    IL_00d1:  ldarg.0
-    IL_00d2:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeD::f16
-    IL_00d7:  ldarg.1
-    IL_00d8:  call       instance void MCCTest.VType7::Init(int32)
-    IL_00dd:  nop
-    IL_00de:  ldarg.0
-    IL_00df:  ldarg.1
-    IL_00e0:  stfld      uint32 MCCTest.VTypeD::f17
-    IL_00e5:  ret
-  } // end of method VTypeD::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Init() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.1
-    IL_0003:  call       instance void MCCTest.VTypeD::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VTypeD::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Zero() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance void MCCTest.VTypeD::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VTypeD::Zero
-
-  .method public hidebysig instance void
-          Add(valuetype MCCTest.VTypeD val) cil managed
-  {
-    // Code size       333 (0x14d)
-    .maxstack  3
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  dup
-    IL_0003:  ldfld      int32 MCCTest.VTypeD::f1
-    IL_0008:  ldarga.s   val
-    IL_000a:  ldfld      int32 MCCTest.VTypeD::f1
-    IL_000f:  add
-    IL_0010:  stfld      int32 MCCTest.VTypeD::f1
-    IL_0015:  ldarg.0
-    IL_0016:  ldflda     valuetype MCCTest.VType3 MCCTest.VTypeD::f2
-    IL_001b:  ldarga.s   val
-    IL_001d:  ldfld      valuetype MCCTest.VType3 MCCTest.VTypeD::f2
-    IL_0022:  call       instance void MCCTest.VType3::Add(valuetype MCCTest.VType3)
-    IL_0027:  nop
-    IL_0028:  ldarg.0
-    IL_0029:  dup
-    IL_002a:  ldfld      float64 MCCTest.VTypeD::f3
-    IL_002f:  ldarga.s   val
-    IL_0031:  ldfld      float64 MCCTest.VTypeD::f3
-    IL_0036:  add
-    IL_0037:  stfld      float64 MCCTest.VTypeD::f3
-    IL_003c:  ldarg.0
-    IL_003d:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f4
-    IL_0042:  ldarga.s   val
-    IL_0044:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f4
-    IL_0049:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_004e:  nop
-    IL_004f:  ldarg.0
-    IL_0050:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeD::f5
-    IL_0055:  ldarga.s   val
-    IL_0057:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeD::f5
-    IL_005c:  call       instance void MCCTest.VType7::Add(valuetype MCCTest.VType7)
-    IL_0061:  nop
-    IL_0062:  ldarg.0
-    IL_0063:  dup
-    IL_0064:  ldfld      uint64 MCCTest.VTypeD::f6
-    IL_0069:  ldarga.s   val
-    IL_006b:  ldfld      uint64 MCCTest.VTypeD::f6
-    IL_0070:  add
-    IL_0071:  stfld      uint64 MCCTest.VTypeD::f6
-    IL_0076:  ldarg.0
-    IL_0077:  dup
-    IL_0078:  ldfld      float32 MCCTest.VTypeD::f7
-    IL_007d:  ldarga.s   val
-    IL_007f:  ldfld      float32 MCCTest.VTypeD::f7
-    IL_0084:  add
-    IL_0085:  stfld      float32 MCCTest.VTypeD::f7
-    IL_008a:  ldarg.0
-    IL_008b:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f8
-    IL_0090:  ldarga.s   val
-    IL_0092:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f8
-    IL_0097:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_009c:  nop
-    IL_009d:  ldarg.0
-    IL_009e:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeD::f9
-    IL_00a3:  ldarga.s   val
-    IL_00a5:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeD::f9
-    IL_00aa:  call       instance void MCCTest.VType6::Add(valuetype MCCTest.VType6)
-    IL_00af:  nop
-    IL_00b0:  ldarg.0
-    IL_00b1:  dup
-    IL_00b2:  ldfld      float64 MCCTest.VTypeD::f10
-    IL_00b7:  ldarga.s   val
-    IL_00b9:  ldfld      float64 MCCTest.VTypeD::f10
-    IL_00be:  add
-    IL_00bf:  stfld      float64 MCCTest.VTypeD::f10
-    IL_00c4:  ldarg.0
-    IL_00c5:  dup
-    IL_00c6:  ldfld      int16 MCCTest.VTypeD::f11
-    IL_00cb:  ldarga.s   val
-    IL_00cd:  ldfld      int16 MCCTest.VTypeD::f11
-    IL_00d2:  add
-    IL_00d3:  conv.i2
-    IL_00d4:  stfld      int16 MCCTest.VTypeD::f11
-    IL_00d9:  ldarg.0
-    IL_00da:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f12
-    IL_00df:  ldarga.s   val
-    IL_00e1:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f12
-    IL_00e6:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_00eb:  nop
-    IL_00ec:  ldarg.0
-    IL_00ed:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeD::f13
-    IL_00f2:  ldarga.s   val
-    IL_00f4:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeD::f13
-    IL_00f9:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_00fe:  nop
-    IL_00ff:  ldarg.0
-    IL_0100:  ldflda     valuetype MCCTest.VType3 MCCTest.VTypeD::f14
-    IL_0105:  ldarga.s   val
-    IL_0107:  ldfld      valuetype MCCTest.VType3 MCCTest.VTypeD::f14
-    IL_010c:  call       instance void MCCTest.VType3::Add(valuetype MCCTest.VType3)
-    IL_0111:  nop
-    IL_0112:  ldarg.0
-    IL_0113:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f15
-    IL_0118:  ldarga.s   val
-    IL_011a:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f15
-    IL_011f:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_0124:  nop
-    IL_0125:  ldarg.0
-    IL_0126:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeD::f16
-    IL_012b:  ldarga.s   val
-    IL_012d:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeD::f16
-    IL_0132:  call       instance void MCCTest.VType7::Add(valuetype MCCTest.VType7)
-    IL_0137:  nop
-    IL_0138:  ldarg.0
-    IL_0139:  dup
-    IL_013a:  ldfld      uint32 MCCTest.VTypeD::f17
-    IL_013f:  ldarga.s   val
-    IL_0141:  ldfld      uint32 MCCTest.VTypeD::f17
-    IL_0146:  add
-    IL_0147:  stfld      uint32 MCCTest.VTypeD::f17
-    IL_014c:  ret
-  } // end of method VTypeD::Add
-
-  .method public hidebysig newslot virtual final
-          instance void  Check(valuetype MCCTest.VTypeD expected) cil managed
-  {
-    // Code size       742 (0x2e6)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.Type V_1,
-             class MCCTest.ResultVerificationException V_2,
-             bool V_3)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldfld      int32 MCCTest.VTypeD::f1
-    IL_0007:  ldarga.s   expected
-    IL_0009:  ldfld      int32 MCCTest.VTypeD::f1
-    IL_000e:  ceq
-    IL_0010:  stloc.3
-    IL_0011:  ldloc.3
-    IL_0012:  brtrue.s   IL_002f
-
-    IL_0014:  nop
-    IL_0015:  ldstr      "f1"
-    IL_001a:  ldarg.0
-    IL_001b:  ldfld      int32 MCCTest.VTypeD::f1
-    IL_0020:  conv.i8
-    IL_0021:  ldarga.s   expected
-    IL_0023:  ldfld      int32 MCCTest.VTypeD::f1
-    IL_0028:  conv.i8
-    IL_0029:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_002e:  throw
-
-    IL_002f:  ldarg.0
-    IL_0030:  ldfld      float64 MCCTest.VTypeD::f3
-    IL_0035:  ldarga.s   expected
-    IL_0037:  ldfld      float64 MCCTest.VTypeD::f3
-    IL_003c:  ceq
-    IL_003e:  stloc.3
-    IL_003f:  ldloc.3
-    IL_0040:  brtrue.s   IL_005b
-
-    IL_0042:  nop
-    IL_0043:  ldstr      "f3"
-    IL_0048:  ldarg.0
-    IL_0049:  ldfld      float64 MCCTest.VTypeD::f3
-    IL_004e:  ldarga.s   expected
-    IL_0050:  ldfld      float64 MCCTest.VTypeD::f3
-    IL_0055:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_005a:  throw
-
-    IL_005b:  ldarg.0
-    IL_005c:  ldfld      uint64 MCCTest.VTypeD::f6
-    IL_0061:  ldarga.s   expected
-    IL_0063:  ldfld      uint64 MCCTest.VTypeD::f6
-    IL_0068:  ceq
-    IL_006a:  stloc.3
-    IL_006b:  ldloc.3
-    IL_006c:  brtrue.s   IL_008b
-
-    IL_006e:  nop
-    IL_006f:  ldstr      "f6"
-    IL_0074:  ldarg.0
-    IL_0075:  ldfld      uint64 MCCTest.VTypeD::f6
-    IL_007a:  conv.r.un
-    IL_007b:  conv.r8
-    IL_007c:  ldarga.s   expected
-    IL_007e:  ldfld      uint64 MCCTest.VTypeD::f6
-    IL_0083:  conv.r.un
-    IL_0084:  conv.r8
-    IL_0085:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_008a:  throw
-
-    IL_008b:  ldarg.0
-    IL_008c:  ldfld      float32 MCCTest.VTypeD::f7
-    IL_0091:  ldarga.s   expected
-    IL_0093:  ldfld      float32 MCCTest.VTypeD::f7
-    IL_0098:  ceq
-    IL_009a:  stloc.3
-    IL_009b:  ldloc.3
-    IL_009c:  brtrue.s   IL_00b9
-
-    IL_009e:  nop
-    IL_009f:  ldstr      "f7"
-    IL_00a4:  ldarg.0
-    IL_00a5:  ldfld      float32 MCCTest.VTypeD::f7
-    IL_00aa:  conv.r8
-    IL_00ab:  ldarga.s   expected
-    IL_00ad:  ldfld      float32 MCCTest.VTypeD::f7
-    IL_00b2:  conv.r8
-    IL_00b3:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_00b8:  throw
-
-    IL_00b9:  ldarg.0
-    IL_00ba:  ldfld      float64 MCCTest.VTypeD::f10
-    IL_00bf:  ldarga.s   expected
-    IL_00c1:  ldfld      float64 MCCTest.VTypeD::f10
-    IL_00c6:  ceq
-    IL_00c8:  stloc.3
-    IL_00c9:  ldloc.3
-    IL_00ca:  brtrue.s   IL_00e5
-
-    IL_00cc:  nop
-    IL_00cd:  ldstr      "f10"
-    IL_00d2:  ldarg.0
-    IL_00d3:  ldfld      float64 MCCTest.VTypeD::f10
-    IL_00d8:  ldarga.s   expected
-    IL_00da:  ldfld      float64 MCCTest.VTypeD::f10
-    IL_00df:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_00e4:  throw
-
-    IL_00e5:  ldarg.0
-    IL_00e6:  ldfld      int16 MCCTest.VTypeD::f11
-    IL_00eb:  ldarga.s   expected
-    IL_00ed:  ldfld      int16 MCCTest.VTypeD::f11
-    IL_00f2:  ceq
-    IL_00f4:  stloc.3
-    IL_00f5:  ldloc.3
-    IL_00f6:  brtrue.s   IL_0113
-
-    IL_00f8:  nop
-    IL_00f9:  ldstr      "f11"
-    IL_00fe:  ldarg.0
-    IL_00ff:  ldfld      int16 MCCTest.VTypeD::f11
-    IL_0104:  conv.i8
-    IL_0105:  ldarga.s   expected
-    IL_0107:  ldfld      int16 MCCTest.VTypeD::f11
-    IL_010c:  conv.i8
-    IL_010d:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_0112:  throw
-
-    IL_0113:  ldarg.0
-    IL_0114:  ldfld      uint32 MCCTest.VTypeD::f17
-    IL_0119:  ldarga.s   expected
-    IL_011b:  ldfld      uint32 MCCTest.VTypeD::f17
-    IL_0120:  ceq
-    IL_0122:  stloc.3
-    IL_0123:  ldloc.3
-    IL_0124:  brtrue.s   IL_0141
-
-    IL_0126:  nop
-    IL_0127:  ldstr      "f17"
-    IL_012c:  ldarg.0
-    IL_012d:  ldfld      uint32 MCCTest.VTypeD::f17
-    IL_0132:  conv.u8
-    IL_0133:  ldarga.s   expected
-    IL_0135:  ldfld      uint32 MCCTest.VTypeD::f17
-    IL_013a:  conv.u8
-    IL_013b:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_0140:  throw
-
-    IL_0141:  ldnull
-    IL_0142:  stloc.0
-    IL_0143:  ldnull
-    IL_0144:  stloc.1
-    .try
-    {
-      IL_0145:  nop
-      IL_0146:  ldstr      "f2"
-      IL_014b:  stloc.0
-      IL_014c:  ldarg.0
-      IL_014d:  ldfld      valuetype MCCTest.VType3 MCCTest.VTypeD::f2
-      IL_0152:  box        MCCTest.VType3
-      IL_0157:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_015c:  stloc.1
-      IL_015d:  ldarg.0
-      IL_015e:  ldflda     valuetype MCCTest.VType3 MCCTest.VTypeD::f2
-      IL_0163:  ldarga.s   expected
-      IL_0165:  ldfld      valuetype MCCTest.VType3 MCCTest.VTypeD::f2
-      IL_016a:  call       instance void MCCTest.VType3::Check(valuetype MCCTest.VType3)
-      IL_016f:  nop
-      IL_0170:  ldstr      "f4"
-      IL_0175:  stloc.0
-      IL_0176:  ldarg.0
-      IL_0177:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f4
-      IL_017c:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0181:  stloc.1
-      IL_0182:  ldarg.0
-      IL_0183:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f4
-      IL_0188:  ldarga.s   expected
-      IL_018a:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f4
-      IL_018f:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_0194:  nop
-      IL_0195:  ldstr      "f5"
-      IL_019a:  stloc.0
-      IL_019b:  ldarg.0
-      IL_019c:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeD::f5
-      IL_01a1:  box        MCCTest.VType7
-      IL_01a6:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_01ab:  stloc.1
-      IL_01ac:  ldarg.0
-      IL_01ad:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeD::f5
-      IL_01b2:  ldarga.s   expected
-      IL_01b4:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeD::f5
-      IL_01b9:  call       instance void MCCTest.VType7::Check(valuetype MCCTest.VType7)
-      IL_01be:  nop
-      IL_01bf:  ldstr      "f8"
-      IL_01c4:  stloc.0
-      IL_01c5:  ldarg.0
-      IL_01c6:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f8
-      IL_01cb:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_01d0:  stloc.1
-      IL_01d1:  ldarg.0
-      IL_01d2:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f8
-      IL_01d7:  ldarga.s   expected
-      IL_01d9:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f8
-      IL_01de:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_01e3:  nop
-      IL_01e4:  ldstr      "f9"
-      IL_01e9:  stloc.0
-      IL_01ea:  ldarg.0
-      IL_01eb:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeD::f9
-      IL_01f0:  box        MCCTest.VType6
-      IL_01f5:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_01fa:  stloc.1
-      IL_01fb:  ldarg.0
-      IL_01fc:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeD::f9
-      IL_0201:  ldarga.s   expected
-      IL_0203:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeD::f9
-      IL_0208:  call       instance void MCCTest.VType6::Check(valuetype MCCTest.VType6)
-      IL_020d:  nop
-      IL_020e:  ldstr      "f12"
-      IL_0213:  stloc.0
-      IL_0214:  ldarg.0
-      IL_0215:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f12
-      IL_021a:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_021f:  stloc.1
-      IL_0220:  ldarg.0
-      IL_0221:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f12
-      IL_0226:  ldarga.s   expected
-      IL_0228:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f12
-      IL_022d:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_0232:  nop
-      IL_0233:  ldstr      "f13"
-      IL_0238:  stloc.0
-      IL_0239:  ldarg.0
-      IL_023a:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeD::f13
-      IL_023f:  box        MCCTest.VType5
-      IL_0244:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0249:  stloc.1
-      IL_024a:  ldarg.0
-      IL_024b:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeD::f13
-      IL_0250:  ldarga.s   expected
-      IL_0252:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeD::f13
-      IL_0257:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_025c:  nop
-      IL_025d:  ldstr      "f14"
-      IL_0262:  stloc.0
-      IL_0263:  ldarg.0
-      IL_0264:  ldfld      valuetype MCCTest.VType3 MCCTest.VTypeD::f14
-      IL_0269:  box        MCCTest.VType3
-      IL_026e:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0273:  stloc.1
-      IL_0274:  ldarg.0
-      IL_0275:  ldflda     valuetype MCCTest.VType3 MCCTest.VTypeD::f14
-      IL_027a:  ldarga.s   expected
-      IL_027c:  ldfld      valuetype MCCTest.VType3 MCCTest.VTypeD::f14
-      IL_0281:  call       instance void MCCTest.VType3::Check(valuetype MCCTest.VType3)
-      IL_0286:  nop
-      IL_0287:  ldstr      "f15"
-      IL_028c:  stloc.0
-      IL_028d:  ldarg.0
-      IL_028e:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f15
-      IL_0293:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0298:  stloc.1
-      IL_0299:  ldarg.0
-      IL_029a:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f15
-      IL_029f:  ldarga.s   expected
-      IL_02a1:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f15
-      IL_02a6:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_02ab:  nop
-      IL_02ac:  ldstr      "f16"
-      IL_02b1:  stloc.0
-      IL_02b2:  ldarg.0
-      IL_02b3:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeD::f16
-      IL_02b8:  box        MCCTest.VType7
-      IL_02bd:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_02c2:  stloc.1
-      IL_02c3:  ldarg.0
-      IL_02c4:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeD::f16
-      IL_02c9:  ldarga.s   expected
-      IL_02cb:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeD::f16
-      IL_02d0:  call       instance void MCCTest.VType7::Check(valuetype MCCTest.VType7)
-      IL_02d5:  nop
-      IL_02d6:  nop
-      IL_02d7:  leave.s    IL_02e4
-
-    }  // end .try
-    catch MCCTest.ResultVerificationException
-    {
-      IL_02d9:  stloc.2
-      IL_02da:  nop
-      IL_02db:  ldloc.0
-      IL_02dc:  ldloc.1
-      IL_02dd:  ldloc.2
-      IL_02de:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                    class [mscorlib]System.Type,
-                                                                                    class MCCTest.ResultVerificationException)
-      IL_02e3:  throw
-
-    }  // end handler
-    IL_02e4:  nop
-    IL_02e5:  ret
-  } // end of method VTypeD::Check
-
-  .method public hidebysig instance string
-          Dump(int32 level) cil managed
-  {
-    // Code size       808 (0x328)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.IO.StringWriter V_1,
-             string V_2)
-    IL_0000:  nop
-    IL_0001:  ldarg.1
-    IL_0002:  call       string MCCTest.FormatUtils::GetPadding(int32)
-    IL_0007:  stloc.0
-    IL_0008:  newobj     instance void [mscorlib]System.IO.StringWriter::.ctor()
-    IL_000d:  stloc.1
-    IL_000e:  ldloc.1
-    IL_000f:  ldloc.0
-    IL_0010:  ldstr      "f1    = "
-    IL_0015:  ldarg.0
-    IL_0016:  ldfld      int32 MCCTest.VTypeD::f1
-    IL_001b:  box        [mscorlib]System.Int32
-    IL_0020:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_0025:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_002a:  nop
-    IL_002b:  ldloc.1
-    IL_002c:  ldloc.0
-    IL_002d:  ldstr      "[Field f2] [Type '{0}']"
-    IL_0032:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0037:  ldarg.0
-    IL_0038:  ldfld      valuetype MCCTest.VType3 MCCTest.VTypeD::f2
-    IL_003d:  box        MCCTest.VType3
-    IL_0042:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0047:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_004c:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0051:  nop
-    IL_0052:  ldloc.1
-    IL_0053:  ldarg.0
-    IL_0054:  ldflda     valuetype MCCTest.VType3 MCCTest.VTypeD::f2
-    IL_0059:  ldarg.1
-    IL_005a:  ldc.i4.1
-    IL_005b:  add
-    IL_005c:  call       instance string MCCTest.VType3::Dump(int32)
-    IL_0061:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0066:  nop
-    IL_0067:  ldloc.1
-    IL_0068:  ldloc.0
-    IL_0069:  ldstr      "f3    = "
-    IL_006e:  ldarg.0
-    IL_006f:  ldfld      float64 MCCTest.VTypeD::f3
-    IL_0074:  box        [mscorlib]System.Double
-    IL_0079:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_007e:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0083:  nop
-    IL_0084:  ldloc.1
-    IL_0085:  ldloc.0
-    IL_0086:  ldstr      "[Field f4] [Type '{0}']"
-    IL_008b:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0090:  ldarg.0
-    IL_0091:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f4
-    IL_0096:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_009b:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_00a0:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_00a5:  nop
-    IL_00a6:  ldloc.1
-    IL_00a7:  ldarg.0
-    IL_00a8:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f4
-    IL_00ad:  ldarg.1
-    IL_00ae:  ldc.i4.1
-    IL_00af:  add
-    IL_00b0:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_00b5:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00ba:  nop
-    IL_00bb:  ldloc.1
-    IL_00bc:  ldloc.0
-    IL_00bd:  ldstr      "[Field f5] [Type '{0}']"
-    IL_00c2:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_00c7:  ldarg.0
-    IL_00c8:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeD::f5
-    IL_00cd:  box        MCCTest.VType7
-    IL_00d2:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_00d7:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_00dc:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_00e1:  nop
-    IL_00e2:  ldloc.1
-    IL_00e3:  ldarg.0
-    IL_00e4:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeD::f5
-    IL_00e9:  ldarg.1
-    IL_00ea:  ldc.i4.1
-    IL_00eb:  add
-    IL_00ec:  call       instance string MCCTest.VType7::Dump(int32)
-    IL_00f1:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00f6:  nop
-    IL_00f7:  ldloc.1
-    IL_00f8:  ldloc.0
-    IL_00f9:  ldstr      "f6    = "
-    IL_00fe:  ldarg.0
-    IL_00ff:  ldfld      uint64 MCCTest.VTypeD::f6
-    IL_0104:  box        [mscorlib]System.UInt64
-    IL_0109:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_010e:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0113:  nop
-    IL_0114:  ldloc.1
-    IL_0115:  ldloc.0
-    IL_0116:  ldstr      "f7    = "
-    IL_011b:  ldarg.0
-    IL_011c:  ldfld      float32 MCCTest.VTypeD::f7
-    IL_0121:  box        [mscorlib]System.Single
-    IL_0126:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_012b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0130:  nop
-    IL_0131:  ldloc.1
-    IL_0132:  ldloc.0
-    IL_0133:  ldstr      "[Field f8] [Type '{0}']"
-    IL_0138:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_013d:  ldarg.0
-    IL_013e:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f8
-    IL_0143:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0148:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_014d:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0152:  nop
-    IL_0153:  ldloc.1
-    IL_0154:  ldarg.0
-    IL_0155:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f8
-    IL_015a:  ldarg.1
-    IL_015b:  ldc.i4.1
-    IL_015c:  add
-    IL_015d:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_0162:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0167:  nop
-    IL_0168:  ldloc.1
-    IL_0169:  ldloc.0
-    IL_016a:  ldstr      "[Field f9] [Type '{0}']"
-    IL_016f:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0174:  ldarg.0
-    IL_0175:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeD::f9
-    IL_017a:  box        MCCTest.VType6
-    IL_017f:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0184:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0189:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_018e:  nop
-    IL_018f:  ldloc.1
-    IL_0190:  ldarg.0
-    IL_0191:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeD::f9
-    IL_0196:  ldarg.1
-    IL_0197:  ldc.i4.1
-    IL_0198:  add
-    IL_0199:  call       instance string MCCTest.VType6::Dump(int32)
-    IL_019e:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_01a3:  nop
-    IL_01a4:  ldloc.1
-    IL_01a5:  ldloc.0
-    IL_01a6:  ldstr      "f10    = "
-    IL_01ab:  ldarg.0
-    IL_01ac:  ldfld      float64 MCCTest.VTypeD::f10
-    IL_01b1:  box        [mscorlib]System.Double
-    IL_01b6:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_01bb:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_01c0:  nop
-    IL_01c1:  ldloc.1
-    IL_01c2:  ldloc.0
-    IL_01c3:  ldstr      "f11    = "
-    IL_01c8:  ldarg.0
-    IL_01c9:  ldfld      int16 MCCTest.VTypeD::f11
-    IL_01ce:  box        [mscorlib]System.Int16
-    IL_01d3:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_01d8:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_01dd:  nop
-    IL_01de:  ldloc.1
-    IL_01df:  ldloc.0
-    IL_01e0:  ldstr      "[Field f12] [Type '{0}']"
-    IL_01e5:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_01ea:  ldarg.0
-    IL_01eb:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f12
-    IL_01f0:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_01f5:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_01fa:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_01ff:  nop
-    IL_0200:  ldloc.1
-    IL_0201:  ldarg.0
-    IL_0202:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f12
-    IL_0207:  ldarg.1
-    IL_0208:  ldc.i4.1
-    IL_0209:  add
-    IL_020a:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_020f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0214:  nop
-    IL_0215:  ldloc.1
-    IL_0216:  ldloc.0
-    IL_0217:  ldstr      "[Field f13] [Type '{0}']"
-    IL_021c:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0221:  ldarg.0
-    IL_0222:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeD::f13
-    IL_0227:  box        MCCTest.VType5
-    IL_022c:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0231:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0236:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_023b:  nop
-    IL_023c:  ldloc.1
-    IL_023d:  ldarg.0
-    IL_023e:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeD::f13
-    IL_0243:  ldarg.1
-    IL_0244:  ldc.i4.1
-    IL_0245:  add
-    IL_0246:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_024b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0250:  nop
-    IL_0251:  ldloc.1
-    IL_0252:  ldloc.0
-    IL_0253:  ldstr      "[Field f14] [Type '{0}']"
-    IL_0258:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_025d:  ldarg.0
-    IL_025e:  ldfld      valuetype MCCTest.VType3 MCCTest.VTypeD::f14
-    IL_0263:  box        MCCTest.VType3
-    IL_0268:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_026d:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0272:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0277:  nop
-    IL_0278:  ldloc.1
-    IL_0279:  ldarg.0
-    IL_027a:  ldflda     valuetype MCCTest.VType3 MCCTest.VTypeD::f14
-    IL_027f:  ldarg.1
-    IL_0280:  ldc.i4.1
-    IL_0281:  add
-    IL_0282:  call       instance string MCCTest.VType3::Dump(int32)
-    IL_0287:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_028c:  nop
-    IL_028d:  ldloc.1
-    IL_028e:  ldloc.0
-    IL_028f:  ldstr      "[Field f15] [Type '{0}']"
-    IL_0294:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0299:  ldarg.0
-    IL_029a:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f15
-    IL_029f:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_02a4:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_02a9:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_02ae:  nop
-    IL_02af:  ldloc.1
-    IL_02b0:  ldarg.0
-    IL_02b1:  ldfld      class MCCTest.RType4 MCCTest.VTypeD::f15
-    IL_02b6:  ldarg.1
-    IL_02b7:  ldc.i4.1
-    IL_02b8:  add
-    IL_02b9:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_02be:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_02c3:  nop
-    IL_02c4:  ldloc.1
-    IL_02c5:  ldloc.0
-    IL_02c6:  ldstr      "[Field f16] [Type '{0}']"
-    IL_02cb:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_02d0:  ldarg.0
-    IL_02d1:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeD::f16
-    IL_02d6:  box        MCCTest.VType7
-    IL_02db:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_02e0:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_02e5:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_02ea:  nop
-    IL_02eb:  ldloc.1
-    IL_02ec:  ldarg.0
-    IL_02ed:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeD::f16
-    IL_02f2:  ldarg.1
-    IL_02f3:  ldc.i4.1
-    IL_02f4:  add
-    IL_02f5:  call       instance string MCCTest.VType7::Dump(int32)
-    IL_02fa:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_02ff:  nop
-    IL_0300:  ldloc.1
-    IL_0301:  ldloc.0
-    IL_0302:  ldstr      "f17    = "
-    IL_0307:  ldarg.0
-    IL_0308:  ldfld      uint32 MCCTest.VTypeD::f17
-    IL_030d:  box        [mscorlib]System.UInt32
-    IL_0312:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_0317:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_031c:  nop
-    IL_031d:  ldloc.1
-    IL_031e:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0323:  stloc.2
-    IL_0324:  br.s       IL_0326
-
-    IL_0326:  ldloc.2
-    IL_0327:  ret
-  } // end of method VTypeD::Dump
-
-  .method public hidebysig instance string
-          Dump() cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (string V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance string MCCTest.VTypeD::Dump(int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method VTypeD::Dump
-
-} // end of class MCCTest.VTypeD
-
-.class public sequential ansi sealed beforefieldinit MCCTest.VTypeE
-       extends [mscorlib]System.ValueType
-       implements class MCCTest.CType`1<valuetype MCCTest.VTypeE>
-{
-  .field public valuetype MCCTest.VType9 f1
-  .field public class MCCTest.RType4 f2
-  .field public valuetype MCCTest.VType8 f3
-  .field public int16 f4
-  .field public valuetype MCCTest.VType8 f5
-  .field public valuetype MCCTest.VType8 f6
-  .field public float64 f7
-  .field public valuetype MCCTest.VTypeA f8
-  .field public int16 f9
-  .field public valuetype MCCTest.VTypeD f10
-  .field public valuetype MCCTest.VType8 f11
-  .field public valuetype MCCTest.VTypeC f12
-  .field public valuetype MCCTest.VTypeA f13
-  .field public float32 f14
-  .field public int32 f15
-  .field public valuetype MCCTest.VTypeC f16
-  .field public valuetype MCCTest.VType7 f17
-  .field public float64 f18
-  .field public uint64 f19
-  .field public valuetype MCCTest.VType8 f20
-  .field public valuetype MCCTest.VType9 f21
-  .field public valuetype MCCTest.VTypeC f22
-  .field public valuetype MCCTest.VType8 f23
-  .method public hidebysig newslot virtual final
-          instance void  Init(int32 count) cil managed
-  {
-    // Code size       276 (0x114)
-    .maxstack  2
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0007:  stfld      class MCCTest.RType4 MCCTest.VTypeE::f2
-    IL_000c:  ldarg.0
-    IL_000d:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeE::f1
-    IL_0012:  ldarg.1
-    IL_0013:  call       instance void MCCTest.VType9::Init(int32)
-    IL_0018:  nop
-    IL_0019:  ldarg.0
-    IL_001a:  ldfld      class MCCTest.RType4 MCCTest.VTypeE::f2
-    IL_001f:  ldarg.1
-    IL_0020:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_0025:  nop
-    IL_0026:  ldarg.0
-    IL_0027:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f3
-    IL_002c:  ldarg.1
-    IL_002d:  call       instance void MCCTest.VType8::Init(int32)
-    IL_0032:  nop
-    IL_0033:  ldarg.0
-    IL_0034:  ldarg.1
-    IL_0035:  conv.i2
-    IL_0036:  stfld      int16 MCCTest.VTypeE::f4
-    IL_003b:  ldarg.0
-    IL_003c:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f5
-    IL_0041:  ldarg.1
-    IL_0042:  call       instance void MCCTest.VType8::Init(int32)
-    IL_0047:  nop
-    IL_0048:  ldarg.0
-    IL_0049:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f6
-    IL_004e:  ldarg.1
-    IL_004f:  call       instance void MCCTest.VType8::Init(int32)
-    IL_0054:  nop
-    IL_0055:  ldarg.0
-    IL_0056:  ldarg.1
-    IL_0057:  conv.r8
-    IL_0058:  stfld      float64 MCCTest.VTypeE::f7
-    IL_005d:  ldarg.0
-    IL_005e:  ldflda     valuetype MCCTest.VTypeA MCCTest.VTypeE::f8
-    IL_0063:  ldarg.1
-    IL_0064:  call       instance void MCCTest.VTypeA::Init(int32)
-    IL_0069:  nop
-    IL_006a:  ldarg.0
-    IL_006b:  ldarg.1
-    IL_006c:  conv.i2
-    IL_006d:  stfld      int16 MCCTest.VTypeE::f9
-    IL_0072:  ldarg.0
-    IL_0073:  ldflda     valuetype MCCTest.VTypeD MCCTest.VTypeE::f10
-    IL_0078:  ldarg.1
-    IL_0079:  call       instance void MCCTest.VTypeD::Init(int32)
-    IL_007e:  nop
-    IL_007f:  ldarg.0
-    IL_0080:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f11
-    IL_0085:  ldarg.1
-    IL_0086:  call       instance void MCCTest.VType8::Init(int32)
-    IL_008b:  nop
-    IL_008c:  ldarg.0
-    IL_008d:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeE::f12
-    IL_0092:  ldarg.1
-    IL_0093:  call       instance void MCCTest.VTypeC::Init(int32)
-    IL_0098:  nop
-    IL_0099:  ldarg.0
-    IL_009a:  ldflda     valuetype MCCTest.VTypeA MCCTest.VTypeE::f13
-    IL_009f:  ldarg.1
-    IL_00a0:  call       instance void MCCTest.VTypeA::Init(int32)
-    IL_00a5:  nop
-    IL_00a6:  ldarg.0
-    IL_00a7:  ldarg.1
-    IL_00a8:  conv.r4
-    IL_00a9:  stfld      float32 MCCTest.VTypeE::f14
-    IL_00ae:  ldarg.0
-    IL_00af:  ldarg.1
-    IL_00b0:  stfld      int32 MCCTest.VTypeE::f15
-    IL_00b5:  ldarg.0
-    IL_00b6:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeE::f16
-    IL_00bb:  ldarg.1
-    IL_00bc:  call       instance void MCCTest.VTypeC::Init(int32)
-    IL_00c1:  nop
-    IL_00c2:  ldarg.0
-    IL_00c3:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeE::f17
-    IL_00c8:  ldarg.1
-    IL_00c9:  call       instance void MCCTest.VType7::Init(int32)
-    IL_00ce:  nop
-    IL_00cf:  ldarg.0
-    IL_00d0:  ldarg.1
-    IL_00d1:  conv.r8
-    IL_00d2:  stfld      float64 MCCTest.VTypeE::f18
-    IL_00d7:  ldarg.0
-    IL_00d8:  ldarg.1
-    IL_00d9:  conv.i8
-    IL_00da:  stfld      uint64 MCCTest.VTypeE::f19
-    IL_00df:  ldarg.0
-    IL_00e0:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f20
-    IL_00e5:  ldarg.1
-    IL_00e6:  call       instance void MCCTest.VType8::Init(int32)
-    IL_00eb:  nop
-    IL_00ec:  ldarg.0
-    IL_00ed:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeE::f21
-    IL_00f2:  ldarg.1
-    IL_00f3:  call       instance void MCCTest.VType9::Init(int32)
-    IL_00f8:  nop
-    IL_00f9:  ldarg.0
-    IL_00fa:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeE::f22
-    IL_00ff:  ldarg.1
-    IL_0100:  call       instance void MCCTest.VTypeC::Init(int32)
-    IL_0105:  nop
-    IL_0106:  ldarg.0
-    IL_0107:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f23
-    IL_010c:  ldarg.1
-    IL_010d:  call       instance void MCCTest.VType8::Init(int32)
-    IL_0112:  nop
-    IL_0113:  ret
-  } // end of method VTypeE::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Init() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.1
-    IL_0003:  call       instance void MCCTest.VTypeE::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VTypeE::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Zero() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance void MCCTest.VTypeE::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VTypeE::Zero
-
-  .method public hidebysig instance void
-          Add(valuetype MCCTest.VTypeE val) cil managed
-  {
-    // Code size       448 (0x1c0)
-    .maxstack  3
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeE::f1
-    IL_0007:  ldarga.s   val
-    IL_0009:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeE::f1
-    IL_000e:  call       instance void MCCTest.VType9::Add(valuetype MCCTest.VType9)
-    IL_0013:  nop
-    IL_0014:  ldarg.0
-    IL_0015:  ldfld      class MCCTest.RType4 MCCTest.VTypeE::f2
-    IL_001a:  ldarga.s   val
-    IL_001c:  ldfld      class MCCTest.RType4 MCCTest.VTypeE::f2
-    IL_0021:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_0026:  nop
-    IL_0027:  ldarg.0
-    IL_0028:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f3
-    IL_002d:  ldarga.s   val
-    IL_002f:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f3
-    IL_0034:  call       instance void MCCTest.VType8::Add(valuetype MCCTest.VType8)
-    IL_0039:  nop
-    IL_003a:  ldarg.0
-    IL_003b:  dup
-    IL_003c:  ldfld      int16 MCCTest.VTypeE::f4
-    IL_0041:  ldarga.s   val
-    IL_0043:  ldfld      int16 MCCTest.VTypeE::f4
-    IL_0048:  add
-    IL_0049:  conv.i2
-    IL_004a:  stfld      int16 MCCTest.VTypeE::f4
-    IL_004f:  ldarg.0
-    IL_0050:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f5
-    IL_0055:  ldarga.s   val
-    IL_0057:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f5
-    IL_005c:  call       instance void MCCTest.VType8::Add(valuetype MCCTest.VType8)
-    IL_0061:  nop
-    IL_0062:  ldarg.0
-    IL_0063:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f6
-    IL_0068:  ldarga.s   val
-    IL_006a:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f6
-    IL_006f:  call       instance void MCCTest.VType8::Add(valuetype MCCTest.VType8)
-    IL_0074:  nop
-    IL_0075:  ldarg.0
-    IL_0076:  dup
-    IL_0077:  ldfld      float64 MCCTest.VTypeE::f7
-    IL_007c:  ldarga.s   val
-    IL_007e:  ldfld      float64 MCCTest.VTypeE::f7
-    IL_0083:  add
-    IL_0084:  stfld      float64 MCCTest.VTypeE::f7
-    IL_0089:  ldarg.0
-    IL_008a:  ldflda     valuetype MCCTest.VTypeA MCCTest.VTypeE::f8
-    IL_008f:  ldarga.s   val
-    IL_0091:  ldfld      valuetype MCCTest.VTypeA MCCTest.VTypeE::f8
-    IL_0096:  call       instance void MCCTest.VTypeA::Add(valuetype MCCTest.VTypeA)
-    IL_009b:  nop
-    IL_009c:  ldarg.0
-    IL_009d:  dup
-    IL_009e:  ldfld      int16 MCCTest.VTypeE::f9
-    IL_00a3:  ldarga.s   val
-    IL_00a5:  ldfld      int16 MCCTest.VTypeE::f9
-    IL_00aa:  add
-    IL_00ab:  conv.i2
-    IL_00ac:  stfld      int16 MCCTest.VTypeE::f9
-    IL_00b1:  ldarg.0
-    IL_00b2:  ldflda     valuetype MCCTest.VTypeD MCCTest.VTypeE::f10
-    IL_00b7:  ldarga.s   val
-    IL_00b9:  ldfld      valuetype MCCTest.VTypeD MCCTest.VTypeE::f10
-    IL_00be:  call       instance void MCCTest.VTypeD::Add(valuetype MCCTest.VTypeD)
-    IL_00c3:  nop
-    IL_00c4:  ldarg.0
-    IL_00c5:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f11
-    IL_00ca:  ldarga.s   val
-    IL_00cc:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f11
-    IL_00d1:  call       instance void MCCTest.VType8::Add(valuetype MCCTest.VType8)
-    IL_00d6:  nop
-    IL_00d7:  ldarg.0
-    IL_00d8:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeE::f12
-    IL_00dd:  ldarga.s   val
-    IL_00df:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeE::f12
-    IL_00e4:  call       instance void MCCTest.VTypeC::Add(valuetype MCCTest.VTypeC)
-    IL_00e9:  nop
-    IL_00ea:  ldarg.0
-    IL_00eb:  ldflda     valuetype MCCTest.VTypeA MCCTest.VTypeE::f13
-    IL_00f0:  ldarga.s   val
-    IL_00f2:  ldfld      valuetype MCCTest.VTypeA MCCTest.VTypeE::f13
-    IL_00f7:  call       instance void MCCTest.VTypeA::Add(valuetype MCCTest.VTypeA)
-    IL_00fc:  nop
-    IL_00fd:  ldarg.0
-    IL_00fe:  dup
-    IL_00ff:  ldfld      float32 MCCTest.VTypeE::f14
-    IL_0104:  ldarga.s   val
-    IL_0106:  ldfld      float32 MCCTest.VTypeE::f14
-    IL_010b:  add
-    IL_010c:  stfld      float32 MCCTest.VTypeE::f14
-    IL_0111:  ldarg.0
-    IL_0112:  dup
-    IL_0113:  ldfld      int32 MCCTest.VTypeE::f15
-    IL_0118:  ldarga.s   val
-    IL_011a:  ldfld      int32 MCCTest.VTypeE::f15
-    IL_011f:  add
-    IL_0120:  stfld      int32 MCCTest.VTypeE::f15
-    IL_0125:  ldarg.0
-    IL_0126:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeE::f16
-    IL_012b:  ldarga.s   val
-    IL_012d:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeE::f16
-    IL_0132:  call       instance void MCCTest.VTypeC::Add(valuetype MCCTest.VTypeC)
-    IL_0137:  nop
-    IL_0138:  ldarg.0
-    IL_0139:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeE::f17
-    IL_013e:  ldarga.s   val
-    IL_0140:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeE::f17
-    IL_0145:  call       instance void MCCTest.VType7::Add(valuetype MCCTest.VType7)
-    IL_014a:  nop
-    IL_014b:  ldarg.0
-    IL_014c:  dup
-    IL_014d:  ldfld      float64 MCCTest.VTypeE::f18
-    IL_0152:  ldarga.s   val
-    IL_0154:  ldfld      float64 MCCTest.VTypeE::f18
-    IL_0159:  add
-    IL_015a:  stfld      float64 MCCTest.VTypeE::f18
-    IL_015f:  ldarg.0
-    IL_0160:  dup
-    IL_0161:  ldfld      uint64 MCCTest.VTypeE::f19
-    IL_0166:  ldarga.s   val
-    IL_0168:  ldfld      uint64 MCCTest.VTypeE::f19
-    IL_016d:  add
-    IL_016e:  stfld      uint64 MCCTest.VTypeE::f19
-    IL_0173:  ldarg.0
-    IL_0174:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f20
-    IL_0179:  ldarga.s   val
-    IL_017b:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f20
-    IL_0180:  call       instance void MCCTest.VType8::Add(valuetype MCCTest.VType8)
-    IL_0185:  nop
-    IL_0186:  ldarg.0
-    IL_0187:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeE::f21
-    IL_018c:  ldarga.s   val
-    IL_018e:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeE::f21
-    IL_0193:  call       instance void MCCTest.VType9::Add(valuetype MCCTest.VType9)
-    IL_0198:  nop
-    IL_0199:  ldarg.0
-    IL_019a:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeE::f22
-    IL_019f:  ldarga.s   val
-    IL_01a1:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeE::f22
-    IL_01a6:  call       instance void MCCTest.VTypeC::Add(valuetype MCCTest.VTypeC)
-    IL_01ab:  nop
-    IL_01ac:  ldarg.0
-    IL_01ad:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f23
-    IL_01b2:  ldarga.s   val
-    IL_01b4:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f23
-    IL_01b9:  call       instance void MCCTest.VType8::Add(valuetype MCCTest.VType8)
-    IL_01be:  nop
-    IL_01bf:  ret
-  } // end of method VTypeE::Add
-
-  .method public hidebysig newslot virtual final
-          instance void  Check(valuetype MCCTest.VTypeE expected) cil managed
-  {
-    // Code size       1009 (0x3f1)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.Type V_1,
-             class MCCTest.ResultVerificationException V_2,
-             bool V_3)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldfld      int16 MCCTest.VTypeE::f4
-    IL_0007:  ldarga.s   expected
-    IL_0009:  ldfld      int16 MCCTest.VTypeE::f4
-    IL_000e:  ceq
-    IL_0010:  stloc.3
-    IL_0011:  ldloc.3
-    IL_0012:  brtrue.s   IL_002f
-
-    IL_0014:  nop
-    IL_0015:  ldstr      "f4"
-    IL_001a:  ldarg.0
-    IL_001b:  ldfld      int16 MCCTest.VTypeE::f4
-    IL_0020:  conv.i8
-    IL_0021:  ldarga.s   expected
-    IL_0023:  ldfld      int16 MCCTest.VTypeE::f4
-    IL_0028:  conv.i8
-    IL_0029:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_002e:  throw
-
-    IL_002f:  ldarg.0
-    IL_0030:  ldfld      float64 MCCTest.VTypeE::f7
-    IL_0035:  ldarga.s   expected
-    IL_0037:  ldfld      float64 MCCTest.VTypeE::f7
-    IL_003c:  ceq
-    IL_003e:  stloc.3
-    IL_003f:  ldloc.3
-    IL_0040:  brtrue.s   IL_005b
-
-    IL_0042:  nop
-    IL_0043:  ldstr      "f7"
-    IL_0048:  ldarg.0
-    IL_0049:  ldfld      float64 MCCTest.VTypeE::f7
-    IL_004e:  ldarga.s   expected
-    IL_0050:  ldfld      float64 MCCTest.VTypeE::f7
-    IL_0055:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_005a:  throw
-
-    IL_005b:  ldarg.0
-    IL_005c:  ldfld      int16 MCCTest.VTypeE::f9
-    IL_0061:  ldarga.s   expected
-    IL_0063:  ldfld      int16 MCCTest.VTypeE::f9
-    IL_0068:  ceq
-    IL_006a:  stloc.3
-    IL_006b:  ldloc.3
-    IL_006c:  brtrue.s   IL_0089
-
-    IL_006e:  nop
-    IL_006f:  ldstr      "f9"
-    IL_0074:  ldarg.0
-    IL_0075:  ldfld      int16 MCCTest.VTypeE::f9
-    IL_007a:  conv.i8
-    IL_007b:  ldarga.s   expected
-    IL_007d:  ldfld      int16 MCCTest.VTypeE::f9
-    IL_0082:  conv.i8
-    IL_0083:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_0088:  throw
-
-    IL_0089:  ldarg.0
-    IL_008a:  ldfld      float32 MCCTest.VTypeE::f14
-    IL_008f:  ldarga.s   expected
-    IL_0091:  ldfld      float32 MCCTest.VTypeE::f14
-    IL_0096:  ceq
-    IL_0098:  stloc.3
-    IL_0099:  ldloc.3
-    IL_009a:  brtrue.s   IL_00b7
-
-    IL_009c:  nop
-    IL_009d:  ldstr      "f14"
-    IL_00a2:  ldarg.0
-    IL_00a3:  ldfld      float32 MCCTest.VTypeE::f14
-    IL_00a8:  conv.r8
-    IL_00a9:  ldarga.s   expected
-    IL_00ab:  ldfld      float32 MCCTest.VTypeE::f14
-    IL_00b0:  conv.r8
-    IL_00b1:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_00b6:  throw
-
-    IL_00b7:  ldarg.0
-    IL_00b8:  ldfld      int32 MCCTest.VTypeE::f15
-    IL_00bd:  ldarga.s   expected
-    IL_00bf:  ldfld      int32 MCCTest.VTypeE::f15
-    IL_00c4:  ceq
-    IL_00c6:  stloc.3
-    IL_00c7:  ldloc.3
-    IL_00c8:  brtrue.s   IL_00e5
-
-    IL_00ca:  nop
-    IL_00cb:  ldstr      "f15"
-    IL_00d0:  ldarg.0
-    IL_00d1:  ldfld      int32 MCCTest.VTypeE::f15
-    IL_00d6:  conv.i8
-    IL_00d7:  ldarga.s   expected
-    IL_00d9:  ldfld      int32 MCCTest.VTypeE::f15
-    IL_00de:  conv.i8
-    IL_00df:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_00e4:  throw
-
-    IL_00e5:  ldarg.0
-    IL_00e6:  ldfld      float64 MCCTest.VTypeE::f18
-    IL_00eb:  ldarga.s   expected
-    IL_00ed:  ldfld      float64 MCCTest.VTypeE::f18
-    IL_00f2:  ceq
-    IL_00f4:  stloc.3
-    IL_00f5:  ldloc.3
-    IL_00f6:  brtrue.s   IL_0111
-
-    IL_00f8:  nop
-    IL_00f9:  ldstr      "f18"
-    IL_00fe:  ldarg.0
-    IL_00ff:  ldfld      float64 MCCTest.VTypeE::f18
-    IL_0104:  ldarga.s   expected
-    IL_0106:  ldfld      float64 MCCTest.VTypeE::f18
-    IL_010b:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_0110:  throw
-
-    IL_0111:  ldarg.0
-    IL_0112:  ldfld      uint64 MCCTest.VTypeE::f19
-    IL_0117:  ldarga.s   expected
-    IL_0119:  ldfld      uint64 MCCTest.VTypeE::f19
-    IL_011e:  ceq
-    IL_0120:  stloc.3
-    IL_0121:  ldloc.3
-    IL_0122:  brtrue.s   IL_0141
-
-    IL_0124:  nop
-    IL_0125:  ldstr      "f19"
-    IL_012a:  ldarg.0
-    IL_012b:  ldfld      uint64 MCCTest.VTypeE::f19
-    IL_0130:  conv.r.un
-    IL_0131:  conv.r8
-    IL_0132:  ldarga.s   expected
-    IL_0134:  ldfld      uint64 MCCTest.VTypeE::f19
-    IL_0139:  conv.r.un
-    IL_013a:  conv.r8
-    IL_013b:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_0140:  throw
-
-    IL_0141:  ldnull
-    IL_0142:  stloc.0
-    IL_0143:  ldnull
-    IL_0144:  stloc.1
-    .try
-    {
-      IL_0145:  nop
-      IL_0146:  ldstr      "f1"
-      IL_014b:  stloc.0
-      IL_014c:  ldarg.0
-      IL_014d:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeE::f1
-      IL_0152:  box        MCCTest.VType9
-      IL_0157:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_015c:  stloc.1
-      IL_015d:  ldarg.0
-      IL_015e:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeE::f1
-      IL_0163:  ldarga.s   expected
-      IL_0165:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeE::f1
-      IL_016a:  call       instance void MCCTest.VType9::Check(valuetype MCCTest.VType9)
-      IL_016f:  nop
-      IL_0170:  ldstr      "f2"
-      IL_0175:  stloc.0
-      IL_0176:  ldarg.0
-      IL_0177:  ldfld      class MCCTest.RType4 MCCTest.VTypeE::f2
-      IL_017c:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0181:  stloc.1
-      IL_0182:  ldarg.0
-      IL_0183:  ldfld      class MCCTest.RType4 MCCTest.VTypeE::f2
-      IL_0188:  ldarga.s   expected
-      IL_018a:  ldfld      class MCCTest.RType4 MCCTest.VTypeE::f2
-      IL_018f:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_0194:  nop
-      IL_0195:  ldstr      "f3"
-      IL_019a:  stloc.0
-      IL_019b:  ldarg.0
-      IL_019c:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f3
-      IL_01a1:  box        MCCTest.VType8
-      IL_01a6:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_01ab:  stloc.1
-      IL_01ac:  ldarg.0
-      IL_01ad:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f3
-      IL_01b2:  ldarga.s   expected
-      IL_01b4:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f3
-      IL_01b9:  call       instance void MCCTest.VType8::Check(valuetype MCCTest.VType8)
-      IL_01be:  nop
-      IL_01bf:  ldstr      "f5"
-      IL_01c4:  stloc.0
-      IL_01c5:  ldarg.0
-      IL_01c6:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f5
-      IL_01cb:  box        MCCTest.VType8
-      IL_01d0:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_01d5:  stloc.1
-      IL_01d6:  ldarg.0
-      IL_01d7:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f5
-      IL_01dc:  ldarga.s   expected
-      IL_01de:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f5
-      IL_01e3:  call       instance void MCCTest.VType8::Check(valuetype MCCTest.VType8)
-      IL_01e8:  nop
-      IL_01e9:  ldstr      "f6"
-      IL_01ee:  stloc.0
-      IL_01ef:  ldarg.0
-      IL_01f0:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f6
-      IL_01f5:  box        MCCTest.VType8
-      IL_01fa:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_01ff:  stloc.1
-      IL_0200:  ldarg.0
-      IL_0201:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f6
-      IL_0206:  ldarga.s   expected
-      IL_0208:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f6
-      IL_020d:  call       instance void MCCTest.VType8::Check(valuetype MCCTest.VType8)
-      IL_0212:  nop
-      IL_0213:  ldstr      "f8"
-      IL_0218:  stloc.0
-      IL_0219:  ldarg.0
-      IL_021a:  ldfld      valuetype MCCTest.VTypeA MCCTest.VTypeE::f8
-      IL_021f:  box        MCCTest.VTypeA
-      IL_0224:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0229:  stloc.1
-      IL_022a:  ldarg.0
-      IL_022b:  ldflda     valuetype MCCTest.VTypeA MCCTest.VTypeE::f8
-      IL_0230:  ldarga.s   expected
-      IL_0232:  ldfld      valuetype MCCTest.VTypeA MCCTest.VTypeE::f8
-      IL_0237:  call       instance void MCCTest.VTypeA::Check(valuetype MCCTest.VTypeA)
-      IL_023c:  nop
-      IL_023d:  ldstr      "f10"
-      IL_0242:  stloc.0
-      IL_0243:  ldarg.0
-      IL_0244:  ldfld      valuetype MCCTest.VTypeD MCCTest.VTypeE::f10
-      IL_0249:  box        MCCTest.VTypeD
-      IL_024e:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0253:  stloc.1
-      IL_0254:  ldarg.0
-      IL_0255:  ldflda     valuetype MCCTest.VTypeD MCCTest.VTypeE::f10
-      IL_025a:  ldarga.s   expected
-      IL_025c:  ldfld      valuetype MCCTest.VTypeD MCCTest.VTypeE::f10
-      IL_0261:  call       instance void MCCTest.VTypeD::Check(valuetype MCCTest.VTypeD)
-      IL_0266:  nop
-      IL_0267:  ldstr      "f11"
-      IL_026c:  stloc.0
-      IL_026d:  ldarg.0
-      IL_026e:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f11
-      IL_0273:  box        MCCTest.VType8
-      IL_0278:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_027d:  stloc.1
-      IL_027e:  ldarg.0
-      IL_027f:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f11
-      IL_0284:  ldarga.s   expected
-      IL_0286:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f11
-      IL_028b:  call       instance void MCCTest.VType8::Check(valuetype MCCTest.VType8)
-      IL_0290:  nop
-      IL_0291:  ldstr      "f12"
-      IL_0296:  stloc.0
-      IL_0297:  ldarg.0
-      IL_0298:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeE::f12
-      IL_029d:  box        MCCTest.VTypeC
-      IL_02a2:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_02a7:  stloc.1
-      IL_02a8:  ldarg.0
-      IL_02a9:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeE::f12
-      IL_02ae:  ldarga.s   expected
-      IL_02b0:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeE::f12
-      IL_02b5:  call       instance void MCCTest.VTypeC::Check(valuetype MCCTest.VTypeC)
-      IL_02ba:  nop
-      IL_02bb:  ldstr      "f13"
-      IL_02c0:  stloc.0
-      IL_02c1:  ldarg.0
-      IL_02c2:  ldfld      valuetype MCCTest.VTypeA MCCTest.VTypeE::f13
-      IL_02c7:  box        MCCTest.VTypeA
-      IL_02cc:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_02d1:  stloc.1
-      IL_02d2:  ldarg.0
-      IL_02d3:  ldflda     valuetype MCCTest.VTypeA MCCTest.VTypeE::f13
-      IL_02d8:  ldarga.s   expected
-      IL_02da:  ldfld      valuetype MCCTest.VTypeA MCCTest.VTypeE::f13
-      IL_02df:  call       instance void MCCTest.VTypeA::Check(valuetype MCCTest.VTypeA)
-      IL_02e4:  nop
-      IL_02e5:  ldstr      "f16"
-      IL_02ea:  stloc.0
-      IL_02eb:  ldarg.0
-      IL_02ec:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeE::f16
-      IL_02f1:  box        MCCTest.VTypeC
-      IL_02f6:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_02fb:  stloc.1
-      IL_02fc:  ldarg.0
-      IL_02fd:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeE::f16
-      IL_0302:  ldarga.s   expected
-      IL_0304:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeE::f16
-      IL_0309:  call       instance void MCCTest.VTypeC::Check(valuetype MCCTest.VTypeC)
-      IL_030e:  nop
-      IL_030f:  ldstr      "f17"
-      IL_0314:  stloc.0
-      IL_0315:  ldarg.0
-      IL_0316:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeE::f17
-      IL_031b:  box        MCCTest.VType7
-      IL_0320:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0325:  stloc.1
-      IL_0326:  ldarg.0
-      IL_0327:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeE::f17
-      IL_032c:  ldarga.s   expected
-      IL_032e:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeE::f17
-      IL_0333:  call       instance void MCCTest.VType7::Check(valuetype MCCTest.VType7)
-      IL_0338:  nop
-      IL_0339:  ldstr      "f20"
-      IL_033e:  stloc.0
-      IL_033f:  ldarg.0
-      IL_0340:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f20
-      IL_0345:  box        MCCTest.VType8
-      IL_034a:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_034f:  stloc.1
-      IL_0350:  ldarg.0
-      IL_0351:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f20
-      IL_0356:  ldarga.s   expected
-      IL_0358:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f20
-      IL_035d:  call       instance void MCCTest.VType8::Check(valuetype MCCTest.VType8)
-      IL_0362:  nop
-      IL_0363:  ldstr      "f21"
-      IL_0368:  stloc.0
-      IL_0369:  ldarg.0
-      IL_036a:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeE::f21
-      IL_036f:  box        MCCTest.VType9
-      IL_0374:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0379:  stloc.1
-      IL_037a:  ldarg.0
-      IL_037b:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeE::f21
-      IL_0380:  ldarga.s   expected
-      IL_0382:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeE::f21
-      IL_0387:  call       instance void MCCTest.VType9::Check(valuetype MCCTest.VType9)
-      IL_038c:  nop
-      IL_038d:  ldstr      "f22"
-      IL_0392:  stloc.0
-      IL_0393:  ldarg.0
-      IL_0394:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeE::f22
-      IL_0399:  box        MCCTest.VTypeC
-      IL_039e:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_03a3:  stloc.1
-      IL_03a4:  ldarg.0
-      IL_03a5:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeE::f22
-      IL_03aa:  ldarga.s   expected
-      IL_03ac:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeE::f22
-      IL_03b1:  call       instance void MCCTest.VTypeC::Check(valuetype MCCTest.VTypeC)
-      IL_03b6:  nop
-      IL_03b7:  ldstr      "f23"
-      IL_03bc:  stloc.0
-      IL_03bd:  ldarg.0
-      IL_03be:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f23
-      IL_03c3:  box        MCCTest.VType8
-      IL_03c8:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_03cd:  stloc.1
-      IL_03ce:  ldarg.0
-      IL_03cf:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f23
-      IL_03d4:  ldarga.s   expected
-      IL_03d6:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f23
-      IL_03db:  call       instance void MCCTest.VType8::Check(valuetype MCCTest.VType8)
-      IL_03e0:  nop
-      IL_03e1:  nop
-      IL_03e2:  leave.s    IL_03ef
-
-    }  // end .try
-    catch MCCTest.ResultVerificationException
-    {
-      IL_03e4:  stloc.2
-      IL_03e5:  nop
-      IL_03e6:  ldloc.0
-      IL_03e7:  ldloc.1
-      IL_03e8:  ldloc.2
-      IL_03e9:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                    class [mscorlib]System.Type,
-                                                                                    class MCCTest.ResultVerificationException)
-      IL_03ee:  throw
-
-    }  // end handler
-    IL_03ef:  nop
-    IL_03f0:  ret
-  } // end of method VTypeE::Check
-
-  .method public hidebysig instance string
-          Dump(int32 level) cil managed
-  {
-    // Code size       1183 (0x49f)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.IO.StringWriter V_1,
-             string V_2)
-    IL_0000:  nop
-    IL_0001:  ldarg.1
-    IL_0002:  call       string MCCTest.FormatUtils::GetPadding(int32)
-    IL_0007:  stloc.0
-    IL_0008:  newobj     instance void [mscorlib]System.IO.StringWriter::.ctor()
-    IL_000d:  stloc.1
-    IL_000e:  ldloc.1
-    IL_000f:  ldloc.0
-    IL_0010:  ldstr      "[Field f1] [Type '{0}']"
-    IL_0015:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_001a:  ldarg.0
-    IL_001b:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeE::f1
-    IL_0020:  box        MCCTest.VType9
-    IL_0025:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_002a:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_002f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0034:  nop
-    IL_0035:  ldloc.1
-    IL_0036:  ldarg.0
-    IL_0037:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeE::f1
-    IL_003c:  ldarg.1
-    IL_003d:  ldc.i4.1
-    IL_003e:  add
-    IL_003f:  call       instance string MCCTest.VType9::Dump(int32)
-    IL_0044:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0049:  nop
-    IL_004a:  ldloc.1
-    IL_004b:  ldloc.0
-    IL_004c:  ldstr      "[Field f2] [Type '{0}']"
-    IL_0051:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0056:  ldarg.0
-    IL_0057:  ldfld      class MCCTest.RType4 MCCTest.VTypeE::f2
-    IL_005c:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0061:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0066:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_006b:  nop
-    IL_006c:  ldloc.1
-    IL_006d:  ldarg.0
-    IL_006e:  ldfld      class MCCTest.RType4 MCCTest.VTypeE::f2
-    IL_0073:  ldarg.1
-    IL_0074:  ldc.i4.1
-    IL_0075:  add
-    IL_0076:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_007b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0080:  nop
-    IL_0081:  ldloc.1
-    IL_0082:  ldloc.0
-    IL_0083:  ldstr      "[Field f3] [Type '{0}']"
-    IL_0088:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_008d:  ldarg.0
-    IL_008e:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f3
-    IL_0093:  box        MCCTest.VType8
-    IL_0098:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_009d:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_00a2:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_00a7:  nop
-    IL_00a8:  ldloc.1
-    IL_00a9:  ldarg.0
-    IL_00aa:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f3
-    IL_00af:  ldarg.1
-    IL_00b0:  ldc.i4.1
-    IL_00b1:  add
-    IL_00b2:  call       instance string MCCTest.VType8::Dump(int32)
-    IL_00b7:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00bc:  nop
-    IL_00bd:  ldloc.1
-    IL_00be:  ldloc.0
-    IL_00bf:  ldstr      "f4    = "
-    IL_00c4:  ldarg.0
-    IL_00c5:  ldfld      int16 MCCTest.VTypeE::f4
-    IL_00ca:  box        [mscorlib]System.Int16
-    IL_00cf:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_00d4:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00d9:  nop
-    IL_00da:  ldloc.1
-    IL_00db:  ldloc.0
-    IL_00dc:  ldstr      "[Field f5] [Type '{0}']"
-    IL_00e1:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_00e6:  ldarg.0
-    IL_00e7:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f5
-    IL_00ec:  box        MCCTest.VType8
-    IL_00f1:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_00f6:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_00fb:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0100:  nop
-    IL_0101:  ldloc.1
-    IL_0102:  ldarg.0
-    IL_0103:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f5
-    IL_0108:  ldarg.1
-    IL_0109:  ldc.i4.1
-    IL_010a:  add
-    IL_010b:  call       instance string MCCTest.VType8::Dump(int32)
-    IL_0110:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0115:  nop
-    IL_0116:  ldloc.1
-    IL_0117:  ldloc.0
-    IL_0118:  ldstr      "[Field f6] [Type '{0}']"
-    IL_011d:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0122:  ldarg.0
-    IL_0123:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f6
-    IL_0128:  box        MCCTest.VType8
-    IL_012d:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0132:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0137:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_013c:  nop
-    IL_013d:  ldloc.1
-    IL_013e:  ldarg.0
-    IL_013f:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f6
-    IL_0144:  ldarg.1
-    IL_0145:  ldc.i4.1
-    IL_0146:  add
-    IL_0147:  call       instance string MCCTest.VType8::Dump(int32)
-    IL_014c:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0151:  nop
-    IL_0152:  ldloc.1
-    IL_0153:  ldloc.0
-    IL_0154:  ldstr      "f7    = "
-    IL_0159:  ldarg.0
-    IL_015a:  ldfld      float64 MCCTest.VTypeE::f7
-    IL_015f:  box        [mscorlib]System.Double
-    IL_0164:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_0169:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_016e:  nop
-    IL_016f:  ldloc.1
-    IL_0170:  ldloc.0
-    IL_0171:  ldstr      "[Field f8] [Type '{0}']"
-    IL_0176:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_017b:  ldarg.0
-    IL_017c:  ldfld      valuetype MCCTest.VTypeA MCCTest.VTypeE::f8
-    IL_0181:  box        MCCTest.VTypeA
-    IL_0186:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_018b:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0190:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0195:  nop
-    IL_0196:  ldloc.1
-    IL_0197:  ldarg.0
-    IL_0198:  ldflda     valuetype MCCTest.VTypeA MCCTest.VTypeE::f8
-    IL_019d:  ldarg.1
-    IL_019e:  ldc.i4.1
-    IL_019f:  add
-    IL_01a0:  call       instance string MCCTest.VTypeA::Dump(int32)
-    IL_01a5:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_01aa:  nop
-    IL_01ab:  ldloc.1
-    IL_01ac:  ldloc.0
-    IL_01ad:  ldstr      "f9    = "
-    IL_01b2:  ldarg.0
-    IL_01b3:  ldfld      int16 MCCTest.VTypeE::f9
-    IL_01b8:  box        [mscorlib]System.Int16
-    IL_01bd:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_01c2:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_01c7:  nop
-    IL_01c8:  ldloc.1
-    IL_01c9:  ldloc.0
-    IL_01ca:  ldstr      "[Field f10] [Type '{0}']"
-    IL_01cf:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_01d4:  ldarg.0
-    IL_01d5:  ldfld      valuetype MCCTest.VTypeD MCCTest.VTypeE::f10
-    IL_01da:  box        MCCTest.VTypeD
-    IL_01df:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_01e4:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_01e9:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_01ee:  nop
-    IL_01ef:  ldloc.1
-    IL_01f0:  ldarg.0
-    IL_01f1:  ldflda     valuetype MCCTest.VTypeD MCCTest.VTypeE::f10
-    IL_01f6:  ldarg.1
-    IL_01f7:  ldc.i4.1
-    IL_01f8:  add
-    IL_01f9:  call       instance string MCCTest.VTypeD::Dump(int32)
-    IL_01fe:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0203:  nop
-    IL_0204:  ldloc.1
-    IL_0205:  ldloc.0
-    IL_0206:  ldstr      "[Field f11] [Type '{0}']"
-    IL_020b:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0210:  ldarg.0
-    IL_0211:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f11
-    IL_0216:  box        MCCTest.VType8
-    IL_021b:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0220:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0225:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_022a:  nop
-    IL_022b:  ldloc.1
-    IL_022c:  ldarg.0
-    IL_022d:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f11
-    IL_0232:  ldarg.1
-    IL_0233:  ldc.i4.1
-    IL_0234:  add
-    IL_0235:  call       instance string MCCTest.VType8::Dump(int32)
-    IL_023a:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_023f:  nop
-    IL_0240:  ldloc.1
-    IL_0241:  ldloc.0
-    IL_0242:  ldstr      "[Field f12] [Type '{0}']"
-    IL_0247:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_024c:  ldarg.0
-    IL_024d:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeE::f12
-    IL_0252:  box        MCCTest.VTypeC
-    IL_0257:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_025c:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0261:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0266:  nop
-    IL_0267:  ldloc.1
-    IL_0268:  ldarg.0
-    IL_0269:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeE::f12
-    IL_026e:  ldarg.1
-    IL_026f:  ldc.i4.1
-    IL_0270:  add
-    IL_0271:  call       instance string MCCTest.VTypeC::Dump(int32)
-    IL_0276:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_027b:  nop
-    IL_027c:  ldloc.1
-    IL_027d:  ldloc.0
-    IL_027e:  ldstr      "[Field f13] [Type '{0}']"
-    IL_0283:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0288:  ldarg.0
-    IL_0289:  ldfld      valuetype MCCTest.VTypeA MCCTest.VTypeE::f13
-    IL_028e:  box        MCCTest.VTypeA
-    IL_0293:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0298:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_029d:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_02a2:  nop
-    IL_02a3:  ldloc.1
-    IL_02a4:  ldarg.0
-    IL_02a5:  ldflda     valuetype MCCTest.VTypeA MCCTest.VTypeE::f13
-    IL_02aa:  ldarg.1
-    IL_02ab:  ldc.i4.1
-    IL_02ac:  add
-    IL_02ad:  call       instance string MCCTest.VTypeA::Dump(int32)
-    IL_02b2:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_02b7:  nop
-    IL_02b8:  ldloc.1
-    IL_02b9:  ldloc.0
-    IL_02ba:  ldstr      "f14    = "
-    IL_02bf:  ldarg.0
-    IL_02c0:  ldfld      float32 MCCTest.VTypeE::f14
-    IL_02c5:  box        [mscorlib]System.Single
-    IL_02ca:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_02cf:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_02d4:  nop
-    IL_02d5:  ldloc.1
-    IL_02d6:  ldloc.0
-    IL_02d7:  ldstr      "f15    = "
-    IL_02dc:  ldarg.0
-    IL_02dd:  ldfld      int32 MCCTest.VTypeE::f15
-    IL_02e2:  box        [mscorlib]System.Int32
-    IL_02e7:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_02ec:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_02f1:  nop
-    IL_02f2:  ldloc.1
-    IL_02f3:  ldloc.0
-    IL_02f4:  ldstr      "[Field f16] [Type '{0}']"
-    IL_02f9:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_02fe:  ldarg.0
-    IL_02ff:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeE::f16
-    IL_0304:  box        MCCTest.VTypeC
-    IL_0309:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_030e:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0313:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0318:  nop
-    IL_0319:  ldloc.1
-    IL_031a:  ldarg.0
-    IL_031b:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeE::f16
-    IL_0320:  ldarg.1
-    IL_0321:  ldc.i4.1
-    IL_0322:  add
-    IL_0323:  call       instance string MCCTest.VTypeC::Dump(int32)
-    IL_0328:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_032d:  nop
-    IL_032e:  ldloc.1
-    IL_032f:  ldloc.0
-    IL_0330:  ldstr      "[Field f17] [Type '{0}']"
-    IL_0335:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_033a:  ldarg.0
-    IL_033b:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeE::f17
-    IL_0340:  box        MCCTest.VType7
-    IL_0345:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_034a:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_034f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0354:  nop
-    IL_0355:  ldloc.1
-    IL_0356:  ldarg.0
-    IL_0357:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeE::f17
-    IL_035c:  ldarg.1
-    IL_035d:  ldc.i4.1
-    IL_035e:  add
-    IL_035f:  call       instance string MCCTest.VType7::Dump(int32)
-    IL_0364:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0369:  nop
-    IL_036a:  ldloc.1
-    IL_036b:  ldloc.0
-    IL_036c:  ldstr      "f18    = "
-    IL_0371:  ldarg.0
-    IL_0372:  ldfld      float64 MCCTest.VTypeE::f18
-    IL_0377:  box        [mscorlib]System.Double
-    IL_037c:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_0381:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0386:  nop
-    IL_0387:  ldloc.1
-    IL_0388:  ldloc.0
-    IL_0389:  ldstr      "f19    = "
-    IL_038e:  ldarg.0
-    IL_038f:  ldfld      uint64 MCCTest.VTypeE::f19
-    IL_0394:  box        [mscorlib]System.UInt64
-    IL_0399:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_039e:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_03a3:  nop
-    IL_03a4:  ldloc.1
-    IL_03a5:  ldloc.0
-    IL_03a6:  ldstr      "[Field f20] [Type '{0}']"
-    IL_03ab:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_03b0:  ldarg.0
-    IL_03b1:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f20
-    IL_03b6:  box        MCCTest.VType8
-    IL_03bb:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_03c0:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_03c5:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_03ca:  nop
-    IL_03cb:  ldloc.1
-    IL_03cc:  ldarg.0
-    IL_03cd:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f20
-    IL_03d2:  ldarg.1
-    IL_03d3:  ldc.i4.1
-    IL_03d4:  add
-    IL_03d5:  call       instance string MCCTest.VType8::Dump(int32)
-    IL_03da:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_03df:  nop
-    IL_03e0:  ldloc.1
-    IL_03e1:  ldloc.0
-    IL_03e2:  ldstr      "[Field f21] [Type '{0}']"
-    IL_03e7:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_03ec:  ldarg.0
-    IL_03ed:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeE::f21
-    IL_03f2:  box        MCCTest.VType9
-    IL_03f7:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_03fc:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0401:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0406:  nop
-    IL_0407:  ldloc.1
-    IL_0408:  ldarg.0
-    IL_0409:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeE::f21
-    IL_040e:  ldarg.1
-    IL_040f:  ldc.i4.1
-    IL_0410:  add
-    IL_0411:  call       instance string MCCTest.VType9::Dump(int32)
-    IL_0416:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_041b:  nop
-    IL_041c:  ldloc.1
-    IL_041d:  ldloc.0
-    IL_041e:  ldstr      "[Field f22] [Type '{0}']"
-    IL_0423:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0428:  ldarg.0
-    IL_0429:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeE::f22
-    IL_042e:  box        MCCTest.VTypeC
-    IL_0433:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0438:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_043d:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0442:  nop
-    IL_0443:  ldloc.1
-    IL_0444:  ldarg.0
-    IL_0445:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeE::f22
-    IL_044a:  ldarg.1
-    IL_044b:  ldc.i4.1
-    IL_044c:  add
-    IL_044d:  call       instance string MCCTest.VTypeC::Dump(int32)
-    IL_0452:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0457:  nop
-    IL_0458:  ldloc.1
-    IL_0459:  ldloc.0
-    IL_045a:  ldstr      "[Field f23] [Type '{0}']"
-    IL_045f:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0464:  ldarg.0
-    IL_0465:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeE::f23
-    IL_046a:  box        MCCTest.VType8
-    IL_046f:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0474:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0479:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_047e:  nop
-    IL_047f:  ldloc.1
-    IL_0480:  ldarg.0
-    IL_0481:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeE::f23
-    IL_0486:  ldarg.1
-    IL_0487:  ldc.i4.1
-    IL_0488:  add
-    IL_0489:  call       instance string MCCTest.VType8::Dump(int32)
-    IL_048e:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0493:  nop
-    IL_0494:  ldloc.1
-    IL_0495:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_049a:  stloc.2
-    IL_049b:  br.s       IL_049d
-
-    IL_049d:  ldloc.2
-    IL_049e:  ret
-  } // end of method VTypeE::Dump
-
-  .method public hidebysig instance string
-          Dump() cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (string V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance string MCCTest.VTypeE::Dump(int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method VTypeE::Dump
-
-} // end of class MCCTest.VTypeE
-
-.class public sequential ansi sealed beforefieldinit MCCTest.VTypeF
-       extends [mscorlib]System.ValueType
-       implements class MCCTest.CType`1<valuetype MCCTest.VTypeF>
-{
-  .field public char f1
-  .field public float64 f2
-  .field public float32 f3
-  .field public valuetype MCCTest.VType9 f4
-  .field public class MCCTest.RType4 f5
-  .field public char f6
-  .field public float32 f7
-  .field public valuetype MCCTest.VTypeE f8
-  .field public valuetype MCCTest.VType8 f9
-  .field public valuetype MCCTest.VTypeE f10
-  .field public valuetype MCCTest.VType3 f11
-  .field public int64 f12
-  .field public uint16 f13
-  .field public valuetype MCCTest.VType5 f14
-  .field public valuetype MCCTest.VType9 f15
-  .field public class MCCTest.RType4 f16
-  .field public class MCCTest.RType4 f17
-  .field public int32 f18
-  .field public valuetype MCCTest.VType8 f19
-  .field public class MCCTest.RType4 f20
-  .field public valuetype MCCTest.VTypeA f21
-  .field public valuetype MCCTest.VTypeD f22
-  .field public valuetype MCCTest.VType9 f23
-  .field public valuetype MCCTest.VTypeE f24
-  .field public class MCCTest.RType4 f25
-  .field public float32 f26
-  .field public uint8 f27
-  .field public float64 f28
-  .field public valuetype MCCTest.VTypeC f29
-  .field public valuetype MCCTest.VType6 f30
-  .field public float64 f31
-  .field public valuetype MCCTest.VType8 f32
-  .field public int8 f33
-  .field public valuetype MCCTest.VTypeD f34
-  .field public valuetype MCCTest.VTypeE f35
-  .field public valuetype MCCTest.VTypeE f36
-  .field public valuetype MCCTest.VType7 f37
-  .field public valuetype MCCTest.VType9 f38
-  .field public valuetype MCCTest.VType8 f39
-  .field public float32 f40
-  .field public int32 f41
-  .field public char f42
-  .field public valuetype MCCTest.VTypeB f43
-  .field public valuetype MCCTest.VType8 f44
-  .method public hidebysig newslot virtual final
-          instance void  Init(int32 count) cil managed
-  {
-    // Code size       547 (0x223)
-    .maxstack  2
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0007:  stfld      class MCCTest.RType4 MCCTest.VTypeF::f5
-    IL_000c:  ldarg.0
-    IL_000d:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0012:  stfld      class MCCTest.RType4 MCCTest.VTypeF::f16
-    IL_0017:  ldarg.0
-    IL_0018:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_001d:  stfld      class MCCTest.RType4 MCCTest.VTypeF::f17
-    IL_0022:  ldarg.0
-    IL_0023:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0028:  stfld      class MCCTest.RType4 MCCTest.VTypeF::f20
-    IL_002d:  ldarg.0
-    IL_002e:  newobj     instance void MCCTest.RType4::.ctor()
-    IL_0033:  stfld      class MCCTest.RType4 MCCTest.VTypeF::f25
-    IL_0038:  ldarg.0
-    IL_0039:  ldarg.1
-    IL_003a:  conv.u2
-    IL_003b:  stfld      char MCCTest.VTypeF::f1
-    IL_0040:  ldarg.0
-    IL_0041:  ldarg.1
-    IL_0042:  conv.r8
-    IL_0043:  stfld      float64 MCCTest.VTypeF::f2
-    IL_0048:  ldarg.0
-    IL_0049:  ldarg.1
-    IL_004a:  conv.r4
-    IL_004b:  stfld      float32 MCCTest.VTypeF::f3
-    IL_0050:  ldarg.0
-    IL_0051:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f4
-    IL_0056:  ldarg.1
-    IL_0057:  call       instance void MCCTest.VType9::Init(int32)
-    IL_005c:  nop
-    IL_005d:  ldarg.0
-    IL_005e:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f5
-    IL_0063:  ldarg.1
-    IL_0064:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_0069:  nop
-    IL_006a:  ldarg.0
-    IL_006b:  ldarg.1
-    IL_006c:  conv.u2
-    IL_006d:  stfld      char MCCTest.VTypeF::f6
-    IL_0072:  ldarg.0
-    IL_0073:  ldarg.1
-    IL_0074:  conv.r4
-    IL_0075:  stfld      float32 MCCTest.VTypeF::f7
-    IL_007a:  ldarg.0
-    IL_007b:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f8
-    IL_0080:  ldarg.1
-    IL_0081:  call       instance void MCCTest.VTypeE::Init(int32)
-    IL_0086:  nop
-    IL_0087:  ldarg.0
-    IL_0088:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f9
-    IL_008d:  ldarg.1
-    IL_008e:  call       instance void MCCTest.VType8::Init(int32)
-    IL_0093:  nop
-    IL_0094:  ldarg.0
-    IL_0095:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f10
-    IL_009a:  ldarg.1
-    IL_009b:  call       instance void MCCTest.VTypeE::Init(int32)
-    IL_00a0:  nop
-    IL_00a1:  ldarg.0
-    IL_00a2:  ldflda     valuetype MCCTest.VType3 MCCTest.VTypeF::f11
-    IL_00a7:  ldarg.1
-    IL_00a8:  call       instance void MCCTest.VType3::Init(int32)
-    IL_00ad:  nop
-    IL_00ae:  ldarg.0
-    IL_00af:  ldarg.1
-    IL_00b0:  conv.i8
-    IL_00b1:  stfld      int64 MCCTest.VTypeF::f12
-    IL_00b6:  ldarg.0
-    IL_00b7:  ldarg.1
-    IL_00b8:  conv.u2
-    IL_00b9:  stfld      uint16 MCCTest.VTypeF::f13
-    IL_00be:  ldarg.0
-    IL_00bf:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeF::f14
-    IL_00c4:  ldarg.1
-    IL_00c5:  call       instance void MCCTest.VType5::Init(int32)
-    IL_00ca:  nop
-    IL_00cb:  ldarg.0
-    IL_00cc:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f15
-    IL_00d1:  ldarg.1
-    IL_00d2:  call       instance void MCCTest.VType9::Init(int32)
-    IL_00d7:  nop
-    IL_00d8:  ldarg.0
-    IL_00d9:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f16
-    IL_00de:  ldarg.1
-    IL_00df:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_00e4:  nop
-    IL_00e5:  ldarg.0
-    IL_00e6:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f17
-    IL_00eb:  ldarg.1
-    IL_00ec:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_00f1:  nop
-    IL_00f2:  ldarg.0
-    IL_00f3:  ldarg.1
-    IL_00f4:  stfld      int32 MCCTest.VTypeF::f18
-    IL_00f9:  ldarg.0
-    IL_00fa:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f19
-    IL_00ff:  ldarg.1
-    IL_0100:  call       instance void MCCTest.VType8::Init(int32)
-    IL_0105:  nop
-    IL_0106:  ldarg.0
-    IL_0107:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f20
-    IL_010c:  ldarg.1
-    IL_010d:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_0112:  nop
-    IL_0113:  ldarg.0
-    IL_0114:  ldflda     valuetype MCCTest.VTypeA MCCTest.VTypeF::f21
-    IL_0119:  ldarg.1
-    IL_011a:  call       instance void MCCTest.VTypeA::Init(int32)
-    IL_011f:  nop
-    IL_0120:  ldarg.0
-    IL_0121:  ldflda     valuetype MCCTest.VTypeD MCCTest.VTypeF::f22
-    IL_0126:  ldarg.1
-    IL_0127:  call       instance void MCCTest.VTypeD::Init(int32)
-    IL_012c:  nop
-    IL_012d:  ldarg.0
-    IL_012e:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f23
-    IL_0133:  ldarg.1
-    IL_0134:  call       instance void MCCTest.VType9::Init(int32)
-    IL_0139:  nop
-    IL_013a:  ldarg.0
-    IL_013b:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f24
-    IL_0140:  ldarg.1
-    IL_0141:  call       instance void MCCTest.VTypeE::Init(int32)
-    IL_0146:  nop
-    IL_0147:  ldarg.0
-    IL_0148:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f25
-    IL_014d:  ldarg.1
-    IL_014e:  callvirt   instance void MCCTest.RType4::Init(int32)
-    IL_0153:  nop
-    IL_0154:  ldarg.0
-    IL_0155:  ldarg.1
-    IL_0156:  conv.r4
-    IL_0157:  stfld      float32 MCCTest.VTypeF::f26
-    IL_015c:  ldarg.0
-    IL_015d:  ldarg.1
-    IL_015e:  conv.u1
-    IL_015f:  stfld      uint8 MCCTest.VTypeF::f27
-    IL_0164:  ldarg.0
-    IL_0165:  ldarg.1
-    IL_0166:  conv.r8
-    IL_0167:  stfld      float64 MCCTest.VTypeF::f28
-    IL_016c:  ldarg.0
-    IL_016d:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeF::f29
-    IL_0172:  ldarg.1
-    IL_0173:  call       instance void MCCTest.VTypeC::Init(int32)
-    IL_0178:  nop
-    IL_0179:  ldarg.0
-    IL_017a:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeF::f30
-    IL_017f:  ldarg.1
-    IL_0180:  call       instance void MCCTest.VType6::Init(int32)
-    IL_0185:  nop
-    IL_0186:  ldarg.0
-    IL_0187:  ldarg.1
-    IL_0188:  conv.r8
-    IL_0189:  stfld      float64 MCCTest.VTypeF::f31
-    IL_018e:  ldarg.0
-    IL_018f:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f32
-    IL_0194:  ldarg.1
-    IL_0195:  call       instance void MCCTest.VType8::Init(int32)
-    IL_019a:  nop
-    IL_019b:  ldarg.0
-    IL_019c:  ldarg.1
-    IL_019d:  conv.i1
-    IL_019e:  stfld      int8 MCCTest.VTypeF::f33
-    IL_01a3:  ldarg.0
-    IL_01a4:  ldflda     valuetype MCCTest.VTypeD MCCTest.VTypeF::f34
-    IL_01a9:  ldarg.1
-    IL_01aa:  call       instance void MCCTest.VTypeD::Init(int32)
-    IL_01af:  nop
-    IL_01b0:  ldarg.0
-    IL_01b1:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f35
-    IL_01b6:  ldarg.1
-    IL_01b7:  call       instance void MCCTest.VTypeE::Init(int32)
-    IL_01bc:  nop
-    IL_01bd:  ldarg.0
-    IL_01be:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f36
-    IL_01c3:  ldarg.1
-    IL_01c4:  call       instance void MCCTest.VTypeE::Init(int32)
-    IL_01c9:  nop
-    IL_01ca:  ldarg.0
-    IL_01cb:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeF::f37
-    IL_01d0:  ldarg.1
-    IL_01d1:  call       instance void MCCTest.VType7::Init(int32)
-    IL_01d6:  nop
-    IL_01d7:  ldarg.0
-    IL_01d8:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f38
-    IL_01dd:  ldarg.1
-    IL_01de:  call       instance void MCCTest.VType9::Init(int32)
-    IL_01e3:  nop
-    IL_01e4:  ldarg.0
-    IL_01e5:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f39
-    IL_01ea:  ldarg.1
-    IL_01eb:  call       instance void MCCTest.VType8::Init(int32)
-    IL_01f0:  nop
-    IL_01f1:  ldarg.0
-    IL_01f2:  ldarg.1
-    IL_01f3:  conv.r4
-    IL_01f4:  stfld      float32 MCCTest.VTypeF::f40
-    IL_01f9:  ldarg.0
-    IL_01fa:  ldarg.1
-    IL_01fb:  stfld      int32 MCCTest.VTypeF::f41
-    IL_0200:  ldarg.0
-    IL_0201:  ldarg.1
-    IL_0202:  conv.u2
-    IL_0203:  stfld      char MCCTest.VTypeF::f42
-    IL_0208:  ldarg.0
-    IL_0209:  ldflda     valuetype MCCTest.VTypeB MCCTest.VTypeF::f43
-    IL_020e:  ldarg.1
-    IL_020f:  call       instance void MCCTest.VTypeB::Init(int32)
-    IL_0214:  nop
-    IL_0215:  ldarg.0
-    IL_0216:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f44
-    IL_021b:  ldarg.1
-    IL_021c:  call       instance void MCCTest.VType8::Init(int32)
-    IL_0221:  nop
-    IL_0222:  ret
-  } // end of method VTypeF::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Init() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.1
-    IL_0003:  call       instance void MCCTest.VTypeF::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VTypeF::Init
-
-  .method public hidebysig newslot virtual final
-          instance void  Zero() cil managed
-  {
-    // Code size       10 (0xa)
-    .maxstack  8
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance void MCCTest.VTypeF::Init(int32)
-    IL_0008:  nop
-    IL_0009:  ret
-  } // end of method VTypeF::Zero
-
-  .method public hidebysig instance void
-          Add(valuetype MCCTest.VTypeF val) cil managed
-  {
-    // Code size       860 (0x35c)
-    .maxstack  3
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  dup
-    IL_0003:  ldfld      char MCCTest.VTypeF::f1
-    IL_0008:  ldarga.s   val
-    IL_000a:  ldfld      char MCCTest.VTypeF::f1
-    IL_000f:  add
-    IL_0010:  conv.u2
-    IL_0011:  stfld      char MCCTest.VTypeF::f1
-    IL_0016:  ldarg.0
-    IL_0017:  dup
-    IL_0018:  ldfld      float64 MCCTest.VTypeF::f2
-    IL_001d:  ldarga.s   val
-    IL_001f:  ldfld      float64 MCCTest.VTypeF::f2
-    IL_0024:  add
-    IL_0025:  stfld      float64 MCCTest.VTypeF::f2
-    IL_002a:  ldarg.0
-    IL_002b:  dup
-    IL_002c:  ldfld      float32 MCCTest.VTypeF::f3
-    IL_0031:  ldarga.s   val
-    IL_0033:  ldfld      float32 MCCTest.VTypeF::f3
-    IL_0038:  add
-    IL_0039:  stfld      float32 MCCTest.VTypeF::f3
-    IL_003e:  ldarg.0
-    IL_003f:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f4
-    IL_0044:  ldarga.s   val
-    IL_0046:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f4
-    IL_004b:  call       instance void MCCTest.VType9::Add(valuetype MCCTest.VType9)
-    IL_0050:  nop
-    IL_0051:  ldarg.0
-    IL_0052:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f5
-    IL_0057:  ldarga.s   val
-    IL_0059:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f5
-    IL_005e:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_0063:  nop
-    IL_0064:  ldarg.0
-    IL_0065:  dup
-    IL_0066:  ldfld      char MCCTest.VTypeF::f6
-    IL_006b:  ldarga.s   val
-    IL_006d:  ldfld      char MCCTest.VTypeF::f6
-    IL_0072:  add
-    IL_0073:  conv.u2
-    IL_0074:  stfld      char MCCTest.VTypeF::f6
-    IL_0079:  ldarg.0
-    IL_007a:  dup
-    IL_007b:  ldfld      float32 MCCTest.VTypeF::f7
-    IL_0080:  ldarga.s   val
-    IL_0082:  ldfld      float32 MCCTest.VTypeF::f7
-    IL_0087:  add
-    IL_0088:  stfld      float32 MCCTest.VTypeF::f7
-    IL_008d:  ldarg.0
-    IL_008e:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f8
-    IL_0093:  ldarga.s   val
-    IL_0095:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f8
-    IL_009a:  call       instance void MCCTest.VTypeE::Add(valuetype MCCTest.VTypeE)
-    IL_009f:  nop
-    IL_00a0:  ldarg.0
-    IL_00a1:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f9
-    IL_00a6:  ldarga.s   val
-    IL_00a8:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f9
-    IL_00ad:  call       instance void MCCTest.VType8::Add(valuetype MCCTest.VType8)
-    IL_00b2:  nop
-    IL_00b3:  ldarg.0
-    IL_00b4:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f10
-    IL_00b9:  ldarga.s   val
-    IL_00bb:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f10
-    IL_00c0:  call       instance void MCCTest.VTypeE::Add(valuetype MCCTest.VTypeE)
-    IL_00c5:  nop
-    IL_00c6:  ldarg.0
-    IL_00c7:  ldflda     valuetype MCCTest.VType3 MCCTest.VTypeF::f11
-    IL_00cc:  ldarga.s   val
-    IL_00ce:  ldfld      valuetype MCCTest.VType3 MCCTest.VTypeF::f11
-    IL_00d3:  call       instance void MCCTest.VType3::Add(valuetype MCCTest.VType3)
-    IL_00d8:  nop
-    IL_00d9:  ldarg.0
-    IL_00da:  dup
-    IL_00db:  ldfld      int64 MCCTest.VTypeF::f12
-    IL_00e0:  ldarga.s   val
-    IL_00e2:  ldfld      int64 MCCTest.VTypeF::f12
-    IL_00e7:  add
-    IL_00e8:  stfld      int64 MCCTest.VTypeF::f12
-    IL_00ed:  ldarg.0
-    IL_00ee:  dup
-    IL_00ef:  ldfld      uint16 MCCTest.VTypeF::f13
-    IL_00f4:  ldarga.s   val
-    IL_00f6:  ldfld      uint16 MCCTest.VTypeF::f13
-    IL_00fb:  add
-    IL_00fc:  conv.u2
-    IL_00fd:  stfld      uint16 MCCTest.VTypeF::f13
-    IL_0102:  ldarg.0
-    IL_0103:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeF::f14
-    IL_0108:  ldarga.s   val
-    IL_010a:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeF::f14
-    IL_010f:  call       instance void MCCTest.VType5::Add(valuetype MCCTest.VType5)
-    IL_0114:  nop
-    IL_0115:  ldarg.0
-    IL_0116:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f15
-    IL_011b:  ldarga.s   val
-    IL_011d:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f15
-    IL_0122:  call       instance void MCCTest.VType9::Add(valuetype MCCTest.VType9)
-    IL_0127:  nop
-    IL_0128:  ldarg.0
-    IL_0129:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f16
-    IL_012e:  ldarga.s   val
-    IL_0130:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f16
-    IL_0135:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_013a:  nop
-    IL_013b:  ldarg.0
-    IL_013c:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f17
-    IL_0141:  ldarga.s   val
-    IL_0143:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f17
-    IL_0148:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_014d:  nop
-    IL_014e:  ldarg.0
-    IL_014f:  dup
-    IL_0150:  ldfld      int32 MCCTest.VTypeF::f18
-    IL_0155:  ldarga.s   val
-    IL_0157:  ldfld      int32 MCCTest.VTypeF::f18
-    IL_015c:  add
-    IL_015d:  stfld      int32 MCCTest.VTypeF::f18
-    IL_0162:  ldarg.0
-    IL_0163:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f19
-    IL_0168:  ldarga.s   val
-    IL_016a:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f19
-    IL_016f:  call       instance void MCCTest.VType8::Add(valuetype MCCTest.VType8)
-    IL_0174:  nop
-    IL_0175:  ldarg.0
-    IL_0176:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f20
-    IL_017b:  ldarga.s   val
-    IL_017d:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f20
-    IL_0182:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_0187:  nop
-    IL_0188:  ldarg.0
-    IL_0189:  ldflda     valuetype MCCTest.VTypeA MCCTest.VTypeF::f21
-    IL_018e:  ldarga.s   val
-    IL_0190:  ldfld      valuetype MCCTest.VTypeA MCCTest.VTypeF::f21
-    IL_0195:  call       instance void MCCTest.VTypeA::Add(valuetype MCCTest.VTypeA)
-    IL_019a:  nop
-    IL_019b:  ldarg.0
-    IL_019c:  ldflda     valuetype MCCTest.VTypeD MCCTest.VTypeF::f22
-    IL_01a1:  ldarga.s   val
-    IL_01a3:  ldfld      valuetype MCCTest.VTypeD MCCTest.VTypeF::f22
-    IL_01a8:  call       instance void MCCTest.VTypeD::Add(valuetype MCCTest.VTypeD)
-    IL_01ad:  nop
-    IL_01ae:  ldarg.0
-    IL_01af:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f23
-    IL_01b4:  ldarga.s   val
-    IL_01b6:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f23
-    IL_01bb:  call       instance void MCCTest.VType9::Add(valuetype MCCTest.VType9)
-    IL_01c0:  nop
-    IL_01c1:  ldarg.0
-    IL_01c2:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f24
-    IL_01c7:  ldarga.s   val
-    IL_01c9:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f24
-    IL_01ce:  call       instance void MCCTest.VTypeE::Add(valuetype MCCTest.VTypeE)
-    IL_01d3:  nop
-    IL_01d4:  ldarg.0
-    IL_01d5:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f25
-    IL_01da:  ldarga.s   val
-    IL_01dc:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f25
-    IL_01e1:  callvirt   instance void MCCTest.RType4::Add(class MCCTest.RType4)
-    IL_01e6:  nop
-    IL_01e7:  ldarg.0
-    IL_01e8:  dup
-    IL_01e9:  ldfld      float32 MCCTest.VTypeF::f26
-    IL_01ee:  ldarga.s   val
-    IL_01f0:  ldfld      float32 MCCTest.VTypeF::f26
-    IL_01f5:  add
-    IL_01f6:  stfld      float32 MCCTest.VTypeF::f26
-    IL_01fb:  ldarg.0
-    IL_01fc:  dup
-    IL_01fd:  ldfld      uint8 MCCTest.VTypeF::f27
-    IL_0202:  ldarga.s   val
-    IL_0204:  ldfld      uint8 MCCTest.VTypeF::f27
-    IL_0209:  add
-    IL_020a:  conv.u1
-    IL_020b:  stfld      uint8 MCCTest.VTypeF::f27
-    IL_0210:  ldarg.0
-    IL_0211:  dup
-    IL_0212:  ldfld      float64 MCCTest.VTypeF::f28
-    IL_0217:  ldarga.s   val
-    IL_0219:  ldfld      float64 MCCTest.VTypeF::f28
-    IL_021e:  add
-    IL_021f:  stfld      float64 MCCTest.VTypeF::f28
-    IL_0224:  ldarg.0
-    IL_0225:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeF::f29
-    IL_022a:  ldarga.s   val
-    IL_022c:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeF::f29
-    IL_0231:  call       instance void MCCTest.VTypeC::Add(valuetype MCCTest.VTypeC)
-    IL_0236:  nop
-    IL_0237:  ldarg.0
-    IL_0238:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeF::f30
-    IL_023d:  ldarga.s   val
-    IL_023f:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeF::f30
-    IL_0244:  call       instance void MCCTest.VType6::Add(valuetype MCCTest.VType6)
-    IL_0249:  nop
-    IL_024a:  ldarg.0
-    IL_024b:  dup
-    IL_024c:  ldfld      float64 MCCTest.VTypeF::f31
-    IL_0251:  ldarga.s   val
-    IL_0253:  ldfld      float64 MCCTest.VTypeF::f31
-    IL_0258:  add
-    IL_0259:  stfld      float64 MCCTest.VTypeF::f31
-    IL_025e:  ldarg.0
-    IL_025f:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f32
-    IL_0264:  ldarga.s   val
-    IL_0266:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f32
-    IL_026b:  call       instance void MCCTest.VType8::Add(valuetype MCCTest.VType8)
-    IL_0270:  nop
-    IL_0271:  ldarg.0
-    IL_0272:  dup
-    IL_0273:  ldfld      int8 MCCTest.VTypeF::f33
-    IL_0278:  ldarga.s   val
-    IL_027a:  ldfld      int8 MCCTest.VTypeF::f33
-    IL_027f:  add
-    IL_0280:  conv.i1
-    IL_0281:  stfld      int8 MCCTest.VTypeF::f33
-    IL_0286:  ldarg.0
-    IL_0287:  ldflda     valuetype MCCTest.VTypeD MCCTest.VTypeF::f34
-    IL_028c:  ldarga.s   val
-    IL_028e:  ldfld      valuetype MCCTest.VTypeD MCCTest.VTypeF::f34
-    IL_0293:  call       instance void MCCTest.VTypeD::Add(valuetype MCCTest.VTypeD)
-    IL_0298:  nop
-    IL_0299:  ldarg.0
-    IL_029a:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f35
-    IL_029f:  ldarga.s   val
-    IL_02a1:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f35
-    IL_02a6:  call       instance void MCCTest.VTypeE::Add(valuetype MCCTest.VTypeE)
-    IL_02ab:  nop
-    IL_02ac:  ldarg.0
-    IL_02ad:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f36
-    IL_02b2:  ldarga.s   val
-    IL_02b4:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f36
-    IL_02b9:  call       instance void MCCTest.VTypeE::Add(valuetype MCCTest.VTypeE)
-    IL_02be:  nop
-    IL_02bf:  ldarg.0
-    IL_02c0:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeF::f37
-    IL_02c5:  ldarga.s   val
-    IL_02c7:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeF::f37
-    IL_02cc:  call       instance void MCCTest.VType7::Add(valuetype MCCTest.VType7)
-    IL_02d1:  nop
-    IL_02d2:  ldarg.0
-    IL_02d3:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f38
-    IL_02d8:  ldarga.s   val
-    IL_02da:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f38
-    IL_02df:  call       instance void MCCTest.VType9::Add(valuetype MCCTest.VType9)
-    IL_02e4:  nop
-    IL_02e5:  ldarg.0
-    IL_02e6:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f39
-    IL_02eb:  ldarga.s   val
-    IL_02ed:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f39
-    IL_02f2:  call       instance void MCCTest.VType8::Add(valuetype MCCTest.VType8)
-    IL_02f7:  nop
-    IL_02f8:  ldarg.0
-    IL_02f9:  dup
-    IL_02fa:  ldfld      float32 MCCTest.VTypeF::f40
-    IL_02ff:  ldarga.s   val
-    IL_0301:  ldfld      float32 MCCTest.VTypeF::f40
-    IL_0306:  add
-    IL_0307:  stfld      float32 MCCTest.VTypeF::f40
-    IL_030c:  ldarg.0
-    IL_030d:  dup
-    IL_030e:  ldfld      int32 MCCTest.VTypeF::f41
-    IL_0313:  ldarga.s   val
-    IL_0315:  ldfld      int32 MCCTest.VTypeF::f41
-    IL_031a:  add
-    IL_031b:  stfld      int32 MCCTest.VTypeF::f41
-    IL_0320:  ldarg.0
-    IL_0321:  dup
-    IL_0322:  ldfld      char MCCTest.VTypeF::f42
-    IL_0327:  ldarga.s   val
-    IL_0329:  ldfld      char MCCTest.VTypeF::f42
-    IL_032e:  add
-    IL_032f:  conv.u2
-    IL_0330:  stfld      char MCCTest.VTypeF::f42
-    IL_0335:  ldarg.0
-    IL_0336:  ldflda     valuetype MCCTest.VTypeB MCCTest.VTypeF::f43
-    IL_033b:  ldarga.s   val
-    IL_033d:  ldfld      valuetype MCCTest.VTypeB MCCTest.VTypeF::f43
-    IL_0342:  call       instance void MCCTest.VTypeB::Add(valuetype MCCTest.VTypeB)
-    IL_0347:  nop
-    IL_0348:  ldarg.0
-    IL_0349:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f44
-    IL_034e:  ldarga.s   val
-    IL_0350:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f44
-    IL_0355:  call       instance void MCCTest.VType8::Add(valuetype MCCTest.VType8)
-    IL_035a:  nop
-    IL_035b:  ret
-  } // end of method VTypeF::Add
-
-  .method public hidebysig newslot virtual final
-          instance void  Check(valuetype MCCTest.VTypeF expected) cil managed
-  {
-    // Code size       1901 (0x76d)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.Type V_1,
-             class MCCTest.ResultVerificationException V_2,
-             bool V_3)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldfld      char MCCTest.VTypeF::f1
-    IL_0007:  ldarga.s   expected
-    IL_0009:  ldfld      char MCCTest.VTypeF::f1
-    IL_000e:  ceq
-    IL_0010:  stloc.3
-    IL_0011:  ldloc.3
-    IL_0012:  brtrue.s   IL_002f
-
-    IL_0014:  nop
-    IL_0015:  ldstr      "f1"
-    IL_001a:  ldarg.0
-    IL_001b:  ldfld      char MCCTest.VTypeF::f1
-    IL_0020:  conv.u8
-    IL_0021:  ldarga.s   expected
-    IL_0023:  ldfld      char MCCTest.VTypeF::f1
-    IL_0028:  conv.u8
-    IL_0029:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_002e:  throw
-
-    IL_002f:  ldarg.0
-    IL_0030:  ldfld      float64 MCCTest.VTypeF::f2
-    IL_0035:  ldarga.s   expected
-    IL_0037:  ldfld      float64 MCCTest.VTypeF::f2
-    IL_003c:  ceq
-    IL_003e:  stloc.3
-    IL_003f:  ldloc.3
-    IL_0040:  brtrue.s   IL_005b
-
-    IL_0042:  nop
-    IL_0043:  ldstr      "f2"
-    IL_0048:  ldarg.0
-    IL_0049:  ldfld      float64 MCCTest.VTypeF::f2
-    IL_004e:  ldarga.s   expected
-    IL_0050:  ldfld      float64 MCCTest.VTypeF::f2
-    IL_0055:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_005a:  throw
-
-    IL_005b:  ldarg.0
-    IL_005c:  ldfld      float32 MCCTest.VTypeF::f3
-    IL_0061:  ldarga.s   expected
-    IL_0063:  ldfld      float32 MCCTest.VTypeF::f3
-    IL_0068:  ceq
-    IL_006a:  stloc.3
-    IL_006b:  ldloc.3
-    IL_006c:  brtrue.s   IL_0089
-
-    IL_006e:  nop
-    IL_006f:  ldstr      "f3"
-    IL_0074:  ldarg.0
-    IL_0075:  ldfld      float32 MCCTest.VTypeF::f3
-    IL_007a:  conv.r8
-    IL_007b:  ldarga.s   expected
-    IL_007d:  ldfld      float32 MCCTest.VTypeF::f3
-    IL_0082:  conv.r8
-    IL_0083:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_0088:  throw
-
-    IL_0089:  ldarg.0
-    IL_008a:  ldfld      char MCCTest.VTypeF::f6
-    IL_008f:  ldarga.s   expected
-    IL_0091:  ldfld      char MCCTest.VTypeF::f6
-    IL_0096:  ceq
-    IL_0098:  stloc.3
-    IL_0099:  ldloc.3
-    IL_009a:  brtrue.s   IL_00b7
-
-    IL_009c:  nop
-    IL_009d:  ldstr      "f6"
-    IL_00a2:  ldarg.0
-    IL_00a3:  ldfld      char MCCTest.VTypeF::f6
-    IL_00a8:  conv.u8
-    IL_00a9:  ldarga.s   expected
-    IL_00ab:  ldfld      char MCCTest.VTypeF::f6
-    IL_00b0:  conv.u8
-    IL_00b1:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_00b6:  throw
-
-    IL_00b7:  ldarg.0
-    IL_00b8:  ldfld      float32 MCCTest.VTypeF::f7
-    IL_00bd:  ldarga.s   expected
-    IL_00bf:  ldfld      float32 MCCTest.VTypeF::f7
-    IL_00c4:  ceq
-    IL_00c6:  stloc.3
-    IL_00c7:  ldloc.3
-    IL_00c8:  brtrue.s   IL_00e5
-
-    IL_00ca:  nop
-    IL_00cb:  ldstr      "f7"
-    IL_00d0:  ldarg.0
-    IL_00d1:  ldfld      float32 MCCTest.VTypeF::f7
-    IL_00d6:  conv.r8
-    IL_00d7:  ldarga.s   expected
-    IL_00d9:  ldfld      float32 MCCTest.VTypeF::f7
-    IL_00de:  conv.r8
-    IL_00df:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_00e4:  throw
-
-    IL_00e5:  ldarg.0
-    IL_00e6:  ldfld      int64 MCCTest.VTypeF::f12
-    IL_00eb:  ldarga.s   expected
-    IL_00ed:  ldfld      int64 MCCTest.VTypeF::f12
-    IL_00f2:  ceq
-    IL_00f4:  stloc.3
-    IL_00f5:  ldloc.3
-    IL_00f6:  brtrue.s   IL_0111
-
-    IL_00f8:  nop
-    IL_00f9:  ldstr      "f12"
-    IL_00fe:  ldarg.0
-    IL_00ff:  ldfld      int64 MCCTest.VTypeF::f12
-    IL_0104:  ldarga.s   expected
-    IL_0106:  ldfld      int64 MCCTest.VTypeF::f12
-    IL_010b:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_0110:  throw
-
-    IL_0111:  ldarg.0
-    IL_0112:  ldfld      uint16 MCCTest.VTypeF::f13
-    IL_0117:  ldarga.s   expected
-    IL_0119:  ldfld      uint16 MCCTest.VTypeF::f13
-    IL_011e:  ceq
-    IL_0120:  stloc.3
-    IL_0121:  ldloc.3
-    IL_0122:  brtrue.s   IL_013f
-
-    IL_0124:  nop
-    IL_0125:  ldstr      "f13"
-    IL_012a:  ldarg.0
-    IL_012b:  ldfld      uint16 MCCTest.VTypeF::f13
-    IL_0130:  conv.u8
-    IL_0131:  ldarga.s   expected
-    IL_0133:  ldfld      uint16 MCCTest.VTypeF::f13
-    IL_0138:  conv.u8
-    IL_0139:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_013e:  throw
-
-    IL_013f:  ldarg.0
-    IL_0140:  ldfld      int32 MCCTest.VTypeF::f18
-    IL_0145:  ldarga.s   expected
-    IL_0147:  ldfld      int32 MCCTest.VTypeF::f18
-    IL_014c:  ceq
-    IL_014e:  stloc.3
-    IL_014f:  ldloc.3
-    IL_0150:  brtrue.s   IL_016d
-
-    IL_0152:  nop
-    IL_0153:  ldstr      "f18"
-    IL_0158:  ldarg.0
-    IL_0159:  ldfld      int32 MCCTest.VTypeF::f18
-    IL_015e:  conv.i8
-    IL_015f:  ldarga.s   expected
-    IL_0161:  ldfld      int32 MCCTest.VTypeF::f18
-    IL_0166:  conv.i8
-    IL_0167:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_016c:  throw
-
-    IL_016d:  ldarg.0
-    IL_016e:  ldfld      float32 MCCTest.VTypeF::f26
-    IL_0173:  ldarga.s   expected
-    IL_0175:  ldfld      float32 MCCTest.VTypeF::f26
-    IL_017a:  ceq
-    IL_017c:  stloc.3
-    IL_017d:  ldloc.3
-    IL_017e:  brtrue.s   IL_019b
-
-    IL_0180:  nop
-    IL_0181:  ldstr      "f26"
-    IL_0186:  ldarg.0
-    IL_0187:  ldfld      float32 MCCTest.VTypeF::f26
-    IL_018c:  conv.r8
-    IL_018d:  ldarga.s   expected
-    IL_018f:  ldfld      float32 MCCTest.VTypeF::f26
-    IL_0194:  conv.r8
-    IL_0195:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_019a:  throw
-
-    IL_019b:  ldarg.0
-    IL_019c:  ldfld      uint8 MCCTest.VTypeF::f27
-    IL_01a1:  ldarga.s   expected
-    IL_01a3:  ldfld      uint8 MCCTest.VTypeF::f27
-    IL_01a8:  ceq
-    IL_01aa:  stloc.3
-    IL_01ab:  ldloc.3
-    IL_01ac:  brtrue.s   IL_01c9
-
-    IL_01ae:  nop
-    IL_01af:  ldstr      "f27"
-    IL_01b4:  ldarg.0
-    IL_01b5:  ldfld      uint8 MCCTest.VTypeF::f27
-    IL_01ba:  conv.u8
-    IL_01bb:  ldarga.s   expected
-    IL_01bd:  ldfld      uint8 MCCTest.VTypeF::f27
-    IL_01c2:  conv.u8
-    IL_01c3:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_01c8:  throw
-
-    IL_01c9:  ldarg.0
-    IL_01ca:  ldfld      float64 MCCTest.VTypeF::f28
-    IL_01cf:  ldarga.s   expected
-    IL_01d1:  ldfld      float64 MCCTest.VTypeF::f28
-    IL_01d6:  ceq
-    IL_01d8:  stloc.3
-    IL_01d9:  ldloc.3
-    IL_01da:  brtrue.s   IL_01f5
-
-    IL_01dc:  nop
-    IL_01dd:  ldstr      "f28"
-    IL_01e2:  ldarg.0
-    IL_01e3:  ldfld      float64 MCCTest.VTypeF::f28
-    IL_01e8:  ldarga.s   expected
-    IL_01ea:  ldfld      float64 MCCTest.VTypeF::f28
-    IL_01ef:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_01f4:  throw
-
-    IL_01f5:  ldarg.0
-    IL_01f6:  ldfld      float64 MCCTest.VTypeF::f31
-    IL_01fb:  ldarga.s   expected
-    IL_01fd:  ldfld      float64 MCCTest.VTypeF::f31
-    IL_0202:  ceq
-    IL_0204:  stloc.3
-    IL_0205:  ldloc.3
-    IL_0206:  brtrue.s   IL_0221
-
-    IL_0208:  nop
-    IL_0209:  ldstr      "f31"
-    IL_020e:  ldarg.0
-    IL_020f:  ldfld      float64 MCCTest.VTypeF::f31
-    IL_0214:  ldarga.s   expected
-    IL_0216:  ldfld      float64 MCCTest.VTypeF::f31
-    IL_021b:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_0220:  throw
-
-    IL_0221:  ldarg.0
-    IL_0222:  ldfld      int8 MCCTest.VTypeF::f33
-    IL_0227:  ldarga.s   expected
-    IL_0229:  ldfld      int8 MCCTest.VTypeF::f33
-    IL_022e:  ceq
-    IL_0230:  stloc.3
-    IL_0231:  ldloc.3
-    IL_0232:  brtrue.s   IL_024f
-
-    IL_0234:  nop
-    IL_0235:  ldstr      "f33"
-    IL_023a:  ldarg.0
-    IL_023b:  ldfld      int8 MCCTest.VTypeF::f33
-    IL_0240:  conv.i8
-    IL_0241:  ldarga.s   expected
-    IL_0243:  ldfld      int8 MCCTest.VTypeF::f33
-    IL_0248:  conv.i8
-    IL_0249:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_024e:  throw
-
-    IL_024f:  ldarg.0
-    IL_0250:  ldfld      float32 MCCTest.VTypeF::f40
-    IL_0255:  ldarga.s   expected
-    IL_0257:  ldfld      float32 MCCTest.VTypeF::f40
-    IL_025c:  ceq
-    IL_025e:  stloc.3
-    IL_025f:  ldloc.3
-    IL_0260:  brtrue.s   IL_027d
-
-    IL_0262:  nop
-    IL_0263:  ldstr      "f40"
-    IL_0268:  ldarg.0
-    IL_0269:  ldfld      float32 MCCTest.VTypeF::f40
-    IL_026e:  conv.r8
-    IL_026f:  ldarga.s   expected
-    IL_0271:  ldfld      float32 MCCTest.VTypeF::f40
-    IL_0276:  conv.r8
-    IL_0277:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  float64,
-                                                                                  float64)
-    IL_027c:  throw
-
-    IL_027d:  ldarg.0
-    IL_027e:  ldfld      int32 MCCTest.VTypeF::f41
-    IL_0283:  ldarga.s   expected
-    IL_0285:  ldfld      int32 MCCTest.VTypeF::f41
-    IL_028a:  ceq
-    IL_028c:  stloc.3
-    IL_028d:  ldloc.3
-    IL_028e:  brtrue.s   IL_02ab
-
-    IL_0290:  nop
-    IL_0291:  ldstr      "f41"
-    IL_0296:  ldarg.0
-    IL_0297:  ldfld      int32 MCCTest.VTypeF::f41
-    IL_029c:  conv.i8
-    IL_029d:  ldarga.s   expected
-    IL_029f:  ldfld      int32 MCCTest.VTypeF::f41
-    IL_02a4:  conv.i8
-    IL_02a5:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_02aa:  throw
-
-    IL_02ab:  ldarg.0
-    IL_02ac:  ldfld      char MCCTest.VTypeF::f42
-    IL_02b1:  ldarga.s   expected
-    IL_02b3:  ldfld      char MCCTest.VTypeF::f42
-    IL_02b8:  ceq
-    IL_02ba:  stloc.3
-    IL_02bb:  ldloc.3
-    IL_02bc:  brtrue.s   IL_02d9
-
-    IL_02be:  nop
-    IL_02bf:  ldstr      "f42"
-    IL_02c4:  ldarg.0
-    IL_02c5:  ldfld      char MCCTest.VTypeF::f42
-    IL_02ca:  conv.u8
-    IL_02cb:  ldarga.s   expected
-    IL_02cd:  ldfld      char MCCTest.VTypeF::f42
-    IL_02d2:  conv.u8
-    IL_02d3:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                  int64,
-                                                                                  int64)
-    IL_02d8:  throw
-
-    IL_02d9:  ldnull
-    IL_02da:  stloc.0
-    IL_02db:  ldnull
-    IL_02dc:  stloc.1
-    .try
-    {
-      IL_02dd:  nop
-      IL_02de:  ldstr      "f4"
-      IL_02e3:  stloc.0
-      IL_02e4:  ldarg.0
-      IL_02e5:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f4
-      IL_02ea:  box        MCCTest.VType9
-      IL_02ef:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_02f4:  stloc.1
-      IL_02f5:  ldarg.0
-      IL_02f6:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f4
-      IL_02fb:  ldarga.s   expected
-      IL_02fd:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f4
-      IL_0302:  call       instance void MCCTest.VType9::Check(valuetype MCCTest.VType9)
-      IL_0307:  nop
-      IL_0308:  ldstr      "f5"
-      IL_030d:  stloc.0
-      IL_030e:  ldarg.0
-      IL_030f:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f5
-      IL_0314:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0319:  stloc.1
-      IL_031a:  ldarg.0
-      IL_031b:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f5
-      IL_0320:  ldarga.s   expected
-      IL_0322:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f5
-      IL_0327:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_032c:  nop
-      IL_032d:  ldstr      "f8"
-      IL_0332:  stloc.0
-      IL_0333:  ldarg.0
-      IL_0334:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f8
-      IL_0339:  box        MCCTest.VTypeE
-      IL_033e:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0343:  stloc.1
-      IL_0344:  ldarg.0
-      IL_0345:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f8
-      IL_034a:  ldarga.s   expected
-      IL_034c:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f8
-      IL_0351:  call       instance void MCCTest.VTypeE::Check(valuetype MCCTest.VTypeE)
-      IL_0356:  nop
-      IL_0357:  ldstr      "f9"
-      IL_035c:  stloc.0
-      IL_035d:  ldarg.0
-      IL_035e:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f9
-      IL_0363:  box        MCCTest.VType8
-      IL_0368:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_036d:  stloc.1
-      IL_036e:  ldarg.0
-      IL_036f:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f9
-      IL_0374:  ldarga.s   expected
-      IL_0376:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f9
-      IL_037b:  call       instance void MCCTest.VType8::Check(valuetype MCCTest.VType8)
-      IL_0380:  nop
-      IL_0381:  ldstr      "f10"
-      IL_0386:  stloc.0
-      IL_0387:  ldarg.0
-      IL_0388:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f10
-      IL_038d:  box        MCCTest.VTypeE
-      IL_0392:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0397:  stloc.1
-      IL_0398:  ldarg.0
-      IL_0399:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f10
-      IL_039e:  ldarga.s   expected
-      IL_03a0:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f10
-      IL_03a5:  call       instance void MCCTest.VTypeE::Check(valuetype MCCTest.VTypeE)
-      IL_03aa:  nop
-      IL_03ab:  ldstr      "f11"
-      IL_03b0:  stloc.0
-      IL_03b1:  ldarg.0
-      IL_03b2:  ldfld      valuetype MCCTest.VType3 MCCTest.VTypeF::f11
-      IL_03b7:  box        MCCTest.VType3
-      IL_03bc:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_03c1:  stloc.1
-      IL_03c2:  ldarg.0
-      IL_03c3:  ldflda     valuetype MCCTest.VType3 MCCTest.VTypeF::f11
-      IL_03c8:  ldarga.s   expected
-      IL_03ca:  ldfld      valuetype MCCTest.VType3 MCCTest.VTypeF::f11
-      IL_03cf:  call       instance void MCCTest.VType3::Check(valuetype MCCTest.VType3)
-      IL_03d4:  nop
-      IL_03d5:  ldstr      "f14"
-      IL_03da:  stloc.0
-      IL_03db:  ldarg.0
-      IL_03dc:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeF::f14
-      IL_03e1:  box        MCCTest.VType5
-      IL_03e6:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_03eb:  stloc.1
-      IL_03ec:  ldarg.0
-      IL_03ed:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeF::f14
-      IL_03f2:  ldarga.s   expected
-      IL_03f4:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeF::f14
-      IL_03f9:  call       instance void MCCTest.VType5::Check(valuetype MCCTest.VType5)
-      IL_03fe:  nop
-      IL_03ff:  ldstr      "f15"
-      IL_0404:  stloc.0
-      IL_0405:  ldarg.0
-      IL_0406:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f15
-      IL_040b:  box        MCCTest.VType9
-      IL_0410:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0415:  stloc.1
-      IL_0416:  ldarg.0
-      IL_0417:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f15
-      IL_041c:  ldarga.s   expected
-      IL_041e:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f15
-      IL_0423:  call       instance void MCCTest.VType9::Check(valuetype MCCTest.VType9)
-      IL_0428:  nop
-      IL_0429:  ldstr      "f16"
-      IL_042e:  stloc.0
-      IL_042f:  ldarg.0
-      IL_0430:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f16
-      IL_0435:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_043a:  stloc.1
-      IL_043b:  ldarg.0
-      IL_043c:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f16
-      IL_0441:  ldarga.s   expected
-      IL_0443:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f16
-      IL_0448:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_044d:  nop
-      IL_044e:  ldstr      "f17"
-      IL_0453:  stloc.0
-      IL_0454:  ldarg.0
-      IL_0455:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f17
-      IL_045a:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_045f:  stloc.1
-      IL_0460:  ldarg.0
-      IL_0461:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f17
-      IL_0466:  ldarga.s   expected
-      IL_0468:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f17
-      IL_046d:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_0472:  nop
-      IL_0473:  ldstr      "f19"
-      IL_0478:  stloc.0
-      IL_0479:  ldarg.0
-      IL_047a:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f19
-      IL_047f:  box        MCCTest.VType8
-      IL_0484:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0489:  stloc.1
-      IL_048a:  ldarg.0
-      IL_048b:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f19
-      IL_0490:  ldarga.s   expected
-      IL_0492:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f19
-      IL_0497:  call       instance void MCCTest.VType8::Check(valuetype MCCTest.VType8)
-      IL_049c:  nop
-      IL_049d:  ldstr      "f20"
-      IL_04a2:  stloc.0
-      IL_04a3:  ldarg.0
-      IL_04a4:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f20
-      IL_04a9:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_04ae:  stloc.1
-      IL_04af:  ldarg.0
-      IL_04b0:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f20
-      IL_04b5:  ldarga.s   expected
-      IL_04b7:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f20
-      IL_04bc:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_04c1:  nop
-      IL_04c2:  ldstr      "f21"
-      IL_04c7:  stloc.0
-      IL_04c8:  ldarg.0
-      IL_04c9:  ldfld      valuetype MCCTest.VTypeA MCCTest.VTypeF::f21
-      IL_04ce:  box        MCCTest.VTypeA
-      IL_04d3:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_04d8:  stloc.1
-      IL_04d9:  ldarg.0
-      IL_04da:  ldflda     valuetype MCCTest.VTypeA MCCTest.VTypeF::f21
-      IL_04df:  ldarga.s   expected
-      IL_04e1:  ldfld      valuetype MCCTest.VTypeA MCCTest.VTypeF::f21
-      IL_04e6:  call       instance void MCCTest.VTypeA::Check(valuetype MCCTest.VTypeA)
-      IL_04eb:  nop
-      IL_04ec:  ldstr      "f22"
-      IL_04f1:  stloc.0
-      IL_04f2:  ldarg.0
-      IL_04f3:  ldfld      valuetype MCCTest.VTypeD MCCTest.VTypeF::f22
-      IL_04f8:  box        MCCTest.VTypeD
-      IL_04fd:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0502:  stloc.1
-      IL_0503:  ldarg.0
-      IL_0504:  ldflda     valuetype MCCTest.VTypeD MCCTest.VTypeF::f22
-      IL_0509:  ldarga.s   expected
-      IL_050b:  ldfld      valuetype MCCTest.VTypeD MCCTest.VTypeF::f22
-      IL_0510:  call       instance void MCCTest.VTypeD::Check(valuetype MCCTest.VTypeD)
-      IL_0515:  nop
-      IL_0516:  ldstr      "f23"
-      IL_051b:  stloc.0
-      IL_051c:  ldarg.0
-      IL_051d:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f23
-      IL_0522:  box        MCCTest.VType9
-      IL_0527:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_052c:  stloc.1
-      IL_052d:  ldarg.0
-      IL_052e:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f23
-      IL_0533:  ldarga.s   expected
-      IL_0535:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f23
-      IL_053a:  call       instance void MCCTest.VType9::Check(valuetype MCCTest.VType9)
-      IL_053f:  nop
-      IL_0540:  ldstr      "f24"
-      IL_0545:  stloc.0
-      IL_0546:  ldarg.0
-      IL_0547:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f24
-      IL_054c:  box        MCCTest.VTypeE
-      IL_0551:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0556:  stloc.1
-      IL_0557:  ldarg.0
-      IL_0558:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f24
-      IL_055d:  ldarga.s   expected
-      IL_055f:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f24
-      IL_0564:  call       instance void MCCTest.VTypeE::Check(valuetype MCCTest.VTypeE)
-      IL_0569:  nop
-      IL_056a:  ldstr      "f25"
-      IL_056f:  stloc.0
-      IL_0570:  ldarg.0
-      IL_0571:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f25
-      IL_0576:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_057b:  stloc.1
-      IL_057c:  ldarg.0
-      IL_057d:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f25
-      IL_0582:  ldarga.s   expected
-      IL_0584:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f25
-      IL_0589:  callvirt   instance void MCCTest.RType4::Check(class MCCTest.RType4)
-      IL_058e:  nop
-      IL_058f:  ldstr      "f29"
-      IL_0594:  stloc.0
-      IL_0595:  ldarg.0
-      IL_0596:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeF::f29
-      IL_059b:  box        MCCTest.VTypeC
-      IL_05a0:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_05a5:  stloc.1
-      IL_05a6:  ldarg.0
-      IL_05a7:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeF::f29
-      IL_05ac:  ldarga.s   expected
-      IL_05ae:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeF::f29
-      IL_05b3:  call       instance void MCCTest.VTypeC::Check(valuetype MCCTest.VTypeC)
-      IL_05b8:  nop
-      IL_05b9:  ldstr      "f30"
-      IL_05be:  stloc.0
-      IL_05bf:  ldarg.0
-      IL_05c0:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeF::f30
-      IL_05c5:  box        MCCTest.VType6
-      IL_05ca:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_05cf:  stloc.1
-      IL_05d0:  ldarg.0
-      IL_05d1:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeF::f30
-      IL_05d6:  ldarga.s   expected
-      IL_05d8:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeF::f30
-      IL_05dd:  call       instance void MCCTest.VType6::Check(valuetype MCCTest.VType6)
-      IL_05e2:  nop
-      IL_05e3:  ldstr      "f32"
-      IL_05e8:  stloc.0
-      IL_05e9:  ldarg.0
-      IL_05ea:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f32
-      IL_05ef:  box        MCCTest.VType8
-      IL_05f4:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_05f9:  stloc.1
-      IL_05fa:  ldarg.0
-      IL_05fb:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f32
-      IL_0600:  ldarga.s   expected
-      IL_0602:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f32
-      IL_0607:  call       instance void MCCTest.VType8::Check(valuetype MCCTest.VType8)
-      IL_060c:  nop
-      IL_060d:  ldstr      "f34"
-      IL_0612:  stloc.0
-      IL_0613:  ldarg.0
-      IL_0614:  ldfld      valuetype MCCTest.VTypeD MCCTest.VTypeF::f34
-      IL_0619:  box        MCCTest.VTypeD
-      IL_061e:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0623:  stloc.1
-      IL_0624:  ldarg.0
-      IL_0625:  ldflda     valuetype MCCTest.VTypeD MCCTest.VTypeF::f34
-      IL_062a:  ldarga.s   expected
-      IL_062c:  ldfld      valuetype MCCTest.VTypeD MCCTest.VTypeF::f34
-      IL_0631:  call       instance void MCCTest.VTypeD::Check(valuetype MCCTest.VTypeD)
-      IL_0636:  nop
-      IL_0637:  ldstr      "f35"
-      IL_063c:  stloc.0
-      IL_063d:  ldarg.0
-      IL_063e:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f35
-      IL_0643:  box        MCCTest.VTypeE
-      IL_0648:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_064d:  stloc.1
-      IL_064e:  ldarg.0
-      IL_064f:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f35
-      IL_0654:  ldarga.s   expected
-      IL_0656:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f35
-      IL_065b:  call       instance void MCCTest.VTypeE::Check(valuetype MCCTest.VTypeE)
-      IL_0660:  nop
-      IL_0661:  ldstr      "f36"
-      IL_0666:  stloc.0
-      IL_0667:  ldarg.0
-      IL_0668:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f36
-      IL_066d:  box        MCCTest.VTypeE
-      IL_0672:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0677:  stloc.1
-      IL_0678:  ldarg.0
-      IL_0679:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f36
-      IL_067e:  ldarga.s   expected
-      IL_0680:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f36
-      IL_0685:  call       instance void MCCTest.VTypeE::Check(valuetype MCCTest.VTypeE)
-      IL_068a:  nop
-      IL_068b:  ldstr      "f37"
-      IL_0690:  stloc.0
-      IL_0691:  ldarg.0
-      IL_0692:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeF::f37
-      IL_0697:  box        MCCTest.VType7
-      IL_069c:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_06a1:  stloc.1
-      IL_06a2:  ldarg.0
-      IL_06a3:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeF::f37
-      IL_06a8:  ldarga.s   expected
-      IL_06aa:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeF::f37
-      IL_06af:  call       instance void MCCTest.VType7::Check(valuetype MCCTest.VType7)
-      IL_06b4:  nop
-      IL_06b5:  ldstr      "f38"
-      IL_06ba:  stloc.0
-      IL_06bb:  ldarg.0
-      IL_06bc:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f38
-      IL_06c1:  box        MCCTest.VType9
-      IL_06c6:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_06cb:  stloc.1
-      IL_06cc:  ldarg.0
-      IL_06cd:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f38
-      IL_06d2:  ldarga.s   expected
-      IL_06d4:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f38
-      IL_06d9:  call       instance void MCCTest.VType9::Check(valuetype MCCTest.VType9)
-      IL_06de:  nop
-      IL_06df:  ldstr      "f39"
-      IL_06e4:  stloc.0
-      IL_06e5:  ldarg.0
-      IL_06e6:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f39
-      IL_06eb:  box        MCCTest.VType8
-      IL_06f0:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_06f5:  stloc.1
-      IL_06f6:  ldarg.0
-      IL_06f7:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f39
-      IL_06fc:  ldarga.s   expected
-      IL_06fe:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f39
-      IL_0703:  call       instance void MCCTest.VType8::Check(valuetype MCCTest.VType8)
-      IL_0708:  nop
-      IL_0709:  ldstr      "f43"
-      IL_070e:  stloc.0
-      IL_070f:  ldarg.0
-      IL_0710:  ldfld      valuetype MCCTest.VTypeB MCCTest.VTypeF::f43
-      IL_0715:  box        MCCTest.VTypeB
-      IL_071a:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_071f:  stloc.1
-      IL_0720:  ldarg.0
-      IL_0721:  ldflda     valuetype MCCTest.VTypeB MCCTest.VTypeF::f43
-      IL_0726:  ldarga.s   expected
-      IL_0728:  ldfld      valuetype MCCTest.VTypeB MCCTest.VTypeF::f43
-      IL_072d:  call       instance void MCCTest.VTypeB::Check(valuetype MCCTest.VTypeB)
-      IL_0732:  nop
-      IL_0733:  ldstr      "f44"
-      IL_0738:  stloc.0
-      IL_0739:  ldarg.0
-      IL_073a:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f44
-      IL_073f:  box        MCCTest.VType8
-      IL_0744:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-      IL_0749:  stloc.1
-      IL_074a:  ldarg.0
-      IL_074b:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f44
-      IL_0750:  ldarga.s   expected
-      IL_0752:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f44
-      IL_0757:  call       instance void MCCTest.VType8::Check(valuetype MCCTest.VType8)
-      IL_075c:  nop
-      IL_075d:  nop
-      IL_075e:  leave.s    IL_076b
-
-    }  // end .try
-    catch MCCTest.ResultVerificationException
-    {
-      IL_0760:  stloc.2
-      IL_0761:  nop
-      IL_0762:  ldloc.0
-      IL_0763:  ldloc.1
-      IL_0764:  ldloc.2
-      IL_0765:  newobj     instance void MCCTest.ResultVerificationException::.ctor(string,
-                                                                                    class [mscorlib]System.Type,
-                                                                                    class MCCTest.ResultVerificationException)
-      IL_076a:  throw
-
-    }  // end handler
-    IL_076b:  nop
-    IL_076c:  ret
-  } // end of method VTypeF::Check
-
-  .method public hidebysig instance string
-          Dump(int32 level) cil managed
-  {
-    // Code size       2144 (0x860)
-    .maxstack  4
-    .locals init (string V_0,
-             class [mscorlib]System.IO.StringWriter V_1,
-             string V_2)
-    IL_0000:  nop
-    IL_0001:  ldarg.1
-    IL_0002:  call       string MCCTest.FormatUtils::GetPadding(int32)
-    IL_0007:  stloc.0
-    IL_0008:  newobj     instance void [mscorlib]System.IO.StringWriter::.ctor()
-    IL_000d:  stloc.1
-    IL_000e:  ldloc.1
-    IL_000f:  ldloc.0
-    IL_0010:  ldstr      "f1    = "
-    IL_0015:  ldarg.0
-    IL_0016:  ldfld      char MCCTest.VTypeF::f1
-    IL_001b:  box        [mscorlib]System.Char
-    IL_0020:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_0025:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_002a:  nop
-    IL_002b:  ldloc.1
-    IL_002c:  ldloc.0
-    IL_002d:  ldstr      "f2    = "
-    IL_0032:  ldarg.0
-    IL_0033:  ldfld      float64 MCCTest.VTypeF::f2
-    IL_0038:  box        [mscorlib]System.Double
-    IL_003d:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_0042:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0047:  nop
-    IL_0048:  ldloc.1
-    IL_0049:  ldloc.0
-    IL_004a:  ldstr      "f3    = "
-    IL_004f:  ldarg.0
-    IL_0050:  ldfld      float32 MCCTest.VTypeF::f3
-    IL_0055:  box        [mscorlib]System.Single
-    IL_005a:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_005f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0064:  nop
-    IL_0065:  ldloc.1
-    IL_0066:  ldloc.0
-    IL_0067:  ldstr      "[Field f4] [Type '{0}']"
-    IL_006c:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0071:  ldarg.0
-    IL_0072:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f4
-    IL_0077:  box        MCCTest.VType9
-    IL_007c:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0081:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0086:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_008b:  nop
-    IL_008c:  ldloc.1
-    IL_008d:  ldarg.0
-    IL_008e:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f4
-    IL_0093:  ldarg.1
-    IL_0094:  ldc.i4.1
-    IL_0095:  add
-    IL_0096:  call       instance string MCCTest.VType9::Dump(int32)
-    IL_009b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00a0:  nop
-    IL_00a1:  ldloc.1
-    IL_00a2:  ldloc.0
-    IL_00a3:  ldstr      "[Field f5] [Type '{0}']"
-    IL_00a8:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_00ad:  ldarg.0
-    IL_00ae:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f5
-    IL_00b3:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_00b8:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_00bd:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_00c2:  nop
-    IL_00c3:  ldloc.1
-    IL_00c4:  ldarg.0
-    IL_00c5:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f5
-    IL_00ca:  ldarg.1
-    IL_00cb:  ldc.i4.1
-    IL_00cc:  add
-    IL_00cd:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_00d2:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00d7:  nop
-    IL_00d8:  ldloc.1
-    IL_00d9:  ldloc.0
-    IL_00da:  ldstr      "f6    = "
-    IL_00df:  ldarg.0
-    IL_00e0:  ldfld      char MCCTest.VTypeF::f6
-    IL_00e5:  box        [mscorlib]System.Char
-    IL_00ea:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_00ef:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_00f4:  nop
-    IL_00f5:  ldloc.1
-    IL_00f6:  ldloc.0
-    IL_00f7:  ldstr      "f7    = "
-    IL_00fc:  ldarg.0
-    IL_00fd:  ldfld      float32 MCCTest.VTypeF::f7
-    IL_0102:  box        [mscorlib]System.Single
-    IL_0107:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_010c:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0111:  nop
-    IL_0112:  ldloc.1
-    IL_0113:  ldloc.0
-    IL_0114:  ldstr      "[Field f8] [Type '{0}']"
-    IL_0119:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_011e:  ldarg.0
-    IL_011f:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f8
-    IL_0124:  box        MCCTest.VTypeE
-    IL_0129:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_012e:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0133:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0138:  nop
-    IL_0139:  ldloc.1
-    IL_013a:  ldarg.0
-    IL_013b:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f8
-    IL_0140:  ldarg.1
-    IL_0141:  ldc.i4.1
-    IL_0142:  add
-    IL_0143:  call       instance string MCCTest.VTypeE::Dump(int32)
-    IL_0148:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_014d:  nop
-    IL_014e:  ldloc.1
-    IL_014f:  ldloc.0
-    IL_0150:  ldstr      "[Field f9] [Type '{0}']"
-    IL_0155:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_015a:  ldarg.0
-    IL_015b:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f9
-    IL_0160:  box        MCCTest.VType8
-    IL_0165:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_016a:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_016f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0174:  nop
-    IL_0175:  ldloc.1
-    IL_0176:  ldarg.0
-    IL_0177:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f9
-    IL_017c:  ldarg.1
-    IL_017d:  ldc.i4.1
-    IL_017e:  add
-    IL_017f:  call       instance string MCCTest.VType8::Dump(int32)
-    IL_0184:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0189:  nop
-    IL_018a:  ldloc.1
-    IL_018b:  ldloc.0
-    IL_018c:  ldstr      "[Field f10] [Type '{0}']"
-    IL_0191:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0196:  ldarg.0
-    IL_0197:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f10
-    IL_019c:  box        MCCTest.VTypeE
-    IL_01a1:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_01a6:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_01ab:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_01b0:  nop
-    IL_01b1:  ldloc.1
-    IL_01b2:  ldarg.0
-    IL_01b3:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f10
-    IL_01b8:  ldarg.1
-    IL_01b9:  ldc.i4.1
-    IL_01ba:  add
-    IL_01bb:  call       instance string MCCTest.VTypeE::Dump(int32)
-    IL_01c0:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_01c5:  nop
-    IL_01c6:  ldloc.1
-    IL_01c7:  ldloc.0
-    IL_01c8:  ldstr      "[Field f11] [Type '{0}']"
-    IL_01cd:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_01d2:  ldarg.0
-    IL_01d3:  ldfld      valuetype MCCTest.VType3 MCCTest.VTypeF::f11
-    IL_01d8:  box        MCCTest.VType3
-    IL_01dd:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_01e2:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_01e7:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_01ec:  nop
-    IL_01ed:  ldloc.1
-    IL_01ee:  ldarg.0
-    IL_01ef:  ldflda     valuetype MCCTest.VType3 MCCTest.VTypeF::f11
-    IL_01f4:  ldarg.1
-    IL_01f5:  ldc.i4.1
-    IL_01f6:  add
-    IL_01f7:  call       instance string MCCTest.VType3::Dump(int32)
-    IL_01fc:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0201:  nop
-    IL_0202:  ldloc.1
-    IL_0203:  ldloc.0
-    IL_0204:  ldstr      "f12    = "
-    IL_0209:  ldarg.0
-    IL_020a:  ldfld      int64 MCCTest.VTypeF::f12
-    IL_020f:  box        [mscorlib]System.Int64
-    IL_0214:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_0219:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_021e:  nop
-    IL_021f:  ldloc.1
-    IL_0220:  ldloc.0
-    IL_0221:  ldstr      "f13    = "
-    IL_0226:  ldarg.0
-    IL_0227:  ldfld      uint16 MCCTest.VTypeF::f13
-    IL_022c:  box        [mscorlib]System.UInt16
-    IL_0231:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_0236:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_023b:  nop
-    IL_023c:  ldloc.1
-    IL_023d:  ldloc.0
-    IL_023e:  ldstr      "[Field f14] [Type '{0}']"
-    IL_0243:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0248:  ldarg.0
-    IL_0249:  ldfld      valuetype MCCTest.VType5 MCCTest.VTypeF::f14
-    IL_024e:  box        MCCTest.VType5
-    IL_0253:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0258:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_025d:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0262:  nop
-    IL_0263:  ldloc.1
-    IL_0264:  ldarg.0
-    IL_0265:  ldflda     valuetype MCCTest.VType5 MCCTest.VTypeF::f14
-    IL_026a:  ldarg.1
-    IL_026b:  ldc.i4.1
-    IL_026c:  add
-    IL_026d:  call       instance string MCCTest.VType5::Dump(int32)
-    IL_0272:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0277:  nop
-    IL_0278:  ldloc.1
-    IL_0279:  ldloc.0
-    IL_027a:  ldstr      "[Field f15] [Type '{0}']"
-    IL_027f:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0284:  ldarg.0
-    IL_0285:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f15
-    IL_028a:  box        MCCTest.VType9
-    IL_028f:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0294:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0299:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_029e:  nop
-    IL_029f:  ldloc.1
-    IL_02a0:  ldarg.0
-    IL_02a1:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f15
-    IL_02a6:  ldarg.1
-    IL_02a7:  ldc.i4.1
-    IL_02a8:  add
-    IL_02a9:  call       instance string MCCTest.VType9::Dump(int32)
-    IL_02ae:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_02b3:  nop
-    IL_02b4:  ldloc.1
-    IL_02b5:  ldloc.0
-    IL_02b6:  ldstr      "[Field f16] [Type '{0}']"
-    IL_02bb:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_02c0:  ldarg.0
-    IL_02c1:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f16
-    IL_02c6:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_02cb:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_02d0:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_02d5:  nop
-    IL_02d6:  ldloc.1
-    IL_02d7:  ldarg.0
-    IL_02d8:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f16
-    IL_02dd:  ldarg.1
-    IL_02de:  ldc.i4.1
-    IL_02df:  add
-    IL_02e0:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_02e5:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_02ea:  nop
-    IL_02eb:  ldloc.1
-    IL_02ec:  ldloc.0
-    IL_02ed:  ldstr      "[Field f17] [Type '{0}']"
-    IL_02f2:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_02f7:  ldarg.0
-    IL_02f8:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f17
-    IL_02fd:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0302:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0307:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_030c:  nop
-    IL_030d:  ldloc.1
-    IL_030e:  ldarg.0
-    IL_030f:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f17
-    IL_0314:  ldarg.1
-    IL_0315:  ldc.i4.1
-    IL_0316:  add
-    IL_0317:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_031c:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0321:  nop
-    IL_0322:  ldloc.1
-    IL_0323:  ldloc.0
-    IL_0324:  ldstr      "f18    = "
-    IL_0329:  ldarg.0
-    IL_032a:  ldfld      int32 MCCTest.VTypeF::f18
-    IL_032f:  box        [mscorlib]System.Int32
-    IL_0334:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_0339:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_033e:  nop
-    IL_033f:  ldloc.1
-    IL_0340:  ldloc.0
-    IL_0341:  ldstr      "[Field f19] [Type '{0}']"
-    IL_0346:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_034b:  ldarg.0
-    IL_034c:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f19
-    IL_0351:  box        MCCTest.VType8
-    IL_0356:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_035b:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0360:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0365:  nop
-    IL_0366:  ldloc.1
-    IL_0367:  ldarg.0
-    IL_0368:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f19
-    IL_036d:  ldarg.1
-    IL_036e:  ldc.i4.1
-    IL_036f:  add
-    IL_0370:  call       instance string MCCTest.VType8::Dump(int32)
-    IL_0375:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_037a:  nop
-    IL_037b:  ldloc.1
-    IL_037c:  ldloc.0
-    IL_037d:  ldstr      "[Field f20] [Type '{0}']"
-    IL_0382:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0387:  ldarg.0
-    IL_0388:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f20
-    IL_038d:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0392:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0397:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_039c:  nop
-    IL_039d:  ldloc.1
-    IL_039e:  ldarg.0
-    IL_039f:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f20
-    IL_03a4:  ldarg.1
-    IL_03a5:  ldc.i4.1
-    IL_03a6:  add
-    IL_03a7:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_03ac:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_03b1:  nop
-    IL_03b2:  ldloc.1
-    IL_03b3:  ldloc.0
-    IL_03b4:  ldstr      "[Field f21] [Type '{0}']"
-    IL_03b9:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_03be:  ldarg.0
-    IL_03bf:  ldfld      valuetype MCCTest.VTypeA MCCTest.VTypeF::f21
-    IL_03c4:  box        MCCTest.VTypeA
-    IL_03c9:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_03ce:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_03d3:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_03d8:  nop
-    IL_03d9:  ldloc.1
-    IL_03da:  ldarg.0
-    IL_03db:  ldflda     valuetype MCCTest.VTypeA MCCTest.VTypeF::f21
-    IL_03e0:  ldarg.1
-    IL_03e1:  ldc.i4.1
-    IL_03e2:  add
-    IL_03e3:  call       instance string MCCTest.VTypeA::Dump(int32)
-    IL_03e8:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_03ed:  nop
-    IL_03ee:  ldloc.1
-    IL_03ef:  ldloc.0
-    IL_03f0:  ldstr      "[Field f22] [Type '{0}']"
-    IL_03f5:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_03fa:  ldarg.0
-    IL_03fb:  ldfld      valuetype MCCTest.VTypeD MCCTest.VTypeF::f22
-    IL_0400:  box        MCCTest.VTypeD
-    IL_0405:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_040a:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_040f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0414:  nop
-    IL_0415:  ldloc.1
-    IL_0416:  ldarg.0
-    IL_0417:  ldflda     valuetype MCCTest.VTypeD MCCTest.VTypeF::f22
-    IL_041c:  ldarg.1
-    IL_041d:  ldc.i4.1
-    IL_041e:  add
-    IL_041f:  call       instance string MCCTest.VTypeD::Dump(int32)
-    IL_0424:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0429:  nop
-    IL_042a:  ldloc.1
-    IL_042b:  ldloc.0
-    IL_042c:  ldstr      "[Field f23] [Type '{0}']"
-    IL_0431:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0436:  ldarg.0
-    IL_0437:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f23
-    IL_043c:  box        MCCTest.VType9
-    IL_0441:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0446:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_044b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0450:  nop
-    IL_0451:  ldloc.1
-    IL_0452:  ldarg.0
-    IL_0453:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f23
-    IL_0458:  ldarg.1
-    IL_0459:  ldc.i4.1
-    IL_045a:  add
-    IL_045b:  call       instance string MCCTest.VType9::Dump(int32)
-    IL_0460:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0465:  nop
-    IL_0466:  ldloc.1
-    IL_0467:  ldloc.0
-    IL_0468:  ldstr      "[Field f24] [Type '{0}']"
-    IL_046d:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0472:  ldarg.0
-    IL_0473:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f24
-    IL_0478:  box        MCCTest.VTypeE
-    IL_047d:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0482:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0487:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_048c:  nop
-    IL_048d:  ldloc.1
-    IL_048e:  ldarg.0
-    IL_048f:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f24
-    IL_0494:  ldarg.1
-    IL_0495:  ldc.i4.1
-    IL_0496:  add
-    IL_0497:  call       instance string MCCTest.VTypeE::Dump(int32)
-    IL_049c:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_04a1:  nop
-    IL_04a2:  ldloc.1
-    IL_04a3:  ldloc.0
-    IL_04a4:  ldstr      "[Field f25] [Type '{0}']"
-    IL_04a9:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_04ae:  ldarg.0
-    IL_04af:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f25
-    IL_04b4:  callvirt   instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_04b9:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_04be:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_04c3:  nop
-    IL_04c4:  ldloc.1
-    IL_04c5:  ldarg.0
-    IL_04c6:  ldfld      class MCCTest.RType4 MCCTest.VTypeF::f25
-    IL_04cb:  ldarg.1
-    IL_04cc:  ldc.i4.1
-    IL_04cd:  add
-    IL_04ce:  callvirt   instance string MCCTest.RType4::Dump(int32)
-    IL_04d3:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_04d8:  nop
-    IL_04d9:  ldloc.1
-    IL_04da:  ldloc.0
-    IL_04db:  ldstr      "f26    = "
-    IL_04e0:  ldarg.0
-    IL_04e1:  ldfld      float32 MCCTest.VTypeF::f26
-    IL_04e6:  box        [mscorlib]System.Single
-    IL_04eb:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_04f0:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_04f5:  nop
-    IL_04f6:  ldloc.1
-    IL_04f7:  ldloc.0
-    IL_04f8:  ldstr      "f27    = "
-    IL_04fd:  ldarg.0
-    IL_04fe:  ldfld      uint8 MCCTest.VTypeF::f27
-    IL_0503:  box        [mscorlib]System.Byte
-    IL_0508:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_050d:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0512:  nop
-    IL_0513:  ldloc.1
-    IL_0514:  ldloc.0
-    IL_0515:  ldstr      "f28    = "
-    IL_051a:  ldarg.0
-    IL_051b:  ldfld      float64 MCCTest.VTypeF::f28
-    IL_0520:  box        [mscorlib]System.Double
-    IL_0525:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_052a:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_052f:  nop
-    IL_0530:  ldloc.1
-    IL_0531:  ldloc.0
-    IL_0532:  ldstr      "[Field f29] [Type '{0}']"
-    IL_0537:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_053c:  ldarg.0
-    IL_053d:  ldfld      valuetype MCCTest.VTypeC MCCTest.VTypeF::f29
-    IL_0542:  box        MCCTest.VTypeC
-    IL_0547:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_054c:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_0551:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0556:  nop
-    IL_0557:  ldloc.1
-    IL_0558:  ldarg.0
-    IL_0559:  ldflda     valuetype MCCTest.VTypeC MCCTest.VTypeF::f29
-    IL_055e:  ldarg.1
-    IL_055f:  ldc.i4.1
-    IL_0560:  add
-    IL_0561:  call       instance string MCCTest.VTypeC::Dump(int32)
-    IL_0566:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_056b:  nop
-    IL_056c:  ldloc.1
-    IL_056d:  ldloc.0
-    IL_056e:  ldstr      "[Field f30] [Type '{0}']"
-    IL_0573:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0578:  ldarg.0
-    IL_0579:  ldfld      valuetype MCCTest.VType6 MCCTest.VTypeF::f30
-    IL_057e:  box        MCCTest.VType6
-    IL_0583:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0588:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_058d:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0592:  nop
-    IL_0593:  ldloc.1
-    IL_0594:  ldarg.0
-    IL_0595:  ldflda     valuetype MCCTest.VType6 MCCTest.VTypeF::f30
-    IL_059a:  ldarg.1
-    IL_059b:  ldc.i4.1
-    IL_059c:  add
-    IL_059d:  call       instance string MCCTest.VType6::Dump(int32)
-    IL_05a2:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_05a7:  nop
-    IL_05a8:  ldloc.1
-    IL_05a9:  ldloc.0
-    IL_05aa:  ldstr      "f31    = "
-    IL_05af:  ldarg.0
-    IL_05b0:  ldfld      float64 MCCTest.VTypeF::f31
-    IL_05b5:  box        [mscorlib]System.Double
-    IL_05ba:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_05bf:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_05c4:  nop
-    IL_05c5:  ldloc.1
-    IL_05c6:  ldloc.0
-    IL_05c7:  ldstr      "[Field f32] [Type '{0}']"
-    IL_05cc:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_05d1:  ldarg.0
-    IL_05d2:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f32
-    IL_05d7:  box        MCCTest.VType8
-    IL_05dc:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_05e1:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_05e6:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_05eb:  nop
-    IL_05ec:  ldloc.1
-    IL_05ed:  ldarg.0
-    IL_05ee:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f32
-    IL_05f3:  ldarg.1
-    IL_05f4:  ldc.i4.1
-    IL_05f5:  add
-    IL_05f6:  call       instance string MCCTest.VType8::Dump(int32)
-    IL_05fb:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0600:  nop
-    IL_0601:  ldloc.1
-    IL_0602:  ldloc.0
-    IL_0603:  ldstr      "f33    = "
-    IL_0608:  ldarg.0
-    IL_0609:  ldfld      int8 MCCTest.VTypeF::f33
-    IL_060e:  box        [mscorlib]System.SByte
-    IL_0613:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_0618:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_061d:  nop
-    IL_061e:  ldloc.1
-    IL_061f:  ldloc.0
-    IL_0620:  ldstr      "[Field f34] [Type '{0}']"
-    IL_0625:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_062a:  ldarg.0
-    IL_062b:  ldfld      valuetype MCCTest.VTypeD MCCTest.VTypeF::f34
-    IL_0630:  box        MCCTest.VTypeD
-    IL_0635:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_063a:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_063f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0644:  nop
-    IL_0645:  ldloc.1
-    IL_0646:  ldarg.0
-    IL_0647:  ldflda     valuetype MCCTest.VTypeD MCCTest.VTypeF::f34
-    IL_064c:  ldarg.1
-    IL_064d:  ldc.i4.1
-    IL_064e:  add
-    IL_064f:  call       instance string MCCTest.VTypeD::Dump(int32)
-    IL_0654:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0659:  nop
-    IL_065a:  ldloc.1
-    IL_065b:  ldloc.0
-    IL_065c:  ldstr      "[Field f35] [Type '{0}']"
-    IL_0661:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0666:  ldarg.0
-    IL_0667:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f35
-    IL_066c:  box        MCCTest.VTypeE
-    IL_0671:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0676:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_067b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0680:  nop
-    IL_0681:  ldloc.1
-    IL_0682:  ldarg.0
-    IL_0683:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f35
-    IL_0688:  ldarg.1
-    IL_0689:  ldc.i4.1
-    IL_068a:  add
-    IL_068b:  call       instance string MCCTest.VTypeE::Dump(int32)
-    IL_0690:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0695:  nop
-    IL_0696:  ldloc.1
-    IL_0697:  ldloc.0
-    IL_0698:  ldstr      "[Field f36] [Type '{0}']"
-    IL_069d:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_06a2:  ldarg.0
-    IL_06a3:  ldfld      valuetype MCCTest.VTypeE MCCTest.VTypeF::f36
-    IL_06a8:  box        MCCTest.VTypeE
-    IL_06ad:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_06b2:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_06b7:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_06bc:  nop
-    IL_06bd:  ldloc.1
-    IL_06be:  ldarg.0
-    IL_06bf:  ldflda     valuetype MCCTest.VTypeE MCCTest.VTypeF::f36
-    IL_06c4:  ldarg.1
-    IL_06c5:  ldc.i4.1
-    IL_06c6:  add
-    IL_06c7:  call       instance string MCCTest.VTypeE::Dump(int32)
-    IL_06cc:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_06d1:  nop
-    IL_06d2:  ldloc.1
-    IL_06d3:  ldloc.0
-    IL_06d4:  ldstr      "[Field f37] [Type '{0}']"
-    IL_06d9:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_06de:  ldarg.0
-    IL_06df:  ldfld      valuetype MCCTest.VType7 MCCTest.VTypeF::f37
-    IL_06e4:  box        MCCTest.VType7
-    IL_06e9:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_06ee:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_06f3:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_06f8:  nop
-    IL_06f9:  ldloc.1
-    IL_06fa:  ldarg.0
-    IL_06fb:  ldflda     valuetype MCCTest.VType7 MCCTest.VTypeF::f37
-    IL_0700:  ldarg.1
-    IL_0701:  ldc.i4.1
-    IL_0702:  add
-    IL_0703:  call       instance string MCCTest.VType7::Dump(int32)
-    IL_0708:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_070d:  nop
-    IL_070e:  ldloc.1
-    IL_070f:  ldloc.0
-    IL_0710:  ldstr      "[Field f38] [Type '{0}']"
-    IL_0715:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_071a:  ldarg.0
-    IL_071b:  ldfld      valuetype MCCTest.VType9 MCCTest.VTypeF::f38
-    IL_0720:  box        MCCTest.VType9
-    IL_0725:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_072a:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_072f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0734:  nop
-    IL_0735:  ldloc.1
-    IL_0736:  ldarg.0
-    IL_0737:  ldflda     valuetype MCCTest.VType9 MCCTest.VTypeF::f38
-    IL_073c:  ldarg.1
-    IL_073d:  ldc.i4.1
-    IL_073e:  add
-    IL_073f:  call       instance string MCCTest.VType9::Dump(int32)
-    IL_0744:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0749:  nop
-    IL_074a:  ldloc.1
-    IL_074b:  ldloc.0
-    IL_074c:  ldstr      "[Field f39] [Type '{0}']"
-    IL_0751:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0756:  ldarg.0
-    IL_0757:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f39
-    IL_075c:  box        MCCTest.VType8
-    IL_0761:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0766:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_076b:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0770:  nop
-    IL_0771:  ldloc.1
-    IL_0772:  ldarg.0
-    IL_0773:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f39
-    IL_0778:  ldarg.1
-    IL_0779:  ldc.i4.1
-    IL_077a:  add
-    IL_077b:  call       instance string MCCTest.VType8::Dump(int32)
-    IL_0780:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0785:  nop
-    IL_0786:  ldloc.1
-    IL_0787:  ldloc.0
-    IL_0788:  ldstr      "f40    = "
-    IL_078d:  ldarg.0
-    IL_078e:  ldfld      float32 MCCTest.VTypeF::f40
-    IL_0793:  box        [mscorlib]System.Single
-    IL_0798:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_079d:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_07a2:  nop
-    IL_07a3:  ldloc.1
-    IL_07a4:  ldloc.0
-    IL_07a5:  ldstr      "f41    = "
-    IL_07aa:  ldarg.0
-    IL_07ab:  ldfld      int32 MCCTest.VTypeF::f41
-    IL_07b0:  box        [mscorlib]System.Int32
-    IL_07b5:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_07ba:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_07bf:  nop
-    IL_07c0:  ldloc.1
-    IL_07c1:  ldloc.0
-    IL_07c2:  ldstr      "f42    = "
-    IL_07c7:  ldarg.0
-    IL_07c8:  ldfld      char MCCTest.VTypeF::f42
-    IL_07cd:  box        [mscorlib]System.Char
-    IL_07d2:  call       string [mscorlib]System.String::Concat(object,
-                                                                object,
-                                                                object)
-    IL_07d7:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_07dc:  nop
-    IL_07dd:  ldloc.1
-    IL_07de:  ldloc.0
-    IL_07df:  ldstr      "[Field f43] [Type '{0}']"
-    IL_07e4:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_07e9:  ldarg.0
-    IL_07ea:  ldfld      valuetype MCCTest.VTypeB MCCTest.VTypeF::f43
-    IL_07ef:  box        MCCTest.VTypeB
-    IL_07f4:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_07f9:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_07fe:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_0803:  nop
-    IL_0804:  ldloc.1
-    IL_0805:  ldarg.0
-    IL_0806:  ldflda     valuetype MCCTest.VTypeB MCCTest.VTypeF::f43
-    IL_080b:  ldarg.1
-    IL_080c:  ldc.i4.1
-    IL_080d:  add
-    IL_080e:  call       instance string MCCTest.VTypeB::Dump(int32)
-    IL_0813:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0818:  nop
-    IL_0819:  ldloc.1
-    IL_081a:  ldloc.0
-    IL_081b:  ldstr      "[Field f44] [Type '{0}']"
-    IL_0820:  call       string [mscorlib]System.String::Concat(string,
-                                                                string)
-    IL_0825:  ldarg.0
-    IL_0826:  ldfld      valuetype MCCTest.VType8 MCCTest.VTypeF::f44
-    IL_082b:  box        MCCTest.VType8
-    IL_0830:  call       instance class [mscorlib]System.Type [mscorlib]System.Object::GetType()
-    IL_0835:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_083a:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string,
-                                                                                 object)
-    IL_083f:  nop
-    IL_0840:  ldloc.1
-    IL_0841:  ldarg.0
-    IL_0842:  ldflda     valuetype MCCTest.VType8 MCCTest.VTypeF::f44
-    IL_0847:  ldarg.1
-    IL_0848:  ldc.i4.1
-    IL_0849:  add
-    IL_084a:  call       instance string MCCTest.VType8::Dump(int32)
-    IL_084f:  callvirt   instance void [mscorlib]System.IO.TextWriter::WriteLine(string)
-    IL_0854:  nop
-    IL_0855:  ldloc.1
-    IL_0856:  callvirt   instance string [mscorlib]System.Object::ToString()
-    IL_085b:  stloc.2
-    IL_085c:  br.s       IL_085e
-
-    IL_085e:  ldloc.2
-    IL_085f:  ret
-  } // end of method VTypeF::Dump
-
-  .method public hidebysig instance string
-          Dump() cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (string V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldc.i4.0
-    IL_0003:  call       instance string MCCTest.VTypeF::Dump(int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method VTypeF::Dump
-
-} // end of class MCCTest.VTypeF
-
 .class public auto ansi beforefieldinit MCCTest.Common
        extends [mscorlib]System.Object
 {
@@ -23285,132 +16372,6 @@
     IL_000c:  ret
   } // end of method Common::CheckResult
 
-  .method public hidebysig static int32  CheckResult(valuetype MCCTest.VType9 actual,
-                                                     int32 count) cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (int32 V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldarg.1
-    IL_0003:  call       int32 class MCCTest.Common2`1<valuetype MCCTest.VType9>::CheckResult(!0,
-                                                                                              int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method Common::CheckResult
-
-  .method public hidebysig static int32  CheckResult(valuetype MCCTest.VTypeA actual,
-                                                     int32 count) cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (int32 V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldarg.1
-    IL_0003:  call       int32 class MCCTest.Common2`1<valuetype MCCTest.VTypeA>::CheckResult(!0,
-                                                                                              int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method Common::CheckResult
-
-  .method public hidebysig static int32  CheckResult(valuetype MCCTest.VTypeB actual,
-                                                     int32 count) cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (int32 V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldarg.1
-    IL_0003:  call       int32 class MCCTest.Common2`1<valuetype MCCTest.VTypeB>::CheckResult(!0,
-                                                                                              int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method Common::CheckResult
-
-  .method public hidebysig static int32  CheckResult(valuetype MCCTest.VTypeC actual,
-                                                     int32 count) cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (int32 V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldarg.1
-    IL_0003:  call       int32 class MCCTest.Common2`1<valuetype MCCTest.VTypeC>::CheckResult(!0,
-                                                                                              int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method Common::CheckResult
-
-  .method public hidebysig static int32  CheckResult(valuetype MCCTest.VTypeD actual,
-                                                     int32 count) cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (int32 V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldarg.1
-    IL_0003:  call       int32 class MCCTest.Common2`1<valuetype MCCTest.VTypeD>::CheckResult(!0,
-                                                                                              int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method Common::CheckResult
-
-  .method public hidebysig static int32  CheckResult(valuetype MCCTest.VTypeE actual,
-                                                     int32 count) cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (int32 V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldarg.1
-    IL_0003:  call       int32 class MCCTest.Common2`1<valuetype MCCTest.VTypeE>::CheckResult(!0,
-                                                                                              int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method Common::CheckResult
-
-  .method public hidebysig static int32  CheckResult(valuetype MCCTest.VTypeF actual,
-                                                     int32 count) cil managed
-  {
-    // Code size       13 (0xd)
-    .maxstack  2
-    .locals init (int32 V_0)
-    IL_0000:  nop
-    IL_0001:  ldarg.0
-    IL_0002:  ldarg.1
-    IL_0003:  call       int32 class MCCTest.Common2`1<valuetype MCCTest.VTypeF>::CheckResult(!0,
-                                                                                              int32)
-    IL_0008:  stloc.0
-    IL_0009:  br.s       IL_000b
-
-    IL_000b:  ldloc.0
-    IL_000c:  ret
-  } // end of method Common::CheckResult
-
   .method public hidebysig specialname rtspecialname
           instance void  .ctor() cil managed
   {
diff --git a/src/tests/JIT/jit64/opt/cse/HugeArray1.csproj b/src/tests/JIT/jit64/opt/cse/HugeArray1.csproj
index da12a89427f9..e6438b89c251 100644
--- a/src/tests/JIT/jit64/opt/cse/HugeArray1.csproj
+++ b/src/tests/JIT/jit64/opt/cse/HugeArray1.csproj
@@ -3,10 +3,6 @@
     <!-- Needed for GCStressIncompatible -->
     <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <GCStressIncompatible>true</GCStressIncompatible>
-
-    <!-- The test is too complex to compile on macOS where secondary threads have small stack size by default
-         and that is not enough for Roslyn, see https://github.com/dotnet/runtime/issues/87879 -->
-    <DisableProjectBuild Condition="'$(HostOS)' == 'osx'">true</DisableProjectBuild>
   </PropertyGroup>
   <PropertyGroup>
     <DebugType>Full</DebugType>
diff --git a/src/tests/JIT/jit64/opt/cse/hugeSimpleExpr1.csproj b/src/tests/JIT/jit64/opt/cse/hugeSimpleExpr1.csproj
index 7ae624af5725..b32d6d0b3059 100644
--- a/src/tests/JIT/jit64/opt/cse/hugeSimpleExpr1.csproj
+++ b/src/tests/JIT/jit64/opt/cse/hugeSimpleExpr1.csproj
@@ -4,10 +4,6 @@
     <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <!-- Timeout on Arm64 -->
     <GCStressIncompatible>true</GCStressIncompatible>
-
-    <!-- The test is too complex to compile on macOS where secondary threads have small stack size by default
-         and that is not enough for Roslyn, see https://github.com/dotnet/runtime/issues/87879 -->
-    <DisableProjectBuild Condition="'$(HostOS)' == 'osx'">true</DisableProjectBuild>
   </PropertyGroup>
   <PropertyGroup>
     <DebugType>Full</DebugType>
diff --git a/src/tests/JIT/jit64/opt/rngchk/RngchkStress2_o.csproj b/src/tests/JIT/jit64/opt/rngchk/RngchkStress2_o.csproj
index 88a3a3372846..24e01e4305a8 100644
--- a/src/tests/JIT/jit64/opt/rngchk/RngchkStress2_o.csproj
+++ b/src/tests/JIT/jit64/opt/rngchk/RngchkStress2_o.csproj
@@ -2,10 +2,6 @@
   <PropertyGroup>
     <DebugType>PdbOnly</DebugType>
     <Optimize>True</Optimize>
-
-    <!-- The test is too complex to compile on macOS where secondary threads have small stack size by default
-         and that is not enough for Roslyn, see https://github.com/dotnet/runtime/issues/87879 -->
-    <DisableProjectBuild Condition="'$(HostOS)' == 'osx'">true</DisableProjectBuild>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="RngchkStress2.cs" />
diff --git a/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs
new file mode 100644
index 000000000000..0d18e7bf5351
--- /dev/null
+++ b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs
@@ -0,0 +1,80 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using Xunit;
+
+public unsafe class MemsetMemcpyNullref
+{
+    [Fact]
+    public static void MemsetMemcpyThrowNullRefonNull()
+    {
+        Assert.Throws<NullReferenceException>(() => MemoryInit(null));
+        Assert.Throws<NullReferenceException>(() => MemoryCopy(null, null));
+        Assert.Throws<NullReferenceException>(() =>
+            {
+                // Check when only src is null
+                HugeStruct hs = default;
+                MemoryCopy(&hs, null);
+            });
+        Assert.Throws<NullReferenceException>(() =>
+            {
+                // Check when only dst is null
+                HugeStruct hs = default;
+                MemoryCopy(null, &hs);
+            });
+
+        // Check various lengths
+        uint[] lengths = [1, 10, 100, 1000, 10000, 100000, 1000000];
+        foreach (uint length in lengths)
+        {
+            Assert.Throws<NullReferenceException>(() => MemoryInitByref(ref Unsafe.NullRef<byte>(), length));
+            Assert.Throws<NullReferenceException>(() => MemoryCopyByref(ref Unsafe.NullRef<byte>(), ref Unsafe.NullRef<byte>(), length));
+        }
+
+        // These APIs are not expected to fail/throw on zero length, even if pointers are not valid
+        byte valid = 0;
+        MemoryInitByref(ref Unsafe.NullRef<byte>(), 0);
+        MemoryCopyByref(ref Unsafe.NullRef<byte>(), ref valid, 0);
+        MemoryCopyByref(ref valid, ref Unsafe.NullRef<byte>(), 0);
+        MemoryCopyByref(ref Unsafe.NullRef<byte>(), ref Unsafe.NullRef<byte>(), 0);
+
+        byte valid2 = 0;
+        MemoryInitByrefZeroLen(ref valid);
+        MemoryInitByrefZeroLen(ref Unsafe.NullRef<byte>());
+        MemoryCopyByrefZeroLen(ref valid, ref valid2);
+        MemoryCopyByrefZeroLen(ref valid, ref Unsafe.NullRef<byte>());
+        MemoryCopyByrefZeroLen(ref Unsafe.NullRef<byte>(), ref valid2);
+        MemoryCopyByrefZeroLen(ref Unsafe.NullRef<byte>(), ref Unsafe.NullRef<byte>());
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static void MemoryCopy(HugeStruct* dst, HugeStruct* src) => 
+        *dst = *src;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static void MemoryCopyByref(ref byte dst, ref byte src, uint len) => 
+        Unsafe.CopyBlockUnaligned(ref dst, ref src, len);
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static void MemoryInit(HugeStruct* dst) => 
+        *dst = default;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static void MemoryInitByref(ref byte dst, uint len) => 
+        Unsafe.InitBlockUnaligned(ref dst, 42, len);
+
+    private struct HugeStruct
+    {
+        public fixed byte Data[20_000];
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static void MemoryCopyByrefZeroLen(ref byte dst, ref byte src) => 
+        Unsafe.CopyBlockUnaligned(ref dst, ref src, 0);
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static void MemoryInitByrefZeroLen(ref byte dst) => 
+        Unsafe.InitBlockUnaligned(ref dst, 42, 0);
+}
diff --git a/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.csproj b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.csproj
new file mode 100644
index 000000000000..23d7b90be536
--- /dev/null
+++ b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.csproj
@@ -0,0 +1,10 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <DebugType>None</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs"/>
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il b/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il
deleted file mode 100644
index 068f11ad7b61..000000000000
--- a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il
+++ /dev/null
@@ -1,99 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-.assembly extern System.Runtime { .publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A ) }
-.assembly extern xunit.core {}
-.assembly extern System.Runtime.Extensions {}
-.assembly BufferMemmoveTailCall {
-    // Allow access to private members of System.Private.CoreLib
-    .custom instance void System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute::.ctor(string) = (
-        01 00 16 53 79 73 74 65 6d 2e 50 72 69 76 61 74
-        65 2e 43 6f 72 65 4c 69 62 00 00
-    )
-}
-
-.class public abstract auto ansi sealed beforefieldinit TailCallBufferMemmove
-       extends [System.Runtime]System.Object
-{
-  .method public hidebysig static int32 Main() cil managed
-  {
-    .custom instance void [xunit.core]Xunit.FactAttribute::.ctor() = (
-        01 00 00 00
-    )
-    .maxstack 8
-    .entrypoint
-
-    // C#:
-    // byte[] src = new byte[32];
-    // Test(ref src[0]);
-
-    ldc.i4.s 32
-    newarr [System.Runtime]System.Byte
-    ldc.i4.0
-    ldelema [System.Runtime]System.Byte
-    call void TailCallBufferMemmove::Test(uint8&)
-    
-    // return 100;
-    ldc.i4.s 100
-    ret
-  }
-
-  .method private hidebysig static void Test (uint8& src) cil managed noinlining 
-  {
-    .maxstack 3
-
-    // C#:
-    // byte* data = stackalloc byte[64]; // to trigger slow helper-based tail calls
-    // Buffer.Memmove(ref Unsafe.AsRef<byte>(data), ref src, 64);
-
-    ldc.i4.s 64
-    conv.u
-    localloc
-    call !!0& [System.Runtime]System.Runtime.CompilerServices.Unsafe::AsRef<uint8>(void*)
-    ldarg.0
-    ldc.i4.s 64
-    conv.i
-    tail. call void [System.Runtime]System.Buffer::Memmove(uint8&, uint8&, native uint)
-    ret
-  }
-}
-
-// C#:
-// namespace System.Runtime.CompilerServices
-// {
-//     public class IgnoresAccessChecksToAttribute : Attribute
-//     {
-//         public IgnoresAccessChecksToAttribute(string assemblyName)
-//         {
-//             AssemblyName = assemblyName;
-//         }
-//         public string AssemblyName { get; }
-//     }
-// }
-//
-.class public auto ansi beforefieldinit System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute
-    extends [System.Runtime]System.Attribute
-{
-  .field private initonly string '<AssemblyName>k__BackingField'
-  .method public hidebysig specialname rtspecialname instance void .ctor (string assemblyName) cil managed 
-  {
-    .maxstack 8
-    ldarg.0
-    call instance void [System.Runtime]System.Attribute::.ctor()
-    ldarg.0
-    ldarg.1
-    stfld string System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute::'<AssemblyName>k__BackingField'
-    ret
-  }
-  .method public hidebysig specialname instance string get_AssemblyName () cil managed 
-  {
-    .maxstack 8
-    ldarg.0
-    ldfld string System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute::'<AssemblyName>k__BackingField'
-    ret
-  }
-  .property instance string AssemblyName()
-  {
-    .get instance string System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute::get_AssemblyName()
-  }
-}
diff --git a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.ilproj b/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.ilproj
deleted file mode 100644
index 5fa250452852..000000000000
--- a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.ilproj
+++ /dev/null
@@ -1,8 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk.IL">
-  <PropertyGroup>
-    <Optimize>True</Optimize>
-  </PropertyGroup>
-  <ItemGroup>
-    <Compile Include="$(MSBuildProjectName).il" />
-  </ItemGroup>
-</Project>
diff --git a/src/tests/Loader/classloader/DefaultInterfaceMethods/constrainedcall/constrained2_brl.il b/src/tests/Loader/classloader/DefaultInterfaceMethods/constrainedcall/constrained2_brl.il
new file mode 100644
index 000000000000..9382ff66730c
--- /dev/null
+++ b/src/tests/Loader/classloader/DefaultInterfaceMethods/constrainedcall/constrained2_brl.il
@@ -0,0 +1,153 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+.assembly extern System.Console { .publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A ) }
+.assembly extern System.Runtime { .publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A ) }
+.assembly extern Microsoft.DotNet.XUnitExtensions {}
+.assembly extern xunit.core {}
+.assembly constrained2_brl { }
+
+.class interface private auto ansi abstract
+  IAdder
+{
+  .method public hidebysig newslot virtual
+    instance int32 Add(int32) cil managed
+  {
+    ldstr "Calling DIM from ByRefLike type is invalid"
+    newobj instance void [System.Runtime]System.Exception::.ctor(string)
+    throw
+  }
+}
+
+.class private sequential ansi sealed beforefieldinit Adder
+  extends [System.Runtime]System.ValueType
+  implements IAdder
+{
+  .custom instance void [System.Runtime]System.Runtime.CompilerServices.IsByRefLikeAttribute::.ctor() = (
+      01 00 00 00
+  )
+
+  .field private int32 _field
+
+  .method public hidebysig specialname rtspecialname
+    instance void .ctor (int32) cil managed
+  {
+    ldarg.0
+    ldarg.1
+    stfld int32 Adder::_field
+    ret
+  }
+
+  .method public hidebysig newslot virtual
+    instance int32 Add(int32) cil managed
+  {
+    // Load field and add with argument
+    ldarg.0
+    dup
+    ldfld int32 valuetype Adder::_field
+    ldarg.1
+    add
+
+    // Update the field
+    stfld int32 valuetype Adder::_field
+
+    // Return the field value
+    ldarg.0
+    ldfld int32 valuetype Adder::_field
+    ret
+  }
+}
+
+.class private sequential ansi sealed beforefieldinit Adder_Invalid
+  extends [System.Runtime]System.ValueType
+  implements IAdder
+{
+  .custom instance void [System.Runtime]System.Runtime.CompilerServices.IsByRefLikeAttribute::.ctor() = (
+      01 00 00 00
+  )
+
+  .method public hidebysig specialname rtspecialname
+    instance void .ctor (int32) cil managed
+  {
+    ret
+  }
+
+  //
+  // Deferring to the DIM on IAdder
+  //
+}
+
+.method public hidebysig static int32 Check<byreflike (IAdder) T>(!!0, int32)
+{
+  ldarga.s 0
+  ldarg.1
+  constrained. !!0
+  callvirt instance int32 IAdder::Add(int32)
+  ret
+}
+
+.class public auto ansi abstract sealed beforefieldinit constrained2_brl
+    extends [System.Runtime]System.Object
+{
+  .method public hidebysig static int32 Main()
+  {
+      .custom instance void [xunit.core]Xunit.FactAttribute::.ctor() = (
+          01 00 00 00
+      )
+      .custom instance void [Microsoft.DotNet.XUnitExtensions]Xunit.SkipOnMonoAttribute::.ctor(string, valuetype [Microsoft.DotNet.XUnitExtensions]Xunit.TestPlatforms) = (
+        01 00 2c 4d 6f 6e 6f 20 64 6f 65 73 20 6e 6f 74
+        20 73 75 70 70 6f 72 74 20 42 79 52 65 66 4c 69
+        6b 65 20 67 65 6e 65 72 69 63 73 20 79 65 74 ff
+        ff ff ff 00 00
+      )
+      .entrypoint
+
+      .locals init (
+        valuetype Adder,
+        valuetype Adder_Invalid
+      )
+
+      // Initialize Adder instance
+      ldloca.s 0
+      ldc.i4 10
+      call instance void Adder::.ctor(int32)
+
+      ldstr "Validate constrained call of ByRefLike interface method passes"
+      call void [System.Console]System.Console::WriteLine(string)
+      ldloc.0
+      ldc.i4 20
+      call int32 Check<valuetype Adder>(!!0, int32)
+      ldc.i4 30
+      ceq
+      brfalse FAIL
+
+      // Initialize Adder_Invalid instance
+      ldloca.s 1
+      ldc.i4 10
+      call instance void Adder_Invalid::.ctor(int32)
+
+      .try
+      {
+        ldstr "Validate constrained call of ByRefLike interface DIM fails"
+        call void [System.Console]System.Console::WriteLine(string)
+
+        ldloc.1
+        ldc.i4 20
+        call int32 Check<valuetype Adder_Invalid>(!!0, int32)
+        leave FAIL
+      }
+      catch [System.Runtime]System.Exception
+      {
+        pop
+        leave ExpectedFailure
+      }
+
+  ExpectedFailure:
+      ldc.i4 100
+      ret
+
+  FAIL:
+      ldc.i4 101
+      ret
+  }
+}
diff --git a/src/tests/Loader/classloader/DefaultInterfaceMethods/constrainedcall/constrained2_brl.ilproj b/src/tests/Loader/classloader/DefaultInterfaceMethods/constrainedcall/constrained2_brl.ilproj
new file mode 100644
index 000000000000..8aceddffe8f5
--- /dev/null
+++ b/src/tests/Loader/classloader/DefaultInterfaceMethods/constrainedcall/constrained2_brl.ilproj
@@ -0,0 +1,5 @@
+<Project Sdk="Microsoft.NET.Sdk.IL">
+  <ItemGroup>
+    <Compile Include="constrained2_brl.il" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/Loader/classloader/generics/ByRefLike/GenericTypeSubstitution.cs b/src/tests/Loader/classloader/generics/ByRefLike/GenericTypeSubstitution.cs
index 3453b2a1d3e5..75b22b974792 100644
--- a/src/tests/Loader/classloader/generics/ByRefLike/GenericTypeSubstitution.cs
+++ b/src/tests/Loader/classloader/generics/ByRefLike/GenericTypeSubstitution.cs
@@ -12,32 +12,29 @@
 public class GenericTypeSubstitution
 {
     [Fact]
-    [SkipOnMono("Mono does not support ByRefLike generics yet")]
     public static void AllowByRefLike_Substituted_For_AllowByRefLike()
     {
         Console.WriteLine($"{nameof(AllowByRefLike_Substituted_For_AllowByRefLike)}...");
-        
+
         Console.WriteLine($" -- Instantiate: {Exec.TypeSubstitutionInterfaceImplementationAllowByRefLike()}");
         Console.WriteLine($" -- Instantiate: {Exec.TypeSubstitutionInheritanceAllowByRefLike()}");
         Console.WriteLine($" -- Instantiate: {Exec.TypeSubstitutionFieldAllowByRefLike()}");
     }
 
     [Fact]
-    [SkipOnMono("Mono does not support ByRefLike generics yet")]
     public static void NonByRefLike_Substituted_For_AllowByRefLike()
     {
+        Console.WriteLine($"{nameof(NonByRefLike_Substituted_For_AllowByRefLike)}...");
+
         Console.WriteLine($" -- Instantiate: {Exec.TypeSubstitutionInterfaceImplementationNonByRefLike()}");
         Console.WriteLine($" -- Instantiate: {Exec.TypeSubstitutionInheritanceNonByRefLike()}");
         Console.WriteLine($" -- Instantiate: {Exec.TypeSubstitutionFieldNonByRefLike()}");
     }
 
     [Fact]
-    [ActiveIssue("To be created", TestRuntimes.CoreCLR)]
-    [SkipOnMono("Mono does not support ByRefLike generics yet")]
-    public static void AllowByRefLike_Substituted_For_NonByRefLike_Invalid()
+    public static void AllowByRefLike_Substituted_For_NonByRefLike()
     {
-        Assert.Throws<TypeLoadException>(() => { Exec.TypeSubstitutionInterfaceImplementationAllowByRefLikeIntoNonByRefLike(); });
-        Assert.Throws<TypeLoadException>(() => { Exec.TypeSubstitutionInheritanceAllowByRefLikeIntoNonByRefLike(); });
-        Assert.Throws<TypeLoadException>(() => { Exec.TypeSubstitutionFieldAllowByRefLikeIntoNonByRefLike(); });
+        Console.WriteLine($"{nameof(AllowByRefLike_Substituted_For_NonByRefLike)}...");
+        Exec.TypeSubstitutionFieldAllowNonByRefLikeIntoNonByRefLike();
     }
 }
\ No newline at end of file
diff --git a/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharp.il b/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharp.il
index d81452d398af..72f0dc67f69e 100644
--- a/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharp.il
+++ b/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharp.il
@@ -344,9 +344,22 @@
         ret
     }
 
+    .field public static class InvalidCSharp.GenericClass_Over`1<!T> StaticField1
     .field public static !T StaticField
 }
 
+.class public sequential ansi sealed beforefieldinit InvalidCSharp.GenericClass_IndependentConstraints`2<byreflike T, (!T) U>
+    extends [System.Runtime]System.Object
+{
+    .method public hidebysig specialname rtspecialname
+        instance void .ctor () cil managed
+    {
+        ldarg.0
+        call instance void [System.Runtime]System.Object::.ctor()
+        ret
+    }
+}
+
 .class public sequential ansi sealed beforefieldinit ByRefLikeType
     extends [System.Runtime]System.ValueType
 {
@@ -355,6 +368,19 @@
     )
 }
 
+.class interface public auto ansi abstract InvalidCSharp.EmptyInterface
+{
+}
+
+.class public sequential ansi sealed beforefieldinit InvalidCSharp.ByRefLikeTypeWithInterface
+    extends [System.Runtime]System.ValueType
+    implements InvalidCSharp.EmptyInterface
+{
+    .custom instance void [System.Runtime]System.Runtime.CompilerServices.IsByRefLikeAttribute::.ctor() = (
+        01 00 00 00
+    )
+}
+
 .class public sequential ansi sealed beforefieldinit RegularValueType
     extends [System.Runtime]System.ValueType
 {
@@ -439,15 +465,6 @@
 }
 
 // Invalid generic substitution of non-allow-byreflike with allow-byreflike
-.class interface public auto ansi abstract InvalidCSharp.GenericDerivedInterface_Invalid`1<byreflike T>
-    implements class InvalidCSharp.GenericInterface_Invalid`1<!T>
-{
-}
-
-.class public sequential ansi sealed beforefieldinit InvalidCSharp.GenericDerivedClass_Invalid`1<byreflike T>
-    extends class InvalidCSharp.GenericClass_Invalid`1<!T>
-{
-}
 
 .class public sequential ansi sealed beforefieldinit InvalidCSharp.GenericValueTypeWrapper_Invalid`1<byreflike T>
     extends [System.Runtime]System.ValueType
@@ -525,6 +542,27 @@
         callvirt instance string [System.Runtime]System.Object::ToString()
         ret
     }
+    .method public hidebysig static
+        class [System.Runtime]System.Type GenericByRefLike_ConstraintsAreIndependent_Int32_Int32() cil managed
+    {
+        newobj instance void class InvalidCSharp.GenericClass_IndependentConstraints`2<int32, int32>::.ctor()
+        callvirt instance class [System.Runtime]System.Type [System.Runtime]System.Object::GetType()
+        ret
+    }
+    .method public hidebysig static
+        class [System.Runtime]System.Type GenericByRefLike_ConstraintsAreIndependent_Interface_ByRefLike_Invalid() cil managed
+    {
+        newobj instance void class InvalidCSharp.GenericClass_IndependentConstraints`2<class InvalidCSharp.EmptyInterface, valuetype InvalidCSharp.ByRefLikeTypeWithInterface>::.ctor()
+        callvirt instance class [System.Runtime]System.Type [System.Runtime]System.Object::GetType()
+        ret
+    }
+    .method public hidebysig static
+        class [System.Runtime]System.Type GenericByRefLike_ConstraintsAreIndependent_ByRefLike_ByRefLike_Invalid() cil managed
+    {
+        newobj instance void class InvalidCSharp.GenericClass_IndependentConstraints`2<valuetype InvalidCSharp.ByRefLikeTypeWithInterface, valuetype InvalidCSharp.ByRefLikeTypeWithInterface>::.ctor()
+        callvirt instance class [System.Runtime]System.Type [System.Runtime]System.Object::GetType()
+        ret
+    }
 
     .method public hidebysig static
         object BoxAsObject() cil managed
@@ -798,27 +836,9 @@
         callvirt instance string [System.Runtime]System.Object::ToString()
         ret
     }
-    
-    .method public hidebysig static
-        string TypeSubstitutionInterfaceImplementationAllowByRefLikeIntoNonByRefLike() cil managed
-    {
-        ldtoken class InvalidCSharp.GenericDerivedInterface_Invalid`1<valuetype RegularValueType>
-        call class [System.Runtime]System.Type [System.Runtime]System.Type::GetTypeFromHandle(valuetype [System.Runtime]System.RuntimeTypeHandle)
-        callvirt instance string [System.Runtime]System.Object::ToString()
-        ret
-    }
-
-    .method public hidebysig static
-        string TypeSubstitutionInheritanceAllowByRefLikeIntoNonByRefLike() cil managed
-    {
-        ldtoken class InvalidCSharp.GenericDerivedClass_Invalid`1<valuetype RegularValueType>
-        call class [System.Runtime]System.Type [System.Runtime]System.Type::GetTypeFromHandle(valuetype [System.Runtime]System.RuntimeTypeHandle)
-        callvirt instance string [System.Runtime]System.Object::ToString()
-        ret
-    }
 
     .method public hidebysig static
-        string TypeSubstitutionFieldAllowByRefLikeIntoNonByRefLike() cil managed
+        string TypeSubstitutionFieldAllowNonByRefLikeIntoNonByRefLike() cil managed
     {
         ldtoken valuetype InvalidCSharp.GenericValueTypeWrapper_Invalid`1<valuetype RegularValueType>
         call class [System.Runtime]System.Type [System.Runtime]System.Type::GetTypeFromHandle(valuetype [System.Runtime]System.RuntimeTypeHandle)
diff --git a/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharp.ilproj b/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharp.ilproj
index 58dc9527f804..8e167b20da08 100644
--- a/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharp.ilproj
+++ b/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharp.ilproj
@@ -2,7 +2,6 @@
   <PropertyGroup>
     <OutputType>library</OutputType>
     <MonoAotIncompatible>true</MonoAotIncompatible>
-    <DisableProjectBuild Condition="'$(RuntimeFlavor)' == 'Mono'">true</DisableProjectBuild>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="InvalidCSharp.il" />
diff --git a/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharpNegative.il b/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharpNegative.il
new file mode 100644
index 000000000000..08767e372ea4
--- /dev/null
+++ b/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharpNegative.il
@@ -0,0 +1,143 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+.assembly extern System.Console { .publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A ) }
+.assembly extern System.Runtime { .publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A ) }
+
+.assembly InvalidCSharpNegative { }
+
+.class public sequential ansi sealed beforefieldinit ByRefLikeType
+    extends [System.Runtime]System.ValueType
+{
+    .custom instance void [System.Runtime]System.Runtime.CompilerServices.IsByRefLikeAttribute::.ctor() = (
+        01 00 00 00
+    )
+}
+
+//
+// Begin invalid
+//
+
+.class public sequential ansi sealed beforefieldinit InvalidCSharpNegative.GenericClass_Invalid`1<T>
+    extends [System.Runtime]System.Object
+{
+    .method public hidebysig specialname rtspecialname
+        instance void .ctor () cil managed
+    {
+        ldarg.0
+        call instance void [System.Runtime]System.Object::.ctor()
+        ret
+    }
+}
+
+.class interface public auto ansi abstract InvalidCSharpNegative.GenericInterface_Invalid`1<T>
+{
+}
+
+.class public sequential ansi sealed beforefieldinit InvalidCSharpNegative.GenericValueType_Invalid`1<T>
+    extends [System.Runtime]System.ValueType
+{
+}
+
+// Invalid generic substitution of non-allow-byreflike with allow-byreflike
+.class interface public auto ansi abstract InvalidCSharpNegative.GenericDerivedInterface_Invalid`1<byreflike T>
+    implements class InvalidCSharpNegative.GenericInterface_Invalid`1<!T>
+{
+}
+
+.class public sequential ansi sealed beforefieldinit InvalidCSharpNegative.GenericDerivedClass_Invalid`1<byreflike T>
+    extends class InvalidCSharpNegative.GenericClass_Invalid`1<!T>
+{
+}
+
+.class public sequential ansi sealed beforefieldinit InvalidCSharpNegative.GenericValueTypeWrapper_Invalid`1<byreflike T>
+    extends [System.Runtime]System.ValueType
+{
+    .field public valuetype InvalidCSharpNegative.GenericValueType_Invalid`1<!T> fld;
+}
+
+.class public auto ansi beforefieldinit InvalidCSharpNegative.BaseClassWithGenericMethod
+    extends [System.Runtime]System.Object
+{
+    .method public hidebysig newslot virtual
+        instance void AcceptsByRefLike<byreflike T> () cil managed
+    {
+        ret
+    }
+    .method public hidebysig specialname rtspecialname
+        instance void .ctor () cil managed
+    {
+        ldarg.0
+        call instance void [System.Runtime]System.Object::.ctor()
+        ret
+    }
+}
+
+.class public auto ansi beforefieldinit InvalidCSharpNegative.DerivedClassWithGenericMethod_Invalid
+    extends InvalidCSharpNegative.BaseClassWithGenericMethod
+{
+    .method public hidebysig static
+        class InvalidCSharpNegative.BaseClassWithGenericMethod Create () cil managed noinlining
+    {
+        newobj instance void InvalidCSharpNegative.DerivedClassWithGenericMethod_Invalid::.ctor()
+        ret
+    }
+    .method public hidebysig virtual
+        instance void AcceptsByRefLike<T> () cil managed // Missing constraint
+    {
+        ret
+    }
+    .method private hidebysig specialname rtspecialname
+        instance void .ctor () cil managed
+    {
+        ldarg.0
+        call instance void InvalidCSharpNegative.BaseClassWithGenericMethod::.ctor()
+        ret
+    }
+}
+
+// Entry points
+
+.class public auto ansi abstract sealed beforefieldinit Exec
+    extends [System.Runtime]System.Object
+{
+    .method public hidebysig static
+        string TypeSubstitutionInterfaceImplementationAllowByRefLikeIntoNonByRefLike() cil managed
+    {
+        ldtoken class InvalidCSharpNegative.GenericDerivedInterface_Invalid`1<valuetype ByRefLikeType>
+        call class [System.Runtime]System.Type [System.Runtime]System.Type::GetTypeFromHandle(valuetype [System.Runtime]System.RuntimeTypeHandle)
+        callvirt instance string [System.Runtime]System.Object::ToString()
+        ret
+    }
+
+    .method public hidebysig static
+        string TypeSubstitutionInheritanceAllowByRefLikeIntoNonByRefLike() cil managed
+    {
+        ldtoken class InvalidCSharpNegative.GenericDerivedClass_Invalid`1<valuetype ByRefLikeType>
+        call class [System.Runtime]System.Type [System.Runtime]System.Type::GetTypeFromHandle(valuetype [System.Runtime]System.RuntimeTypeHandle)
+        callvirt instance string [System.Runtime]System.Object::ToString()
+        ret
+    }
+
+    .method public hidebysig static
+        string TypeSubstitutionFieldAllowByRefLikeIntoNonByRefLike() cil managed
+    {
+        ldtoken valuetype InvalidCSharpNegative.GenericValueTypeWrapper_Invalid`1<valuetype ByRefLikeType>
+        call class [System.Runtime]System.Type [System.Runtime]System.Type::GetTypeFromHandle(valuetype [System.Runtime]System.RuntimeTypeHandle)
+        callvirt instance string [System.Runtime]System.Object::ToString()
+        ret
+    }
+
+    .method public hidebysig static
+        void OverrideMethodNotByRefLike() cil managed
+    {
+        .locals init (
+            [0] class InvalidCSharpNegative.BaseClassWithGenericMethod
+        )
+        call class InvalidCSharpNegative.BaseClassWithGenericMethod InvalidCSharpNegative.DerivedClassWithGenericMethod_Invalid::Create()
+        stloc.0
+        ldloc.0
+        callvirt instance void InvalidCSharpNegative.BaseClassWithGenericMethod::AcceptsByRefLike<valuetype ByRefLikeType>()
+        ret
+    }
+}
\ No newline at end of file
diff --git a/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharpNegative.ilproj b/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharpNegative.ilproj
new file mode 100644
index 000000000000..18c2a182f432
--- /dev/null
+++ b/src/tests/Loader/classloader/generics/ByRefLike/InvalidCSharpNegative.ilproj
@@ -0,0 +1,9 @@
+<Project Sdk="Microsoft.NET.Sdk.IL">
+  <PropertyGroup>
+    <OutputType>library</OutputType>
+    <MonoAotIncompatible>true</MonoAotIncompatible>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="InvalidCSharpNegative.il" />
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/src/tests/Loader/classloader/generics/ByRefLike/Validate.cs b/src/tests/Loader/classloader/generics/ByRefLike/Validate.cs
index 4d70945b4c74..50a7b42e06ed 100644
--- a/src/tests/Loader/classloader/generics/ByRefLike/Validate.cs
+++ b/src/tests/Loader/classloader/generics/ByRefLike/Validate.cs
@@ -12,7 +12,6 @@
 public class Validate
 {
     [Fact]
-    [SkipOnMono("Mono does not support ByRefLike generics yet")]
     public static void Validate_TypeLoad()
     {
         Console.WriteLine($"{nameof(Validate_TypeLoad)}...");
@@ -21,15 +20,17 @@ public static void Validate_TypeLoad()
         Console.WriteLine($" -- Instantiate: {Exec.GenericInterface()}");
         Console.WriteLine($" -- Instantiate: {Exec.GenericValueType()}");
         Console.WriteLine($" -- Instantiate: {Exec.GenericByRefLike()}");
+        Console.WriteLine($" -- Instantiate: {Exec.GenericByRefLike_ConstraintsAreIndependent_Int32_Int32()}");
 
         Assert.Throws<TypeLoadException>(() => { Exec.GenericClass_Invalid(); });
         Assert.Throws<TypeLoadException>(() => { Exec.GenericInterface_Invalid(); });
         Assert.Throws<TypeLoadException>(() => { Exec.GenericValueType_Invalid(); });
         Assert.Throws<TypeLoadException>(() => { Exec.GenericByRefLike_Invalid(); });
+        Assert.Throws<TypeLoadException>(() => { Exec.GenericByRefLike_ConstraintsAreIndependent_Interface_ByRefLike_Invalid(); });
+        Assert.Throws<TypeLoadException>(() => { Exec.GenericByRefLike_ConstraintsAreIndependent_ByRefLike_ByRefLike_Invalid(); });
     }
 
     [Fact]
-    [SkipOnMono("Mono does not support ByRefLike generics yet")]
     public static void Validate_Casting_Scenarios()
     {
         Console.WriteLine($"{nameof(Validate_Casting_Scenarios)}...");
@@ -44,7 +45,6 @@ public static void Validate_Casting_Scenarios()
     }
 
     [Fact]
-    [SkipOnMono("Mono does not support ByRefLike generics yet")]
     public static void Validate_RecognizedOpCodeSequences_Scenarios()
     {
         Console.WriteLine($"{nameof(Validate_RecognizedOpCodeSequences_Scenarios)}...");
@@ -56,7 +56,6 @@ public static void Validate_RecognizedOpCodeSequences_Scenarios()
     }
 
     [Fact]
-    [SkipOnMono("Mono does not support ByRefLike generics yet")]
     public static void Validate_InvalidOpCode_Scenarios()
     {
         Console.WriteLine($"{nameof(Validate_InvalidOpCode_Scenarios)}...");
@@ -73,7 +72,6 @@ public static void Validate_InvalidOpCode_Scenarios()
     }
 
     [Fact]
-    [SkipOnMono("Mono does not support ByRefLike generics yet")]
     public static void Validate_Inlining_Behavior()
     {
         Console.WriteLine($"{nameof(Validate_Inlining_Behavior)}...");
@@ -85,7 +83,6 @@ public static void Validate_Inlining_Behavior()
     }
 
     // [Fact]
-    [SkipOnMono("Mono does not support ByRefLike generics yet")]
     public static void Validate_MemberDiscoveryViaReflection_ForSpanReadOnlySpan()
     {
         Console.WriteLine($"{nameof(Validate_MemberDiscoveryViaReflection_ForSpanReadOnlySpan)}...");
diff --git a/src/tests/Loader/classloader/generics/ByRefLike/Validate.csproj b/src/tests/Loader/classloader/generics/ByRefLike/Validate.csproj
index 8bb4ee77df06..83580f549743 100644
--- a/src/tests/Loader/classloader/generics/ByRefLike/Validate.csproj
+++ b/src/tests/Loader/classloader/generics/ByRefLike/Validate.csproj
@@ -2,7 +2,6 @@
   <PropertyGroup>
     <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
     <RequiresProcessIsolation>true</RequiresProcessIsolation>
-    <DisableProjectBuild Condition="'$(RuntimeFlavor)' == 'Mono'">true</DisableProjectBuild>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="Validate.cs" />
diff --git a/src/tests/Loader/classloader/generics/ByRefLike/ValidateNegative.cs b/src/tests/Loader/classloader/generics/ByRefLike/ValidateNegative.cs
new file mode 100644
index 000000000000..78f091b14aca
--- /dev/null
+++ b/src/tests/Loader/classloader/generics/ByRefLike/ValidateNegative.cs
@@ -0,0 +1,32 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.IO;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using InvalidCSharpNegative;
+
+using Xunit;
+
+public class ValidateNegative
+{
+    [Fact]
+    [SkipOnMono("https://github.com/dotnet/runtime/issues/99820")]
+    public static void AllowByRefLike_Substituted_For_NonByRefLike_Invalid()
+    {
+        Console.WriteLine($"{nameof(AllowByRefLike_Substituted_For_NonByRefLike_Invalid)}...");
+
+        Assert.Throws<TypeLoadException>(() => { Exec.TypeSubstitutionInterfaceImplementationAllowByRefLikeIntoNonByRefLike(); });
+        Assert.Throws<TypeLoadException>(() => { Exec.OverrideMethodNotByRefLike(); });
+    }
+
+    [Fact]
+    public static void AllowByRefLike_Substituted_For_NonByRefLike_Invalid_Class()
+    {
+        Console.WriteLine($"{nameof(AllowByRefLike_Substituted_For_NonByRefLike_Invalid_Class)}...");
+
+        Assert.Throws<TypeLoadException>(() => { Exec.TypeSubstitutionInheritanceAllowByRefLikeIntoNonByRefLike(); });
+        Assert.Throws<TypeLoadException>(() => { Exec.TypeSubstitutionFieldAllowByRefLikeIntoNonByRefLike(); });
+    }
+}
\ No newline at end of file
diff --git a/src/tests/Loader/classloader/generics/ByRefLike/ValidateNegative.csproj b/src/tests/Loader/classloader/generics/ByRefLike/ValidateNegative.csproj
new file mode 100644
index 000000000000..283b6f846aac
--- /dev/null
+++ b/src/tests/Loader/classloader/generics/ByRefLike/ValidateNegative.csproj
@@ -0,0 +1,17 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
+    <RequiresProcessIsolation>true</RequiresProcessIsolation>
+
+    <!-- Crossgen is not intended to serve as an IL verifier, and is not designed to provide error semantics on bad input. -->
+    <CrossGenTest>false</CrossGenTest>
+    <NativeAotIncompatible>true</NativeAotIncompatible>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="ValidateNegative.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="$(TestLibraryProjectPath)" />
+    <ProjectReference Include="InvalidCSharpNegative.ilproj" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/Loader/classloader/generics/Constraints/General/ManyGenConstraints.csproj b/src/tests/Loader/classloader/generics/Constraints/General/ManyGenConstraints.csproj
index 2c2218e82333..f709f4e1599d 100644
--- a/src/tests/Loader/classloader/generics/Constraints/General/ManyGenConstraints.csproj
+++ b/src/tests/Loader/classloader/generics/Constraints/General/ManyGenConstraints.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Constraints/Recursion/RecursiveConstraints.csproj b/src/tests/Loader/classloader/generics/Constraints/Recursion/RecursiveConstraints.csproj
index 8ef772178de7..81c7a60c0008 100644
--- a/src/tests/Loader/classloader/generics/Constraints/Recursion/RecursiveConstraints.csproj
+++ b/src/tests/Loader/classloader/generics/Constraints/Recursion/RecursiveConstraints.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Constraints/Regressions/532403/VSW532403.csproj b/src/tests/Loader/classloader/generics/Constraints/Regressions/532403/VSW532403.csproj
index bb6093d8a5d4..6664cae7db25 100644
--- a/src/tests/Loader/classloader/generics/Constraints/Regressions/532403/VSW532403.csproj
+++ b/src/tests/Loader/classloader/generics/Constraints/Regressions/532403/VSW532403.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Constraints/Regressions/ddb62403/bug62403.csproj b/src/tests/Loader/classloader/generics/Constraints/Regressions/ddb62403/bug62403.csproj
index 1ec607e4edb5..5b82efa29e5b 100644
--- a/src/tests/Loader/classloader/generics/Constraints/Regressions/ddb62403/bug62403.csproj
+++ b/src/tests/Loader/classloader/generics/Constraints/Regressions/ddb62403/bug62403.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Constraints/Regressions/dev10_512868/dev10_512868.csproj b/src/tests/Loader/classloader/generics/Constraints/Regressions/dev10_512868/dev10_512868.csproj
index c89d7e6e4560..2f4f60eeb1a0 100644
--- a/src/tests/Loader/classloader/generics/Constraints/Regressions/dev10_512868/dev10_512868.csproj
+++ b/src/tests/Loader/classloader/generics/Constraints/Regressions/dev10_512868/dev10_512868.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Constraints/Regressions/vsw609874/vsw609874.csproj b/src/tests/Loader/classloader/generics/Constraints/Regressions/vsw609874/vsw609874.csproj
index c89d7e6e4560..2f4f60eeb1a0 100644
--- a/src/tests/Loader/classloader/generics/Constraints/Regressions/vsw609874/vsw609874.csproj
+++ b/src/tests/Loader/classloader/generics/Constraints/Regressions/vsw609874/vsw609874.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/GenericParams10k.csproj b/src/tests/Loader/classloader/generics/GenericMethods/GenericParams10k.csproj
index 51d4fb253a98..3d73f2fb3604 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/GenericParams10k.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/GenericParams10k.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method001.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method001.csproj
index c086fca65513..8c6e7e770f27 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method001.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method001.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method001a.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method001a.csproj
index 208aa1488202..353ef8291587 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method001a.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method001a.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method001b.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method001b.csproj
index b3459767f194..8d954c69307d 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method001b.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method001b.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method001c.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method001c.csproj
index 3ed2b07ec980..46cf61f183c4 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method001c.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method001c.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method001d.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method001d.csproj
index 2935a603dbe0..aaa52c6b260e 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method001d.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method001d.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method001e.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method001e.csproj
index e847ee924785..8cf4c580059b 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method001e.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method001e.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method001f.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method001f.csproj
index 0f4a97142172..65db239d7dd3 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method001f.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method001f.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method001g.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method001g.csproj
index f4440218be1d..f94f3cdd8856 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method001g.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method001g.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method001h.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method001h.csproj
index dc938c874e05..2fe46ed16534 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method001h.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method001h.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method001i.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method001i.csproj
index 8d9298995aa0..94fe407edc09 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method001i.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method001i.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method001j.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method001j.csproj
index 33ffc1cfb16b..799b22d10e6e 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method001j.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method001j.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method002.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method002.csproj
index 946ea8a77bd7..63ea6aca07e2 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method002.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method002.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method003.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method003.csproj
index 594ee9fef54f..68706d3093d7 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method003.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method003.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method004.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method004.csproj
index 31cc0d663813..dd4a68827039 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method004.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method004.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method005.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method005.csproj
index e798714219bd..8ceb031c170d 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method005.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method005.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method006.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method006.csproj
index 38ac15719023..ef63274af903 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method006.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method006.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method007.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method007.csproj
index 596f8ef7936e..ab5eaeb17856 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method007.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method007.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method008.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method008.csproj
index 52d76c8672a0..a4302ec81323 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method008.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method008.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method009.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method009.csproj
index 1fc2fbdace9b..ace0d7581ee8 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method009.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method009.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method010.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method010.csproj
index d089902623bb..9f2f2652f775 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method010.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method010.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method011.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method011.csproj
index 47aca2de21a0..37ddfa18cd95 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method011.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method011.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method012.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method012.csproj
index a0392fefc948..45f3ca7355ec 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method012.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method012.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method013.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method013.csproj
index d0407574314c..13623712952b 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method013.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method013.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/GenericMethods/method015.csproj b/src/tests/Loader/classloader/generics/GenericMethods/method015.csproj
index db88efc837e7..3ca53e9624fb 100644
--- a/src/tests/Loader/classloader/generics/GenericMethods/method015.csproj
+++ b/src/tests/Loader/classloader/generics/GenericMethods/method015.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Nesting/NestedGenericClasses.csproj b/src/tests/Loader/classloader/generics/Instantiation/Nesting/NestedGenericClasses.csproj
index a7ddd26fbc31..44654df4fcf3 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Nesting/NestedGenericClasses.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Nesting/NestedGenericClasses.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
 
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Nesting/NestedGenericStructs.csproj b/src/tests/Loader/classloader/generics/Instantiation/Nesting/NestedGenericStructs.csproj
index 920394f99b1a..7c09762450ad 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Nesting/NestedGenericStructs.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Nesting/NestedGenericStructs.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
 
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Nesting/NestedGenericTypesMix.csproj b/src/tests/Loader/classloader/generics/Instantiation/Nesting/NestedGenericTypesMix.csproj
index 294ef3f76fb3..5ad4c6c9cb0f 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Nesting/NestedGenericTypesMix.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Nesting/NestedGenericTypesMix.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
 
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase01.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase01.csproj
index 7953775d28c4..4cbdc08bc28b 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase01.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase02.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase02.csproj
index 44868791735f..77db5ba6fd2f 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase02.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase03.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase03.csproj
index d023fae22bde..858af65a6347 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase03.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase04.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase04.csproj
index d703c95f2632..33f9e98808b9 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase04.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase04.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase05.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase05.csproj
index f78b5d77b021..df8a48076c4d 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase05.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase05.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase06.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase06.csproj
index 0f7844d9f890..6018995e1f42 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase06.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase06.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase07.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase07.csproj
index 0ead39c00064..29e2f8eaa616 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase07.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/AbstractBase07.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/CuriouslyRecurringThroughInterface.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/CuriouslyRecurringThroughInterface.csproj
index b8738ddef263..202822111865 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/CuriouslyRecurringThroughInterface.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/CuriouslyRecurringThroughInterface.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface01.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface01.csproj
index 5aacd58afb34..3235b20484f8 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface01.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface02.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface02.csproj
index 5f641abb827d..1b3e15868f4c 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface02.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface03.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface03.csproj
index 217f2d747e7d..ef8b8186572f 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface03.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface04.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface04.csproj
index f89c4168ba36..c69d34861eda 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface04.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface04.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface05.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface05.csproj
index 71180b18ba51..a4bdad4393d0 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface05.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface05.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface06.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface06.csproj
index b7a60876b7c3..6666677b8334 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface06.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface06.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface07.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface07.csproj
index 0799901fe56e..2da4e6e7098f 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface07.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface07.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface08.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface08.csproj
index 4590e691f35c..b616f030c417 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface08.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface08.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface09.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface09.csproj
index c9fe5dc81c0e..23e65cf68088 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface09.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface09.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface10.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface10.csproj
index e49a107e40b2..235457c8eb63 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface10.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface10.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface11.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface11.csproj
index 627b65d910f6..07d64728abfa 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface11.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface11.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface12.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface12.csproj
index f224bc5b9aa1..96f7f846a8ef 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface12.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface12.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface13.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface13.csproj
index 500d10f3d837..208cdb0d0da4 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface13.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface13.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface14.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface14.csproj
index 1177d418b8c2..2427ca71e4d5 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface14.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/MultipleInterface14.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass01.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass01.csproj
index 80c0e3ee0aed..79514ee1e58d 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass01.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass02.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass02.csproj
index 2253855939d5..1eaafc4cd9cf 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass02.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass03.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass03.csproj
index a41e18b387c7..28c26e00cb61 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass03.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass04.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass04.csproj
index 56539d789f86..47247dc30276 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass04.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass04.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass05.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass05.csproj
index 38fa6ed91454..b841ab36f657 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass05.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass05.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass06.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass06.csproj
index ff8025959d61..096d8c02fbcd 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass06.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedBaseClass06.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass01.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass01.csproj
index e0505e3bab36..6fd7311321c0 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass01.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass02.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass02.csproj
index 4856728202d9..c14be97dfbc8 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass02.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass03.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass03.csproj
index 79f0170e49cf..23b3afcada72 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass03.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass04.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass04.csproj
index 433129f2a025..a237ddd570a8 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass04.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedClass04.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface01.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface01.csproj
index f722bbdc62b9..75c20764a6e6 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface01.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface02.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface02.csproj
index 8a1cbd165dca..d27d4dda129a 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface02.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface03.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface03.csproj
index 16d21b4b1ce0..941b8626796e 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface03.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface04.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface04.csproj
index 77ca5dee7ba3..9f50aa1ec2ed 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface04.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface04.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface05.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface05.csproj
index 8365da354389..76d48f6347f1 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface05.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface05.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface06.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface06.csproj
index 312c46eb244e..fef6888c71b9 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface06.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface06.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface07.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface07.csproj
index 06833a98398d..11e0cc11c53e 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface07.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface07.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface08.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface08.csproj
index 88c8b97b9f61..c190e0fa2c11 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface08.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedInterface08.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct01.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct01.csproj
index 1d65664ed074..2ca68eedb077 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct01.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct02.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct02.csproj
index f0682ee6ba5d..b53421624c99 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct02.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct03.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct03.csproj
index 83322742d5df..c64edbba761c 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct03.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct04.csproj b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct04.csproj
index 1023dc4ad1c7..bebc8a72601e 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct04.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Positive/NestedStruct04.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Recursion/GenTypeItself.csproj b/src/tests/Loader/classloader/generics/Instantiation/Recursion/GenTypeItself.csproj
index eb9b5d0e7a75..224d11e391b2 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Recursion/GenTypeItself.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Recursion/GenTypeItself.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Recursion/RecursiveInheritance.csproj b/src/tests/Loader/classloader/generics/Instantiation/Recursion/RecursiveInheritance.csproj
index 39dff3d6cda7..8fa764fc977e 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Recursion/RecursiveInheritance.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Recursion/RecursiveInheritance.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Recursion/Struct_ImplementMscorlibGenInterface.csproj b/src/tests/Loader/classloader/generics/Instantiation/Recursion/Struct_ImplementMscorlibGenInterface.csproj
index 4dcab943f82e..01c2d760cae6 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Recursion/Struct_ImplementMscorlibGenInterface.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Recursion/Struct_ImplementMscorlibGenInterface.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Recursion/genrecur.csproj b/src/tests/Loader/classloader/generics/Instantiation/Recursion/genrecur.csproj
index 64a7b3698cce..107c26c652a3 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Recursion/genrecur.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Recursion/genrecur.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Instantiation/Regressions/607/DevDiv607.csproj b/src/tests/Loader/classloader/generics/Instantiation/Regressions/607/DevDiv607.csproj
index 68ac9315c5f1..c7a324171171 100644
--- a/src/tests/Loader/classloader/generics/Instantiation/Regressions/607/DevDiv607.csproj
+++ b/src/tests/Loader/classloader/generics/Instantiation/Regressions/607/DevDiv607.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base01a_auto.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base01a_auto.csproj
index f71a8589384c..204d3fd16ecd 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base01a_auto.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base01a_auto.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base01a_auto_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base01a_auto_ser.csproj
index 236f0c5a0c2f..09274bcb604f 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base01a_auto_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base01a_auto_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base01a_seq.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base01a_seq.csproj
index 21708c13e11f..cfff4171656b 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base01a_seq.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base01a_seq.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base01a_seq_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base01a_seq_ser.csproj
index 80c8c3ab1539..dd4818d11acb 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base01a_seq_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base01a_seq_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base01b_auto_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base01b_auto_ser.csproj
index 7053dbf796a3..9467efd77b7c 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base01b_auto_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base01b_auto_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base01b_seq.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base01b_seq.csproj
index bcc984bc693b..246ea44440cd 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base01b_seq.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base01b_seq.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base01b_seq_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base01b_seq_ser.csproj
index 09679c2945f5..e08549fd605b 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base01b_seq_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base01b_seq_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base01c_seq_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base01c_seq_ser.csproj
index 7c648520b044..8ce62a53a952 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base01c_seq_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base01c_seq_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base01d_seq_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base01d_seq_ser.csproj
index e5629b9344ad..00ab51dc7eba 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base01d_seq_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base01d_seq_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base02a_auto.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base02a_auto.csproj
index 658c26136d1f..beb7ef2508ea 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base02a_auto.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base02a_auto.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base02a_auto_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base02a_auto_ser.csproj
index 2b1eb72b0dc2..9f7f8e9e7e22 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base02a_auto_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base02a_auto_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base02a_seq.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base02a_seq.csproj
index 8f61162fea4a..46b747ef4b00 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base02a_seq.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base02a_seq.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base02a_seq_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base02a_seq_ser.csproj
index 129bff138463..e7c44258714c 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base02a_seq_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base02a_seq_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base02b_auto_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base02b_auto_ser.csproj
index 8b654da34466..056564682188 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base02b_auto_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base02b_auto_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base02b_seq.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base02b_seq.csproj
index a64634704507..b41214a92204 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base02b_seq.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base02b_seq.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base02b_seq_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base02b_seq_ser.csproj
index 28743feb79df..b420a6e7fc34 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base02b_seq_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base02b_seq_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base02c_seq_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base02c_seq_ser.csproj
index 85280329ae25..bfd61e00698a 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base02c_seq_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base02c_seq_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/Base02d_seq_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/Base02d_seq_ser.csproj
index 6188ac0434fd..dca149f16620 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/Base02d_seq_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/Base02d_seq_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/class01_auto.csproj b/src/tests/Loader/classloader/generics/Layout/General/class01_auto.csproj
index 27fb2a7d75b7..6cb5d604d28e 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/class01_auto.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/class01_auto.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/class01_auto_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/class01_auto_ser.csproj
index c0f2975e50dd..efa770d8fe55 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/class01_auto_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/class01_auto_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/class01_seq.csproj b/src/tests/Loader/classloader/generics/Layout/General/class01_seq.csproj
index 26d455097028..791d1cc3f97a 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/class01_seq.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/class01_seq.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/class01_seq_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/class01_seq_ser.csproj
index f4600fb5520d..0f83bd75b675 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/class01_seq_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/class01_seq_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/class02_auto.csproj b/src/tests/Loader/classloader/generics/Layout/General/class02_auto.csproj
index ddb7fcc232a2..3cde5f402700 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/class02_auto.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/class02_auto.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/class02_auto_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/class02_auto_ser.csproj
index ec4da9e4e46f..8657fee9557b 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/class02_auto_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/class02_auto_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/class02_seq_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/class02_seq_ser.csproj
index 8c86b4e73eab..bd3b01d693cc 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/class02_seq_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/class02_seq_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/struct01_auto.csproj b/src/tests/Loader/classloader/generics/Layout/General/struct01_auto.csproj
index c921dd4db4db..4c291dc73bdb 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/struct01_auto.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/struct01_auto.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/struct01_auto_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/struct01_auto_ser.csproj
index 6b1c34d6cd5a..117ea4e449cf 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/struct01_auto_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/struct01_auto_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/struct01_seq.csproj b/src/tests/Loader/classloader/generics/Layout/General/struct01_seq.csproj
index 8a768ed51ed0..80a550560735 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/struct01_seq.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/struct01_seq.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/struct01_seq_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/struct01_seq_ser.csproj
index accd25809e40..bbd5f24a1f4b 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/struct01_seq_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/struct01_seq_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/struct02_auto.csproj b/src/tests/Loader/classloader/generics/Layout/General/struct02_auto.csproj
index 5368dcca0a7f..e9be77270808 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/struct02_auto.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/struct02_auto.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/struct02_auto_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/struct02_auto_ser.csproj
index e1705b730c50..3651552e145a 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/struct02_auto_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/struct02_auto_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/struct02_seq.csproj b/src/tests/Loader/classloader/generics/Layout/General/struct02_seq.csproj
index e5a5388017cb..2e45e393089e 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/struct02_seq.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/struct02_seq.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/General/struct02_seq_ser.csproj b/src/tests/Loader/classloader/generics/Layout/General/struct02_seq_ser.csproj
index 1068af7de33b..0a28962614c3 100644
--- a/src/tests/Loader/classloader/generics/Layout/General/struct02_seq_ser.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/General/struct02_seq_ser.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/Specific/Negative002.csproj b/src/tests/Loader/classloader/generics/Layout/Specific/Negative002.csproj
index ed5c57779ed0..97df6ae55732 100644
--- a/src/tests/Loader/classloader/generics/Layout/Specific/Negative002.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/Specific/Negative002.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/Specific/Negative004.csproj b/src/tests/Loader/classloader/generics/Layout/Specific/Negative004.csproj
index fbde1300d22b..edd37b233aaf 100644
--- a/src/tests/Loader/classloader/generics/Layout/Specific/Negative004.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/Specific/Negative004.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/Specific/Negative_ExplicitGen.csproj b/src/tests/Loader/classloader/generics/Layout/Specific/Negative_ExplicitGen.csproj
index fa73278f556b..c62c68dc72e4 100644
--- a/src/tests/Loader/classloader/generics/Layout/Specific/Negative_ExplicitGen.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/Specific/Negative_ExplicitGen.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/Specific/Positive007.csproj b/src/tests/Loader/classloader/generics/Layout/Specific/Positive007.csproj
index 77a3a6e874cd..888927fb3006 100644
--- a/src/tests/Loader/classloader/generics/Layout/Specific/Positive007.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/Specific/Positive007.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/Specific/Positive008.csproj b/src/tests/Loader/classloader/generics/Layout/Specific/Positive008.csproj
index 9484055f53d9..d6c507aed9c2 100644
--- a/src/tests/Loader/classloader/generics/Layout/Specific/Positive008.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/Specific/Positive008.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/Specific/Positive009.csproj b/src/tests/Loader/classloader/generics/Layout/Specific/Positive009.csproj
index b8e543b9c252..c8450de3991b 100644
--- a/src/tests/Loader/classloader/generics/Layout/Specific/Positive009.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/Specific/Positive009.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/Specific/Positive010.csproj b/src/tests/Loader/classloader/generics/Layout/Specific/Positive010.csproj
index 4f4fc2cdd1ae..17e2563ff492 100644
--- a/src/tests/Loader/classloader/generics/Layout/Specific/Positive010.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/Specific/Positive010.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Layout/Specific/SelfRecursiveGenerics.csproj b/src/tests/Loader/classloader/generics/Layout/Specific/SelfRecursiveGenerics.csproj
index 534993d1721c..d1251938c825 100644
--- a/src/tests/Loader/classloader/generics/Layout/Specific/SelfRecursiveGenerics.csproj
+++ b/src/tests/Loader/classloader/generics/Layout/Specific/SelfRecursiveGenerics.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>0</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Misc/ConstraintsAndInheritance.csproj b/src/tests/Loader/classloader/generics/Misc/ConstraintsAndInheritance.csproj
index fa916aa744c2..29ac08340c5a 100644
--- a/src/tests/Loader/classloader/generics/Misc/ConstraintsAndInheritance.csproj
+++ b/src/tests/Loader/classloader/generics/Misc/ConstraintsAndInheritance.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Misc/TestWithManyParams.csproj b/src/tests/Loader/classloader/generics/Misc/TestWithManyParams.csproj
index f977c77ae727..0ac36527b8ef 100644
--- a/src/tests/Loader/classloader/generics/Misc/TestWithManyParams.csproj
+++ b/src/tests/Loader/classloader/generics/Misc/TestWithManyParams.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Pointers/Pointers.cs b/src/tests/Loader/classloader/generics/Pointers/Pointers.cs
index f8c1e6116dbf..8017887d710c 100644
--- a/src/tests/Loader/classloader/generics/Pointers/Pointers.cs
+++ b/src/tests/Loader/classloader/generics/Pointers/Pointers.cs
@@ -106,18 +106,20 @@ public static void PointerArray()
     [MethodImpl(MethodImplOptions.NoInlining)]
     private static void PointerArrayImpl()
     {
-        int*[] intPtrArray = new int*[5];
-        Span<int> intSpan = stackalloc int[intPtrArray.Length];
-        int* intArray = (int*)Unsafe.AsPointer(ref intSpan.GetPinnableReference());
+        int intAllocationSize = 5;
+        int*[] intPtrArray = new int*[intAllocationSize];
+        int* intArray = stackalloc int[intAllocationSize];
+        Span<int> intSpan = new Span<int>(intArray, intAllocationSize);
         for (int i = 0; i < intPtrArray.Length; i++)
         {
             intArray[i] = i;
             intPtrArray[i] = &intArray[i];
         }
 
-        Struct*[] structPtrArray = new Struct*[5];
-        Span<Struct> structSpan = stackalloc Struct[structPtrArray.Length];
-        Struct* structArray = (Struct*)Unsafe.AsPointer(ref structSpan.GetPinnableReference());
+        int structAllocationSize = 5;
+        Struct*[] structPtrArray = new Struct*[structAllocationSize];
+        Struct* structArray = stackalloc Struct[structAllocationSize];
+        Span<Struct> structSpan = new Span<Struct>(structArray, structAllocationSize);
         for (int i = 0; i < structPtrArray.Length; i++)
         {
             structArray[i] = new Struct() { Num = i };
diff --git a/src/tests/Loader/classloader/generics/Pointers/Pointers.csproj b/src/tests/Loader/classloader/generics/Pointers/Pointers.csproj
index f714a84b8e65..0bf583423126 100644
--- a/src/tests/Loader/classloader/generics/Pointers/Pointers.csproj
+++ b/src/tests/Loader/classloader/generics/Pointers/Pointers.csproj
@@ -1,8 +1,6 @@
 <Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
 
diff --git a/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem1.csproj b/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem1.csproj
index 89c67c49ea5c..de1869708b20 100644
--- a/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem1.csproj
+++ b/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem1.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem2.csproj b/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem2.csproj
index 06043326ab1c..04714616c883 100644
--- a/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem2.csproj
+++ b/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem2.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem3.csproj b/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem3.csproj
index f6c66ab50d53..bd14fba4b87f 100644
--- a/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem3.csproj
+++ b/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem3.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem4.csproj b/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem4.csproj
index f120e1fbf142..5bb202cbd850 100644
--- a/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem4.csproj
+++ b/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem4.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem5.csproj b/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem5.csproj
index 7953d5354383..38b846416b69 100644
--- a/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem5.csproj
+++ b/src/tests/Loader/classloader/generics/Statics/Regressions/524571/StaticsProblem5.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtual.csproj b/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtual.csproj
index 927edec7229e..fcb1d07d3b56 100644
--- a/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtual.csproj
+++ b/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtual.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtualNewslot.csproj b/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtualNewslot.csproj
index 00ed992dbae5..4e50ecc86e24 100644
--- a/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtualNewslot.csproj
+++ b/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtualNewslot.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtualNewslot_Interface.csproj b/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtualNewslot_Interface.csproj
index bb5deb5849d5..35471d30610a 100644
--- a/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtualNewslot_Interface.csproj
+++ b/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtualNewslot_Interface.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtual_Interface.csproj b/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtual_Interface.csproj
index 653d9bcd466c..91d44c41ebe0 100644
--- a/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtual_Interface.csproj
+++ b/src/tests/Loader/classloader/generics/VSD/Class2_ImplicitOverrideVirtual_Interface.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/VSD/Class_ExplicitOverrideVirtualNewslotFinal.csproj b/src/tests/Loader/classloader/generics/VSD/Class_ExplicitOverrideVirtualNewslotFinal.csproj
index 7c29a4aa4e68..46efc6f1c73b 100644
--- a/src/tests/Loader/classloader/generics/VSD/Class_ExplicitOverrideVirtualNewslotFinal.csproj
+++ b/src/tests/Loader/classloader/generics/VSD/Class_ExplicitOverrideVirtualNewslotFinal.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/VSD/Class_ImplicitOverrideVirtualNewslot.csproj b/src/tests/Loader/classloader/generics/VSD/Class_ImplicitOverrideVirtualNewslot.csproj
index 21d3b96d83b0..be2ba6e04200 100644
--- a/src/tests/Loader/classloader/generics/VSD/Class_ImplicitOverrideVirtualNewslot.csproj
+++ b/src/tests/Loader/classloader/generics/VSD/Class_ImplicitOverrideVirtualNewslot.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/VSD/Class_ImplicitOverrideVirtualNewslotFinal.csproj b/src/tests/Loader/classloader/generics/VSD/Class_ImplicitOverrideVirtualNewslotFinal.csproj
index fc51293fef8e..edc6f8e0ffb0 100644
--- a/src/tests/Loader/classloader/generics/VSD/Class_ImplicitOverrideVirtualNewslotFinal.csproj
+++ b/src/tests/Loader/classloader/generics/VSD/Class_ImplicitOverrideVirtualNewslotFinal.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/VSD/Struct_ExplicitOverrideVirtualNewslotFinal.csproj b/src/tests/Loader/classloader/generics/VSD/Struct_ExplicitOverrideVirtualNewslotFinal.csproj
index 22f5bf97f87a..2ce960724e70 100644
--- a/src/tests/Loader/classloader/generics/VSD/Struct_ExplicitOverrideVirtualNewslotFinal.csproj
+++ b/src/tests/Loader/classloader/generics/VSD/Struct_ExplicitOverrideVirtualNewslotFinal.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/VSD/Struct_ImplicitOverrideVirtualNewslotFinal.csproj b/src/tests/Loader/classloader/generics/VSD/Struct_ImplicitOverrideVirtualNewslotFinal.csproj
index 373e90e1d910..bf2b46b85e27 100644
--- a/src/tests/Loader/classloader/generics/VSD/Struct_ImplicitOverrideVirtualNewslotFinal.csproj
+++ b/src/tests/Loader/classloader/generics/VSD/Struct_ImplicitOverrideVirtualNewslotFinal.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/Delegates/Delegates001.csproj b/src/tests/Loader/classloader/generics/Variance/Delegates/Delegates001.csproj
index 7c828839bb0f..fbb9de4d9aa9 100644
--- a/src/tests/Loader/classloader/generics/Variance/Delegates/Delegates001.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/Delegates/Delegates001.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/Delegates/Delegates002.csproj b/src/tests/Loader/classloader/generics/Variance/Delegates/Delegates002.csproj
index 075ae95c28ac..b5819a5b1884 100644
--- a/src/tests/Loader/classloader/generics/Variance/Delegates/Delegates002.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/Delegates/Delegates002.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/CastClass001.csproj b/src/tests/Loader/classloader/generics/Variance/IL/CastClass001.csproj
index 3d5e1e285cc3..87641b5647de 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/CastClass001.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/CastClass001.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/CastClass004.csproj b/src/tests/Loader/classloader/generics/Variance/IL/CastClass004.csproj
index eae11787d012..14b65b490048 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/CastClass004.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/CastClass004.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/InterfaceInheritanceTest2.csproj b/src/tests/Loader/classloader/generics/Variance/IL/InterfaceInheritanceTest2.csproj
index 5fe3d08721c6..4437a426d791 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/InterfaceInheritanceTest2.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/InterfaceInheritanceTest2.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/IsInst001.csproj b/src/tests/Loader/classloader/generics/Variance/IL/IsInst001.csproj
index 60f157828385..7b887d6c07e3 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/IsInst001.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/IsInst001.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/IsInst002.csproj b/src/tests/Loader/classloader/generics/Variance/IL/IsInst002.csproj
index b2b511251dd3..d0fcfde1e469 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/IsInst002.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/IsInst002.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/IsInst003.csproj b/src/tests/Loader/classloader/generics/Variance/IL/IsInst003.csproj
index c25f6e207ec5..179d0709d013 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/IsInst003.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/IsInst003.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/IsInst004.csproj b/src/tests/Loader/classloader/generics/Variance/IL/IsInst004.csproj
index 6e01b63cea40..bae859676a8f 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/IsInst004.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/IsInst004.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/IsInst005.csproj b/src/tests/Loader/classloader/generics/Variance/IL/IsInst005.csproj
index 1d9374d412e4..c3b300ad9bb9 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/IsInst005.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/IsInst005.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/IsInst006.csproj b/src/tests/Loader/classloader/generics/Variance/IL/IsInst006.csproj
index acd897ba30cf..e67cdd20a83b 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/IsInst006.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/IsInst006.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/Unbox001.csproj b/src/tests/Loader/classloader/generics/Variance/IL/Unbox001.csproj
index b915f6cf8b7f..445d228d29bb 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/Unbox001.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/Unbox001.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/Unbox002.csproj b/src/tests/Loader/classloader/generics/Variance/IL/Unbox002.csproj
index fd2f47175602..e5e4a235020a 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/Unbox002.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/Unbox002.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/Unbox003.csproj b/src/tests/Loader/classloader/generics/Variance/IL/Unbox003.csproj
index 4c4a57471417..4ebcdb2b9a05 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/Unbox003.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/Unbox003.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/Unbox004.csproj b/src/tests/Loader/classloader/generics/Variance/IL/Unbox004.csproj
index 4e9f11eb2d0e..956f87a80926 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/Unbox004.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/Unbox004.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/Unbox005.csproj b/src/tests/Loader/classloader/generics/Variance/IL/Unbox005.csproj
index 909d56d8e64c..3a39d2d4d46a 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/Unbox005.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/Unbox005.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/Unbox006.csproj b/src/tests/Loader/classloader/generics/Variance/IL/Unbox006.csproj
index 64ac48d5ab05..c289327fd7f0 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/Unbox006.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/Unbox006.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/IL/vsw543506.csproj b/src/tests/Loader/classloader/generics/Variance/IL/vsw543506.csproj
index 4a556ec3ab03..81615c31ccf0 100644
--- a/src/tests/Loader/classloader/generics/Variance/IL/vsw543506.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/IL/vsw543506.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
   <ItemGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/Interfaces/Interfaces001.csproj b/src/tests/Loader/classloader/generics/Variance/Interfaces/Interfaces001.csproj
index 8c4092f2da36..f5efc4b19eb5 100644
--- a/src/tests/Loader/classloader/generics/Variance/Interfaces/Interfaces001.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/Interfaces/Interfaces001.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/Variance/Interfaces/Interfaces002.csproj b/src/tests/Loader/classloader/generics/Variance/Interfaces/Interfaces002.csproj
index 8f64f3f405c3..a7967c679df9 100644
--- a/src/tests/Loader/classloader/generics/Variance/Interfaces/Interfaces002.csproj
+++ b/src/tests/Loader/classloader/generics/Variance/Interfaces/Interfaces002.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/123712/repro123712.csproj b/src/tests/Loader/classloader/generics/regressions/123712/repro123712.csproj
index 0e921c58fd8f..6acdca1f5a8b 100644
--- a/src/tests/Loader/classloader/generics/regressions/123712/repro123712.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/123712/repro123712.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/137310/test137310.csproj b/src/tests/Loader/classloader/generics/regressions/137310/test137310.csproj
index 340752ba0e8d..8f74245660c0 100644
--- a/src/tests/Loader/classloader/generics/regressions/137310/test137310.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/137310/test137310.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/188892/test188892.csproj b/src/tests/Loader/classloader/generics/regressions/188892/test188892.csproj
index c02a6c475f5c..c217ccc8e011 100644
--- a/src/tests/Loader/classloader/generics/regressions/188892/test188892.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/188892/test188892.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/334376/b334376.csproj b/src/tests/Loader/classloader/generics/regressions/334376/b334376.csproj
index 567b1d271e84..7d58542d2ee0 100644
--- a/src/tests/Loader/classloader/generics/regressions/334376/b334376.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/334376/b334376.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/341477/Test_341477.csproj b/src/tests/Loader/classloader/generics/regressions/341477/Test_341477.csproj
index 572bdbfbd0a4..3615b165ffe9 100644
--- a/src/tests/Loader/classloader/generics/regressions/341477/Test_341477.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/341477/Test_341477.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/433497/vsw433497.csproj b/src/tests/Loader/classloader/generics/regressions/433497/vsw433497.csproj
index ca517f19ffd6..7f41c293dacf 100644
--- a/src/tests/Loader/classloader/generics/regressions/433497/vsw433497.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/433497/vsw433497.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/448208/b448208.csproj b/src/tests/Loader/classloader/generics/regressions/448208/b448208.csproj
index e15bdd6f09d2..803a892c7c8e 100644
--- a/src/tests/Loader/classloader/generics/regressions/448208/b448208.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/448208/b448208.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/515341/vsw515341.csproj b/src/tests/Loader/classloader/generics/regressions/515341/vsw515341.csproj
index 0d6e754390e7..de97b0135d5e 100644
--- a/src/tests/Loader/classloader/generics/regressions/515341/vsw515341.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/515341/vsw515341.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/536564/vsw536564.csproj b/src/tests/Loader/classloader/generics/regressions/536564/vsw536564.csproj
index 2e8801229a09..140fc6b89d3a 100644
--- a/src/tests/Loader/classloader/generics/regressions/536564/vsw536564.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/536564/vsw536564.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/DD117522/Test.csproj b/src/tests/Loader/classloader/generics/regressions/DD117522/Test.csproj
index 572bdbfbd0a4..3615b165ffe9 100644
--- a/src/tests/Loader/classloader/generics/regressions/DD117522/Test.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/DD117522/Test.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/dd95372/dd95372.csproj b/src/tests/Loader/classloader/generics/regressions/dd95372/dd95372.csproj
index c89d7e6e4560..2f4f60eeb1a0 100644
--- a/src/tests/Loader/classloader/generics/regressions/dd95372/dd95372.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/dd95372/dd95372.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/ddb3422/ddb3422.csproj b/src/tests/Loader/classloader/generics/regressions/ddb3422/ddb3422.csproj
index c89d7e6e4560..2f4f60eeb1a0 100644
--- a/src/tests/Loader/classloader/generics/regressions/ddb3422/ddb3422.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/ddb3422/ddb3422.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/vsw237932/repro237932.csproj b/src/tests/Loader/classloader/generics/regressions/vsw237932/repro237932.csproj
index 508fc4707cda..6c99efefe97a 100644
--- a/src/tests/Loader/classloader/generics/regressions/vsw237932/repro237932.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/vsw237932/repro237932.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/vsw395780/testExplicitOverride.csproj b/src/tests/Loader/classloader/generics/regressions/vsw395780/testExplicitOverride.csproj
index db199133e75a..86efb7f4314a 100644
--- a/src/tests/Loader/classloader/generics/regressions/vsw395780/testExplicitOverride.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/vsw395780/testExplicitOverride.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/vsw514968/vsw514968.csproj b/src/tests/Loader/classloader/generics/regressions/vsw514968/vsw514968.csproj
index c89d7e6e4560..2f4f60eeb1a0 100644
--- a/src/tests/Loader/classloader/generics/regressions/vsw514968/vsw514968.csproj
+++ b/src/tests/Loader/classloader/generics/regressions/vsw514968/vsw514968.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/Loader/classloader/generics/regressions/vsw524571/StaticsProblem5.csproj b/src/tests/Loader/classloader/generics/regressions/vsw524571/StaticsProblem5.csproj
deleted file mode 100644
index 48ea931058c8..000000000000
--- a/src/tests/Loader/classloader/generics/regressions/vsw524571/StaticsProblem5.csproj
+++ /dev/null
@@ -1,11 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-  <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
-    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-    <CLRTestPriority>1</CLRTestPriority>
-  </PropertyGroup>
-  <ItemGroup>
-    <Compile Include="staticsproblem5.cs" />
-  </ItemGroup>
-</Project>
diff --git a/src/tests/Loader/classloader/generics/regressions/vsw524571/staticsproblem5.cs b/src/tests/Loader/classloader/generics/regressions/vsw524571/staticsproblem5.cs
deleted file mode 100644
index fac29a2c8153..000000000000
--- a/src/tests/Loader/classloader/generics/regressions/vsw524571/staticsproblem5.cs
+++ /dev/null
@@ -1,67 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-
-// the subtype here that contains a Canonical type is Node<NodeStruct<NodeSys<a[]>>>
-
-using System;
-
-using Xunit;
-
-public class Node<a> 
-{
-    public static Node<a> leaf;
-
-        static Node() 
-    {
-            leaf = new Node<a>();
-            Console.WriteLine("Node<A>'s .cctor ran, where A was {0}.", typeof(a));
-        Console.WriteLine("Leaf: {0}", leaf);
-        }
-}
-
-public struct NodeStruct<a> { }
-
-public class NodeSys<a> { }
-
-public class SystemMap<a>
-{
-    public Node <NodeStruct<NodeSys<a[]>>> root;
-
-    public SystemMap(a x)
-        {
-            Console.WriteLine("Accessing a static from Node<NodeStruct<NodeSys<a[]>>>...");
-            this.root = Node<NodeStruct<NodeSys<a[]>>>.leaf;
-        }
-    public bool Eval()
-    {
-        Console.WriteLine("Read a static from Node<NodeStruct<NodeSys<a[]>>>.  Got: {0}",
-                    (root == null) ? "<null>" : root.ToString());
-                
-        if (root == null)
-            return false;
-        else
-            return true;
-
-    }
-}
-
-
-public class Test
-{
-    [Fact]
-    public static void TestEntryPoint() 
-    { 
-        Console.WriteLine("-------------------------------------------------------------------");
-        SystemMap<Int32>  y1 = new SystemMap<Int32> (5);
-        Console.WriteLine("-------------------------------------------------------------------");
-        SystemMap<String> y2 = new SystemMap<String> ("S");
-        Console.WriteLine("-------------------------------------------------------------------");
-        SystemMap<Object> y3 = new SystemMap<Object> ("S");
-        Console.WriteLine("-------------------------------------------------------------------");
-
-        Assert.True(y1.Eval());
-        Assert.True(y2.Eval());
-        Assert.True(y3.Eval());
-    }
-}
diff --git a/src/tests/Regressions/coreclr/GitHub_22888/test22888resources.csproj b/src/tests/Regressions/coreclr/GitHub_22888/test22888resources.csproj
index ca6163be24fc..8db6492ea2e3 100644
--- a/src/tests/Regressions/coreclr/GitHub_22888/test22888resources.csproj
+++ b/src/tests/Regressions/coreclr/GitHub_22888/test22888resources.csproj
@@ -9,7 +9,7 @@
     <Compile Include="test22888resources.cs" />
   </ItemGroup>
   <ItemGroup>
-    <EmbeddedResource Include="test22888.resx">
+    <EmbeddedResource Include="test22888.resx" LogicalName="test22888resources.test22888.resources">
       <Generator>ResXFileCodeGenerator</Generator>
     </EmbeddedResource>
   </ItemGroup>
diff --git a/src/tests/Regressions/coreclr/GitHub_87879/test87879.cs b/src/tests/Regressions/coreclr/GitHub_87879/test87879.cs
new file mode 100644
index 000000000000..7eb42eb89912
--- /dev/null
+++ b/src/tests/Regressions/coreclr/GitHub_87879/test87879.cs
@@ -0,0 +1,39 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Threading;
+using System.Runtime.CompilerServices;
+using Xunit;
+
+namespace test87879;
+
+public class test87879
+{
+    [Fact, SkipLocalsInit]
+    public static void TestEntryPoint()
+    {
+        //determine the expected available stack size 1.5MB, minus a little bit (384kB) for overhead.
+        var expectedSize = 0x180000 - 0x60000;
+
+        //allocate on the stack as specified above
+        Span<byte> bytes = stackalloc byte[expectedSize];
+        Consume(bytes);
+        Console.WriteLine("Main thread succeeded.");
+
+        //repeat on a secondary thread
+        Thread t = new Thread([SkipLocalsInit] () =>
+        {
+            Span<byte> bytes = stackalloc byte[expectedSize];
+            Consume(bytes);
+        });
+        t.Start();
+        t.Join();
+        Console.WriteLine("Secondary thread succeeded.");
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void Consume(Span<byte> bytes)
+    {
+    }
+}
diff --git a/src/tests/Regressions/coreclr/GitHub_87879/test87879.csproj b/src/tests/Regressions/coreclr/GitHub_87879/test87879.csproj
new file mode 100644
index 000000000000..fdd2160133b6
--- /dev/null
+++ b/src/tests/Regressions/coreclr/GitHub_87879/test87879.csproj
@@ -0,0 +1,11 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <RequiresProcessIsolation>true</RequiresProcessIsolation>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <CLRTestTargetUnsupported>true</CLRTestTargetUnsupported>
+    <CLRTestTargetUnsupported Condition="'$(TargetsWindows)' == 'true' OR '$(TargetsAppleMobile)' == 'true' OR ('$(TargetsUnix)' == 'true' AND '$(TargetsMobile)' != 'true')">false</CLRTestTargetUnsupported>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="test87879.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests.Generics.cs b/src/tests/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests.Generics.cs
new file mode 100644
index 000000000000..e1029797bf12
--- /dev/null
+++ b/src/tests/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests.Generics.cs
@@ -0,0 +1,460 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+using Xunit;
+
+struct Struct { }
+
+public static unsafe class UnsafeAccessorsTestsGenerics
+{
+    class MyList<T>
+    {
+        public const string StaticGenericFieldName = nameof(_GF);
+        public const string StaticFieldName = nameof(_F);
+        public const string GenericFieldName = nameof(_list);
+
+        static MyList()
+        {
+            _F = typeof(T).ToString();
+        }
+
+        public static void SetStaticGenericField(T val) => _GF = val;
+        private static T _GF;
+        private static string _F;
+
+        private List<T> _list;
+
+        public MyList() => _list = new();
+
+        private MyList(int i) => _list = new(i);
+
+        private MyList(List<T> list) => _list = list;
+
+        private void Clear() => _list.Clear();
+
+        private void Add(T t) => _list.Add(t);
+
+        private void AddWithIgnore<U>(T t, U _) => _list.Add(t);
+
+        private bool CanCastToElementType<U>(U t) => t is T;
+
+        private static bool CanUseElementType<U>(U t) => t is T;
+
+        private static Type ElementType() => typeof(T);
+
+        private void Add(int a) =>
+            Unsafe.As<List<int>>(_list).Add(a);
+
+        private void Add(string a) =>
+            Unsafe.As<List<string>>(_list).Add(a);
+
+        private void Add(Struct a) =>
+            Unsafe.As<List<Struct>>(_list).Add(a);
+
+        public int Count => _list.Count;
+
+        public int Capacity => _list.Capacity;
+    }
+
+    [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/89439", TestRuntimes.Mono)]
+    public static void Verify_Generic_AccessStaticFieldClass()
+    {
+        Console.WriteLine($"Running {nameof(Verify_Generic_AccessStaticFieldClass)}");
+
+        Assert.Equal(typeof(int).ToString(), GetPrivateStaticFieldInt((MyList<int>)null));
+
+        Assert.Equal(typeof(string).ToString(), GetPrivateStaticFieldString((MyList<string>)null));
+
+        Assert.Equal(typeof(Struct).ToString(), GetPrivateStaticFieldStruct((MyList<Struct>)null));
+
+        {
+            int expected = 10;
+            MyList<int>.SetStaticGenericField(expected);
+            Assert.Equal(expected, GetPrivateStaticField((MyList<int>)null));
+        }
+        {
+            string expected = "abc";
+            MyList<string>.SetStaticGenericField(expected);
+            Assert.Equal(expected, GetPrivateStaticField((MyList<string>)null));
+        }
+
+        [UnsafeAccessor(UnsafeAccessorKind.StaticField, Name=MyList<int>.StaticFieldName)]
+        extern static ref string GetPrivateStaticFieldInt(MyList<int> d);
+
+        [UnsafeAccessor(UnsafeAccessorKind.StaticField, Name=MyList<string>.StaticFieldName)]
+        extern static ref string GetPrivateStaticFieldString(MyList<string> d);
+
+        [UnsafeAccessor(UnsafeAccessorKind.StaticField, Name=MyList<Struct>.StaticFieldName)]
+        extern static ref string GetPrivateStaticFieldStruct(MyList<Struct> d);
+
+        [UnsafeAccessor(UnsafeAccessorKind.StaticField, Name=MyList<int>.StaticGenericFieldName)]
+        extern static ref V GetPrivateStaticField<V>(MyList<V> d);
+    }
+
+    [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/89439", TestRuntimes.Mono)]
+    public static void Verify_Generic_AccessFieldClass()
+    {
+        Console.WriteLine($"Running {nameof(Verify_Generic_AccessFieldClass)}");
+        {
+            MyList<int> a = new();
+            Assert.NotNull(GetPrivateField(a));
+        }
+        {
+            MyList<string> a = new();
+            Assert.NotNull(GetPrivateField(a));
+        }
+        {
+            MyList<Struct> a = new();
+            Assert.NotNull(GetPrivateField(a));
+        }
+
+        [UnsafeAccessor(UnsafeAccessorKind.Field, Name=MyList<object>.GenericFieldName)]
+        extern static ref List<V> GetPrivateField<V>(MyList<V> a);
+    }
+
+    class Base
+    {
+        protected virtual string CreateMessageGeneric<T>(T t) => $"{nameof(Base)}:{t}";
+    }
+
+    class GenericBase<T> : Base
+    {
+        protected virtual string CreateMessage(T t) => $"{nameof(GenericBase<T>)}:{t}";
+        protected override string CreateMessageGeneric<U>(U u) => $"{nameof(GenericBase<T>)}:{u}";
+    }
+
+    sealed class Derived1 : GenericBase<string>
+    {
+        protected override string CreateMessage(string u) => $"{nameof(Derived1)}:{u}";
+        protected override string CreateMessageGeneric<U>(U t) => $"{nameof(Derived1)}:{t}";
+    }
+
+    sealed class Derived2 : GenericBase<string>
+    {
+    }
+
+    [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/89439", TestRuntimes.Mono)]
+    public static void Verify_Generic_InheritanceMethodResolution()
+    {
+        string expect = "abc";
+        Console.WriteLine($"Running {nameof(Verify_Generic_InheritanceMethodResolution)}");
+        {
+            Base a = new();
+            Assert.Equal($"{nameof(Base)}:1", CreateMessage<int>(a, 1));
+            Assert.Equal($"{nameof(Base)}:{expect}", CreateMessage<string>(a, expect));
+            Assert.Equal($"{nameof(Base)}:{nameof(Struct)}", CreateMessage<Struct>(a, new Struct()));
+        }
+        {
+            GenericBase<int> a = new();
+            Assert.Equal($"{nameof(GenericBase<int>)}:1", CreateMessage<int>(a, 1));
+            Assert.Equal($"{nameof(GenericBase<int>)}:{expect}", CreateMessage<string>(a, expect));
+            Assert.Equal($"{nameof(GenericBase<int>)}:{nameof(Struct)}", CreateMessage<Struct>(a, new Struct()));
+        }
+        {
+            GenericBase<string> a = new();
+            Assert.Equal($"{nameof(GenericBase<string>)}:1", CreateMessage<int>(a, 1));
+            Assert.Equal($"{nameof(GenericBase<string>)}:{expect}", CreateMessage<string>(a, expect));
+            Assert.Equal($"{nameof(GenericBase<string>)}:{nameof(Struct)}", CreateMessage<Struct>(a, new Struct()));
+        }
+        {
+            GenericBase<Struct> a = new();
+            Assert.Equal($"{nameof(GenericBase<Struct>)}:1", CreateMessage<int>(a, 1));
+            Assert.Equal($"{nameof(GenericBase<Struct>)}:{expect}", CreateMessage<string>(a, expect));
+            Assert.Equal($"{nameof(GenericBase<Struct>)}:{nameof(Struct)}", CreateMessage<Struct>(a, new Struct()));
+        }
+        {
+            Derived1 a = new();
+            Assert.Equal($"{nameof(Derived1)}:1", CreateMessage<int>(a, 1));
+            Assert.Equal($"{nameof(Derived1)}:{expect}", CreateMessage<string>(a, expect));
+            Assert.Equal($"{nameof(Derived1)}:{nameof(Struct)}", CreateMessage<Struct>(a, new Struct()));
+        }
+        {
+            // Verify resolution of generic override logic.
+            Derived1 a1 = new();
+            Derived2 a2 = new();
+            Assert.Equal($"{nameof(Derived1)}:{expect}", Accessors<string>.CreateMessage(a1, expect));
+            Assert.Equal($"{nameof(GenericBase<string>)}:{expect}", Accessors<string>.CreateMessage(a2, expect));
+        }
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "CreateMessageGeneric")]
+        extern static string CreateMessage<W>(Base b, W w);
+    }
+
+    sealed class Accessors<V>
+    {
+        [UnsafeAccessor(UnsafeAccessorKind.Constructor)]
+        public extern static MyList<V> Create(int a);
+
+        [UnsafeAccessor(UnsafeAccessorKind.Constructor)]
+        public extern static MyList<V> CreateWithList(List<V> a);
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = ".ctor")]
+        public extern static void CallCtorAsMethod(MyList<V> l, List<V> a);
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "Add")]
+        public extern static void AddInt(MyList<V> l, int a);
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "Add")]
+        public extern static void AddString(MyList<V> l, string a);
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "Add")]
+        public extern static void AddStruct(MyList<V> l, Struct a);
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "Clear")]
+        public extern static void Clear(MyList<V> l);
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "Add")]
+        public extern static void Add(MyList<V> l, V element);
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "AddWithIgnore")]
+        public extern static void AddWithIgnore<W>(MyList<V> l, V element, W ignore);
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "CanCastToElementType")]
+        public extern static bool CanCastToElementType<W>(MyList<V> l, W element);
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "CreateMessage")]
+        public extern static string CreateMessage(GenericBase<V> b, V v);
+
+        [UnsafeAccessor(UnsafeAccessorKind.StaticMethod, Name = "ElementType")]
+        public extern static Type ElementType(MyList<V> l);
+
+        [UnsafeAccessor(UnsafeAccessorKind.StaticMethod, Name = "CanUseElementType")]
+        public extern static bool CanUseElementType<W>(MyList<V> l, W element);
+    }
+
+    [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/89439", TestRuntimes.Mono)]
+    public static void Verify_Generic_CallCtor()
+    {
+        Console.WriteLine($"Running {nameof(Verify_Generic_CallCtor)}");
+
+        // Call constructor with non-generic parameter
+        {
+            MyList<int> a = Accessors<int>.Create(1);
+            Assert.Equal(1, a.Capacity);
+        }
+        {
+            MyList<string> a = Accessors<string>.Create(2);
+            Assert.Equal(2, a.Capacity);
+        }
+        {
+            MyList<Struct> a = Accessors<Struct>.Create(3);
+            Assert.Equal(3, a.Capacity);
+        }
+
+        // Call constructor using generic parameter
+        {
+            MyList<int> a = Accessors<int>.CreateWithList([ 1 ]);
+            Assert.Equal(1, a.Count);
+        }
+        {
+            MyList<string> a = Accessors<string>.CreateWithList([ "1", "2" ]);
+            Assert.Equal(2, a.Count);
+        }
+        {
+            MyList<Struct> a = Accessors<Struct>.CreateWithList([new Struct(), new Struct(), new Struct()]);
+            Assert.Equal(3, a.Count);
+        }
+
+        // Call constructors as methods
+        {
+            MyList<int> a = (MyList<int>)RuntimeHelpers.GetUninitializedObject(typeof(MyList<int>));
+            Accessors<int>.CallCtorAsMethod(a, [1]);
+            Assert.Equal(1, a.Count);
+        }
+        {
+            MyList<string> a = (MyList<string>)RuntimeHelpers.GetUninitializedObject(typeof(MyList<string>));
+            Accessors<string>.CallCtorAsMethod(a, ["1", "2"]);
+            Assert.Equal(2, a.Count);
+        }
+        {
+            MyList<Struct> a = (MyList<Struct>)RuntimeHelpers.GetUninitializedObject(typeof(MyList<Struct>));
+            Accessors<Struct>.CallCtorAsMethod(a, [new Struct(), new Struct(), new Struct()]);
+            Assert.Equal(3, a.Count);
+        }
+    }
+
+    [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/89439", TestRuntimes.Mono)]
+    public static void Verify_Generic_GenericTypeNonGenericInstanceMethod()
+    {
+        Console.WriteLine($"Running {nameof(Verify_Generic_GenericTypeNonGenericInstanceMethod)}");
+        {
+            MyList<int> a = new();
+            Accessors<int>.AddInt(a, 1);
+            Assert.Equal(1, a.Count);
+            Accessors<int>.Clear(a);
+            Assert.Equal(0, a.Count);
+        }
+        {
+            MyList<string> a = new();
+            Accessors<string>.AddString(a, "1");
+            Accessors<string>.AddString(a, "2");
+            Assert.Equal(2, a.Count);
+            Accessors<string>.Clear(a);
+            Assert.Equal(0, a.Count);
+        }
+        {
+            MyList<Struct> a = new();
+            Accessors<Struct>.AddStruct(a, new Struct());
+            Accessors<Struct>.AddStruct(a, new Struct());
+            Accessors<Struct>.AddStruct(a, new Struct());
+            Assert.Equal(3, a.Count);
+            Accessors<Struct>.Clear(a);
+            Assert.Equal(0, a.Count);
+        }
+    }
+
+    [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/89439", TestRuntimes.Mono)]
+    public static void Verify_Generic_GenericTypeGenericInstanceMethod()
+    {
+        Console.WriteLine($"Running {nameof(Verify_Generic_GenericTypeGenericInstanceMethod)}");
+        {
+            MyList<int> a = new();
+            Assert.True(Accessors<int>.CanCastToElementType<int>(a, 1));
+            Assert.False(Accessors<int>.CanCastToElementType<string>(a, string.Empty));
+            Assert.False(Accessors<int>.CanCastToElementType<Struct>(a, new Struct()));
+            Assert.Equal(0, a.Count);
+            Accessors<int>.Add(a, 1);
+            Accessors<int>.AddWithIgnore<int>(a, 1, 1);
+            Accessors<int>.AddWithIgnore<string>(a, 1, string.Empty);
+            Accessors<int>.AddWithIgnore<Struct>(a, 1, new Struct());
+            Assert.Equal(4, a.Count);
+        }
+        {
+            MyList<string> a = new();
+            Assert.False(Accessors<string>.CanCastToElementType<int>(a, 1));
+            Assert.True(Accessors<string>.CanCastToElementType<string>(a, string.Empty));
+            Assert.False(Accessors<string>.CanCastToElementType<Struct>(a, new Struct()));
+            Assert.Equal(0, a.Count);
+            Accessors<string>.Add(a, string.Empty);
+            Accessors<string>.AddWithIgnore<int>(a, string.Empty, 1);
+            Accessors<string>.AddWithIgnore<string>(a, string.Empty, string.Empty);
+            Accessors<string>.AddWithIgnore<Struct>(a, string.Empty, new Struct());
+            Assert.Equal(4, a.Count);
+        }
+        {
+            MyList<Struct> a = new();
+            Assert.False(Accessors<Struct>.CanCastToElementType<int>(a, 1));
+            Assert.False(Accessors<Struct>.CanCastToElementType<string>(a, string.Empty));
+            Assert.True(Accessors<Struct>.CanCastToElementType<Struct>(a, new Struct()));
+            Assert.Equal(0, a.Count);
+            Accessors<Struct>.Add(a, new Struct());
+            Accessors<Struct>.AddWithIgnore<int>(a, new Struct(), 1);
+            Accessors<Struct>.AddWithIgnore<string>(a, new Struct(), string.Empty);
+            Accessors<Struct>.AddWithIgnore<Struct>(a, new Struct(), new Struct());
+            Assert.Equal(4, a.Count);
+        }
+    }
+
+    [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/89439", TestRuntimes.Mono)]
+    public static void Verify_Generic_GenericTypeNonGenericStaticMethod()
+    {
+        Console.WriteLine($"Running {nameof(Verify_Generic_GenericTypeNonGenericStaticMethod)}");
+        {
+            Assert.Equal(typeof(int), Accessors<int>.ElementType(null));
+            Assert.Equal(typeof(string), Accessors<string>.ElementType(null));
+            Assert.Equal(typeof(Struct), Accessors<Struct>.ElementType(null));
+        }
+    }
+
+    [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/89439", TestRuntimes.Mono)]
+    public static void Verify_Generic_GenericTypeGenericStaticMethod()
+    {
+        Console.WriteLine($"Running {nameof(Verify_Generic_GenericTypeGenericStaticMethod)}");
+        {
+            Assert.True(Accessors<int>.CanUseElementType<int>(null, 1));
+            Assert.False(Accessors<int>.CanUseElementType<string>(null, string.Empty));
+            Assert.False(Accessors<int>.CanUseElementType<Struct>(null, new Struct()));
+        }
+        {
+            Assert.False(Accessors<string>.CanUseElementType<int>(null, 1));
+            Assert.True(Accessors<string>.CanUseElementType<string>(null, string.Empty));
+            Assert.False(Accessors<string>.CanUseElementType<Struct>(null, new Struct()));
+        }
+        {
+            Assert.False(Accessors<Struct>.CanUseElementType<int>(null, 1));
+            Assert.False(Accessors<Struct>.CanUseElementType<string>(null, string.Empty));
+            Assert.True(Accessors<Struct>.CanUseElementType<Struct>(null, new Struct()));
+        }
+    }
+
+    class ClassWithConstraints
+    {
+        private string M<T, U>() where T : U, IEquatable<T>
+            => $"{typeof(T)}|{typeof(U)}";
+
+        private static string SM<T, U>() where T : U, IEquatable<T>
+            => $"{typeof(T)}|{typeof(U)}";
+    }
+
+    [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/89439", TestRuntimes.Mono)]
+    public static void Verify_Generic_ConstraintEnforcement()
+    {
+        Console.WriteLine($"Running {nameof(Verify_Generic_ConstraintEnforcement)}");
+
+        Assert.Equal($"{typeof(string)}|{typeof(object)}", CallMethod<string, object>(new ClassWithConstraints()));
+        Assert.Equal($"{typeof(string)}|{typeof(object)}", CallStaticMethod<string, object>(null));
+        Assert.Throws<InvalidProgramException>(() => CallMethod_NoConstraints<string, object>(new ClassWithConstraints()));
+        Assert.Throws<InvalidProgramException>(() => CallMethod_MissingConstraint<string, object>(new ClassWithConstraints()));
+        Assert.Throws<InvalidProgramException>(() => CallStaticMethod_NoConstraints<string, object>(null));
+        Assert.Throws<InvalidProgramException>(() => CallStaticMethod_MissingConstraint<string, object>(null));
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "M")]
+        extern static string CallMethod<V,W>(ClassWithConstraints c) where V : W, IEquatable<V>;
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "M")]
+        extern static string CallMethod_NoConstraints<V,W>(ClassWithConstraints c);
+
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name = "M")]
+        extern static string CallMethod_MissingConstraint<V,W>(ClassWithConstraints c) where V : W;
+
+        [UnsafeAccessor(UnsafeAccessorKind.StaticMethod, Name = "SM")]
+        extern static string CallStaticMethod<V,W>(ClassWithConstraints c) where V : W, IEquatable<V>;
+
+        [UnsafeAccessor(UnsafeAccessorKind.StaticMethod, Name = "SM")]
+        extern static string CallStaticMethod_NoConstraints<V,W>(ClassWithConstraints c);
+
+        [UnsafeAccessor(UnsafeAccessorKind.StaticMethod, Name = "SM")]
+        extern static string CallStaticMethod_MissingConstraint<V,W>(ClassWithConstraints c) where V : W;
+    }
+
+    class Invalid
+    {
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name=nameof(ToString))]
+        public static extern string CallToString<U>(U a);
+    }
+
+    class Invalid<T>
+    {
+        [UnsafeAccessor(UnsafeAccessorKind.Method, Name=nameof(ToString))]
+        public static extern string CallToString(T a);
+    }
+
+    [Fact]
+    [ActiveIssue("https://github.com/dotnet/runtime/issues/89439", TestRuntimes.Mono)]
+    public static void Verify_Generic_InvalidUseUnsafeAccessor()
+    {
+        Console.WriteLine($"Running {nameof(Verify_Generic_InvalidUseUnsafeAccessor)}");
+
+        Assert.Throws<BadImageFormatException>(() => Invalid.CallToString<int>(0));
+        Assert.Throws<BadImageFormatException>(() => Invalid<int>.CallToString(0));
+        Assert.Throws<BadImageFormatException>(() => Invalid.CallToString<string>(string.Empty));
+        Assert.Throws<BadImageFormatException>(() => Invalid<string>.CallToString(string.Empty));
+        Assert.Throws<BadImageFormatException>(() => Invalid.CallToString<Struct>(new Struct()));
+        Assert.Throws<BadImageFormatException>(() => Invalid<Struct>.CallToString(new Struct()));
+    }
+}
diff --git a/src/tests/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests.cs b/src/tests/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests.cs
index 6e0a562f32a9..30f65993da6c 100644
--- a/src/tests/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests.cs
+++ b/src/tests/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests.cs
@@ -85,33 +85,6 @@ struct UserDataValue
         public string GetFieldValue() => _f;
     }
 
-    class UserDataGenericClass<T>
-    {
-        public const string StaticGenericFieldName = nameof(_GF);
-        public const string GenericFieldName = nameof(_gf);
-        public const string StaticGenericMethodName = nameof(_GM);
-        public const string GenericMethodName = nameof(_gm);
-
-        public const string StaticFieldName = nameof(_F);
-        public const string FieldName = nameof(_f);
-        public const string StaticMethodName = nameof(_M);
-        public const string MethodName = nameof(_m);
-
-        private static T _GF;
-        private T _gf;
-
-        private static string _F = PrivateStatic;
-        private string _f;
-
-        public UserDataGenericClass() { _f = Private; }
-
-        private static string _GM(T s, ref T sr, in T si) => typeof(T).ToString();
-        private string _gm(T s, ref T sr, in T si) => typeof(T).ToString();
-
-        private static string _M(string s, ref string sr, in string si) => s;
-        private string _m(string s, ref string sr, in string si) => s;
-    }
-
     [UnsafeAccessor(UnsafeAccessorKind.Constructor)]
     extern static UserDataClass CallPrivateConstructorClass();
 
@@ -215,23 +188,6 @@ public static void Verify_AccessFieldClass()
         extern static ref string GetPrivateField(UserDataClass d);
     }
 
-    [Fact]
-    [ActiveIssue("https://github.com/dotnet/runtime/issues/92633")]
-    public static void Verify_AccessStaticFieldGenericClass()
-    {
-        Console.WriteLine($"Running {nameof(Verify_AccessStaticFieldGenericClass)}");
-
-        Assert.Equal(PrivateStatic, GetPrivateStaticFieldInt((UserDataGenericClass<int>)null));
-
-        Assert.Equal(PrivateStatic, GetPrivateStaticFieldString((UserDataGenericClass<string>)null));
-
-        [UnsafeAccessor(UnsafeAccessorKind.StaticField, Name=UserDataGenericClass<int>.StaticFieldName)]
-        extern static ref string GetPrivateStaticFieldInt(UserDataGenericClass<int> d);
-
-        [UnsafeAccessor(UnsafeAccessorKind.StaticField, Name=UserDataGenericClass<string>.StaticFieldName)]
-        extern static ref string GetPrivateStaticFieldString(UserDataGenericClass<string> d);
-    }
-
     [Fact]
     public static void Verify_AccessStaticFieldValue()
     {
@@ -259,23 +215,6 @@ public static void Verify_AccessFieldValue()
         extern static ref string GetPrivateField(ref UserDataValue d);
     }
 
-    [Fact]
-    [ActiveIssue("https://github.com/dotnet/runtime/issues/92633")]
-    public static void Verify_AccessFieldGenericClass()
-    {
-        Console.WriteLine($"Running {nameof(Verify_AccessFieldGenericClass)}");
-
-        Assert.Equal(Private, GetPrivateFieldInt(new UserDataGenericClass<int>()));
-
-        Assert.Equal(Private, GetPrivateFieldString(new UserDataGenericClass<string>()));
-
-        [UnsafeAccessor(UnsafeAccessorKind.Field, Name=UserDataGenericClass<int>.FieldName)]
-        extern static ref string GetPrivateFieldInt(UserDataGenericClass<int> d);
-
-        [UnsafeAccessor(UnsafeAccessorKind.Field, Name=UserDataGenericClass<string>.FieldName)]
-        extern static ref string GetPrivateFieldString(UserDataGenericClass<string> d);
-    }
-
     [Fact]
     public static void Verify_AccessStaticMethodClass()
     {
@@ -587,15 +526,6 @@ class Invalid
     {
         [UnsafeAccessor(UnsafeAccessorKind.Method, Name=nameof(ToString))]
         public extern string NonStatic(string a);
-
-        [UnsafeAccessor(UnsafeAccessorKind.Method, Name=nameof(ToString))]
-        public static extern string CallToString<U>(U a);
-    }
-
-    class Invalid<T>
-    {
-        [UnsafeAccessor(UnsafeAccessorKind.Method, Name=nameof(ToString))]
-        public static extern string CallToString(T a);
     }
 
     [Fact]
@@ -620,8 +550,6 @@ public static void Verify_InvalidUseUnsafeAccessor()
         Assert.Throws<BadImageFormatException>(() => LookUpFailsOnPointers(null));
         Assert.Throws<BadImageFormatException>(() => LookUpFailsOnFunctionPointers(null));
         Assert.Throws<BadImageFormatException>(() => new Invalid().NonStatic(string.Empty));
-        Assert.Throws<BadImageFormatException>(() => Invalid.CallToString<string>(string.Empty));
-        Assert.Throws<BadImageFormatException>(() => Invalid<string>.CallToString(string.Empty));
         Assert.Throws<BadImageFormatException>(() =>
         {
             string str = string.Empty;
diff --git a/src/tests/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests.csproj b/src/tests/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests.csproj
index 876d006ea96e..f551f9b48c24 100644
--- a/src/tests/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests.csproj
+++ b/src/tests/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests.csproj
@@ -6,6 +6,7 @@
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="UnsafeAccessorsTests.cs" />
+    <Compile Include="UnsafeAccessorsTests.Generics.cs" />
   </ItemGroup>
   <ItemGroup>
     <ProjectReference Include="$(TestSourceDir)Common/CoreCLRTestLibrary/CoreCLRTestLibrary.csproj" />
diff --git a/src/tests/baseservices/exceptions/generics/GenericExceptions.csproj b/src/tests/baseservices/exceptions/generics/GenericExceptions.csproj
index d5783ff8da22..e6fe8fe67d2e 100644
--- a/src/tests/baseservices/exceptions/generics/GenericExceptions.csproj
+++ b/src/tests/baseservices/exceptions/generics/GenericExceptions.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/GenericExceptions01.csproj b/src/tests/baseservices/exceptions/generics/GenericExceptions01.csproj
index 03b7210c47be..72d475ba503b 100644
--- a/src/tests/baseservices/exceptions/generics/GenericExceptions01.csproj
+++ b/src/tests/baseservices/exceptions/generics/GenericExceptions01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/GenericExceptions02.csproj b/src/tests/baseservices/exceptions/generics/GenericExceptions02.csproj
index 2c5de57ccbae..ae70ebf7cefd 100644
--- a/src/tests/baseservices/exceptions/generics/GenericExceptions02.csproj
+++ b/src/tests/baseservices/exceptions/generics/GenericExceptions02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/GenericExceptions03.csproj b/src/tests/baseservices/exceptions/generics/GenericExceptions03.csproj
index ce64432fb28d..2fce2eaca121 100644
--- a/src/tests/baseservices/exceptions/generics/GenericExceptions03.csproj
+++ b/src/tests/baseservices/exceptions/generics/GenericExceptions03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/GenericExceptions04.csproj b/src/tests/baseservices/exceptions/generics/GenericExceptions04.csproj
index 2c3a4c45291e..6535bc437a9a 100644
--- a/src/tests/baseservices/exceptions/generics/GenericExceptions04.csproj
+++ b/src/tests/baseservices/exceptions/generics/GenericExceptions04.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/GenericExceptions05.csproj b/src/tests/baseservices/exceptions/generics/GenericExceptions05.csproj
index 3765c54d5624..1b9f61083da7 100644
--- a/src/tests/baseservices/exceptions/generics/GenericExceptions05.csproj
+++ b/src/tests/baseservices/exceptions/generics/GenericExceptions05.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/GenericExceptions06.csproj b/src/tests/baseservices/exceptions/generics/GenericExceptions06.csproj
index af24deb6dafa..c917862094be 100644
--- a/src/tests/baseservices/exceptions/generics/GenericExceptions06.csproj
+++ b/src/tests/baseservices/exceptions/generics/GenericExceptions06.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/GenericExceptions07.csproj b/src/tests/baseservices/exceptions/generics/GenericExceptions07.csproj
index 2bcef54754b7..a0ebe3f2ee27 100644
--- a/src/tests/baseservices/exceptions/generics/GenericExceptions07.csproj
+++ b/src/tests/baseservices/exceptions/generics/GenericExceptions07.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/GenericExceptions08.csproj b/src/tests/baseservices/exceptions/generics/GenericExceptions08.csproj
index 054a1932d554..3bcc6b03e8fa 100644
--- a/src/tests/baseservices/exceptions/generics/GenericExceptions08.csproj
+++ b/src/tests/baseservices/exceptions/generics/GenericExceptions08.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter001.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter001.csproj
index dc18d729fe7a..7f806a3c2f92 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter001.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter001.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter002.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter002.csproj
index 3845a34a4d09..7c1176fa609f 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter002.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter002.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter003.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter003.csproj
index 7b358c3392fc..6e104fbe797c 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter003.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter003.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter004.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter004.csproj
index a6a2d923fb4f..279399b52798 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter004.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter004.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter005.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter005.csproj
index 56994aa6c4f3..958ea32d206a 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter005.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter005.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter006.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter006.csproj
index 5df08c34b946..ea038a6fe8b9 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter006.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter006.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter007.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter007.csproj
index cdd49832c286..6cd2f2b9bf6c 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter007.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter007.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter008.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter008.csproj
index f0026cb33657..c194af35860f 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter008.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter008.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter009.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter009.csproj
index dfa30401d312..2e711e8aba4e 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter009.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter009.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter010.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter010.csproj
index d5c4ed1e2889..23652668c5ec 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter010.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter010.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter011.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter011.csproj
index d9b5d6850919..3f83a6adf2ed 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter011.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter011.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter012.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter012.csproj
index 22c3166a50f1..950b15d6cd1a 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter012.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter012.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter013.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter013.csproj
index ff021d4b3262..2e96e1aff3ac 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter013.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter013.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter014.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter014.csproj
index afbb1d3bd0bc..0d33d5b9be16 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter014.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter014.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter015.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter015.csproj
index d8049c3fbe2f..ae880bb0150d 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter015.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter015.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter016.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter016.csproj
index e99dabb58986..a0ee293a6b0d 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter016.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter016.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter017.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter017.csproj
index 86a450a1a2e7..6bcc053e05bb 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter017.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter017.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/TypeParameter018.csproj b/src/tests/baseservices/exceptions/generics/TypeParameter018.csproj
index 903a55af5dcb..105379ca5d78 100644
--- a/src/tests/baseservices/exceptions/generics/TypeParameter018.csproj
+++ b/src/tests/baseservices/exceptions/generics/TypeParameter018.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/nested-try-catch01.csproj b/src/tests/baseservices/exceptions/generics/nested-try-catch01.csproj
index f6b41fddeac9..dbdfa2d377bd 100644
--- a/src/tests/baseservices/exceptions/generics/nested-try-catch01.csproj
+++ b/src/tests/baseservices/exceptions/generics/nested-try-catch01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/nested-try-catch02.csproj b/src/tests/baseservices/exceptions/generics/nested-try-catch02.csproj
index 1617596282f0..f26bfe9bb3f7 100644
--- a/src/tests/baseservices/exceptions/generics/nested-try-catch02.csproj
+++ b/src/tests/baseservices/exceptions/generics/nested-try-catch02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/nested-try-catch03.csproj b/src/tests/baseservices/exceptions/generics/nested-try-catch03.csproj
index 738238d9bc92..1b2004857dca 100644
--- a/src/tests/baseservices/exceptions/generics/nested-try-catch03.csproj
+++ b/src/tests/baseservices/exceptions/generics/nested-try-catch03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/nested-try-catch04.csproj b/src/tests/baseservices/exceptions/generics/nested-try-catch04.csproj
index f23b1af391a3..b3998185a788 100644
--- a/src/tests/baseservices/exceptions/generics/nested-try-catch04.csproj
+++ b/src/tests/baseservices/exceptions/generics/nested-try-catch04.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/nested-try-catch05.csproj b/src/tests/baseservices/exceptions/generics/nested-try-catch05.csproj
index edbae10df7aa..dbfbbed5f922 100644
--- a/src/tests/baseservices/exceptions/generics/nested-try-catch05.csproj
+++ b/src/tests/baseservices/exceptions/generics/nested-try-catch05.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/nested-try-catch06.csproj b/src/tests/baseservices/exceptions/generics/nested-try-catch06.csproj
index 1a8848673d70..5a68ee5c2e60 100644
--- a/src/tests/baseservices/exceptions/generics/nested-try-catch06.csproj
+++ b/src/tests/baseservices/exceptions/generics/nested-try-catch06.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/nested-try-catch07.csproj b/src/tests/baseservices/exceptions/generics/nested-try-catch07.csproj
index 6f1e90be47d0..ccefff116a29 100644
--- a/src/tests/baseservices/exceptions/generics/nested-try-catch07.csproj
+++ b/src/tests/baseservices/exceptions/generics/nested-try-catch07.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/nested-try-catch08.csproj b/src/tests/baseservices/exceptions/generics/nested-try-catch08.csproj
index 96f1da5418d0..01576e7507a5 100644
--- a/src/tests/baseservices/exceptions/generics/nested-try-catch08.csproj
+++ b/src/tests/baseservices/exceptions/generics/nested-try-catch08.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/nested-try-catch09.csproj b/src/tests/baseservices/exceptions/generics/nested-try-catch09.csproj
index 8256e1565c8b..7f530c570443 100644
--- a/src/tests/baseservices/exceptions/generics/nested-try-catch09.csproj
+++ b/src/tests/baseservices/exceptions/generics/nested-try-catch09.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/nested-try-catch10.csproj b/src/tests/baseservices/exceptions/generics/nested-try-catch10.csproj
index 4a1655d6fb26..c33ef120f6ea 100644
--- a/src/tests/baseservices/exceptions/generics/nested-try-catch10.csproj
+++ b/src/tests/baseservices/exceptions/generics/nested-try-catch10.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-finally-struct01.csproj b/src/tests/baseservices/exceptions/generics/try-catch-finally-struct01.csproj
index a6534d221210..4e188f2dae4a 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-finally-struct01.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-finally-struct01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-finally-struct02.csproj b/src/tests/baseservices/exceptions/generics/try-catch-finally-struct02.csproj
index 3a6af4a7ae8e..b1047e46a873 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-finally-struct02.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-finally-struct02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-finally-struct03.csproj b/src/tests/baseservices/exceptions/generics/try-catch-finally-struct03.csproj
index 9bc8e66b5e82..d095314deddd 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-finally-struct03.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-finally-struct03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-finally01.csproj b/src/tests/baseservices/exceptions/generics/try-catch-finally01.csproj
index b67cf46857c7..81c4e9142606 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-finally01.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-finally01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-finally02.csproj b/src/tests/baseservices/exceptions/generics/try-catch-finally02.csproj
index 032dba68bb40..d4260df4be68 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-finally02.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-finally02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-finally03.csproj b/src/tests/baseservices/exceptions/generics/try-catch-finally03.csproj
index 580e8b9591f2..3d56ae4c7828 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-finally03.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-finally03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-struct01.csproj b/src/tests/baseservices/exceptions/generics/try-catch-struct01.csproj
index 615b14d7ffeb..7ce9f25a19e3 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-struct01.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-struct01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-struct02.csproj b/src/tests/baseservices/exceptions/generics/try-catch-struct02.csproj
index 45a8d6f28a5f..0beeb108d89d 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-struct02.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-struct02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-struct03.csproj b/src/tests/baseservices/exceptions/generics/try-catch-struct03.csproj
index cc79353d01ae..22ac9f30c5ab 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-struct03.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-struct03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-struct04.csproj b/src/tests/baseservices/exceptions/generics/try-catch-struct04.csproj
index 4b4d1629aae9..81624e15a799 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-struct04.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-struct04.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-struct05.csproj b/src/tests/baseservices/exceptions/generics/try-catch-struct05.csproj
index 748846d245e8..2624870ea7b9 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-struct05.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-struct05.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-struct06.csproj b/src/tests/baseservices/exceptions/generics/try-catch-struct06.csproj
index f42d32e8dd6c..c48e30beb996 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-struct06.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-struct06.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-struct07.csproj b/src/tests/baseservices/exceptions/generics/try-catch-struct07.csproj
index f40d6aafc39e..b6e8de9cec11 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-struct07.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-struct07.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-struct08.csproj b/src/tests/baseservices/exceptions/generics/try-catch-struct08.csproj
index 1e067003b88c..044a752bed49 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-struct08.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-struct08.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch-struct09.csproj b/src/tests/baseservices/exceptions/generics/try-catch-struct09.csproj
index 715127fb9182..1dc412c6cee0 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch-struct09.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch-struct09.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch01.csproj b/src/tests/baseservices/exceptions/generics/try-catch01.csproj
index 30420223216e..8fa490a181e4 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch01.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch02.csproj b/src/tests/baseservices/exceptions/generics/try-catch02.csproj
index 39dbdcccdad6..344b69e54f20 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch02.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch03.csproj b/src/tests/baseservices/exceptions/generics/try-catch03.csproj
index 6bc663aad219..8c6658c74f20 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch03.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch04.csproj b/src/tests/baseservices/exceptions/generics/try-catch04.csproj
index 2f35d40d3b57..1d413e06589e 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch04.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch04.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch05.csproj b/src/tests/baseservices/exceptions/generics/try-catch05.csproj
index dcc537b3285c..63ccab17b796 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch05.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch05.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch06.csproj b/src/tests/baseservices/exceptions/generics/try-catch06.csproj
index 1c6c563763c9..c3b70da83b79 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch06.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch06.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch07.csproj b/src/tests/baseservices/exceptions/generics/try-catch07.csproj
index aedfb705e321..3321ef0db6cf 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch07.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch07.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch08.csproj b/src/tests/baseservices/exceptions/generics/try-catch08.csproj
index 51d290d8bac9..0f6e6e3e62b8 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch08.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch08.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-catch09.csproj b/src/tests/baseservices/exceptions/generics/try-catch09.csproj
index f259a5fafa40..685ea0095cf9 100644
--- a/src/tests/baseservices/exceptions/generics/try-catch09.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-catch09.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-finally-struct01.csproj b/src/tests/baseservices/exceptions/generics/try-finally-struct01.csproj
index 02c87355d4a7..43872f3faf4f 100644
--- a/src/tests/baseservices/exceptions/generics/try-finally-struct01.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-finally-struct01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-finally-struct02.csproj b/src/tests/baseservices/exceptions/generics/try-finally-struct02.csproj
index aa37ab9f9899..caeeaaee98c7 100644
--- a/src/tests/baseservices/exceptions/generics/try-finally-struct02.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-finally-struct02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-finally-struct03.csproj b/src/tests/baseservices/exceptions/generics/try-finally-struct03.csproj
index 1e53e744a3f0..79fe8fea0105 100644
--- a/src/tests/baseservices/exceptions/generics/try-finally-struct03.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-finally-struct03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-finally01.csproj b/src/tests/baseservices/exceptions/generics/try-finally01.csproj
index 2ed02426831e..9c1ed14ff94a 100644
--- a/src/tests/baseservices/exceptions/generics/try-finally01.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-finally01.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-finally02.csproj b/src/tests/baseservices/exceptions/generics/try-finally02.csproj
index 513da1e67733..6cae99717d19 100644
--- a/src/tests/baseservices/exceptions/generics/try-finally02.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-finally02.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/generics/try-finally03.csproj b/src/tests/baseservices/exceptions/generics/try-finally03.csproj
index db55dfb6c83a..fd340ca213cb 100644
--- a/src/tests/baseservices/exceptions/generics/try-finally03.csproj
+++ b/src/tests/baseservices/exceptions/generics/try-finally03.csproj
@@ -1,7 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
-    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <CLRTestPriority>1</CLRTestPriority>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/simple/ParallelCrashTester.csproj b/src/tests/baseservices/exceptions/simple/ParallelCrashTester.csproj
index 30bcef8c9c8b..f60f1f053aaa 100644
--- a/src/tests/baseservices/exceptions/simple/ParallelCrashTester.csproj
+++ b/src/tests/baseservices/exceptions/simple/ParallelCrashTester.csproj
@@ -1,6 +1,8 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
     <CLRTestPriority>1</CLRTestPriority>
+    <!-- Needed for GCStressIncompatible -->
+    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <!-- Temporarily disabled due to https://github.com/dotnet/runtime/issues/80356 -->
     <GCStressIncompatible>true</GCStressIncompatible>
   </PropertyGroup>
diff --git a/src/tests/baseservices/exceptions/unhandled/dependencytodelete.cs b/src/tests/baseservices/exceptions/unhandled/dependencytodelete.cs
new file mode 100644
index 000000000000..3897a8779df7
--- /dev/null
+++ b/src/tests/baseservices/exceptions/unhandled/dependencytodelete.cs
@@ -0,0 +1,12 @@
+using System;
+
+namespace Dependency
+{
+    public class DependencyClass
+    {
+        public static void Hello()
+        {
+            Console.WriteLine("Hello");
+        }
+    }
+}
diff --git a/src/tests/sizeondisk/sodbench/SoDBench.csproj b/src/tests/baseservices/exceptions/unhandled/dependencytodelete.csproj
similarity index 52%
rename from src/tests/sizeondisk/sodbench/SoDBench.csproj
rename to src/tests/baseservices/exceptions/unhandled/dependencytodelete.csproj
index 6b63b65f0eb4..fa1f2d01f80e 100644
--- a/src/tests/sizeondisk/sodbench/SoDBench.csproj
+++ b/src/tests/baseservices/exceptions/unhandled/dependencytodelete.csproj
@@ -1,11 +1,9 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <OutputType>exe</OutputType>
     <CLRTestKind>BuildOnly</CLRTestKind>
-    <DebugType>pdbonly</DebugType>
-    <Optimize>true</Optimize>
+    <OutputType>Library</OutputType>
   </PropertyGroup>
   <ItemGroup>
-    <Compile Include="SoDBench.cs" />
+    <Compile Include="dependencytodelete.cs" />
   </ItemGroup>
 </Project>
diff --git a/src/tests/baseservices/exceptions/unhandled/unhandledTester.cs b/src/tests/baseservices/exceptions/unhandled/unhandledTester.cs
index 151c8b635166..cca4e573c0a1 100644
--- a/src/tests/baseservices/exceptions/unhandled/unhandledTester.cs
+++ b/src/tests/baseservices/exceptions/unhandled/unhandledTester.cs
@@ -14,14 +14,14 @@ namespace TestUnhandledExceptionTester
 {
     public class Program
     {
-        static void RunExternalProcess(string unhandledType)
+        static void RunExternalProcess(string unhandledType, string assembly)
         {
             List<string> lines = new List<string>();
 
             Process testProcess = new Process();
 
             testProcess.StartInfo.FileName = Path.Combine(Environment.GetEnvironmentVariable("CORE_ROOT"), "corerun");
-            testProcess.StartInfo.Arguments = Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "unhandled.dll") + " " + unhandledType;
+            testProcess.StartInfo.Arguments = Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), assembly) + " " + unhandledType;
             testProcess.StartInfo.RedirectStandardError = true;
             // Disable creating dump since the target process is expected to fail with an unhandled exception
             testProcess.StartInfo.Environment.Remove("DOTNET_DbgEnableMiniDump");
@@ -37,6 +37,7 @@ static void RunExternalProcess(string unhandledType)
             testProcess.Start();
             testProcess.BeginErrorReadLine();
             testProcess.WaitForExit();
+            Console.WriteLine($"Test process {assembly} with argument {unhandledType} exited");
             testProcess.CancelErrorRead();
 
             int expectedExitCode;
@@ -111,13 +112,17 @@ static void RunExternalProcess(string unhandledType)
                     throw new Exception("Missing exception source frame");
                 }
             }
+
+            Console.WriteLine("Test process exited with expected error code and produced expected output");
         }
 
         [Fact]
         public static void TestEntryPoint()
         {
-            RunExternalProcess("main");
-            RunExternalProcess("foreign");
+            RunExternalProcess("main", "unhandled.dll");
+            RunExternalProcess("foreign", "unhandled.dll");
+            File.Delete(Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "dependencytodelete.dll"));
+            RunExternalProcess("missingdependency", "unhandledmissingdependency.dll");
         }
     }
 }
diff --git a/src/tests/baseservices/exceptions/unhandled/unhandledTester.csproj b/src/tests/baseservices/exceptions/unhandled/unhandledTester.csproj
index 99e154e74cd6..fff804f1924b 100644
--- a/src/tests/baseservices/exceptions/unhandled/unhandledTester.csproj
+++ b/src/tests/baseservices/exceptions/unhandled/unhandledTester.csproj
@@ -17,5 +17,10 @@
       <OutputItemType>Content</OutputItemType>
       <CopyToOutputDirectory>Always</CopyToOutputDirectory>
     </ProjectReference>
+    <ProjectReference Include="unhandledmissingdependency.csproj">
+      <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
+      <OutputItemType>Content</OutputItemType>
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </ProjectReference>
   </ItemGroup>
 </Project>
diff --git a/src/tests/baseservices/exceptions/unhandled/unhandledmissingdependency.cs b/src/tests/baseservices/exceptions/unhandled/unhandledmissingdependency.cs
new file mode 100644
index 000000000000..4863cafae875
--- /dev/null
+++ b/src/tests/baseservices/exceptions/unhandled/unhandledmissingdependency.cs
@@ -0,0 +1,12 @@
+using Dependency;
+
+namespace DependencyTest
+{
+    internal class Program
+    {
+        static void Main(string[] args)
+        {
+            DependencyClass.Hello();
+        }
+    }
+}
diff --git a/src/tests/baseservices/exceptions/unhandled/unhandledmissingdependency.csproj b/src/tests/baseservices/exceptions/unhandled/unhandledmissingdependency.csproj
new file mode 100644
index 000000000000..d388263ca3bc
--- /dev/null
+++ b/src/tests/baseservices/exceptions/unhandled/unhandledmissingdependency.csproj
@@ -0,0 +1,19 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <!-- Needs explicit Main to return the proper "unhandled exception" exit code -->
+    <RequiresProcessIsolation>true</RequiresProcessIsolation>
+    <ReferenceXUnitWrapperGenerator>false</ReferenceXUnitWrapperGenerator>
+    <CLRTestKind>BuildOnly</CLRTestKind>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <Optimize>true</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="unhandledmissingdependency.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="dependencytodelete.csproj" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="$(TestSourceDir)Common/CoreCLRTestLibrary/CoreCLRTestLibrary.csproj" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/build.proj b/src/tests/build.proj
index 3592d7c07613..7f2a0b6b38af 100644
--- a/src/tests/build.proj
+++ b/src/tests/build.proj
@@ -115,15 +115,15 @@
       <TestAssemblies Include="%(TestAssemblyPaths.Identity)" Condition="Exists(%(TestAssemblyPaths.Identity))" />
       <TestDirsWithDuplicates Include="$([System.IO.Path]::GetDirectoryName('%(TestAssemblies.Identity)'))" />
     </ItemGroup>
-      
-    <Error 
+
+    <Error
       Text="No tests found for Mono AOT compilation. Make sure that the desired test cases are not excluded from AOT compilation."
       Condition="!Exists(%(TestAssemblies.Identity))" />
 
     <RemoveDuplicates Inputs="@(TestDirsWithDuplicates)">
       <Output TaskParameter="Filtered" ItemName="TestDirs" />
     </RemoveDuplicates>
-    <ItemGroup>  
+    <ItemGroup>
       <TestsAndAssociatedAssemblies Include="%(TestDirs.Identity)/**/*.dll" Exclude="@(NoMonoAotAssemblyPaths)" />
       <CoreRootDlls Include="$(CORE_ROOT)/*.dll" Exclude="$(CORE_ROOT)/xunit.performance.api.dll;$(CORE_ROOT)/Microsoft.CodeAnalysis.VisualBasic.dll" />
       <AllDlls Condition="'$(MonoFullAot)' == 'true'" Include="@(TestsAndAssociatedAssemblies);@(CoreRootDlls)" />
@@ -144,15 +144,22 @@
       <MonoAotOption Condition="'$(MonoForceInterpreter)' == 'true'" Include="interp" />
       <MonoAotOption Condition="'$(MonoFullAot)' == 'true'" Include="full" />
       <MonoAotOption Condition="'$(MonoFullAot)' == 'true'" Include="nimt-trampolines=2000" />
-      <MonoAotOption Condition="'$(MonoFullAot)' == 'true'" Include="ntrampolines=20000" />
+      <MonoAotOption Condition="'$(MonoFullAot)' == 'true'" Include="ntrampolines=80000" />
       <MonoAotOption Condition="'$(MonoFullAot)' == 'true'" Include="nrgctx-fetch-trampolines=256" />
       <MonoAotOption Condition="'$(MonoFullAot)' == 'true'" Include="ngsharedvt-trampolines=8400" />
       <MonoAotOption Condition="'$(MonoFullAot)' == 'true'" Include="nftnptr-arg-trampolines=4000" />
       <MonoAotOption Condition="'$(MonoFullAot)' == 'true'" Include="nrgctx-trampolines=21000" />
-      <MonoAotOption Include="llvm" />
-      <MonoAotOption Include="llvm-path=$(MonoLlvmPath)" />
+      <MonoAotOption Condition="'$(RuntimeVariant)' == 'llvmaot' or '$(RuntimeVariant)' == 'llvmfullaot'" Include="llvm" />
+      <MonoAotOption Condition="'$(RuntimeVariant)' == 'llvmaot' or '$(RuntimeVariant)' == 'llvmfullaot'" Include="llvm-path=$(MonoLlvmPath)" />
       <MonoAotOption Condition="'$(__MonoToolPrefix)' != ''" Include="tool-prefix=$(__MonoToolPrefix)" />
     </ItemGroup>
+    <ItemGroup Condition="'$(MonoFullAot)' == 'true' and '$(TargetOS)' == 'linux'">
+      <MonoAotOption Include="as-name=clang" />
+      <MonoAotOption Include="as-options='-c -x assembler'" />
+      <MonoAotOption Include="tool-prefix=''" />
+      <MonoAotOption Include="ld-name=clang" />
+      <MonoAotOption Include="ld-options='-fuse-ld=lld'" />
+    </ItemGroup>
     <ItemGroup Condition="'$(TargetArchitecture)' == 'arm64'">
       <MonoAotOption Include="mattr=crc" />
       <MonoAotOption Include="mattr=crypto" />
@@ -376,8 +383,8 @@
     <Using Namespace="System.Linq"/>
     <Using Namespace="System.Text"/>
 
-    <!-- 
-      This code fragment updates the default run script for Helix. 
+    <!--
+      This code fragment updates the default run script for Helix.
       With the library tests, there is a single entry point provided as MainLibraryFileName. With the runtime tests, a bundle may have multiple main entry points. The code makes the following changes:
       - For each main entry point, it adds the mlaunch environment parameters MONO_APPLE_APP_ENTRY_POINT_LIB_NAME and MONO_APPLE_APP_ASSEMBLY_LOAD_PREFIX
       - After each execution, it renames the log file to match the MONO_APPLE_APP_ENTRY_POINT_LIB_NAME entry point
@@ -392,7 +399,7 @@
         foreach (ITaskItem item in EntryPoints)
         {
             string value = item.ItemSpec;
-            
+
             // Add the MONO_APPLE_APP_ENTRY_POINT_LIB_NAME and MONO_APPLE_APP_ASSEMBLY_LOAD_PREFIX environment parameters to the run script
             string output = lastLine.Replace("apple test", "apple run").Replace("--signal-app-end", $"--expected-exit-code=100 --set-env=MONO_APPLE_APP_ENTRY_POINT_LIB_NAME={value} --set-env=MONO_APPLE_APP_ASSEMBLY_LOAD_PREFIX={value.Split('/')[0]}") + "&& echo \"Test passed\" || { echo \"Test failed\"; exit 1; }";
             resultBuilder.AppendLine(output);
@@ -543,6 +550,7 @@
   <Target Name="RestorePackage">
     <PropertyGroup>
       <_ConfigurationProperties>/p:TargetOS=$(TargetOS) /p:TargetArchitecture=$(TargetArchitecture) /p:Configuration=$(Configuration) /p:CrossBuild=$(CrossBuild)</_ConfigurationProperties>
+      <_ConfigurationProperties Condition="'$(UseLocalAppHostPack)' == 'true'">$(_ConfigurationProperties) -p:EnableAppHostPackDownload=false -p:EnableTargetingPackDownload=false -p:EnableRuntimePackDownload=false</_ConfigurationProperties>
       <DotnetRestoreCommand>"$(DotNetTool)" restore $(RestoreProj) $(PackageVersionArg) /p:SetTFMForRestore=true $(_ConfigurationProperties)</DotnetRestoreCommand>
     </PropertyGroup>
     <Exec Command="$(DotnetRestoreCommand)"/>
diff --git a/src/tests/issues.targets b/src/tests/issues.targets
index c1468004a8ec..6b21cc19290a 100644
--- a/src/tests/issues.targets
+++ b/src/tests/issues.targets
@@ -5,9 +5,6 @@
         <ExcludeList Include="$(XunitTestBinBase)/readytorun/DynamicMethodGCStress/DynamicMethodGCStress/*">
             <Issue>timeout</Issue>
         </ExcludeList>
-        <ExcludeList Include="$(XunitTestBinBase)/Regressions/coreclr/GitHub_22888/test22888/*">
-            <Issue>https://github.com/dotnet/runtime/issues/13703</Issue>
-        </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/Interop/PInvoke/Int128/Int128Test/*">
             <Issue>https://github.com/dotnet/runtime/issues/74209</Issue>
         </ExcludeList>
@@ -75,7 +72,7 @@
         <ExcludeList Include="$(XunitTestBinBase)/readytorun/GenericCycleDetection/Depth3Test/*">
           <Issue>https://github.com/dotnet/runtime/issues/88586</Issue>
         </ExcludeList>
-        <ExcludeList Include="$(XunitTestBinBase)/Interop/Swift/**">
+        <ExcludeList Include="$(XunitTestBinBase)/Interop/Swift/SwiftInvalidCallConv/*">
             <Issue>https://github.com/dotnet/runtime/issues/93631</Issue>
         </ExcludeList>
     </ItemGroup>
@@ -247,9 +244,6 @@
         <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/DevDiv_590771/DevDiv_590771/*">
             <Issue>needs triage</Issue>
         </ExcludeList>
-        <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_56953/Runtime_56953/*">
-            <Issue>https://github.com/dotnet/runtime/issues/67870</Issue>
-        </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/Exceptions/UnwindFpBleedTest/UnwindFpBleedTest/*">
           <Issue>Windows's unwinder in ARM64v8 queue doesn't contain FP unwind fix</Issue>
         </ExcludeList>
@@ -425,9 +419,6 @@
         <ExcludeList Include="$(XunitTestBinBase)/JIT/jit64/regress/vsw/373472/**">
             <Issue>Allocates large contiguous array that is not consistently available on 32-bit platforms</Issue>
         </ExcludeList>
-        <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_56953/Runtime_56953/*">
-            <Issue>https://github.com/dotnet/runtime/issues/57856</Issue>
-        </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/profiler/multiple/multiple/*">
             <Issue>https://github.com/dotnet/runtime/issues/57875</Issue>
         </ExcludeList>
@@ -772,9 +763,6 @@
         <ExcludeList Include="$(XunitTestBinBase)/Interop/LayoutClass/LayoutClassTest/**">
             <Issue>https://github.com/dotnet/runtime/issues/81673</Issue>
         </ExcludeList>
-        <ExcludeList Include="$(XunitTestBinBase)/Interop/MarshalAPI/FunctionPointer/FunctionPtrTest/*">
-            <Issue>https://github.com/dotnet/runtimelab/issues/164</Issue>
-        </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/Interop/NativeLibrary/AssemblyLoadContext/ResolveUnmanagedDllTests/*">
             <Issue>https://github.com/dotnet/runtimelab/issues/165</Issue>
         </ExcludeList>
@@ -1012,9 +1000,6 @@
         <ExcludeList Include="$(XunitTestBinBase)/Loader/ContextualReflection/ContextualReflection/*">
             <Issue>https://github.com/dotnet/runtimelab/issues/165</Issue>
         </ExcludeList>
-        <ExcludeList Include="$(XunitTestBinBase)/reflection/Modifiers/modifiers/*">
-            <Issue>https://github.com/dotnet/runtimelab/issues/201</Issue>
-        </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/reflection/SetValue/TrySetReadonlyStaticField/*">
             <Issue>https://github.com/dotnet/runtimelab/issues/200</Issue>
         </ExcludeList>
@@ -1093,9 +1078,6 @@
         <ExcludeList Include="$(XunitTestBinBase)/JIT/Stress/ABI/tailcalls_do/**">
             <Issue>https://github.com/dotnet/runtimelab/issues/155: Reflection.Emit</Issue>
         </ExcludeList>
-        <ExcludeList Include="$(XunitTestBinBase)/managed/Compilation/Compilation/**">
-            <Issue>expects to see System.Private.CoreLib in CORE_ROOT</Issue>
-        </ExcludeList>
 
         <!-- Complex constrained calls -->
         <!-- https://github.com/dotnet/runtimelab/issues/1431 -->
@@ -1178,6 +1160,9 @@
 
     <!-- Known failures for mono runtime on *all* architectures/operating systems in *all* runtime modes -->
     <ItemGroup Condition="'$(RuntimeFlavor)' == 'mono'" >
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_62692/Runtime_62692_*/**">
+            <Issue>https://github.com/dotnet/runtime/issues/100368</Issue>
+        </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/StaticVirtualMethods/Reabstraction/**">
             <Issue>https://github.com/dotnet/runtime/issues/88775</Issue>
         </ExcludeList>
@@ -1185,13 +1170,10 @@
             <Issue>https://github.com/dotnet/runtime/issues/88689</Issue>
         </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/Regressions/coreclr/GitHub_85240/test85240/**">
-            <Issue>https://github.com/dotnet/runtime/issues/71095</Issue>
-        </ExcludeList>
-        <ExcludeList Include="$(XunitTestBinBase)/JIT/opt/Remainder/Regressions/Regression1/**">
-            <Issue>https://github.com/dotnet/runtime/issues/79022</Issue>
+            <Issue>https://github.com/dotnet/runtime/issues/90308</Issue>
         </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/Interop/PInvoke/Int128/Int128TestFieldLayout/*">
-            <Issue>https://github.com/dotnet/runtime/issues/74223</Issue>
+            <Issue>https://github.com/dotnet/runtime/issues/69399</Issue>
         </ExcludeList>
         <ExcludeList Include = "$(XunitTestBinBase)/JIT/HardwareIntrinsics/X86/Sse42.X64/Crc32_*/**">
             <Issue>https://github.com/dotnet/runtime/issues/54185</Issue>
@@ -1421,6 +1403,9 @@
         <ExcludeList Include="$(XunitTestBinBase)/Interop/ICustomMarshaler/ConflictingNames/MultipleALCs/**">
             <Issue>https://github.com/dotnet/runtime/issues/34072</Issue>
         </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/Regressions/coreclr/GitHub_22888/test22888/*">
+            <Issue>https://github.com/dotnet/runtime/issues/34072</Issue>
+        </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/Interop/ICustomMarshaler/Primitives/ICustomMarshaler_TargetUnix/**">
             <Issue>https://github.com/dotnet/runtime/issues/34374</Issue>
         </ExcludeList>
@@ -1879,6 +1864,18 @@
         <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219/*">
             <Issue>https://github.com/dotnet/runtime/issues/90374</Issue>
         </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/opt/Structs/MemsetMemcpyNullref/*">
+            <Issue>https://github.com/dotnet/runtime/issues/98628</Issue>
+        </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/Interop/Swift/SwiftAbiStress/**">
+            <Issue>https://github.com/dotnet/runtime/issues/93631: Swift frozen struct support is not implemented on Mono yet</Issue>
+        </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/Interop/Swift/SwiftRetAbiStress/**">
+            <Issue>https://github.com/dotnet/runtime/issues/93631: Swift frozen struct support is not implemented on Mono yet</Issue>
+        </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/Interop/Swift/SwiftCallbackAbiStress/**">
+            <Issue>https://github.com/dotnet/runtime/issues/93631: Swift reverse pinvokes are not implemented on Mono yet</Issue>
+        </ExcludeList>
     </ItemGroup>
 
     <!-- Known failures for mono runtime on Windows -->
@@ -1977,9 +1974,6 @@
         <ExcludeList Include = "$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_34170/**">
             <Issue>https://github.com/dotnet/runtime/issues/46622</Issue>
         </ExcludeList>
-        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Directed/zeroinit/tail_zeroinit/**">
-            <Issue>https://github.com/dotnet/runtime/issues/37955</Issue>
-        </ExcludeList>
         <ExcludeList Include = "$(XunitTestBinBase)/JIT/Regression/CLR-x86-JIT/V1-M12-Beta2/b59952/b59952/**">
             <Issue>needs triage</Issue>
         </ExcludeList>
@@ -2276,7 +2270,7 @@
         </ExcludeList>
     </ItemGroup>
 
-    <ItemGroup Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'llvmfullaot' or '$(TargetsAppleMobile)' == 'true')">
+    <ItemGroup Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'llvmfullaot' or '$(RuntimeVariant)' == 'minifullaot' or '$(TargetsAppleMobile)' == 'true')">
         <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/explicitlayout/objrefandnonobjrefoverlap/case1/**">
             <Issue>expected failure: overlapped structs fail at AOT compile time, not runtime</Issue>
         </ExcludeList>
@@ -2439,7 +2433,7 @@
         </ExcludeList>
     </ItemGroup>
 
-    <ItemGroup Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'llvmfullaot' or '$(RuntimeVariant)' == 'llvmaot' or '$(TargetsAppleMobile)' == 'true')">
+    <ItemGroup Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'llvmfullaot' or '$(RuntimeVariant)' == 'llvmaot' or '$(RuntimeVariant)' == 'minifullaot' or '$(TargetsAppleMobile)' == 'true')">
         <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/CLR-x86-JIT/V1.1-M1-Beta1/b143840/b143840/*">
             <Issue>https://github.com/dotnet/runtime/issues/48914</Issue>
         </ExcludeList>
@@ -2511,7 +2505,78 @@
         </ExcludeList>
     </ItemGroup>
 
-    <ItemGroup Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'llvmfullaot' or '$(RuntimeVariant)' == 'llvmaot' or '$(TargetOS)' == 'ios') and '$(TargetArchitecture)' == 'arm64'">
+    <ItemGroup Condition="'$(RuntimeFlavor)' == 'mono' and '$(RuntimeVariant)' == 'llvmfullaot'">
+        <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/Invoke/25params/25paramMixed_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/Invoke/SEH/catchfault_jmp_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/Invoke/SEH/catchfinally_jmpind_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/Invoke/SEH/catchfinally_jmp_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_array_nz_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_array_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_gc_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_inst_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_value_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_virt_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/VT/callconv/jumper4_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/VT/callconv/jumper5_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/VT/callconv/jumps2_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/perffix/primitivevt/callconv3_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/perffix/primitivevt/callconv3_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/jit64/localloc/call/call05_dynamic/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/jit64/verif/sniff/fg/ver_fg_13/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/Invoke/25params/25paramMixed_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/Invoke/SEH/catchfault_jmp_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/Invoke/SEH/catchfinally_jmpind_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/Invoke/SEH/catchfinally_jmp_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_array_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_array_nz_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_gc_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_inst_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_value_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_virt_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/VT/callconv/jumps2_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/SIMD/Vector3Interop_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/SIMD/Vector3Interop_ro/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/HardwareIntrinsics/X86/Sse2.X64/StoreNonTemporal_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/HardwareIntrinsics/X86/Sse2.X64/StoreNonTemporal_ro/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/coverage/importer/badendfinally/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/coverage/importer/Desktop/badendfinally_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/coverage/importer/Desktop/badendfinally_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/coverage/importer/ceeillegal/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/coverage/importer/Desktop/ceeillegal_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/coverage/importer/Desktop/ceeillegal_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/HardwareIntrinsics/General/Vector128_1/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/baseservices/varargs/varargsupport_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/Boxing/misc/tailjump_r/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Regressions/coreclr/16354/notimplemented/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/CLR-x86-JIT/V1.2-M01/b13452/b13452/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/CLR-x86-JIT/V2.0-Beta2/b353858/b353858/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/Boxing/misc/tailjump_d/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/zeroinit/tail_zeroinit/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/callconv/ThisCall/ThisCallTest/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/pinvoke/tail_pinvoke/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/generics/ByRefLike/ValidateNegative/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_80731/Runtime_80731/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/opt/Devirtualization/Comparer_get_Default/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/DictionaryExpansion/DictionaryExpansion/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/explicitlayout/NestedStructs/case03/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/explicitlayout/NestedStructs/case04/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/explicitlayout/NestedStructs/case05/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/explicitlayout/Regressions/ASURT/ASURT150271/test13/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/MethodImpl/CovariantReturns/ReturnTypeValidation/ImplicitOverrideSameSigAsDecl/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/MethodImpl/CovariantReturns/ReturnTypeValidation/OverrideSameSigAsDecl/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/MethodImpl/CovariantReturns/UnitTest/UnitTest_GVM/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/MethodImpl/generics_override1/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Loader/classloader/Statics/Misc/LiteralStatic/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/Loader/CustomAttributes/DynamicObjects/**" />
+
+        <!-- Tracking issue: https://github.com/dotnet/runtime/issues/92883 -->
+        <ExcludeList Include="$(XunitTestBinBase)/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/baseservices/TieredCompilation/TieredVtableMethodTests/**" />
+        <ExcludeList Include="$(XunitTestBinBase)/baseservices/varargs/varargsupport/**" />
+    </ItemGroup>
+
+    <ItemGroup Condition="'$(RuntimeFlavor)' == 'mono' and '$(RuntimeVariant)' == 'llvmfullaot' and '$(TargetArchitecture)' == 'x64'">
     </ItemGroup>
 
     <ItemGroup Condition="'$(RuntimeFlavor)' == 'mono' and '$(RuntimeVariant)' == 'minijit' and '$(TargetArchitecture)' == 'arm64'">
@@ -3124,6 +3189,13 @@
         </ExcludeList>
     </ItemGroup>
 
+    <!-- Apple mobile NativeAOT -->
+    <ItemGroup Condition="'$(XunitTestBinBase)' != '' and '$(TargetsAppleMobile)' == 'true' and '$(TestBuildMode)' == 'nativeaot'">
+        <ExcludeList Include = "$(XunitTestBinBase)/nativeaot/SmokeTests/TrimmingBehaviors/**">
+            <Issue>https://github.com/dotnet/runtime/issues/100226</Issue>
+        </ExcludeList>
+    </ItemGroup>
+
     <ItemGroup Condition="'$(TargetArchitecture)' == 'wasm'">
         <ExcludeList Include = "$(XunitTestBinBase)/baseservices/exceptions/unhandled/unhandled/**">
             <Issue>System.Diagnostics.Process is not supported</Issue>
@@ -3656,9 +3728,158 @@
    </ItemGroup>
 
     <ItemGroup Condition="'$(RuntimeFlavor)' == 'mono' and '$(TargetArchitecture)' == 'riscv64'">
-        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/gc_poll/**">
-            <Issue>https://github.com/dotnet/runtime/issues/54906</Issue>
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/eh/finallyexec/loopinfinally_do/*">
+            <Issue>https://github.com/dotnet/runtime/issues/65704</Issue>
+        </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/eh/finallyexec/loopinfinally_ro/*">
+            <Issue>https://github.com/dotnet/runtime/issues/65704</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Regression/VS-ia64-JIT/M00/b108366/b108366/*">
+            <Issue>https://github.com/dotnet/runtime/issues/54393</Issue>
+        </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/Interop/StructMarshalling/ReversePInvoke/MarshalExpStruct/DelegatePInvoke/DelegatePInvokeTest/**">
+            <Issue>https://github.com/dotnet/runtime/issues/65695</Issue>
+        </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/Interop/StructMarshalling/ReversePInvoke/MarshalExpStruct/ReversePInvokeManaged/ReversePInvokeTest/**">
+            <Issue>https://github.com/dotnet/runtime/issues/65695</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/jit64/opt/cse/hugeSimpleExpr1/**">
+            <Issue> The function size is Huge(about 6MB), out of the ability of inst jal, can't jump to thunk area </Issue>
         </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/jit64/opt/cse/HugeArray1/**">
+            <Issue> The function size is Huge(about 6MB), out of the ability of inst jal, can't jump to thunk area </Issue>
+        </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/jit64/opt/rngchk/RngchkStress3/*">
+            <Issue>timeout</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/ilasm/PortablePdb/IlasmPortablePdbTests/**">
+            <Issue> Did not find ilasm or ildasm in CORE_ROOT directory </Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/ilasm/System/Runtime/CompilerServices/MethodImplOptionsTests/**">
+            <Issue> Did not find ilasm or ildasm in CORE_ROOT directory </Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/bigevent/bigevent/**">
+            <Issue>Can't find file dotnet-diagnostic-{pid}-*-socket</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/eventsourceerror/eventsourceerror/**">
+            <Issue>Can't find file dotnet-diagnostic-{pid}-*-socket</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/buffersize/buffersize/**">
+            <Issue>Can't find file dotnet-diagnostic-{pid}-*-socket</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/enabledisable/enabledisable/**">
+            <Issue>Can't find file dotnet-diagnostic-{pid}-*-socket</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/gcdump/gcdump/**">
+            <Issue>Can't find file dotnet-diagnostic-{pid}-*-socket</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/processinfo/processinfo/**">
+            <Issue>Can't find file dotnet-diagnostic-{pid}-*-socket</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/processinfo2/processinfo2/**">
+            <Issue>Can't find file dotnet-diagnostic-{pid}-*-socket</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/processinfo3/processinfo3/**">
+            <Issue>Can't find file dotnet-diagnostic-{pid}-*-socket</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/providervalidation/providervalidation/**">
+            <Issue>Can't find file dotnet-diagnostic-{pid}-*-socket</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/rundownvalidation/rundownvalidation/**">
+            <Issue>Can't find file dotnet-diagnostic-{pid}-*-socket</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/reverse/reverse/**">
+           <Issue> Process has start correctly, but EventPipeClient.ListAvailablePorts() still contain this PID</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/processenvironment/processenvironment/**">
+            <Issue>https://github.com/dotnet/runtime/issues/74891</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/diagnosticport/diagnosticport/**">
+            <Issue>https://github.com/dotnet/runtime/issues/74891</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/pauseonstart/pauseonstart/**">
+            <Issue>https://github.com/dotnet/runtime/issues/74891</Issue>
+        </ExcludeList>
+        <ExcludeList Include = "$(XunitTestBinBase)/tracing/eventpipe/reverseouter/reverseouter/**">
+            <Issue>https://github.com/dotnet/runtime/issues/74891</Issue>
+        </ExcludeList>
+    </ItemGroup>
+
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <ItemGroup Condition="'$(RuntimeFlavor)' == 'mono' and '$(RuntimeVariant)' == 'minifullaot'">
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/Invoke/25params/25paramMixed_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/Invoke/SEH/catchfault_jmp_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/Invoke/SEH/catchfinally_jmpind_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/Invoke/SEH/catchfinally_jmp_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_array_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_array_nz_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_gc_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_inst_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_value_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_virt_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/VT/callconv/jumps2_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Directed/zeroinit/tail_zeroinit/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Directed/callconv/ThisCall/ThisCallTest/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Directed/pinvoke/tail_pinvoke/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/Invoke/25params/25paramMixed_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/Invoke/SEH/catchfault_jmp_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/Invoke/SEH/catchfinally_jmpind_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/Invoke/SEH/catchfinally_jmp_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_array_nz_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_array_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_gc_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_inst_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_value_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/tailcall/deep_virt_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/VT/callconv/jumper4_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/VT/callconv/jumper5_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/VT/callconv/jumps2_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Regressions/coreclr/16354/notimplemented/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Directed/perffix/primitivevt/callconv3_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Directed/perffix/primitivevt/callconv3_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/SIMD/Vector3Interop_r/Vector3Interop_r.**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/SIMD/Vector3Interop_ro/Vector3Interop_ro.**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Regression/CLR-x86-JIT/V1.2-M01/b13452/b13452/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Regression/CLR-x86-JIT/V2.0-Beta2/b353858/b353858/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/Boxing/misc/tailjump_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/Boxing/misc/tailjump_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/baseservices/compilerservices/UnsafeAccessors/UnsafeAccessorsTests/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/baseservices/TieredCompilation/TieredVtableMethodTests/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/baseservices/varargs/varargsupport/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/opt/Devirtualization/Comparer_get_Default/Comparer_get_Default.**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_80731/Runtime_80731/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Loader/classloader/DictionaryExpansion/DictionaryExpansion/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Loader/classloader/explicitlayout/NestedStructs/case03/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Loader/classloader/explicitlayout/NestedStructs/case04/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Loader/classloader/explicitlayout/NestedStructs/case05/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Loader/classloader/explicitlayout/Regressions/ASURT/ASURT150271/test13/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Loader/classloader/MethodImpl/generics_override1/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Loader/classloader/Statics/Misc/LiteralStatic/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Loader/CustomAttributes/DynamicObjects/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/jit64/localloc/call/call05_dynamic/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/jit64/verif/sniff/fg/ver_fg_13/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Directed/coverage/importer/badendfinally/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Directed/coverage/importer/Desktop/badendfinally_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Directed/coverage/importer/Desktop/badendfinally_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Directed/coverage/importer/ceeillegal/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Directed/coverage/importer/Desktop/ceeillegal_d/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Directed/coverage/importer/Desktop/ceeillegal_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/SIMD/Vector3Interop_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/SIMD/Vector3Interop_ro/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/opt/Devirtualization/Comparer_get_Default/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/baseservices/varargs/varargsupport_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Loader/classloader/MethodImpl/CovariantReturns/ReturnTypeValidation/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Loader/classloader/MethodImpl/CovariantReturns/UnitTest/UnitTest_GVM/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Interop/StringMarshalling/AnsiBSTR/AnsiBStrTest/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Interop/StringMarshalling/BSTR/BSTRTest/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Interop/StringMarshalling/LPTSTR/LPTSTRTest/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Interop/StringMarshalling/VBByRefStr/VBByRefStrTest/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Methodical/Boxing/boxunbox/BoxPatternMatchAndSideEffects/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Intrinsics/TypeIntrinsics_r/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/JIT/Intrinsics/TypeIntrinsics_ro/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/ilasm/System/Runtime/CompilerServices/MethodImplOptionsTests/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/ilasm/PortablePdb/IlasmPortablePdbTests/**" />
+        <ExcludeList Include = "$(XunitTestBinBase)/Loader/classloader/generics/ByRefLike/**" />
     </ItemGroup>
 
     <Target Name="GetFilteredExcludeList" Returns="@(FilteredExcludeList)">
diff --git a/src/tests/managed/Compilation/Compilation.cs b/src/tests/managed/Compilation/Compilation.cs
index 0b1e9d2cd987..4852618cea3e 100644
--- a/src/tests/managed/Compilation/Compilation.cs
+++ b/src/tests/managed/Compilation/Compilation.cs
@@ -8,24 +8,34 @@
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.IO;
+using Xunit;
 
-class Program
+public class Program
 {
-    static int Main()
+    [Fact]
+    public static int TestEntryPoint()
     {
         Console.WriteLine("Starting the test");
         string codeFile = @"HelloWorld.cs";
 
-        var sourceTree = new List<SyntaxTree>(){SyntaxFactory.ParseSyntaxTree(File.ReadAllText(codeFile))};
+        var sourceTree = new List<SyntaxTree>()
+        {
+            SyntaxFactory.ParseSyntaxTree(File.ReadAllText(codeFile))
+        };
+
+        string mscorlibFile = Path.Combine(Environment.GetEnvironmentVariable("CORE_ROOT"),
+                                           "System.Private.CoreLib.dll");
 
-        string mscorlibFile = Path.Combine(Environment.GetEnvironmentVariable("CORE_ROOT"), "System.Private.CoreLib.dll");
         Console.WriteLine("Using reference to: {0}", mscorlibFile);
-        var reference = new List<MetadataReference>(){ MetadataReference.CreateFromFile(mscorlibFile)};
+        var reference = new List<MetadataReference>()
+        {
+            MetadataReference.CreateFromFile(mscorlibFile)
+        };
 
         var compilation = CSharpCompilation.Create("helloworld", sourceTree, reference);
-
         Console.WriteLine("Test compiled");
         var result = compilation.Emit(new FileStream("helloworld.exe", FileMode.Create));
+
         if (!result.Success)
         {
             return -1;
diff --git a/src/tests/managed/Compilation/Compilation.csproj b/src/tests/managed/Compilation/Compilation.csproj
index f6521d8ff2df..acf9a54b6868 100644
--- a/src/tests/managed/Compilation/Compilation.csproj
+++ b/src/tests/managed/Compilation/Compilation.csproj
@@ -1,9 +1,12 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
-    <OutputType>Exe</OutputType>
+    <RequiresProcessIsolation>true</RequiresProcessIsolation>
     <GCStressIncompatible>true</GCStressIncompatible>
     <!-- Test unsupported outside of windows -->
     <CLRTestTargetUnsupported Condition="'$(TargetsWindows)' != 'true'">true</CLRTestTargetUnsupported>
+
+    <!-- Expects to find a corelib it can compile against -->
+    <NativeAotIncompatible>true</NativeAotIncompatible>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="$(MSBuildProjectName).cs" />
diff --git a/src/tests/managed/Managed.csproj b/src/tests/managed/Managed.csproj
new file mode 100644
index 000000000000..305a3dfa0b61
--- /dev/null
+++ b/src/tests/managed/Managed.csproj
@@ -0,0 +1,11 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <ItemGroup>
+    <MergedWrapperProjectReference Include="*/**/*.??proj" />
+  </ItemGroup>
+
+  <PropertyGroup>
+    <BuildAsStandalone>false</BuildAsStandalone>
+  </PropertyGroup>
+
+  <Import Project="$(TestSourceDir)MergedTestRunner.targets" />
+</Project>
diff --git a/src/tests/nativeaot/CustomMain/CustomMain.cs b/src/tests/nativeaot/CustomMain/CustomMain.cs
index 914809961d77..97db548fe54d 100644
--- a/src/tests/nativeaot/CustomMain/CustomMain.cs
+++ b/src/tests/nativeaot/CustomMain/CustomMain.cs
@@ -24,7 +24,7 @@ static Program()
         Console.WriteLine("hello from static constructor");
     }
 
-    [UnmanagedCallersOnly(EntryPoint = "IncrementExitCode", CallConvs = new Type[] { typeof(CallConvCdecl) })]
+    [UnmanagedCallersOnly(EntryPoint = "IncrementExitCode")]
     static void IncrementExitCode(int amount)
     {
         s_exitCode += amount;
diff --git a/src/tests/nativeaot/SmokeTests/ControlFlowGuard/ControlFlowGuard.csproj b/src/tests/nativeaot/SmokeTests/ControlFlowGuard/ControlFlowGuard.csproj
index 0ea909b29cb7..1fd766631b4a 100644
--- a/src/tests/nativeaot/SmokeTests/ControlFlowGuard/ControlFlowGuard.csproj
+++ b/src/tests/nativeaot/SmokeTests/ControlFlowGuard/ControlFlowGuard.csproj
@@ -5,7 +5,8 @@
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <ControlFlowGuard>Guard</ControlFlowGuard>
     <Optimize>true</Optimize>
-    <CLRTestTargetUnsupported Condition="'$(TargetsWindows)' != 'true'">true</CLRTestTargetUnsupported>
+    <!-- x86 support tracked at https://github.com/dotnet/runtime/issues/99516 -->
+    <CLRTestTargetUnsupported Condition="'$(TargetsWindows)' != 'true' or '$(TargetArchitecture)' == 'x86' ">true</CLRTestTargetUnsupported>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="ControlFlowGuard.cs" />
diff --git a/src/tests/nativeaot/SmokeTests/DotnetJs/DotnetJs.csproj b/src/tests/nativeaot/SmokeTests/DotnetJs/DotnetJs.csproj
index cd14c2a1310c..dbacf8e895bc 100644
--- a/src/tests/nativeaot/SmokeTests/DotnetJs/DotnetJs.csproj
+++ b/src/tests/nativeaot/SmokeTests/DotnetJs/DotnetJs.csproj
@@ -3,8 +3,9 @@
     <OutputType>Exe</OutputType>
     <CLRTestKind>BuildAndRun</CLRTestKind>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-    <CLRTestTargetUnsupported Condition="'$(TargetsBrowser)' != 'true'">true</CLRTestTargetUnsupported>
+    <CLRTestTargetUnsupported>true</CLRTestTargetUnsupported>
     <DotNetJsApi>true</DotNetJsApi>
+    <EmitCompilerGeneratedFiles>true</EmitCompilerGeneratedFiles>
   </PropertyGroup>
   <ItemGroup>
     <!-- Needed because of DisableImplicitFrameworkReferences=true in src\tests\Directory.Build.targets -->
diff --git a/src/tests/nativeaot/SmokeTests/DynamicGenerics/universal_generics.cs b/src/tests/nativeaot/SmokeTests/DynamicGenerics/universal_generics.cs
index 12415d8ec581..85fc88fb9058 100644
--- a/src/tests/nativeaot/SmokeTests/DynamicGenerics/universal_generics.cs
+++ b/src/tests/nativeaot/SmokeTests/DynamicGenerics/universal_generics.cs
@@ -461,7 +461,9 @@ public class UnmanagedByRef<T> : Base where T : struct, IGetValue
         [MethodImpl(MethodImplOptions.NoInlining)]
         public unsafe void TestAsPointer(T x)
         {
-            IntPtr unsafeValuePtr = (IntPtr)Unsafe.AsPointer(ref x);
+#pragma warning disable CS8500 // takes address of managed type
+            IntPtr unsafeValuePtr = (IntPtr)&x;
+#pragma warning restore CS8500
             GC.Collect();
             GC.Collect();
             GC.Collect();
@@ -487,6 +489,7 @@ public unsafe void TestGeneralFunction(T x)
         [MethodImpl(MethodImplOptions.NoInlining)]
         unsafe IntPtr someFuncWithByRefArgs(ref T x)
         {
+            // Unsafe.AsPointer is safe since the reference is expected to be pinned
             return (IntPtr)Unsafe.AsPointer(ref x);
         }
 
diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs
index e628938c57db..28b678b5c2b2 100644
--- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs
+++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs
@@ -22,7 +22,7 @@ static int Main()
 
         long lowerBound, upperBound;
         lowerBound = 1300 * 1024; // ~1.3 MB
-        upperBound = 1750 * 1024; // ~1.75 MB
+        upperBound = 1900 * 1024; // ~1.90 MB
 
         if (fileSize < lowerBound || fileSize > upperBound)
         {
diff --git a/src/tests/nativeaot/SmokeTests/HelloWasm/HelloWasm.cs b/src/tests/nativeaot/SmokeTests/HelloWasm/HelloWasm.cs
index 5e9c264b59b6..e6a8b3a2e8bb 100644
--- a/src/tests/nativeaot/SmokeTests/HelloWasm/HelloWasm.cs
+++ b/src/tests/nativeaot/SmokeTests/HelloWasm/HelloWasm.cs
@@ -384,6 +384,8 @@ private static unsafe int Main(string[] args)
 
         TestCkFinite();
 
+        TestFloatToIntConversions();
+
         TestIntOverflows();
 
 #if !CODEGEN_WASI // TODO-LLVM: stack traces on WASI.
@@ -3034,6 +3036,35 @@ private static unsafe bool CkFinite64(ulong value)
         return CkFiniteTest.CkFinite64(*(double*)(&value));
     }
 
+    private static void TestFloatToIntConversions()
+    {
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static T HideFromOptimizations<T>(T value) => value;
+
+        StartTest("Test float to int conversions");
+        if ((int)HideFromOptimizations(1245.6789d) != 1245)
+        {
+            FailTest("(int)1245.6789d not equal to 1245");
+            return;
+        }
+        if ((int)HideFromOptimizations(double.NaN) != 0)
+        {
+            FailTest("(int)double.NaN not equal to 0");
+            return;
+        }
+        if ((int)HideFromOptimizations(double.PositiveInfinity) != int.MaxValue)
+        {
+            FailTest("(int)double.PositiveInfinity not equal to int.MaxValue");
+            return;
+        }
+        if ((int)HideFromOptimizations(double.NegativeInfinity) != int.MinValue)
+        {
+            FailTest("(int)double.NegativeInfinity not equal to int.MinValue");
+            return;
+        }
+        PassTest();
+    }
+
     static void TestIntOverflows()
     {
         TestCharInOvf();
diff --git a/src/tests/nativeaot/SmokeTests/HelloWasm/Microsoft.JSInterop.js b/src/tests/nativeaot/SmokeTests/HelloWasm/Microsoft.JSInterop.js
index c03290f3cdc3..993586cbc58a 100644
--- a/src/tests/nativeaot/SmokeTests/HelloWasm/Microsoft.JSInterop.js
+++ b/src/tests/nativeaot/SmokeTests/HelloWasm/Microsoft.JSInterop.js
@@ -233,8 +233,4 @@ var DotNet;
         return value instanceof DotNetObject ? value.serializeAsArg() : value;
     }
 })(DotNet || (DotNet = {}));
-//
-//# sourceMappingURL=Microsoft.JSInterop.js.map
-//
-//
 
diff --git a/src/tests/nativeaot/SmokeTests/Preinitialization/Preinitialization.cs b/src/tests/nativeaot/SmokeTests/Preinitialization/Preinitialization.cs
index 5252e1f2d58c..78223dca967f 100644
--- a/src/tests/nativeaot/SmokeTests/Preinitialization/Preinitialization.cs
+++ b/src/tests/nativeaot/SmokeTests/Preinitialization/Preinitialization.cs
@@ -59,6 +59,7 @@ private static int Main()
         TestIsValueType.Run();
         TestIndirectLoads.Run();
         TestInitBlock.Run();
+        TestDataflow.Run();
 #else
         Console.WriteLine("Preinitialization is disabled in multimodule builds for now. Skipping test.");
 #endif
@@ -403,7 +404,15 @@ public ReferenceType(int intValue, double doubleValue)
 
     public static void Run()
     {
-        Assert.IsPreinitialized(typeof(TestReferenceTypeAllocation));
+        if (RuntimeInformation.ProcessArchitecture is Architecture.Arm or Architecture.Wasm)
+        {
+            // Because of the double field, this is not preinitialized
+            Assert.IsLazyInitialized(typeof(TestReferenceTypeAllocation));
+        }
+        else
+        {
+            Assert.IsPreinitialized(typeof(TestReferenceTypeAllocation));
+        }
         Assert.AreEqual(12345, s_referenceType.IntValue);
         Assert.AreEqual(3.14159, s_referenceType.DoubleValue);
     }
@@ -1050,11 +1059,13 @@ public static void Run()
             int val = AccessCookie<C1>();
             Assert.AreEqual(42, val);
 
-            val = (int)typeof(ClassWithTemplate<>).MakeGenericType(typeof(C2)).GetField("Cookie").GetValue(null);
+            val = (int)typeof(ClassWithTemplate<>).MakeGenericType(GetC2()).GetField("Cookie").GetValue(null);
             Assert.AreEqual(42, val);
+            static Type GetC2() => typeof(C2);
 
-            val = (int)typeof(TestSharedCode).GetMethod(nameof(AccessCookie)).MakeGenericMethod(typeof(C3)).Invoke(null, Array.Empty<object>());
+            val = (int)typeof(TestSharedCode).GetMethod(nameof(AccessCookie)).MakeGenericMethod(GetC3()).Invoke(null, Array.Empty<object>());
             Assert.AreEqual(42, val);
+            static Type GetC3() => typeof(C3);
         }
 
         {
@@ -1062,13 +1073,15 @@ public static void Run()
             object val = AccessArray<C1>();
             Assert.AreEqual(int.MaxValue, GC.GetGeneration(val));
 
-            val = typeof(ClassWithTemplate<>).MakeGenericType(typeof(C4)).GetField("Array").GetValue(null);
+            val = typeof(ClassWithTemplate<>).MakeGenericType(GetC4()).GetField("Array").GetValue(null);
             Assert.AreEqual(0, GC.GetGeneration(val));
             Assert.AreEqual(nameof(C4), val.GetType().GetElementType().Name);
+            static Type GetC4() => typeof(C4);
 
-            val = typeof(TestSharedCode).GetMethod(nameof(AccessArray)).MakeGenericMethod(typeof(C5)).Invoke(null, Array.Empty<object>());
+            val = typeof(TestSharedCode).GetMethod(nameof(AccessArray)).MakeGenericMethod(GetC5()).Invoke(null, Array.Empty<object>());
             Assert.AreEqual(0, GC.GetGeneration(val));
             Assert.AreEqual(nameof(C5), val.GetType().GetElementType().Name);
+            static Type GetC5() => typeof(C5);
         }
     }
 }
@@ -1310,15 +1323,17 @@ public static void Run()
 
 class TestIndirectLoads
 {
-    static unsafe U Read<T, U>(T val) where T : unmanaged where U : unmanaged
-        => *(U*)&val;
+    static unsafe sbyte Read(byte val) => *(sbyte*)&val;
+    static unsafe short Read(ushort val) => *(short*)&val;
+    static unsafe int Read(uint val) => *(int*)&val;
+    static unsafe long Read(ulong val) => *(long*)&val;
 
     class LdindTester
     {
-        public static sbyte SByte = Read<byte, sbyte>(byte.MaxValue);
-        public static short Short = Read<ushort, short>(ushort.MaxValue);
-        public static int Int = Read<uint, int>(uint.MaxValue);
-        public static long Long = Read<ulong, long>(ulong.MaxValue);
+        public static sbyte SByte = Read(byte.MaxValue);
+        public static short Short = Read(ushort.MaxValue);
+        public static int Int = Read(uint.MaxValue);
+        public static long Long = Read(ulong.MaxValue);
     }
 
     public static void Run()
@@ -1367,6 +1382,22 @@ public static void Run()
     }
 }
 
+class TestDataflow
+{
+    [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicMethods)]
+    public static Type TheType = typeof(MyType);
+
+    class MyType
+    {
+        public static void TheMethod() => Console.WriteLine("Hello");
+    }
+
+    public static void Run()
+    {
+        TheType.GetMethod("TheMethod").Invoke(null, []);
+    }
+}
+
 static class Assert
 {
     [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2070:UnrecognizedReflectionPattern",
diff --git a/src/tests/nativeaot/SmokeTests/Reflection/Reflection.cs b/src/tests/nativeaot/SmokeTests/Reflection/Reflection.cs
index 985fe0648c8a..febdecd3ebc5 100644
--- a/src/tests/nativeaot/SmokeTests/Reflection/Reflection.cs
+++ b/src/tests/nativeaot/SmokeTests/Reflection/Reflection.cs
@@ -1044,7 +1044,9 @@ class TestTypesInMethodSignatures
     {
         interface IUnreferenced { }
 
-        class UnreferencedBaseType : IUnreferenced { }
+        interface IReferenced { }
+
+        class UnreferencedBaseType : IUnreferenced, IReferenced { }
         class UnreferencedMidType : UnreferencedBaseType { }
         class ReferencedDerivedType : UnreferencedMidType { }
 
@@ -1063,8 +1065,9 @@ public static void Run()
 
             Assert.Equal(count, 3);
 
-            // This one could in theory fail if we start trimming interface lists
+            // We expect to see only IReferenced but not IUnreferenced
             Assert.Equal(1, mi.GetParameters()[0].ParameterType.GetInterfaces().Length);
+            Assert.Equal(typeof(IReferenced), mi.GetParameters()[0].ParameterType.GetInterfaces()[0]);
         }
     }
 
@@ -1511,6 +1514,7 @@ public static void Run()
             try
             {
                 Type.GetType("System.Span`1[[System.Byte, System.Runtime]][], System.Runtime");
+                Type.GetType("System.Collections.Generic.Dictionary`2[System.String]");
             }
             catch { }
 
@@ -1825,9 +1829,10 @@ public static void Run()
                 typeof(GenericType<>).MakeGenericType(typeof(object)).GetMethod("Gimme");
             }
 
-            var t = (Type)s_type.MakeGenericType(typeof(double)).GetMethod("Gimme").Invoke(null, Array.Empty<object>());
+            var t = (Type)s_type.MakeGenericType(GetDouble()).GetMethod("Gimme").Invoke(null, Array.Empty<object>());
             if (t != typeof(double))
                 throw new Exception();
+            static Type GetDouble() => typeof(double);
         }
     }
 
@@ -2433,9 +2438,10 @@ class Atom { }
 
         public static void Run()
         {
-            var mi = typeof(TestMdArrayLoad).GetMethod(nameof(MakeMdArray)).MakeGenericMethod(typeof(Atom));
+            var mi = typeof(TestMdArrayLoad).GetMethod(nameof(MakeMdArray)).MakeGenericMethod(GetAtom());
             if ((Type)mi.Invoke(null, Array.Empty<object>()) != typeof(Atom[,,]))
                 throw new Exception();
+            static Type GetAtom() => typeof(Atom);
         }
     }
 
@@ -2447,9 +2453,10 @@ class Atom { }
 
         public static void Run()
         {
-            var mi = typeof(TestByRefTypeLoad).GetMethod(nameof(MakeFnPtrType)).MakeGenericMethod(typeof(Atom));
+            var mi = typeof(TestByRefTypeLoad).GetMethod(nameof(MakeFnPtrType)).MakeGenericMethod(GetAtom());
             if ((Type)mi.Invoke(null, Array.Empty<object>()) != typeof(delegate*<ref Atom>))
                 throw new Exception();
+            static Type GetAtom() => typeof(Atom);
         }
     }
 
diff --git a/src/tests/nativeaot/SmokeTests/TrimmingBehaviors/Dataflow.cs b/src/tests/nativeaot/SmokeTests/TrimmingBehaviors/Dataflow.cs
index fe050ada673e..a75ca2bcd0c4 100644
--- a/src/tests/nativeaot/SmokeTests/TrimmingBehaviors/Dataflow.cs
+++ b/src/tests/nativeaot/SmokeTests/TrimmingBehaviors/Dataflow.cs
@@ -5,6 +5,7 @@
 using System.Reflection;
 using System.Diagnostics.CodeAnalysis;
 using System.Runtime.InteropServices;
+using System.Runtime.CompilerServices;
 
 #pragma warning disable 649 // 'blah' is never assigned to
 #pragma warning disable 169 // 'blah' is never used
@@ -25,7 +26,10 @@ public static int Run()
         TestDynamicDependency.Run();
         TestDynamicDependencyWithGenerics.Run();
         TestObjectGetTypeDataflow.Run();
+        TestMakeGenericDataflow.Run();
+        TestMakeGenericDataflowInvalid.Run();
         TestMarshalIntrinsics.Run();
+        Regression97758.Run();
 
         return 100;
     }
@@ -586,6 +590,117 @@ public static void Run()
         }
     }
 
+    class TestMakeGenericDataflow
+    {
+        class Gen1<T, U>
+        {
+            public static void Bridge() { }
+        }
+
+        class Gen1
+        {
+            public static void Bridge<T, U>() { }
+        }
+
+
+        class Gen2<T>
+        {
+            public static void Bridge() { }
+        }
+
+        class Gen2
+        {
+            public static void Bridge<T>() { }
+        }
+
+        struct MyStruct<T> { }
+
+        static void DoBridgeT1<T, U>() => typeof(Gen1<,>).MakeGenericType([typeof(T), typeof(U)]).GetMethod(nameof(Gen1<T, U>.Bridge)).Invoke(null, []);
+
+        static void DoBridgeT2<T>() => typeof(Gen2<>).MakeGenericType([typeof(MyStruct<T>)]).GetMethod(nameof(Gen2<T>.Bridge)).Invoke(null, []);
+
+        static void DoBridgeM1<T, U>() => typeof(Gen1).GetMethod(nameof(Gen1.Bridge)).MakeGenericMethod([typeof(T), typeof(U)]).Invoke(null, []);
+
+        static void DoBridgeM2<T>() => typeof(Gen2).GetMethod(nameof(Gen2.Bridge)).MakeGenericMethod([typeof(MyStruct<T>)]).Invoke(null, []);
+
+        public static void Run()
+        {
+            DoBridgeT1<string, string>();
+            DoBridgeT1<string, int>();
+            DoBridgeT1<int, int>();
+
+            DoBridgeT2<string>();
+            DoBridgeT2<int>();
+
+            DoBridgeM1<string, string>();
+            DoBridgeM1<string, int>();
+            DoBridgeM1<int, int>();
+
+            DoBridgeM2<string>();
+            DoBridgeM2<int>();
+
+            typeof(Gen1<,>).MakeGenericType([typeof(float), typeof(string)]).GetMethod(nameof(Gen1<float, string>.Bridge)).Invoke(null, []);
+            typeof(Gen2<>).MakeGenericType([typeof(MyStruct<float>)]).GetMethod(nameof(Gen2<float>.Bridge)).Invoke(null, []);
+            typeof(Gen1).GetMethod(nameof(Gen1.Bridge)).MakeGenericMethod([typeof(float), typeof(string)]).Invoke(null, []);
+            typeof(Gen2).GetMethod(nameof(Gen2.Bridge)).MakeGenericMethod([typeof(MyStruct<float>)]).Invoke(null, []);
+        }
+    }
+
+    class TestMakeGenericDataflowInvalid
+    {
+        class Gen<T> { }
+
+        class Gen
+        {
+            public static void Bridge<T>() { }
+        }
+
+        public static void Run()
+        {
+            try
+            {
+                typeof(Gen<>).MakeGenericType(null);
+            }
+            catch (ArgumentException) { }
+
+            try
+            {
+                typeof(Gen<>).MakeGenericType([]);
+            }
+            catch (ArgumentException) { }
+
+            try
+            {
+                typeof(Gen<>).MakeGenericType([typeof(float), typeof(double)]);
+            }
+            catch (ArgumentException) { }
+
+            try
+            {
+                typeof(Gen<>).MakeGenericType([typeof(Gen<>)]);
+            }
+            catch (ArgumentException) { }
+
+            try
+            {
+                typeof(Gen).GetMethod("Bridge").MakeGenericMethod(null);
+            }
+            catch (ArgumentException) { }
+
+            try
+            {
+                typeof(Gen).GetMethod("Bridge").MakeGenericMethod([]);
+            }
+            catch (ArgumentException) { }
+
+            try
+            {
+                typeof(Gen).GetMethod("Bridge").MakeGenericMethod([typeof(float), typeof(double)]);
+            }
+            catch (ArgumentException) { }
+        }
+    }
+
     class TestMarshalIntrinsics
     {
         [StructLayout(LayoutKind.Sequential)]
@@ -631,6 +746,34 @@ static void SanityTest()
             }
         }
     }
+
+    class Regression97758
+    {
+        class Foo<T>
+        {
+            public static void Trigger()
+            {
+                typeof(Bar).GetConstructor([]).Invoke([]);
+
+                if (typeof(T).IsValueType && (object)default(T) == null)
+                {
+                    if (!RuntimeFeature.IsDynamicCodeCompiled)
+                        return;
+
+                    Unreachable();
+                }
+
+                static void Unreachable() { }
+            }
+        }
+
+        class Bar { }
+
+        public static void Run()
+        {
+            Foo<int>.Trigger();
+        }
+    }
 }
 
 static class Assert
diff --git a/src/tests/nativeaot/SmokeTests/TrimmingBehaviors/DeadCodeElimination.cs b/src/tests/nativeaot/SmokeTests/TrimmingBehaviors/DeadCodeElimination.cs
index d54c801ccee3..076ce813828c 100644
--- a/src/tests/nativeaot/SmokeTests/TrimmingBehaviors/DeadCodeElimination.cs
+++ b/src/tests/nativeaot/SmokeTests/TrimmingBehaviors/DeadCodeElimination.cs
@@ -26,6 +26,7 @@ public static int Run()
         TestUnmodifiableStaticFieldOptimization.Run();
         TestUnmodifiableInstanceFieldOptimization.Run();
         TestGetMethodOptimization.Run();
+        TestTypeOfCodegenBranchElimination.Run();
 
         return 100;
     }
@@ -340,6 +341,8 @@ public static void Run()
 
     class TestTypeEquals
     {
+        sealed class Gen<T> { }
+
         sealed class Never { }
 
         static Type s_type = null;
@@ -350,6 +353,9 @@ public static void Run()
             // despite the typeof
             Console.WriteLine(s_type == typeof(Never));
 
+            // This was a compiler crash
+            Console.WriteLine(typeof(object) == typeof(Gen<>));
+
 #if !DEBUG
             ThrowIfPresent(typeof(TestTypeEquals), nameof(Never));
 #endif
@@ -639,6 +645,132 @@ public static void Run()
         }
     }
 
+    class TestTypeOfCodegenBranchElimination
+    {
+        class Never1 { }
+        class Never2 { }
+        class Never3 { }
+        class Never4<T> { }
+        class Never5<T> { }
+        class Never6<T> { }
+
+        class Canary1 { }
+        class Canary2 { }
+        class Canary3 { }
+        class Canary4 { }
+        class Canary5 { }
+        class Canary6 { }
+
+        class Maybe1<T> { }
+
+        class Marker1 { }
+
+        class Atom1 { }
+
+        interface IDynamicCastableImplemented { void A(); }
+        [DynamicInterfaceCastableImplementation]
+        interface IDynamicCastableImplementedImpl : IDynamicCastableImplemented { void IDynamicCastableImplemented.A() { } }
+        class DynamicInterfaceCastable : IDynamicInterfaceCastable
+        {
+            RuntimeTypeHandle IDynamicInterfaceCastable.GetInterfaceImplementation(RuntimeTypeHandle interfaceType) => typeof(IDynamicCastableImplementedImpl).TypeHandle;
+            bool IDynamicInterfaceCastable.IsInterfaceImplemented(RuntimeTypeHandle interfaceType, bool throwIfNotImplemented) => true;
+        }
+
+        [UnconditionalSuppressMessage("AotAnalysis", "IL3050:UnrecognizedAotPattern",
+            Justification = "That's the point")]
+        public static void Run()
+        {
+            if (GetUnknownType().GetType() == typeof(Never1))
+            {
+                Consume(new Canary1());
+            }
+#if !DEBUG
+            ThrowIfPresentWithUsableMethodTable(typeof(TestTypeOfCodegenBranchElimination), nameof(Canary1));
+#endif
+
+            if (GetUnknownType() is Never2)
+            {
+                Consume(new Canary2());
+            }
+#if !DEBUG
+            ThrowIfPresentWithUsableMethodTable(typeof(TestTypeOfCodegenBranchElimination), nameof(Canary2));
+#endif
+
+            IsNever3<object>(new object());
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static void IsNever3<T>(object o)
+            {
+                if (typeof(T) == typeof(Never3))
+                {
+                    Consume(new Canary3());
+                }
+            }
+#if false // This optimization is disabled for now, don't check.
+            ThrowIfPresentWithUsableMethodTable(typeof(TestTypeOfCodegenBranchElimination), nameof(Canary3));
+#endif
+
+            // *********
+
+            if (GetUnknownType().GetType() == typeof(Never4<object>))
+            {
+                Consume(new Canary4());
+            }
+#if !DEBUG
+            ThrowIfPresentWithUsableMethodTable(typeof(TestTypeOfCodegenBranchElimination), nameof(Canary4));
+#endif
+
+            if (GetUnknownType() is Never5<object>)
+            {
+                Consume(new Canary5());
+            }
+#if !DEBUG
+            ThrowIfPresentWithUsableMethodTable(typeof(TestTypeOfCodegenBranchElimination), nameof(Canary5));
+#endif
+
+            IsNever6<object>(new object());
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static void IsNever6<T>(object o)
+            {
+                if (typeof(T) == typeof(Never6<object>))
+                {
+                    Consume(new Canary6());
+                }
+            }
+#if false // This optimization is disabled for now, don't check.
+            ThrowIfPresentWithUsableMethodTable(typeof(TestTypeOfCodegenBranchElimination), nameof(Canary6));
+#endif
+
+            // ************
+
+            Activator.CreateInstance(typeof(Maybe1<>).MakeGenericType(GetAtom1()));
+
+            if (GetUnknownType().GetType() == typeof(Maybe1<object>))
+            {
+                // This should not be optimized away because Maybe1<object> is possible
+                // with the type loader template for MakeGeneric above.
+                Consume(new Marker1());
+            }
+            ThrowIfNotPresent(typeof(TestTypeOfCodegenBranchElimination), nameof(Marker1));
+
+            // ************
+
+            if (GetDynamicInterfaceCastableType() is not IDynamicCastableImplemented)
+               throw new Exception();
+
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static object GetDynamicInterfaceCastableType() => new DynamicInterfaceCastable();
+
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static void Consume(object o) { }
+
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static object GetUnknownType() => new object();
+
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static Type GetAtom1() => typeof(Atom1);
+        }
+    }
+
     [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2070:UnrecognizedReflectionPattern",
         Justification = "That's the point")]
     private static Type GetTypeSecretly(Type testType, string typeName) => testType.GetNestedType(typeName, BindingFlags.NonPublic | BindingFlags.Public);
diff --git a/src/tests/nativeaot/SmokeTests/UnitTests/Devirtualization.cs b/src/tests/nativeaot/SmokeTests/UnitTests/Devirtualization.cs
index f2c70d7b1ee6..c69c01315094 100644
--- a/src/tests/nativeaot/SmokeTests/UnitTests/Devirtualization.cs
+++ b/src/tests/nativeaot/SmokeTests/UnitTests/Devirtualization.cs
@@ -111,7 +111,8 @@ public static void Run()
             TestIntf1((IIntf1)new Intf1CastableImpl(), 456);
 
             TestIntf2(new Intf2Impl1(), 123);
-            TestIntf2((IIntf2)Activator.CreateInstance(typeof(Intf2Impl2<>).MakeGenericType(typeof(object))), 456);
+            TestIntf2((IIntf2)Activator.CreateInstance(typeof(Intf2Impl2<>).MakeGenericType(GetObject())), 456);
+            static Type GetObject() => typeof(object);
         }
     }
 
diff --git a/src/tests/nativeaot/SmokeTests/UnitTests/Generics.cs b/src/tests/nativeaot/SmokeTests/UnitTests/Generics.cs
index 145d7b3d0c3e..a9d3d44c758d 100644
--- a/src/tests/nativeaot/SmokeTests/UnitTests/Generics.cs
+++ b/src/tests/nativeaot/SmokeTests/UnitTests/Generics.cs
@@ -1976,8 +1976,10 @@ private static void TestDynamicStaticFields()
             Foo<object>.s_floatField = 12.34f;
             Foo<object>.s_longField1 = 0x1111;
 
-            var fooDynamicOfClassType = typeof(Foo<>).MakeGenericType(typeof(ClassType)).GetTypeInfo();
-            var fooDynamicOfClassType2 = typeof(Foo<>).MakeGenericType(typeof(ClassType2)).GetTypeInfo();
+            var fooDynamicOfClassType = typeof(Foo<>).MakeGenericType(GetClassType()).GetTypeInfo();
+            static Type GetClassType() => typeof(ClassType);
+            var fooDynamicOfClassType2 = typeof(Foo<>).MakeGenericType(GetClassType2()).GetTypeInfo();
+            static Type GetClassType2() => typeof(ClassType2);
 
             FieldInfo fi = fooDynamicOfClassType.GetDeclaredField("s_intField");
             FieldInfo fi2 = fooDynamicOfClassType2.GetDeclaredField("s_intField");
@@ -2031,7 +2033,8 @@ private static void TestDynamicInvokeStubs()
             heh2.GenericVirtualMethod(new Program(), "ayy");
 
             // Simple method invocation
-            var dynamicBaseOfString = typeof(DynamicBase<>).MakeGenericType(typeof(string));
+            var dynamicBaseOfString = typeof(DynamicBase<>).MakeGenericType(GetString());
+            static Type GetString() => typeof(string);
             object obj = Activator.CreateInstance(dynamicBaseOfString);
             {
                 var simpleMethod = dynamicBaseOfString.GetTypeInfo().GetDeclaredMethod("SimpleMethod");
@@ -2054,7 +2057,7 @@ private static void TestDynamicInvokeStubs()
             }
 
             {
-                var dynamicDerivedOfString = typeof(DynamicDerived<>).MakeGenericType(typeof(string));
+                var dynamicDerivedOfString = typeof(DynamicDerived<>).MakeGenericType(GetString());
                 object dynamicDerivedObj = Activator.CreateInstance(dynamicDerivedOfString);
                 var virtualMethodDynamicDerived = dynamicDerivedOfString.GetTypeInfo().GetDeclaredMethod("VirtualMethod");
                 string result = (string)virtualMethodDynamicDerived.Invoke(dynamicDerivedObj, new[] { "fad" });
@@ -2063,7 +2066,7 @@ private static void TestDynamicInvokeStubs()
 
             // Test generic method invocation
             {
-                var genericMethod = dynamicBaseOfString.GetTypeInfo().GetDeclaredMethod("GenericMethod").MakeGenericMethod(new[] { typeof(string) });
+                var genericMethod = dynamicBaseOfString.GetTypeInfo().GetDeclaredMethod("GenericMethod").MakeGenericMethod(new[] { GetString() });
                 string result = (string)genericMethod.Invoke(obj, new[] { "hey", "hello" });
 
                 Verify("System.Stringhello", result);
@@ -2072,15 +2075,15 @@ private static void TestDynamicInvokeStubs()
             // Test GVM invocation
             {
                 var genericMethod = dynamicBaseOfString.GetTypeInfo().GetDeclaredMethod("GenericVirtualMethod");
-                genericMethod = genericMethod.MakeGenericMethod(new[] { typeof(string) });
+                genericMethod = genericMethod.MakeGenericMethod(new[] { GetString() });
                 string result = (string)genericMethod.Invoke(obj, new[] { "hey", "hello" });
                 Verify("DynamicBaseSystem.Stringhello", result);
             }
 
             {
-                var dynamicDerivedOfString = typeof(DynamicDerived<>).MakeGenericType(typeof(string));
+                var dynamicDerivedOfString = typeof(DynamicDerived<>).MakeGenericType(GetString());
                 object dynamicDerivedObj = Activator.CreateInstance(dynamicDerivedOfString);
-                var virtualMethodDynamicDerived = dynamicDerivedOfString.GetTypeInfo().GetDeclaredMethod("GenericVirtualMethod").MakeGenericMethod(new[] { typeof(string) });
+                var virtualMethodDynamicDerived = dynamicDerivedOfString.GetTypeInfo().GetDeclaredMethod("GenericVirtualMethod").MakeGenericMethod(new[] { GetString() });
                 string result = (string)virtualMethodDynamicDerived.Invoke(dynamicDerivedObj, new[] { "hey", "fad" });
                 Verify("DynamicDerivedSystem.Stringfad", result);
             }
diff --git a/src/tests/nativeaot/SmokeTests/UnitTests/Interfaces.cs b/src/tests/nativeaot/SmokeTests/UnitTests/Interfaces.cs
index 4f34959be84b..b5023e8c8416 100644
--- a/src/tests/nativeaot/SmokeTests/UnitTests/Interfaces.cs
+++ b/src/tests/nativeaot/SmokeTests/UnitTests/Interfaces.cs
@@ -1583,11 +1583,13 @@ class Gen<T> where T : IFoo
         [UnconditionalSuppressMessage("AOT", "IL3050", Justification = "MakeGenericType - Intentional")]
         public static void Run()
         {
-            var r = (string)typeof(Gen<>).MakeGenericType(typeof(Baz)).GetMethod("GrabCookie").Invoke(null, Array.Empty<object>());
+            var r = (string)typeof(Gen<>).MakeGenericType(GetBaz()).GetMethod("GrabCookie").Invoke(null, Array.Empty<object>());
+            static Type GetBaz() => typeof(Baz);
             if (r != "IBar")
                 throw new Exception(r);
 
-            r = (string)typeof(Gen<>).MakeGenericType(typeof(IBar)).GetMethod("GrabCookie").Invoke(null, Array.Empty<object>());
+            r = (string)typeof(Gen<>).MakeGenericType(GetIBar()).GetMethod("GrabCookie").Invoke(null, Array.Empty<object>());
+            static Type GetIBar() => typeof(IBar);
             if (r != "IBar")
                 throw new Exception(r);
         }
@@ -1620,15 +1622,18 @@ class Gen<T> where T : IFoo
         [UnconditionalSuppressMessage("AOT", "IL3050", Justification = "MakeGenericType - Intentional")]
         public static void Run()
         {
-            Activator.CreateInstance(typeof(Baz<>).MakeGenericType(typeof(Atom1)));
+            Activator.CreateInstance(typeof(Baz<>).MakeGenericType(GetAtom1()));
 
-            var r = (string)typeof(Gen<>).MakeGenericType(typeof(Baz<>).MakeGenericType(typeof(Atom1))).GetMethod("GrabCookie").Invoke(null, Array.Empty<object>());
+            var r = (string)typeof(Gen<>).MakeGenericType(typeof(Baz<>).MakeGenericType(GetAtom1())).GetMethod("GrabCookie").Invoke(null, Array.Empty<object>());
             if (r != "IBar<Atom1>")
                 throw new Exception(r);
 
-            r = (string)typeof(Gen<>).MakeGenericType(typeof(IBar<>).MakeGenericType(typeof(Atom2))).GetMethod("GrabCookie").Invoke(null, Array.Empty<object>());
+            r = (string)typeof(Gen<>).MakeGenericType(typeof(IBar<>).MakeGenericType(GetAtom2())).GetMethod("GrabCookie").Invoke(null, Array.Empty<object>());
             if (r != "IBar<Atom2>")
                 throw new Exception(r);
+
+            static Type GetAtom1() => typeof(Atom1);
+            static Type GetAtom2() => typeof(Atom2);
         }
     }
 
diff --git a/src/tests/profiler/multiple/multiple.cs b/src/tests/profiler/multiple/multiple.cs
index aa0388fa0eb2..0d686ae0691a 100644
--- a/src/tests/profiler/multiple/multiple.cs
+++ b/src/tests/profiler/multiple/multiple.cs
@@ -35,9 +35,9 @@ public static int RunTest(String[] args)
             }
 
             Console.WriteLine("Waiting for profilers to all detach");
-            if (!_profilerDone.WaitOne(TimeSpan.FromMinutes(5)))
+            if (!_profilerDone.WaitOne(TimeSpan.FromMinutes(10)))
             {
-                Console.WriteLine("Profiler did not set the callback, test will fail.");
+                throw new Exception("Test timed out waiting for the profilers to set the callback, test will fail.");
             }
 
             return 100;
diff --git a/src/tests/profiler/native/multiple/multiple.cpp b/src/tests/profiler/native/multiple/multiple.cpp
index 6b8fba571858..c36e4a8efa49 100644
--- a/src/tests/profiler/native/multiple/multiple.cpp
+++ b/src/tests/profiler/native/multiple/multiple.cpp
@@ -24,7 +24,6 @@ HRESULT MultiplyLoaded::InitializeCommon(IUnknown* pICorProfilerInfoUnk)
     Profiler::Initialize(pICorProfilerInfoUnk);
 
     HRESULT hr = S_OK;
-    printf("Setting exception mask\n");
     if (FAILED(hr = pCorProfilerInfo->SetEventMask2(COR_PRF_MONITOR_EXCEPTIONS, 0)))
     {
         _failures++;
@@ -37,11 +36,13 @@ HRESULT MultiplyLoaded::InitializeCommon(IUnknown* pICorProfilerInfoUnk)
 
 HRESULT MultiplyLoaded::Initialize(IUnknown* pICorProfilerInfoUnk)
 {
+    printf("MultiplyLoaded::Initialize\n");
     return InitializeCommon(pICorProfilerInfoUnk);
 }
 
 HRESULT MultiplyLoaded::InitializeForAttach(IUnknown* pICorProfilerInfoUnk, void* pvClientData, UINT cbClientData)
 {
+    printf("MultiplyLoaded::InitializeForAttach\n");
     return InitializeCommon(pICorProfilerInfoUnk);
 }
 
@@ -56,8 +57,8 @@ HRESULT MultiplyLoaded::ProfilerDetachSucceeded()
     ++_detachCount;
 
     printf("ProfilerDetachSucceeded _detachCount=%d\n", _detachCount.load());
-    if (_detachCount == (MAX_PROFILERS - 1)
-        &&  _exceptionThrownSeenCount >= (MAX_PROFILERS - 1)
+    if (_detachCount == MAX_PROFILERS
+        &&  _exceptionThrownSeenCount >= MAX_PROFILERS
         &&  _failures == 0)
     {
         printf("PROFILER TEST PASSES\n");
@@ -69,9 +70,7 @@ HRESULT MultiplyLoaded::ProfilerDetachSucceeded()
 
 HRESULT MultiplyLoaded::ExceptionThrown(ObjectID thrownObjectId)
 {
-    int seen = _exceptionThrownSeenCount++;
-
-    printf("MultiplyLoaded::ExceptionThrown, number seen = %d\n", seen);
+    printf("MultiplyLoaded::ExceptionThrown, number seen = %d\n", ++_exceptionThrownSeenCount);
 
     thread detachThread([&]()
         {
diff --git a/src/tests/readytorun/coreroot_determinism/readytorun_coreroot_determinism.csproj b/src/tests/readytorun/coreroot_determinism/readytorun_coreroot_determinism.csproj
index 87b490351410..56b857d79356 100644
--- a/src/tests/readytorun/coreroot_determinism/readytorun_coreroot_determinism.csproj
+++ b/src/tests/readytorun/coreroot_determinism/readytorun_coreroot_determinism.csproj
@@ -1,4 +1,8 @@
 <Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'minifullaot' or '$(RuntimeVariant)' == 'llvmfullaot')">true</CLRTestTargetUnsupported>
+  </PropertyGroup>
   <ItemGroup>
     <MergedWrapperProjectReference Include="coreroot_determinism.csproj" />
   </ItemGroup>
diff --git a/src/tests/readytorun/readytorun.csproj b/src/tests/readytorun/readytorun.csproj
index d815ac504e63..cb657444dd8d 100644
--- a/src/tests/readytorun/readytorun.csproj
+++ b/src/tests/readytorun/readytorun.csproj
@@ -1,4 +1,8 @@
 <Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <!-- Tracking issue: https://github.com/dotnet/runtime/issues/90427 -->
+    <CLRTestTargetUnsupported Condition="'$(RuntimeFlavor)' == 'mono' and ('$(RuntimeVariant)' == 'minifullaot' or '$(RuntimeVariant)' == 'llvmfullaot')">true</CLRTestTargetUnsupported>
+  </PropertyGroup>
   <ItemGroup>
     <MergedWrapperProjectReference Include="*/**/*.??proj" />
     <MergedWrapperProjectReference Remove="tests/testv1/*.??proj" />
diff --git a/src/tests/readytorun/tests/genericsload/usegenericfield.csproj b/src/tests/readytorun/tests/genericsload/usegenericfield.csproj
index 67b266b8658f..d543cd9e4769 100644
--- a/src/tests/readytorun/tests/genericsload/usegenericfield.csproj
+++ b/src/tests/readytorun/tests/genericsload/usegenericfield.csproj
@@ -2,7 +2,6 @@
   <PropertyGroup>
     <!-- Needed for mechanical merging of all remaining tests, this particular project may not actually need process isolation -->
     <RequiresProcessIsolation>true</RequiresProcessIsolation>
-    <ZapRequire>1</ZapRequire>
     <CLRTestPriority>1</CLRTestPriority>
 
     <!-- This is an explicit crossgen test -->
diff --git a/src/tests/sizeondisk/Directory.Build.props b/src/tests/sizeondisk/Directory.Build.props
deleted file mode 100644
index 6c6181ee1b34..000000000000
--- a/src/tests/sizeondisk/Directory.Build.props
+++ /dev/null
@@ -1,7 +0,0 @@
-<Project>
-  <Import Project="$([MSBuild]::GetPathOfFileAbove(Directory.Build.props, $(MSBuildThisFileDirectory)..))" />
-
-  <PropertyGroup>
-    <ProjectAssetsFile>$(TestSourceDir)performance/obj/project.assets.json</ProjectAssetsFile>
-  </PropertyGroup>
-</Project>
diff --git a/src/tests/sizeondisk/sodbench/SoDBench.cs b/src/tests/sizeondisk/sodbench/SoDBench.cs
deleted file mode 100644
index 10c916f09bd8..000000000000
--- a/src/tests/sizeondisk/sodbench/SoDBench.cs
+++ /dev/null
@@ -1,754 +0,0 @@
-using CommandLine;
-using CommandLine.Text;
-using Newtonsoft.Json;
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.IO;
-using System.Linq;
-using System.Net.Http;
-using System.Text;
-using System.Reflection;
-using System.Threading;
-using System.Threading.Tasks;
-
-namespace SoDBench
-{
-    // A simple tree node for tracking file and directory names and sizes
-    // Does not have to accurately represent the true file system; only what we care about
-    class SizeReportingNode
-    {
-        public SizeReportingNode(string name, long? size=null, bool expand=true)
-        {
-            Name = name;
-            _size = size;
-            Expanded = expand;
-        }
-
-        public SizeReportingNode(FileInfo file, bool expand=true)
-        {
-            Name = file.Name;
-            _size = file.Length;
-            Expanded = expand;
-        }
-
-        // Builds out the tree starting from a directory
-        public SizeReportingNode(DirectoryInfo dir, int? reportingDepth=null)
-        {
-            Name = dir.Name;
-
-            foreach (var childDir in dir.EnumerateDirectories())
-            {
-                AddChild(new SizeReportingNode(childDir));
-            }
-
-            foreach (var childFile in dir.EnumerateFiles())
-            {
-                AddChild(new SizeReportingNode(childFile));
-            }
-
-            if (reportingDepth != null)
-            {
-                LimitReportingDepth(reportingDepth ?? 0);
-            }
-        }
-
-
-        // The directory containing this node
-        public SizeReportingNode Parent { get; set; }
-
-        // All the directories and files this node contains
-        public List<SizeReportingNode> Children {get; private set;} = new List<SizeReportingNode>();
-
-        // The file or directory name
-        public string Name { get; set; }
-
-        public bool Expanded { get; set; } = true;
-
-        // A list version of the path up to the root level we care about
-        public List<string> SegmentedPath {
-            get
-            {
-                if (Parent != null)
-                {
-                    var path = Parent.SegmentedPath;
-                    path.Add(Name);
-                    return path;
-                }
-                return new List<string> { Name };
-            }
-        }
-
-        // The size of the file or directory
-        public long Size {
-            get
-            {
-                if (_size == null)
-                {
-                    _size = 0;
-                    foreach (var node in Children)
-                    {
-                        _size += node.Size;
-                    }
-                }
-                return _size ?? 0;
-            }
-
-            private set
-            {
-                _size = value;
-            }
-        }
-
-
-        // Add the adoptee node as a child and set the adoptee's parent
-        public void AddChild(SizeReportingNode adoptee)
-        {
-            Children.Add(adoptee);
-            adoptee.Parent = this;
-            _size = null;
-        }
-
-        public void LimitReportingDepth(int depth)
-        {
-            if (depth <= 0)
-            {
-                Expanded = false;
-            }
-
-            foreach (var childNode in Children)
-            {
-                childNode.LimitReportingDepth(depth-1);
-            }
-        }
-
-        // Return a CSV formatted string representation of the tree
-        public string FormatAsCsv()
-        {
-            return FormatAsCsv(new StringBuilder()).ToString();
-        }
-
-        // Add to the string build a csv formatted representation of the tree
-        public StringBuilder FormatAsCsv(StringBuilder builder)
-        {
-            string path = String.Join(",", SegmentedPath.Select(s => Csv.Escape(s)));
-            builder.AppendLine($"{path},{Size}");
-
-            if (Expanded)
-            {
-                foreach (var childNode in Children)
-                {
-                    childNode.FormatAsCsv(builder);
-                }
-            }
-
-            return builder;
-        }
-
-        private long? _size = null;
-    }
-
-    class Program
-    {
-        public static readonly string NugetConfig =
-        @"<?xml version='1.0' encoding='utf-8'?>
-        <configuration>
-        <packageSources>
-            <add key='dotnet-public' value='https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-public/nuget/v3/index.json' protocolVersion='3' />
-            <add key='myget-legacy' value='https://pkgs.dev.azure.com/dnceng/public/_packaging/myget-legacy/nuget/v3/index.json' protocolVersion='3' />
-        </packageSources>
-        </configuration>";
-
-        public static readonly string[] NewTemplates = new string[] {
-            "console",
-            "classlib",
-            "mstest",
-            "xunit",
-            "web",
-            "mvc",
-            "razor",
-            "webapi",
-            "nugetconfig",
-            "webconfig",
-            "sln",
-            "page",
-            "viewimports",
-            "viewstart"
-        };
-
-        public static readonly string[] OperatingSystems = new string[] {
-            "win10-x64",
-            "win10-x86",
-            "ubuntu.16.10-x64",
-            "rhel.7-x64"
-        };
-
-        static FileInfo s_dotnetExe;
-        static DirectoryInfo s_sandboxDir;
-        static DirectoryInfo s_fallbackDir;
-        static DirectoryInfo s_corelibsDir;
-        static bool s_keepArtifacts;
-        static string s_targetArchitecture;
-        static string s_dotnetChannel;
-
-        static void Main(string[] args)
-        {
-            try
-            {
-                var options = SoDBenchOptions.Parse(args);
-
-                s_targetArchitecture = options.TargetArchitecture;
-                s_dotnetChannel = options.DotnetChannel;
-                s_keepArtifacts = options.KeepArtifacts;
-
-                if (!String.IsNullOrWhiteSpace(options.DotnetExecutable))
-                {
-                    s_dotnetExe = new FileInfo(options.DotnetExecutable);
-                }
-
-                if (s_sandboxDir == null)
-                {
-                    // Truncate the Guid used for anti-collision because a full Guid results in expanded paths over 260 chars (the Windows max)
-                    s_sandboxDir = new DirectoryInfo(Path.Combine(Path.GetTempPath(), $"sod{Guid.NewGuid().ToString().Substring(0,13)}"));
-                    s_sandboxDir.Create();
-                    Console.WriteLine($"** Running inside sandbox directory: {s_sandboxDir}");
-                }
-
-                if (s_dotnetExe == null)
-                {
-                    if(!String.IsNullOrEmpty(options.CoreLibrariesDirectory))
-                    {
-                        Console.WriteLine($"** Using core libraries found at {options.CoreLibrariesDirectory}");
-                        s_corelibsDir = new DirectoryInfo(options.CoreLibrariesDirectory);
-                    }
-                    else
-                    {
-                        var coreroot = Environment.GetEnvironmentVariable("CORE_ROOT");
-                        if (!String.IsNullOrEmpty(coreroot) && Directory.Exists(coreroot))
-                        {
-                            Console.WriteLine($"** Using core libraries from CORE_ROOT at {coreroot}");
-                            s_corelibsDir = new DirectoryInfo(coreroot);
-                        }
-                        else
-                        {
-                            Console.WriteLine("** Using default dotnet-cli core libraries");
-                        }
-                    }
-
-                    PrintHeader("** Installing Dotnet CLI");
-                    s_dotnetExe = SetupDotnet();
-                }
-
-                if (s_fallbackDir == null)
-                {
-                    s_fallbackDir = new DirectoryInfo(Path.Combine(s_sandboxDir.FullName, "fallback"));
-                    s_fallbackDir.Create();
-                }
-
-                Console.WriteLine($"** Path to dotnet executable: {s_dotnetExe.FullName}");
-
-                PrintHeader("** Starting acquisition size test");
-                var acquisition = GetAcquisitionSize();
-
-                PrintHeader("** Running deployment size test");
-                var deployment = GetDeploymentSize();
-
-                var root = new SizeReportingNode("Dotnet Total");
-                root.AddChild(acquisition);
-                root.AddChild(deployment);
-
-                var formattedStr = root.FormatAsCsv();
-
-                File.WriteAllText(options.OutputFilename, formattedStr);
-
-                if (options.Verbose)
-                    Console.WriteLine($"** CSV Output:\n{formattedStr}");
-            }
-            finally
-            {
-                if (!s_keepArtifacts && s_sandboxDir != null)
-                {
-                    PrintHeader("** Cleaning up sandbox directory");
-                    DeleteDirectory(s_sandboxDir);
-                    s_sandboxDir = null;
-                }
-            }
-        }
-
-        private static void PrintHeader(string message)
-        {
-            Console.WriteLine();
-            Console.WriteLine("**********************************************************************");
-            Console.WriteLine($"** {message}");
-            Console.WriteLine("**********************************************************************");
-        }
-
-        private static SizeReportingNode GetAcquisitionSize()
-        {
-            var result = new SizeReportingNode("Acquisition Size");
-
-            // Arbitrary command to trigger first time setup
-            ProcessStartInfo dotnet = new ProcessStartInfo()
-            {
-                WorkingDirectory = s_sandboxDir.FullName,
-                FileName = s_dotnetExe.FullName,
-                Arguments = "new"
-            };
-
-            // Used to set where the packages will be unpacked to.
-            // There is a no guarantee that this is a stable method, but is the only way currently to set the fallback folder location
-            dotnet.Environment["DOTNET_CLI_TEST_FALLBACKFOLDER"] = s_fallbackDir.FullName;
-
-            LaunchProcess(dotnet, 180000);
-
-            Console.WriteLine("\n** Measuring total size of acquired files");
-
-            result.AddChild(new SizeReportingNode(s_fallbackDir, 1));
-
-            var dotnetNode = new SizeReportingNode(s_dotnetExe.Directory);
-            var reportingDepths = new Dictionary<string, int>
-            {
-                {"additionalDeps", 1},
-                {"host", 0},
-                {"sdk", 2},
-                {"shared", 2},
-                {"store", 3}
-            };
-            foreach (var childNode in dotnetNode.Children)
-            {
-                int depth = 0;
-                if (reportingDepths.TryGetValue(childNode.Name, out depth))
-                {
-                    childNode.LimitReportingDepth(depth);
-                }
-            }
-            result.AddChild(dotnetNode);
-
-            return result;
-        }
-
-        private static SizeReportingNode GetDeploymentSize()
-        {
-            // Write the NuGet.Config file
-            var nugetConfFile = new FileInfo(Path.Combine(s_sandboxDir.FullName, "NuGet.Config"));
-            File.WriteAllText(nugetConfFile.FullName, NugetConfig);
-
-            var result = new SizeReportingNode("Deployment Size");
-            foreach (string template in NewTemplates)
-            {
-                var templateNode = new SizeReportingNode(template);
-                result.AddChild(templateNode);
-
-                foreach (var os in OperatingSystems)
-                {
-                    Console.WriteLine($"\n\n** Deploying {template}/{os}");
-
-                    var deploymentSandbox = new DirectoryInfo(Path.Combine(s_sandboxDir.FullName, template, os));
-                    var publishDir = new DirectoryInfo(Path.Combine(deploymentSandbox.FullName, "publish"));
-                    deploymentSandbox.Create();
-
-                    ProcessStartInfo dotnetNew = new ProcessStartInfo()
-                    {
-                        FileName = s_dotnetExe.FullName,
-                        Arguments = $"new {template}",
-                        UseShellExecute = false,
-                        WorkingDirectory = deploymentSandbox.FullName
-                    };
-                    dotnetNew.Environment["DOTNET_CLI_TEST_FALLBACKFOLDER"] = s_fallbackDir.FullName;
-
-                    ProcessStartInfo dotnetRestore = new ProcessStartInfo()
-                    {
-                        FileName = s_dotnetExe.FullName,
-                        Arguments = $"restore --runtime {os}",
-                        UseShellExecute = false,
-                        WorkingDirectory = deploymentSandbox.FullName
-                    };
-                    dotnetRestore.Environment["DOTNET_CLI_TEST_FALLBACKFOLDER"] = s_fallbackDir.FullName;
-
-                    ProcessStartInfo dotnetPublish = new ProcessStartInfo()
-                    {
-                        FileName = s_dotnetExe.FullName,
-                        // The UserSharedCompiler flag is set to false to prevent handles from being held that will later cause deletion of the installed SDK to fail.
-                        Arguments = $"publish -c Release --runtime {os} --output {publishDir.FullName} /p:UseSharedCompilation=false /p:UseRazorBuildServer=false",
-                        UseShellExecute = false,
-                        WorkingDirectory = deploymentSandbox.FullName
-                    };
-                    dotnetPublish.Environment["DOTNET_CLI_TEST_FALLBACKFOLDER"] = s_fallbackDir.FullName;
-
-                    try
-                    {
-                        LaunchProcess(dotnetNew, 180000);
-                        if (deploymentSandbox.EnumerateFiles().Any(f => f.Name.EndsWith("proj")))
-                        {
-                            LaunchProcess(dotnetRestore, 180000);
-                            LaunchProcess(dotnetPublish, 180000);
-                        }
-                        else
-                        {
-                            Console.WriteLine($"** {template} does not have a project file to restore or publish");
-                        }
-                    }
-                    catch (Exception e)
-                    {
-                        Console.Error.WriteLine(e.Message);
-                        continue;
-                    }
-
-                    // If we published this project, only report it's published size
-                    if (publishDir.Exists)
-                    {
-                        var publishNode = new SizeReportingNode(publishDir, 0);
-                        publishNode.Name = deploymentSandbox.Name;
-                        templateNode.AddChild(publishNode);
-
-                        if (publishNode.Size <= 0) {
-                            throw new InvalidOperationException($"{publishNode.Name} reports as invalid size {publishNode.Size}");
-                        }
-                    }
-                    else
-                    {
-                        templateNode.AddChild(new SizeReportingNode(deploymentSandbox, 0));
-                    }
-                }
-            }
-            return result;
-        }
-
-        private static void DownloadDotnetInstaller()
-        {
-            var psi = new ProcessStartInfo() {
-                WorkingDirectory = s_sandboxDir.FullName,
-                FileName = @"powershell.exe",
-                Arguments = $"-NoProfile wget https://raw.githubusercontent.com/dotnet/cli/master/scripts/obtain/dotnet-install.ps1 -OutFile Dotnet-Install.ps1"
-            };
-            LaunchProcess(psi, 180000);
-        }
-
-        private static void InstallSharedRuntime()
-        {
-            var psi = new ProcessStartInfo() {
-                WorkingDirectory = s_sandboxDir.FullName,
-                FileName = @"powershell.exe",
-                Arguments = $"-NoProfile -ExecutionPolicy Bypass -File .\\Dotnet-Install.ps1 -Runtime dotnet -InstallDir .dotnet -Channel {s_dotnetChannel} -Architecture {s_targetArchitecture}"
-            };
-            LaunchProcess(psi, 180000);
-        }
-
-        private static void InstallDotnet()
-        {
-            var psi = new ProcessStartInfo() {
-                WorkingDirectory = s_sandboxDir.FullName,
-                FileName = @"powershell.exe",
-                Arguments = $"-NoProfile -ExecutionPolicy Bypass -File .\\Dotnet-Install.ps1 -InstallDir .dotnet -Channel {s_dotnetChannel} -Architecture {s_targetArchitecture}"
-            };
-            LaunchProcess(psi, 180000);
-        }
-
-        private static void ModifySharedFramework()
-        {
-            // Current working directory is the <coreclr repo root>/sandbox directory.
-            Console.WriteLine($"** Modifying the shared framework.");
-
-            var sourcedi = s_corelibsDir;
-
-            // Get the directory containing the newest version of Microsodt.NETCore.App libraries
-            var targetdi = new DirectoryInfo(
-                new DirectoryInfo(Path.Combine(s_sandboxDir.FullName, ".dotnet", "shared", "Microsoft.NETCore.App"))
-                .GetDirectories("*")
-                .OrderBy(s => s.Name)
-                .Last()
-                .FullName);
-
-            Console.WriteLine($"| Source : {sourcedi.FullName}");
-            Console.WriteLine($"| Target : {targetdi.FullName}");
-
-            var compiledBinariesOfInterest = new string[] {
-                "clretwrc.dll",
-                "clrjit.dll",
-                "coreclr.dll",
-                "mscordaccore.dll",
-                "mscordbi.dll",
-                "mscorrc.dll",
-                "sos.dll",
-                "SOS.NETCore.dll",
-                "System.Private.CoreLib.dll"
-            };
-
-            foreach (var compiledBinaryOfInterest in compiledBinariesOfInterest)
-            {
-                foreach (FileInfo fi in targetdi.GetFiles(compiledBinaryOfInterest))
-                {
-                    var sourceFilePath = Path.Combine(sourcedi.FullName, fi.Name);
-                    var targetFilePath = Path.Combine(targetdi.FullName, fi.Name);
-
-                    if (File.Exists(sourceFilePath))
-                    {
-                        File.Copy(sourceFilePath, targetFilePath, true);
-                        Console.WriteLine($"|   Copied file - '{fi.Name}'");
-                    }
-                }
-            }
-        }
-
-        private static FileInfo SetupDotnet()
-        {
-            DownloadDotnetInstaller();
-            InstallSharedRuntime();
-            InstallDotnet();
-            if (s_corelibsDir != null)
-            {
-                ModifySharedFramework();
-            }
-
-            var dotnetExe = new FileInfo(Path.Combine(s_sandboxDir.FullName, ".dotnet", "dotnet.exe"));
-            Debug.Assert(dotnetExe.Exists);
-
-            return dotnetExe;
-        }
-
-        private static void LaunchProcess(ProcessStartInfo processStartInfo, int timeoutMilliseconds, IDictionary<string, string> environment = null)
-        {
-            Console.WriteLine();
-            Console.WriteLine($"{System.Security.Principal.WindowsIdentity.GetCurrent().Name}@{Environment.MachineName} \"{processStartInfo.WorkingDirectory}\"");
-            Console.WriteLine($"[{DateTime.Now}] $ {processStartInfo.FileName} {processStartInfo.Arguments}");
-
-            if (environment != null)
-            {
-                foreach (KeyValuePair<string, string> pair in environment)
-                {
-                    if (!processStartInfo.Environment.ContainsKey(pair.Key))
-                        processStartInfo.Environment.Add(pair.Key, pair.Value);
-                    else
-                        processStartInfo.Environment[pair.Key] = pair.Value;
-                }
-            }
-
-            using (var p = new Process() { StartInfo = processStartInfo })
-            {
-                p.Start();
-                if (p.WaitForExit(timeoutMilliseconds) == false)
-                {
-                    // FIXME: What about clean/kill child processes?
-                    p.Kill();
-                    throw new TimeoutException($"The process '{processStartInfo.FileName} {processStartInfo.Arguments}' timed out.");
-                }
-
-                if (p.ExitCode != 0)
-                    throw new Exception($"{processStartInfo.FileName} exited with error code {p.ExitCode}");
-            }
-        }
-
-        /// <summary>
-        /// Provides an interface to parse the command line arguments passed to the SoDBench.
-        /// </summary>
-        private sealed class SoDBenchOptions
-        {
-            public SoDBenchOptions() { }
-
-            private static string NormalizePath(string path)
-            {
-                if (String.IsNullOrWhiteSpace(path))
-                    throw new InvalidOperationException($"'{path}' is an invalid path: cannot be null or whitespace");
-
-                if (path.Any(c => Path.GetInvalidPathChars().Contains(c)))
-                    throw new InvalidOperationException($"'{path}' is an invalid path: contains invalid characters");
-
-                return Path.IsPathRooted(path) ? path : Path.GetFullPath(path);
-            }
-
-            [Option('o', Required = false, HelpText = "Specifies the output file name for the csv document")]
-            public string OutputFilename
-            {
-                get { return _outputFilename; }
-
-                set
-                {
-                    _outputFilename = NormalizePath(value);
-                }
-            }
-
-            [Option("dotnet", Required = false, HelpText = "Specifies the location of dotnet cli to use.")]
-            public string DotnetExecutable
-            {
-                get { return _dotnetExe; }
-
-                set
-                {
-                    _dotnetExe = NormalizePath(value);
-                }
-            }
-
-            [Option("corelibs", Required = false, HelpText = "Specifies the location of .NET Core libraries to patch into dotnet. Cannot be used with --dotnet")]
-            public string CoreLibrariesDirectory
-            {
-                get { return _corelibsDir; }
-
-                set
-                {
-                    _corelibsDir = NormalizePath(value);
-                }
-            }
-
-            [Option("architecture", Required = false, Default = "x64", HelpText = "JitBench target architecture (It must match the built product that was copied into sandbox).")]
-            public string TargetArchitecture { get; set; }
-
-            [Option("channel", Required = false, Default = "master", HelpText = "Specifies the channel to use when installing the dotnet-cli")]
-            public string DotnetChannel { get; set; }
-
-            [Option('v', Required = false, HelpText = "Sets output to verbose")]
-            public bool Verbose { get; set; }
-
-            [Option("keep-artifacts", Required = false, HelpText = "Specifies that artifacts of this run should be kept")]
-            public bool KeepArtifacts { get; set; }
-
-            public static SoDBenchOptions Parse(string[] args)
-            {
-                using (var parser = new Parser((settings) => {
-                    settings.CaseInsensitiveEnumValues = true;
-                    settings.CaseSensitive = false;
-                    settings.HelpWriter = new StringWriter();
-                    settings.IgnoreUnknownArguments = true;
-                }))
-                {
-                    SoDBenchOptions options = null;
-                    parser.ParseArguments<SoDBenchOptions>(args)
-                        .WithParsed(parsed => options = parsed)
-                        .WithNotParsed(errors => {
-                            foreach (Error error in errors)
-                            {
-                                switch (error.Tag)
-                                {
-                                    case ErrorType.MissingValueOptionError:
-                                        throw new ArgumentException(
-                                                $"Missing value option for command line argument '{(error as MissingValueOptionError).NameInfo.NameText}'");
-                                    case ErrorType.HelpRequestedError:
-                                        Console.WriteLine(Usage());
-                                        Environment.Exit(0);
-                                        break;
-                                    case ErrorType.VersionRequestedError:
-                                        Console.WriteLine(new AssemblyName(typeof(SoDBenchOptions).GetTypeInfo().Assembly.FullName).Version);
-                                        Environment.Exit(0);
-                                        break;
-                                    case ErrorType.BadFormatTokenError:
-                                    case ErrorType.UnknownOptionError:
-                                    case ErrorType.MissingRequiredOptionError:
-                                    case ErrorType.MutuallyExclusiveSetError:
-                                    case ErrorType.BadFormatConversionError:
-                                    case ErrorType.SequenceOutOfRangeError:
-                                    case ErrorType.RepeatedOptionError:
-                                    case ErrorType.NoVerbSelectedError:
-                                    case ErrorType.BadVerbSelectedError:
-                                    case ErrorType.HelpVerbRequestedError:
-                                        break;
-                                }
-                            }
-                        });
-
-                    if (options != null && !String.IsNullOrEmpty(options.DotnetExecutable) && !String.IsNullOrEmpty(options.CoreLibrariesDirectory))
-                    {
-                        throw new ArgumentException("--dotnet and --corlibs cannot be used together");
-                    }
-
-                    return options;
-                }
-            }
-
-            public static string Usage()
-            {
-                var parser = new Parser((parserSettings) =>
-                {
-                    parserSettings.CaseInsensitiveEnumValues = true;
-                    parserSettings.CaseSensitive = false;
-                    parserSettings.EnableDashDash = true;
-                    parserSettings.HelpWriter = new StringWriter();
-                    parserSettings.IgnoreUnknownArguments = true;
-                });
-
-                var helpTextString = new HelpText
-                {
-                    AddDashesToOption = true,
-                    AddEnumValuesToHelpText = true,
-                    AdditionalNewLineAfterOption = false,
-                    Heading = "SoDBench",
-                    MaximumDisplayWidth = 80,
-                }.AddOptions(parser.ParseArguments<SoDBenchOptions>(new string[] { "--help" })).ToString();
-                return helpTextString;
-            }
-
-            private string _dotnetExe;
-            private string _corelibsDir;
-            private string _outputFilename = "measurement.csv";
-        }
-
-        private static void DeleteDirectory(DirectoryInfo dir, uint maxWait=10000)
-        {
-            foreach (var subdir in dir.GetDirectories())
-            {
-                DeleteDirectory(subdir);
-            }
-
-            // Give it time to actually delete all the files
-            var files = dir.GetFiles();
-            bool wait = true;
-            uint waitTime = 0;
-            while (wait)
-            {
-                wait = false;
-
-                foreach (var f in files)
-                {
-                    if (File.Exists(f.FullName))
-                    {
-                        try
-                        {
-                            File.Delete(f.FullName);
-                        }
-                        catch (IOException) { if (waitTime > maxWait) throw; }
-                        catch (UnauthorizedAccessException) { if (waitTime > maxWait) throw; }
-
-                        if (File.Exists(f.FullName))
-                        {
-                            wait = true;
-
-                            // Print a message every 3 seconds if the thread is stuck
-                            if (waitTime != 0 && waitTime % 3000 == 0)
-                            {
-                                Console.WriteLine($"Waiting to delete {f.FullName}");
-                            }
-                        }
-                    }
-                }
-
-                // Try again in 100ms
-                if (wait)
-                {
-                    Thread.Sleep(100);
-                    waitTime += 100;
-                }
-            }
-
-            Directory.Delete(dir.FullName);
-        }
-    }
-
-    // A simple class for escaping strings for CSV writing
-    // https://stackoverflow.com/a/769713
-    // Used instead of a package because only these < 20 lines of code are needed
-    public static class Csv
-    {
-        public static string Escape( string s )
-        {
-            if ( s.Contains( QUOTE ) )
-                s = s.Replace( QUOTE, ESCAPED_QUOTE );
-
-            if ( s.IndexOfAny( CHARACTERS_THAT_MUST_BE_QUOTED ) > -1 )
-                s = QUOTE + s + QUOTE;
-
-            return s;
-        }
-
-        private const string QUOTE = "\"";
-        private const string ESCAPED_QUOTE = "\"\"";
-        private static char[] CHARACTERS_THAT_MUST_BE_QUOTED = { ',', '"', '\n' };
-    }
-}
diff --git a/src/tests/sizeondisk/sodbench/THIRD-PARTY-NOTICES.TXT b/src/tests/sizeondisk/sodbench/THIRD-PARTY-NOTICES.TXT
deleted file mode 100644
index 6e029eec3579..000000000000
--- a/src/tests/sizeondisk/sodbench/THIRD-PARTY-NOTICES.TXT
+++ /dev/null
@@ -1,25 +0,0 @@
-.NET uses third-party libraries or other resources that may be
-distributed under licenses different than the .NET software.
-
-Attributions and license notices for test cases originally authored by
-third parties can be found in the respective test directories.
-
-In the event that we accidentally failed to list a required notice, please
-bring it to our attention. Post an issue or email us:
-
-            dotnet@microsoft.com
-
-The attached notices are provided for information only.
-
-License notice for Stack Overflow
--------------------------------------
-
-Policy: https://stackoverflow.com/help/licensing
-License: https://creativecommons.org/licenses/by-sa/3.0/
-
-Title: Dealing with commas in a CSV file
-Content: https://stackoverflow.com/a/769713
-Question author: Bob The Janitor -- https://stackoverflow.com/users/55102/bob-the-janitor
-Answer author: harp -- https://stackoverflow.com/users/4525/harpo
-
-Use: https://github.com/dotnet/runtime/blob/4893732ba881a4fb9023af1d6d4e64bb2a6eddbc/src/tests/sizeondisk/sodbench/SoDBench.cs#L735
diff --git a/src/tools/illink/illink.sln b/src/tools/illink/illink.sln
index 87e99d208c7f..9b8d904942ba 100644
--- a/src/tools/illink/illink.sln
+++ b/src/tools/illink/illink.sln
@@ -223,6 +223,7 @@ Global
 		SolutionGuid = {E43A3901-42B0-48CA-BB36-5CD40A99A6EE}
 	EndGlobalSection
 	GlobalSection(SharedMSBuildProjectFiles) = preSolution
+		test\Trimming.Tests.Shared\Trimming.Tests.Shared.projitems*{400a1561-b6b6-482d-9e4c-3ddaede5bd07}*SharedItemsImports = 5
 		src\ILLink.Shared\ILLink.Shared.projitems*{dd28e2b1-057b-4b4d-a04d-b2ebd9e76e46}*SharedItemsImports = 5
 		src\ILLink.Shared\ILLink.Shared.projitems*{f1a44a78-34ee-408b-8285-9a26f0e7d4f2}*SharedItemsImports = 5
 		src\ILLink.Shared\ILLink.Shared.projitems*{ff598e93-8e9e-4091-9f50-61a7572663ae}*SharedItemsImports = 13
diff --git a/src/tools/illink/src/ILLink.CodeFix/ILLink.CodeFixProvider.csproj b/src/tools/illink/src/ILLink.CodeFix/ILLink.CodeFixProvider.csproj
index 3771de91b840..5dd13cb77721 100644
--- a/src/tools/illink/src/ILLink.CodeFix/ILLink.CodeFixProvider.csproj
+++ b/src/tools/illink/src/ILLink.CodeFix/ILLink.CodeFixProvider.csproj
@@ -9,8 +9,8 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Condition="'$(DotNetBuildFromSource)' != 'true'" Include="Microsoft.CodeAnalysis.CSharp.Workspaces" Version="$(MicrosoftCodeAnalysisVersion_LatestVS)" PrivateAssets="all" />
-    <PackageReference Condition="'$(DotNetBuildFromSource)' == 'true'" Include="Microsoft.CodeAnalysis.CSharp.Workspaces" Version="$(MicrosoftCodeAnalysisVersion)" PrivateAssets="all" />
+    <PackageReference Condition="'$(DotNetBuildSourceOnly)' != 'true'" Include="Microsoft.CodeAnalysis.CSharp.Workspaces" Version="$(MicrosoftCodeAnalysisVersion_LatestVS)" PrivateAssets="all" />
+    <PackageReference Condition="'$(DotNetBuildSourceOnly)' == 'true'" Include="Microsoft.CodeAnalysis.CSharp.Workspaces" Version="$(MicrosoftCodeAnalysisVersion)" PrivateAssets="all" />
   </ItemGroup>
 
   <ItemGroup>
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/FeatureChecksValue.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/FeatureChecksValue.cs
index 268833431274..028628f2dd59 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/FeatureChecksValue.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/FeatureChecksValue.cs
@@ -13,11 +13,15 @@ namespace ILLink.RoslynAnalyzer.DataFlow
 	// For now, this is only designed to track the built-in "features"/"capabilities"
 	// like RuntimeFeatures.IsDynamicCodeSupported, where a true return value
 	// indicates that a feature/capability is available.
-	public record struct FeatureChecksValue : INegate<FeatureChecksValue>
+	public record struct FeatureChecksValue : INegate<FeatureChecksValue>, IDeepCopyValue<FeatureChecksValue>
 	{
 		public ValueSet<string> EnabledFeatures;
 		public ValueSet<string> DisabledFeatures;
 
+		public static readonly FeatureChecksValue All = new FeatureChecksValue (ValueSet<string>.Unknown, ValueSet<string>.Empty);
+
+		public static readonly FeatureChecksValue None = new FeatureChecksValue (ValueSet<string>.Empty, ValueSet<string>.Empty);
+
 		public FeatureChecksValue (string enabledFeature)
 		{
 			EnabledFeatures = new ValueSet<string> (enabledFeature);
@@ -48,5 +52,10 @@ public FeatureChecksValue Negate ()
 		{
 			return new FeatureChecksValue (DisabledFeatures.DeepCopy (), EnabledFeatures.DeepCopy ());
 		}
+
+		public FeatureChecksValue DeepCopy ()
+		{
+			return new FeatureChecksValue (EnabledFeatures.DeepCopy (), DisabledFeatures.DeepCopy ());
+		}
 	}
 }
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/FeatureCheckVisitor.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/FeatureChecksVisitor.cs
similarity index 60%
rename from src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/FeatureCheckVisitor.cs
rename to src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/FeatureChecksVisitor.cs
index 707294718fda..6a927f74fd15 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/FeatureCheckVisitor.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/FeatureChecksVisitor.cs
@@ -24,7 +24,7 @@ namespace ILLink.RoslynAnalyzer.DataFlow
 	// (a set features that are checked to be enabled or disabled).
 	// The visitor takes a LocalDataFlowState as an argument, allowing for checks that
 	// depend on the current dataflow state.
-	public class FeatureChecksVisitor : OperationVisitor<StateValue, FeatureChecksValue?>
+	public class FeatureChecksVisitor : OperationVisitor<StateValue, FeatureChecksValue>
 	{
 		DataFlowAnalyzerContext _dataFlowAnalyzerContext;
 
@@ -33,32 +33,48 @@ public FeatureChecksVisitor (DataFlowAnalyzerContext dataFlowAnalyzerContext)
 			_dataFlowAnalyzerContext = dataFlowAnalyzerContext;
 		}
 
-		public override FeatureChecksValue? VisitArgument (IArgumentOperation operation, StateValue state)
+		public override FeatureChecksValue DefaultVisit (IOperation operation, StateValue state)
+		{
+			// Visiting a non-understood pattern should return the empty set of features, which will
+			// prevent this check from acting as a guard for any feature.
+			return FeatureChecksValue.None;
+		}
+
+		public override FeatureChecksValue VisitArgument (IArgumentOperation operation, StateValue state)
 		{
 			return Visit (operation.Value, state);
 		}
 
-		public override FeatureChecksValue? VisitPropertyReference (IPropertyReferenceOperation operation, StateValue state)
+		public override FeatureChecksValue VisitPropertyReference (IPropertyReferenceOperation operation, StateValue state)
 		{
+			// A single property may serve as a feature check for multiple features.
+			FeatureChecksValue featureChecks = FeatureChecksValue.None;
 			foreach (var analyzer in _dataFlowAnalyzerContext.EnabledRequiresAnalyzers) {
-				if (analyzer.IsRequiresCheck (_dataFlowAnalyzerContext.Compilation, operation.Property)) {
-					return new FeatureChecksValue (analyzer.FeatureName);
+				if (analyzer.IsFeatureGuard (operation.Property, _dataFlowAnalyzerContext.Compilation)) {
+					var featureCheck = new FeatureChecksValue (analyzer.RequiresAttributeFullyQualifiedName);
+					featureChecks = featureChecks.And (featureCheck);
 				}
 			}
 
-			return null;
+			return featureChecks;
 		}
 
-		public override FeatureChecksValue? VisitUnaryOperator (IUnaryOperation operation, StateValue state)
+		public override FeatureChecksValue VisitUnaryOperator (IUnaryOperation operation, StateValue state)
 		{
 			if (operation.OperatorKind is not UnaryOperatorKind.Not)
-				return null;
+				return FeatureChecksValue.None;
 
-			FeatureChecksValue? context = Visit (operation.Operand, state);
-			if (context == null)
-				return null;
+			FeatureChecksValue context = Visit (operation.Operand, state);
+			return context.Negate ();
+		}
 
-			return context.Value.Negate ();
+		public override FeatureChecksValue VisitLiteral (ILiteralOperation operation, StateValue state)
+		{
+			// 'false' can guard any feature
+			if (GetConstantBool (operation.ConstantValue) is false)
+				return FeatureChecksValue.All;
+
+			return FeatureChecksValue.None;
 		}
 
 		public bool? GetLiteralBool (IOperation operation)
@@ -77,7 +93,7 @@ public FeatureChecksVisitor (DataFlowAnalyzerContext dataFlowAnalyzerContext)
 			return value;
 		}
 
-		public override FeatureChecksValue? VisitBinaryOperator (IBinaryOperation operation, StateValue state)
+		public override FeatureChecksValue VisitBinaryOperator (IBinaryOperation operation, StateValue state)
 		{
 			bool expectEqual;
 			switch (operation.OperatorKind) {
@@ -88,36 +104,32 @@ public FeatureChecksVisitor (DataFlowAnalyzerContext dataFlowAnalyzerContext)
 					expectEqual = false;
 					break;
 				default:
-					return null;
+					return FeatureChecksValue.None;
 			}
 
 			if (GetLiteralBool (operation.LeftOperand) is bool leftBool) {
-				if (Visit (operation.RightOperand, state) is not FeatureChecksValue rightValue)
-					return null;
+				FeatureChecksValue rightValue = Visit (operation.RightOperand, state);
 				return leftBool == expectEqual
 					? rightValue
 					: rightValue.Negate ();
 			}
 
 			if (GetLiteralBool (operation.RightOperand) is bool rightBool) {
-				if (Visit (operation.LeftOperand, state) is not FeatureChecksValue leftValue)
-					return null;
+				FeatureChecksValue leftValue = Visit (operation.LeftOperand, state);
 				return rightBool == expectEqual
 					? leftValue
 					: leftValue.Negate ();
 			}
 
-			return null;
+			return FeatureChecksValue.None;
 		}
 
-		public override FeatureChecksValue? VisitIsPattern (IIsPatternOperation operation, StateValue state)
+		public override FeatureChecksValue VisitIsPattern (IIsPatternOperation operation, StateValue state)
 		{
 			if (GetExpectedValueFromPattern (operation.Pattern) is not bool patternValue)
-				return null;
-
-			if (Visit (operation.Value, state) is not FeatureChecksValue value)
-				return null;
+				return FeatureChecksValue.None;
 
+			FeatureChecksValue value = Visit (operation.Value, state);
 			return patternValue
 				? value
 				: value.Negate ();
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/LocalDataFlowVisitor.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/LocalDataFlowVisitor.cs
index bbaeff53f0c5..84065ecc25fe 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/LocalDataFlowVisitor.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlow/LocalDataFlowVisitor.cs
@@ -88,12 +88,12 @@ public LocalDataFlowVisitor (
 				return null;
 
 			var branchValue = Visit (branchValueOperation, state);
-
+			TConditionValue conditionValue = GetConditionValue (branchValueOperation, state);
 			if (block.Block.ConditionKind != ControlFlowConditionKind.None) {
 				// BranchValue may represent a value used in a conditional branch to the ConditionalSuccessor.
 				// If so, give the analysis an opportunity to model the checked condition, and return the model
 				// of the condition back to the generic analysis. It will be applied to the state of each outgoing branch.
-				return GetConditionValue (branchValueOperation, state);
+				return conditionValue;
 			}
 
 			// If not, the BranchValue represents a return or throw value associated with the FallThroughSuccessor of this block.
@@ -118,10 +118,13 @@ public LocalDataFlowVisitor (
 			// We don't want the return operation because this might have multiple possible return values in general.
 			var current = state.Current;
 			HandleReturnValue (branchValue, branchValueOperation, in current.Context);
+			// Must be called for every return value even if it did not return an understood condition,
+			// because the non-understood conditions will produce warnings for FeatureGuard properties.
+			HandleReturnConditionValue (conditionValue, branchValueOperation);
 			return null;
 		}
 
-		public abstract TConditionValue? GetConditionValue (
+		public abstract TConditionValue GetConditionValue (
 			IOperation branchValueOperation,
 			LocalDataFlowState<TValue, TContext, TValueLattice, TContextLattice> state);
 
@@ -146,6 +149,10 @@ public abstract void HandleReturnValue (
 			IOperation operation,
 			in TContext context);
 
+		public abstract void HandleReturnConditionValue (
+			TConditionValue returnConditionValue,
+			IOperation branchValueOperation);
+
 		// This is called for any method call, which includes:
 		// - Normal invocation operation
 		// - Accessing property value - which is treated as a call to the getter
@@ -776,9 +783,7 @@ TValue HandleMethodCallHelper (
 
 				// Get the condition value that is being asserted. If the attribute is DoesNotReturnIf(true),
 				// the condition value needs to be negated so that we can assert the false condition.
-				if (GetConditionValue (argumentOperation, state) is not TConditionValue conditionValue)
-					continue;
-
+				TConditionValue conditionValue = GetConditionValue (argumentOperation, state);
 				var current = state.Current;
 				ApplyCondition (
 					doesNotReturnIfConditionValue == false
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/DataflowAnalyzerContext.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlowAnalyzerContext.cs
similarity index 100%
rename from src/tools/illink/src/ILLink.RoslynAnalyzer/DataflowAnalyzerContext.cs
rename to src/tools/illink/src/ILLink.RoslynAnalyzer/DataFlowAnalyzerContext.cs
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/DynamicallyAccessedMembersAnalyzer.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/DynamicallyAccessedMembersAnalyzer.cs
index 173bb667a4d1..c7f7eb2eb6aa 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/DynamicallyAccessedMembersAnalyzer.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/DynamicallyAccessedMembersAnalyzer.cs
@@ -21,6 +21,7 @@ public class DynamicallyAccessedMembersAnalyzer : DiagnosticAnalyzer
 		internal const string DynamicallyAccessedMembersAttribute = nameof (DynamicallyAccessedMembersAttribute);
 		public const string attributeArgument = "attributeArgument";
 		public const string FullyQualifiedDynamicallyAccessedMembersAttribute = "System.Diagnostics.CodeAnalysis." + DynamicallyAccessedMembersAttribute;
+		public const string FullyQualifiedFeatureGuardAttribute  = "System.Diagnostics.CodeAnalysis.FeatureGuardAttribute";
 		public static Lazy<ImmutableArray<RequiresAnalyzerBase>> RequiresAnalyzers { get; } = new Lazy<ImmutableArray<RequiresAnalyzerBase>> (GetRequiresAnalyzers);
 		static ImmutableArray<RequiresAnalyzerBase> GetRequiresAnalyzers () =>
 			ImmutableArray.Create<RequiresAnalyzerBase> (
@@ -51,6 +52,8 @@ public static ImmutableArray<DiagnosticDescriptor> GetSupportedDiagnostics ()
 			diagDescriptorsArrayBuilder.Add (DiagnosticDescriptors.GetDiagnosticDescriptor (DiagnosticId.UnrecognizedTypeNameInTypeGetType));
 			diagDescriptorsArrayBuilder.Add (DiagnosticDescriptors.GetDiagnosticDescriptor (DiagnosticId.UnrecognizedParameterInMethodCreateInstance));
 			diagDescriptorsArrayBuilder.Add (DiagnosticDescriptors.GetDiagnosticDescriptor (DiagnosticId.ParametersOfAssemblyCreateInstanceCannotBeAnalyzed));
+			diagDescriptorsArrayBuilder.Add (DiagnosticDescriptors.GetDiagnosticDescriptor (DiagnosticId.ReturnValueDoesNotMatchFeatureGuards));
+			diagDescriptorsArrayBuilder.Add (DiagnosticDescriptors.GetDiagnosticDescriptor (DiagnosticId.InvalidFeatureGuard));
 
 			foreach (var requiresAnalyzer in RequiresAnalyzers.Value) {
 				foreach (var diagnosticDescriptor in requiresAnalyzer.SupportedDiagnostics)
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/ILLink.RoslynAnalyzer.csproj b/src/tools/illink/src/ILLink.RoslynAnalyzer/ILLink.RoslynAnalyzer.csproj
index 80fd6a902203..bc410523d5d7 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/ILLink.RoslynAnalyzer.csproj
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/ILLink.RoslynAnalyzer.csproj
@@ -1,4 +1,4 @@
-﻿<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
     <TargetFramework>netstandard2.0</TargetFramework>
@@ -7,8 +7,14 @@
     <EnableXlfLocalization>false</EnableXlfLocalization>
     <EnableDefaultEmbeddedResourceItems>false</EnableDefaultEmbeddedResourceItems>
     <AnalysisLevel>Latest</AnalysisLevel>
-    <NoWarn Condition="'$(DotNetBuildFromSource)' == 'true'">$(NoWarn);CS8524</NoWarn>
+    <NoWarn Condition="'$(DotNetBuildSourceOnly)' == 'true'">$(NoWarn);CS8524</NoWarn>
     <AnalyzerLanguage>cs</AnalyzerLanguage>
+    <!-- The analyzer needs to process deeply nested expressions in corelib.
+         This can blow up the stack if using unoptimized code (due to large
+         stack frames with many temporary locals for debugging support), so we
+         optimize the analyzer even in Debug builds. Note: we still use the
+         Debug configuration to get Debug asserts. -->
+    <Optimize>true</Optimize>
   </PropertyGroup>
 
   <ItemGroup>
@@ -16,9 +22,9 @@
   </ItemGroup>
 
   <ItemGroup>
-    <PackageReference Condition="'$(DotNetBuildFromSource)' != 'true'" Include="Microsoft.CodeAnalysis.CSharp" Version="$(MicrosoftCodeAnalysisVersion_LatestVS)" PrivateAssets="all" />
-    <PackageReference Condition="'$(DotNetBuildFromSource)' == 'true'" Include="Microsoft.CodeAnalysis.CSharp" Version="$(MicrosoftCodeAnalysisVersion)" PrivateAssets="all" />
-    <PackageReference Condition="'$(DotNetBuildFromSource)' != 'true'" Include="StaticCs" Version="$(StaticCsVersion)">
+    <PackageReference Condition="'$(DotNetBuildSourceOnly)' != 'true'" Include="Microsoft.CodeAnalysis.CSharp" Version="$(MicrosoftCodeAnalysisVersion_LatestVS)" PrivateAssets="all" />
+    <PackageReference Condition="'$(DotNetBuildSourceOnly)' == 'true'" Include="Microsoft.CodeAnalysis.CSharp" Version="$(MicrosoftCodeAnalysisVersion)" PrivateAssets="all" />
+    <PackageReference Condition="'$(DotNetBuildSourceOnly)' != 'true'" Include="StaticCs" Version="$(StaticCsVersion)">
       <PrivateAssets>all</PrivateAssets>
       <ExcludeAssets>contentfiles</ExcludeAssets> <!-- We include our own copy of the ClosedAttribute to work in source build -->
     </PackageReference>
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/ISymbolExtensions.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/ISymbolExtensions.cs
index 7e830f7c6ecd..3f1903ba12ce 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/ISymbolExtensions.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/ISymbolExtensions.cs
@@ -1,9 +1,14 @@
 // Copyright (c) .NET Foundation and contributors. All rights reserved.
 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
 
+using System.Collections.Immutable;
+using System.Collections.Generic;
 using System.Diagnostics.CodeAnalysis;
+using System.Linq;
 using System.Text;
 using Microsoft.CodeAnalysis;
+using ILLink.RoslynAnalyzer.DataFlow;
+using ILLink.Shared.DataFlow;
 
 namespace ILLink.RoslynAnalyzer
 {
@@ -34,6 +39,14 @@ internal static bool TryGetAttribute (this ISymbol member, string attributeName,
 			return false;
 		}
 
+		internal static IEnumerable<AttributeData> GetAttributes (this ISymbol member, string attributeName)
+		{
+			foreach (var attr in member.GetAttributes ()) {
+				if (attr.AttributeClass is { } attrClass && attrClass.HasName (attributeName))
+					yield return attr;
+			}
+		}
+
 		internal static DynamicallyAccessedMemberTypes GetDynamicallyAccessedMemberTypes (this ISymbol symbol)
 		{
 			if (!TryGetAttribute (symbol, DynamicallyAccessedMembersAnalyzer.DynamicallyAccessedMembersAttribute, out var dynamicallyAccessedMembers))
@@ -58,6 +71,16 @@ internal static DynamicallyAccessedMemberTypes GetDynamicallyAccessedMemberTypes
 			return (DynamicallyAccessedMemberTypes) dynamicallyAccessedMembers.ConstructorArguments[0].Value!;
 		}
 
+		internal static ValueSet<string> GetFeatureGuardAnnotations (this IPropertySymbol propertySymbol)
+		{
+			HashSet<string> featureSet = new ();
+			foreach (var featureGuardAttribute in propertySymbol.GetAttributes (DynamicallyAccessedMembersAnalyzer.FullyQualifiedFeatureGuardAttribute)) {
+				if (featureGuardAttribute.ConstructorArguments is [TypedConstant { Value: INamedTypeSymbol featureType }])
+					featureSet.Add (featureType.GetDisplayName ());
+			}
+			return featureSet.Count == 0 ? ValueSet<string>.Empty : new ValueSet<string> (featureSet);
+		}
+
 		internal static bool TryGetReturnAttribute (this IMethodSymbol member, string attributeName, [NotNullWhen (returnValue: true)] out AttributeData? attribute)
 		{
 			attribute = null;
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresAnalyzerBase.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresAnalyzerBase.cs
index d951404845cd..c31a69a24d24 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresAnalyzerBase.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresAnalyzerBase.cs
@@ -5,8 +5,9 @@
 using System.Collections.Immutable;
 using System.Diagnostics.CodeAnalysis;
 using System.Linq;
-using ILLink.Shared;
 using ILLink.RoslynAnalyzer.DataFlow;
+using ILLink.Shared;
+using ILLink.Shared.DataFlow;
 using Microsoft.CodeAnalysis;
 using Microsoft.CodeAnalysis.CSharp;
 using Microsoft.CodeAnalysis.CSharp.Syntax;
@@ -19,9 +20,7 @@ public abstract class RequiresAnalyzerBase : DiagnosticAnalyzer
 	{
 		private protected abstract string RequiresAttributeName { get; }
 
-		internal abstract string FeatureName { get; }
-
-		private protected abstract string RequiresAttributeFullyQualifiedName { get; }
+		internal abstract string RequiresAttributeFullyQualifiedName { get; }
 
 		private protected abstract DiagnosticTargets AnalyzerDiagnosticTargets { get; }
 
@@ -301,7 +300,23 @@ protected virtual bool CreateSpecialIncompatibleMembersDiagnostic (
 		// - false return value indicating that a feature is supported
 		// - feature settings supplied by the project
 		// - custom feature checks defined in library code
-		internal virtual bool IsRequiresCheck (Compilation compilation, IPropertySymbol propertySymbol) => false;
+		private protected virtual bool IsRequiresCheck (IPropertySymbol propertySymbol, Compilation compilation) => false;
+
+		internal static bool IsAnnotatedFeatureGuard (IPropertySymbol propertySymbol, string featureName)
+		{
+			// Only respect FeatureGuardAttribute on static boolean properties.
+			if (!propertySymbol.IsStatic || propertySymbol.Type.SpecialType != SpecialType.System_Boolean || propertySymbol.SetMethod != null)
+				return false;
+
+			ValueSet<string> featureCheckAnnotations = propertySymbol.GetFeatureGuardAnnotations ();
+			return featureCheckAnnotations.Contains (featureName);
+		}
+
+		internal bool IsFeatureGuard (IPropertySymbol propertySymbol, Compilation compilation)
+		{
+			return IsAnnotatedFeatureGuard (propertySymbol, RequiresAttributeFullyQualifiedName)
+				|| IsRequiresCheck (propertySymbol, compilation);
+		}
 
 		internal bool CheckAndCreateRequiresDiagnostic (
 			IOperation operation,
@@ -312,7 +327,7 @@ internal bool CheckAndCreateRequiresDiagnostic (
 			[NotNullWhen (true)] out Diagnostic? diagnostic)
 		{
 			// Warnings are not emitted if the featureContext says the feature is available.
-			if (featureContext.IsEnabled (FeatureName)) {
+			if (featureContext.IsEnabled (RequiresAttributeFullyQualifiedName)) {
 				diagnostic = null;
 				return false;
 			}
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresAssemblyFilesAnalyzer.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresAssemblyFilesAnalyzer.cs
index e8807896d937..8949b249b35e 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresAssemblyFilesAnalyzer.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresAssemblyFilesAnalyzer.cs
@@ -36,9 +36,7 @@ public sealed class RequiresAssemblyFilesAnalyzer : RequiresAnalyzerBase
 
 		private protected override string RequiresAttributeName => RequiresAssemblyFilesAttribute;
 
-		internal override string FeatureName => "AssemblyFiles";
-
-		private protected override string RequiresAttributeFullyQualifiedName => RequiresAssemblyFilesAttributeFullyQualifiedName;
+		internal override string RequiresAttributeFullyQualifiedName => RequiresAssemblyFilesAttributeFullyQualifiedName;
 
 		private protected override DiagnosticTargets AnalyzerDiagnosticTargets => DiagnosticTargets.MethodOrConstructor | DiagnosticTargets.Property | DiagnosticTargets.Event;
 
@@ -61,7 +59,7 @@ internal override bool IsAnalyzerEnabled (AnalyzerOptions options)
 			return true;
 		}
 
-		internal override bool IsRequiresCheck (Compilation compilation, IPropertySymbol propertySymbol)
+		private protected override bool IsRequiresCheck (IPropertySymbol propertySymbol, Compilation compilation)
 		{
 			// "IsAssemblyFilesSupported" is treated as a requires check for testing purposes only, and
 			// is not officially-supported product behavior.
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresDynamicCodeAnalyzer.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresDynamicCodeAnalyzer.cs
index 5232ca9a9854..34bb7808d203 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresDynamicCodeAnalyzer.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresDynamicCodeAnalyzer.cs
@@ -25,9 +25,7 @@ public sealed class RequiresDynamicCodeAnalyzer : RequiresAnalyzerBase
 
 		private protected override string RequiresAttributeName => RequiresDynamicCodeAttribute;
 
-		internal override string FeatureName => "DynamicCode";
-
-		private protected override string RequiresAttributeFullyQualifiedName => FullyQualifiedRequiresDynamicCodeAttribute;
+		internal override string RequiresAttributeFullyQualifiedName => FullyQualifiedRequiresDynamicCodeAttribute;
 
 		private protected override DiagnosticTargets AnalyzerDiagnosticTargets => DiagnosticTargets.MethodOrConstructor | DiagnosticTargets.Class;
 
@@ -40,7 +38,7 @@ public sealed class RequiresDynamicCodeAnalyzer : RequiresAnalyzerBase
 		internal override bool IsAnalyzerEnabled (AnalyzerOptions options) =>
 			options.IsMSBuildPropertyValueTrue (MSBuildPropertyOptionNames.EnableAotAnalyzer);
 
-		internal override bool IsRequiresCheck (Compilation compilation, IPropertySymbol propertySymbol) {
+		private protected override bool IsRequiresCheck (IPropertySymbol propertySymbol, Compilation compilation) {
 			var runtimeFeaturesType = compilation.GetTypeByMetadataName ("System.Runtime.CompilerServices.RuntimeFeature");
 			if (runtimeFeaturesType == null)
 				return false;
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresUnreferencedCodeAnalyzer.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresUnreferencedCodeAnalyzer.cs
index 3623150b7520..69c38629c43e 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresUnreferencedCodeAnalyzer.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/RequiresUnreferencedCodeAnalyzer.cs
@@ -49,11 +49,7 @@ private Action<SymbolAnalysisContext> typeDerivesFromRucBase {
 
 		private protected override string RequiresAttributeName => RequiresUnreferencedCodeAttribute;
 
-		public const string UnreferencedCode = nameof (UnreferencedCode);
-
-		internal override string FeatureName => UnreferencedCode;
-
-		private protected override string RequiresAttributeFullyQualifiedName => FullyQualifiedRequiresUnreferencedCodeAttribute;
+		internal override string RequiresAttributeFullyQualifiedName => FullyQualifiedRequiresUnreferencedCodeAttribute;
 
 		private protected override DiagnosticTargets AnalyzerDiagnosticTargets => DiagnosticTargets.MethodOrConstructor | DiagnosticTargets.Class;
 
@@ -66,7 +62,7 @@ private Action<SymbolAnalysisContext> typeDerivesFromRucBase {
 		internal override bool IsAnalyzerEnabled (AnalyzerOptions options) =>
 			options.IsMSBuildPropertyValueTrue (MSBuildPropertyOptionNames.EnableTrimAnalyzer);
 
-		internal override bool IsRequiresCheck (Compilation compilation, IPropertySymbol propertySymbol)
+		private protected override bool IsRequiresCheck (IPropertySymbol propertySymbol, Compilation compilation)
 		{
 			// "IsUnreferencedCodeSupported" is treated as a requires check for testing purposes only, and
 			// is not officially-supported product behavior.
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/FeatureCheckReturnValuePattern.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/FeatureCheckReturnValuePattern.cs
new file mode 100644
index 000000000000..fe81dbb79ec3
--- /dev/null
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/FeatureCheckReturnValuePattern.cs
@@ -0,0 +1,70 @@
+// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System.Collections.Generic;
+using ILLink.Shared;
+using ILLink.Shared.DataFlow;
+using ILLink.Shared.TrimAnalysis;
+using ILLink.RoslynAnalyzer.DataFlow;
+using Microsoft.CodeAnalysis;
+
+namespace ILLink.RoslynAnalyzer.TrimAnalysis
+{
+	public readonly record struct FeatureCheckReturnValuePattern
+	{
+		public FeatureChecksValue ReturnValue { get; init; }
+		public ValueSet<string> FeatureCheckAnnotations { get; init; }
+		public IOperation Operation { get; init; }
+		public IPropertySymbol OwningSymbol { get; init; }
+
+		public FeatureCheckReturnValuePattern (
+			FeatureChecksValue returnValue,
+			ValueSet<string> featureCheckAnnotations,
+			IOperation operation,
+			IPropertySymbol owningSymbol)
+		{
+			ReturnValue = returnValue.DeepCopy ();
+			FeatureCheckAnnotations = featureCheckAnnotations.DeepCopy ();
+			Operation = operation;
+			OwningSymbol = owningSymbol;
+		}
+
+		public IEnumerable<Diagnostic> CollectDiagnostics (DataFlowAnalyzerContext context)
+		{
+			var diagnosticContext = new DiagnosticContext (Operation.Syntax.GetLocation ());
+			// For now, feature check validation is enabled only when trim analysis is enabled.
+			if (!context.EnableTrimAnalyzer)
+				return diagnosticContext.Diagnostics;
+
+			if (!OwningSymbol.IsStatic || OwningSymbol.Type.SpecialType != SpecialType.System_Boolean || OwningSymbol.SetMethod != null) {
+				// Warn about invalid feature checks (non-static or non-bool properties or properties with setter)
+				diagnosticContext.AddDiagnostic (
+					DiagnosticId.InvalidFeatureGuard);
+				return diagnosticContext.Diagnostics;
+			}
+
+			if (ReturnValue == FeatureChecksValue.All)
+				return diagnosticContext.Diagnostics;
+
+			ValueSet<string> returnValueFeatures = ReturnValue.EnabledFeatures;
+			// For any analyzer-supported feature that this property is declared to guard,
+			// the abstract return value must include that feature
+			// (indicating it is known to be enabled when the return value is true).
+			foreach (string feature in FeatureCheckAnnotations.GetKnownValues ()) {
+				foreach (var analyzer in context.EnabledRequiresAnalyzers) {
+					if (feature != analyzer.RequiresAttributeFullyQualifiedName)
+						continue;
+
+					if (!returnValueFeatures.Contains (feature)) {
+						diagnosticContext.AddDiagnostic (
+							DiagnosticId.ReturnValueDoesNotMatchFeatureGuards,
+							OwningSymbol.GetDisplayName (),
+							feature);
+					}
+				}
+			}
+
+			return diagnosticContext.Diagnostics;
+		}
+	}
+}
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisAssignmentPattern.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisAssignmentPattern.cs
index 5dfc31db3486..2ffcbc43ae88 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisAssignmentPattern.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisAssignmentPattern.cs
@@ -58,7 +58,7 @@ public IEnumerable<Diagnostic> CollectDiagnostics (DataFlowAnalyzerContext conte
 			var diagnosticContext = new DiagnosticContext (Operation.Syntax.GetLocation ());
 			if (context.EnableTrimAnalyzer &&
 				!OwningSymbol.IsInRequiresUnreferencedCodeAttributeScope (out _) &&
-				!FeatureContext.IsEnabled (RequiresUnreferencedCodeAnalyzer.UnreferencedCode)) {
+				!FeatureContext.IsEnabled (RequiresUnreferencedCodeAnalyzer.FullyQualifiedRequiresUnreferencedCodeAttribute)) {
 				foreach (var sourceValue in Source.AsEnumerable ()) {
 					foreach (var targetValue in Target.AsEnumerable ()) {
 						// The target should always be an annotated value, but the visitor design currently prevents
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisGenericInstantiationPattern.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisGenericInstantiationPattern.cs
index 8d484e66036b..26f275085fa8 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisGenericInstantiationPattern.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisGenericInstantiationPattern.cs
@@ -48,7 +48,7 @@ public IEnumerable<Diagnostic> CollectDiagnostics (DataFlowAnalyzerContext conte
 			DiagnosticContext diagnosticContext = new (Operation.Syntax.GetLocation ());
 			if (context.EnableTrimAnalyzer &&
 				!OwningSymbol.IsInRequiresUnreferencedCodeAttributeScope (out _) &&
-				!FeatureContext.IsEnabled (RequiresUnreferencedCodeAnalyzer.UnreferencedCode)) {
+				!FeatureContext.IsEnabled (RequiresUnreferencedCodeAnalyzer.FullyQualifiedRequiresUnreferencedCodeAttribute)) {
 				switch (GenericInstantiation) {
 				case INamedTypeSymbol type:
 					GenericArgumentDataFlow.ProcessGenericArgumentDataFlow (diagnosticContext, type);
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisMethodCallPattern.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisMethodCallPattern.cs
index 8341afa2ea5f..3dfd7fa28552 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisMethodCallPattern.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisMethodCallPattern.cs
@@ -77,7 +77,7 @@ public IEnumerable<Diagnostic> CollectDiagnostics (DataFlowAnalyzerContext conte
 			DiagnosticContext diagnosticContext = new (Operation.Syntax.GetLocation ());
 			if (context.EnableTrimAnalyzer &&
 				!OwningSymbol.IsInRequiresUnreferencedCodeAttributeScope(out _) &&
-				!FeatureContext.IsEnabled (RequiresUnreferencedCodeAnalyzer.UnreferencedCode))
+				!FeatureContext.IsEnabled (RequiresUnreferencedCodeAnalyzer.FullyQualifiedRequiresUnreferencedCodeAttribute))
 			{
 				TrimAnalysisVisitor.HandleCall(Operation, OwningSymbol, CalledMethod, Instance, Arguments, diagnosticContext, default, out var _);
 			}
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisPatternStore.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisPatternStore.cs
index 78de8fdf4235..dd66d802934b 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisPatternStore.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisPatternStore.cs
@@ -17,16 +17,20 @@ public readonly struct TrimAnalysisPatternStore
 		readonly Dictionary<IOperation, TrimAnalysisGenericInstantiationPattern> GenericInstantiationPatterns;
 		readonly Dictionary<IOperation, TrimAnalysisMethodCallPattern> MethodCallPatterns;
 		readonly Dictionary<IOperation, TrimAnalysisReflectionAccessPattern> ReflectionAccessPatterns;
+		readonly Dictionary<IOperation, FeatureCheckReturnValuePattern> FeatureCheckReturnValuePatterns;
 		readonly ValueSetLattice<SingleValue> Lattice;
 		readonly FeatureContextLattice FeatureContextLattice;
 
-		public TrimAnalysisPatternStore (ValueSetLattice<SingleValue> lattice, FeatureContextLattice featureContextLattice)
+		public TrimAnalysisPatternStore (
+			ValueSetLattice<SingleValue> lattice,
+			FeatureContextLattice featureContextLattice)
 		{
 			AssignmentPatterns = new Dictionary<(IOperation, bool), TrimAnalysisAssignmentPattern> ();
 			FieldAccessPatterns = new Dictionary<IOperation, TrimAnalysisFieldAccessPattern> ();
 			GenericInstantiationPatterns = new Dictionary<IOperation, TrimAnalysisGenericInstantiationPattern> ();
 			MethodCallPatterns = new Dictionary<IOperation, TrimAnalysisMethodCallPattern> ();
 			ReflectionAccessPatterns = new Dictionary<IOperation, TrimAnalysisReflectionAccessPattern> ();
+			FeatureCheckReturnValuePatterns = new Dictionary<IOperation, FeatureCheckReturnValuePattern> ();
 			Lattice = lattice;
 			FeatureContextLattice = featureContextLattice;
 		}
@@ -89,6 +93,16 @@ public void Add (TrimAnalysisReflectionAccessPattern pattern)
 			ReflectionAccessPatterns[pattern.Operation] = pattern.Merge (Lattice, FeatureContextLattice, existingPattern);
 		}
 
+		public void Add (FeatureCheckReturnValuePattern pattern)
+		{
+			if (!FeatureCheckReturnValuePatterns.TryGetValue (pattern.Operation, out var existingPattern)) {
+				FeatureCheckReturnValuePatterns.Add (pattern.Operation, pattern);
+				return;
+			}
+
+			Debug.Assert (existingPattern == pattern, "Return values should be identical");
+		}
+
 		public IEnumerable<Diagnostic> CollectDiagnostics (DataFlowAnalyzerContext context)
 		{
 			foreach (var assignmentPattern in AssignmentPatterns.Values) {
@@ -115,6 +129,11 @@ public IEnumerable<Diagnostic> CollectDiagnostics (DataFlowAnalyzerContext conte
 				foreach (var diagnostic in reflectionAccessPattern.CollectDiagnostics (context))
 					yield return diagnostic;
 			}
+
+			foreach (var returnValuePattern in FeatureCheckReturnValuePatterns.Values) {
+				foreach (var diagnostic in returnValuePattern.CollectDiagnostics (context))
+					yield return diagnostic;
+			}
 		}
 	}
 }
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisReflectionAccessPattern.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisReflectionAccessPattern.cs
index 0e4c45a9f011..85897420596f 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisReflectionAccessPattern.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisReflectionAccessPattern.cs
@@ -50,7 +50,7 @@ public IEnumerable<Diagnostic> CollectDiagnostics (DataFlowAnalyzerContext conte
 			DiagnosticContext diagnosticContext = new (Operation.Syntax.GetLocation ());
 			if (context.EnableTrimAnalyzer &&
 				!OwningSymbol.IsInRequiresUnreferencedCodeAttributeScope (out _) &&
-				!FeatureContext.IsEnabled (RequiresUnreferencedCodeAnalyzer.UnreferencedCode)) {
+				!FeatureContext.IsEnabled (RequiresUnreferencedCodeAnalyzer.FullyQualifiedRequiresUnreferencedCodeAttribute)) {
 				foreach (var diagnostic in ReflectionAccessAnalyzer.GetDiagnosticsForReflectionAccessToDAMOnMethod (diagnosticContext, ReferencedMethod))
 					diagnosticContext.AddDiagnostic (diagnostic);
 			}
diff --git a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisVisitor.cs b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisVisitor.cs
index 6f118ac8c479..9db61498b28c 100644
--- a/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisVisitor.cs
+++ b/src/tools/illink/src/ILLink.RoslynAnalyzer/TrimAnalysis/TrimAnalysisVisitor.cs
@@ -59,12 +59,12 @@ public TrimAnalysisVisitor (
 			_featureChecksVisitor = new FeatureChecksVisitor (dataFlowAnalyzerContext);
 		}
 
-		public override FeatureChecksValue? GetConditionValue (IOperation branchValueOperation, StateValue state)
+		public override FeatureChecksValue GetConditionValue (IOperation branchValueOperation, StateValue state)
 		{
 			return _featureChecksVisitor.Visit (branchValueOperation, state);
 		}
 
-		public override void ApplyCondition (FeatureChecksValue featureChecksValue,  ref LocalStateAndContext<MultiValue, FeatureContext> currentState)
+		public override void ApplyCondition (FeatureChecksValue featureChecksValue, ref LocalStateAndContext<MultiValue, FeatureContext> currentState)
 		{
 			currentState.Context = currentState.Context.Union (new FeatureContext (featureChecksValue.EnabledFeatures));
 		}
@@ -426,6 +426,29 @@ public override void HandleReturnValue (MultiValue returnValue, IOperation opera
 			}
 		}
 
+		public override void HandleReturnConditionValue (FeatureChecksValue returnConditionValue, IOperation operation)
+		{
+			// Return statements should only happen inside of method bodies.
+			Debug.Assert (OwningSymbol is IMethodSymbol);
+			if (OwningSymbol is not IMethodSymbol method)
+				return;
+
+			// FeatureGuard validation needs to happen only for property getters.
+			// Include properties with setters here because they will get validated later.
+			if (method.MethodKind != MethodKind.PropertyGet)
+				return;
+
+			IPropertySymbol propertySymbol = (IPropertySymbol) method.AssociatedSymbol!;
+			var featureCheckAnnotations = propertySymbol.GetFeatureGuardAnnotations ();
+
+			// If there are no feature checks, there is nothing to validate.
+			if (featureCheckAnnotations.IsEmpty())
+				return;
+
+			TrimAnalysisPatterns.Add (
+				new FeatureCheckReturnValuePattern (returnConditionValue, featureCheckAnnotations, operation, propertySymbol));
+		}
+
 		public override MultiValue HandleDelegateCreation (IMethodSymbol method, IOperation operation, in FeatureContext featureContext)
 		{
 			TrimAnalysisPatterns.Add (new TrimAnalysisReflectionAccessPattern (
diff --git a/src/tools/illink/src/ILLink.Shared/DiagnosticId.cs b/src/tools/illink/src/ILLink.Shared/DiagnosticId.cs
index 1c33bb084a04..5d40efbbaaf9 100644
--- a/src/tools/illink/src/ILLink.Shared/DiagnosticId.cs
+++ b/src/tools/illink/src/ILLink.Shared/DiagnosticId.cs
@@ -202,6 +202,10 @@ public enum DiagnosticId
 		GenericRecursionCycle = 3054,
 		CorrectnessOfAbstractDelegatesCannotBeGuaranteed = 3055,
 		RequiresDynamicCodeOnStaticConstructor = 3056,
+
+		// Feature guard diagnostic ids.
+		ReturnValueDoesNotMatchFeatureGuards = 4000,
+		InvalidFeatureGuard = 4001
 	}
 
 	public static class DiagnosticIdExtensions
diff --git a/src/tools/illink/src/ILLink.Shared/SharedStrings.resx b/src/tools/illink/src/ILLink.Shared/SharedStrings.resx
index 111c1c5877de..c9bb62de5263 100644
--- a/src/tools/illink/src/ILLink.Shared/SharedStrings.resx
+++ b/src/tools/illink/src/ILLink.Shared/SharedStrings.resx
@@ -1197,4 +1197,16 @@
   <data name="RedundantSuppressionTitle" xml:space="preserve">
     <value>Unused 'UnconditionalSuppressMessageAttribute' found. Consider removing the unused warning suppression.</value>
   </data>
-</root>
\ No newline at end of file
+  <data name="ReturnValueDoesNotMatchFeatureGuardsMessage" xml:space="preserve">
+    <value>Return value does not match FeatureGuardAttribute '{1}'.</value>
+  </data>
+  <data name="ReturnValueDoesNotMatchFeatureGuardsTitle" xml:space="preserve">
+    <value>Return value does not match FeatureGuard annotations of the property. The check should return false whenever any of the features referenced in the FeatureGuard annotations is disabled.</value>
+  </data>
+  <data name="InvalidFeatureGuardMessage" xml:space="preserve">
+    <value>Invalid FeatureGuardAttribute. The attribute must be placed on a static boolean property with only a 'get' accessor.</value>
+  </data>
+  <data name="InvalidFeatureGuardTitle" xml:space="preserve">
+    <value>Invalid FeatureGuardAttribute.</value>
+  </data>
+</root>
diff --git a/src/tools/illink/src/ILLink.Shared/TrimAnalysis/IntrinsicId.cs b/src/tools/illink/src/ILLink.Shared/TrimAnalysis/IntrinsicId.cs
index a9d3d4e83dd4..9a8e2e343d79 100644
--- a/src/tools/illink/src/ILLink.Shared/TrimAnalysis/IntrinsicId.cs
+++ b/src/tools/illink/src/ILLink.Shared/TrimAnalysis/IntrinsicId.cs
@@ -54,6 +54,10 @@ internal enum IntrinsicId
 		// the reflection body scanner.
 		RequiresReflectionBodyScanner_Sentinel = 1000,
 		/// <summary>
+		/// <see cref="System.Array.CreateInstance(System.Type, int)"/>
+		/// </summary>
+		Array_CreateInstance,
+		/// <summary>
 		/// <see cref="System.Type.MakeGenericType(System.Type[])"/>
 		/// </summary>
 		Type_MakeGenericType,
diff --git a/src/tools/illink/src/ILLink.Shared/TrimAnalysis/Intrinsics.cs b/src/tools/illink/src/ILLink.Shared/TrimAnalysis/Intrinsics.cs
index 72378937ec80..bdc94c11195c 100644
--- a/src/tools/illink/src/ILLink.Shared/TrimAnalysis/Intrinsics.cs
+++ b/src/tools/illink/src/ILLink.Shared/TrimAnalysis/Intrinsics.cs
@@ -293,6 +293,12 @@ public static IntrinsicId GetIntrinsicIdForMethod (MethodProxy calledMethod)
 				"Empty" when calledMethod.IsDeclaredOnType ("System.Array")
 					=> IntrinsicId.Array_Empty,
 
+				// static System.Array.CreateInstance (System.Type type, int length)
+				"CreateInstance" when calledMethod.IsDeclaredOnType ("System.Array")
+					&& calledMethod.HasMetadataParametersCount (2)
+					&& calledMethod.HasParameterOfType ((ParameterIndex) 1, "System.Int32")
+					=> IntrinsicId.Array_CreateInstance,
+
 				// static System.Activator.CreateInstance (System.Type type)
 				// static System.Activator.CreateInstance (System.Type type, bool nonPublic)
 				// static System.Activator.CreateInstance (System.Type type, params object?[]? args)
diff --git a/src/tools/illink/src/linker/BannedSymbols.txt b/src/tools/illink/src/linker/BannedSymbols.txt
index ada1bd0b4e10..f7e6db3c773b 100644
--- a/src/tools/illink/src/linker/BannedSymbols.txt
+++ b/src/tools/illink/src/linker/BannedSymbols.txt
@@ -10,3 +10,5 @@ P:Mono.Cecil.Cil.MethodBody.Instructions;Use LinkContext.GetMethodIL or BannedAp
 P:Mono.Cecil.Cil.MethodBody.ExceptionHandlers;Use LinkContext.GetMethodIL or BannedApiExtensions.ExceptionHandlers(Mono.Linker.LinkContext) instead
 P:Mono.Cecil.Cil.MethodBody.Variables;Use LinkContext.GetMethodIL or BannedApiExtensions.Variables(Mono.Linker.LinkContext) instead
 M:Mono.Linker.Steps.ILProvider/MethodIL.Create;Use ILProvider GetMethodIL instead
+M:Mono.Linker.Steps.MarkScopeStack.PushScope;Use PushLocalScope instead to avoid boxing
+M:Mono.Linker.Steps.MarkScopeStack.PopToParent;Use PopToParentScope instead to avoid boxing
diff --git a/src/tools/illink/src/linker/CompatibilitySuppressions.xml b/src/tools/illink/src/linker/CompatibilitySuppressions.xml
index 4a0a6296c4e2..7bf0a1e0ce69 100644
--- a/src/tools/illink/src/linker/CompatibilitySuppressions.xml
+++ b/src/tools/illink/src/linker/CompatibilitySuppressions.xml
@@ -253,6 +253,10 @@
     <DiagnosticId>CP0001</DiagnosticId>
     <Target>T:Mono.Linker.ILogger</Target>
   </Suppression>
+  <Suppression>
+    <DiagnosticId>CP0001</DiagnosticId>
+    <Target>T:Mono.Linker.InterfaceImplementor</Target>
+  </Suppression>
   <Suppression>
     <DiagnosticId>CP0001</DiagnosticId>
     <Target>T:Mono.Linker.InternalErrorException</Target>
@@ -1481,10 +1485,6 @@
     <DiagnosticId>CP0002</DiagnosticId>
     <Target>M:Mono.Linker.OverrideInformation.get_IsOverrideOfInterfaceMember</Target>
   </Suppression>
-  <Suppression>
-    <DiagnosticId>CP0002</DiagnosticId>
-    <Target>M:Mono.Linker.OverrideInformation.get_IsStaticInterfaceMethodPair</Target>
-  </Suppression>
   <Suppression>
     <DiagnosticId>CP0002</DiagnosticId>
     <Target>M:Mono.Linker.Steps.BaseStep.get_MarkingHelpers</Target>
diff --git a/src/tools/illink/src/linker/Linker.Dataflow/ReflectionMethodBodyScanner.cs b/src/tools/illink/src/linker/Linker.Dataflow/ReflectionMethodBodyScanner.cs
index 2e3fbc4b0cef..72e9339bc3bc 100644
--- a/src/tools/illink/src/linker/Linker.Dataflow/ReflectionMethodBodyScanner.cs
+++ b/src/tools/illink/src/linker/Linker.Dataflow/ReflectionMethodBodyScanner.cs
@@ -263,6 +263,7 @@ public static bool HandleCall (
 				}
 				break;
 
+			case IntrinsicId.Array_CreateInstance:
 			case IntrinsicId.Enum_GetValues:
 			case IntrinsicId.Marshal_SizeOf:
 			case IntrinsicId.Marshal_OffsetOf:
diff --git a/src/tools/illink/src/linker/Linker.Steps/MarkScopeStack.cs b/src/tools/illink/src/linker/Linker.Steps/MarkScopeStack.cs
index 00f5edec5379..ab1ad448754f 100644
--- a/src/tools/illink/src/linker/Linker.Steps/MarkScopeStack.cs
+++ b/src/tools/illink/src/linker/Linker.Steps/MarkScopeStack.cs
@@ -22,7 +22,7 @@ public Scope (in MessageOrigin origin)
 
 		readonly Stack<Scope> _scopeStack;
 
-		readonly struct LocalScope : IDisposable
+		internal readonly struct LocalScope : IDisposable
 		{
 			readonly MessageOrigin _origin;
 			readonly MarkScopeStack _scopeStack;
@@ -51,7 +51,7 @@ public void Dispose ()
 			}
 		}
 
-		readonly struct ParentScope : IDisposable
+		internal readonly struct ParentScope : IDisposable
 		{
 			readonly Scope _parentScope;
 			readonly Scope _childScope;
@@ -78,6 +78,21 @@ public MarkScopeStack ()
 			_scopeStack = new Stack<Scope> ();
 		}
 
+		internal LocalScope PushLocalScope (in MessageOrigin origin)
+		{
+			return new LocalScope (origin, this);
+		}
+
+		internal LocalScope PushLocalScope (in Scope scope)
+		{
+			return new LocalScope (scope, this);
+		}
+
+		internal ParentScope PopToParentScope ()
+		{
+			return new ParentScope (this);
+		}
+
 		public IDisposable PushScope (in MessageOrigin origin)
 		{
 			return new LocalScope (origin, this);
diff --git a/src/tools/illink/src/linker/Linker.Steps/MarkStep.cs b/src/tools/illink/src/linker/Linker.Steps/MarkStep.cs
index bb5e95e0a38d..02fd1412caf8 100644
--- a/src/tools/illink/src/linker/Linker.Steps/MarkStep.cs
+++ b/src/tools/illink/src/linker/Linker.Steps/MarkStep.cs
@@ -35,8 +35,8 @@
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Linq;
+using System.Reflection.Metadata.Ecma335;
 using System.Reflection.Runtime.TypeParsing;
-using System.Runtime.CompilerServices;
 using System.Text.RegularExpressions;
 using ILLink.Shared;
 using ILLink.Shared.TrimAnalysis;
@@ -297,7 +297,7 @@ bool ProcessInternalsVisibleAttributes ()
 				Debug.Assert (attr.Provider is ModuleDefinition or AssemblyDefinition);
 				var assembly = (provider is ModuleDefinition module) ? module.Assembly : provider as AssemblyDefinition;
 
-				using var assemblyScope = ScopeStack.PushScope (new MessageOrigin (assembly));
+				using var assemblyScope = ScopeStack.PushLocalScope (new MessageOrigin (assembly));
 
 				if (!Annotations.IsMarked (attr.Attribute) && IsInternalsVisibleAttributeAssemblyMarked (attr.Attribute)) {
 					MarkCustomAttribute (attr.Attribute, new DependencyInfo (DependencyKind.AssemblyOrModuleAttribute, attr.Provider));
@@ -362,7 +362,7 @@ internal void MarkEntireType (TypeDefinition type, in DependencyInfo reason)
 
 			// Prevent cases where there's nothing on the stack (can happen when marking entire assemblies)
 			// In which case we would generate warnings with no source (hard to debug)
-			using var _ = ScopeStack.CurrentScope.Origin.Provider == null ? ScopeStack.PushScope (new MessageOrigin (type)) : null;
+			using MarkScopeStack.LocalScope? _ = ScopeStack.CurrentScope.Origin.Provider == null ? ScopeStack.PushLocalScope (new MessageOrigin (type)) : null;
 
 			if (!_entireTypesMarked.Add (type))
 				return;
@@ -405,7 +405,7 @@ internal void MarkEntireType (TypeDefinition type, in DependencyInfo reason)
 
 			if (type.HasEvents) {
 				foreach (var ev in type.Events) {
-					MarkEventVisibleToReflection (ev, new DependencyInfo (DependencyKind.MemberOfType, type), ScopeStack.CurrentScope.Origin);
+					MarkEventVisibleToReflection (ev, new DependencyInfo (DependencyKind.MemberOfType, ScopeStack.CurrentScope.Origin), ScopeStack.CurrentScope.Origin);
 				}
 			}
 		}
@@ -451,7 +451,7 @@ bool MarkFullyPreservedAssemblies ()
 
 			// Setup empty scope - there has to be some scope setup since we're doing marking below
 			// but there's no "origin" right now (command line is the origin really)
-			using var localScope = ScopeStack.PushScope (new MessageOrigin ((ICustomAttributeProvider?) null));
+			using var localScope = ScopeStack.PushLocalScope (new MessageOrigin ((ICustomAttributeProvider?) null));
 
 			// Beware: this works on loaded assemblies, not marked assemblies, so it should not be tied to marking.
 			// We could further optimize this to only iterate through assemblies if the last mark iteration loaded
@@ -488,7 +488,7 @@ bool ProcessPrimaryQueue ()
 
 		bool ProcessMarkedPending ()
 		{
-			using var emptyScope = ScopeStack.PushScope (new MessageOrigin (null as ICustomAttributeProvider));
+			using var emptyScope = ScopeStack.PushLocalScope (new MessageOrigin (null as ICustomAttributeProvider));
 
 			bool marked = false;
 			foreach (var pending in Annotations.GetMarkedPending ()) {
@@ -498,7 +498,7 @@ bool ProcessMarkedPending ()
 				if (Annotations.IsProcessed (pending.Key))
 					continue;
 
-				using var localScope = ScopeStack.PushScope (pending.Value);
+				using var localScope = ScopeStack.PushLocalScope (pending.Value);
 
 				switch (pending.Key) {
 				case TypeDefinition type:
@@ -572,7 +572,7 @@ protected virtual void EnqueueMethod (MethodDefinition method, in DependencyInfo
 		void ProcessVirtualMethods ()
 		{
 			foreach ((var method, var scope) in _virtual_methods) {
-				using (ScopeStack.PushScope (scope)) {
+				using (ScopeStack.PushLocalScope (scope)) {
 					ProcessVirtualMethod (method);
 				}
 			}
@@ -597,7 +597,7 @@ void ProcessMarkedTypesWithInterfaces ()
 				// UnusedInterfaces optimization is turned off mark all interface implementations
 				bool unusedInterfacesOptimizationEnabled = Context.IsOptimizationEnabled (CodeOptimizations.UnusedInterfaces, type);
 
-				using (ScopeStack.PushScope (scope)) {
+				using (ScopeStack.PushLocalScope (scope)) {
 					if (Annotations.IsInstantiated (type) || Annotations.IsRelevantToVariantCasting (type) ||
 						!unusedInterfacesOptimizationEnabled) {
 						MarkInterfaceImplementations (type);
@@ -685,7 +685,7 @@ void ProcessPendingBodies ()
 			for (int i = 0; i < _unreachableBodies.Count; i++) {
 				(var body, var scope) = _unreachableBodies[i];
 				if (Annotations.IsInstantiated (body.Method.DeclaringType)) {
-					using (ScopeStack.PushScope (scope))
+					using (ScopeStack.PushLocalScope (scope))
 						MarkMethodBody (body);
 
 					_unreachableBodies.RemoveAt (i--);
@@ -701,17 +701,16 @@ void ProcessVirtualMethod (MethodDefinition method)
 				var defaultImplementations = Annotations.GetDefaultInterfaceImplementations (method);
 				if (defaultImplementations is not null) {
 					foreach (var dimInfo in defaultImplementations) {
-						ProcessDefaultImplementation (dimInfo.ImplementingType, dimInfo.InterfaceImpl, dimInfo.DefaultInterfaceMethod);
+						ProcessDefaultImplementation (dimInfo);
 
-						var ov = new OverrideInformation (method, dimInfo.DefaultInterfaceMethod, Context);
-						if (IsInterfaceImplementationMethodNeededByTypeDueToInterface (ov, dimInfo.ImplementingType))
-							MarkMethod (ov.Override, new DependencyInfo (DependencyKind.Override, ov.Base), ScopeStack.CurrentScope.Origin);
+						if (IsInterfaceImplementationMethodNeededByTypeDueToInterface (dimInfo))
+							MarkMethod (dimInfo.Override, new DependencyInfo (DependencyKind.Override, dimInfo.Base), ScopeStack.CurrentScope.Origin);
 					}
 				}
 				var overridingMethods = Annotations.GetOverrides (method);
 				if (overridingMethods is not null) {
-					foreach (var ov in overridingMethods) {
-						if (IsInterfaceImplementationMethodNeededByTypeDueToInterface (ov, ov.Override.DeclaringType))
+					foreach (OverrideInformation ov in overridingMethods) {
+						if (IsInterfaceImplementationMethodNeededByTypeDueToInterface (ov))
 							MarkMethod (ov.Override, new DependencyInfo (DependencyKind.Override, ov.Base), ScopeStack.CurrentScope.Origin);
 					}
 				}
@@ -819,13 +818,14 @@ bool RequiresInterfaceRecursively (TypeDefinition typeToExamine, TypeDefinition
 			return false;
 		}
 
-		void ProcessDefaultImplementation (TypeDefinition typeWithDefaultImplementedInterfaceMethod, InterfaceImplementation implementation, MethodDefinition implementationMethod)
+		void ProcessDefaultImplementation (OverrideInformation ov)
 		{
-			if ((!implementationMethod.IsStatic && !Annotations.IsInstantiated (typeWithDefaultImplementedInterfaceMethod))
-				|| implementationMethod.IsStatic && !Annotations.IsRelevantToVariantCasting (typeWithDefaultImplementedInterfaceMethod))
+			Debug.Assert (ov.IsOverrideOfInterfaceMember);
+			if ((!ov.Override.IsStatic && !Annotations.IsInstantiated (ov.InterfaceImplementor.Implementor))
+				|| ov.Override.IsStatic && !Annotations.IsRelevantToVariantCasting (ov.InterfaceImplementor.Implementor))
 				return;
 
-			MarkInterfaceImplementation (implementation);
+			MarkInterfaceImplementation (ov.InterfaceImplementor.InterfaceImplementation);
 		}
 
 		void MarkMarshalSpec (IMarshalInfoProvider spec, in DependencyInfo reason)
@@ -874,7 +874,7 @@ void MarkCustomAttributes (ICustomAttributeProvider provider, in DependencyInfo
 				return;
 
 			IMemberDefinition providerMember = (IMemberDefinition) provider; ;
-			using (ScopeStack.PushScope (new MessageOrigin (providerMember)))
+			using (ScopeStack.PushLocalScope (new MessageOrigin (providerMember)))
 				foreach (var dynamicDependency in Annotations.GetLinkerAttributes<DynamicDependency> (providerMember))
 					MarkDynamicDependency (dynamicDependency, providerMember);
 		}
@@ -1407,7 +1407,7 @@ protected virtual void MarkAssembly (AssemblyDefinition assembly, DependencyInfo
 			if (CheckProcessed (assembly))
 				return;
 
-			using var assemblyScope = ScopeStack.PushScope (new MessageOrigin (assembly));
+			using var assemblyScope = ScopeStack.PushLocalScope (new MessageOrigin (assembly));
 
 			EmbeddedXmlInfo.ProcessDescriptors (assembly, Context);
 
@@ -1537,7 +1537,7 @@ bool ProcessLazyAttributes ()
 				Debug.Assert (provider is ModuleDefinition or AssemblyDefinition);
 				var assembly = (provider is ModuleDefinition module) ? module.Assembly : provider as AssemblyDefinition;
 
-				using var assemblyScope = ScopeStack.PushScope (new MessageOrigin (assembly));
+				using var assemblyScope = ScopeStack.PushLocalScope (new MessageOrigin (assembly));
 
 				var resolved = Context.Resolve (customAttribute.Constructor);
 				if (resolved == null) {
@@ -1607,7 +1607,7 @@ bool ProcessLateMarkedAttributes ()
 				}
 
 				markOccurred = true;
-				using (ScopeStack.PushScope (scope)) {
+				using (ScopeStack.PushLocalScope (scope)) {
 					MarkCustomAttribute (customAttribute, reason);
 				}
 			}
@@ -1788,7 +1788,7 @@ void MarkField (FieldDefinition field, in DependencyInfo reason, in MessageOrigi
 			// Use the original scope for marking the declaring type - it provides better warning message location
 			MarkType (field.DeclaringType, new DependencyInfo (DependencyKind.DeclaringType, field));
 
-			using var fieldScope = ScopeStack.PushScope (new MessageOrigin (field));
+			using var fieldScope = ScopeStack.PushLocalScope (new MessageOrigin (field));
 			MarkType (field.FieldType, new DependencyInfo (DependencyKind.FieldType, field));
 			MarkCustomAttributes (field, new DependencyInfo (DependencyKind.CustomAttribute, field));
 			MarkMarshalSpec (field, new DependencyInfo (DependencyKind.FieldMarshalSpec, field));
@@ -2007,7 +2007,7 @@ internal void MarkStaticConstructorVisibleToReflection (TypeDefinition type, in
 			if (reference == null)
 				return null;
 
-			using var localScope = origin.HasValue ? ScopeStack.PushScope (origin.Value) : null;
+			using MarkScopeStack.LocalScope? localScope = origin.HasValue ? ScopeStack.PushLocalScope (origin.Value) : null;
 
 			(reference, reason) = GetOriginalType (reference, reason);
 
@@ -2053,7 +2053,7 @@ internal void MarkStaticConstructorVisibleToReflection (TypeDefinition type, in
 			if (type.Scope is ModuleDefinition module)
 				MarkModule (module, new DependencyInfo (DependencyKind.ScopeOfType, type));
 
-			using var typeScope = ScopeStack.PushScope (new MessageOrigin (type));
+			using var typeScope = ScopeStack.PushLocalScope (new MessageOrigin (type));
 
 			foreach (Action<TypeDefinition> handleMarkType in MarkContext.MarkTypeActions)
 				handleMarkType (type);
@@ -2141,7 +2141,7 @@ internal void MarkStaticConstructorVisibleToReflection (TypeDefinition type, in
 					}
 				}
 				if (ShouldMarkTypeStaticConstructor (type) && reason.Kind != DependencyKind.TriggersCctorForCalledMethod) {
-					using (ScopeStack.PopToParent ())
+					using (ScopeStack.PopToParentScope ())
 						MarkStaticConstructor (type, new DependencyInfo (DependencyKind.CctorForType, type), ScopeStack.CurrentScope.Origin);
 				}
 			}
@@ -2440,7 +2440,7 @@ void MarkNamedProperty (TypeDefinition type, string property_name, in Dependency
 				if (property.Name != property_name)
 					continue;
 
-				using (ScopeStack.PushScope (new MessageOrigin (property))) {
+				using (ScopeStack.PushLocalScope (new MessageOrigin (property))) {
 					// This marks methods directly without reporting the property.
 					MarkMethod (property.GetMethod, reason, ScopeStack.CurrentScope.Origin);
 					MarkMethod (property.SetMethod, reason, ScopeStack.CurrentScope.Origin);
@@ -2450,26 +2450,27 @@ void MarkNamedProperty (TypeDefinition type, string property_name, in Dependency
 
 		void MarkInterfaceImplementations (TypeDefinition type)
 		{
-			if (!type.HasInterfaces)
+			var ifaces = Annotations.GetRecursiveInterfaces (type);
+			if (ifaces is null)
 				return;
-
-			foreach (var iface in type.Interfaces) {
+			foreach (var (ifaceType, impls) in ifaces) {
 				// Only mark interface implementations of interface types that have been marked.
 				// This enables stripping of interfaces that are never used
-				if (ShouldMarkInterfaceImplementation (type, iface))
-					MarkInterfaceImplementation (iface, new MessageOrigin (type));
+				if (ShouldMarkInterfaceImplementationList (type, impls, ifaceType))
+					MarkInterfaceImplementationList (impls, new MessageOrigin (type));
 			}
 		}
 
-		protected virtual bool ShouldMarkInterfaceImplementation (TypeDefinition type, InterfaceImplementation iface)
+
+		protected virtual bool ShouldMarkInterfaceImplementationList (TypeDefinition type, List<InterfaceImplementation> ifaces, TypeReference ifaceType)
 		{
-			if (Annotations.IsMarked (iface))
+			if (ifaces.All (Annotations.IsMarked))
 				return false;
 
 			if (!Context.IsOptimizationEnabled (CodeOptimizations.UnusedInterfaces, type))
 				return true;
 
-			if (Context.Resolve (iface.InterfaceType) is not TypeDefinition resolvedInterfaceType)
+			if (Context.Resolve (ifaceType) is not TypeDefinition resolvedInterfaceType)
 				return false;
 
 			if (Annotations.IsMarked (resolvedInterfaceType))
@@ -2549,11 +2550,11 @@ bool IsMethodNeededByTypeDueToPreservedScope (MethodDefinition method)
 		/// <summary>
 		/// Returns true if the override method is required due to the interface that the base method is declared on. See doc at <see href="docs/methods-kept-by-interface.md"/> for explanation of logic.
 		/// </summary>
-		bool IsInterfaceImplementationMethodNeededByTypeDueToInterface (OverrideInformation overrideInformation, TypeDefinition typeThatImplsInterface)
+		bool IsInterfaceImplementationMethodNeededByTypeDueToInterface (OverrideInformation overrideInformation)
 		{
 			var @base = overrideInformation.Base;
 			var method = overrideInformation.Override;
-			Debug.Assert (@base.DeclaringType.IsInterface);
+			Debug.Assert (overrideInformation.IsOverrideOfInterfaceMember);
 			if (@base is null || method is null || @base.DeclaringType is null)
 				return false;
 
@@ -2562,7 +2563,7 @@ bool IsInterfaceImplementationMethodNeededByTypeDueToInterface (OverrideInformat
 
 			// If the interface implementation is not marked, do not mark the implementation method
 			// A type that doesn't implement the interface isn't required to have methods that implement the interface.
-			InterfaceImplementation? iface = overrideInformation.MatchingInterfaceImplementation;
+			InterfaceImplementation? iface = overrideInformation.InterfaceImplementor.InterfaceImplementation;
 			if (!((iface is not null && Annotations.IsMarked (iface))
 				|| IsInterfaceImplementationMarkedRecursively (method.DeclaringType, @base.DeclaringType)))
 				return false;
@@ -2580,12 +2581,12 @@ bool IsInterfaceImplementationMethodNeededByTypeDueToInterface (OverrideInformat
 			// If the method is static and the implementing type is relevant to variant casting, mark the implementation method.
 			// A static method may only be called through a constrained call if the type is relevant to variant casting.
 			if (@base.IsStatic)
-				return Annotations.IsRelevantToVariantCasting (typeThatImplsInterface)
+				return Annotations.IsRelevantToVariantCasting (overrideInformation.InterfaceImplementor.Implementor)
 					|| IgnoreScope (@base.DeclaringType.Scope);
 
 			// If the implementing type is marked as instantiated, mark the implementation method.
 			// If the type is not instantiated, do not mark the implementation method
-			return Annotations.IsInstantiated (typeThatImplsInterface);
+			return Annotations.IsInstantiated (overrideInformation.InterfaceImplementor.Implementor);
 		}
 
 		static bool IsSpecialSerializationConstructor (MethodDefinition method)
@@ -2804,7 +2805,7 @@ void MarkGenericArguments (IGenericInstance instance)
 					// The only two implementations of IGenericInstance both derive from MemberReference
 					Debug.Assert (instance is MemberReference);
 
-					using var _ = ScopeStack.CurrentScope.Origin.Provider == null ? ScopeStack.PushScope (new MessageOrigin (((MemberReference) instance).Resolve ())) : null;
+					using MarkScopeStack.LocalScope? _ = ScopeStack.CurrentScope.Origin.Provider == null ? ScopeStack.PushLocalScope (new MessageOrigin (((MemberReference) instance).Resolve ())) : null;
 					var scanner = new GenericArgumentDataFlow (Context, this, ScopeStack.CurrentScope.Origin);
 					scanner.ProcessGenericArgumentDataFlow (parameter, argument);
 				}
@@ -2832,7 +2833,7 @@ void MarkGenericArguments (IGenericInstance instance)
 
 		void ApplyPreserveInfo (TypeDefinition type)
 		{
-			using var typeScope = ScopeStack.PushScope (new MessageOrigin (type));
+			using var typeScope = ScopeStack.PushLocalScope (new MessageOrigin (type));
 
 			if (Annotations.TryGetPreserve (type, out TypePreserve preserve)) {
 				if (!Annotations.SetAppliedPreserve (type, preserve))
@@ -3203,8 +3204,8 @@ protected virtual void ProcessMethod (MethodDefinition method, in DependencyInfo
 				throw new InternalErrorException ($"Unsupported method dependency {reason.Kind}");
 #endif
 			ScopeStack.AssertIsEmpty ();
-			using var parentScope = ScopeStack.PushScope (new MarkScopeStack.Scope (origin));
-			using var methodScope = ScopeStack.PushScope (new MessageOrigin (method));
+			using var parentScope = ScopeStack.PushLocalScope (new MarkScopeStack.Scope (origin));
+			using var methodScope = ScopeStack.PushLocalScope (new MessageOrigin (method));
 
 			bool markedForCall =
 				reason.Kind == DependencyKind.DirectCall ||
@@ -3256,7 +3257,7 @@ protected virtual void ProcessMethod (MethodDefinition method, in DependencyInfo
 					// Only if the interface method is referenced, then all the methods which implemented must be kept, but not the other way round.
 					if (!markAllOverrides &&
 						Context.Resolve (@base) is MethodDefinition baseDefinition
-						&& new OverrideInformation.OverridePair (baseDefinition, method).IsStaticInterfaceMethodPair ())
+						&& baseDefinition.DeclaringType.IsInterface && baseDefinition.IsStatic && method.IsStatic)
 						continue;
 					MarkMethod (@base, new DependencyInfo (DependencyKind.MethodImplOverride, method), ScopeStack.CurrentScope.Origin);
 					MarkExplicitInterfaceImplementation (method, @base);
@@ -3324,6 +3325,7 @@ static DependencyKind PropagateDependencyKindToAccessors (DependencyKind parentD
 			case DependencyKind.AlreadyMarked:
 			case DependencyKind.TypePreserve:
 			case DependencyKind.PreservedMethod:
+			case DependencyKind.DynamicallyAccessedMemberOnType:
 				return parentDependencyKind;
 
 			default:
@@ -3360,7 +3362,7 @@ protected virtual void MarkRequirementsForInstantiatedTypes (TypeDefinition type
 
 			Annotations.MarkInstantiated (type);
 
-			using var typeScope = ScopeStack.PushScope (new MessageOrigin (type));
+			using var typeScope = ScopeStack.PushLocalScope (new MessageOrigin (type));
 
 			MarkInterfaceImplementations (type);
 
@@ -3450,7 +3452,7 @@ bool MarkDisablePrivateReflectionAttribute ()
 			if (disablePrivateReflection == null)
 				throw new LinkerFatalErrorException (MessageContainer.CreateErrorMessage (null, DiagnosticId.CouldNotFindType, "System.Runtime.CompilerServices.DisablePrivateReflectionAttribute"));
 
-			using (ScopeStack.PushScope (new MessageOrigin (null as ICustomAttributeProvider))) {
+			using (ScopeStack.PushLocalScope (new MessageOrigin (null as ICustomAttributeProvider))) {
 				MarkType (disablePrivateReflection, DependencyInfo.DisablePrivateReflectionRequirement);
 
 				var ctor = MarkMethodIf (disablePrivateReflection.Methods, MethodDefinitionExtensions.IsDefaultConstructor, new DependencyInfo (DependencyKind.DisablePrivateReflectionRequirement, disablePrivateReflection), ScopeStack.CurrentScope.Origin);
@@ -3570,7 +3572,7 @@ protected internal void MarkProperty (PropertyDefinition prop, in DependencyInfo
 			if (!Annotations.MarkProcessed (prop, reason))
 				return;
 
-			using var propertyScope = ScopeStack.PushScope (new MessageOrigin (prop));
+			using var propertyScope = ScopeStack.PushLocalScope (new MessageOrigin (prop));
 
 			// Consider making this more similar to MarkEvent method?
 			MarkCustomAttributes (prop, new DependencyInfo (DependencyKind.CustomAttribute, prop));
@@ -3582,15 +3584,15 @@ protected internal virtual void MarkEvent (EventDefinition evt, in DependencyInf
 			if (!Annotations.MarkProcessed (evt, reason))
 				return;
 
-			using var eventScope = ScopeStack.PushScope (new MessageOrigin (evt));
-
-			MarkCustomAttributes (evt, new DependencyInfo (DependencyKind.CustomAttribute, evt));
-
+			var origin = reason.Source is IMemberDefinition member ? new MessageOrigin (member) : ScopeStack.CurrentScope.Origin;
 			DependencyKind dependencyKind = PropagateDependencyKindToAccessors (reason.Kind, DependencyKind.EventMethod);
-			MarkMethodIfNotNull (evt.AddMethod, new DependencyInfo (dependencyKind, evt), ScopeStack.CurrentScope.Origin);
-			MarkMethodIfNotNull (evt.InvokeMethod, new DependencyInfo (dependencyKind, evt), ScopeStack.CurrentScope.Origin);
-			MarkMethodIfNotNull (evt.RemoveMethod, new DependencyInfo (dependencyKind, evt), ScopeStack.CurrentScope.Origin);
+			MarkMethodIfNotNull (evt.AddMethod, new DependencyInfo (dependencyKind, evt), origin);
+			MarkMethodIfNotNull (evt.InvokeMethod, new DependencyInfo (dependencyKind, evt), origin);
+			MarkMethodIfNotNull (evt.RemoveMethod, new DependencyInfo (dependencyKind, evt), origin);
+
+			using var eventScope = ScopeStack.PushLocalScope (new MessageOrigin (evt));
 
+			MarkCustomAttributes (evt, new DependencyInfo (DependencyKind.CustomAttribute, evt));
 			DoAdditionalEventProcessing (evt);
 		}
 
@@ -3681,7 +3683,7 @@ bool MarkAndCheckRequiresReflectionMethodBodyScanner (MethodIL methodIL)
 
 			requiresReflectionMethodBodyScanner =
 				ReflectionMethodBodyScanner.RequiresReflectionMethodBodyScannerForMethodBody (Context, methodIL.Method);
-			using var _ = ScopeStack.PushScope (new MessageOrigin (methodIL.Method));
+			using var _ = ScopeStack.PushLocalScope (new MessageOrigin (methodIL.Method));
 			foreach (Instruction instruction in methodIL.Instructions)
 				MarkInstruction (instruction, methodIL.Method, ref requiresReflectionMethodBodyScanner);
 
@@ -3764,8 +3766,7 @@ protected virtual void MarkInstruction (Instruction instruction, MethodDefinitio
 					ScopeStack.UpdateCurrentScopeInstructionOffset (instruction.Offset);
 					if (markForReflectionAccess) {
 						MarkMethodVisibleToReflection (methodReference, new DependencyInfo (dependencyKind, method), ScopeStack.CurrentScope.Origin);
-					}
-					else {
+					} else {
 						MarkMethod (methodReference, new DependencyInfo (dependencyKind, method), ScopeStack.CurrentScope.Origin);
 					}
 					break;
@@ -3825,6 +3826,12 @@ protected virtual void MarkInstruction (Instruction instruction, MethodDefinitio
 			}
 		}
 
+		void MarkInterfaceImplementationList (List<InterfaceImplementation> ifaces, MessageOrigin? origin = null, DependencyInfo? reason = null)
+		{
+			foreach (var iface in ifaces) {
+				MarkInterfaceImplementation (iface, origin, reason);
+			}
+		}
 
 		protected internal virtual void MarkInterfaceImplementation (InterfaceImplementation iface, MessageOrigin? origin = null, DependencyInfo? reason = null)
 		{
@@ -3832,7 +3839,7 @@ protected internal virtual void MarkInterfaceImplementation (InterfaceImplementa
 				return;
 			Annotations.MarkProcessed (iface, reason ?? new DependencyInfo (DependencyKind.InterfaceImplementationOnType, ScopeStack.CurrentScope.Origin.Provider));
 
-			using var localScope = origin.HasValue ? ScopeStack.PushScope (origin.Value) : null;
+			using MarkScopeStack.LocalScope? localScope = origin.HasValue ? ScopeStack.PushLocalScope (origin.Value) : null;
 
 			// Blame the type that has the interfaceimpl, expecting the type itself to get marked for other reasons.
 			MarkCustomAttributes (iface, new DependencyInfo (DependencyKind.CustomAttribute, iface));
diff --git a/src/tools/illink/src/linker/Linker.Steps/RootAssemblyInputStep.cs b/src/tools/illink/src/linker/Linker.Steps/RootAssemblyInputStep.cs
index d29432e8b60c..b167ed5f58be 100644
--- a/src/tools/illink/src/linker/Linker.Steps/RootAssemblyInputStep.cs
+++ b/src/tools/illink/src/linker/Linker.Steps/RootAssemblyInputStep.cs
@@ -78,7 +78,8 @@ protected override void Process ()
 					CodeOptimizations.RemoveLinkAttributes |
 					CodeOptimizations.RemoveSubstitutions |
 					CodeOptimizations.RemoveDynamicDependencyAttribute |
-					CodeOptimizations.OptimizeTypeHierarchyAnnotations, assembly.Name.Name);
+					CodeOptimizations.OptimizeTypeHierarchyAnnotations |
+					CodeOptimizations.SubstituteFeatureGuards, assembly.Name.Name);
 
 				// Enable EventSource special handling
 				Context.DisableEventSourceSpecialHandling = false;
diff --git a/src/tools/illink/src/linker/Linker/Annotations.cs b/src/tools/illink/src/linker/Linker/Annotations.cs
index 8f7747cba354..a7f77cf0efae 100644
--- a/src/tools/illink/src/linker/Linker/Annotations.cs
+++ b/src/tools/illink/src/linker/Linker/Annotations.cs
@@ -34,6 +34,7 @@
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Linq;
+using System.Reflection.Metadata.Ecma335;
 using ILLink.Shared.TrimAnalysis;
 using Mono.Cecil;
 using Mono.Cecil.Cil;
@@ -462,7 +463,7 @@ public bool IsPublic (IMetadataTokenProvider provider)
 		/// DefaultInterfaceMethod is the method that implements <paramref name="method"/>.
 		/// </summary>
 		/// <param name="method">The interface method to find default implementations for</param>
-		public IEnumerable<(TypeDefinition ImplementingType, InterfaceImplementation InterfaceImpl, MethodDefinition DefaultInterfaceMethod)>? GetDefaultInterfaceImplementations (MethodDefinition method)
+		public IEnumerable<OverrideInformation>? GetDefaultInterfaceImplementations (MethodDefinition method)
 		{
 			return TypeMapInfo.GetDefaultInterfaceImplementations (method);
 		}
@@ -717,5 +718,10 @@ public void EnqueueVirtualMethod (MethodDefinition method)
 			if (FlowAnnotations.RequiresVirtualMethodDataFlowAnalysis (method) || HasLinkerAttribute<RequiresUnreferencedCodeAttribute> (method))
 				VirtualMethodsWithAnnotationsToValidate.Add (method);
 		}
+
+		internal List<(TypeReference, List<InterfaceImplementation>)>? GetRecursiveInterfaces (TypeDefinition type)
+		{
+			return TypeMapInfo.GetRecursiveInterfaces (type);
+		}
 	}
 }
diff --git a/src/tools/illink/src/linker/Linker/CustomAttributeSource.cs b/src/tools/illink/src/linker/Linker/CustomAttributeSource.cs
index f11419de4b4b..f32e93a07857 100644
--- a/src/tools/illink/src/linker/Linker/CustomAttributeSource.cs
+++ b/src/tools/illink/src/linker/Linker/CustomAttributeSource.cs
@@ -52,6 +52,14 @@ public bool TryGetEmbeddedXmlInfo (ICustomAttributeProvider provider, [NotNullWh
 			return xmlInfo != null;
 		}
 
+		public IEnumerable<CustomAttribute> GetCustomAttributes (ICustomAttributeProvider provider, string attributeNamespace, string attributeName)
+		{
+			foreach (var attr in GetCustomAttributes (provider)) {
+				if (attr.AttributeType.Namespace == attributeNamespace && attr.AttributeType.Name == attributeName)
+					yield return attr;
+			}
+		}
+
 		public IEnumerable<CustomAttribute> GetCustomAttributes (ICustomAttributeProvider provider)
 		{
 			if (provider.HasCustomAttributes) {
diff --git a/src/tools/illink/src/linker/Linker/DictionaryExtensions.cs b/src/tools/illink/src/linker/Linker/DictionaryExtensions.cs
new file mode 100644
index 000000000000..9cf8945fe480
--- /dev/null
+++ b/src/tools/illink/src/linker/Linker/DictionaryExtensions.cs
@@ -0,0 +1,20 @@
+﻿// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System.Collections.Generic;
+
+namespace Mono.Linker
+{
+	internal static class DictionaryExtensions
+	{
+		public static void AddToList<TKey, TElement> (this Dictionary<TKey, List<TElement>> me, TKey key, TElement value)
+			where TKey : notnull
+		{
+			if (!me.TryGetValue (key, out List<TElement>? valueList)) {
+				valueList = new ();
+				me[key] = valueList;
+			}
+			valueList.Add (value);
+		}
+	}
+}
diff --git a/src/tools/illink/src/linker/Linker/Driver.cs b/src/tools/illink/src/linker/Linker/Driver.cs
index 742ee2140b9c..f699127b16c3 100644
--- a/src/tools/illink/src/linker/Linker/Driver.cs
+++ b/src/tools/illink/src/linker/Linker/Driver.cs
@@ -100,7 +100,7 @@ public static bool ProcessResponseFile (string[] args, out Queue<string> result)
 		{
 			result = new Queue<string> ();
 			foreach (string arg in args) {
-				if (arg.StartsWith ("@")) {
+				if (arg.StartsWith ('@')) {
 					try {
 						string responseFileName = arg.Substring (1);
 						using (var responseFileText = new StreamReader (responseFileName))
@@ -933,7 +933,7 @@ protected bool AddMarkHandler (Pipeline pipeline, string arg)
 		bool TryGetCustomAssembly (ref string arg, [NotNullWhen (true)] out Assembly? assembly)
 		{
 			assembly = null;
-			int pos = arg.IndexOf (",");
+			int pos = arg.IndexOf (',');
 			if (pos == -1)
 				return false;
 
@@ -963,7 +963,7 @@ protected bool AddCustomStep (Pipeline pipeline, string arg)
 				}
 				customStepName = parts[1];
 
-				if (!parts[0].StartsWith ("-") && !parts[0].StartsWith ("+")) {
+				if (!parts[0].StartsWith ('-') && !parts[0].StartsWith ('+')) {
 					Context.LogError (null, DiagnosticId.ExpectedSignToControlNewStepInsertion);
 					return false;
 				}
@@ -1179,6 +1179,9 @@ protected bool GetOptimizationName (string text, out CodeOptimizations optimizat
 			case "sealer":
 				optimization = CodeOptimizations.Sealer;
 				return true;
+			case "substitutefeatureguards":
+				optimization = CodeOptimizations.SubstituteFeatureGuards;
+				return true;
 			}
 
 			Context.LogError (null, DiagnosticId.InvalidOptimizationValue, text);
@@ -1236,7 +1239,7 @@ bool GetBoolParam (string token, Action<bool> action)
 				return true;
 			}
 
-			if (arg.StartsWith ("-") || arg.StartsWith ("/")) {
+			if (arg.StartsWith ('-') || arg.StartsWith ('/')) {
 				action (true);
 				return true;
 			}
@@ -1269,7 +1272,7 @@ bool GetStringParam (string token, [NotNullWhen (true)] out string? value)
 				return null;
 
 			var arg = arguments.Peek ();
-			if (arg.StartsWith ("-") || arg.StartsWith ("/"))
+			if (arg.StartsWith ('-') || arg.StartsWith ('/'))
 				return null;
 
 			arguments.Dequeue ();
@@ -1361,6 +1364,7 @@ static void Usage ()
 			Console.WriteLine ("                               unreachablebodies: Instance methods that are marked but not executed are converted to throws");
 			Console.WriteLine ("                               unusedinterfaces: Removes interface types from declaration when not used");
 			Console.WriteLine ("                               unusedtypechecks: Inlines never successful type checks");
+			Console.WriteLine ("                               substitutefeatureguards: Substitutes properties annotated as FeatureGuard(typeof(RequiresUnreferencedCodeAttribute)) to false");
 			Console.WriteLine ("  --enable-opt NAME [ASM]    Enable one of the additional optimizations globaly or for a specific assembly name");
 			Console.WriteLine ("                               sealer: Any method or type which does not have override is marked as sealed");
 			Console.WriteLine ("  --explicit-reflection      Adds to members never used through reflection DisablePrivateReflection attribute. Defaults to false");
diff --git a/src/tools/illink/src/linker/Linker/InterfaceImplementor.cs b/src/tools/illink/src/linker/Linker/InterfaceImplementor.cs
new file mode 100644
index 000000000000..e981ce872703
--- /dev/null
+++ b/src/tools/illink/src/linker/Linker/InterfaceImplementor.cs
@@ -0,0 +1,59 @@
+// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Mono.Cecil;
+
+namespace Mono.Linker
+{
+	public class InterfaceImplementor
+	{
+		/// <summary>
+		/// The type that implements <see cref="InterfaceImplementor.InterfaceType"/>.
+		/// </summary>
+		public TypeDefinition Implementor { get; }
+		/// <summary>
+		/// The .interfaceimpl on <see cref="InterfaceImplementor.Implementor"/>that points to <see cref="InterfaceImplementor.InterfaceType"/>
+		/// </summary>
+		public InterfaceImplementation InterfaceImplementation { get; }
+		/// <summary>
+		/// The type of the interface that is implemented by <see cref="InterfaceImplementor.Implementor"/>
+		/// </summary>
+		public TypeDefinition InterfaceType { get; }
+
+		public InterfaceImplementor (TypeDefinition implementor, InterfaceImplementation interfaceImplementation, TypeDefinition interfaceType, IMetadataResolver resolver)
+		{
+			Implementor = implementor;
+			InterfaceImplementation = interfaceImplementation;
+			InterfaceType = interfaceType;
+			Debug.Assert(resolver.Resolve (interfaceImplementation.InterfaceType) == interfaceType);
+		}
+
+		public static InterfaceImplementor Create(TypeDefinition implementor, TypeDefinition interfaceType, IMetadataResolver resolver)
+		{
+			foreach(InterfaceImplementation iface in implementor.Interfaces) {
+				if (resolver.Resolve(iface.InterfaceType) == interfaceType) {
+					return new InterfaceImplementor(implementor, iface, interfaceType, resolver);
+				}
+			}
+
+			Queue<TypeDefinition> ifacesToCheck = new ();
+			ifacesToCheck.Enqueue(implementor);
+			while (ifacesToCheck.Count > 0) {
+				var currentIface = ifacesToCheck.Dequeue ();
+
+				foreach(InterfaceImplementation ifaceImpl in currentIface.Interfaces) {
+					var iface = resolver.Resolve (ifaceImpl.InterfaceType);
+					if (iface == interfaceType) {
+						return new InterfaceImplementor(implementor, ifaceImpl, interfaceType, resolver);
+					}
+					ifacesToCheck.Enqueue (iface);
+				}
+			}
+			throw new InvalidOperationException ($"Type '{implementor.FullName}' does not implement interface '{interfaceType.FullName}' directly or through any interfaces");
+		}
+	}
+}
diff --git a/src/tools/illink/src/linker/Linker/LinkContext.cs b/src/tools/illink/src/linker/Linker/LinkContext.cs
index 41fcba1a6f05..4af4cdc654ab 100644
--- a/src/tools/illink/src/linker/Linker/LinkContext.cs
+++ b/src/tools/illink/src/linker/Linker/LinkContext.cs
@@ -246,7 +246,8 @@ protected LinkContext (Pipeline pipeline, ILogger logger, string outputDirectory
 				CodeOptimizations.RemoveLinkAttributes |
 				CodeOptimizations.RemoveSubstitutions |
 				CodeOptimizations.RemoveDynamicDependencyAttribute |
-				CodeOptimizations.OptimizeTypeHierarchyAnnotations;
+				CodeOptimizations.OptimizeTypeHierarchyAnnotations |
+				CodeOptimizations.SubstituteFeatureGuards;
 
 			DisableEventSourceSpecialHandling = true;
 
@@ -266,7 +267,7 @@ public bool HasFeatureValue (string feature, bool value)
 
 		public TypeDefinition? GetType (string fullName)
 		{
-			int pos = fullName.IndexOf (",");
+			int pos = fullName.IndexOf (',');
 			fullName = TypeReferenceExtensions.ToCecilName (fullName);
 			if (pos == -1) {
 				foreach (AssemblyDefinition asm in GetReferencedAssemblies ()) {
@@ -1144,5 +1145,10 @@ public enum CodeOptimizations
 		/// Otherwise, type annotation will only be applied with calls to object.GetType()
 		/// </summary>
 		OptimizeTypeHierarchyAnnotations = 1 << 24,
+
+		/// <summary>
+		/// Option to substitute properties annotated as FeatureGuard(typeof(RequiresUnreferencedCodeAttribute)) with false
+		/// </summary>
+		SubstituteFeatureGuards = 1 << 25,
 	}
 }
diff --git a/src/tools/illink/src/linker/Linker/MemberActionStore.cs b/src/tools/illink/src/linker/Linker/MemberActionStore.cs
index 84f2cfd7fb46..51d792fc45b4 100644
--- a/src/tools/illink/src/linker/Linker/MemberActionStore.cs
+++ b/src/tools/illink/src/linker/Linker/MemberActionStore.cs
@@ -2,7 +2,9 @@
 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
 
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
+using ILLink.Shared;
 using Mono.Cecil;
 
 namespace Mono.Linker
@@ -20,7 +22,7 @@ public MemberActionStore (LinkContext context)
 			_context = context;
 		}
 
-		public bool TryGetSubstitutionInfo (MemberReference member, [NotNullWhen (true)] out SubstitutionInfo? xmlInfo)
+		private bool TryGetSubstitutionInfo (MemberReference member, [NotNullWhen (true)] out SubstitutionInfo? xmlInfo)
 		{
 			var assembly = member.Module.Assembly;
 			if (!_embeddedXmlInfos.TryGetValue (assembly, out xmlInfo)) {
@@ -41,6 +43,9 @@ public MethodAction GetAction (MethodDefinition method)
 					return action;
 			}
 
+			if (TryGetFeatureCheckValue (method, out _))
+				return MethodAction.ConvertToStub;
+
 			return MethodAction.Nothing;
 		}
 
@@ -49,10 +54,78 @@ public bool TryGetMethodStubValue (MethodDefinition method, out object? value)
 			if (PrimarySubstitutionInfo.MethodStubValues.TryGetValue (method, out value))
 				return true;
 
-			if (!TryGetSubstitutionInfo (method, out var embeddedXml))
+			if (TryGetSubstitutionInfo (method, out var embeddedXml)
+				&& embeddedXml.MethodStubValues.TryGetValue (method, out value))
+				return true;
+
+			if (TryGetFeatureCheckValue (method, out bool bValue)) {
+				value = bValue ? 1 : 0;
+				return true;
+			}
+
+			return false;
+		}
+
+		internal bool TryGetFeatureCheckValue (MethodDefinition method, out bool value)
+		{
+			value = false;
+
+			if (!method.IsStatic)
+				return false;
+
+			if (method.ReturnType.MetadataType != MetadataType.Boolean)
+				return false;
+
+			if (FindProperty (method) is not PropertyDefinition property)
 				return false;
 
-			return embeddedXml.MethodStubValues.TryGetValue (method, out value);
+			if (property.SetMethod != null)
+				return false;
+
+			foreach (var featureSwitchDefinitionAttribute in _context.CustomAttributes.GetCustomAttributes (property, "System.Diagnostics.CodeAnalysis", "FeatureSwitchDefinitionAttribute")) {
+				if (featureSwitchDefinitionAttribute.ConstructorArguments is not [CustomAttributeArgument { Value: string switchName }])
+					continue;
+
+				// If there's a FeatureSwitchDefinition, don't continue looking for FeatureGuard.
+				// We don't want to infer feature switch settings from FeatureGuard.
+				return _context.FeatureSettings.TryGetValue (switchName, out value);
+			}
+
+			if (!_context.IsOptimizationEnabled (CodeOptimizations.SubstituteFeatureGuards, method))
+				return false;
+
+			foreach (var featureGuardAttribute in _context.CustomAttributes.GetCustomAttributes (property, "System.Diagnostics.CodeAnalysis", "FeatureGuardAttribute")) {
+				if (featureGuardAttribute.ConstructorArguments is not [CustomAttributeArgument { Value: TypeReference featureType }])
+					continue;
+
+				if (featureType.Namespace == "System.Diagnostics.CodeAnalysis") {
+					switch (featureType.Name) {
+					case "RequiresUnreferencedCodeAttribute":
+						return true;
+					case "RequiresDynamicCodeAttribute":
+						if (_context.FeatureSettings.TryGetValue (
+								"System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeSupported",
+								out bool isDynamicCodeSupported)
+							&& !isDynamicCodeSupported)
+							return true;
+						break;
+					}
+				}
+			}
+
+			return false;
+
+			static PropertyDefinition? FindProperty (MethodDefinition method) {
+				if (!method.IsGetter)
+					return null;
+
+				foreach (var property in method.DeclaringType.Properties) {
+					if (property.GetMethod == method)
+						return property;
+				}
+
+				return null;
+			}
 		}
 
 		public bool TryGetFieldUserValue (FieldDefinition field, out object? value)
diff --git a/src/tools/illink/src/linker/Linker/MethodReferenceComparer.cs b/src/tools/illink/src/linker/Linker/MethodReferenceComparer.cs
new file mode 100644
index 000000000000..3f4fa06684a0
--- /dev/null
+++ b/src/tools/illink/src/linker/Linker/MethodReferenceComparer.cs
@@ -0,0 +1,171 @@
+﻿// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Mono.Cecil;
+
+namespace Mono.Linker
+{
+	// Copied from https://github.com/jbevain/cecil/blob/master/Mono.Cecil/MethodReferenceComparer.cs
+	internal sealed class MethodReferenceComparer : EqualityComparer<MethodReference>
+	{
+		// Initialized lazily for each thread
+		[ThreadStatic]
+		static List<MethodReference>? xComparisonStack;
+
+		[ThreadStatic]
+		static List<MethodReference>? yComparisonStack;
+
+		public readonly ITryResolveMetadata _resolver;
+
+		public MethodReferenceComparer(ITryResolveMetadata resolver)
+		{
+			_resolver = resolver;
+		}
+
+		public override bool Equals (MethodReference? x, MethodReference? y)
+		{
+			return AreEqual (x, y, _resolver);
+		}
+
+		public override int GetHashCode (MethodReference obj)
+		{
+			return GetHashCodeFor (obj);
+		}
+
+		public static bool AreEqual (MethodReference? x, MethodReference? y, ITryResolveMetadata resolver)
+		{
+			if (ReferenceEquals (x, y))
+				return true;
+
+			if (x is null ^ y is null)
+				return false;
+
+			Debug.Assert (x is not null);
+			Debug.Assert (y is not null);
+
+			if (x.HasThis != y.HasThis)
+				return false;
+
+#pragma warning disable RS0030 // MethodReference.HasParameters is banned - this code is copied from Cecil
+			if (x.HasParameters != y.HasParameters)
+				return false;
+#pragma warning restore RS0030
+
+			if (x.HasGenericParameters != y.HasGenericParameters)
+				return false;
+
+#pragma warning disable RS0030 // MethodReference.HasParameters is banned - this code is copied from Cecil
+			if (x.Parameters.Count != y.Parameters.Count)
+				return false;
+#pragma warning restore RS0030
+
+			if (x.Name != y.Name)
+				return false;
+
+			if (!TypeReferenceEqualityComparer.AreEqual (x.DeclaringType, y.DeclaringType, resolver))
+				return false;
+
+			var xGeneric = x as GenericInstanceMethod;
+			var yGeneric = y as GenericInstanceMethod;
+			if (xGeneric != null || yGeneric != null) {
+				if (xGeneric == null || yGeneric == null)
+					return false;
+
+				if (xGeneric.GenericArguments.Count != yGeneric.GenericArguments.Count)
+					return false;
+
+				for (int i = 0; i < xGeneric.GenericArguments.Count; i++)
+					if (!TypeReferenceEqualityComparer.AreEqual (xGeneric.GenericArguments[i], yGeneric.GenericArguments[i], resolver))
+						return false;
+			}
+
+			var xResolved = resolver.TryResolve (x);
+			var yResolved = resolver.TryResolve (y);
+
+			if (xResolved != yResolved)
+				return false;
+
+			if (xResolved == null) {
+				// We couldn't resolve either method. In order for them to be equal, their parameter types _must_ match. But wait, there's a twist!
+				// There exists a situation where we might get into a recursive state: parameter type comparison might lead to comparing the same
+				// methods again if the parameter types are generic parameters whose owners are these methods. We guard against these by using a
+				// thread static list of all our comparisons carried out in the stack so far, and if we're in progress of comparing them already,
+				// we'll just say that they match.
+
+				xComparisonStack ??= new List<MethodReference> ();
+
+				yComparisonStack ??= new List<MethodReference> ();
+
+				for (int i = 0; i < xComparisonStack.Count; i++) {
+					if (xComparisonStack[i] == x && yComparisonStack[i] == y)
+						return true;
+				}
+
+				xComparisonStack.Add (x);
+
+				try {
+					yComparisonStack.Add (y);
+
+					try {
+#pragma warning disable RS0030 // MethodReference.HasParameters is banned - this code is copied from Cecil
+						for (int i = 0; i < x.Parameters.Count; i++) {
+							if (!TypeReferenceEqualityComparer.AreEqual (x.Parameters[i].ParameterType, y.Parameters[i].ParameterType, resolver))
+								return false;
+						}
+#pragma warning restore RS0030
+					} finally {
+						yComparisonStack.RemoveAt (yComparisonStack.Count - 1);
+					}
+				} finally {
+					xComparisonStack.RemoveAt (xComparisonStack.Count - 1);
+				}
+			}
+
+			return true;
+		}
+
+		public static bool AreSignaturesEqual (MethodReference x, MethodReference y, ITryResolveMetadata resolver, TypeComparisonMode comparisonMode = TypeComparisonMode.Exact)
+		{
+			if (x.HasThis != y.HasThis)
+				return false;
+
+#pragma warning disable RS0030 // MethodReference.HasParameters is banned - this code is copied from Cecil
+			if (x.Parameters.Count != y.Parameters.Count)
+				return false;
+#pragma warning restore RS0030
+
+			if (x.GenericParameters.Count != y.GenericParameters.Count)
+				return false;
+
+#pragma warning disable RS0030 // MethodReference.HasParameters is banned - this code is copied from Cecil
+			for (var i = 0; i < x.Parameters.Count; i++)
+				if (!TypeReferenceEqualityComparer.AreEqual (x.Parameters[i].ParameterType, y.Parameters[i].ParameterType, resolver, comparisonMode))
+					return false;
+#pragma warning restore RS0030
+
+			if (!TypeReferenceEqualityComparer.AreEqual (x.ReturnType, y.ReturnType, resolver, comparisonMode))
+				return false;
+
+			return true;
+		}
+
+		public static int GetHashCodeFor (MethodReference obj)
+		{
+			// a very good prime number
+			const int hashCodeMultiplier = 486187739;
+
+			var genericInstanceMethod = obj as GenericInstanceMethod;
+			if (genericInstanceMethod != null) {
+				var hashCode = GetHashCodeFor (genericInstanceMethod.ElementMethod);
+				for (var i = 0; i < genericInstanceMethod.GenericArguments.Count; i++)
+					hashCode = hashCode * hashCodeMultiplier + TypeReferenceEqualityComparer.GetHashCodeFor (genericInstanceMethod.GenericArguments[i]);
+				return hashCode;
+			}
+
+			return TypeReferenceEqualityComparer.GetHashCodeFor (obj.DeclaringType) * hashCodeMultiplier + obj.Name.GetHashCode ();
+		}
+	}
+}
diff --git a/src/tools/illink/src/linker/Linker/OverrideInformation.cs b/src/tools/illink/src/linker/Linker/OverrideInformation.cs
index 077353eb2ee7..0727d5d25c19 100644
--- a/src/tools/illink/src/linker/Linker/OverrideInformation.cs
+++ b/src/tools/illink/src/linker/Linker/OverrideInformation.cs
@@ -3,71 +3,39 @@
 
 using System.Diagnostics;
 using Mono.Cecil;
+using System.Diagnostics.CodeAnalysis;
 
 namespace Mono.Linker
 {
 	[DebuggerDisplay ("{Override}")]
 	public class OverrideInformation
 	{
-		readonly ITryResolveMetadata resolver;
-		readonly OverridePair _pair;
-		private InterfaceImplementation? _matchingInterfaceImplementation;
+		public MethodDefinition Base { get; }
 
-		public OverrideInformation (MethodDefinition @base, MethodDefinition @override, ITryResolveMetadata resolver, InterfaceImplementation? matchingInterfaceImplementation = null)
-		{
-			_pair = new OverridePair (@base, @override);
-			_matchingInterfaceImplementation = matchingInterfaceImplementation;
-			this.resolver = resolver;
-		}
-		public readonly record struct OverridePair (MethodDefinition Base, MethodDefinition Override)
-		{
-			public bool IsStaticInterfaceMethodPair () => Base.DeclaringType.IsInterface && Base.IsStatic && Override.IsStatic;
-			public InterfaceImplementation? GetMatchingInterfaceImplementation (ITryResolveMetadata resolver)
-			{
-				if (!Base.DeclaringType.IsInterface)
-					return null;
-				var interfaceType = Base.DeclaringType;
-				foreach (var @interface in Override.DeclaringType.Interfaces) {
-					if (resolver.TryResolve (@interface.InterfaceType)?.Equals (interfaceType) == true) {
-						return @interface;
-					}
-				}
-				return null;
-			}
-		}
+		public MethodDefinition Override { get; }
 
-		public MethodDefinition Base { get => _pair.Base; }
-		public MethodDefinition Override { get => _pair.Override; }
-		public InterfaceImplementation? MatchingInterfaceImplementation {
-			get {
-				if (_matchingInterfaceImplementation is not null)
-					return _matchingInterfaceImplementation;
-				_matchingInterfaceImplementation = _pair.GetMatchingInterfaceImplementation (resolver);
-				return _matchingInterfaceImplementation;
-			}
-		}
+		internal InterfaceImplementor? InterfaceImplementor { get; }
 
-		public bool IsOverrideOfInterfaceMember {
-			get {
-				if (MatchingInterfaceImplementation != null)
-					return true;
-
-				return Base.DeclaringType.IsInterface;
-			}
+		internal OverrideInformation (MethodDefinition @base, MethodDefinition @override, InterfaceImplementor? interfaceImplementor = null)
+		{
+			Base = @base;
+			Override = @override;
+			InterfaceImplementor = interfaceImplementor;
+			// Ensure we have an interface implementation if the base method is from an interface and the override method is on a class
+			Debug.Assert(@base.DeclaringType.IsInterface && interfaceImplementor != null
+						|| !@base.DeclaringType.IsInterface && interfaceImplementor == null);
+			// Ensure the interfaceImplementor is for the interface we expect
+			Debug.Assert (@base.DeclaringType.IsInterface ? interfaceImplementor!.InterfaceType == @base.DeclaringType : true);
 		}
 
-		public TypeDefinition? InterfaceType {
-			get {
-				if (!IsOverrideOfInterfaceMember)
-					return null;
+		public InterfaceImplementation? MatchingInterfaceImplementation
+			=> InterfaceImplementor?.InterfaceImplementation;
 
-				if (MatchingInterfaceImplementation != null)
-					return resolver.TryResolve (MatchingInterfaceImplementation.InterfaceType);
-
-				return Base.DeclaringType;
-			}
-		}
+		public TypeDefinition? InterfaceType
+			=> InterfaceImplementor?.InterfaceType;
 
-		public bool IsStaticInterfaceMethodPair => _pair.IsStaticInterfaceMethodPair ();
+		[MemberNotNullWhen (true, nameof (InterfaceImplementor), nameof (MatchingInterfaceImplementation))]
+		public bool IsOverrideOfInterfaceMember
+			=> InterfaceImplementor != null;
 	}
 }
diff --git a/src/tools/illink/src/linker/Linker/TypeComparisonMode.cs b/src/tools/illink/src/linker/Linker/TypeComparisonMode.cs
new file mode 100644
index 000000000000..e3f95bd14855
--- /dev/null
+++ b/src/tools/illink/src/linker/Linker/TypeComparisonMode.cs
@@ -0,0 +1,17 @@
+﻿// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+namespace Mono.Linker
+{
+	// Copied from https://github.com/jbevain/cecil/blob/master/Mono.Cecil/TypeComparisonMode.cs
+	internal enum TypeComparisonMode
+	{
+		Exact,
+		SignatureOnly,
+
+		/// <summary>
+		/// Types can be in different assemblies, as long as the module, assembly, and type names match they will be considered equal
+		/// </summary>
+		SignatureOnlyLoose
+	}
+}
diff --git a/src/tools/illink/src/linker/Linker/TypeMapInfo.cs b/src/tools/illink/src/linker/Linker/TypeMapInfo.cs
index a2f118adf9fb..d2813c8982c4 100644
--- a/src/tools/illink/src/linker/Linker/TypeMapInfo.cs
+++ b/src/tools/illink/src/linker/Linker/TypeMapInfo.cs
@@ -29,9 +29,11 @@
 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 //
 
+using System;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
+using System.Linq;
 using Mono.Cecil;
 
 namespace Mono.Linker
@@ -43,7 +45,7 @@ public class TypeMapInfo
 		readonly LinkContext context;
 		protected readonly Dictionary<MethodDefinition, List<OverrideInformation>> base_methods = new Dictionary<MethodDefinition, List<OverrideInformation>> ();
 		protected readonly Dictionary<MethodDefinition, List<OverrideInformation>> override_methods = new Dictionary<MethodDefinition, List<OverrideInformation>> ();
-		protected readonly Dictionary<MethodDefinition, List<(TypeDefinition InstanceType, InterfaceImplementation ImplementationProvider, MethodDefinition DefaultImplementationMethod)>> default_interface_implementations = new Dictionary<MethodDefinition, List<(TypeDefinition, InterfaceImplementation, MethodDefinition)>> ();
+		protected readonly Dictionary<MethodDefinition, List<OverrideInformation>> default_interface_implementations = new Dictionary<MethodDefinition, List<OverrideInformation>> ();
 
 		public TypeMapInfo (LinkContext context)
 		{
@@ -92,47 +94,34 @@ public void EnsureProcessed (AssemblyDefinition assembly)
 		/// DefaultInterfaceMethod is the method that implements <paramref name="method"/>.
 		/// </summary>
 		/// <param name="method">The interface method to find default implementations for</param>
-		public IEnumerable<(TypeDefinition ImplementingType, InterfaceImplementation InterfaceImpl, MethodDefinition DefaultImplementationMethod)>? GetDefaultInterfaceImplementations (MethodDefinition baseMethod)
+		public IEnumerable<OverrideInformation>? GetDefaultInterfaceImplementations (MethodDefinition baseMethod)
 		{
 			default_interface_implementations.TryGetValue (baseMethod, out var ret);
 			return ret;
 		}
 
-		public void AddBaseMethod (MethodDefinition method, MethodDefinition @base, InterfaceImplementation? matchingInterfaceImplementation)
+		public void AddBaseMethod (MethodDefinition method, MethodDefinition @base, InterfaceImplementor? interfaceImplementor)
 		{
-			if (!base_methods.TryGetValue (method, out List<OverrideInformation>? methods)) {
-				methods = new List<OverrideInformation> ();
-				base_methods[method] = methods;
-			}
-
-			methods.Add (new OverrideInformation (@base, method, context, matchingInterfaceImplementation));
+			base_methods.AddToList (method, new OverrideInformation (@base, method, interfaceImplementor));
 		}
 
-		public void AddOverride (MethodDefinition @base, MethodDefinition @override, InterfaceImplementation? matchingInterfaceImplementation = null)
+		public void AddOverride (MethodDefinition @base, MethodDefinition @override, InterfaceImplementor? interfaceImplementor = null)
 		{
-			if (!override_methods.TryGetValue (@base, out List<OverrideInformation>? methods)) {
-				methods = new List<OverrideInformation> ();
-				override_methods.Add (@base, methods);
-			}
-
-			methods.Add (new OverrideInformation (@base, @override, context, matchingInterfaceImplementation));
+			override_methods.AddToList (@base, new OverrideInformation (@base, @override, interfaceImplementor));
 		}
 
-		public void AddDefaultInterfaceImplementation (MethodDefinition @base, TypeDefinition implementingType, (InterfaceImplementation, MethodDefinition) matchingInterfaceImplementation)
+		public void AddDefaultInterfaceImplementation (MethodDefinition @base, InterfaceImplementor interfaceImplementor, MethodDefinition defaultImplementationMethod)
 		{
 			Debug.Assert(@base.DeclaringType.IsInterface);
-			if (!default_interface_implementations.TryGetValue (@base, out var implementations)) {
-				implementations = new List<(TypeDefinition, InterfaceImplementation, MethodDefinition)> ();
-				default_interface_implementations.Add (@base, implementations);
-			}
-
-			implementations.Add ((implementingType, matchingInterfaceImplementation.Item1, matchingInterfaceImplementation.Item2));
+			default_interface_implementations.AddToList (@base, new OverrideInformation (@base, defaultImplementationMethod, interfaceImplementor));
 		}
 
+		Dictionary<TypeDefinition, List<(TypeReference, List<InterfaceImplementation>)>> interfaces = new ();
 		protected virtual void MapType (TypeDefinition type)
 		{
 			MapVirtualMethods (type);
 			MapInterfaceMethodsInTypeHierarchy (type);
+			interfaces[type] = GetRecursiveInterfaceImplementations (type);
 
 			if (!type.HasNestedTypes)
 				return;
@@ -141,6 +130,50 @@ protected virtual void MapType (TypeDefinition type)
 				MapType (nested);
 		}
 
+		internal List<(TypeReference, List<InterfaceImplementation>)>? GetRecursiveInterfaces (TypeDefinition type)
+		{
+			if (interfaces.TryGetValue (type, out var value))
+				return value;
+			return null;
+		}
+
+		List<(TypeReference, List<InterfaceImplementation>)> GetRecursiveInterfaceImplementations (TypeDefinition type)
+		{
+			List<(TypeReference, List<InterfaceImplementation>)> firstImplementationChain = new ();
+
+			AddRecursiveInterfaces (type, [], firstImplementationChain, context);
+			Debug.Assert (firstImplementationChain.All (kvp => context.Resolve (kvp.Item1) == context.Resolve (kvp.Item2.Last ().InterfaceType)));
+
+			return firstImplementationChain;
+
+			static void AddRecursiveInterfaces (TypeReference typeRef, IEnumerable<InterfaceImplementation> pathToType, List<(TypeReference, List<InterfaceImplementation>)> firstImplementationChain, LinkContext Context)
+			{
+				var type = Context.TryResolve (typeRef);
+				if (type is null)
+					return;
+				// Get all explicit interfaces of this type
+				foreach (var iface in type.Interfaces) {
+					var interfaceType = iface.InterfaceType.TryInflateFrom (typeRef, Context);
+					if (interfaceType is null) {
+						continue;
+					}
+					if (!firstImplementationChain.Any (i => TypeReferenceEqualityComparer.AreEqual (i.Item1, interfaceType, Context))) {
+						firstImplementationChain.Add ((interfaceType, pathToType.Append (iface).ToList ()));
+					}
+				}
+
+				// Recursive interfaces after all direct interfaces to preserve Inherit/Implement tree order
+				foreach (var iface in type.Interfaces) {
+					// If we can't resolve the interface type we can't find recursive interfaces
+					var ifaceDirectlyOnType = iface.InterfaceType.TryInflateFrom (typeRef, Context);
+					if (ifaceDirectlyOnType is null) {
+						continue;
+					}
+					AddRecursiveInterfaces (ifaceDirectlyOnType, pathToType.Append (iface), firstImplementationChain, Context);
+				}
+			}
+		}
+
 		void MapInterfaceMethodsInTypeHierarchy (TypeDefinition type)
 		{
 			if (!type.HasInterfaces)
@@ -168,20 +201,20 @@ void MapInterfaceMethodsInTypeHierarchy (TypeDefinition type)
 						// Try to find an implementation with a name/sig match on the current type
 						MethodDefinition? exactMatchOnType = TryMatchMethod (type, interfaceMethod);
 						if (exactMatchOnType != null) {
-							AnnotateMethods (resolvedInterfaceMethod, exactMatchOnType);
+							AnnotateMethods (resolvedInterfaceMethod, exactMatchOnType, new (type, interfaceImpl.OriginalImpl, resolvedInterfaceMethod.DeclaringType, context));
 							continue;
 						}
 
 						// Next try to find an implementation with a name/sig match in the base hierarchy
 						var @base = GetBaseMethodInTypeHierarchy (type, interfaceMethod);
 						if (@base != null) {
-							AnnotateMethods (resolvedInterfaceMethod, @base, interfaceImpl.OriginalImpl);
+							AnnotateMethods (resolvedInterfaceMethod, @base, new (type, interfaceImpl.OriginalImpl, resolvedInterfaceMethod.DeclaringType, context));
 							continue;
 						}
 					}
 
 					// Look for a default implementation last.
-					FindAndAddDefaultInterfaceImplementations (type, resolvedInterfaceMethod);
+					FindAndAddDefaultInterfaceImplementations (type, type, resolvedInterfaceMethod, interfaceImpl.OriginalImpl);
 				}
 			}
 		}
@@ -211,24 +244,29 @@ void MapVirtualMethod (MethodDefinition method)
 			if (@base == null)
 				return;
 
+			Debug.Assert(!@base.DeclaringType.IsInterface);
+
 			AnnotateMethods (@base, method);
 		}
 
 		void MapOverrides (MethodDefinition method)
 		{
-			foreach (MethodReference override_ref in method.Overrides) {
-				MethodDefinition? @override = context.TryResolve (override_ref);
-				if (@override == null)
+			foreach (MethodReference baseMethodRef in method.Overrides) {
+				MethodDefinition? baseMethod = context.TryResolve (baseMethodRef);
+				if (baseMethod == null)
 					continue;
-
-				AnnotateMethods (@override, method);
+				if (baseMethod.DeclaringType.IsInterface) {
+					AnnotateMethods (baseMethod, method, InterfaceImplementor.Create (method.DeclaringType, baseMethod.DeclaringType, context));
+				} else {
+					AnnotateMethods (baseMethod, method);
+				}
 			}
 		}
 
-		void AnnotateMethods (MethodDefinition @base, MethodDefinition @override, InterfaceImplementation? matchingInterfaceImplementation = null)
+		void AnnotateMethods (MethodDefinition @base, MethodDefinition @override, InterfaceImplementor? interfaceImplementor = null)
 		{
-			AddBaseMethod (@override, @base, matchingInterfaceImplementation);
-			AddOverride (@base, @override, matchingInterfaceImplementation);
+			AddBaseMethod (@override, @base, interfaceImplementor);
+			AddOverride (@base, @override, interfaceImplementor);
 		}
 
 		MethodDefinition? GetBaseMethodInTypeHierarchy (MethodDefinition method)
@@ -279,16 +317,23 @@ void AnnotateMethods (MethodDefinition @base, MethodDefinition @override, Interf
 			return context.TryResolve (type)?.BaseType;
 		}
 
-		// Returns a list of default implementations of the given interface method on this type.
-		// Note that this returns a list to potentially cover the diamond case (more than one
-		// most specific implementation of the given interface methods). ILLink needs to preserve
-		// all the implementations so that the proper exception can be thrown at runtime.
-		void FindAndAddDefaultInterfaceImplementations (TypeDefinition type, MethodDefinition interfaceMethod)
+		/// <summary>
+		/// Returns a list of default implementations of the given interface method on this type.
+		/// Note that this returns a list to potentially cover the diamond case (more than one
+		/// most specific implementation of the given interface methods). ILLink needs to preserve
+		/// all the implementations so that the proper exception can be thrown at runtime.
+		/// </summary>
+		/// <param name="type">The type that implements (directly or via a base interface) the declaring interface of <paramref name="interfaceMethod"/></param>
+		/// <param name="interfaceMethod">The method to find a default implementation for</param>
+		/// <param name="implOfInterface">
+		/// The InterfaceImplementation on <paramref name="type"/> that points to the DeclaringType of <paramref name="interfaceMethod"/>.
+		/// </param>
+		void FindAndAddDefaultInterfaceImplementations (TypeDefinition typeThatImplementsInterface, TypeDefinition typeThatMayHaveDIM, MethodDefinition interfaceMethodToBeImplemented, InterfaceImplementation originalInterfaceImpl)
 		{
 			// Go over all interfaces, trying to find a method that is an explicit MethodImpl of the
 			// interface method in question.
 
-			foreach (var interfaceImpl in type.Interfaces) {
+			foreach (var interfaceImpl in typeThatMayHaveDIM.Interfaces) {
 				var potentialImplInterface = context.TryResolve (interfaceImpl.InterfaceType);
 				if (potentialImplInterface == null)
 					continue;
@@ -296,9 +341,9 @@ void FindAndAddDefaultInterfaceImplementations (TypeDefinition type, MethodDefin
 				bool foundImpl = false;
 
 				foreach (var potentialImplMethod in potentialImplInterface.Methods) {
-					if (potentialImplMethod == interfaceMethod &&
+					if (potentialImplMethod == interfaceMethodToBeImplemented &&
 						!potentialImplMethod.IsAbstract) {
-						AddDefaultInterfaceImplementation (interfaceMethod, type, (interfaceImpl, potentialImplMethod));
+						AddDefaultInterfaceImplementation (interfaceMethodToBeImplemented, new (typeThatImplementsInterface, originalInterfaceImpl, interfaceMethodToBeImplemented.DeclaringType, context), potentialImplMethod);
 						foundImpl = true;
 						break;
 					}
@@ -307,9 +352,9 @@ void FindAndAddDefaultInterfaceImplementations (TypeDefinition type, MethodDefin
 						continue;
 
 					// This method is an override of something. Let's see if it's the method we are looking for.
-					foreach (var @override in potentialImplMethod.Overrides) {
-						if (context.TryResolve (@override) == interfaceMethod) {
-							AddDefaultInterfaceImplementation (interfaceMethod, type, (interfaceImpl, @potentialImplMethod));
+					foreach (var baseMethod in potentialImplMethod.Overrides) {
+						if (context.TryResolve (baseMethod) == interfaceMethodToBeImplemented) {
+							AddDefaultInterfaceImplementation (interfaceMethodToBeImplemented, new (typeThatImplementsInterface, originalInterfaceImpl, interfaceMethodToBeImplemented.DeclaringType, context), @potentialImplMethod);
 							foundImpl = true;
 							break;
 						}
@@ -323,7 +368,7 @@ void FindAndAddDefaultInterfaceImplementations (TypeDefinition type, MethodDefin
 				// We haven't found a MethodImpl on the current interface, but one of the interfaces
 				// this interface requires could still provide it.
 				if (!foundImpl) {
-					FindAndAddDefaultInterfaceImplementations (potentialImplInterface, interfaceMethod);
+					FindAndAddDefaultInterfaceImplementations (typeThatImplementsInterface, potentialImplInterface, interfaceMethodToBeImplemented, originalInterfaceImpl);
 				}
 			}
 		}
diff --git a/src/tools/illink/src/linker/Linker/TypeReferenceEqualityComparer.cs b/src/tools/illink/src/linker/Linker/TypeReferenceEqualityComparer.cs
new file mode 100644
index 000000000000..ee0e0d48ba9b
--- /dev/null
+++ b/src/tools/illink/src/linker/Linker/TypeReferenceEqualityComparer.cs
@@ -0,0 +1,270 @@
+﻿// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Mono.Cecil;
+
+namespace Mono.Linker
+{
+	// Copied from https://github.com/jbevain/cecil/blob/master/Mono.Cecil/TypeReferenceComparer.cs
+	internal sealed class TypeReferenceEqualityComparer : EqualityComparer<TypeReference>
+	{
+		public readonly ITryResolveMetadata _resolver;
+
+		public TypeReferenceEqualityComparer(ITryResolveMetadata resolver)
+		{
+			_resolver = resolver;
+		}
+
+		public override bool Equals (TypeReference? x, TypeReference? y)
+		{
+			return AreEqual (x, y, _resolver);
+		}
+
+		public override int GetHashCode (TypeReference obj)
+		{
+			return GetHashCodeFor (obj);
+		}
+
+		public static bool AreEqual (TypeReference? a, TypeReference? b, ITryResolveMetadata resolver, TypeComparisonMode comparisonMode = TypeComparisonMode.Exact)
+		{
+			if (ReferenceEquals (a, b))
+				return true;
+
+			if (a == null || b == null)
+				return false;
+
+			var aMetadataType = a.MetadataType;
+			var bMetadataType = b.MetadataType;
+
+			if (aMetadataType == MetadataType.GenericInstance || bMetadataType == MetadataType.GenericInstance) {
+				if (aMetadataType != bMetadataType)
+					return false;
+
+				return AreEqual ((GenericInstanceType) a, (GenericInstanceType) b, resolver, comparisonMode);
+			}
+
+			if (aMetadataType == MetadataType.Array || bMetadataType == MetadataType.Array) {
+				if (aMetadataType != bMetadataType)
+					return false;
+
+				var a1 = (ArrayType) a;
+				var b1 = (ArrayType) b;
+				if (a1.Rank != b1.Rank)
+					return false;
+
+				return AreEqual (a1.ElementType, b1.ElementType, resolver, comparisonMode);
+			}
+
+			if (aMetadataType == MetadataType.Var || bMetadataType == MetadataType.Var) {
+				if (aMetadataType != bMetadataType)
+					return false;
+
+				return AreEqual ((GenericParameter) a, (GenericParameter) b, resolver, comparisonMode);
+			}
+
+			if (aMetadataType == MetadataType.MVar || bMetadataType == MetadataType.MVar) {
+				if (aMetadataType != bMetadataType)
+					return false;
+
+				return AreEqual ((GenericParameter) a, (GenericParameter) b, resolver, comparisonMode);
+			}
+
+			if (aMetadataType == MetadataType.ByReference || bMetadataType == MetadataType.ByReference) {
+				if (aMetadataType != bMetadataType)
+					return false;
+
+				return AreEqual (((ByReferenceType) a).ElementType, ((ByReferenceType) b).ElementType, resolver, comparisonMode);
+			}
+
+			if (aMetadataType == MetadataType.Pointer || bMetadataType == MetadataType.Pointer) {
+				if (aMetadataType != bMetadataType)
+					return false;
+
+				return AreEqual (((PointerType) a).ElementType, ((PointerType) b).ElementType, resolver, comparisonMode);
+			}
+
+			if (aMetadataType == MetadataType.RequiredModifier || bMetadataType == MetadataType.RequiredModifier) {
+				if (aMetadataType != bMetadataType)
+					return false;
+
+				var a1 = (RequiredModifierType) a;
+				var b1 = (RequiredModifierType) b;
+
+				return AreEqual (a1.ModifierType, b1.ModifierType, resolver, comparisonMode) && AreEqual (a1.ElementType, b1.ElementType, resolver, comparisonMode);
+			}
+
+			if (aMetadataType == MetadataType.OptionalModifier || bMetadataType == MetadataType.OptionalModifier) {
+				if (aMetadataType != bMetadataType)
+					return false;
+
+				var a1 = (OptionalModifierType) a;
+				var b1 = (OptionalModifierType) b;
+
+				return AreEqual (a1.ModifierType, b1.ModifierType, resolver, comparisonMode) && AreEqual (a1.ElementType, b1.ElementType, resolver, comparisonMode);
+			}
+
+			if (aMetadataType == MetadataType.Pinned || bMetadataType == MetadataType.Pinned) {
+				if (aMetadataType != bMetadataType)
+					return false;
+
+				return AreEqual (((PinnedType) a).ElementType, ((PinnedType) b).ElementType, resolver, comparisonMode);
+			}
+
+			if (aMetadataType == MetadataType.Sentinel || bMetadataType == MetadataType.Sentinel) {
+				if (aMetadataType != bMetadataType)
+					return false;
+
+				return AreEqual (((SentinelType) a).ElementType, ((SentinelType) b).ElementType, resolver, comparisonMode);
+			}
+
+			if (!a.Name.Equals (b.Name) || !a.Namespace.Equals (b.Namespace))
+				return false;
+
+			var xDefinition = resolver.TryResolve (a);
+			var yDefinition = resolver.TryResolve (b);
+			if (xDefinition == null || yDefinition == null)
+				return false;
+
+			// For loose signature the types could be in different assemblies, as long as the type names match we will consider them equal
+			if (comparisonMode == TypeComparisonMode.SignatureOnlyLoose) {
+				if (xDefinition.Module.Name != yDefinition.Module.Name)
+					return false;
+
+				if (xDefinition.Module.Assembly.Name.Name != yDefinition.Module.Assembly.Name.Name)
+					return false;
+
+				return xDefinition.FullName == yDefinition.FullName;
+			}
+
+			return xDefinition == yDefinition;
+		}
+
+		static bool AreEqual (GenericParameter a, GenericParameter b, ITryResolveMetadata resolver, TypeComparisonMode comparisonMode = TypeComparisonMode.Exact)
+		{
+			if (ReferenceEquals (a, b))
+				return true;
+
+			if (a.Position != b.Position)
+				return false;
+
+			if (a.Type != b.Type)
+				return false;
+
+			var aOwnerType = a.Owner as TypeReference;
+			if (aOwnerType != null && AreEqual (aOwnerType, b.Owner as TypeReference, resolver, comparisonMode))
+				return true;
+
+			var aOwnerMethod = a.Owner as MethodReference;
+			if (aOwnerMethod != null && comparisonMode != TypeComparisonMode.SignatureOnlyLoose && MethodReferenceComparer.AreEqual (aOwnerMethod, b.Owner as MethodReference, resolver))
+				return true;
+
+			return comparisonMode == TypeComparisonMode.SignatureOnly || comparisonMode == TypeComparisonMode.SignatureOnlyLoose;
+		}
+
+		static bool AreEqual (GenericInstanceType a, GenericInstanceType b, ITryResolveMetadata resolver, TypeComparisonMode comparisonMode = TypeComparisonMode.Exact)
+		{
+			if (ReferenceEquals (a, b))
+				return true;
+
+			var aGenericArgumentsCount = a.GenericArguments.Count;
+			if (aGenericArgumentsCount != b.GenericArguments.Count)
+				return false;
+
+			if (!AreEqual (a.ElementType, b.ElementType, resolver, comparisonMode))
+				return false;
+
+			for (int i = 0; i < aGenericArgumentsCount; i++)
+				if (!AreEqual (a.GenericArguments[i], b.GenericArguments[i], resolver, comparisonMode))
+					return false;
+
+			return true;
+		}
+
+		public static int GetHashCodeFor (TypeReference obj)
+		{
+			// a very good prime number
+			const int hashCodeMultiplier = 486187739;
+			// prime numbers
+			const int genericInstanceTypeMultiplier = 31;
+			const int byReferenceMultiplier = 37;
+			const int pointerMultiplier = 41;
+			const int requiredModifierMultiplier = 43;
+			const int optionalModifierMultiplier = 47;
+			const int pinnedMultiplier = 53;
+			const int sentinelMultiplier = 59;
+
+			var metadataType = obj.MetadataType;
+
+			if (metadataType == MetadataType.GenericInstance) {
+				var genericInstanceType = (GenericInstanceType) obj;
+				var hashCode = GetHashCodeFor (genericInstanceType.ElementType) * hashCodeMultiplier + genericInstanceTypeMultiplier;
+				for (var i = 0; i < genericInstanceType.GenericArguments.Count; i++)
+					hashCode = hashCode * hashCodeMultiplier + GetHashCodeFor (genericInstanceType.GenericArguments[i]);
+				return hashCode;
+			}
+
+			if (metadataType == MetadataType.Array) {
+				var arrayType = (ArrayType) obj;
+				return GetHashCodeFor (arrayType.ElementType) * hashCodeMultiplier + arrayType.Rank.GetHashCode ();
+			}
+
+			if (metadataType == MetadataType.Var || metadataType == MetadataType.MVar) {
+				var genericParameter = (GenericParameter) obj;
+				var hashCode = genericParameter.Position.GetHashCode () * hashCodeMultiplier + ((int) metadataType).GetHashCode ();
+
+				var ownerTypeReference = genericParameter.Owner as TypeReference;
+				if (ownerTypeReference != null)
+					return hashCode * hashCodeMultiplier + GetHashCodeFor (ownerTypeReference);
+
+				var ownerMethodReference = genericParameter.Owner as MethodReference;
+				if (ownerMethodReference != null)
+					return hashCode * hashCodeMultiplier + MethodReferenceComparer.GetHashCodeFor (ownerMethodReference);
+
+				throw new InvalidOperationException ("Generic parameter encountered with invalid owner");
+			}
+
+			if (metadataType == MetadataType.ByReference) {
+				var byReferenceType = (ByReferenceType) obj;
+				return GetHashCodeFor (byReferenceType.ElementType) * hashCodeMultiplier * byReferenceMultiplier;
+			}
+
+			if (metadataType == MetadataType.Pointer) {
+				var pointerType = (PointerType) obj;
+				return GetHashCodeFor (pointerType.ElementType) * hashCodeMultiplier * pointerMultiplier;
+			}
+
+			if (metadataType == MetadataType.RequiredModifier) {
+				var requiredModifierType = (RequiredModifierType) obj;
+				var hashCode = GetHashCodeFor (requiredModifierType.ElementType) * requiredModifierMultiplier;
+				hashCode = hashCode * hashCodeMultiplier + GetHashCodeFor (requiredModifierType.ModifierType);
+				return hashCode;
+			}
+
+			if (metadataType == MetadataType.OptionalModifier) {
+				var optionalModifierType = (OptionalModifierType) obj;
+				var hashCode = GetHashCodeFor (optionalModifierType.ElementType) * optionalModifierMultiplier;
+				hashCode = hashCode * hashCodeMultiplier + GetHashCodeFor (optionalModifierType.ModifierType);
+				return hashCode;
+			}
+
+			if (metadataType == MetadataType.Pinned) {
+				var pinnedType = (PinnedType) obj;
+				return GetHashCodeFor (pinnedType.ElementType) * hashCodeMultiplier * pinnedMultiplier;
+			}
+
+			if (metadataType == MetadataType.Sentinel) {
+				var sentinelType = (SentinelType) obj;
+				return GetHashCodeFor (sentinelType.ElementType) * hashCodeMultiplier * sentinelMultiplier;
+			}
+
+			if (metadataType == MetadataType.FunctionPointer) {
+				throw new NotImplementedException ("We currently don't handle function pointer types.");
+			}
+
+			return obj.Namespace.GetHashCode () * hashCodeMultiplier + obj.FullName.GetHashCode ();
+		}
+	}
+}
diff --git a/src/tools/illink/src/linker/Linker/TypeReferenceExtensions.cs b/src/tools/illink/src/linker/Linker/TypeReferenceExtensions.cs
index 5092fe1158e3..6189eae6d794 100644
--- a/src/tools/illink/src/linker/Linker/TypeReferenceExtensions.cs
+++ b/src/tools/illink/src/linker/Linker/TypeReferenceExtensions.cs
@@ -151,6 +151,13 @@ void parseArrayDimensions (ArrayType at)
 			return null;
 		}
 
+		public static TypeReference? TryInflateFrom (this TypeReference typeToInflate, TypeReference maybeGenericInstanceProvider, ITryResolveMetadata resolver)
+		{
+			if (maybeGenericInstanceProvider is GenericInstanceType genericInstanceProvider)
+				return InflateGenericType (genericInstanceProvider, typeToInflate, resolver);
+			return typeToInflate;
+		}
+
 		public static IEnumerable<(TypeReference InflatedInterface, InterfaceImplementation OriginalImpl)> GetInflatedInterfaces (this TypeReference typeRef, ITryResolveMetadata resolver)
 		{
 			var typeDef = resolver.TryResolve (typeRef);
diff --git a/src/tools/illink/src/linker/Mono.Linker.csproj b/src/tools/illink/src/linker/Mono.Linker.csproj
index e5cdbb86c5eb..f19ff4063e68 100644
--- a/src/tools/illink/src/linker/Mono.Linker.csproj
+++ b/src/tools/illink/src/linker/Mono.Linker.csproj
@@ -15,7 +15,8 @@
     <DisablePackageBaselineValidation>true</DisablePackageBaselineValidation>
     <!-- There are currently no translations, so the satellite assemblies are a waste of space. -->
     <EnableXlfLocalization>false</EnableXlfLocalization>
-    <NoWarn Condition="'$(DotNetBuildFromSource)' == 'true'">$(NoWarn);CS8524</NoWarn>
+    <NoWarn Condition="'$(DotNetBuildSourceOnly)' == 'true'">$(NoWarn);CS8524</NoWarn>
+    <NoWarn>$(NoWarn);CA1866</NoWarn>
     <ContractProject>$(MSBuildThisFileDirectory)ref\Mono.Linker.csproj</ContractProject>
     <RollForward>Major</RollForward>
     <UseAppHost>false</UseAppHost>
@@ -76,7 +77,7 @@
   </ItemGroup>
 
   <ItemGroup>
-    <PackageReference Condition="'$(DotNetBuildFromSource)' != 'true'" Include="StaticCs" Version="$(StaticCsVersion)">
+    <PackageReference Condition="'$(DotNetBuildSourceOnly)' != 'true'" Include="StaticCs" Version="$(StaticCsVersion)">
       <PrivateAssets>all</PrivateAssets>
       <ExcludeAssets>contentfiles</ExcludeAssets> <!-- We include our own copy of the ClosedAttribute to work in source build -->
     </PackageReference>
@@ -84,7 +85,7 @@
   </ItemGroup>
 
   <ItemGroup>
-    <PackageReference Include="Microsoft.CodeAnalysis.BannedApiAnalyzers" Version="$(MicrosoftCodeAnalysisBannedApiAnalyzersVersion)" Condition="'$(DotNetBuildFromSource)' != 'true'">
+    <PackageReference Include="Microsoft.CodeAnalysis.BannedApiAnalyzers" Version="$(MicrosoftCodeAnalysisBannedApiAnalyzersVersion)" Condition="'$(DotNetBuildSourceOnly)' != 'true'">
       <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
       <PrivateAssets>all</PrivateAssets>
     </PackageReference>
diff --git a/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/DataFlowTests.cs b/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/DataFlowTests.cs
index 2b1cf973d8a2..087c7afea46a 100644
--- a/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/DataFlowTests.cs
+++ b/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/DataFlowTests.cs
@@ -149,6 +149,12 @@ public Task FeatureCheckDataFlow ()
 			return RunTest ();
 		}
 
+		[Fact]
+		public Task FeatureGuardAttributeDataFlow ()
+		{
+			return RunTest ();
+		}
+
 		[Fact]
 		public Task FieldDataFlow ()
 		{
diff --git a/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/SubstitutionsTests.cs b/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/SubstitutionsTests.cs
new file mode 100644
index 000000000000..551284ef38a7
--- /dev/null
+++ b/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/SubstitutionsTests.cs
@@ -0,0 +1,17 @@
+using System;
+using System.Threading.Tasks;
+using Xunit;
+
+namespace ILLink.RoslynAnalyzer.Tests
+{
+	public sealed partial class SubstitutionsTests : LinkerTestBase
+	{
+		protected override string TestSuiteName => "Substitutions";
+
+		[Fact]
+		public Task FeatureGuardSubstitutions ()
+		{
+			return RunTest ();
+		}
+	}
+}
diff --git a/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/Inheritance.Interfaces.DefaultInterfaceMethodsTests.g.cs b/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/Inheritance.Interfaces.DefaultInterfaceMethodsTests.g.cs
index 4b3f387a3901..19189bfcd017 100644
--- a/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/Inheritance.Interfaces.DefaultInterfaceMethodsTests.g.cs
+++ b/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/Inheritance.Interfaces.DefaultInterfaceMethodsTests.g.cs
@@ -15,6 +15,12 @@ public Task DefaultInterfaceMethodCallIntoClass ()
 			return RunTest (allowMissingWarnings: true);
 		}
 
+		[Fact]
+		public Task DimProvidedByRecursiveInterface ()
+		{
+			return RunTest (allowMissingWarnings: true);
+		}
+
 		[Fact]
 		public Task GenericDefaultInterfaceMethods ()
 		{
@@ -39,6 +45,12 @@ public Task MostSpecificDefaultImplementationKeptStatic ()
 			return RunTest (allowMissingWarnings: true);
 		}
 
+		[Fact]
+		public Task MultipleDimsProvidedByRecursiveInterface ()
+		{
+			return RunTest (allowMissingWarnings: true);
+		}
+
 		[Fact]
 		public Task SimpleDefaultInterfaceMethod ()
 		{
@@ -51,6 +63,12 @@ public Task StaticDefaultInterfaceMethodOnStruct ()
 			return RunTest (allowMissingWarnings: true);
 		}
 
+		[Fact]
+		public Task StaticDimProvidedByUnreferencedIfaceInHierarchy ()
+		{
+			return RunTest (allowMissingWarnings: true);
+		}
+
 		[Fact]
 		public Task UnusedDefaultInterfaceImplementation ()
 		{
diff --git a/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/Inheritance.Interfaces.RecursiveInterfacesTests.g.cs b/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/Inheritance.Interfaces.RecursiveInterfacesTests.g.cs
new file mode 100644
index 000000000000..d436348e800b
--- /dev/null
+++ b/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/Inheritance.Interfaces.RecursiveInterfacesTests.g.cs
@@ -0,0 +1,31 @@
+﻿using System;
+using System.Threading.Tasks;
+using Xunit;
+
+namespace ILLink.RoslynAnalyzer.Tests.Inheritance.Interfaces
+{
+	public sealed partial class RecursiveInterfacesTests : LinkerTestBase
+	{
+
+		protected override string TestSuiteName => "Inheritance.Interfaces.RecursiveInterfaces";
+
+		[Fact]
+		public Task GenericInterfaceImplementedRecursively ()
+		{
+			return RunTest (allowMissingWarnings: true);
+		}
+
+		[Fact]
+		public Task InterfaceImplementedRecursively ()
+		{
+			return RunTest (allowMissingWarnings: true);
+		}
+
+		[Fact]
+		public Task RecursiveInterfaceKept ()
+		{
+			return RunTest (allowMissingWarnings: true);
+		}
+
+	}
+}
diff --git a/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/Inheritance.InterfacesTests.g.cs b/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/Inheritance.InterfacesTests.g.cs
index 2e1a2bbcb345..07680c1c9d5e 100644
--- a/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/Inheritance.InterfacesTests.g.cs
+++ b/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/Inheritance.InterfacesTests.g.cs
@@ -15,6 +15,12 @@ public Task CanDisableUnusedInterfaces ()
 			return RunTest (allowMissingWarnings: true);
 		}
 
+		[Fact]
+		public Task InterfaceImplementedThroughBaseInterface ()
+		{
+			return RunTest (allowMissingWarnings: true);
+		}
+
 		[Fact]
 		public Task InterfaceOnUninstantiatedTypeRemoved ()
 		{
@@ -27,6 +33,12 @@ public Task InterfaceVariants ()
 			return RunTest (allowMissingWarnings: true);
 		}
 
+		[Fact]
+		public Task InterfaceVariantsGeneric ()
+		{
+			return RunTest (allowMissingWarnings: true);
+		}
+
 		[Fact]
 		public Task InterfaceWithoutNewSlot ()
 		{
diff --git a/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/SubstitutionsTests.g.cs b/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/SubstitutionsTests.g.cs
index 1dd1a52a1a99..2e3c9ca38841 100644
--- a/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/SubstitutionsTests.g.cs
+++ b/src/tools/illink/test/ILLink.RoslynAnalyzer.Tests/generated/ILLink.RoslynAnalyzer.Tests.Generator/ILLink.RoslynAnalyzer.Tests.TestCaseGenerator/SubstitutionsTests.g.cs
@@ -7,8 +7,6 @@ namespace ILLink.RoslynAnalyzer.Tests
 	public sealed partial class SubstitutionsTests : LinkerTestBase
 	{
 
-		protected override string TestSuiteName => "Substitutions";
-
 		[Fact]
 		public Task EmbeddedFieldSubstitutionsInReferencedAssembly ()
 		{
@@ -45,6 +43,12 @@ public Task EmbeddedSubstitutionsNotProcessedWithIgnoreSubstitutionsAndRemoved (
 			return RunTest (allowMissingWarnings: true);
 		}
 
+		[Fact]
+		public Task FeatureGuardSubstitutionsDisabled ()
+		{
+			return RunTest (allowMissingWarnings: true);
+		}
+
 		[Fact]
 		public Task InitField ()
 		{
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases.Expectations/Support/FeatureGuardAttribute.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases.Expectations/Support/FeatureGuardAttribute.cs
new file mode 100644
index 000000000000..a4351d0fa8ef
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases.Expectations/Support/FeatureGuardAttribute.cs
@@ -0,0 +1,17 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Diagnostics.CodeAnalysis
+{
+    // Allow AttributeTargets.Method for testing invalid usages of a custom FeatureGuardAttribute
+    [AttributeUsage (AttributeTargets.Property | AttributeTargets.Method, Inherited = false, AllowMultiple = true)]
+    public sealed class FeatureGuardAttribute : Attribute
+    {
+        public Type FeatureType { get; }
+
+        public FeatureGuardAttribute (Type featureType)
+        {
+            FeatureType = featureType;
+        }
+    }
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases.Expectations/Support/FeatureSwitchDefinitionAttribute.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases.Expectations/Support/FeatureSwitchDefinitionAttribute.cs
new file mode 100644
index 000000000000..71b030ab299f
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases.Expectations/Support/FeatureSwitchDefinitionAttribute.cs
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Diagnostics.CodeAnalysis
+{
+	[AttributeUsage(AttributeTargets.Property, Inherited = false)]
+	public sealed class FeatureSwitchDefinitionAttribute : Attribute
+	{
+		public string SwitchName { get; }
+
+		public FeatureSwitchDefinitionAttribute (string switchName)
+		{
+			SwitchName = switchName;
+		}
+	}
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/AttributePropertyDataflow.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/AttributePropertyDataflow.cs
index 15523ffb02d6..f953784dc375 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/AttributePropertyDataflow.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/AttributePropertyDataflow.cs
@@ -224,9 +224,7 @@ public static void Test ()
 			// where the owning symbol is not a method.
 			[Kept]
 			[KeptAttributeAttribute (typeof (KeepsPublicMethodsAttribute))]
-			// NativeAot doesn't handle the type name on fields: https://github.com/dotnet/runtime/issues/92259
-			[ExpectedWarning ("IL2105", "Mono.Linker.Tests.Cases.DataFlow.AttributePropertyDataflow+AttributeWithConditionalExpression+ClassWithKeptPublicMethods", ProducedBy = Tool.NativeAot)]
-			[ExpectedWarning ("IL2026", "--ClassWithKeptPublicMethods--", ProducedBy = Tool.Trimmer)]
+			[ExpectedWarning ("IL2026", "--ClassWithKeptPublicMethods--", ProducedBy = Tool.Trimmer | Tool.NativeAot)]
 			[KeepsPublicMethods (TypeName = 1 + 1 == 2 ? "Mono.Linker.Tests.Cases.DataFlow.AttributePropertyDataflow+AttributeWithConditionalExpression+ClassWithKeptPublicMethods" : null)]
 			public static int field;
 
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/Dependencies/TestFeatures.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/Dependencies/TestFeatures.cs
new file mode 100644
index 000000000000..942c9f3586dd
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/Dependencies/TestFeatures.cs
@@ -0,0 +1,12 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace ILLink.RoslynAnalyzer
+{
+	public class TestFeatures
+	{
+		public static bool IsUnreferencedCodeSupported => true;
+
+		public static bool IsAssemblyFilesSupported => true;
+	}
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/DynamicObjects.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/DynamicObjects.cs
index a9f8f00296b5..d048bec91e5d 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/DynamicObjects.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/DynamicObjects.cs
@@ -54,26 +54,27 @@ static void MethodWithDynamicParameter (dynamic arg)
 				arg.MethodWithDynamicParameter (arg);
 			}
 
-			[ExpectedWarning ("IL2026", "Microsoft.CSharp.RuntimeBinder.Binder.InvokeConstructor")]
-			[ExpectedWarning ("IL3050", ProducedBy = Tool.NativeAot)] // https://github.com/dotnet/runtime/issues/94427
-			static void ObjectCreationDynamicArgument ()
-			{
-				dynamic dynamicObject = "Some string";
-				var x = new ClassWithDynamicCtor (dynamicObject);
-			}
-
-			class ClassWithDynamicCtor
-			{
-				public ClassWithDynamicCtor (dynamic arg)
-				{
-				}
-			}
+			// Roslyn codegen no longer produces a call to Binder.InvokeConstructor.
+			// [ExpectedWarning ("IL2026", "Microsoft.CSharp.RuntimeBinder.Binder.InvokeConstructor")]
+			// [ExpectedWarning ("IL3050", ProducedBy = Tool.NativeAot)] // https://github.com/dotnet/runtime/issues/94427
+			// static void ObjectCreationDynamicArgument ()
+			// {
+			// 	dynamic dynamicObject = "Some string";
+			// 	var x = new ClassWithDynamicCtor (dynamicObject);
+			// }
+
+			// class ClassWithDynamicCtor
+			// {
+			// 	public ClassWithDynamicCtor (dynamic arg)
+			// 	{
+			// 	}
+			// }
 
 			public static void Test ()
 			{
 				DynamicArgument ();
 				DynamicParameter ();
-				ObjectCreationDynamicArgument ();
+				// ObjectCreationDynamicArgument ();
 			}
 		}
 
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/ExponentialDataFlow.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/ExponentialDataFlow.cs
index 55d07efff70c..2f749beae0f8 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/ExponentialDataFlow.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/ExponentialDataFlow.cs
@@ -58,7 +58,7 @@ class GenericTypeWithRequires<[DynamicallyAccessedMembers (DynamicallyAccessedMe
 			{
 			}
 
-			[ExpectedWarning ("IL3050", ProducedBy = Tool.Analyzer | Tool.NativeAot)]
+			[ExpectedWarning ("IL3050", ProducedBy = Tool.Analyzer)]
 			[ExpectedWarning ("IL2090", "'T'")]
 			public static void Test<T> ()
 			{
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/FeatureCheckDataFlow.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/FeatureCheckDataFlow.cs
index 29f18ea70638..8c8c3baf5ab5 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/FeatureCheckDataFlow.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/FeatureCheckDataFlow.cs
@@ -20,7 +20,8 @@ namespace Mono.Linker.Tests.Cases.DataFlow
 	// Note: the XML must be passed as an embedded resource named ILLink.Substitutions.xml,
 	// not as a separate substitution file, for it to work with NativeAot.
 	// Related: https://github.com/dotnet/runtime/issues/88647
-	[SetupCompileResource ("FeatureCheckDataFlowTestSubstitutions.xml", "ILLink.Substitutions.xml")]
+	[SetupCompileBefore ("TestFeatures.dll", new[] { "Dependencies/TestFeatures.cs" },
+		resources: new object[] { new [] { "FeatureCheckDataFlowTestSubstitutions.xml", "ILLink.Substitutions.xml" } })]
 	[IgnoreSubstitutions (false)]
 	public class FeatureCheckDataFlow
 	{
@@ -525,14 +526,14 @@ static void CallTestUnreferencedCodeUnguarded ()
 				RequiresUnreferencedCode ();
 			}
 
-			static void CallTestRequiresDynamicCodeGuarded ()
+			static void CallTestDynamicCodeGuarded ()
 			{
 				if (RuntimeFeature.IsDynamicCodeSupported)
 					RequiresDynamicCode ();
 			}
 
 			[ExpectedWarning ("IL3050", nameof (RequiresDynamicCode), ProducedBy = Tool.Analyzer | Tool.NativeAot)]
-			static void CallTestRequiresDynamicCodeUnguarded ()
+			static void CallTestDynamicCodeUnguarded ()
 			{
 				RequiresDynamicCode ();
 			}
@@ -554,8 +555,8 @@ public static void Test ()
 			{
 				CallTestUnreferencedCodeGuarded ();
 				CallTestUnreferencedCodeUnguarded ();
-				CallTestRequiresDynamicCodeGuarded ();
-				CallTestRequiresDynamicCodeUnguarded ();
+				CallTestDynamicCodeGuarded ();
+				CallTestDynamicCodeUnguarded ();
 				CallTestAssemblyFilesGuarded ();
 				CallTestAssemblyFilesUnguarded ();
 			}
@@ -1154,6 +1155,7 @@ static void GuardedLocalFunction ()
 
 			public static void Test ()
 			{
+				// Use the IEnumerable to mark the IEnumerable methods
 				GuardInIterator ();
 				StateFlowsAcrossYield ();
 				GuardInAsync ();
@@ -1220,12 +1222,3 @@ class ClassWithRequires
 		class RequiresAllGeneric<[DynamicallyAccessedMembers (DynamicallyAccessedMemberTypes.All)] T> {}
 	}
 }
-
-namespace ILLink.RoslynAnalyzer
-{
-	class TestFeatures
-	{
-		public static bool IsUnreferencedCodeSupported => true;
-		public static bool IsAssemblyFilesSupported => true;
-	}
-}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/FeatureCheckDataFlowTestSubstitutions.xml b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/FeatureCheckDataFlowTestSubstitutions.xml
index c096cf07d6e7..db0bf3703367 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/FeatureCheckDataFlowTestSubstitutions.xml
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/FeatureCheckDataFlowTestSubstitutions.xml
@@ -1,5 +1,5 @@
 <linker>
-  <assembly fullname="test, Version=0.0.0.0, Culture=neutral, PublicKeyToken=null">
+  <assembly fullname="TestFeatures, Version=0.0.0.0, Culture=neutral, PublicKeyToken=null">
     <type fullname="ILLink.RoslynAnalyzer.TestFeatures">
       <method signature="System.Boolean get_IsUnreferencedCodeSupported()" body="stub" value="false" />
       <method signature="System.Boolean get_IsAssemblyFilesSupported()" body="stub" value="false" />
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/FeatureGuardAttributeDataFlow.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/FeatureGuardAttributeDataFlow.cs
new file mode 100644
index 000000000000..60aa4f18a066
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/DataFlow/FeatureGuardAttributeDataFlow.cs
@@ -0,0 +1,518 @@
+// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using ILLink.RoslynAnalyzer;
+using Mono.Linker.Tests.Cases.Expectations.Assertions;
+using Mono.Linker.Tests.Cases.Expectations.Helpers;
+using Mono.Linker.Tests.Cases.Expectations.Metadata;
+
+namespace Mono.Linker.Tests.Cases.DataFlow
+{
+	[SkipKeptItemsValidation]
+	[ExpectedNoWarnings]
+	[SetupCompileBefore ("TestFeatures.dll", new[] { "Dependencies/TestFeatures.cs" })]
+	public class FeatureGuardAttributeDataFlow
+	{
+		public static void Main ()
+		{
+			ValidGuardBodies.Test ();
+			InvalidGuardBodies.Test ();
+			InvalidFeatureGuards.Test ();
+		}
+
+		class ValidGuardBodies {
+
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool ReturnFalseGuard => false;
+
+			static void TestReturnFalseGuard ()
+			{
+				if (ReturnFalseGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool DirectGuard => TestFeatures.IsUnreferencedCodeSupported;
+
+			static void TestDirectGuard ()
+			{
+				if (DirectGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool IndirectGuard => DirectGuard;
+
+			static void TestIndirectGuard ()
+			{
+				if (IndirectGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			// Analyzer doesn't understand this pattern because it compiles into a CFG that effectively
+			// looks like this:
+			//
+			//     bool tmp;
+			//     if (TestFeatures.IsUnreferencedCodeSupported)
+			//         tmp = OtherCondition ();
+			//     else
+			//         tmp = false;
+			//     return tmp;
+			//
+			// The analyzer doesn't do constant propagation of the boolean, so it doesn't know that
+			// the return value is always false when TestFeatures.IsUnreferencedCodeSupported is false.
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool AndGuard => TestFeatures.IsUnreferencedCodeSupported && OtherCondition ();
+
+			static void TestAndGuard ()
+			{
+				if (AndGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool NotNotGuard => !!TestFeatures.IsUnreferencedCodeSupported;
+
+			static void TestNotNotGuard ()
+			{
+				if (NotNotGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool EqualsTrueGuard => TestFeatures.IsUnreferencedCodeSupported == true;
+
+			static void TestEqualsTrueGuard ()
+			{
+				if (EqualsTrueGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool TrueEqualsGuard => true == TestFeatures.IsUnreferencedCodeSupported;
+
+			static void TestTrueEqualsGuard ()
+			{
+				if (TrueEqualsGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool NotEqualsFalseGuard => TestFeatures.IsUnreferencedCodeSupported != false;
+
+			static void TestNotEqualsFalseGuard ()
+			{
+				if (NotEqualsFalseGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool FalseNotEqualsGuard => false != TestFeatures.IsUnreferencedCodeSupported;
+
+			static void TestFalseNotEqualsGuard ()
+			{
+				if (FalseNotEqualsGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool IsTrueGuard => TestFeatures.IsUnreferencedCodeSupported is true;
+
+			static void TestIsTrueGuard ()
+			{
+				if (IsTrueGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool IsNotFalseGuard => TestFeatures.IsUnreferencedCodeSupported is not false;
+
+			static void TestIsNotFalseGuard ()
+			{
+				if (IsNotFalseGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool IfReturnTrueGuard {
+				get {
+					if (TestFeatures.IsUnreferencedCodeSupported)
+						return true;
+					return false;
+				}
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool ElseReturnTrueGuard {
+				get {
+					if (!TestFeatures.IsUnreferencedCodeSupported)
+						return false;
+					else
+						return true;
+				}
+			}
+
+			static void TestElseReturnTrueGuard ()
+			{
+				if (ElseReturnTrueGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			static void TestIfReturnTrueGuard ()
+			{
+				if (IfReturnTrueGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool AssertReturnFalseGuard {
+				 get {
+					Debug.Assert (TestFeatures.IsUnreferencedCodeSupported);
+					return false;
+				 }
+			}
+
+			static void TestAssertReturnFalseGuard ()
+			{
+				if (AssertReturnFalseGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool AssertNotReturnFalseGuard {
+				 get {
+					Debug.Assert (!TestFeatures.IsUnreferencedCodeSupported);
+					return false;
+				 }
+			}
+
+			static void TestAssertNotReturnFalseGuard ()
+			{
+				if (AssertNotReturnFalseGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool AssertReturnTrueGuard {
+				 get {
+					Debug.Assert (TestFeatures.IsUnreferencedCodeSupported);
+					return true;
+				 }
+			}
+
+			static void TestAssertReturnTrueGuard ()
+			{
+				if (AssertReturnTrueGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool ThrowGuard {
+				get {
+					if (!TestFeatures.IsUnreferencedCodeSupported)
+						throw new Exception ();
+					return false;
+				}
+			}
+
+			static void TestThrowGuard ()
+			{
+				if (ThrowGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool TernaryIfGuard => TestFeatures.IsUnreferencedCodeSupported ? true : false;
+
+			static void TestTernaryIfGuard ()
+			{
+				if (TernaryIfGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool TernaryElseGuard => !TestFeatures.IsUnreferencedCodeSupported ? false : true;
+
+			static void TestTernaryElseGuard ()
+			{
+				if (TernaryElseGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			public static void Test ()
+			{
+				TestDirectGuard ();
+				TestIndirectGuard ();
+
+				TestReturnFalseGuard ();
+				TestAndGuard ();
+				TestNotNotGuard ();
+				TestEqualsTrueGuard ();
+				TestTrueEqualsGuard ();
+				TestNotEqualsFalseGuard ();
+				TestFalseNotEqualsGuard ();
+				TestIsTrueGuard ();
+				TestIsNotFalseGuard ();
+				TestIfReturnTrueGuard ();
+				TestElseReturnTrueGuard ();
+				TestAssertReturnFalseGuard ();
+				TestAssertNotReturnFalseGuard ();
+				TestAssertReturnTrueGuard ();
+				TestThrowGuard ();
+				TestTernaryIfGuard ();
+				TestTernaryElseGuard ();
+			}
+		}
+
+		class InvalidGuardBodies {
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool ReturnTrueGuard => true;
+
+			static void TestReturnTrueGuard ()
+			{
+				if (ReturnTrueGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool OtherConditionGuard => OtherCondition ();
+
+			static void TestOtherConditionGuard ()
+			{
+				if (OtherConditionGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool OrGuard => TestFeatures.IsUnreferencedCodeSupported || OtherCondition ();
+
+			static void TestOrGuard ()
+			{
+				if (OrGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool NotGuard => !TestFeatures.IsUnreferencedCodeSupported;
+
+			static void TestNotGuard ()
+			{
+				if (NotGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool EqualsFalseGuard => TestFeatures.IsUnreferencedCodeSupported == false;
+
+			static void TestEqualsFalseGuard ()
+			{
+				if (EqualsFalseGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool FalseEqualsGuard => false == TestFeatures.IsUnreferencedCodeSupported;
+
+			static void TestFalseEqualsGuard ()
+			{
+				if (FalseEqualsGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool NotEqualsTrueGuard => TestFeatures.IsUnreferencedCodeSupported != true;
+
+			static void TestNotEqualsTrueGuard ()
+			{
+				if (NotEqualsTrueGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool TrueNotEqualsGuard => true != TestFeatures.IsUnreferencedCodeSupported;
+
+			static void TestTrueNotEqualsGuard ()
+			{
+				if (TrueNotEqualsGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool IsNotTrueGuard => TestFeatures.IsUnreferencedCodeSupported is not true;
+
+			static void TestIsNotTrueGuard ()
+			{
+				if (IsNotTrueGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool IsFalseGuard => TestFeatures.IsUnreferencedCodeSupported is false;
+
+			static void TestIsFalseGuard ()
+			{
+				if (IsFalseGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool IfReturnFalseGuard {
+				get {
+					if (TestFeatures.IsUnreferencedCodeSupported)
+						return false;
+					return true;
+				}
+			}
+
+			static void TestIfReturnFalseGuard ()
+			{
+				if (IfReturnFalseGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool ElseReturnFalseGuard {
+				get {
+					if (!TestFeatures.IsUnreferencedCodeSupported)
+						return true;
+					else
+						return false;
+				}
+			}
+
+			static void TestElseReturnFalseGuard ()
+			{
+				if (ElseReturnFalseGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool AssertNotReturnTrueGuard {
+				 get {
+					Debug.Assert (!TestFeatures.IsUnreferencedCodeSupported);
+					return true;
+				 }
+			}
+
+			static void TestAssertNotReturnTrueGuard ()
+			{
+				if (AssertNotReturnTrueGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			public static void Test ()
+			{
+				TestOtherConditionGuard ();
+
+				TestReturnTrueGuard ();
+				TestOrGuard ();
+				TestNotGuard ();
+				TestEqualsFalseGuard ();
+				TestFalseEqualsGuard ();
+				TestNotEqualsTrueGuard ();
+				TestTrueNotEqualsGuard ();
+				TestIsNotTrueGuard ();
+				TestIsFalseGuard ();
+				TestIfReturnFalseGuard ();
+				TestElseReturnFalseGuard ();
+				TestAssertNotReturnTrueGuard ();
+			}
+		}
+
+		class InvalidFeatureGuards {
+			[ExpectedWarning ("IL4001", ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static int NonBooleanProperty => 0;
+
+			[ExpectedWarning ("IL2026", nameof (RequiresUnreferencedCodeAttribute))]
+			static void TestNonBooleanProperty ()
+			{
+				if (NonBooleanProperty == 0)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4001", ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			bool NonStaticProperty => true;
+
+			[ExpectedWarning ("IL2026", nameof (RequiresUnreferencedCodeAttribute))]
+			static void TestNonStaticProperty ()
+			{
+				var instance = new InvalidFeatureGuards ();
+				if (instance.NonStaticProperty)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool SetOnlyProperty { set => throw null; }
+
+			[ExpectedWarning ("IL2026", nameof (RequiresUnreferencedCodeAttribute))]
+			static void TestSetOnlyProperty ()
+			{
+				if (SetOnlyProperty = true)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4001", ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool GetAndSetProperty { get => true; set => throw null; }
+
+			[ExpectedWarning ("IL2026", nameof (RequiresUnreferencedCodeAttribute))]
+			static void TestGetAndSetProperty ()
+			{
+				if (GetAndSetProperty)
+					RequiresUnreferencedCode ();
+			}
+
+			// No warning for this case because we don't validate that the attribute usage matches
+			// the expected AttributeUsage.Property for assemblies that define their own version
+			// of FeatureGuardAttribute.
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool Method () => true;
+
+			[ExpectedWarning ("IL2026", nameof (RequiresUnreferencedCodeAttribute))]
+			static void TestMethod ()
+			{
+				if (Method ())
+					RequiresUnreferencedCode ();
+			}
+
+			public static void Test ()
+			{
+				TestNonBooleanProperty ();
+				TestNonStaticProperty ();
+				TestSetOnlyProperty ();
+				TestGetAndSetProperty ();
+				TestMethod ();
+			}
+		}
+
+		[RequiresUnreferencedCode (nameof (RequiresUnreferencedCode))]
+		static void RequiresUnreferencedCode () { }
+
+		static bool OtherCondition () => true;
+	}
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/Dependencies/DimProvidedByRecursiveInterface.il b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/Dependencies/DimProvidedByRecursiveInterface.il
new file mode 100644
index 000000000000..c85892208989
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/Dependencies/DimProvidedByRecursiveInterface.il
@@ -0,0 +1,86 @@
+// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+.assembly extern mscorlib { }
+
+.assembly 'library' { }
+
+.class public auto ansi abstract sealed beforefieldinit Program
+    extends [mscorlib]System.Object
+{
+    // Nested Types
+    .class interface nested public auto ansi abstract beforefieldinit IFoo
+    {
+        // Methods
+        .method public hidebysig newslot abstract virtual
+            instance void Method () cil managed
+        {
+        } // end of method IFoo::Method
+
+    } // end of class IFoo
+
+    .class interface nested public auto ansi abstract beforefieldinit IBar
+        implements Program/IFoo
+    {
+        // Methods
+        .method public final hidebysig virtual
+            instance void Program.IFoo.Method () cil managed
+        {
+            .override method instance void Program/IFoo::Method()
+            // Method begins at RVA 0x2068
+            // Code size 2 (0x2)
+            .maxstack 8
+
+            IL_0000: nop
+            IL_0001: ret
+        } // end of method IBar::Program.IFoo.Method
+
+    } // end of class IBar
+
+    .class interface nested public auto ansi abstract beforefieldinit IBaz
+        implements Program/IBar
+    {
+    } // end of class IBaz
+
+    .class nested public auto ansi beforefieldinit MyFoo
+        extends [mscorlib]System.Object
+        implements Program/IBaz
+    {
+        // Methods
+        .method public hidebysig specialname rtspecialname
+            instance void .ctor () cil managed
+        {
+            // Method begins at RVA 0x2076
+            // Code size 8 (0x8)
+            .maxstack 8
+
+            IL_0000: ldarg.0
+            IL_0001: call instance void [mscorlib]System.Object::.ctor()
+            IL_0006: nop
+            IL_0007: ret
+        } // end of method MyFoo::.ctor
+
+    } // end of class MyFoo
+
+
+    // Methods
+    .method public hidebysig static
+        void CallMethod (
+            class Program/IFoo foo
+        ) cil managed
+    {
+        .custom instance void [mscorlib]mscorlib.CompilerServices.NullableContextAttribute::.ctor(uint8) = (
+            01 00 01 00 00
+        )
+        // Method begins at RVA 0x2050
+        // Code size 9 (0x9)
+        .maxstack 8
+
+        IL_0000: nop
+        IL_0001: ldarg.0
+        IL_0002: callvirt instance void Program/IFoo::Method()
+        IL_0007: nop
+        IL_0008: ret
+    } // end of method Program::CallMethod
+
+} // end of class Program
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/Dependencies/MultipleDimsProvidedByRecursiveInterface.il b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/Dependencies/MultipleDimsProvidedByRecursiveInterface.il
new file mode 100644
index 000000000000..4937513f2853
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/Dependencies/MultipleDimsProvidedByRecursiveInterface.il
@@ -0,0 +1,86 @@
+// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+.assembly extern mscorlib { }
+
+.assembly 'library' { }
+
+.class public auto ansi abstract sealed beforefieldinit Program
+    extends [mscorlib]System.Object
+{
+    // Nested Types
+    .class interface nested public auto ansi abstract beforefieldinit I0
+    {
+        // Methods
+        .method public hidebysig newslot abstract virtual
+            instance void Method () cil managed
+        {
+        } // end of method I0::Method
+
+    } // end of class I0
+
+    .class interface nested public auto ansi abstract beforefieldinit I00
+        implements Program/I0
+    {
+        // Methods
+        .method public final hidebysig virtual
+            instance void Program.I0.Method () cil managed
+        {
+            .override method instance void Program/I0::Method()
+            // Method begins at RVA 0x2068
+            // Code size 2 (0x2)
+            .maxstack 8
+
+            IL_0000: nop
+            IL_0001: ret
+        } // end of method I00::Program.I0.Method
+    } // end of class I00
+
+    .class interface nested public auto ansi abstract beforefieldinit I01
+        implements Program/I0
+    {
+        // Methods
+        .method public final hidebysig virtual
+            instance void Program.I0.Method () cil managed
+        {
+            .override method instance void Program/I0::Method()
+            // Method begins at RVA 0x2068
+            // Code size 2 (0x2)
+            .maxstack 8
+
+            IL_0000: nop
+            IL_0001: ret
+        } // end of method I00::Program.I0.Method
+    } // end of class I00
+
+    .class interface nested public auto ansi abstract beforefieldinit I000
+        implements Program/I00
+    {
+    } // end of class I000
+
+    .class interface nested public auto ansi abstract beforefieldinit I010
+        implements Program/I01
+    {
+    } // end of class I000
+
+    .class nested public auto ansi beforefieldinit MyFoo
+        extends [mscorlib]System.Object
+        implements Program/I000, Program/I010
+    {
+        // Methods
+        .method public hidebysig specialname rtspecialname
+            instance void .ctor () cil managed
+        {
+            // Method begins at RVA 0x2076
+            // Code size 8 (0x8)
+            .maxstack 8
+
+            IL_0000: ldarg.0
+            IL_0001: call instance void [mscorlib]System.Object::.ctor()
+            IL_0006: nop
+            IL_0007: ret
+        } // end of method MyFoo::.ctor
+
+    } // end of class MyFoo
+
+} // end of class Program
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/Dependencies/StaticDimProvidedByUnreferencedIfaceInHierarchy.il b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/Dependencies/StaticDimProvidedByUnreferencedIfaceInHierarchy.il
new file mode 100644
index 000000000000..949d5e6fbf4a
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/Dependencies/StaticDimProvidedByUnreferencedIfaceInHierarchy.il
@@ -0,0 +1,70 @@
+// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+.assembly extern mscorlib { }
+
+.assembly 'library' { }
+
+.class public auto ansi abstract sealed beforefieldinit Program
+    extends [mscorlib]System.Object
+{
+    // Nested Types
+    .class interface nested public auto ansi abstract beforefieldinit IBase
+    {
+        // Methods
+        .method public hidebysig abstract virtual static
+            void Method () cil managed
+        {
+        } // end of method IBase::Method
+
+    } // end of class IBase
+
+    .class interface nested public auto ansi abstract beforefieldinit I2
+        implements Program/IBase
+    {
+        // Methods
+        .method public hidebysig static
+            void Program.IBase.Method () cil managed
+        {
+            .override method void Program/IBase::Method()
+            // Method begins at RVA 0x205f
+            // Code size 2 (0x2)
+            .maxstack 8
+
+            IL_0000: nop
+            IL_0001: ret
+        } // end of method I2::Program.IBase.Method
+
+    } // end of class I2
+
+    .class interface nested public auto ansi abstract beforefieldinit I3
+        implements Program/I2
+    {
+    } // end of class I3
+
+    .class interface nested public auto ansi abstract beforefieldinit I4
+        implements Program/I3
+    {
+    } // end of class I4
+
+
+    // Methods
+    .method public hidebysig static
+        void CallMethod<(Program/IBase) T> () cil managed
+    {
+        .param constraint T, Program/IBase
+            .custom instance void [mscorlib]mscorlib.CompilerServices.NullableAttribute::.ctor(uint8) = (
+                01 00 01 00 00
+            )
+        // Method begins at RVA 0x2050
+        // Code size 14 (0xe)
+        .maxstack 8
+
+        IL_0000: nop
+        IL_0001: constrained. !!T
+        IL_0007: call void Program/IBase::Method()
+        IL_000c: nop
+        IL_000d: ret
+    } // end of method Program::CallMethod
+
+} // end of class Program
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/DimProvidedByRecursiveInterface.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/DimProvidedByRecursiveInterface.cs
new file mode 100644
index 000000000000..743df7227a80
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/DimProvidedByRecursiveInterface.cs
@@ -0,0 +1,67 @@
+
+
+using Mono.Linker.Tests.Cases.Expectations.Assertions;
+using Mono.Linker.Tests.Cases.Expectations.Metadata;
+
+namespace Mono.Linker.Tests.Cases.Inheritance.Interfaces.DefaultInterfaceMethods
+{
+	[SetupLinkerArgument ("--skip-unresolved", "true")]
+	[TestCaseRequirements (TestRunCharacteristics.SupportsDefaultInterfaceMethods, "Requires support for default interface methods")]
+	[Define ("IL_ASSEMBLY_AVAILABLE")]
+	[SetupCompileBefore ("library.dll", new[] { "Dependencies/DimProvidedByRecursiveInterface.il" })]
+	[SkipILVerify]
+
+#if IL_ASSEMBLY_AVAILABLE
+	[KeptMemberInAssembly ("library.dll", typeof(Program.IFoo), "Method()")]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.IBar))]
+	[KeptMemberInAssembly ("library.dll", typeof(Program.IBar), "Program.IFoo.Method()")]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.IBar), "library.dll", typeof (Program.IFoo))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.MyFoo), "library.dll", typeof (Program.IBaz))]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.IBaz))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.IBaz), "library.dll", typeof (Program.IBar))]
+	[KeptMemberInAssembly ("library.dll", typeof(Program), "CallMethod(Program/IFoo)")]
+#endif
+	class DimProvidedByRecursiveInterface
+	{
+		static void Main ()
+		{
+#if IL_ASSEMBLY_AVAILABLE
+			Program.IFoo foo = new Program.MyFoo ();
+			Program.CallMethod(foo);
+#endif
+		}
+	}
+}
+
+
+
+// public static class Program
+// {
+// 	[Kept]
+// 	interface IFoo
+// 	{
+// 		void Method();
+// 	}
+
+// 	[Kept]
+// 	interface IBar : IFoo
+// 	{
+// 		[Kept]
+// 		void IFoo.Method() { }
+// 	}
+
+// 	[Kept]
+// 	interface IBaz: IBar /* not IFoo */
+// 	{
+// 	}
+
+// 	[Kept]
+// 	[KeptInterface(typeof(IBaz))]
+// 	class MyFoo : IBaz /* not IBar, not IFoo */
+// 	{ }
+
+// 	static void CallMethod(IFoo foo)
+// 	{
+// 		foo.Method();
+// 	}
+// }
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/MultipleDimsProvidedByRecursiveInterface.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/MultipleDimsProvidedByRecursiveInterface.cs
new file mode 100644
index 000000000000..ccaddf8b5309
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/MultipleDimsProvidedByRecursiveInterface.cs
@@ -0,0 +1,88 @@
+
+
+using Mono.Linker.Tests.Cases.Expectations.Assertions;
+using Mono.Linker.Tests.Cases.Expectations.Metadata;
+
+namespace Mono.Linker.Tests.Cases.Inheritance.Interfaces.DefaultInterfaceMethods
+{
+	[SetupLinkerArgument ("--skip-unresolved", "true")]
+	[TestCaseRequirements (TestRunCharacteristics.SupportsDefaultInterfaceMethods, "Requires support for default interface methods")]
+	[Define ("IL_ASSEMBLY_AVAILABLE")]
+	[SetupCompileBefore ("library.dll", new[] { "Dependencies/MultipleDimsProvidedByRecursiveInterface.il" })]
+	[SkipILVerify]
+
+#if IL_ASSEMBLY_AVAILABLE
+	// Both DIMs on I01 and I00 should be kept because one is not more specific than another.
+	[KeptMemberInAssembly ("library.dll", typeof(Program.I0), "Method()")]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.I00))]
+	[KeptMemberInAssembly ("library.dll", typeof(Program.I00), "Program.I0.Method()")]
+	// Bug: DIM resolution doesn't look at recursive interfaces
+	//[KeptMemberInAssembly ("library.dll", typeof(Program.I01), "Program.I0.Method()")]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.I00), "library.dll", typeof (Program.I0))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.MyFoo), "library.dll", typeof (Program.I000))]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.I000))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.I000), "library.dll", typeof (Program.I00))]
+	// Bug: DIM resolution doesn't look at recursive interfaces
+	//[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.MyFoo), "library.dll", typeof (Program.I010))]
+	//[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.I010), "library.dll", typeof (Program.I01))]
+	//[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.I01), "library.dll", typeof (Program.I0))]
+#endif
+	class MultipleDimsProvidedByRecursiveInterface
+	{
+		static void Main ()
+		{
+#if IL_ASSEMBLY_AVAILABLE
+			Program.I0 foo = new Program.MyFoo ();
+			CallMethod(foo);
+#endif
+		}
+#if IL_ASSEMBLY_AVAILABLE
+		[Kept]
+		static void CallMethod(Program.I0 foo)
+		{
+			foo.Method();
+		}
+#endif
+	}
+}
+
+
+
+// public static class Program
+// {
+// 	[Kept]
+// 	interface I0
+// 	{
+// 		void Method();
+// 	}
+
+// 	[Kept]
+// 	interface I00 : I0
+// 	{
+// 		[Kept]
+// 		void I0.Method() { }
+// 	}
+
+// 	[Kept]
+// 	interface I000: I00 /* not I0 */
+// 	{
+// 	}
+
+// 	[Kept]
+// 	interface I01 : I0
+// 	{
+// 		[Kept]
+// 		void I0.Method() { }
+// 	}
+
+// 	[Kept]
+// 	interface I010: I01 /* not I0 */
+// 	{
+// 	}
+
+// 	[Kept]
+// 	[KeptInterface(typeof(I000))]
+// 	class MyFoo : I000, I010
+// 	{ }
+
+// }
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/StaticDimProvidedByUnreferencedIfaceInHierarchy.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/StaticDimProvidedByUnreferencedIfaceInHierarchy.cs
new file mode 100644
index 000000000000..2d10e78d146f
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/DefaultInterfaceMethods/StaticDimProvidedByUnreferencedIfaceInHierarchy.cs
@@ -0,0 +1,69 @@
+
+
+using Mono.Linker.Tests.Cases.Expectations.Assertions;
+using Mono.Linker.Tests.Cases.Expectations.Metadata;
+
+namespace Mono.Linker.Tests.Cases.Inheritance.Interfaces.DefaultInterfaceMethods
+{
+	[SetupLinkerArgument ("--skip-unresolved", "true")]
+	[TestCaseRequirements (TestRunCharacteristics.SupportsDefaultInterfaceMethods, "Requires support for default interface methods")]
+	[Define ("IL_ASSEMBLY_AVAILABLE")]
+	[SetupCompileBefore ("library.dll", new[] { "Dependencies/StaticDimProvidedByUnreferencedIfaceInHierarchy.il" })]
+	[SkipILVerify]
+
+#if IL_ASSEMBLY_AVAILABLE
+	[KeptMemberInAssembly ("library.dll", typeof(Program), "CallMethod<#1>()")]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.IBase))]
+	[KeptMemberInAssembly ("library.dll", typeof(Program.IBase), "Method()")]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.I4))]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.I2))]
+	[KeptMemberInAssembly ("library.dll", typeof(Program.I2), "Program.IBase.Method()")]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.I2), "library.dll", typeof (Program.IBase))]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.I3))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.I3), "library.dll", typeof (Program.I2))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.I4), "library.dll", typeof (Program.I3))]
+#endif
+	class StaticDimProvidedByUnreferencedIfaceInHierarchy
+	{
+		static void Main ()
+		{
+#if IL_ASSEMBLY_AVAILABLE
+			Program.CallMethod<Program.I4>();
+#endif
+		}
+	}
+}
+
+
+
+// public static class Program
+// {
+//	[Kept]
+//	interface IBase
+//	{
+//		[Kept]
+// 		static abstract void Method();
+// 	}
+
+//	[Kept]
+//	[KeptInterface(typeof(IBase)]
+//	interface I2 : IBase
+//	{
+//		[Kept]
+//		static void IBase.Method() { }
+//	}
+
+//	[Kept]
+// 	[KeptInterface(typeof(I2)]
+// 	interface I3 : I2 { }
+
+//	[Kept]
+//	[KeptInterface(typeof(I3)]
+//	interface I4 : I3 { }
+
+//	[Kept]
+//	static void CallMethod<T>() where T : IBase
+//	{
+//		T.Method();
+//	}
+// }
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/Dependencies/InterfaceImplementedThroughBaseInterface.il b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/Dependencies/InterfaceImplementedThroughBaseInterface.il
new file mode 100644
index 000000000000..61080f8b7d06
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/Dependencies/InterfaceImplementedThroughBaseInterface.il
@@ -0,0 +1,48 @@
+// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+.assembly extern mscorlib { }
+
+.assembly 'library' { }
+
+.class interface public auto ansi abstract beforefieldinit IBase
+{
+    // Methods
+    .method public hidebysig newslot abstract virtual 
+        instance void M () cil managed 
+    {
+    } // end of method IBase::M
+
+} // end of class IBase
+
+.class interface public auto ansi abstract beforefieldinit IDerived
+    implements IBase
+{
+} // end of class IDerived
+
+.class public auto ansi beforefieldinit C
+    extends [System.Runtime]System.Object
+    implements IDerived
+{
+    // Methods
+    .method private final hidebysig newslot virtual 
+        instance void IBase.M () cil managed 
+    {
+        .override method instance void IBase::M()
+        // Method begins at RVA 0x2050
+        // Code size 2 (0x2)
+        .maxstack 8
+
+        IL_0001: ret
+    } // end of method C::IBase.M
+
+    .method public hidebysig specialname rtspecialname 
+        instance void .ctor () cil managed 
+    {
+        // Method begins at RVA 0x2053
+        // Code size 8 (0x8)
+        .maxstack 8
+
+        IL_0007: ret
+    } // end of method C::.ctor
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/InterfaceImplementedThroughBaseInterface.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/InterfaceImplementedThroughBaseInterface.cs
new file mode 100644
index 000000000000..e701fb9c28ba
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/InterfaceImplementedThroughBaseInterface.cs
@@ -0,0 +1,34 @@
+﻿// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Mono.Linker.Tests.Cases.Expectations.Assertions;
+using Mono.Linker.Tests.Cases.Expectations.Metadata;
+
+namespace Mono.Linker.Tests.Cases.Inheritance.Interfaces
+{
+	[SetupLinkerArgument ("--skip-unresolved", "true")]
+	[SetupLinkerArgument ("-a", "test.exe", "library")]
+	[SetupLinkerArgument ("-a", "library.dll", "library")]
+	[TestCaseRequirements (TestRunCharacteristics.SupportsDefaultInterfaceMethods, "Requires support for default interface methods")]
+	[Define ("IL_ASSEMBLY_AVAILABLE")]
+	[SetupCompileBefore ("library.dll", new[] { "Dependencies/InterfaceImplementedThroughBaseInterface.il" })]
+	[SkipILVerify]
+
+#if IL_ASSEMBLY_AVAILABLE
+	[KeptMemberInAssembly ("library.dll", typeof(C), "IBase.M()")]
+#endif
+	[KeptMember(".ctor()")]
+	public class InterfaceImplementedThroughBaseInterface
+	{
+		public static void Main ()
+		{
+		}
+	}
+}
+
+
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/InterfaceVariantsGeneric.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/InterfaceVariantsGeneric.cs
new file mode 100644
index 000000000000..17f5aed992eb
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/InterfaceVariantsGeneric.cs
@@ -0,0 +1,50 @@
+﻿// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using Mono.Linker.Tests.Cases.Expectations.Assertions;
+using Mono.Linker.Tests.Cases.Expectations.Helpers;
+using Mono.Linker.Tests.Cases.Expectations.Metadata;
+
+namespace Mono.Linker.Tests.Cases.Inheritance.Interfaces
+{
+	public class InterfaceVariantsGeneric
+	{
+		public static void Main ()
+		{
+			G<int, float> g = new C ();
+			g.M (1, 2.0f);
+		}
+
+		[Kept]
+		interface G<T, U>
+		{
+			[Kept]
+			void M (T t, U u);
+		}
+		[Kept]
+		public class MyT { }
+		[Kept]
+		public class MyU { }
+		[Kept]
+		[KeptInterface (typeof (G<int, float>))]
+		[KeptInterface (typeof (G<long, double>))]
+		[KeptInterface (typeof (G<MyT, MyU>))]
+		[KeptMember (".ctor()")]
+		public class C : G<int, float>, G<long, double>, G<MyT, MyU>
+		{
+			[Kept]
+			public void M (int t, float u) { }
+
+			public void M (long t, double u) { }
+
+			[Kept]
+			public void M (MyT t, MyU u) { }
+
+			[Kept]
+			void G<long,double>.M(long t, double u) { }
+		}
+	}
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/OnReferenceType/BaseProvidesInterfaceMember/GenericInterfaceWithMethodManyVariations.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/OnReferenceType/BaseProvidesInterfaceMember/GenericInterfaceWithMethodManyVariations.cs
index 078f4b94c60b..79ad8eaaa58c 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/OnReferenceType/BaseProvidesInterfaceMember/GenericInterfaceWithMethodManyVariations.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/OnReferenceType/BaseProvidesInterfaceMember/GenericInterfaceWithMethodManyVariations.cs
@@ -68,4 +68,4 @@ class Bar
 		{
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/Dependencies/GenericInterfaceImplementedRecursively.il b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/Dependencies/GenericInterfaceImplementedRecursively.il
new file mode 100644
index 000000000000..7ff59e54ad06
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/Dependencies/GenericInterfaceImplementedRecursively.il
@@ -0,0 +1,45 @@
+// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+.assembly extern mscorlib { }
+
+.assembly 'library' { }
+
+.class public auto ansi abstract sealed beforefieldinit Program
+    extends [mscorlib]System.Object
+{
+    // Nested Types
+    .class interface nested public auto ansi abstract beforefieldinit IBase`1<T>
+    {
+    } // end of class IBase
+
+    .class interface nested public auto ansi abstract beforefieldinit IMiddle`1<T>
+        implements class Program/IBase`1<!T>
+    {
+    } // end of class IMiddle
+
+    .class interface nested public auto ansi abstract beforefieldinit IDerived`1<T>
+        implements class Program/IMiddle`1<!T>
+    {
+    } // end of class IDerived
+
+    .class nested public auto ansi beforefieldinit C
+        extends [mscorlib]System.Object
+        implements class Program/IDerived`1<int32>
+    {
+        // Methods
+        .method public hidebysig specialname rtspecialname
+            instance void .ctor () cil managed
+        {
+            // Method begins at RVA 0x2066
+            // Code size 8 (0x8)
+            .maxstack 8
+
+            IL_0000: ldarg.0
+            IL_0001: call instance void [mscorlib]System.Object::.ctor()
+            IL_0006: nop
+            IL_0007: ret
+        } // end of method C::.ctor
+
+    } // end of class C
+} // end of class Program
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/Dependencies/InterfaceImplementedRecursively.il b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/Dependencies/InterfaceImplementedRecursively.il
new file mode 100644
index 000000000000..c1d2943997da
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/Dependencies/InterfaceImplementedRecursively.il
@@ -0,0 +1,67 @@
+// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+.assembly extern mscorlib { }
+
+.assembly 'library' { }
+
+.class public auto ansi abstract sealed beforefieldinit Program
+    extends [System.Runtime]System.Object
+{
+    // Nested Types
+    .class interface nested public auto ansi abstract beforefieldinit IBase
+    {
+    } // end of class IBase
+
+    .class interface nested public auto ansi abstract beforefieldinit IMiddle
+        implements Program/IBase
+    {
+    } // end of class IMiddle
+
+    .class interface nested public auto ansi abstract beforefieldinit IDerived
+        implements Program/IMiddle
+    {
+    } // end of class IDerived
+
+    .class nested public auto ansi beforefieldinit C
+        extends [System.Runtime]System.Object
+        implements Program/IDerived
+    {
+        // Methods
+        .method public hidebysig specialname rtspecialname
+            instance void .ctor () cil managed
+        {
+            // Method begins at RVA 0x2066
+            // Code size 8 (0x8)
+            .maxstack 8
+
+            IL_0000: ldarg.0
+            IL_0001: call instance void [System.Runtime]System.Object::.ctor()
+            IL_0006: nop
+            IL_0007: ret
+        } // end of method C::.ctor
+
+    } // end of class C
+
+
+    // Methods
+    .method public hidebysig static
+        void Main () cil managed
+    {
+        // Method begins at RVA 0x2050
+        // Code size 10 (0xa)
+        .maxstack 1
+        .locals init (
+            [0] class Program/IBase b,
+            [1] class Program/C c
+        )
+
+        IL_0000: nop
+        IL_0001: ldnull
+        IL_0002: stloc.0
+        IL_0003: newobj instance void Program/C::.ctor()
+        IL_0008: stloc.1
+        IL_0009: ret
+    } // end of method Program::Main
+
+} // end of class Program
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/Dependencies/RecursiveInterfaceTwoImplementationPaths.il b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/Dependencies/RecursiveInterfaceTwoImplementationPaths.il
new file mode 100644
index 000000000000..854b0cd4a45b
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/Dependencies/RecursiveInterfaceTwoImplementationPaths.il
@@ -0,0 +1,62 @@
+
+// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+.assembly extern mscorlib { }
+
+.assembly 'library' { }
+
+.class public auto ansi abstract sealed beforefieldinit Library
+    extends [mscorlib]System.Object
+{
+    .class interface nested public auto ansi abstract beforefieldinit I0
+    {
+    } // end of class I0
+
+    .class interface nested public auto ansi abstract beforefieldinit I00
+        implements Library/I0
+    {
+    } // end of class I00
+
+    .class interface nested public auto ansi abstract beforefieldinit I01
+        implements Library/I0
+    {
+    } // end of class I01
+
+    .class interface nested public auto ansi abstract beforefieldinit I000
+        implements Library/I00
+    {
+    } // end of class I000
+
+    .class interface nested public auto ansi abstract beforefieldinit I010
+        implements Library/I01
+    {
+    } // end of class I010
+
+    .class interface nested public auto ansi abstract beforefieldinit I0100
+        implements Library/I010
+    {
+    } // end of class I010
+
+    .class nested public auto ansi beforefieldinit MyClass
+        extends [mscorlib]System.Object
+        implements Library/I0100,
+                   Library/I000
+    {
+        // Methods
+        .method public hidebysig specialname rtspecialname
+            instance void .ctor () cil managed
+        {
+            // Method begins at RVA 0x2076
+            // Code size 8 (0x8)
+            .maxstack 8
+
+            IL_0000: ldarg.0
+            IL_0001: call instance void [mscorlib]System.Object::.ctor()
+            IL_0006: nop
+            IL_0007: ret
+        } // end of method MyFoo::.ctor
+
+    } // end of class MyFoo
+
+} // end of class Library
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/GenericInterfaceImplementedRecursively.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/GenericInterfaceImplementedRecursively.cs
new file mode 100644
index 000000000000..bb0d318cac1a
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/GenericInterfaceImplementedRecursively.cs
@@ -0,0 +1,42 @@
+﻿// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Mono.Linker.Tests.Cases.Expectations.Assertions;
+using Mono.Linker.Tests.Cases.Expectations.Metadata;
+
+namespace Mono.Linker.Tests.Cases.Inheritance.Interfaces.RecursiveInterfaces
+{
+	[SetupLinkerArgument ("--skip-unresolved", "true")]
+	[TestCaseRequirements (TestRunCharacteristics.SupportsDefaultInterfaceMethods, "Requires support for default interface methods")]
+	[Define ("IL_ASSEMBLY_AVAILABLE")]
+	[SetupCompileBefore ("library.dll", new[] { "Dependencies/GenericInterfaceImplementedRecursively.il" })]
+	[SkipILVerify]
+#if IL_ASSEMBLY_AVAILABLE
+	[KeptTypeInAssembly ("library.dll", typeof(Program.IBase<>))]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.IMiddle<>))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.IMiddle<>), "library.dll", typeof (Program.IBase<>))]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.IDerived<>))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.IDerived<>), "library.dll", typeof (Program.IMiddle<>))]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.C))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.C), "library.dll", typeof (Program.IDerived<int>))]
+#endif
+	/// <summary>
+	/// This test case is to verify that the linker will keep all the metadata necessary for C to implement IBase when an interfaceImpl isn't directly on C.
+	/// </summary>
+	class GenericInterfaceImplementedRecursively
+	{
+		public static void Main()
+		{
+
+#if IL_ASSEMBLY_AVAILABLE
+			Program.IBase<int> _ = null;
+			_ = new Program.C();
+#endif
+		}
+	}
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/InterfaceImplementedRecursively.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/InterfaceImplementedRecursively.cs
new file mode 100644
index 000000000000..89f59777c5fc
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/InterfaceImplementedRecursively.cs
@@ -0,0 +1,49 @@
+﻿// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Mono.Linker.Tests.Cases.Expectations.Assertions;
+using Mono.Linker.Tests.Cases.Expectations.Metadata;
+
+namespace Mono.Linker.Tests.Cases.Inheritance.Interfaces.RecursiveInterfaces
+{
+	[SetupLinkerArgument ("--skip-unresolved", "true")]
+	[TestCaseRequirements (TestRunCharacteristics.SupportsDefaultInterfaceMethods, "Requires support for default interface methods")]
+	[Define ("IL_ASSEMBLY_AVAILABLE")]
+	[SetupCompileBefore ("library.dll", new[] { "Dependencies/InterfaceImplementedRecursively.il" })]
+	[SkipILVerify]
+#if IL_ASSEMBLY_AVAILABLE
+	[KeptTypeInAssembly ("library.dll", typeof(Program.IBase))]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.IMiddle))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.IMiddle), "library.dll", typeof (Program.IBase))]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.IDerived))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.IDerived), "library.dll", typeof (Program.IMiddle))]
+	[KeptTypeInAssembly ("library.dll", typeof(Program.C))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Program.C), "library.dll", typeof (Program.IDerived))]
+#endif
+	/// <summary>
+	/// This test case is to verify that the linker will keep all the metadata necessary for C to implement IBase when an interfaceImpl isn't directly on C.
+	/// </summary>
+	class InterfaceImplementedRecursively
+	{
+		public static void Main()
+		{
+
+#if IL_ASSEMBLY_AVAILABLE
+			Program.IBase b = null;
+			object c = new Program.C();
+
+#endif
+		}
+	}
+	// interface IBase {}
+	// interface IMiddle : IBase {}
+	// interface IDerived : IMiddle {}
+	// class C : IDerived
+	// {
+	// }
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/RecursiveInterfaceKept.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/RecursiveInterfaceKept.cs
new file mode 100644
index 000000000000..56e08d67be74
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Inheritance.Interfaces/RecursiveInterfaces/RecursiveInterfaceKept.cs
@@ -0,0 +1,36 @@
+﻿// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using Mono.Linker.Tests.Cases.Expectations.Assertions;
+using Mono.Linker.Tests.Cases.Expectations.Metadata;
+
+namespace Mono.Linker.Tests.Cases.Inheritance.Interfaces.RecursiveInterfaces
+{
+	/// <summary>
+	/// This tests that when a type implements an interface recursively (via implementations on implemented interfaces),
+	/// the interface implementations kept are in type declaration order according to ECMA-335 12.2
+	/// </summary>
+	[TestCaseRequirements (TestRunCharacteristics.SupportsDefaultInterfaceMethods, "Requires support for default interface methods")]
+	[Define ("IL_ASSEMBLY_AVAILABLE")]
+	[SetupCompileBefore ("library.dll", new[] { "Dependencies/RecursiveInterfaceTwoImplementationPaths.il" })]
+	[SkipILVerify]
+#if IL_ASSEMBLY_AVAILABLE
+	[KeptTypeInAssembly ("library.dll", typeof(Library.MyClass))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Library.MyClass), "library.dll", typeof (Library.I0100))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Library.I0100), "library.dll", typeof (Library.I010))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Library.I010), "library.dll", typeof (Library.I01))]
+	[KeptInterfaceOnTypeInAssembly ("library.dll", typeof (Library.I01), "library.dll", typeof (Library.I0))]
+	[RemovedTypeInAssembly("library.dll", typeof(Library.I00))]
+	[RemovedTypeInAssembly("library.dll", typeof(Library.I000))]
+	[RemovedInterfaceOnTypeInAssembly("library.dll", typeof (Library.MyClass), "library.dll", typeof (Library.I000))]
+#endif
+	public class RecursiveInterfaceKept
+	{
+		public static void Main()
+		{
+#if IL_ASSEMBLY_AVAILABLE
+			Library.I0 _ = new Library.MyClass();
+#endif
+		}
+	}
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Libraries/RootLibrary.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Libraries/RootLibrary.cs
index 34c38504d088..9e8154e1a408 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/Libraries/RootLibrary.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Libraries/RootLibrary.cs
@@ -18,6 +18,7 @@ namespace Mono.Linker.Tests.Cases.Libraries
 	[SetupLinkerArgument ("-a", "test.exe", "library")]
 	[SetupLinkerArgument ("--enable-opt", "ipconstprop")]
 	[VerifyMetadataNames]
+	[SetupLinkerArgument ("--feature", "Mono.Linker.Tests.Cases.Libraries.RootLibrary.FeatureGuardSubstitutionsTest.FeatureSwitch", "false")]
 	public class RootLibrary
 	{
 		private int field;
@@ -161,6 +162,48 @@ private void LocalMethod ()
 			}
 		}
 
+		[Kept]
+		public class FeatureGuardSubstitutionsTest
+		{
+			[Kept]
+			[KeptAttributeAttribute (typeof (FeatureGuardAttribute))]
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			private static bool GuardUnreferencedCode {
+				[Kept]
+				get => throw null;
+			}
+
+			[Kept]
+			// Body is not modified because feature guard substitutions are disabled in library mode
+			private static void TestGuard () {
+				if (GuardUnreferencedCode)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureSwitchDefinition ("Mono.Linker.Tests.Cases.Libraries.RootLibrary.FeatureGuardSubstitutionsTest.FeatureSwitch")]
+			private static bool FeatureSwitch => throw null;
+
+			[Kept]
+			// Feature switches are still substituted in library mode if explicitly passed on the command-line
+			[ExpectBodyModified]
+			private static void TestFeatureSwitch () {
+				if (FeatureSwitch)
+					RequiresUnreferencedCode ();
+			}
+
+			[Kept]
+			public FeatureGuardSubstitutionsTest ()
+			{
+				TestGuard ();
+				TestFeatureSwitch ();
+			}
+
+			[Kept]
+			[KeptAttributeAttribute (typeof (RequiresUnreferencedCodeAttribute))]
+			[RequiresUnreferencedCode (nameof (RequiresUnreferencedCode))]
+			private static void RequiresUnreferencedCode () { }
+		}
+
 		[Kept]
 		[KeptInterface (typeof (I))]
 		public class IfaceClass : I
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/LinkXml/TypeWithPreserveFieldsHasBackingFieldsOfPropertiesRemoved.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/LinkXml/TypeWithPreserveFieldsHasBackingFieldsOfPropertiesRemoved.cs
index 4384c8d2a433..f516a7945f9a 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/LinkXml/TypeWithPreserveFieldsHasBackingFieldsOfPropertiesRemoved.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/LinkXml/TypeWithPreserveFieldsHasBackingFieldsOfPropertiesRemoved.cs
@@ -40,32 +40,28 @@ class Unused : IFoo<int>, IFoo<string>, IFoo<Cat>, IFoo2<int>, IFoo3<int, string
 			int IFoo<IFoo<int>>.Bar { get; set; }
 		}
 
-		[Kept (By = Tool.NativeAot)]
 		interface IDog
 		{
 			string Name { get; set; }
 		}
 
-		[Kept]
+		[Kept (By = Tool.Trimmer)]
 		interface IFoo<T>
 		{
 
 			int Bar { get; set; }
 		}
 
-		[Kept (By = Tool.NativeAot)]
 		interface IFoo2<T>
 		{
 			int Bar2 { get; set; }
 		}
 
-		[Kept (By = Tool.NativeAot)]
 		interface IFoo3<T, K, J>
 		{
 			int Bar3 { get; set; }
 		}
 
-		[Kept (By = Tool.NativeAot)]
 		class Cat
 		{
 		}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/LinkXml/UnusedInterfaceTypeOnTypeWithPreserveNothingIsRemoved.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/LinkXml/UnusedInterfaceTypeOnTypeWithPreserveNothingIsRemoved.cs
index 31659ef97725..ad436e324c4d 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/LinkXml/UnusedInterfaceTypeOnTypeWithPreserveNothingIsRemoved.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/LinkXml/UnusedInterfaceTypeOnTypeWithPreserveNothingIsRemoved.cs
@@ -10,7 +10,6 @@ public static void Main ()
 		{
 		}
 
-		[Kept (By = Tool.NativeAot)]
 		interface IFoo
 		{
 		}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Mono.Linker.Tests.Cases.csproj b/src/tools/illink/test/Mono.Linker.Tests.Cases/Mono.Linker.Tests.Cases.csproj
index b30d39a672b4..2b025d9861a1 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/Mono.Linker.Tests.Cases.csproj
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Mono.Linker.Tests.Cases.csproj
@@ -8,6 +8,7 @@
     <Compile Remove="LinkXml\Dependencies\UsedNonRequiredExportedTypeIsKept_fwd.cs" />
     <Compile Remove="LinkXml\Dependencies\UsedNonRequiredExportedTypeIsKeptWhenRooted_fwd.cs" />
     <Compile Remove="References\Dependencies\CustomMarkHandlerSaveAssembly.cs" />
+    <Compile Remove="Substitutions\Dependencies\TestFeatures.cs" />
     <Compile Remove="TypeForwarding\Dependencies\AnotherLibraryForwarder.cs" />
     <Compile Remove="TypeForwarding\Dependencies\AssemblyReferenceIsRemovedWhenUnused_Library.cs" />
     <Compile Remove="TypeForwarding\Dependencies\ForwarderLibrary.cs" />
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Reflection/TypeHierarchyReflectionWarnings.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Reflection/TypeHierarchyReflectionWarnings.cs
index f1e2faf60c46..13ecc2ec2906 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/Reflection/TypeHierarchyReflectionWarnings.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Reflection/TypeHierarchyReflectionWarnings.cs
@@ -246,10 +246,11 @@ class AnnotatedPublicEvents
 			public delegate void MyEventHandler (object sender, int i);
 
 			[Kept]
-			[ExpectedWarning ("IL2026", "--RUC on add_RUCEvent--", ProducedBy = Tool.Trimmer)]
 			public event MyEventHandler RUCEvent {
 				[Kept]
 				[ExpectedWarning ("IL2112", nameof (AnnotatedPublicEvents), "--RUC on add_RUCEvent--")]
+				// https://github.com/dotnet/runtime/issues/100499
+				[ExpectedWarning ("IL2112", nameof (AnnotatedPublicEvents), "--RUC on add_RUCEvent--", ProducedBy = Tool.Trimmer)]
 				[KeptAttributeAttribute (typeof (RequiresUnreferencedCodeAttribute))]
 				[RequiresUnreferencedCode ("--RUC on add_RUCEvent--")]
 				add { }
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/RequiresCapability/BasicRequires.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/RequiresCapability/BasicRequires.cs
index d7643f8abe1c..d9536b935810 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/RequiresCapability/BasicRequires.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/RequiresCapability/BasicRequires.cs
@@ -137,7 +137,6 @@ static void TestRequiresFromNameOf ()
 
 		class OnEventMethod
 		{
-			[ExpectedWarning ("IL2026", "--EventToTestRemove.remove--", ProducedBy = Tool.Trimmer)]
 			static event EventHandler EventToTestRemove {
 				add { }
 				[RequiresUnreferencedCode ("Message for --EventToTestRemove.remove--")]
@@ -146,7 +145,6 @@ static event EventHandler EventToTestRemove {
 				remove { }
 			}
 
-			[ExpectedWarning ("IL2026", "--EventToTestAdd.add--", ProducedBy = Tool.Trimmer)]
 			static event EventHandler EventToTestAdd {
 				[RequiresUnreferencedCode ("Message for --EventToTestAdd.add--")]
 				[RequiresAssemblyFiles ("Message for --EventToTestAdd.add--")]
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/RequiresCapability/RequiresOnClass.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/RequiresCapability/RequiresOnClass.cs
index 1489f1152688..f1faf7684750 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/RequiresCapability/RequiresOnClass.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/RequiresCapability/RequiresOnClass.cs
@@ -492,9 +492,6 @@ class MemberTypesWithRequires
 			public static int field;
 			public static int Property { get; set; }
 
-			// These should not be reported https://github.com/mono/linker/issues/2218
-			[ExpectedWarning ("IL2026", "MemberTypesWithRequires.Event.add", ProducedBy = Tool.Trimmer)]
-			[ExpectedWarning ("IL2026", "MemberTypesWithRequires.Event.remove", ProducedBy = Tool.Trimmer)]
 			public static event EventHandler Event;
 		}
 
@@ -838,24 +835,21 @@ public static void Test ()
 
 		class ReflectionAccessOnEvents
 		{
-			// Most of the tests in this run into https://github.com/dotnet/linker/issues/2218
+			// Most of the tests in this run into https://github.com/dotnet/runtime/issues/100499
 			// So for now keeping just a very simple test
 
 			[RequiresUnreferencedCode ("--WithRequires--")]
 			[RequiresDynamicCode ("--WithRequires--")]
 			class WithRequires
 			{
-				// These should be reported only in TestDirectReflectionAccess
-				// https://github.com/mono/linker/issues/2218
-				[ExpectedWarning ("IL2026", "StaticEvent.add", ProducedBy = Tool.Trimmer)]
-				[ExpectedWarning ("IL2026", "StaticEvent.remove", ProducedBy = Tool.Trimmer)]
 				public static event EventHandler StaticEvent;
 			}
 
 			[ExpectedWarning ("IL2026", "StaticEvent.add")]
 			[ExpectedWarning ("IL3050", "StaticEvent.add", ProducedBy = Tool.NativeAot)]
-			// https://github.com/mono/linker/issues/2218
-			[ExpectedWarning ("IL2026", "StaticEvent.remove", ProducedBy = Tool.Analyzer | Tool.NativeAot)]
+			// https://github.com/dotnet/runtime/issues/100499
+			[ExpectedWarning ("IL2026", "StaticEvent.add", ProducedBy = Tool.Trimmer)]
+			[ExpectedWarning ("IL2026", "StaticEvent.remove")]
 			[ExpectedWarning ("IL3050", "StaticEvent.remove", ProducedBy = Tool.NativeAot)]
 			static void TestDirectReflectionAccess ()
 			{
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Substitutions/Dependencies/TestFeatures.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Substitutions/Dependencies/TestFeatures.cs
new file mode 100644
index 000000000000..942c9f3586dd
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Substitutions/Dependencies/TestFeatures.cs
@@ -0,0 +1,12 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace ILLink.RoslynAnalyzer
+{
+	public class TestFeatures
+	{
+		public static bool IsUnreferencedCodeSupported => true;
+
+		public static bool IsAssemblyFilesSupported => true;
+	}
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Substitutions/FeatureGuardSubstitutions.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Substitutions/FeatureGuardSubstitutions.cs
new file mode 100644
index 000000000000..e34f2b4bbfd3
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Substitutions/FeatureGuardSubstitutions.cs
@@ -0,0 +1,405 @@
+ // Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using ILLink.RoslynAnalyzer;
+using Mono.Linker.Tests.Cases.Expectations.Assertions;
+using Mono.Linker.Tests.Cases.Expectations.Helpers;
+using Mono.Linker.Tests.Cases.Expectations.Metadata;
+
+namespace Mono.Linker.Tests.Cases.Substitutions
+{
+	[ExpectedNoWarnings]
+	[SetupCompileBefore ("TestFeatures.dll", new[] { "Dependencies/TestFeatures.cs" })]
+	[SetupCompileResource ("FeatureGuardSubstitutions.xml", "ILLink.Substitutions.xml")]
+	[IgnoreSubstitutions (false)] 
+#if NATIVEAOT
+	// ILC has different constant propagation behavior than ILLink, and we don't have
+	// the test infrastructure to check for different IL sequences between ILLink/ILC.
+	// Just validate the warning behavior instead.
+	[SkipKeptItemsValidation]
+#else
+	// Tell linker to treat RequiresDynamicCodeAttribute as a disabled feature:
+	[SetupLinkerArgument ("--feature", "System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeSupported", "false")]
+#endif
+	[SetupLinkerArgument ("--feature", "Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.DefineFeatureGuard.FeatureSwitch", "false")]
+	[SetupLinkerArgument ("--feature", "Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.DefineFeatureGuard.FeatureSwitchAndGuard", "false")]
+	[SetupLinkerArgument ("--feature", "Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.FeatureGuardPrecedence.GuardAndSwitch", "true")]
+	[SetupLinkerArgument ("--feature", "Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.FeatureGuardPrecedence.SwitchWithXml", "false")]
+	[SetupLinkerArgument ("--feature", "Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.FeatureGuardPrecedence.GuardAndSwitchWithXml", "false")]
+	public class FeatureGuardSubstitutions
+	{
+		public static void Main ()
+		{
+			DefineFeatureGuard.Test ();
+			FeatureGuardPrecedence.Test ();
+		}
+
+		[Kept]
+		class DefineFeatureGuard {
+			[FeatureGuard (typeof(RequiresDynamicCodeAttribute))]
+			static bool GuardDynamicCode => RuntimeFeature.IsDynamicCodeSupported;
+
+			[Kept]
+			[ExpectedInstructionSequence (new[] {
+				"nop",
+				"ldc.i4.0",
+				"stloc.0",
+				"ldloc.0",
+				"brfalse.s il_6",
+				"ret"
+			})]
+			static void TestGuardDynamicCode ()
+			{
+				if (GuardDynamicCode)
+					RequiresDynamicCode ();
+			}
+
+			[FeatureGuard (typeof(RequiresUnreferencedCodeAttribute))]
+			static bool GuardUnreferencedCode => TestFeatures.IsUnreferencedCodeSupported;
+
+			[Kept]
+			[ExpectedInstructionSequence (new[] {
+				"nop",
+				"ldc.i4.0",
+				"stloc.0",
+				"ldloc.0",
+				"brfalse.s il_6",
+				"ret"
+			})]
+
+			static void TestGuardUnreferencedCode ()
+			{
+				if (GuardUnreferencedCode)
+					RequiresUnreferencedCode ();
+			}
+
+			[Kept]
+			[KeptAttributeAttribute (typeof (FeatureGuardAttribute))]
+			[FeatureGuard (typeof(RequiresAssemblyFilesAttribute))]
+			static bool GuardAssemblyFiles {
+				[Kept]
+				get => TestFeatures.IsAssemblyFilesSupported;
+			}
+
+			[Kept]
+			// Linker doesn't treat RequiresAssemblyFilesAttribute as a disabled feature, so it's not removed.
+			static void TestGuardAssemblyFiles ()
+			{
+				if (GuardAssemblyFiles)
+					RequiresAssemblyFiles ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresDynamicCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureGuard (typeof (RequiresDynamicCodeAttribute))]
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool GuardDynamicCodeAndUnreferencedCode => RuntimeFeature.IsDynamicCodeSupported && TestFeatures.IsUnreferencedCodeSupported;
+
+			[Kept]
+			[ExpectedInstructionSequence (new[] {
+				"nop",
+				"ldc.i4.0",
+				"stloc.0",
+				"ldloc.0",
+				"brfalse.s il_6",
+				"ret"
+			})]
+
+			static void TestMultipleGuards ()
+			{
+				if (GuardDynamicCodeAndUnreferencedCode) {
+					RequiresDynamicCode ();
+					RequiresUnreferencedCode ();
+				}
+			}
+
+			static class UnreferencedCode {
+				[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+				public static bool GuardUnreferencedCode => TestFeatures.IsUnreferencedCodeSupported;
+			}
+
+			static class UnreferencedCodeIndirect {
+				[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+				public static bool GuardUnreferencedCode => UnreferencedCode.GuardUnreferencedCode;
+			}
+
+			// Currently there is no way to annotate a feature type as depending on another feature,
+			// so indirect guards are expressed the same way as direct guards, by using
+			// FeatureGuardAttribute that references the underlying feature type.
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool GuardUnreferencedCodeIndirect => UnreferencedCodeIndirect.GuardUnreferencedCode;
+
+			[Kept]
+			[ExpectedInstructionSequence (new[] {
+				"nop",
+				"ldc.i4.0",
+				"stloc.0",
+				"ldloc.0",
+				"brfalse.s il_6",
+				"ret"
+			})]
+
+			static void TestIndirectGuard ()
+			{
+				if (GuardUnreferencedCodeIndirect)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureSwitchDefinition ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.DefineFeatureGuard.FeatureSwitch")]
+			static bool FeatureSwitch => AppContext.TryGetSwitch ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.DefineFeatureGuard.FeatureSwitch", out bool isEnabled) && isEnabled;
+
+			[ExpectedWarning ("IL2026", ProducedBy = Tool.Analyzer)] // Analyzer doesn't respect FeatureSwitchDefinition or feature settings
+			[ExpectedInstructionSequence (new[] {
+				"nop",
+				"ldc.i4.0",
+				"stloc.0",
+				"ldloc.0",
+				"brfalse.s il_6",
+				"ret"
+			})]
+
+			[Kept]
+			static void TestFeatureSwitch ()
+			{
+				if (FeatureSwitch)
+					RequiresUnreferencedCode ();
+			}
+
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureSwitchDefinition ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.DefineFeatureGuard.FeatureSwitchAndGuard")]
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool FeatureSwitchAndGuard => AppContext.TryGetSwitch ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.DefineFeatureGuard.FeatureSwitchAndGuard", out bool isEnabled) && isEnabled;
+
+			[Kept]
+			[ExpectedInstructionSequence (new[] {
+				"nop",
+				"ldc.i4.0",
+				"stloc.0",
+				"ldloc.0",
+				"brfalse.s il_6",
+				"ret"
+			})]
+
+			static void TestFeatureSwitchAndGuard ()
+			{
+				if (FeatureSwitchAndGuard)
+					RequiresUnreferencedCode ();
+			}
+
+			static class UnreferencedCodeCycle {
+				[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+				public static bool IsSupported => UnreferencedCodeCycle.IsSupported;
+			}
+
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool GuardUnreferencedCodeCycle => TestFeatures.IsUnreferencedCodeSupported;
+
+			[Kept]
+			[ExpectedInstructionSequence (new[] {
+				"nop",
+				"ldc.i4.0",
+				"stloc.0",
+				"ldloc.0",
+				"brfalse.s il_6",
+				"ret"
+			})]
+
+			static void TestFeatureDependencyCycle1 ()
+			{
+				if (GuardUnreferencedCodeCycle)
+					RequiresUnreferencedCode ();
+			}
+
+			static class UnreferencedCodeCycle2_A {
+				[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+				public static bool IsSupported => UnreferencedCodeCycle2_A.IsSupported;
+			}
+
+			static class UnreferencedCodeCycle2_B {
+				[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+				public static bool IsSupported => UnreferencedCodeCycle2_B.IsSupported;
+			}
+
+			static class UnreferencedCodeCycle2 {
+				[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+				public static bool IsSupported => UnreferencedCodeCycle2_A.IsSupported;
+			}
+
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool GuardUnreferencedCodeCycle2 => TestFeatures.IsUnreferencedCodeSupported;
+
+			[Kept]
+			[ExpectedInstructionSequence (new[] {
+				"nop",
+				"ldc.i4.0",
+				"stloc.0",
+				"ldloc.0",
+				"brfalse.s il_6",
+				"ret"
+			})]
+			static void TestFeatureDependencyCycle2 ()
+			{
+				if (GuardUnreferencedCodeCycle2)
+					RequiresUnreferencedCode ();
+			}
+
+			[Kept]
+			public static void Test ()
+			{
+				TestGuardDynamicCode ();
+				TestGuardUnreferencedCode ();
+				TestGuardAssemblyFiles ();
+				TestMultipleGuards ();
+				TestIndirectGuard ();
+				TestFeatureDependencyCycle1 ();
+				TestFeatureDependencyCycle2 ();
+				TestFeatureSwitch ();
+				TestFeatureSwitchAndGuard ();
+			}
+		}
+
+		[Kept]
+		class FeatureGuardPrecedence {
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureSwitchDefinition ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.FeatureGuardPrecedence.GuardAndSwitch")]
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool GuardAndSwitch => AppContext.TryGetSwitch ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.FeatureGuardPrecedence.GuardAndSwitch", out bool isEnabled) && isEnabled;
+
+			[Kept]
+			[ExpectedInstructionSequence (new[] {
+				"nop",
+				"ldc.i4.1",
+				"stloc.0",
+				"ldloc.0",
+				"pop",
+				"call System.Void Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions::RequiresUnreferencedCode()",
+				"nop",
+				"ret"
+			})]
+			// ILLink/ILCompiler ignore FeatureGuard on properties that also have FeatureSwitchDefinition
+			[ExpectedWarning ("IL2026", ProducedBy = Tool.Trimmer | Tool.NativeAot)]
+			static void TestSwitchWinsOverGuard ()
+			{
+				if (GuardAndSwitch)
+					RequiresUnreferencedCode ();
+			}
+
+			[Kept]
+			[KeptAttributeAttribute (typeof (FeatureSwitchDefinitionAttribute))]
+			[KeptAttributeAttribute (typeof (FeatureGuardAttribute))]
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureSwitchDefinition ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.FeatureGuardPrecedence.GuardAndSwitchNotSet")]
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool GuardAndSwitchNotSet {
+				[Kept]
+				get => AppContext.TryGetSwitch ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.FeatureGuardPrecedence.GuardAndSwitchNotSet", out bool isEnabled) && isEnabled;
+			}
+
+			[Kept]
+			// No IL modifications because feature is not set, and FeatureGuard is ignored due to FeatureSwitchDefinition.
+			[ExpectedWarning ("IL2026", ProducedBy = Tool.Trimmer | Tool.NativeAot)]
+			static void TestSwitchNotSetWinsOverGuard ()
+			{
+				if (GuardAndSwitchNotSet)
+					RequiresUnreferencedCode ();
+			}
+
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool GuardWithXml => TestFeatures.IsUnreferencedCodeSupported;
+
+			[Kept]
+			[ExpectedInstructionSequence (new[] {
+				"nop",
+				"ldc.i4.1",
+				"stloc.0",
+				"ldloc.0",
+				"pop",
+				"call System.Void Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions::RequiresUnreferencedCode()",
+				"nop",
+				"ret"
+			})]
+			[ExpectedWarning ("IL2026", ProducedBy = Tool.Trimmer | Tool.NativeAot)]
+			static void TestXmlWinsOverGuard ()
+			{
+				if (GuardWithXml)
+					RequiresUnreferencedCode ();
+			}
+
+			[KeptAttributeAttribute (typeof (FeatureSwitchDefinitionAttribute))]
+			[KeptAttributeAttribute (typeof (FeatureGuardAttribute))]
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureSwitchDefinition ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.FeatureGuardPrecedence.SwitchWithXml")]
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool SwitchWithXml => AppContext.TryGetSwitch ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.FeatureGuardPrecedence.SwitchWithXml", out bool isEnabled) && isEnabled;
+
+			[Kept]
+			// XML substitutions win despite FeatureSwitchDefinition and feature settings.
+			[ExpectedInstructionSequence (new[] {
+				"nop",
+				"ldc.i4.1",
+				"stloc.0",
+				"ldloc.0",
+				"pop",
+				"call System.Void Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions::RequiresUnreferencedCode()",
+				"nop",
+				"ret"
+			})]
+			[ExpectedWarning ("IL2026", ProducedBy = Tool.Trimmer | Tool.NativeAot)]
+			static void TestXmlWinsOverSwitch () {
+				if (SwitchWithXml)
+					RequiresUnreferencedCode ();
+			}
+
+			[KeptAttributeAttribute (typeof (FeatureSwitchDefinitionAttribute))]
+			[KeptAttributeAttribute (typeof (FeatureGuardAttribute))]
+			[ExpectedWarning ("IL4000", nameof (RequiresUnreferencedCodeAttribute), ProducedBy = Tool.Analyzer)]
+			[FeatureSwitchDefinition ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardPrecedence.GuardAndSwitchWithXml")]
+			[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+			static bool GuardAndSwitchWithXml => AppContext.TryGetSwitch ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions.FeatureGuardPrecedence.GuardAndSwitchWithXml", out bool isEnabled) && isEnabled;
+
+			[Kept]
+			// XML substitutions win despite FeatureSwitchDefinition and feature settings.
+			[ExpectedInstructionSequence (new[] {
+				"nop",
+				"ldc.i4.1",
+				"stloc.0",
+				"ldloc.0",
+				"pop",
+				"call System.Void Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions::RequiresUnreferencedCode()",
+				"nop",
+				"ret"
+			})]
+			[ExpectedWarning ("IL2026", ProducedBy = Tool.Trimmer | Tool.NativeAot)]
+			static void TestXmlWinsOverGuardAndSwitch () {
+				if (GuardAndSwitchWithXml)
+					RequiresUnreferencedCode ();
+			}
+
+			[Kept]
+			public static void Test () {
+				TestSwitchWinsOverGuard ();
+				TestSwitchNotSetWinsOverGuard ();
+				TestXmlWinsOverGuard ();
+				TestXmlWinsOverSwitch ();
+				TestXmlWinsOverGuardAndSwitch ();
+			}
+		}
+
+		[RequiresDynamicCode (nameof (RequiresDynamicCode))]
+		static void RequiresDynamicCode () { }
+
+		[Kept]
+		[KeptAttributeAttribute (typeof (RequiresUnreferencedCodeAttribute))]
+		[RequiresUnreferencedCode (nameof (RequiresUnreferencedCode))]
+		static void RequiresUnreferencedCode () { }
+
+		[Kept]
+		[KeptAttributeAttribute (typeof (RequiresAssemblyFilesAttribute))]
+		[RequiresAssemblyFiles (nameof (RequiresAssemblyFiles))]
+		static void RequiresAssemblyFiles () { }
+	}
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Substitutions/FeatureGuardSubstitutions.xml b/src/tools/illink/test/Mono.Linker.Tests.Cases/Substitutions/FeatureGuardSubstitutions.xml
new file mode 100644
index 000000000000..ab5947a1a6b4
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Substitutions/FeatureGuardSubstitutions.xml
@@ -0,0 +1,10 @@
+<linker>
+    <assembly fullname="test, Version=0.0.0.0, Culture=neutral, PublicKeyToken=null">
+        <!-- Override the substitutions implied by FeatureGuard/FeatureSwitchDefinition. -->
+        <type fullname="Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutions/FeatureGuardPrecedence">
+            <method signature="System.Boolean get_GuardWithXml()" body="stub" value="true" />
+            <method signature="System.Boolean get_SwitchWithXml()" body="stub" value="true" />
+            <method signature="System.Boolean get_GuardAndSwitchWithXml()" body="stub" value="true" />
+        </type>
+    </assembly>
+</linker>
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/Substitutions/FeatureGuardSubstitutionsDisabled.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/Substitutions/FeatureGuardSubstitutionsDisabled.cs
new file mode 100644
index 000000000000..198595d79394
--- /dev/null
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/Substitutions/FeatureGuardSubstitutionsDisabled.cs
@@ -0,0 +1,63 @@
+// Copyright (c) .NET Foundation and contributors. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using ILLink.RoslynAnalyzer;
+using Mono.Linker.Tests.Cases.Expectations.Assertions;
+using Mono.Linker.Tests.Cases.Expectations.Helpers;
+using Mono.Linker.Tests.Cases.Expectations.Metadata;
+
+namespace Mono.Linker.Tests.Cases.Substitutions
+{
+	[ExpectedNoWarnings]
+	[SetupCompileBefore ("TestFeatures.dll", new[] { "Dependencies/TestFeatures.cs" })]
+	[SetupLinkerArgument ("--disable-opt", "substitutefeatureguards")]
+	[SetupLinkerArgument ("--feature", "Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutionsDisabled.FeatureSwitch", "false")]
+	public class FeatureGuardSubstitutionsDisabled
+	{
+		public static void Main ()
+		{
+			TestGuard ();
+			TestFeatureSwitch ();
+		}
+
+		[Kept]
+		[ExpectedWarning ("IL4000", ProducedBy = Tool.Analyzer)]
+		[KeptAttributeAttribute (typeof (FeatureGuardAttribute))]
+		[FeatureGuard (typeof (RequiresUnreferencedCodeAttribute))]
+		static bool GuardUnreferencedCode {
+			[Kept]
+			get => throw null;
+		}
+
+		[Kept]
+		// Body is not modified because feature guard substitutions are disabled in this test
+		[ExpectedWarning ("IL2026")]
+		static void TestGuard ()
+		{
+			if (GuardUnreferencedCode)
+				RequiresUnreferencedCode ();
+		}
+
+		[FeatureSwitchDefinition ("Mono.Linker.Tests.Cases.Substitutions.FeatureGuardSubstitutionsDisabled.FeatureSwitch")]
+		static bool FeatureSwitch => throw null;
+
+		[Kept]
+		[ExpectedWarning ("IL2026", ProducedBy = Tool.Analyzer)]
+		// Feature switches are still substituted when feature guard substitutions are disabled
+		[ExpectBodyModified]
+		static void TestFeatureSwitch ()
+		{
+			if (FeatureSwitch)
+				RequiresUnreferencedCode ();
+		}
+
+		[Kept]
+		[KeptAttributeAttribute (typeof (RequiresUnreferencedCodeAttribute))]
+		[RequiresUnreferencedCode (nameof (RequiresUnreferencedCode))]
+		static void RequiresUnreferencedCode () { }
+	}
+}
diff --git a/src/tools/illink/test/Mono.Linker.Tests.Cases/UnreachableBlock/CompilerGeneratedCodeSubstitutions.cs b/src/tools/illink/test/Mono.Linker.Tests.Cases/UnreachableBlock/CompilerGeneratedCodeSubstitutions.cs
index 5b1edc37671a..3c59c3e446aa 100644
--- a/src/tools/illink/test/Mono.Linker.Tests.Cases/UnreachableBlock/CompilerGeneratedCodeSubstitutions.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests.Cases/UnreachableBlock/CompilerGeneratedCodeSubstitutions.cs
@@ -1,6 +1,7 @@
 ﻿using System;
 using System.Collections.Generic;
 using System.Diagnostics.CodeAnalysis;
+using System.Linq;
 using System.Threading.Tasks;
 using Mono.Linker.Tests.Cases.Expectations.Assertions;
 using Mono.Linker.Tests.Cases.Expectations.Metadata;
@@ -174,7 +175,8 @@ static IEnumerable<int> TestBranchWithYieldBefore ()
 
 			public static void Test ()
 			{
-				TestBranchWithNormalCall ();
+				// Use the IEnumerable to mark the IEnumerable methods
+				foreach (var _ in TestBranchWithNormalCall ()) ;
 				TestBranchWithYieldAfter ();
 				TestBranchWithYieldBefore ();
 			}
diff --git a/src/tools/illink/test/Mono.Linker.Tests/TestCasesRunner/AssemblyChecker.cs b/src/tools/illink/test/Mono.Linker.Tests/TestCasesRunner/AssemblyChecker.cs
index 8ed15724cc11..467c2272c1be 100644
--- a/src/tools/illink/test/Mono.Linker.Tests/TestCasesRunner/AssemblyChecker.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests/TestCasesRunner/AssemblyChecker.cs
@@ -345,7 +345,7 @@ IEnumerable<string> VerifyInterfaces (TypeDefinition src, TypeDefinition linked)
 				}
 
 				if (expectedInterfaces.Any ()) {
-					yield return $"Expected interfaces were not found on {src}";
+					yield return $"Expected interfaces were not found on {src}. Expected to find: \n{string.Join(Environment.NewLine, expectedInterfaces)}\n";
 				}
 			}
 		}
diff --git a/src/tools/illink/test/Mono.Linker.Tests/TestCasesRunner/ResultChecker.cs b/src/tools/illink/test/Mono.Linker.Tests/TestCasesRunner/ResultChecker.cs
index 7e730cb0b592..2f05eab1e4c6 100644
--- a/src/tools/illink/test/Mono.Linker.Tests/TestCasesRunner/ResultChecker.cs
+++ b/src/tools/illink/test/Mono.Linker.Tests/TestCasesRunner/ResultChecker.cs
@@ -101,6 +101,7 @@ public virtual void Check (TrimmedTestCaseResult linkResult)
 					if (!HasActiveSkipKeptItemsValidationAttribute(linkResult.TestCase.FindTypeDefinition (original))) {
 						CreateAssemblyChecker (original, linked, linkResult).Verify ();
 					}
+					CreateILChecker ().Check(linkResult, original);
 				}
 
 				VerifyLinkingOfOtherAssemblies (original);
@@ -279,7 +280,6 @@ protected virtual void AdditionalChecking (TrimmedTestCaseResult linkResult, Ass
 
 		protected virtual void InitialChecking (TrimmedTestCaseResult linkResult, AssemblyDefinition original, AssemblyDefinition linked)
 		{
-			CreateILChecker ().Check(linkResult, original);
 			ValidateTypeRefsHaveValidAssemblyRefs (linked);
 		}
 
diff --git a/src/workloads/workloads.csproj b/src/workloads/workloads.csproj
index 2c0bb6784c7d..2f91a14f59c6 100644
--- a/src/workloads/workloads.csproj
+++ b/src/workloads/workloads.csproj
@@ -31,7 +31,7 @@
 
   <ItemGroup>
     <PackageReference Include="Microsoft.DotNet.Build.Tasks.Workloads" Version="$(MicrosoftDotNetBuildTasksWorkloadsPackageVersion)" GeneratePathProperty="true" />
-    <PackageReference Include="Microsoft.Signed.WiX" Version="$(WixPackageVersion)" GeneratePathProperty="true" />
+    <PackageReference Include="Microsoft.Signed.WiX" Version="$(MicrosoftSignedWixVersion)" GeneratePathProperty="true" />
     <PackageReference Include="MicroBuild.Plugins.SwixBuild.Dotnet" Version="$(SwixPackageVersion)" GeneratePathProperty="true" />
     <PackageReference Include="Microsoft.DotNet.Build.Tasks.Installers" Version="$(MicrosoftDotNetBuildTasksInstallersVersion)" GeneratePathProperty="true" />
   </ItemGroup>
@@ -187,6 +187,15 @@
       <VSDrop Include="%(PartitionedSwixProjects.ZipFile)" SourceDirectory="%(ManifestOutputPath)" />
     </ItemGroup>
 
+    <!-- Generate metadata for VSDROP automation. This information cannot be obtained during staging when insertions are triggered -->
+    <ItemGroup>
+      <VSDropMetadata Include="$(FileVersion)" />
+      <VSDropMetadata Include="$(BUILD_REPOSITORY_NAME)" />
+      <VSDropMetadata Include="$(BUILD_SOURCEBRANCH)"/>
+    </ItemGroup>
+
+    <WriteLinesToFile File="%(VSDrop.SourceDirectory)\.metadata" Lines="@(VSDropMetadata)" Overwrite="true" Condition="'$(OfficialBuild)' == 'true'" />
+
     <MakeDir Directories="$(ArtifactsNonShippingPackagesDir)" />
     <MakeDir Directories="$(VisualStudioSetupInsertionPath)" />
     <ZipDirectory Overwrite="true" SourceDirectory="%(SourceDirectory)"